-rw-r--r--contrib/llvm/FREEBSD-Xlist392
-rw-r--r--contrib/llvm/LICENSE.TXT2
-rw-r--r--contrib/llvm/include/llvm-c/Core.h371
-rw-r--r--contrib/llvm/include/llvm-c/DebugInfo.h88
-rw-r--r--contrib/llvm/include/llvm-c/Error.h69
-rw-r--r--contrib/llvm/include/llvm-c/ExecutionEngine.h2
-rw-r--r--contrib/llvm/include/llvm-c/OptRemarks.h204
-rw-r--r--contrib/llvm/include/llvm-c/OrcBindings.h73
-rw-r--r--contrib/llvm/include/llvm-c/TargetMachine.h6
-rw-r--r--contrib/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h43
-rw-r--r--contrib/llvm/include/llvm-c/Transforms/Coroutines.h55
-rw-r--r--contrib/llvm/include/llvm-c/Transforms/Scalar.h9
-rw-r--r--contrib/llvm/include/llvm-c/Types.h14
-rw-r--r--contrib/llvm/include/llvm-c/lto.h12
-rw-r--r--contrib/llvm/include/llvm/ADT/APFloat.h36
-rw-r--r--contrib/llvm/include/llvm/ADT/APInt.h64
-rw-r--r--contrib/llvm/include/llvm/ADT/Any.h10
-rw-r--r--contrib/llvm/include/llvm/ADT/BitVector.h17
-rw-r--r--contrib/llvm/include/llvm/ADT/DenseMap.h81
-rw-r--r--contrib/llvm/include/llvm/ADT/DenseSet.h35
-rw-r--r--contrib/llvm/include/llvm/ADT/GraphTraits.h7
-rw-r--r--contrib/llvm/include/llvm/ADT/Hashing.h15
-rw-r--r--contrib/llvm/include/llvm/ADT/ImmutableList.h36
-rw-r--r--contrib/llvm/include/llvm/ADT/IntervalMap.h24
-rw-r--r--contrib/llvm/include/llvm/ADT/Optional.h22
-rw-r--r--contrib/llvm/include/llvm/ADT/PointerIntPair.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/PointerSumType.h128
-rw-r--r--contrib/llvm/include/llvm/ADT/PostOrderIterator.h3
-rw-r--r--contrib/llvm/include/llvm/ADT/STLExtras.h358
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallBitVector.h59
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallVector.h10
-rw-r--r--contrib/llvm/include/llvm/ADT/SparseBitVector.h75
-rw-r--r--contrib/llvm/include/llvm/ADT/StringExtras.h11
-rw-r--r--contrib/llvm/include/llvm/ADT/Triple.h32
-rw-r--r--contrib/llvm/include/llvm/ADT/bit.h59
-rw-r--r--contrib/llvm/include/llvm/ADT/iterator.h38
-rw-r--r--contrib/llvm/include/llvm/Analysis/AliasAnalysis.h143
-rw-r--r--contrib/llvm/include/llvm/Analysis/AliasSetTracker.h57
-rw-r--r--contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h18
-rw-r--r--contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/CFG.h3
-rw-r--r--contrib/llvm/include/llvm/Analysis/CFGPrinter.h5
-rw-r--r--contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h39
-rw-r--r--contrib/llvm/include/llvm/Analysis/CaptureTracking.h23
-rw-r--r--contrib/llvm/include/llvm/Analysis/CmpInstAnalysis.h17
-rw-r--r--contrib/llvm/include/llvm/Analysis/DemandedBits.h18
-rw-r--r--contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h11
-rw-r--r--contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h198
-rw-r--r--contrib/llvm/include/llvm/Analysis/GlobalsModRef.h6
-rw-r--r--contrib/llvm/include/llvm/Analysis/GuardUtils.h26
-rw-r--r--contrib/llvm/include/llvm/Analysis/IVDescriptors.h357
-rw-r--r--contrib/llvm/include/llvm/Analysis/IndirectCallSiteVisitor.h35
-rw-r--r--contrib/llvm/include/llvm/Analysis/IndirectCallVisitor.h39
-rw-r--r--contrib/llvm/include/llvm/Analysis/InlineCost.h36
-rw-r--r--contrib/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h150
-rw-r--r--contrib/llvm/include/llvm/Analysis/InstructionSimplify.h47
-rw-r--r--contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h28
-rw-r--r--contrib/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h69
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h56
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopInfo.h26
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h9
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h11
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemoryLocation.h167
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemorySSA.h83
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemorySSAUpdater.h96
-rw-r--r--contrib/llvm/include/llvm/Analysis/MustExecute.h140
-rw-r--r--contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h38
-rw-r--r--contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h3
-rw-r--r--contrib/llvm/include/llvm/Analysis/OrderedInstructions.h (renamed from contrib/llvm/include/llvm/Transforms/Utils/OrderedInstructions.h)6
-rw-r--r--contrib/llvm/include/llvm/Analysis/Passes.h4
-rw-r--r--contrib/llvm/include/llvm/Analysis/PhiValues.h16
-rw-r--r--contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h15
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScalarEvolution.h4
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h6
-rw-r--r--contrib/llvm/include/llvm/Analysis/SparsePropagation.h18
-rw-r--r--contrib/llvm/include/llvm/Analysis/StackSafetyAnalysis.h120
-rw-r--r--contrib/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h86
-rw-r--r--contrib/llvm/include/llvm/Analysis/SyntheticCountsUtils.h15
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def27
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h55
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h18
-rw-r--r--contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h8
-rw-r--r--contrib/llvm/include/llvm/Analysis/TypeMetadataUtils.h7
-rw-r--r--contrib/llvm/include/llvm/Analysis/ValueTracking.h68
-rw-r--r--contrib/llvm/include/llvm/Analysis/VectorUtils.h424
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h70
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/Dwarf.def111
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/Dwarf.h14
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/ELF.h56
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/ELFRelocs/MSP430.def16
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MachO.h5
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MsgPack.def108
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MsgPack.h93
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MsgPackReader.h148
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MsgPackTypes.h372
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MsgPackWriter.h131
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/Wasm.h130
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/WasmRelocs.def2
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitcodeReader.h1
-rw-r--r--contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h11
-rw-r--r--contrib/llvm/include/llvm/CodeGen/AsmPrinter.h16
-rw-r--r--contrib/llvm/include/llvm/CodeGen/AsmPrinterHandler.h (renamed from contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h)6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h253
-rw-r--r--contrib/llvm/include/llvm/CodeGen/BuiltinGCs.h (renamed from contrib/llvm/include/llvm/CodeGen/GCs.h)23
-rw-r--r--contrib/llvm/include/llvm/CodeGen/CommandFlags.inc28
-rw-r--r--contrib/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h (renamed from contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h)46
-rw-r--r--contrib/llvm/include/llvm/CodeGen/DebugHandlerBase.h (renamed from contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h)17
-rw-r--r--contrib/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h47
-rw-r--r--contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h3
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GCMetadata.h9
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GCStrategy.h50
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h237
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h110
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h10
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h8
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h19
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h16
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h104
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h111
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h35
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h12
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h145
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h12
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h25
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h471
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h11
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h3
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h100
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h8
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveIntervals.h6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h17
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveRegUnits.h8
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h7
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h42
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFunction.h119
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineInstr.h281
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h17
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h8
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h22
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineOutliner.h78
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h80
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachinePipeliner.h608
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h19
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineScheduler.h25
-rw-r--r--contrib/llvm/include/llvm/CodeGen/Passes.h12
-rw-r--r--contrib/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h3
-rw-r--r--contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h10
-rw-r--r--contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h16
-rw-r--r--contrib/llvm/include/llvm/CodeGen/RegisterUsageInfo.h6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h23
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h3
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h13
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAG.h119
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h11
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h12
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h141
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SlotIndexes.h16
-rw-r--r--contrib/llvm/include/llvm/CodeGen/StackMaps.h44
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetFrameLowering.h7
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h67
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetLowering.h304
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h34
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h14
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h38
-rw-r--r--contrib/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h4
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h19
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h35
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def577
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h12
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h23
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h8
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h31
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h62
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h9
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h42
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h28
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h9
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DIContext.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h20
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h147
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h26
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h31
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h20
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h4
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h3
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h9
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h13
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h9
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h26
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h15
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFSection.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h15
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h206
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h59
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h30
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h36
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h4
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h34
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h39
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h27
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIASession.h3
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h42
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h15
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBFrameData.h36
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h47
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h8
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h3
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h13
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h10
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h11
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h43
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h60
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h11
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h12
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h38
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h30
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h51
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h50
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h (renamed from contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h)21
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h75
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h74
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h61
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h42
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h74
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h46
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h4
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawError.h34
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h148
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h48
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h13
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h12
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h48
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h11
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h11
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h46
-rw-r--r--contrib/llvm/include/llvm/Demangle/Compiler.h (renamed from contrib/llvm/lib/Demangle/Compiler.h)0
-rw-r--r--contrib/llvm/include/llvm/Demangle/Demangle.h10
-rw-r--r--contrib/llvm/include/llvm/Demangle/ItaniumDemangle.h5184
-rw-r--r--contrib/llvm/include/llvm/Demangle/MicrosoftDemangle.h276
-rw-r--r--contrib/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h605
-rw-r--r--contrib/llvm/include/llvm/Demangle/StringView.h (renamed from contrib/llvm/lib/Demangle/StringView.h)0
-rw-r--r--contrib/llvm/include/llvm/Demangle/Utility.h (renamed from contrib/llvm/lib/Demangle/Utility.h)31
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h35
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h93
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h173
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h10
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/Core.h777
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h94
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h17
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h26
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h257
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h130
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h124
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/Layer.h76
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h195
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h82
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h4
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h14
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h75
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h83
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h3
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h76
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h98
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h27
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h163
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h25
-rw-r--r--contrib/llvm/include/llvm/IR/Attributes.h117
-rw-r--r--contrib/llvm/include/llvm/IR/Attributes.td9
-rw-r--r--contrib/llvm/include/llvm/IR/BasicBlock.h27
-rw-r--r--contrib/llvm/include/llvm/IR/CFG.h169
-rw-r--r--contrib/llvm/include/llvm/IR/CFGDiff.h285
-rw-r--r--contrib/llvm/include/llvm/IR/CallSite.h5
-rw-r--r--contrib/llvm/include/llvm/IR/CallingConv.h3
-rw-r--r--contrib/llvm/include/llvm/IR/Constant.h3
-rw-r--r--contrib/llvm/include/llvm/IR/Constants.h13
-rw-r--r--contrib/llvm/include/llvm/IR/DIBuilder.h70
-rw-r--r--contrib/llvm/include/llvm/IR/DataLayout.h8
-rw-r--r--contrib/llvm/include/llvm/IR/DebugInfoFlags.def41
-rw-r--r--contrib/llvm/include/llvm/IR/DebugInfoMetadata.h448
-rw-r--r--contrib/llvm/include/llvm/IR/DebugLoc.h7
-rw-r--r--contrib/llvm/include/llvm/IR/DiagnosticInfo.h24
-rw-r--r--contrib/llvm/include/llvm/IR/DomTreeUpdater.h8
-rw-r--r--contrib/llvm/include/llvm/IR/Dominators.h100
-rw-r--r--contrib/llvm/include/llvm/IR/Function.h20
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalValue.h1
-rw-r--r--contrib/llvm/include/llvm/IR/IRBuilder.h266
-rw-r--r--contrib/llvm/include/llvm/IR/IRPrintingPasses.h16
-rw-r--r--contrib/llvm/include/llvm/IR/InstVisitor.h72
-rw-r--r--contrib/llvm/include/llvm/IR/InstrTypes.h1162
-rw-r--r--contrib/llvm/include/llvm/IR/Instruction.def153
-rw-r--r--contrib/llvm/include/llvm/IR/Instruction.h50
-rw-r--r--contrib/llvm/include/llvm/IR/Instructions.h1279
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicInst.h42
-rw-r--r--contrib/llvm/include/llvm/IR/Intrinsics.td152
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td17
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td188
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td14893
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td6
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsRISCV.td44
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td72
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsX86.td801
-rw-r--r--contrib/llvm/include/llvm/IR/LLVMContext.h1
-rw-r--r--contrib/llvm/include/llvm/IR/LegacyPassManager.h3
-rw-r--r--contrib/llvm/include/llvm/IR/LegacyPassManagers.h17
-rw-r--r--contrib/llvm/include/llvm/IR/Metadata.h16
-rw-r--r--contrib/llvm/include/llvm/IR/Module.h43
-rw-r--r--contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h187
-rw-r--r--contrib/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h15
-rw-r--r--contrib/llvm/include/llvm/IR/Operator.h31
-rw-r--r--contrib/llvm/include/llvm/IR/PassInstrumentation.h207
-rw-r--r--contrib/llvm/include/llvm/IR/PassManager.h133
-rw-r--r--contrib/llvm/include/llvm/IR/PassManagerInternal.h11
-rw-r--r--contrib/llvm/include/llvm/IR/PassTimingInfo.h108
-rw-r--r--contrib/llvm/include/llvm/IR/PatternMatch.h252
-rw-r--r--contrib/llvm/include/llvm/IR/RuntimeLibcalls.def8
-rw-r--r--contrib/llvm/include/llvm/IR/TypeBuilder.h407
-rw-r--r--contrib/llvm/include/llvm/IR/Value.h3
-rw-r--r--contrib/llvm/include/llvm/InitializePasses.h19
-rw-r--r--contrib/llvm/include/llvm/LTO/Config.h7
-rw-r--r--contrib/llvm/include/llvm/LTO/LTO.h24
-rw-r--r--contrib/llvm/include/llvm/LTO/SummaryBasedOptimizations.h17
-rw-r--r--contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h5
-rw-r--r--contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h15
-rw-r--r--contrib/llvm/include/llvm/LinkAllPasses.h8
-rw-r--r--contrib/llvm/include/llvm/MC/MCAsmInfoWasm.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCAsmMacro.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCAssembler.h11
-rw-r--r--contrib/llvm/include/llvm/MC/MCCodeView.h72
-rw-r--r--contrib/llvm/include/llvm/MC/MCContext.h4
-rw-r--r--contrib/llvm/include/llvm/MC/MCDwarf.h7
-rw-r--r--contrib/llvm/include/llvm/MC/MCELFObjectWriter.h5
-rw-r--r--contrib/llvm/include/llvm/MC/MCExpr.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCInst.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstrAnalysis.h70
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstrDesc.h9
-rw-r--r--contrib/llvm/include/llvm/MC/MCObjectFileInfo.h33
-rw-r--r--contrib/llvm/include/llvm/MC/MCObjectStreamer.h13
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h16
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h9
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h6
-rw-r--r--contrib/llvm/include/llvm/MC/MCRegisterInfo.h6
-rw-r--r--contrib/llvm/include/llvm/MC/MCSchedule.h21
-rw-r--r--contrib/llvm/include/llvm/MC/MCSection.h7
-rw-r--r--contrib/llvm/include/llvm/MC/MCStreamer.h44
-rw-r--r--contrib/llvm/include/llvm/MC/MCSymbolWasm.h48
-rw-r--r--contrib/llvm/include/llvm/MC/MCWasmObjectWriter.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCWin64EH.h8
-rw-r--r--contrib/llvm/include/llvm/MC/MCWinEH.h9
-rw-r--r--contrib/llvm/include/llvm/MCA/Context.h (renamed from contrib/llvm/tools/llvm-mca/Context.h)29
-rw-r--r--contrib/llvm/include/llvm/MCA/HWEventListener.h (renamed from contrib/llvm/tools/llvm-mca/HWEventListener.h)45
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/HardwareUnit.h (renamed from contrib/llvm/tools/llvm-mca/HardwareUnit.h)8
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h207
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h (renamed from contrib/llvm/tools/llvm-mca/RegisterFile.h)145
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h410
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h (renamed from contrib/llvm/tools/llvm-mca/RetireControlUnit.h)20
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h214
-rw-r--r--contrib/llvm/include/llvm/MCA/InstrBuilder.h77
-rw-r--r--contrib/llvm/include/llvm/MCA/Instruction.h (renamed from contrib/llvm/tools/llvm-mca/Instruction.h)285
-rw-r--r--contrib/llvm/include/llvm/MCA/Pipeline.h (renamed from contrib/llvm/tools/llvm-mca/Pipeline.h)30
-rw-r--r--contrib/llvm/include/llvm/MCA/SourceMgr.h57
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/DispatchStage.h (renamed from contrib/llvm/tools/llvm-mca/DispatchStage.h)75
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/EntryStage.h52
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/ExecuteStage.h80
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/InstructionTables.h (renamed from contrib/llvm/tools/llvm-mca/InstructionTables.h)33
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/RetireStage.h (renamed from contrib/llvm/tools/llvm-mca/RetireStage.h)34
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/Stage.h (renamed from contrib/llvm/tools/llvm-mca/Stage.h)62
-rw-r--r--contrib/llvm/include/llvm/MCA/Support.h119
-rw-r--r--contrib/llvm/include/llvm/Object/COFF.h12
-rw-r--r--contrib/llvm/include/llvm/Object/ELF.h8
-rw-r--r--contrib/llvm/include/llvm/Object/ELFObjectFile.h29
-rw-r--r--contrib/llvm/include/llvm/Object/ELFTypes.h25
-rw-r--r--contrib/llvm/include/llvm/Object/Error.h1
-rw-r--r--contrib/llvm/include/llvm/Object/MachO.h5
-rw-r--r--contrib/llvm/include/llvm/Object/ObjectFile.h22
-rw-r--r--contrib/llvm/include/llvm/Object/RelocVisitor.h3
-rw-r--r--contrib/llvm/include/llvm/Object/Wasm.h89
-rw-r--r--contrib/llvm/include/llvm/Object/WasmTraits.h14
-rw-r--r--contrib/llvm/include/llvm/ObjectYAML/COFFYAML.h6
-rw-r--r--contrib/llvm/include/llvm/ObjectYAML/ELFYAML.h2
-rw-r--r--contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h37
-rw-r--r--contrib/llvm/include/llvm/Option/OptTable.h8
-rw-r--r--contrib/llvm/include/llvm/Pass.h11
-rw-r--r--contrib/llvm/include/llvm/Passes/PassBuilder.h84
-rw-r--r--contrib/llvm/include/llvm/Passes/StandardInstrumentations.h70
-rw-r--r--contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h13
-rw-r--r--contrib/llvm/include/llvm/ProfileData/GCOV.h20
-rw-r--r--contrib/llvm/include/llvm/ProfileData/InstrProf.h6
-rw-r--r--contrib/llvm/include/llvm/ProfileData/InstrProfReader.h30
-rw-r--r--contrib/llvm/include/llvm/ProfileData/SampleProf.h91
-rw-r--r--contrib/llvm/include/llvm/ProfileData/SampleProfReader.h80
-rw-r--r--contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h51
-rw-r--r--contrib/llvm/include/llvm/Support/AArch64TargetParser.def18
-rw-r--r--contrib/llvm/include/llvm/Support/AArch64TargetParser.h124
-rw-r--r--contrib/llvm/include/llvm/Support/AMDGPUMetadata.h15
-rw-r--r--contrib/llvm/include/llvm/Support/ARMTargetParser.def15
-rw-r--r--contrib/llvm/include/llvm/Support/ARMTargetParser.h264
-rw-r--r--contrib/llvm/include/llvm/Support/ARMWinEH.h88
-rw-r--r--contrib/llvm/include/llvm/Support/Allocator.h55
-rw-r--r--contrib/llvm/include/llvm/Support/BinaryStreamArray.h31
-rw-r--r--contrib/llvm/include/llvm/Support/BinaryStreamReader.h5
-rw-r--r--contrib/llvm/include/llvm/Support/BuryPointer.h30
-rw-r--r--contrib/llvm/include/llvm/Support/CFGUpdate.h118
-rw-r--r--contrib/llvm/include/llvm/Support/Chrono.h8
-rw-r--r--contrib/llvm/include/llvm/Support/CodeGen.h7
-rw-r--r--contrib/llvm/include/llvm/Support/CommandLine.h19
-rw-r--r--contrib/llvm/include/llvm/Support/Compiler.h19
-rw-r--r--contrib/llvm/include/llvm/Support/Compression.h13
-rw-r--r--contrib/llvm/include/llvm/Support/Debug.h4
-rw-r--r--contrib/llvm/include/llvm/Support/DebugCounter.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Error.h105
-rw-r--r--contrib/llvm/include/llvm/Support/ErrorHandling.h4
-rw-r--r--contrib/llvm/include/llvm/Support/FileCheck.h282
-rw-r--r--contrib/llvm/include/llvm/Support/FileOutputBuffer.h4
-rw-r--r--contrib/llvm/include/llvm/Support/FileSystem.h143
-rw-r--r--contrib/llvm/include/llvm/Support/FormatVariadicDetails.h2
-rw-r--r--contrib/llvm/include/llvm/Support/GenericDomTree.h58
-rw-r--r--contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h106
-rw-r--r--contrib/llvm/include/llvm/Support/GraphWriter.h23
-rw-r--r--contrib/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h93
-rw-r--r--contrib/llvm/include/llvm/Support/JSON.h9
-rw-r--r--contrib/llvm/include/llvm/Support/LowLevelTypeImpl.h9
-rw-r--r--contrib/llvm/include/llvm/Support/MSVCErrorWorkarounds.h84
-rw-r--r--contrib/llvm/include/llvm/Support/Path.h16
-rw-r--r--contrib/llvm/include/llvm/Support/ScopedPrinter.h2
-rw-r--r--contrib/llvm/include/llvm/Support/SymbolRemappingReader.h133
-rw-r--r--contrib/llvm/include/llvm/Support/TargetOpcodes.def54
-rw-r--r--contrib/llvm/include/llvm/Support/TargetParser.h293
-rw-r--r--contrib/llvm/include/llvm/Support/Threading.h3
-rw-r--r--contrib/llvm/include/llvm/Support/Timer.h14
-rw-r--r--contrib/llvm/include/llvm/Support/VirtualFileSystem.h764
-rw-r--r--contrib/llvm/include/llvm/Support/Win64EH.h19
-rw-r--r--contrib/llvm/include/llvm/Support/WithColor.h63
-rw-r--r--contrib/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h2
-rw-r--r--contrib/llvm/include/llvm/Support/X86TargetParser.def46
-rw-r--r--contrib/llvm/include/llvm/Support/YAMLTraits.h326
-rw-r--r--contrib/llvm/include/llvm/Support/raw_ostream.h12
-rw-r--r--contrib/llvm/include/llvm/Support/type_traits.h5
-rw-r--r--contrib/llvm/include/llvm/TableGen/StringMatcher.h7
-rw-r--r--contrib/llvm/include/llvm/Target/CodeGenCWrappers.h4
-rw-r--r--contrib/llvm/include/llvm/Target/GenericOpcodes.td119
-rw-r--r--contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td7
-rw-r--r--contrib/llvm/include/llvm/Target/Target.td13
-rw-r--r--contrib/llvm/include/llvm/Target/TargetInstrPredicate.td242
-rw-r--r--contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h22
-rw-r--r--contrib/llvm/include/llvm/Target/TargetMachine.h61
-rw-r--r--contrib/llvm/include/llvm/Target/TargetOptions.h12
-rw-r--r--contrib/llvm/include/llvm/Target/TargetPfmCounters.td50
-rw-r--r--contrib/llvm/include/llvm/Target/TargetSchedule.td66
-rw-r--r--contrib/llvm/include/llvm/Target/TargetSelectionDAG.td60
-rw-r--r--contrib/llvm/include/llvm/Testing/Support/SupportHelpers.h9
-rw-r--r--contrib/llvm/include/llvm/TextAPI/ELF/ELFStub.h69
-rw-r--r--contrib/llvm/include/llvm/TextAPI/ELF/TBEHandler.h45
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO.h5
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h3
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h64
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h31
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/SampleProfile.h7
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation.h33
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h31
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h48
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h4
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h33
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar.h20
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h67
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/GVN.h17
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h38
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h27
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h74
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h47
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h14
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/Scalarizer.h35
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h38
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils.h7
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h67
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h13
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h32
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/Cloning.h66
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h23
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h3
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/GuardUtils.h30
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/Local.h74
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h5
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h450
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h18
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/PredicateInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h25
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h23
-rw-r--r--contrib/llvm/include/llvm/Transforms/Vectorize.h4
-rw-r--r--contrib/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h27
-rw-r--r--contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h21
-rw-r--r--contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h11
-rw-r--r--contrib/llvm/include/llvm/XRay/BlockIndexer.h69
-rw-r--r--contrib/llvm/include/llvm/XRay/BlockPrinter.h62
-rw-r--r--contrib/llvm/include/llvm/XRay/BlockVerifier.h72
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRLogBuilder.h41
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRRecordConsumer.h55
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRRecordProducer.h51
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRRecords.h450
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRTraceExpander.h63
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRTraceWriter.h56
-rw-r--r--contrib/llvm/include/llvm/XRay/FileHeaderReader.h33
-rw-r--r--contrib/llvm/include/llvm/XRay/Profile.h150
-rw-r--r--contrib/llvm/include/llvm/XRay/RecordPrinter.h50
-rw-r--r--contrib/llvm/include/llvm/XRay/Trace.h22
-rw-r--r--contrib/llvm/include/llvm/XRay/XRayRecord.h23
-rw-r--r--contrib/llvm/include/llvm/XRay/YAMLXRayRecord.h9
-rw-r--r--contrib/llvm/include/llvm/module.extern.modulemap5
-rw-r--r--contrib/llvm/include/llvm/module.install.modulemap27
-rw-r--r--contrib/llvm/include/llvm/module.modulemap82
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysis.cpp179
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp86
-rw-r--r--contrib/llvm/lib/Analysis/AliasSetTracker.cpp204
-rw-r--r--contrib/llvm/lib/Analysis/Analysis.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp327
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp16
-rw-r--r--contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp10
-rw-r--r--contrib/llvm/lib/Analysis/CFG.cpp5
-rw-r--r--contrib/llvm/lib/Analysis/CFGPrinter.cpp14
-rw-r--r--contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp20
-rw-r--r--contrib/llvm/lib/Analysis/CFLGraph.h2
-rw-r--r--contrib/llvm/lib/Analysis/CGSCCPassManager.cpp15
-rw-r--r--contrib/llvm/lib/Analysis/CallGraph.cpp3
-rw-r--r--contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp51
-rw-r--r--contrib/llvm/lib/Analysis/CaptureTracking.cpp50
-rw-r--r--contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp28
-rw-r--r--contrib/llvm/lib/Analysis/ConstantFolding.cpp317
-rw-r--r--contrib/llvm/lib/Analysis/DemandedBits.cpp195
-rw-r--r--contrib/llvm/lib/Analysis/DependenceAnalysis.cpp11
-rw-r--r--contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp579
-rw-r--r--contrib/llvm/lib/Analysis/EHPersonalities.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/GlobalsModRef.cpp40
-rw-r--r--contrib/llvm/lib/Analysis/GuardUtils.cpp21
-rw-r--r--contrib/llvm/lib/Analysis/IVDescriptors.cpp1089
-rw-r--r--contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/InlineCost.cpp133
-rw-r--r--contrib/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp157
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp482
-rw-r--r--contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp21
-rw-r--r--contrib/llvm/lib/Analysis/LazyCallGraph.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/LazyValueInfo.cpp75
-rw-r--r--contrib/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp391
-rw-r--r--contrib/llvm/lib/Analysis/Lint.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/Loads.cpp6
-rw-r--r--contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp60
-rw-r--r--contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/LoopInfo.cpp135
-rw-r--r--contrib/llvm/lib/Analysis/LoopPass.cpp32
-rw-r--r--contrib/llvm/lib/Analysis/MemDepPrinter.cpp5
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp100
-rw-r--r--contrib/llvm/lib/Analysis/MemoryLocation.cpp96
-rw-r--r--contrib/llvm/lib/Analysis/MemorySSA.cpp427
-rw-r--r--contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp544
-rw-r--r--contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp118
-rw-r--r--contrib/llvm/lib/Analysis/MustExecute.cpp255
-rw-r--r--contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp8
-rw-r--r--contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp152
-rw-r--r--contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp3
-rw-r--r--contrib/llvm/lib/Analysis/OrderedInstructions.cpp (renamed from contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp)2
-rw-r--r--contrib/llvm/lib/Analysis/PhiValues.cpp17
-rw-r--r--contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp45
-rw-r--r--contrib/llvm/lib/Analysis/RegionPass.cpp1
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp480
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp14
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp33
-rw-r--r--contrib/llvm/lib/Analysis/StackSafetyAnalysis.cpp673
-rw-r--r--contrib/llvm/lib/Analysis/SyncDependenceAnalysis.cpp380
-rw-r--r--contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp34
-rw-r--r--contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp34
-rw-r--r--contrib/llvm/lib/Analysis/TargetTransformInfo.cpp119
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp31
-rw-r--r--contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp42
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp649
-rw-r--r--contrib/llvm/lib/Analysis/VectorUtils.cpp530
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.cpp18
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.cpp465
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.h17
-rw-r--r--contrib/llvm/lib/AsmParser/LLToken.h7
-rw-r--r--contrib/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp324
-rw-r--r--contrib/llvm/lib/BinaryFormat/Dwarf.cpp40
-rw-r--r--contrib/llvm/lib/BinaryFormat/Magic.cpp2
-rw-r--r--contrib/llvm/lib/BinaryFormat/MsgPackReader.cpp255
-rw-r--r--contrib/llvm/lib/BinaryFormat/MsgPackTypes.cpp303
-rw-r--r--contrib/llvm/lib/BinaryFormat/MsgPackWriter.cpp209
-rw-r--r--contrib/llvm/lib/BinaryFormat/Wasm.cpp6
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp237
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp119
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h4
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/ValueList.cpp2
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp182
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/Analysis.cpp29
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp18
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h6
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp128
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp115
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp850
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h104
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp (renamed from contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp)90
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp29
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp202
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h50
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp806
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h180
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp33
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h5
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp30
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h69
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp49
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h10
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp194
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h35
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp39
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h13
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp97
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.h42
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp28
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h7
-rw-r--r--contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp158
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/BuiltinGCs.cpp29
-rw-r--r--contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp41
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGen.cpp1
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp484
-rw-r--r--contrib/llvm/lib/CodeGen/DFAPacketizer.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp233
-rw-r--r--contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadata.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/GCRootLowering.cpp133
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp370
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp231
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp82
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp306
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp40
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp411
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp113
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp479
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp13
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp797
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp36
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp53
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalMerge.cpp25
-rw-r--r--contrib/llvm/lib/CodeGen/IfConversion.cpp24
-rw-r--r--contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp20
-rw-r--r--contrib/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp1359
-rw-r--r--contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp56
-rw-r--r--contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugValues.cpp79
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp227
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.h7
-rw-r--r--contrib/llvm/lib/CodeGen/LiveInterval.cpp22
-rw-r--r--contrib/llvm/lib/CodeGen/LivePhysRegs.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp11
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp57
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MILexer.h9
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp227
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MIRPrinter.cpp53
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp78
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCSE.cpp21
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCombiner.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp296
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunction.cpp208
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp36
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstr.cpp350
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp27
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLICM.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp1
-rw-r--r--contrib/llvm/lib/CodeGen/MachineOperand.cpp19
-rw-r--r--contrib/llvm/lib/CodeGen/MachineOutliner.cpp1101
-rw-r--r--contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp55
-rw-r--r--contrib/llvm/lib/CodeGen/MachinePipeliner.cpp818
-rw-r--r--contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp65
-rw-r--r--contrib/llvm/lib/CodeGen/MachineScheduler.cpp268
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSink.cpp92
-rw-r--r--contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp164
-rw-r--r--contrib/llvm/lib/CodeGen/MacroFusion.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/OptimizePHIs.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/PHIElimination.cpp34
-rw-r--r--contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp124
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp30
-rw-r--r--contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocFast.cpp767
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp69
-rw-r--r--contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp47
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp246
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterPressure.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SafeStack.cpp21
-rw-r--r--contrib/llvm/lib/CodeGen/SafeStackColoring.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp279
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAG.cpp100
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp40
-rw-r--r--contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp3150
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp48
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp13
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp42
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp425
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp412
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h32
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp221
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp696
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h13
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp50
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp1046
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp13
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp592
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h3
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp40
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp167
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp11
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp1926
-rw-r--r--contrib/llvm/lib/CodeGen/SlotIndexes.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.h17
-rw-r--r--contrib/llvm/lib/CodeGen/StackColoring.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/StackMaps.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/StackProtector.cpp41
-rw-r--r--contrib/llvm/lib/CodeGen/StackSlotColoring.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp51
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp22
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp269
-rw-r--r--contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp31
-rw-r--r--contrib/llvm/lib/CodeGen/TargetPassConfig.cpp147
-rw-r--r--contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp40
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp64
-rw-r--r--contrib/llvm/lib/CodeGen/WinEHPrepare.cpp9
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp35
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp32
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp8
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp36
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp94
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp74
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp3
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp3
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp53
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp115
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp54
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp406
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp13
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp34
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp59
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp105
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp12
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp21
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp113
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp167
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp1
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp82
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp23
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp34
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp350
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp29
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp342
-rw-r--r--contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp30
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp22
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp42
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp10
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp12
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp31
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp53
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp464
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp25
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp14
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIATable.cpp17
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp55
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp32
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp45
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp53
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp68
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp11
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp20
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp47
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp19
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp55
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp23
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbol.cpp108
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp62
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp60
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp96
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp140
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp123
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp67
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp47
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp382
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp200
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp194
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp27
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp221
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp35
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp34
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/RawError.cpp30
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp299
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp43
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp85
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDB.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp43
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp3
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp77
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp10
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp11
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp4
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp3
-rw-r--r--contrib/llvm/lib/Demangle/ItaniumDemangle.cpp5052
-rw-r--r--contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp2751
-rw-r--r--contrib/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp635
-rw-r--r--contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp32
-rw-r--r--contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp24
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp13
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp8
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp25
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h6
-rw-r--r--contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp24
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp488
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/Core.cpp1566
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp104
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp18
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp14
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp156
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp55
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp166
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/Layer.cpp154
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp208
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/Legacy.cpp58
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp12
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp443
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp121
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h286
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h27
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp235
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp65
-rw-r--r--contrib/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp13
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp85
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp251
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp29
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h3
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp6
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h11
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h15
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h6
-rw-r--r--contrib/llvm/lib/FuzzMutate/IRMutator.cpp1
-rw-r--r--contrib/llvm/lib/FuzzMutate/RandomIRBuilder.cpp2
-rw-r--r--contrib/llvm/lib/IR/AsmWriter.cpp228
-rw-r--r--contrib/llvm/lib/IR/Attributes.cpp36
-rw-r--r--contrib/llvm/lib/IR/AutoUpgrade.cpp575
-rw-r--r--contrib/llvm/lib/IR/BasicBlock.cpp29
-rw-r--r--contrib/llvm/lib/IR/ConstantFold.cpp20
-rw-r--r--contrib/llvm/lib/IR/Constants.cpp88
-rw-r--r--contrib/llvm/lib/IR/ConstantsContext.h4
-rw-r--r--contrib/llvm/lib/IR/Core.cpp442
-rw-r--r--contrib/llvm/lib/IR/DIBuilder.cpp86
-rw-r--r--contrib/llvm/lib/IR/DataLayout.cpp24
-rw-r--r--contrib/llvm/lib/IR/DebugInfo.cpp113
-rw-r--r--contrib/llvm/lib/IR/DebugInfoMetadata.cpp225
-rw-r--r--contrib/llvm/lib/IR/DebugLoc.cpp17
-rw-r--r--contrib/llvm/lib/IR/DiagnosticInfo.cpp50
-rw-r--r--contrib/llvm/lib/IR/DomTreeUpdater.cpp29
-rw-r--r--contrib/llvm/lib/IR/Dominators.cpp199
-rw-r--r--contrib/llvm/lib/IR/Function.cpp41
-rw-r--r--contrib/llvm/lib/IR/Globals.cpp7
-rw-r--r--contrib/llvm/lib/IR/IRBuilder.cpp26
-rw-r--r--contrib/llvm/lib/IR/IRPrintingPasses.cpp3
-rw-r--r--contrib/llvm/lib/IR/Instruction.cpp56
-rw-r--r--contrib/llvm/lib/IR/Instructions.cpp722
-rw-r--r--contrib/llvm/lib/IR/IntrinsicInst.cpp16
-rw-r--r--contrib/llvm/lib/IR/LLVMContext.cpp1
-rw-r--r--contrib/llvm/lib/IR/LLVMContextImpl.h76
-rw-r--r--contrib/llvm/lib/IR/LegacyPassManager.cpp352
-rw-r--r--contrib/llvm/lib/IR/MDBuilder.cpp7
-rw-r--r--contrib/llvm/lib/IR/Metadata.cpp6
-rw-r--r--contrib/llvm/lib/IR/Module.cpp89
-rw-r--r--contrib/llvm/lib/IR/ModuleSummaryIndex.cpp165
-rw-r--r--contrib/llvm/lib/IR/PassInstrumentation.cpp22
-rw-r--r--contrib/llvm/lib/IR/PassTimingInfo.cpp268
-rw-r--r--contrib/llvm/lib/IR/SafepointIRVerifier.cpp6
-rw-r--r--contrib/llvm/lib/IR/Type.cpp41
-rw-r--r--contrib/llvm/lib/IR/Value.cpp47
-rw-r--r--contrib/llvm/lib/IR/Verifier.cpp777
-rw-r--r--contrib/llvm/lib/LTO/LTO.cpp122
-rw-r--r--contrib/llvm/lib/LTO/LTOBackend.cpp27
-rw-r--r--contrib/llvm/lib/LTO/LTOCodeGenerator.cpp1
-rw-r--r--contrib/llvm/lib/LTO/LTOModule.cpp2
-rw-r--r--contrib/llvm/lib/LTO/SummaryBasedOptimizations.cpp86
-rw-r--r--contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp198
-rw-r--r--contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp16
-rw-r--r--contrib/llvm/lib/Linker/IRMover.cpp32
-rw-r--r--contrib/llvm/lib/MC/ConstantPools.cpp6
-rw-r--r--contrib/llvm/lib/MC/ELFObjectWriter.cpp19
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoWasm.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCAsmStreamer.cpp86
-rw-r--r--contrib/llvm/lib/MC/MCAssembler.cpp1
-rw-r--r--contrib/llvm/lib/MC/MCCodeView.cpp71
-rw-r--r--contrib/llvm/lib/MC/MCContext.cpp7
-rw-r--r--contrib/llvm/lib/MC/MCDwarf.cpp43
-rw-r--r--contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCExpr.cpp6
-rw-r--r--contrib/llvm/lib/MC/MCFragment.cpp4
-rw-r--r--contrib/llvm/lib/MC/MCInst.cpp12
-rw-r--r--contrib/llvm/lib/MC/MCInstrAnalysis.cpp5
-rw-r--r--contrib/llvm/lib/MC/MCInstrDesc.cpp14
-rw-r--r--contrib/llvm/lib/MC/MCMachOStreamer.cpp20
-rw-r--r--contrib/llvm/lib/MC/MCNullStreamer.cpp4
-rw-r--r--contrib/llvm/lib/MC/MCObjectFileInfo.cpp226
-rw-r--r--contrib/llvm/lib/MC/MCObjectStreamer.cpp98
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmLexer.cpp43
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmParser.cpp164
-rw-r--r--contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp112
-rw-r--r--contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCParser/WasmAsmParser.cpp145
-rw-r--r--contrib/llvm/lib/MC/MCRegisterInfo.cpp5
-rw-r--r--contrib/llvm/lib/MC/MCSection.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCSectionELF.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCStreamer.cpp92
-rw-r--r--contrib/llvm/lib/MC/MCWasmStreamer.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCWin64EH.cpp352
-rw-r--r--contrib/llvm/lib/MC/MachObjectWriter.cpp27
-rw-r--r--contrib/llvm/lib/MC/WasmObjectWriter.cpp292
-rw-r--r--contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp101
-rw-r--r--contrib/llvm/lib/MCA/Context.cpp65
-rw-r--r--contrib/llvm/lib/MCA/HWEventListener.cpp (renamed from contrib/llvm/tools/llvm-mca/HWEventListener.cpp)4
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp (renamed from contrib/llvm/tools/llvm-mca/HardwareUnit.cpp)4
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/LSUnit.cpp190
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp (renamed from contrib/llvm/tools/llvm-mca/RegisterFile.cpp)260
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp331
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp (renamed from contrib/llvm/tools/llvm-mca/RetireControlUnit.cpp)19
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/Scheduler.cpp247
-rw-r--r--contrib/llvm/lib/MCA/InstrBuilder.cpp (renamed from contrib/llvm/tools/llvm-mca/InstrBuilder.cpp)447
-rw-r--r--contrib/llvm/lib/MCA/Instruction.cpp (renamed from contrib/llvm/tools/llvm-mca/Instruction.cpp)72
-rw-r--r--contrib/llvm/lib/MCA/Pipeline.cpp97
-rw-r--r--contrib/llvm/lib/MCA/Stages/DispatchStage.cpp (renamed from contrib/llvm/tools/llvm-mca/DispatchStage.cpp)126
-rw-r--r--contrib/llvm/lib/MCA/Stages/EntryStage.cpp76
-rw-r--r--contrib/llvm/lib/MCA/Stages/ExecuteStage.cpp225
-rw-r--r--contrib/llvm/lib/MCA/Stages/InstructionTables.cpp (renamed from contrib/llvm/tools/llvm-mca/InstructionTables.cpp)21
-rw-r--r--contrib/llvm/lib/MCA/Stages/RetireStage.cpp (renamed from contrib/llvm/tools/llvm-mca/RetireStage.cpp)31
-rw-r--r--contrib/llvm/lib/MCA/Stages/Stage.cpp (renamed from contrib/llvm/tools/llvm-mca/Stage.cpp)6
-rw-r--r--contrib/llvm/lib/MCA/Support.cpp (renamed from contrib/llvm/tools/llvm-mca/Support.cpp)25
-rw-r--r--contrib/llvm/lib/Object/ArchiveWriter.cpp188
-rw-r--r--contrib/llvm/lib/Object/Binary.cpp3
-rw-r--r--contrib/llvm/lib/Object/COFFObjectFile.cpp72
-rw-r--r--contrib/llvm/lib/Object/ELF.cpp26
-rw-r--r--contrib/llvm/lib/Object/ELFObjectFile.cpp67
-rw-r--r--contrib/llvm/lib/Object/Error.cpp1
-rw-r--r--contrib/llvm/lib/Object/MachOObjectFile.cpp8
-rw-r--r--contrib/llvm/lib/Object/ModuleSymbolTable.cpp1
-rw-r--r--contrib/llvm/lib/Object/Object.cpp8
-rw-r--r--contrib/llvm/lib/Object/ObjectFile.cpp8
-rw-r--r--contrib/llvm/lib/Object/WasmObjectFile.cpp310
-rw-r--r--contrib/llvm/lib/Object/WindowsResource.cpp7
-rw-r--r--contrib/llvm/lib/ObjectYAML/COFFYAML.cpp3
-rw-r--r--contrib/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp2
-rw-r--r--contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp2
-rw-r--r--contrib/llvm/lib/ObjectYAML/ELFYAML.cpp12
-rw-r--r--contrib/llvm/lib/ObjectYAML/WasmYAML.cpp66
-rw-r--r--contrib/llvm/lib/OptRemarks/OptRemarksParser.cpp368
-rw-r--r--contrib/llvm/lib/Option/OptTable.cpp12
-rw-r--r--contrib/llvm/lib/Passes/PassBuilder.cpp512
-rw-r--r--contrib/llvm/lib/Passes/PassRegistry.def27
-rw-r--r--contrib/llvm/lib/Passes/StandardInstrumentations.cpp243
-rw-r--r--contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp31
-rw-r--r--contrib/llvm/lib/ProfileData/GCOV.cpp154
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProf.cpp11
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProfReader.cpp164
-rw-r--r--contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp2
-rw-r--r--contrib/llvm/lib/ProfileData/SampleProf.cpp10
-rw-r--r--contrib/llvm/lib/ProfileData/SampleProfReader.cpp167
-rw-r--r--contrib/llvm/lib/ProfileData/SampleProfWriter.cpp57
-rw-r--r--contrib/llvm/lib/Support/AArch64TargetParser.cpp206
-rw-r--r--contrib/llvm/lib/Support/APInt.cpp336
-rw-r--r--contrib/llvm/lib/Support/ARMTargetParser.cpp577
-rw-r--r--contrib/llvm/lib/Support/BinaryStreamError.cpp2
-rw-r--r--contrib/llvm/lib/Support/BuryPointer.cpp31
-rw-r--r--contrib/llvm/lib/Support/COM.cpp2
-rw-r--r--contrib/llvm/lib/Support/CachePruning.cpp47
-rw-r--r--contrib/llvm/lib/Support/CodeGenCoverage.cpp4
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp50
-rw-r--r--contrib/llvm/lib/Support/Compression.cpp28
-rw-r--r--contrib/llvm/lib/Support/DebugCounter.cpp37
-rw-r--r--contrib/llvm/lib/Support/Error.cpp47
-rw-r--r--contrib/llvm/lib/Support/FileCheck.cpp1446
-rw-r--r--contrib/llvm/lib/Support/FileOutputBuffer.cpp6
-rw-r--r--contrib/llvm/lib/Support/FoldingSet.cpp2
-rw-r--r--contrib/llvm/lib/Support/FormatVariadic.cpp2
-rw-r--r--contrib/llvm/lib/Support/Hashing.cpp4
-rw-r--r--contrib/llvm/lib/Support/Host.cpp167
-rw-r--r--contrib/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp322
-rw-r--r--contrib/llvm/lib/Support/JSON.cpp2
-rw-r--r--contrib/llvm/lib/Support/Locale.cpp13
-rw-r--r--contrib/llvm/lib/Support/LockFileManager.cpp10
-rw-r--r--contrib/llvm/lib/Support/Path.cpp139
-rw-r--r--contrib/llvm/lib/Support/Process.cpp10
-rw-r--r--contrib/llvm/lib/Support/RandomNumberGenerator.cpp2
-rw-r--r--contrib/llvm/lib/Support/Signals.cpp14
-rw-r--r--contrib/llvm/lib/Support/SourceMgr.cpp124
-rw-r--r--contrib/llvm/lib/Support/StringSaver.cpp3
-rw-r--r--contrib/llvm/lib/Support/SymbolRemappingReader.cpp81
-rw-r--r--contrib/llvm/lib/Support/TargetParser.cpp1022
-rw-r--r--contrib/llvm/lib/Support/TargetRegistry.cpp2
-rw-r--r--contrib/llvm/lib/Support/Timer.cpp17
-rw-r--r--contrib/llvm/lib/Support/Triple.cpp50
-rw-r--r--contrib/llvm/lib/Support/Unix/Path.inc187
-rw-r--r--contrib/llvm/lib/Support/Unix/Signals.inc5
-rw-r--r--contrib/llvm/lib/Support/VirtualFileSystem.cpp2070
-rw-r--r--contrib/llvm/lib/Support/Windows/Path.inc128
-rw-r--r--contrib/llvm/lib/Support/Windows/Process.inc133
-rw-r--r--contrib/llvm/lib/Support/Windows/Program.inc21
-rw-r--r--contrib/llvm/lib/Support/Windows/Threading.inc2
-rw-r--r--contrib/llvm/lib/Support/Windows/WindowsSupport.h51
-rw-r--r--contrib/llvm/lib/Support/WithColor.cpp63
-rw-r--r--contrib/llvm/lib/Support/YAMLTraits.cpp144
-rw-r--r--contrib/llvm/lib/Support/raw_ostream.cpp94
-rw-r--r--contrib/llvm/lib/TableGen/Main.cpp38
-rw-r--r--contrib/llvm/lib/TableGen/Record.cpp11
-rw-r--r--contrib/llvm/lib/TableGen/TGLexer.cpp557
-rw-r--r--contrib/llvm/lib/TableGen/TGLexer.h243
-rw-r--r--contrib/llvm/lib/TableGen/TGParser.h5
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64.h8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64.td242
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp29
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp282
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp130
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp61
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td18
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp162
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp69
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp43
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp904
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h11
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp48
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp714
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h25
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td398
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp1147
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h51
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td505
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp222
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp64
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp72
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h23
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp375
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td19
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp108
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp118
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h18
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td6
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td120
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td200
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td1004
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td157
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td423
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Schedule.td11
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp641
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp6
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp81
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h113
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td182
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp81
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp3
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp324
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp53
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp40
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h56
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp110
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp6
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp31
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp28
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h1
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp15
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h69
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp157
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h14
-rw-r--r--contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td2
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h27
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.h47
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.td134
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp71
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h31
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp30
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp24
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp106
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp458
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp9
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp24
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp63
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def78
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp601
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h109
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp360
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp323
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp23
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td101
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp135
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp16
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp20
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp55
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp247
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp34
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h196
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp95
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp58
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp182
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td356
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/DSInstructions.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td66
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp446
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp31
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp522
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h37
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp70
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Instructions.td8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp9
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp181
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp101
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIDefines.h5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp385
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp231
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp164
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp19
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp2272
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h43
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp99
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp1504
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp1112
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h116
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td161
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstructions.td159
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td32
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp1062
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp29
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp830
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp20
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp25
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp406
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp133
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp97
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp80
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp361
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SMInstructions.td177
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td96
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp352
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h204
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp75
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h24
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h1
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td47
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td279
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td173
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td124
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td117
-rw-r--r--contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp11
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td93
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp48
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp116
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp84
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallLowering.h7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp1022
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp33
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp10
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp32
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp66
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp672
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td31
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td172
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td145
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td20
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td89
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td66
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp94
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp72
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp37
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp24
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMacroFusion.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp414
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp17
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h24
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp41
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h6
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp136
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp15
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h22
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp1
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp18
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp20
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp36
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp65
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp8
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp13
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRInstrInfo.td59
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td20
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp10
-rw-r--r--contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp8
-rw-r--r--contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPF.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp14
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp96
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp14
-rw-r--r--contrib/llvm/lib/Target/BPF/BTF.def33
-rw-r--r--contrib/llvm/lib/Target/BPF/BTF.h209
-rw-r--r--contrib/llvm/lib/Target/BPF/BTFDebug.cpp759
-rw-r--r--contrib/llvm/lib/Target/BPF/BTFDebug.h285
-rw-r--r--contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp18
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp74
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.h27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.td42
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp1
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp8
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp88
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp1
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h5
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h79
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td2974
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td4736
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td907
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td5240
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td3337
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h168
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp93
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp104
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp14
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp20
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp146
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp350
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp73
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td88
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td (renamed from contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td)34
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp238
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h24
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td1539
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td305
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td308
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp8
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp37
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td688
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td64
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td11
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV5.td (renamed from contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td)22
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td41
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h17
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp28
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp12
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp86
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h85
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp88
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h44
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp58
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp16
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp6
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp4
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp36
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h12
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp3
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp9
-rw-r--r--contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp580
-rw-r--r--contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp387
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp36
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h7
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp178
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp59
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp81
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h53
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp211
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp27
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h27
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.td18
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp37
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp127
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h8
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td422
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp45
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h16
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td1483
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp3
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td29
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp208
-rw-r--r--contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp29
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp17
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp15
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp9
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h1
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp15
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td15
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td5
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td11
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td51
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp115
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp14
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp23
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h11
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td10
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td12
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td7
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp34
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp254
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCCState.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp335
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallLowering.h44
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCondMov.td10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp99
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td32
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp115
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp46
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td29
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp92
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td28
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp20
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp24
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSchedule.td2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp18
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp37
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h7
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.h3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.td5
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp59
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp35
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp78
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td66
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp19
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp122
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp21
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h13
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp10
-rw-r--r--contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp66
-rw-r--r--contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h49
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp130
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h81
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h38
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp43
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h41
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp44
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp76
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h60
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp102
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h52
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp22
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2.h35
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2.td59
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp153
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td34
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp27
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h39
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp76
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp188
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h63
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td235
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp54
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h49
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td109
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp117
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp14
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h62
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp55
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h52
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td60
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2Schedule.td39
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp56
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h97
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp119
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h45
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp18
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h26
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h30
-rw-r--r--contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp24
-rw-r--r--contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp165
-rw-r--r--contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp211
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp37
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp194
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h109
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h60
-rw-r--r--contrib/llvm/lib/Target/PowerPC/P9InstrResources.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td18
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp56
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp42
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp139
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp9
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp491
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp1041
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h39
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td174
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td21
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp559
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h82
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td380
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td50
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td10
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td316
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td (renamed from contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td)11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp95
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h19
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule.td5
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td10
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td5
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td5
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td12
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td12
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp24
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h4
-rw-r--r--contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp688
-rw-r--r--contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp38
-rw-r--r--contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp33
-rw-r--r--contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp130
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h113
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp83
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h10
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp16
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h8
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCV.h6
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCV.td6
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp556
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp3
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp126
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp328
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h24
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td9
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td13
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td271
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td189
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td39
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td35
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td37
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td31
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp2
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td352
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp18
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h (renamed from contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h)56
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp79
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h36
-rw-r--r--contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp55
-rw-r--r--contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp4
-rw-r--r--contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h2
-rwxr-xr-xcontrib/llvm/lib/Target/Sparc/LeonFeatures.td4
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp20
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h (renamed from contrib/llvm/lib/Target/Sparc/SparcTargetStreamer.h)5
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.td6
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp378
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h20
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td5
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td79
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp66
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp7
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp37
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp30
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp203
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td23
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td19
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td26
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperators.td86
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp44
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td7
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td12
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td80
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td85
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td66
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td64
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp15
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp489
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h16
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp3
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp16
-rw-r--r--contrib/llvm/lib/Target/TargetMachineC.cpp9
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp778
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp104
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp243
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h14
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp14
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h8
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp78
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp31
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h95
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp143
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h64
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp21
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/README.txt18
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.h6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.td12
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp87
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp112
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h9
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp238
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp693
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp43
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp46
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h38
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp87
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp116
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp218
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp161
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp442
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp94
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h29
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp611
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h25
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td448
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td71
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td104
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td116
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td54
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td131
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp17
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td121
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td46
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td67
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td794
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp79
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp123
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp287
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp84
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp164
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h26
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp (renamed from contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp)71
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp10
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp10
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp19
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp157
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp23
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h17
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp29
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp7
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h20
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp43
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp119
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp408
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp53
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp12
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h13
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp443
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h2
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h1
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp6
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp55
-rw-r--r--contrib/llvm/lib/Target/X86/ShadowCallStack.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp45
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h13
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h28
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td208
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp55
-rw-r--r--contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp33
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallLowering.cpp41
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallLowering.h4
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td4
-rw-r--r--contrib/llvm/lib/Target/X86/X86CmovConversion.cpp6
-rw-r--r--contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp585
-rw-r--r--contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp156
-rw-r--r--contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp9
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp40
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp113
-rw-r--r--contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp47
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp1314
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp8589
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h117
-rw-r--r--contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp253
-rw-r--r--contrib/llvm/lib/Target/X86/X86Instr3DNow.td4
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrAVX512.td2047
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td214
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td2
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td642
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td14
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrExtension.td20
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA.td38
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td3
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td163
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp535
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h29
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td222
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td10
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td1304
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td58
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td228
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrXOP.td156
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp337
-rw-r--r--contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h477
-rw-r--r--contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp40
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp192
-rw-r--r--contrib/llvm/lib/Target/X86/X86MacroFusion.cpp24
-rw-r--r--contrib/llvm/lib/Target/X86/X86MacroFusion.h5
-rw-r--r--contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp8
-rw-r--r--contrib/llvm/lib/Target/X86/X86PfmCounters.td275
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp27
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td23
-rw-r--r--contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp27
-rwxr-xr-xcontrib/llvm/lib/Target/X86/X86SchedBroadwell.td433
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedHaswell.td457
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedPredicates.td24
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td324
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td468
-rwxr-xr-xcontrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td498
-rw-r--r--contrib/llvm/lib/Target/X86/X86Schedule.td421
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleAtom.td85
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td1282
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td246
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleSLM.td36
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td144
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp58
-rw-r--r--contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h13
-rw-r--r--contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp481
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp30
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h51
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp52
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h4
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp876
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h12
-rw-r--r--contrib/llvm/lib/Target/X86/X86WinEHState.cpp6
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp4
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp5
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp12
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp5
-rw-r--r--contrib/llvm/lib/Testing/Support/SupportHelpers.cpp53
-rw-r--r--contrib/llvm/lib/TextAPI/ELF/ELFStub.cpp29
-rw-r--r--contrib/llvm/lib/TextAPI/ELF/TBEHandler.cpp161
-rw-r--r--contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp3
-rw-r--r--contrib/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp109
-rw-r--r--contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h5
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp134
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp38
-rw-r--r--contrib/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp108
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp30
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp146
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp261
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp19
-rw-r--r--contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp643
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPO.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp212
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp35
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp130
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp41
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp84
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PruneEH.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SCCP.cpp37
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp131
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp37
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp19
-rw-r--r--contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp102
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp321
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp599
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp532
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp126
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp661
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h64
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp149
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp102
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp27
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp323
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp255
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp538
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp262
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp149
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h2
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/CGProfile.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp2074
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp404
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp366
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp11
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp23
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp50
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp834
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp58
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp81
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp97
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h74
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h2
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp33
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ADCE.cpp35
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/BDCE.cpp52
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp102
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp246
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp69
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp73
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DCE.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp68
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp201
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp25
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp241
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp364
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp186
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp255
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp981
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp50
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp185
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp44
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp424
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp28
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp565
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp18
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp68
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp196
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp116
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp103
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp16
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp57
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp120
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp100
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp102
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp90
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp54
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp26
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SCCP.cpp263
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SROA.cpp111
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalar.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp136
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp653
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Sink.cpp10
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp38
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp86
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp149
-rw-r--r--contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp34
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp200
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp68
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp47
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp105
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp32
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp357
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp16
-rw-r--r--contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Evaluator.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp51
-rw-r--r--contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp64
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp23
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp77
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp423
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp77
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp29
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp92
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp143
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp1487
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp43
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp36
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp312
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp11
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp607
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SplitModule.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Utils.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp108
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp186
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp1680
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp51
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h3
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp171
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlan.h216
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp468
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlanValue.h20
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp2
-rw-r--r--contrib/llvm/lib/XRay/BlockIndexer.cpp98
-rw-r--r--contrib/llvm/lib/XRay/BlockPrinter.cpp114
-rw-r--r--contrib/llvm/lib/XRay/BlockVerifier.cpp205
-rw-r--r--contrib/llvm/lib/XRay/FDRRecordProducer.cpp198
-rw-r--r--contrib/llvm/lib/XRay/FDRRecords.cpp67
-rw-r--r--contrib/llvm/lib/XRay/FDRTraceExpander.cpp132
-rw-r--r--contrib/llvm/lib/XRay/FDRTraceWriter.cpp154
-rw-r--r--contrib/llvm/lib/XRay/FileHeaderReader.cpp70
-rw-r--r--contrib/llvm/lib/XRay/InstrumentationMap.cpp53
-rw-r--r--contrib/llvm/lib/XRay/LogBuilderConsumer.cpp38
-rw-r--r--contrib/llvm/lib/XRay/Profile.cpp403
-rw-r--r--contrib/llvm/lib/XRay/RecordInitializer.cpp418
-rw-r--r--contrib/llvm/lib/XRay/RecordPrinter.cpp109
-rw-r--r--contrib/llvm/lib/XRay/Trace.cpp736
-rw-r--r--contrib/llvm/tools/bugpoint/CrashDebugger.cpp100
-rw-r--r--contrib/llvm/tools/bugpoint/ExecutionDriver.cpp39
-rw-r--r--contrib/llvm/tools/bugpoint/OptimizerDriver.cpp4
-rw-r--r--contrib/llvm/tools/bugpoint/ToolRunner.cpp92
-rw-r--r--contrib/llvm/tools/bugpoint/ToolRunner.h14
-rw-r--r--contrib/llvm/tools/lli/lli.cpp270
-rw-r--r--contrib/llvm/tools/llvm-ar/llvm-ar.cpp263
-rw-r--r--contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp7
-rw-r--r--contrib/llvm/tools/llvm-cov/CodeCoverage.cpp60
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageExporter.h6
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp458
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageExporterJson.h84
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageExporterLcov.cpp125
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageExporterLcov.h36
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageViewOptions.h3
-rw-r--r--contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp10
-rw-r--r--contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp2
-rw-r--r--contrib/llvm/tools/llvm-cov/TestingSupport.cpp2
-rw-r--r--contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp18
-rw-r--r--contrib/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp155
-rw-r--r--contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp32
-rw-r--r--contrib/llvm/tools/llvm-dwarfdump/Statistics.cpp117
-rw-r--r--contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp75
-rw-r--r--contrib/llvm/tools/llvm-lto/llvm-lto.cpp18
-rw-r--r--contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp2
-rw-r--r--contrib/llvm/tools/llvm-mc/llvm-mc.cpp6
-rw-r--r--contrib/llvm/tools/llvm-mca/CodeRegion.cpp21
-rw-r--r--contrib/llvm/tools/llvm-mca/CodeRegion.h27
-rw-r--r--contrib/llvm/tools/llvm-mca/CodeRegionGenerator.cpp137
-rw-r--r--contrib/llvm/tools/llvm-mca/CodeRegionGenerator.h70
-rw-r--r--contrib/llvm/tools/llvm-mca/Context.cpp63
-rw-r--r--contrib/llvm/tools/llvm-mca/DispatchStatistics.cpp71
-rw-r--r--contrib/llvm/tools/llvm-mca/ExecuteStage.cpp210
-rw-r--r--contrib/llvm/tools/llvm-mca/ExecuteStage.h67
-rw-r--r--contrib/llvm/tools/llvm-mca/FetchStage.cpp46
-rw-r--r--contrib/llvm/tools/llvm-mca/FetchStage.h45
-rw-r--r--contrib/llvm/tools/llvm-mca/InstrBuilder.h85
-rw-r--r--contrib/llvm/tools/llvm-mca/LSUnit.cpp148
-rw-r--r--contrib/llvm/tools/llvm-mca/LSUnit.h147
-rw-r--r--contrib/llvm/tools/llvm-mca/Pipeline.cpp99
-rw-r--r--contrib/llvm/tools/llvm-mca/PipelinePrinter.cpp6
-rw-r--r--contrib/llvm/tools/llvm-mca/PipelinePrinter.h6
-rw-r--r--contrib/llvm/tools/llvm-mca/RegisterFileStatistics.cpp107
-rw-r--r--contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.cpp49
-rw-r--r--contrib/llvm/tools/llvm-mca/Scheduler.cpp403
-rw-r--r--contrib/llvm/tools/llvm-mca/Scheduler.h515
-rw-r--r--contrib/llvm/tools/llvm-mca/SchedulerStatistics.cpp94
-rw-r--r--contrib/llvm/tools/llvm-mca/SourceMgr.h63
-rw-r--r--contrib/llvm/tools/llvm-mca/Support.h58
-rw-r--r--contrib/llvm/tools/llvm-mca/TimelineView.cpp240
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp86
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.h (renamed from contrib/llvm/tools/llvm-mca/DispatchStatistics.h)6
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp (renamed from contrib/llvm/tools/llvm-mca/InstructionInfoView.cpp)10
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.h (renamed from contrib/llvm/tools/llvm-mca/InstructionInfoView.h)13
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp168
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h (renamed from contrib/llvm/tools/llvm-mca/RegisterFileStatistics.h)28
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp (renamed from contrib/llvm/tools/llvm-mca/ResourcePressureView.cpp)38
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.h (renamed from contrib/llvm/tools/llvm-mca/ResourcePressureView.h)33
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp91
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h (renamed from contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.h)35
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp183
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.h (renamed from contrib/llvm/tools/llvm-mca/SchedulerStatistics.h)61
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/SummaryView.cpp (renamed from contrib/llvm/tools/llvm-mca/SummaryView.cpp)40
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/SummaryView.h (renamed from contrib/llvm/tools/llvm-mca/SummaryView.h)11
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/TimelineView.cpp294
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/TimelineView.h (renamed from contrib/llvm/tools/llvm-mca/TimelineView.h)34
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/View.cpp (renamed from contrib/llvm/tools/llvm-mca/View.cpp)4
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/View.h (renamed from contrib/llvm/tools/llvm-mca/View.h)4
-rw-r--r--contrib/llvm/tools/llvm-mca/llvm-mca.cpp252
-rw-r--r--contrib/llvm/tools/llvm-nm/llvm-nm.cpp103
-rw-r--r--contrib/llvm/tools/llvm-objcopy/Buffer.cpp51
-rw-r--r--contrib/llvm/tools/llvm-objcopy/Buffer.h66
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp98
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h31
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Object.cpp70
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Object.h148
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Reader.cpp171
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Reader.h43
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Writer.cpp337
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Writer.h61
-rw-r--r--contrib/llvm/tools/llvm-objcopy/CopyConfig.cpp474
-rw-r--r--contrib/llvm/tools/llvm-objcopy/CopyConfig.h119
-rw-r--r--contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp584
-rw-r--r--contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h34
-rw-r--r--contrib/llvm/tools/llvm-objcopy/ELF/Object.cpp (renamed from contrib/llvm/tools/llvm-objcopy/Object.cpp)514
-rw-r--r--contrib/llvm/tools/llvm-objcopy/ELF/Object.h (renamed from contrib/llvm/tools/llvm-objcopy/Object.h)231
-rw-r--r--contrib/llvm/tools/llvm-objcopy/ObjcopyOpts.td243
-rw-r--r--contrib/llvm/tools/llvm-objcopy/StripOpts.td86
-rw-r--r--contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp682
-rw-r--r--contrib/llvm/tools/llvm-objcopy/llvm-objcopy.h2
-rw-r--r--contrib/llvm/tools/llvm-objdump/COFFDump.cpp29
-rw-r--r--contrib/llvm/tools/llvm-objdump/ELFDump.cpp30
-rw-r--r--contrib/llvm/tools/llvm-objdump/MachODump.cpp458
-rw-r--r--contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp786
-rw-r--r--contrib/llvm/tools/llvm-objdump/llvm-objdump.h58
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/Analyze.cpp148
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/Analyze.h30
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp339
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.h6
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/InputFile.cpp17
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/InputFile.h2
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp30
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.h7
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp38
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h9
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp6
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PdbYaml.h6
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp7
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp11
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.h1
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp12
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp12
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp178
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.h6
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp6
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.cpp45
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.h1
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp191
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.h8
-rw-r--r--contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp182
-rw-r--r--contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp445
-rw-r--r--contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.h51
-rw-r--r--contrib/llvm/tools/llvm-readobj/COFFDumper.cpp95
-rw-r--r--contrib/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h13
-rw-r--r--contrib/llvm/tools/llvm-readobj/ELFDumper.cpp667
-rw-r--r--contrib/llvm/tools/llvm-readobj/MachODumper.cpp10
-rw-r--r--contrib/llvm/tools/llvm-readobj/ObjDumper.h2
-rw-r--r--contrib/llvm/tools/llvm-readobj/WasmDumper.cpp41
-rw-r--r--contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp212
-rw-r--r--contrib/llvm/tools/llvm-readobj/llvm-readobj.h11
-rw-r--r--contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp25
-rw-r--r--contrib/llvm/tools/llvm-stress/llvm-stress.cpp4
-rw-r--r--contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp110
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-account.cpp138
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-converter.cpp130
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-fdr-dump.cpp119
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-graph.cpp4
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-stacks.cpp5
-rw-r--r--contrib/llvm/tools/opt/Debugify.cpp11
-rw-r--r--contrib/llvm/tools/opt/NewPMDriver.cpp125
-rw-r--r--contrib/llvm/tools/opt/opt.cpp8
-rw-r--r--contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp7
-rw-r--r--contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp23
-rw-r--r--contrib/llvm/utils/TableGen/CTagsEmitter.cpp2
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp240
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h111
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenInstruction.cpp99
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenInstruction.h13
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenIntrinsics.h3
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenRegisters.cpp78
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenRegisters.h2
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenSchedule.cpp327
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenSchedule.h175
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenTarget.cpp14
-rw-r--r--contrib/llvm/utils/TableGen/DAGISelMatcher.cpp14
-rw-r--r--contrib/llvm/utils/TableGen/DAGISelMatcher.h6
-rw-r--r--contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp90
-rw-r--r--contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp91
-rw-r--r--contrib/llvm/utils/TableGen/ExegesisEmitter.cpp216
-rw-r--r--contrib/llvm/utils/TableGen/FastISelEmitter.cpp21
-rw-r--r--contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp146
-rw-r--r--contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp85
-rw-r--r--contrib/llvm/utils/TableGen/InfoByHwMode.cpp4
-rw-r--r--contrib/llvm/utils/TableGen/InfoByHwMode.h4
-rw-r--r--contrib/llvm/utils/TableGen/InstrDocsEmitter.cpp2
-rw-r--r--contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp101
-rw-r--r--contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp11
-rw-r--r--contrib/llvm/utils/TableGen/PredicateExpander.cpp368
-rw-r--r--contrib/llvm/utils/TableGen/PredicateExpander.h100
-rw-r--r--contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp68
-rw-r--r--contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp29
-rw-r--r--contrib/llvm/utils/TableGen/SubtargetEmitter.cpp416
-rw-r--r--contrib/llvm/utils/TableGen/TableGen.cpp8
-rw-r--r--contrib/llvm/utils/TableGen/TableGenBackends.h1
-rw-r--r--contrib/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp69
-rw-r--r--contrib/llvm/utils/TableGen/X86ModRMFilters.h6
-rw-r--r--contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp4
2298 files changed, 190907 insertions, 94214 deletions
diff --git a/contrib/llvm/FREEBSD-Xlist b/contrib/llvm/FREEBSD-Xlist
index 9fbda5f30544..f694223a183f 100644
--- a/contrib/llvm/FREEBSD-Xlist
+++ b/contrib/llvm/FREEBSD-Xlist
@@ -2,17 +2,15 @@
.arcconfig
.clang-format
.clang-tidy
+.gitattributes
.gitignore
CMakeLists.txt
CODE_OWNERS.TXT
CREDITS.TXT
LLVMBuild.txt
-Makefile
-Makefile.common
-Makefile.config.in
-Makefile.rules
README.txt
-autoconf/
+RELEASE_TESTERS.TXT
+benchmarks/
bindings/
cmake/
configure
@@ -21,473 +19,485 @@ examples/
include/llvm/CMakeLists.txt
include/llvm/Config/
include/llvm/IR/CMakeLists.txt
-include/llvm/Support/DataTypes.h.cmake
+include/llvm/Support/CMakeLists.txt
include/llvm/Support/LICENSE.TXT
lib/Analysis/CMakeLists.txt
-lib/Analysis/IPA/CMakeLists.txt
-lib/Analysis/IPA/LLVMBuild.txt
-lib/Analysis/IPA/Makefile
lib/Analysis/LLVMBuild.txt
-lib/Analysis/Makefile
lib/Analysis/README.txt
lib/AsmParser/CMakeLists.txt
lib/AsmParser/LLVMBuild.txt
-lib/AsmParser/Makefile
+lib/BinaryFormat/CMakeLists.txt
+lib/BinaryFormat/LLVMBuild.txt
lib/Bitcode/CMakeLists.txt
lib/Bitcode/LLVMBuild.txt
-lib/Bitcode/Makefile
lib/Bitcode/Reader/CMakeLists.txt
lib/Bitcode/Reader/LLVMBuild.txt
-lib/Bitcode/Reader/Makefile
lib/Bitcode/Writer/CMakeLists.txt
lib/Bitcode/Writer/LLVMBuild.txt
-lib/Bitcode/Writer/Makefile
lib/CMakeLists.txt
lib/CodeGen/AsmPrinter/CMakeLists.txt
lib/CodeGen/AsmPrinter/LLVMBuild.txt
-lib/CodeGen/AsmPrinter/Makefile
lib/CodeGen/CMakeLists.txt
+lib/CodeGen/GlobalISel/CMakeLists.txt
+lib/CodeGen/GlobalISel/LLVMBuild.txt
lib/CodeGen/LLVMBuild.txt
-lib/CodeGen/Makefile
+lib/CodeGen/MIRParser/CMakeLists.txt
+lib/CodeGen/MIRParser/LLVMBuild.txt
lib/CodeGen/README.txt
lib/CodeGen/SelectionDAG/CMakeLists.txt
lib/CodeGen/SelectionDAG/LLVMBuild.txt
-lib/CodeGen/SelectionDAG/Makefile
lib/DebugInfo/CMakeLists.txt
+lib/DebugInfo/CodeView/CMakeLists.txt
+lib/DebugInfo/CodeView/LLVMBuild.txt
lib/DebugInfo/DWARF/CMakeLists.txt
lib/DebugInfo/DWARF/LLVMBuild.txt
-lib/DebugInfo/DWARF/Makefile
lib/DebugInfo/LLVMBuild.txt
-lib/DebugInfo/Makefile
+lib/DebugInfo/MSF/CMakeLists.txt
+lib/DebugInfo/MSF/LLVMBuild.txt
lib/DebugInfo/PDB/CMakeLists.txt
lib/DebugInfo/PDB/LLVMBuild.txt
-lib/DebugInfo/PDB/Makefile
+lib/DebugInfo/Symbolize/CMakeLists.txt
+lib/DebugInfo/Symbolize/LLVMBuild.txt
+lib/Demangle/CMakeLists.txt
+lib/Demangle/LLVMBuild.txt
lib/ExecutionEngine/CMakeLists.txt
lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
-lib/ExecutionEngine/IntelJITEvents/Makefile
lib/ExecutionEngine/Interpreter/CMakeLists.txt
lib/ExecutionEngine/Interpreter/LLVMBuild.txt
-lib/ExecutionEngine/Interpreter/Makefile
lib/ExecutionEngine/LLVMBuild.txt
lib/ExecutionEngine/MCJIT/CMakeLists.txt
lib/ExecutionEngine/MCJIT/LLVMBuild.txt
-lib/ExecutionEngine/MCJIT/Makefile
-lib/ExecutionEngine/Makefile
lib/ExecutionEngine/OProfileJIT/CMakeLists.txt
lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
-lib/ExecutionEngine/OProfileJIT/Makefile
lib/ExecutionEngine/Orc/CMakeLists.txt
lib/ExecutionEngine/Orc/LLVMBuild.txt
-lib/ExecutionEngine/Orc/Makefile
+lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
+lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
-lib/ExecutionEngine/RuntimeDyld/Makefile
+lib/FuzzMutate/CMakeLists.txt
+lib/FuzzMutate/LLVMBuild.txt
lib/Fuzzer/
lib/IR/CMakeLists.txt
lib/IR/LLVMBuild.txt
-lib/IR/Makefile
lib/IRReader/CMakeLists.txt
lib/IRReader/LLVMBuild.txt
-lib/IRReader/Makefile
lib/LLVMBuild.txt
lib/LTO/CMakeLists.txt
lib/LTO/LLVMBuild.txt
-lib/LTO/Makefile
lib/LineEditor/CMakeLists.txt
lib/LineEditor/LLVMBuild.txt
-lib/LineEditor/Makefile
lib/Linker/CMakeLists.txt
lib/Linker/LLVMBuild.txt
-lib/Linker/Makefile
lib/MC/CMakeLists.txt
lib/MC/LLVMBuild.txt
lib/MC/MCDisassembler/CMakeLists.txt
lib/MC/MCDisassembler/LLVMBuild.txt
-lib/MC/MCDisassembler/Makefile
lib/MC/MCParser/CMakeLists.txt
lib/MC/MCParser/LLVMBuild.txt
-lib/MC/MCParser/Makefile
-lib/MC/Makefile
-lib/Makefile
+lib/MCA/CMakeLists.txt
+lib/MCA/LLVMBuild.txt
lib/Object/CMakeLists.txt
lib/Object/LLVMBuild.txt
-lib/Object/Makefile
+lib/ObjectYAML/CMakeLists.txt
+lib/ObjectYAML/LLVMBuild.txt
+lib/OptRemarks/CMakeLists.txt
+lib/OptRemarks/LLVMBuild.txt
lib/Option/CMakeLists.txt
lib/Option/LLVMBuild.txt
-lib/Option/Makefile
lib/Passes/CMakeLists.txt
lib/Passes/LLVMBuild.txt
-lib/Passes/Makefile
lib/ProfileData/CMakeLists.txt
+lib/ProfileData/Coverage/CMakeLists.txt
+lib/ProfileData/Coverage/LLVMBuild.txt
lib/ProfileData/LLVMBuild.txt
-lib/ProfileData/Makefile
lib/Support/CMakeLists.txt
lib/Support/LLVMBuild.txt
-lib/Support/Makefile
lib/Support/README.txt.system
lib/TableGen/CMakeLists.txt
lib/TableGen/LLVMBuild.txt
-lib/TableGen/Makefile
lib/Target/AArch64/AsmParser/CMakeLists.txt
lib/Target/AArch64/AsmParser/LLVMBuild.txt
-lib/Target/AArch64/AsmParser/Makefile
lib/Target/AArch64/CMakeLists.txt
lib/Target/AArch64/Disassembler/CMakeLists.txt
lib/Target/AArch64/Disassembler/LLVMBuild.txt
-lib/Target/AArch64/Disassembler/Makefile
lib/Target/AArch64/InstPrinter/CMakeLists.txt
lib/Target/AArch64/InstPrinter/LLVMBuild.txt
-lib/Target/AArch64/InstPrinter/Makefile
lib/Target/AArch64/LLVMBuild.txt
lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
-lib/Target/AArch64/MCTargetDesc/Makefile
-lib/Target/AArch64/Makefile
lib/Target/AArch64/TargetInfo/CMakeLists.txt
lib/Target/AArch64/TargetInfo/LLVMBuild.txt
-lib/Target/AArch64/TargetInfo/Makefile
lib/Target/AArch64/Utils/CMakeLists.txt
lib/Target/AArch64/Utils/LLVMBuild.txt
-lib/Target/AArch64/Utils/Makefile
+lib/Target/AMDGPU/AsmParser/CMakeLists.txt
+lib/Target/AMDGPU/AsmParser/LLVMBuild.txt
+lib/Target/AMDGPU/CMakeLists.txt
+lib/Target/AMDGPU/Disassembler/CMakeLists.txt
+lib/Target/AMDGPU/Disassembler/LLVMBuild.txt
+lib/Target/AMDGPU/InstPrinter/CMakeLists.txt
+lib/Target/AMDGPU/InstPrinter/LLVMBuild.txt
+lib/Target/AMDGPU/LLVMBuild.txt
+lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
+lib/Target/AMDGPU/TargetInfo/CMakeLists.txt
+lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt
+lib/Target/AMDGPU/Utils/CMakeLists.txt
+lib/Target/AMDGPU/Utils/LLVMBuild.txt
+lib/Target/ARC/CMakeLists.txt
+lib/Target/ARC/Disassembler/CMakeLists.txt
+lib/Target/ARC/Disassembler/LLVMBuild.txt
+lib/Target/ARC/InstPrinter/CMakeLists.txt
+lib/Target/ARC/InstPrinter/LLVMBuild.txt
+lib/Target/ARC/LLVMBuild.txt
+lib/Target/ARC/MCTargetDesc/CMakeLists.txt
+lib/Target/ARC/MCTargetDesc/LLVMBuild.txt
+lib/Target/ARC/TargetInfo/CMakeLists.txt
+lib/Target/ARC/TargetInfo/LLVMBuild.txt
lib/Target/ARM/AsmParser/CMakeLists.txt
lib/Target/ARM/AsmParser/LLVMBuild.txt
-lib/Target/ARM/AsmParser/Makefile
lib/Target/ARM/CMakeLists.txt
lib/Target/ARM/Disassembler/CMakeLists.txt
lib/Target/ARM/Disassembler/LLVMBuild.txt
-lib/Target/ARM/Disassembler/Makefile
lib/Target/ARM/InstPrinter/CMakeLists.txt
lib/Target/ARM/InstPrinter/LLVMBuild.txt
-lib/Target/ARM/InstPrinter/Makefile
lib/Target/ARM/LLVMBuild.txt
lib/Target/ARM/MCTargetDesc/CMakeLists.txt
lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
-lib/Target/ARM/MCTargetDesc/Makefile
-lib/Target/ARM/Makefile
lib/Target/ARM/README-Thumb.txt
lib/Target/ARM/README-Thumb2.txt
lib/Target/ARM/README.txt
lib/Target/ARM/TargetInfo/CMakeLists.txt
lib/Target/ARM/TargetInfo/LLVMBuild.txt
-lib/Target/ARM/TargetInfo/Makefile
+lib/Target/ARM/Utils/CMakeLists.txt
+lib/Target/ARM/Utils/LLVMBuild.txt
+lib/Target/AVR/AsmParser/CMakeLists.txt
+lib/Target/AVR/AsmParser/LLVMBuild.txt
+lib/Target/AVR/CMakeLists.txt
+lib/Target/AVR/Disassembler/CMakeLists.txt
+lib/Target/AVR/Disassembler/LLVMBuild.txt
+lib/Target/AVR/InstPrinter/CMakeLists.txt
+lib/Target/AVR/InstPrinter/LLVMBuild.txt
+lib/Target/AVR/LLVMBuild.txt
+lib/Target/AVR/MCTargetDesc/CMakeLists.txt
+lib/Target/AVR/MCTargetDesc/LLVMBuild.txt
+lib/Target/AVR/TargetInfo/CMakeLists.txt
+lib/Target/AVR/TargetInfo/LLVMBuild.txt
+lib/Target/BPF/AsmParser/CMakeLists.txt
+lib/Target/BPF/AsmParser/LLVMBuild.txt
lib/Target/BPF/CMakeLists.txt
+lib/Target/BPF/Disassembler/CMakeLists.txt
+lib/Target/BPF/Disassembler/LLVMBuild.txt
lib/Target/BPF/InstPrinter/CMakeLists.txt
lib/Target/BPF/InstPrinter/LLVMBuild.txt
-lib/Target/BPF/InstPrinter/Makefile
lib/Target/BPF/LLVMBuild.txt
lib/Target/BPF/MCTargetDesc/CMakeLists.txt
lib/Target/BPF/MCTargetDesc/LLVMBuild.txt
-lib/Target/BPF/MCTargetDesc/Makefile
-lib/Target/BPF/Makefile
lib/Target/BPF/TargetInfo/CMakeLists.txt
lib/Target/BPF/TargetInfo/LLVMBuild.txt
-lib/Target/BPF/TargetInfo/Makefile
lib/Target/CMakeLists.txt
-lib/Target/CppBackend/CMakeLists.txt
-lib/Target/CppBackend/LLVMBuild.txt
-lib/Target/CppBackend/Makefile
-lib/Target/CppBackend/TargetInfo/CMakeLists.txt
-lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
-lib/Target/CppBackend/TargetInfo/Makefile
+lib/Target/Hexagon/AsmParser/CMakeLists.txt
+lib/Target/Hexagon/AsmParser/LLVMBuild.txt
lib/Target/Hexagon/CMakeLists.txt
lib/Target/Hexagon/Disassembler/CMakeLists.txt
lib/Target/Hexagon/Disassembler/LLVMBuild.txt
-lib/Target/Hexagon/Disassembler/Makefile
lib/Target/Hexagon/LLVMBuild.txt
lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
-lib/Target/Hexagon/MCTargetDesc/Makefile
-lib/Target/Hexagon/Makefile
lib/Target/Hexagon/TargetInfo/CMakeLists.txt
lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
-lib/Target/Hexagon/TargetInfo/Makefile
lib/Target/LLVMBuild.txt
+lib/Target/Lanai/AsmParser/CMakeLists.txt
+lib/Target/Lanai/AsmParser/LLVMBuild.txt
+lib/Target/Lanai/CMakeLists.txt
+lib/Target/Lanai/Disassembler/CMakeLists.txt
+lib/Target/Lanai/Disassembler/LLVMBuild.txt
+lib/Target/Lanai/InstPrinter/CMakeLists.txt
+lib/Target/Lanai/InstPrinter/LLVMBuild.txt
+lib/Target/Lanai/LLVMBuild.txt
+lib/Target/Lanai/MCTargetDesc/CMakeLists.txt
+lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt
+lib/Target/Lanai/TargetInfo/CMakeLists.txt
+lib/Target/Lanai/TargetInfo/LLVMBuild.txt
+lib/Target/MSP430/AsmParser/CMakeLists.txt
+lib/Target/MSP430/AsmParser/LLVMBuild.txt
lib/Target/MSP430/CMakeLists.txt
+lib/Target/MSP430/Disassembler/CMakeLists.txt
+lib/Target/MSP430/Disassembler/LLVMBuild.txt
lib/Target/MSP430/InstPrinter/CMakeLists.txt
lib/Target/MSP430/InstPrinter/LLVMBuild.txt
-lib/Target/MSP430/InstPrinter/Makefile
lib/Target/MSP430/LLVMBuild.txt
lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
-lib/Target/MSP430/MCTargetDesc/Makefile
-lib/Target/MSP430/Makefile
lib/Target/MSP430/README.txt
lib/Target/MSP430/TargetInfo/CMakeLists.txt
lib/Target/MSP430/TargetInfo/LLVMBuild.txt
-lib/Target/MSP430/TargetInfo/Makefile
-lib/Target/Makefile
lib/Target/Mips/AsmParser/CMakeLists.txt
lib/Target/Mips/AsmParser/LLVMBuild.txt
-lib/Target/Mips/AsmParser/Makefile
lib/Target/Mips/CMakeLists.txt
lib/Target/Mips/Disassembler/CMakeLists.txt
lib/Target/Mips/Disassembler/LLVMBuild.txt
-lib/Target/Mips/Disassembler/Makefile
lib/Target/Mips/InstPrinter/CMakeLists.txt
lib/Target/Mips/InstPrinter/LLVMBuild.txt
-lib/Target/Mips/InstPrinter/Makefile
lib/Target/Mips/LLVMBuild.txt
lib/Target/Mips/MCTargetDesc/CMakeLists.txt
lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
-lib/Target/Mips/MCTargetDesc/Makefile
-lib/Target/Mips/Makefile
lib/Target/Mips/TargetInfo/CMakeLists.txt
lib/Target/Mips/TargetInfo/LLVMBuild.txt
-lib/Target/Mips/TargetInfo/Makefile
lib/Target/NVPTX/CMakeLists.txt
lib/Target/NVPTX/InstPrinter/CMakeLists.txt
lib/Target/NVPTX/InstPrinter/LLVMBuild.txt
-lib/Target/NVPTX/InstPrinter/Makefile
lib/Target/NVPTX/LLVMBuild.txt
lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt
lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt
-lib/Target/NVPTX/MCTargetDesc/Makefile
-lib/Target/NVPTX/Makefile
lib/Target/NVPTX/TargetInfo/CMakeLists.txt
lib/Target/NVPTX/TargetInfo/LLVMBuild.txt
-lib/Target/NVPTX/TargetInfo/Makefile
lib/Target/PowerPC/AsmParser/CMakeLists.txt
lib/Target/PowerPC/AsmParser/LLVMBuild.txt
-lib/Target/PowerPC/AsmParser/Makefile
lib/Target/PowerPC/CMakeLists.txt
lib/Target/PowerPC/Disassembler/CMakeLists.txt
lib/Target/PowerPC/Disassembler/LLVMBuild.txt
-lib/Target/PowerPC/Disassembler/Makefile
lib/Target/PowerPC/InstPrinter/CMakeLists.txt
lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
-lib/Target/PowerPC/InstPrinter/Makefile
lib/Target/PowerPC/LLVMBuild.txt
lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
-lib/Target/PowerPC/MCTargetDesc/Makefile
-lib/Target/PowerPC/Makefile
lib/Target/PowerPC/README.txt
lib/Target/PowerPC/README_ALTIVEC.txt
lib/Target/PowerPC/TargetInfo/CMakeLists.txt
lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
-lib/Target/PowerPC/TargetInfo/Makefile
-lib/Target/R600/AsmParser/CMakeLists.txt
-lib/Target/R600/AsmParser/LLVMBuild.txt
-lib/Target/R600/AsmParser/Makefile
-lib/Target/R600/CMakeLists.txt
-lib/Target/R600/InstPrinter/CMakeLists.txt
-lib/Target/R600/InstPrinter/LLVMBuild.txt
-lib/Target/R600/InstPrinter/Makefile
-lib/Target/R600/LLVMBuild.txt
-lib/Target/R600/MCTargetDesc/CMakeLists.txt
-lib/Target/R600/MCTargetDesc/LLVMBuild.txt
-lib/Target/R600/MCTargetDesc/Makefile
-lib/Target/R600/Makefile
-lib/Target/R600/TargetInfo/CMakeLists.txt
-lib/Target/R600/TargetInfo/LLVMBuild.txt
-lib/Target/R600/TargetInfo/Makefile
lib/Target/README.txt
+lib/Target/RISCV/AsmParser/CMakeLists.txt
+lib/Target/RISCV/AsmParser/LLVMBuild.txt
+lib/Target/RISCV/CMakeLists.txt
+lib/Target/RISCV/Disassembler/CMakeLists.txt
+lib/Target/RISCV/Disassembler/LLVMBuild.txt
+lib/Target/RISCV/InstPrinter/CMakeLists.txt
+lib/Target/RISCV/InstPrinter/LLVMBuild.txt
+lib/Target/RISCV/LLVMBuild.txt
+lib/Target/RISCV/MCTargetDesc/CMakeLists.txt
+lib/Target/RISCV/MCTargetDesc/LLVMBuild.txt
+lib/Target/RISCV/TargetInfo/CMakeLists.txt
+lib/Target/RISCV/TargetInfo/LLVMBuild.txt
+lib/Target/RISCV/Utils/CMakeLists.txt
+lib/Target/RISCV/Utils/LLVMBuild.txt
lib/Target/Sparc/AsmParser/CMakeLists.txt
lib/Target/Sparc/AsmParser/LLVMBuild.txt
-lib/Target/Sparc/AsmParser/Makefile
lib/Target/Sparc/CMakeLists.txt
lib/Target/Sparc/Disassembler/CMakeLists.txt
lib/Target/Sparc/Disassembler/LLVMBuild.txt
-lib/Target/Sparc/Disassembler/Makefile
lib/Target/Sparc/InstPrinter/CMakeLists.txt
lib/Target/Sparc/InstPrinter/LLVMBuild.txt
-lib/Target/Sparc/InstPrinter/Makefile
lib/Target/Sparc/LLVMBuild.txt
lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
-lib/Target/Sparc/MCTargetDesc/Makefile
-lib/Target/Sparc/Makefile
lib/Target/Sparc/README.txt
lib/Target/Sparc/TargetInfo/CMakeLists.txt
lib/Target/Sparc/TargetInfo/LLVMBuild.txt
-lib/Target/Sparc/TargetInfo/Makefile
lib/Target/SystemZ/AsmParser/CMakeLists.txt
lib/Target/SystemZ/AsmParser/LLVMBuild.txt
-lib/Target/SystemZ/AsmParser/Makefile
lib/Target/SystemZ/CMakeLists.txt
lib/Target/SystemZ/Disassembler/CMakeLists.txt
lib/Target/SystemZ/Disassembler/LLVMBuild.txt
-lib/Target/SystemZ/Disassembler/Makefile
lib/Target/SystemZ/InstPrinter/CMakeLists.txt
lib/Target/SystemZ/InstPrinter/LLVMBuild.txt
-lib/Target/SystemZ/InstPrinter/Makefile
lib/Target/SystemZ/LLVMBuild.txt
lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt
-lib/Target/SystemZ/MCTargetDesc/Makefile
-lib/Target/SystemZ/Makefile
lib/Target/SystemZ/TargetInfo/CMakeLists.txt
lib/Target/SystemZ/TargetInfo/LLVMBuild.txt
-lib/Target/SystemZ/TargetInfo/Makefile
+lib/Target/WebAssembly/AsmParser/CMakeLists.txt
+lib/Target/WebAssembly/AsmParser/LLVMBuild.txt
+lib/Target/WebAssembly/CMakeLists.txt
+lib/Target/WebAssembly/Disassembler/CMakeLists.txt
+lib/Target/WebAssembly/Disassembler/LLVMBuild.txt
+lib/Target/WebAssembly/InstPrinter/CMakeLists.txt
+lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt
+lib/Target/WebAssembly/LLVMBuild.txt
+lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
+lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt
+lib/Target/WebAssembly/TargetInfo/CMakeLists.txt
+lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt
lib/Target/X86/AsmParser/CMakeLists.txt
lib/Target/X86/AsmParser/LLVMBuild.txt
-lib/Target/X86/AsmParser/Makefile
lib/Target/X86/CMakeLists.txt
lib/Target/X86/Disassembler/CMakeLists.txt
lib/Target/X86/Disassembler/LLVMBuild.txt
-lib/Target/X86/Disassembler/Makefile
lib/Target/X86/InstPrinter/CMakeLists.txt
lib/Target/X86/InstPrinter/LLVMBuild.txt
-lib/Target/X86/InstPrinter/Makefile
lib/Target/X86/LLVMBuild.txt
lib/Target/X86/MCTargetDesc/CMakeLists.txt
lib/Target/X86/MCTargetDesc/LLVMBuild.txt
-lib/Target/X86/MCTargetDesc/Makefile
-lib/Target/X86/Makefile
lib/Target/X86/README-FPStack.txt
-lib/Target/X86/README-MMX.txt
lib/Target/X86/README-SSE.txt
-lib/Target/X86/README-UNIMPLEMENTED.txt
lib/Target/X86/README-X86-64.txt
lib/Target/X86/README.txt
lib/Target/X86/TargetInfo/CMakeLists.txt
lib/Target/X86/TargetInfo/LLVMBuild.txt
-lib/Target/X86/TargetInfo/Makefile
lib/Target/X86/Utils/CMakeLists.txt
lib/Target/X86/Utils/LLVMBuild.txt
-lib/Target/X86/Utils/Makefile
-lib/Target/X86/X86CompilationCallback_Win64.asm
lib/Target/XCore/CMakeLists.txt
lib/Target/XCore/Disassembler/CMakeLists.txt
lib/Target/XCore/Disassembler/LLVMBuild.txt
-lib/Target/XCore/Disassembler/Makefile
lib/Target/XCore/InstPrinter/CMakeLists.txt
lib/Target/XCore/InstPrinter/LLVMBuild.txt
-lib/Target/XCore/InstPrinter/Makefile
lib/Target/XCore/LLVMBuild.txt
lib/Target/XCore/MCTargetDesc/CMakeLists.txt
lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
-lib/Target/XCore/MCTargetDesc/Makefile
-lib/Target/XCore/Makefile
lib/Target/XCore/README.txt
lib/Target/XCore/TargetInfo/CMakeLists.txt
lib/Target/XCore/TargetInfo/LLVMBuild.txt
-lib/Target/XCore/TargetInfo/Makefile
+lib/Testing/CMakeLists.txt
+lib/Testing/LLVMBuild.txt
+lib/Testing/Support/CMakeLists.txt
+lib/Testing/Support/LLVMBuild.txt
+lib/TextAPI/CMakeLists.txt
+lib/TextAPI/LLVMBuild.txt
+lib/ToolDrivers/CMakeLists.txt
+lib/ToolDrivers/LLVMBuild.txt
+lib/ToolDrivers/llvm-dlltool/CMakeLists.txt
+lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt
+lib/ToolDrivers/llvm-lib/CMakeLists.txt
+lib/ToolDrivers/llvm-lib/LLVMBuild.txt
+lib/Transforms/AggressiveInstCombine/CMakeLists.txt
+lib/Transforms/AggressiveInstCombine/LLVMBuild.txt
lib/Transforms/CMakeLists.txt
+lib/Transforms/Coroutines/CMakeLists.txt
+lib/Transforms/Coroutines/LLVMBuild.txt
lib/Transforms/Hello/
lib/Transforms/IPO/CMakeLists.txt
lib/Transforms/IPO/LLVMBuild.txt
-lib/Transforms/IPO/Makefile
lib/Transforms/InstCombine/CMakeLists.txt
lib/Transforms/InstCombine/LLVMBuild.txt
-lib/Transforms/InstCombine/Makefile
lib/Transforms/Instrumentation/CMakeLists.txt
lib/Transforms/Instrumentation/LLVMBuild.txt
-lib/Transforms/Instrumentation/Makefile
lib/Transforms/LLVMBuild.txt
-lib/Transforms/Makefile
lib/Transforms/ObjCARC/CMakeLists.txt
lib/Transforms/ObjCARC/LLVMBuild.txt
-lib/Transforms/ObjCARC/Makefile
lib/Transforms/Scalar/CMakeLists.txt
lib/Transforms/Scalar/LLVMBuild.txt
-lib/Transforms/Scalar/Makefile
lib/Transforms/Utils/CMakeLists.txt
lib/Transforms/Utils/LLVMBuild.txt
-lib/Transforms/Utils/Makefile
lib/Transforms/Vectorize/CMakeLists.txt
lib/Transforms/Vectorize/LLVMBuild.txt
-lib/Transforms/Vectorize/Makefile
+lib/WindowsManifest/CMakeLists.txt
+lib/WindowsManifest/LLVMBuild.txt
+lib/XRay/CMakeLists.txt
+lib/XRay/LLVMBuild.txt
llvm.spec.in
-projects/
+projects/CMakeLists.txt
+projects/LLVMBuild.txt
+resources/
+runtimes/
test/
tools/CMakeLists.txt
tools/LLVMBuild.txt
-tools/Makefile
+tools/bugpoint-passes/
tools/bugpoint/CMakeLists.txt
tools/bugpoint/LLVMBuild.txt
-tools/bugpoint/Makefile
-tools/bugpoint-passes/
tools/dsymutil/
tools/gold/
tools/llc/CMakeLists.txt
tools/llc/LLVMBuild.txt
-tools/llc/Makefile
tools/lli/CMakeLists.txt
tools/lli/ChildTarget/CMakeLists.txt
tools/lli/ChildTarget/LLVMBuild.txt
-tools/lli/ChildTarget/Makefile
tools/lli/LLVMBuild.txt
-tools/lli/Makefile
tools/llvm-ar/CMakeLists.txt
tools/llvm-ar/LLVMBuild.txt
-tools/llvm-ar/Makefile
-tools/llvm-ar/install_symlink.cmake
+tools/llvm-as-fuzzer/
+tools/llvm-as-parasitic-coverage-repro/
tools/llvm-as/CMakeLists.txt
tools/llvm-as/LLVMBuild.txt
-tools/llvm-as/Makefile
tools/llvm-bcanalyzer/CMakeLists.txt
tools/llvm-bcanalyzer/LLVMBuild.txt
-tools/llvm-bcanalyzer/Makefile
tools/llvm-c-test/
+tools/llvm-cat/
+tools/llvm-cfi-verify/
tools/llvm-config/
tools/llvm-cov/CMakeLists.txt
tools/llvm-cov/LLVMBuild.txt
-tools/llvm-cov/Makefile
+tools/llvm-cvtres/
tools/llvm-cxxdump/CMakeLists.txt
tools/llvm-cxxdump/LLVMBuild.txt
-tools/llvm-cxxdump/Makefile
+tools/llvm-cxxfilt/CMakeLists.txt
+tools/llvm-cxxmap/CMakeLists.txt
+tools/llvm-cxxmap/LLVMBuild.txt
tools/llvm-diff/CMakeLists.txt
tools/llvm-diff/LLVMBuild.txt
-tools/llvm-diff/Makefile
tools/llvm-dis/CMakeLists.txt
tools/llvm-dis/LLVMBuild.txt
-tools/llvm-dis/Makefile
tools/llvm-dwarfdump/CMakeLists.txt
tools/llvm-dwarfdump/LLVMBuild.txt
-tools/llvm-dwarfdump/Makefile
+tools/llvm-dwarfdump/fuzzer/
+tools/llvm-dwp/
+tools/llvm-elfabi/
+tools/llvm-exegesis/
tools/llvm-extract/CMakeLists.txt
tools/llvm-extract/LLVMBuild.txt
-tools/llvm-extract/Makefile
tools/llvm-go/
+tools/llvm-isel-fuzzer/
+tools/llvm-itanium-demangle-fuzzer/
tools/llvm-jitlistener/
tools/llvm-link/CMakeLists.txt
tools/llvm-link/LLVMBuild.txt
-tools/llvm-link/Makefile
tools/llvm-lto/CMakeLists.txt
tools/llvm-lto/LLVMBuild.txt
-tools/llvm-lto/Makefile
+tools/llvm-lto2/CMakeLists.txt
+tools/llvm-lto2/LLVMBuild.txt
+tools/llvm-mc-assemble-fuzzer/
+tools/llvm-mc-disassemble-fuzzer/
tools/llvm-mc/CMakeLists.txt
tools/llvm-mc/LLVMBuild.txt
-tools/llvm-mc/Makefile
+tools/llvm-mca/CMakeLists.txt
+tools/llvm-mca/LLVMBuild.txt
tools/llvm-mcmarkup/
+tools/llvm-microsoft-demangle-fuzzer/
+tools/llvm-modextract/CMakeLists.txt
+tools/llvm-modextract/LLVMBuild.txt
+tools/llvm-mt/
tools/llvm-nm/CMakeLists.txt
tools/llvm-nm/LLVMBuild.txt
-tools/llvm-nm/Makefile
+tools/llvm-objcopy/CMakeLists.txt
+tools/llvm-objcopy/LLVMBuild.txt
tools/llvm-objdump/CMakeLists.txt
tools/llvm-objdump/LLVMBuild.txt
-tools/llvm-objdump/Makefile
-tools/llvm-pdbdump/CMakeLists.txt
-tools/llvm-pdbdump/LLVMBuild.txt
-tools/llvm-pdbdump/Makefile
+tools/llvm-opt-fuzzer/
+tools/llvm-opt-report/
+tools/llvm-pdbutil/CMakeLists.txt
+tools/llvm-pdbutil/LLVMBuild.txt
+tools/llvm-pdbutil/fuzzer/
tools/llvm-profdata/CMakeLists.txt
tools/llvm-profdata/LLVMBuild.txt
-tools/llvm-profdata/Makefile
+tools/llvm-rc/
tools/llvm-readobj/CMakeLists.txt
tools/llvm-readobj/LLVMBuild.txt
-tools/llvm-readobj/Makefile
tools/llvm-rtdyld/CMakeLists.txt
tools/llvm-rtdyld/LLVMBuild.txt
-tools/llvm-rtdyld/Makefile
tools/llvm-shlib/
tools/llvm-size/
+tools/llvm-special-case-list-fuzzer/
+tools/llvm-split/
tools/llvm-stress/CMakeLists.txt
tools/llvm-stress/LLVMBuild.txt
-tools/llvm-stress/Makefile
+tools/llvm-strings/
tools/llvm-symbolizer/CMakeLists.txt
-tools/llvm-symbolizer/Makefile
+tools/llvm-undname/
tools/llvm-vtabledump/
+tools/llvm-xray/CMakeLists.txt
+tools/llvm-yaml-numeric-parser-fuzzer/
tools/lto/
-tools/macho-dump/CMakeLists.txt
-tools/macho-dump/LLVMBuild.txt
-tools/macho-dump/Makefile
tools/msbuild/
tools/obj2yaml/
+tools/opt-remarks/
+tools/opt-viewer/
tools/opt/CMakeLists.txt
tools/opt/LLVMBuild.txt
-tools/opt/Makefile
+tools/sancov/
+tools/sanstats/
tools/verify-uselistorder/
+tools/xcode-toolchain/
tools/yaml2obj/
unittests/
utils/DSAclean.py
@@ -498,33 +508,47 @@ utils/GetRepositoryPath
utils/GetSourceVersion
utils/KillTheDoctor/
utils/LLVMBuild.txt
-utils/Makefile
+utils/LLVMVisualizers/
utils/Misc/
utils/PerfectShuffle/
+utils/Reviewing/
utils/TableGen/CMakeLists.txt
utils/TableGen/LLVMBuild.txt
-utils/TableGen/Makefile
utils/TableGen/tdtags
utils/Target/
utils/UpdateCMakeLists.pl
+utils/UpdateTestChecks/
+utils/abtest/
+utils/abtest.py
+utils/benchmark/
utils/bisect
+utils/bisect-skip-count
utils/bugpoint/
+utils/bugpoint_gisel_reducer.py
utils/buildit/
utils/check-each-file
utils/clang-parse-diagnostics-file
utils/codegen-diff
+utils/collect_and_build_with_pgo.py
utils/count/
utils/countloc.sh
utils/create_ladder_graph.py
utils/crosstool/
+utils/demangle_tree.py
+utils/docker/
utils/emacs/
+utils/extract_symbols.py
+utils/extract_vplan.py
utils/findmisopt
utils/findoptdiff
utils/findsym.pl
utils/fpcmp/
+utils/gdb-scripts/
utils/getsrcs.sh
utils/git/
utils/git-svn/
+utils/gn/
+utils/indirect_calls.py
utils/jedit/
utils/kate/
utils/lint/
@@ -532,23 +556,33 @@ utils/lit/
utils/lldbDataFormatters.py
utils/llvm-build/
utils/llvm-compilers-check
+utils/llvm-gisel-cov.py
utils/llvm-lit/
utils/llvm-native-gxx
utils/llvm.grm
-utils/llvm.natvis
utils/llvmdo
utils/llvmgrep
-utils/makellvm
utils/not/
+utils/opt-viewer/
+utils/prepare-code-coverage-artifact.py
utils/release/
+utils/sanitizers/
+utils/schedcover.py
utils/shuffle_fuzz.py
+utils/shuffle_select_fuzz_tester.py
utils/sort_includes.py
-utils/test_debuginfo.pl
utils/testgen/
utils/textmate/
+utils/unicode-case-fold.py
utils/unittest/
+utils/update_analyze_test_checks.py
+utils/update_cc_test_checks.py
utils/update_llc_test_checks.py
+utils/update_mca_test_checks.py
+utils/update_mir_test_checks.py
+utils/update_test_checks.py
utils/valgrind/
utils/vim/
+utils/vscode/
utils/wciia.py
utils/yaml-bench/
diff --git a/contrib/llvm/LICENSE.TXT b/contrib/llvm/LICENSE.TXT
index 461398bab7a7..e4d67d16fea1 100644
--- a/contrib/llvm/LICENSE.TXT
+++ b/contrib/llvm/LICENSE.TXT
@@ -4,7 +4,7 @@ LLVM Release License
University of Illinois/NCSA
Open Source License
-Copyright (c) 2003-2018 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:
diff --git a/contrib/llvm/include/llvm-c/Core.h b/contrib/llvm/include/llvm-c/Core.h
index 6792219f8730..06de058bdc58 100644
--- a/contrib/llvm/include/llvm-c/Core.h
+++ b/contrib/llvm/include/llvm-c/Core.h
@@ -54,6 +54,8 @@ extern "C" {
* @{
*/
+/// External users depend on the following values being stable. It is not safe
+/// to reorder them.
typedef enum {
/* Terminator Instructions */
LLVMRet = 1,
@@ -64,6 +66,9 @@ typedef enum {
/* removed 6 due to API changes */
LLVMUnreachable = 7,
+ /* Standard Unary Operators */
+ LLVMFNeg = 66,
+
/* Standard Binary Operators */
LLVMAdd = 8,
LLVMFAdd = 9,
@@ -516,6 +521,23 @@ void LLVMContextSetYieldCallback(LLVMContextRef C, LLVMYieldCallback Callback,
void *OpaqueHandle);
/**
+ * Retrieve whether the given context is set to discard all value names.
+ *
+ * @see LLVMContext::shouldDiscardValueNames()
+ */
+LLVMBool LLVMContextShouldDiscardValueNames(LLVMContextRef C);
+
+/**
+ * Set whether the given context discards all value names.
+ *
+ * If true, only the names of GlobalValue objects will be available in the IR.
+ * This can be used to save memory and runtime, especially in release mode.
+ *
+ * @see LLVMContext::setDiscardValueNames()
+ */
+void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard);
+
+/**
* Destroy a context instance.
*
* This should be called for every call to LLVMContextCreate() or memory
@@ -843,6 +865,63 @@ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M);
LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
/**
+ * Obtain an iterator to the first NamedMDNode in a Module.
+ *
+ * @see llvm::Module::named_metadata_begin()
+ */
+LLVMNamedMDNodeRef LLVMGetFirstNamedMetadata(LLVMModuleRef M);
+
+/**
+ * Obtain an iterator to the last NamedMDNode in a Module.
+ *
+ * @see llvm::Module::named_metadata_end()
+ */
+LLVMNamedMDNodeRef LLVMGetLastNamedMetadata(LLVMModuleRef M);
+
+/**
+ * Advance a NamedMDNode iterator to the next NamedMDNode.
+ *
+ * Returns NULL if the iterator was already at the end and there are no more
+ * named metadata nodes.
+ */
+LLVMNamedMDNodeRef LLVMGetNextNamedMetadata(LLVMNamedMDNodeRef NamedMDNode);
+
+/**
+ * Decrement a NamedMDNode iterator to the previous NamedMDNode.
+ *
+ * Returns NULL if the iterator was already at the beginning and there are
+ * no previous named metadata nodes.
+ */
+LLVMNamedMDNodeRef LLVMGetPreviousNamedMetadata(LLVMNamedMDNodeRef NamedMDNode);
+
+/**
+ * Retrieve a NamedMDNode with the given name, returning NULL if no such
+ * node exists.
+ *
+ * @see llvm::Module::getNamedMetadata()
+ */
+LLVMNamedMDNodeRef LLVMGetNamedMetadata(LLVMModuleRef M,
+ const char *Name, size_t NameLen);
+
+/**
+ * Retrieve a NamedMDNode with the given name, creating a new node if no such
+ * node exists.
+ *
+ * @see llvm::Module::getOrInsertNamedMetadata()
+ */
+LLVMNamedMDNodeRef LLVMGetOrInsertNamedMetadata(LLVMModuleRef M,
+ const char *Name,
+ size_t NameLen);
+
+/**
+ * Retrieve the name of a NamedMDNode.
+ *
+ * @see llvm::NamedMDNode::getName()
+ */
+const char *LLVMGetNamedMetadataName(LLVMNamedMDNodeRef NamedMD,
+ size_t *NameLen);
+
+/**
* Obtain the number of operands for named metadata in a module.
*
* @see llvm::Module::getNamedMetadata()
@@ -873,6 +952,44 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char *Name,
LLVMValueRef Val);
/**
+ * Return the directory of the debug location for this value, which must be
+ * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function.
+ *
+ * @see llvm::Instruction::getDebugLoc()
+ * @see llvm::GlobalVariable::getDebugInfo()
+ * @see llvm::Function::getSubprogram()
+ */
+const char *LLVMGetDebugLocDirectory(LLVMValueRef Val, unsigned *Length);
+
+/**
+ * Return the filename of the debug location for this value, which must be
+ * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function.
+ *
+ * @see llvm::Instruction::getDebugLoc()
+ * @see llvm::GlobalVariable::getDebugInfo()
+ * @see llvm::Function::getSubprogram()
+ */
+const char *LLVMGetDebugLocFilename(LLVMValueRef Val, unsigned *Length);
+
+/**
+ * Return the line number of the debug location for this value, which must be
+ * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function.
+ *
+ * @see llvm::Instruction::getDebugLoc()
+ * @see llvm::GlobalVariable::getDebugInfo()
+ * @see llvm::Function::getSubprogram()
+ */
+unsigned LLVMGetDebugLocLine(LLVMValueRef Val);
+
+/**
+ * Return the column number of the debug location for this value, which must be
+ * an llvm::Instruction.
+ *
+ * @see llvm::Instruction::getDebugLoc()
+ */
+unsigned LLVMGetDebugLocColumn(LLVMValueRef Val);
+
+/**
* Add a function to a module under a specified name.
*
* @see llvm::Function::Create()
@@ -1222,6 +1339,13 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy);
LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy);
/**
+ * Determine whether a structure is literal.
+ *
+ * @see llvm::StructType::isLiteral()
+ */
+LLVMBool LLVMIsLiteralStruct(LLVMTypeRef StructTy);
+
+/**
* @}
*/
@@ -1408,6 +1532,7 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(ConstantVector) \
macro(GlobalValue) \
macro(GlobalAlias) \
+ macro(GlobalIFunc) \
macro(GlobalObject) \
macro(Function) \
macro(GlobalVariable) \
@@ -1417,7 +1542,9 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(CallInst) \
macro(IntrinsicInst) \
macro(DbgInfoIntrinsic) \
- macro(DbgDeclareInst) \
+ macro(DbgVariableIntrinsic) \
+ macro(DbgDeclareInst) \
+ macro(DbgLabelInst) \
macro(MemIntrinsic) \
macro(MemCpyInst) \
macro(MemMoveInst) \
@@ -1434,16 +1561,15 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(SelectInst) \
macro(ShuffleVectorInst) \
macro(StoreInst) \
- macro(TerminatorInst) \
- macro(BranchInst) \
- macro(IndirectBrInst) \
- macro(InvokeInst) \
- macro(ReturnInst) \
- macro(SwitchInst) \
- macro(UnreachableInst) \
- macro(ResumeInst) \
- macro(CleanupReturnInst) \
- macro(CatchReturnInst) \
+ macro(BranchInst) \
+ macro(IndirectBrInst) \
+ macro(InvokeInst) \
+ macro(ReturnInst) \
+ macro(SwitchInst) \
+ macro(UnreachableInst) \
+ macro(ResumeInst) \
+ macro(CleanupReturnInst) \
+ macro(CatchReturnInst) \
macro(FuncletPadInst) \
macro(CatchPadInst) \
macro(CleanupPadInst) \
@@ -1959,9 +2085,14 @@ LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices, unsigned NumIndices);
+LLVMValueRef LLVMConstGEP2(LLVMTypeRef Ty, LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices, unsigned NumIndices);
LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices,
unsigned NumIndices);
+LLVMValueRef LLVMConstInBoundsGEP2(LLVMTypeRef Ty, LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices,
+ unsigned NumIndices);
LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
@@ -2037,6 +2168,14 @@ void LLVMSetDLLStorageClass(LLVMValueRef Global, LLVMDLLStorageClass Class);
LLVMUnnamedAddr LLVMGetUnnamedAddress(LLVMValueRef Global);
void LLVMSetUnnamedAddress(LLVMValueRef Global, LLVMUnnamedAddr UnnamedAddr);
+/**
+ * Returns the "value type" of a global value. This differs from the formal
+ * type of a global value which is always a pointer type.
+ *
+ * @see llvm::GlobalValue::getValueType()
+ */
+LLVMTypeRef LLVMGlobalGetValueType(LLVMValueRef Global);
+
/** Deprecated: Use LLVMGetUnnamedAddress instead. */
LLVMBool LLVMHasUnnamedAddr(LLVMValueRef Global);
/** Deprecated: Use LLVMSetUnnamedAddress instead. */
@@ -2068,6 +2207,58 @@ unsigned LLVMGetAlignment(LLVMValueRef V);
void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes);
/**
+ * Sets a metadata attachment, erasing the existing metadata attachment if
+ * it already exists for the given kind.
+ *
+ * @see llvm::GlobalObject::setMetadata()
+ */
+void LLVMGlobalSetMetadata(LLVMValueRef Global, unsigned Kind,
+ LLVMMetadataRef MD);
+
+/**
+ * Erases a metadata attachment of the given kind if it exists.
+ *
+ * @see llvm::GlobalObject::eraseMetadata()
+ */
+void LLVMGlobalEraseMetadata(LLVMValueRef Global, unsigned Kind);
+
+/**
+ * Removes all metadata attachments from this value.
+ *
+ * @see llvm::GlobalObject::clearMetadata()
+ */
+void LLVMGlobalClearMetadata(LLVMValueRef Global);
+
+/**
+ * Retrieves an array of metadata entries representing the metadata attached to
+ * this value. The caller is responsible for freeing this array by calling
+ * \c LLVMDisposeValueMetadataEntries.
+ *
+ * @see llvm::GlobalObject::getAllMetadata()
+ */
+LLVMValueMetadataEntry *LLVMGlobalCopyAllMetadata(LLVMValueRef Value,
+ size_t *NumEntries);
+
+/**
+ * Destroys value metadata entries.
+ */
+void LLVMDisposeValueMetadataEntries(LLVMValueMetadataEntry *Entries);
+
+/**
+ * Returns the kind of a value metadata entry at a specific index.
+ */
+unsigned LLVMValueMetadataEntriesGetKind(LLVMValueMetadataEntry *Entries,
+ unsigned Index);
+
+/**
+ * Returns the underlying metadata node of a value metadata entry at a
+ * specific index.
+ */
+LLVMMetadataRef
+LLVMValueMetadataEntriesGetMetadata(LLVMValueMetadataEntry *Entries,
+ unsigned Index);
+
+/**
* @}
*/
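A minimal usage sketch for the new metadata-entry accessors above (assuming `G` is an existing LLVMValueRef for a global object); the copied array must be freed by the caller:

    size_t NumEntries;
    LLVMValueMetadataEntry *Entries = LLVMGlobalCopyAllMetadata(G, &NumEntries);
    for (size_t I = 0; I < NumEntries; ++I) {
      /* Each entry pairs a metadata kind ID with the attached node. */
      unsigned Kind = LLVMValueMetadataEntriesGetKind(Entries, (unsigned)I);
      LLVMMetadataRef MD = LLVMValueMetadataEntriesGetMetadata(Entries, (unsigned)I);
      (void)Kind; (void)MD; /* inspect as needed */
    }
    LLVMDisposeValueMetadataEntries(Entries);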
@@ -2218,6 +2409,54 @@ void LLVMSetPersonalityFn(LLVMValueRef Fn, LLVMValueRef PersonalityFn);
unsigned LLVMGetIntrinsicID(LLVMValueRef Fn);
/**
+ * Create or insert the declaration of an intrinsic. For overloaded intrinsics,
+ * parameter types must be provided to uniquely identify an overload.
+ *
+ * @see llvm::Intrinsic::getDeclaration()
+ */
+LLVMValueRef LLVMGetIntrinsicDeclaration(LLVMModuleRef Mod,
+ unsigned ID,
+ LLVMTypeRef *ParamTypes,
+ size_t ParamCount);
+
+/**
+ * Retrieves the type of an intrinsic. For overloaded intrinsics, parameter
+ * types must be provided to uniquely identify an overload.
+ *
+ * @see llvm::Intrinsic::getType()
+ */
+LLVMTypeRef LLVMIntrinsicGetType(LLVMContextRef Ctx, unsigned ID,
+ LLVMTypeRef *ParamTypes, size_t ParamCount);
+
+/**
+ * Retrieves the name of an intrinsic.
+ *
+ * @see llvm::Intrinsic::getName()
+ */
+const char *LLVMIntrinsicGetName(unsigned ID, size_t *NameLength);
+
+/**
+ * Copies the name of an overloaded intrinsic identified by a given list of
+ * parameter types.
+ *
+ * Unlike LLVMIntrinsicGetName, the caller is responsible for freeing the
+ * returned string.
+ *
+ * @see llvm::Intrinsic::getName()
+ */
+const char *LLVMIntrinsicCopyOverloadedName(unsigned ID,
+ LLVMTypeRef *ParamTypes,
+ size_t ParamCount,
+ size_t *NameLength);
+
+/**
+ * Determine whether the intrinsic identified by the given ID is overloaded.
+ *
+ * @see llvm::Intrinsic::isOverloaded()
+ */
+LLVMBool LLVMIntrinsicIsOverloaded(unsigned ID);
+
+/**
* Obtain the calling function of a function.
*
* The returned value corresponds to the LLVMCallConv enumeration.
@@ -2514,7 +2753,7 @@ LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB);
* If the basic block does not have a terminator (it is not well-formed
* if it doesn't), then NULL is returned.
*
- * The returned LLVMValueRef corresponds to a llvm::TerminatorInst.
+ * The returned LLVMValueRef corresponds to an llvm::Instruction.
*
* @see llvm::BasicBlock::getTerminator()
*/
@@ -2573,6 +2812,14 @@ LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB);
LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn);
/**
+ * Create a new basic block without inserting it into a function.
+ *
+ * @see llvm::BasicBlock::Create()
+ */
+LLVMBasicBlockRef LLVMCreateBasicBlockInContext(LLVMContextRef C,
+ const char *Name);
+
+/**
* Append a basic block to the end of a function.
*
* @see llvm::BasicBlock::Create()
@@ -2695,6 +2942,16 @@ LLVMValueRef LLVMGetMetadata(LLVMValueRef Val, unsigned KindID);
void LLVMSetMetadata(LLVMValueRef Val, unsigned KindID, LLVMValueRef Node);
/**
+ * Returns the metadata associated with an instruction value, but filters out
+ * all the debug locations.
+ *
+ * @see llvm::Instruction::getAllMetadataOtherThanDebugLoc()
+ */
+LLVMValueMetadataEntry *
+LLVMInstructionGetAllMetadataOtherThanDebugLoc(LLVMValueRef Instr,
+ size_t *NumEntries);
+
+/**
* Obtain the basic block to which an instruction belongs.
*
* @see llvm::Instruction::getParent()
@@ -2777,6 +3034,15 @@ LLVMRealPredicate LLVMGetFCmpPredicate(LLVMValueRef Inst);
LLVMValueRef LLVMInstructionClone(LLVMValueRef Inst);
/**
+ * Determine whether an instruction is a terminator. This routine is named to
+ * be compatible with historical functions that did this by querying the
+ * underlying C++ type.
+ *
+ * @see llvm::Instruction::isTerminator()
+ */
+LLVMValueRef LLVMIsATerminatorInst(LLVMValueRef Inst);
+
+/**
* @defgroup LLVMCCoreValueInstructionCall Call Sites and Invocations
*
* Functions in this group apply to instructions that refer to call
@@ -2839,6 +3105,13 @@ void LLVMRemoveCallSiteStringAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
const char *K, unsigned KLen);
/**
+ * Obtain the function type called by this instruction.
+ *
+ * @see llvm::CallBase::getFunctionType()
+ */
+LLVMTypeRef LLVMGetCalledFunctionType(LLVMValueRef C);
+
+/**
* Obtain the pointer to the function invoked by this instruction.
*
* This expects an LLVMValueRef that corresponds to a llvm::CallInst or
@@ -2916,8 +3189,8 @@ void LLVMSetUnwindDest(LLVMValueRef InvokeInst, LLVMBasicBlockRef B);
/**
* @defgroup LLVMCCoreValueInstructionTerminator Terminators
*
- * Functions in this group only apply to instructions that map to
- * llvm::TerminatorInst instances.
+ * Functions in this group only apply to instructions for which
+ * LLVMIsATerminatorInst returns true.
*
* @{
*/
@@ -2925,21 +3198,21 @@ void LLVMSetUnwindDest(LLVMValueRef InvokeInst, LLVMBasicBlockRef B);
/**
* Return the number of successors that this terminator has.
*
- * @see llvm::TerminatorInst::getNumSuccessors
+ * @see llvm::Instruction::getNumSuccessors
*/
unsigned LLVMGetNumSuccessors(LLVMValueRef Term);
/**
* Return the specified successor.
*
- * @see llvm::TerminatorInst::getSuccessor
+ * @see llvm::Instruction::getSuccessor
*/
LLVMBasicBlockRef LLVMGetSuccessor(LLVMValueRef Term, unsigned i);
/**
* Update the specified successor to point at the provided block.
*
- * @see llvm::TerminatorInst::setSuccessor
+ * @see llvm::Instruction::setSuccessor
*/
void LLVMSetSuccessor(LLVMValueRef Term, unsigned i, LLVMBasicBlockRef block);
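A short sketch of the successor helpers under the new terminator model (assuming `BB` is an existing LLVMBasicBlockRef); LLVMGetBasicBlockTerminator may return NULL for an unfinished block, and LLVMIsATerminatorInst is the new type check:

    LLVMValueRef Term = LLVMGetBasicBlockTerminator(BB);
    if (Term != NULL && LLVMIsATerminatorInst(Term)) {
      unsigned N = LLVMGetNumSuccessors(Term);
      for (unsigned I = 0; I < N; ++I) {
        LLVMBasicBlockRef Succ = LLVMGetSuccessor(Term, I);
        (void)Succ; /* visit the successor block */
      }
    }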
@@ -3130,10 +3403,16 @@ LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef, LLVMValueRef V,
LLVMBasicBlockRef Else, unsigned NumCases);
LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr,
unsigned NumDests);
+// LLVMBuildInvoke is deprecated in favor of LLVMBuildInvoke2, in preparation
+// for opaque pointer types.
LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
const char *Name);
+LLVMValueRef LLVMBuildInvoke2(LLVMBuilderRef, LLVMTypeRef Ty, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
+ const char *Name);
LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef);
/* Exception Handling */
@@ -3290,13 +3569,48 @@ LLVMValueRef LLVMBuildNot(LLVMBuilderRef, LLVMValueRef V, const char *Name);
LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name);
LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef, LLVMTypeRef Ty,
LLVMValueRef Val, const char *Name);
+
+/**
+ * Creates and inserts a memset storing the specified value at the
+ * specified pointer.
+ *
+ * @see llvm::IRBuilder::CreateMemSet()
+ */
+LLVMValueRef LLVMBuildMemSet(LLVMBuilderRef B, LLVMValueRef Ptr,
+ LLVMValueRef Val, LLVMValueRef Len,
+ unsigned Align);
+/**
+ * Creates and inserts a memcpy between the specified pointers.
+ *
+ * @see llvm::IRBuilder::CreateMemCpy()
+ */
+LLVMValueRef LLVMBuildMemCpy(LLVMBuilderRef B,
+ LLVMValueRef Dst, unsigned DstAlign,
+ LLVMValueRef Src, unsigned SrcAlign,
+ LLVMValueRef Size);
+/**
+ * Creates and inserts a memmove between the specified pointers.
+ *
+ * @see llvm::IRBuilder::CreateMemMove()
+ */
+LLVMValueRef LLVMBuildMemMove(LLVMBuilderRef B,
+ LLVMValueRef Dst, unsigned DstAlign,
+ LLVMValueRef Src, unsigned SrcAlign,
+ LLVMValueRef Size);
+
LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name);
LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef, LLVMTypeRef Ty,
LLVMValueRef Val, const char *Name);
LLVMValueRef LLVMBuildFree(LLVMBuilderRef, LLVMValueRef PointerVal);
+// LLVMBuildLoad is deprecated in favor of LLVMBuildLoad2, in preparation for
+// opaque pointer types.
LLVMValueRef LLVMBuildLoad(LLVMBuilderRef, LLVMValueRef PointerVal,
const char *Name);
+LLVMValueRef LLVMBuildLoad2(LLVMBuilderRef, LLVMTypeRef Ty,
+ LLVMValueRef PointerVal, const char *Name);
LLVMValueRef LLVMBuildStore(LLVMBuilderRef, LLVMValueRef Val, LLVMValueRef Ptr);
+// LLVMBuildGEP, LLVMBuildInBoundsGEP, and LLVMBuildStructGEP are deprecated in
+// favor of LLVMBuild*GEP2, in preparation for opaque pointer types.
LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name);
@@ -3305,6 +3619,15 @@ LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
const char *Name);
LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
unsigned Idx, const char *Name);
+LLVMValueRef LLVMBuildGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, LLVMValueRef *Indices,
+ unsigned NumIndices, const char *Name);
+LLVMValueRef LLVMBuildInBoundsGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, LLVMValueRef *Indices,
+ unsigned NumIndices, const char *Name);
+LLVMValueRef LLVMBuildStructGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, unsigned Idx,
+ const char *Name);
LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
const char *Name);
LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
@@ -3351,11 +3674,16 @@ LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name);
LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name);
-LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val, /*Signed cast!*/
- LLVMTypeRef DestTy, const char *Name);
+LLVMValueRef LLVMBuildIntCast2(LLVMBuilderRef, LLVMValueRef Val,
+ LLVMTypeRef DestTy, LLVMBool IsSigned,
+ const char *Name);
LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name);
+/** Deprecated: This cast is always signed. Use LLVMBuildIntCast2 instead. */
+LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val, /*Signed cast!*/
+ LLVMTypeRef DestTy, const char *Name);
+
/* Comparisons */
LLVMValueRef LLVMBuildICmp(LLVMBuilderRef, LLVMIntPredicate Op,
LLVMValueRef LHS, LLVMValueRef RHS,
@@ -3366,9 +3694,14 @@ LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef, LLVMRealPredicate Op,
/* Miscellaneous instructions */
LLVMValueRef LLVMBuildPhi(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name);
+// LLVMBuildCall is deprecated in favor of LLVMBuildCall2, in preparation for
+// opaque pointer types.
LLVMValueRef LLVMBuildCall(LLVMBuilderRef, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
const char *Name);
+LLVMValueRef LLVMBuildCall2(LLVMBuilderRef, LLVMTypeRef, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ const char *Name);
LLVMValueRef LLVMBuildSelect(LLVMBuilderRef, LLVMValueRef If,
LLVMValueRef Then, LLVMValueRef Else,
const char *Name);
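A hedged sketch of the explicitly-typed builder variants added above (assuming `B` is an LLVMBuilderRef, `P` points at an i32, and `F` is a function of type `FnTy`, all created elsewhere); the older untyped forms remain but are deprecated ahead of opaque pointer types:

    LLVMTypeRef I32 = LLVMInt32Type();
    LLVMValueRef V = LLVMBuildLoad2(B, I32, P, "v");           /* load with explicit pointee type */
    LLVMValueRef Args[] = { V };
    LLVMValueRef R = LLVMBuildCall2(B, FnTy, F, Args, 1, "r"); /* call with explicit function type */
    (void)R;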
diff --git a/contrib/llvm/include/llvm-c/DebugInfo.h b/contrib/llvm/include/llvm-c/DebugInfo.h
index cee6755f1874..87a72034b0e8 100644
--- a/contrib/llvm/include/llvm-c/DebugInfo.h
+++ b/contrib/llvm/include/llvm-c/DebugInfo.h
@@ -54,9 +54,12 @@ typedef enum {
LLVMDIFlagMainSubprogram = 1 << 21,
LLVMDIFlagTypePassByValue = 1 << 22,
LLVMDIFlagTypePassByReference = 1 << 23,
- LLVMDIFlagFixedEnum = 1 << 24,
+ LLVMDIFlagEnumClass = 1 << 24,
+ LLVMDIFlagFixedEnum = LLVMDIFlagEnumClass, // Deprecated.
LLVMDIFlagThunk = 1 << 25,
LLVMDIFlagTrivial = 1 << 26,
+ LLVMDIFlagBigEndian = 1 << 27,
+ LLVMDIFlagLittleEndian = 1 << 28,
LLVMDIFlagIndirectVirtualBase = (1 << 2) | (1 << 5),
LLVMDIFlagAccessibility = LLVMDIFlagPrivate | LLVMDIFlagProtected |
LLVMDIFlagPublic,
@@ -125,6 +128,44 @@ typedef enum {
} LLVMDWARFEmissionKind;
/**
+ * The kind of metadata nodes.
+ */
+enum {
+ LLVMMDStringMetadataKind,
+ LLVMConstantAsMetadataMetadataKind,
+ LLVMLocalAsMetadataMetadataKind,
+ LLVMDistinctMDOperandPlaceholderMetadataKind,
+ LLVMMDTupleMetadataKind,
+ LLVMDILocationMetadataKind,
+ LLVMDIExpressionMetadataKind,
+ LLVMDIGlobalVariableExpressionMetadataKind,
+ LLVMGenericDINodeMetadataKind,
+ LLVMDISubrangeMetadataKind,
+ LLVMDIEnumeratorMetadataKind,
+ LLVMDIBasicTypeMetadataKind,
+ LLVMDIDerivedTypeMetadataKind,
+ LLVMDICompositeTypeMetadataKind,
+ LLVMDISubroutineTypeMetadataKind,
+ LLVMDIFileMetadataKind,
+ LLVMDICompileUnitMetadataKind,
+ LLVMDISubprogramMetadataKind,
+ LLVMDILexicalBlockMetadataKind,
+ LLVMDILexicalBlockFileMetadataKind,
+ LLVMDINamespaceMetadataKind,
+ LLVMDIModuleMetadataKind,
+ LLVMDITemplateTypeParameterMetadataKind,
+ LLVMDITemplateValueParameterMetadataKind,
+ LLVMDIGlobalVariableMetadataKind,
+ LLVMDILocalVariableMetadataKind,
+ LLVMDILabelMetadataKind,
+ LLVMDIObjCPropertyMetadataKind,
+ LLVMDIImportedEntityMetadataKind,
+ LLVMDIMacroMetadataKind,
+ LLVMDIMacroFileMetadataKind
+};
+typedef unsigned LLVMMetadataKind;
+
+/**
* An LLVM DWARF type encoding.
*/
typedef unsigned LLVMDWARFTypeEncoding;
@@ -531,11 +572,13 @@ LLVMDIBuilderCreateUnspecifiedType(LLVMDIBuilderRef Builder, const char *Name,
* \param NameLen Length of type name.
* \param SizeInBits Size of the type.
* \param Encoding DWARF encoding code, e.g. \c LLVMDWARFTypeEncoding_float.
+ * \param Flags Flags to encode optional attributes like endianness.
*/
LLVMMetadataRef
LLVMDIBuilderCreateBasicType(LLVMDIBuilderRef Builder, const char *Name,
size_t NameLen, uint64_t SizeInBits,
- LLVMDWARFTypeEncoding Encoding);
+ LLVMDWARFTypeEncoding Encoding,
+ LLVMDIFlags Flags);
/**
* Create debugging information entry for a pointer.
@@ -965,21 +1008,15 @@ LLVMDIBuilderCreateConstantValueExpression(LLVMDIBuilderRef Builder,
* \param Expr The location of the global relative to the attached
* GlobalVariable.
* \param Decl Reference to the corresponding declaration.
* \param AlignInBits Variable alignment(or 0 if no alignment attr was
* specified)
*/
-LLVMMetadataRef
-LLVMDIBuilderCreateGlobalVariableExpression(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- const char *Name, size_t NameLen,
- const char *Linkage, size_t LinkLen,
- LLVMMetadataRef File,
- unsigned LineNo,
- LLVMMetadataRef Ty,
- LLVMBool LocalToUnit,
- LLVMMetadataRef Expr,
- LLVMMetadataRef Decl,
- uint32_t AlignInBits);
+LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name,
+ size_t NameLen, const char *Linkage, size_t LinkLen, LLVMMetadataRef File,
+ unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit,
+ LLVMMetadataRef Expr, LLVMMetadataRef Decl, uint32_t AlignInBits);
/**
* Create a new temporary \c MDNode. Suitable for use in constructing cyclic
* \c MDNode structures. A temporary \c MDNode is not uniqued, may be RAUW'd,
@@ -1025,17 +1062,11 @@ void LLVMMetadataReplaceAllUsesWith(LLVMMetadataRef TempTargetMetadata,
* \param AlignInBits Variable alignment(or 0 if no alignment attr was
* specified)
*/
-LLVMMetadataRef
-LLVMDIBuilderCreateTempGlobalVariableFwdDecl(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- const char *Name, size_t NameLen,
- const char *Linkage, size_t LnkLen,
- LLVMMetadataRef File,
- unsigned LineNo,
- LLVMMetadataRef Ty,
- LLVMBool LocalToUnit,
- LLVMMetadataRef Decl,
- uint32_t AlignInBits);
+LLVMMetadataRef LLVMDIBuilderCreateTempGlobalVariableFwdDecl(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name,
+ size_t NameLen, const char *Linkage, size_t LnkLen, LLVMMetadataRef File,
+ unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit,
+ LLVMMetadataRef Decl, uint32_t AlignInBits);
/**
* Insert a new llvm.dbg.declare intrinsic call before the given instruction.
@@ -1149,6 +1180,13 @@ LLVMMetadataRef LLVMGetSubprogram(LLVMValueRef Func);
*/
void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP);
+/**
+ * Obtain the enumerated type of a Metadata instance.
+ *
+ * @see llvm::Metadata::getMetadataID()
+ */
+LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata);
+
#ifdef __cplusplus
} /* end extern "C" */
#endif
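A brief sketch of dispatching on the new metadata kinds (assuming `MD` is an LLVMMetadataRef obtained elsewhere):

    LLVMMetadataKind Kind = LLVMGetMetadataKind(MD);
    if (Kind == LLVMDILocationMetadataKind) {
      /* MD is a debug location */
    } else if (Kind == LLVMDISubprogramMetadataKind) {
      /* MD describes a function's debug info */
    }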
diff --git a/contrib/llvm/include/llvm-c/Error.h b/contrib/llvm/include/llvm-c/Error.h
new file mode 100644
index 000000000000..71e84661222b
--- /dev/null
+++ b/contrib/llvm/include/llvm-c/Error.h
@@ -0,0 +1,69 @@
+/*===------- llvm-c/Error.h - llvm::Error class C Interface -------*- C -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file defines the C interface to LLVM's Error class. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_ERROR_H
+#define LLVM_C_ERROR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LLVMErrorSuccess 0
+
+/**
+ * Opaque reference to an error instance. Null serves as the 'success' value.
+ */
+typedef struct LLVMOpaqueError *LLVMErrorRef;
+
+/**
+ * Error type identifier.
+ */
+typedef const void *LLVMErrorTypeId;
+
+/**
+ * Returns the type id for the given error instance, which must be a failure
+ * value (i.e. non-null).
+ */
+LLVMErrorTypeId LLVMGetErrorTypeId(LLVMErrorRef Err);
+
+/**
+ * Dispose of the given error without handling it. This operation consumes the
+ * error, and the given LLVMErrorRef value is not usable once this call returns.
+ * Note: This method *only* needs to be called if the error is not being passed
+ * to some other consuming operation, e.g. LLVMGetErrorMessage.
+ */
+void LLVMConsumeError(LLVMErrorRef Err);
+
+/**
+ * Returns a string describing the given error. This operation consumes the error,
+ * and the given LLVMErrorRef value is not usable once this call returns.
+ * The caller is responsible for disposing of the string by calling
+ * LLVMDisposeErrorMessage.
+ */
+char *LLVMGetErrorMessage(LLVMErrorRef Err);
+
+/**
+ * Dispose of the given error message.
+ */
+void LLVMDisposeErrorMessage(char *ErrMsg);
+
+/**
+ * Returns the type id for llvm StringError.
+ */
+LLVMErrorTypeId LLVMGetStringErrorTypeId(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
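A minimal sketch of the intended consumption pattern (the helper name `report` and the producer of the error are illustrative, not part of the API): a NULL LLVMErrorRef is success, and every failure value must be consumed exactly once, either by LLVMGetErrorMessage or LLVMConsumeError.

    #include <stdio.h>
    #include "llvm-c/Error.h"

    static void report(LLVMErrorRef Err) {
      if (!Err)
        return;                                 /* NULL is the success value */
      if (LLVMGetErrorTypeId(Err) == LLVMGetStringErrorTypeId()) {
        char *Msg = LLVMGetErrorMessage(Err);   /* consumes Err */
        fprintf(stderr, "error: %s\n", Msg);
        LLVMDisposeErrorMessage(Msg);
      } else {
        LLVMConsumeError(Err);                  /* consumes Err without handling it */
      }
    }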
diff --git a/contrib/llvm/include/llvm-c/ExecutionEngine.h b/contrib/llvm/include/llvm-c/ExecutionEngine.h
index 49ae6fee45f0..e8ebef9ab15d 100644
--- a/contrib/llvm/include/llvm-c/ExecutionEngine.h
+++ b/contrib/llvm/include/llvm-c/ExecutionEngine.h
@@ -186,7 +186,7 @@ void LLVMDisposeMCJITMemoryManager(LLVMMCJITMemoryManagerRef MM);
LLVMJITEventListenerRef LLVMCreateGDBRegistrationListener(void);
LLVMJITEventListenerRef LLVMCreateIntelJITEventListener(void);
-LLVMJITEventListenerRef LLVMCreateOprofileJITEventListener(void);
+LLVMJITEventListenerRef LLVMCreateOProfileJITEventListener(void);
LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void);
/**
diff --git a/contrib/llvm/include/llvm-c/OptRemarks.h b/contrib/llvm/include/llvm-c/OptRemarks.h
new file mode 100644
index 000000000000..6a90394e711c
--- /dev/null
+++ b/contrib/llvm/include/llvm-c/OptRemarks.h
@@ -0,0 +1,204 @@
+/*===-- llvm-c/OptRemarks.h - OptRemarks Public C Interface -------*- C -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header provides a public interface to an opt-remark library. *|
+|* LLVM provides an implementation of this interface. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_OPT_REMARKS_H
+#define LLVM_C_OPT_REMARKS_H
+
+#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
+#ifdef __cplusplus
+#include <cstddef>
+extern "C" {
+#else
+#include <stddef.h>
+#endif /* !defined(__cplusplus) */
+
+/**
+ * @defgroup LLVMCOPTREMARKS OptRemarks
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
+#define OPT_REMARKS_API_VERSION 0
+
+/**
+ * String containing a buffer and a length. The buffer is not guaranteed to be
+ * zero-terminated.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+typedef struct {
+ const char *Str;
+ uint32_t Len;
+} LLVMOptRemarkStringRef;
+
+/**
+ * DebugLoc containing File, Line and Column.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+typedef struct {
+ // File:
+ LLVMOptRemarkStringRef SourceFile;
+ // Line:
+ uint32_t SourceLineNumber;
+ // Column:
+ uint32_t SourceColumnNumber;
+} LLVMOptRemarkDebugLoc;
+
+/**
+ * Element of the "Args" list. The key might give more information about the
+ * semantics of the value, e.g. "Callee" will tell you that the value is a
+ * symbol that names a function.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+typedef struct {
+ // e.g. "Callee"
+ LLVMOptRemarkStringRef Key;
+ // e.g. "malloc"
+ LLVMOptRemarkStringRef Value;
+
+ // "DebugLoc": Optional
+ LLVMOptRemarkDebugLoc DebugLoc;
+} LLVMOptRemarkArg;
+
+/**
+ * One remark entry.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+typedef struct {
+ // e.g. !Missed, !Passed
+ LLVMOptRemarkStringRef RemarkType;
+ // "Pass": Required
+ LLVMOptRemarkStringRef PassName;
+ // "Name": Required
+ LLVMOptRemarkStringRef RemarkName;
+ // "Function": Required
+ LLVMOptRemarkStringRef FunctionName;
+
+ // "DebugLoc": Optional
+ LLVMOptRemarkDebugLoc DebugLoc;
+ // "Hotness": Optional
+ uint32_t Hotness;
+ // "Args": Optional. It is an array of `NumArgs` elements.
+ uint32_t NumArgs;
+ LLVMOptRemarkArg *Args;
+} LLVMOptRemarkEntry;
+
+typedef struct LLVMOptRemarkOpaqueParser *LLVMOptRemarkParserRef;
+
+/**
+ * Creates a remark parser that can be used to read and parse the buffer located
+ * in \p Buf of size \p Size.
+ *
+ * \p Buf cannot be NULL.
+ *
+ * This function should be paired with LLVMOptRemarkParserDispose() to avoid
+ * leaking resources.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern LLVMOptRemarkParserRef LLVMOptRemarkParserCreate(const void *Buf,
+ uint64_t Size);
+
+/**
+ * Returns the next remark in the file.
+ *
+ * The value pointed to by the return value is invalidated by the next call to
+ * LLVMOptRemarkParserGetNext().
+ *
+ * If the parser reaches the end of the buffer, the return value will be NULL.
+ *
+ * In the case of an error, the return value will be NULL, and:
+ *
+ * 1) LLVMOptRemarkParserHasError() will return `1`.
+ *
+ * 2) LLVMOptRemarkParserGetErrorMessage() will return a descriptive error
+ * message.
+ *
+ * An error may occur if:
+ *
+ * 1) An argument is invalid.
+ *
+ * 2) There is a YAML parsing error. This type of error aborts parsing
+ * immediately and returns `1`. It can occur on malformed YAML.
+ *
+ * 3) Remark parsing error. If this type of error occurs, the parser won't call
+ * the handler and will continue to the next one. It can occur on malformed
+ * remarks, like missing or extra fields in the file.
+ *
+ * Here is a quick example of the usage:
+ *
+ * ```
+ * LLVMOptRemarkParserRef Parser = LLVMOptRemarkParserCreate(Buf, Size);
+ * LLVMOptRemarkEntry *Remark = NULL;
+ * while ((Remark = LLVMOptRemarkParserGetNext(Parser))) {
+ * // use Remark
+ * }
+ * bool HasError = LLVMOptRemarkParserHasError(Parser);
+ * LLVMOptRemarkParserDispose(Parser);
+ * ```
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern LLVMOptRemarkEntry *
+LLVMOptRemarkParserGetNext(LLVMOptRemarkParserRef Parser);
+
+/**
+ * Returns `1` if the parser encountered an error while parsing the buffer.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern LLVMBool LLVMOptRemarkParserHasError(LLVMOptRemarkParserRef Parser);
+
+/**
+ * Returns a null-terminated string containing an error message.
+ *
+ * In case of no error, the result is `NULL`.
+ *
+ * The memory of the string is bound to the lifetime of \p Parser. If
+ * LLVMOptRemarkParserDispose() is called, the memory of the string will be
+ * released.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern const char *
+LLVMOptRemarkParserGetErrorMessage(LLVMOptRemarkParserRef Parser);
+
+/**
+ * Releases all the resources used by \p Parser.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern void LLVMOptRemarkParserDispose(LLVMOptRemarkParserRef Parser);
+
+/**
+ * Returns the version of the opt-remarks dylib.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMOptRemarkVersion(void);
+
+/**
+ * @} // endgroup LLVMCOPTREMARKS
+ */
+
+#ifdef __cplusplus
+}
+#endif /* !defined(__cplusplus) */
+
+#endif /* LLVM_C_OPT_REMARKS_H */
diff --git a/contrib/llvm/include/llvm-c/OrcBindings.h b/contrib/llvm/include/llvm-c/OrcBindings.h
index 9497f0d40776..570db87fee94 100644
--- a/contrib/llvm/include/llvm-c/OrcBindings.h
+++ b/contrib/llvm/include/llvm-c/OrcBindings.h
@@ -22,6 +22,7 @@
#ifndef LLVM_C_ORCBINDINGS_H
#define LLVM_C_ORCBINDINGS_H
+#include "llvm-c/Error.h"
#include "llvm-c/Object.h"
#include "llvm-c/TargetMachine.h"
@@ -36,8 +37,6 @@ typedef uint64_t (*LLVMOrcSymbolResolverFn)(const char *Name, void *LookupCtx);
typedef uint64_t (*LLVMOrcLazyCompileCallbackFn)(LLVMOrcJITStackRef JITStack,
void *CallbackCtx);
-typedef enum { LLVMOrcErrSuccess = 0, LLVMOrcErrGeneric } LLVMOrcErrorCode;
-
/**
* Create an ORC JIT stack.
*
@@ -72,43 +71,41 @@ void LLVMOrcDisposeMangledSymbol(char *MangledSymbol);
/**
* Create a lazy compile callback.
*/
-LLVMOrcErrorCode
-LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- LLVMOrcLazyCompileCallbackFn Callback,
- void *CallbackCtx);
+LLVMErrorRef LLVMOrcCreateLazyCompileCallback(
+ LLVMOrcJITStackRef JITStack, LLVMOrcTargetAddress *RetAddr,
+ LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx);
/**
* Create a named indirect call stub.
*/
-LLVMOrcErrorCode LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress InitAddr);
+LLVMErrorRef LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress InitAddr);
/**
* Set the pointer for the given indirect stub.
*/
-LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress NewAddr);
+LLVMErrorRef LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress NewAddr);
/**
* Add module to be eagerly compiled.
*/
-LLVMOrcErrorCode
-LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle, LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx);
+LLVMErrorRef LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx);
/**
* Add module to be lazily compiled one function at a time.
*/
-LLVMOrcErrorCode
-LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle, LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx);
+LLVMErrorRef LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx);
/**
* Add an object file.
@@ -118,11 +115,11 @@ LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
* Clients should *not* dispose of the 'Obj' argument: the JIT will manage it
* from this call onwards.
*/
-LLVMOrcErrorCode LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle,
- LLVMMemoryBufferRef Obj,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx);
+LLVMErrorRef LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMMemoryBufferRef Obj,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx);
/**
* Remove a module set from the JIT.
@@ -130,29 +127,29 @@ LLVMOrcErrorCode LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
* This works for all modules that can be added via OrcAdd*, including object
* files.
*/
-LLVMOrcErrorCode LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle H);
+LLVMErrorRef LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle H);
/**
* Get symbol address from JIT instance.
*/
-LLVMOrcErrorCode LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- const char *SymbolName);
+LLVMErrorRef LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
+ LLVMOrcTargetAddress *RetAddr,
+ const char *SymbolName);
/**
* Get symbol address from JIT instance, searching only the specified
* handle.
*/
-LLVMOrcErrorCode LLVMOrcGetSymbolAddressIn(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- LLVMOrcModuleHandle H,
- const char *SymbolName);
+LLVMErrorRef LLVMOrcGetSymbolAddressIn(LLVMOrcJITStackRef JITStack,
+ LLVMOrcTargetAddress *RetAddr,
+ LLVMOrcModuleHandle H,
+ const char *SymbolName);
/**
* Dispose of an ORC JIT stack.
*/
-LLVMOrcErrorCode LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack);
+LLVMErrorRef LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack);
/**
* Register a JIT Event Listener.
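With the return types switched from LLVMOrcErrorCode to LLVMErrorRef, callers now check for NULL instead of LLVMOrcErrSuccess; a rough sketch (assuming `JIT` is an existing LLVMOrcJITStackRef and "main" is the symbol of interest):

    LLVMOrcTargetAddress Addr = 0;
    LLVMErrorRef Err = LLVMOrcGetSymbolAddress(JIT, &Addr, "main");
    if (Err) {
      char *Msg = LLVMGetErrorMessage(Err);   /* consumes Err */
      /* report Msg, then free it */
      LLVMDisposeErrorMessage(Msg);
    }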
diff --git a/contrib/llvm/include/llvm-c/TargetMachine.h b/contrib/llvm/include/llvm-c/TargetMachine.h
index 7f672b5d10d6..c06e9edc9aaf 100644
--- a/contrib/llvm/include/llvm-c/TargetMachine.h
+++ b/contrib/llvm/include/llvm-c/TargetMachine.h
@@ -39,12 +39,16 @@ typedef enum {
LLVMRelocDefault,
LLVMRelocStatic,
LLVMRelocPIC,
- LLVMRelocDynamicNoPic
+ LLVMRelocDynamicNoPic,
+ LLVMRelocROPI,
+ LLVMRelocRWPI,
+ LLVMRelocROPI_RWPI
} LLVMRelocMode;
typedef enum {
LLVMCodeModelDefault,
LLVMCodeModelJITDefault,
+ LLVMCodeModelTiny,
LLVMCodeModelSmall,
LLVMCodeModelKernel,
LLVMCodeModelMedium,
diff --git a/contrib/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h b/contrib/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h
new file mode 100644
index 000000000000..8756a22e917a
--- /dev/null
+++ b/contrib/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h
@@ -0,0 +1,43 @@
+/*===-- AggressiveInstCombine.h ---------------------------------*- C++ -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to libLLVMAggressiveInstCombine.a, *|
+|* which combines instructions to form fewer, simple IR instructions. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_TRANSFORMS_AGGRESSIVEINSTCOMBINE_H
+#define LLVM_C_TRANSFORMS_AGGRESSIVEINSTCOMBINE_H
+
+#include "llvm-c/Types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup LLVMCTransformsAggressiveInstCombine Aggressive Instruction Combining transformations
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
+/** See llvm::createAggressiveInstCombinerPass function. */
+void LLVMAddAggressiveInstCombinerPass(LLVMPassManagerRef PM);
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* defined(__cplusplus) */
+
+#endif
+
diff --git a/contrib/llvm/include/llvm-c/Transforms/Coroutines.h b/contrib/llvm/include/llvm-c/Transforms/Coroutines.h
new file mode 100644
index 000000000000..827e30fb2d7c
--- /dev/null
+++ b/contrib/llvm/include/llvm-c/Transforms/Coroutines.h
@@ -0,0 +1,55 @@
+/*===-- Coroutines.h - Coroutines Library C Interface -----------*- C++ -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to libLLVMCoroutines.a, which *|
+|* implements the coroutine lowering transformations of the LLVM IR. *|
+|* *|
+|* Many exotic languages can interoperate with C code but have a harder time *|
+|* with C++ due to name mangling. So in addition to C, this interface enables *|
+|* tools written in such languages. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_TRANSFORMS_COROUTINES_H
+#define LLVM_C_TRANSFORMS_COROUTINES_H
+
+#include "llvm-c/Types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup LLVMCTransformsCoroutines Coroutine transformations
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
+/** See llvm::createCoroEarlyPass function. */
+void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM);
+
+/** See llvm::createCoroSplitPass function. */
+void LLVMAddCoroSplitPass(LLVMPassManagerRef PM);
+
+/** See llvm::createCoroElidePass function. */
+void LLVMAddCoroElidePass(LLVMPassManagerRef PM);
+
+/** See llvm::createCoroCleanupPass function. */
+void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM);
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* defined(__cplusplus) */
+
+#endif
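A rough sketch of wiring the coroutine passes into a legacy pass manager (assuming `PM` is an existing LLVMPassManagerRef); the early/split/elide/cleanup ordering mirrors the usual C++ pipeline but is an assumption here, not something this header mandates:

    LLVMAddCoroEarlyPass(PM);    /* lowerings that must run early */
    LLVMAddCoroSplitPass(PM);    /* split coroutines into ramp/resume/destroy pieces */
    LLVMAddCoroElidePass(PM);    /* elide heap allocation when the frame can live on the stack */
    LLVMAddCoroCleanupPass(PM);  /* lower any remaining coroutine intrinsics */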
diff --git a/contrib/llvm/include/llvm-c/Transforms/Scalar.h b/contrib/llvm/include/llvm-c/Transforms/Scalar.h
index f55cdce86be9..3c3bb4eb9b82 100644
--- a/contrib/llvm/include/llvm-c/Transforms/Scalar.h
+++ b/contrib/llvm/include/llvm-c/Transforms/Scalar.h
@@ -35,9 +35,6 @@ extern "C" {
/** See llvm::createAggressiveDCEPass function. */
void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
-/** See llvm::createAggressiveInstCombinerPass function. */
-void LLVMAddAggressiveInstCombinerPass(LLVMPassManagerRef PM);
-
/** See llvm::createBitTrackingDCEPass function. */
void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM);
@@ -95,6 +92,9 @@ void LLVMAddLoopUnrollAndJamPass(LLVMPassManagerRef PM);
/** See llvm::createLoopUnswitchPass function. */
void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM);
+/** See llvm::createLowerAtomicPass function. */
+void LLVMAddLowerAtomicPass(LLVMPassManagerRef PM);
+
/** See llvm::createMemCpyOptPass function. */
void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM);
@@ -153,6 +153,9 @@ void LLVMAddScopedNoAliasAAPass(LLVMPassManagerRef PM);
/** See llvm::createBasicAliasAnalysisPass function */
void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM);
+/** See llvm::createUnifyFunctionExitNodesPass function */
+void LLVMAddUnifyFunctionExitNodesPass(LLVMPassManagerRef PM);
+
/**
* @}
*/
diff --git a/contrib/llvm/include/llvm-c/Types.h b/contrib/llvm/include/llvm-c/Types.h
index 4a33542e86cc..ce1acf3e0421 100644
--- a/contrib/llvm/include/llvm-c/Types.h
+++ b/contrib/llvm/include/llvm-c/Types.h
@@ -90,6 +90,20 @@ typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef;
typedef struct LLVMOpaqueMetadata *LLVMMetadataRef;
/**
+ * Represents an LLVM Named Metadata Node.
+ *
+ * This models llvm::NamedMDNode.
+ */
+typedef struct LLVMOpaqueNamedMDNode *LLVMNamedMDNodeRef;
+
+/**
+ * Represents an entry in a Global Object's metadata attachments.
+ *
+ * This models std::pair<unsigned, MDNode *>
+ */
+typedef struct LLVMOpaqueValueMetadataEntry LLVMValueMetadataEntry;
+
+/**
* Represents an LLVM basic block builder.
*
* This models llvm::IRBuilder.
diff --git a/contrib/llvm/include/llvm-c/lto.h b/contrib/llvm/include/llvm-c/lto.h
index 1acd610f70ac..090cd34af4e9 100644
--- a/contrib/llvm/include/llvm-c/lto.h
+++ b/contrib/llvm/include/llvm-c/lto.h
@@ -44,7 +44,7 @@ typedef bool lto_bool_t;
* @{
*/
-#define LTO_API_VERSION 22
+#define LTO_API_VERSION 23
/**
* \since prior to LTO_API_VERSION=3
@@ -828,6 +828,16 @@ extern void thinlto_codegen_set_cache_size_bytes(thinlto_code_gen_t cg,
unsigned max_size_bytes);
/**
+ * Same as thinlto_codegen_set_cache_size_bytes, except the maximum size is in
+ * megabytes (2^20 bytes).
+ *
+ * \since LTO_API_VERSION=23
+ */
+extern void
+thinlto_codegen_set_cache_size_megabytes(thinlto_code_gen_t cg,
+ unsigned max_size_megabytes);
+
+/**
* Sets the maximum number of files in the cache directory. An unspecified
* default value will be applied. A value of 0 will be ignored.
*
diff --git a/contrib/llvm/include/llvm/ADT/APFloat.h b/contrib/llvm/include/llvm/ADT/APFloat.h
index 5c59af4c04ba..c6fa5ad674f6 100644
--- a/contrib/llvm/include/llvm/ADT/APFloat.h
+++ b/contrib/llvm/include/llvm/ADT/APFloat.h
@@ -870,13 +870,13 @@ public:
/// Factory for NaN values.
///
/// \param Negative - True iff the NaN generated should be negative.
- /// \param type - The unspecified fill bits for creating the NaN, 0 by
+ /// \param payload - The unspecified fill bits for creating the NaN, 0 by
/// default. The value is truncated as necessary.
static APFloat getNaN(const fltSemantics &Sem, bool Negative = false,
- unsigned type = 0) {
- if (type) {
- APInt fill(64, type);
- return getQNaN(Sem, Negative, &fill);
+ uint64_t payload = 0) {
+ if (payload) {
+ APInt intPayload(64, payload);
+ return getQNaN(Sem, Negative, &intPayload);
} else {
return getQNaN(Sem, Negative, nullptr);
}
@@ -1243,6 +1243,32 @@ inline APFloat maxnum(const APFloat &A, const APFloat &B) {
return (A.compare(B) == APFloat::cmpLessThan) ? B : A;
}
+/// Implements IEEE 754-2018 minimum semantics. Returns the smaller of 2
+/// arguments, propagating NaNs and treating -0 as less than +0.
+LLVM_READONLY
+inline APFloat minimum(const APFloat &A, const APFloat &B) {
+ if (A.isNaN())
+ return A;
+ if (B.isNaN())
+ return B;
+ if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
+ return A.isNegative() ? A : B;
+ return (B.compare(A) == APFloat::cmpLessThan) ? B : A;
+}
+
+/// Implements IEEE 754-2018 maximum semantics. Returns the larger of 2
+/// arguments, propagating NaNs and treating -0 as less than +0.
+LLVM_READONLY
+inline APFloat maximum(const APFloat &A, const APFloat &B) {
+ if (A.isNaN())
+ return A;
+ if (B.isNaN())
+ return B;
+ if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
+ return A.isNegative() ? B : A;
+ return (A.compare(B) == APFloat::cmpLessThan) ? B : A;
+}
+
} // namespace llvm
#undef APFLOAT_DISPATCH_ON_SEMANTICS
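A few worked values may help contrast these with minnum/maxnum: under the 2018 semantics shown above, minimum(-0.0, +0.0) is -0.0, maximum(-0.0, +0.0) is +0.0, and minimum(NaN, 1.0) is NaN, i.e. the NaN propagates rather than being discarded.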
diff --git a/contrib/llvm/include/llvm/ADT/APInt.h b/contrib/llvm/include/llvm/ADT/APInt.h
index 6bf6b22fb010..6e106ff8bf5d 100644
--- a/contrib/llvm/include/llvm/ADT/APInt.h
+++ b/contrib/llvm/include/llvm/ADT/APInt.h
@@ -31,6 +31,7 @@ class raw_ostream;
template <typename T> class SmallVectorImpl;
template <typename T> class ArrayRef;
+template <typename T> class Optional;
class APInt;
@@ -84,7 +85,7 @@ public:
UP,
};
- static const WordType WORD_MAX = ~WordType(0);
+ static const WordType WORDTYPE_MAX = ~WordType(0);
private:
/// This union is used to store the integer value. When the
@@ -149,7 +150,7 @@ private:
unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;
// Mask out the high bits.
- uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - WordBits);
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
if (isSingleWord())
U.VAL &= mask;
else
@@ -394,7 +395,7 @@ public:
/// This checks to see if the value has all bits of the APInt are set or not.
bool isAllOnesValue() const {
if (isSingleWord())
- return U.VAL == WORD_MAX >> (APINT_BITS_PER_WORD - BitWidth);
+ return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
return countTrailingOnesSlowCase() == BitWidth;
}
@@ -495,7 +496,7 @@ public:
assert(numBits != 0 && "numBits must be non-zero");
assert(numBits <= BitWidth && "numBits out of range");
if (isSingleWord())
- return U.VAL == (WORD_MAX >> (APINT_BITS_PER_WORD - numBits));
+ return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
unsigned Ones = countTrailingOnesSlowCase();
return (numBits == Ones) &&
((Ones + countLeadingZerosSlowCase()) == BitWidth);
@@ -559,7 +560,7 @@ public:
///
/// \returns the all-ones value for an APInt of the specified bit-width.
static APInt getAllOnesValue(unsigned numBits) {
- return APInt(numBits, WORD_MAX, true);
+ return APInt(numBits, WORDTYPE_MAX, true);
}
/// Get the '0' value.
@@ -1104,6 +1105,12 @@ public:
APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
+ // Operations that saturate
+ APInt sadd_sat(const APInt &RHS) const;
+ APInt uadd_sat(const APInt &RHS) const;
+ APInt ssub_sat(const APInt &RHS) const;
+ APInt usub_sat(const APInt &RHS) const;
+
/// Array-indexing support.
///
/// \returns the bit value at bitPosition
@@ -1382,7 +1389,7 @@ public:
/// Set every bit to 1.
void setAllBits() {
if (isSingleWord())
- U.VAL = WORD_MAX;
+ U.VAL = WORDTYPE_MAX;
else
// Set all the bits in all the words.
memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
@@ -1394,7 +1401,7 @@ public:
///
/// Set the given bit to 1 whose position is given as "bitPosition".
void setBit(unsigned BitPosition) {
- assert(BitPosition <= BitWidth && "BitPosition out of range");
+ assert(BitPosition < BitWidth && "BitPosition out of range");
WordType Mask = maskBit(BitPosition);
if (isSingleWord())
U.VAL |= Mask;
@@ -1415,7 +1422,7 @@ public:
if (loBit == hiBit)
return;
if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) {
- uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
mask <<= loBit;
if (isSingleWord())
U.VAL |= mask;
@@ -1453,7 +1460,7 @@ public:
///
/// Set the given bit to 0 whose position is given as "bitPosition".
void clearBit(unsigned BitPosition) {
- assert(BitPosition <= BitWidth && "BitPosition out of range");
+ assert(BitPosition < BitWidth && "BitPosition out of range");
WordType Mask = ~maskBit(BitPosition);
if (isSingleWord())
U.VAL &= Mask;
@@ -1469,7 +1476,7 @@ public:
/// Toggle every bit to its opposite value.
void flipAllBits() {
if (isSingleWord()) {
- U.VAL ^= WORD_MAX;
+ U.VAL ^= WORDTYPE_MAX;
clearUnusedBits();
} else {
flipAllBitsSlowCase();
@@ -1758,7 +1765,7 @@ public:
/// referencing 2 in a space where 2 does not exist.
unsigned nearestLogBase2() const {
// Special case when we have a bitwidth of 1. If VAL is 1, then we
- // get 0. If VAL is 0, we get WORD_MAX which gets truncated to
+ // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
// UINT32_MAX.
if (BitWidth == 1)
return U.VAL - 1;
@@ -2166,6 +2173,41 @@ APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
/// Return A sign-divided by B, rounded by the given rounding mode.
APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
+/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
+/// (e.g. 32 for i32).
+/// This function finds the smallest number n, such that
+/// (a) n >= 0 and q(n) = 0, or
+/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
+/// integers, belong to two different intervals [Rk, Rk+R),
+/// where R = 2^BW, and k is an integer.
+/// The idea here is to find when q(n) "overflows" 2^BW, while at the
+/// same time "allowing" subtraction. In unsigned modulo arithmetic a
+/// subtraction (treated as addition of negated numbers) would always
+/// count as an overflow, but here we want to allow values to decrease
+/// and increase as long as they are within the same interval.
+/// Specifically, adding of two negative numbers should not cause an
+/// overflow (as long as the magnitude does not exceed the bit width).
+/// On the other hand, given a positive number, adding a negative
+/// number to it can give a negative result, which would cause the
+/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
+/// treated as a special case of an overflow.
+///
+/// This function returns None if after finding k that minimizes the
+/// positive solution to q(n) = kR, both solutions are contained between
+/// two consecutive integers.
+///
+/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
+/// in arithmetic modulo 2^BW, and treating the values as signed) by the
+/// virtue of *signed* overflow. This function will *not* find such an n,
+/// however it may find a value of n satisfying the inequalities due to
+/// an *unsigned* overflow (if the values are treated as unsigned).
+/// To find a solution for a signed overflow, treat it as a problem of
+/// finding an unsigned overflow with a range width of BW-1.
+///
+/// The returned value may have a different bit width from the input
+/// coefficients.
+Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
+ unsigned RangeWidth);
} // End of APIntOps namespace
// See friend declaration above. This additional declaration is required in
diff --git a/contrib/llvm/include/llvm/ADT/Any.h b/contrib/llvm/include/llvm/ADT/Any.h
index c64c39987542..7faa4c963d3d 100644
--- a/contrib/llvm/include/llvm/ADT/Any.h
+++ b/contrib/llvm/include/llvm/ADT/Any.h
@@ -65,6 +65,16 @@ public:
typename std::enable_if<
llvm::conjunction<
llvm::negation<std::is_same<typename std::decay<T>::type, Any>>,
+ // We also disable this overload when an `Any` object can be
+ // converted to the parameter type because in that case, this
+ // constructor may combine with that conversion during overload
+ // resolution for determining copy constructibility, and then
+ // when we try to determine copy constructibility below we may
+ // infinitely recurse. This is being evaluated by the standards
+ // committee as a potential DR in `std::any` as well, but we're
+ // going ahead and adopting it to work around usage of `Any` with
+ // types that need to be implicitly convertible from an `Any`.
+ llvm::negation<std::is_convertible<Any, typename std::decay<T>::type>>,
std::is_copy_constructible<typename std::decay<T>::type>>::value,
int>::type = 0>
Any(T &&Value) {
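The added negation keeps this converting constructor out of overload resolution for types that are themselves constructible from an Any, which otherwise recurses while the copy-constructibility check below is evaluated. Ordinary use of llvm::Any is unchanged; a small illustrative sketch:

    #include "llvm/ADT/Any.h"
    #include <string>

    void any_demo() {
      llvm::Any A = 42;                       // holds an int
      if (llvm::any_isa<int>(A)) {
        int V = llvm::any_cast<int>(A);       // V == 42
        (void)V;
      }
      A = std::string("hello");               // now holds a std::string
      bool StillInt = llvm::any_isa<int>(A);  // false
      (void)StillInt;
    }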
diff --git a/contrib/llvm/include/llvm/ADT/BitVector.h b/contrib/llvm/include/llvm/ADT/BitVector.h
index 438c7d84c581..9ab1da7c6913 100644
--- a/contrib/llvm/include/llvm/ADT/BitVector.h
+++ b/contrib/llvm/include/llvm/ADT/BitVector.h
@@ -503,6 +503,23 @@ public:
return (*this)[Idx];
}
+ // Push single bit to end of vector.
+ void push_back(bool Val) {
+ unsigned OldSize = Size;
+ unsigned NewSize = Size + 1;
+
+ // Resize, which will insert zeros.
+ // If we already fit then the unused bits will be already zero.
+ if (NewSize > getBitCapacity())
+ resize(NewSize, false);
+ else
+ Size = NewSize;
+
+ // If true, set single bit.
+ if (Val)
+ set(OldSize);
+ }
+
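push_back grows the vector by a single bit, resizing only when the current capacity is exhausted; when the new bit already fits, the unused high bits are known to be zero, so only the size needs bumping. A small sketch:

    #include "llvm/ADT/BitVector.h"

    llvm::BitVector buildMask() {
      llvm::BitVector BV;       // empty
      BV.push_back(true);       // size 1, bit 0 set
      BV.push_back(false);      // size 2
      BV.push_back(true);       // size 3, bits {0, 2} set
      return BV;
    }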
/// Test if any common bits are set.
bool anyCommon(const BitVector &RHS) const {
unsigned ThisWords = NumBitWords(size());
diff --git a/contrib/llvm/include/llvm/ADT/DenseMap.h b/contrib/llvm/include/llvm/ADT/DenseMap.h
index ba60b7972a8f..1f50502fff92 100644
--- a/contrib/llvm/include/llvm/ADT/DenseMap.h
+++ b/contrib/llvm/include/llvm/ADT/DenseMap.h
@@ -25,6 +25,7 @@
#include <cassert>
#include <cstddef>
#include <cstring>
+#include <initializer_list>
#include <iterator>
#include <new>
#include <type_traits>
@@ -38,6 +39,34 @@ namespace detail {
// implementation without requiring two members.
template <typename KeyT, typename ValueT>
struct DenseMapPair : public std::pair<KeyT, ValueT> {
+
+ // FIXME: Switch to inheriting constructors when we drop support for older
+ // clang versions.
+ // NOTE: This default constructor is declared with '{}' rather than
+ // '= default' to work around a separate bug in clang-3.8. This can
+ // also go when we switch to inheriting constructors.
+ DenseMapPair() {}
+
+ DenseMapPair(const KeyT &Key, const ValueT &Value)
+ : std::pair<KeyT, ValueT>(Key, Value) {}
+
+ DenseMapPair(KeyT &&Key, ValueT &&Value)
+ : std::pair<KeyT, ValueT>(std::move(Key), std::move(Value)) {}
+
+ template <typename AltKeyT, typename AltValueT>
+ DenseMapPair(AltKeyT &&AltKey, AltValueT &&AltValue,
+ typename std::enable_if<
+ std::is_convertible<AltKeyT, KeyT>::value &&
+ std::is_convertible<AltValueT, ValueT>::value>::type * = 0)
+ : std::pair<KeyT, ValueT>(std::forward<AltKeyT>(AltKey),
+ std::forward<AltValueT>(AltValue)) {}
+
+ template <typename AltPairT>
+ DenseMapPair(AltPairT &&AltPair,
+ typename std::enable_if<std::is_convertible<
+ AltPairT, std::pair<KeyT, ValueT>>::value>::type * = 0)
+ : std::pair<KeyT, ValueT>(std::forward<AltPairT>(AltPair)) {}
+
KeyT &getFirst() { return std::pair<KeyT, ValueT>::first; }
const KeyT &getFirst() const { return std::pair<KeyT, ValueT>::first; }
ValueT &getSecond() { return std::pair<KeyT, ValueT>::second; }
@@ -46,9 +75,10 @@ struct DenseMapPair : public std::pair<KeyT, ValueT> {
} // end namespace detail
-template <
- typename KeyT, typename ValueT, typename KeyInfoT = DenseMapInfo<KeyT>,
- typename Bucket = detail::DenseMapPair<KeyT, ValueT>, bool IsConst = false>
+template <typename KeyT, typename ValueT,
+ typename KeyInfoT = DenseMapInfo<KeyT>,
+ typename Bucket = llvm::detail::DenseMapPair<KeyT, ValueT>,
+ bool IsConst = false>
class DenseMapIterator;
template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT,
@@ -393,7 +423,7 @@ protected:
setNumTombstones(other.getNumTombstones());
if (isPodLike<KeyT>::value && isPodLike<ValueT>::value)
- memcpy(getBuckets(), other.getBuckets(),
+ memcpy(reinterpret_cast<void *>(getBuckets()), other.getBuckets(),
getNumBuckets() * sizeof(BucketT));
else
for (size_t i = 0; i < getNumBuckets(); ++i) {
@@ -639,9 +669,43 @@ public:
}
};
+/// Equality comparison for DenseMap.
+///
+/// Iterates over elements of LHS confirming that each (key, value) pair in LHS
+/// is also in RHS, and that no additional pairs are in RHS.
+/// Equivalent to N calls to RHS.find and N value comparisons. Amortized
+/// complexity is linear, worst case is O(N^2) (if every hash collides).
+template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT,
+ typename BucketT>
+bool operator==(
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &LHS,
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &RHS) {
+ if (LHS.size() != RHS.size())
+ return false;
+
+ for (auto &KV : LHS) {
+ auto I = RHS.find(KV.first);
+ if (I == RHS.end() || I->second != KV.second)
+ return false;
+ }
+
+ return true;
+}
+
+/// Inequality comparison for DenseMap.
+///
+/// Equivalent to !(LHS == RHS). See operator== for performance notes.
+template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT,
+ typename BucketT>
+bool operator!=(
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &LHS,
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &RHS) {
+ return !(LHS == RHS);
+}
+
template <typename KeyT, typename ValueT,
typename KeyInfoT = DenseMapInfo<KeyT>,
- typename BucketT = detail::DenseMapPair<KeyT, ValueT>>
+ typename BucketT = llvm::detail::DenseMapPair<KeyT, ValueT>>
class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
KeyT, ValueT, KeyInfoT, BucketT> {
friend class DenseMapBase<DenseMap, KeyT, ValueT, KeyInfoT, BucketT>;
@@ -676,6 +740,11 @@ public:
this->insert(I, E);
}
+ DenseMap(std::initializer_list<typename BaseT::value_type> Vals) {
+ init(Vals.size());
+ this->insert(Vals.begin(), Vals.end());
+ }
+
~DenseMap() {
this->destroyAll();
operator delete(Buckets);
@@ -798,7 +867,7 @@ private:
template <typename KeyT, typename ValueT, unsigned InlineBuckets = 4,
typename KeyInfoT = DenseMapInfo<KeyT>,
- typename BucketT = detail::DenseMapPair<KeyT, ValueT>>
+ typename BucketT = llvm::detail::DenseMapPair<KeyT, ValueT>>
class SmallDenseMap
: public DenseMapBase<
SmallDenseMap<KeyT, ValueT, InlineBuckets, KeyInfoT, BucketT>, KeyT,
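Together, the initializer_list constructor and the element-wise operator== make it straightforward to build a small map inline and compare it against another one, independent of insertion order. An illustrative sketch:

    #include "llvm/ADT/DenseMap.h"
    #include <cassert>

    void densemap_demo() {
      llvm::DenseMap<int, int> A = {{1, 10}, {2, 20}};
      llvm::DenseMap<int, int> B;
      B[2] = 20;
      B[1] = 10;
      assert(A == B);   // compared per (key, value), not by layout
      B[3] = 30;
      assert(A != B);
    }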
diff --git a/contrib/llvm/include/llvm/ADT/DenseSet.h b/contrib/llvm/include/llvm/ADT/DenseSet.h
index b495e25dd5e5..e85a38587e41 100644
--- a/contrib/llvm/include/llvm/ADT/DenseSet.h
+++ b/contrib/llvm/include/llvm/ADT/DenseSet.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/type_traits.h"
#include <algorithm>
#include <cstddef>
@@ -67,7 +68,7 @@ public:
explicit DenseSetImpl(unsigned InitialReserve = 0) : TheMap(InitialReserve) {}
DenseSetImpl(std::initializer_list<ValueT> Elems)
- : DenseSetImpl(Elems.size()) {
+ : DenseSetImpl(PowerOf2Ceil(Elems.size())) {
insert(Elems.begin(), Elems.end());
}
@@ -136,8 +137,8 @@ public:
public:
using difference_type = typename MapTy::const_iterator::difference_type;
using value_type = ValueT;
- using pointer = value_type *;
- using reference = value_type &;
+ using pointer = const value_type *;
+ using reference = const value_type &;
using iterator_category = std::forward_iterator_tag;
ConstIterator() = default;
@@ -214,6 +215,34 @@ public:
}
};
+/// Equality comparison for DenseSet.
+///
+/// Iterates over elements of LHS confirming that each element is also a member
+/// of RHS, and that RHS contains no additional values.
+/// Equivalent to N calls to RHS.count. Amortized complexity is linear, worst
+/// case is O(N^2) (if every hash collides).
+template <typename ValueT, typename MapTy, typename ValueInfoT>
+bool operator==(const DenseSetImpl<ValueT, MapTy, ValueInfoT> &LHS,
+ const DenseSetImpl<ValueT, MapTy, ValueInfoT> &RHS) {
+ if (LHS.size() != RHS.size())
+ return false;
+
+ for (auto &E : LHS)
+ if (!RHS.count(E))
+ return false;
+
+ return true;
+}
+
+/// Inequality comparison for DenseSet.
+///
+/// Equivalent to !(LHS == RHS). See operator== for performance notes.
+template <typename ValueT, typename MapTy, typename ValueInfoT>
+bool operator!=(const DenseSetImpl<ValueT, MapTy, ValueInfoT> &LHS,
+ const DenseSetImpl<ValueT, MapTy, ValueInfoT> &RHS) {
+ return !(LHS == RHS);
+}
+
} // end namespace detail
/// Implements a dense probed hash-table based set.
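DenseSet gains the same element-wise comparison; ordering is irrelevant and only membership is checked. A short sketch:

    #include "llvm/ADT/DenseSet.h"
    #include <cassert>

    void denseset_demo() {
      llvm::DenseSet<unsigned> A = {1, 2, 3};
      llvm::DenseSet<unsigned> B = {3, 2, 1};
      assert(A == B);
      B.insert(4);
      assert(A != B);
    }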
diff --git a/contrib/llvm/include/llvm/ADT/GraphTraits.h b/contrib/llvm/include/llvm/ADT/GraphTraits.h
index 27c647f4bbbd..d39b50fdc488 100644
--- a/contrib/llvm/include/llvm/ADT/GraphTraits.h
+++ b/contrib/llvm/include/llvm/ADT/GraphTraits.h
@@ -25,6 +25,13 @@ namespace llvm {
// GraphTraits - This class should be specialized by different graph types...
// which is why the default version is empty.
//
+// This template evolved from supporting `BasicBlock` to also later supporting
+// more complex types (e.g. CFG and DomTree).
+//
+// GraphTraits can be used to create a view over a graph interpreting it
+// differently without requiring a copy of the original graph. This could
+// be achieved by carrying more data in NodeRef. See LoopBodyTraits for one
+// example.
template<class GraphType>
struct GraphTraits {
// Elements to provide:
diff --git a/contrib/llvm/include/llvm/ADT/Hashing.h b/contrib/llvm/include/llvm/ADT/Hashing.h
index 9f830baa4243..9175c545b7c9 100644
--- a/contrib/llvm/include/llvm/ADT/Hashing.h
+++ b/contrib/llvm/include/llvm/ADT/Hashing.h
@@ -133,7 +133,7 @@ hash_code hash_value(const std::basic_string<T> &arg);
/// undone. This makes it thread-hostile and very hard to use outside of
/// immediately on start of a simple program designed for reproducible
/// behavior.
-void set_fixed_execution_hash_seed(size_t fixed_value);
+void set_fixed_execution_hash_seed(uint64_t fixed_value);
// All of the implementation details of actually computing the various hash
@@ -316,9 +316,9 @@ struct hash_state {
/// This variable can be set using the \see llvm::set_fixed_execution_seed
/// function. See that function for details. Do not, under any circumstances,
/// set or read this variable.
-extern size_t fixed_seed_override;
+extern uint64_t fixed_seed_override;
-inline size_t get_execution_seed() {
+inline uint64_t get_execution_seed() {
// FIXME: This needs to be a per-execution seed. This is just a placeholder
// implementation. Switching to a per-execution seed is likely to flush out
// instability bugs and so will happen as its own commit.
@@ -326,8 +326,7 @@ inline size_t get_execution_seed() {
// However, if there is a fixed seed override set the first time this is
// called, return that instead of the per-execution seed.
const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
- static size_t seed = fixed_seed_override ? fixed_seed_override
- : (size_t)seed_prime;
+ static uint64_t seed = fixed_seed_override ? fixed_seed_override : seed_prime;
return seed;
}
@@ -402,7 +401,7 @@ bool store_and_advance(char *&buffer_ptr, char *buffer_end, const T& value,
/// combining them, this (as an optimization) directly combines the integers.
template <typename InputIteratorT>
hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
- const size_t seed = get_execution_seed();
+ const uint64_t seed = get_execution_seed();
char buffer[64], *buffer_ptr = buffer;
char *const buffer_end = std::end(buffer);
while (first != last && store_and_advance(buffer_ptr, buffer_end,
@@ -446,7 +445,7 @@ hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
template <typename ValueT>
typename std::enable_if<is_hashable_data<ValueT>::value, hash_code>::type
hash_combine_range_impl(ValueT *first, ValueT *last) {
- const size_t seed = get_execution_seed();
+ const uint64_t seed = get_execution_seed();
const char *s_begin = reinterpret_cast<const char *>(first);
const char *s_end = reinterpret_cast<const char *>(last);
const size_t length = std::distance(s_begin, s_end);
@@ -496,7 +495,7 @@ namespace detail {
struct hash_combine_recursive_helper {
char buffer[64];
hash_state state;
- const size_t seed;
+ const uint64_t seed;
public:
/// Construct a recursive hash combining helper.
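Widening the seed plumbing from size_t to uint64_t only changes what 32-bit hosts feed into the hash; callers keep using hash_value/hash_combine as before. A small illustrative sketch:

    #include "llvm/ADT/Hashing.h"
    #include <string>

    llvm::hash_code hashPair(int A, const std::string &S) {
      // Both arguments are folded together under the 64-bit execution seed.
      return llvm::hash_combine(A, S);
    }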
diff --git a/contrib/llvm/include/llvm/ADT/ImmutableList.h b/contrib/llvm/include/llvm/ADT/ImmutableList.h
index 1f5e9813798d..0541dc2566ed 100644
--- a/contrib/llvm/include/llvm/ADT/ImmutableList.h
+++ b/contrib/llvm/include/llvm/ADT/ImmutableList.h
@@ -31,8 +31,9 @@ class ImmutableListImpl : public FoldingSetNode {
T Head;
const ImmutableListImpl* Tail;
- ImmutableListImpl(const T& head, const ImmutableListImpl* tail = nullptr)
- : Head(head), Tail(tail) {}
+ template <typename ElemT>
+ ImmutableListImpl(ElemT &&head, const ImmutableListImpl *tail = nullptr)
+ : Head(std::forward<ElemT>(head)), Tail(tail) {}
public:
ImmutableListImpl(const ImmutableListImpl &) = delete;
@@ -66,6 +67,9 @@ public:
using value_type = T;
using Factory = ImmutableListFactory<T>;
+ static_assert(std::is_trivially_destructible<T>::value,
+ "T must be trivially destructible!");
+
private:
const ImmutableListImpl<T>* X;
@@ -90,6 +94,9 @@ public:
bool operator==(const iterator& I) const { return L == I.L; }
bool operator!=(const iterator& I) const { return L != I.L; }
const value_type& operator*() const { return L->getHead(); }
+ const typename std::remove_reference<value_type>::type* operator->() const {
+ return &L->getHead();
+ }
ImmutableList getList() const { return L; }
};
@@ -123,14 +130,14 @@ public:
bool operator==(const ImmutableList& L) const { return isEqual(L); }
/// getHead - Returns the head of the list.
- const T& getHead() {
+ const T& getHead() const {
assert(!isEmpty() && "Cannot get the head of an empty list.");
return X->getHead();
}
/// getTail - Returns the tail of the list, which is another (possibly empty)
/// ImmutableList.
- ImmutableList getTail() {
+ ImmutableList getTail() const {
return X ? X->getTail() : nullptr;
}
@@ -166,7 +173,8 @@ public:
if (ownsAllocator()) delete &getAllocator();
}
- LLVM_NODISCARD ImmutableList<T> concat(const T &Head, ImmutableList<T> Tail) {
+ template <typename ElemT>
+ LLVM_NODISCARD ImmutableList<T> concat(ElemT &&Head, ImmutableList<T> Tail) {
// Profile the new list to see if it already exists in our cache.
FoldingSetNodeID ID;
void* InsertPos;
@@ -179,7 +187,7 @@ public:
// The list does not exist in our cache. Create it.
BumpPtrAllocator& A = getAllocator();
L = (ListTy*) A.Allocate<ListTy>();
- new (L) ListTy(Head, TailImpl);
+ new (L) ListTy(std::forward<ElemT>(Head), TailImpl);
// Insert the new list into the cache.
Cache.InsertNode(L, InsertPos);
@@ -188,16 +196,24 @@ public:
return L;
}
- LLVM_NODISCARD ImmutableList<T> add(const T& D, ImmutableList<T> L) {
- return concat(D, L);
+ template <typename ElemT>
+ LLVM_NODISCARD ImmutableList<T> add(ElemT &&Data, ImmutableList<T> L) {
+ return concat(std::forward<ElemT>(Data), L);
+ }
+
+ template <typename ...CtorArgs>
+ LLVM_NODISCARD ImmutableList<T> emplace(ImmutableList<T> Tail,
+ CtorArgs &&...Args) {
+ return concat(T(std::forward<CtorArgs>(Args)...), Tail);
}
ImmutableList<T> getEmptyList() const {
return ImmutableList<T>(nullptr);
}
- ImmutableList<T> create(const T& X) {
- return Concat(X, getEmptyList());
+ template <typename ElemT>
+ ImmutableList<T> create(ElemT &&Data) {
+ return concat(std::forward<ElemT>(Data), getEmptyList());
}
};
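With the factory methods now forwarding their element (and emplace constructing it in place), typical use still goes through a Factory, and lists share structure through their tails. A sketch; note the new static_assert requires T to be trivially destructible:

    #include "llvm/ADT/ImmutableList.h"

    void immutablelist_demo() {
      llvm::ImmutableList<int>::Factory F;
      llvm::ImmutableList<int> Empty = F.getEmptyList();
      llvm::ImmutableList<int> L1 = F.add(3, Empty);   // [3]
      llvm::ImmutableList<int> L2 = F.add(2, L1);      // [2, 3], shares [3]
      llvm::ImmutableList<int> L3 = F.emplace(L2, 1);  // [1, 2, 3]
      (void)L3;
    }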
diff --git a/contrib/llvm/include/llvm/ADT/IntervalMap.h b/contrib/llvm/include/llvm/ADT/IntervalMap.h
index f71366811218..2af61049e5af 100644
--- a/contrib/llvm/include/llvm/ADT/IntervalMap.h
+++ b/contrib/llvm/include/llvm/ADT/IntervalMap.h
@@ -101,6 +101,7 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/bit.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/RecyclingAllocator.h"
@@ -963,6 +964,7 @@ public:
private:
// The root data is either a RootLeaf or a RootBranchData instance.
+ LLVM_ALIGNAS(RootLeaf) LLVM_ALIGNAS(RootBranchData)
AlignedCharArrayUnion<RootLeaf, RootBranchData> data;
// Tree height.
@@ -977,15 +979,10 @@ private:
// Allocator used for creating external nodes.
Allocator &allocator;
- /// dataAs - Represent data as a node type without breaking aliasing rules.
+ /// Represent data as a node type without breaking aliasing rules.
template <typename T>
T &dataAs() const {
- union {
- const char *d;
- T *t;
- } u;
- u.d = data.buffer;
- return *u.t;
+ return *bit_cast<T *>(const_cast<char *>(data.buffer));
}
const RootLeaf &rootLeaf() const {
@@ -1137,6 +1134,19 @@ public:
I.find(x);
return I;
}
+
+ /// overlaps(a, b) - Return true if the intervals in this map overlap with the
+ /// interval [a;b].
+ bool overlaps(KeyT a, KeyT b) {
+ assert(Traits::nonEmpty(a, b));
+ const_iterator I = find(a);
+ if (!I.valid())
+ return false;
+ // [a;b] and [x;y] overlap iff x<=b and a<=y. The find() call guarantees the
+ // second part (y = find(a).stop()), so it is sufficient to check the first
+ // one.
+ return !Traits::stopLess(b, I.start());
+ }
};
/// treeSafeLookup - Return the mapped value at x or NotFound, assuming a
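overlaps(a, b) gives a yes/no answer without the caller spelling out the iterator logic: find(a) already guarantees a <= stop of the returned interval, so only its start has to be checked against b. A small sketch of use:

    #include "llvm/ADT/IntervalMap.h"

    void intervalmap_demo() {
      using Map = llvm::IntervalMap<unsigned, unsigned>;
      Map::Allocator Alloc;
      Map M(Alloc);
      M.insert(10, 20, 1);           // the closed interval [10;20] maps to 1
      bool Yes = M.overlaps(15, 25); // true: [15;25] intersects [10;20]
      bool No = M.overlaps(30, 40);  // false
      (void)Yes; (void)No;
    }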
diff --git a/contrib/llvm/include/llvm/ADT/Optional.h b/contrib/llvm/include/llvm/ADT/Optional.h
index 353e5d0ec9df..76937d632ae1 100644
--- a/contrib/llvm/include/llvm/ADT/Optional.h
+++ b/contrib/llvm/include/llvm/ADT/Optional.h
@@ -29,7 +29,7 @@ namespace llvm {
namespace optional_detail {
/// Storage for any type.
-template <typename T, bool IsPodLike> struct OptionalStorage {
+template <typename T, bool = isPodLike<T>::value> struct OptionalStorage {
AlignedCharArrayUnion<T> storage;
bool hasVal = false;
@@ -108,28 +108,10 @@ template <typename T, bool IsPodLike> struct OptionalStorage {
}
};
-#if !defined(__GNUC__) || defined(__clang__) // GCC up to GCC7 miscompiles this.
-/// Storage for trivially copyable types only.
-template <typename T> struct OptionalStorage<T, true> {
- AlignedCharArrayUnion<T> storage;
- bool hasVal = false;
-
- OptionalStorage() = default;
-
- OptionalStorage(const T &y) : hasVal(true) { new (storage.buffer) T(y); }
- OptionalStorage &operator=(const T &y) {
- *reinterpret_cast<T *>(storage.buffer) = y;
- hasVal = true;
- return *this;
- }
-
- void reset() { hasVal = false; }
-};
-#endif
} // namespace optional_detail
template <typename T> class Optional {
- optional_detail::OptionalStorage<T, isPodLike<T>::value> Storage;
+ optional_detail::OptionalStorage<T> Storage;
public:
using value_type = T;
diff --git a/contrib/llvm/include/llvm/ADT/PointerIntPair.h b/contrib/llvm/include/llvm/ADT/PointerIntPair.h
index 884d05155bff..6d1b53a90ad2 100644
--- a/contrib/llvm/include/llvm/ADT/PointerIntPair.h
+++ b/contrib/llvm/include/llvm/ADT/PointerIntPair.h
@@ -42,6 +42,8 @@ template <typename PointerTy, unsigned IntBits, typename IntType = unsigned,
typename PtrTraits = PointerLikeTypeTraits<PointerTy>,
typename Info = PointerIntPairInfo<PointerTy, IntBits, PtrTraits>>
class PointerIntPair {
+ // Used by MSVC visualizer and generally helpful for debugging/visualizing.
+ using InfoTy = Info;
intptr_t Value = 0;
public:
diff --git a/contrib/llvm/include/llvm/ADT/PointerSumType.h b/contrib/llvm/include/llvm/ADT/PointerSumType.h
index e37957160d98..a19e45a46218 100644
--- a/contrib/llvm/include/llvm/ADT/PointerSumType.h
+++ b/contrib/llvm/include/llvm/ADT/PointerSumType.h
@@ -10,6 +10,7 @@
#ifndef LLVM_ADT_POINTERSUMTYPE_H
#define LLVM_ADT_POINTERSUMTYPE_H
+#include "llvm/ADT/bit.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include <cassert>
@@ -58,56 +59,142 @@ template <typename TagT, typename... MemberTs> struct PointerSumTypeHelper;
/// and may be desirable to set to a state that is particularly desirable to
/// default construct.
///
+/// Having a supported zero-valued tag also enables getting the address of a
+/// pointer stored with that tag provided it is stored in its natural bit
+/// representation. This works because in the case of a zero-valued tag, the
+/// pointer's value is directly stored into this object and we can expose the
+/// address of that internal storage. This is especially useful when building an
+/// `ArrayRef` of a single pointer stored in a sum type.
+///
/// There is no support for constructing or accessing with a dynamic tag as
/// that would fundamentally violate the type safety provided by the sum type.
template <typename TagT, typename... MemberTs> class PointerSumType {
- uintptr_t Value = 0;
-
using HelperT = detail::PointerSumTypeHelper<TagT, MemberTs...>;
+ // We keep both the raw value and the min tag value's pointer in a union. When
+ // the minimum tag value is zero, this allows code below to cleanly expose the
+ // address of the zero-tag pointer instead of just the zero-tag pointer
+ // itself. This is especially useful when building `ArrayRef`s out of a single
+ // pointer. However, we have to carefully access the union due to the active
+ // member potentially changing. When we *store* a new value, we directly
+ // access the union to allow us to store using the obvious types. However,
+ // when we *read* a value, we copy the underlying storage out to avoid relying
+ // on one member or the other being active.
+ union StorageT {
+ // Ensure we get a null default constructed value. We don't use a member
+ // initializer because some compilers seem to not implement those correctly
+ // for a union.
+ StorageT() : Value(0) {}
+
+ uintptr_t Value;
+
+ typename HelperT::template Lookup<HelperT::MinTag>::PointerT MinTagPointer;
+ };
+
+ StorageT Storage;
+
public:
constexpr PointerSumType() = default;
+ /// A typed setter to a given tagged member of the sum type.
+ template <TagT N>
+ void set(typename HelperT::template Lookup<N>::PointerT Pointer) {
+ void *V = HelperT::template Lookup<N>::TraitsT::getAsVoidPointer(Pointer);
+ assert((reinterpret_cast<uintptr_t>(V) & HelperT::TagMask) == 0 &&
+ "Pointer is insufficiently aligned to store the discriminant!");
+ Storage.Value = reinterpret_cast<uintptr_t>(V) | N;
+ }
+
/// A typed constructor for a specific tagged member of the sum type.
template <TagT N>
static PointerSumType
create(typename HelperT::template Lookup<N>::PointerT Pointer) {
PointerSumType Result;
- void *V = HelperT::template Lookup<N>::TraitsT::getAsVoidPointer(Pointer);
- assert((reinterpret_cast<uintptr_t>(V) & HelperT::TagMask) == 0 &&
- "Pointer is insufficiently aligned to store the discriminant!");
- Result.Value = reinterpret_cast<uintptr_t>(V) | N;
+ Result.set<N>(Pointer);
return Result;
}
- TagT getTag() const { return static_cast<TagT>(Value & HelperT::TagMask); }
+ /// Clear the value to null with the min tag type.
+ void clear() { set<HelperT::MinTag>(nullptr); }
+
+ TagT getTag() const {
+ return static_cast<TagT>(getOpaqueValue() & HelperT::TagMask);
+ }
template <TagT N> bool is() const { return N == getTag(); }
template <TagT N> typename HelperT::template Lookup<N>::PointerT get() const {
- void *P = is<N>() ? getImpl() : nullptr;
+ void *P = is<N>() ? getVoidPtr() : nullptr;
return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(P);
}
template <TagT N>
typename HelperT::template Lookup<N>::PointerT cast() const {
assert(is<N>() && "This instance has a different active member.");
- return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(getImpl());
+ return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(
+ getVoidPtr());
+ }
+
+ /// If the tag is zero and the pointer's value isn't changed when being
+ /// stored, get the address of the stored value type-punned to the zero-tag's
+ /// pointer type.
+ typename HelperT::template Lookup<HelperT::MinTag>::PointerT const *
+ getAddrOfZeroTagPointer() const {
+ return const_cast<PointerSumType *>(this)->getAddrOfZeroTagPointer();
}
- explicit operator bool() const { return Value & HelperT::PointerMask; }
- bool operator==(const PointerSumType &R) const { return Value == R.Value; }
- bool operator!=(const PointerSumType &R) const { return Value != R.Value; }
- bool operator<(const PointerSumType &R) const { return Value < R.Value; }
- bool operator>(const PointerSumType &R) const { return Value > R.Value; }
- bool operator<=(const PointerSumType &R) const { return Value <= R.Value; }
- bool operator>=(const PointerSumType &R) const { return Value >= R.Value; }
+ /// If the tag is zero and the pointer's value isn't changed when being
+ /// stored, get the address of the stored value type-punned to the zero-tag's
+ /// pointer type.
+ typename HelperT::template Lookup<HelperT::MinTag>::PointerT *
+ getAddrOfZeroTagPointer() {
+ static_assert(HelperT::MinTag == 0, "Non-zero minimum tag value!");
+ assert(is<HelperT::MinTag>() && "The active tag is not zero!");
+ // Store the initial value of the pointer when read out of our storage.
+ auto InitialPtr = get<HelperT::MinTag>();
+ // Now update the active member of the union to be the actual pointer-typed
+ // member so that accessing it indirectly through the returned address is
+ // valid.
+ Storage.MinTagPointer = InitialPtr;
+ // Finally, validate that this was a no-op as expected by reading it back
+ // out using the same underlying-storage read as above.
+ assert(InitialPtr == get<HelperT::MinTag>() &&
+ "Switching to typed storage changed the pointer returned!");
+ // Now we can correctly return an address to typed storage.
+ return &Storage.MinTagPointer;
+ }
+
+ explicit operator bool() const {
+ return getOpaqueValue() & HelperT::PointerMask;
+ }
+ bool operator==(const PointerSumType &R) const {
+ return getOpaqueValue() == R.getOpaqueValue();
+ }
+ bool operator!=(const PointerSumType &R) const {
+ return getOpaqueValue() != R.getOpaqueValue();
+ }
+ bool operator<(const PointerSumType &R) const {
+ return getOpaqueValue() < R.getOpaqueValue();
+ }
+ bool operator>(const PointerSumType &R) const {
+ return getOpaqueValue() > R.getOpaqueValue();
+ }
+ bool operator<=(const PointerSumType &R) const {
+ return getOpaqueValue() <= R.getOpaqueValue();
+ }
+ bool operator>=(const PointerSumType &R) const {
+ return getOpaqueValue() >= R.getOpaqueValue();
+ }
- uintptr_t getOpaqueValue() const { return Value; }
+ uintptr_t getOpaqueValue() const {
+ // Read the underlying storage of the union, regardless of the active
+ // member.
+ return bit_cast<uintptr_t>(Storage);
+ }
protected:
- void *getImpl() const {
- return reinterpret_cast<void *>(Value & HelperT::PointerMask);
+ void *getVoidPtr() const {
+ return reinterpret_cast<void *>(getOpaqueValue() & HelperT::PointerMask);
}
};
@@ -151,8 +238,9 @@ struct PointerSumTypeHelper : MemberTs... {
enum { NumTagBits = Min<MemberTs::TraitsT::NumLowBitsAvailable...>::value };
// Also compute the smallest discriminant and various masks for convenience.
+ constexpr static TagT MinTag =
+ static_cast<TagT>(Min<MemberTs::Tag...>::value);
enum : uint64_t {
- MinTag = Min<MemberTs::Tag...>::value,
PointerMask = static_cast<uint64_t>(-1) << NumTagBits,
TagMask = ~PointerMask
};
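Keeping the raw word and the min-tag pointer in a union is what makes getAddrOfZeroTagPointer workable: when the active tag is the zero-valued one, the stored word is exactly the pointer, so its address can be handed out (for instance to build an ArrayRef of one pointer). A hedged sketch of the API; the enum and type names below are invented for illustration:

    #include "llvm/ADT/PointerSumType.h"

    enum Kind { K_Int, K_Float };   // K_Int == 0 is the minimum (zero) tag

    using IntOrFloatPtr = llvm::PointerSumType<
        Kind, llvm::PointerSumTypeMember<K_Int, int *>,
        llvm::PointerSumTypeMember<K_Float, float *>>;

    void pointersum_demo(int *IP, float *FP) {
      IntOrFloatPtr S = IntOrFloatPtr::create<K_Int>(IP);
      if (S.is<K_Int>()) {
        int **Addr = S.getAddrOfZeroTagPointer();  // valid only for tag 0
        (void)Addr;
      }
      S.set<K_Float>(FP);  // new: overwrite in place instead of re-creating
      S.clear();           // back to a null pointer with the min tag
    }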
diff --git a/contrib/llvm/include/llvm/ADT/PostOrderIterator.h b/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
index dc8a9b6e78b2..d77b12228cb1 100644
--- a/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
+++ b/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
@@ -296,12 +296,15 @@ class ReversePostOrderTraversal {
public:
using rpo_iterator = typename std::vector<NodeRef>::reverse_iterator;
+ using const_rpo_iterator = typename std::vector<NodeRef>::const_reverse_iterator;
ReversePostOrderTraversal(GraphT G) { Initialize(GT::getEntryNode(G)); }
// Because we want a reverse post order, use reverse iterators from the vector
rpo_iterator begin() { return Blocks.rbegin(); }
+ const_rpo_iterator begin() const { return Blocks.crbegin(); }
rpo_iterator end() { return Blocks.rend(); }
+ const_rpo_iterator end() const { return Blocks.crend(); }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/ADT/STLExtras.h b/contrib/llvm/include/llvm/ADT/STLExtras.h
index 94365dd9ced1..f66ca7c08a73 100644
--- a/contrib/llvm/include/llvm/ADT/STLExtras.h
+++ b/contrib/llvm/include/llvm/ADT/STLExtras.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Config/abi-breaking.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
@@ -70,6 +71,16 @@ template <typename B1, typename... Bn>
struct conjunction<B1, Bn...>
: std::conditional<bool(B1::value), conjunction<Bn...>, B1>::type {};
+template <typename T> struct make_const_ptr {
+ using type =
+ typename std::add_pointer<typename std::add_const<T>::type>::type;
+};
+
+template <typename T> struct make_const_ref {
+ using type = typename std::add_lvalue_reference<
+ typename std::add_const<T>::type>::type;
+};
+
//===----------------------------------------------------------------------===//
// Extra additions to <functional>
//===----------------------------------------------------------------------===//
@@ -194,6 +205,12 @@ void adl_swap(T &&lhs, T &&rhs) noexcept(
adl_detail::adl_swap(std::forward<T>(lhs), std::forward<T>(rhs));
}
+/// Test whether \p RangeOrContainer is empty. Similar to C++17 std::empty.
+template <typename T>
+constexpr bool empty(const T &RangeOrContainer) {
+ return adl_begin(RangeOrContainer) == adl_end(RangeOrContainer);
+}
+
// mapped_iterator - This is a simple iterator adapter that causes a function to
// be applied whenever operator* is invoked on the iterator.
@@ -418,9 +435,94 @@ make_filter_range(RangeT &&Range, PredicateT Pred) {
std::end(std::forward<RangeT>(Range)), Pred));
}
-// forward declarations required by zip_shortest/zip_first
+/// A pseudo-iterator adaptor that is designed to implement "early increment"
+/// style loops.
+///
+/// This is *not a normal iterator* and should almost never be used directly. It
+/// is intended primarily to be used with range based for loops and some range
+/// algorithms.
+///
+/// The iterator isn't quite an `OutputIterator` or an `InputIterator` but
+/// somewhere between them. The constraints of these iterators are:
+///
+/// - On construction or after being incremented, it is comparable and
+/// dereferenceable. It is *not* incrementable.
+/// - After being dereferenced, it is neither comparable nor dereferenceable; it
+/// is only incrementable.
+///
+/// This means you can only dereference the iterator once, and you can only
+/// increment it once between dereferences.
+template <typename WrappedIteratorT>
+class early_inc_iterator_impl
+ : public iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>,
+ WrappedIteratorT, std::input_iterator_tag> {
+ using BaseT =
+ iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>,
+ WrappedIteratorT, std::input_iterator_tag>;
+
+ using PointerT = typename std::iterator_traits<WrappedIteratorT>::pointer;
+
+protected:
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ bool IsEarlyIncremented = false;
+#endif
+
+public:
+ early_inc_iterator_impl(WrappedIteratorT I) : BaseT(I) {}
+
+ using BaseT::operator*;
+ typename BaseT::reference operator*() {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ assert(!IsEarlyIncremented && "Cannot dereference twice!");
+ IsEarlyIncremented = true;
+#endif
+ return *(this->I)++;
+ }
+
+ using BaseT::operator++;
+ early_inc_iterator_impl &operator++() {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ assert(IsEarlyIncremented && "Cannot increment before dereferencing!");
+ IsEarlyIncremented = false;
+#endif
+ return *this;
+ }
+
+ using BaseT::operator==;
+ bool operator==(const early_inc_iterator_impl &RHS) const {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ assert(!IsEarlyIncremented && "Cannot compare after dereferencing!");
+#endif
+ return BaseT::operator==(RHS);
+ }
+};
+
+/// Make a range that does early increment to allow mutation of the underlying
+/// range without disrupting iteration.
+///
+/// The underlying iterator will be incremented immediately after it is
+/// dereferenced, allowing deletion of the current node or insertion of nodes to
+/// not disrupt iteration provided they do not invalidate the *next* iterator --
+/// the current iterator can be invalidated.
+///
+/// This requires a very exact pattern of use that is only really suitable to
+/// range based for loops and other range algorithms that explicitly guarantee
+/// to dereference exactly once each element, and to increment exactly once each
+/// element.
+template <typename RangeT>
+iterator_range<early_inc_iterator_impl<detail::IterOfRange<RangeT>>>
+make_early_inc_range(RangeT &&Range) {
+ using EarlyIncIteratorT =
+ early_inc_iterator_impl<detail::IterOfRange<RangeT>>;
+ return make_range(EarlyIncIteratorT(std::begin(std::forward<RangeT>(Range))),
+ EarlyIncIteratorT(std::end(std::forward<RangeT>(Range))));
+}
+
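make_early_inc_range is the intended entry point; the iterator itself should rarely be spelled out. Because the wrapped iterator is advanced as part of the dereference, the loop body may erase the element it is currently looking at. A small sketch:

    #include "llvm/ADT/STLExtras.h"
    #include <map>
    #include <string>

    void dropEmptyValues(std::map<int, std::string> &M) {
      for (auto &KV : llvm::make_early_inc_range(M))
        if (KV.second.empty())
          M.erase(KV.first);  // safe: only the just-visited node is invalidated
    }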
+// forward declarations required by zip_shortest/zip_first/zip_longest
template <typename R, typename UnaryPredicate>
bool all_of(R &&range, UnaryPredicate P);
+template <typename R, typename UnaryPredicate>
+bool any_of(R &&range, UnaryPredicate P);
template <size_t... I> struct index_sequence;
@@ -571,6 +673,132 @@ detail::zippy<detail::zip_first, T, U, Args...> zip_first(T &&t, U &&u,
std::forward<T>(t), std::forward<U>(u), std::forward<Args>(args)...);
}
+namespace detail {
+template <typename Iter>
+static Iter next_or_end(const Iter &I, const Iter &End) {
+ if (I == End)
+ return End;
+ return std::next(I);
+}
+
+template <typename Iter>
+static auto deref_or_none(const Iter &I, const Iter &End)
+ -> llvm::Optional<typename std::remove_const<
+ typename std::remove_reference<decltype(*I)>::type>::type> {
+ if (I == End)
+ return None;
+ return *I;
+}
+
+template <typename Iter> struct ZipLongestItemType {
+ using type =
+ llvm::Optional<typename std::remove_const<typename std::remove_reference<
+ decltype(*std::declval<Iter>())>::type>::type>;
+};
+
+template <typename... Iters> struct ZipLongestTupleType {
+ using type = std::tuple<typename ZipLongestItemType<Iters>::type...>;
+};
+
+template <typename... Iters>
+class zip_longest_iterator
+ : public iterator_facade_base<
+ zip_longest_iterator<Iters...>,
+ typename std::common_type<
+ std::forward_iterator_tag,
+ typename std::iterator_traits<Iters>::iterator_category...>::type,
+ typename ZipLongestTupleType<Iters...>::type,
+ typename std::iterator_traits<typename std::tuple_element<
+ 0, std::tuple<Iters...>>::type>::difference_type,
+ typename ZipLongestTupleType<Iters...>::type *,
+ typename ZipLongestTupleType<Iters...>::type> {
+public:
+ using value_type = typename ZipLongestTupleType<Iters...>::type;
+
+private:
+ std::tuple<Iters...> iterators;
+ std::tuple<Iters...> end_iterators;
+
+ template <size_t... Ns>
+ bool test(const zip_longest_iterator<Iters...> &other,
+ index_sequence<Ns...>) const {
+ return llvm::any_of(
+ std::initializer_list<bool>{std::get<Ns>(this->iterators) !=
+ std::get<Ns>(other.iterators)...},
+ identity<bool>{});
+ }
+
+ template <size_t... Ns> value_type deref(index_sequence<Ns...>) const {
+ return value_type(
+ deref_or_none(std::get<Ns>(iterators), std::get<Ns>(end_iterators))...);
+ }
+
+ template <size_t... Ns>
+ decltype(iterators) tup_inc(index_sequence<Ns...>) const {
+ return std::tuple<Iters...>(
+ next_or_end(std::get<Ns>(iterators), std::get<Ns>(end_iterators))...);
+ }
+
+public:
+ zip_longest_iterator(std::pair<Iters &&, Iters &&>... ts)
+ : iterators(std::forward<Iters>(ts.first)...),
+ end_iterators(std::forward<Iters>(ts.second)...) {}
+
+ value_type operator*() { return deref(index_sequence_for<Iters...>{}); }
+
+ value_type operator*() const { return deref(index_sequence_for<Iters...>{}); }
+
+ zip_longest_iterator<Iters...> &operator++() {
+ iterators = tup_inc(index_sequence_for<Iters...>{});
+ return *this;
+ }
+
+ bool operator==(const zip_longest_iterator<Iters...> &other) const {
+ return !test(other, index_sequence_for<Iters...>{});
+ }
+};
+
+template <typename... Args> class zip_longest_range {
+public:
+ using iterator =
+ zip_longest_iterator<decltype(adl_begin(std::declval<Args>()))...>;
+ using iterator_category = typename iterator::iterator_category;
+ using value_type = typename iterator::value_type;
+ using difference_type = typename iterator::difference_type;
+ using pointer = typename iterator::pointer;
+ using reference = typename iterator::reference;
+
+private:
+ std::tuple<Args...> ts;
+
+ template <size_t... Ns> iterator begin_impl(index_sequence<Ns...>) const {
+ return iterator(std::make_pair(adl_begin(std::get<Ns>(ts)),
+ adl_end(std::get<Ns>(ts)))...);
+ }
+
+ template <size_t... Ns> iterator end_impl(index_sequence<Ns...>) const {
+ return iterator(std::make_pair(adl_end(std::get<Ns>(ts)),
+ adl_end(std::get<Ns>(ts)))...);
+ }
+
+public:
+ zip_longest_range(Args &&... ts_) : ts(std::forward<Args>(ts_)...) {}
+
+ iterator begin() const { return begin_impl(index_sequence_for<Args...>{}); }
+ iterator end() const { return end_impl(index_sequence_for<Args...>{}); }
+};
+} // namespace detail
+
+/// Iterate over two or more iterators at the same time. Iteration continues
+/// until all iterators reach the end. The llvm::Optional only contains a value
+/// if the iterator has not reached the end.
+template <typename T, typename U, typename... Args>
+detail::zip_longest_range<T, U, Args...> zip_longest(T &&t, U &&u,
+ Args &&... args) {
+ return detail::zip_longest_range<T, U, Args...>(
+ std::forward<T>(t), std::forward<U>(u), std::forward<Args>(args)...);
+}
+
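Unlike zip/zip_first, zip_longest keeps going until every range is exhausted and pads the shorter ones with None, so each tuple element is an Optional. An illustrative sketch:

    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    #include <tuple>

    void zip_longest_demo() {
      llvm::SmallVector<int, 4> A = {1, 2, 3};
      llvm::SmallVector<char, 4> B = {'x', 'y'};
      for (auto T : llvm::zip_longest(A, B)) {
        llvm::Optional<int> L = std::get<0>(T);
        llvm::Optional<char> R = std::get<1>(T);
        // On the third iteration L is 3 and R is llvm::None.
        (void)L; (void)R;
      }
    }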
/// Iterator wrapper that concatenates sequences together.
///
/// This can concatenate different iterators, even with different types, into
@@ -593,18 +821,20 @@ class concat_iterator
/// Note that something like iterator_range seems nice at first here, but the
/// range properties are of little benefit and end up getting in the way
/// because we need to do mutation on the current iterators.
- std::tuple<std::pair<IterTs, IterTs>...> IterPairs;
+ std::tuple<IterTs...> Begins;
+ std::tuple<IterTs...> Ends;
/// Attempts to increment a specific iterator.
///
/// Returns true if it was able to increment the iterator. Returns false if
/// the iterator is already at the end iterator.
template <size_t Index> bool incrementHelper() {
- auto &IterPair = std::get<Index>(IterPairs);
- if (IterPair.first == IterPair.second)
+ auto &Begin = std::get<Index>(Begins);
+ auto &End = std::get<Index>(Ends);
+ if (Begin == End)
return false;
- ++IterPair.first;
+ ++Begin;
return true;
}
@@ -628,11 +858,12 @@ class concat_iterator
/// dereferences the iterator and returns the address of the resulting
/// reference.
template <size_t Index> ValueT *getHelper() const {
- auto &IterPair = std::get<Index>(IterPairs);
- if (IterPair.first == IterPair.second)
+ auto &Begin = std::get<Index>(Begins);
+ auto &End = std::get<Index>(Ends);
+ if (Begin == End)
return nullptr;
- return &*IterPair.first;
+ return &*Begin;
}
/// Finds the first non-end iterator, dereferences, and returns the resulting
@@ -659,7 +890,7 @@ public:
/// iterators.
template <typename... RangeTs>
explicit concat_iterator(RangeTs &&... Ranges)
- : IterPairs({std::begin(Ranges), std::end(Ranges)}...) {}
+ : Begins(std::begin(Ranges)...), Ends(std::end(Ranges)...) {}
using BaseT::operator++;
@@ -671,7 +902,7 @@ public:
ValueT &operator*() const { return get(index_sequence_for<IterTs...>()); }
bool operator==(const concat_iterator &RHS) const {
- return IterPairs == RHS.IterPairs;
+ return Begins == RHS.Begins && Ends == RHS.Ends;
}
};
@@ -740,6 +971,19 @@ struct less_second {
}
};
+/// \brief Function object to apply a binary function to the first component of
+/// a std::pair.
+template<typename FuncTy>
+struct on_first {
+ FuncTy func;
+
+ template <typename T>
+ auto operator()(const T &lhs, const T &rhs) const
+ -> decltype(func(lhs.first, rhs.first)) {
+ return func(lhs.first, rhs.first);
+ }
+};
+
// A subset of N3658. More stuff can be added as-needed.
/// Represents a compile-time sequence of integers.
@@ -877,6 +1121,10 @@ inline void sort(IteratorTy Start, IteratorTy End) {
std::sort(Start, End);
}
+template <typename Container> inline void sort(Container &&C) {
+ llvm::sort(adl_begin(C), adl_end(C));
+}
+
template <typename IteratorTy, typename Compare>
inline void sort(IteratorTy Start, IteratorTy End, Compare Comp) {
#ifdef EXPENSIVE_CHECKS
@@ -886,6 +1134,11 @@ inline void sort(IteratorTy Start, IteratorTy End, Compare Comp) {
std::sort(Start, End, Comp);
}
+template <typename Container, typename Compare>
+inline void sort(Container &&C, Compare Comp) {
+ llvm::sort(adl_begin(C), adl_end(C), Comp);
+}
+
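The container overloads simply forward to the existing iterator form of llvm::sort, so a whole range can be sorted without spelling out begin/end. A small sketch:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    void sort_demo(llvm::SmallVector<int, 8> &V) {
      llvm::sort(V);                                       // ascending
      llvm::sort(V, [](int L, int R) { return L > R; });   // descending
    }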
//===----------------------------------------------------------------------===//
// Extra additions to <algorithm>
//===----------------------------------------------------------------------===//
@@ -908,6 +1161,18 @@ void DeleteContainerSeconds(Container &C) {
C.clear();
}
+/// Get the size of a range. This is a wrapper function around std::distance
+/// which is only enabled when the operation is O(1).
+template <typename R>
+auto size(R &&Range, typename std::enable_if<
+ std::is_same<typename std::iterator_traits<decltype(
+ Range.begin())>::iterator_category,
+ std::random_access_iterator_tag>::value,
+ void>::type * = nullptr)
+ -> decltype(std::distance(Range.begin(), Range.end())) {
+ return std::distance(Range.begin(), Range.end());
+}
+
/// Provide wrappers to std::for_each which take ranges instead of having to
/// pass begin/end explicitly.
template <typename R, typename UnaryPredicate>
@@ -1018,6 +1283,33 @@ auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range)) {
return std::lower_bound(adl_begin(Range), adl_end(Range), I);
}
+template <typename R, typename ForwardIt, typename Compare>
+auto lower_bound(R &&Range, ForwardIt I, Compare C)
+ -> decltype(adl_begin(Range)) {
+ return std::lower_bound(adl_begin(Range), adl_end(Range), I, C);
+}
+
+/// Provide wrappers to std::upper_bound which take ranges instead of having to
+/// pass begin/end explicitly.
+template <typename R, typename ForwardIt>
+auto upper_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range)) {
+ return std::upper_bound(adl_begin(Range), adl_end(Range), I);
+}
+
+template <typename R, typename ForwardIt, typename Compare>
+auto upper_bound(R &&Range, ForwardIt I, Compare C)
+ -> decltype(adl_begin(Range)) {
+ return std::upper_bound(adl_begin(Range), adl_end(Range), I, C);
+}
+/// Wrapper function around std::equal to detect if all elements
+/// in a container are the same.
+template <typename R>
+bool is_splat(R &&Range) {
+ size_t range_size = size(Range);
+ return range_size != 0 && (range_size == 1 ||
+ std::equal(adl_begin(Range) + 1, adl_end(Range), adl_begin(Range)));
+}
+
/// Given a range of type R, iterate the entire range and return a
/// SmallVector with elements of the vector. This is useful, for example,
/// when you want to iterate a range and then sort the results.
@@ -1039,18 +1331,6 @@ void erase_if(Container &C, UnaryPredicate P) {
C.erase(remove_if(C, P), C.end());
}
-/// Get the size of a range. This is a wrapper function around std::distance
-/// which is only enabled when the operation is O(1).
-template <typename R>
-auto size(R &&Range, typename std::enable_if<
- std::is_same<typename std::iterator_traits<decltype(
- Range.begin())>::iterator_category,
- std::random_access_iterator_tag>::value,
- void>::type * = nullptr)
- -> decltype(std::distance(Range.begin(), Range.end())) {
- return std::distance(Range.begin(), Range.end());
-}
-
//===----------------------------------------------------------------------===//
// Extra additions to <memory>
//===----------------------------------------------------------------------===//
@@ -1263,6 +1543,40 @@ auto apply_tuple(F &&f, Tuple &&t) -> decltype(detail::apply_tuple_impl(
Indices{});
}
+/// Return true if the sequence [Begin, End) has exactly N items. Runs in O(N)
+/// time. Not meant for use with random-access iterators.
+template <typename IterTy>
+bool hasNItems(
+ IterTy &&Begin, IterTy &&End, unsigned N,
+ typename std::enable_if<
+ !std::is_same<
+ typename std::iterator_traits<typename std::remove_reference<
+ decltype(Begin)>::type>::iterator_category,
+ std::random_access_iterator_tag>::value,
+ void>::type * = nullptr) {
+ for (; N; --N, ++Begin)
+ if (Begin == End)
+ return false; // Too few.
+ return Begin == End;
+}
+
+/// Return true if the sequence [Begin, End) has N or more items. Runs in O(N)
+/// time. Not meant for use with random-access iterators.
+template <typename IterTy>
+bool hasNItemsOrMore(
+ IterTy &&Begin, IterTy &&End, unsigned N,
+ typename std::enable_if<
+ !std::is_same<
+ typename std::iterator_traits<typename std::remove_reference<
+ decltype(Begin)>::type>::iterator_category,
+ std::random_access_iterator_tag>::value,
+ void>::type * = nullptr) {
+ for (; N; --N, ++Begin)
+ if (Begin == End)
+ return false; // Too few.
+ return true;
+}
+
} // end namespace llvm
#endif // LLVM_ADT_STLEXTRAS_H
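hasNItems and hasNItemsOrMore answer size questions about forward ranges while visiting at most N+1 elements, and is_splat reports whether a range is non-empty with all elements equal. A short sketch:

    #include "llvm/ADT/STLExtras.h"
    #include <list>
    #include <vector>

    void counting_demo() {
      std::list<int> L = {1, 2, 3};  // bidirectional, not random access
      bool ExactlyThree = llvm::hasNItems(L.begin(), L.end(), 3);      // true
      bool AtLeastTwo = llvm::hasNItemsOrMore(L.begin(), L.end(), 2);  // true

      std::vector<int> V = {7, 7, 7};
      bool Uniform = llvm::is_splat(V);  // true: non-empty and all equal
      (void)ExactlyThree; (void)AtLeastTwo; (void)Uniform;
    }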
diff --git a/contrib/llvm/include/llvm/ADT/SmallBitVector.h b/contrib/llvm/include/llvm/ADT/SmallBitVector.h
index b6391746639b..0a73dbd60671 100644
--- a/contrib/llvm/include/llvm/ADT/SmallBitVector.h
+++ b/contrib/llvm/include/llvm/ADT/SmallBitVector.h
@@ -92,10 +92,6 @@ public:
};
private:
- bool isSmall() const {
- return X & uintptr_t(1);
- }
-
BitVector *getPointer() const {
assert(!isSmall());
return reinterpret_cast<BitVector *>(X);
@@ -186,6 +182,8 @@ public:
return make_range(set_bits_begin(), set_bits_end());
}
+ bool isSmall() const { return X & uintptr_t(1); }
+
/// Tests whether there are no bits in this bitvector.
bool empty() const {
return isSmall() ? getSmallSize() == 0 : getPointer()->empty();
@@ -242,7 +240,7 @@ public:
uintptr_t Bits = getSmallBits();
if (Bits == 0)
return -1;
- return NumBaseBits - countLeadingZeros(Bits);
+ return NumBaseBits - countLeadingZeros(Bits) - 1;
}
return getPointer()->find_last();
}
@@ -265,7 +263,9 @@ public:
return -1;
uintptr_t Bits = getSmallBits();
- return NumBaseBits - countLeadingOnes(Bits);
+ // Set unused bits.
+ Bits |= ~uintptr_t(0) << getSmallSize();
+ return NumBaseBits - countLeadingOnes(Bits) - 1;
}
return getPointer()->find_last_unset();
}
@@ -465,6 +465,11 @@ public:
return (*this)[Idx];
}
+ // Push single bit to end of vector.
+ void push_back(bool Val) {
+ resize(size() + 1, Val);
+ }
+
/// Test if any common bits are set.
bool anyCommon(const SmallBitVector &RHS) const {
if (isSmall() && RHS.isSmall())
@@ -482,10 +487,17 @@ public:
bool operator==(const SmallBitVector &RHS) const {
if (size() != RHS.size())
return false;
- if (isSmall())
+ if (isSmall() && RHS.isSmall())
return getSmallBits() == RHS.getSmallBits();
- else
+ else if (!isSmall() && !RHS.isSmall())
return *getPointer() == *RHS.getPointer();
+ else {
+ for (size_t i = 0, e = size(); i != e; ++i) {
+ if ((*this)[i] != RHS[i])
+ return false;
+ }
+ return true;
+ }
}
bool operator!=(const SmallBitVector &RHS) const {
@@ -493,16 +505,19 @@ public:
}
// Intersection, union, disjoint union.
+ // FIXME BitVector::operator&= does not resize the LHS but this does
SmallBitVector &operator&=(const SmallBitVector &RHS) {
resize(std::max(size(), RHS.size()));
- if (isSmall())
+ if (isSmall() && RHS.isSmall())
setSmallBits(getSmallBits() & RHS.getSmallBits());
- else if (!RHS.isSmall())
+ else if (!isSmall() && !RHS.isSmall())
getPointer()->operator&=(*RHS.getPointer());
else {
- SmallBitVector Copy = RHS;
- Copy.resize(size());
- getPointer()->operator&=(*Copy.getPointer());
+ size_t i, e;
+ for (i = 0, e = std::min(size(), RHS.size()); i != e; ++i)
+ (*this)[i] = test(i) && RHS.test(i);
+ for (e = size(); i != e; ++i)
+ reset(i);
}
return *this;
}
@@ -542,28 +557,26 @@ public:
SmallBitVector &operator|=(const SmallBitVector &RHS) {
resize(std::max(size(), RHS.size()));
- if (isSmall())
+ if (isSmall() && RHS.isSmall())
setSmallBits(getSmallBits() | RHS.getSmallBits());
- else if (!RHS.isSmall())
+ else if (!isSmall() && !RHS.isSmall())
getPointer()->operator|=(*RHS.getPointer());
else {
- SmallBitVector Copy = RHS;
- Copy.resize(size());
- getPointer()->operator|=(*Copy.getPointer());
+ for (size_t i = 0, e = RHS.size(); i != e; ++i)
+ (*this)[i] = test(i) || RHS.test(i);
}
return *this;
}
SmallBitVector &operator^=(const SmallBitVector &RHS) {
resize(std::max(size(), RHS.size()));
- if (isSmall())
+ if (isSmall() && RHS.isSmall())
setSmallBits(getSmallBits() ^ RHS.getSmallBits());
- else if (!RHS.isSmall())
+ else if (!isSmall() && !RHS.isSmall())
getPointer()->operator^=(*RHS.getPointer());
else {
- SmallBitVector Copy = RHS;
- Copy.resize(size());
- getPointer()->operator^=(*Copy.getPointer());
+ for (size_t i = 0, e = RHS.size(); i != e; ++i)
+ (*this)[i] = test(i) != RHS.test(i);
}
return *this;
}
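In small mode the highest set bit at index i of the word has NumBaseBits - i - 1 leading zeros, so the extra "- 1" above is what makes find_last (and find_last_unset) return the bit's index rather than one past it. A quick sketch:

    #include "llvm/ADT/SmallBitVector.h"
    #include <cassert>

    void smallbitvector_demo() {
      llvm::SmallBitVector BV(8);   // 8 bits, stored inline (small mode)
      BV.set(2);
      BV.set(5);
      assert(BV.find_first() == 2);
      assert(BV.find_last() == 5);  // highest set bit, not one past it
      BV.push_back(true);           // new: grow by a single bit
      assert(BV.find_last() == 8);
    }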
diff --git a/contrib/llvm/include/llvm/ADT/SmallVector.h b/contrib/llvm/include/llvm/ADT/SmallVector.h
index acb4426b4f45..0636abbb1fbf 100644
--- a/contrib/llvm/include/llvm/ADT/SmallVector.h
+++ b/contrib/llvm/include/llvm/ADT/SmallVector.h
@@ -182,7 +182,7 @@ public:
/// SmallVectorTemplateBase<isPodLike = false> - This is where we put method
/// implementations that are designed to work with non-POD-like T's.
-template <typename T, bool isPodLike>
+template <typename T, bool = isPodLike<T>::value>
class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
protected:
SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
@@ -299,7 +299,7 @@ protected:
// use memcpy here. Note that I and E are iterators and thus might be
// invalid for memcpy if they are equal.
if (I != E)
- memcpy(Dest, I, (E - I) * sizeof(T));
+ memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T));
}
/// Double the size of the allocated memory, guaranteeing space for at
@@ -310,7 +310,7 @@ public:
void push_back(const T &Elt) {
if (LLVM_UNLIKELY(this->size() >= this->capacity()))
this->grow();
- memcpy(this->end(), &Elt, sizeof(T));
+ memcpy(reinterpret_cast<void *>(this->end()), &Elt, sizeof(T));
this->set_size(this->size() + 1);
}
@@ -320,8 +320,8 @@ public:
/// This class consists of common code factored out of the SmallVector class to
/// reduce code duplication based on the SmallVector 'N' template parameter.
template <typename T>
-class SmallVectorImpl : public SmallVectorTemplateBase<T, isPodLike<T>::value> {
- using SuperClass = SmallVectorTemplateBase<T, isPodLike<T>::value>;
+class SmallVectorImpl : public SmallVectorTemplateBase<T> {
+ using SuperClass = SmallVectorTemplateBase<T>;
public:
using iterator = typename SuperClass::iterator;
diff --git a/contrib/llvm/include/llvm/ADT/SparseBitVector.h b/contrib/llvm/include/llvm/ADT/SparseBitVector.h
index 4cbf40c76805..84e73bcbace8 100644
--- a/contrib/llvm/include/llvm/ADT/SparseBitVector.h
+++ b/contrib/llvm/include/llvm/ADT/SparseBitVector.h
@@ -261,21 +261,33 @@ class SparseBitVector {
BITWORD_SIZE = SparseBitVectorElement<ElementSize>::BITWORD_SIZE
};
- // Pointer to our current Element.
- ElementListIter CurrElementIter;
ElementList Elements;
+ // Pointer to our current Element. This has no visible effect on the external
+ // state of a SparseBitVector; it's just used to improve performance in the
+ // common case of testing/modifying bits with similar indices.
+ mutable ElementListIter CurrElementIter;
// This is like std::lower_bound, except we do linear searching from the
// current position.
- ElementListIter FindLowerBound(unsigned ElementIndex) {
+ ElementListIter FindLowerBoundImpl(unsigned ElementIndex) const {
+
+ // We cache a non-const iterator so we're forced to resort to const_cast to
+ // get the begin/end in the case where 'this' is const. To avoid duplication
+ // of code with the only difference being whether the const cast is present,
+ // 'this' is always const in this particular function and we sort out the
+ // difference in FindLowerBound and FindLowerBoundConst.
+ ElementListIter Begin =
+ const_cast<SparseBitVector<ElementSize> *>(this)->Elements.begin();
+ ElementListIter End =
+ const_cast<SparseBitVector<ElementSize> *>(this)->Elements.end();
if (Elements.empty()) {
- CurrElementIter = Elements.begin();
- return Elements.begin();
+ CurrElementIter = Begin;
+ return CurrElementIter;
}
// Make sure our current iterator is valid.
- if (CurrElementIter == Elements.end())
+ if (CurrElementIter == End)
--CurrElementIter;
// Search from our current iterator, either backwards or forwards,
@@ -284,17 +296,23 @@ class SparseBitVector {
if (CurrElementIter->index() == ElementIndex) {
return ElementIter;
} else if (CurrElementIter->index() > ElementIndex) {
- while (ElementIter != Elements.begin()
+ while (ElementIter != Begin
&& ElementIter->index() > ElementIndex)
--ElementIter;
} else {
- while (ElementIter != Elements.end() &&
+ while (ElementIter != End &&
ElementIter->index() < ElementIndex)
++ElementIter;
}
CurrElementIter = ElementIter;
return ElementIter;
}
+ ElementListConstIter FindLowerBoundConst(unsigned ElementIndex) const {
+ return FindLowerBoundImpl(ElementIndex);
+ }
+ ElementListIter FindLowerBound(unsigned ElementIndex) {
+ return FindLowerBoundImpl(ElementIndex);
+ }
// Iterator to walk set bits in the bitmap. This iterator is a lot uglier
// than it would be, in order to be efficient.
@@ -423,22 +441,12 @@ class SparseBitVector {
public:
using iterator = SparseBitVectorIterator;
- SparseBitVector() {
- CurrElementIter = Elements.begin();
- }
+ SparseBitVector() : Elements(), CurrElementIter(Elements.begin()) {}
- // SparseBitVector copy ctor.
- SparseBitVector(const SparseBitVector &RHS) {
- ElementListConstIter ElementIter = RHS.Elements.begin();
- while (ElementIter != RHS.Elements.end()) {
- Elements.push_back(SparseBitVectorElement<ElementSize>(*ElementIter));
- ++ElementIter;
- }
-
- CurrElementIter = Elements.begin ();
- }
-
- ~SparseBitVector() = default;
+ SparseBitVector(const SparseBitVector &RHS)
+ : Elements(RHS.Elements), CurrElementIter(Elements.begin()) {}
+ SparseBitVector(SparseBitVector &&RHS)
+ : Elements(std::move(RHS.Elements)), CurrElementIter(Elements.begin()) {}
// Clear.
void clear() {
@@ -450,26 +458,23 @@ public:
if (this == &RHS)
return *this;
- Elements.clear();
-
- ElementListConstIter ElementIter = RHS.Elements.begin();
- while (ElementIter != RHS.Elements.end()) {
- Elements.push_back(SparseBitVectorElement<ElementSize>(*ElementIter));
- ++ElementIter;
- }
-
- CurrElementIter = Elements.begin ();
-
+ Elements = RHS.Elements;
+ CurrElementIter = Elements.begin();
+ return *this;
+ }
+ SparseBitVector &operator=(SparseBitVector &&RHS) {
+ Elements = std::move(RHS.Elements);
+ CurrElementIter = Elements.begin();
return *this;
}
// Test, Reset, and Set a bit in the bitmap.
- bool test(unsigned Idx) {
+ bool test(unsigned Idx) const {
if (Elements.empty())
return false;
unsigned ElementIndex = Idx / ElementSize;
- ElementListIter ElementIter = FindLowerBound(ElementIndex);
+ ElementListConstIter ElementIter = FindLowerBoundConst(ElementIndex);
// If we can't find an element that is supposed to contain this bit, there
// is nothing more to do.
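Not part of the patch: a minimal sketch of what the mutable iterator cache enables, namely calling test() through a const reference.

#include "llvm/ADT/SparseBitVector.h"

bool hasBit(const llvm::SparseBitVector<> &BV, unsigned Idx) {
  // CurrElementIter is mutable, so the lookup cache can still be updated here.
  return BV.test(Idx); // previously required a non-const SparseBitVector
}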
diff --git a/contrib/llvm/include/llvm/ADT/StringExtras.h b/contrib/llvm/include/llvm/ADT/StringExtras.h
index 71b0e7527cb7..60a03633a8a6 100644
--- a/contrib/llvm/include/llvm/ADT/StringExtras.h
+++ b/contrib/llvm/include/llvm/ADT/StringExtras.h
@@ -139,22 +139,23 @@ inline std::string utohexstr(uint64_t X, bool LowerCase = false) {
/// Convert buffer \p Input to its hexadecimal representation.
/// The returned string is double the size of \p Input.
-inline std::string toHex(StringRef Input) {
+inline std::string toHex(StringRef Input, bool LowerCase = false) {
static const char *const LUT = "0123456789ABCDEF";
+ const uint8_t Offset = LowerCase ? 32 : 0;
size_t Length = Input.size();
std::string Output;
Output.reserve(2 * Length);
for (size_t i = 0; i < Length; ++i) {
const unsigned char c = Input[i];
- Output.push_back(LUT[c >> 4]);
- Output.push_back(LUT[c & 15]);
+ Output.push_back(LUT[c >> 4] | Offset);
+ Output.push_back(LUT[c & 15] | Offset);
}
return Output;
}
-inline std::string toHex(ArrayRef<uint8_t> Input) {
- return toHex(toStringRef(Input));
+inline std::string toHex(ArrayRef<uint8_t> Input, bool LowerCase = false) {
+ return toHex(toStringRef(Input), LowerCase);
}
inline uint8_t hexFromNibbles(char MSB, char LSB) {
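Not part of the patch: an illustrative use of the new LowerCase flag. The '| Offset' trick works because the digits '0'-'9' already have bit 0x20 set in ASCII, so only 'A'-'F' are shifted to lowercase.

#include "llvm/ADT/StringExtras.h"
#include <string>

void hexDemo() {
  std::string Upper = llvm::toHex("\xde\xad");       // "DEAD"
  std::string Lower = llvm::toHex("\xde\xad", true); // "dead"
}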
diff --git a/contrib/llvm/include/llvm/ADT/Triple.h b/contrib/llvm/include/llvm/ADT/Triple.h
index c95b16dd4e8c..e06a68e27317 100644
--- a/contrib/llvm/include/llvm/ADT/Triple.h
+++ b/contrib/llvm/include/llvm/ADT/Triple.h
@@ -55,12 +55,11 @@ public:
bpfel, // eBPF or extended BPF or 64-bit BPF (little endian)
bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian)
hexagon, // Hexagon: hexagon
- mips, // MIPS: mips, mipsallegrex
- mipsel, // MIPSEL: mipsel, mipsallegrexel
- mips64, // MIPS64: mips64
- mips64el, // MIPS64EL: mips64el
+ mips, // MIPS: mips, mipsallegrex, mipsr6
+ mipsel, // MIPSEL: mipsel, mipsallegrexel, mipsr6el
+ mips64, // MIPS64: mips64, mips64r6, mipsn32, mipsn32r6
+ mips64el, // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el
msp430, // MSP430: msp430
- nios2, // NIOSII: nios2
ppc, // PPC: powerpc
ppc64, // PPC64: powerpc64, ppu
ppc64le, // PPC64LE: powerpc64le
@@ -101,6 +100,7 @@ public:
enum SubArchType {
NoSubArch,
+ ARMSubArch_v8_5a,
ARMSubArch_v8_4a,
ARMSubArch_v8_3a,
ARMSubArch_v8_2a,
@@ -125,7 +125,9 @@ public:
KalimbaSubArch_v3,
KalimbaSubArch_v4,
- KalimbaSubArch_v5
+ KalimbaSubArch_v5,
+
+ MipsSubArch_r6
};
enum VendorType {
UnknownVendor,
@@ -182,7 +184,10 @@ public:
Mesa3D,
Contiki,
AMDPAL, // AMD PAL Runtime
- LastOSType = AMDPAL
+ HermitCore, // HermitCore Unikernel/Multikernel
+ Hurd, // GNU/Hurd
+ WASI, // Experimental WebAssembly OS
+ LastOSType = WASI
};
enum EnvironmentType {
UnknownEnvironment,
@@ -578,9 +583,20 @@ public:
return getOS() == Triple::KFreeBSD;
}
+ /// Tests whether the OS is Hurd.
+ bool isOSHurd() const {
+ return getOS() == Triple::Hurd;
+ }
+
+ /// Tests whether the OS is WASI.
+ bool isOSWASI() const {
+ return getOS() == Triple::WASI;
+ }
+
/// Tests whether the OS uses glibc.
bool isOSGlibc() const {
- return (getOS() == Triple::Linux || getOS() == Triple::KFreeBSD) &&
+ return (getOS() == Triple::Linux || getOS() == Triple::KFreeBSD ||
+ getOS() == Triple::Hurd) &&
!isAndroid();
}
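Not part of the patch: a sketch of the new OS queries, assuming the usual triple spellings map onto the new enumerators.

#include "llvm/ADT/Triple.h"
using llvm::Triple;

void tripleDemo() {
  Triple Wasi("wasm32-unknown-wasi");
  Triple Hurd("i686-unknown-hurd-gnu");
  bool A = Wasi.isOSWASI();  // expected true with the new WASI enumerator
  bool B = Hurd.isOSHurd();  // expected true with the new Hurd enumerator
  bool C = Hurd.isOSGlibc(); // Hurd now counts as a glibc-based OS
  (void)A; (void)B; (void)C;
}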
diff --git a/contrib/llvm/include/llvm/ADT/bit.h b/contrib/llvm/include/llvm/ADT/bit.h
new file mode 100644
index 000000000000..a4aba7b6a9ee
--- /dev/null
+++ b/contrib/llvm/include/llvm/ADT/bit.h
@@ -0,0 +1,59 @@
+//===-- llvm/ADT/bit.h - C++20 <bit> ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the C++20 <bit> header.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_BIT_H
+#define LLVM_ADT_BIT_H
+
+#include "llvm/Support/Compiler.h"
+#include <cstring>
+#include <type_traits>
+
+namespace llvm {
+
+// This implementation of bit_cast is different from the C++17 one in two ways:
+// - It isn't constexpr because that requires compiler support.
+// - It requires trivially-constructible To, to avoid UB in the implementation.
+template <typename To, typename From
+ , typename = typename std::enable_if<sizeof(To) == sizeof(From)>::type
+#if (__has_feature(is_trivially_constructible) && defined(_LIBCPP_VERSION)) || \
+ (defined(__GNUC__) && __GNUC__ >= 5)
+ , typename = typename std::is_trivially_constructible<To>::type
+#elif __has_feature(is_trivially_constructible)
+ , typename = typename std::enable_if<__is_trivially_constructible(To)>::type
+#else
+ // See comment below.
+#endif
+#if (__has_feature(is_trivially_copyable) && defined(_LIBCPP_VERSION)) || \
+ (defined(__GNUC__) && __GNUC__ >= 5)
+ , typename = typename std::enable_if<std::is_trivially_copyable<To>::value>::type
+ , typename = typename std::enable_if<std::is_trivially_copyable<From>::value>::type
+#elif __has_feature(is_trivially_copyable)
+ , typename = typename std::enable_if<__is_trivially_copyable(To)>::type
+ , typename = typename std::enable_if<__is_trivially_copyable(From)>::type
+#else
+ // This case is GCC 4.x. clang with libc++ or libstdc++ never gets here. Unlike
+ // llvm/Support/type_traits.h's isPodLike we don't want to provide a
+ // good-enough answer here: developers in that configuration will hit
+ // compilation failures on the bots instead of locally. That's acceptable
+ // because it's very few developers, and only until we move past C++11.
+#endif
+>
+inline To bit_cast(const From &from) noexcept {
+ To to;
+ std::memcpy(&to, &from, sizeof(To));
+ return to;
+}
+
+} // namespace llvm
+
+#endif
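Not part of the patch: a small example of the new llvm::bit_cast, which copies the object representation between same-sized, trivially copyable types.

#include "llvm/ADT/bit.h"
#include <cstdint>

uint32_t floatBits(float F) {
  // sizeof(uint32_t) == sizeof(float); both are trivially copyable.
  return llvm::bit_cast<uint32_t>(F);
}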
diff --git a/contrib/llvm/include/llvm/ADT/iterator.h b/contrib/llvm/include/llvm/ADT/iterator.h
index 549c5221173d..40e490cf7864 100644
--- a/contrib/llvm/include/llvm/ADT/iterator.h
+++ b/contrib/llvm/include/llvm/ADT/iterator.h
@@ -202,9 +202,7 @@ template <
typename ReferenceT = typename std::conditional<
std::is_same<T, typename std::iterator_traits<
WrappedIteratorT>::value_type>::value,
- typename std::iterator_traits<WrappedIteratorT>::reference, T &>::type,
- // Don't provide these, they are mostly to act as aliases below.
- typename WrappedTraitsT = std::iterator_traits<WrappedIteratorT>>
+ typename std::iterator_traits<WrappedIteratorT>::reference, T &>::type>
class iterator_adaptor_base
: public iterator_facade_base<DerivedT, IteratorCategoryT, T,
DifferenceTypeT, PointerT, ReferenceT> {
@@ -311,8 +309,10 @@ make_pointee_range(RangeT &&Range) {
template <typename WrappedIteratorT,
typename T = decltype(&*std::declval<WrappedIteratorT>())>
class pointer_iterator
- : public iterator_adaptor_base<pointer_iterator<WrappedIteratorT, T>,
- WrappedIteratorT, T> {
+ : public iterator_adaptor_base<
+ pointer_iterator<WrappedIteratorT, T>, WrappedIteratorT,
+ typename std::iterator_traits<WrappedIteratorT>::iterator_category,
+ T> {
mutable T Ptr;
public:
@@ -334,6 +334,34 @@ make_pointer_range(RangeT &&Range) {
PointerIteratorT(std::end(std::forward<RangeT>(Range))));
}
+// Wrapper iterator over iterator ItType, adding DataRef to the type of ItType,
+// to create NodeRef = std::pair<DataRef, InnerTypeOfItType>.
+template <typename ItType, typename NodeRef, typename DataRef>
+class WrappedPairNodeDataIterator
+ : public iterator_adaptor_base<
+ WrappedPairNodeDataIterator<ItType, NodeRef, DataRef>, ItType,
+ typename std::iterator_traits<ItType>::iterator_category, NodeRef,
+ std::ptrdiff_t, NodeRef *, NodeRef &> {
+ using BaseT = iterator_adaptor_base<
+ WrappedPairNodeDataIterator, ItType,
+ typename std::iterator_traits<ItType>::iterator_category, NodeRef,
+ std::ptrdiff_t, NodeRef *, NodeRef &>;
+
+ const DataRef DR;
+ mutable NodeRef NR;
+
+public:
+ WrappedPairNodeDataIterator(ItType Begin, const DataRef DR)
+ : BaseT(Begin), DR(DR) {
+ NR.first = DR;
+ }
+
+ NodeRef &operator*() const {
+ NR.second = *this->I;
+ return NR;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_ADT_ITERATOR_H
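Not part of the patch: a rough sketch of the new wrapper iterator. The constructor pins the pair's first member to the given DataRef and operator* fills the second member from the wrapped iterator.

#include "llvm/ADT/iterator.h"
#include <utility>
#include <vector>

using VecIt = std::vector<int>::const_iterator;
using Node = std::pair<const char *, int>;
using PairIt = llvm::WrappedPairNodeDataIterator<VecIt, Node, const char *>;

int sumTagged(const std::vector<int> &V) {
  int Sum = 0;
  for (PairIt I(V.begin(), "tag"), E(V.end(), "tag"); I != E; ++I)
    Sum += (*I).second; // (*I).first is always "tag"
  return Sum;
}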
diff --git a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
index be3496bbd955..e2a2ac0622e8 100644
--- a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -43,7 +43,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -335,8 +334,7 @@ public:
/// A convenience wrapper around the primary \c alias interface.
AliasResult alias(const Value *V1, const Value *V2) {
- return alias(V1, MemoryLocation::UnknownSize, V2,
- MemoryLocation::UnknownSize);
+ return alias(V1, LocationSize::unknown(), V2, LocationSize::unknown());
}
/// A trivial helper function to check to see if the specified pointers are
@@ -364,7 +362,8 @@ public:
/// A convenience wrapper around the \c isMustAlias helper interface.
bool isMustAlias(const Value *V1, const Value *V2) {
- return alias(V1, 1, V2, 1) == MustAlias;
+ return alias(V1, LocationSize::precise(1), V2, LocationSize::precise(1)) ==
+ MustAlias;
}
/// Checks whether the given location points to constant memory, or if
@@ -382,15 +381,15 @@ public:
/// \name Simple mod/ref information
/// @{
- /// Get the ModRef info associated with a pointer argument of a callsite. The
+ /// Get the ModRef info associated with a pointer argument of a call. The
/// result's bits are set to indicate the allowed aliasing ModRef kinds. Note
/// that these bits do not necessarily account for the overall behavior of
/// the function, but rather only provide additional per-argument
/// information. This never sets ModRefInfo::Must.
- ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx);
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx);
/// Return the behavior of the given call site.
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
/// Return the behavior when calling the given function.
FunctionModRefBehavior getModRefBehavior(const Function *F);
@@ -406,8 +405,8 @@ public:
/// property (e.g. calls to 'sin' and 'cos').
///
/// This property corresponds to the GCC 'const' attribute.
- bool doesNotAccessMemory(ImmutableCallSite CS) {
- return getModRefBehavior(CS) == FMRB_DoesNotAccessMemory;
+ bool doesNotAccessMemory(const CallBase *Call) {
+ return getModRefBehavior(Call) == FMRB_DoesNotAccessMemory;
}
/// Checks if the specified function is known to never read or write memory.
@@ -434,8 +433,8 @@ public:
/// absence of interfering store instructions, such as CSE of strlen calls.
///
/// This property corresponds to the GCC 'pure' attribute.
- bool onlyReadsMemory(ImmutableCallSite CS) {
- return onlyReadsMemory(getModRefBehavior(CS));
+ bool onlyReadsMemory(const CallBase *Call) {
+ return onlyReadsMemory(getModRefBehavior(Call));
}
/// Checks if the specified function is known to only read from non-volatile
@@ -500,36 +499,12 @@ public:
/// getModRefInfo (for call sites) - Return information about whether
/// a particular call site modifies or reads the specified memory location.
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
/// getModRefInfo (for call sites) - A convenience wrapper.
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const Value *P,
+ ModRefInfo getModRefInfo(const CallBase *Call, const Value *P,
LocationSize Size) {
- return getModRefInfo(CS, MemoryLocation(P, Size));
- }
-
- /// getModRefInfo (for calls) - Return information about whether
- /// a particular call modifies or reads the specified memory location.
- ModRefInfo getModRefInfo(const CallInst *C, const MemoryLocation &Loc) {
- return getModRefInfo(ImmutableCallSite(C), Loc);
- }
-
- /// getModRefInfo (for calls) - A convenience wrapper.
- ModRefInfo getModRefInfo(const CallInst *C, const Value *P,
- LocationSize Size) {
- return getModRefInfo(C, MemoryLocation(P, Size));
- }
-
- /// getModRefInfo (for invokes) - Return information about whether
- /// a particular invoke modifies or reads the specified memory location.
- ModRefInfo getModRefInfo(const InvokeInst *I, const MemoryLocation &Loc) {
- return getModRefInfo(ImmutableCallSite(I), Loc);
- }
-
- /// getModRefInfo (for invokes) - A convenience wrapper.
- ModRefInfo getModRefInfo(const InvokeInst *I, const Value *P,
- LocationSize Size) {
- return getModRefInfo(I, MemoryLocation(P, Size));
+ return getModRefInfo(Call, MemoryLocation(P, Size));
}
/// getModRefInfo (for loads) - Return information about whether
@@ -569,7 +544,7 @@ public:
/// getModRefInfo (for cmpxchges) - A convenience wrapper.
ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, const Value *P,
- unsigned Size) {
+ LocationSize Size) {
return getModRefInfo(CX, MemoryLocation(P, Size));
}
@@ -579,7 +554,7 @@ public:
/// getModRefInfo (for atomicrmws) - A convenience wrapper.
ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const Value *P,
- unsigned Size) {
+ LocationSize Size) {
return getModRefInfo(RMW, MemoryLocation(P, Size));
}
@@ -626,8 +601,8 @@ public:
ModRefInfo getModRefInfo(const Instruction *I,
const Optional<MemoryLocation> &OptLoc) {
if (OptLoc == None) {
- if (auto CS = ImmutableCallSite(I)) {
- return createModRefInfo(getModRefBehavior(CS));
+ if (const auto *Call = dyn_cast<CallBase>(I)) {
+ return createModRefInfo(getModRefBehavior(Call));
}
}
@@ -661,12 +636,12 @@ public:
/// Return information about whether a call and an instruction may refer to
/// the same memory locations.
- ModRefInfo getModRefInfo(Instruction *I, ImmutableCallSite Call);
+ ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call);
/// Return information about whether two call sites may refer to the same set
/// of memory locations. See the AA documentation for details:
/// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
/// Return information about whether a particular call site modifies
/// or reads the specified memory location \p MemLoc before instruction \p I
@@ -777,25 +752,25 @@ public:
/// that these bits do not necessarily account for the overall behavior of
/// the function, but rather only provide additional per-argument
/// information.
- virtual ModRefInfo getArgModRefInfo(ImmutableCallSite CS,
+ virtual ModRefInfo getArgModRefInfo(const CallBase *Call,
unsigned ArgIdx) = 0;
/// Return the behavior of the given call site.
- virtual FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) = 0;
+ virtual FunctionModRefBehavior getModRefBehavior(const CallBase *Call) = 0;
/// Return the behavior when calling the given function.
virtual FunctionModRefBehavior getModRefBehavior(const Function *F) = 0;
/// getModRefInfo (for call sites) - Return information about whether
/// a particular call site modifies or reads the specified memory location.
- virtual ModRefInfo getModRefInfo(ImmutableCallSite CS,
+ virtual ModRefInfo getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) = 0;
/// Return information about whether two call sites may refer to the same set
/// of memory locations. See the AA documentation for details:
/// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
- virtual ModRefInfo getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) = 0;
+ virtual ModRefInfo getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) = 0;
/// @}
};
@@ -827,26 +802,26 @@ public:
return Result.pointsToConstantMemory(Loc, OrLocal);
}
- ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) override {
- return Result.getArgModRefInfo(CS, ArgIdx);
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) override {
+ return Result.getArgModRefInfo(Call, ArgIdx);
}
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
- return Result.getModRefBehavior(CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call) override {
+ return Result.getModRefBehavior(Call);
}
FunctionModRefBehavior getModRefBehavior(const Function *F) override {
return Result.getModRefBehavior(F);
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS,
+ ModRefInfo getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) override {
- return Result.getModRefInfo(CS, Loc);
+ return Result.getModRefInfo(Call, Loc);
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return Result.getModRefInfo(CS1, CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) override {
+ return Result.getModRefInfo(Call1, Call2);
}
};
@@ -901,25 +876,28 @@ protected:
: CurrentResult.pointsToConstantMemory(Loc, OrLocal);
}
- ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
- return AAR ? AAR->getArgModRefInfo(CS, ArgIdx) : CurrentResult.getArgModRefInfo(CS, ArgIdx);
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
+ return AAR ? AAR->getArgModRefInfo(Call, ArgIdx)
+ : CurrentResult.getArgModRefInfo(Call, ArgIdx);
}
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
- return AAR ? AAR->getModRefBehavior(CS) : CurrentResult.getModRefBehavior(CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call) {
+ return AAR ? AAR->getModRefBehavior(Call)
+ : CurrentResult.getModRefBehavior(Call);
}
FunctionModRefBehavior getModRefBehavior(const Function *F) {
return AAR ? AAR->getModRefBehavior(F) : CurrentResult.getModRefBehavior(F);
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
- return AAR ? AAR->getModRefInfo(CS, Loc)
- : CurrentResult.getModRefInfo(CS, Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
+ return AAR ? AAR->getModRefInfo(Call, Loc)
+ : CurrentResult.getModRefInfo(Call, Loc);
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
- return AAR ? AAR->getModRefInfo(CS1, CS2) : CurrentResult.getModRefInfo(CS1, CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
+ return AAR ? AAR->getModRefInfo(Call1, Call2)
+ : CurrentResult.getModRefInfo(Call1, Call2);
}
};
@@ -951,11 +929,11 @@ public:
return false;
}
- ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
return ModRefInfo::ModRef;
}
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call) {
return FMRB_UnknownModRefBehavior;
}
@@ -963,11 +941,11 @@ public:
return FMRB_UnknownModRefBehavior;
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
return ModRefInfo::ModRef;
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
return ModRefInfo::ModRef;
}
};
@@ -1075,6 +1053,29 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
+/// A wrapper pass for external alias analyses. This just squirrels away the
+/// callback used to run any analyses and register their results.
+struct ExternalAAWrapperPass : ImmutablePass {
+ using CallbackT = std::function<void(Pass &, Function &, AAResults &)>;
+
+ CallbackT CB;
+
+ static char ID;
+
+ ExternalAAWrapperPass() : ImmutablePass(ID) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit ExternalAAWrapperPass(CallbackT CB)
+ : ImmutablePass(ID), CB(std::move(CB)) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+
FunctionPass *createAAResultsWrapperPass();
/// A wrapper pass around a callback which can be used to populate the
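Not part of the patch: a sketch of how a client migrates from ImmutableCallSite to CallBase with the updated AAResults interface.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

ModRefInfo modRefForInst(AAResults &AA, const Instruction &I,
                         const MemoryLocation &Loc) {
  if (const auto *Call = dyn_cast<CallBase>(&I))
    return AA.getModRefInfo(Call, Loc); // covers both calls and invokes
  return ModRefInfo::ModRef;            // conservative answer otherwise
}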
diff --git a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
index c9680ff40d1e..7ed5cd5c4734 100644
--- a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -52,9 +52,13 @@ class AliasSet : public ilist_node<AliasSet> {
PointerRec **PrevInList = nullptr;
PointerRec *NextInList = nullptr;
AliasSet *AS = nullptr;
- LocationSize Size = 0;
+ LocationSize Size = LocationSize::mapEmpty();
AAMDNodes AAInfo;
+ // Whether the size for this record has been set at all. This makes no
+ // guarantees about the size being known.
+ bool isSizeSet() const { return Size != LocationSize::mapEmpty(); }
+
public:
PointerRec(Value *V)
: Val(V), AAInfo(DenseMapInfo<AAMDNodes>::getEmptyKey()) {}
@@ -71,9 +75,10 @@ class AliasSet : public ilist_node<AliasSet> {
bool updateSizeAndAAInfo(LocationSize NewSize, const AAMDNodes &NewAAInfo) {
bool SizeChanged = false;
- if (NewSize > Size) {
- Size = NewSize;
- SizeChanged = true;
+ if (NewSize != Size) {
+ LocationSize OldSize = Size;
+ Size = isSizeSet() ? Size.unionWith(NewSize) : NewSize;
+ SizeChanged = OldSize != Size;
}
if (AAInfo == DenseMapInfo<AAMDNodes>::getEmptyKey())
@@ -91,7 +96,10 @@ class AliasSet : public ilist_node<AliasSet> {
return SizeChanged;
}
- LocationSize getSize() const { return Size; }
+ LocationSize getSize() const {
+ assert(isSizeSet() && "Getting an unset size!");
+ return Size;
+ }
/// Return the AAInfo, or null if there is no information or conflicting
/// information.
@@ -175,9 +183,6 @@ class AliasSet : public ilist_node<AliasSet> {
};
unsigned Alias : 1;
- /// True if this alias set contains volatile loads or stores.
- unsigned Volatile : 1;
-
unsigned SetSize = 0;
void addRef() { ++RefCount; }
@@ -203,9 +208,6 @@ public:
bool isMustAlias() const { return Alias == SetMustAlias; }
bool isMayAlias() const { return Alias == SetMayAlias; }
- /// Return true if this alias set contains volatile loads or stores.
- bool isVolatile() const { return Volatile; }
-
/// Return true if this alias set should be ignored as part of the
/// AliasSetTracker object.
bool isForwardingAliasSet() const { return Forward; }
@@ -224,6 +226,10 @@ public:
// track of the list's exact size.
unsigned size() { return SetSize; }
+ /// If this alias set is known to contain a single instruction and *only* a
+ /// single unique instruction, return it. Otherwise, return nullptr.
+ Instruction* getUniqueInstruction();
+
void print(raw_ostream &OS) const;
void dump() const;
@@ -264,7 +270,7 @@ private:
// Can only be created by AliasSetTracker.
AliasSet()
: PtrListEnd(&PtrList), RefCount(0), AliasAny(false), Access(NoAccess),
- Alias(SetMustAlias), Volatile(false) {}
+ Alias(SetMustAlias) {}
PointerRec *getSomePointer() const {
return PtrList;
@@ -303,8 +309,6 @@ private:
dropRef(AST);
}
- void setVolatile() { Volatile = true; }
-
public:
/// Return true if the specified pointer "may" (or must) alias one of the
/// members in the set.
@@ -379,23 +383,11 @@ public:
/// Return the alias sets that are active.
const ilist<AliasSet> &getAliasSets() const { return AliasSets; }
- /// Return the alias set that the specified pointer lives in. If the New
- /// argument is non-null, this method sets the value to true if a new alias
- /// set is created to contain the pointer (because the pointer didn't alias
- /// anything).
- AliasSet &getAliasSetForPointer(Value *P, LocationSize Size,
- const AAMDNodes &AAInfo);
-
- /// Return the alias set containing the location specified if one exists,
- /// otherwise return null.
- AliasSet *getAliasSetForPointerIfExists(const Value *P, LocationSize Size,
- const AAMDNodes &AAInfo) {
- return mergeAliasSetsForPointer(P, Size, AAInfo);
- }
-
- /// Return true if the specified instruction "may" (or must) alias one of the
- /// members in any of the sets.
- bool containsUnknown(const Instruction *I) const;
+ /// Return the alias set which contains the specified memory location. If
+ /// the memory location aliases two or more existing alias sets, this call
+ /// will have the effect of merging those alias sets before the single
+ /// resulting alias set is returned.
+ AliasSet &getAliasSetFor(const MemoryLocation &MemLoc);
/// Return the underlying alias analysis object used by this tracker.
AliasAnalysis &getAliasAnalysis() const { return AA; }
@@ -445,8 +437,7 @@ private:
return *Entry;
}
- AliasSet &addPointer(Value *P, LocationSize Size, const AAMDNodes &AAInfo,
- AliasSet::AccessLattice E);
+ AliasSet &addPointer(MemoryLocation Loc, AliasSet::AccessLattice E);
AliasSet *mergeAliasSetsForPointer(const Value *Ptr, LocationSize Size,
const AAMDNodes &AAInfo);
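Not part of the patch: the pointer/size/AAInfo triple is replaced by a single MemoryLocation; a minimal sketch of a caller.

#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

AliasSet &aliasSetForLoad(AliasSetTracker &AST, LoadInst &LI) {
  // Merges any overlapping sets and returns the single resulting set.
  return AST.getAliasSetFor(MemoryLocation::get(&LI));
}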
diff --git a/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index 6344e84b58eb..820d7ac0935a 100644
--- a/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -21,7 +21,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <algorithm>
@@ -84,18 +84,18 @@ public:
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
/// Chases pointers until we find a (constant global) or not.
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
/// Get the location associated with a pointer argument of a callsite.
- ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx);
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx);
/// Returns the behavior when calling the given call site.
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
/// Returns the behavior when calling the given function. For use when the
/// call site is not known.
@@ -115,7 +115,7 @@ private:
unsigned ZExtBits;
unsigned SExtBits;
- int64_t Scale;
+ APInt Scale;
bool operator==(const VariableGEPIndex &Other) const {
return V == Other.V && ZExtBits == Other.ZExtBits &&
@@ -133,10 +133,10 @@ private:
// Base pointer of the GEP
const Value *Base;
// Total constant offset w.r.t the base from indexing into structs
- int64_t StructOffset;
+ APInt StructOffset;
// Total constant offset w.r.t the base from indexing through
// pointers/arrays/vectors
- int64_t OtherOffset;
+ APInt OtherOffset;
// Scaled variable (non-constant) indices.
SmallVector<VariableGEPIndex, 4> VarIndices;
};
@@ -189,7 +189,7 @@ private:
bool
constantOffsetHeuristic(const SmallVectorImpl<VariableGEPIndex> &VarIndices,
LocationSize V1Size, LocationSize V2Size,
- int64_t BaseOffset, AssumptionCache *AC,
+ APInt BaseOffset, AssumptionCache *AC,
DominatorTree *DT);
bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
diff --git a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
index ca12db6208b8..0b2618735697 100644
--- a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -56,7 +56,7 @@ public:
const Function *getFunction() const;
const BranchProbabilityInfo *getBPI() const;
- void view() const;
+ void view(StringRef = "BlockFrequencyDAGs") const;
/// getblockFreq - Return block frequency. Return 0 if we don't have the
/// information. Please note that initial frequency is equal to ENTRY_FREQ. It
diff --git a/contrib/llvm/include/llvm/Analysis/CFG.h b/contrib/llvm/include/llvm/Analysis/CFG.h
index cccdd1637411..caae0b6e2a8f 100644
--- a/contrib/llvm/include/llvm/Analysis/CFG.h
+++ b/contrib/llvm/include/llvm/Analysis/CFG.h
@@ -25,7 +25,6 @@ class DominatorTree;
class Function;
class Instruction;
class LoopInfo;
-class TerminatorInst;
/// Analyze the specified function to find all of the loop backedges in the
/// function and return them. This is a relatively cheap (compared to
@@ -46,7 +45,7 @@ unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
/// edges from a block with multiple successors to a block with multiple
/// predecessors.
///
-bool isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges = false);
/// Determine whether instruction 'To' is reachable from 'From',
diff --git a/contrib/llvm/include/llvm/Analysis/CFGPrinter.h b/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
index 5786769cc500..5996dd90bcfd 100644
--- a/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
+++ b/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
@@ -150,7 +150,7 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
/// Display the raw branch weights from PGO.
std::string getEdgeAttributes(const BasicBlock *Node, succ_const_iterator I,
const Function *F) {
- const TerminatorInst *TI = Node->getTerminator();
+ const Instruction *TI = Node->getTerminator();
if (TI->getNumSuccessors() == 1)
return "";
@@ -172,8 +172,7 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
// Prepend a 'W' to indicate that this is a weight rather than the actual
// profile count (due to scaling).
- Twine Attrs = "label=\"W:" + Twine(Weight->getZExtValue()) + "\"";
- return Attrs.str();
+ return ("label=\"W:" + Twine(Weight->getZExtValue()) + "\"").str();
}
};
} // End llvm namespace
diff --git a/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h b/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
index 5e83ea2a6e2b..61b99f6c3e6b 100644
--- a/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -364,6 +364,10 @@ public:
InvalidSCCSet, nullptr, nullptr,
InlinedInternalEdges};
+ // Request PassInstrumentation from analysis manager, will use it to run
+ // instrumenting callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
+
PreservedAnalyses PA = PreservedAnalyses::all();
CG.buildRefSCCs();
for (auto RCI = CG.postorder_ref_scc_begin(),
@@ -428,8 +432,20 @@ public:
UR.UpdatedRC = nullptr;
UR.UpdatedC = nullptr;
+
+ // Check the PassInstrumentation's BeforePass callbacks before
+ // running the pass, skip its execution completely if asked to
+ // (callback returns false).
+ if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
+ continue;
+
PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR);
+ if (UR.InvalidatedSCCs.count(C))
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
+ else
+ PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
+
// Update the SCC and RefSCC if necessary.
C = UR.UpdatedC ? UR.UpdatedC : C;
RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
@@ -615,12 +631,20 @@ public:
if (CG.lookupSCC(*N) != CurrentC)
continue;
- PreservedAnalyses PassPA = Pass.run(N->getFunction(), FAM);
+ Function &F = N->getFunction();
+
+ PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F);
+ if (!PI.runBeforePass<Function>(Pass, F))
+ continue;
+
+ PreservedAnalyses PassPA = Pass.run(F, FAM);
+
+ PI.runAfterPass<Function>(Pass, F);
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
// directly handle the function analysis manager's invalidation here.
- FAM.invalidate(N->getFunction(), PassPA);
+ FAM.invalidate(F, PassPA);
// Then intersect the preserved set so that invalidation of module
// analyses will eventually occur when the module pass completes.
@@ -690,6 +714,8 @@ public:
PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR) {
PreservedAnalyses PA = PreservedAnalyses::all();
+ PassInstrumentation PI =
+ AM.getResult<PassInstrumentationAnalysis>(InitialC, CG);
// The SCC may be refined while we are running passes over it, so set up
// a pointer that we can update.
@@ -733,8 +759,17 @@ public:
auto CallCounts = ScanSCC(*C, CallHandles);
for (int Iteration = 0;; ++Iteration) {
+
+ if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
+ continue;
+
PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR);
+ if (UR.InvalidatedSCCs.count(C))
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
+ else
+ PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
+
// If the SCC structure has changed, bail immediately and let the outer
// CGSCC layer handle any iteration to reflect the refined structure.
if (UR.UpdatedC && UR.UpdatedC != C) {
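Not part of the patch: the BeforePass/AfterPass hooks consulted above are the ones registered through PassInstrumentationCallbacks; a rough sketch (the pass name below is made up).

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/PassInstrumentation.h"
using namespace llvm;

void installSkip(PassInstrumentationCallbacks &PIC) {
  PIC.registerBeforePassCallback([](StringRef PassID, Any) {
    return PassID != "some-cgscc-pass"; // returning false skips that pass
  });
}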
diff --git a/contrib/llvm/include/llvm/Analysis/CaptureTracking.h b/contrib/llvm/include/llvm/Analysis/CaptureTracking.h
index 7a869a51233a..aaaaff9ae252 100644
--- a/contrib/llvm/include/llvm/Analysis/CaptureTracking.h
+++ b/contrib/llvm/include/llvm/Analysis/CaptureTracking.h
@@ -22,6 +22,14 @@ namespace llvm {
class DominatorTree;
class OrderedBasicBlock;
+ /// The default value for MaxUsesToExplore argument. It's relatively small to
+ /// keep the cost of analysis reasonable for clients like BasicAliasAnalysis,
+ /// where the results can't be cached.
+ /// TODO: we should probably introduce a caching CaptureTracking analysis and
+ /// use it where possible. The caching version can use a much higher limit or
+ /// not have this cap at all.
+ unsigned constexpr DefaultMaxUsesToExplore = 20;
+
/// PointerMayBeCaptured - Return true if this pointer value may be captured
/// by the enclosing function (which is required to exist). This routine can
/// be expensive, so consider caching the results. The boolean ReturnCaptures
@@ -29,9 +37,12 @@ namespace llvm {
/// counts as capturing it or not. The boolean StoreCaptures specified
/// whether storing the value (or part of it) into memory anywhere
/// automatically counts as capturing it or not.
+ /// MaxUsesToExplore specifies how many uses the analysis should explore for
+ /// one value before giving up due to "too many uses".
bool PointerMayBeCaptured(const Value *V,
bool ReturnCaptures,
- bool StoreCaptures);
+ bool StoreCaptures,
+ unsigned MaxUsesToExplore = DefaultMaxUsesToExplore);
/// PointerMayBeCapturedBefore - Return true if this pointer value may be
/// captured by the enclosing function (which is required to exist). If a
@@ -44,10 +55,13 @@ namespace llvm {
/// or not. Captures by the provided instruction are considered if the
/// final parameter is true. An ordered basic block in \p OBB could be used
/// to speed up capture-tracker queries.
+ /// MaxUsesToExplore specifies how many uses the analysis should explore for
+ /// one value before giving up due to "too many uses".
bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
bool StoreCaptures, const Instruction *I,
const DominatorTree *DT, bool IncludeI = false,
- OrderedBasicBlock *OBB = nullptr);
+ OrderedBasicBlock *OBB = nullptr,
+ unsigned MaxUsesToExplore = DefaultMaxUsesToExplore);
/// This callback is used in conjunction with PointerMayBeCaptured. In
/// addition to the interface here, you'll need to provide your own getters
@@ -75,7 +89,10 @@ namespace llvm {
/// PointerMayBeCaptured - Visit the value and the values derived from it and
/// find values which appear to be capturing the pointer value. This feeds
/// results into and is controlled by the CaptureTracker object.
- void PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker);
+ /// MaxUsesToExplore specifies how many uses the analysis should explore for
+ /// one value before giving up due to "too many uses".
+ void PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
+ unsigned MaxUsesToExplore = DefaultMaxUsesToExplore);
} // end namespace llvm
#endif
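Not part of the patch: a sketch of a caller raising the new use-exploration cap beyond DefaultMaxUsesToExplore.

#include "llvm/Analysis/CaptureTracking.h"
using namespace llvm;

bool mayBeCaptured(const Value *V) {
  return PointerMayBeCaptured(V, /*ReturnCaptures=*/true,
                              /*StoreCaptures=*/true,
                              /*MaxUsesToExplore=*/100); // default is 20
}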
diff --git a/contrib/llvm/include/llvm/Analysis/CmpInstAnalysis.h b/contrib/llvm/include/llvm/Analysis/CmpInstAnalysis.h
index 3cc69d9fea29..0e9c6a96b0f4 100644
--- a/contrib/llvm/include/llvm/Analysis/CmpInstAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/CmpInstAnalysis.h
@@ -46,19 +46,18 @@ namespace llvm {
///
unsigned getICmpCode(const ICmpInst *ICI, bool InvertPred = false);
- /// This is the complement of getICmpCode, which turns an opcode and two
- /// operands into either a constant true or false, or the predicate for a new
- /// ICmp instruction. The sign is passed in to determine which kind of
- /// predicate to use in the new icmp instruction.
+ /// This is the complement of getICmpCode. It turns a predicate code into
+ /// either a constant true or false or the predicate for a new ICmp.
+ /// The sign is passed in to determine which kind of predicate to use in the
+ /// new ICmp instruction.
/// Non-NULL return value will be a true or false constant.
- /// NULL return means a new ICmp is needed. The predicate for which is output
- /// in NewICmpPred.
- Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
- CmpInst::Predicate &NewICmpPred);
+ /// NULL return means a new ICmp is needed. The predicate is output in Pred.
+ Constant *getPredForICmpCode(unsigned Code, bool Sign, Type *OpTy,
+ CmpInst::Predicate &Pred);
/// Return true if both predicates match sign or if at least one of them is an
/// equality comparison (which is signless).
- bool PredicatesFoldable(CmpInst::Predicate p1, CmpInst::Predicate p2);
+ bool predicatesFoldable(CmpInst::Predicate P1, CmpInst::Predicate P2);
/// Decompose an icmp into the form ((X & Mask) pred 0) if possible. The
/// returned predicate is either == or !=. Returns false if decomposition
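Not part of the patch: a sketch of how the renamed helper is typically consumed; the wrapper function below is hypothetical.

#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

Value *materializeICmpCode(unsigned Code, bool Sign, Type *OpTy, Value *LHS,
                           Value *RHS, IRBuilder<> &B) {
  CmpInst::Predicate Pred;
  if (Constant *C = getPredForICmpCode(Code, Sign, OpTy, Pred))
    return C;                          // folded to constant true/false
  return B.CreateICmp(Pred, LHS, RHS); // otherwise build the new compare
}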
diff --git a/contrib/llvm/include/llvm/Analysis/DemandedBits.h b/contrib/llvm/include/llvm/Analysis/DemandedBits.h
index d4384609762d..4c4e3f6c99e7 100644
--- a/contrib/llvm/include/llvm/Analysis/DemandedBits.h
+++ b/contrib/llvm/include/llvm/Analysis/DemandedBits.h
@@ -44,19 +44,30 @@ public:
F(F), AC(AC), DT(DT) {}
/// Return the bits demanded from instruction I.
+ ///
+ /// For vector instructions individual vector elements are not distinguished:
+ /// A bit is demanded if it is demanded for any of the vector elements. The
+ /// size of the return value corresponds to the type size in bits of the
+ /// scalar type.
+ ///
+ /// Instructions that do not have integer or vector of integer type are
+ /// accepted, but will always produce a mask with all bits set.
APInt getDemandedBits(Instruction *I);
/// Return true if, during analysis, I could not be reached.
bool isInstructionDead(Instruction *I);
+ /// Return whether this use is dead by means of not having any demanded bits.
+ bool isUseDead(Use *U);
+
void print(raw_ostream &OS);
private:
void performAnalysis();
void determineLiveOperandBits(const Instruction *UserI,
- const Instruction *I, unsigned OperandNo,
+ const Value *Val, unsigned OperandNo,
const APInt &AOut, APInt &AB,
- KnownBits &Known, KnownBits &Known2);
+ KnownBits &Known, KnownBits &Known2, bool &KnownBitsComputed);
Function &F;
AssumptionCache &AC;
@@ -67,6 +78,9 @@ private:
// The set of visited instructions (non-integer-typed only).
SmallPtrSet<Instruction*, 32> Visited;
DenseMap<Instruction *, APInt> AliveBits;
+ // Uses with no demanded bits. If the user also has no demanded bits, the use
+ // might not be stored explicitly in this map, to save memory during analysis.
+ SmallPtrSet<Use *, 16> DeadUses;
};
class DemandedBitsWrapperPass : public FunctionPass {
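Not part of the patch: a sketch combining the existing per-instruction query with the new per-use query.

#include "llvm/Analysis/DemandedBits.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

bool operandHasNoDemandedBits(DemandedBits &DB, Instruction &I, unsigned OpNo) {
  // Either the whole instruction is dead, or this particular use demands no bits.
  return DB.isInstructionDead(&I) || DB.isUseDead(&I.getOperandUse(OpNo));
}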
diff --git a/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
index c8ec737a2cb9..69d0e2c1513e 100644
--- a/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -936,6 +936,17 @@ template <typename T> class ArrayRef;
friend struct AnalysisInfoMixin<DependenceAnalysis>;
}; // class DependenceAnalysis
+ /// Printer pass to dump DA results.
+ struct DependenceAnalysisPrinterPass
+ : public PassInfoMixin<DependenceAnalysisPrinterPass> {
+ DependenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+
+ private:
+ raw_ostream &OS;
+ }; // class DependenceAnalysisPrinterPass
+
/// Legacy pass manager pass to access dependence information
class DependenceAnalysisWrapperPass : public FunctionPass {
public:
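Not part of the patch: the new printer pass slots into a new-pass-manager function pipeline; a minimal sketch.

#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void addDAPrinter(FunctionPassManager &FPM) {
  FPM.addPass(DependenceAnalysisPrinterPass(errs()));
}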
diff --git a/contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h
index 328c8645d3c0..d834862db095 100644
--- a/contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -7,55 +7,199 @@
//
//===----------------------------------------------------------------------===//
//
-// The divergence analysis is an LLVM pass which can be used to find out
-// if a branch instruction in a GPU program is divergent or not. It can help
-// branch optimizations such as jump threading and loop unswitching to make
-// better decisions.
+// \file
+// The divergence analysis determines which instructions and branches are
+// divergent given a set of divergent source instructions.
//
//===----------------------------------------------------------------------===//
+
#ifndef LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
#define LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/SyncDependenceAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
+#include <vector>
namespace llvm {
+class Module;
class Value;
-class DivergenceAnalysis : public FunctionPass {
+class Instruction;
+class Loop;
+class raw_ostream;
+class TargetTransformInfo;
+
+/// \brief Generic divergence analysis for reducible CFGs.
+///
+/// This analysis propagates divergence in a data-parallel context from sources
+/// of divergence to all users. It requires reducible CFGs. All assignments
+/// should be in SSA form.
+class DivergenceAnalysis {
public:
- static char ID;
+ /// \brief This instance will analyze the whole function \p F or the loop \p
+ /// RegionLoop.
+ ///
+ /// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop.
+ /// Otherwise the whole function is analyzed.
+ /// \param IsLCSSAForm whether the analysis may assume that the IR in the
+ /// region is in LCSSA form.
+ DivergenceAnalysis(const Function &F, const Loop *RegionLoop,
+ const DominatorTree &DT, const LoopInfo &LI,
+ SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
- DivergenceAnalysis() : FunctionPass(ID) {
- initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
- }
+ /// \brief The loop that defines the analyzed region (if any).
+ const Loop *getRegionLoop() const { return RegionLoop; }
+ const Function &getFunction() const { return F; }
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ /// \brief Whether \p BB is part of the region.
+ bool inRegion(const BasicBlock &BB) const;
+ /// \brief Whether \p I is part of the region.
+ bool inRegion(const Instruction &I) const;
- bool runOnFunction(Function &F) override;
+ /// \brief Mark \p UniVal as a value that is always uniform.
+ void addUniformOverride(const Value &UniVal);
- // Print all divergent branches in the function.
- void print(raw_ostream &OS, const Module *) const override;
+ /// \brief Mark \p DivVal as a value that is always divergent.
+ void markDivergent(const Value &DivVal);
- // Returns true if V is divergent at its definition.
- //
- // Even if this function returns false, V may still be divergent when used
- // in a different basic block.
- bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
+ /// \brief Propagate divergence to all instructions in the region.
+ /// Divergence is seeded by calls to \p markDivergent.
+ void compute();
+
+ /// \brief Whether any value was marked or analyzed to be divergent.
+ bool hasDetectedDivergence() const { return !DivergentValues.empty(); }
+
+ /// \brief Whether \p Val will always return a uniform value regardless of its
+ /// operands
+ bool isAlwaysUniform(const Value &Val) const;
+
+ /// \brief Whether \p Val is a divergent value
+ bool isDivergent(const Value &Val) const;
+
+ void print(raw_ostream &OS, const Module *) const;
+
+private:
+ bool updateTerminator(const Instruction &Term) const;
+ bool updatePHINode(const PHINode &Phi) const;
+
+ /// \brief Computes whether \p Inst is divergent based on the
+ /// divergence of its operands.
+ ///
+ /// \returns Whether \p Inst is divergent.
+ ///
+ /// This should only be called for non-phi, non-terminator instructions.
+ bool updateNormalInstruction(const Instruction &Inst) const;
+
+ /// \brief Mark users of live-out values as divergent.
+ ///
+ /// \param LoopHeader the header of the divergent loop.
+ ///
+ /// Marks all users of live-out values of the loop headed by \p LoopHeader
+ /// as divergent and puts them on the worklist.
+ void taintLoopLiveOuts(const BasicBlock &LoopHeader);
+
+ /// \brief Push all users of \p Val (in the region) to the worklist
+ void pushUsers(const Value &I);
+
+ /// \brief Push all phi nodes in \p Block to the worklist
+ void pushPHINodes(const BasicBlock &Block);
+
+ /// \brief Mark \p Block as join divergent
+ ///
+ /// A block is join divergent if two threads may reach it from different
+ /// incoming blocks at the same time.
+ void markBlockJoinDivergent(const BasicBlock &Block) {
+ DivergentJoinBlocks.insert(&Block);
+ }
+
+ /// \brief Whether \p Val is divergent when read in \p ObservingBlock.
+ bool isTemporalDivergent(const BasicBlock &ObservingBlock,
+ const Value &Val) const;
+
+ /// \brief Whether \p Block is join divergent
+ ///
+ /// (see markBlockJoinDivergent).
+ bool isJoinDivergent(const BasicBlock &Block) const {
+ return DivergentJoinBlocks.find(&Block) != DivergentJoinBlocks.end();
+ }
- // Returns true if V is uniform/non-divergent.
+ /// \brief Propagate control-induced divergence to users (phi nodes and
+ /// instructions).
//
- // Even if this function returns true, V may still be divergent when used
- // in a different basic block.
- bool isUniform(const Value *V) const { return !isDivergent(V); }
+ // \param JoinBlock is a divergent loop exit or join point of two disjoint
+ // paths.
+ // \returns Whether \p JoinBlock is a divergent loop exit of \p TermLoop.
+ bool propagateJoinDivergence(const BasicBlock &JoinBlock,
+ const Loop *TermLoop);
- // Keep the analysis results uptodate by removing an erased value.
- void removeValue(const Value *V) { DivergentValues.erase(V); }
+ /// \brief Propagate induced value divergence due to control divergence in \p
+ /// Term.
+ void propagateBranchDivergence(const Instruction &Term);
+
+ /// \brief Propagate divergence caused by a divergent loop exit.
+ ///
+ /// \param ExitingLoop is a divergent loop.
+ void propagateLoopDivergence(const Loop &ExitingLoop);
private:
- // Stores all divergent values.
+ const Function &F;
+ // If regionLoop != nullptr, analysis is only performed within \p RegionLoop.
+ // Otherwise, analyze the whole function.
+ const Loop *RegionLoop;
+
+ const DominatorTree &DT;
+ const LoopInfo &LI;
+
+ // Recognized divergent loops
+ DenseSet<const Loop *> DivergentLoops;
+
+ // The SDA links divergent branches to divergent control-flow joins.
+ SyncDependenceAnalysis &SDA;
+
+ // Use simplified code path for LCSSA form.
+ bool IsLCSSAForm;
+
+ // Set of known-uniform values.
+ DenseSet<const Value *> UniformOverrides;
+
+ // Blocks with joining divergent control from different predecessors.
+ DenseSet<const BasicBlock *> DivergentJoinBlocks;
+
+ // Detected/marked divergent values.
DenseSet<const Value *> DivergentValues;
+
+ // Internal worklist for divergence propagation.
+ std::vector<const Instruction *> Worklist;
};
-} // End llvm namespace
-#endif //LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
\ No newline at end of file
+/// \brief Divergence analysis frontend for GPU kernels.
+class GPUDivergenceAnalysis {
+ SyncDependenceAnalysis SDA;
+ DivergenceAnalysis DA;
+
+public:
+ /// Runs the divergence analysis on @F, a GPU kernel
+ GPUDivergenceAnalysis(Function &F, const DominatorTree &DT,
+ const PostDominatorTree &PDT, const LoopInfo &LI,
+ const TargetTransformInfo &TTI);
+
+ /// Whether any divergence was detected.
+ bool hasDivergence() const { return DA.hasDetectedDivergence(); }
+
+ /// The GPU kernel this analysis result is for
+ const Function &getFunction() const { return DA.getFunction(); }
+
+ /// Whether \p V is divergent.
+ bool isDivergent(const Value &V) const;
+
+ /// Whether \p V is uniform/non-divergent
+ bool isUniform(const Value &V) const { return !isDivergent(V); }
+
+ /// Print all divergent values in the kernel.
+ void print(raw_ostream &OS, const Module *) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
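Not part of the patch: a sketch of driving the new GPU frontend, assuming the required analyses have already been computed for the kernel.

#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

void reportDivergence(Function &F, const DominatorTree &DT,
                      const PostDominatorTree &PDT, const LoopInfo &LI,
                      const TargetTransformInfo &TTI) {
  GPUDivergenceAnalysis GDA(F, DT, PDT, LI, TTI);
  for (const BasicBlock &BB : F)
    for (const Instruction &I : BB)
      if (GDA.isDivergent(I))
        ; // treat I as divergent, e.g. keep it out of uniform code paths
}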
diff --git a/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h b/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h
index 09cef68ce70f..3a664ca6ef50 100644
--- a/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h
+++ b/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h
@@ -88,7 +88,7 @@ public:
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
using AAResultBase::getModRefInfo;
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
/// getModRefBehavior - Return the behavior of the specified function if
/// called from the specified call site. The call site may be null in which
@@ -98,7 +98,7 @@ public:
/// getModRefBehavior - Return the behavior of the specified function if
/// called from the specified call site. The call site may be null in which
/// case the most generic behavior of this function should be returned.
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
private:
FunctionInfo *getFunctionInfo(const Function *F);
@@ -113,7 +113,7 @@ private:
void CollectSCCMembership(CallGraph &CG);
bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V);
- ModRefInfo getModRefInfoForArgument(ImmutableCallSite CS,
+ ModRefInfo getModRefInfoForArgument(const CallBase *Call,
const GlobalValue *GV);
};
diff --git a/contrib/llvm/include/llvm/Analysis/GuardUtils.h b/contrib/llvm/include/llvm/Analysis/GuardUtils.h
new file mode 100644
index 000000000000..3b151eeafc81
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/GuardUtils.h
@@ -0,0 +1,26 @@
+//===-- GuardUtils.h - Utils for work with guards ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform analyses related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_GUARDUTILS_H
+#define LLVM_ANALYSIS_GUARDUTILS_H
+
+namespace llvm {
+
+class User;
+
+/// Returns true iff \p U has semantics of a guard.
+bool isGuard(const User *U);
+
+} // llvm
+
+#endif // LLVM_ANALYSIS_GUARDUTILS_H
+
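Not part of the patch: isGuard() is a small predicate; a sketch of a typical use when scanning a block.

#include "llvm/Analysis/GuardUtils.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

bool blockHasGuard(const BasicBlock &BB) {
  for (const Instruction &I : BB)
    if (isGuard(&I)) // true for calls to llvm.experimental.guard
      return true;
  return false;
}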
diff --git a/contrib/llvm/include/llvm/Analysis/IVDescriptors.h b/contrib/llvm/include/llvm/Analysis/IVDescriptors.h
new file mode 100644
index 000000000000..64b4ae23cc59
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -0,0 +1,357 @@
+//===- llvm/Analysis/IVDescriptors.h - IndVar Descriptors -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file "describes" induction and recurrence variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_IVDESCRIPTORS_H
+#define LLVM_ANALYSIS_IVDESCRIPTORS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+
+class AliasSet;
+class AliasSetTracker;
+class BasicBlock;
+class DataLayout;
+class Loop;
+class LoopInfo;
+class OptimizationRemarkEmitter;
+class PredicatedScalarEvolution;
+class PredIteratorCache;
+class ScalarEvolution;
+class SCEV;
+class TargetLibraryInfo;
+class TargetTransformInfo;
+
+/// The RecurrenceDescriptor is used to identify recurrence variables in a
+/// loop. Reduction is a special case of recurrence that has uses of the
+/// recurrence variable outside the loop. The method isReductionPHI identifies
+/// reductions that are basic recurrences.
+///
+/// Basic recurrences are defined as the summation, product, OR, AND, XOR, min,
+/// or max of a set of terms. For example: for(i=0; i<n; i++) { total +=
+/// array[i]; } is a summation of array elements. Basic recurrences are a
+/// special case of chains of recurrences (CR). See ScalarEvolution for CR
+/// references.
+
+/// This struct holds information about recurrence variables.
+class RecurrenceDescriptor {
+public:
+ /// This enum represents the kinds of recurrences that we support.
+ enum RecurrenceKind {
+ RK_NoRecurrence, ///< Not a recurrence.
+ RK_IntegerAdd, ///< Sum of integers.
+ RK_IntegerMult, ///< Product of integers.
+ RK_IntegerOr, ///< Bitwise or logical OR of numbers.
+ RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
+ RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
+ RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
+ RK_FloatAdd, ///< Sum of floats.
+ RK_FloatMult, ///< Product of floats.
+ RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
+ };
+
+ // This enum represents the kind of minmax recurrence.
+ enum MinMaxRecurrenceKind {
+ MRK_Invalid,
+ MRK_UIntMin,
+ MRK_UIntMax,
+ MRK_SIntMin,
+ MRK_SIntMax,
+ MRK_FloatMin,
+ MRK_FloatMax
+ };
+
+ RecurrenceDescriptor() = default;
+
+ RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
+ MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT,
+ bool Signed, SmallPtrSetImpl<Instruction *> &CI)
+ : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),
+ UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
+ CastInsts.insert(CI.begin(), CI.end());
+ }
+
+ /// This POD struct holds information about a potential recurrence operation.
+ class InstDesc {
+ public:
+ InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr)
+ : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid),
+ UnsafeAlgebraInst(UAI) {}
+
+ InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr)
+ : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K),
+ UnsafeAlgebraInst(UAI) {}
+
+ bool isRecurrence() { return IsRecurrence; }
+
+ bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
+
+ Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
+
+ MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; }
+
+ Instruction *getPatternInst() { return PatternLastInst; }
+
+ private:
+ // Is this instruction a recurrence candidate.
+ bool IsRecurrence;
+ // The last instruction in a min/max pattern (select of the select(icmp())
+ // pattern), or the current recurrence instruction otherwise.
+ Instruction *PatternLastInst;
+ // If this is a min/max pattern the comparison predicate.
+ MinMaxRecurrenceKind MinMaxKind;
+ // Recurrence has unsafe algebra.
+ Instruction *UnsafeAlgebraInst;
+ };
+
+ /// Returns a struct describing if the instruction 'I' can be a recurrence
+ /// variable of type 'Kind'. If the recurrence is a min/max pattern of
+ /// select(icmp()) this function advances the instruction pointer 'I' from the
+ /// compare instruction to the select instruction and stores this pointer in
+ /// the 'PatternLastInst' member of the returned struct.
+ static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
+ InstDesc &Prev, bool HasFunNoNaNAttr);
+
+ /// Returns true if instruction I has multiple uses in Insts
+ static bool hasMultipleUsesOf(Instruction *I,
+ SmallPtrSetImpl<Instruction *> &Insts,
+ unsigned MaxNumUses);
+
+ /// Returns true if all uses of the instruction I are within the Set.
+ static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set);
+
+ /// Returns a struct describing whether the instruction is a
+ /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
+ /// or max(X, Y).
+ static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev);
+
+ /// Returns a struct describing if the instruction is a
+ /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
+ static InstDesc isConditionalRdxPattern(RecurrenceKind Kind, Instruction *I);
+
+ /// Returns the identity value corresponding to the RecurrenceKind.
+ static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp);
+
+ /// Returns the opcode of the binary operation corresponding to the
+ /// RecurrenceKind.
+ static unsigned getRecurrenceBinOp(RecurrenceKind Kind);
+
+ /// Returns true if Phi is a reduction of type Kind and adds it to the
+ /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
+ /// non-null, the minimal bit width needed to compute the reduction will be
+ /// computed.
+ static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop,
+ bool HasFunNoNaNAttr,
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB = nullptr,
+ AssumptionCache *AC = nullptr,
+ DominatorTree *DT = nullptr);
+
+ /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
+ /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
+ /// non-null, the minimal bit width needed to compute the reduction will be
+ /// computed.
+ static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB = nullptr,
+ AssumptionCache *AC = nullptr,
+ DominatorTree *DT = nullptr);
+
+ /// Returns true if Phi is a first-order recurrence. A first-order recurrence
+ /// is a non-reduction recurrence relation in which the value of the
+ /// recurrence in the current loop iteration equals a value defined in the
+ /// previous iteration. \p SinkAfter includes pairs of instructions where the
+ /// first will be rescheduled to appear after the second if/when the loop is
+ /// vectorized. It may be augmented with additional pairs if needed in order
+ /// to handle Phi as a first-order recurrence.
+ static bool
+ isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DenseMap<Instruction *, Instruction *> &SinkAfter,
+ DominatorTree *DT);
+
+ RecurrenceKind getRecurrenceKind() { return Kind; }
+
+ MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
+
+ TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }
+
+ Instruction *getLoopExitInstr() { return LoopExitInstr; }
+
+ /// Returns true if the recurrence has unsafe algebra which requires a relaxed
+ /// floating-point model.
+ bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
+
+ /// Returns first unsafe algebra instruction in the PHI node's use-chain.
+ Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
+
+ /// Returns true if the recurrence kind is an integer kind.
+ static bool isIntegerRecurrenceKind(RecurrenceKind Kind);
+
+ /// Returns true if the recurrence kind is a floating point kind.
+ static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind);
+
+ /// Returns true if the recurrence kind is an arithmetic kind.
+ static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);
+
+ /// Returns the type of the recurrence. This type can be narrower than the
+ /// actual type of the Phi if the recurrence has been type-promoted.
+ Type *getRecurrenceType() { return RecurrenceType; }
+
+ /// Returns a reference to the instructions used for type-promoting the
+ /// recurrence.
+ SmallPtrSet<Instruction *, 8> &getCastInsts() { return CastInsts; }
+
+ /// Returns true if all source operands of the recurrence are SExtInsts.
+ bool isSigned() { return IsSigned; }
+
+private:
+ // The starting value of the recurrence.
+ // It does not have to be zero!
+ TrackingVH<Value> StartValue;
+ // The instruction whose value is used outside the loop.
+ Instruction *LoopExitInstr = nullptr;
+ // The kind of the recurrence.
+ RecurrenceKind Kind = RK_NoRecurrence;
+ // If this is a min/max recurrence, the kind of min/max recurrence.
+ MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
+ // First occurrence of unsafe algebra in the PHI's use-chain.
+ Instruction *UnsafeAlgebraInst = nullptr;
+ // The type of the recurrence.
+ Type *RecurrenceType = nullptr;
+ // True if all source operands of the recurrence are SExtInsts.
+ bool IsSigned = false;
+ // Instructions used for type-promoting the recurrence.
+ SmallPtrSet<Instruction *, 8> CastInsts;
+};
+
+/// A struct for saving information about induction variables.
+class InductionDescriptor {
+public:
+ /// This enum represents the kinds of inductions that we support.
+ enum InductionKind {
+ IK_NoInduction, ///< Not an induction variable.
+ IK_IntInduction, ///< Integer induction variable. Step = C.
+ IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem).
+ IK_FpInduction ///< Floating point induction variable.
+ };
+
+public:
+ /// Default constructor - creates an invalid induction.
+ InductionDescriptor() = default;
+
+ /// Get the consecutive direction. Returns:
+ /// 0 - unknown or non-consecutive.
+ /// 1 - consecutive and increasing.
+ /// -1 - consecutive and decreasing.
+ int getConsecutiveDirection() const;
+
+ Value *getStartValue() const { return StartValue; }
+ InductionKind getKind() const { return IK; }
+ const SCEV *getStep() const { return Step; }
+ BinaryOperator *getInductionBinOp() const { return InductionBinOp; }
+ ConstantInt *getConstIntStepValue() const;
+
+ /// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an
+ /// induction, the induction descriptor \p D will contain the data describing
+ /// this induction. If by some other means the caller has a better SCEV
+ /// expression for \p Phi than the one returned by the ScalarEvolution
+ /// analysis, it can be passed through \p Expr. If the def-use chain
+ /// associated with the phi includes casts (that we know we can ignore
+ /// under proper runtime checks), they are passed through \p CastsToIgnore.
+ static bool
+ isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE,
+ InductionDescriptor &D, const SCEV *Expr = nullptr,
+ SmallVectorImpl<Instruction *> *CastsToIgnore = nullptr);
+
+ /// Returns true if \p Phi is a floating point induction in the loop \p L.
+ /// If \p Phi is an induction, the induction descriptor \p D will contain
+ /// the data describing this induction.
+ static bool isFPInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE,
+ InductionDescriptor &D);
+
+ /// Returns true if \p Phi is a loop \p L induction, in the context associated
+ /// with the run-time predicate of PSE. If \p Assume is true, this can add
+ /// further SCEV predicates to \p PSE in order to prove that \p Phi is an
+ /// induction.
+ /// If \p Phi is an induction, \p D will contain the data describing this
+ /// induction.
+ static bool isInductionPHI(PHINode *Phi, const Loop *L,
+ PredicatedScalarEvolution &PSE,
+ InductionDescriptor &D, bool Assume = false);
+
+ /// Returns true if the induction type is FP and the binary operator does
+ /// not have the "fast-math" property. Such an operation requires a relaxed FP
+ /// mode.
+ bool hasUnsafeAlgebra() {
+ return InductionBinOp && !cast<FPMathOperator>(InductionBinOp)->isFast();
+ }
+
+ /// Returns induction operator that does not have "fast-math" property
+ /// and requires FP unsafe mode.
+ Instruction *getUnsafeAlgebraInst() {
+ if (!InductionBinOp || cast<FPMathOperator>(InductionBinOp)->isFast())
+ return nullptr;
+ return InductionBinOp;
+ }
+
+ /// Returns binary opcode of the induction operator.
+ Instruction::BinaryOps getInductionOpcode() const {
+ return InductionBinOp ? InductionBinOp->getOpcode()
+ : Instruction::BinaryOpsEnd;
+ }
+
+ /// Returns a reference to the type cast instructions in the induction
+ /// update chain, that are redundant when guarded with a runtime
+ /// SCEV overflow check.
+ const SmallVectorImpl<Instruction *> &getCastInsts() const {
+ return RedundantCasts;
+ }
+
+private:
+ /// Private constructor - used by \c isInductionPHI.
+ InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
+ BinaryOperator *InductionBinOp = nullptr,
+ SmallVectorImpl<Instruction *> *Casts = nullptr);
+
+ /// Start value.
+ TrackingVH<Value> StartValue;
+ /// Induction kind.
+ InductionKind IK = IK_NoInduction;
+ /// Step value.
+ const SCEV *Step = nullptr;
+ // Instruction that advances induction variable.
+ BinaryOperator *InductionBinOp = nullptr;
+ // Instructions used for type-casts of the induction variable,
+ // that are redundant when guarded with a runtime SCEV overflow check.
+ SmallVector<Instruction *, 2> RedundantCasts;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_IVDESCRIPTORS_H
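A minimal sketch of how a loop transform might query the interfaces added above, using only the
signatures declared in this header (the surrounding pass plumbing and analysis retrieval are
assumed; the helper name is hypothetical):

  #include "llvm/Analysis/IVDescriptors.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Classify the header PHIs of a loop as reductions or inductions using the
  // descriptors above.
  static void classifyHeaderPHIs(Loop *L, ScalarEvolution *SE) {
    for (PHINode &Phi : L->getHeader()->phis()) {
      RecurrenceDescriptor RedDes;
      InductionDescriptor IndDes;
      if (RecurrenceDescriptor::isReductionPHI(&Phi, L, RedDes)) {
        // A reduction such as 'total += array[i]'; the loop-exit instruction
        // produces the value used outside the loop.
        Instruction *Exit = RedDes.getLoopExitInstr();
        (void)Exit;
      } else if (InductionDescriptor::isInductionPHI(&Phi, L, SE, IndDes)) {
        // An induction variable; the step is a SCEV, e.g. a constant stride.
        const SCEV *Step = IndDes.getStep();
        (void)Step;
      }
    }
  }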
diff --git a/contrib/llvm/include/llvm/Analysis/IndirectCallSiteVisitor.h b/contrib/llvm/include/llvm/Analysis/IndirectCallSiteVisitor.h
deleted file mode 100644
index dde56a143c51..000000000000
--- a/contrib/llvm/include/llvm/Analysis/IndirectCallSiteVisitor.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===-- IndirectCallSiteVisitor.h - indirect call-sites visitor -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements defines a visitor class and a helper function that find
-// all indirect call-sites in a function.
-
-#include "llvm/IR/InstVisitor.h"
-#include <vector>
-
-namespace llvm {
-// Visitor class that finds all indirect call sites.
-struct PGOIndirectCallSiteVisitor
- : public InstVisitor<PGOIndirectCallSiteVisitor> {
- std::vector<Instruction *> IndirectCallInsts;
- PGOIndirectCallSiteVisitor() {}
-
- void visitCallSite(CallSite CS) {
- if (CS.isIndirectCall())
- IndirectCallInsts.push_back(CS.getInstruction());
- }
-};
-
-// Helper function that finds all indirect call sites.
-inline std::vector<Instruction *> findIndirectCallSites(Function &F) {
- PGOIndirectCallSiteVisitor ICV;
- ICV.visit(F);
- return ICV.IndirectCallInsts;
-}
-}
diff --git a/contrib/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/contrib/llvm/include/llvm/Analysis/IndirectCallVisitor.h
new file mode 100644
index 000000000000..d00cf63368f1
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -0,0 +1,39 @@
+//===-- IndirectCallVisitor.h - indirect call visitor ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a visitor class and a helper function that find all
+// indirect call sites in a function.
+
+#ifndef LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
+#define LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
+
+#include "llvm/IR/InstVisitor.h"
+#include <vector>
+
+namespace llvm {
+// Visitor class that finds all indirect calls.
+struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
+ std::vector<Instruction *> IndirectCalls;
+ PGOIndirectCallVisitor() {}
+
+ void visitCallBase(CallBase &Call) {
+ if (Call.isIndirectCall())
+ IndirectCalls.push_back(&Call);
+ }
+};
+
+// Helper function that finds all indirect call sites.
+inline std::vector<Instruction *> findIndirectCalls(Function &F) {
+ PGOIndirectCallVisitor ICV;
+ ICV.visit(F);
+ return ICV.IndirectCalls;
+}
+} // namespace llvm
+
+#endif
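For illustration, a short sketch of using the new helper (the calling context is assumed; the
counting function is hypothetical):

  #include "llvm/Analysis/IndirectCallVisitor.h"
  using namespace llvm;

  // Count the indirect call sites in a function, e.g. as a cheap profiling
  // heuristic.
  static unsigned countIndirectCalls(Function &F) {
    std::vector<Instruction *> Calls = findIndirectCalls(F);
    return static_cast<unsigned>(Calls.size());
  }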
diff --git a/contrib/llvm/include/llvm/Analysis/InlineCost.h b/contrib/llvm/include/llvm/Analysis/InlineCost.h
index 8c412057fb81..4c270354b0c4 100644
--- a/contrib/llvm/include/llvm/Analysis/InlineCost.h
+++ b/contrib/llvm/include/llvm/Analysis/InlineCost.h
@@ -46,7 +46,6 @@ const int IndirectCallThreshold = 100;
const int CallPenalty = 25;
const int LastCallToStaticBonus = 15000;
const int ColdccPenalty = 2000;
-const int NoreturnPenalty = 10000;
/// Do not inline functions which allocate this many bytes on the stack
/// when the caller is recursive.
const unsigned TotalAllocaSizeRecursiveCaller = 1024;
@@ -74,8 +73,15 @@ class InlineCost {
/// The adjusted threshold against which this cost was computed.
const int Threshold;
+ /// Must be set for Always and Never instances.
+ const char *Reason = nullptr;
+
// Trivial constructor, interesting logic in the factory functions below.
- InlineCost(int Cost, int Threshold) : Cost(Cost), Threshold(Threshold) {}
+ InlineCost(int Cost, int Threshold, const char *Reason = nullptr)
+ : Cost(Cost), Threshold(Threshold), Reason(Reason) {
+ assert((isVariable() || Reason) &&
+ "Reason must be provided for Never or Always");
+ }
public:
static InlineCost get(int Cost, int Threshold) {
@@ -83,11 +89,11 @@ public:
assert(Cost < NeverInlineCost && "Cost crosses sentinel value");
return InlineCost(Cost, Threshold);
}
- static InlineCost getAlways() {
- return InlineCost(AlwaysInlineCost, 0);
+ static InlineCost getAlways(const char *Reason) {
+ return InlineCost(AlwaysInlineCost, 0, Reason);
}
- static InlineCost getNever() {
- return InlineCost(NeverInlineCost, 0);
+ static InlineCost getNever(const char *Reason) {
+ return InlineCost(NeverInlineCost, 0, Reason);
}
/// Test whether the inline cost is low enough for inlining.
@@ -112,12 +118,30 @@ public:
return Threshold;
}
+ /// Get the reason of Always or Never.
+ const char *getReason() const {
+ assert((Reason || isVariable()) &&
+ "InlineCost reason must be set for Always or Never");
+ return Reason;
+ }
+
/// Get the cost delta from the threshold for inlining.
/// Only valid if the cost is of the variable kind. Returns a negative
/// value if the cost is too high to inline.
int getCostDelta() const { return Threshold - getCost(); }
};
+/// InlineResult is basically true or false. For false results, the message
+/// describes the reason why it was decided not to inline.
+struct InlineResult {
+ const char *message = nullptr;
+ InlineResult(bool result, const char *message = nullptr)
+ : message(result ? nullptr : (message ? message : "cost > threshold")) {}
+ InlineResult(const char *message = nullptr) : message(message) {}
+ operator bool() const { return !message; }
+ operator const char *() const { return message; }
+};
+
/// Thresholds to tune inline cost analysis. The inline cost analysis decides
/// the condition to apply a threshold and applies it. Otherwise,
/// DefaultThreshold is used. If a threshold is Optional, it is applied only
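A hedged sketch of the intended InlineResult usage pattern, based only on the struct shown in
this hunk (the helper names and the reason string are illustrative, not part of the header):

  #include "llvm/Analysis/InlineCost.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  static InlineResult analyzeCall(bool TooBig) {
    if (TooBig)
      return InlineResult("size grew beyond the caller's budget"); // carries a reason
    return InlineResult(true);                                     // inlinable, no message
  }

  static void reportDecision(raw_ostream &OS, bool TooBig) {
    InlineResult IR = analyzeCall(TooBig);
    if (!IR) // converts to false when a failure message is present
      OS << "not inlining: " << IR.message << "\n";
  }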
diff --git a/contrib/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h b/contrib/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h
new file mode 100644
index 000000000000..073e6ec3b7f6
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h
@@ -0,0 +1,150 @@
+//===-- InstructionPrecedenceTracking.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Implements a class that is able to define some instructions as "special"
+// (e.g. as having implicit control flow, or writing memory, or having another
+// interesting property) and then efficiently answers queries of the following types:
+// 1. Are there any special instructions in the block of interest?
+// 2. Return the first of the special instructions in the given block;
+// 3. Check if the given instruction is preceded by the first special
+// instruction in the same block.
+// The class provides caching that allows these queries to be answered quickly.
+// The user must make sure that the cached data is invalidated properly
+// whenever the contents of a tracked block change.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_INSTRUCTIONPRECEDENCETRACKING_H
+#define LLVM_ANALYSIS_INSTRUCTIONPRECEDENCETRACKING_H
+
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/OrderedInstructions.h"
+
+namespace llvm {
+
+class InstructionPrecedenceTracking {
+ // Maps a block to the topmost special instruction in it. If the value is
+ // nullptr, it means that it is known that this block does not contain any
+ // special instructions.
+ DenseMap<const BasicBlock *, const Instruction *> FirstSpecialInsts;
+ // Answers queries about the precedence of instructions within one block.
+ OrderedInstructions OI;
+
+ // Fills information about the given block's special instructions.
+ void fill(const BasicBlock *BB);
+
+#ifndef NDEBUG
+ /// Asserts that the cached info for \p BB is up-to-date. This helps to catch
+ /// the usage error of accessing a block without properly invalidating after a
+ /// previous transform.
+ void validate(const BasicBlock *BB) const;
+
+ /// Asserts that the contents of this tracking are up-to-date. This
+ /// helps to catch the usage error of accessing a block without properly
+ /// invalidating after a previous transform.
+ void validateAll() const;
+#endif
+
+protected:
+ InstructionPrecedenceTracking(DominatorTree *DT)
+ : OI(OrderedInstructions(DT)) {}
+
+ /// Returns the topmost special instruction from the block \p BB. Returns
+ /// nullptr if there are no special instructions in the block.
+ const Instruction *getFirstSpecialInstruction(const BasicBlock *BB);
+
+ /// Returns true iff at least one instruction from the basic block \p BB is
+ /// special.
+ bool hasSpecialInstructions(const BasicBlock *BB);
+
+ /// Returns true iff the first special instruction of \p Insn's block exists
+ /// and dominates \p Insn.
+ bool isPreceededBySpecialInstruction(const Instruction *Insn);
+
+ /// A predicate that defines whether or not the instruction \p Insn is
+ /// considered special and needs to be tracked. Implementing this method in
+ /// child classes allows tracking of implicit control flow,
+ /// memory-writing instructions, or any other kind of instruction we might
+ /// be interested in.
+ virtual bool isSpecialInstruction(const Instruction *Insn) const = 0;
+
+ virtual ~InstructionPrecedenceTracking() = default;
+
+public:
+ /// Notifies this tracking that we are going to insert a new instruction \p
+ /// Inst into the basic block \p BB. It makes all necessary updates to internal
+ /// caches to keep them consistent.
+ void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB);
+
+ /// Notifies this tracking that we are going to remove the instruction \p Inst.
+ /// It makes all necessary updates to internal caches to keep them consistent.
+ void removeInstruction(const Instruction *Inst);
+
+ /// Invalidates all information from this tracking.
+ void clear();
+};
+
+/// This class keeps track of instructions with implicit control flow.
+/// These are instructions that may not pass execution to their successors. For
+/// example, throwing calls and guards do not always do this. If we need to know
+/// for sure that some instruction is guaranteed to execute if the given block
+/// is reached, then we need to make sure that there is no implicit control flow
+/// instruction (ICFI) preceding it. For example, this check is required if we
+/// perform PRE and move a non-speculatable instruction to another place.
+class ImplicitControlFlowTracking : public InstructionPrecedenceTracking {
+public:
+ ImplicitControlFlowTracking(DominatorTree *DT)
+ : InstructionPrecedenceTracking(DT) {}
+
+ /// Returns the topmost instruction with implicit control flow from the given
+ /// basic block. Returns nullptr if there is no such instruction in the block.
+ const Instruction *getFirstICFI(const BasicBlock *BB) {
+ return getFirstSpecialInstruction(BB);
+ }
+
+ /// Returns true if at least one instruction from the given basic block has
+ /// implicit control flow.
+ bool hasICF(const BasicBlock *BB) {
+ return hasSpecialInstructions(BB);
+ }
+
+ /// Returns true if the first ICFI of Insn's block exists and dominates Insn.
+ bool isDominatedByICFIFromSameBlock(const Instruction *Insn) {
+ return isPreceededBySpecialInstruction(Insn);
+ }
+
+ virtual bool isSpecialInstruction(const Instruction *Insn) const;
+};
+
+class MemoryWriteTracking : public InstructionPrecedenceTracking {
+public:
+ MemoryWriteTracking(DominatorTree *DT) : InstructionPrecedenceTracking(DT) {}
+
+ /// Returns the topmost instruction that may write memory from the given
+ /// basic block. Returns nullptr if there is no such instruction in the block.
+ const Instruction *getFirstMemoryWrite(const BasicBlock *BB) {
+ return getFirstSpecialInstruction(BB);
+ }
+
+ /// Returns true if at least one instruction from the given basic block may
+ /// write memory.
+ bool mayWriteToMemory(const BasicBlock *BB) {
+ return hasSpecialInstructions(BB);
+ }
+
+ /// Returns true if the first memory writing instruction of Insn's block
+ /// exists and dominates Insn.
+ bool isDominatedByMemoryWriteFromSameBlock(const Instruction *Insn) {
+ return isPreceededBySpecialInstruction(Insn);
+ }
+
+ virtual bool isSpecialInstruction(const Instruction *Insn) const;
+};
+
+} // llvm
+
+#endif // LLVM_ANALYSIS_INSTRUCTIONPRECEDENCETRACKING_H
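A small sketch of how a transform such as PRE might consult the new tracking before hoisting an
instruction within its block (pass plumbing is assumed; the helper name is illustrative):

  #include "llvm/Analysis/InstructionPrecedenceTracking.h"
  using namespace llvm;

  static bool safeToHoistWithinBlock(ImplicitControlFlowTracking &ICF,
                                     Instruction *I) {
    // If a throwing call or guard may precede I in its block, hoisting I to
    // the top of the block could execute it on a path where it never ran.
    return !ICF.isDominatedByICFIFromSameBlock(I);
  }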
diff --git a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
index 4f896bddff87..6662e91037e1 100644
--- a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -32,6 +32,8 @@
#ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
#define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
namespace llvm {
@@ -40,7 +42,6 @@ template <typename T, typename... TArgs> class AnalysisManager;
template <class T> class ArrayRef;
class AssumptionCache;
class DominatorTree;
-class Instruction;
class ImmutableCallSite;
class DataLayout;
class FastMathFlags;
@@ -50,6 +51,41 @@ class Pass;
class TargetLibraryInfo;
class Type;
class Value;
+class MDNode;
+class BinaryOperator;
+
+/// InstrInfoQuery provides an interface to query additional information about
+/// instructions, such as metadata and flags like nsw. It returns conservative
+/// results unless the user has specified that this information is safe to use.
+struct InstrInfoQuery {
+ InstrInfoQuery(bool UMD) : UseInstrInfo(UMD) {}
+ InstrInfoQuery() : UseInstrInfo(true) {}
+ bool UseInstrInfo = true;
+
+ MDNode *getMetadata(const Instruction *I, unsigned KindID) const {
+ if (UseInstrInfo)
+ return I->getMetadata(KindID);
+ return nullptr;
+ }
+
+ template <class InstT> bool hasNoUnsignedWrap(const InstT *Op) const {
+ if (UseInstrInfo)
+ return Op->hasNoUnsignedWrap();
+ return false;
+ }
+
+ template <class InstT> bool hasNoSignedWrap(const InstT *Op) const {
+ if (UseInstrInfo)
+ return Op->hasNoSignedWrap();
+ return false;
+ }
+
+ bool isExact(const BinaryOperator *Op) const {
+ if (UseInstrInfo && isa<PossiblyExactOperator>(Op))
+ return cast<PossiblyExactOperator>(Op)->isExact();
+ return false;
+ }
+};
struct SimplifyQuery {
const DataLayout &DL;
@@ -58,14 +94,19 @@ struct SimplifyQuery {
AssumptionCache *AC = nullptr;
const Instruction *CxtI = nullptr;
+ // Wrapper to query additional information about instructions, such as
+ // metadata or flags like nsw; provides conservative results when that
+ // information cannot be safely used.
+ const InstrInfoQuery IIQ;
+
SimplifyQuery(const DataLayout &DL, const Instruction *CXTI = nullptr)
: DL(DL), CxtI(CXTI) {}
SimplifyQuery(const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT = nullptr,
AssumptionCache *AC = nullptr,
- const Instruction *CXTI = nullptr)
- : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI) {}
+ const Instruction *CXTI = nullptr, bool UseInstrInfo = true)
+ : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI), IIQ(UseInstrInfo) {}
SimplifyQuery getWithInstruction(Instruction *I) const {
SimplifyQuery Copy(*this);
Copy.CxtI = I;
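A minimal sketch of building a SimplifyQuery that tells the simplifier not to trust
instruction-level flags, assuming the usual analysis objects are already in scope (the helper
name is hypothetical):

  #include "llvm/Analysis/InstructionSimplify.h"
  using namespace llvm;

  static SimplifyQuery makeConservativeQuery(const DataLayout &DL,
                                             const TargetLibraryInfo *TLI,
                                             const DominatorTree *DT,
                                             AssumptionCache *AC,
                                             const Instruction *CxtI) {
    // With UseInstrInfo = false, InstrInfoQuery answers getMetadata with null
    // and hasNoUnsignedWrap/hasNoSignedWrap/isExact with false.
    return SimplifyQuery(DL, TLI, DT, AC, CxtI, /*UseInstrInfo=*/false);
  }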
diff --git a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
index 6b1950733246..3083db75b81c 100644
--- a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -6,7 +6,7 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
+/// \file
/// Compute iterated dominance frontiers using a linear time algorithm.
///
/// The algorithm used here is based on:
@@ -28,6 +28,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFGDiff.h"
#include "llvm/IR/Dominators.h"
namespace llvm {
@@ -45,17 +46,21 @@ namespace llvm {
template <class NodeTy, bool IsPostDom>
class IDFCalculator {
public:
- IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT)
- : DT(DT), useLiveIn(false) {}
+ IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT)
+ : DT(DT), GD(nullptr), useLiveIn(false) {}
- /// Give the IDF calculator the set of blocks in which the value is
- /// defined. This is equivalent to the set of starting blocks it should be
- /// calculating the IDF for (though later gets pruned based on liveness).
- ///
- /// Note: This set *must* live for the entire lifetime of the IDF calculator.
- void setDefiningBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) {
- DefBlocks = &Blocks;
- }
+ IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT,
+ const GraphDiff<BasicBlock *, IsPostDom> *GD)
+ : DT(DT), GD(GD), useLiveIn(false) {}
+
+ /// Give the IDF calculator the set of blocks in which the value is
+ /// defined. This is equivalent to the set of starting blocks it should be
+ /// calculating the IDF for (though later gets pruned based on liveness).
+ ///
+ /// Note: This set *must* live for the entire lifetime of the IDF calculator.
+ void setDefiningBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) {
+ DefBlocks = &Blocks;
+ }
/// Give the IDF calculator the set of blocks in which the value is
/// live on entry to the block. This is used to prune the IDF calculation to
@@ -85,6 +90,7 @@ class IDFCalculator {
private:
DominatorTreeBase<BasicBlock, IsPostDom> &DT;
+ const GraphDiff<BasicBlock *, IsPostDom> *GD;
bool useLiveIn;
const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks;
const SmallPtrSetImpl<BasicBlock *> *DefBlocks;
diff --git a/contrib/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
new file mode 100644
index 000000000000..fc426ad7fb64
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -0,0 +1,69 @@
+//===- llvm/Analysis/LegacyDivergenceAnalysis.h - KernelDivergence Analysis -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The kernel divergence analysis is an LLVM pass which can be used to find out
+// if a branch instruction in a GPU program (kernel) is divergent or not. It can help
+// branch optimizations such as jump threading and loop unswitching to make
+// better decisions.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H
+#define LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
+
+namespace llvm {
+class Value;
+class GPUDivergenceAnalysis;
+class LegacyDivergenceAnalysis : public FunctionPass {
+public:
+ static char ID;
+
+ LegacyDivergenceAnalysis() : FunctionPass(ID) {
+ initializeLegacyDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnFunction(Function &F) override;
+
+ // Print all divergent branches in the function.
+ void print(raw_ostream &OS, const Module *) const override;
+
+ // Returns true if V is divergent at its definition.
+ //
+ // Even if this function returns false, V may still be divergent when used
+ // in a different basic block.
+ bool isDivergent(const Value *V) const;
+
+ // Returns true if V is uniform/non-divergent.
+ //
+ // Even if this function returns true, V may still be divergent when used
+ // in a different basic block.
+ bool isUniform(const Value *V) const { return !isDivergent(V); }
+
+ // Keep the analysis results up to date by removing an erased value.
+ void removeValue(const Value *V) { DivergentValues.erase(V); }
+
+private:
+ // Whether analysis should be performed by GPUDivergenceAnalysis.
+ bool shouldUseGPUDivergenceAnalysis(const Function &F) const;
+
+ // (optional) handle to new DivergenceAnalysis
+ std::unique_ptr<GPUDivergenceAnalysis> gpuDA;
+
+ // Stores all divergent values.
+ DenseSet<const Value *> DivergentValues;
+};
+} // End llvm namespace
+
+#endif //LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H
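A hedged sketch of how a client pass might consume this analysis once it has been run (the
helper name is illustrative):

  #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static bool branchNeedsPredication(const LegacyDivergenceAnalysis &DA,
                                     const BranchInst *BI) {
    // A conditional branch whose condition is divergent may send different
    // threads down different paths and typically needs structurization.
    return BI->isConditional() && DA.isDivergent(BI->getCondition());
  }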
diff --git a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index d27b3e42bbeb..4ed00e207753 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -97,6 +97,19 @@ public:
/// Set of potential dependent memory accesses.
typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
+ /// Type to keep track of the status of the dependence check. The order of
+ /// the elements is important and has to be from most permissive to least
+ /// permissive.
+ enum class VectorizationSafetyStatus {
+ // Can vectorize safely without RT checks. All dependences are known to be
+ // safe.
+ Safe,
+ // Can possibly vectorize with RT checks to overcome unknown dependencies.
+ PossiblySafeWithRtChecks,
+ // Cannot vectorize due to known unsafe dependencies.
+ Unsafe,
+ };
+
 /// Dependence between memory access instructions.
struct Dependence {
/// The type of the dependence.
@@ -146,7 +159,7 @@ public:
Instruction *getDestination(const LoopAccessInfo &LAI) const;
/// Dependence types that don't prevent vectorization.
- static bool isSafeForVectorization(DepType Type);
+ static VectorizationSafetyStatus isSafeForVectorization(DepType Type);
/// Lexically forward dependence.
bool isForward() const;
@@ -164,8 +177,8 @@ public:
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
: PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeRegisterWidth(-1U),
- ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true),
- RecordDependences(true) {}
+ FoundNonConstantDistanceDependence(false),
+ Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {}
/// Register the location (instructions are given increasing numbers)
/// of a write access.
@@ -193,7 +206,9 @@ public:
/// No memory dependence was encountered that would inhibit
/// vectorization.
- bool isSafeForVectorization() const { return SafeForVectorization; }
+ bool isSafeForVectorization() const {
+ return Status == VectorizationSafetyStatus::Safe;
+ }
/// The maximum number of bytes of a vector register we can vectorize
/// the accesses safely with.
@@ -205,7 +220,10 @@ public:
 /// In some cases when the dependency check fails we can still
/// vectorize the loop with a dynamic array access check.
- bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
+ bool shouldRetryWithRuntimeCheck() const {
+ return FoundNonConstantDistanceDependence &&
+ Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks;
+ }
/// Returns the memory dependences. If null is returned we exceeded
/// the MaxDependences threshold and this information is not
@@ -267,11 +285,12 @@ private:
/// If we see a non-constant dependence distance we can still try to
/// vectorize this loop with runtime checks.
- bool ShouldRetryWithRuntimeCheck;
+ bool FoundNonConstantDistanceDependence;
- /// No memory dependence was encountered that would inhibit
- /// vectorization.
- bool SafeForVectorization;
+ /// Result of the dependence checks, indicating whether the checked
+ /// dependences are safe for vectorization, require RT checks or are known to
+ /// be unsafe.
+ VectorizationSafetyStatus Status;
//// True if Dependences reflects the dependences in the
//// loop. If false we exceeded MaxDependences and
@@ -304,6 +323,11 @@ private:
/// \return false if we shouldn't vectorize at all or avoid larger
/// vectorization factors by limiting MaxSafeDepDistBytes.
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
+
+ /// Updates the current safety status with \p S. We can go from Safe to
+ /// either PossiblySafeWithRtChecks or Unsafe and from
+ /// PossiblySafeWithRtChecks to Unsafe.
+ void mergeInStatus(VectorizationSafetyStatus S);
};
/// Holds information about the memory runtime legality checks to verify
@@ -564,11 +588,10 @@ public:
/// Print the information about the memory accesses in the loop.
void print(raw_ostream &OS, unsigned Depth = 0) const;
- /// Checks existence of store to invariant address inside loop.
- /// If the loop has any store to invariant address, then it returns true,
- /// else returns false.
- bool hasStoreToLoopInvariantAddress() const {
- return StoreToLoopInvariantAddress;
+ /// If the loop has a memory dependence involving an invariant address, i.e. two
+ /// stores or a store and a load, then return true, else return false.
+ bool hasDependenceInvolvingLoopInvariantAddress() const {
+ return HasDependenceInvolvingLoopInvariantAddress;
}
/// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
@@ -621,9 +644,8 @@ private:
/// Cache the result of analyzeLoop.
bool CanVecMem;
- /// Indicator for storing to uniform addresses.
- /// If a loop has write to a loop invariant address then it should be true.
- bool StoreToLoopInvariantAddress;
+ /// Indicator that there are non-vectorizable stores to a uniform address.
+ bool HasDependenceInvolvingLoopInvariantAddress;
/// The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
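For illustration, the documented transition rule for VectorizationSafetyStatus can be expressed
directly in terms of the enumerator order (a sketch of the rule, not necessarily the header's
implementation of mergeInStatus):

  #include "llvm/Analysis/LoopAccessAnalysis.h"
  using namespace llvm;

  using Status = MemoryDepChecker::VectorizationSafetyStatus;

  // Keep the least permissive of the two statuses; the only valid transitions
  // are Safe -> PossiblySafeWithRtChecks -> Unsafe.
  static Status mergeStatus(Status Current, Status New) {
    return Current < New ? New : Current;
  }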
diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm/include/llvm/Analysis/LoopInfo.h
index 30b29d66a1d1..72873546a068 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopInfo.h
@@ -408,6 +408,12 @@ public:
/// Verify loop structure of this loop and all nested loops.
void verifyLoopNest(DenseSet<const LoopT *> *Loops) const;
+ /// Returns true if the loop is annotated parallel.
+ ///
+ /// Derived classes can override this method using static template
+ /// polymorphism.
+ bool isAnnotatedParallel() const { return false; }
+
/// Print loop with all the BBs inside it.
void print(raw_ostream &OS, unsigned Depth = 0, bool Verbose = false) const;
@@ -989,6 +995,26 @@ public:
/// Function to print a loop's contents as LLVM's text IR assembly.
void printLoop(Loop &L, raw_ostream &OS, const std::string &Banner = "");
+/// Find and return the loop attribute node for the attribute @p Name in
+/// @p LoopID. Return nullptr if there is no such attribute.
+MDNode *findOptionMDForLoopID(MDNode *LoopID, StringRef Name);
+
+/// Find string metadata for a loop.
+///
+/// Returns the MDNode where the first operand is the metadata's name. The
+/// following operands are the metadata's values. If no metadata with @p Name is
+/// found, return nullptr.
+MDNode *findOptionMDForLoop(const Loop *TheLoop, StringRef Name);
+
+/// Return whether an MDNode might represent an access group.
+///
+/// Access group metadata nodes have to be distinct and empty. Being
+/// always-empty ensures that it never needs to be changed (which -- because
+/// MDNodes are designed immutable -- would require creating a new MDNode). Note
+/// that this is not a sufficient condition: not every distinct and empty MDNode
+/// represents an access group.
+bool isValidAsAccessGroup(MDNode *AccGroup);
+
} // End llvm namespace
#endif
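A brief sketch of the new loop-metadata helper (the query function is illustrative; the metadata
name shown is just an example):

  #include "llvm/Analysis/LoopInfo.h"
  using namespace llvm;

  // Returns true if the loop carries an attribute MDNode whose first operand
  // is 'Name', e.g. loopHasNamedOption(L, "llvm.loop.unroll.disable").
  static bool loopHasNamedOption(const Loop *L, StringRef Name) {
    return findOptionMDForLoop(L, Name) != nullptr;
  }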
diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
index 941389858868..2b807919fedf 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -392,7 +392,10 @@ void LoopBase<BlockT, LoopT>::verifyLoopNest(
template <class BlockT, class LoopT>
void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth,
bool Verbose) const {
- OS.indent(Depth * 2) << "Loop at depth " << getLoopDepth() << " containing: ";
+ OS.indent(Depth * 2);
+ if (static_cast<const LoopT *>(this)->isAnnotatedParallel())
+ OS << "Parallel ";
+ OS << "Loop at depth " << getLoopDepth() << " containing: ";
BlockT *H = getHeader();
for (unsigned i = 0; i < getBlocks().size(); ++i) {
@@ -640,8 +643,8 @@ void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const {
template <typename T>
bool compareVectors(std::vector<T> &BB1, std::vector<T> &BB2) {
- llvm::sort(BB1.begin(), BB1.end());
- llvm::sort(BB2.begin(), BB2.end());
+ llvm::sort(BB1);
+ llvm::sort(BB2);
return BB1 == BB2;
}
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 1c40cffc7f67..958d4fe4b832 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -37,7 +37,6 @@
namespace llvm {
class AssumptionCache;
-class CallSite;
class DominatorTree;
class Function;
class Instruction;
@@ -304,7 +303,7 @@ private:
/// The maximum size of the dereferences of the pointer.
///
/// May be UnknownSize if the sizes are unknown.
- LocationSize Size = MemoryLocation::UnknownSize;
+ LocationSize Size = LocationSize::unknown();
/// The AA tags associated with dereferences of the pointer.
///
/// The members may be null if there are no tags or conflicting tags.
@@ -398,7 +397,7 @@ public:
/// invalidated on the next non-local query or when an instruction is
/// removed. Clients must copy this data if they want it around longer than
/// that.
- const NonLocalDepInfo &getNonLocalCallDependency(CallSite QueryCS);
+ const NonLocalDepInfo &getNonLocalCallDependency(CallBase *QueryCall);
/// Perform a full dependency query for an access to the QueryInst's
/// specified memory location, returning the set of instructions that either
@@ -482,9 +481,9 @@ public:
void releaseMemory();
private:
- MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
- BasicBlock::iterator ScanIt,
- BasicBlock *BB);
+ MemDepResult getCallDependencyFrom(CallBase *Call, bool isReadOnlyCall,
+ BasicBlock::iterator ScanIt,
+ BasicBlock *BB);
bool getNonLocalPointerDepFromBB(Instruction *QueryInst,
const PHITransAddr &Pointer,
const MemoryLocation &Loc, bool isLoad,
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryLocation.h b/contrib/llvm/include/llvm/Analysis/MemoryLocation.h
index 6b680000312c..fca18c1b5999 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -16,9 +16,9 @@
#ifndef LLVM_ANALYSIS_MEMORYLOCATION_H
#define LLVM_ANALYSIS_MEMORYLOCATION_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/DenseMapInfo.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
namespace llvm {
@@ -34,8 +34,134 @@ class AnyMemIntrinsic;
class TargetLibraryInfo;
// Represents the size of a MemoryLocation. Logically, it's an
-// Optional<uint64_t>, with a special UnknownSize value from `MemoryLocation`.
-using LocationSize = uint64_t;
+// Optional<uint64_t> that also carries a bit to represent whether the integer
+// it contains, N, is 'precise'. Precise, in this context, means that we know
+// that the area of storage referenced by the given MemoryLocation must be
+// precisely N bytes. An imprecise value is formed as the union of two or more
+// precise values, and can conservatively represent all of the values unioned
+// into it. Importantly, imprecise values are an *upper-bound* on the size of a
+// MemoryLocation.
+//
+// Concretely, a precise MemoryLocation is (%p, 4) in
+// store i32 0, i32* %p
+//
+// Since we know that %p must be at least 4 bytes large at this point.
+// Otherwise, we have UB. An example of an imprecise MemoryLocation is (%p, 4)
+// at the memcpy in
+//
+// %n = select i1 %foo, i64 1, i64 4
+// call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %baz, i64 %n, i32 1,
+// i1 false)
+//
+// ...Since we'll copy *up to* 4 bytes into %p, but we can't guarantee that
+// we'll ever actually do so.
+//
+// If asked to represent a pathologically large value, this will degrade to
+// None.
+class LocationSize {
+ enum : uint64_t {
+ Unknown = ~uint64_t(0),
+ ImpreciseBit = uint64_t(1) << 63,
+ MapEmpty = Unknown - 1,
+ MapTombstone = Unknown - 2,
+
+ // The maximum value we can represent without falling back to 'unknown'.
+ MaxValue = (MapTombstone - 1) & ~ImpreciseBit,
+ };
+
+ uint64_t Value;
+
+ // Hack to support implicit construction. This should disappear when the
+ // public LocationSize ctor goes away.
+ enum DirectConstruction { Direct };
+
+ constexpr LocationSize(uint64_t Raw, DirectConstruction): Value(Raw) {}
+
+ static_assert(Unknown & ImpreciseBit, "Unknown is imprecise by definition.");
+public:
+ // FIXME: Migrate all users to construct via either `precise` or `upperBound`,
+ // to make it more obvious at the call site what kind of size they're
+ // providing.
+ //
+ // Since the overwhelming majority of users of this provide precise values,
+ // this assumes the provided value is precise.
+ constexpr LocationSize(uint64_t Raw)
+ : Value(Raw > MaxValue ? Unknown : Raw) {}
+
+ static LocationSize precise(uint64_t Value) { return LocationSize(Value); }
+
+ static LocationSize upperBound(uint64_t Value) {
+ // You can't go lower than 0, so give a precise result.
+ if (LLVM_UNLIKELY(Value == 0))
+ return precise(0);
+ if (LLVM_UNLIKELY(Value > MaxValue))
+ return unknown();
+ return LocationSize(Value | ImpreciseBit, Direct);
+ }
+
+ constexpr static LocationSize unknown() {
+ return LocationSize(Unknown, Direct);
+ }
+
+ // Sentinel values, generally used for maps.
+ constexpr static LocationSize mapTombstone() {
+ return LocationSize(MapTombstone, Direct);
+ }
+ constexpr static LocationSize mapEmpty() {
+ return LocationSize(MapEmpty, Direct);
+ }
+
+ // Returns a LocationSize that can correctly represent either `*this` or
+ // `Other`.
+ LocationSize unionWith(LocationSize Other) const {
+ if (Other == *this)
+ return *this;
+
+ if (!hasValue() || !Other.hasValue())
+ return unknown();
+
+ return upperBound(std::max(getValue(), Other.getValue()));
+ }
+
+ bool hasValue() const { return Value != Unknown; }
+ uint64_t getValue() const {
+ assert(hasValue() && "Getting value from an unknown LocationSize!");
+ return Value & ~ImpreciseBit;
+ }
+
+ // Returns whether or not this value is precise. Note that if a value is
+ // precise, it's guaranteed to not be `unknown()`.
+ bool isPrecise() const {
+ return (Value & ImpreciseBit) == 0;
+ }
+
+ // Convenience method to check if this LocationSize's value is 0.
+ bool isZero() const { return hasValue() && getValue() == 0; }
+
+ bool operator==(const LocationSize &Other) const {
+ return Value == Other.Value;
+ }
+
+ bool operator!=(const LocationSize &Other) const {
+ return !(*this == Other);
+ }
+
+ // Ordering operators are not provided, since it's unclear if there's only one
+ // reasonable way to compare:
+ // - values that don't exist against values that do, and
+ // - precise values to imprecise values
+
+ void print(raw_ostream &OS) const;
+
+ // Returns an opaque value that represents this LocationSize. Cannot be
+ // reliably converted back into a LocationSize.
+ uint64_t toRaw() const { return Value; }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, LocationSize Size) {
+ Size.print(OS);
+ return OS;
+}
/// Representation for a specific memory location.
///
@@ -108,11 +234,15 @@ public:
static MemoryLocation getForDest(const AnyMemIntrinsic *MI);
/// Return a location representing a particular argument of a call.
- static MemoryLocation getForArgument(ImmutableCallSite CS, unsigned ArgIdx,
- const TargetLibraryInfo &TLI);
+ static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx,
+ const TargetLibraryInfo *TLI);
+ static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx,
+ const TargetLibraryInfo &TLI) {
+ return getForArgument(Call, ArgIdx, &TLI);
+ }
explicit MemoryLocation(const Value *Ptr = nullptr,
- LocationSize Size = UnknownSize,
+ LocationSize Size = LocationSize::unknown(),
const AAMDNodes &AATags = AAMDNodes())
: Ptr(Ptr), Size(Size), AATags(AATags) {}
@@ -139,13 +269,30 @@ public:
}
};
-// Specialize DenseMapInfo for MemoryLocation.
+// Specialize DenseMapInfo.
+template <> struct DenseMapInfo<LocationSize> {
+ static inline LocationSize getEmptyKey() {
+ return LocationSize::mapEmpty();
+ }
+ static inline LocationSize getTombstoneKey() {
+ return LocationSize::mapTombstone();
+ }
+ static unsigned getHashValue(const LocationSize &Val) {
+ return DenseMapInfo<uint64_t>::getHashValue(Val.toRaw());
+ }
+ static bool isEqual(const LocationSize &LHS, const LocationSize &RHS) {
+ return LHS == RHS;
+ }
+};
+
template <> struct DenseMapInfo<MemoryLocation> {
static inline MemoryLocation getEmptyKey() {
- return MemoryLocation(DenseMapInfo<const Value *>::getEmptyKey(), 0);
+ return MemoryLocation(DenseMapInfo<const Value *>::getEmptyKey(),
+ DenseMapInfo<LocationSize>::getEmptyKey());
}
static inline MemoryLocation getTombstoneKey() {
- return MemoryLocation(DenseMapInfo<const Value *>::getTombstoneKey(), 0);
+ return MemoryLocation(DenseMapInfo<const Value *>::getTombstoneKey(),
+ DenseMapInfo<LocationSize>::getTombstoneKey());
}
static unsigned getHashValue(const MemoryLocation &Val) {
return DenseMapInfo<const Value *>::getHashValue(Val.Ptr) ^
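A short sketch of the new LocationSize semantics, using only the methods defined above:

  #include "llvm/Analysis/MemoryLocation.h"
  #include <cassert>
  using namespace llvm;

  static LocationSize combineAccessSizes() {
    LocationSize A = LocationSize::precise(4);     // e.g. a 4-byte store
    LocationSize B = LocationSize::upperBound(16); // "at most 16 bytes"
    // The union must conservatively cover both accesses, so it becomes an
    // imprecise upper bound of 16 bytes.
    LocationSize U = A.unionWith(B);
    assert(!U.isPrecise() && U.getValue() == 16);
    return U;
  }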
diff --git a/contrib/llvm/include/llvm/Analysis/MemorySSA.h b/contrib/llvm/include/llvm/Analysis/MemorySSA.h
index d445e4430e5c..17e2d0c73977 100644
--- a/contrib/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/contrib/llvm/include/llvm/Analysis/MemorySSA.h
@@ -280,9 +280,10 @@ protected:
friend class MemorySSAUpdater;
MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty,
- DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB)
- : MemoryAccess(C, Vty, DeleteValue, BB, 1), MemoryInstruction(MI),
- OptimizedAccessAlias(MayAlias) {
+ DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB,
+ unsigned NumOperands)
+ : MemoryAccess(C, Vty, DeleteValue, BB, NumOperands),
+ MemoryInstruction(MI), OptimizedAccessAlias(MayAlias) {
setDefiningAccess(DMA);
}
@@ -308,11 +309,6 @@ private:
Optional<AliasResult> OptimizedAccessAlias;
};
-template <>
-struct OperandTraits<MemoryUseOrDef>
- : public FixedNumOperandTraits<MemoryUseOrDef, 1> {};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess)
-
/// Represents read-only accesses to memory
///
/// In particular, the set of Instructions that will be represented by
@@ -323,7 +319,8 @@ public:
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess);
MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB)
- : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB) {}
+ : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB,
+ /*NumOperands=*/1) {}
// allocate space for exactly one operand
void *operator new(size_t s) { return User::operator new(s, 1); }
@@ -381,31 +378,33 @@ public:
MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB,
unsigned Ver)
- : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB), ID(Ver) {}
+ : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB,
+ /*NumOperands=*/2),
+ ID(Ver) {}
- // allocate space for exactly one operand
- void *operator new(size_t s) { return User::operator new(s, 1); }
+ // allocate space for exactly two operands
+ void *operator new(size_t s) { return User::operator new(s, 2); }
static bool classof(const Value *MA) {
return MA->getValueID() == MemoryDefVal;
}
void setOptimized(MemoryAccess *MA) {
- Optimized = MA;
- OptimizedID = getDefiningAccess()->getID();
+ setOperand(1, MA);
+ OptimizedID = MA->getID();
}
MemoryAccess *getOptimized() const {
- return cast_or_null<MemoryAccess>(Optimized);
+ return cast_or_null<MemoryAccess>(getOperand(1));
}
bool isOptimized() const {
- return getOptimized() && getDefiningAccess() &&
- OptimizedID == getDefiningAccess()->getID();
+ return getOptimized() && OptimizedID == getOptimized()->getID();
}
void resetOptimized() {
OptimizedID = INVALID_MEMORYACCESS_ID;
+ setOperand(1, nullptr);
}
void print(raw_ostream &OS) const;
@@ -417,13 +416,34 @@ private:
const unsigned ID;
unsigned OptimizedID = INVALID_MEMORYACCESS_ID;
- WeakVH Optimized;
};
template <>
-struct OperandTraits<MemoryDef> : public FixedNumOperandTraits<MemoryDef, 1> {};
+struct OperandTraits<MemoryDef> : public FixedNumOperandTraits<MemoryDef, 2> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryDef, MemoryAccess)
+template <>
+struct OperandTraits<MemoryUseOrDef> {
+ static Use *op_begin(MemoryUseOrDef *MUD) {
+ if (auto *MU = dyn_cast<MemoryUse>(MUD))
+ return OperandTraits<MemoryUse>::op_begin(MU);
+ return OperandTraits<MemoryDef>::op_begin(cast<MemoryDef>(MUD));
+ }
+
+ static Use *op_end(MemoryUseOrDef *MUD) {
+ if (auto *MU = dyn_cast<MemoryUse>(MUD))
+ return OperandTraits<MemoryUse>::op_end(MU);
+ return OperandTraits<MemoryDef>::op_end(cast<MemoryDef>(MUD));
+ }
+
+ static unsigned operands(const MemoryUseOrDef *MUD) {
+ if (const auto *MU = dyn_cast<MemoryUse>(MUD))
+ return OperandTraits<MemoryUse>::operands(MU);
+ return OperandTraits<MemoryDef>::operands(cast<MemoryDef>(MUD));
+ }
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess)
+
/// Represents phi nodes for memory accesses.
///
/// These have the same semantic as regular phi nodes, with the exception that
@@ -684,13 +704,19 @@ public:
~MemorySSA();
MemorySSAWalker *getWalker();
+ MemorySSAWalker *getSkipSelfWalker();
/// Given a memory Mod/Ref'ing instruction, get the MemorySSA
/// access associated with it. If passed a basic block gets the memory phi
/// node that exists for that block, if there is one. Otherwise, this will get
/// a MemoryUseOrDef.
- MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
- MemoryPhi *getMemoryAccess(const BasicBlock *BB) const;
+ MemoryUseOrDef *getMemoryAccess(const Instruction *I) const {
+ return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
+ }
+
+ MemoryPhi *getMemoryAccess(const BasicBlock *BB) const {
+ return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
+ }
void dump() const;
void print(raw_ostream &) const;
@@ -750,6 +776,9 @@ public:
/// all uses, uses appear in the right places). This is used by unit tests.
void verifyMemorySSA() const;
+ /// Check clobber sanity for an access.
+ void checkClobberSanityAccess(const MemoryAccess *MA) const;
+
/// Used in various insertion functions to specify whether we are talking
/// about the beginning or end of a block.
enum InsertionPlace { Beginning, End };
@@ -764,6 +793,7 @@ protected:
void verifyDomination(Function &F) const;
void verifyOrdering(Function &F) const;
void verifyDominationNumbers(const Function &F) const;
+ void verifyClobberSanity(const Function &F) const;
// This is used by the use optimizer and updater.
AccessList *getWritableBlockAccesses(const BasicBlock *BB) const {
@@ -796,16 +826,20 @@ protected:
InsertionPlace);
void insertIntoListsBefore(MemoryAccess *, const BasicBlock *,
AccessList::iterator);
- MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *);
+ MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *,
+ const MemoryUseOrDef *Template = nullptr);
private:
+ class ClobberWalkerBase;
class CachingWalker;
+ class SkipSelfWalker;
class OptimizeUses;
CachingWalker *getWalkerImpl();
void buildMemorySSA();
void optimizeUses();
+ void prepareForMoveTo(MemoryAccess *, BasicBlock *);
void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const;
using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>;
@@ -816,7 +850,8 @@ private:
void markUnreachableAsLiveOnEntry(BasicBlock *BB);
bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
MemoryPhi *createMemoryPhi(BasicBlock *BB);
- MemoryUseOrDef *createNewAccess(Instruction *);
+ MemoryUseOrDef *createNewAccess(Instruction *,
+ const MemoryUseOrDef *Template = nullptr);
MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &);
MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool);
@@ -851,7 +886,9 @@ private:
mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering;
// Memory SSA building info
+ std::unique_ptr<ClobberWalkerBase> WalkerBase;
std::unique_ptr<CachingWalker> Walker;
+ std::unique_ptr<SkipSelfWalker> SkipWalker;
unsigned NextID;
};
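A hedged sketch of the inline accessors introduced above, assuming an up-to-date MemorySSA
analysis for the function containing the instruction:

  #include "llvm/Analysis/MemorySSA.h"
  using namespace llvm;

  static void inspect(MemorySSA &MSSA, Instruction &I) {
    if (MemoryUseOrDef *MA = MSSA.getMemoryAccess(&I)) {
      // Every memory Mod/Ref'ing instruction has a MemoryUse or MemoryDef;
      // its defining access is the nearest dominating def (or phi).
      MemoryAccess *Def = MA->getDefiningAccess();
      (void)Def;
    }
  }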
diff --git a/contrib/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/contrib/llvm/include/llvm/Analysis/MemorySSAUpdater.h
index 38f08c1eebdc..169d5bd9fa8b 100644
--- a/contrib/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/contrib/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -35,8 +35,11 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFGDiff.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
@@ -45,6 +48,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
@@ -57,6 +61,12 @@ class MemoryAccess;
class LLVMContext;
class raw_ostream;
+using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
+using PhiToDefMap = SmallDenseMap<MemoryPhi *, MemoryAccess *>;
+using CFGUpdate = cfg::Update<BasicBlock *>;
+using GraphDiffInvBBPair =
+ std::pair<const GraphDiff<BasicBlock *> *, Inverse<BasicBlock *>>;
+
class MemorySSAUpdater {
private:
MemorySSA *MSSA;
@@ -70,6 +80,7 @@ private:
public:
MemorySSAUpdater(MemorySSA *MSSA) : MSSA(MSSA) {}
+
/// Insert a definition into the MemorySSA IR. RenameUses will rename any use
/// below the new def block (and any inserted phis). RenameUses should be set
/// to true if the definition may cause new aliases for loads below it. This
@@ -89,15 +100,48 @@ public:
/// Where a mayalias b, *does* require RenameUses be set to true.
void insertDef(MemoryDef *Def, bool RenameUses = false);
void insertUse(MemoryUse *Use);
+ /// Update the MemoryPhi in `To` following an edge deletion between `From` and
+ /// `To`. If `To` becomes unreachable, a call to removeBlocks should be made.
+ void removeEdge(BasicBlock *From, BasicBlock *To);
+ /// Update the MemoryPhi in `To` to have a single incoming edge from `From`,
+ /// following a CFG change that replaced multiple edges (switch) with a direct
+ /// branch.
+ void removeDuplicatePhiEdgesBetween(BasicBlock *From, BasicBlock *To);
+ /// Update MemorySSA after a loop was cloned, given the blocks in RPO order,
+ /// the exit blocks and a 1:1 mapping of all blocks and instructions
+ /// cloned. This involves duplicating all defs and uses in the cloned blocks.
+ /// Updating phi nodes in exit block successors is done separately.
+ void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
+ ArrayRef<BasicBlock *> ExitBlocks,
+ const ValueToValueMapTy &VM,
+ bool IgnoreIncomingWithNoClones = false);
+ // Block BB was fully or partially cloned into its predecessor P1. Map
+ // contains the 1:1 mapping of instructions cloned and VM[BB]=P1.
+ void updateForClonedBlockIntoPred(BasicBlock *BB, BasicBlock *P1,
+ const ValueToValueMapTy &VM);
+ /// Update phi nodes in exit block successors following cloning. Exit blocks
+ /// that were not cloned don't have additional predecessors added.
+ void updateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
+ const ValueToValueMapTy &VMap,
+ DominatorTree &DT);
+ void updateExitBlocksForClonedLoop(
+ ArrayRef<BasicBlock *> ExitBlocks,
+ ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT);
+
+ /// Apply CFG updates, analogous with the DT edge updates.
+ void applyUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT);
+ /// Apply CFG insert updates, analogous with the DT edge updates.
+ void applyInsertUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT);
+
void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where);
void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where);
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB,
MemorySSA::InsertionPlace Where);
- /// `From` block was spliced into `From` and `To`.
- /// Move all accesses from `From` to `To` starting at instruction `Start`.
- /// `To` is newly created BB, so empty of MemorySSA::MemoryAccesses.
- /// Edges are already updated, so successors of `To` with MPhi nodes need to
- /// update incoming block.
+ /// `From` block was spliced into `From` and `To`. There is a CFG edge from
+ /// `From` to `To`. Move all accesses from `From` to `To` starting at
+ /// instruction `Start`. `To` is a newly created BB, so it is empty of
+ /// MemorySSA::MemoryAccesses. Edges are already updated, so successors of
+ /// `To` with MPhi nodes need their incoming block updated.
/// |------| |------|
/// | From | | From |
/// | | |------|
@@ -108,12 +152,12 @@ public:
/// |------| |------|
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To,
Instruction *Start);
- /// `From` block was merged into `To`. All instructions were moved and
- /// `From` is an empty block with successor edges; `From` is about to be
- /// deleted. Move all accesses from `From` to `To` starting at instruction
- /// `Start`. `To` may have multiple successors, `From` has a single
- /// predecessor. `From` may have successors with MPhi nodes, replace their
- /// incoming block with `To`.
+ /// `From` block was merged into `To`. There is a CFG edge from `To` to
+ /// `From`. `To` still branches to `From`, but all instructions were moved and
+ /// `From` is now an empty block; `From` is about to be deleted. Move all
+ /// accesses from `From` to `To` starting at instruction `Start`. `To` may
+ /// have multiple successors, while `From` has a single predecessor. `From` may
+ /// have successors with MPhi nodes; replace their incoming block with `To`.
/// |------| |------|
/// | To | | To |
/// |------| | |
@@ -124,15 +168,14 @@ public:
/// |------| |------|
void moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To,
Instruction *Start);
- /// BasicBlock Old had New, an empty BasicBlock, added directly before it,
- /// and the predecessors in Preds that used to point to Old, now point to
- /// New. If New is the only predecessor, move Old's Phi, if present, to New.
+ /// A new empty BasicBlock (New) now branches directly to Old. Some of
+ /// Old's predecessors (Preds) are now branching to New instead of Old.
+ /// If New is the only predecessor, move Old's Phi, if present, to New.
/// Otherwise, add a new Phi in New with appropriate incoming values, and
/// update the incoming values in Old's Phi node too, if present.
- void
- wireOldPredecessorsToNewImmediatePredecessor(BasicBlock *Old, BasicBlock *New,
- ArrayRef<BasicBlock *> Preds);
-
+ void wireOldPredecessorsToNewImmediatePredecessor(
+ BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds,
+ bool IdenticalEdgesWereMerged = true);
// The below are utility functions. Other than creation of accesses to pass
// to insertDef, and removeAccess to remove accesses, you should generally
// not attempt to update memoryssa yourself. It is very non-trivial to get
@@ -220,6 +263,23 @@ private:
template <class RangeType>
MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
void fixupDefs(const SmallVectorImpl<WeakVH> &);
+ // Clone all uses and defs from BB to NewBB given a 1:1 map of all
+ // instructions and blocks cloned, and a map of MemoryPhi : Definition
+ // (MemoryAccess Phi or Def). VMap maps old instructions to cloned
+ // instructions and old blocks to cloned blocks. MPhiMap is created in the
+ // caller of this private method, and maps existing MemoryPhis to new
+ // definitions that new MemoryAccesses must point to. These definitions may
+ // not necessarily be MemoryPhis themselves; they may be MemoryDefs. As such,
+ // the map is between MemoryPhis and MemoryAccesses, where the MemoryAccesses
+ // may be MemoryPhis or MemoryDefs and not MemoryUses.
+ void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
+ const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap);
+ template <typename Iter>
+ void privateUpdateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
+ Iter ValuesBegin, Iter ValuesEnd,
+ DominatorTree &DT);
+ void applyInsertUpdates(ArrayRef<CFGUpdate>, DominatorTree &DT,
+ const GraphDiff<BasicBlock *> *GD);
};
} // end namespace llvm
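As a hedged illustration of the updater API introduced above: a transform that already records its CFG edge changes for the dominator tree can hand the same update list to MemorySSA. The helper below is a sketch only; the surrounding pass and the way the updates are collected are assumed, not part of this change.

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/Dominators.h"

using namespace llvm;

// Sketch: keep the dominator tree and MemorySSA in sync with one update list.
static void applyCFGChanges(MemorySSA &MSSA, DominatorTree &DT,
                            ArrayRef<cfg::Update<BasicBlock *>> Updates) {
  MemorySSAUpdater MSSAU(&MSSA);
  DT.applyUpdates(Updates);        // update the dominator tree first
  MSSAU.applyUpdates(Updates, DT); // then apply the analogous MemorySSA updates
}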
diff --git a/contrib/llvm/include/llvm/Analysis/MustExecute.h b/contrib/llvm/include/llvm/Analysis/MustExecute.h
index 97ad76d451ca..ad3222c17e62 100644
--- a/contrib/llvm/include/llvm/Analysis/MustExecute.h
+++ b/contrib/llvm/include/llvm/Analysis/MustExecute.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
@@ -31,33 +32,138 @@ class DominatorTree;
class Loop;
/// Captures loop safety information.
-/// It keep information for loop & its header may throw exception or otherwise
+/// It keeps information on whether loop blocks may throw an exception or
/// otherwise exit abnormally on any iteration of the loop which might actually
/// execute at runtime. The primary way to consume this information is via
/// isGuaranteedToExecute below, but some callers bail out or fall back to
/// alternate reasoning if a loop contains any implicit control flow.
-struct LoopSafetyInfo {
- bool MayThrow = false; // The current loop contains an instruction which
- // may throw.
- bool HeaderMayThrow = false; // Same as previous, but specific to loop header
+/// NOTE: LoopSafetyInfo contains cached information regarding loops and their
+/// particular blocks. This information is only dropped on invocation of
+/// computeLoopSafetyInfo. If the loop or any of its blocks is deleted, or if
+/// any throwing instructions have been added or removed from them, or if the
+/// control flow has changed, or in case of other meaningful modifications, the
+/// LoopSafetyInfo needs to be recomputed. If meaningful modifications to the
+/// loop were made and the info wasn't recomputed properly, the behavior of all
+/// methods except for computeLoopSafetyInfo is undefined.
+class LoopSafetyInfo {
// Used to update funclet bundle operands.
DenseMap<BasicBlock *, ColorVector> BlockColors;
+protected:
+ /// Computes block colors.
+ void computeBlockColors(const Loop *CurLoop);
+
+public:
+ /// Returns block colors map that is used to update funclet operand bundles.
+ const DenseMap<BasicBlock *, ColorVector> &getBlockColors() const;
+
+ /// Copy colors of block \p Old into the block \p New.
+ void copyColors(BasicBlock *New, BasicBlock *Old);
+
+ /// Returns true iff the block \p BB potentially may throw an exception. It
+ /// can be a false positive in cases where we want to avoid complex analysis.
+ virtual bool blockMayThrow(const BasicBlock *BB) const = 0;
+
+ /// Returns true iff any block of the loop for which this info is computed
+ /// contains an instruction that may throw or otherwise exit abnormally.
+ virtual bool anyBlockMayThrow() const = 0;
+
+ /// Return true if we must reach the block \p BB under assumption that the
+ /// loop \p CurLoop is entered.
+ bool allLoopPathsLeadToBlock(const Loop *CurLoop, const BasicBlock *BB,
+ const DominatorTree *DT) const;
+
+ /// Computes safety information for the loop \p CurLoop, checking its body and
+ /// header for instructions that may throw. Updates the safety information
+ /// stored in this object.
+ /// Note: This is defined to clear and reinitialize an already initialized
+ /// LoopSafetyInfo. Some callers rely on this fact.
+ virtual void computeLoopSafetyInfo(const Loop *CurLoop) = 0;
+
+ /// Returns true if the instruction in a loop is guaranteed to execute at
+ /// least once (under the assumption that the loop is entered).
+ virtual bool isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const = 0;
+
LoopSafetyInfo() = default;
+
+ virtual ~LoopSafetyInfo() = default;
};
-/// Computes safety information for a loop checks loop body & header for
-/// the possibility of may throw exception, it takes LoopSafetyInfo and loop as
-/// argument. Updates safety information in LoopSafetyInfo argument.
-/// Note: This is defined to clear and reinitialize an already initialized
-/// LoopSafetyInfo. Some callers rely on this fact.
-void computeLoopSafetyInfo(LoopSafetyInfo *, Loop *);
-
-/// Returns true if the instruction in a loop is guaranteed to execute at least
-/// once (under the assumption that the loop is entered).
-bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT,
- const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo);
+
+/// Simple and conservative implementation of LoopSafetyInfo that can give
+/// false-positive answers to its queries in order to avoid complicated
+/// analysis.
+class SimpleLoopSafetyInfo: public LoopSafetyInfo {
+ bool MayThrow = false; // The current loop contains an instruction which
+ // may throw.
+ bool HeaderMayThrow = false; // Same as previous, but specific to loop header
+
+public:
+ virtual bool blockMayThrow(const BasicBlock *BB) const;
+
+ virtual bool anyBlockMayThrow() const;
+
+ virtual void computeLoopSafetyInfo(const Loop *CurLoop);
+
+ virtual bool isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const;
+
+ SimpleLoopSafetyInfo() : LoopSafetyInfo() {};
+
+ virtual ~SimpleLoopSafetyInfo() {};
+};
+
+/// This implementation of LoopSafetyInfo uses ImplicitControlFlowTracking to
+/// give precise answers to "may throw" queries. It uses a cache that must be
+/// invalidated by calling the methods insertInstructionTo and removeInstruction
+/// whenever we modify a basic block's contents by adding or removing
+/// instructions.
+class ICFLoopSafetyInfo: public LoopSafetyInfo {
+ bool MayThrow = false; // The current loop contains an instruction which
+ // may throw.
+ // Contains information about implicit control flow in this loop's blocks.
+ mutable ImplicitControlFlowTracking ICF;
+ // Contains information about instructions that may write to memory.
+ mutable MemoryWriteTracking MW;
+
+public:
+ virtual bool blockMayThrow(const BasicBlock *BB) const;
+
+ virtual bool anyBlockMayThrow() const;
+
+ virtual void computeLoopSafetyInfo(const Loop *CurLoop);
+
+ virtual bool isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const;
+
+ /// Returns true if we could not execute a memory-modifying instruction before
+ /// we enter \p BB under the assumption that \p CurLoop is entered.
+ bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop)
+ const;
+
+ /// Returns true if we could not execute a memory-modifying instruction before
+ /// we execute \p I under the assumption that \p CurLoop is entered.
+ bool doesNotWriteMemoryBefore(const Instruction &I, const Loop *CurLoop)
+ const;
+
+ /// Inform the safety info that we are planning to insert a new instruction
+ /// \p Inst into the basic block \p BB. It will make all cache updates to keep
+ /// it correct after this insertion.
+ void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB);
+
+ /// Inform safety info that we are planning to remove the instruction \p Inst
+ /// from its block. It will make all cache updates to keep it correct after
+ /// this removal.
+ void removeInstruction(const Instruction *Inst);
+
+ ICFLoopSafetyInfo(DominatorTree *DT) : LoopSafetyInfo(), ICF(DT), MW(DT) {};
+
+ virtual ~ICFLoopSafetyInfo() {};
+};
}
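A minimal sketch of how a LICM-style client might use the class hierarchy above; the loop, dominator tree and candidate instruction are assumed to come from the caller, and real hoisting legality involves more checks than shown here.

#include "llvm/Analysis/MustExecute.h"

using namespace llvm;

// Sketch: use ICFLoopSafetyInfo to ask whether an instruction is guaranteed
// to execute once the loop is entered (a prerequisite for hoisting it).
static bool guaranteedToExecuteInLoop(Instruction &I, Loop *L,
                                      DominatorTree *DT) {
  ICFLoopSafetyInfo SafetyInfo(DT);
  SafetyInfo.computeLoopSafetyInfo(L); // must be recomputed after loop changes
  return SafetyInfo.isGuaranteedToExecute(I, DT, L);
}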
diff --git a/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h
index 559c77c30811..58a67042ea2d 100644
--- a/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h
@@ -60,7 +60,7 @@ public:
FunctionModRefBehavior getModRefBehavior(const Function *F);
using AAResultBase::getModRefInfo;
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
index 07beb0bb60a3..1f497fab35da 100644
--- a/contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
+++ b/contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
@@ -51,25 +51,25 @@ extern bool EnableARCOpts;
/// on.
inline bool ModuleHasARC(const Module &M) {
return
- M.getNamedValue("objc_retain") ||
- M.getNamedValue("objc_release") ||
- M.getNamedValue("objc_autorelease") ||
- M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
- M.getNamedValue("objc_unsafeClaimAutoreleasedReturnValue") ||
- M.getNamedValue("objc_retainBlock") ||
- M.getNamedValue("objc_autoreleaseReturnValue") ||
- M.getNamedValue("objc_autoreleasePoolPush") ||
- M.getNamedValue("objc_loadWeakRetained") ||
- M.getNamedValue("objc_loadWeak") ||
- M.getNamedValue("objc_destroyWeak") ||
- M.getNamedValue("objc_storeWeak") ||
- M.getNamedValue("objc_initWeak") ||
- M.getNamedValue("objc_moveWeak") ||
- M.getNamedValue("objc_copyWeak") ||
- M.getNamedValue("objc_retainedObject") ||
- M.getNamedValue("objc_unretainedObject") ||
- M.getNamedValue("objc_unretainedPointer") ||
- M.getNamedValue("clang.arc.use");
+ M.getNamedValue("llvm.objc.retain") ||
+ M.getNamedValue("llvm.objc.release") ||
+ M.getNamedValue("llvm.objc.autorelease") ||
+ M.getNamedValue("llvm.objc.retainAutoreleasedReturnValue") ||
+ M.getNamedValue("llvm.objc.unsafeClaimAutoreleasedReturnValue") ||
+ M.getNamedValue("llvm.objc.retainBlock") ||
+ M.getNamedValue("llvm.objc.autoreleaseReturnValue") ||
+ M.getNamedValue("llvm.objc.autoreleasePoolPush") ||
+ M.getNamedValue("llvm.objc.loadWeakRetained") ||
+ M.getNamedValue("llvm.objc.loadWeak") ||
+ M.getNamedValue("llvm.objc.destroyWeak") ||
+ M.getNamedValue("llvm.objc.storeWeak") ||
+ M.getNamedValue("llvm.objc.initWeak") ||
+ M.getNamedValue("llvm.objc.moveWeak") ||
+ M.getNamedValue("llvm.objc.copyWeak") ||
+ M.getNamedValue("llvm.objc.retainedObject") ||
+ M.getNamedValue("llvm.objc.unretainedObject") ||
+ M.getNamedValue("llvm.objc.unretainedPointer") ||
+ M.getNamedValue("llvm.objc.clang.arc.use");
}
/// This is a wrapper around getUnderlyingObject which also knows how to
diff --git a/contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h b/contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h
index 0b92d8b48356..018ea1f851be 100644
--- a/contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h
+++ b/contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h
@@ -11,6 +11,7 @@
#define LLVM_ANALYSIS_OBJCARCINSTKIND_H
#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Instructions.h"
namespace llvm {
@@ -48,7 +49,7 @@ enum class ARCInstKind {
CopyWeak, ///< objc_copyWeak (derived)
DestroyWeak, ///< objc_destroyWeak (derived)
StoreStrong, ///< objc_storeStrong (derived)
- IntrinsicUser, ///< clang.arc.use
+ IntrinsicUser, ///< llvm.objc.clang.arc.use
CallOrUser, ///< could call objc_release and/or "use" pointers
Call, ///< could call objc_release
User, ///< could "use" a pointer
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/OrderedInstructions.h b/contrib/llvm/include/llvm/Analysis/OrderedInstructions.h
index 7f57fde638b8..7e3850b87c57 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/OrderedInstructions.h
+++ b/contrib/llvm/include/llvm/Analysis/OrderedInstructions.h
@@ -17,8 +17,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H
-#define LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H
+#ifndef LLVM_ANALYSIS_ORDEREDINSTRUCTIONS_H
+#define LLVM_ANALYSIS_ORDEREDINSTRUCTIONS_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
@@ -62,4 +62,4 @@ public:
} // end namespace llvm
-#endif // LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H
+#endif // LLVM_ANALYSIS_ORDEREDINSTRUCTIONS_H
diff --git a/contrib/llvm/include/llvm/Analysis/Passes.h b/contrib/llvm/include/llvm/Analysis/Passes.h
index 09b28a0b0884..081dd5000835 100644
--- a/contrib/llvm/include/llvm/Analysis/Passes.h
+++ b/contrib/llvm/include/llvm/Analysis/Passes.h
@@ -61,10 +61,10 @@ namespace llvm {
//===--------------------------------------------------------------------===//
//
- // createDivergenceAnalysisPass - This pass determines which branches in a GPU
+ // createLegacyDivergenceAnalysisPass - This pass determines which branches in a GPU
// program are divergent.
//
- FunctionPass *createDivergenceAnalysisPass();
+ FunctionPass *createLegacyDivergenceAnalysisPass();
//===--------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/include/llvm/Analysis/PhiValues.h b/contrib/llvm/include/llvm/Analysis/PhiValues.h
index 6607b329c04f..76204ac1bc6c 100644
--- a/contrib/llvm/include/llvm/Analysis/PhiValues.h
+++ b/contrib/llvm/include/llvm/Analysis/PhiValues.h
@@ -88,6 +88,22 @@ private:
/// All values reachable from each component.
DenseMap<unsigned int, ConstValueSet> ReachableMap;
+ /// A CallbackVH to notify PhiValues when a value is deleted or replaced, so
+ /// that the cached information for that value can be cleared to avoid
+ /// dangling pointers to invalid values.
+ class PhiValuesCallbackVH final : public CallbackVH {
+ PhiValues *PV;
+ void deleted() override;
+ void allUsesReplacedWith(Value *New) override;
+
+ public:
+ PhiValuesCallbackVH(Value *V, PhiValues *PV = nullptr)
+ : CallbackVH(V), PV(PV) {}
+ };
+
+ /// A set of callbacks to the values that processPhi has seen.
+ DenseSet<PhiValuesCallbackVH, DenseMapInfo<Value *>> TrackedValues;
+
/// The function that the PhiValues is for.
const Function &F;
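The PhiValuesCallbackVH above follows the usual CallbackVH invalidation pattern. A generic, hypothetical sketch of that pattern (not PhiValues code) for a cache keyed by Value pointers:

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/ValueHandle.h"
#include <memory>
#include <vector>

using namespace llvm;

// Hypothetical analysis cache: when a tracked Value is deleted or RAUW'd,
// the stale entry is purged so the map never holds a dangling key.
class MyValueCache {
  DenseMap<const Value *, int> Cache;

  class CacheVH final : public CallbackVH {
    MyValueCache *Owner;

    void deleted() override {
      Owner->Cache.erase(getValPtr()); // value is going away
      setValPtr(nullptr);
    }
    void allUsesReplacedWith(Value *) override {
      Owner->Cache.erase(getValPtr()); // result may not hold for the new value
    }

  public:
    CacheVH(Value *V, MyValueCache *Owner) : CallbackVH(V), Owner(Owner) {}
  };

  std::vector<std::unique_ptr<CacheVH>> Handles;

public:
  void remember(Value *V, int Result) {
    Cache[V] = Result;
    Handles.push_back(std::make_unique<CacheVH>(V, this));
  }
};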
diff --git a/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index 58b67e74ba51..3aef4be72d71 100644
--- a/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -98,14 +98,14 @@ public:
bool isFunctionEntryCold(const Function *F);
/// Returns true if \p F contains only cold code.
bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
- /// Returns true if \p F is a hot function.
+ /// Returns true if count \p C is considered hot.
bool isHotCount(uint64_t C);
/// Returns true if count \p C is considered cold.
bool isColdCount(uint64_t C);
- /// Returns true if BasicBlock \p B is considered hot.
- bool isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI);
- /// Returns true if BasicBlock \p B is considered cold.
- bool isColdBB(const BasicBlock *B, BlockFrequencyInfo *BFI);
+ /// Returns true if BasicBlock \p BB is considered hot.
+ bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
+ /// Returns true if BasicBlock \p BB is considered cold.
+ bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
/// Returns true if CallSite \p CS is considered hot.
bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
/// Returns true if Callsite \p CS is considered cold.
@@ -134,9 +134,8 @@ public:
static char ID;
ProfileSummaryInfoWrapperPass();
- ProfileSummaryInfo *getPSI() {
- return &*PSI;
- }
+ ProfileSummaryInfo &getPSI() { return *PSI; }
+ const ProfileSummaryInfo &getPSI() const { return *PSI; }
bool doInitialization(Module &M) override;
bool doFinalization(Module &M) override;
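A hedged sketch of a caller adjusting to the renamed block queries and to getPSI() now returning a reference; the counting helper and the surrounding legacy pass are hypothetical.

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Function.h"

using namespace llvm;

// In a legacy pass the result would typically be obtained as:
//   ProfileSummaryInfo &PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
static unsigned countHotBlocks(Function &F, ProfileSummaryInfo &PSI,
                               BlockFrequencyInfo &BFI) {
  unsigned Hot = 0;
  for (BasicBlock &BB : F)
    if (PSI.isHotBlock(&BB, &BFI)) // renamed from isHotBB in this change
      ++Hot;
  return Hot;
}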
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
index 89918e3c205b..8f4200b07e5c 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1833,6 +1833,10 @@ private:
const SCEV *getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags);
+ // Get addrec expr already created or create a new one.
+ const SCEV *getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L, SCEV::NoWrapFlags Flags);
+
/// Return x if \p Val is f(x) where f is a 1-1 function.
const SCEV *stripInjectiveFunctions(const SCEV *Val) const;
diff --git a/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h b/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
index 508968e16e5d..1356c6e9198a 100644
--- a/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
+++ b/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -16,7 +16,7 @@
#define LLVM_ANALYSIS_SCOPEDNOALIASAA_H
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <memory>
@@ -41,8 +41,8 @@ public:
}
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
private:
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
diff --git a/contrib/llvm/include/llvm/Analysis/SparsePropagation.h b/contrib/llvm/include/llvm/Analysis/SparsePropagation.h
index defcf96afb25..02a2e64268b7 100644
--- a/contrib/llvm/include/llvm/Analysis/SparsePropagation.h
+++ b/contrib/llvm/include/llvm/Analysis/SparsePropagation.h
@@ -189,12 +189,12 @@ private:
/// getFeasibleSuccessors - Return a vector of booleans to indicate which
/// successors are reachable from a given terminator instruction.
- void getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl<bool> &Succs,
+ void getFeasibleSuccessors(Instruction &TI, SmallVectorImpl<bool> &Succs,
bool AggressiveUndef);
void visitInst(Instruction &I);
void visitPHINode(PHINode &I);
- void visitTerminatorInst(TerminatorInst &TI);
+ void visitTerminator(Instruction &TI);
};
//===----------------------------------------------------------------------===//
@@ -286,7 +286,7 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::markEdgeExecutable(
template <class LatticeKey, class LatticeVal, class KeyInfo>
void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getFeasibleSuccessors(
- TerminatorInst &TI, SmallVectorImpl<bool> &Succs, bool AggressiveUndef) {
+ Instruction &TI, SmallVectorImpl<bool> &Succs, bool AggressiveUndef) {
Succs.resize(TI.getNumSuccessors());
if (TI.getNumSuccessors() == 0)
return;
@@ -330,7 +330,7 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getFeasibleSuccessors(
return;
}
- if (TI.isExceptional()) {
+ if (TI.isExceptionalTerminator()) {
Succs.assign(Succs.size(), true);
return;
}
@@ -374,7 +374,7 @@ template <class LatticeKey, class LatticeVal, class KeyInfo>
bool SparseSolver<LatticeKey, LatticeVal, KeyInfo>::isEdgeFeasible(
BasicBlock *From, BasicBlock *To, bool AggressiveUndef) {
SmallVector<bool, 16> SuccFeasible;
- TerminatorInst *TI = From->getTerminator();
+ Instruction *TI = From->getTerminator();
getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef);
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
@@ -385,8 +385,8 @@ bool SparseSolver<LatticeKey, LatticeVal, KeyInfo>::isEdgeFeasible(
}
template <class LatticeKey, class LatticeVal, class KeyInfo>
-void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitTerminatorInst(
- TerminatorInst &TI) {
+void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitTerminator(
+ Instruction &TI) {
SmallVector<bool, 16> SuccFeasible;
getFeasibleSuccessors(TI, SuccFeasible, true);
@@ -465,8 +465,8 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitInst(Instruction &I) {
if (ChangedValue.second != LatticeFunc->getUntrackedVal())
UpdateState(ChangedValue.first, ChangedValue.second);
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I))
- visitTerminatorInst(*TI);
+ if (I.isTerminator())
+ visitTerminator(I);
}
template <class LatticeKey, class LatticeVal, class KeyInfo>
diff --git a/contrib/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/contrib/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
new file mode 100644
index 000000000000..8a151650a34c
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
@@ -0,0 +1,120 @@
+//===- StackSafetyAnalysis.h - Stack memory safety analysis -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Stack Safety Analysis detects allocas and arguments with safe access.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_STACKSAFETYANALYSIS_H
+#define LLVM_ANALYSIS_STACKSAFETYANALYSIS_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// Interface to access stack safety analysis results for a single function.
+class StackSafetyInfo {
+public:
+ struct FunctionInfo;
+
+private:
+ std::unique_ptr<FunctionInfo> Info;
+
+public:
+ StackSafetyInfo();
+ StackSafetyInfo(FunctionInfo &&Info);
+ StackSafetyInfo(StackSafetyInfo &&);
+ StackSafetyInfo &operator=(StackSafetyInfo &&);
+ ~StackSafetyInfo();
+
+ // TODO: Add methods useful for clients.
+ void print(raw_ostream &O) const;
+};
+
+/// StackSafetyInfo wrapper for the new pass manager.
+class StackSafetyAnalysis : public AnalysisInfoMixin<StackSafetyAnalysis> {
+ friend AnalysisInfoMixin<StackSafetyAnalysis>;
+ static AnalysisKey Key;
+
+public:
+ using Result = StackSafetyInfo;
+ StackSafetyInfo run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Printer pass for the \c StackSafetyAnalysis results.
+class StackSafetyPrinterPass : public PassInfoMixin<StackSafetyPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit StackSafetyPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// StackSafetyInfo wrapper for the legacy pass manager
+class StackSafetyInfoWrapperPass : public FunctionPass {
+ StackSafetyInfo SSI;
+
+public:
+ static char ID;
+ StackSafetyInfoWrapperPass();
+
+ const StackSafetyInfo &getResult() const { return SSI; }
+
+ void print(raw_ostream &O, const Module *M) const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnFunction(Function &F) override;
+};
+
+using StackSafetyGlobalInfo = std::map<const GlobalValue *, StackSafetyInfo>;
+
+/// This pass performs the global (interprocedural) stack safety analysis (new
+/// pass manager).
+class StackSafetyGlobalAnalysis
+ : public AnalysisInfoMixin<StackSafetyGlobalAnalysis> {
+ friend AnalysisInfoMixin<StackSafetyGlobalAnalysis>;
+ static AnalysisKey Key;
+
+public:
+ using Result = StackSafetyGlobalInfo;
+ Result run(Module &M, ModuleAnalysisManager &AM);
+};
+
+/// Printer pass for the \c StackSafetyGlobalAnalysis results.
+class StackSafetyGlobalPrinterPass
+ : public PassInfoMixin<StackSafetyGlobalPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit StackSafetyGlobalPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+/// This pass performs the global (interprocedural) stack safety analysis
+/// (legacy pass manager).
+class StackSafetyGlobalInfoWrapperPass : public ModulePass {
+ StackSafetyGlobalInfo SSI;
+
+public:
+ static char ID;
+
+ StackSafetyGlobalInfoWrapperPass();
+
+ const StackSafetyGlobalInfo &getResult() const { return SSI; }
+
+ void print(raw_ostream &O, const Module *M) const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnModule(Module &M) override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_STACKSAFETYANALYSIS_H
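A sketch of querying the new function-level result, under the assumption that the analysis has been registered with the manager (for example through PassBuilder); the driver function itself is hypothetical.

#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Fetch the per-function stack safety result and print it, much like
// StackSafetyPrinterPass does.
static void printStackSafety(Function &F, FunctionAnalysisManager &FAM) {
  const StackSafetyInfo &SSI = FAM.getResult<StackSafetyAnalysis>(F);
  SSI.print(errs());
}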
diff --git a/contrib/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
new file mode 100644
index 000000000000..df693d9d8e8c
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
@@ -0,0 +1,86 @@
+//===- SyncDependenceAnalysis.h - Divergent Branch Dependence -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file defines the SyncDependenceAnalysis class, which computes for
+// every divergent branch the set of phi nodes that the branch will make
+// divergent.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
+#define LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include <memory>
+
+namespace llvm {
+
+class BasicBlock;
+class DominatorTree;
+class Loop;
+class PostDominatorTree;
+
+using ConstBlockSet = SmallPtrSet<const BasicBlock *, 4>;
+
+/// \brief Relates points of divergent control to join points in
+/// reducible CFGs.
+///
+/// This analysis relates points of divergent control to points of converging
+/// divergent control. The analysis requires all loops to be reducible.
+class SyncDependenceAnalysis {
+ void visitSuccessor(const BasicBlock &succBlock, const Loop *termLoop,
+ const BasicBlock *defBlock);
+
+public:
+ bool inRegion(const BasicBlock &BB) const;
+
+ ~SyncDependenceAnalysis();
+ SyncDependenceAnalysis(const DominatorTree &DT, const PostDominatorTree &PDT,
+ const LoopInfo &LI);
+
+ /// \brief Computes divergent join points and loop exits caused by branch
+ /// divergence in \p Term.
+ ///
+ /// The set of blocks which are reachable by disjoint paths from \p Term.
+ /// The set also contains loop exits if there are two disjoint paths:
+ /// one from \p Term to the loop exit and another from \p Term to the loop
+ /// header. Those exit blocks are added to the returned set.
+ /// If L is the parent loop of \p Term and an exit of L is in the returned
+ /// set then L is a divergent loop.
+ const ConstBlockSet &join_blocks(const Instruction &Term);
+
+ /// \brief Computes divergent join points and loop exits (in the surrounding
+ /// loop) caused by the divergent loop exits of \p Loop.
+ ///
+ /// The set of blocks which are reachable by disjoint paths from the
+ /// loop exits of \p Loop.
+ /// This treats the loop as a single node in \p Loop's parent loop.
+ /// The returned set has the same properties as for join_blocks(TermInst&).
+ const ConstBlockSet &join_blocks(const Loop &Loop);
+
+private:
+ static ConstBlockSet EmptyBlockSet;
+
+ ReversePostOrderTraversal<const Function *> FuncRPOT;
+ const DominatorTree &DT;
+ const PostDominatorTree &PDT;
+ const LoopInfo &LI;
+
+ std::map<const Loop *, std::unique_ptr<ConstBlockSet>> CachedLoopExitJoins;
+ std::map<const Instruction *, std::unique_ptr<ConstBlockSet>>
+ CachedBranchJoins;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
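A hedged sketch of the intended query pattern: once a terminator is known to be divergent, ask which join blocks its divergence reaches. The caller that maintains the divergence worklist is assumed.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/SyncDependenceAnalysis.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Collect the blocks whose phi nodes become divergent because of a divergent
// terminator, as computed by join_blocks().
static void collectDivergentJoins(const Instruction &DivergentTerm,
                                  SyncDependenceAnalysis &SDA,
                                  SmallPtrSetImpl<const BasicBlock *> &Out) {
  for (const BasicBlock *JoinBB : SDA.join_blocks(DivergentTerm))
    Out.insert(JoinBB);
}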
diff --git a/contrib/llvm/include/llvm/Analysis/SyntheticCountsUtils.h b/contrib/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
index 87f4a0100b38..db80bef001e2 100644
--- a/contrib/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
+++ b/contrib/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
@@ -36,16 +36,17 @@ public:
using EdgeRef = typename CGT::EdgeRef;
using SccTy = std::vector<NodeRef>;
- using GetRelBBFreqTy = function_ref<Optional<Scaled64>(EdgeRef)>;
- using GetCountTy = function_ref<uint64_t(NodeRef)>;
- using AddCountTy = function_ref<void(NodeRef, uint64_t)>;
+ // Not all EdgeRefs have information about the source of the edge. Hence the
+ // NodeRef corresponding to the source of the EdgeRef is explicitly passed.
+ using GetProfCountTy = function_ref<Optional<Scaled64>(NodeRef, EdgeRef)>;
+ using AddCountTy = function_ref<void(NodeRef, Scaled64)>;
- static void propagate(const CallGraphType &CG, GetRelBBFreqTy GetRelBBFreq,
- GetCountTy GetCount, AddCountTy AddCount);
+ static void propagate(const CallGraphType &CG, GetProfCountTy GetProfCount,
+ AddCountTy AddCount);
private:
- static void propagateFromSCC(const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq,
- GetCountTy GetCount, AddCountTy AddCount);
+ static void propagateFromSCC(const SccTy &SCC, GetProfCountTy GetProfCount,
+ AddCountTy AddCount);
};
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index f94debba9c52..518a85ee1a01 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -565,6 +565,30 @@ TLI_DEFINE_STRING_INTERNAL("cosl")
/// char *ctermid(char *s);
TLI_DEFINE_ENUM_INTERNAL(ctermid)
TLI_DEFINE_STRING_INTERNAL("ctermid")
+/// int execl(const char *path, const char *arg, ...);
+TLI_DEFINE_ENUM_INTERNAL(execl)
+TLI_DEFINE_STRING_INTERNAL("execl")
+/// int execle(const char *file, const char *arg, ..., char * const envp[]);
+TLI_DEFINE_ENUM_INTERNAL(execle)
+TLI_DEFINE_STRING_INTERNAL("execle")
+/// int execlp(const char *file, const char *arg, ...);
+TLI_DEFINE_ENUM_INTERNAL(execlp)
+TLI_DEFINE_STRING_INTERNAL("execlp")
+/// int execv(const char *path, char *const argv[]);
+TLI_DEFINE_ENUM_INTERNAL(execv)
+TLI_DEFINE_STRING_INTERNAL("execv")
+/// int execvP(const char *file, const char *search_path, char *const argv[]);
+TLI_DEFINE_ENUM_INTERNAL(execvP)
+TLI_DEFINE_STRING_INTERNAL("execvP")
+/// int execve(const char *filename, char *const argv[], char *const envp[]);
+TLI_DEFINE_ENUM_INTERNAL(execve)
+TLI_DEFINE_STRING_INTERNAL("execve")
+/// int execvp(const char *file, char *const argv[]);
+TLI_DEFINE_ENUM_INTERNAL(execvp)
+TLI_DEFINE_STRING_INTERNAL("execvp")
+/// int execvpe(const char *file, char *const argv[], char *const envp[]);
+TLI_DEFINE_ENUM_INTERNAL(execvpe)
+TLI_DEFINE_STRING_INTERNAL("execvpe")
/// double exp(double x);
TLI_DEFINE_ENUM_INTERNAL(exp)
TLI_DEFINE_STRING_INTERNAL("exp")
@@ -709,6 +733,9 @@ TLI_DEFINE_STRING_INTERNAL("fopen")
/// FILE *fopen64(const char *filename, const char *opentype)
TLI_DEFINE_ENUM_INTERNAL(fopen64)
TLI_DEFINE_STRING_INTERNAL("fopen64")
+/// int fork();
+TLI_DEFINE_ENUM_INTERNAL(fork)
+TLI_DEFINE_STRING_INTERNAL("fork")
/// int fprintf(FILE *stream, const char *format, ...);
TLI_DEFINE_ENUM_INTERNAL(fprintf)
TLI_DEFINE_STRING_INTERNAL("fprintf")
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 59657cca40f5..223175d17c2d 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -289,7 +289,7 @@ public:
/// Returns whether V is a source of divergence.
///
/// This function provides the target-dependent information for
- /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
+ /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first
/// builds the dependency graph, and then runs the reachability algorithm
/// starting with the sources of divergence.
bool isSourceOfDivergence(const Value *V) const;
@@ -581,12 +581,21 @@ public:
struct MemCmpExpansionOptions {
// The list of available load sizes (in bytes), sorted in decreasing order.
SmallVector<unsigned, 8> LoadSizes;
+ // Set to true to allow overlapping loads. For example, 7-byte compares can
+ // be done with two 4-byte compares instead of 4+2+1-byte compares. This
+ // requires that all loads in LoadSizes can be done unaligned.
+ bool AllowOverlappingLoads = false;
};
const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
/// Enable matching of interleaved access groups.
bool enableInterleavedAccessVectorization() const;
+ /// Enable matching of interleaved access groups that contain predicated
+ /// accesses or gaps and are therefore vectorized using masked
+ /// vector loads/stores.
+ bool enableMaskedInterleavedAccessVectorization() const;
+
/// Indicate that it is potentially unsafe to automatically vectorize
/// floating-point operations because the semantics of vector and scalar
/// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
@@ -739,6 +748,10 @@ public:
/// and the number of execution units in the CPU.
unsigned getMaxInterleaveFactor(unsigned VF) const;
+ /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
+ static OperandValueKind getOperandInfo(Value *V,
+ OperandValueProperties &OpProps);
+
/// This is an approximation of reciprocal throughput of a math/logic op.
/// A higher cost indicates less expected throughput.
/// From Agner Fog's guides, reciprocal throughput is "the average number of
@@ -762,7 +775,9 @@ public:
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
/// The index and subtype parameters are used by the subvector insertion and
- /// extraction shuffle kinds.
+ /// extraction shuffle kinds to show the insert/extract point and the type of
+ /// the subvector being inserted/extracted.
+ /// NOTE: For subvector extractions Tp represents the source type.
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
Type *SubTp = nullptr) const;
@@ -817,9 +832,13 @@ public:
/// load allows gaps)
/// \p Alignment is the alignment of the memory operation
/// \p AddressSpace is address space of the pointer.
+ /// \p UseMaskForCond indicates if the memory access is predicated.
+ /// \p UseMaskForGaps indicates if gaps should be masked.
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace) const;
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false) const;
/// Calculate the cost of performing a vector reduction.
///
@@ -915,6 +934,14 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
+ /// \returns True if the caller and callee agree on how \p Args will be passed
+ /// to the callee.
+ /// \param[out] Args The list of compatible arguments. The implementation may
+ /// filter out any incompatible args from this list.
+ bool areFunctionArgsABICompatible(const Function *Caller,
+ const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const;
+
/// The type of load/store indexing.
enum MemIndexedMode {
MIM_Unindexed, ///< No indexing.
@@ -1068,6 +1095,7 @@ public:
virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
bool IsZeroCmp) const = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
+ virtual bool enableMaskedInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
@@ -1128,7 +1156,9 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) = 0;
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false) = 0;
virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) = 0;
virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
@@ -1157,6 +1187,9 @@ public:
unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
+ virtual bool
+ areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const = 0;
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
virtual bool isIndexedStoreLegal(MemIndexedMode Mode,Type *Ty) const = 0;
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
@@ -1342,6 +1375,9 @@ public:
bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();
}
+ bool enableMaskedInterleavedAccessVectorization() override {
+ return Impl.enableMaskedInterleavedAccessVectorization();
+ }
bool isFPVectorizationPotentiallyUnsafe() override {
return Impl.isFPVectorizationPotentiallyUnsafe();
}
@@ -1467,9 +1503,11 @@ public:
}
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace) override {
+ unsigned AddressSpace, bool UseMaskForCond,
+ bool UseMaskForGaps) override {
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) override {
@@ -1530,6 +1568,11 @@ public:
const Function *Callee) const override {
return Impl.areInlineCompatible(Caller, Callee);
}
+ bool areFunctionArgsABICompatible(
+ const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const override {
+ return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
+ }
bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
}
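The AllowOverlappingLoads option added above is easiest to see on a concrete width: a 7-byte equality memcmp can be expanded as two unaligned 4-byte loads covering bytes [0,4) and [3,7), instead of a 4+2+1 chain. The C++ below is a sketch of the idea only; the real expansion is performed on IR by the memcmp expansion code, not in source like this.

#include <cstdint>
#include <cstring>

// Equality of 7 bytes using two overlapping 4-byte loads (memcpy models an
// unaligned load): the ranges [0,4) and [3,7) together cover all 7 bytes.
static bool equal7Bytes(const void *P, const void *Q) {
  uint32_t A0, B0, A1, B1;
  std::memcpy(&A0, P, 4);
  std::memcpy(&B0, Q, 4);
  std::memcpy(&A1, static_cast<const char *>(P) + 3, 4);
  std::memcpy(&B1, static_cast<const char *>(Q) + 3, 4);
  return A0 == B0 && A1 == B1;
}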
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d80ae1d6845d..c9a234deeb7d 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -158,6 +158,9 @@ public:
case Intrinsic::dbg_label:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
+ case Intrinsic::is_constant:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::objectsize:
@@ -311,6 +314,8 @@ public:
bool enableInterleavedAccessVectorization() { return false; }
+ bool enableMaskedInterleavedAccessVectorization() { return false; }
+
bool isFPVectorizationPotentiallyUnsafe() { return false; }
bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
@@ -448,8 +453,9 @@ public:
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false) {
return 1;
}
@@ -520,6 +526,14 @@ public:
Callee->getFnAttribute("target-features"));
}
+ bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const {
+ return (Caller->getFnAttribute("target-cpu") ==
+ Callee->getFnAttribute("target-cpu")) &&
+ (Caller->getFnAttribute("target-features") ==
+ Callee->getFnAttribute("target-features"));
+ }
+
bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
const DataLayout &DL) const {
return false;
diff --git a/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h
index 7fcfdb3a817c..d2e6df22425e 100644
--- a/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h
@@ -17,7 +17,7 @@
#define LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <memory>
@@ -43,10 +43,10 @@ public:
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
FunctionModRefBehavior getModRefBehavior(const Function *F);
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
private:
bool Aliases(const MDNode *A, const MDNode *B) const;
diff --git a/contrib/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/contrib/llvm/include/llvm/Analysis/TypeMetadataUtils.h
index 6764563f6830..3bf9c5d20741 100644
--- a/contrib/llvm/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/contrib/llvm/include/llvm/Analysis/TypeMetadataUtils.h
@@ -20,6 +20,8 @@
namespace llvm {
+class DominatorTree;
+
/// The type of CFI jumptable needed for a function.
enum CfiFunctionLinkage {
CFL_Definition = 0,
@@ -39,7 +41,8 @@ struct DevirtCallSite {
/// call sites based on the call and return them in DevirtCalls.
void findDevirtualizableCallsForTypeTest(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI);
+ SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI,
+ DominatorTree &DT);
/// Given a call to the intrinsic \@llvm.type.checked.load, find all
/// devirtualizable call sites based on the call and return them in DevirtCalls.
@@ -47,7 +50,7 @@ void findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
SmallVectorImpl<Instruction *> &LoadedPtrs,
SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
- const CallInst *CI);
+ const CallInst *CI, DominatorTree &DT);
}
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/ValueTracking.h b/contrib/llvm/include/llvm/Analysis/ValueTracking.h
index c1a91a8e5981..f46fdfcb608e 100644
--- a/contrib/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/contrib/llvm/include/llvm/Analysis/ValueTracking.h
@@ -55,14 +55,16 @@ class Value;
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr,
- OptimizationRemarkEmitter *ORE = nullptr);
+ OptimizationRemarkEmitter *ORE = nullptr,
+ bool UseInstrInfo = true);
/// Returns the known bits rather than passing by reference.
KnownBits computeKnownBits(const Value *V, const DataLayout &DL,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr,
- OptimizationRemarkEmitter *ORE = nullptr);
+ OptimizationRemarkEmitter *ORE = nullptr,
+ bool UseInstrInfo = true);
/// Compute known bits from the range metadata.
/// \p KnownZero the set of bits that are known to be zero
@@ -75,7 +77,8 @@ class Value;
const DataLayout &DL,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Return true if the given value is known to have exactly one bit set when
/// defined. For vectors return true if every element is known to be a power
@@ -86,7 +89,8 @@ class Value;
bool OrZero = false, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);
@@ -99,7 +103,8 @@ class Value;
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Return true if the two given values are negations of each other.
/// Currently can recognize Value pair:
@@ -112,28 +117,32 @@ class Value;
unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Returns true if the given value is known to be positive (i.e. non-negative
/// and non-zero).
bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Returns true if the given value is known to be negative (i.e. non-positive
/// and non-zero).
bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Return true if the given values are known to be non-equal when defined.
/// Supports scalar integer types only.
bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL,
- AssumptionCache *AC = nullptr,
- const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Return true if 'V & Mask' is known to be zero. We use this predicate to
/// simplify operations downstream. Mask is known to be zero for bits that V
@@ -148,7 +157,8 @@ class Value;
const DataLayout &DL,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign
@@ -160,7 +170,8 @@ class Value;
unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// This function computes the integer multiple of Base that equals V. If
/// successful, it returns true and returns the multiple in Multiple. If
@@ -194,7 +205,8 @@ class Value;
/// Return true if the floating-point scalar value is not a NaN or if the
/// floating-point vector value has no NaN elements. Return false if a value
/// could ever be NaN.
- bool isKnownNeverNaN(const Value *V);
+ bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
+ unsigned Depth = 0);
/// Return true if we can prove that the specified FP value's sign bit is 0.
///
@@ -209,7 +221,8 @@ class Value;
/// return the i8 value that it is represented with. This is true for all i8
/// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double
/// 0.0 etc. If the value can't be handled with a repeated byte store (e.g.
- /// i16 0x1234), return null.
+ /// i16 0x1234), return null. If the value is entirely undef and padding,
+ /// return undef.
Value *isBytewiseValue(Value *V);
/// Given an aggregate and a sequence of indices, see if the scalar value
@@ -284,10 +297,10 @@ class Value;
/// This function returns the call pointer argument that is considered the same by
/// aliasing rules. You CAN'T use it to replace one value with another.
- const Value *getArgumentAliasingToReturnedPointer(ImmutableCallSite CS);
- inline Value *getArgumentAliasingToReturnedPointer(CallSite CS) {
- return const_cast<Value *>(
- getArgumentAliasingToReturnedPointer(ImmutableCallSite(CS)));
+ const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call);
+ inline Value *getArgumentAliasingToReturnedPointer(CallBase *Call) {
+ return const_cast<Value *>(getArgumentAliasingToReturnedPointer(
+ const_cast<const CallBase *>(Call)));
}
// {launder,strip}.invariant.group returns a pointer that aliases its argument,
@@ -296,7 +309,7 @@ class Value;
// considered as capture. The arguments are not marked as returned either,
// because it would make it useless.
bool isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
- ImmutableCallSite CS);
+ const CallBase *Call);
/// This method strips off any GEP address adjustments and pointer casts from
/// the specified value, returning the original object being addressed. Note
@@ -405,18 +418,21 @@ class Value;
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
- const DominatorTree *DT);
+ const DominatorTree *DT,
+ bool UseInstrInfo = true);
OverflowResult computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
- const DominatorTree *DT);
+ const DominatorTree *DT,
+ bool UseInstrInfo = true);
OverflowResult computeOverflowForUnsignedAdd(const Value *LHS,
const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
- const DominatorTree *DT);
+ const DominatorTree *DT,
+ bool UseInstrInfo = true);
OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC = nullptr,
@@ -594,6 +610,12 @@ class Value;
Optional<bool> isImpliedCondition(const Value *LHS, const Value *RHS,
const DataLayout &DL, bool LHSIsTrue = true,
unsigned Depth = 0);
+
+ /// Return the boolean condition value in the context of the given instruction
+ /// if it is known based on dominating conditions.
+ Optional<bool> isImpliedByDomCondition(const Value *Cond,
+ const Instruction *ContextI,
+ const DataLayout &DL);
} // end namespace llvm
#endif // LLVM_ANALYSIS_VALUETRACKING_H
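A small hedged sketch of the extended ValueTracking interface above: the trailing UseInstrInfo flag (default true) controls whether per-instruction flags and metadata may be used, and the result is a KnownBits bit pattern. The helper below is illustrative only.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Ask whether V is known to be even at the given context instruction.
static bool isKnownEvenAt(const Value *V, const Instruction *CtxI,
                          const DataLayout &DL) {
  KnownBits Known = computeKnownBits(V, DL, /*Depth=*/0, /*AC=*/nullptr, CtxI,
                                     /*DT=*/nullptr, /*ORE=*/nullptr,
                                     /*UseInstrInfo=*/true);
  return Known.Zero[0]; // bit 0 known zero => the value is even
}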
diff --git a/contrib/llvm/include/llvm/Analysis/VectorUtils.h b/contrib/llvm/include/llvm/Analysis/VectorUtils.h
index 9fde36d61091..be4d4f17b9ad 100644
--- a/contrib/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/contrib/llvm/include/llvm/Analysis/VectorUtils.h
@@ -15,6 +15,7 @@
#define LLVM_ANALYSIS_VECTORUTILS_H
#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
@@ -23,6 +24,7 @@ namespace llvm {
template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
+template <typename InstTy> class InterleaveGroup;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
@@ -115,8 +117,24 @@ computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
DemandedBits &DB,
const TargetTransformInfo *TTI=nullptr);
+/// Compute the union of two access-group lists.
+///
+/// If the list contains just one access group, it is returned directly. If the
+/// list is empty, returns nullptr.
+MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);
+
+/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
+/// are both in. If either instruction does not access memory at all, it is
+/// considered to be in every list.
+///
+/// If the list contains just one access group, it is returned directly. If the
+/// list is empty, returns nullptr.
+MDNode *intersectAccessGroups(const Instruction *Inst1,
+ const Instruction *Inst2);
+
/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
-/// MD_nontemporal]. For K in Kinds, we get the MDNode for K from each of the
+/// MD_nontemporal, MD_access_group].
+/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for M equal to the intersection value.
@@ -124,6 +142,35 @@ computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
+/// Create a mask that filters the members of an interleave group where there
+/// are gaps.
+///
+/// For example, the mask for a \p Group with interleave factor 3
+/// and \p VF 4 that has only its first member present is:
+///
+/// <1,0,0,1,0,0,1,0,0,1,0,0>
+///
+/// Note: The result is a mask of 0's and 1's, as opposed to the other
+/// create[*]Mask() utilities which create a shuffle mask (mask that
+/// consists of indices).
+Constant *createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
+ const InterleaveGroup<Instruction> &Group);
+
+/// Create a mask with replicated elements.
+///
+/// This function creates a shuffle mask for replicating each of the \p VF
+/// elements in a vector \p ReplicationFactor times. It can be used to
+/// transform a mask of \p VF elements into a mask of
+/// \p VF * \p ReplicationFactor elements used by a predicated
+/// interleaved-group of loads/stores whose Interleaved-factor ==
+/// \p ReplicationFactor.
+///
+/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
+///
+/// <0,0,0,1,1,1,2,2,2,3,3,3>
+Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
+ unsigned VF);
+
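A hedged usage sketch for createReplicatedMask, mirroring the masked interleaved-group case it documents; Builder (an IRBuilder<>) and BlockInMask (a VF-wide i1 vector Value*) are assumptions of this example.

  // Widen a VF-wide predicate to VF * ReplicationFactor lanes.
  Constant *RepMask =
      createReplicatedMask(Builder, /*ReplicationFactor=*/3, /*VF=*/4);
  Value *WideMask = Builder.CreateShuffleVector(
      BlockInMask, UndefValue::get(BlockInMask->getType()), RepMask,
      "interleaved.mask");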
/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
@@ -176,6 +223,381 @@ Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
+/// The group of interleaved loads/stores sharing the same stride and
+/// close to each other.
+///
+/// Each member in this group has an index starting from 0, and the largest
+/// index should be less than interleaved factor, which is equal to the absolute
+/// value of the access's stride.
+///
+/// E.g. An interleaved load group of factor 4:
+/// for (unsigned i = 0; i < 1024; i+=4) {
+/// a = A[i]; // Member of index 0
+/// b = A[i+1]; // Member of index 1
+/// d = A[i+3]; // Member of index 3
+/// ...
+/// }
+///
+/// An interleaved store group of factor 4:
+/// for (unsigned i = 0; i < 1024; i+=4) {
+/// ...
+/// A[i] = a; // Member of index 0
+/// A[i+1] = b; // Member of index 1
+/// A[i+2] = c; // Member of index 2
+/// A[i+3] = d; // Member of index 3
+/// }
+///
+/// Note: the interleaved load group could have gaps (missing members), but
+/// the interleaved store group doesn't allow gaps.
+template <typename InstTy> class InterleaveGroup {
+public:
+ InterleaveGroup(unsigned Factor, bool Reverse, unsigned Align)
+ : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}
+
+ InterleaveGroup(InstTy *Instr, int Stride, unsigned Align)
+ : Align(Align), InsertPos(Instr) {
+ assert(Align && "The alignment should be non-zero");
+
+ Factor = std::abs(Stride);
+ assert(Factor > 1 && "Invalid interleave factor");
+
+ Reverse = Stride < 0;
+ Members[0] = Instr;
+ }
+
+ bool isReverse() const { return Reverse; }
+ unsigned getFactor() const { return Factor; }
+ unsigned getAlignment() const { return Align; }
+ unsigned getNumMembers() const { return Members.size(); }
+
+ /// Try to insert a new member \p Instr with index \p Index and
+ /// alignment \p NewAlign. The index is relative to the leader and may be
+ /// negative if the new member becomes the new leader.
+ ///
+ /// \returns false if the instruction doesn't belong to the group.
+ bool insertMember(InstTy *Instr, int Index, unsigned NewAlign) {
+ assert(NewAlign && "The new member's alignment should be non-zero");
+
+ int Key = Index + SmallestKey;
+
+ // Skip if there is already a member with the same index.
+ if (Members.find(Key) != Members.end())
+ return false;
+
+ if (Key > LargestKey) {
+ // The largest index is always less than the interleave factor.
+ if (Index >= static_cast<int>(Factor))
+ return false;
+
+ LargestKey = Key;
+ } else if (Key < SmallestKey) {
+ // The spread between the largest and smallest keys must stay below
+ // the interleave factor.
+ if (LargestKey - Key >= static_cast<int>(Factor))
+ return false;
+
+ SmallestKey = Key;
+ }
+
+ // It's always safe to select the minimum alignment.
+ Align = std::min(Align, NewAlign);
+ Members[Key] = Instr;
+ return true;
+ }
+
+ /// Get the member with the given index \p Index
+ ///
+ /// \returns nullptr if the group contains no such member.
+ InstTy *getMember(unsigned Index) const {
+ int Key = SmallestKey + Index;
+ auto Member = Members.find(Key);
+ if (Member == Members.end())
+ return nullptr;
+
+ return Member->second;
+ }
+
+ /// Get the index for the given member. Unlike the key in the member
+ /// map, the index starts from 0.
+ unsigned getIndex(const InstTy *Instr) const {
+ for (auto I : Members) {
+ if (I.second == Instr)
+ return I.first - SmallestKey;
+ }
+
+ llvm_unreachable("InterleaveGroup contains no such member");
+ }
+
+ InstTy *getInsertPos() const { return InsertPos; }
+ void setInsertPos(InstTy *Inst) { InsertPos = Inst; }
+
+ /// Add metadata (e.g. alias info) from the instructions in this group to \p
+ /// NewInst.
+ ///
+ /// FIXME: this function currently does not add noalias metadata a la
+ /// addNewMetadata. To do that we need to compute the intersection of the
+ /// noalias info from all members.
+ void addMetadata(InstTy *NewInst) const;
+
+ /// Returns true if this Group requires a scalar iteration to handle gaps.
+ bool requiresScalarEpilogue() const {
+ // If the last member of the Group exists, then a scalar epilog is not
+ // needed for this group.
+ if (getMember(getFactor() - 1))
+ return false;
+
+ // We have a group with gaps. It therefore cannot be a group of stores,
+ // and it can't be a reversed access, because such groups get invalidated.
+ assert(!getMember(0)->mayWriteToMemory() &&
+ "Group should have been invalidated");
+ assert(!isReverse() && "Group should have been invalidated");
+
+ // This is a group of loads, with gaps, and without a last member.
+ return true;
+ }
+
+private:
+ unsigned Factor; // Interleave Factor.
+ bool Reverse;
+ unsigned Align;
+ DenseMap<int, InstTy *> Members;
+ int SmallestKey = 0;
+ int LargestKey = 0;
+
+ // To avoid breaking dependences, vectorized instructions of an interleave
+ // group should be inserted at either the first load or the last store in
+ // program order.
+ //
+ // E.g. %even = load i32 // Insert Position
+ // %add = add i32 %even // Use of %even
+ // %odd = load i32
+ //
+ // store i32 %even
+ // %odd = add i32 // Def of %odd
+ // store i32 %odd // Insert Position
+ InstTy *InsertPos;
+};
+
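A sketch of how the class above could be populated for the factor-4 load example in its comment; LoadA, LoadB and LoadD stand for the accesses to A[i], A[i+1] and A[i+3] and are assumptions of this example.

  InterleaveGroup<Instruction> Group(LoadA, /*Stride=*/4, /*Align=*/4);
  Group.insertMember(LoadB, /*Index=*/1, /*NewAlign=*/4);
  Group.insertMember(LoadD, /*Index=*/3, /*NewAlign=*/4);
  assert(Group.getFactor() == 4 && !Group.isReverse());
  assert(Group.getMember(2) == nullptr);  // the gap at A[i+2]
  // The last member (index 3) is present, so the group's accesses stay in
  // bounds and no scalar epilogue is required despite the gap.
  assert(!Group.requiresScalarEpilogue());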
+/// Drive the analysis of interleaved memory accesses in the loop.
+///
+/// Use this class to analyze interleaved accesses only when we can vectorize
+/// a loop. Otherwise it's meaningless to do the analysis, as vectorization
+/// of interleaved accesses is unsafe.
+///
+/// The analysis collects interleave groups and records the relationships
+/// between the member and the group in a map.
+class InterleavedAccessInfo {
+public:
+ InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
+ DominatorTree *DT, LoopInfo *LI,
+ const LoopAccessInfo *LAI)
+ : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
+
+ ~InterleavedAccessInfo() { reset(); }
+
+ /// Analyze the interleaved accesses and collect them in interleave
+ /// groups. Substitute symbolic strides using \p Strides.
+ /// Consider also predicated loads/stores in the analysis if
+ /// \p EnableMaskedInterleavedGroup is true.
+ void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
+
+ /// Invalidate groups, e.g., in case all blocks in the loop will be predicated
+ /// contrary to the original assumption. Although we currently prevent group
+ /// formation for predicated accesses, we may be able to relax this limitation
+ /// in the future once we handle more complicated blocks.
+ void reset() {
+ SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
+ // Avoid releasing a pointer twice.
+ for (auto &I : InterleaveGroupMap)
+ DelSet.insert(I.second);
+ for (auto *Ptr : DelSet)
+ delete Ptr;
+ InterleaveGroupMap.clear();
+ RequiresScalarEpilogue = false;
+ }
+
+ /// Check if \p Instr belongs to any interleave group.
+ bool isInterleaved(Instruction *Instr) const {
+ return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
+ }
+
+ /// Get the interleave group that \p Instr belongs to.
+ ///
+ /// \returns nullptr if \p Instr does not belong to any group.
+ InterleaveGroup<Instruction> *
+ getInterleaveGroup(const Instruction *Instr) const {
+ if (InterleaveGroupMap.count(Instr))
+ return InterleaveGroupMap.find(Instr)->second;
+ return nullptr;
+ }
+
+ iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
+ getInterleaveGroups() {
+ return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
+ }
+
+ /// Returns true if an interleaved group that may access memory
+ /// out-of-bounds requires a scalar epilogue iteration for correctness.
+ bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
+
+ /// Invalidate groups that require a scalar epilogue (due to gaps). This can
+ /// happen when optimizing for size forbids a scalar epilogue, and the gap
+ /// cannot be filtered by masking the load/store.
+ void invalidateGroupsRequiringScalarEpilogue();
+
+private:
+ /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
+ /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
+ /// The interleaved access analysis can also add new predicates (for example
+ /// by versioning strides of pointers).
+ PredicatedScalarEvolution &PSE;
+
+ Loop *TheLoop;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ const LoopAccessInfo *LAI;
+
+ /// True if the loop may contain non-reversed interleaved groups with
+ /// out-of-bounds accesses. We ensure we don't speculatively access memory
+ /// out-of-bounds by executing at least one scalar epilogue iteration.
+ bool RequiresScalarEpilogue = false;
+
+ /// Holds the relationships between the members and the interleave group.
+ DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;
+
+ SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;
+
+ /// Holds dependences among the memory accesses in the loop. It maps a source
+ /// access to a set of dependent sink accesses.
+ DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
+
+ /// The descriptor for a strided memory access.
+ struct StrideDescriptor {
+ StrideDescriptor() = default;
+ StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
+ unsigned Align)
+ : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
+
+ // The access's stride. It is negative for a reverse access.
+ int64_t Stride = 0;
+
+ // The scalar expression of this access.
+ const SCEV *Scev = nullptr;
+
+ // The size of the memory object.
+ uint64_t Size = 0;
+
+ // The alignment of this access.
+ unsigned Align = 0;
+ };
+
+ /// A type for holding instructions and their stride descriptors.
+ using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
+
+ /// Create a new interleave group with the given instruction \p Instr,
+ /// stride \p Stride and alignment \p Align.
+ ///
+ /// \returns the newly created interleave group.
+ InterleaveGroup<Instruction> *
+ createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) {
+ assert(!InterleaveGroupMap.count(Instr) &&
+ "Already in an interleaved access group");
+ InterleaveGroupMap[Instr] =
+ new InterleaveGroup<Instruction>(Instr, Stride, Align);
+ InterleaveGroups.insert(InterleaveGroupMap[Instr]);
+ return InterleaveGroupMap[Instr];
+ }
+
+ /// Release the group and remove all the relationships.
+ void releaseGroup(InterleaveGroup<Instruction> *Group) {
+ for (unsigned i = 0; i < Group->getFactor(); i++)
+ if (Instruction *Member = Group->getMember(i))
+ InterleaveGroupMap.erase(Member);
+
+ InterleaveGroups.erase(Group);
+ delete Group;
+ }
+
+ /// Collect all the accesses with a constant stride in program order.
+ void collectConstStrideAccesses(
+ MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
+ const ValueToValueMap &Strides);
+
+ /// Returns true if \p Stride is allowed in an interleaved group.
+ static bool isStrided(int Stride);
+
+ /// Returns true if \p BB is a predicated block.
+ bool isPredicated(BasicBlock *BB) const {
+ return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
+ }
+
+ /// Returns true if LoopAccessInfo can be used for dependence queries.
+ bool areDependencesValid() const {
+ return LAI && LAI->getDepChecker().getDependences();
+ }
+
+ /// Returns true if memory accesses \p A and \p B can be reordered, if
+ /// necessary, when constructing interleaved groups.
+ ///
+ /// \p A must precede \p B in program order. We return false only when
+ /// reordering is prevented because \p A and \p B may be dependent.
+ bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
+ StrideEntry *B) const {
+ // Code motion for interleaved accesses can potentially hoist strided loads
+ // and sink strided stores. The code below checks the legality of the
+ // following two conditions:
+ //
+ // 1. Potentially moving a strided load (B) before any store (A) that
+ // precedes B, or
+ //
+ // 2. Potentially moving a strided store (A) after any load or store (B)
+ // that A precedes.
+ //
+ // It's legal to reorder A and B if we know there isn't a dependence from A
+ // to B. Note that this determination is conservative since some
+ // dependences could potentially be reordered safely.
+
+ // A is potentially the source of a dependence.
+ auto *Src = A->first;
+ auto SrcDes = A->second;
+
+ // B is potentially the sink of a dependence.
+ auto *Sink = B->first;
+ auto SinkDes = B->second;
+
+ // Code motion for interleaved accesses can't violate WAR dependences.
+ // Thus, reordering is legal if the source isn't a write.
+ if (!Src->mayWriteToMemory())
+ return true;
+
+ // At least one of the accesses must be strided.
+ if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
+ return true;
+
+ // If dependence information is not available from LoopAccessInfo,
+ // conservatively assume the instructions can't be reordered.
+ if (!areDependencesValid())
+ return false;
+
+ // If we know there is a dependence from source to sink, assume the
+ // instructions can't be reordered. Otherwise, reordering is legal.
+ return Dependences.find(Src) == Dependences.end() ||
+ !Dependences.lookup(Src).count(Sink);
+ }
+
+ /// Collect the dependences from LoopAccessInfo.
+ ///
+ /// We process the dependences once during the interleaved access analysis to
+ /// enable constant-time dependence queries.
+ void collectDependences() {
+ if (!areDependencesValid())
+ return;
+ auto *Deps = LAI->getDepChecker().getDependences();
+ for (auto Dep : *Deps)
+ Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
+ }
+};
+
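A rough driver-side sketch of how this analysis is meant to be consumed; PSE, TheLoop, DT, LI, LAI and MemInstr are assumed to be available from the surrounding vectorizer.

  InterleavedAccessInfo IAI(PSE, TheLoop, DT, LI, LAI);
  IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
  if (InterleaveGroup<Instruction> *Group = IAI.getInterleaveGroup(MemInstr)) {
    // Emit one wide access for the whole group instead of Factor narrow ones.
    unsigned Factor = Group->getFactor();
    (void)Factor;
  }
  if (IAI.requiresScalarEpilogue()) {
    // Keep at least one scalar iteration to cover groups with gaps.
  }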
} // llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h b/contrib/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
new file mode 100644
index 000000000000..de44f41720ed
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
@@ -0,0 +1,70 @@
+//===- AMDGPUMetadataVerifier.h - MsgPack Types -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This is a verifier for AMDGPU HSA metadata, which can verify both
+/// well-typed metadata and untyped metadata. When verifying in the non-strict
+/// mode, untyped metadata is coerced into the correct type if possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
+#define LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
+
+#include "llvm/BinaryFormat/MsgPackTypes.h"
+
+namespace llvm {
+namespace AMDGPU {
+namespace HSAMD {
+namespace V3 {
+
+/// Verifier for AMDGPU HSA metadata.
+///
+/// Operates in two modes:
+///
+/// In strict mode, metadata must already be well-typed.
+///
+/// In non-strict mode, metadata is coerced into expected types when possible.
+class MetadataVerifier {
+ bool Strict;
+
+ bool verifyScalar(msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
+ bool verifyInteger(msgpack::Node &Node);
+ bool verifyArray(msgpack::Node &Node,
+ function_ref<bool(msgpack::Node &)> verifyNode,
+ Optional<size_t> Size = None);
+ bool verifyEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ function_ref<bool(msgpack::Node &)> verifyNode);
+ bool
+ verifyScalarEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
+ bool verifyIntegerEntry(msgpack::MapNode &MapNode, StringRef Key,
+ bool Required);
+ bool verifyKernelArgs(msgpack::Node &Node);
+ bool verifyKernel(msgpack::Node &Node);
+
+public:
+ /// Construct a MetadataVerifier, specifying whether it will operate in \p
+ /// Strict mode.
+ MetadataVerifier(bool Strict) : Strict(Strict) {}
+
+ /// Verify given HSA metadata.
+ ///
+ /// \returns True when successful, false when metadata is invalid.
+ bool verify(msgpack::Node &HSAMetadataRoot);
+};
+
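A hedged usage sketch; Root is assumed to be a parsed msgpack::Node holding the code object's HSA metadata (see MsgPackTypes.h below).

  AMDGPU::HSAMD::V3::MetadataVerifier Verifier(/*Strict=*/false);
  if (!Verifier.verify(*Root))
    report_fatal_error("invalid AMDGPU HSA code object metadata");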
+} // end namespace V3
+} // end namespace HSAMD
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
diff --git a/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def b/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def
index 944c5dd1c157..6ad3cb57f62f 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -18,9 +18,11 @@
defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \
defined HANDLE_DW_CC || defined HANDLE_DW_LNS || defined HANDLE_DW_LNE || \
defined HANDLE_DW_LNCT || defined HANDLE_DW_MACRO || \
- defined HANDLE_DW_RLE || defined HANDLE_DW_CFA || \
+ defined HANDLE_DW_RLE || \
+ (defined HANDLE_DW_CFA && defined HANDLE_DW_CFA_PRED) || \
defined HANDLE_DW_APPLE_PROPERTY || defined HANDLE_DW_UT || \
- defined HANDLE_DWARF_SECTION || defined HANDLE_DW_IDX)
+ defined HANDLE_DWARF_SECTION || defined HANDLE_DW_IDX || \
+ defined HANDLE_DW_END)
#error "Missing macro definition of HANDLE_DW*"
#endif
@@ -41,7 +43,7 @@
#endif
#ifndef HANDLE_DW_LANG
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR)
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)
#endif
#ifndef HANDLE_DW_ATE
@@ -84,6 +86,10 @@
#define HANDLE_DW_CFA(ID, NAME)
#endif
+#ifndef HANDLE_DW_CFA_PRED
+#define HANDLE_DW_CFA_PRED(ID, NAME, PRED)
+#endif
+
#ifndef HANDLE_DW_APPLE_PROPERTY
#define HANDLE_DW_APPLE_PROPERTY(ID, NAME)
#endif
@@ -100,6 +106,10 @@
#define HANDLE_DW_IDX(ID, NAME)
#endif
+#ifndef HANDLE_DW_END
+#define HANDLE_DW_END(ID, NAME)
+#endif
+
HANDLE_DW_TAG(0x0000, null, 2, DWARF)
HANDLE_DW_TAG(0x0001, array_type, 2, DWARF)
HANDLE_DW_TAG(0x0002, class_type, 2, DWARF)
@@ -622,50 +632,50 @@ HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU)
HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU)
// DWARF languages.
-HANDLE_DW_LANG(0x0001, C89, 2, DWARF)
-HANDLE_DW_LANG(0x0002, C, 2, DWARF)
-HANDLE_DW_LANG(0x0003, Ada83, 2, DWARF)
-HANDLE_DW_LANG(0x0004, C_plus_plus, 2, DWARF)
-HANDLE_DW_LANG(0x0005, Cobol74, 2, DWARF)
-HANDLE_DW_LANG(0x0006, Cobol85, 2, DWARF)
-HANDLE_DW_LANG(0x0007, Fortran77, 2, DWARF)
-HANDLE_DW_LANG(0x0008, Fortran90, 2, DWARF)
-HANDLE_DW_LANG(0x0009, Pascal83, 2, DWARF)
-HANDLE_DW_LANG(0x000a, Modula2, 2, DWARF)
+HANDLE_DW_LANG(0x0001, C89, 0, 2, DWARF)
+HANDLE_DW_LANG(0x0002, C, 0, 2, DWARF)
+HANDLE_DW_LANG(0x0003, Ada83, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0004, C_plus_plus, 0, 2, DWARF)
+HANDLE_DW_LANG(0x0005, Cobol74, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0006, Cobol85, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0007, Fortran77, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0008, Fortran90, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0009, Pascal83, 1, 2, DWARF)
+HANDLE_DW_LANG(0x000a, Modula2, 1, 2, DWARF)
// New in DWARF v3:
-HANDLE_DW_LANG(0x000b, Java, 3, DWARF)
-HANDLE_DW_LANG(0x000c, C99, 3, DWARF)
-HANDLE_DW_LANG(0x000d, Ada95, 3, DWARF)
-HANDLE_DW_LANG(0x000e, Fortran95, 3, DWARF)
-HANDLE_DW_LANG(0x000f, PLI, 3, DWARF)
-HANDLE_DW_LANG(0x0010, ObjC, 3, DWARF)
-HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 3, DWARF)
-HANDLE_DW_LANG(0x0012, UPC, 3, DWARF)
-HANDLE_DW_LANG(0x0013, D, 3, DWARF)
+HANDLE_DW_LANG(0x000b, Java, 0, 3, DWARF)
+HANDLE_DW_LANG(0x000c, C99, 0, 3, DWARF)
+HANDLE_DW_LANG(0x000d, Ada95, 1, 3, DWARF)
+HANDLE_DW_LANG(0x000e, Fortran95, 1, 3, DWARF)
+HANDLE_DW_LANG(0x000f, PLI, 1, 3, DWARF)
+HANDLE_DW_LANG(0x0010, ObjC, 0, 3, DWARF)
+HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 0, 3, DWARF)
+HANDLE_DW_LANG(0x0012, UPC, 0, 3, DWARF)
+HANDLE_DW_LANG(0x0013, D, 0, 3, DWARF)
// New in DWARF v4:
-HANDLE_DW_LANG(0x0014, Python, 4, DWARF)
+HANDLE_DW_LANG(0x0014, Python, 0, 4, DWARF)
// New in DWARF v5:
-HANDLE_DW_LANG(0x0015, OpenCL, 5, DWARF)
-HANDLE_DW_LANG(0x0016, Go, 5, DWARF)
-HANDLE_DW_LANG(0x0017, Modula3, 5, DWARF)
-HANDLE_DW_LANG(0x0018, Haskell, 5, DWARF)
-HANDLE_DW_LANG(0x0019, C_plus_plus_03, 5, DWARF)
-HANDLE_DW_LANG(0x001a, C_plus_plus_11, 5, DWARF)
-HANDLE_DW_LANG(0x001b, OCaml, 5, DWARF)
-HANDLE_DW_LANG(0x001c, Rust, 5, DWARF)
-HANDLE_DW_LANG(0x001d, C11, 5, DWARF)
-HANDLE_DW_LANG(0x001e, Swift, 5, DWARF)
-HANDLE_DW_LANG(0x001f, Julia, 5, DWARF)
-HANDLE_DW_LANG(0x0020, Dylan, 5, DWARF)
-HANDLE_DW_LANG(0x0021, C_plus_plus_14, 5, DWARF)
-HANDLE_DW_LANG(0x0022, Fortran03, 5, DWARF)
-HANDLE_DW_LANG(0x0023, Fortran08, 5, DWARF)
-HANDLE_DW_LANG(0x0024, RenderScript, 5, DWARF)
-HANDLE_DW_LANG(0x0025, BLISS, 5, DWARF)
+HANDLE_DW_LANG(0x0015, OpenCL, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0016, Go, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0017, Modula3, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0018, Haskell, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0019, C_plus_plus_03, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001a, C_plus_plus_11, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001b, OCaml, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001c, Rust, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001d, C11, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001e, Swift, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001f, Julia, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0020, Dylan, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0021, C_plus_plus_14, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0022, Fortran03, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0023, Fortran08, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0024, RenderScript, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0025, BLISS, 0, 5, DWARF)
// Vendor extensions:
-HANDLE_DW_LANG(0x8001, Mips_Assembler, 0, MIPS)
-HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, GOOGLE)
-HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, BORLAND)
+HANDLE_DW_LANG(0x8001, Mips_Assembler, None, 0, MIPS)
+HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, 0, GOOGLE)
+HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, 0, BORLAND)
// DWARF attribute type encodings.
HANDLE_DW_ATE(0x01, address, 2, DWARF)
@@ -690,6 +700,11 @@ HANDLE_DW_ATE(0x10, UTF, 4, DWARF)
HANDLE_DW_ATE(0x11, UCS, 5, DWARF)
HANDLE_DW_ATE(0x12, ASCII, 5, DWARF)
+// DWARF attribute endianity
+HANDLE_DW_END(0x00, default)
+HANDLE_DW_END(0x01, big)
+HANDLE_DW_END(0x02, little)
+
// DWARF virtuality codes.
HANDLE_DW_VIRTUALITY(0x00, none)
HANDLE_DW_VIRTUALITY(0x01, virtual)
@@ -821,9 +836,10 @@ HANDLE_DW_CFA(0x14, val_offset)
HANDLE_DW_CFA(0x15, val_offset_sf)
HANDLE_DW_CFA(0x16, val_expression)
// Vendor extensions:
-HANDLE_DW_CFA(0x1d, MIPS_advance_loc8)
-HANDLE_DW_CFA(0x2d, GNU_window_save)
-HANDLE_DW_CFA(0x2e, GNU_args_size)
+HANDLE_DW_CFA_PRED(0x1d, MIPS_advance_loc8, SELECT_MIPS64)
+HANDLE_DW_CFA_PRED(0x2d, GNU_window_save, SELECT_SPARC)
+HANDLE_DW_CFA_PRED(0x2d, AARCH64_negate_ra_state, SELECT_AARCH64)
+HANDLE_DW_CFA_PRED(0x2e, GNU_args_size, SELECT_X86)
// Apple Objective-C Property Attributes.
// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind!
@@ -863,6 +879,7 @@ HANDLE_DWARF_SECTION(DebugTypes, ".debug_types", "debug-types")
HANDLE_DWARF_SECTION(DebugLine, ".debug_line", "debug-line")
HANDLE_DWARF_SECTION(DebugLineStr, ".debug_line_str", "debug-line-str")
HANDLE_DWARF_SECTION(DebugLoc, ".debug_loc", "debug-loc")
+HANDLE_DWARF_SECTION(DebugLoclists, ".debug_loclists", "debug-loclists")
HANDLE_DWARF_SECTION(DebugFrame, ".debug_frame", "debug-frame")
HANDLE_DWARF_SECTION(DebugMacro, ".debug_macro", "debug-macro")
HANDLE_DWARF_SECTION(DebugNames, ".debug_names", "debug-names")
@@ -905,7 +922,9 @@ HANDLE_DW_IDX(0x05, type_hash)
#undef HANDLE_DW_MACRO
#undef HANDLE_DW_RLE
#undef HANDLE_DW_CFA
+#undef HANDLE_DW_CFA_PRED
#undef HANDLE_DW_APPLE_PROPERTY
#undef HANDLE_DW_UT
#undef HANDLE_DWARF_SECTION
#undef HANDLE_DW_IDX
+#undef HANDLE_DW_END
diff --git a/contrib/llvm/include/llvm/BinaryFormat/Dwarf.h b/contrib/llvm/include/llvm/BinaryFormat/Dwarf.h
index 9036f405eaea..525a04d5e6cf 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/contrib/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -26,6 +26,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadicDetails.h"
+#include "llvm/ADT/Triple.h"
namespace llvm {
class StringRef;
@@ -150,9 +151,8 @@ enum DecimalSignEncoding {
enum EndianityEncoding {
// Endianity attribute values
- DW_END_default = 0x00,
- DW_END_big = 0x01,
- DW_END_little = 0x02,
+#define HANDLE_DW_END(ID, NAME) DW_END_##NAME = ID,
+#include "llvm/BinaryFormat/Dwarf.def"
DW_END_lo_user = 0x40,
DW_END_hi_user = 0xff
};
@@ -184,7 +184,8 @@ enum DefaultedMemberAttribute {
};
enum SourceLanguage {
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) DW_LANG_##NAME = ID,
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
+ DW_LANG_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
DW_LANG_lo_user = 0x8000,
DW_LANG_hi_user = 0xffff
@@ -273,6 +274,7 @@ enum RangeListEntries {
/// Call frame instruction encodings.
enum CallFrameInfo {
#define HANDLE_DW_CFA(ID, NAME) DW_CFA_##NAME = ID,
+#define HANDLE_DW_CFA_PRED(ID, NAME, ARCH) DW_CFA_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
DW_CFA_extended = 0x00,
@@ -431,7 +433,7 @@ StringRef LNStandardString(unsigned Standard);
StringRef LNExtendedString(unsigned Encoding);
StringRef MacinfoString(unsigned Encoding);
StringRef RangeListEncodingString(unsigned Encoding);
-StringRef CallFrameString(unsigned Encoding);
+StringRef CallFrameString(unsigned Encoding, Triple::ArchType Arch);
StringRef ApplePropertyString(unsigned);
StringRef UnitTypeString(unsigned);
StringRef AtomTypeString(unsigned Atom);
@@ -489,6 +491,8 @@ unsigned AttributeEncodingVendor(TypeKind E);
unsigned LanguageVendor(SourceLanguage L);
/// @}
+Optional<unsigned> LanguageLowerBound(SourceLanguage L);
+
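One plausible way the new LOWER_BOUND column in Dwarf.def feeds this query, written inside namespace llvm::dwarf; a sketch under that assumption, not necessarily the checked-in implementation.

Optional<unsigned> LanguageLowerBound(SourceLanguage Lang) {
  switch (Lang) {
  default:
    return None;
#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)                 \
  case DW_LANG_##NAME:                                                         \
    return LOWER_BOUND;
#include "llvm/BinaryFormat/Dwarf.def"
  }
}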
/// A helper struct providing information about the byte size of DW_FORM
/// values that vary in size depending on the DWARF version, address byte
/// size, or DWARF32/DWARF64.
diff --git a/contrib/llvm/include/llvm/BinaryFormat/ELF.h b/contrib/llvm/include/llvm/BinaryFormat/ELF.h
index 2e778779117b..ce35d127d433 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/contrib/llvm/include/llvm/BinaryFormat/ELF.h
@@ -582,6 +582,7 @@ enum {
EF_HEXAGON_MACH_V60 = 0x00000060, // Hexagon V60
EF_HEXAGON_MACH_V62 = 0x00000062, // Hexagon V62
EF_HEXAGON_MACH_V65 = 0x00000065, // Hexagon V65
+ EF_HEXAGON_MACH_V66 = 0x00000066, // Hexagon V66
// Highest ISA version flags
EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[11:0]
@@ -594,6 +595,7 @@ enum {
EF_HEXAGON_ISA_V60 = 0x00000060, // Hexagon V60 ISA
EF_HEXAGON_ISA_V62 = 0x00000062, // Hexagon V62 ISA
EF_HEXAGON_ISA_V65 = 0x00000065, // Hexagon V65 ISA
+ EF_HEXAGON_ISA_V66 = 0x00000066, // Hexagon V66 ISA
};
// Hexagon-specific section indexes for common small data
@@ -701,6 +703,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
+ EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
// Reserved for AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
@@ -708,11 +711,14 @@ enum : unsigned {
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX906,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909,
- // Indicates if the xnack target feature is enabled for all code contained in
- // the object.
+ // Indicates if the "xnack" target feature is enabled for all code contained
+ // in the object.
EF_AMDGPU_XNACK = 0x100,
+ // Indicates if the "sram-ecc" target feature is enabled for all code
+ // contained in the object.
+ EF_AMDGPU_SRAM_ECC = 0x200,
};
// ELF Relocation types for AMDGPU
@@ -725,6 +731,38 @@ enum {
#include "ELFRelocs/BPF.def"
};
+// MSP430 specific e_flags
+enum : unsigned {
+ EF_MSP430_MACH_MSP430x11 = 11,
+ EF_MSP430_MACH_MSP430x11x1 = 110,
+ EF_MSP430_MACH_MSP430x12 = 12,
+ EF_MSP430_MACH_MSP430x13 = 13,
+ EF_MSP430_MACH_MSP430x14 = 14,
+ EF_MSP430_MACH_MSP430x15 = 15,
+ EF_MSP430_MACH_MSP430x16 = 16,
+ EF_MSP430_MACH_MSP430x20 = 20,
+ EF_MSP430_MACH_MSP430x22 = 22,
+ EF_MSP430_MACH_MSP430x23 = 23,
+ EF_MSP430_MACH_MSP430x24 = 24,
+ EF_MSP430_MACH_MSP430x26 = 26,
+ EF_MSP430_MACH_MSP430x31 = 31,
+ EF_MSP430_MACH_MSP430x32 = 32,
+ EF_MSP430_MACH_MSP430x33 = 33,
+ EF_MSP430_MACH_MSP430x41 = 41,
+ EF_MSP430_MACH_MSP430x42 = 42,
+ EF_MSP430_MACH_MSP430x43 = 43,
+ EF_MSP430_MACH_MSP430x44 = 44,
+ EF_MSP430_MACH_MSP430X = 45,
+ EF_MSP430_MACH_MSP430x46 = 46,
+ EF_MSP430_MACH_MSP430x47 = 47,
+ EF_MSP430_MACH_MSP430x54 = 54,
+};
+
+// ELF Relocation types for MSP430
+enum {
+#include "ELFRelocs/MSP430.def"
+};
+
#undef ELF_RELOC
// Section header.
@@ -829,6 +867,8 @@ enum : unsigned {
SHT_MIPS_DWARF = 0x7000001e, // DWARF debugging section.
SHT_MIPS_ABIFLAGS = 0x7000002a, // ABI information.
+ SHT_MSP430_ATTRIBUTES = 0x70000003U,
+
SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type.
SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
@@ -1321,7 +1361,7 @@ enum {
GNU_PROPERTY_X86_FEATURE_1_SHSTK = 1 << 1
};
-// AMDGPU specific notes.
+// AMD specific notes. (Code Object V2)
enum {
// Note types with values between 0 and 9 (inclusive) are reserved.
NT_AMD_AMDGPU_HSA_METADATA = 10,
@@ -1329,6 +1369,12 @@ enum {
NT_AMD_AMDGPU_PAL_METADATA = 12
};
+// AMDGPU specific notes. (Code Object V3)
+enum {
+ // Note types with values between 0 and 31 (inclusive) are reserved.
+ NT_AMDGPU_METADATA = 32
+};
+
enum {
GNU_ABI_TAG_LINUX = 0,
GNU_ABI_TAG_HURD = 1,
@@ -1339,6 +1385,8 @@ enum {
GNU_ABI_TAG_NACL = 6,
};
+constexpr const char *ELF_NOTE_GNU = "GNU";
+
// Android packed relocation group flags.
enum {
RELOCATION_GROUPED_BY_INFO_FLAG = 1,
diff --git a/contrib/llvm/include/llvm/BinaryFormat/ELFRelocs/MSP430.def b/contrib/llvm/include/llvm/BinaryFormat/ELFRelocs/MSP430.def
new file mode 100644
index 000000000000..96990abf2db4
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/ELFRelocs/MSP430.def
@@ -0,0 +1,16 @@
+
+#ifndef ELF_RELOC
+#error "ELF_RELOC must be defined"
+#endif
+
+ELF_RELOC(R_MSP430_NONE, 0)
+ELF_RELOC(R_MSP430_32, 1)
+ELF_RELOC(R_MSP430_10_PCREL, 2)
+ELF_RELOC(R_MSP430_16, 3)
+ELF_RELOC(R_MSP430_16_PCREL, 4)
+ELF_RELOC(R_MSP430_16_BYTE, 5)
+ELF_RELOC(R_MSP430_16_PCREL_BYTE, 6)
+ELF_RELOC(R_MSP430_2X_PCREL, 7)
+ELF_RELOC(R_MSP430_RL_PCREL, 8)
+ELF_RELOC(R_MSP430_8, 9)
+ELF_RELOC(R_MSP430_SYM_DIFF, 10)
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MachO.h b/contrib/llvm/include/llvm/BinaryFormat/MachO.h
index c5294c76ebf7..b3d60984249f 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/contrib/llvm/include/llvm/BinaryFormat/MachO.h
@@ -486,7 +486,10 @@ enum PlatformType {
PLATFORM_IOS = 2,
PLATFORM_TVOS = 3,
PLATFORM_WATCHOS = 4,
- PLATFORM_BRIDGEOS = 5
+ PLATFORM_BRIDGEOS = 5,
+ PLATFORM_IOSSIMULATOR = 7,
+ PLATFORM_TVOSSIMULATOR = 8,
+ PLATFORM_WATCHOSSIMULATOR = 9
};
// Values for tools enum in build_tool_version.
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MsgPack.def b/contrib/llvm/include/llvm/BinaryFormat/MsgPack.def
new file mode 100644
index 000000000000..781b49f46aeb
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/MsgPack.def
@@ -0,0 +1,108 @@
+//===- MsgPack.def - MessagePack definitions --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Macros for running through MessagePack enumerators.
+///
+//===----------------------------------------------------------------------===//
+
+#if !( \
+ defined HANDLE_MP_FIRST_BYTE || defined HANDLE_MP_FIX_BITS || \
+ defined HANDLE_MP_FIX_BITS_MASK || defined HANDLE_MP_FIX_MAX || \
+ defined HANDLE_MP_FIX_LEN || defined HANDLE_MP_FIX_MIN)
+#error "Missing macro definition of HANDLE_MP*"
+#endif
+
+#ifndef HANDLE_MP_FIRST_BYTE
+#define HANDLE_MP_FIRST_BYTE(ID, NAME)
+#endif
+
+#ifndef HANDLE_MP_FIX_BITS
+#define HANDLE_MP_FIX_BITS(ID, NAME)
+#endif
+
+#ifndef HANDLE_MP_FIX_BITS_MASK
+#define HANDLE_MP_FIX_BITS_MASK(ID, NAME)
+#endif
+
+#ifndef HANDLE_MP_FIX_MAX
+#define HANDLE_MP_FIX_MAX(ID, NAME)
+#endif
+
+#ifndef HANDLE_MP_FIX_LEN
+#define HANDLE_MP_FIX_LEN(ID, NAME)
+#endif
+
+#ifndef HANDLE_MP_FIX_MIN
+#define HANDLE_MP_FIX_MIN(ID, NAME)
+#endif
+
+HANDLE_MP_FIRST_BYTE(0xc0, Nil)
+HANDLE_MP_FIRST_BYTE(0xc2, False)
+HANDLE_MP_FIRST_BYTE(0xc3, True)
+HANDLE_MP_FIRST_BYTE(0xc4, Bin8)
+HANDLE_MP_FIRST_BYTE(0xc5, Bin16)
+HANDLE_MP_FIRST_BYTE(0xc6, Bin32)
+HANDLE_MP_FIRST_BYTE(0xc7, Ext8)
+HANDLE_MP_FIRST_BYTE(0xc8, Ext16)
+HANDLE_MP_FIRST_BYTE(0xc9, Ext32)
+HANDLE_MP_FIRST_BYTE(0xca, Float32)
+HANDLE_MP_FIRST_BYTE(0xcb, Float64)
+HANDLE_MP_FIRST_BYTE(0xcc, UInt8)
+HANDLE_MP_FIRST_BYTE(0xcd, UInt16)
+HANDLE_MP_FIRST_BYTE(0xce, UInt32)
+HANDLE_MP_FIRST_BYTE(0xcf, UInt64)
+HANDLE_MP_FIRST_BYTE(0xd0, Int8)
+HANDLE_MP_FIRST_BYTE(0xd1, Int16)
+HANDLE_MP_FIRST_BYTE(0xd2, Int32)
+HANDLE_MP_FIRST_BYTE(0xd3, Int64)
+HANDLE_MP_FIRST_BYTE(0xd4, FixExt1)
+HANDLE_MP_FIRST_BYTE(0xd5, FixExt2)
+HANDLE_MP_FIRST_BYTE(0xd6, FixExt4)
+HANDLE_MP_FIRST_BYTE(0xd7, FixExt8)
+HANDLE_MP_FIRST_BYTE(0xd8, FixExt16)
+HANDLE_MP_FIRST_BYTE(0xd9, Str8)
+HANDLE_MP_FIRST_BYTE(0xda, Str16)
+HANDLE_MP_FIRST_BYTE(0xdb, Str32)
+HANDLE_MP_FIRST_BYTE(0xdc, Array16)
+HANDLE_MP_FIRST_BYTE(0xdd, Array32)
+HANDLE_MP_FIRST_BYTE(0xde, Map16)
+HANDLE_MP_FIRST_BYTE(0xdf, Map32)
+
+HANDLE_MP_FIX_BITS(0x00, PositiveInt)
+HANDLE_MP_FIX_BITS(0x80, Map)
+HANDLE_MP_FIX_BITS(0x90, Array)
+HANDLE_MP_FIX_BITS(0xa0, String)
+HANDLE_MP_FIX_BITS(0xe0, NegativeInt)
+
+HANDLE_MP_FIX_BITS_MASK(0x80, PositiveInt)
+HANDLE_MP_FIX_BITS_MASK(0xf0, Map)
+HANDLE_MP_FIX_BITS_MASK(0xf0, Array)
+HANDLE_MP_FIX_BITS_MASK(0xe0, String)
+HANDLE_MP_FIX_BITS_MASK(0xe0, NegativeInt)
+
+HANDLE_MP_FIX_MAX(0x7f, PositiveInt)
+HANDLE_MP_FIX_MAX(0x0f, Map)
+HANDLE_MP_FIX_MAX(0x0f, Array)
+HANDLE_MP_FIX_MAX(0x1f, String)
+
+HANDLE_MP_FIX_LEN(0x01, Ext1)
+HANDLE_MP_FIX_LEN(0x02, Ext2)
+HANDLE_MP_FIX_LEN(0x04, Ext4)
+HANDLE_MP_FIX_LEN(0x08, Ext8)
+HANDLE_MP_FIX_LEN(0x10, Ext16)
+
+HANDLE_MP_FIX_MIN(-0x20, NegativeInt)
+
+#undef HANDLE_MP_FIRST_BYTE
+#undef HANDLE_MP_FIX_BITS
+#undef HANDLE_MP_FIX_BITS_MASK
+#undef HANDLE_MP_FIX_MAX
+#undef HANDLE_MP_FIX_LEN
+#undef HANDLE_MP_FIX_MIN
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MsgPack.h b/contrib/llvm/include/llvm/BinaryFormat/MsgPack.h
new file mode 100644
index 000000000000..d431912a53e5
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/MsgPack.h
@@ -0,0 +1,93 @@
+//===-- MsgPack.h - MessagePack Constants -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains constants used for implementing MessagePack support.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_MSGPACK_H
+#define LLVM_BINARYFORMAT_MSGPACK_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+namespace msgpack {
+
+/// The endianness of all multi-byte encoded values in MessagePack.
+constexpr support::endianness Endianness = support::big;
+
+/// The first byte identifiers of MessagePack object formats.
+namespace FirstByte {
+#define HANDLE_MP_FIRST_BYTE(ID, NAME) constexpr uint8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
+/// Most significant bits used to identify "Fix" variants in MessagePack.
+///
+/// For example, FixStr objects encode their size in the five least significant
+/// bits of their first byte, which is identified by the bit pattern "101" in
+/// the three most significant bits. So FixBits::String contains 0b10100000.
+///
+/// A corresponding mask of the bit pattern is found in \c FixBitsMask.
+namespace FixBits {
+#define HANDLE_MP_FIX_BITS(ID, NAME) constexpr uint8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
+/// Mask of bits used to identify "Fix" variants in MessagePack.
+///
+/// For example, FixStr objects encode their size in the five least significant
+/// bits of their first byte, which is identified by the bit pattern "101" in
+/// the three most significant bits. So FixBitsMask::String contains
+/// 0b11100000.
+///
+/// The corresponding bit pattern to mask for is found in FixBits.
+namespace FixBitsMask {
+#define HANDLE_MP_FIX_BITS_MASK(ID, NAME) constexpr uint8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
+/// The maximum value or size encodable in "Fix" variants of formats.
+///
+/// For example, FixStr objects encode their size in the five least significant
+/// bits of their first byte, so the largest encodable size is 0b00011111.
+namespace FixMax {
+#define HANDLE_MP_FIX_MAX(ID, NAME) constexpr uint8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
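A small sketch of how the constant families above combine when classifying a first byte; 0xa5 is a FixStr of length 5 in the MessagePack spec.

  uint8_t First = 0xa5;
  bool IsFixStr =
      (First & msgpack::FixBitsMask::String) == msgpack::FixBits::String;
  size_t StrLen = First & msgpack::FixMax::String; // 5 for this byte
  (void)IsFixStr;
  (void)StrLen;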
+/// The exact size encodable in "Fix" variants of formats.
+///
+/// The only objects for which an exact size makes sense are of Extension type.
+///
+/// For example, FixExt4 stores an extension type containing exactly four bytes.
+namespace FixLen {
+#define HANDLE_MP_FIX_LEN(ID, NAME) constexpr uint8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
+/// The minimum value or size encodable in "Fix" variants of formats.
+///
+/// The only object for which a minimum makes sense is a negative FixNum.
+///
+/// Negative FixNum objects encode their signed integer value in one byte, but
+/// they must have the pattern "111" as their three most significant bits. This
+/// means all values are negative, and the smallest representable value is
+/// 0b11100000.
+namespace FixMin {
+#define HANDLE_MP_FIX_MIN(ID, NAME) constexpr int8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
+} // end namespace msgpack
+} // end namespace llvm
+
+#endif // LLVM_BINARYFORMAT_MSGPACK_H
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MsgPackReader.h b/contrib/llvm/include/llvm/BinaryFormat/MsgPackReader.h
new file mode 100644
index 000000000000..511c31407455
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/MsgPackReader.h
@@ -0,0 +1,148 @@
+//===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is a MessagePack reader.
+///
+/// See https://github.com/msgpack/msgpack/blob/master/spec.md for the full
+/// standard.
+///
+/// Typical usage:
+/// \code
+/// StringRef input = GetInput();
+/// msgpack::Reader MPReader(input);
+/// msgpack::Object Obj;
+///
+/// while (MPReader.read(Obj)) {
+/// switch (Obj.Kind) {
+/// case msgpack::Type::Int:
+/// // Use Obj.Int
+/// break;
+/// // ...
+/// }
+/// }
+/// \endcode
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MSGPACKREADER_H
+#define LLVM_SUPPORT_MSGPACKREADER_H
+
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+
+namespace llvm {
+namespace msgpack {
+
+/// MessagePack types as defined in the standard, with the exception of Integer
+/// being divided into a signed Int and unsigned UInt variant in order to map
+/// directly to C++ types.
+///
+/// The types map onto corresponding union members of the \c Object struct.
+enum class Type : uint8_t {
+ Int,
+ UInt,
+ Nil,
+ Boolean,
+ Float,
+ String,
+ Binary,
+ Array,
+ Map,
+ Extension,
+};
+
+/// Extension types are composed of a user-defined type ID and an uninterpreted
+/// sequence of bytes.
+struct ExtensionType {
+ /// User-defined extension type.
+ int8_t Type;
+ /// Raw bytes of the extension object.
+ StringRef Bytes;
+};
+
+/// MessagePack object, represented as a tagged union of C++ types.
+///
+/// All types except \c Type::Nil (which has only one value, and so is
+/// completely represented by the \c Kind itself) map to a exactly one union
+/// member.
+struct Object {
+ Type Kind;
+ union {
+ /// Value for \c Type::Int.
+ int64_t Int;
+ /// Value for \c Type::UInt.
+ uint64_t UInt;
+ /// Value for \c Type::Boolean.
+ bool Bool;
+ /// Value for \c Type::Float.
+ double Float;
+ /// Value for \c Type::String and \c Type::Binary.
+ StringRef Raw;
+ /// Value for \c Type::Array and \c Type::Map.
+ size_t Length;
+ /// Value for \c Type::Extension.
+ ExtensionType Extension;
+ };
+
+ Object() : Kind(Type::Int), Int(0) {}
+};
+
+/// Reads MessagePack objects from memory, one at a time.
+class Reader {
+public:
+ /// Construct a reader, keeping a reference to the \p InputBuffer.
+ Reader(MemoryBufferRef InputBuffer);
+ /// Construct a reader, keeping a reference to the \p Input.
+ Reader(StringRef Input);
+
+ Reader(const Reader &) = delete;
+ Reader &operator=(const Reader &) = delete;
+
+ /// Read one object from the input buffer, advancing past it.
+ ///
+ /// The \p Obj is updated with the kind of the object read, and the
+ /// corresponding union member is updated.
+ ///
+ /// For the collection objects (Array and Map), only the length is read, and
+ /// the caller must make an additional \c N calls (in the case of Array) or
+ /// \c N*2 calls (in the case of Map) to \c Read to retrieve the collection
+ /// elements.
+ ///
+ /// \param [out] Obj filled with next object on success.
+ ///
+ /// \returns true when an object was successfully read, false when at the end
+ /// of the input (in which case \p Obj is not updated), otherwise an error.
+ Expected<bool> read(Object &Obj);
+
+private:
+ MemoryBufferRef InputBuffer;
+ StringRef::iterator Current;
+ StringRef::iterator End;
+
+ size_t remainingSpace() {
+ // The rest of the code maintains the invariant that End >= Current, so
+ // that this cast is always defined behavior.
+ return static_cast<size_t>(End - Current);
+ }
+
+ template <class T> Expected<bool> readRaw(Object &Obj);
+ template <class T> Expected<bool> readInt(Object &Obj);
+ template <class T> Expected<bool> readUInt(Object &Obj);
+ template <class T> Expected<bool> readLength(Object &Obj);
+ template <class T> Expected<bool> readExt(Object &Obj);
+ Expected<bool> createRaw(Object &Obj, uint32_t Size);
+ Expected<bool> createExt(Object &Obj, uint32_t Size);
+};
+
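A hedged sketch of the collection protocol described for read(): after a Map header, the next 2*Length reads alternate between keys and values. Input is an assumed StringRef; cantFail() is used only for brevity.

  msgpack::Reader MPReader(Input);
  msgpack::Object Obj;
  if (cantFail(MPReader.read(Obj)) && Obj.Kind == msgpack::Type::Map) {
    for (size_t I = 0, E = Obj.Length; I != E; ++I) {
      msgpack::Object Key, Value;
      cantFail(MPReader.read(Key));   // even read: a map key
      cantFail(MPReader.read(Value)); // odd read: that key's value
    }
  }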
+} // end namespace msgpack
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_MSGPACKREADER_H
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MsgPackTypes.h b/contrib/llvm/include/llvm/BinaryFormat/MsgPackTypes.h
new file mode 100644
index 000000000000..f96cd4c338fd
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/MsgPackTypes.h
@@ -0,0 +1,372 @@
+//===- MsgPackTypes.h - MsgPack Types ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This is a data structure for representing MessagePack "documents", with
+/// methods to go to and from MessagePack. The types also specialize YAMLIO
+/// traits in order to go to and from YAML.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/BinaryFormat/MsgPackReader.h"
+#include "llvm/BinaryFormat/MsgPackWriter.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <vector>
+
+#ifndef LLVM_BINARYFORMAT_MSGPACKTYPES_H
+#define LLVM_BINARYFORMAT_MSGPACKTYPES_H
+
+namespace llvm {
+namespace msgpack {
+
+class Node;
+
+/// Short-hand for a Node pointer.
+using NodePtr = std::shared_ptr<Node>;
+
+/// Short-hand for an Optional Node pointer.
+using OptNodePtr = Optional<NodePtr>;
+
+/// Abstract base-class which can be any MessagePack type.
+class Node {
+public:
+ enum NodeKind {
+ NK_Scalar,
+ NK_Array,
+ NK_Map,
+ };
+
+private:
+ virtual void anchor() = 0;
+ const NodeKind Kind;
+
+ static Expected<OptNodePtr> readArray(Reader &MPReader, size_t Length);
+ static Expected<OptNodePtr> readMap(Reader &MPReader, size_t Length);
+
+public:
+ NodeKind getKind() const { return Kind; }
+
+ /// Construct a Node. Used by derived classes to track kind information.
+ Node(NodeKind Kind) : Kind(Kind) {}
+
+ virtual ~Node() = default;
+
+ /// Read from a MessagePack reader \p MPReader, returning an error if one is
+ /// encountered, or None if \p MPReader is at the end of stream, or some Node
+ /// pointer if some type is read.
+ static Expected<OptNodePtr> read(Reader &MPReader);
+
+ /// Write to a MessagePack writer \p MPWriter.
+ virtual void write(Writer &MPWriter) = 0;
+};
+
+/// A MessagePack scalar.
+class ScalarNode : public Node {
+public:
+ enum ScalarKind {
+ SK_Int,
+ SK_UInt,
+ SK_Nil,
+ SK_Boolean,
+ SK_Float,
+ SK_String,
+ SK_Binary,
+ };
+
+private:
+ void anchor() override;
+
+ void destroy();
+
+ ScalarKind SKind;
+
+ union {
+ int64_t IntValue;
+ uint64_t UIntValue;
+ bool BoolValue;
+ double FloatValue;
+ std::string StringValue;
+ };
+
+public:
+ /// Construct an Int ScalarNode.
+ ScalarNode(int64_t IntValue);
+ /// Construct an Int ScalarNode.
+ ScalarNode(int32_t IntValue);
+ /// Construct an UInt ScalarNode.
+ ScalarNode(uint64_t UIntValue);
+ /// Construct an UInt ScalarNode.
+ ScalarNode(uint32_t UIntValue);
+ /// Construct a Nil ScalarNode.
+ ScalarNode();
+ /// Construct a Boolean ScalarNode.
+ ScalarNode(bool BoolValue);
+ /// Construct a Float ScalarNode.
+ ScalarNode(double FloatValue);
+ /// Construct a String ScalarNode.
+ ScalarNode(StringRef StringValue);
+ /// Construct a String ScalarNode.
+ ScalarNode(const char *StringValue);
+ /// Construct a String ScalarNode.
+ ScalarNode(std::string &&StringValue);
+ /// Construct a Binary ScalarNode.
+ ScalarNode(MemoryBufferRef BinaryValue);
+
+ ~ScalarNode();
+
+ ScalarNode &operator=(const ScalarNode &RHS) = delete;
+ /// A ScalarNode can only be move assigned.
+ ScalarNode &operator=(ScalarNode &&RHS);
+
+ /// Change the kind of this ScalarNode, zero initializing it to the new type.
+ void setScalarKind(ScalarKind SKind) {
+ switch (SKind) {
+ case SK_Int:
+ *this = int64_t(0);
+ break;
+ case SK_UInt:
+ *this = uint64_t(0);
+ break;
+ case SK_Boolean:
+ *this = false;
+ break;
+ case SK_Float:
+ *this = 0.0;
+ break;
+ case SK_String:
+ *this = StringRef();
+ break;
+ case SK_Binary:
+ *this = MemoryBufferRef("", "");
+ break;
+ case SK_Nil:
+ *this = ScalarNode();
+ break;
+ }
+ }
+
+ /// Get the current kind of ScalarNode.
+ ScalarKind getScalarKind() { return SKind; }
+
+ /// Get the value of an Int scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_Int
+ int64_t getInt() {
+ assert(SKind == SK_Int);
+ return IntValue;
+ }
+
+ /// Get the value of a UInt scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_UInt
+ uint64_t getUInt() {
+ assert(SKind == SK_UInt);
+ return UIntValue;
+ }
+
+ /// Get the value of a Boolean scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_Boolean
+ bool getBool() {
+ assert(SKind == SK_Boolean);
+ return BoolValue;
+ }
+
+ /// Get the value of a Float scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_Float
+ double getFloat() {
+ assert(SKind == SK_Float);
+ return FloatValue;
+ }
+
+ /// Get the value of a String scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_String
+ StringRef getString() {
+ assert(SKind == SK_String);
+ return StringValue;
+ }
+
+ /// Get the value of a Binary scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_Binary
+ StringRef getBinary() {
+ assert(SKind == SK_Binary);
+ return StringValue;
+ }
+
+ static bool classof(const Node *N) { return N->getKind() == NK_Scalar; }
+
+ void write(Writer &MPWriter) override;
+
+ /// Parse a YAML scalar of the current ScalarKind from \p ScalarStr.
+ ///
+ /// \returns An empty string on success, otherwise an error message.
+ StringRef inputYAML(StringRef ScalarStr);
+
+ /// Output a YAML scalar of the current ScalarKind into \p OS.
+ void outputYAML(raw_ostream &OS) const;
+
+ /// Determine which YAML quoting type the current value would need when
+ /// output.
+ yaml::QuotingType mustQuoteYAML(StringRef ScalarStr) const;
+
+ /// Get the YAML tag for the current ScalarKind.
+ StringRef getYAMLTag() const;
+
+ /// Flag which affects how the type handles YAML tags when reading and
+ /// writing.
+ ///
+ /// When false, tags are used when reading and writing. When reading, the tag
+ /// is used to decide the ScalarKind before parsing. When writing, the tag is
+ /// output along with the value.
+ ///
+ /// When true, tags are ignored when reading and writing. When reading, the
+ /// ScalarKind is always assumed to be String. When writing, the tag is not
+ /// output.
+ bool IgnoreTag = false;
+
+ static const char *IntTag;
+ static const char *NilTag;
+ static const char *BooleanTag;
+ static const char *FloatTag;
+ static const char *StringTag;
+ static const char *BinaryTag;
+};
+
+class ArrayNode : public Node, public std::vector<NodePtr> {
+ void anchor() override;
+
+public:
+ ArrayNode() : Node(NK_Array) {}
+ static bool classof(const Node *N) { return N->getKind() == NK_Array; }
+
+ void write(Writer &MPWriter) override {
+ MPWriter.writeArraySize(this->size());
+ for (auto &N : *this)
+ N->write(MPWriter);
+ }
+};
+
+class MapNode : public Node, public StringMap<NodePtr> {
+ void anchor() override;
+
+public:
+ MapNode() : Node(NK_Map) {}
+ static bool classof(const Node *N) { return N->getKind() == NK_Map; }
+
+ void write(Writer &MPWriter) override {
+ MPWriter.writeMapSize(this->size());
+ for (auto &N : *this) {
+ MPWriter.write(N.first());
+ N.second->write(MPWriter);
+ }
+ }
+};
+
+} // end namespace msgpack
+
+namespace yaml {
+
+template <> struct PolymorphicTraits<msgpack::NodePtr> {
+ static NodeKind getKind(const msgpack::NodePtr &N) {
+ if (isa<msgpack::ScalarNode>(*N))
+ return NodeKind::Scalar;
+ if (isa<msgpack::MapNode>(*N))
+ return NodeKind::Map;
+ if (isa<msgpack::ArrayNode>(*N))
+ return NodeKind::Sequence;
+ llvm_unreachable("NodeKind not supported");
+ }
+ static msgpack::ScalarNode &getAsScalar(msgpack::NodePtr &N) {
+ if (!N || !isa<msgpack::ScalarNode>(*N))
+ N.reset(new msgpack::ScalarNode());
+ return *cast<msgpack::ScalarNode>(N.get());
+ }
+ static msgpack::MapNode &getAsMap(msgpack::NodePtr &N) {
+ if (!N || !isa<msgpack::MapNode>(*N))
+ N.reset(new msgpack::MapNode());
+ return *cast<msgpack::MapNode>(N.get());
+ }
+ static msgpack::ArrayNode &getAsSequence(msgpack::NodePtr &N) {
+ if (!N || !isa<msgpack::ArrayNode>(*N))
+ N.reset(new msgpack::ArrayNode());
+ return *cast<msgpack::ArrayNode>(N.get());
+ }
+};
+
+template <> struct TaggedScalarTraits<msgpack::ScalarNode> {
+ static void output(const msgpack::ScalarNode &S, void *Ctxt,
+ raw_ostream &ScalarOS, raw_ostream &TagOS) {
+ if (!S.IgnoreTag)
+ TagOS << S.getYAMLTag();
+ S.outputYAML(ScalarOS);
+ }
+
+ static StringRef input(StringRef ScalarStr, StringRef Tag, void *Ctxt,
+ msgpack::ScalarNode &S) {
+ if (Tag == msgpack::ScalarNode::IntTag) {
+ S.setScalarKind(msgpack::ScalarNode::SK_UInt);
+ if (S.inputYAML(ScalarStr) == StringRef())
+ return StringRef();
+ S.setScalarKind(msgpack::ScalarNode::SK_Int);
+ return S.inputYAML(ScalarStr);
+ }
+
+ if (S.IgnoreTag || Tag == msgpack::ScalarNode::StringTag ||
+ Tag == "tag:yaml.org,2002:str")
+ S.setScalarKind(msgpack::ScalarNode::SK_String);
+ else if (Tag == msgpack::ScalarNode::NilTag)
+ S.setScalarKind(msgpack::ScalarNode::SK_Nil);
+ else if (Tag == msgpack::ScalarNode::BooleanTag)
+ S.setScalarKind(msgpack::ScalarNode::SK_Boolean);
+ else if (Tag == msgpack::ScalarNode::FloatTag)
+ S.setScalarKind(msgpack::ScalarNode::SK_Float);
+ else if (Tag == msgpack::ScalarNode::StringTag)
+ S.setScalarKind(msgpack::ScalarNode::SK_String);
+ else if (Tag == msgpack::ScalarNode::BinaryTag)
+ S.setScalarKind(msgpack::ScalarNode::SK_Binary);
+ else
+ return "Unsupported messagepack tag";
+
+ return S.inputYAML(ScalarStr);
+ }
+
+ static QuotingType mustQuote(const msgpack::ScalarNode &S, StringRef Str) {
+ return S.mustQuoteYAML(Str);
+ }
+};
+
+template <> struct CustomMappingTraits<msgpack::MapNode> {
+ static void inputOne(IO &IO, StringRef Key, msgpack::MapNode &M) {
+ IO.mapRequired(Key.str().c_str(), M[Key]);
+ }
+ static void output(IO &IO, msgpack::MapNode &M) {
+ for (auto &N : M)
+ IO.mapRequired(N.getKey().str().c_str(), N.getValue());
+ }
+};
+
+template <> struct SequenceTraits<msgpack::ArrayNode> {
+ static size_t size(IO &IO, msgpack::ArrayNode &A) { return A.size(); }
+ static msgpack::NodePtr &element(IO &IO, msgpack::ArrayNode &A,
+ size_t Index) {
+ if (Index >= A.size())
+ A.resize(Index + 1);
+ return A[Index];
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+#endif // LLVM_BINARYFORMAT_MSGPACKTYPES_H
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MsgPackWriter.h b/contrib/llvm/include/llvm/BinaryFormat/MsgPackWriter.h
new file mode 100644
index 000000000000..98af422c9f19
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/MsgPackWriter.h
@@ -0,0 +1,131 @@
+//===- MsgPackWriter.h - Simple MsgPack writer ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains a MessagePack writer.
+///
+/// See https://github.com/msgpack/msgpack/blob/master/spec.md for the full
+/// specification.
+///
+/// Typical usage:
+/// \code
+/// raw_ostream output = GetOutputStream();
+/// msgpack::Writer MPWriter(output);
+/// MPWriter.writeNil();
+/// MPWriter.write(false);
+/// MPWriter.write("string");
+/// // ...
+/// \endcode
+///
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MSGPACKPARSER_H
+#define LLVM_SUPPORT_MSGPACKPARSER_H
+
+#include "llvm/BinaryFormat/MsgPack.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace msgpack {
+
+/// Writes MessagePack objects to an output stream, one at a time.
+class Writer {
+public:
+ /// Construct a writer, optionally enabling "Compatibility Mode" as defined
+ /// in the MessagePack specification.
+ ///
+ /// When in \p Compatible mode, the writer will write \c Str16 formats
+ /// instead of \c Str8 formats, and will refuse to write any \c Bin formats.
+ ///
+ /// \param OS stream to output MessagePack objects to.
+ /// \param Compatible when set, write in "Compatibility Mode".
+ Writer(raw_ostream &OS, bool Compatible = false);
+
+ Writer(const Writer &) = delete;
+ Writer &operator=(const Writer &) = delete;
+
+ /// Write a \em Nil to the output stream.
+ ///
+ /// The output will be the \em nil format.
+ void writeNil();
+
+ /// Write a \em Boolean to the output stream.
+ ///
+ /// The output will be a \em bool format.
+ void write(bool b);
+
+ /// Write a signed integer to the output stream.
+ ///
+ /// The output will be in the smallest possible \em int format.
+ ///
+ /// The format chosen may be for an unsigned integer.
+ void write(int64_t i);
+
+ /// Write an unsigned integer to the output stream.
+ ///
+ /// The output will be in the smallest possible \em int format.
+ void write(uint64_t u);
+
+ /// Write a floating point number to the output stream.
+ ///
+ /// The output will be in the smallest possible \em float format.
+ void write(double d);
+
+ /// Write a string to the output stream.
+ ///
+ /// The output will be in the smallest possible \em str format.
+ void write(StringRef s);
+
+ /// Write a memory buffer to the output stream.
+ ///
+ /// The output will be in the smallest possible \em bin format.
+ ///
+ /// \warning Do not use this overload if in \c Compatible mode.
+ void write(MemoryBufferRef Buffer);
+
+ /// Write the header for an \em Array of the given size.
+ ///
+ /// The output will be in the smallest possible \em array format.
+ ///
+ /// The header contains an identifier for the \em array format used, as well
+ /// as an encoding of the size of the array.
+ ///
+ /// N.B. The caller must subsequently call \c Write an additional \p Size
+ /// times to complete the array.
+ void writeArraySize(uint32_t Size);
+
+ /// Write the header for a \em Map of the given size.
+ ///
+ /// The output will be in the smallest possible \em map format.
+ ///
+ /// The header contains an identifier for the \em map format used, as well
+ /// as an encoding of the size of the map.
+ ///
+ /// N.B. The caller must subsequently call \c Write an additional \c Size*2
+ /// times to complete the map. Each even numbered call to \c Write defines a
+ /// new key, and each odd numbered call defines the previous key's value.
+ void writeMapSize(uint32_t Size);
+
+ /// Write a typed memory buffer (an extension type) to the output stream.
+ ///
+ /// The output will be in the smallest possible \em ext format.
+ void writeExt(int8_t Type, MemoryBufferRef Buffer);
+
+private:
+ support::endian::Writer EW;
+ bool Compatible;
+};
+
+} // end namespace msgpack
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_MSGPACKPARSER_H
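The Writer above is purely stream-oriented: a compound object is produced by writing its size header and then making exactly the promised number of element writes. A minimal sketch of a two-entry map using only the methods declared in this header (the helper name and values are made up; StringRef is spelled out because a bare string literal would otherwise prefer the bool overload):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/BinaryFormat/MsgPackWriter.h"
  #include "llvm/Support/raw_ostream.h"
  #include <cstdint>

  void writeSampleDoc(llvm::raw_ostream &OS) {
    llvm::msgpack::Writer MPWriter(OS);
    // Map header first, then Size*2 writes alternating key and value.
    MPWriter.writeMapSize(2);
    MPWriter.write(llvm::StringRef("count"));
    MPWriter.write(uint64_t(3));
    MPWriter.write(llvm::StringRef("ratios"));
    MPWriter.writeArraySize(2);  // the value of "ratios" is a 2-element array
    MPWriter.write(0.5);
    MPWriter.write(0.25);
  }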
diff --git a/contrib/llvm/include/llvm/BinaryFormat/Wasm.h b/contrib/llvm/include/llvm/BinaryFormat/Wasm.h
index fa5448dacec4..d9f0f94b298d 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/contrib/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -16,6 +16,7 @@
#define LLVM_BINARYFORMAT_WASM_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
namespace llvm {
namespace wasm {
@@ -25,7 +26,7 @@ const char WasmMagic[] = {'\0', 'a', 's', 'm'};
// Wasm binary format version
const uint32_t WasmVersion = 0x1;
// Wasm linking metadata version
-const uint32_t WasmMetadataVersion = 0x1;
+const uint32_t WasmMetadataVersion = 0x2;
// Wasm uses a 64k page size
const uint32_t WasmPageSize = 65536;
@@ -34,9 +35,12 @@ struct WasmObjectHeader {
uint32_t Version;
};
-struct WasmSignature {
- std::vector<uint8_t> ParamTypes;
- uint8_t ReturnType;
+struct WasmDylinkInfo {
+ uint32_t MemorySize; // Memory size in bytes
+ uint32_t MemoryAlignment; // P2 alignment of memory
+ uint32_t TableSize; // Table size in elements
+ uint32_t TableAlignment; // P2 alignment of table
+ std::vector<StringRef> Needed; // Shared library dependencies
};
struct WasmExport {
@@ -79,6 +83,18 @@ struct WasmGlobal {
StringRef SymbolName; // from the "linking" section
};
+struct WasmEventType {
+ // Kind of event. Currently only WASM_EVENT_ATTRIBUTE_EXCEPTION is possible.
+ uint32_t Attribute;
+ uint32_t SigIndex;
+};
+
+struct WasmEvent {
+ uint32_t Index;
+ WasmEventType Type;
+ StringRef SymbolName; // from the "linking" section
+};
+
struct WasmImport {
StringRef Module;
StringRef Field;
@@ -88,6 +104,7 @@ struct WasmImport {
WasmGlobalType Global;
WasmTable Table;
WasmLimits Memory;
+ WasmEventType Event;
};
};
@@ -104,8 +121,8 @@ struct WasmFunction {
uint32_t Size;
uint32_t CodeOffset; // start of Locals and Body
StringRef SymbolName; // from the "linking" section
- StringRef DebugName; // from the "name" section
- uint32_t Comdat; // from the "comdat info" section
+ StringRef DebugName; // from the "name" section
+ uint32_t Comdat; // from the "comdat info" section
};
struct WasmDataSegment {
@@ -171,18 +188,20 @@ struct WasmLinkingData {
};
enum : unsigned {
- WASM_SEC_CUSTOM = 0, // Custom / User-defined section
- WASM_SEC_TYPE = 1, // Function signature declarations
- WASM_SEC_IMPORT = 2, // Import declarations
- WASM_SEC_FUNCTION = 3, // Function declarations
- WASM_SEC_TABLE = 4, // Indirect function table and other tables
- WASM_SEC_MEMORY = 5, // Memory attributes
- WASM_SEC_GLOBAL = 6, // Global declarations
- WASM_SEC_EXPORT = 7, // Exports
- WASM_SEC_START = 8, // Start function declaration
- WASM_SEC_ELEM = 9, // Elements section
- WASM_SEC_CODE = 10, // Function bodies (code)
- WASM_SEC_DATA = 11 // Data segments
+ WASM_SEC_CUSTOM = 0, // Custom / User-defined section
+ WASM_SEC_TYPE = 1, // Function signature declarations
+ WASM_SEC_IMPORT = 2, // Import declarations
+ WASM_SEC_FUNCTION = 3, // Function declarations
+ WASM_SEC_TABLE = 4, // Indirect function table and other tables
+ WASM_SEC_MEMORY = 5, // Memory attributes
+ WASM_SEC_GLOBAL = 6, // Global declarations
+ WASM_SEC_EXPORT = 7, // Exports
+ WASM_SEC_START = 8, // Start function declaration
+ WASM_SEC_ELEM = 9, // Elements section
+ WASM_SEC_CODE = 10, // Function bodies (code)
+ WASM_SEC_DATA = 11, // Data segments
+ WASM_SEC_DATACOUNT = 12, // Data segment count
+ WASM_SEC_EVENT = 13 // Event declarations
};
// Type immediate encodings used in various contexts.
@@ -191,7 +210,8 @@ enum : unsigned {
WASM_TYPE_I64 = 0x7E,
WASM_TYPE_F32 = 0x7D,
WASM_TYPE_F64 = 0x7C,
- WASM_TYPE_ANYFUNC = 0x70,
+ WASM_TYPE_V128 = 0x7B,
+ WASM_TYPE_FUNCREF = 0x70,
WASM_TYPE_EXCEPT_REF = 0x68,
WASM_TYPE_FUNC = 0x60,
WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
@@ -203,12 +223,13 @@ enum : unsigned {
WASM_EXTERNAL_TABLE = 0x1,
WASM_EXTERNAL_MEMORY = 0x2,
WASM_EXTERNAL_GLOBAL = 0x3,
+ WASM_EXTERNAL_EVENT = 0x4,
};
// Opcodes used in initializer expressions.
enum : unsigned {
WASM_OPCODE_END = 0x0b,
- WASM_OPCODE_GET_GLOBAL = 0x23,
+ WASM_OPCODE_GLOBAL_GET = 0x23,
WASM_OPCODE_I32_CONST = 0x41,
WASM_OPCODE_I64_CONST = 0x42,
WASM_OPCODE_F32_CONST = 0x43,
@@ -217,35 +238,27 @@ enum : unsigned {
enum : unsigned {
WASM_LIMITS_FLAG_HAS_MAX = 0x1,
-};
-
-// Subset of types that a value can have
-enum class ValType {
- I32 = WASM_TYPE_I32,
- I64 = WASM_TYPE_I64,
- F32 = WASM_TYPE_F32,
- F64 = WASM_TYPE_F64,
- EXCEPT_REF = WASM_TYPE_EXCEPT_REF,
+ WASM_LIMITS_FLAG_IS_SHARED = 0x2,
};
// Kind codes used in the custom "name" section
enum : unsigned {
WASM_NAMES_FUNCTION = 0x1,
- WASM_NAMES_LOCAL = 0x2,
+ WASM_NAMES_LOCAL = 0x2,
};
// Kind codes used in the custom "linking" section
enum : unsigned {
- WASM_SEGMENT_INFO = 0x5,
- WASM_INIT_FUNCS = 0x6,
- WASM_COMDAT_INFO = 0x7,
- WASM_SYMBOL_TABLE = 0x8,
+ WASM_SEGMENT_INFO = 0x5,
+ WASM_INIT_FUNCS = 0x6,
+ WASM_COMDAT_INFO = 0x7,
+ WASM_SYMBOL_TABLE = 0x8,
};
// Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO
enum : unsigned {
- WASM_COMDAT_DATA = 0x0,
- WASM_COMDAT_FUNCTION = 0x1,
+ WASM_COMDAT_DATA = 0x0,
+ WASM_COMDAT_FUNCTION = 0x1,
};
// Kind codes used in the custom "linking" section in the WASM_SYMBOL_TABLE
@@ -254,17 +267,23 @@ enum WasmSymbolType : unsigned {
WASM_SYMBOL_TYPE_DATA = 0x1,
WASM_SYMBOL_TYPE_GLOBAL = 0x2,
WASM_SYMBOL_TYPE_SECTION = 0x3,
+ WASM_SYMBOL_TYPE_EVENT = 0x4,
+};
+
+// Kinds of event attributes.
+enum WasmEventAttribute : unsigned {
+ WASM_EVENT_ATTRIBUTE_EXCEPTION = 0x0,
};
-const unsigned WASM_SYMBOL_BINDING_MASK = 0x3;
-const unsigned WASM_SYMBOL_VISIBILITY_MASK = 0xc;
+const unsigned WASM_SYMBOL_BINDING_MASK = 0x3;
+const unsigned WASM_SYMBOL_VISIBILITY_MASK = 0xc;
-const unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0;
-const unsigned WASM_SYMBOL_BINDING_WEAK = 0x1;
-const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2;
+const unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0;
+const unsigned WASM_SYMBOL_BINDING_WEAK = 0x1;
+const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2;
const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0;
-const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
-const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
+const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
+const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
#define WASM_RELOC(name, value) name = value,
@@ -274,9 +293,32 @@ enum : unsigned {
#undef WASM_RELOC
+// Subset of types that a value can have
+enum class ValType {
+ I32 = WASM_TYPE_I32,
+ I64 = WASM_TYPE_I64,
+ F32 = WASM_TYPE_F32,
+ F64 = WASM_TYPE_F64,
+ V128 = WASM_TYPE_V128,
+ EXCEPT_REF = WASM_TYPE_EXCEPT_REF,
+};
+
+struct WasmSignature {
+ SmallVector<wasm::ValType, 1> Returns;
+ SmallVector<wasm::ValType, 4> Params;
+ // Support empty and tombstone instances, needed by DenseMap.
+ enum { Plain, Empty, Tombstone } State = Plain;
+
+ WasmSignature(SmallVector<wasm::ValType, 1> &&InReturns,
+ SmallVector<wasm::ValType, 4> &&InParams)
+ : Returns(InReturns), Params(InParams) {}
+ WasmSignature() = default;
+};
+
// Useful comparison operators
inline bool operator==(const WasmSignature &LHS, const WasmSignature &RHS) {
- return LHS.ReturnType == RHS.ReturnType && LHS.ParamTypes == RHS.ParamTypes;
+ return LHS.State == RHS.State && LHS.Returns == RHS.Returns &&
+ LHS.Params == RHS.Params;
}
inline bool operator!=(const WasmSignature &LHS, const WasmSignature &RHS) {
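WasmSignature now models multi-value signatures directly: a vector of result types and a vector of parameter types (both using the relocated ValType enum), plus a State field so DenseMap can represent empty and tombstone keys. A small sketch of the new layout, with arbitrary type choices (not taken from the patch):

  #include "llvm/BinaryFormat/Wasm.h"
  #include <cassert>

  void signatureExample() {
    using llvm::wasm::ValType;
    llvm::wasm::WasmSignature A({ValType::I32}, {ValType::I64, ValType::F64});
    llvm::wasm::WasmSignature B({ValType::I32}, {ValType::I64, ValType::F64});
    assert(A == B);                            // same state, returns and params
    assert(A != llvm::wasm::WasmSignature());  // default-constructed: no params
  }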
diff --git a/contrib/llvm/include/llvm/BinaryFormat/WasmRelocs.def b/contrib/llvm/include/llvm/BinaryFormat/WasmRelocs.def
index 8ffd51e483f3..b3a08e70c1d5 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/WasmRelocs.def
+++ b/contrib/llvm/include/llvm/BinaryFormat/WasmRelocs.def
@@ -1,4 +1,3 @@
-
#ifndef WASM_RELOC
#error "WASM_RELOC must be defined"
#endif
@@ -13,3 +12,4 @@ WASM_RELOC(R_WEBASSEMBLY_TYPE_INDEX_LEB, 6)
WASM_RELOC(R_WEBASSEMBLY_GLOBAL_INDEX_LEB, 7)
WASM_RELOC(R_WEBASSEMBLY_FUNCTION_OFFSET_I32, 8)
WASM_RELOC(R_WEBASSEMBLY_SECTION_OFFSET_I32, 9)
+WASM_RELOC(R_WEBASSEMBLY_EVENT_INDEX_LEB, 10)
diff --git a/contrib/llvm/include/llvm/Bitcode/BitcodeReader.h b/contrib/llvm/include/llvm/Bitcode/BitcodeReader.h
index ce8bdd9cf0b4..0d7cc141f2ce 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitcodeReader.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -51,6 +51,7 @@ class Module;
struct BitcodeLTOInfo {
bool IsThinLTO;
bool HasSummary;
+ bool EnableSplitLTOUnit;
};
/// Represents a module in a bitcode file.
diff --git a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 6723cf42dd2c..f0d11e9c1689 100644
--- a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -342,6 +342,7 @@ enum ConstantsCodes {
CST_CODE_INLINEASM = 23, // INLINEASM: [sideeffect|alignstack|
// asmdialect,asmstr,conststr]
CST_CODE_CE_GEP_WITH_INRANGE_INDEX = 24, // [opty, flags, n x operands]
+ CST_CODE_CE_UNOP = 25, // CE_UNOP: [opcode, opval]
};
/// CastOpcodes - These are values used in the bitcode files to encode which
@@ -364,6 +365,14 @@ enum CastOpcodes {
CAST_ADDRSPACECAST = 12
};
+/// UnaryOpcodes - These are values used in the bitcode files to encode which
+/// unop a CST_CODE_CE_UNOP or a XXX refers to. The values of these enums
+/// have no fixed relation to the LLVM IR enum values. Changing these will
+/// break compatibility with old files.
+enum UnaryOpcodes {
+ UNOP_NEG = 0
+};
+
/// BinaryOpcodes - These are values used in the bitcode files to encode which
/// binop a CST_CODE_CE_BINOP or a XXX refers to. The values of these enums
/// have no fixed relation to the LLVM IR enum values. Changing these will
@@ -524,6 +533,7 @@ enum FunctionCodes {
// 53 is unused.
// 54 is unused.
FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...]
+ FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval]
};
enum UseListCodes {
@@ -591,6 +601,7 @@ enum AttributeKindCodes {
ATTR_KIND_NOCF_CHECK = 56,
ATTR_KIND_OPT_FOR_FUZZING = 57,
ATTR_KIND_SHADOWCALLSTACK = 58,
+ ATTR_KIND_SPECULATIVE_LOAD_HARDENING = 59,
};
enum ComdatSelectionKindCodes {
diff --git a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
index b6056380916c..413901d218f9 100644
--- a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -71,6 +71,7 @@ class MCTargetOptions;
class MDNode;
class Module;
class raw_ostream;
+class StackMaps;
class TargetLoweringObjectFile;
class TargetMachine;
@@ -137,6 +138,9 @@ private:
static char ID;
+protected:
+ /// Protected struct HandlerInfo and Handlers permit a target-extended
+ /// AsmPrinter to add its own handlers.
struct HandlerInfo {
AsmPrinterHandler *Handler;
const char *TimerName;
@@ -365,6 +369,9 @@ public:
/// emit the proxies we previously omitted in EmitGlobalVariable.
void emitGlobalGOTEquivs();
+ /// Emit the stack maps.
+ void emitStackMaps(StackMaps &SM);
+
//===------------------------------------------------------------------===//
// Overridable Hooks
//===------------------------------------------------------------------===//
@@ -542,7 +549,7 @@ public:
///
/// \p Value - The value to emit.
/// \p Size - The size of the integer (in bytes) to emit.
- virtual void EmitDebugThreadLocal(const MCExpr *Value, unsigned Size) const;
+ virtual void EmitDebugValue(const MCExpr *Value, unsigned Size) const;
//===------------------------------------------------------------------===//
// Dwarf Lowering Routines
@@ -631,6 +638,11 @@ private:
/// inline asm.
void EmitInlineAsm(const MachineInstr *MI) const;
+ /// Add inline assembly info to the diagnostics machinery, so we can
+ /// emit file and position info. Returns SrcMgr memory buffer position.
+ unsigned addInlineAsmDiagBuffer(StringRef AsmStr,
+ const MDNode *LocMDNode) const;
+
//===------------------------------------------------------------------===//
// Internal Implementation Details
//===------------------------------------------------------------------===//
@@ -647,6 +659,8 @@ private:
void EmitLLVMUsedList(const ConstantArray *InitList);
/// Emit llvm.ident metadata in an '.ident' directive.
void EmitModuleIdents(Module &M);
+ /// Emit bytes for llvm.commandline metadata.
+ void EmitModuleCommandLines(Module &M);
void EmitXXStructorList(const DataLayout &DL, const Constant *List,
bool isCtor);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinterHandler.h
index f5ac95a20b10..a8b13200dd4e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinterHandler.h
@@ -1,4 +1,4 @@
-//===-- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h -------------*- C++ -*--===//
+//===-- llvm/CodeGen/AsmPrinterHandler.h -----------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
+#ifndef LLVM_CODEGEN_ASMPRINTERHANDLER_H
+#define LLVM_CODEGEN_ASMPRINTERHANDLER_H
#include "llvm/Support/DataTypes.h"
diff --git a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index f76a2426377a..f105d887c397 100644
--- a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -80,6 +80,23 @@ private:
using BaseT = TargetTransformInfoImplCRTPBase<T>;
using TTI = TargetTransformInfo;
+ /// Estimate a cost of Broadcast as an extract and sequence of insert
+ /// operations.
+ unsigned getBroadcastShuffleOverhead(Type *Ty) {
+ assert(Ty->isVectorTy() && "Can only shuffle vectors");
+ unsigned Cost = 0;
+ // Broadcast cost is equal to the cost of extracting the zero'th element
+ // plus the cost of inserting it into every element of the result vector.
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::ExtractElement, Ty, 0);
+
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::InsertElement, Ty, i);
+ }
+ return Cost;
+ }
+
/// Estimate a cost of shuffle as a sequence of extract and insert
/// operations.
unsigned getPermuteShuffleOverhead(Type *Ty) {
@@ -101,6 +118,50 @@ private:
return Cost;
}
+ /// Estimate a cost of subvector extraction as a sequence of extract and
+ /// insert operations.
+ unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
+ assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
+ "Can only extract subvectors from vectors");
+ int NumSubElts = SubTy->getVectorNumElements();
+ assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
+ "SK_ExtractSubvector index out of range");
+
+ unsigned Cost = 0;
+ // Subvector extraction cost is equal to the cost of extracting the elements
+ // from the source vector type plus the cost of inserting them into the
+ // result subvector type.
+ for (int i = 0; i != NumSubElts; ++i) {
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::ExtractElement, Ty, i + Index);
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::InsertElement, SubTy, i);
+ }
+ return Cost;
+ }
+
+ /// Estimate a cost of subvector insertion as a sequence of extract and
+ /// insert operations.
+ unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
+ assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
+ "Can only insert subvectors into vectors");
+ int NumSubElts = SubTy->getVectorNumElements();
+ assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
+ "SK_InsertSubvector index out of range");
+
+ unsigned Cost = 0;
+ // Subvector insertion cost is equal to the cost of extracting the elements
+ // from the subvector type plus the cost of inserting them into the result
+ // vector type.
+ for (int i = 0; i != NumSubElts; ++i) {
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::ExtractElement, SubTy, i);
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::InsertElement, Ty, i + Index);
+ }
+ return Cost;
+ }
+
/// Local query method delegates up to T which *must* implement this!
const TargetSubtargetInfo *getST() const {
return static_cast<const T *>(this)->getST();
@@ -554,14 +615,20 @@ public:
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
switch (Kind) {
+ case TTI::SK_Broadcast:
+ return getBroadcastShuffleOverhead(Tp);
case TTI::SK_Select:
+ case TTI::SK_Reverse:
case TTI::SK_Transpose:
case TTI::SK_PermuteSingleSrc:
case TTI::SK_PermuteTwoSrc:
return getPermuteShuffleOverhead(Tp);
- default:
- return 1;
+ case TTI::SK_ExtractSubvector:
+ return getExtractSubvectorOverhead(Tp, Index, SubTp);
+ case TTI::SK_InsertSubvector:
+ return getInsertSubvectorOverhead(Tp, Index, SubTp);
}
+ llvm_unreachable("Unknown TTI::ShuffleKind");
}
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -783,8 +850,9 @@ public:
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false) {
VectorType *VT = dyn_cast<VectorType>(VecTy);
assert(VT && "Expect a vector type for interleaved memory op");
@@ -795,8 +863,13 @@ public:
VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
// Firstly, the cost of load/store operation.
- unsigned Cost = static_cast<T *>(this)->getMemoryOpCost(
- Opcode, VecTy, Alignment, AddressSpace);
+ unsigned Cost;
+ if (UseMaskForCond || UseMaskForGaps)
+ Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
+ Opcode, VecTy, Alignment, AddressSpace);
+ else
+ Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
+ AddressSpace);
// Legalize the vector type, and get the legalized and unlegalized type
// sizes.
@@ -892,6 +965,40 @@ public:
->getVectorInstrCost(Instruction::InsertElement, VT, i);
}
+ if (!UseMaskForCond)
+ return Cost;
+
+ Type *I8Type = Type::getInt8Ty(VT->getContext());
+ VectorType *MaskVT = VectorType::get(I8Type, NumElts);
+ SubVT = VectorType::get(I8Type, NumSubElts);
+
+ // The Mask shuffling cost is the cost of extracting all the elements of the
+ // Mask and inserting each of them Factor times into the wide vector:
+ //
+ // E.g. an interleaved group with factor 3:
+ // %mask = icmp ult <8 x i32> %vec1, %vec2
+ // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
+ // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
+ // The cost is estimated as extracting all mask elements from the <8xi1> mask
+ // vector and inserting them Factor times into the <24xi1> shuffled mask
+ // vector.
+ for (unsigned i = 0; i < NumSubElts; i++)
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::ExtractElement, SubVT, i);
+
+ for (unsigned i = 0; i < NumElts; i++)
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::InsertElement, MaskVT, i);
+
+ // The Gaps mask is invariant and created outside the loop, therefore the
+ // cost of creating it is not accounted for here. However if we have both
+ // a MaskForGaps and some other mask that guards the execution of the
+ // memory access, we need to account for the cost of And-ing the two masks
+ // inside the loop.
+ if (UseMaskForGaps)
+ Cost += static_cast<T *>(this)->getArithmeticInstrCost(
+ BinaryOperator::And, MaskVT);
+
return Cost;
}
@@ -901,6 +1008,7 @@ public:
unsigned VF = 1) {
unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
+ auto *ConcreteTTI = static_cast<T *>(this);
switch (IID) {
default: {
@@ -926,29 +1034,24 @@ public:
ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
}
- return static_cast<T *>(this)->
- getIntrinsicInstrCost(IID, RetTy, Types, FMF, ScalarizationCost);
+ return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
+ ScalarizationCost);
}
case Intrinsic::masked_scatter: {
assert(VF == 1 && "Can't vectorize types here.");
Value *Mask = Args[3];
bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
- return
- static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Store,
- Args[0]->getType(),
- Args[1], VarMask,
- Alignment);
+ return ConcreteTTI->getGatherScatterOpCost(
+ Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
}
case Intrinsic::masked_gather: {
assert(VF == 1 && "Can't vectorize types here.");
Value *Mask = Args[2];
bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
- return
- static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Load,
- RetTy, Args[0], VarMask,
- Alignment);
+ return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
+ Args[0], VarMask, Alignment);
}
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
@@ -964,6 +1067,45 @@ public:
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ Value *X = Args[0];
+ Value *Y = Args[1];
+ Value *Z = Args[2];
+ TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
+ TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
+ TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
+ TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
+ TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
+ OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
+ : TTI::OP_None;
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ unsigned Cost = 0;
+ Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
+ OpKindX, OpKindZ, OpPropsX);
+ Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
+ OpKindY, OpKindZ, OpPropsY);
+ // Non-constant shift amounts require a modulo.
+ if (OpKindZ != TTI::OK_UniformConstantValue &&
+ OpKindZ != TTI::OK_NonUniformConstantValue)
+ Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
+ OpKindZ, OpKindBW, OpPropsZ,
+ OpPropsBW);
+ // For non-rotates (X != Y) we must add shift-by-zero handling costs.
+ if (X != Y) {
+ Type *CondTy = Type::getInt1Ty(RetTy->getContext());
+ if (RetVF > 1)
+ CondTy = VectorType::get(CondTy, RetVF);
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
+ CondTy, nullptr);
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
+ CondTy, nullptr);
+ }
+ return Cost;
+ }
}
}
@@ -1036,15 +1178,18 @@ public:
case Intrinsic::fabs:
ISDs.push_back(ISD::FABS);
break;
+ case Intrinsic::canonicalize:
+ ISDs.push_back(ISD::FCANONICALIZE);
+ break;
case Intrinsic::minnum:
ISDs.push_back(ISD::FMINNUM);
if (FMF.noNaNs())
- ISDs.push_back(ISD::FMINNAN);
+ ISDs.push_back(ISD::FMINIMUM);
break;
case Intrinsic::maxnum:
ISDs.push_back(ISD::FMAXNUM);
if (FMF.noNaNs())
- ISDs.push_back(ISD::FMAXNAN);
+ ISDs.push_back(ISD::FMAXIMUM);
break;
case Intrinsic::copysign:
ISDs.push_back(ISD::FCOPYSIGN);
@@ -1136,7 +1281,8 @@ public:
SmallVector<unsigned, 2> CustomCost;
for (unsigned ISD : ISDs) {
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
- if (IID == Intrinsic::fabs && TLI->isFAbsFree(LT.second)) {
+ if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
+ TLI->isFAbsFree(LT.second)) {
return 0;
}
@@ -1280,24 +1426,36 @@ public:
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
+ Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
// Assume the pairwise shuffles add a cost.
ShuffleCost += (IsPairwise + 1) *
ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
- NumVecElts, Ty);
- ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
- Ty = VectorType::get(ScalarTy, NumVecElts);
+ NumVecElts, SubTy);
+ ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
+ Ty = SubTy;
++LongVectorCount;
}
+
+ NumReduxLevels -= LongVectorCount;
+
// The minimal length of the vector is limited by the real length of vector
// operations performed on the current platform. That's why several final
// reduction operations are performed on the vectors with the same
// architecture-dependent length.
- ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
- ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
- NumVecElts, Ty);
- ArithCost += (NumReduxLevels - LongVectorCount) *
+
+ // Non-pairwise reductions need one shuffle per reduction level. Pairwise
+ // reductions need two shuffles on every level except the last one. On that
+ // level one of the shuffles is <0, u, u, ...>, which is identity.
+ unsigned NumShuffles = NumReduxLevels;
+ if (IsPairwise && NumReduxLevels >= 1)
+ NumShuffles += NumReduxLevels - 1;
+ ShuffleCost += NumShuffles *
+ ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
+ 0, Ty);
+ ArithCost += NumReduxLevels *
ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
- return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
+ return ShuffleCost + ArithCost +
+ ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
}
/// Try to calculate op costs for min/max reduction operations.
@@ -1327,37 +1485,46 @@ public:
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
+ Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
+ CondTy = VectorType::get(ScalarCondTy, NumVecElts);
+
// Assume the pairwise shuffles add a cost.
ShuffleCost += (IsPairwise + 1) *
ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
- NumVecElts, Ty);
+ NumVecElts, SubTy);
MinMaxCost +=
- ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
- ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
+ ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
+ ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
nullptr);
- Ty = VectorType::get(ScalarTy, NumVecElts);
- CondTy = VectorType::get(ScalarCondTy, NumVecElts);
+ Ty = SubTy;
++LongVectorCount;
}
+
+ NumReduxLevels -= LongVectorCount;
+
// The minimal length of the vector is limited by the real length of vector
// operations performed on the current platform. That's why several final
 // reduction operations are performed on the vectors with the same
// architecture-dependent length.
- ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
- ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
- NumVecElts, Ty);
+
+ // Non-pairwise reductions need one shuffle per reduction level. Pairwise
+ // reductions need two shuffles on every level except the last one. On that
+ // level one of the shuffles is <0, u, u, ...>, which is identity.
+ unsigned NumShuffles = NumReduxLevels;
+ if (IsPairwise && NumReduxLevels >= 1)
+ NumShuffles += NumReduxLevels - 1;
+ ShuffleCost += NumShuffles *
+ ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
+ 0, Ty);
MinMaxCost +=
- (NumReduxLevels - LongVectorCount) *
+ NumReduxLevels *
(ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
nullptr));
- // Need 3 extractelement instructions for scalarization + an additional
- // scalar select instruction.
+ // The last min/max should be in vector registers and we counted it above.
+ // So just need a single extractelement.
return ShuffleCost + MinMaxCost +
- 3 * getScalarizationOverhead(Ty, /*Insert=*/false,
- /*Extract=*/true) +
- ConcreteTTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
- ScalarCondTy, nullptr);
+ ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
}
unsigned getVectorSplitCost() { return 1; }
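The funnel-shift costing added above prices the generic expansion fshl(X, Y, Z) = (X << (Z % BW)) | (Y >> (BW - (Z % BW))) as Or + Sub + Shl + LShr, adds a URem only when the shift amount is not a constant, and adds an icmp/select pair when X != Y to guard the shift-by-zero case. A scalar sketch of that expansion, written out for 32 bits (illustrative only; the explicit guard avoids the undefined shift by BW in C++):

  #include <cstdint>

  uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
    const uint32_t BW = 32;
    uint32_t Amt = Z % BW;                  // URem: dropped when Z is constant
    if (Amt == 0)                           // icmp + select: needed when X != Y
      return X;
    return (X << Amt) | (Y >> (BW - Amt));  // Shl, LShr, Sub, Or
  }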
diff --git a/contrib/llvm/include/llvm/CodeGen/GCs.h b/contrib/llvm/include/llvm/CodeGen/BuiltinGCs.h
index 5207f801c84e..1767922fb5ac 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCs.h
+++ b/contrib/llvm/include/llvm/CodeGen/BuiltinGCs.h
@@ -1,4 +1,4 @@
-//===-- GCs.h - Garbage collector linkage hacks ---------------------------===//
+//===-- BuiltinGCs.h - Garbage collector linkage hacks --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains hack functions to force linking in the GC components.
+// This file contains hack functions to force linking in the builtin GC
+// components.
//
//===----------------------------------------------------------------------===//
@@ -15,32 +16,18 @@
#define LLVM_CODEGEN_GCS_H
namespace llvm {
-class GCStrategy;
-class GCMetadataPrinter;
/// FIXME: Collector instances are not useful on their own. These no longer
/// serve any purpose except to link in the plugins.
-/// Creates a CoreCLR-compatible garbage collector.
-void linkCoreCLRGC();
-
-/// Creates an ocaml-compatible garbage collector.
-void linkOcamlGC();
+/// Ensure the definitions of the builtin GCs get linked in.
+void linkAllBuiltinGCs();
/// Creates an ocaml-compatible metadata printer.
void linkOcamlGCPrinter();
-/// Creates an erlang-compatible garbage collector.
-void linkErlangGC();
-
/// Creates an erlang-compatible metadata printer.
void linkErlangGCPrinter();
-
-/// Creates a shadow stack garbage collector. This collector requires no code
-/// generator support.
-void linkShadowStackGC();
-
-void linkStatepointExampleGC();
}
#endif
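The per-collector link hooks collapse into a single linkAllBuiltinGCs(), while the ocaml and erlang metadata printers keep their own hooks. A sketch of how a tool might keep the builtin strategies from being dead-stripped (the wrapper function is hypothetical; per the removed declarations, the builtin set covers the coreclr, ocaml, erlang, shadow-stack and statepoint-example collectors):

  #include "llvm/CodeGen/BuiltinGCs.h"

  static void forceLinkGCComponents() {
    llvm::linkAllBuiltinGCs();   // all builtin GC strategies
    llvm::linkOcamlGCPrinter();  // metadata printers are still linked separately
    llvm::linkErlangGCPrinter();
  }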
diff --git a/contrib/llvm/include/llvm/CodeGen/CommandFlags.inc b/contrib/llvm/include/llvm/CodeGen/CommandFlags.inc
index 7d2d167289e0..568d329a5e8c 100644
--- a/contrib/llvm/include/llvm/CodeGen/CommandFlags.inc
+++ b/contrib/llvm/include/llvm/CodeGen/CommandFlags.inc
@@ -74,7 +74,8 @@ static cl::opt<ThreadModel::Model> TMModel(
static cl::opt<llvm::CodeModel::Model> CMModel(
"code-model", cl::desc("Choose code model"),
- cl::values(clEnumValN(CodeModel::Small, "small", "Small code model"),
+ cl::values(clEnumValN(CodeModel::Tiny, "tiny", "Tiny code model"),
+ clEnumValN(CodeModel::Small, "small", "Small code model"),
clEnumValN(CodeModel::Kernel, "kernel", "Kernel code model"),
clEnumValN(CodeModel::Medium, "medium", "Medium code model"),
clEnumValN(CodeModel::Large, "large", "Large code model")));
@@ -113,10 +114,16 @@ static cl::opt<TargetMachine::CodeGenFileType> FileType(
clEnumValN(TargetMachine::CGFT_Null, "null",
"Emit nothing, for performance testing")));
-static cl::opt<bool>
- DisableFPElim("disable-fp-elim",
- cl::desc("Disable frame pointer elimination optimization"),
- cl::init(false));
+static cl::opt<llvm::FramePointer::FP> FramePointerUsage(
+ "frame-pointer", cl::desc("Specify frame pointer elimination optimization"),
+ cl::init(llvm::FramePointer::None),
+ cl::values(
+ clEnumValN(llvm::FramePointer::All, "all",
+ "Disable frame pointer elimination"),
+ clEnumValN(llvm::FramePointer::NonLeaf, "non-leaf",
+ "Disable frame pointer elimination for non-leaf frame"),
+ clEnumValN(llvm::FramePointer::None, "none",
+ "Enable frame pointer elimination")));
static cl::opt<bool> EnableUnsafeFPMath(
"enable-unsafe-fp-math",
@@ -367,9 +374,14 @@ setFunctionAttributes(StringRef CPU, StringRef Features, Module &M) {
NewAttrs.addAttribute("target-cpu", CPU);
if (!Features.empty())
NewAttrs.addAttribute("target-features", Features);
- if (DisableFPElim.getNumOccurrences() > 0)
- NewAttrs.addAttribute("no-frame-pointer-elim",
- DisableFPElim ? "true" : "false");
+ if (FramePointerUsage.getNumOccurrences() > 0) {
+ if (FramePointerUsage == llvm::FramePointer::All)
+ NewAttrs.addAttribute("frame-pointer", "all");
+ else if (FramePointerUsage == llvm::FramePointer::NonLeaf)
+ NewAttrs.addAttribute("frame-pointer", "non-leaf");
+ else if (FramePointerUsage == llvm::FramePointer::None)
+ NewAttrs.addAttribute("frame-pointer", "none");
+ }
if (DisableTailCalls.getNumOccurrences() > 0)
NewAttrs.addAttribute("disable-tail-calls",
toStringRef(DisableTailCalls));
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
index a262cb38b175..befc28f084e7 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/contrib/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ------*- C++ -*-===//
+//===- llvm/CodeGen/DbgEntityHistoryCalculator.h ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
+#ifndef LLVM_CODEGEN_DBGVALUEHISTORYCALCULATOR_H
+#define LLVM_CODEGEN_DBGVALUEHISTORYCALCULATOR_H
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -33,20 +33,19 @@ class DbgValueHistoryMap {
public:
using InstrRange = std::pair<const MachineInstr *, const MachineInstr *>;
using InstrRanges = SmallVector<InstrRange, 4>;
- using InlinedVariable =
- std::pair<const DILocalVariable *, const DILocation *>;
- using InstrRangesMap = MapVector<InlinedVariable, InstrRanges>;
+ using InlinedEntity = std::pair<const DINode *, const DILocation *>;
+ using InstrRangesMap = MapVector<InlinedEntity, InstrRanges>;
private:
InstrRangesMap VarInstrRanges;
public:
- void startInstrRange(InlinedVariable Var, const MachineInstr &MI);
- void endInstrRange(InlinedVariable Var, const MachineInstr &MI);
+ void startInstrRange(InlinedEntity Var, const MachineInstr &MI);
+ void endInstrRange(InlinedEntity Var, const MachineInstr &MI);
// Returns register currently describing @Var. If @Var is currently
 // inaccessible or is not described by a register, returns 0.
- unsigned getRegisterForVar(InlinedVariable Var) const;
+ unsigned getRegisterForVar(InlinedEntity Var) const;
bool empty() const { return VarInstrRanges.empty(); }
void clear() { VarInstrRanges.clear(); }
@@ -58,10 +57,31 @@ public:
#endif
};
-void calculateDbgValueHistory(const MachineFunction *MF,
- const TargetRegisterInfo *TRI,
- DbgValueHistoryMap &Result);
+/// For each inlined instance of a source-level label, keep the corresponding
+/// DBG_LABEL instruction. The DBG_LABEL instruction could be used to generate
+/// a temporary (assembler) label before it.
+class DbgLabelInstrMap {
+public:
+ using InlinedEntity = std::pair<const DINode *, const DILocation *>;
+ using InstrMap = MapVector<InlinedEntity, const MachineInstr *>;
+
+private:
+ InstrMap LabelInstr;
+
+public:
+ void addInstr(InlinedEntity Label, const MachineInstr &MI);
+
+ bool empty() const { return LabelInstr.empty(); }
+ void clear() { LabelInstr.clear(); }
+ InstrMap::const_iterator begin() const { return LabelInstr.begin(); }
+ InstrMap::const_iterator end() const { return LabelInstr.end(); }
+};
+
+void calculateDbgEntityHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &DbgValues,
+ DbgLabelInstrMap &DbgLabels);
} // end namespace llvm
-#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
+#endif // LLVM_CODEGEN_DBGVALUEHISTORYCALCULATOR_H
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/include/llvm/CodeGen/DebugHandlerBase.h
index 1ccefe32be75..4f0d14d317f2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
+++ b/contrib/llvm/include/llvm/CodeGen/DebugHandlerBase.h
@@ -1,4 +1,4 @@
-//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h --------*- C++ -*--===//
+//===-- llvm/CodeGen/DebugHandlerBase.h -----------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H
+#ifndef LLVM_CODEGEN_DEBUGHANDLERBASE_H
+#define LLVM_CODEGEN_DEBUGHANDLERBASE_H
-#include "AsmPrinterHandler.h"
-#include "DbgValueHistoryCalculator.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -82,6 +82,9 @@ protected:
/// variable. Variables are listed in order of appearance.
DbgValueHistoryMap DbgValues;
+ /// Mapping of inlined labels to DBG_LABEL machine instructions.
+ DbgLabelInstrMap DbgLabels;
+
/// Maps instruction with label emitted before instruction.
/// FIXME: Make this private from DwarfDebug, we have the necessary accessors
/// for it.
@@ -122,6 +125,10 @@ public:
/// Return Label immediately following the instruction.
MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+ /// Return the function-local offset of an instruction. A label for the
+ /// instruction \p MI should exist (\ref getLabelAfterInsn).
+ const MCExpr *getFunctionLocalOffsetAfterInsn(const MachineInstr *MI);
+
/// If this type is derived from a base type then return base type size.
static uint64_t getBaseTypeSize(const DITypeRef TyRef);
};
diff --git a/contrib/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h b/contrib/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
index e6c0483cfc35..8b1a7af17bbf 100644
--- a/contrib/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
+++ b/contrib/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
@@ -10,6 +10,7 @@
#ifndef LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H
#define LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/StringMap.h"
namespace llvm {
@@ -18,34 +19,52 @@ class MCSymbol;
/// Data for a string pool entry.
struct DwarfStringPoolEntry {
+ static constexpr unsigned NotIndexed = -1;
+
MCSymbol *Symbol;
unsigned Offset;
unsigned Index;
+
+ bool isIndexed() const { return Index != NotIndexed; }
};
/// String pool entry reference.
-struct DwarfStringPoolEntryRef {
- const StringMapEntry<DwarfStringPoolEntry> *I = nullptr;
+class DwarfStringPoolEntryRef {
+ PointerIntPair<const StringMapEntry<DwarfStringPoolEntry> *, 1, bool>
+ MapEntryAndIndexed;
+
+ const StringMapEntry<DwarfStringPoolEntry> *getMapEntry() const {
+ return MapEntryAndIndexed.getPointer();
+ }
public:
DwarfStringPoolEntryRef() = default;
- explicit DwarfStringPoolEntryRef(
- const StringMapEntry<DwarfStringPoolEntry> &I)
- : I(&I) {}
+ DwarfStringPoolEntryRef(const StringMapEntry<DwarfStringPoolEntry> &Entry,
+ bool Indexed)
+ : MapEntryAndIndexed(&Entry, Indexed) {}
- explicit operator bool() const { return I; }
+ explicit operator bool() const { return getMapEntry(); }
MCSymbol *getSymbol() const {
- assert(I->second.Symbol && "No symbol available!");
- return I->second.Symbol;
+ assert(getMapEntry()->second.Symbol && "No symbol available!");
+ return getMapEntry()->second.Symbol;
}
- unsigned getOffset() const { return I->second.Offset; }
- unsigned getIndex() const { return I->second.Index; }
- StringRef getString() const { return I->first(); }
+ unsigned getOffset() const { return getMapEntry()->second.Offset; }
+ bool isIndexed() const { return MapEntryAndIndexed.getInt(); }
+ unsigned getIndex() const {
+ assert(isIndexed());
+ assert(getMapEntry()->getValue().isIndexed());
+ return getMapEntry()->second.Index;
+ }
+ StringRef getString() const { return getMapEntry()->first(); }
/// Return the entire string pool entry for convenience.
- DwarfStringPoolEntry getEntry() const { return I->getValue(); }
+ DwarfStringPoolEntry getEntry() const { return getMapEntry()->getValue(); }
- bool operator==(const DwarfStringPoolEntryRef &X) const { return I == X.I; }
- bool operator!=(const DwarfStringPoolEntryRef &X) const { return I != X.I; }
+ bool operator==(const DwarfStringPoolEntryRef &X) const {
+ return getMapEntry() == X.getMapEntry();
+ }
+ bool operator!=(const DwarfStringPoolEntryRef &X) const {
+ return getMapEntry() != X.getMapEntry();
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index 2da00b7d61ab..7c658515de09 100644
--- a/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -1,4 +1,4 @@
-//===- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen ---===//
+//===- FunctionLoweringInfo.h - Lower functions from LLVM IR ---*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -246,6 +246,7 @@ public:
return 0;
unsigned &R = ValueMap[V];
assert(R == 0 && "Already initialized this value register!");
+ assert(VirtReg2Value.empty());
return R = CreateRegs(V->getType());
}
diff --git a/contrib/llvm/include/llvm/CodeGen/GCMetadata.h b/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
index ad2599fc120e..7fb27202c122 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
@@ -55,12 +55,11 @@ class MCSymbol;
/// GCPoint - Metadata for a collector-safe point in machine code.
///
struct GCPoint {
- GC::PointKind Kind; ///< The kind of the safe point.
MCSymbol *Label; ///< A label.
DebugLoc Loc;
- GCPoint(GC::PointKind K, MCSymbol *L, DebugLoc DL)
- : Kind(K), Label(L), Loc(std::move(DL)) {}
+ GCPoint(MCSymbol *L, DebugLoc DL)
+ : Label(L), Loc(std::move(DL)) {}
};
/// GCRoot - Metadata for a pointer to an object managed by the garbage
@@ -124,8 +123,8 @@ public:
/// addSafePoint - Notes the existence of a safe point. Num is the ID of the
/// label just prior to the safe point (if the code generator is using
/// MachineModuleInfo).
- void addSafePoint(GC::PointKind Kind, MCSymbol *Label, const DebugLoc &DL) {
- SafePoints.emplace_back(Kind, Label, DL);
+ void addSafePoint(MCSymbol *Label, const DebugLoc &DL) {
+ SafePoints.emplace_back(Label, DL);
}
/// getFrameSize/setFrameSize - Records the function's frame size.
diff --git a/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h b/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h
index 1cc69a7b71af..5f1efb2ce02c 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h
@@ -29,6 +29,7 @@ class GCMetadataPrinter;
class GCModuleInfo;
class GCStrategy;
class Module;
+class StackMaps;
/// GCMetadataPrinterRegistry - The GC assembly printer registry uses all the
/// defaults from Registry.
@@ -60,6 +61,11 @@ public:
/// Called after the assembly for the module is generated by
/// the AsmPrinter (but before target specific hooks)
virtual void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) {}
+
+ /// Called when the stack maps are generated. Return true if
+ /// stack maps with a custom format are generated. Otherwise
+ /// returns false and the default format will be used.
+ virtual bool emitStackMaps(StackMaps &SM, AsmPrinter &AP) { return false; }
};
} // end namespace llvm
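The new emitStackMaps hook lets a GC metadata printer take over stack-map emission; returning false keeps the default format. A hedged sketch of a printer opting in (the class name is hypothetical and the emission body is elided):

  #include "llvm/CodeGen/GCMetadataPrinter.h"
  #include "llvm/CodeGen/StackMaps.h"

  class CustomFormatGCPrinter : public llvm::GCMetadataPrinter {
  public:
    bool emitStackMaps(llvm::StackMaps &SM, llvm::AsmPrinter &AP) override {
      // Serialize SM in a GC-specific encoding through AP here...
      return true;  // true suppresses the default stack map format
    }
  };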
diff --git a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
index f835bacfb548..5a60cd7cb823 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
@@ -59,19 +59,6 @@ namespace llvm {
class Type;
-namespace GC {
-
-/// PointKind - Used to indicate whether the address of the call instruction
-/// or the address after the call instruction is listed in the stackmap. For
-/// most runtimes, PostCall safepoints are appropriate.
-///
-enum PointKind {
- PreCall, ///< Instr is a call instruction.
- PostCall ///< Instr is the return address of a call.
-};
-
-} // end namespace GC
-
/// GCStrategy describes a garbage collector algorithm's code generation
/// requirements, and provides overridable hooks for those needs which cannot
/// be abstractly described. GCStrategy objects must be looked up through
@@ -88,11 +75,7 @@ protected:
/// if set, none of the other options can be
/// anything but their default values.
- unsigned NeededSafePoints = 0; ///< Bitmask of required safe points.
- bool CustomReadBarriers = false; ///< Default is to insert loads.
- bool CustomWriteBarriers = false; ///< Default is to insert stores.
- bool CustomRoots = false; ///< Default is to pass through to backend.
- bool InitRoots= true; ///< If set, roots are nulled during lowering.
+ bool NeededSafePoints = false; ///< If set, calls are inferred to be safepoints.
bool UsesMetadata = false; ///< If set, backend must emit metadata tables.
public:
@@ -103,16 +86,6 @@ public:
/// name string specified on functions which use this strategy.
const std::string &getName() const { return Name; }
- /// By default, write barriers are replaced with simple store
- /// instructions. If true, you must provide a custom pass to lower
- /// calls to \@llvm.gcwrite.
- bool customWriteBarrier() const { return CustomWriteBarriers; }
-
- /// By default, read barriers are replaced with simple load
- /// instructions. If true, you must provide a custom pass to lower
- /// calls to \@llvm.gcread.
- bool customReadBarrier() const { return CustomReadBarriers; }
-
/// Returns true if this strategy is expecting the use of gc.statepoints,
/// and false otherwise.
bool useStatepoints() const { return UseStatepoints; }
@@ -135,25 +108,8 @@ public:
*/
///@{
- /// True if safe points of any kind are required. By default, none are
- /// recorded.
- bool needsSafePoints() const { return NeededSafePoints != 0; }
-
- /// True if the given kind of safe point is required. By default, none are
- /// recorded.
- bool needsSafePoint(GC::PointKind Kind) const {
- return (NeededSafePoints & 1 << Kind) != 0;
- }
-
- /// By default, roots are left for the code generator so it can generate a
- /// stack map. If true, you must provide a custom pass to lower
- /// calls to \@llvm.gcroot.
- bool customRoots() const { return CustomRoots; }
-
- /// If set, gcroot intrinsics should initialize their allocas to null
- /// before the first use. This is necessary for most GCs and is enabled by
- /// default.
- bool initializeRoots() const { return InitRoots; }
+ /// True if safe points need to be inferred on call sites
+ bool needsSafePoints() const { return NeededSafePoints; }
/// If set, appropriate metadata tables must be emitted by the back-end
/// (assembler, JIT, or otherwise). For statepoint, this method is
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
new file mode 100644
index 000000000000..ce2d285a99e5
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
@@ -0,0 +1,237 @@
+//===- llvm/CodeGen/GlobalISel/CSEInfo.h ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// Provides analysis for continuously CSEing during GISel passes.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_GLOBALISEL_CSEINFO_H
+#define LLVM_CODEGEN_GLOBALISEL_CSEINFO_H
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+/// A class that wraps MachineInstrs and derives from FoldingSetNode in order to
+/// be uniqued in a CSEMap. The tradeoff here is extra memory allocations for
+/// UniqueMachineInstr vs making MachineInstr bigger.
+class UniqueMachineInstr : public FoldingSetNode {
+ friend class GISelCSEInfo;
+ const MachineInstr *MI;
+ explicit UniqueMachineInstr(const MachineInstr *MI) : MI(MI) {}
+
+public:
+ void Profile(FoldingSetNodeID &ID);
+};
+
+// Class representing some configuration that can be done during CSE analysis.
+// Currently it only supports the shouldCSEOpc hook that each pass can override.
+class CSEConfig {
+public:
+ virtual ~CSEConfig() = default;
+ // Hook for defining which Generic instructions should be CSEd.
+ // GISelCSEInfo currently only calls this hook when dealing with generic
+ // opcodes.
+ virtual bool shouldCSEOpc(unsigned Opc);
+};
+
+// TODO: Find a better place for this.
+// Commonly used for O0 config.
+class CSEConfigConstantOnly : public CSEConfig {
+public:
+ virtual ~CSEConfigConstantOnly() = default;
+ virtual bool shouldCSEOpc(unsigned Opc) override;
+};
+
+/// The CSE Analysis object.
+/// This installs itself as a delegate to the MachineFunction to track
+/// new instructions as well as deletions. It however will not be able to
+/// track instruction mutations. In such cases, recordNewInstruction should be
+/// called (e.g. inside MachineIRBuilder::recordInsertion).
+/// Also, because an instruction can be inserted before any operands have been
+/// added to it, instructions are uniqued and inserted lazily.
+/// CSEInfo should assert when trying to enter an incomplete instruction into
+/// the CSEMap. There is Opcode level granularity on which instructions can be
+/// CSE'd and for now, only Generic instructions are CSEable.
+class GISelCSEInfo : public GISelChangeObserver {
+ // Make it accessible only to CSEMIRBuilder.
+ friend class CSEMIRBuilder;
+
+ BumpPtrAllocator UniqueInstrAllocator;
+ FoldingSet<UniqueMachineInstr> CSEMap;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineFunction *MF = nullptr;
+ std::unique_ptr<CSEConfig> CSEOpt;
+ /// Keep a cache of UniqueInstrs for each MachineInstr. In GISel,
+ /// instructions are often mutated (and their ID completely changes).
+ /// Whenever a mutation happens, invalidate the UniqueMachineInstr for the
+ /// MachineInstr.
+ DenseMap<const MachineInstr *, UniqueMachineInstr *> InstrMapping;
+
+ /// Store instructions that are not fully formed in TemporaryInsts.
+ /// Also because CSE insertion happens lazily, we can remove insts from this
+ /// list and avoid inserting and then removing from the CSEMap.
+ GISelWorkList<8> TemporaryInsts;
+
+ // Only used in asserts.
+ DenseMap<unsigned, unsigned> OpcodeHitTable;
+
+ bool isUniqueMachineInstValid(const UniqueMachineInstr &UMI) const;
+
+ void invalidateUniqueMachineInstr(UniqueMachineInstr *UMI);
+
+ UniqueMachineInstr *getNodeIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB, void *&InsertPos);
+
+ /// Allocate and construct a new UniqueMachineInstr for MI and return.
+ UniqueMachineInstr *getUniqueInstrForMI(const MachineInstr *MI);
+
+ void insertNode(UniqueMachineInstr *UMI, void *InsertPos = nullptr);
+
+ /// Get the MachineInstr(Unique) if it exists already in the CSEMap and the
+ /// same MachineBasicBlock.
+ MachineInstr *getMachineInstrIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB,
+ void *&InsertPos);
+
+ /// Use this method to allocate a new UniqueMachineInstr for MI and insert it
+ /// into the CSEMap. shouldCSE(MI->getOpcode()) should return true for MI.
+ void insertInstr(MachineInstr *MI, void *InsertPos = nullptr);
+
+public:
+ GISelCSEInfo() = default;
+
+ virtual ~GISelCSEInfo();
+
+ void setMF(MachineFunction &MF);
+
+ /// Records a newly created inst in a list and lazily inserts it into the
+ /// CSEMap. Sometimes this method might be called with a partially constructed
+ /// MachineInstr (right after BuildMI, without any operands added yet); in
+ /// such cases the hashing of the instruction is deferred to a later stage.
+ void recordNewInstruction(MachineInstr *MI);
+
+ /// Use this callback to inform CSE about a newly fully created instruction.
+ void handleRecordedInst(MachineInstr *MI);
+
+ /// Use this callback to insert all the recorded instructions. At this point,
+ /// all of these insts need to be fully constructed and should not be missing
+ /// any operands.
+ void handleRecordedInsts();
+
+ /// Remove this inst from the CSE map. If this inst has not been inserted yet,
+ /// it will be removed from the TemporaryInsts list if it exists.
+ void handleRemoveInst(MachineInstr *MI);
+
+ void releaseMemory();
+
+ void setCSEConfig(std::unique_ptr<CSEConfig> Opt) { CSEOpt = std::move(Opt); }
+
+ bool shouldCSE(unsigned Opc) const;
+
+ void analyze(MachineFunction &MF);
+
+ void countOpcodeHit(unsigned Opc);
+
+ void print();
+
+ // Observer API
+ void erasingInstr(MachineInstr &MI) override;
+ void createdInstr(MachineInstr &MI) override;
+ void changingInstr(MachineInstr &MI) override;
+ void changedInstr(MachineInstr &MI) override;
+};
+
+class TargetRegisterClass;
+class RegisterBank;
+
+// Simple builder class to easily profile properties about MIs.
+class GISelInstProfileBuilder {
+ FoldingSetNodeID &ID;
+ const MachineRegisterInfo &MRI;
+
+public:
+ GISelInstProfileBuilder(FoldingSetNodeID &ID, const MachineRegisterInfo &MRI)
+ : ID(ID), MRI(MRI) {}
+ // Profiling methods.
+ const GISelInstProfileBuilder &addNodeIDOpcode(unsigned Opc) const;
+ const GISelInstProfileBuilder &addNodeIDRegType(const LLT &Ty) const;
+ const GISelInstProfileBuilder &addNodeIDRegType(const unsigned) const;
+
+ const GISelInstProfileBuilder &
+ addNodeIDRegType(const TargetRegisterClass *RC) const;
+ const GISelInstProfileBuilder &addNodeIDRegType(const RegisterBank *RB) const;
+
+ const GISelInstProfileBuilder &addNodeIDRegNum(unsigned Reg) const;
+
+ const GISelInstProfileBuilder &addNodeIDImmediate(int64_t Imm) const;
+ const GISelInstProfileBuilder &
+ addNodeIDMBB(const MachineBasicBlock *MBB) const;
+
+ const GISelInstProfileBuilder &
+ addNodeIDMachineOperand(const MachineOperand &MO) const;
+
+ const GISelInstProfileBuilder &addNodeIDFlag(unsigned Flag) const;
+ const GISelInstProfileBuilder &addNodeID(const MachineInstr *MI) const;
+};
+
+/// Simple wrapper that does the following:
+/// 1) Lazily evaluates the MachineFunction to compute CSEable instructions.
+/// 2) Allows configuration of which instructions are CSEd through a CSEConfig
+///    object. Provides a method called get which takes a CSEConfig object.
+class GISelCSEAnalysisWrapper {
+ GISelCSEInfo Info;
+ MachineFunction *MF = nullptr;
+ bool AlreadyComputed = false;
+
+public:
+ /// Takes a CSEConfig object that defines what opcodes get CSEd.
+ /// If CSEConfig is already set, and the CSE Analysis has been preserved,
+ /// it will not use the new CSEOpt (use ReCompute to force using the new
+ /// CSEOpt).
+ GISelCSEInfo &get(std::unique_ptr<CSEConfig> CSEOpt, bool ReCompute = false);
+ void setMF(MachineFunction &MFunc) { MF = &MFunc; }
+ void setComputed(bool Computed) { AlreadyComputed = Computed; }
+ void releaseMemory() { Info.releaseMemory(); }
+};
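+
+// For illustration only: a client holding a wrapper `W` for MachineFunction
+// `MF` might do the following (`MyCSEConfig` is a hypothetical CSEConfig
+// subclass, not something this header provides):
+//
+//   W.setMF(MF);
+//   GISelCSEInfo &Info = W.get(llvm::make_unique<MyCSEConfig>());
+//   // Pass ReCompute = true to force re-analysis with a new config.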
+
+/// The actual analysis pass wrapper.
+class GISelCSEAnalysisWrapperPass : public MachineFunctionPass {
+ GISelCSEAnalysisWrapper Wrapper;
+
+public:
+ static char ID;
+ GISelCSEAnalysisWrapperPass() : MachineFunctionPass(ID) {
+ initializeGISelCSEAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ const GISelCSEAnalysisWrapper &getCSEWrapper() const { return Wrapper; }
+ GISelCSEAnalysisWrapper &getCSEWrapper() { return Wrapper; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void releaseMemory() override {
+ Wrapper.releaseMemory();
+ Wrapper.setComputed(false);
+ }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
new file mode 100644
index 000000000000..a8fb736ebbb5
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
@@ -0,0 +1,110 @@
+//===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.h --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a version of MachineIRBuilder which CSEs insts within
+/// a MachineBasicBlock.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_GLOBALISEL_CSEMIRBUILDER_H
+#define LLVM_CODEGEN_GLOBALISEL_CSEMIRBUILDER_H
+
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+
+namespace llvm {
+
+/// Defines a builder that does CSE of MachineInstructions using GISelCSEInfo.
+/// Eg usage:
+///
+///   GISelCSEInfo *Info =
+///       &getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEInfo();
+///   CSEMIRBuilder CB(Builder.getState());
+///   CB.setCSEInfo(Info);
+///   auto A = CB.buildConstant(s32, 42);
+///   auto B = CB.buildConstant(s32, 42);
+///   assert(A == B);
+///   unsigned CReg = MRI.createGenericVirtualRegister(s32);
+///   auto C = CB.buildConstant(CReg, 42);
+///   assert(C->getOpcode() == TargetOpcode::COPY);
+///
+/// Explicitly passing in a register would materialize a copy if possible.
+/// CSEMIRBuilder also does trivial constant folding for binary ops.
+class CSEMIRBuilder : public MachineIRBuilder {
+
+ /// Returns true if A dominates B (within the same basic block).
+ /// Both iterators must be in the same basic block.
+ //
+ // TODO: Another approach for checking dominance is having two iterators and
+ // making them go towards each other until they meet or reach begin/end. Which
+ // approach is better? Should this even change dynamically? For G_CONSTANTS
+ // most of which will be at the top of the BB, the top down approach would be
+ // a better choice. Does IRTranslator placing constants at the beginning still
+ // make sense? Should this change based on Opcode?
+ bool dominates(MachineBasicBlock::const_iterator A,
+ MachineBasicBlock::const_iterator B) const;
+
+  /// For a given ID, find a MachineInstr in the CSE Map. If found, check if it
+  /// dominates the current insertion point and, if not, move it just before the
+  /// current insertion point and return it. If not found, return a null
+  /// MachineInstrBuilder.
+ MachineInstrBuilder getDominatingInstrForID(FoldingSetNodeID &ID,
+ void *&NodeInsertPos);
+ /// Simple check if we can CSE (we have the CSEInfo) or if this Opcode is
+ /// safe to CSE.
+ bool canPerformCSEForOpc(unsigned Opc) const;
+
+ void profileDstOp(const DstOp &Op, GISelInstProfileBuilder &B) const;
+
+ void profileDstOps(ArrayRef<DstOp> Ops, GISelInstProfileBuilder &B) const {
+ for (const DstOp &Op : Ops)
+ profileDstOp(Op, B);
+ }
+
+ void profileSrcOp(const SrcOp &Op, GISelInstProfileBuilder &B) const;
+
+ void profileSrcOps(ArrayRef<SrcOp> Ops, GISelInstProfileBuilder &B) const {
+ for (const SrcOp &Op : Ops)
+ profileSrcOp(Op, B);
+ }
+
+ void profileMBBOpcode(GISelInstProfileBuilder &B, unsigned Opc) const;
+
+ void profileEverything(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps, Optional<unsigned> Flags,
+ GISelInstProfileBuilder &B) const;
+
+ // Takes a MachineInstrBuilder and inserts it into the CSEMap using the
+ // NodeInsertPos.
+ MachineInstrBuilder memoizeMI(MachineInstrBuilder MIB, void *NodeInsertPos);
+
+  // If we can CSE an instruction, but still need to materialize it to a VReg,
+  // we emit a copy from the CSE'd inst to the VReg.
+ MachineInstrBuilder generateCopiesIfRequired(ArrayRef<DstOp> DstOps,
+ MachineInstrBuilder &MIB);
+
+  // If we can CSE an instruction, but still need to materialize it to a VReg,
+  // check if we can generate the copies. It's not possible to return a single
+  // MIB while emitting copies to multiple vregs.
+ bool checkCopyToDefsPossible(ArrayRef<DstOp> DstOps);
+
+public:
+ // Pull in base class constructors.
+ using MachineIRBuilder::MachineIRBuilder;
+ // Unhide buildInstr
+ MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flag = None) override;
+ // Bring in the other overload from the base class.
+ using MachineIRBuilder::buildConstant;
+
+ MachineInstrBuilder buildConstant(const DstOp &Res,
+ const ConstantInt &Val) override;
+
+ // Bring in the other overload from the base class.
+ using MachineIRBuilder::buildFConstant;
+ MachineInstrBuilder buildFConstant(const DstOp &Res,
+ const ConstantFP &Val) override;
+};
+} // namespace llvm
+#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 58eb412d8c24..ab498e8f070b 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -40,6 +40,7 @@ class Value;
class CallLowering {
const TargetLowering *TLI;
+ virtual void anchor();
public:
struct ArgInfo {
unsigned Reg;
@@ -108,6 +109,9 @@ public:
MachineIRBuilder &MIRBuilder;
MachineRegisterInfo &MRI;
CCAssignFn *AssignFn;
+
+ private:
+ virtual void anchor();
};
protected:
@@ -138,12 +142,12 @@ public:
virtual ~CallLowering() = default;
/// This hook must be implemented to lower outgoing return values, described
- /// by \p Val, into the specified virtual register \p VReg.
+ /// by \p Val, into the specified virtual registers \p VRegs.
/// This hook is used by GlobalISel.
///
/// \return True if the lowering succeeds, false otherwise.
- virtual bool lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
+ virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
return false;
}
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
index 36a33deb4a64..b097c7817762 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
@@ -21,6 +21,7 @@
namespace llvm {
class MachineRegisterInfo;
class CombinerInfo;
+class GISelCSEInfo;
class TargetPassConfig;
class MachineFunction;
@@ -28,14 +29,17 @@ class Combiner {
public:
Combiner(CombinerInfo &CombinerInfo, const TargetPassConfig *TPC);
- bool combineMachineInstrs(MachineFunction &MF);
+  /// If CSEInfo is not null, then the Combiner will set up an observer for
+  /// CSEInfo and instantiate a CSEMIRBuilder. Pass nullptr if CSE is not
+  /// needed.
+ bool combineMachineInstrs(MachineFunction &MF, GISelCSEInfo *CSEInfo);
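+
+  // Purely illustrative call (CInfo, TPC, MF and CSEInfo are the caller's own
+  // objects, not provided by this header):
+  //   Combiner C(CInfo, TPC);
+  //   C.combineMachineInstrs(MF, CSEInfo); // or nullptr to skip CSE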
protected:
CombinerInfo &CInfo;
MachineRegisterInfo *MRI = nullptr;
const TargetPassConfig *TPC;
- MachineIRBuilder Builder;
+ std::unique_ptr<MachineIRBuilder> Builder;
};
} // End namespace llvm.
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 5d5b8398452c..6e9ac01c1ee2 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -1,4 +1,4 @@
-//== llvm/CodeGen/GlobalISel/CombinerHelper.h -------------- -*- C++ -*-==//
+//===-- llvm/CodeGen/GlobalISel/CombinerHelper.h --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,21 +20,36 @@
namespace llvm {
+class GISelChangeObserver;
class MachineIRBuilder;
class MachineRegisterInfo;
class MachineInstr;
+class MachineOperand;
class CombinerHelper {
MachineIRBuilder &Builder;
MachineRegisterInfo &MRI;
+ GISelChangeObserver &Observer;
public:
- CombinerHelper(MachineIRBuilder &B);
+ CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B);
+
+  /// Call MachineRegisterInfo::replaceRegWith() and inform the observer of the
+  /// changes.
+ void replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg, unsigned ToReg) const;
+
+ /// Replace a single register operand with a new register and inform the
+ /// observer of the changes.
+ void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp,
+ unsigned ToReg) const;
/// If \p MI is COPY, try to combine it.
/// Returns true if MI changed.
bool tryCombineCopy(MachineInstr &MI);
+  /// If \p MI is an extend that consumes the result of a load, try to combine it.
+ /// Returns true if MI changed.
+ bool tryCombineExtendingLoads(MachineInstr &MI);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index 1d248547adbf..d21aa3f725d9 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -17,10 +17,12 @@
#include <cassert>
namespace llvm {
+class GISelChangeObserver;
class LegalizerInfo;
class MachineInstr;
class MachineIRBuilder;
class MachineRegisterInfo;
+
// Contains information relevant to enabling/disabling various combines for a
// pass.
class CombinerInfo {
@@ -41,7 +43,19 @@ public:
/// illegal ops that are created.
bool LegalizeIllegalOps; // TODO: Make use of this.
const LegalizerInfo *LInfo;
- virtual bool combine(MachineInstr &MI, MachineIRBuilder &B) const = 0;
+
+ /// Attempt to combine instructions using MI as the root.
+ ///
+ /// Use Observer to report the creation, modification, and erasure of
+ /// instructions. GISelChangeObserver will automatically report certain
+ /// kinds of operations. These operations are:
+ /// * Instructions that are newly inserted into the MachineFunction
+ /// * Instructions that are erased from the MachineFunction.
+ ///
+  /// Instruction modification, however, must be reported explicitly; it is not
+  /// automatic.
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const = 0;
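+
+  // A sketch of the reporting contract above, as it might look inside an
+  // override (illustrative; `NewReg` and the particular mutation are made up):
+  //
+  //   Observer.changingInstr(MI);      // about to mutate MI in place
+  //   MI.getOperand(1).setReg(NewReg); // some in-place modification
+  //   Observer.changedInstr(MI);       // finished mutating MI
+  //   // Newly created or erased instructions are reported automatically.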
};
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
index 8d61f9a68279..220a571b21db 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
@@ -15,91 +15,20 @@
namespace llvm {
-static Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
- const unsigned Op2,
- const MachineRegisterInfo &MRI) {
- auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
- auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI);
- if (MaybeOp1Cst && MaybeOp2Cst) {
- LLT Ty = MRI.getType(Op1);
- APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
- APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true);
- switch (Opcode) {
- default:
- break;
- case TargetOpcode::G_ADD:
- return C1 + C2;
- case TargetOpcode::G_AND:
- return C1 & C2;
- case TargetOpcode::G_ASHR:
- return C1.ashr(C2);
- case TargetOpcode::G_LSHR:
- return C1.lshr(C2);
- case TargetOpcode::G_MUL:
- return C1 * C2;
- case TargetOpcode::G_OR:
- return C1 | C2;
- case TargetOpcode::G_SHL:
- return C1 << C2;
- case TargetOpcode::G_SUB:
- return C1 - C2;
- case TargetOpcode::G_XOR:
- return C1 ^ C2;
- case TargetOpcode::G_UDIV:
- if (!C2.getBoolValue())
- break;
- return C1.udiv(C2);
- case TargetOpcode::G_SDIV:
- if (!C2.getBoolValue())
- break;
- return C1.sdiv(C2);
- case TargetOpcode::G_UREM:
- if (!C2.getBoolValue())
- break;
- return C1.urem(C2);
- case TargetOpcode::G_SREM:
- if (!C2.getBoolValue())
- break;
- return C1.srem(C2);
- }
- }
- return None;
-}
-
/// An MIRBuilder which does trivial constant folding of binary ops.
/// Calls to buildInstr will also try to constant fold binary ops.
-class ConstantFoldingMIRBuilder
- : public FoldableInstructionsBuilder<ConstantFoldingMIRBuilder> {
+class ConstantFoldingMIRBuilder : public MachineIRBuilder {
public:
// Pull in base class constructors.
- using FoldableInstructionsBuilder<
- ConstantFoldingMIRBuilder>::FoldableInstructionsBuilder;
- // Unhide buildInstr
- using FoldableInstructionsBuilder<ConstantFoldingMIRBuilder>::buildInstr;
+ using MachineIRBuilder::MachineIRBuilder;
- // Implement buildBinaryOp required by FoldableInstructionsBuilder which
- // tries to constant fold.
- MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Dst,
- unsigned Src0, unsigned Src1) {
- validateBinaryOp(Dst, Src0, Src1);
- auto MaybeCst = ConstantFoldBinOp(Opcode, Src0, Src1, getMF().getRegInfo());
- if (MaybeCst)
- return buildConstant(Dst, MaybeCst->getSExtValue());
- return buildInstr(Opcode).addDef(Dst).addUse(Src0).addUse(Src1);
- }
-
- template <typename DstTy, typename UseArg1Ty, typename UseArg2Ty>
- MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, UseArg1Ty &&Arg1,
- UseArg2Ty &&Arg2) {
- unsigned Dst = getDestFromArg(Ty);
- return buildInstr(Opc, Dst, getRegFromArg(std::forward<UseArg1Ty>(Arg1)),
- getRegFromArg(std::forward<UseArg2Ty>(Arg2)));
- }
+ virtual ~ConstantFoldingMIRBuilder() = default;
// Try to provide an overload for buildInstr for binary ops in order to
// constant fold.
- MachineInstrBuilder buildInstr(unsigned Opc, unsigned Dst, unsigned Src0,
- unsigned Src1) {
+ MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flags = None) override {
switch (Opc) {
default:
break;
@@ -116,19 +45,18 @@ public:
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UREM:
case TargetOpcode::G_SREM: {
- return buildBinaryOp(Opc, Dst, Src0, Src1);
+ assert(DstOps.size() == 1 && "Invalid dst ops");
+ assert(SrcOps.size() == 2 && "Invalid src ops");
+ const DstOp &Dst = DstOps[0];
+ const SrcOp &Src0 = SrcOps[0];
+ const SrcOp &Src1 = SrcOps[1];
+ if (auto MaybeCst =
+ ConstantFoldBinOp(Opc, Src0.getReg(), Src1.getReg(), *getMRI()))
+ return buildConstant(Dst, MaybeCst->getSExtValue());
+ break;
}
}
- return buildInstr(Opc).addDef(Dst).addUse(Src0).addUse(Src1);
- }
-
- // Fallback implementation of buildInstr.
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty,
- UseArgsTy &&... Args) {
- auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty));
- addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...);
- return MIB;
+ return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps);
}
};
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
new file mode 100644
index 000000000000..c8e8a7a5a7cb
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
@@ -0,0 +1,111 @@
+//===----- llvm/CodeGen/GlobalISel/GISelChangeObserver.h ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// This contains common code for notifying observers about changes to
+/// machine instructions.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_GLOBALISEL_GISELCHANGEOBSERVER_H
+#define LLVM_CODEGEN_GLOBALISEL_GISELCHANGEOBSERVER_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+class MachineInstr;
+class MachineRegisterInfo;
+
+/// Abstract class that contains various methods for clients to notify about
+/// changes. This should be the preferred way for APIs to notify about changes.
+/// Typically, calling erasingInstr/createdInstr multiple times for the same
+/// instruction should not affect the result. The observer would likely need to
+/// check whether it was already notified earlier (consider using
+/// GISelWorkList).
+class GISelChangeObserver {
+ SmallPtrSet<MachineInstr *, 4> ChangingAllUsesOfReg;
+
+public:
+ virtual ~GISelChangeObserver() {}
+
+ /// An instruction is about to be erased.
+ virtual void erasingInstr(MachineInstr &MI) = 0;
+ /// An instruction was created and inserted into the function.
+ virtual void createdInstr(MachineInstr &MI) = 0;
+ /// This instruction is about to be mutated in some way.
+ virtual void changingInstr(MachineInstr &MI) = 0;
+ /// This instruction was mutated in some way.
+ virtual void changedInstr(MachineInstr &MI) = 0;
+
+ /// All the instructions using the given register are being changed.
+ /// For convenience, finishedChangingAllUsesOfReg() will report the completion
+ /// of the changes. The use list may change between this call and
+ /// finishedChangingAllUsesOfReg().
+ void changingAllUsesOfReg(const MachineRegisterInfo &MRI, unsigned Reg);
+ /// All instructions reported as changing by changingAllUsesOfReg() have
+ /// finished being changed.
+ void finishedChangingAllUsesOfReg();
+
+};
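+
+// A minimal observer is just a subclass that overrides the four callbacks.
+// For example (an illustrative sketch, not an interface this header provides):
+//
+//   struct InstCollector : public GISelChangeObserver {
+//     SmallPtrSet<MachineInstr *, 8> Created;
+//     void createdInstr(MachineInstr &MI) override { Created.insert(&MI); }
+//     void erasingInstr(MachineInstr &MI) override { Created.erase(&MI); }
+//     void changingInstr(MachineInstr &MI) override {}
+//     void changedInstr(MachineInstr &MI) override {}
+//   };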
+
+/// Simple wrapper observer that takes several observers and calls
+/// each one for each event. If there are multiple observers (say CSE,
+/// Legalizer, Combiner), it's sufficient to register this wrapper with the
+/// machine function as the delegate.
+class GISelObserverWrapper : public MachineFunction::Delegate,
+ public GISelChangeObserver {
+ SmallVector<GISelChangeObserver *, 4> Observers;
+
+public:
+ GISelObserverWrapper() = default;
+ GISelObserverWrapper(ArrayRef<GISelChangeObserver *> Obs)
+ : Observers(Obs.begin(), Obs.end()) {}
+ // Adds an observer.
+ void addObserver(GISelChangeObserver *O) { Observers.push_back(O); }
+  // Removes an observer from the list; does nothing if the observer is not
+  // present.
+ void removeObserver(GISelChangeObserver *O) {
+ auto It = std::find(Observers.begin(), Observers.end(), O);
+ if (It != Observers.end())
+ Observers.erase(It);
+ }
+ // API for Observer.
+ void erasingInstr(MachineInstr &MI) override {
+ for (auto &O : Observers)
+ O->erasingInstr(MI);
+ }
+ void createdInstr(MachineInstr &MI) override {
+ for (auto &O : Observers)
+ O->createdInstr(MI);
+ }
+ void changingInstr(MachineInstr &MI) override {
+ for (auto &O : Observers)
+ O->changingInstr(MI);
+ }
+ void changedInstr(MachineInstr &MI) override {
+ for (auto &O : Observers)
+ O->changedInstr(MI);
+ }
+ // API for MachineFunction::Delegate
+ void MF_HandleInsertion(MachineInstr &MI) override { createdInstr(MI); }
+ void MF_HandleRemoval(MachineInstr &MI) override { erasingInstr(MI); }
+};
+
+/// A simple RAII based Delegate installer.
+/// Use this in a scope to install a delegate to the MachineFunction and reset
+/// it at the end of the scope.
+class RAIIDelegateInstaller {
+ MachineFunction &MF;
+ MachineFunction::Delegate *Delegate;
+
+public:
+ RAIIDelegateInstaller(MachineFunction &MF, MachineFunction::Delegate *Del);
+ ~RAIIDelegateInstaller();
+};
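+
+// Putting the pieces above together (an illustrative sketch; `CSEInfo` and
+// `Collector` stand for any GISelChangeObserver implementations, and `MF` is
+// the caller's MachineFunction):
+//
+//   GISelObserverWrapper AllObservers({&CSEInfo, &Collector});
+//   RAIIDelegateInstaller Install(MF, &AllObservers);
+//   // ... transform MF; every insertion/removal reaches both observers ...
+//   // The installer's destructor resets the delegate at end of scope.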
+
+} // namespace llvm
+#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
index 167905dc9aa1..1571841a208d 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
@@ -12,38 +12,42 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/Debug.h"
namespace llvm {
class MachineInstr;
+class MachineFunction;
-// Worklist which mostly works similar to InstCombineWorkList, but on MachineInstrs.
-// The main difference with something like a SetVector is that erasing an element doesn't
-// move all elements over one place - instead just nulls out the element of the vector.
-// FIXME: Does it make sense to factor out common code with the instcombinerWorkList?
+// Worklist which mostly works similar to InstCombineWorkList, but on
+// MachineInstrs. The main difference with something like a SetVector is that
+// erasing an element doesn't move all elements over one place - instead just
+// nulls out the element of the vector.
+//
+// FIXME: Does it make sense to factor out common code with the
+// InstCombineWorkList?
template<unsigned N>
class GISelWorkList {
- SmallVector<MachineInstr*, N> Worklist;
- DenseMap<MachineInstr*, unsigned> WorklistMap;
+ SmallVector<MachineInstr *, N> Worklist;
+ DenseMap<MachineInstr *, unsigned> WorklistMap;
public:
- GISelWorkList() = default;
+ GISelWorkList() {}
bool empty() const { return WorklistMap.empty(); }
unsigned size() const { return WorklistMap.size(); }
- /// Add - Add the specified instruction to the worklist if it isn't already
- /// in it.
+ /// Add the specified instruction to the worklist if it isn't already in it.
void insert(MachineInstr *I) {
- if (WorklistMap.try_emplace(I, Worklist.size()).second) {
+ if (WorklistMap.try_emplace(I, Worklist.size()).second)
Worklist.push_back(I);
- }
}
- /// Remove - remove I from the worklist if it exists.
- void remove(MachineInstr *I) {
+ /// Remove I from the worklist if it exists.
+ void remove(const MachineInstr *I) {
auto It = WorklistMap.find(I);
if (It == WorklistMap.end()) return; // Not in worklist.
@@ -53,6 +57,11 @@ public:
WorklistMap.erase(It);
}
+ void clear() {
+ Worklist.clear();
+ WorklistMap.clear();
+ }
+
MachineInstr *pop_back_val() {
MachineInstr *I;
do {
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 2498ee933210..d1770bf6e4ce 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -21,11 +21,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Types.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/Allocator.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Allocator.h"
#include <memory>
#include <utility>
@@ -300,6 +300,8 @@ private:
bool translateFSub(const User &U, MachineIRBuilder &MIRBuilder);
+ bool translateFNeg(const User &U, MachineIRBuilder &MIRBuilder);
+
bool translateAdd(const User &U, MachineIRBuilder &MIRBuilder) {
return translateBinaryOp(TargetOpcode::G_ADD, U, MIRBuilder);
}
@@ -442,11 +444,13 @@ private:
// I.e., compared to regular MIBuilder, this one also inserts the instruction
  // in the current block, it can create blocks, etc., basically a kind of
// IRBuilder, but for Machine IR.
- MachineIRBuilder CurBuilder;
+ // CSEMIRBuilder CurBuilder;
+ std::unique_ptr<MachineIRBuilder> CurBuilder;
// Builder set to the entry block (just after ABI lowering instructions). Used
// as a convenient location for Constants.
- MachineIRBuilder EntryBuilder;
+ // CSEMIRBuilder EntryBuilder;
+ std::unique_ptr<MachineIRBuilder> EntryBuilder;
// The MachineFunction currently being translated.
MachineFunction *MF;
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 873587651efd..20bec7650179 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h --===========//
+//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
//
// The LLVM Compiler Infrastructure
//
@@ -14,12 +14,14 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "legalizer"
+using namespace llvm::MIPatternMatch;
namespace llvm {
class LegalizationArtifactCombiner {
@@ -36,15 +38,29 @@ public:
SmallVectorImpl<MachineInstr *> &DeadInsts) {
if (MI.getOpcode() != TargetOpcode::G_ANYEXT)
return false;
- if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC,
- MI.getOperand(1).getReg(), MRI)) {
+
+ Builder.setInstr(MI);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+
+    // aext(trunc x) -> aext/copy/trunc x
+ unsigned TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = DefMI->getOperand(1).getReg();
- Builder.setInstr(MI);
- // We get a copy/trunc/extend depending on the sizes
- Builder.buildAnyExtOrTrunc(DstReg, SrcReg);
- markInstAndDefDead(MI, *DefMI, DeadInsts);
+ Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
+ markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
+ return true;
+ }
+
+ // aext([asz]ext x) -> [asz]ext x
+ unsigned ExtSrc;
+ MachineInstr *ExtMI;
+ if (mi_match(SrcReg, MRI,
+ m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
+ m_GSExt(m_Reg(ExtSrc)),
+ m_GZExt(m_Reg(ExtSrc)))))) {
+ Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
+ markInstAndDefDead(MI, *ExtMI, DeadInsts);
return true;
}
return tryFoldImplicitDef(MI, DeadInsts);
@@ -55,24 +71,25 @@ public:
if (MI.getOpcode() != TargetOpcode::G_ZEXT)
return false;
- if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC,
- MI.getOperand(1).getReg(), MRI)) {
- unsigned DstReg = MI.getOperand(0).getReg();
+
+ Builder.setInstr(MI);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+
+    // zext(trunc x) -> and (aext/copy/trunc x), mask
+ unsigned TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLT DstTy = MRI.getType(DstReg);
if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
return false;
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
- Builder.setInstr(MI);
- unsigned ZExtSrc = MI.getOperand(1).getReg();
- LLT ZExtSrcTy = MRI.getType(ZExtSrc);
- APInt Mask = APInt::getAllOnesValue(ZExtSrcTy.getSizeInBits());
- auto MaskCstMIB = Builder.buildConstant(DstTy, Mask.getZExtValue());
- unsigned TruncSrc = DefMI->getOperand(1).getReg();
- // We get a copy/trunc/extend depending on the sizes
- auto SrcCopyOrTrunc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc);
- Builder.buildAnd(DstReg, SrcCopyOrTrunc, MaskCstMIB);
- markInstAndDefDead(MI, *DefMI, DeadInsts);
+ LLT SrcTy = MRI.getType(SrcReg);
+ APInt Mask = APInt::getAllOnesValue(SrcTy.getSizeInBits());
+ auto MIBMask = Builder.buildConstant(DstTy, Mask.getZExtValue());
+ Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc),
+ MIBMask);
+ markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
return tryFoldImplicitDef(MI, DeadInsts);
@@ -83,33 +100,34 @@ public:
if (MI.getOpcode() != TargetOpcode::G_SEXT)
return false;
- if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC,
- MI.getOperand(1).getReg(), MRI)) {
- unsigned DstReg = MI.getOperand(0).getReg();
+
+ Builder.setInstr(MI);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+
+    // sext(trunc x) -> ashr (shl (aext/copy/trunc x), c), c
+ unsigned TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLT DstTy = MRI.getType(DstReg);
if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy}}) ||
isInstUnsupported({TargetOpcode::G_ASHR, {DstTy}}) ||
isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
return false;
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
- Builder.setInstr(MI);
- unsigned SExtSrc = MI.getOperand(1).getReg();
- LLT SExtSrcTy = MRI.getType(SExtSrc);
- unsigned SizeDiff = DstTy.getSizeInBits() - SExtSrcTy.getSizeInBits();
- auto SizeDiffMIB = Builder.buildConstant(DstTy, SizeDiff);
- unsigned TruncSrcReg = DefMI->getOperand(1).getReg();
- // We get a copy/trunc/extend depending on the sizes
- auto SrcCopyExtOrTrunc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrcReg);
- auto ShlMIB = Builder.buildInstr(TargetOpcode::G_SHL, DstTy,
- SrcCopyExtOrTrunc, SizeDiffMIB);
- Builder.buildInstr(TargetOpcode::G_ASHR, DstReg, ShlMIB, SizeDiffMIB);
- markInstAndDefDead(MI, *DefMI, DeadInsts);
+ LLT SrcTy = MRI.getType(SrcReg);
+ unsigned ShAmt = DstTy.getSizeInBits() - SrcTy.getSizeInBits();
+ auto MIBShAmt = Builder.buildConstant(DstTy, ShAmt);
+ auto MIBShl = Builder.buildInstr(
+ TargetOpcode::G_SHL, {DstTy},
+ {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), MIBShAmt});
+ Builder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {MIBShl, MIBShAmt});
+ markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
return tryFoldImplicitDef(MI, DeadInsts);
}
- /// Try to fold sb = EXTEND (G_IMPLICIT_DEF sa) -> sb = G_IMPLICIT_DEF
+ /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
bool tryFoldImplicitDef(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts) {
unsigned Opcode = MI.getOpcode();
@@ -119,13 +137,25 @@ public:
if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
MI.getOperand(1).getReg(), MRI)) {
+ Builder.setInstr(MI);
unsigned DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
- if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
- return false;
- LLVM_DEBUG(dbgs() << ".. Combine EXT(IMPLICIT_DEF) " << MI;);
- Builder.setInstr(MI);
- Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, DstReg);
+
+ if (Opcode == TargetOpcode::G_ANYEXT) {
+ // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
+ if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
+ return false;
+ LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
+ Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
+ } else {
+ // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
+ // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
+ if (isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
+ return false;
+ LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
+ Builder.buildConstant(DstReg, 0);
+ }
+
markInstAndDefDead(MI, *DefMI, DeadInsts);
return true;
}
@@ -139,8 +169,20 @@ public:
return false;
unsigned NumDefs = MI.getNumOperands() - 1;
- MachineInstr *MergeI = getOpcodeDef(TargetOpcode::G_MERGE_VALUES,
- MI.getOperand(NumDefs).getReg(), MRI);
+
+ unsigned MergingOpcode;
+ LLT OpTy = MRI.getType(MI.getOperand(NumDefs).getReg());
+ LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
+ if (OpTy.isVector() && DestTy.isVector())
+ MergingOpcode = TargetOpcode::G_CONCAT_VECTORS;
+ else if (OpTy.isVector() && !DestTy.isVector())
+ MergingOpcode = TargetOpcode::G_BUILD_VECTOR;
+ else
+ MergingOpcode = TargetOpcode::G_MERGE_VALUES;
+
+ MachineInstr *MergeI =
+ getOpcodeDef(MergingOpcode, MI.getOperand(NumDefs).getReg(), MRI);
+
if (!MergeI)
return false;
@@ -277,6 +319,19 @@ private:
auto Step = LI.getAction(Query);
return Step.Action == Unsupported || Step.Action == NotFound;
}
+
+ /// Looks through copy instructions and returns the actual
+ /// source register.
+ unsigned lookThroughCopyInstrs(unsigned Reg) {
+ unsigned TmpReg;
+ while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
+ if (MRI.getType(TmpReg).isValid())
+ Reg = TmpReg;
+ else
+ break;
+ }
+ return Reg;
+ }
};
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index d122e67b87b8..9b4ecf9284e3 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -32,6 +32,7 @@ namespace llvm {
class LegalizerInfo;
class Legalizer;
class MachineRegisterInfo;
+class GISelChangeObserver;
class LegalizerHelper {
public:
@@ -48,7 +49,10 @@ public:
UnableToLegalize,
};
- LegalizerHelper(MachineFunction &MF);
+ LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer,
+ MachineIRBuilder &B);
+ LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
+ GISelChangeObserver &Observer, MachineIRBuilder &B);
/// Replace \p MI by a sequence of legal instructions that can implement the
/// same operation. Note that this means \p MI may be deleted, so any iterator
@@ -87,7 +91,7 @@ public:
/// Expose MIRBuilder so clients can set their own RecordInsertInstruction
/// functions
- MachineIRBuilder MIRBuilder;
+ MachineIRBuilder &MIRBuilder;
/// Expose LegalizerInfo so the clients can re-use.
const LegalizerInfo &getLegalizerInfo() const { return LI; }
@@ -112,8 +116,12 @@ private:
void extractParts(unsigned Reg, LLT Ty, int NumParts,
SmallVectorImpl<unsigned> &VRegs);
+ LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+
MachineRegisterInfo &MRI;
const LegalizerInfo &LI;
+ /// To keep track of changes made by the LegalizerHelper.
+ GISelChangeObserver &Observer;
};
/// Helper function that creates the given libcall.
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index a8c26082f221..13776dd3e87d 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -39,6 +39,7 @@ class MachineInstr;
class MachineIRBuilder;
class MachineRegisterInfo;
class MCInstrInfo;
+class GISelChangeObserver;
namespace LegalizeActions {
enum LegalizeAction : std::uint8_t {
@@ -121,7 +122,7 @@ struct LegalityQuery {
ArrayRef<LLT> Types;
struct MemDesc {
- uint64_t Size;
+ uint64_t SizeInBits;
AtomicOrdering Ordering;
};
@@ -651,6 +652,20 @@ public:
return minScalar(TypeIdx, MinTy).maxScalar(TypeIdx, MaxTy);
}
+ /// Widen the scalar to match the size of another.
+ LegalizeRuleSet &minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx) {
+ typeIdx(TypeIdx);
+ return widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[LargeTypeIdx].getScalarSizeInBits() >
+ Query.Types[TypeIdx].getSizeInBits();
+ },
+ [=](const LegalityQuery &Query) {
+ return std::make_pair(TypeIdx,
+ Query.Types[LargeTypeIdx].getElementType());
+ });
+ }
+
/// Add more elements to the vector to reach the next power of two.
/// No effect if the type is not a vector or the element count is a power of
/// two.
@@ -693,6 +708,8 @@ public:
},
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
+ if (MaxElements == 1)
+ return std::make_pair(TypeIdx, VecTy.getElementType());
return std::make_pair(
TypeIdx, LLT::vector(MaxElements, VecTy.getScalarSizeInBits()));
});
@@ -947,9 +964,9 @@ public:
bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
- virtual bool legalizeCustom(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
+ virtual bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const;
private:
/// Determine what action should be taken to legalize the given generic
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index ac1673de5f3f..37de8f030410 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -14,6 +14,7 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H
#define LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Types.h"
#include "llvm/CodeGen/LowLevelType.h"
@@ -30,6 +31,7 @@ namespace llvm {
class MachineFunction;
class MachineInstr;
class TargetInstrInfo;
+class GISelChangeObserver;
/// Class which stores all the state required in a MachineIRBuilder.
/// Since MachineIRBuilders will only store state in this object, it allows
@@ -50,62 +52,177 @@ struct MachineIRBuilderState {
MachineBasicBlock::iterator II;
/// @}
- std::function<void(MachineInstr *)> InsertedInstr;
+ GISelChangeObserver *Observer;
+
+ GISelCSEInfo *CSEInfo;
};
-/// Helper class to build MachineInstr.
-/// It keeps internally the insertion point and debug location for all
-/// the new instructions we want to create.
-/// This information can be modify via the related setters.
-class MachineIRBuilderBase {
+class DstOp {
+ union {
+ LLT LLTTy;
+ unsigned Reg;
+ const TargetRegisterClass *RC;
+ };
- MachineIRBuilderState State;
- const TargetInstrInfo &getTII() {
- assert(State.TII && "TargetInstrInfo is not set");
- return *State.TII;
+public:
+ enum class DstType { Ty_LLT, Ty_Reg, Ty_RC };
+ DstOp(unsigned R) : Reg(R), Ty(DstType::Ty_Reg) {}
+ DstOp(const LLT &T) : LLTTy(T), Ty(DstType::Ty_LLT) {}
+ DstOp(const TargetRegisterClass *TRC) : RC(TRC), Ty(DstType::Ty_RC) {}
+
+ void addDefToMIB(MachineRegisterInfo &MRI, MachineInstrBuilder &MIB) const {
+ switch (Ty) {
+ case DstType::Ty_Reg:
+ MIB.addDef(Reg);
+ break;
+ case DstType::Ty_LLT:
+ MIB.addDef(MRI.createGenericVirtualRegister(LLTTy));
+ break;
+ case DstType::Ty_RC:
+ MIB.addDef(MRI.createVirtualRegister(RC));
+ break;
+ }
}
- void validateTruncExt(unsigned Dst, unsigned Src, bool IsExtend);
+ LLT getLLTTy(const MachineRegisterInfo &MRI) const {
+ switch (Ty) {
+ case DstType::Ty_RC:
+ return LLT{};
+ case DstType::Ty_LLT:
+ return LLTTy;
+ case DstType::Ty_Reg:
+ return MRI.getType(Reg);
+ }
+ llvm_unreachable("Unrecognised DstOp::DstType enum");
+ }
-protected:
- unsigned getDestFromArg(unsigned Reg) { return Reg; }
- unsigned getDestFromArg(LLT Ty) {
- return getMF().getRegInfo().createGenericVirtualRegister(Ty);
+ unsigned getReg() const {
+ assert(Ty == DstType::Ty_Reg && "Not a register");
+ return Reg;
}
- unsigned getDestFromArg(const TargetRegisterClass *RC) {
- return getMF().getRegInfo().createVirtualRegister(RC);
+
+ const TargetRegisterClass *getRegClass() const {
+ switch (Ty) {
+ case DstType::Ty_RC:
+ return RC;
+ default:
+ llvm_unreachable("Not a RC Operand");
+ }
}
- void addUseFromArg(MachineInstrBuilder &MIB, unsigned Reg) {
- MIB.addUse(Reg);
+ DstType getDstOpKind() const { return Ty; }
+
+private:
+ DstType Ty;
+};
+
+class SrcOp {
+ union {
+ MachineInstrBuilder SrcMIB;
+ unsigned Reg;
+ CmpInst::Predicate Pred;
+ };
+
+public:
+ enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate };
+ SrcOp(unsigned R) : Reg(R), Ty(SrcType::Ty_Reg) {}
+ SrcOp(const MachineInstrBuilder &MIB) : SrcMIB(MIB), Ty(SrcType::Ty_MIB) {}
+ SrcOp(const CmpInst::Predicate P) : Pred(P), Ty(SrcType::Ty_Predicate) {}
+
+ void addSrcToMIB(MachineInstrBuilder &MIB) const {
+ switch (Ty) {
+ case SrcType::Ty_Predicate:
+ MIB.addPredicate(Pred);
+ break;
+ case SrcType::Ty_Reg:
+ MIB.addUse(Reg);
+ break;
+ case SrcType::Ty_MIB:
+ MIB.addUse(SrcMIB->getOperand(0).getReg());
+ break;
+ }
}
- void addUseFromArg(MachineInstrBuilder &MIB, const MachineInstrBuilder &UseMIB) {
- MIB.addUse(UseMIB->getOperand(0).getReg());
+ LLT getLLTTy(const MachineRegisterInfo &MRI) const {
+ switch (Ty) {
+ case SrcType::Ty_Predicate:
+ llvm_unreachable("Not a register operand");
+ case SrcType::Ty_Reg:
+ return MRI.getType(Reg);
+ case SrcType::Ty_MIB:
+ return MRI.getType(SrcMIB->getOperand(0).getReg());
+ }
+ llvm_unreachable("Unrecognised SrcOp::SrcType enum");
}
- void addUsesFromArgs(MachineInstrBuilder &MIB) { }
- template<typename UseArgTy, typename ... UseArgsTy>
- void addUsesFromArgs(MachineInstrBuilder &MIB, UseArgTy &&Arg1, UseArgsTy &&... Args) {
- addUseFromArg(MIB, Arg1);
- addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...);
+ unsigned getReg() const {
+ switch (Ty) {
+ case SrcType::Ty_Predicate:
+ llvm_unreachable("Not a register operand");
+ case SrcType::Ty_Reg:
+ return Reg;
+ case SrcType::Ty_MIB:
+ return SrcMIB->getOperand(0).getReg();
+ }
+ llvm_unreachable("Unrecognised SrcOp::SrcType enum");
}
- unsigned getRegFromArg(unsigned Reg) { return Reg; }
- unsigned getRegFromArg(const MachineInstrBuilder &MIB) {
- return MIB->getOperand(0).getReg();
+
+ CmpInst::Predicate getPredicate() const {
+ switch (Ty) {
+ case SrcType::Ty_Predicate:
+ return Pred;
+ default:
+ llvm_unreachable("Not a register operand");
+ }
}
- void validateBinaryOp(unsigned Res, unsigned Op0, unsigned Op1);
+ SrcType getSrcOpKind() const { return Ty; }
+
+private:
+ SrcType Ty;
+};
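+
+// Taken together, DstOp and SrcOp let the build methods further below accept
+// an LLT, a plain register, or another MachineInstrBuilder interchangeably.
+// A sketch, assuming a MachineIRBuilder `B`, an LLT `s32` and registers
+// `R0`/`R1` in the caller's scope:
+//
+//   auto Sum = B.buildAdd(s32, R0, R1);           // DstOp from an LLT
+//   auto Ext = B.buildZExt(LLT::scalar(64), Sum); // SrcOp from a MIB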
+
+class FlagsOp {
+ Optional<unsigned> Flags;
+
+public:
+ explicit FlagsOp(unsigned F) : Flags(F) {}
+ FlagsOp() : Flags(None) {}
+ Optional<unsigned> getFlags() const { return Flags; }
+};
+/// Helper class to build MachineInstr.
+/// It keeps internally the insertion point and debug location for all
+/// the new instructions we want to create.
+/// This information can be modified via the related setters.
+class MachineIRBuilder {
+
+ MachineIRBuilderState State;
+
+protected:
+ void validateTruncExt(const LLT &Dst, const LLT &Src, bool IsExtend);
+
+ void validateBinaryOp(const LLT &Res, const LLT &Op0, const LLT &Op1);
+
+ void validateSelectOp(const LLT &ResTy, const LLT &TstTy, const LLT &Op0Ty,
+ const LLT &Op1Ty);
+ void recordInsertion(MachineInstr *MI) const;
public:
/// Some constructors for easy use.
- MachineIRBuilderBase() = default;
- MachineIRBuilderBase(MachineFunction &MF) { setMF(MF); }
- MachineIRBuilderBase(MachineInstr &MI) : MachineIRBuilderBase(*MI.getMF()) {
+ MachineIRBuilder() = default;
+ MachineIRBuilder(MachineFunction &MF) { setMF(MF); }
+ MachineIRBuilder(MachineInstr &MI) : MachineIRBuilder(*MI.getMF()) {
setInstr(MI);
}
- MachineIRBuilderBase(const MachineIRBuilderState &BState) : State(BState) {}
+ virtual ~MachineIRBuilder() = default;
+
+ MachineIRBuilder(const MachineIRBuilderState &BState) : State(BState) {}
+
+ const TargetInstrInfo &getTII() {
+ assert(State.TII && "TargetInstrInfo is not set");
+ return *State.TII;
+ }
/// Getter for the function we currently build.
MachineFunction &getMF() {
@@ -118,16 +235,25 @@ public:
/// Getter for MRI
MachineRegisterInfo *getMRI() { return State.MRI; }
+ const MachineRegisterInfo *getMRI() const { return State.MRI; }
/// Getter for the State
MachineIRBuilderState &getState() { return State; }
/// Getter for the basic block we currently build.
- MachineBasicBlock &getMBB() {
+ const MachineBasicBlock &getMBB() const {
assert(State.MBB && "MachineBasicBlock is not set");
return *State.MBB;
}
+ MachineBasicBlock &getMBB() {
+ return const_cast<MachineBasicBlock &>(
+ const_cast<const MachineIRBuilder *>(this)->getMBB());
+ }
+
+ GISelCSEInfo *getCSEInfo() { return State.CSEInfo; }
+ const GISelCSEInfo *getCSEInfo() const { return State.CSEInfo; }
+
/// Current insertion point for new instructions.
MachineBasicBlock::iterator getInsertPt() { return State.II; }
@@ -137,10 +263,12 @@ public:
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II);
/// @}
+ void setCSEInfo(GISelCSEInfo *Info);
+
/// \name Setters for the insertion point.
/// @{
/// Set the MachineFunction where to build instructions.
- void setMF(MachineFunction &);
+ void setMF(MachineFunction &MF);
/// Set the insertion point to the end of \p MBB.
/// \pre \p MBB must be contained by getMF().
@@ -151,12 +279,8 @@ public:
void setInstr(MachineInstr &MI);
/// @}
- /// \name Control where instructions we create are recorded (typically for
- /// visiting again later during legalization).
- /// @{
- void recordInsertion(MachineInstr *InsertedInstr) const;
- void recordInsertions(std::function<void(MachineInstr *)> InsertedInstr);
- void stopRecordingInsertions();
+ void setChangeObserver(GISelChangeObserver &Observer);
+ void stopObservingChanges();
/// @}
/// Set the debug location to \p DL for all the next build instructions.
@@ -208,6 +332,10 @@ public:
const MDNode *Variable,
const MDNode *Expr);
+  /// Build and insert a DBG_LABEL instruction specifying that \p Label is
+ /// given. Convert "llvm.dbg.label Label" to "DBG_LABEL Label".
+ MachineInstrBuilder buildDbgLabel(const MDNode *Label);
+
/// Build and insert \p Res = G_FRAME_INDEX \p Idx
///
/// G_FRAME_INDEX materializes the address of an alloca value or other
@@ -296,9 +424,9 @@ public:
/// registers with the same scalar type (typically s1)
///
/// \return The newly created instruction.
- MachineInstrBuilder buildUAdde(unsigned Res, unsigned CarryOut, unsigned Op0,
- unsigned Op1, unsigned CarryIn);
-
+ MachineInstrBuilder buildUAdde(const DstOp &Res, const DstOp &CarryOut,
+ const SrcOp &Op0, const SrcOp &Op1,
+ const SrcOp &CarryIn);
/// Build and insert \p Res = G_ANYEXT \p Op0
///
@@ -314,11 +442,7 @@ public:
///
/// \return The newly created instruction.
- MachineInstrBuilder buildAnyExt(unsigned Res, unsigned Op);
- template <typename DstType, typename ArgType>
- MachineInstrBuilder buildAnyExt(DstType &&Res, ArgType &&Arg) {
- return buildAnyExt(getDestFromArg(Res), getRegFromArg(Arg));
- }
+ MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = G_SEXT \p Op
///
@@ -332,11 +456,7 @@ public:
/// \pre \p Op must be smaller than \p Res
///
/// \return The newly created instruction.
- template <typename DstType, typename ArgType>
- MachineInstrBuilder buildSExt(DstType &&Res, ArgType &&Arg) {
- return buildSExt(getDestFromArg(Res), getRegFromArg(Arg));
- }
- MachineInstrBuilder buildSExt(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = G_ZEXT \p Op
///
@@ -350,11 +470,7 @@ public:
/// \pre \p Op must be smaller than \p Res
///
/// \return The newly created instruction.
- template <typename DstType, typename ArgType>
- MachineInstrBuilder buildZExt(DstType &&Res, ArgType &&Arg) {
- return buildZExt(getDestFromArg(Res), getRegFromArg(Arg));
- }
- MachineInstrBuilder buildZExt(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or
/// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op.
@@ -364,11 +480,7 @@ public:
/// \pre \p Op must be a generic virtual register with scalar or vector type.
///
/// \return The newly created instruction.
- template <typename DstTy, typename UseArgTy>
- MachineInstrBuilder buildSExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) {
- return buildSExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use));
- }
- MachineInstrBuilder buildSExtOrTrunc(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or
/// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op.
@@ -378,11 +490,7 @@ public:
/// \pre \p Op must be a generic virtual register with scalar or vector type.
///
/// \return The newly created instruction.
- template <typename DstTy, typename UseArgTy>
- MachineInstrBuilder buildZExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) {
- return buildZExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use));
- }
- MachineInstrBuilder buildZExtOrTrunc(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op);
// Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_TRUNC \p Op, or
/// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op.
@@ -392,11 +500,7 @@ public:
/// \pre \p Op must be a generic virtual register with scalar or vector type.
///
/// \return The newly created instruction.
- template <typename DstTy, typename UseArgTy>
- MachineInstrBuilder buildAnyExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) {
- return buildAnyExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use));
- }
- MachineInstrBuilder buildAnyExtOrTrunc(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = \p ExtOpc, \p Res = G_TRUNC \p
/// Op, or \p Res = COPY \p Op depending on the differing sizes of \p Res and
@@ -407,15 +511,11 @@ public:
/// \pre \p Op must be a generic virtual register with scalar or vector type.
///
/// \return The newly created instruction.
- MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, unsigned Res,
- unsigned Op);
+ MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res,
+ const SrcOp &Op);
/// Build and insert an appropriate cast between two registers of equal size.
- template <typename DstType, typename ArgType>
- MachineInstrBuilder buildCast(DstType &&Res, ArgType &&Arg) {
- return buildCast(getDestFromArg(Res), getRegFromArg(Arg));
- }
- MachineInstrBuilder buildCast(unsigned Dst, unsigned Src);
+ MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src);
/// Build and insert G_BR \p Dest
///
@@ -460,7 +560,8 @@ public:
/// type.
///
/// \return The newly created instruction.
- MachineInstrBuilder buildConstant(unsigned Res, const ConstantInt &Val);
+ virtual MachineInstrBuilder buildConstant(const DstOp &Res,
+ const ConstantInt &Val);
/// Build and insert \p Res = G_CONSTANT \p Val
///
@@ -470,12 +571,8 @@ public:
/// \pre \p Res must be a generic virtual register with scalar type.
///
/// \return The newly created instruction.
- MachineInstrBuilder buildConstant(unsigned Res, int64_t Val);
+ MachineInstrBuilder buildConstant(const DstOp &Res, int64_t Val);
- template <typename DstType>
- MachineInstrBuilder buildConstant(DstType &&Res, int64_t Val) {
- return buildConstant(getDestFromArg(Res), Val);
- }
/// Build and insert \p Res = G_FCONSTANT \p Val
///
/// G_FCONSTANT is a floating-point constant with the specified size and
@@ -485,17 +582,10 @@ public:
/// \pre \p Res must be a generic virtual register with scalar type.
///
/// \return The newly created instruction.
- template <typename DstType>
- MachineInstrBuilder buildFConstant(DstType &&Res, const ConstantFP &Val) {
- return buildFConstant(getDestFromArg(Res), Val);
- }
- MachineInstrBuilder buildFConstant(unsigned Res, const ConstantFP &Val);
+ virtual MachineInstrBuilder buildFConstant(const DstOp &Res,
+ const ConstantFP &Val);
- template <typename DstType>
- MachineInstrBuilder buildFConstant(DstType &&Res, double Val) {
- return buildFConstant(getDestFromArg(Res), Val);
- }
- MachineInstrBuilder buildFConstant(unsigned Res, double Val);
+ MachineInstrBuilder buildFConstant(const DstOp &Res, double Val);
/// Build and insert \p Res = COPY Op
///
@@ -504,11 +594,7 @@ public:
/// \pre setBasicBlock or setMI must have been called.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildCopy(unsigned Res, unsigned Op);
- template <typename DstType, typename SrcType>
- MachineInstrBuilder buildCopy(DstType &&Res, SrcType &&Src) {
- return buildCopy(getDestFromArg(Res), getRegFromArg(Src));
- }
+ MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op);
/// Build and insert `Res = G_LOAD Addr, MMO`.
///
@@ -555,10 +641,7 @@ public:
MachineInstrBuilder buildExtract(unsigned Res, unsigned Src, uint64_t Index);
/// Build and insert \p Res = IMPLICIT_DEF.
- template <typename DstType> MachineInstrBuilder buildUndef(DstType &&Res) {
- return buildUndef(getDestFromArg(Res));
- }
- MachineInstrBuilder buildUndef(unsigned Res);
+ MachineInstrBuilder buildUndef(const DstOp &Res);
  /// Build and insert instructions to put \p Ops together at the specified
  /// \p Indices to form a larger register.
@@ -587,7 +670,7 @@ public:
/// \pre The type of all \p Ops registers must be identical.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildMerge(unsigned Res, ArrayRef<unsigned> Ops);
+ MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef<unsigned> Ops);
/// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op
///
@@ -599,7 +682,50 @@ public:
/// \pre The type of all \p Res registers must be identical.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildUnmerge(ArrayRef<unsigned> Res, unsigned Op);
+ MachineInstrBuilder buildUnmerge(ArrayRef<LLT> Res, const SrcOp &Op);
+ MachineInstrBuilder buildUnmerge(ArrayRef<unsigned> Res, const SrcOp &Op);
+
+ /// Build and insert \p Res = G_BUILD_VECTOR \p Op0, ...
+ ///
+ /// G_BUILD_VECTOR creates a vector value from multiple scalar registers.
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre The entire register \p Res (and no more) must be covered by the
+ /// input scalar registers.
+ /// \pre The type of all \p Ops registers must be identical.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildBuildVector(const DstOp &Res,
+ ArrayRef<unsigned> Ops);
+
+ /// Build and insert \p Res = G_BUILD_VECTOR_TRUNC \p Op0, ...
+ ///
+ /// G_BUILD_VECTOR_TRUNC creates a vector value from multiple scalar registers
+ /// which have types larger than the destination vector element type, and
+ /// truncates the values to fit.
+ ///
+ /// If the operands given are already the same size as the vector elt type,
+ /// then this method will instead create a G_BUILD_VECTOR instruction.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre The type of all \p Ops registers must be identical.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildBuildVectorTrunc(const DstOp &Res,
+ ArrayRef<unsigned> Ops);
+
+ /// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ...
+ ///
+ /// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more
+ /// vectors.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre The entire register \p Res (and no more) must be covered by the input
+ /// registers.
+ /// \pre The type of all source operands must be identical.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildConcatVectors(const DstOp &Res,
+ ArrayRef<unsigned> Ops);
MachineInstrBuilder buildInsert(unsigned Res, unsigned Src,
unsigned Op, unsigned Index);
@@ -627,11 +753,7 @@ public:
/// \pre \p Res must be smaller than \p Op
///
/// \return The newly created instruction.
- template <typename DstType, typename SrcType>
- MachineInstrBuilder buildFPTrunc(DstType &&Res, SrcType &&Src) {
- return buildFPTrunc(getDestFromArg(Res), getRegFromArg(Src));
- }
- MachineInstrBuilder buildFPTrunc(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = G_TRUNC \p Op
///
@@ -644,11 +766,7 @@ public:
/// \pre \p Res must be smaller than \p Op
///
/// \return The newly created instruction.
- MachineInstrBuilder buildTrunc(unsigned Res, unsigned Op);
- template <typename DstType, typename SrcType>
- MachineInstrBuilder buildTrunc(DstType &&Res, SrcType &&Src) {
- return buildTrunc(getDestFromArg(Res), getRegFromArg(Src));
- }
+ MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op);
/// Build and insert a \p Res = G_ICMP \p Pred, \p Op0, \p Op1
///
@@ -662,8 +780,8 @@ public:
/// \pre \p Pred must be an integer predicate.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildICmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0, unsigned Op1);
+ MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res,
+ const SrcOp &Op0, const SrcOp &Op1);
/// Build and insert a \p Res = G_FCMP \p Pred\p Op0, \p Op1
///
@@ -677,8 +795,8 @@ public:
/// \pre \p Pred must be a floating-point predicate.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0, unsigned Op1);
+ MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res,
+ const SrcOp &Op0, const SrcOp &Op1);
/// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1
///
@@ -690,8 +808,8 @@ public:
/// elements as the other parameters.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildSelect(unsigned Res, unsigned Tst,
- unsigned Op0, unsigned Op1);
+ MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst,
+ const SrcOp &Op0, const SrcOp &Op1);
/// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val,
/// \p Elt, \p Idx
@@ -703,8 +821,10 @@ public:
/// with scalar type.
///
/// \return The newly created instruction.
- MachineInstrBuilder buildInsertVectorElement(unsigned Res, unsigned Val,
- unsigned Elt, unsigned Idx);
+ MachineInstrBuilder buildInsertVectorElement(const DstOp &Res,
+ const SrcOp &Val,
+ const SrcOp &Elt,
+ const SrcOp &Idx);
/// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx
///
@@ -714,8 +834,9 @@ public:
/// \pre \p Idx must be a generic virtual register with scalar type.
///
/// \return The newly created instruction.
- MachineInstrBuilder buildExtractVectorElement(unsigned Res, unsigned Val,
- unsigned Idx);
+ MachineInstrBuilder buildExtractVectorElement(const DstOp &Res,
+ const SrcOp &Val,
+ const SrcOp &Idx);
/// Build and insert `OldValRes<def>, SuccessRes<def> =
/// G_ATOMIC_CMPXCHG_WITH_SUCCESS Addr, CmpVal, NewVal, MMO`.
@@ -952,19 +1073,7 @@ public:
///
/// \return The newly created instruction.
MachineInstrBuilder buildBlockAddress(unsigned Res, const BlockAddress *BA);
-};
-
-/// A CRTP class that contains methods for building instructions that can
-/// be constant folded. MachineIRBuilders that want to inherit from this will
-/// need to implement buildBinaryOp (for constant folding binary ops).
-/// Alternatively, they can implement buildInstr(Opc, Dst, Uses...) to perform
-/// additional folding for Opc.
-template <typename Base>
-class FoldableInstructionsBuilder : public MachineIRBuilderBase {
- Base &base() { return static_cast<Base &>(*this); }
-public:
- using MachineIRBuilderBase::MachineIRBuilderBase;
/// Build and insert \p Res = G_ADD \p Op0, \p Op1
///
/// G_ADD sets \p Res to the sum of integer parameters \p Op0 and \p Op1,
@@ -976,13 +1085,10 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAdd(unsigned Dst, unsigned Src0, unsigned Src1) {
- return base().buildBinaryOp(TargetOpcode::G_ADD, Dst, Src0, Src1);
- }
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildAdd(DstTy &&Ty, UseArgsTy &&... UseArgs) {
- unsigned Res = base().getDestFromArg(Ty);
- return base().buildAdd(Res, (base().getRegFromArg(UseArgs))...);
+ MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_ADD, {Dst}, {Src0, Src1}, Flags);
}
/// Build and insert \p Res = G_SUB \p Op0, \p Op1
@@ -996,13 +1102,10 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildSub(unsigned Dst, unsigned Src0, unsigned Src1) {
- return base().buildBinaryOp(TargetOpcode::G_SUB, Dst, Src0, Src1);
- }
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildSub(DstTy &&Ty, UseArgsTy &&... UseArgs) {
- unsigned Res = base().getDestFromArg(Ty);
- return base().buildSub(Res, (base().getRegFromArg(UseArgs))...);
+ MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_SUB, {Dst}, {Src0, Src1}, Flags);
}
/// Build and insert \p Res = G_MUL \p Op0, \p Op1
@@ -1015,13 +1118,10 @@ public:
/// with the same (scalar or vector) type).
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildMul(unsigned Dst, unsigned Src0, unsigned Src1) {
- return base().buildBinaryOp(TargetOpcode::G_MUL, Dst, Src0, Src1);
- }
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildMul(DstTy &&Ty, UseArgsTy &&... UseArgs) {
- unsigned Res = base().getDestFromArg(Ty);
- return base().buildMul(Res, (base().getRegFromArg(UseArgs))...);
+ MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_MUL, {Dst}, {Src0, Src1}, Flags);
}
/// Build and insert \p Res = G_AND \p Op0, \p Op1
@@ -1035,13 +1135,9 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAnd(unsigned Dst, unsigned Src0, unsigned Src1) {
- return base().buildBinaryOp(TargetOpcode::G_AND, Dst, Src0, Src1);
- }
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildAnd(DstTy &&Ty, UseArgsTy &&... UseArgs) {
- unsigned Res = base().getDestFromArg(Ty);
- return base().buildAnd(Res, (base().getRegFromArg(UseArgs))...);
+ MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_AND, {Dst}, {Src0, Src1});
}
/// Build and insert \p Res = G_OR \p Op0, \p Op1
@@ -1054,39 +1150,14 @@ public:
/// with the same (scalar or vector) type).
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildOr(unsigned Dst, unsigned Src0, unsigned Src1) {
- return base().buildBinaryOp(TargetOpcode::G_OR, Dst, Src0, Src1);
- }
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildOr(DstTy &&Ty, UseArgsTy &&... UseArgs) {
- unsigned Res = base().getDestFromArg(Ty);
- return base().buildOr(Res, (base().getRegFromArg(UseArgs))...);
+ MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_OR, {Dst}, {Src0, Src1});
}
-};
-class MachineIRBuilder : public FoldableInstructionsBuilder<MachineIRBuilder> {
-public:
- using FoldableInstructionsBuilder<
- MachineIRBuilder>::FoldableInstructionsBuilder;
- MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Dst,
- unsigned Src0, unsigned Src1) {
- validateBinaryOp(Dst, Src0, Src1);
- return buildInstr(Opcode).addDef(Dst).addUse(Src0).addUse(Src1);
- }
- using FoldableInstructionsBuilder<MachineIRBuilder>::buildInstr;
- /// DAG like Generic method for building arbitrary instructions as above.
- /// \Opc opcode for the instruction.
- /// \Ty Either LLT/TargetRegisterClass/unsigned types for Dst
- /// \Args Variadic list of uses of types(unsigned/MachineInstrBuilder)
- /// Uses of type MachineInstrBuilder will perform
- /// getOperand(0).getReg() to convert to register.
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty,
- UseArgsTy &&... Args) {
- auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty));
- addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...);
- return MIB;
- }
+ virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flags = None);
};
} // End namespace llvm.
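As a minimal sketch of how the refactored DstOp/SrcOp overloads above compose (assuming a MachineIRBuilder MIB with its insertion point already set, two hypothetical s32 virtual registers Src0Reg and Src1Reg, and the usual GlobalISel includes):

    // Destinations may be given as an LLT (a fresh vreg is created) and
    // sources as existing vregs or as the builder of a prior instruction.
    LLT S32 = LLT::scalar(32);
    auto Sum = MIB.buildAdd(S32, Src0Reg, Src1Reg);                // G_ADD
    auto Cmp = MIB.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Sum, Src1Reg);
    auto Sel = MIB.buildSelect(S32, Cmp, Sum, Src0Reg);            // G_SELECT
    MIB.buildInstr(TargetOpcode::G_MUL, {S32}, {Sel, Src1Reg});    // generic form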
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
index 82fd7eddb68a..c33b32b2db40 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
@@ -103,8 +103,8 @@ public:
/// Currently the TableGen-like file would look like:
/// \code
/// PartialMapping[] = {
- /// /*32-bit add*/ {0, 32, GPR},
- /// /*2x32-bit add*/ {0, 32, GPR}, {0, 32, GPR}, // <-- Same entry 3x
+ /// /*32-bit add*/ {0, 32, GPR}, // Scalar entry repeated for first vec elt.
+ /// /*2x32-bit add*/ {0, 32, GPR}, {32, 32, GPR},
/// /*<2x32-bit> vadd {0, 64, VPR}
/// }; // PartialMapping duplicated.
///
@@ -118,14 +118,15 @@ public:
/// With the array of pointer, we would have:
/// \code
/// PartialMapping[] = {
- /// /*32-bit add*/ {0, 32, GPR},
+ /// /*32-bit add lower */ {0, 32, GPR},
+ /// /*32-bit add upper */ {32, 32, GPR},
/// /*<2x32-bit> vadd {0, 64, VPR}
/// }; // No more duplication.
///
/// BreakDowns[] = {
/// /*AddBreakDown*/ &PartialMapping[0],
- /// /*2xAddBreakDown*/ &PartialMapping[0], &PartialMapping[0],
- /// /*VAddBreakDown*/ &PartialMapping[1]
+ /// /*2xAddBreakDown*/ &PartialMapping[0], &PartialMapping[1],
+ /// /*VAddBreakDown*/ &PartialMapping[2]
/// }; // Addresses of PartialMapping duplicated (smaller).
///
/// ValueMapping[] {
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 51e3a2732972..82b791d35b2b 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -108,5 +108,8 @@ APFloat getAPFloatFromSize(double Val, unsigned Size);
/// fallback.
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU);
+Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
+ const unsigned Op2,
+ const MachineRegisterInfo &MRI);
} // End namespace llvm.
#endif
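A possible use of the new ConstantFoldBinOp helper inside a combine, illustrative only (MI, MRI and Builder are assumed to be provided by the surrounding pass, and the helper is presumed to return None unless both operands fold to constants):

    // Try to fold a generic binary op whose inputs are both constants.
    unsigned Op1 = MI.getOperand(1).getReg();
    unsigned Op2 = MI.getOperand(2).getReg();
    if (Optional<APInt> Val =
            ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI)) {
      Builder.setInstr(MI);  // insert the replacement where MI currently is
      Builder.buildConstant(MI.getOperand(0).getReg(), Val->getSExtValue());
      MI.eraseFromParent();
    }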
diff --git a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 80bd796d5374..9c918ae1104f 100644
--- a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -70,7 +70,7 @@ namespace ISD {
/// of the frame or return address to return. An index of zero corresponds
/// to the current function's frame or return address, an index of one to
/// the parent's frame or return address, and so on.
- FRAMEADDR, RETURNADDR, ADDROFRETURNADDR,
+ FRAMEADDR, RETURNADDR, ADDROFRETURNADDR, SPONENTRY,
/// LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
/// Materializes the offset from the local object pointer of another
@@ -256,6 +256,29 @@ namespace ISD {
/// Same for multiplication.
SMULO, UMULO,
+ /// RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2
+ /// integers with the same bit width (W). If the true value of LHS + RHS
+ /// exceeds the largest value that can be represented by W bits, the
+ /// resulting value is this maximum value. Otherwise, if this value is less
+ /// than the smallest value that can be represented by W bits, the
+ /// resulting value is this minimum value.
+ SADDSAT, UADDSAT,
+
+ /// RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2
+ /// integers with the same bit width (W). If the true value of LHS - RHS
+ /// exceeds the largest value that can be represented by W bits, the
+ /// resulting value is this maximum value. Otherwise, if this value is less
+ /// than the smallest value that can be represented by W bits, the
+ /// resulting value is this minimum value.
+ SSUBSAT, USUBSAT,
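A reference sketch of the saturating semantics described above, written for 8-bit values purely as an illustration:

    #include <algorithm>
    #include <cstdint>

    uint8_t uaddsat8(uint8_t A, uint8_t B) {
      unsigned Sum = unsigned(A) + unsigned(B);
      return uint8_t(std::min(Sum, 255u));                 // clamp at UINT8_MAX
    }
    int8_t ssubsat8(int8_t A, int8_t B) {
      int Diff = int(A) - int(B);
      return int8_t(std::min(std::max(Diff, -128), 127));  // clamp to [INT8_MIN, INT8_MAX]
    }
    // uaddsat8(200, 100) == 255  (a wrapping 8-bit add would give 44)
    // ssubsat8(-100, 50) == -128 (a wrapping 8-bit sub would give 106)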
+
+ /// RESULT = SMULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on
+ /// 2 integers with the same width and scale. SCALE represents the scale of
+ /// both operands as fixed point numbers. This SCALE parameter must be a
+ /// constant integer. A scale of zero is effectively performing
+ /// multiplication on 2 integers.
+ SMULFIX,
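One reading of the description above is: multiply the operands as integers and shift the double-width product right by SCALE. A sketch for 32-bit signed operands, under that assumption:

    #include <cstdint>

    int32_t smulfix32(int32_t A, int32_t B, unsigned Scale) {
      return int32_t((int64_t(A) * int64_t(B)) >> Scale);
    }
    // Q4 example: smulfix32(0x18 /*1.5*/, 0x20 /*2.0*/, 4) == 0x30 /*3.0*/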
+
/// Simple binary floating point operators.
FADD, FSUB, FMUL, FDIV, FREM,
@@ -272,7 +295,8 @@ namespace ISD {
/// They are used to limit optimizations while the DAG is being optimized.
STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS,
STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2,
- STRICT_FRINT, STRICT_FNEARBYINT,
+ STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
+ STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,
/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,
@@ -377,9 +401,13 @@ namespace ISD {
/// When the 1st operand is a vector, the shift amount must be in the same
/// type. (TLI.getShiftAmountTy() will return the same type when the input
/// type is a vector.)
- /// For rotates, the shift amount is treated as an unsigned amount modulo
- /// the element size of the first operand.
- SHL, SRA, SRL, ROTL, ROTR,
+ /// For rotates and funnel shifts, the shift amount is treated as an unsigned
+ /// amount modulo the element size of the first operand.
+ ///
+ /// Funnel 'double' shifts take 3 operands, 2 inputs and the shift amount.
+ /// fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ /// fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ SHL, SRA, SRL, ROTL, ROTR, FSHL, FSHR,
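A reference sketch of the funnel-shift formulas above for 8-bit values (illustration only; when Z % BW == 0 the second term would require a full-width shift, and the result is simply X for fshl and Y for fshr):

    #include <cstdint>

    constexpr uint8_t fshl8(uint8_t X, uint8_t Y, unsigned Z) {
      return (Z % 8) ? uint8_t((X << (Z % 8)) | (Y >> (8 - Z % 8))) : X;
    }
    constexpr uint8_t fshr8(uint8_t X, uint8_t Y, unsigned Z) {
      return (Z % 8) ? uint8_t((X << (8 - Z % 8)) | (Y >> (Z % 8))) : Y;
    }
    static_assert(fshl8(0xAB, 0xCD, 3) == 0x5E, "low bits drawn from Y");
    static_assert(fshr8(0xAB, 0xCD, 3) == 0x79, "high bits drawn from X");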
/// Byte Swap and Counting operators.
BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE,
@@ -461,31 +489,33 @@ namespace ISD {
/// in-register any-extension of the low lanes of an integer vector. The
/// result type must have fewer elements than the operand type, and those
/// elements must be larger integer types such that the total size of the
- /// operand type and the result type match. Each of the low operand
- /// elements is any-extended into the corresponding, wider result
- /// elements with the high bits becoming undef.
+ /// operand type is less than or equal to the size of the result type. Each
+ /// of the low operand elements is any-extended into the corresponding,
+ /// wider result elements with the high bits becoming undef.
+ /// NOTE: The type legalizer prefers to make the operand and result size
+ /// the same to allow expansion to shuffle vector during op legalization.
ANY_EXTEND_VECTOR_INREG,
/// SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an
/// in-register sign-extension of the low lanes of an integer vector. The
/// result type must have fewer elements than the operand type, and those
/// elements must be larger integer types such that the total size of the
- /// operand type and the result type match. Each of the low operand
- /// elements is sign-extended into the corresponding, wider result
- /// elements.
- // FIXME: The SIGN_EXTEND_INREG node isn't specifically limited to
- // scalars, but it also doesn't handle vectors well. Either it should be
- // restricted to scalars or this node (and its handling) should be merged
- // into it.
+ /// operand type is less than or equal to the size of the result type. Each
+ /// of the low operand elements is sign-extended into the corresponding,
+ /// wider result elements.
+ /// NOTE: The type legalizer prefers to make the operand and result size
+ /// the same to allow expansion to shuffle vector during op legalization.
SIGN_EXTEND_VECTOR_INREG,
/// ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an
/// in-register zero-extension of the low lanes of an integer vector. The
/// result type must have fewer elements than the operand type, and those
/// elements must be larger integer types such that the total size of the
- /// operand type and the result type match. Each of the low operand
- /// elements is zero-extended into the corresponding, wider result
- /// elements.
+ /// operand type is less than or equal to the size of the result type. Each
+ /// of the low operand elements is zero-extended into the corresponding,
+ /// wider result elements.
+ /// NOTE: The type legalizer prefers to make the operand and result size
+ /// the same to allow expansion to shuffle vector during op legalization.
ZERO_EXTEND_VECTOR_INREG,
/// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned
@@ -550,22 +580,29 @@ namespace ISD {
/// is often a storage-only type but has native conversions.
FP16_TO_FP, FP_TO_FP16,
- /// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
- /// FLOG, FLOG2, FLOG10, FEXP, FEXP2,
- /// FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary
- /// floating point operations. These are inspired by libm.
- FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
+ /// Perform various unary floating-point operations inspired by libm.
+ FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW,
FLOG, FLOG2, FLOG10, FEXP, FEXP2,
FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
/// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
/// values.
- /// In the case where a single input is NaN, the non-NaN input is returned.
+ ///
+ /// In the case where a single input is a NaN (either signaling or quiet),
+ /// the non-NaN input is returned.
///
/// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0.
FMINNUM, FMAXNUM,
- /// FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that
- /// when a single input is NaN, NaN is returned.
- FMINNAN, FMAXNAN,
+
+ /// FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on
+ /// two values, following the IEEE-754 2008 definition. This differs from
+ /// FMINNUM/FMAXNUM in the handling of signaling NaNs. If one input is a
+ /// signaling NaN, returns a quiet NaN.
+ FMINNUM_IEEE, FMAXNUM_IEEE,
+
+ /// FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0
+ /// as less than 0.0. While FMINNUM_IEEE/FMAXNUM_IEEE follow IEEE 754-2008
+ /// semantics, FMINIMUM/FMAXIMUM follow IEEE 754-2018 draft semantics.
+ FMINIMUM, FMAXIMUM,
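Taken together with FMINNUM above, the three families differ only in NaN and signed-zero handling: FMINNUM(NaN, 1.0) returns 1.0, FMINNUM_IEEE returns a quiet NaN when either input is a signaling NaN, FMINIMUM(NaN, 1.0) returns NaN, and only FMINIMUM/FMAXIMUM order -0.0 below +0.0, so FMINIMUM(-0.0, +0.0) is -0.0.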
/// FSINCOS - Compute both fsin and fcos as a single operation.
FSINCOS,
@@ -786,11 +823,20 @@ namespace ISD {
// Masked load and store - consecutive vector load and store operations
// with additional mask operand that prevents memory accesses to the
// masked-off lanes.
+ //
+ // Val, OutChain = MLOAD(BasePtr, Mask, PassThru)
+ // OutChain = MSTORE(Value, BasePtr, Mask)
MLOAD, MSTORE,
// Masked gather and scatter - load and store operations for a vector of
// random addresses with additional mask operand that prevents memory
// accesses to the masked-off lanes.
+ //
+ // Val, OutChain = GATHER(InChain, PassThru, Mask, BasePtr, Index, Scale)
+ // OutChain = SCATTER(InChain, Value, Mask, BasePtr, Index, Scale)
+ //
+ // The Index operand can have more vector elements than the other operands
+ // due to type legalization. The extra elements are ignored.
MGATHER, MSCATTER,
/// This corresponds to the llvm.lifetime.* intrinsics. The first operand
diff --git a/contrib/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/contrib/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index c3046da90b8d..38fcb37b1e69 100644
--- a/contrib/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/contrib/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -15,7 +15,7 @@
#ifndef LLVM_CODEGEN_LINKALLASMWRITERCOMPONENTS_H
#define LLVM_CODEGEN_LINKALLASMWRITERCOMPONENTS_H
-#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/BuiltinGCs.h"
#include <cstdlib>
namespace {
diff --git a/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
index fee131e4a3c6..18c13ca8f598 100644
--- a/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -15,7 +15,7 @@
#ifndef LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H
#define LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H
-#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Target/TargetMachine.h"
@@ -36,11 +36,7 @@ namespace {
(void) llvm::createGreedyRegisterAllocator();
(void) llvm::createDefaultPBQPRegisterAllocator();
- llvm::linkCoreCLRGC();
- llvm::linkOcamlGC();
- llvm::linkErlangGC();
- llvm::linkShadowStackGC();
- llvm::linkStatepointExampleGC();
+ llvm::linkAllBuiltinGCs();
(void) llvm::createBURRListDAGScheduler(nullptr,
llvm::CodeGenOpt::Default);
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveIntervals.h b/contrib/llvm/include/llvm/CodeGen/LiveIntervals.h
index 291a07a712cb..16ab1dc475c4 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveIntervals.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveIntervals.h
@@ -198,10 +198,10 @@ class VirtRegMap;
void pruneValue(LiveRange &LR, SlotIndex Kill,
SmallVectorImpl<SlotIndex> *EndPoints);
- /// This function should not be used. Its intend is to tell you that
- /// you are doing something wrong if you call pruveValue directly on a
+ /// This function should not be used. Its intent is to tell you that you are
+ /// doing something wrong if you call pruneValue directly on a
/// LiveInterval. Indeed, you are supposed to call pruneValue on the main
- /// LiveRange and all the LiveRange of the subranges if any.
+ /// LiveRange and all the LiveRanges of the subranges if any.
LLVM_ATTRIBUTE_UNUSED void pruneValue(LiveInterval &, SlotIndex,
SmallVectorImpl<SlotIndex> *) {
llvm_unreachable(
diff --git a/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h b/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
index 301a45066b4c..7312902e21b7 100644
--- a/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
+++ b/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
@@ -48,7 +48,8 @@ class raw_ostream;
/// when walking backward/forward through a basic block.
class LivePhysRegs {
const TargetRegisterInfo *TRI = nullptr;
- SparseSet<unsigned> LiveRegs;
+ using RegisterSet = SparseSet<MCPhysReg, identity<MCPhysReg>>;
+ RegisterSet LiveRegs;
public:
/// Constructs an uninitialized set. init() needs to be called to initialize it.
@@ -76,7 +77,7 @@ public:
bool empty() const { return LiveRegs.empty(); }
/// Adds a physical register and all its sub-registers to the set.
- void addReg(unsigned Reg) {
+ void addReg(MCPhysReg Reg) {
assert(TRI && "LivePhysRegs is not initialized.");
assert(Reg <= TRI->getNumRegs() && "Expected a physical register.");
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
@@ -86,7 +87,7 @@ public:
/// Removes a physical register, all its sub-registers, and all its
/// super-registers from the set.
- void removeReg(unsigned Reg) {
+ void removeReg(MCPhysReg Reg) {
assert(TRI && "LivePhysRegs is not initialized.");
assert(Reg <= TRI->getNumRegs() && "Expected a physical register.");
for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R)
@@ -95,7 +96,7 @@ public:
/// Removes physical registers clobbered by the regmask operand \p MO.
void removeRegsInMask(const MachineOperand &MO,
- SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers =
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> *Clobbers =
nullptr);
/// Returns true if register \p Reg is contained in the set. This also
@@ -103,10 +104,10 @@ public:
/// addReg() always adds all sub-registers to the set as well.
/// Note: Returns false if just some sub registers are live, use available()
/// when searching a free register.
- bool contains(unsigned Reg) const { return LiveRegs.count(Reg); }
+ bool contains(MCPhysReg Reg) const { return LiveRegs.count(Reg); }
/// Returns true if register \p Reg and no aliasing register is in the set.
- bool available(const MachineRegisterInfo &MRI, unsigned Reg) const;
+ bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const;
/// Remove defined registers and regmask kills from the set.
void removeDefs(const MachineInstr &MI);
@@ -126,7 +127,7 @@ public:
/// defined or clobbered by a regmask. The operand will identify whether this
/// is a regmask or register operand.
void stepForward(const MachineInstr &MI,
- SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers);
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers);
/// Adds all live-in registers of basic block \p MBB.
/// Live-in registers are the registers in the block's live-in list and the
@@ -143,7 +144,7 @@ public:
/// registers.
void addLiveOutsNoPristines(const MachineBasicBlock &MBB);
- using const_iterator = SparseSet<unsigned>::const_iterator;
+ using const_iterator = RegisterSet::const_iterator;
const_iterator begin() const { return LiveRegs.begin(); }
const_iterator end() const { return LiveRegs.end(); }
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveRegUnits.h b/contrib/llvm/include/llvm/CodeGen/LiveRegUnits.h
index 249545906e01..5e9dd8b3cdf6 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveRegUnits.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveRegUnits.h
@@ -85,14 +85,14 @@ public:
bool empty() const { return Units.none(); }
/// Adds register units covered by physical register \p Reg.
- void addReg(unsigned Reg) {
+ void addReg(MCPhysReg Reg) {
for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit)
Units.set(*Unit);
}
/// Adds register units covered by physical register \p Reg that are
/// part of the lanemask \p Mask.
- void addRegMasked(unsigned Reg, LaneBitmask Mask) {
+ void addRegMasked(MCPhysReg Reg, LaneBitmask Mask) {
for (MCRegUnitMaskIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
LaneBitmask UnitMask = (*Unit).second;
if (UnitMask.none() || (UnitMask & Mask).any())
@@ -101,7 +101,7 @@ public:
}
/// Removes all register units covered by physical register \p Reg.
- void removeReg(unsigned Reg) {
+ void removeReg(MCPhysReg Reg) {
for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit)
Units.reset(*Unit);
}
@@ -115,7 +115,7 @@ public:
void addRegsInMask(const uint32_t *RegMask);
/// Returns true if no part of physical register \p Reg is live.
- bool available(unsigned Reg) const {
+ bool available(MCPhysReg Reg) const {
for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
if (Units.test(*Unit))
return false;
diff --git a/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index 7f46406c4789..98ac81915dc0 100644
--- a/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -425,6 +425,7 @@ struct MachineFrameInfo {
StringValue StackProtector;
// TODO: Serialize FunctionContextIdx
unsigned MaxCallFrameSize = ~0u; ///< ~0u means: not computed yet.
+ unsigned CVBytesOfCalleeSavedRegisters = 0;
bool HasOpaqueSPAdjustment = false;
bool HasVAStart = false;
bool HasMustTailInVarArgFunc = false;
@@ -443,6 +444,8 @@ struct MachineFrameInfo {
AdjustsStack == Other.AdjustsStack && HasCalls == Other.HasCalls &&
StackProtector == Other.StackProtector &&
MaxCallFrameSize == Other.MaxCallFrameSize &&
+ CVBytesOfCalleeSavedRegisters ==
+ Other.CVBytesOfCalleeSavedRegisters &&
HasOpaqueSPAdjustment == Other.HasOpaqueSPAdjustment &&
HasVAStart == Other.HasVAStart &&
HasMustTailInVarArgFunc == Other.HasMustTailInVarArgFunc &&
@@ -465,6 +468,8 @@ template <> struct MappingTraits<MachineFrameInfo> {
YamlIO.mapOptional("stackProtector", MFI.StackProtector,
StringValue()); // Don't print it out when it's empty.
YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, (unsigned)~0);
+ YamlIO.mapOptional("cvBytesOfCalleeSavedRegisters",
+ MFI.CVBytesOfCalleeSavedRegisters, 0U);
YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment,
false);
YamlIO.mapOptional("hasVAStart", MFI.HasVAStart, false);
@@ -489,6 +494,7 @@ struct MachineFunction {
bool FailedISel = false;
// Register information
bool TracksRegLiveness = false;
+ bool HasWinCFI = false;
std::vector<VirtualRegisterDefinition> VirtualRegisters;
std::vector<MachineFunctionLiveIn> LiveIns;
Optional<std::vector<FlowStringValue>> CalleeSavedRegisters;
@@ -512,6 +518,7 @@ template <> struct MappingTraits<MachineFunction> {
YamlIO.mapOptional("selected", MF.Selected, false);
YamlIO.mapOptional("failedISel", MF.FailedISel, false);
YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false);
+ YamlIO.mapOptional("hasWinCFI", MF.HasWinCFI, false);
YamlIO.mapOptional("registers", MF.VirtualRegisters,
std::vector<VirtualRegisterDefinition>());
YamlIO.mapOptional("liveins", MF.LiveIns,
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index ace33efd8713..ec2f270fcb3f 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -569,6 +569,12 @@ public:
return !empty() && back().isReturn();
}
+ /// Convenience function that returns true if the block ends in an EH scope
+ /// return instruction.
+ bool isEHScopeReturnBlock() const {
+ return !empty() && back().isEHScopeReturn();
+ }
+
/// Split the critical edge from this block to the given successor block, and
/// return the newly created block, or null if splitting is not possible.
///
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 2d6081f3577d..c2706a21a177 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -28,9 +28,14 @@ class AllocaInst;
/// The CalleeSavedInfo class tracks the information need to locate where a
/// callee saved register is in the current frame.
+/// A callee-saved register can also be saved to a different register rather
+/// than to the stack, by setting DstReg instead of FrameIdx.
class CalleeSavedInfo {
unsigned Reg;
- int FrameIdx;
+ union {
+ int FrameIdx;
+ unsigned DstReg;
+ };
/// Flag indicating whether the register is actually restored in the epilog.
/// In most cases, if a register is saved, it is also restored. There are
/// some situations, though, when this is not the case. For example, the
@@ -44,17 +49,29 @@ class CalleeSavedInfo {
/// by implicit uses on the return instructions, however, the required
/// changes in the ARM backend would be quite extensive.
bool Restored;
+ /// Flag indicating whether the register is spilled to stack or another
+ /// register.
+ bool SpilledToReg;
public:
explicit CalleeSavedInfo(unsigned R, int FI = 0)
- : Reg(R), FrameIdx(FI), Restored(true) {}
+ : Reg(R), FrameIdx(FI), Restored(true), SpilledToReg(false) {}
// Accessors.
unsigned getReg() const { return Reg; }
int getFrameIdx() const { return FrameIdx; }
- void setFrameIdx(int FI) { FrameIdx = FI; }
+ unsigned getDstReg() const { return DstReg; }
+ void setFrameIdx(int FI) {
+ FrameIdx = FI;
+ SpilledToReg = false;
+ }
+ void setDstReg(unsigned SpillReg) {
+ DstReg = SpillReg;
+ SpilledToReg = true;
+ }
bool isRestored() const { return Restored; }
void setRestored(bool R) { Restored = R; }
+ bool isSpilledToReg() const { return SpilledToReg; }
};
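A brief sketch of how a target might use the new register-spill alternative (every name below is hypothetical):

    CalleeSavedInfo CS(SavedReg);
    if (ScratchRegAvailable)
      CS.setDstReg(ScratchReg);   // isSpilledToReg() becomes true
    else
      CS.setFrameIdx(FrameIdx);   // conventional stack slot
    assert(CS.isSpilledToReg() == ScratchRegAvailable);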
/// The MachineFrameInfo class represents an abstract stack frame until
@@ -266,10 +283,14 @@ private:
/// It is only valid during and after prolog/epilog code insertion.
unsigned MaxCallFrameSize = ~0u;
+ /// The number of bytes of callee saved registers that the target wants to
+ /// report for the current function in the CodeView S_FRAMEPROC record.
+ unsigned CVBytesOfCalleeSavedRegisters = 0;
+
/// The prolog/epilog code inserter fills in this vector with each
- /// callee saved register saved in the frame. Beyond its use by the prolog/
- /// epilog code inserter, this data used for debug info and exception
- /// handling.
+ /// callee saved register saved in either the frame or a different
+ /// register. Beyond its use by the prolog/epilog code inserter,
+ /// this data is used for debug info and exception handling.
std::vector<CalleeSavedInfo> CSInfo;
/// Has CSInfo been set yet?
@@ -603,6 +624,15 @@ public:
}
void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
+ /// Returns how many bytes of callee-saved registers the target pushed in the
+ /// prologue. Only used for debug info.
+ unsigned getCVBytesOfCalleeSavedRegisters() const {
+ return CVBytesOfCalleeSavedRegisters;
+ }
+ void setCVBytesOfCalleeSavedRegisters(unsigned S) {
+ CVBytesOfCalleeSavedRegisters = S;
+ }
+
/// Create a new object at a fixed location on the stack.
/// All fixed objects should be created before other objects are created for
/// efficiency. By default, fixed objects are not pointed to by LLVM IR
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
index e8a4d529faac..25edf5bcce51 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -58,6 +58,7 @@ class DILocalVariable;
class DILocation;
class Function;
class GlobalValue;
+class LLVMTargetMachine;
class MachineConstantPool;
class MachineFrameInfo;
class MachineFunction;
@@ -70,7 +71,6 @@ class Pass;
class PseudoSourceValueManager;
class raw_ostream;
class SlotIndexes;
-class TargetMachine;
class TargetRegisterClass;
class TargetSubtargetInfo;
struct WasmEHFuncInfo;
@@ -225,7 +225,7 @@ struct LandingPadInfo {
class MachineFunction {
const Function &F;
- const TargetMachine &Target;
+ const LLVMTargetMachine &Target;
const TargetSubtargetInfo *STI;
MCContext &Ctx;
MachineModuleInfo &MMI;
@@ -294,7 +294,7 @@ class MachineFunction {
bool HasInlineAsm = false;
/// True if any WinCFI instructions have been emitted in this function.
- Optional<bool> HasWinCFI;
+ bool HasWinCFI = false;
/// Current high-level properties of the IR of the function (e.g. is in SSA
/// form or whether registers have been allocated)
@@ -316,6 +316,9 @@ class MachineFunction {
/// Map a landing pad's EH symbol to the call site indexes.
DenseMap<MCSymbol*, SmallVector<unsigned, 4>> LPadToCallSiteMap;
+ /// Map a landing pad to its index.
+ DenseMap<const MachineBasicBlock *, unsigned> WasmLPadToIndexMap;
+
/// Map of invoke call site index values to associated begin EH_LABEL.
DenseMap<MCSymbol*, unsigned> CallSiteMap;
@@ -363,10 +366,31 @@ public:
int Slot, const DILocation *Loc)
: Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {}
};
+
+ class Delegate {
+ virtual void anchor();
+
+ public:
+ virtual ~Delegate() = default;
+ /// Callback after an insertion. This should not modify the MI directly.
+ virtual void MF_HandleInsertion(MachineInstr &MI) = 0;
+ /// Callback before a removal. This should not modify the MI directly.
+ virtual void MF_HandleRemoval(MachineInstr &MI) = 0;
+ };
+
+private:
+ Delegate *TheDelegate = nullptr;
+
+ // Callbacks for insertion and removal.
+ void handleInsertion(MachineInstr &MI);
+ void handleRemoval(MachineInstr &MI);
+ friend struct ilist_traits<MachineInstr>;
+
+public:
using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>;
VariableDbgInfoMapTy VariableDbgInfos;
- MachineFunction(const Function &F, const TargetMachine &Target,
+ MachineFunction(const Function &F, const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI, unsigned FunctionNum,
MachineModuleInfo &MMI);
MachineFunction(const MachineFunction &) = delete;
@@ -379,6 +403,23 @@ public:
init();
}
+ /// Reset the currently registered delegate - otherwise assert.
+ void resetDelegate(Delegate *delegate) {
+ assert(TheDelegate == delegate &&
+ "Only the current delegate can perform reset!");
+ TheDelegate = nullptr;
+ }
+
+ /// Set the delegate. resetDelegate must be called before attempting
+ /// to set.
+ void setDelegate(Delegate *delegate) {
+ assert(delegate && !TheDelegate &&
+ "Attempted to set delegate to null, or to change it without "
+ "first resetting it!");
+
+ TheDelegate = delegate;
+ }
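A sketch of a hypothetical observer built on the new Delegate hooks (the class name and counters are illustrative only):

    struct InstrCountDelegate final : MachineFunction::Delegate {
      unsigned NumInserted = 0, NumRemoved = 0;
      void MF_HandleInsertion(MachineInstr &) override { ++NumInserted; }
      void MF_HandleRemoval(MachineInstr &) override { ++NumRemoved; }
    };

    InstrCountDelegate D;
    MF.setDelegate(&D);
    // ... run a transformation that creates or erases MachineInstrs ...
    MF.resetDelegate(&D);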
+
MachineModuleInfo &getMMI() const { return MMI; }
MCContext &getContext() const { return Ctx; }
@@ -397,7 +438,7 @@ public:
unsigned getFunctionNumber() const { return FunctionNumber; }
/// getTarget - Return the target machine this machine code is compiled with
- const TargetMachine &getTarget() const { return Target; }
+ const LLVMTargetMachine &getTarget() const { return Target; }
/// getSubtarget - Return the subtarget for which this machine code is being
/// compiled.
@@ -484,8 +525,7 @@ public:
}
bool hasWinCFI() const {
- assert(HasWinCFI.hasValue() && "HasWinCFI not set yet!");
- return *HasWinCFI;
+ return HasWinCFI;
}
void setHasWinCFI(bool v) { HasWinCFI = v; }
@@ -619,6 +659,14 @@ public:
BasicBlocks.sort(comp);
}
+ /// Return the number of \p MachineInstrs in this \p MachineFunction.
+ unsigned getInstructionCount() const {
+ unsigned InstrCount = 0;
+ for (const MachineBasicBlock &MBB : BasicBlocks)
+ InstrCount += MBB.size();
+ return InstrCount;
+ }
+
//===--------------------------------------------------------------------===//
// Internal functions used to automatically number MachineBasicBlocks
@@ -711,23 +759,14 @@ public:
/// Allocate and initialize a register mask with @p NumRegister bits.
uint32_t *allocateRegMask();
- /// allocateMemRefsArray - Allocate an array to hold MachineMemOperand
- /// pointers. This array is owned by the MachineFunction.
- MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num);
-
- /// extractLoadMemRefs - Allocate an array and populate it with just the
- /// load information from the given MachineMemOperand sequence.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator>
- extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
- MachineInstr::mmo_iterator End);
-
- /// extractStoreMemRefs - Allocate an array and populate it with just the
- /// store information from the given MachineMemOperand sequence.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator>
- extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
- MachineInstr::mmo_iterator End);
+ /// Allocate and construct an extra info structure for a `MachineInstr`.
+ ///
+ /// This is allocated on the function's allocator and so lives the life of
+ /// the function.
+ MachineInstr::ExtraInfo *
+ createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
+ MCSymbol *PreInstrSymbol = nullptr,
+ MCSymbol *PostInstrSymbol = nullptr);
/// Allocate a string and populate it with the given external symbol name.
const char *createExternalSymbolName(StringRef Name);
@@ -776,7 +815,8 @@ public:
LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad);
/// Remap landing pad labels and remove any deleted landing pads.
- void tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap = nullptr);
+ void tidyLandingPads(DenseMap<MCSymbol *, uintptr_t> *LPMap = nullptr,
+ bool TidyIfNoBeginLabels = true);
/// Return a reference to the landing pad info for the current function.
const std::vector<LandingPadInfo> &getLandingPads() const {
@@ -788,7 +828,9 @@ public:
void addInvoke(MachineBasicBlock *LandingPad,
MCSymbol *BeginLabel, MCSymbol *EndLabel);
- /// Add a new panding pad. Returns the label ID for the landing pad entry.
+ /// Add a new landing pad, and extract the exception handling information from
+ /// the landingpad instruction. Returns the label ID for the landing pad
+ /// entry.
MCSymbol *addLandingPad(MachineBasicBlock *LandingPad);
/// Provide the catch typeinfo for a landing pad.
@@ -817,6 +859,22 @@ public:
/// Map the landing pad's EH symbol to the call site indexes.
void setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites);
+ /// Map the landing pad to its index. Used for Wasm exception handling.
+ void setWasmLandingPadIndex(const MachineBasicBlock *LPad, unsigned Index) {
+ WasmLPadToIndexMap[LPad] = Index;
+ }
+
+ /// Returns true if the landing pad has an associated index in wasm EH.
+ bool hasWasmLandingPadIndex(const MachineBasicBlock *LPad) const {
+ return WasmLPadToIndexMap.count(LPad);
+ }
+
+ /// Get the index in wasm EH for a given landing pad.
+ unsigned getWasmLandingPadIndex(const MachineBasicBlock *LPad) const {
+ assert(hasWasmLandingPadIndex(LPad));
+ return WasmLPadToIndexMap.lookup(LPad);
+ }
+
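A sketch of how a Wasm EH preparation step might number landing pads with this API (illustrative only; MF is an assumed MachineFunction):

    unsigned Index = 0;
    for (MachineBasicBlock &MBB : MF)
      if (MBB.isEHPad())
        MF.setWasmLandingPadIndex(&MBB, Index++);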
/// Get the call site indexes for a landing pad EH symbol.
SmallVectorImpl<unsigned> &getCallSiteLandingPad(MCSymbol *Sym) {
assert(hasCallSiteLandingPad(Sym) &&
@@ -880,15 +938,6 @@ public:
}
};
-/// \name Exception Handling
-/// \{
-
-/// Extract the exception handling information from the landingpad instruction
-/// and add them to the specified machine module info.
-void addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB);
-
-/// \}
-
//===--------------------------------------------------------------------===//
// GraphTraits specializations for function basic block graphs (CFGs)
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
index 88e13cdf4138..ea1a2a536fc7 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -17,16 +17,20 @@
#define LLVM_CODEGEN_MACHINEINSTR_H
#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/PointerSumType.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ArrayRecycler.h"
+#include "llvm/Support/TrailingObjects.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -61,7 +65,7 @@ class MachineInstr
: public ilist_node_with_parent<MachineInstr, MachineBasicBlock,
ilist_sentinel_tracking<true>> {
public:
- using mmo_iterator = MachineMemOperand **;
+ using mmo_iterator = ArrayRef<MachineMemOperand *>::iterator;
/// Flags to specify different kinds of comments to output in
/// assembly code. These flags carry semantic information not
@@ -93,8 +97,14 @@ public:
// contraction operations like fma.
FmAfn = 1 << 9, // Instruction may map to Fast math
// intrinsic approximation.
- FmReassoc = 1 << 10 // Instruction supports Fast math
+ FmReassoc = 1 << 10, // Instruction supports Fast math
// reassociation of operand order.
+ NoUWrap = 1 << 11, // Instruction supports binary operator
+ // no unsigned wrap.
+ NoSWrap = 1 << 12, // Instruction supports binary operator
+ // no signed wrap.
+ IsExact = 1 << 13 // Instruction's division is
+ // known to be exact.
};
private:
@@ -118,14 +128,102 @@ private:
// anything other than to convey comment
// information to AsmPrinter.
- uint8_t NumMemRefs = 0; // Information on memory references.
- // Note that MemRefs == nullptr, means 'don't know', not 'no memory access'.
- // Calling code must treat missing information conservatively. If the number
- // of memory operands required to be precise exceeds the maximum value of
- // NumMemRefs - currently 256 - we remove the operands entirely. Note also
- // that this is a non-owning reference to a shared copy on write buffer owned
- // by the MachineFunction and created via MF.allocateMemRefsArray.
- mmo_iterator MemRefs = nullptr;
+ /// Internal implementation detail class that provides out-of-line storage for
+ /// extra info used by the machine instruction when this info cannot be stored
+ /// in-line within the instruction itself.
+ ///
+ /// This has to be defined eagerly due to the implementation constraints of
+ /// `PointerSumType` where it is used.
+ class ExtraInfo final
+ : TrailingObjects<ExtraInfo, MachineMemOperand *, MCSymbol *> {
+ public:
+ static ExtraInfo *create(BumpPtrAllocator &Allocator,
+ ArrayRef<MachineMemOperand *> MMOs,
+ MCSymbol *PreInstrSymbol = nullptr,
+ MCSymbol *PostInstrSymbol = nullptr) {
+ bool HasPreInstrSymbol = PreInstrSymbol != nullptr;
+ bool HasPostInstrSymbol = PostInstrSymbol != nullptr;
+ auto *Result = new (Allocator.Allocate(
+ totalSizeToAlloc<MachineMemOperand *, MCSymbol *>(
+ MMOs.size(), HasPreInstrSymbol + HasPostInstrSymbol),
+ alignof(ExtraInfo)))
+ ExtraInfo(MMOs.size(), HasPreInstrSymbol, HasPostInstrSymbol);
+
+ // Copy the actual data into the trailing objects.
+ std::copy(MMOs.begin(), MMOs.end(),
+ Result->getTrailingObjects<MachineMemOperand *>());
+
+ if (HasPreInstrSymbol)
+ Result->getTrailingObjects<MCSymbol *>()[0] = PreInstrSymbol;
+ if (HasPostInstrSymbol)
+ Result->getTrailingObjects<MCSymbol *>()[HasPreInstrSymbol] =
+ PostInstrSymbol;
+
+ return Result;
+ }
+
+ ArrayRef<MachineMemOperand *> getMMOs() const {
+ return makeArrayRef(getTrailingObjects<MachineMemOperand *>(), NumMMOs);
+ }
+
+ MCSymbol *getPreInstrSymbol() const {
+ return HasPreInstrSymbol ? getTrailingObjects<MCSymbol *>()[0] : nullptr;
+ }
+
+ MCSymbol *getPostInstrSymbol() const {
+ return HasPostInstrSymbol
+ ? getTrailingObjects<MCSymbol *>()[HasPreInstrSymbol]
+ : nullptr;
+ }
+
+ private:
+ friend TrailingObjects;
+
+ // Description of the extra info, used to interpret the actual optional
+ // data appended.
+ //
+ // Note that this is not terribly space optimized. This leaves a great deal
+ // of flexibility to fit more in here later.
+ const int NumMMOs;
+ const bool HasPreInstrSymbol;
+ const bool HasPostInstrSymbol;
+
+ // Implement the `TrailingObjects` internal API.
+ size_t numTrailingObjects(OverloadToken<MachineMemOperand *>) const {
+ return NumMMOs;
+ }
+ size_t numTrailingObjects(OverloadToken<MCSymbol *>) const {
+ return HasPreInstrSymbol + HasPostInstrSymbol;
+ }
+
+ // Just a boring constructor to allow us to initialize the sizes. Always use
+ // the `create` routine above.
+ ExtraInfo(int NumMMOs, bool HasPreInstrSymbol, bool HasPostInstrSymbol)
+ : NumMMOs(NumMMOs), HasPreInstrSymbol(HasPreInstrSymbol),
+ HasPostInstrSymbol(HasPostInstrSymbol) {}
+ };
+
+ /// Enumeration of the kinds of inline extra info available. It is important
+ /// that the `MachineMemOperand` inline kind has a tag value of zero to make
+ /// it accessible as an `ArrayRef`.
+ enum ExtraInfoInlineKinds {
+ EIIK_MMO = 0,
+ EIIK_PreInstrSymbol,
+ EIIK_PostInstrSymbol,
+ EIIK_OutOfLine
+ };
+
+ // We store extra information about the instruction here. The common case is
+ // expected to be nothing or a single pointer (typically a MMO or a symbol).
+ // We work to optimize this common case by storing it inline here rather than
+ // requiring a separate allocation, but we fall back to an allocation when
+ // multiple pointers are needed.
+ PointerSumType<ExtraInfoInlineKinds,
+ PointerSumTypeMember<EIIK_MMO, MachineMemOperand *>,
+ PointerSumTypeMember<EIIK_PreInstrSymbol, MCSymbol *>,
+ PointerSumTypeMember<EIIK_PostInstrSymbol, MCSymbol *>,
+ PointerSumTypeMember<EIIK_OutOfLine, ExtraInfo *>>
+ Info;
DebugLoc debugLoc; // Source line information.
@@ -310,7 +408,7 @@ public:
/// Returns the opcode of this MachineInstr.
unsigned getOpcode() const { return MCID->Opcode; }
- /// Access to explicit operands of the instruction.
+ /// Returns the total number of operands.
unsigned getNumOperands() const { return NumOperands; }
const MachineOperand& getOperand(unsigned i) const {
@@ -412,28 +510,70 @@ public:
return I - operands_begin();
}
- /// Access to memory operands of the instruction
- mmo_iterator memoperands_begin() const { return MemRefs; }
- mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; }
+ /// Access to memory operands of the instruction. If there are none, that does
+ /// not imply anything about whether the function accesses memory. Instead,
+ /// the caller must behave conservatively.
+ ArrayRef<MachineMemOperand *> memoperands() const {
+ if (!Info)
+ return {};
+
+ if (Info.is<EIIK_MMO>())
+ return makeArrayRef(Info.getAddrOfZeroTagPointer(), 1);
+
+ if (ExtraInfo *EI = Info.get<EIIK_OutOfLine>())
+ return EI->getMMOs();
+
+ return {};
+ }
+
+ /// Access to memory operands of the instruction.
+ ///
+ /// If `memoperands_begin() == memoperands_end()`, that does not imply
+ /// anything about whether the function accesses memory. Instead, the caller
+ /// must behave conservatively.
+ mmo_iterator memoperands_begin() const { return memoperands().begin(); }
+
+ /// Access to memory operands of the instruction.
+ ///
+ /// If `memoperands_begin() == memoperands_end()`, that does not imply
+ /// anything about whether the function accesses memory. Instead, the caller
+ /// must behave conservatively.
+ mmo_iterator memoperands_end() const { return memoperands().end(); }
+
/// Return true if we don't have any memory operands which describe the
/// memory access done by this instruction. If this is true, calling code
/// must be conservative.
- bool memoperands_empty() const { return NumMemRefs == 0; }
-
- iterator_range<mmo_iterator> memoperands() {
- return make_range(memoperands_begin(), memoperands_end());
- }
- iterator_range<mmo_iterator> memoperands() const {
- return make_range(memoperands_begin(), memoperands_end());
- }
+ bool memoperands_empty() const { return memoperands().empty(); }
/// Return true if this instruction has exactly one MachineMemOperand.
- bool hasOneMemOperand() const {
- return NumMemRefs == 1;
- }
+ bool hasOneMemOperand() const { return memoperands().size() == 1; }
/// Return the number of memory operands.
- unsigned getNumMemOperands() const { return NumMemRefs; }
+ unsigned getNumMemOperands() const { return memoperands().size(); }
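With memoperands() now returning an ArrayRef, a plain range-for replaces the old iterator-pair loop, for example (MI is an assumed MachineInstr):

    bool SeenVolatile = false;
    for (const MachineMemOperand *MMO : MI.memoperands())
      SeenVolatile |= MMO->isVolatile();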
+
+ /// Helper to extract a pre-instruction symbol if one has been added.
+ MCSymbol *getPreInstrSymbol() const {
+ if (!Info)
+ return nullptr;
+ if (MCSymbol *S = Info.get<EIIK_PreInstrSymbol>())
+ return S;
+ if (ExtraInfo *EI = Info.get<EIIK_OutOfLine>())
+ return EI->getPreInstrSymbol();
+
+ return nullptr;
+ }
+
+ /// Helper to extract a post-instruction symbol if one has been added.
+ MCSymbol *getPostInstrSymbol() const {
+ if (!Info)
+ return nullptr;
+ if (MCSymbol *S = Info.get<EIIK_PostInstrSymbol>())
+ return S;
+ if (ExtraInfo *EI = Info.get<EIIK_OutOfLine>())
+ return EI->getPostInstrSymbol();
+
+ return nullptr;
+ }
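A small usage sketch for these accessors together with the setters declared further down (MF and MI are assumed; the temp symbols are illustrative):

    MCSymbol *Begin = MF.getContext().createTempSymbol();
    MCSymbol *End = MF.getContext().createTempSymbol();
    MI.setPreInstrSymbol(MF, Begin);
    MI.setPostInstrSymbol(MF, End);
    assert(MI.getPreInstrSymbol() == Begin && MI.getPostInstrSymbol() == End);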
/// API for querying MachineInstr properties. They are the same as MCInstrDesc
/// queries but they are bundle aware.
@@ -450,6 +590,8 @@ public:
/// The second argument indicates whether the query should look inside
/// instruction bundles.
bool hasProperty(unsigned MCFlag, QueryType Type = AnyInBundle) const {
+ assert(MCFlag < 64 &&
+ "MCFlag out of range for bit mask in getFlags/hasPropertyInBundle.");
// Inline the fast path for unbundled or bundle-internal instructions.
if (Type == IgnoreBundle || !isBundled() || isBundledWithPred())
return getDesc().getFlags() & (1ULL << MCFlag);
@@ -482,6 +624,12 @@ public:
return hasProperty(MCID::Return, Type);
}
+ /// Return true if this is an instruction that marks the end of an EH scope,
+ /// i.e., a catchpad or a cleanuppad instruction.
+ bool isEHScopeReturn(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::EHScopeReturn, Type);
+ }
+
bool isCall(QueryType Type = AnyInBundle) const {
return hasProperty(MCID::Call, Type);
}
@@ -1323,47 +1471,63 @@ public:
/// fewer operand than it started with.
void RemoveOperand(unsigned OpNo);
+ /// Clear this MachineInstr's memory reference descriptor list. This resets
+ /// the memrefs to their most conservative state. This should be used only
+ /// as a last resort since it greatly pessimizes our knowledge of the memory
+ /// access performed by the instruction.
+ void dropMemRefs(MachineFunction &MF);
+
+ /// Assign this MachineInstr's memory reference descriptor list.
+ ///
+ /// Unlike other methods, this *will* allocate them into a new array
+ /// associated with the provided `MachineFunction`.
+ void setMemRefs(MachineFunction &MF, ArrayRef<MachineMemOperand *> MemRefs);
+
/// Add a MachineMemOperand to the machine instruction.
/// This function should be used only occasionally. The setMemRefs function
/// is the primary method for setting up a MachineInstr's MemRefs list.
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO);
- /// Assign this MachineInstr's memory reference descriptor list.
- /// This does not transfer ownership.
- void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
- setMemRefs(std::make_pair(NewMemRefs, NewMemRefsEnd-NewMemRefs));
- }
+ /// Clone another MachineInstr's memory reference descriptor list and replace
+ /// ours with it.
+ ///
+ /// Note that `*this` may be the incoming MI!
+ ///
+ /// Prefer this API whenever possible as it can avoid allocations in common
+ /// cases.
+ void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI);
- /// Assign this MachineInstr's memory reference descriptor list. First
- /// element in the pair is the begin iterator/pointer to the array; the
- /// second is the number of MemoryOperands. This does not transfer ownership
- /// of the underlying memory.
- void setMemRefs(std::pair<mmo_iterator, unsigned> NewMemRefs) {
- MemRefs = NewMemRefs.first;
- NumMemRefs = uint8_t(NewMemRefs.second);
- assert(NumMemRefs == NewMemRefs.second &&
- "Too many memrefs - must drop memory operands");
- }
+ /// Clone the merge of multiple MachineInstrs' memory reference descriptors
+ /// list and replace ours with it.
+ ///
+ /// Note that `*this` may be one of the incoming MIs!
+ ///
+ /// Prefer this API whenever possible as it can avoid allocations in common
+ /// cases.
+ void cloneMergedMemRefs(MachineFunction &MF,
+ ArrayRef<const MachineInstr *> MIs);
- /// Return a set of memrefs (begin iterator, size) which conservatively
- /// describe the memory behavior of both MachineInstrs. This is appropriate
- /// for use when merging two MachineInstrs into one. This routine does not
- /// modify the memrefs of the this MachineInstr.
- std::pair<mmo_iterator, unsigned> mergeMemRefsWith(const MachineInstr& Other);
+ /// Set a symbol that will be emitted just prior to the instruction itself.
+ ///
+ /// Setting this to a null pointer will remove any such symbol.
+ ///
+ /// FIXME: This is not fully implemented yet.
+ void setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol);
+
+ /// Set a symbol that will be emitted just after the instruction itself.
+ ///
+ /// Setting this to a null pointer will remove any such symbol.
+ ///
+ /// FIXME: This is not fully implemented yet.
+ void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol);
/// Return the MIFlags which represent both MachineInstrs. This
/// should be used when merging two MachineInstrs into one. This routine does
/// not modify the MIFlags of this MachineInstr.
uint16_t mergeFlagsWith(const MachineInstr& Other) const;
- /// Clear this MachineInstr's memory reference descriptor list. This resets
- /// the memrefs to their most conservative state. This should be used only
- /// as a last resort since it greatly pessimizes our knowledge of the memory
- /// access performed by the instruction.
- void dropMemRefs() {
- MemRefs = nullptr;
- NumMemRefs = 0;
- }
+ /// Copy all IR flags from \p I to this MachineInstr's MIFlags.
+ void copyIRFlags(const Instruction &I);
/// Break any tie involving OpIdx.
void untieRegOperand(unsigned OpIdx) {
@@ -1377,6 +1541,13 @@ public:
/// Add all implicit def and use operands to this instruction.
void addImplicitDefUseOperands(MachineFunction &MF);
+ /// Scan instructions following MI and collect any matching DBG_VALUEs.
+ void collectDebugValues(SmallVectorImpl<MachineInstr *> &DbgValues);
+
+ /// Find all DBG_VALUEs immediately following this instruction that point
+ /// to a register def in this instruction and point them to \p Reg instead.
+ void changeDebugValuesDefReg(unsigned Reg);
+
private:
/// If this instruction is embedded into a MachineFunction, return the
/// MachineRegisterInfo object for the current function, otherwise
@@ -1394,7 +1565,7 @@ private:
void AddRegOperandsToUseLists(MachineRegisterInfo&);
/// Slow path for hasProperty when we're dealing with a bundle.
- bool hasPropertyInBundle(unsigned Mask, QueryType Type) const;
+ bool hasPropertyInBundle(uint64_t Mask, QueryType Type) const;
/// Implements the logic of getRegClassConstraintEffectForVReg for the
/// this MI and the given operand index \p OpIdx.
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index 665608755741..b5e523f655e7 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -191,15 +191,20 @@ public:
return *this;
}
- const MachineInstrBuilder &setMemRefs(MachineInstr::mmo_iterator b,
- MachineInstr::mmo_iterator e) const {
- MI->setMemRefs(b, e);
+ const MachineInstrBuilder &
+ setMemRefs(ArrayRef<MachineMemOperand *> MMOs) const {
+ MI->setMemRefs(*MF, MMOs);
return *this;
}
- const MachineInstrBuilder &setMemRefs(std::pair<MachineInstr::mmo_iterator,
- unsigned> MemOperandsRef) const {
- MI->setMemRefs(MemOperandsRef);
+ const MachineInstrBuilder &cloneMemRefs(const MachineInstr &OtherMI) const {
+ MI->cloneMemRefs(*MF, OtherMI);
+ return *this;
+ }
+
+ const MachineInstrBuilder &
+ cloneMergedMemRefs(ArrayRef<const MachineInstr *> OtherMIs) const {
+ MI->cloneMergedMemRefs(*MF, OtherMIs);
return *this;
}
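A sketch of the builder-side helper when two loads are combined into a single instruction (every name below is hypothetical):

    MachineInstr *Paired =
        BuildMI(MBB, InsertPt, DL, TII.get(PairedOpc), DstReg)
            .addReg(BaseReg)
            .addImm(Offset)
            .cloneMergedMemRefs({&LoadA, &LoadB});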
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index 554e89019b76..4371420bc7a2 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -46,10 +46,10 @@ namespace llvm {
class BasicBlock;
class CallInst;
class Function;
-class MachineFunction;
+class LLVMTargetMachine;
class MMIAddrLabelMap;
+class MachineFunction;
class Module;
-class TargetMachine;
//===----------------------------------------------------------------------===//
/// This class can be derived from and used by targets to hold private
@@ -76,7 +76,7 @@ protected:
/// for specific use.
///
class MachineModuleInfo : public ImmutablePass {
- const TargetMachine &TM;
+ const LLVMTargetMachine &TM;
/// This is the MCContext used for the entire code generator.
MCContext Context;
@@ -145,7 +145,7 @@ class MachineModuleInfo : public ImmutablePass {
public:
static char ID; // Pass identification, replacement for typeid
- explicit MachineModuleInfo(const TargetMachine *TM = nullptr);
+ explicit MachineModuleInfo(const LLVMTargetMachine *TM = nullptr);
~MachineModuleInfo() override;
// Initialization and Finalization
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
index 6a87fa2fbf00..17df1fa792b7 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
@@ -80,6 +80,28 @@ public:
SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); }
};
+/// MachineModuleInfoCOFF - This is a MachineModuleInfoImpl implementation
+/// for COFF targets.
+class MachineModuleInfoCOFF : public MachineModuleInfoImpl {
+ /// GVStubs - These stubs are used to materialize global addresses in PIC
+ /// mode.
+ DenseMap<MCSymbol *, StubValueTy> GVStubs;
+
+ virtual void anchor(); // Out of line virtual method.
+
+public:
+ MachineModuleInfoCOFF(const MachineModuleInfo &) {}
+
+ StubValueTy &getGVStubEntry(MCSymbol *Sym) {
+ assert(Sym && "Key cannot be null");
+ return GVStubs[Sym];
+ }
+
+ /// Accessor methods to return the set of stubs in sorted order.
+
+ SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); }
+};
+
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H
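The new COFF impl mirrors the existing stub maps; a target AsmPrinter would typically reach it through MachineModuleInfo::getObjFileInfo<>, which is part of the surrounding API and not shown in this hunk. A sketch under that assumption; the helper name and the flag value are illustrative.

    #include "llvm/CodeGen/MachineModuleInfo.h"
    #include "llvm/CodeGen/MachineModuleInfoImpls.h"

    // Sketch only: record a PIC stub for StubSym that resolves to Target.
    // The meaning of the StubValueTy flag is target-defined.
    static void recordCOFFGVStub(llvm::MachineModuleInfo &MMI,
                                 llvm::MCSymbol *StubSym,
                                 llvm::MCSymbol *Target) {
      auto &MMICOFF = MMI.getObjFileInfo<llvm::MachineModuleInfoCOFF>();
      MMICOFF.getGVStubEntry(StubSym) =
          llvm::MachineModuleInfoImpl::StubValueTy(Target, /*Flag=*/true);
    }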
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h b/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h
index 95bfc24b57ff..bfd1e994053a 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -61,9 +61,6 @@ public:
/// \p OutlinedFunctions.
unsigned FunctionIdx;
- /// Set to false if the candidate overlapped with another candidate.
- bool InCandidateList = true;
-
/// Identifier denoting the instructions to emit to call an outlined function
/// from this point. Defined by the target.
unsigned CallConstructionID;
@@ -82,6 +79,12 @@ public:
/// been used across the sequence.
LiveRegUnits UsedInSequence;
+ /// Target-specific flags for this Candidate's MBB.
+ unsigned Flags = 0x0;
+
+ /// True if initLRU has been called on this Candidate.
+ bool LRUWasSet = false;
+
/// Return the number of instructions in this Candidate.
unsigned getLength() const { return Len; }
@@ -99,9 +102,7 @@ public:
}
/// Returns the call overhead of this candidate.
- unsigned getCallOverhead() const {
- return InCandidateList ? CallOverhead : 0;
- }
+ unsigned getCallOverhead() const { return CallOverhead; }
MachineBasicBlock::iterator &front() { return FirstInst; }
MachineBasicBlock::iterator &back() { return LastInst; }
@@ -120,9 +121,9 @@ public:
Candidate(unsigned StartIdx, unsigned Len,
MachineBasicBlock::iterator &FirstInst,
MachineBasicBlock::iterator &LastInst, MachineBasicBlock *MBB,
- unsigned FunctionIdx)
+ unsigned FunctionIdx, unsigned Flags)
: StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst),
- MBB(MBB), FunctionIdx(FunctionIdx) {}
+ MBB(MBB), FunctionIdx(FunctionIdx), Flags(Flags) {}
Candidate() {}
/// Used to ensure that \p Candidates are outlined in an order that
@@ -138,6 +139,10 @@ public:
void initLRU(const TargetRegisterInfo &TRI) {
assert(MBB->getParent()->getRegInfo().tracksLiveness() &&
"Candidate's Machine Function must track liveness");
+ // Only initialize once.
+ if (LRUWasSet)
+ return;
+ LRUWasSet = true;
LRU.init(TRI);
LRU.addLiveOuts(*MBB);
@@ -158,24 +163,13 @@ public:
/// class of candidate.
struct OutlinedFunction {
-private:
- /// The number of candidates for this \p OutlinedFunction.
- unsigned OccurrenceCount = 0;
-
public:
- std::vector<std::shared_ptr<Candidate>> Candidates;
+ std::vector<Candidate> Candidates;
/// The actual outlined function created.
/// This is initialized after we go through and create the actual function.
MachineFunction *MF = nullptr;
- /// A number assigned to this function which appears at the end of its name.
- unsigned Name;
-
- /// The sequence of integers corresponding to the instructions in this
- /// function.
- std::vector<unsigned> Sequence;
-
/// Represents the size of a sequence in bytes. (Some instructions vary
/// widely in size, so just counting the instructions isn't very useful.)
unsigned SequenceSize;
@@ -187,49 +181,41 @@ public:
unsigned FrameConstructionID;
/// Return the number of candidates for this \p OutlinedFunction.
- unsigned getOccurrenceCount() { return OccurrenceCount; }
-
- /// Decrement the occurrence count of this OutlinedFunction and return the
- /// new count.
- unsigned decrement() {
- assert(OccurrenceCount > 0 && "Can't decrement an empty function!");
- OccurrenceCount--;
- return getOccurrenceCount();
- }
+ unsigned getOccurrenceCount() const { return Candidates.size(); }
/// Return the number of bytes it would take to outline this
/// function.
- unsigned getOutliningCost() {
+ unsigned getOutliningCost() const {
unsigned CallOverhead = 0;
- for (std::shared_ptr<Candidate> &C : Candidates)
- CallOverhead += C->getCallOverhead();
+ for (const Candidate &C : Candidates)
+ CallOverhead += C.getCallOverhead();
return CallOverhead + SequenceSize + FrameOverhead;
}
/// Return the size in bytes of the unoutlined sequences.
- unsigned getNotOutlinedCost() { return OccurrenceCount * SequenceSize; }
+ unsigned getNotOutlinedCost() const {
+ return getOccurrenceCount() * SequenceSize;
+ }
/// Return the number of instructions that would be saved by outlining
/// this function.
- unsigned getBenefit() {
+ unsigned getBenefit() const {
unsigned NotOutlinedCost = getNotOutlinedCost();
unsigned OutlinedCost = getOutliningCost();
return (NotOutlinedCost < OutlinedCost) ? 0
: NotOutlinedCost - OutlinedCost;
}
- OutlinedFunction(std::vector<Candidate> &Cands,
- unsigned SequenceSize, unsigned FrameOverhead,
- unsigned FrameConstructionID)
- : SequenceSize(SequenceSize), FrameOverhead(FrameOverhead),
- FrameConstructionID(FrameConstructionID) {
- OccurrenceCount = Cands.size();
- for (Candidate &C : Cands)
- Candidates.push_back(std::make_shared<outliner::Candidate>(C));
-
- unsigned B = getBenefit();
- for (std::shared_ptr<Candidate> &C : Candidates)
- C->Benefit = B;
+ /// Return the number of instructions in this sequence.
+ unsigned getNumInstrs() const { return Candidates[0].getLength(); }
+
+ OutlinedFunction(std::vector<Candidate> &Candidates, unsigned SequenceSize,
+ unsigned FrameOverhead, unsigned FrameConstructionID)
+ : Candidates(Candidates), SequenceSize(SequenceSize),
+ FrameOverhead(FrameOverhead), FrameConstructionID(FrameConstructionID) {
+ const unsigned B = getBenefit();
+ for (Candidate &C : Candidates)
+ C.Benefit = B;
}
OutlinedFunction() {}
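Since the occurrence count is now just Candidates.size(), the cost model reads directly off the stored candidates. A sketch with hypothetical numbers; candidate discovery and the per-call overhead are assumed to have been done by the target.

    #include "llvm/CodeGen/MachineOutliner.h"

    // Sketch only: Cands was filled in by target-specific candidate discovery.
    // With three candidates, SequenceSize = 12, per-call overhead = 4 and
    // FrameOverhead = 8:
    //   getNotOutlinedCost() = 3 * 12       = 36
    //   getOutliningCost()   = 3*4 + 12 + 8 = 32
    //   getBenefit()         = 36 - 32      = 4 bytes saved
    static llvm::outliner::OutlinedFunction
    makeOutlinedFunction(std::vector<llvm::outliner::Candidate> &Cands) {
      llvm::outliner::OutlinedFunction OF(Cands, /*SequenceSize=*/12,
                                          /*FrameOverhead=*/8,
                                          /*FrameConstructionID=*/0);
      return OF; // callers keep it only if OF.getBenefit() > 0
    }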
diff --git a/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h b/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h
index 3aba0bba7d1a..a031c92d914f 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h
@@ -24,22 +24,20 @@
namespace llvm {
-using MachinePassCtor = void *(*)();
-
//===----------------------------------------------------------------------===//
///
/// MachinePassRegistryListener - Listener for additions and removals of nodes
/// in the registration list.
///
//===----------------------------------------------------------------------===//
-class MachinePassRegistryListener {
- virtual void anchor();
+template <class PassCtorTy> class MachinePassRegistryListener {
+ virtual void anchor() {}
public:
MachinePassRegistryListener() = default;
virtual ~MachinePassRegistryListener() = default;
- virtual void NotifyAdd(StringRef N, MachinePassCtor C, StringRef D) = 0;
+ virtual void NotifyAdd(StringRef N, PassCtorTy C, StringRef D) = 0;
virtual void NotifyRemove(StringRef N) = 0;
};
@@ -48,15 +46,15 @@ public:
/// MachinePassRegistryNode - Machine pass node stored in registration list.
///
//===----------------------------------------------------------------------===//
-class MachinePassRegistryNode {
+template <typename PassCtorTy> class MachinePassRegistryNode {
private:
MachinePassRegistryNode *Next = nullptr; // Next function pass in list.
StringRef Name; // Name of function pass.
StringRef Description; // Description string.
- MachinePassCtor Ctor; // Function pass creator.
+ PassCtorTy Ctor; // Pass creator.
public:
- MachinePassRegistryNode(const char *N, const char *D, MachinePassCtor C)
+ MachinePassRegistryNode(const char *N, const char *D, PassCtorTy C)
: Name(N), Description(D), Ctor(C) {}
// Accessors
@@ -64,7 +62,7 @@ public:
MachinePassRegistryNode **getNextAddress() { return &Next; }
StringRef getName() const { return Name; }
StringRef getDescription() const { return Description; }
- MachinePassCtor getCtor() const { return Ctor; }
+ PassCtorTy getCtor() const { return Ctor; }
void setNext(MachinePassRegistryNode *N) { Next = N; }
};
@@ -73,11 +71,12 @@ public:
/// MachinePassRegistry - Track the registration of machine passes.
///
//===----------------------------------------------------------------------===//
-class MachinePassRegistry {
+template <typename PassCtorTy> class MachinePassRegistry {
private:
- MachinePassRegistryNode *List; // List of registry nodes.
- MachinePassCtor Default; // Default function pass creator.
- MachinePassRegistryListener *Listener; // Listener for list adds are removes.
+ MachinePassRegistryNode<PassCtorTy> *List; // List of registry nodes.
+ PassCtorTy Default; // Default function pass creator.
+ MachinePassRegistryListener<PassCtorTy>
+ *Listener; // Listener for list adds and removes.
public:
// NO CONSTRUCTOR - we don't want static constructor ordering to mess
@@ -85,19 +84,47 @@ public:
// Accessors.
//
- MachinePassRegistryNode *getList() { return List; }
- MachinePassCtor getDefault() { return Default; }
- void setDefault(MachinePassCtor C) { Default = C; }
- void setDefault(StringRef Name);
- void setListener(MachinePassRegistryListener *L) { Listener = L; }
+ MachinePassRegistryNode<PassCtorTy> *getList() { return List; }
+ PassCtorTy getDefault() { return Default; }
+ void setDefault(PassCtorTy C) { Default = C; }
+ /// setDefault - Set the default constructor by name.
+ void setDefault(StringRef Name) {
+ PassCtorTy Ctor = nullptr;
+ for (MachinePassRegistryNode<PassCtorTy> *R = getList(); R;
+ R = R->getNext()) {
+ if (R->getName() == Name) {
+ Ctor = R->getCtor();
+ break;
+ }
+ }
+ assert(Ctor && "Unregistered pass name");
+ setDefault(Ctor);
+ }
+ void setListener(MachinePassRegistryListener<PassCtorTy> *L) { Listener = L; }
/// Add - Adds a function pass to the registration list.
///
- void Add(MachinePassRegistryNode *Node);
+ void Add(MachinePassRegistryNode<PassCtorTy> *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener)
+ Listener->NotifyAdd(Node->getName(), Node->getCtor(),
+ Node->getDescription());
+ }
/// Remove - Removes a function pass from the registration list.
///
- void Remove(MachinePassRegistryNode *Node);
+ void Remove(MachinePassRegistryNode<PassCtorTy> *Node) {
+ for (MachinePassRegistryNode<PassCtorTy> **I = &List; *I;
+ I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener)
+ Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+ }
};
//===----------------------------------------------------------------------===//
@@ -105,9 +132,11 @@ public:
/// RegisterPassParser class - Handle the addition of new machine passes.
///
//===----------------------------------------------------------------------===//
-template<class RegistryClass>
-class RegisterPassParser : public MachinePassRegistryListener,
- public cl::parser<typename RegistryClass::FunctionPassCtor> {
+template <class RegistryClass>
+class RegisterPassParser
+ : public MachinePassRegistryListener<
+ typename RegistryClass::FunctionPassCtor>,
+ public cl::parser<typename RegistryClass::FunctionPassCtor> {
public:
RegisterPassParser(cl::Option &O)
: cl::parser<typename RegistryClass::FunctionPassCtor>(O) {}
@@ -129,8 +158,9 @@ public:
}
// Implement the MachinePassRegistryListener callbacks.
- void NotifyAdd(StringRef N, MachinePassCtor C, StringRef D) override {
- this->addLiteralOption(N, (typename RegistryClass::FunctionPassCtor)C, D);
+ void NotifyAdd(StringRef N, typename RegistryClass::FunctionPassCtor C,
+ StringRef D) override {
+ this->addLiteralOption(N, C, D);
}
void NotifyRemove(StringRef N) override {
this->removeLiteralOption(N);
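With the registry templated over the constructor type, RegisterPassParser plugs straight into a cl::opt without the old MachinePassCtor cast. A sketch using the machine-scheduler registry updated later in this patch; the option name is illustrative.

    #include "llvm/CodeGen/MachineScheduler.h"
    #include "llvm/Support/CommandLine.h"

    // Sketch only: exposes every scheduler registered with MachineSchedRegistry
    // as a value of this hypothetical command-line option.
    static llvm::cl::opt<llvm::MachineSchedRegistry::ScheduleDAGCtor, false,
                         llvm::RegisterPassParser<llvm::MachineSchedRegistry>>
        ExampleSchedOpt("example-misched", llvm::cl::init(nullptr),
                        llvm::cl::desc("Pick a registered MI scheduler (sketch)"));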
diff --git a/contrib/llvm/include/llvm/CodeGen/MachinePipeliner.h b/contrib/llvm/include/llvm/CodeGen/MachinePipeliner.h
new file mode 100644
index 000000000000..38cb33e90e63
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -0,0 +1,608 @@
+//===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
+//
+// Software pipelining (SWP) is an instruction scheduling technique for loops
+// that overlaps loop iterations and exploits ILP via a compiler transformation.
+//
+// Swing Modulo Scheduling is an implementation of software pipelining
+// that generates schedules that are near optimal in terms of initiation
+// interval, register requirements, and stage count. See the papers:
+//
+// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
+// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
+// Conference on Parallel Architectures and Compilation Techniques.
+//
+// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
+// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
+// Transactions on Computers, Vol. 50, No. 3, 2001.
+//
+// "An Implementation of Swing Modulo Scheduling With Extensions for
+// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
+// Urbana-Champaign, 2005.
+//
+//
+// The SMS algorithm consists of three main steps after computing the minimal
+// initiation interval (MII).
+// 1) Analyze the dependence graph and compute information about each
+// instruction in the graph.
+// 2) Order the nodes (instructions) by priority based upon the heuristics
+// described in the algorithm.
+// 3) Attempt to schedule the nodes in the specified order using the MII.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
+#define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+namespace llvm {
+
+class NodeSet;
+class SMSchedule;
+
+extern cl::opt<bool> SwpEnableCopyToPhi;
+
+/// The main class in the implementation of the target independent
+/// software pipeliner pass.
+class MachinePipeliner : public MachineFunctionPass {
+public:
+ MachineFunction *MF = nullptr;
+ const MachineLoopInfo *MLI = nullptr;
+ const MachineDominatorTree *MDT = nullptr;
+ const InstrItineraryData *InstrItins;
+ const TargetInstrInfo *TII = nullptr;
+ RegisterClassInfo RegClassInfo;
+
+#ifndef NDEBUG
+ static int NumTries;
+#endif
+
+ /// Cache the target analysis information about the loop.
+ struct LoopInfo {
+ MachineBasicBlock *TBB = nullptr;
+ MachineBasicBlock *FBB = nullptr;
+ SmallVector<MachineOperand, 4> BrCond;
+ MachineInstr *LoopInductionVar = nullptr;
+ MachineInstr *LoopCompare = nullptr;
+ };
+ LoopInfo LI;
+
+ static char ID;
+
+ MachinePipeliner() : MachineFunctionPass(ID) {
+ initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ void preprocessPhiNodes(MachineBasicBlock &B);
+ bool canPipelineLoop(MachineLoop &L);
+ bool scheduleLoop(MachineLoop &L);
+ bool swingModuloScheduler(MachineLoop &L);
+};
+
+/// This class builds the dependence graph for the instructions in a loop,
+/// and attempts to schedule the instructions using the SMS algorithm.
+class SwingSchedulerDAG : public ScheduleDAGInstrs {
+ MachinePipeliner &Pass;
+ /// The minimum initiation interval between iterations for this schedule.
+ unsigned MII = 0;
+ /// Set to true if a valid pipelined schedule is found for the loop.
+ bool Scheduled = false;
+ MachineLoop &Loop;
+ LiveIntervals &LIS;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// A topological ordering of the SUnits, which is needed for changing
+ /// dependences and iterating over the SUnits.
+ ScheduleDAGTopologicalSort Topo;
+
+ struct NodeInfo {
+ int ASAP = 0;
+ int ALAP = 0;
+ int ZeroLatencyDepth = 0;
+ int ZeroLatencyHeight = 0;
+
+ NodeInfo() = default;
+ };
+ /// Computed properties for each node in the graph.
+ std::vector<NodeInfo> ScheduleInfo;
+
+ enum OrderKind { BottomUp = 0, TopDown = 1 };
+ /// Computed node ordering for scheduling.
+ SetVector<SUnit *> NodeOrder;
+
+ using NodeSetType = SmallVector<NodeSet, 8>;
+ using ValueMapTy = DenseMap<unsigned, unsigned>;
+ using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
+ using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
+
+ /// Instructions to change when emitting the final schedule.
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
+
+ /// We may create a new instruction, so remember it because it
+ /// must be deleted when the pass is finished.
+ SmallPtrSet<MachineInstr *, 4> NewMIs;
+
+ /// Ordered list of DAG postprocessing steps.
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+
+ /// Helper class to implement Johnson's circuit finding algorithm.
+ class Circuits {
+ std::vector<SUnit> &SUnits;
+ SetVector<SUnit *> Stack;
+ BitVector Blocked;
+ SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
+ SmallVector<SmallVector<int, 4>, 16> AdjK;
+ // Node to Index from ScheduleDAGTopologicalSort
+ std::vector<int> *Node2Idx;
+ unsigned NumPaths;
+ static unsigned MaxPaths;
+
+ public:
+ Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
+ : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
+ Node2Idx = new std::vector<int>(SUs.size());
+ unsigned Idx = 0;
+ for (const auto &NodeNum : Topo)
+ Node2Idx->at(NodeNum) = Idx++;
+ }
+
+ ~Circuits() { delete Node2Idx; }
+
+ /// Reset the data structures used in the circuit algorithm.
+ void reset() {
+ Stack.clear();
+ Blocked.reset();
+ B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
+ NumPaths = 0;
+ }
+
+ void createAdjacencyStructure(SwingSchedulerDAG *DAG);
+ bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
+ void unblock(int U);
+ };
+
+ struct CopyToPhiMutation : public ScheduleDAGMutation {
+ void apply(ScheduleDAGInstrs *DAG) override;
+ };
+
+public:
+ SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
+ const RegisterClassInfo &rci)
+ : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
+ RegClassInfo(rci), Topo(SUnits, &ExitSU) {
+ P.MF->getSubtarget().getSMSMutations(Mutations);
+ if (SwpEnableCopyToPhi)
+ Mutations.push_back(llvm::make_unique<CopyToPhiMutation>());
+ }
+
+ void schedule() override;
+ void finishBlock() override;
+
+ /// Return true if the loop kernel has been scheduled.
+ bool hasNewSchedule() { return Scheduled; }
+
+ /// Return the earliest time an instruction may be scheduled.
+ int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
+
+ /// Return the latest time an instruction may be scheduled.
+ int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
+
+ /// The mobility function, which is the number of slots in which
+ /// an instruction may be scheduled.
+ int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
+
+ /// The depth, in the dependence graph, for a node.
+ unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
+
+ /// The maximum unweighted length of a path from an arbitrary node to the
+ /// given node in which each edge has latency 0
+ int getZeroLatencyDepth(SUnit *Node) {
+ return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
+ }
+
+ /// The height, in the dependence graph, for a node.
+ unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
+
+ /// The maximum unweighted length of a path from the given node to an
+ /// arbitrary node in which each edge has latency 0
+ int getZeroLatencyHeight(SUnit *Node) {
+ return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
+ }
+
+ /// Return true if the dependence is a back-edge in the data dependence graph.
+ /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
+ /// using an anti dependence from a Phi to an instruction.
+ bool isBackedge(SUnit *Source, const SDep &Dep) {
+ if (Dep.getKind() != SDep::Anti)
+ return false;
+ return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
+ }
+
+ bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
+
+ /// The distance function, which indicates that operation V of iteration I
+ /// depends on operation U of iteration I-distance.
+ unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
+ // Instructions that feed a Phi have a distance of 1. Computing larger
+ // values for arrays requires data dependence information.
+ if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
+ return 1;
+ return 0;
+ }
+
+ /// Set the Minimum Initiation Interval for this schedule attempt.
+ void setMII(unsigned mii) { MII = mii; }
+
+ void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
+
+ void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
+
+ /// Return the new base register that was stored away for the changed
+ /// instruction.
+ unsigned getInstrBaseReg(SUnit *SU) {
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(SU);
+ if (It != InstrChanges.end())
+ return It->second.first;
+ return 0;
+ }
+
+ void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
+ Mutations.push_back(std::move(Mutation));
+ }
+
+ static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
+
+private:
+ void addLoopCarriedDependences(AliasAnalysis *AA);
+ void updatePhiDependences();
+ void changeDependences();
+ unsigned calculateResMII();
+ unsigned calculateRecMII(NodeSetType &RecNodeSets);
+ void findCircuits(NodeSetType &NodeSets);
+ void fuseRecs(NodeSetType &NodeSets);
+ void removeDuplicateNodes(NodeSetType &NodeSets);
+ void computeNodeFunctions(NodeSetType &NodeSets);
+ void registerPressureFilter(NodeSetType &NodeSets);
+ void colocateNodeSets(NodeSetType &NodeSets);
+ void checkNodeSets(NodeSetType &NodeSets);
+ void groupRemainingNodes(NodeSetType &NodeSets);
+ void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
+ SetVector<SUnit *> &NodesAdded);
+ void computeNodeOrder(NodeSetType &NodeSets);
+ void checkValidNodeOrder(const NodeSetType &Circuits) const;
+ bool schedulePipeline(SMSchedule &Schedule);
+ void generatePipelinedLoop(SMSchedule &Schedule);
+ void generateProlog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
+ MBBVectorTy &PrologBBs);
+ void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
+ MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
+ void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum,
+ unsigned CurStageNum, bool IsLast);
+ void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum,
+ unsigned CurStageNum, bool IsLast);
+ void removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs);
+ void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
+ SMSchedule &Schedule);
+ void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
+ ValueMapTy *VRMap);
+ bool computeDelta(MachineInstr &MI, unsigned &Delta);
+ void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
+ unsigned Num);
+ MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum);
+ MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum,
+ SMSchedule &Schedule);
+ void updateInstruction(MachineInstr *NewMI, bool LastDef,
+ unsigned CurStageNum, unsigned InstrStageNum,
+ SMSchedule &Schedule, ValueMapTy *VRMap);
+ MachineInstr *findDefInLoop(unsigned Reg);
+ unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
+ unsigned LoopStage, ValueMapTy *VRMap,
+ MachineBasicBlock *BB);
+ void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap);
+ void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
+ InstrMapTy &InstrMap, unsigned CurStageNum,
+ unsigned PhiNum, MachineInstr *Phi,
+ unsigned OldReg, unsigned NewReg,
+ unsigned PrevReg = 0);
+ bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
+ unsigned &OffsetPos, unsigned &NewBase,
+ int64_t &NewOffset);
+ void postprocessDAG();
+};
+
+/// A NodeSet contains a set of SUnit DAG nodes with additional information
+/// that assigns a priority to the set.
+class NodeSet {
+ SetVector<SUnit *> Nodes;
+ bool HasRecurrence = false;
+ unsigned RecMII = 0;
+ int MaxMOV = 0;
+ unsigned MaxDepth = 0;
+ unsigned Colocate = 0;
+ SUnit *ExceedPressure = nullptr;
+ unsigned Latency = 0;
+
+public:
+ using iterator = SetVector<SUnit *>::const_iterator;
+
+ NodeSet() = default;
+ NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
+ Latency = 0;
+ for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
+ for (const SDep &Succ : Nodes[i]->Succs)
+ if (Nodes.count(Succ.getSUnit()))
+ Latency += Succ.getLatency();
+ }
+
+ bool insert(SUnit *SU) { return Nodes.insert(SU); }
+
+ void insert(iterator S, iterator E) { Nodes.insert(S, E); }
+
+ template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
+ return Nodes.remove_if(P);
+ }
+
+ unsigned count(SUnit *SU) const { return Nodes.count(SU); }
+
+ bool hasRecurrence() { return HasRecurrence; };
+
+ unsigned size() const { return Nodes.size(); }
+
+ bool empty() const { return Nodes.empty(); }
+
+ SUnit *getNode(unsigned i) const { return Nodes[i]; };
+
+ void setRecMII(unsigned mii) { RecMII = mii; };
+
+ void setColocate(unsigned c) { Colocate = c; };
+
+ void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
+
+ bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
+
+ int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
+
+ int getRecMII() { return RecMII; }
+
+ /// Summarize node functions for the entire node set.
+ void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
+ for (SUnit *SU : *this) {
+ MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
+ MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
+ }
+ }
+
+ unsigned getLatency() { return Latency; }
+
+ unsigned getMaxDepth() { return MaxDepth; }
+
+ void clear() {
+ Nodes.clear();
+ RecMII = 0;
+ HasRecurrence = false;
+ MaxMOV = 0;
+ MaxDepth = 0;
+ Colocate = 0;
+ ExceedPressure = nullptr;
+ }
+
+ operator SetVector<SUnit *> &() { return Nodes; }
+
+ /// Sort the node sets by importance. First, rank them by recurrence MII,
+ /// then by mobility (least mobile done first), and finally by depth.
+ /// Each node set may contain a colocate value which is used as the first
+ /// tie breaker, if it's set.
+ bool operator>(const NodeSet &RHS) const {
+ if (RecMII == RHS.RecMII) {
+ if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
+ return Colocate < RHS.Colocate;
+ if (MaxMOV == RHS.MaxMOV)
+ return MaxDepth > RHS.MaxDepth;
+ return MaxMOV < RHS.MaxMOV;
+ }
+ return RecMII > RHS.RecMII;
+ }
+
+ bool operator==(const NodeSet &RHS) const {
+ return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
+ MaxDepth == RHS.MaxDepth;
+ }
+
+ bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
+
+ iterator begin() { return Nodes.begin(); }
+ iterator end() { return Nodes.end(); }
+ void print(raw_ostream &os) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const;
+#endif
+};
+
+/// This class represents the scheduled code. The main data structure is a
+/// map from scheduled cycle to instructions. During scheduling, the
+/// data structure explicitly represents all stages/iterations. When
+/// the algorithm finishes, the schedule is collapsed into a single stage,
+/// which represents instructions from different loop iterations.
+///
+/// The SMS algorithm allows negative values for cycles, so the first cycle
+/// in the schedule is the smallest cycle value.
+class SMSchedule {
+private:
+ /// Map from execution cycle to instructions.
+ DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
+
+ /// Map from instruction to execution cycle.
+ std::map<SUnit *, int> InstrToCycle;
+
+ /// Map from each register to the max difference between its uses and def.
+ /// The first element in the pair is the max difference in stages. The
+ /// second is true if the register defines a Phi value and the loop value is
+ /// scheduled before the Phi.
+ std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
+
+ /// Keep track of the first cycle value in the schedule. It starts
+ /// as zero, but the algorithm allows negative values.
+ int FirstCycle = 0;
+
+ /// Keep track of the last cycle value in the schedule.
+ int LastCycle = 0;
+
+ /// The initiation interval (II) for the schedule.
+ int InitiationInterval = 0;
+
+ /// Target machine information.
+ const TargetSubtargetInfo &ST;
+
+ /// Virtual register information.
+ MachineRegisterInfo &MRI;
+
+ std::unique_ptr<DFAPacketizer> Resources;
+
+public:
+ SMSchedule(MachineFunction *mf)
+ : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
+ Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
+
+ void reset() {
+ ScheduledInstrs.clear();
+ InstrToCycle.clear();
+ RegToStageDiff.clear();
+ FirstCycle = 0;
+ LastCycle = 0;
+ InitiationInterval = 0;
+ }
+
+ /// Set the initiation interval for this schedule.
+ void setInitiationInterval(int ii) { InitiationInterval = ii; }
+
+ /// Return the first cycle in the completed schedule. This
+ /// can be a negative value.
+ int getFirstCycle() const { return FirstCycle; }
+
+ /// Return the last cycle in the finalized schedule.
+ int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
+
+ /// Return the cycle of the earliest scheduled instruction in the dependence
+ /// chain.
+ int earliestCycleInChain(const SDep &Dep);
+
+ /// Return the cycle of the latest scheduled instruction in the dependence
+ /// chain.
+ int latestCycleInChain(const SDep &Dep);
+
+ void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
+ int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
+ bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
+
+ /// Iterators for the cycle to instruction map.
+ using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
+ using const_sched_iterator =
+ DenseMap<int, std::deque<SUnit *>>::const_iterator;
+
+ /// Return true if the instruction is scheduled at the specified stage.
+ bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
+ return (stageScheduled(SU) == (int)StageNum);
+ }
+
+ /// Return the stage for a scheduled instruction. Return -1 if
+ /// the instruction has not been scheduled.
+ int stageScheduled(SUnit *SU) const {
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
+ if (it == InstrToCycle.end())
+ return -1;
+ return (it->second - FirstCycle) / InitiationInterval;
+ }
+
+ /// Return the cycle for a scheduled instruction. This function normalizes
+ /// the first cycle to be 0.
+ unsigned cycleScheduled(SUnit *SU) const {
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
+ assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
+ return (it->second - FirstCycle) % InitiationInterval;
+ }
+
+ /// Return the maximum stage count needed for this schedule.
+ unsigned getMaxStageCount() {
+ return (LastCycle - FirstCycle) / InitiationInterval;
+ }
+
+ /// Return the max. number of stages/iterations that can occur between a
+ /// register definition and its uses.
+ unsigned getStagesForReg(int Reg, unsigned CurStage) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
+ return 1;
+ return Stages.first;
+ }
+
+ /// The number of stages for a Phi is a little different than other
+ /// instructions. The minimum value computed in RegToStageDiff is 1
+ /// because we assume the Phi is needed for at least 1 iteration.
+ /// This is not the case if the loop value is scheduled prior to the
+ /// Phi in the same stage. This function returns the number of stages
+ /// or iterations needed between the Phi definition and any uses.
+ unsigned getStagesForPhi(int Reg) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if (Stages.second)
+ return Stages.first;
+ return Stages.first - 1;
+ }
+
+ /// Return the instructions that are scheduled at the specified cycle.
+ std::deque<SUnit *> &getInstructions(int cycle) {
+ return ScheduledInstrs[cycle];
+ }
+
+ bool isValidSchedule(SwingSchedulerDAG *SSD);
+ void finalizeSchedule(SwingSchedulerDAG *SSD);
+ void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
+ std::deque<SUnit *> &Insts);
+ bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
+ bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
+ MachineOperand &MO);
+ void print(raw_ostream &os) const;
+ void dump() const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
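The stage/cycle bookkeeping in SMSchedule is plain arithmetic over FirstCycle and the initiation interval; a tiny standalone check with hypothetical schedule values makes the mapping concrete.

    #include <cassert>

    int main() {
      const int FirstCycle = -2, II = 3; // hypothetical schedule; negative cycles are allowed
      const int Cycle = 4;               // cycle an instruction was placed at
      int Stage = (Cycle - FirstCycle) / II; // mirrors SMSchedule::stageScheduled
      int Slot  = (Cycle - FirstCycle) % II; // mirrors SMSchedule::cycleScheduled
      assert(Stage == 2 && Slot == 0);   // third overlapped stage, first kernel slot
      return 0;
    }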
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 5bf4a49c8b3b..fef010a23ef9 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -689,15 +689,14 @@ public:
unsigned MinNumRegs = 0);
/// Constrain the register class or the register bank of the virtual register
- /// \p Reg to be a common subclass and a common bank of both registers
- /// provided respectively. Do nothing if any of the attributes (classes,
- /// banks, or low-level types) of the registers are deemed incompatible, or if
- /// the resulting register will have a class smaller than before and of size
- /// less than \p MinNumRegs. Return true if such register attributes exist,
- /// false otherwise.
+ /// \p Reg (and low-level type) to be a common subclass or a common bank of
+ /// both registers provided respectively (and a common low-level type). Do
+ /// nothing if any of the attributes (classes, banks, or low-level types) of
+ /// the registers are deemed incompatible, or if the resulting register will
+ /// have a class smaller than before and of size less than \p MinNumRegs.
+ /// Return true if such register attributes exist, false otherwise.
///
- /// \note Assumes that each register has either a low-level type or a class
- /// assigned, but not both. Use this method instead of constrainRegClass and
+ /// \note Use this method instead of constrainRegClass and
/// RegisterBankInfo::constrainGenericRegister everywhere but SelectionDAG
/// ISel / FastISel and GlobalISel's InstructionSelect pass respectively.
bool constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg,
@@ -717,6 +716,10 @@ public:
unsigned createVirtualRegister(const TargetRegisterClass *RegClass,
StringRef Name = "");
+ /// Create and return a new virtual register in the function with the same
+ /// attributes as the given register.
+ unsigned cloneVirtualRegister(unsigned VReg, StringRef Name = "");
+
/// Get the low-level type of \p Reg or LLT{} if Reg is not a generic
/// (target independent) virtual register.
LLT getType(unsigned Reg) const {
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h b/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
index 85ffa4eda2b8..4bc31ae7c61a 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -132,17 +132,19 @@ struct MachineSchedContext {
/// MachineSchedRegistry provides a selection of available machine instruction
/// schedulers.
-class MachineSchedRegistry : public MachinePassRegistryNode {
+class MachineSchedRegistry
+ : public MachinePassRegistryNode<
+ ScheduleDAGInstrs *(*)(MachineSchedContext *)> {
public:
using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);
// RegisterPassParser requires a (misnamed) FunctionPassCtor type.
using FunctionPassCtor = ScheduleDAGCtor;
- static MachinePassRegistry Registry;
+ static MachinePassRegistry<ScheduleDAGCtor> Registry;
MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
- : MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
+ : MachinePassRegistryNode(N, D, C) {
Registry.Add(this);
}
@@ -158,7 +160,7 @@ public:
return (MachineSchedRegistry *)Registry.getList();
}
- static void setListener(MachinePassRegistryListener *L) {
+ static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
Registry.setListener(L);
}
};
@@ -466,6 +468,9 @@ public:
PressureDiff &getPressureDiff(const SUnit *SU) {
return SUPressureDiffs[SU->NodeNum];
}
+ const PressureDiff &getPressureDiff(const SUnit *SU) const {
+ return SUPressureDiffs[SU->NodeNum];
+ }
/// Compute a DFSResult after DAG building is complete, and before any
/// queue comparisons.
@@ -491,6 +496,8 @@ public:
/// Compute the cyclic critical path through the DAG.
unsigned computeCyclicCriticalPath();
+ void dump() const override;
+
protected:
// Top-Level entry points for the schedule() driver...
@@ -787,7 +794,7 @@ public:
/// Represent the type of SchedCandidate found within a single queue.
/// pickNodeBidirectional depends on these listed by decreasing priority.
enum CandReason : uint8_t {
- NoCand, Only1, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak,
+ NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak,
RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
@@ -895,6 +902,10 @@ protected:
#ifndef NDEBUG
void traceCandidate(const SchedCandidate &Cand);
#endif
+
+private:
+ bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
+ bool ComputeRemLatency, unsigned &RemLatency) const;
};
// Utility functions used by heuristics in tryCandidate().
@@ -917,7 +928,7 @@ bool tryPressure(const PressureChange &TryP,
const TargetRegisterInfo *TRI,
const MachineFunction &MF);
unsigned getWeakLeft(const SUnit *SU, bool isTop);
-int biasPhysRegCopy(const SUnit *SU, bool isTop);
+int biasPhysReg(const SUnit *SU, bool isTop);
/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
@@ -995,7 +1006,7 @@ protected:
const RegPressureTracker &RPTracker,
SchedCandidate &Candidate);
- void reschedulePhysRegCopies(SUnit *SU, bool isTop);
+ void reschedulePhysReg(SUnit *SU, bool isTop);
};
/// PostGenericScheduler - Interface to the scheduling algorithm used by
diff --git a/contrib/llvm/include/llvm/CodeGen/Passes.h b/contrib/llvm/include/llvm/CodeGen/Passes.h
index cb12b14f4435..acf1ebb5bc83 100644
--- a/contrib/llvm/include/llvm/CodeGen/Passes.h
+++ b/contrib/llvm/include/llvm/CodeGen/Passes.h
@@ -379,14 +379,20 @@ namespace llvm {
///
FunctionPass *createInterleavedAccessPass();
+ /// InterleavedLoadCombines Pass - This pass identifies interleaved loads and
+ /// combines them into wide loads detectable by InterleavedAccessPass
+ ///
+ FunctionPass *createInterleavedLoadCombinePass();
+
/// LowerEmuTLS - This pass generates __emutls_[vt].xyz variables for all
/// TLS variables for the emulated TLS model.
///
ModulePass *createLowerEmuTLSPass();
- /// This pass lowers the \@llvm.load.relative intrinsic to instructions.
- /// This is unsafe to do earlier because a pass may combine the constant
- /// initializer into the load, which may result in an overflowing evaluation.
+ /// This pass lowers the \@llvm.load.relative and \@llvm.objc.* intrinsics to
+ /// instructions. This is unsafe to do earlier because a pass may combine the
+ /// constant initializer into the load, which may result in an overflowing
+ /// evaluation.
ModulePass *createPreISelIntrinsicLoweringPass();
/// GlobalMerge - This pass merges internal (by default) globals into structs
diff --git a/contrib/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h b/contrib/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h
index 7a007eb8bcea..b7f83e515b7e 100644
--- a/contrib/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h
+++ b/contrib/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass implements IR lowering for the llvm.load.relative intrinsic.
+// This pass implements IR lowering for the llvm.load.relative and llvm.objc.*
+// intrinsics.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_PREISELINTRINSICLOWERING_H
diff --git a/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h b/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
index bdf0bb731540..f66191bc9fb4 100644
--- a/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
@@ -36,7 +36,7 @@ raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV);
/// below the stack frame (e.g., argument space), or constant pool.
class PseudoSourceValue {
public:
- enum PSVKind {
+ enum PSVKind : unsigned {
Stack,
GOT,
JumpTable,
@@ -48,7 +48,7 @@ public:
};
private:
- PSVKind Kind;
+ unsigned Kind;
unsigned AddressSpace;
friend raw_ostream &llvm::operator<<(raw_ostream &OS,
const PseudoSourceValue* PSV);
@@ -60,11 +60,11 @@ private:
virtual void printCustom(raw_ostream &O) const;
public:
- explicit PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII);
+ explicit PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII);
virtual ~PseudoSourceValue();
- PSVKind kind() const { return Kind; }
+ unsigned kind() const { return Kind; }
bool isStack() const { return Kind == Stack; }
bool isGOT() const { return Kind == GOT; }
@@ -116,7 +116,7 @@ public:
class CallEntryPseudoSourceValue : public PseudoSourceValue {
protected:
- CallEntryPseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII);
+ CallEntryPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII);
public:
bool isConstant(const MachineFrameInfo *) const override;
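Storing Kind as a plain unsigned lets targets reserve values past the enum. The sketch below assumes the enum ends in a TargetCustom entry, as in upstream LLVM; that enumerator is not visible in this hunk, and the kind value is illustrative.

    #include "llvm/CodeGen/PseudoSourceValue.h"

    // Sketch only: a target-private kind one past TargetCustom (assumed to be
    // the trailing enumerator). kind() now returns unsigned, so no cast needed.
    static bool isMyTargetBufferPSV(const llvm::PseudoSourceValue &PSV) {
      const unsigned MyBufferKind = llvm::PseudoSourceValue::TargetCustom + 1;
      return PSV.kind() == MyBufferKind;
    }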
diff --git a/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h b/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h
index 481747dc163e..b518fbb9c9da 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h
@@ -26,14 +26,14 @@ class FunctionPass;
/// RegisterRegAlloc class - Track the registration of register allocators.
///
//===----------------------------------------------------------------------===//
-class RegisterRegAlloc : public MachinePassRegistryNode {
+class RegisterRegAlloc : public MachinePassRegistryNode<FunctionPass *(*)()> {
public:
using FunctionPassCtor = FunctionPass *(*)();
- static MachinePassRegistry Registry;
+ static MachinePassRegistry<FunctionPassCtor> Registry;
RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
- : MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
+ : MachinePassRegistryNode(N, D, C) {
Registry.Add(this);
}
@@ -48,15 +48,11 @@ public:
return (RegisterRegAlloc *)Registry.getList();
}
- static FunctionPassCtor getDefault() {
- return (FunctionPassCtor)Registry.getDefault();
- }
+ static FunctionPassCtor getDefault() { return Registry.getDefault(); }
- static void setDefault(FunctionPassCtor C) {
- Registry.setDefault((MachinePassCtor)C);
- }
+ static void setDefault(FunctionPassCtor C) { Registry.setDefault(C); }
- static void setListener(MachinePassRegistryListener *L) {
+ static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
Registry.setListener(L);
}
};
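Registration itself looks the same as before, minus the cast through MachinePassCtor; a sketch with a hypothetical allocator factory.

    #include "llvm/CodeGen/RegAllocRegistry.h"
    #include "llvm/Pass.h"

    // Sketch only: a real target would return its allocator pass here.
    static llvm::FunctionPass *createToyRegAlloc() { return nullptr; }

    // The node registers itself; the ctor type is now checked by the template
    // rather than funneled through a void *(*)() cast.
    static llvm::RegisterRegAlloc
        ToyRegAlloc("toy", "toy register allocator (sketch)", createToyRegAlloc);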
diff --git a/contrib/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/contrib/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
index efd175eeed30..efecc61d9c30 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
@@ -29,7 +29,7 @@
namespace llvm {
class Function;
-class TargetMachine;
+class LLVMTargetMachine;
class PhysicalRegisterUsageInfo : public ImmutablePass {
public:
@@ -41,7 +41,7 @@ public:
}
/// Set TargetMachine which is used to print analysis.
- void setTargetMachine(const TargetMachine &TM);
+ void setTargetMachine(const LLVMTargetMachine &TM);
bool doInitialization(Module &M) override;
@@ -63,7 +63,7 @@ private:
/// and 1 means content of register will be preserved around function call.
DenseMap<const Function *, std::vector<uint32_t>> RegMasks;
- const TargetMachine *TM;
+ const LLVMTargetMachine *TM;
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
index 56adc2e2fbfa..0870d67db390 100644
--- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -33,15 +33,15 @@
namespace llvm {
template<class Graph> class GraphWriter;
+class LLVMTargetMachine;
class MachineFunction;
class MachineRegisterInfo;
class MCInstrDesc;
struct MCSchedClassDesc;
-class ScheduleDAG;
class SDNode;
class SUnit;
+class ScheduleDAG;
class TargetInstrInfo;
-class TargetMachine;
class TargetRegisterClass;
class TargetRegisterInfo;
@@ -236,8 +236,7 @@ class TargetRegisterInfo;
Contents.Reg = Reg;
}
- raw_ostream &print(raw_ostream &O,
- const TargetRegisterInfo *TRI = nullptr) const;
+ void dump(const TargetRegisterInfo *TRI = nullptr) const;
};
template <>
@@ -459,12 +458,7 @@ class TargetRegisterInfo;
/// edge occurs first.
void biasCriticalPath();
- void dump(const ScheduleDAG *G) const;
- void dumpAll(const ScheduleDAG *G) const;
- raw_ostream &print(raw_ostream &O,
- const SUnit *Entry = nullptr,
- const SUnit *Exit = nullptr) const;
- raw_ostream &print(raw_ostream &O, const ScheduleDAG *G) const;
+ void dumpAttributes() const;
private:
void ComputeDepth();
@@ -564,7 +558,7 @@ class TargetRegisterInfo;
class ScheduleDAG {
public:
- const TargetMachine &TM; ///< Target processor
+ const LLVMTargetMachine &TM; ///< Target processor
const TargetInstrInfo *TII; ///< Target instruction information
const TargetRegisterInfo *TRI; ///< Target processor register info
MachineFunction &MF; ///< Machine function
@@ -597,7 +591,9 @@ class TargetRegisterInfo;
virtual void viewGraph(const Twine &Name, const Twine &Title);
virtual void viewGraph();
- virtual void dumpNode(const SUnit *SU) const = 0;
+ virtual void dumpNode(const SUnit &SU) const = 0;
+ virtual void dump() const = 0;
+ void dumpNodeName(const SUnit &SU) const;
/// Returns a label for an SUnit node in a visualization of the ScheduleDAG.
virtual std::string getGraphNodeLabel(const SUnit *SU) const = 0;
@@ -614,6 +610,9 @@ class TargetRegisterInfo;
unsigned VerifyScheduledDAG(bool isBottomUp);
#endif
+ protected:
+ void dumpNodeAll(const SUnit &SU) const;
+
private:
/// Returns the MCInstrDesc of this SDNode or NULL.
const MCInstrDesc *getNodeDesc(const SDNode *Node) const;
diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 520a23846f6e..daad18125db9 100644
--- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -327,7 +327,8 @@ namespace llvm {
/// whole MachineFunction. By default does nothing.
virtual void finalizeSchedule() {}
- void dumpNode(const SUnit *SU) const override;
+ void dumpNode(const SUnit &SU) const override;
+ void dump() const override;
/// Returns a label for a DAG node that points to an instruction.
std::string getGraphNodeLabel(const SUnit *SU) const override;
diff --git a/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h b/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h
index badf927d0e95..fbe559f25556 100644
--- a/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h
@@ -29,16 +29,19 @@ namespace llvm {
class ScheduleDAGSDNodes;
class SelectionDAGISel;
-class RegisterScheduler : public MachinePassRegistryNode {
+class RegisterScheduler
+ : public MachinePassRegistryNode<
+ ScheduleDAGSDNodes *(*)(SelectionDAGISel *, CodeGenOpt::Level)> {
public:
using FunctionPassCtor = ScheduleDAGSDNodes *(*)(SelectionDAGISel*,
CodeGenOpt::Level);
- static MachinePassRegistry Registry;
+ static MachinePassRegistry<FunctionPassCtor> Registry;
RegisterScheduler(const char *N, const char *D, FunctionPassCtor C)
- : MachinePassRegistryNode(N, D, (MachinePassCtor)C)
- { Registry.Add(this); }
+ : MachinePassRegistryNode(N, D, C) {
+ Registry.Add(this);
+ }
~RegisterScheduler() { Registry.Remove(this); }
@@ -51,7 +54,7 @@ public:
return (RegisterScheduler *)Registry.getList();
}
- static void setListener(MachinePassRegistryListener *L) {
+ static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
Registry.setListener(L);
}
};
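The SelectionDAG scheduler registry follows the same shape, with the (SelectionDAGISel *, CodeGenOpt::Level) factory signature carried by the template; a sketch with a hypothetical factory.

    #include "llvm/CodeGen/SchedulerRegistry.h"

    // Sketch only: a real backend would construct its ScheduleDAGSDNodes here.
    static llvm::ScheduleDAGSDNodes *createToySched(llvm::SelectionDAGISel *,
                                                    llvm::CodeGenOpt::Level) {
      return nullptr;
    }

    static llvm::RegisterScheduler
        ToySched("toy-sched", "toy SelectionDAG scheduler (sketch)", createToySched);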
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
index 888f9425ff90..67fe87fc96af 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -28,7 +28,7 @@
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -188,8 +188,8 @@ public:
return DbgValues.empty() && ByvalParmDbgValues.empty() && DbgLabels.empty();
}
- ArrayRef<SDDbgValue*> getSDDbgValues(const SDNode *Node) {
- DbgValMapType::iterator I = DbgValMap.find(Node);
+ ArrayRef<SDDbgValue*> getSDDbgValues(const SDNode *Node) const {
+ auto I = DbgValMap.find(Node);
if (I != DbgValMap.end())
return I->second;
return ArrayRef<SDDbgValue*>();
@@ -229,7 +229,7 @@ class SelectionDAG {
LLVMContext *Context;
CodeGenOpt::Level OptLevel;
- DivergenceAnalysis * DA = nullptr;
+ LegacyDivergenceAnalysis * DA = nullptr;
FunctionLoweringInfo * FLI = nullptr;
/// The function-level optimization remark emitter. Used to emit remarks
@@ -308,6 +308,9 @@ public:
: DAGUpdateListener(DAG), Callback(std::move(Callback)) {}
void NodeDeleted(SDNode *N, SDNode *E) override { Callback(N, E); }
+
+ private:
+ virtual void anchor();
};
/// When true, additional steps are taken to
@@ -382,7 +385,7 @@ public:
/// Prepare this SelectionDAG to process code in the given MachineFunction.
void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE,
Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- DivergenceAnalysis * Divergence);
+ LegacyDivergenceAnalysis * Divergence);
void setFunctionLoweringInfo(FunctionLoweringInfo * FuncInfo) {
FLI = FuncInfo;
@@ -471,7 +474,9 @@ public:
return Root;
}
+#ifndef NDEBUG
void VerifyDAGDiverence();
+#endif
/// This iterates over the nodes in the SelectionDAG, folding
/// certain types of nodes together, or eliminating superfluous nodes. The
@@ -784,24 +789,6 @@ public:
/// value assuming it was the smaller SrcTy value.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
- /// Return an operation which will any-extend the low lanes of the operand
- /// into the specified vector type. For example,
- /// this can convert a v16i8 into a v4i32 by any-extending the low four
- /// lanes of the operand from i8 to i32.
- SDValue getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT);
-
- /// Return an operation which will sign extend the low lanes of the operand
- /// into the specified vector type. For example,
- /// this can convert a v16i8 into a v4i32 by sign extending the low four
- /// lanes of the operand from i8 to i32.
- SDValue getSignExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT);
-
- /// Return an operation which will zero extend the low lanes of the operand
- /// into the specified vector type. For example,
- /// this can convert a v16i8 into a v4i32 by zero extending the low four
- /// lanes of the operand from i8 to i32.
- SDValue getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT);
-
/// Convert Op, which must be of integer type, to the integer type VT,
/// by using an extension appropriate for the target's
/// BooleanContent for type OpVT or truncating it.
@@ -945,41 +932,45 @@ public:
Type *SizeTy, unsigned ElemSz, bool isTailCall,
MachinePointerInfo DstPtrInfo);
- /// Helper function to make it easier to build SetCC's if you just
- /// have an ISD::CondCode instead of an SDValue.
- ///
+ /// Helper function to make it easier to build SetCC's if you just have an
+ /// ISD::CondCode instead of an SDValue.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS,
ISD::CondCode Cond) {
assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() &&
- "Cannot compare scalars to vectors");
+ "Cannot compare scalars to vectors");
assert(LHS.getValueType().isVector() == VT.isVector() &&
- "Cannot compare scalars to vectors");
+ "Cannot compare scalars to vectors");
assert(Cond != ISD::SETCC_INVALID &&
- "Cannot create a setCC of an invalid node.");
+ "Cannot create a setCC of an invalid node.");
return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond));
}
- /// Helper function to make it easier to build Select's if you just
- /// have operands and don't want to check for vector.
+ /// Helper function to make it easier to build Select's if you just have
+ /// operands and don't want to check for vector.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS,
SDValue RHS) {
assert(LHS.getValueType() == RHS.getValueType() &&
"Cannot use select on differing types");
assert(VT.isVector() == LHS.getValueType().isVector() &&
"Cannot mix vectors and scalars");
- return getNode(Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
- Cond, LHS, RHS);
+ auto Opcode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
+ return getNode(Opcode, DL, VT, Cond, LHS, RHS);
}
- /// Helper function to make it easier to build SelectCC's if you
- /// just have an ISD::CondCode instead of an SDValue.
- ///
+ /// Helper function to make it easier to build SelectCC's if you just have an
+ /// ISD::CondCode instead of an SDValue.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True,
SDValue False, ISD::CondCode Cond) {
- return getNode(ISD::SELECT_CC, DL, True.getValueType(),
- LHS, RHS, True, False, getCondCode(Cond));
+ return getNode(ISD::SELECT_CC, DL, True.getValueType(), LHS, RHS, True,
+ False, getCondCode(Cond));
}
+ /// Try to simplify a select/vselect into 1 of its operands or a constant.
+ SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal);
+
+ /// Try to simplify a shift into 1 of its operands or a constant.
+ SDValue simplifyShift(SDValue X, SDValue Y);
+
/// VAArg produces a result and token chain, and takes a pointer
/// and a source value as input.
SDValue getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
@@ -1140,6 +1131,13 @@ public:
/// Expand the specified \c ISD::VACOPY node as the Legalize pass would.
SDValue expandVACopy(SDNode *Node);
+ /// Returns a GlobalAddress of the function from the current module whose
+ /// name matches the given ExternalSymbol. Additionally can provide the
+ /// matched function.
+ /// Panics if the function doesn't exist.
+ SDValue getSymbolFunctionGlobalAddress(SDValue Op,
+ Function **TargetFunction = nullptr);
+
/// *Mutate* the specified node in-place to have the
/// specified operands. If the resultant node already exists in the DAG,
/// this does not modify the specified node, instead it returns the node that
@@ -1156,6 +1154,11 @@ public:
SDValue Op3, SDValue Op4, SDValue Op5);
SDNode *UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops);
+ /// *Mutate* the specified machine node's memory references to the provided
+ /// list.
+ void setNodeMemRefs(MachineSDNode *N,
+ ArrayRef<MachineMemOperand *> NewMemRefs);
+
// Propagates the change in divergence to users
void updateDivergence(SDNode * N);
@@ -1346,7 +1349,7 @@ public:
void AddDbgLabel(SDDbgLabel *DB);
/// Get the debug values which reference the given SDNode.
- ArrayRef<SDDbgValue*> GetDbgValues(const SDNode* SD) {
+ ArrayRef<SDDbgValue*> GetDbgValues(const SDNode* SD) const {
return DbgInfo->getSDDbgValues(SD);
}
@@ -1429,15 +1432,15 @@ public:
/// every vector element.
/// Targets can implement the computeKnownBitsForTargetNode method in the
/// TargetLowering class to allow target nodes to be understood.
- void computeKnownBits(SDValue Op, KnownBits &Known, unsigned Depth = 0) const;
+ KnownBits computeKnownBits(SDValue Op, unsigned Depth = 0) const;
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. The DemandedElts argument allows us to only collect the
/// known bits that are shared by the requested vector elements.
/// Targets can implement the computeKnownBitsForTargetNode method in the
/// TargetLowering class to allow target nodes to be understood.
- void computeKnownBits(SDValue Op, KnownBits &Known, const APInt &DemandedElts,
- unsigned Depth = 0) const;
+ KnownBits computeKnownBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth = 0) const;
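A caller-side sketch of the signature change (illustrative only, not part of the patch; `DAG`, `Op` and `DemandedElts` are assumed to be in scope, e.g. inside a DAG combine) — the known bits are now returned by value instead of written through an out-parameter:

    KnownBits Known = DAG.computeKnownBits(Op);            // all lanes demanded
    bool LowBitClear = Known.Zero[0];                       // query a known-zero bit
    KnownBits PerElt = DAG.computeKnownBits(Op, DemandedElts);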
/// Used to represent the possible overflow behavior of an operation.
/// Never: the operation cannot overflow.
@@ -1484,8 +1487,15 @@ public:
/// X|Cst == X+Cst iff X&Cst = 0.
bool isBaseWithConstantOffset(SDValue Op) const;
- /// Test whether the given SDValue is known to never be NaN.
- bool isKnownNeverNaN(SDValue Op) const;
+ /// Test whether the given SDValue is known to never be NaN. If \p SNaN is
+ /// true, returns whether \p Op is known to never be a signaling NaN (it may
+ /// still be a qNaN).
+ bool isKnownNeverNaN(SDValue Op, bool SNaN = false, unsigned Depth = 0) const;
+
+ /// \returns true if \p Op is known to never be a signaling NaN.
+ bool isKnownNeverSNaN(SDValue Op, unsigned Depth = 0) const {
+ return isKnownNeverNaN(Op, true, Depth);
+ }
/// Test whether the given floating point SDValue is known to never be
/// positive or negative zero.
@@ -1503,6 +1513,27 @@ public:
/// allow an 'add' to be transformed into an 'or'.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const;
+ /// Test whether \p V has a splatted value for all the demanded elements.
+ ///
+ /// On success \p UndefElts will indicate the elements that have UNDEF
+ /// values instead of the splat value; this is only guaranteed to be correct
+ /// for \p DemandedElts.
+ ///
+ /// NOTE: The function will return true for a demanded splat of UNDEF values.
+ bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts);
+
+ /// Test whether \p V has a splatted value.
+ bool isSplatValue(SDValue V, bool AllowUndefs = false);
+
+ /// Match a binop + shuffle pyramid that represents a horizontal reduction
+ /// over the elements of a vector starting from the EXTRACT_VECTOR_ELT node \p
+ /// Extract. The reduction must use one of the opcodes listed in \p
+ /// CandidateBinOps and on success \p BinOp will contain the matching opcode.
+ /// Returns the vector that is being reduced on, or SDValue() if a reduction
+ /// was not matched.
+ SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
+ ArrayRef<ISD::NodeType> CandidateBinOps);
+
/// Utility function used by legalize and lowering to
/// "unroll" a vector operation by splitting out the scalars and operating
/// on each element individually. If the ResNE is 0, fully unroll the vector
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
index 580606441a9d..2b2c48d57bc0 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
@@ -45,18 +45,21 @@ public:
IsIndexSignExt(IsIndexSignExt) {}
SDValue getBase() { return Base; }
+ SDValue getBase() const { return Base; }
SDValue getIndex() { return Index; }
+ SDValue getIndex() const { return Index; }
- bool equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG) {
+ bool equalBaseIndex(const BaseIndexOffset &Other,
+ const SelectionDAG &DAG) const {
int64_t Off;
return equalBaseIndex(Other, DAG, Off);
}
- bool equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG,
- int64_t &Off);
+ bool equalBaseIndex(const BaseIndexOffset &Other, const SelectionDAG &DAG,
+ int64_t &Off) const;
/// Parses tree in Ptr for base, index, offset addresses.
- static BaseIndexOffset match(LSBaseSDNode *N, const SelectionDAG &DAG);
+ static BaseIndexOffset match(const LSBaseSDNode *N, const SelectionDAG &DAG);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index 86df0af7303f..6758c55c696a 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -132,6 +132,7 @@ public:
OPC_CheckChild2Same, OPC_CheckChild3Same,
OPC_CheckPatternPredicate,
OPC_CheckPredicate,
+ OPC_CheckPredicateWithOperands,
OPC_CheckOpcode,
OPC_SwitchOpcode,
OPC_CheckType,
@@ -267,6 +268,17 @@ public:
llvm_unreachable("Tblgen should generate the implementation of this!");
}
+ /// CheckNodePredicateWithOperands - This function is generated by tblgen in
+ /// the target.
+ /// It runs node predicate number PredNo and returns true if it succeeds or
+ /// false if it fails. The number is a private implementation detail to the
+ /// code tblgen produces.
+ virtual bool CheckNodePredicateWithOperands(
+ SDNode *N, unsigned PredNo,
+ const SmallVectorImpl<SDValue> &Operands) const {
+ llvm_unreachable("Tblgen should generate the implementation of this!");
+ }
+
virtual bool CheckComplexPattern(SDNode *Root, SDNode *Parent, SDValue N,
unsigned PatternNo,
SmallVectorImpl<std::pair<SDValue, SDNode*> > &Result) {
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 1af22185d366..10f284179084 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -672,6 +672,12 @@ public:
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
return true;
}
}
@@ -1589,15 +1595,38 @@ bool isAllOnesConstant(SDValue V);
/// Returns true if \p V is a constant integer one.
bool isOneConstant(SDValue V);
+/// Return the non-bitcasted source operand of \p V if it exists.
+/// If \p V is not a bitcasted value, it is returned as-is.
+SDValue peekThroughBitcasts(SDValue V);
+
+/// Return the non-bitcasted and one-use source operand of \p V if it exists.
+/// If \p V is not a bitcasted one-use value, it is returned as-is.
+SDValue peekThroughOneUseBitcasts(SDValue V);
+
/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
bool isBitwiseNot(SDValue V);
/// Returns the SDNode if it is a constant splat BuildVector or constant int.
-ConstantSDNode *isConstOrConstSplat(SDValue N);
+ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false);
/// Returns the SDNode if it is a constant splat BuildVector or constant float.
-ConstantFPSDNode *isConstOrConstSplatFP(SDValue N);
+ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
+
+/// Return true if the value is a constant 0 integer or a splatted vector of
+/// a constant 0 integer (with no undefs).
+/// Build vector implicit truncation is not an issue for null values.
+bool isNullOrNullSplat(SDValue V);
+
+/// Return true if the value is a constant 1 integer or a splatted vector of a
+/// constant 1 integer (with no undefs).
+/// Does not permit build vector implicit truncation.
+bool isOneOrOneSplat(SDValue V);
+
+/// Return true if the value is a constant -1 integer or a splatted vector of a
+/// constant -1 integer (with no undefs).
+/// Does not permit build vector implicit truncation.
+bool isAllOnesOrAllOnesSplat(SDValue V);
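A hypothetical combine fragment showing how these helpers compose (`N`, `DAG`, `DL` and `VT` are assumed context, not part of the patch):

    SDValue Src = peekThroughBitcasts(N->getOperand(0));   // strip any bitcasts
    if (isNullOrNullSplat(Src))                             // e.g. x & 0, x * 0
      return DAG.getConstant(0, DL, VT);                    // fold to zero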
class GlobalAddressSDNode : public SDNode {
friend class SelectionDAG;
@@ -2113,12 +2142,15 @@ public:
MachineMemOperand *MMO)
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {}
- // In the both nodes address is Op1, mask is Op2:
- // MaskedLoadSDNode (Chain, ptr, mask, src0), src0 is a passthru value
- // MaskedStoreSDNode (Chain, ptr, mask, data)
+ // MaskedLoadSDNode (Chain, ptr, mask, passthru)
+ // MaskedStoreSDNode (Chain, data, ptr, mask)
// Mask is a vector of i1 elements
- const SDValue &getBasePtr() const { return getOperand(1); }
- const SDValue &getMask() const { return getOperand(2); }
+ const SDValue &getBasePtr() const {
+ return getOperand(getOpcode() == ISD::MLOAD ? 1 : 2);
+ }
+ const SDValue &getMask() const {
+ return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
+ }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD ||
@@ -2143,7 +2175,10 @@ public:
return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
}
- const SDValue &getSrc0() const { return getOperand(3); }
+ const SDValue &getBasePtr() const { return getOperand(1); }
+ const SDValue &getMask() const { return getOperand(2); }
+ const SDValue &getPassThru() const { return getOperand(3); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD;
}
@@ -2175,7 +2210,9 @@ public:
/// memory at base_addr.
bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
- const SDValue &getValue() const { return getOperand(3); }
+ const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getBasePtr() const { return getOperand(2); }
+ const SDValue &getMask() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSTORE;
@@ -2201,7 +2238,6 @@ public:
const SDValue &getBasePtr() const { return getOperand(3); }
const SDValue &getIndex() const { return getOperand(4); }
const SDValue &getMask() const { return getOperand(2); }
- const SDValue &getValue() const { return getOperand(1); }
const SDValue &getScale() const { return getOperand(5); }
static bool classof(const SDNode *N) {
@@ -2220,6 +2256,8 @@ public:
EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {}
+ const SDValue &getPassThru() const { return getOperand(1); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MGATHER;
}
@@ -2235,6 +2273,8 @@ public:
EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO) {}
+ const SDValue &getValue() const { return getOperand(1); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSCATTER;
}
@@ -2243,32 +2283,60 @@ public:
/// An SDNode that represents everything that will be needed
/// to construct a MachineInstr. These nodes are created during the
/// instruction selection proper phase.
+///
+/// Note that the only supported way to set the `memoperands` is by calling the
+/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
+/// inside the DAG rather than in the node.
class MachineSDNode : public SDNode {
-public:
- using mmo_iterator = MachineMemOperand **;
-
private:
friend class SelectionDAG;
MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
: SDNode(Opc, Order, DL, VTs) {}
- /// Memory reference descriptions for this instruction.
- mmo_iterator MemRefs = nullptr;
- mmo_iterator MemRefsEnd = nullptr;
+ // We use a pointer union between a single `MachineMemOperand` pointer and
+ // a pointer to an array of `MachineMemOperand` pointers. This is null when
+ // the number of these is zero, the single pointer variant is used when the
+ // number is one, and the array is used for larger numbers.
+ //
+ // The array is allocated via the `SelectionDAG`'s allocator and so will
+ // always live until the DAG is cleaned up and doesn't require ownership here.
+ //
+ // We can't use something simpler like `TinyPtrVector` here because `SDNode`
+ // subclasses aren't managed in a conforming C++ manner. See the comments on
+ // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
+ // constraint here is that these don't manage memory with their constructor or
+ // destructor and can be initialized to a good state even if they start off
+ // uninitialized.
+ PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
+
+ // Note that this could be folded into the above `MemRefs` member if doing so
+ // is advantageous at some point. We don't need to store this in most cases.
+ // However, at the moment this doesn't appear to make the allocation any
+ // smaller and makes the code somewhat simpler to read.
+ int NumMemRefs = 0;
public:
- mmo_iterator memoperands_begin() const { return MemRefs; }
- mmo_iterator memoperands_end() const { return MemRefsEnd; }
- bool memoperands_empty() const { return MemRefsEnd == MemRefs; }
+ using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
+
+ ArrayRef<MachineMemOperand *> memoperands() const {
+ // Special case the common cases.
+ if (NumMemRefs == 0)
+ return {};
+ if (NumMemRefs == 1)
+ return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
+
+ // Otherwise we have an actual array.
+ return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
+ }
+ mmo_iterator memoperands_begin() const { return memoperands().begin(); }
+ mmo_iterator memoperands_end() const { return memoperands().end(); }
+ bool memoperands_empty() const { return memoperands().empty(); }
- /// Assign this MachineSDNodes's memory reference descriptor
- /// list. This does not transfer ownership.
- void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
- for (mmo_iterator MMI = NewMemRefs, MME = NewMemRefsEnd; MMI != MME; ++MMI)
- assert(*MMI && "Null mem ref detected!");
- MemRefs = NewMemRefs;
- MemRefsEnd = NewMemRefsEnd;
+ /// Clear out the memory reference descriptor list.
+ void clearMemRefs() {
+ MemRefs = nullptr;
+ NumMemRefs = 0;
}
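A sketch of the intended usage per the note above (hypothetical instruction-selection snippet; `CurDAG`, `NewNode` and `MemOp` are assumed names):

    MachineSDNode *MN = cast<MachineSDNode>(NewNode);
    CurDAG->setNodeMemRefs(MN, {MemOp});          // DAG allocator owns the array
    for (MachineMemOperand *MMO : MN->memoperands())
      (void)MMO;                                  // iterate the attached memrefs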
static bool classof(const SDNode *N) {
@@ -2405,17 +2473,32 @@ namespace ISD {
cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
}
+ /// Return true if the node is a math/logic binary operator. This corresponds
+ /// to the IR function of the same name.
+ inline bool isBinaryOp(const SDNode *N) {
+ auto Op = N->getOpcode();
+ return (Op == ISD::ADD || Op == ISD::SUB || Op == ISD::MUL ||
+ Op == ISD::AND || Op == ISD::OR || Op == ISD::XOR ||
+ Op == ISD::SHL || Op == ISD::SRL || Op == ISD::SRA ||
+ Op == ISD::SDIV || Op == ISD::UDIV || Op == ISD::SREM ||
+ Op == ISD::UREM || Op == ISD::FADD || Op == ISD::FSUB ||
+ Op == ISD::FMUL || Op == ISD::FDIV || Op == ISD::FREM);
+ }
+
/// Attempt to match a unary predicate against a scalar/splat constant or
/// every element of a constant BUILD_VECTOR.
+ /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
bool matchUnaryPredicate(SDValue Op,
- std::function<bool(ConstantSDNode *)> Match);
+ std::function<bool(ConstantSDNode *)> Match,
+ bool AllowUndefs = false);
/// Attempt to match a binary predicate against a pair of scalar/splat
/// constants or every element of a pair of constant BUILD_VECTORs.
+ /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
bool matchBinaryPredicate(
SDValue LHS, SDValue RHS,
- std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match);
-
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
+ bool AllowUndefs = false);
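Illustrative caller (a sketch, not from the patch; `Op` is assumed): with AllowUndefs set, the predicate must tolerate a nullptr for UNDEF elements.

    bool AllPowerOf2 = ISD::matchUnaryPredicate(
        Op,
        [](ConstantSDNode *C) {
          return !C || C->getAPIntValue().isPowerOf2();   // nullptr == UNDEF lane
        },
        /*AllowUndefs=*/true);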
} // end namespace ISD
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
index 334267d9828b..8c8a7be459fd 100644
--- a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -413,8 +413,14 @@ class raw_ostream;
/// Returns the base index for the given instruction.
SlotIndex getInstructionIndex(const MachineInstr &MI) const {
// Instructions inside a bundle have the same number as the bundle itself.
- const MachineInstr &BundleStart = *getBundleStart(MI.getIterator());
- Mi2IndexMap::const_iterator itr = mi2iMap.find(&BundleStart);
+ auto BundleStart = getBundleStart(MI.getIterator());
+ auto BundleEnd = getBundleEnd(MI.getIterator());
+ // Use the first non-debug instruction in the bundle to get SlotIndex.
+ const MachineInstr &BundleNonDebug =
+ *skipDebugInstructionsForward(BundleStart, BundleEnd);
+ assert(!BundleNonDebug.isDebugInstr() &&
+ "Could not use a debug instruction to query mi2iMap.");
+ Mi2IndexMap::const_iterator itr = mi2iMap.find(&BundleNonDebug);
assert(itr != mi2iMap.end() && "Instruction not found in maps.");
return itr->second;
}
@@ -442,7 +448,7 @@ class raw_ostream;
/// MI is not required to have an index.
SlotIndex getIndexBefore(const MachineInstr &MI) const {
const MachineBasicBlock *MBB = MI.getParent();
- assert(MBB && "MI must be inserted inna basic block");
+ assert(MBB && "MI must be inserted in a basic block");
MachineBasicBlock::const_iterator I = MI, B = MBB->begin();
while (true) {
if (I == B)
@@ -459,7 +465,7 @@ class raw_ostream;
/// MI is not required to have an index.
SlotIndex getIndexAfter(const MachineInstr &MI) const {
const MachineBasicBlock *MBB = MI.getParent();
- assert(MBB && "MI must be inserted inna basic block");
+ assert(MBB && "MI must be inserted in a basic block");
MachineBasicBlock::const_iterator I = MI, E = MBB->end();
while (true) {
++I;
@@ -674,7 +680,7 @@ class raw_ostream;
idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb));
renumberIndexes(newItr);
- llvm::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+ llvm::sort(idx2MBBMap, Idx2MBBCompare());
}
/// Free the resources that were required to maintain a SlotIndex.
diff --git a/contrib/llvm/include/llvm/CodeGen/StackMaps.h b/contrib/llvm/include/llvm/CodeGen/StackMaps.h
index e584a4136e4f..8be9ae378557 100644
--- a/contrib/llvm/include/llvm/CodeGen/StackMaps.h
+++ b/contrib/llvm/include/llvm/CodeGen/StackMaps.h
@@ -236,25 +236,6 @@ public:
FnInfos.clear();
}
- /// Generate a stackmap record for a stackmap instruction.
- ///
- /// MI must be a raw STACKMAP, not a PATCHPOINT.
- void recordStackMap(const MachineInstr &MI);
-
- /// Generate a stackmap record for a patchpoint instruction.
- void recordPatchPoint(const MachineInstr &MI);
-
- /// Generate a stackmap record for a statepoint instruction.
- void recordStatepoint(const MachineInstr &MI);
-
- /// If there is any stack map data, create a stack map section and serialize
- /// the map info into it. This clears the stack map data structures
- /// afterwards.
- void serializeToStackMapSection();
-
-private:
- static const char *WSMP;
-
using LocationVec = SmallVector<Location, 8>;
using LiveOutVec = SmallVector<LiveOutReg, 8>;
using ConstantPool = MapVector<uint64_t, uint64_t>;
@@ -283,6 +264,31 @@ private:
using FnInfoMap = MapVector<const MCSymbol *, FunctionInfo>;
using CallsiteInfoList = std::vector<CallsiteInfo>;
+ /// Generate a stackmap record for a stackmap instruction.
+ ///
+ /// MI must be a raw STACKMAP, not a PATCHPOINT.
+ void recordStackMap(const MachineInstr &MI);
+
+ /// Generate a stackmap record for a patchpoint instruction.
+ void recordPatchPoint(const MachineInstr &MI);
+
+ /// Generate a stackmap record for a statepoint instruction.
+ void recordStatepoint(const MachineInstr &MI);
+
+ /// If there is any stack map data, create a stack map section and serialize
+ /// the map info into it. This clears the stack map data structures
+ /// afterwards.
+ void serializeToStackMapSection();
+
+ /// Get call site info.
+ CallsiteInfoList &getCSInfos() { return CSInfos; }
+
+ /// Get function info.
+ FnInfoMap &getFnInfos() { return FnInfos; }
+
+private:
+ static const char *WSMP;
+
AsmPrinter &AP;
CallsiteInfoList CSInfos;
ConstantPool ConstPool;
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/contrib/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index f8effee998e3..b4d1da941433 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -207,8 +207,11 @@ public:
return false;
}
- /// Return true if the target needs to disable frame pointer elimination.
- virtual bool noFramePointerElim(const MachineFunction &MF) const;
+ /// Return true if the target wants to keep the frame pointer regardless of
+ /// the function attribute "frame-pointer".
+ virtual bool keepFramePointer(const MachineFunction &MF) const {
+ return false;
+ }
/// hasFP - Return true if the specified function should have a dedicated
/// frame pointer register. For most targets this is true only if the function
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index b5bc561d834c..961b90e9bc12 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -246,14 +246,14 @@ public:
}
/// If the specified machine instruction has a load from a stack slot,
- /// return true along with the FrameIndex of the loaded stack slot and the
- /// machine mem operand containing the reference.
+ /// return true along with the FrameIndices of the loaded stack slot and the
+ /// machine mem operands containing the reference.
/// If not, return false. Unlike isLoadFromStackSlot, this returns true for
/// any instruction that loads from the stack. This is just a hint, as some
/// cases may be missed.
- virtual bool hasLoadFromStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
+ virtual bool hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const;
/// If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
@@ -284,14 +284,14 @@ public:
}
/// If the specified machine instruction has a store to a stack slot,
- /// return true along with the FrameIndex of the loaded stack slot and the
- /// machine mem operand containing the reference.
+ /// return true along with the FrameIndices of the loaded stack slot and the
+ /// machine mem operands containing the reference.
/// If not, return false. Unlike isStoreToStackSlot,
/// this returns true for any instruction that stores to the
/// stack. This is just a hint, as some cases may be missed.
- virtual bool hasStoreToStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
+ virtual bool hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const;
/// Return true if the specified machine instruction
/// is a copy of one stack slot to another and has no other effect.
@@ -846,15 +846,33 @@ public:
llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!");
}
+protected:
+ /// Target-dependent implementation for isCopyInstr.
/// If the specific machine instruction is an instruction that moves/copies
/// a value from one register to another register, return true along with
/// @Source machine operand and @Destination machine operand.
- virtual bool isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&SourceOpNum,
- const MachineOperand *&Destination) const {
+ virtual bool isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Source,
+ const MachineOperand *&Destination) const {
return false;
}
+public:
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another register, return true along with
+ /// @Source machine operand and @Destination machine operand.
+ /// For a COPY instruction the method naturally returns true; for all other
+ /// instructions it calls the target-dependent implementation.
+ bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const {
+ if (MI.isCopy()) {
+ Destination = &MI.getOperand(0);
+ Source = &MI.getOperand(1);
+ return true;
+ }
+ return isCopyInstrImpl(MI, Source, Destination);
+ }
+
/// Store the specified register of the given register class to the specified
/// stack frame index. The store instruction is to be added to the given
/// machine basic block before the specified machine instruction. If isKill
@@ -1063,7 +1081,7 @@ public:
/// getAddressSpaceForPseudoSourceKind - Given the kind of memory
/// (e.g. stack) the target returns the corresponding address space.
virtual unsigned
- getAddressSpaceForPseudoSourceKind(PseudoSourceValue::PSVKind Kind) const {
+ getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
return 0;
}
@@ -1118,11 +1136,11 @@ public:
return false;
}
- /// Get the base register and byte offset of an instruction that reads/writes
+ /// Get the base operand and byte offset of an instruction that reads/writes
/// memory.
- virtual bool getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+ virtual bool getMemOperandWithOffset(MachineInstr &MI,
+ MachineOperand *&BaseOp, int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
return false;
}
@@ -1146,8 +1164,8 @@ public:
/// or
/// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
/// to TargetPassConfig::createMachineScheduler() to have an effect.
- virtual bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
- MachineInstr &SecondLdSt, unsigned BaseReg2,
+ virtual bool shouldClusterMemOps(MachineOperand &BaseOp1,
+ MachineOperand &BaseOp2,
unsigned NumLoads) const {
llvm_unreachable("target did not implement shouldClusterMemOps()");
}
@@ -1617,10 +1635,11 @@ public:
"Target didn't implement TargetInstrInfo::getOutliningType!");
}
- /// Returns target-defined flags defining properties of the MBB for
- /// the outliner.
- virtual unsigned getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
- return 0x0;
+ /// Optional target hook that returns true if \p MBB is safe to outline from,
+ /// and returns any target-specific information in \p Flags.
+ virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
+ return true;
}
/// Insert a custom frame for outlined functions.
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetLowering.h b/contrib/llvm/include/llvm/CodeGen/TargetLowering.h
index 40540bd6e1ff..23dbaac03ebe 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -29,7 +29,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -163,6 +163,7 @@ public:
LLOnly, // Expand the (load) instruction into just a load-linked, which has
// greater atomic guarantees than a normal load.
CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
+ MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
};
/// Enum that specifies when a multiplication should be expanded.
@@ -268,6 +269,14 @@ public:
return true;
}
+ /// Return true if it is profitable to convert a select of FP constants into
+ /// a constant pool load whose address depends on the select condition. The
+ /// parameter may be used to differentiate a select with FP compare from
+ /// integer compare.
+ virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+ return true;
+ }
+
/// Return true if multiple condition registers are available.
bool hasMultipleConditionRegisters() const {
return HasMultipleConditionRegisters;
@@ -278,7 +287,7 @@ public:
/// Return the preferred vector type legalization action.
virtual TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const {
+ getPreferredVectorAction(MVT VT) const {
// The default action for one element vectors is to scalarize
if (VT.getVectorNumElements() == 1)
return TypeScalarizeVector;
@@ -545,6 +554,12 @@ public:
return false;
}
+ /// Return true if inserting a scalar into a variable element of an undef
+ /// vector is more efficiently handled by splatting the scalar instead.
+ virtual bool shouldSplatInsEltVarIndex(EVT) const {
+ return false;
+ }
+
/// Return true if target supports floating point exceptions.
bool hasFloatingPointExceptions() const {
return HasFloatingPointExceptions;
@@ -790,6 +805,38 @@ public:
return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
}
+ /// Custom method defined by each target to indicate if an operation which
+ /// may require a scale is supported natively by the target.
+ /// If not, the operation is illegal.
+ virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
+ unsigned Scale) const {
+ return false;
+ }
+
+ /// Some fixed point operations may be natively supported by the target but
+ /// only for specific scales. This method allows for checking
+ /// if the width is supported by the target for a given operation that may
+ /// depend on scale.
+ LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
+ unsigned Scale) const {
+ auto Action = getOperationAction(Op, VT);
+ if (Action != Legal)
+ return Action;
+
+ // This operation is supported in this type but may only work on specific
+ // scales.
+ bool Supported;
+ switch (Op) {
+ default:
+ llvm_unreachable("Unexpected fixed point operation.");
+ case ISD::SMULFIX:
+ Supported = isSupportedFixedPointOperation(Op, VT, Scale);
+ break;
+ }
+
+ return Supported ? Action : Expand;
+ }
+
LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
unsigned EqOpc;
switch (Op) {
@@ -798,6 +845,7 @@ public:
case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break;
case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break;
case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break;
+ case ISD::STRICT_FREM: EqOpc = ISD::FREM; break;
case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
@@ -811,6 +859,12 @@ public:
case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
+ case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
+ case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
+ case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
+ case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
+ case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
+ case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
}
auto Action = getOperationAction(EqOpc, VT);
@@ -1199,13 +1253,15 @@ public:
/// reduce runtime.
virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
- // Return true if it is profitable to reduce the given load node to a smaller
- // type.
- //
- // e.g. (i16 (trunc (i32 (load x))) -> i16 load x should be performed
- virtual bool shouldReduceLoadWidth(SDNode *Load,
- ISD::LoadExtType ExtTy,
+ /// Return true if it is profitable to reduce a load to a smaller type.
+ /// Example: (i16 (trunc (i32 (load x))) -> i16 load x
+ virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const {
+ // By default, assume that it is cheaper to extract a subvector from a wide
+ // vector load rather than creating multiple narrow vector loads.
+ if (NewVT.isVector() && !Load->hasOneUse())
+ return false;
+
return true;
}
@@ -1428,6 +1484,12 @@ public:
return PrefLoopAlignment;
}
+ /// Should loops be aligned even when the function is marked OptSize (but not
+ /// MinSize).
+ virtual bool alignLoopsWithOptSize() const {
+ return false;
+ }
+
/// If the target has a standard location for the stack protector guard,
/// returns the address of that location. Otherwise, returns nullptr.
/// DEPRECATED: please override useLoadStackGuardNode and customize
@@ -1549,6 +1611,26 @@ public:
llvm_unreachable("Store conditional unimplemented on this target");
}
+ /// Perform a masked atomicrmw using a target-specific intrinsic. This
+ /// represents the core LL/SC loop which will be lowered at a late stage by
+ /// the backend.
+ virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder,
+ AtomicRMWInst *AI,
+ Value *AlignedAddr, Value *Incr,
+ Value *Mask, Value *ShiftAmt,
+ AtomicOrdering Ord) const {
+ llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
+ }
+
+ /// Perform a masked cmpxchg using a target-specific intrinsic. This
+ /// represents the core LL/SC loop which will be lowered at a late stage by
+ /// the backend.
+ virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
+ IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
+ Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+ llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
+ }
+
/// Inserts in the IR a target-specific intrinsic specifying a fence.
/// It is called by AtomicExpandPass before expanding an
/// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
@@ -1625,11 +1707,11 @@ public:
return AtomicExpansionKind::None;
}
- /// Returns true if the given atomic cmpxchg should be expanded by the
- /// IR-level AtomicExpand pass into a load-linked/store-conditional sequence
- /// (through emitLoadLinked() and emitStoreConditional()).
- virtual bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
- return false;
+ /// Returns how the given atomic cmpxchg should be expanded by the IR-level
+ /// AtomicExpand pass.
+ virtual AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
+ return AtomicExpansionKind::None;
}
/// Returns how the IR-level AtomicExpand pass should expand the given
@@ -1687,6 +1769,25 @@ public:
return false;
}
+ /// Return true if it is profitable to transform an integer
+ /// multiplication-by-constant into simpler operations like shifts and adds.
+ /// This may be true if the target does not directly support the
+ /// multiplication operation for the specified type or the sequence of simpler
+ /// ops is faster than the multiply.
+ virtual bool decomposeMulByConstant(EVT VT, SDValue C) const {
+ return false;
+ }
+
+ /// Return true if it is more correct/profitable to use strict FP_TO_INT
+ /// conversion operations - canonicalizing the FP source value instead of
+ /// converting all cases and then selecting based on value.
+ /// This may be true if the target throws exceptions for out of bounds
+ /// conversions or has fast FP CMOV.
+ virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
+ bool IsSigned) const {
+ return false;
+ }
+
//===--------------------------------------------------------------------===//
// TargetLowering Configuration Methods - These methods should be invoked by
// the derived class constructor to configure this object for the target.
@@ -2015,6 +2116,14 @@ public:
return true;
}
+ /// Return true if the specified immediate is legal for the value input of a
+ /// store instruction.
+ virtual bool isLegalStoreImmediate(int64_t Value) const {
+ // Default implementation assumes that at least 0 works since it is likely
+ // that a zero register exists or a zero immediate is allowed.
+ return Value == 0;
+ }
+
/// Return true if it's significantly cheaper to shift a vector by a uniform
/// scalar than by an amount which will vary across each lane. On x86, for
/// example, there is a "psllw" instruction for the former case, but no simple
@@ -2046,10 +2155,12 @@ public:
case ISD::UADDO:
case ISD::ADDC:
case ISD::ADDE:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
return true;
default: return false;
}
@@ -2153,6 +2264,12 @@ public:
return false;
}
+ /// Return true if sign-extension from FromTy to ToTy is cheaper than
+ /// zero-extension.
+ virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
+ return false;
+ }
+
/// Return true if the target supplies and combines to a paired load
/// two loaded values of type LoadedType next to each other in memory.
/// RequiredAlignment gives the minimal alignment constraints that must be met
@@ -2292,6 +2409,12 @@ public:
return false;
}
+ /// Try to convert an extract element of a vector binary operation into an
+ /// extract element followed by a scalar operation.
+ virtual bool shouldScalarizeBinop(SDValue VecOp) const {
+ return false;
+ }
+
// Return true if it is profitable to use a scalar input to a BUILD_VECTOR
// even if the vector itself has multiple uses.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
@@ -2648,7 +2771,7 @@ public:
virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
FunctionLoweringInfo *FLI,
- DivergenceAnalysis *DA) const {
+ LegacyDivergenceAnalysis *DA) const {
return false;
}
@@ -2774,36 +2897,33 @@ public:
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
TargetLoweringOpt &TLO) const;
- /// Helper for SimplifyDemandedBits that can simplify an operation with
- /// multiple uses. This function simplifies operand \p OpIdx of \p User and
- /// then updates \p User with the simplified version. No other uses of
- /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this
- /// function behaves exactly like function SimplifyDemandedBits declared
- /// below except that it also updates the DAG by calling
- /// DCI.CommitTargetLoweringOpt.
- bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded,
- DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const;
-
- /// Look at Op. At this point, we know that only the DemandedMask bits of the
+ /// Look at Op. At this point, we know that only the DemandedBits bits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning
/// the original and new nodes in Old and New. Otherwise, analyze the
/// expression and return a mask of KnownOne and KnownZero bits for the
/// expression (used to simplify the caller). The KnownZero/One bits may only
- /// be accurate for those bits in the DemandedMask.
+ /// be accurate for those bits in the Demanded masks.
/// \p AssumeSingleUse When this parameter is true, this function will
/// attempt to simplify \p Op even if there are multiple uses.
/// Callers are responsible for correctly updating the DAG based on the
/// results of this function, because simply replacing TLO.Old
/// with TLO.New will be incorrect when this parameter is true and TLO.Old
/// has multiple uses.
- bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
- KnownBits &Known,
- TargetLoweringOpt &TLO,
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts, KnownBits &Known,
+ TargetLoweringOpt &TLO, unsigned Depth = 0,
+ bool AssumeSingleUse = false) const;
+
+ /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
+ /// Adds Op back to the worklist upon success.
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth = 0,
bool AssumeSingleUse = false) const;
- /// Helper wrapper around SimplifyDemandedBits
+ /// Helper wrapper around SimplifyDemandedBits.
+ /// Adds Op back to the worklist upon success.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
DAGCombinerInfo &DCI) const;
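A hedged sketch of how a target combine might call the new demanded-elements overload (all names here -- `TLI`, `DCI`, `DAG`, `Op`, `N`, `NumElts` -- are assumed context, not defined by the patch):

    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                          !DCI.isBeforeLegalizeOps());
    unsigned BitWidth = Op.getScalarValueSizeInBits();
    APInt DemandedBits = APInt::getLowBitsSet(BitWidth, BitWidth / 2);
    APInt DemandedElts = APInt::getAllOnesValue(NumElts);
    KnownBits Known;
    if (TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO)) {
      DCI.CommitTargetLoweringOpt(TLO);   // commit the simplified node
      return SDValue(N, 0);
    }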
@@ -2826,7 +2946,8 @@ public:
TargetLoweringOpt &TLO, unsigned Depth = 0,
bool AssumeSingleUse = false) const;
- /// Helper wrapper around SimplifyDemandedVectorElts
+ /// Helper wrapper around SimplifyDemandedVectorElts.
+ /// Adds Op back to the worklist upon success.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
APInt &KnownUndef, APInt &KnownZero,
DAGCombinerInfo &DCI) const;
@@ -2863,11 +2984,30 @@ public:
/// elements, returning true on success. Otherwise, analyze the expression and
/// return a mask of KnownUndef and KnownZero elements for the expression
/// (used to simplify the caller). The KnownUndef/Zero elements may only be
- /// accurate for those bits in the DemandedMask
+ /// accurate for those bits in the DemandedMask.
virtual bool SimplifyDemandedVectorEltsForTargetNode(
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
+ /// Attempt to simplify any target nodes based on the demanded bits/elts,
+ /// returning true on success. Otherwise, analyze the
+ /// expression and return a mask of KnownOne and KnownZero bits for the
+ /// expression (used to simplify the caller). The KnownZero/One bits may only
+ /// be accurate for those bits in the Demanded masks.
+ virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth = 0) const;
+
+ /// If \p SNaN is false, \returns true if \p Op is known to never be any
+ /// NaN. If \p SNaN is true, returns whether \p Op is known to never be a
+ /// signaling NaN.
+ virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN = false,
+ unsigned Depth = 0) const;
struct DAGCombinerInfo {
void *DC; // The DAG Combiner object.
CombineLevel Level;
@@ -2935,12 +3075,25 @@ public:
///
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- /// Return true if it is profitable to move a following shift through this
- // node, adjusting any immediate operands as necessary to preserve semantics.
- // This transformation may not be desirable if it disrupts a particularly
- // auspicious target-specific tree (e.g. bitfield extraction in AArch64).
- // By default, it returns true.
- virtual bool isDesirableToCommuteWithShift(const SDNode *N) const {
+ /// Return true if it is profitable to move this shift by a constant amount
+ /// through its operand, adjusting any immediate operands as necessary to
+ /// preserve semantics. This transformation may not be desirable if it
+ /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
+ /// extraction in AArch64). By default, it returns true.
+ ///
+ /// @param N the shift node
+ /// @param Level the current DAGCombine legalization level.
+ virtual bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const {
+ return true;
+ }
+
+ /// Return true if it is profitable to fold a pair of shifts into a mask.
+ /// This is usually true on most targets. But some targets, like Thumb1,
+ /// have immediate shift instructions, but no immediate "and" instruction;
+ /// this makes the fold unprofitable.
+ virtual bool shouldFoldShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const {
return true;
}
@@ -3488,11 +3641,9 @@ public:
//===--------------------------------------------------------------------===//
// Div utility functions
//
- SDValue BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
- bool IsAfterLegalization,
+ SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const;
- SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
- bool IsAfterLegalization,
+ SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const;
/// Targets may override this function to provide custom SDIV lowering for
@@ -3584,12 +3735,68 @@ public:
SDValue LL = SDValue(), SDValue LH = SDValue(),
SDValue RL = SDValue(), SDValue RH = SDValue()) const;
+ /// Expand funnel shift.
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandFunnelShift(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
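For reference, a scalar C++ rendering of the funnel-shift-left semantics being expanded here (a standalone sketch, not part of the patch):

    #include <cstdint>
    uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
      Amt &= 31;                                           // shift amount modulo width
      return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;  // concat(Hi, Lo) << Amt, high half
    }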
+
+ /// Expand rotations.
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
/// Expand float(f32) to SINT(i64) conversion
/// \param N Node to expand
/// \param Result output after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ /// Expand float to UINT conversion
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
+ /// Expand UINT(i64) to double(f64) conversion
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
+ /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
+ SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
+
+ /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes;
+ /// vector nodes can only succeed if all operations are legal/custom.
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
+ /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes;
+ /// vector nodes can only succeed if all operations are legal/custom.
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
+ /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes;
+ /// vector nodes can only succeed if all operations are legal/custom.
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
+ /// Expand ABS nodes. Expands vector/scalar ABS nodes;
+ /// vector nodes can only succeed if all operations are legal/custom.
+ /// (ABS x) -> (XOR (ADD x, (SRA x, type_size - 1)), (SRA x, type_size - 1))
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
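For reference, a scalar C++ rendering of the ABS expansion documented above (a sketch; 31 is the sign-bit position for i32, and the arithmetic is done in unsigned to mirror the wrapping DAG semantics):

    #include <cstdint>
    int32_t absExpansion(int32_t X) {
      uint32_t Sra = (uint32_t)(X >> 31);           // SRA x, 31: 0 or all-ones sign mask
      return (int32_t)(((uint32_t)X + Sra) ^ Sra);  // (XOR (ADD x, sra), sra)
    }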
+
/// Turn load of vector type into a load of the individual elements.
/// \param LD load to expand
/// \returns MERGE_VALUEs of the scalar loads with their chains.
@@ -3627,6 +3834,15 @@ public:
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
SDValue Index) const;
+ /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
+ /// method accepts integers as its arguments.
+ SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
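An illustrative scalar equivalent of the unsigned saturating-add case (UADDSAT) on i8, just to make the semantics concrete (a sketch, not the DAG expansion itself):

    #include <cstdint>
    uint8_t uaddsat8(uint8_t A, uint8_t B) {
      uint8_t Sum = (uint8_t)(A + B);   // wraps on overflow
      return Sum < A ? 0xFF : Sum;      // clamp to the maximum when it wrapped
    }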
+
+ /// Method for building the DAG expansion of ISD::SMULFIX. This method accepts
+ /// integers as its arguments.
+ SDValue getExpandedFixedPointMultiplication(SDNode *Node,
+ SelectionDAG &DAG) const;
+
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
//
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index f5c7fc824ab4..052d1f8bc686 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -90,6 +90,8 @@ public:
const MCExpr *lowerRelativeReference(const GlobalValue *LHS,
const GlobalValue *RHS,
const TargetMachine &TM) const override;
+
+ MCSection *getSectionForCommandLines() const override;
};
class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h b/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 8f5c9cb8c3fa..3288711a335d 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -90,6 +90,19 @@ private:
AnalysisID StartAfter = nullptr;
AnalysisID StopBefore = nullptr;
AnalysisID StopAfter = nullptr;
+
+ unsigned StartBeforeInstanceNum = 0;
+ unsigned StartBeforeCount = 0;
+
+ unsigned StartAfterInstanceNum = 0;
+ unsigned StartAfterCount = 0;
+
+ unsigned StopBeforeInstanceNum = 0;
+ unsigned StopBeforeCount = 0;
+
+ unsigned StopAfterInstanceNum = 0;
+ unsigned StopAfterCount = 0;
+
bool Started = true;
bool Stopped = false;
bool AddingMachinePasses = false;
@@ -145,13 +158,13 @@ public:
CodeGenOpt::Level getOptLevel() const;
- /// Describe the status of the codegen
- /// pipeline set by this target pass config.
- /// Having a limited codegen pipeline means that options
- /// have been used to restrict what codegen is doing.
- /// In particular, that means that codegen won't emit
- /// assembly code.
- bool hasLimitedCodeGenPipeline() const;
+ /// Returns true if one of the `-start-after`, `-start-before`, `-stop-after`
+ /// or `-stop-before` options is set.
+ static bool hasLimitedCodeGenPipeline();
+
+ /// Returns true if none of the `-stop-before` and `-stop-after` options is
+ /// set.
+ static bool willCompleteCodeGenPipeline();
/// If hasLimitedCodeGenPipeline is true, this method
/// returns a string with the name of the options, separated
@@ -159,13 +172,6 @@ public:
std::string
getLimitedCodeGenPipelineReason(const char *Separator = "/") const;
- /// Check if the codegen pipeline is limited in such a way that it
- /// won't be complete. When the codegen pipeline is not complete,
- /// this means it may not be possible to generate assembly from it.
- bool willCompleteCodeGenPipeline() const {
- return !hasLimitedCodeGenPipeline() || (!StopAfter && !StopBefore);
- }
-
void setDisableVerify(bool Disable) { setOpt(DisableVerify, Disable); }
bool getEnableTailMerge() const { return EnableTailMerge; }
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 55a8ba630a59..0fbff3137653 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -510,6 +510,13 @@ public:
/// markSuperRegs() and checkAllSuperRegsMarked() in this case.
virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
+ /// Returns false if we can't guarantee that Physreg, specified as an IR asm
+ /// clobber constraint, will be preserved across the statement.
+ virtual bool isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ return true;
+ }
+
/// Returns true if PhysReg is unallocatable and constant throughout the
/// function. Used by MachineRegisterInfo::isConstantPhysReg().
virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; }
@@ -817,13 +824,6 @@ public:
// Do nothing.
}
- /// The creation of multiple copy hints have been implemented in
- /// weightCalcHelper(), but since this affects so many tests for many
- /// targets, this is temporarily disabled per default. THIS SHOULD BE
- /// "GENERAL GOODNESS" and hopefully all targets will update their tests
- /// and enable this soon. This hook should then be removed.
- virtual bool enableMultipleCopyHints() const { return false; }
-
/// Allow the target to reverse allocation order of local live ranges. This
/// will generally allocate shorter local live ranges first. For targets with
/// many registers, this could reduce regalloc compile time by a large
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index 227e591f5a7d..968e4c4b8102 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_CODEGEN_TARGETSUBTARGETINFO_H
#define LLVM_CODEGEN_TARGETSUBTARGETINFO_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -144,6 +145,43 @@ public:
return 0;
}
+ /// Returns true if MI is a dependency breaking zero-idiom instruction for the
+ /// subtarget.
+ ///
+ /// This function also sets bits in Mask related to input operands that
+ /// are not in a data dependency relationship. There is one bit for each
+ /// machine operand; implicit operands follow explicit operands in the bit
+ /// representation used for Mask. An empty mask (i.e. a mask with all bits
+ /// cleared) means that data dependencies are "broken" for all the explicit
+ /// input machine operands of MI.
+ virtual bool isZeroIdiom(const MachineInstr *MI, APInt &Mask) const {
+ return false;
+ }
+
+ /// Returns true if MI is a dependency breaking instruction for the subtarget.
+ ///
+ /// Similar in behavior to `isZeroIdiom`. However, it knows how to identify
+ /// all dependency breaking instructions (i.e. not just zero-idioms).
+ ///
+ /// As for `isZeroIdiom`, this method returns a mask of "broken" dependencies.
+ /// (See method `isZeroIdiom` for a detailed description of Mask).
+ virtual bool isDependencyBreaking(const MachineInstr *MI, APInt &Mask) const {
+ return isZeroIdiom(MI, Mask);
+ }
+
+ /// Returns true if MI is a candidate for move elimination.
+ ///
+ /// A candidate for move elimination may be optimized out at register renaming
+ /// stage. Subtargets can specify the set of optimizable moves by
+ /// instantiating tablegen class `IsOptimizableRegisterMove` (see
+ /// llvm/Target/TargetInstrPredicate.td).
+ ///
+ /// SubtargetEmitter is responsible for processing all the definitions of class
+ /// IsOptimizableRegisterMove, and for auto-generating an override for this
+ /// method.
+ virtual bool isOptimizableRegisterMove(const MachineInstr *MI) const {
+ return false;
+ }
+
/// True if the subtarget should run MachineScheduler after aggressive
/// coalescing.
///
diff --git a/contrib/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h b/contrib/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
index 3ad6760d8813..219fff988f6e 100644
--- a/contrib/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
@@ -14,13 +14,15 @@
#ifndef LLVM_CODEGEN_WASMEHFUNCINFO_H
#define LLVM_CODEGEN_WASMEHFUNCINFO_H
-#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/BasicBlock.h"
namespace llvm {
+enum EventTag { CPP_EXCEPTION = 0, C_LONGJMP = 1 };
+
using BBOrMBB = PointerUnion<const BasicBlock *, MachineBasicBlock *>;
struct WasmEHFuncInfo {
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
index 9dbeb438f4ae..11ca9ff108de 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -45,13 +45,8 @@ public:
return RecordData.drop_front(sizeof(RecordPrefix));
}
- Optional<uint32_t> hash() const { return Hash; }
-
- void setHash(uint32_t Value) { Hash = Value; }
-
Kind Type;
ArrayRef<uint8_t> RecordData;
- Optional<uint32_t> Hash;
};
template <typename Kind> struct RemappedRecord {
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
index 4ce9f68cffd9..8e0d9f608e93 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -231,6 +231,8 @@ enum class FrameProcedureOptions : uint32_t {
Inlined = 0x00000800,
StrictSecurityChecks = 0x00001000,
SafeBuffers = 0x00002000,
+ EncodedLocalBasePointerMask = 0x0000C000,
+ EncodedParamBasePointerMask = 0x00030000,
ProfileGuidedOptimization = 0x00040000,
ValidProfileCounts = 0x00080000,
OptimizedForSpeed = 0x00100000,
@@ -356,7 +358,9 @@ enum class PointerOptions : uint32_t {
Const = 0x00000400,
Unaligned = 0x00000800,
Restrict = 0x00001000,
- WinRTSmartPointer = 0x00080000
+ WinRTSmartPointer = 0x00080000,
+ LValueRefThisPointer = 0x00100000,
+ RValueRefThisPointer = 0x00200000
};
CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(PointerOptions)
@@ -510,6 +514,19 @@ enum class RegisterId : uint16_t {
#undef CV_REGISTER
};
+/// Two-bit value indicating which register is the designated frame pointer
+/// register. Appears in the S_FRAMEPROC record flags.
+enum class EncodedFramePtrReg : uint8_t {
+ None = 0,
+ StackPtr = 1,
+ FramePtr = 2,
+ BasePtr = 3,
+};
+
+RegisterId decodeFramePtrReg(EncodedFramePtrReg EncodedReg, CPUType CPU);
+
+EncodedFramePtrReg encodeFramePtrReg(RegisterId Reg, CPUType CPU);
+
/// These values correspond to the THUNK_ORDINAL enumeration.
enum class ThunkOrdinal : uint8_t {
Standard,
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h
index 586a720ce6e4..d4615d02220d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h
@@ -24,23 +24,32 @@ enum class cv_error_code {
no_records,
unknown_member_record,
};
+} // namespace codeview
+} // namespace llvm
+
+namespace std {
+template <>
+struct is_error_code_enum<llvm::codeview::cv_error_code> : std::true_type {};
+} // namespace std
+
+namespace llvm {
+namespace codeview {
+const std::error_category &CVErrorCategory();
+
+inline std::error_code make_error_code(cv_error_code E) {
+ return std::error_code(static_cast<int>(E), CVErrorCategory());
+}
/// Base class for errors originating when parsing raw PDB files
-class CodeViewError : public ErrorInfo<CodeViewError> {
+class CodeViewError : public ErrorInfo<CodeViewError, StringError> {
public:
+ using ErrorInfo<CodeViewError,
+ StringError>::ErrorInfo; // inherit constructors
+ CodeViewError(const Twine &S) : ErrorInfo(S, cv_error_code::unspecified) {}
static char ID;
- CodeViewError(cv_error_code C);
- CodeViewError(const std::string &Context);
- CodeViewError(cv_error_code C, const std::string &Context);
+};
- void log(raw_ostream &OS) const override;
- const std::string &getErrorMessage() const;
- std::error_code convertToErrorCode() const override;
+} // namespace codeview
+} // namespace llvm
-private:
- std::string ErrMsg;
- cv_error_code Code;
-};
-}
-}
#endif
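
The rewritten CodeViewError above hooks cv_error_code into the standard error-code machinery by specializing std::is_error_code_enum and providing make_error_code next to a custom error category. The same pattern can be seen in isolation with toy names (demo_error_code and DemoErrorCategory are illustrative, not LLVM identifiers):

#include <iostream>
#include <string>
#include <system_error>

enum class demo_error_code { unspecified = 1, corrupt_record };

namespace std {
template <> struct is_error_code_enum<demo_error_code> : std::true_type {};
} // namespace std

namespace {
class DemoErrorCategory : public std::error_category {
  const char *name() const noexcept override { return "demo.codeview"; }
  std::string message(int C) const override {
    switch (static_cast<demo_error_code>(C)) {
    case demo_error_code::corrupt_record:
      return "corrupt record";
    default:
      return "unspecified error";
    }
  }
};
} // namespace

std::error_code make_error_code(demo_error_code E) {
  static DemoErrorCategory Cat;          // single category instance
  return std::error_code(static_cast<int>(E), Cat);
}

int main() {
  // The enum converts implicitly because is_error_code_enum is specialized.
  std::error_code EC = demo_error_code::corrupt_record;
  std::cout << EC.category().name() << ": " << EC.message() << "\n";
  return 0;
}
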
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
index 6da8893bd61a..fdfcf4d53a23 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
@@ -18,251 +18,342 @@
// This currently only contains the "register subset shared by all processor
// types" (ERR etc.) and the x86 registers.
-CV_REGISTER(CVRegERR, 30000)
-CV_REGISTER(CVRegTEB, 30001)
-CV_REGISTER(CVRegTIMER, 30002)
-CV_REGISTER(CVRegEFAD1, 30003)
-CV_REGISTER(CVRegEFAD2, 30004)
-CV_REGISTER(CVRegEFAD3, 30005)
-CV_REGISTER(CVRegVFRAME, 30006)
-CV_REGISTER(CVRegHANDLE, 30007)
-CV_REGISTER(CVRegPARAMS, 30008)
-CV_REGISTER(CVRegLOCALS, 30009)
-CV_REGISTER(CVRegTID, 30010)
-CV_REGISTER(CVRegENV, 30011)
-CV_REGISTER(CVRegCMDLN, 30012)
-
-CV_REGISTER(CVRegNONE, 0)
-CV_REGISTER(CVRegAL, 1)
-CV_REGISTER(CVRegCL, 2)
-CV_REGISTER(CVRegDL, 3)
-CV_REGISTER(CVRegBL, 4)
-CV_REGISTER(CVRegAH, 5)
-CV_REGISTER(CVRegCH, 6)
-CV_REGISTER(CVRegDH, 7)
-CV_REGISTER(CVRegBH, 8)
-CV_REGISTER(CVRegAX, 9)
-CV_REGISTER(CVRegCX, 10)
-CV_REGISTER(CVRegDX, 11)
-CV_REGISTER(CVRegBX, 12)
-CV_REGISTER(CVRegSP, 13)
-CV_REGISTER(CVRegBP, 14)
-CV_REGISTER(CVRegSI, 15)
-CV_REGISTER(CVRegDI, 16)
-CV_REGISTER(CVRegEAX, 17)
-CV_REGISTER(CVRegECX, 18)
-CV_REGISTER(CVRegEDX, 19)
-CV_REGISTER(CVRegEBX, 20)
-CV_REGISTER(CVRegESP, 21)
-CV_REGISTER(CVRegEBP, 22)
-CV_REGISTER(CVRegESI, 23)
-CV_REGISTER(CVRegEDI, 24)
-CV_REGISTER(CVRegES, 25)
-CV_REGISTER(CVRegCS, 26)
-CV_REGISTER(CVRegSS, 27)
-CV_REGISTER(CVRegDS, 28)
-CV_REGISTER(CVRegFS, 29)
-CV_REGISTER(CVRegGS, 30)
-CV_REGISTER(CVRegIP, 31)
-CV_REGISTER(CVRegFLAGS, 32)
-CV_REGISTER(CVRegEIP, 33)
-CV_REGISTER(CVRegEFLAGS, 34)
-CV_REGISTER(CVRegTEMP, 40)
-CV_REGISTER(CVRegTEMPH, 41)
-CV_REGISTER(CVRegQUOTE, 42)
-CV_REGISTER(CVRegPCDR3, 43)
-CV_REGISTER(CVRegPCDR4, 44)
-CV_REGISTER(CVRegPCDR5, 45)
-CV_REGISTER(CVRegPCDR6, 46)
-CV_REGISTER(CVRegPCDR7, 47)
-CV_REGISTER(CVRegCR0, 80)
-CV_REGISTER(CVRegCR1, 81)
-CV_REGISTER(CVRegCR2, 82)
-CV_REGISTER(CVRegCR3, 83)
-CV_REGISTER(CVRegCR4, 84)
-CV_REGISTER(CVRegDR0, 90)
-CV_REGISTER(CVRegDR1, 91)
-CV_REGISTER(CVRegDR2, 92)
-CV_REGISTER(CVRegDR3, 93)
-CV_REGISTER(CVRegDR4, 94)
-CV_REGISTER(CVRegDR5, 95)
-CV_REGISTER(CVRegDR6, 96)
-CV_REGISTER(CVRegDR7, 97)
-CV_REGISTER(CVRegGDTR, 110)
-CV_REGISTER(CVRegGDTL, 111)
-CV_REGISTER(CVRegIDTR, 112)
-CV_REGISTER(CVRegIDTL, 113)
-CV_REGISTER(CVRegLDTR, 114)
-CV_REGISTER(CVRegTR, 115)
-
-CV_REGISTER(CVRegPSEUDO1, 116)
-CV_REGISTER(CVRegPSEUDO2, 117)
-CV_REGISTER(CVRegPSEUDO3, 118)
-CV_REGISTER(CVRegPSEUDO4, 119)
-CV_REGISTER(CVRegPSEUDO5, 120)
-CV_REGISTER(CVRegPSEUDO6, 121)
-CV_REGISTER(CVRegPSEUDO7, 122)
-CV_REGISTER(CVRegPSEUDO8, 123)
-CV_REGISTER(CVRegPSEUDO9, 124)
-
-CV_REGISTER(CVRegST0, 128)
-CV_REGISTER(CVRegST1, 129)
-CV_REGISTER(CVRegST2, 130)
-CV_REGISTER(CVRegST3, 131)
-CV_REGISTER(CVRegST4, 132)
-CV_REGISTER(CVRegST5, 133)
-CV_REGISTER(CVRegST6, 134)
-CV_REGISTER(CVRegST7, 135)
-CV_REGISTER(CVRegCTRL, 136)
-CV_REGISTER(CVRegSTAT, 137)
-CV_REGISTER(CVRegTAG, 138)
-CV_REGISTER(CVRegFPIP, 139)
-CV_REGISTER(CVRegFPCS, 140)
-CV_REGISTER(CVRegFPDO, 141)
-CV_REGISTER(CVRegFPDS, 142)
-CV_REGISTER(CVRegISEM, 143)
-CV_REGISTER(CVRegFPEIP, 144)
-CV_REGISTER(CVRegFPEDO, 145)
-
-CV_REGISTER(CVRegMM0, 146)
-CV_REGISTER(CVRegMM1, 147)
-CV_REGISTER(CVRegMM2, 148)
-CV_REGISTER(CVRegMM3, 149)
-CV_REGISTER(CVRegMM4, 150)
-CV_REGISTER(CVRegMM5, 151)
-CV_REGISTER(CVRegMM6, 152)
-CV_REGISTER(CVRegMM7, 153)
-
-CV_REGISTER(CVRegXMM0, 154)
-CV_REGISTER(CVRegXMM1, 155)
-CV_REGISTER(CVRegXMM2, 156)
-CV_REGISTER(CVRegXMM3, 157)
-CV_REGISTER(CVRegXMM4, 158)
-CV_REGISTER(CVRegXMM5, 159)
-CV_REGISTER(CVRegXMM6, 160)
-CV_REGISTER(CVRegXMM7, 161)
-
-CV_REGISTER(CVRegMXCSR, 211)
-
-CV_REGISTER(CVRegEDXEAX, 212)
-
-CV_REGISTER(CVRegEMM0L, 220)
-CV_REGISTER(CVRegEMM1L, 221)
-CV_REGISTER(CVRegEMM2L, 222)
-CV_REGISTER(CVRegEMM3L, 223)
-CV_REGISTER(CVRegEMM4L, 224)
-CV_REGISTER(CVRegEMM5L, 225)
-CV_REGISTER(CVRegEMM6L, 226)
-CV_REGISTER(CVRegEMM7L, 227)
-
-CV_REGISTER(CVRegEMM0H, 228)
-CV_REGISTER(CVRegEMM1H, 229)
-CV_REGISTER(CVRegEMM2H, 230)
-CV_REGISTER(CVRegEMM3H, 231)
-CV_REGISTER(CVRegEMM4H, 232)
-CV_REGISTER(CVRegEMM5H, 233)
-CV_REGISTER(CVRegEMM6H, 234)
-CV_REGISTER(CVRegEMM7H, 235)
-
-CV_REGISTER(CVRegMM00, 236)
-CV_REGISTER(CVRegMM01, 237)
-CV_REGISTER(CVRegMM10, 238)
-CV_REGISTER(CVRegMM11, 239)
-CV_REGISTER(CVRegMM20, 240)
-CV_REGISTER(CVRegMM21, 241)
-CV_REGISTER(CVRegMM30, 242)
-CV_REGISTER(CVRegMM31, 243)
-CV_REGISTER(CVRegMM40, 244)
-CV_REGISTER(CVRegMM41, 245)
-CV_REGISTER(CVRegMM50, 246)
-CV_REGISTER(CVRegMM51, 247)
-CV_REGISTER(CVRegMM60, 248)
-CV_REGISTER(CVRegMM61, 249)
-CV_REGISTER(CVRegMM70, 250)
-CV_REGISTER(CVRegMM71, 251)
-
-CV_REGISTER(CVRegBND0, 396)
-CV_REGISTER(CVRegBND1, 397)
-CV_REGISTER(CVRegBND2, 398)
-
-
-CV_REGISTER(CVRegXMM8, 252)
-CV_REGISTER(CVRegXMM9, 253)
-CV_REGISTER(CVRegXMM10, 254)
-CV_REGISTER(CVRegXMM11, 255)
-CV_REGISTER(CVRegXMM12, 256)
-CV_REGISTER(CVRegXMM13, 257)
-CV_REGISTER(CVRegXMM14, 258)
-CV_REGISTER(CVRegXMM15, 259)
-
-
-CV_REGISTER(CVRegSIL, 324)
-CV_REGISTER(CVRegDIL, 325)
-CV_REGISTER(CVRegBPL, 326)
-CV_REGISTER(CVRegSPL, 327)
-
-CV_REGISTER(CVRegRAX, 328)
-CV_REGISTER(CVRegRBX, 329)
-CV_REGISTER(CVRegRCX, 330)
-CV_REGISTER(CVRegRDX, 331)
-CV_REGISTER(CVRegRSI, 332)
-CV_REGISTER(CVRegRDI, 333)
-CV_REGISTER(CVRegRBP, 334)
-CV_REGISTER(CVRegRSP, 335)
-
-CV_REGISTER(CVRegR8, 336)
-CV_REGISTER(CVRegR9, 337)
-CV_REGISTER(CVRegR10, 338)
-CV_REGISTER(CVRegR11, 339)
-CV_REGISTER(CVRegR12, 340)
-CV_REGISTER(CVRegR13, 341)
-CV_REGISTER(CVRegR14, 342)
-CV_REGISTER(CVRegR15, 343)
-
-CV_REGISTER(CVRegR8B, 344)
-CV_REGISTER(CVRegR9B, 345)
-CV_REGISTER(CVRegR10B, 346)
-CV_REGISTER(CVRegR11B, 347)
-CV_REGISTER(CVRegR12B, 348)
-CV_REGISTER(CVRegR13B, 349)
-CV_REGISTER(CVRegR14B, 350)
-CV_REGISTER(CVRegR15B, 351)
-
-CV_REGISTER(CVRegR8W, 352)
-CV_REGISTER(CVRegR9W, 353)
-CV_REGISTER(CVRegR10W, 354)
-CV_REGISTER(CVRegR11W, 355)
-CV_REGISTER(CVRegR12W, 356)
-CV_REGISTER(CVRegR13W, 357)
-CV_REGISTER(CVRegR14W, 358)
-CV_REGISTER(CVRegR15W, 359)
-
-CV_REGISTER(CVRegR8D, 360)
-CV_REGISTER(CVRegR9D, 361)
-CV_REGISTER(CVRegR10D, 362)
-CV_REGISTER(CVRegR11D, 363)
-CV_REGISTER(CVRegR12D, 364)
-CV_REGISTER(CVRegR13D, 365)
-CV_REGISTER(CVRegR14D, 366)
-CV_REGISTER(CVRegR15D, 367)
+// Some system headers define macros that conflict with our enums. Every
+// compiler supported by LLVM has the push_macro and pop_macro pragmas, so use
+// them to avoid the conflict.
+#pragma push_macro("CR0")
+#pragma push_macro("CR1")
+#pragma push_macro("CR2")
+#pragma push_macro("CR3")
+#pragma push_macro("CR4")
+
+CV_REGISTER(ERR, 30000)
+CV_REGISTER(TEB, 30001)
+CV_REGISTER(TIMER, 30002)
+CV_REGISTER(EFAD1, 30003)
+CV_REGISTER(EFAD2, 30004)
+CV_REGISTER(EFAD3, 30005)
+CV_REGISTER(VFRAME, 30006)
+CV_REGISTER(HANDLE, 30007)
+CV_REGISTER(PARAMS, 30008)
+CV_REGISTER(LOCALS, 30009)
+CV_REGISTER(TID, 30010)
+CV_REGISTER(ENV, 30011)
+CV_REGISTER(CMDLN, 30012)
+
+CV_REGISTER(NONE, 0)
+CV_REGISTER(AL, 1)
+CV_REGISTER(CL, 2)
+CV_REGISTER(DL, 3)
+CV_REGISTER(BL, 4)
+CV_REGISTER(AH, 5)
+CV_REGISTER(CH, 6)
+CV_REGISTER(DH, 7)
+CV_REGISTER(BH, 8)
+CV_REGISTER(AX, 9)
+CV_REGISTER(CX, 10)
+CV_REGISTER(DX, 11)
+CV_REGISTER(BX, 12)
+CV_REGISTER(SP, 13)
+CV_REGISTER(BP, 14)
+CV_REGISTER(SI, 15)
+CV_REGISTER(DI, 16)
+CV_REGISTER(EAX, 17)
+CV_REGISTER(ECX, 18)
+CV_REGISTER(EDX, 19)
+CV_REGISTER(EBX, 20)
+CV_REGISTER(ESP, 21)
+CV_REGISTER(EBP, 22)
+CV_REGISTER(ESI, 23)
+CV_REGISTER(EDI, 24)
+CV_REGISTER(ES, 25)
+CV_REGISTER(CS, 26)
+CV_REGISTER(SS, 27)
+CV_REGISTER(DS, 28)
+CV_REGISTER(FS, 29)
+CV_REGISTER(GS, 30)
+CV_REGISTER(IP, 31)
+CV_REGISTER(FLAGS, 32)
+CV_REGISTER(EIP, 33)
+CV_REGISTER(EFLAGS, 34)
+CV_REGISTER(TEMP, 40)
+CV_REGISTER(TEMPH, 41)
+CV_REGISTER(QUOTE, 42)
+CV_REGISTER(PCDR3, 43)
+CV_REGISTER(PCDR4, 44)
+CV_REGISTER(PCDR5, 45)
+CV_REGISTER(PCDR6, 46)
+CV_REGISTER(PCDR7, 47)
+CV_REGISTER(CR0, 80)
+CV_REGISTER(CR1, 81)
+CV_REGISTER(CR2, 82)
+CV_REGISTER(CR3, 83)
+CV_REGISTER(CR4, 84)
+CV_REGISTER(DR0, 90)
+CV_REGISTER(DR1, 91)
+CV_REGISTER(DR2, 92)
+CV_REGISTER(DR3, 93)
+CV_REGISTER(DR4, 94)
+CV_REGISTER(DR5, 95)
+CV_REGISTER(DR6, 96)
+CV_REGISTER(DR7, 97)
+CV_REGISTER(GDTR, 110)
+CV_REGISTER(GDTL, 111)
+CV_REGISTER(IDTR, 112)
+CV_REGISTER(IDTL, 113)
+CV_REGISTER(LDTR, 114)
+CV_REGISTER(TR, 115)
+
+CV_REGISTER(PSEUDO1, 116)
+CV_REGISTER(PSEUDO2, 117)
+CV_REGISTER(PSEUDO3, 118)
+CV_REGISTER(PSEUDO4, 119)
+CV_REGISTER(PSEUDO5, 120)
+CV_REGISTER(PSEUDO6, 121)
+CV_REGISTER(PSEUDO7, 122)
+CV_REGISTER(PSEUDO8, 123)
+CV_REGISTER(PSEUDO9, 124)
+
+CV_REGISTER(ST0, 128)
+CV_REGISTER(ST1, 129)
+CV_REGISTER(ST2, 130)
+CV_REGISTER(ST3, 131)
+CV_REGISTER(ST4, 132)
+CV_REGISTER(ST5, 133)
+CV_REGISTER(ST6, 134)
+CV_REGISTER(ST7, 135)
+CV_REGISTER(CTRL, 136)
+CV_REGISTER(STAT, 137)
+CV_REGISTER(TAG, 138)
+CV_REGISTER(FPIP, 139)
+CV_REGISTER(FPCS, 140)
+CV_REGISTER(FPDO, 141)
+CV_REGISTER(FPDS, 142)
+CV_REGISTER(ISEM, 143)
+CV_REGISTER(FPEIP, 144)
+CV_REGISTER(FPEDO, 145)
+
+CV_REGISTER(MM0, 146)
+CV_REGISTER(MM1, 147)
+CV_REGISTER(MM2, 148)
+CV_REGISTER(MM3, 149)
+CV_REGISTER(MM4, 150)
+CV_REGISTER(MM5, 151)
+CV_REGISTER(MM6, 152)
+CV_REGISTER(MM7, 153)
+
+CV_REGISTER(XMM0, 154)
+CV_REGISTER(XMM1, 155)
+CV_REGISTER(XMM2, 156)
+CV_REGISTER(XMM3, 157)
+CV_REGISTER(XMM4, 158)
+CV_REGISTER(XMM5, 159)
+CV_REGISTER(XMM6, 160)
+CV_REGISTER(XMM7, 161)
+
+CV_REGISTER(MXCSR, 211)
+
+CV_REGISTER(EDXEAX, 212)
+
+CV_REGISTER(EMM0L, 220)
+CV_REGISTER(EMM1L, 221)
+CV_REGISTER(EMM2L, 222)
+CV_REGISTER(EMM3L, 223)
+CV_REGISTER(EMM4L, 224)
+CV_REGISTER(EMM5L, 225)
+CV_REGISTER(EMM6L, 226)
+CV_REGISTER(EMM7L, 227)
+
+CV_REGISTER(EMM0H, 228)
+CV_REGISTER(EMM1H, 229)
+CV_REGISTER(EMM2H, 230)
+CV_REGISTER(EMM3H, 231)
+CV_REGISTER(EMM4H, 232)
+CV_REGISTER(EMM5H, 233)
+CV_REGISTER(EMM6H, 234)
+CV_REGISTER(EMM7H, 235)
+
+CV_REGISTER(MM00, 236)
+CV_REGISTER(MM01, 237)
+CV_REGISTER(MM10, 238)
+CV_REGISTER(MM11, 239)
+CV_REGISTER(MM20, 240)
+CV_REGISTER(MM21, 241)
+CV_REGISTER(MM30, 242)
+CV_REGISTER(MM31, 243)
+CV_REGISTER(MM40, 244)
+CV_REGISTER(MM41, 245)
+CV_REGISTER(MM50, 246)
+CV_REGISTER(MM51, 247)
+CV_REGISTER(MM60, 248)
+CV_REGISTER(MM61, 249)
+CV_REGISTER(MM70, 250)
+CV_REGISTER(MM71, 251)
+
+CV_REGISTER(BND0, 396)
+CV_REGISTER(BND1, 397)
+CV_REGISTER(BND2, 398)
+
+
+CV_REGISTER(XMM8, 252)
+CV_REGISTER(XMM9, 253)
+CV_REGISTER(XMM10, 254)
+CV_REGISTER(XMM11, 255)
+CV_REGISTER(XMM12, 256)
+CV_REGISTER(XMM13, 257)
+CV_REGISTER(XMM14, 258)
+CV_REGISTER(XMM15, 259)
+
+
+CV_REGISTER(SIL, 324)
+CV_REGISTER(DIL, 325)
+CV_REGISTER(BPL, 326)
+CV_REGISTER(SPL, 327)
+
+CV_REGISTER(RAX, 328)
+CV_REGISTER(RBX, 329)
+CV_REGISTER(RCX, 330)
+CV_REGISTER(RDX, 331)
+CV_REGISTER(RSI, 332)
+CV_REGISTER(RDI, 333)
+CV_REGISTER(RBP, 334)
+CV_REGISTER(RSP, 335)
+
+CV_REGISTER(R8, 336)
+CV_REGISTER(R9, 337)
+CV_REGISTER(R10, 338)
+CV_REGISTER(R11, 339)
+CV_REGISTER(R12, 340)
+CV_REGISTER(R13, 341)
+CV_REGISTER(R14, 342)
+CV_REGISTER(R15, 343)
+
+CV_REGISTER(R8B, 344)
+CV_REGISTER(R9B, 345)
+CV_REGISTER(R10B, 346)
+CV_REGISTER(R11B, 347)
+CV_REGISTER(R12B, 348)
+CV_REGISTER(R13B, 349)
+CV_REGISTER(R14B, 350)
+CV_REGISTER(R15B, 351)
+
+CV_REGISTER(R8W, 352)
+CV_REGISTER(R9W, 353)
+CV_REGISTER(R10W, 354)
+CV_REGISTER(R11W, 355)
+CV_REGISTER(R12W, 356)
+CV_REGISTER(R13W, 357)
+CV_REGISTER(R14W, 358)
+CV_REGISTER(R15W, 359)
+
+CV_REGISTER(R8D, 360)
+CV_REGISTER(R9D, 361)
+CV_REGISTER(R10D, 362)
+CV_REGISTER(R11D, 363)
+CV_REGISTER(R12D, 364)
+CV_REGISTER(R13D, 365)
+CV_REGISTER(R14D, 366)
+CV_REGISTER(R15D, 367)
// cvconst.h defines both CV_REG_YMM0 (252) and CV_AMD64_YMM0 (368). Keep the
// original prefix to distinguish them.
-CV_REGISTER(CVRegAMD64_YMM0, 368)
-CV_REGISTER(CVRegAMD64_YMM1, 369)
-CV_REGISTER(CVRegAMD64_YMM2, 370)
-CV_REGISTER(CVRegAMD64_YMM3, 371)
-CV_REGISTER(CVRegAMD64_YMM4, 372)
-CV_REGISTER(CVRegAMD64_YMM5, 373)
-CV_REGISTER(CVRegAMD64_YMM6, 374)
-CV_REGISTER(CVRegAMD64_YMM7, 375)
-CV_REGISTER(CVRegAMD64_YMM8, 376)
-CV_REGISTER(CVRegAMD64_YMM9, 377)
-CV_REGISTER(CVRegAMD64_YMM10, 378)
-CV_REGISTER(CVRegAMD64_YMM11, 379)
-CV_REGISTER(CVRegAMD64_YMM12, 380)
-CV_REGISTER(CVRegAMD64_YMM13, 381)
-CV_REGISTER(CVRegAMD64_YMM14, 382)
-CV_REGISTER(CVRegAMD64_YMM15, 383)
+CV_REGISTER(AMD64_YMM0, 368)
+CV_REGISTER(AMD64_YMM1, 369)
+CV_REGISTER(AMD64_YMM2, 370)
+CV_REGISTER(AMD64_YMM3, 371)
+CV_REGISTER(AMD64_YMM4, 372)
+CV_REGISTER(AMD64_YMM5, 373)
+CV_REGISTER(AMD64_YMM6, 374)
+CV_REGISTER(AMD64_YMM7, 375)
+CV_REGISTER(AMD64_YMM8, 376)
+CV_REGISTER(AMD64_YMM9, 377)
+CV_REGISTER(AMD64_YMM10, 378)
+CV_REGISTER(AMD64_YMM11, 379)
+CV_REGISTER(AMD64_YMM12, 380)
+CV_REGISTER(AMD64_YMM13, 381)
+CV_REGISTER(AMD64_YMM14, 382)
+CV_REGISTER(AMD64_YMM15, 383)
+
+CV_REGISTER(AMD64_XMM16, 694)
+CV_REGISTER(AMD64_XMM17, 695)
+CV_REGISTER(AMD64_XMM18, 696)
+CV_REGISTER(AMD64_XMM19, 697)
+CV_REGISTER(AMD64_XMM20, 698)
+CV_REGISTER(AMD64_XMM21, 699)
+CV_REGISTER(AMD64_XMM22, 700)
+CV_REGISTER(AMD64_XMM23, 701)
+CV_REGISTER(AMD64_XMM24, 702)
+CV_REGISTER(AMD64_XMM25, 703)
+CV_REGISTER(AMD64_XMM26, 704)
+CV_REGISTER(AMD64_XMM27, 705)
+CV_REGISTER(AMD64_XMM28, 706)
+CV_REGISTER(AMD64_XMM29, 707)
+CV_REGISTER(AMD64_XMM30, 708)
+CV_REGISTER(AMD64_XMM31, 709)
+
+CV_REGISTER(AMD64_YMM16, 710)
+CV_REGISTER(AMD64_YMM17, 711)
+CV_REGISTER(AMD64_YMM18, 712)
+CV_REGISTER(AMD64_YMM19, 713)
+CV_REGISTER(AMD64_YMM20, 714)
+CV_REGISTER(AMD64_YMM21, 715)
+CV_REGISTER(AMD64_YMM22, 716)
+CV_REGISTER(AMD64_YMM23, 717)
+CV_REGISTER(AMD64_YMM24, 718)
+CV_REGISTER(AMD64_YMM25, 719)
+CV_REGISTER(AMD64_YMM26, 720)
+CV_REGISTER(AMD64_YMM27, 721)
+CV_REGISTER(AMD64_YMM28, 722)
+CV_REGISTER(AMD64_YMM29, 723)
+CV_REGISTER(AMD64_YMM30, 724)
+CV_REGISTER(AMD64_YMM31, 725)
+
+CV_REGISTER(AMD64_ZMM0, 726)
+CV_REGISTER(AMD64_ZMM1, 727)
+CV_REGISTER(AMD64_ZMM2, 728)
+CV_REGISTER(AMD64_ZMM3, 729)
+CV_REGISTER(AMD64_ZMM4, 730)
+CV_REGISTER(AMD64_ZMM5, 731)
+CV_REGISTER(AMD64_ZMM6, 732)
+CV_REGISTER(AMD64_ZMM7, 733)
+CV_REGISTER(AMD64_ZMM8, 734)
+CV_REGISTER(AMD64_ZMM9, 735)
+CV_REGISTER(AMD64_ZMM10, 736)
+CV_REGISTER(AMD64_ZMM11, 737)
+CV_REGISTER(AMD64_ZMM12, 738)
+CV_REGISTER(AMD64_ZMM13, 739)
+CV_REGISTER(AMD64_ZMM14, 740)
+CV_REGISTER(AMD64_ZMM15, 741)
+CV_REGISTER(AMD64_ZMM16, 742)
+CV_REGISTER(AMD64_ZMM17, 743)
+CV_REGISTER(AMD64_ZMM18, 744)
+CV_REGISTER(AMD64_ZMM19, 745)
+CV_REGISTER(AMD64_ZMM20, 746)
+CV_REGISTER(AMD64_ZMM21, 747)
+CV_REGISTER(AMD64_ZMM22, 748)
+CV_REGISTER(AMD64_ZMM23, 749)
+CV_REGISTER(AMD64_ZMM24, 750)
+CV_REGISTER(AMD64_ZMM25, 751)
+CV_REGISTER(AMD64_ZMM26, 752)
+CV_REGISTER(AMD64_ZMM27, 753)
+CV_REGISTER(AMD64_ZMM28, 754)
+CV_REGISTER(AMD64_ZMM29, 755)
+CV_REGISTER(AMD64_ZMM30, 756)
+CV_REGISTER(AMD64_ZMM31, 757)
+
+CV_REGISTER(AMD64_K0, 758)
+CV_REGISTER(AMD64_K1, 759)
+CV_REGISTER(AMD64_K2, 760)
+CV_REGISTER(AMD64_K3, 761)
+CV_REGISTER(AMD64_K4, 762)
+CV_REGISTER(AMD64_K5, 763)
+CV_REGISTER(AMD64_K6, 764)
+CV_REGISTER(AMD64_K7, 765)
+
+#pragma pop_macro("CR0")
+#pragma pop_macro("CR1")
+#pragma pop_macro("CR2")
+#pragma pop_macro("CR3")
+#pragma pop_macro("CR4")
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
index 1e329c7c3f14..847d93f0e985 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
@@ -13,6 +13,7 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
namespace llvm {
@@ -26,21 +27,23 @@ public:
}
Error initialize(BinaryStreamReader Reader);
+ Error initialize(BinaryStreamRef Stream);
FixedStreamArray<FrameData>::Iterator begin() const { return Frames.begin(); }
FixedStreamArray<FrameData>::Iterator end() const { return Frames.end(); }
- const void *getRelocPtr() const { return RelocPtr; }
+ const support::ulittle32_t *getRelocPtr() const { return RelocPtr; }
private:
- const uint32_t *RelocPtr = nullptr;
+ const support::ulittle32_t *RelocPtr = nullptr;
FixedStreamArray<FrameData> Frames;
};
class DebugFrameDataSubsection final : public DebugSubsection {
public:
- DebugFrameDataSubsection()
- : DebugSubsection(DebugSubsectionKind::FrameData) {}
+ DebugFrameDataSubsection(bool IncludeRelocPtr)
+ : DebugSubsection(DebugSubsectionKind::FrameData),
+ IncludeRelocPtr(IncludeRelocPtr) {}
static bool classof(const DebugSubsection *S) {
return S->kind() == DebugSubsectionKind::FrameData;
}
@@ -52,6 +55,7 @@ public:
void setFrames(ArrayRef<FrameData> Frames);
private:
+ bool IncludeRelocPtr = false;
std::vector<FrameData> Frames;
};
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
index 58449c2c7565..36237e1a4d9e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
@@ -180,26 +180,6 @@ template <typename T> serialize_numeric_impl<T> serialize_numeric(T &Item) {
return serialize_numeric_impl<T>(Item);
}
-// This field is only present in the byte record if the condition is true. The
-// condition is evaluated lazily, so it can depend on items that were
-// deserialized
-// earlier.
-#define CV_CONDITIONAL_FIELD(I, C) \
- serialize_conditional(I, [&]() { return !!(C); })
-
-// This is an array of N items, where N is evaluated lazily, so it can refer
-// to a field deserialized earlier.
-#define CV_ARRAY_FIELD_N(I, N) serialize_array(I, [&]() { return N; })
-
-// This is an array that exhausts the remainder of the input buffer.
-#define CV_ARRAY_FIELD_TAIL(I) serialize_array_tail(I)
-
-// This is an array that consumes null terminated strings until a double null
-// is encountered.
-#define CV_STRING_ARRAY_NULL_TERM(I) serialize_null_term_string_array(I)
-
-#define CV_NUMERIC_FIELD(I) serialize_numeric(I)
-
template <typename T, typename U>
Error consume(BinaryStreamReader &Reader,
const serialize_conditional_impl<T, U> &Item) {
@@ -242,9 +222,6 @@ Error consume(BinaryStreamReader &Reader, T &&X, U &&Y, Args &&... Rest) {
return consume(Reader, Y, std::forward<Args>(Rest)...);
}
-#define CV_DESERIALIZE(...) \
- if (auto EC = consume(__VA_ARGS__)) \
- return std::move(EC);
}
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
index b5479db97a15..6b5dd2d20d17 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
@@ -47,7 +47,7 @@ public:
return Error::success();
}
template <typename T> static Expected<T> deserializeAs(CVSymbol Symbol) {
- T Record(Symbol.kind());
+ T Record(static_cast<SymbolRecordKind>(Symbol.kind()));
if (auto EC = deserializeAs<T>(Symbol, Record))
return std::move(EC);
return Record;
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
index 293daa851bdd..215da2e2b522 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
@@ -27,10 +27,10 @@ class CVSymbolDumper {
public:
CVSymbolDumper(ScopedPrinter &W, TypeCollection &Types,
CodeViewContainer Container,
- std::unique_ptr<SymbolDumpDelegate> ObjDelegate,
+ std::unique_ptr<SymbolDumpDelegate> ObjDelegate, CPUType CPU,
bool PrintRecordBytes)
: W(W), Types(Types), Container(Container),
- ObjDelegate(std::move(ObjDelegate)),
+ ObjDelegate(std::move(ObjDelegate)), CompilationCPUType(CPU),
PrintRecordBytes(PrintRecordBytes) {}
/// Dumps one type record. Returns false if there was a type parsing error,
@@ -43,12 +43,14 @@ public:
/// parse error, and true otherwise.
Error dump(const CVSymbolArray &Symbols);
+ CPUType getCompilationCPUType() const { return CompilationCPUType; }
+
private:
ScopedPrinter &W;
TypeCollection &Types;
CodeViewContainer Container;
std::unique_ptr<SymbolDumpDelegate> ObjDelegate;
-
+ CPUType CompilationCPUType;
bool PrintRecordBytes;
};
} // end namespace codeview
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index 93306824012e..b58825c4a788 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -358,6 +358,7 @@ public:
// S_PUB32
class PublicSym32 : public SymbolRecord {
public:
+ PublicSym32() : SymbolRecord(SymbolRecordKind::PublicSym32) {}
explicit PublicSym32(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
explicit PublicSym32(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::PublicSym32),
@@ -399,6 +400,7 @@ public:
uint16_t Module;
StringRef Name;
+ uint16_t modi() const { return Module - 1; }
uint32_t RecordOffset;
};
@@ -636,6 +638,7 @@ public:
// S_OBJNAME
class ObjNameSym : public SymbolRecord {
public:
+ explicit ObjNameSym() : SymbolRecord(SymbolRecordKind::ObjNameSym) {}
explicit ObjNameSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
ObjNameSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::ObjNameSym), RecordOffset(RecordOffset) {
@@ -718,6 +721,7 @@ public:
// S_COMPILE3
class Compile3Sym : public SymbolRecord {
public:
+ Compile3Sym() : SymbolRecord(SymbolRecordKind::Compile3Sym) {}
explicit Compile3Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
Compile3Sym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::Compile3Sym),
@@ -739,8 +743,17 @@ public:
Flags = CompileSym3Flags((uint32_t(Flags) & 0xFFFFFF00) | uint32_t(Lang));
}
- uint8_t getLanguage() const { return static_cast<uint32_t>(Flags) & 0xFF; }
- uint32_t getFlags() const { return static_cast<uint32_t>(Flags) & ~0xFF; }
+ SourceLanguage getLanguage() const {
+ return static_cast<SourceLanguage>(static_cast<uint32_t>(Flags) & 0xFF);
+ }
+ CompileSym3Flags getFlags() const {
+ return static_cast<CompileSym3Flags>(static_cast<uint32_t>(Flags) & ~0xFF);
+ }
+
+ bool hasOptimizations() const {
+ return CompileSym3Flags::None !=
+ (getFlags() & (CompileSym3Flags::PGO | CompileSym3Flags::LTCG));
+ }
uint32_t RecordOffset;
};
@@ -761,7 +774,21 @@ public:
uint16_t SectionIdOfExceptionHandler;
FrameProcedureOptions Flags;
+ /// Extract the register this frame uses to refer to local variables.
+ RegisterId getLocalFramePtrReg(CPUType CPU) const {
+ return decodeFramePtrReg(
+ EncodedFramePtrReg((uint32_t(Flags) >> 14U) & 0x3U), CPU);
+ }
+
+ /// Extract the register this frame uses to refer to parameters.
+ RegisterId getParamFramePtrReg(CPUType CPU) const {
+ return decodeFramePtrReg(
+ EncodedFramePtrReg((uint32_t(Flags) >> 16U) & 0x3U), CPU);
+ }
+
uint32_t RecordOffset;
+
+private:
};
// S_CALLSITEINFO
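
FrameProcSym above extracts the two-bit selectors that the CodeView.h hunk adds to FrameProcedureOptions (EncodedLocalBasePointerMask at bits 14-15, EncodedParamBasePointerMask at bits 16-17) and passes them to decodeFramePtrReg. A minimal standalone sketch of that bit extraction, using plain integers and an illustrative enum rather than the LLVM types:

#include <cstdint>
#include <cstdio>

// Mirrors EncodedFramePtrReg: a two-bit selector stored in the S_FRAMEPROC flags.
enum class DemoEncodedFramePtrReg : uint8_t { None = 0, StackPtr, FramePtr, BasePtr };

// Bits 14-15 select the register used for locals, bits 16-17 the one used for
// parameters, matching getLocalFramePtrReg and getParamFramePtrReg above.
static DemoEncodedFramePtrReg localFramePtr(uint32_t Flags) {
  return DemoEncodedFramePtrReg((Flags >> 14) & 0x3u);
}
static DemoEncodedFramePtrReg paramFramePtr(uint32_t Flags) {
  return DemoEncodedFramePtrReg((Flags >> 16) & 0x3u);
}

int main() {
  // Locals addressed through the frame pointer, parameters through the stack pointer.
  uint32_t Flags = (2u << 14) | (1u << 16);
  std::printf("locals=%u params=%u\n",
              unsigned(localFramePtr(Flags)), unsigned(paramFramePtr(Flags)));
  return 0;
}
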
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
new file mode 100644
index 000000000000..3713fe118eaa
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
@@ -0,0 +1,62 @@
+//===- SymbolRecordHelpers.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDHELPERS_H
+#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDHELPERS_H
+
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+
+namespace llvm {
+namespace codeview {
+/// Return true if this symbol opens a scope. This implies that the symbol has
+/// "parent" and "end" fields, which contain the offset of the S_END or
+/// S_INLINESITE_END record.
+inline bool symbolOpensScope(SymbolKind Kind) {
+ switch (Kind) {
+ case SymbolKind::S_GPROC32:
+ case SymbolKind::S_LPROC32:
+ case SymbolKind::S_LPROC32_ID:
+ case SymbolKind::S_GPROC32_ID:
+ case SymbolKind::S_BLOCK32:
+ case SymbolKind::S_SEPCODE:
+ case SymbolKind::S_THUNK32:
+ case SymbolKind::S_INLINESITE:
+ case SymbolKind::S_INLINESITE2:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// Return true if this symbol ends a scope.
+inline bool symbolEndsScope(SymbolKind Kind) {
+ switch (Kind) {
+ case SymbolKind::S_END:
+ case SymbolKind::S_PROC_ID_END:
+ case SymbolKind::S_INLINESITE_END:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// Given a symbol P for which symbolOpensScope(P) == true, return the
+/// corresponding end offset.
+uint32_t getScopeEndOffset(const CVSymbol &Symbol);
+uint32_t getScopeParentOffset(const CVSymbol &Symbol);
+
+CVSymbolArray limitSymbolArrayToScope(const CVSymbolArray &Symbols,
+ uint32_t ScopeBegin);
+
+} // namespace codeview
+} // namespace llvm
+
+#endif
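
symbolOpensScope and symbolEndsScope above let a reader of a flat symbol stream track nesting with nothing more than a depth counter, which is the kind of bookkeeping a scope-aware dumper needs. A standalone sketch over a toy vector of kinds rather than a real CVSymbolArray (DemoKind and the stream contents are illustrative only):

#include <cstdio>
#include <vector>

// Stand-in symbol kinds; the real predicates take codeview::SymbolKind.
enum class DemoKind { ProcStart, Block, Data, End };

static bool opensScope(DemoKind K) {
  return K == DemoKind::ProcStart || K == DemoKind::Block;
}
static bool endsScope(DemoKind K) { return K == DemoKind::End; }

int main() {
  // Roughly: S_GPROC32 { S_BLOCK32 { S_LDATA32 } S_END } S_END.
  std::vector<DemoKind> Stream = {DemoKind::ProcStart, DemoKind::Block,
                                  DemoKind::Data, DemoKind::End, DemoKind::End};
  unsigned Depth = 0;
  for (DemoKind K : Stream) {
    if (endsScope(K))
      --Depth;                        // closing record belongs to the outer depth
    std::printf("%*srecord at depth %u\n", int(Depth * 2), "", Depth);
    if (opensScope(K))
      ++Depth;                        // everything that follows is nested deeper
  }
  return 0;
}
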
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
index c71281de7145..58463a6b13df 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
@@ -134,6 +134,8 @@ public:
return static_cast<SimpleTypeMode>(Index & SimpleModeMask);
}
+ TypeIndex makeDirect() const { return TypeIndex{getSimpleKind()}; }
+
static TypeIndex None() { return TypeIndex(SimpleTypeKind::None); }
static TypeIndex Void() { return TypeIndex(SimpleTypeKind::Void); }
static TypeIndex VoidPointer32() {
@@ -143,6 +145,13 @@ public:
return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer64);
}
+ static TypeIndex NullptrT() {
+ // std::nullptr_t uses the pointer mode that doesn't indicate bit-width,
+ // presumably because std::nullptr_t is intended to be compatible with any
+ // pointer type.
+ return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer);
+ }
+
static TypeIndex SignedCharacter() {
return TypeIndex(SimpleTypeKind::SignedCharacter);
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
index 61ebdf878ce7..7b4a30ee622d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
@@ -95,6 +95,11 @@ struct MemberAttributes {
return MP == MethodKind::IntroducingVirtual ||
MP == MethodKind::PureIntroducingVirtual;
}
+
+ /// Is this method static.
+ bool isStatic() const {
+ return getMethodKind() == MethodKind::Static;
+ }
};
// Does not correspond to any tag, this is the tail of an LF_POINTER record
@@ -264,14 +269,18 @@ public:
// LF_POINTER
class PointerRecord : public TypeRecord {
public:
+ // ---------------------------XXXXX
static const uint32_t PointerKindShift = 0;
static const uint32_t PointerKindMask = 0x1F;
+ // ------------------------XXX-----
static const uint32_t PointerModeShift = 5;
static const uint32_t PointerModeMask = 0x07;
- static const uint32_t PointerOptionMask = 0xFF;
+ // ----------XXX------XXXXX--------
+ static const uint32_t PointerOptionMask = 0x381f00;
+ // -----------XXXXXXXX-------------
static const uint32_t PointerSizeShift = 13;
static const uint32_t PointerSizeMask = 0xFF;
@@ -305,7 +314,7 @@ public:
}
PointerOptions getOptions() const {
- return static_cast<PointerOptions>(Attrs);
+ return static_cast<PointerOptions>(Attrs & PointerOptionMask);
}
uint8_t getSize() const {
@@ -334,6 +343,14 @@ public:
return !!(Attrs & uint32_t(PointerOptions::Restrict));
}
+ bool isLValueReferenceThisPtr() const {
+ return !!(Attrs & uint32_t(PointerOptions::LValueRefThisPointer));
+ }
+
+ bool isRValueReferenceThisPtr() const {
+ return !!(Attrs & uint32_t(PointerOptions::RValueRefThisPointer));
+ }
+
TypeIndex ReferentType;
uint32_t Attrs;
Optional<MemberPointerInfo> MemberInfo;
@@ -429,6 +446,14 @@ public:
return (Options & ClassOptions::ForwardReference) != ClassOptions::None;
}
+ bool containsNestedClass() const {
+ return (Options & ClassOptions::ContainsNestedClass) != ClassOptions::None;
+ }
+
+ bool isScoped() const {
+ return (Options & ClassOptions::Scoped) != ClassOptions::None;
+ }
+
uint16_t getMemberCount() const { return MemberCount; }
ClassOptions getOptions() const { return Options; }
TypeIndex getFieldList() const { return FieldList; }
@@ -655,7 +680,17 @@ public:
ArrayRef<TypeIndex> getArgs() const { return ArgIndices; }
- SmallVector<TypeIndex, 4> ArgIndices;
+ /// Indices of known build info arguments.
+ enum BuildInfoArg {
+ CurrentDirectory, ///< Absolute CWD path
+ BuildTool, ///< Absolute compiler path
+ SourceFile, ///< Path to main source file, relative or absolute
+ TypeServerPDB, ///< Absolute path of type server PDB (/Fd)
+ CommandLine, ///< Full canonical command line (maybe -cc1)
+ MaxArgs
+ };
+
+ SmallVector<TypeIndex, MaxArgs> ArgIndices;
};
// LF_VFTABLE
@@ -923,6 +958,7 @@ public:
uint32_t Signature;
};
+
} // end namespace codeview
} // end namespace llvm
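
PointerRecord above packs kind, mode, options and size into a single 32-bit Attrs word, and getOptions now masks with PointerOptionMask instead of handing back the raw word. A standalone sketch of the same field extraction, reusing the shifts and masks from the hunk (the sample attribute value is illustrative):

#include <cstdint>
#include <cstdio>

// Field layout of the LF_POINTER attribute word, as documented in PointerRecord.
constexpr uint32_t KindShift = 0;
constexpr uint32_t KindMask = 0x1F;
constexpr uint32_t ModeShift = 5;
constexpr uint32_t ModeMask = 0x07;
constexpr uint32_t OptionMask = 0x381f00;  // option bits are tested in place
constexpr uint32_t SizeShift = 13;
constexpr uint32_t SizeMask = 0xFF;

static unsigned ptrKind(uint32_t Attrs) { return (Attrs >> KindShift) & KindMask; }
static unsigned ptrMode(uint32_t Attrs) { return (Attrs >> ModeShift) & ModeMask; }
static unsigned ptrOptions(uint32_t Attrs) { return Attrs & OptionMask; }
static unsigned ptrSize(uint32_t Attrs) { return (Attrs >> SizeShift) & SizeMask; }

int main() {
  // A 64-bit near pointer (kind 0x0C), 8 bytes wide, with the Const option
  // bit (0x400, see PointerOptions above) set.
  uint32_t Attrs = 0x0C | (8u << SizeShift) | 0x400;
  std::printf("kind=0x%x mode=%u size=%u options=0x%x\n",
              ptrKind(Attrs), ptrMode(Attrs), ptrSize(Attrs), ptrOptions(Attrs));
  return 0;
}
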
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
new file mode 100644
index 000000000000..389472ed1aea
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
@@ -0,0 +1,28 @@
+//===- TypeRecordHelpers.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORDHELPERS_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDHELPERS_H
+
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+
+namespace llvm {
+ namespace codeview {
+ /// Given an arbitrary codeview type, determine if it is an LF_STRUCTURE,
+ /// LF_CLASS, LF_INTERFACE, LF_UNION, or LF_ENUM with the forward ref class
+ /// option.
+ bool isUdtForwardRef(CVType CVT);
+
+ /// Given a CVType which is assumed to be an LF_MODIFIER, return the
+ /// TypeIndex of the type that the LF_MODIFIER modifies.
+ TypeIndex getModifiedType(const CVType &CVT);
+ }
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
index 583740d2eb4b..0b9f54ec60bf 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
@@ -83,18 +83,21 @@ Error mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef<TypeIndex> Types,
Error mergeTypeAndIdRecords(MergingTypeTableBuilder &DestIds,
MergingTypeTableBuilder &DestTypes,
SmallVectorImpl<TypeIndex> &SourceToDest,
- const CVTypeArray &IdsAndTypes);
+ const CVTypeArray &IdsAndTypes,
+ Optional<uint32_t> &PCHSignature);
Error mergeTypeAndIdRecords(GlobalTypeTableBuilder &DestIds,
GlobalTypeTableBuilder &DestTypes,
SmallVectorImpl<TypeIndex> &SourceToDest,
const CVTypeArray &IdsAndTypes,
- ArrayRef<GloballyHashedType> Hashes);
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &PCHSignature);
Error mergeTypeRecords(GlobalTypeTableBuilder &Dest,
SmallVectorImpl<TypeIndex> &SourceToDest,
const CVTypeArray &Types,
- ArrayRef<GloballyHashedType> Hashes);
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &PCHSignature);
Error mergeIdRecords(GlobalTypeTableBuilder &Dest, ArrayRef<TypeIndex> Types,
SmallVectorImpl<TypeIndex> &SourceToDest,
diff --git a/contrib/llvm/include/llvm/DebugInfo/DIContext.h b/contrib/llvm/include/llvm/DebugInfo/DIContext.h
index bbdd5e0d9c3f..85e96402a246 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DIContext.h
@@ -81,7 +81,7 @@ class DIInliningInfo {
public:
DIInliningInfo() = default;
- DILineInfo getFrame(unsigned Index) const {
+ const DILineInfo &getFrame(unsigned Index) const {
assert(Index < Frames.size());
return Frames[Index];
}
@@ -98,6 +98,11 @@ public:
void addFrame(const DILineInfo &Frame) {
Frames.push_back(Frame);
}
+
+ void resize(unsigned i) {
+ Frames.resize(i);
+ }
+
};
/// Container for description of a global variable.
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
index c219ca75e640..33797419a7b8 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
@@ -18,20 +18,20 @@ namespace llvm {
class DWARFCompileUnit : public DWARFUnit {
public:
DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section,
- const DWARFUnitHeader &Header,
- const DWARFDebugAbbrev *DA, const DWARFSection *RS,
+ const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA,
+ const DWARFSection *RS, const DWARFSection *LocSection,
StringRef SS, const DWARFSection &SOS,
const DWARFSection *AOS, const DWARFSection &LS, bool LE,
- bool IsDWO, const DWARFUnitSectionBase &UnitSection)
- : DWARFUnit(Context, Section, Header, DA, RS, SS, SOS, AOS, LS, LE, IsDWO,
- UnitSection) {}
+ bool IsDWO, const DWARFUnitVector &UnitVector)
+ : DWARFUnit(Context, Section, Header, DA, RS, LocSection, SS, SOS, AOS,
+ LS, LE, IsDWO, UnitVector) {}
- // VTable anchor.
+ /// VTable anchor.
~DWARFCompileUnit() override;
-
- void dump(raw_ostream &OS, DIDumpOptions DumpOpts);
-
- static const DWARFSectionKind Section = DW_SECT_INFO;
+ /// Dump this compile unit to \p OS.
+ void dump(raw_ostream &OS, DIDumpOptions DumpOpts) override;
+ /// Enable LLVM-style RTTI.
+ static bool classof(const DWARFUnit *U) { return !U->isTypeUnit(); }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index f5419fe02421..dbb6be04544b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -57,8 +57,7 @@ enum class ErrorPolicy { Halt, Continue };
/// This data structure is the top level entity that deals with dwarf debug
/// information parsing. The actual data is supplied through DWARFObj.
class DWARFContext : public DIContext {
- DWARFUnitSection<DWARFCompileUnit> CUs;
- std::deque<DWARFUnitSection<DWARFTypeUnit>> TUs;
+ DWARFUnitVector NormalUnits;
std::unique_ptr<DWARFUnitIndex> CUIndex;
std::unique_ptr<DWARFGdbIndex> GdbIndex;
std::unique_ptr<DWARFUnitIndex> TUIndex;
@@ -75,10 +74,9 @@ class DWARFContext : public DIContext {
std::unique_ptr<AppleAcceleratorTable> AppleNamespaces;
std::unique_ptr<AppleAcceleratorTable> AppleObjC;
- DWARFUnitSection<DWARFCompileUnit> DWOCUs;
- std::deque<DWARFUnitSection<DWARFTypeUnit>> DWOTUs;
+ DWARFUnitVector DWOUnits;
std::unique_ptr<DWARFDebugAbbrev> AbbrevDWO;
- std::unique_ptr<DWARFDebugLocDWO> LocDWO;
+ std::unique_ptr<DWARFDebugLoclists> LocDWO;
/// The maximum DWARF version of all units.
unsigned MaxVersion = 0;
@@ -95,22 +93,17 @@ class DWARFContext : public DIContext {
std::unique_ptr<MCRegisterInfo> RegInfo;
/// Read compile units from the debug_info section (if necessary)
- /// and store them in CUs.
- void parseCompileUnits();
-
- /// Read type units from the debug_types sections (if necessary)
- /// and store them in TUs.
- void parseTypeUnits();
+ /// and type units from the debug_types sections (if necessary)
+ /// and store them in NormalUnits.
+ void parseNormalUnits();
/// Read compile units from the debug_info.dwo section (if necessary)
- /// and store them in DWOCUs.
- void parseDWOCompileUnits();
-
- /// Read type units from the debug_types.dwo section (if necessary)
- /// and store them in DWOTUs.
- void parseDWOTypeUnits();
+ /// and type units from the debug_types.dwo section (if necessary)
+ /// and store them in DWOUnits.
+ /// If \p Lazy is true, set up to parse but don't actually parse them.
+ enum { EagerParse = false, LazyParse = true };
+ void parseDWOUnits(bool Lazy = false);
-protected:
std::unique_ptr<const DWARFObject> DObj;
public:
@@ -139,68 +132,95 @@ public:
bool verify(raw_ostream &OS, DIDumpOptions DumpOpts = {}) override;
- using cu_iterator_range = DWARFUnitSection<DWARFCompileUnit>::iterator_range;
- using tu_iterator_range = DWARFUnitSection<DWARFTypeUnit>::iterator_range;
- using tu_section_iterator_range = iterator_range<decltype(TUs)::iterator>;
+ using unit_iterator_range = DWARFUnitVector::iterator_range;
- /// Get compile units in this context.
- cu_iterator_range compile_units() {
- parseCompileUnits();
- return cu_iterator_range(CUs.begin(), CUs.end());
+ /// Get units from .debug_info in this context.
+ unit_iterator_range info_section_units() {
+ parseNormalUnits();
+ return unit_iterator_range(NormalUnits.begin(),
+ NormalUnits.begin() +
+ NormalUnits.getNumInfoUnits());
}
+ /// Get units from .debug_types in this context.
+ unit_iterator_range types_section_units() {
+ parseNormalUnits();
+ return unit_iterator_range(
+ NormalUnits.begin() + NormalUnits.getNumInfoUnits(), NormalUnits.end());
+ }
+
+ /// Get compile units in this context.
+ unit_iterator_range compile_units() { return info_section_units(); }
+
/// Get type units in this context.
- tu_section_iterator_range type_unit_sections() {
- parseTypeUnits();
- return tu_section_iterator_range(TUs.begin(), TUs.end());
+ unit_iterator_range type_units() { return types_section_units(); }
+
+ /// Get all normal compile/type units in this context.
+ unit_iterator_range normal_units() {
+ parseNormalUnits();
+ return unit_iterator_range(NormalUnits.begin(), NormalUnits.end());
}
- /// Get compile units in the DWO context.
- cu_iterator_range dwo_compile_units() {
- parseDWOCompileUnits();
- return cu_iterator_range(DWOCUs.begin(), DWOCUs.end());
+ /// Get units from .debug_info.dwo in the DWO context.
+ unit_iterator_range dwo_info_section_units() {
+ parseDWOUnits();
+ return unit_iterator_range(DWOUnits.begin(),
+ DWOUnits.begin() + DWOUnits.getNumInfoUnits());
+ }
+
+ /// Get units from .debug_types.dwo in the DWO context.
+ unit_iterator_range dwo_types_section_units() {
+ parseDWOUnits();
+ return unit_iterator_range(DWOUnits.begin() + DWOUnits.getNumInfoUnits(),
+ DWOUnits.end());
}
+ /// Get compile units in the DWO context.
+ unit_iterator_range dwo_compile_units() { return dwo_info_section_units(); }
+
/// Get type units in the DWO context.
- tu_section_iterator_range dwo_type_unit_sections() {
- parseDWOTypeUnits();
- return tu_section_iterator_range(DWOTUs.begin(), DWOTUs.end());
+ unit_iterator_range dwo_type_units() { return dwo_types_section_units(); }
+
+ /// Get all units in the DWO context.
+ unit_iterator_range dwo_units() {
+ parseDWOUnits();
+ return unit_iterator_range(DWOUnits.begin(), DWOUnits.end());
}
/// Get the number of compile units in this context.
unsigned getNumCompileUnits() {
- parseCompileUnits();
- return CUs.size();
+ parseNormalUnits();
+ return NormalUnits.getNumInfoUnits();
}
- /// Get the number of compile units in this context.
+ /// Get the number of type units in this context.
unsigned getNumTypeUnits() {
- parseTypeUnits();
- return TUs.size();
+ parseNormalUnits();
+ return NormalUnits.getNumTypesUnits();
}
/// Get the number of compile units in the DWO context.
unsigned getNumDWOCompileUnits() {
- parseDWOCompileUnits();
- return DWOCUs.size();
+ parseDWOUnits();
+ return DWOUnits.getNumInfoUnits();
}
- /// Get the number of compile units in the DWO context.
+ /// Get the number of type units in the DWO context.
unsigned getNumDWOTypeUnits() {
- parseDWOTypeUnits();
- return DWOTUs.size();
+ parseDWOUnits();
+ return DWOUnits.getNumTypesUnits();
}
- /// Get the compile unit at the specified index for this compile unit.
- DWARFCompileUnit *getCompileUnitAtIndex(unsigned index) {
- parseCompileUnits();
- return CUs[index].get();
+ /// Get the unit at the specified index.
+ DWARFUnit *getUnitAtIndex(unsigned index) {
+ parseNormalUnits();
+ return NormalUnits[index].get();
}
- /// Get the compile unit at the specified index for the DWO compile units.
- DWARFCompileUnit *getDWOCompileUnitAtIndex(unsigned index) {
- parseDWOCompileUnits();
- return DWOCUs[index].get();
+ /// Get the unit at the specified index for the DWO units.
+ DWARFUnit *getDWOUnitAtIndex(unsigned index) {
+ parseDWOUnits();
+ return DWOUnits[index].get();
}
DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash);
@@ -211,7 +231,17 @@ public:
/// Get a DIE given an exact offset.
DWARFDie getDIEForOffset(uint32_t Offset);
- unsigned getMaxVersion() const { return MaxVersion; }
+ unsigned getMaxVersion() {
+ // Ensure info units have been parsed to discover MaxVersion
+ info_section_units();
+ return MaxVersion;
+ }
+
+ unsigned getMaxDWOVersion() {
+ // Ensure DWO info units have been parsed to discover MaxVersion
+ dwo_info_section_units();
+ return MaxVersion;
+ }
void setMaxVersionIfGreater(unsigned Version) {
if (Version > MaxVersion)
@@ -232,7 +262,7 @@ public:
const DWARFDebugAbbrev *getDebugAbbrevDWO();
/// Get a pointer to the parsed DebugLoc object.
- const DWARFDebugLocDWO *getDebugLocDWO();
+ const DWARFDebugLoclists *getDebugLocDWO();
/// Get a pointer to the parsed DebugAranges object.
const DWARFDebugAranges *getDebugAranges();
@@ -327,6 +357,13 @@ public:
/// TODO: refactor compile_units() to make this const.
uint8_t getCUAddrSize();
+ /// Dump Error as warning message to stderr.
+ static void dumpWarning(Error Warning);
+
+ Triple::ArchType getArch() const {
+ return getDWARFObj().getFile()->getArch();
+ }
+
private:
/// Return the compile unit which contains instruction with provided
/// address.
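
The DWARFContext hunk above folds the separate compile-unit and type-unit containers into a single DWARFUnitVector behind range accessors, so existing callers keep using compile_units(), which now simply forwards to info_section_units(). A short usage sketch, assuming an LLVM tree that provides these headers and a DWARFContext that has already been constructed (for example with DWARFContext::create on an object file):

// Sketch only: requires LLVM's DebugInfo/DWARF headers and libraries.
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/raw_ostream.h"

static void listUnits(llvm::DWARFContext &Ctx) {
  // Units are parsed lazily the first time any of these accessors is used.
  for (const auto &CU : Ctx.compile_units())
    llvm::outs() << "compile unit at offset " << CU->getOffset() << "\n";
  llvm::outs() << Ctx.getNumCompileUnits() << " compile units, "
               << Ctx.getNumTypeUnits() << " type units\n";
}
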
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
index ff1c7fb38389..7dc07d774aba 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/Support/Error.h"
@@ -59,9 +60,11 @@ public:
unsigned size() const { return (unsigned)Instructions.size(); }
bool empty() const { return Instructions.empty(); }
- CFIProgram(uint64_t CodeAlignmentFactor, int64_t DataAlignmentFactor)
+ CFIProgram(uint64_t CodeAlignmentFactor, int64_t DataAlignmentFactor,
+ Triple::ArchType Arch)
: CodeAlignmentFactor(CodeAlignmentFactor),
- DataAlignmentFactor(DataAlignmentFactor) {}
+ DataAlignmentFactor(DataAlignmentFactor),
+ Arch(Arch) {}
/// Parse and store a sequence of CFI instructions from Data,
/// starting at *Offset and ending at EndOffset. *Offset is updated
@@ -76,6 +79,7 @@ private:
std::vector<Instruction> Instructions;
const uint64_t CodeAlignmentFactor;
const int64_t DataAlignmentFactor;
+ Triple::ArchType Arch;
/// Convenience method to add a new instruction with the given opcode.
void addInstruction(uint8_t Opcode) {
@@ -130,8 +134,9 @@ public:
enum FrameKind { FK_CIE, FK_FDE };
FrameEntry(FrameKind K, uint64_t Offset, uint64_t Length, uint64_t CodeAlign,
- int64_t DataAlign)
- : Kind(K), Offset(Offset), Length(Length), CFIs(CodeAlign, DataAlign) {}
+ int64_t DataAlign, Triple::ArchType Arch)
+ : Kind(K), Offset(Offset), Length(Length),
+ CFIs(CodeAlign, DataAlign, Arch) {}
virtual ~FrameEntry() {}
@@ -168,9 +173,9 @@ public:
int64_t DataAlignmentFactor, uint64_t ReturnAddressRegister,
SmallString<8> AugmentationData, uint32_t FDEPointerEncoding,
uint32_t LSDAPointerEncoding, Optional<uint64_t> Personality,
- Optional<uint32_t> PersonalityEnc)
+ Optional<uint32_t> PersonalityEnc, Triple::ArchType Arch)
: FrameEntry(FK_CIE, Offset, Length, CodeAlignmentFactor,
- DataAlignmentFactor),
+ DataAlignmentFactor, Arch),
Version(Version), Augmentation(std::move(Augmentation)),
AddressSize(AddressSize), SegmentDescriptorSize(SegmentDescriptorSize),
CodeAlignmentFactor(CodeAlignmentFactor),
@@ -224,10 +229,11 @@ public:
// is obtained lazily once it's actually required.
FDE(uint64_t Offset, uint64_t Length, int64_t LinkedCIEOffset,
uint64_t InitialLocation, uint64_t AddressRange, CIE *Cie,
- Optional<uint64_t> LSDAAddress)
+ Optional<uint64_t> LSDAAddress, Triple::ArchType Arch)
: FrameEntry(FK_FDE, Offset, Length,
Cie ? Cie->getCodeAlignmentFactor() : 0,
- Cie ? Cie->getDataAlignmentFactor() : 0),
+ Cie ? Cie->getDataAlignmentFactor() : 0,
+ Arch),
LinkedCIEOffset(LinkedCIEOffset), InitialLocation(InitialLocation),
AddressRange(AddressRange), LinkedCIE(Cie), LSDAAddress(LSDAAddress) {}
@@ -256,6 +262,7 @@ private:
/// A parsed .debug_frame or .eh_frame section
class DWARFDebugFrame {
+ const Triple::ArchType Arch;
// True if this is parsing an eh_frame section.
const bool IsEH;
// Not zero for sane pointer values coming out of eh_frame
@@ -272,7 +279,8 @@ public:
// it is a .debug_frame section. EHFrameAddress should be different
// than zero for correct parsing of .eh_frame addresses when they
// use a PC-relative encoding.
- DWARFDebugFrame(bool IsEH = false, uint64_t EHFrameAddress = 0);
+ DWARFDebugFrame(Triple::ArchType Arch,
+ bool IsEH = false, uint64_t EHFrameAddress = 0);
~DWARFDebugFrame();
/// Dump the section data into the given stream.
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 5b2af34bbcf5..d50af5a057f1 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -247,10 +247,11 @@ public:
void clear();
/// Parse prologue and all rows.
- Error parse(DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
- const DWARFContext &Ctx, const DWARFUnit *U,
- std::function<void(Error)> RecoverableErrorCallback = warn,
- raw_ostream *OS = nullptr);
+ Error parse(
+ DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
+ const DWARFContext &Ctx, const DWARFUnit *U,
+ std::function<void(Error)> RecoverableErrorCallback,
+ raw_ostream *OS = nullptr);
using RowVector = std::vector<Row>;
using RowIter = RowVector::const_iterator;
@@ -273,14 +274,13 @@ public:
Expected<const LineTable *> getOrParseLineTable(
DWARFDataExtractor &DebugLineData, uint32_t Offset,
const DWARFContext &Ctx, const DWARFUnit *U,
- std::function<void(Error)> RecoverableErrorCallback = warn);
+ std::function<void(Error)> RecoverableErrorCallback);
/// Helper to allow for parsing of an entire .debug_line section in sequence.
class SectionParser {
public:
- using cu_range = DWARFUnitSection<DWARFCompileUnit>::iterator_range;
- using tu_range =
- iterator_range<std::deque<DWARFUnitSection<DWARFTypeUnit>>::iterator>;
+ using cu_range = DWARFUnitVector::iterator_range;
+ using tu_range = DWARFUnitVector::iterator_range;
using LineToUnitMap = std::map<uint64_t, DWARFUnit *>;
SectionParser(DWARFDataExtractor &Data, const DWARFContext &C, cu_range CUs,
@@ -296,16 +296,17 @@ public:
/// \param OS - if not null, the parser will print information about the
/// table as it parses it.
LineTable
- parseNext(function_ref<void(Error)> RecoverableErrorCallback = warn,
- function_ref<void(Error)> UnrecoverableErrorCallback = warn,
- raw_ostream *OS = nullptr);
+ parseNext(
+ function_ref<void(Error)> RecoverableErrorCallback,
+ function_ref<void(Error)> UnrecoverableErrorCallback,
+ raw_ostream *OS = nullptr);
/// Skip the current line table and go to the following line table (if
/// present) immediately.
///
/// \param ErrorCallback - report any prologue parsing issues via this
/// callback.
- void skip(function_ref<void(Error)> ErrorCallback = warn);
+ void skip(function_ref<void(Error)> ErrorCallback);
/// Indicates if the parser has parsed as much as possible.
///
@@ -328,12 +329,6 @@ public:
bool Done = false;
};
- /// Helper function for DWARFDebugLine parse functions, to report issues
- /// identified during parsing.
- ///
- /// \param Err The Error to report.
- static void warn(Error Err);
-
private:
struct ParsingState {
ParsingState(struct LineTable *LT);
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index 9a73745fb6b4..da2098e15402 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -73,19 +73,21 @@ public:
uint32_t *Offset);
};
-class DWARFDebugLocDWO {
+class DWARFDebugLoclists {
public:
struct Entry {
- uint64_t Start;
- uint32_t Length;
+ uint8_t Kind;
+ uint64_t Value0;
+ uint64_t Value1;
SmallVector<char, 4> Loc;
};
struct LocationList {
unsigned Offset;
SmallVector<Entry, 2> Entries;
- void dump(raw_ostream &OS, bool IsLittleEndian, unsigned AddressSize,
- const MCRegisterInfo *RegInfo, unsigned Indent) const;
+ void dump(raw_ostream &OS, uint64_t BaseAddr, bool IsLittleEndian,
+ unsigned AddressSize, const MCRegisterInfo *RegInfo,
+ unsigned Indent) const;
};
private:
@@ -98,15 +100,15 @@ private:
bool IsLittleEndian;
public:
- void parse(DataExtractor data);
- void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo,
+ void parse(DataExtractor data, unsigned Version);
+ void dump(raw_ostream &OS, uint64_t BaseAddr, const MCRegisterInfo *RegInfo,
Optional<uint64_t> Offset) const;
/// Return the location list at the given offset or nullptr.
LocationList const *getLocationListAtOffset(uint64_t Offset) const;
- static Optional<LocationList> parseOneLocationList(DataExtractor Data,
- uint32_t *Offset);
+ static Optional<LocationList>
+ parseOneLocationList(DataExtractor Data, unsigned *Offset, unsigned Version);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
index cae4804e61d3..9e1656eb1615 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFObject.h"
#include <cstdint>
#include <vector>
@@ -67,7 +68,8 @@ private:
bool GnuStyle;
public:
- DWARFDebugPubTable(StringRef Data, bool LittleEndian, bool GnuStyle);
+ DWARFDebugPubTable(const DWARFObject &Obj, const DWARFSection &Sec,
+ bool LittleEndian, bool GnuStyle);
void dump(raw_ostream &OS) const;
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
index ce7436d9faa3..bc26edf00647 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
@@ -18,7 +18,6 @@
namespace llvm {
-struct BaseAddress;
class raw_ostream;
class DWARFDebugRangeList {
@@ -78,7 +77,7 @@ public:
/// list. Has to be passed base address of the compile unit referencing this
/// range list.
DWARFAddressRangesVector
- getAbsoluteRanges(llvm::Optional<BaseAddress> BaseAddr) const;
+ getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
index e2e8ab5ed219..5cc8d789e598 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
@@ -10,6 +10,7 @@
#ifndef LLVM_DEBUGINFO_DWARFDEBUGRNGLISTS_H
#define LLVM_DEBUGINFO_DWARFDEBUGRNGLISTS_H
+#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
@@ -23,6 +24,7 @@ namespace llvm {
class Error;
class raw_ostream;
+class DWARFUnit;
/// A class representing a single range list entry.
struct RangeListEntry : public DWARFListEntryBase {
@@ -35,7 +37,9 @@ struct RangeListEntry : public DWARFListEntryBase {
Error extract(DWARFDataExtractor Data, uint32_t End, uint32_t *OffsetPtr);
void dump(raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength,
- uint64_t &CurrentBase, DIDumpOptions DumpOpts) const;
+ uint64_t &CurrentBase, DIDumpOptions DumpOpts,
+ llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ LookupPooledAddress) const;
bool isSentinel() const { return EntryKind == dwarf::DW_RLE_end_of_list; }
};
@@ -44,7 +48,8 @@ class DWARFDebugRnglist : public DWARFListType<RangeListEntry> {
public:
/// Build a DWARFAddressRangesVector from a rangelist.
DWARFAddressRangesVector
- getAbsoluteRanges(llvm::Optional<BaseAddress> BaseAddr) const;
+ getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr,
+ DWARFUnit &U) const;
};
class DWARFDebugRnglistTable : public DWARFListTableBase<DWARFDebugRnglist> {
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
index c77034f6348f..56d46cd739a2 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -180,6 +180,7 @@ public:
/// \returns a valid DWARFDie instance if the attribute exists, or an invalid
/// DWARFDie object if it doesn't.
DWARFDie getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const;
+ DWARFDie getAttributeValueAsReferencedDie(const DWARFFormValue &V) const;
/// Extract the range base attribute from this DIE as absolute section offset.
///
@@ -404,6 +405,10 @@ public:
Die = Die.getPreviousSibling();
}
+ llvm::DWARFDie::iterator base() const {
+ return llvm::DWARFDie::iterator(AtEnd ? Die : Die.getSibling());
+ }
+
reverse_iterator<llvm::DWARFDie::iterator> &operator++() {
assert(!AtEnd && "Incrementing rend");
llvm::DWARFDie D = Die.getPreviousSibling();
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 1b5f71c946f9..727e853c09fb 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -61,7 +61,6 @@ public:
dwarf::Form getForm() const { return Form; }
uint64_t getRawUValue() const { return Value.uval; }
- uint64_t getSectionIndex() const { return Value.SectionIndex; }
void setForm(dwarf::Form F) { Form = F; }
void setUValue(uint64_t V) { Value.uval = V; }
void setSValue(int64_t V) { Value.sval = V; }
@@ -75,6 +74,10 @@ public:
bool isFormClass(FormClass FC) const;
const DWARFUnit *getUnit() const { return U; }
void dump(raw_ostream &OS, DIDumpOptions DumpOpts = DIDumpOptions()) const;
+ void dumpSectionedAddress(raw_ostream &OS, DIDumpOptions DumpOpts,
+ SectionedAddress SA) const;
+ static void dumpAddressSection(const DWARFObject &Obj, raw_ostream &OS,
+ DIDumpOptions DumpOpts, uint64_t SectionIndex);
/// Extracts a value in \p Data at offset \p *OffsetPtr. The information
/// in \p FormParams is needed to interpret some forms. The optional
@@ -101,6 +104,7 @@ public:
Optional<int64_t> getAsSignedConstant() const;
Optional<const char *> getAsCString() const;
Optional<uint64_t> getAsAddress() const;
+ Optional<SectionedAddress> getAsSectionedAddress() const;
Optional<uint64_t> getAsSectionOffset() const;
Optional<ArrayRef<uint8_t>> getAsBlock() const;
Optional<uint64_t> getAsCStringOffset() const;
@@ -238,6 +242,13 @@ inline Optional<uint64_t> toAddress(const Optional<DWARFFormValue> &V) {
return None;
}
+inline Optional<SectionedAddress>
+toSectionedAddress(const Optional<DWARFFormValue> &V) {
+ if (V)
+ return V->getAsSectionedAddress();
+ return None;
+}
+
/// Take an optional DWARFFormValue and extract an address.
///
/// \param V an optional DWARFFormValue to attempt to extract the value from.
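
A small sketch, not from the patch, of the new accessor; unlike getAsAddress() it also carries the object-file section index. The helper name and the `Die` variable are illustrative only.

    // Sketch only: fetch DW_AT_low_pc together with its section index.
    llvm::Optional<llvm::SectionedAddress>
    lowPCWithSection(const llvm::DWARFDie &Die) {
      return llvm::toSectionedAddress(Die.find(llvm::dwarf::DW_AT_low_pc));
    }
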
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
index 8d1ac5c83c23..073e02903c39 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
@@ -24,6 +24,7 @@ class DWARFGdbIndex {
uint32_t Version;
uint32_t CuListOffset;
+ uint32_t TuListOffset;
uint32_t AddressAreaOffset;
uint32_t SymbolTableOffset;
uint32_t ConstantPoolOffset;
@@ -34,6 +35,13 @@ class DWARFGdbIndex {
};
SmallVector<CompUnitEntry, 0> CuList;
+ struct TypeUnitEntry {
+ uint64_t Offset;
+ uint64_t TypeOffset;
+ uint64_t TypeSignature;
+ };
+ SmallVector<TypeUnitEntry, 0> TuList;
+
struct AddressEntry {
uint64_t LowAddress; /// The low address.
uint64_t HighAddress; /// The high address.
@@ -55,6 +63,7 @@ class DWARFGdbIndex {
uint32_t StringPoolOffset;
void dumpCUList(raw_ostream &OS) const;
+ void dumpTUList(raw_ostream &OS) const;
void dumpAddressArea(raw_ostream &OS) const;
void dumpSymbolTable(raw_ostream &OS) const;
void dumpConstantPool(raw_ostream &OS) const;
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
index ab12f3bc08b0..9b987314f209 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
@@ -13,6 +13,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -43,10 +44,6 @@ protected:
ListEntries Entries;
public:
- // FIXME: We need to consolidate the various verions of "createError"
- // that are used in the DWARF consumer. Until then, this is a workaround.
- Error createError(const char *, const char *, uint32_t);
-
const ListEntries &getEntries() const { return Entries; }
bool empty() const { return Entries.empty(); }
void clear() { Entries.clear(); }
@@ -102,6 +99,7 @@ public:
uint32_t getHeaderOffset() const { return HeaderOffset; }
uint8_t getAddrSize() const { return HeaderData.AddrSize; }
uint32_t getLength() const { return HeaderData.Length; }
+ uint16_t getVersion() const { return HeaderData.Version; }
StringRef getSectionName() const { return SectionName; }
StringRef getListTypeString() const { return ListTypeString; }
dwarf::DwarfFormat getFormat() const { return Format; }
@@ -159,7 +157,10 @@ public:
uint32_t getHeaderOffset() const { return Header.getHeaderOffset(); }
uint8_t getAddrSize() const { return Header.getAddrSize(); }
- void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const;
+ void dump(raw_ostream &OS,
+ llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ LookupPooledAddress,
+ DIDumpOptions DumpOpts = {}) const;
/// Return the contents of the offset entry designated by a given index.
Optional<uint32_t> getOffsetEntry(uint32_t Index) const {
@@ -213,7 +214,8 @@ Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data,
StringRef SectionName,
StringRef ListTypeString) {
if (*OffsetPtr < HeaderOffset || *OffsetPtr >= End)
- return createError("invalid %s list offset 0x%" PRIx32,
+ return createStringError(errc::invalid_argument,
+ "invalid %s list offset 0x%" PRIx32,
ListTypeString.data(), *OffsetPtr);
Entries.clear();
while (*OffsetPtr < End) {
@@ -224,14 +226,18 @@ Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data,
if (Entry.isSentinel())
return Error::success();
}
- return createError("no end of list marker detected at end of %s table "
+ return createStringError(errc::illegal_byte_sequence,
+ "no end of list marker detected at end of %s table "
"starting at offset 0x%" PRIx32,
SectionName.data(), HeaderOffset);
}
template <typename DWARFListType>
-void DWARFListTableBase<DWARFListType>::dump(raw_ostream &OS,
- DIDumpOptions DumpOpts) const {
+void DWARFListTableBase<DWARFListType>::dump(
+ raw_ostream &OS,
+ llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ LookupPooledAddress,
+ DIDumpOptions DumpOpts) const {
Header.dump(OS, DumpOpts);
OS << HeaderString << "\n";
@@ -250,7 +256,7 @@ void DWARFListTableBase<DWARFListType>::dump(raw_ostream &OS,
for (const auto &List : ListMap)
for (const auto &Entry : List.second.getEntries())
Entry.dump(OS, getAddrSize(), MaxEncodingStringLength, CurrentBase,
- DumpOpts);
+ DumpOpts, LookupPooledAddress);
}
template <typename DWARFListType>
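
The local createError workaround is dropped in favor of llvm::createStringError, which pairs an error code with a printf-style message; a sketch mirroring the extract() hunk above (the Errc.h include added at the top of the header provides llvm::errc; the helper name is illustrative).

    // Sketch only: same pattern as the extract() change above.
    llvm::Error checkListOffset(uint32_t Offset, uint32_t End) {
      if (Offset >= End)
        return llvm::createStringError(llvm::errc::invalid_argument,
                                       "invalid list offset 0x%" PRIx32, Offset);
      return llvm::Error::success();
    }
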
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h
index 6e8f370f4aea..d611b5d075c8 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h
@@ -33,11 +33,13 @@ public:
virtual ArrayRef<SectionName> getSectionNames() const { return {}; }
virtual bool isLittleEndian() const = 0;
virtual uint8_t getAddressSize() const { llvm_unreachable("unimplemented"); }
- virtual const DWARFSection &getInfoSection() const { return Dummy; }
+ virtual void
+ forEachInfoSections(function_ref<void(const DWARFSection &)> F) const {}
virtual void
forEachTypesSections(function_ref<void(const DWARFSection &)> F) const {}
virtual StringRef getAbbrevSection() const { return ""; }
virtual const DWARFSection &getLocSection() const { return Dummy; }
+ virtual const DWARFSection &getLoclistsSection() const { return Dummy; }
virtual StringRef getARangeSection() const { return ""; }
virtual StringRef getDebugFrameSection() const { return ""; }
virtual StringRef getEHFrameSection() const { return ""; }
@@ -47,12 +49,13 @@ public:
virtual const DWARFSection &getRangeSection() const { return Dummy; }
virtual const DWARFSection &getRnglistsSection() const { return Dummy; }
virtual StringRef getMacinfoSection() const { return ""; }
- virtual StringRef getPubNamesSection() const { return ""; }
- virtual StringRef getPubTypesSection() const { return ""; }
- virtual StringRef getGnuPubNamesSection() const { return ""; }
- virtual StringRef getGnuPubTypesSection() const { return ""; }
+ virtual const DWARFSection &getPubNamesSection() const { return Dummy; }
+ virtual const DWARFSection &getPubTypesSection() const { return Dummy; }
+ virtual const DWARFSection &getGnuPubNamesSection() const { return Dummy; }
+ virtual const DWARFSection &getGnuPubTypesSection() const { return Dummy; }
virtual const DWARFSection &getStringOffsetSection() const { return Dummy; }
- virtual const DWARFSection &getInfoDWOSection() const { return Dummy; }
+ virtual void
+ forEachInfoDWOSections(function_ref<void(const DWARFSection &)> F) const {}
virtual void
forEachTypesDWOSections(function_ref<void(const DWARFSection &)> F) const {}
virtual StringRef getAbbrevDWOSection() const { return ""; }
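
With getInfoSection()/getInfoDWOSection() turned into visitors, a consumer that previously grabbed the single .debug_info section now enumerates every contribution. A hedged sketch with an illustrative helper name:

    // Sketch only: sum up the size of all .debug_info contributions.
    uint64_t infoBytes(const llvm::DWARFObject &Obj) {
      uint64_t Total = 0;
      Obj.forEachInfoSections(
          [&](const llvm::DWARFSection &S) { Total += S.Data.size(); });
      return Total;
    }
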
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFSection.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFSection.h
index 77045f0794ae..7f8235965297 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFSection.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFSection.h
@@ -23,6 +23,11 @@ struct SectionName {
bool IsNameUnique;
};
+struct SectionedAddress {
+ uint64_t Address;
+ uint64_t SectionIndex;
+};
+
} // end namespace llvm
#endif // LLVM_DEBUGINFO_DWARF_DWARFSECTION_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index cb5a78ee3dbf..8ca5ba13fc23 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -26,19 +26,20 @@ class raw_ostream;
class DWARFTypeUnit : public DWARFUnit {
public:
DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section,
- const DWARFUnitHeader &Header,
- const DWARFDebugAbbrev *DA, const DWARFSection *RS,
+ const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA,
+ const DWARFSection *RS, const DWARFSection *LocSection,
StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS,
const DWARFSection &LS, bool LE, bool IsDWO,
- const DWARFUnitSectionBase &UnitSection)
- : DWARFUnit(Context, Section, Header, DA, RS, SS, SOS, AOS, LS, LE, IsDWO,
- UnitSection) {}
+ const DWARFUnitVector &UnitVector)
+ : DWARFUnit(Context, Section, Header, DA, RS, LocSection, SS, SOS, AOS,
+ LS, LE, IsDWO, UnitVector) {}
uint64_t getTypeHash() const { return getHeader().getTypeHash(); }
uint32_t getTypeOffset() const { return getHeader().getTypeOffset(); }
- void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {});
- static const DWARFSectionKind Section = DW_SECT_TYPES;
+ void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) override;
+ // Enable LLVM-style RTTI.
+ static bool classof(const DWARFUnit *U) { return U->isTypeUnit(); }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 988a7958184c..79c3ce1106d5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -72,7 +72,8 @@ public:
/// Parse a unit header from \p debug_info starting at \p offset_ptr.
bool extract(DWARFContext &Context, const DWARFDataExtractor &debug_info,
uint32_t *offset_ptr, DWARFSectionKind Kind = DW_SECT_INFO,
- const DWARFUnitIndex *Index = nullptr);
+ const DWARFUnitIndex *Index = nullptr,
+ const DWARFUnitIndex::Entry *Entry = nullptr);
uint32_t getOffset() const { return Offset; }
const dwarf::FormParams &getFormParams() const { return FormParams; }
uint16_t getVersion() const { return FormParams.Version; }
@@ -101,133 +102,66 @@ public:
uint32_t getNextUnitOffset() const { return Offset + Length + 4; }
};
-/// Base class for all DWARFUnitSection classes. This provides the
-/// functionality common to all unit types.
-class DWARFUnitSectionBase {
-public:
- /// Returns the Unit that contains the given section offset in the
- /// same section this Unit originated from.
- virtual DWARFUnit *getUnitForOffset(uint32_t Offset) const = 0;
- virtual DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) = 0;
-
- void parse(DWARFContext &C, const DWARFSection &Section);
- void parseDWO(DWARFContext &C, const DWARFSection &DWOSection,
- bool Lazy = false);
-
-protected:
- ~DWARFUnitSectionBase() = default;
-
- virtual void parseImpl(DWARFContext &Context, const DWARFObject &Obj,
- const DWARFSection &Section,
- const DWARFDebugAbbrev *DA, const DWARFSection *RS,
- StringRef SS, const DWARFSection &SOS,
- const DWARFSection *AOS, const DWARFSection &LS,
- bool isLittleEndian, bool isDWO, bool Lazy) = 0;
-};
-
const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
DWARFSectionKind Kind);
-/// Concrete instance of DWARFUnitSection, specialized for one Unit type.
-template<typename UnitType>
-class DWARFUnitSection final : public SmallVector<std::unique_ptr<UnitType>, 1>,
- public DWARFUnitSectionBase {
- bool Parsed = false;
- std::function<std::unique_ptr<UnitType>(uint32_t)> Parser;
+/// Describe a collection of units. Intended to hold all units either from
+/// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo.
+class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1> {
+ std::function<std::unique_ptr<DWARFUnit>(uint32_t, DWARFSectionKind,
+ const DWARFSection *,
+ const DWARFUnitIndex::Entry *)>
+ Parser;
+ int NumInfoUnits = -1;
public:
- using UnitVector = SmallVectorImpl<std::unique_ptr<UnitType>>;
+ using UnitVector = SmallVectorImpl<std::unique_ptr<DWARFUnit>>;
using iterator = typename UnitVector::iterator;
using iterator_range = llvm::iterator_range<typename UnitVector::iterator>;
- UnitType *getUnitForOffset(uint32_t Offset) const override {
- auto *CU = std::upper_bound(
- this->begin(), this->end(), Offset,
- [](uint32_t LHS, const std::unique_ptr<UnitType> &RHS) {
- return LHS < RHS->getNextUnitOffset();
- });
- if (CU != this->end() && (*CU)->getOffset() <= Offset)
- return CU->get();
- return nullptr;
- }
- UnitType *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) override {
- const auto *CUOff = E.getOffset(DW_SECT_INFO);
- if (!CUOff)
- return nullptr;
-
- auto Offset = CUOff->Offset;
-
- auto *CU = std::upper_bound(
- this->begin(), this->end(), CUOff->Offset,
- [](uint32_t LHS, const std::unique_ptr<UnitType> &RHS) {
- return LHS < RHS->getNextUnitOffset();
- });
- if (CU != this->end() && (*CU)->getOffset() <= Offset)
- return CU->get();
-
- if (!Parser)
- return nullptr;
-
- auto U = Parser(Offset);
- if (!U)
- U = nullptr;
-
- auto *NewCU = U.get();
- this->insert(CU, std::move(U));
- return NewCU;
+ DWARFUnit *getUnitForOffset(uint32_t Offset) const;
+ DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E);
+
+ /// Read units from a .debug_info or .debug_types section. Calls made
+ /// before finishedInfoUnits() are assumed to be for .debug_info sections,
+ /// calls after finishedInfoUnits() are for .debug_types sections. Caller
+ /// must not mix calls to addUnitsForSection and addUnitsForDWOSection.
+ void addUnitsForSection(DWARFContext &C, const DWARFSection &Section,
+ DWARFSectionKind SectionKind);
+ /// Read units from a .debug_info.dwo or .debug_types.dwo section. Calls
+ /// made before finishedInfoUnits() are assumed to be for .debug_info.dwo
+ /// sections, calls after finishedInfoUnits() are for .debug_types.dwo
+ /// sections. Caller must not mix calls to addUnitsForSection and
+ /// addUnitsForDWOSection.
+ void addUnitsForDWOSection(DWARFContext &C, const DWARFSection &DWOSection,
+ DWARFSectionKind SectionKind, bool Lazy = false);
+
+ /// Add an existing DWARFUnit to this UnitVector. This is used by the DWARF
+  /// verifier to process units separately.
+ DWARFUnit *addUnit(std::unique_ptr<DWARFUnit> Unit);
+
+ /// Returns number of all units held by this instance.
+ unsigned getNumUnits() const { return size(); }
+ /// Returns number of units from all .debug_info[.dwo] sections.
+ unsigned getNumInfoUnits() const {
+ return NumInfoUnits == -1 ? size() : NumInfoUnits;
}
+ /// Returns number of units from all .debug_types[.dwo] sections.
+ unsigned getNumTypesUnits() const { return size() - NumInfoUnits; }
+ /// Indicate that parsing .debug_info[.dwo] is done, and remaining units
+ /// will be from .debug_types[.dwo].
+ void finishedInfoUnits() { NumInfoUnits = size(); }
private:
- void parseImpl(DWARFContext &Context, const DWARFObject &Obj,
- const DWARFSection &Section, const DWARFDebugAbbrev *DA,
- const DWARFSection *RS, StringRef SS, const DWARFSection &SOS,
- const DWARFSection *AOS, const DWARFSection &LS, bool LE,
- bool IsDWO, bool Lazy) override {
- if (Parsed)
- return;
- DWARFDataExtractor Data(Obj, Section, LE, 0);
- if (!Parser) {
- const DWARFUnitIndex *Index = nullptr;
- if (IsDWO)
- Index = &getDWARFUnitIndex(Context, UnitType::Section);
- Parser = [=, &Context, &Section, &SOS,
- &LS](uint32_t Offset) -> std::unique_ptr<UnitType> {
- if (!Data.isValidOffset(Offset))
- return nullptr;
- DWARFUnitHeader Header;
- if (!Header.extract(Context, Data, &Offset, UnitType::Section, Index))
- return nullptr;
- auto U = llvm::make_unique<UnitType>(
- Context, Section, Header, DA, RS, SS, SOS, AOS, LS, LE, IsDWO,
- *this);
- return U;
- };
- }
- if (Lazy)
- return;
- auto I = this->begin();
- uint32_t Offset = 0;
- while (Data.isValidOffset(Offset)) {
- if (I != this->end() && (*I)->getOffset() == Offset) {
- ++I;
- continue;
- }
- auto U = Parser(Offset);
- if (!U)
- break;
- Offset = U->getNextUnitOffset();
- I = std::next(this->insert(I, std::move(U)));
- }
- Parsed = true;
- }
+ void addUnitsImpl(DWARFContext &Context, const DWARFObject &Obj,
+ const DWARFSection &Section, const DWARFDebugAbbrev *DA,
+ const DWARFSection *RS, const DWARFSection *LocSection,
+ StringRef SS, const DWARFSection &SOS,
+ const DWARFSection *AOS, const DWARFSection &LS, bool LE,
+ bool IsDWO, bool Lazy, DWARFSectionKind SectionKind);
};
/// Represents base address of the CU.
-struct BaseAddress {
- uint64_t Address;
- uint64_t SectionIndex;
-};
-
/// Represents a unit's contribution to the string offsets table.
struct StrOffsetsContributionDescriptor {
uint64_t Base = 0;
@@ -261,14 +195,20 @@ class DWARFUnit {
const DWARFDebugAbbrev *Abbrev;
const DWARFSection *RangeSection;
uint32_t RangeSectionBase;
+ /// We either keep track of the location list section or its data, depending
+ /// on whether we are handling a split DWARF section or not.
+ union {
+ const DWARFSection *LocSection;
+ StringRef LocSectionData;
+ };
const DWARFSection &LineSection;
StringRef StringSection;
const DWARFSection &StringOffsetSection;
const DWARFSection *AddrOffsetSection;
uint32_t AddrOffsetSectionBase = 0;
bool isLittleEndian;
- bool isDWO;
- const DWARFUnitSectionBase &UnitSection;
+ bool IsDWO;
+ const DWARFUnitVector &UnitVector;
/// Start, length, and DWARF format of the unit's contribution to the string
/// offsets table (DWARF v5).
@@ -278,7 +218,7 @@ class DWARFUnit {
Optional<DWARFDebugRnglistTable> RngListTable;
mutable const DWARFAbbreviationDeclarationSet *Abbrevs;
- llvm::Optional<BaseAddress> BaseAddr;
+ llvm::Optional<SectionedAddress> BaseAddr;
/// The compile unit debug information entry items.
std::vector<DWARFDebugInfoEntry> DieArray;
@@ -308,28 +248,30 @@ protected:
/// length and form. The given offset is expected to be derived from the unit
/// DIE's DW_AT_str_offsets_base attribute.
Optional<StrOffsetsContributionDescriptor>
- determineStringOffsetsTableContribution(DWARFDataExtractor &DA,
- uint64_t Offset);
+ determineStringOffsetsTableContribution(DWARFDataExtractor &DA);
/// Find the unit's contribution to the string offsets table and determine its
/// length and form. The given offset is expected to be 0 in a dwo file or,
/// in a dwp file, the start of the unit's contribution to the string offsets
/// table section (as determined by the index table).
Optional<StrOffsetsContributionDescriptor>
- determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA,
- uint64_t Offset);
+ determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA);
public:
DWARFUnit(DWARFContext &Context, const DWARFSection &Section,
- const DWARFUnitHeader &Header,
- const DWARFDebugAbbrev *DA, const DWARFSection *RS, StringRef SS,
- const DWARFSection &SOS, const DWARFSection *AOS,
+ const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA,
+ const DWARFSection *RS, const DWARFSection *LocSection,
+ StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS,
const DWARFSection &LS, bool LE, bool IsDWO,
- const DWARFUnitSectionBase &UnitSection);
+ const DWARFUnitVector &UnitVector);
virtual ~DWARFUnit();
+ bool isDWOUnit() const { return IsDWO; }
DWARFContext& getContext() const { return Context; }
+ const DWARFSection &getInfoSection() const { return InfoSection; }
+ const DWARFSection *getLocSection() const { return LocSection; }
+ StringRef getLocSectionData() const { return LocSectionData; }
uint32_t getOffset() const { return Header.getOffset(); }
const dwarf::FormParams &getFormParams() const {
return Header.getFormParams();
@@ -342,6 +284,7 @@ public:
}
uint32_t getLength() const { return Header.getLength(); }
uint8_t getUnitType() const { return Header.getUnitType(); }
+ bool isTypeUnit() const { return Header.isTypeUnit(); }
uint32_t getNextUnitOffset() const { return Header.getNextUnitOffset(); }
const DWARFSection &getLineSection() const { return LineSection; }
StringRef getStringSection() const { return StringSection; }
@@ -362,8 +305,8 @@ public:
RangeSectionBase = Base;
}
- bool getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const;
- bool getStringOffsetSectionItem(uint32_t Index, uint64_t &Result) const;
+ Optional<SectionedAddress> getAddrOffsetSectionItem(uint32_t Index) const;
+ Optional<uint64_t> getStringOffsetSectionItem(uint32_t Index) const;
DWARFDataExtractor getDebugInfoExtractor() const;
@@ -433,7 +376,7 @@ public:
llvm_unreachable("Invalid UnitType.");
}
- llvm::Optional<BaseAddress> getBaseAddress();
+ llvm::Optional<SectionedAddress> getBaseAddress();
DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) {
extractDIEsIfNeeded(ExtractUnitDIEOnly);
@@ -467,7 +410,7 @@ public:
return None;
}
- void collectAddressRanges(DWARFAddressRangesVector &CURanges);
+ Expected<DWARFAddressRangesVector> collectAddressRanges();
/// Returns subprogram DIE with address range encompassing the provided
/// address. The pointer is alive as long as parsed compile unit DIEs are not
@@ -480,8 +423,8 @@ public:
void getInlinedChainForAddress(uint64_t Address,
SmallVectorImpl<DWARFDie> &InlinedChain);
- /// getUnitSection - Return the DWARFUnitSection containing this unit.
- const DWARFUnitSectionBase &getUnitSection() const { return UnitSection; }
+ /// Return the DWARFUnitVector containing this unit.
+ const DWARFUnitVector &getUnitVector() const { return UnitVector; }
/// Returns the number of DIEs in the unit. Parses the unit
/// if necessary.
@@ -541,6 +484,7 @@ public:
return die_iterator_range(DieArray.begin(), DieArray.end());
}
+ virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0;
private:
/// Size in bytes of the .debug_info data associated with this compile unit.
size_t getDebugInfoSize() const {
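
DWARFUnitVector replaces the templated DWARFUnitSection: one flat vector now holds both compile and type units, with the info/type split tracked via finishedInfoUnits(). A hedged sketch of iterating such a vector (the helper name is illustrative, and the vector is assumed to have been populated with addUnitsForSection):

    // Sketch only: walk all units, info units first, then type units.
    void listUnits(const llvm::DWARFUnitVector &Units, llvm::raw_ostream &OS) {
      OS << Units.getNumInfoUnits() << " info unit(s), "
         << Units.getNumUnits() << " total\n";
      for (const std::unique_ptr<llvm::DWARFUnit> &U : Units)
        OS << "unit at offset " << U->getOffset()
           << (U->isTypeUnit() ? " (type unit)\n" : "\n");
    }
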
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
index 49ed4bb222f3..16be5f9401c0 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
@@ -74,6 +74,7 @@ private:
int InfoColumn = -1;
std::unique_ptr<DWARFSectionKind[]> ColumnKinds;
std::unique_ptr<Entry[]> Rows;
+ mutable std::vector<Entry *> OffsetLookup;
static StringRef getColumnHeader(DWARFSectionKind DS);
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index a829510a219d..e47fbea5646e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -14,6 +14,7 @@
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include <cstdint>
#include <map>
@@ -96,10 +97,14 @@ private:
/// lies between two valid DIEs.
std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets;
uint32_t NumDebugLineErrors = 0;
+ // Used to relax some checks that do not currently work portably
+ bool IsObjectFile;
+ bool IsMachOObject;
raw_ostream &error() const;
raw_ostream &warn() const;
raw_ostream &note() const;
+ raw_ostream &dump(const DWARFDie &Die, unsigned indent = 0) const;
/// Verifies the abbreviations section.
///
@@ -113,20 +118,20 @@ private:
/// \returns The number of errors that occurred during verification.
unsigned verifyAbbrevSection(const DWARFDebugAbbrev *Abbrev);
- /// Verifies the header of a unit in the .debug_info section.
+ /// Verifies the header of a unit in a .debug_info or .debug_types section.
///
/// This function currently checks for:
/// - Unit is in 32-bit DWARF format. The function can be modified to
/// support 64-bit format.
/// - The DWARF version is valid
/// - The unit type is valid (if unit is in version >=5)
- /// - The unit doesn't extend beyond .debug_info section
+ /// - The unit doesn't extend beyond the containing section
/// - The address size is valid
/// - The offset in the .debug_abbrev section is valid
///
- /// \param DebugInfoData The .debug_info section data
+ /// \param DebugInfoData The section data
/// \param Offset A reference to the offset start of the unit. The offset will
- /// be updated to point to the next unit in .debug_info
+ /// be updated to point to the next unit in the section
/// \param UnitIndex The index of the unit to be verified
/// \param UnitType A reference to the type of the unit
/// \param isUnitDWARF64 A reference to a flag that shows whether the unit is
@@ -137,7 +142,7 @@ private:
uint32_t *Offset, unsigned UnitIndex, uint8_t &UnitType,
bool &isUnitDWARF64);
- /// Verifies the header of a unit in the .debug_info section.
+ /// Verifies the header of a unit in a .debug_info or .debug_types section.
///
/// This function currently verifies:
/// - The debug info attributes.
@@ -146,13 +151,29 @@ private:
/// - That the root DIE is a unit DIE.
/// - If a unit type is provided, that the unit DIE matches the unit type.
/// - The DIE ranges.
+ /// - That call site entries are only nested within subprograms with a
+ /// DW_AT_call attribute.
///
- /// \param Unit The DWARF Unit to verifiy.
- /// \param UnitType An optional unit type which will be used to verify the
- /// type of the unit DIE.
+ /// \param Unit The DWARF Unit to verify.
///
- /// \returns true if the content is verified successfully, false otherwise.
- bool verifyUnitContents(DWARFUnit &Unit, uint8_t UnitType = 0);
+ /// \returns The number of errors that occurred during verification.
+ unsigned verifyUnitContents(DWARFUnit &Unit);
+
+ /// Verifies the unit headers and contents in a .debug_info or .debug_types
+ /// section.
+ ///
+ /// \param S The DWARF Section to verify.
+ /// \param SectionKind The object-file section kind that S comes from.
+ ///
+ /// \returns The number of errors that occurred during verification.
+ unsigned verifyUnitSection(const DWARFSection &S,
+ DWARFSectionKind SectionKind);
+
+ /// Verifies that a call site entry is nested within a subprogram with a
+ /// DW_AT_call attribute.
+ ///
+ /// \returns Number of errors that occurred during verification.
+ unsigned verifyDebugInfoCallSite(const DWARFDie &Die);
/// Verify that all Die ranges are valid.
///
@@ -172,7 +193,7 @@ private:
/// \param AttrValue The DWARF attribute value to check
///
/// \returns NumErrors The number of errors occurred during verification of
- /// attributes' values in a .debug_info section unit
+ /// attributes' values in a unit
unsigned verifyDebugInfoAttribute(const DWARFDie &Die,
DWARFAttribute &AttrValue);
@@ -180,14 +201,14 @@ private:
///
/// This function currently checks for:
/// - All DW_FORM_ref values that are CU relative have valid CU offsets
- /// - All DW_FORM_ref_addr values have valid .debug_info offsets
+ /// - All DW_FORM_ref_addr values have valid section offsets
/// - All DW_FORM_strp values have valid .debug_str offsets
///
/// \param Die The DWARF DIE that owns the attribute value
/// \param AttrValue The DWARF attribute value to check
///
/// \returns NumErrors The number of errors occurred during verification of
- /// attributes' forms in a .debug_info section unit
+ /// attributes' forms in a unit
unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue);
/// Verifies the all valid references that were found when iterating through
@@ -199,7 +220,7 @@ private:
/// CU relative and absolute references.
///
/// \returns NumErrors The number of errors occurred during verification of
- /// references for the .debug_info section
+ /// references for the .debug_info and .debug_types sections
unsigned verifyDebugInfoReferences();
/// Verify the DW_AT_stmt_list encoding and value and ensure that no
@@ -268,8 +289,8 @@ private:
public:
DWARFVerifier(raw_ostream &S, DWARFContext &D,
- DIDumpOptions DumpOpts = DIDumpOptions::getForSingleDIE())
- : OS(S), DCtx(D), DumpOpts(std::move(DumpOpts)) {}
+ DIDumpOptions DumpOpts = DIDumpOptions::getForSingleDIE());
+
/// Verify the information in any of the following sections, if available:
/// .debug_abbrev, debug_abbrev.dwo
///
@@ -280,12 +301,12 @@ public:
/// false otherwise.
bool handleDebugAbbrev();
- /// Verify the information in the .debug_info section.
+ /// Verify the information in the .debug_info and .debug_types sections.
///
- /// Any errors are reported to the stream that was this object was
+ /// Any errors are reported to the stream that this object was
/// constructed with.
///
- /// \returns true if the .debug_info verifies successfully, false otherwise.
+ /// \returns true if all sections verify successfully, false otherwise.
bool handleDebugInfo();
/// Verify the information in the .debug_line section.
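
A small sketch, not from the patch, of driving the verifier; with this change handleDebugInfo() covers .debug_types as well as .debug_info. The wrapper name is illustrative only.

    // Sketch only: verify abbreviations plus all info/type unit sections.
    bool verifyDwarf(llvm::DWARFContext &DCtx) {
      llvm::DWARFVerifier Verifier(llvm::errs(), DCtx);
      bool Success = Verifier.handleDebugAbbrev();
      Success &= Verifier.handleDebugInfo();
      return Success;
    }
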
diff --git a/contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h
index e66aeca3cd45..5c043a7837b3 100644
--- a/contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h
@@ -24,22 +24,28 @@ enum class msf_error_code {
invalid_format,
block_in_use
};
+} // namespace msf
+} // namespace llvm
+
+namespace std {
+template <>
+struct is_error_code_enum<llvm::msf::msf_error_code> : std::true_type {};
+} // namespace std
+
+namespace llvm {
+namespace msf {
+const std::error_category &MSFErrCategory();
+
+inline std::error_code make_error_code(msf_error_code E) {
+ return std::error_code(static_cast<int>(E), MSFErrCategory());
+}
/// Base class for errors originating when parsing raw PDB files
-class MSFError : public ErrorInfo<MSFError> {
+class MSFError : public ErrorInfo<MSFError, StringError> {
public:
+ using ErrorInfo<MSFError, StringError>::ErrorInfo; // inherit constructors
+ MSFError(const Twine &S) : ErrorInfo(S, msf_error_code::unspecified) {}
static char ID;
- MSFError(msf_error_code C);
- MSFError(const std::string &Context);
- MSFError(msf_error_code C, const std::string &Context);
-
- void log(raw_ostream &OS) const override;
- const std::string &getErrorMessage() const;
- std::error_code convertToErrorCode() const override;
-
-private:
- std::string ErrMsg;
- msf_error_code Code;
};
} // namespace msf
} // namespace llvm
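
MSFError now layers on StringError: the std::is_error_code_enum specialization plus make_error_code let an msf_error_code convert implicitly to std::error_code, and the inherited constructors carry both a message and a code. A hedged sketch, assuming the inherited (Twine, error_code) constructor implied by the delegation above; DIAError and PDBError further down follow the same pattern. The function and its condition are illustrative only.

    // Sketch only: raise an MSF error whose category survives convertToErrorCode().
    llvm::Error readBlock(bool Valid) {
      if (!Valid)
        return llvm::make_error<llvm::msf::MSFError>(
            "stream directory is corrupt",
            llvm::msf::msf_error_code::invalid_format);
      return llvm::Error::success();
    }
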
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h b/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
index 9713dce362d2..ac7f19637ab1 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
@@ -43,11 +43,6 @@ public:
void reset() override { Enumerator->reset(); }
- ConcreteSymbolEnumerator<ChildType> *clone() const override {
- std::unique_ptr<IPDBEnumSymbols> WrappedClone(Enumerator->clone());
- return new ConcreteSymbolEnumerator<ChildType>(std::move(WrappedClone));
- }
-
private:
std::unique_ptr<IPDBEnumSymbols> Enumerator;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
index 930bea6060b2..881d7329ab66 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
@@ -24,7 +24,6 @@ public:
llvm::Optional<RecordType> getItemAtIndex(uint32_t Index) const override;
bool getNext(RecordType &Record) override;
void reset() override;
- DIADataStream *clone() const override;
private:
CComPtr<IDiaEnumDebugStreamData> StreamData;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
index ffae6645e94b..1f129052d034 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
@@ -27,7 +27,6 @@ public:
ChildTypePtr getChildAtIndex(uint32_t Index) const override;
ChildTypePtr getNext() override;
void reset() override;
- DIAEnumDebugStreams *clone() const override;
private:
CComPtr<IDiaEnumDebugStreams> Enumerator;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h
new file mode 100644
index 000000000000..f3b02f07e648
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h
@@ -0,0 +1,36 @@
+//==- DIAEnumFrameData.h --------------------------------------- -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_DIA_DIAENUMFRAMEDATA_H
+#define LLVM_DEBUGINFO_PDB_DIA_DIAENUMFRAMEDATA_H
+
+#include "DIASupport.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBFrameData.h"
+
+namespace llvm {
+namespace pdb {
+
+class DIAEnumFrameData : public IPDBEnumChildren<IPDBFrameData> {
+public:
+ explicit DIAEnumFrameData(CComPtr<IDiaEnumFrameData> DiaEnumerator);
+
+ uint32_t getChildCount() const override;
+ ChildTypePtr getChildAtIndex(uint32_t Index) const override;
+ ChildTypePtr getNext() override;
+ void reset() override;
+
+private:
+ CComPtr<IDiaEnumFrameData> Enumerator;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
index 39490a4b2209..4669a8d31196 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
@@ -16,22 +16,18 @@
namespace llvm {
namespace pdb {
-class DIASession;
class DIAEnumInjectedSources : public IPDBEnumChildren<IPDBInjectedSource> {
public:
explicit DIAEnumInjectedSources(
- const DIASession &PDBSession,
CComPtr<IDiaEnumInjectedSources> DiaEnumerator);
uint32_t getChildCount() const override;
ChildTypePtr getChildAtIndex(uint32_t Index) const override;
ChildTypePtr getNext() override;
void reset() override;
- DIAEnumInjectedSources *clone() const override;
private:
- const DIASession &Session;
CComPtr<IDiaEnumInjectedSources> Enumerator;
};
} // namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
index 08f0de124ede..f1cb6268a26d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
@@ -26,7 +26,6 @@ public:
ChildTypePtr getChildAtIndex(uint32_t Index) const override;
ChildTypePtr getNext() override;
void reset() override;
- DIAEnumLineNumbers *clone() const override;
private:
CComPtr<IDiaEnumLineNumbers> Enumerator;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
index 52c9563b5d5f..ac2ae317d263 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
@@ -28,7 +28,6 @@ public:
ChildTypePtr getChildAtIndex(uint32_t Index) const override;
ChildTypePtr getNext() override;
void reset() override;
- DIAEnumSectionContribs *clone() const override;
private:
const DIASession &Session;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
index e69d18f5ba37..dac3df06a178 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
@@ -27,7 +27,6 @@ public:
ChildTypePtr getChildAtIndex(uint32_t Index) const override;
ChildTypePtr getNext() override;
void reset() override;
- DIAEnumSourceFiles *clone() const override;
private:
const DIASession &Session;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
index f779cd1f4be3..9689859ae0f8 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
@@ -27,7 +27,6 @@ public:
std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
std::unique_ptr<PDBSymbol> getNext() override;
void reset() override;
- DIAEnumSymbols *clone() const override;
private:
const DIASession &Session;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
index 926fcfe69648..f4f856ebb6fd 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
@@ -26,7 +26,6 @@ public:
std::unique_ptr<IPDBTable> getChildAtIndex(uint32_t Index) const override;
std::unique_ptr<IPDBTable> getNext() override;
void reset() override;
- DIAEnumTables *clone() const override;
private:
CComPtr<IDiaEnumTables> Enumerator;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h
index 35a39a0df5ca..2b33a65a0a14 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h
@@ -23,23 +23,29 @@ enum class dia_error_code {
already_loaded,
debug_info_mismatch,
};
+} // namespace pdb
+} // namespace llvm
+
+namespace std {
+template <>
+struct is_error_code_enum<llvm::pdb::dia_error_code> : std::true_type {};
+} // namespace std
+
+namespace llvm {
+namespace pdb {
+const std::error_category &DIAErrCategory();
+
+inline std::error_code make_error_code(dia_error_code E) {
+ return std::error_code(static_cast<int>(E), DIAErrCategory());
+}
/// Base class for errors originating in DIA SDK, e.g. COM calls
-class DIAError : public ErrorInfo<DIAError> {
+class DIAError : public ErrorInfo<DIAError, StringError> {
public:
+ using ErrorInfo<DIAError, StringError>::ErrorInfo;
+ DIAError(const Twine &S) : ErrorInfo(S, dia_error_code::unspecified) {}
static char ID;
- DIAError(dia_error_code C);
- DIAError(StringRef Context);
- DIAError(dia_error_code C, StringRef Context);
-
- void log(raw_ostream &OS) const override;
- StringRef getErrorMessage() const;
- std::error_code convertToErrorCode() const override;
-
-private:
- std::string ErrMsg;
- dia_error_code Code;
};
-}
-}
+} // namespace pdb
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h
new file mode 100644
index 000000000000..0ce6cfc93030
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h
@@ -0,0 +1,39 @@
+//===- DIAFrameData.h - DIA Impl. of IPDBFrameData ---------------- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_DIA_DIAFRAMEDATA_H
+#define LLVM_DEBUGINFO_PDB_DIA_DIAFRAMEDATA_H
+
+#include "DIASupport.h"
+#include "llvm/DebugInfo/PDB/IPDBFrameData.h"
+
+namespace llvm {
+namespace pdb {
+
+class DIASession;
+
+class DIAFrameData : public IPDBFrameData {
+public:
+ explicit DIAFrameData(CComPtr<IDiaFrameData> DiaFrameData);
+
+ uint32_t getAddressOffset() const override;
+ uint32_t getAddressSection() const override;
+ uint32_t getLengthBlock() const override;
+ std::string getProgram() const override;
+ uint32_t getRelativeVirtualAddress() const override;
+ uint64_t getVirtualAddress() const override;
+
+private:
+ CComPtr<IDiaFrameData> FrameData;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
index dfb35647055a..5d4f855c63ca 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
@@ -20,7 +20,8 @@ class DIARawSymbol : public IPDBRawSymbol {
public:
DIARawSymbol(const DIASession &PDBSession, CComPtr<IDiaSymbol> DiaSymbol);
- void dump(raw_ostream &OS, int Indent) const override;
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
CComPtr<IDiaSymbol> getDiaSymbol() const { return Symbol; }
@@ -63,25 +64,25 @@ public:
uint32_t getAddressOffset() const override;
uint32_t getAddressSection() const override;
uint32_t getAge() const override;
- uint32_t getArrayIndexTypeId() const override;
+ SymIndexId getArrayIndexTypeId() const override;
uint32_t getBaseDataOffset() const override;
uint32_t getBaseDataSlot() const override;
- uint32_t getBaseSymbolId() const override;
+ SymIndexId getBaseSymbolId() const override;
PDB_BuiltinType getBuiltinType() const override;
uint32_t getBitPosition() const override;
PDB_CallingConv getCallingConvention() const override;
- uint32_t getClassParentId() const override;
+ SymIndexId getClassParentId() const override;
std::string getCompilerName() const override;
uint32_t getCount() const override;
uint32_t getCountLiveRanges() const override;
PDB_Lang getLanguage() const override;
- uint32_t getLexicalParentId() const override;
+ SymIndexId getLexicalParentId() const override;
std::string getLibraryName() const override;
uint32_t getLiveRangeStartAddressOffset() const override;
uint32_t getLiveRangeStartAddressSection() const override;
uint32_t getLiveRangeStartRelativeVirtualAddress() const override;
codeview::RegisterId getLocalBasePointerRegisterId() const override;
- uint32_t getLowerBoundId() const override;
+ SymIndexId getLowerBoundId() const override;
uint32_t getMemorySpaceKind() const override;
std::string getName() const override;
uint32_t getNumberOfAcceleratorPointerTags() const override;
@@ -91,7 +92,7 @@ public:
uint32_t getNumberOfRows() const override;
std::string getObjectFileName() const override;
uint32_t getOemId() const override;
- uint32_t getOemSymbolId() const override;
+ SymIndexId getOemSymbolId() const override;
uint32_t getOffsetInUdt() const override;
PDB_Cpu getPlatform() const override;
uint32_t getRank() const override;
@@ -105,9 +106,9 @@ public:
std::string getSourceFileName() const override;
std::unique_ptr<IPDBLineNumber> getSrcLineOnTypeDefn() const override;
uint32_t getStride() const override;
- uint32_t getSubTypeId() const override;
+ SymIndexId getSubTypeId() const override;
std::string getSymbolsFileName() const override;
- uint32_t getSymIndexId() const override;
+ SymIndexId getSymIndexId() const override;
uint32_t getTargetOffset() const override;
uint32_t getTargetRelativeVirtualAddress() const override;
uint64_t getTargetVirtualAddress() const override;
@@ -115,16 +116,16 @@ public:
uint32_t getTextureSlot() const override;
uint32_t getTimeStamp() const override;
uint32_t getToken() const override;
- uint32_t getTypeId() const override;
+ SymIndexId getTypeId() const override;
uint32_t getUavSlot() const override;
std::string getUndecoratedName() const override;
std::string getUndecoratedNameEx(PDB_UndnameFlags Flags) const override;
- uint32_t getUnmodifiedTypeId() const override;
- uint32_t getUpperBoundId() const override;
+ SymIndexId getUnmodifiedTypeId() const override;
+ SymIndexId getUpperBoundId() const override;
Variant getValue() const override;
uint32_t getVirtualBaseDispIndex() const override;
uint32_t getVirtualBaseOffset() const override;
- uint32_t getVirtualTableShapeId() const override;
+ SymIndexId getVirtualTableShapeId() const override;
std::unique_ptr<PDBSymbolTypeBuiltin>
getVirtualBaseTableType() const override;
PDB_DataKind getDataKind() const override;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIASession.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIASession.h
index a63659439389..592e061a8d83 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIASession.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIASession.h
@@ -32,7 +32,7 @@ public:
uint64_t getLoadAddress() const override;
bool setLoadAddress(uint64_t Address) override;
std::unique_ptr<PDBSymbolExe> getGlobalScope() override;
- std::unique_ptr<PDBSymbol> getSymbolById(uint32_t SymbolId) const override;
+ std::unique_ptr<PDBSymbol> getSymbolById(SymIndexId SymbolId) const override;
bool addressForVA(uint64_t VA, uint32_t &Section,
uint32_t &Offset) const override;
@@ -85,6 +85,7 @@ public:
std::unique_ptr<IPDBEnumSectionContribs> getSectionContribs() const override;
+ std::unique_ptr<IPDBEnumFrameData> getFrameData() const override;
private:
CComPtr<IDiaSession> Session;
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h b/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h
index 03205a986f1a..997f13f5f30e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h
@@ -16,29 +16,37 @@
namespace llvm {
namespace pdb {
-enum class generic_error_code {
- invalid_path = 1,
+enum class pdb_error_code {
+ invalid_utf8_path = 1,
dia_sdk_not_present,
- type_server_not_found,
+ dia_failed_loading,
+ signature_out_of_date,
+ external_cmdline_ref,
unspecified,
};
+} // namespace pdb
+} // namespace llvm
+
+namespace std {
+template <>
+struct is_error_code_enum<llvm::pdb::pdb_error_code> : std::true_type {};
+} // namespace std
+
+namespace llvm {
+namespace pdb {
+const std::error_category &PDBErrCategory();
+
+inline std::error_code make_error_code(pdb_error_code E) {
+ return std::error_code(static_cast<int>(E), PDBErrCategory());
+}
/// Base class for errors originating when parsing raw PDB files
-class GenericError : public ErrorInfo<GenericError> {
+class PDBError : public ErrorInfo<PDBError, StringError> {
public:
+ using ErrorInfo<PDBError, StringError>::ErrorInfo; // inherit constructors
+ PDBError(const Twine &S) : ErrorInfo(S, pdb_error_code::unspecified) {}
static char ID;
- GenericError(generic_error_code C);
- GenericError(StringRef Context);
- GenericError(generic_error_code C, StringRef Context);
-
- void log(raw_ostream &OS) const override;
- StringRef getErrorMessage() const;
- std::error_code convertToErrorCode() const override;
-
-private:
- std::string ErrMsg;
- generic_error_code Code;
};
-}
-}
+} // namespace pdb
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h
index 67b5a06d7c59..0d7a286a11a6 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h
@@ -32,7 +32,6 @@ public:
virtual Optional<RecordType> getItemAtIndex(uint32_t Index) const = 0;
virtual bool getNext(RecordType &Record) = 0;
virtual void reset() = 0;
- virtual IPDBDataStream *clone() const = 0;
};
} // end namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
index b6b7d95f6282..7017f2600e9b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
@@ -10,6 +10,7 @@
#ifndef LLVM_DEBUGINFO_PDB_IPDBENUMCHILDREN_H
#define LLVM_DEBUGINFO_PDB_IPDBENUMCHILDREN_H
+#include <cassert>
#include <cstdint>
#include <memory>
@@ -27,7 +28,19 @@ public:
virtual ChildTypePtr getChildAtIndex(uint32_t Index) const = 0;
virtual ChildTypePtr getNext() = 0;
virtual void reset() = 0;
- virtual MyType *clone() const = 0;
+};
+
+template <typename ChildType>
+class NullEnumerator : public IPDBEnumChildren<ChildType> {
+ virtual uint32_t getChildCount() const override { return 0; }
+ virtual std::unique_ptr<ChildType>
+ getChildAtIndex(uint32_t Index) const override {
+ return nullptr;
+ }
+ virtual std::unique_ptr<ChildType> getNext() override {
+ return nullptr;
+ }
+ virtual void reset() override {}
};
} // end namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBFrameData.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBFrameData.h
new file mode 100644
index 000000000000..74679215b880
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBFrameData.h
@@ -0,0 +1,36 @@
+//===- IPDBFrameData.h - base interface for frame data ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_IPDBFRAMEDATA_H
+#define LLVM_DEBUGINFO_PDB_IPDBFRAMEDATA_H
+
+#include <cstdint>
+#include <string>
+
+namespace llvm {
+namespace pdb {
+
+/// IPDBFrameData defines an interface used to represent the frame data of a
+/// code block.
+class IPDBFrameData {
+public:
+ virtual ~IPDBFrameData();
+
+ virtual uint32_t getAddressOffset() const = 0;
+ virtual uint32_t getAddressSection() const = 0;
+ virtual uint32_t getLengthBlock() const = 0;
+ virtual std::string getProgram() const = 0;
+ virtual uint32_t getRelativeVirtualAddress() const = 0;
+ virtual uint64_t getVirtualAddress() const = 0;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
index bcb2eaa35630..7c818d7cadeb 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
@@ -11,6 +11,7 @@
#define LLVM_DEBUGINFO_PDB_IPDBRAWSYMBOL_H
#include "PDBTypes.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
@@ -21,9 +22,26 @@ class raw_ostream;
namespace pdb {
+class IPDBSession;
class PDBSymbolTypeVTable;
class PDBSymbolTypeVTableShape;
+enum class PdbSymbolIdField : uint32_t {
+ None = 0,
+ SymIndexId = 1 << 0,
+ LexicalParent = 1 << 1,
+ ClassParent = 1 << 2,
+ Type = 1 << 3,
+ UnmodifiedType = 1 << 4,
+ All = 0xFFFFFFFF,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ All)
+};
+
+void dumpSymbolIdField(raw_ostream &OS, StringRef Name, SymIndexId Value,
+ int Indent, const IPDBSession &Session,
+ PdbSymbolIdField FieldId, PdbSymbolIdField ShowFlags,
+ PdbSymbolIdField RecurseFlags);
+
/// IPDBRawSymbol defines an interface used to represent an arbitrary symbol.
/// It exposes a monolithic interface consisting of accessors for the union of
/// all properties that are valid for any symbol type. This interface is then
@@ -33,7 +51,8 @@ class IPDBRawSymbol {
public:
virtual ~IPDBRawSymbol();
- virtual void dump(raw_ostream &OS, int Indent) const = 0;
+ virtual void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const = 0;
virtual std::unique_ptr<IPDBEnumSymbols>
findChildren(PDB_SymType Type) const = 0;
@@ -74,26 +93,26 @@ public:
virtual uint32_t getAddressOffset() const = 0;
virtual uint32_t getAddressSection() const = 0;
virtual uint32_t getAge() const = 0;
- virtual uint32_t getArrayIndexTypeId() const = 0;
+ virtual SymIndexId getArrayIndexTypeId() const = 0;
virtual uint32_t getBaseDataOffset() const = 0;
virtual uint32_t getBaseDataSlot() const = 0;
- virtual uint32_t getBaseSymbolId() const = 0;
+ virtual SymIndexId getBaseSymbolId() const = 0;
virtual PDB_BuiltinType getBuiltinType() const = 0;
virtual uint32_t getBitPosition() const = 0;
virtual PDB_CallingConv getCallingConvention() const = 0;
- virtual uint32_t getClassParentId() const = 0;
+ virtual SymIndexId getClassParentId() const = 0;
virtual std::string getCompilerName() const = 0;
virtual uint32_t getCount() const = 0;
virtual uint32_t getCountLiveRanges() const = 0;
virtual void getFrontEndVersion(VersionInfo &Version) const = 0;
virtual PDB_Lang getLanguage() const = 0;
- virtual uint32_t getLexicalParentId() const = 0;
+ virtual SymIndexId getLexicalParentId() const = 0;
virtual std::string getLibraryName() const = 0;
virtual uint32_t getLiveRangeStartAddressOffset() const = 0;
virtual uint32_t getLiveRangeStartAddressSection() const = 0;
virtual uint32_t getLiveRangeStartRelativeVirtualAddress() const = 0;
virtual codeview::RegisterId getLocalBasePointerRegisterId() const = 0;
- virtual uint32_t getLowerBoundId() const = 0;
+ virtual SymIndexId getLowerBoundId() const = 0;
virtual uint32_t getMemorySpaceKind() const = 0;
virtual std::string getName() const = 0;
virtual uint32_t getNumberOfAcceleratorPointerTags() const = 0;
@@ -103,7 +122,7 @@ public:
virtual uint32_t getNumberOfRows() const = 0;
virtual std::string getObjectFileName() const = 0;
virtual uint32_t getOemId() const = 0;
- virtual uint32_t getOemSymbolId() const = 0;
+ virtual SymIndexId getOemSymbolId() const = 0;
virtual uint32_t getOffsetInUdt() const = 0;
virtual PDB_Cpu getPlatform() const = 0;
virtual uint32_t getRank() const = 0;
@@ -118,9 +137,9 @@ public:
virtual std::unique_ptr<IPDBLineNumber>
getSrcLineOnTypeDefn() const = 0;
virtual uint32_t getStride() const = 0;
- virtual uint32_t getSubTypeId() const = 0;
+ virtual SymIndexId getSubTypeId() const = 0;
virtual std::string getSymbolsFileName() const = 0;
- virtual uint32_t getSymIndexId() const = 0;
+ virtual SymIndexId getSymIndexId() const = 0;
virtual uint32_t getTargetOffset() const = 0;
virtual uint32_t getTargetRelativeVirtualAddress() const = 0;
virtual uint64_t getTargetVirtualAddress() const = 0;
@@ -128,18 +147,18 @@ public:
virtual uint32_t getTextureSlot() const = 0;
virtual uint32_t getTimeStamp() const = 0;
virtual uint32_t getToken() const = 0;
- virtual uint32_t getTypeId() const = 0;
+ virtual SymIndexId getTypeId() const = 0;
virtual uint32_t getUavSlot() const = 0;
virtual std::string getUndecoratedName() const = 0;
virtual std::string getUndecoratedNameEx(PDB_UndnameFlags Flags) const = 0;
- virtual uint32_t getUnmodifiedTypeId() const = 0;
- virtual uint32_t getUpperBoundId() const = 0;
+ virtual SymIndexId getUnmodifiedTypeId() const = 0;
+ virtual SymIndexId getUpperBoundId() const = 0;
virtual Variant getValue() const = 0;
virtual uint32_t getVirtualBaseDispIndex() const = 0;
virtual uint32_t getVirtualBaseOffset() const = 0;
virtual std::unique_ptr<PDBSymbolTypeBuiltin>
getVirtualBaseTableType() const = 0;
- virtual uint32_t getVirtualTableShapeId() const = 0;
+ virtual SymIndexId getVirtualTableShapeId() const = 0;
virtual PDB_DataKind getDataKind() const = 0;
virtual PDB_SymType getSymTag() const = 0;
virtual codeview::GUID getGuid() const = 0;
@@ -237,6 +256,8 @@ public:
virtual std::string getUnused() const = 0;
};
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
} // namespace pdb
} // namespace llvm
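
PdbSymbolIdField is a bitmask enum (LLVM_MARK_AS_BITMASK_ENUM plus the namespace-level LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE above), so callers select which symbol-id fields dump() prints, and which it recurses into, with ordinary bitwise operators. A sketch with a hypothetical raw symbol `RawSym`:

    // Sketch only: show indices and type ids, recurse into nothing.
    using llvm::pdb::PdbSymbolIdField;
    const PdbSymbolIdField Show =
        PdbSymbolIdField::SymIndexId | PdbSymbolIdField::Type;
    RawSym.dump(llvm::outs(), /*Indent=*/2, Show,
                /*RecurseIdFields=*/PdbSymbolIdField::None);
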
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
index 88ec517bc4a5..88fd02c0a345 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
@@ -30,7 +30,8 @@ public:
virtual uint64_t getLoadAddress() const = 0;
virtual bool setLoadAddress(uint64_t Address) = 0;
virtual std::unique_ptr<PDBSymbolExe> getGlobalScope() = 0;
- virtual std::unique_ptr<PDBSymbol> getSymbolById(uint32_t SymbolId) const = 0;
+ virtual std::unique_ptr<PDBSymbol>
+ getSymbolById(SymIndexId SymbolId) const = 0;
virtual bool addressForVA(uint64_t VA, uint32_t &Section,
uint32_t &Offset) const = 0;
@@ -38,7 +39,7 @@ public:
uint32_t &Offset) const = 0;
template <typename T>
- std::unique_ptr<T> getConcreteSymbolById(uint32_t SymbolId) const {
+ std::unique_ptr<T> getConcreteSymbolById(SymIndexId SymbolId) const {
return unique_dyn_cast_or_null<T>(getSymbolById(SymbolId));
}
@@ -90,6 +91,9 @@ public:
virtual std::unique_ptr<IPDBEnumSectionContribs>
getSectionContribs() const = 0;
+
+ virtual std::unique_ptr<IPDBEnumFrameData>
+ getFrameData() const = 0;
};
} // namespace pdb
} // namespace llvm
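A minimal usage sketch of the retyped lookup API above (illustrative only, not part of the patch): getConcreteSymbolById<T>() forwards to getSymbolById() and performs a checked cast, so callers get a null pointer when the id does not resolve to the expected symbol kind. The session object and the id value are assumptions for the example.

#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
#include <memory>

using namespace llvm::pdb;

// Returns the enum symbol for Id, or nullptr if Id names a different kind.
std::unique_ptr<PDBSymbolTypeEnum> lookupEnum(const IPDBSession &Session,
                                              SymIndexId Id) {
  return Session.getConcreteSymbolById<PDBSymbolTypeEnum>(Id);
}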
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
index ce4d07917755..ac7f741afefa 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
@@ -51,6 +51,7 @@ public:
void setObjFileName(StringRef Name);
void setFirstSectionContrib(const SectionContrib &SC);
void addSymbol(codeview::CVSymbol Symbol);
+ void addSymbolsInBulk(ArrayRef<uint8_t> BulkSymbols);
void
addDebugSubsection(std::shared_ptr<codeview::DebugSubsection> Subsection);
@@ -91,7 +92,7 @@ private:
std::string ModuleName;
std::string ObjFileName;
std::vector<std::string> SourceFiles;
- std::vector<codeview::CVSymbol> Symbols;
+ std::vector<ArrayRef<uint8_t>> Symbols;
std::vector<std::unique_ptr<codeview::DebugSubsectionRecordBuilder>>
C13Builders;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h
index 280615bdb507..a3ca607efbef 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h
@@ -78,7 +78,7 @@ public:
const DbiModuleList &modules() const;
- FixedStreamArray<object::coff_section> getSectionHeaders();
+ FixedStreamArray<object::coff_section> getSectionHeaders() const;
FixedStreamArray<object::FpoData> getFpoRecords();
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
index 51befcdac775..b538de576677 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
@@ -15,6 +15,7 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Support/Error.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
@@ -24,11 +25,15 @@
#include "llvm/Support/Endian.h"
namespace llvm {
+namespace codeview {
+struct FrameData;
+}
namespace msf {
class MSFBuilder;
}
namespace object {
struct coff_section;
+struct FpoData;
}
namespace pdb {
class DbiStream;
@@ -65,6 +70,8 @@ public:
void setGlobalsStreamIndex(uint32_t Index);
void setPublicsStreamIndex(uint32_t Index);
void setSymbolRecordStreamIndex(uint32_t Index);
+ void addNewFpoData(const codeview::FrameData &FD);
+ void addOldFpoData(const object::FpoData &Fpo);
Expected<DbiModuleDescriptorBuilder &> addModuleInfo(StringRef ModuleName);
Error addModuleSourceFile(DbiModuleDescriptorBuilder &Module, StringRef File);
@@ -84,7 +91,8 @@ public:
private:
struct DebugStream {
- ArrayRef<uint8_t> Data;
+ std::function<Error(BinaryStreamWriter &)> WriteFn;
+ uint32_t Size = 0;
uint16_t StreamNumber = kInvalidStreamIndex;
};
@@ -117,6 +125,9 @@ private:
std::vector<std::unique_ptr<DbiModuleDescriptorBuilder>> ModiList;
+ Optional<codeview::DebugFrameDataSubsection> NewFpoData;
+ std::vector<object::FpoData> OldFpoData;
+
StringMap<uint32_t> SourceFileNames;
PDBStringTableBuilder ECNamesBuilder;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
index 1a4f89d607df..4c39ca762b5b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
@@ -61,7 +61,6 @@ public:
void addGlobalSymbol(const codeview::ProcRefSym &Sym);
void addGlobalSymbol(const codeview::DataSym &Sym);
void addGlobalSymbol(const codeview::ConstantSym &Sym);
- void addGlobalSymbol(const codeview::UDTSym &Sym);
void addGlobalSymbol(const codeview::CVSymbol &Sym);
private:
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
index dd04b5c5681d..7f84564ee988 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
@@ -10,18 +10,20 @@
#ifndef LLVM_DEBUGINFO_PDB_RAW_GLOBALS_STREAM_H
#define LLVM_DEBUGINFO_PDB_RAW_GLOBALS_STREAM_H
+#include "llvm/ADT/iterator.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/Error.h"
-#include "llvm/ADT/iterator.h"
namespace llvm {
namespace pdb {
class DbiStream;
class PDBFile;
+class SymbolStream;
/// Iterator over hash records producing symbol record offsets. Abstracts away
/// the fact that symbol record offsets on disk are off-by-one.
@@ -50,8 +52,9 @@ class GSIHashTable {
public:
const GSIHashHeader *HashHdr;
FixedStreamArray<PSHashRecord> HashRecords;
- ArrayRef<uint8_t> HashBitmap;
+ FixedStreamArray<support::ulittle32_t> HashBitmap;
FixedStreamArray<support::ulittle32_t> HashBuckets;
+ std::array<int32_t, IPHR_HASH + 1> BucketMap;
Error read(BinaryStreamReader &Reader);
@@ -72,6 +75,9 @@ public:
const GSIHashTable &getGlobalsTable() const { return GlobalsTable; }
Error reload();
+ std::vector<std::pair<uint32_t, codeview::CVSymbol>>
+ findRecordsByName(StringRef Name, const SymbolStream &Symbols) const;
+
private:
GSIHashTable GlobalsTable;
std::unique_ptr<msf::MappedBlockStream> Stream;
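A hedged sketch of driving the new name lookup on GlobalsStream (not part of the patch). The GlobalsStream and SymbolStream objects are assumed to have been loaded from a PDBFile elsewhere; the uint32_t in each returned pair is taken here to be the record's offset in the symbol record stream, which this header does not spell out.

#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"

using namespace llvm;
using namespace llvm::pdb;

// Collects every global record whose name matches Name.
std::vector<std::pair<uint32_t, codeview::CVSymbol>>
findGlobals(const GlobalsStream &Globals, const SymbolStream &Symbols,
            StringRef Name) {
  return Globals.findRecordsByName(Name, Symbols);
}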
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
index 419e8ada06f7..101127a355f5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
@@ -35,11 +35,18 @@ public:
InfoStreamBuilder &operator=(const InfoStreamBuilder &) = delete;
void setVersion(PdbRaw_ImplVer V);
+ void addFeature(PdbRaw_FeatureSig Sig);
+
+ // If this is true, the PDB contents are hashed and this hash is used as
+ // PDB GUID and as Signature. The age is always 1.
+ void setHashPDBContentsToGUID(bool B);
+
+ // These only have an effect if hashPDBContentsToGUID() is false.
void setSignature(uint32_t S);
void setAge(uint32_t A);
void setGuid(codeview::GUID G);
- void addFeature(PdbRaw_FeatureSig Sig);
+ bool hashPDBContentsToGUID() const { return HashPDBContentsToGUID; }
uint32_t getAge() const { return Age; }
codeview::GUID getGuid() const { return Guid; }
Optional<uint32_t> getSignature() const { return Signature; }
@@ -60,6 +67,8 @@ private:
Optional<uint32_t> Signature;
codeview::GUID Guid;
+ bool HashPDBContentsToGUID = false;
+
NamedStreamMap &NamedStreams;
};
}
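A hedged sketch of opting in to the reproducible-GUID mode added above (not part of the patch). The builder is assumed to be the InfoStreamBuilder of a PDBFileBuilder configured elsewhere, and the signature value is a placeholder.

#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"

void configureInfoStream(llvm::pdb::InfoStreamBuilder &Builder,
                         bool Reproducible) {
  Builder.setVersion(llvm::pdb::PdbImplVC70);
  if (Reproducible) {
    // GUID and signature are derived from a hash of the PDB contents; the
    // explicit setters below would be ignored and the age forced to 1.
    Builder.setHashPDBContentsToGUID(true);
  } else {
    Builder.setAge(1);
    Builder.setSignature(0x12345678); // placeholder value
  }
}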
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
index efc25e0559b9..8d590df288f3 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
@@ -15,6 +15,7 @@
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
#include <cstdint>
@@ -43,6 +44,8 @@ public:
symbols(bool *HadError) const;
const codeview::CVSymbolArray &getSymbolArray() const { return SymbolArray; }
+ const codeview::CVSymbolArray
+ getSymbolArrayForScope(uint32_t ScopeBegin) const;
BinarySubstreamRef getSymbolsSubstream() const;
BinarySubstreamRef getC11LinesSubstream() const;
@@ -51,6 +54,8 @@ public:
ModuleDebugStreamRef &operator=(ModuleDebugStreamRef &&Other) = delete;
+ codeview::CVSymbol readSymbolAtOffset(uint32_t Offset) const;
+
iterator_range<DebugSubsectionIterator> subsections() const;
codeview::DebugSubsectionArray getSubsectionsArray() const {
return Subsections;
@@ -64,7 +69,7 @@ public:
findChecksumsSubsection() const;
private:
- const DbiModuleDescriptor &Mod;
+ DbiModuleDescriptor Mod;
uint32_t Signature;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
index bd5c09e5ff76..3cd465503044 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
@@ -21,11 +21,12 @@ public:
NativeCompilandSymbol(NativeSession &Session, SymIndexId SymbolId,
DbiModuleDescriptor MI);
- std::unique_ptr<NativeRawSymbol> clone() const override;
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
PDB_SymType getSymTag() const override;
bool isEditAndContinueEnabled() const override;
- uint32_t getLexicalParentId() const override;
+ SymIndexId getLexicalParentId() const override;
std::string getLibraryName() const override;
std::string getName() const override;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
new file mode 100644
index 000000000000..4442a1ec41fb
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
@@ -0,0 +1,43 @@
+//==- NativeEnumGlobals.h - Native Global Enumerator impl --------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMGLOBALS_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMGLOBALS_H
+
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+
+#include <vector>
+
+namespace llvm {
+namespace pdb {
+
+class NativeSession;
+
+class NativeEnumGlobals : public IPDBEnumChildren<PDBSymbol> {
+public:
+ NativeEnumGlobals(NativeSession &Session,
+ std::vector<codeview::SymbolKind> Kinds);
+
+ uint32_t getChildCount() const override;
+ std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
+ std::unique_ptr<PDBSymbol> getNext() override;
+ void reset() override;
+
+private:
+ std::vector<uint32_t> MatchOffsets;
+ uint32_t Index;
+ NativeSession &Session;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
index 6aa1460dbb4e..c268641a1008 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
@@ -11,28 +11,23 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMMODULES_H
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
-#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
namespace llvm {
namespace pdb {
-class DbiModuleList;
class NativeSession;
class NativeEnumModules : public IPDBEnumChildren<PDBSymbol> {
public:
- NativeEnumModules(NativeSession &Session, const DbiModuleList &Modules,
- uint32_t Index = 0);
+ NativeEnumModules(NativeSession &Session, uint32_t Index = 0);
uint32_t getChildCount() const override;
std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
std::unique_ptr<PDBSymbol> getNext() override;
void reset() override;
- NativeEnumModules *clone() const override;
private:
NativeSession &Session;
- const DbiModuleList &Modules;
uint32_t Index;
};
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h
deleted file mode 100644
index 41b7b78b8d80..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===- NativeEnumSymbol.h - info about enum type ----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOL_H
-#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOL_H
-
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
-#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-
-namespace llvm {
-namespace pdb {
-
-class NativeEnumSymbol : public NativeRawSymbol,
- public codeview::TypeVisitorCallbacks {
-public:
- NativeEnumSymbol(NativeSession &Session, SymIndexId Id,
- const codeview::CVType &CV);
- ~NativeEnumSymbol() override;
-
- std::unique_ptr<NativeRawSymbol> clone() const override;
-
- std::unique_ptr<IPDBEnumSymbols>
- findChildren(PDB_SymType Type) const override;
-
- Error visitKnownRecord(codeview::CVType &CVR,
- codeview::EnumRecord &Record) override;
- Error visitKnownMember(codeview::CVMemberRecord &CVM,
- codeview::EnumeratorRecord &Record) override;
-
- PDB_SymType getSymTag() const override;
- uint32_t getClassParentId() const override;
- uint32_t getUnmodifiedTypeId() const override;
- bool hasConstructor() const override;
- bool hasAssignmentOperator() const override;
- bool hasCastOperator() const override;
- uint64_t getLength() const override;
- std::string getName() const override;
- bool isNested() const override;
- bool hasOverloadedOperator() const override;
- bool isPacked() const override;
- bool isScoped() const override;
- uint32_t getTypeId() const override;
-
-protected:
- codeview::CVType CV;
- codeview::EnumRecord Record;
-};
-
-} // namespace pdb
-} // namespace llvm
-
-#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOL_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
index e0a5c8d9ad81..f8ac1655dc61 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
@@ -26,23 +26,20 @@ class NativeEnumTypes : public IPDBEnumChildren<PDBSymbol> {
public:
NativeEnumTypes(NativeSession &Session,
codeview::LazyRandomTypeCollection &TypeCollection,
- codeview::TypeLeafKind Kind);
+ std::vector<codeview::TypeLeafKind> Kinds);
+
+ NativeEnumTypes(NativeSession &Session,
+ std::vector<codeview::TypeIndex> Indices);
uint32_t getChildCount() const override;
std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
std::unique_ptr<PDBSymbol> getNext() override;
void reset() override;
- NativeEnumTypes *clone() const override;
private:
- NativeEnumTypes(NativeSession &Session,
- const std::vector<codeview::TypeIndex> &Matches,
- codeview::TypeLeafKind Kind);
-
std::vector<codeview::TypeIndex> Matches;
uint32_t Index;
NativeSession &Session;
- codeview::TypeLeafKind Kind;
};
} // namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
index 587c7ff2b092..f4030da1d026 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
@@ -16,11 +16,14 @@
namespace llvm {
namespace pdb {
+class DbiStream;
+
class NativeExeSymbol : public NativeRawSymbol {
-public:
- NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId);
+ // EXE symbol is the authority on the various symbol types.
+ DbiStream *Dbi = nullptr;
- std::unique_ptr<NativeRawSymbol> clone() const override;
+public:
+ NativeExeSymbol(NativeSession &Session, SymIndexId Id);
std::unique_ptr<IPDBEnumSymbols>
findChildren(PDB_SymType Type) const override;
@@ -30,9 +33,6 @@ public:
codeview::GUID getGuid() const override;
bool hasCTypes() const override;
bool hasPrivateSymbols() const override;
-
-private:
- PDBFile &File;
};
} // namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
index 5b70ecfa2056..6505a7d39573 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
@@ -19,15 +19,16 @@ namespace pdb {
class NativeSession;
-typedef uint32_t SymIndexId;
-
class NativeRawSymbol : public IPDBRawSymbol {
-public:
- NativeRawSymbol(NativeSession &PDBSession, SymIndexId SymbolId);
+ friend class SymbolCache;
+ virtual void initialize() {}
- virtual std::unique_ptr<NativeRawSymbol> clone() const = 0;
+public:
+ NativeRawSymbol(NativeSession &PDBSession, PDB_SymType Tag,
+ SymIndexId SymbolId);
- void dump(raw_ostream &OS, int Indent) const override;
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
std::unique_ptr<IPDBEnumSymbols>
findChildren(PDB_SymType Type) const override;
@@ -68,25 +69,25 @@ public:
uint32_t getAddressOffset() const override;
uint32_t getAddressSection() const override;
uint32_t getAge() const override;
- uint32_t getArrayIndexTypeId() const override;
+ SymIndexId getArrayIndexTypeId() const override;
uint32_t getBaseDataOffset() const override;
uint32_t getBaseDataSlot() const override;
- uint32_t getBaseSymbolId() const override;
+ SymIndexId getBaseSymbolId() const override;
PDB_BuiltinType getBuiltinType() const override;
uint32_t getBitPosition() const override;
PDB_CallingConv getCallingConvention() const override;
- uint32_t getClassParentId() const override;
+ SymIndexId getClassParentId() const override;
std::string getCompilerName() const override;
uint32_t getCount() const override;
uint32_t getCountLiveRanges() const override;
PDB_Lang getLanguage() const override;
- uint32_t getLexicalParentId() const override;
+ SymIndexId getLexicalParentId() const override;
std::string getLibraryName() const override;
uint32_t getLiveRangeStartAddressOffset() const override;
uint32_t getLiveRangeStartAddressSection() const override;
uint32_t getLiveRangeStartRelativeVirtualAddress() const override;
codeview::RegisterId getLocalBasePointerRegisterId() const override;
- uint32_t getLowerBoundId() const override;
+ SymIndexId getLowerBoundId() const override;
uint32_t getMemorySpaceKind() const override;
std::string getName() const override;
uint32_t getNumberOfAcceleratorPointerTags() const override;
@@ -96,7 +97,7 @@ public:
uint32_t getNumberOfRows() const override;
std::string getObjectFileName() const override;
uint32_t getOemId() const override;
- uint32_t getOemSymbolId() const override;
+ SymIndexId getOemSymbolId() const override;
uint32_t getOffsetInUdt() const override;
PDB_Cpu getPlatform() const override;
uint32_t getRank() const override;
@@ -110,9 +111,9 @@ public:
std::string getSourceFileName() const override;
std::unique_ptr<IPDBLineNumber> getSrcLineOnTypeDefn() const override;
uint32_t getStride() const override;
- uint32_t getSubTypeId() const override;
+ SymIndexId getSubTypeId() const override;
std::string getSymbolsFileName() const override;
- uint32_t getSymIndexId() const override;
+ SymIndexId getSymIndexId() const override;
uint32_t getTargetOffset() const override;
uint32_t getTargetRelativeVirtualAddress() const override;
uint64_t getTargetVirtualAddress() const override;
@@ -120,16 +121,16 @@ public:
uint32_t getTextureSlot() const override;
uint32_t getTimeStamp() const override;
uint32_t getToken() const override;
- uint32_t getTypeId() const override;
+ SymIndexId getTypeId() const override;
uint32_t getUavSlot() const override;
std::string getUndecoratedName() const override;
std::string getUndecoratedNameEx(PDB_UndnameFlags Flags) const override;
- uint32_t getUnmodifiedTypeId() const override;
- uint32_t getUpperBoundId() const override;
+ SymIndexId getUnmodifiedTypeId() const override;
+ SymIndexId getUpperBoundId() const override;
Variant getValue() const override;
uint32_t getVirtualBaseDispIndex() const override;
uint32_t getVirtualBaseOffset() const override;
- uint32_t getVirtualTableShapeId() const override;
+ SymIndexId getVirtualTableShapeId() const override;
std::unique_ptr<PDBSymbolTypeBuiltin>
getVirtualBaseTableType() const override;
PDB_DataKind getDataKind() const override;
@@ -230,6 +231,7 @@ public:
protected:
NativeSession &Session;
+ PDB_SymType Tag;
SymIndexId SymbolId;
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
index aff7ef2f8f21..4878e47d3121 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
@@ -15,9 +15,8 @@
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
-#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
-#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
@@ -25,6 +24,7 @@ namespace llvm {
class MemoryBuffer;
namespace pdb {
class PDBFile;
+class NativeExeSymbol;
class NativeSession : public IPDBSession {
public:
@@ -37,21 +37,10 @@ public:
static Error createFromExe(StringRef Path,
std::unique_ptr<IPDBSession> &Session);
- std::unique_ptr<PDBSymbolCompiland>
- createCompilandSymbol(DbiModuleDescriptor MI);
-
- std::unique_ptr<PDBSymbolTypeEnum>
- createEnumSymbol(codeview::TypeIndex Index);
-
- std::unique_ptr<IPDBEnumSymbols>
- createTypeEnumerator(codeview::TypeLeafKind Kind);
-
- SymIndexId findSymbolByTypeIndex(codeview::TypeIndex TI);
-
uint64_t getLoadAddress() const override;
bool setLoadAddress(uint64_t Address) override;
std::unique_ptr<PDBSymbolExe> getGlobalScope() override;
- std::unique_ptr<PDBSymbol> getSymbolById(uint32_t SymbolId) const override;
+ std::unique_ptr<PDBSymbol> getSymbolById(SymIndexId SymbolId) const override;
bool addressForVA(uint64_t VA, uint32_t &Section,
uint32_t &Offset) const override;
@@ -104,14 +93,23 @@ public:
std::unique_ptr<IPDBEnumSectionContribs> getSectionContribs() const override;
+ std::unique_ptr<IPDBEnumFrameData> getFrameData() const override;
+
PDBFile &getPDBFile() { return *Pdb; }
const PDBFile &getPDBFile() const { return *Pdb; }
+ NativeExeSymbol &getNativeGlobalScope() const;
+ SymbolCache &getSymbolCache() { return Cache; }
+ const SymbolCache &getSymbolCache() const { return Cache; }
+
private:
+ void initializeExeSymbol();
+
std::unique_ptr<PDBFile> Pdb;
std::unique_ptr<BumpPtrAllocator> Allocator;
- std::vector<std::unique_ptr<NativeRawSymbol>> SymbolCache;
- DenseMap<codeview::TypeIndex, SymIndexId> TypeIndexToSymbolId;
+
+ SymbolCache Cache;
+ SymIndexId ExeSymbol = 0;
};
} // namespace pdb
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
new file mode 100644
index 000000000000..acc5eb8ff2c2
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
@@ -0,0 +1,51 @@
+//===- NativeSymbolEnumerator.h - info about enumerator values --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVESYMBOLENUMERATOR_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVESYMBOLENUMERATOR_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+class NativeTypeEnum;
+
+class NativeSymbolEnumerator : public NativeRawSymbol {
+public:
+ NativeSymbolEnumerator(NativeSession &Session, SymIndexId Id,
+ const NativeTypeEnum &Parent,
+ codeview::EnumeratorRecord Record);
+
+ ~NativeSymbolEnumerator() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ SymIndexId getClassParentId() const override;
+ SymIndexId getLexicalParentId() const override;
+ std::string getName() const override;
+ SymIndexId getTypeId() const override;
+ PDB_DataKind getDataKind() const override;
+ PDB_LocType getLocationType() const override;
+ bool isConstType() const override;
+ bool isVolatileType() const override;
+ bool isUnalignedType() const override;
+ Variant getValue() const override;
+
+protected:
+ const NativeTypeEnum &Parent;
+ codeview::EnumeratorRecord Record;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVESYMBOLENUMERATOR_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h
new file mode 100644
index 000000000000..10e68e6df450
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h
@@ -0,0 +1,50 @@
+//===- NativeTypeArray.h ------------------------------------------ C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEARRAY_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEARRAY_H
+
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeSession;
+
+class NativeTypeArray : public NativeRawSymbol {
+public:
+ NativeTypeArray(NativeSession &Session, SymIndexId Id, codeview::TypeIndex TI,
+ codeview::ArrayRecord Record);
+ ~NativeTypeArray() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ SymIndexId getArrayIndexTypeId() const override;
+
+ bool isConstType() const override;
+ bool isUnalignedType() const override;
+ bool isVolatileType() const override;
+
+ uint32_t getCount() const override;
+ SymIndexId getTypeId() const override;
+ uint64_t getLength() const override;
+
+protected:
+ codeview::ArrayRecord Record;
+ codeview::TypeIndex Index;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h
index 4f532c6e3829..725dfb89222f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h
@@ -1,4 +1,4 @@
-//===- NativeBuiltinSymbol.h -------------------------------------- C++ -*-===//
+//===- NativeTypeBuiltin.h ---------------------------------------- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEBUILTINSYMBOL_H
-#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEBUILTINSYMBOL_H
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEBUILTIN_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEBUILTIN_H
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
@@ -19,15 +19,15 @@ namespace pdb {
class NativeSession;
-class NativeBuiltinSymbol : public NativeRawSymbol {
+class NativeTypeBuiltin : public NativeRawSymbol {
public:
- NativeBuiltinSymbol(NativeSession &PDBSession, SymIndexId Id,
- PDB_BuiltinType T, uint64_t L);
- ~NativeBuiltinSymbol() override;
+ NativeTypeBuiltin(NativeSession &PDBSession, SymIndexId Id,
+ codeview::ModifierOptions Mods, PDB_BuiltinType T,
+ uint64_t L);
+ ~NativeTypeBuiltin() override;
- virtual std::unique_ptr<NativeRawSymbol> clone() const override;
-
- void dump(raw_ostream &OS, int Indent) const override;
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
PDB_SymType getSymTag() const override;
@@ -39,6 +39,7 @@ public:
protected:
NativeSession &Session;
+ codeview::ModifierOptions Mods;
PDB_BuiltinType Type;
uint64_t Length;
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
new file mode 100644
index 000000000000..a5cbefc18111
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
@@ -0,0 +1,75 @@
+//===- NativeTypeEnum.h - info about enum type ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEENUM_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEENUM_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypeBuiltin;
+
+class NativeTypeEnum : public NativeRawSymbol {
+public:
+ NativeTypeEnum(NativeSession &Session, SymIndexId Id, codeview::TypeIndex TI,
+ codeview::EnumRecord Record);
+
+ NativeTypeEnum(NativeSession &Session, SymIndexId Id,
+ NativeTypeEnum &UnmodifiedType,
+ codeview::ModifierRecord Modifier);
+ ~NativeTypeEnum() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ std::unique_ptr<IPDBEnumSymbols>
+ findChildren(PDB_SymType Type) const override;
+
+ PDB_BuiltinType getBuiltinType() const override;
+ PDB_SymType getSymTag() const override;
+ SymIndexId getUnmodifiedTypeId() const override;
+ bool hasConstructor() const override;
+ bool hasAssignmentOperator() const override;
+ bool hasCastOperator() const override;
+ uint64_t getLength() const override;
+ std::string getName() const override;
+ bool isConstType() const override;
+ bool isVolatileType() const override;
+ bool isUnalignedType() const override;
+ bool isNested() const override;
+ bool hasOverloadedOperator() const override;
+ bool hasNestedTypes() const override;
+ bool isIntrinsic() const override;
+ bool isPacked() const override;
+ bool isScoped() const override;
+ SymIndexId getTypeId() const override;
+ bool isRefUdt() const override;
+ bool isValueUdt() const override;
+ bool isInterfaceUdt() const override;
+
+ const NativeTypeBuiltin &getUnderlyingBuiltinType() const;
+ const codeview::EnumRecord &getEnumRecord() const { return *Record; }
+
+protected:
+ codeview::TypeIndex Index;
+ Optional<codeview::EnumRecord> Record;
+ NativeTypeEnum *UnmodifiedType = nullptr;
+ Optional<codeview::ModifierRecord> Modifiers;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEENUM_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
new file mode 100644
index 000000000000..1b1b87f6581f
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
@@ -0,0 +1,74 @@
+//===- NativeTypeFunctionSig.h - info about function signature ---*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypeUDT;
+
+class NativeTypeFunctionSig : public NativeRawSymbol {
+protected:
+ void initialize() override;
+
+public:
+ NativeTypeFunctionSig(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI, codeview::ProcedureRecord Proc);
+
+ NativeTypeFunctionSig(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI,
+ codeview::MemberFunctionRecord MemberFunc);
+
+ ~NativeTypeFunctionSig() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ std::unique_ptr<IPDBEnumSymbols>
+ findChildren(PDB_SymType Type) const override;
+
+ SymIndexId getClassParentId() const override;
+ PDB_CallingConv getCallingConvention() const override;
+ uint32_t getCount() const override;
+ SymIndexId getTypeId() const override;
+ int32_t getThisAdjust() const override;
+ bool hasConstructor() const override;
+ bool isConstType() const override;
+ bool isConstructorVirtualBase() const override;
+ bool isCxxReturnUdt() const override;
+ bool isUnalignedType() const override;
+ bool isVolatileType() const override;
+
+private:
+ void initializeArgList(codeview::TypeIndex ArgListTI);
+
+ union {
+ codeview::MemberFunctionRecord MemberFunc;
+ codeview::ProcedureRecord Proc;
+ };
+
+ SymIndexId ClassParentId = 0;
+ codeview::TypeIndex Index;
+ codeview::ArgListRecord ArgList;
+ bool IsMemberFunction = false;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H \ No newline at end of file
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
new file mode 100644
index 000000000000..bcb7431fecf1
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
@@ -0,0 +1,61 @@
+//===- NativeTypePointer.h - info about pointer type -------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEPOINTER_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEPOINTER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypePointer : public NativeRawSymbol {
+public:
+ // Create a pointer record for a simple type.
+ NativeTypePointer(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI);
+
+ // Create a pointer record for a non-simple type.
+ NativeTypePointer(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI, codeview::PointerRecord PR);
+ ~NativeTypePointer() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ SymIndexId getClassParentId() const override;
+ bool isConstType() const override;
+ uint64_t getLength() const override;
+ bool isReference() const override;
+ bool isRValueReference() const override;
+ bool isPointerToDataMember() const override;
+ bool isPointerToMemberFunction() const override;
+ SymIndexId getTypeId() const override;
+ bool isRestrictedType() const override;
+ bool isVolatileType() const override;
+ bool isUnalignedType() const override;
+
+ bool isSingleInheritance() const override;
+ bool isMultipleInheritance() const override;
+ bool isVirtualInheritance() const override;
+
+protected:
+ bool isMemberPointer() const;
+ codeview::TypeIndex TI;
+ Optional<codeview::PointerRecord> Record;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEPOINTER_H \ No newline at end of file
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
new file mode 100644
index 000000000000..06eb6fcf3764
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
@@ -0,0 +1,42 @@
+//===- NativeTypeTypedef.h - info about typedef ------------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPETYPEDEF_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPETYPEDEF_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypeTypedef : public NativeRawSymbol {
+public:
+ // Create a pointer record for a non-simple type.
+ NativeTypeTypedef(NativeSession &Session, SymIndexId Id,
+ codeview::UDTSym Typedef);
+
+ ~NativeTypeTypedef() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ std::string getName() const override;
+ SymIndexId getTypeId() const override;
+
+protected:
+ codeview::UDTSym Record;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPETYPEDEF_H \ No newline at end of file
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
new file mode 100644
index 000000000000..84821d8731be
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
@@ -0,0 +1,74 @@
+//===- NativeTypeUDT.h - info about class/struct type ------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEUDT_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEUDT_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypeUDT : public NativeRawSymbol {
+public:
+ NativeTypeUDT(NativeSession &Session, SymIndexId Id, codeview::TypeIndex TI,
+ codeview::ClassRecord Class);
+
+ NativeTypeUDT(NativeSession &Session, SymIndexId Id, codeview::TypeIndex TI,
+ codeview::UnionRecord Union);
+
+ NativeTypeUDT(NativeSession &Session, SymIndexId Id,
+ NativeTypeUDT &UnmodifiedType,
+ codeview::ModifierRecord Modifier);
+
+ ~NativeTypeUDT() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ std::string getName() const override;
+ SymIndexId getLexicalParentId() const override;
+ SymIndexId getUnmodifiedTypeId() const override;
+ SymIndexId getVirtualTableShapeId() const override;
+ uint64_t getLength() const override;
+ PDB_UdtType getUdtKind() const override;
+ bool hasConstructor() const override;
+ bool isConstType() const override;
+ bool hasAssignmentOperator() const override;
+ bool hasCastOperator() const override;
+ bool hasNestedTypes() const override;
+ bool hasOverloadedOperator() const override;
+ bool isInterfaceUdt() const override;
+ bool isIntrinsic() const override;
+ bool isNested() const override;
+ bool isPacked() const override;
+ bool isRefUdt() const override;
+ bool isScoped() const override;
+ bool isValueUdt() const override;
+ bool isUnalignedType() const override;
+ bool isVolatileType() const override;
+
+protected:
+ codeview::TypeIndex Index;
+
+ Optional<codeview::ClassRecord> Class;
+ Optional<codeview::UnionRecord> Union;
+ NativeTypeUDT *UnmodifiedType = nullptr;
+ codeview::TagRecord *Tag = nullptr;
+ Optional<codeview::ModifierRecord> Modifiers;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEUDT_H \ No newline at end of file
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
new file mode 100644
index 000000000000..a996f34ef859
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
@@ -0,0 +1,46 @@
+//===- NativeTypeVTShape.h - info about virtual table shape ------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypeVTShape : public NativeRawSymbol {
+public:
+ // Create a pointer record for a non-simple type.
+ NativeTypeVTShape(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI, codeview::VFTableShapeRecord SR);
+
+ ~NativeTypeVTShape() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ bool isConstType() const override;
+ bool isVolatileType() const override;
+ bool isUnalignedType() const override;
+ uint32_t getCount() const override;
+
+protected:
+ codeview::TypeIndex TI;
+ codeview::VFTableShapeRecord Record;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H \ No newline at end of file
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
index 7f9c4cf9fa83..37458749a8d8 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
@@ -53,7 +53,9 @@ public:
PDBStringTableBuilder &getStringTableBuilder();
GSIStreamBuilder &getGsiBuilder();
- Error commit(StringRef Filename);
+ // If HashPDBContentsToGUID is true on the InfoStreamBuilder, Guid is filled
+ // with the computed PDB GUID on return.
+ Error commit(StringRef Filename, codeview::GUID *Guid);
Expected<uint32_t> getNamedStreamIndex(StringRef Name) const;
Error addNamedStream(StringRef Name, StringRef Data);
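A hedged sketch of committing through the revised interface (not part of the patch). The builder setup and output path are assumptions; per the comment above, Guid receives the content-derived GUID when HashPDBContentsToGUID was enabled on the InfoStreamBuilder, so the caller can stamp it into the matching executable afterwards.

#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"

llvm::Error writePdb(llvm::pdb::PDBFileBuilder &Builder, llvm::StringRef Path) {
  llvm::codeview::GUID Guid;
  if (llvm::Error E = Builder.commit(Path, &Guid))
    return E;
  // Guid now holds whatever GUID was written into the info stream.
  return llvm::Error::success();
}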
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawError.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawError.h
index 3624a7682e38..97d11b4f20d1 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawError.h
@@ -31,23 +31,29 @@ enum class raw_error_code {
stream_too_long,
invalid_tpi_hash,
};
+} // namespace pdb
+} // namespace llvm
+
+namespace std {
+template <>
+struct is_error_code_enum<llvm::pdb::raw_error_code> : std::true_type {};
+} // namespace std
+
+namespace llvm {
+namespace pdb {
+const std::error_category &RawErrCategory();
+
+inline std::error_code make_error_code(raw_error_code E) {
+ return std::error_code(static_cast<int>(E), RawErrCategory());
+}
/// Base class for errors originating when parsing raw PDB files
-class RawError : public ErrorInfo<RawError> {
+class RawError : public ErrorInfo<RawError, StringError> {
public:
+ using ErrorInfo<RawError, StringError>::ErrorInfo; // inherit constructors
+ RawError(const Twine &S) : ErrorInfo(S, raw_error_code::unspecified) {}
static char ID;
- RawError(raw_error_code C);
- RawError(const std::string &Context);
- RawError(raw_error_code C, const std::string &Context);
-
- void log(raw_ostream &OS) const override;
- const std::string &getErrorMessage() const;
- std::error_code convertToErrorCode() const override;
-
-private:
- std::string ErrMsg;
- raw_error_code Code;
};
-}
-}
+} // namespace pdb
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h
index 19f592d562e4..8f6d6611c032 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h
@@ -343,7 +343,6 @@ struct SrcHeaderBlockEntry {
char Reserved[8];
};
-constexpr int I = sizeof(SrcHeaderBlockEntry);
static_assert(sizeof(SrcHeaderBlockEntry) == 40, "Incorrect struct size!");
} // namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
new file mode 100644
index 000000000000..08e1d41e6ee9
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
@@ -0,0 +1,148 @@
+//==- SymbolCache.h - Cache of native symbols and ids ------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_SYMBOLCACHE_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_SYMBOLCACHE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/Support/Allocator.h"
+
+#include <memory>
+#include <vector>
+
+namespace llvm {
+namespace pdb {
+class DbiStream;
+class PDBFile;
+
+class SymbolCache {
+ NativeSession &Session;
+ DbiStream *Dbi = nullptr;
+
+ /// Cache of all stable symbols, indexed by SymIndexId. Just because a
+ /// symbol has been parsed does not imply that it will be stable and have
+ /// an Id. Id allocation is an implementation detail, with the only guarantee
+ /// being that once an Id is allocated, the symbol can be assumed to be
+ /// cached.
+ std::vector<std::unique_ptr<NativeRawSymbol>> Cache;
+
+ /// For type records from the TPI stream which have been parsed and cached,
+ /// stores a mapping to SymIndexId of the cached symbol.
+ DenseMap<codeview::TypeIndex, SymIndexId> TypeIndexToSymbolId;
+
+ /// For field list members which have been parsed and cached, stores a mapping
+ /// from (IndexOfClass, MemberIndex) to the corresponding SymIndexId of the
+ /// cached symbol.
+ DenseMap<std::pair<codeview::TypeIndex, uint32_t>, SymIndexId>
+ FieldListMembersToSymbolId;
+
+ /// List of SymIndexIds for each compiland, indexed by compiland index as they
+ /// appear in the PDB file.
+ std::vector<SymIndexId> Compilands;
+
+ /// Map from global symbol offset to SymIndexId.
+ DenseMap<uint32_t, SymIndexId> GlobalOffsetToSymbolId;
+
+ SymIndexId createSymbolPlaceholder() {
+ SymIndexId Id = Cache.size();
+ Cache.push_back(nullptr);
+ return Id;
+ }
+
+ template <typename ConcreteSymbolT, typename CVRecordT, typename... Args>
+ SymIndexId createSymbolForType(codeview::TypeIndex TI, codeview::CVType CVT,
+ Args &&... ConstructorArgs) {
+ CVRecordT Record;
+ if (auto EC =
+ codeview::TypeDeserializer::deserializeAs<CVRecordT>(CVT, Record)) {
+ consumeError(std::move(EC));
+ return 0;
+ }
+
+ return createSymbol<ConcreteSymbolT>(
+ TI, std::move(Record), std::forward<Args>(ConstructorArgs)...);
+ }
+
+ SymIndexId createSymbolForModifiedType(codeview::TypeIndex ModifierTI,
+ codeview::CVType CVT);
+
+ SymIndexId createSimpleType(codeview::TypeIndex TI,
+ codeview::ModifierOptions Mods);
+
+public:
+ SymbolCache(NativeSession &Session, DbiStream *Dbi);
+
+ template <typename ConcreteSymbolT, typename... Args>
+ SymIndexId createSymbol(Args &&... ConstructorArgs) {
+ SymIndexId Id = Cache.size();
+
+ // Initial construction must not access the cache, since it must be done
+ // atomically.
+ auto Result = llvm::make_unique<ConcreteSymbolT>(
+ Session, Id, std::forward<Args>(ConstructorArgs)...);
+ Result->SymbolId = Id;
+
+ NativeRawSymbol *NRS = static_cast<NativeRawSymbol *>(Result.get());
+ Cache.push_back(std::move(Result));
+
+ // After the item is in the cache, we can do further initialization which
+ // is then allowed to access the cache.
+ NRS->initialize();
+ return Id;
+ }
+
+ std::unique_ptr<IPDBEnumSymbols>
+ createTypeEnumerator(codeview::TypeLeafKind Kind);
+
+ std::unique_ptr<IPDBEnumSymbols>
+ createTypeEnumerator(std::vector<codeview::TypeLeafKind> Kinds);
+
+ std::unique_ptr<IPDBEnumSymbols>
+ createGlobalsEnumerator(codeview::SymbolKind Kind);
+
+ SymIndexId findSymbolByTypeIndex(codeview::TypeIndex TI);
+
+ template <typename ConcreteSymbolT, typename... Args>
+ SymIndexId getOrCreateFieldListMember(codeview::TypeIndex FieldListTI,
+ uint32_t Index,
+ Args &&... ConstructorArgs) {
+ SymIndexId SymId = Cache.size();
+ std::pair<codeview::TypeIndex, uint32_t> Key{FieldListTI, Index};
+ auto Result = FieldListMembersToSymbolId.try_emplace(Key, SymId);
+ if (Result.second)
+ SymId =
+ createSymbol<ConcreteSymbolT>(std::forward<Args>(ConstructorArgs)...);
+ else
+ SymId = Result.first->second;
+ return SymId;
+ }
+
+ SymIndexId getOrCreateGlobalSymbolByOffset(uint32_t Offset);
+
+ std::unique_ptr<PDBSymbolCompiland> getOrCreateCompiland(uint32_t Index);
+ uint32_t getNumCompilands() const;
+
+ std::unique_ptr<PDBSymbol> getSymbolById(SymIndexId SymbolId) const;
+
+ NativeRawSymbol &getNativeSymbolById(SymIndexId SymbolId) const;
+
+ template <typename ConcreteT>
+ ConcreteT &getNativeSymbolById(SymIndexId SymbolId) const {
+ return static_cast<ConcreteT &>(getNativeSymbolById(SymbolId));
+ }
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
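A short sketch of the two-phase creation protocol documented in createSymbol() above (illustrative, not part of the patch). The concrete symbol type and record come from the NativeTypeEnum constructor added elsewhere in this change; the point is that construction must not touch the cache, while initialize() may, because by then the new id is already registered.

#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"

using namespace llvm;
using namespace llvm::pdb;

// Allocates a SymIndexId for an enum type record and caches the symbol.
SymIndexId cacheEnum(SymbolCache &Cache, codeview::TypeIndex TI,
                     codeview::EnumRecord ER) {
  // createSymbol() constructs NativeTypeEnum(Session, Id, TI, ER), inserts it
  // into the cache, and only then calls initialize() on the new symbol.
  return Cache.createSymbol<NativeTypeEnum>(TI, std::move(ER));
}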
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
index c1edec7a26fe..c2996ccf1825 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
@@ -18,6 +18,54 @@ namespace pdb {
Expected<uint32_t> hashTypeRecord(const llvm::codeview::CVType &Type);
+struct TagRecordHash {
+ explicit TagRecordHash(codeview::ClassRecord CR, uint32_t Full,
+ uint32_t Forward)
+ : FullRecordHash(Full), ForwardDeclHash(Forward), Class(std::move(CR)) {
+ State = 0;
+ }
+
+ explicit TagRecordHash(codeview::EnumRecord ER, uint32_t Full,
+ uint32_t Forward)
+ : FullRecordHash(Full), ForwardDeclHash(Forward), Enum(std::move(ER)) {
+ State = 1;
+ }
+
+ explicit TagRecordHash(codeview::UnionRecord UR, uint32_t Full,
+ uint32_t Forward)
+ : FullRecordHash(Full), ForwardDeclHash(Forward), Union(std::move(UR)) {
+ State = 2;
+ }
+
+ uint32_t FullRecordHash;
+ uint32_t ForwardDeclHash;
+
+ codeview::TagRecord &getRecord() {
+ switch (State) {
+ case 0:
+ return Class;
+ case 1:
+ return Enum;
+ case 2:
+ return Union;
+ }
+ llvm_unreachable("unreachable!");
+ }
+
+private:
+ union {
+ codeview::ClassRecord Class;
+ codeview::EnumRecord Enum;
+ codeview::UnionRecord Union;
+ };
+
+ uint8_t State = 0;
+};
+
+/// Given a CVType referring to a class, structure, union, or enum, compute
+/// the hash of its forward decl and full decl.
+Expected<TagRecordHash> hashTagRecord(const codeview::CVType &Type);
+
} // end namespace pdb
} // end namespace llvm
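A hedged usage sketch for the new hashTagRecord() helper (not part of the patch). CVT is assumed to be a CVType for a class, struct, union, or enum, as the doc comment above requires; anything else would yield an error.

#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
#include <utility>

// Returns {hash of the full definition, hash of the forward declaration}.
llvm::Expected<std::pair<uint32_t, uint32_t>>
getTagHashes(const llvm::codeview::CVType &CVT) {
  llvm::Expected<llvm::pdb::TagRecordHash> H = llvm::pdb::hashTagRecord(CVT);
  if (!H)
    return H.takeError();
  return std::make_pair(H->FullRecordHash, H->ForwardDeclHash);
}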
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
index b77939929ecf..b76576a7a263 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
@@ -58,10 +58,21 @@ public:
codeview::LazyRandomTypeCollection &typeCollection() { return *Types; }
+ Expected<codeview::TypeIndex>
+ findFullDeclForForwardRef(codeview::TypeIndex ForwardRefTI) const;
+
+ std::vector<codeview::TypeIndex> findRecordsByName(StringRef Name) const;
+
+ codeview::CVType getType(codeview::TypeIndex Index);
+
BinarySubstreamRef getTypeRecordsSubstream() const;
Error commit();
+ void buildHashMap();
+
+ bool supportsTypeLookup() const;
+
private:
PDBFile &Pdb;
std::unique_ptr<msf::MappedBlockStream> Stream;
@@ -77,6 +88,8 @@ private:
FixedStreamArray<codeview::TypeIndexOffset> TypeIndexOffsets;
HashTable<support::ulittle32_t> HashAdjusters;
+ std::vector<std::vector<codeview::TypeIndex>> HashMap;
+
const TpiStreamHeader *Header;
};
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
index 3c9a19801f89..aaec71aa8c90 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
@@ -12,6 +12,8 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include "llvm/Support/raw_ostream.h"
+
#include <unordered_map>
namespace llvm {
@@ -24,6 +26,7 @@ using TagStats = std::unordered_map<PDB_SymType, int>;
raw_ostream &operator<<(raw_ostream &OS, const PDB_VariantType &Value);
raw_ostream &operator<<(raw_ostream &OS, const PDB_CallingConv &Conv);
+raw_ostream &operator<<(raw_ostream &OS, const PDB_BuiltinType &Type);
raw_ostream &operator<<(raw_ostream &OS, const PDB_DataKind &Data);
raw_ostream &operator<<(raw_ostream &OS, const codeview::RegisterId &Reg);
raw_ostream &operator<<(raw_ostream &OS, const PDB_LocType &Loc);
@@ -41,6 +44,15 @@ raw_ostream &operator<<(raw_ostream &OS, const Variant &Value);
raw_ostream &operator<<(raw_ostream &OS, const VersionInfo &Version);
raw_ostream &operator<<(raw_ostream &OS, const TagStats &Stats);
+
+template <typename T>
+void dumpSymbolField(raw_ostream &OS, StringRef Name, T Value, int Indent) {
+ OS << "\n";
+ OS.indent(Indent);
+ OS << Name << ": " << Value;
+}
+
+
} // end namespace pdb
} // end namespace llvm
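A small sketch of the dumpSymbolField() helper added above (illustrative, not part of the patch): it emits a newline, indents, then prints "Name: Value", which is the per-field line format used by the native symbols' dump() implementations.

#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/Support/raw_ostream.h"

void dumpExampleField(llvm::raw_ostream &OS) {
  // Prints a line of the form "  typeId: 4" (two-space indent chosen here).
  llvm::pdb::dumpSymbolField(OS, "typeId", 4u, /*Indent=*/2);
}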
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
index 04373463212b..3a74f7c3aace 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
@@ -49,9 +49,22 @@ class IPDBRawSymbol;
class IPDBSession;
#define DECLARE_PDB_SYMBOL_CONCRETE_TYPE(TagValue) \
+private: \
+ using PDBSymbol::PDBSymbol; \
+ friend class PDBSymbol; \
+ \
+public: \
static const PDB_SymType Tag = TagValue; \
static bool classof(const PDBSymbol *S) { return S->getSymTag() == Tag; }
+#define DECLARE_PDB_SYMBOL_CUSTOM_TYPE(Condition) \
+private: \
+ using PDBSymbol::PDBSymbol; \
+ friend class PDBSymbol; \
+ \
+public: \
+ static bool classof(const PDBSymbol *S) { return Condition; }
+
/// PDBSymbol defines the base of the inheritance hierarchy for concrete symbol
/// types (e.g. functions, executables, vtables, etc). All concrete symbol
/// types inherit from PDBSymbol and expose the exact set of methods that are
@@ -59,14 +72,33 @@ class IPDBSession;
/// reference "Lexical and Class Hierarchy of Symbol Types":
/// https://msdn.microsoft.com/en-us/library/370hs6k4.aspx
class PDBSymbol {
+ static std::unique_ptr<PDBSymbol> createSymbol(const IPDBSession &PDBSession,
+ PDB_SymType Tag);
+
protected:
- PDBSymbol(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
- PDBSymbol(PDBSymbol &Symbol);
+ explicit PDBSymbol(const IPDBSession &PDBSession);
+ PDBSymbol(PDBSymbol &&Other);
public:
static std::unique_ptr<PDBSymbol>
- create(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol);
+ create(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> RawSymbol);
+ static std::unique_ptr<PDBSymbol> create(const IPDBSession &PDBSession,
+ IPDBRawSymbol &RawSymbol);
+
+ template <typename ConcreteT>
+ static std::unique_ptr<ConcreteT>
+ createAs(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> RawSymbol) {
+ std::unique_ptr<PDBSymbol> S = create(PDBSession, std::move(RawSymbol));
+ return unique_dyn_cast_or_null<ConcreteT>(std::move(S));
+ }
+ template <typename ConcreteT>
+ static std::unique_ptr<ConcreteT> createAs(const IPDBSession &PDBSession,
+ IPDBRawSymbol &RawSymbol) {
+ std::unique_ptr<PDBSymbol> S = create(PDBSession, RawSymbol);
+ return unique_dyn_cast_or_null<ConcreteT>(std::move(S));
+ }
virtual ~PDBSymbol();
@@ -80,7 +112,8 @@ public:
/// normally goes on the right side of the symbol.
virtual void dumpRight(PDBSymDumper &Dumper) const {}
- void defaultDump(raw_ostream &OS, int Indent) const;
+ void defaultDump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowFlags,
+ PdbSymbolIdField RecurseFlags) const;
void dumpProperties() const;
void dumpChildStats() const;
@@ -94,8 +127,6 @@ public:
return Enumerator->getNext();
}
- std::unique_ptr<PDBSymbol> clone() const;
-
template <typename T>
std::unique_ptr<ConcreteSymbolEnumerator<T>> findAllChildren() const {
auto BaseIter = RawSymbol->findChildren(T::Tag);
@@ -131,7 +162,8 @@ protected:
}
const IPDBSession &Session;
- std::unique_ptr<IPDBRawSymbol> RawSymbol;
+ std::unique_ptr<IPDBRawSymbol> OwnedRawSymbol;
+ IPDBRawSymbol *RawSymbol = nullptr;
};
} // namespace llvm
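A possible caller-side view of the reworked factory API, illustrative only: create() picks the concrete subclass from the raw symbol's tag, and createAs<>() layers unique_dyn_cast_or_null<> on top, so a tag mismatch yields a null pointer instead of a mis-typed wrapper. The helper name asFunction is hypothetical.

    #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
    #include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
    #include <memory>
    #include <utility>
    using namespace llvm;

    // Returns null unless Raw describes a PDB_SymType::Function symbol.
    std::unique_ptr<pdb::PDBSymbolFunc>
    asFunction(const pdb::IPDBSession &Session,
               std::unique_ptr<pdb::IPDBRawSymbol> Raw) {
      return pdb::PDBSymbol::createAs<pdb::PDBSymbolFunc>(Session, std::move(Raw));
    }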
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
index 3169146e5b12..ef00df15cb0a 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
@@ -18,12 +18,9 @@ class raw_ostream;
namespace pdb {
class PDBSymbolAnnotation : public PDBSymbol {
-public:
- PDBSymbolAnnotation(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Annotation)
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
index d81da1eaa023..2cf9c72a8886 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolBlock : public PDBSymbol {
-public:
- PDBSymbolBlock(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Block)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
index 9549089c7eb4..04dbd962ebd4 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
@@ -20,12 +20,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolCompiland : public PDBSymbol {
-public:
- PDBSymbolCompiland(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> CompilandSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Compiland)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(isEditAndContinueEnabled)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
index dba50c42cf81..3d651a464d94 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolCompilandDetails : public PDBSymbol {
-public:
- PDBSymbolCompilandDetails(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::CompilandDetails)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
void getFrontEndVersion(VersionInfo &Version) const {
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
index 7868f0459086..ffc408314d9a 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
@@ -18,12 +18,8 @@ namespace llvm {
class raw_ostream;
namespace pdb {
class PDBSymbolCompilandEnv : public PDBSymbol {
-public:
- PDBSymbolCompilandEnv(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::CompilandEnv)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getLexicalParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
index 54f089404262..c29e4c31d3f3 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
@@ -23,12 +23,8 @@ namespace pdb {
/// fit anywhere else in the lexical hierarchy.
/// https://msdn.microsoft.com/en-us/library/d88sf09h.aspx
class PDBSymbolCustom : public PDBSymbol {
-public:
- PDBSymbolCustom(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> CustomSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Custom)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
void getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes);
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
index 76b14bf17784..217e1e976e6b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
@@ -21,12 +21,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolData : public PDBSymbol {
-public:
- PDBSymbolData(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> DataSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Data)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAccess)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
index 2c2d74665040..366d0cf4777f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
@@ -20,12 +20,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolExe : public PDBSymbol {
-public:
- PDBSymbolExe(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> ExeSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Exe)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAge)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
index 05d585d25763..129e557c7f25 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
@@ -22,18 +22,14 @@ class raw_ostream;
namespace pdb {
class PDBSymbolFunc : public PDBSymbol {
+ DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Function)
public:
- PDBSymbolFunc(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> FuncSymbol);
-
void dump(PDBSymDumper &Dumper) const override;
bool isDestructor() const;
std::unique_ptr<IPDBEnumChildren<PDBSymbolData>> getArguments() const;
- DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Function)
-
FORWARD_SYMBOL_METHOD(getAccess)
FORWARD_SYMBOL_METHOD(getAddressOffset)
FORWARD_SYMBOL_METHOD(getAddressSection)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
index 3341bd9b30fd..18db8a50fd1b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
@@ -20,12 +20,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolFuncDebugEnd : public PDBSymbol {
-public:
- PDBSymbolFuncDebugEnd(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> FuncDebugEndSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::FuncDebugEnd)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
index 6729838597c8..83d82f0cbcc5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolFuncDebugStart : public PDBSymbol {
-public:
- PDBSymbolFuncDebugStart(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> FuncDebugStartSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::FuncDebugStart)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
index c2b1c28c929e..8b2617fcd757 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolLabel : public PDBSymbol {
-public:
- PDBSymbolLabel(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> LabelSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Label)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
index c9e6ee67c575..9def3edb469a 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolPublicSymbol : public PDBSymbol {
-public:
- PDBSymbolPublicSymbol(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> PublicSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::PublicSymbol)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
index 614fad86caa8..7bb0555362db 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolThunk : public PDBSymbol {
-public:
- PDBSymbolThunk(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> ThunkSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Thunk)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAccess)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
index 39b7d3b300ea..488f668bdc10 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeArray : public PDBSymbol {
-public:
- PDBSymbolTypeArray(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> ArrayTypeSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::ArrayType)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
void dumpRight(PDBSymDumper &Dumper) const override;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
index d607a3d81170..550deedd7504 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
@@ -22,12 +22,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeBaseClass : public PDBSymbol {
-public:
- PDBSymbolTypeBaseClass(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::BaseClass)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAccess)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
index 5b1863c42a04..e07e88802b8f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeBuiltin : public PDBSymbol {
-public:
- PDBSymbolTypeBuiltin(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::BuiltinType)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getBuiltinType)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
index 199b3f8b304e..0d8979c9c5c5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeCustom : public PDBSymbol {
-public:
- PDBSymbolTypeCustom(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::CustomType)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getOemId)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
index e635eb5bbf6f..58292a63501f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeDimension : public PDBSymbol {
-public:
- PDBSymbolTypeDimension(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Dimension)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getLowerBoundId)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
index ddbe7e58f183..f463047bb5b5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
@@ -21,12 +21,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeEnum : public PDBSymbol {
-public:
- PDBSymbolTypeEnum(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> EnumTypeSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Enum)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getBuiltinType)
@@ -38,6 +34,7 @@ public:
FORWARD_SYMBOL_METHOD(hasNestedTypes)
FORWARD_SYMBOL_METHOD(getLength)
FORWARD_SYMBOL_ID_METHOD(getLexicalParent)
+ FORWARD_SYMBOL_ID_METHOD(getUnmodifiedType)
FORWARD_SYMBOL_METHOD(getName)
FORWARD_SYMBOL_METHOD(getSrcLineOnTypeDefn)
FORWARD_SYMBOL_METHOD(isNested)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
index 24c13128111f..5b940b0737af 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeFriend : public PDBSymbol {
-public:
- PDBSymbolTypeFriend(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Friend)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getClassParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
index 3855999c473f..074cb418fc82 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeFunctionArg : public PDBSymbol {
-public:
- PDBSymbolTypeFunctionArg(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::FunctionArg)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getClassParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
index abd4cf5effa2..dfdf436197c3 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeFunctionSig : public PDBSymbol {
-public:
- PDBSymbolTypeFunctionSig(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::FunctionSig)
-
+public:
std::unique_ptr<IPDBEnumSymbols> getArguments() const;
void dump(PDBSymDumper &Dumper) const override;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
index 31cf5363dde1..d716abd640c6 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeManaged : public PDBSymbol {
-public:
- PDBSymbolTypeManaged(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::ManagedType)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getName)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
index 7612ebac31dd..300d6722fc4d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
@@ -19,16 +19,13 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypePointer : public PDBSymbol {
-public:
- PDBSymbolTypePointer(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::PointerType)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
void dumpRight(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(isConstType)
+ FORWARD_SYMBOL_ID_METHOD(getClassParent)
FORWARD_SYMBOL_METHOD(getLength)
FORWARD_SYMBOL_ID_METHOD(getLexicalParent)
FORWARD_SYMBOL_METHOD(isReference)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
index 16c1d1b88c6d..d6e2a36486d5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeTypedef : public PDBSymbol {
-public:
- PDBSymbolTypeTypedef(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Typedef)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getBuiltinType)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
index e259b6dca3d5..937dd6c87221 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
@@ -23,17 +23,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeUDT : public PDBSymbol {
-public:
- PDBSymbolTypeUDT(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> UDTSymbol);
-
- std::unique_ptr<PDBSymbolTypeUDT> clone() const {
- return getSession().getConcreteSymbolById<PDBSymbolTypeUDT>(
- getSymIndexId());
- }
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::UDT)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getClassParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
index e270c2b7eb95..6efce4bbd686 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeVTable : public PDBSymbol {
-public:
- PDBSymbolTypeVTable(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> VtblSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::VTable)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getClassParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
index 8acaabea5bb8..8949052b0c0f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeVTableShape : public PDBSymbol {
-public:
- PDBSymbolTypeVTableShape(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> VtblShapeSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::VTableShape)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(isConstType)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
index de43e47badbd..e935ac6ce0dc 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
@@ -18,16 +18,11 @@ class raw_ostream;
namespace pdb {
class PDBSymbolUnknown : public PDBSymbol {
-public:
- PDBSymbolUnknown(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> UnknownSymbol);
+ DECLARE_PDB_SYMBOL_CUSTOM_TYPE(S->getSymTag() == PDB_SymType::None ||
+ S->getSymTag() >= PDB_SymType::Max)
+public:
void dump(PDBSymDumper &Dumper) const override;
-
- static bool classof(const PDBSymbol *S) {
- return (S->getSymTag() == PDB_SymType::None ||
- S->getSymTag() >= PDB_SymType::Max);
- }
};
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
index 70fbd5b84c34..4e8c99fc8d89 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
@@ -19,12 +19,9 @@ class raw_ostream;
namespace pdb {
class PDBSymbolUsingNamespace : public PDBSymbol {
-public:
- PDBSymbolUsingNamespace(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::UsingNamespace)
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getLexicalParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
index da6cb1d26771..917f3ed73910 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -12,6 +12,7 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBFrameData.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include <cctype>
#include <cstddef>
@@ -22,6 +23,8 @@
namespace llvm {
namespace pdb {
+typedef uint32_t SymIndexId;
+
class IPDBDataStream;
class IPDBInjectedSource;
class IPDBLineNumber;
@@ -69,6 +72,7 @@ using IPDBEnumLineNumbers = IPDBEnumChildren<IPDBLineNumber>;
using IPDBEnumTables = IPDBEnumChildren<IPDBTable>;
using IPDBEnumInjectedSources = IPDBEnumChildren<IPDBInjectedSource>;
using IPDBEnumSectionContribs = IPDBEnumChildren<IPDBSectionContrib>;
+using IPDBEnumFrameData = IPDBEnumChildren<IPDBFrameData>;
/// Specifies which PDB reader implementation is to be used. Only a value
/// of PDB_ReaderType::DIA is currently supported, but Native is in the works.
@@ -208,6 +212,18 @@ enum class PDB_SymType {
CustomType,
ManagedType,
Dimension,
+ CallSite,
+ InlineSite,
+ BaseInterface,
+ VectorType,
+ MatrixType,
+ HLSLType,
+ Caller,
+ Callee,
+ Export,
+ HeapAllocationSite,
+ CoffGroup,
+ Inlinee,
Max
};
@@ -334,6 +350,36 @@ enum PDB_VariantType {
struct Variant {
Variant() = default;
+ explicit Variant(bool V) : Type(PDB_VariantType::Bool) { Value.Bool = V; }
+ explicit Variant(int8_t V) : Type(PDB_VariantType::Int8) { Value.Int8 = V; }
+ explicit Variant(int16_t V) : Type(PDB_VariantType::Int16) {
+ Value.Int16 = V;
+ }
+ explicit Variant(int32_t V) : Type(PDB_VariantType::Int32) {
+ Value.Int32 = V;
+ }
+ explicit Variant(int64_t V) : Type(PDB_VariantType::Int64) {
+ Value.Int64 = V;
+ }
+ explicit Variant(float V) : Type(PDB_VariantType::Single) {
+ Value.Single = V;
+ }
+ explicit Variant(double V) : Type(PDB_VariantType::Double) {
+ Value.Double = V;
+ }
+ explicit Variant(uint8_t V) : Type(PDB_VariantType::UInt8) {
+ Value.UInt8 = V;
+ }
+ explicit Variant(uint16_t V) : Type(PDB_VariantType::UInt16) {
+ Value.UInt16 = V;
+ }
+ explicit Variant(uint32_t V) : Type(PDB_VariantType::UInt32) {
+ Value.UInt32 = V;
+ }
+ explicit Variant(uint64_t V) : Type(PDB_VariantType::UInt64) {
+ Value.UInt64 = V;
+ }
+
Variant(const Variant &Other) {
*this = Other;
}
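The explicit Variant constructors added above record which union member was written; because every overload takes an exact fixed-width type, the argument's static type selects the slot. A small sketch (variantDemo is hypothetical; Type and Value are the members this struct already exposes):

    #include "llvm/DebugInfo/PDB/PDBTypes.h"
    #include <cassert>
    #include <cstdint>

    void variantDemo() {
      llvm::pdb::Variant V(static_cast<uint32_t>(42)); // picks the UInt32 overload
      assert(V.Type == llvm::pdb::PDB_VariantType::UInt32);
      assert(V.Value.UInt32 == 42);
    }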
diff --git a/contrib/llvm/lib/Demangle/Compiler.h b/contrib/llvm/include/llvm/Demangle/Compiler.h
index 248d6e3a7faa..248d6e3a7faa 100644
--- a/contrib/llvm/lib/Demangle/Compiler.h
+++ b/contrib/llvm/include/llvm/Demangle/Compiler.h
diff --git a/contrib/llvm/include/llvm/Demangle/Demangle.h b/contrib/llvm/include/llvm/Demangle/Demangle.h
index df7753f23b87..4c9dc9569e18 100644
--- a/contrib/llvm/include/llvm/Demangle/Demangle.h
+++ b/contrib/llvm/include/llvm/Demangle/Demangle.h
@@ -7,6 +7,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_DEMANGLE_DEMANGLE_H
+#define LLVM_DEMANGLE_DEMANGLE_H
+
#include <cstddef>
namespace llvm {
@@ -27,8 +30,11 @@ enum : int {
char *itaniumDemangle(const char *mangled_name, char *buf, size_t *n,
int *status);
+
+
+enum MSDemangleFlags { MSDF_None = 0, MSDF_DumpBackrefs = 1 << 0 };
char *microsoftDemangle(const char *mangled_name, char *buf, size_t *n,
- int *status);
+ int *status, MSDemangleFlags Flags = MSDF_None);
/// "Partial" demangler. This supports demangling a string into an AST
/// (typically an intermediate stage in itaniumDemangle) and querying certain
@@ -86,3 +92,5 @@ private:
void *Context;
};
} // namespace llvm
+
+#endif
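The new MSDemangleFlags parameter defaults to MSDF_None, so existing callers keep compiling unchanged. A minimal standalone use of the extended signature; the mangled name is just an example (?foo@@YAXXZ, i.e. void __cdecl foo(void)):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      // With buf == nullptr the demangler allocates the result; the caller frees it.
      char *Demangled = llvm::microsoftDemangle("?foo@@YAXXZ", nullptr, nullptr,
                                                &Status, llvm::MSDF_None);
      if (Status == llvm::demangle_success && Demangled)
        std::puts(Demangled); // expected: "void __cdecl foo(void)"
      std::free(Demangled);
      return 0;
    }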
diff --git a/contrib/llvm/include/llvm/Demangle/ItaniumDemangle.h b/contrib/llvm/include/llvm/Demangle/ItaniumDemangle.h
new file mode 100644
index 000000000000..0b9187f30a5a
--- /dev/null
+++ b/contrib/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -0,0 +1,5184 @@
+//===------------------------- ItaniumDemangle.h ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#define LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+
+// FIXME: (possibly) incomplete list of features that clang mangles that this
+// file does not yet support:
+// - C++ modules TS
+
+#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/Utility.h"
+
+#include <cassert>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <numeric>
+#include <utility>
+
+#define FOR_EACH_NODE_KIND(X) \
+ X(NodeArrayNode) \
+ X(DotSuffix) \
+ X(VendorExtQualType) \
+ X(QualType) \
+ X(ConversionOperatorType) \
+ X(PostfixQualifiedType) \
+ X(ElaboratedTypeSpefType) \
+ X(NameType) \
+ X(AbiTagAttr) \
+ X(EnableIfAttr) \
+ X(ObjCProtoName) \
+ X(PointerType) \
+ X(ReferenceType) \
+ X(PointerToMemberType) \
+ X(ArrayType) \
+ X(FunctionType) \
+ X(NoexceptSpec) \
+ X(DynamicExceptionSpec) \
+ X(FunctionEncoding) \
+ X(LiteralOperator) \
+ X(SpecialName) \
+ X(CtorVtableSpecialName) \
+ X(QualifiedName) \
+ X(NestedName) \
+ X(LocalName) \
+ X(VectorType) \
+ X(PixelVectorType) \
+ X(ParameterPack) \
+ X(TemplateArgumentPack) \
+ X(ParameterPackExpansion) \
+ X(TemplateArgs) \
+ X(ForwardTemplateReference) \
+ X(NameWithTemplateArgs) \
+ X(GlobalQualifiedName) \
+ X(StdQualifiedName) \
+ X(ExpandedSpecialSubstitution) \
+ X(SpecialSubstitution) \
+ X(CtorDtorName) \
+ X(DtorName) \
+ X(UnnamedTypeName) \
+ X(ClosureTypeName) \
+ X(StructuredBindingName) \
+ X(BinaryExpr) \
+ X(ArraySubscriptExpr) \
+ X(PostfixExpr) \
+ X(ConditionalExpr) \
+ X(MemberExpr) \
+ X(EnclosingExpr) \
+ X(CastExpr) \
+ X(SizeofParamPackExpr) \
+ X(CallExpr) \
+ X(NewExpr) \
+ X(DeleteExpr) \
+ X(PrefixExpr) \
+ X(FunctionParam) \
+ X(ConversionExpr) \
+ X(InitListExpr) \
+ X(FoldExpr) \
+ X(ThrowExpr) \
+ X(BoolExpr) \
+ X(IntegerCastExpr) \
+ X(IntegerLiteral) \
+ X(FloatLiteral) \
+ X(DoubleLiteral) \
+ X(LongDoubleLiteral) \
+ X(BracedExpr) \
+ X(BracedRangeExpr)
+
+namespace llvm {
+namespace itanium_demangle {
+// Base class of all AST nodes. The AST is built by the parser, then is
+// traversed by the printLeft/Right functions to produce a demangled string.
+class Node {
+public:
+ enum Kind : unsigned char {
+#define ENUMERATOR(NodeKind) K ## NodeKind,
+ FOR_EACH_NODE_KIND(ENUMERATOR)
+#undef ENUMERATOR
+ };
+
+ /// Three-way bool to track a cached value. Unknown is possible if this node
+ /// has an unexpanded parameter pack below it that may affect this cache.
+ enum class Cache : unsigned char { Yes, No, Unknown, };
+
+private:
+ Kind K;
+
+ // FIXME: Make these protected.
+public:
+ /// Tracks if this node has a component on its right side, in which case we
+ /// need to call printRight.
+ Cache RHSComponentCache;
+
+ /// Track if this node is a (possibly qualified) array type. This can affect
+ /// how we format the output string.
+ Cache ArrayCache;
+
+ /// Track if this node is a (possibly qualified) function type. This can
+ /// affect how we format the output string.
+ Cache FunctionCache;
+
+public:
+ Node(Kind K_, Cache RHSComponentCache_ = Cache::No,
+ Cache ArrayCache_ = Cache::No, Cache FunctionCache_ = Cache::No)
+ : K(K_), RHSComponentCache(RHSComponentCache_), ArrayCache(ArrayCache_),
+ FunctionCache(FunctionCache_) {}
+
+ /// Visit the most-derived object corresponding to this object.
+ template<typename Fn> void visit(Fn F) const;
+
+ // The following function is provided by all derived classes:
+ //
+ // Call F with arguments that, when passed to the constructor of this node,
+ // would construct an equivalent node.
+ //template<typename Fn> void match(Fn F) const;
+
+ bool hasRHSComponent(OutputStream &S) const {
+ if (RHSComponentCache != Cache::Unknown)
+ return RHSComponentCache == Cache::Yes;
+ return hasRHSComponentSlow(S);
+ }
+
+ bool hasArray(OutputStream &S) const {
+ if (ArrayCache != Cache::Unknown)
+ return ArrayCache == Cache::Yes;
+ return hasArraySlow(S);
+ }
+
+ bool hasFunction(OutputStream &S) const {
+ if (FunctionCache != Cache::Unknown)
+ return FunctionCache == Cache::Yes;
+ return hasFunctionSlow(S);
+ }
+
+ Kind getKind() const { return K; }
+
+ virtual bool hasRHSComponentSlow(OutputStream &) const { return false; }
+ virtual bool hasArraySlow(OutputStream &) const { return false; }
+ virtual bool hasFunctionSlow(OutputStream &) const { return false; }
+
+ // Dig through "glue" nodes like ParameterPack and ForwardTemplateReference to
+ // get at a node that actually represents some concrete syntax.
+ virtual const Node *getSyntaxNode(OutputStream &) const {
+ return this;
+ }
+
+ void print(OutputStream &S) const {
+ printLeft(S);
+ if (RHSComponentCache != Cache::No)
+ printRight(S);
+ }
+
+ // Print the "left" side of this Node into OutputStream.
+ virtual void printLeft(OutputStream &) const = 0;
+
+ // Print the "right". This distinction is necessary to represent C++ types
+ // that appear on the RHS of their subtype, such as arrays or functions.
+ // Since most types don't have such a component, provide a default
+ // implementation.
+ virtual void printRight(OutputStream &) const {}
+
+ virtual StringView getBaseName() const { return StringView(); }
+
+  // Silence compiler warnings; this dtor will never be called.
+ virtual ~Node() = default;
+
+#ifndef NDEBUG
+ LLVM_DUMP_METHOD void dump() const;
+#endif
+};
+
+class NodeArray {
+ Node **Elements;
+ size_t NumElements;
+
+public:
+ NodeArray() : Elements(nullptr), NumElements(0) {}
+ NodeArray(Node **Elements_, size_t NumElements_)
+ : Elements(Elements_), NumElements(NumElements_) {}
+
+ bool empty() const { return NumElements == 0; }
+ size_t size() const { return NumElements; }
+
+ Node **begin() const { return Elements; }
+ Node **end() const { return Elements + NumElements; }
+
+ Node *operator[](size_t Idx) const { return Elements[Idx]; }
+
+ void printWithComma(OutputStream &S) const {
+ bool FirstElement = true;
+ for (size_t Idx = 0; Idx != NumElements; ++Idx) {
+ size_t BeforeComma = S.getCurrentPosition();
+ if (!FirstElement)
+ S += ", ";
+ size_t AfterComma = S.getCurrentPosition();
+ Elements[Idx]->print(S);
+
+      // If Elements[Idx] is an empty parameter pack expansion that printed
+      // nothing, erase the comma we just printed.
+ if (AfterComma == S.getCurrentPosition()) {
+ S.setCurrentPosition(BeforeComma);
+ continue;
+ }
+
+ FirstElement = false;
+ }
+ }
+};
+
+struct NodeArrayNode : Node {
+ NodeArray Array;
+ NodeArrayNode(NodeArray Array_) : Node(KNodeArrayNode), Array(Array_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Array); }
+
+ void printLeft(OutputStream &S) const override {
+ Array.printWithComma(S);
+ }
+};
+
+class DotSuffix final : public Node {
+ const Node *Prefix;
+ const StringView Suffix;
+
+public:
+ DotSuffix(const Node *Prefix_, StringView Suffix_)
+ : Node(KDotSuffix), Prefix(Prefix_), Suffix(Suffix_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Prefix, Suffix); }
+
+ void printLeft(OutputStream &s) const override {
+ Prefix->print(s);
+ s += " (";
+ s += Suffix;
+ s += ")";
+ }
+};
+
+class VendorExtQualType final : public Node {
+ const Node *Ty;
+ StringView Ext;
+
+public:
+ VendorExtQualType(const Node *Ty_, StringView Ext_)
+ : Node(KVendorExtQualType), Ty(Ty_), Ext(Ext_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty, Ext); }
+
+ void printLeft(OutputStream &S) const override {
+ Ty->print(S);
+ S += " ";
+ S += Ext;
+ }
+};
+
+enum FunctionRefQual : unsigned char {
+ FrefQualNone,
+ FrefQualLValue,
+ FrefQualRValue,
+};
+
+enum Qualifiers {
+ QualNone = 0,
+ QualConst = 0x1,
+ QualVolatile = 0x2,
+ QualRestrict = 0x4,
+};
+
+inline Qualifiers operator|=(Qualifiers &Q1, Qualifiers Q2) {
+ return Q1 = static_cast<Qualifiers>(Q1 | Q2);
+}
+
+class QualType : public Node {
+protected:
+ const Qualifiers Quals;
+ const Node *Child;
+
+ void printQuals(OutputStream &S) const {
+ if (Quals & QualConst)
+ S += " const";
+ if (Quals & QualVolatile)
+ S += " volatile";
+ if (Quals & QualRestrict)
+ S += " restrict";
+ }
+
+public:
+ QualType(const Node *Child_, Qualifiers Quals_)
+ : Node(KQualType, Child_->RHSComponentCache,
+ Child_->ArrayCache, Child_->FunctionCache),
+ Quals(Quals_), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Child, Quals); }
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ return Child->hasRHSComponent(S);
+ }
+ bool hasArraySlow(OutputStream &S) const override {
+ return Child->hasArray(S);
+ }
+ bool hasFunctionSlow(OutputStream &S) const override {
+ return Child->hasFunction(S);
+ }
+
+ void printLeft(OutputStream &S) const override {
+ Child->printLeft(S);
+ printQuals(S);
+ }
+
+ void printRight(OutputStream &S) const override { Child->printRight(S); }
+};
+
+class ConversionOperatorType final : public Node {
+ const Node *Ty;
+
+public:
+ ConversionOperatorType(const Node *Ty_)
+ : Node(KConversionOperatorType), Ty(Ty_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "operator ";
+ Ty->print(S);
+ }
+};
+
+class PostfixQualifiedType final : public Node {
+ const Node *Ty;
+ const StringView Postfix;
+
+public:
+ PostfixQualifiedType(Node *Ty_, StringView Postfix_)
+ : Node(KPostfixQualifiedType), Ty(Ty_), Postfix(Postfix_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty, Postfix); }
+
+ void printLeft(OutputStream &s) const override {
+ Ty->printLeft(s);
+ s += Postfix;
+ }
+};
+
+class NameType final : public Node {
+ const StringView Name;
+
+public:
+ NameType(StringView Name_) : Node(KNameType), Name(Name_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Name); }
+
+ StringView getName() const { return Name; }
+ StringView getBaseName() const override { return Name; }
+
+ void printLeft(OutputStream &s) const override { s += Name; }
+};
+
+class ElaboratedTypeSpefType : public Node {
+ StringView Kind;
+ Node *Child;
+public:
+ ElaboratedTypeSpefType(StringView Kind_, Node *Child_)
+ : Node(KElaboratedTypeSpefType), Kind(Kind_), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Kind, Child); }
+
+ void printLeft(OutputStream &S) const override {
+ S += Kind;
+ S += ' ';
+ Child->print(S);
+ }
+};
+
+struct AbiTagAttr : Node {
+ Node *Base;
+ StringView Tag;
+
+ AbiTagAttr(Node* Base_, StringView Tag_)
+ : Node(KAbiTagAttr, Base_->RHSComponentCache,
+ Base_->ArrayCache, Base_->FunctionCache),
+ Base(Base_), Tag(Tag_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Base, Tag); }
+
+ void printLeft(OutputStream &S) const override {
+ Base->printLeft(S);
+ S += "[abi:";
+ S += Tag;
+ S += "]";
+ }
+};
+
+class EnableIfAttr : public Node {
+ NodeArray Conditions;
+public:
+ EnableIfAttr(NodeArray Conditions_)
+ : Node(KEnableIfAttr), Conditions(Conditions_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Conditions); }
+
+ void printLeft(OutputStream &S) const override {
+ S += " [enable_if:";
+ Conditions.printWithComma(S);
+ S += ']';
+ }
+};
+
+class ObjCProtoName : public Node {
+ const Node *Ty;
+ StringView Protocol;
+
+ friend class PointerType;
+
+public:
+ ObjCProtoName(const Node *Ty_, StringView Protocol_)
+ : Node(KObjCProtoName), Ty(Ty_), Protocol(Protocol_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty, Protocol); }
+
+ bool isObjCObject() const {
+ return Ty->getKind() == KNameType &&
+ static_cast<const NameType *>(Ty)->getName() == "objc_object";
+ }
+
+ void printLeft(OutputStream &S) const override {
+ Ty->print(S);
+ S += "<";
+ S += Protocol;
+ S += ">";
+ }
+};
+
+class PointerType final : public Node {
+ const Node *Pointee;
+
+public:
+ PointerType(const Node *Pointee_)
+ : Node(KPointerType, Pointee_->RHSComponentCache),
+ Pointee(Pointee_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Pointee); }
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ return Pointee->hasRHSComponent(S);
+ }
+
+ void printLeft(OutputStream &s) const override {
+ // We rewrite objc_object<SomeProtocol>* into id<SomeProtocol>.
+ if (Pointee->getKind() != KObjCProtoName ||
+ !static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
+ Pointee->printLeft(s);
+ if (Pointee->hasArray(s))
+ s += " ";
+ if (Pointee->hasArray(s) || Pointee->hasFunction(s))
+ s += "(";
+ s += "*";
+ } else {
+ const auto *objcProto = static_cast<const ObjCProtoName *>(Pointee);
+ s += "id<";
+ s += objcProto->Protocol;
+ s += ">";
+ }
+ }
+
+ void printRight(OutputStream &s) const override {
+ if (Pointee->getKind() != KObjCProtoName ||
+ !static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
+ if (Pointee->hasArray(s) || Pointee->hasFunction(s))
+ s += ")";
+ Pointee->printRight(s);
+ }
+ }
+};
+
+enum class ReferenceKind {
+ LValue,
+ RValue,
+};
+
+// Represents either a LValue or an RValue reference type.
+class ReferenceType : public Node {
+ const Node *Pointee;
+ ReferenceKind RK;
+
+ mutable bool Printing = false;
+
+ // Dig through any refs to refs, collapsing the ReferenceTypes as we go. The
+  // rule here is that an rvalue ref to an rvalue ref collapses to an rvalue
+  // ref; any other combination collapses to an lvalue ref.
+ std::pair<ReferenceKind, const Node *> collapse(OutputStream &S) const {
+ auto SoFar = std::make_pair(RK, Pointee);
+ for (;;) {
+ const Node *SN = SoFar.second->getSyntaxNode(S);
+ if (SN->getKind() != KReferenceType)
+ break;
+ auto *RT = static_cast<const ReferenceType *>(SN);
+ SoFar.second = RT->Pointee;
+ SoFar.first = std::min(SoFar.first, RT->RK);
+ }
+ return SoFar;
+ }
+
+public:
+ ReferenceType(const Node *Pointee_, ReferenceKind RK_)
+ : Node(KReferenceType, Pointee_->RHSComponentCache),
+ Pointee(Pointee_), RK(RK_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Pointee, RK); }
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ return Pointee->hasRHSComponent(S);
+ }
+
+ void printLeft(OutputStream &s) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
+ Collapsed.second->printLeft(s);
+ if (Collapsed.second->hasArray(s))
+ s += " ";
+ if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
+ s += "(";
+
+ s += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&");
+ }
+ void printRight(OutputStream &s) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
+ if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
+ s += ")";
+ Collapsed.second->printRight(s);
+ }
+};
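The collapse() walk in ReferenceType mirrors the language's reference-collapsing rule: && applied to && stays &&, every other combination yields &. A compile-time restatement of that rule, independent of the demangler (the aliases LRef and RRef are only for illustration):

    #include <type_traits>

    using LRef = int &;
    using RRef = int &&;
    static_assert(std::is_same<LRef &,  int &>::value,  "& of &   collapses to &");
    static_assert(std::is_same<LRef &&, int &>::value,  "&& of &  collapses to &");
    static_assert(std::is_same<RRef &,  int &>::value,  "& of &&  collapses to &");
    static_assert(std::is_same<RRef &&, int &&>::value, "&& of && stays &&");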
+
+class PointerToMemberType final : public Node {
+ const Node *ClassType;
+ const Node *MemberType;
+
+public:
+ PointerToMemberType(const Node *ClassType_, const Node *MemberType_)
+ : Node(KPointerToMemberType, MemberType_->RHSComponentCache),
+ ClassType(ClassType_), MemberType(MemberType_) {}
+
+ template<typename Fn> void match(Fn F) const { F(ClassType, MemberType); }
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ return MemberType->hasRHSComponent(S);
+ }
+
+ void printLeft(OutputStream &s) const override {
+ MemberType->printLeft(s);
+ if (MemberType->hasArray(s) || MemberType->hasFunction(s))
+ s += "(";
+ else
+ s += " ";
+ ClassType->print(s);
+ s += "::*";
+ }
+
+ void printRight(OutputStream &s) const override {
+ if (MemberType->hasArray(s) || MemberType->hasFunction(s))
+ s += ")";
+ MemberType->printRight(s);
+ }
+};
+
+class NodeOrString {
+ const void *First;
+ const void *Second;
+
+public:
+ /* implicit */ NodeOrString(StringView Str) {
+ const char *FirstChar = Str.begin();
+ const char *SecondChar = Str.end();
+ if (SecondChar == nullptr) {
+ assert(FirstChar == SecondChar);
+ ++FirstChar, ++SecondChar;
+ }
+ First = static_cast<const void *>(FirstChar);
+ Second = static_cast<const void *>(SecondChar);
+ }
+
+ /* implicit */ NodeOrString(Node *N)
+ : First(static_cast<const void *>(N)), Second(nullptr) {}
+ NodeOrString() : First(nullptr), Second(nullptr) {}
+
+ bool isString() const { return Second && First; }
+ bool isNode() const { return First && !Second; }
+ bool isEmpty() const { return !First && !Second; }
+
+ StringView asString() const {
+ assert(isString());
+ return StringView(static_cast<const char *>(First),
+ static_cast<const char *>(Second));
+ }
+
+ const Node *asNode() const {
+ assert(isNode());
+ return static_cast<const Node *>(First);
+ }
+};
+
+class ArrayType final : public Node {
+ const Node *Base;
+ NodeOrString Dimension;
+
+public:
+ ArrayType(const Node *Base_, NodeOrString Dimension_)
+ : Node(KArrayType,
+ /*RHSComponentCache=*/Cache::Yes,
+ /*ArrayCache=*/Cache::Yes),
+ Base(Base_), Dimension(Dimension_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Base, Dimension); }
+
+ bool hasRHSComponentSlow(OutputStream &) const override { return true; }
+ bool hasArraySlow(OutputStream &) const override { return true; }
+
+ void printLeft(OutputStream &S) const override { Base->printLeft(S); }
+
+ void printRight(OutputStream &S) const override {
+ if (S.back() != ']')
+ S += " ";
+ S += "[";
+ if (Dimension.isString())
+ S += Dimension.asString();
+ else if (Dimension.isNode())
+ Dimension.asNode()->print(S);
+ S += "]";
+ Base->printRight(S);
+ }
+};
+
+class FunctionType final : public Node {
+ const Node *Ret;
+ NodeArray Params;
+ Qualifiers CVQuals;
+ FunctionRefQual RefQual;
+ const Node *ExceptionSpec;
+
+public:
+ FunctionType(const Node *Ret_, NodeArray Params_, Qualifiers CVQuals_,
+ FunctionRefQual RefQual_, const Node *ExceptionSpec_)
+ : Node(KFunctionType,
+ /*RHSComponentCache=*/Cache::Yes, /*ArrayCache=*/Cache::No,
+ /*FunctionCache=*/Cache::Yes),
+ Ret(Ret_), Params(Params_), CVQuals(CVQuals_), RefQual(RefQual_),
+ ExceptionSpec(ExceptionSpec_) {}
+
+ template<typename Fn> void match(Fn F) const {
+ F(Ret, Params, CVQuals, RefQual, ExceptionSpec);
+ }
+
+ bool hasRHSComponentSlow(OutputStream &) const override { return true; }
+ bool hasFunctionSlow(OutputStream &) const override { return true; }
+
+ // Handle C++'s ... quirky decl grammar by using the left & right
+ // distinction. Consider:
+ // int (*f(float))(char) {}
+ // f is a function that takes a float and returns a pointer to a function
+ // that takes a char and returns an int. If we're trying to print f, start
+  // by printing out the return type's left, then print our parameters, then
+ // finally print right of the return type.
+ void printLeft(OutputStream &S) const override {
+ Ret->printLeft(S);
+ S += " ";
+ }
+
+ void printRight(OutputStream &S) const override {
+ S += "(";
+ Params.printWithComma(S);
+ S += ")";
+ Ret->printRight(S);
+
+ if (CVQuals & QualConst)
+ S += " const";
+ if (CVQuals & QualVolatile)
+ S += " volatile";
+ if (CVQuals & QualRestrict)
+ S += " restrict";
+
+ if (RefQual == FrefQualLValue)
+ S += " &";
+ else if (RefQual == FrefQualRValue)
+ S += " &&";
+
+ if (ExceptionSpec != nullptr) {
+ S += ' ';
+ ExceptionSpec->print(S);
+ }
+ }
+};
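The left/right split described in the FunctionType comment is what lets nested declarator types print inside-out. One way to observe it end to end through the public API, assuming the itaniumDemangle() entry point declared in Demangle.h; the mangled name is illustrative (a template instantiation, so the pointer-to-function return type is encoded and must be printed around the name):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      char *D = llvm::itaniumDemangle("_Z1fIfEPFicET_", nullptr, nullptr, &Status);
      if (Status == llvm::demangle_success && D)
        std::puts(D); // expected: "int (*f<float>(float))(char)"
      std::free(D);
      return 0;
    }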
+
+class NoexceptSpec : public Node {
+ const Node *E;
+public:
+ NoexceptSpec(const Node *E_) : Node(KNoexceptSpec), E(E_) {}
+
+ template<typename Fn> void match(Fn F) const { F(E); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "noexcept(";
+ E->print(S);
+ S += ")";
+ }
+};
+
+class DynamicExceptionSpec : public Node {
+ NodeArray Types;
+public:
+ DynamicExceptionSpec(NodeArray Types_)
+ : Node(KDynamicExceptionSpec), Types(Types_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Types); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "throw(";
+ Types.printWithComma(S);
+ S += ')';
+ }
+};
+
+class FunctionEncoding final : public Node {
+ const Node *Ret;
+ const Node *Name;
+ NodeArray Params;
+ const Node *Attrs;
+ Qualifiers CVQuals;
+ FunctionRefQual RefQual;
+
+public:
+ FunctionEncoding(const Node *Ret_, const Node *Name_, NodeArray Params_,
+ const Node *Attrs_, Qualifiers CVQuals_,
+ FunctionRefQual RefQual_)
+ : Node(KFunctionEncoding,
+ /*RHSComponentCache=*/Cache::Yes, /*ArrayCache=*/Cache::No,
+ /*FunctionCache=*/Cache::Yes),
+ Ret(Ret_), Name(Name_), Params(Params_), Attrs(Attrs_),
+ CVQuals(CVQuals_), RefQual(RefQual_) {}
+
+ template<typename Fn> void match(Fn F) const {
+ F(Ret, Name, Params, Attrs, CVQuals, RefQual);
+ }
+
+ Qualifiers getCVQuals() const { return CVQuals; }
+ FunctionRefQual getRefQual() const { return RefQual; }
+ NodeArray getParams() const { return Params; }
+ const Node *getReturnType() const { return Ret; }
+
+ bool hasRHSComponentSlow(OutputStream &) const override { return true; }
+ bool hasFunctionSlow(OutputStream &) const override { return true; }
+
+ const Node *getName() const { return Name; }
+
+ void printLeft(OutputStream &S) const override {
+ if (Ret) {
+ Ret->printLeft(S);
+ if (!Ret->hasRHSComponent(S))
+ S += " ";
+ }
+ Name->print(S);
+ }
+
+ void printRight(OutputStream &S) const override {
+ S += "(";
+ Params.printWithComma(S);
+ S += ")";
+ if (Ret)
+ Ret->printRight(S);
+
+ if (CVQuals & QualConst)
+ S += " const";
+ if (CVQuals & QualVolatile)
+ S += " volatile";
+ if (CVQuals & QualRestrict)
+ S += " restrict";
+
+ if (RefQual == FrefQualLValue)
+ S += " &";
+ else if (RefQual == FrefQualRValue)
+ S += " &&";
+
+ if (Attrs != nullptr)
+ Attrs->print(S);
+ }
+};
+
+class LiteralOperator : public Node {
+ const Node *OpName;
+
+public:
+ LiteralOperator(const Node *OpName_)
+ : Node(KLiteralOperator), OpName(OpName_) {}
+
+ template<typename Fn> void match(Fn F) const { F(OpName); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "operator\"\" ";
+ OpName->print(S);
+ }
+};
+
+class SpecialName final : public Node {
+ const StringView Special;
+ const Node *Child;
+
+public:
+ SpecialName(StringView Special_, const Node *Child_)
+ : Node(KSpecialName), Special(Special_), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Special, Child); }
+
+ void printLeft(OutputStream &S) const override {
+ S += Special;
+ Child->print(S);
+ }
+};
+
+class CtorVtableSpecialName final : public Node {
+ const Node *FirstType;
+ const Node *SecondType;
+
+public:
+ CtorVtableSpecialName(const Node *FirstType_, const Node *SecondType_)
+ : Node(KCtorVtableSpecialName),
+ FirstType(FirstType_), SecondType(SecondType_) {}
+
+ template<typename Fn> void match(Fn F) const { F(FirstType, SecondType); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "construction vtable for ";
+ FirstType->print(S);
+ S += "-in-";
+ SecondType->print(S);
+ }
+};
+
+struct NestedName : Node {
+ Node *Qual;
+ Node *Name;
+
+ NestedName(Node *Qual_, Node *Name_)
+ : Node(KNestedName), Qual(Qual_), Name(Name_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Qual, Name); }
+
+ StringView getBaseName() const override { return Name->getBaseName(); }
+
+ void printLeft(OutputStream &S) const override {
+ Qual->print(S);
+ S += "::";
+ Name->print(S);
+ }
+};
+
+struct LocalName : Node {
+ Node *Encoding;
+ Node *Entity;
+
+ LocalName(Node *Encoding_, Node *Entity_)
+ : Node(KLocalName), Encoding(Encoding_), Entity(Entity_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Encoding, Entity); }
+
+ void printLeft(OutputStream &S) const override {
+ Encoding->print(S);
+ S += "::";
+ Entity->print(S);
+ }
+};
+
+class QualifiedName final : public Node {
+ // qualifier::name
+ const Node *Qualifier;
+ const Node *Name;
+
+public:
+ QualifiedName(const Node *Qualifier_, const Node *Name_)
+ : Node(KQualifiedName), Qualifier(Qualifier_), Name(Name_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Qualifier, Name); }
+
+ StringView getBaseName() const override { return Name->getBaseName(); }
+
+ void printLeft(OutputStream &S) const override {
+ Qualifier->print(S);
+ S += "::";
+ Name->print(S);
+ }
+};
+
+class VectorType final : public Node {
+ const Node *BaseType;
+ const NodeOrString Dimension;
+
+public:
+ VectorType(const Node *BaseType_, NodeOrString Dimension_)
+ : Node(KVectorType), BaseType(BaseType_),
+ Dimension(Dimension_) {}
+
+ template<typename Fn> void match(Fn F) const { F(BaseType, Dimension); }
+
+ void printLeft(OutputStream &S) const override {
+ BaseType->print(S);
+ S += " vector[";
+ if (Dimension.isNode())
+ Dimension.asNode()->print(S);
+ else if (Dimension.isString())
+ S += Dimension.asString();
+ S += "]";
+ }
+};
+
+class PixelVectorType final : public Node {
+ const NodeOrString Dimension;
+
+public:
+ PixelVectorType(NodeOrString Dimension_)
+ : Node(KPixelVectorType), Dimension(Dimension_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Dimension); }
+
+ void printLeft(OutputStream &S) const override {
+ // FIXME: This should demangle as "vector pixel".
+ S += "pixel vector[";
+ S += Dimension.asString();
+ S += "]";
+ }
+};
+
+/// An unexpanded parameter pack (either in the expression or type context). If
+/// this AST is correct, this node will have a ParameterPackExpansion node above
+/// it.
+///
+/// This node is created when some <template-args> are found that apply to an
+/// <encoding>, and is stored in the TemplateParams table. In order for this to
+/// appear in the final AST, it has to be referenced via a <template-param> (i.e.,
+/// T_).
+class ParameterPack final : public Node {
+ NodeArray Data;
+
+  // Set up OutputStream for a pack expansion unless we're already expanding one.
+ void initializePackExpansion(OutputStream &S) const {
+ if (S.CurrentPackMax == std::numeric_limits<unsigned>::max()) {
+ S.CurrentPackMax = static_cast<unsigned>(Data.size());
+ S.CurrentPackIndex = 0;
+ }
+ }
+
+public:
+ ParameterPack(NodeArray Data_) : Node(KParameterPack), Data(Data_) {
+ ArrayCache = FunctionCache = RHSComponentCache = Cache::Unknown;
+ if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
+ return P->ArrayCache == Cache::No;
+ }))
+ ArrayCache = Cache::No;
+ if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
+ return P->FunctionCache == Cache::No;
+ }))
+ FunctionCache = Cache::No;
+ if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
+ return P->RHSComponentCache == Cache::No;
+ }))
+ RHSComponentCache = Cache::No;
+ }
+
+ template<typename Fn> void match(Fn F) const { F(Data); }
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasRHSComponent(S);
+ }
+ bool hasArraySlow(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasArray(S);
+ }
+ bool hasFunctionSlow(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasFunction(S);
+ }
+ const Node *getSyntaxNode(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ return Idx < Data.size() ? Data[Idx]->getSyntaxNode(S) : this;
+ }
+
+ void printLeft(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ if (Idx < Data.size())
+ Data[Idx]->printLeft(S);
+ }
+ void printRight(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ if (Idx < Data.size())
+ Data[Idx]->printRight(S);
+ }
+};
+
+/// A variadic template argument. This node represents an occurrence of
+/// J<something>E in some <template-args>. It isn't itself unexpanded, unless
+/// one of it's Elements is. The parser inserts a ParameterPack into the
+/// TemplateParams table if the <template-args> this pack belongs to apply to an
+/// <encoding>.
+class TemplateArgumentPack final : public Node {
+ NodeArray Elements;
+public:
+ TemplateArgumentPack(NodeArray Elements_)
+ : Node(KTemplateArgumentPack), Elements(Elements_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Elements); }
+
+ NodeArray getElements() const { return Elements; }
+
+ void printLeft(OutputStream &S) const override {
+ Elements.printWithComma(S);
+ }
+};
+
+/// A pack expansion. Below this node, there are some unexpanded ParameterPacks
+/// which each have Child->ParameterPackSize elements.
+class ParameterPackExpansion final : public Node {
+ const Node *Child;
+
+public:
+ ParameterPackExpansion(const Node *Child_)
+ : Node(KParameterPackExpansion), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Child); }
+
+ const Node *getChild() const { return Child; }
+
+ void printLeft(OutputStream &S) const override {
+ constexpr unsigned Max = std::numeric_limits<unsigned>::max();
+ SwapAndRestore<unsigned> SavePackIdx(S.CurrentPackIndex, Max);
+ SwapAndRestore<unsigned> SavePackMax(S.CurrentPackMax, Max);
+ size_t StreamPos = S.getCurrentPosition();
+
+ // Print the first element in the pack. If Child contains a ParameterPack,
+ // it will set up S.CurrentPackMax and print the first element.
+ Child->print(S);
+
+ // No ParameterPack was found in Child. This can occur if we've found a pack
+ // expansion on a <function-param>.
+ if (S.CurrentPackMax == Max) {
+ S += "...";
+ return;
+ }
+
+ // We found a ParameterPack, but it has no elements. Erase whatever we may
+ // have printed.
+ if (S.CurrentPackMax == 0) {
+ S.setCurrentPosition(StreamPos);
+ return;
+ }
+
+ // Else, iterate through the rest of the elements in the pack.
+ for (unsigned I = 1, E = S.CurrentPackMax; I < E; ++I) {
+ S += ", ";
+ S.CurrentPackIndex = I;
+ Child->print(S);
+ }
+ }
+};
+
+class TemplateArgs final : public Node {
+ NodeArray Params;
+
+public:
+ TemplateArgs(NodeArray Params_) : Node(KTemplateArgs), Params(Params_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Params); }
+
+ NodeArray getParams() { return Params; }
+
+ void printLeft(OutputStream &S) const override {
+ S += "<";
+ Params.printWithComma(S);
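+ // Add a space so two adjacent '>' characters don't run together as ">>".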
+ if (S.back() == '>')
+ S += " ";
+ S += ">";
+ }
+};
+
+/// A forward-reference to a template argument that was not known at the point
+/// where the template parameter name was parsed in a mangling.
+///
+/// This is created when demangling the name of a specialization of a
+/// conversion function template:
+///
+/// \code
+/// struct A {
+/// template<typename T> operator T*();
+/// };
+/// \endcode
+///
+/// When demangling a specialization of the conversion function template, we
+/// encounter the name of the template (including the \c T) before we reach
+/// the template argument list, so we cannot substitute the parameter name
+/// for the corresponding argument while parsing. Instead, we create a
+/// \c ForwardTemplateReference node that is resolved after we parse the
+/// template arguments.
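+///
+/// As a rough illustration (hypothetical example): when demangling something
+/// like _ZN1AcvPT_IiEEv, the T_ inside "cvPT_" is recorded as a
+/// ForwardTemplateReference (Index 0) and is only bound to "int" once the
+/// trailing "IiE" template argument list has been parsed.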
+struct ForwardTemplateReference : Node {
+ size_t Index;
+ Node *Ref = nullptr;
+
+ // Set while this node is being printed. It is possible (though invalid) for a
+ // forward template reference to refer to itself via a substitution. That
+ // creates a cyclic AST, and printing it would overflow the stack, so bail out
+ // if more than one print* function is active at once.
+ mutable bool Printing = false;
+
+ ForwardTemplateReference(size_t Index_)
+ : Node(KForwardTemplateReference, Cache::Unknown, Cache::Unknown,
+ Cache::Unknown),
+ Index(Index_) {}
+
+ // We don't provide a matcher for these, because the value of the node is
+ // not determined by its construction parameters, and it generally needs
+ // special handling.
+ template<typename Fn> void match(Fn F) const = delete;
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ if (Printing)
+ return false;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ return Ref->hasRHSComponent(S);
+ }
+ bool hasArraySlow(OutputStream &S) const override {
+ if (Printing)
+ return false;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ return Ref->hasArray(S);
+ }
+ bool hasFunctionSlow(OutputStream &S) const override {
+ if (Printing)
+ return false;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ return Ref->hasFunction(S);
+ }
+ const Node *getSyntaxNode(OutputStream &S) const override {
+ if (Printing)
+ return this;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ return Ref->getSyntaxNode(S);
+ }
+
+ void printLeft(OutputStream &S) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ Ref->printLeft(S);
+ }
+ void printRight(OutputStream &S) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ Ref->printRight(S);
+ }
+};
+
+struct NameWithTemplateArgs : Node {
+ // name<template_args>
+ Node *Name;
+ Node *TemplateArgs;
+
+ NameWithTemplateArgs(Node *Name_, Node *TemplateArgs_)
+ : Node(KNameWithTemplateArgs), Name(Name_), TemplateArgs(TemplateArgs_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Name, TemplateArgs); }
+
+ StringView getBaseName() const override { return Name->getBaseName(); }
+
+ void printLeft(OutputStream &S) const override {
+ Name->print(S);
+ TemplateArgs->print(S);
+ }
+};
+
+class GlobalQualifiedName final : public Node {
+ Node *Child;
+
+public:
+ GlobalQualifiedName(Node* Child_)
+ : Node(KGlobalQualifiedName), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Child); }
+
+ StringView getBaseName() const override { return Child->getBaseName(); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "::";
+ Child->print(S);
+ }
+};
+
+struct StdQualifiedName : Node {
+ Node *Child;
+
+ StdQualifiedName(Node *Child_) : Node(KStdQualifiedName), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Child); }
+
+ StringView getBaseName() const override { return Child->getBaseName(); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "std::";
+ Child->print(S);
+ }
+};
+
+enum class SpecialSubKind {
+ allocator,
+ basic_string,
+ string,
+ istream,
+ ostream,
+ iostream,
+};
+
+class ExpandedSpecialSubstitution final : public Node {
+ SpecialSubKind SSK;
+
+public:
+ ExpandedSpecialSubstitution(SpecialSubKind SSK_)
+ : Node(KExpandedSpecialSubstitution), SSK(SSK_) {}
+
+ template<typename Fn> void match(Fn F) const { F(SSK); }
+
+ StringView getBaseName() const override {
+ switch (SSK) {
+ case SpecialSubKind::allocator:
+ return StringView("allocator");
+ case SpecialSubKind::basic_string:
+ return StringView("basic_string");
+ case SpecialSubKind::string:
+ return StringView("basic_string");
+ case SpecialSubKind::istream:
+ return StringView("basic_istream");
+ case SpecialSubKind::ostream:
+ return StringView("basic_ostream");
+ case SpecialSubKind::iostream:
+ return StringView("basic_iostream");
+ }
+ LLVM_BUILTIN_UNREACHABLE;
+ }
+
+ void printLeft(OutputStream &S) const override {
+ switch (SSK) {
+ case SpecialSubKind::allocator:
+ S += "std::allocator";
+ break;
+ case SpecialSubKind::basic_string:
+ S += "std::basic_string";
+ break;
+ case SpecialSubKind::string:
+ S += "std::basic_string<char, std::char_traits<char>, "
+ "std::allocator<char> >";
+ break;
+ case SpecialSubKind::istream:
+ S += "std::basic_istream<char, std::char_traits<char> >";
+ break;
+ case SpecialSubKind::ostream:
+ S += "std::basic_ostream<char, std::char_traits<char> >";
+ break;
+ case SpecialSubKind::iostream:
+ S += "std::basic_iostream<char, std::char_traits<char> >";
+ break;
+ }
+ }
+};
+
+class SpecialSubstitution final : public Node {
+public:
+ SpecialSubKind SSK;
+
+ SpecialSubstitution(SpecialSubKind SSK_)
+ : Node(KSpecialSubstitution), SSK(SSK_) {}
+
+ template<typename Fn> void match(Fn F) const { F(SSK); }
+
+ StringView getBaseName() const override {
+ switch (SSK) {
+ case SpecialSubKind::allocator:
+ return StringView("allocator");
+ case SpecialSubKind::basic_string:
+ return StringView("basic_string");
+ case SpecialSubKind::string:
+ return StringView("string");
+ case SpecialSubKind::istream:
+ return StringView("istream");
+ case SpecialSubKind::ostream:
+ return StringView("ostream");
+ case SpecialSubKind::iostream:
+ return StringView("iostream");
+ }
+ LLVM_BUILTIN_UNREACHABLE;
+ }
+
+ void printLeft(OutputStream &S) const override {
+ switch (SSK) {
+ case SpecialSubKind::allocator:
+ S += "std::allocator";
+ break;
+ case SpecialSubKind::basic_string:
+ S += "std::basic_string";
+ break;
+ case SpecialSubKind::string:
+ S += "std::string";
+ break;
+ case SpecialSubKind::istream:
+ S += "std::istream";
+ break;
+ case SpecialSubKind::ostream:
+ S += "std::ostream";
+ break;
+ case SpecialSubKind::iostream:
+ S += "std::iostream";
+ break;
+ }
+ }
+};
+
+class CtorDtorName final : public Node {
+ const Node *Basename;
+ const bool IsDtor;
+ const int Variant;
+
+public:
+ CtorDtorName(const Node *Basename_, bool IsDtor_, int Variant_)
+ : Node(KCtorDtorName), Basename(Basename_), IsDtor(IsDtor_),
+ Variant(Variant_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Basename, IsDtor, Variant); }
+
+ void printLeft(OutputStream &S) const override {
+ if (IsDtor)
+ S += "~";
+ S += Basename->getBaseName();
+ }
+};
+
+class DtorName : public Node {
+ const Node *Base;
+
+public:
+ DtorName(const Node *Base_) : Node(KDtorName), Base(Base_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Base); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "~";
+ Base->printLeft(S);
+ }
+};
+
+class UnnamedTypeName : public Node {
+ const StringView Count;
+
+public:
+ UnnamedTypeName(StringView Count_) : Node(KUnnamedTypeName), Count(Count_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Count); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "'unnamed";
+ S += Count;
+ S += "\'";
+ }
+};
+
+class ClosureTypeName : public Node {
+ NodeArray Params;
+ StringView Count;
+
+public:
+ ClosureTypeName(NodeArray Params_, StringView Count_)
+ : Node(KClosureTypeName), Params(Params_), Count(Count_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Params, Count); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "\'lambda";
+ S += Count;
+ S += "\'(";
+ Params.printWithComma(S);
+ S += ")";
+ }
+};
+
+class StructuredBindingName : public Node {
+ NodeArray Bindings;
+public:
+ StructuredBindingName(NodeArray Bindings_)
+ : Node(KStructuredBindingName), Bindings(Bindings_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Bindings); }
+
+ void printLeft(OutputStream &S) const override {
+ S += '[';
+ Bindings.printWithComma(S);
+ S += ']';
+ }
+};
+
+// -- Expression Nodes --
+
+class BinaryExpr : public Node {
+ const Node *LHS;
+ const StringView InfixOperator;
+ const Node *RHS;
+
+public:
+ BinaryExpr(const Node *LHS_, StringView InfixOperator_, const Node *RHS_)
+ : Node(KBinaryExpr), LHS(LHS_), InfixOperator(InfixOperator_), RHS(RHS_) {
+ }
+
+ template<typename Fn> void match(Fn F) const { F(LHS, InfixOperator, RHS); }
+
+ void printLeft(OutputStream &S) const override {
+ // This might be a template argument expression; if so, disambiguate a '>'
+ // operator with an extra pair of parens.
+ if (InfixOperator == ">")
+ S += "(";
+
+ S += "(";
+ LHS->print(S);
+ S += ") ";
+ S += InfixOperator;
+ S += " (";
+ RHS->print(S);
+ S += ")";
+
+ if (InfixOperator == ">")
+ S += ")";
+ }
+};
+
+class ArraySubscriptExpr : public Node {
+ const Node *Op1;
+ const Node *Op2;
+
+public:
+ ArraySubscriptExpr(const Node *Op1_, const Node *Op2_)
+ : Node(KArraySubscriptExpr), Op1(Op1_), Op2(Op2_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Op1, Op2); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "(";
+ Op1->print(S);
+ S += ")[";
+ Op2->print(S);
+ S += "]";
+ }
+};
+
+class PostfixExpr : public Node {
+ const Node *Child;
+ const StringView Operator;
+
+public:
+ PostfixExpr(const Node *Child_, StringView Operator_)
+ : Node(KPostfixExpr), Child(Child_), Operator(Operator_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Child, Operator); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "(";
+ Child->print(S);
+ S += ")";
+ S += Operator;
+ }
+};
+
+class ConditionalExpr : public Node {
+ const Node *Cond;
+ const Node *Then;
+ const Node *Else;
+
+public:
+ ConditionalExpr(const Node *Cond_, const Node *Then_, const Node *Else_)
+ : Node(KConditionalExpr), Cond(Cond_), Then(Then_), Else(Else_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Cond, Then, Else); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "(";
+ Cond->print(S);
+ S += ") ? (";
+ Then->print(S);
+ S += ") : (";
+ Else->print(S);
+ S += ")";
+ }
+};
+
+class MemberExpr : public Node {
+ const Node *LHS;
+ const StringView Kind;
+ const Node *RHS;
+
+public:
+ MemberExpr(const Node *LHS_, StringView Kind_, const Node *RHS_)
+ : Node(KMemberExpr), LHS(LHS_), Kind(Kind_), RHS(RHS_) {}
+
+ template<typename Fn> void match(Fn F) const { F(LHS, Kind, RHS); }
+
+ void printLeft(OutputStream &S) const override {
+ LHS->print(S);
+ S += Kind;
+ RHS->print(S);
+ }
+};
+
+class EnclosingExpr : public Node {
+ const StringView Prefix;
+ const Node *Infix;
+ const StringView Postfix;
+
+public:
+ EnclosingExpr(StringView Prefix_, Node *Infix_, StringView Postfix_)
+ : Node(KEnclosingExpr), Prefix(Prefix_), Infix(Infix_),
+ Postfix(Postfix_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Prefix, Infix, Postfix); }
+
+ void printLeft(OutputStream &S) const override {
+ S += Prefix;
+ Infix->print(S);
+ S += Postfix;
+ }
+};
+
+class CastExpr : public Node {
+ // cast_kind<to>(from)
+ const StringView CastKind;
+ const Node *To;
+ const Node *From;
+
+public:
+ CastExpr(StringView CastKind_, const Node *To_, const Node *From_)
+ : Node(KCastExpr), CastKind(CastKind_), To(To_), From(From_) {}
+
+ template<typename Fn> void match(Fn F) const { F(CastKind, To, From); }
+
+ void printLeft(OutputStream &S) const override {
+ S += CastKind;
+ S += "<";
+ To->printLeft(S);
+ S += ">(";
+ From->printLeft(S);
+ S += ")";
+ }
+};
+
+class SizeofParamPackExpr : public Node {
+ const Node *Pack;
+
+public:
+ SizeofParamPackExpr(const Node *Pack_)
+ : Node(KSizeofParamPackExpr), Pack(Pack_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Pack); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "sizeof...(";
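+ // Wrap the pack in a temporary ParameterPackExpansion so that its elements
+ // are expanded, comma-separated, between the parens.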
+ ParameterPackExpansion PPE(Pack);
+ PPE.printLeft(S);
+ S += ")";
+ }
+};
+
+class CallExpr : public Node {
+ const Node *Callee;
+ NodeArray Args;
+
+public:
+ CallExpr(const Node *Callee_, NodeArray Args_)
+ : Node(KCallExpr), Callee(Callee_), Args(Args_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Callee, Args); }
+
+ void printLeft(OutputStream &S) const override {
+ Callee->print(S);
+ S += "(";
+ Args.printWithComma(S);
+ S += ")";
+ }
+};
+
+class NewExpr : public Node {
+ // new (expr_list) type(init_list)
+ NodeArray ExprList;
+ Node *Type;
+ NodeArray InitList;
+ bool IsGlobal; // ::operator new ?
+ bool IsArray; // new[] ?
+public:
+ NewExpr(NodeArray ExprList_, Node *Type_, NodeArray InitList_, bool IsGlobal_,
+ bool IsArray_)
+ : Node(KNewExpr), ExprList(ExprList_), Type(Type_), InitList(InitList_),
+ IsGlobal(IsGlobal_), IsArray(IsArray_) {}
+
+ template<typename Fn> void match(Fn F) const {
+ F(ExprList, Type, InitList, IsGlobal, IsArray);
+ }
+
+ void printLeft(OutputStream &S) const override {
+ if (IsGlobal)
+ S += "::operator ";
+ S += "new";
+ if (IsArray)
+ S += "[]";
+ S += ' ';
+ if (!ExprList.empty()) {
+ S += "(";
+ ExprList.printWithComma(S);
+ S += ")";
+ }
+ Type->print(S);
+ if (!InitList.empty()) {
+ S += "(";
+ InitList.printWithComma(S);
+ S += ")";
+ }
+
+ }
+};
+
+class DeleteExpr : public Node {
+ Node *Op;
+ bool IsGlobal;
+ bool IsArray;
+
+public:
+ DeleteExpr(Node *Op_, bool IsGlobal_, bool IsArray_)
+ : Node(KDeleteExpr), Op(Op_), IsGlobal(IsGlobal_), IsArray(IsArray_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Op, IsGlobal, IsArray); }
+
+ void printLeft(OutputStream &S) const override {
+ if (IsGlobal)
+ S += "::";
+ S += "delete";
+ if (IsArray)
+ S += "[] ";
+ Op->print(S);
+ }
+};
+
+class PrefixExpr : public Node {
+ StringView Prefix;
+ Node *Child;
+
+public:
+ PrefixExpr(StringView Prefix_, Node *Child_)
+ : Node(KPrefixExpr), Prefix(Prefix_), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Prefix, Child); }
+
+ void printLeft(OutputStream &S) const override {
+ S += Prefix;
+ S += "(";
+ Child->print(S);
+ S += ")";
+ }
+};
+
+class FunctionParam : public Node {
+ StringView Number;
+
+public:
+ FunctionParam(StringView Number_) : Node(KFunctionParam), Number(Number_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Number); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "fp";
+ S += Number;
+ }
+};
+
+class ConversionExpr : public Node {
+ const Node *Type;
+ NodeArray Expressions;
+
+public:
+ ConversionExpr(const Node *Type_, NodeArray Expressions_)
+ : Node(KConversionExpr), Type(Type_), Expressions(Expressions_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Type, Expressions); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "(";
+ Type->print(S);
+ S += ")(";
+ Expressions.printWithComma(S);
+ S += ")";
+ }
+};
+
+class InitListExpr : public Node {
+ const Node *Ty;
+ NodeArray Inits;
+public:
+ InitListExpr(const Node *Ty_, NodeArray Inits_)
+ : Node(KInitListExpr), Ty(Ty_), Inits(Inits_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty, Inits); }
+
+ void printLeft(OutputStream &S) const override {
+ if (Ty)
+ Ty->print(S);
+ S += '{';
+ Inits.printWithComma(S);
+ S += '}';
+ }
+};
+
+class BracedExpr : public Node {
+ const Node *Elem;
+ const Node *Init;
+ bool IsArray;
+public:
+ BracedExpr(const Node *Elem_, const Node *Init_, bool IsArray_)
+ : Node(KBracedExpr), Elem(Elem_), Init(Init_), IsArray(IsArray_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Elem, Init, IsArray); }
+
+ void printLeft(OutputStream &S) const override {
+ if (IsArray) {
+ S += '[';
+ Elem->print(S);
+ S += ']';
+ } else {
+ S += '.';
+ Elem->print(S);
+ }
+ if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
+ S += " = ";
+ Init->print(S);
+ }
+};
+
+class BracedRangeExpr : public Node {
+ const Node *First;
+ const Node *Last;
+ const Node *Init;
+public:
+ BracedRangeExpr(const Node *First_, const Node *Last_, const Node *Init_)
+ : Node(KBracedRangeExpr), First(First_), Last(Last_), Init(Init_) {}
+
+ template<typename Fn> void match(Fn F) const { F(First, Last, Init); }
+
+ void printLeft(OutputStream &S) const override {
+ S += '[';
+ First->print(S);
+ S += " ... ";
+ Last->print(S);
+ S += ']';
+ if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
+ S += " = ";
+ Init->print(S);
+ }
+};
+
+class FoldExpr : public Node {
+ const Node *Pack, *Init;
+ StringView OperatorName;
+ bool IsLeftFold;
+
+public:
+ FoldExpr(bool IsLeftFold_, StringView OperatorName_, const Node *Pack_,
+ const Node *Init_)
+ : Node(KFoldExpr), Pack(Pack_), Init(Init_), OperatorName(OperatorName_),
+ IsLeftFold(IsLeftFold_) {}
+
+ template<typename Fn> void match(Fn F) const {
+ F(IsLeftFold, OperatorName, Pack, Init);
+ }
+
+ void printLeft(OutputStream &S) const override {
+ auto PrintPack = [&] {
+ S += '(';
+ ParameterPackExpansion(Pack).print(S);
+ S += ')';
+ };
+
+ S += '(';
+
+ if (IsLeftFold) {
+ // init op ... op pack
+ if (Init != nullptr) {
+ Init->print(S);
+ S += ' ';
+ S += OperatorName;
+ S += ' ';
+ }
+ // ... op pack
+ S += "... ";
+ S += OperatorName;
+ S += ' ';
+ PrintPack();
+ } else { // !IsLeftFold
+ // pack op ...
+ PrintPack();
+ S += ' ';
+ S += OperatorName;
+ S += " ...";
+ // pack op ... op init
+ if (Init != nullptr) {
+ S += ' ';
+ S += OperatorName;
+ S += ' ';
+ Init->print(S);
+ }
+ }
+ S += ')';
+ }
+};
+
+class ThrowExpr : public Node {
+ const Node *Op;
+
+public:
+ ThrowExpr(const Node *Op_) : Node(KThrowExpr), Op(Op_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Op); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "throw ";
+ Op->print(S);
+ }
+};
+
+class BoolExpr : public Node {
+ bool Value;
+
+public:
+ BoolExpr(bool Value_) : Node(KBoolExpr), Value(Value_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Value); }
+
+ void printLeft(OutputStream &S) const override {
+ S += Value ? StringView("true") : StringView("false");
+ }
+};
+
+class IntegerCastExpr : public Node {
+ // ty(integer)
+ const Node *Ty;
+ StringView Integer;
+
+public:
+ IntegerCastExpr(const Node *Ty_, StringView Integer_)
+ : Node(KIntegerCastExpr), Ty(Ty_), Integer(Integer_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty, Integer); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "(";
+ Ty->print(S);
+ S += ")";
+ S += Integer;
+ }
+};
+
+class IntegerLiteral : public Node {
+ StringView Type;
+ StringView Value;
+
+public:
+ IntegerLiteral(StringView Type_, StringView Value_)
+ : Node(KIntegerLiteral), Type(Type_), Value(Value_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Type, Value); }
+
+ void printLeft(OutputStream &S) const override {
+ if (Type.size() > 3) {
+ S += "(";
+ S += Type;
+ S += ")";
+ }
+
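+ // A leading 'n' in the mangled value marks a negative literal; print it back
+ // as a '-' sign.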
+ if (Value[0] == 'n') {
+ S += "-";
+ S += Value.dropFront(1);
+ } else
+ S += Value;
+
+ if (Type.size() <= 3)
+ S += Type;
+ }
+};
+
+template <class Float> struct FloatData;
+
+namespace float_literal_impl {
+constexpr Node::Kind getFloatLiteralKind(float *) {
+ return Node::KFloatLiteral;
+}
+constexpr Node::Kind getFloatLiteralKind(double *) {
+ return Node::KDoubleLiteral;
+}
+constexpr Node::Kind getFloatLiteralKind(long double *) {
+ return Node::KLongDoubleLiteral;
+}
+}
+
+template <class Float> class FloatLiteralImpl : public Node {
+ const StringView Contents;
+
+ static constexpr Kind KindForClass =
+ float_literal_impl::getFloatLiteralKind((Float *)nullptr);
+
+public:
+ FloatLiteralImpl(StringView Contents_)
+ : Node(KindForClass), Contents(Contents_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Contents); }
+
+ void printLeft(OutputStream &s) const override {
+ const char *first = Contents.begin();
+ const char *last = Contents.end() + 1;
+
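+ // The mangled literal is a string of hex digit pairs encoding the bytes of
+ // the value; decode it into a native Float and let snprintf format it.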
+ const size_t N = FloatData<Float>::mangled_size;
+ if (static_cast<std::size_t>(last - first) > N) {
+ last = first + N;
+ union {
+ Float value;
+ char buf[sizeof(Float)];
+ };
+ const char *t = first;
+ char *e = buf;
+ for (; t != last; ++t, ++e) {
+ unsigned d1 = isdigit(*t) ? static_cast<unsigned>(*t - '0')
+ : static_cast<unsigned>(*t - 'a' + 10);
+ ++t;
+ unsigned d0 = isdigit(*t) ? static_cast<unsigned>(*t - '0')
+ : static_cast<unsigned>(*t - 'a' + 10);
+ *e = static_cast<char>((d1 << 4) + d0);
+ }
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ std::reverse(buf, e);
+#endif
+ char num[FloatData<Float>::max_demangled_size] = {0};
+ int n = snprintf(num, sizeof(num), FloatData<Float>::spec, value);
+ s += StringView(num, num + n);
+ }
+ }
+};
+
+using FloatLiteral = FloatLiteralImpl<float>;
+using DoubleLiteral = FloatLiteralImpl<double>;
+using LongDoubleLiteral = FloatLiteralImpl<long double>;
+
+/// Visit the node. Calls \c F(P), where \c P is the node cast to the
+/// appropriate derived class.
+template<typename Fn>
+void Node::visit(Fn F) const {
+ switch (K) {
+#define CASE(X) case K ## X: return F(static_cast<const X*>(this));
+ FOR_EACH_NODE_KIND(CASE)
+#undef CASE
+ }
+ assert(0 && "unknown mangling node kind");
+}
+
+/// Determine the kind of a node from its type.
+template<typename NodeT> struct NodeKind;
+#define SPECIALIZATION(X) \
+ template<> struct NodeKind<X> { \
+ static constexpr Node::Kind Kind = Node::K##X; \
+ static constexpr const char *name() { return #X; } \
+ };
+FOR_EACH_NODE_KIND(SPECIALIZATION)
+#undef SPECIALIZATION
+
+#undef FOR_EACH_NODE_KIND
+
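+// A small vector with N elements of inline storage, restricted to POD element
+// types; it spills to malloc'd memory once more than N elements are pushed and
+// terminates on allocation failure rather than throwing.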
+template <class T, size_t N>
+class PODSmallVector {
+ static_assert(std::is_pod<T>::value,
+ "T is required to be a plain old data type");
+
+ T* First;
+ T* Last;
+ T* Cap;
+ T Inline[N];
+
+ bool isInline() const { return First == Inline; }
+
+ void clearInline() {
+ First = Inline;
+ Last = Inline;
+ Cap = Inline + N;
+ }
+
+ void reserve(size_t NewCap) {
+ size_t S = size();
+ if (isInline()) {
+ auto* Tmp = static_cast<T*>(std::malloc(NewCap * sizeof(T)));
+ if (Tmp == nullptr)
+ std::terminate();
+ std::copy(First, Last, Tmp);
+ First = Tmp;
+ } else {
+ First = static_cast<T*>(std::realloc(First, NewCap * sizeof(T)));
+ if (First == nullptr)
+ std::terminate();
+ }
+ Last = First + S;
+ Cap = First + NewCap;
+ }
+
+public:
+ PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {}
+
+ PODSmallVector(const PODSmallVector&) = delete;
+ PODSmallVector& operator=(const PODSmallVector&) = delete;
+
+ PODSmallVector(PODSmallVector&& Other) : PODSmallVector() {
+ if (Other.isInline()) {
+ std::copy(Other.begin(), Other.end(), First);
+ Last = First + Other.size();
+ Other.clear();
+ return;
+ }
+
+ First = Other.First;
+ Last = Other.Last;
+ Cap = Other.Cap;
+ Other.clearInline();
+ }
+
+ PODSmallVector& operator=(PODSmallVector&& Other) {
+ if (Other.isInline()) {
+ if (!isInline()) {
+ std::free(First);
+ clearInline();
+ }
+ std::copy(Other.begin(), Other.end(), First);
+ Last = First + Other.size();
+ Other.clear();
+ return *this;
+ }
+
+ if (isInline()) {
+ First = Other.First;
+ Last = Other.Last;
+ Cap = Other.Cap;
+ Other.clearInline();
+ return *this;
+ }
+
+ std::swap(First, Other.First);
+ std::swap(Last, Other.Last);
+ std::swap(Cap, Other.Cap);
+ Other.clear();
+ return *this;
+ }
+
+ void push_back(const T& Elem) {
+ if (Last == Cap)
+ reserve(size() * 2);
+ *Last++ = Elem;
+ }
+
+ void pop_back() {
+ assert(Last != First && "Popping empty vector!");
+ --Last;
+ }
+
+ void dropBack(size_t Index) {
+ assert(Index <= size() && "dropBack() can't expand!");
+ Last = First + Index;
+ }
+
+ T* begin() { return First; }
+ T* end() { return Last; }
+
+ bool empty() const { return First == Last; }
+ size_t size() const { return static_cast<size_t>(Last - First); }
+ T& back() {
+ assert(Last != First && "Calling back() on empty vector!");
+ return *(Last - 1);
+ }
+ T& operator[](size_t Index) {
+ assert(Index < size() && "Invalid access!");
+ return *(begin() + Index);
+ }
+ void clear() { Last = First; }
+
+ ~PODSmallVector() {
+ if (!isInline())
+ std::free(First);
+ }
+};
+
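+// CRTP base class for the demangling parser: a Derived parser can override
+// individual parse* methods, while Alloc supplies makeNode<T>(...),
+// allocateNodeArray(size_t) and reset() for AST storage.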
+template <typename Derived, typename Alloc> struct AbstractManglingParser {
+ const char *First;
+ const char *Last;
+
+ // Name stack; the parser uses it to hold temporary names as they are parsed,
+ // collapsing multiple names into new nodes to construct the AST. Once the
+ // parser is finished, Names.size() == 1.
+ PODSmallVector<Node *, 32> Names;
+
+ // Substitution table. Itanium supports name substitutions as a means of
+ // compression. The string "S42_" refers to the 44th entry (base-36) in this
+ // table.
+ PODSmallVector<Node *, 32> Subs;
+
+ // Template parameter table. Like the above, but referenced like "T42_".
+ // It is given a smaller inline size than Subs and Names because it usually
+ // stays small.
+ PODSmallVector<Node *, 8> TemplateParams;
+
+ // Set of unresolved forward <template-param> references. These can occur in a
+ // conversion operator's type, and are resolved in the enclosing <encoding>.
+ PODSmallVector<ForwardTemplateReference *, 4> ForwardTemplateRefs;
+
+ bool TryToParseTemplateArgs = true;
+ bool PermitForwardTemplateReferences = false;
+ bool ParsingLambdaParams = false;
+
+ Alloc ASTAllocator;
+
+ AbstractManglingParser(const char *First_, const char *Last_)
+ : First(First_), Last(Last_) {}
+
+ Derived &getDerived() { return static_cast<Derived &>(*this); }
+
+ void reset(const char *First_, const char *Last_) {
+ First = First_;
+ Last = Last_;
+ Names.clear();
+ Subs.clear();
+ TemplateParams.clear();
+ ParsingLambdaParams = false;
+ TryToParseTemplateArgs = true;
+ PermitForwardTemplateReferences = false;
+ ASTAllocator.reset();
+ }
+
+ template <class T, class... Args> Node *make(Args &&... args) {
+ return ASTAllocator.template makeNode<T>(std::forward<Args>(args)...);
+ }
+
+ template <class It> NodeArray makeNodeArray(It begin, It end) {
+ size_t sz = static_cast<size_t>(end - begin);
+ void *mem = ASTAllocator.allocateNodeArray(sz);
+ Node **data = new (mem) Node *[sz];
+ std::copy(begin, end, data);
+ return NodeArray(data, sz);
+ }
+
+ NodeArray popTrailingNodeArray(size_t FromPosition) {
+ assert(FromPosition <= Names.size());
+ NodeArray res =
+ makeNodeArray(Names.begin() + (long)FromPosition, Names.end());
+ Names.dropBack(FromPosition);
+ return res;
+ }
+
+ bool consumeIf(StringView S) {
+ if (StringView(First, Last).startsWith(S)) {
+ First += S.size();
+ return true;
+ }
+ return false;
+ }
+
+ bool consumeIf(char C) {
+ if (First != Last && *First == C) {
+ ++First;
+ return true;
+ }
+ return false;
+ }
+
+ char consume() { return First != Last ? *First++ : '\0'; }
+
+ char look(unsigned Lookahead = 0) {
+ if (static_cast<size_t>(Last - First) <= Lookahead)
+ return '\0';
+ return First[Lookahead];
+ }
+
+ size_t numLeft() const { return static_cast<size_t>(Last - First); }
+
+ StringView parseNumber(bool AllowNegative = false);
+ Qualifiers parseCVQualifiers();
+ bool parsePositiveInteger(size_t *Out);
+ StringView parseBareSourceName();
+
+ bool parseSeqId(size_t *Out);
+ Node *parseSubstitution();
+ Node *parseTemplateParam();
+ Node *parseTemplateArgs(bool TagTemplates = false);
+ Node *parseTemplateArg();
+
+ /// Parse the <expr> production.
+ Node *parseExpr();
+ Node *parsePrefixExpr(StringView Kind);
+ Node *parseBinaryExpr(StringView Kind);
+ Node *parseIntegerLiteral(StringView Lit);
+ Node *parseExprPrimary();
+ template <class Float> Node *parseFloatingLiteral();
+ Node *parseFunctionParam();
+ Node *parseNewExpr();
+ Node *parseConversionExpr();
+ Node *parseBracedExpr();
+ Node *parseFoldExpr();
+
+ /// Parse the <type> production.
+ Node *parseType();
+ Node *parseFunctionType();
+ Node *parseVectorType();
+ Node *parseDecltype();
+ Node *parseArrayType();
+ Node *parsePointerToMemberType();
+ Node *parseClassEnumType();
+ Node *parseQualifiedType();
+
+ Node *parseEncoding();
+ bool parseCallOffset();
+ Node *parseSpecialName();
+
+ /// Holds some extra information about a <name> that is being parsed. This
+ /// information is only pertinent if the <name> refers to an <encoding>.
+ struct NameState {
+ bool CtorDtorConversion = false;
+ bool EndsWithTemplateArgs = false;
+ Qualifiers CVQualifiers = QualNone;
+ FunctionRefQual ReferenceQualifier = FrefQualNone;
+ size_t ForwardTemplateRefsBegin;
+
+ NameState(AbstractManglingParser *Enclosing)
+ : ForwardTemplateRefsBegin(Enclosing->ForwardTemplateRefs.size()) {}
+ };
+
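+ // Bind every ForwardTemplateReference recorded since State was created to its
+ // entry in TemplateParams. Returns true on error (an out-of-range index).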
+ bool resolveForwardTemplateRefs(NameState &State) {
+ size_t I = State.ForwardTemplateRefsBegin;
+ size_t E = ForwardTemplateRefs.size();
+ for (; I < E; ++I) {
+ size_t Idx = ForwardTemplateRefs[I]->Index;
+ if (Idx >= TemplateParams.size())
+ return true;
+ ForwardTemplateRefs[I]->Ref = TemplateParams[Idx];
+ }
+ ForwardTemplateRefs.dropBack(State.ForwardTemplateRefsBegin);
+ return false;
+ }
+
+ /// Parse the <name> production.
+ Node *parseName(NameState *State = nullptr);
+ Node *parseLocalName(NameState *State);
+ Node *parseOperatorName(NameState *State);
+ Node *parseUnqualifiedName(NameState *State);
+ Node *parseUnnamedTypeName(NameState *State);
+ Node *parseSourceName(NameState *State);
+ Node *parseUnscopedName(NameState *State);
+ Node *parseNestedName(NameState *State);
+ Node *parseCtorDtorName(Node *&SoFar, NameState *State);
+
+ Node *parseAbiTags(Node *N);
+
+ /// Parse the <unresolved-name> production.
+ Node *parseUnresolvedName();
+ Node *parseSimpleId();
+ Node *parseBaseUnresolvedName();
+ Node *parseUnresolvedType();
+ Node *parseDestructorName();
+
+ /// Top-level entry point into the parser.
+ Node *parse();
+};
+
+const char* parse_discriminator(const char* first, const char* last);
+
+// <name> ::= <nested-name> // N
+// ::= <local-name> # See Scope Encoding below // Z
+// ::= <unscoped-template-name> <template-args>
+// ::= <unscoped-name>
+//
+// <unscoped-template-name> ::= <unscoped-name>
+// ::= <substitution>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseName(NameState *State) {
+ consumeIf('L'); // extension
+
+ if (look() == 'N')
+ return getDerived().parseNestedName(State);
+ if (look() == 'Z')
+ return getDerived().parseLocalName(State);
+
+ // ::= <unscoped-template-name> <template-args>
+ if (look() == 'S' && look(1) != 't') {
+ Node *S = getDerived().parseSubstitution();
+ if (S == nullptr)
+ return nullptr;
+ if (look() != 'I')
+ return nullptr;
+ Node *TA = getDerived().parseTemplateArgs(State != nullptr);
+ if (TA == nullptr)
+ return nullptr;
+ if (State) State->EndsWithTemplateArgs = true;
+ return make<NameWithTemplateArgs>(S, TA);
+ }
+
+ Node *N = getDerived().parseUnscopedName(State);
+ if (N == nullptr)
+ return nullptr;
+ // ::= <unscoped-template-name> <template-args>
+ if (look() == 'I') {
+ Subs.push_back(N);
+ Node *TA = getDerived().parseTemplateArgs(State != nullptr);
+ if (TA == nullptr)
+ return nullptr;
+ if (State) State->EndsWithTemplateArgs = true;
+ return make<NameWithTemplateArgs>(N, TA);
+ }
+ // ::= <unscoped-name>
+ return N;
+}
+
+// <local-name> := Z <function encoding> E <entity name> [<discriminator>]
+// := Z <function encoding> E s [<discriminator>]
+// := Z <function encoding> Ed [ <parameter number> ] _ <entity name>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseLocalName(NameState *State) {
+ if (!consumeIf('Z'))
+ return nullptr;
+ Node *Encoding = getDerived().parseEncoding();
+ if (Encoding == nullptr || !consumeIf('E'))
+ return nullptr;
+
+ if (consumeIf('s')) {
+ First = parse_discriminator(First, Last);
+ auto *StringLitName = make<NameType>("string literal");
+ if (!StringLitName)
+ return nullptr;
+ return make<LocalName>(Encoding, StringLitName);
+ }
+
+ if (consumeIf('d')) {
+ parseNumber(true);
+ if (!consumeIf('_'))
+ return nullptr;
+ Node *N = getDerived().parseName(State);
+ if (N == nullptr)
+ return nullptr;
+ return make<LocalName>(Encoding, N);
+ }
+
+ Node *Entity = getDerived().parseName(State);
+ if (Entity == nullptr)
+ return nullptr;
+ First = parse_discriminator(First, Last);
+ return make<LocalName>(Encoding, Entity);
+}
+
+// <unscoped-name> ::= <unqualified-name>
+// ::= St <unqualified-name> # ::std::
+// extension ::= StL<unqualified-name>
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseUnscopedName(NameState *State) {
+ if (consumeIf("StL") || consumeIf("St")) {
+ Node *R = getDerived().parseUnqualifiedName(State);
+ if (R == nullptr)
+ return nullptr;
+ return make<StdQualifiedName>(R);
+ }
+ return getDerived().parseUnqualifiedName(State);
+}
+
+// <unqualified-name> ::= <operator-name> [abi-tags]
+// ::= <ctor-dtor-name>
+// ::= <source-name>
+// ::= <unnamed-type-name>
+// ::= DC <source-name>+ E # structured binding declaration
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseUnqualifiedName(NameState *State) {
+ // <ctor-dtor-name>s are special-cased in parseNestedName().
+ Node *Result;
+ if (look() == 'U')
+ Result = getDerived().parseUnnamedTypeName(State);
+ else if (look() >= '1' && look() <= '9')
+ Result = getDerived().parseSourceName(State);
+ else if (consumeIf("DC")) {
+ size_t BindingsBegin = Names.size();
+ do {
+ Node *Binding = getDerived().parseSourceName(State);
+ if (Binding == nullptr)
+ return nullptr;
+ Names.push_back(Binding);
+ } while (!consumeIf('E'));
+ Result = make<StructuredBindingName>(popTrailingNodeArray(BindingsBegin));
+ } else
+ Result = getDerived().parseOperatorName(State);
+ if (Result != nullptr)
+ Result = getDerived().parseAbiTags(Result);
+ return Result;
+}
+
+// <unnamed-type-name> ::= Ut [<nonnegative number>] _
+// ::= <closure-type-name>
+//
+// <closure-type-name> ::= Ul <lambda-sig> E [ <nonnegative number> ] _
+//
+// <lambda-sig> ::= <parameter type>+ # Parameter types or "v" if the lambda has no parameters
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseUnnamedTypeName(NameState *) {
+ if (consumeIf("Ut")) {
+ StringView Count = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<UnnamedTypeName>(Count);
+ }
+ if (consumeIf("Ul")) {
+ NodeArray Params;
+ SwapAndRestore<bool> SwapParams(ParsingLambdaParams, true);
+ if (!consumeIf("vE")) {
+ size_t ParamsBegin = Names.size();
+ do {
+ Node *P = getDerived().parseType();
+ if (P == nullptr)
+ return nullptr;
+ Names.push_back(P);
+ } while (!consumeIf('E'));
+ Params = popTrailingNodeArray(ParamsBegin);
+ }
+ StringView Count = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<ClosureTypeName>(Params, Count);
+ }
+ return nullptr;
+}
+
+// <source-name> ::= <positive length number> <identifier>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseSourceName(NameState *) {
+ size_t Length = 0;
+ if (parsePositiveInteger(&Length))
+ return nullptr;
+ if (numLeft() < Length || Length == 0)
+ return nullptr;
+ StringView Name(First, First + Length);
+ First += Length;
+ if (Name.startsWith("_GLOBAL__N"))
+ return make<NameType>("(anonymous namespace)");
+ return make<NameType>(Name);
+}
+
+// <operator-name> ::= aa # &&
+// ::= ad # & (unary)
+// ::= an # &
+// ::= aN # &=
+// ::= aS # =
+// ::= cl # ()
+// ::= cm # ,
+// ::= co # ~
+// ::= cv <type> # (cast)
+// ::= da # delete[]
+// ::= de # * (unary)
+// ::= dl # delete
+// ::= dv # /
+// ::= dV # /=
+// ::= eo # ^
+// ::= eO # ^=
+// ::= eq # ==
+// ::= ge # >=
+// ::= gt # >
+// ::= ix # []
+// ::= le # <=
+// ::= li <source-name> # operator ""
+// ::= ls # <<
+// ::= lS # <<=
+// ::= lt # <
+// ::= mi # -
+// ::= mI # -=
+// ::= ml # *
+// ::= mL # *=
+// ::= mm # -- (postfix in <expression> context)
+// ::= na # new[]
+// ::= ne # !=
+// ::= ng # - (unary)
+// ::= nt # !
+// ::= nw # new
+// ::= oo # ||
+// ::= or # |
+// ::= oR # |=
+// ::= pm # ->*
+// ::= pl # +
+// ::= pL # +=
+// ::= pp # ++ (postfix in <expression> context)
+// ::= ps # + (unary)
+// ::= pt # ->
+// ::= qu # ?
+// ::= rm # %
+// ::= rM # %=
+// ::= rs # >>
+// ::= rS # >>=
+// ::= ss # <=> C++2a
+// ::= v <digit> <source-name> # vendor extended operator
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseOperatorName(NameState *State) {
+ switch (look()) {
+ case 'a':
+ switch (look(1)) {
+ case 'a':
+ First += 2;
+ return make<NameType>("operator&&");
+ case 'd':
+ case 'n':
+ First += 2;
+ return make<NameType>("operator&");
+ case 'N':
+ First += 2;
+ return make<NameType>("operator&=");
+ case 'S':
+ First += 2;
+ return make<NameType>("operator=");
+ }
+ return nullptr;
+ case 'c':
+ switch (look(1)) {
+ case 'l':
+ First += 2;
+ return make<NameType>("operator()");
+ case 'm':
+ First += 2;
+ return make<NameType>("operator,");
+ case 'o':
+ First += 2;
+ return make<NameType>("operator~");
+ // ::= cv <type> # (cast)
+ case 'v': {
+ First += 2;
+ SwapAndRestore<bool> SaveTemplate(TryToParseTemplateArgs, false);
+ // If we're parsing an encoding, State != nullptr and the conversion
+ // operators' <type> could have a <template-param> that refers to some
+ // <template-arg>s further ahead in the mangled name.
+ SwapAndRestore<bool> SavePermit(PermitForwardTemplateReferences,
+ PermitForwardTemplateReferences ||
+ State != nullptr);
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ if (State) State->CtorDtorConversion = true;
+ return make<ConversionOperatorType>(Ty);
+ }
+ }
+ return nullptr;
+ case 'd':
+ switch (look(1)) {
+ case 'a':
+ First += 2;
+ return make<NameType>("operator delete[]");
+ case 'e':
+ First += 2;
+ return make<NameType>("operator*");
+ case 'l':
+ First += 2;
+ return make<NameType>("operator delete");
+ case 'v':
+ First += 2;
+ return make<NameType>("operator/");
+ case 'V':
+ First += 2;
+ return make<NameType>("operator/=");
+ }
+ return nullptr;
+ case 'e':
+ switch (look(1)) {
+ case 'o':
+ First += 2;
+ return make<NameType>("operator^");
+ case 'O':
+ First += 2;
+ return make<NameType>("operator^=");
+ case 'q':
+ First += 2;
+ return make<NameType>("operator==");
+ }
+ return nullptr;
+ case 'g':
+ switch (look(1)) {
+ case 'e':
+ First += 2;
+ return make<NameType>("operator>=");
+ case 't':
+ First += 2;
+ return make<NameType>("operator>");
+ }
+ return nullptr;
+ case 'i':
+ if (look(1) == 'x') {
+ First += 2;
+ return make<NameType>("operator[]");
+ }
+ return nullptr;
+ case 'l':
+ switch (look(1)) {
+ case 'e':
+ First += 2;
+ return make<NameType>("operator<=");
+ // ::= li <source-name> # operator ""
+ case 'i': {
+ First += 2;
+ Node *SN = getDerived().parseSourceName(State);
+ if (SN == nullptr)
+ return nullptr;
+ return make<LiteralOperator>(SN);
+ }
+ case 's':
+ First += 2;
+ return make<NameType>("operator<<");
+ case 'S':
+ First += 2;
+ return make<NameType>("operator<<=");
+ case 't':
+ First += 2;
+ return make<NameType>("operator<");
+ }
+ return nullptr;
+ case 'm':
+ switch (look(1)) {
+ case 'i':
+ First += 2;
+ return make<NameType>("operator-");
+ case 'I':
+ First += 2;
+ return make<NameType>("operator-=");
+ case 'l':
+ First += 2;
+ return make<NameType>("operator*");
+ case 'L':
+ First += 2;
+ return make<NameType>("operator*=");
+ case 'm':
+ First += 2;
+ return make<NameType>("operator--");
+ }
+ return nullptr;
+ case 'n':
+ switch (look(1)) {
+ case 'a':
+ First += 2;
+ return make<NameType>("operator new[]");
+ case 'e':
+ First += 2;
+ return make<NameType>("operator!=");
+ case 'g':
+ First += 2;
+ return make<NameType>("operator-");
+ case 't':
+ First += 2;
+ return make<NameType>("operator!");
+ case 'w':
+ First += 2;
+ return make<NameType>("operator new");
+ }
+ return nullptr;
+ case 'o':
+ switch (look(1)) {
+ case 'o':
+ First += 2;
+ return make<NameType>("operator||");
+ case 'r':
+ First += 2;
+ return make<NameType>("operator|");
+ case 'R':
+ First += 2;
+ return make<NameType>("operator|=");
+ }
+ return nullptr;
+ case 'p':
+ switch (look(1)) {
+ case 'm':
+ First += 2;
+ return make<NameType>("operator->*");
+ case 'l':
+ First += 2;
+ return make<NameType>("operator+");
+ case 'L':
+ First += 2;
+ return make<NameType>("operator+=");
+ case 'p':
+ First += 2;
+ return make<NameType>("operator++");
+ case 's':
+ First += 2;
+ return make<NameType>("operator+");
+ case 't':
+ First += 2;
+ return make<NameType>("operator->");
+ }
+ return nullptr;
+ case 'q':
+ if (look(1) == 'u') {
+ First += 2;
+ return make<NameType>("operator?");
+ }
+ return nullptr;
+ case 'r':
+ switch (look(1)) {
+ case 'm':
+ First += 2;
+ return make<NameType>("operator%");
+ case 'M':
+ First += 2;
+ return make<NameType>("operator%=");
+ case 's':
+ First += 2;
+ return make<NameType>("operator>>");
+ case 'S':
+ First += 2;
+ return make<NameType>("operator>>=");
+ }
+ return nullptr;
+ case 's':
+ if (look(1) == 's') {
+ First += 2;
+ return make<NameType>("operator<=>");
+ }
+ return nullptr;
+ // ::= v <digit> <source-name> # vendor extended operator
+ case 'v':
+ if (std::isdigit(look(1))) {
+ First += 2;
+ Node *SN = getDerived().parseSourceName(State);
+ if (SN == nullptr)
+ return nullptr;
+ return make<ConversionOperatorType>(SN);
+ }
+ return nullptr;
+ }
+ return nullptr;
+}
+
+// <ctor-dtor-name> ::= C1 # complete object constructor
+// ::= C2 # base object constructor
+// ::= C3 # complete object allocating constructor
+// extension ::= C5 # ?
+// ::= D0 # deleting destructor
+// ::= D1 # complete object destructor
+// ::= D2 # base object destructor
+// extension ::= D5 # ?
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseCtorDtorName(Node *&SoFar,
+ NameState *State) {
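+ // If the name so far is the string/istream/ostream/iostream special
+ // substitution (e.g. "Ss" for std::string), its ctor/dtor is printed against
+ // the expanded basic_* spelling, so swap in an ExpandedSpecialSubstitution.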
+ if (SoFar->getKind() == Node::KSpecialSubstitution) {
+ auto SSK = static_cast<SpecialSubstitution *>(SoFar)->SSK;
+ switch (SSK) {
+ case SpecialSubKind::string:
+ case SpecialSubKind::istream:
+ case SpecialSubKind::ostream:
+ case SpecialSubKind::iostream:
+ SoFar = make<ExpandedSpecialSubstitution>(SSK);
+ if (!SoFar)
+ return nullptr;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (consumeIf('C')) {
+ bool IsInherited = consumeIf('I');
+ if (look() != '1' && look() != '2' && look() != '3' && look() != '5')
+ return nullptr;
+ int Variant = look() - '0';
+ ++First;
+ if (State) State->CtorDtorConversion = true;
+ if (IsInherited) {
+ if (getDerived().parseName(State) == nullptr)
+ return nullptr;
+ }
+ return make<CtorDtorName>(SoFar, false, Variant);
+ }
+
+ if (look() == 'D' &&
+ (look(1) == '0' || look(1) == '1' || look(1) == '2' || look(1) == '5')) {
+ int Variant = look(1) - '0';
+ First += 2;
+ if (State) State->CtorDtorConversion = true;
+ return make<CtorDtorName>(SoFar, true, Variant);
+ }
+
+ return nullptr;
+}
+
+// <nested-name> ::= N [<CV-Qualifiers>] [<ref-qualifier>] <prefix> <unqualified-name> E
+// ::= N [<CV-Qualifiers>] [<ref-qualifier>] <template-prefix> <template-args> E
+//
+// <prefix> ::= <prefix> <unqualified-name>
+// ::= <template-prefix> <template-args>
+// ::= <template-param>
+// ::= <decltype>
+// ::= # empty
+// ::= <substitution>
+// ::= <prefix> <data-member-prefix>
+// extension ::= L
+//
+// <data-member-prefix> := <member source-name> [<template-args>] M
+//
+// <template-prefix> ::= <prefix> <template unqualified-name>
+// ::= <template-param>
+// ::= <substitution>
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseNestedName(NameState *State) {
+ if (!consumeIf('N'))
+ return nullptr;
+
+ Qualifiers CVTmp = parseCVQualifiers();
+ if (State) State->CVQualifiers = CVTmp;
+
+ if (consumeIf('O')) {
+ if (State) State->ReferenceQualifier = FrefQualRValue;
+ } else if (consumeIf('R')) {
+ if (State) State->ReferenceQualifier = FrefQualLValue;
+ } else
+ if (State) State->ReferenceQualifier = FrefQualNone;
+
+ Node *SoFar = nullptr;
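+ // Accumulate name components: the first one becomes SoFar directly, later
+ // ones are wrapped as NestedName(SoFar, Comp).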
+ auto PushComponent = [&](Node *Comp) {
+ if (!Comp) return false;
+ if (SoFar) SoFar = make<NestedName>(SoFar, Comp);
+ else SoFar = Comp;
+ if (State) State->EndsWithTemplateArgs = false;
+ return SoFar != nullptr;
+ };
+
+ if (consumeIf("St")) {
+ SoFar = make<NameType>("std");
+ if (!SoFar)
+ return nullptr;
+ }
+
+ while (!consumeIf('E')) {
+ consumeIf('L'); // extension
+
+ // <data-member-prefix> := <member source-name> [<template-args>] M
+ if (consumeIf('M')) {
+ if (SoFar == nullptr)
+ return nullptr;
+ continue;
+ }
+
+ // ::= <template-param>
+ if (look() == 'T') {
+ if (!PushComponent(getDerived().parseTemplateParam()))
+ return nullptr;
+ Subs.push_back(SoFar);
+ continue;
+ }
+
+ // ::= <template-prefix> <template-args>
+ if (look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs(State != nullptr);
+ if (TA == nullptr || SoFar == nullptr)
+ return nullptr;
+ SoFar = make<NameWithTemplateArgs>(SoFar, TA);
+ if (!SoFar)
+ return nullptr;
+ if (State) State->EndsWithTemplateArgs = true;
+ Subs.push_back(SoFar);
+ continue;
+ }
+
+ // ::= <decltype>
+ if (look() == 'D' && (look(1) == 't' || look(1) == 'T')) {
+ if (!PushComponent(getDerived().parseDecltype()))
+ return nullptr;
+ Subs.push_back(SoFar);
+ continue;
+ }
+
+ // ::= <substitution>
+ if (look() == 'S' && look(1) != 't') {
+ Node *S = getDerived().parseSubstitution();
+ if (!PushComponent(S))
+ return nullptr;
+ if (SoFar != S)
+ Subs.push_back(S);
+ continue;
+ }
+
+ // Parse an <unqualified-name> that's actually a <ctor-dtor-name>.
+ if (look() == 'C' || (look() == 'D' && look(1) != 'C')) {
+ if (SoFar == nullptr)
+ return nullptr;
+ if (!PushComponent(getDerived().parseCtorDtorName(SoFar, State)))
+ return nullptr;
+ SoFar = getDerived().parseAbiTags(SoFar);
+ if (SoFar == nullptr)
+ return nullptr;
+ Subs.push_back(SoFar);
+ continue;
+ }
+
+ // ::= <prefix> <unqualified-name>
+ if (!PushComponent(getDerived().parseUnqualifiedName(State)))
+ return nullptr;
+ Subs.push_back(SoFar);
+ }
+
+ if (SoFar == nullptr || Subs.empty())
+ return nullptr;
+
+ Subs.pop_back();
+ return SoFar;
+}
+
+// <simple-id> ::= <source-name> [ <template-args> ]
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseSimpleId() {
+ Node *SN = getDerived().parseSourceName(/*NameState=*/nullptr);
+ if (SN == nullptr)
+ return nullptr;
+ if (look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ return make<NameWithTemplateArgs>(SN, TA);
+ }
+ return SN;
+}
+
+// <destructor-name> ::= <unresolved-type> # e.g., ~T or ~decltype(f())
+// ::= <simple-id> # e.g., ~A<2*N>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseDestructorName() {
+ Node *Result;
+ if (std::isdigit(look()))
+ Result = getDerived().parseSimpleId();
+ else
+ Result = getDerived().parseUnresolvedType();
+ if (Result == nullptr)
+ return nullptr;
+ return make<DtorName>(Result);
+}
+
+// <unresolved-type> ::= <template-param>
+// ::= <decltype>
+// ::= <substitution>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseUnresolvedType() {
+ if (look() == 'T') {
+ Node *TP = getDerived().parseTemplateParam();
+ if (TP == nullptr)
+ return nullptr;
+ Subs.push_back(TP);
+ return TP;
+ }
+ if (look() == 'D') {
+ Node *DT = getDerived().parseDecltype();
+ if (DT == nullptr)
+ return nullptr;
+ Subs.push_back(DT);
+ return DT;
+ }
+ return getDerived().parseSubstitution();
+}
+
+// <base-unresolved-name> ::= <simple-id> # unresolved name
+// extension ::= <operator-name> # unresolved operator-function-id
+// extension ::= <operator-name> <template-args> # unresolved operator template-id
+// ::= on <operator-name> # unresolved operator-function-id
+// ::= on <operator-name> <template-args> # unresolved operator template-id
+// ::= dn <destructor-name> # destructor or pseudo-destructor;
+// # e.g. ~X or ~X<N-1>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseBaseUnresolvedName() {
+ if (std::isdigit(look()))
+ return getDerived().parseSimpleId();
+
+ if (consumeIf("dn"))
+ return getDerived().parseDestructorName();
+
+ consumeIf("on");
+
+ Node *Oper = getDerived().parseOperatorName(/*NameState=*/nullptr);
+ if (Oper == nullptr)
+ return nullptr;
+ if (look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ return make<NameWithTemplateArgs>(Oper, TA);
+ }
+ return Oper;
+}
+
+// <unresolved-name>
+// extension ::= srN <unresolved-type> [<template-args>] <unresolved-qualifier-level>* E <base-unresolved-name>
+// ::= [gs] <base-unresolved-name> # x or (with "gs") ::x
+// ::= [gs] sr <unresolved-qualifier-level>+ E <base-unresolved-name>
+// # A::x, N::y, A<T>::z; "gs" means leading "::"
+// ::= sr <unresolved-type> <base-unresolved-name> # T::x / decltype(p)::x
+// extension ::= sr <unresolved-type> <template-args> <base-unresolved-name>
+// # T::N::x /decltype(p)::N::x
+// (ignored) ::= srN <unresolved-type> <unresolved-qualifier-level>+ E <base-unresolved-name>
+//
+// <unresolved-qualifier-level> ::= <simple-id>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseUnresolvedName() {
+ Node *SoFar = nullptr;
+
+ // srN <unresolved-type> [<template-args>] <unresolved-qualifier-level>* E <base-unresolved-name>
+ // srN <unresolved-type> <unresolved-qualifier-level>+ E <base-unresolved-name>
+ if (consumeIf("srN")) {
+ SoFar = getDerived().parseUnresolvedType();
+ if (SoFar == nullptr)
+ return nullptr;
+
+ if (look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ SoFar = make<NameWithTemplateArgs>(SoFar, TA);
+ if (!SoFar)
+ return nullptr;
+ }
+
+ while (!consumeIf('E')) {
+ Node *Qual = getDerived().parseSimpleId();
+ if (Qual == nullptr)
+ return nullptr;
+ SoFar = make<QualifiedName>(SoFar, Qual);
+ if (!SoFar)
+ return nullptr;
+ }
+
+ Node *Base = getDerived().parseBaseUnresolvedName();
+ if (Base == nullptr)
+ return nullptr;
+ return make<QualifiedName>(SoFar, Base);
+ }
+
+ bool Global = consumeIf("gs");
+
+ // [gs] <base-unresolved-name> # x or (with "gs") ::x
+ if (!consumeIf("sr")) {
+ SoFar = getDerived().parseBaseUnresolvedName();
+ if (SoFar == nullptr)
+ return nullptr;
+ if (Global)
+ SoFar = make<GlobalQualifiedName>(SoFar);
+ return SoFar;
+ }
+
+ // [gs] sr <unresolved-qualifier-level>+ E <base-unresolved-name>
+ if (std::isdigit(look())) {
+ do {
+ Node *Qual = getDerived().parseSimpleId();
+ if (Qual == nullptr)
+ return nullptr;
+ if (SoFar)
+ SoFar = make<QualifiedName>(SoFar, Qual);
+ else if (Global)
+ SoFar = make<GlobalQualifiedName>(Qual);
+ else
+ SoFar = Qual;
+ if (!SoFar)
+ return nullptr;
+ } while (!consumeIf('E'));
+ }
+ // sr <unresolved-type> <base-unresolved-name>
+ // sr <unresolved-type> <template-args> <base-unresolved-name>
+ else {
+ SoFar = getDerived().parseUnresolvedType();
+ if (SoFar == nullptr)
+ return nullptr;
+
+ if (look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ SoFar = make<NameWithTemplateArgs>(SoFar, TA);
+ if (!SoFar)
+ return nullptr;
+ }
+ }
+
+ assert(SoFar != nullptr);
+
+ Node *Base = getDerived().parseBaseUnresolvedName();
+ if (Base == nullptr)
+ return nullptr;
+ return make<QualifiedName>(SoFar, Base);
+}
+
+// <abi-tags> ::= <abi-tag> [<abi-tags>]
+// <abi-tag> ::= B <source-name>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseAbiTags(Node *N) {
+ while (consumeIf('B')) {
+ StringView SN = parseBareSourceName();
+ if (SN.empty())
+ return nullptr;
+ N = make<AbiTagAttr>(N, SN);
+ if (!N)
+ return nullptr;
+ }
+ return N;
+}
+
+// <number> ::= [n] <non-negative decimal integer>
+template <typename Alloc, typename Derived>
+StringView
+AbstractManglingParser<Alloc, Derived>::parseNumber(bool AllowNegative) {
+ const char *Tmp = First;
+ if (AllowNegative)
+ consumeIf('n');
+ if (numLeft() == 0 || !std::isdigit(*First))
+ return StringView();
+ while (numLeft() != 0 && std::isdigit(*First))
+ ++First;
+ return StringView(Tmp, First);
+}
+
+// <positive length number> ::= [0-9]*
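+// Returns true on error (no digit at the current position); on success the
+// parsed value is accumulated into *Out.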
+template <typename Alloc, typename Derived>
+bool AbstractManglingParser<Alloc, Derived>::parsePositiveInteger(size_t *Out) {
+ *Out = 0;
+ if (look() < '0' || look() > '9')
+ return true;
+ while (look() >= '0' && look() <= '9') {
+ *Out *= 10;
+ *Out += static_cast<size_t>(consume() - '0');
+ }
+ return false;
+}
+
+template <typename Alloc, typename Derived>
+StringView AbstractManglingParser<Alloc, Derived>::parseBareSourceName() {
+ size_t Int = 0;
+ if (parsePositiveInteger(&Int) || numLeft() < Int)
+ return StringView();
+ StringView R(First, First + Int);
+ First += Int;
+ return R;
+}
+
+// <function-type> ::= [<CV-qualifiers>] [<exception-spec>] [Dx] F [Y] <bare-function-type> [<ref-qualifier>] E
+//
+// <exception-spec> ::= Do # non-throwing exception-specification (e.g., noexcept, throw())
+// ::= DO <expression> E # computed (instantiation-dependent) noexcept
+// ::= Dw <type>+ E # dynamic exception specification with instantiation-dependent types
+//
+// <ref-qualifier> ::= R # & ref-qualifier
+// <ref-qualifier> ::= O # && ref-qualifier
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseFunctionType() {
+ Qualifiers CVQuals = parseCVQualifiers();
+
+ Node *ExceptionSpec = nullptr;
+ if (consumeIf("Do")) {
+ ExceptionSpec = make<NameType>("noexcept");
+ if (!ExceptionSpec)
+ return nullptr;
+ } else if (consumeIf("DO")) {
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr || !consumeIf('E'))
+ return nullptr;
+ ExceptionSpec = make<NoexceptSpec>(E);
+ if (!ExceptionSpec)
+ return nullptr;
+ } else if (consumeIf("Dw")) {
+ size_t SpecsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return nullptr;
+ Names.push_back(T);
+ }
+ ExceptionSpec =
+ make<DynamicExceptionSpec>(popTrailingNodeArray(SpecsBegin));
+ if (!ExceptionSpec)
+ return nullptr;
+ }
+
+ consumeIf("Dx"); // transaction safe
+
+ if (!consumeIf('F'))
+ return nullptr;
+ consumeIf('Y'); // extern "C"
+ Node *ReturnType = getDerived().parseType();
+ if (ReturnType == nullptr)
+ return nullptr;
+
+ FunctionRefQual ReferenceQualifier = FrefQualNone;
+ size_t ParamsBegin = Names.size();
+ while (true) {
+ if (consumeIf('E'))
+ break;
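+ // 'v' encodes an empty (void) parameter list; consume it without adding a
+ // parameter node.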
+ if (consumeIf('v'))
+ continue;
+ if (consumeIf("RE")) {
+ ReferenceQualifier = FrefQualLValue;
+ break;
+ }
+ if (consumeIf("OE")) {
+ ReferenceQualifier = FrefQualRValue;
+ break;
+ }
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return nullptr;
+ Names.push_back(T);
+ }
+
+ NodeArray Params = popTrailingNodeArray(ParamsBegin);
+ return make<FunctionType>(ReturnType, Params, CVQuals,
+ ReferenceQualifier, ExceptionSpec);
+}
+
+// extension:
+// <vector-type> ::= Dv <positive dimension number> _ <extended element type>
+// ::= Dv [<dimension expression>] _ <element type>
+// <extended element type> ::= <element type>
+// ::= p # AltiVec vector pixel
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseVectorType() {
+ if (!consumeIf("Dv"))
+ return nullptr;
+ if (look() >= '1' && look() <= '9') {
+ StringView DimensionNumber = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ if (consumeIf('p'))
+ return make<PixelVectorType>(DimensionNumber);
+ Node *ElemType = getDerived().parseType();
+ if (ElemType == nullptr)
+ return nullptr;
+ return make<VectorType>(ElemType, DimensionNumber);
+ }
+
+ if (!consumeIf('_')) {
+ Node *DimExpr = getDerived().parseExpr();
+ if (!DimExpr)
+ return nullptr;
+ if (!consumeIf('_'))
+ return nullptr;
+ Node *ElemType = getDerived().parseType();
+ if (!ElemType)
+ return nullptr;
+ return make<VectorType>(ElemType, DimExpr);
+ }
+ Node *ElemType = getDerived().parseType();
+ if (!ElemType)
+ return nullptr;
+ return make<VectorType>(ElemType, StringView());
+}
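+
+// For example, Clang mangles a four-element float vector as "Dv4_f"; the node
+// built here prints along the lines of "float vector[4]".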
+
+// <decltype> ::= Dt <expression> E # decltype of an id-expression or class member access (C++0x)
+// ::= DT <expression> E # decltype of an expression (C++0x)
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseDecltype() {
+ if (!consumeIf('D'))
+ return nullptr;
+ if (!consumeIf('t') && !consumeIf('T'))
+ return nullptr;
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return nullptr;
+ if (!consumeIf('E'))
+ return nullptr;
+ return make<EnclosingExpr>("decltype(", E, ")");
+}
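+
+// For example, "Dtfp_E" (decltype of the first function parameter) comes out
+// as roughly "decltype(fp)".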
+
+// <array-type> ::= A <positive dimension number> _ <element type>
+// ::= A [<dimension expression>] _ <element type>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseArrayType() {
+ if (!consumeIf('A'))
+ return nullptr;
+
+ NodeOrString Dimension;
+
+ if (std::isdigit(look())) {
+ Dimension = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ } else if (!consumeIf('_')) {
+ Node *DimExpr = getDerived().parseExpr();
+ if (DimExpr == nullptr)
+ return nullptr;
+ if (!consumeIf('_'))
+ return nullptr;
+ Dimension = DimExpr;
+ }
+
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<ArrayType>(Ty, Dimension);
+}
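+
+// For example, "A10_i" is a ten-element int array, so "_Z1fPA10_i" demangles
+// to "f(int (*) [10])".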
+
+// <pointer-to-member-type> ::= M <class type> <member type>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parsePointerToMemberType() {
+ if (!consumeIf('M'))
+ return nullptr;
+ Node *ClassType = getDerived().parseType();
+ if (ClassType == nullptr)
+ return nullptr;
+ Node *MemberType = getDerived().parseType();
+ if (MemberType == nullptr)
+ return nullptr;
+ return make<PointerToMemberType>(ClassType, MemberType);
+}
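+
+// For example, "M1Si" is a pointer to an int member of S, so "_Z1fM1Si"
+// demangles to "f(int S::*)".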
+
+// <class-enum-type> ::= <name> # non-dependent type name, dependent type name, or dependent typename-specifier
+// ::= Ts <name> # dependent elaborated type specifier using 'struct' or 'class'
+// ::= Tu <name> # dependent elaborated type specifier using 'union'
+// ::= Te <name> # dependent elaborated type specifier using 'enum'
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseClassEnumType() {
+ StringView ElabSpef;
+ if (consumeIf("Ts"))
+ ElabSpef = "struct";
+ else if (consumeIf("Tu"))
+ ElabSpef = "union";
+ else if (consumeIf("Te"))
+ ElabSpef = "enum";
+
+ Node *Name = getDerived().parseName();
+ if (Name == nullptr)
+ return nullptr;
+
+ if (!ElabSpef.empty())
+ return make<ElaboratedTypeSpefType>(ElabSpef, Name);
+
+ return Name;
+}
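+
+// For example, "Ts3Foo" is the dependent elaborated specifier and prints as
+// roughly "struct Foo"; a plain "3Foo" simply yields the name "Foo".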
+
+// <qualified-type> ::= <qualifiers> <type>
+// <qualifiers> ::= <extended-qualifier>* <CV-qualifiers>
+// <extended-qualifier> ::= U <source-name> [<template-args>] # vendor extended type qualifier
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseQualifiedType() {
+ if (consumeIf('U')) {
+ StringView Qual = parseBareSourceName();
+ if (Qual.empty())
+ return nullptr;
+
+ // FIXME parse the optional <template-args> here!
+
+ // extension ::= U <objc-name> <objc-type> # objc-type<identifier>
+ if (Qual.startsWith("objcproto")) {
+ StringView ProtoSourceName = Qual.dropFront(std::strlen("objcproto"));
+ StringView Proto;
+ {
+ SwapAndRestore<const char *> SaveFirst(First, ProtoSourceName.begin()),
+ SaveLast(Last, ProtoSourceName.end());
+ Proto = parseBareSourceName();
+ }
+ if (Proto.empty())
+ return nullptr;
+ Node *Child = getDerived().parseQualifiedType();
+ if (Child == nullptr)
+ return nullptr;
+ return make<ObjCProtoName>(Child, Proto);
+ }
+
+ Node *Child = getDerived().parseQualifiedType();
+ if (Child == nullptr)
+ return nullptr;
+ return make<VendorExtQualType>(Child, Qual);
+ }
+
+ Qualifiers Quals = parseCVQualifiers();
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ if (Quals != QualNone)
+ Ty = make<QualType>(Ty, Quals);
+ return Ty;
+}
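+
+// For example, "VKi" yields int qualified as const volatile, so "_Z1fPVKi"
+// demangles to "f(int const volatile*)".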
+
+// <type> ::= <builtin-type>
+// ::= <qualified-type>
+// ::= <function-type>
+// ::= <class-enum-type>
+// ::= <array-type>
+// ::= <pointer-to-member-type>
+// ::= <template-param>
+// ::= <template-template-param> <template-args>
+// ::= <decltype>
+// ::= P <type> # pointer
+// ::= R <type> # l-value reference
+// ::= O <type> # r-value reference (C++11)
+// ::= C <type> # complex pair (C99)
+// ::= G <type> # imaginary (C99)
+// ::= <substitution> # See Compression below
+// extension ::= U <objc-name> <objc-type> # objc-type<identifier>
+// extension ::= <vector-type> # <vector-type> starts with Dv
+//
+// <objc-name> ::= <k0 number> objcproto <k1 number> <identifier> # k0 = 9 + <number of digits in k1> + k1
+// <objc-type> ::= <source-name> # PU<11+>objcproto 11objc_object<source-name> 11objc_object -> id<source-name>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseType() {
+ Node *Result = nullptr;
+
+ switch (look()) {
+ // ::= <qualified-type>
+ case 'r':
+ case 'V':
+ case 'K': {
+ unsigned AfterQuals = 0;
+ if (look(AfterQuals) == 'r') ++AfterQuals;
+ if (look(AfterQuals) == 'V') ++AfterQuals;
+ if (look(AfterQuals) == 'K') ++AfterQuals;
+
+ if (look(AfterQuals) == 'F' ||
+ (look(AfterQuals) == 'D' &&
+ (look(AfterQuals + 1) == 'o' || look(AfterQuals + 1) == 'O' ||
+ look(AfterQuals + 1) == 'w' || look(AfterQuals + 1) == 'x'))) {
+ Result = getDerived().parseFunctionType();
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ }
+ case 'U': {
+ Result = getDerived().parseQualifiedType();
+ break;
+ }
+ // <builtin-type> ::= v # void
+ case 'v':
+ ++First;
+ return make<NameType>("void");
+ // ::= w # wchar_t
+ case 'w':
+ ++First;
+ return make<NameType>("wchar_t");
+ // ::= b # bool
+ case 'b':
+ ++First;
+ return make<NameType>("bool");
+ // ::= c # char
+ case 'c':
+ ++First;
+ return make<NameType>("char");
+ // ::= a # signed char
+ case 'a':
+ ++First;
+ return make<NameType>("signed char");
+ // ::= h # unsigned char
+ case 'h':
+ ++First;
+ return make<NameType>("unsigned char");
+ // ::= s # short
+ case 's':
+ ++First;
+ return make<NameType>("short");
+ // ::= t # unsigned short
+ case 't':
+ ++First;
+ return make<NameType>("unsigned short");
+ // ::= i # int
+ case 'i':
+ ++First;
+ return make<NameType>("int");
+ // ::= j # unsigned int
+ case 'j':
+ ++First;
+ return make<NameType>("unsigned int");
+ // ::= l # long
+ case 'l':
+ ++First;
+ return make<NameType>("long");
+ // ::= m # unsigned long
+ case 'm':
+ ++First;
+ return make<NameType>("unsigned long");
+ // ::= x # long long, __int64
+ case 'x':
+ ++First;
+ return make<NameType>("long long");
+ // ::= y # unsigned long long, __int64
+ case 'y':
+ ++First;
+ return make<NameType>("unsigned long long");
+ // ::= n # __int128
+ case 'n':
+ ++First;
+ return make<NameType>("__int128");
+ // ::= o # unsigned __int128
+ case 'o':
+ ++First;
+ return make<NameType>("unsigned __int128");
+ // ::= f # float
+ case 'f':
+ ++First;
+ return make<NameType>("float");
+ // ::= d # double
+ case 'd':
+ ++First;
+ return make<NameType>("double");
+ // ::= e # long double, __float80
+ case 'e':
+ ++First;
+ return make<NameType>("long double");
+ // ::= g # __float128
+ case 'g':
+ ++First;
+ return make<NameType>("__float128");
+ // ::= z # ellipsis
+ case 'z':
+ ++First;
+ return make<NameType>("...");
+
+ // <builtin-type> ::= u <source-name> # vendor extended type
+ case 'u': {
+ ++First;
+ StringView Res = parseBareSourceName();
+ if (Res.empty())
+ return nullptr;
+ return make<NameType>(Res);
+ }
+ case 'D':
+ switch (look(1)) {
+ // ::= Dd # IEEE 754r decimal floating point (64 bits)
+ case 'd':
+ First += 2;
+ return make<NameType>("decimal64");
+ // ::= De # IEEE 754r decimal floating point (128 bits)
+ case 'e':
+ First += 2;
+ return make<NameType>("decimal128");
+ // ::= Df # IEEE 754r decimal floating point (32 bits)
+ case 'f':
+ First += 2;
+ return make<NameType>("decimal32");
+ // ::= Dh # IEEE 754r half-precision floating point (16 bits)
+ case 'h':
+ First += 2;
+ return make<NameType>("decimal16");
+ // ::= Di # char32_t
+ case 'i':
+ First += 2;
+ return make<NameType>("char32_t");
+ // ::= Ds # char16_t
+ case 's':
+ First += 2;
+ return make<NameType>("char16_t");
+ // ::= Da # auto (in dependent new-expressions)
+ case 'a':
+ First += 2;
+ return make<NameType>("auto");
+ // ::= Dc # decltype(auto)
+ case 'c':
+ First += 2;
+ return make<NameType>("decltype(auto)");
+ // ::= Dn # std::nullptr_t (i.e., decltype(nullptr))
+ case 'n':
+ First += 2;
+ return make<NameType>("std::nullptr_t");
+
+ // ::= <decltype>
+ case 't':
+ case 'T': {
+ Result = getDerived().parseDecltype();
+ break;
+ }
+ // extension ::= <vector-type> # <vector-type> starts with Dv
+ case 'v': {
+ Result = getDerived().parseVectorType();
+ break;
+ }
+ // ::= Dp <type> # pack expansion (C++0x)
+ case 'p': {
+ First += 2;
+ Node *Child = getDerived().parseType();
+ if (!Child)
+ return nullptr;
+ Result = make<ParameterPackExpansion>(Child);
+ break;
+ }
+ // Exception specifier on a function type.
+ case 'o':
+ case 'O':
+ case 'w':
+ // Transaction safe function type.
+ case 'x':
+ Result = getDerived().parseFunctionType();
+ break;
+ }
+ break;
+ // ::= <function-type>
+ case 'F': {
+ Result = getDerived().parseFunctionType();
+ break;
+ }
+ // ::= <array-type>
+ case 'A': {
+ Result = getDerived().parseArrayType();
+ break;
+ }
+ // ::= <pointer-to-member-type>
+ case 'M': {
+ Result = getDerived().parsePointerToMemberType();
+ break;
+ }
+ // ::= <template-param>
+ case 'T': {
+ // This could be an elaborated type specifier on a <class-enum-type>.
+ if (look(1) == 's' || look(1) == 'u' || look(1) == 'e') {
+ Result = getDerived().parseClassEnumType();
+ break;
+ }
+
+ Result = getDerived().parseTemplateParam();
+ if (Result == nullptr)
+ return nullptr;
+
+ // Result could be either of:
+ // <type> ::= <template-param>
+ // <type> ::= <template-template-param> <template-args>
+ //
+ // <template-template-param> ::= <template-param>
+ // ::= <substitution>
+ //
+ // If this is followed by some <template-args>, and we're permitted to
+ // parse them, take the second production.
+
+ if (TryToParseTemplateArgs && look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ Result = make<NameWithTemplateArgs>(Result, TA);
+ }
+ break;
+ }
+ // ::= P <type> # pointer
+ case 'P': {
+ ++First;
+ Node *Ptr = getDerived().parseType();
+ if (Ptr == nullptr)
+ return nullptr;
+ Result = make<PointerType>(Ptr);
+ break;
+ }
+ // ::= R <type> # l-value reference
+ case 'R': {
+ ++First;
+ Node *Ref = getDerived().parseType();
+ if (Ref == nullptr)
+ return nullptr;
+ Result = make<ReferenceType>(Ref, ReferenceKind::LValue);
+ break;
+ }
+ // ::= O <type> # r-value reference (C++11)
+ case 'O': {
+ ++First;
+ Node *Ref = getDerived().parseType();
+ if (Ref == nullptr)
+ return nullptr;
+ Result = make<ReferenceType>(Ref, ReferenceKind::RValue);
+ break;
+ }
+ // ::= C <type> # complex pair (C99)
+ case 'C': {
+ ++First;
+ Node *P = getDerived().parseType();
+ if (P == nullptr)
+ return nullptr;
+ Result = make<PostfixQualifiedType>(P, " complex");
+ break;
+ }
+ // ::= G <type> # imaginary (C99)
+ case 'G': {
+ ++First;
+ Node *P = getDerived().parseType();
+ if (P == nullptr)
+ return P;
+ Result = make<PostfixQualifiedType>(P, " imaginary");
+ break;
+ }
+ // ::= <substitution> # See Compression below
+ case 'S': {
+ if (look(1) && look(1) != 't') {
+ Node *Sub = getDerived().parseSubstitution();
+ if (Sub == nullptr)
+ return nullptr;
+
+ // Sub could be either of:
+ // <type> ::= <substitution>
+ // <type> ::= <template-template-param> <template-args>
+ //
+ // <template-template-param> ::= <template-param>
+ // ::= <substitution>
+ //
+ // If this is followed by some <template-args>, and we're permitted to
+ // parse them, take the second production.
+
+ if (TryToParseTemplateArgs && look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ Result = make<NameWithTemplateArgs>(Sub, TA);
+ break;
+ }
+
+ // If all we parsed was a substitution, don't re-insert into the
+ // substitution table.
+ return Sub;
+ }
+ LLVM_FALLTHROUGH;
+ }
+ // ::= <class-enum-type>
+ default: {
+ Result = getDerived().parseClassEnumType();
+ break;
+ }
+ }
+
+ // If we parsed a type, insert it into the substitution table. Note that all
+ // <builtin-type>s and <substitution>s have already bailed out, because they
+ // don't get substitutions.
+ if (Result != nullptr)
+ Subs.push_back(Result);
+ return Result;
+}
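+
+// As a small worked example, "PKc" recurses through this switch: 'P' parses
+// its pointee type, 'K' routes to parseQualifiedType, and 'c' is the builtin
+// char, giving "char const*"; the qualified type and the pointer type are then
+// both recorded in Subs as substitution candidates.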
+
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parsePrefixExpr(StringView Kind) {
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return nullptr;
+ return make<PrefixExpr>(Kind, E);
+}
+
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseBinaryExpr(StringView Kind) {
+ Node *LHS = getDerived().parseExpr();
+ if (LHS == nullptr)
+ return nullptr;
+ Node *RHS = getDerived().parseExpr();
+ if (RHS == nullptr)
+ return nullptr;
+ return make<BinaryExpr>(LHS, Kind, RHS);
+}
+
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseIntegerLiteral(StringView Lit) {
+ StringView Tmp = parseNumber(true);
+ if (!Tmp.empty() && consumeIf('E'))
+ return make<IntegerLiteral>(Lit, Tmp);
+ return nullptr;
+}
+
+// <CV-Qualifiers> ::= [r] [V] [K]
+template <typename Alloc, typename Derived>
+Qualifiers AbstractManglingParser<Alloc, Derived>::parseCVQualifiers() {
+ Qualifiers CVR = QualNone;
+ if (consumeIf('r'))
+ CVR |= QualRestrict;
+ if (consumeIf('V'))
+ CVR |= QualVolatile;
+ if (consumeIf('K'))
+ CVR |= QualConst;
+ return CVR;
+}
+
+// <function-param> ::= fp <top-level CV-Qualifiers> _ # L == 0, first parameter
+// ::= fp <top-level CV-Qualifiers> <parameter-2 non-negative number> _ # L == 0, second and later parameters
+// ::= fL <L-1 non-negative number> p <top-level CV-Qualifiers> _ # L > 0, first parameter
+// ::= fL <L-1 non-negative number> p <top-level CV-Qualifiers> <parameter-2 non-negative number> _ # L > 0, second and later parameters
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseFunctionParam() {
+ if (consumeIf("fp")) {
+ parseCVQualifiers();
+ StringView Num = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<FunctionParam>(Num);
+ }
+ if (consumeIf("fL")) {
+ if (parseNumber().empty())
+ return nullptr;
+ if (!consumeIf('p'))
+ return nullptr;
+ parseCVQualifiers();
+ StringView Num = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<FunctionParam>(Num);
+ }
+ return nullptr;
+}
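+
+// For example, "fp_" names the first parameter of the enclosing function type
+// and "fp0_" the second; the "fL" forms carry an extra L-1 nesting-level
+// number before the 'p', as spelled out in the grammar above.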
+
+// [gs] nw <expression>* _ <type> E # new (expr-list) type
+// [gs] nw <expression>* _ <type> <initializer> # new (expr-list) type (init)
+// [gs] na <expression>* _ <type> E # new[] (expr-list) type
+// [gs] na <expression>* _ <type> <initializer> # new[] (expr-list) type (init)
+// <initializer> ::= pi <expression>* E # parenthesized initialization
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseNewExpr() {
+ bool Global = consumeIf("gs");
+ bool IsArray = look(1) == 'a';
+ if (!consumeIf("nw") && !consumeIf("na"))
+ return nullptr;
+ size_t Exprs = Names.size();
+ while (!consumeIf('_')) {
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return nullptr;
+ Names.push_back(Ex);
+ }
+ NodeArray ExprList = popTrailingNodeArray(Exprs);
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return Ty;
+ if (consumeIf("pi")) {
+ size_t InitsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *Init = getDerived().parseExpr();
+ if (Init == nullptr)
+ return Init;
+ Names.push_back(Init);
+ }
+ NodeArray Inits = popTrailingNodeArray(InitsBegin);
+ return make<NewExpr>(ExprList, Ty, Inits, Global, IsArray);
+ } else if (!consumeIf('E'))
+ return nullptr;
+ return make<NewExpr>(ExprList, Ty, NodeArray(), Global, IsArray);
+}
+
+// cv <type> <expression> # conversion with one argument
+// cv <type> _ <expression>* E # conversion with a different number of arguments
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseConversionExpr() {
+ if (!consumeIf("cv"))
+ return nullptr;
+ Node *Ty;
+ {
+ SwapAndRestore<bool> SaveTemp(TryToParseTemplateArgs, false);
+ Ty = getDerived().parseType();
+ }
+
+ if (Ty == nullptr)
+ return nullptr;
+
+ if (consumeIf('_')) {
+ size_t ExprsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return E;
+ Names.push_back(E);
+ }
+ NodeArray Exprs = popTrailingNodeArray(ExprsBegin);
+ return make<ConversionExpr>(Ty, Exprs);
+ }
+
+ Node *E[1] = {getDerived().parseExpr()};
+ if (E[0] == nullptr)
+ return nullptr;
+ return make<ConversionExpr>(Ty, makeNodeArray(E, E + 1));
+}
+
+// <expr-primary> ::= L <type> <value number> E # integer literal
+// ::= L <type> <value float> E # floating literal
+// ::= L <string type> E # string literal
+// ::= L <nullptr type> E # nullptr literal (i.e., "LDnE")
+// FIXME: ::= L <type> <real-part float> _ <imag-part float> E # complex floating point literal (C 2000)
+// ::= L <mangled-name> E # external name
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseExprPrimary() {
+ if (!consumeIf('L'))
+ return nullptr;
+ switch (look()) {
+ case 'w':
+ ++First;
+ return getDerived().parseIntegerLiteral("wchar_t");
+ case 'b':
+ if (consumeIf("b0E"))
+ return make<BoolExpr>(0);
+ if (consumeIf("b1E"))
+ return make<BoolExpr>(1);
+ return nullptr;
+ case 'c':
+ ++First;
+ return getDerived().parseIntegerLiteral("char");
+ case 'a':
+ ++First;
+ return getDerived().parseIntegerLiteral("signed char");
+ case 'h':
+ ++First;
+ return getDerived().parseIntegerLiteral("unsigned char");
+ case 's':
+ ++First;
+ return getDerived().parseIntegerLiteral("short");
+ case 't':
+ ++First;
+ return getDerived().parseIntegerLiteral("unsigned short");
+ case 'i':
+ ++First;
+ return getDerived().parseIntegerLiteral("");
+ case 'j':
+ ++First;
+ return getDerived().parseIntegerLiteral("u");
+ case 'l':
+ ++First;
+ return getDerived().parseIntegerLiteral("l");
+ case 'm':
+ ++First;
+ return getDerived().parseIntegerLiteral("ul");
+ case 'x':
+ ++First;
+ return getDerived().parseIntegerLiteral("ll");
+ case 'y':
+ ++First;
+ return getDerived().parseIntegerLiteral("ull");
+ case 'n':
+ ++First;
+ return getDerived().parseIntegerLiteral("__int128");
+ case 'o':
+ ++First;
+ return getDerived().parseIntegerLiteral("unsigned __int128");
+ case 'f':
+ ++First;
+ return getDerived().template parseFloatingLiteral<float>();
+ case 'd':
+ ++First;
+ return getDerived().template parseFloatingLiteral<double>();
+ case 'e':
+ ++First;
+ return getDerived().template parseFloatingLiteral<long double>();
+ case '_':
+ if (consumeIf("_Z")) {
+ Node *R = getDerived().parseEncoding();
+ if (R != nullptr && consumeIf('E'))
+ return R;
+ }
+ return nullptr;
+ case 'T':
+ // Invalid mangled name per
+ // http://sourcerytools.com/pipermail/cxx-abi-dev/2011-August/002422.html
+ return nullptr;
+ default: {
+ // might be named type
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return nullptr;
+ StringView N = parseNumber();
+ if (!N.empty()) {
+ if (!consumeIf('E'))
+ return nullptr;
+ return make<IntegerCastExpr>(T, N);
+ }
+ if (consumeIf('E'))
+ return T;
+ return nullptr;
+ }
+ }
+}
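+
+// For example, "Li42E" prints as "42", "Lb1E" as "true", and "L_Z1xE" refers
+// to the external name x.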
+
+// <braced-expression> ::= <expression>
+// ::= di <field source-name> <braced-expression> # .name = expr
+// ::= dx <index expression> <braced-expression> # [expr] = expr
+// ::= dX <range begin expression> <range end expression> <braced-expression>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseBracedExpr() {
+ if (look() == 'd') {
+ switch (look(1)) {
+ case 'i': {
+ First += 2;
+ Node *Field = getDerived().parseSourceName(/*NameState=*/nullptr);
+ if (Field == nullptr)
+ return nullptr;
+ Node *Init = getDerived().parseBracedExpr();
+ if (Init == nullptr)
+ return nullptr;
+ return make<BracedExpr>(Field, Init, /*isArray=*/false);
+ }
+ case 'x': {
+ First += 2;
+ Node *Index = getDerived().parseExpr();
+ if (Index == nullptr)
+ return nullptr;
+ Node *Init = getDerived().parseBracedExpr();
+ if (Init == nullptr)
+ return nullptr;
+ return make<BracedExpr>(Index, Init, /*isArray=*/true);
+ }
+ case 'X': {
+ First += 2;
+ Node *RangeBegin = getDerived().parseExpr();
+ if (RangeBegin == nullptr)
+ return nullptr;
+ Node *RangeEnd = getDerived().parseExpr();
+ if (RangeEnd == nullptr)
+ return nullptr;
+ Node *Init = getDerived().parseBracedExpr();
+ if (Init == nullptr)
+ return nullptr;
+ return make<BracedRangeExpr>(RangeBegin, RangeEnd, Init);
+ }
+ }
+ }
+ return getDerived().parseExpr();
+}
+
+// (not yet in the spec)
+// <fold-expr> ::= fL <binary-operator-name> <expression> <expression>
+// ::= fR <binary-operator-name> <expression> <expression>
+// ::= fl <binary-operator-name> <expression>
+// ::= fr <binary-operator-name> <expression>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseFoldExpr() {
+ if (!consumeIf('f'))
+ return nullptr;
+
+ char FoldKind = look();
+ bool IsLeftFold, HasInitializer;
+ HasInitializer = FoldKind == 'L' || FoldKind == 'R';
+ if (FoldKind == 'l' || FoldKind == 'L')
+ IsLeftFold = true;
+ else if (FoldKind == 'r' || FoldKind == 'R')
+ IsLeftFold = false;
+ else
+ return nullptr;
+ ++First;
+
+ // FIXME: This map is duplicated in parseOperatorName and parseExpr.
+ StringView OperatorName;
+ if (consumeIf("aa")) OperatorName = "&&";
+ else if (consumeIf("an")) OperatorName = "&";
+ else if (consumeIf("aN")) OperatorName = "&=";
+ else if (consumeIf("aS")) OperatorName = "=";
+ else if (consumeIf("cm")) OperatorName = ",";
+ else if (consumeIf("ds")) OperatorName = ".*";
+ else if (consumeIf("dv")) OperatorName = "/";
+ else if (consumeIf("dV")) OperatorName = "/=";
+ else if (consumeIf("eo")) OperatorName = "^";
+ else if (consumeIf("eO")) OperatorName = "^=";
+ else if (consumeIf("eq")) OperatorName = "==";
+ else if (consumeIf("ge")) OperatorName = ">=";
+ else if (consumeIf("gt")) OperatorName = ">";
+ else if (consumeIf("le")) OperatorName = "<=";
+ else if (consumeIf("ls")) OperatorName = "<<";
+ else if (consumeIf("lS")) OperatorName = "<<=";
+ else if (consumeIf("lt")) OperatorName = "<";
+ else if (consumeIf("mi")) OperatorName = "-";
+ else if (consumeIf("mI")) OperatorName = "-=";
+ else if (consumeIf("ml")) OperatorName = "*";
+ else if (consumeIf("mL")) OperatorName = "*=";
+ else if (consumeIf("ne")) OperatorName = "!=";
+ else if (consumeIf("oo")) OperatorName = "||";
+ else if (consumeIf("or")) OperatorName = "|";
+ else if (consumeIf("oR")) OperatorName = "|=";
+ else if (consumeIf("pl")) OperatorName = "+";
+ else if (consumeIf("pL")) OperatorName = "+=";
+ else if (consumeIf("rm")) OperatorName = "%";
+ else if (consumeIf("rM")) OperatorName = "%=";
+ else if (consumeIf("rs")) OperatorName = ">>";
+ else if (consumeIf("rS")) OperatorName = ">>=";
+ else return nullptr;
+
+ Node *Pack = getDerived().parseExpr(), *Init = nullptr;
+ if (Pack == nullptr)
+ return nullptr;
+ if (HasInitializer) {
+ Init = getDerived().parseExpr();
+ if (Init == nullptr)
+ return nullptr;
+ }
+
+ if (IsLeftFold && Init)
+ std::swap(Pack, Init);
+
+ return make<FoldExpr>(IsLeftFold, OperatorName, Pack, Init);
+}
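+
+// For example, "flpl" followed by a pack expression encodes the unary left
+// fold "(... + pack)", while the "fL"/"fR" forms carry an extra initializer
+// operand.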
+
+// <expression> ::= <unary operator-name> <expression>
+// ::= <binary operator-name> <expression> <expression>
+// ::= <ternary operator-name> <expression> <expression> <expression>
+// ::= cl <expression>+ E # call
+// ::= cv <type> <expression> # conversion with one argument
+// ::= cv <type> _ <expression>* E # conversion with a different number of arguments
+// ::= [gs] nw <expression>* _ <type> E # new (expr-list) type
+// ::= [gs] nw <expression>* _ <type> <initializer> # new (expr-list) type (init)
+// ::= [gs] na <expression>* _ <type> E # new[] (expr-list) type
+// ::= [gs] na <expression>* _ <type> <initializer> # new[] (expr-list) type (init)
+// ::= [gs] dl <expression> # delete expression
+// ::= [gs] da <expression> # delete[] expression
+// ::= pp_ <expression> # prefix ++
+// ::= mm_ <expression> # prefix --
+// ::= ti <type> # typeid (type)
+// ::= te <expression> # typeid (expression)
+// ::= dc <type> <expression> # dynamic_cast<type> (expression)
+// ::= sc <type> <expression> # static_cast<type> (expression)
+// ::= cc <type> <expression> # const_cast<type> (expression)
+// ::= rc <type> <expression> # reinterpret_cast<type> (expression)
+// ::= st <type> # sizeof (a type)
+// ::= sz <expression> # sizeof (an expression)
+// ::= at <type> # alignof (a type)
+// ::= az <expression> # alignof (an expression)
+// ::= nx <expression> # noexcept (expression)
+// ::= <template-param>
+// ::= <function-param>
+// ::= dt <expression> <unresolved-name> # expr.name
+// ::= pt <expression> <unresolved-name> # expr->name
+// ::= ds <expression> <expression> # expr.*expr
+// ::= sZ <template-param> # size of a parameter pack
+// ::= sZ <function-param> # size of a function parameter pack
+// ::= sP <template-arg>* E # sizeof...(T), size of a captured template parameter pack from an alias template
+// ::= sp <expression> # pack expansion
+// ::= tw <expression> # throw expression
+// ::= tr # throw with no operand (rethrow)
+// ::= <unresolved-name> # f(p), N::f(p), ::f(p),
+// # freestanding dependent name (e.g., T::x),
+// # objectless nonstatic member reference
+// ::= fL <binary-operator-name> <expression> <expression>
+// ::= fR <binary-operator-name> <expression> <expression>
+// ::= fl <binary-operator-name> <expression>
+// ::= fr <binary-operator-name> <expression>
+// ::= <expr-primary>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
+ bool Global = consumeIf("gs");
+ if (numLeft() < 2)
+ return nullptr;
+
+ switch (*First) {
+ case 'L':
+ return getDerived().parseExprPrimary();
+ case 'T':
+ return getDerived().parseTemplateParam();
+ case 'f': {
+ // Disambiguate a fold expression from a <function-param>.
+ if (look(1) == 'p' || (look(1) == 'L' && std::isdigit(look(2))))
+ return getDerived().parseFunctionParam();
+ return getDerived().parseFoldExpr();
+ }
+ case 'a':
+ switch (First[1]) {
+ case 'a':
+ First += 2;
+ return getDerived().parseBinaryExpr("&&");
+ case 'd':
+ First += 2;
+ return getDerived().parsePrefixExpr("&");
+ case 'n':
+ First += 2;
+ return getDerived().parseBinaryExpr("&");
+ case 'N':
+ First += 2;
+ return getDerived().parseBinaryExpr("&=");
+ case 'S':
+ First += 2;
+ return getDerived().parseBinaryExpr("=");
+ case 't': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<EnclosingExpr>("alignof (", Ty, ")");
+ }
+ case 'z': {
+ First += 2;
+ Node *Ty = getDerived().parseExpr();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<EnclosingExpr>("alignof (", Ty, ")");
+ }
+ }
+ return nullptr;
+ case 'c':
+ switch (First[1]) {
+ // cc <type> <expression> # const_cast<type>(expression)
+ case 'c': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return Ty;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<CastExpr>("const_cast", Ty, Ex);
+ }
+ // cl <expression>+ E # call
+ case 'l': {
+ First += 2;
+ Node *Callee = getDerived().parseExpr();
+ if (Callee == nullptr)
+ return Callee;
+ size_t ExprsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return E;
+ Names.push_back(E);
+ }
+ return make<CallExpr>(Callee, popTrailingNodeArray(ExprsBegin));
+ }
+ case 'm':
+ First += 2;
+ return getDerived().parseBinaryExpr(",");
+ case 'o':
+ First += 2;
+ return getDerived().parsePrefixExpr("~");
+ case 'v':
+ return getDerived().parseConversionExpr();
+ }
+ return nullptr;
+ case 'd':
+ switch (First[1]) {
+ case 'a': {
+ First += 2;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<DeleteExpr>(Ex, Global, /*is_array=*/true);
+ }
+ case 'c': {
+ First += 2;
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return T;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<CastExpr>("dynamic_cast", T, Ex);
+ }
+ case 'e':
+ First += 2;
+ return getDerived().parsePrefixExpr("*");
+ case 'l': {
+ First += 2;
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return E;
+ return make<DeleteExpr>(E, Global, /*is_array=*/false);
+ }
+ case 'n':
+ return getDerived().parseUnresolvedName();
+ case 's': {
+ First += 2;
+ Node *LHS = getDerived().parseExpr();
+ if (LHS == nullptr)
+ return nullptr;
+ Node *RHS = getDerived().parseExpr();
+ if (RHS == nullptr)
+ return nullptr;
+ return make<MemberExpr>(LHS, ".*", RHS);
+ }
+ case 't': {
+ First += 2;
+ Node *LHS = getDerived().parseExpr();
+ if (LHS == nullptr)
+ return LHS;
+ Node *RHS = getDerived().parseExpr();
+ if (RHS == nullptr)
+ return nullptr;
+ return make<MemberExpr>(LHS, ".", RHS);
+ }
+ case 'v':
+ First += 2;
+ return getDerived().parseBinaryExpr("/");
+ case 'V':
+ First += 2;
+ return getDerived().parseBinaryExpr("/=");
+ }
+ return nullptr;
+ case 'e':
+ switch (First[1]) {
+ case 'o':
+ First += 2;
+ return getDerived().parseBinaryExpr("^");
+ case 'O':
+ First += 2;
+ return getDerived().parseBinaryExpr("^=");
+ case 'q':
+ First += 2;
+ return getDerived().parseBinaryExpr("==");
+ }
+ return nullptr;
+ case 'g':
+ switch (First[1]) {
+ case 'e':
+ First += 2;
+ return getDerived().parseBinaryExpr(">=");
+ case 't':
+ First += 2;
+ return getDerived().parseBinaryExpr(">");
+ }
+ return nullptr;
+ case 'i':
+ switch (First[1]) {
+ case 'x': {
+ First += 2;
+ Node *Base = getDerived().parseExpr();
+ if (Base == nullptr)
+ return nullptr;
+ Node *Index = getDerived().parseExpr();
+ if (Index == nullptr)
+ return Index;
+ return make<ArraySubscriptExpr>(Base, Index);
+ }
+ case 'l': {
+ First += 2;
+ size_t InitsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseBracedExpr();
+ if (E == nullptr)
+ return nullptr;
+ Names.push_back(E);
+ }
+ return make<InitListExpr>(nullptr, popTrailingNodeArray(InitsBegin));
+ }
+ }
+ return nullptr;
+ case 'l':
+ switch (First[1]) {
+ case 'e':
+ First += 2;
+ return getDerived().parseBinaryExpr("<=");
+ case 's':
+ First += 2;
+ return getDerived().parseBinaryExpr("<<");
+ case 'S':
+ First += 2;
+ return getDerived().parseBinaryExpr("<<=");
+ case 't':
+ First += 2;
+ return getDerived().parseBinaryExpr("<");
+ }
+ return nullptr;
+ case 'm':
+ switch (First[1]) {
+ case 'i':
+ First += 2;
+ return getDerived().parseBinaryExpr("-");
+ case 'I':
+ First += 2;
+ return getDerived().parseBinaryExpr("-=");
+ case 'l':
+ First += 2;
+ return getDerived().parseBinaryExpr("*");
+ case 'L':
+ First += 2;
+ return getDerived().parseBinaryExpr("*=");
+ case 'm':
+ First += 2;
+ if (consumeIf('_'))
+ return getDerived().parsePrefixExpr("--");
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return nullptr;
+ return make<PostfixExpr>(Ex, "--");
+ }
+ return nullptr;
+ case 'n':
+ switch (First[1]) {
+ case 'a':
+ case 'w':
+ return getDerived().parseNewExpr();
+ case 'e':
+ First += 2;
+ return getDerived().parseBinaryExpr("!=");
+ case 'g':
+ First += 2;
+ return getDerived().parsePrefixExpr("-");
+ case 't':
+ First += 2;
+ return getDerived().parsePrefixExpr("!");
+ case 'x':
+ First += 2;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<EnclosingExpr>("noexcept (", Ex, ")");
+ }
+ return nullptr;
+ case 'o':
+ switch (First[1]) {
+ case 'n':
+ return getDerived().parseUnresolvedName();
+ case 'o':
+ First += 2;
+ return getDerived().parseBinaryExpr("||");
+ case 'r':
+ First += 2;
+ return getDerived().parseBinaryExpr("|");
+ case 'R':
+ First += 2;
+ return getDerived().parseBinaryExpr("|=");
+ }
+ return nullptr;
+ case 'p':
+ switch (First[1]) {
+ case 'm':
+ First += 2;
+ return getDerived().parseBinaryExpr("->*");
+ case 'l':
+ First += 2;
+ return getDerived().parseBinaryExpr("+");
+ case 'L':
+ First += 2;
+ return getDerived().parseBinaryExpr("+=");
+ case 'p': {
+ First += 2;
+ if (consumeIf('_'))
+ return getDerived().parsePrefixExpr("++");
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<PostfixExpr>(Ex, "++");
+ }
+ case 's':
+ First += 2;
+ return getDerived().parsePrefixExpr("+");
+ case 't': {
+ First += 2;
+ Node *L = getDerived().parseExpr();
+ if (L == nullptr)
+ return nullptr;
+ Node *R = getDerived().parseExpr();
+ if (R == nullptr)
+ return nullptr;
+ return make<MemberExpr>(L, "->", R);
+ }
+ }
+ return nullptr;
+ case 'q':
+ if (First[1] == 'u') {
+ First += 2;
+ Node *Cond = getDerived().parseExpr();
+ if (Cond == nullptr)
+ return nullptr;
+ Node *LHS = getDerived().parseExpr();
+ if (LHS == nullptr)
+ return nullptr;
+ Node *RHS = getDerived().parseExpr();
+ if (RHS == nullptr)
+ return nullptr;
+ return make<ConditionalExpr>(Cond, LHS, RHS);
+ }
+ return nullptr;
+ case 'r':
+ switch (First[1]) {
+ case 'c': {
+ First += 2;
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return T;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<CastExpr>("reinterpret_cast", T, Ex);
+ }
+ case 'm':
+ First += 2;
+ return getDerived().parseBinaryExpr("%");
+ case 'M':
+ First += 2;
+ return getDerived().parseBinaryExpr("%=");
+ case 's':
+ First += 2;
+ return getDerived().parseBinaryExpr(">>");
+ case 'S':
+ First += 2;
+ return getDerived().parseBinaryExpr(">>=");
+ }
+ return nullptr;
+ case 's':
+ switch (First[1]) {
+ case 'c': {
+ First += 2;
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return T;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<CastExpr>("static_cast", T, Ex);
+ }
+ case 'p': {
+ First += 2;
+ Node *Child = getDerived().parseExpr();
+ if (Child == nullptr)
+ return nullptr;
+ return make<ParameterPackExpansion>(Child);
+ }
+ case 'r':
+ return getDerived().parseUnresolvedName();
+ case 't': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return Ty;
+ return make<EnclosingExpr>("sizeof (", Ty, ")");
+ }
+ case 'z': {
+ First += 2;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<EnclosingExpr>("sizeof (", Ex, ")");
+ }
+ case 'Z':
+ First += 2;
+ if (look() == 'T') {
+ Node *R = getDerived().parseTemplateParam();
+ if (R == nullptr)
+ return nullptr;
+ return make<SizeofParamPackExpr>(R);
+ } else if (look() == 'f') {
+ Node *FP = getDerived().parseFunctionParam();
+ if (FP == nullptr)
+ return nullptr;
+ return make<EnclosingExpr>("sizeof... (", FP, ")");
+ }
+ return nullptr;
+ case 'P': {
+ First += 2;
+ size_t ArgsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *Arg = getDerived().parseTemplateArg();
+ if (Arg == nullptr)
+ return nullptr;
+ Names.push_back(Arg);
+ }
+ auto *Pack = make<NodeArrayNode>(popTrailingNodeArray(ArgsBegin));
+ if (!Pack)
+ return nullptr;
+ return make<EnclosingExpr>("sizeof... (", Pack, ")");
+ }
+ }
+ return nullptr;
+ case 't':
+ switch (First[1]) {
+ case 'e': {
+ First += 2;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<EnclosingExpr>("typeid (", Ex, ")");
+ }
+ case 'i': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return Ty;
+ return make<EnclosingExpr>("typeid (", Ty, ")");
+ }
+ case 'l': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ size_t InitsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseBracedExpr();
+ if (E == nullptr)
+ return nullptr;
+ Names.push_back(E);
+ }
+ return make<InitListExpr>(Ty, popTrailingNodeArray(InitsBegin));
+ }
+ case 'r':
+ First += 2;
+ return make<NameType>("throw");
+ case 'w': {
+ First += 2;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return nullptr;
+ return make<ThrowExpr>(Ex);
+ }
+ }
+ return nullptr;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return getDerived().parseUnresolvedName();
+ }
+ return nullptr;
+}
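+
+// For example, "plLi1ELi2E" is the binary '+' applied to the literals 1 and 2
+// and prints as roughly "(1) + (2)"; most of the two-letter codes above map
+// one-to-one onto the operator tables in the Itanium ABI.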
+
+// <call-offset> ::= h <nv-offset> _
+// ::= v <v-offset> _
+//
+// <nv-offset> ::= <offset number>
+// # non-virtual base override
+//
+// <v-offset> ::= <offset number> _ <virtual offset number>
+// # virtual base override, with vcall offset
+template <typename Alloc, typename Derived>
+bool AbstractManglingParser<Alloc, Derived>::parseCallOffset() {
+ // Just scan through the call offset; we never add this information to the
+ // output.
+ if (consumeIf('h'))
+ return parseNumber(true).empty() || !consumeIf('_');
+ if (consumeIf('v'))
+ return parseNumber(true).empty() || !consumeIf('_') ||
+ parseNumber(true).empty() || !consumeIf('_');
+ return true;
+}
+
+// <special-name> ::= TV <type> # virtual table
+// ::= TT <type> # VTT structure (construction vtable index)
+// ::= TI <type> # typeinfo structure
+// ::= TS <type> # typeinfo name (null-terminated byte string)
+// ::= Tc <call-offset> <call-offset> <base encoding>
+// # base is the nominal target function of thunk
+// # first call-offset is 'this' adjustment
+// # second call-offset is result adjustment
+// ::= T <call-offset> <base encoding>
+// # base is the nominal target function of thunk
+// ::= GV <object name> # Guard variable for one-time initialization
+// # No <type>
+// ::= TW <object name> # Thread-local wrapper
+// ::= TH <object name> # Thread-local initialization
+// ::= GR <object name> _ # First temporary
+// ::= GR <object name> <seq-id> _ # Subsequent temporaries
+// extension ::= TC <first type> <number> _ <second type> # construction vtable for second-in-first
+// extension ::= GR <object name> # reference temporary for object
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseSpecialName() {
+ switch (look()) {
+ case 'T':
+ switch (look(1)) {
+ // TV <type> # virtual table
+ case 'V': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<SpecialName>("vtable for ", Ty);
+ }
+ // TT <type> # VTT structure (construction vtable index)
+ case 'T': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<SpecialName>("VTT for ", Ty);
+ }
+ // TI <type> # typeinfo structure
+ case 'I': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<SpecialName>("typeinfo for ", Ty);
+ }
+ // TS <type> # typeinfo name (null-terminated byte string)
+ case 'S': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<SpecialName>("typeinfo name for ", Ty);
+ }
+ // Tc <call-offset> <call-offset> <base encoding>
+ case 'c': {
+ First += 2;
+ if (parseCallOffset() || parseCallOffset())
+ return nullptr;
+ Node *Encoding = getDerived().parseEncoding();
+ if (Encoding == nullptr)
+ return nullptr;
+ return make<SpecialName>("covariant return thunk to ", Encoding);
+ }
+ // extension ::= TC <first type> <number> _ <second type>
+ // # construction vtable for second-in-first
+ case 'C': {
+ First += 2;
+ Node *FirstType = getDerived().parseType();
+ if (FirstType == nullptr)
+ return nullptr;
+ if (parseNumber(true).empty() || !consumeIf('_'))
+ return nullptr;
+ Node *SecondType = getDerived().parseType();
+ if (SecondType == nullptr)
+ return nullptr;
+ return make<CtorVtableSpecialName>(SecondType, FirstType);
+ }
+ // TW <object name> # Thread-local wrapper
+ case 'W': {
+ First += 2;
+ Node *Name = getDerived().parseName();
+ if (Name == nullptr)
+ return nullptr;
+ return make<SpecialName>("thread-local wrapper routine for ", Name);
+ }
+ // TH <object name> # Thread-local initialization
+ case 'H': {
+ First += 2;
+ Node *Name = getDerived().parseName();
+ if (Name == nullptr)
+ return nullptr;
+ return make<SpecialName>("thread-local initialization routine for ", Name);
+ }
+ // T <call-offset> <base encoding>
+ default: {
+ ++First;
+ bool IsVirt = look() == 'v';
+ if (parseCallOffset())
+ return nullptr;
+ Node *BaseEncoding = getDerived().parseEncoding();
+ if (BaseEncoding == nullptr)
+ return nullptr;
+ if (IsVirt)
+ return make<SpecialName>("virtual thunk to ", BaseEncoding);
+ else
+ return make<SpecialName>("non-virtual thunk to ", BaseEncoding);
+ }
+ }
+ case 'G':
+ switch (look(1)) {
+ // GV <object name> # Guard variable for one-time initialization
+ case 'V': {
+ First += 2;
+ Node *Name = getDerived().parseName();
+ if (Name == nullptr)
+ return nullptr;
+ return make<SpecialName>("guard variable for ", Name);
+ }
+ // GR <object name> # reference temporary for object
+ // GR <object name> _ # First temporary
+ // GR <object name> <seq-id> _ # Subsequent temporaries
+ case 'R': {
+ First += 2;
+ Node *Name = getDerived().parseName();
+ if (Name == nullptr)
+ return nullptr;
+ size_t Count;
+ bool ParsedSeqId = !parseSeqId(&Count);
+ if (!consumeIf('_') && ParsedSeqId)
+ return nullptr;
+ return make<SpecialName>("reference temporary for ", Name);
+ }
+ }
+ }
+ return nullptr;
+}
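+
+// For example, "_ZTV1S" demangles to "vtable for S" and "_ZTI1S" to
+// "typeinfo for S".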
+
+// <encoding> ::= <function name> <bare-function-type>
+// ::= <data name>
+// ::= <special-name>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseEncoding() {
+ if (look() == 'G' || look() == 'T')
+ return getDerived().parseSpecialName();
+
+ auto IsEndOfEncoding = [&] {
+ // The set of chars that can potentially follow an <encoding> (none of which
+ // can start a <type>). Enumerating these allows us to avoid speculative
+ // parsing.
+ return numLeft() == 0 || look() == 'E' || look() == '.' || look() == '_';
+ };
+
+ NameState NameInfo(this);
+ Node *Name = getDerived().parseName(&NameInfo);
+ if (Name == nullptr)
+ return nullptr;
+
+ if (resolveForwardTemplateRefs(NameInfo))
+ return nullptr;
+
+ if (IsEndOfEncoding())
+ return Name;
+
+ Node *Attrs = nullptr;
+ if (consumeIf("Ua9enable_ifI")) {
+ size_t BeforeArgs = Names.size();
+ while (!consumeIf('E')) {
+ Node *Arg = getDerived().parseTemplateArg();
+ if (Arg == nullptr)
+ return nullptr;
+ Names.push_back(Arg);
+ }
+ Attrs = make<EnableIfAttr>(popTrailingNodeArray(BeforeArgs));
+ if (!Attrs)
+ return nullptr;
+ }
+
+ Node *ReturnType = nullptr;
+ if (!NameInfo.CtorDtorConversion && NameInfo.EndsWithTemplateArgs) {
+ ReturnType = getDerived().parseType();
+ if (ReturnType == nullptr)
+ return nullptr;
+ }
+
+ if (consumeIf('v'))
+ return make<FunctionEncoding>(ReturnType, Name, NodeArray(),
+ Attrs, NameInfo.CVQualifiers,
+ NameInfo.ReferenceQualifier);
+
+ size_t ParamsBegin = Names.size();
+ do {
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ Names.push_back(Ty);
+ } while (!IsEndOfEncoding());
+
+ return make<FunctionEncoding>(ReturnType, Name,
+ popTrailingNodeArray(ParamsBegin),
+ Attrs, NameInfo.CVQualifiers,
+ NameInfo.ReferenceQualifier);
+}
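+
+// For example, the encoding in "_Z3fooiPKc" is the name "foo" followed by the
+// bare function type "iPKc", giving "foo(int, char const*)"; a lone data name
+// such as "_Z3bar" has no trailing type list.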
+
+template <class Float>
+struct FloatData;
+
+template <>
+struct FloatData<float>
+{
+ static const size_t mangled_size = 8;
+ static const size_t max_demangled_size = 24;
+ static constexpr const char* spec = "%af";
+};
+
+template <>
+struct FloatData<double>
+{
+ static const size_t mangled_size = 16;
+ static const size_t max_demangled_size = 32;
+ static constexpr const char* spec = "%a";
+};
+
+template <>
+struct FloatData<long double>
+{
+#if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \
+ defined(__wasm__)
+ static const size_t mangled_size = 32;
+#elif defined(__arm__) || defined(__mips__) || defined(__hexagon__)
+ static const size_t mangled_size = 16;
+#else
+ static const size_t mangled_size = 20; // May need to be adjusted to 16 or 24 on other platforms
+#endif
+ static const size_t max_demangled_size = 40;
+ static constexpr const char *spec = "%LaL";
+};
+
+template <typename Alloc, typename Derived>
+template <class Float>
+Node *AbstractManglingParser<Alloc, Derived>::parseFloatingLiteral() {
+ const size_t N = FloatData<Float>::mangled_size;
+ if (numLeft() <= N)
+ return nullptr;
+ StringView Data(First, First + N);
+ for (char C : Data)
+ if (!std::isxdigit(C))
+ return nullptr;
+ First += N;
+ if (!consumeIf('E'))
+ return nullptr;
+ return make<FloatLiteralImpl<Float>>(Data);
+}
+
+// <seq-id> ::= <0-9A-Z>+
+template <typename Alloc, typename Derived>
+bool AbstractManglingParser<Alloc, Derived>::parseSeqId(size_t *Out) {
+ if (!(look() >= '0' && look() <= '9') &&
+ !(look() >= 'A' && look() <= 'Z'))
+ return true;
+
+ size_t Id = 0;
+ while (true) {
+ if (look() >= '0' && look() <= '9') {
+ Id *= 36;
+ Id += static_cast<size_t>(look() - '0');
+ } else if (look() >= 'A' && look() <= 'Z') {
+ Id *= 36;
+ Id += static_cast<size_t>(look() - 'A') + 10;
+ } else {
+ *Out = Id;
+ return false;
+ }
+ ++First;
+ }
+}
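+
+// Seq-ids are base-36: '0'-'9' then 'A'-'Z', so "A" is 10 and "10" is 36.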
+
+// <substitution> ::= S <seq-id> _
+// ::= S_
+// <substitution> ::= Sa # ::std::allocator
+// <substitution> ::= Sb # ::std::basic_string
+// <substitution> ::= Ss # ::std::basic_string < char,
+// ::std::char_traits<char>,
+// ::std::allocator<char> >
+// <substitution> ::= Si # ::std::basic_istream<char, std::char_traits<char> >
+// <substitution> ::= So # ::std::basic_ostream<char, std::char_traits<char> >
+// <substitution> ::= Sd # ::std::basic_iostream<char, std::char_traits<char> >
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseSubstitution() {
+ if (!consumeIf('S'))
+ return nullptr;
+
+ if (std::islower(look())) {
+ Node *SpecialSub;
+ switch (look()) {
+ case 'a':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::allocator);
+ break;
+ case 'b':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::basic_string);
+ break;
+ case 's':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::string);
+ break;
+ case 'i':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::istream);
+ break;
+ case 'o':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::ostream);
+ break;
+ case 'd':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::iostream);
+ break;
+ default:
+ return nullptr;
+ }
+ if (!SpecialSub)
+ return nullptr;
+ // Itanium C++ ABI 5.1.2: If a name that would use a built-in <substitution>
+ // has ABI tags, the tags are appended to the substitution; the result is a
+ // substitutable component.
+ Node *WithTags = getDerived().parseAbiTags(SpecialSub);
+ if (WithTags != SpecialSub) {
+ Subs.push_back(WithTags);
+ SpecialSub = WithTags;
+ }
+ return SpecialSub;
+ }
+
+ // ::= S_
+ if (consumeIf('_')) {
+ if (Subs.empty())
+ return nullptr;
+ return Subs[0];
+ }
+
+ // ::= S <seq-id> _
+ size_t Index = 0;
+ if (parseSeqId(&Index))
+ return nullptr;
+ ++Index;
+ if (!consumeIf('_') || Index >= Subs.size())
+ return nullptr;
+ return Subs[Index];
+}
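+
+// For example, "S_" refers to the first recorded substitution candidate
+// (Subs[0]), "S0_" to the second, and "SA_" to the twelfth, using the base-36
+// seq-id above.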
+
+// <template-param> ::= T_ # first template parameter
+// ::= T <parameter-2 non-negative number> _
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseTemplateParam() {
+ if (!consumeIf('T'))
+ return nullptr;
+
+ size_t Index = 0;
+ if (!consumeIf('_')) {
+ if (parsePositiveInteger(&Index))
+ return nullptr;
+ ++Index;
+ if (!consumeIf('_'))
+ return nullptr;
+ }
+
+ // Itanium ABI 5.1.8: In a generic lambda, uses of auto in the parameter list
+ // are mangled as the corresponding artificial template type parameter.
+ if (ParsingLambdaParams)
+ return make<NameType>("auto");
+
+ // If we're in a context where this <template-param> refers to a
+ // <template-arg> further ahead in the mangled name (currently just conversion
+ // operator types), then we should only look it up in the right context.
+ if (PermitForwardTemplateReferences) {
+ Node *ForwardRef = make<ForwardTemplateReference>(Index);
+ if (!ForwardRef)
+ return nullptr;
+ assert(ForwardRef->getKind() == Node::KForwardTemplateReference);
+ ForwardTemplateRefs.push_back(
+ static_cast<ForwardTemplateReference *>(ForwardRef));
+ return ForwardRef;
+ }
+
+ if (Index >= TemplateParams.size())
+ return nullptr;
+ return TemplateParams[Index];
+}
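+
+// For example, "T_" is the first template parameter and "T0_" the second, so
+// "_Z1fIiEvT_" demangles to "void f<int>(int)".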
+
+// <template-arg> ::= <type> # type or template
+// ::= X <expression> E # expression
+// ::= <expr-primary> # simple expressions
+// ::= J <template-arg>* E # argument pack
+// ::= LZ <encoding> E # extension
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseTemplateArg() {
+ switch (look()) {
+ case 'X': {
+ ++First;
+ Node *Arg = getDerived().parseExpr();
+ if (Arg == nullptr || !consumeIf('E'))
+ return nullptr;
+ return Arg;
+ }
+ case 'J': {
+ ++First;
+ size_t ArgsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *Arg = getDerived().parseTemplateArg();
+ if (Arg == nullptr)
+ return nullptr;
+ Names.push_back(Arg);
+ }
+ NodeArray Args = popTrailingNodeArray(ArgsBegin);
+ return make<TemplateArgumentPack>(Args);
+ }
+ case 'L': {
+ // ::= LZ <encoding> E # extension
+ if (look(1) == 'Z') {
+ First += 2;
+ Node *Arg = getDerived().parseEncoding();
+ if (Arg == nullptr || !consumeIf('E'))
+ return nullptr;
+ return Arg;
+ }
+ // ::= <expr-primary> # simple expressions
+ return getDerived().parseExprPrimary();
+ }
+ default:
+ return getDerived().parseType();
+ }
+}
+
+// <template-args> ::= I <template-arg>* E
+// extension: the ABI says <template-arg>+
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseTemplateArgs(bool TagTemplates) {
+ if (!consumeIf('I'))
+ return nullptr;
+
+ // <template-params> refer to the innermost <template-args>. Clear out any
+ // outer args that we may have inserted into TemplateParams.
+ if (TagTemplates)
+ TemplateParams.clear();
+
+ size_t ArgsBegin = Names.size();
+ while (!consumeIf('E')) {
+ if (TagTemplates) {
+ auto OldParams = std::move(TemplateParams);
+ Node *Arg = getDerived().parseTemplateArg();
+ TemplateParams = std::move(OldParams);
+ if (Arg == nullptr)
+ return nullptr;
+ Names.push_back(Arg);
+ Node *TableEntry = Arg;
+ if (Arg->getKind() == Node::KTemplateArgumentPack) {
+ TableEntry = make<ParameterPack>(
+ static_cast<TemplateArgumentPack*>(TableEntry)->getElements());
+ if (!TableEntry)
+ return nullptr;
+ }
+ TemplateParams.push_back(TableEntry);
+ } else {
+ Node *Arg = getDerived().parseTemplateArg();
+ if (Arg == nullptr)
+ return nullptr;
+ Names.push_back(Arg);
+ }
+ }
+ return make<TemplateArgs>(popTrailingNodeArray(ArgsBegin));
+}
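+
+// For example, "IiE" is a one-element argument list holding the type int;
+// when TagTemplates is set, each parsed argument is also recorded in
+// TemplateParams so that later "T_" references can resolve to it.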
+
+// <mangled-name> ::= _Z <encoding>
+// ::= <type>
+// extension ::= ___Z <encoding> _block_invoke
+// extension ::= ___Z <encoding> _block_invoke<decimal-digit>+
+// extension ::= ___Z <encoding> _block_invoke_<decimal-digit>+
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parse() {
+ if (consumeIf("_Z")) {
+ Node *Encoding = getDerived().parseEncoding();
+ if (Encoding == nullptr)
+ return nullptr;
+ if (look() == '.') {
+ Encoding = make<DotSuffix>(Encoding, StringView(First, Last));
+ First = Last;
+ }
+ if (numLeft() != 0)
+ return nullptr;
+ return Encoding;
+ }
+
+ if (consumeIf("___Z")) {
+ Node *Encoding = getDerived().parseEncoding();
+ if (Encoding == nullptr || !consumeIf("_block_invoke"))
+ return nullptr;
+ bool RequireNumber = consumeIf('_');
+ if (parseNumber().empty() && RequireNumber)
+ return nullptr;
+ if (look() == '.')
+ First = Last;
+ if (numLeft() != 0)
+ return nullptr;
+ return make<SpecialName>("invocation function for block in ", Encoding);
+ }
+
+ Node *Ty = getDerived().parseType();
+ if (numLeft() != 0)
+ return nullptr;
+ return Ty;
+}
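+
+// For example, "_Z3fooi" demangles to "foo(int)"; as the grammar above notes,
+// a bare <type> such as "i" (giving "int") is also accepted.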
+
+template <typename Alloc>
+struct ManglingParser : AbstractManglingParser<ManglingParser<Alloc>, Alloc> {
+ using AbstractManglingParser<ManglingParser<Alloc>,
+ Alloc>::AbstractManglingParser;
+};
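+
+// A minimal usage sketch: the Alloc parameter is anything providing the
+// makeNode<T>(...)/allocateNodeArray(size_t) interface this parser expects
+// (LLVM's ItaniumDemangle.cpp supplies one such allocator), e.g.
+//
+//   ManglingParser<MyAllocator> P(Mangled, Mangled + std::strlen(Mangled));
+//   if (Node *AST = P.parse())   // nullptr on invalid input
+//     AST->print(S);             // S is an OutputStream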
+
+} // namespace itanium_demangle
+} // namespace llvm
+
+#endif // LLVM_DEMANGLE_ITANIUMDEMANGLE_H
diff --git a/contrib/llvm/include/llvm/Demangle/MicrosoftDemangle.h b/contrib/llvm/include/llvm/Demangle/MicrosoftDemangle.h
new file mode 100644
index 000000000000..97b918fc9459
--- /dev/null
+++ b/contrib/llvm/include/llvm/Demangle/MicrosoftDemangle.h
@@ -0,0 +1,276 @@
+//===------------------------- MicrosoftDemangle.h --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEMANGLE_MICROSOFT_DEMANGLE_H
+#define LLVM_DEMANGLE_MICROSOFT_DEMANGLE_H
+
+#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/MicrosoftDemangleNodes.h"
+#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/Utility.h"
+
+#include <utility>
+
+namespace llvm {
+namespace ms_demangle {
+// This memory allocator is extremely fast, but it doesn't call dtors
+// for allocated objects. That means you can't use STL containers
+// (such as std::vector) with this allocator. But it pays off --
+// the demangler is 3x faster with this allocator compared to one with
+// STL containers.
+constexpr size_t AllocUnit = 4096;
+
+class ArenaAllocator {
+ struct AllocatorNode {
+ uint8_t *Buf = nullptr;
+ size_t Used = 0;
+ size_t Capacity = 0;
+ AllocatorNode *Next = nullptr;
+ };
+
+ void addNode(size_t Capacity) {
+ AllocatorNode *NewHead = new AllocatorNode;
+ NewHead->Buf = new uint8_t[Capacity];
+ NewHead->Next = Head;
+ NewHead->Capacity = Capacity;
+ Head = NewHead;
+ NewHead->Used = 0;
+ }
+
+public:
+ ArenaAllocator() { addNode(AllocUnit); }
+
+ ~ArenaAllocator() {
+ while (Head) {
+ assert(Head->Buf);
+ delete[] Head->Buf;
+ AllocatorNode *Next = Head->Next;
+ delete Head;
+ Head = Next;
+ }
+ }
+
+ char *allocUnalignedBuffer(size_t Length) {
+ uint8_t *Buf = Head->Buf + Head->Used;
+
+ Head->Used += Length;
+ if (Head->Used > Head->Capacity) {
+ // It's possible we need a buffer which is larger than our default unit
+ // size, so we need to be careful to add a node with capacity that is at
+ // least as large as what we need.
+ addNode(std::max(AllocUnit, Length));
+ Head->Used = Length;
+ Buf = Head->Buf;
+ }
+
+ return reinterpret_cast<char *>(Buf);
+ }
+
+ template <typename T, typename... Args> T *allocArray(size_t Count) {
+
+ size_t Size = Count * sizeof(T);
+ assert(Head && Head->Buf);
+
+ size_t P = (size_t)Head->Buf + Head->Used;
+ uintptr_t AlignedP =
+ (((size_t)P + alignof(T) - 1) & ~(size_t)(alignof(T) - 1));
+ uint8_t *PP = (uint8_t *)AlignedP;
+ size_t Adjustment = AlignedP - P;
+
+ Head->Used += Size + Adjustment;
+ if (Head->Used < Head->Capacity)
+ return new (PP) T[Count]();
+
+ addNode(AllocUnit);
+ Head->Used = Size;
+ return new (Head->Buf) T[Count]();
+ }
+
+ template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) {
+
+ size_t Size = sizeof(T);
+ assert(Head && Head->Buf);
+
+ size_t P = (size_t)Head->Buf + Head->Used;
+ uintptr_t AlignedP =
+ (((size_t)P + alignof(T) - 1) & ~(size_t)(alignof(T) - 1));
+ uint8_t *PP = (uint8_t *)AlignedP;
+ size_t Adjustment = AlignedP - P;
+
+ Head->Used += Size + Adjustment;
+ if (Head->Used < Head->Capacity)
+ return new (PP) T(std::forward<Args>(ConstructorArgs)...);
+
+ addNode(AllocUnit);
+ Head->Used = Size;
+ return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...);
+ }
+
+private:
+ AllocatorNode *Head = nullptr;
+};
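+
+// For example, alloc<T>() above just bumps Head->Used by sizeof(T) plus any
+// alignment padding and placement-news the object into the current block,
+// starting a fresh AllocUnit-sized block when the current one is full; no
+// destructors ever run, as noted above.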
+
+struct BackrefContext {
+ static constexpr size_t Max = 10;
+
+ TypeNode *FunctionParams[Max];
+ size_t FunctionParamCount = 0;
+
+ // The first 10 BackReferences in a mangled name can be back-referenced by
+ // special name @[0-9]. This is storage for the first 10 BackReferences.
+ NamedIdentifierNode *Names[Max];
+ size_t NamesCount = 0;
+};
+
+enum class QualifierMangleMode { Drop, Mangle, Result };
+
+enum NameBackrefBehavior : uint8_t {
+ NBB_None = 0, // don't save any names as backrefs.
+ NBB_Template = 1 << 0, // save template instantiations.
+ NBB_Simple = 1 << 1, // save simple names.
+};
+
+enum class FunctionIdentifierCodeGroup { Basic, Under, DoubleUnder };
+
+// Demangler class takes the main role in demangling symbols.
+// It has a set of functions to parse mangled symbols into Type instances.
+// It also has a set of functions to convert Type instances to strings.
+class Demangler {
+public:
+ Demangler() = default;
+ virtual ~Demangler() = default;
+
+ // You are supposed to call parse() first and then check if Error is true. If
+ // it is false, call output() on the returned SymbolNode to write the
+ // formatted name to the given stream.
+ SymbolNode *parse(StringView &MangledName);
+
+ TagTypeNode *parseTagUniqueName(StringView &MangledName);
+
+ // True if an error occurred.
+ bool Error = false;
+
+ void dumpBackReferences();
+
+private:
+ SymbolNode *demangleEncodedSymbol(StringView &MangledName,
+ QualifiedNameNode *QN);
+
+ VariableSymbolNode *demangleVariableEncoding(StringView &MangledName,
+ StorageClass SC);
+ FunctionSymbolNode *demangleFunctionEncoding(StringView &MangledName);
+
+ Qualifiers demanglePointerExtQualifiers(StringView &MangledName);
+
+ // Parser functions. This is a recursive-descent parser.
+ TypeNode *demangleType(StringView &MangledName, QualifierMangleMode QMM);
+ PrimitiveTypeNode *demanglePrimitiveType(StringView &MangledName);
+ CustomTypeNode *demangleCustomType(StringView &MangledName);
+ TagTypeNode *demangleClassType(StringView &MangledName);
+ PointerTypeNode *demanglePointerType(StringView &MangledName);
+ PointerTypeNode *demangleMemberPointerType(StringView &MangledName);
+ FunctionSignatureNode *demangleFunctionType(StringView &MangledName,
+ bool HasThisQuals);
+
+ ArrayTypeNode *demangleArrayType(StringView &MangledName);
+
+ NodeArrayNode *demangleTemplateParameterList(StringView &MangledName);
+ NodeArrayNode *demangleFunctionParameterList(StringView &MangledName);
+
+ std::pair<uint64_t, bool> demangleNumber(StringView &MangledName);
+ uint64_t demangleUnsigned(StringView &MangledName);
+ int64_t demangleSigned(StringView &MangledName);
+
+ void memorizeString(StringView s);
+ void memorizeIdentifier(IdentifierNode *Identifier);
+
+ /// Allocate a copy of \p Borrowed into memory that we own.
+ StringView copyString(StringView Borrowed);
+
+ QualifiedNameNode *demangleFullyQualifiedTypeName(StringView &MangledName);
+ QualifiedNameNode *demangleFullyQualifiedSymbolName(StringView &MangledName);
+
+ IdentifierNode *demangleUnqualifiedTypeName(StringView &MangledName,
+ bool Memorize);
+ IdentifierNode *demangleUnqualifiedSymbolName(StringView &MangledName,
+ NameBackrefBehavior NBB);
+
+ QualifiedNameNode *demangleNameScopeChain(StringView &MangledName,
+ IdentifierNode *UnqualifiedName);
+ IdentifierNode *demangleNameScopePiece(StringView &MangledName);
+
+ NamedIdentifierNode *demangleBackRefName(StringView &MangledName);
+ IdentifierNode *demangleTemplateInstantiationName(StringView &MangledName,
+ NameBackrefBehavior NBB);
+ IdentifierNode *demangleFunctionIdentifierCode(StringView &MangledName);
+ IdentifierNode *
+ demangleFunctionIdentifierCode(StringView &MangledName,
+ FunctionIdentifierCodeGroup Group);
+ StructorIdentifierNode *demangleStructorIdentifier(StringView &MangledName,
+ bool IsDestructor);
+ ConversionOperatorIdentifierNode *
+ demangleConversionOperatorIdentifier(StringView &MangledName);
+ LiteralOperatorIdentifierNode *
+ demangleLiteralOperatorIdentifier(StringView &MangledName);
+
+ SymbolNode *demangleSpecialIntrinsic(StringView &MangledName);
+ SpecialTableSymbolNode *
+ demangleSpecialTableSymbolNode(StringView &MangledName,
+ SpecialIntrinsicKind SIK);
+ LocalStaticGuardVariableNode *
+ demangleLocalStaticGuard(StringView &MangledName);
+ VariableSymbolNode *demangleUntypedVariable(ArenaAllocator &Arena,
+ StringView &MangledName,
+ StringView VariableName);
+ VariableSymbolNode *
+ demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
+ StringView &MangledName);
+ FunctionSymbolNode *demangleInitFiniStub(StringView &MangledName,
+ bool IsDestructor);
+
+ NamedIdentifierNode *demangleSimpleName(StringView &MangledName,
+ bool Memorize);
+ NamedIdentifierNode *demangleAnonymousNamespaceName(StringView &MangledName);
+ NamedIdentifierNode *demangleLocallyScopedNamePiece(StringView &MangledName);
+ EncodedStringLiteralNode *demangleStringLiteral(StringView &MangledName);
+ FunctionSymbolNode *demangleVcallThunkNode(StringView &MangledName);
+
+ StringView demangleSimpleString(StringView &MangledName, bool Memorize);
+
+ FuncClass demangleFunctionClass(StringView &MangledName);
+ CallingConv demangleCallingConvention(StringView &MangledName);
+ StorageClass demangleVariableStorageClass(StringView &MangledName);
+ bool demangleThrowSpecification(StringView &MangledName);
+ wchar_t demangleWcharLiteral(StringView &MangledName);
+ uint8_t demangleCharLiteral(StringView &MangledName);
+
+ std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName);
+
+ // Memory allocator.
+ ArenaAllocator Arena;
+
+ // A single type uses one global back-ref table for all function params.
+ // This means back-refs can even go "into" other types. Examples:
+ //
+ // // Second int* is a back-ref to first.
+ // void foo(int *, int*);
+ //
+ // // Second int* is not a back-ref to first (first is not a function param).
+ // int* foo(int*);
+ //
+ // // Second int* is a back-ref to first (ALL function types share the same
+ // // back-ref map).
+ // using F = void(*)(int*);
+ // F G(int *);
+ BackrefContext Backrefs;
+};
+
+} // namespace ms_demangle
+} // namespace llvm
+
+#endif // LLVM_DEMANGLE_MICROSOFT_DEMANGLE_H
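
A minimal usage sketch for the interface above, hedged: it assumes StringView is constructible from a C string and that the headers are reachable under llvm/Demangle as spelled below, neither of which is shown in this hunk.

#include "llvm/Demangle/MicrosoftDemangle.h"

#include <cstdio>
#include <string>

using namespace llvm;
using namespace llvm::ms_demangle;

// Parse a Microsoft-mangled name and print the demangled form, if any.
void printDemangled(const char *Mangled) {
  Demangler D;                       // owns the ArenaAllocator for all nodes
  StringView Name(Mangled);          // parse() consumes this view as it goes
  SymbolNode *Sym = D.parse(Name);
  if (D.Error || !Sym)
    return;                          // parse failed; nothing to print
  std::string Pretty = Sym->toString();  // renders the whole node tree
  std::puts(Pretty.c_str());
}

Everything allocated by D lives in its arena, so no per-node cleanup is needed (and no destructors run), exactly as the allocator comment above warns.
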
diff --git a/contrib/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/contrib/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
new file mode 100644
index 000000000000..9e3478e9fd29
--- /dev/null
+++ b/contrib/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -0,0 +1,605 @@
+#ifndef LLVM_SUPPORT_MICROSOFTDEMANGLENODES_H
+#define LLVM_SUPPORT_MICROSOFTDEMANGLENODES_H
+
+#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/StringView.h"
+#include <array>
+
+class OutputStream;
+
+namespace llvm {
+namespace ms_demangle {
+
+// Storage classes
+enum Qualifiers : uint8_t {
+ Q_None = 0,
+ Q_Const = 1 << 0,
+ Q_Volatile = 1 << 1,
+ Q_Far = 1 << 2,
+ Q_Huge = 1 << 3,
+ Q_Unaligned = 1 << 4,
+ Q_Restrict = 1 << 5,
+ Q_Pointer64 = 1 << 6
+};
+
+enum class StorageClass : uint8_t {
+ None,
+ PrivateStatic,
+ ProtectedStatic,
+ PublicStatic,
+ Global,
+ FunctionLocalStatic,
+};
+
+enum class PointerAffinity { None, Pointer, Reference, RValueReference };
+enum class FunctionRefQualifier { None, Reference, RValueReference };
+
+// Calling conventions
+enum class CallingConv : uint8_t {
+ None,
+ Cdecl,
+ Pascal,
+ Thiscall,
+ Stdcall,
+ Fastcall,
+ Clrcall,
+ Eabi,
+ Vectorcall,
+ Regcall,
+};
+
+enum class ReferenceKind : uint8_t { None, LValueRef, RValueRef };
+
+enum OutputFlags {
+ OF_Default = 0,
+ OF_NoCallingConvention = 1,
+ OF_NoTagSpecifier = 2,
+};
+
+// Types
+enum class PrimitiveKind {
+ Void,
+ Bool,
+ Char,
+ Schar,
+ Uchar,
+ Char16,
+ Char32,
+ Short,
+ Ushort,
+ Int,
+ Uint,
+ Long,
+ Ulong,
+ Int64,
+ Uint64,
+ Wchar,
+ Float,
+ Double,
+ Ldouble,
+ Nullptr,
+};
+
+enum class CharKind {
+ Char,
+ Char16,
+ Char32,
+ Wchar,
+};
+
+enum class IntrinsicFunctionKind : uint8_t {
+ None,
+ New, // ?2 # operator new
+ Delete, // ?3 # operator delete
+ Assign, // ?4 # operator=
+ RightShift, // ?5 # operator>>
+ LeftShift, // ?6 # operator<<
+ LogicalNot, // ?7 # operator!
+ Equals, // ?8 # operator==
+ NotEquals, // ?9 # operator!=
+ ArraySubscript, // ?A # operator[]
+ Pointer, // ?C # operator->
+ Dereference, // ?D # operator*
+ Increment, // ?E # operator++
+ Decrement, // ?F # operator--
+ Minus, // ?G # operator-
+ Plus, // ?H # operator+
+ BitwiseAnd, // ?I # operator&
+ MemberPointer, // ?J # operator->*
+ Divide, // ?K # operator/
+ Modulus, // ?L # operator%
+ LessThan, // ?M operator<
+ LessThanEqual, // ?N operator<=
+ GreaterThan, // ?O operator>
+ GreaterThanEqual, // ?P operator>=
+ Comma, // ?Q operator,
+ Parens, // ?R operator()
+ BitwiseNot, // ?S operator~
+ BitwiseXor, // ?T operator^
+ BitwiseOr, // ?U operator|
+ LogicalAnd, // ?V operator&&
+ LogicalOr, // ?W operator||
+ TimesEqual, // ?X operator*=
+ PlusEqual, // ?Y operator+=
+ MinusEqual, // ?Z operator-=
+ DivEqual, // ?_0 operator/=
+ ModEqual, // ?_1 operator%=
+ RshEqual, // ?_2 operator>>=
+ LshEqual, // ?_3 operator<<=
+ BitwiseAndEqual, // ?_4 operator&=
+ BitwiseOrEqual, // ?_5 operator|=
+ BitwiseXorEqual, // ?_6 operator^=
+ VbaseDtor, // ?_D # vbase destructor
+ VecDelDtor, // ?_E # vector deleting destructor
+ DefaultCtorClosure, // ?_F # default constructor closure
+ ScalarDelDtor, // ?_G # scalar deleting destructor
+ VecCtorIter, // ?_H # vector constructor iterator
+ VecDtorIter, // ?_I # vector destructor iterator
+ VecVbaseCtorIter, // ?_J # vector vbase constructor iterator
+ VdispMap, // ?_K # virtual displacement map
+ EHVecCtorIter, // ?_L # eh vector constructor iterator
+ EHVecDtorIter, // ?_M # eh vector destructor iterator
+ EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
+ CopyCtorClosure, // ?_O # copy constructor closure
+ LocalVftableCtorClosure, // ?_T # local vftable constructor closure
+ ArrayNew, // ?_U operator new[]
+ ArrayDelete, // ?_V operator delete[]
+ ManVectorCtorIter, // ?__A managed vector ctor iterator
+ ManVectorDtorIter, // ?__B managed vector dtor iterator
+ EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator
+ EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iterator
+ VectorCopyCtorIter, // ?__G vector copy constructor iterator
+ VectorVbaseCopyCtorIter, // ?__H vector vbase copy constructor iterator
+ ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy constructor
+ CoAwait, // ?__L co_await
+ Spaceship, // operator<=>
+ MaxIntrinsic
+};
+
+enum class SpecialIntrinsicKind {
+ None,
+ Vftable,
+ Vbtable,
+ Typeof,
+ VcallThunk,
+ LocalStaticGuard,
+ StringLiteralSymbol,
+ UdtReturning,
+ Unknown,
+ DynamicInitializer,
+ DynamicAtexitDestructor,
+ RttiTypeDescriptor,
+ RttiBaseClassDescriptor,
+ RttiBaseClassArray,
+ RttiClassHierarchyDescriptor,
+ RttiCompleteObjLocator,
+ LocalVftable,
+ LocalStaticThreadGuard,
+};
+
+// Function classes
+enum FuncClass : uint16_t {
+ FC_None = 0,
+ FC_Public = 1 << 0,
+ FC_Protected = 1 << 1,
+ FC_Private = 1 << 2,
+ FC_Global = 1 << 3,
+ FC_Static = 1 << 4,
+ FC_Virtual = 1 << 5,
+ FC_Far = 1 << 6,
+ FC_ExternC = 1 << 7,
+ FC_NoParameterList = 1 << 8,
+ FC_VirtualThisAdjust = 1 << 9,
+ FC_VirtualThisAdjustEx = 1 << 10,
+ FC_StaticThisAdjust = 1 << 11,
+};
+
+enum class TagKind { Class, Struct, Union, Enum };
+
+enum class NodeKind {
+ Unknown,
+ Md5Symbol,
+ PrimitiveType,
+ FunctionSignature,
+ Identifier,
+ NamedIdentifier,
+ VcallThunkIdentifier,
+ LocalStaticGuardIdentifier,
+ IntrinsicFunctionIdentifier,
+ ConversionOperatorIdentifier,
+ DynamicStructorIdentifier,
+ StructorIdentifier,
+ LiteralOperatorIdentifier,
+ ThunkSignature,
+ PointerType,
+ TagType,
+ ArrayType,
+ Custom,
+ IntrinsicType,
+ NodeArray,
+ QualifiedName,
+ TemplateParameterReference,
+ EncodedStringLiteral,
+ IntegerLiteral,
+ RttiBaseClassDescriptor,
+ LocalStaticGuardVariable,
+ FunctionSymbol,
+ VariableSymbol,
+ SpecialTableSymbol
+};
+
+struct Node {
+ explicit Node(NodeKind K) : Kind(K) {}
+ virtual ~Node() = default;
+
+ NodeKind kind() const { return Kind; }
+
+ virtual void output(OutputStream &OS, OutputFlags Flags) const = 0;
+
+ std::string toString(OutputFlags Flags = OF_Default) const;
+
+private:
+ NodeKind Kind;
+};
+
+struct TypeNode;
+struct PrimitiveTypeNode;
+struct FunctionSignatureNode;
+struct IdentifierNode;
+struct NamedIdentifierNode;
+struct VcallThunkIdentifierNode;
+struct IntrinsicFunctionIdentifierNode;
+struct LiteralOperatorIdentifierNode;
+struct ConversionOperatorIdentifierNode;
+struct StructorIdentifierNode;
+struct ThunkSignatureNode;
+struct PointerTypeNode;
+struct ArrayTypeNode;
+struct CustomNode;
+struct TagTypeNode;
+struct IntrinsicTypeNode;
+struct NodeArrayNode;
+struct QualifiedNameNode;
+struct TemplateParameterReferenceNode;
+struct EncodedStringLiteralNode;
+struct IntegerLiteralNode;
+struct RttiBaseClassDescriptorNode;
+struct LocalStaticGuardVariableNode;
+struct SymbolNode;
+struct FunctionSymbolNode;
+struct VariableSymbolNode;
+struct SpecialTableSymbolNode;
+
+struct TypeNode : public Node {
+ explicit TypeNode(NodeKind K) : Node(K) {}
+
+ virtual void outputPre(OutputStream &OS, OutputFlags Flags) const = 0;
+ virtual void outputPost(OutputStream &OS, OutputFlags Flags) const = 0;
+
+ void output(OutputStream &OS, OutputFlags Flags) const override {
+ outputPre(OS, Flags);
+ outputPost(OS, Flags);
+ }
+
+ void outputQuals(bool SpaceBefore, bool SpaceAfter) const;
+
+ Qualifiers Quals = Q_None;
+};
+
+struct PrimitiveTypeNode : public TypeNode {
+ explicit PrimitiveTypeNode(PrimitiveKind K)
+ : TypeNode(NodeKind::PrimitiveType), PrimKind(K) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const {}
+
+ PrimitiveKind PrimKind;
+};
+
+struct FunctionSignatureNode : public TypeNode {
+ explicit FunctionSignatureNode(NodeKind K) : TypeNode(K) {}
+ FunctionSignatureNode() : TypeNode(NodeKind::FunctionSignature) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+
+ // Valid if this FunctionTypeNode is the Pointee of a PointerType or
+ // MemberPointerType.
+ PointerAffinity Affinity = PointerAffinity::None;
+
+ // The function's calling convention.
+ CallingConv CallConvention = CallingConv::None;
+
+ // Function flags (global, public, etc.)
+ FuncClass FunctionClass = FC_Global;
+
+ FunctionRefQualifier RefQualifier = FunctionRefQualifier::None;
+
+ // The return type of the function.
+ TypeNode *ReturnType = nullptr;
+
+ // True if this is a C-style ... varargs function.
+ bool IsVariadic = false;
+
+ // Function parameters
+ NodeArrayNode *Params = nullptr;
+
+ // True if the function type is noexcept
+ bool IsNoexcept = false;
+};
+
+struct IdentifierNode : public Node {
+ explicit IdentifierNode(NodeKind K) : Node(K) {}
+
+ NodeArrayNode *TemplateParams = nullptr;
+
+protected:
+ void outputTemplateParameters(OutputStream &OS, OutputFlags Flags) const;
+};
+
+struct VcallThunkIdentifierNode : public IdentifierNode {
+ VcallThunkIdentifierNode() : IdentifierNode(NodeKind::VcallThunkIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ uint64_t OffsetInVTable = 0;
+};
+
+struct DynamicStructorIdentifierNode : public IdentifierNode {
+ DynamicStructorIdentifierNode()
+ : IdentifierNode(NodeKind::DynamicStructorIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ VariableSymbolNode *Variable = nullptr;
+ QualifiedNameNode *Name = nullptr;
+ bool IsDestructor = false;
+};
+
+struct NamedIdentifierNode : public IdentifierNode {
+ NamedIdentifierNode() : IdentifierNode(NodeKind::NamedIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ StringView Name;
+};
+
+struct IntrinsicFunctionIdentifierNode : public IdentifierNode {
+ explicit IntrinsicFunctionIdentifierNode(IntrinsicFunctionKind Operator)
+ : IdentifierNode(NodeKind::IntrinsicFunctionIdentifier),
+ Operator(Operator) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ IntrinsicFunctionKind Operator;
+};
+
+struct LiteralOperatorIdentifierNode : public IdentifierNode {
+ LiteralOperatorIdentifierNode()
+ : IdentifierNode(NodeKind::LiteralOperatorIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ StringView Name;
+};
+
+struct LocalStaticGuardIdentifierNode : public IdentifierNode {
+ LocalStaticGuardIdentifierNode()
+ : IdentifierNode(NodeKind::LocalStaticGuardIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ uint32_t ScopeIndex = 0;
+};
+
+struct ConversionOperatorIdentifierNode : public IdentifierNode {
+ ConversionOperatorIdentifierNode()
+ : IdentifierNode(NodeKind::ConversionOperatorIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ // The type that this operator converts to.
+ TypeNode *TargetType = nullptr;
+};
+
+struct StructorIdentifierNode : public IdentifierNode {
+ StructorIdentifierNode() : IdentifierNode(NodeKind::StructorIdentifier) {}
+ explicit StructorIdentifierNode(bool IsDestructor)
+ : IdentifierNode(NodeKind::StructorIdentifier),
+ IsDestructor(IsDestructor) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ // The name of the class that this is a structor of.
+ IdentifierNode *Class = nullptr;
+ bool IsDestructor = false;
+};
+
+struct ThunkSignatureNode : public FunctionSignatureNode {
+ ThunkSignatureNode() : FunctionSignatureNode(NodeKind::ThunkSignature) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+
+ struct ThisAdjustor {
+ uint32_t StaticOffset = 0;
+ int32_t VBPtrOffset = 0;
+ int32_t VBOffsetOffset = 0;
+ int32_t VtordispOffset = 0;
+ };
+
+ ThisAdjustor ThisAdjust;
+};
+
+struct PointerTypeNode : public TypeNode {
+ PointerTypeNode() : TypeNode(NodeKind::PointerType) {}
+ void outputPre(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+
+ // Is this a pointer, reference, or rvalue-reference?
+ PointerAffinity Affinity = PointerAffinity::None;
+
+ // If this is a member pointer, this is the class that the member is in.
+ QualifiedNameNode *ClassParent = nullptr;
+
+ // Represents a type X in "a pointer to X", "a reference to X", or
+ // "rvalue-reference to X"
+ TypeNode *Pointee = nullptr;
+};
+
+struct TagTypeNode : public TypeNode {
+ explicit TagTypeNode(TagKind Tag) : TypeNode(NodeKind::TagType), Tag(Tag) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const;
+
+ QualifiedNameNode *QualifiedName = nullptr;
+ TagKind Tag;
+};
+
+struct ArrayTypeNode : public TypeNode {
+ ArrayTypeNode() : TypeNode(NodeKind::ArrayType) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const;
+
+ void outputDimensionsImpl(OutputStream &OS, OutputFlags Flags) const;
+ void outputOneDimension(OutputStream &OS, OutputFlags Flags, Node *N) const;
+
+ // A list of array dimensions. e.g. [3,4,5] in `int Foo[3][4][5]`
+ NodeArrayNode *Dimensions = nullptr;
+
+ // The type of array element.
+ TypeNode *ElementType = nullptr;
+};
+
+struct IntrinsicNode : public TypeNode {
+ IntrinsicNode() : TypeNode(NodeKind::IntrinsicType) {}
+ void output(OutputStream &OS, OutputFlags Flags) const override {}
+};
+
+struct CustomTypeNode : public TypeNode {
+ CustomTypeNode() : TypeNode(NodeKind::Custom) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+
+ IdentifierNode *Identifier;
+};
+
+struct NodeArrayNode : public Node {
+ NodeArrayNode() : Node(NodeKind::NodeArray) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ void output(OutputStream &OS, OutputFlags Flags, StringView Separator) const;
+
+ Node **Nodes = nullptr;
+ size_t Count = 0;
+};
+
+struct QualifiedNameNode : public Node {
+ QualifiedNameNode() : Node(NodeKind::QualifiedName) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ NodeArrayNode *Components = nullptr;
+
+ IdentifierNode *getUnqualifiedIdentifier() {
+ Node *LastComponent = Components->Nodes[Components->Count - 1];
+ return static_cast<IdentifierNode *>(LastComponent);
+ }
+};
+
+struct TemplateParameterReferenceNode : public Node {
+ TemplateParameterReferenceNode()
+ : Node(NodeKind::TemplateParameterReference) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ SymbolNode *Symbol = nullptr;
+
+ int ThunkOffsetCount = 0;
+ std::array<int64_t, 3> ThunkOffsets;
+ PointerAffinity Affinity = PointerAffinity::None;
+ bool IsMemberPointer = false;
+};
+
+struct IntegerLiteralNode : public Node {
+ IntegerLiteralNode() : Node(NodeKind::IntegerLiteral) {}
+ IntegerLiteralNode(uint64_t Value, bool IsNegative)
+ : Node(NodeKind::IntegerLiteral), Value(Value), IsNegative(IsNegative) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ uint64_t Value = 0;
+ bool IsNegative = false;
+};
+
+struct RttiBaseClassDescriptorNode : public IdentifierNode {
+ RttiBaseClassDescriptorNode()
+ : IdentifierNode(NodeKind::RttiBaseClassDescriptor) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ uint32_t NVOffset = 0;
+ int32_t VBPtrOffset = 0;
+ uint32_t VBTableOffset = 0;
+ uint32_t Flags = 0;
+};
+
+struct SymbolNode : public Node {
+ explicit SymbolNode(NodeKind K) : Node(K) {}
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+ QualifiedNameNode *Name = nullptr;
+};
+
+struct SpecialTableSymbolNode : public SymbolNode {
+ explicit SpecialTableSymbolNode()
+ : SymbolNode(NodeKind::SpecialTableSymbol) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+ QualifiedNameNode *TargetName = nullptr;
+ Qualifiers Quals;
+};
+
+struct LocalStaticGuardVariableNode : public SymbolNode {
+ LocalStaticGuardVariableNode()
+ : SymbolNode(NodeKind::LocalStaticGuardVariable) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ bool IsVisible = false;
+};
+
+struct EncodedStringLiteralNode : public SymbolNode {
+ EncodedStringLiteralNode() : SymbolNode(NodeKind::EncodedStringLiteral) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ StringView DecodedString;
+ bool IsTruncated = false;
+ CharKind Char = CharKind::Char;
+};
+
+struct VariableSymbolNode : public SymbolNode {
+ VariableSymbolNode() : SymbolNode(NodeKind::VariableSymbol) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ StorageClass SC = StorageClass::None;
+ TypeNode *Type = nullptr;
+};
+
+struct FunctionSymbolNode : public SymbolNode {
+ FunctionSymbolNode() : SymbolNode(NodeKind::FunctionSymbol) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ FunctionSignatureNode *Signature = nullptr;
+};
+
+} // namespace ms_demangle
+} // namespace llvm
+
+#endif
\ No newline at end of file
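
The node hierarchy above is consumed by switching on kind() and downcasting; a hypothetical helper (a sketch only, using just the fields declared in this header) might look like:

#include "llvm/Demangle/MicrosoftDemangleNodes.h"

using namespace llvm::ms_demangle;

// Count the parameters of a demangled function symbol, or return -1 if the
// node is not a function symbol at all.
int functionParamCount(const SymbolNode *Sym) {
  if (!Sym || Sym->kind() != NodeKind::FunctionSymbol)
    return -1;
  const auto *FS = static_cast<const FunctionSymbolNode *>(Sym);
  if (!FS->Signature || !FS->Signature->Params)
    return 0;                        // e.g. an empty parameter list
  return static_cast<int>(FS->Signature->Params->Count);
}
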
diff --git a/contrib/llvm/lib/Demangle/StringView.h b/contrib/llvm/include/llvm/Demangle/StringView.h
index a89deda694c2..a89deda694c2 100644
--- a/contrib/llvm/lib/Demangle/StringView.h
+++ b/contrib/llvm/include/llvm/Demangle/StringView.h
diff --git a/contrib/llvm/lib/Demangle/Utility.h b/contrib/llvm/include/llvm/Demangle/Utility.h
index 54cd99e5026b..1d1601c81635 100644
--- a/contrib/llvm/lib/Demangle/Utility.h
+++ b/contrib/llvm/include/llvm/Demangle/Utility.h
@@ -70,22 +70,6 @@ public:
BufferCapacity = BufferCapacity_;
}
- /// Create an OutputStream from a buffer and a size. If either of these are
- /// null a buffer is allocated.
- static OutputStream create(char *StartBuf, size_t *Size, size_t AllocSize) {
- OutputStream Result;
-
- if (!StartBuf || !Size) {
- StartBuf = static_cast<char *>(std::malloc(AllocSize));
- if (StartBuf == nullptr)
- std::terminate();
- Size = &AllocSize;
- }
-
- Result.reset(StartBuf, *Size);
- return Result;
- }
-
/// If a ParameterPackExpansion (or similar type) is encountered, the offset
/// into the pack that we're currently printing.
unsigned CurrentPackIndex = std::numeric_limits<unsigned>::max();
@@ -185,4 +169,19 @@ public:
SwapAndRestore &operator=(const SwapAndRestore &) = delete;
};
+inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S,
+ size_t InitSize) {
+ size_t BufferSize;
+ if (Buf == nullptr) {
+ Buf = static_cast<char *>(std::malloc(InitSize));
+ if (Buf == nullptr)
+ return false;
+ BufferSize = InitSize;
+ } else
+ BufferSize = *N;
+
+ S.reset(Buf, BufferSize);
+ return true;
+}
+
#endif
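
The removed OutputStream::create and the added initializeOutputStream do the same buffer setup, but the new helper reports allocation failure instead of calling std::terminate(). A caller under the new scheme might look like this sketch (the actual rendering into the stream is elided):

#include "llvm/Demangle/Utility.h"

#include <cstddef>

bool prepareStream(OutputStream &OS, size_t InitSize) {
  // A null Buf asks the helper to malloc InitSize bytes; N is only read when
  // a caller-provided buffer is passed, so nullptr is fine here.
  if (!initializeOutputStream(/*Buf=*/nullptr, /*N=*/nullptr, OS, InitSize))
    return false;                    // malloc failed
  // ... write the demangled output into OS here ...
  return true;
}
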
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h b/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h
index 1ce772ccde95..1b08379b8c3b 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h
@@ -35,25 +35,6 @@ class ObjectFile;
} // end namespace object
-/// JITEvent_EmittedFunctionDetails - Helper struct for containing information
-/// about a generated machine code function.
-struct JITEvent_EmittedFunctionDetails {
- struct LineStart {
- /// The address at which the current line changes.
- uintptr_t Address;
-
- /// The new location information. These can be translated to DebugLocTuples
- /// using MF->getDebugLocTuple().
- DebugLoc Loc;
- };
-
- /// The machine function the struct contains information for.
- const MachineFunction *MF;
-
- /// The list of line boundary information, sorted by address.
- std::vector<LineStart> LineStarts;
-};
-
/// JITEventListener - Abstract interface for use by the JIT to notify clients
/// about significant events during compilation. For example, to notify
/// profilers and debuggers that need to know where functions have been emitted.
@@ -61,26 +42,26 @@ struct JITEvent_EmittedFunctionDetails {
/// The default implementation of each method does nothing.
class JITEventListener {
public:
- using EmittedFunctionDetails = JITEvent_EmittedFunctionDetails;
+ using ObjectKey = uint64_t;
-public:
JITEventListener() = default;
virtual ~JITEventListener() = default;
- /// NotifyObjectEmitted - Called after an object has been successfully
- /// emitted to memory. NotifyFunctionEmitted will not be called for
+ /// notifyObjectLoaded - Called after an object has had its sections allocated
+ /// and addresses assigned to all symbols. Note: Section memory will not have
+ /// been relocated yet. notifyFunctionLoaded will not be called for
/// individual functions in the object.
///
/// ELF-specific information
/// The ObjectImage contains the generated object image
/// with section headers updated to reflect the address at which sections
/// were loaded and with relocations performed in-place on debug sections.
- virtual void NotifyObjectEmitted(const object::ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) {}
+ virtual void notifyObjectLoaded(ObjectKey K, const object::ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {}
- /// NotifyFreeingObject - Called just before the memory associated with
+ /// notifyFreeingObject - Called just before the memory associated with
/// a previously emitted object is released.
- virtual void NotifyFreeingObject(const object::ObjectFile &Obj) {}
+ virtual void notifyFreeingObject(ObjectKey K) {}
  // Get a pointer to the GDB debugger registration listener.
static JITEventListener *createGDBRegistrationListener();
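
Listeners written against the old NotifyObjectEmitted/NotifyFreeingObject pair now key both events on an ObjectKey rather than on the ObjectFile itself. A minimal sketch of a listener on the new interface:

#include "llvm/ExecutionEngine/JITEventListener.h"

#include <vector>

using namespace llvm;

// Hypothetical listener: remembers which objects are currently loaded. The
// same ObjectKey passed to notifyObjectLoaded later arrives at
// notifyFreeingObject, so no ObjectFile reference needs to be retained.
class LoadTrackingListener : public JITEventListener {
public:
  void notifyObjectLoaded(ObjectKey K, const object::ObjectFile &Obj,
                          const RuntimeDyld::LoadedObjectInfo &L) override {
    LoadedKeys.push_back(K);
  }

  void notifyFreeingObject(ObjectKey K) override {
    // Match K against LoadedKeys and drop it (bookkeeping elided).
  }

private:
  std::vector<ObjectKey> LoadedKeys;
};
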
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h b/contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h
index 53037c3dbc72..05c9590726df 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h
@@ -23,6 +23,7 @@
#include <set>
#include <string>
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
@@ -32,13 +33,25 @@ class GlobalValue;
namespace object {
-class BasicSymbolRef;
+class SymbolRef;
} // end namespace object
/// Represents an address in the target process's address space.
using JITTargetAddress = uint64_t;
+/// Convert a JITTargetAddress to a pointer.
+template <typename T> T jitTargetAddressToPointer(JITTargetAddress Addr) {
+ static_assert(std::is_pointer<T>::value, "T must be a pointer type");
+ uintptr_t IntPtr = static_cast<uintptr_t>(Addr);
+ assert(IntPtr == Addr && "JITTargetAddress value out of range for uintptr_t");
+ return reinterpret_cast<T>(IntPtr);
+}
+
+template <typename T> JITTargetAddress pointerToJITTargetAddress(T *Ptr) {
+ return static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(Ptr));
+}
+
/// Flags for symbols in the JIT.
class JITSymbolFlags {
public:
@@ -52,8 +65,10 @@ public:
Common = 1U << 2,
Absolute = 1U << 3,
Exported = 1U << 4,
- Lazy = 1U << 5,
- Materializing = 1U << 6
+ Callable = 1U << 5,
+ Lazy = 1U << 6,
+ Materializing = 1U << 7,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Materializing)
};
static JITSymbolFlags stripTransientFlags(JITSymbolFlags Orig) {
@@ -71,6 +86,26 @@ public:
JITSymbolFlags(FlagNames Flags, TargetFlagsType TargetFlags)
: Flags(Flags), TargetFlags(TargetFlags) {}
+ /// Implicitly convert to bool. Returns true if any flag is set.
+ explicit operator bool() const { return Flags != None || TargetFlags != 0; }
+
+ /// Compare for equality.
+ bool operator==(const JITSymbolFlags &RHS) const {
+ return Flags == RHS.Flags && TargetFlags == RHS.TargetFlags;
+ }
+
+ /// Bitwise AND-assignment for FlagNames.
+ JITSymbolFlags &operator&=(const FlagNames &RHS) {
+ Flags &= RHS;
+ return *this;
+ }
+
+ /// Bitwise OR-assignment for FlagNames.
+ JITSymbolFlags &operator|=(const FlagNames &RHS) {
+ Flags |= RHS;
+ return *this;
+ }
+
/// Return true if there was an error retrieving this symbol.
bool hasError() const {
return (Flags & HasError) == HasError;
@@ -109,11 +144,13 @@ public:
return (Flags & Exported) == Exported;
}
- /// Implicitly convert to the underlying flags type.
- operator UnderlyingType&() { return Flags; }
+ /// Returns true if the given symbol is known to be callable.
+ bool isCallable() const { return (Flags & Callable) == Callable; }
- /// Implicitly convert to the underlying flags type.
- operator const UnderlyingType&() const { return Flags; }
+ /// Get the underlying flags value as an integer.
+ UnderlyingType getRawFlagsValue() const {
+ return static_cast<UnderlyingType>(Flags);
+ }
/// Return a reference to the target-specific flags.
TargetFlagsType& getTargetFlags() { return TargetFlags; }
@@ -127,13 +164,28 @@ public:
/// Construct a JITSymbolFlags value based on the flags of the given libobject
/// symbol.
- static JITSymbolFlags fromObjectSymbol(const object::BasicSymbolRef &Symbol);
+ static Expected<JITSymbolFlags>
+ fromObjectSymbol(const object::SymbolRef &Symbol);
private:
- UnderlyingType Flags = None;
+ FlagNames Flags = None;
TargetFlagsType TargetFlags = 0;
};
+inline JITSymbolFlags operator&(const JITSymbolFlags &LHS,
+ const JITSymbolFlags::FlagNames &RHS) {
+ JITSymbolFlags Tmp = LHS;
+ Tmp &= RHS;
+ return Tmp;
+}
+
+inline JITSymbolFlags operator|(const JITSymbolFlags &LHS,
+ const JITSymbolFlags::FlagNames &RHS) {
+ JITSymbolFlags Tmp = LHS;
+ Tmp |= RHS;
+ return Tmp;
+}
+
/// ARM-specific JIT symbol flags.
/// FIXME: This should be moved into a target-specific header.
class ARMJITSymbolFlags {
@@ -147,8 +199,8 @@ public:
operator JITSymbolFlags::TargetFlagsType&() { return Flags; }
- static ARMJITSymbolFlags fromObjectSymbol(
- const object::BasicSymbolRef &Symbol);
+ static ARMJITSymbolFlags fromObjectSymbol(const object::SymbolRef &Symbol);
+
private:
JITSymbolFlags::TargetFlagsType Flags = 0;
};
@@ -293,7 +345,7 @@ class JITSymbolResolver {
public:
using LookupSet = std::set<StringRef>;
using LookupResult = std::map<StringRef, JITEvaluatedSymbol>;
- using LookupFlagsResult = std::map<StringRef, JITSymbolFlags>;
+ using OnResolvedFunction = std::function<void(Expected<LookupResult>)>;
virtual ~JITSymbolResolver() = default;
@@ -302,13 +354,14 @@ public:
///
/// This method will return an error if any of the given symbols can not be
/// resolved, or if the resolution process itself triggers an error.
- virtual Expected<LookupResult> lookup(const LookupSet &Symbols) = 0;
+ virtual void lookup(const LookupSet &Symbols,
+ OnResolvedFunction OnResolved) = 0;
- /// Returns the symbol flags for each of the given symbols.
- ///
- /// This method does NOT return an error if any of the given symbols is
- /// missing. Instead, that symbol will be left out of the result map.
- virtual Expected<LookupFlagsResult> lookupFlags(const LookupSet &Symbols) = 0;
+ /// Returns the subset of the given symbols that should be materialized by
+ /// the caller. Only weak/common symbols should be looked up, as strong
+ /// definitions are implicitly always part of the caller's responsibility.
+ virtual Expected<LookupSet>
+ getResponsibilitySet(const LookupSet &Symbols) = 0;
private:
virtual void anchor();
@@ -320,11 +373,11 @@ public:
/// Performs lookup by, for each symbol, first calling
/// findSymbolInLogicalDylib and if that fails calling
/// findSymbol.
- Expected<LookupResult> lookup(const LookupSet &Symbols) final;
+ void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) final;
/// Performs flags lookup by calling findSymbolInLogicalDylib and
/// returning the flags value for that symbol.
- Expected<LookupFlagsResult> lookupFlags(const LookupSet &Symbols) final;
+ Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) final;
/// This method returns the address of the specified symbol if it exists
/// within the logical dynamic library represented by this JITSymbolResolver.
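
Together, the new Callable flag and the address/pointer helpers let a client turn a resolved symbol into something invocable. A sketch; it assumes JITEvaluatedSymbol's getFlags()/getAddress() accessors, which this hunk does not show:

#include "llvm/ExecutionEngine/JITSymbol.h"

using namespace llvm;

using MainFn = int (*)();

// Return a callable entry point for a resolved symbol, or nullptr if the
// symbol is not known to be callable.
MainFn getEntryPoint(const JITEvaluatedSymbol &Sym) {
  if (!Sym.getFlags().isCallable())   // new Callable flag query
    return nullptr;
  // jitTargetAddressToPointer asserts the address fits in a uintptr_t.
  return jitTargetAddressToPointer<MainFn>(Sym.getAddress());
}
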
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 8bd21a0e3dd6..884878925cde 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -16,6 +16,7 @@
#define LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -23,6 +24,7 @@
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/Layer.h"
+#include "llvm/ExecutionEngine/Orc/LazyReexports.h"
#include "llvm/ExecutionEngine/Orc/Legacy.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
@@ -60,42 +62,73 @@ namespace orc {
class ExtractingIRMaterializationUnit;
-class CompileOnDemandLayer2 : public IRLayer {
- friend class ExtractingIRMaterializationUnit;
+class CompileOnDemandLayer : public IRLayer {
+ friend class PartitioningIRMaterializationUnit;
public:
/// Builder for IndirectStubsManagers.
using IndirectStubsManagerBuilder =
std::function<std::unique_ptr<IndirectStubsManager>()>;
- using GetAvailableContextFunction = std::function<LLVMContext &()>;
+ using GlobalValueSet = std::set<const GlobalValue *>;
- CompileOnDemandLayer2(ExecutionSession &ES, IRLayer &BaseLayer,
- JITCompileCallbackManager &CCMgr,
- IndirectStubsManagerBuilder BuildIndirectStubsManager,
- GetAvailableContextFunction GetAvailableContext);
+ /// Partitioning function.
+ using PartitionFunction =
+ std::function<Optional<GlobalValueSet>(GlobalValueSet Requested)>;
- Error add(VSO &V, VModuleKey K, std::unique_ptr<Module> M) override;
+ /// Off-the-shelf partitioning which compiles all requested symbols (usually
+ /// a single function at a time).
+ static Optional<GlobalValueSet> compileRequested(GlobalValueSet Requested);
- void emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) override;
+ /// Off-the-shelf partitioning which compiles whole modules whenever any
+ /// symbol in them is requested.
+ static Optional<GlobalValueSet> compileWholeModule(GlobalValueSet Requested);
+
+ /// Construct a CompileOnDemandLayer.
+ CompileOnDemandLayer(ExecutionSession &ES, IRLayer &BaseLayer,
+ LazyCallThroughManager &LCTMgr,
+ IndirectStubsManagerBuilder BuildIndirectStubsManager);
+
+ /// Sets the partition function.
+ void setPartitionFunction(PartitionFunction Partition);
+
+ /// Emits the given module. This should not be called by clients: it will be
+ /// called by the JIT when a definition added via the add method is requested.
+ void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
private:
- using StubManagersMap =
- std::map<const VSO *, std::unique_ptr<IndirectStubsManager>>;
+ struct PerDylibResources {
+ public:
+ PerDylibResources(JITDylib &ImplD,
+ std::unique_ptr<IndirectStubsManager> ISMgr)
+ : ImplD(ImplD), ISMgr(std::move(ISMgr)) {}
+ JITDylib &getImplDylib() { return ImplD; }
+ IndirectStubsManager &getISManager() { return *ISMgr; }
+
+ private:
+ JITDylib &ImplD;
+ std::unique_ptr<IndirectStubsManager> ISMgr;
+ };
+
+ using PerDylibResourcesMap = std::map<const JITDylib *, PerDylibResources>;
+
+ PerDylibResources &getPerDylibResources(JITDylib &TargetD);
- IndirectStubsManager &getStubsManager(const VSO &V);
+ void cleanUpModule(Module &M);
- void emitExtractedFunctionsModule(MaterializationResponsibility R,
- std::unique_ptr<Module> M);
+ void expandPartition(GlobalValueSet &Partition);
+
+ void emitPartition(MaterializationResponsibility R, ThreadSafeModule TSM,
+ IRMaterializationUnit::SymbolNameToDefinitionMap Defs);
mutable std::mutex CODLayerMutex;
IRLayer &BaseLayer;
- JITCompileCallbackManager &CCMgr;
+ LazyCallThroughManager &LCTMgr;
IndirectStubsManagerBuilder BuildIndirectStubsManager;
- StubManagersMap StubsMgrs;
- GetAvailableContextFunction GetAvailableContext;
+ PerDylibResourcesMap DylibResources;
+ PartitionFunction Partition = compileRequested;
+ SymbolLinkagePromoter PromoteSymbols;
};
/// Compile-on-demand layer.
@@ -108,7 +141,7 @@ private:
template <typename BaseLayerT,
typename CompileCallbackMgrT = JITCompileCallbackManager,
typename IndirectStubsMgrT = IndirectStubsManager>
-class CompileOnDemandLayer {
+class LegacyCompileOnDemandLayer {
private:
template <typename MaterializerFtor>
class LambdaMaterializer final : public ValueMaterializer {
@@ -158,25 +191,6 @@ private:
return llvm::make_unique<RO>(std::move(ResourcePtr));
}
- class StaticGlobalRenamer {
- public:
- StaticGlobalRenamer() = default;
- StaticGlobalRenamer(StaticGlobalRenamer &&) = default;
- StaticGlobalRenamer &operator=(StaticGlobalRenamer &&) = default;
-
- void rename(Module &M) {
- for (auto &F : M)
- if (F.hasLocalLinkage())
- F.setName("$static." + Twine(NextId++));
- for (auto &G : M.globals())
- if (G.hasLocalLinkage())
- G.setName("$static." + Twine(NextId++));
- }
-
- private:
- unsigned NextId = 0;
- };
-
struct LogicalDylib {
struct SourceModuleEntry {
std::unique_ptr<Module> SourceMod;
@@ -230,7 +244,7 @@ private:
VModuleKey K;
std::shared_ptr<SymbolResolver> BackingResolver;
std::unique_ptr<IndirectStubsMgrT> StubsMgr;
- StaticGlobalRenamer StaticRenamer;
+ SymbolLinkagePromoter PromoteSymbols;
SourceModulesList SourceModules;
std::vector<VModuleKey> BaseLayerVModuleKeys;
};
@@ -251,13 +265,13 @@ public:
std::function<void(VModuleKey K, std::shared_ptr<SymbolResolver> R)>;
/// Construct a compile-on-demand layer instance.
- CompileOnDemandLayer(ExecutionSession &ES, BaseLayerT &BaseLayer,
- SymbolResolverGetter GetSymbolResolver,
- SymbolResolverSetter SetSymbolResolver,
- PartitioningFtor Partition,
- CompileCallbackMgrT &CallbackMgr,
- IndirectStubsManagerBuilderT CreateIndirectStubsManager,
- bool CloneStubsIntoPartitions = true)
+ LegacyCompileOnDemandLayer(ExecutionSession &ES, BaseLayerT &BaseLayer,
+ SymbolResolverGetter GetSymbolResolver,
+ SymbolResolverSetter SetSymbolResolver,
+ PartitioningFtor Partition,
+ CompileCallbackMgrT &CallbackMgr,
+ IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+ bool CloneStubsIntoPartitions = true)
: ES(ES), BaseLayer(BaseLayer),
GetSymbolResolver(std::move(GetSymbolResolver)),
SetSymbolResolver(std::move(SetSymbolResolver)),
@@ -265,7 +279,7 @@ public:
CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)),
CloneStubsIntoPartitions(CloneStubsIntoPartitions) {}
- ~CompileOnDemandLayer() {
+ ~LegacyCompileOnDemandLayer() {
// FIXME: Report error on log.
while (!LogicalDylibs.empty())
consumeError(removeModule(LogicalDylibs.begin()->first));
@@ -352,14 +366,9 @@ public:
private:
Error addLogicalModule(LogicalDylib &LD, std::unique_ptr<Module> SrcMPtr) {
- // Rename all static functions / globals to $static.X :
- // This will unique the names across all modules in the logical dylib,
- // simplifying symbol lookup.
- LD.StaticRenamer.rename(*SrcMPtr);
-
- // Bump the linkage and rename any anonymous/private members in SrcM to
- // ensure that everything will resolve properly after we partition SrcM.
- makeAllSymbolsExternallyAccessible(*SrcMPtr);
+ // Rename anonymous globals and promote linkage to ensure that everything
+ // will resolve properly after we partition SrcM.
+ LD.PromoteSymbols(*SrcMPtr);
// Create a logical module handle for SrcM within the logical dylib.
Module &SrcM = *SrcMPtr;
@@ -500,28 +509,29 @@ private:
auto GVsResolver = createSymbolResolver(
[&LD, LegacyLookup](const SymbolNameSet &Symbols) {
- auto SymbolFlags = lookupFlagsWithLegacyFn(Symbols, LegacyLookup);
+ auto RS = getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup);
- if (!SymbolFlags) {
- logAllUnhandledErrors(SymbolFlags.takeError(), errs(),
- "CODLayer/GVsResolver flags lookup failed: ");
- return SymbolFlagsMap();
+ if (!RS) {
+ logAllUnhandledErrors(
+ RS.takeError(), errs(),
+ "CODLayer/GVsResolver responsibility set lookup failed: ");
+ return SymbolNameSet();
}
- if (SymbolFlags->size() == Symbols.size())
- return *SymbolFlags;
+ if (RS->size() == Symbols.size())
+ return *RS;
SymbolNameSet NotFoundViaLegacyLookup;
for (auto &S : Symbols)
- if (!SymbolFlags->count(S))
+ if (!RS->count(S))
NotFoundViaLegacyLookup.insert(S);
- auto SymbolFlags2 =
- LD.BackingResolver->lookupFlags(NotFoundViaLegacyLookup);
+ auto RS2 =
+ LD.BackingResolver->getResponsibilitySet(NotFoundViaLegacyLookup);
- for (auto &KV : SymbolFlags2)
- (*SymbolFlags)[KV.first] = std::move(KV.second);
+ for (auto &S : RS2)
+ (*RS).insert(S);
- return *SymbolFlags;
+ return *RS;
},
[this, &LD,
LegacyLookup](std::shared_ptr<AsynchronousSymbolQuery> Query,
@@ -669,28 +679,29 @@ private:
// Create memory manager and symbol resolver.
auto Resolver = createSymbolResolver(
[&LD, LegacyLookup](const SymbolNameSet &Symbols) {
- auto SymbolFlags = lookupFlagsWithLegacyFn(Symbols, LegacyLookup);
- if (!SymbolFlags) {
- logAllUnhandledErrors(SymbolFlags.takeError(), errs(),
- "CODLayer/SubResolver flags lookup failed: ");
- return SymbolFlagsMap();
+ auto RS = getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup);
+ if (!RS) {
+ logAllUnhandledErrors(
+ RS.takeError(), errs(),
+ "CODLayer/SubResolver responsibility set lookup failed: ");
+ return SymbolNameSet();
}
- if (SymbolFlags->size() == Symbols.size())
- return *SymbolFlags;
+ if (RS->size() == Symbols.size())
+ return *RS;
SymbolNameSet NotFoundViaLegacyLookup;
for (auto &S : Symbols)
- if (!SymbolFlags->count(S))
+ if (!RS->count(S))
NotFoundViaLegacyLookup.insert(S);
- auto SymbolFlags2 =
- LD.BackingResolver->lookupFlags(NotFoundViaLegacyLookup);
+ auto RS2 =
+ LD.BackingResolver->getResponsibilitySet(NotFoundViaLegacyLookup);
- for (auto &KV : SymbolFlags2)
- (*SymbolFlags)[KV.first] = std::move(KV.second);
+ for (auto &S : RS2)
+ (*RS).insert(S);
- return *SymbolFlags;
+ return *RS;
},
[this, &LD, LegacyLookup](std::shared_ptr<AsynchronousSymbolQuery> Q,
SymbolNameSet Symbols) {
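
On the new (non-legacy) layer, compile granularity is now a pluggable policy rather than a template parameter. A configuration sketch, assuming an already-constructed layer (the ExecutionSession, base layer and LazyCallThroughManager wiring is omitted):

#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"

using namespace llvm;
using namespace llvm::orc;

void configurePartitioning(CompileOnDemandLayer &COD) {
  // Built-in policy: pull in the whole module as soon as any symbol is hit.
  COD.setPartitionFunction(CompileOnDemandLayer::compileWholeModule);

  // Or a custom policy with the same shape as compileRequested: map the
  // requested GlobalValues to the subset that should be extracted now.
  COD.setPartitionFunction(
      [](CompileOnDemandLayer::GlobalValueSet Requested)
          -> Optional<CompileOnDemandLayer::GlobalValueSet> {
        return std::move(Requested);
      });
}
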
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
index 213a59124c85..f34f88311ba5 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
-#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
@@ -38,7 +38,7 @@ namespace orc {
/// Simple compile functor: Takes a single IR module and returns an ObjectFile.
/// This compiler supports a single compilation thread and LLVMContext only.
-/// For multithreaded compilation, use MultiThreadedSimpleCompiler below.
+/// For multithreaded compilation, use ConcurrentIRCompiler below.
class SimpleCompiler {
public:
using CompileResult = std::unique_ptr<MemoryBuffer>;
@@ -105,10 +105,10 @@ private:
///
/// This class creates a new TargetMachine and SimpleCompiler instance for each
/// compile.
-class MultiThreadedSimpleCompiler {
+class ConcurrentIRCompiler {
public:
- MultiThreadedSimpleCompiler(JITTargetMachineBuilder JTMB,
- ObjectCache *ObjCache = nullptr)
+ ConcurrentIRCompiler(JITTargetMachineBuilder JTMB,
+ ObjectCache *ObjCache = nullptr)
: JTMB(std::move(JTMB)), ObjCache(ObjCache) {}
void setObjectCache(ObjectCache *ObjCache) { this->ObjCache = ObjCache; }
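
For callers the rename is mechanical: MultiThreadedSimpleCompiler becomes ConcurrentIRCompiler with the same constructor shape. A sketch follows; the call operator is not shown in this hunk and is assumed to mirror SimpleCompiler's, taking an IR module and returning an object buffer:

#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/IR/Module.h"

#include <memory>

using namespace llvm;
using namespace llvm::orc;

std::unique_ptr<MemoryBuffer> compileWithFreshTM(JITTargetMachineBuilder JTMB,
                                                 Module &M) {
  // Each invocation builds its own TargetMachine, so this object can be
  // shared across compile threads (hence the new name).
  ConcurrentIRCompiler Compile(std::move(JTMB), /*ObjCache=*/nullptr);
  return Compile(M);
}
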
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index 11d7c091947e..39d306e0bd4c 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -18,13 +18,13 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
-#include <list>
-#include <map>
#include <memory>
-#include <set>
#include <vector>
+#define DEBUG_TYPE "orc"
+
namespace llvm {
namespace orc {
@@ -33,7 +33,7 @@ class AsynchronousSymbolQuery;
class ExecutionSession;
class MaterializationUnit;
class MaterializationResponsibility;
-class VSO;
+class JITDylib;
/// VModuleKey provides a unique identifier (allocated and managed by
/// ExecutionSessions) for a module added to the JIT.
@@ -41,36 +41,52 @@ using VModuleKey = uint64_t;
/// A set of symbol names (represented by SymbolStringPtrs for
// efficiency).
-using SymbolNameSet = std::set<SymbolStringPtr>;
-
-/// Render a SymbolNameSet to an ostream.
-raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols);
+using SymbolNameSet = DenseSet<SymbolStringPtr>;
/// A map from symbol names (as SymbolStringPtrs) to JITSymbols
/// (address/flags pairs).
-using SymbolMap = std::map<SymbolStringPtr, JITEvaluatedSymbol>;
-
-/// Render a SymbolMap to an ostream.
-raw_ostream &operator<<(raw_ostream &OS, const SymbolMap &Symbols);
+using SymbolMap = DenseMap<SymbolStringPtr, JITEvaluatedSymbol>;
/// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags.
-using SymbolFlagsMap = std::map<SymbolStringPtr, JITSymbolFlags>;
-
-/// Render a SymbolMap to an ostream.
-raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap &Symbols);
+using SymbolFlagsMap = DenseMap<SymbolStringPtr, JITSymbolFlags>;
/// A base class for materialization failures that allows the failing
/// symbols to be obtained for logging.
-using SymbolDependenceMap = std::map<VSO *, SymbolNameSet>;
+using SymbolDependenceMap = DenseMap<JITDylib *, SymbolNameSet>;
+
+/// A list of (JITDylib*, bool) pairs.
+using JITDylibSearchList = std::vector<std::pair<JITDylib *, bool>>;
+
+/// Render a SymbolStringPtr.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym);
+
+/// Render a SymbolNameSet.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols);
+
+/// Render a SymbolFlagsMap entry.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap::value_type &KV);
+
+/// Render a SymbolMap entry.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolMap::value_type &KV);
+
+/// Render a SymbolFlagsMap.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap &SymbolFlags);
+
+/// Render a SymbolMap.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolMap &Symbols);
+
+/// Render a SymbolDependenceMap entry.
+raw_ostream &operator<<(raw_ostream &OS,
+ const SymbolDependenceMap::value_type &KV);
 /// Render a SymbolDependenceMap.
raw_ostream &operator<<(raw_ostream &OS, const SymbolDependenceMap &Deps);
-/// A list of VSO pointers.
-using VSOList = std::vector<VSO *>;
+/// Render a MaterializationUnit.
+raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU);
-/// Render a VSOList.
-raw_ostream &operator<<(raw_ostream &OS, const VSOList &VSOs);
+/// Render a JITDylibSearchList.
+raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs);
/// Callback to notify client that symbols have been resolved.
using SymbolsResolvedCallback = std::function<void(Expected<SymbolMap>)>;
@@ -86,7 +102,8 @@ using RegisterDependenciesFunction =
/// are no dependants to register with.
extern RegisterDependenciesFunction NoDependenciesToRegister;
-/// Used to notify a VSO that the given set of symbols failed to materialize.
+/// Used to notify a JITDylib that the given set of symbols failed to
+/// materialize.
class FailedToMaterialize : public ErrorInfo<FailedToMaterialize> {
public:
static char ID;
@@ -114,12 +131,26 @@ private:
SymbolNameSet Symbols;
};
+/// Used to notify clients that a set of symbols could not be removed.
+class SymbolsCouldNotBeRemoved : public ErrorInfo<SymbolsCouldNotBeRemoved> {
+public:
+ static char ID;
+
+ SymbolsCouldNotBeRemoved(SymbolNameSet Symbols);
+ std::error_code convertToErrorCode() const override;
+ void log(raw_ostream &OS) const override;
+ const SymbolNameSet &getSymbols() const { return Symbols; }
+
+private:
+ SymbolNameSet Symbols;
+};
+
/// Tracks responsibility for materialization, and mediates interactions between
-/// MaterializationUnits and VSOs.
+/// MaterializationUnits and JDs.
///
/// An instance of this class is passed to MaterializationUnits when their
/// materialize method is called. It allows MaterializationUnits to resolve and
-/// finalize symbols, or abandon materialization by notifying any unmaterialized
+/// emit symbols, or abandon materialization by notifying any unmaterialized
/// symbols of an error.
class MaterializationResponsibility {
friend class MaterializationUnit;
@@ -130,41 +161,54 @@ public:
/// Destruct a MaterializationResponsibility instance. In debug mode
/// this asserts that all symbols being tracked have been either
- /// finalized or notified of an error.
+ /// emitted or notified of an error.
~MaterializationResponsibility();
- /// Returns the target VSO that these symbols are being materialized
+ /// Returns the target JITDylib that these symbols are being materialized
/// into.
- VSO &getTargetVSO() const { return V; }
+ JITDylib &getTargetJITDylib() const { return JD; }
+
+ /// Returns the VModuleKey for this instance.
+ VModuleKey getVModuleKey() const { return K; }
/// Returns the symbol flags map for this responsibility instance.
- SymbolFlagsMap getSymbols() { return SymbolFlags; }
+ /// Note: The returned flags may have transient flags (Lazy, Materializing)
+ /// set. These should be stripped with JITSymbolFlags::stripTransientFlags
+ /// before using.
+ const SymbolFlagsMap &getSymbols() { return SymbolFlags; }
/// Returns the names of any symbols covered by this
/// MaterializationResponsibility object that have queries pending. This
/// information can be used to return responsibility for unrequested symbols
- /// back to the VSO via the delegate method.
- SymbolNameSet getRequestedSymbols();
-
- /// Resolves the given symbols. Individual calls to this method may
- /// resolve a subset of the symbols, but all symbols must have been
- /// resolved prior to calling finalize.
+ /// back to the JITDylib via the delegate method.
+ SymbolNameSet getRequestedSymbols() const;
+
+ /// Notifies the target JITDylib that the given symbols have been resolved.
+ /// This will update the given symbols' addresses in the JITDylib, and notify
+ /// any pending queries on the given symbols of their resolution. The given
+ /// symbols must be ones covered by this MaterializationResponsibility
+ /// instance. Individual calls to this method may resolve a subset of the
+ /// symbols, but all symbols must have been resolved prior to calling emit.
void resolve(const SymbolMap &Symbols);
- /// Finalizes all symbols tracked by this instance.
- void finalize();
+ /// Notifies the target JITDylib (and any pending queries on that JITDylib)
+ /// that all symbols covered by this MaterializationResponsibility instance
+ /// have been emitted.
+ void emit();
- /// Adds new symbols to the VSO and this responsibility instance.
- /// VSO entries start out in the materializing state.
+ /// Adds new symbols to the JITDylib and this responsibility instance.
+ /// JITDylib entries start out in the materializing state.
///
/// This method can be used by materialization units that want to add
/// additional symbols at materialization time (e.g. stubs, compile
/// callbacks, metadata).
Error defineMaterializing(const SymbolFlagsMap &SymbolFlags);
- /// Notify all unfinalized symbols that an error has occurred.
+ /// Notify all not-yet-emitted symbols covered by this
+ /// MaterializationResponsibility instance that an error has occurred.
 /// This will remove all symbols covered by this MaterializationResponsibility
- /// from V, and send an error to any queries waiting on these symbols.
+ /// from the target JITDylib, and send an error to any queries waiting on
+ /// these symbols.
void failMaterialization();
/// Transfers responsibility to the given MaterializationUnit for all
@@ -177,7 +221,8 @@ public:
/// Delegates responsibility for the given symbols to the returned
/// materialization responsibility. Useful for breaking up work between
/// threads, or different kinds of materialization processes.
- MaterializationResponsibility delegate(const SymbolNameSet &Symbols);
+ MaterializationResponsibility delegate(const SymbolNameSet &Symbols,
+ VModuleKey NewKey = VModuleKey());
void addDependencies(const SymbolStringPtr &Name,
const SymbolDependenceMap &Dependencies);
@@ -186,12 +231,14 @@ public:
void addDependenciesForAll(const SymbolDependenceMap &Dependencies);
private:
- /// Create a MaterializationResponsibility for the given VSO and
+ /// Create a MaterializationResponsibility for the given JITDylib and
/// initial symbols.
- MaterializationResponsibility(VSO &V, SymbolFlagsMap SymbolFlags);
+ MaterializationResponsibility(JITDylib &JD, SymbolFlagsMap SymbolFlags,
+ VModuleKey K);
- VSO &V;
+ JITDylib &JD;
SymbolFlagsMap SymbolFlags;
+ VModuleKey K;
};
/// A MaterializationUnit represents a set of symbol definitions that can
@@ -199,35 +246,41 @@ private:
/// overriding definitions are encountered).
///
/// MaterializationUnits are used when providing lazy definitions of symbols to
-/// VSOs. The VSO will call materialize when the address of a symbol is
-/// requested via the lookup method. The VSO will call discard if a stronger
-/// definition is added or already present.
+/// JITDylibs. The JITDylib will call materialize when the address of a symbol
+/// is requested via the lookup method. The JITDylib will call discard if a
+/// stronger definition is added or already present.
class MaterializationUnit {
public:
- MaterializationUnit(SymbolFlagsMap InitalSymbolFlags)
- : SymbolFlags(std::move(InitalSymbolFlags)) {}
+ MaterializationUnit(SymbolFlagsMap InitalSymbolFlags, VModuleKey K)
+ : SymbolFlags(std::move(InitalSymbolFlags)), K(std::move(K)) {}
virtual ~MaterializationUnit() {}
+ /// Return the name of this materialization unit. Useful for debugging
+ /// output.
+ virtual StringRef getName() const = 0;
+
/// Return the set of symbols that this source provides.
const SymbolFlagsMap &getSymbols() const { return SymbolFlags; }
/// Called by materialization dispatchers (see
/// ExecutionSession::DispatchMaterializationFunction) to trigger
/// materialization of this MaterializationUnit.
- void doMaterialize(VSO &V) {
- materialize(MaterializationResponsibility(V, std::move(SymbolFlags)));
+ void doMaterialize(JITDylib &JD) {
+ materialize(MaterializationResponsibility(JD, std::move(SymbolFlags),
+ std::move(K)));
}
- /// Called by VSOs to notify MaterializationUnits that the given symbol has
- /// been overridden.
- void doDiscard(const VSO &V, SymbolStringPtr Name) {
+ /// Called by JITDylibs to notify MaterializationUnits that the given symbol
+ /// has been overridden.
+ void doDiscard(const JITDylib &JD, const SymbolStringPtr &Name) {
SymbolFlags.erase(Name);
- discard(V, std::move(Name));
+ discard(JD, std::move(Name));
}
protected:
SymbolFlagsMap SymbolFlags;
+ VModuleKey K;
private:
virtual void anchor();
@@ -241,7 +294,7 @@ private:
/// from the source (e.g. if the source is an LLVM IR Module and the
/// symbol is a function, delete the function body or mark it available
/// externally).
- virtual void discard(const VSO &V, SymbolStringPtr Name) = 0;
+ virtual void discard(const JITDylib &JD, const SymbolStringPtr &Name) = 0;
};
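
The materialize/discard contract above can be illustrated with a minimal custom unit. This is only a sketch under the API shown in this header (the class name, symbol name, and address are hypothetical, and it assumes MaterializationResponsibility's resolve/emit methods from this version of the interface); it resolves one symbol to a fixed address on first lookup and has nothing to clean up on discard:

\code{.cpp}
class FixedAddressMaterializationUnit : public MaterializationUnit {
public:
  FixedAddressMaterializationUnit(SymbolStringPtr Name, JITTargetAddress Addr)
      : MaterializationUnit({{Name, JITSymbolFlags::Exported}}, VModuleKey()),
        Name(std::move(Name)), Addr(Addr) {}

  StringRef getName() const override { return "FixedAddressMU"; }

private:
  void materialize(MaterializationResponsibility R) override {
    // Report the address for the symbol, then mark it emitted.
    R.resolve({{Name, JITEvaluatedSymbol(Addr, JITSymbolFlags::Exported)}});
    R.emit();
  }

  void discard(const JITDylib &JD, const SymbolStringPtr &Sym) override {
    // Nothing to free: the definition is just an address.
  }

  SymbolStringPtr Name;
  JITTargetAddress Addr;
};
\endcode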
using MaterializationUnitList =
@@ -253,30 +306,32 @@ using MaterializationUnitList =
/// materialized.
class AbsoluteSymbolsMaterializationUnit : public MaterializationUnit {
public:
- AbsoluteSymbolsMaterializationUnit(SymbolMap Symbols);
+ AbsoluteSymbolsMaterializationUnit(SymbolMap Symbols, VModuleKey K);
+
+ StringRef getName() const override;
private:
void materialize(MaterializationResponsibility R) override;
- void discard(const VSO &V, SymbolStringPtr Name) override;
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
static SymbolFlagsMap extractFlags(const SymbolMap &Symbols);
SymbolMap Symbols;
};
/// Create an AbsoluteSymbolsMaterializationUnit with the given symbols.
-/// Useful for inserting absolute symbols into a VSO. E.g.:
+/// Useful for inserting absolute symbols into a JITDylib. E.g.:
/// \code{.cpp}
-/// VSO &V = ...;
+/// JITDylib &JD = ...;
/// SymbolStringPtr Foo = ...;
/// JITEvaluatedSymbol FooSym = ...;
-/// if (auto Err = V.define(absoluteSymbols({{Foo, FooSym}})))
+/// if (auto Err = JD.define(absoluteSymbols({{Foo, FooSym}})))
/// return Err;
/// \endcode
///
inline std::unique_ptr<AbsoluteSymbolsMaterializationUnit>
-absoluteSymbols(SymbolMap Symbols) {
+absoluteSymbols(SymbolMap Symbols, VModuleKey K = VModuleKey()) {
return llvm::make_unique<AbsoluteSymbolsMaterializationUnit>(
- std::move(Symbols));
+ std::move(Symbols), std::move(K));
}
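
For the new key parameter, a hedged sketch (the session, names, and the 0x1000 address are hypothetical) of tagging an absolute-symbol unit with a freshly allocated VModuleKey:

\code{.cpp}
ExecutionSession ES;
JITDylib &JD = ES.getMainJITDylib();
SymbolStringPtr Foo = ES.intern("foo");
JITEvaluatedSymbol FooSym(0x1000, JITSymbolFlags::Exported);
// The key lets later layers associate resources with this definition.
if (auto Err = JD.define(absoluteSymbols({{Foo, FooSym}}, ES.allocateVModule())))
  ES.reportError(std::move(Err));
\endcode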
struct SymbolAliasMapEntry {
@@ -289,191 +344,87 @@ struct SymbolAliasMapEntry {
};
/// A map of Symbols to (Symbol, Flags) pairs.
-using SymbolAliasMap = std::map<SymbolStringPtr, SymbolAliasMapEntry>;
+using SymbolAliasMap = DenseMap<SymbolStringPtr, SymbolAliasMapEntry>;
/// A materialization unit for symbol aliases. Allows existing symbols to be
/// aliased with alternate flags.
class ReExportsMaterializationUnit : public MaterializationUnit {
public:
- /// SourceVSO is allowed to be nullptr, in which case the source VSO is
- /// taken to be whatever VSO these definitions are materialized in. This
- /// is useful for defining aliases within a VSO.
+ /// SourceJD is allowed to be nullptr, in which case the source JITDylib is
+ /// taken to be whatever JITDylib these definitions are materialized in (and
+ /// MatchNonExported has no effect). This is useful for defining aliases
+ /// within a JITDylib.
///
/// Note: Care must be taken that no sets of aliases form a cycle, as such
/// a cycle will result in a deadlock when any symbol in the cycle is
/// resolved.
- ReExportsMaterializationUnit(VSO *SourceVSO, SymbolAliasMap Aliases);
+ ReExportsMaterializationUnit(JITDylib *SourceJD, bool MatchNonExported,
+ SymbolAliasMap Aliases, VModuleKey K);
+
+ StringRef getName() const override;
private:
void materialize(MaterializationResponsibility R) override;
- void discard(const VSO &V, SymbolStringPtr Name) override;
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases);
- VSO *SourceVSO = nullptr;
+ JITDylib *SourceJD = nullptr;
+ bool MatchNonExported = false;
SymbolAliasMap Aliases;
};
/// Create a ReExportsMaterializationUnit with the given aliases.
-/// Useful for defining symbol aliases.: E.g., given a VSO V containing symbols
-/// "foo" and "bar", we can define aliases "baz" (for "foo") and "qux" (for
-/// "bar") with:
-/// \code{.cpp}
+/// Useful for defining symbol aliases. E.g., given a JITDylib JD containing
+/// symbols "foo" and "bar", we can define aliases "baz" (for "foo") and "qux"
+/// (for "bar") with: \code{.cpp}
/// SymbolStringPtr Baz = ...;
/// SymbolStringPtr Qux = ...;
-/// if (auto Err = V.define(symbolAliases({
+/// if (auto Err = JD.define(symbolAliases({
/// {Baz, { Foo, JITSymbolFlags::Exported }},
/// {Qux, { Bar, JITSymbolFlags::Weak }}}))
/// return Err;
/// \endcode
inline std::unique_ptr<ReExportsMaterializationUnit>
-symbolAliases(SymbolAliasMap Aliases) {
- return llvm::make_unique<ReExportsMaterializationUnit>(nullptr,
- std::move(Aliases));
+symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) {
+ return llvm::make_unique<ReExportsMaterializationUnit>(
+ nullptr, true, std::move(Aliases), std::move(K));
}
-/// Create a materialization unit for re-exporting symbols from another VSO
+/// Create a materialization unit for re-exporting symbols from another JITDylib
/// with alternative names/flags.
+/// If MatchNonExported is true then non-exported symbols from SourceJD can be
+/// re-exported. If it is false, attempts to re-export a non-exported symbol
+/// will result in a "symbol not found" error.
inline std::unique_ptr<ReExportsMaterializationUnit>
-reexports(VSO &SourceV, SymbolAliasMap Aliases) {
- return llvm::make_unique<ReExportsMaterializationUnit>(&SourceV,
- std::move(Aliases));
+reexports(JITDylib &SourceJD, SymbolAliasMap Aliases,
+ bool MatchNonExported = false, VModuleKey K = VModuleKey()) {
+ return llvm::make_unique<ReExportsMaterializationUnit>(
+ &SourceJD, MatchNonExported, std::move(Aliases), std::move(K));
}
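
A hedged sketch of the MatchNonExported flag described above (ES, MainJD, and LibJD are hypothetical):

\code{.cpp}
// Re-export LibJD's "foo" into MainJD as "foo_alias", allowing the alias to
// bind to a non-exported (hidden) definition in LibJD.
SymbolAliasMap Aliases;
Aliases[ES.intern("foo_alias")] = {ES.intern("foo"), JITSymbolFlags::Exported};
if (auto Err = MainJD.define(reexports(LibJD, std::move(Aliases),
                                       /*MatchNonExported=*/true)))
  ES.reportError(std::move(Err));
\endcode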
/// Build a SymbolAliasMap for the common case where you want to re-export
-/// symbols from another VSO with the same linkage/flags.
+/// symbols from another JITDylib with the same linkage/flags.
Expected<SymbolAliasMap>
-buildSimpleReexportsAliasMap(VSO &SourceV, const SymbolNameSet &Symbols);
-
-/// Base utilities for ExecutionSession.
-class ExecutionSessionBase {
- // FIXME: Remove this when we remove the old ORC layers.
- friend class VSO;
+buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols);
+/// ReexportsGenerator can be used with JITDylib::setGenerator to automatically
+/// re-export a subset of the source JITDylib's symbols in the target.
+class ReexportsGenerator {
public:
- /// For reporting errors.
- using ErrorReporter = std::function<void(Error)>;
-
- /// For dispatching MaterializationUnit::materialize calls.
- using DispatchMaterializationFunction =
- std::function<void(VSO &V, std::unique_ptr<MaterializationUnit> MU)>;
-
- /// Construct an ExecutionSessionBase.
- ///
- /// SymbolStringPools may be shared between ExecutionSessions.
- ExecutionSessionBase(std::shared_ptr<SymbolStringPool> SSP = nullptr)
- : SSP(SSP ? std::move(SSP) : std::make_shared<SymbolStringPool>()) {}
-
- /// Returns the SymbolStringPool for this ExecutionSession.
- SymbolStringPool &getSymbolStringPool() const { return *SSP; }
-
- /// Run the given lambda with the session mutex locked.
- template <typename Func> auto runSessionLocked(Func &&F) -> decltype(F()) {
- std::lock_guard<std::recursive_mutex> Lock(SessionMutex);
- return F();
- }
-
- /// Set the error reporter function.
- ExecutionSessionBase &setErrorReporter(ErrorReporter ReportError) {
- this->ReportError = std::move(ReportError);
- return *this;
- }
-
- /// Set the materialization dispatch function.
- ExecutionSessionBase &setDispatchMaterialization(
- DispatchMaterializationFunction DispatchMaterialization) {
- this->DispatchMaterialization = std::move(DispatchMaterialization);
- return *this;
- }
-
- /// Report a error for this execution session.
- ///
- /// Unhandled errors can be sent here to log them.
- void reportError(Error Err) { ReportError(std::move(Err)); }
+ using SymbolPredicate = std::function<bool(SymbolStringPtr)>;
- /// Allocate a module key for a new module to add to the JIT.
- VModuleKey allocateVModule() { return ++LastKey; }
+ /// Create a reexports generator. If an Allow predicate is passed, only
+ /// symbols for which the predicate returns true will be reexported. If no
+ /// Allow predicate is passed, all symbols will be reexported.
+ ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false,
+ SymbolPredicate Allow = SymbolPredicate());
- /// Return a module key to the ExecutionSession so that it can be
- /// re-used. This should only be done once all resources associated
- /// with the original key have been released.
- void releaseVModule(VModuleKey Key) { /* FIXME: Recycle keys */
- }
-
- void legacyFailQuery(AsynchronousSymbolQuery &Q, Error Err);
-
- using LegacyAsyncLookupFunction = std::function<SymbolNameSet(
- std::shared_ptr<AsynchronousSymbolQuery> Q, SymbolNameSet Names)>;
-
- /// A legacy lookup function for JITSymbolResolverAdapter.
- /// Do not use -- this will be removed soon.
- Expected<SymbolMap>
- legacyLookup(ExecutionSessionBase &ES, LegacyAsyncLookupFunction AsyncLookup,
- SymbolNameSet Names, bool WaiUntilReady,
- RegisterDependenciesFunction RegisterDependencies);
-
- /// Search the given VSO list for the given symbols.
- ///
- ///
- /// The OnResolve callback will be called once all requested symbols are
- /// resolved, or if an error occurs prior to resolution.
- ///
- /// The OnReady callback will be called once all requested symbols are ready,
- /// or if an error occurs after resolution but before all symbols are ready.
- ///
- /// If all symbols are found, the RegisterDependencies function will be called
- /// while the session lock is held. This gives clients a chance to register
- /// dependencies for on the queried symbols for any symbols they are
- /// materializing (if a MaterializationResponsibility instance is present,
- /// this can be implemented by calling
- /// MaterializationResponsibility::addDependencies). If there are no
- /// dependenant symbols for this query (e.g. it is being made by a top level
- /// client to get an address to call) then the value NoDependenciesToRegister
- /// can be used.
- void lookup(const VSOList &VSOs, const SymbolNameSet &Symbols,
- SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
- RegisterDependenciesFunction RegisterDependencies);
-
- /// Blocking version of lookup above. Returns the resolved symbol map.
- /// If WaitUntilReady is true (the default), will not return until all
- /// requested symbols are ready (or an error occurs). If WaitUntilReady is
- /// false, will return as soon as all requested symbols are resolved,
- /// or an error occurs. If WaitUntilReady is false and an error occurs
- /// after resolution, the function will return a success value, but the
- /// error will be reported via reportErrors.
- Expected<SymbolMap> lookup(const VSOList &VSOs, const SymbolNameSet &Symbols,
- RegisterDependenciesFunction RegisterDependencies,
- bool WaitUntilReady = true);
-
- /// Materialize the given unit.
- void dispatchMaterialization(VSO &V,
- std::unique_ptr<MaterializationUnit> MU) {
- DispatchMaterialization(V, std::move(MU));
- }
+ SymbolNameSet operator()(JITDylib &JD, const SymbolNameSet &Names);
private:
- static void logErrorsToStdErr(Error Err) {
- logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: ");
- }
-
- static void
- materializeOnCurrentThread(VSO &V, std::unique_ptr<MaterializationUnit> MU) {
- MU->doMaterialize(V);
- }
-
- void runOutstandingMUs();
-
- mutable std::recursive_mutex SessionMutex;
- std::shared_ptr<SymbolStringPool> SSP;
- VModuleKey LastKey = 0;
- ErrorReporter ReportError = logErrorsToStdErr;
- DispatchMaterializationFunction DispatchMaterialization =
- materializeOnCurrentThread;
-
- // FIXME: Remove this (and runOutstandingMUs) once the linking layer works
- // with callbacks from asynchronous queries.
- mutable std::recursive_mutex OutstandingMUsMutex;
- std::vector<std::pair<VSO *, std::unique_ptr<MaterializationUnit>>>
- OutstandingMUs;
+ JITDylib &SourceJD;
+ bool MatchNonExported = false;
+ SymbolPredicate Allow;
};
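
A hedged sketch of installing the generator described above (MainJD and LibJD are hypothetical; the predicate assumes SymbolStringPtr dereferences to a StringRef):

\code{.cpp}
// Automatically re-export LibJD symbols whose names start with "lib_".
MainJD.setGenerator(ReexportsGenerator(
    LibJD, /*MatchNonExported=*/false,
    [](SymbolStringPtr Name) { return (*Name).startswith("lib_"); }));
\endcode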
/// A symbol query that returns results via a callback when results are
@@ -481,8 +432,9 @@ private:
///
/// makes a callback when all symbols are available.
class AsynchronousSymbolQuery {
- friend class ExecutionSessionBase;
- friend class VSO;
+ friend class ExecutionSession;
+ friend class JITDylib;
+ friend class JITSymbolResolverAdapter;
public:
@@ -517,9 +469,9 @@ public:
void handleFullyReady();
private:
- void addQueryDependence(VSO &V, SymbolStringPtr Name);
+ void addQueryDependence(JITDylib &JD, SymbolStringPtr Name);
- void removeQueryDependence(VSO &V, const SymbolStringPtr &Name);
+ void removeQueryDependence(JITDylib &JD, const SymbolStringPtr &Name);
bool canStillFail();
@@ -539,110 +491,118 @@ private:
///
/// Represents a virtual shared object. Instances can not be copied or moved, so
/// their addresses may be used as keys for resource management.
-/// VSO state changes must be made via an ExecutionSession to guarantee that
-/// they are synchronized with respect to other VSO operations.
-class VSO {
+/// JITDylib state changes must be made via an ExecutionSession to guarantee
+/// that they are synchronized with respect to other JITDylib operations.
+class JITDylib {
friend class AsynchronousSymbolQuery;
friend class ExecutionSession;
- friend class ExecutionSessionBase;
friend class MaterializationResponsibility;
public:
- using FallbackDefinitionGeneratorFunction =
- std::function<SymbolNameSet(VSO &Parent, const SymbolNameSet &Names)>;
+ using GeneratorFunction = std::function<SymbolNameSet(
+ JITDylib &Parent, const SymbolNameSet &Names)>;
using AsynchronousSymbolQuerySet =
- std::set<std::shared_ptr<AsynchronousSymbolQuery>>;
+ std::set<std::shared_ptr<AsynchronousSymbolQuery>>;
- VSO(const VSO &) = delete;
- VSO &operator=(const VSO &) = delete;
- VSO(VSO &&) = delete;
- VSO &operator=(VSO &&) = delete;
+ JITDylib(const JITDylib &) = delete;
+ JITDylib &operator=(const JITDylib &) = delete;
+ JITDylib(JITDylib &&) = delete;
+ JITDylib &operator=(JITDylib &&) = delete;
- /// Get the name for this VSO.
- const std::string &getName() const { return VSOName; }
+ /// Get the name for this JITDylib.
+ const std::string &getName() const { return JITDylibName; }
- /// Get a reference to the ExecutionSession for this VSO.
- ExecutionSessionBase &getExecutionSession() const { return ES; }
+ /// Get a reference to the ExecutionSession for this JITDylib.
+ ExecutionSession &getExecutionSession() const { return ES; }
- /// Set a fallback defenition generator. If set, lookup and lookupFlags will
- /// pass the unresolved symbols set to the fallback definition generator,
- /// allowing it to add a new definition to the VSO.
- void setFallbackDefinitionGenerator(
- FallbackDefinitionGeneratorFunction FallbackDefinitionGenerator) {
- this->FallbackDefinitionGenerator = std::move(FallbackDefinitionGenerator);
+ /// Set a definition generator. If set, whenever a symbol fails to resolve
+ /// within this JITDylib, lookup and lookupFlags will pass the unresolved
+ /// symbols set to the definition generator. The generator can optionally
+ /// add a definition for the unresolved symbols to the dylib.
+ void setGenerator(GeneratorFunction DefGenerator) {
+ this->DefGenerator = std::move(DefGenerator);
}
- /// Set the search order to be used when fixing up definitions in VSO.
+ /// Set the search order to be used when fixing up definitions in JITDylib.
/// This will replace the previous search order, and apply to any symbol
- /// resolutions made for definitions in this VSO after the call to
+ /// resolutions made for definitions in this JITDylib after the call to
/// setSearchOrder (even if the definition itself was added before the
/// call).
///
- /// If SearchThisVSOFirst is set, which by default it is, then this VSO will
- /// add itself to the beginning of the SearchOrder (Clients should *not*
- /// put this VSO in the list in this case, to avoid redundant lookups).
+ /// If SearchThisJITDylibFirst is set, which by default it is, then this
+ /// JITDylib will add itself to the beginning of the SearchOrder (Clients
+ /// should *not* put this JITDylib in the list in this case, to avoid
+ /// redundant lookups).
///
- /// If SearchThisVSOFirst is false then the search order will be used as
+ /// If SearchThisJITDylibFirst is false then the search order will be used as
/// given. The main motivation for this feature is to support deliberate
- /// shadowing of symbols in this VSO by a facade VSO. For example, the
- /// facade may resolve function names to stubs, and the stubs may compile
+ /// shadowing of symbols in this JITDylib by a facade JITDylib. For example,
+ /// the facade may resolve function names to stubs, and the stubs may compile
/// lazily by looking up symbols in this dylib. Adding the facade dylib
/// as the first in the search order (instead of this dylib) ensures that
/// definitions within this dylib resolve to the lazy-compiling stubs,
/// rather than immediately materializing the definitions in this dylib.
- void setSearchOrder(VSOList NewSearchOrder, bool SearchThisVSOFirst = true);
+ void setSearchOrder(JITDylibSearchList NewSearchOrder,
+ bool SearchThisJITDylibFirst = true,
+ bool MatchNonExportedInThisDylib = true);
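
A hedged sketch of the facade scenario from the comment above (JD and FacadeJD are hypothetical):

\code{.cpp}
// Resolve JD's definitions through the facade's lazy-compiling stubs first,
// matching non-exported symbols in both, without implicitly prepending JD.
JD.setSearchOrder({{&FacadeJD, true}, {&JD, true}},
                  /*SearchThisJITDylibFirst=*/false);
\endcode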
- /// Add the given VSO to the search order for definitions in this VSO.
- void addToSearchOrder(VSO &V);
+ /// Add the given JITDylib to the search order for definitions in this
+ /// JITDylib.
+ void addToSearchOrder(JITDylib &JD, bool MatcNonExported = false);
- /// Replace OldV with NewV in the search order if OldV is present. Otherwise
- /// this operation is a no-op.
- void replaceInSearchOrder(VSO &OldV, VSO &NewV);
+ /// Replace OldJD with NewJD in the search order if OldJD is present.
+ /// Otherwise this operation is a no-op.
+ void replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD,
+ bool MatchNonExported = false);
- /// Remove the given VSO from the search order for this VSO if it is
+ /// Remove the given JITDylib from the search order for this JITDylib if it is
/// present. Otherwise this operation is a no-op.
- void removeFromSearchOrder(VSO &V);
+ void removeFromSearchOrder(JITDylib &JD);
/// Do something with the search order (run under the session lock).
template <typename Func>
auto withSearchOrderDo(Func &&F)
- -> decltype(F(std::declval<const VSOList &>())) {
- return ES.runSessionLocked([&]() { return F(SearchOrder); });
- }
+ -> decltype(F(std::declval<const JITDylibSearchList &>()));
- /// Define all symbols provided by the materialization unit to be part
- /// of the given VSO.
- template <typename UniquePtrToMaterializationUnit>
- typename std::enable_if<
- std::is_convertible<
- typename std::decay<UniquePtrToMaterializationUnit>::type,
- std::unique_ptr<MaterializationUnit>>::value,
- Error>::type
- define(UniquePtrToMaterializationUnit &&MU) {
- return ES.runSessionLocked([&, this]() -> Error {
- assert(MU && "Can't define with a null MU");
-
- if (auto Err = defineImpl(*MU))
- return Err;
-
- /// defineImpl succeeded.
- auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
- for (auto &KV : UMI->MU->getSymbols())
- UnmaterializedInfos[KV.first] = UMI;
-
- return Error::success();
- });
- }
+ /// Define all symbols provided by the materialization unit to be part of this
+ /// JITDylib.
+ ///
+ /// This overload always takes ownership of the MaterializationUnit. If any
+ /// errors occur, the MaterializationUnit is consumed.
+ template <typename MaterializationUnitType>
+ Error define(std::unique_ptr<MaterializationUnitType> &&MU);
- /// Search the given VSO for the symbols in Symbols. If found, store
+ /// Define all symbols provided by the materialization unit to be part of this
+ /// JITDylib.
+ ///
+ /// This overload only takes ownership of the MaterializationUnit if no error
+ /// is generated. If an error occurs, ownership remains with the caller. This
+ /// may allow the caller to modify the MaterializationUnit to correct the
+ /// issue, then re-call define.
+ template <typename MaterializationUnitType>
+ Error define(std::unique_ptr<MaterializationUnitType> &MU);
+
+ /// Tries to remove the given symbols.
+ ///
+ /// If any symbols are not defined in this JITDylib this method will return
+ /// a SymbolsNotFound error covering the missing symbols.
+ ///
+ /// If all symbols are found but some symbols are in the process of being
+ /// materialized this method will return a SymbolsCouldNotBeRemoved error.
+ ///
+ /// On success, all symbols are removed. On failure, the JITDylib state is
+ /// left unmodified (no symbols are removed).
+ Error remove(const SymbolNameSet &Names);
+
+ /// Search the given JITDylib for the symbols in Symbols. If found, store
/// the flags for each symbol in Flags. Returns any unresolved symbols.
SymbolFlagsMap lookupFlags(const SymbolNameSet &Names);
- /// Dump current VSO state to OS.
+ /// Dump current JITDylib state to OS.
void dump(raw_ostream &OS);
/// FIXME: Remove this when we remove the old ORC layers.
- /// Search the given VSOs in order for the symbols in Symbols. Results
+ /// Search the given JITDylibs in order for the symbols in Symbols. Results
/// (once they become available) will be returned via the given Query.
///
/// If any symbol is not found then the unresolved symbols will be returned,
@@ -664,16 +624,16 @@ private:
};
using UnmaterializedInfosMap =
- std::map<SymbolStringPtr, std::shared_ptr<UnmaterializedInfo>>;
+ DenseMap<SymbolStringPtr, std::shared_ptr<UnmaterializedInfo>>;
struct MaterializingInfo {
AsynchronousSymbolQueryList PendingQueries;
SymbolDependenceMap Dependants;
- SymbolDependenceMap UnfinalizedDependencies;
- bool IsFinalized = false;
+ SymbolDependenceMap UnemittedDependencies;
+ bool IsEmitted = false;
};
- using MaterializingInfosMap = std::map<SymbolStringPtr, MaterializingInfo>;
+ using MaterializingInfosMap = DenseMap<SymbolStringPtr, MaterializingInfo>;
using LookupImplActionFlags = enum {
None = 0,
@@ -682,7 +642,7 @@ private:
LLVM_MARK_AS_BITMASK_ENUM(NotifyFullyReady)
};
- VSO(ExecutionSessionBase &ES, std::string Name);
+ JITDylib(ExecutionSession &ES, std::string Name);
Error defineImpl(MaterializationUnit &MU);
@@ -690,10 +650,12 @@ private:
const SymbolNameSet &Names);
void lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, MaterializationUnitList &MUs);
+ SymbolNameSet &Unresolved, bool MatchNonExported,
+ MaterializationUnitList &MUs);
void lodgeQueryImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, MaterializationUnitList &MUs);
+ SymbolNameSet &Unresolved, bool MatchNonExported,
+ MaterializationUnitList &MUs);
LookupImplActionFlags
lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
@@ -703,77 +665,266 @@ private:
void detachQueryHelper(AsynchronousSymbolQuery &Q,
const SymbolNameSet &QuerySymbols);
- void transferFinalizedNodeDependencies(MaterializingInfo &DependantMI,
- const SymbolStringPtr &DependantName,
- MaterializingInfo &FinalizedMI);
+ void transferEmittedNodeDependencies(MaterializingInfo &DependantMI,
+ const SymbolStringPtr &DependantName,
+ MaterializingInfo &EmittedMI);
Error defineMaterializing(const SymbolFlagsMap &SymbolFlags);
void replace(std::unique_ptr<MaterializationUnit> MU);
- SymbolNameSet getRequestedSymbols(const SymbolFlagsMap &SymbolFlags);
+ SymbolNameSet getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const;
void addDependencies(const SymbolStringPtr &Name,
const SymbolDependenceMap &Dependants);
void resolve(const SymbolMap &Resolved);
- void finalize(const SymbolFlagsMap &Finalized);
+ void emit(const SymbolFlagsMap &Emitted);
void notifyFailed(const SymbolNameSet &FailedSymbols);
- ExecutionSessionBase &ES;
- std::string VSOName;
+ ExecutionSession &ES;
+ std::string JITDylibName;
SymbolMap Symbols;
UnmaterializedInfosMap UnmaterializedInfos;
MaterializingInfosMap MaterializingInfos;
- FallbackDefinitionGeneratorFunction FallbackDefinitionGenerator;
- VSOList SearchOrder;
+ GeneratorFunction DefGenerator;
+ JITDylibSearchList SearchOrder;
};
/// An ExecutionSession represents a running JIT program.
-class ExecutionSession : public ExecutionSessionBase {
+class ExecutionSession {
+ // FIXME: Remove this when we remove the old ORC layers.
+ friend class JITDylib;
+
public:
+ /// For reporting errors.
using ErrorReporter = std::function<void(Error)>;
- using DispatchMaterializationFunction =
- std::function<void(VSO &V, std::unique_ptr<MaterializationUnit> MU)>;
+ /// For dispatching MaterializationUnit::materialize calls.
+ using DispatchMaterializationFunction = std::function<void(
+ JITDylib &JD, std::unique_ptr<MaterializationUnit> MU)>;
- /// Construct an ExecutionEngine.
+ /// Construct an ExecutionSession.
///
/// SymbolStringPools may be shared between ExecutionSessions.
- ExecutionSession(std::shared_ptr<SymbolStringPool> SSP = nullptr)
- : ExecutionSessionBase(std::move(SSP)) {}
+ ExecutionSession(std::shared_ptr<SymbolStringPool> SSP = nullptr);
+
+ /// Add a symbol name to the SymbolStringPool and return a pointer to it.
+ SymbolStringPtr intern(StringRef SymName) { return SSP->intern(SymName); }
+
+ /// Returns a shared_ptr to the SymbolStringPool for this ExecutionSession.
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() const { return SSP; }
+
+ /// Run the given lambda with the session mutex locked.
+ template <typename Func> auto runSessionLocked(Func &&F) -> decltype(F()) {
+ std::lock_guard<std::recursive_mutex> Lock(SessionMutex);
+ return F();
+ }
+
+ /// Get the "main" JITDylib, which is created automatically on construction of
+ /// the ExecutionSession.
+ JITDylib &getMainJITDylib();
+
+ /// Add a new JITDylib to this ExecutionSession.
+ JITDylib &createJITDylib(std::string Name,
+ bool AddToMainDylibSearchOrder = true);
+
+ /// Allocate a module key for a new module to add to the JIT.
+ VModuleKey allocateVModule() {
+ return runSessionLocked([this]() { return ++LastKey; });
+ }
+
+ /// Return a module key to the ExecutionSession so that it can be
+ /// re-used. This should only be done once all resources associated
+ /// with the original key have been released.
+ void releaseVModule(VModuleKey Key) { /* FIXME: Recycle keys */
+ }
+
+ /// Set the error reporter function.
+ ExecutionSession &setErrorReporter(ErrorReporter ReportError) {
+ this->ReportError = std::move(ReportError);
+ return *this;
+ }
+
+ /// Report a error for this execution session.
+ ///
+ /// Unhandled errors can be sent here to log them.
+ void reportError(Error Err) { ReportError(std::move(Err)); }
+
+ /// Set the materialization dispatch function.
+ ExecutionSession &setDispatchMaterialization(
+ DispatchMaterializationFunction DispatchMaterialization) {
+ this->DispatchMaterialization = std::move(DispatchMaterialization);
+ return *this;
+ }
+
+ void legacyFailQuery(AsynchronousSymbolQuery &Q, Error Err);
+
+ using LegacyAsyncLookupFunction = std::function<SymbolNameSet(
+ std::shared_ptr<AsynchronousSymbolQuery> Q, SymbolNameSet Names)>;
+
+ /// A legacy lookup function for JITSymbolResolverAdapter.
+ /// Do not use -- this will be removed soon.
+ Expected<SymbolMap>
+ legacyLookup(LegacyAsyncLookupFunction AsyncLookup, SymbolNameSet Names,
+ bool WaiUntilReady,
+ RegisterDependenciesFunction RegisterDependencies);
+
+ /// Search the given JITDylib list for the given symbols.
+ ///
+ /// SearchOrder lists the JITDylibs to search. For each dylib, the associated
+ /// boolean indicates whether the search should match against non-exported
+ /// (hidden visibility) symbols in that dylib (true means match against
+ /// non-exported symbols, false means do not match).
+ ///
+ /// The OnResolve callback will be called once all requested symbols are
+ /// resolved, or if an error occurs prior to resolution.
+ ///
+ /// The OnReady callback will be called once all requested symbols are ready,
+ /// or if an error occurs after resolution but before all symbols are ready.
+ ///
+ /// If all symbols are found, the RegisterDependencies function will be called
+ /// while the session lock is held. This gives clients a chance to register
+ /// dependencies on the queried symbols for any symbols they are
+ /// materializing (if a MaterializationResponsibility instance is present,
+ /// this can be implemented by calling
+ /// MaterializationResponsibility::addDependencies). If there are no
+ /// dependent symbols for this query (e.g. it is being made by a top level
+ /// client to get an address to call) then the value NoDependenciesToRegister
+ /// can be used.
+ void lookup(const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols,
+ SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
+ RegisterDependenciesFunction RegisterDependencies);
- /// Add a new VSO to this ExecutionSession.
- VSO &createVSO(std::string Name);
+ /// Blocking version of lookup above. Returns the resolved symbol map.
+ /// If WaitUntilReady is true (the default), will not return until all
+ /// requested symbols are ready (or an error occurs). If WaitUntilReady is
+ /// false, will return as soon as all requested symbols are resolved,
+ /// or an error occurs. If WaitUntilReady is false and an error occurs
+ /// after resolution, the function will return a success value, but the
+ /// error will be reported via reportErrors.
+ Expected<SymbolMap> lookup(const JITDylibSearchList &SearchOrder,
+ const SymbolNameSet &Symbols,
+ RegisterDependenciesFunction RegisterDependencies =
+ NoDependenciesToRegister,
+ bool WaitUntilReady = true);
+
+ /// Convenience version of blocking lookup.
+ /// Searches each of the JITDylibs in the search order in turn for the given
+ /// symbol.
+ Expected<JITEvaluatedSymbol> lookup(const JITDylibSearchList &SearchOrder,
+ SymbolStringPtr Symbol);
+
+ /// Convenience version of blocking lookup.
+ /// Searches each of the JITDylibs in the search order in turn for the given
+ /// symbol. The search will not find non-exported symbols.
+ Expected<JITEvaluatedSymbol> lookup(ArrayRef<JITDylib *> SearchOrder,
+ SymbolStringPtr Symbol);
+
+ /// Convenience version of blocking lookup.
+ /// Searches each of the JITDylibs in the search order in turn for the given
+ /// symbol. The search will not find non-exported symbols.
+ Expected<JITEvaluatedSymbol> lookup(ArrayRef<JITDylib *> SearchOrder,
+ StringRef Symbol);
+
+ /// Materialize the given unit.
+ void dispatchMaterialization(JITDylib &JD,
+ std::unique_ptr<MaterializationUnit> MU) {
+ LLVM_DEBUG(runSessionLocked([&]() {
+ dbgs() << "Compiling, for " << JD.getName() << ", " << *MU
+ << "\n";
+ }););
+ DispatchMaterialization(JD, std::move(MU));
+ }
+
+ /// Dump the state of all the JITDylibs in this session.
+ void dump(raw_ostream &OS);
private:
- std::vector<std::unique_ptr<VSO>> VSOs;
+ static void logErrorsToStdErr(Error Err) {
+ logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: ");
+ }
+
+ static void
+ materializeOnCurrentThread(JITDylib &JD,
+ std::unique_ptr<MaterializationUnit> MU) {
+ MU->doMaterialize(JD);
+ }
+
+ void runOutstandingMUs();
+
+ mutable std::recursive_mutex SessionMutex;
+ std::shared_ptr<SymbolStringPool> SSP;
+ VModuleKey LastKey = 0;
+ ErrorReporter ReportError = logErrorsToStdErr;
+ DispatchMaterializationFunction DispatchMaterialization =
+ materializeOnCurrentThread;
+
+ std::vector<std::unique_ptr<JITDylib>> JDs;
+
+ // FIXME: Remove this (and runOutstandingMUs) once the linking layer works
+ // with callbacks from asynchronous queries.
+ mutable std::recursive_mutex OutstandingMUsMutex;
+ std::vector<std::pair<JITDylib *, std::unique_ptr<MaterializationUnit>>>
+ OutstandingMUs;
};
-/// Look up the given names in the given VSOs.
-/// VSOs will be searched in order and no VSO pointer may be null.
-/// All symbols must be found within the given VSOs or an error
-/// will be returned.
-Expected<SymbolMap> lookup(const VSOList &VSOs, SymbolNameSet Names);
+template <typename Func>
+auto JITDylib::withSearchOrderDo(Func &&F)
+ -> decltype(F(std::declval<const JITDylibSearchList &>())) {
+ return ES.runSessionLocked([&]() { return F(SearchOrder); });
+}
+
+template <typename MaterializationUnitType>
+Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU) {
+ assert(MU && "Can not define with a null MU");
+ return ES.runSessionLocked([&, this]() -> Error {
+ if (auto Err = defineImpl(*MU))
+ return Err;
-/// Look up a symbol by searching a list of VSOs.
-Expected<JITEvaluatedSymbol> lookup(const VSOList &VSOs, SymbolStringPtr Name);
+ /// defineImpl succeeded.
+ auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
+ for (auto &KV : UMI->MU->getSymbols())
+ UnmaterializedInfos[KV.first] = UMI;
+
+ return Error::success();
+ });
+}
+
+template <typename MaterializationUnitType>
+Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU) {
+ assert(MU && "Can not define with a null MU");
+
+ return ES.runSessionLocked([&, this]() -> Error {
+ if (auto Err = defineImpl(*MU))
+ return Err;
+
+ /// defineImpl succeeded.
+ auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
+ for (auto &KV : UMI->MU->getSymbols())
+ UnmaterializedInfos[KV.first] = UMI;
+
+ return Error::success();
+ });
+}
/// Mangles symbol names then uniques them in the context of an
/// ExecutionSession.
class MangleAndInterner {
public:
- MangleAndInterner(ExecutionSessionBase &ES, const DataLayout &DL);
+ MangleAndInterner(ExecutionSession &ES, const DataLayout &DL);
SymbolStringPtr operator()(StringRef Name);
private:
- ExecutionSessionBase &ES;
+ ExecutionSession &ES;
const DataLayout &DL;
};
} // End namespace orc
} // End namespace llvm
+#undef DEBUG_TYPE // "orc"
+
#endif // LLVM_EXECUTIONENGINE_ORC_CORE_H
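
Tying the new Core.h pieces together, a hedged end-to-end sketch (JITDylibSearchList is the (JITDylib*, match-non-exported) pair list used throughout this header; the names and 0x2000 address are hypothetical):

\code{.cpp}
ExecutionSession ES;
JITDylib &LibJD = ES.createJITDylib("libutil");
auto Bar = ES.intern("bar");
cantFail(LibJD.define(absoluteSymbols(
    {{Bar, JITEvaluatedSymbol(0x2000, JITSymbolFlags::Exported)}})));
// Blocking convenience lookup; 'true' also matches non-exported symbols.
auto Sym = ES.lookup(JITDylibSearchList({{&LibJD, true}}), Bar);
if (!Sym)
  ES.reportError(Sym.takeError());
\endcode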
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
index e27f6e1e2cd6..88559f822e5d 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -21,7 +21,6 @@
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cstdint>
#include <string>
@@ -39,45 +38,6 @@ class Value;
namespace orc {
-/// A utility class for building TargetMachines for JITs.
-class JITTargetMachineBuilder {
-public:
- JITTargetMachineBuilder(Triple TT);
- static Expected<JITTargetMachineBuilder> detectHost();
- Expected<std::unique_ptr<TargetMachine>> createTargetMachine();
-
- JITTargetMachineBuilder &setArch(std::string Arch) {
- this->Arch = std::move(Arch);
- return *this;
- }
- JITTargetMachineBuilder &setCPU(std::string CPU) {
- this->CPU = std::move(CPU);
- return *this;
- }
- JITTargetMachineBuilder &setRelocationModel(Optional<Reloc::Model> RM) {
- this->RM = std::move(RM);
- return *this;
- }
- JITTargetMachineBuilder &setCodeModel(Optional<CodeModel::Model> CM) {
- this->CM = std::move(CM);
- return *this;
- }
- JITTargetMachineBuilder &
- addFeatures(const std::vector<std::string> &FeatureVec);
- SubtargetFeatures &getFeatures() { return Features; }
- TargetOptions &getOptions() { return Options; }
-
-private:
- Triple TT;
- std::string Arch;
- std::string CPU;
- SubtargetFeatures Features;
- TargetOptions Options;
- Optional<Reloc::Model> RM;
- Optional<CodeModel::Model> CM;
- CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
-};
-
/// This iterator provides a convenient way to iterate over the elements
/// of an llvm.global_ctors/llvm.global_dtors instance.
///
@@ -134,11 +94,11 @@ iterator_range<CtorDtorIterator> getDestructors(const Module &M);
/// Convenience class for recording constructor/destructor names for
/// later execution.
template <typename JITLayerT>
-class CtorDtorRunner {
+class LegacyCtorDtorRunner {
public:
/// Construct a CtorDtorRunner for the given range using the given
/// name mangling function.
- CtorDtorRunner(std::vector<std::string> CtorDtorNames, VModuleKey K)
+ LegacyCtorDtorRunner(std::vector<std::string> CtorDtorNames, VModuleKey K)
: CtorDtorNames(std::move(CtorDtorNames)), K(K) {}
/// Run the recorded constructors/destructors through the given JIT
@@ -169,9 +129,9 @@ private:
orc::VModuleKey K;
};
-class CtorDtorRunner2 {
+class CtorDtorRunner {
public:
- CtorDtorRunner2(VSO &V) : V(V) {}
+ CtorDtorRunner(JITDylib &JD) : JD(JD) {}
void add(iterator_range<CtorDtorIterator> CtorDtors);
Error run();
@@ -179,7 +139,7 @@ private:
using CtorDtorList = std::vector<SymbolStringPtr>;
using CtorDtorPriorityMap = std::map<unsigned, CtorDtorList>;
- VSO &V;
+ JITDylib &JD;
CtorDtorPriorityMap CtorDtorsByPriority;
};
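
A hedged sketch of the renamed (non-legacy) runner in use (ES, JD, and Module M are hypothetical; getConstructors is the iterator helper from this header):

\code{.cpp}
CtorDtorRunner CtorRunner(JD);
CtorRunner.add(getConstructors(M)); // record llvm.global_ctors entries by name
// ... after M has been emitted into JD:
if (auto Err = CtorRunner.run())    // look the ctors up in JD and call them
  ES.reportError(std::move(Err));
\endcode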
@@ -217,11 +177,11 @@ protected:
void *DSOHandle);
};
-class LocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
+class LegacyLocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
public:
/// Create a runtime-overrides class.
template <typename MangleFtorT>
- LocalCXXRuntimeOverrides(const MangleFtorT &Mangle) {
+ LegacyLocalCXXRuntimeOverrides(const MangleFtorT &Mangle) {
addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride));
addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride));
}
@@ -242,22 +202,44 @@ private:
StringMap<JITTargetAddress> CXXRuntimeOverrides;
};
-class LocalCXXRuntimeOverrides2 : public LocalCXXRuntimeOverridesBase {
+class LocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
public:
- Error enable(VSO &V, MangleAndInterner &Mangler);
+ Error enable(JITDylib &JD, MangleAndInterner &Mangler);
};
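
A hedged sketch of the non-legacy overrides in use (JD and a MangleAndInterner named Mangle are assumed):

\code{.cpp}
LocalCXXRuntimeOverrides CXXRuntimeOverrides;
// Defines __dso_handle and __cxa_atexit in JD so atexit handlers registered
// by JIT'd code can be run via runDestructors() instead of at process exit.
if (auto Err = CXXRuntimeOverrides.enable(JD, Mangle))
  ES.reportError(std::move(Err));
\endcode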
/// A utility class to expose symbols found via dlsym to the JIT.
///
-/// If an instance of this class is attached to a VSO as a fallback definition
-/// generator, then any symbol found in the given DynamicLibrary that passes
-/// the 'Allow' predicate will be added to the VSO.
-class DynamicLibraryFallbackGenerator {
+/// If an instance of this class is attached to a JITDylib as a fallback
+/// definition generator, then any symbol found in the given DynamicLibrary that
+/// passes the 'Allow' predicate will be added to the JITDylib.
+class DynamicLibrarySearchGenerator {
public:
using SymbolPredicate = std::function<bool(SymbolStringPtr)>;
- DynamicLibraryFallbackGenerator(sys::DynamicLibrary Dylib,
- const DataLayout &DL, SymbolPredicate Allow);
- SymbolNameSet operator()(VSO &V, const SymbolNameSet &Names);
+
+ /// Create a DynamicLibrarySearchGenerator that searches for symbols in the
+ /// given sys::DynamicLibrary.
+ /// If the Allow predicate is given then only symbols matching the predicate
+ /// will be searched for in the DynamicLibrary. If the predicate is not given
+ /// then all symbols will be searched for.
+ DynamicLibrarySearchGenerator(sys::DynamicLibrary Dylib, const DataLayout &DL,
+ SymbolPredicate Allow = SymbolPredicate());
+
+ /// Permanently loads the library at the given path and, on success, returns
+ /// a DynamicLibrarySearchGenerator that will search it for symbol
+ /// definitions. On failure returns the reason the library failed to load.
+ static Expected<DynamicLibrarySearchGenerator>
+ Load(const char *FileName, const DataLayout &DL,
+ SymbolPredicate Allow = SymbolPredicate());
+
+ /// Creates a DynamicLibrarySearchGenerator that searches for symbols in
+ /// the current process.
+ static Expected<DynamicLibrarySearchGenerator>
+ GetForCurrentProcess(const DataLayout &DL,
+ SymbolPredicate Allow = SymbolPredicate()) {
+ return Load(nullptr, DL, std::move(Allow));
+ }
+
+ SymbolNameSet operator()(JITDylib &JD, const SymbolNameSet &Names);
private:
sys::DynamicLibrary Dylib;
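
A hedged sketch of the common case, exposing the host process's own symbols to JIT'd code (ES and a DataLayout DL are assumed):

\code{.cpp}
auto ProcessSyms = DynamicLibrarySearchGenerator::GetForCurrentProcess(DL);
if (!ProcessSyms)
  ES.reportError(ProcessSyms.takeError());
else
  ES.getMainJITDylib().setGenerator(std::move(*ProcessSyms));
\endcode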
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index ad6481548d59..30d71e69cd70 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -28,21 +28,20 @@ class Module;
namespace orc {
-class IRCompileLayer2 : public IRLayer {
+class IRCompileLayer : public IRLayer {
public:
using CompileFunction =
std::function<Expected<std::unique_ptr<MemoryBuffer>>(Module &)>;
using NotifyCompiledFunction =
- std::function<void(VModuleKey K, std::unique_ptr<Module>)>;
+ std::function<void(VModuleKey K, ThreadSafeModule TSM)>;
- IRCompileLayer2(ExecutionSession &ES, ObjectLayer &BaseLayer,
- CompileFunction Compile);
+ IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
+ CompileFunction Compile);
void setNotifyCompiled(NotifyCompiledFunction NotifyCompiled);
- void emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) override;
+ void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
private:
mutable std::mutex IRLayerMutex;
@@ -57,15 +56,15 @@ private:
/// object file and adds this module file to the layer below, which must
/// implement the object layer concept.
template <typename BaseLayerT, typename CompileFtor>
-class IRCompileLayer {
+class LegacyIRCompileLayer {
public:
/// Callback type for notifications when modules are compiled.
using NotifyCompiledCallback =
std::function<void(VModuleKey K, std::unique_ptr<Module>)>;
- /// Construct an IRCompileLayer with the given BaseLayer, which must
+ /// Construct a LegacyIRCompileLayer with the given BaseLayer, which must
/// implement the ObjectLayer concept.
- IRCompileLayer(
+ LegacyIRCompileLayer(
BaseLayerT &BaseLayer, CompileFtor Compile,
NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback())
: BaseLayer(BaseLayer), Compile(std::move(Compile)),
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
index 266a0f45b3e4..49e65b9f2a80 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
@@ -23,24 +23,24 @@ namespace llvm {
class Module;
namespace orc {
-class IRTransformLayer2 : public IRLayer {
+class IRTransformLayer : public IRLayer {
public:
+ using TransformFunction = std::function<Expected<ThreadSafeModule>(
+ ThreadSafeModule, const MaterializationResponsibility &R)>;
- using TransformFunction =
- std::function<Expected<std::unique_ptr<Module>>(std::unique_ptr<Module>)>;
-
- IRTransformLayer2(ExecutionSession &ES, IRLayer &BaseLayer,
- TransformFunction Transform = identityTransform);
+ IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer,
+ TransformFunction Transform = identityTransform);
void setTransform(TransformFunction Transform) {
this->Transform = std::move(Transform);
}
- void emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) override;
+ void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
- static std::unique_ptr<Module> identityTransform(std::unique_ptr<Module> M) {
- return M;
+ static ThreadSafeModule
+ identityTransform(ThreadSafeModule TSM,
+ const MaterializationResponsibility &R) {
+ return TSM;
}
private:
@@ -53,11 +53,11 @@ private:
/// This layer applies a user supplied transform to each module that is added,
/// then adds the transformed module to the layer below.
template <typename BaseLayerT, typename TransformFtor>
-class IRTransformLayer {
+class LegacyIRTransformLayer {
public:
- /// Construct an IRTransformLayer with the given BaseLayer
- IRTransformLayer(BaseLayerT &BaseLayer,
+ /// Construct a LegacyIRTransformLayer with the given BaseLayer
+ LegacyIRTransformLayer(BaseLayerT &BaseLayer,
TransformFtor Transform = TransformFtor())
: BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
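
A hedged sketch of the new two-argument transform signature shown above (ES and an IRCompileLayer named CompileLayer are assumed):

\code{.cpp}
IRTransformLayer TransformLayer(
    ES, CompileLayer,
    [](ThreadSafeModule TSM,
       const MaterializationResponsibility &R) -> Expected<ThreadSafeModule> {
      // Optimize the module held by TSM here, then pass it on unchanged.
      return std::move(TSM);
    });
\endcode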
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index 8b0b3fdb7df4..c2527802f6a7 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -47,92 +47,101 @@ class Value;
namespace orc {
-/// Target-independent base class for compile callback management.
-class JITCompileCallbackManager {
+/// Base class for pools of compiler re-entry trampolines.
+/// These trampolines are callable addresses that save all register state
+/// before calling a supplied function to return the trampoline landing
+/// address, then restore all state before jumping to that address. They
+/// are used by various ORC APIs to support lazy compilation.
+class TrampolinePool {
public:
- using CompileFunction = std::function<JITTargetAddress()>;
+ virtual ~TrampolinePool() {}
- /// Construct a JITCompileCallbackManager.
- /// @param ErrorHandlerAddress The address of an error handler in the target
- /// process to be used if a compile callback fails.
- JITCompileCallbackManager(ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress)
- : ES(ES), CallbacksVSO(ES.createVSO("<Callbacks>")),
- ErrorHandlerAddress(ErrorHandlerAddress) {}
-
- virtual ~JITCompileCallbackManager() = default;
-
- /// Reserve a compile callback.
- Expected<JITTargetAddress> getCompileCallback(CompileFunction Compile);
+ /// Get an available trampoline address.
+ /// Returns an error if no trampoline can be created.
+ virtual Expected<JITTargetAddress> getTrampoline() = 0;
- /// Execute the callback for the given trampoline id. Called by the JIT
- /// to compile functions on demand.
- JITTargetAddress executeCompileCallback(JITTargetAddress TrampolineAddr);
+private:
+ virtual void anchor();
+};
-protected:
- std::vector<JITTargetAddress> AvailableTrampolines;
+/// A trampoline pool for trampolines within the current process.
+template <typename ORCABI> class LocalTrampolinePool : public TrampolinePool {
+public:
+ using GetTrampolineLandingFunction =
+ std::function<JITTargetAddress(JITTargetAddress TrampolineAddr)>;
+
+ /// Creates a LocalTrampolinePool with the given GetTrampolineLanding function.
+ /// Returns an error if this function is unable to correctly allocate, write
+ /// and protect the resolver code block.
+ static Expected<std::unique_ptr<LocalTrampolinePool>>
+ Create(GetTrampolineLandingFunction GetTrampolineLanding) {
+ Error Err = Error::success();
+
+ auto LTP = std::unique_ptr<LocalTrampolinePool>(
+ new LocalTrampolinePool(std::move(GetTrampolineLanding), Err));
+
+ if (Err)
+ return std::move(Err);
+ return std::move(LTP);
+ }
-private:
- Expected<JITTargetAddress> getAvailableTrampolineAddr() {
- if (this->AvailableTrampolines.empty())
+ /// Get a free trampoline. Returns an error if one can not be provided (e.g.
+ /// because the pool is empty and can not be grown).
+ Expected<JITTargetAddress> getTrampoline() override {
+ std::lock_guard<std::mutex> Lock(LTPMutex);
+ if (AvailableTrampolines.empty()) {
if (auto Err = grow())
return std::move(Err);
- assert(!this->AvailableTrampolines.empty() &&
- "Failed to grow available trampolines.");
- JITTargetAddress TrampolineAddr = this->AvailableTrampolines.back();
- this->AvailableTrampolines.pop_back();
+ }
+ assert(!AvailableTrampolines.empty() && "Failed to grow trampoline pool");
+ auto TrampolineAddr = AvailableTrampolines.back();
+ AvailableTrampolines.pop_back();
return TrampolineAddr;
}
- // Create new trampolines - to be implemented in subclasses.
- virtual Error grow() = 0;
+ /// Returns the given trampoline to the pool for re-use.
+ void releaseTrampoline(JITTargetAddress TrampolineAddr) {
+ std::lock_guard<std::mutex> Lock(LTPMutex);
+ AvailableTrampolines.push_back(TrampolineAddr);
+ }
- virtual void anchor();
+private:
+ static JITTargetAddress reenter(void *TrampolinePoolPtr, void *TrampolineId) {
+ LocalTrampolinePool<ORCABI> *TrampolinePool =
+ static_cast<LocalTrampolinePool *>(TrampolinePoolPtr);
+ return TrampolinePool->GetTrampolineLanding(static_cast<JITTargetAddress>(
+ reinterpret_cast<uintptr_t>(TrampolineId)));
+ }
- std::mutex CCMgrMutex;
- ExecutionSession &ES;
- VSO &CallbacksVSO;
- JITTargetAddress ErrorHandlerAddress;
- std::map<JITTargetAddress, SymbolStringPtr> AddrToSymbol;
- size_t NextCallbackId = 0;
-};
+ LocalTrampolinePool(GetTrampolineLandingFunction GetTrampolineLanding,
+ Error &Err)
+ : GetTrampolineLanding(std::move(GetTrampolineLanding)) {
-/// Manage compile callbacks for in-process JITs.
-template <typename TargetT>
-class LocalJITCompileCallbackManager : public JITCompileCallbackManager {
-public:
- /// Construct a InProcessJITCompileCallbackManager.
- /// @param ErrorHandlerAddress The address of an error handler in the target
- /// process to be used if a compile callback fails.
- LocalJITCompileCallbackManager(ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress)
- : JITCompileCallbackManager(ES, ErrorHandlerAddress) {
- /// Set up the resolver block.
+ ErrorAsOutParameter _(&Err);
+
+ /// Try to set up the resolver block.
std::error_code EC;
ResolverBlock = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
- TargetT::ResolverCodeSize, nullptr,
+ ORCABI::ResolverCodeSize, nullptr,
sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
- assert(!EC && "Failed to allocate resolver block");
+ if (EC) {
+ Err = errorCodeToError(EC);
+ return;
+ }
- TargetT::writeResolverCode(static_cast<uint8_t *>(ResolverBlock.base()),
- &reenter, this);
+ ORCABI::writeResolverCode(static_cast<uint8_t *>(ResolverBlock.base()),
+ &reenter, this);
EC = sys::Memory::protectMappedMemory(ResolverBlock.getMemoryBlock(),
sys::Memory::MF_READ |
sys::Memory::MF_EXEC);
- assert(!EC && "Failed to mprotect resolver block");
+ if (EC) {
+ Err = errorCodeToError(EC);
+ return;
+ }
}
-private:
- static JITTargetAddress reenter(void *CCMgr, void *TrampolineId) {
- JITCompileCallbackManager *Mgr =
- static_cast<JITCompileCallbackManager *>(CCMgr);
- return Mgr->executeCompileCallback(
- static_cast<JITTargetAddress>(
- reinterpret_cast<uintptr_t>(TrampolineId)));
- }
-
- Error grow() override {
+ Error grow() {
assert(this->AvailableTrampolines.empty() && "Growing prematurely?");
std::error_code EC;
@@ -144,17 +153,17 @@ private:
return errorCodeToError(EC);
unsigned NumTrampolines =
- (sys::Process::getPageSize() - TargetT::PointerSize) /
- TargetT::TrampolineSize;
+ (sys::Process::getPageSize() - ORCABI::PointerSize) /
+ ORCABI::TrampolineSize;
uint8_t *TrampolineMem = static_cast<uint8_t *>(TrampolineBlock.base());
- TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(),
- NumTrampolines);
+ ORCABI::writeTrampolines(TrampolineMem, ResolverBlock.base(),
+ NumTrampolines);
for (unsigned I = 0; I < NumTrampolines; ++I)
this->AvailableTrampolines.push_back(
static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(
- TrampolineMem + (I * TargetT::TrampolineSize))));
+ TrampolineMem + (I * ORCABI::TrampolineSize))));
if (auto EC = sys::Memory::protectMappedMemory(
TrampolineBlock.getMemoryBlock(),
@@ -165,8 +174,87 @@ private:
return Error::success();
}
+ GetTrampolineLandingFunction GetTrampolineLanding;
+
+ std::mutex LTPMutex;
sys::OwningMemoryBlock ResolverBlock;
std::vector<sys::OwningMemoryBlock> TrampolineBlocks;
+ std::vector<JITTargetAddress> AvailableTrampolines;
+};
+
+/// Target-independent base class for compile callback management.
+class JITCompileCallbackManager {
+public:
+ using CompileFunction = std::function<JITTargetAddress()>;
+
+ virtual ~JITCompileCallbackManager() = default;
+
+ /// Reserve a compile callback.
+ Expected<JITTargetAddress> getCompileCallback(CompileFunction Compile);
+
+ /// Execute the callback for the given trampoline id. Called by the JIT
+ /// to compile functions on demand.
+ JITTargetAddress executeCompileCallback(JITTargetAddress TrampolineAddr);
+
+protected:
+ /// Construct a JITCompileCallbackManager.
+ JITCompileCallbackManager(std::unique_ptr<TrampolinePool> TP,
+ ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddress)
+ : TP(std::move(TP)), ES(ES),
+ CallbacksJD(ES.createJITDylib("<Callbacks>")),
+ ErrorHandlerAddress(ErrorHandlerAddress) {}
+
+ void setTrampolinePool(std::unique_ptr<TrampolinePool> TP) {
+ this->TP = std::move(TP);
+ }
+
+private:
+ std::mutex CCMgrMutex;
+ std::unique_ptr<TrampolinePool> TP;
+ ExecutionSession &ES;
+ JITDylib &CallbacksJD;
+ JITTargetAddress ErrorHandlerAddress;
+ std::map<JITTargetAddress, SymbolStringPtr> AddrToSymbol;
+ size_t NextCallbackId = 0;
+};
+
+/// Manage compile callbacks for in-process JITs.
+template <typename ORCABI>
+class LocalJITCompileCallbackManager : public JITCompileCallbackManager {
+public:
+ /// Create a new LocalJITCompileCallbackManager.
+ static Expected<std::unique_ptr<LocalJITCompileCallbackManager>>
+ Create(ExecutionSession &ES, JITTargetAddress ErrorHandlerAddress) {
+ Error Err = Error::success();
+ auto CCMgr = std::unique_ptr<LocalJITCompileCallbackManager>(
+ new LocalJITCompileCallbackManager(ES, ErrorHandlerAddress, Err));
+ if (Err)
+ return std::move(Err);
+ return std::move(CCMgr);
+ }
+
+private:
+ /// Construct a LocalJITCompileCallbackManager.
+ /// @param ErrorHandlerAddress The address of an error handler in the target
+ /// process to be used if a compile callback fails.
+ LocalJITCompileCallbackManager(ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddress,
+ Error &Err)
+ : JITCompileCallbackManager(nullptr, ES, ErrorHandlerAddress) {
+ ErrorAsOutParameter _(&Err);
+ auto TP = LocalTrampolinePool<ORCABI>::Create(
+ [this](JITTargetAddress TrampolineAddr) {
+ return executeCompileCallback(TrampolineAddr);
+ });
+
+ if (!TP) {
+ Err = TP.takeError();
+ return;
+ }
+
+ setTrampolinePool(std::move(*TP));
+ }
};
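
A hedged sketch of the new Create factory (OrcX86_64_SysV is the in-process ABI from OrcABISupport.h; compileFoo is a hypothetical function that compiles the body and returns the address to jump to):

\code{.cpp}
auto CCMgr = LocalJITCompileCallbackManager<OrcX86_64_SysV>::Create(
    ES, /*ErrorHandlerAddress=*/0);
if (!CCMgr)
  ES.reportError(CCMgr.takeError());
else {
  // Reserve a callback; the lambda runs the first time the callback address
  // is executed and returns the landing address for the trampoline.
  auto CallbackAddr = (*CCMgr)->getCompileCallback(
      []() -> JITTargetAddress { return compileFoo(); });
  if (!CallbackAddr)
    ES.reportError(CallbackAddr.takeError());
}
\endcode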
/// Base class for managing collections of named indirect stubs.
@@ -207,6 +295,7 @@ class LocalIndirectStubsManager : public IndirectStubsManager {
public:
Error createStub(StringRef StubName, JITTargetAddress StubAddr,
JITSymbolFlags StubFlags) override {
+ std::lock_guard<std::mutex> Lock(StubsMutex);
if (auto Err = reserveStubs(1))
return Err;
@@ -216,6 +305,7 @@ public:
}
Error createStubs(const StubInitsMap &StubInits) override {
+ std::lock_guard<std::mutex> Lock(StubsMutex);
if (auto Err = reserveStubs(StubInits.size()))
return Err;
@@ -227,6 +317,7 @@ public:
}
JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) override {
+ std::lock_guard<std::mutex> Lock(StubsMutex);
auto I = StubIndexes.find(Name);
if (I == StubIndexes.end())
return nullptr;
@@ -242,6 +333,7 @@ public:
}
JITEvaluatedSymbol findPointer(StringRef Name) override {
+ std::lock_guard<std::mutex> Lock(StubsMutex);
auto I = StubIndexes.find(Name);
if (I == StubIndexes.end())
return nullptr;
@@ -254,11 +346,15 @@ public:
}
Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override {
+ using AtomicIntPtr = std::atomic<uintptr_t>;
+
+ std::lock_guard<std::mutex> Lock(StubsMutex);
auto I = StubIndexes.find(Name);
assert(I != StubIndexes.end() && "No stub pointer for symbol");
auto Key = I->second.first;
- *IndirectStubsInfos[Key.first].getPtr(Key.second) =
- reinterpret_cast<void *>(static_cast<uintptr_t>(NewAddr));
+ AtomicIntPtr *AtomicStubPtr = reinterpret_cast<AtomicIntPtr *>(
+ IndirectStubsInfos[Key.first].getPtr(Key.second));
+ *AtomicStubPtr = static_cast<uintptr_t>(NewAddr);
return Error::success();
}
@@ -288,6 +384,7 @@ private:
StubIndexes[StubName] = std::make_pair(Key, StubFlags);
}
+ std::mutex StubsMutex;
std::vector<typename TargetT::IndirectStubsInfo> IndirectStubsInfos;
using StubKey = std::pair<uint16_t, uint16_t>;
std::vector<StubKey> FreeStubs;
@@ -299,7 +396,7 @@ private:
/// The given target triple will determine the ABI, and the given
/// ErrorHandlerAddress will be used by the resulting compile callback
/// manager if a compile callback fails.
-std::unique_ptr<JITCompileCallbackManager>
+Expected<std::unique_ptr<JITCompileCallbackManager>>
createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
JITTargetAddress ErrorHandlerAddress);
@@ -325,12 +422,18 @@ GlobalVariable *createImplPointer(PointerType &PT, Module &M, const Twine &Name,
/// indirect call using the given function pointer.
void makeStub(Function &F, Value &ImplPointer);
-/// Raise linkage types and rename as necessary to ensure that all
-/// symbols are accessible for other modules.
-///
-/// This should be called before partitioning a module to ensure that the
-/// partitions retain access to each other's symbols.
-void makeAllSymbolsExternallyAccessible(Module &M);
+/// Promotes private symbols to global hidden, and renames to prevent clashes
+/// with other promoted symbols. The same SymbolPromoter instance should be
+/// used for all symbols to be added to a single JITDylib.
+class SymbolLinkagePromoter {
+public:
+ /// Promote symbols in the given module. Returns the set of global values
+ /// that have been renamed/promoted.
+ std::vector<GlobalValue *> operator()(Module &M);
+
+private:
+ unsigned NextId = 0;
+};
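
A hedged sketch of the promoter described above (M is a hypothetical Module about to be partitioned):

\code{.cpp}
SymbolLinkagePromoter PromoteSymbols;
// Private symbols in M become hidden globals with unique names, so the
// partitions can still reference each other after splitting.
std::vector<GlobalValue *> Promoted = PromoteSymbols(M);
\endcode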
/// Clone a function declaration into a new module.
///
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
new file mode 100644
index 000000000000..eb9b6bf2dea6
--- /dev/null
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
@@ -0,0 +1,130 @@
+//===- JITTargetMachineBuilder.h - Build TargetMachines for JIT -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A utility for building TargetMachines for JITs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_JITTARGETMACHINEBUILDER_H
+#define LLVM_EXECUTIONENGINE_ORC_JITTARGETMACHINEBUILDER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+/// A utility class for building TargetMachines for JITs.
+class JITTargetMachineBuilder {
+public:
+ /// Create a JITTargetMachineBuilder based on the given triple.
+ ///
+ /// Note: TargetOptions is default-constructed, then EmulatedTLS and
+ /// ExplicitEmulatedTLS are set to true. If EmulatedTLS is not
+ /// required, these values should be reset before calling
+ /// createTargetMachine.
+ JITTargetMachineBuilder(Triple TT);
+
+ /// Create a JITTargetMachineBuilder for the host system.
+ ///
+ /// Note: TargetOptions is default-constructed, then EmulatedTLS and
+ /// ExplicitEmulatedTLS are set to true. If EmulatedTLS is not
+ /// required, these values should be reset before calling
+ /// createTargetMachine.
+ static Expected<JITTargetMachineBuilder> detectHost();
+
+ /// Create a TargetMachine.
+ ///
+ /// This operation will fail if the requested target is not registered,
+ /// in which case see llvm/Support/TargetSelect.h. To JIT IR the Target and
+ /// the target's AsmPrinter must both be registered. To JIT assembly
+ /// (including inline and module level assembly) the target's AsmParser must
+ /// also be registered.
+ Expected<std::unique_ptr<TargetMachine>> createTargetMachine();
+
+ /// Get the default DataLayout for the target.
+ ///
+ /// Note: This is reasonably expensive, as it creates a temporary
+ /// TargetMachine instance under the hood. It is only suitable for use during
+ /// JIT setup.
+ Expected<DataLayout> getDefaultDataLayoutForTarget() {
+ auto TM = createTargetMachine();
+ if (!TM)
+ return TM.takeError();
+ return (*TM)->createDataLayout();
+ }
+
+ /// Set the CPU string.
+ JITTargetMachineBuilder &setCPU(std::string CPU) {
+ this->CPU = std::move(CPU);
+ return *this;
+ }
+
+ /// Set the relocation model.
+ JITTargetMachineBuilder &setRelocationModel(Optional<Reloc::Model> RM) {
+ this->RM = std::move(RM);
+ return *this;
+ }
+
+ /// Set the code model.
+ JITTargetMachineBuilder &setCodeModel(Optional<CodeModel::Model> CM) {
+ this->CM = std::move(CM);
+ return *this;
+ }
+
+ /// Set the LLVM CodeGen optimization level.
+ JITTargetMachineBuilder &setCodeGenOptLevel(CodeGenOpt::Level OptLevel) {
+ this->OptLevel = OptLevel;
+ return *this;
+ }
+
+ /// Add subtarget features.
+ JITTargetMachineBuilder &
+ addFeatures(const std::vector<std::string> &FeatureVec);
+
+ /// Access subtarget features.
+ SubtargetFeatures &getFeatures() { return Features; }
+
+ /// Access subtarget features.
+ const SubtargetFeatures &getFeatures() const { return Features; }
+
+ /// Access TargetOptions.
+ TargetOptions &getOptions() { return Options; }
+
+ /// Access TargetOptions.
+ const TargetOptions &getOptions() const { return Options; }
+
+ /// Access Triple.
+ Triple &getTargetTriple() { return TT; }
+
+ /// Access Triple.
+ const Triple &getTargetTriple() const { return TT; }
+
+private:
+ Triple TT;
+ std::string CPU;
+ SubtargetFeatures Features;
+ TargetOptions Options;
+ Optional<Reloc::Model> RM;
+ Optional<CodeModel::Model> CM;
+ CodeGenOpt::Level OptLevel = CodeGenOpt::None;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_JITTARGETMACHINEBUILDER_H
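// Usage sketch for the builder above (illustrative; error handling is reduced
// to Expected propagation, and the optimization/relocation settings are
// arbitrary example choices, not requirements):
llvm::Expected<std::unique_ptr<llvm::TargetMachine>> buildHostTargetMachine() {
  auto JTMB = llvm::orc::JITTargetMachineBuilder::detectHost();
  if (!JTMB)
    return JTMB.takeError();
  JTMB->setCodeGenOptLevel(llvm::CodeGenOpt::Default)
      .setRelocationModel(llvm::Reloc::PIC_);
  return JTMB->createTargetMachine();
}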
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
index df655bd82006..ce3e5d519c73 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -19,9 +19,11 @@
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
+#include "llvm/Support/ThreadPool.h"
namespace llvm {
namespace orc {
@@ -29,44 +31,68 @@ namespace orc {
/// A pre-fabricated ORC JIT stack that can serve as an alternative to MCJIT.
class LLJIT {
public:
+
+ /// Destruct this instance. If a multi-threaded instance, waits for all
+ /// compile threads to complete.
+ ~LLJIT();
+
/// Create an LLJIT instance.
+ /// If NumCompileThreads is not equal to zero, creates a multi-threaded
+ /// LLJIT with the given number of compile threads.
static Expected<std::unique_ptr<LLJIT>>
- Create(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL);
+ Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+ unsigned NumCompileThreads = 0);
- /// Returns a reference to the ExecutionSession for this JIT instance.
+ /// Returns the ExecutionSession for this instance.
ExecutionSession &getExecutionSession() { return *ES; }
- /// Returns a reference to the VSO representing the JIT'd main program.
- VSO &getMainVSO() { return Main; }
+ /// Returns a reference to the JITDylib representing the JIT'd main program.
+ JITDylib &getMainJITDylib() { return Main; }
+
+ /// Create a new JITDylib with the given name and return a reference to it.
+ JITDylib &createJITDylib(std::string Name) {
+ return ES->createJITDylib(std::move(Name));
+ }
/// Convenience method for defining an absolute symbol.
Error defineAbsolute(StringRef Name, JITEvaluatedSymbol Address);
- /// Adds an IR module to the given VSO.
- Error addIRModule(VSO &V, std::unique_ptr<Module> M);
+ /// Convenience method for defining an
- /// Adds an IR module to the Main VSO.
- Error addIRModule(std::unique_ptr<Module> M) {
- return addIRModule(Main, std::move(M));
+ /// Adds an IR module to the given JITDylib.
+ Error addIRModule(JITDylib &JD, ThreadSafeModule TSM);
+
+ /// Adds an IR module to the Main JITDylib.
+ Error addIRModule(ThreadSafeModule TSM) {
+ return addIRModule(Main, std::move(TSM));
}
- /// Look up a symbol in VSO V by the symbol's linker-mangled name (to look up
- /// symbols based on their IR name use the lookup function instead).
- Expected<JITEvaluatedSymbol> lookupLinkerMangled(VSO &V, StringRef Name);
+ /// Adds an object file to the given JITDylib.
+ Error addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj);
+
+ /// Adds an object file to the given JITDylib.
+ Error addObjectFile(std::unique_ptr<MemoryBuffer> Obj) {
+ return addObjectFile(Main, std::move(Obj));
+ }
- /// Look up a symbol in the main VSO by the symbol's linker-mangled name (to
+ /// Look up a symbol in JITDylib JD by the symbol's linker-mangled name (to
/// look up symbols based on their IR name use the lookup function instead).
+ Expected<JITEvaluatedSymbol> lookupLinkerMangled(JITDylib &JD,
+ StringRef Name);
+
+ /// Look up a symbol in the main JITDylib by the symbol's linker-mangled name
+ /// (to look up symbols based on their IR name use the lookup function
+ /// instead).
Expected<JITEvaluatedSymbol> lookupLinkerMangled(StringRef Name) {
return lookupLinkerMangled(Main, Name);
}
- /// Look up a symbol in VSO V based on its IR symbol name.
- Expected<JITEvaluatedSymbol> lookup(VSO &V, StringRef UnmangledName) {
- return lookupLinkerMangled(V, mangle(UnmangledName));
+ /// Look up a symbol in JITDylib JD based on its IR symbol name.
+ Expected<JITEvaluatedSymbol> lookup(JITDylib &JD, StringRef UnmangledName) {
+ return lookupLinkerMangled(JD, mangle(UnmangledName));
}
- /// Look up a symbol in the main VSO based on its IR symbol name.
+ /// Look up a symbol in the main JITDylib based on its IR symbol name.
Expected<JITEvaluatedSymbol> lookup(StringRef UnmangledName) {
return lookup(Main, UnmangledName);
}
@@ -77,11 +103,18 @@ public:
/// Runs all not-yet-run static destructors.
Error runDestructors() { return DtorRunner.run(); }
+ /// Returns a reference to the ObjLinkingLayer
+ RTDyldObjectLinkingLayer &getObjLinkingLayer() { return ObjLinkingLayer; }
+
protected:
+
+ /// Create an LLJIT instance with a single compile thread.
LLJIT(std::unique_ptr<ExecutionSession> ES, std::unique_ptr<TargetMachine> TM,
DataLayout DL);
- std::shared_ptr<RuntimeDyld::MemoryManager> getMemoryManager(VModuleKey K);
+ /// Create an LLJIT instance with multiple compile threads.
+ LLJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
+ DataLayout DL, unsigned NumCompileThreads);
std::string mangle(StringRef UnmangledName);
@@ -90,51 +123,68 @@ protected:
void recordCtorDtors(Module &M);
std::unique_ptr<ExecutionSession> ES;
- VSO &Main;
+ JITDylib &Main;
- std::unique_ptr<TargetMachine> TM;
DataLayout DL;
+ std::unique_ptr<ThreadPool> CompileThreads;
- RTDyldObjectLinkingLayer2 ObjLinkingLayer;
- IRCompileLayer2 CompileLayer;
+ RTDyldObjectLinkingLayer ObjLinkingLayer;
+ IRCompileLayer CompileLayer;
- CtorDtorRunner2 CtorRunner, DtorRunner;
+ CtorDtorRunner CtorRunner, DtorRunner;
};
/// An extended version of LLJIT that supports lazy function-at-a-time
/// compilation of LLVM IR.
class LLLazyJIT : public LLJIT {
public:
+
/// Create an LLLazyJIT instance.
+ /// If NumCompileThreads is not equal to zero, creates a multi-threaded
+ /// LLLazyJIT with the given number of compile threads.
static Expected<std::unique_ptr<LLLazyJIT>>
- Create(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL, LLVMContext &Ctx);
+ Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+ JITTargetAddress ErrorAddr, unsigned NumCompileThreads = 0);
/// Set an IR transform (e.g. pass manager pipeline) to run on each function
/// when it is compiled.
- void setLazyCompileTransform(IRTransformLayer2::TransformFunction Transform) {
+ void setLazyCompileTransform(IRTransformLayer::TransformFunction Transform) {
TransformLayer.setTransform(std::move(Transform));
}
- /// Add a module to be lazily compiled to VSO V.
- Error addLazyIRModule(VSO &V, std::unique_ptr<Module> M);
+ /// Sets the partition function.
+ void
+ setPartitionFunction(CompileOnDemandLayer::PartitionFunction Partition) {
+ CODLayer.setPartitionFunction(std::move(Partition));
+ }
+
+ /// Add a module to be lazily compiled to JITDylib JD.
+ Error addLazyIRModule(JITDylib &JD, ThreadSafeModule M);
- /// Add a module to be lazily compiled to the main VSO.
- Error addLazyIRModule(std::unique_ptr<Module> M) {
+ /// Add a module to be lazily compiled to the main JITDylib.
+ Error addLazyIRModule(ThreadSafeModule M) {
return addLazyIRModule(Main, std::move(M));
}
private:
+
+ // Create a single-threaded LLLazyJIT instance.
LLLazyJIT(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL, LLVMContext &Ctx,
- std::unique_ptr<JITCompileCallbackManager> CCMgr,
+ std::unique_ptr<TargetMachine> TM, DataLayout DL,
+ std::unique_ptr<LazyCallThroughManager> LCTMgr,
+ std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder);
+
+ // Create a multi-threaded LLLazyJIT instance.
+ LLLazyJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
+ DataLayout DL, unsigned NumCompileThreads,
+ std::unique_ptr<LazyCallThroughManager> LCTMgr,
std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder);
- std::unique_ptr<JITCompileCallbackManager> CCMgr;
+ std::unique_ptr<LazyCallThroughManager> LCTMgr;
std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder;
- IRTransformLayer2 TransformLayer;
- CompileOnDemandLayer2 CODLayer;
+ IRTransformLayer TransformLayer;
+ CompileOnDemandLayer CODLayer;
};
} // End namespace orc
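// Usage sketch for the updated LLJIT interface (illustrative; assumes a parsed
// llvm::Module `M` and its ThreadSafeContext `TSCtx`, and uses two compile
// threads purely as an example):
auto JTMB = llvm::cantFail(llvm::orc::JITTargetMachineBuilder::detectHost());
auto DL = llvm::cantFail(JTMB.getDefaultDataLayoutForTarget());
auto J = llvm::cantFail(llvm::orc::LLJIT::Create(std::move(JTMB), std::move(DL),
                                                 /*NumCompileThreads=*/2));
llvm::cantFail(J->addIRModule(llvm::orc::ThreadSafeModule(std::move(M), TSCtx)));
// Lookup by IR name; the linker mangling is applied internally.
auto MainAddr = llvm::cantFail(J->lookup("main")).getAddress();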
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Layer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
index 91bd4fb83e6f..cd797445a2e6 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
@@ -15,7 +15,9 @@
#define LLVM_EXECUTIONENGINE_ORC_LAYER_H
#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
namespace llvm {
namespace orc {
@@ -29,14 +31,32 @@ public:
/// Returns the ExecutionSession for this layer.
ExecutionSession &getExecutionSession() { return ES; }
- /// Adds a MaterializationUnit representing the given IR to the given VSO.
- virtual Error add(VSO &V, VModuleKey K, std::unique_ptr<Module> M);
+ /// Sets the CloneToNewContextOnEmit flag (false by default).
+ ///
+ /// When set, IR modules added to this layer will be cloned on to a new
+ /// context before emit is called. This can be used by clients who want
+ /// to load all IR using one LLVMContext (to save memory via type and
+ /// constant uniquing), but want to move Modules to fresh contexts before
+ /// compiling them to enable concurrent compilation.
+ /// Single threaded clients, or clients who load every module on a new
+ /// context, need not set this.
+ void setCloneToNewContextOnEmit(bool CloneToNewContextOnEmit) {
+ this->CloneToNewContextOnEmit = CloneToNewContextOnEmit;
+ }
+
+ /// Returns the current value of the CloneToNewContextOnEmit flag.
+ bool getCloneToNewContextOnEmit() const { return CloneToNewContextOnEmit; }
+
+ /// Adds a MaterializationUnit representing the given IR to the given
+ /// JITDylib.
+ virtual Error add(JITDylib &JD, ThreadSafeModule TSM,
+ VModuleKey K = VModuleKey());
/// Emit should materialize the given IR.
- virtual void emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) = 0;
+ virtual void emit(MaterializationResponsibility R, ThreadSafeModule TSM) = 0;
private:
+ bool CloneToNewContextOnEmit = false;
ExecutionSession &ES;
};
@@ -50,22 +70,29 @@ public:
/// Create an IRMaterializationLayer. Scans the module to build the
/// SymbolFlags and SymbolToDefinition maps.
- IRMaterializationUnit(ExecutionSession &ES, std::unique_ptr<Module> M);
+ IRMaterializationUnit(ExecutionSession &ES, ThreadSafeModule TSM,
+ VModuleKey K);
/// Create an IRMaterializationLayer from a module, and pre-existing
/// SymbolFlags and SymbolToDefinition maps. The maps must provide
/// entries for each definition in M.
/// This constructor is useful for delegating work from one
/// IRMaterializationUnit to another.
- IRMaterializationUnit(std::unique_ptr<Module> M, SymbolFlagsMap SymbolFlags,
+ IRMaterializationUnit(ThreadSafeModule TSM, VModuleKey K,
+ SymbolFlagsMap SymbolFlags,
SymbolNameToDefinitionMap SymbolToDefinition);
+ /// Return the ModuleIdentifier as the name for this MaterializationUnit.
+ StringRef getName() const override;
+
+ const ThreadSafeModule &getModule() const { return TSM; }
+
protected:
- std::unique_ptr<Module> M;
+ ThreadSafeModule TSM;
SymbolNameToDefinitionMap SymbolToDefinition;
private:
- void discard(const VSO &V, SymbolStringPtr Name) override;
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
};
/// MaterializationUnit that materializes modules by calling the 'emit' method
@@ -73,7 +100,8 @@ private:
class BasicIRLayerMaterializationUnit : public IRMaterializationUnit {
public:
BasicIRLayerMaterializationUnit(IRLayer &L, VModuleKey K,
- std::unique_ptr<Module> M);
+ ThreadSafeModule TSM);
+
private:
void materialize(MaterializationResponsibility R) override;
@@ -91,11 +119,13 @@ public:
/// Returns the execution session for this layer.
ExecutionSession &getExecutionSession() { return ES; }
- /// Adds a MaterializationUnit representing the given IR to the given VSO.
- virtual Error add(VSO &V, VModuleKey K, std::unique_ptr<MemoryBuffer> O);
+ /// Adds a MaterializationUnit representing the given IR to the given
+ /// JITDylib.
+ virtual Error add(JITDylib &JD, std::unique_ptr<MemoryBuffer> O,
+ VModuleKey K = VModuleKey());
/// Emit should materialize the given IR.
- virtual void emit(MaterializationResponsibility R, VModuleKey K,
+ virtual void emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) = 0;
private:
@@ -106,23 +136,31 @@ private:
/// instance) by calling 'emit' on the given ObjectLayer.
class BasicObjectLayerMaterializationUnit : public MaterializationUnit {
public:
+ static Expected<std::unique_ptr<BasicObjectLayerMaterializationUnit>>
+ Create(ObjectLayer &L, VModuleKey K, std::unique_ptr<MemoryBuffer> O);
-
- /// The MemoryBuffer should represent a valid object file.
- /// If there is any chance that the file is invalid it should be validated
- /// prior to constructing a BasicObjectLayerMaterializationUnit.
BasicObjectLayerMaterializationUnit(ObjectLayer &L, VModuleKey K,
- std::unique_ptr<MemoryBuffer> O);
+ std::unique_ptr<MemoryBuffer> O,
+ SymbolFlagsMap SymbolFlags);
+
+ /// Return the buffer's identifier as the name for this MaterializationUnit.
+ StringRef getName() const override;
private:
+
void materialize(MaterializationResponsibility R) override;
- void discard(const VSO &V, SymbolStringPtr Name) override;
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
ObjectLayer &L;
- VModuleKey K;
std::unique_ptr<MemoryBuffer> O;
};
+/// Returns a SymbolFlagsMap for the object file represented by the given
+/// buffer, or an error if the buffer does not contain a valid object file.
+// FIXME: Maybe move to Core.h?
+Expected<SymbolFlagsMap> getObjectSymbolFlags(ExecutionSession &ES,
+ MemoryBufferRef ObjBuffer);
+
} // End namespace orc
} // End namespace llvm
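// Usage sketch for getObjectSymbolFlags above (illustrative; `ES` and the
// std::unique_ptr<MemoryBuffer> `ObjBuffer` are assumed to exist, and the
// fragment is meant to run inside a function that can propagate an Error):
llvm::Expected<llvm::orc::SymbolFlagsMap> Flags =
    llvm::orc::getObjectSymbolFlags(ES, ObjBuffer->getMemBufferRef());
if (!Flags)
  return Flags.takeError(); // the buffer is not a valid object file
for (auto &KV : *Flags)
  llvm::dbgs() << *KV.first << "\n"; // symbol names the object claims to define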
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
new file mode 100644
index 000000000000..b5041325bce2
--- /dev/null
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
@@ -0,0 +1,195 @@
+//===------ LazyReexports.h -- Utilities for lazy reexports -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Lazy re-exports are similar to normal re-exports, except that for callable
+// symbols the definitions are replaced with trampolines that will look up and
+// call through to the re-exported symbol at runtime. This can be used to
+// enable lazy compilation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_LAZYREEXPORTS_H
+#define LLVM_EXECUTIONENGINE_ORC_LAZYREEXPORTS_H
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
+
+namespace llvm {
+
+class Triple;
+
+namespace orc {
+
+/// Manages a set of 'lazy call-through' trampolines. These are compiler
+/// re-entry trampolines that are pre-bound to look up a given symbol in a given
+/// JITDylib, then jump to that address. Since compilation of symbols is
+/// triggered on first lookup, these call-through trampolines can be used to
+/// implement lazy compilation.
+///
+/// The easiest way to construct these call-throughs is using the lazyReexport
+/// function.
+class LazyCallThroughManager {
+public:
+ /// Clients will want to take some action on first resolution, e.g. updating
+ /// a stub pointer. Instances of this class can be used to implement this.
+ class NotifyResolvedFunction {
+ public:
+ virtual ~NotifyResolvedFunction() {}
+
+ /// Called the first time a lazy call through is executed and the target
+ /// symbol resolved.
+ virtual Error operator()(JITDylib &SourceJD,
+ const SymbolStringPtr &SymbolName,
+ JITTargetAddress ResolvedAddr) = 0;
+
+ private:
+ virtual void anchor();
+ };
+
+ template <typename NotifyResolvedImpl>
+ class NotifyResolvedFunctionImpl : public NotifyResolvedFunction {
+ public:
+ NotifyResolvedFunctionImpl(NotifyResolvedImpl NotifyResolved)
+ : NotifyResolved(std::move(NotifyResolved)) {}
+ Error operator()(JITDylib &SourceJD, const SymbolStringPtr &SymbolName,
+ JITTargetAddress ResolvedAddr) {
+ return NotifyResolved(SourceJD, SymbolName, ResolvedAddr);
+ }
+
+ private:
+ NotifyResolvedImpl NotifyResolved;
+ };
+
+ /// Create a shared NotifyResolvedFunction from a given type that is
+ /// callable with the correct signature.
+ template <typename NotifyResolvedImpl>
+ static std::unique_ptr<NotifyResolvedFunction>
+ createNotifyResolvedFunction(NotifyResolvedImpl NotifyResolved) {
+ return llvm::make_unique<NotifyResolvedFunctionImpl<NotifyResolvedImpl>>(
+ std::move(NotifyResolved));
+ }
+
+ // Return a free call-through trampoline and bind it to look up and call
+ // through to the given symbol.
+ Expected<JITTargetAddress> getCallThroughTrampoline(
+ JITDylib &SourceJD, SymbolStringPtr SymbolName,
+ std::shared_ptr<NotifyResolvedFunction> NotifyResolved);
+
+protected:
+ LazyCallThroughManager(ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddr,
+ std::unique_ptr<TrampolinePool> TP);
+
+ JITTargetAddress callThroughToSymbol(JITTargetAddress TrampolineAddr);
+
+ void setTrampolinePool(std::unique_ptr<TrampolinePool> TP) {
+ this->TP = std::move(TP);
+ }
+
+private:
+ using ReexportsMap =
+ std::map<JITTargetAddress, std::pair<JITDylib *, SymbolStringPtr>>;
+
+ using NotifiersMap =
+ std::map<JITTargetAddress, std::shared_ptr<NotifyResolvedFunction>>;
+
+ std::mutex LCTMMutex;
+ ExecutionSession &ES;
+ JITTargetAddress ErrorHandlerAddr;
+ std::unique_ptr<TrampolinePool> TP;
+ ReexportsMap Reexports;
+ NotifiersMap Notifiers;
+};
+
+/// A lazy call-through manager that builds trampolines in the current process.
+class LocalLazyCallThroughManager : public LazyCallThroughManager {
+private:
+ LocalLazyCallThroughManager(ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddr)
+ : LazyCallThroughManager(ES, ErrorHandlerAddr, nullptr) {}
+
+ template <typename ORCABI> Error init() {
+ auto TP = LocalTrampolinePool<ORCABI>::Create(
+ [this](JITTargetAddress TrampolineAddr) {
+ return callThroughToSymbol(TrampolineAddr);
+ });
+
+ if (!TP)
+ return TP.takeError();
+
+ setTrampolinePool(std::move(*TP));
+ return Error::success();
+ }
+
+public:
+ /// Create a LocalLazyCallThroughManager using the given ABI. See
+ /// createLocalLazyCallThroughManager.
+ template <typename ORCABI>
+ static Expected<std::unique_ptr<LocalLazyCallThroughManager>>
+ Create(ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr) {
+ auto LLCTM = std::unique_ptr<LocalLazyCallThroughManager>(
+ new LocalLazyCallThroughManager(ES, ErrorHandlerAddr));
+
+ if (auto Err = LLCTM->init<ORCABI>())
+ return std::move(Err);
+
+ return std::move(LLCTM);
+ }
+};
+
+/// Create a LocalLazyCallThroughManager from the given triple and execution
+/// session.
+Expected<std::unique_ptr<LazyCallThroughManager>>
+createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddr);
+
+/// A materialization unit that builds lazy re-exports. These are callable
+/// entry points that call through to the given symbols.
+/// Unlike a 'true' re-export, the address of the lazy re-export will not
+/// match the address of the re-exported symbol, but calling it will behave
+/// the same as calling the re-exported symbol.
+class LazyReexportsMaterializationUnit : public MaterializationUnit {
+public:
+ LazyReexportsMaterializationUnit(LazyCallThroughManager &LCTManager,
+ IndirectStubsManager &ISManager,
+ JITDylib &SourceJD,
+ SymbolAliasMap CallableAliases,
+ VModuleKey K);
+
+ StringRef getName() const override;
+
+private:
+ void materialize(MaterializationResponsibility R) override;
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
+ static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases);
+
+ LazyCallThroughManager &LCTManager;
+ IndirectStubsManager &ISManager;
+ JITDylib &SourceJD;
+ SymbolAliasMap CallableAliases;
+ std::shared_ptr<LazyCallThroughManager::NotifyResolvedFunction>
+ NotifyResolved;
+};
+
+/// Define lazy-reexports based on the given SymbolAliasMap. Each lazy re-export
+/// is a callable symbol that will look up and dispatch to the given aliasee on
+/// first call. All subsequent calls will go directly to the aliasee.
+inline std::unique_ptr<LazyReexportsMaterializationUnit>
+lazyReexports(LazyCallThroughManager &LCTManager,
+ IndirectStubsManager &ISManager, JITDylib &SourceJD,
+ SymbolAliasMap CallableAliases, VModuleKey K = VModuleKey()) {
+ return llvm::make_unique<LazyReexportsMaterializationUnit>(
+ LCTManager, ISManager, SourceJD, std::move(CallableAliases),
+ std::move(K));
+}
+
+} // End namespace orc
+} // End namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_LAZYREEXPORTS_H
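// Usage sketch for lazyReexports above (illustrative; assumes an
// ExecutionSession `ES` with an intern() member, a host Triple `TT`, and
// JITDylibs `JD` and `ImplJD`; the symbol names and flags are examples only):
auto LCTM = llvm::cantFail(llvm::orc::createLocalLazyCallThroughManager(
    TT, ES, /*ErrorHandlerAddr=*/0));
auto ISM = llvm::orc::createLocalIndirectStubsManagerBuilder(TT)();
auto LazyFlags = static_cast<llvm::JITSymbolFlags::FlagNames>(
    llvm::JITSymbolFlags::Exported | llvm::JITSymbolFlags::Callable);
llvm::orc::SymbolAliasMap Aliases;
Aliases[ES.intern("foo")] = {ES.intern("foo_body"), LazyFlags};
llvm::cantFail(JD.define(
    llvm::orc::lazyReexports(*LCTM, *ISM, ImplJD, std::move(Aliases))));
// Calling `foo` now triggers materialization of `foo_body` in ImplJD on first use.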
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h
index 52c8c162ff0b..4c6162ac4b8b 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h
@@ -31,12 +31,12 @@ class SymbolResolver {
public:
virtual ~SymbolResolver() = default;
- /// Returns the flags for each symbol in Symbols that can be found,
- /// along with the set of symbol that could not be found.
- virtual SymbolFlagsMap lookupFlags(const SymbolNameSet &Symbols) = 0;
+ /// Returns the subset of the given symbols that the caller is responsible for
+ /// materializing.
+ virtual SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) = 0;
/// For each symbol in Symbols that can be found, assigns that symbol's
- /// value in Query. Returns the set of symbols that could not be found.
+ /// value in Query. Returns the set of symbols that could not be found.
virtual SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
SymbolNameSet Symbols) = 0;
@@ -46,16 +46,18 @@ private:
/// Implements SymbolResolver with a pair of supplied function objects
/// for convenience. See createSymbolResolver.
-template <typename LookupFlagsFn, typename LookupFn>
+template <typename GetResponsibilitySetFn, typename LookupFn>
class LambdaSymbolResolver final : public SymbolResolver {
public:
- template <typename LookupFlagsFnRef, typename LookupFnRef>
- LambdaSymbolResolver(LookupFlagsFnRef &&LookupFlags, LookupFnRef &&Lookup)
- : LookupFlags(std::forward<LookupFlagsFnRef>(LookupFlags)),
+ template <typename GetResponsibilitySetFnRef, typename LookupFnRef>
+ LambdaSymbolResolver(GetResponsibilitySetFnRef &&GetResponsibilitySet,
+ LookupFnRef &&Lookup)
+ : GetResponsibilitySet(
+ std::forward<GetResponsibilitySetFnRef>(GetResponsibilitySet)),
Lookup(std::forward<LookupFnRef>(Lookup)) {}
- SymbolFlagsMap lookupFlags(const SymbolNameSet &Symbols) final {
- return LookupFlags(Symbols);
+ SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final {
+ return GetResponsibilitySet(Symbols);
}
SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
@@ -64,34 +66,37 @@ public:
}
private:
- LookupFlagsFn LookupFlags;
+ GetResponsibilitySetFn GetResponsibilitySet;
LookupFn Lookup;
};
/// Creates a SymbolResolver implementation from the pair of supplied
/// function objects.
-template <typename LookupFlagsFn, typename LookupFn>
+template <typename GetResponsibilitySetFn, typename LookupFn>
std::unique_ptr<LambdaSymbolResolver<
typename std::remove_cv<
- typename std::remove_reference<LookupFlagsFn>::type>::type,
+ typename std::remove_reference<GetResponsibilitySetFn>::type>::type,
typename std::remove_cv<
typename std::remove_reference<LookupFn>::type>::type>>
-createSymbolResolver(LookupFlagsFn &&LookupFlags, LookupFn &&Lookup) {
+createSymbolResolver(GetResponsibilitySetFn &&GetResponsibilitySet,
+ LookupFn &&Lookup) {
using LambdaSymbolResolverImpl = LambdaSymbolResolver<
typename std::remove_cv<
- typename std::remove_reference<LookupFlagsFn>::type>::type,
+ typename std::remove_reference<GetResponsibilitySetFn>::type>::type,
typename std::remove_cv<
typename std::remove_reference<LookupFn>::type>::type>;
return llvm::make_unique<LambdaSymbolResolverImpl>(
- std::forward<LookupFlagsFn>(LookupFlags), std::forward<LookupFn>(Lookup));
+ std::forward<GetResponsibilitySetFn>(GetResponsibilitySet),
+ std::forward<LookupFn>(Lookup));
}
+/// Legacy adapter. Remove once we kill off the old ORC layers.
class JITSymbolResolverAdapter : public JITSymbolResolver {
public:
JITSymbolResolverAdapter(ExecutionSession &ES, SymbolResolver &R,
MaterializationResponsibility *MR);
- Expected<LookupFlagsResult> lookupFlags(const LookupSet &Symbols) override;
- Expected<LookupResult> lookup(const LookupSet &Symbols) override;
+ Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override;
+ void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) override;
private:
ExecutionSession &ES;
@@ -100,27 +105,29 @@ private:
MaterializationResponsibility *MR;
};
-/// Use the given legacy-style FindSymbol function (i.e. a function that
-/// takes a const std::string& or StringRef and returns a JITSymbol) to
-/// find the flags for each symbol in Symbols and store their flags in
-/// SymbolFlags. If any JITSymbol returned by FindSymbol is in an error
-/// state the function returns immediately with that error, otherwise it
-/// returns the set of symbols not found.
+/// Use the given legacy-style FindSymbol function (i.e. a function that takes
+/// a const std::string& or StringRef and returns a JITSymbol) to get the
+/// subset of symbols that the caller is responsible for materializing. If any
+/// JITSymbol returned by FindSymbol is in an error state the function returns
+/// immediately with that error.
///
-/// Useful for implementing lookupFlags bodies that query legacy resolvers.
+/// Useful for implementing getResponsibilitySet bodies that query legacy
+/// resolvers.
template <typename FindSymbolFn>
-Expected<SymbolFlagsMap> lookupFlagsWithLegacyFn(const SymbolNameSet &Symbols,
- FindSymbolFn FindSymbol) {
- SymbolFlagsMap SymbolFlags;
+Expected<SymbolNameSet>
+getResponsibilitySetWithLegacyFn(const SymbolNameSet &Symbols,
+ FindSymbolFn FindSymbol) {
+ SymbolNameSet Result;
for (auto &S : Symbols) {
- if (JITSymbol Sym = FindSymbol(*S))
- SymbolFlags[S] = Sym.getFlags();
- else if (auto Err = Sym.takeError())
+ if (JITSymbol Sym = FindSymbol(*S)) {
+ if (!Sym.getFlags().isStrong())
+ Result.insert(S);
+ } else if (auto Err = Sym.takeError())
return std::move(Err);
}
- return SymbolFlags;
+ return Result;
}
/// Use the given legacy-style FindSymbol function (i.e. a function that
@@ -177,12 +184,13 @@ public:
: ES(ES), LegacyLookup(std::move(LegacyLookup)),
ReportError(std::move(ReportError)) {}
- SymbolFlagsMap lookupFlags(const SymbolNameSet &Symbols) final {
- if (auto SymbolFlags = lookupFlagsWithLegacyFn(Symbols, LegacyLookup))
- return std::move(*SymbolFlags);
+ SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final {
+ if (auto ResponsibilitySet =
+ getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup))
+ return std::move(*ResponsibilitySet);
else {
- ReportError(SymbolFlags.takeError());
- return SymbolFlagsMap();
+ ReportError(ResponsibilitySet.takeError());
+ return SymbolNameSet();
}
}
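// Usage sketch for the legacy-resolver helpers above (illustrative; assumes an
// ExecutionSession `ES` and a legacy `LegacyLookup` callable that takes a
// StringRef and returns a JITSymbol; lookupWithLegacyFn is the companion
// helper declared elsewhere in this header):
auto Resolver = llvm::orc::createSymbolResolver(
    [&](const llvm::orc::SymbolNameSet &Symbols) {
      auto RS = llvm::orc::getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup);
      if (!RS) {
        ES.reportError(RS.takeError());
        return llvm::orc::SymbolNameSet();
      }
      return std::move(*RS);
    },
    [&](std::shared_ptr<llvm::orc::AsynchronousSymbolQuery> Query,
        llvm::orc::SymbolNameSet Symbols) {
      return llvm::orc::lookupWithLegacyFn(ES, *Query, Symbols, LegacyLookup);
    });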
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h
index 3dd3cfe05b8d..03fefb69a928 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h
@@ -23,10 +23,10 @@ namespace orc {
class NullResolver : public SymbolResolver {
public:
- SymbolFlagsMap lookupFlags(const SymbolNameSet &Symbols) override;
+ SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final;
SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
- SymbolNameSet Symbols) override;
+ SymbolNameSet Symbols) final;
};
/// SymbolResolver implementation that rejects all resolution requests.
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
index c6b43a9c8ed6..44d6b490e19d 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
@@ -23,16 +23,16 @@
namespace llvm {
namespace orc {
-class ObjectTransformLayer2 : public ObjectLayer {
+class ObjectTransformLayer : public ObjectLayer {
public:
using TransformFunction =
std::function<Expected<std::unique_ptr<MemoryBuffer>>(
std::unique_ptr<MemoryBuffer>)>;
- ObjectTransformLayer2(ExecutionSession &ES, ObjectLayer &BaseLayer,
- TransformFunction Transform);
+ ObjectTransformLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
+ TransformFunction Transform);
- void emit(MaterializationResponsibility R, VModuleKey K,
+ void emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) override;
private:
@@ -46,11 +46,11 @@ private:
/// immediately applies the user supplied functor to each object, then adds
/// the set of transformed objects to the layer below.
template <typename BaseLayerT, typename TransformFtor>
-class ObjectTransformLayer {
+class LegacyObjectTransformLayer {
public:
/// Construct an ObjectTransformLayer with the given BaseLayer
- ObjectTransformLayer(BaseLayerT &BaseLayer,
- TransformFtor Transform = TransformFtor())
+ LegacyObjectTransformLayer(BaseLayerT &BaseLayer,
+ TransformFtor Transform = TransformFtor())
: BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
/// Apply the transform functor to each object in the object set, then
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
index 581c598aff62..a70fc373713d 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -238,7 +238,78 @@ public:
unsigned MinStubs, void *InitialPtrVal);
};
-} // end namespace orc
-} // end namespace llvm
+// @brief Mips32 support.
+//
+// Mips32 supports lazy JITing.
+class OrcMips32_Base {
+public:
+ static const unsigned PointerSize = 4;
+ static const unsigned TrampolineSize = 20;
+ static const unsigned ResolverCodeSize = 0xfc;
+ using IndirectStubsInfo = GenericIndirectStubsInfo<16>;
+
+ using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr,
+ void *TrampolineId);
+ /// @brief Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
+ static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, unsigned NumTrampolines);
+
+ /// @brief Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, void *CallbackMgr, bool isBigEndian);
+ /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
+ /// the nearest page size.
+ ///
+ /// E.g. Asking for 4 stubs on Mips32, where stubs are 8-bytes, with 4k
+ /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
+ /// will return a block of 1024 (2-pages worth).
+ static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, unsigned MinStubs, void *InitialPtrVal);
+};
+
+
+class OrcMips32Le : public OrcMips32_Base {
+public:
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, void *CallbackMgr)
+ { OrcMips32_Base::writeResolverCode(ResolveMem, Reentry, CallbackMgr, false); }
+};
+
+class OrcMips32Be : public OrcMips32_Base {
+public:
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, void *CallbackMgr)
+ { OrcMips32_Base::writeResolverCode(ResolveMem, Reentry, CallbackMgr, true); }
+};
+
+// @brief Mips64 support.
+//
+// Mips64 supports lazy JITing.
+class OrcMips64 {
+public:
+ static const unsigned PointerSize = 8;
+ static const unsigned TrampolineSize = 40;
+ static const unsigned ResolverCodeSize = 0x120;
+
+ using IndirectStubsInfo = GenericIndirectStubsInfo<32>;
+ using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr,
+ void *TrampolineId);
+ /// @brief Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, void *CallbackMgr);
+
+ /// @brief Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
+ static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, unsigned NumTrampolines);
+
+ /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
+ /// the nearest page size.
+ ///
+ /// E.g. Asking for 4 stubs on Mips64, where stubs are 8-bytes, with 4k
+ /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
+ /// will return a block of 1024 (2-pages worth).
+ static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, unsigned MinStubs, void *InitialPtrVal);
+};
+ } // end namespace orc
+ } // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
index 45f95f63e70f..3e07f5cf3742 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
@@ -118,30 +118,33 @@ public:
Unmapped.back().RemoteCodeAddr =
Client.reserveMem(Id, CodeSize, CodeAlign);
- LLVM_DEBUG(dbgs() << " code: "
- << format("0x%016x", Unmapped.back().RemoteCodeAddr)
- << " (" << CodeSize << " bytes, alignment "
- << CodeAlign << ")\n");
+ LLVM_DEBUG(
+ dbgs() << " code: "
+ << format("0x%016" PRIx64, Unmapped.back().RemoteCodeAddr)
+ << " (" << CodeSize << " bytes, alignment " << CodeAlign
+ << ")\n");
}
if (RODataSize != 0) {
Unmapped.back().RemoteRODataAddr =
Client.reserveMem(Id, RODataSize, RODataAlign);
- LLVM_DEBUG(dbgs() << " ro-data: "
- << format("0x%016x", Unmapped.back().RemoteRODataAddr)
- << " (" << RODataSize << " bytes, alignment "
- << RODataAlign << ")\n");
+ LLVM_DEBUG(
+ dbgs() << " ro-data: "
+ << format("0x%016" PRIx64, Unmapped.back().RemoteRODataAddr)
+ << " (" << RODataSize << " bytes, alignment " << RODataAlign
+ << ")\n");
}
if (RWDataSize != 0) {
Unmapped.back().RemoteRWDataAddr =
Client.reserveMem(Id, RWDataSize, RWDataAlign);
- LLVM_DEBUG(dbgs() << " rw-data: "
- << format("0x%016x", Unmapped.back().RemoteRWDataAddr)
- << " (" << RWDataSize << " bytes, alignment "
- << RWDataAlign << ")\n");
+ LLVM_DEBUG(
+ dbgs() << " rw-data: "
+ << format("0x%016" PRIx64, Unmapped.back().RemoteRWDataAddr)
+ << " (" << RWDataSize << " bytes, alignment " << RWDataAlign
+ << ")\n");
}
}
@@ -269,9 +272,9 @@ public:
for (auto &Alloc : Allocs) {
NextAddr = alignTo(NextAddr, Alloc.getAlign());
Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextAddr);
- LLVM_DEBUG(dbgs() << " "
- << static_cast<void *>(Alloc.getLocalAddress())
- << " -> " << format("0x%016x", NextAddr) << "\n");
+ LLVM_DEBUG(
+ dbgs() << " " << static_cast<void *>(Alloc.getLocalAddress())
+ << " -> " << format("0x%016" PRIx64, NextAddr) << "\n");
Alloc.setRemoteAddress(NextAddr);
// Only advance NextAddr if it was non-null to begin with,
@@ -293,7 +296,7 @@ public:
LLVM_DEBUG(dbgs() << " copying section: "
<< static_cast<void *>(Alloc.getLocalAddress())
<< " -> "
- << format("0x%016x", Alloc.getRemoteAddress())
+ << format("0x%016" PRIx64, Alloc.getRemoteAddress())
<< " (" << Alloc.getSize() << " bytes)\n";);
if (Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(),
@@ -306,7 +309,8 @@ public:
<< (Permissions & sys::Memory::MF_WRITE ? 'W' : '-')
<< (Permissions & sys::Memory::MF_EXEC ? 'X' : '-')
<< " permissions on block: "
- << format("0x%016x", RemoteSegmentAddr) << "\n");
+ << format("0x%016" PRIx64, RemoteSegmentAddr)
+ << "\n");
if (Client.setProtections(Id, RemoteSegmentAddr, Permissions))
return true;
}
@@ -446,16 +450,24 @@ public:
StringMap<std::pair<StubKey, JITSymbolFlags>> StubIndexes;
};
- /// Remote compile callback manager.
- class RemoteCompileCallbackManager : public JITCompileCallbackManager {
+ class RemoteTrampolinePool : public TrampolinePool {
public:
- RemoteCompileCallbackManager(OrcRemoteTargetClient &Client,
- ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress)
- : JITCompileCallbackManager(ES, ErrorHandlerAddress), Client(Client) {}
+ RemoteTrampolinePool(OrcRemoteTargetClient &Client) : Client(Client) {}
+
+ Expected<JITTargetAddress> getTrampoline() override {
+ std::lock_guard<std::mutex> Lock(RTPMutex);
+ if (AvailableTrampolines.empty()) {
+ if (auto Err = grow())
+ return std::move(Err);
+ }
+ assert(!AvailableTrampolines.empty() && "Failed to grow trampoline pool");
+ auto TrampolineAddr = AvailableTrampolines.back();
+ AvailableTrampolines.pop_back();
+ return TrampolineAddr;
+ }
private:
- Error grow() override {
+ Error grow() {
JITTargetAddress BlockAddr = 0;
uint32_t NumTrampolines = 0;
if (auto TrampolineInfoOrErr = Client.emitTrampolineBlock())
@@ -470,7 +482,20 @@ public:
return Error::success();
}
+ std::mutex RTPMutex;
OrcRemoteTargetClient &Client;
+ std::vector<JITTargetAddress> AvailableTrampolines;
+ };
+
+ /// Remote compile callback manager.
+ class RemoteCompileCallbackManager : public JITCompileCallbackManager {
+ public:
+ RemoteCompileCallbackManager(OrcRemoteTargetClient &Client,
+ ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddress)
+ : JITCompileCallbackManager(
+ llvm::make_unique<RemoteTrampolinePool>(Client), ES,
+ ErrorHandlerAddress) {}
};
/// Create an OrcRemoteTargetClient.
@@ -489,8 +514,8 @@ public:
/// Call the int(void) function at the given address in the target and return
/// its result.
Expected<int> callIntVoid(JITTargetAddress Addr) {
- LLVM_DEBUG(dbgs() << "Calling int(*)(void) " << format("0x%016x", Addr)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Calling int(*)(void) "
+ << format("0x%016" PRIx64, Addr) << "\n");
return callB<exec::CallIntVoid>(Addr);
}
@@ -499,15 +524,15 @@ public:
Expected<int> callMain(JITTargetAddress Addr,
const std::vector<std::string> &Args) {
LLVM_DEBUG(dbgs() << "Calling int(*)(int, char*[]) "
- << format("0x%016x", Addr) << "\n");
+ << format("0x%016" PRIx64, Addr) << "\n");
return callB<exec::CallMain>(Addr, Args);
}
/// Call the void() function at the given address in the target and wait for
/// it to finish.
Error callVoidVoid(JITTargetAddress Addr) {
- LLVM_DEBUG(dbgs() << "Calling void(*)(void) " << format("0x%016x", Addr)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Calling void(*)(void) "
+ << format("0x%016" PRIx64, Addr) << "\n");
return callB<exec::CallVoidVoid>(Addr);
}
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
index bc0da0f9a730..8db9e317a18a 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
@@ -87,8 +87,7 @@ class SerializationTraits<ChannelT, JITSymbolFlags> {
public:
static Error serialize(ChannelT &C, const JITSymbolFlags &Flags) {
- return serializeSeq(C, static_cast<JITSymbolFlags::UnderlyingType>(Flags),
- Flags.getTargetFlags());
+ return serializeSeq(C, Flags.getRawFlagsValue(), Flags.getTargetFlags());
}
static Error deserialize(ChannelT &C, JITSymbolFlags &Flags) {
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h
index 47bd90bb1bad..953b73e10e43 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/Orc/RPCSerialization.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
#include <future>
@@ -207,73 +208,6 @@ private:
namespace detail {
-// FIXME: Remove MSVCPError/MSVCPExpected once MSVC's future implementation
-// supports classes without default constructors.
-#ifdef _MSC_VER
-
-namespace msvc_hacks {
-
-// Work around MSVC's future implementation's use of default constructors:
-// A default constructed value in the promise will be overwritten when the
-// real error is set - so the default constructed Error has to be checked
-// already.
-class MSVCPError : public Error {
-public:
- MSVCPError() { (void)!!*this; }
-
- MSVCPError(MSVCPError &&Other) : Error(std::move(Other)) {}
-
- MSVCPError &operator=(MSVCPError Other) {
- Error::operator=(std::move(Other));
- return *this;
- }
-
- MSVCPError(Error Err) : Error(std::move(Err)) {}
-};
-
-// Work around MSVC's future implementation, similar to MSVCPError.
-template <typename T> class MSVCPExpected : public Expected<T> {
-public:
- MSVCPExpected()
- : Expected<T>(make_error<StringError>("", inconvertibleErrorCode())) {
- consumeError(this->takeError());
- }
-
- MSVCPExpected(MSVCPExpected &&Other) : Expected<T>(std::move(Other)) {}
-
- MSVCPExpected &operator=(MSVCPExpected &&Other) {
- Expected<T>::operator=(std::move(Other));
- return *this;
- }
-
- MSVCPExpected(Error Err) : Expected<T>(std::move(Err)) {}
-
- template <typename OtherT>
- MSVCPExpected(
- OtherT &&Val,
- typename std::enable_if<std::is_convertible<OtherT, T>::value>::type * =
- nullptr)
- : Expected<T>(std::move(Val)) {}
-
- template <class OtherT>
- MSVCPExpected(
- Expected<OtherT> &&Other,
- typename std::enable_if<std::is_convertible<OtherT, T>::value>::type * =
- nullptr)
- : Expected<T>(std::move(Other)) {}
-
- template <class OtherT>
- explicit MSVCPExpected(
- Expected<OtherT> &&Other,
- typename std::enable_if<!std::is_convertible<OtherT, T>::value>::type * =
- nullptr)
- : Expected<T>(std::move(Other)) {}
-};
-
-} // end namespace msvc_hacks
-
-#endif // _MSC_VER
-
/// Provides a typedef for a tuple containing the decayed argument types.
template <typename T> class FunctionArgsTuple;
@@ -293,10 +227,10 @@ public:
#ifdef _MSC_VER
// The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<msvc_hacks::MSVCPExpected<RetT>>;
+ using ReturnPromiseType = std::promise<MSVCPExpected<RetT>>;
// The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<msvc_hacks::MSVCPExpected<RetT>>;
+ using ReturnFutureType = std::future<MSVCPExpected<RetT>>;
#else
// The ErrorReturnType wrapped in a std::promise.
using ReturnPromiseType = std::promise<ErrorReturnType>;
@@ -325,10 +259,10 @@ public:
#ifdef _MSC_VER
// The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<msvc_hacks::MSVCPError>;
+ using ReturnPromiseType = std::promise<MSVCPError>;
// The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<msvc_hacks::MSVCPError>;
+ using ReturnFutureType = std::future<MSVCPError>;
#else
// The ErrorReturnType wrapped in a std::promise.
using ReturnPromiseType = std::promise<ErrorReturnType>;
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index 48b3f7a58ed7..6f90f0380d95 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -36,7 +36,7 @@
namespace llvm {
namespace orc {
-class RTDyldObjectLinkingLayer2 : public ObjectLayer {
+class RTDyldObjectLinkingLayer : public ObjectLayer {
public:
/// Functor for receiving object-loaded notifications.
using NotifyLoadedFunction =
@@ -44,48 +44,84 @@ public:
const RuntimeDyld::LoadedObjectInfo &)>;
/// Functor for receiving finalization notifications.
- using NotifyFinalizedFunction = std::function<void(VModuleKey)>;
+ using NotifyEmittedFunction = std::function<void(VModuleKey)>;
using GetMemoryManagerFunction =
- std::function<std::shared_ptr<RuntimeDyld::MemoryManager>(VModuleKey)>;
+ std::function<std::unique_ptr<RuntimeDyld::MemoryManager>()>;
/// Construct an ObjectLinkingLayer with the given NotifyLoaded,
- /// and NotifyFinalized functors.
- RTDyldObjectLinkingLayer2(
+ /// and NotifyEmitted functors.
+ RTDyldObjectLinkingLayer(
ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager,
NotifyLoadedFunction NotifyLoaded = NotifyLoadedFunction(),
- NotifyFinalizedFunction NotifyFinalized = NotifyFinalizedFunction());
+ NotifyEmittedFunction NotifyEmitted = NotifyEmittedFunction());
/// Emit the object.
- void emit(MaterializationResponsibility R, VModuleKey K,
+ void emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) override;
- /// Map section addresses for the object associated with the
- /// VModuleKey K.
- void mapSectionAddress(VModuleKey K, const void *LocalAddress,
- JITTargetAddress TargetAddr) const;
-
/// Set the 'ProcessAllSections' flag.
///
/// If set to true, all sections in each object file will be allocated using
/// the memory manager, rather than just the sections required for execution.
///
/// This is kludgy, and may be removed in the future.
- void setProcessAllSections(bool ProcessAllSections) {
+ RTDyldObjectLinkingLayer &setProcessAllSections(bool ProcessAllSections) {
this->ProcessAllSections = ProcessAllSections;
+ return *this;
+ }
+
+ /// Instructs this RTDyldObjectLinkingLayer instance to override the symbol flags
+ /// returned by RuntimeDyld for any given object file with the flags supplied
+ /// by the MaterializationResponsibility instance. This is a workaround to
+ /// support symbol visibility in COFF, which does not use the libObject's
+ /// SF_Exported flag. Use only when generating / adding COFF object files.
+ ///
+ /// FIXME: We should be able to remove this if/when COFF properly tracks
+ /// exported symbols.
+ RTDyldObjectLinkingLayer &
+ setOverrideObjectFlagsWithResponsibilityFlags(bool OverrideObjectFlags) {
+ this->OverrideObjectFlags = OverrideObjectFlags;
+ return *this;
+ }
+
+ /// If set, this RTDyldObjectLinkingLayer instance will claim responsibility
+ /// for any symbols provided by a given object file that were not already in
+ /// the MaterializationResponsibility instance. Setting this flag allows
+ /// higher-level program representations (e.g. LLVM IR) to be added based on
+ /// only a subset of the symbols they provide, without having to write
+ /// intervening layers to scan and add the additional symbols. This trades
+ /// diagnostic quality for convenience however: If all symbols are enumerated
+ /// up-front then clashes can be detected and reported early (and usually
+ /// deterministically). If this option is set, clashes for the additional
+ /// symbols may not be detected until late, and detection may depend on
+ /// the flow of control through JIT'd code. Use with care.
+ RTDyldObjectLinkingLayer &
+ setAutoClaimResponsibilityForObjectSymbols(bool AutoClaimObjectSymbols) {
+ this->AutoClaimObjectSymbols = AutoClaimObjectSymbols;
+ return *this;
}
private:
+ Error onObjLoad(VModuleKey K, MaterializationResponsibility &R,
+ object::ObjectFile &Obj,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo,
+ std::map<StringRef, JITEvaluatedSymbol> Resolved,
+ std::set<StringRef> &InternalSymbols);
+
+ void onObjEmit(VModuleKey K, MaterializationResponsibility &R, Error Err);
+
mutable std::mutex RTDyldLayerMutex;
GetMemoryManagerFunction GetMemoryManager;
NotifyLoadedFunction NotifyLoaded;
- NotifyFinalizedFunction NotifyFinalized;
- bool ProcessAllSections;
- std::map<VModuleKey, RuntimeDyld *> ActiveRTDylds;
- std::map<VModuleKey, std::shared_ptr<RuntimeDyld::MemoryManager>> MemMgrs;
+ NotifyEmittedFunction NotifyEmitted;
+ bool ProcessAllSections = false;
+ bool OverrideObjectFlags = false;
+ bool AutoClaimObjectSymbols = false;
+ std::vector<std::unique_ptr<RuntimeDyld::MemoryManager>> MemMgrs;
};
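// Usage sketch for the renamed layer above (illustrative; `ES` is an existing
// ExecutionSession, and SectionMemoryManager comes from
// llvm/ExecutionEngine/SectionMemoryManager.h):
llvm::orc::RTDyldObjectLinkingLayer ObjLayer(ES, []() {
  return llvm::make_unique<llvm::SectionMemoryManager>();
});
ObjLayer.setProcessAllSections(true)
    .setOverrideObjectFlagsWithResponsibilityFlags(true); // COFF workaround described above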
-class RTDyldObjectLinkingLayerBase {
+class LegacyRTDyldObjectLinkingLayerBase {
public:
using ObjectPtr = std::unique_ptr<MemoryBuffer>;
@@ -137,10 +173,10 @@ protected:
/// object files to be loaded into memory, linked, and the addresses of their
/// symbols queried. All objects added to this layer can see each other's
/// symbols.
-class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase {
+class LegacyRTDyldObjectLinkingLayer : public LegacyRTDyldObjectLinkingLayerBase {
public:
- using RTDyldObjectLinkingLayerBase::ObjectPtr;
+ using LegacyRTDyldObjectLinkingLayerBase::ObjectPtr;
/// Functor for receiving object-loaded notifications.
using NotifyLoadedFtor =
@@ -161,7 +197,7 @@ private:
template <typename MemoryManagerPtrT>
class ConcreteLinkedObject : public LinkedObject {
public:
- ConcreteLinkedObject(RTDyldObjectLinkingLayer &Parent, VModuleKey K,
+ ConcreteLinkedObject(LegacyRTDyldObjectLinkingLayer &Parent, VModuleKey K,
OwnedObject Obj, MemoryManagerPtrT MemMgr,
std::shared_ptr<SymbolResolver> Resolver,
bool ProcessAllSections)
@@ -175,7 +211,7 @@ private:
}
~ConcreteLinkedObject() override {
- if (this->Parent.NotifyFreed)
+ if (this->Parent.NotifyFreed && ObjForNotify.getBinary())
this->Parent.NotifyFreed(K, *ObjForNotify.getBinary());
MemMgr->deregisterEHFrames();
@@ -249,9 +285,14 @@ private:
consumeError(SymbolName.takeError());
continue;
}
+ // FIXME: Raise an error for bad symbols.
auto Flags = JITSymbolFlags::fromObjectSymbol(Symbol);
+ if (!Flags) {
+ consumeError(Flags.takeError());
+ continue;
+ }
SymbolTable.insert(
- std::make_pair(*SymbolName, JITEvaluatedSymbol(0, Flags)));
+ std::make_pair(*SymbolName, JITEvaluatedSymbol(0, *Flags)));
}
}
@@ -272,7 +313,7 @@ private:
};
VModuleKey K;
- RTDyldObjectLinkingLayer &Parent;
+ LegacyRTDyldObjectLinkingLayer &Parent;
MemoryManagerPtrT MemMgr;
OwnedObject ObjForNotify;
std::unique_ptr<PreFinalizeContents> PFC;
@@ -280,7 +321,7 @@ private:
template <typename MemoryManagerPtrT>
std::unique_ptr<ConcreteLinkedObject<MemoryManagerPtrT>>
- createLinkedObject(RTDyldObjectLinkingLayer &Parent, VModuleKey K,
+ createLinkedObject(LegacyRTDyldObjectLinkingLayer &Parent, VModuleKey K,
OwnedObject Obj, MemoryManagerPtrT MemMgr,
std::shared_ptr<SymbolResolver> Resolver,
bool ProcessAllSections) {
@@ -300,7 +341,7 @@ public:
/// Construct an ObjectLinkingLayer with the given NotifyLoaded,
/// and NotifyFinalized functors.
- RTDyldObjectLinkingLayer(
+ LegacyRTDyldObjectLinkingLayer(
ExecutionSession &ES, ResourcesGetter GetResources,
NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(),
@@ -402,11 +443,14 @@ public:
private:
ExecutionSession &ES;
- std::map<VModuleKey, std::unique_ptr<LinkedObject>> LinkedObjects;
ResourcesGetter GetResources;
NotifyLoadedFtor NotifyLoaded;
NotifyFinalizedFtor NotifyFinalized;
NotifyFreedFtor NotifyFreed;
+
+ // NB! `LinkedObjects` needs to be destroyed before `NotifyFreed` because
+ // `~ConcreteLinkedObject` calls `NotifyFreed`
+ std::map<VModuleKey, std::unique_ptr<LinkedObject>> LinkedObjects;
bool ProcessAllSections = false;
};
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
index 4c45cfd199dd..717076e25609 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
@@ -14,6 +14,7 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_SYMBOLSTRINGPOOL_H
#define LLVM_EXECUTIONENGINE_ORC_SYMBOLSTRINGPOOL_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include <atomic>
#include <mutex>
@@ -49,10 +50,13 @@ private:
/// Pointer to a pooled string representing a symbol name.
class SymbolStringPtr {
friend class SymbolStringPool;
+ friend struct DenseMapInfo<SymbolStringPtr>;
friend bool operator==(const SymbolStringPtr &LHS,
const SymbolStringPtr &RHS);
friend bool operator<(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS);
+ static SymbolStringPool::PoolMapEntry Tombstone;
+
public:
SymbolStringPtr() = default;
SymbolStringPtr(const SymbolStringPtr &Other)
@@ -142,6 +146,29 @@ inline bool SymbolStringPool::empty() const {
}
} // end namespace orc
+
+template <>
+struct DenseMapInfo<orc::SymbolStringPtr> {
+
+ static orc::SymbolStringPtr getEmptyKey() {
+ return orc::SymbolStringPtr();
+ }
+
+ static orc::SymbolStringPtr getTombstoneKey() {
+ return orc::SymbolStringPtr(&orc::SymbolStringPtr::Tombstone);
+ }
+
+ static unsigned getHashValue(orc::SymbolStringPtr V) {
+ uintptr_t IV = reinterpret_cast<uintptr_t>(V.S);
+ return unsigned(IV) ^ unsigned(IV >> 9);
+ }
+
+ static bool isEqual(const orc::SymbolStringPtr &LHS,
+ const orc::SymbolStringPtr &RHS) {
+ return LHS.S == RHS.S;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_ORC_SYMBOLSTRINGPOOL_H
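For readers unfamiliar with the DenseMapInfo contract being specialized above: a key type needs an empty key and a tombstone key (both reserved, never inserted), a hash, and an equality test. A hedged sketch with a hypothetical pointer-like key (MyKey and Table are illustrative names, not LLVM's):

#include "llvm/ADT/DenseMap.h"
#include <cstdint>

struct MyKey { void *P; };

namespace llvm {
template <> struct DenseMapInfo<MyKey> {
  // Reserved sentinel values; real keys must never compare equal to these.
  static MyKey getEmptyKey() { return MyKey{nullptr}; }
  static MyKey getTombstoneKey() {
    return MyKey{reinterpret_cast<void *>(uintptr_t(-1))};
  }
  static unsigned getHashValue(const MyKey &K) {
    auto IV = reinterpret_cast<uintptr_t>(K.P);
    return unsigned(IV) ^ unsigned(IV >> 9);
  }
  static bool isEqual(const MyKey &LHS, const MyKey &RHS) {
    return LHS.P == RHS.P;
  }
};
} // end namespace llvm

// With the specialization visible, MyKey works as a DenseMap key:
// llvm::DenseMap<MyKey, int> Table;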
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
new file mode 100644
index 000000000000..bf946de532d3
--- /dev/null
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
@@ -0,0 +1,163 @@
+//===----------- ThreadSafeModule.h -- Layer interfaces ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Thread safe wrappers and utilities for Module and LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_THREADSAFEMODULEWRAPPER_H
+#define LLVM_EXECUTIONENGINE_ORC_THREADSAFEMODULEWRAPPER_H
+
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Compiler.h"
+
+#include <functional>
+#include <memory>
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+
+/// An LLVMContext together with an associated mutex that can be used to lock
+/// the context to prevent concurrent access by other threads.
+class ThreadSafeContext {
+private:
+ struct State {
+ State(std::unique_ptr<LLVMContext> Ctx) : Ctx(std::move(Ctx)) {}
+
+ std::unique_ptr<LLVMContext> Ctx;
+ std::recursive_mutex Mutex;
+ };
+
+public:
+ // RAII based lock for ThreadSafeContext.
+ class LLVM_NODISCARD Lock {
+ private:
+ using UnderlyingLock = std::lock_guard<std::recursive_mutex>;
+
+ public:
+ Lock(std::shared_ptr<State> S)
+ : S(std::move(S)),
+ L(llvm::make_unique<UnderlyingLock>(this->S->Mutex)) {}
+
+ private:
+ std::shared_ptr<State> S;
+ std::unique_ptr<UnderlyingLock> L;
+ };
+
+ /// Construct a null context.
+ ThreadSafeContext() = default;
+
+ /// Construct a ThreadSafeContext from the given LLVMContext.
+ ThreadSafeContext(std::unique_ptr<LLVMContext> NewCtx)
+ : S(std::make_shared<State>(std::move(NewCtx))) {
+ assert(S->Ctx != nullptr &&
+ "Can not construct a ThreadSafeContext from a nullptr");
+ }
+
+ /// Returns a pointer to the LLVMContext that was used to construct this
+ /// instance, or null if the instance was default constructed.
+ LLVMContext *getContext() { return S ? S->Ctx.get() : nullptr; }
+
+ /// Returns a pointer to the LLVMContext that was used to construct this
+ /// instance, or null if the instance was default constructed.
+ const LLVMContext *getContext() const { return S ? S->Ctx.get() : nullptr; }
+
+ Lock getLock() {
+ assert(S && "Can not lock an empty ThreadSafeContext");
+ return Lock(S);
+ }
+
+private:
+ std::shared_ptr<State> S;
+};
+
+/// An LLVM Module together with a shared ThreadSafeContext.
+class ThreadSafeModule {
+public:
+ /// Default construct a ThreadSafeModule. This results in a null module and
+ /// null context.
+ ThreadSafeModule() = default;
+
+ ThreadSafeModule(ThreadSafeModule &&Other) = default;
+
+ ThreadSafeModule &operator=(ThreadSafeModule &&Other) {
+ // We have to explicitly define this move operator to copy the fields in
+ // reverse order (i.e. module first) to ensure the dependencies are
+ // protected: The old module that is being overwritten must be destroyed
+ // *before* the context that it depends on.
+ // We also need to lock the context to make sure the module tear-down
+ // does not overlap any other work on the context.
+ if (M) {
+ auto L = getContextLock();
+ M = nullptr;
+ }
+ M = std::move(Other.M);
+ TSCtx = std::move(Other.TSCtx);
+ return *this;
+ }
+
+ /// Construct a ThreadSafeModule from a unique_ptr<Module> and a
+ /// unique_ptr<LLVMContext>. This creates a new ThreadSafeContext from the
+ /// given context.
+ ThreadSafeModule(std::unique_ptr<Module> M, std::unique_ptr<LLVMContext> Ctx)
+ : M(std::move(M)), TSCtx(std::move(Ctx)) {}
+
+ /// Construct a ThreadSafeModule from a unique_ptr<Module> and an
+ /// existing ThreadSafeContext.
+ ThreadSafeModule(std::unique_ptr<Module> M, ThreadSafeContext TSCtx)
+ : M(std::move(M)), TSCtx(std::move(TSCtx)) {}
+
+ ~ThreadSafeModule() {
+ // We need to lock the context while we destruct the module.
+ if (M) {
+ auto L = getContextLock();
+ M = nullptr;
+ }
+ }
+
+ /// Get the module wrapped by this ThreadSafeModule.
+ Module *getModule() { return M.get(); }
+
+ /// Get the module wrapped by this ThreadSafeModule.
+ const Module *getModule() const { return M.get(); }
+
+ /// Take out a lock on the ThreadSafeContext for this module.
+ ThreadSafeContext::Lock getContextLock() { return TSCtx.getLock(); }
+
+ /// Boolean conversion: This ThreadSafeModule will evaluate to true if it
+ /// wraps a non-null module.
+ explicit operator bool() {
+ if (M) {
+ assert(TSCtx.getContext() &&
+ "Non-null module must have non-null context");
+ return true;
+ }
+ return false;
+ }
+
+private:
+ std::unique_ptr<Module> M;
+ ThreadSafeContext TSCtx;
+};
+
+using GVPredicate = std::function<bool(const GlobalValue &)>;
+using GVModifier = std::function<void(GlobalValue &)>;
+
+/// Clones the given module on to a new context.
+ThreadSafeModule
+cloneToNewContext(ThreadSafeModule &TSMW,
+ GVPredicate ShouldCloneDef = GVPredicate(),
+ GVModifier UpdateClonedDefSource = GVModifier());
+
+} // End namespace orc
+} // End namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_THREADSAFEMODULEWRAPPER_H
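A hedged usage sketch for the wrappers defined above: build a Module on a fresh context, hand both to a ThreadSafeModule, and take the context lock before mutating IR that another thread might share. Function and variable names are illustrative:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;
using namespace llvm::orc;

ThreadSafeModule makeModule() {
  auto Ctx = llvm::make_unique<LLVMContext>();
  // The module must be built on the same context that the wrapper will own.
  auto M = llvm::make_unique<Module>("demo", *Ctx);
  return ThreadSafeModule(std::move(M), std::move(Ctx));
}

void suffixAllFunctions(ThreadSafeModule &TSM) {
  auto Lock = TSM.getContextLock(); // hold the context mutex for the mutation
  if (Module *M = TSM.getModule())
    for (Function &F : *M)
      F.setName(F.getName() + ".suffix");
}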
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
index 5dd5add1bb39..e419ee05e566 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -250,6 +250,16 @@ public:
void finalizeWithMemoryManagerLocking();
private:
+ friend void
+ jitLinkForORC(object::ObjectFile &Obj,
+ std::unique_ptr<MemoryBuffer> UnderlyingBuffer,
+ RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver,
+ bool ProcessAllSections,
+ std::function<Error(std::unique_ptr<LoadedObjectInfo>,
+ std::map<StringRef, JITEvaluatedSymbol>)>
+ OnLoaded,
+ std::function<void(Error)> OnEmitted);
+
// RuntimeDyldImpl is the actual class. RuntimeDyld is just the public
// interface.
std::unique_ptr<RuntimeDyldImpl> Dyld;
@@ -259,6 +269,21 @@ private:
RuntimeDyldCheckerImpl *Checker;
};
+// Asynchronous JIT link for ORC.
+//
+// Warning: This API is experimental and probably should not be used by anyone
+// but ORC's RTDyldObjectLinkingLayer2. Internally it constructs a RuntimeDyld
+// instance and uses continuation passing to perform the fix-up and finalize
+// steps asynchronously.
+void jitLinkForORC(object::ObjectFile &Obj,
+ std::unique_ptr<MemoryBuffer> UnderlyingBuffer,
+ RuntimeDyld::MemoryManager &MemMgr,
+ JITSymbolResolver &Resolver, bool ProcessAllSections,
+ std::function<Error(std::unique_ptr<LoadedObjectInfo>,
+ std::map<StringRef, JITEvaluatedSymbol>)>
+ OnLoaded,
+ std::function<void(Error)> OnEmitted);
+
} // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H
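A sketch of how a caller might drive the continuation-passing API declared above. The callback bodies are placeholders, and generic (C++14) lambdas are used so the exact callback parameter spellings follow whatever the declaration supplies; treat this as illustrative rather than canonical:

#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

void linkOneObject(llvm::object::ObjectFile &Obj,
                   std::unique_ptr<llvm::MemoryBuffer> Buf,
                   llvm::RuntimeDyld::MemoryManager &MemMgr,
                   llvm::JITSymbolResolver &Resolver) {
  llvm::jitLinkForORC(
      Obj, std::move(Buf), MemMgr, Resolver, /*ProcessAllSections=*/false,
      [](auto LoadedObjInfo, auto ResolvedSymbols) -> llvm::Error {
        // Record or register the resolved symbols here.
        (void)LoadedObjInfo;
        (void)ResolvedSymbols;
        return llvm::Error::success();
      },
      [](llvm::Error Err) {
        // Runs once fix-up/finalization has completed (or failed).
        llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "jitLink: ");
      });
}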
diff --git a/contrib/llvm/include/llvm/IR/Attributes.h b/contrib/llvm/include/llvm/IR/Attributes.h
index 5aaaaf3c396b..9fc4614af010 100644
--- a/contrib/llvm/include/llvm/IR/Attributes.h
+++ b/contrib/llvm/include/llvm/IR/Attributes.h
@@ -230,29 +230,33 @@ public:
/// Add an argument attribute. Returns a new set because attribute sets are
/// immutable.
- AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeSet addAttribute(LLVMContext &C,
+ Attribute::AttrKind Kind) const;
/// Add a target-dependent attribute. Returns a new set because attribute sets
/// are immutable.
- AttributeSet addAttribute(LLVMContext &C, StringRef Kind,
- StringRef Value = StringRef()) const;
+ LLVM_NODISCARD AttributeSet addAttribute(LLVMContext &C, StringRef Kind,
+ StringRef Value = StringRef()) const;
/// Add attributes to the attribute set. Returns a new set because attribute
/// sets are immutable.
- AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const;
+ LLVM_NODISCARD AttributeSet addAttributes(LLVMContext &C,
+ AttributeSet AS) const;
/// Remove the specified attribute from this set. Returns a new set because
/// attribute sets are immutable.
- AttributeSet removeAttribute(LLVMContext &C, Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeSet removeAttribute(LLVMContext &C,
+ Attribute::AttrKind Kind) const;
/// Remove the specified attribute from this set. Returns a new set because
/// attribute sets are immutable.
- AttributeSet removeAttribute(LLVMContext &C, StringRef Kind) const;
+ LLVM_NODISCARD AttributeSet removeAttribute(LLVMContext &C,
+ StringRef Kind) const;
/// Remove the specified attributes from this set. Returns a new set because
/// attribute sets are immutable.
- AttributeSet removeAttributes(LLVMContext &C,
- const AttrBuilder &AttrsToRemove) const;
+ LLVM_NODISCARD AttributeSet
+ removeAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const;
/// Return the number of attributes in this set.
unsigned getNumAttributes() const;
@@ -375,133 +379,140 @@ public:
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
+ Attribute::AttrKind Kind) const;
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAttribute(LLVMContext &C, unsigned Index, StringRef Kind,
- StringRef Value = StringRef()) const;
+ LLVM_NODISCARD AttributeList
+ addAttribute(LLVMContext &C, unsigned Index, StringRef Kind,
+ StringRef Value = StringRef()) const;
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAttribute(LLVMContext &C, unsigned Index, Attribute A) const;
+ LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
+ Attribute A) const;
/// Add attributes to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &B) const;
+ LLVM_NODISCARD AttributeList addAttributes(LLVMContext &C, unsigned Index,
+ const AttrBuilder &B) const;
/// Add an argument attribute to the list. Returns a new list because
/// attribute lists are immutable.
- AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo,
- Attribute::AttrKind Kind) const {
+ LLVM_NODISCARD AttributeList addParamAttribute(
+ LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
return addAttribute(C, ArgNo + FirstArgIndex, Kind);
}
/// Add an argument attribute to the list. Returns a new list because
/// attribute lists are immutable.
- AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo,
- StringRef Kind,
- StringRef Value = StringRef()) const {
+ LLVM_NODISCARD AttributeList
+ addParamAttribute(LLVMContext &C, unsigned ArgNo, StringRef Kind,
+ StringRef Value = StringRef()) const {
return addAttribute(C, ArgNo + FirstArgIndex, Kind, Value);
}
/// Add an attribute to the attribute list at the given arg indices. Returns a
/// new list because attribute lists are immutable.
- AttributeList addParamAttribute(LLVMContext &C, ArrayRef<unsigned> ArgNos,
- Attribute A) const;
+ LLVM_NODISCARD AttributeList addParamAttribute(LLVMContext &C,
+ ArrayRef<unsigned> ArgNos,
+ Attribute A) const;
/// Add an argument attribute to the list. Returns a new list because
/// attribute lists are immutable.
- AttributeList addParamAttributes(LLVMContext &C, unsigned ArgNo,
- const AttrBuilder &B) const {
+ LLVM_NODISCARD AttributeList addParamAttributes(LLVMContext &C,
+ unsigned ArgNo,
+ const AttrBuilder &B) const {
return addAttributes(C, ArgNo + FirstArgIndex, B);
}
/// Remove the specified attribute at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
+ Attribute::AttrKind Kind) const;
/// Remove the specified attribute at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeAttribute(LLVMContext &C, unsigned Index,
- StringRef Kind) const;
+ LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
+ StringRef Kind) const;
/// Remove the specified attributes at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &AttrsToRemove) const;
+ LLVM_NODISCARD AttributeList removeAttributes(
+ LLVMContext &C, unsigned Index, const AttrBuilder &AttrsToRemove) const;
/// Remove all attributes at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeAttributes(LLVMContext &C, unsigned Index) const;
+ LLVM_NODISCARD AttributeList removeAttributes(LLVMContext &C,
+ unsigned Index) const;
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeParamAttribute(LLVMContext &C, unsigned ArgNo,
- Attribute::AttrKind Kind) const {
+ LLVM_NODISCARD AttributeList removeParamAttribute(
+ LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
}
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeParamAttribute(LLVMContext &C, unsigned ArgNo,
- StringRef Kind) const {
+ LLVM_NODISCARD AttributeList removeParamAttribute(LLVMContext &C,
+ unsigned ArgNo,
+ StringRef Kind) const {
return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
}
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeParamAttributes(LLVMContext &C, unsigned ArgNo,
- const AttrBuilder &AttrsToRemove) const {
+ LLVM_NODISCARD AttributeList removeParamAttributes(
+ LLVMContext &C, unsigned ArgNo, const AttrBuilder &AttrsToRemove) const {
return removeAttributes(C, ArgNo + FirstArgIndex, AttrsToRemove);
}
/// Remove all attributes at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeParamAttributes(LLVMContext &C, unsigned ArgNo) const {
+ LLVM_NODISCARD AttributeList removeParamAttributes(LLVMContext &C,
+ unsigned ArgNo) const {
return removeAttributes(C, ArgNo + FirstArgIndex);
}
/// \brief Add the dereferenceable attribute to the attribute set at the given
/// index. Returns a new list because attribute lists are immutable.
- AttributeList addDereferenceableAttr(LLVMContext &C, unsigned Index,
- uint64_t Bytes) const;
+ LLVM_NODISCARD AttributeList addDereferenceableAttr(LLVMContext &C,
+ unsigned Index,
+ uint64_t Bytes) const;
/// \brief Add the dereferenceable attribute to the attribute set at the given
/// arg index. Returns a new list because attribute lists are immutable.
- AttributeList addDereferenceableParamAttr(LLVMContext &C, unsigned ArgNo,
- uint64_t Bytes) const {
+ LLVM_NODISCARD AttributeList addDereferenceableParamAttr(
+ LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
return addDereferenceableAttr(C, ArgNo + FirstArgIndex, Bytes);
}
/// Add the dereferenceable_or_null attribute to the attribute set at
/// the given index. Returns a new list because attribute lists are immutable.
- AttributeList addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index,
- uint64_t Bytes) const;
+ LLVM_NODISCARD AttributeList addDereferenceableOrNullAttr(
+ LLVMContext &C, unsigned Index, uint64_t Bytes) const;
/// Add the dereferenceable_or_null attribute to the attribute set at
/// the given arg index. Returns a new list because attribute lists are
/// immutable.
- AttributeList addDereferenceableOrNullParamAttr(LLVMContext &C,
- unsigned ArgNo,
- uint64_t Bytes) const {
+ LLVM_NODISCARD AttributeList addDereferenceableOrNullParamAttr(
+ LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
return addDereferenceableOrNullAttr(C, ArgNo + FirstArgIndex, Bytes);
}
/// Add the allocsize attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAllocSizeAttr(LLVMContext &C, unsigned Index,
- unsigned ElemSizeArg,
- const Optional<unsigned> &NumElemsArg);
+ LLVM_NODISCARD AttributeList
+ addAllocSizeAttr(LLVMContext &C, unsigned Index, unsigned ElemSizeArg,
+ const Optional<unsigned> &NumElemsArg);
/// Add the allocsize attribute to the attribute set at the given arg index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo,
- unsigned ElemSizeArg,
- const Optional<unsigned> &NumElemsArg) {
+ LLVM_NODISCARD AttributeList
+ addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo, unsigned ElemSizeArg,
+ const Optional<unsigned> &NumElemsArg) {
return addAllocSizeAttr(C, ArgNo + FirstArgIndex, ElemSizeArg, NumElemsArg);
}
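The LLVM_NODISCARD annotations above matter because every one of these methods returns a fresh list; dropping the result is a silent no-op. A short sketch (helper name is illustrative):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

using namespace llvm;

void markFirstArgNonNull(Function &F) {
  AttributeList AL = F.getAttributes();
  AL = AL.addParamAttribute(F.getContext(), /*ArgNo=*/0, Attribute::NonNull);
  F.setAttributes(AL); // write the new, extended list back

  // F.getAttributes().addParamAttribute(...);  // result dropped: no effect,
  // and now flagged thanks to LLVM_NODISCARD.
}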
diff --git a/contrib/llvm/include/llvm/IR/Attributes.td b/contrib/llvm/include/llvm/IR/Attributes.td
index 39978c41ac72..e786d85d05a8 100644
--- a/contrib/llvm/include/llvm/IR/Attributes.td
+++ b/contrib/llvm/include/llvm/IR/Attributes.td
@@ -176,6 +176,14 @@ def SanitizeMemory : EnumAttr<"sanitize_memory">;
/// HWAddressSanitizer is on.
def SanitizeHWAddress : EnumAttr<"sanitize_hwaddress">;
+/// Speculative Load Hardening is enabled.
+///
+/// Note that this uses the default compatibility (always compatible during
+/// inlining) and a conservative merge strategy where inlining an attributed
+/// body will add the attribute to the caller. This ensures that code carrying
+/// this attribute will always be lowered with hardening enabled.
+def SpeculativeLoadHardening : EnumAttr<"speculative_load_hardening">;
+
/// Argument is swift error.
def SwiftError : EnumAttr<"swifterror">;
@@ -232,6 +240,7 @@ def : MergeRule<"setAND<UnsafeFPMathAttr>">;
def : MergeRule<"setOR<NoImplicitFloatAttr>">;
def : MergeRule<"setOR<NoJumpTablesAttr>">;
def : MergeRule<"setOR<ProfileSampleAccurateAttr>">;
+def : MergeRule<"setOR<SpeculativeLoadHardeningAttr>">;
def : MergeRule<"adjustCallerSSPLevel">;
def : MergeRule<"adjustCallerStackProbes">;
def : MergeRule<"adjustCallerStackProbeSize">;
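A hedged sketch of using the new attribute from C++, assuming the Attribute::SpeculativeLoadHardening enumerator that the TableGen entry above generates. Because the merge rule is setOR, a caller that inlines a hardened callee becomes hardened as well:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

using namespace llvm;

void requestHardening(Function &F) {
  F.addFnAttr(Attribute::SpeculativeLoadHardening);
}

bool isHardened(const Function &F) {
  return F.hasFnAttribute(Attribute::SpeculativeLoadHardening);
}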
diff --git a/contrib/llvm/include/llvm/IR/BasicBlock.h b/contrib/llvm/include/llvm/IR/BasicBlock.h
index 1ee19975af75..99eac33f742e 100644
--- a/contrib/llvm/include/llvm/IR/BasicBlock.h
+++ b/contrib/llvm/include/llvm/IR/BasicBlock.h
@@ -38,7 +38,6 @@ class LandingPadInst;
class LLVMContext;
class Module;
class PHINode;
-class TerminatorInst;
class ValueSymbolTable;
/// LLVM Basic Block Representation
@@ -50,12 +49,12 @@ class ValueSymbolTable;
/// represents a label to which a branch can jump.
///
/// A well formed basic block is formed of a list of non-terminating
-/// instructions followed by a single TerminatorInst instruction.
-/// TerminatorInst's may not occur in the middle of basic blocks, and must
-/// terminate the blocks. The BasicBlock class allows malformed basic blocks to
-/// occur because it may be useful in the intermediate stage of constructing or
-/// modifying a program. However, the verifier will ensure that basic blocks
-/// are "well formed".
+/// instructions followed by a single terminator instruction. Terminator
+/// instructions may not occur in the middle of basic blocks, and must terminate
+/// the blocks. The BasicBlock class allows malformed basic blocks to occur
+/// because it may be useful in the intermediate stage of constructing or
+/// modifying a program. However, the verifier will ensure that basic blocks are
+/// "well formed".
class BasicBlock final : public Value, // Basic blocks are data objects also
public ilist_node_with_parent<BasicBlock, Function> {
public:
@@ -120,10 +119,10 @@ public:
/// Returns the terminator instruction if the block is well formed or null
/// if the block is not well formed.
- const TerminatorInst *getTerminator() const LLVM_READONLY;
- TerminatorInst *getTerminator() {
- return const_cast<TerminatorInst *>(
- static_cast<const BasicBlock *>(this)->getTerminator());
+ const Instruction *getTerminator() const LLVM_READONLY;
+ Instruction *getTerminator() {
+ return const_cast<Instruction *>(
+ static_cast<const BasicBlock *>(this)->getTerminator());
}
/// Returns the call instruction calling \@llvm.experimental.deoptimize
@@ -238,6 +237,12 @@ public:
static_cast<const BasicBlock *>(this)->getUniquePredecessor());
}
+ /// Return true if this block has exactly N predecessors.
+ bool hasNPredecessors(unsigned N) const;
+
+ /// Return true if this block has N predecessors or more.
+ bool hasNPredecessorsOrMore(unsigned N) const;
+
/// Return the successor of this block if it has a single successor.
/// Otherwise return a null pointer.
///
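The new predecessor-count queries exist because counting every predecessor walks the whole use list; hasNPredecessors and hasNPredecessorsOrMore can stop early. A brief sketch:

#include "llvm/IR/BasicBlock.h"

using namespace llvm;

bool hasSinglePred(const BasicBlock &BB) {
  return BB.hasNPredecessors(1); // stops after inspecting at most two edges
}

bool isMergePoint(const BasicBlock &BB) {
  return BB.hasNPredecessorsOrMore(2); // stops as soon as a second edge is seen
}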
diff --git a/contrib/llvm/include/llvm/IR/CFG.h b/contrib/llvm/include/llvm/IR/CFG.h
index f4988e7f1fec..8385c4647e12 100644
--- a/contrib/llvm/include/llvm/IR/CFG.h
+++ b/contrib/llvm/include/llvm/IR/CFG.h
@@ -1,4 +1,4 @@
-//===- CFG.h - Process LLVM structures as graphs ----------------*- C++ -*-===//
+//===- CFG.h ----------------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,15 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file defines specializations of GraphTraits that allow Function and
-// BasicBlock graphs to be treated as proper graphs for generic algorithms.
-//
+/// \file
+///
+/// This file provides various utilities for inspecting and working with the
+/// control flow graph in LLVM IR. This includes generic facilities for
+/// iterating successors and predecessors of basic blocks, the successors of
+/// specific terminator instructions, etc. It also defines specializations of
+/// GraphTraits that allow Function and BasicBlock graphs to be treated as
+/// proper graphs for generic algorithms.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_CFG_H
@@ -44,8 +49,13 @@ class PredIterator : public std::iterator<std::forward_iterator_tag,
inline void advancePastNonTerminators() {
// Loop to ignore non-terminator uses (for example BlockAddresses).
- while (!It.atEnd() && !isa<TerminatorInst>(*It))
+ while (!It.atEnd()) {
+ if (auto *Inst = dyn_cast<Instruction>(*It))
+ if (Inst->isTerminator())
+ break;
+
++It;
+ }
}
public:
@@ -63,7 +73,7 @@ public:
inline reference operator*() const {
assert(!It.atEnd() && "pred_iterator out of range!");
- return cast<TerminatorInst>(*It)->getParent();
+ return cast<Instruction>(*It)->getParent();
}
inline pointer *operator->() const { return &operator*(); }
@@ -107,6 +117,8 @@ inline const_pred_iterator pred_end(const BasicBlock *BB) {
inline bool pred_empty(const BasicBlock *BB) {
return pred_begin(BB) == pred_end(BB);
}
+/// Get the number of predecessors of \p BB. This is a linear time operation.
+/// Use \ref BasicBlock::hasNPredecessors() or hasNPredecessorsOrMore if able.
inline unsigned pred_size(const BasicBlock *BB) {
return std::distance(pred_begin(BB), pred_end(BB));
}
@@ -118,16 +130,144 @@ inline pred_const_range predecessors(const BasicBlock *BB) {
}
//===----------------------------------------------------------------------===//
-// BasicBlock succ_iterator helpers
+// Instruction and BasicBlock succ_iterator helpers
//===----------------------------------------------------------------------===//
-using succ_iterator =
- TerminatorInst::SuccIterator<TerminatorInst *, BasicBlock>;
-using succ_const_iterator =
- TerminatorInst::SuccIterator<const TerminatorInst *, const BasicBlock>;
+template <class InstructionT, class BlockT>
+class SuccIterator
+ : public iterator_facade_base<SuccIterator<InstructionT, BlockT>,
+ std::random_access_iterator_tag, BlockT, int,
+ BlockT *, BlockT *> {
+public:
+ using difference_type = int;
+ using pointer = BlockT *;
+ using reference = BlockT *;
+
+private:
+ InstructionT *Inst;
+ int Idx;
+ using Self = SuccIterator<InstructionT, BlockT>;
+
+ inline bool index_is_valid(int Idx) {
+ // Note that we specially support the index of zero being valid even in the
+ // face of a null instruction.
+ return Idx >= 0 && (Idx == 0 || Idx <= (int)Inst->getNumSuccessors());
+ }
+
+ /// Proxy object to allow write access in operator[]
+ class SuccessorProxy {
+ Self It;
+
+ public:
+ explicit SuccessorProxy(const Self &It) : It(It) {}
+
+ SuccessorProxy(const SuccessorProxy &) = default;
+
+ SuccessorProxy &operator=(SuccessorProxy RHS) {
+ *this = reference(RHS);
+ return *this;
+ }
+
+ SuccessorProxy &operator=(reference RHS) {
+ It.Inst->setSuccessor(It.Idx, RHS);
+ return *this;
+ }
+
+ operator reference() const { return *It; }
+ };
+
+public:
+ // begin iterator
+ explicit inline SuccIterator(InstructionT *Inst) : Inst(Inst), Idx(0) {}
+ // end iterator
+ inline SuccIterator(InstructionT *Inst, bool) : Inst(Inst) {
+ if (Inst)
+ Idx = Inst->getNumSuccessors();
+ else
+ // Inst == NULL happens if a basic block is not fully constructed and

+ // consequently getTerminator() returns NULL. In this case we construct
+ // a SuccIterator which describes a basic block that has zero
+ // successors.
+ // Defining SuccIterator for incomplete and malformed CFGs is especially
+ // useful for debugging.
+ Idx = 0;
+ }
+
+ /// This is used to interface between code that wants to
+ /// operate on terminator instructions directly.
+ int getSuccessorIndex() const { return Idx; }
+
+ inline bool operator==(const Self &x) const { return Idx == x.Idx; }
+
+ inline BlockT *operator*() const { return Inst->getSuccessor(Idx); }
+
+ // We use the basic block pointer directly for operator->.
+ inline BlockT *operator->() const { return operator*(); }
+
+ inline bool operator<(const Self &RHS) const {
+ assert(Inst == RHS.Inst && "Cannot compare iterators of different blocks!");
+ return Idx < RHS.Idx;
+ }
+
+ int operator-(const Self &RHS) const {
+ assert(Inst == RHS.Inst && "Cannot compare iterators of different blocks!");
+ return Idx - RHS.Idx;
+ }
+
+ inline Self &operator+=(int RHS) {
+ int NewIdx = Idx + RHS;
+ assert(index_is_valid(NewIdx) && "Iterator index out of bound");
+ Idx = NewIdx;
+ return *this;
+ }
+
+ inline Self &operator-=(int RHS) { return operator+=(-RHS); }
+
+ // Specially implement the [] operation using a proxy object to support
+ // assignment.
+ inline SuccessorProxy operator[](int Offset) {
+ Self TmpIt = *this;
+ TmpIt += Offset;
+ return SuccessorProxy(TmpIt);
+ }
+
+ /// Get the source BlockT of this iterator.
+ inline BlockT *getSource() {
+ assert(Inst && "Source not available, if basic block was malformed");
+ return Inst->getParent();
+ }
+};
+
+template <typename T, typename U> struct isPodLike<SuccIterator<T, U>> {
+ static const bool value = isPodLike<T>::value;
+};
+
+using succ_iterator = SuccIterator<Instruction, BasicBlock>;
+using succ_const_iterator = SuccIterator<const Instruction, const BasicBlock>;
using succ_range = iterator_range<succ_iterator>;
using succ_const_range = iterator_range<succ_const_iterator>;
+inline succ_iterator succ_begin(Instruction *I) { return succ_iterator(I); }
+inline succ_const_iterator succ_begin(const Instruction *I) {
+ return succ_const_iterator(I);
+}
+inline succ_iterator succ_end(Instruction *I) { return succ_iterator(I, true); }
+inline succ_const_iterator succ_end(const Instruction *I) {
+ return succ_const_iterator(I, true);
+}
+inline bool succ_empty(const Instruction *I) {
+ return succ_begin(I) == succ_end(I);
+}
+inline unsigned succ_size(const Instruction *I) {
+ return std::distance(succ_begin(I), succ_end(I));
+}
+inline succ_range successors(Instruction *I) {
+ return succ_range(succ_begin(I), succ_end(I));
+}
+inline succ_const_range successors(const Instruction *I) {
+ return succ_const_range(succ_begin(I), succ_end(I));
+}
+
inline succ_iterator succ_begin(BasicBlock *BB) {
return succ_iterator(BB->getTerminator());
}
@@ -153,11 +293,6 @@ inline succ_const_range successors(const BasicBlock *BB) {
return succ_const_range(succ_begin(BB), succ_end(BB));
}
-template <typename T, typename U>
-struct isPodLike<TerminatorInst::SuccIterator<T, U>> {
- static const bool value = isPodLike<T>::value;
-};
-
//===--------------------------------------------------------------------===//
// GraphTraits specializations for basic block graphs (CFGs)
//===--------------------------------------------------------------------===//
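With SuccIterator now templated over Instruction, successor iteration no longer needs a TerminatorInst: a terminator is simply an Instruction whose isTerminator() is true. A short sketch using the new overloads:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

unsigned countSuccessors(BasicBlock &BB) {
  Instruction *Term = BB.getTerminator(); // now a plain Instruction *
  if (!Term)
    return 0; // malformed/incomplete block: treat as having no successors
  return succ_size(Term); // uses the Instruction-based succ_iterator above
}

void visitSuccessors(Instruction &Terminator) {
  for (BasicBlock *Succ : successors(&Terminator))
    (void)Succ; // visit each successor block here
}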
diff --git a/contrib/llvm/include/llvm/IR/CFGDiff.h b/contrib/llvm/include/llvm/IR/CFGDiff.h
new file mode 100644
index 000000000000..da4373f7bce2
--- /dev/null
+++ b/contrib/llvm/include/llvm/IR/CFGDiff.h
@@ -0,0 +1,285 @@
+//===- CFGDiff.h - Define a CFG snapshot. -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines specializations of GraphTraits that allows generic
+// algorithms to see a different snapshot of a CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_CFGDIFF_H
+#define LLVM_IR_CFGDIFF_H
+
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/Support/CFGUpdate.h"
+#include "llvm/Support/type_traits.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+
+// Two booleans are used to define orders in graphs:
+// InverseGraph defines when we need to reverse the whole graph and is as such
+// also equivalent to applying updates in reverse.
+// InverseEdge defines whether we want to change the edges direction. E.g., for
+// a non-inversed graph, the children are naturally the successors when
+// InverseEdge is false and the predecessors when InverseEdge is true.
+
+// We define two base classes that call into GraphDiff, one for successors
+// (CFGSuccessors), where InverseEdge is false, and one for predecessors
+// (CFGPredecessors), where InverseEdge is true.
+// FIXME: Further refactoring may merge the two base classes into a single one
+// templated / parametrized on using succ_iterator/pred_iterator and false/true
+// for the InverseEdge.
+
+// CFGViewSuccessors and CFGViewPredecessors, both can be parametrized to
+// consider the graph inverted or not (i.e. InverseGraph). Successors
+// implicitly has InverseEdge = false and Predecessors implicitly has
+// InverseEdge = true (see calls to GraphDiff methods in there). The GraphTraits
+// instantiations that follow define the value of InverseGraph.
+
+// GraphTraits instantiations:
+// - GraphDiff<BasicBlock *> is equivalent to InverseGraph = false
+// - GraphDiff<Inverse<BasicBlock *>> is equivalent to InverseGraph = true
+// - second pair item is BasicBlock *, then InverseEdge = false (so it inherits
+// from CFGViewSuccessors).
+// - second pair item is Inverse<BasicBlock *>, then InverseEdge = true (so it
+// inherits from CFGViewPredecessors).
+
+// The 4 GraphTraits are as follows:
+// 1. std::pair<const GraphDiff<BasicBlock *> *, BasicBlock *>> :
+// CFGViewSuccessors<false>
+// Regular CFG, children means successors, InverseGraph = false,
+// InverseEdge = false.
+// 2. std::pair<const GraphDiff<Inverse<BasicBlock *>> *, BasicBlock *>> :
+// CFGViewSuccessors<true>
+// Reverse the graph, get successors but reverse-apply updates,
+// InverseGraph = true, InverseEdge = false.
+// 3. std::pair<const GraphDiff<BasicBlock *> *, Inverse<BasicBlock *>>> :
+// CFGViewPredecessors<false>
+// Regular CFG, reverse edges, so children mean predecessors,
+// InverseGraph = false, InverseEdge = true.
+// 4. std::pair<const GraphDiff<Inverse<BasicBlock *>> *, Inverse<BasicBlock *>>
+// : CFGViewPredecessors<true>
+// Reverse the graph and the edges, InverseGraph = true, InverseEdge = true.
+
+namespace llvm {
+
+// GraphDiff defines a CFG snapshot: given a set of Update<NodePtr>, provide
+// utilities to skip edges marked as deleted and return a set of edges marked as
+// newly inserted. The current diff treats the CFG as a graph rather than a
+// multigraph. Added edges are pruned to be unique, and deleted edges will
+// remove all existing edges between two blocks.
+template <typename NodePtr, bool InverseGraph = false> class GraphDiff {
+ using UpdateMapType = SmallDenseMap<NodePtr, SmallVector<NodePtr, 2>>;
+ UpdateMapType SuccInsert;
+ UpdateMapType SuccDelete;
+ UpdateMapType PredInsert;
+ UpdateMapType PredDelete;
+ // Using a singleton empty vector for all BasicBlock requests with no
+ // children.
+ SmallVector<NodePtr, 1> Empty;
+
+ void printMap(raw_ostream &OS, const UpdateMapType &M) const {
+ for (auto Pair : M)
+ for (auto Child : Pair.second) {
+ OS << "(";
+ Pair.first->printAsOperand(OS, false);
+ OS << ", ";
+ Child->printAsOperand(OS, false);
+ OS << ") ";
+ }
+ OS << "\n";
+ }
+
+public:
+ GraphDiff() {}
+ GraphDiff(ArrayRef<cfg::Update<NodePtr>> Updates) {
+ SmallVector<cfg::Update<NodePtr>, 4> LegalizedUpdates;
+ cfg::LegalizeUpdates<NodePtr>(Updates, LegalizedUpdates, InverseGraph);
+ for (auto U : LegalizedUpdates) {
+ if (U.getKind() == cfg::UpdateKind::Insert) {
+ SuccInsert[U.getFrom()].push_back(U.getTo());
+ PredInsert[U.getTo()].push_back(U.getFrom());
+ } else {
+ SuccDelete[U.getFrom()].push_back(U.getTo());
+ PredDelete[U.getTo()].push_back(U.getFrom());
+ }
+ }
+ }
+
+ bool ignoreChild(const NodePtr BB, NodePtr EdgeEnd, bool InverseEdge) const {
+ auto &DeleteChildren =
+ (InverseEdge != InverseGraph) ? PredDelete : SuccDelete;
+ auto It = DeleteChildren.find(BB);
+ if (It == DeleteChildren.end())
+ return false;
+ auto &EdgesForBB = It->second;
+ return llvm::find(EdgesForBB, EdgeEnd) != EdgesForBB.end();
+ }
+
+ iterator_range<typename SmallVectorImpl<NodePtr>::const_iterator>
+ getAddedChildren(const NodePtr BB, bool InverseEdge) const {
+ auto &InsertChildren =
+ (InverseEdge != InverseGraph) ? PredInsert : SuccInsert;
+ auto It = InsertChildren.find(BB);
+ if (It == InsertChildren.end())
+ return make_range(Empty.begin(), Empty.end());
+ return make_range(It->second.begin(), It->second.end());
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << "===== GraphDiff: CFG edge changes to create a CFG snapshot. \n"
+ "===== (Note: notion of children/inverse_children depends on "
+ "the direction of edges and the graph.)\n";
+ OS << "Children to insert:\n\t";
+ printMap(OS, SuccInsert);
+ OS << "Children to delete:\n\t";
+ printMap(OS, SuccDelete);
+ OS << "Inverse_children to insert:\n\t";
+ printMap(OS, PredInsert);
+ OS << "Inverse_children to delete:\n\t";
+ printMap(OS, PredDelete);
+ OS << "\n";
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
+};
+
+template <bool InverseGraph = false> struct CFGViewSuccessors {
+ using DataRef = const GraphDiff<BasicBlock *, InverseGraph> *;
+ using NodeRef = std::pair<DataRef, BasicBlock *>;
+
+ using ExistingChildIterator =
+ WrappedPairNodeDataIterator<succ_iterator, NodeRef, DataRef>;
+ struct DeletedEdgesFilter {
+ BasicBlock *BB;
+ DeletedEdgesFilter(BasicBlock *BB) : BB(BB){};
+ bool operator()(NodeRef N) const {
+ return !N.first->ignoreChild(BB, N.second, false);
+ }
+ };
+ using FilterExistingChildrenIterator =
+ filter_iterator<ExistingChildIterator, DeletedEdgesFilter>;
+
+ using vec_iterator = SmallVectorImpl<BasicBlock *>::const_iterator;
+ using AddNewChildrenIterator =
+ WrappedPairNodeDataIterator<vec_iterator, NodeRef, DataRef>;
+ using ChildIteratorType =
+ concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>;
+
+ static ChildIteratorType child_begin(NodeRef N) {
+ auto InsertVec = N.first->getAddedChildren(N.second, false);
+ // filter iterator init:
+ auto firstit = make_filter_range(
+ make_range<ExistingChildIterator>({succ_begin(N.second), N.first},
+ {succ_end(N.second), N.first}),
+ DeletedEdgesFilter(N.second));
+ // new inserts iterator init:
+ auto secondit = make_range<AddNewChildrenIterator>(
+ {InsertVec.begin(), N.first}, {InsertVec.end(), N.first});
+
+ return concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>(firstit, secondit);
+ }
+
+ static ChildIteratorType child_end(NodeRef N) {
+ auto InsertVec = N.first->getAddedChildren(N.second, false);
+ // filter iterator init:
+ auto firstit = make_filter_range(
+ make_range<ExistingChildIterator>({succ_end(N.second), N.first},
+ {succ_end(N.second), N.first}),
+ DeletedEdgesFilter(N.second));
+ // new inserts iterator init:
+ auto secondit = make_range<AddNewChildrenIterator>(
+ {InsertVec.end(), N.first}, {InsertVec.end(), N.first});
+
+ return concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>(firstit, secondit);
+ }
+};
+
+template <bool InverseGraph = false> struct CFGViewPredecessors {
+ using DataRef = const GraphDiff<BasicBlock *, InverseGraph> *;
+ using NodeRef = std::pair<DataRef, BasicBlock *>;
+
+ using ExistingChildIterator =
+ WrappedPairNodeDataIterator<pred_iterator, NodeRef, DataRef>;
+ struct DeletedEdgesFilter {
+ BasicBlock *BB;
+ DeletedEdgesFilter(BasicBlock *BB) : BB(BB){};
+ bool operator()(NodeRef N) const {
+ return !N.first->ignoreChild(BB, N.second, true);
+ }
+ };
+ using FilterExistingChildrenIterator =
+ filter_iterator<ExistingChildIterator, DeletedEdgesFilter>;
+
+ using vec_iterator = SmallVectorImpl<BasicBlock *>::const_iterator;
+ using AddNewChildrenIterator =
+ WrappedPairNodeDataIterator<vec_iterator, NodeRef, DataRef>;
+ using ChildIteratorType =
+ concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>;
+
+ static ChildIteratorType child_begin(NodeRef N) {
+ auto InsertVec = N.first->getAddedChildren(N.second, true);
+ // filter iterator init:
+ auto firstit = make_filter_range(
+ make_range<ExistingChildIterator>({pred_begin(N.second), N.first},
+ {pred_end(N.second), N.first}),
+ DeletedEdgesFilter(N.second));
+ // new inserts iterator init:
+ auto secondit = make_range<AddNewChildrenIterator>(
+ {InsertVec.begin(), N.first}, {InsertVec.end(), N.first});
+
+ return concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>(firstit, secondit);
+ }
+
+ static ChildIteratorType child_end(NodeRef N) {
+ auto InsertVec = N.first->getAddedChildren(N.second, true);
+ // filter iterator init:
+ auto firstit = make_filter_range(
+ make_range<ExistingChildIterator>({pred_end(N.second), N.first},
+ {pred_end(N.second), N.first}),
+ DeletedEdgesFilter(N.second));
+ // new inserts iterator init:
+ auto secondit = make_range<AddNewChildrenIterator>(
+ {InsertVec.end(), N.first}, {InsertVec.end(), N.first});
+
+ return concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>(firstit, secondit);
+ }
+};
+
+template <>
+struct GraphTraits<
+ std::pair<const GraphDiff<BasicBlock *, false> *, BasicBlock *>>
+ : CFGViewSuccessors<false> {};
+template <>
+struct GraphTraits<
+ std::pair<const GraphDiff<BasicBlock *, true> *, BasicBlock *>>
+ : CFGViewSuccessors<true> {};
+template <>
+struct GraphTraits<
+ std::pair<const GraphDiff<BasicBlock *, false> *, Inverse<BasicBlock *>>>
+ : CFGViewPredecessors<false> {};
+template <>
+struct GraphTraits<
+ std::pair<const GraphDiff<BasicBlock *, true> *, Inverse<BasicBlock *>>>
+ : CFGViewPredecessors<true> {};
+} // end namespace llvm
+
+#endif // LLVM_IR_CFGDIFF_H
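A hedged usage sketch for GraphDiff: stage a pair of CFG updates without touching the IR, then query the snapshot. The three-argument cfg::Update construction is assumed from Support/CFGUpdate.h, and the function is purely illustrative:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFGDiff.h"
#include "llvm/Support/CFGUpdate.h"

using namespace llvm;

void snapshotExample(BasicBlock *A, BasicBlock *B, BasicBlock *C) {
  SmallVector<cfg::Update<BasicBlock *>, 2> Updates;
  Updates.push_back({cfg::UpdateKind::Delete, A, B}); // pretend A->B is gone
  Updates.push_back({cfg::UpdateKind::Insert, A, C}); // pretend A->C exists

  GraphDiff<BasicBlock *> GD(Updates);
  bool Hidden = GD.ignoreChild(A, B, /*InverseEdge=*/false);  // true: edge filtered out
  auto Added = GD.getAddedChildren(A, /*InverseEdge=*/false); // range containing C
  (void)Hidden;
  (void)Added;
}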
diff --git a/contrib/llvm/include/llvm/IR/CallSite.h b/contrib/llvm/include/llvm/IR/CallSite.h
index 2162ccb982b0..a3e78049f4be 100644
--- a/contrib/llvm/include/llvm/IR/CallSite.h
+++ b/contrib/llvm/include/llvm/IR/CallSite.h
@@ -656,10 +656,7 @@ public:
private:
IterTy getCallee() const {
- if (isCall()) // Skip Callee
- return cast<CallInst>(getInstruction())->op_end() - 1;
- else // Skip BB, BB, Callee
- return cast<InvokeInst>(getInstruction())->op_end() - 3;
+ return cast<CallBase>(getInstruction())->op_end() - 1;
}
};
diff --git a/contrib/llvm/include/llvm/IR/CallingConv.h b/contrib/llvm/include/llvm/IR/CallingConv.h
index b9c02d7ed424..49c3be960373 100644
--- a/contrib/llvm/include/llvm/IR/CallingConv.h
+++ b/contrib/llvm/include/llvm/IR/CallingConv.h
@@ -220,6 +220,9 @@ namespace CallingConv {
/// shader if tessellation is in use, or otherwise the vertex shader.
AMDGPU_ES = 96,
+ // Calling convention between AArch64 Advanced SIMD functions
+ AArch64_VectorCall = 97,
+
/// The highest possible calling convention ID. Must be some 2^k - 1.
MaxID = 1023
};
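A small sketch applying the new convention; as with any non-C calling convention, the function and each call site must agree on it:

#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

void useVectorPCS(Function &F, CallInst &Call) {
  F.setCallingConv(CallingConv::AArch64_VectorCall);
  Call.setCallingConv(CallingConv::AArch64_VectorCall);
}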
diff --git a/contrib/llvm/include/llvm/IR/Constant.h b/contrib/llvm/include/llvm/IR/Constant.h
index 5fdf0ea00f00..98437f8eff1f 100644
--- a/contrib/llvm/include/llvm/IR/Constant.h
+++ b/contrib/llvm/include/llvm/IR/Constant.h
@@ -114,7 +114,8 @@ public:
/// For aggregates (struct/array/vector) return the constant that corresponds
/// to the specified element if possible, or null if not. This can return null
- /// if the element index is a ConstantExpr, or if 'this' is a constant expr.
+ /// if the element index is a ConstantExpr, if 'this' is a constant expr or
+ /// if the constant does not fit into an uint64_t.
Constant *getAggregateElement(unsigned Elt) const;
Constant *getAggregateElement(Constant *Elt) const;
diff --git a/contrib/llvm/include/llvm/IR/Constants.h b/contrib/llvm/include/llvm/IR/Constants.h
index f9d5ebc560c7..afc93cd61d47 100644
--- a/contrib/llvm/include/llvm/IR/Constants.h
+++ b/contrib/llvm/include/llvm/IR/Constants.h
@@ -290,7 +290,11 @@ public:
static Constant *get(Type* Ty, StringRef Str);
static ConstantFP *get(LLVMContext &Context, const APFloat &V);
- static Constant *getNaN(Type *Ty, bool Negative = false, unsigned type = 0);
+ static Constant *getNaN(Type *Ty, bool Negative = false, uint64_t Payload = 0);
+ static Constant *getQNaN(Type *Ty, bool Negative = false,
+ APInt *Payload = nullptr);
+ static Constant *getSNaN(Type *Ty, bool Negative = false,
+ APInt *Payload = nullptr);
static Constant *getNegativeZero(Type *Ty);
static Constant *getInfinity(Type *Ty, bool Negative = false);
@@ -1114,6 +1118,13 @@ public:
static Constant *getSelect(Constant *C, Constant *V1, Constant *V2,
Type *OnlyIfReducedTy = nullptr);
+ /// get - Return a unary operator constant expression,
+ /// folding if possible.
+ ///
+ /// \param OnlyIfReducedTy see \a getWithOperands() docs.
+ static Constant *get(unsigned Opcode, Constant *C1, unsigned Flags = 0,
+ Type *OnlyIfReducedTy = nullptr);
+
/// get - Return a binary or shift operator constant expression,
/// folding if possible.
///
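A sketch of the extended NaN constructors above: quiet and signaling NaNs, optionally with an explicit payload (the payload is assumed to fit the type's significand):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

Constant *makeNaNs(LLVMContext &Ctx) {
  Type *FloatTy = Type::getFloatTy(Ctx);
  Constant *Quiet = ConstantFP::getQNaN(FloatTy); // default quiet NaN
  APInt Payload(32, 0x1234);                      // illustrative payload bits
  Constant *Signaling =
      ConstantFP::getSNaN(FloatTy, /*Negative=*/false, &Payload);
  (void)Signaling;
  return Quiet;
}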
diff --git a/contrib/llvm/include/llvm/IR/DIBuilder.h b/contrib/llvm/include/llvm/IR/DIBuilder.h
index 06c9421ec1d6..443332b1b23c 100644
--- a/contrib/llvm/include/llvm/IR/DIBuilder.h
+++ b/contrib/llvm/include/llvm/IR/DIBuilder.h
@@ -134,8 +134,8 @@ namespace llvm {
/// \param SplitDebugInlining Whether to emit inline debug info.
/// \param DebugInfoForProfiling Whether to emit extra debug info for
/// profile collection.
- /// \param GnuPubnames Whether to emit .debug_gnu_pubnames section instead
- /// of .debug_pubnames.
+ /// \param NameTableKind Whether to emit .debug_gnu_pubnames,
+ /// .debug_pubnames, or no pubnames at all.
DICompileUnit *
createCompileUnit(unsigned Lang, DIFile *File, StringRef Producer,
bool isOptimized, StringRef Flags, unsigned RV,
@@ -144,7 +144,9 @@ namespace llvm {
DICompileUnit::DebugEmissionKind::FullDebug,
uint64_t DWOId = 0, bool SplitDebugInlining = true,
bool DebugInfoForProfiling = false,
- bool GnuPubnames = false);
+ DICompileUnit::DebugNameTableKind NameTableKind =
+ DICompileUnit::DebugNameTableKind::Default,
+ bool RangesBaseAddress = false);
/// Create a file descriptor to hold debugging information for a file.
/// \param Filename File name.
@@ -188,9 +190,11 @@ namespace llvm {
/// type.
/// \param Name Type name.
/// \param SizeInBits Size of the type.
- /// \param Encoding DWARF encoding code, e.g. dwarf::DW_ATE_float.
+ /// \param Encoding DWARF encoding code, e.g., dwarf::DW_ATE_float.
+ /// \param Flags Optional DWARF attributes, e.g., DW_AT_endianity.
DIBasicType *createBasicType(StringRef Name, uint64_t SizeInBits,
- unsigned Encoding);
+ unsigned Encoding,
+ DINode::DIFlags Flags = DINode::FlagZero);
/// Create debugging information entry for a qualified
/// type, e.g. 'const int'.
@@ -498,11 +502,11 @@ namespace llvm {
/// \param Elements Enumeration elements.
/// \param UnderlyingType Underlying type of a C++11/ObjC fixed enum.
/// \param UniqueIdentifier A unique identifier for the enum.
- /// \param IsFixed Boolean flag indicate if this is C++11/ObjC fixed enum.
+ /// \param IsScoped Boolean flag indicating if this is a C++11/ObjC 'enum class'.
DICompositeType *createEnumerationType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
uint64_t SizeInBits, uint32_t AlignInBits, DINodeArray Elements,
- DIType *UnderlyingType, StringRef UniqueIdentifier = "", bool IsFixed = false);
+ DIType *UnderlyingType, StringRef UniqueIdentifier = "", bool IsScoped = false);
/// Create subroutine type.
/// \param ParameterTypes An array of subroutine parameter types. This
@@ -580,14 +584,14 @@ namespace llvm {
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned LineNo, DIType *Ty, bool isLocalToUnit,
DIExpression *Expr = nullptr, MDNode *Decl = nullptr,
- uint32_t AlignInBits = 0);
+ MDTuple *templateParams = nullptr, uint32_t AlignInBits = 0);
/// Identical to createGlobalVariable
/// except that the resulting DbgNode is temporary and meant to be RAUWed.
DIGlobalVariable *createTempGlobalVariableFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned LineNo, DIType *Ty, bool isLocalToUnit, MDNode *Decl = nullptr,
- uint32_t AlignInBits = 0);
+ MDTuple *templateParams = nullptr, uint32_t AlignInBits = 0);
/// Create a new descriptor for an auto variable. This is a local variable
/// that is not a subprogram parameter.
@@ -649,29 +653,28 @@ namespace llvm {
/// \param File File where this variable is defined.
/// \param LineNo Line number.
/// \param Ty Function type.
- /// \param isLocalToUnit True if this function is not externally visible.
- /// \param isDefinition True if this is a function definition.
/// \param ScopeLine Set to the beginning of the scope this starts
/// \param Flags e.g. is this function prototyped or not.
/// These flags are used to emit dwarf attributes.
- /// \param isOptimized True if optimization is ON.
+ /// \param SPFlags Additional flags specific to subprograms.
/// \param TParams Function template parameters.
/// \param ThrownTypes Exception types this function may throw.
- DISubprogram *createFunction(
- DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned ScopeLine,
- DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false,
- DITemplateParameterArray TParams = nullptr,
- DISubprogram *Decl = nullptr, DITypeArray ThrownTypes = nullptr);
+ DISubprogram *
+ createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName,
+ DIFile *File, unsigned LineNo, DISubroutineType *Ty,
+ unsigned ScopeLine, DINode::DIFlags Flags = DINode::FlagZero,
+ DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagZero,
+ DITemplateParameterArray TParams = nullptr,
+ DISubprogram *Decl = nullptr,
+ DITypeArray ThrownTypes = nullptr);
/// Identical to createFunction,
/// except that the resulting DbgNode is meant to be RAUWed.
DISubprogram *createTempFunctionFwdDecl(
DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned ScopeLine,
- DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false,
+ unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine,
+ DINode::DIFlags Flags = DINode::FlagZero,
+ DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagZero,
DITemplateParameterArray TParams = nullptr,
DISubprogram *Decl = nullptr, DITypeArray ThrownTypes = nullptr);
@@ -683,10 +686,6 @@ namespace llvm {
/// \param File File where this variable is defined.
/// \param LineNo Line number.
/// \param Ty Function type.
- /// \param isLocalToUnit True if this function is not externally visible..
- /// \param isDefinition True if this is a function definition.
- /// \param Virtuality Attributes describing virtualness. e.g. pure
- /// virtual function.
/// \param VTableIndex Index no of this method in virtual table, or -1u if
/// unrepresentable.
/// \param ThisAdjustment
@@ -695,17 +694,18 @@ namespace llvm {
/// \param VTableHolder Type that holds vtable.
/// \param Flags e.g. is this function prototyped or not.
/// These flags are used to emit dwarf attributes.
- /// \param isOptimized True if optimization is ON.
+ /// \param SPFlags Additional flags specific to subprograms.
/// \param TParams Function template parameters.
/// \param ThrownTypes Exception types this function may throw.
- DISubprogram *createMethod(
- DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned Virtuality = 0, unsigned VTableIndex = 0,
- int ThisAdjustment = 0, DIType *VTableHolder = nullptr,
- DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false,
- DITemplateParameterArray TParams = nullptr,
- DITypeArray ThrownTypes = nullptr);
+ DISubprogram *
+ createMethod(DIScope *Scope, StringRef Name, StringRef LinkageName,
+ DIFile *File, unsigned LineNo, DISubroutineType *Ty,
+ unsigned VTableIndex = 0, int ThisAdjustment = 0,
+ DIType *VTableHolder = nullptr,
+ DINode::DIFlags Flags = DINode::FlagZero,
+ DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagZero,
+ DITemplateParameterArray TParams = nullptr,
+ DITypeArray ThrownTypes = nullptr);
/// This creates new descriptor for a namespace with the specified
/// parent scope.
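A hedged sketch of the reworked createFunction call: the former isLocalToUnit/isDefinition/isOptimized booleans now travel as DISubprogram::DISPFlags (see the DebugInfoFlags.def changes below). The SPFlag* enumerator spellings and the name "foo" are assumptions for illustration:

#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"

using namespace llvm;

DISubprogram *emitFunctionDI(DIBuilder &DIB, DIScope *Scope, DIFile *File,
                             DISubroutineType *Ty, unsigned Line) {
  DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition |
                                    DISubprogram::SPFlagLocalToUnit |
                                    DISubprogram::SPFlagOptimized;
  return DIB.createFunction(Scope, "foo", /*LinkageName=*/"", File, Line, Ty,
                            /*ScopeLine=*/Line, DINode::FlagPrototyped,
                            SPFlags);
}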
diff --git a/contrib/llvm/include/llvm/IR/DataLayout.h b/contrib/llvm/include/llvm/IR/DataLayout.h
index d796a65e6129..c144d1c13c34 100644
--- a/contrib/llvm/include/llvm/IR/DataLayout.h
+++ b/contrib/llvm/include/llvm/IR/DataLayout.h
@@ -334,6 +334,9 @@ public:
/// the backends/clients are updated.
unsigned getPointerSize(unsigned AS = 0) const;
+ /// Returns the maximum pointer size over all address spaces.
+ unsigned getMaxPointerSize() const;
+
// Index size used for address calculation.
unsigned getIndexSize(unsigned AS) const;
@@ -361,6 +364,11 @@ public:
return getPointerSize(AS) * 8;
}
+ /// Returns the maximum pointer size over all address spaces.
+ unsigned getMaxPointerSizeInBits() const {
+ return getMaxPointerSize() * 8;
+ }
+
/// Size in bits of index used for address calculation in getelementptr.
unsigned getIndexSizeInBits(unsigned AS) const {
return getIndexSize(AS) * 8;
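A small sketch of the new query: when code must be conservative across address spaces with different pointer widths, size integer work by the maximum:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

APInt widestPointerMask(const DataLayout &DL) {
  unsigned Bits = DL.getMaxPointerSizeInBits(); // max over all address spaces
  return APInt::getAllOnesValue(Bits);
}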
diff --git a/contrib/llvm/include/llvm/IR/DebugInfoFlags.def b/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
index b1f5fac64232..ce117aa452aa 100644
--- a/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
+++ b/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
@@ -11,11 +11,20 @@
//
//===----------------------------------------------------------------------===//
-// TODO: Add other DW-based macros.
+#if !(defined HANDLE_DI_FLAG || defined HANDLE_DISP_FLAG)
+#error "Missing macro definition of HANDLE_DI*"
+#endif
+
#ifndef HANDLE_DI_FLAG
-#error "Missing macro definition of HANDLE_DI_FLAG"
+#define HANDLE_DI_FLAG(ID, NAME)
#endif
+#ifndef HANDLE_DISP_FLAG
+#define HANDLE_DISP_FLAG(ID, NAME)
+#endif
+
+// General flags kept in DINode.
+
HANDLE_DI_FLAG(0, Zero) // Use it as zero value.
// For example: void foo(DIFlags Flags = FlagZero).
HANDLE_DI_FLAG(1, Private)
@@ -45,9 +54,12 @@ HANDLE_DI_FLAG((1 << 20), NoReturn)
HANDLE_DI_FLAG((1 << 21), MainSubprogram)
HANDLE_DI_FLAG((1 << 22), TypePassByValue)
HANDLE_DI_FLAG((1 << 23), TypePassByReference)
-HANDLE_DI_FLAG((1 << 24), FixedEnum)
+HANDLE_DI_FLAG((1 << 24), EnumClass)
HANDLE_DI_FLAG((1 << 25), Thunk)
HANDLE_DI_FLAG((1 << 26), Trivial)
+HANDLE_DI_FLAG((1 << 27), BigEndian)
+HANDLE_DI_FLAG((1 << 28), LittleEndian)
+HANDLE_DI_FLAG((1 << 29), AllCallsDescribed)
// To avoid needing a dedicated value for IndirectVirtualBase, we use
// the bitwise or of Virtual and FwdDecl, which does not otherwise
@@ -57,8 +69,29 @@ HANDLE_DI_FLAG((1 << 2) | (1 << 5), IndirectVirtualBase)
#ifdef DI_FLAG_LARGEST_NEEDED
// intended to be used with ADT/BitmaskEnum.h
// NOTE: always must be equal to largest flag, check this when adding new flag
-HANDLE_DI_FLAG((1 << 26), Largest)
+HANDLE_DI_FLAG((1 << 29), Largest)
#undef DI_FLAG_LARGEST_NEEDED
#endif
+// Subprogram-specific flags kept in DISubprogram.
+
+// Use this as a zero/initialization value.
+// For example: void foo(DISPFlags Flags = SPFlagZero).
+HANDLE_DISP_FLAG(0, Zero)
+// Virtuality is a two-bit enum field in the LSB of the word.
+// Values should match DW_VIRTUALITY_*.
+HANDLE_DISP_FLAG(1u, Virtual)
+HANDLE_DISP_FLAG(2u, PureVirtual)
+HANDLE_DISP_FLAG((1u << 2), LocalToUnit)
+HANDLE_DISP_FLAG((1u << 3), Definition)
+HANDLE_DISP_FLAG((1u << 4), Optimized)
+
+#ifdef DISP_FLAG_LARGEST_NEEDED
+// Intended to be used with ADT/BitmaskEnum.h.
+// NOTE: Always must be equal to largest flag, check this when adding new flags.
+HANDLE_DISP_FLAG((1 << 4), Largest)
+#undef DISP_FLAG_LARGEST_NEEDED
+#endif
+
#undef HANDLE_DI_FLAG
+#undef HANDLE_DISP_FLAG
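For readers new to the HANDLE_* pattern above: a client defines one of the macros, includes the .def file, and lets it stamp out enumerators; DISubprogram's DISPFlags is built this way. A hedged sketch with an illustrative enum name:

// Illustrative consumer; the real one lives in DebugInfoMetadata.h.
enum MySPFlags : unsigned {
#define HANDLE_DISP_FLAG(ID, NAME) MySPFlag##NAME = ID,
#include "llvm/IR/DebugInfoFlags.def"
};
// Expands to: MySPFlagZero = 0, MySPFlagVirtual = 1u, MySPFlagPureVirtual = 2u,
// MySPFlagLocalToUnit = (1u << 2), MySPFlagDefinition = (1u << 3),
// MySPFlagOptimized = (1u << 4),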
diff --git a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
index 820746851104..a461d1bd4fe8 100644
--- a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -713,6 +713,8 @@ public:
bool isTypePassByReference() const {
return getFlags() & FlagTypePassByReference;
}
+ bool isBigEndian() const { return getFlags() & FlagBigEndian; }
+ bool isLittleEndian() const { return getFlags() & FlagLittleEndian; }
static bool classof(const Metadata *MD) {
switch (MD->getMetadataID()) {
@@ -739,40 +741,43 @@ class DIBasicType : public DIType {
DIBasicType(LLVMContext &C, StorageType Storage, unsigned Tag,
uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding,
- ArrayRef<Metadata *> Ops)
+ DIFlags Flags, ArrayRef<Metadata *> Ops)
: DIType(C, DIBasicTypeKind, Storage, Tag, 0, SizeInBits, AlignInBits, 0,
- FlagZero, Ops),
+ Flags, Ops),
Encoding(Encoding) {}
~DIBasicType() = default;
static DIBasicType *getImpl(LLVMContext &Context, unsigned Tag,
StringRef Name, uint64_t SizeInBits,
uint32_t AlignInBits, unsigned Encoding,
- StorageType Storage, bool ShouldCreate = true) {
+ DIFlags Flags, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(Context, Tag, getCanonicalMDString(Context, Name),
- SizeInBits, AlignInBits, Encoding, Storage, ShouldCreate);
+ SizeInBits, AlignInBits, Encoding, Flags, Storage,
+ ShouldCreate);
}
static DIBasicType *getImpl(LLVMContext &Context, unsigned Tag,
MDString *Name, uint64_t SizeInBits,
uint32_t AlignInBits, unsigned Encoding,
- StorageType Storage, bool ShouldCreate = true);
+ DIFlags Flags, StorageType Storage,
+ bool ShouldCreate = true);
TempDIBasicType cloneImpl() const {
return getTemporary(getContext(), getTag(), getName(), getSizeInBits(),
- getAlignInBits(), getEncoding());
+ getAlignInBits(), getEncoding(), getFlags());
}
public:
DEFINE_MDNODE_GET(DIBasicType, (unsigned Tag, StringRef Name),
- (Tag, Name, 0, 0, 0))
+ (Tag, Name, 0, 0, 0, FlagZero))
DEFINE_MDNODE_GET(DIBasicType,
(unsigned Tag, StringRef Name, uint64_t SizeInBits,
- uint32_t AlignInBits, unsigned Encoding),
- (Tag, Name, SizeInBits, AlignInBits, Encoding))
+ uint32_t AlignInBits, unsigned Encoding, DIFlags Flags),
+ (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags))
DEFINE_MDNODE_GET(DIBasicType,
(unsigned Tag, MDString *Name, uint64_t SizeInBits,
- uint32_t AlignInBits, unsigned Encoding),
- (Tag, Name, SizeInBits, AlignInBits, Encoding))
+ uint32_t AlignInBits, unsigned Encoding, DIFlags Flags),
+ (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags))
TempDIBasicType clone() const { return cloneImpl(); }
@@ -1162,11 +1167,21 @@ public:
NoDebug = 0,
FullDebug,
LineTablesOnly,
- LastEmissionKind = LineTablesOnly
+ DebugDirectivesOnly,
+ LastEmissionKind = DebugDirectivesOnly
+ };
+
+ enum class DebugNameTableKind : unsigned {
+ Default = 0,
+ GNU = 1,
+ None = 2,
+ LastDebugNameTableKind = None
};
static Optional<DebugEmissionKind> getEmissionKind(StringRef Str);
static const char *emissionKindString(DebugEmissionKind EK);
+ static Optional<DebugNameTableKind> getNameTableKind(StringRef Str);
+ static const char *nameTableKindString(DebugNameTableKind PK);
private:
unsigned SourceLanguage;
@@ -1176,17 +1191,20 @@ private:
uint64_t DWOId;
bool SplitDebugInlining;
bool DebugInfoForProfiling;
- bool GnuPubnames;
+ unsigned NameTableKind;
+ bool RangesBaseAddress;
DICompileUnit(LLVMContext &C, StorageType Storage, unsigned SourceLanguage,
bool IsOptimized, unsigned RuntimeVersion,
unsigned EmissionKind, uint64_t DWOId, bool SplitDebugInlining,
- bool DebugInfoForProfiling, bool GnuPubnames, ArrayRef<Metadata *> Ops)
+ bool DebugInfoForProfiling, unsigned NameTableKind,
+ bool RangesBaseAddress, ArrayRef<Metadata *> Ops)
: DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops),
SourceLanguage(SourceLanguage), IsOptimized(IsOptimized),
RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind),
DWOId(DWOId), SplitDebugInlining(SplitDebugInlining),
- DebugInfoForProfiling(DebugInfoForProfiling), GnuPubnames(GnuPubnames) {
+ DebugInfoForProfiling(DebugInfoForProfiling),
+ NameTableKind(NameTableKind), RangesBaseAddress(RangesBaseAddress) {
assert(Storage != Uniqued);
}
~DICompileUnit() = default;
@@ -1200,14 +1218,16 @@ private:
DIGlobalVariableExpressionArray GlobalVariables,
DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros,
uint64_t DWOId, bool SplitDebugInlining, bool DebugInfoForProfiling,
- bool GnuPubnames, StorageType Storage, bool ShouldCreate = true) {
- return getImpl(
- Context, SourceLanguage, File, getCanonicalMDString(Context, Producer),
- IsOptimized, getCanonicalMDString(Context, Flags), RuntimeVersion,
- getCanonicalMDString(Context, SplitDebugFilename), EmissionKind,
- EnumTypes.get(), RetainedTypes.get(), GlobalVariables.get(),
- ImportedEntities.get(), Macros.get(), DWOId, SplitDebugInlining,
- DebugInfoForProfiling, GnuPubnames, Storage, ShouldCreate);
+ unsigned NameTableKind, bool RangesBaseAddress, StorageType Storage,
+ bool ShouldCreate = true) {
+ return getImpl(Context, SourceLanguage, File,
+ getCanonicalMDString(Context, Producer), IsOptimized,
+ getCanonicalMDString(Context, Flags), RuntimeVersion,
+ getCanonicalMDString(Context, SplitDebugFilename),
+ EmissionKind, EnumTypes.get(), RetainedTypes.get(),
+ GlobalVariables.get(), ImportedEntities.get(), Macros.get(),
+ DWOId, SplitDebugInlining, DebugInfoForProfiling,
+ NameTableKind, RangesBaseAddress, Storage, ShouldCreate);
}
static DICompileUnit *
getImpl(LLVMContext &Context, unsigned SourceLanguage, Metadata *File,
@@ -1216,17 +1236,17 @@ private:
unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
Metadata *GlobalVariables, Metadata *ImportedEntities,
Metadata *Macros, uint64_t DWOId, bool SplitDebugInlining,
- bool DebugInfoForProfiling, bool GnuPubnames, StorageType Storage,
- bool ShouldCreate = true);
+ bool DebugInfoForProfiling, unsigned NameTableKind,
+ bool RangesBaseAddress, StorageType Storage, bool ShouldCreate = true);
TempDICompileUnit cloneImpl() const {
- return getTemporary(getContext(), getSourceLanguage(), getFile(),
- getProducer(), isOptimized(), getFlags(),
- getRuntimeVersion(), getSplitDebugFilename(),
- getEmissionKind(), getEnumTypes(), getRetainedTypes(),
- getGlobalVariables(), getImportedEntities(),
- getMacros(), DWOId, getSplitDebugInlining(),
- getDebugInfoForProfiling(), getGnuPubnames());
+ return getTemporary(
+ getContext(), getSourceLanguage(), getFile(), getProducer(),
+ isOptimized(), getFlags(), getRuntimeVersion(), getSplitDebugFilename(),
+ getEmissionKind(), getEnumTypes(), getRetainedTypes(),
+ getGlobalVariables(), getImportedEntities(), getMacros(), DWOId,
+ getSplitDebugInlining(), getDebugInfoForProfiling(), getNameTableKind(),
+ getRangesBaseAddress());
}
public:
@@ -1242,11 +1262,11 @@ public:
DIGlobalVariableExpressionArray GlobalVariables,
DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros,
uint64_t DWOId, bool SplitDebugInlining, bool DebugInfoForProfiling,
- bool GnuPubnames),
+ DebugNameTableKind NameTableKind, bool RangesBaseAddress),
(SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion,
SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes,
GlobalVariables, ImportedEntities, Macros, DWOId, SplitDebugInlining,
- DebugInfoForProfiling, GnuPubnames))
+ DebugInfoForProfiling, (unsigned)NameTableKind, RangesBaseAddress))
DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(
DICompileUnit,
(unsigned SourceLanguage, Metadata *File, MDString *Producer,
@@ -1254,11 +1274,12 @@ public:
MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes,
Metadata *RetainedTypes, Metadata *GlobalVariables,
Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId,
- bool SplitDebugInlining, bool DebugInfoForProfiling, bool GnuPubnames),
+ bool SplitDebugInlining, bool DebugInfoForProfiling,
+ unsigned NameTableKind, bool RangesBaseAddress),
(SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion,
SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes,
GlobalVariables, ImportedEntities, Macros, DWOId, SplitDebugInlining,
- DebugInfoForProfiling, GnuPubnames))
+ DebugInfoForProfiling, NameTableKind, RangesBaseAddress))
TempDICompileUnit clone() const { return cloneImpl(); }
@@ -1268,11 +1289,21 @@ public:
DebugEmissionKind getEmissionKind() const {
return (DebugEmissionKind)EmissionKind;
}
+ bool isDebugDirectivesOnly() const {
+ return EmissionKind == DebugDirectivesOnly;
+ }
bool getDebugInfoForProfiling() const { return DebugInfoForProfiling; }
- bool getGnuPubnames() const { return GnuPubnames; }
- StringRef getProducer() const { return getStringOperand(1); }
- StringRef getFlags() const { return getStringOperand(2); }
- StringRef getSplitDebugFilename() const { return getStringOperand(3); }
+ DebugNameTableKind getNameTableKind() const {
+ return (DebugNameTableKind)NameTableKind;
+ }
+  bool getRangesBaseAddress() const { return RangesBaseAddress; }
+  StringRef getProducer() const { return getStringOperand(1); }
+  StringRef getFlags() const { return getStringOperand(2); }
+  StringRef getSplitDebugFilename() const { return getStringOperand(3); }
DICompositeTypeArray getEnumTypes() const {
return cast_or_null<MDTuple>(getRawEnumTypes());
}
@@ -1372,19 +1403,20 @@ class DILocation : public MDNode {
friend class MDNode;
DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
- unsigned Column, ArrayRef<Metadata *> MDs);
+ unsigned Column, ArrayRef<Metadata *> MDs, bool ImplicitCode);
~DILocation() { dropAllReferences(); }
static DILocation *getImpl(LLVMContext &Context, unsigned Line,
unsigned Column, Metadata *Scope,
- Metadata *InlinedAt, StorageType Storage,
- bool ShouldCreate = true);
+ Metadata *InlinedAt, bool ImplicitCode,
+ StorageType Storage, bool ShouldCreate = true);
static DILocation *getImpl(LLVMContext &Context, unsigned Line,
unsigned Column, DILocalScope *Scope,
- DILocation *InlinedAt, StorageType Storage,
- bool ShouldCreate = true) {
+ DILocation *InlinedAt, bool ImplicitCode,
+ StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Line, Column, static_cast<Metadata *>(Scope),
- static_cast<Metadata *>(InlinedAt), Storage, ShouldCreate);
+ static_cast<Metadata *>(InlinedAt), ImplicitCode, Storage,
+ ShouldCreate);
}
/// With a given unsigned int \p U, use up to 13 bits to represent it.
@@ -1398,6 +1430,9 @@ class DILocation : public MDNode {
/// Reverse transformation as getPrefixEncodingFromUnsigned.
static unsigned getUnsignedFromPrefixEncoding(unsigned U) {
+ if (U & 1)
+ return 0;
+ U >>= 1;
return (U & 0x20) ? (((U >> 1) & 0xfe0) | (U & 0x1f)) : (U & 0x1f);
}
@@ -1413,7 +1448,15 @@ class DILocation : public MDNode {
// Get the raw scope/inlinedAt since it is possible to invoke this on
// a DILocation containing temporary metadata.
return getTemporary(getContext(), getLine(), getColumn(), getRawScope(),
- getRawInlinedAt());
+ getRawInlinedAt(), isImplicitCode());
+ }
+
+ static unsigned encodeComponent(unsigned C) {
+ return (C == 0) ? 1U : (getPrefixEncodingFromUnsigned(C) << 1);
+ }
+
+ static unsigned encodingBits(unsigned C) {
+ return (C == 0) ? 1 : (C > 0x1f ? 14 : 7);
}
public:
@@ -1422,12 +1465,13 @@ public:
DEFINE_MDNODE_GET(DILocation,
(unsigned Line, unsigned Column, Metadata *Scope,
- Metadata *InlinedAt = nullptr),
- (Line, Column, Scope, InlinedAt))
+ Metadata *InlinedAt = nullptr, bool ImplicitCode = false),
+ (Line, Column, Scope, InlinedAt, ImplicitCode))
DEFINE_MDNODE_GET(DILocation,
(unsigned Line, unsigned Column, DILocalScope *Scope,
- DILocation *InlinedAt = nullptr),
- (Line, Column, Scope, InlinedAt))
+ DILocation *InlinedAt = nullptr,
+ bool ImplicitCode = false),
+ (Line, Column, Scope, InlinedAt, ImplicitCode))
/// Return a (temporary) clone of this.
TempDILocation clone() const { return cloneImpl(); }
@@ -1440,6 +1484,15 @@ public:
return cast_or_null<DILocation>(getRawInlinedAt());
}
+  /// Check if the location corresponds to implicit code.
+  /// When the ImplicitCode flag is true, the Instruction carrying this
+  /// DILocation was added by the front-end and was not written explicitly
+  /// by the user (e.g. C++ cleanup code emitted at a closing brace). This
+  /// lets code coverage avoid showing a counter on such "empty" lines.
+ bool isImplicitCode() const { return ImplicitCode; }
+ void setImplicitCode(bool ImplicitCode) { this->ImplicitCode = ImplicitCode; }
+
DIFile *getFile() const { return getScope()->getFile(); }
StringRef getFilename() const { return getScope()->getFilename(); }
StringRef getDirectory() const { return getScope()->getDirectory(); }
@@ -1455,19 +1508,6 @@ public:
return getScope();
}
- /// Check whether this can be discriminated from another location.
- ///
- /// Check \c this can be discriminated from \c RHS in a linetable entry.
- /// Scope and inlined-at chains are not recorded in the linetable, so they
- /// cannot be used to distinguish basic blocks.
- bool canDiscriminate(const DILocation &RHS) const {
- return getLine() != RHS.getLine() ||
- getColumn() != RHS.getColumn() ||
- getDiscriminator() != RHS.getDiscriminator() ||
- getFilename() != RHS.getFilename() ||
- getDirectory() != RHS.getDirectory();
- }
-
/// Get the DWARF discriminator.
///
/// DWARF discriminators distinguish identical file locations between
@@ -1489,20 +1529,35 @@ public:
/// order. If the lowest bit is 1, the current component is empty, and the
/// next component will start in the next bit. Otherwise, the current
/// component is non-empty, and its content starts in the next bit. The
- /// length of each components is either 5 bit or 12 bit: if the 7th bit
+  /// value of each component is encoded in either 5 or 12 bits: if the 7th bit
/// is 0, the bit 2~6 (5 bits) are used to represent the component; if the
/// 7th bit is 1, the bit 2~6 (5 bits) and 8~14 (7 bits) are combined to
- /// represent the component.
+  /// represent the component. Thus, the number of bits used for a component
+  /// is either 0 (if it and all following components are empty); 1 (if it is
+  /// empty); 7 (if its value is at most 0x1f, so lsb and msb are both 0); or
+  /// 14 (if its value is at most 0x1ff). Note that the last component is
+  /// also capped at 0x1ff, even when the first two components are 0 and 29
+  /// bits would technically be available.
+ ///
+ /// For precise control over the data being encoded in the discriminator,
+ /// use encodeDiscriminator/decodeDiscriminator.
+ ///
+  /// Read the documentation for {get|set}BaseDiscriminator and
+  /// cloneWithDuplicationFactor before using them, as their behavior has
+  /// side-effects.
inline unsigned getDiscriminator() const;
/// Returns a new DILocation with updated \p Discriminator.
inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
- /// Returns a new DILocation with updated base discriminator \p BD.
- inline const DILocation *setBaseDiscriminator(unsigned BD) const;
+ /// Returns a new DILocation with updated base discriminator \p BD. Only the
+  /// base discriminator is set in the new DILocation; the other encoded values
+ /// are elided.
+ /// If the discriminator cannot be encoded, the function returns None.
+ inline Optional<const DILocation *> setBaseDiscriminator(unsigned BD) const;
- /// Returns the duplication factor stored in the discriminator.
+ /// Returns the duplication factor stored in the discriminator, or 1 if no
+ /// duplication factor (or 0) is encoded.
inline unsigned getDuplicationFactor() const;
/// Returns the copy identifier stored in the discriminator.
@@ -1511,11 +1566,11 @@ public:
/// Returns the base discriminator stored in the discriminator.
inline unsigned getBaseDiscriminator() const;
- /// Returns a new DILocation with duplication factor \p DF encoded in the
- /// discriminator.
- inline const DILocation *cloneWithDuplicationFactor(unsigned DF) const;
-
- enum { NoGeneratedLocation = false, WithGeneratedLocation = true };
+ /// Returns a new DILocation with duplication factor \p DF * current
+ /// duplication factor encoded in the discriminator. The current duplication
+ /// factor is as defined by getDuplicationFactor().
+ /// Returns None if encoding failed.
+ inline Optional<const DILocation *> cloneWithDuplicationFactor(unsigned DF) const;
/// When two instructions are combined into a single instruction we also
/// need to combine the original locations into a single location.
@@ -1531,25 +1586,36 @@ public:
///
/// \p GenerateLocation: Whether the merged location can be generated when
/// \p LocA and \p LocB differ.
- static const DILocation *
- getMergedLocation(const DILocation *LocA, const DILocation *LocB,
- bool GenerateLocation = NoGeneratedLocation);
+ static const DILocation *getMergedLocation(const DILocation *LocA,
+ const DILocation *LocB);
/// Returns the base discriminator for a given encoded discriminator \p D.
static unsigned getBaseDiscriminatorFromDiscriminator(unsigned D) {
- if ((D & 1) == 0)
- return getUnsignedFromPrefixEncoding(D >> 1);
- else
- return 0;
+ return getUnsignedFromPrefixEncoding(D);
}
- /// Returns the duplication factor for a given encoded discriminator \p D.
+ /// Raw encoding of the discriminator. APIs such as setBaseDiscriminator or
+ /// cloneWithDuplicationFactor have certain side-effects. This API, in
+ /// conjunction with cloneWithDiscriminator, may be used to encode precisely
+ /// the values provided. \p BD: base discriminator \p DF: duplication factor
+ /// \p CI: copy index
+  /// The result is None if the values cannot be encoded in 32 bits (for
+  /// example, if BD or DF needs more than 12 bits); otherwise it is the
+  /// encoded value.
+ static Optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF, unsigned CI);
+
+ /// Raw decoder for values in an encoded discriminator D.
+ static void decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
+ unsigned &CI);
+
+ /// Returns the duplication factor for a given encoded discriminator \p D, or
+ /// 1 if no value or 0 is encoded.
static unsigned getDuplicationFactorFromDiscriminator(unsigned D) {
D = getNextComponentInDiscriminator(D);
- if (D == 0 || (D & 1))
+ unsigned Ret = getUnsignedFromPrefixEncoding(D);
+ if (Ret == 0)
return 1;
- else
- return getUnsignedFromPrefixEncoding(D >> 1);
+ return Ret;
}
/// Returns the copy identifier for a given encoded discriminator \p D.
@@ -1588,102 +1654,118 @@ class DISubprogram : public DILocalScope {
/// negative.
int ThisAdjustment;
- // Virtuality can only assume three values, so we can pack
- // in 2 bits (none/pure/pure_virtual).
- unsigned Virtuality : 2;
+public:
+ /// Debug info subprogram flags.
+ enum DISPFlags : uint32_t {
+#define HANDLE_DISP_FLAG(ID, NAME) SPFlag##NAME = ID,
+#define DISP_FLAG_LARGEST_NEEDED
+#include "llvm/IR/DebugInfoFlags.def"
+ SPFlagNonvirtual = SPFlagZero,
+ SPFlagVirtuality = SPFlagVirtual | SPFlagPureVirtual,
+ LLVM_MARK_AS_BITMASK_ENUM(SPFlagLargest)
+ };
- // These are boolean flags so one bit is enough.
- // MSVC starts a new container field every time the base
- // type changes so we can't use 'bool' to ensure these bits
- // are packed.
- unsigned IsLocalToUnit : 1;
- unsigned IsDefinition : 1;
- unsigned IsOptimized : 1;
+ static DISPFlags getFlag(StringRef Flag);
+ static StringRef getFlagString(DISPFlags Flag);
- unsigned Padding : 3;
+ /// Split up a flags bitfield for easier printing.
+ ///
+ /// Split \c Flags into \c SplitFlags, a vector of its components. Returns
+ /// any remaining (unrecognized) bits.
+ static DISPFlags splitFlags(DISPFlags Flags,
+ SmallVectorImpl<DISPFlags> &SplitFlags);
+
+ // Helper for converting old bitfields to new flags word.
+ static DISPFlags toSPFlags(bool IsLocalToUnit, bool IsDefinition,
+ bool IsOptimized,
+ unsigned Virtuality = SPFlagNonvirtual) {
+ // We're assuming virtuality is the low-order field.
+ static_assert(
+ int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) &&
+ int(SPFlagPureVirtual) == int(dwarf::DW_VIRTUALITY_pure_virtual),
+ "Virtuality constant mismatch");
+ return static_cast<DISPFlags>(
+ (Virtuality & SPFlagVirtuality) |
+ (IsLocalToUnit ? SPFlagLocalToUnit : SPFlagZero) |
+ (IsDefinition ? SPFlagDefinition : SPFlagZero) |
+ (IsOptimized ? SPFlagOptimized : SPFlagZero));
+ }
+private:
DIFlags Flags;
+ DISPFlags SPFlags;
DISubprogram(LLVMContext &C, StorageType Storage, unsigned Line,
- unsigned ScopeLine, unsigned Virtuality, unsigned VirtualIndex,
- int ThisAdjustment, DIFlags Flags, bool IsLocalToUnit,
- bool IsDefinition, bool IsOptimized, ArrayRef<Metadata *> Ops)
+ unsigned ScopeLine, unsigned VirtualIndex, int ThisAdjustment,
+ DIFlags Flags, DISPFlags SPFlags, ArrayRef<Metadata *> Ops)
: DILocalScope(C, DISubprogramKind, Storage, dwarf::DW_TAG_subprogram,
Ops),
Line(Line), ScopeLine(ScopeLine), VirtualIndex(VirtualIndex),
- ThisAdjustment(ThisAdjustment), Virtuality(Virtuality),
- IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition),
- IsOptimized(IsOptimized), Flags(Flags) {
+ ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags) {
static_assert(dwarf::DW_VIRTUALITY_max < 4, "Virtuality out of range");
- assert(Virtuality < 4 && "Virtuality out of range");
}
~DISubprogram() = default;
static DISubprogram *
getImpl(LLVMContext &Context, DIScopeRef Scope, StringRef Name,
StringRef LinkageName, DIFile *File, unsigned Line,
- DISubroutineType *Type, bool IsLocalToUnit, bool IsDefinition,
- unsigned ScopeLine, DITypeRef ContainingType, unsigned Virtuality,
+ DISubroutineType *Type, unsigned ScopeLine, DITypeRef ContainingType,
unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
- bool IsOptimized, DICompileUnit *Unit,
+ DISPFlags SPFlags, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams, DISubprogram *Declaration,
DINodeArray RetainedNodes, DITypeArray ThrownTypes,
StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
- IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
- Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
- Unit, TemplateParams.get(), Declaration, RetainedNodes.get(),
- ThrownTypes.get(), Storage, ShouldCreate);
+ ScopeLine, ContainingType, VirtualIndex, ThisAdjustment,
+ Flags, SPFlags, Unit, TemplateParams.get(), Declaration,
+ RetainedNodes.get(), ThrownTypes.get(), Storage,
+ ShouldCreate);
}
- static DISubprogram *
- getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
- MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
- Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
- int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit,
- Metadata *TemplateParams, Metadata *Declaration, Metadata *RetainedNodes,
- Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate = true);
+ static DISubprogram *getImpl(LLVMContext &Context, Metadata *Scope,
+ MDString *Name, MDString *LinkageName,
+ Metadata *File, unsigned Line, Metadata *Type,
+ unsigned ScopeLine, Metadata *ContainingType,
+ unsigned VirtualIndex, int ThisAdjustment,
+ DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
+ Metadata *TemplateParams, Metadata *Declaration,
+ Metadata *RetainedNodes, Metadata *ThrownTypes,
+ StorageType Storage, bool ShouldCreate = true);
TempDISubprogram cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
- getFile(), getLine(), getType(), isLocalToUnit(),
- isDefinition(), getScopeLine(), getContainingType(),
- getVirtuality(), getVirtualIndex(), getThisAdjustment(),
- getFlags(), isOptimized(), getUnit(),
- getTemplateParams(), getDeclaration(), getRetainedNodes(),
- getThrownTypes());
+ getFile(), getLine(), getType(), getScopeLine(),
+ getContainingType(), getVirtualIndex(),
+ getThisAdjustment(), getFlags(), getSPFlags(),
+ getUnit(), getTemplateParams(), getDeclaration(),
+ getRetainedNodes(), getThrownTypes());
}
public:
- DEFINE_MDNODE_GET(DISubprogram,
- (DIScopeRef Scope, StringRef Name, StringRef LinkageName,
- DIFile *File, unsigned Line, DISubroutineType *Type,
- bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
- DITypeRef ContainingType, unsigned Virtuality,
- unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
- bool IsOptimized, DICompileUnit *Unit,
- DITemplateParameterArray TemplateParams = nullptr,
- DISubprogram *Declaration = nullptr,
- DINodeArray RetainedNodes = nullptr,
- DITypeArray ThrownTypes = nullptr),
- (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, ScopeLine, ContainingType, Virtuality,
- VirtualIndex, ThisAdjustment, Flags, IsOptimized, Unit,
- TemplateParams, Declaration, RetainedNodes, ThrownTypes))
+ DEFINE_MDNODE_GET(
+ DISubprogram,
+ (DIScopeRef Scope, StringRef Name, StringRef LinkageName, DIFile *File,
+ unsigned Line, DISubroutineType *Type, unsigned ScopeLine,
+ DITypeRef ContainingType, unsigned VirtualIndex, int ThisAdjustment,
+ DIFlags Flags, DISPFlags SPFlags, DICompileUnit *Unit,
+ DITemplateParameterArray TemplateParams = nullptr,
+ DISubprogram *Declaration = nullptr, DINodeArray RetainedNodes = nullptr,
+ DITypeArray ThrownTypes = nullptr),
+ (Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType,
+ VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams,
+ Declaration, RetainedNodes, ThrownTypes))
+
DEFINE_MDNODE_GET(
DISubprogram,
(Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File,
- unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
- unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality,
- unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
- bool IsOptimized, Metadata *Unit, Metadata *TemplateParams = nullptr,
- Metadata *Declaration = nullptr, Metadata *RetainedNodes = nullptr,
- Metadata *ThrownTypes = nullptr),
- (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
- ScopeLine, ContainingType, Virtuality, VirtualIndex, ThisAdjustment,
- Flags, IsOptimized, Unit, TemplateParams, Declaration, RetainedNodes,
- ThrownTypes))
+ unsigned Line, Metadata *Type, unsigned ScopeLine,
+ Metadata *ContainingType, unsigned VirtualIndex, int ThisAdjustment,
+ DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
+ Metadata *TemplateParams = nullptr, Metadata *Declaration = nullptr,
+ Metadata *RetainedNodes = nullptr, Metadata *ThrownTypes = nullptr),
+ (Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType,
+ VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams,
+ Declaration, RetainedNodes, ThrownTypes))
TempDISubprogram clone() const { return cloneImpl(); }
@@ -1696,14 +1778,15 @@ public:
public:
unsigned getLine() const { return Line; }
- unsigned getVirtuality() const { return Virtuality; }
+ unsigned getVirtuality() const { return getSPFlags() & SPFlagVirtuality; }
unsigned getVirtualIndex() const { return VirtualIndex; }
int getThisAdjustment() const { return ThisAdjustment; }
unsigned getScopeLine() const { return ScopeLine; }
DIFlags getFlags() const { return Flags; }
- bool isLocalToUnit() const { return IsLocalToUnit; }
- bool isDefinition() const { return IsDefinition; }
- bool isOptimized() const { return IsOptimized; }
+ DISPFlags getSPFlags() const { return SPFlags; }
+ bool isLocalToUnit() const { return getSPFlags() & SPFlagLocalToUnit; }
+ bool isDefinition() const { return getSPFlags() & SPFlagDefinition; }
+ bool isOptimized() const { return getSPFlags() & SPFlagOptimized; }
bool isArtificial() const { return getFlags() & FlagArtificial; }
bool isPrivate() const {
@@ -1717,6 +1800,9 @@ public:
}
bool isExplicit() const { return getFlags() & FlagExplicit; }
bool isPrototyped() const { return getFlags() & FlagPrototyped; }
+ bool areAllCallsDescribed() const {
+ return getFlags() & FlagAllCallsDescribed;
+ }
bool isMainSubprogram() const { return getFlags() & FlagMainSubprogram; }
/// Check if this is reference-qualified.
@@ -1953,28 +2039,24 @@ unsigned DILocation::getCopyIdentifier() const {
return getCopyIdentifierFromDiscriminator(getDiscriminator());
}
-const DILocation *DILocation::setBaseDiscriminator(unsigned D) const {
+Optional<const DILocation *> DILocation::setBaseDiscriminator(unsigned D) const {
if (D == 0)
return this;
- else
- return cloneWithDiscriminator(getPrefixEncodingFromUnsigned(D) << 1);
+ if (D > 0xfff)
+ return None;
+ return cloneWithDiscriminator(encodeComponent(D));
}
-const DILocation *DILocation::cloneWithDuplicationFactor(unsigned DF) const {
+Optional<const DILocation *> DILocation::cloneWithDuplicationFactor(unsigned DF) const {
DF *= getDuplicationFactor();
if (DF <= 1)
return this;
unsigned BD = getBaseDiscriminator();
- unsigned CI = getCopyIdentifier() << (DF > 0x1f ? 14 : 7);
- unsigned D = CI | (getPrefixEncodingFromUnsigned(DF) << 1);
-
- if (BD == 0)
- D = (D << 1) | 1;
- else
- D = (D << (BD > 0x1f ? 14 : 7)) | (getPrefixEncodingFromUnsigned(BD) << 1);
-
- return cloneWithDiscriminator(D);
+ unsigned CI = getCopyIdentifier();
+ if (Optional<unsigned> D = encodeDiscriminator(BD, DF, CI))
+ return cloneWithDiscriminator(*D);
+ return None;
}
class DINamespace : public DIScope {
@@ -2515,30 +2597,30 @@ class DIGlobalVariable : public DIVariable {
IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition) {}
~DIGlobalVariable() = default;
- static DIGlobalVariable *getImpl(LLVMContext &Context, DIScope *Scope,
- StringRef Name, StringRef LinkageName,
- DIFile *File, unsigned Line, DITypeRef Type,
- bool IsLocalToUnit, bool IsDefinition,
- DIDerivedType *StaticDataMemberDeclaration,
- uint32_t AlignInBits, StorageType Storage,
- bool ShouldCreate = true) {
+ static DIGlobalVariable *
+ getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name,
+ StringRef LinkageName, DIFile *File, unsigned Line, DITypeRef Type,
+ bool IsLocalToUnit, bool IsDefinition,
+ DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams,
+ uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
IsLocalToUnit, IsDefinition, StaticDataMemberDeclaration,
- AlignInBits, Storage, ShouldCreate);
+ cast_or_null<Metadata>(TemplateParams), AlignInBits, Storage,
+ ShouldCreate);
}
static DIGlobalVariable *
getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition,
- Metadata *StaticDataMemberDeclaration, uint32_t AlignInBits,
- StorageType Storage, bool ShouldCreate = true);
+ Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams,
+ uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true);
TempDIGlobalVariable cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
getFile(), getLine(), getType(), isLocalToUnit(),
isDefinition(), getStaticDataMemberDeclaration(),
- getAlignInBits());
+ getTemplateParams(), getAlignInBits());
}
public:
@@ -2547,17 +2629,19 @@ public:
DIFile *File, unsigned Line, DITypeRef Type,
bool IsLocalToUnit, bool IsDefinition,
DIDerivedType *StaticDataMemberDeclaration,
- uint32_t AlignInBits),
+ MDTuple *TemplateParams, uint32_t AlignInBits),
(Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, StaticDataMemberDeclaration, AlignInBits))
+ IsDefinition, StaticDataMemberDeclaration, TemplateParams,
+ AlignInBits))
DEFINE_MDNODE_GET(DIGlobalVariable,
(Metadata * Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition,
Metadata *StaticDataMemberDeclaration,
- uint32_t AlignInBits),
+ Metadata *TemplateParams, uint32_t AlignInBits),
(Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, StaticDataMemberDeclaration, AlignInBits))
+ IsDefinition, StaticDataMemberDeclaration, TemplateParams,
+ AlignInBits))
TempDIGlobalVariable clone() const { return cloneImpl(); }
@@ -2571,6 +2655,8 @@ public:
MDString *getRawLinkageName() const { return getOperandAs<MDString>(5); }
Metadata *getRawStaticDataMemberDeclaration() const { return getOperand(6); }
+ Metadata *getRawTemplateParams() const { return getOperand(7); }
+ MDTuple *getTemplateParams() const { return getOperandAs<MDTuple>(7); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DIGlobalVariableKind;
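Several DILocation discriminator helpers above now return Optional<const DILocation *>, since the prefix encoding can run out of bits. Below is a minimal sketch of adapting a caller, assuming 'Loc' (a const DILocation *) and 'BD' already exist in the caller; the fallback policy is illustrative only.

// Sketch: handling the Optional-returning setBaseDiscriminator added above.
const DILocation *Updated = Loc;   // keep the original location by default
if (Optional<const DILocation *> NewLoc = Loc->setBaseDiscriminator(BD))
  Updated = *NewLoc;               // only the base discriminator is encoded
// If BD needs more than 12 bits it cannot be encoded and NewLoc stays None.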
diff --git a/contrib/llvm/include/llvm/IR/DebugLoc.h b/contrib/llvm/include/llvm/IR/DebugLoc.h
index 9f619ffc5c4d..4f0d7f51b5f9 100644
--- a/contrib/llvm/include/llvm/IR/DebugLoc.h
+++ b/contrib/llvm/include/llvm/IR/DebugLoc.h
@@ -78,7 +78,8 @@ namespace llvm {
///
/// FIXME: Remove this. Users should use DILocation::get().
static DebugLoc get(unsigned Line, unsigned Col, const MDNode *Scope,
- const MDNode *InlinedAt = nullptr);
+ const MDNode *InlinedAt = nullptr,
+ bool ImplicitCode = false);
enum { ReplaceLastInlinedAt = true };
/// Rebuild the entire inlined-at chain for this instruction so that the top of
@@ -112,6 +113,10 @@ namespace llvm {
/// Return \c this as a bar \a MDNode.
MDNode *getAsMDNode() const { return Loc; }
+ /// Check if the DebugLoc corresponds to an implicit code.
+ bool isImplicitCode() const;
+ void setImplicitCode(bool ImplicitCode);
+
bool operator==(const DebugLoc &DL) const { return Loc == DL.Loc; }
bool operator!=(const DebugLoc &DL) const { return Loc != DL.Loc; }
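The DebugLoc wrapper mirrors the new DILocation ImplicitCode bit. A minimal sketch of how a front end might tag compiler-generated cleanup code, assuming 'Line', 'Col', and 'Scope' come from the caller (DebugLoc::get is already flagged above as slated for removal in favor of DILocation::get).

// Sketch: marking a front-end-generated location as implicit code.
DebugLoc DL = DebugLoc::get(Line, Col, Scope, /*InlinedAt=*/nullptr,
                            /*ImplicitCode=*/true);
// Later, e.g. in coverage instrumentation, the flag can be queried back:
bool SkipCounter = DL.isImplicitCode();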
diff --git a/contrib/llvm/include/llvm/IR/DiagnosticInfo.h b/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
index 81d4ae84bf01..3a55a7dca7f4 100644
--- a/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -101,6 +101,7 @@ private:
/// Severity gives the severity of the diagnostic.
const DiagnosticSeverity Severity;
+ virtual void anchor();
public:
DiagnosticInfo(/* DiagnosticKind */ int Kind, DiagnosticSeverity Severity)
: Kind(Kind), Severity(Severity) {}
@@ -210,6 +211,7 @@ public:
};
class DiagnosticInfoStackSize : public DiagnosticInfoResourceLimit {
+ virtual void anchor() override;
public:
DiagnosticInfoStackSize(const Function &Fn, uint64_t StackSize,
DiagnosticSeverity Severity = DS_Warning,
@@ -340,7 +342,7 @@ private:
};
class DiagnosticLocation {
- StringRef Filename;
+ DIFile *File = nullptr;
unsigned Line = 0;
unsigned Column = 0;
@@ -349,14 +351,18 @@ public:
DiagnosticLocation(const DebugLoc &DL);
DiagnosticLocation(const DISubprogram *SP);
- bool isValid() const { return !Filename.empty(); }
- StringRef getFilename() const { return Filename; }
+ bool isValid() const { return File; }
+ /// Return the full path to the file.
+ std::string getAbsolutePath() const;
+ /// Return the file name relative to the compilation directory.
+ StringRef getRelativePath() const;
unsigned getLine() const { return Line; }
unsigned getColumn() const { return Column; }
};
/// Common features for diagnostics with an associated location.
class DiagnosticInfoWithLocationBase : public DiagnosticInfo {
+ virtual void anchor() override;
public:
/// \p Fn is the function where the diagnostic is being emitted. \p Loc is
/// the location information to use in the diagnostic.
@@ -375,9 +381,13 @@ public:
const std::string getLocationStr() const;
/// Return location information for this diagnostic in three parts:
- /// the source file name, line number and column.
- void getLocation(StringRef *Filename, unsigned *Line, unsigned *Column) const;
+ /// the relative source file path, line number and column.
+ void getLocation(StringRef &RelativePath, unsigned &Line,
+ unsigned &Column) const;
+  /// Return the absolute path to the file.
+ std::string getAbsolutePath() const;
+
const Function &getFunction() const { return Fn; }
DiagnosticLocation getLocation() const { return Loc; }
@@ -414,6 +424,7 @@ public:
Argument(StringRef Key, const Value *V);
Argument(StringRef Key, const Type *T);
Argument(StringRef Key, StringRef S);
+ Argument(StringRef Key, const char *S) : Argument(Key, StringRef(S)) {};
Argument(StringRef Key, int N);
Argument(StringRef Key, float N);
Argument(StringRef Key, long N);
@@ -590,6 +601,7 @@ operator<<(RemarkT &R,
/// Common features for diagnostics dealing with optimization remarks
/// that are used by IR passes.
class DiagnosticInfoIROptimization : public DiagnosticInfoOptimizationBase {
+ virtual void anchor() override;
public:
/// \p PassName is the name of the pass emitting this diagnostic. \p
/// RemarkName is a textual identifier for the remark (single-word,
@@ -810,6 +822,7 @@ private:
/// Diagnostic information for optimization analysis remarks related to
/// floating-point non-commutativity.
class OptimizationRemarkAnalysisFPCommute : public OptimizationRemarkAnalysis {
+ virtual void anchor();
public:
/// \p PassName is the name of the pass emitting this diagnostic. If this name
/// matches the regular expression given in -Rpass-analysis=, then the
@@ -851,6 +864,7 @@ private:
/// Diagnostic information for optimization analysis remarks related to
/// pointer aliasing.
class OptimizationRemarkAnalysisAliasing : public OptimizationRemarkAnalysis {
+ virtual void anchor();
public:
/// \p PassName is the name of the pass emitting this diagnostic. If this name
/// matches the regular expression given in -Rpass-analysis=, then the
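DiagnosticLocation now stores a DIFile rather than a bare filename, so remark consumers can choose between the path relative to the compilation directory and the absolute path. A minimal sketch of an updated consumer, assuming 'Diag' is some DiagnosticInfoWithLocationBase-derived remark available in the caller.

// Sketch: using the reference-based getLocation and getAbsolutePath above.
if (Diag.getLocation().isValid()) {
  StringRef RelPath;
  unsigned Line = 0, Column = 0;
  Diag.getLocation(RelPath, Line, Column);   // path relative to the comp dir
  errs() << Diag.getAbsolutePath() << ":" << Line << ":" << Column << "\n";
}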
diff --git a/contrib/llvm/include/llvm/IR/DomTreeUpdater.h b/contrib/llvm/include/llvm/IR/DomTreeUpdater.h
index 81ba670ac0f5..e5bb092d21ca 100644
--- a/contrib/llvm/include/llvm/IR/DomTreeUpdater.h
+++ b/contrib/llvm/include/llvm/IR/DomTreeUpdater.h
@@ -159,11 +159,9 @@ public:
void callbackDeleteBB(BasicBlock *DelBB,
std::function<void(BasicBlock *)> Callback);
- /// Recalculate all available trees.
- /// Under Lazy Strategy, available trees will only be recalculated if there
- /// are pending updates or there is BasicBlock awaiting deletion. Returns true
- /// if at least one tree is recalculated.
- bool recalculate(Function &F);
+ /// Recalculate all available trees and flush all BasicBlocks
+ /// awaiting deletion immediately.
+ void recalculate(Function &F);
/// Flush DomTree updates and return DomTree.
   /// It also flushes out-of-date updates applied by all available trees
diff --git a/contrib/llvm/include/llvm/IR/Dominators.h b/contrib/llvm/include/llvm/IR/Dominators.h
index f9e992b0ef0c..f7da47d07663 100644
--- a/contrib/llvm/include/llvm/IR/Dominators.h
+++ b/contrib/llvm/include/llvm/IR/Dominators.h
@@ -37,15 +37,18 @@ extern template class DomTreeNodeBase<BasicBlock>;
extern template class DominatorTreeBase<BasicBlock, false>; // DomTree
extern template class DominatorTreeBase<BasicBlock, true>; // PostDomTree
+extern template class cfg::Update<BasicBlock *>;
+
namespace DomTreeBuilder {
using BBDomTree = DomTreeBase<BasicBlock>;
using BBPostDomTree = PostDomTreeBase<BasicBlock>;
-extern template struct Update<BasicBlock *>;
-
-using BBUpdates = ArrayRef<Update<BasicBlock *>>;
+using BBUpdates = ArrayRef<llvm::cfg::Update<BasicBlock *>>;
extern template void Calculate<BBDomTree>(BBDomTree &DT);
+extern template void CalculateWithUpdates<BBDomTree>(BBDomTree &DT,
+ BBUpdates U);
+
extern template void Calculate<BBPostDomTree>(BBPostDomTree &DT);
extern template void InsertEdge<BBDomTree>(BBDomTree &DT, BasicBlock *From,
@@ -145,6 +148,9 @@ class DominatorTree : public DominatorTreeBase<BasicBlock, false> {
DominatorTree() = default;
explicit DominatorTree(Function &F) { recalculate(F); }
+ explicit DominatorTree(DominatorTree &DT, DomTreeBuilder::BBUpdates U) {
+ recalculate(*DT.Parent, U);
+ }
/// Handle invalidation explicitly.
bool invalidate(Function &F, const PreservedAnalyses &PA,
@@ -276,94 +282,6 @@ public:
void print(raw_ostream &OS, const Module *M = nullptr) const override;
};
-
-//===-------------------------------------
-/// Class to defer updates to a DominatorTree.
-///
-/// Definition: Applying updates to every edge insertion and deletion is
-/// expensive and not necessary. When one needs the DominatorTree for analysis
-/// they can request a flush() to perform a larger batch update. This has the
-/// advantage of the DominatorTree inspecting the set of updates to find
-/// duplicates or unnecessary subtree updates.
-///
-/// The scope of DeferredDominance operates at a Function level.
-///
-/// It is not necessary for the user to scrub the updates for duplicates or
-/// updates that point to the same block (Delete, BB_A, BB_A). Performance
-/// can be gained if the caller attempts to batch updates before submitting
-/// to applyUpdates(ArrayRef) in cases where duplicate edge requests will
-/// occur.
-///
-/// It is required for the state of the LLVM IR to be applied *before*
-/// submitting updates. The update routines must analyze the current state
-/// between a pair of (From, To) basic blocks to determine if the update
-/// needs to be queued.
-/// Example (good):
-/// TerminatorInstructionBB->removeFromParent();
-/// DDT->deleteEdge(BB, Successor);
-/// Example (bad):
-/// DDT->deleteEdge(BB, Successor);
-/// TerminatorInstructionBB->removeFromParent();
-class DeferredDominance {
-public:
- DeferredDominance(DominatorTree &DT_) : DT(DT_) {}
-
- /// Queues multiple updates and discards duplicates.
- void applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates);
-
- /// Helper method for a single edge insertion. It's almost always
- /// better to batch updates and call applyUpdates to quickly remove duplicate
- /// edges. This is best used when there is only a single insertion needed to
- /// update Dominators.
- void insertEdge(BasicBlock *From, BasicBlock *To);
-
- /// Helper method for a single edge deletion. It's almost always better
- /// to batch updates and call applyUpdates to quickly remove duplicate edges.
- /// This is best used when there is only a single deletion needed to update
- /// Dominators.
- void deleteEdge(BasicBlock *From, BasicBlock *To);
-
- /// Delays the deletion of a basic block until a flush() event.
- void deleteBB(BasicBlock *DelBB);
-
- /// Returns true if DelBB is awaiting deletion at a flush() event.
- bool pendingDeletedBB(BasicBlock *DelBB);
-
- /// Returns true if pending DT updates are queued for a flush() event.
- bool pending();
-
- /// Flushes all pending updates and block deletions. Returns a
- /// correct DominatorTree reference to be used by the caller for analysis.
- DominatorTree &flush();
-
- /// Drops all internal state and forces a (slow) recalculation of the
- /// DominatorTree based on the current state of the LLVM IR in F. This should
- /// only be used in corner cases such as the Entry block of F being deleted.
- void recalculate(Function &F);
-
- /// Debug method to help view the state of pending updates.
- LLVM_DUMP_METHOD void dump() const;
-
-private:
- DominatorTree &DT;
- SmallVector<DominatorTree::UpdateType, 16> PendUpdates;
- SmallPtrSet<BasicBlock *, 8> DeletedBBs;
-
- /// Apply an update (Kind, From, To) to the internal queued updates. The
- /// update is only added when determined to be necessary. Checks for
- /// self-domination, unnecessary updates, duplicate requests, and balanced
- /// pairs of requests are all performed. Returns true if the update is
- /// queued and false if it is discarded.
- bool applyUpdate(DominatorTree::UpdateKind Kind, BasicBlock *From,
- BasicBlock *To);
-
- /// Performs all pending basic block deletions. We have to defer the deletion
- /// of these blocks until after the DominatorTree updates are applied. The
- /// internal workings of the DominatorTree code expect every update's From
- /// and To blocks to exist and to be a member of the same Function.
- bool flushDelBB();
-};
-
} // end namespace llvm
#endif // LLVM_IR_DOMINATORS_H
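With the DeferredDominance helper removed (DomTreeUpdater, touched earlier in this diff, takes over that role) and cfg::Update<BasicBlock *> now an explicit instantiation, a DominatorTree can be constructed directly from an existing tree plus a batch of CFG updates. A minimal sketch, with 'DT', 'From', and 'To' assumed to exist in the caller.

// Sketch: seeding a DominatorTree with pending CFG edits via the new ctor.
SmallVector<cfg::Update<BasicBlock *>, 4> Updates;
Updates.push_back({cfg::UpdateKind::Insert, From, To});
DominatorTree NewDT(DT, Updates);   // recalculates DT's function with Updates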
diff --git a/contrib/llvm/include/llvm/IR/Function.h b/contrib/llvm/include/llvm/IR/Function.h
index 02e3ecc8e27f..630f47e8bb57 100644
--- a/contrib/llvm/include/llvm/IR/Function.h
+++ b/contrib/llvm/include/llvm/IR/Function.h
@@ -120,7 +120,7 @@ private:
/// function is automatically inserted into the end of the function list for
/// the module.
///
- Function(FunctionType *Ty, LinkageTypes Linkage,
+ Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
const Twine &N = "", Module *M = nullptr);
public:
@@ -134,17 +134,31 @@ public:
const Function &getFunction() const { return *this; }
static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
+ unsigned AddrSpace, const Twine &N = "",
+ Module *M = nullptr) {
+ return new Function(Ty, Linkage, AddrSpace, N, M);
+ }
+
+ // TODO: remove this once all users have been updated to pass an AddrSpace
+ static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
const Twine &N = "", Module *M = nullptr) {
- return new Function(Ty, Linkage, N, M);
+ return new Function(Ty, Linkage, static_cast<unsigned>(-1), N, M);
}
+ /// Creates a new function and attaches it to a module.
+ ///
+ /// Places the function in the program address space as specified
+ /// by the module's data layout.
+ static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
+ const Twine &N, Module &M);
+
// Provide fast operand accessors.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
/// Returns the number of non-debug IR instructions in this function.
/// This is equivalent to the sum of the sizes of each basic block contained
/// within this function.
- unsigned getInstructionCount();
+ unsigned getInstructionCount() const;
/// Returns the FunctionType for me.
FunctionType *getFunctionType() const {
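Function creation now carries an explicit address space, plus a convenience overload that reads the program address space from the module's data layout. A minimal sketch of the two new paths, assuming 'FTy' (a FunctionType *) and a Module 'M' exist in the caller.

// Sketch: the Create overloads added above.
Function *F1 = Function::Create(FTy, GlobalValue::ExternalLinkage,
                                /*AddrSpace=*/0, "f1", &M);
// Reads the program address space from M.getDataLayout():
Function *F2 = Function::Create(FTy, GlobalValue::ExternalLinkage, "f2", M);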
diff --git a/contrib/llvm/include/llvm/IR/GlobalValue.h b/contrib/llvm/include/llvm/IR/GlobalValue.h
index 9d9f4f65a6b5..c07d4051c803 100644
--- a/contrib/llvm/include/llvm/IR/GlobalValue.h
+++ b/contrib/llvm/include/llvm/IR/GlobalValue.h
@@ -189,6 +189,7 @@ public:
GlobalValue(const GlobalValue &) = delete;
unsigned getAlignment() const;
+ unsigned getAddressSpace() const;
enum class UnnamedAddr {
None,
diff --git a/contrib/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm/include/llvm/IR/IRBuilder.h
index 70641ba25d2e..fac2ff46c453 100644
--- a/contrib/llvm/include/llvm/IR/IRBuilder.h
+++ b/contrib/llvm/include/llvm/IR/IRBuilder.h
@@ -651,7 +651,7 @@ public:
ArrayRef<Use> DeoptArgs, ArrayRef<Value *> GCArgs,
const Twine &Name = "");
- // Conveninence function for the common case when CallArgs are filled in using
+ // Convenience function for the common case when CallArgs are filled in using
// makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be .get()'ed to
// get the Value *.
InvokeInst *
@@ -675,32 +675,44 @@ public:
Type *ResultType,
const Twine &Name = "");
+ /// Create a call to intrinsic \p ID with 1 operand which is mangled on its
+ /// type.
+ CallInst *CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
+ Instruction *FMFSource = nullptr,
+ const Twine &Name = "");
+
/// Create a call to intrinsic \p ID with 2 operands which is mangled on the
/// first type.
- CallInst *CreateBinaryIntrinsic(Intrinsic::ID ID,
- Value *LHS, Value *RHS,
+ CallInst *CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS,
+ Instruction *FMFSource = nullptr,
const Twine &Name = "");
- /// Create a call to intrinsic \p ID with no operands.
- CallInst *CreateIntrinsic(Intrinsic::ID ID,
- Instruction *FMFSource = nullptr,
- const Twine &Name = "");
-
- /// Create a call to intrinsic \p ID with 1 or more operands assuming the
- /// intrinsic and all operands have the same type. If \p FMFSource is
- /// provided, copy fast-math-flags from that instruction to the intrinsic.
- CallInst *CreateIntrinsic(Intrinsic::ID ID, ArrayRef<Value *> Args,
+  /// Create a call to intrinsic \p ID with \p Args, mangled using \p Types. If
+ /// \p FMFSource is provided, copy fast-math-flags from that instruction to
+ /// the intrinsic.
+ CallInst *CreateIntrinsic(Intrinsic::ID ID, ArrayRef<Type *> Types,
+ ArrayRef<Value *> Args,
Instruction *FMFSource = nullptr,
const Twine &Name = "");
/// Create call to the minnum intrinsic.
CallInst *CreateMinNum(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS, Name);
+ return CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS, nullptr, Name);
}
/// Create call to the maxnum intrinsic.
CallInst *CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS, Name);
+ return CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS, nullptr, Name);
+ }
+
+ /// Create call to the minimum intrinsic.
+ CallInst *CreateMinimum(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateBinaryIntrinsic(Intrinsic::minimum, LHS, RHS, nullptr, Name);
+ }
+
+ /// Create call to the maximum intrinsic.
+ CallInst *CreateMaximum(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS, nullptr, Name);
}
private:
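The intrinsic helpers are reshuffled: CreateIntrinsic now takes the explicit overload types, and dedicated unary/binary helpers (plus the new minimum/maximum wrappers) accept an FMFSource instruction whose fast-math flags are copied onto the created call. A minimal sketch, assuming 'Builder', the values 'X', 'A', 'B', and a fast-math-flagged instruction 'FMFSrc' exist in the caller.

// Sketch: using the reworked intrinsic creation helpers.
CallInst *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FMFSrc);
CallInst *Min = Builder.CreateBinaryIntrinsic(Intrinsic::minnum, A, B, FMFSrc);
Value *Max = Builder.CreateMaximum(A, B, "max");   // new wrapper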
@@ -877,19 +889,59 @@ public:
}
/// Create an invoke instruction.
- InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
- BasicBlock *UnwindDest,
+ InvokeInst *CreateInvoke(FunctionType *Ty, Value *Callee,
+ BasicBlock *NormalDest, BasicBlock *UnwindDest,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> OpBundles,
+ const Twine &Name = "") {
+ return Insert(
+ InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args, OpBundles),
+ Name);
+ }
+ InvokeInst *CreateInvoke(FunctionType *Ty, Value *Callee,
+ BasicBlock *NormalDest, BasicBlock *UnwindDest,
ArrayRef<Value *> Args = None,
const Twine &Name = "") {
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args),
+ return Insert(InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args),
Name);
}
+
+ InvokeInst *CreateInvoke(Function *Callee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> OpBundles,
+ const Twine &Name = "") {
+ return CreateInvoke(Callee->getFunctionType(), Callee, NormalDest,
+ UnwindDest, Args, OpBundles, Name);
+ }
+
+ InvokeInst *CreateInvoke(Function *Callee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest,
+ ArrayRef<Value *> Args = None,
+ const Twine &Name = "") {
+ return CreateInvoke(Callee->getFunctionType(), Callee, NormalDest,
+ UnwindDest, Args, Name);
+ }
+
+ // Deprecated [opaque pointer types]
InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
BasicBlock *UnwindDest, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> OpBundles,
const Twine &Name = "") {
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
- OpBundles), Name);
+ return CreateInvoke(
+ cast<FunctionType>(
+ cast<PointerType>(Callee->getType())->getElementType()),
+ Callee, NormalDest, UnwindDest, Args, OpBundles, Name);
+ }
+
+ // Deprecated [opaque pointer types]
+ InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest,
+ ArrayRef<Value *> Args = None,
+ const Twine &Name = "") {
+ return CreateInvoke(
+ cast<FunctionType>(
+ cast<PointerType>(Callee->getType())->getElementType()),
+ Callee, NormalDest, UnwindDest, Args, Name);
}
ResumeInst *CreateResume(Value *Exn) {
@@ -1300,22 +1352,35 @@ public:
return Insert(new AllocaInst(Ty, DL.getAllocaAddrSpace(), ArraySize), Name);
}
- /// Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of
+ /// Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of
/// converting the string to 'bool' for the isVolatile parameter.
- LoadInst *CreateLoad(Value *Ptr, const char *Name) {
- return Insert(new LoadInst(Ptr), Name);
- }
-
- LoadInst *CreateLoad(Value *Ptr, const Twine &Name = "") {
- return Insert(new LoadInst(Ptr), Name);
+ LoadInst *CreateLoad(Type *Ty, Value *Ptr, const char *Name) {
+ return Insert(new LoadInst(Ty, Ptr), Name);
}
LoadInst *CreateLoad(Type *Ty, Value *Ptr, const Twine &Name = "") {
return Insert(new LoadInst(Ty, Ptr), Name);
}
+ LoadInst *CreateLoad(Type *Ty, Value *Ptr, bool isVolatile,
+ const Twine &Name = "") {
+ return Insert(new LoadInst(Ty, Ptr, Twine(), isVolatile), Name);
+ }
+
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateLoad(Value *Ptr, const char *Name) {
+ return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name);
+ }
+
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateLoad(Value *Ptr, const Twine &Name = "") {
+ return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name);
+ }
+
+ // Deprecated [opaque pointer types]
LoadInst *CreateLoad(Value *Ptr, bool isVolatile, const Twine &Name = "") {
- return Insert(new LoadInst(Ptr, nullptr, isVolatile), Name);
+ return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, isVolatile,
+ Name);
}
StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
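The load builders now take the loaded type explicitly; the pointee-derived overloads remain but are tagged deprecated ahead of opaque pointer types. A minimal sketch of migrating a call site, assuming 'Builder' and an i32* value 'Ptr' exist in the caller.

// Sketch: preferring the typed load overloads added above.
LoadInst *Old = Builder.CreateLoad(Ptr, "v");                    // deprecated
LoadInst *New = Builder.CreateLoad(Builder.getInt32Ty(), Ptr, "v");
LoadInst *Aligned =
    Builder.CreateAlignedLoad(Builder.getInt32Ty(), Ptr, /*Align=*/4, "v");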
@@ -1325,24 +1390,43 @@ public:
/// Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")'
/// correctly, instead of converting the string to 'bool' for the isVolatile
/// parameter.
- LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name) {
- LoadInst *LI = CreateLoad(Ptr, Name);
+ LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
+ const char *Name) {
+ LoadInst *LI = CreateLoad(Ty, Ptr, Name);
LI->setAlignment(Align);
return LI;
}
- LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align,
+ LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
const Twine &Name = "") {
- LoadInst *LI = CreateLoad(Ptr, Name);
+ LoadInst *LI = CreateLoad(Ty, Ptr, Name);
LI->setAlignment(Align);
return LI;
}
- LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, bool isVolatile,
- const Twine &Name = "") {
- LoadInst *LI = CreateLoad(Ptr, isVolatile, Name);
+ LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
+ bool isVolatile, const Twine &Name = "") {
+ LoadInst *LI = CreateLoad(Ty, Ptr, isVolatile, Name);
LI->setAlignment(Align);
return LI;
}
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name) {
+ return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
+ Align, Name);
+ }
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align,
+ const Twine &Name = "") {
+ return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
+ Align, Name);
+ }
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, bool isVolatile,
+ const Twine &Name = "") {
+ return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
+ Align, isVolatile, Name);
+ }
+
StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align,
bool isVolatile = false) {
StoreInst *SI = CreateStore(Val, Ptr, isVolatile);
@@ -1479,50 +1563,69 @@ public:
return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idxs), Name);
}
- Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
+ Value *CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0,
+ const Twine &Name = "") {
Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
if (auto *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idx), Name);
+ return Insert(Folder.CreateGetElementPtr(Ty, PC, Idx), Name);
- return Insert(GetElementPtrInst::Create(nullptr, Ptr, Idx), Name);
+ return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name);
}
- Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
+ Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
+ return CreateConstGEP1_64(nullptr, Ptr, Idx0, Name);
+ }
+
+ Value *CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0,
const Twine &Name = "") {
Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
if (auto *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idx), Name);
+ return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idx), Name);
- return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, Idx), Name);
+ return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name);
}
- Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
- const Twine &Name = "") {
+ Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
+ const Twine &Name = "") {
+ return CreateConstInBoundsGEP1_64(nullptr, Ptr, Idx0, Name);
+ }
+
+ Value *CreateConstGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+ const Twine &Name = "") {
Value *Idxs[] = {
ConstantInt::get(Type::getInt64Ty(Context), Idx0),
ConstantInt::get(Type::getInt64Ty(Context), Idx1)
};
if (auto *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idxs), Name);
+ return Insert(Folder.CreateGetElementPtr(Ty, PC, Idxs), Name);
- return Insert(GetElementPtrInst::Create(nullptr, Ptr, Idxs), Name);
+ return Insert(GetElementPtrInst::Create(Ty, Ptr, Idxs), Name);
}
- Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
- const Twine &Name = "") {
+ Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+ const Twine &Name = "") {
+ return CreateConstGEP2_64(nullptr, Ptr, Idx0, Idx1, Name);
+ }
+
+ Value *CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0,
+ uint64_t Idx1, const Twine &Name = "") {
Value *Idxs[] = {
ConstantInt::get(Type::getInt64Ty(Context), Idx0),
ConstantInt::get(Type::getInt64Ty(Context), Idx1)
};
if (auto *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idxs),
- Name);
+ return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idxs), Name);
- return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, Idxs), Name);
+ return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idxs), Name);
+ }
+
+ Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+ const Twine &Name = "") {
+ return CreateConstInBoundsGEP2_64(nullptr, Ptr, Idx0, Idx1, Name);
}
Value *CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx,
@@ -1868,15 +1971,8 @@ public:
return Insert(PHINode::Create(Ty, NumReservedValues), Name);
}
- CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
- const Twine &Name = "", MDNode *FPMathTag = nullptr) {
- auto *PTy = cast<PointerType>(Callee->getType());
- auto *FTy = cast<FunctionType>(PTy->getElementType());
- return CreateCall(FTy, Callee, Args, Name, FPMathTag);
- }
-
CallInst *CreateCall(FunctionType *FTy, Value *Callee,
- ArrayRef<Value *> Args, const Twine &Name = "",
+ ArrayRef<Value *> Args = None, const Twine &Name = "",
MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles);
if (isa<FPMathOperator>(CI))
@@ -1884,20 +1980,44 @@ public:
return Insert(CI, Name);
}
- CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
+ CallInst *CreateCall(FunctionType *FTy, Value *Callee, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> OpBundles,
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
- CallInst *CI = CallInst::Create(Callee, Args, OpBundles);
+ CallInst *CI = CallInst::Create(FTy, Callee, Args, OpBundles);
if (isa<FPMathOperator>(CI))
CI = cast<CallInst>(setFPAttrs(CI, FPMathTag, FMF));
return Insert(CI, Name);
}
- CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args,
+ CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args = None,
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
return CreateCall(Callee->getFunctionType(), Callee, Args, Name, FPMathTag);
}
+ CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> OpBundles,
+ const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+ return CreateCall(Callee->getFunctionType(), Callee, Args, OpBundles, Name,
+ FPMathTag);
+ }
+
+ // Deprecated [opaque pointer types]
+ CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
+ const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+ return CreateCall(
+ cast<FunctionType>(Callee->getType()->getPointerElementType()), Callee,
+ Args, Name, FPMathTag);
+ }
+
+ // Deprecated [opaque pointer types]
+ CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> OpBundles,
+ const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+ return CreateCall(
+ cast<FunctionType>(Callee->getType()->getPointerElementType()), Callee,
+ Args, OpBundles, Name, FPMathTag);
+ }
+
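A short sketch of the reshuffled CreateCall overloads (F, FnTy, FnPtr and Arg
are assumed values): the FunctionType-carrying forms are now the primary API,
while the plain Value* callee forms above are kept only for the deprecation
period:

    // Function* callee: the type is taken from F->getFunctionType().
    CallInst *C1 = B.CreateCall(F, {Arg});
    // Raw callee value: pass the FunctionType explicitly.
    CallInst *C2 = B.CreateCall(FnTy, FnPtr, {Arg});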
Value *CreateSelect(Value *C, Value *True, Value *False,
const Twine &Name = "", Instruction *MDFrom = nullptr) {
if (auto *CC = dyn_cast<Constant>(C))
@@ -2114,11 +2234,12 @@ public:
private:
/// Helper function that creates an assume intrinsic call that
/// represents an alignment assumption on the provided Ptr, Mask, Type
- /// and Offset.
+ /// and Offset. It can sometimes be useful to perform additional logic based
+ /// on this alignment check; for that purpose, the check can also be stored
+ /// into 'TheCheck'.
CallInst *CreateAlignmentAssumptionHelper(const DataLayout &DL,
Value *PtrValue, Value *Mask,
- Type *IntPtrTy,
- Value *OffsetValue) {
+ Type *IntPtrTy, Value *OffsetValue,
+ Value **TheCheck) {
Value *PtrIntValue = CreatePtrToInt(PtrValue, IntPtrTy, "ptrint");
if (OffsetValue) {
@@ -2137,6 +2258,9 @@ private:
Value *Zero = ConstantInt::get(IntPtrTy, 0);
Value *MaskedPtr = CreateAnd(PtrIntValue, Mask, "maskedptr");
Value *InvCond = CreateICmpEQ(MaskedPtr, Zero, "maskcond");
+ if (TheCheck)
+ *TheCheck = InvCond;
+
return CreateAssumption(InvCond);
}
@@ -2147,9 +2271,13 @@ public:
/// An optional offset can be provided, and if it is provided, the offset
/// must be subtracted from the provided pointer to get the pointer with the
/// specified alignment.
+ ///
+ /// It can sometimes be useful to perform additional logic based on this
+ /// alignment check; for that purpose, the check can also be stored into 'TheCheck'.
CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue,
unsigned Alignment,
- Value *OffsetValue = nullptr) {
+ Value *OffsetValue = nullptr,
+ Value **TheCheck = nullptr) {
assert(isa<PointerType>(PtrValue->getType()) &&
"trying to create an alignment assumption on a non-pointer?");
auto *PtrTy = cast<PointerType>(PtrValue->getType());
@@ -2157,7 +2285,7 @@ public:
Value *Mask = ConstantInt::get(IntPtrTy, Alignment > 0 ? Alignment - 1 : 0);
return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
- OffsetValue);
+ OffsetValue, TheCheck);
}
/// Create an assume intrinsic call that represents an alignment
@@ -2167,11 +2295,15 @@ public:
/// must be subtracted from the provided pointer to get the pointer with the
/// specified alignment.
///
+ /// It can sometimes be useful to perform additional logic based on this
+ /// alignment check; for that purpose, the check can also be stored into 'TheCheck'.
+ ///
/// This overload handles the condition where the Alignment is dependent
/// on an existing value rather than a static value.
CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue,
Value *Alignment,
- Value *OffsetValue = nullptr) {
+ Value *OffsetValue = nullptr,
+ Value **TheCheck = nullptr) {
assert(isa<PointerType>(PtrValue->getType()) &&
"trying to create an alignment assumption on a non-pointer?");
auto *PtrTy = cast<PointerType>(PtrValue->getType());
@@ -2189,7 +2321,7 @@ public:
ConstantInt::get(IntPtrTy, 0), "mask");
return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
- OffsetValue);
+ OffsetValue, TheCheck);
}
};
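A sketch of the new out-parameter on the alignment-assumption helpers (DL, Ptr
and B are assumed): callers can now retrieve the i1 "maskcond" compare that
feeds the generated llvm.assume and reuse it for their own logic:

    Value *AlignCheck = nullptr;
    CallInst *Assume = B.CreateAlignmentAssumption(
        DL, Ptr, /*Alignment=*/16, /*OffsetValue=*/nullptr, &AlignCheck);
    // AlignCheck now holds the ICmpEQ emitted for the assumption.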
diff --git a/contrib/llvm/include/llvm/IR/IRPrintingPasses.h b/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
index e4ac5d4d88a3..75f80567dbd5 100644
--- a/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
+++ b/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
@@ -58,6 +58,22 @@ void printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name);
/// Return true if a pass is for IR printing.
bool isIRPrintingPass(Pass *P);
+/// isFunctionInPrintList - returns true if a function should be printed via
+/// debugging options like -print-after-all/-print-before-all, i.e. whether
+/// the function's IR should be printed by PrinterPass.
+extern bool isFunctionInPrintList(StringRef FunctionName);
+
+/// forcePrintModuleIR - returns true if IR printing passes should print
+/// module IR (even for local-pass printers, e.g. function passes) to provide
+/// more context, as enabled by the debugging option -print-module-scope.
+extern bool forcePrintModuleIR();
+
+/// shouldPrintBeforePass/shouldPrintAfterPass - hooks for the -print-before,
+/// -print-before-all, -print-after and -print-after-all debugging options;
+/// the StringRef overloads test a specific pass by its command-line argument.
+extern bool shouldPrintBeforePass();
+extern bool shouldPrintBeforePass(StringRef);
+extern bool shouldPrintAfterPass();
+extern bool shouldPrintAfterPass(StringRef);
+
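An illustrative guard that a printer pass might build from these hooks (PassArg,
F and OS are assumed; this is not code from the patch):

    if (shouldPrintAfterPass(PassArg) && isFunctionInPrintList(F.getName())) {
      if (forcePrintModuleIR())
        F.getParent()->print(OS, /*AAW=*/nullptr); // -print-module-scope
      else
        F.print(OS);
    }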
/// Pass for printing a Module as LLVM's text IR assembly.
///
/// Note: This pass is for use with the new pass manager. Use the create...Pass
diff --git a/contrib/llvm/include/llvm/IR/InstVisitor.h b/contrib/llvm/include/llvm/IR/InstVisitor.h
index 65074025a083..c5b4c6f71d7d 100644
--- a/contrib/llvm/include/llvm/IR/InstVisitor.h
+++ b/contrib/llvm/include/llvm/IR/InstVisitor.h
@@ -166,15 +166,6 @@ public:
// Specific Instruction type classes... note that all of the casts are
// necessary because we use the instruction classes as opaque types...
//
- RetTy visitReturnInst(ReturnInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitBranchInst(BranchInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitSwitchInst(SwitchInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitResumeInst(ResumeInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitCleanupReturnInst(CleanupReturnInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitCatchReturnInst(CatchReturnInst &I) { DELEGATE(TerminatorInst); }
- RetTy visitCatchSwitchInst(CatchSwitchInst &I) { DELEGATE(TerminatorInst);}
RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);}
RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);}
RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);}
@@ -211,10 +202,12 @@ public:
RetTy visitCatchPadInst(CatchPadInst &I) { DELEGATE(FuncletPadInst); }
// Handle the special intrinsic instruction classes.
- RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgInfoIntrinsic);}
- RetTy visitDbgValueInst(DbgValueInst &I) { DELEGATE(DbgInfoIntrinsic);}
+ RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgVariableIntrinsic);}
+ RetTy visitDbgValueInst(DbgValueInst &I) { DELEGATE(DbgVariableIntrinsic);}
+ RetTy visitDbgVariableIntrinsic(DbgVariableIntrinsic &I)
+ { DELEGATE(DbgInfoIntrinsic);}
RetTy visitDbgLabelInst(DbgLabelInst &I) { DELEGATE(DbgInfoIntrinsic);}
- RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { DELEGATE(IntrinsicInst); }
+ RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I){ DELEGATE(IntrinsicInst); }
RetTy visitMemSetInst(MemSetInst &I) { DELEGATE(MemIntrinsic); }
RetTy visitMemCpyInst(MemCpyInst &I) { DELEGATE(MemTransferInst); }
RetTy visitMemMoveInst(MemMoveInst &I) { DELEGATE(MemTransferInst); }
@@ -234,27 +227,64 @@ public:
return static_cast<SubClass*>(this)->visitCallSite(&I);
}
+ // While terminators don't have a distinct type modeling them, we support
+ // intercepting them with a dedicated visitor callback.
+ RetTy visitReturnInst(ReturnInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitBranchInst(BranchInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitSwitchInst(SwitchInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitIndirectBrInst(IndirectBrInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitResumeInst(ResumeInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitUnreachableInst(UnreachableInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitCleanupReturnInst(CleanupReturnInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitCatchReturnInst(CatchReturnInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitCatchSwitchInst(CatchSwitchInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitTerminator(Instruction &I) { DELEGATE(Instruction);}
+
// Next level propagators: If the user does not overload a specific
// instruction type, they can overload one of these to get the whole class
// of instructions...
//
RetTy visitCastInst(CastInst &I) { DELEGATE(UnaryInstruction);}
+ RetTy visitUnaryOperator(UnaryOperator &I) { DELEGATE(UnaryInstruction);}
RetTy visitBinaryOperator(BinaryOperator &I) { DELEGATE(Instruction);}
RetTy visitCmpInst(CmpInst &I) { DELEGATE(Instruction);}
- RetTy visitTerminatorInst(TerminatorInst &I) { DELEGATE(Instruction);}
RetTy visitUnaryInstruction(UnaryInstruction &I){ DELEGATE(Instruction);}
- // Provide a special visitor for a 'callsite' that visits both calls and
- // invokes. When unimplemented, properly delegates to either the terminator or
- // regular instruction visitor.
+ // The next level delegation for `CallBase` is slightly more complex in order
+ // to support visiting cases where the call is also a terminator.
+ RetTy visitCallBase(CallBase &I) {
+ if (isa<InvokeInst>(I))
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+
+ DELEGATE(Instruction);
+ }
+
+ // Provide a legacy visitor for a 'callsite' that visits both calls and
+ // invokes.
+ //
+ // Prefer overriding the type system based `CallBase` instead.
RetTy visitCallSite(CallSite CS) {
assert(CS);
Instruction &I = *CS.getInstruction();
- if (CS.isCall())
- DELEGATE(Instruction);
-
- assert(CS.isInvoke());
- DELEGATE(TerminatorInst);
+ DELEGATE(CallBase);
}
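A minimal sketch of a visitor written against the new hooks (CountingVisitor is
a hypothetical client, not part of this change):

    #include "llvm/IR/InstVisitor.h"
    using namespace llvm;

    struct CountingVisitor : public InstVisitor<CountingVisitor> {
      unsigned Terminators = 0, Calls = 0;
      // ret/br/switch/unreachable/... all funnel through here now.
      void visitTerminator(Instruction &I) { ++Terminators; }
      // Both CallInst and InvokeInst reach this via visitCallSite/CallBase.
      void visitCallBase(CallBase &CB) { ++Calls; }
    };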
// If the user wants a 'default' case, they can choose to override this
diff --git a/contrib/llvm/include/llvm/IR/InstrTypes.h b/contrib/llvm/include/llvm/IR/InstrTypes.h
index ad0012048ac9..3f384a6ee40c 100644
--- a/contrib/llvm/include/llvm/IR/InstrTypes.h
+++ b/contrib/llvm/include/llvm/IR/InstrTypes.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
@@ -45,233 +46,9 @@
namespace llvm {
-//===----------------------------------------------------------------------===//
-// TerminatorInst Class
-//===----------------------------------------------------------------------===//
-
-/// Subclasses of this class are all able to terminate a basic
-/// block. Thus, these are all the flow control type of operations.
-///
-class TerminatorInst : public Instruction {
-protected:
- TerminatorInst(Type *Ty, Instruction::TermOps iType,
- Use *Ops, unsigned NumOps,
- Instruction *InsertBefore = nullptr)
- : Instruction(Ty, iType, Ops, NumOps, InsertBefore) {}
-
- TerminatorInst(Type *Ty, Instruction::TermOps iType,
- Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd)
- : Instruction(Ty, iType, Ops, NumOps, InsertAtEnd) {}
-
-public:
- /// Return the number of successors that this terminator has.
- unsigned getNumSuccessors() const;
-
- /// Return the specified successor.
- BasicBlock *getSuccessor(unsigned idx) const;
-
- /// Update the specified successor to point at the provided block.
- void setSuccessor(unsigned idx, BasicBlock *B);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Instruction *I) {
- return I->isTerminator();
- }
- static bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-
- // Returns true if this terminator relates to exception handling.
- bool isExceptional() const {
- switch (getOpcode()) {
- case Instruction::CatchSwitch:
- case Instruction::CatchRet:
- case Instruction::CleanupRet:
- case Instruction::Invoke:
- case Instruction::Resume:
- return true;
- default:
- return false;
- }
- }
-
- //===--------------------------------------------------------------------===//
- // succ_iterator definition
- //===--------------------------------------------------------------------===//
-
- template <class Term, class BB> // Successor Iterator
- class SuccIterator : public std::iterator<std::random_access_iterator_tag, BB,
- int, BB *, BB *> {
- using super =
- std::iterator<std::random_access_iterator_tag, BB, int, BB *, BB *>;
-
- public:
- using pointer = typename super::pointer;
- using reference = typename super::reference;
-
- private:
- Term TermInst;
- unsigned idx;
- using Self = SuccIterator<Term, BB>;
-
- inline bool index_is_valid(unsigned idx) {
- return idx < TermInst->getNumSuccessors();
- }
-
- /// Proxy object to allow write access in operator[]
- class SuccessorProxy {
- Self it;
-
- public:
- explicit SuccessorProxy(const Self &it) : it(it) {}
-
- SuccessorProxy(const SuccessorProxy &) = default;
-
- SuccessorProxy &operator=(SuccessorProxy r) {
- *this = reference(r);
- return *this;
- }
-
- SuccessorProxy &operator=(reference r) {
- it.TermInst->setSuccessor(it.idx, r);
- return *this;
- }
-
- operator reference() const { return *it; }
- };
-
- public:
- // begin iterator
- explicit inline SuccIterator(Term T) : TermInst(T), idx(0) {}
- // end iterator
- inline SuccIterator(Term T, bool) : TermInst(T) {
- if (TermInst)
- idx = TermInst->getNumSuccessors();
- else
- // Term == NULL happens, if a basic block is not fully constructed and
- // consequently getTerminator() returns NULL. In this case we construct
- // a SuccIterator which describes a basic block that has zero
- // successors.
- // Defining SuccIterator for incomplete and malformed CFGs is especially
- // useful for debugging.
- idx = 0;
- }
-
- /// This is used to interface between code that wants to
- /// operate on terminator instructions directly.
- unsigned getSuccessorIndex() const { return idx; }
-
- inline bool operator==(const Self &x) const { return idx == x.idx; }
- inline bool operator!=(const Self &x) const { return !operator==(x); }
-
- inline reference operator*() const { return TermInst->getSuccessor(idx); }
- inline pointer operator->() const { return operator*(); }
-
- inline Self &operator++() {
- ++idx;
- return *this;
- } // Preincrement
-
- inline Self operator++(int) { // Postincrement
- Self tmp = *this;
- ++*this;
- return tmp;
- }
-
- inline Self &operator--() {
- --idx;
- return *this;
- } // Predecrement
- inline Self operator--(int) { // Postdecrement
- Self tmp = *this;
- --*this;
- return tmp;
- }
-
- inline bool operator<(const Self &x) const {
- assert(TermInst == x.TermInst &&
- "Cannot compare iterators of different blocks!");
- return idx < x.idx;
- }
-
- inline bool operator<=(const Self &x) const {
- assert(TermInst == x.TermInst &&
- "Cannot compare iterators of different blocks!");
- return idx <= x.idx;
- }
- inline bool operator>=(const Self &x) const {
- assert(TermInst == x.TermInst &&
- "Cannot compare iterators of different blocks!");
- return idx >= x.idx;
- }
-
- inline bool operator>(const Self &x) const {
- assert(TermInst == x.TermInst &&
- "Cannot compare iterators of different blocks!");
- return idx > x.idx;
- }
-
- inline Self &operator+=(int Right) {
- unsigned new_idx = idx + Right;
- assert(index_is_valid(new_idx) && "Iterator index out of bound");
- idx = new_idx;
- return *this;
- }
-
- inline Self operator+(int Right) const {
- Self tmp = *this;
- tmp += Right;
- return tmp;
- }
-
- inline Self &operator-=(int Right) { return operator+=(-Right); }
-
- inline Self operator-(int Right) const { return operator+(-Right); }
-
- inline int operator-(const Self &x) const {
- assert(TermInst == x.TermInst &&
- "Cannot work on iterators of different blocks!");
- int distance = idx - x.idx;
- return distance;
- }
-
- inline SuccessorProxy operator[](int offset) {
- Self tmp = *this;
- tmp += offset;
- return SuccessorProxy(tmp);
- }
-
- /// Get the source BB of this iterator.
- inline BB *getSource() {
- assert(TermInst && "Source not available, if basic block was malformed");
- return TermInst->getParent();
- }
- };
-
- using succ_iterator = SuccIterator<TerminatorInst *, BasicBlock>;
- using succ_const_iterator =
- SuccIterator<const TerminatorInst *, const BasicBlock>;
- using succ_range = iterator_range<succ_iterator>;
- using succ_const_range = iterator_range<succ_const_iterator>;
-
-private:
- inline succ_iterator succ_begin() { return succ_iterator(this); }
- inline succ_const_iterator succ_begin() const {
- return succ_const_iterator(this);
- }
- inline succ_iterator succ_end() { return succ_iterator(this, true); }
- inline succ_const_iterator succ_end() const {
- return succ_const_iterator(this, true);
- }
-
-public:
- inline succ_range successors() {
- return succ_range(succ_begin(), succ_end());
- }
- inline succ_const_range successors() const {
- return succ_const_range(succ_begin(), succ_end());
- }
-};
+namespace Intrinsic {
+enum ID : unsigned;
+}
//===----------------------------------------------------------------------===//
// UnaryInstruction Class
@@ -536,22 +313,6 @@ public:
static BinaryOperator *CreateNot(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd);
- /// Check if the given Value is a NEG, FNeg, or NOT instruction.
- ///
- static bool isNeg(const Value *V);
- static bool isFNeg(const Value *V, bool IgnoreZeroSign=false);
- static bool isNot(const Value *V);
-
- /// Helper functions to extract the unary argument of a NEG, FNEG or NOT
- /// operation implemented via Sub, FSub, or Xor.
- ///
- static const Value *getNegArgument(const Value *BinOp);
- static Value *getNegArgument( Value *BinOp);
- static const Value *getFNegArgument(const Value *BinOp);
- static Value *getFNegArgument( Value *BinOp);
- static const Value *getNotArgument(const Value *BinOp);
- static Value *getNotArgument( Value *BinOp);
-
BinaryOps getOpcode() const {
return static_cast<BinaryOps>(Instruction::getOpcode());
}
@@ -921,7 +682,8 @@ public:
protected:
CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred,
Value *LHS, Value *RHS, const Twine &Name = "",
- Instruction *InsertBefore = nullptr);
+ Instruction *InsertBefore = nullptr,
+ Instruction *FlagsSource = nullptr);
CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred,
Value *LHS, Value *RHS, const Twine &Name,
@@ -1147,76 +909,6 @@ struct OperandTraits<CmpInst> : public FixedNumOperandTraits<CmpInst, 2> {
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value)
-//===----------------------------------------------------------------------===//
-// FuncletPadInst Class
-//===----------------------------------------------------------------------===//
-class FuncletPadInst : public Instruction {
-private:
- FuncletPadInst(const FuncletPadInst &CPI);
-
- explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
- ArrayRef<Value *> Args, unsigned Values,
- const Twine &NameStr, Instruction *InsertBefore);
- explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
- ArrayRef<Value *> Args, unsigned Values,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
-
- void init(Value *ParentPad, ArrayRef<Value *> Args, const Twine &NameStr);
-
-protected:
- // Note: Instruction needs to be a friend here to call cloneImpl.
- friend class Instruction;
- friend class CatchPadInst;
- friend class CleanupPadInst;
-
- FuncletPadInst *cloneImpl() const;
-
-public:
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// getNumArgOperands - Return the number of funcletpad arguments.
- ///
- unsigned getNumArgOperands() const { return getNumOperands() - 1; }
-
- /// Convenience accessors
-
- /// Return the outer EH-pad this funclet is nested within.
- ///
- /// Note: This returns the associated CatchSwitchInst if this FuncletPadInst
- /// is a CatchPadInst.
- Value *getParentPad() const { return Op<-1>(); }
- void setParentPad(Value *ParentPad) {
- assert(ParentPad);
- Op<-1>() = ParentPad;
- }
-
- /// getArgOperand/setArgOperand - Return/set the i-th funcletpad argument.
- ///
- Value *getArgOperand(unsigned i) const { return getOperand(i); }
- void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
-
- /// arg_operands - iteration adapter for range-for loops.
- op_range arg_operands() { return op_range(op_begin(), op_end() - 1); }
-
- /// arg_operands - iteration adapter for range-for loops.
- const_op_range arg_operands() const {
- return const_op_range(op_begin(), op_end() - 1);
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Instruction *I) { return I->isFuncletPad(); }
- static bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-template <>
-struct OperandTraits<FuncletPadInst>
- : public VariadicOperandTraits<FuncletPadInst, /*MINARITY=*/1> {};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(FuncletPadInst, Value)
-
/// A lightweight accessor for an operand bundle meant to be passed
/// around by value.
struct OperandBundleUse {
@@ -1301,54 +993,609 @@ public:
using OperandBundleDef = OperandBundleDefT<Value *>;
using ConstOperandBundleDef = OperandBundleDefT<const Value *>;
-/// A mixin to add operand bundle functionality to llvm instruction
-/// classes.
-///
-/// OperandBundleUser uses the descriptor area co-allocated with the host User
-/// to store some meta information about which operands are "normal" operands,
-/// and which ones belong to some operand bundle.
-///
-/// The layout of an operand bundle user is
-///
-/// +-----------uint32_t End-------------------------------------+
-/// | |
-/// | +--------uint32_t Begin--------------------+ |
-/// | | | |
-/// ^ ^ v v
-/// |------|------|----|----|----|----|----|---------|----|---------|----|-----
-/// | BOI0 | BOI1 | .. | DU | U0 | U1 | .. | BOI0_U0 | .. | BOI1_U0 | .. | Un
-/// |------|------|----|----|----|----|----|---------|----|---------|----|-----
-/// v v ^ ^
-/// | | | |
-/// | +--------uint32_t Begin------------+ |
-/// | |
-/// +-----------uint32_t End-----------------------------+
-///
-///
-/// BOI0, BOI1 ... are descriptions of operand bundles in this User's use list.
-/// These descriptions are installed and managed by this class, and they're all
-/// instances of OperandBundleUser<T>::BundleOpInfo.
-///
-/// DU is an additional descriptor installed by User's 'operator new' to keep
-/// track of the 'BOI0 ... BOIN' co-allocation. OperandBundleUser does not
-/// access or modify DU in any way, it's an implementation detail private to
-/// User.
-///
-/// The regular Use& vector for the User starts at U0. The operand bundle uses
-/// are part of the Use& vector, just like normal uses. In the diagram above,
-/// the operand bundle uses start at BOI0_U0. Each instance of BundleOpInfo has
-/// information about a contiguous set of uses constituting an operand bundle,
-/// and the total set of operand bundle uses themselves form a contiguous set of
-/// uses (i.e. there are no gaps between uses corresponding to individual
-/// operand bundles).
+//===----------------------------------------------------------------------===//
+// CallBase Class
+//===----------------------------------------------------------------------===//
+
+/// Base class for all callable instructions (InvokeInst and CallInst).
+/// Holds everything related to calling a function.
///
-/// This class does not know the location of the set of operand bundle uses
-/// within the use list -- that is decided by the User using this class via the
-/// BeginIdx argument in populateBundleOperandInfos.
+/// All call-like instructions are required to use a common operand layout:
+/// - Zero or more arguments to the call,
+/// - Zero or more operand bundles, each with zero or more operand inputs,
+/// - Zero or more subclass-controlled operands,
+/// - The called function.
///
-/// Currently operand bundle users with hung-off operands are not supported.
-template <typename InstrTy, typename OpIteratorTy> class OperandBundleUser {
+/// This allows this base class to easily access the called function and the
+/// start of the arguments without knowing how many other operands a particular
+/// subclass requires. Note that accessing the end of the argument list isn't
+/// as cheap as most other operations on the base class.
+class CallBase : public Instruction {
+protected:
+ /// The last operand is the called operand.
+ static constexpr int CalledOperandOpEndIdx = -1;
+
+ AttributeList Attrs; ///< parameter attributes for callable
+ FunctionType *FTy;
+
+ template <class... ArgsTy>
+ CallBase(AttributeList const &A, FunctionType *FT, ArgsTy &&... Args)
+ : Instruction(std::forward<ArgsTy>(Args)...), Attrs(A), FTy(FT) {}
+
+ using Instruction::Instruction;
+
+ bool hasDescriptor() const { return Value::HasDescriptor; }
+
+ unsigned getNumSubclassExtraOperands() const {
+ switch (getOpcode()) {
+ case Instruction::Call:
+ return 0;
+ case Instruction::Invoke:
+ return 2;
+ }
+ llvm_unreachable("Invalid opcode!");
+ }
+
public:
+ using Instruction::getContext;
+
+ static bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Call ||
+ I->getOpcode() == Instruction::Invoke;
+ }
+ static bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+
+ FunctionType *getFunctionType() const { return FTy; }
+
+ void mutateFunctionType(FunctionType *FTy) {
+ Value::mutateType(FTy->getReturnType());
+ this->FTy = FTy;
+ }
+
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// data_operands_begin/data_operands_end - Return iterators iterating over
+ /// the call / invoke argument list and bundle operands. For invokes, this is
+ /// the set of instruction operands except the invoke target and the two
+ /// successor blocks; and for calls this is the set of instruction operands
+ /// except the call target.
+ User::op_iterator data_operands_begin() { return op_begin(); }
+ User::const_op_iterator data_operands_begin() const {
+ return const_cast<CallBase *>(this)->data_operands_begin();
+ }
+ User::op_iterator data_operands_end() {
+ // Walk from the end of the operands over the called operand and any
+ // subclass operands.
+ return op_end() - getNumSubclassExtraOperands() - 1;
+ }
+ User::const_op_iterator data_operands_end() const {
+ return const_cast<CallBase *>(this)->data_operands_end();
+ }
+ iterator_range<User::op_iterator> data_ops() {
+ return make_range(data_operands_begin(), data_operands_end());
+ }
+ iterator_range<User::const_op_iterator> data_ops() const {
+ return make_range(data_operands_begin(), data_operands_end());
+ }
+ bool data_operands_empty() const {
+ return data_operands_end() == data_operands_begin();
+ }
+ unsigned data_operands_size() const {
+ return std::distance(data_operands_begin(), data_operands_end());
+ }
+
+ bool isDataOperand(const Use *U) const {
+ assert(this == U->getUser() &&
+ "Only valid to query with a use of this instruction!");
+ return data_operands_begin() <= U && U < data_operands_end();
+ }
+ bool isDataOperand(Value::const_user_iterator UI) const {
+ return isDataOperand(&UI.getUse());
+ }
+
+ /// Return the iterator pointing to the beginning of the argument list.
+ User::op_iterator arg_begin() { return op_begin(); }
+ User::const_op_iterator arg_begin() const {
+ return const_cast<CallBase *>(this)->arg_begin();
+ }
+
+ /// Return the iterator pointing to the end of the argument list.
+ User::op_iterator arg_end() {
+ // From the end of the data operands, walk backwards past the bundle
+ // operands.
+ return data_operands_end() - getNumTotalBundleOperands();
+ }
+ User::const_op_iterator arg_end() const {
+ return const_cast<CallBase *>(this)->arg_end();
+ }
+
+ /// Iteration adapter for range-for loops.
+ iterator_range<User::op_iterator> args() {
+ return make_range(arg_begin(), arg_end());
+ }
+ iterator_range<User::const_op_iterator> args() const {
+ return make_range(arg_begin(), arg_end());
+ }
+ bool arg_empty() const { return arg_end() == arg_begin(); }
+ unsigned arg_size() const { return arg_end() - arg_begin(); }
+
+ // Legacy API names that duplicate the above and will be removed once users
+ // are migrated.
+ iterator_range<User::op_iterator> arg_operands() {
+ return make_range(arg_begin(), arg_end());
+ }
+ iterator_range<User::const_op_iterator> arg_operands() const {
+ return make_range(arg_begin(), arg_end());
+ }
+ unsigned getNumArgOperands() const { return arg_size(); }
+
+ Value *getArgOperand(unsigned i) const {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return getOperand(i);
+ }
+
+ void setArgOperand(unsigned i, Value *v) {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ setOperand(i, v);
+ }
+
+ /// Wrappers for getting the \c Use of a call argument.
+ const Use &getArgOperandUse(unsigned i) const {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return User::getOperandUse(i);
+ }
+ Use &getArgOperandUse(unsigned i) {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return User::getOperandUse(i);
+ }
+
+ bool isArgOperand(const Use *U) const {
+ assert(this == U->getUser() &&
+ "Only valid to query with a use of this instruction!");
+ return arg_begin() <= U && U < arg_end();
+ }
+ bool isArgOperand(Value::const_user_iterator UI) const {
+ return isArgOperand(&UI.getUse());
+ }
+
+ /// Returns true if this CallSite passes the given Value* as an argument to
+ /// the called function.
+ bool hasArgument(const Value *V) const {
+ return llvm::any_of(args(), [V](const Value *Arg) { return Arg == V; });
+ }
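An illustrative walk over the unified operand layout (CB is an assumed
CallBase&): arguments come first, then bundle operands, and the callee is
always the last operand:

    for (Value *Arg : CB.args())
      (void)Arg;                                          // the call arguments
    unsigned BundleOps = CB.getNumTotalBundleOperands();  // operands owned by bundles
    Value *Callee = CB.getCalledOperand();                // last operand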
+
+ Value *getCalledOperand() const { return Op<CalledOperandOpEndIdx>(); }
+
+ // DEPRECATED: This routine will be removed in favor of `getCalledOperand` in
+ // the near future.
+ Value *getCalledValue() const { return getCalledOperand(); }
+
+ const Use &getCalledOperandUse() const { return Op<CalledOperandOpEndIdx>(); }
+ Use &getCalledOperandUse() { return Op<CalledOperandOpEndIdx>(); }
+
+ /// Returns the function called, or null if this is an
+ /// indirect function invocation.
+ Function *getCalledFunction() const {
+ return dyn_cast_or_null<Function>(getCalledOperand());
+ }
+
+ /// Return true if the callsite is an indirect call.
+ bool isIndirectCall() const;
+
+ /// Determine whether the passed iterator points to the callee operand's Use.
+ bool isCallee(Value::const_user_iterator UI) const {
+ return isCallee(&UI.getUse());
+ }
+
+ /// Determine whether this Use is the callee operand's Use.
+ bool isCallee(const Use *U) const { return &getCalledOperandUse() == U; }
+
+ /// Helper to get the caller (the parent function).
+ Function *getCaller();
+ const Function *getCaller() const {
+ return const_cast<CallBase *>(this)->getCaller();
+ }
+
+ /// Returns the intrinsic ID of the intrinsic called or
+ /// Intrinsic::not_intrinsic if the called function is not an intrinsic, or if
+ /// this is an indirect call.
+ Intrinsic::ID getIntrinsicID() const;
+
+ void setCalledOperand(Value *V) { Op<CalledOperandOpEndIdx>() = V; }
+
+ /// Sets the function called, including updating the function type.
+ void setCalledFunction(Value *Fn) {
+ setCalledFunction(
+ cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()),
+ Fn);
+ }
+
+ /// Sets the function called, including updating to the specified function
+ /// type.
+ void setCalledFunction(FunctionType *FTy, Value *Fn) {
+ this->FTy = FTy;
+ assert(FTy == cast<FunctionType>(
+ cast<PointerType>(Fn->getType())->getElementType()));
+ setCalledOperand(Fn);
+ }
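A sketch of retargeting a call while keeping the recorded FunctionType in sync
(CB and NewF are assumed), which is the form the opaque-pointer work prefers:

    CB.setCalledFunction(NewF->getFunctionType(), NewF);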
+
+ CallingConv::ID getCallingConv() const {
+ return static_cast<CallingConv::ID>(getSubclassDataFromInstruction() >> 2);
+ }
+
+ void setCallingConv(CallingConv::ID CC) {
+ auto ID = static_cast<unsigned>(CC);
+ assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & 3) |
+ (ID << 2));
+ }
+
+ /// \name Attribute API
+ ///
+ /// These methods access and modify attributes on this call (including
+ /// looking through to the attributes on the called function when necessary).
+ ///@{
+
+ /// Return the parameter attributes for this call.
+ ///
+ AttributeList getAttributes() const { return Attrs; }
+
+ /// Set the parameter attributes for this call.
+ ///
+ void setAttributes(AttributeList A) { Attrs = A; }
+
+ /// Determine whether this call has the given attribute.
+ bool hasFnAttr(Attribute::AttrKind Kind) const {
+ assert(Kind != Attribute::NoBuiltin &&
+ "Use CallBase::isNoBuiltin() to check for Attribute::NoBuiltin");
+ return hasFnAttrImpl(Kind);
+ }
+
+ /// Determine whether this call has the given attribute.
+ bool hasFnAttr(StringRef Kind) const { return hasFnAttrImpl(Kind); }
+
+ /// adds the attribute to the list of attributes.
+ void addAttribute(unsigned i, Attribute::AttrKind Kind) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addAttribute(getContext(), i, Kind);
+ setAttributes(PAL);
+ }
+
+ /// adds the attribute to the list of attributes.
+ void addAttribute(unsigned i, Attribute Attr) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addAttribute(getContext(), i, Attr);
+ setAttributes(PAL);
+ }
+
+ /// Adds the attribute to the indicated argument
+ void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind);
+ setAttributes(PAL);
+ }
+
+ /// Adds the attribute to the indicated argument
+ void addParamAttr(unsigned ArgNo, Attribute Attr) {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr);
+ setAttributes(PAL);
+ }
+
+ /// removes the attribute from the list of attributes.
+ void removeAttribute(unsigned i, Attribute::AttrKind Kind) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.removeAttribute(getContext(), i, Kind);
+ setAttributes(PAL);
+ }
+
+ /// removes the attribute from the list of attributes.
+ void removeAttribute(unsigned i, StringRef Kind) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.removeAttribute(getContext(), i, Kind);
+ setAttributes(PAL);
+ }
+
+ /// Removes the attribute from the given argument
+ void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ AttributeList PAL = getAttributes();
+ PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
+ setAttributes(PAL);
+ }
+
+ /// Removes the attribute from the given argument
+ void removeParamAttr(unsigned ArgNo, StringRef Kind) {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ AttributeList PAL = getAttributes();
+ PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
+ setAttributes(PAL);
+ }
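A hypothetical use of the per-argument attribute helpers on a call site CB:

    CB.addParamAttr(0, Attribute::NonNull);
    if (CB.paramHasAttr(0, Attribute::NonNull))
      CB.removeParamAttr(0, Attribute::NonNull);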
+
+ /// adds the dereferenceable attribute to the list of attributes.
+ void addDereferenceableAttr(unsigned i, uint64_t Bytes) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+ }
+
+ /// adds the dereferenceable_or_null attribute to the list of
+ /// attributes.
+ void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+ }
+
+ /// Determine whether the return value has the given attribute.
+ bool hasRetAttr(Attribute::AttrKind Kind) const;
+
+ /// Determine whether the argument or parameter has the given attribute.
+ bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const;
+
+ /// Get the attribute of a given kind at a position.
+ Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
+ return getAttributes().getAttribute(i, Kind);
+ }
+
+ /// Get the attribute of a given kind at a position.
+ Attribute getAttribute(unsigned i, StringRef Kind) const {
+ return getAttributes().getAttribute(i, Kind);
+ }
+
+ /// Get the attribute of a given kind from a given arg
+ Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ return getAttributes().getParamAttr(ArgNo, Kind);
+ }
+
+ /// Get the attribute of a given kind from a given arg
+ Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ return getAttributes().getParamAttr(ArgNo, Kind);
+ }
+
+ /// Return true if the data operand at index \p i has the attribute \p Kind.
+ ///
+ /// Data operands include call arguments and values used in operand bundles,
+ /// but does not include the callee operand. This routine dispatches to the
+ /// underlying AttributeList or the OperandBundleUser as appropriate.
+ ///
+ /// The index \p i is interpreted as
+ ///
+ /// \p i == Attribute::ReturnIndex -> the return value
+ /// \p i in [1, arg_size + 1) -> argument number (\p i - 1)
+ /// \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
+ /// (\p i - 1) in the operand list.
+ bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
+ // Note that we have to add one because `i` isn't zero-indexed.
+ assert(i < (getNumArgOperands() + getNumTotalBundleOperands() + 1) &&
+ "Data operand index out of bounds!");
+
+ // The attribute A can either be directly specified, if the operand in
+ // question is a call argument; or be indirectly implied by the kind of its
+ // containing operand bundle, if the operand is a bundle operand.
+
+ if (i == AttributeList::ReturnIndex)
+ return hasRetAttr(Kind);
+
+ // FIXME: Avoid these i - 1 calculations and update the API to use
+ // zero-based indices.
+ if (i < (getNumArgOperands() + 1))
+ return paramHasAttr(i - 1, Kind);
+
+ assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) &&
+ "Must be either a call argument or an operand bundle!");
+ return bundleOperandHasAttr(i - 1, Kind);
+ }
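A worked mapping of the index scheme above, for a call with two arguments and
one single-input operand bundle (values illustrative; CB is assumed):

    //   i == 0 -> the return value
    //   i == 1 -> argument 0, i == 2 -> argument 1
    //   i == 3 -> the bundle's only operand
    bool ArgNoCapture = CB.dataOperandHasImpliedAttr(1, Attribute::NoCapture);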
+
+ /// Determine whether this data operand is not captured.
+ // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
+ // better indicate that this may return a conservative answer.
+ bool doesNotCapture(unsigned OpNo) const {
+ return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture);
+ }
+
+ /// Determine whether this argument is passed by value.
+ bool isByValArgument(unsigned ArgNo) const {
+ return paramHasAttr(ArgNo, Attribute::ByVal);
+ }
+
+ /// Determine whether this argument is passed in an alloca.
+ bool isInAllocaArgument(unsigned ArgNo) const {
+ return paramHasAttr(ArgNo, Attribute::InAlloca);
+ }
+
+ /// Determine whether this argument is passed by value or in an alloca.
+ bool isByValOrInAllocaArgument(unsigned ArgNo) const {
+ return paramHasAttr(ArgNo, Attribute::ByVal) ||
+ paramHasAttr(ArgNo, Attribute::InAlloca);
+ }
+
+ /// Determine if there is an inalloca argument. Only the last argument can
+ /// have the inalloca attribute.
+ bool hasInAllocaArgument() const {
+ return !arg_empty() && paramHasAttr(arg_size() - 1, Attribute::InAlloca);
+ }
+
+ // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
+ // better indicate that this may return a conservative answer.
+ bool doesNotAccessMemory(unsigned OpNo) const {
+ return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ }
+
+ // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
+ // better indicate that this may return a conservative answer.
+ bool onlyReadsMemory(unsigned OpNo) const {
+ return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) ||
+ dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ }
+
+ // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
+ // better indicate that this may return a conservative answer.
+ bool doesNotReadMemory(unsigned OpNo) const {
+ return dataOperandHasImpliedAttr(OpNo + 1, Attribute::WriteOnly) ||
+ dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ }
+
+ /// Extract the alignment of the return value.
+ unsigned getRetAlignment() const { return Attrs.getRetAlignment(); }
+
+ /// Extract the alignment for a call or parameter (0=unknown).
+ unsigned getParamAlignment(unsigned ArgNo) const {
+ return Attrs.getParamAlignment(ArgNo);
+ }
+
+ /// Extract the number of dereferenceable bytes for a call or
+ /// parameter (0=unknown).
+ uint64_t getDereferenceableBytes(unsigned i) const {
+ return Attrs.getDereferenceableBytes(i);
+ }
+
+ /// Extract the number of dereferenceable_or_null bytes for a call or
+ /// parameter (0=unknown).
+ uint64_t getDereferenceableOrNullBytes(unsigned i) const {
+ return Attrs.getDereferenceableOrNullBytes(i);
+ }
+
+ /// Return true if the return value is known to be not null.
+ /// This may be because it has the nonnull attribute, or because at least
+ /// one byte is dereferenceable and the pointer is in addrspace(0).
+ bool isReturnNonNull() const;
+
+ /// Determine if the return value is marked with NoAlias attribute.
+ bool returnDoesNotAlias() const {
+ return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ }
+
+ /// If one of the arguments has the 'returned' attribute, returns its
+ /// operand value. Otherwise, return nullptr.
+ Value *getReturnedArgOperand() const;
+
+ /// Return true if the call should not be treated as a call to a
+ /// builtin.
+ bool isNoBuiltin() const {
+ return hasFnAttrImpl(Attribute::NoBuiltin) &&
+ !hasFnAttrImpl(Attribute::Builtin);
+ }
+
+ /// Determine if the call requires strict floating point semantics.
+ bool isStrictFP() const { return hasFnAttr(Attribute::StrictFP); }
+
+ /// Return true if the call should not be inlined.
+ bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
+ void setIsNoInline() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
+ }
+ /// Determine if the call does not access memory.
+ bool doesNotAccessMemory() const { return hasFnAttr(Attribute::ReadNone); }
+ void setDoesNotAccessMemory() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+ }
+
+ /// Determine if the call does not access or only reads memory.
+ bool onlyReadsMemory() const {
+ return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
+ }
+ void setOnlyReadsMemory() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
+ }
+
+ /// Determine if the call does not access or only writes memory.
+ bool doesNotReadMemory() const {
+ return doesNotAccessMemory() || hasFnAttr(Attribute::WriteOnly);
+ }
+ void setDoesNotReadMemory() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::WriteOnly);
+ }
+
+ /// Determine if the call can access memory only using pointers based
+ /// on its arguments.
+ bool onlyAccessesArgMemory() const {
+ return hasFnAttr(Attribute::ArgMemOnly);
+ }
+ void setOnlyAccessesArgMemory() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::ArgMemOnly);
+ }
+
+ /// Determine if the function may only access memory that is
+ /// inaccessible from the IR.
+ bool onlyAccessesInaccessibleMemory() const {
+ return hasFnAttr(Attribute::InaccessibleMemOnly);
+ }
+ void setOnlyAccessesInaccessibleMemory() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOnly);
+ }
+
+ /// Determine if the function may only access memory that is
+ /// either inaccessible from the IR or pointed to by its arguments.
+ bool onlyAccessesInaccessibleMemOrArgMem() const {
+ return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+ }
+ void setOnlyAccessesInaccessibleMemOrArgMem() {
+ addAttribute(AttributeList::FunctionIndex,
+ Attribute::InaccessibleMemOrArgMemOnly);
+ }
+ /// Determine if the call cannot return.
+ bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
+ void setDoesNotReturn() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
+ }
+
+ /// Determine if the call should not perform indirect branch tracking.
+ bool doesNoCfCheck() const { return hasFnAttr(Attribute::NoCfCheck); }
+
+ /// Determine if the call cannot unwind.
+ bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
+ void setDoesNotThrow() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+ }
+
+ /// Determine if the invoke cannot be duplicated.
+ bool cannotDuplicate() const { return hasFnAttr(Attribute::NoDuplicate); }
+ void setCannotDuplicate() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::NoDuplicate);
+ }
+
+ /// Determine if the invoke is convergent
+ bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
+ void setConvergent() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
+ }
+ void setNotConvergent() {
+ removeAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
+ }
+
+ /// Determine if the call returns a structure through first
+ /// pointer argument.
+ bool hasStructRetAttr() const {
+ if (getNumArgOperands() == 0)
+ return false;
+
+ // Be friendly and also check the callee.
+ return paramHasAttr(0, Attribute::StructRet);
+ }
+
+ /// Determine if any call argument is an aggregate passed by value.
+ bool hasByValArgument() const {
+ return Attrs.hasAttrSomewhere(Attribute::ByVal);
+ }
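A sketch of how these predicates are typically combined (Hoistable is a
hypothetical local; the queries consult the call's own attributes, its operand
bundles and finally the callee):

    bool Hoistable =
        CB.onlyReadsMemory() && CB.doesNotThrow() && !CB.isConvergent();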
+
+ ///@}
+ // End of attribute API.
+
+ /// \name Operand Bundle API
+ ///
+ /// This group of methods provides the API to access and manipulate operand
+ /// bundles on this call.
+ /// @{
+
/// Return the number of operand bundles associated with this User.
unsigned getNumOperandBundles() const {
return std::distance(bundle_op_info_begin(), bundle_op_info_end());
@@ -1375,6 +1622,16 @@ public:
Idx < getBundleOperandsEndIndex();
}
+ /// Returns true if the use is a bundle operand.
+ bool isBundleOperand(const Use *U) const {
+ assert(this == U->getUser() &&
+ "Only valid to query with a use of this instruction!");
+ return hasOperandBundles() && isBundleOperand(U - op_begin());
+ }
+ bool isBundleOperand(Value::const_user_iterator UI) const {
+ return isBundleOperand(&UI.getUse());
+ }
+
/// Return the total number of operands (not operand bundles) used by
/// every operand bundle in this OperandBundleUser.
unsigned getNumTotalBundleOperands() const {
@@ -1504,8 +1761,7 @@ public:
/// Return true if \p Other has the same sequence of operand bundle
/// tags with the same number of operands on each one of them as this
/// OperandBundleUser.
- bool hasIdenticalOperandBundleSchema(
- const OperandBundleUser<InstrTy, OpIteratorTy> &Other) const {
+ bool hasIdenticalOperandBundleSchema(const CallBase &Other) const {
if (getNumOperandBundles() != Other.getNumOperandBundles())
return false;
@@ -1524,7 +1780,6 @@ public:
return false;
}
-protected:
/// Is the function attribute S disallowed by some operand bundle on
/// this operand bundle user?
bool isFnAttrDisallowedByOpBundle(StringRef S) const {
@@ -1583,8 +1838,8 @@ protected:
/// OperandBundleUse.
OperandBundleUse
operandBundleFromBundleOpInfo(const BundleOpInfo &BOI) const {
- auto op_begin = static_cast<const InstrTy *>(this)->op_begin();
- ArrayRef<Use> Inputs(op_begin + BOI.Begin, op_begin + BOI.End);
+ auto begin = op_begin();
+ ArrayRef<Use> Inputs(begin + BOI.Begin, begin + BOI.End);
return OperandBundleUse(BOI.Tag, Inputs);
}
@@ -1593,37 +1848,79 @@ protected:
/// Return the start of the list of BundleOpInfo instances associated
/// with this OperandBundleUser.
+ ///
+ /// OperandBundleUser uses the descriptor area co-allocated with the host User
+ /// to store some meta information about which operands are "normal" operands,
+ /// and which ones belong to some operand bundle.
+ ///
+ /// The layout of an operand bundle user is
+ ///
+ /// +-----------uint32_t End-------------------------------------+
+ /// | |
+ /// | +--------uint32_t Begin--------------------+ |
+ /// | | | |
+ /// ^ ^ v v
+ /// |------|------|----|----|----|----|----|---------|----|---------|----|-----
+ /// | BOI0 | BOI1 | .. | DU | U0 | U1 | .. | BOI0_U0 | .. | BOI1_U0 | .. | Un
+ /// |------|------|----|----|----|----|----|---------|----|---------|----|-----
+ /// v v ^ ^
+ /// | | | |
+ /// | +--------uint32_t Begin------------+ |
+ /// | |
+ /// +-----------uint32_t End-----------------------------+
+ ///
+ ///
+ /// BOI0, BOI1 ... are descriptions of operand bundles in this User's use
+ /// list. These descriptions are installed and managed by this class, and
+ /// they're all instances of OperandBundleUser<T>::BundleOpInfo.
+ ///
+ /// DU is an additional descriptor installed by User's 'operator new' to keep
+ /// track of the 'BOI0 ... BOIN' co-allocation. OperandBundleUser does not
+ /// access or modify DU in any way, it's an implementation detail private to
+ /// User.
+ ///
+ /// The regular Use& vector for the User starts at U0. The operand bundle
+ /// uses are part of the Use& vector, just like normal uses. In the diagram
+ /// above, the operand bundle uses start at BOI0_U0. Each instance of
+ /// BundleOpInfo has information about a contiguous set of uses constituting
+ /// an operand bundle, and the total set of operand bundle uses themselves
+ /// form a contiguous set of uses (i.e. there are no gaps between uses
+ /// corresponding to individual operand bundles).
+ ///
+ /// This class does not know the location of the set of operand bundle uses
+ /// within the use list -- that is decided by the User using this class via
+ /// the BeginIdx argument in populateBundleOperandInfos.
+ ///
+ /// Currently operand bundle users with hung-off operands are not supported.
bundle_op_iterator bundle_op_info_begin() {
- if (!static_cast<InstrTy *>(this)->hasDescriptor())
+ if (!hasDescriptor())
return nullptr;
- uint8_t *BytesBegin = static_cast<InstrTy *>(this)->getDescriptor().begin();
+ uint8_t *BytesBegin = getDescriptor().begin();
return reinterpret_cast<bundle_op_iterator>(BytesBegin);
}
/// Return the start of the list of BundleOpInfo instances associated
/// with this OperandBundleUser.
const_bundle_op_iterator bundle_op_info_begin() const {
- auto *NonConstThis =
- const_cast<OperandBundleUser<InstrTy, OpIteratorTy> *>(this);
+ auto *NonConstThis = const_cast<CallBase *>(this);
return NonConstThis->bundle_op_info_begin();
}
/// Return the end of the list of BundleOpInfo instances associated
/// with this OperandBundleUser.
bundle_op_iterator bundle_op_info_end() {
- if (!static_cast<InstrTy *>(this)->hasDescriptor())
+ if (!hasDescriptor())
return nullptr;
- uint8_t *BytesEnd = static_cast<InstrTy *>(this)->getDescriptor().end();
+ uint8_t *BytesEnd = getDescriptor().end();
return reinterpret_cast<bundle_op_iterator>(BytesEnd);
}
/// Return the end of the list of BundleOpInfo instances associated
/// with this OperandBundleUser.
const_bundle_op_iterator bundle_op_info_end() const {
- auto *NonConstThis =
- const_cast<OperandBundleUser<InstrTy, OpIteratorTy> *>(this);
+ auto *NonConstThis = const_cast<CallBase *>(this);
return NonConstThis->bundle_op_info_end();
}
@@ -1643,30 +1940,8 @@ protected:
///
/// Each \p OperandBundleDef instance is tracked by a OperandBundleInfo
/// instance allocated in this User's descriptor.
- OpIteratorTy populateBundleOperandInfos(ArrayRef<OperandBundleDef> Bundles,
- const unsigned BeginIndex) {
- auto It = static_cast<InstrTy *>(this)->op_begin() + BeginIndex;
- for (auto &B : Bundles)
- It = std::copy(B.input_begin(), B.input_end(), It);
-
- auto *ContextImpl = static_cast<InstrTy *>(this)->getContext().pImpl;
- auto BI = Bundles.begin();
- unsigned CurrentIndex = BeginIndex;
-
- for (auto &BOI : bundle_op_infos()) {
- assert(BI != Bundles.end() && "Incorrect allocation?");
-
- BOI.Tag = ContextImpl->getOrInsertBundleTag(BI->getTag());
- BOI.Begin = CurrentIndex;
- BOI.End = CurrentIndex + BI->input_size();
- CurrentIndex = BOI.End;
- BI++;
- }
-
- assert(BI == Bundles.end() && "Incorrect allocation?");
-
- return It;
- }
+ op_iterator populateBundleOperandInfos(ArrayRef<OperandBundleDef> Bundles,
+ const unsigned BeginIndex);
/// Return the BundleOpInfo for the operand at index OpIdx.
///
@@ -1680,6 +1955,7 @@ protected:
llvm_unreachable("Did not find operand bundle for operand!");
}
+protected:
/// Return the total number of values used in \p Bundles.
static unsigned CountBundleInputs(ArrayRef<OperandBundleDef> Bundles) {
unsigned Total = 0;
@@ -1687,8 +1963,102 @@ protected:
Total += B.input_size();
return Total;
}
+
+ /// @}
+ // End of operand bundle API.
+
+private:
+ bool hasFnAttrOnCalledFunction(Attribute::AttrKind Kind) const;
+ bool hasFnAttrOnCalledFunction(StringRef Kind) const;
+
+ template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const {
+ if (Attrs.hasAttribute(AttributeList::FunctionIndex, Kind))
+ return true;
+
+ // Operand bundles override attributes on the called function, but don't
+ // override attributes directly present on the call instruction.
+ if (isFnAttrDisallowedByOpBundle(Kind))
+ return false;
+
+ return hasFnAttrOnCalledFunction(Kind);
+ }
};
+template <>
+struct OperandTraits<CallBase> : public VariadicOperandTraits<CallBase, 1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CallBase, Value)
+
+//===----------------------------------------------------------------------===//
+// FuncletPadInst Class
+//===----------------------------------------------------------------------===//
+class FuncletPadInst : public Instruction {
+private:
+ FuncletPadInst(const FuncletPadInst &CPI);
+
+ explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, Instruction *InsertBefore);
+ explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+ void init(Value *ParentPad, ArrayRef<Value *> Args, const Twine &NameStr);
+
+protected:
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+ friend class CatchPadInst;
+ friend class CleanupPadInst;
+
+ FuncletPadInst *cloneImpl() const;
+
+public:
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// getNumArgOperands - Return the number of funcletpad arguments.
+ ///
+ unsigned getNumArgOperands() const { return getNumOperands() - 1; }
+
+ /// Convenience accessors
+
+ /// Return the outer EH-pad this funclet is nested within.
+ ///
+ /// Note: This returns the associated CatchSwitchInst if this FuncletPadInst
+ /// is a CatchPadInst.
+ Value *getParentPad() const { return Op<-1>(); }
+ void setParentPad(Value *ParentPad) {
+ assert(ParentPad);
+ Op<-1>() = ParentPad;
+ }
+
+ /// getArgOperand/setArgOperand - Return/set the i-th funcletpad argument.
+ ///
+ Value *getArgOperand(unsigned i) const { return getOperand(i); }
+ void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
+
+ /// arg_operands - iteration adapter for range-for loops.
+ op_range arg_operands() { return op_range(op_begin(), op_end() - 1); }
+
+ /// arg_operands - iteration adapter for range-for loops.
+ const_op_range arg_operands() const {
+ return const_op_range(op_begin(), op_end() - 1);
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Instruction *I) { return I->isFuncletPad(); }
+ static bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<FuncletPadInst>
+ : public VariadicOperandTraits<FuncletPadInst, /*MINARITY=*/1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(FuncletPadInst, Value)
+
} // end namespace llvm
#endif // LLVM_IR_INSTRTYPES_H
diff --git a/contrib/llvm/include/llvm/IR/Instruction.def b/contrib/llvm/include/llvm/IR/Instruction.def
index 86617299c44a..58e4e2e1d6cc 100644
--- a/contrib/llvm/include/llvm/IR/Instruction.def
+++ b/contrib/llvm/include/llvm/IR/Instruction.def
@@ -32,6 +32,20 @@
#define LAST_TERM_INST(num)
#endif
+#ifndef FIRST_UNARY_INST
+#define FIRST_UNARY_INST(num)
+#endif
+#ifndef HANDLE_UNARY_INST
+#ifndef HANDLE_INST
+#define HANDLE_UNARY_INST(num, opcode, instclass)
+#else
+#define HANDLE_UNARY_INST(num, opcode, Class) HANDLE_INST(num, opcode, Class)
+#endif
+#endif
+#ifndef LAST_UNARY_INST
+#define LAST_UNARY_INST(num)
+#endif
+
#ifndef FIRST_BINARY_INST
#define FIRST_BINARY_INST(num)
#endif
@@ -123,87 +137,96 @@ HANDLE_TERM_INST ( 9, CatchRet , CatchReturnInst)
HANDLE_TERM_INST (10, CatchSwitch , CatchSwitchInst)
LAST_TERM_INST (10)
+// Standard unary operators...
+ FIRST_UNARY_INST(11)
+HANDLE_UNARY_INST(11, FNeg , UnaryOperator)
+ LAST_UNARY_INST(11)
+
// Standard binary operators...
- FIRST_BINARY_INST(11)
-HANDLE_BINARY_INST(11, Add , BinaryOperator)
-HANDLE_BINARY_INST(12, FAdd , BinaryOperator)
-HANDLE_BINARY_INST(13, Sub , BinaryOperator)
-HANDLE_BINARY_INST(14, FSub , BinaryOperator)
-HANDLE_BINARY_INST(15, Mul , BinaryOperator)
-HANDLE_BINARY_INST(16, FMul , BinaryOperator)
-HANDLE_BINARY_INST(17, UDiv , BinaryOperator)
-HANDLE_BINARY_INST(18, SDiv , BinaryOperator)
-HANDLE_BINARY_INST(19, FDiv , BinaryOperator)
-HANDLE_BINARY_INST(20, URem , BinaryOperator)
-HANDLE_BINARY_INST(21, SRem , BinaryOperator)
-HANDLE_BINARY_INST(22, FRem , BinaryOperator)
+ FIRST_BINARY_INST(12)
+HANDLE_BINARY_INST(12, Add , BinaryOperator)
+HANDLE_BINARY_INST(13, FAdd , BinaryOperator)
+HANDLE_BINARY_INST(14, Sub , BinaryOperator)
+HANDLE_BINARY_INST(15, FSub , BinaryOperator)
+HANDLE_BINARY_INST(16, Mul , BinaryOperator)
+HANDLE_BINARY_INST(17, FMul , BinaryOperator)
+HANDLE_BINARY_INST(18, UDiv , BinaryOperator)
+HANDLE_BINARY_INST(19, SDiv , BinaryOperator)
+HANDLE_BINARY_INST(20, FDiv , BinaryOperator)
+HANDLE_BINARY_INST(21, URem , BinaryOperator)
+HANDLE_BINARY_INST(22, SRem , BinaryOperator)
+HANDLE_BINARY_INST(23, FRem , BinaryOperator)
// Logical operators (integer operands)
-HANDLE_BINARY_INST(23, Shl , BinaryOperator) // Shift left (logical)
-HANDLE_BINARY_INST(24, LShr , BinaryOperator) // Shift right (logical)
-HANDLE_BINARY_INST(25, AShr , BinaryOperator) // Shift right (arithmetic)
-HANDLE_BINARY_INST(26, And , BinaryOperator)
-HANDLE_BINARY_INST(27, Or , BinaryOperator)
-HANDLE_BINARY_INST(28, Xor , BinaryOperator)
- LAST_BINARY_INST(28)
+HANDLE_BINARY_INST(24, Shl , BinaryOperator) // Shift left (logical)
+HANDLE_BINARY_INST(25, LShr , BinaryOperator) // Shift right (logical)
+HANDLE_BINARY_INST(26, AShr , BinaryOperator) // Shift right (arithmetic)
+HANDLE_BINARY_INST(27, And , BinaryOperator)
+HANDLE_BINARY_INST(28, Or , BinaryOperator)
+HANDLE_BINARY_INST(29, Xor , BinaryOperator)
+ LAST_BINARY_INST(29)
// Memory operators...
- FIRST_MEMORY_INST(29)
-HANDLE_MEMORY_INST(29, Alloca, AllocaInst) // Stack management
-HANDLE_MEMORY_INST(30, Load , LoadInst ) // Memory manipulation instrs
-HANDLE_MEMORY_INST(31, Store , StoreInst )
-HANDLE_MEMORY_INST(32, GetElementPtr, GetElementPtrInst)
-HANDLE_MEMORY_INST(33, Fence , FenceInst )
-HANDLE_MEMORY_INST(34, AtomicCmpXchg , AtomicCmpXchgInst )
-HANDLE_MEMORY_INST(35, AtomicRMW , AtomicRMWInst )
- LAST_MEMORY_INST(35)
+ FIRST_MEMORY_INST(30)
+HANDLE_MEMORY_INST(30, Alloca, AllocaInst) // Stack management
+HANDLE_MEMORY_INST(31, Load , LoadInst ) // Memory manipulation instrs
+HANDLE_MEMORY_INST(32, Store , StoreInst )
+HANDLE_MEMORY_INST(33, GetElementPtr, GetElementPtrInst)
+HANDLE_MEMORY_INST(34, Fence , FenceInst )
+HANDLE_MEMORY_INST(35, AtomicCmpXchg , AtomicCmpXchgInst )
+HANDLE_MEMORY_INST(36, AtomicRMW , AtomicRMWInst )
+ LAST_MEMORY_INST(36)
// Cast operators ...
// NOTE: The order matters here because CastInst::isEliminableCastPair
// NOTE: (see Instructions.cpp) encodes a table based on this ordering.
- FIRST_CAST_INST(36)
-HANDLE_CAST_INST(36, Trunc , TruncInst ) // Truncate integers
-HANDLE_CAST_INST(37, ZExt , ZExtInst ) // Zero extend integers
-HANDLE_CAST_INST(38, SExt , SExtInst ) // Sign extend integers
-HANDLE_CAST_INST(39, FPToUI , FPToUIInst ) // floating point -> UInt
-HANDLE_CAST_INST(40, FPToSI , FPToSIInst ) // floating point -> SInt
-HANDLE_CAST_INST(41, UIToFP , UIToFPInst ) // UInt -> floating point
-HANDLE_CAST_INST(42, SIToFP , SIToFPInst ) // SInt -> floating point
-HANDLE_CAST_INST(43, FPTrunc , FPTruncInst ) // Truncate floating point
-HANDLE_CAST_INST(44, FPExt , FPExtInst ) // Extend floating point
-HANDLE_CAST_INST(45, PtrToInt, PtrToIntInst) // Pointer -> Integer
-HANDLE_CAST_INST(46, IntToPtr, IntToPtrInst) // Integer -> Pointer
-HANDLE_CAST_INST(47, BitCast , BitCastInst ) // Type cast
-HANDLE_CAST_INST(48, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast
- LAST_CAST_INST(48)
-
- FIRST_FUNCLETPAD_INST(49)
-HANDLE_FUNCLETPAD_INST(49, CleanupPad, CleanupPadInst)
-HANDLE_FUNCLETPAD_INST(50, CatchPad , CatchPadInst)
- LAST_FUNCLETPAD_INST(50)
+ FIRST_CAST_INST(37)
+HANDLE_CAST_INST(37, Trunc , TruncInst ) // Truncate integers
+HANDLE_CAST_INST(38, ZExt , ZExtInst ) // Zero extend integers
+HANDLE_CAST_INST(39, SExt , SExtInst ) // Sign extend integers
+HANDLE_CAST_INST(40, FPToUI , FPToUIInst ) // floating point -> UInt
+HANDLE_CAST_INST(41, FPToSI , FPToSIInst ) // floating point -> SInt
+HANDLE_CAST_INST(42, UIToFP , UIToFPInst ) // UInt -> floating point
+HANDLE_CAST_INST(43, SIToFP , SIToFPInst ) // SInt -> floating point
+HANDLE_CAST_INST(44, FPTrunc , FPTruncInst ) // Truncate floating point
+HANDLE_CAST_INST(45, FPExt , FPExtInst ) // Extend floating point
+HANDLE_CAST_INST(46, PtrToInt, PtrToIntInst) // Pointer -> Integer
+HANDLE_CAST_INST(47, IntToPtr, IntToPtrInst) // Integer -> Pointer
+HANDLE_CAST_INST(48, BitCast , BitCastInst ) // Type cast
+HANDLE_CAST_INST(49, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast
+ LAST_CAST_INST(49)
+
+ FIRST_FUNCLETPAD_INST(50)
+HANDLE_FUNCLETPAD_INST(50, CleanupPad, CleanupPadInst)
+HANDLE_FUNCLETPAD_INST(51, CatchPad , CatchPadInst)
+ LAST_FUNCLETPAD_INST(51)
// Other operators...
- FIRST_OTHER_INST(51)
-HANDLE_OTHER_INST(51, ICmp , ICmpInst ) // Integer comparison instruction
-HANDLE_OTHER_INST(52, FCmp , FCmpInst ) // Floating point comparison instr.
-HANDLE_OTHER_INST(53, PHI , PHINode ) // PHI node instruction
-HANDLE_OTHER_INST(54, Call , CallInst ) // Call a function
-HANDLE_OTHER_INST(55, Select , SelectInst ) // select instruction
-HANDLE_USER_INST (56, UserOp1, Instruction) // May be used internally in a pass
-HANDLE_USER_INST (57, UserOp2, Instruction) // Internal to passes only
-HANDLE_OTHER_INST(58, VAArg , VAArgInst ) // vaarg instruction
-HANDLE_OTHER_INST(59, ExtractElement, ExtractElementInst)// extract from vector
-HANDLE_OTHER_INST(60, InsertElement, InsertElementInst) // insert into vector
-HANDLE_OTHER_INST(61, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
-HANDLE_OTHER_INST(62, ExtractValue, ExtractValueInst)// extract from aggregate
-HANDLE_OTHER_INST(63, InsertValue, InsertValueInst) // insert into aggregate
-HANDLE_OTHER_INST(64, LandingPad, LandingPadInst) // Landing pad instruction.
- LAST_OTHER_INST(64)
+ FIRST_OTHER_INST(52)
+HANDLE_OTHER_INST(52, ICmp , ICmpInst ) // Integer comparison instruction
+HANDLE_OTHER_INST(53, FCmp , FCmpInst ) // Floating point comparison instr.
+HANDLE_OTHER_INST(54, PHI , PHINode ) // PHI node instruction
+HANDLE_OTHER_INST(55, Call , CallInst ) // Call a function
+HANDLE_OTHER_INST(56, Select , SelectInst ) // select instruction
+HANDLE_USER_INST (57, UserOp1, Instruction) // May be used internally in a pass
+HANDLE_USER_INST (58, UserOp2, Instruction) // Internal to passes only
+HANDLE_OTHER_INST(59, VAArg , VAArgInst ) // vaarg instruction
+HANDLE_OTHER_INST(60, ExtractElement, ExtractElementInst)// extract from vector
+HANDLE_OTHER_INST(61, InsertElement, InsertElementInst) // insert into vector
+HANDLE_OTHER_INST(62, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
+HANDLE_OTHER_INST(63, ExtractValue, ExtractValueInst)// extract from aggregate
+HANDLE_OTHER_INST(64, InsertValue, InsertValueInst) // insert into aggregate
+HANDLE_OTHER_INST(65, LandingPad, LandingPadInst) // Landing pad instruction.
+ LAST_OTHER_INST(65)
#undef FIRST_TERM_INST
#undef HANDLE_TERM_INST
#undef LAST_TERM_INST
+#undef FIRST_UNARY_INST
+#undef HANDLE_UNARY_INST
+#undef LAST_UNARY_INST
+
#undef FIRST_BINARY_INST
#undef HANDLE_BINARY_INST
#undef LAST_BINARY_INST
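Editor's note: the opcode renumbering above is mechanical; the substantive addition is the FIRST_UNARY_INST / HANDLE_UNARY_INST / LAST_UNARY_INST group. Clients consume Instruction.def by defining whichever HANDLE_* macros they care about before including it; the file supplies empty defaults for the rest and #undefs everything afterwards. A sketch of that pattern (the function name is made up), visiting only the new unary group:

    #include <cstdio>

    // Only HANDLE_UNARY_INST is defined here; Instruction.def provides empty
    // defaults for the other HANDLE_* macros and #undefs them all at the end.
    static void printUnaryOpcodes() {
    #define HANDLE_UNARY_INST(num, opcode, Class)                              \
      std::printf("%2d  %s -> %s\n", num, #opcode, #Class);
    #include "llvm/IR/Instruction.def"
    }
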
diff --git a/contrib/llvm/include/llvm/IR/Instruction.h b/contrib/llvm/include/llvm/IR/Instruction.h
index 643c2a0761d1..5e78cb1edf02 100644
--- a/contrib/llvm/include/llvm/IR/Instruction.h
+++ b/contrib/llvm/include/llvm/IR/Instruction.h
@@ -127,11 +127,15 @@ public:
const char *getOpcodeName() const { return getOpcodeName(getOpcode()); }
bool isTerminator() const { return isTerminator(getOpcode()); }
+ bool isUnaryOp() const { return isUnaryOp(getOpcode()); }
bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
bool isIntDivRem() const { return isIntDivRem(getOpcode()); }
bool isShift() { return isShift(getOpcode()); }
bool isCast() const { return isCast(getOpcode()); }
bool isFuncletPad() const { return isFuncletPad(getOpcode()); }
+ bool isExceptionalTerminator() const {
+ return isExceptionalTerminator(getOpcode());
+ }
static const char* getOpcodeName(unsigned OpCode);
@@ -139,6 +143,9 @@ public:
return OpCode >= TermOpsBegin && OpCode < TermOpsEnd;
}
+ static inline bool isUnaryOp(unsigned Opcode) {
+ return Opcode >= UnaryOpsBegin && Opcode < UnaryOpsEnd;
+ }
static inline bool isBinaryOp(unsigned Opcode) {
return Opcode >= BinaryOpsBegin && Opcode < BinaryOpsEnd;
}
@@ -182,6 +189,20 @@ public:
return OpCode >= FuncletPadOpsBegin && OpCode < FuncletPadOpsEnd;
}
+ /// Returns true if the OpCode is a terminator related to exception handling.
+ static inline bool isExceptionalTerminator(unsigned OpCode) {
+ switch (OpCode) {
+ case Instruction::CatchSwitch:
+ case Instruction::CatchRet:
+ case Instruction::CleanupRet:
+ case Instruction::Invoke:
+ case Instruction::Resume:
+ return true;
+ default:
+ return false;
+ }
+ }
+
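Editor's note: isExceptionalTerminator gives a name to the "is this terminator EH-related" check that passes previously open-coded. A small sketch (helper name illustrative):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // Illustrative helper: does this block end in an EH-related terminator
    // (invoke, resume, catchswitch, catchret, or cleanupret)?
    static bool endsInEHTerminator(const BasicBlock &BB) {
      const Instruction *Term = BB.getTerminator();
      return Term && Term->isExceptionalTerminator();
    }
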
//===--------------------------------------------------------------------===//
// Metadata manipulation.
//===--------------------------------------------------------------------===//
@@ -561,6 +582,10 @@ public:
}
}
+ /// Return true if the instruction is a llvm.lifetime.start or
+ /// llvm.lifetime.end marker.
+ bool isLifetimeStartOrEnd() const;
+
/// Return a pointer to the next non-debug instruction in the same basic
/// block as 'this', or nullptr if no such instruction exists.
const Instruction *getNextNonDebugInstruction() const;
@@ -569,6 +594,14 @@ public:
static_cast<const Instruction *>(this)->getNextNonDebugInstruction());
}
+ /// Return a pointer to the previous non-debug instruction in the same basic
+ /// block as 'this', or nullptr if no such instruction exists.
+ const Instruction *getPrevNonDebugInstruction() const;
+ Instruction *getPrevNonDebugInstruction() {
+ return const_cast<Instruction *>(
+ static_cast<const Instruction *>(this)->getPrevNonDebugInstruction());
+ }
+
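Editor's note: getPrevNonDebugInstruction mirrors the existing forward walk and pairs naturally with the new isLifetimeStartOrEnd query above. A brief sketch combining the two (the helper name is made up):

    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // Illustrative helper: step backwards over llvm.dbg.* intrinsics and ask
    // whether the closest "real" predecessor is a lifetime marker.
    static bool followsLifetimeMarker(const Instruction &I) {
      if (const Instruction *Prev = I.getPrevNonDebugInstruction())
        return Prev->isLifetimeStartOrEnd();
      return false;
    }
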
/// Create a copy of 'this' instruction that is identical in all ways except
/// the following:
/// * The instruction has no parent
@@ -611,6 +644,16 @@ public:
/// operands in the corresponding predecessor block.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const;
+ /// Return the number of successors that this instruction has. The instruction
+ /// must be a terminator.
+ unsigned getNumSuccessors() const;
+
+ /// Return the specified successor. This instruction must be a terminator.
+ BasicBlock *getSuccessor(unsigned Idx) const;
+
+ /// Update the specified successor to point at the provided block. This
+ /// instruction must be a terminator.
+ void setSuccessor(unsigned Idx, BasicBlock *BB);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
@@ -627,6 +670,13 @@ public:
#include "llvm/IR/Instruction.def"
};
+ enum UnaryOps {
+#define FIRST_UNARY_INST(N) UnaryOpsBegin = N,
+#define HANDLE_UNARY_INST(N, OPC, CLASS) OPC = N,
+#define LAST_UNARY_INST(N) UnaryOpsEnd = N+1
+#include "llvm/IR/Instruction.def"
+ };
+
enum BinaryOps {
#define FIRST_BINARY_INST(N) BinaryOpsBegin = N,
#define HANDLE_BINARY_INST(N, OPC, CLASS) OPC = N,
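Editor's note: with getNumSuccessors/getSuccessor/setSuccessor now declared on Instruction itself (part of retiring TerminatorInst), generic CFG code no longer needs to cast to a terminator subclass. A sketch of the indexed form (helper name illustrative):

    #include <cassert>

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // Illustrative helper: count how many successor edges of a terminator
    // point at Target, using the successor accessors added to Instruction.
    static unsigned countEdgesTo(const Instruction &Term,
                                 const BasicBlock *Target) {
      assert(Term.isTerminator() && "successor accessors need a terminator");
      unsigned N = 0;
      for (unsigned I = 0, E = Term.getNumSuccessors(); I != E; ++I)
        if (Term.getSuccessor(I) == Target)
          ++N;
      return N;
    }
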
diff --git a/contrib/llvm/include/llvm/IR/Instructions.h b/contrib/llvm/include/llvm/IR/Instructions.h
index 9be8bd1a07bc..0ff8f56f213a 100644
--- a/contrib/llvm/include/llvm/IR/Instructions.h
+++ b/contrib/llvm/include/llvm/IR/Instructions.h
@@ -175,47 +175,58 @@ protected:
LoadInst *cloneImpl() const;
public:
- LoadInst(Value *Ptr, const Twine &NameStr, Instruction *InsertBefore);
- LoadInst(Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd);
- LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile = false,
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr = "",
Instruction *InsertBefore = nullptr);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile = false,
- Instruction *InsertBefore = nullptr)
- : LoadInst(cast<PointerType>(Ptr->getType())->getElementType(), Ptr,
- NameStr, isVolatile, InsertBefore) {}
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd);
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
+ Instruction *InsertBefore = nullptr);
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
- Instruction *InsertBefore = nullptr)
- : LoadInst(cast<PointerType>(Ptr->getType())->getElementType(), Ptr,
- NameStr, isVolatile, Align, InsertBefore) {}
LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, Instruction *InsertBefore = nullptr);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
- AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System,
- Instruction *InsertBefore = nullptr)
- : LoadInst(cast<PointerType>(Ptr->getType())->getElementType(), Ptr,
- NameStr, isVolatile, Align, Order, SSID, InsertBefore) {}
LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, AtomicOrdering Order,
SyncScope::ID SSID = SyncScope::System,
Instruction *InsertBefore = nullptr);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, AtomicOrdering Order, SyncScope::ID SSID,
BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const char *NameStr, Instruction *InsertBefore);
- LoadInst(Value *Ptr, const char *NameStr, BasicBlock *InsertAtEnd);
- LoadInst(Type *Ty, Value *Ptr, const char *NameStr = nullptr,
- bool isVolatile = false, Instruction *InsertBefore = nullptr);
- explicit LoadInst(Value *Ptr, const char *NameStr = nullptr,
- bool isVolatile = false,
+
+ // Deprecated [opaque pointer types]
+ explicit LoadInst(Value *Ptr, const Twine &NameStr = "",
Instruction *InsertBefore = nullptr)
- : LoadInst(cast<PointerType>(Ptr->getType())->getElementType(), Ptr,
- NameStr, isVolatile, InsertBefore) {}
- LoadInst(Value *Ptr, const char *NameStr, bool isVolatile,
- BasicBlock *InsertAtEnd);
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ InsertBefore) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ InsertAtEnd) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ Instruction *InsertBefore = nullptr)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, InsertBefore) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ BasicBlock *InsertAtEnd)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, InsertAtEnd) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ Instruction *InsertBefore = nullptr)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, Align, InsertBefore) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ BasicBlock *InsertAtEnd)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, Align, InsertAtEnd) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System,
+ Instruction *InsertBefore = nullptr)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, Align, Order, SSID, InsertBefore) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, Align, Order, SSID, InsertAtEnd) {}
/// Return true if this is a load from a volatile memory location.
bool isVolatile() const { return getSubclassDataFromInstruction() & 1; }
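Editor's note: the LoadInst changes above flip the default: every constructor now takes the loaded Type explicitly, and the pointee-type-deriving forms survive only as deprecated forwarding shims marked "[opaque pointer types]". A sketch of the preferred spelling (function name illustrative):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Type.h"

    using namespace llvm;

    // Illustrative helper: state the loaded type explicitly instead of
    // relying on Ptr->getType()->getPointerElementType(), which the
    // deprecated overloads above still fall back to.
    static LoadInst *loadI32(Value *Ptr, Instruction *InsertBefore) {
      Type *I32 = Type::getInt32Ty(Ptr->getContext());
      return new LoadInst(I32, Ptr, "val", /*isVolatile=*/false, InsertBefore);
    }
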
@@ -735,6 +746,8 @@ public:
return static_cast<BinOp>(getSubclassDataFromInstruction() >> 5);
}
+ static StringRef getOperationName(BinOp Op);
+
void setOperation(BinOp Operation) {
unsigned short SubclassData = getSubclassDataFromInstruction();
setInstructionSubclassData((SubclassData & 31) |
@@ -1102,6 +1115,71 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
//===----------------------------------------------------------------------===//
+// UnaryOperator Class
+//===----------------------------------------------------------------------===//
+
+/// A unary instruction.
+class UnaryOperator : public UnaryInstruction {
+ void AssertOK();
+
+protected:
+ UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
+ const Twine &Name, Instruction *InsertBefore);
+ UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
+ const Twine &Name, BasicBlock *InsertAtEnd);
+
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+
+ UnaryOperator *cloneImpl() const;
+
+public:
+
+ /// Construct a unary instruction, given the opcode and an operand.
+  /// Optionally (if InsertBefore is specified) insert the instruction
+  /// Optionally (if InsertBefore is specified) insert the instruction
+ /// into a BasicBlock right before the specified instruction. The specified
+ /// Instruction is allowed to be a dereferenced end iterator.
+ ///
+ static UnaryOperator *Create(UnaryOps Op, Value *S,
+ const Twine &Name = Twine(),
+ Instruction *InsertBefore = nullptr);
+
+ /// Construct a unary instruction, given the opcode and an operand.
+ /// Also automatically insert this instruction to the end of the
+ /// BasicBlock specified.
+ ///
+ static UnaryOperator *Create(UnaryOps Op, Value *S,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd);
+
+ /// These methods just forward to Create, and are useful when you
+ /// statically know what type of instruction you're going to create. These
+ /// helpers just save some typing.
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+ static UnaryInstruction *Create##OPC(Value *V, \
+ const Twine &Name = "") {\
+ return Create(Instruction::OPC, V, Name);\
+ }
+#include "llvm/IR/Instruction.def"
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+ static UnaryInstruction *Create##OPC(Value *V, \
+ const Twine &Name, BasicBlock *BB) {\
+ return Create(Instruction::OPC, V, Name, BB);\
+ }
+#include "llvm/IR/Instruction.def"
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+ static UnaryInstruction *Create##OPC(Value *V, \
+ const Twine &Name, Instruction *I) {\
+ return Create(Instruction::OPC, V, Name, I);\
+ }
+#include "llvm/IR/Instruction.def"
+
+ UnaryOps getOpcode() const {
+ return static_cast<UnaryOps>(Instruction::getOpcode());
+ }
+};
+
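Editor's note: UnaryOperator is the unary counterpart of BinaryOperator; its Create##OPC helpers are stamped out from Instruction.def, so at this point that means CreateFNeg. A short sketch (helper name illustrative):

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Illustrative helper: build an fneg of a floating-point value and insert
    // it before an existing instruction.  Equivalent to
    // UnaryOperator::Create(Instruction::FNeg, FPVal, "neg", InsertBefore).
    static Value *negate(Value *FPVal, Instruction *InsertBefore) {
      return UnaryOperator::CreateFNeg(FPVal, "neg", InsertBefore);
    }
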
+//===----------------------------------------------------------------------===//
// ICmpInst Class
//===----------------------------------------------------------------------===//
@@ -1297,12 +1375,13 @@ public:
/// Constructor with no-insertion semantics
FCmpInst(
- Predicate pred, ///< The predicate to use for the comparison
+ Predicate Pred, ///< The predicate to use for the comparison
Value *LHS, ///< The left-hand-side of the expression
Value *RHS, ///< The right-hand-side of the expression
- const Twine &NameStr = "" ///< Name of the instruction
- ) : CmpInst(makeCmpResultType(LHS->getType()),
- Instruction::FCmp, pred, LHS, RHS, NameStr) {
+ const Twine &NameStr = "", ///< Name of the instruction
+ Instruction *FlagsSource = nullptr
+ ) : CmpInst(makeCmpResultType(LHS->getType()), Instruction::FCmp, Pred, LHS,
+ RHS, NameStr, nullptr, FlagsSource) {
AssertOK();
}
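Editor's note: the FCmpInst constructor grows a FlagsSource parameter. The intent, as far as this hunk shows, is to let the new compare pick up IR flags (fast-math flags in particular) from an existing instruction instead of starting with none; treat that reading as an assumption, since the implementation is not part of this diff. A hedged sketch:

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Hedged sketch: assumes FlagsSource copies the source instruction's
    // fast-math flags onto the new compare.  This is the uninserted
    // constructor, so the caller still has to place the result in a block.
    static FCmpInst *orderedEqualLike(Value *L, Value *R,
                                      Instruction *FlagsFrom) {
      return new FCmpInst(CmpInst::FCMP_OEQ, L, R, "cmp", FlagsFrom);
    }
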
@@ -1350,537 +1429,13 @@ public:
}
};
-class CallInst;
-class InvokeInst;
-
-template <class T> struct CallBaseParent { using type = Instruction; };
-
-template <> struct CallBaseParent<InvokeInst> { using type = TerminatorInst; };
-
-//===----------------------------------------------------------------------===//
-/// Base class for all callable instructions (InvokeInst and CallInst)
-/// Holds everything related to calling a function, abstracting from the base
-/// type @p BaseInstTy and the concrete instruction @p InstTy
-///
-template <class InstTy>
-class CallBase : public CallBaseParent<InstTy>::type,
- public OperandBundleUser<InstTy, User::op_iterator> {
-protected:
- AttributeList Attrs; ///< parameter attributes for callable
- FunctionType *FTy;
- using BaseInstTy = typename CallBaseParent<InstTy>::type;
-
- template <class... ArgsTy>
- CallBase(AttributeList const &A, FunctionType *FT, ArgsTy &&... Args)
- : BaseInstTy(std::forward<ArgsTy>(Args)...), Attrs(A), FTy(FT) {}
- bool hasDescriptor() const { return Value::HasDescriptor; }
-
- using BaseInstTy::BaseInstTy;
-
- using OperandBundleUser<InstTy,
- User::op_iterator>::isFnAttrDisallowedByOpBundle;
- using OperandBundleUser<InstTy, User::op_iterator>::getNumTotalBundleOperands;
- using OperandBundleUser<InstTy, User::op_iterator>::bundleOperandHasAttr;
- using Instruction::getSubclassDataFromInstruction;
- using Instruction::setInstructionSubclassData;
-
-public:
- using Instruction::getContext;
- using OperandBundleUser<InstTy, User::op_iterator>::hasOperandBundles;
- using OperandBundleUser<InstTy,
- User::op_iterator>::getBundleOperandsStartIndex;
-
- static bool classof(const Instruction *I) {
- llvm_unreachable(
- "CallBase is not meant to be used as part of the classof hierarchy");
- }
-
-public:
- /// Return the parameter attributes for this call.
- ///
- AttributeList getAttributes() const { return Attrs; }
-
- /// Set the parameter attributes for this call.
- ///
- void setAttributes(AttributeList A) { Attrs = A; }
-
- FunctionType *getFunctionType() const { return FTy; }
-
- void mutateFunctionType(FunctionType *FTy) {
- Value::mutateType(FTy->getReturnType());
- this->FTy = FTy;
- }
-
- /// Return the number of call arguments.
- ///
- unsigned getNumArgOperands() const {
- return getNumOperands() - getNumTotalBundleOperands() - InstTy::ArgOffset;
- }
-
- /// getArgOperand/setArgOperand - Return/set the i-th call argument.
- ///
- Value *getArgOperand(unsigned i) const {
- assert(i < getNumArgOperands() && "Out of bounds!");
- return getOperand(i);
- }
- void setArgOperand(unsigned i, Value *v) {
- assert(i < getNumArgOperands() && "Out of bounds!");
- setOperand(i, v);
- }
-
- /// Return the iterator pointing to the beginning of the argument list.
- User::op_iterator arg_begin() { return op_begin(); }
-
- /// Return the iterator pointing to the end of the argument list.
- User::op_iterator arg_end() {
- // [ call args ], [ operand bundles ], callee
- return op_end() - getNumTotalBundleOperands() - InstTy::ArgOffset;
- }
-
- /// Iteration adapter for range-for loops.
- iterator_range<User::op_iterator> arg_operands() {
- return make_range(arg_begin(), arg_end());
- }
-
- /// Return the iterator pointing to the beginning of the argument list.
- User::const_op_iterator arg_begin() const { return op_begin(); }
-
- /// Return the iterator pointing to the end of the argument list.
- User::const_op_iterator arg_end() const {
- // [ call args ], [ operand bundles ], callee
- return op_end() - getNumTotalBundleOperands() - InstTy::ArgOffset;
- }
-
- /// Iteration adapter for range-for loops.
- iterator_range<User::const_op_iterator> arg_operands() const {
- return make_range(arg_begin(), arg_end());
- }
-
- /// Wrappers for getting the \c Use of a call argument.
- const Use &getArgOperandUse(unsigned i) const {
- assert(i < getNumArgOperands() && "Out of bounds!");
- return User::getOperandUse(i);
- }
- Use &getArgOperandUse(unsigned i) {
- assert(i < getNumArgOperands() && "Out of bounds!");
- return User::getOperandUse(i);
- }
-
- /// If one of the arguments has the 'returned' attribute, return its
- /// operand value. Otherwise, return nullptr.
- Value *getReturnedArgOperand() const {
- unsigned Index;
-
- if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
- return getArgOperand(Index - AttributeList::FirstArgIndex);
- if (const Function *F = getCalledFunction())
- if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
- Index)
- return getArgOperand(Index - AttributeList::FirstArgIndex);
-
- return nullptr;
- }
-
- User::op_iterator op_begin() {
- return OperandTraits<CallBase>::op_begin(this);
- }
-
- User::const_op_iterator op_begin() const {
- return OperandTraits<CallBase>::op_begin(const_cast<CallBase *>(this));
- }
-
- User::op_iterator op_end() { return OperandTraits<CallBase>::op_end(this); }
-
- User::const_op_iterator op_end() const {
- return OperandTraits<CallBase>::op_end(const_cast<CallBase *>(this));
- }
-
- Value *getOperand(unsigned i_nocapture) const {
- assert(i_nocapture < OperandTraits<CallBase>::operands(this) &&
- "getOperand() out of range!");
- return cast_or_null<Value>(OperandTraits<CallBase>::op_begin(
- const_cast<CallBase *>(this))[i_nocapture]
- .get());
- }
-
- void setOperand(unsigned i_nocapture, Value *Val_nocapture) {
- assert(i_nocapture < OperandTraits<CallBase>::operands(this) &&
- "setOperand() out of range!");
- OperandTraits<CallBase>::op_begin(this)[i_nocapture] = Val_nocapture;
- }
-
- unsigned getNumOperands() const {
- return OperandTraits<CallBase>::operands(this);
- }
- template <int Idx_nocapture> Use &Op() {
- return User::OpFrom<Idx_nocapture>(this);
- }
- template <int Idx_nocapture> const Use &Op() const {
- return User::OpFrom<Idx_nocapture>(this);
- }
-
- /// Return the function called, or null if this is an
- /// indirect function invocation.
- ///
- Function *getCalledFunction() const {
- return dyn_cast<Function>(Op<-InstTy::ArgOffset>());
- }
-
- /// Determine whether this call has the given attribute.
- bool hasFnAttr(Attribute::AttrKind Kind) const {
- assert(Kind != Attribute::NoBuiltin &&
- "Use CallBase::isNoBuiltin() to check for Attribute::NoBuiltin");
- return hasFnAttrImpl(Kind);
- }
-
- /// Determine whether this call has the given attribute.
- bool hasFnAttr(StringRef Kind) const { return hasFnAttrImpl(Kind); }
-
- /// getCallingConv/setCallingConv - Get or set the calling convention of this
- /// function call.
- CallingConv::ID getCallingConv() const {
- return static_cast<CallingConv::ID>(getSubclassDataFromInstruction() >> 2);
- }
- void setCallingConv(CallingConv::ID CC) {
- auto ID = static_cast<unsigned>(CC);
- assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
- setInstructionSubclassData((getSubclassDataFromInstruction() & 3) |
- (ID << 2));
- }
-
-
- /// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Kind);
- setAttributes(PAL);
- }
-
- /// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute Attr) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Attr);
- setAttributes(PAL);
- }
-
- /// Adds the attribute to the indicated argument
- void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
- }
-
- /// Adds the attribute to the indicated argument
- void addParamAttr(unsigned ArgNo, Attribute Attr) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr);
- setAttributes(PAL);
- }
-
- /// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
- }
-
- /// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, StringRef Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
- }
-
- /// Removes the attribute from the given argument
- void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
- }
-
- /// Removes the attribute from the given argument
- void removeParamAttr(unsigned ArgNo, StringRef Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
- }
-
- /// adds the dereferenceable attribute to the list of attributes.
- void addDereferenceableAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
- setAttributes(PAL);
- }
-
- /// adds the dereferenceable_or_null attribute to the list of
- /// attributes.
- void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
- setAttributes(PAL);
- }
-
- /// Determine whether the return value has the given attribute.
- bool hasRetAttr(Attribute::AttrKind Kind) const {
- if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind))
- return true;
-
- // Look at the callee, if available.
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind);
- return false;
- }
-
- /// Determine whether the argument or parameter has the given attribute.
- bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- assert(ArgNo < getNumArgOperands() && "Param index out of bounds!");
-
- if (Attrs.hasParamAttribute(ArgNo, Kind))
- return true;
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasParamAttribute(ArgNo, Kind);
- return false;
- }
-
- /// Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
- return getAttributes().getAttribute(i, Kind);
- }
-
- /// Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, StringRef Kind) const {
- return getAttributes().getAttribute(i, Kind);
- }
-
- /// Get the attribute of a given kind from a given arg
- Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- return getAttributes().getParamAttr(ArgNo, Kind);
- }
-
- /// Get the attribute of a given kind from a given arg
- Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- return getAttributes().getParamAttr(ArgNo, Kind);
- }
- /// Return true if the data operand at index \p i has the attribute \p
- /// A.
- ///
- /// Data operands include call arguments and values used in operand bundles,
- /// but does not include the callee operand. This routine dispatches to the
- /// underlying AttributeList or the OperandBundleUser as appropriate.
- ///
- /// The index \p i is interpreted as
- ///
- /// \p i == Attribute::ReturnIndex -> the return value
- /// \p i in [1, arg_size + 1) -> argument number (\p i - 1)
- /// \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
- /// (\p i - 1) in the operand list.
- bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
- // There are getNumOperands() - (InstTy::ArgOffset - 1) data operands.
- // The last operand is the callee.
- assert(i < (getNumOperands() - InstTy::ArgOffset + 1) &&
- "Data operand index out of bounds!");
-
- // The attribute A can either be directly specified, if the operand in
- // question is a call argument; or be indirectly implied by the kind of its
- // containing operand bundle, if the operand is a bundle operand.
-
- if (i == AttributeList::ReturnIndex)
- return hasRetAttr(Kind);
-
- // FIXME: Avoid these i - 1 calculations and update the API to use
- // zero-based indices.
- if (i < (getNumArgOperands() + 1))
- return paramHasAttr(i - 1, Kind);
-
- assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) &&
- "Must be either a call argument or an operand bundle!");
- return bundleOperandHasAttr(i - 1, Kind);
- }
-
- /// Extract the alignment of the return value.
- unsigned getRetAlignment() const { return Attrs.getRetAlignment(); }
-
- /// Extract the alignment for a call or parameter (0=unknown).
- unsigned getParamAlignment(unsigned ArgNo) const {
- return Attrs.getParamAlignment(ArgNo);
- }
-
- /// Extract the number of dereferenceable bytes for a call or
- /// parameter (0=unknown).
- uint64_t getDereferenceableBytes(unsigned i) const {
- return Attrs.getDereferenceableBytes(i);
- }
-
- /// Extract the number of dereferenceable_or_null bytes for a call or
- /// parameter (0=unknown).
- uint64_t getDereferenceableOrNullBytes(unsigned i) const {
- return Attrs.getDereferenceableOrNullBytes(i);
- }
-
- /// Determine if the return value is marked with NoAlias attribute.
- bool returnDoesNotAlias() const {
- return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- }
-
- /// Return true if the call should not be treated as a call to a
- /// builtin.
- bool isNoBuiltin() const {
- return hasFnAttrImpl(Attribute::NoBuiltin) &&
- !hasFnAttrImpl(Attribute::Builtin);
- }
-
- /// Determine if the call requires strict floating point semantics.
- bool isStrictFP() const { return hasFnAttr(Attribute::StrictFP); }
-
- /// Return true if the call should not be inlined.
- bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
- void setIsNoInline() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
- }
- /// Determine if the call does not access memory.
- bool doesNotAccessMemory() const {
- return hasFnAttr(Attribute::ReadNone);
- }
- void setDoesNotAccessMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
- }
-
- /// Determine if the call does not access or only reads memory.
- bool onlyReadsMemory() const {
- return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
- }
- void setOnlyReadsMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
- }
-
- /// Determine if the call does not access or only writes memory.
- bool doesNotReadMemory() const {
- return doesNotAccessMemory() || hasFnAttr(Attribute::WriteOnly);
- }
- void setDoesNotReadMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::WriteOnly);
- }
-
-  /// Determine if the call can access memory only using pointers based
- /// on its arguments.
- bool onlyAccessesArgMemory() const {
- return hasFnAttr(Attribute::ArgMemOnly);
- }
- void setOnlyAccessesArgMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ArgMemOnly);
- }
-
- /// Determine if the function may only access memory that is
- /// inaccessible from the IR.
- bool onlyAccessesInaccessibleMemory() const {
- return hasFnAttr(Attribute::InaccessibleMemOnly);
- }
- void setOnlyAccessesInaccessibleMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOnly);
- }
-
- /// Determine if the function may only access memory that is
- /// either inaccessible from the IR or pointed to by its arguments.
- bool onlyAccessesInaccessibleMemOrArgMem() const {
- return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
- }
- void setOnlyAccessesInaccessibleMemOrArgMem() {
- addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOrArgMemOnly);
- }
- /// Determine if the call cannot return.
- bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
- void setDoesNotReturn() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
- }
-
- /// Determine if the call should not perform indirect branch tracking.
- bool doesNoCfCheck() const { return hasFnAttr(Attribute::NoCfCheck); }
-
- /// Determine if the call cannot unwind.
- bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
- void setDoesNotThrow() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
- }
-
- /// Determine if the invoke cannot be duplicated.
- bool cannotDuplicate() const {return hasFnAttr(Attribute::NoDuplicate); }
- void setCannotDuplicate() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoDuplicate);
- }
-
- /// Determine if the invoke is convergent
- bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
- void setConvergent() {
- addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
- }
- void setNotConvergent() {
- removeAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
- }
-
- /// Determine if the call returns a structure through first
- /// pointer argument.
- bool hasStructRetAttr() const {
- if (getNumArgOperands() == 0)
- return false;
-
- // Be friendly and also check the callee.
- return paramHasAttr(0, Attribute::StructRet);
- }
-
- /// Determine if any call argument is an aggregate passed by value.
- bool hasByValArgument() const {
- return Attrs.hasAttrSomewhere(Attribute::ByVal);
- }
- /// Get a pointer to the function that is invoked by this
- /// instruction.
- const Value *getCalledValue() const { return Op<-InstTy::ArgOffset>(); }
- Value *getCalledValue() { return Op<-InstTy::ArgOffset>(); }
-
- /// Set the function called.
- void setCalledFunction(Value* Fn) {
- setCalledFunction(
- cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()),
- Fn);
- }
- void setCalledFunction(FunctionType *FTy, Value *Fn) {
- this->FTy = FTy;
- assert(FTy == cast<FunctionType>(
- cast<PointerType>(Fn->getType())->getElementType()));
- Op<-InstTy::ArgOffset>() = Fn;
- }
-
-protected:
- template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const {
- if (Attrs.hasAttribute(AttributeList::FunctionIndex, Kind))
- return true;
-
- // Operand bundles override attributes on the called function, but don't
- // override attributes directly present on the call instruction.
- if (isFnAttrDisallowedByOpBundle(Kind))
- return false;
-
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(AttributeList::FunctionIndex,
- Kind);
- return false;
- }
-};
-
//===----------------------------------------------------------------------===//
/// This class represents a function call, abstracting a target
/// machine's calling convention. This class uses low bit of the SubClassData
/// field to indicate whether or not this is a tail call. The rest of the bits
/// hold the calling convention of the call.
///
-class CallInst : public CallBase<CallInst> {
- friend class OperandBundleUser<CallInst, User::op_iterator>;
-
+class CallInst : public CallBase {
CallInst(const CallInst &CI);
/// Construct a CallInst given a range of arguments.
@@ -1889,36 +1444,32 @@ class CallInst : public CallBase<CallInst> {
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
Instruction *InsertBefore);
- inline CallInst(Value *Func, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
- Instruction *InsertBefore)
- : CallInst(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, Bundles, NameStr, InsertBefore) {}
-
- inline CallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
- Instruction *InsertBefore)
- : CallInst(Func, Args, None, NameStr, InsertBefore) {}
+ inline CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, Instruction *InsertBefore)
+ : CallInst(Ty, Func, Args, None, NameStr, InsertBefore) {}
/// Construct a CallInst given a range of arguments.
/// Construct a CallInst from a range of arguments
- inline CallInst(Value *Func, ArrayRef<Value *> Args,
+ inline CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
BasicBlock *InsertAtEnd);
- explicit CallInst(Value *F, const Twine &NameStr, Instruction *InsertBefore);
+ explicit CallInst(FunctionType *Ty, Value *F, const Twine &NameStr,
+ Instruction *InsertBefore);
- CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd);
+ CallInst(FunctionType *ty, Value *F, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
- void init(Value *Func, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr) {
- init(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, Bundles, NameStr);
- }
void init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
- void init(Value *Func, const Twine &NameStr);
+ void init(FunctionType *FTy, Value *Func, const Twine &NameStr);
+
+ /// Compute the number of operands to allocate.
+ static int ComputeNumOperands(int NumArgs, int NumBundleInputs = 0) {
+ // We need one operand for the called function, plus the input operand
+ // counts provided.
+ return 1 + NumArgs + NumBundleInputs;
+ }
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
@@ -1927,29 +1478,15 @@ protected:
CallInst *cloneImpl() const;
public:
- static constexpr int ArgOffset = 1;
-
- static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles = None,
- const Twine &NameStr = "",
+ static CallInst *Create(FunctionType *Ty, Value *F, const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
- return Create(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, Bundles, NameStr, InsertBefore);
- }
-
- static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr,
- Instruction *InsertBefore = nullptr) {
- return Create(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, None, NameStr, InsertBefore);
+ return new (ComputeNumOperands(0)) CallInst(Ty, F, NameStr, InsertBefore);
}
static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
- return new (unsigned(Args.size() + 1))
+ return new (ComputeNumOperands(Args.size()))
CallInst(Ty, Func, Args, None, NameStr, InsertBefore);
}
@@ -1957,39 +1494,107 @@ public:
ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
- const unsigned TotalOps =
- unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+ const int NumOperands =
+ ComputeNumOperands(Args.size(), CountBundleInputs(Bundles));
const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
- return new (TotalOps, DescriptorBytes)
+ return new (NumOperands, DescriptorBytes)
CallInst(Ty, Func, Args, Bundles, NameStr, InsertBefore);
}
- static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ static CallInst *Create(FunctionType *Ty, Value *F, const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new (ComputeNumOperands(0)) CallInst(Ty, F, NameStr, InsertAtEnd);
+ }
+
+ static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return new (ComputeNumOperands(Args.size()))
+ CallInst(Ty, Func, Args, None, NameStr, InsertAtEnd);
+ }
+
+ static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
- const unsigned TotalOps =
- unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+ const int NumOperands =
+ ComputeNumOperands(Args.size(), CountBundleInputs(Bundles));
const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
- return new (TotalOps, DescriptorBytes)
- CallInst(Func, Args, Bundles, NameStr, InsertAtEnd);
+ return new (NumOperands, DescriptorBytes)
+ CallInst(Ty, Func, Args, Bundles, NameStr, InsertAtEnd);
}
- static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ static CallInst *Create(Function *Func, const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return Create(Func->getFunctionType(), Func, NameStr, InsertBefore);
+ }
+
+ static CallInst *Create(Function *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return Create(Func->getFunctionType(), Func, Args, NameStr, InsertBefore);
+ }
+
+ static CallInst *Create(Function *Func, const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return Create(Func->getFunctionType(), Func, NameStr, InsertAtEnd);
+ }
+
+ static CallInst *Create(Function *Func, ArrayRef<Value *> Args,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
- return new (unsigned(Args.size() + 1))
- CallInst(Func, Args, None, NameStr, InsertAtEnd);
+ return Create(Func->getFunctionType(), Func, Args, NameStr, InsertAtEnd);
}
- static CallInst *Create(Value *F, const Twine &NameStr = "",
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, NameStr, InsertBefore);
+ }
+
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
- return new (1) CallInst(F, NameStr, InsertBefore);
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, Args, NameStr, InsertBefore);
}
- static CallInst *Create(Value *F, const Twine &NameStr,
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles = None,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, Args, Bundles, NameStr, InsertBefore);
+ }
+
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, const Twine &NameStr,
BasicBlock *InsertAtEnd) {
- return new (1) CallInst(F, NameStr, InsertAtEnd);
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, NameStr, InsertAtEnd);
+ }
+
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, Args, NameStr, InsertAtEnd);
+ }
+
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, Args, Bundles, NameStr, InsertAtEnd);
}
/// Create a clone of \p CI with a different set of operand bundles and
@@ -2080,7 +1685,7 @@ public:
}
/// Check if this call is an inline asm statement.
- bool isInlineAsm() const { return isa<InlineAsm>(Op<-1>()); }
+ bool isInlineAsm() const { return isa<InlineAsm>(getCalledOperand()); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
@@ -2098,32 +1703,25 @@ private:
}
};
-template <>
-struct OperandTraits<CallBase<CallInst>>
- : public VariadicOperandTraits<CallBase<CallInst>, 1> {};
-
-CallInst::CallInst(Value *Func, ArrayRef<Value *> Args,
+CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
BasicBlock *InsertAtEnd)
- : CallBase<CallInst>(
- cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType())
- ->getReturnType(),
- Instruction::Call,
- OperandTraits<CallBase<CallInst>>::op_end(this) -
- (Args.size() + CountBundleInputs(Bundles) + 1),
- unsigned(Args.size() + CountBundleInputs(Bundles) + 1), InsertAtEnd) {
- init(Func, Args, Bundles, NameStr);
+ : CallBase(Ty->getReturnType(), Instruction::Call,
+ OperandTraits<CallBase>::op_end(this) -
+ (Args.size() + CountBundleInputs(Bundles) + 1),
+ unsigned(Args.size() + CountBundleInputs(Bundles) + 1),
+ InsertAtEnd) {
+ init(Ty, Func, Args, Bundles, NameStr);
}
CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
Instruction *InsertBefore)
- : CallBase<CallInst>(Ty->getReturnType(), Instruction::Call,
- OperandTraits<CallBase<CallInst>>::op_end(this) -
- (Args.size() + CountBundleInputs(Bundles) + 1),
- unsigned(Args.size() + CountBundleInputs(Bundles) + 1),
- InsertBefore) {
+ : CallBase(Ty->getReturnType(), Instruction::Call,
+ OperandTraits<CallBase>::op_end(this) -
+ (Args.size() + CountBundleInputs(Bundles) + 1),
+ unsigned(Args.size() + CountBundleInputs(Bundles) + 1),
+ InsertBefore) {
init(Ty, Func, Args, Bundles, NameStr);
}
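Editor's note: the CallInst creation surface is reworked the same way as LoadInst above: the FunctionType (or a Function*, from which it is fetched) is stated up front, and the Value*-only overloads remain as deprecated shims that cast through the pointee type. A sketch of the preferred form (helper name illustrative):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Illustrative helper: call a known Function with two arguments.  The
    // Function* overload of Create fetches the FunctionType itself, so no
    // pointee-type cast is needed.
    static CallInst *callWithTwoArgs(Function *Callee, Value *A, Value *B,
                                     Instruction *InsertBefore) {
      Value *Args[] = {A, B};
      return CallInst::Create(Callee, Args, "", InsertBefore);
    }
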
@@ -2456,14 +2054,24 @@ public:
}
/// Return true if this shuffle returns a vector with a different number of
- /// elements than its source elements.
- /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2>
+ /// elements than its source vectors.
+ /// Examples: shufflevector <4 x n> A, <4 x n> B, <1,2,3>
+ /// shufflevector <4 x n> A, <4 x n> B, <1,2,3,4,5>
bool changesLength() const {
unsigned NumSourceElts = Op<0>()->getType()->getVectorNumElements();
unsigned NumMaskElts = getMask()->getType()->getVectorNumElements();
return NumSourceElts != NumMaskElts;
}
+ /// Return true if this shuffle returns a vector with a greater number of
+ /// elements than its source vectors.
+ /// Example: shufflevector <2 x n> A, <2 x n> B, <1,2,3>
+ bool increasesLength() const {
+ unsigned NumSourceElts = Op<0>()->getType()->getVectorNumElements();
+ unsigned NumMaskElts = getMask()->getType()->getVectorNumElements();
+ return NumSourceElts < NumMaskElts;
+ }
+
/// Return true if this shuffle mask chooses elements from exactly one source
/// vector.
/// Example: <7,5,undef,7>
@@ -2497,15 +2105,27 @@ public:
return isIdentityMask(MaskAsInts);
}
- /// Return true if this shuffle mask chooses elements from exactly one source
+ /// Return true if this shuffle chooses elements from exactly one source
/// vector without lane crossings and does not change the number of elements
/// from its input vectors.
/// Example: shufflevector <4 x n> A, <4 x n> B, <4,undef,6,undef>
- /// TODO: Optionally allow length-changing shuffles.
bool isIdentity() const {
return !changesLength() && isIdentityMask(getShuffleMask());
}
+ /// Return true if this shuffle lengthens exactly one source vector with
+ /// undefs in the high elements.
+ bool isIdentityWithPadding() const;
+
+ /// Return true if this shuffle extracts the first N elements of exactly one
+ /// source vector.
+ bool isIdentityWithExtract() const;
+
+ /// Return true if this shuffle concatenates its 2 source vectors. This
+  /// returns false if either input is undefined. In that case, the shuffle
+  /// is better classified as an identity with padding operation.
+ bool isConcat() const;
+
/// Return true if this shuffle mask chooses elements from its source vectors
/// without lane crossings. A shuffle using this mask would be
/// equivalent to a vector select with a constant condition operand.
@@ -2625,6 +2245,25 @@ public:
return !changesLength() && isTransposeMask(getMask());
}
+ /// Return true if this shuffle mask is an extract subvector mask.
+ /// A valid extract subvector mask returns a smaller vector from a single
+ /// source operand. The base extraction index is returned as well.
+ static bool isExtractSubvectorMask(ArrayRef<int> Mask, int NumSrcElts,
+ int &Index);
+ static bool isExtractSubvectorMask(const Constant *Mask, int NumSrcElts,
+ int &Index) {
+ assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+ SmallVector<int, 16> MaskAsInts;
+ getShuffleMask(Mask, MaskAsInts);
+ return isExtractSubvectorMask(MaskAsInts, NumSrcElts, Index);
+ }
+
+ /// Return true if this shuffle mask is an extract subvector mask.
+ bool isExtractSubvectorMask(int &Index) const {
+ int NumSrcElts = Op<0>()->getType()->getVectorNumElements();
+ return isExtractSubvectorMask(getMask(), NumSrcElts, Index);
+ }
+
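Editor's note: per the comment above, an extract-subvector mask pulls a shorter contiguous run out of a single source operand and reports where that run starts. A small self-check sketch on a literal mask (names illustrative, expected result stated as an assumption from the documented semantics):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Illustrative check: the mask <2, 3> over 4-element sources selects the
    // upper half of the first operand, so the expected extraction index is 2.
    static bool upperHalfIsExtract() {
      int Index = 0;
      const int Mask[] = {2, 3};
      return ShuffleVectorInst::isExtractSubvectorMask(Mask, /*NumSrcElts=*/4,
                                                       Index) &&
             Index == 2;
    }
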
/// Change values in a shuffle permute mask assuming the two vector operands
/// of length InVecNumElts have swapped position.
static void commuteShuffleMask(MutableArrayRef<int> Mask,
@@ -3241,7 +2880,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(LandingPadInst, Value)
/// Return a value (possibly void), from a function. Execution
/// does not continue in this function any longer.
///
-class ReturnInst : public TerminatorInst {
+class ReturnInst : public Instruction {
ReturnInst(const ReturnInst &RI);
private:
@@ -3301,8 +2940,6 @@ public:
}
private:
- friend TerminatorInst;
-
BasicBlock *getSuccessor(unsigned idx) const {
llvm_unreachable("ReturnInst has no successors!");
}
@@ -3325,7 +2962,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ReturnInst, Value)
//===---------------------------------------------------------------------------
/// Conditional or Unconditional Branch instruction.
///
-class BranchInst : public TerminatorInst {
+class BranchInst : public Instruction {
/// Ops list - Branches are strange. The operands are ordered:
/// [Cond, FalseDest,] TrueDest. This makes some accessors faster because
/// they don't have to check for cond/uncond branchness. These are mostly
@@ -3354,6 +2991,33 @@ protected:
BranchInst *cloneImpl() const;
public:
+ /// Iterator type that casts an operand to a basic block.
+ ///
+ /// This only makes sense because the successors are stored as adjacent
+ /// operands for branch instructions.
+ struct succ_op_iterator
+ : iterator_adaptor_base<succ_op_iterator, value_op_iterator,
+ std::random_access_iterator_tag, BasicBlock *,
+ ptrdiff_t, BasicBlock *, BasicBlock *> {
+ explicit succ_op_iterator(value_op_iterator I) : iterator_adaptor_base(I) {}
+
+ BasicBlock *operator*() const { return cast<BasicBlock>(*I); }
+ BasicBlock *operator->() const { return operator*(); }
+ };
+
+ /// The const version of `succ_op_iterator`.
+ struct const_succ_op_iterator
+ : iterator_adaptor_base<const_succ_op_iterator, const_value_op_iterator,
+ std::random_access_iterator_tag,
+ const BasicBlock *, ptrdiff_t, const BasicBlock *,
+ const BasicBlock *> {
+ explicit const_succ_op_iterator(const_value_op_iterator I)
+ : iterator_adaptor_base(I) {}
+
+ const BasicBlock *operator*() const { return cast<BasicBlock>(*I); }
+ const BasicBlock *operator->() const { return operator*(); }
+ };
+
static BranchInst *Create(BasicBlock *IfTrue,
Instruction *InsertBefore = nullptr) {
return new(1) BranchInst(IfTrue, InsertBefore);
@@ -3408,6 +3072,18 @@ public:
/// continues to map correctly to each operand.
void swapSuccessors();
+ iterator_range<succ_op_iterator> successors() {
+ return make_range(
+ succ_op_iterator(std::next(value_op_begin(), isConditional() ? 1 : 0)),
+ succ_op_iterator(value_op_end()));
+ }
+
+ iterator_range<const_succ_op_iterator> successors() const {
+ return make_range(const_succ_op_iterator(
+ std::next(value_op_begin(), isConditional() ? 1 : 0)),
+ const_succ_op_iterator(value_op_end()));
+ }
+
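Editor's note: the succ_op_iterator machinery exists so that successors() can hand back BasicBlock* directly while still walking the operand list; for a conditional branch the range starts one past the condition operand. Sketch of the range-for form (helper name illustrative):

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Illustrative helper: does this branch (conditional or not) target BB?
    static bool branchesTo(const BranchInst &BI, const BasicBlock *BB) {
      for (const BasicBlock *Succ : BI.successors())
        if (Succ == BB)
          return true;
      return false;
    }
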
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::Br);
@@ -3430,7 +3106,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value)
//===---------------------------------------------------------------------------
/// Multiway switch
///
-class SwitchInst : public TerminatorInst {
+class SwitchInst : public Instruction {
unsigned ReservedSpace;
// Operand[0] = Value to switch on
@@ -3513,7 +3189,7 @@ public:
/// Returns number of current case.
unsigned getCaseIndex() const { return Index; }
- /// Returns TerminatorInst's successor index for current case successor.
+ /// Returns successor index for current case successor.
unsigned getSuccessorIndex() const {
assert(((unsigned)Index == DefaultPseudoIndex ||
(unsigned)Index < SI->getNumCases()) &&
@@ -3569,7 +3245,7 @@ public:
CaseIteratorImpl(SwitchInstT *SI, unsigned CaseNum) : Case(SI, CaseNum) {}
/// Initializes case iterator for given SwitchInst and for given
- /// TerminatorInst's successor index.
+ /// successor index.
static CaseIteratorImpl fromSuccessorIndex(SwitchInstT *SI,
unsigned SuccessorIndex) {
assert(SuccessorIndex < SI->getNumSuccessors() &&
@@ -3787,7 +3463,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value)
//===---------------------------------------------------------------------------
/// Indirect Branch Instruction.
///
-class IndirectBrInst : public TerminatorInst {
+class IndirectBrInst : public Instruction {
unsigned ReservedSpace;
// Operand[0] = Address to jump to
@@ -3821,6 +3497,33 @@ protected:
IndirectBrInst *cloneImpl() const;
public:
+ /// Iterator type that casts an operand to a basic block.
+ ///
+ /// This only makes sense because the successors are stored as adjacent
+ /// operands for indirectbr instructions.
+ struct succ_op_iterator
+ : iterator_adaptor_base<succ_op_iterator, value_op_iterator,
+ std::random_access_iterator_tag, BasicBlock *,
+ ptrdiff_t, BasicBlock *, BasicBlock *> {
+ explicit succ_op_iterator(value_op_iterator I) : iterator_adaptor_base(I) {}
+
+ BasicBlock *operator*() const { return cast<BasicBlock>(*I); }
+ BasicBlock *operator->() const { return operator*(); }
+ };
+
+ /// The const version of `succ_op_iterator`.
+ struct const_succ_op_iterator
+ : iterator_adaptor_base<const_succ_op_iterator, const_value_op_iterator,
+ std::random_access_iterator_tag,
+ const BasicBlock *, ptrdiff_t, const BasicBlock *,
+ const BasicBlock *> {
+ explicit const_succ_op_iterator(const_value_op_iterator I)
+ : iterator_adaptor_base(I) {}
+
+ const BasicBlock *operator*() const { return cast<BasicBlock>(*I); }
+ const BasicBlock *operator->() const { return operator*(); }
+ };
+
static IndirectBrInst *Create(Value *Address, unsigned NumDests,
Instruction *InsertBefore = nullptr) {
return new IndirectBrInst(Address, NumDests, InsertBefore);
@@ -3863,6 +3566,16 @@ public:
setOperand(i + 1, NewSucc);
}
+ iterator_range<succ_op_iterator> successors() {
+ return make_range(succ_op_iterator(std::next(value_op_begin())),
+ succ_op_iterator(value_op_end()));
+ }
+
+ iterator_range<const_succ_op_iterator> successors() const {
+ return make_range(const_succ_op_iterator(std::next(value_op_begin())),
+ const_succ_op_iterator(value_op_end()));
+ }
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::IndirectBr;
@@ -3885,48 +3598,43 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
/// Invoke instruction. The SubclassData field is used to hold the
/// calling convention of the call.
///
-class InvokeInst : public CallBase<InvokeInst> {
- friend class OperandBundleUser<InvokeInst, User::op_iterator>;
+class InvokeInst : public CallBase {
+ /// The number of operands for this call beyond the called function,
+ /// arguments, and operand bundles.
+ static constexpr int NumExtraOperands = 2;
+
+ /// The index from the end of the operand array to the normal destination.
+ static constexpr int NormalDestOpEndIdx = -3;
+
+ /// The index from the end of the operand array to the unwind destination.
+ static constexpr int UnwindDestOpEndIdx = -2;
InvokeInst(const InvokeInst &BI);
/// Construct an InvokeInst given a range of arguments.
///
/// Construct an InvokeInst from a range of arguments
- inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
- unsigned Values, const Twine &NameStr,
- Instruction *InsertBefore)
- : InvokeInst(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, Bundles, Values, NameStr,
- InsertBefore) {}
-
inline InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, unsigned Values,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
const Twine &NameStr, Instruction *InsertBefore);
- /// Construct an InvokeInst given a range of arguments.
- ///
- /// Construct an InvokeInst from a range of arguments
- inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
- unsigned Values, const Twine &NameStr,
- BasicBlock *InsertAtEnd);
-
- void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
- const Twine &NameStr) {
- init(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, Bundles, NameStr);
- }
+ inline InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
- void init(FunctionType *FTy, Value *Func, BasicBlock *IfNormal,
+ void init(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
+ /// Compute the number of operands to allocate.
+ static int ComputeNumOperands(int NumArgs, int NumBundleInputs = 0) {
+ // We need one operand for the called function, plus our extra operands and
+ // the input operand counts provided.
+ return 1 + NumExtraOperands + NumArgs + NumBundleInputs;
+ }
+
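As a worked example of the arithmetic above (values illustrative, not taken from the patch): an invoke of a callee with two arguments and one operand bundle carrying three inputs allocates 1 (callee) + 2 (normal and unwind destinations) + 2 (arguments) + 3 (bundle inputs) = 8 operands.

  // Illustrative check of ComputeNumOperands(NumArgs=2, NumBundleInputs=3):
  //   1 callee + 2 destinations + 2 args + 3 bundle inputs = 8 operands.
  static_assert(1 + 2 + 2 + 3 == 8, "invoke operand allocation example");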
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -3934,69 +3642,125 @@ protected:
InvokeInst *cloneImpl() const;
public:
- static constexpr int ArgOffset = 3;
- static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
- return Create(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, None, NameStr,
- InsertBefore);
+ int NumOperands = ComputeNumOperands(Args.size());
+ return new (NumOperands)
+ InvokeInst(Ty, Func, IfNormal, IfException, Args, None, NumOperands,
+ NameStr, InsertBefore);
}
- static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
- return Create(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, Bundles, NameStr,
- InsertBefore);
+ int NumOperands =
+ ComputeNumOperands(Args.size(), CountBundleInputs(Bundles));
+ unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+ return new (NumOperands, DescriptorBytes)
+ InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, NumOperands,
+ NameStr, InsertBefore);
+ }
+
+ static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ int NumOperands = ComputeNumOperands(Args.size());
+ return new (NumOperands)
+ InvokeInst(Ty, Func, IfNormal, IfException, Args, None, NumOperands,
+ NameStr, InsertAtEnd);
}
static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ int NumOperands =
+ ComputeNumOperands(Args.size(), CountBundleInputs(Bundles));
+ unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+ return new (NumOperands, DescriptorBytes)
+ InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, NumOperands,
+ NameStr, InsertAtEnd);
+ }
+
+ static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
- unsigned Values = unsigned(Args.size()) + 3;
- return new (Values) InvokeInst(Ty, Func, IfNormal, IfException, Args, None,
- Values, NameStr, InsertBefore);
+ return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
+ None, NameStr, InsertBefore);
}
- static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
+ static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
- unsigned Values = unsigned(Args.size()) + CountBundleInputs(Bundles) + 3;
- unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+ return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
+ Bundles, NameStr, InsertBefore);
+ }
- return new (Values, DescriptorBytes)
- InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, Values,
- NameStr, InsertBefore);
+ static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
+ NameStr, InsertAtEnd);
}
- static InvokeInst *Create(Value *Func,
- BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- unsigned Values = unsigned(Args.size()) + 3;
- return new (Values) InvokeInst(Func, IfNormal, IfException, Args, None,
- Values, NameStr, InsertAtEnd);
+ static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
+ Bundles, NameStr, InsertAtEnd);
}
+ // Deprecated [opaque pointer types]
+ static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ const Twine &NameStr,
+ Instruction *InsertBefore = nullptr) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, IfNormal, IfException, Args, None, NameStr,
+ InsertBefore);
+ }
+
+ // Deprecated [opaque pointer types]
+ static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles = None,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, IfNormal, IfException, Args, Bundles, NameStr,
+ InsertBefore);
+ }
+
+ // Deprecated [opaque pointer types]
static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
- unsigned Values = unsigned(Args.size()) + CountBundleInputs(Bundles) + 3;
- unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, IfNormal, IfException, Args, NameStr, InsertAtEnd);
+ }
- return new (Values, DescriptorBytes)
- InvokeInst(Func, IfNormal, IfException, Args, Bundles, Values, NameStr,
- InsertAtEnd);
+ // Deprecated [opaque pointer types]
+ static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, IfNormal, IfException, Args, Bundles, NameStr,
+ InsertAtEnd);
}
/// Create a clone of \p II with a different set of operand bundles and
@@ -4017,43 +3781,18 @@ public:
addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
}
- /// Return the function called, or null if this is an
- /// indirect function invocation.
- ///
- Function *getCalledFunction() const {
- return dyn_cast<Function>(Op<-3>());
- }
-
- /// Get a pointer to the function that is invoked by this
- /// instruction
- const Value *getCalledValue() const { return Op<-3>(); }
- Value *getCalledValue() { return Op<-3>(); }
-
- /// Set the function called.
- void setCalledFunction(Value* Fn) {
- setCalledFunction(
- cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()),
- Fn);
- }
- void setCalledFunction(FunctionType *FTy, Value *Fn) {
- this->FTy = FTy;
- assert(FTy == cast<FunctionType>(
- cast<PointerType>(Fn->getType())->getElementType()));
- Op<-3>() = Fn;
- }
-
// get*Dest - Return the destination basic blocks...
BasicBlock *getNormalDest() const {
- return cast<BasicBlock>(Op<-2>());
+ return cast<BasicBlock>(Op<NormalDestOpEndIdx>());
}
BasicBlock *getUnwindDest() const {
- return cast<BasicBlock>(Op<-1>());
+ return cast<BasicBlock>(Op<UnwindDestOpEndIdx>());
}
void setNormalDest(BasicBlock *B) {
- Op<-2>() = reinterpret_cast<Value*>(B);
+ Op<NormalDestOpEndIdx>() = reinterpret_cast<Value *>(B);
}
void setUnwindDest(BasicBlock *B) {
- Op<-1>() = reinterpret_cast<Value*>(B);
+ Op<UnwindDestOpEndIdx>() = reinterpret_cast<Value *>(B);
}
/// Get the landingpad instruction from the landing pad
@@ -4065,9 +3804,12 @@ public:
return i == 0 ? getNormalDest() : getUnwindDest();
}
- void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
- assert(idx < 2 && "Successor # out of range for invoke!");
- *(&Op<-2>() + idx) = reinterpret_cast<Value*>(NewSucc);
+ void setSuccessor(unsigned i, BasicBlock *NewSucc) {
+ assert(i < 2 && "Successor # out of range for invoke!");
+ if (i == 0)
+ setNormalDest(NewSucc);
+ else
+ setUnwindDest(NewSucc);
}
unsigned getNumSuccessors() const { return 2; }
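A hedged sketch (not part of the patch; function name illustrative) of what the end-relative indices above imply, assuming the CallBase convention that the callee occupies the last operand slot: arguments and bundle operands come first, then the two destinations, then the callee, so the destination accessors are constant-time slot reads rather than searches.

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // For "invoke f(a, b) to %normal unwind %lpad" the operand list ends with
  // [..., %normal (Op<-3>), %lpad (Op<-2>), f (Op<-1>)].
  static BasicBlock *pickInvokeDest(InvokeInst *II, bool WantUnwind) {
    return WantUnwind ? II->getUnwindDest() : II->getNormalDest();
  }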
@@ -4089,36 +3831,26 @@ private:
}
};
-template <>
-struct OperandTraits<CallBase<InvokeInst>>
- : public VariadicOperandTraits<CallBase<InvokeInst>, 3> {};
-
InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, unsigned Values,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
const Twine &NameStr, Instruction *InsertBefore)
- : CallBase<InvokeInst>(Ty->getReturnType(), Instruction::Invoke,
- OperandTraits<CallBase<InvokeInst>>::op_end(this) -
- Values,
- Values, InsertBefore) {
+ : CallBase(Ty->getReturnType(), Instruction::Invoke,
+ OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands,
+ InsertBefore) {
init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr);
}
-InvokeInst::InvokeInst(Value *Func, BasicBlock *IfNormal,
+InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, unsigned Values,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
const Twine &NameStr, BasicBlock *InsertAtEnd)
- : CallBase<InvokeInst>(
- cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType())
- ->getReturnType(),
- Instruction::Invoke,
- OperandTraits<CallBase<InvokeInst>>::op_end(this) - Values, Values,
- InsertAtEnd) {
- init(Func, IfNormal, IfException, Args, Bundles, NameStr);
+ : CallBase(Ty->getReturnType(), Instruction::Invoke,
+ OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands,
+ InsertAtEnd) {
+ init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr);
}
-
//===----------------------------------------------------------------------===//
// ResumeInst Class
//===----------------------------------------------------------------------===//
@@ -4126,7 +3858,7 @@ InvokeInst::InvokeInst(Value *Func, BasicBlock *IfNormal,
//===---------------------------------------------------------------------------
/// Resume the propagation of an exception.
///
-class ResumeInst : public TerminatorInst {
+class ResumeInst : public Instruction {
ResumeInst(const ResumeInst &RI);
explicit ResumeInst(Value *Exn, Instruction *InsertBefore=nullptr);
@@ -4164,8 +3896,6 @@ public:
}
private:
- friend TerminatorInst;
-
BasicBlock *getSuccessor(unsigned idx) const {
llvm_unreachable("ResumeInst has no successors!");
}
@@ -4185,7 +3915,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value)
//===----------------------------------------------------------------------===//
// CatchSwitchInst Class
//===----------------------------------------------------------------------===//
-class CatchSwitchInst : public TerminatorInst {
+class CatchSwitchInst : public Instruction {
/// The number of operands actually allocated. NumOperands is
/// the number actually in use.
unsigned ReservedSpace;
@@ -4451,7 +4181,7 @@ public:
// CatchReturnInst Class
//===----------------------------------------------------------------------===//
-class CatchReturnInst : public TerminatorInst {
+class CatchReturnInst : public Instruction {
CatchReturnInst(const CatchReturnInst &RI);
CatchReturnInst(Value *CatchPad, BasicBlock *BB, Instruction *InsertBefore);
CatchReturnInst(Value *CatchPad, BasicBlock *BB, BasicBlock *InsertAtEnd);
@@ -4511,8 +4241,6 @@ public:
}
private:
- friend TerminatorInst;
-
BasicBlock *getSuccessor(unsigned Idx) const {
assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!");
return getSuccessor();
@@ -4534,7 +4262,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchReturnInst, Value)
// CleanupReturnInst Class
//===----------------------------------------------------------------------===//
-class CleanupReturnInst : public TerminatorInst {
+class CleanupReturnInst : public Instruction {
private:
CleanupReturnInst(const CleanupReturnInst &RI);
CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values,
@@ -4607,8 +4335,6 @@ public:
}
private:
- friend TerminatorInst;
-
BasicBlock *getSuccessor(unsigned Idx) const {
assert(Idx == 0);
return getUnwindDest();
@@ -4641,7 +4367,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CleanupReturnInst, Value)
/// presence of this instruction indicates some higher level knowledge that the
/// end of the block cannot be reached.
///
-class UnreachableInst : public TerminatorInst {
+class UnreachableInst : public Instruction {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -4668,8 +4394,6 @@ public:
}
private:
- friend TerminatorInst;
-
BasicBlock *getSuccessor(unsigned idx) const {
llvm_unreachable("UnreachableInst has no successors!");
}
@@ -5248,6 +4972,25 @@ inline Value *getPointerOperand(Value *V) {
return nullptr;
}
+/// A helper function that returns the alignment of a load or store instruction.

+inline unsigned getLoadStoreAlignment(Value *I) {
+ assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ "Expected Load or Store instruction");
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ return LI->getAlignment();
+ return cast<StoreInst>(I)->getAlignment();
+}
+
+/// A helper function that returns the address space of the pointer operand of
+/// a load or store instruction.
+inline unsigned getLoadStoreAddressSpace(Value *I) {
+ assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ "Expected Load or Store instruction");
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerAddressSpace();
+ return cast<StoreInst>(I)->getPointerAddressSpace();
+}
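A small usage sketch (not part of the patch; function name illustrative) showing the intended benefit of these helpers: callers filtering memory accesses no longer duplicate the load/store dispatch.

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Both helpers assert that I is a LoadInst or StoreInst, so a caller only
  // writes the combined query, not two nearly identical branches.
  static bool isUnderalignedFlatAccess(Value *I, unsigned MinAlign,
                                       unsigned FlatAddrSpace) {
    return getLoadStoreAddressSpace(I) == FlatAddrSpace &&
           getLoadStoreAlignment(I) < MinAlign;
  }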
+
} // end namespace llvm
#endif // LLVM_IR_INSTRUCTIONS_H
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicInst.h b/contrib/llvm/include/llvm/IR/IntrinsicInst.h
index 6650afcca7fb..80a7a7052574 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/contrib/llvm/include/llvm/IR/IntrinsicInst.h
@@ -66,6 +66,27 @@ namespace llvm {
/// This is the common base class for debug info intrinsics.
class DbgInfoIntrinsic : public IntrinsicInst {
public:
+ /// \name Casting methods
+ /// @{
+ static bool classof(const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::dbg_addr:
+ case Intrinsic::dbg_label:
+ return true;
+ default: return false;
+ }
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ /// @}
+ };
+
+ /// This is the common base class for debug info intrinsics for variables.
+ class DbgVariableIntrinsic : public DbgInfoIntrinsic {
+ public:
/// Get the location corresponding to the variable referenced by the debug
/// info intrinsic. Depending on the intrinsic, this could be the
/// variable's value or its address.
@@ -104,7 +125,6 @@ namespace llvm {
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::dbg_addr:
- case Intrinsic::dbg_label:
return true;
default: return false;
}
@@ -116,7 +136,7 @@ namespace llvm {
};
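A minimal sketch (not part of the patch; function name illustrative) of how the split hierarchy is typically consumed: dbg.declare, dbg.value, and dbg.addr now share DbgVariableIntrinsic, while dbg.label remains only a DbgInfoIntrinsic, so a location-oriented walk can skip labels with a single dyn_cast.

  #include "llvm/IR/IntrinsicInst.h"
  using namespace llvm;

  // Returns the value or address described by a variable debug intrinsic,
  // or nullptr for anything else (including llvm.dbg.label).
  static Value *variableLocationOrNull(Instruction &I) {
    if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
      return DVI->getVariableLocation();
    return nullptr;
  }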
/// This represents the llvm.dbg.declare instruction.
- class DbgDeclareInst : public DbgInfoIntrinsic {
+ class DbgDeclareInst : public DbgVariableIntrinsic {
public:
Value *getAddress() const { return getVariableLocation(); }
@@ -132,7 +152,7 @@ namespace llvm {
};
/// This represents the llvm.dbg.addr instruction.
- class DbgAddrIntrinsic : public DbgInfoIntrinsic {
+ class DbgAddrIntrinsic : public DbgVariableIntrinsic {
public:
Value *getAddress() const { return getVariableLocation(); }
@@ -147,7 +167,7 @@ namespace llvm {
};
/// This represents the llvm.dbg.value instruction.
- class DbgValueInst : public DbgInfoIntrinsic {
+ class DbgValueInst : public DbgVariableIntrinsic {
public:
Value *getValue() const {
return getVariableLocation(/* AllowNullOp = */ false);
@@ -168,17 +188,13 @@ namespace llvm {
class DbgLabelInst : public DbgInfoIntrinsic {
public:
DILabel *getLabel() const {
- return cast<DILabel>(getRawVariable());
+ return cast<DILabel>(getRawLabel());
}
- Metadata *getRawVariable() const {
+ Metadata *getRawLabel() const {
return cast<MetadataAsValue>(getArgOperand(0))->getMetadata();
}
- Metadata *getRawExpression() const {
- return nullptr;
- }
-
/// Methods for support type inquiry through isa, cast, and dyn_cast:
/// @{
static bool classof(const IntrinsicInst *I) {
@@ -235,6 +251,12 @@ namespace llvm {
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
return true;
default: return false;
}
diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm/include/llvm/IR/Intrinsics.td
index 0cec754dd649..64603d8ea030 100644
--- a/contrib/llvm/include/llvm/IR/Intrinsics.td
+++ b/contrib/llvm/include/llvm/IR/Intrinsics.td
@@ -90,6 +90,10 @@ class ReadNone<int argNo> : IntrinsicProperty {
def IntrNoReturn : IntrinsicProperty;
+// IntrCold - Calls to this intrinsic are cold.
+// Parallels the cold attribute on LLVM IR functions.
+def IntrCold : IntrinsicProperty;
+
// IntrNoduplicate - Calls to this intrinsic cannot be duplicated.
// Parallels the noduplicate attribute on LLVM IR functions.
def IntrNoDuplicate : IntrinsicProperty;
@@ -315,11 +319,84 @@ def int_gcwrite : Intrinsic<[],
[llvm_ptr_ty, llvm_ptr_ty, llvm_ptrptr_ty],
[IntrArgMemOnly, NoCapture<1>, NoCapture<2>]>;
+//===------------------- ObjC ARC runtime Intrinsics --------------------===//
+//
+// Note these are to support the Objective-C ARC optimizer which wants to
+// eliminate retains and releases where possible.
+
+def int_objc_autorelease : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_autoreleasePoolPop : Intrinsic<[], [llvm_ptr_ty]>;
+def int_objc_autoreleasePoolPush : Intrinsic<[llvm_ptr_ty], []>;
+def int_objc_autoreleaseReturnValue : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_copyWeak : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+def int_objc_destroyWeak : Intrinsic<[], [llvm_ptrptr_ty]>;
+def int_objc_initWeak : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptrptr_ty,
+ llvm_ptr_ty]>;
+def int_objc_loadWeak : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptrptr_ty]>;
+def int_objc_loadWeakRetained : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptrptr_ty]>;
+def int_objc_moveWeak : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+def int_objc_release : Intrinsic<[], [llvm_ptr_ty]>;
+def int_objc_retain : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retainAutorelease : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retainAutoreleaseReturnValue : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retainAutoreleasedReturnValue : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retainBlock : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_storeStrong : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptr_ty]>;
+def int_objc_storeWeak : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptrptr_ty,
+ llvm_ptr_ty]>;
+def int_objc_clang_arc_use : Intrinsic<[],
+ [llvm_vararg_ty]>;
+def int_objc_unsafeClaimAutoreleasedReturnValue : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retainedObject : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_unretainedObject : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_unretainedPointer : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retain_autorelease : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_sync_enter : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty]>;
+def int_objc_sync_exit : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty]>;
+def int_objc_arc_annotation_topdown_bbstart : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+def int_objc_arc_annotation_topdown_bbend : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+def int_objc_arc_annotation_bottomup_bbstart : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+
+
//===--------------------- Code Generator Intrinsics ----------------------===//
//
def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_sponentry : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
[IntrReadMem], "llvm.read_register">;
def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty],
@@ -337,6 +414,13 @@ def int_localescape : Intrinsic<[], [llvm_vararg_ty]>;
def int_localrecover : Intrinsic<[llvm_ptr_ty],
[llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrNoMem]>;
+
+// Given the frame pointer passed into an SEH filter function, returns a
+// pointer to the local variable area suitable for use with llvm.localrecover.
+def int_eh_recoverfp : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_ptr_ty],
+ [IntrNoMem]>;
+
// Note: we treat stacksave/stackrestore as writemem because we don't otherwise
// model their dependencies on allocas.
def int_stacksave : Intrinsic<[llvm_ptr_ty]>,
@@ -453,6 +537,14 @@ def int_maxnum : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable, Commutative]
>;
+def int_minimum : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, Commutative]
+>;
+def int_maximum : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, Commutative]
+>;
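For contrast with llvm.minnum/llvm.maxnum above, a hedged reference sketch of the IEEE-754-2018 semantics llvm.minimum is generally documented to follow (NaN-propagating, and ordering -0.0 below +0.0); illustrative C++ only, not the in-tree lowering:

  #include <cmath>
  #include <limits>

  // llvm.minimum.f64-style behavior: any NaN input yields NaN, and the two
  // signed zeros are ordered (-0.0 is treated as less than +0.0).
  static double ieee_minimum(double A, double B) {
    if (std::isnan(A) || std::isnan(B))
      return std::numeric_limits<double>::quiet_NaN();
    if (A == 0.0 && B == 0.0)
      return std::signbit(A) ? A : B;
    return A < B ? A : B;
  }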
// NOTE: these are internal interfaces.
def int_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
@@ -557,9 +649,35 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_maxnum : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_minnum : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_ceil : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_floor : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_trunc : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
}
// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi.
-// FIXME: Add intrinsics for fabs, copysign, floor, ceil, trunc and round?
+// FIXME: Add intrinsics for fabs and copysign?
//===------------------------- Expect Intrinsics --------------------------===//
@@ -700,6 +818,27 @@ def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
+//===------------------------- Saturation Arithmetic Intrinsics ---------------------===//
+//
+def int_sadd_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+def int_uadd_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+def int_ssub_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_usub_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
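A hedged reference sketch of the clamping behavior these definitions describe, written for 32-bit signed addition (illustrative only; the intrinsics are polymorphic over integer and vector types):

  #include <cstdint>
  #include <limits>

  // llvm.sadd.sat.i32-style behavior: the exact sum, clamped to the signed
  // 32-bit range instead of wrapping.
  static int32_t sadd_sat_i32(int32_t A, int32_t B) {
    const int64_t Wide = static_cast<int64_t>(A) + B;
    if (Wide > std::numeric_limits<int32_t>::max())
      return std::numeric_limits<int32_t>::max();
    if (Wide < std::numeric_limits<int32_t>::min())
      return std::numeric_limits<int32_t>::min();
    return static_cast<int32_t>(Wide);
  }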
+//===------------------------- Fixed Point Arithmetic Intrinsics ---------------------===//
+//
+def int_smul_fix : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+
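Similarly, a hedged sketch of the fixed-point multiply defined above for 32-bit operands, ignoring overflow cases and assuming an arithmetic right shift; the real intrinsic takes the scale as a constant third operand:

  #include <cstdint>

  // llvm.smul.fix.i32-style behavior: multiply in twice the width, then drop
  // Scale fractional bits (both inputs carry Scale fractional bits).
  static int32_t smul_fix_i32(int32_t A, int32_t B, unsigned Scale) {
    const int64_t Wide = static_cast<int64_t>(A) * static_cast<int64_t>(B);
    return static_cast<int32_t>(Wide >> Scale);
  }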
//===------------------------- Memory Use Markers -------------------------===//
//
def int_lifetime_start : Intrinsic<[],
@@ -817,7 +956,7 @@ def int_coro_subfn_addr : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty],
//
def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
GCCBuiltin<"__builtin_flt_rounds">;
-def int_trap : Intrinsic<[], [], [IntrNoReturn]>,
+def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold]>,
GCCBuiltin<"__builtin_trap">;
def int_debugtrap : Intrinsic<[]>,
GCCBuiltin<"__builtin_debugtrap">;
@@ -830,6 +969,10 @@ def int_experimental_deoptimize : Intrinsic<[llvm_any_ty], [llvm_vararg_ty],
def int_experimental_guard : Intrinsic<[], [llvm_i1_ty, llvm_vararg_ty],
[Throws]>;
+// Supports widenable conditions for guards represented as explicit branches.
+def int_experimental_widenable_condition : Intrinsic<[llvm_i1_ty], [],
+ [IntrInaccessibleMemOnly]>;
+
// NOP: calls/invokes to this intrinsic are removed by codegen
def int_donothing : Intrinsic<[], [], [IntrNoMem]>;
@@ -850,6 +993,10 @@ def int_convert_from_fp16 : Intrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>;
def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
[], "llvm.clear_cache">;
+// Intrinsic to detect whether its argument is a constant.
+def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem], "llvm.is.constant">;
+
+
//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
@@ -1008,3 +1155,4 @@ include "llvm/IR/IntrinsicsAMDGPU.td"
include "llvm/IR/IntrinsicsBPF.td"
include "llvm/IR/IntrinsicsSystemZ.td"
include "llvm/IR/IntrinsicsWebAssembly.td"
+include "llvm/IR/IntrinsicsRISCV.td"
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td b/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 688e863c1afe..ff25750fe399 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -44,6 +44,12 @@ def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, Intri
def int_aarch64_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, Intrinsic<[], [llvm_i32_ty]>;
def int_aarch64_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, Intrinsic<[], [llvm_i32_ty]>;
+// A space-consuming intrinsic primarily for testing block and jump table
+// placements. The first argument is the number of bytes this "instruction"
+// takes up; the second argument and the return value are essentially chains,
+// used to force ordering during ISel.
+def int_aarch64_space : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>;
+
}
//===----------------------------------------------------------------------===//
@@ -154,6 +160,11 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem]>;
+
+ class AdvSIMD_FP16FML_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
+ [IntrNoMem]>;
}
// Arithmetic ops
@@ -424,6 +435,12 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
// v8.2-A Dot Product
def int_aarch64_neon_udot : AdvSIMD_Dot_Intrinsic;
def int_aarch64_neon_sdot : AdvSIMD_Dot_Intrinsic;
+
+ // v8.2-A FP16 Fused Multiply-Add Long
+ def int_aarch64_neon_fmlal : AdvSIMD_FP16FML_Intrinsic;
+ def int_aarch64_neon_fmlsl : AdvSIMD_FP16FML_Intrinsic;
+ def int_aarch64_neon_fmlal2 : AdvSIMD_FP16FML_Intrinsic;
+ def int_aarch64_neon_fmlsl2 : AdvSIMD_FP16FML_Intrinsic;
}
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 9f361410b9b8..7913ce828fbc 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -590,7 +590,7 @@ class AMDGPUDimSampleProfile<string opmod,
AMDGPUDimProps dim,
AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
let IsSample = 1;
- let RetTypes = [llvm_anyfloat_ty];
+ let RetTypes = [llvm_any_ty];
let ExtraAddrArgs = sample.ExtraAddrArgs;
let Gradients = sample.Gradients;
let LodClampMip = sample.LodOrClamp;
@@ -683,11 +683,11 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
}
defm int_amdgcn_image_load
- : AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_anyfloat_ty], [], [IntrReadMem],
+ : AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_any_ty], [], [IntrReadMem],
[SDNPMemOperand]>,
AMDGPUImageDMaskIntrinsic;
defm int_amdgcn_image_load_mip
- : AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_anyfloat_ty], [],
+ : AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_any_ty], [],
[IntrReadMem], [SDNPMemOperand], 1>,
AMDGPUImageDMaskIntrinsic;
@@ -802,6 +802,14 @@ class AMDGPUBufferLoad : Intrinsic <
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
+def int_amdgcn_s_buffer_load : Intrinsic <
+ [llvm_any_ty],
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // byte offset(SGPR/VGPR/imm)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc)
+ [IntrNoMem]>,
+ AMDGPURsrcIntrinsic<0>;
+
class AMDGPUBufferStore : Intrinsic <
[],
[llvm_anyfloat_ty, // vdata(VGPR) -- can currently only select f32, v2f32, v4f32
@@ -815,6 +823,124 @@ class AMDGPUBufferStore : Intrinsic <
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;
+// New buffer intrinsics with separate raw and struct variants. The raw
+// variant never has an index. The struct variant always has an index, even if
+// it is const 0. A struct intrinsic with constant 0 index is different to the
+// corresponding raw intrinsic on gfx9+ because the behavior of bound checking
+// and swizzling changes depending on whether idxen is set in the instruction.
+// These new intrinsics also keep the offset and soffset arguments separate as
+// they behave differently in bounds checking and swizzling.
+class AMDGPURawBufferLoad : Intrinsic <
+ [llvm_any_ty],
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrReadMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad;
+def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
+
+class AMDGPUStructBufferLoad : Intrinsic <
+ [llvm_any_ty],
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrReadMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
+def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
+
+class AMDGPURawBufferStore : Intrinsic <
+ [],
+ [llvm_any_ty, // vdata(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrWriteMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore;
+def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;
+
+class AMDGPUStructBufferStore : Intrinsic <
+ [],
+ [llvm_any_ty, // vdata(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrWriteMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
+def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
+
+class AMDGPURawBufferAtomic : Intrinsic <
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // vdata(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1, 0>;
+def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_sub : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_smin : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_umin : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_smax : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // src(VGPR)
+ LLVMMatchType<0>, // cmp(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<2, 0>;
+
+class AMDGPUStructBufferAtomic : Intrinsic <
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // vdata(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1, 0>;
+def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_sub : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_smin : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_umin : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_smax : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_umax : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_and : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // src(VGPR)
+ LLVMMatchType<0>, // cmp(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<2, 0>;
+
+// Obsolescent tbuffer intrinsics.
def int_amdgcn_tbuffer_load : Intrinsic <
[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
[llvm_v4i32_ty, // rsrc(SGPR)
@@ -844,6 +970,54 @@ def int_amdgcn_tbuffer_store : Intrinsic <
[IntrWriteMem], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
+// New tbuffer intrinsics, with:
+// - raw and struct variants
+// - joint format field
+// - joint cachepolicy field
+def int_amdgcn_raw_tbuffer_load : Intrinsic <
+ [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrReadMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+
+def int_amdgcn_raw_tbuffer_store : Intrinsic <
+ [],
+ [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrWriteMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+
+def int_amdgcn_struct_tbuffer_load : Intrinsic <
+ [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrReadMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+
+def int_amdgcn_struct_tbuffer_store : Intrinsic <
+ [],
+ [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrWriteMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+
class AMDGPUBufferAtomic : Intrinsic <
[llvm_i32_ty],
[llvm_i32_ty, // vdata(VGPR)
@@ -1310,18 +1484,10 @@ def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
[llvm_i64_ty], [IntrConvergent]
>;
-def int_amdgcn_break : Intrinsic<[llvm_i64_ty],
- [llvm_i64_ty], [IntrNoMem, IntrConvergent]
->;
-
def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty],
[llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
>;
-def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty],
- [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
->;
-
def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
[llvm_i64_ty], [IntrConvergent]
>;
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td b/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td
index 25f4215d68a8..ecc69a679553 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td
@@ -15,7 +15,7 @@
//
// All Hexagon intrinsics start with "llvm.hexagon.".
let TargetPrefix = "hexagon" in {
- /// Hexagon_Intrinsic - Base class for all Hexagon intrinsics.
+ /// Hexagon_Intrinsic - Base class for the majority of Hexagon intrinsics.
class Hexagon_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
@@ -30,397 +30,6 @@ let TargetPrefix = "hexagon" in {
: Intrinsic<ret_types, param_types, properties>;
}
-//===----------------------------------------------------------------------===//
-//
-// DEF_FUNCTION_TYPE_1(QI_ftype_MEM,BT_BOOL,BT_PTR) ->
-// Hexagon_qi_mem_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_mem_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_ptr_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(HI_ftype_SI,BT_I16,BT_INT) ->
-// Hexagon_hi_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_hi_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i16_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(SI_ftype_SI,BT_INT,BT_INT) ->
-// Hexagon_si_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(DI_ftype_SI,BT_LONGLONG,BT_INT) ->
-// Hexagon_di_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(SI_ftype_DI,BT_INT,BT_LONGLONG) ->
-// Hexagon_si_di_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_di_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(DI_ftype_DI,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_di_di_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_di_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(QI_ftype_QI,BT_BOOL,BT_BOOL) ->
-// Hexagon_qi_qi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_qi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(QI_ftype_SI,BT_BOOL,BT_INT) ->
-// Hexagon_qi_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(DI_ftype_QI,BT_LONGLONG,BT_BOOL) ->
-// Hexagon_di_qi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_qi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(SI_ftype_QI,BT_INT,BT_BOOL) ->
-// Hexagon_si_qi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_qi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_SISI,BT_BOOL,BT_INT,BT_INT) ->
-// Hexagon_qi_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(void_ftype_SISI,BT_VOID,BT_INT,BT_INT) ->
-// Hexagon_void_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_void_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_void_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_SISI,BT_INT,BT_INT,BT_INT) ->
-// Hexagon_si_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(USI_ftype_SISI,BT_UINT,BT_INT,BT_INT) ->
-// Hexagon_usi_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_usi_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(DI_ftype_SISI,BT_LONGLONG,BT_INT,BT_INT) ->
-// Hexagon_di_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(UDI_ftype_SISI,BT_ULONGLONG,BT_INT,BT_INT) ->
-// Hexagon_udi_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_udi_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(DI_ftype_SIDI,BT_LONGLONG,BT_INT,BT_LONGLONG) ->
-// Hexagon_di_sidi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_sidi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(DI_ftype_DISI,BT_LONGLONG,BT_LONGLONG,BT_INT) ->
-// Hexagon_di_disi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_disi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_SIDI,BT_INT,BT_INT,BT_LONGLONG) ->
-// Hexagon_si_sidi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sidi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_DIDI,BT_INT,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_si_didi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_didi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(DI_ftype_DIDI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_di_didi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_didi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(UDI_ftype_DIDI,BT_ULONGLONG,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_udi_didi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_udi_didi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_DISI,BT_INT,BT_LONGLONG,BT_INT) ->
-// Hexagon_si_disi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_disi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_DIDI,BT_BOOL,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_qi_didi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_didi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_SIDI,BT_BOOL,BT_INT,BT_LONGLONG) ->
-// Hexagon_qi_didi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_sidi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_DISI,BT_BOOL,BT_LONGLONG,BT_INT) ->
-// Hexagon_qi_disi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_disi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_QIQI,BT_BOOL,BT_BOOL,BT_BOOL) ->
-// Hexagon_qi_qiqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_qiqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_QIQIQI,BT_BOOL,BT_BOOL,BT_BOOL) ->
-// Hexagon_qi_qiqiqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_qiqiqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i1_ty, llvm_i1_ty, llvm_i1_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_QIQI,BT_INT,BT_BOOL,BT_BOOL) ->
-// Hexagon_si_qiqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_qiqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_QISI,BT_INT,BT_BOOL,BT_INT) ->
-// Hexagon_si_qisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_qisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i1_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(void_ftype_SISISI,BT_VOID,BT_INT,BT_INT,BT_INT) ->
-// Hexagon_void_sisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_void_sisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_void_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_SISISI,BT_INT,BT_INT,BT_INT,BT_INT) ->
-// Hexagon_si_sisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_SISISI,BT_LONGLONG,BT_INT,BT_INT,BT_INT) ->
-// Hexagon_di_sisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_sisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_DISISI,BT_INT,BT_LONGLONG,BT_INT,BT_INT) ->
-// Hexagon_si_disisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_disisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_DISISI,BT_LONGLONG,BT_LONGLONG,BT_INT,BT_INT) ->
-// Hexagon_di_disisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_disisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_SIDISI,BT_INT,BT_INT,BT_LONGLONG,BT_INT) ->
-// Hexagon_si_sidisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sidisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_DIDISI,BT_LONGLONG,BT_LONGLONG,
-// BT_LONGLONG,BT_INT) ->
-// Hexagon_di_didisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_didisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_SIDIDI,BT_INT,BT_INT,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_si_sididi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sididi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty,
- llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_DIDIDI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG,
-// BT_LONGLONG) ->
-// Hexagon_di_dididi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_dididi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
- llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_SISIDI,BT_INT,BT_INT,BT_INT,BT_LONGLONG) ->
-// Hexagon_si_sisidi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sisidi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_QISISI,BT_INT,BT_BOOL,BT_INT,BT_INT) ->
-// Hexagon_si_qisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_qisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_QISISI,BT_LONGLONG,BT_BOOL,BT_INT,BT_INT) ->
-// Hexagon_di_qisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_qisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i1_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_QIDIDI,BT_LONGLONG,BT_BOOL,BT_LONGLONG,
-// BT_LONGLONG) ->
-// Hexagon_di_qididi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_qididi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty,
- llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_DIDIQI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG,
-// BT_BOOL) ->
-// Hexagon_di_didiqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_didiqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_4(SI_ftype_SISISISI,BT_INT,BT_INT,BT_INT,BT_INT,BT_INT) ->
-// Hexagon_si_sisisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sisisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_4(DI_ftype_DIDISISI,BT_LONGLONG,BT_LONGLONG,
-// BT_LONGLONG,BT_INT,BT_INT) ->
-// Hexagon_di_didisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_didisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
- llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
class Hexagon_mem_memmemsi_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty,
@@ -457,191 +66,6 @@ class Hexagon_mem_memdisisi_Intrinsic<string GCCIntSuffix>
llvm_i32_ty, llvm_i32_ty],
[IntrWriteMem]>;
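// Reading aid (a sketch; the def name below is illustrative only): the
// classes in this file follow the pattern Hexagon_<ret>_<args>_Intrinsic,
// where "si" stands for llvm_i32_ty, "di" for llvm_i64_ty, "qi" for a
// predicate (llvm_i1_ty), "sf" for llvm_float_ty, "df" for llvm_double_ty,
// and "mem" for llvm_ptr_ty. Each class passes three lists to
// Hexagon_Intrinsic: the return type, the operand types, and the intrinsic
// properties (e.g. IntrNoMem, IntrWriteMem, Throws). A hypothetical
// instantiation would look like:
//
// def int_hexagon_A2_example :
// Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_example">;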
-class Hexagon_v256_v256v256_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
- [IntrArgMemOnly]>;
-
-//
-// Hexagon_sf_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_i32_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_sf_df_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_df_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_double_ty],
- [IntrNoMem]>;
-//
-// Hexagon_sf_di_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_di_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_i64_ty],
- [IntrNoMem]>;
-//
-// Hexagon_df_sf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_sf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_float_ty],
- [IntrNoMem]>;
-//
-// Hexagon_di_sf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_sf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_float_ty],
- [IntrNoMem]>;
-//
-// Hexagon_sf_sf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_sf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_float_ty],
- [IntrNoMem]>;
-//
-// Hexagon_si_sf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_float_ty],
- [IntrNoMem]>;
-//
-// Hexagon_si_df_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_df_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_double_ty],
- [IntrNoMem]>;
-//
-// Hexagon_sf_sfsf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_sfsf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_si_sfsf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sfsf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_si_sfsi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sfsi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_float_ty, llvm_i32_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_qi_sfqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_sfqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_float_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// Hexagon_sf_sfsfsf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_sfsfsf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_float_ty, llvm_float_ty,
- llvm_float_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_sf_sfsfsfqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_sfsfsfqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_float_ty, llvm_float_ty,
- llvm_float_ty,
- llvm_i32_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_di_dididisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_dididisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
- llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// Hexagon_df_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_i32_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_df_di_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_di_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_i64_ty],
- [IntrNoMem]>;
-//
-// Hexagon_di_df_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_df_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_double_ty],
- [IntrNoMem]>;
-//
-// Hexagon_df_df_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_df_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_double_ty],
- [IntrNoMem]>;
-//
-// Hexagon_df_dfdf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_dfdf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_si_dfdf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_dfdf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_si_dfsi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_dfsi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_double_ty, llvm_i32_ty],
- [IntrNoMem, Throws]>;
-//
-//
-// Hexagon_df_dfdfdf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_dfdfdf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_double_ty, llvm_double_ty,
- llvm_double_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_df_dfdfdfqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_dfdfdfqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_double_ty, llvm_double_ty,
- llvm_double_ty,
- llvm_i32_ty],
- [IntrNoMem, Throws]>;
-
-
-// This one below will not be auto-generated,
-// so make sure you don't overwrite this one.
//
// BUILTIN_INFO_NONCONST(circ_ldd,PTR_ftype_PTRPTRSISI,4)
//
@@ -699,4204 +123,6 @@ Hexagon_mem_memsisisi_Intrinsic<"circ_sthhi">;
def int_hexagon_circ_stb :
Hexagon_mem_memsisisi_Intrinsic<"circ_stb">;
-
-def int_hexagon_mm256i_vaddw :
-Hexagon_v256_v256v256_Intrinsic<"_mm256i_vaddw">;
-
-
-// This one above will not be auto-generated,
-// so make sure you don't overwrite this one.
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpeq,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpeq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgt,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgt :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgt">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgtu,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgtu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgtu">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpeqp,QI_ftype_DIDI,2)
-//
-def int_hexagon_C2_cmpeqp :
-Hexagon_si_didi_Intrinsic<"HEXAGON_C2_cmpeqp">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgtp,QI_ftype_DIDI,2)
-//
-def int_hexagon_C2_cmpgtp :
-Hexagon_si_didi_Intrinsic<"HEXAGON_C2_cmpgtp">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgtup,QI_ftype_DIDI,2)
-//
-def int_hexagon_C2_cmpgtup :
-Hexagon_si_didi_Intrinsic<"HEXAGON_C2_cmpgtup">;
-//
-// BUILTIN_INFO(HEXAGON.A4_rcmpeqi,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_rcmpeqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpeqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_rcmpneqi,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_rcmpneqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpneqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_rcmpeq,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_rcmpeq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.A4_rcmpneq,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_rcmpneq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpneq">;
-//
-// BUILTIN_INFO(HEXAGON.C2_bitsset,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_bitsset :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_bitsset">;
-//
-// BUILTIN_INFO(HEXAGON.C2_bitsclr,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_bitsclr :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_bitsclr">;
-//
-// BUILTIN_INFO(HEXAGON.C4_nbitsset,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_nbitsset :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_nbitsset">;
-//
-// BUILTIN_INFO(HEXAGON.C4_nbitsclr,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_nbitsclr :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_nbitsclr">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpeqi,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpeqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpeqi">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgti,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgti :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgti">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgtui,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgtui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgtui">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgei,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgei :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgei">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgeui,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgeui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgeui">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmplt,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmplt :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmplt">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpltu,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpltu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpltu">;
-//
-// BUILTIN_INFO(HEXAGON.C2_bitsclri,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_bitsclri :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_bitsclri">;
-//
-// BUILTIN_INFO(HEXAGON.C4_nbitsclri,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_nbitsclri :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_nbitsclri">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmpneqi,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmpneqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmpneqi">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmpltei,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmpltei :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmpltei">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmplteui,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmplteui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmplteui">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmpneq,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmpneq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmpneq">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmplte,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmplte :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmplte">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmplteu,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmplteu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmplteu">;
-//
-// BUILTIN_INFO(HEXAGON.C2_and,QI_ftype_QIQI,2)
-//
-def int_hexagon_C2_and :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_and">;
-//
-// BUILTIN_INFO(HEXAGON.C2_or,QI_ftype_QIQI,2)
-//
-def int_hexagon_C2_or :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_or">;
-//
-// BUILTIN_INFO(HEXAGON.C2_xor,QI_ftype_QIQI,2)
-//
-def int_hexagon_C2_xor :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_xor">;
-//
-// BUILTIN_INFO(HEXAGON.C2_andn,QI_ftype_QIQI,2)
-//
-def int_hexagon_C2_andn :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_andn">;
-//
-// BUILTIN_INFO(HEXAGON.C2_not,QI_ftype_QI,1)
-//
-def int_hexagon_C2_not :
-Hexagon_si_si_Intrinsic<"HEXAGON_C2_not">;
-//
-// BUILTIN_INFO(HEXAGON.C2_orn,QI_ftype_QIQI,2)
-//
-def int_hexagon_C2_orn :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_orn">;
-//
-// BUILTIN_INFO(HEXAGON.C4_and_and,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_and_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_and_and">;
-//
-// BUILTIN_INFO(HEXAGON.C4_and_or,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_and_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_and_or">;
-//
-// BUILTIN_INFO(HEXAGON.C4_or_and,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_or_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_or_and">;
-//
-// BUILTIN_INFO(HEXAGON.C4_or_or,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_or_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_or_or">;
-//
-// BUILTIN_INFO(HEXAGON.C4_and_andn,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_and_andn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_and_andn">;
-//
-// BUILTIN_INFO(HEXAGON.C4_and_orn,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_and_orn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_and_orn">;
-//
-// BUILTIN_INFO(HEXAGON.C4_or_andn,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_or_andn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_or_andn">;
-//
-// BUILTIN_INFO(HEXAGON.C4_or_orn,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_or_orn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_or_orn">;
-//
-// BUILTIN_INFO(HEXAGON.C2_pxfer_map,QI_ftype_QI,1)
-//
-def int_hexagon_C2_pxfer_map :
-Hexagon_si_qi_Intrinsic<"HEXAGON_C2_pxfer_map">;
-//
-// BUILTIN_INFO(HEXAGON.C2_any8,QI_ftype_QI,1)
-//
-def int_hexagon_C2_any8 :
-Hexagon_si_qi_Intrinsic<"HEXAGON_C2_any8">;
-//
-// BUILTIN_INFO(HEXAGON.C2_all8,QI_ftype_QI,1)
-//
-def int_hexagon_C2_all8 :
-Hexagon_si_qi_Intrinsic<"HEXAGON_C2_all8">;
-//
-// BUILTIN_INFO(HEXAGON.C2_vitpack,SI_ftype_QIQI,2)
-//
-def int_hexagon_C2_vitpack :
-Hexagon_si_qiqi_Intrinsic<"HEXAGON_C2_vitpack">;
-//
-// BUILTIN_INFO(HEXAGON.C2_mux,SI_ftype_QISISI,3)
-//
-def int_hexagon_C2_mux :
-Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_mux">;
-//
-// BUILTIN_INFO(HEXAGON.C2_muxii,SI_ftype_QISISI,3)
-//
-def int_hexagon_C2_muxii :
-Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_muxii">;
-//
-// BUILTIN_INFO(HEXAGON.C2_muxir,SI_ftype_QISISI,3)
-//
-def int_hexagon_C2_muxir :
-Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_muxir">;
-//
-// BUILTIN_INFO(HEXAGON.C2_muxri,SI_ftype_QISISI,3)
-//
-def int_hexagon_C2_muxri :
-Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_muxri">;
-//
-// BUILTIN_INFO(HEXAGON.C2_vmux,DI_ftype_QIDIDI,3)
-//
-def int_hexagon_C2_vmux :
-Hexagon_di_qididi_Intrinsic<"HEXAGON_C2_vmux">;
-//
-// BUILTIN_INFO(HEXAGON.C2_mask,DI_ftype_QI,1)
-//
-def int_hexagon_C2_mask :
-Hexagon_di_qi_Intrinsic<"HEXAGON_C2_mask">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpbeq,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpbeq :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpbeq">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpbeqi,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpbeqi :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpbeqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpbeq_any,QI_ftype_DIDI,2)
-//
-def int_hexagon_A4_vcmpbeq_any :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A4_vcmpbeq_any">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpbgtu,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpbgtu :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpbgtu">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpbgtui,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpbgtui :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpbgtui">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpbgt,QI_ftype_DIDI,2)
-//
-def int_hexagon_A4_vcmpbgt :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A4_vcmpbgt">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpbgti,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpbgti :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpbgti">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbeq,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbeq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbeq">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbeqi,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbeqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbeqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbgtu,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbgtu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbgtu">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbgtui,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbgtui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbgtui">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbgt,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbgt :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbgt">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbgti,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbgti :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbgti">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpheq,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpheq :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpheq">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmphgt,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmphgt :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmphgt">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmphgtu,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmphgtu :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmphgtu">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpheqi,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpheqi :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpheqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmphgti,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmphgti :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmphgti">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmphgtui,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmphgtui :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmphgtui">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpheq,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpheq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpheq">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmphgt,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmphgt :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmphgt">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmphgtu,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmphgtu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmphgtu">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpheqi,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpheqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpheqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmphgti,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmphgti :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmphgti">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmphgtui,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmphgtui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmphgtui">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpweq,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpweq :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpweq">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpwgt,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpwgt :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpwgt">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpwgtu,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpwgtu :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpwgtu">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpweqi,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpweqi :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpweqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpwgti,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpwgti :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpwgti">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpwgtui,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpwgtui :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpwgtui">;
-//
-// BUILTIN_INFO(HEXAGON.A4_boundscheck,QI_ftype_SIDI,2)
-//
-def int_hexagon_A4_boundscheck :
-Hexagon_si_sidi_Intrinsic<"HEXAGON_A4_boundscheck">;
-//
-// BUILTIN_INFO(HEXAGON.A4_tlbmatch,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_tlbmatch :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_tlbmatch">;
-//
-// BUILTIN_INFO(HEXAGON.C2_tfrpr,SI_ftype_QI,1)
-//
-def int_hexagon_C2_tfrpr :
-Hexagon_si_qi_Intrinsic<"HEXAGON_C2_tfrpr">;
-//
-// BUILTIN_INFO(HEXAGON.C2_tfrrp,QI_ftype_SI,1)
-//
-def int_hexagon_C2_tfrrp :
-Hexagon_si_si_Intrinsic<"HEXAGON_C2_tfrrp">;
-//
-// BUILTIN_INFO(HEXAGON.C4_fastcorner9,QI_ftype_QIQI,2)
-//
-def int_hexagon_C4_fastcorner9 :
-Hexagon_si_qiqi_Intrinsic<"HEXAGON_C4_fastcorner9">;
-//
-// BUILTIN_INFO(HEXAGON.C4_fastcorner9_not,QI_ftype_QIQI,2)
-//
-def int_hexagon_C4_fastcorner9_not :
-Hexagon_si_qiqi_Intrinsic<"HEXAGON_C4_fastcorner9_not">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_hh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_hh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_hh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_hh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_hl_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_hl_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_hl_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_hl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_lh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_lh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_lh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_lh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_ll_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_ll_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_ll_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_ll_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_hh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_hh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_hl_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_hl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_lh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_lh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_ll_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_ll_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_hh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_hh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_hl_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_hl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_lh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_lh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_ll_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_ll_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_hh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_hh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_hl_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_hl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_lh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_lh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_ll_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_ll_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_hh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_hh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_hl_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_hl_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_lh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_lh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_ll_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_ll_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_hh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_hh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_hl_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_hl_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_lh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_lh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_ll_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_ll_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_hh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_hh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_hl_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_hl_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_lh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_lh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_ll_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_ll_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_hh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_hh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_hl_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_hl_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_lh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_lh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_ll_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_ll_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s0,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_hh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s1,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_hh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s0,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_hl_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s1,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_hl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s0,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_lh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s1,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_lh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s0,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_ll_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s1,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_ll_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_hh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_hh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_hl_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_hl_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_lh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_lh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_ll_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_ll_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_hh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_hh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_hl_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_hl_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_lh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_lh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_ll_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_ll_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s0,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_hh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s1,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_hh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s0,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_hl_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s1,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_hl_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s0,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_lh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s1,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_lh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s0,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_ll_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s1,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_ll_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpysmi,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpysmi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpysmi">;
-//
-// BUILTIN_INFO(HEXAGON.M2_macsip,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_macsip :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_macsip">;
-//
-// BUILTIN_INFO(HEXAGON.M2_macsin,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_macsin :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_macsin">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyss_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_dpmpyss_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_dpmpyss_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyss_acc_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_dpmpyss_acc_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyss_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyss_nac_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_dpmpyss_nac_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyss_nac_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_s0,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_dpmpyuu_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_dpmpyuu_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_acc_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_dpmpyuu_acc_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyuu_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_nac_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_dpmpyuu_nac_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyuu_nac_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_up,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_up :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_up">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_up_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_up_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_up_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_up_s1_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_up_s1_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_up_s1_sat">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_up,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_up :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_up">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpysu_up,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpysu_up :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpysu_up">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyss_rnd_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_dpmpyss_rnd_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_dpmpyss_rnd_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mac_up_s1_sat,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mac_up_s1_sat :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mac_up_s1_sat">;
-//
-// BUILTIN_INFO(HEXAGON.M4_nac_up_s1_sat,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_nac_up_s1_sat :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_nac_up_s1_sat">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyi,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyi">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyui,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyui">;
-//
-// BUILTIN_INFO(HEXAGON.M2_maci,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_maci :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_maci">;
-//
-// BUILTIN_INFO(HEXAGON.M2_acci,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_acci :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_acci">;
-//
-// BUILTIN_INFO(HEXAGON.M2_accii,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_accii :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_accii">;
-//
-// BUILTIN_INFO(HEXAGON.M2_nacci,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_nacci :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_nacci">;
-//
-// BUILTIN_INFO(HEXAGON.M2_naccii,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_naccii :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_naccii">;
-//
-// BUILTIN_INFO(HEXAGON.M2_subacc,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_subacc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_subacc">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mpyrr_addr,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mpyrr_addr :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyrr_addr">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mpyri_addr_u2,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mpyri_addr_u2 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyri_addr_u2">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mpyri_addr,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mpyri_addr :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyri_addr">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mpyri_addi,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mpyri_addi :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyri_addi">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mpyrr_addi,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mpyrr_addi :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyrr_addi">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2s_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2s_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2s_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_vmac2s_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2s_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2s_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_vmac2s_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2s_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2su_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2su_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2su_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2su_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2su_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2su_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2su_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_vmac2su_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2su_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2su_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_vmac2su_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2su_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0pack,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2s_s0pack :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s0pack">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1pack,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2s_s1pack :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s1pack">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_vmac2 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2es_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vmpy2es_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vmpy2es_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2es_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vmpy2es_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vmpy2es_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2es_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vmac2es_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vmac2es_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2es_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vmac2es_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vmac2es_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2es,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vmac2es :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vmac2es">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrmac_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vrmac_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrmac_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrmpy_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vrmpy_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrmpy_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s0,SI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vdmpyrs_s0 :
-Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vdmpyrs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s1,SI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vdmpyrs_s1 :
-Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vdmpyrs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vrmpybuu,DI_ftype_DIDI,2)
-//
-def int_hexagon_M5_vrmpybuu :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M5_vrmpybuu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vrmacbuu,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M5_vrmacbuu :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M5_vrmacbuu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vrmpybsu,DI_ftype_DIDI,2)
-//
-def int_hexagon_M5_vrmpybsu :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M5_vrmpybsu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vrmacbsu,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M5_vrmacbsu :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M5_vrmacbsu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vmpybuu,DI_ftype_SISI,2)
-//
-def int_hexagon_M5_vmpybuu :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M5_vmpybuu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vmpybsu,DI_ftype_SISI,2)
-//
-def int_hexagon_M5_vmpybsu :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M5_vmpybsu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vmacbuu,DI_ftype_DISISI,3)
-//
-def int_hexagon_M5_vmacbuu :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M5_vmacbuu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vmacbsu,DI_ftype_DISISI,3)
-//
-def int_hexagon_M5_vmacbsu :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M5_vmacbsu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vdmpybsu,DI_ftype_DIDI,2)
-//
-def int_hexagon_M5_vdmpybsu :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M5_vdmpybsu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vdmacbsu,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M5_vdmacbsu :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M5_vdmacbsu">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmacs_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vdmacs_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vdmacs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmacs_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vdmacs_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vdmacs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmpys_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vdmpys_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vdmpys_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmpys_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vdmpys_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vdmpys_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyrs_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyrs_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyrs_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyrs_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyrsc_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrsc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyrsc_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrsc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmacs_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmacs_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmacs_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmacs_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmacsc_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmacsc_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacsc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmacsc_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmacsc_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacsc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpys_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpys_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpys_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpys_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpys_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpys_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpysc_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpysc_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpysc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpysc_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpysc_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpysc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cnacs_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cnacs_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cnacs_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cnacs_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cnacsc_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cnacsc_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacsc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cnacsc_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cnacsc_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacsc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1,DI_ftype_DISI,2)
-//
-def int_hexagon_M2_vrcmpys_s1 :
-Hexagon_di_disi_Intrinsic<"HEXAGON_M2_vrcmpys_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpys_acc_s1,DI_ftype_DIDISI,3)
-//
-def int_hexagon_M2_vrcmpys_acc_s1 :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_M2_vrcmpys_acc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1rp,SI_ftype_DISI,2)
-//
-def int_hexagon_M2_vrcmpys_s1rp :
-Hexagon_si_disi_Intrinsic<"HEXAGON_M2_vrcmpys_s1rp">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacls_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacls_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacls_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacls_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmachs_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmachs_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmachs_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmachs_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyl_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyl_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyl_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyl_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyh_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyh_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyh_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyh_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacls_rs0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacls_rs0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacls_rs1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacls_rs1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmachs_rs0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmachs_rs0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmachs_rs1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmachs_rs1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyl_rs0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyl_rs0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyl_rs1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyl_rs1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyh_rs0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyh_rs0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyh_rs1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyh_rs1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyeh_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M4_vrmpyeh_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyeh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyeh_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M4_vrmpyeh_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyeh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyeh_acc_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M4_vrmpyeh_acc_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyeh_acc_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M4_vrmpyeh_acc_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyoh_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M4_vrmpyoh_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyoh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyoh_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M4_vrmpyoh_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyoh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyoh_acc_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M4_vrmpyoh_acc_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyoh_acc_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M4_vrmpyoh_acc_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_hmmpyl_rs1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_hmmpyl_rs1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyl_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_hmmpyh_rs1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_hmmpyh_rs1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_hmmpyl_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_hmmpyl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_hmmpyh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_hmmpyh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmaculs_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmaculs_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmaculs_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmaculs_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacuhs_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacuhs_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacuhs_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacuhs_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyul_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyul_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyul_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyul_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyuh_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyuh_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyuh_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyuh_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmaculs_rs0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmaculs_rs0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmaculs_rs1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmaculs_rs1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacuhs_rs0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacuhs_rs1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyul_rs0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyul_rs0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyul_rs1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyul_rs1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyuh_rs0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyuh_rs1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vrcmaci_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmaci_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vrcmacr_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmacr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0c,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vrcmaci_s0c :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmaci_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0c,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vrcmacr_s0c :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmacr_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmaci_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmaci_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmaci_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmacr_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmacr_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vrcmpyi_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyi_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vrcmpyr_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0c,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vrcmpyi_s0c :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyi_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0c,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vrcmpyr_s0c :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyr_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyi_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyi_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpyi_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyr_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyr_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpyr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_cmpyi_wh,SI_ftype_DISI,2)
-//
-def int_hexagon_M4_cmpyi_wh :
-Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyi_wh">;
-//
-// BUILTIN_INFO(HEXAGON.M4_cmpyr_wh,SI_ftype_DISI,2)
-//
-def int_hexagon_M4_cmpyr_wh :
-Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyr_wh">;
-//
-// BUILTIN_INFO(HEXAGON.M4_cmpyi_whc,SI_ftype_DISI,2)
-//
-def int_hexagon_M4_cmpyi_whc :
-Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyi_whc">;
-//
-// BUILTIN_INFO(HEXAGON.M4_cmpyr_whc,SI_ftype_DISI,2)
-//
-def int_hexagon_M4_cmpyr_whc :
-Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyr_whc">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_i,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vcmpy_s0_sat_i :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_i">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_r,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vcmpy_s0_sat_r :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_r">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_i,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vcmpy_s1_sat_i :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_i">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_r,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vcmpy_s1_sat_r :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_r">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_i,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vcmac_s0_sat_i :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vcmac_s0_sat_i">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_r,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vcmac_s0_sat_r :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vcmac_s0_sat_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vcrotate,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_vcrotate :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_vcrotate">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vrcrotate_acc,DI_ftype_DIDISISI,4)
-//
-def int_hexagon_S4_vrcrotate_acc :
-Hexagon_di_didisisi_Intrinsic<"HEXAGON_S4_vrcrotate_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vrcrotate,DI_ftype_DISISI,3)
-//
-def int_hexagon_S4_vrcrotate :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_S4_vrcrotate">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vcnegh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_vcnegh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_vcnegh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vrcnegh,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_vrcnegh :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_vrcnegh">;
-//
-// BUILTIN_INFO(HEXAGON.M4_pmpyw,DI_ftype_SISI,2)
-//
-def int_hexagon_M4_pmpyw :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M4_pmpyw">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vpmpyh,DI_ftype_SISI,2)
-//
-def int_hexagon_M4_vpmpyh :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M4_vpmpyh">;
-//
-// BUILTIN_INFO(HEXAGON.M4_pmpyw_acc,DI_ftype_DISISI,3)
-//
-def int_hexagon_M4_pmpyw_acc :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M4_pmpyw_acc">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vpmpyh_acc,DI_ftype_DISISI,3)
-//
-def int_hexagon_M4_vpmpyh_acc :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M4_vpmpyh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.A2_add,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_add :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_add">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sub,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_sub :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_sub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addsat,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addsat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subsat,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subsat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addi,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addi">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_l16_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_l16_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_l16_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_l16_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_l16_sat_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_sat_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_l16_sat_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_l16_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_l16_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_l16_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_l16_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_l16_sat_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_sat_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_l16_sat_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_lh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_lh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_lh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_hh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_hh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_hh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_sat_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_lh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_sat_lh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_lh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_sat_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_sat_hh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_hh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_lh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_lh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_lh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_hh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_hh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_hh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_sat_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_lh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_sat_lh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_lh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_sat_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_sat_hh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_hh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_aslh,SI_ftype_SI,1)
-//
-def int_hexagon_A2_aslh :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_aslh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_asrh,SI_ftype_SI,1)
-//
-def int_hexagon_A2_asrh :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_asrh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_addp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_addp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addpsat,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_addpsat :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_addpsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addsp,DI_ftype_SIDI,2)
-//
-def int_hexagon_A2_addsp :
-Hexagon_di_sidi_Intrinsic<"HEXAGON_A2_addsp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_subp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_subp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_neg,SI_ftype_SI,1)
-//
-def int_hexagon_A2_neg :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_neg">;
-//
-// BUILTIN_INFO(HEXAGON.A2_negsat,SI_ftype_SI,1)
-//
-def int_hexagon_A2_negsat :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_negsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_abs,SI_ftype_SI,1)
-//
-def int_hexagon_A2_abs :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_abs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_abssat,SI_ftype_SI,1)
-//
-def int_hexagon_A2_abssat :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_abssat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vconj,DI_ftype_DI,1)
-//
-def int_hexagon_A2_vconj :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_vconj">;
-//
-// BUILTIN_INFO(HEXAGON.A2_negp,DI_ftype_DI,1)
-//
-def int_hexagon_A2_negp :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_negp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_absp,DI_ftype_DI,1)
-//
-def int_hexagon_A2_absp :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_absp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_max,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_max :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_max">;
-//
-// BUILTIN_INFO(HEXAGON.A2_maxu,USI_ftype_SISI,2)
-//
-def int_hexagon_A2_maxu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_maxu">;
-//
-// BUILTIN_INFO(HEXAGON.A2_min,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_min :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_min">;
-//
-// BUILTIN_INFO(HEXAGON.A2_minu,USI_ftype_SISI,2)
-//
-def int_hexagon_A2_minu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_minu">;
-//
-// BUILTIN_INFO(HEXAGON.A2_maxp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_maxp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_maxp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_maxup,UDI_ftype_DIDI,2)
-//
-def int_hexagon_A2_maxup :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_maxup">;
-//
-// BUILTIN_INFO(HEXAGON.A2_minp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_minp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_minp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_minup,UDI_ftype_DIDI,2)
-//
-def int_hexagon_A2_minup :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_minup">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfr,SI_ftype_SI,1)
-//
-def int_hexagon_A2_tfr :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_tfr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfrsi,SI_ftype_SI,1)
-//
-def int_hexagon_A2_tfrsi :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_tfrsi">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfrp,DI_ftype_DI,1)
-//
-def int_hexagon_A2_tfrp :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_tfrp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfrpi,DI_ftype_SI,1)
-//
-def int_hexagon_A2_tfrpi :
-Hexagon_di_si_Intrinsic<"HEXAGON_A2_tfrpi">;
-//
-// BUILTIN_INFO(HEXAGON.A2_zxtb,SI_ftype_SI,1)
-//
-def int_hexagon_A2_zxtb :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_zxtb">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sxtb,SI_ftype_SI,1)
-//
-def int_hexagon_A2_sxtb :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_sxtb">;
-//
-// BUILTIN_INFO(HEXAGON.A2_zxth,SI_ftype_SI,1)
-//
-def int_hexagon_A2_zxth :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_zxth">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sxth,SI_ftype_SI,1)
-//
-def int_hexagon_A2_sxth :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_sxth">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combinew,DI_ftype_SISI,2)
-//
-def int_hexagon_A2_combinew :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A2_combinew">;
-//
-// BUILTIN_INFO(HEXAGON.A4_combineri,DI_ftype_SISI,2)
-//
-def int_hexagon_A4_combineri :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_combineri">;
-//
-// BUILTIN_INFO(HEXAGON.A4_combineir,DI_ftype_SISI,2)
-//
-def int_hexagon_A4_combineir :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_combineir">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combineii,DI_ftype_SISI,2)
-//
-def int_hexagon_A2_combineii :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A2_combineii">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combine_hh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_combine_hh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_hh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combine_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_combine_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combine_lh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_combine_lh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_lh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combine_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_combine_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfril,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_tfril :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_tfril">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfrih,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_tfrih :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_tfrih">;
-//
-// BUILTIN_INFO(HEXAGON.A2_and,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_and :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_and">;
-//
-// BUILTIN_INFO(HEXAGON.A2_or,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_or :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_or">;
-//
-// BUILTIN_INFO(HEXAGON.A2_xor,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_xor :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_xor">;
-//
-// BUILTIN_INFO(HEXAGON.A2_not,SI_ftype_SI,1)
-//
-def int_hexagon_A2_not :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_not">;
-//
-// BUILTIN_INFO(HEXAGON.M2_xor_xacc,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_xor_xacc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_xor_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.M4_xor_xacc,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M4_xor_xacc :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_xor_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.A4_andn,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_andn :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_andn">;
-//
-// BUILTIN_INFO(HEXAGON.A4_orn,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_orn :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_orn">;
-//
-// BUILTIN_INFO(HEXAGON.A4_andnp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A4_andnp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A4_andnp">;
-//
-// BUILTIN_INFO(HEXAGON.A4_ornp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A4_ornp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A4_ornp">;
-//
-// BUILTIN_INFO(HEXAGON.S4_addaddi,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_addaddi :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_addaddi">;
-//
-// BUILTIN_INFO(HEXAGON.S4_subaddi,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_subaddi :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_subaddi">;
-//
-// BUILTIN_INFO(HEXAGON.M4_and_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_and_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_and">;
-//
-// BUILTIN_INFO(HEXAGON.M4_and_andn,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_and_andn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_andn">;
-//
-// BUILTIN_INFO(HEXAGON.M4_and_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_and_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_or">;
-//
-// BUILTIN_INFO(HEXAGON.M4_and_xor,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_and_xor :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_xor">;
-//
-// BUILTIN_INFO(HEXAGON.M4_or_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_or_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_and">;
-//
-// BUILTIN_INFO(HEXAGON.M4_or_andn,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_or_andn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_andn">;
-//
-// BUILTIN_INFO(HEXAGON.M4_or_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_or_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_or">;
-//
-// BUILTIN_INFO(HEXAGON.M4_or_xor,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_or_xor :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_xor">;
-//
-// BUILTIN_INFO(HEXAGON.S4_or_andix,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_or_andix :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_or_andix">;
-//
-// BUILTIN_INFO(HEXAGON.S4_or_andi,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_or_andi :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_or_andi">;
-//
-// BUILTIN_INFO(HEXAGON.S4_or_ori,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_or_ori :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_or_ori">;
-//
-// BUILTIN_INFO(HEXAGON.M4_xor_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_xor_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_xor_and">;
-//
-// BUILTIN_INFO(HEXAGON.M4_xor_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_xor_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_xor_or">;
-//
-// BUILTIN_INFO(HEXAGON.M4_xor_andn,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_xor_andn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_xor_andn">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subri,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subri :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subri">;
-//
-// BUILTIN_INFO(HEXAGON.A2_andir,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_andir :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_andir">;
-//
-// BUILTIN_INFO(HEXAGON.A2_orir,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_orir :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_orir">;
-//
-// BUILTIN_INFO(HEXAGON.A2_andp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_andp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_andp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_orp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_orp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_orp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_xorp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_xorp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_xorp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_notp,DI_ftype_DI,1)
-//
-def int_hexagon_A2_notp :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_notp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sxtw,DI_ftype_SI,1)
-//
-def int_hexagon_A2_sxtw :
-Hexagon_di_si_Intrinsic<"HEXAGON_A2_sxtw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sat,SI_ftype_DI,1)
-//
-def int_hexagon_A2_sat :
-Hexagon_si_di_Intrinsic<"HEXAGON_A2_sat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_roundsat,SI_ftype_DI,1)
-//
-def int_hexagon_A2_roundsat :
-Hexagon_si_di_Intrinsic<"HEXAGON_A2_roundsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sath,SI_ftype_SI,1)
-//
-def int_hexagon_A2_sath :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_sath">;
-//
-// BUILTIN_INFO(HEXAGON.A2_satuh,SI_ftype_SI,1)
-//
-def int_hexagon_A2_satuh :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_satuh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_satub,SI_ftype_SI,1)
-//
-def int_hexagon_A2_satub :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_satub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_satb,SI_ftype_SI,1)
-//
-def int_hexagon_A2_satb :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_satb">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddb_map,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddb_map :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddb_map">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddubs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddubs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddubs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddhs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddhs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vadduhs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vadduhs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vadduhs">;
-//
-// BUILTIN_INFO(HEXAGON.A5_vaddhubs,SI_ftype_DIDI,2)
-//
-def int_hexagon_A5_vaddhubs :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A5_vaddhubs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddws,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddws :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddws">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxaddsubw,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxaddsubw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxaddsubw">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxsubaddw,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxsubaddw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxsubaddw">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxaddsubh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxaddsubh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxaddsubh">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxsubaddh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxsubaddh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxsubaddh">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxaddsubhr,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxaddsubhr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxaddsubhr">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxsubaddhr,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxsubaddhr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxsubaddhr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svavgh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svavgh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svavgh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svavghs,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svavghs :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svavghs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svnavgh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svnavgh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svnavgh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svaddh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svaddh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svaddh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svaddhs,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svaddhs :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svaddhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svadduhs,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svadduhs :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svadduhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svsubh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svsubh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svsubh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svsubhs,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svsubhs :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svsubhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svsubuhs,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svsubuhs :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svsubuhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vraddub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vraddub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vraddub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vraddub_acc,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_A2_vraddub_acc :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_A2_vraddub_acc">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vraddh,SI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vraddh :
-Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vraddh">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vradduh,SI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vradduh :
-Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vradduh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubb_map,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubb_map :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubb_map">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsububs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsububs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsububs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubhs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubhs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubuhs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubuhs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubuhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubws,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubws :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubws">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vabsh,DI_ftype_DI,1)
-//
-def int_hexagon_A2_vabsh :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabsh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vabshsat,DI_ftype_DI,1)
-//
-def int_hexagon_A2_vabshsat :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabshsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vabsw,DI_ftype_DI,1)
-//
-def int_hexagon_A2_vabsw :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabsw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vabswsat,DI_ftype_DI,1)
-//
-def int_hexagon_A2_vabswsat :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabswsat">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vabsdiffw,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vabsdiffw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vabsdiffw">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vabsdiffh,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vabsdiffh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vabsdiffh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vrsadub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vrsadub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vrsadub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vrsadub_acc,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_A2_vrsadub_acc :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_A2_vrsadub_acc">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavguh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavguh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavgh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavgh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavgw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavgw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgwr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgwr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgwr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavgwr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavgwr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgwr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgwcr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgwcr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgwcr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavgwcr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavgwcr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgwcr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavghcr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavghcr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavghcr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavghcr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavghcr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavghcr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavguw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavguw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavguwr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavguwr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguwr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgubr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgubr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgubr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavguhr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavguhr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguhr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavghr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavghr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavghr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavghr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavghr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavghr">;
-//
-// BUILTIN_INFO(HEXAGON.A4_round_ri,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_round_ri :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_ri">;
-//
-// BUILTIN_INFO(HEXAGON.A4_round_rr,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_round_rr :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_rr">;
-//
-// BUILTIN_INFO(HEXAGON.A4_round_ri_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_round_ri_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_ri_sat">;
-//
-// BUILTIN_INFO(HEXAGON.A4_round_rr_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_round_rr_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_rr_sat">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cround_ri,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_cround_ri :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cround_ri">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cround_rr,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_cround_rr :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cround_rr">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrminh,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrminh :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminh">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrmaxh,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrmaxh :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxh">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrminuh,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrminuh :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminuh">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrmaxuh,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrmaxuh :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxuh">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrminw,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrminw :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminw">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrmaxw,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrmaxw :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxw">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrminuw,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrminuw :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminuw">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrmaxuw,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrmaxuw :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxuw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminb,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminb :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminb">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxb,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxb :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxb">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminuh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminuh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminuh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxuh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxuh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxuh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminuw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminuw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminuw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxuw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxuw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxuw">;
-//
-// BUILTIN_INFO(HEXAGON.A4_modwrapu,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_modwrapu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_modwrapu">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfadd,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfadd :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfadd">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfsub,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfsub :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfsub">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfmpy,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfmpy :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfmpy">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffma,SF_ftype_SFSFSF,3)
-//
-def int_hexagon_F2_sffma :
-Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffma">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffma_sc,SF_ftype_SFSFSFQI,4)
-//
-def int_hexagon_F2_sffma_sc :
-Hexagon_sf_sfsfsfqi_Intrinsic<"HEXAGON_F2_sffma_sc">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffms,SF_ftype_SFSFSF,3)
-//
-def int_hexagon_F2_sffms :
-Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffms">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffma_lib,SF_ftype_SFSFSF,3)
-//
-def int_hexagon_F2_sffma_lib :
-Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffma_lib">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffms_lib,SF_ftype_SFSFSF,3)
-//
-def int_hexagon_F2_sffms_lib :
-Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffms_lib">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfcmpeq,QI_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfcmpeq :
-Hexagon_si_sfsf_Intrinsic<"HEXAGON_F2_sfcmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfcmpgt,QI_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfcmpgt :
-Hexagon_si_sfsf_Intrinsic<"HEXAGON_F2_sfcmpgt">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfcmpge,QI_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfcmpge :
-Hexagon_si_sfsf_Intrinsic<"HEXAGON_F2_sfcmpge">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfcmpuo,QI_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfcmpuo :
-Hexagon_si_sfsf_Intrinsic<"HEXAGON_F2_sfcmpuo">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfmax,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfmax :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfmax">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfmin,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfmin :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfmin">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfclass,QI_ftype_SFSI,2)
-//
-def int_hexagon_F2_sfclass :
-Hexagon_si_sfsi_Intrinsic<"HEXAGON_F2_sfclass">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfimm_p,SF_ftype_SI,1)
-//
-def int_hexagon_F2_sfimm_p :
-Hexagon_sf_si_Intrinsic<"HEXAGON_F2_sfimm_p">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfimm_n,SF_ftype_SI,1)
-//
-def int_hexagon_F2_sfimm_n :
-Hexagon_sf_si_Intrinsic<"HEXAGON_F2_sfimm_n">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffixupn,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sffixupn :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sffixupn">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffixupd,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sffixupd :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sffixupd">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffixupr,SF_ftype_SF,1)
-//
-def int_hexagon_F2_sffixupr :
-Hexagon_sf_sf_Intrinsic<"HEXAGON_F2_sffixupr">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfcmpeq,QI_ftype_DFDF,2)
-//
-def int_hexagon_F2_dfcmpeq :
-Hexagon_si_dfdf_Intrinsic<"HEXAGON_F2_dfcmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfcmpgt,QI_ftype_DFDF,2)
-//
-def int_hexagon_F2_dfcmpgt :
-Hexagon_si_dfdf_Intrinsic<"HEXAGON_F2_dfcmpgt">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfcmpge,QI_ftype_DFDF,2)
-//
-def int_hexagon_F2_dfcmpge :
-Hexagon_si_dfdf_Intrinsic<"HEXAGON_F2_dfcmpge">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfcmpuo,QI_ftype_DFDF,2)
-//
-def int_hexagon_F2_dfcmpuo :
-Hexagon_si_dfdf_Intrinsic<"HEXAGON_F2_dfcmpuo">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfclass,QI_ftype_DFSI,2)
-//
-def int_hexagon_F2_dfclass :
-Hexagon_si_dfsi_Intrinsic<"HEXAGON_F2_dfclass">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfimm_p,DF_ftype_SI,1)
-//
-def int_hexagon_F2_dfimm_p :
-Hexagon_df_si_Intrinsic<"HEXAGON_F2_dfimm_p">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfimm_n,DF_ftype_SI,1)
-//
-def int_hexagon_F2_dfimm_n :
-Hexagon_df_si_Intrinsic<"HEXAGON_F2_dfimm_n">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2df,DF_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2df :
-Hexagon_df_sf_Intrinsic<"HEXAGON_F2_conv_sf2df">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2sf,SF_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2sf :
-Hexagon_sf_df_Intrinsic<"HEXAGON_F2_conv_df2sf">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_uw2sf,SF_ftype_SI,1)
-//
-def int_hexagon_F2_conv_uw2sf :
-Hexagon_sf_si_Intrinsic<"HEXAGON_F2_conv_uw2sf">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_uw2df,DF_ftype_SI,1)
-//
-def int_hexagon_F2_conv_uw2df :
-Hexagon_df_si_Intrinsic<"HEXAGON_F2_conv_uw2df">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_w2sf,SF_ftype_SI,1)
-//
-def int_hexagon_F2_conv_w2sf :
-Hexagon_sf_si_Intrinsic<"HEXAGON_F2_conv_w2sf">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_w2df,DF_ftype_SI,1)
-//
-def int_hexagon_F2_conv_w2df :
-Hexagon_df_si_Intrinsic<"HEXAGON_F2_conv_w2df">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_ud2sf,SF_ftype_DI,1)
-//
-def int_hexagon_F2_conv_ud2sf :
-Hexagon_sf_di_Intrinsic<"HEXAGON_F2_conv_ud2sf">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_ud2df,DF_ftype_DI,1)
-//
-def int_hexagon_F2_conv_ud2df :
-Hexagon_df_di_Intrinsic<"HEXAGON_F2_conv_ud2df">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_d2sf,SF_ftype_DI,1)
-//
-def int_hexagon_F2_conv_d2sf :
-Hexagon_sf_di_Intrinsic<"HEXAGON_F2_conv_d2sf">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_d2df,DF_ftype_DI,1)
-//
-def int_hexagon_F2_conv_d2df :
-Hexagon_df_di_Intrinsic<"HEXAGON_F2_conv_d2df">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2uw,SI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2uw :
-Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2uw">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2w,SI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2w :
-Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2w">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2ud,DI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2ud :
-Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2ud">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2d,DI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2d :
-Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2d">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2uw,SI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2uw :
-Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2uw">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2w,SI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2w :
-Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2w">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2ud,DI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2ud :
-Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2ud">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2d,DI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2d :
-Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2d">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2uw_chop,SI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2uw_chop :
-Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2uw_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2w_chop,SI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2w_chop :
-Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2w_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2ud_chop,DI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2ud_chop :
-Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2ud_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2d_chop,DI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2d_chop :
-Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2d_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2uw_chop,SI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2uw_chop :
-Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2uw_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2w_chop,SI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2w_chop :
-Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2w_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2ud_chop,DI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2ud_chop :
-Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2ud_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2d_chop,DI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2d_chop :
-Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2d_chop">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asr_r_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_r_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asl_r_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_r_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_lsr_r_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_lsr_r_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_lsl_r_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_lsl_r_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_r_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_r_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_r_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_r_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_r_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_r_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsl_r_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsl_r_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_r_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_r_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_r_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsl_r_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_r_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_r_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_r_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsl_r_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_r_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_r_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_r_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsl_r_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_r_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_r_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_r_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsl_r_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_r_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_r_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_r_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsl_r_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_r_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_r_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_r_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsl_r_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_r_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_r_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_r_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsl_r_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_r_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_r_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_r_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsl_r_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p_xor,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_r_p_xor :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p_xor,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_r_p_xor :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_xor,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_r_p_xor :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_xor,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsl_r_p_xor :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asr_r_r_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_r_r_sat">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asl_r_r_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_r_r_sat">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asr_i_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_lsr_i_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_lsr_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asl_i_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_i_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_i_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_i_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_i_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_i_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_i_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_i_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_i_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_i_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_i_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_i_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_i_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_i_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_i_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_xacc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_i_r_xacc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_xacc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_i_r_xacc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_xacc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_i_p_xacc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p_xacc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_i_p_xacc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_i_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_i_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_i_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_i_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_i_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_i_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_i_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_i_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_i_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_i_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_i_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_i_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asl_i_r_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_i_r_sat">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asr_i_r_rnd :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_i_r_rnd">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd_goodsyntax,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asr_i_r_rnd_goodsyntax :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_i_r_rnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_rnd,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_p_rnd :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_p_rnd">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_rnd_goodsyntax,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_p_rnd_goodsyntax :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S4_lsli,SI_ftype_SISI,2)
-//
-def int_hexagon_S4_lsli :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_lsli">;
-//
-// BUILTIN_INFO(HEXAGON.S2_addasl_rrri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_addasl_rrri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_addasl_rrri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_andi_asl_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_andi_asl_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_andi_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_ori_asl_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_ori_asl_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_ori_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_addi_asl_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_addi_asl_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_addi_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_subi_asl_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_subi_asl_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_subi_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_andi_lsr_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_andi_lsr_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_andi_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_ori_lsr_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_ori_lsr_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_ori_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_addi_lsr_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_addi_lsr_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_addi_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_subi_lsr_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_subi_lsr_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_subi_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S2_valignib,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_valignib :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_valignib">;
-//
-// BUILTIN_INFO(HEXAGON.S2_valignrb,DI_ftype_DIDIQI,3)
-//
-def int_hexagon_S2_valignrb :
-Hexagon_di_didiqi_Intrinsic<"HEXAGON_S2_valignrb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vspliceib,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_vspliceib :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_vspliceib">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsplicerb,DI_ftype_DIDIQI,3)
-//
-def int_hexagon_S2_vsplicerb :
-Hexagon_di_didiqi_Intrinsic<"HEXAGON_S2_vsplicerb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsplatrh,DI_ftype_SI,1)
-//
-def int_hexagon_S2_vsplatrh :
-Hexagon_di_si_Intrinsic<"HEXAGON_S2_vsplatrh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsplatrb,SI_ftype_SI,1)
-//
-def int_hexagon_S2_vsplatrb :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_vsplatrb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_insert,SI_ftype_SISISISI,4)
-//
-def int_hexagon_S2_insert :
-Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_insert">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tableidxb_goodsyntax,SI_ftype_SISISISI,4)
-//
-def int_hexagon_S2_tableidxb_goodsyntax :
-Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxb_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tableidxh_goodsyntax,SI_ftype_SISISISI,4)
-//
-def int_hexagon_S2_tableidxh_goodsyntax :
-Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxh_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tableidxw_goodsyntax,SI_ftype_SISISISI,4)
-//
-def int_hexagon_S2_tableidxw_goodsyntax :
-Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxw_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tableidxd_goodsyntax,SI_ftype_SISISISI,4)
-//
-def int_hexagon_S2_tableidxd_goodsyntax :
-Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.A4_bitspliti,DI_ftype_SISI,2)
-//
-def int_hexagon_A4_bitspliti :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_bitspliti">;
-//
-// BUILTIN_INFO(HEXAGON.A4_bitsplit,DI_ftype_SISI,2)
-//
-def int_hexagon_A4_bitsplit :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_bitsplit">;
-//
-// BUILTIN_INFO(HEXAGON.S4_extract,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_extract :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_extract">;
-//
-// BUILTIN_INFO(HEXAGON.S2_extractu,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_extractu :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_extractu">;
-//
-// BUILTIN_INFO(HEXAGON.S2_insertp,DI_ftype_DIDISISI,4)
-//
-def int_hexagon_S2_insertp :
-Hexagon_di_didisisi_Intrinsic<"HEXAGON_S2_insertp">;
-//
-// BUILTIN_INFO(HEXAGON.S4_extractp,DI_ftype_DISISI,3)
-//
-def int_hexagon_S4_extractp :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_S4_extractp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_extractup,DI_ftype_DISISI,3)
-//
-def int_hexagon_S2_extractup :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_S2_extractup">;
-//
-// BUILTIN_INFO(HEXAGON.S2_insert_rp,SI_ftype_SISIDI,3)
-//
-def int_hexagon_S2_insert_rp :
-Hexagon_si_sisidi_Intrinsic<"HEXAGON_S2_insert_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S4_extract_rp,SI_ftype_SIDI,2)
-//
-def int_hexagon_S4_extract_rp :
-Hexagon_si_sidi_Intrinsic<"HEXAGON_S4_extract_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_extractu_rp,SI_ftype_SIDI,2)
-//
-def int_hexagon_S2_extractu_rp :
-Hexagon_si_sidi_Intrinsic<"HEXAGON_S2_extractu_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_insertp_rp,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_S2_insertp_rp :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_S2_insertp_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S4_extractp_rp,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_extractp_rp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_extractp_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_extractup_rp,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_extractup_rp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_extractup_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tstbit_i,QI_ftype_SISI,2)
-//
-def int_hexagon_S2_tstbit_i :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_tstbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.S4_ntstbit_i,QI_ftype_SISI,2)
-//
-def int_hexagon_S4_ntstbit_i :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_ntstbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.S2_setbit_i,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_setbit_i :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_setbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.S2_togglebit_i,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_togglebit_i :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_togglebit_i">;
-//
-// BUILTIN_INFO(HEXAGON.S2_clrbit_i,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_clrbit_i :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_clrbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tstbit_r,QI_ftype_SISI,2)
-//
-def int_hexagon_S2_tstbit_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_tstbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.S4_ntstbit_r,QI_ftype_SISI,2)
-//
-def int_hexagon_S4_ntstbit_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_ntstbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_setbit_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_setbit_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_setbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_togglebit_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_togglebit_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_togglebit_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_clrbit_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_clrbit_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_clrbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_i_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_i_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_i_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_i_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_r_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S5_asrhub_rnd_sat_goodsyntax,SI_ftype_DISI,2)
-//
-def int_hexagon_S5_asrhub_rnd_sat_goodsyntax :
-Hexagon_si_disi_Intrinsic<"HEXAGON_S5_asrhub_rnd_sat_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S5_asrhub_sat,SI_ftype_DISI,2)
-//
-def int_hexagon_S5_asrhub_sat :
-Hexagon_si_disi_Intrinsic<"HEXAGON_S5_asrhub_sat">;
-//
-// BUILTIN_INFO(HEXAGON.S5_vasrhrnd_goodsyntax,DI_ftype_DISI,2)
-//
-def int_hexagon_S5_vasrhrnd_goodsyntax :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S5_vasrhrnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_r_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_r_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsl_r_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsl_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_svw_trun,SI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_svw_trun :
-Hexagon_si_disi_Intrinsic<"HEXAGON_S2_asr_i_svw_trun">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_svw_trun,SI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_r_svw_trun :
-Hexagon_si_disi_Intrinsic<"HEXAGON_S2_asr_r_svw_trun">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_i_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_i_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_i_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_i_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_r_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_r_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_r_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_r_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_r_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_r_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsl_r_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsl_r_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vrndpackwh,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vrndpackwh :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vrndpackwh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vrndpackwhs,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vrndpackwhs :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vrndpackwhs">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsxtbh,DI_ftype_SI,1)
-//
-def int_hexagon_S2_vsxtbh :
-Hexagon_di_si_Intrinsic<"HEXAGON_S2_vsxtbh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vzxtbh,DI_ftype_SI,1)
-//
-def int_hexagon_S2_vzxtbh :
-Hexagon_di_si_Intrinsic<"HEXAGON_S2_vzxtbh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsathub,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vsathub :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsathub">;
-//
-// BUILTIN_INFO(HEXAGON.S2_svsathub,SI_ftype_SI,1)
-//
-def int_hexagon_S2_svsathub :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_svsathub">;
-//
-// BUILTIN_INFO(HEXAGON.S2_svsathb,SI_ftype_SI,1)
-//
-def int_hexagon_S2_svsathb :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_svsathb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsathb,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vsathb :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsathb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vtrunohb,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vtrunohb :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vtrunohb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vtrunewh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_vtrunewh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_vtrunewh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vtrunowh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_vtrunowh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_vtrunowh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vtrunehb,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vtrunehb :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vtrunehb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsxthw,DI_ftype_SI,1)
-//
-def int_hexagon_S2_vsxthw :
-Hexagon_di_si_Intrinsic<"HEXAGON_S2_vsxthw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vzxthw,DI_ftype_SI,1)
-//
-def int_hexagon_S2_vzxthw :
-Hexagon_di_si_Intrinsic<"HEXAGON_S2_vzxthw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsatwh,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vsatwh :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsatwh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsatwuh,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vsatwuh :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsatwuh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_packhl,DI_ftype_SISI,2)
-//
-def int_hexagon_S2_packhl :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_S2_packhl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_swiz,SI_ftype_SI,1)
-//
-def int_hexagon_A2_swiz :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_swiz">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsathub_nopack,DI_ftype_DI,1)
-//
-def int_hexagon_S2_vsathub_nopack :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsathub_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsathb_nopack,DI_ftype_DI,1)
-//
-def int_hexagon_S2_vsathb_nopack :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsathb_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsatwh_nopack,DI_ftype_DI,1)
-//
-def int_hexagon_S2_vsatwh_nopack :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsatwh_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsatwuh_nopack,DI_ftype_DI,1)
-//
-def int_hexagon_S2_vsatwuh_nopack :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsatwuh_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.S2_shuffob,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_shuffob :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffob">;
-//
-// BUILTIN_INFO(HEXAGON.S2_shuffeb,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_shuffeb :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffeb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_shuffoh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_shuffoh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffoh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_shuffeh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_shuffeh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffeh">;
-//
-// BUILTIN_INFO(HEXAGON.S5_popcountp,SI_ftype_DI,1)
-//
-def int_hexagon_S5_popcountp :
-Hexagon_si_di_Intrinsic<"HEXAGON_S5_popcountp">;
-//
-// BUILTIN_INFO(HEXAGON.S4_parity,SI_ftype_SISI,2)
-//
-def int_hexagon_S4_parity :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_parity">;
-//
-// BUILTIN_INFO(HEXAGON.S2_parityp,SI_ftype_DIDI,2)
-//
-def int_hexagon_S2_parityp :
-Hexagon_si_didi_Intrinsic<"HEXAGON_S2_parityp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lfsp,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_lfsp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_lfsp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_clbnorm,SI_ftype_SI,1)
-//
-def int_hexagon_S2_clbnorm :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_clbnorm">;
-//
-// BUILTIN_INFO(HEXAGON.S4_clbaddi,SI_ftype_SISI,2)
-//
-def int_hexagon_S4_clbaddi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_clbaddi">;
-//
-// BUILTIN_INFO(HEXAGON.S4_clbpnorm,SI_ftype_DI,1)
-//
-def int_hexagon_S4_clbpnorm :
-Hexagon_si_di_Intrinsic<"HEXAGON_S4_clbpnorm">;
-//
-// BUILTIN_INFO(HEXAGON.S4_clbpaddi,SI_ftype_DISI,2)
-//
-def int_hexagon_S4_clbpaddi :
-Hexagon_si_disi_Intrinsic<"HEXAGON_S4_clbpaddi">;
-//
-// BUILTIN_INFO(HEXAGON.S2_clb,SI_ftype_SI,1)
-//
-def int_hexagon_S2_clb :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_clb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_cl0,SI_ftype_SI,1)
-//
-def int_hexagon_S2_cl0 :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_cl0">;
-//
-// BUILTIN_INFO(HEXAGON.S2_cl1,SI_ftype_SI,1)
-//
-def int_hexagon_S2_cl1 :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_cl1">;
-//
-// BUILTIN_INFO(HEXAGON.S2_clbp,SI_ftype_DI,1)
-//
-def int_hexagon_S2_clbp :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_clbp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_cl0p,SI_ftype_DI,1)
-//
-def int_hexagon_S2_cl0p :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_cl0p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_cl1p,SI_ftype_DI,1)
-//
-def int_hexagon_S2_cl1p :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_cl1p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_brev,SI_ftype_SI,1)
-//
-def int_hexagon_S2_brev :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_brev">;
-//
-// BUILTIN_INFO(HEXAGON.S2_brevp,DI_ftype_DI,1)
-//
-def int_hexagon_S2_brevp :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_brevp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_ct0,SI_ftype_SI,1)
-//
-def int_hexagon_S2_ct0 :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_ct0">;
-//
-// BUILTIN_INFO(HEXAGON.S2_ct1,SI_ftype_SI,1)
-//
-def int_hexagon_S2_ct1 :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_ct1">;
-//
-// BUILTIN_INFO(HEXAGON.S2_ct0p,SI_ftype_DI,1)
-//
-def int_hexagon_S2_ct0p :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_ct0p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_ct1p,SI_ftype_DI,1)
-//
-def int_hexagon_S2_ct1p :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_ct1p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_interleave,DI_ftype_DI,1)
-//
-def int_hexagon_S2_interleave :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_interleave">;
-//
-// BUILTIN_INFO(HEXAGON.S2_deinterleave,DI_ftype_DI,1)
-//
-def int_hexagon_S2_deinterleave :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_deinterleave">;
-
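// Note on the block removed above (a reading of the naming convention, for
// orientation only): each BUILTIN_INFO(HEXAGON.<name>,<ret>_ftype_<args>,<n>)
// comment records the GCC-style builtin signature that the following def
// mirrors, where SI is a 32-bit integer, DI a 64-bit integer and <n> the
// argument count. For example, the removed pair
//
//   // BUILTIN_INFO(HEXAGON.S2_asr_i_p,DI_ftype_DISI,2)
//   def int_hexagon_S2_asr_i_p :
//   Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_p">;
//
// described an intrinsic returning an i64 and taking (i64, i32) operands; the
// replacement classes added later in this diff spell those LLVM types out in
// their names instead.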
//
// BUILTIN_INFO(HEXAGON.dcfetch_A,v_ftype_DI*,1)
//
@@ -4934,6042 +160,6197 @@ def int_hexagon_S4_stored_locked :
Hexagon_Intrinsic<"HEXAGON_S4_stored_locked", [llvm_i32_ty],
[llvm_ptr64_ty, llvm_i64_ty], [IntrArgMemOnly, NoCapture<0>]>;
-// V60
+def int_hexagon_vmemcpy : Hexagon_Intrinsic<"hexagon_vmemcpy",
+ [], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
+ [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>]>;
-class Hexagon_v2048v2048_Intrinsic_T<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty],
- [IntrNoMem]>;
-
-// tag : V6_hi_W
-// tag : V6_lo_W
-class Hexagon_v512v1024_Intrinsic_T<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v32i32_ty],
- [IntrNoMem]>;
-
-// tag : V6_hi_W_128B
-// tag : V6_lo_W_128B
-class Hexagon_v1024v2048_Intrinsic_T<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v64i32_ty],
- [IntrNoMem]>;
+def int_hexagon_vmemset : Hexagon_Intrinsic<"hexagon_vmemset",
+ [], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
-class Hexagon_v1024v1024_Intrinsic_T<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty],
- [IntrNoMem]>;
-
-// BUILTIN_INFO(HEXAGON.V6_hi_W,VI_ftype_VI,1)
-// tag : V6_hi
-def int_hexagon_V6_hi :
-Hexagon_v512v1024_Intrinsic_T<"HEXAGON_V6_hi">;
+multiclass Hexagon_custom_circ_ld_Intrinsic<LLVMType ElTy> {
+ def NAME#_pci : Hexagon_NonGCC_Intrinsic<
+ [ElTy, llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
+ [IntrArgMemOnly, NoCapture<3>]>;
+ def NAME#_pcr : Hexagon_NonGCC_Intrinsic<
+ [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty],
+ [IntrArgMemOnly, NoCapture<2>]>;
+}
-// BUILTIN_INFO(HEXAGON.V6_lo_W,VI_ftype_VI,1)
-// tag : V6_lo
-def int_hexagon_V6_lo :
-Hexagon_v512v1024_Intrinsic_T<"HEXAGON_V6_lo">;
+defm int_hexagon_L2_loadrub : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadrb : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadruh : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadrh : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadri : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadrd : Hexagon_custom_circ_ld_Intrinsic<llvm_i64_ty>;
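// Illustrative expansion (not an additional definition): each defm above
// instantiates the multiclass once per element type, so int_hexagon_L2_loadri,
// for example, produces records equivalent to
//
//   def int_hexagon_L2_loadri_pci : Hexagon_NonGCC_Intrinsic<
//     [llvm_i32_ty, llvm_ptr_ty],
//     [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
//     [IntrArgMemOnly, NoCapture<3>]>;
//   def int_hexagon_L2_loadri_pcr : Hexagon_NonGCC_Intrinsic<
//     [llvm_i32_ty, llvm_ptr_ty],
//     [llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty],
//     [IntrArgMemOnly, NoCapture<2>]>;
//
// i.e. each circular load returns both the loaded element and a pointer result
// (presumably the post-updated address).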
-// BUILTIN_INFO(HEXAGON.V6_hi_W,VI_ftype_VI,1)
-// tag : V6_hi_128B
-def int_hexagon_V6_hi_128B :
-Hexagon_v1024v2048_Intrinsic_T<"HEXAGON_V6_hi_128B">;
+multiclass Hexagon_custom_circ_st_Intrinsic<LLVMType ElTy> {
+ def NAME#_pci : Hexagon_NonGCC_Intrinsic<
+ [llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ElTy, llvm_ptr_ty],
+ [IntrArgMemOnly, NoCapture<4>]>;
+ def NAME#_pcr : Hexagon_NonGCC_Intrinsic<
+ [llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, ElTy, llvm_ptr_ty],
+ [IntrArgMemOnly, NoCapture<3>]>;
+}
-// BUILTIN_INFO(HEXAGON.V6_lo_W,VI_ftype_VI,1)
-// tag : V6_lo_128B
-def int_hexagon_V6_lo_128B :
-Hexagon_v1024v2048_Intrinsic_T<"HEXAGON_V6_lo_128B">;
+defm int_hexagon_S2_storerb : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storerh : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storerf : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storeri : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storerd : Hexagon_custom_circ_st_Intrinsic<llvm_i64_ty>;
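// The circular stores follow the same pattern; for instance
// int_hexagon_S2_storeri yields a _pci record equivalent to
//
//   def int_hexagon_S2_storeri_pci : Hexagon_NonGCC_Intrinsic<
//     [llvm_ptr_ty],
//     [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
//     [IntrArgMemOnly, NoCapture<4>]>;
//
// plus a _pcr twin with one fewer i32 operand, mirroring the loads above.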
-// BUILTIN_INFO(HEXAGON.V6_vassignp,VI_ftype_VI,1)
-// tag : V6_vassignp
-def int_hexagon_V6_vassignp :
-Hexagon_v1024v1024_Intrinsic_T<"HEXAGON_V6_vassignp">;
+// The front-end emits the intrinsic call with only two arguments. The third
+// argument of the builtin is already used by the front-end to write to memory
+// by generating a store.
+class Hexagon_custom_brev_ld_Intrinsic<LLVMType ElTy>
+ : Hexagon_NonGCC_Intrinsic<
+ [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
+ [IntrReadMem]>;
-// BUILTIN_INFO(HEXAGON.V6_vassignp,VI_ftype_VI,1)
-// tag : V6_vassignp_128B
-def int_hexagon_V6_vassignp_128B :
-Hexagon_v2048v2048_Intrinsic_T<"HEXAGON_V6_vassignp_128B">;
+def int_hexagon_L2_loadrub_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadrb_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadruh_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadrh_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadri_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadrd_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i64_ty>;
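// Resolving the class above, int_hexagon_L2_loadri_pbr, for example, is
// equivalent to
//
//   def int_hexagon_L2_loadri_pbr : Hexagon_NonGCC_Intrinsic<
//     [llvm_i32_ty, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
//     [IntrReadMem]>;
//
// two operands in, the loaded value and a pointer result out; per the comment
// above, the front-end itself stores that result (presumably the updated
// pointer) through the builtin's third argument.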
+def int_hexagon_S2_storerb_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_stb">;
+def int_hexagon_S2_storerh_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_sth">;
+def int_hexagon_S2_storerf_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_sthhi">;
+def int_hexagon_S2_storeri_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_stw">;
+def int_hexagon_S2_storerd_pbr : Hexagon_mem_memdisi_Intrinsic<"brev_std">;
//
-// Hexagon_iii_Intrinsic<string GCCIntSuffix>
-// tag : S6_rol_i_r
-class Hexagon_iii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
-
+// Masked vector stores
//
-// Hexagon_LLiLLii_Intrinsic<string GCCIntSuffix>
-// tag : S6_rol_i_p
-class Hexagon_LLiLLii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty],
- [IntrNoMem]>;
//
-// Hexagon_iiii_Intrinsic<string GCCIntSuffix>
-// tag : S6_rol_i_r_acc
-class Hexagon_iiii_Intrinsic<string GCCIntSuffix>
+// Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
+// tag: V6_vS32b_qpred_ai
+class Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_v512i1_ty,llvm_ptr_ty,llvm_v16i32_ty],
+ [IntrArgMemOnly]>;
//
-// Hexagon_LLiLLiLLii_Intrinsic<string GCCIntSuffix>
-// tag : S6_rol_i_p_acc
-class Hexagon_LLiLLiLLii_Intrinsic<string GCCIntSuffix>
+// Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
+// tag: V6_vS32b_qpred_ai_128B
+class Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_v1024i1_ty,llvm_ptr_ty,llvm_v32i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v512v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_valignb
-class Hexagon_v512v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_qpred_ai :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai">;
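// Written out, that definition is equivalent to
//
//   def int_hexagon_V6_vS32b_qpred_ai :
//   Hexagon_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai",
//     [], [llvm_v512i1_ty, llvm_ptr_ty, llvm_v16i32_ty],
//     [IntrArgMemOnly]>;
//
// a void intrinsic whose operands are a 512-bit predicate, the store address
// and the vector to store.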
-//
-// Hexagon_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_valignb_128B
-class Hexagon_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nqpred_ai :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai">;
-//
-// Hexagon_v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vror
-class Hexagon_v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nt_qpred_ai :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai">;
-//
-// Hexagon_v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vror_128B
-class Hexagon_v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nt_nqpred_ai :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai">;
-//
-// Hexagon_v1024v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vunpackub
-class Hexagon_v1024v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_qpred_ai_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai_128B">;
-//
-// Hexagon_v2048v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vunpackub_128B
-class Hexagon_v2048v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nqpred_ai_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai_128B">;
-//
-// Hexagon_v1024v1024v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vunpackob
-class Hexagon_v1024v1024v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nt_qpred_ai_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai_128B">;
-//
-// Hexagon_v2048v2048v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vunpackob_128B
-class Hexagon_v2048v2048v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nt_nqpred_ai_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai_128B">;
-//
-// Hexagon_v512v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vpackeb
-class Hexagon_v512v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstoreq :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstoreq">;
-//
-// Hexagon_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vpackeb_128B
-class Hexagon_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorenq :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorenq">;
-//
-// Hexagon_v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpybus_dv_128B
-class Hexagon_v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorentq :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentq">;
-//
-// Hexagon_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpybus_dv_acc_128B
-class Hexagon_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorentnq :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentnq">;
-//
-// Hexagon_v512v512v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhvsat_acc
-class Hexagon_v512v512v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstoreq_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstoreq_128B">;
-//
-// Hexagon_v1024v1024v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhvsat_acc_128B
-class Hexagon_v1024v1024v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorenq_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorenq_128B">;
-//
-// Hexagon_v512v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhisat
-class Hexagon_v512v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorentq_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentq_128B">;
-//
-// Hexagon_v1024v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhisat_128B
-class Hexagon_v1024v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorentnq_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentnq_128B">;
-//
-// Hexagon_v512v512v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhisat_acc
-class Hexagon_v512v512v1024i_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemiiv512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
+ llvm_v16i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v1024v1024v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhisat_acc_128B
-class Hexagon_v1024v1024v2048i_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemiiv1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
+ llvm_v32i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v1024v1024ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyubi
-class Hexagon_v1024v1024ii_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemiiv2048_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
+ llvm_v64i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v2048v2048ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyubi_128B
-class Hexagon_v2048v2048ii_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemv64iiiv512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v16i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v1024v1024v1024ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyubi_acc
-class Hexagon_v1024v1024v1024ii_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemv128iiiv1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v32i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v2048v2048v2048ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyubi_acc_128B
-class Hexagon_v2048v2048v2048ii_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemv64iiiv1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v32i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v2048v2048v2048_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddb_dv_128B
-class Hexagon_v2048v2048v2048_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemv128iiiv2048_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v64i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v1024v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddubh
-class Hexagon_v1024v512v512_Intrinsic<string GCCIntSuffix>
+def int_hexagon_V6_vgathermw :
+Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermw">;
+
+def int_hexagon_V6_vgathermw_128B :
+Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermw_128B">;
+
+def int_hexagon_V6_vgathermh :
+Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermh">;
+
+def int_hexagon_V6_vgathermh_128B :
+Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermh_128B">;
+
+def int_hexagon_V6_vgathermhw :
+Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermhw">;
+
+def int_hexagon_V6_vgathermhw_128B :
+Hexagon_V65_vvmemiiv2048_Intrinsic<"HEXAGON_V6_vgathermhw_128B">;
+
+def int_hexagon_V6_vgathermwq :
+Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermwq">;
+
+def int_hexagon_V6_vgathermwq_128B :
+Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermwq_128B">;
+
+def int_hexagon_V6_vgathermhq :
+Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermhq">;
+
+def int_hexagon_V6_vgathermhq_128B :
+Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhq_128B">;
+
+def int_hexagon_V6_vgathermhwq :
+Hexagon_V65_vvmemv64iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhwq">;
+
+def int_hexagon_V6_vgathermhwq_128B :
+Hexagon_V65_vvmemv128iiiv2048_Intrinsic<"HEXAGON_V6_vgathermhwq_128B">;
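// For reference, the gather definitions above resolve the same way, e.g.
//
//   def int_hexagon_V6_vgathermw :
//   Hexagon_Intrinsic<"HEXAGON_V6_vgathermw",
//     [], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty],
//     [IntrArgMemOnly]>;
//
// void intrinsics taking a destination pointer, two scalar operands and an
// index vector, with the gathered data presumably written through the pointer
// (hence IntrArgMemOnly rather than a vector return value).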
+
+class Hexagon_V65_viiv512v512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+ [], [llvm_i32_ty,llvm_i32_ty,
+ llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v2048v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddubh_128B
-class Hexagon_v2048v1024v1024_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_viiv1024v1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+ [], [llvm_i32_ty,llvm_i32_ty,
+ llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vd0
-class Hexagon_v512_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vv64iiiv512v512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [],
- [IntrNoMem]>;
+ [], [llvm_v512i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v16i32_ty,
+ llvm_v16i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vd0_128B
-class Hexagon_v1024_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vv128iiiv1024v1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [],
- [IntrNoMem]>;
+ [], [llvm_v1024i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v32i32_ty,
+ llvm_v32i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v512v64iv512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddbq
-class Hexagon_v512v64iv512v512_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_viiv1024v512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+ [], [llvm_i32_ty,llvm_i32_ty,
+ llvm_v32i32_ty,llvm_v16i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v1024v128iv1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddbq_128B
-class Hexagon_v1024v128iv1024v1024_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_viiv2048v1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+ [], [llvm_i32_ty,llvm_i32_ty,
+ llvm_v64i32_ty,llvm_v32i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vabsh
-class Hexagon_v512v512_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vv64iiiv1024v512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty],
- [IntrNoMem]>;
+ [], [llvm_v512i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v32i32_ty,
+ llvm_v16i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vabsh_128B
-class Hexagon_v1024v1024_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vv128iiiv2048v1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty],
- [IntrNoMem]>;
+ [], [llvm_v1024i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v64i32_ty,
+ llvm_v32i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v1024v1024v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpybv_acc
-class Hexagon_v1024v1024v512v512_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_v2048_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [llvm_v64i32_ty], [],
[IntrNoMem]>;
//
-// Hexagon_v2048v2048v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpybv_acc_128B
-class Hexagon_v2048v2048v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermw,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermw
+def int_hexagon_V6_vscattermw :
+Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw">;
//
-// Hexagon_v1024v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyub
-class Hexagon_v1024v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermw_128B,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermw_128B
+def int_hexagon_V6_vscattermw_128B :
+Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_128B">;
//
-// Hexagon_v2048v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyub_128B
-class Hexagon_v2048v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermh,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermh
+def int_hexagon_V6_vscattermh :
+Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh">;
//
-// Hexagon_v1024v1024v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyub_acc
-class Hexagon_v1024v1024v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermh_128B,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermh_128B
+def int_hexagon_V6_vscattermh_128B :
+Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_128B">;
//
-// Hexagon_v2048v2048v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyub_acc_128B
-class Hexagon_v2048v2048v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermw_add,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermw_add
+def int_hexagon_V6_vscattermw_add :
+Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw_add">;
//
-// Hexagon_v512v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandqrt
-class Hexagon_v512v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermw_add_128B,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermw_add_128B
+def int_hexagon_V6_vscattermw_add_128B :
+Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_add_128B">;
//
-// Hexagon_v1024v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandqrt_128B
-class Hexagon_v1024v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermh_add,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermh_add
+def int_hexagon_V6_vscattermh_add :
+Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh_add">;
//
-// Hexagon_v512v512v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandqrt_acc
-class Hexagon_v512v512v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v512i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermh_add_128B,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermh_add_128B
+def int_hexagon_V6_vscattermh_add_128B :
+Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_add_128B">;
//
-// Hexagon_v1024v1024v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandqrt_acc_128B
-class Hexagon_v1024v1024v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v1024i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermwq,v_ftype_QVSISIVIVI,5)
+// tag : V6_vscattermwq
+def int_hexagon_V6_vscattermwq :
+Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermwq">;
//
-// Hexagon_v64iv512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvrt
-class Hexagon_v64iv512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermwq_128B,v_ftype_QVSISIVIVI,5)
+// tag : V6_vscattermwq_128B
+def int_hexagon_V6_vscattermwq_128B :
+Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermwq_128B">;
//
-// Hexagon_v128iv1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvrt_128B
-class Hexagon_v128iv1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhq,v_ftype_QVSISIVIVI,5)
+// tag : V6_vscattermhq
+def int_hexagon_V6_vscattermhq :
+Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermhq">;
//
-// Hexagon_v64iv64iv512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvrt_acc
-class Hexagon_v64iv64iv512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhq_128B,v_ftype_QVSISIVIVI,5)
+// tag : V6_vscattermhq_128B
+def int_hexagon_V6_vscattermhq_128B :
+Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermhq_128B">;
//
-// Hexagon_v128iv128iv1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvrt_acc_128B
-class Hexagon_v128iv128iv1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhw,v_ftype_SISIVDVI,4)
+// tag : V6_vscattermhw
+def int_hexagon_V6_vscattermhw :
+Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw">;
//
-// Hexagon_v64iv512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vgtw
-class Hexagon_v64iv512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhw_128B,v_ftype_SISIVDVI,4)
+// tag : V6_vscattermhw_128B
+def int_hexagon_V6_vscattermhw_128B :
+Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_128B">;
//
-// Hexagon_v128iv1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vgtw_128B
-class Hexagon_v128iv1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhwq,v_ftype_QVSISIVDVI,5)
+// tag : V6_vscattermhwq
+def int_hexagon_V6_vscattermhwq :
+Hexagon_V65_vv64iiiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhwq">;
//
-// Hexagon_v64iv64iv512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vgtw_and
-class Hexagon_v64iv64iv512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhwq_128B,v_ftype_QVSISIVDVI,5)
+// tag : V6_vscattermhwq_128B
+def int_hexagon_V6_vscattermhwq_128B :
+Hexagon_V65_vv128iiiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhwq_128B">;
//
-// Hexagon_v128iv128iv1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vgtw_and_128B
-class Hexagon_v128iv128iv1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add,v_ftype_SISIVDVI,4)
+// tag : V6_vscattermhw_add
+def int_hexagon_V6_vscattermhw_add :
+Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw_add">;
//
-// Hexagon_v64iv64iv64i_Intrinsic<string GCCIntSuffix>
+// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add_128B,v_ftype_SISIVDVI,4)
+// tag : V6_vscattermhw_add_128B
+def int_hexagon_V6_vscattermhw_add_128B :
+Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_add_128B">;
+
+// Auto-generated intrinsics
+
+// tag : S2_vsatwh
+class Hexagon_i32_i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
+
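// These auto-generated classes name their signatures directly after the LLVM
// types involved. Taking the tag comment as a guide, the class above would
// back a definition of the form (a sketch of the pattern, not a definition
// added here):
//
//   def int_hexagon_S2_vsatwh :
//   Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vsatwh">;
//
// carrying the same i32-result / i64-operand signature as the old
// Hexagon_si_di_Intrinsic form removed earlier in this diff.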
+// tag : V6_vrmpybusv
+class Hexagon_v16i32_v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybusv
+class Hexagon_v32i32_v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vaslw_acc
+class Hexagon_v16i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vaslw_acc
+class Hexagon_v32i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmux
+class Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmux
+class Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : S2_tableidxd_goodsyntax
+class Hexagon_i32_i32i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandnqrt_acc
+class Hexagon_v16i32_v16i32v512i1i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v512i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandnqrt_acc
+class Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v1024i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybusi
+class Hexagon_v32i32_v32i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybusi
+class Hexagon_v64i32_v64i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vsubb_dv
+class Hexagon_v64i32_v64i32v64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty],
+ [IntrNoMem]>;
+
+// tag : M2_mpysu_up
+class Hexagon_i32_i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : M2_mpyud_acc_ll_s0
+class Hexagon_i64_i64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : S2_lsr_i_r_nac
+class Hexagon_i32_i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : M2_cmpysc_s0
+class Hexagon_i64_i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_lo
+class Hexagon_v16i32_v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_lo
+class Hexagon_v32i32_v64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v64i32_ty],
+ [IntrNoMem]>;
+
+// tag : S2_shuffoh
+class Hexagon_i64_i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : F2_sfmax
+class Hexagon_float_floatfloat_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_float_ty,llvm_float_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : A2_vabswsat
+class Hexagon_i64_i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag :
+class Hexagon_v32i32_v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_ldnp0
+class Hexagon_v16i32_i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_ldnp0
+class Hexagon_v32i32_i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vdmpyhb
+class Hexagon_v16i32_v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vdmpyhb
+class Hexagon_v32i32_v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : A4_vcmphgti
+class Hexagon_i32_i64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag :
+class Hexagon_v32i32_v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : S6_rol_i_p_or
+class Hexagon_i64_i64i64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vgtuh_and
+class Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vgtuh_and
+class Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : A2_abssat
+class Hexagon_i32_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : A2_vcmpwgtu
+class Hexagon_i32_i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vtmpybus_acc
+class Hexagon_v64i32_v64i32v64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_conv_df2uw_chop
+class Hexagon_i32_double_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_double_ty],
+ [IntrNoMem]>;
+
// tag : V6_pred_or
-class Hexagon_v64iv64iv64i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v512i1_ty],
- [IntrNoMem]>;
+class Hexagon_v512i1_v512i1v512i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v512i1_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v128iv128iv128i_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_or_128B
-class Hexagon_v128iv128iv128i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v1024i1_ty],
- [IntrNoMem]>;
+// tag : V6_pred_or
+class Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v1024i1_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v64iv64i_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_not
-class Hexagon_v64iv64i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty],
- [IntrNoMem]>;
+// tag : S2_asr_i_p_rnd_goodsyntax
+class Hexagon_i64_i64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v128iv128i_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_not_128B
-class Hexagon_v128iv128i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty],
- [IntrNoMem]>;
+// tag : F2_conv_w2df
+class Hexagon_double_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_double_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_scalar2
-class Hexagon_v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vunpackuh
+class Hexagon_v32i32_v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_scalar2_128B
-class Hexagon_v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vunpackuh
+class Hexagon_v64i32_v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v1024v64iv512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vswap
-class Hexagon_v1024v64iv512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+// tag : V6_vadduhw_acc
+class Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v2048v128iv1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vswap_128B
-class Hexagon_v2048v128iv1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+// tag : V6_vadduhw_acc
+class Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v1024v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vshuffvdd
-class Hexagon_v1024v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// tag : M2_vdmacs_s0
+class Hexagon_i64_i64i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vshuffvdd_128B
-class Hexagon_v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vrmpybub_rtt_acc
+class Hexagon_v32i32_v32i32v16i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybub_rtt_acc
+class Hexagon_v64i32_v64i32v32i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+// tag : V6_ldu0
+class Hexagon_v16i32_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_ldu0
+class Hexagon_v32i32_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : S4_extract_rp
+class Hexagon_i32_i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vdmpyhsuisat
+class Hexagon_v16i32_v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vdmpyhsuisat
+class Hexagon_v32i32_v64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : A2_addsp
+class Hexagon_i64_i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_iv512i_Intrinsic<string GCCIntSuffix>
// tag : V6_extractw
-class Hexagon_iv512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+class Hexagon_i32_v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_iv1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_extractw_128B
-class Hexagon_iv1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_extractw
+class Hexagon_i32_v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_lvsplatw
-class Hexagon_v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vlutvwhi
+class Hexagon_v32i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_lvsplatw_128B
-class Hexagon_v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vlutvwhi
+class Hexagon_v64i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vgtuh
+class Hexagon_v512i1_v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vgtuh
+class Hexagon_v1024i1_v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_sffma_lib
+class Hexagon_float_floatfloatfloat_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_float_ty,llvm_float_ty,llvm_float_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : F2_conv_ud2df
+class Hexagon_double_i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_double_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : S2_vzxthw
+class Hexagon_i64_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vtmpyhb
+class Hexagon_v64i32_v64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vshufoeh
+class Hexagon_v32i32_v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vshufoeh
+class Hexagon_v64i32_v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vlut4
+class Hexagon_v16i32_v16i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vlut4
+class Hexagon_v32i32_v32i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag :
+class Hexagon_v16i32_v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_conv_uw2sf
+class Hexagon_float_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vswap
+class Hexagon_v32i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vswap
+class Hexagon_v64i32_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandnqrt
+class Hexagon_v16i32_v512i1i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandnqrt
+class Hexagon_v32i32_v1024i1i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmpyub
+class Hexagon_v64i32_v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : A5_ACS
+class Hexagon_i64i32_i64i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty,llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vunpackob
+class Hexagon_v32i32_v32i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vunpackob
+class Hexagon_v64i32_v64i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmpyhsat_acc
+class Hexagon_v32i32_v32i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmpyhsat_acc
+class Hexagon_v64i32_v64i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vaddcarrysat
+class Hexagon_v16i32_v16i32v16i32v512i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v512i1_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vaddcarrysat
+class Hexagon_v32i32_v32i32v32i32v1024i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v1024i1_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v512v512v512v512i_Intrinsic<string GCCIntSuffix>
// tag : V6_vlutvvb_oracc
-class Hexagon_v512v512v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+class Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v1024v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvvb_oracc_128B
-class Hexagon_v1024v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vlutvvb_oracc
+class Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybub_rtt
+class Hexagon_v32i32_v16i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybub_rtt
+class Hexagon_v64i32_v32i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : A4_addp_c
+class Hexagon_i64i32_i64i64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty,llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrsadubi_acc
+class Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrsadubi_acc
+class Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_conv_df2sf
+class Hexagon_float_double_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_double_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandvqv
+class Hexagon_v16i32_v512i1v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandvqv
+class Hexagon_v32i32_v1024i1v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : C2_vmux
+class Hexagon_i64_i32i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty,llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : F2_sfcmpeq
+class Hexagon_i32_floatfloat_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_float_ty,llvm_float_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : V6_vmpahhsat
+class Hexagon_v16i32_v16i32v16i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmpahhsat
+class Hexagon_v32i32_v32i32v32i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandvrt
+class Hexagon_v512i1_v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandvrt
+class Hexagon_v1024i1_v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vsubcarry
+class Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v16i32_ty,llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v512i1_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vsubcarry
+class Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v32i32_ty,llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v1024i1_ty],
+ [IntrNoMem]>;
+
+// tag : F2_sffixupr
+class Hexagon_float_float_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_float_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : V6_vandvrt_acc
+class Hexagon_v512i1_v512i1v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandvrt_acc
+class Hexagon_v1024i1_v1024i1v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_dfsub
+class Hexagon_double_doubledouble_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_double_ty], [llvm_double_ty,llvm_double_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : V6_vmpyowh_sacc
+class Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmpyowh_sacc
+class Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : S2_insertp
+class Hexagon_i64_i64i64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_sfinvsqrta
+class Hexagon_floati32_float_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty,llvm_i32_ty], [llvm_float_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : V6_vtran2x2_map
+class Hexagon_v16i32v16i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty,llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vtran2x2_map
+class Hexagon_v32i32v32i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty,llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v1024v1024v512v512i_Intrinsic<string GCCIntSuffix>
// tag : V6_vlutvwh_oracc
-class Hexagon_v1024v1024v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+class Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v2048v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvwh_oracc_128B
-class Hexagon_v2048v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vlutvwh_oracc
+class Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
-// tag: V6_vS32b_qpred_ai
-class Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v512i1_ty,llvm_ptr_ty,llvm_v16i32_ty],
- [IntrArgMemOnly]>;
+// tag : F2_dfcmpge
+class Hexagon_i32_doubledouble_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_double_ty,llvm_double_ty],
+ [IntrNoMem, Throws]>;
-//
-// Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
-// tag: V6_vS32b_qpred_ai_128B
-class Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v1024i1_ty,llvm_ptr_ty,llvm_v32i32_ty],
- [IntrArgMemOnly]>;
+// tag : F2_conv_df2d_chop
+class Hexagon_i64_double_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_double_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r,SI_ftype_SISI,2)
-// tag : S6_rol_i_r
-def int_hexagon_S6_rol_i_r :
-Hexagon_iii_Intrinsic<"HEXAGON_S6_rol_i_r">;
+// tag : F2_conv_sf2w
+class Hexagon_i32_float_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_float_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p,DI_ftype_DISI,2)
-// tag : S6_rol_i_p
-def int_hexagon_S6_rol_i_p :
-Hexagon_LLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p">;
+// tag : F2_sfclass
+class Hexagon_i32_floati32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_float_ty,llvm_i32_ty],
+ [IntrNoMem, Throws]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r_acc,SI_ftype_SISISI,3)
-// tag : S6_rol_i_r_acc
-def int_hexagon_S6_rol_i_r_acc :
-Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_acc">;
+// tag : F2_conv_sf2ud_chop
+class Hexagon_i64_float_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_float_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p_acc,DI_ftype_DIDISI,3)
-// tag : S6_rol_i_p_acc
-def int_hexagon_S6_rol_i_p_acc :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_acc">;
+// tag : V6_pred_scalar2v2
+class Hexagon_v512i1_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r_nac,SI_ftype_SISISI,3)
-// tag : S6_rol_i_r_nac
-def int_hexagon_S6_rol_i_r_nac :
-Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_nac">;
+// tag : V6_pred_scalar2v2
+class Hexagon_v1024i1_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p_nac,DI_ftype_DIDISI,3)
-// tag : S6_rol_i_p_nac
-def int_hexagon_S6_rol_i_p_nac :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_nac">;
+// tag : F2_sfrecipa
+class Hexagon_floati32_floatfloat_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty,llvm_i32_ty], [llvm_float_ty,llvm_float_ty],
+ [IntrNoMem, Throws]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r_xacc,SI_ftype_SISISI,3)
-// tag : S6_rol_i_r_xacc
-def int_hexagon_S6_rol_i_r_xacc :
-Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_xacc">;
+// tag : V6_vprefixqh
+class Hexagon_v16i32_v512i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v512i1_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p_xacc,DI_ftype_DIDISI,3)
-// tag : S6_rol_i_p_xacc
-def int_hexagon_S6_rol_i_p_xacc :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_xacc">;
+// tag : V6_vprefixqh
+class Hexagon_v32i32_v1024i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v1024i1_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r_and,SI_ftype_SISISI,3)
-// tag : S6_rol_i_r_and
-def int_hexagon_S6_rol_i_r_and :
-Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_and">;
+// tag : V6_vdmpyhisat_acc
+class Hexagon_v16i32_v16i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r_or,SI_ftype_SISISI,3)
-// tag : S6_rol_i_r_or
-def int_hexagon_S6_rol_i_r_or :
-Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_or">;
+// tag : V6_vdmpyhisat_acc
+class Hexagon_v32i32_v32i32v64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p_and,DI_ftype_DIDISI,3)
-// tag : S6_rol_i_p_and
-def int_hexagon_S6_rol_i_p_and :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_and">;
+// tag : F2_conv_ud2sf
+class Hexagon_float_i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p_or,DI_ftype_DIDISI,3)
-// tag : S6_rol_i_p_or
-def int_hexagon_S6_rol_i_p_or :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_or">;
+// tag : F2_conv_sf2df
+class Hexagon_double_float_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_double_ty], [llvm_float_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S2_cabacencbin,DI_ftype_DIDIQI,3)
-// tag : S2_cabacencbin
-def int_hexagon_S2_cabacencbin :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S2_cabacencbin">;
+// tag : F2_sffma_sc
+class Hexagon_float_floatfloatfloati32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_float_ty,llvm_float_ty,llvm_float_ty,llvm_i32_ty],
+ [IntrNoMem, Throws]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_valignb,VI_ftype_VIVISI,3)
-// tag : V6_valignb
-def int_hexagon_V6_valignb :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_valignb">;
+// tag : F2_dfclass
+class Hexagon_i32_doublei32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_double_ty,llvm_i32_ty],
+ [IntrNoMem, Throws]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_valignb_128B,VI_ftype_VIVISI,3)
-// tag : V6_valignb_128B
-def int_hexagon_V6_valignb_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_valignb_128B">;
+// tag : V6_vd0
+class Hexagon_v16i32__Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlalignb,VI_ftype_VIVISI,3)
-// tag : V6_vlalignb
-def int_hexagon_V6_vlalignb :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlalignb">;
+// tag : V6_vd0
+class Hexagon_v32i32__Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlalignb_128B,VI_ftype_VIVISI,3)
-// tag : V6_vlalignb_128B
-def int_hexagon_V6_vlalignb_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlalignb_128B">;
+// tag : V6_vdd0
+class Hexagon_v64i32__Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_valignbi,VI_ftype_VIVISI,3)
-// tag : V6_valignbi
-def int_hexagon_V6_valignbi :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_valignbi">;
+// tag : S2_insert_rp
+class Hexagon_i32_i32i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_valignbi_128B,VI_ftype_VIVISI,3)
-// tag : V6_valignbi_128B
-def int_hexagon_V6_valignbi_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_valignbi_128B">;
+// tag : V6_vassignp
+class Hexagon_v64i32_v64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlalignbi,VI_ftype_VIVISI,3)
-// tag : V6_vlalignbi
-def int_hexagon_V6_vlalignbi :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlalignbi">;
+// tag : A6_vminub_RdP
+class Hexagon_i64i32_i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty,llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlalignbi_128B,VI_ftype_VIVISI,3)
-// tag : V6_vlalignbi_128B
-def int_hexagon_V6_vlalignbi_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlalignbi_128B">;
+// tag : V6_pred_not
+class Hexagon_v512i1_v512i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vror,VI_ftype_VISI,2)
-// tag : V6_vror
-def int_hexagon_V6_vror :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vror">;
+// tag : V6_pred_not
+class Hexagon_v1024i1_v1024i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vror_128B,VI_ftype_VISI,2)
-// tag : V6_vror_128B
-def int_hexagon_V6_vror_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vror_128B">;
+// V5 Scalar Instructions.
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackub,VD_ftype_VI,1)
-// tag : V6_vunpackub
-def int_hexagon_V6_vunpackub :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackub">;
+def int_hexagon_S2_asr_r_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackub_128B,VD_ftype_VI,1)
-// tag : V6_vunpackub_128B
-def int_hexagon_V6_vunpackub_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackub_128B">;
+def int_hexagon_S2_vsatwh :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vsatwh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackb,VD_ftype_VI,1)
-// tag : V6_vunpackb
-def int_hexagon_V6_vunpackb :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackb">;
+def int_hexagon_S2_tableidxd_goodsyntax :
+Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackb_128B,VD_ftype_VI,1)
-// tag : V6_vunpackb_128B
-def int_hexagon_V6_vunpackb_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackb_128B">;
+def int_hexagon_M2_mpysu_up :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysu_up">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackuh,VD_ftype_VI,1)
-// tag : V6_vunpackuh
-def int_hexagon_V6_vunpackuh :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackuh">;
+def int_hexagon_M2_mpyud_acc_ll_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackuh_128B,VD_ftype_VI,1)
-// tag : V6_vunpackuh_128B
-def int_hexagon_V6_vunpackuh_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackuh_128B">;
+def int_hexagon_M2_mpyud_acc_ll_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackh,VD_ftype_VI,1)
-// tag : V6_vunpackh
-def int_hexagon_V6_vunpackh :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackh">;
+def int_hexagon_M2_cmpysc_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpysc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackh_128B,VD_ftype_VI,1)
-// tag : V6_vunpackh_128B
-def int_hexagon_V6_vunpackh_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackh_128B">;
+def int_hexagon_M2_cmpysc_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpysc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackob,VD_ftype_VDVI,2)
-// tag : V6_vunpackob
-def int_hexagon_V6_vunpackob :
-Hexagon_v1024v1024v512_Intrinsic<"HEXAGON_V6_vunpackob">;
+def int_hexagon_M4_cmpyi_whc :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyi_whc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackob_128B,VD_ftype_VDVI,2)
-// tag : V6_vunpackob_128B
-def int_hexagon_V6_vunpackob_128B :
-Hexagon_v2048v2048v1024_Intrinsic<"HEXAGON_V6_vunpackob_128B">;
+def int_hexagon_M2_mpy_sat_rnd_lh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackoh,VD_ftype_VDVI,2)
-// tag : V6_vunpackoh
-def int_hexagon_V6_vunpackoh :
-Hexagon_v1024v1024v512_Intrinsic<"HEXAGON_V6_vunpackoh">;
+def int_hexagon_M2_mpy_sat_rnd_lh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackoh_128B,VD_ftype_VDVI,2)
-// tag : V6_vunpackoh_128B
-def int_hexagon_V6_vunpackoh_128B :
-Hexagon_v2048v2048v1024_Intrinsic<"HEXAGON_V6_vunpackoh_128B">;
+def int_hexagon_S2_tableidxb_goodsyntax :
+Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxb_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackeb,VI_ftype_VIVI,2)
-// tag : V6_vpackeb
-def int_hexagon_V6_vpackeb :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackeb">;
+def int_hexagon_S2_shuffoh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_shuffoh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackeb_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackeb_128B
-def int_hexagon_V6_vpackeb_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackeb_128B">;
+def int_hexagon_F2_sfmax :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfmax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackeh,VI_ftype_VIVI,2)
-// tag : V6_vpackeh
-def int_hexagon_V6_vpackeh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackeh">;
+def int_hexagon_A2_vabswsat :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vabswsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackeh_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackeh_128B
-def int_hexagon_V6_vpackeh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackeh_128B">;
+def int_hexagon_S2_asr_i_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackob,VI_ftype_VIVI,2)
-// tag : V6_vpackob
-def int_hexagon_V6_vpackob :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackob">;
+def int_hexagon_S2_asr_i_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackob_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackob_128B
-def int_hexagon_V6_vpackob_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackob_128B">;
+def int_hexagon_A4_combineri :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackoh,VI_ftype_VIVI,2)
-// tag : V6_vpackoh
-def int_hexagon_V6_vpackoh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackoh">;
+def int_hexagon_M2_mpy_nac_sat_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackoh_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackoh_128B
-def int_hexagon_V6_vpackoh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackoh_128B">;
+def int_hexagon_M4_vpmpyh_acc :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M4_vpmpyh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackhub_sat,VI_ftype_VIVI,2)
-// tag : V6_vpackhub_sat
-def int_hexagon_V6_vpackhub_sat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackhub_sat">;
+def int_hexagon_M2_vcmpy_s0_sat_i :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_i">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackhub_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackhub_sat_128B
-def int_hexagon_V6_vpackhub_sat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackhub_sat_128B">;
+def int_hexagon_A2_notp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_notp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackhb_sat,VI_ftype_VIVI,2)
-// tag : V6_vpackhb_sat
-def int_hexagon_V6_vpackhb_sat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackhb_sat">;
+def int_hexagon_M2_mpy_hl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackhb_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackhb_sat_128B
-def int_hexagon_V6_vpackhb_sat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackhb_sat_128B">;
+def int_hexagon_M2_mpy_hl_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackwuh_sat,VI_ftype_VIVI,2)
-// tag : V6_vpackwuh_sat
-def int_hexagon_V6_vpackwuh_sat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackwuh_sat">;
+def int_hexagon_C4_or_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_or_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackwuh_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackwuh_sat_128B
-def int_hexagon_V6_vpackwuh_sat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackwuh_sat_128B">;
+def int_hexagon_M2_vmac2s_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2s_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackwh_sat,VI_ftype_VIVI,2)
-// tag : V6_vpackwh_sat
-def int_hexagon_V6_vpackwh_sat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackwh_sat">;
+def int_hexagon_M2_vmac2s_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2s_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackwh_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackwh_sat_128B
-def int_hexagon_V6_vpackwh_sat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackwh_sat_128B">;
+def int_hexagon_S2_brevp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_brevp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vzb,VD_ftype_VI,1)
-// tag : V6_vzb
-def int_hexagon_V6_vzb :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vzb">;
+def int_hexagon_M4_pmpyw_acc :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M4_pmpyw_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vzb_128B,VD_ftype_VI,1)
-// tag : V6_vzb_128B
-def int_hexagon_V6_vzb_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vzb_128B">;
+def int_hexagon_S2_cl1 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_cl1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsb,VD_ftype_VI,1)
-// tag : V6_vsb
-def int_hexagon_V6_vsb :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vsb">;
+def int_hexagon_C4_cmplte :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplte">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsb_128B,VD_ftype_VI,1)
-// tag : V6_vsb_128B
-def int_hexagon_V6_vsb_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vsb_128B">;
+def int_hexagon_M2_mmpyul_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vzh,VD_ftype_VI,1)
-// tag : V6_vzh
-def int_hexagon_V6_vzh :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vzh">;
+def int_hexagon_A2_vaddws :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddws">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vzh_128B,VD_ftype_VI,1)
-// tag : V6_vzh_128B
-def int_hexagon_V6_vzh_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vzh_128B">;
+def int_hexagon_A2_maxup :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_maxup">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsh,VD_ftype_VI,1)
-// tag : V6_vsh
-def int_hexagon_V6_vsh :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vsh">;
+def int_hexagon_A4_vcmphgti :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgti">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsh_128B,VD_ftype_VI,1)
-// tag : V6_vsh_128B
-def int_hexagon_V6_vsh_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vsh_128B">;
+def int_hexagon_S2_interleave :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_interleave">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus,VI_ftype_VISI,2)
-// tag : V6_vdmpybus
-def int_hexagon_V6_vdmpybus :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpybus">;
+def int_hexagon_M2_vrcmpyi_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrcmpyi_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_128B,VI_ftype_VISI,2)
-// tag : V6_vdmpybus_128B
-def int_hexagon_V6_vdmpybus_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_128B">;
+def int_hexagon_A2_abssat :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_abssat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_acc,VI_ftype_VIVISI,3)
-// tag : V6_vdmpybus_acc
-def int_hexagon_V6_vdmpybus_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpybus_acc">;
+def int_hexagon_A2_vcmpwgtu :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpwgtu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vdmpybus_acc_128B
-def int_hexagon_V6_vdmpybus_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_acc_128B">;
+def int_hexagon_C2_cmpgtu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgtu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv,VD_ftype_VDSI,2)
-// tag : V6_vdmpybus_dv
-def int_hexagon_V6_vdmpybus_dv :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_dv">;
+def int_hexagon_C2_cmpgtp :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpgtp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_128B,VD_ftype_VDSI,2)
-// tag : V6_vdmpybus_dv_128B
-def int_hexagon_V6_vdmpybus_dv_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_128B">;
+def int_hexagon_A4_cmphgtui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgtui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vdmpybus_dv_acc
-def int_hexagon_V6_vdmpybus_dv_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc">;
+def int_hexagon_C2_cmpgti :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgti">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vdmpybus_dv_acc_128B
-def int_hexagon_V6_vdmpybus_dv_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc_128B">;
+def int_hexagon_M2_mpyi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb,VI_ftype_VISI,2)
-// tag : V6_vdmpyhb
-def int_hexagon_V6_vdmpyhb :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhb">;
+def int_hexagon_F2_conv_df2uw_chop :
+Hexagon_i32_double_Intrinsic<"HEXAGON_F2_conv_df2uw_chop">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_128B,VI_ftype_VISI,2)
-// tag : V6_vdmpyhb_128B
-def int_hexagon_V6_vdmpyhb_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_128B">;
+def int_hexagon_A4_cmpheq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpheq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_acc,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhb_acc
-def int_hexagon_V6_vdmpyhb_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhb_acc">;
+def int_hexagon_M2_mpy_lh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhb_acc_128B
-def int_hexagon_V6_vdmpyhb_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_acc_128B">;
+def int_hexagon_M2_mpy_lh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv,VD_ftype_VDSI,2)
-// tag : V6_vdmpyhb_dv
-def int_hexagon_V6_vdmpyhb_dv :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv">;
+def int_hexagon_S2_lsr_i_r_xacc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_128B,VD_ftype_VDSI,2)
-// tag : V6_vdmpyhb_dv_128B
-def int_hexagon_V6_vdmpyhb_dv_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_128B">;
+def int_hexagon_S2_vrcnegh :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vrcnegh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vdmpyhb_dv_acc
-def int_hexagon_V6_vdmpyhb_dv_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc">;
+def int_hexagon_S2_extractup :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S2_extractup">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vdmpyhb_dv_acc_128B
-def int_hexagon_V6_vdmpyhb_dv_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc_128B">;
+def int_hexagon_S2_asr_i_p_rnd_goodsyntax :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat,VI_ftype_VIVI,2)
-// tag : V6_vdmpyhvsat
-def int_hexagon_V6_vdmpyhvsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdmpyhvsat">;
+def int_hexagon_S4_ntstbit_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vdmpyhvsat_128B
-def int_hexagon_V6_vdmpyhvsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdmpyhvsat_128B">;
+def int_hexagon_F2_conv_w2sf :
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_conv_w2sf">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vdmpyhvsat_acc
-def int_hexagon_V6_vdmpyhvsat_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc">;
+def int_hexagon_C2_not :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_not">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vdmpyhvsat_acc_128B
-def int_hexagon_V6_vdmpyhvsat_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc_128B">;
+def int_hexagon_C2_tfrpr :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_tfrpr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat,VI_ftype_VISI,2)
-// tag : V6_vdmpyhsat
-def int_hexagon_V6_vdmpyhsat :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsat">;
+def int_hexagon_M2_mpy_ll_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_128B,VI_ftype_VISI,2)
-// tag : V6_vdmpyhsat_128B
-def int_hexagon_V6_vdmpyhsat_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsat_128B">;
+def int_hexagon_M2_mpy_ll_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_acc,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhsat_acc
-def int_hexagon_V6_vdmpyhsat_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc">;
+def int_hexagon_A4_cmpbgt :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhsat_acc_128B
-def int_hexagon_V6_vdmpyhsat_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc_128B">;
+def int_hexagon_S2_asr_r_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat,VI_ftype_VDSI,2)
-// tag : V6_vdmpyhisat
-def int_hexagon_V6_vdmpyhisat :
-Hexagon_v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhisat">;
+def int_hexagon_A4_rcmpneqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpneqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_128B,VI_ftype_VDSI,2)
-// tag : V6_vdmpyhisat_128B
-def int_hexagon_V6_vdmpyhisat_128B :
-Hexagon_v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhisat_128B">;
+def int_hexagon_S2_asl_i_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_acc,VI_ftype_VIVDSI,3)
-// tag : V6_vdmpyhisat_acc
-def int_hexagon_V6_vdmpyhisat_acc :
-Hexagon_v512v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc">;
+def int_hexagon_M2_subacc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_subacc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_acc_128B,VI_ftype_VIVDSI,3)
-// tag : V6_vdmpyhisat_acc_128B
-def int_hexagon_V6_vdmpyhisat_acc_128B :
-Hexagon_v1024v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc_128B">;
+def int_hexagon_A2_orp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_orp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat,VI_ftype_VISI,2)
-// tag : V6_vdmpyhsusat
-def int_hexagon_V6_vdmpyhsusat :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsusat">;
+def int_hexagon_M2_mpyu_up :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_up">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_128B,VI_ftype_VISI,2)
-// tag : V6_vdmpyhsusat_128B
-def int_hexagon_V6_vdmpyhsusat_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_128B">;
+def int_hexagon_M2_mpy_acc_sat_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_acc,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhsusat_acc
-def int_hexagon_V6_vdmpyhsusat_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc">;
+def int_hexagon_S2_asr_i_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhsusat_acc_128B
-def int_hexagon_V6_vdmpyhsusat_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc_128B">;
+def int_hexagon_S2_asr_i_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat,VI_ftype_VDSI,2)
-// tag : V6_vdmpyhsuisat
-def int_hexagon_V6_vdmpyhsuisat :
-Hexagon_v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat">;
+def int_hexagon_A4_cmpbgtu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_128B,VI_ftype_VDSI,2)
-// tag : V6_vdmpyhsuisat_128B
-def int_hexagon_V6_vdmpyhsuisat_128B :
-Hexagon_v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_128B">;
+def int_hexagon_A4_vcmpbeq_any :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A4_vcmpbeq_any">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_acc,VI_ftype_VIVDSI,3)
-// tag : V6_vdmpyhsuisat_acc
-def int_hexagon_V6_vdmpyhsuisat_acc :
-Hexagon_v512v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc">;
+def int_hexagon_A4_cmpbgti :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgti">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_acc_128B,VI_ftype_VIVDSI,3)
-// tag : V6_vdmpyhsuisat_acc_128B
-def int_hexagon_V6_vdmpyhsuisat_acc_128B :
-Hexagon_v1024v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc_128B">;
+def int_hexagon_M2_mpyd_lh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyb,VD_ftype_VDSI,2)
-// tag : V6_vtmpyb
-def int_hexagon_V6_vtmpyb :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyb">;
+def int_hexagon_S2_asl_r_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyb_128B,VD_ftype_VDSI,2)
-// tag : V6_vtmpyb_128B
-def int_hexagon_V6_vtmpyb_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyb_128B">;
+def int_hexagon_S2_lsr_i_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyb_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpyb_acc
-def int_hexagon_V6_vtmpyb_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyb_acc">;
+def int_hexagon_A2_addsp :
+Hexagon_i64_i32i64_Intrinsic<"HEXAGON_A2_addsp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyb_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpyb_acc_128B
-def int_hexagon_V6_vtmpyb_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyb_acc_128B">;
+def int_hexagon_S4_vxsubaddw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxsubaddw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpybus,VD_ftype_VDSI,2)
-// tag : V6_vtmpybus
-def int_hexagon_V6_vtmpybus :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpybus">;
+def int_hexagon_A4_vcmpheqi :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpheqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpybus_128B,VD_ftype_VDSI,2)
-// tag : V6_vtmpybus_128B
-def int_hexagon_V6_vtmpybus_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpybus_128B">;
+def int_hexagon_S4_vxsubaddh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxsubaddh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpybus_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpybus_acc
-def int_hexagon_V6_vtmpybus_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpybus_acc">;
+def int_hexagon_M4_pmpyw :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M4_pmpyw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpybus_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpybus_acc_128B
-def int_hexagon_V6_vtmpybus_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpybus_acc_128B">;
+def int_hexagon_S2_vsathb :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vsathb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyhb,VD_ftype_VDSI,2)
-// tag : V6_vtmpyhb
-def int_hexagon_V6_vtmpyhb :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyhb">;
+def int_hexagon_S2_asr_r_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_128B,VD_ftype_VDSI,2)
-// tag : V6_vtmpyhb_128B
-def int_hexagon_V6_vtmpyhb_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyhb_128B">;
+def int_hexagon_M2_mpyu_acc_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpyhb_acc
-def int_hexagon_V6_vtmpyhb_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyhb_acc">;
+def int_hexagon_M2_mpyu_acc_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpyhb_acc_128B
-def int_hexagon_V6_vtmpyhb_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyhb_acc_128B">;
+def int_hexagon_S2_lsl_r_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub,VI_ftype_VISI,2)
-// tag : V6_vrmpyub
-def int_hexagon_V6_vrmpyub :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vrmpyub">;
+def int_hexagon_A2_pxorf :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_A2_pxorf">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_128B,VI_ftype_VISI,2)
-// tag : V6_vrmpyub_128B
-def int_hexagon_V6_vrmpyub_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpyub_128B">;
+def int_hexagon_C2_cmpgei :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgei">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_acc,VI_ftype_VIVISI,3)
-// tag : V6_vrmpyub_acc
-def int_hexagon_V6_vrmpyub_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vrmpyub_acc">;
+def int_hexagon_A2_vsubub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vrmpyub_acc_128B
-def int_hexagon_V6_vrmpyub_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpyub_acc_128B">;
+def int_hexagon_S2_asl_i_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubv,VI_ftype_VIVI,2)
-// tag : V6_vrmpyubv
-def int_hexagon_V6_vrmpyubv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpyubv">;
+def int_hexagon_S2_asl_i_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_128B,VI_ftype_VIVI,2)
-// tag : V6_vrmpyubv_128B
-def int_hexagon_V6_vrmpyubv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpyubv_128B">;
+def int_hexagon_A4_vrminuw :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminuw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpyubv_acc
-def int_hexagon_V6_vrmpyubv_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpyubv_acc">;
+def int_hexagon_F2_sffma :
+Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffma">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpyubv_acc_128B
-def int_hexagon_V6_vrmpyubv_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpyubv_acc_128B">;
+def int_hexagon_A2_absp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_absp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybv,VI_ftype_VIVI,2)
-// tag : V6_vrmpybv
-def int_hexagon_V6_vrmpybv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybv">;
+def int_hexagon_C2_all8 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_all8">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybv_128B,VI_ftype_VIVI,2)
-// tag : V6_vrmpybv_128B
-def int_hexagon_V6_vrmpybv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybv_128B">;
+def int_hexagon_A4_vrminuh :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybv_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpybv_acc
-def int_hexagon_V6_vrmpybv_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybv_acc">;
+def int_hexagon_F2_sffma_lib :
+Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffma_lib">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybv_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpybv_acc_128B
-def int_hexagon_V6_vrmpybv_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybv_acc_128B">;
+def int_hexagon_M4_vrmpyoh_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M4_vrmpyoh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubi,VD_ftype_VDSISI,3)
-// tag : V6_vrmpyubi
-def int_hexagon_V6_vrmpyubi :
-Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpyubi">;
+def int_hexagon_M4_vrmpyoh_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M4_vrmpyoh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_128B,VD_ftype_VDSISI,3)
-// tag : V6_vrmpyubi_128B
-def int_hexagon_V6_vrmpyubi_128B :
-Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpyubi_128B">;
+def int_hexagon_C2_bitsset :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsset">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_acc,VD_ftype_VDVDSISI,4)
-// tag : V6_vrmpyubi_acc
-def int_hexagon_V6_vrmpyubi_acc :
-Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpyubi_acc">;
+def int_hexagon_M2_mpysip :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysip">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_acc_128B,VD_ftype_VDVDSISI,4)
-// tag : V6_vrmpyubi_acc_128B
-def int_hexagon_V6_vrmpyubi_acc_128B :
-Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B">;
+def int_hexagon_M2_mpysin :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysin">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybus,VI_ftype_VISI,2)
-// tag : V6_vrmpybus
-def int_hexagon_V6_vrmpybus :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vrmpybus">;
+def int_hexagon_A4_boundscheck :
+Hexagon_i32_i32i64_Intrinsic<"HEXAGON_A4_boundscheck">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybus_128B,VI_ftype_VISI,2)
-// tag : V6_vrmpybus_128B
-def int_hexagon_V6_vrmpybus_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpybus_128B">;
+def int_hexagon_M5_vrmpybuu :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M5_vrmpybuu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybus_acc,VI_ftype_VIVISI,3)
-// tag : V6_vrmpybus_acc
-def int_hexagon_V6_vrmpybus_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vrmpybus_acc">;
+def int_hexagon_C4_fastcorner9 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_fastcorner9">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybus_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vrmpybus_acc_128B
-def int_hexagon_V6_vrmpybus_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpybus_acc_128B">;
+def int_hexagon_M2_vrcmpys_s1rp :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M2_vrcmpys_s1rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusi,VD_ftype_VDSISI,3)
-// tag : V6_vrmpybusi
-def int_hexagon_V6_vrmpybusi :
-Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpybusi">;
+def int_hexagon_A2_neg :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_neg">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_128B,VD_ftype_VDSISI,3)
-// tag : V6_vrmpybusi_128B
-def int_hexagon_V6_vrmpybusi_128B :
-Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpybusi_128B">;
+def int_hexagon_A2_subsat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_acc,VD_ftype_VDVDSISI,4)
-// tag : V6_vrmpybusi_acc
-def int_hexagon_V6_vrmpybusi_acc :
-Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpybusi_acc">;
+def int_hexagon_S2_asl_r_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_r_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_acc_128B,VD_ftype_VDVDSISI,4)
-// tag : V6_vrmpybusi_acc_128B
-def int_hexagon_V6_vrmpybusi_acc_128B :
-Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B">;
+def int_hexagon_S2_asl_r_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_r_p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusv,VI_ftype_VIVI,2)
-// tag : V6_vrmpybusv
-def int_hexagon_V6_vrmpybusv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybusv">;
+def int_hexagon_A2_vnavgh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavgh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_128B,VI_ftype_VIVI,2)
-// tag : V6_vrmpybusv_128B
-def int_hexagon_V6_vrmpybusv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybusv_128B">;
+def int_hexagon_M2_mpy_nac_sat_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpybusv_acc
-def int_hexagon_V6_vrmpybusv_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybusv_acc">;
+def int_hexagon_F2_conv_ud2df :
+Hexagon_double_i64_Intrinsic<"HEXAGON_F2_conv_ud2df">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpybusv_acc_128B
-def int_hexagon_V6_vrmpybusv_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybusv_acc_128B">;
+def int_hexagon_A2_vnavgw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavgw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdsaduh,VD_ftype_VDSI,2)
-// tag : V6_vdsaduh
-def int_hexagon_V6_vdsaduh :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdsaduh">;
+def int_hexagon_S2_asl_i_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdsaduh_128B,VD_ftype_VDSI,2)
-// tag : V6_vdsaduh_128B
-def int_hexagon_V6_vdsaduh_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdsaduh_128B">;
+def int_hexagon_S4_subi_lsr_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdsaduh_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vdsaduh_acc
-def int_hexagon_V6_vdsaduh_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdsaduh_acc">;
+def int_hexagon_S2_vzxthw :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vzxthw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdsaduh_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vdsaduh_acc_128B
-def int_hexagon_V6_vdsaduh_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdsaduh_acc_128B">;
+def int_hexagon_F2_sfadd :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfadd">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrsadubi,VD_ftype_VDSISI,3)
-// tag : V6_vrsadubi
-def int_hexagon_V6_vrsadubi :
-Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrsadubi">;
+def int_hexagon_A2_sub :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_sub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrsadubi_128B,VD_ftype_VDSISI,3)
-// tag : V6_vrsadubi_128B
-def int_hexagon_V6_vrsadubi_128B :
-Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrsadubi_128B">;
+def int_hexagon_M2_vmac2su_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2su_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrsadubi_acc,VD_ftype_VDVDSISI,4)
-// tag : V6_vrsadubi_acc
-def int_hexagon_V6_vrsadubi_acc :
-Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrsadubi_acc">;
+def int_hexagon_M2_vmac2su_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2su_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrsadubi_acc_128B,VD_ftype_VDVDSISI,4)
-// tag : V6_vrsadubi_acc_128B
-def int_hexagon_V6_vrsadubi_acc_128B :
-Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B">;
+def int_hexagon_M2_dpmpyss_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrw,VI_ftype_VISI,2)
-// tag : V6_vasrw
-def int_hexagon_V6_vasrw :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vasrw">;
+def int_hexagon_S2_insert :
+Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_insert">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrw_128B,VI_ftype_VISI,2)
-// tag : V6_vasrw_128B
-def int_hexagon_V6_vasrw_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vasrw_128B">;
+def int_hexagon_S2_packhl :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_S2_packhl">;
+def int_hexagon_A4_vcmpwgti :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgti">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslw,VI_ftype_VISI,2)
-// tag : V6_vaslw
-def int_hexagon_V6_vaslw :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vaslw">;
+def int_hexagon_A2_vavguwr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguwr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslw_128B,VI_ftype_VISI,2)
-// tag : V6_vaslw_128B
-def int_hexagon_V6_vaslw_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vaslw_128B">;
+def int_hexagon_S2_asl_r_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrw,VI_ftype_VISI,2)
-// tag : V6_vlsrw
-def int_hexagon_V6_vlsrw :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vlsrw">;
+def int_hexagon_A2_svsubhs :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svsubhs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrw_128B,VI_ftype_VISI,2)
-// tag : V6_vlsrw_128B
-def int_hexagon_V6_vlsrw_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrw_128B">;
+def int_hexagon_A2_addh_l16_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwv,VI_ftype_VIVI,2)
-// tag : V6_vasrwv
-def int_hexagon_V6_vasrwv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vasrwv">;
+def int_hexagon_M4_and_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_and_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwv_128B,VI_ftype_VIVI,2)
-// tag : V6_vasrwv_128B
-def int_hexagon_V6_vasrwv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vasrwv_128B">;
+def int_hexagon_F2_conv_d2df :
+Hexagon_double_i64_Intrinsic<"HEXAGON_F2_conv_d2df">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslwv,VI_ftype_VIVI,2)
-// tag : V6_vaslwv
-def int_hexagon_V6_vaslwv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaslwv">;
+def int_hexagon_C2_cmpgtui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgtui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslwv_128B,VI_ftype_VIVI,2)
-// tag : V6_vaslwv_128B
-def int_hexagon_V6_vaslwv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaslwv_128B">;
+def int_hexagon_A2_vconj :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vconj">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrwv,VI_ftype_VIVI,2)
-// tag : V6_vlsrwv
-def int_hexagon_V6_vlsrwv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vlsrwv">;
+def int_hexagon_S2_lsr_r_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_r_vw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrwv_128B,VI_ftype_VIVI,2)
-// tag : V6_vlsrwv_128B
-def int_hexagon_V6_vlsrwv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vlsrwv_128B">;
+def int_hexagon_S2_lsr_r_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrh,VI_ftype_VISI,2)
-// tag : V6_vasrh
-def int_hexagon_V6_vasrh :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vasrh">;
+def int_hexagon_A2_subh_l16_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrh_128B,VI_ftype_VISI,2)
-// tag : V6_vasrh_128B
-def int_hexagon_V6_vasrh_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vasrh_128B">;
+def int_hexagon_S4_vxsubaddhr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxsubaddhr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslh,VI_ftype_VISI,2)
-// tag : V6_vaslh
-def int_hexagon_V6_vaslh :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vaslh">;
+def int_hexagon_S2_clbp :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_clbp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslh_128B,VI_ftype_VISI,2)
-// tag : V6_vaslh_128B
-def int_hexagon_V6_vaslh_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vaslh_128B">;
+def int_hexagon_S2_deinterleave :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_deinterleave">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrh,VI_ftype_VISI,2)
-// tag : V6_vlsrh
-def int_hexagon_V6_vlsrh :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vlsrh">;
+def int_hexagon_C2_any8 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_any8">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrh_128B,VI_ftype_VISI,2)
-// tag : V6_vlsrh_128B
-def int_hexagon_V6_vlsrh_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrh_128B">;
+def int_hexagon_S2_togglebit_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhv,VI_ftype_VIVI,2)
-// tag : V6_vasrhv
-def int_hexagon_V6_vasrhv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vasrhv">;
+def int_hexagon_S2_togglebit_i :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_i">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhv_128B,VI_ftype_VIVI,2)
-// tag : V6_vasrhv_128B
-def int_hexagon_V6_vasrhv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vasrhv_128B">;
+def int_hexagon_F2_conv_uw2sf :
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_conv_uw2sf">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslhv,VI_ftype_VIVI,2)
-// tag : V6_vaslhv
-def int_hexagon_V6_vaslhv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaslhv">;
+def int_hexagon_S2_vsathb_nopack :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_vsathb_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslhv_128B,VI_ftype_VIVI,2)
-// tag : V6_vaslhv_128B
-def int_hexagon_V6_vaslhv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaslhv_128B">;
+def int_hexagon_M2_cmacs_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmacs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrhv,VI_ftype_VIVI,2)
-// tag : V6_vlsrhv
-def int_hexagon_V6_vlsrhv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vlsrhv">;
+def int_hexagon_M2_cmacs_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmacs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrhv_128B,VI_ftype_VIVI,2)
-// tag : V6_vlsrhv_128B
-def int_hexagon_V6_vlsrhv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vlsrhv_128B">;
+def int_hexagon_M2_mpy_sat_hh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwh,VI_ftype_VIVISI,3)
-// tag : V6_vasrwh
-def int_hexagon_V6_vasrwh :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwh">;
+def int_hexagon_M2_mpy_sat_hh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwh_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrwh_128B
-def int_hexagon_V6_vasrwh_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwh_128B">;
+def int_hexagon_M2_mmacuhs_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacuhs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwhsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrwhsat
-def int_hexagon_V6_vasrwhsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwhsat">;
+def int_hexagon_M2_mmacuhs_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacuhs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwhsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrwhsat_128B
-def int_hexagon_V6_vasrwhsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwhsat_128B">;
+def int_hexagon_S2_clrbit_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_clrbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwhrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrwhrndsat
-def int_hexagon_V6_vasrwhrndsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwhrndsat">;
+def int_hexagon_C4_or_andn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_or_andn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwhrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrwhrndsat_128B
-def int_hexagon_V6_vasrwhrndsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwhrndsat_128B">;
+def int_hexagon_S2_asl_r_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwuhsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrwuhsat
-def int_hexagon_V6_vasrwuhsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwuhsat">;
+def int_hexagon_S2_asl_i_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwuhsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrwuhsat_128B
-def int_hexagon_V6_vasrwuhsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwuhsat_128B">;
+def int_hexagon_A4_vcmpwgtui :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgtui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundwh,VI_ftype_VIVI,2)
-// tag : V6_vroundwh
-def int_hexagon_V6_vroundwh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundwh">;
+def int_hexagon_M4_vrmpyoh_acc_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundwh_128B,VI_ftype_VIVI,2)
-// tag : V6_vroundwh_128B
-def int_hexagon_V6_vroundwh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundwh_128B">;
+def int_hexagon_M4_vrmpyoh_acc_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundwuh,VI_ftype_VIVI,2)
-// tag : V6_vroundwuh
-def int_hexagon_V6_vroundwuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundwuh">;
+def int_hexagon_A4_vrmaxh :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundwuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vroundwuh_128B
-def int_hexagon_V6_vroundwuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundwuh_128B">;
+def int_hexagon_A2_vcmpbeq :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpbeq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhubsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrhubsat
-def int_hexagon_V6_vasrhubsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhubsat">;
+def int_hexagon_A2_vcmphgt :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmphgt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhubsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrhubsat_128B
-def int_hexagon_V6_vasrhubsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhubsat_128B">;
+def int_hexagon_A2_vnavgwcr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavgwcr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhubrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrhubrndsat
-def int_hexagon_V6_vasrhubrndsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhubrndsat">;
+def int_hexagon_M2_vrcmacr_s0c :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vrcmacr_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhubrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrhubrndsat_128B
-def int_hexagon_V6_vasrhubrndsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhubrndsat_128B">;
+def int_hexagon_A2_vavgwcr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgwcr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhbrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrhbrndsat
-def int_hexagon_V6_vasrhbrndsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhbrndsat">;
+def int_hexagon_S2_asl_i_p_xacc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhbrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrhbrndsat_128B
-def int_hexagon_V6_vasrhbrndsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhbrndsat_128B">;
+def int_hexagon_A4_vrmaxw :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundhb,VI_ftype_VIVI,2)
-// tag : V6_vroundhb
-def int_hexagon_V6_vroundhb :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundhb">;
+def int_hexagon_A2_vnavghr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavghr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundhb_128B,VI_ftype_VIVI,2)
-// tag : V6_vroundhb_128B
-def int_hexagon_V6_vroundhb_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundhb_128B">;
+def int_hexagon_M4_cmpyi_wh :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyi_wh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundhub,VI_ftype_VIVI,2)
-// tag : V6_vroundhub
-def int_hexagon_V6_vroundhub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundhub">;
+def int_hexagon_A2_tfrsi :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrsi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundhub_128B,VI_ftype_VIVI,2)
-// tag : V6_vroundhub_128B
-def int_hexagon_V6_vroundhub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundhub_128B">;
+def int_hexagon_S2_asr_i_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslw_acc,VI_ftype_VIVISI,3)
-// tag : V6_vaslw_acc
-def int_hexagon_V6_vaslw_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vaslw_acc">;
+def int_hexagon_A2_svnavgh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svnavgh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslw_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vaslw_acc_128B
-def int_hexagon_V6_vaslw_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vaslw_acc_128B">;
+def int_hexagon_S2_lsr_i_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrw_acc,VI_ftype_VIVISI,3)
-// tag : V6_vasrw_acc
-def int_hexagon_V6_vasrw_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrw_acc">;
+def int_hexagon_M2_vmac2 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrw_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrw_acc_128B
-def int_hexagon_V6_vasrw_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrw_acc_128B">;
+def int_hexagon_A4_vcmphgtui :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgtui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddb,VI_ftype_VIVI,2)
-// tag : V6_vaddb
-def int_hexagon_V6_vaddb :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddb">;
+def int_hexagon_A2_svavgh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svavgh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddb_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddb_128B
-def int_hexagon_V6_vaddb_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddb_128B">;
+def int_hexagon_M4_vrmpyeh_acc_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubb,VI_ftype_VIVI,2)
-// tag : V6_vsubb
-def int_hexagon_V6_vsubb :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubb">;
+def int_hexagon_M4_vrmpyeh_acc_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubb_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubb_128B
-def int_hexagon_V6_vsubb_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubb_128B">;
+def int_hexagon_S2_lsr_i_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddb_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddb_dv
-def int_hexagon_V6_vaddb_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddb_dv">;
+def int_hexagon_A2_combine_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddb_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddb_dv_128B
-def int_hexagon_V6_vaddb_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddb_dv_128B">;
+def int_hexagon_M2_mpy_up :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_up">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubb_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubb_dv
-def int_hexagon_V6_vsubb_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubb_dv">;
+def int_hexagon_A2_combine_hh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_hh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubb_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubb_dv_128B
-def int_hexagon_V6_vsubb_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubb_dv_128B">;
+def int_hexagon_A2_negsat :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_negsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddh,VI_ftype_VIVI,2)
-// tag : V6_vaddh
-def int_hexagon_V6_vaddh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddh">;
+def int_hexagon_M2_mpyd_hl_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddh_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddh_128B
-def int_hexagon_V6_vaddh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddh_128B">;
+def int_hexagon_M2_mpyd_hl_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubh,VI_ftype_VIVI,2)
-// tag : V6_vsubh
-def int_hexagon_V6_vsubh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubh">;
+def int_hexagon_A4_bitsplit :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_bitsplit">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubh_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubh_128B
-def int_hexagon_V6_vsubh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubh_128B">;
+def int_hexagon_A2_vabshsat :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vabshsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddh_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddh_dv
-def int_hexagon_V6_vaddh_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddh_dv">;
+def int_hexagon_M2_mpyui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddh_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddh_dv_128B
-def int_hexagon_V6_vaddh_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddh_dv_128B">;
+def int_hexagon_A2_addh_l16_sat_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_sat_ll">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubh_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubh_dv
-def int_hexagon_V6_vsubh_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubh_dv">;
+def int_hexagon_S2_lsl_r_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubh_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubh_dv_128B
-def int_hexagon_V6_vsubh_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubh_dv_128B">;
+def int_hexagon_M2_mmpyul_rs0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddw,VI_ftype_VIVI,2)
-// tag : V6_vaddw
-def int_hexagon_V6_vaddw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddw">;
+def int_hexagon_S2_asr_i_r_rnd_goodsyntax :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddw_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddw_128B
-def int_hexagon_V6_vaddw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddw_128B">;
+def int_hexagon_S2_lsr_r_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubw,VI_ftype_VIVI,2)
-// tag : V6_vsubw
-def int_hexagon_V6_vsubw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubw">;
+def int_hexagon_C2_cmplt :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmplt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubw_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubw_128B
-def int_hexagon_V6_vsubw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubw_128B">;
+def int_hexagon_M2_cmacr_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmacr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddw_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddw_dv
-def int_hexagon_V6_vaddw_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddw_dv">;
+def int_hexagon_M4_or_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_or_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddw_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddw_dv_128B
-def int_hexagon_V6_vaddw_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddw_dv_128B">;
+def int_hexagon_M4_mpyrr_addi :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyrr_addi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubw_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubw_dv
-def int_hexagon_V6_vsubw_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubw_dv">;
+def int_hexagon_S4_or_andi :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubw_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubw_dv_128B
-def int_hexagon_V6_vsubw_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubw_dv_128B">;
+def int_hexagon_M2_mpy_sat_hl_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubsat,VI_ftype_VIVI,2)
-// tag : V6_vaddubsat
-def int_hexagon_V6_vaddubsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddubsat">;
+def int_hexagon_M2_mpy_sat_hl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddubsat_128B
-def int_hexagon_V6_vaddubsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddubsat_128B">;
+def int_hexagon_M4_mpyrr_addr :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyrr_addr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddubsat_dv
-def int_hexagon_V6_vaddubsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddubsat_dv">;
+def int_hexagon_M2_mmachs_rs0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddubsat_dv_128B
-def int_hexagon_V6_vaddubsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddubsat_dv_128B">;
+def int_hexagon_M2_mmachs_rs1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububsat,VI_ftype_VIVI,2)
-// tag : V6_vsububsat
-def int_hexagon_V6_vsububsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsububsat">;
+def int_hexagon_M2_vrcmpyr_s0c :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrcmpyr_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsububsat_128B
-def int_hexagon_V6_vsububsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsububsat_128B">;
+def int_hexagon_M2_mpy_acc_sat_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsububsat_dv
-def int_hexagon_V6_vsububsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsububsat_dv">;
+def int_hexagon_M2_mpyd_acc_ll_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsububsat_dv_128B
-def int_hexagon_V6_vsububsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsububsat_dv_128B">;
+def int_hexagon_F2_sffixupn :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sffixupn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhsat,VI_ftype_VIVI,2)
-// tag : V6_vadduhsat
-def int_hexagon_V6_vadduhsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vadduhsat">;
+def int_hexagon_M2_mpyd_acc_lh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vadduhsat_128B
-def int_hexagon_V6_vadduhsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduhsat_128B">;
+def int_hexagon_M2_mpyd_acc_lh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vadduhsat_dv
-def int_hexagon_V6_vadduhsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduhsat_dv">;
+def int_hexagon_M2_mpy_rnd_hh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vadduhsat_dv_128B
-def int_hexagon_V6_vadduhsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vadduhsat_dv_128B">;
+def int_hexagon_M2_mpy_rnd_hh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhsat,VI_ftype_VIVI,2)
-// tag : V6_vsubuhsat
-def int_hexagon_V6_vsubuhsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubuhsat">;
+def int_hexagon_A2_vadduhs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vadduhs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubuhsat_128B
-def int_hexagon_V6_vsubuhsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhsat_128B">;
+def int_hexagon_A2_vsubuhs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubuhs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubuhsat_dv
-def int_hexagon_V6_vsubuhsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhsat_dv">;
+def int_hexagon_A2_subh_h16_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubuhsat_dv_128B
-def int_hexagon_V6_vsubuhsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubuhsat_dv_128B">;
+def int_hexagon_A2_subh_h16_hh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_hh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhsat,VI_ftype_VIVI,2)
-// tag : V6_vaddhsat
-def int_hexagon_V6_vaddhsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddhsat">;
+def int_hexagon_A2_xorp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_xorp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddhsat_128B
-def int_hexagon_V6_vaddhsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddhsat_128B">;
+def int_hexagon_A4_tfrpcp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A4_tfrpcp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddhsat_dv
-def int_hexagon_V6_vaddhsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddhsat_dv">;
+def int_hexagon_A2_addh_h16_lh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_lh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddhsat_dv_128B
-def int_hexagon_V6_vaddhsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddhsat_dv_128B">;
+def int_hexagon_A2_addh_h16_sat_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhsat,VI_ftype_VIVI,2)
-// tag : V6_vsubhsat
-def int_hexagon_V6_vsubhsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubhsat">;
+def int_hexagon_A2_addh_h16_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubhsat_128B
-def int_hexagon_V6_vsubhsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubhsat_128B">;
+def int_hexagon_A2_addh_h16_sat_hh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_sat_hh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubhsat_dv
-def int_hexagon_V6_vsubhsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubhsat_dv">;
+def int_hexagon_A2_zxtb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_zxtb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubhsat_dv_128B
-def int_hexagon_V6_vsubhsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubhsat_dv_128B">;
+def int_hexagon_A2_zxth :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_zxth">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwsat,VI_ftype_VIVI,2)
-// tag : V6_vaddwsat
-def int_hexagon_V6_vaddwsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddwsat">;
+def int_hexagon_A2_vnavgwr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavgwr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddwsat_128B
-def int_hexagon_V6_vaddwsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddwsat_128B">;
+def int_hexagon_M4_or_xor :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_or_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddwsat_dv
-def int_hexagon_V6_vaddwsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddwsat_dv">;
+def int_hexagon_M2_mpyud_acc_hh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddwsat_dv_128B
-def int_hexagon_V6_vaddwsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddwsat_dv_128B">;
+def int_hexagon_M2_mpyud_acc_hh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwsat,VI_ftype_VIVI,2)
-// tag : V6_vsubwsat
-def int_hexagon_V6_vsubwsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubwsat">;
+def int_hexagon_M5_vmacbsu :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M5_vmacbsu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubwsat_128B
-def int_hexagon_V6_vsubwsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubwsat_128B">;
+def int_hexagon_M2_dpmpyuu_acc_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubwsat_dv
-def int_hexagon_V6_vsubwsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubwsat_dv">;
+def int_hexagon_M2_mpy_rnd_hl_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubwsat_dv_128B
-def int_hexagon_V6_vsubwsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubwsat_dv_128B">;
+def int_hexagon_M2_mpy_rnd_hl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgub,VI_ftype_VIVI,2)
-// tag : V6_vavgub
-def int_hexagon_V6_vavgub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgub">;
+def int_hexagon_F2_sffms_lib :
+Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffms_lib">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgub_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgub_128B
-def int_hexagon_V6_vavgub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgub_128B">;
+def int_hexagon_C4_cmpneqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpneqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgubrnd,VI_ftype_VIVI,2)
-// tag : V6_vavgubrnd
-def int_hexagon_V6_vavgubrnd :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgubrnd">;
+def int_hexagon_M4_and_xor :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_and_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgubrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgubrnd_128B
-def int_hexagon_V6_vavgubrnd_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgubrnd_128B">;
+def int_hexagon_A2_sat :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_A2_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguh,VI_ftype_VIVI,2)
-// tag : V6_vavguh
-def int_hexagon_V6_vavguh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavguh">;
+def int_hexagon_M2_mpyd_nac_lh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguh_128B,VI_ftype_VIVI,2)
-// tag : V6_vavguh_128B
-def int_hexagon_V6_vavguh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguh_128B">;
+def int_hexagon_M2_mpyd_nac_lh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguhrnd,VI_ftype_VIVI,2)
-// tag : V6_vavguhrnd
-def int_hexagon_V6_vavguhrnd :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavguhrnd">;
+def int_hexagon_A2_addsat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguhrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavguhrnd_128B
-def int_hexagon_V6_vavguhrnd_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguhrnd_128B">;
+def int_hexagon_A2_svavghs :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svavghs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgh,VI_ftype_VIVI,2)
-// tag : V6_vavgh
-def int_hexagon_V6_vavgh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgh">;
+def int_hexagon_A2_vrsadub_acc :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_A2_vrsadub_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgh_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgh_128B
-def int_hexagon_V6_vavgh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgh_128B">;
+def int_hexagon_C2_bitsclri :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsclri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavghrnd,VI_ftype_VIVI,2)
-// tag : V6_vavghrnd
-def int_hexagon_V6_vavghrnd :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavghrnd">;
+def int_hexagon_A2_subh_h16_sat_hh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_sat_hh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavghrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavghrnd_128B
-def int_hexagon_V6_vavghrnd_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavghrnd_128B">;
+def int_hexagon_A2_subh_h16_sat_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgh,VI_ftype_VIVI,2)
-// tag : V6_vnavgh
-def int_hexagon_V6_vnavgh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgh">;
+def int_hexagon_M2_mmaculs_rs0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmaculs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgh_128B,VI_ftype_VIVI,2)
-// tag : V6_vnavgh_128B
-def int_hexagon_V6_vnavgh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgh_128B">;
+def int_hexagon_M2_mmaculs_rs1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmaculs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgw,VI_ftype_VIVI,2)
-// tag : V6_vavgw
-def int_hexagon_V6_vavgw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgw">;
+def int_hexagon_M2_vradduh :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_M2_vradduh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgw_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgw_128B
-def int_hexagon_V6_vavgw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgw_128B">;
+def int_hexagon_A4_addp_c :
+Hexagon_i64i32_i64i64i32_Intrinsic<"HEXAGON_A4_addp_c">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgwrnd,VI_ftype_VIVI,2)
-// tag : V6_vavgwrnd
-def int_hexagon_V6_vavgwrnd :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgwrnd">;
+def int_hexagon_C2_xor :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgwrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgwrnd_128B
-def int_hexagon_V6_vavgwrnd_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgwrnd_128B">;
+def int_hexagon_S2_lsl_r_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgw,VI_ftype_VIVI,2)
-// tag : V6_vnavgw
-def int_hexagon_V6_vnavgw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgw">;
+def int_hexagon_M2_mmpyh_rs1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgw_128B,VI_ftype_VIVI,2)
-// tag : V6_vnavgw_128B
-def int_hexagon_V6_vnavgw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgw_128B">;
+def int_hexagon_M2_mmpyh_rs0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyh_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffub,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffub
-def int_hexagon_V6_vabsdiffub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffub">;
+def int_hexagon_F2_conv_df2ud_chop :
+Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2ud_chop">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffub_128B,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffub_128B
-def int_hexagon_V6_vabsdiffub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffub_128B">;
+def int_hexagon_C4_or_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_or_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffuh,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffuh
-def int_hexagon_V6_vabsdiffuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffuh">;
+def int_hexagon_S4_vxaddsubhr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxaddsubhr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffuh_128B
-def int_hexagon_V6_vabsdiffuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffuh_128B">;
+def int_hexagon_S2_vsathub :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vsathub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffh,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffh
-def int_hexagon_V6_vabsdiffh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffh">;
+def int_hexagon_F2_conv_df2sf :
+Hexagon_float_double_Intrinsic<"HEXAGON_F2_conv_df2sf">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffh_128B,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffh_128B
-def int_hexagon_V6_vabsdiffh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffh_128B">;
+def int_hexagon_M2_hmmpyh_rs1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_hmmpyh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffw,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffw
-def int_hexagon_V6_vabsdiffw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffw">;
+def int_hexagon_M2_hmmpyh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_hmmpyh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffw_128B,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffw_128B
-def int_hexagon_V6_vabsdiffw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffw_128B">;
+def int_hexagon_A2_vavgwr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgwr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgub,VI_ftype_VIVI,2)
-// tag : V6_vnavgub
-def int_hexagon_V6_vnavgub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgub">;
+def int_hexagon_S2_tableidxh_goodsyntax :
+Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxh_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgub_128B,VI_ftype_VIVI,2)
-// tag : V6_vnavgub_128B
-def int_hexagon_V6_vnavgub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgub_128B">;
+def int_hexagon_A2_sxth :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_sxth">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubh,VD_ftype_VIVI,2)
-// tag : V6_vaddubh
-def int_hexagon_V6_vaddubh :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vaddubh">;
+def int_hexagon_A2_sxtb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_sxtb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubh_128B,VD_ftype_VIVI,2)
-// tag : V6_vaddubh_128B
-def int_hexagon_V6_vaddubh_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddubh_128B">;
+def int_hexagon_C4_or_orn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_or_orn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububh,VD_ftype_VIVI,2)
-// tag : V6_vsububh
-def int_hexagon_V6_vsububh :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsububh">;
+def int_hexagon_M2_vrcmaci_s0c :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vrcmaci_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububh_128B,VD_ftype_VIVI,2)
-// tag : V6_vsububh_128B
-def int_hexagon_V6_vsububh_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsububh_128B">;
+def int_hexagon_A2_sxtw :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_sxtw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhw,VD_ftype_VIVI,2)
-// tag : V6_vaddhw
-def int_hexagon_V6_vaddhw :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vaddhw">;
+def int_hexagon_M2_vabsdiffh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vabsdiffh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhw_128B,VD_ftype_VIVI,2)
-// tag : V6_vaddhw_128B
-def int_hexagon_V6_vaddhw_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddhw_128B">;
+def int_hexagon_M2_mpy_acc_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhw,VD_ftype_VIVI,2)
-// tag : V6_vsubhw
-def int_hexagon_V6_vsubhw :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsubhw">;
+def int_hexagon_M2_mpy_acc_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhw_128B,VD_ftype_VIVI,2)
-// tag : V6_vsubhw_128B
-def int_hexagon_V6_vsubhw_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsubhw_128B">;
+def int_hexagon_M2_hmmpyl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_hmmpyl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhw,VD_ftype_VIVI,2)
-// tag : V6_vadduhw
-def int_hexagon_V6_vadduhw :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vadduhw">;
+def int_hexagon_S2_cl1p :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_cl1p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhw_128B,VD_ftype_VIVI,2)
-// tag : V6_vadduhw_128B
-def int_hexagon_V6_vadduhw_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vadduhw_128B">;
+def int_hexagon_M2_vabsdiffw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vabsdiffw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhw,VD_ftype_VIVI,2)
-// tag : V6_vsubuhw
-def int_hexagon_V6_vsubuhw :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsubuhw">;
+def int_hexagon_A4_andnp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A4_andnp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhw_128B,VD_ftype_VIVI,2)
-// tag : V6_vsubuhw_128B
-def int_hexagon_V6_vsubuhw_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhw_128B">;
+def int_hexagon_C2_vmux :
+Hexagon_i64_i32i64i64_Intrinsic<"HEXAGON_C2_vmux">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vd0,VI_ftype_,0)
-// tag : V6_vd0
-def int_hexagon_V6_vd0 :
-Hexagon_v512_Intrinsic<"HEXAGON_V6_vd0">;
+def int_hexagon_S2_parityp :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_S2_parityp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vd0_128B,VI_ftype_,0)
-// tag : V6_vd0_128B
-def int_hexagon_V6_vd0_128B :
-Hexagon_v1024_Intrinsic<"HEXAGON_V6_vd0_128B">;
+def int_hexagon_S2_lsr_i_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddbq
-def int_hexagon_V6_vaddbq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddbq">;
+def int_hexagon_S2_asr_i_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddbq_128B
-def int_hexagon_V6_vaddbq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddbq_128B">;
+def int_hexagon_M2_mpyu_nac_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s0">;
+def int_hexagon_M2_mpyu_nac_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubbq
-def int_hexagon_V6_vsubbq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubbq">;
+def int_hexagon_F2_sfcmpeq :
+Hexagon_i32_floatfloat_Intrinsic<"HEXAGON_F2_sfcmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubbq_128B
-def int_hexagon_V6_vsubbq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubbq_128B">;
+def int_hexagon_A2_vaddb_map :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddb_map">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbnq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddbnq
-def int_hexagon_V6_vaddbnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddbnq">;
+def int_hexagon_S2_lsr_r_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddbnq_128B
-def int_hexagon_V6_vaddbnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddbnq_128B">;
+def int_hexagon_A2_vcmpheq :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpheq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbnq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubbnq
-def int_hexagon_V6_vsubbnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubbnq">;
+def int_hexagon_S2_clbnorm :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_clbnorm">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubbnq_128B
-def int_hexagon_V6_vsubbnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubbnq_128B">;
+def int_hexagon_M2_cnacsc_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cnacsc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddhq
-def int_hexagon_V6_vaddhq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddhq">;
+def int_hexagon_M2_cnacsc_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cnacsc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddhq_128B
-def int_hexagon_V6_vaddhq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddhq_128B">;
+def int_hexagon_S4_subaddi :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subaddi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubhq
-def int_hexagon_V6_vsubhq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubhq">;
+def int_hexagon_M2_mpyud_nac_hl_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubhq_128B
-def int_hexagon_V6_vsubhq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubhq_128B">;
+def int_hexagon_M2_mpyud_nac_hl_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhnq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddhnq
-def int_hexagon_V6_vaddhnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddhnq">;
+def int_hexagon_S5_vasrhrnd_goodsyntax :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S5_vasrhrnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddhnq_128B
-def int_hexagon_V6_vaddhnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddhnq_128B">;
+def int_hexagon_S2_tstbit_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhnq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubhnq
-def int_hexagon_V6_vsubhnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubhnq">;
+def int_hexagon_S4_vrcrotate :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubhnq_128B
-def int_hexagon_V6_vsubhnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubhnq_128B">;
+def int_hexagon_M2_mmachs_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddwq
-def int_hexagon_V6_vaddwq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddwq">;
+def int_hexagon_M2_mmachs_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddwq_128B
-def int_hexagon_V6_vaddwq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddwq_128B">;
+def int_hexagon_S2_tstbit_i :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubwq
-def int_hexagon_V6_vsubwq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubwq">;
+def int_hexagon_M2_mpy_up_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_up_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubwq_128B
-def int_hexagon_V6_vsubwq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubwq_128B">;
+def int_hexagon_S2_extractu_rp :
+Hexagon_i32_i32i64_Intrinsic<"HEXAGON_S2_extractu_rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwnq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddwnq
-def int_hexagon_V6_vaddwnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddwnq">;
+def int_hexagon_M2_mmpyuh_rs0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddwnq_128B
-def int_hexagon_V6_vaddwnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddwnq_128B">;
+def int_hexagon_S2_lsr_i_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwnq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubwnq
-def int_hexagon_V6_vsubwnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubwnq">;
+def int_hexagon_M2_mpy_rnd_ll_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubwnq_128B
-def int_hexagon_V6_vsubwnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubwnq_128B">;
+def int_hexagon_M2_mpy_rnd_ll_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsh,VI_ftype_VI,1)
-// tag : V6_vabsh
-def int_hexagon_V6_vabsh :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsh">;
+def int_hexagon_M4_or_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_or_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsh_128B,VI_ftype_VI,1)
-// tag : V6_vabsh_128B
-def int_hexagon_V6_vabsh_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsh_128B">;
+def int_hexagon_M2_mpyu_hh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsh_sat,VI_ftype_VI,1)
-// tag : V6_vabsh_sat
-def int_hexagon_V6_vabsh_sat :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsh_sat">;
+def int_hexagon_M2_mpyu_hh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsh_sat_128B,VI_ftype_VI,1)
-// tag : V6_vabsh_sat_128B
-def int_hexagon_V6_vabsh_sat_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsh_sat_128B">;
+def int_hexagon_S2_asl_r_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsw,VI_ftype_VI,1)
-// tag : V6_vabsw
-def int_hexagon_V6_vabsw :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsw">;
+def int_hexagon_M2_mpyu_nac_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsw_128B,VI_ftype_VI,1)
-// tag : V6_vabsw_128B
-def int_hexagon_V6_vabsw_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsw_128B">;
+def int_hexagon_M2_mpyu_nac_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsw_sat,VI_ftype_VI,1)
-// tag : V6_vabsw_sat
-def int_hexagon_V6_vabsw_sat :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsw_sat">;
+def int_hexagon_M2_mpy_sat_ll_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsw_sat_128B,VI_ftype_VI,1)
-// tag : V6_vabsw_sat_128B
-def int_hexagon_V6_vabsw_sat_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsw_sat_128B">;
+def int_hexagon_M2_mpy_sat_ll_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybv,VD_ftype_VIVI,2)
-// tag : V6_vmpybv
-def int_hexagon_V6_vmpybv :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybv">;
+def int_hexagon_F2_conv_w2df :
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_conv_w2df">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybv_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpybv_128B
-def int_hexagon_V6_vmpybv_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybv_128B">;
+def int_hexagon_A2_subh_l16_sat_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybv_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpybv_acc
-def int_hexagon_V6_vmpybv_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybv_acc">;
+def int_hexagon_C2_cmpeqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpeqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybv_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpybv_acc_128B
-def int_hexagon_V6_vmpybv_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybv_acc_128B">;
+def int_hexagon_S2_asl_i_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyubv,VD_ftype_VIVI,2)
-// tag : V6_vmpyubv
-def int_hexagon_V6_vmpyubv :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyubv">;
+def int_hexagon_S2_vcnegh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_vcnegh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyubv_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpyubv_128B
-def int_hexagon_V6_vmpyubv_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyubv_128B">;
+def int_hexagon_A4_vcmpweqi :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpweqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyubv_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyubv_acc
-def int_hexagon_V6_vmpyubv_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyubv_acc">;
+def int_hexagon_M2_vdmpyrs_s0 :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_M2_vdmpyrs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyubv_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyubv_acc_128B
-def int_hexagon_V6_vmpyubv_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyubv_acc_128B">;
+def int_hexagon_M2_vdmpyrs_s1 :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_M2_vdmpyrs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybusv,VD_ftype_VIVI,2)
-// tag : V6_vmpybusv
-def int_hexagon_V6_vmpybusv :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybusv">;
+def int_hexagon_M4_xor_xacc :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_xor_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybusv_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpybusv_128B
-def int_hexagon_V6_vmpybusv_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybusv_128B">;
+def int_hexagon_M2_vdmpys_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vdmpys_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybusv_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpybusv_acc
-def int_hexagon_V6_vmpybusv_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybusv_acc">;
+def int_hexagon_M2_vdmpys_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vdmpys_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybusv_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpybusv_acc_128B
-def int_hexagon_V6_vmpybusv_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybusv_acc_128B">;
+def int_hexagon_A2_vavgubr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgubr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabusv,VD_ftype_VDVD,2)
-// tag : V6_vmpabusv
-def int_hexagon_V6_vmpabusv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpabusv">;
+def int_hexagon_M2_mpyu_hl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabusv_128B,VD_ftype_VDVD,2)
-// tag : V6_vmpabusv_128B
-def int_hexagon_V6_vmpabusv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vmpabusv_128B">;
+def int_hexagon_M2_mpyu_hl_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuuv,VD_ftype_VDVD,2)
-// tag : V6_vmpabuuv
-def int_hexagon_V6_vmpabuuv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpabuuv">;
+def int_hexagon_S2_asl_r_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuuv_128B,VD_ftype_VDVD,2)
-// tag : V6_vmpabuuv_128B
-def int_hexagon_V6_vmpabuuv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vmpabuuv_128B">;
+def int_hexagon_S2_cl0p :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_cl0p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhv,VD_ftype_VIVI,2)
-// tag : V6_vmpyhv
-def int_hexagon_V6_vmpyhv :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhv">;
+def int_hexagon_S2_valignib :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_valignib">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhv_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpyhv_128B
-def int_hexagon_V6_vmpyhv_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhv_128B">;
+def int_hexagon_F2_sffixupd :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sffixupd">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhv_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyhv_acc
-def int_hexagon_V6_vmpyhv_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhv_acc">;
+def int_hexagon_M2_mpy_sat_rnd_hl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhv_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyhv_acc_128B
-def int_hexagon_V6_vmpyhv_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhv_acc_128B">;
+def int_hexagon_M2_mpy_sat_rnd_hl_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhv,VD_ftype_VIVI,2)
-// tag : V6_vmpyuhv
-def int_hexagon_V6_vmpyuhv :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyuhv">;
+def int_hexagon_M2_cmacsc_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmacsc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpyuhv_128B
-def int_hexagon_V6_vmpyuhv_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyuhv_128B">;
+def int_hexagon_M2_cmacsc_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmacsc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyuhv_acc
-def int_hexagon_V6_vmpyuhv_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyuhv_acc">;
+def int_hexagon_S2_ct1 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_ct1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyuhv_acc_128B
-def int_hexagon_V6_vmpyuhv_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyuhv_acc_128B">;
+def int_hexagon_S2_ct0 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_ct0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhvsrs,VI_ftype_VIVI,2)
-// tag : V6_vmpyhvsrs
-def int_hexagon_V6_vmpyhvsrs :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyhvsrs">;
+def int_hexagon_M2_dpmpyuu_nac_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_nac_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhvsrs_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyhvsrs_128B
-def int_hexagon_V6_vmpyhvsrs_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhvsrs_128B">;
+def int_hexagon_M2_mmpyul_rs1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhus,VD_ftype_VIVI,2)
-// tag : V6_vmpyhus
-def int_hexagon_V6_vmpyhus :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhus">;
+def int_hexagon_S4_ntstbit_i :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhus_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpyhus_128B
-def int_hexagon_V6_vmpyhus_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhus_128B">;
+def int_hexagon_F2_sffixupr :
+Hexagon_float_float_Intrinsic<"HEXAGON_F2_sffixupr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhus_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyhus_acc
-def int_hexagon_V6_vmpyhus_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhus_acc">;
+def int_hexagon_S2_asr_r_p_xor :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhus_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyhus_acc_128B
-def int_hexagon_V6_vmpyhus_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhus_acc_128B">;
+def int_hexagon_M2_mpyud_acc_hl_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyih,VI_ftype_VIVI,2)
-// tag : V6_vmpyih
-def int_hexagon_V6_vmpyih :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyih">;
+def int_hexagon_M2_mpyud_acc_hl_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyih_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyih_128B
-def int_hexagon_V6_vmpyih_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyih_128B">;
+def int_hexagon_A2_vcmphgtu :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmphgtu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyih_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyih_acc
-def int_hexagon_V6_vmpyih_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyih_acc">;
+def int_hexagon_C2_andn :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_andn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyih_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyih_acc_128B
-def int_hexagon_V6_vmpyih_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyih_acc_128B">;
+def int_hexagon_M2_vmpy2s_s0pack :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_vmpy2s_s0pack">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyewuh,VI_ftype_VIVI,2)
-// tag : V6_vmpyewuh
-def int_hexagon_V6_vmpyewuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyewuh">;
+def int_hexagon_S4_addaddi :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addaddi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyewuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyewuh_128B
-def int_hexagon_V6_vmpyewuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyewuh_128B">;
+def int_hexagon_M2_mpyd_acc_ll_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh,VI_ftype_VIVI,2)
-// tag : V6_vmpyowh
-def int_hexagon_V6_vmpyowh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh">;
+def int_hexagon_M2_mpy_acc_sat_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyowh_128B
-def int_hexagon_V6_vmpyowh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_128B">;
+def int_hexagon_A4_rcmpeqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpeqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd,VI_ftype_VIVI,2)
-// tag : V6_vmpyowh_rnd
-def int_hexagon_V6_vmpyowh_rnd :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_rnd">;
+def int_hexagon_M4_xor_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyowh_rnd_128B
-def int_hexagon_V6_vmpyowh_rnd_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_128B">;
+def int_hexagon_S2_asl_i_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_sacc,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyowh_sacc
-def int_hexagon_V6_vmpyowh_sacc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_sacc">;
+def int_hexagon_M2_mmpyuh_rs1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_sacc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyowh_sacc_128B
-def int_hexagon_V6_vmpyowh_sacc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_sacc_128B">;
+def int_hexagon_S2_asr_r_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_sacc,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyowh_rnd_sacc
-def int_hexagon_V6_vmpyowh_rnd_sacc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc">;
+def int_hexagon_A4_round_ri :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_sacc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyowh_rnd_sacc_128B
-def int_hexagon_V6_vmpyowh_rnd_sacc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc_128B">;
+def int_hexagon_A2_max :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_max">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyieoh,VI_ftype_VIVI,2)
-// tag : V6_vmpyieoh
-def int_hexagon_V6_vmpyieoh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyieoh">;
+def int_hexagon_A4_round_rr :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_rr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyieoh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyieoh_128B
-def int_hexagon_V6_vmpyieoh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyieoh_128B">;
+def int_hexagon_A4_combineii :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineii">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh,VI_ftype_VIVI,2)
-// tag : V6_vmpyiewuh
-def int_hexagon_V6_vmpyiewuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewuh">;
+def int_hexagon_A4_combineir :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineir">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyiewuh_128B
-def int_hexagon_V6_vmpyiewuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewuh_128B">;
+def int_hexagon_C4_and_orn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_orn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiowh,VI_ftype_VIVI,2)
-// tag : V6_vmpyiowh
-def int_hexagon_V6_vmpyiowh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiowh">;
+def int_hexagon_M5_vmacbuu :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M5_vmacbuu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiowh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyiowh_128B
-def int_hexagon_V6_vmpyiowh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiowh_128B">;
+def int_hexagon_A4_rcmpeq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewh_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyiewh_acc
-def int_hexagon_V6_vmpyiewh_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewh_acc">;
+def int_hexagon_M4_cmpyr_whc :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyr_whc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewh_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyiewh_acc_128B
-def int_hexagon_V6_vmpyiewh_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewh_acc_128B">;
+def int_hexagon_S2_lsr_i_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyiewuh_acc
-def int_hexagon_V6_vmpyiewuh_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc">;
+def int_hexagon_S2_vzxtbh :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vzxtbh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyiewuh_acc_128B
-def int_hexagon_V6_vmpyiewuh_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc_128B">;
+def int_hexagon_M2_mmacuhs_rs1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacuhs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyub,VD_ftype_VISI,2)
-// tag : V6_vmpyub
-def int_hexagon_V6_vmpyub :
-Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyub">;
+def int_hexagon_S2_asr_r_r_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyub_128B,VD_ftype_VISI,2)
-// tag : V6_vmpyub_128B
-def int_hexagon_V6_vmpyub_128B :
-Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyub_128B">;
+def int_hexagon_A2_combinew :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A2_combinew">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyub_acc,VD_ftype_VDVISI,3)
-// tag : V6_vmpyub_acc
-def int_hexagon_V6_vmpyub_acc :
-Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyub_acc">;
+def int_hexagon_M2_mpy_acc_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyub_acc_128B,VD_ftype_VDVISI,3)
-// tag : V6_vmpyub_acc_128B
-def int_hexagon_V6_vmpyub_acc_128B :
-Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyub_acc_128B">;
+def int_hexagon_M2_mpy_acc_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybus,VD_ftype_VISI,2)
-// tag : V6_vmpybus
-def int_hexagon_V6_vmpybus :
-Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpybus">;
+def int_hexagon_M2_cmpyi_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpyi_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybus_128B,VD_ftype_VISI,2)
-// tag : V6_vmpybus_128B
-def int_hexagon_V6_vmpybus_128B :
-Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpybus_128B">;
+def int_hexagon_S2_asl_r_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybus_acc,VD_ftype_VDVISI,3)
-// tag : V6_vmpybus_acc
-def int_hexagon_V6_vmpybus_acc :
-Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpybus_acc">;
+def int_hexagon_S4_ori_asl_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybus_acc_128B,VD_ftype_VDVISI,3)
-// tag : V6_vmpybus_acc_128B
-def int_hexagon_V6_vmpybus_acc_128B :
-Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpybus_acc_128B">;
+def int_hexagon_C4_nbitsset :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsset">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabus,VD_ftype_VDSI,2)
-// tag : V6_vmpabus
-def int_hexagon_V6_vmpabus :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabus">;
+def int_hexagon_M2_mpyu_acc_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabus_128B,VD_ftype_VDSI,2)
-// tag : V6_vmpabus_128B
-def int_hexagon_V6_vmpabus_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabus_128B">;
+def int_hexagon_M2_mpyu_acc_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabus_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vmpabus_acc
-def int_hexagon_V6_vmpabus_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabus_acc">;
+def int_hexagon_M2_mpyu_ll_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabus_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vmpabus_acc_128B
-def int_hexagon_V6_vmpabus_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabus_acc_128B">;
+def int_hexagon_M2_mpyu_ll_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahb,VD_ftype_VDSI,2)
-// tag : V6_vmpahb
-def int_hexagon_V6_vmpahb :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpahb">;
+def int_hexagon_A2_addh_l16_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahb_128B,VD_ftype_VDSI,2)
-// tag : V6_vmpahb_128B
-def int_hexagon_V6_vmpahb_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpahb_128B">;
+def int_hexagon_S2_lsr_r_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahb_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vmpahb_acc
-def int_hexagon_V6_vmpahb_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpahb_acc">;
+def int_hexagon_A4_modwrapu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_modwrapu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahb_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vmpahb_acc_128B
-def int_hexagon_V6_vmpahb_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpahb_acc_128B">;
+def int_hexagon_A4_rcmpneq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpneq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyh,VD_ftype_VISI,2)
-// tag : V6_vmpyh
-def int_hexagon_V6_vmpyh :
-Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyh">;
+def int_hexagon_M2_mpyd_acc_hh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyh_128B,VD_ftype_VISI,2)
-// tag : V6_vmpyh_128B
-def int_hexagon_V6_vmpyh_128B :
-Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyh_128B">;
+def int_hexagon_M2_mpyd_acc_hh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhsat_acc,VD_ftype_VDVISI,3)
-// tag : V6_vmpyhsat_acc
-def int_hexagon_V6_vmpyhsat_acc :
-Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyhsat_acc">;
+def int_hexagon_F2_sfimm_p :
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhsat_acc_128B,VD_ftype_VDVISI,3)
-// tag : V6_vmpyhsat_acc_128B
-def int_hexagon_V6_vmpyhsat_acc_128B :
-Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyhsat_acc_128B">;
+def int_hexagon_F2_sfimm_n :
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_n">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhss,VI_ftype_VISI,2)
-// tag : V6_vmpyhss
-def int_hexagon_V6_vmpyhss :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyhss">;
+def int_hexagon_M4_cmpyr_wh :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyr_wh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhss_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyhss_128B
-def int_hexagon_V6_vmpyhss_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyhss_128B">;
+def int_hexagon_S2_lsl_r_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhsrs,VI_ftype_VISI,2)
-// tag : V6_vmpyhsrs
-def int_hexagon_V6_vmpyhsrs :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyhsrs">;
+def int_hexagon_A2_vavgub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhsrs_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyhsrs_128B
-def int_hexagon_V6_vmpyhsrs_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyhsrs_128B">;
+def int_hexagon_F2_conv_d2sf :
+Hexagon_float_i64_Intrinsic<"HEXAGON_F2_conv_d2sf">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuh,VD_ftype_VISI,2)
-// tag : V6_vmpyuh
-def int_hexagon_V6_vmpyuh :
-Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyuh">;
+def int_hexagon_A2_vavguh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuh_128B,VD_ftype_VISI,2)
-// tag : V6_vmpyuh_128B
-def int_hexagon_V6_vmpyuh_128B :
-Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyuh_128B">;
+def int_hexagon_A4_cmpbeqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbeqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuh_acc,VD_ftype_VDVISI,3)
-// tag : V6_vmpyuh_acc
-def int_hexagon_V6_vmpyuh_acc :
-Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyuh_acc">;
+def int_hexagon_F2_sfcmpuo :
+Hexagon_i32_floatfloat_Intrinsic<"HEXAGON_F2_sfcmpuo">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuh_acc_128B,VD_ftype_VDVISI,3)
-// tag : V6_vmpyuh_acc_128B
-def int_hexagon_V6_vmpyuh_acc_128B :
-Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyuh_acc_128B">;
+def int_hexagon_A2_vavguw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyihb,VI_ftype_VISI,2)
-// tag : V6_vmpyihb
-def int_hexagon_V6_vmpyihb :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyihb">;
+def int_hexagon_S2_asr_i_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyihb_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyihb_128B
-def int_hexagon_V6_vmpyihb_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyihb_128B">;
+def int_hexagon_S2_vsatwh_nopack :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_vsatwh_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyihb_acc,VI_ftype_VIVISI,3)
-// tag : V6_vmpyihb_acc
-def int_hexagon_V6_vmpyihb_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyihb_acc">;
+def int_hexagon_M2_mpyd_hh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyihb_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vmpyihb_acc_128B
-def int_hexagon_V6_vmpyihb_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyihb_acc_128B">;
+def int_hexagon_M2_mpyd_hh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwb,VI_ftype_VISI,2)
-// tag : V6_vmpyiwb
-def int_hexagon_V6_vmpyiwb :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwb">;
+def int_hexagon_S2_lsl_r_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyiwb_128B
-def int_hexagon_V6_vmpyiwb_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwb_128B">;
+def int_hexagon_A2_minu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_minu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_acc,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwb_acc
-def int_hexagon_V6_vmpyiwb_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwb_acc">;
+def int_hexagon_M2_mpy_sat_lh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwb_acc_128B
-def int_hexagon_V6_vmpyiwb_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwb_acc_128B">;
+def int_hexagon_M4_or_andn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_or_andn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwh,VI_ftype_VISI,2)
-// tag : V6_vmpyiwh
-def int_hexagon_V6_vmpyiwh :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwh">;
+def int_hexagon_A2_minp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_minp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyiwh_128B
-def int_hexagon_V6_vmpyiwh_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwh_128B">;
+def int_hexagon_S4_or_andix :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andix">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_acc,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwh_acc
-def int_hexagon_V6_vmpyiwh_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwh_acc">;
+def int_hexagon_M2_mpy_rnd_lh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwh_acc_128B
-def int_hexagon_V6_vmpyiwh_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwh_acc_128B">;
+def int_hexagon_M2_mpy_rnd_lh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vand,VI_ftype_VIVI,2)
-// tag : V6_vand
-def int_hexagon_V6_vand :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vand">;
+def int_hexagon_M2_mmpyuh_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vand_128B,VI_ftype_VIVI,2)
-// tag : V6_vand_128B
-def int_hexagon_V6_vand_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vand_128B">;
+def int_hexagon_M2_mmpyuh_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vor,VI_ftype_VIVI,2)
-// tag : V6_vor
-def int_hexagon_V6_vor :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vor">;
+def int_hexagon_M2_mpy_acc_sat_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vor_128B,VI_ftype_VIVI,2)
-// tag : V6_vor_128B
-def int_hexagon_V6_vor_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vor_128B">;
+def int_hexagon_F2_sfcmpge :
+Hexagon_i32_floatfloat_Intrinsic<"HEXAGON_F2_sfcmpge">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vxor,VI_ftype_VIVI,2)
-// tag : V6_vxor
-def int_hexagon_V6_vxor :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vxor">;
+def int_hexagon_F2_sfmin :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfmin">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vxor_128B,VI_ftype_VIVI,2)
-// tag : V6_vxor_128B
-def int_hexagon_V6_vxor_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vxor_128B">;
+def int_hexagon_F2_sfcmpgt :
+Hexagon_i32_floatfloat_Intrinsic<"HEXAGON_F2_sfcmpgt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnot,VI_ftype_VI,1)
-// tag : V6_vnot
-def int_hexagon_V6_vnot :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnot">;
+def int_hexagon_M4_vpmpyh :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M4_vpmpyh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnot_128B,VI_ftype_VI,1)
-// tag : V6_vnot_128B
-def int_hexagon_V6_vnot_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnot_128B">;
+def int_hexagon_M2_mmacuhs_rs0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacuhs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandqrt,VI_ftype_QVSI,2)
-// tag : V6_vandqrt
-def int_hexagon_V6_vandqrt :
-Hexagon_v512v64ii_Intrinsic<"HEXAGON_V6_vandqrt">;
+def int_hexagon_M2_mpyd_rnd_lh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandqrt_128B,VI_ftype_QVSI,2)
-// tag : V6_vandqrt_128B
-def int_hexagon_V6_vandqrt_128B :
-Hexagon_v1024v128ii_Intrinsic<"HEXAGON_V6_vandqrt_128B">;
+def int_hexagon_M2_mpyd_rnd_lh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandqrt_acc,VI_ftype_VIQVSI,3)
-// tag : V6_vandqrt_acc
-def int_hexagon_V6_vandqrt_acc :
-Hexagon_v512v512v64ii_Intrinsic<"HEXAGON_V6_vandqrt_acc">;
+def int_hexagon_A2_roundsat :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_A2_roundsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandqrt_acc_128B,VI_ftype_VIQVSI,3)
-// tag : V6_vandqrt_acc_128B
-def int_hexagon_V6_vandqrt_acc_128B :
-Hexagon_v1024v1024v128ii_Intrinsic<"HEXAGON_V6_vandqrt_acc_128B">;
+def int_hexagon_S2_ct1p :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_ct1p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvrt,QV_ftype_VISI,2)
-// tag : V6_vandvrt
-def int_hexagon_V6_vandvrt :
-Hexagon_v64iv512i_Intrinsic<"HEXAGON_V6_vandvrt">;
+def int_hexagon_S4_extract_rp :
+Hexagon_i32_i32i64_Intrinsic<"HEXAGON_S4_extract_rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvrt_128B,QV_ftype_VISI,2)
-// tag : V6_vandvrt_128B
-def int_hexagon_V6_vandvrt_128B :
-Hexagon_v128iv1024i_Intrinsic<"HEXAGON_V6_vandvrt_128B">;
+def int_hexagon_S2_lsl_r_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvrt_acc,QV_ftype_QVVISI,3)
-// tag : V6_vandvrt_acc
-def int_hexagon_V6_vandvrt_acc :
-Hexagon_v64iv64iv512i_Intrinsic<"HEXAGON_V6_vandvrt_acc">;
+def int_hexagon_C4_cmplteui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvrt_acc_128B,QV_ftype_QVVISI,3)
-// tag : V6_vandvrt_acc_128B
-def int_hexagon_V6_vandvrt_acc_128B :
-Hexagon_v128iv128iv1024i_Intrinsic<"HEXAGON_V6_vandvrt_acc_128B">;
+def int_hexagon_S4_addi_lsr_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw,QV_ftype_VIVI,2)
-// tag : V6_vgtw
-def int_hexagon_V6_vgtw :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtw">;
+def int_hexagon_A4_tfrcpp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A4_tfrcpp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_128B,QV_ftype_VIVI,2)
-// tag : V6_vgtw_128B
-def int_hexagon_V6_vgtw_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_128B">;
+def int_hexagon_S2_asr_i_svw_trun :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S2_asr_i_svw_trun">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_and
-def int_hexagon_V6_vgtw_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_and">;
+def int_hexagon_A4_cmphgti :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgti">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_and_128B
-def int_hexagon_V6_vgtw_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_and_128B">;
+def int_hexagon_A4_vrminh :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_or
-def int_hexagon_V6_vgtw_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_or">;
+def int_hexagon_A4_vrminw :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_or_128B
-def int_hexagon_V6_vgtw_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_or_128B">;
+def int_hexagon_A4_cmphgtu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgtu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_xor
-def int_hexagon_V6_vgtw_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_xor">;
+def int_hexagon_S2_insertp_rp :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_S2_insertp_rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_xor_128B
-def int_hexagon_V6_vgtw_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_xor_128B">;
+def int_hexagon_A2_vnavghcr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavghcr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw,QV_ftype_VIVI,2)
-// tag : V6_veqw
-def int_hexagon_V6_veqw :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqw">;
+def int_hexagon_S4_subi_asl_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_128B,QV_ftype_VIVI,2)
-// tag : V6_veqw_128B
-def int_hexagon_V6_veqw_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_128B">;
+def int_hexagon_S2_lsl_r_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_and,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_and
-def int_hexagon_V6_veqw_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_and">;
+def int_hexagon_M2_mpy_hh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_and_128B
-def int_hexagon_V6_veqw_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_and_128B">;
+def int_hexagon_A2_vsubws :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubws">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_or,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_or
-def int_hexagon_V6_veqw_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_or">;
+def int_hexagon_A2_sath :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_sath">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_or_128B
-def int_hexagon_V6_veqw_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_or_128B">;
+def int_hexagon_S2_asl_r_p_xor :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_xor,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_xor
-def int_hexagon_V6_veqw_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_xor">;
+def int_hexagon_A2_satb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_satb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_xor_128B
-def int_hexagon_V6_veqw_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_xor_128B">;
+def int_hexagon_C2_cmpltu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpltu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth,QV_ftype_VIVI,2)
-// tag : V6_vgth
-def int_hexagon_V6_vgth :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgth">;
+def int_hexagon_S2_insertp :
+Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S2_insertp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_128B,QV_ftype_VIVI,2)
-// tag : V6_vgth_128B
-def int_hexagon_V6_vgth_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_128B">;
+def int_hexagon_M2_mpyd_rnd_ll_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_and
-def int_hexagon_V6_vgth_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_and">;
+def int_hexagon_M2_mpyd_rnd_ll_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_and_128B
-def int_hexagon_V6_vgth_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_and_128B">;
+def int_hexagon_S2_lsr_i_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_or
-def int_hexagon_V6_vgth_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_or">;
+def int_hexagon_S2_extractup_rp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_extractup_rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_or_128B
-def int_hexagon_V6_vgth_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_or_128B">;
+def int_hexagon_S4_vxaddsubw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxaddsubw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_xor
-def int_hexagon_V6_vgth_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_xor">;
+def int_hexagon_S4_vxaddsubh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxaddsubh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_xor_128B
-def int_hexagon_V6_vgth_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_xor_128B">;
+def int_hexagon_A2_asrh :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_asrh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh,QV_ftype_VIVI,2)
-// tag : V6_veqh
-def int_hexagon_V6_veqh :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqh">;
+def int_hexagon_S4_extractp_rp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_extractp_rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_128B,QV_ftype_VIVI,2)
-// tag : V6_veqh_128B
-def int_hexagon_V6_veqh_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_128B">;
+def int_hexagon_S2_lsr_r_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_and,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_and
-def int_hexagon_V6_veqh_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_and">;
+def int_hexagon_M2_mpyd_nac_ll_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_and_128B
-def int_hexagon_V6_veqh_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_and_128B">;
+def int_hexagon_M2_mpyd_nac_ll_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_or,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_or
-def int_hexagon_V6_veqh_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_or">;
+def int_hexagon_C2_or :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_or_128B
-def int_hexagon_V6_veqh_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_or_128B">;
+def int_hexagon_M2_mmpyul_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_xor,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_xor
-def int_hexagon_V6_veqh_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_xor">;
+def int_hexagon_M2_vrcmacr_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vrcmacr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_xor_128B
-def int_hexagon_V6_veqh_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_xor_128B">;
+def int_hexagon_A2_xor :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb,QV_ftype_VIVI,2)
-// tag : V6_vgtb
-def int_hexagon_V6_vgtb :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtb">;
+def int_hexagon_A2_add :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_add">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_128B,QV_ftype_VIVI,2)
-// tag : V6_vgtb_128B
-def int_hexagon_V6_vgtb_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_128B">;
+def int_hexagon_A2_vsububs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsububs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_and
-def int_hexagon_V6_vgtb_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_and">;
+def int_hexagon_M2_vmpy2s_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_vmpy2s_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_and_128B
-def int_hexagon_V6_vgtb_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_and_128B">;
+def int_hexagon_M2_vmpy2s_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_vmpy2s_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_or
-def int_hexagon_V6_vgtb_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_or">;
+def int_hexagon_A2_vraddub_acc :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_A2_vraddub_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_or_128B
-def int_hexagon_V6_vgtb_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_or_128B">;
+def int_hexagon_F2_sfinvsqrta :
+Hexagon_floati32_float_Intrinsic<"HEXAGON_F2_sfinvsqrta">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_xor
-def int_hexagon_V6_vgtb_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_xor">;
+def int_hexagon_S2_ct0p :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_ct0p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_xor_128B
-def int_hexagon_V6_vgtb_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_xor_128B">;
+def int_hexagon_A2_svaddh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svaddh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb,QV_ftype_VIVI,2)
-// tag : V6_veqb
-def int_hexagon_V6_veqb :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqb">;
+def int_hexagon_S2_vcrotate :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_vcrotate">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_128B,QV_ftype_VIVI,2)
-// tag : V6_veqb_128B
-def int_hexagon_V6_veqb_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_128B">;
+def int_hexagon_A2_aslh :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_aslh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_and,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_and
-def int_hexagon_V6_veqb_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_and">;
+def int_hexagon_A2_subh_h16_lh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_lh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_and_128B
-def int_hexagon_V6_veqb_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_and_128B">;
+def int_hexagon_A2_subh_h16_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_ll">;
+
+def int_hexagon_M2_hmmpyl_rs1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_hmmpyl_rs1">;
+
+def int_hexagon_S2_asr_r_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_r_p">;
+
+def int_hexagon_S2_vsplatrh :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vsplatrh">;
+
+def int_hexagon_S2_asr_r_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_r_r">;
+
+def int_hexagon_A2_addh_h16_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_hl">;
+
+def int_hexagon_S2_vsplatrb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_vsplatrb">;
+
+def int_hexagon_A2_addh_h16_hh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_hh">;
+
+def int_hexagon_M2_cmpyr_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpyr_s0">;
+
+def int_hexagon_M2_dpmpyss_rnd_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_rnd_s0">;
+
+def int_hexagon_C2_muxri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxri">;
+
+def int_hexagon_M2_vmac2es_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vmac2es_s0">;
+
+def int_hexagon_M2_vmac2es_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vmac2es_s1">;
+
+def int_hexagon_C2_pxfer_map :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_pxfer_map">;
+
+def int_hexagon_M2_mpyu_lh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_lh_s1">;
+
+def int_hexagon_M2_mpyu_lh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_lh_s0">;
+
+def int_hexagon_S2_asl_i_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_or">;
+
+def int_hexagon_M2_mpyd_acc_hl_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s0">;
+
+def int_hexagon_M2_mpyd_acc_hl_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s1">;
+
+def int_hexagon_S2_asr_r_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_r_p_nac">;
+
+def int_hexagon_A2_vaddw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddw">;
+
+def int_hexagon_S2_asr_i_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_and">;
+
+def int_hexagon_A2_vaddh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddh">;
+
+def int_hexagon_M2_mpy_nac_sat_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_lh_s1">;
+
+def int_hexagon_M2_mpy_nac_sat_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_lh_s0">;
+
+def int_hexagon_C2_cmpeqp :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpeqp">;
+
+def int_hexagon_M4_mpyri_addi :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addi">;
+
+def int_hexagon_A2_not :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_not">;
+
+def int_hexagon_S4_andi_lsr_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_lsr_ri">;
+
+def int_hexagon_M2_macsip :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsip">;
+
+def int_hexagon_A2_tfrcrr :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrcrr">;
+
+def int_hexagon_M2_macsin :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsin">;
+
+def int_hexagon_C2_orn :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_orn">;
+
+def int_hexagon_M4_and_andn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_and_andn">;
+
+def int_hexagon_F2_sfmpy :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfmpy">;
+
+def int_hexagon_M2_mpyud_nac_hh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hh_s1">;
+
+def int_hexagon_M2_mpyud_nac_hh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hh_s0">;
+
+def int_hexagon_S2_lsr_r_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_acc">;
+
+def int_hexagon_S2_asr_r_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_r_vw">;
+
+def int_hexagon_M4_and_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_and_or">;
+
+def int_hexagon_S2_asr_r_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_r_vh">;
+
+def int_hexagon_C2_mask :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_C2_mask">;
+
+def int_hexagon_M2_mpy_nac_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_hh_s0">;
+
+def int_hexagon_M2_mpy_nac_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_hh_s1">;
+
+def int_hexagon_M2_mpy_up_s1_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_up_s1_sat">;
+
+def int_hexagon_A4_vcmpbgt :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A4_vcmpbgt">;
+
+def int_hexagon_M5_vrmacbsu :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M5_vrmacbsu">;
+
+def int_hexagon_S2_tableidxw_goodsyntax :
+Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxw_goodsyntax">;
+
+def int_hexagon_A2_vrsadub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vrsadub">;
+
+def int_hexagon_A2_tfrrcr :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrrcr">;
+
+def int_hexagon_M2_vrcmpys_acc_s1 :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_M2_vrcmpys_acc_s1">;
+
+def int_hexagon_F2_dfcmpge :
+Hexagon_i32_doubledouble_Intrinsic<"HEXAGON_F2_dfcmpge">;
+
+def int_hexagon_M2_accii :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_accii">;
+
+def int_hexagon_A5_vaddhubs :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A5_vaddhubs">;
+
+def int_hexagon_A2_vmaxw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxw">;
+
+def int_hexagon_A2_vmaxb :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxb">;
+
+def int_hexagon_A2_vmaxh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxh">;
+
+def int_hexagon_S2_vsxthw :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vsxthw">;
+
+def int_hexagon_S4_andi_asl_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_asl_ri">;
+
+def int_hexagon_S2_asl_i_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_nac">;
+
+def int_hexagon_S2_lsl_r_p_xor :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_xor">;
+
+def int_hexagon_C2_cmpgt :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgt">;
+
+def int_hexagon_F2_conv_df2d_chop :
+Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2d_chop">;
+
+def int_hexagon_M2_mpyu_nac_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_hl_s0">;
+
+def int_hexagon_M2_mpyu_nac_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_hl_s1">;
+
+def int_hexagon_F2_conv_sf2w :
+Hexagon_i32_float_Intrinsic<"HEXAGON_F2_conv_sf2w">;
+
+def int_hexagon_S2_lsr_r_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_or">;
+
+def int_hexagon_F2_sfclass :
+Hexagon_i32_floati32_Intrinsic<"HEXAGON_F2_sfclass">;
+
+def int_hexagon_M2_mpyud_acc_lh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s0">;
+
+def int_hexagon_M4_xor_andn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_andn">;
+
+def int_hexagon_S2_addasl_rrri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_addasl_rrri">;
+
+def int_hexagon_M5_vdmpybsu :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M5_vdmpybsu">;
+
+def int_hexagon_M2_mpyu_nac_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s0">;
+
+def int_hexagon_M2_mpyu_nac_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s1">;
+
+def int_hexagon_A2_addi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addi">;
+
+def int_hexagon_A2_addp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addp">;
+
+def int_hexagon_M2_vmpy2s_s1pack :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_vmpy2s_s1pack">;
+
+def int_hexagon_S4_clbpnorm :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S4_clbpnorm">;
+
+def int_hexagon_A4_round_rr_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_rr_sat">;
+
+def int_hexagon_M2_nacci :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_nacci">;
+
+def int_hexagon_S2_shuffeh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_shuffeh">;
+
+def int_hexagon_S2_lsr_i_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_and">;
+
+def int_hexagon_M2_mpy_sat_rnd_hh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s1">;
+
+def int_hexagon_M2_mpy_sat_rnd_hh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s0">;
+
+def int_hexagon_F2_conv_sf2uw :
+Hexagon_i32_float_Intrinsic<"HEXAGON_F2_conv_sf2uw">;
+
+def int_hexagon_A2_vsubh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubh">;
+
+def int_hexagon_F2_conv_sf2ud :
+Hexagon_i64_float_Intrinsic<"HEXAGON_F2_conv_sf2ud">;
+
+def int_hexagon_A2_vsubw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubw">;
+
+def int_hexagon_A2_vcmpwgt :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpwgt">;
+
+def int_hexagon_M4_xor_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_or">;
+
+def int_hexagon_F2_conv_sf2uw_chop :
+Hexagon_i32_float_Intrinsic<"HEXAGON_F2_conv_sf2uw_chop">;
+
+def int_hexagon_S2_asl_r_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_r_vw">;
+
+def int_hexagon_S2_vsatwuh_nopack :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_vsatwuh_nopack">;
+
+def int_hexagon_S2_asl_r_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_r_vh">;
+
+def int_hexagon_A2_svsubuhs :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svsubuhs">;
+
+def int_hexagon_M5_vmpybsu :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M5_vmpybsu">;
+
+def int_hexagon_A2_subh_l16_sat_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_sat_ll">;
+
+def int_hexagon_C4_and_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_and">;
+
+def int_hexagon_M2_mpyu_acc_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_hl_s1">;
+
+def int_hexagon_M2_mpyu_acc_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_hl_s0">;
+
+def int_hexagon_S2_lsr_r_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p">;
+
+def int_hexagon_S2_lsr_r_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r">;
+
+def int_hexagon_A4_subp_c :
+Hexagon_i64i32_i64i64i32_Intrinsic<"HEXAGON_A4_subp_c">;
+
+def int_hexagon_A2_vsubhs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubhs">;
+
+def int_hexagon_C2_vitpack :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_vitpack">;
+
+def int_hexagon_A2_vavguhr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguhr">;
+
+def int_hexagon_S2_vsplicerb :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vsplicerb">;
+
+def int_hexagon_C4_nbitsclr :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsclr">;
+
+def int_hexagon_A2_vcmpbgtu :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpbgtu">;
+
+def int_hexagon_M2_cmpys_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpys_s1">;
+
+def int_hexagon_M2_cmpys_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpys_s0">;
+
+def int_hexagon_F2_dfcmpuo :
+Hexagon_i32_doubledouble_Intrinsic<"HEXAGON_F2_dfcmpuo">;
+
+def int_hexagon_S2_shuffob :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_shuffob">;
+
+def int_hexagon_C2_and :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_and">;
+
+def int_hexagon_S5_popcountp :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S5_popcountp">;
+
+def int_hexagon_S4_extractp :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_extractp">;
+
+def int_hexagon_S2_cl0 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_cl0">;
+
+def int_hexagon_A4_vcmpbgti :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgti">;
+
+def int_hexagon_M2_mmacls_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacls_s1">;
+
+def int_hexagon_M2_mmacls_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacls_s0">;
+
+def int_hexagon_C4_cmpneq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpneq">;
+
+def int_hexagon_M2_vmac2es :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vmac2es">;
+
+def int_hexagon_M2_vdmacs_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vdmacs_s0">;
+
+def int_hexagon_M2_vdmacs_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vdmacs_s1">;
+
+def int_hexagon_M2_mpyud_ll_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_ll_s0">;
+
+def int_hexagon_M2_mpyud_ll_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_ll_s1">;
+
+def int_hexagon_S2_clb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_clb">;
+
+def int_hexagon_M2_mpy_nac_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_ll_s0">;
+
+def int_hexagon_M2_mpy_nac_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_ll_s1">;
+
+def int_hexagon_M2_mpyd_nac_hl_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hl_s1">;
+
+def int_hexagon_M2_mpyd_nac_hl_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hl_s0">;
+
+def int_hexagon_M2_maci :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_maci">;
+
+def int_hexagon_A2_vmaxuh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxuh">;
+
+def int_hexagon_A4_bitspliti :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_bitspliti">;
+
+def int_hexagon_A2_vmaxub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxub">;
+
+def int_hexagon_M2_mpyud_hh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_hh_s0">;
+
+def int_hexagon_M2_mpyud_hh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_hh_s1">;
+
+def int_hexagon_M2_vrmac_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vrmac_s0">;
+
+def int_hexagon_M2_mpy_sat_lh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_lh_s0">;
+
+def int_hexagon_S2_asl_r_r_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_sat">;
+
+def int_hexagon_F2_conv_sf2d :
+Hexagon_i64_float_Intrinsic<"HEXAGON_F2_conv_sf2d">;
+
+def int_hexagon_S2_asr_r_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_nac">;
+
+def int_hexagon_F2_dfimm_n :
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_n">;
+
+def int_hexagon_A4_cmphgt :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgt">;
+
+def int_hexagon_F2_dfimm_p :
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_p">;
+
+def int_hexagon_M2_mpyud_acc_lh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s1">;
+
+def int_hexagon_M2_vcmpy_s1_sat_r :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_r">;
+
+def int_hexagon_M4_mpyri_addr_u2 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr_u2">;
+
+def int_hexagon_M2_vcmpy_s1_sat_i :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_i">;
+
+def int_hexagon_S2_lsl_r_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_nac">;
+
+def int_hexagon_M5_vrmacbuu :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M5_vrmacbuu">;
+
+def int_hexagon_S5_asrhub_rnd_sat_goodsyntax :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_rnd_sat_goodsyntax">;
+
+def int_hexagon_S2_vspliceib :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vspliceib">;
+
+def int_hexagon_M2_dpmpyss_acc_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_acc_s0">;
+
+def int_hexagon_M2_cnacs_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cnacs_s1">;
+
+def int_hexagon_M2_cnacs_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cnacs_s0">;
+
+def int_hexagon_A2_maxu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_maxu">;
+
+def int_hexagon_A2_maxp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_maxp">;
+
+def int_hexagon_A2_andir :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_andir">;
+
+def int_hexagon_F2_sfrecipa :
+Hexagon_floati32_floatfloat_Intrinsic<"HEXAGON_F2_sfrecipa">;
+
+def int_hexagon_A2_combineii :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A2_combineii">;
+
+def int_hexagon_A4_orn :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_orn">;
+
+def int_hexagon_A4_cmpbgtui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtui">;
+
+def int_hexagon_S2_lsr_r_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r_or">;
+
+def int_hexagon_A4_vcmpbeqi :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbeqi">;
+
+def int_hexagon_S2_lsl_r_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r">;
+
+def int_hexagon_S2_lsl_r_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p">;
+
+def int_hexagon_A2_or :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_or">;
+
+def int_hexagon_F2_dfcmpeq :
+Hexagon_i32_doubledouble_Intrinsic<"HEXAGON_F2_dfcmpeq">;
+
+def int_hexagon_C2_cmpeq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpeq">;
+
+def int_hexagon_A2_tfrp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_tfrp">;
+
+def int_hexagon_C4_and_andn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_andn">;
+
+def int_hexagon_S2_vsathub_nopack :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_vsathub_nopack">;
+
+def int_hexagon_A2_satuh :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_satuh">;
+
+def int_hexagon_A2_satub :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_satub">;
+
+def int_hexagon_M2_vrcmpys_s1 :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_M2_vrcmpys_s1">;
+
+def int_hexagon_S4_or_ori :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_ori">;
+
+def int_hexagon_C4_fastcorner9_not :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_fastcorner9_not">;
+
+def int_hexagon_A2_tfrih :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfrih">;
+
+def int_hexagon_A2_tfril :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfril">;
+
+def int_hexagon_M4_mpyri_addr :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr">;
+
+def int_hexagon_S2_vtrunehb :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vtrunehb">;
+
+def int_hexagon_A2_vabsw :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vabsw">;
+
+def int_hexagon_A2_vabsh :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vabsh">;
+
+def int_hexagon_F2_sfsub :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfsub">;
+
+def int_hexagon_C2_muxii :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxii">;
+
+def int_hexagon_C2_muxir :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxir">;
+
+def int_hexagon_A2_swiz :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_swiz">;
+
+def int_hexagon_S2_asr_i_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_and">;
+
+def int_hexagon_M2_cmpyrsc_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrsc_s0">;
+
+def int_hexagon_M2_cmpyrsc_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrsc_s1">;
+
+def int_hexagon_A2_vraddub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vraddub">;
+
+def int_hexagon_A4_tlbmatch :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_tlbmatch">;
+
+def int_hexagon_F2_conv_df2w_chop :
+Hexagon_i32_double_Intrinsic<"HEXAGON_F2_conv_df2w_chop">;
+
+def int_hexagon_A2_and :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_and">;
+
+def int_hexagon_S2_lsr_r_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_and">;
+
+def int_hexagon_M2_mpy_nac_sat_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s1">;
+
+def int_hexagon_M2_mpy_nac_sat_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s0">;
+
+def int_hexagon_S4_extract :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_extract">;
+
+def int_hexagon_A2_vcmpweq :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpweq">;
+
+def int_hexagon_M2_acci :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_acci">;
+
+def int_hexagon_S2_lsr_i_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_acc">;
+
+def int_hexagon_S2_lsr_i_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_or">;
+
+def int_hexagon_F2_conv_ud2sf :
+Hexagon_float_i64_Intrinsic<"HEXAGON_F2_conv_ud2sf">;
+
+def int_hexagon_A2_tfr :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfr">;
+
+def int_hexagon_S2_asr_i_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_or">;
+
+def int_hexagon_A2_subri :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subri">;
+
+def int_hexagon_A4_vrmaxuw :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxuw">;
+
+def int_hexagon_M5_vmpybuu :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M5_vmpybuu">;
+
+def int_hexagon_A4_vrmaxuh :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxuh">;
+
+def int_hexagon_S2_asl_i_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vw">;
+
+def int_hexagon_A2_vavgw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgw">;
+
+def int_hexagon_S2_brev :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_brev">;
+
+def int_hexagon_A2_vavgh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgh">;
+
+def int_hexagon_S2_clrbit_i :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_clrbit_i">;
+
+def int_hexagon_S2_asl_i_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vh">;
+
+def int_hexagon_S2_lsr_i_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_or">;
+
+def int_hexagon_S2_lsl_r_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_nac">;
+
+def int_hexagon_M2_mmpyl_rs1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyl_rs1">;
+
+def int_hexagon_M2_mpyud_hl_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_hl_s1">;
+
+def int_hexagon_M2_mmpyl_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyl_s0">;
+
+def int_hexagon_M2_mmpyl_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyl_s1">;
+
+def int_hexagon_M2_naccii :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_naccii">;
+
+def int_hexagon_S2_vrndpackwhs :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vrndpackwhs">;
+
+def int_hexagon_S2_vtrunewh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_vtrunewh">;
+
+def int_hexagon_M2_dpmpyss_nac_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_nac_s0">;
+
+def int_hexagon_M2_mpyd_ll_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_ll_s0">;
+
+def int_hexagon_M2_mpyd_ll_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_ll_s1">;
+
+def int_hexagon_M4_mac_up_s1_sat :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mac_up_s1_sat">;
+
+def int_hexagon_S4_vrcrotate_acc :
+Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate_acc">;
+
+def int_hexagon_F2_conv_uw2df :
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_conv_uw2df">;
+
+def int_hexagon_A2_vaddubs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddubs">;
+
+def int_hexagon_S2_asr_r_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_acc">;
+
+def int_hexagon_A2_orir :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_orir">;
+
+def int_hexagon_A2_andp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_andp">;
+
+def int_hexagon_S2_lfsp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_lfsp">;
+
+def int_hexagon_A2_min :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_min">;
+
+def int_hexagon_M2_mpysmi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysmi">;
+
+def int_hexagon_M2_vcmpy_s0_sat_r :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_r">;
+
+def int_hexagon_M2_mpyu_acc_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_ll_s1">;
+
+def int_hexagon_M2_mpyu_acc_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_ll_s0">;
+
+def int_hexagon_S2_asr_r_svw_trun :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S2_asr_r_svw_trun">;
+
+def int_hexagon_M2_mmpyh_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyh_s0">;
+
+def int_hexagon_M2_mmpyh_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyh_s1">;
+
+def int_hexagon_F2_conv_sf2df :
+Hexagon_double_float_Intrinsic<"HEXAGON_F2_conv_sf2df">;
+
+def int_hexagon_S2_vtrunohb :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vtrunohb">;
+
+def int_hexagon_F2_conv_sf2d_chop :
+Hexagon_i64_float_Intrinsic<"HEXAGON_F2_conv_sf2d_chop">;
+
+def int_hexagon_M2_mpyd_lh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_lh_s0">;
+
+def int_hexagon_F2_conv_df2w :
+Hexagon_i32_double_Intrinsic<"HEXAGON_F2_conv_df2w">;
+
+def int_hexagon_S5_asrhub_sat :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_sat">;
+
+def int_hexagon_S2_asl_i_r_xacc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_xacc">;
+
+def int_hexagon_F2_conv_df2d :
+Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2d">;
+
+def int_hexagon_M2_mmaculs_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmaculs_s1">;
+
+def int_hexagon_M2_mmaculs_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmaculs_s0">;
+
+def int_hexagon_A2_svadduhs :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svadduhs">;
+
+def int_hexagon_F2_conv_sf2w_chop :
+Hexagon_i32_float_Intrinsic<"HEXAGON_F2_conv_sf2w_chop">;
+
+def int_hexagon_S2_svsathub :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_svsathub">;
+
+def int_hexagon_M2_mpyd_rnd_hl_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_hl_s1">;
+
+def int_hexagon_M2_mpyd_rnd_hl_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_hl_s0">;
+
+def int_hexagon_S2_setbit_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_setbit_r">;
+
+def int_hexagon_A2_vavghr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavghr">;
+
+def int_hexagon_F2_sffma_sc :
+Hexagon_float_floatfloatfloati32_Intrinsic<"HEXAGON_F2_sffma_sc">;
+
+def int_hexagon_F2_dfclass :
+Hexagon_i32_doublei32_Intrinsic<"HEXAGON_F2_dfclass">;
+
+def int_hexagon_F2_conv_df2ud :
+Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2ud">;
+
+def int_hexagon_F2_conv_df2uw :
+Hexagon_i32_double_Intrinsic<"HEXAGON_F2_conv_df2uw">;
+
+def int_hexagon_M2_cmpyrs_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrs_s0">;
+
+def int_hexagon_M2_cmpyrs_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrs_s1">;
+
+def int_hexagon_C4_cmpltei :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpltei">;
+
+def int_hexagon_C4_cmplteu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteu">;
+
+def int_hexagon_A2_vsubb_map :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubb_map">;
+
+def int_hexagon_A2_subh_l16_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_ll">;
+
+def int_hexagon_S2_asr_i_r_rnd :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd">;
+
+def int_hexagon_M2_vrmpy_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrmpy_s0">;
+
+def int_hexagon_M2_mpyd_rnd_hh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_hh_s1">;
+
+def int_hexagon_M2_mpyd_rnd_hh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_hh_s0">;
+
+def int_hexagon_A2_minup :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_minup">;
+
+def int_hexagon_S2_valignrb :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_valignrb">;
+
+def int_hexagon_S2_asr_r_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_r_p_acc">;
+
+def int_hexagon_M2_mmpyl_rs0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyl_rs0">;
+
+def int_hexagon_M2_vrcmaci_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vrcmaci_s0">;
+
+def int_hexagon_A2_vaddub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddub">;
+
+def int_hexagon_A2_combine_lh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_lh">;
+
+def int_hexagon_M5_vdmacbsu :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M5_vdmacbsu">;
+
+def int_hexagon_A2_combine_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_ll">;
+
+def int_hexagon_M2_mpyud_hl_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_hl_s0">;
+
+def int_hexagon_M2_vrcmpyi_s0c :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrcmpyi_s0c">;
+
+def int_hexagon_S2_asr_i_p_rnd :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd">;
+
+def int_hexagon_A2_addpsat :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addpsat">;
+
+def int_hexagon_A2_svaddhs :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svaddhs">;
+
+def int_hexagon_S4_ori_lsr_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_lsr_ri">;
+
+def int_hexagon_M2_mpy_sat_rnd_ll_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s1">;
+
+def int_hexagon_M2_mpy_sat_rnd_ll_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s0">;
+
+def int_hexagon_A2_vminw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminw">;
+
+def int_hexagon_A2_vminh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminh">;
+
+def int_hexagon_M2_vrcmpyr_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrcmpyr_s0">;
+
+def int_hexagon_A2_vminb :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminb">;
+
+def int_hexagon_M2_vcmac_s0_sat_i :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vcmac_s0_sat_i">;
+
+def int_hexagon_M2_mpyud_lh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_lh_s0">;
+
+def int_hexagon_M2_mpyud_lh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_lh_s1">;
+
+def int_hexagon_S2_asl_r_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_or">;
+
+def int_hexagon_S4_lsli :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_lsli">;
+
+def int_hexagon_S2_lsl_r_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_vw">;
+
+def int_hexagon_M2_mpy_hh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_hh_s1">;
+
+def int_hexagon_M4_vrmpyeh_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M4_vrmpyeh_s0">;
+
+def int_hexagon_M4_vrmpyeh_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M4_vrmpyeh_s1">;
+
+def int_hexagon_M2_mpy_nac_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_lh_s0">;
+
+def int_hexagon_M2_mpy_nac_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_lh_s1">;
+
+def int_hexagon_M2_vraddh :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_M2_vraddh">;
+
+def int_hexagon_C2_tfrrp :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_tfrrp">;
+
+def int_hexagon_M2_mpy_acc_sat_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_ll_s0">;
+
+def int_hexagon_M2_mpy_acc_sat_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_ll_s1">;
+
+def int_hexagon_S2_vtrunowh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_vtrunowh">;
+
+def int_hexagon_A2_abs :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_abs">;
+
+def int_hexagon_A4_cmpbeq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbeq">;
+
+def int_hexagon_A2_negp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_negp">;
+
+def int_hexagon_S2_asl_i_r_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_sat">;
+
+def int_hexagon_A2_addh_l16_sat_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_sat_hl">;
+
+def int_hexagon_S2_vsatwuh :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vsatwuh">;
+
+def int_hexagon_F2_dfcmpgt :
+Hexagon_i32_doubledouble_Intrinsic<"HEXAGON_F2_dfcmpgt">;
+
+def int_hexagon_S2_svsathb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_svsathb">;
+
+def int_hexagon_C2_cmpgtup :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpgtup">;
+
+def int_hexagon_A4_cround_ri :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_ri">;
+
+def int_hexagon_S4_clbpaddi :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S4_clbpaddi">;
+
+def int_hexagon_A4_cround_rr :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_rr">;
+
+def int_hexagon_C2_mux :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_mux">;
+
+def int_hexagon_M2_dpmpyuu_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_s0">;
+
+def int_hexagon_S2_shuffeb :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_shuffeb">;
+
+def int_hexagon_A2_vminuw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminuw">;
+
+def int_hexagon_A2_vaddhs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddhs">;
+
+def int_hexagon_S2_insert_rp :
+Hexagon_i32_i32i32i64_Intrinsic<"HEXAGON_S2_insert_rp">;
+
+def int_hexagon_A2_vminuh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminuh">;
+
+def int_hexagon_A2_vminub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminub">;
+
+def int_hexagon_S2_extractu :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_extractu">;
+
+def int_hexagon_A2_svsubh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svsubh">;
+
+def int_hexagon_S4_clbaddi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_clbaddi">;
+
+def int_hexagon_F2_sffms :
+Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffms">;
+
+def int_hexagon_S2_vsxtbh :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vsxtbh">;
+
+def int_hexagon_M2_mpyud_nac_ll_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s1">;
+
+def int_hexagon_M2_mpyud_nac_ll_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s0">;
+
+def int_hexagon_A2_subp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_subp">;
+
+def int_hexagon_M2_vmpy2es_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vmpy2es_s1">;
+
+def int_hexagon_M2_vmpy2es_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vmpy2es_s0">;
+
+def int_hexagon_S4_parity :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_parity">;
+
+def int_hexagon_M2_mpy_acc_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s1">;
+
+def int_hexagon_M2_mpy_acc_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s0">;
+
+def int_hexagon_S4_addi_asl_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_asl_ri">;
+
+def int_hexagon_M2_mpyd_nac_hh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s1">;
+
+def int_hexagon_M2_mpyd_nac_hh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s0">;
+
+def int_hexagon_S2_asr_i_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_nac">;
+
+def int_hexagon_A4_cmpheqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpheqi">;
+
+def int_hexagon_S2_lsr_r_p_xor :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_xor">;
+
+def int_hexagon_M2_mpy_acc_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_hl_s1">;
+
+def int_hexagon_M2_mpy_acc_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_hl_s0">;
+
+def int_hexagon_F2_conv_sf2ud_chop :
+Hexagon_i64_float_Intrinsic<"HEXAGON_F2_conv_sf2ud_chop">;
+
+def int_hexagon_C2_cmpgeui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgeui">;
+
+def int_hexagon_M2_mpy_acc_sat_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s0">;
+
+def int_hexagon_M2_mpy_acc_sat_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s1">;
+
+def int_hexagon_S2_asl_r_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_and">;
+
+def int_hexagon_A2_addh_h16_sat_lh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_sat_lh">;
+
+def int_hexagon_A2_addh_h16_sat_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_sat_ll">;
+
+def int_hexagon_M4_nac_up_s1_sat :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_nac_up_s1_sat">;
+
+def int_hexagon_M2_mpyud_nac_lh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s1">;
+
+def int_hexagon_M2_mpyud_nac_lh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s0">;
+
+def int_hexagon_A4_round_ri_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri_sat">;
+
+def int_hexagon_M2_mpy_nac_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s0">;
+
+def int_hexagon_M2_mpy_nac_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s1">;
+
+def int_hexagon_A2_vavghcr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavghcr">;
+
+def int_hexagon_M2_mmacls_rs0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacls_rs0">;
+
+def int_hexagon_M2_mmacls_rs1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacls_rs1">;
+
+def int_hexagon_M2_cmaci_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmaci_s0">;
+
+def int_hexagon_S2_setbit_i :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_setbit_i">;
+
+def int_hexagon_S2_asl_i_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_or">;
+
+def int_hexagon_A4_andn :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_andn">;
+
+def int_hexagon_M5_vrmpybsu :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M5_vrmpybsu">;
+
+def int_hexagon_S2_vrndpackwh :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vrndpackwh">;
+
+def int_hexagon_M2_vcmac_s0_sat_r :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vcmac_s0_sat_r">;
+
+def int_hexagon_A2_vmaxuw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxuw">;
+
+def int_hexagon_C2_bitsclr :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsclr">;
+
+def int_hexagon_M2_xor_xacc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_xor_xacc">;
+
+def int_hexagon_A4_vcmpbgtui :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgtui">;
+
+def int_hexagon_A4_ornp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A4_ornp">;
+
+def int_hexagon_A2_tfrpi :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_tfrpi">;
+
+def int_hexagon_C4_and_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_or">;
+
+def int_hexagon_M2_mpy_nac_sat_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hh_s1">;
+
+def int_hexagon_M2_mpy_nac_sat_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hh_s0">;
+
+def int_hexagon_A2_subh_h16_sat_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_sat_ll">;
+
+def int_hexagon_A2_subh_h16_sat_lh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_sat_lh">;
+
+def int_hexagon_M2_vmpy2su_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_vmpy2su_s1">;
+
+def int_hexagon_M2_vmpy2su_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_vmpy2su_s0">;
+
+def int_hexagon_S2_asr_i_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_acc">;
+
+def int_hexagon_C4_nbitsclri :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsclri">;
+
+def int_hexagon_S2_lsr_i_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vh">;
+
+def int_hexagon_S2_lsr_i_p_xacc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc">;
+
+// V55 Scalar Instructions.
+
+def int_hexagon_A5_ACS :
+Hexagon_i64i32_i64i64i64_Intrinsic<"HEXAGON_A5_ACS">;
+
+// V60 Scalar Instructions.
+
+def int_hexagon_S6_rol_i_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_and">;
+
+def int_hexagon_S6_rol_i_r_xacc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_xacc">;
+
+def int_hexagon_S6_rol_i_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_and">;
+
+def int_hexagon_S6_rol_i_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_acc">;
+
+def int_hexagon_S6_rol_i_p_xacc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_xacc">;
+
+def int_hexagon_S6_rol_i_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S6_rol_i_p">;
+
+def int_hexagon_S6_rol_i_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_nac">;
+
+def int_hexagon_S6_rol_i_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_acc">;
+
+def int_hexagon_S6_rol_i_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_or">;
+
+def int_hexagon_S6_rol_i_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S6_rol_i_r">;
+
+def int_hexagon_S6_rol_i_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_nac">;
+
+def int_hexagon_S6_rol_i_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_or">;
+
+// V62 Scalar Instructions.
+
+def int_hexagon_S6_vtrunehb_ppp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S6_vtrunehb_ppp">;
+
+def int_hexagon_V6_ldntnt0 :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_ldntnt0">;
+
+def int_hexagon_M6_vabsdiffub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M6_vabsdiffub">;
+
+def int_hexagon_S6_vtrunohb_ppp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S6_vtrunohb_ppp">;
+
+def int_hexagon_M6_vabsdiffb :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M6_vabsdiffb">;
+
+def int_hexagon_A6_vminub_RdP :
+Hexagon_i64i32_i64i64_Intrinsic<"HEXAGON_A6_vminub_RdP">;
+
+def int_hexagon_S6_vsplatrbp :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S6_vsplatrbp">;
+
+// V65 Scalar Instructions.
+
+def int_hexagon_A6_vcmpbeq_notany :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A6_vcmpbeq_notany">;
+
+// V66 Scalar Instructions.
+
+def int_hexagon_F2_dfsub :
+Hexagon_double_doubledouble_Intrinsic<"HEXAGON_F2_dfsub">;
+
+def int_hexagon_F2_dfadd :
+Hexagon_double_doubledouble_Intrinsic<"HEXAGON_F2_dfadd">;
+
+def int_hexagon_M2_mnaci :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mnaci">;
+
+def int_hexagon_S2_mask :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_mask">;
+
+// V60 HVX Instructions.
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_or,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_or
def int_hexagon_V6_veqb_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_or">;
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_or_128B
def int_hexagon_V6_veqb_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_or_128B">;
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_or_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_xor,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_xor
-def int_hexagon_V6_veqb_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_xor">;
+def int_hexagon_V6_vminub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_xor_128B
-def int_hexagon_V6_veqb_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_xor_128B">;
+def int_hexagon_V6_vminub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vminub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw,QV_ftype_VIVI,2)
-// tag : V6_vgtuw
-def int_hexagon_V6_vgtuw :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw">;
+def int_hexagon_V6_vaslw_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vaslw_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_128B,QV_ftype_VIVI,2)
-// tag : V6_vgtuw_128B
-def int_hexagon_V6_vgtuw_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_128B">;
+def int_hexagon_V6_vaslw_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vaslw_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_and
-def int_hexagon_V6_vgtuw_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_and">;
+def int_hexagon_V6_vmpyhvsrs :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyhvsrs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_and_128B
-def int_hexagon_V6_vgtuw_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_and_128B">;
+def int_hexagon_V6_vmpyhvsrs_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyhvsrs_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_or
-def int_hexagon_V6_vgtuw_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_or">;
+def int_hexagon_V6_vsathub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsathub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_or_128B
-def int_hexagon_V6_vgtuw_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_or_128B">;
+def int_hexagon_V6_vsathub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsathub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_xor
-def int_hexagon_V6_vgtuw_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_xor">;
+def int_hexagon_V6_vaddh_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddh_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_xor_128B
-def int_hexagon_V6_vgtuw_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_xor_128B">;
+def int_hexagon_V6_vaddh_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddh_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh,QV_ftype_VIVI,2)
-// tag : V6_vgtuh
-def int_hexagon_V6_vgtuh :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh">;
+def int_hexagon_V6_vrmpybusi :
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_128B,QV_ftype_VIVI,2)
-// tag : V6_vgtuh_128B
-def int_hexagon_V6_vgtuh_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_128B">;
+def int_hexagon_V6_vrmpybusi_128B :
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_128B">;
+
+def int_hexagon_V6_vshufoh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshufoh">;
+
+def int_hexagon_V6_vshufoh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshufoh_128B">;
+
+def int_hexagon_V6_vasrwv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vasrwv">;
+
+def int_hexagon_V6_vasrwv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vasrwv_128B">;
+
+def int_hexagon_V6_vdmpyhsuisat :
+Hexagon_v16i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsuisat">;
+
+def int_hexagon_V6_vdmpyhsuisat_128B :
+Hexagon_v32i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_128B">;
+
+def int_hexagon_V6_vrsadubi_acc :
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc">;
+
+def int_hexagon_V6_vrsadubi_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B">;
+
+def int_hexagon_V6_vnavgw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgw">;
+
+def int_hexagon_V6_vnavgw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vnavgw_128B">;
+
+def int_hexagon_V6_vnavgh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgh">;
+
+def int_hexagon_V6_vnavgh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vnavgh_128B">;
+
+def int_hexagon_V6_vavgub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgub">;
+
+def int_hexagon_V6_vavgub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgub_128B">;
+
+def int_hexagon_V6_vsubb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubb">;
+
+def int_hexagon_V6_vsubb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubb_128B">;
+
+def int_hexagon_V6_vgtw_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_and">;
+
+def int_hexagon_V6_vgtw_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_and_128B">;
+
+def int_hexagon_V6_vavgubrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgubrnd">;
+
+def int_hexagon_V6_vavgubrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgubrnd_128B">;
+
+def int_hexagon_V6_vrmpybusv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpybusv">;
+
+def int_hexagon_V6_vrmpybusv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpybusv_128B">;
+
+def int_hexagon_V6_vsubbnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbnq">;
+
+def int_hexagon_V6_vsubbnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbnq_128B">;
+
+def int_hexagon_V6_vroundhb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vroundhb">;
+
+def int_hexagon_V6_vroundhb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vroundhb_128B">;
+
+def int_hexagon_V6_vadduhsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduhsat_dv">;
+
+def int_hexagon_V6_vadduhsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vadduhsat_dv_128B">;
+
+def int_hexagon_V6_vsububsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsububsat">;
+
+def int_hexagon_V6_vsububsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsububsat_128B">;
+
+def int_hexagon_V6_vmpabus_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpabus_acc">;
+
+def int_hexagon_V6_vmpabus_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vmpabus_acc_128B">;
+
+def int_hexagon_V6_vmux :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vmux">;
+
+def int_hexagon_V6_vmux_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vmux_128B">;
+
+def int_hexagon_V6_vmpyhus :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyhus">;
+
+def int_hexagon_V6_vmpyhus_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyhus_128B">;
+
+def int_hexagon_V6_vpackeb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackeb">;
+
+def int_hexagon_V6_vpackeb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackeb_128B">;
+
+def int_hexagon_V6_vsubhnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhnq">;
+
+def int_hexagon_V6_vsubhnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhnq_128B">;
+
+def int_hexagon_V6_vavghrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavghrnd">;
+
+def int_hexagon_V6_vavghrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavghrnd_128B">;
+
+def int_hexagon_V6_vtran2x2_map :
+Hexagon_v16i32v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vtran2x2_map">;
+
+def int_hexagon_V6_vtran2x2_map_128B :
+Hexagon_v32i32v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vtran2x2_map_128B">;
+
+def int_hexagon_V6_vdelta :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vdelta">;
+
+def int_hexagon_V6_vdelta_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vdelta_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_and
def int_hexagon_V6_vgtuh_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_and">;
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_and_128B
def int_hexagon_V6_vgtuh_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_and_128B">;
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_and_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_or
-def int_hexagon_V6_vgtuh_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_or">;
+def int_hexagon_V6_vtmpyhb :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vtmpyhb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_or_128B
-def int_hexagon_V6_vgtuh_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_or_128B">;
+def int_hexagon_V6_vtmpyhb_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vtmpyhb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_xor
-def int_hexagon_V6_vgtuh_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_xor">;
+def int_hexagon_V6_vpackob :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackob">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_xor_128B
-def int_hexagon_V6_vgtuh_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_xor_128B">;
+def int_hexagon_V6_vpackob_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackob_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub,QV_ftype_VIVI,2)
-// tag : V6_vgtub
-def int_hexagon_V6_vgtub :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtub">;
+def int_hexagon_V6_vmaxh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_128B,QV_ftype_VIVI,2)
-// tag : V6_vgtub_128B
-def int_hexagon_V6_vgtub_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_128B">;
+def int_hexagon_V6_vmaxh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmaxh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_and
-def int_hexagon_V6_vgtub_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_and">;
+def int_hexagon_V6_vtmpybus_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vtmpybus_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_and_128B
-def int_hexagon_V6_vgtub_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_and_128B">;
+def int_hexagon_V6_vtmpybus_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vtmpybus_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_or
-def int_hexagon_V6_vgtub_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_or">;
+def int_hexagon_V6_vsubuhsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubuhsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_or_128B
-def int_hexagon_V6_vgtub_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_or_128B">;
+def int_hexagon_V6_vsubuhsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuhsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_xor
-def int_hexagon_V6_vgtub_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_xor">;
+def int_hexagon_V6_vasrw_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrw_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_xor_128B
-def int_hexagon_V6_vgtub_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_xor_128B">;
+def int_hexagon_V6_vasrw_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrw_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_or,QV_ftype_QVQV,2)
-// tag : V6_pred_or
def int_hexagon_V6_pred_or :
-Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_or">;
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_or_128B,QV_ftype_QVQV,2)
-// tag : V6_pred_or_128B
def int_hexagon_V6_pred_or_128B :
-Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_or_128B">;
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_or_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_and,QV_ftype_QVQV,2)
-// tag : V6_pred_and
-def int_hexagon_V6_pred_and :
-Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_and">;
+def int_hexagon_V6_vrmpyub_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vrmpyub_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_and_128B,QV_ftype_QVQV,2)
-// tag : V6_pred_and_128B
-def int_hexagon_V6_pred_and_128B :
-Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_and_128B">;
+def int_hexagon_V6_vrmpyub_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vrmpyub_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_not,QV_ftype_QV,1)
-// tag : V6_pred_not
-def int_hexagon_V6_pred_not :
-Hexagon_v64iv64i_Intrinsic<"HEXAGON_V6_pred_not">;
+def int_hexagon_V6_lo :
+Hexagon_v16i32_v32i32_Intrinsic<"HEXAGON_V6_lo">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_not_128B,QV_ftype_QV,1)
-// tag : V6_pred_not_128B
-def int_hexagon_V6_pred_not_128B :
-Hexagon_v128iv128i_Intrinsic<"HEXAGON_V6_pred_not_128B">;
+def int_hexagon_V6_lo_128B :
+Hexagon_v32i32_v64i32_Intrinsic<"HEXAGON_V6_lo_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_xor,QV_ftype_QVQV,2)
-// tag : V6_pred_xor
-def int_hexagon_V6_pred_xor :
-Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_xor">;
+def int_hexagon_V6_vsubb_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubb_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_xor_128B,QV_ftype_QVQV,2)
-// tag : V6_pred_xor_128B
-def int_hexagon_V6_pred_xor_128B :
-Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_xor_128B">;
+def int_hexagon_V6_vsubb_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubb_dv_128B">;
+
+def int_hexagon_V6_vsubhsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhsat_dv">;
+
+def int_hexagon_V6_vsubhsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubhsat_dv_128B">;
+
+def int_hexagon_V6_vmpyiwh :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwh">;
+
+def int_hexagon_V6_vmpyiwh_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwh_128B">;
+
+def int_hexagon_V6_vmpyiwb :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwb">;
+
+def int_hexagon_V6_vmpyiwb_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwb_128B">;
+
+def int_hexagon_V6_ldu0 :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_ldu0">;
+
+def int_hexagon_V6_ldu0_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_ldu0_128B">;
+
+def int_hexagon_V6_vgtuh_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_xor">;
+
+def int_hexagon_V6_vgtuh_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_xor_128B">;
+
+def int_hexagon_V6_vgth_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_or">;
+
+def int_hexagon_V6_vgth_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_or_128B">;
+
+def int_hexagon_V6_vavgh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgh">;
+
+def int_hexagon_V6_vavgh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgh_128B">;
+
+def int_hexagon_V6_vlalignb :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlalignb">;
+
+def int_hexagon_V6_vlalignb_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlalignb_128B">;
+
+def int_hexagon_V6_vsh :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vsh">;
+
+def int_hexagon_V6_vsh_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vsh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_and_n,QV_ftype_QVQV,2)
-// tag : V6_pred_and_n
def int_hexagon_V6_pred_and_n :
-Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_and_n">;
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_and_n">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_and_n_128B,QV_ftype_QVQV,2)
-// tag : V6_pred_and_n_128B
def int_hexagon_V6_pred_and_n_128B :
-Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_and_n_128B">;
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_and_n_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_or_n,QV_ftype_QVQV,2)
-// tag : V6_pred_or_n
-def int_hexagon_V6_pred_or_n :
-Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_or_n">;
+def int_hexagon_V6_vsb :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vsb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_or_n_128B,QV_ftype_QVQV,2)
-// tag : V6_pred_or_n_128B
-def int_hexagon_V6_pred_or_n_128B :
-Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_or_n_128B">;
+def int_hexagon_V6_vsb_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vsb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_scalar2,QV_ftype_SI,1)
-// tag : V6_pred_scalar2
-def int_hexagon_V6_pred_scalar2 :
-Hexagon_v64ii_Intrinsic<"HEXAGON_V6_pred_scalar2">;
+def int_hexagon_V6_vroundwuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vroundwuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_scalar2_128B,QV_ftype_SI,1)
-// tag : V6_pred_scalar2_128B
-def int_hexagon_V6_pred_scalar2_128B :
-Hexagon_v128ii_Intrinsic<"HEXAGON_V6_pred_scalar2_128B">;
+def int_hexagon_V6_vroundwuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vroundwuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmux,VI_ftype_QVVIVI,3)
-// tag : V6_vmux
-def int_hexagon_V6_vmux :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vmux">;
+def int_hexagon_V6_vasrhv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vasrhv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmux_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vmux_128B
-def int_hexagon_V6_vmux_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vmux_128B">;
+def int_hexagon_V6_vasrhv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vasrhv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vswap,VD_ftype_QVVIVI,3)
-// tag : V6_vswap
-def int_hexagon_V6_vswap :
-Hexagon_v1024v64iv512v512_Intrinsic<"HEXAGON_V6_vswap">;
+def int_hexagon_V6_vshuffh :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vshuffh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vswap_128B,VD_ftype_QVVIVI,3)
-// tag : V6_vswap_128B
-def int_hexagon_V6_vswap_128B :
-Hexagon_v2048v128iv1024v1024_Intrinsic<"HEXAGON_V6_vswap_128B">;
+def int_hexagon_V6_vshuffh_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vshuffh_128B">;
+
+def int_hexagon_V6_vaddhsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhsat_dv">;
+
+def int_hexagon_V6_vaddhsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddhsat_dv_128B">;
+
+def int_hexagon_V6_vnavgub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgub">;
+
+def int_hexagon_V6_vnavgub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vnavgub_128B">;
+
+def int_hexagon_V6_vrmpybv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpybv">;
+
+def int_hexagon_V6_vrmpybv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpybv_128B">;
+
+def int_hexagon_V6_vnormamth :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vnormamth">;
+
+def int_hexagon_V6_vnormamth_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vnormamth_128B">;
+
+def int_hexagon_V6_vdmpyhb :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb">;
+
+def int_hexagon_V6_vdmpyhb_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_128B">;
+
+def int_hexagon_V6_vavguh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavguh">;
+
+def int_hexagon_V6_vavguh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavguh_128B">;
+
+def int_hexagon_V6_vlsrwv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vlsrwv">;
+
+def int_hexagon_V6_vlsrwv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vlsrwv_128B">;
+
+def int_hexagon_V6_vlsrhv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vlsrhv">;
+
+def int_hexagon_V6_vlsrhv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vlsrhv_128B">;
+
+def int_hexagon_V6_vdmpyhisat :
+Hexagon_v16i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhisat">;
+
+def int_hexagon_V6_vdmpyhisat_128B :
+Hexagon_v32i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhisat_128B">;
+
+def int_hexagon_V6_vdmpyhvsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vdmpyhvsat">;
+
+def int_hexagon_V6_vdmpyhvsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vdmpyhvsat_128B">;
+
+def int_hexagon_V6_vaddw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddw">;
+
+def int_hexagon_V6_vaddw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddw_128B">;
+
+def int_hexagon_V6_vzh :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vzh">;
+
+def int_hexagon_V6_vzh_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vzh_128B">;
+
+def int_hexagon_V6_vaddh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddh">;
+
+def int_hexagon_V6_vaddh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxub,VI_ftype_VIVI,2)
-// tag : V6_vmaxub
def int_hexagon_V6_vmaxub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxub">;
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxub_128B,VI_ftype_VIVI,2)
-// tag : V6_vmaxub_128B
def int_hexagon_V6_vmaxub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxub_128B">;
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmaxub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminub,VI_ftype_VIVI,2)
-// tag : V6_vminub
-def int_hexagon_V6_vminub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminub">;
+def int_hexagon_V6_vmpyhv_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyhv_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminub_128B,VI_ftype_VIVI,2)
-// tag : V6_vminub_128B
-def int_hexagon_V6_vminub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminub_128B">;
+def int_hexagon_V6_vmpyhv_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyhv_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxuh,VI_ftype_VIVI,2)
-// tag : V6_vmaxuh
-def int_hexagon_V6_vmaxuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxuh">;
+def int_hexagon_V6_vadduhsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadduhsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmaxuh_128B
-def int_hexagon_V6_vmaxuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxuh_128B">;
+def int_hexagon_V6_vadduhsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduhsat_128B">;
+
+def int_hexagon_V6_vshufoeh :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshufoeh">;
+
+def int_hexagon_V6_vshufoeh_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshufoeh_128B">;
+
+def int_hexagon_V6_vmpyuhv_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyuhv_acc">;
+
+def int_hexagon_V6_vmpyuhv_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyuhv_acc_128B">;
+
+def int_hexagon_V6_veqh :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh">;
+
+def int_hexagon_V6_veqh_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_128B">;
+
+def int_hexagon_V6_vmpabuuv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpabuuv">;
+
+def int_hexagon_V6_vmpabuuv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vmpabuuv_128B">;
+
+def int_hexagon_V6_vasrwhsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrwhsat">;
+
+def int_hexagon_V6_vasrwhsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwhsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminuh,VI_ftype_VIVI,2)
-// tag : V6_vminuh
def int_hexagon_V6_vminuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminuh">;
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vminuh_128B
def int_hexagon_V6_vminuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminuh_128B">;
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vminuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxh,VI_ftype_VIVI,2)
-// tag : V6_vmaxh
-def int_hexagon_V6_vmaxh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxh">;
+def int_hexagon_V6_vror :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vror">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmaxh_128B
-def int_hexagon_V6_vmaxh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxh_128B">;
+def int_hexagon_V6_vror_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vror_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminh,VI_ftype_VIVI,2)
-// tag : V6_vminh
-def int_hexagon_V6_vminh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminh">;
+def int_hexagon_V6_vmpyowh_rnd_sacc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminh_128B,VI_ftype_VIVI,2)
-// tag : V6_vminh_128B
-def int_hexagon_V6_vminh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminh_128B">;
+def int_hexagon_V6_vmpyowh_rnd_sacc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxw,VI_ftype_VIVI,2)
-// tag : V6_vmaxw
-def int_hexagon_V6_vmaxw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxw">;
+def int_hexagon_V6_vmaxuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxw_128B,VI_ftype_VIVI,2)
-// tag : V6_vmaxw_128B
-def int_hexagon_V6_vmaxw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxw_128B">;
+def int_hexagon_V6_vmaxuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmaxuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminw,VI_ftype_VIVI,2)
-// tag : V6_vminw
-def int_hexagon_V6_vminw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminw">;
+def int_hexagon_V6_vabsh_sat :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsh_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminw_128B,VI_ftype_VIVI,2)
-// tag : V6_vminw_128B
-def int_hexagon_V6_vminw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminw_128B">;
+def int_hexagon_V6_vabsh_sat_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsh_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsathub,VI_ftype_VIVI,2)
-// tag : V6_vsathub
-def int_hexagon_V6_vsathub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsathub">;
+def int_hexagon_V6_pred_or_n :
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_or_n">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsathub_128B,VI_ftype_VIVI,2)
-// tag : V6_vsathub_128B
-def int_hexagon_V6_vsathub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsathub_128B">;
+def int_hexagon_V6_pred_or_n_128B :
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_or_n_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsatwh,VI_ftype_VIVI,2)
-// tag : V6_vsatwh
-def int_hexagon_V6_vsatwh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsatwh">;
+def int_hexagon_V6_vdealb :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vdealb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsatwh_128B,VI_ftype_VIVI,2)
-// tag : V6_vsatwh_128B
-def int_hexagon_V6_vsatwh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsatwh_128B">;
+def int_hexagon_V6_vdealb_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vdealb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffeb,VI_ftype_VIVI,2)
-// tag : V6_vshuffeb
-def int_hexagon_V6_vshuffeb :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshuffeb">;
+def int_hexagon_V6_vmpybusv :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpybusv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffeb_128B,VI_ftype_VIVI,2)
-// tag : V6_vshuffeb_128B
-def int_hexagon_V6_vshuffeb_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshuffeb_128B">;
+def int_hexagon_V6_vmpybusv_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpybusv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffob,VI_ftype_VIVI,2)
-// tag : V6_vshuffob
-def int_hexagon_V6_vshuffob :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshuffob">;
+def int_hexagon_V6_vzb :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vzb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffob_128B,VI_ftype_VIVI,2)
-// tag : V6_vshuffob_128B
-def int_hexagon_V6_vshuffob_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshuffob_128B">;
+def int_hexagon_V6_vzb_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vzb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufeh,VI_ftype_VIVI,2)
-// tag : V6_vshufeh
-def int_hexagon_V6_vshufeh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshufeh">;
+def int_hexagon_V6_vdmpybus_dv :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufeh_128B,VI_ftype_VIVI,2)
-// tag : V6_vshufeh_128B
-def int_hexagon_V6_vshufeh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshufeh_128B">;
+def int_hexagon_V6_vdmpybus_dv_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoh,VI_ftype_VIVI,2)
-// tag : V6_vshufoh
-def int_hexagon_V6_vshufoh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshufoh">;
+def int_hexagon_V6_vaddbq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoh_128B,VI_ftype_VIVI,2)
-// tag : V6_vshufoh_128B
-def int_hexagon_V6_vshufoh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshufoh_128B">;
+def int_hexagon_V6_vaddbq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbq_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffvdd,VD_ftype_VIVISI,3)
-// tag : V6_vshuffvdd
-def int_hexagon_V6_vshuffvdd :
-Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vshuffvdd">;
+def int_hexagon_V6_vaddb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffvdd_128B,VD_ftype_VIVISI,3)
-// tag : V6_vshuffvdd_128B
-def int_hexagon_V6_vshuffvdd_128B :
-Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vshuffvdd_128B">;
+def int_hexagon_V6_vaddb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealvdd,VD_ftype_VIVISI,3)
-// tag : V6_vdealvdd
-def int_hexagon_V6_vdealvdd :
-Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vdealvdd">;
+def int_hexagon_V6_vaddwq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealvdd_128B,VD_ftype_VIVISI,3)
-// tag : V6_vdealvdd_128B
-def int_hexagon_V6_vdealvdd_128B :
-Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vdealvdd_128B">;
+def int_hexagon_V6_vaddwq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwq_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoeh,VD_ftype_VIVI,2)
-// tag : V6_vshufoeh
-def int_hexagon_V6_vshufoeh :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vshufoeh">;
+def int_hexagon_V6_vasrhubrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrhubrndsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoeh_128B,VD_ftype_VIVI,2)
-// tag : V6_vshufoeh_128B
-def int_hexagon_V6_vshufoeh_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vshufoeh_128B">;
+def int_hexagon_V6_vasrhubrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrhubrndsat_128B">;
+
+def int_hexagon_V6_vasrhubsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrhubsat">;
+
+def int_hexagon_V6_vasrhubsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrhubsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoeb,VD_ftype_VIVI,2)
-// tag : V6_vshufoeb
def int_hexagon_V6_vshufoeb :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vshufoeb">;
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshufoeb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoeb_128B,VD_ftype_VIVI,2)
-// tag : V6_vshufoeb_128B
def int_hexagon_V6_vshufoeb_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vshufoeb_128B">;
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshufoeb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealh,VI_ftype_VI,1)
-// tag : V6_vdealh
-def int_hexagon_V6_vdealh :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vdealh">;
+def int_hexagon_V6_vpackhub_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackhub_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealh_128B,VI_ftype_VI,1)
-// tag : V6_vdealh_128B
-def int_hexagon_V6_vdealh_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vdealh_128B">;
+def int_hexagon_V6_vpackhub_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackhub_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealb,VI_ftype_VI,1)
-// tag : V6_vdealb
-def int_hexagon_V6_vdealb :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vdealb">;
+def int_hexagon_V6_vmpyiwh_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealb_128B,VI_ftype_VI,1)
-// tag : V6_vdealb_128B
-def int_hexagon_V6_vdealb_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vdealb_128B">;
+def int_hexagon_V6_vmpyiwh_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwh_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealb4w,VI_ftype_VIVI,2)
-// tag : V6_vdealb4w
-def int_hexagon_V6_vdealb4w :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdealb4w">;
+def int_hexagon_V6_vtmpyb :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vtmpyb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealb4w_128B,VI_ftype_VIVI,2)
-// tag : V6_vdealb4w_128B
-def int_hexagon_V6_vdealb4w_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdealb4w_128B">;
+def int_hexagon_V6_vtmpyb_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vtmpyb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffh,VI_ftype_VI,1)
-// tag : V6_vshuffh
-def int_hexagon_V6_vshuffh :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vshuffh">;
+def int_hexagon_V6_vmpabusv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpabusv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffh_128B,VI_ftype_VI,1)
-// tag : V6_vshuffh_128B
-def int_hexagon_V6_vshuffh_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vshuffh_128B">;
+def int_hexagon_V6_vmpabusv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vmpabusv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffb,VI_ftype_VI,1)
-// tag : V6_vshuffb
-def int_hexagon_V6_vshuffb :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vshuffb">;
+def int_hexagon_V6_pred_and :
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffb_128B,VI_ftype_VI,1)
-// tag : V6_vshuffb_128B
-def int_hexagon_V6_vshuffb_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vshuffb_128B">;
+def int_hexagon_V6_pred_and_128B :
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_and_128B">;
+
+def int_hexagon_V6_vsubwnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwnq">;
+
+def int_hexagon_V6_vsubwnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwnq_128B">;
+
+def int_hexagon_V6_vpackwuh_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackwuh_sat">;
+
+def int_hexagon_V6_vpackwuh_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackwuh_sat_128B">;
+
+def int_hexagon_V6_vswap :
+Hexagon_v32i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vswap">;
+
+def int_hexagon_V6_vswap_128B :
+Hexagon_v64i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vswap_128B">;
+
+def int_hexagon_V6_vrmpyubv_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpyubv_acc">;
+
+def int_hexagon_V6_vrmpyubv_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpyubv_acc_128B">;
+
+def int_hexagon_V6_vgtb_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_and">;
+
+def int_hexagon_V6_vgtb_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_and_128B">;
+
+def int_hexagon_V6_vaslw :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vaslw">;
+
+def int_hexagon_V6_vaslw_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vaslw_128B">;
+
+def int_hexagon_V6_vpackhb_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackhb_sat">;
+
+def int_hexagon_V6_vpackhb_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackhb_sat_128B">;
+
+def int_hexagon_V6_vmpyih_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyih_acc">;
+
+def int_hexagon_V6_vmpyih_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyih_acc_128B">;
+
+def int_hexagon_V6_vshuffvdd :
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vshuffvdd">;
+
+def int_hexagon_V6_vshuffvdd_128B :
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vshuffvdd_128B">;
+
+def int_hexagon_V6_vaddb_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddb_dv">;
+
+def int_hexagon_V6_vaddb_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddb_dv_128B">;
+
+def int_hexagon_V6_vunpackub :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vunpackub">;
+
+def int_hexagon_V6_vunpackub_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vunpackub_128B">;
+
+def int_hexagon_V6_vgtuw :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw">;
+
+def int_hexagon_V6_vgtuw_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_128B">;
+
+def int_hexagon_V6_vlutvwh :
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh">;
+
+def int_hexagon_V6_vlutvwh_128B :
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_128B">;
+
+def int_hexagon_V6_vgtub :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub">;
+
+def int_hexagon_V6_vgtub_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_128B">;
+
+def int_hexagon_V6_vmpyowh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyowh">;
+
+def int_hexagon_V6_vmpyowh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyowh_128B">;
+
+def int_hexagon_V6_vmpyieoh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyieoh">;
+
+def int_hexagon_V6_vmpyieoh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyieoh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_extractw,SI_ftype_VISI,2)
-// tag : V6_extractw
def int_hexagon_V6_extractw :
-Hexagon_iv512i_Intrinsic<"HEXAGON_V6_extractw">;
+Hexagon_i32_v16i32i32_Intrinsic<"HEXAGON_V6_extractw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_extractw_128B,SI_ftype_VISI,2)
-// tag : V6_extractw_128B
def int_hexagon_V6_extractw_128B :
-Hexagon_iv1024i_Intrinsic<"HEXAGON_V6_extractw_128B">;
+Hexagon_i32_v32i32i32_Intrinsic<"HEXAGON_V6_extractw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vinsertwr,VI_ftype_VISI,2)
-// tag : V6_vinsertwr
-def int_hexagon_V6_vinsertwr :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vinsertwr">;
+def int_hexagon_V6_vavgwrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgwrnd">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vinsertwr_128B,VI_ftype_VISI,2)
-// tag : V6_vinsertwr_128B
-def int_hexagon_V6_vinsertwr_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vinsertwr_128B">;
+def int_hexagon_V6_vavgwrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgwrnd_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplatw,VI_ftype_SI,1)
-// tag : V6_lvsplatw
-def int_hexagon_V6_lvsplatw :
-Hexagon_v512i_Intrinsic<"HEXAGON_V6_lvsplatw">;
+def int_hexagon_V6_vdmpyhsat_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplatw_128B,VI_ftype_SI,1)
-// tag : V6_lvsplatw_128B
-def int_hexagon_V6_lvsplatw_128B :
-Hexagon_v1024i_Intrinsic<"HEXAGON_V6_lvsplatw_128B">;
+def int_hexagon_V6_vdmpyhsat_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vassign,VI_ftype_VI,1)
-// tag : V6_vassign
-def int_hexagon_V6_vassign :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vassign">;
+def int_hexagon_V6_vgtub_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vassign_128B,VI_ftype_VI,1)
-// tag : V6_vassign_128B
-def int_hexagon_V6_vassign_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vassign_128B">;
+def int_hexagon_V6_vgtub_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_xor_128B">;
+
+def int_hexagon_V6_vmpyub :
+Hexagon_v32i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyub">;
+
+def int_hexagon_V6_vmpyub_128B :
+Hexagon_v64i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyub_128B">;
+
+def int_hexagon_V6_vmpyuh :
+Hexagon_v32i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyuh">;
+
+def int_hexagon_V6_vmpyuh_128B :
+Hexagon_v64i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyuh_128B">;
+
+def int_hexagon_V6_vunpackob :
+Hexagon_v32i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vunpackob">;
+
+def int_hexagon_V6_vunpackob_128B :
+Hexagon_v64i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vunpackob_128B">;
+
+def int_hexagon_V6_vmpahb :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpahb">;
+
+def int_hexagon_V6_vmpahb_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vmpahb_128B">;
+
+def int_hexagon_V6_veqw_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_or">;
+
+def int_hexagon_V6_veqw_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_or_128B">;
+
+def int_hexagon_V6_vandqrt :
+Hexagon_v16i32_v512i1i32_Intrinsic<"HEXAGON_V6_vandqrt">;
+
+def int_hexagon_V6_vandqrt_128B :
+Hexagon_v32i32_v1024i1i32_Intrinsic<"HEXAGON_V6_vandqrt_128B">;
+
+def int_hexagon_V6_vxor :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vxor">;
+
+def int_hexagon_V6_vxor_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vxor_128B">;
+
+def int_hexagon_V6_vasrwhrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrwhrndsat">;
+
+def int_hexagon_V6_vasrwhrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwhrndsat_128B">;
+
+def int_hexagon_V6_vmpyhsat_acc :
+Hexagon_v32i32_v32i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyhsat_acc">;
+
+def int_hexagon_V6_vmpyhsat_acc_128B :
+Hexagon_v64i32_v64i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyhsat_acc_128B">;
+
+def int_hexagon_V6_vrmpybus_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vrmpybus_acc">;
+
+def int_hexagon_V6_vrmpybus_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vrmpybus_acc_128B">;
+
+def int_hexagon_V6_vsubhw :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhw">;
+
+def int_hexagon_V6_vsubhw_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhw_128B">;
+
+def int_hexagon_V6_vdealb4w :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vdealb4w">;
+
+def int_hexagon_V6_vdealb4w_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vdealb4w_128B">;
+
+def int_hexagon_V6_vmpyowh_sacc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyowh_sacc">;
+
+def int_hexagon_V6_vmpyowh_sacc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyowh_sacc_128B">;
+
+def int_hexagon_V6_vmpybv :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpybv">;
+
+def int_hexagon_V6_vmpybv_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpybv_128B">;
+
+def int_hexagon_V6_vabsdiffh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vabsdiffh">;
+
+def int_hexagon_V6_vabsdiffh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vabsdiffh_128B">;
+
+def int_hexagon_V6_vshuffob :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshuffob">;
+
+def int_hexagon_V6_vshuffob_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshuffob_128B">;
+
+def int_hexagon_V6_vmpyub_acc :
+Hexagon_v32i32_v32i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyub_acc">;
+
+def int_hexagon_V6_vmpyub_acc_128B :
+Hexagon_v64i32_v64i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyub_acc_128B">;
+
+def int_hexagon_V6_vnormamtw :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vnormamtw">;
+
+def int_hexagon_V6_vnormamtw_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vnormamtw_128B">;
+
+def int_hexagon_V6_vunpackuh :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vunpackuh">;
+
+def int_hexagon_V6_vunpackuh_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vunpackuh_128B">;
+
+def int_hexagon_V6_vgtuh_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_or">;
+
+def int_hexagon_V6_vgtuh_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_or_128B">;
+
+def int_hexagon_V6_vmpyiewuh_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc">;
+
+def int_hexagon_V6_vmpyiewuh_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc_128B">;
+
+def int_hexagon_V6_vunpackoh :
+Hexagon_v32i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vunpackoh">;
+
+def int_hexagon_V6_vunpackoh_128B :
+Hexagon_v64i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vunpackoh_128B">;
+
+def int_hexagon_V6_vdmpyhsat :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsat">;
+
+def int_hexagon_V6_vdmpyhsat_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsat_128B">;
+
+def int_hexagon_V6_vmpyubv :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyubv">;
+
+def int_hexagon_V6_vmpyubv_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyubv_128B">;
+
+def int_hexagon_V6_vmpyhss :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyhss">;
+
+def int_hexagon_V6_vmpyhss_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyhss_128B">;
+
+def int_hexagon_V6_hi :
+Hexagon_v16i32_v32i32_Intrinsic<"HEXAGON_V6_hi">;
+
+def int_hexagon_V6_hi_128B :
+Hexagon_v32i32_v64i32_Intrinsic<"HEXAGON_V6_hi_128B">;
+
+def int_hexagon_V6_vasrwuhsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrwuhsat">;
+
+def int_hexagon_V6_vasrwuhsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwuhsat_128B">;
+
+def int_hexagon_V6_veqw :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw">;
+
+def int_hexagon_V6_veqw_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_128B">;
+
+def int_hexagon_V6_vdsaduh :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdsaduh">;
+
+def int_hexagon_V6_vdsaduh_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdsaduh_128B">;
+
+def int_hexagon_V6_vsubw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubw">;
+
+def int_hexagon_V6_vsubw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubw_128B">;
+
+def int_hexagon_V6_vsubw_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubw_dv">;
+
+def int_hexagon_V6_vsubw_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubw_dv_128B">;
+
+def int_hexagon_V6_veqb_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_and">;
+
+def int_hexagon_V6_veqb_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_and_128B">;
+
+def int_hexagon_V6_vmpyih :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyih">;
+
+def int_hexagon_V6_vmpyih_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyih_128B">;
+
+def int_hexagon_V6_vtmpyb_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vtmpyb_acc">;
+
+def int_hexagon_V6_vtmpyb_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vtmpyb_acc_128B">;
+
+def int_hexagon_V6_vrmpybus :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vrmpybus">;
+
+def int_hexagon_V6_vrmpybus_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vrmpybus_128B">;
+
+def int_hexagon_V6_vmpybus_acc :
+Hexagon_v32i32_v32i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpybus_acc">;
+
+def int_hexagon_V6_vmpybus_acc_128B :
+Hexagon_v64i32_v64i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpybus_acc_128B">;
+
+def int_hexagon_V6_vgth_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_xor">;
+
+def int_hexagon_V6_vgth_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_xor_128B">;
+
+def int_hexagon_V6_vsubhsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhsat">;
+
+def int_hexagon_V6_vsubhsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhsat_128B">;
+
+def int_hexagon_V6_vrmpyubi_acc :
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc">;
+
+def int_hexagon_V6_vrmpyubi_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B">;
+
+def int_hexagon_V6_vabsw :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsw">;
+
+def int_hexagon_V6_vabsw_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsw_128B">;
+
+def int_hexagon_V6_vaddwsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwsat_dv">;
+
+def int_hexagon_V6_vaddwsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddwsat_dv_128B">;
+
+def int_hexagon_V6_vlsrw :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vlsrw">;
+
+def int_hexagon_V6_vlsrw_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vlsrw_128B">;
+
+def int_hexagon_V6_vabsh :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsh">;
+
+def int_hexagon_V6_vabsh_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsh_128B">;
+
+def int_hexagon_V6_vlsrh :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vlsrh">;
+
+def int_hexagon_V6_vlsrh_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vlsrh_128B">;
+
+def int_hexagon_V6_valignb :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignb">;
+
+def int_hexagon_V6_valignb_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_valignb_128B">;
+
+def int_hexagon_V6_vsubhq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhq">;
+
+def int_hexagon_V6_vsubhq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhq_128B">;
+
+def int_hexagon_V6_vpackoh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackoh">;
+
+def int_hexagon_V6_vpackoh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackoh_128B">;
+
+def int_hexagon_V6_vdmpybus_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_acc">;
+
+def int_hexagon_V6_vdmpybus_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_acc_128B">;
+
+def int_hexagon_V6_vdmpyhvsat_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc">;
+
+def int_hexagon_V6_vdmpyhvsat_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc_128B">;
+
+def int_hexagon_V6_vrmpybv_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpybv_acc">;
+
+def int_hexagon_V6_vrmpybv_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpybv_acc_128B">;
+
+def int_hexagon_V6_vaddhsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhsat">;
+
+def int_hexagon_V6_vaddhsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcombine,VD_ftype_VIVI,2)
-// tag : V6_vcombine
def int_hexagon_V6_vcombine :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vcombine">;
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcombine">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcombine_128B,VD_ftype_VIVI,2)
-// tag : V6_vcombine_128B
def int_hexagon_V6_vcombine_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vcombine_128B">;
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcombine_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdelta,VI_ftype_VIVI,2)
-// tag : V6_vdelta
-def int_hexagon_V6_vdelta :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdelta">;
+def int_hexagon_V6_vandqrt_acc :
+Hexagon_v16i32_v16i32v512i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdelta_128B,VI_ftype_VIVI,2)
-// tag : V6_vdelta_128B
-def int_hexagon_V6_vdelta_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdelta_128B">;
+def int_hexagon_V6_vandqrt_acc_128B :
+Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrdelta,VI_ftype_VIVI,2)
-// tag : V6_vrdelta
-def int_hexagon_V6_vrdelta :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrdelta">;
+def int_hexagon_V6_vaslhv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaslhv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrdelta_128B,VI_ftype_VIVI,2)
-// tag : V6_vrdelta_128B
-def int_hexagon_V6_vrdelta_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrdelta_128B">;
+def int_hexagon_V6_vaslhv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaslhv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcl0w,VI_ftype_VI,1)
-// tag : V6_vcl0w
-def int_hexagon_V6_vcl0w :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vcl0w">;
+def int_hexagon_V6_vinsertwr :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vinsertwr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcl0w_128B,VI_ftype_VI,1)
-// tag : V6_vcl0w_128B
-def int_hexagon_V6_vcl0w_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vcl0w_128B">;
+def int_hexagon_V6_vinsertwr_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vinsertwr_128B">;
+
+def int_hexagon_V6_vsubh_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubh_dv">;
+
+def int_hexagon_V6_vsubh_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubh_dv_128B">;
+
+def int_hexagon_V6_vshuffb :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vshuffb">;
+
+def int_hexagon_V6_vshuffb_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vshuffb_128B">;
+
+def int_hexagon_V6_vand :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vand">;
+
+def int_hexagon_V6_vand_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vand_128B">;
+
+def int_hexagon_V6_vmpyhv :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyhv">;
+
+def int_hexagon_V6_vmpyhv_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyhv_128B">;
+
+def int_hexagon_V6_vdmpyhsuisat_acc :
+Hexagon_v16i32_v16i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc">;
+
+def int_hexagon_V6_vdmpyhsuisat_acc_128B :
+Hexagon_v32i32_v32i32v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc_128B">;
+
+def int_hexagon_V6_vsububsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsububsat_dv">;
+
+def int_hexagon_V6_vsububsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsububsat_dv_128B">;
+
+def int_hexagon_V6_vgtb_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_xor">;
+
+def int_hexagon_V6_vgtb_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_xor_128B">;
+
+def int_hexagon_V6_vdsaduh_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdsaduh_acc">;
+
+def int_hexagon_V6_vdsaduh_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vdsaduh_acc_128B">;
+
+def int_hexagon_V6_vrmpyub :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vrmpyub">;
+
+def int_hexagon_V6_vrmpyub_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vrmpyub_128B">;
+
+def int_hexagon_V6_vmpyuh_acc :
+Hexagon_v32i32_v32i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyuh_acc">;
+
+def int_hexagon_V6_vmpyuh_acc_128B :
+Hexagon_v64i32_v64i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyuh_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcl0h,VI_ftype_VI,1)
-// tag : V6_vcl0h
def int_hexagon_V6_vcl0h :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vcl0h">;
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcl0h">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcl0h_128B,VI_ftype_VI,1)
-// tag : V6_vcl0h_128B
def int_hexagon_V6_vcl0h_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vcl0h_128B">;
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcl0h_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnormamtw,VI_ftype_VI,1)
-// tag : V6_vnormamtw
-def int_hexagon_V6_vnormamtw :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnormamtw">;
+def int_hexagon_V6_vmpyhus_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyhus_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnormamtw_128B,VI_ftype_VI,1)
-// tag : V6_vnormamtw_128B
-def int_hexagon_V6_vnormamtw_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnormamtw_128B">;
+def int_hexagon_V6_vmpyhus_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyhus_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnormamth,VI_ftype_VI,1)
-// tag : V6_vnormamth
-def int_hexagon_V6_vnormamth :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnormamth">;
+def int_hexagon_V6_vmpybv_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpybv_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnormamth_128B,VI_ftype_VI,1)
-// tag : V6_vnormamth_128B
-def int_hexagon_V6_vnormamth_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnormamth_128B">;
+def int_hexagon_V6_vmpybv_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpybv_acc_128B">;
+
+def int_hexagon_V6_vrsadubi :
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi">;
+
+def int_hexagon_V6_vrsadubi_128B :
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_128B">;
+
+def int_hexagon_V6_vdmpyhb_dv_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc">;
+
+def int_hexagon_V6_vdmpyhb_dv_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc_128B">;
+
+def int_hexagon_V6_vshufeh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshufeh">;
+
+def int_hexagon_V6_vshufeh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshufeh_128B">;
+
+def int_hexagon_V6_vmpyewuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyewuh">;
+
+def int_hexagon_V6_vmpyewuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyewuh_128B">;
+
+def int_hexagon_V6_vmpyhsrs :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyhsrs">;
+
+def int_hexagon_V6_vmpyhsrs_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyhsrs_128B">;
+
+def int_hexagon_V6_vdmpybus_dv_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc">;
+
+def int_hexagon_V6_vdmpybus_dv_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc_128B">;
+
+def int_hexagon_V6_vaddubh :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddubh">;
+
+def int_hexagon_V6_vaddubh_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddubh_128B">;
+
+def int_hexagon_V6_vasrwh :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrwh">;
+
+def int_hexagon_V6_vasrwh_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwh_128B">;
+
+def int_hexagon_V6_ld0 :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_ld0">;
+
+def int_hexagon_V6_ld0_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_ld0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpopcounth,VI_ftype_VI,1)
-// tag : V6_vpopcounth
def int_hexagon_V6_vpopcounth :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vpopcounth">;
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vpopcounth">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpopcounth_128B,VI_ftype_VI,1)
-// tag : V6_vpopcounth_128B
def int_hexagon_V6_vpopcounth_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vpopcounth_128B">;
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vpopcounth_128B">;
+
+def int_hexagon_V6_ldnt0 :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_ldnt0">;
+
+def int_hexagon_V6_ldnt0_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_ldnt0_128B">;
+
+def int_hexagon_V6_vgth_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_and">;
+
+def int_hexagon_V6_vgth_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_and_128B">;
+
+def int_hexagon_V6_vaddubsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddubsat_dv">;
+
+def int_hexagon_V6_vaddubsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddubsat_dv_128B">;
+
+def int_hexagon_V6_vpackeh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackeh">;
+
+def int_hexagon_V6_vpackeh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackeh_128B">;
+
+def int_hexagon_V6_vmpyh :
+Hexagon_v32i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyh">;
+
+def int_hexagon_V6_vmpyh_128B :
+Hexagon_v64i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyh_128B">;
+
+def int_hexagon_V6_vminh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminh">;
+
+def int_hexagon_V6_vminh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vminh_128B">;
+
+def int_hexagon_V6_pred_scalar2 :
+Hexagon_v512i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2">;
+
+def int_hexagon_V6_pred_scalar2_128B :
+Hexagon_v1024i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2_128B">;
+
+def int_hexagon_V6_vdealh :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vdealh">;
+
+def int_hexagon_V6_vdealh_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vdealh_128B">;
+
+def int_hexagon_V6_vpackwh_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackwh_sat">;
+
+def int_hexagon_V6_vpackwh_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackwh_sat_128B">;
+
+def int_hexagon_V6_vaslh :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vaslh">;
+
+def int_hexagon_V6_vaslh_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vaslh_128B">;
+
+def int_hexagon_V6_vgtuw_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_and">;
+
+def int_hexagon_V6_vgtuw_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_and_128B">;
+
+def int_hexagon_V6_vor :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vor">;
+
+def int_hexagon_V6_vor_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vor_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvb
def int_hexagon_V6_vlutvvb :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb">;
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_128B,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvb_128B
def int_hexagon_V6_vlutvvb_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_128B">;
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_128B">;
+
+def int_hexagon_V6_vmpyiowh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyiowh">;
+
+def int_hexagon_V6_vmpyiowh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyiowh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracc,VI_ftype_VIVIVISI,4)
-// tag : V6_vlutvvb_oracc
def int_hexagon_V6_vlutvvb_oracc :
-Hexagon_v512v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb_oracc">;
+Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracc_128B,VI_ftype_VIVIVISI,4)
-// tag : V6_vlutvvb_oracc_128B
def int_hexagon_V6_vlutvvb_oracc_128B :
-Hexagon_v1024v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_oracc_128B">;
+Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwh
-def int_hexagon_V6_vlutvwh :
-Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh">;
+def int_hexagon_V6_vandvrt :
+Hexagon_v512i1_v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_128B,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwh_128B
-def int_hexagon_V6_vlutvwh_128B :
-Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_128B">;
+def int_hexagon_V6_vandvrt_128B :
+Hexagon_v1024i1_v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracc,VD_ftype_VDVIVISI,4)
-// tag : V6_vlutvwh_oracc
-def int_hexagon_V6_vlutvwh_oracc :
-Hexagon_v1024v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc">;
+def int_hexagon_V6_veqh_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracc_128B,VD_ftype_VDVIVISI,4)
-// tag : V6_vlutvwh_oracc_128B
-def int_hexagon_V6_vlutvwh_oracc_128B :
-Hexagon_v2048v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc_128B">;
+def int_hexagon_V6_veqh_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_xor_128B">;
-//
-// Masked vector stores
-//
-def int_hexagon_V6_vS32b_qpred_ai :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai">;
+def int_hexagon_V6_vadduhw :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadduhw">;
-def int_hexagon_V6_vS32b_nqpred_ai :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai">;
+def int_hexagon_V6_vadduhw_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduhw_128B">;
-def int_hexagon_V6_vS32b_nt_qpred_ai :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai">;
+def int_hexagon_V6_vcl0w :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcl0w">;
-def int_hexagon_V6_vS32b_nt_nqpred_ai :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai">;
+def int_hexagon_V6_vcl0w_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcl0w_128B">;
-def int_hexagon_V6_vS32b_qpred_ai_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai_128B">;
+def int_hexagon_V6_vmpyihb :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyihb">;
-def int_hexagon_V6_vS32b_nqpred_ai_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai_128B">;
+def int_hexagon_V6_vmpyihb_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyihb_128B">;
-def int_hexagon_V6_vS32b_nt_qpred_ai_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai_128B">;
+def int_hexagon_V6_vtmpybus :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vtmpybus">;
-def int_hexagon_V6_vS32b_nt_nqpred_ai_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai_128B">;
+def int_hexagon_V6_vtmpybus_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vtmpybus_128B">;
-def int_hexagon_V6_vmaskedstoreq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstoreq">;
+def int_hexagon_V6_vd0 :
+Hexagon_v16i32__Intrinsic<"HEXAGON_V6_vd0">;
-def int_hexagon_V6_vmaskedstorenq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorenq">;
+def int_hexagon_V6_vd0_128B :
+Hexagon_v32i32__Intrinsic<"HEXAGON_V6_vd0_128B">;
-def int_hexagon_V6_vmaskedstorentq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentq">;
+def int_hexagon_V6_veqh_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_or">;
-def int_hexagon_V6_vmaskedstorentnq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentnq">;
+def int_hexagon_V6_veqh_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_or_128B">;
-def int_hexagon_V6_vmaskedstoreq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstoreq_128B">;
+def int_hexagon_V6_vgtw_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_or">;
-def int_hexagon_V6_vmaskedstorenq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorenq_128B">;
+def int_hexagon_V6_vgtw_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_or_128B">;
-def int_hexagon_V6_vmaskedstorentq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentq_128B">;
+def int_hexagon_V6_vdmpybus :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vdmpybus">;
-def int_hexagon_V6_vmaskedstorentnq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentnq_128B">;
+def int_hexagon_V6_vdmpybus_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_128B">;
-multiclass Hexagon_custom_circ_ld_Intrinsic<LLVMType ElTy> {
- def NAME#_pci : Hexagon_NonGCC_Intrinsic<
- [ElTy, llvm_ptr_ty],
- [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
- [IntrArgMemOnly, NoCapture<3>]>;
- def NAME#_pcr : Hexagon_NonGCC_Intrinsic<
- [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty],
- [IntrArgMemOnly, NoCapture<2>]>;
-}
+def int_hexagon_V6_vgtub_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_or">;
-defm int_hexagon_L2_loadrub : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_L2_loadrb : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_L2_loadruh : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_L2_loadrh : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_L2_loadri : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_L2_loadrd : Hexagon_custom_circ_ld_Intrinsic<llvm_i64_ty>;
+def int_hexagon_V6_vgtub_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_or_128B">;
-multiclass Hexagon_custom_circ_st_Intrinsic<LLVMType ElTy> {
- def NAME#_pci : Hexagon_NonGCC_Intrinsic<
- [llvm_ptr_ty],
- [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ElTy, llvm_ptr_ty],
- [IntrArgMemOnly, NoCapture<4>]>;
- def NAME#_pcr : Hexagon_NonGCC_Intrinsic<
- [llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, ElTy, llvm_ptr_ty],
- [IntrArgMemOnly, NoCapture<3>]>;
-}
+def int_hexagon_V6_vmpybus :
+Hexagon_v32i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpybus">;
-defm int_hexagon_S2_storerb : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_S2_storerh : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_S2_storerf : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_S2_storeri : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_S2_storerd : Hexagon_custom_circ_st_Intrinsic<llvm_i64_ty>;
+def int_hexagon_V6_vmpybus_128B :
+Hexagon_v64i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpybus_128B">;
-// The front-end emits the intrinsic call with only two arguments. The third
-// argument from the builtin is already used by front-end to write to memory
-// by generating a store.
-class Hexagon_custom_brev_ld_Intrinsic<LLVMType ElTy>
- : Hexagon_NonGCC_Intrinsic<
- [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+def int_hexagon_V6_vdmpyhb_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_acc">;
-def int_hexagon_L2_loadrub_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
-def int_hexagon_L2_loadrb_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
-def int_hexagon_L2_loadruh_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
-def int_hexagon_L2_loadrh_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
-def int_hexagon_L2_loadri_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
-def int_hexagon_L2_loadrd_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i64_ty>;
+def int_hexagon_V6_vdmpyhb_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_acc_128B">;
-def int_hexagon_S2_storerb_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_stb">;
-def int_hexagon_S2_storerh_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_sth">;
-def int_hexagon_S2_storerf_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_sthhi">;
-def int_hexagon_S2_storeri_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_stw">;
-def int_hexagon_S2_storerd_pbr : Hexagon_mem_memdisi_Intrinsic<"brev_std">;
+def int_hexagon_V6_vandvrt_acc :
+Hexagon_v512i1_v512i1v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc">;
+def int_hexagon_V6_vandvrt_acc_128B :
+Hexagon_v1024i1_v1024i1v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc_128B">;
-///
-/// HexagonV62 intrinsics
-///
+def int_hexagon_V6_vassign :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vassign">;
-//
-// Hexagon_LLiLLiLLi_Intrinsic<string GCCIntSuffix>
-// tag : M6_vabsdiffb
-class Hexagon_LLiLLiLLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vassign_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vassign_128B">;
-//
-// Hexagon_LLii_Intrinsic<string GCCIntSuffix>
-// tag : S6_vsplatrbp
-class Hexagon_LLii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddwnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwnq">;
-//
-// Hexagon_V62_v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlsrb
-class Hexagon_V62_v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddwnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwnq_128B">;
-//
-// Hexagon_V62_v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlsrb_128B
-class Hexagon_V62_v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vgtub_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_and">;
-//
-// Hexagon_V62_v512v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vasrwuhrndsat
-class Hexagon_V62_v512v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vgtub_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_and_128B">;
-//
-// Hexagon_V62_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vasrwuhrndsat_128B
-class Hexagon_V62_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vdmpyhb_dv :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_dv">;
-//
-// Hexagon_V62_v512v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrounduwuh
-class Hexagon_V62_v512v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vdmpyhb_dv_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_128B">;
-//
-// Hexagon_V62_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrounduwuh_128B
-class Hexagon_V62_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vunpackb :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vunpackb">;
-//
-// Hexagon_V62_v2048v2048v2048_Intrinsic<string GCCIntSuffix>
-// tag : V6_vadduwsat_dv_128B
-class Hexagon_V62_v2048v2048v2048_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vunpackb_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vunpackb_128B">;
-//
-// Hexagon_V62_v1024v1024v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddhw_acc
-class Hexagon_V62_v1024v1024v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vunpackh :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vunpackh">;
-//
-// Hexagon_V62_v2048v2048v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddhw_acc_128B
-class Hexagon_V62_v2048v2048v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vunpackh_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vunpackh_128B">;
-//
-// Hexagon_V62_v1024v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyewuh_64
-class Hexagon_V62_v1024v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpahb_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpahb_acc">;
-//
-// Hexagon_V62_v2048v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyewuh_64_128B
-class Hexagon_V62_v2048v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpahb_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vmpahb_acc_128B">;
-//
-// Hexagon_V62_v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpauhb_128B
-class Hexagon_V62_v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddbnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbnq">;
-//
-// Hexagon_V62_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpauhb_acc_128B
-class Hexagon_V62_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddbnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbnq_128B">;
-//
-// Hexagon_V62_v512v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandnqrt
-class Hexagon_V62_v512v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vlalignbi :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlalignbi">;
-//
-// Hexagon_V62_v1024v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandnqrt_128B
-class Hexagon_V62_v1024v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vlalignbi_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlalignbi_128B">;
-//
-// Hexagon_V62_v512v512v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandnqrt_acc
-class Hexagon_V62_v512v512v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v512i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vsatwh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsatwh">;
-//
-// Hexagon_V62_v1024v1024v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandnqrt_acc_128B
-class Hexagon_V62_v1024v1024v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v1024i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vsatwh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsatwh_128B">;
-//
-// Hexagon_V62_v512v64iv512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvqv
-class Hexagon_V62_v512v64iv512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vgtuh :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh">;
-//
-// Hexagon_V62_v1024v128iv1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvqv_128B
-class Hexagon_V62_v1024v128iv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vgtuh_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_128B">;
-//
-// Hexagon_V62_v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_scalar2v2
-class Hexagon_V62_v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyihb_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyihb_acc">;
-//
-// Hexagon_V62_v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_scalar2v2_128B
-class Hexagon_V62_v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyihb_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyihb_acc_128B">;
-//
-// Hexagon_V62_v64iv64iv64i_Intrinsic<string GCCIntSuffix>
-// tag : V6_shuffeqw
-class Hexagon_V62_v64iv64iv64i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v512i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrmpybusv_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpybusv_acc">;
-//
-// Hexagon_V62_v128iv128iv128i_Intrinsic<string GCCIntSuffix>
-// tag : V6_shuffeqw_128B
-class Hexagon_V62_v128iv128iv128i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v1024i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrmpybusv_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpybusv_acc_128B">;
-//
-// Hexagon_V62_v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_lvsplath
-class Hexagon_V62_v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrdelta :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrdelta">;
-//
-// Hexagon_V62_v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_lvsplath_128B
-class Hexagon_V62_v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrdelta_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrdelta_128B">;
-//
-// Hexagon_V62_v512v512v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvvb_oracci
-class Hexagon_V62_v512v512v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vroundwh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vroundwh">;
-//
-// Hexagon_V62_v1024v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvvb_oracci_128B
-class Hexagon_V62_v1024v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vroundwh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vroundwh_128B">;
-//
-// Hexagon_V62_v1024v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvwhi
-class Hexagon_V62_v1024v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddw_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddw_dv">;
-//
-// Hexagon_V62_v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvwhi_128B
-class Hexagon_V62_v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddw_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddw_dv_128B">;
-//
-// Hexagon_V62_v1024v1024v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvwh_oracci
-class Hexagon_V62_v1024v1024v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyiwb_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwb_acc">;
-//
-// Hexagon_V62_v2048v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvwh_oracci_128B
-class Hexagon_V62_v2048v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyiwb_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwb_acc_128B">;
-// Hexagon_v512v64iv512v512v64i_Intrinsic<string GCCIntSuffix>
-// tag: V6_vaddcarry
-class Hexagon_v512v64iv512v512v64i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty, llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v512i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vsubbq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbq">;
-// Hexagon_v1024v128iv1024v1024v128i_Intrinsic<string GCCIntSuffix>
-// tag: V6_vaddcarry_128B
-class Hexagon_v1024v128iv1024v1024v128i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty, llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v1024i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vsubbq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbq_128B">;
+def int_hexagon_V6_veqh_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_and">;
-//
-// BUILTIN_INFO(HEXAGON.M6_vabsdiffb,DI_ftype_DIDI,2)
-// tag : M6_vabsdiffb
-def int_hexagon_M6_vabsdiffb :
-Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_M6_vabsdiffb">;
+def int_hexagon_V6_veqh_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_and_128B">;
-//
-// BUILTIN_INFO(HEXAGON.M6_vabsdiffub,DI_ftype_DIDI,2)
-// tag : M6_vabsdiffub
-def int_hexagon_M6_vabsdiffub :
-Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_M6_vabsdiffub">;
+def int_hexagon_V6_valignbi :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignbi">;
-//
-// BUILTIN_INFO(HEXAGON.S6_vtrunehb_ppp,DI_ftype_DIDI,2)
-// tag : S6_vtrunehb_ppp
-def int_hexagon_S6_vtrunehb_ppp :
-Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_S6_vtrunehb_ppp">;
+def int_hexagon_V6_valignbi_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_valignbi_128B">;
-//
-// BUILTIN_INFO(HEXAGON.S6_vtrunohb_ppp,DI_ftype_DIDI,2)
-// tag : S6_vtrunohb_ppp
-def int_hexagon_S6_vtrunohb_ppp :
-Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_S6_vtrunohb_ppp">;
+def int_hexagon_V6_vaddwsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwsat">;
-//
-// BUILTIN_INFO(HEXAGON.S6_vsplatrbp,DI_ftype_SI,1)
-// tag : S6_vsplatrbp
-def int_hexagon_S6_vsplatrbp :
-Hexagon_LLii_Intrinsic<"HEXAGON_S6_vsplatrbp">;
+def int_hexagon_V6_vaddwsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrb,VI_ftype_VISI,2)
-// tag : V6_vlsrb
-def int_hexagon_V6_vlsrb :
-Hexagon_V62_v512v512i_Intrinsic<"HEXAGON_V6_vlsrb">;
+def int_hexagon_V6_veqw_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrb_128B,VI_ftype_VISI,2)
-// tag : V6_vlsrb_128B
-def int_hexagon_V6_vlsrb_128B :
-Hexagon_V62_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrb_128B">;
+def int_hexagon_V6_veqw_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_and_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwuhrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrwuhrndsat
-def int_hexagon_V6_vasrwuhrndsat :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwuhrndsat">;
+def int_hexagon_V6_vabsdiffub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vabsdiffub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwuhrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrwuhrndsat_128B
-def int_hexagon_V6_vasrwuhrndsat_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwuhrndsat_128B">;
+def int_hexagon_V6_vabsdiffub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vabsdiffub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruwuhrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasruwuhrndsat
-def int_hexagon_V6_vasruwuhrndsat :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruwuhrndsat">;
+def int_hexagon_V6_vshuffeb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshuffeb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruwuhrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasruwuhrndsat_128B
-def int_hexagon_V6_vasruwuhrndsat_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruwuhrndsat_128B">;
+def int_hexagon_V6_vshuffeb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshuffeb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhbsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrhbsat
-def int_hexagon_V6_vasrhbsat :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhbsat">;
+def int_hexagon_V6_vabsdiffuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vabsdiffuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhbsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrhbsat_128B
-def int_hexagon_V6_vasrhbsat_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhbsat_128B">;
+def int_hexagon_V6_vabsdiffuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vabsdiffuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrounduwuh,VI_ftype_VIVI,2)
-// tag : V6_vrounduwuh
-def int_hexagon_V6_vrounduwuh :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vrounduwuh">;
+def int_hexagon_V6_veqw_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrounduwuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vrounduwuh_128B
-def int_hexagon_V6_vrounduwuh_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrounduwuh_128B">;
+def int_hexagon_V6_veqw_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_xor_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrounduhub,VI_ftype_VIVI,2)
-// tag : V6_vrounduhub
-def int_hexagon_V6_vrounduhub :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vrounduhub">;
+def int_hexagon_V6_vgth :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrounduhub_128B,VI_ftype_VIVI,2)
-// tag : V6_vrounduhub_128B
-def int_hexagon_V6_vrounduhub_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrounduhub_128B">;
+def int_hexagon_V6_vgth_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduwsat,VI_ftype_VIVI,2)
-// tag : V6_vadduwsat
-def int_hexagon_V6_vadduwsat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vadduwsat">;
+def int_hexagon_V6_vgtuw_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduwsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vadduwsat_128B
-def int_hexagon_V6_vadduwsat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduwsat_128B">;
+def int_hexagon_V6_vgtuw_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_xor_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduwsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vadduwsat_dv
-def int_hexagon_V6_vadduwsat_dv :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduwsat_dv">;
+def int_hexagon_V6_vgtb :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduwsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vadduwsat_dv_128B
-def int_hexagon_V6_vadduwsat_dv_128B :
-Hexagon_V62_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vadduwsat_dv_128B">;
+def int_hexagon_V6_vgtb_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuwsat,VI_ftype_VIVI,2)
-// tag : V6_vsubuwsat
-def int_hexagon_V6_vsubuwsat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vsubuwsat">;
+def int_hexagon_V6_vgtw :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuwsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubuwsat_128B
-def int_hexagon_V6_vsubuwsat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuwsat_128B">;
+def int_hexagon_V6_vgtw_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuwsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubuwsat_dv
-def int_hexagon_V6_vsubuwsat_dv :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuwsat_dv">;
+def int_hexagon_V6_vsubwq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuwsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubuwsat_dv_128B
-def int_hexagon_V6_vsubuwsat_dv_128B :
-Hexagon_V62_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubuwsat_dv_128B">;
+def int_hexagon_V6_vsubwq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwq_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbsat,VI_ftype_VIVI,2)
-// tag : V6_vaddbsat
-def int_hexagon_V6_vaddbsat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vaddbsat">;
+def int_hexagon_V6_vnot :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vnot">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddbsat_128B
-def int_hexagon_V6_vaddbsat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddbsat_128B">;
+def int_hexagon_V6_vnot_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vnot_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddbsat_dv
-def int_hexagon_V6_vaddbsat_dv :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddbsat_dv">;
+def int_hexagon_V6_vgtb_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddbsat_dv_128B
-def int_hexagon_V6_vaddbsat_dv_128B :
-Hexagon_V62_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddbsat_dv_128B">;
+def int_hexagon_V6_vgtb_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_or_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbsat,VI_ftype_VIVI,2)
-// tag : V6_vsubbsat
-def int_hexagon_V6_vsubbsat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vsubbsat">;
+def int_hexagon_V6_vgtuw_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubbsat_128B
-def int_hexagon_V6_vsubbsat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubbsat_128B">;
+def int_hexagon_V6_vgtuw_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_or_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubbsat_dv
-def int_hexagon_V6_vsubbsat_dv :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubbsat_dv">;
+def int_hexagon_V6_vaddubsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddubsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubbsat_dv_128B
-def int_hexagon_V6_vsubbsat_dv_128B :
-Hexagon_V62_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubbsat_dv_128B">;
+def int_hexagon_V6_vaddubsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddubsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddububb_sat,VI_ftype_VIVI,2)
-// tag : V6_vaddububb_sat
-def int_hexagon_V6_vaddububb_sat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vaddububb_sat">;
+def int_hexagon_V6_vmaxw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddububb_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddububb_sat_128B
-def int_hexagon_V6_vaddububb_sat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddububb_sat_128B">;
+def int_hexagon_V6_vmaxw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmaxw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubububb_sat,VI_ftype_VIVI,2)
-// tag : V6_vsubububb_sat
-def int_hexagon_V6_vsubububb_sat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vsubububb_sat">;
+def int_hexagon_V6_vaslwv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaslwv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubububb_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubububb_sat_128B
-def int_hexagon_V6_vsubububb_sat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubububb_sat_128B">;
+def int_hexagon_V6_vaslwv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaslwv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhw_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vaddhw_acc
-def int_hexagon_V6_vaddhw_acc :
-Hexagon_V62_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vaddhw_acc">;
+def int_hexagon_V6_vabsw_sat :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsw_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhw_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vaddhw_acc_128B
-def int_hexagon_V6_vaddhw_acc_128B :
-Hexagon_V62_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddhw_acc_128B">;
+def int_hexagon_V6_vabsw_sat_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsw_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhw_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vadduhw_acc
-def int_hexagon_V6_vadduhw_acc :
-Hexagon_V62_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vadduhw_acc">;
+def int_hexagon_V6_vsubwsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwsat_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhw_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vadduhw_acc_128B
-def int_hexagon_V6_vadduhw_acc_128B :
-Hexagon_V62_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vadduhw_acc_128B">;
+def int_hexagon_V6_vsubwsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubwsat_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubh_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vaddubh_acc
-def int_hexagon_V6_vaddubh_acc :
-Hexagon_V62_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vaddubh_acc">;
+def int_hexagon_V6_vroundhub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vroundhub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubh_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vaddubh_acc_128B
-def int_hexagon_V6_vaddubh_acc_128B :
-Hexagon_V62_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddubh_acc_128B">;
+def int_hexagon_V6_vroundhub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vroundhub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyewuh_64,VD_ftype_VIVI,2)
-// tag : V6_vmpyewuh_64
-def int_hexagon_V6_vmpyewuh_64 :
-Hexagon_V62_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyewuh_64">;
+def int_hexagon_V6_vdmpyhisat_acc :
+Hexagon_v16i32_v16i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyewuh_64_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpyewuh_64_128B
-def int_hexagon_V6_vmpyewuh_64_128B :
-Hexagon_V62_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyewuh_64_128B">;
+def int_hexagon_V6_vdmpyhisat_acc_128B :
+Hexagon_v32i32_v32i32v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_64_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyowh_64_acc
-def int_hexagon_V6_vmpyowh_64_acc :
-Hexagon_V62_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_64_acc">;
+def int_hexagon_V6_vmpabus :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpabus">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_64_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyowh_64_acc_128B
-def int_hexagon_V6_vmpyowh_64_acc_128B :
-Hexagon_V62_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_64_acc_128B">;
+def int_hexagon_V6_vmpabus_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vmpabus_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhb,VD_ftype_VDSI,2)
-// tag : V6_vmpauhb
-def int_hexagon_V6_vmpauhb :
-Hexagon_V62_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpauhb">;
+def int_hexagon_V6_vassignp :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vassignp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhb_128B,VD_ftype_VDSI,2)
-// tag : V6_vmpauhb_128B
-def int_hexagon_V6_vmpauhb_128B :
-Hexagon_V62_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpauhb_128B">;
+def int_hexagon_V6_vassignp_128B :
+Hexagon_v64i32_v64i32_Intrinsic<"HEXAGON_V6_vassignp_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhb_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vmpauhb_acc
-def int_hexagon_V6_vmpauhb_acc :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpauhb_acc">;
+def int_hexagon_V6_veqb :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhb_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vmpauhb_acc_128B
-def int_hexagon_V6_vmpauhb_acc_128B :
-Hexagon_V62_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpauhb_acc_128B">;
+def int_hexagon_V6_veqb_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwub,VI_ftype_VISI,2)
-// tag : V6_vmpyiwub
-def int_hexagon_V6_vmpyiwub :
-Hexagon_V62_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwub">;
+def int_hexagon_V6_vsububh :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsububh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwub_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyiwub_128B
-def int_hexagon_V6_vmpyiwub_128B :
-Hexagon_V62_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwub_128B">;
+def int_hexagon_V6_vsububh_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsububh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwub_acc,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwub_acc
-def int_hexagon_V6_vmpyiwub_acc :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwub_acc">;
+def int_hexagon_V6_lvsplatw :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_lvsplatw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwub_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwub_acc_128B
-def int_hexagon_V6_vmpyiwub_acc_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwub_acc_128B">;
+def int_hexagon_V6_lvsplatw_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_lvsplatw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandnqrt,VI_ftype_QVSI,2)
-// tag : V6_vandnqrt
-def int_hexagon_V6_vandnqrt :
-Hexagon_V62_v512v64ii_Intrinsic<"HEXAGON_V6_vandnqrt">;
+def int_hexagon_V6_vaddhnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhnq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandnqrt_128B,VI_ftype_QVSI,2)
-// tag : V6_vandnqrt_128B
-def int_hexagon_V6_vandnqrt_128B :
-Hexagon_V62_v1024v128ii_Intrinsic<"HEXAGON_V6_vandnqrt_128B">;
+def int_hexagon_V6_vaddhnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhnq_128B">;
+
+def int_hexagon_V6_vdmpyhsusat :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsusat">;
+
+def int_hexagon_V6_vdmpyhsusat_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsusat_128B">;
+
+def int_hexagon_V6_pred_not :
+Hexagon_v512i1_v512i1_Intrinsic<"HEXAGON_V6_pred_not">;
+
+def int_hexagon_V6_pred_not_128B :
+Hexagon_v1024i1_v1024i1_Intrinsic<"HEXAGON_V6_pred_not_128B">;
+
+def int_hexagon_V6_vlutvwh_oracc :
+Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracc">;
+
+def int_hexagon_V6_vlutvwh_oracc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracc_128B">;
+
+def int_hexagon_V6_vmpyiewh_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyiewh_acc">;
+
+def int_hexagon_V6_vmpyiewh_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyiewh_acc_128B">;
+
+def int_hexagon_V6_vdealvdd :
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vdealvdd">;
+
+def int_hexagon_V6_vdealvdd_128B :
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdealvdd_128B">;
+
+def int_hexagon_V6_vavgw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgw">;
+
+def int_hexagon_V6_vavgw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgw_128B">;
+
+def int_hexagon_V6_vdmpyhsusat_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc">;
+
+def int_hexagon_V6_vdmpyhsusat_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc_128B">;
+
+def int_hexagon_V6_vgtw_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_xor">;
+
+def int_hexagon_V6_vgtw_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_xor_128B">;
+
+def int_hexagon_V6_vtmpyhb_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vtmpyhb_acc">;
+
+def int_hexagon_V6_vtmpyhb_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vtmpyhb_acc_128B">;
+
+def int_hexagon_V6_vaddhw :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhw">;
+
+def int_hexagon_V6_vaddhw_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhw_128B">;
+
+def int_hexagon_V6_vaddhq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhq">;
+
+def int_hexagon_V6_vaddhq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhq_128B">;
+
+def int_hexagon_V6_vrmpyubv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpyubv">;
+
+def int_hexagon_V6_vrmpyubv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpyubv_128B">;
+
+def int_hexagon_V6_vsubh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubh">;
+
+def int_hexagon_V6_vsubh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubh_128B">;
+
+def int_hexagon_V6_vrmpyubi :
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi">;
+
+def int_hexagon_V6_vrmpyubi_128B :
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_128B">;
+
+def int_hexagon_V6_vminw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminw">;
+
+def int_hexagon_V6_vminw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vminw_128B">;
+
+def int_hexagon_V6_vmpyubv_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyubv_acc">;
+
+def int_hexagon_V6_vmpyubv_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyubv_acc_128B">;
+
+def int_hexagon_V6_pred_xor :
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_xor">;
+
+def int_hexagon_V6_pred_xor_128B :
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_xor_128B">;
+
+def int_hexagon_V6_veqb_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_xor">;
+
+def int_hexagon_V6_veqb_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_xor_128B">;
+
+def int_hexagon_V6_vmpyiewuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyiewuh">;
+
+def int_hexagon_V6_vmpyiewuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyiewuh_128B">;
+
+def int_hexagon_V6_vmpybusv_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpybusv_acc">;
+
+def int_hexagon_V6_vmpybusv_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpybusv_acc_128B">;
+
+def int_hexagon_V6_vavguhrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavguhrnd">;
+
+def int_hexagon_V6_vavguhrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavguhrnd_128B">;
+
+def int_hexagon_V6_vmpyowh_rnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyowh_rnd">;
+
+def int_hexagon_V6_vmpyowh_rnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_128B">;
+
+def int_hexagon_V6_vsubwsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwsat">;
+
+def int_hexagon_V6_vsubwsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwsat_128B">;
+
+def int_hexagon_V6_vsubuhw :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubuhw">;
+
+def int_hexagon_V6_vsubuhw_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuhw_128B">;
+
+def int_hexagon_V6_vrmpybusi_acc :
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc">;
+
+def int_hexagon_V6_vrmpybusi_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B">;
+
+def int_hexagon_V6_vasrw :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vasrw">;
+
+def int_hexagon_V6_vasrw_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vasrw_128B">;
+
+def int_hexagon_V6_vasrh :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vasrh">;
+
+def int_hexagon_V6_vasrh_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vasrh_128B">;
+
+def int_hexagon_V6_vmpyuhv :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyuhv">;
+
+def int_hexagon_V6_vmpyuhv_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyuhv_128B">;
+
+def int_hexagon_V6_vasrhbrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrhbrndsat">;
+
+def int_hexagon_V6_vasrhbrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrhbrndsat_128B">;
+
+def int_hexagon_V6_vsubuhsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuhsat_dv">;
+
+def int_hexagon_V6_vsubuhsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubuhsat_dv_128B">;
+
+def int_hexagon_V6_vabsdiffw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vabsdiffw">;
+
+def int_hexagon_V6_vabsdiffw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vabsdiffw_128B">;
+
+// V62 HVX Instructions.
-//
-// BUILTIN_INFO(HEXAGON.V6_vandnqrt_acc,VI_ftype_VIQVSI,3)
-// tag : V6_vandnqrt_acc
def int_hexagon_V6_vandnqrt_acc :
-Hexagon_V62_v512v512v64ii_Intrinsic<"HEXAGON_V6_vandnqrt_acc">;
+Hexagon_v16i32_v16i32v512i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandnqrt_acc_128B,VI_ftype_VIQVSI,3)
-// tag : V6_vandnqrt_acc_128B
def int_hexagon_V6_vandnqrt_acc_128B :
-Hexagon_V62_v1024v1024v128ii_Intrinsic<"HEXAGON_V6_vandnqrt_acc_128B">;
+Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvqv,VI_ftype_QVVI,2)
-// tag : V6_vandvqv
-def int_hexagon_V6_vandvqv :
-Hexagon_V62_v512v64iv512_Intrinsic<"HEXAGON_V6_vandvqv">;
+def int_hexagon_V6_vaddclbh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddclbh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvqv_128B,VI_ftype_QVVI,2)
-// tag : V6_vandvqv_128B
-def int_hexagon_V6_vandvqv_128B :
-Hexagon_V62_v1024v128iv1024_Intrinsic<"HEXAGON_V6_vandvqv_128B">;
+def int_hexagon_V6_vaddclbh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddclbh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvnqv,VI_ftype_QVVI,2)
-// tag : V6_vandvnqv
-def int_hexagon_V6_vandvnqv :
-Hexagon_V62_v512v64iv512_Intrinsic<"HEXAGON_V6_vandvnqv">;
+def int_hexagon_V6_vmpyowh_64_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyowh_64_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvnqv_128B,VI_ftype_QVVI,2)
-// tag : V6_vandvnqv_128B
-def int_hexagon_V6_vandvnqv_128B :
-Hexagon_V62_v1024v128iv1024_Intrinsic<"HEXAGON_V6_vandvnqv_128B">;
+def int_hexagon_V6_vmpyowh_64_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyowh_64_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_scalar2v2,QV_ftype_SI,1)
-// tag : V6_pred_scalar2v2
-def int_hexagon_V6_pred_scalar2v2 :
-Hexagon_V62_v64ii_Intrinsic<"HEXAGON_V6_pred_scalar2v2">;
+def int_hexagon_V6_vmpyewuh_64 :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyewuh_64">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_scalar2v2_128B,QV_ftype_SI,1)
-// tag : V6_pred_scalar2v2_128B
-def int_hexagon_V6_pred_scalar2v2_128B :
-Hexagon_V62_v128ii_Intrinsic<"HEXAGON_V6_pred_scalar2v2_128B">;
+def int_hexagon_V6_vmpyewuh_64_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyewuh_64_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_shuffeqw,QV_ftype_QVQV,2)
-// tag : V6_shuffeqw
-def int_hexagon_V6_shuffeqw :
-Hexagon_V62_v64iv64iv64i_Intrinsic<"HEXAGON_V6_shuffeqw">;
+def int_hexagon_V6_vsatuwuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsatuwuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_shuffeqw_128B,QV_ftype_QVQV,2)
-// tag : V6_shuffeqw_128B
-def int_hexagon_V6_shuffeqw_128B :
-Hexagon_V62_v128iv128iv128i_Intrinsic<"HEXAGON_V6_shuffeqw_128B">;
+def int_hexagon_V6_vsatuwuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsatuwuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_shuffeqh,QV_ftype_QVQV,2)
-// tag : V6_shuffeqh
def int_hexagon_V6_shuffeqh :
-Hexagon_V62_v64iv64iv64i_Intrinsic<"HEXAGON_V6_shuffeqh">;
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_shuffeqh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_shuffeqh_128B,QV_ftype_QVQV,2)
-// tag : V6_shuffeqh_128B
def int_hexagon_V6_shuffeqh_128B :
-Hexagon_V62_v128iv128iv128i_Intrinsic<"HEXAGON_V6_shuffeqh_128B">;
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_shuffeqh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxb,VI_ftype_VIVI,2)
-// tag : V6_vmaxb
-def int_hexagon_V6_vmaxb :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxb">;
+def int_hexagon_V6_shuffeqw :
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_shuffeqw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxb_128B,VI_ftype_VIVI,2)
-// tag : V6_vmaxb_128B
-def int_hexagon_V6_vmaxb_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxb_128B">;
+def int_hexagon_V6_shuffeqw_128B :
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_shuffeqw_128B">;
+
+def int_hexagon_V6_ldcnpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldcnpnt0">;
+
+def int_hexagon_V6_ldcnpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldcnpnt0_128B">;
+
+def int_hexagon_V6_vsubcarry :
+Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic;
+
+def int_hexagon_V6_vsubcarry_128B :
+Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B;
+
+def int_hexagon_V6_vasrhbsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrhbsat">;
+
+def int_hexagon_V6_vasrhbsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrhbsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminb,VI_ftype_VIVI,2)
-// tag : V6_vminb
def int_hexagon_V6_vminb :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vminb">;
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminb_128B,VI_ftype_VIVI,2)
-// tag : V6_vminb_128B
def int_hexagon_V6_vminb_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminb_128B">;
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vminb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsatuwuh,VI_ftype_VIVI,2)
-// tag : V6_vsatuwuh
-def int_hexagon_V6_vsatuwuh :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vsatuwuh">;
+def int_hexagon_V6_vmpauhb_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpauhb_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsatuwuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vsatuwuh_128B
-def int_hexagon_V6_vsatuwuh_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsatuwuh_128B">;
+def int_hexagon_V6_vmpauhb_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vmpauhb_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplath,VI_ftype_SI,1)
-// tag : V6_lvsplath
-def int_hexagon_V6_lvsplath :
-Hexagon_V62_v512i_Intrinsic<"HEXAGON_V6_lvsplath">;
+def int_hexagon_V6_vaddhw_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhw_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplath_128B,VI_ftype_SI,1)
-// tag : V6_lvsplath_128B
-def int_hexagon_V6_lvsplath_128B :
-Hexagon_V62_v1024i_Intrinsic<"HEXAGON_V6_lvsplath_128B">;
+def int_hexagon_V6_vaddhw_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhw_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplatb,VI_ftype_SI,1)
-// tag : V6_lvsplatb
-def int_hexagon_V6_lvsplatb :
-Hexagon_V62_v512i_Intrinsic<"HEXAGON_V6_lvsplatb">;
+def int_hexagon_V6_vlsrb :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vlsrb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplatb_128B,VI_ftype_SI,1)
-// tag : V6_lvsplatb_128B
-def int_hexagon_V6_lvsplatb_128B :
-Hexagon_V62_v1024i_Intrinsic<"HEXAGON_V6_lvsplatb_128B">;
+def int_hexagon_V6_vlsrb_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vlsrb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddclbw,VI_ftype_VIVI,2)
-// tag : V6_vaddclbw
-def int_hexagon_V6_vaddclbw :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vaddclbw">;
+def int_hexagon_V6_vlutvwhi :
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddclbw_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddclbw_128B
-def int_hexagon_V6_vaddclbw_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddclbw_128B">;
+def int_hexagon_V6_vlutvwhi_128B :
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddclbh,VI_ftype_VIVI,2)
-// tag : V6_vaddclbh
-def int_hexagon_V6_vaddclbh :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vaddclbh">;
+def int_hexagon_V6_vaddububb_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddububb_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddclbh_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddclbh_128B
-def int_hexagon_V6_vaddclbh_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddclbh_128B">;
+def int_hexagon_V6_vaddububb_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddububb_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvbi,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvbi
-def int_hexagon_V6_vlutvvbi :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvbi">;
+def int_hexagon_V6_vsubbsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbsat_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvbi_128B,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvbi_128B
-def int_hexagon_V6_vlutvvbi_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvbi_128B">;
+def int_hexagon_V6_vsubbsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubbsat_dv_128B">;
+
+def int_hexagon_V6_ldtp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldtp0">;
+
+def int_hexagon_V6_ldtp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldtp0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracci,VI_ftype_VIVIVISI,4)
-// tag : V6_vlutvvb_oracci
def int_hexagon_V6_vlutvvb_oracci :
-Hexagon_V62_v512v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb_oracci">;
+Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracci_128B,VI_ftype_VIVIVISI,4)
-// tag : V6_vlutvvb_oracci_128B
def int_hexagon_V6_vlutvvb_oracci_128B :
-Hexagon_V62_v1024v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_oracci_128B">;
+Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwhi,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwhi
-def int_hexagon_V6_vlutvwhi :
-Hexagon_V62_v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwhi">;
+def int_hexagon_V6_vsubuwsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuwsat_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwhi_128B,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwhi_128B
-def int_hexagon_V6_vlutvwhi_128B :
-Hexagon_V62_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwhi_128B">;
+def int_hexagon_V6_vsubuwsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubuwsat_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracci,VD_ftype_VDVIVISI,4)
-// tag : V6_vlutvwh_oracci
-def int_hexagon_V6_vlutvwh_oracci :
-Hexagon_V62_v1024v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh_oracci">;
+def int_hexagon_V6_ldpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldpnt0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracci_128B,VD_ftype_VDVIVISI,4)
-// tag : V6_vlutvwh_oracci_128B
-def int_hexagon_V6_vlutvwh_oracci_128B :
-Hexagon_V62_v2048v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_oracci_128B">;
+def int_hexagon_V6_ldpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldpnt0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_nm,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvb_nm
-def int_hexagon_V6_vlutvvb_nm :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb_nm">;
+def int_hexagon_V6_vandvnqv :
+Hexagon_v16i32_v512i1v16i32_Intrinsic<"HEXAGON_V6_vandvnqv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_nm_128B,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvb_nm_128B
-def int_hexagon_V6_vlutvvb_nm_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_nm_128B">;
+def int_hexagon_V6_vandvnqv_128B :
+Hexagon_v32i32_v1024i1v32i32_Intrinsic<"HEXAGON_V6_vandvnqv_128B">;
+
+def int_hexagon_V6_lvsplatb :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_lvsplatb">;
+
+def int_hexagon_V6_lvsplatb_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_lvsplatb_128B">;
+
+def int_hexagon_V6_lvsplath :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_lvsplath">;
+
+def int_hexagon_V6_lvsplath_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_lvsplath_128B">;
+
+def int_hexagon_V6_ldtpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldtpnt0">;
+
+def int_hexagon_V6_ldtpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldtpnt0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_nm,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwh_nm
def int_hexagon_V6_vlutvwh_nm :
-Hexagon_V62_v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh_nm">;
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_nm">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_nm_128B,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwh_nm_128B
def int_hexagon_V6_vlutvwh_nm_128B :
-Hexagon_V62_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_nm_128B">;
-
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddcarry,VI_ftype_VIVIQV,3)
-// tag: V6_vaddcarry
-def int_hexagon_V6_vaddcarry :
-Hexagon_v512v64iv512v512v64i_Intrinsic<"HEXAGON_v6_vaddcarry">;
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_nm_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddcarry_128B,VI_ftype_VIVIQV,3)
-// tag: V6_vaddcarry_128B
-def int_hexagon_V6_vaddcarry_128B :
-Hexagon_v1024v128iv1024v1024v128i_Intrinsic<"HEXAGON_v6_vaddcarry_128B">;
+def int_hexagon_V6_ldnpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldnpnt0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubcarry,VI_ftype_VIVIQV,3)
-// tag: V6_vsubcarry
-def int_hexagon_V6_vsubcarry :
-Hexagon_v512v64iv512v512v64i_Intrinsic<"HEXAGON_v6_vsubcarry">;
+def int_hexagon_V6_ldnpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldnpnt0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubcarry_128B,VI_ftype_VIVIQV,3)
-// tag: V6_vsubcarry_128B
-def int_hexagon_V6_vsubcarry_128B :
-Hexagon_v1024v128iv1024v1024v128i_Intrinsic<"HEXAGON_v6_vsubcarry_128B">;
+def int_hexagon_V6_vmpauhb :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpauhb">;
+def int_hexagon_V6_vmpauhb_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vmpauhb_128B">;
-///
-/// HexagonV65 intrinsics
-///
+def int_hexagon_V6_ldtnp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldtnp0">;
-//
-// Hexagon_V65_iLLiLLi_Intrinsic<string GCCIntSuffix>
-// tag : A6_vcmpbeq_notany
-class Hexagon_V65_iLLiLLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_ldtnp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldtnp0_128B">;
-//
-// Hexagon_V65_v1024v512LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyub_rtt
-class Hexagon_V65_v1024v512LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrounduhub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrounduhub">;
-//
-// Hexagon_V65_v2048v1024LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyub_rtt_128B
-class Hexagon_V65_v2048v1024LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrounduhub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrounduhub_128B">;
-//
-// Hexagon_V65_v1024v1024v512LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyub_rtt_acc
-class Hexagon_V65_v1024v1024v512LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vadduhw_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vadduhw_acc">;
-//
-// Hexagon_V65_v2048v2048v1024LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyub_rtt_acc_128B
-class Hexagon_V65_v2048v2048v1024LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vadduhw_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduhw_acc_128B">;
-//
-// Hexagon_V65_v512v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vasruwuhsat
-class Hexagon_V65_v512v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_ldcp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldcp0">;
-//
-// Hexagon_V65_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vasruwuhsat_128B
-class Hexagon_V65_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_ldcp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldcp0_128B">;
-//
-// Hexagon_V65_v512v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vavguw
-class Hexagon_V65_v512v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vadduwsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadduwsat">;
-//
-// Hexagon_V65_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vavguw_128B
-class Hexagon_V65_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vadduwsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduwsat_128B">;
-//
-// Hexagon_V65_v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vabsb
-class Hexagon_V65_v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_ldtnpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldtnpnt0">;
-//
-// Hexagon_V65_v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vabsb_128B
-class Hexagon_V65_v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_ldtnpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldtnpnt0_128B">;
-//
-// Hexagon_V65_v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpabuu
-class Hexagon_V65_v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddbsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbsat">;
-//
-// Hexagon_V65_v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpabuu_128B
-class Hexagon_V65_v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddbsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbsat_128B">;
-//
-// Hexagon_V65_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpabuu_acc_128B
-class Hexagon_V65_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vandnqrt :
+Hexagon_v16i32_v512i1i32_Intrinsic<"HEXAGON_V6_vandnqrt">;
-//
-// Hexagon_V65_v1024v1024v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyh_acc
-class Hexagon_V65_v1024v1024v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vandnqrt_128B :
+Hexagon_v32i32_v1024i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_128B">;
-//
-// Hexagon_V65_v2048v2048v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyh_acc_128B
-class Hexagon_V65_v2048v2048v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyiwub_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwub_acc">;
-//
-// Hexagon_V65_v512v512v512LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpahhsat
-class Hexagon_V65_v512v512v512LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyiwub_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwub_acc_128B">;
-//
-// Hexagon_V65_v1024v1024v1024LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpahhsat_128B
-class Hexagon_V65_v1024v1024v1024LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaxb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxb">;
-//
-// Hexagon_V65_v512v512LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlut4
-class Hexagon_V65_v512v512LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaxb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmaxb_128B">;
-//
-// Hexagon_V65_v1024v1024LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlut4_128B
-class Hexagon_V65_v1024v1024LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vandvqv :
+Hexagon_v16i32_v512i1v16i32_Intrinsic<"HEXAGON_V6_vandvqv">;
-//
-// Hexagon_V65_v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyuhe
-class Hexagon_V65_v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vandvqv_128B :
+Hexagon_v32i32_v1024i1v32i32_Intrinsic<"HEXAGON_V6_vandvqv_128B">;
-//
-// Hexagon_V65_v512v64i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vprefixqb
-class Hexagon_V65_v512v64i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddcarry :
+Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic;
-//
-// Hexagon_V65_v1024v128i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vprefixqb_128B
-class Hexagon_V65_v1024v128i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddcarry_128B :
+Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B;
-//
-// BUILTIN_INFO(HEXAGON.A6_vcmpbeq_notany,QI_ftype_DIDI,2)
-// tag : A6_vcmpbeq_notany
-def int_hexagon_A6_vcmpbeq_notany :
-Hexagon_V65_iLLiLLi_Intrinsic<"HEXAGON_A6_vcmpbeq_notany">;
+def int_hexagon_V6_vasrwuhrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrwuhrndsat">;
-//
-// BUILTIN_INFO(HEXAGON.A6_vcmpbeq_notany_128B,QI_ftype_DIDI,2)
-// tag : A6_vcmpbeq_notany_128B
-def int_hexagon_A6_vcmpbeq_notany_128B :
-Hexagon_V65_iLLiLLi_Intrinsic<"HEXAGON_A6_vcmpbeq_notany_128B">;
+def int_hexagon_V6_vasrwuhrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwuhrndsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt,VD_ftype_VIDI,2)
-// tag : V6_vrmpyub_rtt
-def int_hexagon_V6_vrmpyub_rtt :
-Hexagon_V65_v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt">;
+def int_hexagon_V6_vlutvvbi :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_128B,VD_ftype_VIDI,2)
-// tag : V6_vrmpyub_rtt_128B
-def int_hexagon_V6_vrmpyub_rtt_128B :
-Hexagon_V65_v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_128B">;
+def int_hexagon_V6_vlutvvbi_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_acc,VD_ftype_VDVIDI,3)
-// tag : V6_vrmpyub_rtt_acc
-def int_hexagon_V6_vrmpyub_rtt_acc :
-Hexagon_V65_v1024v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc">;
+def int_hexagon_V6_vsubuwsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubuwsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_acc_128B,VD_ftype_VDVIDI,3)
-// tag : V6_vrmpyub_rtt_acc_128B
-def int_hexagon_V6_vrmpyub_rtt_acc_128B :
-Hexagon_V65_v2048v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">;
+def int_hexagon_V6_vsubuwsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuwsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt,VD_ftype_VIDI,2)
-// tag : V6_vrmpybub_rtt
-def int_hexagon_V6_vrmpybub_rtt :
-Hexagon_V65_v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt">;
+def int_hexagon_V6_vaddbsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbsat_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_128B,VD_ftype_VIDI,2)
-// tag : V6_vrmpybub_rtt_128B
-def int_hexagon_V6_vrmpybub_rtt_128B :
-Hexagon_V65_v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_128B">;
+def int_hexagon_V6_vaddbsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddbsat_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_acc,VD_ftype_VDVIDI,3)
-// tag : V6_vrmpybub_rtt_acc
-def int_hexagon_V6_vrmpybub_rtt_acc :
-Hexagon_V65_v1024v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc">;
+def int_hexagon_V6_ldnp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldnp0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_acc_128B,VD_ftype_VDVIDI,3)
-// tag : V6_vrmpybub_rtt_acc_128B
-def int_hexagon_V6_vrmpybub_rtt_acc_128B :
-Hexagon_V65_v2048v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc_128B">;
+def int_hexagon_V6_ldnp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldnp0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruwuhsat,VI_ftype_VIVISI,3)
-// tag : V6_vasruwuhsat
-def int_hexagon_V6_vasruwuhsat :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruwuhsat">;
+def int_hexagon_V6_vasruwuhrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasruwuhrndsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruwuhsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasruwuhsat_128B
-def int_hexagon_V6_vasruwuhsat_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruwuhsat_128B">;
+def int_hexagon_V6_vasruwuhrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasruwuhrndsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruhubsat,VI_ftype_VIVISI,3)
-// tag : V6_vasruhubsat
-def int_hexagon_V6_vasruhubsat :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruhubsat">;
+def int_hexagon_V6_vrounduwuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrounduwuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruhubsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasruhubsat_128B
-def int_hexagon_V6_vasruhubsat_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruhubsat_128B">;
+def int_hexagon_V6_vrounduwuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrounduwuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruhubrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasruhubrndsat
-def int_hexagon_V6_vasruhubrndsat :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruhubrndsat">;
+def int_hexagon_V6_vlutvvb_nm :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_nm">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruhubrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasruhubrndsat_128B
-def int_hexagon_V6_vasruhubrndsat_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruhubrndsat_128B">;
+def int_hexagon_V6_vlutvvb_nm_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_nm_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslh_acc,VI_ftype_VIVISI,3)
-// tag : V6_vaslh_acc
-def int_hexagon_V6_vaslh_acc :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vaslh_acc">;
+def int_hexagon_V6_pred_scalar2v2 :
+Hexagon_v512i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslh_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vaslh_acc_128B
-def int_hexagon_V6_vaslh_acc_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vaslh_acc_128B">;
+def int_hexagon_V6_pred_scalar2v2_128B :
+Hexagon_v1024i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrh_acc,VI_ftype_VIVISI,3)
-// tag : V6_vasrh_acc
-def int_hexagon_V6_vasrh_acc :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrh_acc">;
+def int_hexagon_V6_ldp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldp0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrh_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrh_acc_128B
-def int_hexagon_V6_vasrh_acc_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrh_acc_128B">;
+def int_hexagon_V6_ldp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldp0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguw,VI_ftype_VIVI,2)
-// tag : V6_vavguw
-def int_hexagon_V6_vavguw :
-Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavguw">;
+def int_hexagon_V6_vaddubh_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddubh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguw_128B,VI_ftype_VIVI,2)
-// tag : V6_vavguw_128B
-def int_hexagon_V6_vavguw_128B :
-Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguw_128B">;
+def int_hexagon_V6_vaddubh_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddubh_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguwrnd,VI_ftype_VIVI,2)
-// tag : V6_vavguwrnd
-def int_hexagon_V6_vavguwrnd :
-Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavguwrnd">;
+def int_hexagon_V6_vaddclbw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddclbw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguwrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavguwrnd_128B
-def int_hexagon_V6_vavguwrnd_128B :
-Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguwrnd_128B">;
+def int_hexagon_V6_vaddclbw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddclbw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgb,VI_ftype_VIVI,2)
-// tag : V6_vavgb
-def int_hexagon_V6_vavgb :
-Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavgb">;
+def int_hexagon_V6_ldcpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldcpnt0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgb_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgb_128B
-def int_hexagon_V6_vavgb_128B :
-Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgb_128B">;
+def int_hexagon_V6_ldcpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldcpnt0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgbrnd,VI_ftype_VIVI,2)
-// tag : V6_vavgbrnd
-def int_hexagon_V6_vavgbrnd :
-Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavgbrnd">;
+def int_hexagon_V6_vadduwsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduwsat_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgbrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgbrnd_128B
-def int_hexagon_V6_vavgbrnd_128B :
-Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgbrnd_128B">;
+def int_hexagon_V6_vadduwsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vadduwsat_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgb,VI_ftype_VIVI,2)
-// tag : V6_vnavgb
-def int_hexagon_V6_vnavgb :
-Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgb">;
+def int_hexagon_V6_vmpyiwub :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgb_128B,VI_ftype_VIVI,2)
-// tag : V6_vnavgb_128B
-def int_hexagon_V6_vnavgb_128B :
-Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgb_128B">;
+def int_hexagon_V6_vmpyiwub_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsb,VI_ftype_VI,1)
-// tag : V6_vabsb
-def int_hexagon_V6_vabsb :
-Hexagon_V65_v512v512_Intrinsic<"HEXAGON_V6_vabsb">;
+def int_hexagon_V6_vsubububb_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubububb_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsb_128B,VI_ftype_VI,1)
-// tag : V6_vabsb_128B
-def int_hexagon_V6_vabsb_128B :
-Hexagon_V65_v1024v1024_Intrinsic<"HEXAGON_V6_vabsb_128B">;
+def int_hexagon_V6_vsubububb_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubububb_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsb_sat,VI_ftype_VI,1)
-// tag : V6_vabsb_sat
-def int_hexagon_V6_vabsb_sat :
-Hexagon_V65_v512v512_Intrinsic<"HEXAGON_V6_vabsb_sat">;
+def int_hexagon_V6_ldcnp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldcnp0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsb_sat_128B,VI_ftype_VI,1)
-// tag : V6_vabsb_sat_128B
-def int_hexagon_V6_vabsb_sat_128B :
-Hexagon_V65_v1024v1024_Intrinsic<"HEXAGON_V6_vabsb_sat_128B">;
+def int_hexagon_V6_ldcnp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldcnp0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuu,VD_ftype_VDSI,2)
-// tag : V6_vmpabuu
-def int_hexagon_V6_vmpabuu :
-Hexagon_V65_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabuu">;
+def int_hexagon_V6_vlutvwh_oracci :
+Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuu_128B,VD_ftype_VDSI,2)
-// tag : V6_vmpabuu_128B
-def int_hexagon_V6_vmpabuu_128B :
-Hexagon_V65_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabuu_128B">;
+def int_hexagon_V6_vlutvwh_oracci_128B :
+Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuu_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vmpabuu_acc
-def int_hexagon_V6_vmpabuu_acc :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabuu_acc">;
+def int_hexagon_V6_vsubbsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuu_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vmpabuu_acc_128B
-def int_hexagon_V6_vmpabuu_acc_128B :
-Hexagon_V65_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabuu_acc_128B">;
+def int_hexagon_V6_vsubbsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyh_acc,VD_ftype_VDVISI,3)
-// tag : V6_vmpyh_acc
-def int_hexagon_V6_vmpyh_acc :
-Hexagon_V65_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyh_acc">;
+// V65 HVX Instructions.
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyh_acc_128B,VD_ftype_VDVISI,3)
-// tag : V6_vmpyh_acc_128B
-def int_hexagon_V6_vmpyh_acc_128B :
-Hexagon_V65_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyh_acc_128B">;
+def int_hexagon_V6_vasruhubrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasruhubrndsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahhsat,VI_ftype_VIVIDI,3)
-// tag : V6_vmpahhsat
-def int_hexagon_V6_vmpahhsat :
-Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpahhsat">;
+def int_hexagon_V6_vasruhubrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasruhubrndsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahhsat_128B,VI_ftype_VIVIDI,3)
-// tag : V6_vmpahhsat_128B
-def int_hexagon_V6_vmpahhsat_128B :
-Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpahhsat_128B">;
+def int_hexagon_V6_vrmpybub_rtt :
+Hexagon_v32i32_v16i32i64_Intrinsic<"HEXAGON_V6_vrmpybub_rtt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhuhsat,VI_ftype_VIVIDI,3)
-// tag : V6_vmpauhuhsat
-def int_hexagon_V6_vmpauhuhsat :
-Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpauhuhsat">;
+def int_hexagon_V6_vrmpybub_rtt_128B :
+Hexagon_v64i32_v32i32i64_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhuhsat_128B,VI_ftype_VIVIDI,3)
-// tag : V6_vmpauhuhsat_128B
-def int_hexagon_V6_vmpauhuhsat_128B :
-Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpauhuhsat_128B">;
+def int_hexagon_V6_vmpahhsat :
+Hexagon_v16i32_v16i32v16i32i64_Intrinsic<"HEXAGON_V6_vmpahhsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpsuhuhsat,VI_ftype_VIVIDI,3)
-// tag : V6_vmpsuhuhsat
-def int_hexagon_V6_vmpsuhuhsat :
-Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpsuhuhsat">;
+def int_hexagon_V6_vmpahhsat_128B :
+Hexagon_v32i32_v32i32v32i32i64_Intrinsic<"HEXAGON_V6_vmpahhsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpsuhuhsat_128B,VI_ftype_VIVIDI,3)
-// tag : V6_vmpsuhuhsat_128B
-def int_hexagon_V6_vmpsuhuhsat_128B :
-Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpsuhuhsat_128B">;
+def int_hexagon_V6_vavguwrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavguwrnd">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlut4,VI_ftype_VIDI,2)
-// tag : V6_vlut4
-def int_hexagon_V6_vlut4 :
-Hexagon_V65_v512v512LLi_Intrinsic<"HEXAGON_V6_vlut4">;
+def int_hexagon_V6_vavguwrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavguwrnd_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlut4_128B,VI_ftype_VIDI,2)
-// tag : V6_vlut4_128B
-def int_hexagon_V6_vlut4_128B :
-Hexagon_V65_v1024v1024LLi_Intrinsic<"HEXAGON_V6_vlut4_128B">;
+def int_hexagon_V6_vnavgb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhe,VI_ftype_VISI,2)
-// tag : V6_vmpyuhe
-def int_hexagon_V6_vmpyuhe :
-Hexagon_V65_v512v512i_Intrinsic<"HEXAGON_V6_vmpyuhe">;
+def int_hexagon_V6_vnavgb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vnavgb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhe_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyuhe_128B
-def int_hexagon_V6_vmpyuhe_128B :
-Hexagon_V65_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyuhe_128B">;
+def int_hexagon_V6_vasrh_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhe_acc,VI_ftype_VIVISI,3)
-// tag : V6_vmpyuhe_acc
-def int_hexagon_V6_vmpyuhe_acc :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyuhe_acc">;
+def int_hexagon_V6_vasrh_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrh_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhe_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vmpyuhe_acc_128B
-def int_hexagon_V6_vmpyuhe_acc_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyuhe_acc_128B">;
+def int_hexagon_V6_vmpauhuhsat :
+Hexagon_v16i32_v16i32v16i32i64_Intrinsic<"HEXAGON_V6_vmpauhuhsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqb,VI_ftype_QV,1)
-// tag : V6_vprefixqb
-def int_hexagon_V6_vprefixqb :
-Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqb">;
+def int_hexagon_V6_vmpauhuhsat_128B :
+Hexagon_v32i32_v32i32v32i32i64_Intrinsic<"HEXAGON_V6_vmpauhuhsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqb_128B,VI_ftype_QV,1)
-// tag : V6_vprefixqb_128B
-def int_hexagon_V6_vprefixqb_128B :
-Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqb_128B">;
+def int_hexagon_V6_vmpyh_acc :
+Hexagon_v32i32_v32i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqh,VI_ftype_QV,1)
-// tag : V6_vprefixqh
-def int_hexagon_V6_vprefixqh :
-Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqh">;
+def int_hexagon_V6_vmpyh_acc_128B :
+Hexagon_v64i32_v64i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyh_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqh_128B,VI_ftype_QV,1)
-// tag : V6_vprefixqh_128B
-def int_hexagon_V6_vprefixqh_128B :
-Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqh_128B">;
+def int_hexagon_V6_vrmpybub_rtt_acc :
+Hexagon_v32i32_v32i32v16i32i64_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqw,VI_ftype_QV,1)
-// tag : V6_vprefixqw
-def int_hexagon_V6_vprefixqw :
-Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqw">;
+def int_hexagon_V6_vrmpybub_rtt_acc_128B :
+Hexagon_v64i32_v64i32v32i32i64_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqw_128B,VI_ftype_QV,1)
-// tag : V6_vprefixqw_128B
-def int_hexagon_V6_vprefixqw_128B :
-Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqw_128B">;
+def int_hexagon_V6_vavgb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgb">;
+def int_hexagon_V6_vavgb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgb_128B">;
-// The scatter/gather ones below will not be generated from iset.py. Make sure
-// you don't overwrite these.
-class Hexagon_V65_vvmemiiv512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
- llvm_v16i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vaslh_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vaslh_acc">;
-class Hexagon_V65_vvmemiiv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
- llvm_v32i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vaslh_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vaslh_acc_128B">;
-class Hexagon_V65_vvmemiiv2048_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
- llvm_v64i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vavguw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavguw">;
-class Hexagon_V65_vvmemv64iiiv512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v16i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vavguw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavguw_128B">;
-class Hexagon_V65_vvmemv128iiiv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v32i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vlut4 :
+Hexagon_v16i32_v16i32i64_Intrinsic<"HEXAGON_V6_vlut4">;
-class Hexagon_V65_vvmemv64iiiv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v32i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vlut4_128B :
+Hexagon_v32i32_v32i32i64_Intrinsic<"HEXAGON_V6_vlut4_128B">;
-class Hexagon_V65_vvmemv128iiiv2048_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v64i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vmpyuhe_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyuhe_acc">;
-def int_hexagon_V6_vgathermw :
-Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermw">;
+def int_hexagon_V6_vmpyuhe_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyuhe_acc_128B">;
-def int_hexagon_V6_vgathermw_128B :
-Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermw_128B">;
+def int_hexagon_V6_vrmpyub_rtt :
+Hexagon_v32i32_v16i32i64_Intrinsic<"HEXAGON_V6_vrmpyub_rtt">;
-def int_hexagon_V6_vgathermh :
-Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermh">;
+def int_hexagon_V6_vrmpyub_rtt_128B :
+Hexagon_v64i32_v32i32i64_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_128B">;
-def int_hexagon_V6_vgathermh_128B :
-Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermh_128B">;
+def int_hexagon_V6_vmpsuhuhsat :
+Hexagon_v16i32_v16i32v16i32i64_Intrinsic<"HEXAGON_V6_vmpsuhuhsat">;
-def int_hexagon_V6_vgathermhw :
-Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermhw">;
+def int_hexagon_V6_vmpsuhuhsat_128B :
+Hexagon_v32i32_v32i32v32i32i64_Intrinsic<"HEXAGON_V6_vmpsuhuhsat_128B">;
-def int_hexagon_V6_vgathermhw_128B :
-Hexagon_V65_vvmemiiv2048_Intrinsic<"HEXAGON_V6_vgathermhw_128B">;
+def int_hexagon_V6_vasruhubsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasruhubsat">;
-def int_hexagon_V6_vgathermwq :
-Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermwq">;
+def int_hexagon_V6_vasruhubsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasruhubsat_128B">;
-def int_hexagon_V6_vgathermwq_128B :
-Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermwq_128B">;
+def int_hexagon_V6_vmpyuhe :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyuhe">;
-def int_hexagon_V6_vgathermhq :
-Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermhq">;
+def int_hexagon_V6_vmpyuhe_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyuhe_128B">;
-def int_hexagon_V6_vgathermhq_128B :
-Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhq_128B">;
+def int_hexagon_V6_vrmpyub_rtt_acc :
+Hexagon_v32i32_v32i32v16i32i64_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc">;
-def int_hexagon_V6_vgathermhwq :
-Hexagon_V65_vvmemv64iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhwq">;
+def int_hexagon_V6_vrmpyub_rtt_acc_128B :
+Hexagon_v64i32_v64i32v32i32i64_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">;
-def int_hexagon_V6_vgathermhwq_128B :
-Hexagon_V65_vvmemv128iiiv2048_Intrinsic<"HEXAGON_V6_vgathermhwq_128B">;
+def int_hexagon_V6_vasruwuhsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasruwuhsat">;
-class Hexagon_V65_viiv512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_i32_ty,llvm_i32_ty,
- llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vasruwuhsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasruwuhsat_128B">;
-class Hexagon_V65_viiv1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_i32_ty,llvm_i32_ty,
- llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vmpabuu_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpabuu_acc">;
-class Hexagon_V65_vv64iiiv512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v512i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v16i32_ty,
- llvm_v16i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vmpabuu_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vmpabuu_acc_128B">;
-class Hexagon_V65_vv128iiiv1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v1024i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v32i32_ty,
- llvm_v32i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vprefixqw :
+Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqw">;
-class Hexagon_V65_viiv1024v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_i32_ty,llvm_i32_ty,
- llvm_v32i32_ty,llvm_v16i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vprefixqw_128B :
+Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqw_128B">;
-class Hexagon_V65_viiv2048v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_i32_ty,llvm_i32_ty,
- llvm_v64i32_ty,llvm_v32i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vprefixqh :
+Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqh">;
-class Hexagon_V65_vv64iiiv1024v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v512i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v32i32_ty,
- llvm_v16i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vprefixqh_128B :
+Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqh_128B">;
-class Hexagon_V65_vv128iiiv2048v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v1024i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v64i32_ty,
- llvm_v32i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vprefixqb :
+Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqb">;
-class Hexagon_V65_v2048_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [],
- [IntrNoMem]>;
+def int_hexagon_V6_vprefixqb_128B :
+Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermw,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermw
-def int_hexagon_V6_vscattermw :
-Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw">;
+def int_hexagon_V6_vabsb :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermw_128B,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermw_128B
-def int_hexagon_V6_vscattermw_128B :
-Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_128B">;
+def int_hexagon_V6_vabsb_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermh,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermh
-def int_hexagon_V6_vscattermh :
-Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh">;
+def int_hexagon_V6_vavgbrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgbrnd">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermh_128B,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermh_128B
-def int_hexagon_V6_vscattermh_128B :
-Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_128B">;
+def int_hexagon_V6_vavgbrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgbrnd_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermw_add,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermw_add
-def int_hexagon_V6_vscattermw_add :
-Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw_add">;
+def int_hexagon_V6_vdd0 :
+Hexagon_v32i32__Intrinsic<"HEXAGON_V6_vdd0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermw_add_128B,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermw_add_128B
-def int_hexagon_V6_vscattermw_add_128B :
-Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_add_128B">;
+def int_hexagon_V6_vdd0_128B :
+Hexagon_v64i32__Intrinsic<"HEXAGON_V6_vdd0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermh_add,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermh_add
-def int_hexagon_V6_vscattermh_add :
-Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh_add">;
+def int_hexagon_V6_vmpabuu :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpabuu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermh_add_128B,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermh_add_128B
-def int_hexagon_V6_vscattermh_add_128B :
-Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_add_128B">;
+def int_hexagon_V6_vmpabuu_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vmpabuu_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermwq,v_ftype_QVSISIVIVI,5)
-// tag : V6_vscattermwq
-def int_hexagon_V6_vscattermwq :
-Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermwq">;
+def int_hexagon_V6_vabsb_sat :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsb_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermwq_128B,v_ftype_QVSISIVIVI,5)
-// tag : V6_vscattermwq_128B
-def int_hexagon_V6_vscattermwq_128B :
-Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermwq_128B">;
+def int_hexagon_V6_vabsb_sat_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsb_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhq,v_ftype_QVSISIVIVI,5)
-// tag : V6_vscattermhq
-def int_hexagon_V6_vscattermhq :
-Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermhq">;
+// V66 HVX Instructions.
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhq_128B,v_ftype_QVSISIVIVI,5)
-// tag : V6_vscattermhq_128B
-def int_hexagon_V6_vscattermhq_128B :
-Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermhq_128B">;
+def int_hexagon_V6_vaddcarrysat :
+Hexagon_v16i32_v16i32v16i32v512i1_Intrinsic<"HEXAGON_V6_vaddcarrysat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhw,v_ftype_SISIVDVI,4)
-// tag : V6_vscattermhw
-def int_hexagon_V6_vscattermhw :
-Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw">;
+def int_hexagon_V6_vaddcarrysat_128B :
+Hexagon_v32i32_v32i32v32i32v1024i1_Intrinsic<"HEXAGON_V6_vaddcarrysat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhw_128B,v_ftype_SISIVDVI,4)
-// tag : V6_vscattermhw_128B
-def int_hexagon_V6_vscattermhw_128B :
-Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_128B">;
+def int_hexagon_V6_vasr_into :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vasr_into">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhwq,v_ftype_QVSISIVDVI,5)
-// tag : V6_vscattermhwq
-def int_hexagon_V6_vscattermhwq :
-Hexagon_V65_vv64iiiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhwq">;
+def int_hexagon_V6_vasr_into_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vasr_into_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhwq_128B,v_ftype_QVSISIVDVI,5)
-// tag : V6_vscattermhwq_128B
-def int_hexagon_V6_vscattermhwq_128B :
-Hexagon_V65_vv128iiiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhwq_128B">;
+def int_hexagon_V6_vsatdw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsatdw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add,v_ftype_SISIVDVI,4)
-// tag : V6_vscattermhw_add
-def int_hexagon_V6_vscattermhw_add :
-Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw_add">;
+def int_hexagon_V6_vsatdw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsatdw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add_128B,v_ftype_SISIVDVI,4)
-// tag : V6_vscattermhw_add_128B
-def int_hexagon_V6_vscattermhw_add_128B :
-Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_add_128B">;
+def int_hexagon_V6_vrotr :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrotr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdd0,VD_ftype_,0)
-// tag : V6_vdd0
-def int_hexagon_V6_vdd0 :
-Hexagon_v1024_Intrinsic<"HEXAGON_V6_vdd0">;
+def int_hexagon_V6_vrotr_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrotr_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdd0_128B,VD_ftype_,0)
-// tag : V6_vdd0_128B
-def int_hexagon_V6_vdd0_128B :
-Hexagon_V65_v2048_Intrinsic<"HEXAGON_V6_vdd0_128B">;
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 3433aaa402eb..62b2e8f77e7d 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -83,6 +83,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_fmaf128_round_to_odd
: GCCBuiltin<"__builtin_fmaf128_round_to_odd">,
Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
+ def int_ppc_scalar_extract_expq
+ : GCCBuiltin<"__builtin_vsx_scalar_extract_expq">,
+ Intrinsic <[llvm_i64_ty], [llvm_f128_ty], [IntrNoMem]>;
+ def int_ppc_scalar_insert_exp_qp
+ : GCCBuiltin<"__builtin_vsx_scalar_insert_exp_qp">,
+ Intrinsic <[llvm_f128_ty], [llvm_f128_ty, llvm_i64_ty], [IntrNoMem]>;
}
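The two quad-precision intrinsics added here map onto IEEE binary128 exponent manipulation: extract takes an f128 and yields the exponent as an i64, insert takes an f128 plus an i64 and yields a new f128. Below is a minimal sketch of emitting them through IRBuilder; the Intrinsic:: enum names are assumed to follow the usual TableGen derivation from the defs above, and roundTripExponent/Val/Exp are hypothetical names used only for illustration.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: read the exponent field of an fp128 value, then splice a new one in.
    // Val is an fp128 SSA value and Exp an i64 value provided by the caller.
    Value *roundTripExponent(Module &M, IRBuilder<> &B, Value *Val, Value *Exp) {
      Function *Extract =
          Intrinsic::getDeclaration(&M, Intrinsic::ppc_scalar_extract_expq);
      Function *Insert =
          Intrinsic::getDeclaration(&M, Intrinsic::ppc_scalar_insert_exp_qp);
      Value *OldExp = B.CreateCall(Extract, {Val}); // i64 exponent field
      (void)OldExp;                                 // kept only to show the type
      return B.CreateCall(Insert, {Val, Exp});      // f128 with Exp spliced in
    }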
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsRISCV.td b/contrib/llvm/include/llvm/IR/IntrinsicsRISCV.td
new file mode 100644
index 000000000000..0ac7348b56db
--- /dev/null
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -0,0 +1,44 @@
+//===- IntrinsicsRISCV.td - Defines RISCV intrinsics -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the RISCV-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "riscv" in {
+
+//===----------------------------------------------------------------------===//
+// Atomics
+
+class MaskedAtomicRMW32Intrinsic
+ : Intrinsic<[llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly, NoCapture<0>]>;
+
+class MaskedAtomicRMW32WithSextIntrinsic
+ : Intrinsic<[llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrArgMemOnly, NoCapture<0>]>;
+
+def int_riscv_masked_atomicrmw_xchg_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_add_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_sub_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_nand_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_max_i32 : MaskedAtomicRMW32WithSextIntrinsic;
+def int_riscv_masked_atomicrmw_min_i32 : MaskedAtomicRMW32WithSextIntrinsic;
+def int_riscv_masked_atomicrmw_umax_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_umin_i32 : MaskedAtomicRMW32Intrinsic;
+
+def int_riscv_masked_cmpxchg_i32
+ : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly, NoCapture<0>]>;
+
+} // TargetPrefix = "riscv"
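These masked atomic intrinsics are normally generated by the RISC-V backend's own atomic expansion, not by front ends, so the sketch below is purely illustrative of the operand layout suggested by the classes above (pointer, operand value, mask, then an ordering immediate; the sext variant adds a shift amount). The enum name, the interpretation of the trailing operand, and the choice of an i32* for the overloaded llvm_anyptr_ty slot are all assumptions.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/AtomicOrdering.h"

    using namespace llvm;

    // Sketch: build a call to llvm.riscv.masked.atomicrmw.add.i32.
    // Ptr is an i32* value; Incr and Mask are i32 values supplied by the caller.
    Value *emitMaskedAtomicAdd(Module &M, IRBuilder<> &B, Value *Ptr,
                               Value *Incr, Value *Mask) {
      // The intrinsic is overloaded only on the pointer type (llvm_anyptr_ty).
      Function *F = Intrinsic::getDeclaration(
          &M, Intrinsic::riscv_masked_atomicrmw_add_i32, {Ptr->getType()});
      // Assumed: the trailing i32 encodes the atomic memory ordering.
      Value *Ordering = B.getInt32(
          static_cast<uint32_t>(AtomicOrdering::SequentiallyConsistent));
      return B.CreateCall(F, {Ptr, Incr, Mask, Ordering});
    }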
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 7afc755a1e37..b015650906e0 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -24,17 +24,16 @@ def int_wasm_memory_grow : Intrinsic<[llvm_anyint_ty],
[llvm_i32_ty, LLVMMatchType<0>],
[]>;
-// These are the old names.
-def int_wasm_mem_size : Intrinsic<[llvm_anyint_ty],
- [llvm_i32_ty],
- [IntrReadMem]>;
-def int_wasm_mem_grow : Intrinsic<[llvm_anyint_ty],
- [llvm_i32_ty, LLVMMatchType<0>],
- []>;
+//===----------------------------------------------------------------------===//
+// Saturating float-to-int conversions
+//===----------------------------------------------------------------------===//
-// These are the old old names. They also lack the immediate field.
-def int_wasm_current_memory : Intrinsic<[llvm_anyint_ty], [], [IntrReadMem]>;
-def int_wasm_grow_memory : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], []>;
+def int_wasm_trunc_saturate_signed : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyfloat_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyfloat_ty],
+ [IntrNoMem, IntrSpeculatable]>;
//===----------------------------------------------------------------------===//
// Exception handling intrinsics
@@ -60,8 +59,57 @@ def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty],
// WebAssembly EH must maintain the landingpads in the order assigned to them
// by WasmEHPrepare pass to generate landingpad table in EHStreamer. This is
// used in order to give them the indices in WasmEHPrepare.
-def int_wasm_landingpad_index: Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+def int_wasm_landingpad_index: Intrinsic<[], [llvm_token_ty, llvm_i32_ty],
+ [IntrNoMem]>;
// Returns LSDA address of the current function.
def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
-}
+
+//===----------------------------------------------------------------------===//
+// Atomic intrinsics
+//===----------------------------------------------------------------------===//
+
+// wait / notify
+def int_wasm_atomic_wait_i32 :
+ Intrinsic<[llvm_i32_ty],
+ [LLVMPointerType<llvm_i32_ty>, llvm_i32_ty, llvm_i64_ty],
+ [IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>,
+ IntrHasSideEffects],
+ "", [SDNPMemOperand]>;
+def int_wasm_atomic_wait_i64 :
+ Intrinsic<[llvm_i32_ty],
+ [LLVMPointerType<llvm_i64_ty>, llvm_i64_ty, llvm_i64_ty],
+ [IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>,
+ IntrHasSideEffects],
+ "", [SDNPMemOperand]>;
+def int_wasm_atomic_notify:
+ Intrinsic<[llvm_i32_ty], [LLVMPointerType<llvm_i32_ty>, llvm_i32_ty],
+ [IntrInaccessibleMemOnly, NoCapture<0>, IntrHasSideEffects], "",
+ [SDNPMemOperand]>;
+
+//===----------------------------------------------------------------------===//
+// SIMD intrinsics
+//===----------------------------------------------------------------------===//
+
+def int_wasm_sub_saturate_signed :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_sub_saturate_unsigned :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_bitselect :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_anytrue :
+ Intrinsic<[llvm_i32_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_alltrue :
+ Intrinsic<[llvm_i32_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+} // TargetPrefix = "wasm"
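Of the WebAssembly additions, the wait/notify pair is the one whose operand order is easiest to get wrong: wait takes a pointer, the expected value and a 64-bit timeout, while notify takes a pointer and a waiter count. A hedged IRBuilder sketch follows; the enum names are assumed from the defs, the timeout and return-value semantics are those of the wasm threads proposal, and emitWaitThenNotify is a hypothetical helper.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: wait on a 32-bit location, then wake at most one waiter.
    // Ptr is an i32*, Expected an i32, Timeout an i64 (negative means no timeout).
    Value *emitWaitThenNotify(Module &M, IRBuilder<> &B, Value *Ptr,
                              Value *Expected, Value *Timeout) {
      Function *Wait =
          Intrinsic::getDeclaration(&M, Intrinsic::wasm_atomic_wait_i32);
      Function *Notify =
          Intrinsic::getDeclaration(&M, Intrinsic::wasm_atomic_notify);
      Value *Result = B.CreateCall(Wait, {Ptr, Expected, Timeout});
      B.CreateCall(Notify, {Ptr, B.getInt32(1)}); // wake up to one waiter
      return Result; // 0 = woken, 1 = value mismatch, 2 = timed out
    }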
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
index 905afc130d8f..8d8cc8e97678 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -27,12 +27,6 @@ let TargetPrefix = "x86" in {
// Marks the EH guard slot node created in LLVM IR prior to code generation.
def int_x86_seh_ehguard : Intrinsic<[], [llvm_ptr_ty], []>;
-
- // Given a pointer to the end of an EH registration object, returns the true
- // parent frame address that can be used with llvm.localrecover.
- def int_x86_seh_recoverfp : Intrinsic<[llvm_ptr_ty],
- [llvm_ptr_ty, llvm_ptr_ty],
- [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
@@ -53,8 +47,8 @@ let TargetPrefix = "x86" in {
let TargetPrefix = "x86" in {
def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">,
Intrinsic<[llvm_i64_ty], [], []>;
- def int_x86_rdtscp : GCCBuiltin<"__builtin_ia32_rdtscp">,
- Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrArgMemOnly]>;
+ def int_x86_rdtscp :
+ Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
}
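The rdtscp rewrite is representative of several changes in this import: instead of storing the auxiliary TSC_AUX value through a pointer argument, the intrinsic now returns both results as a two-element struct. A minimal sketch of consuming the new form (the enum name follows the def; emitRdtscp is a hypothetical helper):

    #include <utility>

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: read the time-stamp counter and the TSC_AUX MSR in one call.
    std::pair<Value *, Value *> emitRdtscp(Module &M, IRBuilder<> &B) {
      Function *Rdtscp = Intrinsic::getDeclaration(&M, Intrinsic::x86_rdtscp);
      Value *Call = B.CreateCall(Rdtscp);
      Value *TSC = B.CreateExtractValue(Call, 0); // i64 time-stamp counter
      Value *Aux = B.CreateExtractValue(Call, 1); // i32 IA32_TSC_AUX contents
      return {TSC, Aux};
    }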
// Read Performance-Monitoring Counter.
@@ -364,30 +358,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
@@ -1336,21 +1306,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// BITALG bits shuffle
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_vpshufbitqmb_128 :
- GCCBuiltin<"__builtin_ia32_vpshufbitqmb128_mask">,
- Intrinsic<[llvm_i16_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_vpshufbitqmb_256 :
- GCCBuiltin<"__builtin_ia32_vpshufbitqmb256_mask">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_vpshufbitqmb_512 :
- GCCBuiltin<"__builtin_ia32_vpshufbitqmb512_mask">,
- Intrinsic<[llvm_i64_ty],
- [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
- [IntrNoMem]>;
+ def int_x86_avx512_vpshufbitqmb_128 :
+ Intrinsic<[llvm_v16i1_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_vpshufbitqmb_256 :
+ Intrinsic<[llvm_v32i1_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_vpshufbitqmb_512 :
+ Intrinsic<[llvm_v64i1_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
@@ -1358,30 +1319,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
@@ -1518,18 +1455,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmultishift_qb_128:
- GCCBuiltin<"__builtin_ia32_vpmultishiftqb128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmultishift_qb_256:
- GCCBuiltin<"__builtin_ia32_vpmultishiftqb256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmultishift_qb_512:
- GCCBuiltin<"__builtin_ia32_vpmultishiftqb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmultishift_qb_128:
+ GCCBuiltin<"__builtin_ia32_vpmultishiftqb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmultishift_qb_256:
+ GCCBuiltin<"__builtin_ia32_vpmultishiftqb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmultishift_qb_512:
+ GCCBuiltin<"__builtin_ia32_vpmultishiftqb512">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>;
}
// Pack ops.
@@ -1739,83 +1673,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_psrav_w_512 : GCCBuiltin<"__builtin_ia32_psrav32hi">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
[IntrNoMem]>;
-
- def int_x86_avx512_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prorv_d_256 : GCCBuiltin<"__builtin_ia32_prorvd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prorv_d_512 : GCCBuiltin<"__builtin_ia32_prorvd512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v16i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prorv_q_128 : GCCBuiltin<"__builtin_ia32_prorvq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx512_prorv_q_256 : GCCBuiltin<"__builtin_ia32_prorvq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx512_prorv_q_512 : GCCBuiltin<"__builtin_ia32_prorvq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v8i64_ty], [IntrNoMem]>;
-
- def int_x86_avx512_prol_d_128 : GCCBuiltin<"__builtin_ia32_prold128">,
- Intrinsic<[llvm_v4i32_ty] , [llvm_v4i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prol_d_256 : GCCBuiltin<"__builtin_ia32_prold256">,
- Intrinsic<[llvm_v8i32_ty] , [llvm_v8i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prol_d_512 : GCCBuiltin<"__builtin_ia32_prold512">,
- Intrinsic<[llvm_v16i32_ty] , [llvm_v16i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prol_q_128 : GCCBuiltin<"__builtin_ia32_prolq128">,
- Intrinsic<[llvm_v2i64_ty] , [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prol_q_256 : GCCBuiltin<"__builtin_ia32_prolq256">,
- Intrinsic<[llvm_v4i64_ty] , [llvm_v4i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prol_q_512 : GCCBuiltin<"__builtin_ia32_prolq512">,
- Intrinsic<[llvm_v8i64_ty] , [llvm_v8i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
-
-
- def int_x86_avx512_prolv_d_128 : GCCBuiltin<"__builtin_ia32_prolvd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prolv_d_256 : GCCBuiltin<"__builtin_ia32_prolvd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prolv_d_512 : GCCBuiltin<"__builtin_ia32_prolvd512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v16i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prolv_q_128 : GCCBuiltin<"__builtin_ia32_prolvq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx512_prolv_q_256 : GCCBuiltin<"__builtin_ia32_prolvq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx512_prolv_q_512 : GCCBuiltin<"__builtin_ia32_prolvq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v8i64_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_d_128 : GCCBuiltin<"__builtin_ia32_prord128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_d_256 : GCCBuiltin<"__builtin_ia32_prord256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_d_512 : GCCBuiltin<"__builtin_ia32_prord512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_q_128 : GCCBuiltin<"__builtin_ia32_prorq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_q_256 : GCCBuiltin<"__builtin_ia32_prorq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_q_512 : GCCBuiltin<"__builtin_ia32_prorq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
-
}
// Gather ops
@@ -2187,32 +2044,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-
- def int_x86_xop_vprotb : GCCBuiltin<"__builtin_ia32_vprotb">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotd : GCCBuiltin<"__builtin_ia32_vprotd">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotq : GCCBuiltin<"__builtin_ia32_vprotq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotw : GCCBuiltin<"__builtin_ia32_vprotw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotbi : GCCBuiltin<"__builtin_ia32_vprotbi">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotdi : GCCBuiltin<"__builtin_ia32_vprotdi">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotqi : GCCBuiltin<"__builtin_ia32_vprotqi">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotwi : GCCBuiltin<"__builtin_ia32_vprotwi">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
def int_x86_xop_vpshab :
GCCBuiltin<"__builtin_ia32_vpshab">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
@@ -2750,24 +2581,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// ADX
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_addcarryx_u32: GCCBuiltin<"__builtin_ia32_addcarryx_u32">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_x86_addcarryx_u64: GCCBuiltin<"__builtin_ia32_addcarryx_u64">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_x86_addcarry_u32: GCCBuiltin<"__builtin_ia32_addcarry_u32">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_x86_addcarry_u64: GCCBuiltin<"__builtin_ia32_addcarry_u64">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_x86_subborrow_u32: GCCBuiltin<"__builtin_ia32_subborrow_u32">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_x86_subborrow_u64: GCCBuiltin<"__builtin_ia32_subborrow_u64">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
+ def int_x86_addcarry_32:
+ Intrinsic<[llvm_i8_ty, llvm_i32_ty],
+ [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_addcarry_64:
+ Intrinsic<[llvm_i8_ty, llvm_i64_ty],
+ [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_subborrow_32:
+ Intrinsic<[llvm_i8_ty, llvm_i32_ty],
+ [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_subborrow_64:
+ Intrinsic<[llvm_i8_ty, llvm_i64_ty],
+ [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
}
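The ADX intrinsics follow the same pattern: the carry-out and the sum now come back as an {i8, i32} or {i8, i64} pair, and the pointer argument (and with it IntrArgMemOnly) is gone. As a hedged illustration, the sketch below chains two 32-bit add-with-carry calls to add two 64-bit values in halves; the enum name follows the def, and the splitting into halves is only for demonstration.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: 64-bit addition built from two llvm.x86.addcarry.32 calls,
    // feeding the carry out of the low half into the high half.
    Value *emitAdd64ViaAdc32(Module &M, IRBuilder<> &B, Value *A, Value *C) {
      Function *Adc = Intrinsic::getDeclaration(&M, Intrinsic::x86_addcarry_32);
      Value *ALo = B.CreateTrunc(A, B.getInt32Ty());
      Value *AHi = B.CreateTrunc(B.CreateLShr(A, 32), B.getInt32Ty());
      Value *CLo = B.CreateTrunc(C, B.getInt32Ty());
      Value *CHi = B.CreateTrunc(B.CreateLShr(C, 32), B.getInt32Ty());

      Value *Lo = B.CreateCall(Adc, {B.getInt8(0), ALo, CLo});
      Value *Carry = B.CreateExtractValue(Lo, 0); // i8 carry out of the low half
      Value *Hi = B.CreateCall(Adc, {Carry, AHi, CHi});

      Value *LoSum = B.CreateZExt(B.CreateExtractValue(Lo, 1), B.getInt64Ty());
      Value *HiSum = B.CreateZExt(B.CreateExtractValue(Hi, 1), B.getInt64Ty());
      return B.CreateOr(B.CreateShl(HiSum, 32), LoSum);
    }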
//===----------------------------------------------------------------------===//
@@ -2787,6 +2612,36 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
//===----------------------------------------------------------------------===//
// AVX512
+// Mask ops
+let TargetPrefix = "x86" in {
+ def int_x86_avx512_kadd_b :
+ Intrinsic<[llvm_v8i1_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_kadd_w :
+ Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_kadd_d :
+ Intrinsic<[llvm_v32i1_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_kadd_q :
+ Intrinsic<[llvm_v64i1_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_ktestc_b :
+ Intrinsic<[llvm_i32_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestc_w :
+ Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestc_d :
+ Intrinsic<[llvm_i32_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestc_q :
+ Intrinsic<[llvm_i32_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_ktestz_b :
+ Intrinsic<[llvm_i32_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestz_w :
+ Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestz_d :
+ Intrinsic<[llvm_i32_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestz_q :
+ Intrinsic<[llvm_i32_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
+}
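The new mask-register intrinsics operate on vXi1 vectors rather than the integer-typed masks the removed variants used, so callers bitcast between the scalar mask and the vector form. A hedged sketch for the 16-bit case (enum name from the def; emitKAddW and the i16-in/i16-out convention are illustrative assumptions):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: add two 16-bit k-register masks with llvm.x86.avx512.kadd.w.
    Value *emitKAddW(Module &M, IRBuilder<> &B, Value *MaskA, Value *MaskB) {
      Type *V16I1 = VectorType::get(B.getInt1Ty(), 16);
      Function *KAdd = Intrinsic::getDeclaration(&M, Intrinsic::x86_avx512_kadd_w);
      Value *A = B.CreateBitCast(MaskA, V16I1);    // i16 -> v16i1
      Value *Bm = B.CreateBitCast(MaskB, V16I1);
      Value *Sum = B.CreateCall(KAdd, {A, Bm});
      return B.CreateBitCast(Sum, B.getInt16Ty()); // back to an i16 mask
    }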
+
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
@@ -3677,78 +3532,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
// Integer arithmetic ops
let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_padds_b_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padds_b_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padds_b_512 : GCCBuiltin<"__builtin_ia32_paddsb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padds_w_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padds_w_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padds_w_512 : GCCBuiltin<"__builtin_ia32_paddsw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_b_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_b_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_b_512 : GCCBuiltin<"__builtin_ia32_paddusb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_w_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_w_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_w_512 : GCCBuiltin<"__builtin_ia32_paddusw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_b_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_b_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_b_512 : GCCBuiltin<"__builtin_ia32_psubsb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_w_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_w_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_w_512 : GCCBuiltin<"__builtin_ia32_psubsw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_b_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_b_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_b_512 : GCCBuiltin<"__builtin_ia32_psubusb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_w_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_w_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_w_512 : GCCBuiltin<"__builtin_ia32_psubusw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v32i16_ty], [IntrNoMem, Commutative]>;
@@ -3780,6 +3563,7 @@ let TargetPrefix = "x86" in {
// Gather and Scatter ops
let TargetPrefix = "x86" in {
+ // NOTE: These are deprecated in favor of the versions that take a vXi1 mask.
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
@@ -3912,6 +3696,7 @@ let TargetPrefix = "x86" in {
[IntrReadMem, IntrArgMemOnly]>;
// scatter
+ // NOTE: These are deprecated in favor of the versions that take a vXi1 mask.
def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scattersiv8df">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
@@ -4072,6 +3857,239 @@ let TargetPrefix = "x86" in {
llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
}
+// AVX512 gather/scatter intrinsics that use vXi1 masks.
+let TargetPrefix = "x86" in {
+ def int_x86_avx512_mask_gather_dpd_512 :
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
+ llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_dps_512 :
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_ptr_ty,
+ llvm_v16i32_ty, llvm_v16i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_qpd_512 :
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
+ llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_qps_512 :
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty,
+ llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+
+ def int_x86_avx512_mask_gather_dpq_512 :
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
+ llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_dpi_512 :
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
+ llvm_v16i32_ty, llvm_v16i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_qpq_512 :
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
+ llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_qpi_512 :
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty,
+ llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div2_df :
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div2_di :
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div4_df :
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div4_di :
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div4_sf :
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div4_si :
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div8_sf :
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div8_si :
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv2_df :
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv2_di :
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv4_df :
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv4_di :
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv4_sf :
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv4_si :
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv8_sf :
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv8_si :
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatter_dpd_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_dps_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty,
+ llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qpd_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qps_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+
+ def int_x86_avx512_mask_scatter_dpq_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_dpi_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty,
+ llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qpq_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,llvm_v8i64_ty, llvm_v8i64_ty,
+ llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qpi_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i64_ty, llvm_v8i32_ty,
+ llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv2_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv2_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv8_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv8_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv2_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv2_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv8_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv8_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+}
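Compared with the deprecated forms flagged above, the only interface change in these gathers and scatters is the mask operand: a vXi1 vector instead of a scalar integer. A hedged sketch for the 16-lane single-precision gather (operand order read off the def: pass-through, base pointer, index vector, mask, scale; the helper name and the scale of 4 are illustrative choices):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: gather 16 floats through 32-bit indices under a v16i1 mask.
    // Src is the v16f32 pass-through, Base an i8* base address, Index a v16i32,
    // Mask a v16i1; a scale of 4 addresses float-sized elements.
    Value *emitMaskedGatherDps(Module &M, IRBuilder<> &B, Value *Src,
                               Value *Base, Value *Index, Value *Mask) {
      Function *Gather = Intrinsic::getDeclaration(
          &M, Intrinsic::x86_avx512_mask_gather_dps_512);
      return B.CreateCall(Gather, {Src, Base, Index, Mask, B.getInt32(4)});
    }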
+
// AVX-512 conflict detection instruction
// Instructions that count the number of leading zero bits
let TargetPrefix = "x86" in {
@@ -4273,237 +4291,6 @@ let TargetPrefix = "x86" in {
llvm_i8_ty], [IntrNoMem]>;
}
-// VBMI2 Concat & Shift
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_vpshld_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshldq512">,
- Intrinsic<[llvm_v8i64_ty],
- [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshldq256">,
- Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshldq128">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_vpshld_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshldd512">,
- Intrinsic<[llvm_v16i32_ty],
- [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshldd256">,
- Intrinsic<[llvm_v8i32_ty],
- [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshldd128">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_vpshld_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshldw512">,
- Intrinsic<[llvm_v32i16_ty],
- [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshldw256">,
- Intrinsic<[llvm_v16i16_ty],
- [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshldw128">,
- Intrinsic<[llvm_v8i16_ty],
- [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_vpshrd_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdq512">,
- Intrinsic<[llvm_v8i64_ty],
- [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdq256">,
- Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdq128">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_vpshrd_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdd512">,
- Intrinsic<[llvm_v16i32_ty],
- [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdd256">,
- Intrinsic<[llvm_v8i32_ty],
- [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdd128">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_vpshrd_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdw512">,
- Intrinsic<[llvm_v32i16_ty],
- [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdw256">,
- Intrinsic<[llvm_v16i16_ty],
- [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdw128">,
- Intrinsic<[llvm_v8i16_ty],
- [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshldv_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvw128_maskz">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvw256_maskz">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvw512_maskz">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshldv_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvq128_maskz">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvq256_maskz">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvq512_maskz">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshldv_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvd128_maskz">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvd256_maskz">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvd512_maskz">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshrdv_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw128_maskz">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw256_maskz">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw512_maskz">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshrdv_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq128_maskz">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq256_maskz">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq512_maskz">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshrdv_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd128_maskz">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd256_maskz">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd512_maskz">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-}
-
// truncate
let TargetPrefix = "x86" in {
def int_x86_avx512_mask_pmov_qb_128 :
diff --git a/contrib/llvm/include/llvm/IR/LLVMContext.h b/contrib/llvm/include/llvm/IR/LLVMContext.h
index ebd445553167..bd7097b39a3e 100644
--- a/contrib/llvm/include/llvm/IR/LLVMContext.h
+++ b/contrib/llvm/include/llvm/IR/LLVMContext.h
@@ -102,6 +102,7 @@ public:
MD_associated = 22, // "associated"
MD_callees = 23, // "callees"
MD_irr_loop = 24, // "irr_loop"
+ MD_access_group = 25, // "llvm.access.group"
};
/// Known operand bundle tag IDs, which always have the same value. All
diff --git a/contrib/llvm/include/llvm/IR/LegacyPassManager.h b/contrib/llvm/include/llvm/IR/LegacyPassManager.h
index 9a376a151505..5257a0eed488 100644
--- a/contrib/llvm/include/llvm/IR/LegacyPassManager.h
+++ b/contrib/llvm/include/llvm/IR/LegacyPassManager.h
@@ -98,9 +98,6 @@ private:
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_STDCXX_CONVERSION_FUNCTIONS(legacy::PassManagerBase, LLVMPassManagerRef)
-/// If -time-passes has been specified, report the timings immediately and then
-/// reset the timers to zero.
-void reportAndResetTimings();
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
index f6752f2817ba..51a2eb2a146d 100644
--- a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
+++ b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
@@ -406,11 +406,23 @@ public:
/// Set the initial size of the module if the user has specified that they
/// want remarks for size.
/// Returns 0 if the remark was not requested.
- unsigned initSizeRemarkInfo(Module &M);
+ unsigned initSizeRemarkInfo(
+ Module &M,
+ StringMap<std::pair<unsigned, unsigned>> &FunctionToInstrCount);
/// Emit a remark signifying that the number of IR instructions in the module
/// changed.
- void emitInstrCountChangedRemark(Pass *P, Module &M, unsigned CountBefore);
+ /// \p F is optionally passed by passes which run on Functions, and thus
+ /// always know whether or not a non-empty function is available.
+ ///
+ /// \p FunctionToInstrCount maps the name of a \p Function to a pair. The
+ /// first member of the pair is the IR count of the \p Function before running
+ /// \p P, and the second member is the IR count of the \p Function after
+ /// running \p P.
+ void emitInstrCountChangedRemark(
+ Pass *P, Module &M, int64_t Delta, unsigned CountBefore,
+ StringMap<std::pair<unsigned, unsigned>> &FunctionToInstrCount,
+ Function *F = nullptr);
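Taken together, the two hooks bracket a pass run: initSizeRemarkInfo snapshots the per-function instruction counts before the pass, and emitInstrCountChangedRemark reports the delta afterwards. The sketch below only mirrors that documented contract; the surrounding driver code, including the use of Module::getInstructionCount(), is an assumption for illustration rather than how the pass managers are literally written.

    #include <utility>

    #include "llvm/ADT/StringMap.h"
    #include "llvm/IR/LegacyPassManagers.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Pass.h"

    using namespace llvm;

    // Sketch: drive the size-remark hooks around a single pass run.
    void runWithSizeRemarks(PMDataManager &PMD, Pass *P, Module &M) {
      StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
      unsigned CountBefore = PMD.initSizeRemarkInfo(M, FunctionToInstrCount);

      // ... run P over M here ...

      unsigned CountAfter = M.getInstructionCount(); // assumed helper
      int64_t Delta = static_cast<int64_t>(CountAfter) -
                      static_cast<int64_t>(CountBefore);
      if (Delta != 0)
        PMD.emitInstrCountChangedRemark(P, M, Delta, CountBefore,
                                        FunctionToInstrCount);
    }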
protected:
// Top level manager.
@@ -508,7 +520,6 @@ public:
}
};
-Timer *getPassTimer(Pass *);
}
#endif
diff --git a/contrib/llvm/include/llvm/IR/Metadata.h b/contrib/llvm/include/llvm/IR/Metadata.h
index 9ac97f4224ac..be82c4efc115 100644
--- a/contrib/llvm/include/llvm/IR/Metadata.h
+++ b/contrib/llvm/include/llvm/IR/Metadata.h
@@ -66,9 +66,11 @@ protected:
enum StorageType { Uniqued, Distinct, Temporary };
/// Storage flag for non-uniqued, otherwise unowned, metadata.
- unsigned char Storage;
+ unsigned char Storage : 7;
// TODO: expose remaining bits to subclasses.
+ unsigned char ImplicitCode : 1;
+
unsigned short SubclassData16 = 0;
unsigned SubclassData32 = 0;
@@ -80,7 +82,7 @@ public:
protected:
Metadata(unsigned ID, StorageType Storage)
- : SubclassID(ID), Storage(Storage) {
+ : SubclassID(ID), Storage(Storage), ImplicitCode(false) {
static_assert(sizeof(*this) == 8, "Metadata fields poorly packed");
}
@@ -1316,10 +1318,11 @@ public:
//===----------------------------------------------------------------------===//
/// A tuple of MDNodes.
///
-/// Despite its name, a NamedMDNode isn't itself an MDNode. NamedMDNodes belong
-/// to modules, have names, and contain lists of MDNodes.
+/// Despite its name, a NamedMDNode isn't itself an MDNode.
+///
+/// NamedMDNodes are named module-level entities that contain lists of MDNodes.
///
-/// TODO: Inherit from Metadata.
+/// It is illegal for a NamedMDNode to appear as an operand of an MDNode.
class NamedMDNode : public ilist_node<NamedMDNode> {
friend class LLVMContextImpl;
friend class Module;
@@ -1420,6 +1423,9 @@ public:
}
};
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_ISA_CONVERSION_FUNCTIONS(NamedMDNode, LLVMNamedMDNodeRef)
+
} // end namespace llvm
#endif // LLVM_IR_METADATA_H
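As the reworded comment stresses, a NamedMDNode is a named, module-level list of MDNodes and can never itself be an MDNode operand. A minimal sketch of the usual access pattern (the metadata name "example.notes" and the helper are arbitrary):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: attach a string note to a module through a named metadata list.
    void addModuleNote(Module &M, StringRef Note) {
      LLVMContext &Ctx = M.getContext();
      // Created on first use; subsequent calls return the same list.
      NamedMDNode *NMD = M.getOrInsertNamedMetadata("example.notes");
      NMD->addOperand(MDNode::get(Ctx, {MDString::get(Ctx, Note)}));
    }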
diff --git a/contrib/llvm/include/llvm/IR/Module.h b/contrib/llvm/include/llvm/IR/Module.h
index a405f7df3efe..9ef35f1f73cd 100644
--- a/contrib/llvm/include/llvm/IR/Module.h
+++ b/contrib/llvm/include/llvm/IR/Module.h
@@ -16,6 +16,7 @@
#define LLVM_IR_MODULE_H
#include "llvm-c/Types.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -48,6 +49,7 @@ class MemoryBuffer;
class RandomNumberGenerator;
template <class PtrType> class SmallPtrSetImpl;
class StructType;
+class VersionTuple;
/// A Module instance is used to store all the information related to an
/// LLVM module. Modules are the top level container of all other LLVM
@@ -365,6 +367,11 @@ public:
return getOrInsertFunction(Name, AttributeList{}, RetTy, Args...);
}
+ // Avoid an incorrect argument ordering that would otherwise compile but misbehave.
+ template <typename... ArgsTy>
+ Constant *getOrInsertFunction(StringRef Name, AttributeList AttributeList,
+ FunctionType *Invalid, ArgsTy... Args) = delete;
+
/// Look up the specified function in the module symbol table. If it does not
/// exist, return null.
Function *getFunction(StringRef Name) const;
@@ -401,11 +408,15 @@ public:
}
/// Look up the specified global in the module symbol table.
- /// 1. If it does not exist, add a declaration of the global and return it.
- /// 2. Else, the global exists but has the wrong type: return the function
- /// with a constantexpr cast to the right type.
- /// 3. Finally, if the existing global is the correct declaration, return
- /// the existing global.
+ /// If it does not exist, invoke a callback to create a declaration of the
+ /// global and return it. The global is constantexpr casted to the expected
+ /// type if necessary.
+ Constant *
+ getOrInsertGlobal(StringRef Name, Type *Ty,
+ function_ref<GlobalVariable *()> CreateGlobalCallback);
+
+ /// Look up the specified global in the module symbol table. If required, this
+ /// overload constructs the global variable using its constructor's defaults.
Constant *getOrInsertGlobal(StringRef Name, Type *Ty);
/// @}
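The callback overload only invokes the callback when the named global is absent; either way the result is the existing or newly created global, constantexpr-cast to the requested type if an existing declaration has a different type. A hedged sketch (the name "my_counter", the linkage and the zero initializer are arbitrary choices):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: fetch, or lazily create, a module-level i32 counter.
    Constant *getCounter(Module &M) {
      Type *I32 = Type::getInt32Ty(M.getContext());
      return M.getOrInsertGlobal("my_counter", I32, [&] {
        // Only reached when "my_counter" does not exist yet.
        return new GlobalVariable(M, I32, /*isConstant=*/false,
                                  GlobalValue::InternalLinkage,
                                  ConstantInt::get(I32, 0), "my_counter");
      });
    }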
@@ -840,6 +851,17 @@ public:
void setPIELevel(PIELevel::Level PL);
/// @}
+ /// @}
+ /// @name Utility functions for querying and setting the code model
+ /// @{
+
+ /// Returns the code model (tiny, small, kernel, medium or large)
+ Optional<CodeModel::Model> getCodeModel() const;
+
+ /// Set the code model (tiny, small, kernel, medium or large)
+ void setCodeModel(CodeModel::Model CL);
+ /// @}
+
/// @name Utility functions for querying and setting PGO summary
/// @{
@@ -856,6 +878,17 @@ public:
/// Set that PLT should be avoided for RTLib calls.
void setRtLibUseGOT();
+ /// @name Utility functions for querying and setting the build SDK version
+ /// @{
+
+ /// Attach a build SDK version metadata to this module.
+ void setSDKVersion(const VersionTuple &V);
+
+ /// Get the build SDK version metadata.
+ ///
+ /// An empty version is returned if no such metadata is attached.
+ VersionTuple getSDKVersion() const;
+ /// @}
/// Take ownership of the given memory buffer.
void setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB);
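The code-model and SDK-version additions earlier in this header are simple module-level attributes with get/set pairs. A hedged sketch of using both (the specific model and the 10.14.4 version are arbitrary example values):

    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/CodeGen.h"
    #include "llvm/Support/VersionTuple.h"

    using namespace llvm;

    // Sketch: record a code model and a build SDK version on a module,
    // then read them back.
    void tagModule(Module &M) {
      M.setCodeModel(CodeModel::Small);
      M.setSDKVersion(VersionTuple(10, 14, 4));

      if (Optional<CodeModel::Model> CM = M.getCodeModel())
        (void)*CM;                          // CodeModel::Small in this sketch
      VersionTuple SDK = M.getSDKVersion(); // empty if no metadata is attached
      (void)SDK;
    }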
diff --git a/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h b/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h
index fdf3d4b5f1ce..a1acee494475 100644
--- a/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Allocator.h"
@@ -99,6 +100,22 @@ struct CalleeInfo {
}
};
+inline const char *getHotnessName(CalleeInfo::HotnessType HT) {
+ switch (HT) {
+ case CalleeInfo::HotnessType::Unknown:
+ return "unknown";
+ case CalleeInfo::HotnessType::Cold:
+ return "cold";
+ case CalleeInfo::HotnessType::None:
+ return "none";
+ case CalleeInfo::HotnessType::Hot:
+ return "hot";
+ case CalleeInfo::HotnessType::Critical:
+ return "critical";
+ }
+ llvm_unreachable("invalid hotness");
+}
+
class GlobalValueSummary;
using GlobalValueSummaryList = std::vector<std::unique_ptr<GlobalValueSummary>>;
@@ -146,13 +163,13 @@ using GlobalValueSummaryMapTy =
/// Struct that holds a reference to a particular GUID in a global value
/// summary.
struct ValueInfo {
- PointerIntPair<const GlobalValueSummaryMapTy::value_type *, 1, bool>
- RefAndFlag;
+ PointerIntPair<const GlobalValueSummaryMapTy::value_type *, 2, int>
+ RefAndFlags;
ValueInfo() = default;
ValueInfo(bool HaveGVs, const GlobalValueSummaryMapTy::value_type *R) {
- RefAndFlag.setPointer(R);
- RefAndFlag.setInt(HaveGVs);
+ RefAndFlags.setPointer(R);
+ RefAndFlags.setInt(HaveGVs);
}
operator bool() const { return getRef(); }
@@ -172,10 +189,12 @@ struct ValueInfo {
: getRef()->second.U.Name;
}
- bool haveGVs() const { return RefAndFlag.getInt(); }
+ bool haveGVs() const { return RefAndFlags.getInt() & 0x1; }
+ bool isReadOnly() const { return RefAndFlags.getInt() & 0x2; }
+ void setReadOnly() { RefAndFlags.setInt(RefAndFlags.getInt() | 0x2); }
const GlobalValueSummaryMapTy::value_type *getRef() const {
- return RefAndFlag.getPointer();
+ return RefAndFlags.getPointer();
}
bool isDSOLocal() const;
@@ -391,6 +410,7 @@ public:
return const_cast<GlobalValueSummary &>(
static_cast<const AliasSummary *>(this)->getAliasee());
}
+ bool hasAliaseeGUID() const { return AliaseeGUID != 0; }
const GlobalValue::GUID &getAliaseeGUID() const {
assert(AliaseeGUID && "Unexpected missing aliasee GUID");
return AliaseeGUID;
@@ -460,13 +480,17 @@ public:
TypeCheckedLoadConstVCalls;
};
- /// Function attribute flags. Used to track if a function accesses memory,
- /// recurses or aliases.
+ /// Flags specific to function summaries.
struct FFlags {
+ // Function attribute flags. Used to track if a function accesses memory,
+ // recurses or aliases.
unsigned ReadNone : 1;
unsigned ReadOnly : 1;
unsigned NoRecurse : 1;
unsigned ReturnDoesNotAlias : 1;
+
+ // Indicate if the global value cannot be inlined.
+ unsigned NoInline : 1;
};
/// Create an empty FunctionSummary (with specified call edges).
@@ -477,8 +501,9 @@ public:
FunctionSummary::GVFlags(
GlobalValue::LinkageTypes::AvailableExternallyLinkage,
/*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false),
- 0, FunctionSummary::FFlags{}, std::vector<ValueInfo>(),
- std::move(Edges), std::vector<GlobalValue::GUID>(),
+ /*InsCount=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0,
+ std::vector<ValueInfo>(), std::move(Edges),
+ std::vector<GlobalValue::GUID>(),
std::vector<FunctionSummary::VFuncId>(),
std::vector<FunctionSummary::VFuncId>(),
std::vector<FunctionSummary::ConstVCall>(),
@@ -493,10 +518,14 @@ private:
/// during the initial compile step when the summary index is first built.
unsigned InstCount;
- /// Function attribute flags. Used to track if a function accesses memory,
- /// recurses or aliases.
+ /// Function summary specific flags.
FFlags FunFlags;
+ /// The synthesized entry count of the function.
+ /// This is only populated during the ThinLink phase and remains unused while
+ /// generating per-module summaries.
+ uint64_t EntryCount = 0;
+
/// List of <CalleeValueInfo, CalleeInfo> call edge pairs from this function.
std::vector<EdgeTy> CallGraphEdgeList;
@@ -504,14 +533,15 @@ private:
public:
FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags,
- std::vector<ValueInfo> Refs, std::vector<EdgeTy> CGEdges,
+ uint64_t EntryCount, std::vector<ValueInfo> Refs,
+ std::vector<EdgeTy> CGEdges,
std::vector<GlobalValue::GUID> TypeTests,
std::vector<VFuncId> TypeTestAssumeVCalls,
std::vector<VFuncId> TypeCheckedLoadVCalls,
std::vector<ConstVCall> TypeTestAssumeConstVCalls,
std::vector<ConstVCall> TypeCheckedLoadConstVCalls)
: GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
- InstCount(NumInsts), FunFlags(FunFlags),
+ InstCount(NumInsts), FunFlags(FunFlags), EntryCount(EntryCount),
CallGraphEdgeList(std::move(CGEdges)) {
if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() ||
!TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() ||
@@ -522,18 +552,26 @@ public:
std::move(TypeTestAssumeConstVCalls),
std::move(TypeCheckedLoadConstVCalls)});
}
+ // Gets the number of immutable refs in RefEdgeList
+ unsigned immutableRefCount() const;
/// Check if this is a function summary.
static bool classof(const GlobalValueSummary *GVS) {
return GVS->getSummaryKind() == FunctionKind;
}
- /// Get function attribute flags.
+ /// Get function summary flags.
FFlags fflags() const { return FunFlags; }
/// Get the instruction count recorded for this function.
unsigned instCount() const { return InstCount; }
+ /// Get the synthetic entry count for this function.
+ uint64_t entryCount() const { return EntryCount; }
+
+ /// Set the synthetic entry count for this function.
+ void setEntryCount(uint64_t EC) { EntryCount = EC; }
+
/// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
@@ -631,19 +669,30 @@ template <> struct DenseMapInfo<FunctionSummary::ConstVCall> {
/// Global variable summary information to aid decisions and
/// implementation of importing.
///
-/// Currently this doesn't add anything to the base \p GlobalValueSummary,
-/// but is a placeholder as additional info may be added to the summary
-/// for variables.
+/// A global variable summary has an extra flag telling whether the variable
+/// is modified during the program run. This affects ThinLTO
+/// internalization.
class GlobalVarSummary : public GlobalValueSummary {
-
public:
- GlobalVarSummary(GVFlags Flags, std::vector<ValueInfo> Refs)
- : GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)) {}
+ struct GVarFlags {
+ GVarFlags(bool ReadOnly = false) : ReadOnly(ReadOnly) {}
+
+ unsigned ReadOnly : 1;
+ } VarFlags;
+
+ GlobalVarSummary(GVFlags Flags, GVarFlags VarFlags,
+ std::vector<ValueInfo> Refs)
+ : GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)),
+ VarFlags(VarFlags) {}
/// Check if this is a global variable summary.
static bool classof(const GlobalValueSummary *GVS) {
return GVS->getSummaryKind() == GlobalVarKind;
}
+
+ GVarFlags varflags() const { return VarFlags; }
+ void setReadOnly(bool RO) { VarFlags.ReadOnly = RO; }
+ bool isReadOnly() const { return VarFlags.ReadOnly; }
};
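A sketch of how the new read-only bit could be populated and consumed (Flags, Refs and the GV-based heuristic are assumptions; the real decision is made by the summary builder and by propagateConstants() during the thin link):

    auto S = llvm::make_unique<GlobalVarSummary>(
        Flags, GlobalVarSummary::GVarFlags(/*ReadOnly=*/GV.isConstant()),
        std::move(Refs));
    // Later, e.g. after ModuleSummaryIndex::propagateConstants() has run:
    if (S->isReadOnly()) {
      // The variable is a candidate for ThinLTO internalization.
    }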
struct TypeTestResolution {
@@ -737,6 +786,11 @@ using ModulePathStringTableTy = StringMap<std::pair<uint64_t, ModuleHash>>;
/// a particular module, and provide efficient access to their summary.
using GVSummaryMapTy = DenseMap<GlobalValue::GUID, GlobalValueSummary *>;
+/// Map of a type GUID to type id string and summary (multimap used
+/// in case of GUID conflicts).
+using TypeIdSummaryMapTy =
+ std::multimap<GlobalValue::GUID, std::pair<std::string, TypeIdSummary>>;
+
/// Class to hold module path string table and global value map,
/// and encapsulate methods for operating on them.
class ModuleSummaryIndex {
@@ -748,9 +802,9 @@ private:
/// Holds strings for combined index, mapping to the corresponding module ID.
ModulePathStringTableTy ModulePathStringTable;
- /// Mapping from type identifiers to summary information for that type
- /// identifier.
- std::map<std::string, TypeIdSummary> TypeIdMap;
+ /// Mapping from type identifier GUIDs to type identifier and its summary
+ /// information.
+ TypeIdSummaryMapTy TypeIdMap;
/// Mapping from original ID to GUID. If original ID can map to multiple
/// GUIDs, it will be mapped to 0.
@@ -761,6 +815,9 @@ private:
/// considered live.
bool WithGlobalValueDeadStripping = false;
+ /// Indicates that summary-based synthetic entry count propagation has run
+ bool HasSyntheticEntryCounts = false;
+
/// Indicates that distributed backend should skip compilation of the
/// module. Flag is suppose to be set by distributed ThinLTO indexing
/// when it detected that the module is not needed during the final
@@ -774,6 +831,13 @@ private:
/// union.
bool HaveGVs;
+ // True if the index was created for a module compiled with -fsplit-lto-unit.
+ bool EnableSplitLTOUnit;
+
+ // True if some of the modules were compiled with -fsplit-lto-unit and
+ // some were not. Set when the combined index is created during the thin link.
+ bool PartiallySplitLTOUnits = false;
+
std::set<std::string> CfiFunctionDefs;
std::set<std::string> CfiFunctionDecls;
@@ -793,7 +857,9 @@ private:
public:
// See HaveGVs variable comment.
- ModuleSummaryIndex(bool HaveGVs) : HaveGVs(HaveGVs), Saver(Alloc) {}
+ ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false)
+ : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit), Saver(Alloc) {
+ }
bool haveGVs() const { return HaveGVs; }
@@ -873,6 +939,9 @@ public:
WithGlobalValueDeadStripping = true;
}
+ bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; }
+ void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; }
+
bool skipModuleByDistributedBackend() const {
return SkipModuleByDistributedBackend;
}
@@ -880,6 +949,12 @@ public:
SkipModuleByDistributedBackend = true;
}
+ bool enableSplitLTOUnit() const { return EnableSplitLTOUnit; }
+ void setEnableSplitLTOUnit() { EnableSplitLTOUnit = true; }
+
+ bool partiallySplitLTOUnits() const { return PartiallySplitLTOUnits; }
+ void setPartiallySplitLTOUnits() { PartiallySplitLTOUnits = true; }
+
bool isGlobalValueLive(const GlobalValueSummary *GVS) const {
return !WithGlobalValueDeadStripping || GVS->isLive();
}
@@ -905,7 +980,7 @@ public:
// Save a string in the Index. Use before passing Name to
// getOrInsertValueInfo when the string isn't owned elsewhere (e.g. on the
// module's Strtab).
- StringRef saveString(std::string String) { return Saver.save(String); }
+ StringRef saveString(StringRef String) { return Saver.save(String); }
/// Return a ValueInfo for \p GUID setting value \p Name.
ValueInfo getOrInsertValueInfo(GlobalValue::GUID GUID, StringRef Name) {
@@ -1063,23 +1138,29 @@ public:
return ModulePathStringTable.count(M.getModuleIdentifier());
}
- const std::map<std::string, TypeIdSummary> &typeIds() const {
- return TypeIdMap;
- }
+ const TypeIdSummaryMapTy &typeIds() const { return TypeIdMap; }
- /// This accessor should only be used when exporting because it can mutate the
- /// map.
+ /// Return an existing or new TypeIdSummary entry for \p TypeId.
+ /// This accessor can mutate the map and therefore should not be used in
+ /// the ThinLTO backends.
TypeIdSummary &getOrInsertTypeIdSummary(StringRef TypeId) {
- return TypeIdMap[TypeId];
+ auto TidIter = TypeIdMap.equal_range(GlobalValue::getGUID(TypeId));
+ for (auto It = TidIter.first; It != TidIter.second; ++It)
+ if (It->second.first == TypeId)
+ return It->second.second;
+ auto It = TypeIdMap.insert(
+ {GlobalValue::getGUID(TypeId), {TypeId, TypeIdSummary()}});
+ return It->second.second;
}
/// This returns either a pointer to the type id summary (if present in the
/// summary map) or null (if not present). This may be used when importing.
const TypeIdSummary *getTypeIdSummary(StringRef TypeId) const {
- auto I = TypeIdMap.find(TypeId);
- if (I == TypeIdMap.end())
- return nullptr;
- return &I->second;
+ auto TidIter = TypeIdMap.equal_range(GlobalValue::getGUID(TypeId));
+ for (auto It = TidIter.first; It != TidIter.second; ++It)
+ if (It->second.first == TypeId)
+ return &It->second.second;
+ return nullptr;
}
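A lookup sketch for the GUID-keyed multimap used above (the Index object and the mangled type-id string are assumptions):

    // Both accessors hash TypeId to a GUID first, then resolve possible GUID
    // collisions by comparing the stored type-id string.
    if (const TypeIdSummary *TIS = Index.getTypeIdSummary("_ZTS7MyClass")) {
      // Inspect *TIS (type test / WPD resolutions) without mutating the map.
    }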
/// Collect for the given module the list of functions it defines
@@ -1103,11 +1184,15 @@ public:
/// Print out strongly connected components for debugging.
void dumpSCCs(raw_ostream &OS);
+
+ /// Analyze index and detect unmodified globals
+ void propagateConstants(const DenseSet<GlobalValue::GUID> &PreservedSymbols);
};
/// GraphTraits definition to build SCC for the index
template <> struct GraphTraits<ValueInfo> {
typedef ValueInfo NodeRef;
+ using EdgeRef = FunctionSummary::EdgeTy &;
static NodeRef valueInfoFromEdge(FunctionSummary::EdgeTy &P) {
return P.first;
@@ -1116,6 +1201,8 @@ template <> struct GraphTraits<ValueInfo> {
mapped_iterator<std::vector<FunctionSummary::EdgeTy>::iterator,
decltype(&valueInfoFromEdge)>;
+ using ChildEdgeIteratorType = std::vector<FunctionSummary::EdgeTy>::iterator;
+
static NodeRef getEntryNode(ValueInfo V) { return V; }
static ChildIteratorType child_begin(NodeRef N) {
@@ -1137,6 +1224,26 @@ template <> struct GraphTraits<ValueInfo> {
cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
return ChildIteratorType(F->CallGraphEdgeList.end(), &valueInfoFromEdge);
}
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ if (!N.getSummaryList().size()) // handle external function
+ return FunctionSummary::ExternalNode.CallGraphEdgeList.begin();
+
+ FunctionSummary *F =
+ cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
+ return F->CallGraphEdgeList.begin();
+ }
+
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) {
+ if (!N.getSummaryList().size()) // handle external function
+ return FunctionSummary::ExternalNode.CallGraphEdgeList.end();
+
+ FunctionSummary *F =
+ cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
+ return F->CallGraphEdgeList.end();
+ }
+
+ static NodeRef edge_dest(EdgeRef E) { return E.first; }
};
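A rough sketch of walking call edges through the new edge accessors (N is an assumed ValueInfo node; errs() is used purely for illustration):

    using GT = GraphTraits<ValueInfo>;
    for (auto EI = GT::child_edge_begin(N), EE = GT::child_edge_end(N);
         EI != EE; ++EI) {
      ValueInfo Callee = GT::edge_dest(*EI);
      errs() << "call edge to " << Callee.name() << "\n";
    }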
template <>
@@ -1152,6 +1259,14 @@ struct GraphTraits<ModuleSummaryIndex *> : public GraphTraits<ValueInfo> {
}
};
+static inline bool canImportGlobalVar(GlobalValueSummary *S) {
+ assert(isa<GlobalVarSummary>(S->getBaseObject()));
+
+ // We don't import GVs with references, because that can result
+ // in promotion of local variables in the source module.
+ return !GlobalValue::isInterposableLinkage(S->linkage()) &&
+ !S->notEligibleToImport() && S->refs().empty();
+}
} // end namespace llvm
#endif // LLVM_IR_MODULESUMMARYINDEX_H
diff --git a/contrib/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/contrib/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
index 1b339ab32cf1..a88ee26b51c3 100644
--- a/contrib/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/contrib/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -195,7 +195,6 @@ template <> struct MappingTraits<FunctionSummaryYaml> {
} // End yaml namespace
} // End llvm namespace
-LLVM_YAML_IS_STRING_MAP(TypeIdSummary)
LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummaryYaml)
namespace llvm {
@@ -225,7 +224,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
GlobalValueSummary::GVFlags(
static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal),
- 0, FunctionSummary::FFlags{}, Refs,
+ /*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, Refs,
ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests),
std::move(FSum.TypeTestAssumeVCalls),
std::move(FSum.TypeCheckedLoadVCalls),
@@ -258,6 +257,18 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
}
};
+template <> struct CustomMappingTraits<TypeIdSummaryMapTy> {
+ static void inputOne(IO &io, StringRef Key, TypeIdSummaryMapTy &V) {
+ TypeIdSummary TId;
+ io.mapRequired(Key.str().c_str(), TId);
+ V.insert({GlobalValue::getGUID(Key), {Key, TId}});
+ }
+ static void output(IO &io, TypeIdSummaryMapTy &V) {
+ for (auto TidIter = V.begin(); TidIter != V.end(); TidIter++)
+ io.mapRequired(TidIter->second.first.c_str(), TidIter->second.second);
+ }
+};
+
template <> struct MappingTraits<ModuleSummaryIndex> {
static void mapping(IO &io, ModuleSummaryIndex& index) {
io.mapOptional("GlobalValueMap", index.GlobalValueMap);
diff --git a/contrib/llvm/include/llvm/IR/Operator.h b/contrib/llvm/include/llvm/IR/Operator.h
index 939cec7f4aa4..6b387bbcccb1 100644
--- a/contrib/llvm/include/llvm/IR/Operator.h
+++ b/contrib/llvm/include/llvm/IR/Operator.h
@@ -364,19 +364,26 @@ public:
/// precision.
float getFPAccuracy() const;
- static bool classof(const Instruction *I) {
- return I->getType()->isFPOrFPVectorTy() ||
- I->getOpcode() == Instruction::FCmp;
- }
-
- static bool classof(const ConstantExpr *CE) {
- return CE->getType()->isFPOrFPVectorTy() ||
- CE->getOpcode() == Instruction::FCmp;
- }
-
static bool classof(const Value *V) {
- return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
- (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ unsigned Opcode;
+ if (auto *I = dyn_cast<Instruction>(V))
+ Opcode = I->getOpcode();
+ else if (auto *CE = dyn_cast<ConstantExpr>(V))
+ Opcode = CE->getOpcode();
+ else
+ return false;
+
+ switch (Opcode) {
+ case Instruction::FCmp:
+ return true;
+ // non math FP Operators (no FMF)
+ case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::InsertElement:
+ return false;
+ default:
+ return V->getType()->isFPOrFPVectorTy();
+ }
}
};
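With the opcode-based classof above, FP-typed insert/extract/shuffle no longer qualify; typical queries are unaffected (V is an assumed Value pointer):

    if (auto *FPOp = dyn_cast<FPMathOperator>(V)) {
      // Only genuine FP math (including fcmp) reaches this point now.
      if (FPOp->hasAllowReassoc()) {
        // Reassociation-based folds are allowed on this operation.
      }
    }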
diff --git a/contrib/llvm/include/llvm/IR/PassInstrumentation.h b/contrib/llvm/include/llvm/IR/PassInstrumentation.h
new file mode 100644
index 000000000000..08dac1c4a274
--- /dev/null
+++ b/contrib/llvm/include/llvm/IR/PassInstrumentation.h
@@ -0,0 +1,207 @@
+//===- llvm/IR/PassInstrumentation.h ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the Pass Instrumentation classes that provide
+/// instrumentation points into the pass execution by PassManager.
+///
+/// There are two main classes:
+/// - PassInstrumentation provides a set of instrumentation points for
+/// pass managers to call on.
+///
+/// - PassInstrumentationCallbacks registers callbacks and provides access
+/// to them for PassInstrumentation.
+///
+/// A PassInstrumentation object is used as the result of
+/// PassInstrumentationAnalysis (so it is intended to be easily copyable).
+///
+/// Intended scheme of use for Pass Instrumentation is as follows:
+/// - register instrumentation callbacks in PassInstrumentationCallbacks
+/// instance. PassBuilder provides helper for that.
+///
+/// - register PassInstrumentationAnalysis with all the PassManagers.
+/// PassBuilder handles that automatically when registering analyses.
+///
+/// - Pass Manager requests PassInstrumentationAnalysis from analysis manager
+/// and gets PassInstrumentation as its result.
+///
+/// - Pass Manager invokes PassInstrumentation entry points appropriately,
+/// passing StringRef identification ("name") of the pass currently being
+/// executed and IRUnit it works on. There can be different schemes of
+/// providing names in future, currently it is just a name() of the pass.
+///
+/// - PassInstrumentation wraps address of IRUnit into llvm::Any and passes
+/// control to all the registered callbacks. Note that we specifically wrap
+/// 'const IRUnitT*' so as to avoid any accidental changes to IR in
+/// instrumenting callbacks.
+///
+/// - Some instrumentation points (BeforePass) allow control over the
+/// execution of a pass. For those, a callback returning false means the
+/// pass will not be executed.
+///
+/// TODO: currently there is no way for a pass to opt-out of execution control
+/// (e.g. become unskippable). PassManager is the only entity that determines
+/// how pass instrumentation affects pass execution.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_PASSINSTRUMENTATION_H
+#define LLVM_IR_PASSINSTRUMENTATION_H
+
+#include "llvm/ADT/Any.h"
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/TypeName.h"
+#include <type_traits>
+
+namespace llvm {
+
+class PreservedAnalyses;
+
+/// This class manages callback registration and provides a way for
+/// PassInstrumentation to pass control to the registered callbacks.
+class PassInstrumentationCallbacks {
+public:
+ // Before/After callbacks accept IRUnits whenever appropriate, so they need
+ // to take them as constant pointers, wrapped with llvm::Any.
+ // For the case when IRUnit has been invalidated there is a different
+ // callback to use - AfterPassInvalidated.
+ // TODO: currently AfterPassInvalidated does not accept IRUnit, since passing
+ // already invalidated IRUnit is unsafe. There are ways to handle invalidated IRUnits
+ // in a safe way, and we might pursue that as soon as there is a useful instrumentation
+ // that needs it.
+ using BeforePassFunc = bool(StringRef, Any);
+ using AfterPassFunc = void(StringRef, Any);
+ using AfterPassInvalidatedFunc = void(StringRef);
+ using BeforeAnalysisFunc = void(StringRef, Any);
+ using AfterAnalysisFunc = void(StringRef, Any);
+
+public:
+ PassInstrumentationCallbacks() {}
+
+ /// Copying PassInstrumentationCallbacks is not intended.
+ PassInstrumentationCallbacks(const PassInstrumentationCallbacks &) = delete;
+ void operator=(const PassInstrumentationCallbacks &) = delete;
+
+ template <typename CallableT> void registerBeforePassCallback(CallableT C) {
+ BeforePassCallbacks.emplace_back(std::move(C));
+ }
+
+ template <typename CallableT> void registerAfterPassCallback(CallableT C) {
+ AfterPassCallbacks.emplace_back(std::move(C));
+ }
+
+ template <typename CallableT>
+ void registerAfterPassInvalidatedCallback(CallableT C) {
+ AfterPassInvalidatedCallbacks.emplace_back(std::move(C));
+ }
+
+ template <typename CallableT>
+ void registerBeforeAnalysisCallback(CallableT C) {
+ BeforeAnalysisCallbacks.emplace_back(std::move(C));
+ }
+
+ template <typename CallableT>
+ void registerAfterAnalysisCallback(CallableT C) {
+ AfterAnalysisCallbacks.emplace_back(std::move(C));
+ }
+
+private:
+ friend class PassInstrumentation;
+
+ SmallVector<llvm::unique_function<BeforePassFunc>, 4> BeforePassCallbacks;
+ SmallVector<llvm::unique_function<AfterPassFunc>, 4> AfterPassCallbacks;
+ SmallVector<llvm::unique_function<AfterPassInvalidatedFunc>, 4>
+ AfterPassInvalidatedCallbacks;
+ SmallVector<llvm::unique_function<BeforeAnalysisFunc>, 4>
+ BeforeAnalysisCallbacks;
+ SmallVector<llvm::unique_function<AfterAnalysisFunc>, 4>
+ AfterAnalysisCallbacks;
+};
+
+/// This class provides instrumentation entry points for the Pass Manager,
+/// doing calls to callbacks registered in PassInstrumentationCallbacks.
+class PassInstrumentation {
+ PassInstrumentationCallbacks *Callbacks;
+
+public:
+ /// The Callbacks object is not owned by PassInstrumentation; its lifetime
+ /// should at least match the lifetime of the corresponding
+ /// PassInstrumentationAnalysis (which usually lasts until the end of the
+ /// current compilation).
+ PassInstrumentation(PassInstrumentationCallbacks *CB = nullptr)
+ : Callbacks(CB) {}
+
+ /// BeforePass instrumentation point - takes \p Pass instance to be executed
+ /// and constant reference to IR it operates on. \returns true if the pass is
+ /// allowed to be executed.
+ template <typename IRUnitT, typename PassT>
+ bool runBeforePass(const PassT &Pass, const IRUnitT &IR) const {
+ if (!Callbacks)
+ return true;
+
+ bool ShouldRun = true;
+ for (auto &C : Callbacks->BeforePassCallbacks)
+ ShouldRun &= C(Pass.name(), llvm::Any(&IR));
+ return ShouldRun;
+ }
+
+ /// AfterPass instrumentation point - takes \p Pass instance that has
+ /// just been executed and constant reference to \p IR it operates on.
+ /// \p IR is guaranteed to be valid at this point.
+ template <typename IRUnitT, typename PassT>
+ void runAfterPass(const PassT &Pass, const IRUnitT &IR) const {
+ if (Callbacks)
+ for (auto &C : Callbacks->AfterPassCallbacks)
+ C(Pass.name(), llvm::Any(&IR));
+ }
+
+ /// AfterPassInvalidated instrumentation point - takes \p Pass instance
+ /// that has just been executed. For use when IR has been invalidated
+ /// by \p Pass execution.
+ template <typename IRUnitT, typename PassT>
+ void runAfterPassInvalidated(const PassT &Pass) const {
+ if (Callbacks)
+ for (auto &C : Callbacks->AfterPassInvalidatedCallbacks)
+ C(Pass.name());
+ }
+
+ /// BeforeAnalysis instrumentation point - takes \p Analysis instance
+ /// to be executed and constant reference to IR it operates on.
+ template <typename IRUnitT, typename PassT>
+ void runBeforeAnalysis(const PassT &Analysis, const IRUnitT &IR) const {
+ if (Callbacks)
+ for (auto &C : Callbacks->BeforeAnalysisCallbacks)
+ C(Analysis.name(), llvm::Any(&IR));
+ }
+
+ /// AfterAnalysis instrumentation point - takes \p Analysis instance
+ /// that has just been executed and constant reference to IR it operated on.
+ template <typename IRUnitT, typename PassT>
+ void runAfterAnalysis(const PassT &Analysis, const IRUnitT &IR) const {
+ if (Callbacks)
+ for (auto &C : Callbacks->AfterAnalysisCallbacks)
+ C(Analysis.name(), llvm::Any(&IR));
+ }
+
+ /// Handle invalidation from the pass manager when PassInstrumentation
+ /// is used as the result of PassInstrumentationAnalysis.
+ ///
+ /// On attempt to invalidate just return false. There is nothing to become
+ /// invalid here.
+ template <typename IRUnitT, typename... ExtraArgsT>
+ bool invalidate(IRUnitT &, const class llvm::PreservedAnalyses &,
+ ExtraArgsT...) {
+ return false;
+ }
+};
+
+} // namespace llvm
+
+#endif
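A minimal sketch of registering callbacks through this interface (the name filter and the logging are assumptions made for illustration):

    PassInstrumentationCallbacks PIC;
    PIC.registerBeforePassCallback([](StringRef PassID, Any) {
      // Returning false asks the pass manager to skip this pass.
      return !PassID.startswith("NoOp");
    });
    PIC.registerAfterPassCallback([](StringRef PassID, Any) {
      errs() << "ran: " << PassID << "\n";
    });
    // Pass managers obtain a PassInstrumentation wrapping &PIC via the
    // PassInstrumentationAnalysis declared in PassManager.h below.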
diff --git a/contrib/llvm/include/llvm/IR/PassManager.h b/contrib/llvm/include/llvm/IR/PassManager.h
index a5d4aaf71c0e..738a2242eea0 100644
--- a/contrib/llvm/include/llvm/IR/PassManager.h
+++ b/contrib/llvm/include/llvm/IR/PassManager.h
@@ -44,6 +44,7 @@
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManagerInternal.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TypeName.h"
@@ -402,6 +403,43 @@ struct AnalysisInfoMixin : PassInfoMixin<DerivedT> {
}
};
+namespace detail {
+
+/// Actual unpacker of extra arguments in getAnalysisResult,
+/// passes only those tuple arguments that are mentioned in index_sequence.
+template <typename PassT, typename IRUnitT, typename AnalysisManagerT,
+ typename... ArgTs, size_t... Ns>
+typename PassT::Result
+getAnalysisResultUnpackTuple(AnalysisManagerT &AM, IRUnitT &IR,
+ std::tuple<ArgTs...> Args,
+ llvm::index_sequence<Ns...>) {
+ (void)Args;
+ return AM.template getResult<PassT>(IR, std::get<Ns>(Args)...);
+}
+
+/// Helper for *partial* unpacking of extra arguments in getAnalysisResult.
+///
+/// Arguments passed in tuple come from PassManager, so they might have extra
+/// arguments after those AnalysisManager's ExtraArgTs ones that we need to
+/// pass to getResult.
+template <typename PassT, typename IRUnitT, typename... AnalysisArgTs,
+ typename... MainArgTs>
+typename PassT::Result
+getAnalysisResult(AnalysisManager<IRUnitT, AnalysisArgTs...> &AM, IRUnitT &IR,
+ std::tuple<MainArgTs...> Args) {
+ return (getAnalysisResultUnpackTuple<
+ PassT, IRUnitT>)(AM, IR, Args,
+ llvm::index_sequence_for<AnalysisArgTs...>{});
+}
+
+} // namespace detail
+
+// Forward declare the pass instrumentation analysis explicitly queried in
+// generic PassManager code.
+// FIXME: figure out a way to move PassInstrumentationAnalysis into its own
+// header.
+class PassInstrumentationAnalysis;
+
/// Manages a sequence of passes over a particular unit of IR.
///
/// A pass manager contains a sequence of passes to run over a particular unit
@@ -445,15 +483,34 @@ public:
ExtraArgTs... ExtraArgs) {
PreservedAnalyses PA = PreservedAnalyses::all();
+ // Request PassInstrumentation from the analysis manager; we will use it to
+ // run the instrumenting callbacks for the passes later.
+ // Here we use a std::tuple wrapper over getResult which helps to extract
+ // AnalysisManager's arguments out of the whole ExtraArgs set.
+ PassInstrumentation PI =
+ detail::getAnalysisResult<PassInstrumentationAnalysis>(
+ AM, IR, std::tuple<ExtraArgTs...>(ExtraArgs...));
+
if (DebugLogging)
dbgs() << "Starting " << getTypeName<IRUnitT>() << " pass manager run.\n";
for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) {
+ auto *P = Passes[Idx].get();
if (DebugLogging)
- dbgs() << "Running pass: " << Passes[Idx]->name() << " on "
- << IR.getName() << "\n";
+ dbgs() << "Running pass: " << P->name() << " on " << IR.getName()
+ << "\n";
- PreservedAnalyses PassPA = Passes[Idx]->run(IR, AM, ExtraArgs...);
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, and skip its execution completely if asked to (a callback returned
+ // false).
+ if (!PI.runBeforePass<IRUnitT>(*P, IR))
+ continue;
+
+ PreservedAnalyses PassPA = P->run(IR, AM, ExtraArgs...);
+
+ // Call onto PassInstrumentation's AfterPass callbacks immediately after
+ // running the pass.
+ PI.runAfterPass<IRUnitT>(*P, IR);
// Update the analysis manager as each pass runs and potentially
// invalidates analyses.
@@ -510,6 +567,32 @@ extern template class PassManager<Function>;
/// Convenience typedef for a pass manager over functions.
using FunctionPassManager = PassManager<Function>;
+/// Pseudo-analysis pass that exposes the \c PassInstrumentation to pass
+/// managers. Goes before AnalysisManager definition to provide its
+/// internals (e.g. PassInstrumentationAnalysis::ID) for use there if needed.
+/// FIXME: figure out a way to move PassInstrumentationAnalysis into its own
+/// header.
+class PassInstrumentationAnalysis
+ : public AnalysisInfoMixin<PassInstrumentationAnalysis> {
+ friend AnalysisInfoMixin<PassInstrumentationAnalysis>;
+ static AnalysisKey Key;
+
+ PassInstrumentationCallbacks *Callbacks;
+
+public:
+ /// PassInstrumentationCallbacks object is shared, owned by something else,
+ /// not this analysis.
+ PassInstrumentationAnalysis(PassInstrumentationCallbacks *Callbacks = nullptr)
+ : Callbacks(Callbacks) {}
+
+ using Result = PassInstrumentation;
+
+ template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>
+ Result run(IRUnitT &, AnalysisManagerT &, ExtraArgTs &&...) {
+ return PassInstrumentation(Callbacks);
+ }
+};
+
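How the pseudo-analysis above is typically exposed to an analysis manager (PIC is an assumed PassInstrumentationCallbacks instance; PassBuilder performs this registration automatically when registering analyses):

    ModuleAnalysisManager MAM;
    MAM.registerPass([&] { return PassInstrumentationAnalysis(&PIC); });
    // PassManager::run then fetches it with
    //   AM.getResult<PassInstrumentationAnalysis>(IR);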
/// A container for analyses that lazily runs them and caches their
/// results.
///
@@ -860,9 +943,18 @@ private:
if (DebugLogging)
dbgs() << "Running analysis: " << P.name() << " on " << IR.getName()
<< "\n";
+
+ PassInstrumentation PI;
+ if (ID != PassInstrumentationAnalysis::ID()) {
+ PI = getResult<PassInstrumentationAnalysis>(IR, ExtraArgs...);
+ PI.runBeforeAnalysis(P, IR);
+ }
+
AnalysisResultListT &ResultList = AnalysisResultLists[&IR];
ResultList.emplace_back(ID, P.run(IR, *this, ExtraArgs...));
+ PI.runAfterAnalysis(P, IR);
+
// P.run may have inserted elements into AnalysisResults and invalidated
// RI.
RI = AnalysisResults.find({ID, &IR});
@@ -930,7 +1022,7 @@ using FunctionAnalysisManager = AnalysisManager<Function>;
/// analysis manager over an "inner" IR unit. The inner unit must be contained
/// in the outer unit.
///
-/// Fore example, InnerAnalysisManagerProxy<FunctionAnalysisManager, Module> is
+/// For example, InnerAnalysisManagerProxy<FunctionAnalysisManager, Module> is
/// an analysis over Modules (the "outer" unit) that provides access to a
/// Function analysis manager. The FunctionAnalysisManager is the "inner"
/// manager being proxied, and Functions are the "inner" unit. The inner/outer
@@ -1192,13 +1284,24 @@ public:
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ // Request PassInstrumentation from the analysis manager; we will use it to
+ // run the instrumenting callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
+
PreservedAnalyses PA = PreservedAnalyses::all();
for (Function &F : M) {
if (F.isDeclaration())
continue;
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, and skip its execution completely if asked to (a callback returned
+ // false).
+ if (!PI.runBeforePass<Function>(Pass, F))
+ continue;
PreservedAnalyses PassPA = Pass.run(F, FAM);
+ PI.runAfterPass(Pass, F);
+
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
// directly handle the function analysis manager's invalidation here.
@@ -1302,10 +1405,26 @@ public:
RepeatedPass(int Count, PassT P) : Count(Count), P(std::move(P)) {}
template <typename IRUnitT, typename AnalysisManagerT, typename... Ts>
- PreservedAnalyses run(IRUnitT &Arg, AnalysisManagerT &AM, Ts &&... Args) {
+ PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM, Ts &&... Args) {
+
+ // Request PassInstrumentation from the analysis manager; we will use it to
+ // run the instrumenting callbacks for the passes later.
+ // Here we use a std::tuple wrapper over getResult which helps to extract
+ // AnalysisManager's arguments out of the whole Args set.
+ PassInstrumentation PI =
+ detail::getAnalysisResult<PassInstrumentationAnalysis>(
+ AM, IR, std::tuple<Ts...>(Args...));
+
auto PA = PreservedAnalyses::all();
- for (int i = 0; i < Count; ++i)
- PA.intersect(P.run(Arg, AM, std::forward<Ts>(Args)...));
+ for (int i = 0; i < Count; ++i) {
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, and skip its execution completely if asked to (a callback returned
+ // false).
+ if (!PI.runBeforePass<IRUnitT>(P, IR))
+ continue;
+ PA.intersect(P.run(IR, AM, std::forward<Ts>(Args)...));
+ PI.runAfterPass(P, IR);
+ }
return PA;
}
diff --git a/contrib/llvm/include/llvm/IR/PassManagerInternal.h b/contrib/llvm/include/llvm/IR/PassManagerInternal.h
index 16a3258b4121..5ad68be62742 100644
--- a/contrib/llvm/include/llvm/IR/PassManagerInternal.h
+++ b/contrib/llvm/include/llvm/IR/PassManagerInternal.h
@@ -48,7 +48,7 @@ struct PassConcept {
ExtraArgTs... ExtraArgs) = 0;
/// Polymorphic method to access the name of a pass.
- virtual StringRef name() = 0;
+ virtual StringRef name() const = 0;
};
/// A template wrapper used to implement the polymorphic API.
@@ -80,7 +80,7 @@ struct PassModel : PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...> {
return Pass.run(IR, AM, ExtraArgs...);
}
- StringRef name() override { return PassT::name(); }
+ StringRef name() const override { return PassT::name(); }
PassT Pass;
};
@@ -250,7 +250,7 @@ struct AnalysisPassConcept {
ExtraArgTs... ExtraArgs) = 0;
/// Polymorphic method to access the name of a pass.
- virtual StringRef name() = 0;
+ virtual StringRef name() const = 0;
};
/// Wrapper to model the analysis pass concept.
@@ -290,13 +290,14 @@ struct AnalysisPassModel : AnalysisPassConcept<IRUnitT, PreservedAnalysesT,
AnalysisResultConcept<IRUnitT, PreservedAnalysesT, InvalidatorT>>
run(IRUnitT &IR, AnalysisManager<IRUnitT, ExtraArgTs...> &AM,
ExtraArgTs... ExtraArgs) override {
- return llvm::make_unique<ResultModelT>(Pass.run(IR, AM, ExtraArgs...));
+ return llvm::make_unique<ResultModelT>(
+ Pass.run(IR, AM, std::forward<ExtraArgTs>(ExtraArgs)...));
}
/// The model delegates to a static \c PassT::name method.
///
/// The returned string ref must point to constant immutable data!
- StringRef name() override { return PassT::name(); }
+ StringRef name() const override { return PassT::name(); }
PassT Pass;
};
diff --git a/contrib/llvm/include/llvm/IR/PassTimingInfo.h b/contrib/llvm/include/llvm/IR/PassTimingInfo.h
new file mode 100644
index 000000000000..e9945f997f43
--- /dev/null
+++ b/contrib/llvm/include/llvm/IR/PassTimingInfo.h
@@ -0,0 +1,108 @@
+//===- PassTimingInfo.h - pass execution timing -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This header defines classes/functions to handle pass execution timing
+/// information with interfaces for both pass managers.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_PASSTIMINGINFO_H
+#define LLVM_IR_PASSTIMINGINFO_H
+
+#include "llvm/ADT/Any.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/TypeName.h"
+#include <memory>
+namespace llvm {
+
+class Pass;
+class PassInstrumentationCallbacks;
+
+/// If -time-passes has been specified, report the timings immediately and then
+/// reset the timers to zero.
+void reportAndResetTimings();
+
+/// Request the timer for this legacy-pass-manager's pass instance.
+Timer *getPassTimer(Pass *);
+
+/// If the user specifies the -time-passes argument on an LLVM tool command line
+/// then the value of this boolean will be true, otherwise false.
+/// This is the storage for the -time-passes option.
+extern bool TimePassesIsEnabled;
+
+/// This class implements -time-passes functionality for the new pass manager.
+/// It provides the pass-instrumentation callbacks that measure the pass
+/// execution time. They collect timing info into individual timers as
+/// passes are being run. At the end of its life-time it prints the resulting
+/// timing report.
+class TimePassesHandler {
+ /// Value of this type is capable of uniquely identifying pass invocations.
+ /// It is a pair of a string Pass-Identifier (which for now is common
+ /// to all the instances of a given pass) and a sequential invocation counter.
+ using PassInvocationID = std::pair<StringRef, unsigned>;
+
+ /// A group of all pass-timing timers.
+ TimerGroup TG;
+
+ /// Map of timers for pass invocations
+ DenseMap<PassInvocationID, std::unique_ptr<Timer>> TimingData;
+
+ /// Map that counts invocations of passes, for use in UniqPassID construction.
+ StringMap<unsigned> PassIDCountMap;
+
+ /// Stack of currently active timers.
+ SmallVector<Timer *, 8> TimerStack;
+
+ bool Enabled;
+
+public:
+ TimePassesHandler(bool Enabled = TimePassesIsEnabled);
+
+ /// Destructor handles the print action if it has not been handled before.
+ ~TimePassesHandler() {
+ // Destroying the timers in TimingData first flushes all of their collected
+ // data into the TG timer group member, which later prints itself when it is
+ // destroyed.
+ TimingData.clear();
+ }
+
+ /// Prints out timing information and then resets the timers.
+ void print();
+
+ // We intend this to be unique per-compilation, thus no copies.
+ TimePassesHandler(const TimePassesHandler &) = delete;
+ void operator=(const TimePassesHandler &) = delete;
+
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+private:
+ /// Dumps information for running/triggered timers, useful for debugging
+ LLVM_DUMP_METHOD void dump() const;
+
+ /// Returns the new timer for each new run of the pass.
+ Timer &getPassTimer(StringRef PassID);
+
+ /// Returns the incremented counter for the next invocation of \p PassID.
+ unsigned nextPassID(StringRef PassID) { return ++PassIDCountMap[PassID]; }
+
+ void startTimer(StringRef PassID);
+ void stopTimer(StringRef PassID);
+
+ // Implementation of pass instrumentation callbacks.
+ bool runBeforePass(StringRef PassID);
+ void runAfterPass(StringRef PassID);
+};
+
+} // namespace llvm
+
+#endif
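A sketch of wiring the handler into pass instrumentation from a tool's setup code (an assumed driver-side snippet, not part of this header):

    PassInstrumentationCallbacks PIC;
    TimePassesHandler TimePasses;       // enabled iff -time-passes was given
    TimePasses.registerCallbacks(PIC);
    // ... build pipelines and run them with an analysis manager exposing PIC ...
    TimePasses.print();                 // report and reset; the destructor also prints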
diff --git a/contrib/llvm/include/llvm/IR/PatternMatch.h b/contrib/llvm/include/llvm/IR/PatternMatch.h
index af0616cd8221..120fc253b908 100644
--- a/contrib/llvm/include/llvm/IR/PatternMatch.h
+++ b/contrib/llvm/include/llvm/IR/PatternMatch.h
@@ -31,7 +31,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
@@ -215,6 +214,7 @@ template <typename Predicate> struct cst_pred_ty : public Predicate {
// Non-splat vector constant: check each element for a match.
unsigned NumElts = V->getType()->getVectorNumElements();
assert(NumElts != 0 && "Constant vector with no elements?");
+ bool HasNonUndefElements = false;
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = C->getAggregateElement(i);
if (!Elt)
@@ -224,8 +224,9 @@ template <typename Predicate> struct cst_pred_ty : public Predicate {
auto *CI = dyn_cast<ConstantInt>(Elt);
if (!CI || !this->isValue(CI->getValue()))
return false;
+ HasNonUndefElements = true;
}
- return true;
+ return HasNonUndefElements;
}
}
return false;
@@ -272,6 +273,7 @@ template <typename Predicate> struct cstfp_pred_ty : public Predicate {
// Non-splat vector constant: check each element for a match.
unsigned NumElts = V->getType()->getVectorNumElements();
assert(NumElts != 0 && "Constant vector with no elements?");
+ bool HasNonUndefElements = false;
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = C->getAggregateElement(i);
if (!Elt)
@@ -281,8 +283,9 @@ template <typename Predicate> struct cstfp_pred_ty : public Predicate {
auto *CF = dyn_cast<ConstantFP>(Elt);
if (!CF || !this->isValue(CF->getValueAPF()))
return false;
+ HasNonUndefElements = true;
}
- return true;
+ return HasNonUndefElements;
}
}
return false;
@@ -659,11 +662,39 @@ inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
}
+template <typename Op_t> struct FNeg_match {
+ Op_t X;
+
+ FNeg_match(const Op_t &Op) : X(Op) {}
+ template <typename OpTy> bool match(OpTy *V) {
+ auto *FPMO = dyn_cast<FPMathOperator>(V);
+ if (!FPMO || FPMO->getOpcode() != Instruction::FSub)
+ return false;
+ if (FPMO->hasNoSignedZeros()) {
+ // With 'nsz', any zero goes.
+ if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
+ return false;
+ } else {
+ // Without 'nsz', we need fsub -0.0, X exactly.
+ if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
+ return false;
+ }
+ return X.match(FPMO->getOperand(1));
+ }
+};
+
/// Match 'fneg X' as 'fsub -0.0, X'.
+template <typename OpTy>
+inline FNeg_match<OpTy>
+m_FNeg(const OpTy &X) {
+ return FNeg_match<OpTy>(X);
+}
+
+/// Match 'fneg X' as 'fsub +-0.0, X'.
template <typename RHS>
-inline BinaryOp_match<cstfp_pred_ty<is_neg_zero_fp>, RHS, Instruction::FSub>
-m_FNeg(const RHS &X) {
- return m_FSub(m_NegZeroFP(), X);
+inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub>
+m_FNegNSZ(const RHS &X) {
+ return m_FSub(m_AnyZeroFP(), X);
}
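Caller-side usage is unchanged by this rewrite; a small sketch (I is an assumed Value or Instruction pointer):

    Value *X;
    if (match(I, m_FNeg(m_Value(X)))) {
      // Matches 'fsub -0.0, X', and also 'fsub 0.0, X' when the fsub has 'nsz';
      // X is bound to the negated operand.
    }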
template <typename LHS, typename RHS>
@@ -991,116 +1022,111 @@ m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
}
//===----------------------------------------------------------------------===//
-// Matchers for SelectInst classes
+// Matchers for instructions with a given opcode and number of operands.
//
-template <typename Cond_t, typename LHS_t, typename RHS_t>
-struct SelectClass_match {
- Cond_t C;
- LHS_t L;
- RHS_t R;
+/// Matches instructions with Opcode and one operand.
+template <typename T0, unsigned Opcode> struct OneOps_match {
+ T0 Op1;
- SelectClass_match(const Cond_t &Cond, const LHS_t &LHS, const RHS_t &RHS)
- : C(Cond), L(LHS), R(RHS) {}
+ OneOps_match(const T0 &Op1) : Op1(Op1) {}
template <typename OpTy> bool match(OpTy *V) {
- if (auto *I = dyn_cast<SelectInst>(V))
- return C.match(I->getOperand(0)) && L.match(I->getOperand(1)) &&
- R.match(I->getOperand(2));
+ if (V->getValueID() == Value::InstructionVal + Opcode) {
+ auto *I = cast<Instruction>(V);
+ return Op1.match(I->getOperand(0));
+ }
return false;
}
};
+/// Matches instructions with Opcode and two operands.
+template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match {
+ T0 Op1;
+ T1 Op2;
+
+ TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ if (V->getValueID() == Value::InstructionVal + Opcode) {
+ auto *I = cast<Instruction>(V);
+ return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1));
+ }
+ return false;
+ }
+};
+
+/// Matches instructions with Opcode and three operands.
+template <typename T0, typename T1, typename T2, unsigned Opcode>
+struct ThreeOps_match {
+ T0 Op1;
+ T1 Op2;
+ T2 Op3;
+
+ ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3)
+ : Op1(Op1), Op2(Op2), Op3(Op3) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ if (V->getValueID() == Value::InstructionVal + Opcode) {
+ auto *I = cast<Instruction>(V);
+ return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
+ Op3.match(I->getOperand(2));
+ }
+ return false;
+ }
+};
+
+/// Matches SelectInst.
template <typename Cond, typename LHS, typename RHS>
-inline SelectClass_match<Cond, LHS, RHS> m_Select(const Cond &C, const LHS &L,
- const RHS &R) {
- return SelectClass_match<Cond, LHS, RHS>(C, L, R);
+inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select>
+m_Select(const Cond &C, const LHS &L, const RHS &R) {
+ return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R);
}
/// This matches a select of two constants, e.g.:
/// m_SelectCst<-1, 0>(m_Value(V))
template <int64_t L, int64_t R, typename Cond>
-inline SelectClass_match<Cond, constantint_match<L>, constantint_match<R>>
+inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>,
+ Instruction::Select>
m_SelectCst(const Cond &C) {
return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>());
}
-//===----------------------------------------------------------------------===//
-// Matchers for InsertElementInst classes
-//
-
+/// Matches InsertElementInst.
template <typename Val_t, typename Elt_t, typename Idx_t>
-struct InsertElementClass_match {
- Val_t V;
- Elt_t E;
- Idx_t I;
-
- InsertElementClass_match(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
- : V(Val), E(Elt), I(Idx) {}
-
- template <typename OpTy> bool match(OpTy *VV) {
- if (auto *II = dyn_cast<InsertElementInst>(VV))
- return V.match(II->getOperand(0)) && E.match(II->getOperand(1)) &&
- I.match(II->getOperand(2));
- return false;
- }
-};
-
-template <typename Val_t, typename Elt_t, typename Idx_t>
-inline InsertElementClass_match<Val_t, Elt_t, Idx_t>
+inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>
m_InsertElement(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) {
- return InsertElementClass_match<Val_t, Elt_t, Idx_t>(Val, Elt, Idx);
+ return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>(
+ Val, Elt, Idx);
}
-//===----------------------------------------------------------------------===//
-// Matchers for ExtractElementInst classes
-//
-
-template <typename Val_t, typename Idx_t> struct ExtractElementClass_match {
- Val_t V;
- Idx_t I;
-
- ExtractElementClass_match(const Val_t &Val, const Idx_t &Idx)
- : V(Val), I(Idx) {}
-
- template <typename OpTy> bool match(OpTy *VV) {
- if (auto *II = dyn_cast<ExtractElementInst>(VV))
- return V.match(II->getOperand(0)) && I.match(II->getOperand(1));
- return false;
- }
-};
-
+/// Matches ExtractElementInst.
template <typename Val_t, typename Idx_t>
-inline ExtractElementClass_match<Val_t, Idx_t>
+inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>
m_ExtractElement(const Val_t &Val, const Idx_t &Idx) {
- return ExtractElementClass_match<Val_t, Idx_t>(Val, Idx);
+ return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx);
}
-//===----------------------------------------------------------------------===//
-// Matchers for ShuffleVectorInst classes
-//
-
+/// Matches ShuffleVectorInst.
template <typename V1_t, typename V2_t, typename Mask_t>
-struct ShuffleVectorClass_match {
- V1_t V1;
- V2_t V2;
- Mask_t M;
-
- ShuffleVectorClass_match(const V1_t &v1, const V2_t &v2, const Mask_t &m)
- : V1(v1), V2(v2), M(m) {}
+inline ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>
+m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m) {
+ return ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>(v1, v2,
+ m);
+}
- template <typename OpTy> bool match(OpTy *V) {
- if (auto *SI = dyn_cast<ShuffleVectorInst>(V))
- return V1.match(SI->getOperand(0)) && V2.match(SI->getOperand(1)) &&
- M.match(SI->getOperand(2));
- return false;
- }
-};
+/// Matches LoadInst.
+template <typename OpTy>
+inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) {
+ return OneOps_match<OpTy, Instruction::Load>(Op);
+}
-template <typename V1_t, typename V2_t, typename Mask_t>
-inline ShuffleVectorClass_match<V1_t, V2_t, Mask_t>
-m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m) {
- return ShuffleVectorClass_match<V1_t, V2_t, Mask_t>(v1, v2, m);
+/// Matches StoreInst.
+template <typename ValueOpTy, typename PointerOpTy>
+inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>
+m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
+ return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp,
+ PointerOp);
}
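The rewritten matchers are drop-in replacements for the deleted class-based ones; for example (I is an assumed Value pointer):

    Value *StoredVal, *Ptr;
    if (match(I, m_Store(m_Value(StoredVal), m_Value(Ptr)))) {
      // I is a StoreInst; StoredVal and Ptr are bound to its operands.
    }
    Value *LoadPtr;
    if (match(I, m_Load(m_Value(LoadPtr)))) {
      // I is a LoadInst reading from LoadPtr.
    }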
//===----------------------------------------------------------------------===//
@@ -1181,54 +1207,6 @@ inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
}
//===----------------------------------------------------------------------===//
-// Matcher for LoadInst classes
-//
-
-template <typename Op_t> struct LoadClass_match {
- Op_t Op;
-
- LoadClass_match(const Op_t &OpMatch) : Op(OpMatch) {}
-
- template <typename OpTy> bool match(OpTy *V) {
- if (auto *LI = dyn_cast<LoadInst>(V))
- return Op.match(LI->getPointerOperand());
- return false;
- }
-};
-
-/// Matches LoadInst.
-template <typename OpTy> inline LoadClass_match<OpTy> m_Load(const OpTy &Op) {
- return LoadClass_match<OpTy>(Op);
-}
-
-//===----------------------------------------------------------------------===//
-// Matcher for StoreInst classes
-//
-
-template <typename ValueOp_t, typename PointerOp_t> struct StoreClass_match {
- ValueOp_t ValueOp;
- PointerOp_t PointerOp;
-
- StoreClass_match(const ValueOp_t &ValueOpMatch,
- const PointerOp_t &PointerOpMatch) :
- ValueOp(ValueOpMatch), PointerOp(PointerOpMatch) {}
-
- template <typename OpTy> bool match(OpTy *V) {
- if (auto *LI = dyn_cast<StoreInst>(V))
- return ValueOp.match(LI->getValueOperand()) &&
- PointerOp.match(LI->getPointerOperand());
- return false;
- }
-};
-
-/// Matches StoreInst.
-template <typename ValueOpTy, typename PointerOpTy>
-inline StoreClass_match<ValueOpTy, PointerOpTy>
-m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
- return StoreClass_match<ValueOpTy, PointerOpTy>(ValueOp, PointerOp);
-}
-
-//===----------------------------------------------------------------------===//
// Matchers for control flow.
//
@@ -1507,8 +1485,10 @@ template <typename Opnd_t> struct Argument_match {
Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {}
template <typename OpTy> bool match(OpTy *V) {
- CallSite CS(V);
- return CS.isCall() && Val.match(CS.getArgument(OpI));
+ // FIXME: Should likely be switched to use `CallBase`.
+ if (const auto *CI = dyn_cast<CallInst>(V))
+ return Val.match(CI->getArgOperand(OpI));
+ return false;
}
};
diff --git a/contrib/llvm/include/llvm/IR/RuntimeLibcalls.def b/contrib/llvm/include/llvm/IR/RuntimeLibcalls.def
index 7ed90d959f01..89005120cdc1 100644
--- a/contrib/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/contrib/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -83,6 +83,9 @@ HANDLE_LIBCALL(UDIVREM_I64, nullptr)
HANDLE_LIBCALL(UDIVREM_I128, nullptr)
HANDLE_LIBCALL(NEG_I32, "__negsi2")
HANDLE_LIBCALL(NEG_I64, "__negdi2")
+HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
+HANDLE_LIBCALL(CTLZ_I64, "__clzdi2")
+HANDLE_LIBCALL(CTLZ_I128, "__clzti2")
// Floating-point
HANDLE_LIBCALL(ADD_F32, "__addsf3")
@@ -125,6 +128,11 @@ HANDLE_LIBCALL(SQRT_F64, "sqrt")
HANDLE_LIBCALL(SQRT_F80, "sqrtl")
HANDLE_LIBCALL(SQRT_F128, "sqrtl")
HANDLE_LIBCALL(SQRT_PPCF128, "sqrtl")
+HANDLE_LIBCALL(CBRT_F32, "cbrtf")
+HANDLE_LIBCALL(CBRT_F64, "cbrt")
+HANDLE_LIBCALL(CBRT_F80, "cbrtl")
+HANDLE_LIBCALL(CBRT_F128, "cbrtl")
+HANDLE_LIBCALL(CBRT_PPCF128, "cbrtl")
HANDLE_LIBCALL(LOG_F32, "logf")
HANDLE_LIBCALL(LOG_F64, "log")
HANDLE_LIBCALL(LOG_F80, "logl")
diff --git a/contrib/llvm/include/llvm/IR/TypeBuilder.h b/contrib/llvm/include/llvm/IR/TypeBuilder.h
deleted file mode 100644
index d2c6f00079da..000000000000
--- a/contrib/llvm/include/llvm/IR/TypeBuilder.h
+++ /dev/null
@@ -1,407 +0,0 @@
-//===---- llvm/TypeBuilder.h - Builder for LLVM types -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the TypeBuilder class, which is used as a convenient way to
-// create LLVM types with a consistent and simplified interface.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_IR_TYPEBUILDER_H
-#define LLVM_IR_TYPEBUILDER_H
-
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/LLVMContext.h"
-#include <climits>
-
-namespace llvm {
-
-/// TypeBuilder - This provides a uniform API for looking up types
-/// known at compile time. To support cross-compilation, we define a
-/// series of tag types in the llvm::types namespace, like i<N>,
-/// ieee_float, ppc_fp128, etc. TypeBuilder<T, false> allows T to be
-/// any of these, a native C type (whose size may depend on the host
-/// compiler), or a pointer, function, or struct type built out of
-/// these. TypeBuilder<T, true> removes native C types from this set
-/// to guarantee that its result is suitable for cross-compilation.
-/// We define the primitive types, pointer types, and functions up to
-/// 5 arguments here, but to use this class with your own types,
-/// you'll need to specialize it. For example, say you want to call a
-/// function defined externally as:
-///
-/// \code{.cpp}
-///
-/// struct MyType {
-/// int32 a;
-/// int32 *b;
-/// void *array[1]; // Intended as a flexible array.
-/// };
-/// int8 AFunction(struct MyType *value);
-///
-/// \endcode
-///
-/// You'll want to use
-/// Function::Create(TypeBuilder<types::i<8>(MyType*), true>::get(), ...)
-/// to declare the function, but when you first try this, your compiler will
-/// complain that TypeBuilder<MyType, true>::get() doesn't exist. To fix this,
-/// write:
-///
-/// \code{.cpp}
-///
-/// namespace llvm {
-/// template<bool xcompile> class TypeBuilder<MyType, xcompile> {
-/// public:
-/// static StructType *get(LLVMContext &Context) {
-/// // If you cache this result, be sure to cache it separately
-/// // for each LLVMContext.
-/// return StructType::get(
-/// TypeBuilder<types::i<32>, xcompile>::get(Context),
-/// TypeBuilder<types::i<32>*, xcompile>::get(Context),
-/// TypeBuilder<types::i<8>*[], xcompile>::get(Context),
-/// nullptr);
-/// }
-///
-/// // You may find this a convenient place to put some constants
-/// // to help with getelementptr. They don't have any effect on
-/// // the operation of TypeBuilder.
-/// enum Fields {
-/// FIELD_A,
-/// FIELD_B,
-/// FIELD_ARRAY
-/// };
-/// }
-/// } // namespace llvm
-///
-/// \endcode
-///
-/// TypeBuilder cannot handle recursive types or types you only know at runtime.
-/// If you try to give it a recursive type, it will deadlock, infinitely
-/// recurse, or do something similarly undesirable.
-template<typename T, bool cross_compilable> class TypeBuilder {};
-
-// Types for use with cross-compilable TypeBuilders. These correspond
-// exactly with an LLVM-native type.
-namespace types {
-/// i<N> corresponds to the LLVM IntegerType with N bits.
-template<uint32_t num_bits> class i {};
-
-// The following classes represent the LLVM floating types.
-class ieee_float {};
-class ieee_double {};
-class x86_fp80 {};
-class fp128 {};
-class ppc_fp128 {};
-// X86 MMX.
-class x86_mmx {};
-} // namespace types
-
-// LLVM doesn't have const or volatile types.
-template<typename T, bool cross> class TypeBuilder<const T, cross>
- : public TypeBuilder<T, cross> {};
-template<typename T, bool cross> class TypeBuilder<volatile T, cross>
- : public TypeBuilder<T, cross> {};
-template<typename T, bool cross> class TypeBuilder<const volatile T, cross>
- : public TypeBuilder<T, cross> {};
-
-// Pointers
-template<typename T, bool cross> class TypeBuilder<T*, cross> {
-public:
- static PointerType *get(LLVMContext &Context) {
- return PointerType::getUnqual(TypeBuilder<T,cross>::get(Context));
- }
-};
-
-/// There is no support for references
-template<typename T, bool cross> class TypeBuilder<T&, cross> {};
-
-// Arrays
-template<typename T, size_t N, bool cross> class TypeBuilder<T[N], cross> {
-public:
- static ArrayType *get(LLVMContext &Context) {
- return ArrayType::get(TypeBuilder<T, cross>::get(Context), N);
- }
-};
-/// LLVM uses an array of length 0 to represent an unknown-length array.
-template<typename T, bool cross> class TypeBuilder<T[], cross> {
-public:
- static ArrayType *get(LLVMContext &Context) {
- return ArrayType::get(TypeBuilder<T, cross>::get(Context), 0);
- }
-};
-
-// Define the C integral types only for TypeBuilder<T, false>.
-//
-// C integral types do not have a defined size. It would be nice to use the
-// stdint.h-defined typedefs that do have defined sizes, but we'd run into the
-// following problem:
-//
-// On an ILP32 machine, stdint.h might define:
-//
-// typedef int int32_t;
-// typedef long long int64_t;
-// typedef long size_t;
-//
-// If we defined TypeBuilder<int32_t> and TypeBuilder<int64_t>, then any use of
-// TypeBuilder<size_t> would fail. We couldn't define TypeBuilder<size_t> in
-// addition to the defined-size types because we'd get duplicate definitions on
-// platforms where stdint.h instead defines:
-//
-// typedef int int32_t;
-// typedef long long int64_t;
-// typedef int size_t;
-//
-// So we define all the primitive C types and nothing else.
-#define DEFINE_INTEGRAL_TYPEBUILDER(T) \
-template<> class TypeBuilder<T, false> { \
-public: \
- static IntegerType *get(LLVMContext &Context) { \
- return IntegerType::get(Context, sizeof(T) * CHAR_BIT); \
- } \
-}; \
-template<> class TypeBuilder<T, true> { \
- /* We provide a definition here so users don't accidentally */ \
- /* define these types to work. */ \
-}
-DEFINE_INTEGRAL_TYPEBUILDER(char);
-DEFINE_INTEGRAL_TYPEBUILDER(signed char);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned char);
-DEFINE_INTEGRAL_TYPEBUILDER(short);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned short);
-DEFINE_INTEGRAL_TYPEBUILDER(int);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned int);
-DEFINE_INTEGRAL_TYPEBUILDER(long);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned long);
-#ifdef _MSC_VER
-DEFINE_INTEGRAL_TYPEBUILDER(__int64);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned __int64);
-#else /* _MSC_VER */
-DEFINE_INTEGRAL_TYPEBUILDER(long long);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned long long);
-#endif /* _MSC_VER */
-#undef DEFINE_INTEGRAL_TYPEBUILDER
-
-template<uint32_t num_bits, bool cross>
-class TypeBuilder<types::i<num_bits>, cross> {
-public:
- static IntegerType *get(LLVMContext &C) {
- return IntegerType::get(C, num_bits);
- }
-};
-
-template<> class TypeBuilder<float, false> {
-public:
- static Type *get(LLVMContext& C) {
- return Type::getFloatTy(C);
- }
-};
-template<> class TypeBuilder<float, true> {};
-
-template<> class TypeBuilder<double, false> {
-public:
- static Type *get(LLVMContext& C) {
- return Type::getDoubleTy(C);
- }
-};
-template<> class TypeBuilder<double, true> {};
-
-template<bool cross> class TypeBuilder<types::ieee_float, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getFloatTy(C); }
-};
-template<bool cross> class TypeBuilder<types::ieee_double, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getDoubleTy(C); }
-};
-template<bool cross> class TypeBuilder<types::x86_fp80, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getX86_FP80Ty(C); }
-};
-template<bool cross> class TypeBuilder<types::fp128, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getFP128Ty(C); }
-};
-template<bool cross> class TypeBuilder<types::ppc_fp128, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getPPC_FP128Ty(C); }
-};
-template<bool cross> class TypeBuilder<types::x86_mmx, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getX86_MMXTy(C); }
-};
-
-template<bool cross> class TypeBuilder<void, cross> {
-public:
- static Type *get(LLVMContext &C) {
- return Type::getVoidTy(C);
- }
-};
-
-/// void* is disallowed in LLVM types, but it occurs often enough in C code that
-/// we special case it.
-template<> class TypeBuilder<void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-template<> class TypeBuilder<const void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-template<> class TypeBuilder<volatile void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-template<> class TypeBuilder<const volatile void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-
-template<typename R, bool cross> class TypeBuilder<R(), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- return FunctionType::get(TypeBuilder<R, cross>::get(Context), false);
- }
-};
-template<typename R, typename A1, bool cross> class TypeBuilder<R(A1), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-template<typename R, typename A1, typename A2, bool cross>
-class TypeBuilder<R(A1, A2), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-template<typename R, typename A1, typename A2, typename A3, bool cross>
-class TypeBuilder<R(A1, A2, A3), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- bool cross>
-class TypeBuilder<R(A1, A2, A3, A4), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- typename A5, bool cross>
-class TypeBuilder<R(A1, A2, A3, A4, A5), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- TypeBuilder<A5, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-
-template<typename R, bool cross> class TypeBuilder<R(...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- return FunctionType::get(TypeBuilder<R, cross>::get(Context), true);
- }
-};
-template<typename R, typename A1, bool cross>
-class TypeBuilder<R(A1, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context), params, true);
- }
-};
-template<typename R, typename A1, typename A2, bool cross>
-class TypeBuilder<R(A1, A2, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-template<typename R, typename A1, typename A2, typename A3, bool cross>
-class TypeBuilder<R(A1, A2, A3, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- bool cross>
-class TypeBuilder<R(A1, A2, A3, A4, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- typename A5, bool cross>
-class TypeBuilder<R(A1, A2, A3, A4, A5, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- TypeBuilder<A5, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-
-} // namespace llvm
-
-#endif
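With TypeBuilder gone, the types from the MyType example in the removed comment are built directly with the core IR type APIs. A minimal sketch, assuming an LLVMContext Context and a Module M (both, like MyType itself, are hypothetical and not part of this patch):

    // Literal struct matching the removed example: { i32, i32*, [0 x i8*] }.
    StructType *MyTypeTy = StructType::get(
        Context, {IntegerType::get(Context, 32),
                  PointerType::getUnqual(IntegerType::get(Context, 32)),
                  ArrayType::get(
                      PointerType::getUnqual(IntegerType::get(Context, 8)), 0)});
    // Direct equivalent of TypeBuilder<types::i<8>(MyType*), true>::get(Context).
    FunctionType *AFunctionTy = FunctionType::get(
        IntegerType::get(Context, 8), {PointerType::getUnqual(MyTypeTy)},
        /*isVarArg=*/false);
    Function *AFunction = Function::Create(
        AFunctionTy, Function::ExternalLinkage, "AFunction", &M);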
diff --git a/contrib/llvm/include/llvm/IR/Value.h b/contrib/llvm/include/llvm/IR/Value.h
index f396db995ab0..4f3a45c684fc 100644
--- a/contrib/llvm/include/llvm/IR/Value.h
+++ b/contrib/llvm/include/llvm/IR/Value.h
@@ -254,7 +254,8 @@ public:
private:
void destroyValueName();
- void doRAUW(Value *New, bool NoMetadata);
+ enum class ReplaceMetadataUses { No, Yes };
+ void doRAUW(Value *New, ReplaceMetadataUses);
void setNameImpl(const Twine &Name);
public:
diff --git a/contrib/llvm/include/llvm/InitializePasses.h b/contrib/llvm/include/llvm/InitializePasses.h
index d67b1d48f274..037c0dbb56ec 100644
--- a/contrib/llvm/include/llvm/InitializePasses.h
+++ b/contrib/llvm/include/llvm/InitializePasses.h
@@ -85,6 +85,7 @@ void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&);
void initializeBranchRelaxationPass(PassRegistry&);
void initializeBreakCriticalEdgesPass(PassRegistry&);
void initializeBreakFalseDepsPass(PassRegistry&);
+void initializeCanonicalizeAliasesLegacyPassPass(PassRegistry &);
void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&);
void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&);
void initializeCFGPrinterLegacyPassPass(PassRegistry&);
@@ -103,6 +104,7 @@ void initializeCodeGenPreparePass(PassRegistry&);
void initializeConstantHoistingLegacyPassPass(PassRegistry&);
void initializeConstantMergeLegacyPassPass(PassRegistry&);
void initializeConstantPropagationPass(PassRegistry&);
+void initializeControlHeightReductionLegacyPassPass(PassRegistry&);
void initializeCorrelatedValuePropagationPass(PassRegistry&);
void initializeCostModelAnalysisPass(PassRegistry&);
void initializeCrossDSOCFIPass(PassRegistry&);
@@ -119,7 +121,6 @@ void initializeDependenceAnalysisPass(PassRegistry&);
void initializeDependenceAnalysisWrapperPassPass(PassRegistry&);
void initializeDetectDeadLanesPass(PassRegistry&);
void initializeDivRemPairsLegacyPassPass(PassRegistry&);
-void initializeDivergenceAnalysisPass(PassRegistry&);
void initializeDomOnlyPrinterPass(PassRegistry&);
void initializeDomOnlyViewerPass(PassRegistry&);
void initializeDomPrinterPass(PassRegistry&);
@@ -140,6 +141,7 @@ void initializeExpandISelPseudosPass(PassRegistry&);
void initializeExpandMemCmpPassPass(PassRegistry&);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
+void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&);
void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
void initializeFinalizeMachineBundlesPass(PassRegistry&);
@@ -161,6 +163,7 @@ void initializeGlobalOptLegacyPassPass(PassRegistry&);
void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
+void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
void initializeHWAddressSanitizerPass(PassRegistry&);
void initializeIPCPPass(PassRegistry&);
void initializeIPSCCPLegacyPassPass(PassRegistry&);
@@ -181,6 +184,7 @@ void initializeInstrProfilingLegacyPassPass(PassRegistry&);
void initializeInstructionCombiningPassPass(PassRegistry&);
void initializeInstructionSelectPass(PassRegistry&);
void initializeInterleavedAccessPass(PassRegistry&);
+void initializeInterleavedLoadCombinePass(PassRegistry &);
void initializeInternalizeLegacyPassPass(PassRegistry&);
void initializeIntervalPartitionPass(PassRegistry&);
void initializeJumpThreadingPass(PassRegistry&);
@@ -191,9 +195,11 @@ void initializeLazyBranchProbabilityInfoPassPass(PassRegistry&);
void initializeLazyMachineBlockFrequencyInfoPassPass(PassRegistry&);
void initializeLazyValueInfoPrinterPass(PassRegistry&);
void initializeLazyValueInfoWrapperPassPass(PassRegistry&);
+void initializeLegacyDivergenceAnalysisPass(PassRegistry&);
void initializeLegacyLICMPassPass(PassRegistry&);
void initializeLegacyLoopSinkPassPass(PassRegistry&);
void initializeLegalizerPass(PassRegistry&);
+void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &);
void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&);
void initializeLintPass(PassRegistry&);
void initializeLiveDebugValuesPass(PassRegistry&);
@@ -203,7 +209,7 @@ void initializeLiveRangeShrinkPass(PassRegistry&);
void initializeLiveRegMatrixPass(PassRegistry&);
void initializeLiveStacksPass(PassRegistry&);
void initializeLiveVariablesPass(PassRegistry&);
-void initializeLoadStoreVectorizerPass(PassRegistry&);
+void initializeLoadStoreVectorizerLegacyPassPass(PassRegistry&);
void initializeLoaderPassPass(PassRegistry&);
void initializeLocalStackSlotPassPass(PassRegistry&);
void initializeLocalizerPass(PassRegistry&);
@@ -269,7 +275,7 @@ void initializeMemDerefPrinterPass(PassRegistry&);
void initializeMemoryDependenceWrapperPassPass(PassRegistry&);
void initializeMemorySSAPrinterLegacyPassPass(PassRegistry&);
void initializeMemorySSAWrapperPassPass(PassRegistry&);
-void initializeMemorySanitizerPass(PassRegistry&);
+void initializeMemorySanitizerLegacyPassPass(PassRegistry&);
void initializeMergeFunctionsPass(PassRegistry&);
void initializeMergeICmpsPass(PassRegistry&);
void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
@@ -352,7 +358,7 @@ void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&);
void initializeSanitizerCoverageModulePass(PassRegistry&);
void initializeScalarEvolutionWrapperPassPass(PassRegistry&);
void initializeScalarizeMaskedMemIntrinPass(PassRegistry&);
-void initializeScalarizerPass(PassRegistry&);
+void initializeScalarizerLegacyPassPass(PassRegistry&);
void initializeScavengerTestPass(PassRegistry&);
void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&);
void initializeSeparateConstOffsetFromGEPPass(PassRegistry&);
@@ -369,6 +375,8 @@ void initializeSpillPlacementPass(PassRegistry&);
void initializeStackColoringPass(PassRegistry&);
void initializeStackMapLivenessPass(PassRegistry&);
void initializeStackProtectorPass(PassRegistry&);
+void initializeStackSafetyGlobalInfoWrapperPassPass(PassRegistry &);
+void initializeStackSafetyInfoWrapperPassPass(PassRegistry &);
void initializeStackSlotColoringPass(PassRegistry&);
void initializeStraightLineStrengthReducePass(PassRegistry&);
void initializeStripDeadDebugInfoPass(PassRegistry&);
@@ -384,7 +392,7 @@ void initializeTailDuplicatePass(PassRegistry&);
void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&);
void initializeTargetPassConfigPass(PassRegistry&);
void initializeTargetTransformInfoWrapperPassPass(PassRegistry&);
-void initializeThreadSanitizerPass(PassRegistry&);
+void initializeThreadSanitizerLegacyPassPass(PassRegistry&);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
void initializeTypeBasedAAWrapperPassPass(PassRegistry&);
void initializeUnifyFunctionExitNodesPass(PassRegistry&);
@@ -394,6 +402,7 @@ void initializeUnreachableMachineBlockElimPass(PassRegistry&);
void initializeVerifierLegacyPassPass(PassRegistry&);
void initializeVirtRegMapPass(PassRegistry&);
void initializeVirtRegRewriterPass(PassRegistry&);
+void initializeWarnMissedTransformationsLegacyPass(PassRegistry &);
void initializeWasmEHPreparePass(PassRegistry&);
void initializeWholeProgramDevirtPass(PassRegistry&);
void initializeWinEHPreparePass(PassRegistry&);
diff --git a/contrib/llvm/include/llvm/LTO/Config.h b/contrib/llvm/include/llvm/LTO/Config.h
index 57bba5e34840..7058602c3ee2 100644
--- a/contrib/llvm/include/llvm/LTO/Config.h
+++ b/contrib/llvm/include/llvm/LTO/Config.h
@@ -49,6 +49,10 @@ struct Config {
/// Use the new pass manager
bool UseNewPM = false;
+ /// Flag to indicate that the optimizer should not assume builtins are present
+ /// on the target.
+ bool Freestanding = false;
+
/// Disable entirely the optimizer, including importing for ThinLTO
bool CodeGenOnly = false;
@@ -73,6 +77,9 @@ struct Config {
/// Sample PGO profile path.
std::string SampleProfile;
+ /// Name remapping file for profile data.
+ std::string ProfileRemapping;
+
/// The directory to store .dwo files.
std::string DwoDir;
diff --git a/contrib/llvm/include/llvm/LTO/LTO.h b/contrib/llvm/include/llvm/LTO/LTO.h
index 7d6beab6b441..534d9b6f3f2a 100644
--- a/contrib/llvm/include/llvm/LTO/LTO.h
+++ b/contrib/llvm/include/llvm/LTO/LTO.h
@@ -40,13 +40,13 @@ class Module;
class Target;
class raw_pwrite_stream;
-/// Resolve Weak and LinkOnce values in the \p Index. Linkage changes recorded
-/// in the index and the ThinLTO backends must apply the changes to the Module
-/// via thinLTOResolveWeakForLinkerModule.
+/// Resolve linkage for prevailing symbols in the \p Index. Linkage changes
+/// recorded in the index and the ThinLTO backends must apply the changes to
+/// the module via thinLTOResolvePrevailingInModule.
///
/// This is done for correctness (if value exported, ensure we always
/// emit a copy), and compile-time optimization (allow drop of duplicates).
-void thinLTOResolveWeakForLinkerInIndex(
+void thinLTOResolvePrevailingInIndex(
ModuleSummaryIndex &Index,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
@@ -60,6 +60,19 @@ void thinLTOInternalizeAndPromoteInIndex(
ModuleSummaryIndex &Index,
function_ref<bool(StringRef, GlobalValue::GUID)> isExported);
+/// Computes a unique hash for the Module considering the current list of
+/// export/import and other global analysis results.
+/// The hash is produced in \p Key.
+void computeLTOCacheKey(
+ SmallString<40> &Key, const lto::Config &Conf,
+ const ModuleSummaryIndex &Index, StringRef ModuleID,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+ const GVSummaryMapTy &DefinedGlobals,
+ const std::set<GlobalValue::GUID> &CfiFunctionDefs = {},
+ const std::set<GlobalValue::GUID> &CfiFunctionDecls = {});
+
namespace lto {
/// Given the original \p Path to an output file, replace any path
@@ -387,6 +400,9 @@ private:
Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache);
mutable bool CalledGetMaxTasks = false;
+
+ // Use Optional to distinguish false from not yet initialized.
+ Optional<bool> EnableSplitLTOUnit;
};
/// The resolution for a symbol. The linker must provide a SymbolResolution for
diff --git a/contrib/llvm/include/llvm/LTO/SummaryBasedOptimizations.h b/contrib/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
new file mode 100644
index 000000000000..ad3a8e7dc77b
--- /dev/null
+++ b/contrib/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
@@ -0,0 +1,17 @@
+//=- llvm/LTO/SummaryBasedOptimizations.h -Link time optimizations-*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
+#define LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
+namespace llvm {
+class ModuleSummaryIndex;
+void computeSyntheticCounts(ModuleSummaryIndex &Index);
+
+} // namespace llvm
+#endif
diff --git a/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
index f48ab02863a5..8f23b7cb4574 100644
--- a/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ b/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -48,6 +48,9 @@
#include <string>
#include <vector>
+/// Enable global value internalization in LTO.
+extern llvm::cl::opt<bool> EnableLTOInternalization;
+
namespace llvm {
template <typename T> class ArrayRef;
class LLVMContext;
@@ -233,7 +236,7 @@ private:
unsigned OptLevel = 2;
lto_diagnostic_handler_t DiagHandler = nullptr;
void *DiagContext = nullptr;
- bool ShouldInternalize = true;
+ bool ShouldInternalize = EnableLTOInternalization;
bool ShouldEmbedUselists = false;
bool ShouldRestoreGlobalsLinkage = false;
TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile;
diff --git a/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
index b32a972542c8..d4c69a1ce260 100644
--- a/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ b/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -187,7 +187,7 @@ public:
/// Cache policy: the maximum size for the cache directory in bytes. A value
/// over the amount of available space on the disk will be reduced to the
/// amount of available space. A value of 0 will be ignored.
- void setCacheMaxSizeBytes(unsigned MaxSizeBytes) {
+ void setCacheMaxSizeBytes(uint64_t MaxSizeBytes) {
if (MaxSizeBytes)
CacheOptions.Policy.MaxSizeBytes = MaxSizeBytes;
}
@@ -273,8 +273,8 @@ public:
/**
* Compute and emit the imported files for module at \p ModulePath.
*/
- static void emitImports(StringRef ModulePath, StringRef OutputName,
- ModuleSummaryIndex &Index);
+ void emitImports(Module &Module, StringRef OutputName,
+ ModuleSummaryIndex &Index);
/**
* Perform cross-module importing for the module identified by
@@ -285,8 +285,8 @@ public:
/**
* Compute the list of summaries needed for importing into module.
*/
- static void gatherImportedSummariesForModule(
- StringRef ModulePath, ModuleSummaryIndex &Index,
+ void gatherImportedSummariesForModule(
+ Module &Module, ModuleSummaryIndex &Index,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
/**
@@ -299,11 +299,6 @@ public:
*/
void optimize(Module &Module);
- /**
- * Perform ThinLTO CodeGen.
- */
- std::unique_ptr<MemoryBuffer> codegen(Module &Module);
-
/**@}*/
private:
diff --git a/contrib/llvm/include/llvm/LinkAllPasses.h b/contrib/llvm/include/llvm/LinkAllPasses.h
index bd432c58b613..0851c2f8d265 100644
--- a/contrib/llvm/include/llvm/LinkAllPasses.h
+++ b/contrib/llvm/include/llvm/LinkAllPasses.h
@@ -50,6 +50,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
@@ -88,13 +89,13 @@ namespace {
(void) llvm::createCalledValuePropagationPass();
(void) llvm::createConstantMergePass();
(void) llvm::createConstantPropagationPass();
+ (void) llvm::createControlHeightReductionLegacyPass();
(void) llvm::createCostModelAnalysisPass();
(void) llvm::createDeadArgEliminationPass();
(void) llvm::createDeadCodeEliminationPass();
(void) llvm::createDeadInstEliminationPass();
(void) llvm::createDeadStoreEliminationPass();
(void) llvm::createDependenceAnalysisWrapperPass();
- (void) llvm::createDivergenceAnalysisPass();
(void) llvm::createDomOnlyPrinterPass();
(void) llvm::createDomPrinterPass();
(void) llvm::createDomOnlyViewerPass();
@@ -121,6 +122,7 @@ namespace {
(void) llvm::createInstructionCombiningPass();
(void) llvm::createInternalizePass();
(void) llvm::createLCSSAPass();
+ (void) llvm::createLegacyDivergenceAnalysisPass();
(void) llvm::createLICMPass();
(void) llvm::createLoopSinkPass();
(void) llvm::createLazyValueInfoPass();
@@ -218,6 +220,7 @@ namespace {
(void) llvm::createFloat2IntPass();
(void) llvm::createEliminateAvailableExternallyPass();
(void) llvm::createScalarizeMaskedMemIntrinPass();
+ (void) llvm::createWarnMissedTransformationsPass();
(void)new llvm::IntervalPartition();
(void)new llvm::ScalarEvolutionWrapperPass();
@@ -227,7 +230,8 @@ namespace {
llvm::TargetLibraryInfo TLI(TLII);
llvm::AliasAnalysis AA(TLI);
llvm::AliasSetTracker X(AA);
- X.add(nullptr, 0, llvm::AAMDNodes()); // for -print-alias-sets
+ X.add(nullptr, llvm::LocationSize::unknown(),
+ llvm::AAMDNodes()); // for -print-alias-sets
(void) llvm::AreStatisticsEnabled();
(void) llvm::sys::RunningOnValgrind();
}
diff --git a/contrib/llvm/include/llvm/MC/MCAsmInfoWasm.h b/contrib/llvm/include/llvm/MC/MCAsmInfoWasm.h
index bc46cfdf4c4c..71c6ee28df70 100644
--- a/contrib/llvm/include/llvm/MC/MCAsmInfoWasm.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmInfoWasm.h
@@ -19,6 +19,6 @@ class MCAsmInfoWasm : public MCAsmInfo {
protected:
MCAsmInfoWasm();
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCAsmMacro.h b/contrib/llvm/include/llvm/MC/MCAsmMacro.h
index 09b32c7ea333..135fa4f2e33d 100644
--- a/contrib/llvm/include/llvm/MC/MCAsmMacro.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmMacro.h
@@ -52,7 +52,7 @@ public:
Pipe, PipePipe, Caret,
Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
Less, LessEqual, LessLess, LessGreater,
- Greater, GreaterEqual, GreaterGreater, At,
+ Greater, GreaterEqual, GreaterGreater, At, MinusGreater,
// MIPS unary expression operators such as %neg.
PercentCall16, PercentCall_Hi, PercentCall_Lo, PercentDtprel_Hi,
diff --git a/contrib/llvm/include/llvm/MC/MCAssembler.h b/contrib/llvm/include/llvm/MC/MCAssembler.h
index 0f9499d705e4..986c6e17548f 100644
--- a/contrib/llvm/include/llvm/MC/MCAssembler.h
+++ b/contrib/llvm/include/llvm/MC/MCAssembler.h
@@ -23,6 +23,7 @@
#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/VersionTuple.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
@@ -94,6 +95,8 @@ public:
unsigned Major;
unsigned Minor;
unsigned Update;
+ /// An optional version of the SDK that was used to build the source.
+ VersionTuple SDKVersion;
};
private:
@@ -255,20 +258,24 @@ public:
/// MachO deployment target version information.
const VersionInfoType &getVersionInfo() const { return VersionInfo; }
void setVersionMin(MCVersionMinType Type, unsigned Major, unsigned Minor,
- unsigned Update) {
+ unsigned Update,
+ VersionTuple SDKVersion = VersionTuple()) {
VersionInfo.EmitBuildVersion = false;
VersionInfo.TypeOrPlatform.Type = Type;
VersionInfo.Major = Major;
VersionInfo.Minor = Minor;
VersionInfo.Update = Update;
+ VersionInfo.SDKVersion = SDKVersion;
}
void setBuildVersion(MachO::PlatformType Platform, unsigned Major,
- unsigned Minor, unsigned Update) {
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion = VersionTuple()) {
VersionInfo.EmitBuildVersion = true;
VersionInfo.TypeOrPlatform.Platform = Platform;
VersionInfo.Major = Major;
VersionInfo.Minor = Minor;
VersionInfo.Update = Update;
+ VersionInfo.SDKVersion = SDKVersion;
}
/// Reuse an assembler instance
diff --git a/contrib/llvm/include/llvm/MC/MCCodeView.h b/contrib/llvm/include/llvm/MC/MCCodeView.h
index 1d9e3c6698cf..cef03a409f95 100644
--- a/contrib/llvm/include/llvm/MC/MCCodeView.h
+++ b/contrib/llvm/include/llvm/MC/MCCodeView.h
@@ -30,6 +30,7 @@ class CodeViewContext;
/// Instances of this class represent the information from a
/// .cv_loc directive.
class MCCVLoc {
+ const MCSymbol *Label = nullptr;
uint32_t FunctionId;
uint32_t FileNum;
uint32_t Line;
@@ -39,15 +40,17 @@ class MCCVLoc {
private: // CodeViewContext manages these
friend class CodeViewContext;
- MCCVLoc(unsigned functionid, unsigned fileNum, unsigned line, unsigned column,
- bool prologueend, bool isstmt)
- : FunctionId(functionid), FileNum(fileNum), Line(line), Column(column),
- PrologueEnd(prologueend), IsStmt(isstmt) {}
+ MCCVLoc(const MCSymbol *Label, unsigned functionid, unsigned fileNum,
+ unsigned line, unsigned column, bool prologueend, bool isstmt)
+ : Label(Label), FunctionId(functionid), FileNum(fileNum), Line(line),
+ Column(column), PrologueEnd(prologueend), IsStmt(isstmt) {}
// Allow the default copy constructor and assignment operator to be used
// for an MCCVLoc object.
public:
+ const MCSymbol *getLabel() const { return Label; }
+
unsigned getFunctionId() const { return FunctionId; }
/// Get the FileNum of this MCCVLoc.
@@ -62,6 +65,8 @@ public:
bool isPrologueEnd() const { return PrologueEnd; }
bool isStmt() const { return IsStmt; }
+ void setLabel(const MCSymbol *L) { Label = L; }
+
void setFunctionId(unsigned FID) { FunctionId = FID; }
/// Set the FileNum of this MCCVLoc.
@@ -80,31 +85,6 @@ public:
void setIsStmt(bool IS) { IsStmt = IS; }
};
-/// Instances of this class represent the line information for
-/// the CodeView line table entries. Which is created after a machine
-/// instruction is assembled and uses an address from a temporary label
-/// created at the current address in the current section and the info from
-/// the last .cv_loc directive seen as stored in the context.
-class MCCVLineEntry : public MCCVLoc {
- const MCSymbol *Label;
-
-private:
- // Allow the default copy constructor and assignment operator to be used
- // for an MCCVLineEntry object.
-
-public:
- // Constructor to create an MCCVLineEntry given a symbol and the dwarf loc.
- MCCVLineEntry(const MCSymbol *Label, const MCCVLoc loc)
- : MCCVLoc(loc), Label(Label) {}
-
- const MCSymbol *getLabel() const { return Label; }
-
- // This is called when an instruction is assembled into the specified
- // section and if there is information from the last .cv_loc directive that
- // has yet to have a line entry made for it is made.
- static void Make(MCObjectStreamer *MCOS);
-};
-
/// Information describing a function or inlined call site introduced by
/// .cv_func_id or .cv_inline_site_id. Accumulates information from .cv_loc
/// directives used with this function's id or the id of an inlined call site
@@ -183,32 +163,20 @@ public:
/// and sets CVLocSeen. When the next instruction is assembled an entry
/// in the line number table with this information and the address of the
/// instruction will be created.
- void setCurrentCVLoc(unsigned FunctionId, unsigned FileNo, unsigned Line,
- unsigned Column, bool PrologueEnd, bool IsStmt) {
- CurrentCVLoc.setFunctionId(FunctionId);
- CurrentCVLoc.setFileNum(FileNo);
- CurrentCVLoc.setLine(Line);
- CurrentCVLoc.setColumn(Column);
- CurrentCVLoc.setPrologueEnd(PrologueEnd);
- CurrentCVLoc.setIsStmt(IsStmt);
- CVLocSeen = true;
- }
-
- bool getCVLocSeen() { return CVLocSeen; }
- void clearCVLocSeen() { CVLocSeen = false; }
-
- const MCCVLoc &getCurrentCVLoc() { return CurrentCVLoc; }
+ void recordCVLoc(MCContext &Ctx, const MCSymbol *Label, unsigned FunctionId,
+ unsigned FileNo, unsigned Line, unsigned Column,
+ bool PrologueEnd, bool IsStmt);
bool isValidCVFileNumber(unsigned FileNumber);
/// Add a line entry.
- void addLineEntry(const MCCVLineEntry &LineEntry);
+ void addLineEntry(const MCCVLoc &LineEntry);
- std::vector<MCCVLineEntry> getFunctionLineEntries(unsigned FuncId);
+ std::vector<MCCVLoc> getFunctionLineEntries(unsigned FuncId);
std::pair<size_t, size_t> getLineExtent(unsigned FuncId);
- ArrayRef<MCCVLineEntry> getLinesForExtent(size_t L, size_t R);
+ ArrayRef<MCCVLoc> getLinesForExtent(size_t L, size_t R);
/// Emits a line table substream.
void emitLineTableForFunction(MCObjectStreamer &OS, unsigned FuncId,
@@ -226,7 +194,7 @@ public:
void encodeInlineLineTable(MCAsmLayout &Layout,
MCCVInlineLineTableFragment &F);
- void
+ MCFragment *
emitDefRange(MCObjectStreamer &OS,
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion);
@@ -247,10 +215,6 @@ public:
std::pair<StringRef, unsigned> addToStringTable(StringRef S);
private:
- /// The current CodeView line information from the last .cv_loc directive.
- MCCVLoc CurrentCVLoc = MCCVLoc(0, 0, 0, 0, false, true);
- bool CVLocSeen = false;
-
/// Map from string to string table offset.
StringMap<unsigned> StringTable;
@@ -286,8 +250,8 @@ private:
/// id.
std::map<unsigned, std::pair<size_t, size_t>> MCCVLineStartStop;
- /// A collection of MCCVLineEntry for each section.
- std::vector<MCCVLineEntry> MCCVLines;
+ /// A collection of MCCVLoc for each section.
+ std::vector<MCCVLoc> MCCVLines;
/// All known functions and inlined call sites, indexed by function id.
std::vector<MCCVFunctionInfo> Functions;
diff --git a/contrib/llvm/include/llvm/MC/MCContext.h b/contrib/llvm/include/llvm/MC/MCContext.h
index a712e2d95cbc..3b8ac8b79e21 100644
--- a/contrib/llvm/include/llvm/MC/MCContext.h
+++ b/contrib/llvm/include/llvm/MC/MCContext.h
@@ -298,10 +298,6 @@ namespace llvm {
CodeViewContext &getCVContext();
- /// Clear the current cv_loc, if there is one. Avoids lazily creating a
- /// CodeViewContext if none is needed.
- void clearCVLocSeen();
-
void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; }
void setUseNamesOnTempLabels(bool Value) { UseNamesOnTempLabels = Value; }
diff --git a/contrib/llvm/include/llvm/MC/MCDwarf.h b/contrib/llvm/include/llvm/MC/MCDwarf.h
index 2bfaf19cf2c6..7b96e9aaca89 100644
--- a/contrib/llvm/include/llvm/MC/MCDwarf.h
+++ b/contrib/llvm/include/llvm/MC/MCDwarf.h
@@ -430,6 +430,7 @@ public:
OpUndefined,
OpRegister,
OpWindowSave,
+ OpNegateRAState,
OpGnuArgsSize
};
@@ -509,6 +510,11 @@ public:
return MCCFIInstruction(OpWindowSave, L, 0, 0, "");
}
+ /// .cfi_negate_ra_state AArch64 negate RA state.
+ static MCCFIInstruction createNegateRAState(MCSymbol *L) {
+ return MCCFIInstruction(OpNegateRAState, L, 0, 0, "");
+ }
+
/// .cfi_restore says that the rule for Register is now the same as it
/// was at the beginning of the function, after all initial instructions added
/// by .cfi_startproc were executed.
@@ -593,6 +599,7 @@ struct MCDwarfFrameInfo {
bool IsSignalFrame = false;
bool IsSimple = false;
unsigned RAReg = static_cast<unsigned>(INT_MAX);
+ bool IsBKeyFrame = false;
};
class MCDwarfFrameEmitter {
diff --git a/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h b/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
index bff58fef6af9..f226d6a45a5a 100644
--- a/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
+++ b/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
@@ -73,6 +74,8 @@ public:
switch (OSType) {
case Triple::CloudABI:
return ELF::ELFOSABI_CLOUDABI;
+ case Triple::HermitCore:
+ return ELF::ELFOSABI_STANDALONE;
case Triple::PS4:
case Triple::FreeBSD:
return ELF::ELFOSABI_FREEBSD;
@@ -90,6 +93,8 @@ public:
virtual void sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs);
+ virtual void addTargetSectionFlags(MCContext &Ctx, MCSectionELF &Sec);
+
/// \name Accessors
/// @{
uint8_t getOSABI() const { return OSABI; }
diff --git a/contrib/llvm/include/llvm/MC/MCExpr.h b/contrib/llvm/include/llvm/MC/MCExpr.h
index 3fd58a169d4b..8cb6b86fd672 100644
--- a/contrib/llvm/include/llvm/MC/MCExpr.h
+++ b/contrib/llvm/include/llvm/MC/MCExpr.h
@@ -286,7 +286,9 @@ public:
VK_Hexagon_IE_GOT,
VK_WebAssembly_FUNCTION, // Function table index, rather than virtual addr
+ VK_WebAssembly_GLOBAL, // Global object index
VK_WebAssembly_TYPEINDEX,// Type table index
+ VK_WebAssembly_EVENT, // Event index
VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo
VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi
diff --git a/contrib/llvm/include/llvm/MC/MCInst.h b/contrib/llvm/include/llvm/MC/MCInst.h
index 67bb11a70387..d501b686bb2e 100644
--- a/contrib/llvm/include/llvm/MC/MCInst.h
+++ b/contrib/llvm/include/llvm/MC/MCInst.h
@@ -208,6 +208,8 @@ public:
/// string.
void dump_pretty(raw_ostream &OS, const MCInstPrinter *Printer = nullptr,
StringRef Separator = " ") const;
+ void dump_pretty(raw_ostream &OS, StringRef Name,
+ StringRef Separator = " ") const;
};
inline raw_ostream& operator<<(raw_ostream &OS, const MCOperand &MO) {
diff --git a/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h b/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h
index e1673208d875..200f10f7d64b 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -23,6 +23,7 @@
namespace llvm {
class MCRegisterInfo;
+class Triple;
class MCInstrAnalysis {
protected:
@@ -87,24 +88,77 @@ public:
const MCInst &Inst,
APInt &Writes) const;
- /// Returns true if \param Inst is a dependency breaking instruction for the
- /// given subtarget.
+ /// Returns true if MI is a dependency breaking zero-idiom for the given
+ /// subtarget.
+ ///
+ /// Mask is used to identify input operands that have their dependency
+ /// broken. Each bit of the mask is associated with a specific input operand.
+ /// Bits associated with explicit input operands are laid out first in the
+ /// mask; implicit operands come after explicit operands.
+ ///
+ /// Dependencies are broken only for operands that have their corresponding bit
+ /// set. Operands that have their bit cleared, or that don't have a
+ /// corresponding bit in the mask, don't have their dependency broken. Note
+ /// that Mask may not be big enough to describe all operands. The assumption
+ /// for operands that don't have a corresponding bit in the mask is that those
+ /// are still data dependent.
+ ///
+ /// The only exception to the rule is when Mask has all zeroes.
+ /// A zero mask means: dependencies are broken for all explicit register
+ /// operands.
+ virtual bool isZeroIdiom(const MCInst &MI, APInt &Mask,
+ unsigned CPUID) const {
+ return false;
+ }
+
+ /// Returns true if MI is a dependency breaking instruction for the
+ /// subtarget associated with CPUID.
///
/// The value computed by a dependency breaking instruction is not dependent
/// on the inputs. An example of dependency breaking instruction on X86 is
/// `XOR %eax, %eax`.
- /// TODO: In future, we could implement an alternative approach where this
- /// method returns `true` if the input instruction is not dependent on
- /// some/all of its input operands. An APInt mask could then be used to
- /// identify independent operands.
- virtual bool isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const;
+ ///
+ /// If MI is a dependency breaking instruction for subtarget CPUID, then Mask
+ /// can be inspected to identify independent operands.
+ ///
+ /// Essentially, each bit of the mask corresponds to an input operand.
+ /// Explicit operands are laid out first in the mask; implicit operands follow
+ /// explicit operands. Bits are set for operands that are independent.
+ ///
+ /// Note that the number of bits in Mask may not be equivalent to the sum of
+ /// explicit and implicit operands in MI. Operands that don't have a
+ /// corresponding bit in Mask are assumed "not independent".
+ ///
+ /// The only exception is when Mask is all zeroes. That means: explicit
+ /// input operands of MI are independent.
+ virtual bool isDependencyBreaking(const MCInst &MI, APInt &Mask,
+ unsigned CPUID) const {
+ return isZeroIdiom(MI, Mask, CPUID);
+ }
+
+ /// Returns true if MI is a candidate for move elimination.
+ ///
+ /// Different subtargets may apply different constraints to optimizable
+ /// register moves. For example, on most X86 subtargets, a candidate for move
+ /// elimination cannot specify the same register for both source and
+ /// destination.
+ virtual bool isOptimizableRegisterMove(const MCInst &MI,
+ unsigned CPUID) const {
+ return false;
+ }
/// Given a branch instruction try to get the address the branch
/// targets. Return true on success, and the address in Target.
virtual bool
evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const;
+
+ /// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries.
+ virtual std::vector<std::pair<uint64_t, uint64_t>>
+ findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotPltSectionVA, const Triple &TargetTriple) const {
+ return {};
+ }
};
} // end namespace llvm
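The mask contract documented above is easiest to see from the consumer side. A minimal sketch (MCIA, Inst and CPUID stand for a target's MCInstrAnalysis, the instruction being analyzed, and the subtarget id; all three are assumed inputs, not part of this patch):

    llvm::APInt Mask;
    llvm::SmallVector<unsigned, 4> IndependentOps;
    if (MCIA.isDependencyBreaking(Inst, Mask, CPUID)) {
      if (Mask.isNullValue()) {
        // Zero mask: dependencies are broken for all explicit register operands.
      } else {
        // Bits cover explicit operands first, then implicit ones; a set bit
        // marks an independent operand, operands without a bit stay dependent.
        for (unsigned I = 0, E = Mask.getBitWidth(); I != E; ++I)
          if (Mask[I])
            IndependentOps.push_back(I);
      }
    }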
diff --git a/contrib/llvm/include/llvm/MC/MCInstrDesc.h b/contrib/llvm/include/llvm/MC/MCInstrDesc.h
index 3e000a2210e9..61e7d09afbcb 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrDesc.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrDesc.h
@@ -120,6 +120,7 @@ enum Flag {
HasOptionalDef,
Pseudo,
Return,
+ EHScopeReturn,
Call,
Barrier,
Terminator,
@@ -150,7 +151,8 @@ enum Flag {
InsertSubreg,
Convergent,
Add,
- Trap
+ Trap,
+ VariadicOpsAreDefs,
};
}
@@ -382,6 +384,11 @@ public:
/// additional values.
bool isConvergent() const { return Flags & (1ULL << MCID::Convergent); }
+ /// Return true if variadic operands of this instruction are definitions.
+ bool variadicOpsAreDefs() const {
+ return Flags & (1ULL << MCID::VariadicOpsAreDefs);
+ }
+
//===--------------------------------------------------------------------===//
// Side Effect Analysis
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
index 3a27ef8c8fee..f8142ccd8ac5 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/VersionTuple.h"
namespace llvm {
class MCContext;
@@ -42,12 +43,11 @@ protected:
/// dwarf unwind.
bool OmitDwarfIfHaveCompactUnwind;
- /// PersonalityEncoding, LSDAEncoding, TTypeEncoding - Some encoding values
- /// for EH.
- unsigned PersonalityEncoding;
- unsigned LSDAEncoding;
- unsigned FDECFIEncoding;
- unsigned TTypeEncoding;
+ /// FDE CFI encoding. Controls the encoding of the begin label in the
+ /// .eh_frame section. Unlike the LSDA encoding, personality encoding, and
+ /// type encodings, this is something that the assembler just "knows" about
+ /// its target.
+ unsigned FDECFIEncoding = 0;
/// Compact unwind encoding indicating that we should emit only an EH frame.
unsigned CompactUnwindDwarfEHFrameOnly;
@@ -118,6 +118,8 @@ protected:
MCSection *DwarfAddrSection;
/// The DWARF v5 range list section.
MCSection *DwarfRnglistsSection;
+ /// The DWARF v5 locations list section.
+ MCSection *DwarfLoclistsSection;
/// The DWARF v5 range list section for fission.
MCSection *DwarfRnglistsDWOSection;
@@ -226,10 +228,7 @@ public:
return CommDirectiveSupportsAlignment;
}
- unsigned getPersonalityEncoding() const { return PersonalityEncoding; }
- unsigned getLSDAEncoding() const { return LSDAEncoding; }
unsigned getFDEEncoding() const { return FDECFIEncoding; }
- unsigned getTTypeEncoding() const { return TTypeEncoding; }
unsigned getCompactUnwindDwarfEHFrameOnly() const {
return CompactUnwindDwarfEHFrameOnly;
@@ -243,6 +242,9 @@ public:
MCSection *getCompactUnwindSection() const { return CompactUnwindSection; }
MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
+ MCSection *getDwarfInfoSection(uint64_t Hash) const {
+ return getDwarfComdatSection(".debug_info", Hash);
+ }
MCSection *getDwarfLineSection() const { return DwarfLineSection; }
MCSection *getDwarfLineStrSection() const { return DwarfLineStrSection; }
MCSection *getDwarfFrameSection() const { return DwarfFrameSection; }
@@ -262,6 +264,7 @@ public:
MCSection *getDwarfARangesSection() const { return DwarfARangesSection; }
MCSection *getDwarfRangesSection() const { return DwarfRangesSection; }
MCSection *getDwarfRnglistsSection() const { return DwarfRnglistsSection; }
+ MCSection *getDwarfLoclistsSection() const { return DwarfLoclistsSection; }
MCSection *getDwarfMacinfoSection() const { return DwarfMacinfoSection; }
MCSection *getDwarfDebugNamesSection() const {
@@ -278,7 +281,9 @@ public:
return DwarfAccelTypesSection;
}
MCSection *getDwarfInfoDWOSection() const { return DwarfInfoDWOSection; }
- MCSection *getDwarfTypesSection(uint64_t Hash) const;
+ MCSection *getDwarfTypesSection(uint64_t Hash) const {
+ return getDwarfComdatSection(".debug_types", Hash);
+ }
MCSection *getDwarfTypesDWOSection() const { return DwarfTypesDWOSection; }
MCSection *getDwarfAbbrevDWOSection() const { return DwarfAbbrevDWOSection; }
MCSection *getDwarfStrDWOSection() const { return DwarfStrDWOSection; }
@@ -386,14 +391,22 @@ private:
bool PositionIndependent;
MCContext *Ctx;
Triple TT;
+ VersionTuple SDKVersion;
void initMachOMCObjectFileInfo(const Triple &T);
void initELFMCObjectFileInfo(const Triple &T, bool Large);
void initCOFFMCObjectFileInfo(const Triple &T);
void initWasmMCObjectFileInfo(const Triple &T);
+ MCSection *getDwarfComdatSection(const char *Name, uint64_t Hash) const;
public:
const Triple &getTargetTriple() const { return TT; }
+
+ void setSDKVersion(const VersionTuple &TheSDKVersion) {
+ SDKVersion = TheSDKVersion;
+ }
+
+ const VersionTuple &getSDKVersion() const { return SDKVersion; }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
index 035206dce939..892909656c15 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -39,12 +39,21 @@ class MCObjectStreamer : public MCStreamer {
bool EmitEHFrame;
bool EmitDebugFrame;
SmallVector<MCSymbol *, 2> PendingLabels;
+ struct PendingMCFixup {
+ const MCSymbol *Sym;
+ MCFixup Fixup;
+ MCDataFragment *DF;
+ PendingMCFixup(const MCSymbol *McSym, MCDataFragment *F, MCFixup McFixup)
+ : Sym(McSym), Fixup(McFixup), DF(F) {}
+ };
+ SmallVector<PendingMCFixup, 2> PendingFixups;
virtual void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo&) = 0;
void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
MCSymbol *EmitCFILabel() override;
void EmitInstructionImpl(const MCInst &Inst, const MCSubtargetInfo &STI);
+ void resolvePendingFixups();
protected:
MCObjectStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
@@ -179,7 +188,9 @@ public:
///
/// Emit the absolute difference between \c Hi and \c Lo, as long as we can
/// compute it. Currently, that requires that both symbols are in the same
- /// data fragment. Otherwise, do nothing and return \c false.
+ /// data fragment and that the target has not specified that diff expressions
+ /// require relocations to be emitted. Otherwise, do nothing and return
+ /// \c false.
///
/// \pre Offset of \c Hi is greater than the offset \c Lo.
void emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
diff --git a/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h b/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
index 207183a69b0e..2e9b8dfa3b26 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
@@ -30,7 +30,6 @@ class AsmLexer : public MCAsmLexer {
StringRef CurBuf;
bool IsAtStartOfLine = true;
bool IsAtStartOfStatement = true;
- bool IsParsingMSInlineAsm = false;
bool IsPeeking = false;
protected:
@@ -44,7 +43,6 @@ public:
~AsmLexer() override;
void setBuffer(StringRef Buf, const char *ptr = nullptr);
- void setParsingMSInlineAsm(bool V) { IsParsingMSInlineAsm = V; }
StringRef LexUntilEndOfStatement() override;
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index 10550b3370e8..ea13d1cdc09f 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -50,9 +50,9 @@ protected: // Can only create subclasses.
bool SkipSpace = true;
bool AllowAtInIdentifier;
bool IsAtStartOfStatement = true;
+ bool LexMasmIntegers = false;
AsmCommentConsumer *CommentConsumer = nullptr;
- bool AltMacroMode;
MCAsmLexer();
virtual AsmToken LexToken() = 0;
@@ -67,17 +67,9 @@ public:
MCAsmLexer &operator=(const MCAsmLexer &) = delete;
virtual ~MCAsmLexer();
- bool IsaAltMacroMode() {
- return AltMacroMode;
- }
-
- void SetAltMacroMode(bool AltMacroSet) {
- AltMacroMode = AltMacroSet;
- }
-
/// Consume the next token from the input stream and return it.
///
- /// The lexer will continuosly return the end-of-file token once the end of
+ /// The lexer will continuously return the end-of-file token once the end of
/// the main input file has been reached.
const AsmToken &Lex() {
assert(!CurTok.empty());
@@ -155,6 +147,10 @@ public:
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
this->CommentConsumer = CommentConsumer;
}
+
+ /// Set whether to lex masm-style binary and hex literals. They look like
+ /// 0b1101 and 0ABCh respectively.
+ void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index 0d56f36fbae8..b80289878e6e 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -122,17 +122,18 @@ public:
private:
MCTargetAsmParser *TargetParser = nullptr;
- unsigned ShowParsedOperands : 1;
-
protected: // Can only create subclasses.
MCAsmParser();
+ SmallVector<MCPendingError, 0> PendingErrors;
+
/// Flag tracking whether any errors have been encountered.
bool HadError = false;
+
/// Enable print [latency:throughput] in output file.
bool EnablePrintSchedInfo = false;
- SmallVector<MCPendingError, 1> PendingErrors;
+ bool ShowParsedOperands = false;
public:
MCAsmParser(const MCAsmParser &) = delete;
@@ -166,7 +167,7 @@ public:
void setShowParsedOperands(bool Value) { ShowParsedOperands = Value; }
void setEnablePrintSchedInfo(bool Value) { EnablePrintSchedInfo = Value; }
- bool shouldPrintSchedInfo() { return EnablePrintSchedInfo; }
+ bool shouldPrintSchedInfo() const { return EnablePrintSchedInfo; }
/// Run the parser on the input source buffer.
virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0;
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
index 135b5fab07ce..ccf13a6a4fb4 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -476,6 +476,9 @@ public:
return nullptr;
}
+ // For actions that have to be performed before a label is emitted
+ virtual void doBeforeLabelEmit(MCSymbol *Symbol) {}
+
virtual void onLabelParsed(MCSymbol *Symbol) {}
/// Ensure that all previously parsed instructions have been emitted to the
@@ -487,6 +490,9 @@ public:
MCContext &Ctx) {
return nullptr;
}
+
+ // For any checks or cleanups at the end of parsing.
+ virtual void onEndOfFile() {}
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
index 6edfc30b0aa6..8d8c677c77ea 100644
--- a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -41,7 +41,6 @@ public:
const uint16_t RegsSize;
const uint16_t RegSetSize;
const uint16_t ID;
- const uint16_t PhysRegSize;
const int8_t CopyCost;
const bool Allocatable;
@@ -80,11 +79,6 @@ public:
return contains(Reg1) && contains(Reg2);
}
- /// Return the size of the physical register in bytes.
- unsigned getPhysRegSize() const { return PhysRegSize; }
- /// Temporary function to allow out-of-tree targets to switch.
- unsigned getSize() const { return getPhysRegSize(); }
-
/// getCopyCost - Return the cost of copying a value between two registers in
/// this class. A negative number means the register class is very expensive
/// to copy e.g. status flag register classes.
diff --git a/contrib/llvm/include/llvm/MC/MCSchedule.h b/contrib/llvm/include/llvm/MC/MCSchedule.h
index f2f1dfb36918..689ac73cbdd1 100644
--- a/contrib/llvm/include/llvm/MC/MCSchedule.h
+++ b/contrib/llvm/include/llvm/MC/MCSchedule.h
@@ -142,6 +142,7 @@ struct MCSchedClassDesc {
struct MCRegisterCostEntry {
unsigned RegisterClassID;
unsigned Cost;
+ bool AllowMoveElimination;
};
/// A register file descriptor.
@@ -159,6 +160,12 @@ struct MCRegisterFileDesc {
uint16_t NumRegisterCostEntries;
// Index of the first cost entry in MCExtraProcessorInfo::RegisterCostTable.
uint16_t RegisterCostEntryIdx;
+ // A value of zero means: there is no limit on the number of moves that can be
+ // eliminated every cycle.
+ uint16_t MaxMovesEliminatedPerCycle;
+ // True if this register file only knows how to optimize register moves from
+ // known zero registers.
+ bool AllowZeroMoveEliminationOnly;
};
/// Provide extra details about the machine processor.
@@ -176,18 +183,8 @@ struct MCExtraProcessorInfo {
unsigned NumRegisterFiles;
const MCRegisterCostEntry *RegisterCostTable;
unsigned NumRegisterCostEntries;
-
- struct PfmCountersInfo {
- // An optional name of a performance counter that can be used to measure
- // cycles.
- const char *CycleCounter;
-
- // For each MCProcResourceDesc defined by the processor, an optional list of
- // names of performance counters that can be used to measure the resource
- // utilization.
- const char **IssueCounters;
- };
- PfmCountersInfo PfmCounters;
+ unsigned LoadQueueID;
+ unsigned StoreQueueID;
};
/// Machine model for scheduling, bundling, and heuristics.
diff --git a/contrib/llvm/include/llvm/MC/MCSection.h b/contrib/llvm/include/llvm/MC/MCSection.h
index ba5c60d3ba58..eb210b4e9dfa 100644
--- a/contrib/llvm/include/llvm/MC/MCSection.h
+++ b/contrib/llvm/include/llvm/MC/MCSection.h
@@ -78,6 +78,10 @@ private:
/// Whether this section has had instructions emitted into it.
bool HasInstructions : 1;
+ /// Whether this section has had data emitted into it.
+ /// Right now this is only used by the ARM backend.
+ bool HasData : 1;
+
bool IsRegistered : 1;
MCDummyFragment DummyFragment;
@@ -137,6 +141,9 @@ public:
bool hasInstructions() const { return HasInstructions; }
void setHasInstructions(bool Value) { HasInstructions = Value; }
+ bool hasData() const { return HasData; }
+ void setHasData(bool Value) { HasData = Value; }
+
bool isRegistered() const { return IsRegistered; }
void setIsRegistered(bool Value) { IsRegistered = Value; }
diff --git a/contrib/llvm/include/llvm/MC/MCStreamer.h b/contrib/llvm/include/llvm/MC/MCStreamer.h
index e4d0dc03b87c..f613d3a1943f 100644
--- a/contrib/llvm/include/llvm/MC/MCStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCStreamer.h
@@ -28,6 +28,7 @@
#include "llvm/Support/MD5.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/VersionTuple.h"
#include <cassert>
#include <cstdint>
#include <memory>
@@ -109,6 +110,11 @@ public:
virtual void emitValue(const MCExpr *Value);
+ /// Emit the bytes in \p Data into the output.
+ ///
+ /// This is used to emit bytes in \p Data as sequence of .byte directives.
+ virtual void emitRawBytes(StringRef Data);
+
virtual void finish();
};
@@ -193,10 +199,6 @@ class MCStreamer {
WinEH::FrameInfo *CurrentWinFrameInfo;
- /// Retreive the current frame info if one is available and it is not yet
- /// closed. Otherwise, issue an error and return null.
- WinEH::FrameInfo *EnsureValidWinFrameInfo(SMLoc Loc);
-
/// Tracks an index to represent the order a symbol was emitted in.
/// Zero means we did not emit that symbol.
DenseMap<const MCSymbol *, unsigned> SymbolOrdering;
@@ -219,10 +221,6 @@ protected:
virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame);
- /// When emitting an object file, create and emit a real label. When emitting
- /// textual assembly, this should do nothing to avoid polluting our output.
- virtual MCSymbol *EmitCFILabel();
-
WinEH::FrameInfo *getCurrentWinFrameInfo() {
return CurrentWinFrameInfo;
}
@@ -231,6 +229,9 @@ protected:
virtual void EmitRawTextImpl(StringRef String);
+ /// Returns true if the .cv_loc directive is in the right section.
+ bool checkCVLocSection(unsigned FuncId, unsigned FileNo, SMLoc Loc);
+
public:
MCStreamer(const MCStreamer &) = delete;
MCStreamer &operator=(const MCStreamer &) = delete;
@@ -258,6 +259,14 @@ public:
return TargetStreamer.get();
}
+ /// When emitting an object file, create and emit a real label. When emitting
+ /// textual assembly, this should do nothing to avoid polluting our output.
+ virtual MCSymbol *EmitCFILabel();
+
+ /// Retrieve the current frame info if one is available and it is not yet
+ /// closed. Otherwise, issue an error and return null.
+ WinEH::FrameInfo *EnsureValidWinFrameInfo(SMLoc Loc);
+
unsigned getNumFrameInfos() { return DwarfFrameInfos.size(); }
ArrayRef<MCDwarfFrameInfo> getDwarfFrameInfos() const {
return DwarfFrameInfos;
@@ -444,14 +453,17 @@ public:
/// Specify the Mach-O minimum deployment target version.
virtual void EmitVersionMin(MCVersionMinType Type, unsigned Major,
- unsigned Minor, unsigned Update) {}
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {}
/// Emit/Specify Mach-O build version command.
/// \p Platform should be one of MachO::PlatformType.
virtual void EmitBuildVersion(unsigned Platform, unsigned Major,
- unsigned Minor, unsigned Update) {}
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {}
- void EmitVersionForTarget(const Triple &Target);
+ void EmitVersionForTarget(const Triple &Target,
+ const VersionTuple &SDKVersion);
/// Note in the output that the specified \p Func is a Thumb mode
/// function (ARM target only).
@@ -794,6 +806,8 @@ public:
Optional<StringRef> Source,
unsigned CUID = 0);
+ virtual void EmitCFIBKeyFrame();
+
/// This implements the DWARF2 '.loc fileno lineno ...' assembler
/// directive.
virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -867,7 +881,7 @@ public:
virtual MCSymbol *getDwarfLineTableSymbol(unsigned CUID);
virtual void EmitCFISections(bool EH, bool Debug);
- void EmitCFIStartProc(bool IsSimple);
+ void EmitCFIStartProc(bool IsSimple, SMLoc Loc = SMLoc());
void EmitCFIEndProc();
virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
virtual void EmitCFIDefCfaOffset(int64_t Offset);
@@ -888,9 +902,15 @@ public:
virtual void EmitCFIUndefined(int64_t Register);
virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
virtual void EmitCFIWindowSave();
+ virtual void EmitCFINegateRAState();
virtual void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc());
virtual void EmitWinCFIEndProc(SMLoc Loc = SMLoc());
+ /// This is used on platforms, such as Windows on ARM64, that require function
+ /// or funclet sizes to be emitted in .xdata before the End marker is emitted
+ /// for the frame. We cannot use the End marker to indicate that the frame is
+ /// active, because it has not been set yet at the point where .xdata is emitted.
+ virtual void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc = SMLoc());
virtual void EmitWinCFIStartChained(SMLoc Loc = SMLoc());
virtual void EmitWinCFIEndChained(SMLoc Loc = SMLoc());
virtual void EmitWinCFIPushReg(unsigned Register, SMLoc Loc = SMLoc());
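The hunk above threads an SDK version (as a VersionTuple) through the Mach-O version-min and build-version directives. A minimal caller sketch under the new signature; Streamer and the 10.14 values are hypothetical, chosen only to illustrate the added parameter:

    #include "llvm/ADT/Triple.h"
    #include "llvm/Support/VersionTuple.h"

    // Hypothetical front-end code emitting the Darwin deployment/SDK version.
    llvm::Triple TheTriple("x86_64-apple-macosx10.14");
    llvm::VersionTuple SDKVersion(10, 14);   // assumed SDK version, illustration only
    Streamer.EmitVersionForTarget(TheTriple, SDKVersion);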
diff --git a/contrib/llvm/include/llvm/MC/MCSymbolWasm.h b/contrib/llvm/include/llvm/MC/MCSymbolWasm.h
index e043453dc732..8e66dc881d0f 100644
--- a/contrib/llvm/include/llvm/MC/MCSymbolWasm.h
+++ b/contrib/llvm/include/llvm/MC/MCSymbolWasm.h
@@ -20,12 +20,9 @@ class MCSymbolWasm : public MCSymbol {
bool IsHidden = false;
bool IsComdat = false;
std::string ModuleName;
- SmallVector<wasm::ValType, 1> Returns;
- SmallVector<wasm::ValType, 4> Params;
- wasm::WasmGlobalType GlobalType;
- bool ParamsSet = false;
- bool ReturnsSet = false;
- bool GlobalTypeSet = false;
+ wasm::WasmSignature *Signature = nullptr;
+ Optional<wasm::WasmGlobalType> GlobalType;
+ Optional<wasm::WasmEventType> EventType;
/// An expression describing how to calculate the size of a symbol. If a
/// symbol has no size this field will be NULL.
@@ -35,8 +32,7 @@ public:
// Use a module name of "env" for now, for compatibility with existing tools.
// This is temporary, and may change, as the ABI is not yet stable.
MCSymbolWasm(const StringMapEntry<bool> *Name, bool isTemporary)
- : MCSymbol(SymbolKindWasm, Name, isTemporary),
- ModuleName("env") {}
+ : MCSymbol(SymbolKindWasm, Name, isTemporary), ModuleName("env") {}
static bool classof(const MCSymbol *S) { return S->isWasm(); }
const MCExpr *getSize() const { return SymbolSize; }
@@ -46,6 +42,7 @@ public:
bool isData() const { return Type == wasm::WASM_SYMBOL_TYPE_DATA; }
bool isGlobal() const { return Type == wasm::WASM_SYMBOL_TYPE_GLOBAL; }
bool isSection() const { return Type == wasm::WASM_SYMBOL_TYPE_SECTION; }
+ bool isEvent() const { return Type == wasm::WASM_SYMBOL_TYPE_EVENT; }
wasm::WasmSymbolType getType() const { return Type; }
void setType(wasm::WasmSymbolType type) { Type = type; }
@@ -61,37 +58,22 @@ public:
const StringRef getModuleName() const { return ModuleName; }
void setModuleName(StringRef Name) { ModuleName = Name; }
- const SmallVector<wasm::ValType, 1> &getReturns() const {
- assert(ReturnsSet);
- return Returns;
- }
-
- void setReturns(SmallVectorImpl<wasm::ValType> &&Rets) {
- ReturnsSet = true;
- Returns = std::move(Rets);
- }
-
- const SmallVector<wasm::ValType, 4> &getParams() const {
- assert(ParamsSet);
- return Params;
- }
-
- void setParams(SmallVectorImpl<wasm::ValType> &&Pars) {
- ParamsSet = true;
- Params = std::move(Pars);
- }
+ const wasm::WasmSignature *getSignature() const { return Signature; }
+ void setSignature(wasm::WasmSignature *Sig) { Signature = Sig; }
const wasm::WasmGlobalType &getGlobalType() const {
- assert(GlobalTypeSet);
- return GlobalType;
+ assert(GlobalType.hasValue());
+ return GlobalType.getValue();
}
+ void setGlobalType(wasm::WasmGlobalType GT) { GlobalType = GT; }
- void setGlobalType(wasm::WasmGlobalType GT) {
- GlobalTypeSet = true;
- GlobalType = GT;
+ const wasm::WasmEventType &getEventType() const {
+ assert(EventType.hasValue());
+ return EventType.getValue();
}
+ void setEventType(wasm::WasmEventType ET) { EventType = ET; }
};
-} // end namespace llvm
+} // end namespace llvm
#endif // LLVM_MC_MCSYMBOLWASM_H
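With signatures and global/event types now stored behind pointer/Optional members, the getters assert when the matching setter was never called. A hedged usage sketch; WasmSym is a hypothetical MCSymbolWasm*, and the wasm::WasmGlobalType fields are assumed to be those declared in BinaryFormat/Wasm.h:

    #include "llvm/MC/MCSymbolWasm.h"

    wasm::WasmGlobalType GT;
    GT.Type = wasm::WASM_TYPE_I32;   // assumed enumerator from BinaryFormat/Wasm.h
    GT.Mutable = true;
    WasmSym->setGlobalType(GT);                                   // fills the Optional
    const wasm::WasmGlobalType &Read = WasmSym->getGlobalType();  // asserts if never set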
diff --git a/contrib/llvm/include/llvm/MC/MCWasmObjectWriter.h b/contrib/llvm/include/llvm/MC/MCWasmObjectWriter.h
index e45030f302ff..6b788cfe96b9 100644
--- a/contrib/llvm/include/llvm/MC/MCWasmObjectWriter.h
+++ b/contrib/llvm/include/llvm/MC/MCWasmObjectWriter.h
@@ -51,6 +51,6 @@ std::unique_ptr<MCObjectWriter>
createWasmObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW,
raw_pwrite_stream &OS);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCWin64EH.h b/contrib/llvm/include/llvm/MC/MCWin64EH.h
index 83ea738de8c3..1a9f6f403d7c 100644
--- a/contrib/llvm/include/llvm/MC/MCWin64EH.h
+++ b/contrib/llvm/include/llvm/MC/MCWin64EH.h
@@ -56,6 +56,14 @@ public:
void Emit(MCStreamer &Streamer) const override;
void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI) const override;
};
+
+class ARM64UnwindEmitter : public WinEH::UnwindEmitter {
+public:
+ void Emit(MCStreamer &Streamer) const override;
+ void EmitUnwindInfo(MCStreamer &Streamer,
+ WinEH::FrameInfo *FI) const override;
+};
+
}
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/MC/MCWinEH.h b/contrib/llvm/include/llvm/MC/MCWinEH.h
index 4ca52a6654eb..98ef0367a11d 100644
--- a/contrib/llvm/include/llvm/MC/MCWinEH.h
+++ b/contrib/llvm/include/llvm/MC/MCWinEH.h
@@ -10,6 +10,7 @@
#ifndef LLVM_MC_MCWINEH_H
#define LLVM_MC_MCWINEH_H
+#include "llvm/ADT/MapVector.h"
#include <vector>
namespace llvm {
@@ -20,9 +21,9 @@ class MCSymbol;
namespace WinEH {
struct Instruction {
const MCSymbol *Label;
- const unsigned Offset;
- const unsigned Register;
- const unsigned Operation;
+ unsigned Offset;
+ unsigned Register;
+ unsigned Operation;
Instruction(unsigned Op, MCSymbol *L, unsigned Reg, unsigned Off)
: Label(L), Offset(Off), Register(Reg), Operation(Op) {}
@@ -31,6 +32,7 @@ struct Instruction {
struct FrameInfo {
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
+ const MCSymbol *FuncletOrFuncEnd = nullptr;
const MCSymbol *ExceptionHandler = nullptr;
const MCSymbol *Function = nullptr;
const MCSymbol *PrologEnd = nullptr;
@@ -43,6 +45,7 @@ struct FrameInfo {
int LastFrameInst = -1;
const FrameInfo *ChainedParent = nullptr;
std::vector<Instruction> Instructions;
+ MapVector<MCSymbol*, std::vector<Instruction>> EpilogMap;
FrameInfo() = default;
FrameInfo(const MCSymbol *Function, const MCSymbol *BeginFuncEHLabel)
diff --git a/contrib/llvm/tools/llvm-mca/Context.h b/contrib/llvm/include/llvm/MCA/Context.h
index cf483fa7b37d..6b2bee0fdc42 100644
--- a/contrib/llvm/tools/llvm-mca/Context.h
+++ b/contrib/llvm/include/llvm/MCA/Context.h
@@ -11,21 +11,22 @@
/// This file defines a class for holding ownership of various simulated
/// hardware units. A Context also provides a utility routine for constructing
/// a default out-of-order pipeline with fetch, dispatch, execute, and retire
-/// stages).
+/// stages.
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_CONTEXT_H
-#define LLVM_TOOLS_LLVM_MCA_CONTEXT_H
-#include "HardwareUnit.h"
-#include "InstrBuilder.h"
-#include "Pipeline.h"
-#include "SourceMgr.h"
+#ifndef LLVM_MCA_CONTEXT_H
+#define LLVM_MCA_CONTEXT_H
+
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/InstrBuilder.h"
+#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/SourceMgr.h"
#include <memory>
+namespace llvm {
namespace mca {
/// This is a convenience struct to hold the parameters necessary for creating
@@ -43,13 +44,12 @@ struct PipelineOptions {
};
class Context {
- llvm::SmallVector<std::unique_ptr<HardwareUnit>, 4> Hardware;
- const llvm::MCRegisterInfo &MRI;
- const llvm::MCSubtargetInfo &STI;
+ SmallVector<std::unique_ptr<HardwareUnit>, 4> Hardware;
+ const MCRegisterInfo &MRI;
+ const MCSubtargetInfo &STI;
public:
- Context(const llvm::MCRegisterInfo &R, const llvm::MCSubtargetInfo &S)
- : MRI(R), STI(S) {}
+ Context(const MCRegisterInfo &R, const MCSubtargetInfo &S) : MRI(R), STI(S) {}
Context(const Context &C) = delete;
Context &operator=(const Context &C) = delete;
@@ -65,4 +65,5 @@ public:
};
} // namespace mca
-#endif // LLVM_TOOLS_LLVM_MCA_CONTEXT_H
+} // namespace llvm
+#endif // LLVM_MCA_CONTEXT_H
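After the move into include/llvm/MCA the class sits in the llvm::mca namespace and no longer spells out llvm:: qualifiers. A minimal construction sketch; MRI and STI are assumed to come from an already-initialized target, and building the default pipeline is omitted:

    #include "llvm/MCA/Context.h"

    // MRI and STI are hypothetical, obtained elsewhere via the TargetRegistry.
    llvm::mca::Context Ctx(MRI, STI);   // owns the simulated hardware units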
diff --git a/contrib/llvm/tools/llvm-mca/HWEventListener.h b/contrib/llvm/include/llvm/MCA/HWEventListener.h
index aa3e6dcf19a0..3b32b2cd6577 100644
--- a/contrib/llvm/tools/llvm-mca/HWEventListener.h
+++ b/contrib/llvm/include/llvm/MCA/HWEventListener.h
@@ -12,13 +12,14 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H
-#define LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H
+#ifndef LLVM_MCA_HWEVENTLISTENER_H
+#define LLVM_MCA_HWEVENTLISTENER_H
-#include "Instruction.h"
#include "llvm/ADT/ArrayRef.h"
-#include <utility>
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+namespace llvm {
namespace mca {
// An HWInstructionEvent represents state changes of instructions that
@@ -62,30 +63,41 @@ class HWInstructionIssuedEvent : public HWInstructionEvent {
public:
using ResourceRef = std::pair<uint64_t, uint64_t>;
HWInstructionIssuedEvent(const InstRef &IR,
- llvm::ArrayRef<std::pair<ResourceRef, double>> UR)
+ ArrayRef<std::pair<ResourceRef, ResourceCycles>> UR)
: HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {}
- llvm::ArrayRef<std::pair<ResourceRef, double>> UsedResources;
+ ArrayRef<std::pair<ResourceRef, ResourceCycles>> UsedResources;
};
class HWInstructionDispatchedEvent : public HWInstructionEvent {
public:
- HWInstructionDispatchedEvent(const InstRef &IR, llvm::ArrayRef<unsigned> Regs)
+ HWInstructionDispatchedEvent(const InstRef &IR, ArrayRef<unsigned> Regs,
+ unsigned UOps)
: HWInstructionEvent(HWInstructionEvent::Dispatched, IR),
- UsedPhysRegs(Regs) {}
+ UsedPhysRegs(Regs), MicroOpcodes(UOps) {}
// Number of physical register allocated for this instruction. There is one
// entry per register file.
- llvm::ArrayRef<unsigned> UsedPhysRegs;
+ ArrayRef<unsigned> UsedPhysRegs;
+ // Number of micro opcodes dispatched.
+ // This field is often set to the total number of micro-opcodes specified by
+ // the instruction descriptor of IR.
+ // The only exception is when IR declares a number of micro opcodes
+ // which exceeds the processor DispatchWidth, and - by construction - it
+ // requires multiple cycles to be fully dispatched. In that particular case,
+ // the dispatch logic would generate more than one dispatch event (one per
+ // cycle), and each event would declare how many micro opcodes are effectively
+ // been dispatched to the schedulers.
+ unsigned MicroOpcodes;
};
class HWInstructionRetiredEvent : public HWInstructionEvent {
public:
- HWInstructionRetiredEvent(const InstRef &IR, llvm::ArrayRef<unsigned> Regs)
+ HWInstructionRetiredEvent(const InstRef &IR, ArrayRef<unsigned> Regs)
: HWInstructionEvent(HWInstructionEvent::Retired, IR),
FreedPhysRegs(Regs) {}
// Number of register writes that have been architecturally committed. There
// is one entry per register file.
- llvm::ArrayRef<unsigned> FreedPhysRegs;
+ ArrayRef<unsigned> FreedPhysRegs;
};
// A HWStallEvent represents a pipeline stall caused by the lack of hardware
@@ -127,9 +139,11 @@ public:
virtual void onResourceAvailable(const ResourceRef &RRef) {}
// Events generated by the Scheduler when buffered resources are
- // consumed/freed.
- virtual void onReservedBuffers(llvm::ArrayRef<unsigned> Buffers) {}
- virtual void onReleasedBuffers(llvm::ArrayRef<unsigned> Buffers) {}
+ // consumed/freed for an instruction.
+ virtual void onReservedBuffers(const InstRef &Inst,
+ ArrayRef<unsigned> Buffers) {}
+ virtual void onReleasedBuffers(const InstRef &Inst,
+ ArrayRef<unsigned> Buffers) {}
virtual ~HWEventListener() {}
@@ -137,5 +151,6 @@ private:
virtual void anchor();
};
} // namespace mca
+} // namespace llvm
-#endif
+#endif // LLVM_MCA_HWEVENTLISTENER_H
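Because onReservedBuffers/onReleasedBuffers now also receive the instruction involved, a listener can attribute buffer pressure to individual instructions. A hedged sketch of a custom listener; the counting logic is illustrative only and not part of the patch:

    #include "llvm/MCA/HWEventListener.h"

    class BufferPressureListener : public llvm::mca::HWEventListener {
      unsigned ReservedBufferEvents = 0;

    public:
      void onReservedBuffers(const llvm::mca::InstRef &Inst,
                             llvm::ArrayRef<unsigned> Buffers) override {
        // Count buffered resources consumed by each dispatched instruction.
        ReservedBufferEvents += Buffers.size();
      }
    };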
diff --git a/contrib/llvm/tools/llvm-mca/HardwareUnit.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/HardwareUnit.h
index e8c496ab967a..104a2009f219 100644
--- a/contrib/llvm/tools/llvm-mca/HardwareUnit.h
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/HardwareUnit.h
@@ -13,9 +13,10 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H
-#define LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H
+#ifndef LLVM_MCA_HARDWAREUNIT_H
+#define LLVM_MCA_HARDWAREUNIT_H
+namespace llvm {
namespace mca {
class HardwareUnit {
@@ -28,4 +29,5 @@ public:
};
} // namespace mca
-#endif // LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H
+} // namespace llvm
+#endif // LLVM_MCA_HARDWAREUNIT_H
diff --git a/contrib/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
new file mode 100644
index 000000000000..e217fc50f780
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -0,0 +1,207 @@
+//===------------------------- LSUnit.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A Load/Store unit class that models load/store queues and that implements
+/// a simple weak memory consistency model.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_LSUNIT_H
+#define LLVM_MCA_LSUNIT_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+
+namespace llvm {
+namespace mca {
+
+class InstRef;
+class Scheduler;
+
+/// A Load/Store Unit implementing a load and store queues.
+///
+/// This class implements a load queue and a store queue to emulate the
+/// out-of-order execution of memory operations.
+/// Each load (or store) consumes an entry in the load (or store) queue.
+///
+/// Rules are:
+/// 1) A younger load is allowed to pass an older load only if there are no
+/// stores or barriers in between the two loads.
+/// 2) A younger store is not allowed to pass an older store.
+/// 3) A younger store is not allowed to pass an older load.
+/// 4) A younger load is allowed to pass an older store only if the load does
+/// not alias with the store.
+///
+/// This class optimistically assumes that loads don't alias store operations.
+/// Under this assumption, younger loads are always allowed to pass older
+/// stores (this only affects rule 4).
+/// Essentially, this class doesn't perform any sort of alias analysis to
+/// identify aliasing loads and stores.
+///
+/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
+/// set to `false` by the constructor of LSUnit.
+///
+/// Note that this class doesn't know about the existence of different memory
+/// types for memory operations (example: write-through, write-combining, etc.).
+/// Derived classes are responsible for implementing that extra knowledge, and
+/// provide different sets of rules for loads and stores by overriding method
+/// `isReady()`.
+/// To emulate a write-combining memory type, rule 2 must be relaxed in a
+/// derived class to enable the reordering of non-aliasing store operations.
+///
+/// No assumptions are made by this class on the size of the store buffer. This
+/// class doesn't know how to identify cases where store-to-load forwarding may
+/// occur.
+///
+/// LSUnit doesn't attempt to predict whether a load or store hits or misses
+/// the L1 cache. To be more specific, LSUnit doesn't know anything about
+/// cache hierarchy and memory types.
+/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
+/// scheduling model provides an "optimistic" load-to-use latency (which usually
+/// matches the load-to-use latency for when there is a hit in the L1D).
+/// Derived classes may expand this knowledge.
+///
+/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
+/// memory-barrier like instructions.
+/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
+/// `unmodeled side effects` behaves like a "soft" load-barrier. That means it
+/// serializes loads without forcing a flush of the load queue.
+/// Similarly, instructions that both `mayStore` and have `unmodeled side
+/// effects` are treated like store barriers. A full memory
+/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side
+/// effects. This is obviously inaccurate, but this is the best that we can do
+/// at the moment.
+///
+/// Each load/store barrier consumes one entry in the load/store queue. A
+/// load/store barrier enforces ordering of loads/stores:
+/// - A younger load cannot pass a load barrier.
+/// - A younger store cannot pass a store barrier.
+///
+/// A younger load has to wait for the memory load barrier to execute.
+/// A load/store barrier is "executed" when it becomes the oldest entry in
+/// the load/store queue(s). That also means, all the older loads/stores have
+/// already been executed.
+class LSUnit : public HardwareUnit {
+ // Load queue size.
+ // LQ_Size == 0 means that there are infinite slots in the load queue.
+ unsigned LQ_Size;
+
+ // Store queue size.
+ // SQ_Size == 0 means that there are infinite slots in the store queue.
+ unsigned SQ_Size;
+
+ // If true, loads will never alias with stores. This is the default.
+ bool NoAlias;
+
+ // When a `MayLoad` instruction is dispatched to the schedulers for execution,
+ // the LSUnit reserves an entry in the `LoadQueue` for it.
+ //
+ // LoadQueue keeps track of all the loads that are in-flight. A load
+ // instruction is eventually removed from the LoadQueue when it reaches
+ // completion stage. That means, a load leaves the queue whe it is 'executed',
+ // and its value can be forwarded on the data path to outside units.
+ //
+ // This class doesn't know about the latency of a load instruction. So, it
+ // conservatively/pessimistically assumes that the latency of a load opcode
+ // matches the instruction latency.
+ //
+ // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses),
+ // and load/store conflicts, the latency of a load is determined by the depth
+ // of the load pipeline. So, we could use field `LoadLatency` in the
+ // MCSchedModel to model that latency.
+ // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
+ // L1D, and it usually already accounts for any extra latency due to data
+ // forwarding.
+ // When doing throughput analysis, `LoadLatency` is likely to
+ // be a better predictor of load latency than instruction latency. This is
+ // particularly true when simulating code with temporal/spatial locality of
+ // memory accesses.
+ // Using `LoadLatency` (instead of the instruction latency) is also expected
+ // to improve the load queue allocation for long latency instructions with
+ // folded memory operands (See PR39829).
+ //
+ // FIXME: On some processors, load/store operations are split into multiple
+ // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but
+ // not 256-bit data types. So, a 256-bit load is effectively split into two
+ // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For
+ // simplicity, this class optimistically assumes that a load instruction only
+ // consumes one entry in the LoadQueue. Similarly, store instructions only
+ // consume a single entry in the StoreQueue.
+ // In future, we should reassess the quality of this design, and consider
+ // alternative approaches that let instructions specify the number of
+ // load/store queue entries which they consume at dispatch stage (See
+ // PR39830).
+ SmallSet<unsigned, 16> LoadQueue;
+ SmallSet<unsigned, 16> StoreQueue;
+
+ void assignLQSlot(unsigned Index);
+ void assignSQSlot(unsigned Index);
+ bool isReadyNoAlias(unsigned Index) const;
+
+ // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
+ // conservatively treated as a store barrier. It forces older stores to be
+ // executed before newer stores are issued.
+ SmallSet<unsigned, 8> StoreBarriers;
+
+ // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
+ // conservatively treated as a load barrier. It forces older loads to execute
+ // before newer loads are issued.
+ SmallSet<unsigned, 8> LoadBarriers;
+
+ bool isSQEmpty() const { return StoreQueue.empty(); }
+ bool isLQEmpty() const { return LoadQueue.empty(); }
+ bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
+ bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
+
+public:
+ LSUnit(const MCSchedModel &SM, unsigned LQ = 0, unsigned SQ = 0,
+ bool AssumeNoAlias = false);
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+
+ enum Status { LSU_AVAILABLE = 0, LSU_LQUEUE_FULL, LSU_SQUEUE_FULL };
+
+ // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve
+ // IR. It also returns LSU_AVAILABLE if IR is not a memory operation.
+ Status isAvailable(const InstRef &IR) const;
+
+ // Allocates load/store queue resources for IR.
+ //
+ // This method assumes that a previous call to `isAvailable(IR)` returned
+ // LSU_AVAILABLE, and that IR is a memory operation.
+ void dispatch(const InstRef &IR);
+
+ // By default, rules are:
+ // 1. A store may not pass a previous store.
+ // 2. A load may not pass a previous store unless flag 'NoAlias' is set.
+ // 3. A load may pass a previous load.
+ // 4. A store may not pass a previous load (regardless of flag 'NoAlias').
+ // 5. A load has to wait until an older load barrier is fully executed.
+ // 6. A store has to wait until an older store barrier is fully executed.
+ virtual bool isReady(const InstRef &IR) const;
+
+ // Load and store instructions are tracked by their corresponding queues from
+ // dispatch until the "instruction executed" event.
+ // Only when a load instruction reaches the 'Executed' stage, its value
+ // becomes available to the users. At that point, the load no longer needs to
+ // be tracked by the load queue.
+ // FIXME: For simplicity, we optimistically assume a similar behavior for
+ // store instructions. In practice, store operations don't tend to leave the
+ // store queue until they reach the 'Retired' stage (See PR39830).
+ void onInstructionExecuted(const InstRef &IR);
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_LSUNIT_H
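The comment block above spells out the ordering rules; the calling sequence expected from a scheduler is roughly isAvailable, dispatch, isReady, then onInstructionExecuted. A hedged sketch, where SM is an assumed MCSchedModel and IR an assumed InstRef (queue sizes of zero mean unbounded queues):

    llvm::mca::LSUnit LSU(SM, /*LQ=*/0, /*SQ=*/0, /*AssumeNoAlias=*/false);
    if (LSU.isAvailable(IR) == llvm::mca::LSUnit::LSU_AVAILABLE) {
      LSU.dispatch(IR);               // reserves a load and/or store queue entry
      if (LSU.isReady(IR)) {
        // ... issue IR to the underlying pipelines ...
      }
    }
    LSU.onInstructionExecuted(IR);    // releases the queue entry once IR executes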
diff --git a/contrib/llvm/tools/llvm-mca/RegisterFile.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
index 349e9789b6ee..c23ab0389234 100644
--- a/contrib/llvm/tools/llvm-mca/RegisterFile.h
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -14,14 +14,17 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H
-#define LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H
+#ifndef LLVM_MCA_REGISTER_FILE_H
+#define LLVM_MCA_REGISTER_FILE_H
-#include "HardwareUnit.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/Support/Error.h"
+namespace llvm {
namespace mca {
class ReadState;
@@ -31,12 +34,15 @@ class WriteRef;
/// Manages hardware register files, and tracks register definitions for
/// register renaming purposes.
class RegisterFile : public HardwareUnit {
- const llvm::MCRegisterInfo &MRI;
+ const MCRegisterInfo &MRI;
- // Each register file is associated with an instance of
- // RegisterMappingTracker.
- // A RegisterMappingTracker keeps track of the number of physical registers
- // which have been dynamically allocated by the simulator.
+ // class RegisterMappingTracker is a physical register file (PRF) descriptor.
+ // There is one RegisterMappingTracker for every PRF definition in the
+ // scheduling model.
+ //
+ // An instance of RegisterMappingTracker tracks the number of physical
+ // registers available for renaming. It also tracks the number of register
+ // moves eliminated per cycle.
struct RegisterMappingTracker {
// The total number of physical registers that are available in this
// register file for register renaming purpouses. A value of zero for this
@@ -46,8 +52,28 @@ class RegisterFile : public HardwareUnit {
// Number of physical registers that are currently in use.
unsigned NumUsedPhysRegs;
- RegisterMappingTracker(unsigned NumPhysRegisters)
- : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0) {}
+ // Maximum number of register moves that can be eliminated by this PRF every
+ // cycle. A value of zero means that there is no limit on the number of
+ // moves which can be eliminated every cycle.
+ const unsigned MaxMoveEliminatedPerCycle;
+
+ // Number of register moves eliminated during this cycle.
+ //
+ // This value is increased by one every time a register move is eliminated.
+ // Every new cycle, this value is reset to zero.
+ // A move can be eliminated only if MaxMoveEliminatedPerCycle is zero, or if
+ // NumMoveEliminated is less than MaxMoveEliminatedPerCycle.
+ unsigned NumMoveEliminated;
+
+ // If set, move elimination is restricted to zero-register moves only.
+ bool AllowZeroMoveEliminationOnly;
+
+ RegisterMappingTracker(unsigned NumPhysRegisters,
+ unsigned MaxMoveEliminated = 0U,
+ bool AllowZeroMoveElimOnly = false)
+ : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0),
+ MaxMoveEliminatedPerCycle(MaxMoveEliminated), NumMoveEliminated(0U),
+ AllowZeroMoveEliminationOnly(AllowZeroMoveElimOnly) {}
};
// A vector of register file descriptors. This set always contains at least
@@ -59,7 +85,7 @@ class RegisterFile : public HardwareUnit {
//
// Users can limit the number of physical registers that are available in
// regsiter file #0 specifying command line flag `-register-file-size=<uint>`.
- llvm::SmallVector<RegisterMappingTracker, 4> RegisterFiles;
+ SmallVector<RegisterMappingTracker, 4> RegisterFiles;
// This type is used to propagate information about the owner of a register,
// and the cost of allocating it in the PRF. Register cost is defined as the
@@ -70,20 +96,42 @@ class RegisterFile : public HardwareUnit {
// registers. So, the cost of allocating a YMM register in BtVer2 is 2.
using IndexPlusCostPairTy = std::pair<unsigned, unsigned>;
- // Struct RegisterRenamingInfo maps registers to register files.
- // There is a RegisterRenamingInfo object for every register defined by
- // the target. RegisteRenamingInfo objects are stored into vector
- // RegisterMappings, and register IDs can be used to reference them.
+ // Struct RegisterRenamingInfo is used to map logical registers to register
+ // files.
+ //
+ // There is a RegisterRenamingInfo object for every logical register defined
+ // by the target. RegisterRenamingInfo objects are stored into vector
+ // `RegisterMappings`, and MCPhysReg IDs can be used to reference
+ // elements in that vector.
+ //
+ // Each RegisterRenamingInfo is owned by a PRF, and field `IndexPlusCost`
+ // specifies both the owning PRF, as well as the number of physical registers
+ // consumed at register renaming stage.
+ //
+ // Field `AllowMoveElimination` is set for registers that are used as
+ // destination by optimizable register moves.
+ //
+ // Field `AliasRegID` is set by writes from register moves that have been
+ // eliminated at register renaming stage. A move eliminated at register
+ // renaming stage is effectively bypassed, and its write aliases the source
+ // register definition.
struct RegisterRenamingInfo {
IndexPlusCostPairTy IndexPlusCost;
- llvm::MCPhysReg RenameAs;
+ MCPhysReg RenameAs;
+ MCPhysReg AliasRegID;
+ bool AllowMoveElimination;
+ RegisterRenamingInfo()
+ : IndexPlusCost(std::make_pair(0U, 1U)), RenameAs(0U), AliasRegID(0U),
+ AllowMoveElimination(false) {}
};
// RegisterMapping objects are mainly used to track physical register
- // definitions. There is a RegisterMapping for every register defined by the
- // Target. For each register, a RegisterMapping pair contains a descriptor of
- // the last register write (in the form of a WriteRef object), as well as a
- // RegisterRenamingInfo to quickly identify owning register files.
+ // definitions and resolve data dependencies.
+ //
+ // Every register declared by the Target is associated with an instance of
+ // RegisterMapping. RegisterMapping objects keep track of writes to a logical
+ // register. That information is used by class RegisterFile to resolve data
+ // dependencies, and correctly set latencies for register uses.
//
// This implementation does not allow overlapping register files. The only
// register file that is allowed to overlap with other register files is
@@ -91,9 +139,13 @@ class RegisterFile : public HardwareUnit {
// at most one register file.
using RegisterMapping = std::pair<WriteRef, RegisterRenamingInfo>;
- // This map contains one entry for each register defined by the target.
+ // There is one entry for each register defined by the target.
std::vector<RegisterMapping> RegisterMappings;
+ // Used to track zero registers. There is one bit for each register defined by
+ // the target. Bits are set for registers that are known to be zero.
+ APInt ZeroRegisters;
+
// This method creates a new register file descriptor.
// The new register file owns all of the registers declared by register
// classes in the 'RegisterClasses' set.
@@ -108,43 +160,56 @@ class RegisterFile : public HardwareUnit {
// Here FPRegisterFile contains all the registers defined by register class
// VR128RegClass and VR256RegClass. FPRegisterFile implements 60
// registers which can be used for register renaming purpose.
- void
- addRegisterFile(llvm::ArrayRef<llvm::MCRegisterCostEntry> RegisterClasses,
- unsigned NumPhysRegs);
+ void addRegisterFile(const MCRegisterFileDesc &RF,
+ ArrayRef<MCRegisterCostEntry> Entries);
// Consumes physical registers in each register file specified by the
// `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`.
void allocatePhysRegs(const RegisterRenamingInfo &Entry,
- llvm::MutableArrayRef<unsigned> UsedPhysRegs);
+ MutableArrayRef<unsigned> UsedPhysRegs);
// Releases previously allocated physical registers from the register file(s).
// This method is called from `invalidateRegisterMapping()`.
void freePhysRegs(const RegisterRenamingInfo &Entry,
- llvm::MutableArrayRef<unsigned> FreedPhysRegs);
+ MutableArrayRef<unsigned> FreedPhysRegs);
+
+ // Collects writes that are in a RAW dependency with RS.
+ // This method is called from `addRegisterRead()`.
+ void collectWrites(const ReadState &RS,
+ SmallVectorImpl<WriteRef> &Writes) const;
// Create an instance of RegisterMappingTracker for every register file
// specified by the processor model.
// If no register file is specified, then this method creates a default
// register file with an unbounded number of physical registers.
- void initialize(const llvm::MCSchedModel &SM, unsigned NumRegs);
+ void initialize(const MCSchedModel &SM, unsigned NumRegs);
public:
- RegisterFile(const llvm::MCSchedModel &SM, const llvm::MCRegisterInfo &mri,
+ RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
unsigned NumRegs = 0);
// This method updates the register mappings inserting a new register
// definition. This method is also responsible for updating the number of
// allocated physical registers in each register file modified by the write.
- // No physical regiser is allocated when flag ShouldAllocatePhysRegs is set.
- void addRegisterWrite(WriteRef Write,
- llvm::MutableArrayRef<unsigned> UsedPhysRegs,
- bool ShouldAllocatePhysRegs = true);
+ // No physical register is allocated if this write is from a zero-idiom.
+ void addRegisterWrite(WriteRef Write, MutableArrayRef<unsigned> UsedPhysRegs);
+
+ // Collect writes that are in a data dependency with RS, and update RS
+ // internal state.
+ void addRegisterRead(ReadState &RS, SmallVectorImpl<WriteRef> &Writes) const;
// Removes write \param WS from the register mappings.
// Physical registers may be released to reflect this update.
+ // No registers are released if this write is from a zero-idiom.
void removeRegisterWrite(const WriteState &WS,
- llvm::MutableArrayRef<unsigned> FreedPhysRegs,
- bool ShouldFreePhysRegs = true);
+ MutableArrayRef<unsigned> FreedPhysRegs);
+
+ // Returns true if a move from RS to WS can be eliminated.
+ // On success, it updates WriteState by setting flag `WS.isEliminated`.
+ // If RS is a read from a zero register, and WS is eliminated, then
+ // `WS.WritesZero` is also set, so that method addRegisterWrite() would not
+ // reserve a physical register for it.
+ bool tryEliminateMove(WriteState &WS, ReadState &RS);
// Checks if there are enough physical registers in the register files.
// Returns a "response mask" where each bit represents the response from a
@@ -155,18 +220,20 @@ public:
//
// Current implementation can simulate up to 32 register files (including the
// special register file at index #0).
- unsigned isAvailable(llvm::ArrayRef<unsigned> Regs) const;
- void collectWrites(llvm::SmallVectorImpl<WriteRef> &Writes,
- unsigned RegID) const;
- void updateOnRead(ReadState &RS, unsigned RegID);
+ unsigned isAvailable(ArrayRef<unsigned> Regs) const;
+ // Returns the number of PRFs implemented by this processor.
unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
+ // Notify each PRF that a new cycle just started.
+ void cycleStart();
+
#ifndef NDEBUG
void dump() const;
#endif
};
} // namespace mca
+} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H
+#endif // LLVM_MCA_REGISTER_FILE_H
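isAvailable returns a per-register-file "response mask", one bit per PRF. A hedged sketch of how a dispatch stage might use it, assuming (as llvm-mca's dispatch logic does) that a non-zero mask means at least one register file ran out of physical registers; PRF and the register IDs are hypothetical:

    #include "llvm/ADT/SmallVector.h"

    // Regs would collect the register IDs written by the instruction being dispatched.
    llvm::SmallVector<unsigned, 4> Regs = { /* register IDs defined by IR */ };
    unsigned Mask = PRF.isAvailable(Regs);
    if (Mask) {
      // Some register file cannot satisfy the request: stall dispatch this cycle.
    }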
diff --git a/contrib/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
new file mode 100644
index 000000000000..549a46c247fe
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -0,0 +1,410 @@
+//===--------------------- ResourceManager.h --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The classes here represent processor resource units and their management
+/// strategy. These classes are managed by the Scheduler.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_RESOURCE_MANAGER_H
+#define LLVM_MCA_RESOURCE_MANAGER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+/// Used to notify the internal state of a processor resource.
+///
+/// A processor resource is available if it is not reserved, and there are
+/// available slots in the buffer. A processor resource is unavailable if it
+/// is either reserved, or the associated buffer is full. A processor resource
+/// with a buffer size of -1 is always available if it is not reserved.
+///
+/// Values of type ResourceStateEvent are returned by method
+/// ResourceState::isBufferAvailable(), which is used to query the internal
+/// state of a resource.
+///
+/// The naming convention for resource state events is:
+/// * Event names start with prefix RS_
+/// * Prefix RS_ is followed by a string describing the actual resource state.
+enum ResourceStateEvent {
+ RS_BUFFER_AVAILABLE,
+ RS_BUFFER_UNAVAILABLE,
+ RS_RESERVED
+};
+
+/// Resource allocation strategy used by hardware scheduler resources.
+class ResourceStrategy {
+ ResourceStrategy(const ResourceStrategy &) = delete;
+ ResourceStrategy &operator=(const ResourceStrategy &) = delete;
+
+public:
+ ResourceStrategy() {}
+ virtual ~ResourceStrategy();
+
+ /// Selects a processor resource unit from a ReadyMask.
+ virtual uint64_t select(uint64_t ReadyMask) = 0;
+
+ /// Called by the ResourceManager when a processor resource group, or a
+ /// processor resource with multiple units has become unavailable.
+ ///
+ /// The default strategy uses this information to bias its selection logic.
+ virtual void used(uint64_t ResourceMask) {}
+};
+
+/// Default resource allocation strategy used by processor resource groups and
+/// processor resources with multiple units.
+class DefaultResourceStrategy final : public ResourceStrategy {
+ /// A Mask of resource unit identifiers.
+ ///
+ /// There is one bit set for every available resource unit.
+ /// It defaults to the value of field ResourceSizeMask in ResourceState.
+ const uint64_t ResourceUnitMask;
+
+ /// A simple round-robin selector for processor resource units.
+ /// Each bit of this mask identifies a sub resource within a group.
+ ///
+ /// As an example, let's assume that this is a default policy for a
+ /// processor resource group composed by the following three units:
+ /// ResourceA -- 0b001
+ /// ResourceB -- 0b010
+ /// ResourceC -- 0b100
+ ///
+ /// Field NextInSequenceMask is used to select the next unit from the set of
+ /// resource units. It defaults to the value of field `ResourceUnitMasks` (in
+ /// this example, it defaults to mask '0b111').
+ ///
+ /// The round-robin selector would first select 'ResourceC', then
+ /// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the
+ /// corresponding bit in NextInSequenceMask is cleared. For example, if
+ /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes
+ /// 0b011.
+ ///
+ /// When NextInSequenceMask becomes zero, it is automatically reset to the
+ /// default value (i.e. ResourceUnitMask).
+ uint64_t NextInSequenceMask;
+
+ /// This field is used to track resource units that are used (i.e. selected)
+ /// by groups other than the one associated with this strategy object.
+ ///
+ /// In LLVM processor resource groups are allowed to partially (or fully)
+ /// overlap. That means the same unit may be visible to multiple groups.
+ /// This field keeps track of uses that have originated from outside of
+ /// this group. The idea is to bias the selection strategy, so that resources
+ /// that haven't been used by other groups get prioritized.
+ ///
+ /// The end goal is to (try to) keep the resource distribution as uniform as
+ /// possible. By construction, this mask only tracks one level of resource
+ /// usage. Therefore, this strategy is expected to be less accurate when the
+ /// same units are used multiple times by other groups within a single round of
+ /// select.
+ ///
+ /// Note: an LRU selector would have better accuracy at the cost of being
+ /// slightly more expensive (mostly in terms of runtime cost). Methods
+ /// 'select' and 'used' are always in the hot execution path of llvm-mca.
+ /// Therefore, a slow implementation of 'select' would have a negative impact
+ /// on the overall performance of the tool.
+ uint64_t RemovedFromNextInSequence;
+
+public:
+ DefaultResourceStrategy(uint64_t UnitMask)
+ : ResourceStrategy(), ResourceUnitMask(UnitMask),
+ NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {}
+ virtual ~DefaultResourceStrategy() = default;
+
+ uint64_t select(uint64_t ReadyMask) override;
+ void used(uint64_t Mask) override;
+};
+
+/// A processor resource descriptor.
+///
+/// There is an instance of this class for every processor resource defined by
+/// the machine scheduling model.
+/// Objects of class ResourceState dynamically track the usage of processor
+/// resource units.
+class ResourceState {
+ /// An index to the MCProcResourceDesc entry in the processor model.
+ const unsigned ProcResourceDescIndex;
+ /// A resource mask. This is generated by the tool with the help of
+ /// function `mca::computeProcResourceMasks' (see Support.h).
+ ///
+ /// Field ResourceMask only has one bit set if this resource state describes a
+ /// processor resource unit (i.e. this is not a group). That means, we can
+ /// quickly check if a resource is a group by simply counting the number of
+ /// bits that are set in the mask.
+ ///
+ /// The most significant bit of a mask (MSB) uniquely identifies a resource.
+ /// Remaining bits are used to describe the composition of a group (Group).
+ ///
+ /// Example (little endian):
+ /// Resource | Mask | MSB | Group
+ /// ---------+------------+------------+------------
+ /// A | 0b000001 | 0b000001 | 0b000000
+ /// | | |
+ /// B | 0b000010 | 0b000010 | 0b000000
+ /// | | |
+ /// C | 0b010000 | 0b010000 | 0b000000
+ /// | | |
+ /// D | 0b110010 | 0b100000 | 0b010010
+ ///
+ /// In this example, resources A, B and C are processor resource units.
+ /// Only resource D is a group resource, and it contains resources B and C.
+ /// That is because MSB(B) and MSB(C) are both contained within Group(D).
+ const uint64_t ResourceMask;
+
+ /// A ProcResource can have multiple units.
+ ///
+ /// For processor resource groups this field is a mask of contained resource
+ /// units. It is obtained from ResourceMask by clearing the highest set bit.
+ /// The number of resource units in a group can be simply computed as the
+ /// population count of this field.
+ ///
+ /// For normal (i.e. non-group) resources, the number of bits set in this mask
+ /// is equivalent to the number of units declared by the processor model (see
+ /// field 'NumUnits' in 'ProcResourceUnits').
+ uint64_t ResourceSizeMask;
+
+ /// A mask of ready units.
+ uint64_t ReadyMask;
+
+ /// Buffered resources will have this field set to a positive number. A
+ /// buffered resource behaves like a reservation station
+ /// implementing its own buffer for out-of-order execution.
+ ///
+ /// A BufferSize of 1 is used by scheduler resources that force in-order
+ /// execution.
+ ///
+ /// A BufferSize of 0 is used to model in-order issue/dispatch resources.
+ /// Since in-order issue/dispatch resources don't implement buffers, dispatch
+ /// events coincide with issue events.
+ /// Also, no other instruction can be dispatched/issued while this resource is
+ /// in use. Only when all the "resource cycles" are consumed (after the issue
+ /// event) can a new instruction be dispatched.
+ const int BufferSize;
+
+ /// Available slots in the buffer (zero, if this is not a buffered resource).
+ unsigned AvailableSlots;
+
+ /// This field is set if this resource is currently reserved.
+ ///
+ /// Resources can be reserved for a number of cycles.
+ /// Instructions can still be dispatched to reserved resources. However,
+ /// instructions dispatched to a reserved resource cannot be issued to the
+ /// underlying units (i.e. pipelines) until the resource is released.
+ bool Unavailable;
+
+ const bool IsAGroup;
+
+ /// Checks for the availability of unit 'SubResMask' in the group.
+ bool isSubResourceReady(uint64_t SubResMask) const {
+ return ReadyMask & SubResMask;
+ }
+
+public:
+ ResourceState(const MCProcResourceDesc &Desc, unsigned Index, uint64_t Mask);
+
+ unsigned getProcResourceID() const { return ProcResourceDescIndex; }
+ uint64_t getResourceMask() const { return ResourceMask; }
+ uint64_t getReadyMask() const { return ReadyMask; }
+ int getBufferSize() const { return BufferSize; }
+
+ bool isBuffered() const { return BufferSize > 0; }
+ bool isInOrder() const { return BufferSize == 1; }
+
+ /// Returns true if this is an in-order dispatch/issue resource.
+ bool isADispatchHazard() const { return BufferSize == 0; }
+ bool isReserved() const { return Unavailable; }
+
+ void setReserved() { Unavailable = true; }
+ void clearReserved() { Unavailable = false; }
+
+ /// Returns true if this resource is not reserved, and if there are at least
+ /// `NumUnits` available units.
+ bool isReady(unsigned NumUnits = 1) const;
+
+ bool isAResourceGroup() const { return IsAGroup; }
+
+ bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
+
+ void markSubResourceAsUsed(uint64_t ID) {
+ assert(isSubResourceReady(ID));
+ ReadyMask ^= ID;
+ }
+
+ void releaseSubResource(uint64_t ID) {
+ assert(!isSubResourceReady(ID));
+ ReadyMask ^= ID;
+ }
+
+ unsigned getNumUnits() const {
+ return isAResourceGroup() ? 1U : countPopulation(ResourceSizeMask);
+ }
+
+ /// Checks if there is an available slot in the resource buffer.
+ ///
+ /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if
+ /// there is a slot available.
+ ///
+ /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it
+ /// is reserved.
+ ///
+ /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots.
+ ResourceStateEvent isBufferAvailable() const;
+
+ /// Reserve a slot in the buffer.
+ void reserveBuffer() {
+ if (AvailableSlots)
+ AvailableSlots--;
+ }
+
+ /// Release a slot in the buffer.
+ void releaseBuffer() {
+ if (BufferSize > 0)
+ AvailableSlots++;
+ assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
+ }
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+/// A resource unit identifier.
+///
+/// This is used to identify a specific processor resource unit using a pair
+/// of indices where the 'first' index is a processor resource mask, and the
+/// 'second' index is an index for a "sub-resource" (i.e. unit).
+typedef std::pair<uint64_t, uint64_t> ResourceRef;
+
+// First: a MCProcResourceDesc index identifying a buffered resource.
+// Second: max number of buffer entries used in this resource.
+typedef std::pair<unsigned, unsigned> BufferUsageEntry;
+
+/// A resource manager for processor resource units and groups.
+///
+/// This class owns all the ResourceState objects, and it is responsible for
+/// acting on requests from a Scheduler by updating the internal state of
+/// ResourceState objects.
+/// This class doesn't know about instruction itineraries and functional units.
+/// In future, it can be extended to support itineraries too through the same
+/// public interface.
+class ResourceManager {
+ // Set of resources available on the subtarget.
+ //
+ // There is an instance of ResourceState for every resource declared by the
+ // target scheduling model.
+ //
+ // Elements of this vector are ordered by resource kind. In particular,
+ // resource units take precedence over resource groups.
+ //
+ // The index of a processor resource in this vector depends on the value of
+ // its mask (see the description of field ResourceState::ResourceMask). In
+ // particular, it is computed as the position of the most significant bit set
+ // (MSB) in the mask plus one (since we want to ignore the invalid resource
+ // descriptor at index zero).
+ //
+ // Example (little endian):
+ //
+ // Resource | Mask | MSB | Index
+ // ---------+---------+---------+-------
+ // A | 0b00001 | 0b00001 | 1
+ // | | |
+ // B | 0b00100 | 0b00100 | 3
+ // | | |
+ // C | 0b10010 | 0b10000 | 5
+ //
+ //
+ // The same index is also used to address elements within vector `Strategies`
+ // and vector `Resource2Groups`.
+ std::vector<std::unique_ptr<ResourceState>> Resources;
+ std::vector<std::unique_ptr<ResourceStrategy>> Strategies;
+
+ // Used to quickly identify groups that own a particular resource unit.
+ std::vector<uint64_t> Resource2Groups;
+
+ // A table to map processor resource IDs to processor resource masks.
+ SmallVector<uint64_t, 8> ProcResID2Mask;
+
+ // Keeps track of which resources are busy, and how many cycles are left
+ // before those become usable again.
+ SmallDenseMap<ResourceRef, unsigned> BusyResources;
+
+ // Returns the actual resource unit that will be used.
+ ResourceRef selectPipe(uint64_t ResourceID);
+
+ void use(const ResourceRef &RR);
+ void release(const ResourceRef &RR);
+
+ unsigned getNumUnits(uint64_t ResourceID) const;
+
+ // Overrides the selection strategy for the processor resource with the given
+ // mask.
+ void setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
+ uint64_t ResourceMask);
+
+public:
+ ResourceManager(const MCSchedModel &SM);
+ virtual ~ResourceManager() = default;
+
+ // Overrides the selection strategy for the resource at index ResourceID in
+ // the MCProcResourceDesc table.
+ void setCustomStrategy(std::unique_ptr<ResourceStrategy> S,
+ unsigned ResourceID) {
+ assert(ResourceID < ProcResID2Mask.size() &&
+ "Invalid resource index in input!");
+ return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]);
+ }
+
+ // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
+ // there are enough available slots in the buffers.
+ ResourceStateEvent canBeDispatched(ArrayRef<uint64_t> Buffers) const;
+
+ // Return the processor resource identifier associated to this Mask.
+ unsigned resolveResourceMask(uint64_t Mask) const;
+
+ // Consume a slot in every buffered resource from array 'Buffers'. Resource
+ // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
+ void reserveBuffers(ArrayRef<uint64_t> Buffers);
+
+ // Release buffer entries previously allocated by method reserveBuffers.
+ void releaseBuffers(ArrayRef<uint64_t> Buffers);
+
+ // Reserve a processor resource. A reserved resource is not available for
+ // instruction issue until it is released.
+ void reserveResource(uint64_t ResourceID);
+
+ // Release a previously reserved processor resource.
+ void releaseResource(uint64_t ResourceID);
+
+ bool canBeIssued(const InstrDesc &Desc) const;
+
+ void issueInstruction(
+ const InstrDesc &Desc,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
+
+ void cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed);
+
+#ifndef NDEBUG
+ void dump() const {
+ for (const std::unique_ptr<ResourceState> &Resource : Resources)
+ Resource->dump();
+ }
+#endif
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_RESOURCE_MANAGER_H
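The mask encoding documented above (the most significant set bit identifies the resource, the remaining bits name the units of a group, and the vector index is the MSB position plus one) can be decoded with plain bit arithmetic. A small sketch under those assumptions, using the group-D example mask from the comment:

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // Decode the example mask for group D (0b110010) from the table above.
    static void decodeResourceMask(uint64_t Mask) {
      unsigned MSBIdx = llvm::Log2_64(Mask);            // 5: bit position of the MSB
      uint64_t MSB = uint64_t(1) << MSBIdx;             // 0b100000 uniquely identifies D
      uint64_t GroupUnits = Mask ^ MSB;                 // 0b010010: members B and C
      bool IsGroup = llvm::countPopulation(Mask) > 1;   // true: D is a resource group
      unsigned Index = MSBIdx + 1;                      // 6: slot in the Resources vector
      (void)GroupUnits; (void)IsGroup; (void)Index;
    }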
diff --git a/contrib/llvm/tools/llvm-mca/RetireControlUnit.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
index 8acc8bcc98fe..71360e984ade 100644
--- a/contrib/llvm/tools/llvm-mca/RetireControlUnit.h
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
@@ -12,14 +12,15 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H
-#define LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H
+#ifndef LLVM_MCA_RETIRE_CONTROL_UNIT_H
+#define LLVM_MCA_RETIRE_CONTROL_UNIT_H
-#include "HardwareUnit.h"
-#include "Instruction.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/Instruction.h"
#include <vector>
+namespace llvm {
namespace mca {
/// This class tracks which instructions are in-flight (i.e., dispatched but not
@@ -62,15 +63,19 @@ private:
std::vector<RUToken> Queue;
public:
- RetireControlUnit(const llvm::MCSchedModel &SM);
+ RetireControlUnit(const MCSchedModel &SM);
- bool isFull() const { return !AvailableSlots; }
bool isEmpty() const { return AvailableSlots == Queue.size(); }
bool isAvailable(unsigned Quantity = 1) const {
// Some instructions may declare a number of uOps which exceeds the size
// of the reorder buffer. To avoid problems, cap the amount of slots to
// the size of the reorder buffer.
Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
+
+ // Further normalize the number of micro opcodes for instructions that
+ // declare zero opcodes. This should match the behavior of method
+ // reserveSlot().
+ Quantity = std::max(Quantity, 1U);
return AvailableSlots >= Quantity;
}
@@ -94,5 +99,6 @@ public:
};
} // namespace mca
+} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H
+#endif // LLVM_MCA_RETIRE_CONTROL_UNIT_H
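The normalization added above clamps the requested slot count into the range [1, Queue.size()]: an instruction declaring zero micro opcodes still consumes one reorder-buffer slot, and one declaring more micro opcodes than the buffer holds is capped at the buffer size. A tiny worked sketch; the 224-entry size is only an example value:

    #include <algorithm>

    // Mirrors the clamping performed in RetireControlUnit::isAvailable().
    static unsigned normalizeSlots(unsigned Quantity, unsigned ReorderBufferSize) {
      Quantity = std::min(Quantity, ReorderBufferSize);  // cap at the buffer size
      return std::max(Quantity, 1U);                     // always use at least one slot
    }
    // normalizeSlots(0, 224) == 1;  normalizeSlots(300, 224) == 224.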
diff --git a/contrib/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
new file mode 100644
index 000000000000..351ea4827df9
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -0,0 +1,214 @@
+//===--------------------- Scheduler.h ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A scheduler for Processor Resource Units and Processor Resource Groups.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SCHEDULER_H
+#define LLVM_MCA_SCHEDULER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
+#include "llvm/MCA/HardwareUnits/ResourceManager.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+class SchedulerStrategy {
+public:
+ SchedulerStrategy() = default;
+ virtual ~SchedulerStrategy();
+
+ /// Returns true if Lhs should take priority over Rhs.
+ ///
+ /// This method is used by class Scheduler to select the "best" ready
+ /// instruction to issue to the underlying pipelines.
+ virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0;
+};
+
+/// Default instruction selection strategy used by class Scheduler.
+class DefaultSchedulerStrategy : public SchedulerStrategy {
+ /// This method ranks instructions based on their age, and the number of known
+ /// users. The lower the rank value, the better.
+ int computeRank(const InstRef &Lhs) const {
+ return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers();
+ }
+
+public:
+ DefaultSchedulerStrategy() = default;
+ virtual ~DefaultSchedulerStrategy();
+
+ bool compare(const InstRef &Lhs, const InstRef &Rhs) const override {
+ int LhsRank = computeRank(Lhs);
+ int RhsRank = computeRank(Rhs);
+
+ /// Prioritize older instructions over younger instructions to minimize the
+ /// pressure on the reorder buffer.
+ if (LhsRank == RhsRank)
+ return Lhs.getSourceIndex() < Rhs.getSourceIndex();
+ return LhsRank < RhsRank;
+ }
+};
+
+/// Class Scheduler is responsible for issuing instructions to pipeline
+/// resources.
+///
+/// Internally, it delegates to a ResourceManager the management of processor
+/// resources. This class is also responsible for tracking the progress of
+/// instructions from the dispatch stage, until the write-back stage.
+///
+/// An instruction dispatched to the Scheduler is initially placed into either
+/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input
+/// operands.
+///
+/// An instruction is moved from the WaitSet to the ReadySet when register
+/// operands become available, and all memory dependencies are met.
+/// Instructions that are moved from the WaitSet to the ReadySet transition
+/// in state from 'IS_AVAILABLE' to 'IS_READY'.
+///
+/// On every cycle, the Scheduler checks if it can promote instructions from the
+/// WaitSet to the ReadySet.
+///
+/// An Instruction is moved from the ReadySet to the `IssuedSet` when it is
+/// issued to one or more pipelines. This event also causes an instruction state
+/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction
+/// leaves the IssuedSet when it reaches the write-back stage.
+class Scheduler : public HardwareUnit {
+ LSUnit &LSU;
+
+ // Instruction selection strategy for this Scheduler.
+ std::unique_ptr<SchedulerStrategy> Strategy;
+
+ // Hardware resources that are managed by this scheduler.
+ std::unique_ptr<ResourceManager> Resources;
+
+ std::vector<InstRef> WaitSet;
+ std::vector<InstRef> ReadySet;
+ std::vector<InstRef> IssuedSet;
+
+ /// Verify the given selection strategy and set the Strategy member
+ /// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is
+ /// used.
+ void initializeStrategy(std::unique_ptr<SchedulerStrategy> S);
+
+ /// Issue an instruction without updating the ready queue.
+ void issueInstructionImpl(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
+
+ // Identify instructions that have finished executing, and remove them from
+ // the IssuedSet. References to executed instructions are added to input
+ // vector 'Executed'.
+ void updateIssuedSet(SmallVectorImpl<InstRef> &Executed);
+
+ // Try to promote instructions from WaitSet to ReadySet.
+ // Promoted instructions are added to the input vector 'Ready'.
+ void promoteToReadySet(SmallVectorImpl<InstRef> &Ready);
+
+public:
+ Scheduler(const MCSchedModel &Model, LSUnit &Lsu)
+ : Scheduler(Model, Lsu, nullptr) {}
+
+ Scheduler(const MCSchedModel &Model, LSUnit &Lsu,
+ std::unique_ptr<SchedulerStrategy> SelectStrategy)
+ : Scheduler(make_unique<ResourceManager>(Model), Lsu,
+ std::move(SelectStrategy)) {}
+
+ Scheduler(std::unique_ptr<ResourceManager> RM, LSUnit &Lsu,
+ std::unique_ptr<SchedulerStrategy> SelectStrategy)
+ : LSU(Lsu), Resources(std::move(RM)) {
+ initializeStrategy(std::move(SelectStrategy));
+ }
+
+ // Stalls generated by the scheduler.
+ enum Status {
+ SC_AVAILABLE,
+ SC_LOAD_QUEUE_FULL,
+ SC_STORE_QUEUE_FULL,
+ SC_BUFFERS_FULL,
+ SC_DISPATCH_GROUP_STALL,
+ };
+
+ /// Checks if the instruction in 'IR' can be dispatched and returns an answer
+ /// in the form of a Status value.
+ ///
+ /// The DispatchStage is responsible for querying the Scheduler before
+ /// dispatching new instructions. This routine is used to perform such a
+ /// query. If the instruction 'IR' can be dispatched, then SC_AVAILABLE is
+ /// returned; otherwise the returned Status value identifies the stall type.
+ /// Internally, it also checks if the load/store unit is available.
+ Status isAvailable(const InstRef &IR) const;
+
+ /// Reserves buffer and LSUnit queue resources that are necessary to issue
+ /// this instruction.
+ ///
+ /// Once dispatched, instruction IR may become eligible to be issued to the
+ /// underlying pipelines. Note that this operation cannot fail; it assumes
+ /// that a previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`.
+ void dispatch(const InstRef &IR);
+
+ /// Returns true if IR is ready to be executed by the underlying pipelines.
+ /// This method assumes that IR has been previously dispatched.
+ bool isReady(const InstRef &IR) const;
+
+ /// Issues an instruction and populates a vector of used pipeline resources,
+ /// and a vector of instructions that transitioned to the ready state as a
+ /// result of this event.
+ void issueInstruction(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Used,
+ SmallVectorImpl<InstRef> &Ready);
+
+ /// Returns true if IR has to be issued immediately, or if IR is a zero
+ /// latency instruction.
+ bool mustIssueImmediately(const InstRef &IR) const;
+
+ /// This routine notifies the Scheduler that a new cycle just started.
+ ///
+ /// It notifies the underlying ResourceManager that a new cycle just started.
+ /// Vector `Freed` is populated with ResourceRef objects for resources that
+ /// have changed state, and that are now available to new instructions.
+ /// Executed instructions are added to vector Executed, while vector Ready is
+ /// populated with instructions that have become ready in this new cycle.
+ void cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
+ SmallVectorImpl<InstRef> &Ready,
+ SmallVectorImpl<InstRef> &Executed);
+
+ /// Convert a resource mask into a valid llvm processor resource identifier.
+ unsigned getResourceID(uint64_t Mask) const {
+ return Resources->resolveResourceMask(Mask);
+ }
+
+ /// Select the next instruction to issue from the ReadySet. Returns an invalid
+ /// instruction reference if there are no ready instructions, or if processor
+ /// resources are not available.
+ InstRef select();
+
+#ifndef NDEBUG
+ // Dumps the internal state of this Scheduler (i.e. its instruction queues).
+ void dump() const;
+
+ // Performs a sanity check. It should only be called when we know that 'IR'
+ // is not in any of the scheduler's instruction queues.
+ void sanityCheck(const InstRef &IR) const {
+ assert(find(WaitSet, IR) == WaitSet.end() && "Already in the wait set!");
+ assert(find(ReadySet, IR) == ReadySet.end() && "Already in the ready set!");
+ assert(find(IssuedSet, IR) == IssuedSet.end() && "Already executing!");
+ }
+#endif // !NDEBUG
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SCHEDULER_H
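The SchedulerStrategy interface above is the extension point for the issue policy. As a purely illustrative sketch (not part of this patch; the PreferYoungestStrategy class and the createSchedulerWithCustomStrategy helper are invented), a client could rank ready instructions differently and hand the policy to the Scheduler through its third constructor:

#include "llvm/ADT/STLExtras.h"
#include "llvm/MCA/HardwareUnits/Scheduler.h"
#include <memory>

namespace {
// Invented policy: prefer younger instructions (higher source index).
class PreferYoungestStrategy : public llvm::mca::SchedulerStrategy {
public:
  bool compare(const llvm::mca::InstRef &Lhs,
               const llvm::mca::InstRef &Rhs) const override {
    return Lhs.getSourceIndex() > Rhs.getSourceIndex();
  }
};
} // namespace

// The Scheduler takes ownership of the strategy object.
std::unique_ptr<llvm::mca::Scheduler>
createSchedulerWithCustomStrategy(const llvm::MCSchedModel &SM,
                                  llvm::mca::LSUnit &LSU) {
  return llvm::make_unique<llvm::mca::Scheduler>(
      SM, LSU, llvm::make_unique<PreferYoungestStrategy>());
}

For reference, the DefaultSchedulerStrategy ranks by source index minus the number of known users, so an older instruction with many waiting consumers wins over a younger one with none.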
diff --git a/contrib/llvm/include/llvm/MCA/InstrBuilder.h b/contrib/llvm/include/llvm/MCA/InstrBuilder.h
new file mode 100644
index 000000000000..5f998db5e4ce
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/InstrBuilder.h
@@ -0,0 +1,77 @@
+//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A builder class for instructions that are statically analyzed by llvm-mca.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_INSTRBUILDER_H
+#define LLVM_MCA_INSTRBUILDER_H
+
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+/// A builder class that knows how to construct Instruction objects.
+///
+/// Every llvm-mca Instruction is described by an object of class InstrDesc.
+/// An InstrDesc describes which registers are read/written by the instruction,
+/// as well as the instruction latency and hardware resources consumed.
+///
+/// This class is used by the tool to construct Instructions and instruction
+/// descriptors (i.e. InstrDesc objects).
+/// Information from the machine scheduling model is used to identify processor
+/// resources that are consumed by an instruction.
+class InstrBuilder {
+ const MCSubtargetInfo &STI;
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+ const MCInstrAnalysis *MCIA;
+ SmallVector<uint64_t, 8> ProcResourceMasks;
+
+ DenseMap<unsigned short, std::unique_ptr<const InstrDesc>> Descriptors;
+ DenseMap<const MCInst *, std::unique_ptr<const InstrDesc>> VariantDescriptors;
+
+ bool FirstCallInst;
+ bool FirstReturnInst;
+
+ Expected<const InstrDesc &> createInstrDescImpl(const MCInst &MCI);
+ Expected<const InstrDesc &> getOrCreateInstrDesc(const MCInst &MCI);
+
+ InstrBuilder(const InstrBuilder &) = delete;
+ InstrBuilder &operator=(const InstrBuilder &) = delete;
+
+ void populateWrites(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
+ void populateReads(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
+ Error verifyInstrDesc(const InstrDesc &ID, const MCInst &MCI) const;
+
+public:
+ InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
+ const MCRegisterInfo &RI, const MCInstrAnalysis *IA);
+
+ void clear() {
+ VariantDescriptors.shrink_and_clear();
+ FirstCallInst = true;
+ FirstReturnInst = true;
+ }
+
+ Expected<std::unique_ptr<Instruction>> createInstruction(const MCInst &MCI);
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_INSTRBUILDER_H
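A hedged usage sketch (the lowerBlock helper below is invented for illustration): the builder is typically driven over a sequence of MCInst, with failures propagated through llvm::Expected.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MCA/InstrBuilder.h"
#include <memory>
#include <vector>

// Hypothetical helper: lower a block of MCInst into mca::Instruction objects.
static llvm::Error
lowerBlock(llvm::mca::InstrBuilder &IB, llvm::ArrayRef<llvm::MCInst> Insts,
           std::vector<std::unique_ptr<llvm::mca::Instruction>> &Out) {
  for (const llvm::MCInst &MCI : Insts) {
    llvm::Expected<std::unique_ptr<llvm::mca::Instruction>> InstOrErr =
        IB.createInstruction(MCI);
    if (!InstOrErr)
      return InstOrErr.takeError();
    Out.push_back(std::move(*InstOrErr));
  }
  return llvm::Error::success();
}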
diff --git a/contrib/llvm/tools/llvm-mca/Instruction.h b/contrib/llvm/include/llvm/MCA/Instruction.h
index 3b2f90528f2e..b91610c64d85 100644
--- a/contrib/llvm/tools/llvm-mca/Instruction.h
+++ b/contrib/llvm/include/llvm/MCA/Instruction.h
@@ -13,9 +13,12 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H
-#define LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H
+#ifndef LLVM_MCA_INSTRUCTION_H
+#define LLVM_MCA_INSTRUCTION_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MathExtras.h"
#ifndef NDEBUG
@@ -23,8 +26,8 @@
#endif
#include <memory>
-#include <set>
-#include <vector>
+
+namespace llvm {
namespace mca {
@@ -85,7 +88,7 @@ class ReadState;
/// register write. It also tracks how many cycles are left before the write
/// back stage.
class WriteState {
- const WriteDescriptor &WD;
+ const WriteDescriptor *WD;
// On instruction issue, this field is set equal to the write latency.
// Before instruction issue, this field defaults to -512, a special
// value that represents an "unknown" number of cycles.
@@ -97,43 +100,94 @@ class WriteState {
// field RegisterID from WD.
unsigned RegisterID;
+ // Physical register file that serves register RegisterID.
+ unsigned PRFID;
+
// True if this write implicitly clears the upper portion of RegisterID's
// super-registers.
bool ClearsSuperRegs;
+ // True if this write is from a dependency breaking zero-idiom instruction.
+ bool WritesZero;
+
+ // True if this write has been eliminated at register renaming stage.
+ // Example: a register move doesn't consume scheduler/pipeline resources if
+ // it is eliminated at register renaming stage. It still consumes
+ // decode bandwidth, and ROB entries.
+ bool IsEliminated;
+
// This field is set if this is a partial register write, and it has a false
// dependency on any previous write of the same register (or a portion of it).
// DependentWrite must be able to complete before this write completes, so
// that we don't break the WAW, and the two writes can be merged together.
const WriteState *DependentWrite;
+ // A partial write that is in a false dependency with this write.
+ WriteState *PartialWrite;
+
+ unsigned DependentWriteCyclesLeft;
+
// A list of dependent reads. Users is a set of dependent
// reads. A dependent read is added to the set only if CyclesLeft
// is "unknown". As soon as CyclesLeft is 'known', each user in the set
// gets notified with the actual CyclesLeft.
// The 'second' element of a pair is a "ReadAdvance" number of cycles.
- std::set<std::pair<ReadState *, int>> Users;
+ SmallVector<std::pair<ReadState *, int>, 4> Users;
public:
WriteState(const WriteDescriptor &Desc, unsigned RegID,
- bool clearsSuperRegs = false)
- : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
- ClearsSuperRegs(clearsSuperRegs), DependentWrite(nullptr) {}
- WriteState(const WriteState &Other) = delete;
- WriteState &operator=(const WriteState &Other) = delete;
+ bool clearsSuperRegs = false, bool writesZero = false)
+ : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0),
+ ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
+ IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
+ DependentWriteCyclesLeft(0) {}
+
+ WriteState(const WriteState &Other) = default;
+ WriteState &operator=(const WriteState &Other) = default;
int getCyclesLeft() const { return CyclesLeft; }
- unsigned getWriteResourceID() const { return WD.SClassOrWriteResourceID; }
+ unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; }
unsigned getRegisterID() const { return RegisterID; }
- unsigned getLatency() const { return WD.Latency; }
+ unsigned getRegisterFileID() const { return PRFID; }
+ unsigned getLatency() const { return WD->Latency; }
void addUser(ReadState *Use, int ReadAdvance);
- unsigned getNumUsers() const { return Users.size(); }
+ void addUser(WriteState *Use);
+
+ unsigned getDependentWriteCyclesLeft() const {
+ return DependentWriteCyclesLeft;
+ }
+
+ unsigned getNumUsers() const {
+ unsigned NumUsers = Users.size();
+ if (PartialWrite)
+ ++NumUsers;
+ return NumUsers;
+ }
+
bool clearsSuperRegisters() const { return ClearsSuperRegs; }
+ bool isWriteZero() const { return WritesZero; }
+ bool isEliminated() const { return IsEliminated; }
+ bool isExecuted() const {
+ return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0;
+ }
const WriteState *getDependentWrite() const { return DependentWrite; }
- void setDependentWrite(const WriteState *Write) { DependentWrite = Write; }
+ void setDependentWrite(WriteState *Other) { DependentWrite = Other; }
+ void writeStartEvent(unsigned Cycles) {
+ DependentWriteCyclesLeft = Cycles;
+ DependentWrite = nullptr;
+ }
+
+ void setWriteZero() { WritesZero = true; }
+ void setEliminated() {
+ assert(Users.empty() && "Write is in an inconsistent state.");
+ CyclesLeft = 0;
+ IsEliminated = true;
+ }
+
+ void setPRF(unsigned PRF) { PRFID = PRF; }
// On every cycle, update CyclesLeft and notify dependent users.
void cycleEvent();
@@ -149,9 +203,11 @@ public:
/// A read may be dependent on more than one write. This occurs when some
/// writes only partially update the register associated to this read.
class ReadState {
- const ReadDescriptor &RD;
+ const ReadDescriptor *RD;
// Physical register identifier associated with this read.
unsigned RegisterID;
+ // Physical register file that serves register RegisterID.
+ unsigned PRFID;
// Number of writes that contribute to the definition of RegisterID.
// In the absence of partial register updates, the number of DependentWrites
// cannot be more than one.
@@ -168,20 +224,27 @@ class ReadState {
// This field is set to true only if there are no dependent writes, and
// there are no `CyclesLeft' to wait.
bool IsReady;
+ // True if this is a read from a known zero register.
+ bool IsZero;
+ // True if this register read is from a dependency-breaking instruction.
+ bool IndependentFromDef;
public:
ReadState(const ReadDescriptor &Desc, unsigned RegID)
- : RD(Desc), RegisterID(RegID), DependentWrites(0),
- CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true) {}
- ReadState(const ReadState &Other) = delete;
- ReadState &operator=(const ReadState &Other) = delete;
+ : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
+ CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
+ IsZero(false), IndependentFromDef(false) {}
- const ReadDescriptor &getDescriptor() const { return RD; }
- unsigned getSchedClass() const { return RD.SchedClassID; }
+ const ReadDescriptor &getDescriptor() const { return *RD; }
+ unsigned getSchedClass() const { return RD->SchedClassID; }
unsigned getRegisterID() const { return RegisterID; }
+ unsigned getRegisterFileID() const { return PRFID; }
bool isReady() const { return IsReady; }
- bool isImplicitRead() const { return RD.isImplicitRead(); }
+ bool isImplicitRead() const { return RD->isImplicitRead(); }
+
+ bool isIndependentFromDef() const { return IndependentFromDef; }
+ void setIndependentFromDef() { IndependentFromDef = true; }
void cycleEvent();
void writeStartEvent(unsigned Cycles);
@@ -189,6 +252,10 @@ public:
DependentWrites = Writes;
IsReady = !Writes;
}
+
+ bool isReadZero() const { return IsZero; }
+ void setReadZero() { IsZero = true; }
+ void setPRF(unsigned ID) { PRFID = ID; }
};
/// A sequence of cycles.
@@ -224,7 +291,7 @@ public:
bool isValid() const { return Begin <= End; }
unsigned size() const { return End - Begin; };
- void Subtract(unsigned Cycles) {
+ void subtract(unsigned Cycles) {
assert(End >= Cycles);
End -= Cycles;
}
@@ -253,15 +320,16 @@ struct ResourceUsage {
/// An instruction descriptor
struct InstrDesc {
- std::vector<WriteDescriptor> Writes; // Implicit writes are at the end.
- std::vector<ReadDescriptor> Reads; // Implicit reads are at the end.
+ SmallVector<WriteDescriptor, 4> Writes; // Implicit writes are at the end.
+ SmallVector<ReadDescriptor, 4> Reads; // Implicit reads are at the end.
// For every resource used by an instruction of this kind, this vector
// reports the number of "consumed cycles".
- std::vector<std::pair<uint64_t, ResourceUsage>> Resources;
+ SmallVector<std::pair<uint64_t, ResourceUsage>, 4> Resources;
// A list of buffered resources consumed by this instruction.
- std::vector<uint64_t> Buffers;
+ SmallVector<uint64_t, 4> Buffers;
+
unsigned MaxLatency;
// Number of MicroOps for this instruction.
unsigned NumMicroOps;
@@ -269,18 +337,74 @@ struct InstrDesc {
bool MayLoad;
bool MayStore;
bool HasSideEffects;
+ bool BeginGroup;
+ bool EndGroup;
+
+ // True if all buffered resources are in-order, and there is at least one
+ // buffer which is a dispatch hazard (BufferSize = 0).
+ bool MustIssueImmediately;
// A zero latency instruction doesn't consume any scheduler resources.
bool isZeroLatency() const { return !MaxLatency && Resources.empty(); }
+
+ InstrDesc() = default;
+ InstrDesc(const InstrDesc &Other) = delete;
+ InstrDesc &operator=(const InstrDesc &Other) = delete;
+};
+
+/// Base class for instructions consumed by the simulation pipeline.
+///
+/// This class tracks data dependencies as well as generic properties
+/// of the instruction.
+class InstructionBase {
+ const InstrDesc &Desc;
+
+ // This field is set for instructions that are candidates for move
+ // elimination. For more information about move elimination, see the
+ // definition of RegisterMappingTracker in RegisterFile.h
+ bool IsOptimizableMove;
+
+ // Output dependencies.
+ // One entry per each implicit and explicit register definition.
+ SmallVector<WriteState, 4> Defs;
+
+ // Input dependencies.
+ // One entry per each implicit and explicit register use.
+ SmallVector<ReadState, 4> Uses;
+
+public:
+ InstructionBase(const InstrDesc &D) : Desc(D), IsOptimizableMove(false) {}
+
+ SmallVectorImpl<WriteState> &getDefs() { return Defs; }
+ const ArrayRef<WriteState> getDefs() const { return Defs; }
+ SmallVectorImpl<ReadState> &getUses() { return Uses; }
+ const ArrayRef<ReadState> getUses() const { return Uses; }
+ const InstrDesc &getDesc() const { return Desc; }
+
+ unsigned getLatency() const { return Desc.MaxLatency; }
+
+ bool hasDependentUsers() const {
+ return any_of(Defs,
+ [](const WriteState &Def) { return Def.getNumUsers() > 0; });
+ }
+
+ unsigned getNumUsers() const {
+ unsigned NumUsers = 0;
+ for (const WriteState &Def : Defs)
+ NumUsers += Def.getNumUsers();
+ return NumUsers;
+ }
+
+ // Returns true if this instruction is a candidate for move elimination.
+ bool isOptimizableMove() const { return IsOptimizableMove; }
+ void setOptimizableMove() { IsOptimizableMove = true; }
};
/// An instruction propagated through the simulated instruction pipeline.
///
/// This class is used to monitor changes to the internal state of instructions
/// that are sent to the various components of the simulated hardware pipeline.
-class Instruction {
- const InstrDesc &Desc;
-
+class Instruction : public InstructionBase {
enum InstrStage {
IS_INVALID, // Instruction in an invalid state.
IS_AVAILABLE, // Instruction dispatched but operands are not ready.
@@ -300,46 +424,14 @@ class Instruction {
// Retire Unit token ID for this instruction.
unsigned RCUTokenID;
- bool IsDepBreaking;
-
- using UniqueDef = std::unique_ptr<WriteState>;
- using UniqueUse = std::unique_ptr<ReadState>;
- using VecDefs = std::vector<UniqueDef>;
- using VecUses = std::vector<UniqueUse>;
-
- // Output dependencies.
- // One entry per each implicit and explicit register definition.
- VecDefs Defs;
-
- // Input dependencies.
- // One entry per each implicit and explicit register use.
- VecUses Uses;
-
public:
Instruction(const InstrDesc &D)
- : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), RCUTokenID(0),
- IsDepBreaking(false) {}
- Instruction(const Instruction &Other) = delete;
- Instruction &operator=(const Instruction &Other) = delete;
-
- VecDefs &getDefs() { return Defs; }
- const VecDefs &getDefs() const { return Defs; }
- VecUses &getUses() { return Uses; }
- const VecUses &getUses() const { return Uses; }
- const InstrDesc &getDesc() const { return Desc; }
+ : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
+ RCUTokenID(0) {}
+
unsigned getRCUTokenID() const { return RCUTokenID; }
int getCyclesLeft() const { return CyclesLeft; }
- bool isDependencyBreaking() const { return IsDepBreaking; }
- void setDependencyBreaking() { IsDepBreaking = true; }
-
- unsigned getNumUsers() const {
- unsigned NumUsers = 0;
- for (const UniqueDef &Def : Defs)
- NumUsers += Def->getNumUsers();
- return NumUsers;
- }
-
// Transition to the dispatch stage, and assign a RCUToken to this
// instruction. The RCUToken is used to track the completion of every
// register write performed by this instruction.
@@ -363,6 +455,15 @@ public:
bool isExecuted() const { return Stage == IS_EXECUTED; }
bool isRetired() const { return Stage == IS_RETIRED; }
+ bool isEliminated() const {
+ return isReady() && getDefs().size() &&
+ all_of(getDefs(),
+ [](const WriteState &W) { return W.isEliminated(); });
+ }
+
+ // Forces a transition from state IS_AVAILABLE to state IS_EXECUTED.
+ void forceExecuted();
+
void retire() {
assert(isExecuted() && "Instruction is in an invalid state!");
Stage = IS_RETIRED;
@@ -374,26 +475,32 @@ public:
/// An InstRef contains both a SourceMgr index and Instruction pair. The index
/// is used as a unique identifier for the instruction. MCA will make use of
/// this index as a key throughout MCA.
-class InstRef : public std::pair<unsigned, Instruction *> {
+class InstRef {
+ std::pair<unsigned, Instruction *> Data;
+
public:
- InstRef() : std::pair<unsigned, Instruction *>(0, nullptr) {}
- InstRef(unsigned Index, Instruction *I)
- : std::pair<unsigned, Instruction *>(Index, I) {}
+ InstRef() : Data(std::make_pair(0, nullptr)) {}
+ InstRef(unsigned Index, Instruction *I) : Data(std::make_pair(Index, I)) {}
+
+ bool operator==(const InstRef &Other) const { return Data == Other.Data; }
+
+ unsigned getSourceIndex() const { return Data.first; }
+ Instruction *getInstruction() { return Data.second; }
+ const Instruction *getInstruction() const { return Data.second; }
- unsigned getSourceIndex() const { return first; }
- Instruction *getInstruction() { return second; }
- const Instruction *getInstruction() const { return second; }
+ /// Returns true if this references a valid instruction.
+ operator bool() const { return Data.second != nullptr; }
- /// Returns true if this InstRef has been populated.
- bool isValid() const { return second != nullptr; }
+ /// Invalidate this reference.
+ void invalidate() { Data.second = nullptr; }
#ifndef NDEBUG
- void print(llvm::raw_ostream &OS) const { OS << getSourceIndex(); }
+ void print(raw_ostream &OS) const { OS << getSourceIndex(); }
#endif
};
#ifndef NDEBUG
-inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const InstRef &IR) {
+inline raw_ostream &operator<<(raw_ostream &OS, const InstRef &IR) {
IR.print(OS);
return OS;
}
@@ -415,20 +522,30 @@ public:
unsigned getSourceIndex() const { return Data.first; }
const WriteState *getWriteState() const { return Data.second; }
WriteState *getWriteState() { return Data.second; }
- void invalidate() { Data = std::make_pair(INVALID_IID, nullptr); }
-
- bool isValid() const {
- return Data.first != INVALID_IID && Data.second != nullptr;
+ void invalidate() { Data.second = nullptr; }
+ bool isWriteZero() const {
+ assert(isValid() && "Invalid null WriteState found!");
+ return getWriteState()->isWriteZero();
}
- bool operator==(const WriteRef &Other) const {
- return Data == Other.Data;
+
+ /// Returns true if this register write has been executed, and the new
+ /// register value is therefore available to users.
+ bool isAvailable() const {
+ if (getSourceIndex() == INVALID_IID)
+ return false;
+ const WriteState *WS = getWriteState();
+ return !WS || WS->isExecuted();
}
+ bool isValid() const { return Data.first != INVALID_IID && Data.second; }
+ bool operator==(const WriteRef &Other) const { return Data == Other.Data; }
+
#ifndef NDEBUG
void dump() const;
#endif
};
} // namespace mca
+} // namespace llvm
-#endif
+#endif // LLVM_MCA_INSTRUCTION_H
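The reworked InstRef models validity through operator bool and invalidate() instead of the old isValid() query. A minimal, illustrative fragment (consumeInstruction is an invented name) showing how a stage is expected to handle a reference:

#include "llvm/MCA/Instruction.h"

// Illustration only: typical handling of an InstRef inside a pipeline stage.
void consumeInstruction(llvm::mca::InstRef &IR) {
  if (!IR)
    return; // Default-constructed or already invalidated reference.

  llvm::mca::Instruction &IS = *IR.getInstruction();
  if (IS.isExecuted()) {
    // The slot can be recycled once the instruction has left this stage.
    IR.invalidate();
  }
}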
diff --git a/contrib/llvm/tools/llvm-mca/Pipeline.h b/contrib/llvm/include/llvm/MCA/Pipeline.h
index 6916e422be39..acd256060bdd 100644
--- a/contrib/llvm/tools/llvm-mca/Pipeline.h
+++ b/contrib/llvm/include/llvm/MCA/Pipeline.h
@@ -13,18 +13,18 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINE_H
-#define LLVM_TOOLS_LLVM_MCA_PIPELINE_H
+#ifndef LLVM_MCA_PIPELINE_H
+#define LLVM_MCA_PIPELINE_H
-#include "Scheduler.h"
-#include "Stage.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Stages/Stage.h"
+#include "llvm/Support/Error.h"
+namespace llvm {
namespace mca {
class HWEventListener;
-class HWInstructionEvent;
-class HWStallEvent;
/// A pipeline for a specific subtarget.
///
@@ -55,25 +55,25 @@ class Pipeline {
Pipeline &operator=(const Pipeline &P) = delete;
/// An ordered list of stages that define this instruction pipeline.
- llvm::SmallVector<std::unique_ptr<Stage>, 8> Stages;
+ SmallVector<std::unique_ptr<Stage>, 8> Stages;
std::set<HWEventListener *> Listeners;
unsigned Cycles;
- void preExecuteStages();
- bool executeStages(InstRef &IR);
- void postExecuteStages();
- void runCycle();
-
+ Error runCycle();
bool hasWorkToProcess();
void notifyCycleBegin();
void notifyCycleEnd();
public:
Pipeline() : Cycles(0) {}
- void appendStage(std::unique_ptr<Stage> S) { Stages.push_back(std::move(S)); }
- void run();
+ void appendStage(std::unique_ptr<Stage> S);
+
+ /// Returns the total number of simulated cycles.
+ Expected<unsigned> run();
+
void addEventListener(HWEventListener *Listener);
};
} // namespace mca
+} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_MCA_PIPELINE_H
+#endif // LLVM_MCA_PIPELINE_H
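run() now reports the number of simulated cycles wrapped in llvm::Expected instead of returning void. A hedged sketch of a driver (assembleAndRun is invented; the concrete stages are assumed to be built elsewhere):

#include "llvm/MCA/Pipeline.h"
#include <memory>
#include <vector>

// Hypothetical driver routine.
static llvm::Expected<unsigned>
assembleAndRun(std::vector<std::unique_ptr<llvm::mca::Stage>> Stages,
               llvm::mca::HWEventListener *Listener) {
  llvm::mca::Pipeline P;
  for (std::unique_ptr<llvm::mca::Stage> &S : Stages)
    P.appendStage(std::move(S));
  if (Listener)
    P.addEventListener(Listener);
  // On success, run() returns the total number of simulated cycles.
  return P.run();
}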
diff --git a/contrib/llvm/include/llvm/MCA/SourceMgr.h b/contrib/llvm/include/llvm/MCA/SourceMgr.h
new file mode 100644
index 000000000000..5e0ca6419f5d
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/SourceMgr.h
@@ -0,0 +1,57 @@
+//===--------------------- SourceMgr.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements class SourceMgr. Class SourceMgr abstracts the input
+/// code sequence (a sequence of MCInst), and assigns unique identifiers to
+/// every instruction in the sequence.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SOURCEMGR_H
+#define LLVM_MCA_SOURCEMGR_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace mca {
+
+class Instruction;
+
+typedef std::pair<unsigned, const Instruction &> SourceRef;
+
+class SourceMgr {
+ using UniqueInst = std::unique_ptr<Instruction>;
+ ArrayRef<UniqueInst> Sequence;
+ unsigned Current;
+ const unsigned Iterations;
+ static const unsigned DefaultIterations = 100;
+
+public:
+ SourceMgr(ArrayRef<UniqueInst> S, unsigned Iter)
+ : Sequence(S), Current(0), Iterations(Iter ? Iter : DefaultIterations) {}
+
+ unsigned getNumIterations() const { return Iterations; }
+ unsigned size() const { return Sequence.size(); }
+ bool hasNext() const { return Current < (Iterations * Sequence.size()); }
+ void updateNext() { ++Current; }
+
+ SourceRef peekNext() const {
+ assert(hasNext() && "Already at end of sequence!");
+ return SourceRef(Current, *Sequence[Current % Sequence.size()]);
+ }
+
+ using const_iterator = ArrayRef<UniqueInst>::const_iterator;
+ const_iterator begin() const { return Sequence.begin(); }
+ const_iterator end() const { return Sequence.end(); }
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SOURCEMGR_H
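The traversal protocol is hasNext()/peekNext()/updateNext(); peekNext() does not advance the cursor. A small illustrative loop (countSimulatedInstructions is an invented name):

#include "llvm/MCA/SourceMgr.h"

// Illustration only: visit every simulated instruction exactly once.
static unsigned countSimulatedInstructions(llvm::mca::SourceMgr &SM) {
  unsigned Count = 0;
  while (SM.hasNext()) {
    llvm::mca::SourceRef SR = SM.peekNext();
    (void)SR.first;  // Unique identifier assigned to this instruction.
    (void)SR.second; // The mca::Instruction to be simulated.
    SM.updateNext();
    ++Count;
  }
  return Count; // Equals getNumIterations() * size().
}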
diff --git a/contrib/llvm/tools/llvm-mca/DispatchStage.h b/contrib/llvm/include/llvm/MCA/Stages/DispatchStage.h
index 4262a241c08c..f015cd7522eb 100644
--- a/contrib/llvm/tools/llvm-mca/DispatchStage.h
+++ b/contrib/llvm/include/llvm/MCA/Stages/DispatchStage.h
@@ -16,21 +16,20 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H
-#define LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H
+#ifndef LLVM_MCA_DISPATCH_STAGE_H
+#define LLVM_MCA_DISPATCH_STAGE_H
-#include "HWEventListener.h"
-#include "Instruction.h"
-#include "RegisterFile.h"
-#include "RetireControlUnit.h"
-#include "Stage.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Stages/Stage.h"
+namespace llvm {
namespace mca {
-class Scheduler;
-
// Implements the hardware dispatch logic.
//
// This class is responsible for the dispatch stage, in which instructions are
@@ -49,58 +48,46 @@ class Scheduler;
//
// If the number of micro opcodes exceeds DispatchWidth, then the instruction
// is dispatched in multiple cycles.
-class DispatchStage : public Stage {
+class DispatchStage final : public Stage {
unsigned DispatchWidth;
unsigned AvailableEntries;
unsigned CarryOver;
- const llvm::MCSubtargetInfo &STI;
+ InstRef CarriedOver;
+ const MCSubtargetInfo &STI;
RetireControlUnit &RCU;
RegisterFile &PRF;
- Scheduler &SC;
-
- bool checkRCU(const InstRef &IR);
- bool checkPRF(const InstRef &IR);
- bool checkScheduler(const InstRef &IR);
- void dispatch(InstRef IR);
- void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI);
-
- void notifyInstructionDispatched(const InstRef &IR,
- llvm::ArrayRef<unsigned> UsedPhysRegs);
- bool isAvailable(unsigned NumEntries) const {
- return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth;
- }
+ bool checkRCU(const InstRef &IR) const;
+ bool checkPRF(const InstRef &IR) const;
+ bool canDispatch(const InstRef &IR) const;
+ Error dispatch(InstRef IR);
- bool canDispatch(const InstRef &IR) {
- assert(isAvailable(IR.getInstruction()->getDesc().NumMicroOps));
- return checkRCU(IR) && checkPRF(IR) && checkScheduler(IR);
- }
+ void updateRAWDependencies(ReadState &RS, const MCSubtargetInfo &STI);
- void collectWrites(llvm::SmallVectorImpl<WriteRef> &Vec,
- unsigned RegID) const {
- return PRF.collectWrites(Vec, RegID);
- }
+ void notifyInstructionDispatched(const InstRef &IR,
+ ArrayRef<unsigned> UsedPhysRegs,
+ unsigned uOps) const;
public:
- DispatchStage(const llvm::MCSubtargetInfo &Subtarget,
- const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize,
+ DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI,
unsigned MaxDispatchWidth, RetireControlUnit &R,
- RegisterFile &F, Scheduler &Sched)
+ RegisterFile &F)
: DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
- CarryOver(0U), STI(Subtarget), RCU(R), PRF(F), SC(Sched) {}
+ CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {}
+
+ bool isAvailable(const InstRef &IR) const override;
- // We can always try to dispatch, so returning false is okay in this case.
- // The retire stage, which controls the RCU, might have items to complete but
- // RetireStage::hasWorkToComplete will check for that case.
- virtual bool hasWorkToComplete() const override final { return false; }
- virtual void cycleStart() override final;
- virtual bool execute(InstRef &IR) override final;
- void notifyDispatchStall(const InstRef &IR, unsigned EventType);
+ // The dispatch logic internally doesn't buffer instructions. So there is
+ // never work to do at the beginning of every cycle.
+ bool hasWorkToComplete() const override { return false; }
+ Error cycleStart() override;
+ Error execute(InstRef &IR) override;
#ifndef NDEBUG
void dump() const;
#endif
};
} // namespace mca
+} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H
+#endif // LLVM_MCA_DISPATCH_STAGE_H
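Compared to the old in-tree tool header, the stage no longer takes a Scheduler reference or a register file size. A hypothetical factory (createDispatchStage is invented; the retire control unit and register file are assumed to be owned by the caller) showing the revised constructor:

#include "llvm/ADT/STLExtras.h"
#include "llvm/MCA/Stages/DispatchStage.h"
#include <memory>

// Hypothetical factory; illustrates the new constructor signature only.
static std::unique_ptr<llvm::mca::DispatchStage>
createDispatchStage(const llvm::MCSubtargetInfo &STI,
                    const llvm::MCRegisterInfo &MRI, unsigned DispatchWidth,
                    llvm::mca::RetireControlUnit &RCU,
                    llvm::mca::RegisterFile &PRF) {
  return llvm::make_unique<llvm::mca::DispatchStage>(STI, MRI, DispatchWidth,
                                                     RCU, PRF);
}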
diff --git a/contrib/llvm/include/llvm/MCA/Stages/EntryStage.h b/contrib/llvm/include/llvm/MCA/Stages/EntryStage.h
new file mode 100644
index 000000000000..cd9a65b8cc2b
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/Stages/EntryStage.h
@@ -0,0 +1,52 @@
+//===---------------------- EntryStage.h ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the Entry stage of an instruction pipeline. Its sole
+/// purpose in life is to pick instructions in sequence and move them to the
+/// next pipeline stage.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_ENTRY_STAGE_H
+#define LLVM_MCA_ENTRY_STAGE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/SourceMgr.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+class EntryStage final : public Stage {
+ InstRef CurrentInstruction;
+ SmallVector<std::unique_ptr<Instruction>, 16> Instructions;
+ SourceMgr &SM;
+ unsigned NumRetired;
+
+ // Updates the program counter, and sets 'CurrentInstruction'.
+ void getNextInstruction();
+
+ EntryStage(const EntryStage &Other) = delete;
+ EntryStage &operator=(const EntryStage &Other) = delete;
+
+public:
+ EntryStage(SourceMgr &SM) : CurrentInstruction(), SM(SM), NumRetired(0) { }
+
+ bool isAvailable(const InstRef &IR) const override;
+ bool hasWorkToComplete() const override;
+ Error execute(InstRef &IR) override;
+ Error cycleStart() override;
+ Error cycleEnd() override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_ENTRY_STAGE_H
diff --git a/contrib/llvm/include/llvm/MCA/Stages/ExecuteStage.h b/contrib/llvm/include/llvm/MCA/Stages/ExecuteStage.h
new file mode 100644
index 000000000000..8cb287e06d9f
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/Stages/ExecuteStage.h
@@ -0,0 +1,80 @@
+//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the execution stage of a default instruction pipeline.
+///
+/// The ExecuteStage is responsible for managing the hardware scheduler
+/// and issuing notifications that an instruction has been executed.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_EXECUTE_STAGE_H
+#define LLVM_MCA_EXECUTE_STAGE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+class ExecuteStage final : public Stage {
+ Scheduler &HWS;
+
+ Error issueInstruction(InstRef &IR);
+
+ // Called at the beginning of each cycle to issue already dispatched
+ // instructions to the underlying pipelines.
+ Error issueReadyInstructions();
+
+ // Used to notify instructions eliminated at register renaming stage.
+ Error handleInstructionEliminated(InstRef &IR);
+
+ ExecuteStage(const ExecuteStage &Other) = delete;
+ ExecuteStage &operator=(const ExecuteStage &Other) = delete;
+
+public:
+ ExecuteStage(Scheduler &S) : Stage(), HWS(S) {}
+
+ // This stage works under the assumption that the Pipeline will eventually
+ // execute a retire stage. We don't need to check if pipelines and/or
+ // schedulers have instructions to process, because those instructions are
+ // also tracked by the retire control unit. That means
+ // RetireStage::hasWorkToComplete() is responsible for checking if there
+ // are still instructions in-flight in the out-of-order backend.
+ bool hasWorkToComplete() const override { return false; }
+ bool isAvailable(const InstRef &IR) const override;
+
+ // Notifies the scheduler that a new cycle just started.
+ //
+ // It is also responsible for notifying listeners about instruction state
+ // changes, and about processor resources freed by the scheduler.
+ // Instructions that transitioned to the 'Executed' state are automatically
+ // moved to the next stage (i.e. RetireStage).
+ Error cycleStart() override;
+ Error execute(InstRef &IR) override;
+
+ void notifyInstructionIssued(
+ const InstRef &IR,
+ MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const;
+ void notifyInstructionExecuted(const InstRef &IR) const;
+ void notifyInstructionReady(const InstRef &IR) const;
+ void notifyResourceAvailable(const ResourceRef &RR) const;
+
+ // Notify listeners that buffered resources have been consumed or freed.
+ void notifyReservedOrReleasedBuffers(const InstRef &IR, bool Reserved) const;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_EXECUTE_STAGE_H
diff --git a/contrib/llvm/tools/llvm-mca/InstructionTables.h b/contrib/llvm/include/llvm/MCA/Stages/InstructionTables.h
index 18e019988430..34e338f0ce6b 100644
--- a/contrib/llvm/tools/llvm-mca/InstructionTables.h
+++ b/contrib/llvm/include/llvm/MCA/Stages/InstructionTables.h
@@ -14,30 +14,33 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H
-#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H
+#ifndef LLVM_MCA_INSTRUCTIONTABLES_H
+#define LLVM_MCA_INSTRUCTIONTABLES_H
-#include "InstrBuilder.h"
-#include "Scheduler.h"
-#include "Stage.h"
-#include "View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Stages/Stage.h"
+#include "llvm/MCA/Support.h"
+namespace llvm {
namespace mca {
-class InstructionTables : public Stage {
- const llvm::MCSchedModel &SM;
- InstrBuilder &IB;
- llvm::SmallVector<std::pair<ResourceRef, double>, 4> UsedResources;
+class InstructionTables final : public Stage {
+ const MCSchedModel &SM;
+ SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> UsedResources;
+ SmallVector<uint64_t, 8> Masks;
public:
- InstructionTables(const llvm::MCSchedModel &Model, InstrBuilder &Builder)
- : Stage(), SM(Model), IB(Builder) {}
+ InstructionTables(const MCSchedModel &Model)
+ : Stage(), SM(Model), Masks(Model.getNumProcResourceKinds()) {
+ computeProcResourceMasks(Model, Masks);
+ }
- bool hasWorkToComplete() const override final { return false; }
- bool execute(InstRef &IR) override final;
+ bool hasWorkToComplete() const override { return false; }
+ Error execute(InstRef &IR) override;
};
} // namespace mca
+} // namespace llvm
-#endif
+#endif // LLVM_MCA_INSTRUCTIONTABLES_H
diff --git a/contrib/llvm/tools/llvm-mca/RetireStage.h b/contrib/llvm/include/llvm/MCA/Stages/RetireStage.h
index 8cf672d92c6e..2051ce5c86ad 100644
--- a/contrib/llvm/tools/llvm-mca/RetireStage.h
+++ b/contrib/llvm/include/llvm/MCA/Stages/RetireStage.h
@@ -8,41 +8,41 @@
//===----------------------------------------------------------------------===//
/// \file
///
-/// This file defines the retire stage of an instruction pipeline.
+/// This file defines the retire stage of a default instruction pipeline.
/// The RetireStage represents the process logic that interacts with the
/// simulated RetireControlUnit hardware.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H
-#define LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H
+#ifndef LLVM_MCA_RETIRE_STAGE_H
+#define LLVM_MCA_RETIRE_STAGE_H
-#include "RegisterFile.h"
-#include "RetireControlUnit.h"
-#include "Stage.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/Stages/Stage.h"
+namespace llvm {
namespace mca {
-class RetireStage : public Stage {
+class RetireStage final : public Stage {
// Owner will go away when we move listeners/eventing to the stages.
RetireControlUnit &RCU;
RegisterFile &PRF;
+ RetireStage(const RetireStage &Other) = delete;
+ RetireStage &operator=(const RetireStage &Other) = delete;
+
public:
RetireStage(RetireControlUnit &R, RegisterFile &F)
: Stage(), RCU(R), PRF(F) {}
- RetireStage(const RetireStage &Other) = delete;
- RetireStage &operator=(const RetireStage &Other) = delete;
- virtual bool hasWorkToComplete() const override final {
- return !RCU.isEmpty();
- }
- virtual void cycleStart() override final;
- virtual bool execute(InstRef &IR) override final { return true; }
- void notifyInstructionRetired(const InstRef &IR);
- void onInstructionExecuted(unsigned TokenID);
+ bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
+ Error cycleStart() override;
+ Error execute(InstRef &IR) override;
+ void notifyInstructionRetired(const InstRef &IR) const;
};
} // namespace mca
+} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H
+#endif // LLVM_MCA_RETIRE_STAGE_H
diff --git a/contrib/llvm/tools/llvm-mca/Stage.h b/contrib/llvm/include/llvm/MCA/Stages/Stage.h
index 9dbdcd89a33b..fc7ab569bb0f 100644
--- a/contrib/llvm/tools/llvm-mca/Stage.h
+++ b/contrib/llvm/include/llvm/MCA/Stages/Stage.h
@@ -13,64 +13,76 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_STAGE_H
-#define LLVM_TOOLS_LLVM_MCA_STAGE_H
+#ifndef LLVM_MCA_STAGE_H
+#define LLVM_MCA_STAGE_H
-#include "HWEventListener.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/Support/Error.h"
#include <set>
+namespace llvm {
namespace mca {
class InstRef;
class Stage {
+ Stage *NextInSequence;
+ std::set<HWEventListener *> Listeners;
+
Stage(const Stage &Other) = delete;
Stage &operator=(const Stage &Other) = delete;
- std::set<HWEventListener *> Listeners;
protected:
const std::set<HWEventListener *> &getListeners() const { return Listeners; }
public:
- Stage();
- virtual ~Stage() = default;
+ Stage() : NextInSequence(nullptr) {}
+ virtual ~Stage();
+
+ /// Returns true if this stage can execute IR during this cycle.
+ virtual bool isAvailable(const InstRef &IR) const { return true; }
- /// Called prior to preExecute to ensure that the stage has items that it
- /// is to process. For example, a FetchStage might have more instructions
- /// that need to be processed, or a RCU might have items that have yet to
- /// retire.
+ /// Returns true if some instructions are still executing in this stage.
virtual bool hasWorkToComplete() const = 0;
/// Called once at the start of each cycle. This can be used as a setup
/// phase to prepare for the executions during the cycle.
- virtual void cycleStart() {}
+ virtual Error cycleStart() { return ErrorSuccess(); }
/// Called once at the end of each cycle.
- virtual void cycleEnd() {}
+ virtual Error cycleEnd() { return ErrorSuccess(); }
- /// Called prior to executing the list of stages.
- /// This can be called multiple times per cycle.
- virtual void preExecute() {}
+ /// The primary action that this stage performs on instruction IR.
+ virtual Error execute(InstRef &IR) = 0;
- /// Called as a cleanup and finalization phase after each execution.
- /// This will only be called if all stages return a success from their
- /// execute callback. This can be called multiple times per cycle.
- virtual void postExecute() {}
+ void setNextInSequence(Stage *NextStage) {
+ assert(!NextInSequence && "This stage already has a NextInSequence!");
+ NextInSequence = NextStage;
+ }
+
+ bool checkNextStage(const InstRef &IR) const {
+ return NextInSequence && NextInSequence->isAvailable(IR);
+ }
- /// The primary action that this stage performs.
- /// Returning false prevents successor stages from having their 'execute'
- /// routine called. This can be called multiple times during a single cycle.
- virtual bool execute(InstRef &IR) = 0;
+ /// Called when an instruction is ready to move to the next pipeline stage.
+ ///
+ /// Stages are responsible for moving instructions to their immediate
+ /// successor stages.
+ Error moveToTheNextStage(InstRef &IR) {
+ assert(checkNextStage(IR) && "Next stage is not ready!");
+ return NextInSequence->execute(IR);
+ }
/// Add a listener to receive callbacks during the execution of this stage.
void addListener(HWEventListener *Listener);
/// Notify listeners of a particular hardware event.
- template <typename EventT> void notifyEvent(const EventT &Event) {
+ template <typename EventT> void notifyEvent(const EventT &Event) const {
for (HWEventListener *Listener : Listeners)
Listener->onEvent(Event);
}
};
} // namespace mca
-#endif // LLVM_TOOLS_LLVM_MCA_STAGE_H
+} // namespace llvm
+#endif // LLVM_MCA_STAGE_H
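Stages now report failures through llvm::Error, and moving an instruction downstream is explicit via checkNextStage()/moveToTheNextStage(). A skeletal, purely illustrative custom stage (PassThroughStage is invented, not part of LLVM):

#include "llvm/MCA/Stages/Stage.h"

namespace {
// Invented example: a stage that simply forwards instructions downstream.
class PassThroughStage : public llvm::mca::Stage {
public:
  bool hasWorkToComplete() const override { return false; }

  llvm::Error execute(llvm::mca::InstRef &IR) override {
    // moveToTheNextStage() asserts that the successor accepts IR, so check
    // availability first.
    if (!checkNextStage(IR))
      return llvm::ErrorSuccess();
    return moveToTheNextStage(IR);
  }
};
} // namespace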
diff --git a/contrib/llvm/include/llvm/MCA/Support.h b/contrib/llvm/include/llvm/MCA/Support.h
new file mode 100644
index 000000000000..7b0c5bf3a486
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/Support.h
@@ -0,0 +1,119 @@
+//===--------------------- Support.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Helper functions used by various pipeline components.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SUPPORT_H
+#define LLVM_MCA_SUPPORT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+template <typename T>
+class InstructionError : public ErrorInfo<InstructionError<T>> {
+public:
+ static char ID;
+ std::string Message;
+ const T &Inst;
+
+ InstructionError(std::string M, const T &MCI)
+ : Message(std::move(M)), Inst(MCI) {}
+
+ void log(raw_ostream &OS) const override { OS << Message; }
+
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+};
+
+template <typename T> char InstructionError<T>::ID;
+
+/// This class represents the number of cycles per resource (fractions of
+/// cycles). That quantity is managed here as a ratio, and accessed via the
+/// double cast-operator below. The two quantities, number of cycles and
+/// number of resources, are kept separate. This is used by the
+/// ResourcePressureView to calculate the average resource cycles
+/// per instruction/iteration.
+class ResourceCycles {
+ unsigned Numerator, Denominator;
+
+public:
+ ResourceCycles() : Numerator(0), Denominator(1) {}
+ ResourceCycles(unsigned Cycles, unsigned ResourceUnits = 1)
+ : Numerator(Cycles), Denominator(ResourceUnits) {}
+
+ operator double() const {
+ assert(Denominator && "Invalid denominator (must be non-zero).");
+ return (Denominator == 1) ? Numerator : (double)Numerator / Denominator;
+ }
+
+ // Add the components of RHS to this instance. Instead of calculating
+ // the final value here, we keep track of the numerator and denominator
+ // separately, to reduce floating point error.
+ ResourceCycles &operator+=(const ResourceCycles &RHS) {
+ if (Denominator == RHS.Denominator)
+ Numerator += RHS.Numerator;
+ else {
+ // Create a common denominator for LHS and RHS by calculating the least
+ // common multiple from the GCD.
+ unsigned GCD = GreatestCommonDivisor64(Denominator, RHS.Denominator);
+ unsigned LCM = (Denominator * RHS.Denominator) / GCD;
+ unsigned LHSNumerator = Numerator * (LCM / Denominator);
+ unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator);
+ Numerator = LHSNumerator + RHSNumerator;
+ Denominator = LCM;
+ }
+ return *this;
+ }
+};
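A worked example of the accumulation above (illustrative code, not part of this header; it assumes the ResourceCycles declaration is visible): three cycles spread over a two-unit group plus one cycle on a single unit yield 3/2 + 1 = 2.5 cycles.

// 3 cycles over 2 resource units, then 1 cycle over a single unit.
static double exampleResourceCycles() {
  llvm::mca::ResourceCycles RC(3, 2);
  RC += llvm::mca::ResourceCycles(1);
  return static_cast<double>(RC); // 2.5
}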
+
+/// Populates vector Masks with processor resource masks.
+///
+/// The number of bits set in a mask depends on the processor resource type.
+/// Each processor resource mask has at least one bit set. For groups, the
+/// number of bits set in the mask is equal to the cardinality of the group plus
+/// one. Excluding the most significant bit, the remaining bits in the mask
+/// identify processor resources that are part of the group.
+///
+/// Example:
+///
+/// ResourceA -- Mask: 0b001
+/// ResourceB -- Mask: 0b010
+/// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111
+///
+/// ResourceAB is a processor resource group containing ResourceA and ResourceB.
+/// Each resource mask uniquely identifies a resource; both ResourceA and
+/// ResourceB only have one bit set.
+/// ResourceAB is a group; excluding the most significant bit in the mask, the
+/// remaining bits identify the composition of the group.
+///
+/// Resource masks are used by the ResourceManager to solve set membership
+/// problems with simple bit manipulation operations.
+void computeProcResourceMasks(const MCSchedModel &SM,
+ MutableArrayRef<uint64_t> Masks);
+
+/// Compute the reciprocal block throughput from a set of processor resource
+/// cycles. The reciprocal block throughput is computed as the MAX between:
+/// - NumMicroOps / DispatchWidth
+/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource).
+double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
+ unsigned NumMicroOps,
+ ArrayRef<unsigned> ProcResourceUsage);
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SUPPORT_H
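To make the mask encoding documented for computeProcResourceMasks concrete, here is a self-contained restatement of the three-resource example from the comment (the constant names are invented):

#include <cstdint>

constexpr uint64_t ResourceA = 0x1;                          // Mask: 0b001
constexpr uint64_t ResourceB = 0x2;                          // Mask: 0b010
// The group gets a fresh bit (0b100) plus the bits of its members.
constexpr uint64_t ResourceAB = 0x4 | ResourceA | ResourceB; // Mask: 0b111

// Clearing the group bit leaves exactly the members, so set membership
// reduces to a bitwise AND.
static_assert((ResourceAB & ~uint64_t(0x4)) == (ResourceA | ResourceB),
              "ResourceAB is composed of ResourceA and ResourceB");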
diff --git a/contrib/llvm/include/llvm/Object/COFF.h b/contrib/llvm/include/llvm/Object/COFF.h
index 6caadea0175b..b753d261a0fc 100644
--- a/contrib/llvm/include/llvm/Object/COFF.h
+++ b/contrib/llvm/include/llvm/Object/COFF.h
@@ -594,6 +594,8 @@ enum class coff_guard_flags : uint32_t {
FidTableHasFlags = 0x10000000, // Indicates that fid tables are 5 bytes
};
+enum class frame_type : uint16_t { Fpo = 0, Trap = 1, Tss = 2, NonFpo = 3 };
+
struct coff_load_config_code_integrity {
support::ulittle16_t Flags;
support::ulittle16_t Catalog;
@@ -883,6 +885,7 @@ public:
assert(is64());
return reinterpret_cast<const coff_load_configuration64 *>(LoadConfig);
}
+ StringRef getRelocationTypeName(uint16_t Type) const;
protected:
void moveSymbolNext(DataRefImpl &Symb) const override;
@@ -968,6 +971,9 @@ public:
return nullptr;
return reinterpret_cast<const dos_header *>(base());
}
+ std::error_code getCOFFHeader(const coff_file_header *&Res) const;
+ std::error_code
+ getCOFFBigObjHeader(const coff_bigobj_file_header *&Res) const;
std::error_code getPE32Header(const pe32_header *&Res) const;
std::error_code getPE32PlusHeader(const pe32plus_header *&Res) const;
std::error_code getDataDirectory(uint32_t index,
@@ -1016,6 +1022,8 @@ public:
ArrayRef<uint8_t> getSymbolAuxData(COFFSymbolRef Symbol) const;
+ uint32_t getSymbolIndex(COFFSymbolRef Symbol) const;
+
size_t getSymbolTableEntrySize() const {
if (COFFHeader)
return sizeof(coff_symbol16);
@@ -1059,6 +1067,8 @@ public:
bool isRelocatableObject() const override;
bool is64() const { return PE32PlusHeader; }
+ StringRef mapDebugSectionName(StringRef Name) const override;
+
static bool classof(const Binary *v) { return v->isCOFF(); }
};
@@ -1227,7 +1237,7 @@ struct FpoData {
bool useBP() const { return (Attributes >> 10) & 1; }
// cbFrame: frame pointer
- int getFP() const { return Attributes >> 14; }
+ frame_type getFP() const { return static_cast<frame_type>(Attributes >> 14); }
};
} // end namespace object
diff --git a/contrib/llvm/include/llvm/Object/ELF.h b/contrib/llvm/include/llvm/Object/ELF.h
index 752d468fd25b..bcdc190cc7dc 100644
--- a/contrib/llvm/include/llvm/Object/ELF.h
+++ b/contrib/llvm/include/llvm/Object/ELF.h
@@ -32,7 +32,7 @@ namespace llvm {
namespace object {
StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type);
-uint32_t getELFRelrRelocationType(uint32_t Machine);
+uint32_t getELFRelativeRelocationType(uint32_t Machine);
StringRef getELFSectionTypeName(uint32_t Machine, uint32_t Type);
// Subclasses of ELFFile may need this for template instantiation
@@ -113,7 +113,7 @@ public:
StringRef getRelocationTypeName(uint32_t Type) const;
void getRelocationTypeName(uint32_t Type,
SmallVectorImpl<char> &Result) const;
- uint32_t getRelrRelocationType() const;
+ uint32_t getRelativeRelocationType() const;
const char *getDynamicTagAsString(unsigned Arch, uint64_t Type) const;
const char *getDynamicTagAsString(uint64_t Type) const;
@@ -415,8 +415,8 @@ void ELFFile<ELFT>::getRelocationTypeName(uint32_t Type,
}
template <class ELFT>
-uint32_t ELFFile<ELFT>::getRelrRelocationType() const {
- return getELFRelrRelocationType(getHeader()->e_machine);
+uint32_t ELFFile<ELFT>::getRelativeRelocationType() const {
+ return getELFRelativeRelocationType(getHeader()->e_machine);
}
template <class ELFT>
diff --git a/contrib/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm/include/llvm/Object/ELFObjectFile.h
index 2c0905d545a7..0f620681cd99 100644
--- a/contrib/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/ELFObjectFile.h
@@ -86,6 +86,8 @@ public:
void setARMSubArch(Triple &TheTriple) const override;
virtual uint16_t getEType() const = 0;
+
+ std::vector<std::pair<DataRefImpl, uint64_t>> getPltAddresses() const;
};
class ELFSectionRef : public SectionRef {
@@ -258,6 +260,8 @@ protected:
bool isSectionData(DataRefImpl Sec) const override;
bool isSectionBSS(DataRefImpl Sec) const override;
bool isSectionVirtual(DataRefImpl Sec) const override;
+ bool isBerkeleyText(DataRefImpl Sec) const override;
+ bool isBerkeleyData(DataRefImpl Sec) const override;
relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
relocation_iterator section_rel_end(DataRefImpl Sec) const override;
std::vector<SectionRef> dynamic_relocation_sections() const override;
@@ -331,9 +335,10 @@ protected:
// A symbol is exported if its binding is either GLOBAL or WEAK, and its
// visibility is either DEFAULT or PROTECTED. All other symbols are not
// exported.
- return ((Binding == ELF::STB_GLOBAL || Binding == ELF::STB_WEAK) &&
- (Visibility == ELF::STV_DEFAULT ||
- Visibility == ELF::STV_PROTECTED));
+ return (
+ (Binding == ELF::STB_GLOBAL || Binding == ELF::STB_WEAK ||
+ Binding == ELF::STB_GNU_UNIQUE) &&
+ (Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_PROTECTED));
}
// This flag is used for classof, to distinguish ELFObjectFile from
@@ -757,6 +762,20 @@ bool ELFObjectFile<ELFT>::isSectionVirtual(DataRefImpl Sec) const {
}
template <class ELFT>
+bool ELFObjectFile<ELFT>::isBerkeleyText(DataRefImpl Sec) const {
+ return getSection(Sec)->sh_flags & ELF::SHF_ALLOC &&
+ (getSection(Sec)->sh_flags & ELF::SHF_EXECINSTR ||
+ !(getSection(Sec)->sh_flags & ELF::SHF_WRITE));
+}
+
+template <class ELFT>
+bool ELFObjectFile<ELFT>::isBerkeleyData(DataRefImpl Sec) const {
+ const Elf_Shdr *EShdr = getSection(Sec);
+ return !isBerkeleyText(Sec) && EShdr->sh_type != ELF::SHT_NOBITS &&
+ EShdr->sh_flags & ELF::SHF_ALLOC;
+}
+
+template <class ELFT>
relocation_iterator
ELFObjectFile<ELFT>::section_rel_begin(DataRefImpl Sec) const {
DataRefImpl RelData;
@@ -1019,6 +1038,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
return "ELF32-lanai";
case ELF::EM_MIPS:
return "ELF32-mips";
+ case ELF::EM_MSP430:
+ return "ELF32-msp430";
case ELF::EM_PPC:
return "ELF32-ppc";
case ELF::EM_RISCV:
@@ -1089,6 +1110,8 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
default:
report_fatal_error("Invalid ELFCLASS!");
}
+ case ELF::EM_MSP430:
+ return Triple::msp430;
case ELF::EM_PPC:
return Triple::ppc;
case ELF::EM_PPC64:
diff --git a/contrib/llvm/include/llvm/Object/ELFTypes.h b/contrib/llvm/include/llvm/Object/ELFTypes.h
index fb386120e34d..ec3c8e7bae46 100644
--- a/contrib/llvm/include/llvm/Object/ELFTypes.h
+++ b/contrib/llvm/include/llvm/Object/ELFTypes.h
@@ -605,13 +605,12 @@ public:
}
/// Get the note's descriptor.
- ArrayRef<Elf_Word> getDesc() const {
+ ArrayRef<uint8_t> getDesc() const {
if (!Nhdr.n_descsz)
- return ArrayRef<Elf_Word>();
- return ArrayRef<Elf_Word>(
- reinterpret_cast<const Elf_Word *>(
- reinterpret_cast<const uint8_t *>(&Nhdr) + sizeof(Nhdr) +
- alignTo<Elf_Nhdr_Impl<ELFT>::Align>(Nhdr.n_namesz)),
+ return ArrayRef<uint8_t>();
+ return ArrayRef<uint8_t>(
+ reinterpret_cast<const uint8_t *>(&Nhdr) + sizeof(Nhdr) +
+ alignTo<Elf_Nhdr_Impl<ELFT>::Align>(Nhdr.n_namesz),
Nhdr.n_descsz);
}
@@ -643,14 +642,19 @@ class Elf_Note_Iterator_Impl
// container, either cleanly or with an overflow error.
void advanceNhdr(const uint8_t *NhdrPos, size_t NoteSize) {
RemainingSize -= NoteSize;
- if (RemainingSize == 0u)
+ if (RemainingSize == 0u) {
+ // Ensure that if the iterator walks to the end, the error is checked
+ // afterwards.
+ *Err = Error::success();
Nhdr = nullptr;
- else if (sizeof(*Nhdr) > RemainingSize)
+ } else if (sizeof(*Nhdr) > RemainingSize)
stopWithOverflowError();
else {
Nhdr = reinterpret_cast<const Elf_Nhdr_Impl<ELFT> *>(NhdrPos + NoteSize);
if (Nhdr->getSize() > RemainingSize)
stopWithOverflowError();
+ else
+ *Err = Error::success();
}
}
@@ -658,6 +662,7 @@ class Elf_Note_Iterator_Impl
explicit Elf_Note_Iterator_Impl(Error &Err) : Err(&Err) {}
Elf_Note_Iterator_Impl(const uint8_t *Start, size_t Size, Error &Err)
: RemainingSize(Size), Err(&Err) {
+ consumeError(std::move(Err));
assert(Start && "ELF note iterator starting at NULL");
advanceNhdr(Start, 0u);
}
@@ -671,6 +676,10 @@ public:
return *this;
}
bool operator==(Elf_Note_Iterator_Impl Other) const {
+ if (!Nhdr && Other.Err)
+ (void)(bool)(*Other.Err);
+ if (!Other.Nhdr && Err)
+ (void)(bool)(*Err);
return Nhdr == Other.Nhdr;
}
bool operator!=(Elf_Note_Iterator_Impl Other) const {
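With descriptors now exposed as raw bytes and the iterator reporting back through the Error it was constructed with, a consumer loop looks roughly like the sketch below. This is a hedged example, not code from this change: it assumes an ELFFile<ELFT> named Obj, a PT_NOTE program header Phdr, and the ELFFile<ELFT>::notes(Phdr, Err) accessor and typename ELFT::Phdr alias.

    // Count the notes in one PT_NOTE segment, propagating iterator errors.
    template <class ELFT>
    static llvm::Expected<unsigned>
    countNotes(const llvm::object::ELFFile<ELFT> &Obj,
               const typename ELFT::Phdr &Phdr) {
      unsigned N = 0;
      llvm::Error Err = llvm::Error::success();
      for (auto &&Note : Obj.notes(Phdr, Err)) {
        llvm::ArrayRef<uint8_t> Desc = Note.getDesc(); // descriptor as raw bytes
        (void)Desc;
        ++N;
      }
      if (Err) // truncated or overflowing notes surface here
        return std::move(Err);
      return N;
    }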
diff --git a/contrib/llvm/include/llvm/Object/Error.h b/contrib/llvm/include/llvm/Object/Error.h
index eb938338715d..a15f8b9236eb 100644
--- a/contrib/llvm/include/llvm/Object/Error.h
+++ b/contrib/llvm/include/llvm/Object/Error.h
@@ -50,6 +50,7 @@ inline std::error_code make_error_code(object_error e) {
/// Currently inherits from ECError for easy interoperability with
/// std::error_code, but this will be removed in the future.
class BinaryError : public ErrorInfo<BinaryError, ECError> {
+ virtual void anchor();
public:
static char ID;
BinaryError() {
diff --git a/contrib/llvm/include/llvm/Object/MachO.h b/contrib/llvm/include/llvm/Object/MachO.h
index 159c1765ab86..c2f4f4062934 100644
--- a/contrib/llvm/include/llvm/Object/MachO.h
+++ b/contrib/llvm/include/llvm/Object/MachO.h
@@ -356,7 +356,7 @@ public:
basic_symbol_iterator symbol_end() const override;
// MachO specific.
- basic_symbol_iterator getSymbolByIndex(unsigned Index) const;
+ symbol_iterator getSymbolByIndex(unsigned Index) const;
uint64_t getSymbolIndex(DataRefImpl Symb) const;
section_iterator section_begin() const override;
@@ -616,6 +616,9 @@ public:
case MachO::PLATFORM_TVOS: return "tvos";
case MachO::PLATFORM_WATCHOS: return "watchos";
case MachO::PLATFORM_BRIDGEOS: return "bridgeos";
+ case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator";
+ case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator";
+ case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
default:
std::string ret;
raw_string_ostream ss(ret);
diff --git a/contrib/llvm/include/llvm/Object/ObjectFile.h b/contrib/llvm/include/llvm/Object/ObjectFile.h
index 02d62e8e4879..036c99cb6baf 100644
--- a/contrib/llvm/include/llvm/Object/ObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/ObjectFile.h
@@ -104,13 +104,25 @@ public:
uint64_t getAlignment() const;
bool isCompressed() const;
+ /// Whether this section contains instructions.
bool isText() const;
+ /// Whether this section contains data, not instructions.
bool isData() const;
+ /// Whether this section contains BSS uninitialized data.
bool isBSS() const;
bool isVirtual() const;
bool isBitcode() const;
bool isStripped() const;
+ /// Whether this section will be placed in the text segment, according to the
+ /// Berkeley size format. This is true if the section is allocatable, and
+ /// contains either code or readonly data.
+ bool isBerkeleyText() const;
+ /// Whether this section will be placed in the data segment, according to the
+ /// Berkeley size format. This is true if the section is allocatable and
+ /// contains data (e.g. PROGBITS), but is not text.
+ bool isBerkeleyData() const;
+
bool containsSymbol(SymbolRef S) const;
relocation_iterator relocation_begin() const;
@@ -238,6 +250,8 @@ protected:
virtual bool isSectionVirtual(DataRefImpl Sec) const = 0;
virtual bool isSectionBitcode(DataRefImpl Sec) const;
virtual bool isSectionStripped(DataRefImpl Sec) const;
+ virtual bool isBerkeleyText(DataRefImpl Sec) const;
+ virtual bool isBerkeleyData(DataRefImpl Sec) const;
virtual relocation_iterator section_rel_begin(DataRefImpl Sec) const = 0;
virtual relocation_iterator section_rel_end(DataRefImpl Sec) const = 0;
virtual section_iterator getRelocatedSection(DataRefImpl Sec) const;
@@ -449,6 +463,14 @@ inline bool SectionRef::isStripped() const {
return OwningObject->isSectionStripped(SectionPimpl);
}
+inline bool SectionRef::isBerkeleyText() const {
+ return OwningObject->isBerkeleyText(SectionPimpl);
+}
+
+inline bool SectionRef::isBerkeleyData() const {
+ return OwningObject->isBerkeleyData(SectionPimpl);
+}
+
inline relocation_iterator SectionRef::relocation_begin() const {
return OwningObject->section_rel_begin(SectionPimpl);
}
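The two Berkeley predicates let tools reproduce size(1)'s Berkeley totals without format-specific logic. A minimal sketch, assuming ObjFile is an already-loaded llvm::object::ObjectFile:

    uint64_t Text = 0, Data = 0, Bss = 0;
    for (const llvm::object::SectionRef &Sec : ObjFile.sections()) {
      if (Sec.isBerkeleyText())
        Text += Sec.getSize();   // allocatable code or read-only data
      else if (Sec.isBerkeleyData())
        Data += Sec.getSize();   // allocatable, initialized data
      else if (Sec.isBSS())
        Bss += Sec.getSize();    // allocatable but occupies no file space
    }
    // `size -B` style total: Text + Data + Bss.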
diff --git a/contrib/llvm/include/llvm/Object/RelocVisitor.h b/contrib/llvm/include/llvm/Object/RelocVisitor.h
index 008e109f6679..9a978de2e599 100644
--- a/contrib/llvm/include/llvm/Object/RelocVisitor.h
+++ b/contrib/llvm/include/llvm/Object/RelocVisitor.h
@@ -129,6 +129,8 @@ private:
case ELF::R_X86_64_NONE:
return 0;
case ELF::R_X86_64_64:
+ case ELF::R_X86_64_DTPOFF32:
+ case ELF::R_X86_64_DTPOFF64:
return Value + getELFAddend(R);
case ELF::R_X86_64_PC32:
return Value + getELFAddend(R) - R.getOffset();
@@ -333,6 +335,7 @@ private:
case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
+ case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
// For wasm section, its offset at 0 -- ignoring Value
return 0;
}
diff --git a/contrib/llvm/include/llvm/Object/Wasm.h b/contrib/llvm/include/llvm/Object/Wasm.h
index fd34e45feb62..ed857652a048 100644
--- a/contrib/llvm/include/llvm/Object/Wasm.h
+++ b/contrib/llvm/include/llvm/Object/Wasm.h
@@ -18,10 +18,11 @@
#define LLVM_OBJECT_WASM_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
@@ -36,13 +37,16 @@ namespace object {
class WasmSymbol {
public:
WasmSymbol(const wasm::WasmSymbolInfo &Info,
- const wasm::WasmSignature *FunctionType,
- const wasm::WasmGlobalType *GlobalType)
- : Info(Info), FunctionType(FunctionType), GlobalType(GlobalType) {}
+ const wasm::WasmGlobalType *GlobalType,
+ const wasm::WasmEventType *EventType,
+ const wasm::WasmSignature *Signature)
+ : Info(Info), GlobalType(GlobalType), EventType(EventType),
+ Signature(Signature) {}
const wasm::WasmSymbolInfo &Info;
- const wasm::WasmSignature *FunctionType;
const wasm::WasmGlobalType *GlobalType;
+ const wasm::WasmEventType *EventType;
+ const wasm::WasmSignature *Signature;
bool isTypeFunction() const {
return Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION;
@@ -58,6 +62,8 @@ public:
return Info.Kind == wasm::WASM_SYMBOL_TYPE_SECTION;
}
+ bool isTypeEvent() const { return Info.Kind == wasm::WASM_SYMBOL_TYPE_EVENT; }
+
bool isDefined() const { return !isUndefined(); }
bool isUndefined() const {
@@ -98,9 +104,9 @@ public:
struct WasmSection {
WasmSection() = default;
- uint32_t Type = 0; // Section type (See below)
- uint32_t Offset = 0; // Offset with in the file
- StringRef Name; // Section name (User-defined sections only)
+ uint32_t Type = 0; // Section type (See below)
+  uint32_t Offset = 0;       // Offset within the file

+ StringRef Name; // Section name (User-defined sections only)
ArrayRef<uint8_t> Content; // Section content
std::vector<wasm::WasmRelocation> Relocations; // Relocations for this section
};
@@ -119,19 +125,21 @@ public:
const WasmSymbol &getWasmSymbol(const DataRefImpl &Symb) const;
const WasmSymbol &getWasmSymbol(const SymbolRef &Symbol) const;
const WasmSection &getWasmSection(const SectionRef &Section) const;
- const wasm::WasmRelocation &getWasmRelocation(const RelocationRef& Ref) const;
+ const wasm::WasmRelocation &getWasmRelocation(const RelocationRef &Ref) const;
static bool classof(const Binary *v) { return v->isWasm(); }
+ const wasm::WasmDylinkInfo &dylinkInfo() const { return DylinkInfo; }
ArrayRef<wasm::WasmSignature> types() const { return Signatures; }
ArrayRef<uint32_t> functionTypes() const { return FunctionTypes; }
ArrayRef<wasm::WasmImport> imports() const { return Imports; }
ArrayRef<wasm::WasmTable> tables() const { return Tables; }
ArrayRef<wasm::WasmLimits> memories() const { return Memories; }
ArrayRef<wasm::WasmGlobal> globals() const { return Globals; }
+ ArrayRef<wasm::WasmEvent> events() const { return Events; }
ArrayRef<wasm::WasmExport> exports() const { return Exports; }
ArrayRef<WasmSymbol> syms() const { return Symbols; }
- const wasm::WasmLinkingData& linkingData() const { return LinkingData; }
+ const wasm::WasmLinkingData &linkingData() const { return LinkingData; }
uint32_t getNumberOfSymbols() const { return Symbols.size(); }
ArrayRef<wasm::WasmElemSegment> elements() const { return ElemSegments; }
ArrayRef<WasmSegment> dataSegments() const { return DataSegments; }
@@ -140,6 +148,7 @@ public:
uint32_t startFunction() const { return StartFunction; }
uint32_t getNumImportedGlobals() const { return NumImportedGlobals; }
uint32_t getNumImportedFunctions() const { return NumImportedFunctions; }
+ uint32_t getNumImportedEvents() const { return NumImportedEvents; }
void moveSymbolNext(DataRefImpl &Symb) const override;
@@ -151,7 +160,7 @@ public:
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
- uint64_t getWasmSymbolValue(const WasmSymbol& Sym) const;
+ uint64_t getWasmSymbolValue(const WasmSymbol &Sym) const;
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
@@ -192,6 +201,7 @@ public:
Triple::ArchType getArch() const override;
SubtargetFeatures getFeatures() const override;
bool isRelocatableObject() const override;
+ bool isSharedObject() const;
struct ReadContext {
const uint8_t *Start;
@@ -204,12 +214,16 @@ private:
bool isDefinedFunctionIndex(uint32_t Index) const;
bool isValidGlobalIndex(uint32_t Index) const;
bool isDefinedGlobalIndex(uint32_t Index) const;
+ bool isValidEventIndex(uint32_t Index) const;
+ bool isDefinedEventIndex(uint32_t Index) const;
bool isValidFunctionSymbol(uint32_t Index) const;
bool isValidGlobalSymbol(uint32_t Index) const;
+ bool isValidEventSymbol(uint32_t Index) const;
bool isValidDataSymbol(uint32_t Index) const;
bool isValidSectionSymbol(uint32_t Index) const;
wasm::WasmFunction &getDefinedFunction(uint32_t Index);
wasm::WasmGlobal &getDefinedGlobal(uint32_t Index);
+ wasm::WasmEvent &getDefinedEvent(uint32_t Index);
const WasmSection &getWasmSection(DataRefImpl Ref) const;
const wasm::WasmRelocation &getWasmRelocation(DataRefImpl Ref) const;
@@ -225,6 +239,7 @@ private:
Error parseTableSection(ReadContext &Ctx);
Error parseMemorySection(ReadContext &Ctx);
Error parseGlobalSection(ReadContext &Ctx);
+ Error parseEventSection(ReadContext &Ctx);
Error parseExportSection(ReadContext &Ctx);
Error parseStartSection(ReadContext &Ctx);
Error parseElemSection(ReadContext &Ctx);
@@ -232,6 +247,7 @@ private:
Error parseDataSection(ReadContext &Ctx);
// Custom section types
+ Error parseDylinkSection(ReadContext &Ctx);
Error parseNameSection(ReadContext &Ctx);
Error parseLinkingSection(ReadContext &Ctx);
Error parseLinkingSectionSymtab(ReadContext &Ctx);
@@ -240,11 +256,13 @@ private:
wasm::WasmObjectHeader Header;
std::vector<WasmSection> Sections;
+ wasm::WasmDylinkInfo DylinkInfo;
std::vector<wasm::WasmSignature> Signatures;
std::vector<uint32_t> FunctionTypes;
std::vector<wasm::WasmTable> Tables;
std::vector<wasm::WasmLimits> Memories;
std::vector<wasm::WasmGlobal> Globals;
+ std::vector<wasm::WasmEvent> Events;
std::vector<wasm::WasmImport> Imports;
std::vector<wasm::WasmExport> Exports;
std::vector<wasm::WasmElemSegment> ElemSegments;
@@ -254,18 +272,63 @@ private:
std::vector<wasm::WasmFunctionName> DebugNames;
uint32_t StartFunction = -1;
bool HasLinkingSection = false;
+ bool HasDylinkSection = false;
wasm::WasmLinkingData LinkingData;
uint32_t NumImportedGlobals = 0;
uint32_t NumImportedFunctions = 0;
+ uint32_t NumImportedEvents = 0;
uint32_t CodeSection = 0;
uint32_t DataSection = 0;
uint32_t GlobalSection = 0;
+ uint32_t EventSection = 0;
+};
+
+class WasmSectionOrderChecker {
+public:
+ // We define orders for all core wasm sections and known custom sections.
+ enum : int {
+ // Core sections
+ // The order of standard sections is precisely given by the spec.
+ WASM_SEC_ORDER_TYPE = 1,
+ WASM_SEC_ORDER_IMPORT = 2,
+ WASM_SEC_ORDER_FUNCTION = 3,
+ WASM_SEC_ORDER_TABLE = 4,
+ WASM_SEC_ORDER_MEMORY = 5,
+ WASM_SEC_ORDER_GLOBAL = 6,
+ WASM_SEC_ORDER_EVENT = 7,
+ WASM_SEC_ORDER_EXPORT = 8,
+ WASM_SEC_ORDER_START = 9,
+ WASM_SEC_ORDER_ELEM = 10,
+ WASM_SEC_ORDER_DATACOUNT = 11,
+ WASM_SEC_ORDER_CODE = 12,
+ WASM_SEC_ORDER_DATA = 13,
+
+ // Custom sections
+ // "dylink" should be the very first section in the module
+ WASM_SEC_ORDER_DYLINK = 0,
+ // "linking" section requires DATA section in order to validate data symbols
+ WASM_SEC_ORDER_LINKING = 100,
+ // Must come after "linking" section in order to validate reloc indexes.
+ WASM_SEC_ORDER_RELOC = 101,
+ // "name" section must appear after DATA. Comes after "linking" to allow
+ // symbol table to set default function name.
+ WASM_SEC_ORDER_NAME = 102,
+ // "producers" section must appear after "name" section.
+ WASM_SEC_ORDER_PRODUCERS = 103
+ };
+
+ bool isValidSectionOrder(unsigned ID, StringRef CustomSectionName = "");
+
+private:
+  int LastOrder = -1; // Order of the most recently seen known section
+
+ // Returns -1 for unknown sections.
+ int getSectionOrder(unsigned ID, StringRef CustomSectionName = "");
};
} // end namespace object
-inline raw_ostream &operator<<(raw_ostream &OS,
- const object::WasmSymbol &Sym) {
+inline raw_ostream &operator<<(raw_ostream &OS, const object::WasmSymbol &Sym) {
Sym.print(OS);
return OS;
}
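A hedged sketch of how the order checker is meant to be driven while a module's sections are scanned; Sections stands in for the parsed section list and the error reporting is only illustrative:

    llvm::object::WasmSectionOrderChecker Checker;
    for (const llvm::object::WasmSection &Sec : Sections) {
      if (!Checker.isValidSectionOrder(Sec.Type, Sec.Name))
        return llvm::make_error<llvm::object::GenericBinaryError>(
            "out-of-order section", llvm::object::object_error::parse_failed);
    }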
diff --git a/contrib/llvm/include/llvm/Object/WasmTraits.h b/contrib/llvm/include/llvm/Object/WasmTraits.h
index ebcd00b15227..049d72f79e41 100644
--- a/contrib/llvm/include/llvm/Object/WasmTraits.h
+++ b/contrib/llvm/include/llvm/Object/WasmTraits.h
@@ -24,14 +24,20 @@ template <typename T> struct DenseMapInfo;
// Traits for using WasmSignature in a DenseMap.
template <> struct DenseMapInfo<wasm::WasmSignature> {
static wasm::WasmSignature getEmptyKey() {
- return wasm::WasmSignature{{}, 1};
+ wasm::WasmSignature Sig;
+ Sig.State = wasm::WasmSignature::Empty;
+ return Sig;
}
static wasm::WasmSignature getTombstoneKey() {
- return wasm::WasmSignature{{}, 2};
+ wasm::WasmSignature Sig;
+ Sig.State = wasm::WasmSignature::Tombstone;
+ return Sig;
}
static unsigned getHashValue(const wasm::WasmSignature &Sig) {
- unsigned H = hash_value(Sig.ReturnType);
- for (int32_t Param : Sig.ParamTypes)
+ uintptr_t H = hash_value(Sig.State);
+ for (auto Ret : Sig.Returns)
+ H = hash_combine(H, Ret);
+ for (auto Param : Sig.Params)
H = hash_combine(H, Param);
return H;
}
diff --git a/contrib/llvm/include/llvm/ObjectYAML/COFFYAML.h b/contrib/llvm/include/llvm/ObjectYAML/COFFYAML.h
index 78f021fc0386..253c627dd683 100644
--- a/contrib/llvm/include/llvm/ObjectYAML/COFFYAML.h
+++ b/contrib/llvm/include/llvm/ObjectYAML/COFFYAML.h
@@ -58,7 +58,13 @@ LLVM_YAML_STRONG_TYPEDEF(uint8_t, AuxSymbolType)
struct Relocation {
uint32_t VirtualAddress;
uint16_t Type;
+
+ // Normally a Relocation can refer to the symbol via its name.
+ // It can also use a direct symbol table index instead (with no name
+  // specified), allowing disambiguation between multiple symbols with the
+ // same name or crafting intentionally broken files for testing.
StringRef SymbolName;
+ Optional<uint32_t> SymbolTableIndex;
};
struct Section {
diff --git a/contrib/llvm/include/llvm/ObjectYAML/ELFYAML.h b/contrib/llvm/include/llvm/ObjectYAML/ELFYAML.h
index 6fc69735f1c7..f2b0c35521f0 100644
--- a/contrib/llvm/include/llvm/ObjectYAML/ELFYAML.h
+++ b/contrib/llvm/include/llvm/ObjectYAML/ELFYAML.h
@@ -68,6 +68,7 @@ struct FileHeader {
ELF_ELFCLASS Class;
ELF_ELFDATA Data;
ELF_ELFOSABI OSABI;
+ llvm::yaml::Hex8 ABIVersion;
ELF_ET Type;
ELF_EM Machine;
ELF_EF Flags;
@@ -123,6 +124,7 @@ struct Section {
StringRef Link;
StringRef Info;
llvm::yaml::Hex64 AddressAlign;
+ Optional<llvm::yaml::Hex64> EntSize;
Section(SectionKind Kind) : Kind(Kind) {}
virtual ~Section();
diff --git a/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h b/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h
index 8cd08e520560..406dd7cb515f 100644
--- a/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h
+++ b/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h
@@ -74,6 +74,12 @@ struct Global {
wasm::WasmInitExpr InitExpr;
};
+struct Event {
+ uint32_t Index;
+ uint32_t Attribute;
+ uint32_t SigIndex;
+};
+
struct Import {
StringRef Module;
StringRef Field;
@@ -83,6 +89,7 @@ struct Import {
Global GlobalImport;
Table TableImport;
Limits Memory;
+ Event EventImport;
};
};
@@ -176,6 +183,21 @@ struct CustomSection : Section {
yaml::BinaryRef Payload;
};
+struct DylinkSection : CustomSection {
+ DylinkSection() : CustomSection("dylink") {}
+
+ static bool classof(const Section *S) {
+ auto C = dyn_cast<CustomSection>(S);
+ return C && C->Name == "dylink";
+ }
+
+ uint32_t MemorySize;
+ uint32_t MemoryAlignment;
+ uint32_t TableSize;
+ uint32_t TableAlignment;
+ std::vector<StringRef> Needed;
+};
+
struct NameSection : CustomSection {
NameSection() : CustomSection("name") {}
@@ -262,6 +284,16 @@ struct GlobalSection : Section {
std::vector<Global> Globals;
};
+struct EventSection : Section {
+ EventSection() : Section(wasm::WASM_SEC_EVENT) {}
+
+ static bool classof(const Section *S) {
+ return S->Type == wasm::WASM_SEC_EVENT;
+ }
+
+ std::vector<Event> Events;
+};
+
struct ExportSection : Section {
ExportSection() : Section(wasm::WASM_SEC_EXPORT) {}
@@ -339,6 +371,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SymbolInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::InitFunction)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::ComdatEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Comdat)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Event)
namespace llvm {
namespace yaml {
@@ -471,6 +504,10 @@ template <> struct ScalarEnumerationTraits<WasmYAML::RelocType> {
static void enumeration(IO &IO, WasmYAML::RelocType &Kind);
};
+template <> struct MappingTraits<WasmYAML::Event> {
+ static void mapping(IO &IO, WasmYAML::Event &Event);
+};
+
} // end namespace yaml
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Option/OptTable.h b/contrib/llvm/include/llvm/Option/OptTable.h
index 743c4772c98c..fdb05d8a15af 100644
--- a/contrib/llvm/include/llvm/Option/OptTable.h
+++ b/contrib/llvm/include/llvm/Option/OptTable.h
@@ -217,8 +217,8 @@ public:
/// Render the help text for an option table.
///
/// \param OS - The stream to write the help text to.
- /// \param Name - The name to use in the usage line.
- /// \param Title - The title to use in the usage line.
+ /// \param Usage - USAGE: Usage
+ /// \param Title - OVERVIEW: Title
/// \param FlagsToInclude - If non-zero, only include options with any
/// of these flags set.
/// \param FlagsToExclude - Exclude options with any of these flags set.
@@ -226,11 +226,11 @@ public:
/// that don't have help texts. By default, we display
/// only options that are not hidden and have help
/// texts.
- void PrintHelp(raw_ostream &OS, const char *Name, const char *Title,
+ void PrintHelp(raw_ostream &OS, const char *Usage, const char *Title,
unsigned FlagsToInclude, unsigned FlagsToExclude,
bool ShowAllAliases) const;
- void PrintHelp(raw_ostream &OS, const char *Name, const char *Title,
+ void PrintHelp(raw_ostream &OS, const char *Usage, const char *Title,
bool ShowHidden = false, bool ShowAllAliases = false) const;
};
diff --git a/contrib/llvm/include/llvm/Pass.h b/contrib/llvm/include/llvm/Pass.h
index d65347d611ea..5935a0853d32 100644
--- a/contrib/llvm/include/llvm/Pass.h
+++ b/contrib/llvm/include/llvm/Pass.h
@@ -356,17 +356,6 @@ protected:
/// This is the storage for the -time-passes option.
extern bool TimePassesIsEnabled;
-/// isFunctionInPrintList - returns true if a function should be printed via
-// debugging options like -print-after-all/-print-before-all.
-// Tells if the function IR should be printed by PrinterPass.
-extern bool isFunctionInPrintList(StringRef FunctionName);
-
-/// forcePrintModuleIR - returns true if IR printing passes should
-// be printing module IR (even for local-pass printers e.g. function-pass)
-// to provide more context, as enabled by debugging option -print-module-scope
-// Tells if IR printer should be printing module IR
-extern bool forcePrintModuleIR();
-
} // end namespace llvm
// Include support files that contain important APIs commonly used by Passes,
diff --git a/contrib/llvm/include/llvm/Passes/PassBuilder.h b/contrib/llvm/include/llvm/Passes/PassBuilder.h
index 24a93bc76af5..fa59345a02cf 100644
--- a/contrib/llvm/include/llvm/Passes/PassBuilder.h
+++ b/contrib/llvm/include/llvm/Passes/PassBuilder.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Error.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include <vector>
@@ -32,10 +33,13 @@ class ModuleSummaryIndex;
/// A struct capturing PGO tunables.
struct PGOOptions {
PGOOptions(std::string ProfileGenFile = "", std::string ProfileUseFile = "",
- std::string SampleProfileFile = "", bool RunProfileGen = false,
- bool SamplePGOSupport = false)
+ std::string SampleProfileFile = "",
+ std::string ProfileRemappingFile = "",
+ bool RunProfileGen = false, bool SamplePGOSupport = false)
: ProfileGenFile(ProfileGenFile), ProfileUseFile(ProfileUseFile),
- SampleProfileFile(SampleProfileFile), RunProfileGen(RunProfileGen),
+ SampleProfileFile(SampleProfileFile),
+ ProfileRemappingFile(ProfileRemappingFile),
+ RunProfileGen(RunProfileGen),
SamplePGOSupport(SamplePGOSupport || !SampleProfileFile.empty()) {
assert((RunProfileGen ||
!SampleProfileFile.empty() ||
@@ -45,6 +49,7 @@ struct PGOOptions {
std::string ProfileGenFile;
std::string ProfileUseFile;
std::string SampleProfileFile;
+ std::string ProfileRemappingFile;
bool RunProfileGen;
bool SamplePGOSupport;
};
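A hedged construction sketch showing where the new remapping file slots in; the file names are placeholders, not defaults shipped by LLVM, and `using namespace llvm;` is assumed:

    // (ProfileGen, ProfileUse, SampleProfile, ProfileRemapping, RunProfileGen, SamplePGOSupport)
    PGOOptions UseWithRemap(/*ProfileGenFile=*/"",
                            /*ProfileUseFile=*/"code.profdata",
                            /*SampleProfileFile=*/"",
                            /*ProfileRemappingFile=*/"code.remap",
                            /*RunProfileGen=*/false,
                            /*SamplePGOSupport=*/false);
    PassBuilder PB(/*TM=*/nullptr, UseWithRemap);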
@@ -58,6 +63,7 @@ struct PGOOptions {
class PassBuilder {
TargetMachine *TM;
Optional<PGOOptions> PGOOpt;
+ PassInstrumentationCallbacks *PIC;
public:
/// A struct to capture parsed pass pipeline names.
@@ -172,8 +178,9 @@ public:
};
explicit PassBuilder(TargetMachine *TM = nullptr,
- Optional<PGOOptions> PGOOpt = None)
- : TM(TM), PGOOpt(PGOOpt) {}
+ Optional<PGOOptions> PGOOpt = None,
+ PassInstrumentationCallbacks *PIC = nullptr)
+ : TM(TM), PGOOpt(PGOOpt), PIC(PIC) {}
/// Cross register the analysis managers through their proxies.
///
@@ -378,8 +385,9 @@ public:
/// If the sequence of passes aren't all the exact same kind of pass, it will
/// be an error. You cannot mix different levels implicitly, you must
/// explicitly form a pass manager in which to nest passes.
- bool parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText,
- bool VerifyEachPass = true, bool DebugLogging = false);
+ Error parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText,
+ bool VerifyEachPass = true,
+ bool DebugLogging = false);
/// {{@ Parse a textual pass pipeline description into a specific PassManager
///
@@ -388,12 +396,15 @@ public:
/// this is the valid pipeline text:
///
/// function(lpass)
- bool parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText,
- bool VerifyEachPass = true, bool DebugLogging = false);
- bool parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText,
- bool VerifyEachPass = true, bool DebugLogging = false);
- bool parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText,
- bool VerifyEachPass = true, bool DebugLogging = false);
+ Error parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText,
+ bool VerifyEachPass = true,
+ bool DebugLogging = false);
+ Error parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText,
+ bool VerifyEachPass = true,
+ bool DebugLogging = false);
+ Error parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText,
+ bool VerifyEachPass = true,
+ bool DebugLogging = false);
/// @}}
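Since these parse entry points now return llvm::Error instead of bool, call sites switch to the standard Error idiom. A hedged sketch, assuming `using namespace llvm;`, a PassBuilder PB whose analysis managers are already cross-registered, and an example pipeline string:

    ModulePassManager MPM;
    if (Error Err = PB.parsePassPipeline(MPM, "default<O2>")) {
      errs() << "invalid pipeline: " << toString(std::move(Err)) << "\n";
      return 1;
    }
    MPM.run(M, MAM); // M (Module) and MAM (ModuleAnalysisManager) assumed to exist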
/// Parse a textual alias analysis pipeline into the provided AA manager.
@@ -411,7 +422,7 @@ public:
/// Returns false if the text cannot be parsed cleanly. The specific state of
/// the \p AA manager is unspecified if such an error is encountered and this
/// returns false.
- bool parseAAPipeline(AAManager &AA, StringRef PipelineText);
+ Error parseAAPipeline(AAManager &AA, StringRef PipelineText);
/// Register a callback for a default optimizer pipeline extension
/// point
@@ -490,6 +501,18 @@ public:
PipelineStartEPCallbacks.push_back(C);
}
+ /// Register a callback for a default optimizer pipeline extension point
+ ///
+ /// This extension point allows adding optimizations at the very end of the
+ /// function optimization pipeline. A key difference between this and the
+ /// legacy PassManager's OptimizerLast callback is that this extension point
+ /// is not triggered at O0. Extensions to the O0 pipeline should append their
+ /// passes to the end of the overall pipeline.
+ void registerOptimizerLastEPCallback(
+ const std::function<void(FunctionPassManager &, OptimizationLevel)> &C) {
+ OptimizerLastEPCallbacks.push_back(C);
+ }
+
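A hedged sketch of registering this extension point; the added pass is only illustrative, not part of this change, and `using namespace llvm;` is assumed:

    PB.registerOptimizerLastEPCallback(
        [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel /*Level*/) {
          FPM.addPass(SimplifyCFGPass()); // illustrative late cleanup pass
        });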
/// Register a callback for parsing an AliasAnalysis Name to populate
/// the given AAManager \p AA
void registerParseAACallback(
@@ -559,33 +582,34 @@ private:
static Optional<std::vector<PipelineElement>>
parsePipelineText(StringRef Text);
- bool parseModulePass(ModulePassManager &MPM, const PipelineElement &E,
+ Error parseModulePass(ModulePassManager &MPM, const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging);
+ Error parseCGSCCPass(CGSCCPassManager &CGPM, const PipelineElement &E,
bool VerifyEachPass, bool DebugLogging);
- bool parseCGSCCPass(CGSCCPassManager &CGPM, const PipelineElement &E,
+ Error parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging);
+ Error parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
bool VerifyEachPass, bool DebugLogging);
- bool parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging);
- bool parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging);
bool parseAAPassName(AAManager &AA, StringRef Name);
- bool parseLoopPassPipeline(LoopPassManager &LPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass, bool DebugLogging);
- bool parseFunctionPassPipeline(FunctionPassManager &FPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass, bool DebugLogging);
- bool parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
+ Error parseLoopPassPipeline(LoopPassManager &LPM,
ArrayRef<PipelineElement> Pipeline,
bool VerifyEachPass, bool DebugLogging);
- bool parseModulePassPipeline(ModulePassManager &MPM,
+ Error parseFunctionPassPipeline(FunctionPassManager &FPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass, bool DebugLogging);
+ Error parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
ArrayRef<PipelineElement> Pipeline,
bool VerifyEachPass, bool DebugLogging);
+ Error parseModulePassPipeline(ModulePassManager &MPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass, bool DebugLogging);
void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
OptimizationLevel Level, bool RunProfileGen,
std::string ProfileGenFile,
- std::string ProfileUseFile);
+ std::string ProfileUseFile,
+ std::string ProfileRemappingFile);
void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel);
@@ -602,6 +626,8 @@ private:
CGSCCOptimizerLateEPCallbacks;
SmallVector<std::function<void(FunctionPassManager &, OptimizationLevel)>, 2>
VectorizerStartEPCallbacks;
+ SmallVector<std::function<void(FunctionPassManager &, OptimizationLevel)>, 2>
+ OptimizerLastEPCallbacks;
// Module callbacks
SmallVector<std::function<void(ModulePassManager &)>, 2>
PipelineStartEPCallbacks;
diff --git a/contrib/llvm/include/llvm/Passes/StandardInstrumentations.h b/contrib/llvm/include/llvm/Passes/StandardInstrumentations.h
new file mode 100644
index 000000000000..8c6f5e1e22f7
--- /dev/null
+++ b/contrib/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -0,0 +1,70 @@
+//===- StandardInstrumentations.h ------------------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This header defines a class that provides bookkeeping for all standard
+/// (i.e. in-tree) pass instrumentations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PASSES_STANDARDINSTRUMENTATIONS_H
+#define LLVM_PASSES_STANDARDINSTRUMENTATIONS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/IR/PassTimingInfo.h"
+
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+class Module;
+
+/// Instrumentation to print IR before/after passes.
+///
+/// Needs state to be able to print the module after a pass that invalidates
+/// the IR unit (typically a Loop or SCC).
+class PrintIRInstrumentation {
+public:
+ PrintIRInstrumentation() = default;
+ ~PrintIRInstrumentation();
+
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+private:
+ bool printBeforePass(StringRef PassID, Any IR);
+ void printAfterPass(StringRef PassID, Any IR);
+ void printAfterPassInvalidated(StringRef PassID);
+
+ using PrintModuleDesc = std::tuple<const Module *, std::string, StringRef>;
+
+ void pushModuleDesc(StringRef PassID, Any IR);
+ PrintModuleDesc popModuleDesc(StringRef PassID);
+
+  /// Stack of Module descriptions, enough to print the module after a given
+ /// pass.
+ SmallVector<PrintModuleDesc, 2> ModuleDescStack;
+ bool StoreModuleDesc = false;
+};
+
+/// This class provides an interface to register all the standard pass
+/// instrumentations and manages their state (if any).
+class StandardInstrumentations {
+ PrintIRInstrumentation PrintIR;
+ TimePassesHandler TimePasses;
+
+public:
+ StandardInstrumentations() = default;
+
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+};
+} // namespace llvm
+
+#endif
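A hedged sketch of the expected wiring in a driver: build the callbacks object, let the standard instrumentations register themselves, and hand the callbacks to PassBuilder (whose constructor gained the PIC parameter in this same change); `using namespace llvm;` is assumed:

    PassInstrumentationCallbacks PIC;
    StandardInstrumentations SI;
    SI.registerCallbacks(PIC);          // -print-before/after-all, -time-passes, ...
    PassBuilder PB(/*TM=*/nullptr, /*PGOOpt=*/None, &PIC);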
diff --git a/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
index e820f71cb6d5..beaa36553287 100644
--- a/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -510,7 +510,6 @@ class CoverageMapping {
DenseMap<size_t, DenseSet<size_t>> RecordProvenance;
std::vector<FunctionRecord> Functions;
std::vector<std::pair<std::string, uint64_t>> FuncHashMismatches;
- std::vector<std::pair<std::string, uint64_t>> FuncCounterMismatches;
CoverageMapping() = default;
@@ -537,9 +536,7 @@ public:
///
/// This is a count of functions whose profile is out of date or otherwise
/// can't be associated with any coverage information.
- unsigned getMismatchedCount() const {
- return FuncHashMismatches.size() + FuncCounterMismatches.size();
- }
+ unsigned getMismatchedCount() const { return FuncHashMismatches.size(); }
/// A hash mismatch occurs when a profile record for a symbol does not have
/// the same hash as a coverage mapping record for the same symbol. This
@@ -549,14 +546,6 @@ public:
return FuncHashMismatches;
}
- /// A counter mismatch occurs when there is an error when evaluating the
- /// counter expressions in a coverage mapping record. This returns a list of
- /// counter mismatches, where each mismatch is a pair of the symbol name and
- /// the number of valid evaluated counter expressions.
- ArrayRef<std::pair<std::string, uint64_t>> getCounterMismatches() const {
- return FuncCounterMismatches;
- }
-
/// Returns a lexicographically sorted, unique list of files that are
/// covered.
std::vector<StringRef> getUniqueSourceFiles() const;
diff --git a/contrib/llvm/include/llvm/ProfileData/GCOV.h b/contrib/llvm/include/llvm/ProfileData/GCOV.h
index 8500401e44ad..a088f63a6915 100644
--- a/contrib/llvm/include/llvm/ProfileData/GCOV.h
+++ b/contrib/llvm/include/llvm/ProfileData/GCOV.h
@@ -24,9 +24,11 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
+#include <limits>
#include <memory>
#include <string>
#include <utility>
@@ -266,13 +268,14 @@ struct GCOVEdge {
GCOVBlock &Src;
GCOVBlock &Dst;
uint64_t Count = 0;
+ uint64_t CyclesCount = 0;
};
/// GCOVFunction - Collects function information.
class GCOVFunction {
public:
- using BlockIterator = pointee_iterator<SmallVectorImpl<
- std::unique_ptr<GCOVBlock>>::const_iterator>;
+ using BlockIterator = pointee_iterator<
+ SmallVectorImpl<std::unique_ptr<GCOVBlock>>::const_iterator>;
GCOVFunction(GCOVFile &P) : Parent(P) {}
@@ -322,6 +325,9 @@ class GCOVBlock {
public:
using EdgeIterator = SmallVectorImpl<GCOVEdge *>::const_iterator;
+ using BlockVector = SmallVector<const GCOVBlock *, 4>;
+ using BlockVectorLists = SmallVector<BlockVector, 4>;
+ using Edges = SmallVector<GCOVEdge *, 4>;
GCOVBlock(GCOVFunction &P, uint32_t N) : Parent(P), Number(N) {}
~GCOVBlock();
@@ -365,6 +371,16 @@ public:
void dump() const;
void collectLineCounts(FileInfo &FI);
+ static uint64_t getCycleCount(const Edges &Path);
+ static void unblock(const GCOVBlock *U, BlockVector &Blocked,
+ BlockVectorLists &BlockLists);
+ static bool lookForCircuit(const GCOVBlock *V, const GCOVBlock *Start,
+ Edges &Path, BlockVector &Blocked,
+ BlockVectorLists &BlockLists,
+ const BlockVector &Blocks, uint64_t &Count);
+ static void getCyclesCount(const BlockVector &Blocks, uint64_t &Count);
+ static uint64_t getLineCount(const BlockVector &Blocks);
+
private:
GCOVFunction &Parent;
uint32_t Number;
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm/include/llvm/ProfileData/InstrProf.h
index 206142b3565a..dc45021fc47d 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProf.h
@@ -544,9 +544,9 @@ Error InstrProfSymtab::create(const NameIterRange &IterRange) {
void InstrProfSymtab::finalizeSymtab() {
if (Sorted)
return;
- llvm::sort(MD5NameMap.begin(), MD5NameMap.end(), less_first());
- llvm::sort(MD5FuncMap.begin(), MD5FuncMap.end(), less_first());
- llvm::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first());
+ llvm::sort(MD5NameMap, less_first());
+ llvm::sort(MD5FuncMap, less_first());
+ llvm::sort(AddrToMD5Map, less_first());
AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
AddrToMD5Map.end());
Sorted = true;
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h b/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h
index efc22dcd0d9a..08d782276117 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -349,12 +349,17 @@ using OnDiskHashTableImplV3 =
OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
template <typename HashTableImpl>
+class InstrProfReaderItaniumRemapper;
+
+template <typename HashTableImpl>
class InstrProfReaderIndex : public InstrProfReaderIndexBase {
private:
std::unique_ptr<HashTableImpl> HashTable;
typename HashTableImpl::data_iterator RecordIterator;
uint64_t FormatVersion;
+ friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
+
public:
InstrProfReaderIndex(const unsigned char *Buckets,
const unsigned char *const Payload,
@@ -386,13 +391,26 @@ public:
}
};
+/// Name matcher supporting fuzzy matching of symbol names to names in profiles.
+class InstrProfReaderRemapper {
+public:
+ virtual ~InstrProfReaderRemapper() {}
+ virtual Error populateRemappings() { return Error::success(); }
+ virtual Error getRecords(StringRef FuncName,
+ ArrayRef<NamedInstrProfRecord> &Data) = 0;
+};
+
/// Reader for the indexed binary instrprof format.
class IndexedInstrProfReader : public InstrProfReader {
private:
/// The profile data file contents.
std::unique_ptr<MemoryBuffer> DataBuffer;
+ /// The profile remapping file contents.
+ std::unique_ptr<MemoryBuffer> RemappingBuffer;
/// The index into the profile data.
std::unique_ptr<InstrProfReaderIndexBase> Index;
+ /// The profile remapping file contents.
+ std::unique_ptr<InstrProfReaderRemapper> Remapper;
/// Profile summary data.
std::unique_ptr<ProfileSummary> Summary;
// Index to the current record in the record array.
@@ -404,8 +422,11 @@ private:
const unsigned char *Cur);
public:
- IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
- : DataBuffer(std::move(DataBuffer)), RecordIndex(0) {}
+ IndexedInstrProfReader(
+ std::unique_ptr<MemoryBuffer> DataBuffer,
+ std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
+ : DataBuffer(std::move(DataBuffer)),
+ RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
@@ -434,10 +455,11 @@ public:
/// Factory method to create an indexed reader.
static Expected<std::unique_ptr<IndexedInstrProfReader>>
- create(const Twine &Path);
+ create(const Twine &Path, const Twine &RemappingPath = "");
static Expected<std::unique_ptr<IndexedInstrProfReader>>
- create(std::unique_ptr<MemoryBuffer> Buffer);
+ create(std::unique_ptr<MemoryBuffer> Buffer,
+ std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
// Used for testing purpose only.
void setValueProfDataEndianness(support::endianness Endianness) {
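A hedged sketch of creating an indexed reader together with a symbol remapping file; both paths are placeholders and `using namespace llvm;` is assumed:

    auto ReaderOrErr =
        IndexedInstrProfReader::create("default.profdata", "symbols.remap");
    if (!ReaderOrErr)
      return ReaderOrErr.takeError(); // or report and bail out
    std::unique_ptr<IndexedInstrProfReader> Reader = std::move(ReaderOrErr.get());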
diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProf.h b/contrib/llvm/include/llvm/ProfileData/SampleProf.h
index 0cd6dd2c2c0e..927dfd246878 100644
--- a/contrib/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/contrib/llvm/include/llvm/ProfileData/SampleProf.h
@@ -49,7 +49,8 @@ enum class sampleprof_error {
unsupported_writing_format,
truncated_name_table,
not_implemented,
- counter_overflow
+ counter_overflow,
+ ostream_seek_unsupported
};
inline std::error_code make_error_code(sampleprof_error E) {
@@ -293,6 +294,9 @@ public:
/// with the maximum total sample count.
const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc,
StringRef CalleeName) const {
+ std::string CalleeGUID;
+ CalleeName = getRepInFormat(CalleeName, Format, CalleeGUID);
+
auto iter = CallsiteSamples.find(Loc);
if (iter == CallsiteSamples.end())
return nullptr;
@@ -377,30 +381,53 @@ public:
/// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
/// to \p S.
void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
- uint64_t Threshold, bool isCompact) const {
+ uint64_t Threshold) const {
if (TotalSamples <= Threshold)
return;
- S.insert(Function::getGUID(Name));
+ S.insert(getGUID(Name));
// Import hot CallTargets, which may not be available in IR because full
// profile annotation cannot be done until backend compilation in ThinLTO.
for (const auto &BS : BodySamples)
for (const auto &TS : BS.second.getCallTargets())
if (TS.getValue() > Threshold) {
- Function *Callee = M->getFunction(TS.getKey());
+ const Function *Callee =
+ M->getFunction(getNameInModule(TS.getKey(), M));
if (!Callee || !Callee->getSubprogram())
- S.insert(isCompact ? std::stol(TS.getKey().data())
- : Function::getGUID(TS.getKey()));
+ S.insert(getGUID(TS.getKey()));
}
for (const auto &CS : CallsiteSamples)
for (const auto &NameFS : CS.second)
- NameFS.second.findInlinedFunctions(S, M, Threshold, isCompact);
+ NameFS.second.findInlinedFunctions(S, M, Threshold);
}
/// Set the name of the function.
void setName(StringRef FunctionName) { Name = FunctionName; }
/// Return the function name.
- const StringRef &getName() const { return Name; }
+ StringRef getName() const { return Name; }
+
+ /// Return the original function name if it exists in Module \p M.
+ StringRef getFuncNameInModule(const Module *M) const {
+ return getNameInModule(Name, M);
+ }
+
+ /// Translate \p Name into its original name in Module.
+ /// When the Format is not SPF_Compact_Binary, \p Name needs no translation.
+ /// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples
+ /// is actually GUID of the original function name. getNameInModule will
+ /// translate \p Name in current FunctionSamples into its original name.
+ /// If the original name doesn't exist in \p M, return empty StringRef.
+ StringRef getNameInModule(StringRef Name, const Module *M) const {
+ if (Format != SPF_Compact_Binary)
+ return Name;
+ // Expect CurrentModule to be initialized by GUIDToFuncNameMapper.
+ if (M != CurrentModule)
+ llvm_unreachable("Input Module should be the same as CurrentModule");
+ auto iter = GUIDToFuncNameMap.find(std::stoull(Name.data()));
+ if (iter == GUIDToFuncNameMap.end())
+ return StringRef();
+ return iter->second;
+ }
/// Returns the line offset to the start line of the subprogram.
/// We assume that a single function will not exceed 65535 LOC.
@@ -417,6 +444,54 @@ public:
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *findFunctionSamples(const DILocation *DIL) const;
+ static SampleProfileFormat Format;
+ /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
+ /// all the function symbols defined or declared in CurrentModule.
+ static DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
+ static Module *CurrentModule;
+
+ class GUIDToFuncNameMapper {
+ public:
+ GUIDToFuncNameMapper(Module &M) {
+ if (Format != SPF_Compact_Binary)
+ return;
+
+ for (const auto &F : M) {
+ StringRef OrigName = F.getName();
+ GUIDToFuncNameMap.insert({Function::getGUID(OrigName), OrigName});
+      /// Local to global var promotion used by optimizations like ThinLTO
+      /// will rename the var and add a suffix like ".llvm.xxx" to the
+ /// original local name. In sample profile, the suffixes of function
+ /// names are all stripped. Since it is possible that the mapper is
+ /// built in post-thin-link phase and var promotion has been done,
+ /// we need to add the substring of function name without the suffix
+ /// into the GUIDToFuncNameMap.
+ auto pos = OrigName.find('.');
+ if (pos != StringRef::npos) {
+ StringRef NewName = OrigName.substr(0, pos);
+ GUIDToFuncNameMap.insert({Function::getGUID(NewName), NewName});
+ }
+ }
+ CurrentModule = &M;
+ }
+
+ ~GUIDToFuncNameMapper() {
+ if (Format != SPF_Compact_Binary)
+ return;
+
+ GUIDToFuncNameMap.clear();
+ CurrentModule = nullptr;
+ }
+ };
+
+ // Assume the input \p Name is a name coming from FunctionSamples itself.
+ // If the format is SPF_Compact_Binary, the name is already a GUID and we
+  // don't want to return the GUID of a GUID.
+ static uint64_t getGUID(StringRef Name) {
+ return (Format == SPF_Compact_Binary) ? std::stoull(Name.data())
+ : Function::getGUID(Name);
+ }
+
private:
/// Mangled name of the function.
StringRef Name;
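For SPF_Compact_Binary profiles the names inside FunctionSamples are GUID strings, so the intended pattern is to keep a GUIDToFuncNameMapper alive while querying samples and to go through getFuncNameInModule() whenever a human-readable name is needed. A hedged sketch, assuming `using namespace llvm;` and `using namespace sampleprof;`, a loaded SampleProfileReader Reader, and the Module M being compiled:

    FunctionSamples::GUIDToFuncNameMapper Mapper(M); // no-op for non-compact formats
    for (Function &F : M)
      if (FunctionSamples *FS = Reader->getSamplesFor(F))
        errs() << FS->getFuncNameInModule(&M) << ": "
               << FS->getTotalSamples() << " samples\n";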
diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h b/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h
index 0617b05e8d4f..5cc729e42cc8 100644
--- a/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -222,6 +222,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SymbolRemappingReader.h"
#include <algorithm>
#include <cstdint>
#include <memory>
@@ -279,6 +280,8 @@ public:
/// Print the profile for \p FName on stream \p OS.
void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs());
+ virtual void collectFuncsToUse(const Module &M) {}
+
/// Print all the profiles on stream \p OS.
void dump(raw_ostream &OS = dbgs());
@@ -287,11 +290,16 @@ public:
// The function name may have been updated by adding suffix. In sample
// profile, the function names are all stripped, so we need to strip
// the function name suffix before matching with profile.
- StringRef Fname = F.getName().split('.').first;
+ return getSamplesFor(F.getName().split('.').first);
+ }
+
+ /// Return the samples collected for function \p F.
+ virtual FunctionSamples *getSamplesFor(StringRef Fname) {
std::string FGUID;
Fname = getRepInFormat(Fname, getFormat(), FGUID);
- if (Profiles.count(Fname))
- return &Profiles[Fname];
+ auto It = Profiles.find(Fname);
+ if (It != Profiles.end())
+ return &It->second;
return nullptr;
}
@@ -335,6 +343,12 @@ protected:
/// Profile summary information.
std::unique_ptr<ProfileSummary> Summary;
+ /// Take ownership of the summary of this reader.
+ static std::unique_ptr<ProfileSummary>
+ takeSummary(SampleProfileReader &Reader) {
+ return std::move(Reader.Summary);
+ }
+
/// Compute summary for this profile.
void computeSummary();
@@ -364,7 +378,7 @@ public:
: SampleProfileReader(std::move(B), C, Format) {}
/// Read and validate the file header.
- std::error_code readHeader() override;
+ virtual std::error_code readHeader() override;
/// Read sample profiles from the associated file.
std::error_code read() override;
@@ -378,6 +392,10 @@ protected:
/// \returns the read value.
template <typename T> ErrorOr<T> readNumber();
+  /// Read a numeric value of type T from the profile. The value is stored
+  /// in raw, unencoded form.
+ template <typename T> ErrorOr<T> readUnencodedNumber();
+
/// Read a string from the profile.
///
/// If an error occurs during decoding, a diagnostic message is emitted and
@@ -392,6 +410,9 @@ protected:
/// Return true if we've reached the end of file.
bool at_eof() const { return Data >= End; }
+ /// Read the next function profile instance.
+ std::error_code readFuncProfile();
+
/// Read the contents of the given profile instance.
std::error_code readProfile(FunctionSamples &FProfile);
@@ -436,10 +457,17 @@ class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary {
private:
/// Function name table.
std::vector<std::string> NameTable;
+  /// The table mapping from function name to the offset of its FunctionSamples
+  /// record from the start of the file.
+ DenseMap<StringRef, uint64_t> FuncOffsetTable;
+ /// The set containing the functions to use when compiling a module.
+ DenseSet<StringRef> FuncsToUse;
virtual std::error_code verifySPMagic(uint64_t Magic) override;
virtual std::error_code readNameTable() override;
/// Read a string indirectly via the name table.
virtual ErrorOr<StringRef> readStringFromTable() override;
+ virtual std::error_code readHeader() override;
+ std::error_code readFuncOffsetTable();
public:
SampleProfileReaderCompactBinary(std::unique_ptr<MemoryBuffer> B,
@@ -448,6 +476,12 @@ public:
/// \brief Return true if \p Buffer is in the format supported by this class.
static bool hasFormat(const MemoryBuffer &Buffer);
+
+ /// Read samples only for functions to use.
+ std::error_code read() override;
+
+ /// Collect functions to be used when compiling Module \p M.
+ void collectFuncsToUse(const Module &M) override;
};
using InlineCallStack = SmallVector<FunctionSamples *, 10>;
@@ -503,6 +537,44 @@ protected:
static const uint32_t GCOVTagAFDOFunction = 0xac000000;
};
+/// A profile data reader proxy that remaps the profile data from another
+/// sample profile data reader, by applying a provided set of equivalences
+/// between components of the symbol names in the profile.
+class SampleProfileReaderItaniumRemapper : public SampleProfileReader {
+public:
+ SampleProfileReaderItaniumRemapper(
+ std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
+ std::unique_ptr<SampleProfileReader> Underlying)
+ : SampleProfileReader(std::move(B), C, Underlying->getFormat()) {
+ Profiles = std::move(Underlying->getProfiles());
+ Summary = takeSummary(*Underlying);
+ // Keep the underlying reader alive; the profile data may contain
+ // StringRefs referencing names in its name table.
+ UnderlyingReader = std::move(Underlying);
+ }
+
+ /// Create a remapped sample profile from the given remapping file and
+ /// underlying samples.
+ static ErrorOr<std::unique_ptr<SampleProfileReader>>
+ create(const Twine &Filename, LLVMContext &C,
+ std::unique_ptr<SampleProfileReader> Underlying);
+
+ /// Read and validate the file header.
+ std::error_code readHeader() override { return sampleprof_error::success; }
+
+ /// Read remapping file and apply it to the sample profile.
+ std::error_code read() override;
+
+ /// Return the samples collected for function \p F.
+ FunctionSamples *getSamplesFor(StringRef FunctionName) override;
+ using SampleProfileReader::getSamplesFor;
+
+private:
+ SymbolRemappingReader Remappings;
+ DenseMap<SymbolRemappingReader::Key, FunctionSamples*> SampleMap;
+ std::unique_ptr<SampleProfileReader> UnderlyingReader;
+};
+
} // end namespace sampleprof
} // end namespace llvm
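A hedged sketch of stacking the remapper on top of an existing reader; the file names are placeholders, Ctx is an LLVMContext owned by the caller, and `using namespace llvm;` and `using namespace sampleprof;` are assumed:

    auto ReaderOrErr = SampleProfileReader::create("foo.samples", Ctx);
    if (std::error_code EC = ReaderOrErr.getError())
      return EC;
    auto RemappedOrErr = SampleProfileReaderItaniumRemapper::create(
        "foo.remap", Ctx, std::move(*ReaderOrErr));
    // On success, *RemappedOrErr answers getSamplesFor() using remapped names.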
diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h b/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h
index 74dc839ff049..d5ac6e53e4f7 100644
--- a/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -42,7 +42,7 @@ public:
/// Write all the sample profiles in the given map of samples.
///
/// \returns status code of the file update operation.
- std::error_code write(const StringMap<FunctionSamples> &ProfileMap);
+ virtual std::error_code write(const StringMap<FunctionSamples> &ProfileMap);
raw_ostream &getOutputStream() { return *OutputStream; }
@@ -103,14 +103,15 @@ private:
/// Sample-based profile writer (binary format).
class SampleProfileWriterBinary : public SampleProfileWriter {
public:
- std::error_code write(const FunctionSamples &S) override;
+ virtual std::error_code write(const FunctionSamples &S) override;
SampleProfileWriterBinary(std::unique_ptr<raw_ostream> &OS)
: SampleProfileWriter(OS) {}
protected:
virtual std::error_code writeNameTable() = 0;
virtual std::error_code writeMagicIdent() = 0;
- std::error_code writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ virtual std::error_code
+ writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
std::error_code writeSummary();
std::error_code writeNameIdx(StringRef FName);
std::error_code writeBody(const FunctionSamples &S);
@@ -135,12 +136,56 @@ protected:
virtual std::error_code writeMagicIdent() override;
};
+// CompactBinary is a compact form of the binary profile format. It reduces
+// both the profile size and the load time needed when compiling. It has two
+// major differences from the Binary format.
+// 1. It represents all the strings in the name table using MD5 hashes.
+// 2. It saves a function offset table which maps each function name index to
+// the offset of its function profile from the start of the binary profile,
+// so for function profiles that are not needed when compiling a module the
+// profile reader doesn't have to read them, which saves compile time when
+// the profile is huge.
+// The layout of the compact format is shown as follows:
+//
+// Part1: Profile header, the same as binary format, containing magic
+// number, version, summary, name table...
+// Part2: Function Offset Table Offset, which saves the position of
+// Part4.
+// Part3: Function profile collection
+// function1 profile start
+// ....
+// function2 profile start
+// ....
+// function3 profile start
+// ....
+// ......
+// Part4: Function Offset Table
+// function1 name index --> function1 profile start
+// function2 name index --> function2 profile start
+// function3 name index --> function3 profile start
+//
+// We need Part2 because profile reader can use it to find out and read
+// function offset table without reading Part3 first.
class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary {
using SampleProfileWriterBinary::SampleProfileWriterBinary;
+public:
+ virtual std::error_code write(const FunctionSamples &S) override;
+ virtual std::error_code
+ write(const StringMap<FunctionSamples> &ProfileMap) override;
+
protected:
+  /// The table mapping from function name to the offset of its FunctionSamples
+  /// record from the start of the profile.
+  MapVector<StringRef, uint64_t> FuncOffsetTable;
+  /// The offset of the slot to be filled with the offset of FuncOffsetTable
+  /// from the start of the profile.
+ uint64_t TableOffset;
virtual std::error_code writeNameTable() override;
virtual std::error_code writeMagicIdent() override;
+ virtual std::error_code
+ writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ std::error_code writeFuncOffsetTable();
};
} // end namespace sampleprof
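A hedged sketch of producing the compact format through the existing factory; the output path is a placeholder, ProfileMap is assumed to be the StringMap<FunctionSamples> being written, and `using namespace llvm;` and `using namespace sampleprof;` are assumed:

    auto WriterOrErr =
        SampleProfileWriter::create("out.compact.prof", SPF_Compact_Binary);
    if (std::error_code EC = WriterOrErr.getError())
      return EC;
    // write() emits Part1-Part4 described above, including the function offset table.
    if (std::error_code EC = (*WriterOrErr)->write(ProfileMap))
      return EC;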
diff --git a/contrib/llvm/include/llvm/Support/AArch64TargetParser.def b/contrib/llvm/include/llvm/Support/AArch64TargetParser.def
index 6772e5f9b734..e03297b7c3c3 100644
--- a/contrib/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/contrib/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -40,6 +40,11 @@ AARCH64_ARCH("armv8.4-a", ARMV8_4A, "8.4-A", "v8.4a",
(AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD))
+AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
+ (AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+ AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
+ AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD))
#undef AARCH64_ARCH
#ifndef AARCH64_ARCH_EXT_NAME
@@ -60,10 +65,16 @@ AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
+AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
+AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
+AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
+AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
+AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
+AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
#undef AARCH64_ARCH_EXT_NAME
#ifndef AARCH64_CPU_NAME
@@ -91,8 +102,8 @@ AARCH64_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
AARCH64_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
-AARCH64_CPU_NAME("exynos-m4", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
+AARCH64_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD))
AARCH64_CPU_NAME("falkor", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC | AArch64::AEK_RDM))
AARCH64_CPU_NAME("saphira", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -109,6 +120,9 @@ AARCH64_CPU_NAME("thunderxt81", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("thunderxt83", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+AARCH64_CPU_NAME("tsv110", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_PROFILE | AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
+ AArch64::AEK_DOTPROD))
// Invalid CPU
AARCH64_CPU_NAME("invalid", INVALID, FK_INVALID, true, AArch64::AEK_INVALID)
#undef AARCH64_CPU_NAME
diff --git a/contrib/llvm/include/llvm/Support/AArch64TargetParser.h b/contrib/llvm/include/llvm/Support/AArch64TargetParser.h
new file mode 100644
index 000000000000..76b77d474428
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/AArch64TargetParser.h
@@ -0,0 +1,124 @@
+//===-- AArch64TargetParser - Parser for AArch64 features -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a target parser to recognise AArch64 hardware features
+// such as FPU/CPU/ARCH and extension names.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AARCH64TARGETPARSERCOMMON_H
+#define LLVM_SUPPORT_AARCH64TARGETPARSERCOMMON_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/ARMTargetParser.h"
+#include <vector>
+
+// FIXME: This should be made into a class design, to avoid duplication.
+namespace llvm {
+namespace AArch64 {
+
+// Arch extension modifiers for CPUs.
+enum ArchExtKind : unsigned {
+ AEK_INVALID = 0,
+ AEK_NONE = 1,
+ AEK_CRC = 1 << 1,
+ AEK_CRYPTO = 1 << 2,
+ AEK_FP = 1 << 3,
+ AEK_SIMD = 1 << 4,
+ AEK_FP16 = 1 << 5,
+ AEK_PROFILE = 1 << 6,
+ AEK_RAS = 1 << 7,
+ AEK_LSE = 1 << 8,
+ AEK_SVE = 1 << 9,
+ AEK_DOTPROD = 1 << 10,
+ AEK_RCPC = 1 << 11,
+ AEK_RDM = 1 << 12,
+ AEK_SM4 = 1 << 13,
+ AEK_SHA3 = 1 << 14,
+ AEK_SHA2 = 1 << 15,
+ AEK_AES = 1 << 16,
+ AEK_FP16FML = 1 << 17,
+ AEK_RAND = 1 << 18,
+ AEK_MTE = 1 << 19,
+ AEK_SSBS = 1 << 20,
+ AEK_SB = 1 << 21,
+ AEK_PREDRES = 1 << 22,
+};
+
+enum class ArchKind {
+#define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID,
+#include "AArch64TargetParser.def"
+};
+
+const ARM::ArchNames<ArchKind> AArch64ARCHNames[] = {
+#define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, \
+ ARCH_BASE_EXT) \
+ {NAME, \
+ sizeof(NAME) - 1, \
+ CPU_ATTR, \
+ sizeof(CPU_ATTR) - 1, \
+ SUB_ARCH, \
+ sizeof(SUB_ARCH) - 1, \
+ ARM::FPUKind::ARCH_FPU, \
+ ARCH_BASE_EXT, \
+ AArch64::ArchKind::ID, \
+ ARCH_ATTR},
+#include "AArch64TargetParser.def"
+};
+
+const ARM::ExtName AArch64ARCHExtNames[] = {
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
+ {NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE},
+#include "AArch64TargetParser.def"
+};
+
+const ARM::CpuNames<ArchKind> AArch64CPUNames[] = {
+#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ {NAME, sizeof(NAME) - 1, AArch64::ArchKind::ID, IS_DEFAULT, DEFAULT_EXT},
+#include "AArch64TargetParser.def"
+};
+
+const ArchKind ArchKinds[] = {
+#define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \
+ ArchKind::ID,
+#include "AArch64TargetParser.def"
+};
+
+// FIXME: These should be moved to TargetTuple once it exists
+bool getExtensionFeatures(unsigned Extensions,
+ std::vector<StringRef> &Features);
+bool getArchFeatures(ArchKind AK, std::vector<StringRef> &Features);
+
+StringRef getArchName(ArchKind AK);
+unsigned getArchAttr(ArchKind AK);
+StringRef getCPUAttr(ArchKind AK);
+StringRef getSubArch(ArchKind AK);
+StringRef getArchExtName(unsigned ArchExtKind);
+StringRef getArchExtFeature(StringRef ArchExt);
+
+// Information by Name
+unsigned getDefaultFPU(StringRef CPU, ArchKind AK);
+unsigned getDefaultExtensions(StringRef CPU, ArchKind AK);
+StringRef getDefaultCPU(StringRef Arch);
+ArchKind getCPUArchKind(StringRef CPU);
+
+// Parser
+ArchKind parseArch(StringRef Arch);
+ArchExtKind parseArchExt(StringRef ArchExt);
+ArchKind parseCPUArch(StringRef CPU);
+// Used by target parser tests
+void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
+
+bool isX18ReservedByDefault(const Triple &TT);
+
+} // namespace AArch64
+} // namespace llvm
+
+#endif
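
A rough usage sketch of the parser API declared in the new header above (not taken from the upstream sources): look up a CPU, take its default extensions, and expand them to subtarget feature strings. The CPU name "tsv110" comes from the .def changes earlier in this diff; everything else is assumed to behave as the declarations suggest.

#include "llvm/Support/AArch64TargetParser.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

static void printDefaultFeatures(llvm::StringRef CPU) { // e.g. "tsv110"
  using namespace llvm;
  AArch64::ArchKind AK = AArch64::parseCPUArch(CPU);
  unsigned Extensions = AArch64::getDefaultExtensions(CPU, AK);
  std::vector<StringRef> Features;
  if (AArch64::getExtensionFeatures(Extensions, Features))
    for (StringRef F : Features)
      outs() << CPU << " enables " << F << "\n"; // e.g. "+dotprod"
}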
diff --git a/contrib/llvm/include/llvm/Support/AMDGPUMetadata.h b/contrib/llvm/include/llvm/Support/AMDGPUMetadata.h
index 667fb3f3da43..84851c07499d 100644
--- a/contrib/llvm/include/llvm/Support/AMDGPUMetadata.h
+++ b/contrib/llvm/include/llvm/Support/AMDGPUMetadata.h
@@ -431,6 +431,21 @@ std::error_code fromString(std::string String, Metadata &HSAMetadata);
/// Converts \p HSAMetadata to \p String.
std::error_code toString(Metadata HSAMetadata, std::string &String);
+//===----------------------------------------------------------------------===//
+// HSA metadata for v3 code object.
+//===----------------------------------------------------------------------===//
+namespace V3 {
+/// HSA metadata major version.
+constexpr uint32_t VersionMajor = 1;
+/// HSA metadata minor version.
+constexpr uint32_t VersionMinor = 0;
+
+/// HSA metadata beginning assembler directive.
+constexpr char AssemblerDirectiveBegin[] = ".amdgpu_metadata";
+/// HSA metadata ending assembler directive.
+constexpr char AssemblerDirectiveEnd[] = ".end_amdgpu_metadata";
+} // end namespace V3
+
} // end namespace HSAMD
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/Support/ARMTargetParser.def b/contrib/llvm/include/llvm/Support/ARMTargetParser.def
index 78f5410fb733..9e844e2b464d 100644
--- a/contrib/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/contrib/llvm/include/llvm/Support/ARMTargetParser.def
@@ -106,6 +106,11 @@ ARM_ARCH("armv8.4-a", ARMV8_4A, "8.4-A", "v8.4a",
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD))
+ARM_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
+ (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
+ ARM::AEK_DOTPROD))
ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
FK_NEON_FP_ARMV8,
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
@@ -152,6 +157,8 @@ ARM_ARCH_EXT_NAME("iwmmxt", ARM::AEK_IWMMXT, nullptr, nullptr)
ARM_ARCH_EXT_NAME("iwmmxt2", ARM::AEK_IWMMXT2, nullptr, nullptr)
ARM_ARCH_EXT_NAME("maverick", ARM::AEK_MAVERICK, nullptr, nullptr)
ARM_ARCH_EXT_NAME("xscale", ARM::AEK_XSCALE, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("fp16fml", ARM::AEK_FP16FML, "+fp16fml", "-fp16fml")
+ARM_ARCH_EXT_NAME("sb", ARM::AEK_SB, "+sb", "-sb")
#undef ARM_ARCH_EXT_NAME
#ifndef ARM_HW_DIV_NAME
@@ -202,10 +209,9 @@ ARM_CPU_NAME("arm926ej-s", ARMV5TEJ, FK_NONE, true, ARM::AEK_NONE)
ARM_CPU_NAME("arm1136j-s", ARMV6, FK_NONE, false, ARM::AEK_NONE)
ARM_CPU_NAME("arm1136jf-s", ARMV6, FK_VFPV2, true, ARM::AEK_NONE)
ARM_CPU_NAME("arm1136jz-s", ARMV6, FK_NONE, false, ARM::AEK_NONE)
-ARM_CPU_NAME("arm1176j-s", ARMV6K, FK_NONE, true, ARM::AEK_NONE)
-ARM_CPU_NAME("arm1176jz-s", ARMV6KZ, FK_NONE, false, ARM::AEK_NONE)
-ARM_CPU_NAME("mpcore", ARMV6K, FK_VFPV2, false, ARM::AEK_NONE)
+ARM_CPU_NAME("mpcore", ARMV6K, FK_VFPV2, true, ARM::AEK_NONE)
ARM_CPU_NAME("mpcorenovfp", ARMV6K, FK_NONE, false, ARM::AEK_NONE)
+ARM_CPU_NAME("arm1176jz-s", ARMV6KZ, FK_NONE, false, ARM::AEK_NONE)
ARM_CPU_NAME("arm1176jzf-s", ARMV6KZ, FK_VFPV2, true, ARM::AEK_NONE)
ARM_CPU_NAME("arm1156t2-s", ARMV6T2, FK_NONE, true, ARM::AEK_NONE)
ARM_CPU_NAME("arm1156t2f-s", ARMV6T2, FK_VFPV2, false, ARM::AEK_NONE)
@@ -260,7 +266,8 @@ ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
-ARM_CPU_NAME("exynos-m4", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
+ARM_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
// Non-standard Arch names.
ARM_CPU_NAME("iwmmxt", IWMMXT, FK_NONE, true, ARM::AEK_NONE)
diff --git a/contrib/llvm/include/llvm/Support/ARMTargetParser.h b/contrib/llvm/include/llvm/Support/ARMTargetParser.h
new file mode 100644
index 000000000000..71acc0dc72d0
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/ARMTargetParser.h
@@ -0,0 +1,264 @@
+//===-- ARMTargetParser - Parser for ARM target features --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a target parser to recognise ARM hardware features
+// such as FPU/CPU/ARCH/extensions and specific support such as HWDIV.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ARMTARGETPARSER_H
+#define LLVM_SUPPORT_ARMTARGETPARSER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include <vector>
+
+namespace llvm {
+namespace ARM {
+
+// Arch extension modifiers for CPUs.
+// Note that this is not the same as the AArch64 list
+enum ArchExtKind : unsigned {
+ AEK_INVALID = 0,
+ AEK_NONE = 1,
+ AEK_CRC = 1 << 1,
+ AEK_CRYPTO = 1 << 2,
+ AEK_FP = 1 << 3,
+ AEK_HWDIVTHUMB = 1 << 4,
+ AEK_HWDIVARM = 1 << 5,
+ AEK_MP = 1 << 6,
+ AEK_SIMD = 1 << 7,
+ AEK_SEC = 1 << 8,
+ AEK_VIRT = 1 << 9,
+ AEK_DSP = 1 << 10,
+ AEK_FP16 = 1 << 11,
+ AEK_RAS = 1 << 12,
+ AEK_SVE = 1 << 13,
+ AEK_DOTPROD = 1 << 14,
+ AEK_SHA2 = 1 << 15,
+ AEK_AES = 1 << 16,
+ AEK_FP16FML = 1 << 17,
+ AEK_SB = 1 << 18,
+ // Unsupported extensions.
+ AEK_OS = 0x8000000,
+ AEK_IWMMXT = 0x10000000,
+ AEK_IWMMXT2 = 0x20000000,
+ AEK_MAVERICK = 0x40000000,
+ AEK_XSCALE = 0x80000000,
+};
+
+// List of Arch Extension names.
+// FIXME: TableGen this.
+struct ExtName {
+ const char *NameCStr;
+ size_t NameLength;
+ unsigned ID;
+ const char *Feature;
+ const char *NegFeature;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+};
+
+const ExtName ARCHExtNames[] = {
+#define ARM_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
+ {NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE},
+#include "ARMTargetParser.def"
+};
+
+// List of HWDiv names (use getHWDivSynonym) and which architectural
+// features they correspond to (use getHWDivFeatures).
+// FIXME: TableGen this.
+const struct {
+ const char *NameCStr;
+ size_t NameLength;
+ unsigned ID;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+} HWDivNames[] = {
+#define ARM_HW_DIV_NAME(NAME, ID) {NAME, sizeof(NAME) - 1, ID},
+#include "ARMTargetParser.def"
+};
+
+// Arch names.
+enum class ArchKind {
+#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID,
+#include "ARMTargetParser.def"
+};
+
+// List of CPU names and their arches.
+// The same CPU can have multiple arches and can be default on multiple arches.
+// When finding the Arch for a CPU, first-found prevails. Sort them accordingly.
+// When this becomes table-generated, we'd probably need two tables.
+// FIXME: TableGen this.
+template <typename T> struct CpuNames {
+ const char *NameCStr;
+ size_t NameLength;
+ T ArchID;
+ bool Default; // is $Name the default CPU for $ArchID ?
+ unsigned DefaultExtensions;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+};
+
+const CpuNames<ArchKind> CPUNames[] = {
+#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ {NAME, sizeof(NAME) - 1, ARM::ArchKind::ID, IS_DEFAULT, DEFAULT_EXT},
+#include "ARMTargetParser.def"
+};
+
+// FPU names.
+enum FPUKind {
+#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) KIND,
+#include "ARMTargetParser.def"
+ FK_LAST
+};
+
+// FPU Version
+enum class FPUVersion {
+ NONE,
+ VFPV2,
+ VFPV3,
+ VFPV3_FP16,
+ VFPV4,
+ VFPV5
+};
+
+// An FPU name restricts the FPU in one of three ways:
+enum class FPURestriction {
+ None = 0, ///< No restriction
+ D16, ///< Only 16 D registers
+ SP_D16 ///< Only single-precision instructions, with 16 D registers
+};
+
+// An FPU name implies one of three levels of Neon support:
+enum class NeonSupportLevel {
+ None = 0, ///< No Neon
+ Neon, ///< Neon
+ Crypto ///< Neon with Crypto
+};
+
+// ISA kinds.
+enum class ISAKind { INVALID = 0, ARM, THUMB, AARCH64 };
+
+// Endianness
+// FIXME: BE8 vs. BE32?
+enum class EndianKind { INVALID = 0, LITTLE, BIG };
+
+// v6/v7/v8 Profile
+enum class ProfileKind { INVALID = 0, A, R, M };
+
+// List of canonical FPU names (use getFPUSynonym) and which architectural
+// features they correspond to (use getFPUFeatures).
+// FIXME: TableGen this.
+// The entries must appear in the order listed in ARM::FPUKind for correct
+// indexing
+struct FPUName {
+ const char *NameCStr;
+ size_t NameLength;
+ FPUKind ID;
+ FPUVersion FPUVer;
+ NeonSupportLevel NeonSupport;
+ FPURestriction Restriction;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+};
+
+static const FPUName FPUNames[] = {
+#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) \
+ {NAME, sizeof(NAME) - 1, KIND, VERSION, NEON_SUPPORT, RESTRICTION},
+#include "llvm/Support/ARMTargetParser.def"
+};
+
+// List of canonical arch names (use getArchSynonym).
+// This table also provides the build attribute fields for CPU arch
+// and Arch ID, according to the Addenda to the ARM ABI, chapters
+// 2.4 and 2.3.5.2 respectively.
+// FIXME: SubArch values were simplified to fit into the expectations
+// of the triples and are not conforming with their official names.
+// Check to see if the expectation should be changed.
+// FIXME: TableGen this.
+template <typename T> struct ArchNames {
+ const char *NameCStr;
+ size_t NameLength;
+ const char *CPUAttrCStr;
+ size_t CPUAttrLength;
+ const char *SubArchCStr;
+ size_t SubArchLength;
+ unsigned DefaultFPU;
+ unsigned ArchBaseExtensions;
+ T ID;
+ ARMBuildAttrs::CPUArch ArchAttr; // Arch ID in build attributes.
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+
+ // CPU class in build attributes.
+ StringRef getCPUAttr() const { return StringRef(CPUAttrCStr, CPUAttrLength); }
+
+ // Sub-Arch name.
+ StringRef getSubArch() const { return StringRef(SubArchCStr, SubArchLength); }
+};
+
+static const ArchNames<ArchKind> ARCHNames[] = {
+#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, \
+ ARCH_BASE_EXT) \
+ {NAME, sizeof(NAME) - 1, \
+ CPU_ATTR, sizeof(CPU_ATTR) - 1, \
+ SUB_ARCH, sizeof(SUB_ARCH) - 1, \
+ ARCH_FPU, ARCH_BASE_EXT, \
+ ArchKind::ID, ARCH_ATTR},
+#include "llvm/Support/ARMTargetParser.def"
+};
+
+// Information by ID
+StringRef getFPUName(unsigned FPUKind);
+FPUVersion getFPUVersion(unsigned FPUKind);
+NeonSupportLevel getFPUNeonSupportLevel(unsigned FPUKind);
+FPURestriction getFPURestriction(unsigned FPUKind);
+
+// FIXME: These should be moved to TargetTuple once it exists
+bool getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features);
+bool getHWDivFeatures(unsigned HWDivKind, std::vector<StringRef> &Features);
+bool getExtensionFeatures(unsigned Extensions,
+ std::vector<StringRef> &Features);
+
+StringRef getArchName(ArchKind AK);
+unsigned getArchAttr(ArchKind AK);
+StringRef getCPUAttr(ArchKind AK);
+StringRef getSubArch(ArchKind AK);
+StringRef getArchExtName(unsigned ArchExtKind);
+StringRef getArchExtFeature(StringRef ArchExt);
+StringRef getHWDivName(unsigned HWDivKind);
+
+// Information by Name
+unsigned getDefaultFPU(StringRef CPU, ArchKind AK);
+unsigned getDefaultExtensions(StringRef CPU, ArchKind AK);
+StringRef getDefaultCPU(StringRef Arch);
+StringRef getCanonicalArchName(StringRef Arch);
+StringRef getFPUSynonym(StringRef FPU);
+StringRef getArchSynonym(StringRef Arch);
+
+// Parser
+unsigned parseHWDiv(StringRef HWDiv);
+unsigned parseFPU(StringRef FPU);
+ArchKind parseArch(StringRef Arch);
+unsigned parseArchExt(StringRef ArchExt);
+ArchKind parseCPUArch(StringRef CPU);
+ISAKind parseArchISA(StringRef Arch);
+EndianKind parseArchEndian(StringRef Arch);
+ProfileKind parseArchProfile(StringRef Arch);
+unsigned parseArchVersion(StringRef Arch);
+
+void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
+StringRef computeDefaultTargetABI(const Triple &TT, StringRef CPU);
+
+} // namespace ARM
+} // namespace llvm
+
+#endif
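
A similar hedged sketch for the ARM-side declarations above, this time resolving an FPU name to its subtarget features; "neon-fp-armv8" is assumed to be a valid entry in ARMTargetParser.def.

#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

static void printFPUFeatures(llvm::StringRef Name) { // e.g. "neon-fp-armv8"
  using namespace llvm;
  unsigned FPUKind = ARM::parseFPU(ARM::getFPUSynonym(Name));
  std::vector<StringRef> Features;
  if (!ARM::getFPUFeatures(FPUKind, Features))
    return; // unknown FPU name
  outs() << ARM::getFPUName(FPUKind) << ":";
  for (StringRef F : Features)
    outs() << " " << F;
  outs() << "\n";
}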
diff --git a/contrib/llvm/include/llvm/Support/ARMWinEH.h b/contrib/llvm/include/llvm/Support/ARMWinEH.h
index 1463629f45dc..60174503ad49 100644
--- a/contrib/llvm/include/llvm/Support/ARMWinEH.h
+++ b/contrib/llvm/include/llvm/Support/ARMWinEH.h
@@ -207,6 +207,8 @@ std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF);
/// ExceptionDataRecord - An entry in the table of exception data (.xdata)
///
+/// The format on ARM is:
+///
/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
/// +-------+---------+-+-+-+---+-----------------------------------+
@@ -215,6 +217,16 @@ std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF);
/// | Reserved |Ex. Code Words| (Extended Epilogue Count) |
/// +-------+--------+--------------+-------------------------------+
///
+/// The format on ARM64 is:
+///
+/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
+/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+/// +---------+---------+-+-+---+-----------------------------------+
+/// | C Wrd | Epi Cnt |E|X|Ver| Function Length |
+/// +---------+------+--'-'-'---'---+-------------------------------+
+/// | Reserved |Ex. Code Words| (Extended Epilogue Count) |
+/// +-------+--------+--------------+-------------------------------+
+///
/// Function Length : 18-bit field indicating the total length of the function
/// in bytes divided by 2. If a function is larger than
/// 512KB, then multiple pdata and xdata records must be used.
@@ -225,7 +237,7 @@ std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF);
/// header
/// F : 1-bit field indicating that the record describes a function fragment
/// (implies that no prologue is present, and prologue processing should be
-/// skipped)
+/// skipped) (ARM only)
/// Epilogue Count : 5-bit field that differs in meaning based on the E field.
///
/// If E is set, then this field specifies the index of the
@@ -235,33 +247,43 @@ std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF);
/// scopes. If more than 31 scopes exist, then this field and
/// the Code Words field must both be set to 0 to indicate that
/// an extension word is required.
-/// Code Words : 4-bit field that species the number of 32-bit words needed to
-/// contain all the unwind codes. If more than 15 words (63 code
-/// bytes) are required, then this field and the Epilogue Count
-/// field must both be set to 0 to indicate that an extension word
-/// is required.
+/// Code Words : 4-bit (5-bit on ARM64) field that specifies the number of
+/// 32-bit words needed to contain all the unwind codes. If more
+/// than 15 words (31 words on ARM64) are required, then this field
+/// and the Epilogue Count field must both be set to 0 to indicate
+/// that an extension word is required.
/// Extended Epilogue Count, Extended Code Words :
/// Valid only if Epilog Count and Code Words are both
/// set to 0. Provides an 8-bit extended code word
/// count and 16-bits for epilogue count
///
+/// The epilogue scope format on ARM is:
+///
/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
/// +----------------+------+---+---+-------------------------------+
/// | Ep Start Idx | Cond |Res| Epilogue Start Offset |
/// +----------------+------+---+-----------------------------------+
///
+/// The epilogue scope format on ARM64 is:
+///
+/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
+/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+/// +-------------------+-------+---+-------------------------------+
+/// | Ep Start Idx | Res | Epilogue Start Offset |
+/// +-------------------+-------+-----------------------------------+
+///
/// If the E bit is unset in the header, the header is followed by a series of
/// epilogue scopes, which are sorted by their offset.
///
/// Epilogue Start Offset: 18-bit field encoding the offset of epilogue relative
/// to the start of the function in bytes divided by two
/// Res : 2-bit field reserved for future expansion (must be set to 0)
-/// Condition : 4-bit field providing the condition under which the epilogue is
-/// executed. Unconditional epilogues should set this field to 0xe.
-/// Epilogues must be entirely conditional or unconditional, and in
-/// Thumb-2 mode. The epilogue beings with the first instruction
-/// after the IT opcode.
+/// Condition : (ARM only) 4-bit field providing the condition under which the
+/// epilogue is executed. Unconditional epilogues should set this
+/// field to 0xe. Epilogues must be entirely conditional or
+/// unconditional, and in Thumb-2 mode. The epilogue begins with
+/// the first instruction after the IT opcode.
/// Epilogue Start Index : 8-bit field indicating the byte index of the first
/// unwind code describing the epilogue
///
@@ -293,18 +315,33 @@ struct EpilogueScope {
const support::ulittle32_t ES;
EpilogueScope(const support::ulittle32_t Data) : ES(Data) {}
+ // Same for both ARM and AArch64.
uint32_t EpilogueStartOffset() const {
return (ES & 0x0003ffff);
}
- uint8_t Res() const {
+
+ // Different implementations for ARM and AArch64.
+ uint8_t ResARM() const {
return ((ES & 0x000c0000) >> 18);
}
+
+ uint8_t ResAArch64() const {
+ return ((ES & 0x000f0000) >> 18);
+ }
+
+ // Condition is only applicable to ARM.
uint8_t Condition() const {
return ((ES & 0x00f00000) >> 20);
}
- uint8_t EpilogueStartIndex() const {
+
+ // Different implementations for ARM and AArch64.
+ uint8_t EpilogueStartIndexARM() const {
return ((ES & 0xff000000) >> 24);
}
+
+ uint16_t EpilogueStartIndexAArch64() const {
+ return ((ES & 0xffc00000) >> 22);
+ }
};
struct ExceptionDataRecord;
@@ -312,13 +349,23 @@ inline size_t HeaderWords(const ExceptionDataRecord &XR);
struct ExceptionDataRecord {
const support::ulittle32_t *Data;
+ bool isAArch64;
- ExceptionDataRecord(const support::ulittle32_t *Data) : Data(Data) {}
+ ExceptionDataRecord(const support::ulittle32_t *Data, bool isAArch64) :
+ Data(Data), isAArch64(isAArch64) {}
uint32_t FunctionLength() const {
return (Data[0] & 0x0003ffff);
}
+ uint32_t FunctionLengthInBytesARM() const {
+ return FunctionLength() << 1;
+ }
+
+ uint32_t FunctionLengthInBytesAArch64() const {
+ return FunctionLength() << 2;
+ }
+
uint8_t Vers() const {
return (Data[0] & 0x000C0000) >> 18;
}
@@ -332,18 +379,25 @@ struct ExceptionDataRecord {
}
bool F() const {
+ assert(!isAArch64 && "Fragments are only supported on ARMv7 WinEH");
return ((Data[0] & 0x00400000) >> 22);
}
uint8_t EpilogueCount() const {
- if (HeaderWords(*this) == 1)
+ if (HeaderWords(*this) == 1) {
+ if (isAArch64)
+ return (Data[0] & 0x07C00000) >> 22;
return (Data[0] & 0x0f800000) >> 23;
+ }
return Data[1] & 0x0000ffff;
}
uint8_t CodeWords() const {
- if (HeaderWords(*this) == 1)
+ if (HeaderWords(*this) == 1) {
+ if (isAArch64)
+ return (Data[0] & 0xf8000000) >> 27;
return (Data[0] & 0xf0000000) >> 28;
+ }
return (Data[1] & 0x00ff0000) >> 16;
}
@@ -373,6 +427,8 @@ struct ExceptionDataRecord {
};
inline size_t HeaderWords(const ExceptionDataRecord &XR) {
+ if (XR.isAArch64)
+ return (XR.Data[0] & 0xffc00000) ? 1 : 2;
return (XR.Data[0] & 0xff800000) ? 1 : 2;
}
}
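
A small sketch of decoding one packed .xdata header with the updated record type above, choosing the ARM or ARM64 field layout at run time. The accessors come from this header; the namespace and the caller-supplied word buffer are assumptions.

#include "llvm/Support/ARMWinEH.h"
#include "llvm/Support/raw_ostream.h"

static void dumpXDataHeader(const llvm::support::ulittle32_t *Words,
                            bool IsAArch64) {
  using namespace llvm::ARM::WinEH;
  ExceptionDataRecord XR(Words, IsAArch64);
  llvm::outs() << "function length (bytes): "
               << (IsAArch64 ? XR.FunctionLengthInBytesAArch64()
                             : XR.FunctionLengthInBytesARM())
               << ", epilogue count: " << unsigned(XR.EpilogueCount())
               << ", code words: " << unsigned(XR.CodeWords())
               << ", header words: " << HeaderWords(XR) << "\n";
}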
diff --git a/contrib/llvm/include/llvm/Support/Allocator.h b/contrib/llvm/include/llvm/Support/Allocator.h
index 184ac491b1f1..42d08378a677 100644
--- a/contrib/llvm/include/llvm/Support/Allocator.h
+++ b/contrib/llvm/include/llvm/Support/Allocator.h
@@ -21,6 +21,7 @@
#ifndef LLVM_SUPPORT_ALLOCATOR_H
#define LLVM_SUPPORT_ALLOCATOR_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -283,6 +284,60 @@ public:
size_t GetNumSlabs() const { return Slabs.size() + CustomSizedSlabs.size(); }
+ /// \return An index uniquely and reproducibly identifying
+ /// an input pointer \p Ptr in the given allocator.
+ /// The returned value is negative iff the object is inside a custom-size
+ /// slab.
+ /// Returns an empty optional if the pointer is not found in the allocator.
+ llvm::Optional<int64_t> identifyObject(const void *Ptr) {
+ const char *P = static_cast<const char *>(Ptr);
+ int64_t InSlabIdx = 0;
+ for (size_t Idx = 0, E = Slabs.size(); Idx < E; Idx++) {
+ const char *S = static_cast<const char *>(Slabs[Idx]);
+ if (P >= S && P < S + computeSlabSize(Idx))
+ return InSlabIdx + static_cast<int64_t>(P - S);
+ InSlabIdx += static_cast<int64_t>(computeSlabSize(Idx));
+ }
+
+ // Use negative index to denote custom sized slabs.
+ int64_t InCustomSizedSlabIdx = -1;
+ for (size_t Idx = 0, E = CustomSizedSlabs.size(); Idx < E; Idx++) {
+ const char *S = static_cast<const char *>(CustomSizedSlabs[Idx].first);
+ size_t Size = CustomSizedSlabs[Idx].second;
+ if (P >= S && P < S + Size)
+ return InCustomSizedSlabIdx - static_cast<int64_t>(P - S);
+ InCustomSizedSlabIdx -= static_cast<int64_t>(Size);
+ }
+ return None;
+ }
+
+ /// A wrapper around identifyObject that additionally asserts that
+ /// the object is indeed within the allocator.
+ /// \return An index uniquely and reproducibly identifying
+ /// an input pointer \p Ptr in the given allocator.
+ int64_t identifyKnownObject(const void *Ptr) {
+ Optional<int64_t> Out = identifyObject(Ptr);
+ assert(Out && "Wrong allocator used");
+ return *Out;
+ }
+
+ /// A wrapper around identifyKnownObject. Accepts type information
+ /// about the object and produces a smaller identifier by relying on
+ /// the alignment information. Note that sub-classes may have different
+ /// alignment, so the most base class should be passed as template parameter
+ /// in order to obtain correct results. For that reason automatic template
+ /// parameter deduction is disabled.
+ /// \return An index uniquely and reproducibly identifying
+ /// an input pointer \p Ptr in the given allocator. This identifier is
+ /// different from the ones produced by identifyObject and
+ /// identifyAlignedObject.
+ template <typename T>
+ int64_t identifyKnownAlignedObject(const void *Ptr) {
+ int64_t Out = identifyKnownObject(Ptr);
+ assert(Out % alignof(T) == 0 && "Wrong alignment information");
+ return Out / alignof(T);
+ }
+
size_t getTotalMemory() const {
size_t TotalMemory = 0;
for (auto I = Slabs.begin(), E = Slabs.end(); I != E; ++I)
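
A minimal sketch of the new identification helpers added above, assuming a default BumpPtrAllocator: each allocation gets a stable, allocator-relative index, and identifyKnownAlignedObject scales that index by the type's alignment.

#include "llvm/Support/Allocator.h"
#include <cassert>
#include <cstdint>

static void identifyExample() {
  llvm::BumpPtrAllocator Alloc;
  int *A = Alloc.Allocate<int>();
  int *B = Alloc.Allocate<int>();
  int64_t RawA = Alloc.identifyKnownObject(A);              // byte index in a slab
  int64_t IdxA = Alloc.identifyKnownAlignedObject<int>(A);  // scaled by alignof(int)
  int64_t IdxB = Alloc.identifyKnownAlignedObject<int>(B);
  assert(RawA % alignof(int) == 0 && IdxA != IdxB &&
         "distinct objects receive distinct, aligned identifiers");
  (void)RawA; (void)IdxA; (void)IdxB;
}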
diff --git a/contrib/llvm/include/llvm/Support/BinaryStreamArray.h b/contrib/llvm/include/llvm/Support/BinaryStreamArray.h
index d1571cb37fc6..7c110fcb6a4b 100644
--- a/contrib/llvm/include/llvm/Support/BinaryStreamArray.h
+++ b/contrib/llvm/include/llvm/Support/BinaryStreamArray.h
@@ -96,21 +96,32 @@ public:
explicit VarStreamArray(const Extractor &E) : E(E) {}
- explicit VarStreamArray(BinaryStreamRef Stream) : Stream(Stream) {}
+ explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0)
+ : Stream(Stream), Skew(Skew) {}
- VarStreamArray(BinaryStreamRef Stream, const Extractor &E)
- : Stream(Stream), E(E) {}
+ VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0)
+ : Stream(Stream), E(E), Skew(Skew) {}
Iterator begin(bool *HadError = nullptr) const {
- return Iterator(*this, E, HadError);
+ return Iterator(*this, E, Skew, nullptr);
}
bool valid() const { return Stream.valid(); }
+ uint32_t skew() const { return Skew; }
Iterator end() const { return Iterator(E); }
bool empty() const { return Stream.getLength() == 0; }
+ VarStreamArray<ValueType, Extractor> substream(uint32_t Begin,
+ uint32_t End) const {
+ assert(Begin >= Skew);
+ // We should never cut off the beginning of the stream since it might be
+ // skewed, meaning the initial bytes are important.
+ BinaryStreamRef NewStream = Stream.slice(0, End);
+ return {NewStream, E, Begin};
+ }
+
/// given an offset into the array's underlying stream, return an
/// iterator to the record at that offset. This is considered unsafe
/// since the behavior is undefined if \p Offset does not refer to the
@@ -123,11 +134,17 @@ public:
Extractor &getExtractor() { return E; }
BinaryStreamRef getUnderlyingStream() const { return Stream; }
- void setUnderlyingStream(BinaryStreamRef S) { Stream = S; }
+ void setUnderlyingStream(BinaryStreamRef S, uint32_t Skew = 0) {
+ Stream = S;
+ this->Skew = Skew;
+ }
+
+ void drop_front() { Skew += begin()->length(); }
private:
BinaryStreamRef Stream;
Extractor E;
+ uint32_t Skew;
};
template <typename ValueType, typename Extractor>
@@ -139,10 +156,6 @@ class VarStreamArrayIterator
public:
VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
- bool *HadError)
- : VarStreamArrayIterator(Array, E, 0, HadError) {}
-
- VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
uint32_t Offset, bool *HadError)
: IterRef(Array.Stream.drop_front(Offset)), Extract(E),
Array(&Array), AbsOffset(Offset), HadError(HadError) {
diff --git a/contrib/llvm/include/llvm/Support/BinaryStreamReader.h b/contrib/llvm/include/llvm/Support/BinaryStreamReader.h
index fe77b550c453..392958de30d5 100644
--- a/contrib/llvm/include/llvm/Support/BinaryStreamReader.h
+++ b/contrib/llvm/include/llvm/Support/BinaryStreamReader.h
@@ -203,11 +203,12 @@ public:
/// \returns a success error code if the data was successfully read, otherwise
/// returns an appropriate error code.
template <typename T, typename U>
- Error readArray(VarStreamArray<T, U> &Array, uint32_t Size) {
+ Error readArray(VarStreamArray<T, U> &Array, uint32_t Size,
+ uint32_t Skew = 0) {
BinaryStreamRef S;
if (auto EC = readStreamRef(S, Size))
return EC;
- Array.setUnderlyingStream(S);
+ Array.setUnderlyingStream(S, Skew);
return Error::success();
}
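
A hedged, templated sketch of the new Skew parameter above: records are assumed to start HeaderBytes into the stream, but the leading bytes are kept so record offsets stay absolute. RecordT and ExtractorT stand in for whatever concrete types a client already uses with VarStreamArray.

#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
#include <cstdint>

template <typename RecordT, typename ExtractorT>
llvm::Error readSkewedArray(llvm::BinaryStreamReader &Reader,
                            llvm::VarStreamArray<RecordT, ExtractorT> &Array,
                            uint32_t Size, uint32_t HeaderBytes) {
  // Iteration will begin at offset HeaderBytes, not at offset 0.
  if (auto EC = Reader.readArray(Array, Size, /*Skew=*/HeaderBytes))
    return EC;
  // substream() keeps the skewed prefix, so this slice stays well-formed.
  auto Tail = Array.substream(Array.skew(), Size);
  (void)Tail;
  return llvm::Error::success();
}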
diff --git a/contrib/llvm/include/llvm/Support/BuryPointer.h b/contrib/llvm/include/llvm/Support/BuryPointer.h
new file mode 100644
index 000000000000..53f1f395b922
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/BuryPointer.h
@@ -0,0 +1,30 @@
+//===- llvm/Support/BuryPointer.h - Memory Manipulation/Leak ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_BURYPOINTER_H
+#define LLVM_SUPPORT_BURYPOINTER_H
+
+#include <memory>
+
+namespace llvm {
+
+// In tools that will exit soon anyway, going through the process of explicitly
+// deallocating resources can be unnecessary - better to leak the resources and
+// let the OS clean them up when the process ends. Use this function to ensure
+// the memory is not misdiagnosed as an unintentional leak by leak detection
+// tools (this is achieved by preserving pointers to the object in a globally
+// visible array).
+void BuryPointer(const void *Ptr);
+template <typename T> void BuryPointer(std::unique_ptr<T> Ptr) {
+ BuryPointer(Ptr.release());
+}
+
+} // namespace llvm
+
+#endif
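
A short sketch of the intended use described in the comment above: a tool that is about to exit hands off an expensive-to-destroy object instead of destructing it, so leak checkers stay quiet. FrontendData is a hypothetical type.

#include "llvm/Support/BuryPointer.h"
#include <memory>

struct FrontendData { /* large object graph, expensive to tear down */ };

static void finishAndExitSoon(std::unique_ptr<FrontendData> Data) {
  // Deliberately "leak" the object, keeping a globally visible pointer to it
  // so leak-detection tools do not report it.
  llvm::BuryPointer(std::move(Data));
}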
diff --git a/contrib/llvm/include/llvm/Support/CFGUpdate.h b/contrib/llvm/include/llvm/Support/CFGUpdate.h
new file mode 100644
index 000000000000..63c24a3d2a20
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/CFGUpdate.h
@@ -0,0 +1,118 @@
+//===- CFGUpdate.h - Encode a CFG Edge Update. ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a CFG Edge Update: Insert or Delete, and two Nodes as the
+// Edge ends.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CFGUPDATE_H
+#define LLVM_SUPPORT_CFGUPDATE_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace cfg {
+enum class UpdateKind : unsigned char { Insert, Delete };
+
+template <typename NodePtr> class Update {
+ using NodeKindPair = PointerIntPair<NodePtr, 1, UpdateKind>;
+ NodePtr From;
+ NodeKindPair ToAndKind;
+
+public:
+ Update(UpdateKind Kind, NodePtr From, NodePtr To)
+ : From(From), ToAndKind(To, Kind) {}
+
+ UpdateKind getKind() const { return ToAndKind.getInt(); }
+ NodePtr getFrom() const { return From; }
+ NodePtr getTo() const { return ToAndKind.getPointer(); }
+ bool operator==(const Update &RHS) const {
+ return From == RHS.From && ToAndKind == RHS.ToAndKind;
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << (getKind() == UpdateKind::Insert ? "Insert " : "Delete ");
+ getFrom()->printAsOperand(OS, false);
+ OS << " -> ";
+ getTo()->printAsOperand(OS, false);
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
+};
+
+// LegalizeUpdates function simplifies updates assuming a graph structure.
+// This function serves double purpose:
+// a) It removes redundant updates, which makes it easier to reverse-apply
+// them when traversing CFG.
+// b) It optimizes away updates that cancel each other out, as the end result
+// is the same.
+template <typename NodePtr>
+void LegalizeUpdates(ArrayRef<Update<NodePtr>> AllUpdates,
+ SmallVectorImpl<Update<NodePtr>> &Result,
+ bool InverseGraph) {
+ // Count the total number of insertions of each edge.
+ // Each insertion adds 1 and deletion subtracts 1. The end number should be
+ // one of {-1 (deletion), 0 (NOP), +1 (insertion)}. Otherwise, the sequence
+ // of updates contains multiple updates of the same kind and we assert for
+ // that case.
+ SmallDenseMap<std::pair<NodePtr, NodePtr>, int, 4> Operations;
+ Operations.reserve(AllUpdates.size());
+
+ for (const auto &U : AllUpdates) {
+ NodePtr From = U.getFrom();
+ NodePtr To = U.getTo();
+ if (InverseGraph)
+ std::swap(From, To); // Reverse edge for postdominators.
+
+ Operations[{From, To}] += (U.getKind() == UpdateKind::Insert ? 1 : -1);
+ }
+
+ Result.clear();
+ Result.reserve(Operations.size());
+ for (auto &Op : Operations) {
+ const int NumInsertions = Op.second;
+ assert(std::abs(NumInsertions) <= 1 && "Unbalanced operations!");
+ if (NumInsertions == 0)
+ continue;
+ const UpdateKind UK =
+ NumInsertions > 0 ? UpdateKind::Insert : UpdateKind::Delete;
+ Result.push_back({UK, Op.first.first, Op.first.second});
+ }
+
+ // Make the order consistent by not relying on pointer values within the
+ // set. Reuse the old Operations map.
+ // In the future, we should sort by something else to minimize the amount
+ // of work needed to perform the series of updates.
+ for (size_t i = 0, e = AllUpdates.size(); i != e; ++i) {
+ const auto &U = AllUpdates[i];
+ if (!InverseGraph)
+ Operations[{U.getFrom(), U.getTo()}] = int(i);
+ else
+ Operations[{U.getTo(), U.getFrom()}] = int(i);
+ }
+
+ llvm::sort(Result,
+ [&Operations](const Update<NodePtr> &A, const Update<NodePtr> &B) {
+ return Operations[{A.getFrom(), A.getTo()}] >
+ Operations[{B.getFrom(), B.getTo()}];
+ });
+}
+
+} // end namespace cfg
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_CFGUPDATE_H
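
A sketch of the legalization behaviour documented above: an insertion and a deletion of the same edge cancel, so only the net update survives. NodePtr is left generic; any CFG node pointer type should work.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CFGUpdate.h"

template <typename NodePtr>
void legalizeExample(NodePtr A, NodePtr B, NodePtr C) {
  using U = llvm::cfg::Update<NodePtr>;
  llvm::SmallVector<U, 4> Raw = {
      {llvm::cfg::UpdateKind::Insert, A, B},
      {llvm::cfg::UpdateKind::Delete, A, B},  // cancels the insertion above
      {llvm::cfg::UpdateKind::Insert, B, C}};
  llvm::SmallVector<U, 4> Legal;
  llvm::cfg::LegalizeUpdates<NodePtr>(Raw, Legal, /*InverseGraph=*/false);
  // Legal now contains only the Insert B -> C update.
}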
diff --git a/contrib/llvm/include/llvm/Support/Chrono.h b/contrib/llvm/include/llvm/Support/Chrono.h
index 994068af3771..57677e8d5cf1 100644
--- a/contrib/llvm/include/llvm/Support/Chrono.h
+++ b/contrib/llvm/include/llvm/Support/Chrono.h
@@ -47,6 +47,14 @@ toTimePoint(std::time_t T) {
return time_point_cast<seconds>(system_clock::from_time_t(T));
}
+/// Convert a std::time_t + nanoseconds to a TimePoint
+LLVM_ATTRIBUTE_ALWAYS_INLINE inline TimePoint<>
+toTimePoint(std::time_t T, uint32_t nsec) {
+ using namespace std::chrono;
+ return time_point_cast<nanoseconds>(system_clock::from_time_t(T))
+ + nanoseconds(nsec);
+}
+
} // namespace sys
raw_ostream &operator<<(raw_ostream &OS, sys::TimePoint<> TP);
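
A small sketch of the new overload above, assuming a POSIX-style timespec as the source of the seconds/nanoseconds pair.

#include "llvm/Support/Chrono.h"
#include <cstdint>
#include <ctime>

static llvm::sys::TimePoint<> fromTimespec(const timespec &TS) {
  // Nanosecond-precision conversion instead of the seconds-only overload.
  return llvm::sys::toTimePoint(TS.tv_sec,
                                static_cast<uint32_t>(TS.tv_nsec));
}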
diff --git a/contrib/llvm/include/llvm/Support/CodeGen.h b/contrib/llvm/include/llvm/Support/CodeGen.h
index 5f9e33129587..22e74167266c 100644
--- a/contrib/llvm/include/llvm/Support/CodeGen.h
+++ b/contrib/llvm/include/llvm/Support/CodeGen.h
@@ -25,7 +25,7 @@ namespace llvm {
// Code model types.
namespace CodeModel {
// Sync changes with CodeGenCWrappers.h.
- enum Model { Small, Kernel, Medium, Large };
+ enum Model { Tiny, Small, Kernel, Medium, Large };
}
namespace PICLevel {
@@ -57,6 +57,11 @@ namespace llvm {
};
}
+ // Specify effect of frame pointer elimination optimization.
+ namespace FramePointer {
+ enum FP {All, NonLeaf, None};
+ }
+
} // end llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Support/CommandLine.h b/contrib/llvm/include/llvm/Support/CommandLine.h
index 799b41fbf8b0..a8ad89384d17 100644
--- a/contrib/llvm/include/llvm/Support/CommandLine.h
+++ b/contrib/llvm/include/llvm/Support/CommandLine.h
@@ -56,9 +56,18 @@ namespace cl {
// Returns true on success. Otherwise, this will print the error message to
// stderr and exit if \p Errs is not set (nullptr by default), or print the
// error message to \p Errs and return false if \p Errs is provided.
+//
+// If EnvVar is not nullptr, command-line options are also parsed from the
+// environment variable named by EnvVar. Precedence is given to occurrences
+// from argv. This precedence is currently implemented by parsing argv after
+// the environment variable, so it is only implemented correctly for options
+// that give precedence to later occurrences. If your program supports options
+// that give precedence to earlier occurrences, you will need to extend this
+// function to support it correctly.
bool ParseCommandLineOptions(int argc, const char *const *argv,
StringRef Overview = "",
- raw_ostream *Errs = nullptr);
+ raw_ostream *Errs = nullptr,
+ const char *EnvVar = nullptr);
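
A hedged sketch of the new EnvVar parameter documented above: options from the named environment variable are parsed before argv, so the argv occurrences win where later occurrences take precedence. The variable name MYTOOL_OPTIONS is illustrative, not something LLVM defines.

#include "llvm/Support/CommandLine.h"

int main(int argc, char **argv) {
  // Also honours options from $MYTOOL_OPTIONS, with argv taking precedence.
  llvm::cl::ParseCommandLineOptions(argc, argv, "my tool\n",
                                    /*Errs=*/nullptr,
                                    /*EnvVar=*/"MYTOOL_OPTIONS");
  return 0;
}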
//===----------------------------------------------------------------------===//
// ParseEnvironmentOptions - Environment variable option processing alternate
@@ -147,6 +156,9 @@ enum OptionHidden { // Control whether -help shows this option
// enabled, and used, the value for the flag comes from the suffix of the
// argument.
//
+// AlwaysPrefix - Only allow the behavior enabled by the Prefix flag and reject
+// the Option=Value form.
+//
// Grouping - With this option enabled, multiple letter options are allowed to
// bunch together with only a single hyphen for the whole group. This allows
// emulation of the behavior that ls uses for example: ls -la === ls -l -a
@@ -156,7 +168,8 @@ enum FormattingFlags {
NormalFormatting = 0x00, // Nothing special
Positional = 0x01, // Is a positional argument, no '-' required
Prefix = 0x02, // Can this option directly prefix its value?
- Grouping = 0x03 // Can this option group with other options?
+ AlwaysPrefix = 0x03, // Can this option only directly prefix its value?
+ Grouping = 0x04 // Can this option group with other options?
};
enum MiscFlags { // Miscellaneous flags to adjust argument
@@ -256,7 +269,7 @@ class Option {
// detail representing the non-value
unsigned Value : 2;
unsigned HiddenFlag : 2; // enum OptionHidden
- unsigned Formatting : 2; // enum FormattingFlags
+ unsigned Formatting : 3; // enum FormattingFlags
unsigned Misc : 3;
unsigned Position = 0; // Position of last occurrence of the option
unsigned AdditionalVals = 0; // Greater than 0 for multi-valued option.
diff --git a/contrib/llvm/include/llvm/Support/Compiler.h b/contrib/llvm/include/llvm/Support/Compiler.h
index 4de815fe61d7..14e4d6e97140 100644
--- a/contrib/llvm/include/llvm/Support/Compiler.h
+++ b/contrib/llvm/include/llvm/Support/Compiler.h
@@ -133,6 +133,19 @@
#define LLVM_NODISCARD
#endif
+// Indicate that a non-static, non-const C++ member function reinitializes
+// the entire object to a known state, independent of the previous state of
+// the object.
+//
+// The clang-tidy check bugprone-use-after-move recognizes this attribute as a
+// marker that a moved-from object has left the indeterminate state and can be
+// reused.
+#if __has_cpp_attribute(clang::reinitializes)
+#define LLVM_ATTRIBUTE_REINITIALIZES [[clang::reinitializes]]
+#else
+#define LLVM_ATTRIBUTE_REINITIALIZES
+#endif
+
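A short sketch of where the new attribute is meant to appear: on a member function that puts a (possibly moved-from) object back into a fully known state. The Buffer class is illustrative only.

#include "llvm/Support/Compiler.h"
#include <vector>

class Buffer {
  std::vector<char> Data;

public:
  // bugprone-use-after-move treats the object as usable again after clear().
  LLVM_ATTRIBUTE_REINITIALIZES void clear() { Data.clear(); }
};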
// Some compilers warn about unused functions. When a function is sometimes
// used or not depending on build settings (e.g. a function only called from
// within "assert"), this attribute can be used to suppress such warnings.
@@ -519,7 +532,7 @@ namespace llvm {
/// reduced default alignment.
inline void *allocate_buffer(size_t Size, size_t Alignment) {
return ::operator new(Size
-#if __cpp_aligned_new
+#ifdef __cpp_aligned_new
,
std::align_val_t(Alignment)
#endif
@@ -535,11 +548,11 @@ inline void *allocate_buffer(size_t Size, size_t Alignment) {
/// most likely using the above helper.
inline void deallocate_buffer(void *Ptr, size_t Size, size_t Alignment) {
::operator delete(Ptr
-#if __cpp_sized_deallocation
+#ifdef __cpp_sized_deallocation
,
Size
#endif
-#if __cpp_aligned_new
+#ifdef __cpp_aligned_new
,
std::align_val_t(Alignment)
#endif
diff --git a/contrib/llvm/include/llvm/Support/Compression.h b/contrib/llvm/include/llvm/Support/Compression.h
index 2d191abe4b1a..f7258f4bf8f8 100644
--- a/contrib/llvm/include/llvm/Support/Compression.h
+++ b/contrib/llvm/include/llvm/Support/Compression.h
@@ -23,17 +23,15 @@ class StringRef;
namespace zlib {
-enum CompressionLevel {
- NoCompression,
- DefaultCompression,
- BestSpeedCompression,
- BestSizeCompression
-};
+static constexpr int NoCompression = 0;
+static constexpr int BestSpeedCompression = 1;
+static constexpr int DefaultCompression = 6;
+static constexpr int BestSizeCompression = 9;
bool isAvailable();
Error compress(StringRef InputBuffer, SmallVectorImpl<char> &CompressedBuffer,
- CompressionLevel Level = DefaultCompression);
+ int Level = DefaultCompression);
Error uncompress(StringRef InputBuffer, char *UncompressedBuffer,
size_t &UncompressedSize);
@@ -49,4 +47,3 @@ uint32_t crc32(StringRef Buffer);
} // End of namespace llvm
#endif
-
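
A sketch of the call after the change above: the level is now a plain int, so either the named constants or any zlib level from 0 to 9 can be passed. The error text is illustrative.

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Error.h"

static llvm::Error compressBestSize(llvm::StringRef Input,
                                    llvm::SmallVectorImpl<char> &Out) {
  if (!llvm::zlib::isAvailable())
    return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                   "zlib is not available");
  return llvm::zlib::compress(Input, Out, llvm::zlib::BestSizeCompression);
}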
diff --git a/contrib/llvm/include/llvm/Support/Debug.h b/contrib/llvm/include/llvm/Support/Debug.h
index 980abfb0e8da..df86dbb82414 100644
--- a/contrib/llvm/include/llvm/Support/Debug.h
+++ b/contrib/llvm/include/llvm/Support/Debug.h
@@ -94,6 +94,10 @@ extern bool VerifyDomInfo;
///
extern bool VerifyLoopInfo;
+/// Enables verification of MemorySSA.
+///
+extern bool VerifyMemorySSA;
+
///\}
/// EnableDebugBuffering - This defaults to false. If true, the debug
diff --git a/contrib/llvm/include/llvm/Support/DebugCounter.h b/contrib/llvm/include/llvm/Support/DebugCounter.h
index 83bd5a06c94a..6eadd5c6aeff 100644
--- a/contrib/llvm/include/llvm/Support/DebugCounter.h
+++ b/contrib/llvm/include/llvm/Support/DebugCounter.h
@@ -55,6 +55,8 @@ namespace llvm {
class DebugCounter {
public:
+ ~DebugCounter();
+
/// Returns a reference to the singleton instance.
static DebugCounter &instance();
diff --git a/contrib/llvm/include/llvm/Support/Error.h b/contrib/llvm/include/llvm/Support/Error.h
index 8015cab45a06..ee2cbeec97a8 100644
--- a/contrib/llvm/include/llvm/Support/Error.h
+++ b/contrib/llvm/include/llvm/Support/Error.h
@@ -14,8 +14,9 @@
#ifndef LLVM_SUPPORT_ERROR_H
#define LLVM_SUPPORT_ERROR_H
-#include "llvm/ADT/SmallVector.h"
+#include "llvm-c/Error.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/abi-breaking.h"
@@ -155,9 +156,10 @@ private:
/// they're moved-assigned or constructed from Success values that have already
/// been checked. This enforces checking through all levels of the call stack.
class LLVM_NODISCARD Error {
- // ErrorList needs to be able to yank ErrorInfoBase pointers out of this
- // class to add to the error list.
+ // Both ErrorList and FileError need to be able to yank ErrorInfoBase
+ // pointers out of this class to add to the error list.
friend class ErrorList;
+ friend class FileError;
// handleErrors needs to be able to set the Checked flag.
template <typename... HandlerTs>
@@ -167,6 +169,9 @@ class LLVM_NODISCARD Error {
// error.
template <typename T> friend class Expected;
+ // wrap needs to be able to steal the payload.
+ friend LLVMErrorRef wrap(Error);
+
protected:
/// Create a success value. Prefer using 'Error::success()' for readability
Error() {
@@ -317,7 +322,7 @@ private:
/// Subclass of Error for the sole purpose of identifying the success path in
/// the type system. This allows to catch invalid conversion to Expected<T> at
/// compile time.
-class ErrorSuccess : public Error {};
+class ErrorSuccess final : public Error {};
inline ErrorSuccess Error::success() { return ErrorSuccess(); }
@@ -339,6 +344,8 @@ template <typename ErrT, typename... ArgTs> Error make_error(ArgTs &&... Args) {
template <typename ThisErrT, typename ParentErrT = ErrorInfoBase>
class ErrorInfo : public ParentErrT {
public:
+ using ParentErrT::ParentErrT; // inherit constructors
+
static const void *classID() { return &ThisErrT::ID; }
const void *dynamicClassID() const override { return &ThisErrT::ID; }
@@ -946,10 +953,14 @@ Expected<T> handleExpected(Expected<T> ValOrErr, RecoveryFtor &&RecoveryPath,
/// will be printed before the first one is logged. A newline will be printed
/// after each error.
///
+/// This function is compatible with the helpers from Support/WithColor.h. You
+/// can pass any of them as the OS. Please consider using them instead of
+/// including 'error: ' in the ErrorBanner.
+///
/// This is useful in the base level of your program to allow clean termination
/// (allowing clean deallocation of resources, etc.), while reporting error
/// information to the user.
-void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner);
+void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner = {});
/// Write all error messages (if any) in E to a string. The newline character
/// is used to separate error messages.
@@ -1055,6 +1066,8 @@ private:
class ECError : public ErrorInfo<ECError> {
friend Error errorCodeToError(std::error_code);
+ virtual void anchor() override;
+
public:
void setErrorCode(std::error_code EC) { this->EC = EC; }
std::error_code convertToErrorCode() const override { return EC; }
@@ -1106,10 +1119,33 @@ template <typename T> ErrorOr<T> expectedToErrorOr(Expected<T> &&E) {
/// StringError is useful in cases where the client is not expected to be able
/// to consume the specific error message programmatically (for example, if the
/// error message is to be presented to the user).
+///
+/// StringError can also be used when additional information is to be printed
+/// along with an error_code message. Depending on the constructor called, this
+/// class can either display:
+/// 1. the error_code message (ECError behavior)
+/// 2. a string
+/// 3. the error_code message and a string
+///
+/// These behaviors are useful when subtyping is required; for example, when a
+/// specific library needs an explicit error type. In the example below,
+/// PDBError is derived from StringError:
+///
+/// @code{.cpp}
+/// Expected<int> foo() {
+/// return llvm::make_error<PDBError>(pdb_error_code::dia_failed_loading,
+/// "Additional information");
+/// }
+/// @endcode
+///
class StringError : public ErrorInfo<StringError> {
public:
static char ID;
+ // Prints EC + S and converts to EC
+ StringError(std::error_code EC, const Twine &S = Twine());
+
+ // Prints S and converts to EC
StringError(const Twine &S, std::error_code EC);
void log(raw_ostream &OS) const override;
@@ -1120,6 +1156,7 @@ public:
private:
std::string Msg;
std::error_code EC;
+ const bool PrintMsgOnly = false;
};
/// Create formatted StringError object.
@@ -1134,6 +1171,53 @@ Error createStringError(std::error_code EC, char const *Fmt,
Error createStringError(std::error_code EC, char const *Msg);
+/// This class wraps a filename and another Error.
+///
+/// In some cases, an error needs to live alongside a 'source' name, in order to
+/// show more detailed information to the user.
+class FileError final : public ErrorInfo<FileError> {
+
+ friend Error createFileError(std::string, Error);
+
+public:
+ void log(raw_ostream &OS) const override {
+ assert(Err && !FileName.empty() && "Trying to log after takeError().");
+ OS << "'" << FileName << "': ";
+ Err->log(OS);
+ }
+
+ Error takeError() { return Error(std::move(Err)); }
+
+ std::error_code convertToErrorCode() const override;
+
+ // Used by ErrorInfo::classID.
+ static char ID;
+
+private:
+ FileError(std::string F, std::unique_ptr<ErrorInfoBase> E) {
+ assert(E && "Cannot create FileError from Error success value.");
+ assert(!F.empty() &&
+ "The file name provided to FileError must not be empty.");
+ FileName = F;
+ Err = std::move(E);
+ }
+
+ static Error build(std::string F, Error E) {
+ return Error(std::unique_ptr<FileError>(new FileError(F, E.takePayload())));
+ }
+
+ std::string FileName;
+ std::unique_ptr<ErrorInfoBase> Err;
+};
+
+/// Concatenate a source file path and/or name with an Error. The resulting
+/// Error is unchecked.
+inline Error createFileError(std::string F, Error E) {
+ return FileError::build(F, std::move(E));
+}
+
+Error createFileError(std::string F, ErrorSuccess) = delete;
+
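A sketch of attaching a file name to an existing Error with the helper above; the path and the inner message are illustrative.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"

static llvm::Error openFailed(llvm::StringRef Path) {
  llvm::Error Inner = llvm::createStringError(llvm::inconvertibleErrorCode(),
                                              "permission denied");
  // Logged as: 'Path': permission denied
  return llvm::createFileError(Path.str(), std::move(Inner));
}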
/// Helper for check-and-exit error handling.
///
/// For tool use only. NOT FOR USE IN LIBRARY CODE.
@@ -1183,6 +1267,17 @@ private:
std::function<int(const Error &)> GetExitCode;
};
+/// Conversion from Error to LLVMErrorRef for C error bindings.
+inline LLVMErrorRef wrap(Error Err) {
+ return reinterpret_cast<LLVMErrorRef>(Err.takePayload().release());
+}
+
+/// Conversion from LLVMErrorRef to Error for C error bindings.
+inline Error unwrap(LLVMErrorRef ErrRef) {
+ return Error(std::unique_ptr<ErrorInfoBase>(
+ reinterpret_cast<ErrorInfoBase *>(ErrRef)));
+}
+
} // end namespace llvm
#endif // LLVM_SUPPORT_ERROR_H
diff --git a/contrib/llvm/include/llvm/Support/ErrorHandling.h b/contrib/llvm/include/llvm/Support/ErrorHandling.h
index 39cbfed2436a..fec39e59a717 100644
--- a/contrib/llvm/include/llvm/Support/ErrorHandling.h
+++ b/contrib/llvm/include/llvm/Support/ErrorHandling.h
@@ -112,8 +112,8 @@ void install_out_of_memory_new_handler();
/// in the unwind chain.
///
/// If no error handler is installed (default), then a bad_alloc exception
-/// is thrown, if LLVM is compiled with exception support, otherwise an assertion
-/// is called.
+/// is thrown, if LLVM is compiled with exception support, otherwise an
+/// assertion is called.
void report_bad_alloc_error(const char *Reason, bool GenCrashDiag = true);
/// This function calls abort(), and prints the optional message to stderr.
diff --git a/contrib/llvm/include/llvm/Support/FileCheck.h b/contrib/llvm/include/llvm/Support/FileCheck.h
new file mode 100644
index 000000000000..4061a26e22c5
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/FileCheck.h
@@ -0,0 +1,282 @@
+//==-- llvm/Support/FileCheck.h ---------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file has some utilities to use FileCheck as an API
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FILECHECK_H
+#define LLVM_SUPPORT_FILECHECK_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/SourceMgr.h"
+#include <vector>
+#include <map>
+
+namespace llvm {
+
+/// Contains info about various FileCheck options.
+struct FileCheckRequest {
+ std::vector<std::string> CheckPrefixes;
+ bool NoCanonicalizeWhiteSpace = false;
+ std::vector<std::string> ImplicitCheckNot;
+ std::vector<std::string> GlobalDefines;
+ bool AllowEmptyInput = false;
+ bool MatchFullLines = false;
+ bool EnableVarScope = false;
+ bool AllowDeprecatedDagOverlap = false;
+ bool Verbose = false;
+ bool VerboseVerbose = false;
+};
+
+
+//===----------------------------------------------------------------------===//
+// Pattern Handling Code.
+//===----------------------------------------------------------------------===//
+
+namespace Check {
+
+enum FileCheckKind {
+ CheckNone = 0,
+ CheckPlain,
+ CheckNext,
+ CheckSame,
+ CheckNot,
+ CheckDAG,
+ CheckLabel,
+ CheckEmpty,
+
+ /// Indicates the pattern only matches the end of file. This is used for
+ /// trailing CHECK-NOTs.
+ CheckEOF,
+
+ /// Marks when parsing found a -NOT check combined with another CHECK suffix.
+ CheckBadNot,
+
+ /// Marks when parsing found a -COUNT directive with invalid count value.
+ CheckBadCount
+};
+
+class FileCheckType {
+ FileCheckKind Kind;
+ int Count; ///< optional Count for some checks
+
+public:
+ FileCheckType(FileCheckKind Kind = CheckNone) : Kind(Kind), Count(1) {}
+ FileCheckType(const FileCheckType &) = default;
+
+ operator FileCheckKind() const { return Kind; }
+
+ int getCount() const { return Count; }
+ FileCheckType &setCount(int C);
+
+ std::string getDescription(StringRef Prefix) const;
+};
+}
+
+struct FileCheckDiag;
+
+class FileCheckPattern {
+ SMLoc PatternLoc;
+
+ /// A fixed string to match as the pattern or empty if this pattern requires
+ /// a regex match.
+ StringRef FixedStr;
+
+ /// A regex string to match as the pattern or empty if this pattern requires
+ /// a fixed string to match.
+ std::string RegExStr;
+
+ /// Entries in this vector map to uses of a variable in the pattern, e.g.
+ /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and
+ /// we'll get an entry in this vector that tells us to insert the value of
+ /// bar at offset 3.
+ std::vector<std::pair<StringRef, unsigned>> VariableUses;
+
+ /// Maps definitions of variables to their parenthesized capture numbers.
+ ///
+ /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
+ /// 1.
+ std::map<StringRef, unsigned> VariableDefs;
+
+ Check::FileCheckType CheckTy;
+
+ /// Contains the number of the line this pattern is defined on.
+ unsigned LineNumber;
+
+public:
+ explicit FileCheckPattern(Check::FileCheckType Ty)
+ : CheckTy(Ty) {}
+
+ /// Returns the location in source code.
+ SMLoc getLoc() const { return PatternLoc; }
+
+ bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
+ unsigned LineNumber, const FileCheckRequest &Req);
+ size_t Match(StringRef Buffer, size_t &MatchLen,
+ StringMap<StringRef> &VariableTable) const;
+ void PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
+ const StringMap<StringRef> &VariableTable,
+ SMRange MatchRange = None) const;
+ void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
+ const StringMap<StringRef> &VariableTable,
+ std::vector<FileCheckDiag> *Diags) const;
+
+ bool hasVariable() const {
+ return !(VariableUses.empty() && VariableDefs.empty());
+ }
+
+ Check::FileCheckType getCheckTy() const { return CheckTy; }
+
+ int getCount() const { return CheckTy.getCount(); }
+
+private:
+ bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
+ void AddBackrefToRegEx(unsigned BackrefNum);
+ unsigned
+ ComputeMatchDistance(StringRef Buffer,
+ const StringMap<StringRef> &VariableTable) const;
+ bool EvaluateExpression(StringRef Expr, std::string &Value) const;
+ size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
+};
+
+//===----------------------------------------------------------------------===//
+/// Summary of a FileCheck diagnostic.
+//===----------------------------------------------------------------------===//
+
+struct FileCheckDiag {
+ /// What is the FileCheck directive for this diagnostic?
+ Check::FileCheckType CheckTy;
+ /// Where is the FileCheck directive for this diagnostic?
+ unsigned CheckLine, CheckCol;
+ /// What type of match result does this diagnostic describe?
+ ///
+ /// A directive's supplied pattern is said to be either expected or excluded
+ /// depending on whether the pattern must have or must not have a match in
+ /// order for the directive to succeed. For example, a CHECK directive's
+ /// pattern is expected, and a CHECK-NOT directive's pattern is excluded.
+ /// All match result types whose names end with "Excluded" are for excluded
+ /// patterns, and all others are for expected patterns.
+ ///
+ /// There might be more than one match result for a single pattern. For
+ /// example, there might be several discarded matches
+ /// (MatchFoundButDiscarded) before either a good match
+ /// (MatchFoundAndExpected) or a failure to match (MatchNoneButExpected),
+ /// and there might be a fuzzy match (MatchFuzzy) after the latter.
+ enum MatchType {
+ /// Indicates a good match for an expected pattern.
+ MatchFoundAndExpected,
+ /// Indicates a match for an excluded pattern.
+ MatchFoundButExcluded,
+ /// Indicates a match for an expected pattern, but the match is on the
+ /// wrong line.
+ MatchFoundButWrongLine,
+ /// Indicates a discarded match for an expected pattern.
+ MatchFoundButDiscarded,
+ /// Indicates no match for an excluded pattern.
+ MatchNoneAndExcluded,
+ /// Indicates no match for an expected pattern, but this might follow good
+ /// matches when multiple matches are expected for the pattern, or it might
+ /// follow discarded matches for the pattern.
+ MatchNoneButExpected,
+ /// Indicates a fuzzy match that serves as a suggestion for the next
+ /// intended match for an expected pattern with too few or no good matches.
+ MatchFuzzy,
+ } MatchTy;
+ /// The search range if MatchTy is MatchNoneAndExcluded or
+ /// MatchNoneButExpected, or the match range otherwise.
+ unsigned InputStartLine;
+ unsigned InputStartCol;
+ unsigned InputEndLine;
+ unsigned InputEndCol;
+ FileCheckDiag(const SourceMgr &SM, const Check::FileCheckType &CheckTy,
+ SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange);
+};
+
+//===----------------------------------------------------------------------===//
+// Check Strings.
+//===----------------------------------------------------------------------===//
+
+/// A check that we found in the input file.
+struct FileCheckString {
+ /// The pattern to match.
+ FileCheckPattern Pat;
+
+ /// Which prefix name this check matched.
+ StringRef Prefix;
+
+ /// The location in the match file that the check string was specified.
+ SMLoc Loc;
+
+ /// All of the strings that are disallowed from occurring between this match
+ /// string and the previous one (or start of file).
+ std::vector<FileCheckPattern> DagNotStrings;
+
+ FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L)
+ : Pat(P), Prefix(S), Loc(L) {}
+
+ size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
+ size_t &MatchLen, StringMap<StringRef> &VariableTable,
+ FileCheckRequest &Req, std::vector<FileCheckDiag> *Diags) const;
+
+ bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
+ bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
+ bool CheckNot(const SourceMgr &SM, StringRef Buffer,
+ const std::vector<const FileCheckPattern *> &NotStrings,
+ StringMap<StringRef> &VariableTable,
+ const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const;
+ size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
+ std::vector<const FileCheckPattern *> &NotStrings,
+ StringMap<StringRef> &VariableTable,
+ const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const;
+};
+
+/// The FileCheck class takes a request and exposes methods that use the
+/// information it carries.
+class FileCheck {
+ FileCheckRequest Req;
+
+public:
+ FileCheck(FileCheckRequest Req) : Req(Req) {}
+
+ // Combines the check prefixes into a single regex so that we can efficiently
+ // scan for any of the set.
+ //
+ // The semantics are that the longest match wins, which matches our regex
+ // library.
+ Regex buildCheckPrefixRegex();
+
+ /// Read the check file, which specifies the sequence of expected strings.
+ ///
+ /// The strings are added to the CheckStrings vector. Returns true in case of
+ /// an error, false otherwise.
+ bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
+ std::vector<FileCheckString> &CheckStrings);
+
+ bool ValidateCheckPrefixes();
+
+ /// Canonicalize whitespace in the file. Line endings are replaced with
+ /// UNIX-style '\n'.
+ StringRef CanonicalizeFile(MemoryBuffer &MB,
+ SmallVectorImpl<char> &OutputBuffer);
+
+ /// Check the input to FileCheck provided in the \p Buffer against the \p
+ /// CheckStrings read from the check file.
+ ///
+ /// Returns false if the input fails to satisfy the checks.
+ bool CheckInput(SourceMgr &SM, StringRef Buffer,
+ ArrayRef<FileCheckString> CheckStrings,
+ std::vector<FileCheckDiag> *Diags = nullptr);
+};
+} // namespace llvm
+#endif
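A minimal sketch of how a driver might use the FileCheck class declared above (assuming the header is llvm/Support/FileCheck.h); the FileCheckRequest configuration and error reporting are elided, and the flow simply follows the doc comments: canonicalize, read the check file, then check the input.

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileCheck.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
using namespace llvm;

static bool runFileCheck(FileCheckRequest Req,
                         std::unique_ptr<MemoryBuffer> CheckFile,
                         std::unique_ptr<MemoryBuffer> InputFile) {
  FileCheck FC(Req);
  if (!FC.ValidateCheckPrefixes())
    return false;
  Regex PrefixRE = FC.buildCheckPrefixRegex();

  SourceMgr SM;
  SmallString<4096> CheckFileBuffer;
  StringRef CheckFileText = FC.CanonicalizeFile(*CheckFile, CheckFileBuffer);
  SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
                            CheckFileText, CheckFile->getBufferIdentifier()),
                        SMLoc());

  std::vector<FileCheckString> CheckStrings;
  if (FC.ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
    return false; // ReadCheckFile returns true on error.

  SmallString<4096> InputFileBuffer;
  StringRef InputFileText = FC.CanonicalizeFile(*InputFile, InputFileBuffer);
  SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
                            InputFileText, InputFile->getBufferIdentifier()),
                        SMLoc());

  std::vector<FileCheckDiag> Diags;
  return FC.CheckInput(SM, InputFileText, CheckStrings, &Diags);
}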
diff --git a/contrib/llvm/include/llvm/Support/FileOutputBuffer.h b/contrib/llvm/include/llvm/Support/FileOutputBuffer.h
index ee8cbb730878..68226ca55502 100644
--- a/contrib/llvm/include/llvm/Support/FileOutputBuffer.h
+++ b/contrib/llvm/include/llvm/Support/FileOutputBuffer.h
@@ -76,6 +76,10 @@ public:
/// deallocates the buffer and the target file is never written.
virtual ~FileOutputBuffer() {}
+ /// This removes the temporary file (unless it already was committed)
+ /// but keeps the memory mapping alive.
+ virtual void discard() {}
+
protected:
FileOutputBuffer(StringRef Path) : FinalPath(Path) {}
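A small sketch of the new discard() hook: write into a FileOutputBuffer and either commit it or drop the temporary file. The create(Path, Size) factory and getBufferStart()/commit() are assumed from the rest of this header (they are not shown in the hunk above).

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/FileOutputBuffer.h"
#include <algorithm>
using namespace llvm;

void writeOrDiscard(StringRef Path, ArrayRef<uint8_t> Data, bool Keep) {
  Expected<std::unique_ptr<FileOutputBuffer>> BufOrErr =
      FileOutputBuffer::create(Path, Data.size());
  if (!BufOrErr) {
    consumeError(BufOrErr.takeError());
    return;
  }
  std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufOrErr);
  std::copy(Data.begin(), Data.end(), Buf->getBufferStart());
  if (Keep)
    consumeError(Buf->commit()); // Rename the temporary into place.
  else
    Buf->discard();              // Remove the temp file; the mapping stays valid.
}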
diff --git a/contrib/llvm/include/llvm/Support/FileSystem.h b/contrib/llvm/include/llvm/Support/FileSystem.h
index 02db4596bf1c..d2042f51d8c1 100644
--- a/contrib/llvm/include/llvm/Support/FileSystem.h
+++ b/contrib/llvm/include/llvm/Support/FileSystem.h
@@ -160,6 +160,8 @@ protected:
#if defined(LLVM_ON_UNIX)
time_t fs_st_atime = 0;
time_t fs_st_mtime = 0;
+ uint32_t fs_st_atime_nsec = 0;
+ uint32_t fs_st_mtime_nsec = 0;
uid_t fs_st_uid = 0;
gid_t fs_st_gid = 0;
off_t fs_st_size = 0;
@@ -180,9 +182,12 @@ public:
explicit basic_file_status(file_type Type) : Type(Type) {}
#if defined(LLVM_ON_UNIX)
- basic_file_status(file_type Type, perms Perms, time_t ATime, time_t MTime,
+ basic_file_status(file_type Type, perms Perms, time_t ATime,
+ uint32_t ATimeNSec, time_t MTime, uint32_t MTimeNSec,
uid_t UID, gid_t GID, off_t Size)
- : fs_st_atime(ATime), fs_st_mtime(MTime), fs_st_uid(UID), fs_st_gid(GID),
+ : fs_st_atime(ATime), fs_st_mtime(MTime),
+ fs_st_atime_nsec(ATimeNSec), fs_st_mtime_nsec(MTimeNSec),
+ fs_st_uid(UID), fs_st_gid(GID),
fs_st_size(Size), Type(Type), Perms(Perms) {}
#elif defined(_WIN32)
basic_file_status(file_type Type, perms Perms, uint32_t LastAccessTimeHigh,
@@ -199,7 +204,20 @@ public:
// getters
file_type type() const { return Type; }
perms permissions() const { return Perms; }
+
+ /// The file access time as reported from the underlying file system.
+ ///
+ /// Also see comments on \c getLastModificationTime() related to the precision
+ /// of the returned value.
TimePoint<> getLastAccessedTime() const;
+
+ /// The file modification time as reported from the underlying file system.
+ ///
+ /// The returned value allows for nanosecond precision but the actual
+ /// resolution is an implementation detail of the underlying file system.
+ /// There is no guarantee for what kind of resolution you can expect; the
+ /// resolution can differ across platforms and even across mountpoints on the
+ /// same machine.
TimePoint<> getLastModificationTime() const;
#if defined(LLVM_ON_UNIX)
@@ -247,8 +265,11 @@ public:
#if defined(LLVM_ON_UNIX)
file_status(file_type Type, perms Perms, dev_t Dev, nlink_t Links, ino_t Ino,
- time_t ATime, time_t MTime, uid_t UID, gid_t GID, off_t Size)
- : basic_file_status(Type, Perms, ATime, MTime, UID, GID, Size),
+ time_t ATime, uint32_t ATimeNSec,
+ time_t MTime, uint32_t MTimeNSec,
+ uid_t UID, gid_t GID, off_t Size)
+ : basic_file_status(Type, Perms, ATime, ATimeNSec, MTime, MTimeNSec,
+ UID, GID, Size),
fs_st_dev(Dev), fs_st_nlinks(Links), fs_st_ino(Ino) {}
#elif defined(_WIN32)
file_status(file_type Type, perms Perms, uint32_t LinkCount,
@@ -281,10 +302,7 @@ public:
/// relative/../path => <current-directory>/relative/../path
///
/// @param path A path that is modified to be an absolute path.
-/// @returns errc::success if \a path has been made absolute, otherwise a
-/// platform-specific error_code.
-std::error_code make_absolute(const Twine &current_directory,
- SmallVectorImpl<char> &path);
+void make_absolute(const Twine &current_directory, SmallVectorImpl<char> &path);
/// Make \a path an absolute path.
///
@@ -349,6 +367,12 @@ std::error_code create_hard_link(const Twine &to, const Twine &from);
std::error_code real_path(const Twine &path, SmallVectorImpl<char> &output,
bool expand_tilde = false);
+/// Expands ~ expressions to the user's home directory. On Unix ~user
+/// directories are resolved as well.
+///
+/// @param path The path to resolve.
+void expand_tilde(const Twine &path, SmallVectorImpl<char> &output);
+
/// Get the current path.
///
/// @param result Holds the current path on return.
@@ -666,7 +690,15 @@ inline std::error_code file_size(const Twine &Path, uint64_t &Result) {
/// @returns errc::success if the file times were successfully set, otherwise a
/// platform-specific error_code or errc::function_not_supported on
/// platforms where the functionality isn't available.
-std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time);
+std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
+ TimePoint<> ModificationTime);
+
+/// Simpler version that sets both file modification and access time to the same
+/// time.
+inline std::error_code setLastAccessAndModificationTime(int FD,
+ TimePoint<> Time) {
+ return setLastAccessAndModificationTime(FD, Time, Time);
+}
/// Is status available?
///
@@ -693,7 +725,7 @@ enum CreationDisposition : unsigned {
/// * If it does not already exist, create a new file.
CD_CreateNew = 1,
- /// CD_OpenAlways - When opening a file:
+ /// CD_OpenExisting - When opening a file:
/// * If it already exists, open the file with the offset set to 0.
/// * If it does not already exist, fail.
CD_OpenExisting = 2,
@@ -1092,38 +1124,51 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr);
/// @name Iterators
/// @{
-/// directory_entry - A single entry in a directory. Caches the status either
-/// from the result of the iteration syscall, or the first time status is
-/// called.
+/// directory_entry - A single entry in a directory.
class directory_entry {
+ // FIXME: different platforms make different information available "for free"
+ // when traversing a directory. The design of this class wraps most of the
+ // information in basic_file_status, so on platforms where we can't populate
+ // that whole structure, callers end up paying for a stat().
+ // std::filesystem::directory_entry may be a better model.
std::string Path;
- bool FollowSymlinks;
- basic_file_status Status;
+ file_type Type; // Most platforms can provide this.
+ bool FollowSymlinks; // Affects the behavior of status().
+ basic_file_status Status; // If available.
public:
- explicit directory_entry(const Twine &path, bool follow_symlinks = true,
- basic_file_status st = basic_file_status())
- : Path(path.str()), FollowSymlinks(follow_symlinks), Status(st) {}
+ explicit directory_entry(const Twine &Path, bool FollowSymlinks = true,
+ file_type Type = file_type::type_unknown,
+ basic_file_status Status = basic_file_status())
+ : Path(Path.str()), Type(Type), FollowSymlinks(FollowSymlinks),
+ Status(Status) {}
directory_entry() = default;
- void assign(const Twine &path, basic_file_status st = basic_file_status()) {
- Path = path.str();
- Status = st;
- }
-
- void replace_filename(const Twine &filename,
- basic_file_status st = basic_file_status());
+ void replace_filename(const Twine &Filename, file_type Type,
+ basic_file_status Status = basic_file_status());
const std::string &path() const { return Path; }
+ // Get basic information about the entry's file (a subset of fs::status()).
+ // On most platforms this is a stat() call.
+ // On Windows the information was already retrieved from the directory.
ErrorOr<basic_file_status> status() const;
+ // Get the type of this file.
+ // On most platforms (Linux/Mac/Windows/BSD), this was already retrieved.
+ // On some platforms (e.g. Solaris) this is a stat() call.
+ file_type type() const {
+ if (Type != file_type::type_unknown)
+ return Type;
+ auto S = status();
+ return S ? S->type() : file_type::type_unknown;
+ }
- bool operator==(const directory_entry& rhs) const { return Path == rhs.Path; }
- bool operator!=(const directory_entry& rhs) const { return !(*this == rhs); }
- bool operator< (const directory_entry& rhs) const;
- bool operator<=(const directory_entry& rhs) const;
- bool operator> (const directory_entry& rhs) const;
- bool operator>=(const directory_entry& rhs) const;
+ bool operator==(const directory_entry& RHS) const { return Path == RHS.Path; }
+ bool operator!=(const directory_entry& RHS) const { return !(*this == RHS); }
+ bool operator< (const directory_entry& RHS) const;
+ bool operator<=(const directory_entry& RHS) const;
+ bool operator> (const directory_entry& RHS) const;
+ bool operator>=(const directory_entry& RHS) const;
};
namespace detail {
@@ -1161,7 +1206,6 @@ public:
SmallString<128> path_storage;
ec = detail::directory_iterator_construct(
*State, path.toStringRef(path_storage), FollowSymlinks);
- update_error_code_for_current_entry(ec);
}
explicit directory_iterator(const directory_entry &de, std::error_code &ec,
@@ -1170,7 +1214,6 @@ public:
State = std::make_shared<detail::DirIterState>();
ec = detail::directory_iterator_construct(
*State, de.path(), FollowSymlinks);
- update_error_code_for_current_entry(ec);
}
/// Construct end iterator.
@@ -1179,7 +1222,6 @@ public:
// No operator++ because we need error_code.
directory_iterator &increment(std::error_code &ec) {
ec = directory_iterator_increment(*State);
- update_error_code_for_current_entry(ec);
return *this;
}
@@ -1199,26 +1241,6 @@ public:
bool operator!=(const directory_iterator &RHS) const {
return !(*this == RHS);
}
- // Other members as required by
- // C++ Std, 24.1.1 Input iterators [input.iterators]
-
-private:
- // Checks if current entry is valid and populates error code. For example,
- // current entry may not exist due to broken symbol links.
- void update_error_code_for_current_entry(std::error_code &ec) {
- // Bail out if error has already occured earlier to avoid overwriting it.
- if (ec)
- return;
-
- // Empty directory entry is used to mark the end of an interation, it's not
- // an error.
- if (State->CurrentEntry == directory_entry())
- return;
-
- ErrorOr<basic_file_status> status = State->CurrentEntry.status();
- if (!status)
- ec = status.getError();
- }
};
namespace detail {
@@ -1256,8 +1278,15 @@ public:
if (State->HasNoPushRequest)
State->HasNoPushRequest = false;
else {
- ErrorOr<basic_file_status> status = State->Stack.top()->status();
- if (status && is_directory(*status)) {
+ file_type type = State->Stack.top()->type();
+ if (type == file_type::symlink_file && Follow) {
+ // Resolve the symlink: is it a directory to recurse into?
+ ErrorOr<basic_file_status> status = State->Stack.top()->status();
+ if (status)
+ type = status->type();
+ // Otherwise broken symlink, and we'll continue.
+ }
+ if (type == file_type::directory_file) {
State->Stack.push(directory_iterator(*State->Stack.top(), ec, Follow));
if (State->Stack.top() != end_itr) {
++State->Level;
@@ -1321,8 +1350,6 @@ public:
bool operator!=(const recursive_directory_iterator &RHS) const {
return !(*this == RHS);
}
- // Other members as required by
- // C++ Std, 24.1.1 Input iterators [input.iterators]
};
/// @}
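A sketch combining two of the additions above: expand_tilde() and the cheap directory_entry::type() accessor used through a recursive_directory_iterator. Error handling is minimal and the directory layout is assumed, not taken from the patch.

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h"
using namespace llvm;

unsigned countRegularFiles(const Twine &Dir) {
  SmallString<256> Path;
  sys::fs::expand_tilde(Dir, Path); // e.g. "~/src" -> "/home/<user>/src" on Unix.

  unsigned N = 0;
  std::error_code EC;
  for (sys::fs::recursive_directory_iterator I(Path, EC), E; I != E && !EC;
       I.increment(EC)) {
    // type() is usually filled in during iteration; it only falls back to a
    // stat() on platforms that cannot report the type for free.
    if (I->type() == sys::fs::file_type::regular_file)
      ++N;
  }
  return N;
}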
diff --git a/contrib/llvm/include/llvm/Support/FormatVariadicDetails.h b/contrib/llvm/include/llvm/Support/FormatVariadicDetails.h
index 56dda430efda..e8bd90f50941 100644
--- a/contrib/llvm/include/llvm/Support/FormatVariadicDetails.h
+++ b/contrib/llvm/include/llvm/Support/FormatVariadicDetails.h
@@ -21,6 +21,8 @@ class Error;
namespace detail {
class format_adapter {
+ virtual void anchor();
+
protected:
virtual ~format_adapter() {}
diff --git a/contrib/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm/include/llvm/Support/GenericDomTree.h
index c716e4a4d300..b3018bac310a 100644
--- a/contrib/llvm/include/llvm/Support/GenericDomTree.h
+++ b/contrib/llvm/include/llvm/Support/GenericDomTree.h
@@ -24,6 +24,14 @@
#ifndef LLVM_SUPPORT_GENERICDOMTREE_H
#define LLVM_SUPPORT_GENERICDOMTREE_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CFGUpdate.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -32,13 +40,6 @@
#include <type_traits>
#include <utility>
#include <vector>
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -192,6 +193,10 @@ template <typename DomTreeT>
void Calculate(DomTreeT &DT);
template <typename DomTreeT>
+void CalculateWithUpdates(DomTreeT &DT,
+ ArrayRef<typename DomTreeT::UpdateType> Updates);
+
+template <typename DomTreeT>
void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
typename DomTreeT::NodePtr To);
@@ -199,36 +204,6 @@ template <typename DomTreeT>
void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
typename DomTreeT::NodePtr To);
-// UpdateKind and Update are used by the batch update API and it's easiest to
-// define them here.
-enum class UpdateKind : unsigned char { Insert, Delete };
-
-template <typename NodePtr>
-struct Update {
- using NodeKindPair = PointerIntPair<NodePtr, 1, UpdateKind>;
-
- NodePtr From;
- NodeKindPair ToAndKind;
-
- Update(UpdateKind Kind, NodePtr From, NodePtr To)
- : From(From), ToAndKind(To, Kind) {}
-
- UpdateKind getKind() const { return ToAndKind.getInt(); }
- NodePtr getFrom() const { return From; }
- NodePtr getTo() const { return ToAndKind.getPointer(); }
- bool operator==(const Update &RHS) const {
- return From == RHS.From && ToAndKind == RHS.ToAndKind;
- }
-
- friend raw_ostream &operator<<(raw_ostream &OS, const Update &U) {
- OS << (U.getKind() == UpdateKind::Insert ? "Insert " : "Delete ");
- U.getFrom()->printAsOperand(OS, false);
- OS << " -> ";
- U.getTo()->printAsOperand(OS, false);
- return OS;
- }
-};
-
template <typename DomTreeT>
void ApplyUpdates(DomTreeT &DT,
ArrayRef<typename DomTreeT::UpdateType> Updates);
@@ -254,8 +229,8 @@ class DominatorTreeBase {
using ParentType = typename std::remove_pointer<ParentPtr>::type;
static constexpr bool IsPostDominator = IsPostDom;
- using UpdateType = DomTreeBuilder::Update<NodePtr>;
- using UpdateKind = DomTreeBuilder::UpdateKind;
+ using UpdateType = cfg::Update<NodePtr>;
+ using UpdateKind = cfg::UpdateKind;
static constexpr UpdateKind Insert = UpdateKind::Insert;
static constexpr UpdateKind Delete = UpdateKind::Delete;
@@ -759,6 +734,11 @@ public:
DomTreeBuilder::Calculate(*this);
}
+ void recalculate(ParentType &Func, ArrayRef<UpdateType> Updates) {
+ Parent = &Func;
+ DomTreeBuilder::CalculateWithUpdates(*this, Updates);
+ }
+
/// verify - checks if the tree is correct. There are 3 level of verification:
/// - Full -- verifies if the tree is correct by making sure all the
/// properties (including the parent and the sibling property)
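A sketch of the new recalculate-with-updates entry point on the IR DominatorTree; From and To are hypothetical blocks between which an edge is about to be inserted. The call runs DomTreeBuilder::CalculateWithUpdates, which legalizes the updates and seeds the batch-update bookkeeping before the from-scratch calculation.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

void rebuildDomTree(Function &F, BasicBlock *From, BasicBlock *To,
                    DominatorTree &DT) {
  SmallVector<DominatorTree::UpdateType, 1> Updates;
  Updates.push_back({DominatorTree::Insert, From, To});
  // Calculate the tree while the pending edge insertion is registered as a
  // future update (see CalculateWithUpdates above).
  DT.recalculate(F, Updates);
}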
diff --git a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
index 977f209f92b3..971e8305a112 100644
--- a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@@ -71,6 +71,7 @@ struct SemiNCAInfo {
DenseMap<NodePtr, InfoRec> NodeToInfo;
using UpdateT = typename DomTreeT::UpdateType;
+ using UpdateKind = typename DomTreeT::UpdateKind;
struct BatchUpdateInfo {
SmallVector<UpdateT, 4> Updates;
using NodePtrAndKind = PointerIntPair<NodePtr, 1, UpdateKind>;
@@ -1166,7 +1167,8 @@ struct SemiNCAInfo {
}
BatchUpdateInfo BUI;
- LegalizeUpdates(Updates, BUI.Updates);
+ LLVM_DEBUG(dbgs() << "Legalizing " << BUI.Updates.size() << " updates\n");
+ cfg::LegalizeUpdates<NodePtr>(Updates, BUI.Updates, IsPostDom);
const size_t NumLegalized = BUI.Updates.size();
BUI.FutureSuccessors.reserve(NumLegalized);
@@ -1182,8 +1184,11 @@ struct SemiNCAInfo {
LLVM_DEBUG(dbgs() << "About to apply " << NumLegalized << " updates\n");
LLVM_DEBUG(if (NumLegalized < 32) for (const auto &U
- : reverse(BUI.Updates)) dbgs()
- << '\t' << U << "\n");
+ : reverse(BUI.Updates)) {
+ dbgs() << "\t";
+ U.dump();
+ dbgs() << "\n";
+ });
LLVM_DEBUG(dbgs() << "\n");
// Recalculate the DominatorTree when the number of updates
@@ -1207,76 +1212,11 @@ struct SemiNCAInfo {
ApplyNextUpdate(DT, BUI);
}
- // This function serves double purpose:
- // a) It removes redundant updates, which makes it easier to reverse-apply
- // them when traversing CFG.
- // b) It optimizes away updates that cancel each other out, as the end result
- // is the same.
- //
- // It relies on the property of the incremental updates that says that the
- // order of updates doesn't matter. This allows us to reorder them and end up
- // with the exact same DomTree every time.
- //
- // Following the same logic, the function doesn't care about the order of
- // input updates, so it's OK to pass it an unordered sequence of updates, that
- // doesn't make sense when applied sequentially, eg. performing double
- // insertions or deletions and then doing an opposite update.
- //
- // In the future, it should be possible to schedule updates in way that
- // minimizes the amount of work needed done during incremental updates.
- static void LegalizeUpdates(ArrayRef<UpdateT> AllUpdates,
- SmallVectorImpl<UpdateT> &Result) {
- LLVM_DEBUG(dbgs() << "Legalizing " << AllUpdates.size() << " updates\n");
- // Count the total number of inserions of each edge.
- // Each insertion adds 1 and deletion subtracts 1. The end number should be
- // one of {-1 (deletion), 0 (NOP), +1 (insertion)}. Otherwise, the sequence
- // of updates contains multiple updates of the same kind and we assert for
- // that case.
- SmallDenseMap<std::pair<NodePtr, NodePtr>, int, 4> Operations;
- Operations.reserve(AllUpdates.size());
-
- for (const auto &U : AllUpdates) {
- NodePtr From = U.getFrom();
- NodePtr To = U.getTo();
- if (IsPostDom) std::swap(From, To); // Reverse edge for postdominators.
-
- Operations[{From, To}] += (U.getKind() == UpdateKind::Insert ? 1 : -1);
- }
-
- Result.clear();
- Result.reserve(Operations.size());
- for (auto &Op : Operations) {
- const int NumInsertions = Op.second;
- assert(std::abs(NumInsertions) <= 1 && "Unbalanced operations!");
- if (NumInsertions == 0) continue;
- const UpdateKind UK =
- NumInsertions > 0 ? UpdateKind::Insert : UpdateKind::Delete;
- Result.push_back({UK, Op.first.first, Op.first.second});
- }
-
- // Make the order consistent by not relying on pointer values within the
- // set. Reuse the old Operations map.
- // In the future, we should sort by something else to minimize the amount
- // of work needed to perform the series of updates.
- for (size_t i = 0, e = AllUpdates.size(); i != e; ++i) {
- const auto &U = AllUpdates[i];
- if (!IsPostDom)
- Operations[{U.getFrom(), U.getTo()}] = int(i);
- else
- Operations[{U.getTo(), U.getFrom()}] = int(i);
- }
-
- llvm::sort(Result.begin(), Result.end(),
- [&Operations](const UpdateT &A, const UpdateT &B) {
- return Operations[{A.getFrom(), A.getTo()}] >
- Operations[{B.getFrom(), B.getTo()}];
- });
- }
-
static void ApplyNextUpdate(DomTreeT &DT, BatchUpdateInfo &BUI) {
assert(!BUI.Updates.empty() && "No updates to apply!");
UpdateT CurrentUpdate = BUI.Updates.pop_back_val();
- LLVM_DEBUG(dbgs() << "Applying update: " << CurrentUpdate << "\n");
+ LLVM_DEBUG(dbgs() << "Applying update: ");
+ LLVM_DEBUG(CurrentUpdate.dump(); dbgs() << "\n");
// Move to the next snapshot of the CFG by removing the reverse-applied
// current update. Since updates are performed in the same order they are
@@ -1460,10 +1400,9 @@ struct SemiNCAInfo {
// Make a copy and sort it such that it is possible to check if there are
// no gaps between DFS numbers of adjacent children.
SmallVector<TreeNodePtr, 8> Children(Node->begin(), Node->end());
- llvm::sort(Children.begin(), Children.end(),
- [](const TreeNodePtr Ch1, const TreeNodePtr Ch2) {
- return Ch1->getDFSNumIn() < Ch2->getDFSNumIn();
- });
+ llvm::sort(Children, [](const TreeNodePtr Ch1, const TreeNodePtr Ch2) {
+ return Ch1->getDFSNumIn() < Ch2->getDFSNumIn();
+ });
auto PrintChildrenError = [Node, &Children, PrintNodeAndDFSNums](
const TreeNodePtr FirstCh, const TreeNodePtr SecondCh) {
@@ -1650,6 +1589,25 @@ void Calculate(DomTreeT &DT) {
SemiNCAInfo<DomTreeT>::CalculateFromScratch(DT, nullptr);
}
+template <typename DomTreeT>
+void CalculateWithUpdates(DomTreeT &DT,
+ ArrayRef<typename DomTreeT::UpdateType> Updates) {
+ // TODO: Move BUI creation in common method, reuse in ApplyUpdates.
+ typename SemiNCAInfo<DomTreeT>::BatchUpdateInfo BUI;
+ LLVM_DEBUG(dbgs() << "Legalizing " << BUI.Updates.size() << " updates\n");
+ cfg::LegalizeUpdates<typename DomTreeT::NodePtr>(Updates, BUI.Updates,
+ DomTreeT::IsPostDominator);
+ const size_t NumLegalized = BUI.Updates.size();
+ BUI.FutureSuccessors.reserve(NumLegalized);
+ BUI.FuturePredecessors.reserve(NumLegalized);
+ for (auto &U : BUI.Updates) {
+ BUI.FutureSuccessors[U.getFrom()].push_back({U.getTo(), U.getKind()});
+ BUI.FuturePredecessors[U.getTo()].push_back({U.getFrom(), U.getKind()});
+ }
+
+ SemiNCAInfo<DomTreeT>::CalculateFromScratch(DT, &BUI);
+}
+
template <class DomTreeT>
void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
typename DomTreeT::NodePtr To) {
diff --git a/contrib/llvm/include/llvm/Support/GraphWriter.h b/contrib/llvm/include/llvm/Support/GraphWriter.h
index c9a9f409c522..02d98bec16e2 100644
--- a/contrib/llvm/include/llvm/Support/GraphWriter.h
+++ b/contrib/llvm/include/llvm/Support/GraphWriter.h
@@ -27,6 +27,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
@@ -320,14 +321,32 @@ raw_ostream &WriteGraph(raw_ostream &O, const GraphType &G,
std::string createGraphFilename(const Twine &Name, int &FD);
+/// Writes the graph into the provided \p Filename.
+/// If \p Filename is empty, a random filename is generated.
+/// \return The resulting filename, or an empty string if writing
+/// failed.
template <typename GraphType>
std::string WriteGraph(const GraphType &G, const Twine &Name,
- bool ShortNames = false, const Twine &Title = "") {
+ bool ShortNames = false,
+ const Twine &Title = "",
+ std::string Filename = "") {
int FD;
// Windows can't always handle long paths, so limit the length of the name.
std::string N = Name.str();
N = N.substr(0, std::min<std::size_t>(N.size(), 140));
- std::string Filename = createGraphFilename(N, FD);
+ if (Filename.empty()) {
+ Filename = createGraphFilename(N, FD);
+ } else {
+ std::error_code EC = sys::fs::openFileForWrite(Filename, FD);
+
+ // Writing over an existing file is not considered an error.
+ if (EC == std::errc::file_exists) {
+ errs() << "file exists, overwriting" << "\n";
+ } else if (EC) {
+ errs() << "error writing into file" << "\n";
+ return "";
+ }
+ }
raw_fd_ostream O(FD, /*shouldClose=*/ true);
if (FD == -1) {
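A sketch of the new explicit-Filename overload of WriteGraph, using the DOT traits for Function from llvm/Analysis/CFGPrinter.h (an assumption, not part of this patch): an empty path falls back to a generated temporary name, and an empty return value means the file could not be opened.

#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/Support/GraphWriter.h"
using namespace llvm;

std::string dumpCFG(const Function &F, std::string Path) {
  std::string Title = ("CFG for '" + F.getName() + "' function").str();
  return WriteGraph(&F, F.getName(), /*ShortNames=*/false, Title,
                    std::move(Path));
}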
diff --git a/contrib/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h b/contrib/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h
new file mode 100644
index 000000000000..34eb9f7deaaf
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h
@@ -0,0 +1,93 @@
+//===--- ItaniumManglingCanonicalizer.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a class for computing equivalence classes of mangled names
+// given a set of equivalences between name fragments.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
+#define LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstddef>
+
+namespace llvm {
+/// Canonicalizer for mangled names.
+///
+/// This class allows specifying a list of "equivalent" manglings. For example,
+/// you can specify that Ss is equivalent to
+/// NSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEE
+/// and then manglings that refer to libstdc++'s 'std::string' will be
+/// considered equivalent to manglings that are the same except that they refer
+/// to libc++'s 'std::string'.
+///
+/// This can be used when data (eg, profiling data) is available for a version
+/// of a program built in a different configuration, with correspondingly
+/// different manglings.
+class ItaniumManglingCanonicalizer {
+public:
+ ItaniumManglingCanonicalizer();
+ ItaniumManglingCanonicalizer(const ItaniumManglingCanonicalizer &) = delete;
+ void operator=(const ItaniumManglingCanonicalizer &) = delete;
+ ~ItaniumManglingCanonicalizer();
+
+ enum class EquivalenceError {
+ Success,
+
+ /// Both the equivalent manglings have already been used as components of
+ /// some other mangling we've looked at. It's too late to add this
+ /// equivalence.
+ ManglingAlreadyUsed,
+
+ /// The first equivalent mangling is invalid.
+ InvalidFirstMangling,
+
+ /// The second equivalent mangling is invalid.
+ InvalidSecondMangling,
+ };
+
+ enum class FragmentKind {
+ /// The mangling fragment is a <name> (or a predefined <substitution>).
+ Name,
+ /// The mangling fragment is a <type>.
+ Type,
+ /// The mangling fragment is an <encoding>.
+ Encoding,
+ };
+
+ /// Add an equivalence between \p First and \p Second. Both manglings must
+ /// live at least as long as the canonicalizer.
+ EquivalenceError addEquivalence(FragmentKind Kind, StringRef First,
+ StringRef Second);
+
+ using Key = uintptr_t;
+
+ /// Form a canonical key for the specified mangling. The key will be the
+ /// same for all equivalent manglings, and different for any two
+ /// non-equivalent manglings, but is otherwise unspecified.
+ ///
+ /// Returns Key() if (and only if) the mangling is not a valid Itanium C++
+ /// ABI mangling.
+ ///
+ /// The string denoted by Mangling must live as long as the canonicalizer.
+ Key canonicalize(StringRef Mangling);
+
+ /// Find a canonical key for the specified mangling, if one has already been
+ /// formed. Otherwise returns Key().
+ Key lookup(StringRef Mangling);
+
+private:
+ struct Impl;
+ Impl *P;
+};
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
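A sketch built from the doc comment above: register the Ss <-> libc++ std::string equivalence and then test whether two manglings fall into the same class. String literals satisfy the lifetime requirement; callers must keep A and B alive as long as the canonicalizer.

#include "llvm/Support/ItaniumManglingCanonicalizer.h"
using namespace llvm;

bool equivalentManglings(StringRef A, StringRef B) {
  ItaniumManglingCanonicalizer C;
  (void)C.addEquivalence(
      ItaniumManglingCanonicalizer::FragmentKind::Type, "Ss",
      "NSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEE");
  ItaniumManglingCanonicalizer::Key KA = C.canonicalize(A);
  ItaniumManglingCanonicalizer::Key KB = C.canonicalize(B);
  // Key() means "not a valid Itanium mangling"; never treat that as a match.
  return KA != ItaniumManglingCanonicalizer::Key() && KA == KB;
}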
diff --git a/contrib/llvm/include/llvm/Support/JSON.h b/contrib/llvm/include/llvm/Support/JSON.h
index da3c5ea0b25d..7a04fd52bc50 100644
--- a/contrib/llvm/include/llvm/Support/JSON.h
+++ b/contrib/llvm/include/llvm/Support/JSON.h
@@ -294,9 +294,13 @@ public:
Value(json::Array &&Elements) : Type(T_Array) {
create<json::Array>(std::move(Elements));
}
+ template <typename Elt>
+ Value(const std::vector<Elt> &C) : Value(json::Array(C)) {}
Value(json::Object &&Properties) : Type(T_Object) {
create<json::Object>(std::move(Properties));
}
+ template <typename Elt>
+ Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {}
// Strings: types with value semantics. Must be valid UTF-8.
Value(std::string V) : Type(T_String) {
if (LLVM_UNLIKELY(!isUTF8(V))) {
@@ -452,7 +456,10 @@ private:
new (reinterpret_cast<T *>(Union.buffer)) T(std::forward<U>(V)...);
}
template <typename T> T &as() const {
- return *reinterpret_cast<T *>(Union.buffer);
+ // Using this two-step static_cast via void * instead of reinterpret_cast
+ // silences a -Wstrict-aliasing false positive from GCC6 and earlier.
+ void *Storage = static_cast<void *>(Union.buffer);
+ return *static_cast<T *>(Storage);
}
template <typename Indenter>
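A small sketch of the two constructors added above: a std::vector becomes a JSON array and a string-keyed std::map becomes a JSON object without any manual conversion.

#include "llvm/Support/JSON.h"
#include <map>
#include <string>
#include <vector>
using namespace llvm;

json::Value describe() {
  std::vector<int> Primes = {2, 3, 5, 7};
  std::map<std::string, std::string> Authors = {{"llvm", "many"}};
  return json::Object{{"primes", Primes},    // -> [2,3,5,7]
                      {"authors", Authors}}; // -> {"llvm":"many"}
}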
diff --git a/contrib/llvm/include/llvm/Support/LowLevelTypeImpl.h b/contrib/llvm/include/llvm/Support/LowLevelTypeImpl.h
index a0a5a52d206e..2a1075c9a48d 100644
--- a/contrib/llvm/include/llvm/Support/LowLevelTypeImpl.h
+++ b/contrib/llvm/include/llvm/Support/LowLevelTypeImpl.h
@@ -147,6 +147,7 @@ public:
bool operator!=(const LLT &RHS) const { return !(*this == RHS); }
friend struct DenseMapInfo<LLT>;
+ friend class GISelInstProfileBuilder;
private:
/// LLT is packed into 64 bits as follows:
@@ -231,6 +232,11 @@ private:
maskAndShift(AddressSpace, PointerVectorAddressSpaceFieldInfo);
}
}
+
+ uint64_t getUniqueRAWLLTData() const {
+ return ((uint64_t)RawData) << 2 | ((uint64_t)IsPointer) << 1 |
+ ((uint64_t)IsVector);
+ }
};
inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) {
@@ -250,8 +256,7 @@ template<> struct DenseMapInfo<LLT> {
return Invalid;
}
static inline unsigned getHashValue(const LLT &Ty) {
- uint64_t Val = ((uint64_t)Ty.RawData) << 2 | ((uint64_t)Ty.IsPointer) << 1 |
- ((uint64_t)Ty.IsVector);
+ uint64_t Val = Ty.getUniqueRAWLLTData();
return DenseMapInfo<uint64_t>::getHashValue(Val);
}
static bool isEqual(const LLT &LHS, const LLT &RHS) {
diff --git a/contrib/llvm/include/llvm/Support/MSVCErrorWorkarounds.h b/contrib/llvm/include/llvm/Support/MSVCErrorWorkarounds.h
new file mode 100644
index 000000000000..053ecf64d1e9
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/MSVCErrorWorkarounds.h
@@ -0,0 +1,84 @@
+//===--- MSVCErrorWorkarounds.h - Enable future<Error> in MSVC --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MSVC's promise/future implementation requires types to be default
+// constructible, so this header provides analogues of Error and Expected
+// that are default constructed in a safely destructible state.
+//
+// FIXME: Kill off this header and migrate all users to Error/Expected once we
+// move to MSVC versions that support non-default-constructible types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MSVCERRORWORKAROUNDS_H
+#define LLVM_SUPPORT_MSVCERRORWORKAROUNDS_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+// A default-constructible llvm::Error that is suitable for use with MSVC's
+// std::future implementation which requires default constructible types.
+class MSVCPError : public Error {
+public:
+ MSVCPError() { (void)!!*this; }
+
+ MSVCPError(MSVCPError &&Other) : Error(std::move(Other)) {}
+
+ MSVCPError &operator=(MSVCPError Other) {
+ Error::operator=(std::move(Other));
+ return *this;
+ }
+
+ MSVCPError(Error Err) : Error(std::move(Err)) {}
+};
+
+// A default-constructible llvm::Expected that is suitable for use with MSVC's
+// std::future implementation, which requires default constructible types.
+template <typename T> class MSVCPExpected : public Expected<T> {
+public:
+ MSVCPExpected()
+ : Expected<T>(make_error<StringError>("", inconvertibleErrorCode())) {
+ consumeError(this->takeError());
+ }
+
+ MSVCPExpected(MSVCPExpected &&Other) : Expected<T>(std::move(Other)) {}
+
+ MSVCPExpected &operator=(MSVCPExpected &&Other) {
+ Expected<T>::operator=(std::move(Other));
+ return *this;
+ }
+
+ MSVCPExpected(Error Err) : Expected<T>(std::move(Err)) {}
+
+ template <typename OtherT>
+ MSVCPExpected(
+ OtherT &&Val,
+ typename std::enable_if<std::is_convertible<OtherT, T>::value>::type * =
+ nullptr)
+ : Expected<T>(std::move(Val)) {}
+
+ template <class OtherT>
+ MSVCPExpected(
+ Expected<OtherT> &&Other,
+ typename std::enable_if<std::is_convertible<OtherT, T>::value>::type * =
+ nullptr)
+ : Expected<T>(std::move(Other)) {}
+
+ template <class OtherT>
+ explicit MSVCPExpected(
+ Expected<OtherT> &&Other,
+ typename std::enable_if<!std::is_convertible<OtherT, T>::value>::type * =
+ nullptr)
+ : Expected<T>(std::move(Other)) {}
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_MSVCERRORWORKAROUNDS_H
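A sketch of the intended pattern: because MSVC's std::promise/std::future need default-constructible types, an Expected<T> result is passed through a future as MSVCPExpected<T>. computeRemotely() is a hypothetical producer, not part of this header.

#include "llvm/Support/MSVCErrorWorkarounds.h"
#include <future>
using namespace llvm;

Expected<int> computeRemotely(); // Hypothetical; defined elsewhere.

std::future<MSVCPExpected<int>> computeAsync() {
  std::promise<MSVCPExpected<int>> P;
  std::future<MSVCPExpected<int>> F = P.get_future();
  // Expected<int> converts implicitly to the default-constructible wrapper.
  P.set_value(computeRemotely());
  return F;
}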
diff --git a/contrib/llvm/include/llvm/Support/Path.h b/contrib/llvm/include/llvm/Support/Path.h
index c4cc93721d7e..76de887b7cb4 100644
--- a/contrib/llvm/include/llvm/Support/Path.h
+++ b/contrib/llvm/include/llvm/Support/Path.h
@@ -361,22 +361,6 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result);
/// @result True if a home directory is set, false otherwise.
bool home_directory(SmallVectorImpl<char> &result);
-/// Get the user's cache directory.
-///
-/// Expect the resulting path to be a directory shared with other
-/// applications/services used by the user. Params \p Path1 to \p Path3 can be
-/// used to append additional directory names to the resulting path. Recommended
-/// pattern is <user_cache_directory>/<vendor>/<application>.
-///
-/// @param Result Holds the resulting path.
-/// @param Path1 Additional path to be appended to the user's cache directory
-/// path. "" can be used to append nothing.
-/// @param Path2 Second additional path to be appended.
-/// @param Path3 Third additional path to be appended.
-/// @result True if a cache directory path is set, false otherwise.
-bool user_cache_directory(SmallVectorImpl<char> &Result, const Twine &Path1,
- const Twine &Path2 = "", const Twine &Path3 = "");
-
/// Has root name?
///
/// root_name != ""
diff --git a/contrib/llvm/include/llvm/Support/ScopedPrinter.h b/contrib/llvm/include/llvm/Support/ScopedPrinter.h
index 062439b4f7db..34c1a287ee10 100644
--- a/contrib/llvm/include/llvm/Support/ScopedPrinter.h
+++ b/contrib/llvm/include/llvm/Support/ScopedPrinter.h
@@ -138,7 +138,7 @@ public:
}
}
- llvm::sort(SetFlags.begin(), SetFlags.end(), &flagName<TFlag>);
+ llvm::sort(SetFlags, &flagName<TFlag>);
startLine() << Label << " [ (" << hex(Value) << ")\n";
for (const auto &Flag : SetFlags) {
diff --git a/contrib/llvm/include/llvm/Support/SymbolRemappingReader.h b/contrib/llvm/include/llvm/Support/SymbolRemappingReader.h
new file mode 100644
index 000000000000..b457b9e817e4
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/SymbolRemappingReader.h
@@ -0,0 +1,133 @@
+//===- SymbolRemappingReader.h - Read symbol remapping file -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions needed for reading and applying symbol
+// remapping files.
+//
+// Support is provided only for the Itanium C++ name mangling scheme for now.
+//
+// NOTE: If you are making changes to this file format, please remember
+// to document them in the Clang documentation at
+// tools/clang/docs/UsersManual.rst.
+//
+// File format
+// -----------
+//
+// The symbol remappings are written as an ASCII text file. Blank lines and
+// lines starting with a # are ignored. All other lines specify a kind of
+// mangled name fragment, along with two fragments of that kind that should
+// be treated as equivalent, separated by spaces.
+//
+// See http://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling for a
+// description of the Itanium name mangling scheme.
+//
+// The accepted fragment kinds are:
+//
+// * name A <name>, such as 6foobar or St3__1
+// * type A <type>, such as Ss or N4llvm9StringRefE
+// * encoding An <encoding> (a complete mangling without the leading _Z)
+//
+// For example:
+//
+// # Ignore int / long differences to treat symbols from 32-bit and 64-bit
+// # builds with differing size_t / ptrdiff_t / intptr_t as equivalent.
+// type i l
+// type j m
+//
+// # Ignore differences between libc++ and libstdc++, and between libstdc++'s
+// # C++98 and C++11 ABIs.
+// name 3std St3__1
+// name 3std St7__cxx11
+//
+// # Remap a function overload to a specialization of a template (including
+// # any local symbols declared within it).
+// encoding N2NS1fEi N2NS1fIiEEvT_
+//
+// # Substitutions must be remapped separately from namespace 'std' for now.
+// name Sa NSt3__19allocatorE
+// name Sb NSt3__112basic_stringE
+// type Ss NSt3__112basic_stringIcSt11char_traitsIcESaE
+// # ...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H
+#define LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ItaniumManglingCanonicalizer.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+
+class SymbolRemappingParseError : public ErrorInfo<SymbolRemappingParseError> {
+public:
+ SymbolRemappingParseError(StringRef File, int64_t Line, Twine Message)
+ : File(File), Line(Line), Message(Message.str()) {}
+
+ void log(llvm::raw_ostream &OS) const override {
+ OS << File << ':' << Line << ": " << Message;
+ }
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+
+ StringRef getFileName() const { return File; }
+ int64_t getLineNum() const { return Line; }
+ StringRef getMessage() const { return Message; }
+
+ static char ID;
+
+private:
+ std::string File;
+ int64_t Line;
+ std::string Message;
+};
+
+/// Reader for symbol remapping files.
+///
+/// Remaps the symbol names in profile data to match those in the program
+/// according to a set of rules specified in a given file.
+class SymbolRemappingReader {
+public:
+ /// Read remappings from the given buffer, which must live as long as
+ /// the remapper.
+ Error read(MemoryBuffer &B);
+
+ /// A Key represents an equivalence class of symbol names.
+ using Key = uintptr_t;
+
+ /// Construct a key for the given symbol, or return an existing one if an
+ /// equivalent name has already been inserted. The symbol name must live
+ /// as long as the remapper.
+ ///
+ /// The result will be Key() if the name cannot be remapped (typically
+ /// because it is not a valid mangled name).
+ Key insert(StringRef FunctionName) {
+ return Canonicalizer.canonicalize(FunctionName);
+ }
+
+ /// Map the given symbol name into the key for the corresponding equivalence
+ /// class.
+ ///
+ /// The result will typically be Key() if no equivalent symbol has been
+ /// inserted, but this is not guaranteed: a Key different from all keys ever
+ /// returned by \c insert may be returned instead.
+ Key lookup(StringRef FunctionName) {
+ return Canonicalizer.lookup(FunctionName);
+ }
+
+private:
+ ItaniumManglingCanonicalizer Canonicalizer;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H
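A sketch of reading a remapping file and testing whether two mangled names land in the same equivalence class; as the comments above require, the buffer and the symbol names outlive the reader here.

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SymbolRemappingReader.h"
using namespace llvm;

Expected<bool> sameEquivalenceClass(StringRef RemapPath, StringRef SymA,
                                    StringRef SymB) {
  auto BufOrErr = MemoryBuffer::getFile(RemapPath);
  if (!BufOrErr)
    return errorCodeToError(BufOrErr.getError());

  SymbolRemappingReader Reader;
  if (Error E = Reader.read(**BufOrErr))
    return std::move(E);

  SymbolRemappingReader::Key KA = Reader.insert(SymA);
  SymbolRemappingReader::Key KB = Reader.insert(SymB);
  // Key() means the name could not be remapped (e.g. not a valid mangling).
  return KA != SymbolRemappingReader::Key() && KA == KB;
}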
diff --git a/contrib/llvm/include/llvm/Support/TargetOpcodes.def b/contrib/llvm/include/llvm/Support/TargetOpcodes.def
index 63491a5f01d2..3e8193a5cdcf 100644
--- a/contrib/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/contrib/llvm/include/llvm/Support/TargetOpcodes.def
@@ -258,6 +258,17 @@ HANDLE_TARGET_OPCODE(G_INSERT)
/// larger register.
HANDLE_TARGET_OPCODE(G_MERGE_VALUES)
+/// Generic instruction to create a vector value from a number of scalar
+/// components.
+HANDLE_TARGET_OPCODE(G_BUILD_VECTOR)
+
+/// Generic instruction to create a vector value from a number of scalar
+/// components, which have types larger than the result vector elt type.
+HANDLE_TARGET_OPCODE(G_BUILD_VECTOR_TRUNC)
+
+/// Generic instruction to create a vector by concatenating multiple vectors.
+HANDLE_TARGET_OPCODE(G_CONCAT_VECTORS)
+
/// Generic pointer to int conversion.
HANDLE_TARGET_OPCODE(G_PTRTOINT)
@@ -268,6 +279,12 @@ HANDLE_TARGET_OPCODE(G_INTTOPTR)
/// COPY is the relevant instruction.
HANDLE_TARGET_OPCODE(G_BITCAST)
+/// INTRINSIC trunc intrinsic.
+HANDLE_TARGET_OPCODE(G_INTRINSIC_TRUNC)
+
+/// INTRINSIC round intrinsic.
+HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUND)
+
/// Generic load (including anyext load)
HANDLE_TARGET_OPCODE(G_LOAD)
@@ -356,10 +373,18 @@ HANDLE_TARGET_OPCODE(G_FCMP)
/// Generic select.
HANDLE_TARGET_OPCODE(G_SELECT)
+/// Generic unsigned add instruction, consuming the normal operands and
+/// producing the result and a carry flag.
+HANDLE_TARGET_OPCODE(G_UADDO)
+
/// Generic unsigned add instruction, consuming the normal operands plus a carry
/// flag, and similarly producing the result and a carry flag.
HANDLE_TARGET_OPCODE(G_UADDE)
+/// Generic unsigned sub instruction, consuming the normal operands and
+/// producing the result and a carry flag.
+HANDLE_TARGET_OPCODE(G_USUBO)
+
/// Generic unsigned subtract instruction, consuming the normal operands plus a
/// carry flag, and similarly producing the result and a carry flag.
HANDLE_TARGET_OPCODE(G_USUBE)
@@ -368,10 +393,18 @@ HANDLE_TARGET_OPCODE(G_USUBE)
/// flag.
HANDLE_TARGET_OPCODE(G_SADDO)
+/// Generic signed add instruction, consuming the normal operands plus a carry
+/// flag, and similarly producing the result and a carry flag.
+HANDLE_TARGET_OPCODE(G_SADDE)
+
/// Generic signed subtract instruction, producing the result and a signed
/// overflow flag.
HANDLE_TARGET_OPCODE(G_SSUBO)
+/// Generic signed sub instruction, consuming the normal operands plus a carry
+/// flag, and similarly producing the result and a carry flag.
+HANDLE_TARGET_OPCODE(G_SSUBE)
+
/// Generic unsigned multiply instruction, producing the result and a signed
/// overflow flag.
HANDLE_TARGET_OPCODE(G_UMULO)
@@ -421,6 +454,9 @@ HANDLE_TARGET_OPCODE(G_FLOG)
/// Floating point base-2 logarithm of a value.
HANDLE_TARGET_OPCODE(G_FLOG2)
+/// Floating point base-10 logarithm of a value.
+HANDLE_TARGET_OPCODE(G_FLOG10)
+
/// Generic FP negation.
HANDLE_TARGET_OPCODE(G_FNEG)
@@ -464,9 +500,27 @@ HANDLE_TARGET_OPCODE(G_EXTRACT_VECTOR_ELT)
/// Generic shufflevector.
HANDLE_TARGET_OPCODE(G_SHUFFLE_VECTOR)
+/// Generic count trailing zeroes.
+HANDLE_TARGET_OPCODE(G_CTTZ)
+
+/// Same as above, undefined for zero inputs.
+HANDLE_TARGET_OPCODE(G_CTTZ_ZERO_UNDEF)
+
+/// Generic count leading zeroes.
+HANDLE_TARGET_OPCODE(G_CTLZ)
+
+/// Same as above, undefined for zero inputs.
+HANDLE_TARGET_OPCODE(G_CTLZ_ZERO_UNDEF)
+
+/// Generic count bits.
+HANDLE_TARGET_OPCODE(G_CTPOP)
+
/// Generic byte swap.
HANDLE_TARGET_OPCODE(G_BSWAP)
+/// Floating point ceil.
+HANDLE_TARGET_OPCODE(G_FCEIL)
+
/// Generic AddressSpaceCast.
HANDLE_TARGET_OPCODE(G_ADDRSPACE_CAST)
diff --git a/contrib/llvm/include/llvm/Support/TargetParser.h b/contrib/llvm/include/llvm/Support/TargetParser.h
index 08ad42dda3eb..ace11ed410a3 100644
--- a/contrib/llvm/include/llvm/Support/TargetParser.h
+++ b/contrib/llvm/include/llvm/Support/TargetParser.h
@@ -18,211 +18,20 @@
// FIXME: vector is used because that's what clang uses for subtarget feature
// lists, but SmallVector would probably be better
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/ARMTargetParser.h"
+#include "llvm/Support/AArch64TargetParser.h"
#include <vector>
namespace llvm {
class StringRef;
-// Target specific information into their own namespaces. These should be
-// generated from TableGen because the information is already there, and there
-// is where new information about targets will be added.
+// Target specific information in their own namespaces.
+// (ARM/AArch64 are declared in ARM/AArch64TargetParser.h)
+// These should be generated from TableGen because the information is already
+// there, and that is where new information about targets will be added.
// FIXME: To TableGen this we need to make some table generated files available
// even if the back-end is not compiled with LLVM, plus we need to create a new
// back-end to TableGen to create these clean tables.
-namespace ARM {
-
-// FPU Version
-enum class FPUVersion {
- NONE,
- VFPV2,
- VFPV3,
- VFPV3_FP16,
- VFPV4,
- VFPV5
-};
-
-// An FPU name restricts the FPU in one of three ways:
-enum class FPURestriction {
- None = 0, ///< No restriction
- D16, ///< Only 16 D registers
- SP_D16 ///< Only single-precision instructions, with 16 D registers
-};
-
-// An FPU name implies one of three levels of Neon support:
-enum class NeonSupportLevel {
- None = 0, ///< No Neon
- Neon, ///< Neon
- Crypto ///< Neon with Crypto
-};
-
-// FPU names.
-enum FPUKind {
-#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) KIND,
-#include "ARMTargetParser.def"
- FK_LAST
-};
-
-// Arch names.
-enum class ArchKind {
-#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID,
-#include "ARMTargetParser.def"
-};
-
-// Arch extension modifiers for CPUs.
-enum ArchExtKind : unsigned {
- AEK_INVALID = 0,
- AEK_NONE = 1,
- AEK_CRC = 1 << 1,
- AEK_CRYPTO = 1 << 2,
- AEK_FP = 1 << 3,
- AEK_HWDIVTHUMB = 1 << 4,
- AEK_HWDIVARM = 1 << 5,
- AEK_MP = 1 << 6,
- AEK_SIMD = 1 << 7,
- AEK_SEC = 1 << 8,
- AEK_VIRT = 1 << 9,
- AEK_DSP = 1 << 10,
- AEK_FP16 = 1 << 11,
- AEK_RAS = 1 << 12,
- AEK_SVE = 1 << 13,
- AEK_DOTPROD = 1 << 14,
- AEK_SHA2 = 1 << 15,
- AEK_AES = 1 << 16,
- // Unsupported extensions.
- AEK_OS = 0x8000000,
- AEK_IWMMXT = 0x10000000,
- AEK_IWMMXT2 = 0x20000000,
- AEK_MAVERICK = 0x40000000,
- AEK_XSCALE = 0x80000000,
-};
-
-// ISA kinds.
-enum class ISAKind { INVALID = 0, ARM, THUMB, AARCH64 };
-
-// Endianness
-// FIXME: BE8 vs. BE32?
-enum class EndianKind { INVALID = 0, LITTLE, BIG };
-
-// v6/v7/v8 Profile
-enum class ProfileKind { INVALID = 0, A, R, M };
-
-StringRef getCanonicalArchName(StringRef Arch);
-
-// Information by ID
-StringRef getFPUName(unsigned FPUKind);
-FPUVersion getFPUVersion(unsigned FPUKind);
-NeonSupportLevel getFPUNeonSupportLevel(unsigned FPUKind);
-FPURestriction getFPURestriction(unsigned FPUKind);
-
-// FIXME: These should be moved to TargetTuple once it exists
-bool getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features);
-bool getHWDivFeatures(unsigned HWDivKind, std::vector<StringRef> &Features);
-bool getExtensionFeatures(unsigned Extensions,
- std::vector<StringRef> &Features);
-
-StringRef getArchName(ArchKind AK);
-unsigned getArchAttr(ArchKind AK);
-StringRef getCPUAttr(ArchKind AK);
-StringRef getSubArch(ArchKind AK);
-StringRef getArchExtName(unsigned ArchExtKind);
-StringRef getArchExtFeature(StringRef ArchExt);
-StringRef getHWDivName(unsigned HWDivKind);
-
-// Information by Name
-unsigned getDefaultFPU(StringRef CPU, ArchKind AK);
-unsigned getDefaultExtensions(StringRef CPU, ArchKind AK);
-StringRef getDefaultCPU(StringRef Arch);
-
-// Parser
-unsigned parseHWDiv(StringRef HWDiv);
-unsigned parseFPU(StringRef FPU);
-ArchKind parseArch(StringRef Arch);
-unsigned parseArchExt(StringRef ArchExt);
-ArchKind parseCPUArch(StringRef CPU);
-void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
-ISAKind parseArchISA(StringRef Arch);
-EndianKind parseArchEndian(StringRef Arch);
-ProfileKind parseArchProfile(StringRef Arch);
-unsigned parseArchVersion(StringRef Arch);
-
-StringRef computeDefaultTargetABI(const Triple &TT, StringRef CPU);
-
-} // namespace ARM
-
-// FIXME:This should be made into class design,to avoid dupplication.
-namespace AArch64 {
-
-// Arch names.
-enum class ArchKind {
-#define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID,
-#include "AArch64TargetParser.def"
-};
-
-// Arch extension modifiers for CPUs.
-enum ArchExtKind : unsigned {
- AEK_INVALID = 0,
- AEK_NONE = 1,
- AEK_CRC = 1 << 1,
- AEK_CRYPTO = 1 << 2,
- AEK_FP = 1 << 3,
- AEK_SIMD = 1 << 4,
- AEK_FP16 = 1 << 5,
- AEK_PROFILE = 1 << 6,
- AEK_RAS = 1 << 7,
- AEK_LSE = 1 << 8,
- AEK_SVE = 1 << 9,
- AEK_DOTPROD = 1 << 10,
- AEK_RCPC = 1 << 11,
- AEK_RDM = 1 << 12,
- AEK_SM4 = 1 << 13,
- AEK_SHA3 = 1 << 14,
- AEK_SHA2 = 1 << 15,
- AEK_AES = 1 << 16,
-};
-
-StringRef getCanonicalArchName(StringRef Arch);
-
-// Information by ID
-StringRef getFPUName(unsigned FPUKind);
-ARM::FPUVersion getFPUVersion(unsigned FPUKind);
-ARM::NeonSupportLevel getFPUNeonSupportLevel(unsigned FPUKind);
-ARM::FPURestriction getFPURestriction(unsigned FPUKind);
-
-// FIXME: These should be moved to TargetTuple once it exists
-bool getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features);
-bool getExtensionFeatures(unsigned Extensions,
- std::vector<StringRef> &Features);
-bool getArchFeatures(ArchKind AK, std::vector<StringRef> &Features);
-
-StringRef getArchName(ArchKind AK);
-unsigned getArchAttr(ArchKind AK);
-StringRef getCPUAttr(ArchKind AK);
-StringRef getSubArch(ArchKind AK);
-StringRef getArchExtName(unsigned ArchExtKind);
-StringRef getArchExtFeature(StringRef ArchExt);
-unsigned checkArchVersion(StringRef Arch);
-
-// Information by Name
-unsigned getDefaultFPU(StringRef CPU, ArchKind AK);
-unsigned getDefaultExtensions(StringRef CPU, ArchKind AK);
-StringRef getDefaultCPU(StringRef Arch);
-AArch64::ArchKind getCPUArchKind(StringRef CPU);
-
-// Parser
-unsigned parseFPU(StringRef FPU);
-AArch64::ArchKind parseArch(StringRef Arch);
-ArchExtKind parseArchExt(StringRef ArchExt);
-ArchKind parseCPUArch(StringRef CPU);
-void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
-ARM::ISAKind parseArchISA(StringRef Arch);
-ARM::EndianKind parseArchEndian(StringRef Arch);
-ARM::ProfileKind parseArchProfile(StringRef Arch);
-unsigned parseArchVersion(StringRef Arch);
-
-bool isX18ReservedByDefault(const Triple &TT);
-
-} // namespace AArch64
-
namespace X86 {
// This should be kept in sync with libcc/compiler-rt as its included by clang
@@ -266,6 +75,96 @@ enum ProcessorFeatures {
} // namespace X86
+namespace AMDGPU {
+
+/// GPU kinds supported by the AMDGPU target.
+enum GPUKind : uint32_t {
+ // Not specified processor.
+ GK_NONE = 0,
+
+ // R600-based processors.
+ GK_R600 = 1,
+ GK_R630 = 2,
+ GK_RS880 = 3,
+ GK_RV670 = 4,
+ GK_RV710 = 5,
+ GK_RV730 = 6,
+ GK_RV770 = 7,
+ GK_CEDAR = 8,
+ GK_CYPRESS = 9,
+ GK_JUNIPER = 10,
+ GK_REDWOOD = 11,
+ GK_SUMO = 12,
+ GK_BARTS = 13,
+ GK_CAICOS = 14,
+ GK_CAYMAN = 15,
+ GK_TURKS = 16,
+
+ GK_R600_FIRST = GK_R600,
+ GK_R600_LAST = GK_TURKS,
+
+ // AMDGCN-based processors.
+ GK_GFX600 = 32,
+ GK_GFX601 = 33,
+
+ GK_GFX700 = 40,
+ GK_GFX701 = 41,
+ GK_GFX702 = 42,
+ GK_GFX703 = 43,
+ GK_GFX704 = 44,
+
+ GK_GFX801 = 50,
+ GK_GFX802 = 51,
+ GK_GFX803 = 52,
+ GK_GFX810 = 53,
+
+ GK_GFX900 = 60,
+ GK_GFX902 = 61,
+ GK_GFX904 = 62,
+ GK_GFX906 = 63,
+ GK_GFX909 = 65,
+
+ GK_AMDGCN_FIRST = GK_GFX600,
+ GK_AMDGCN_LAST = GK_GFX909,
+};
+
+/// Instruction set architecture version.
+struct IsaVersion {
+ unsigned Major;
+ unsigned Minor;
+ unsigned Stepping;
+};
+
+// This isn't comprehensive for now, just things that are needed from the
+// frontend driver.
+enum ArchFeatureKind : uint32_t {
+ FEATURE_NONE = 0,
+
+ // These features only exist for r600, and are implied true for amdgcn.
+ FEATURE_FMA = 1 << 1,
+ FEATURE_LDEXP = 1 << 2,
+ FEATURE_FP64 = 1 << 3,
+
+ // Common features.
+ FEATURE_FAST_FMA_F32 = 1 << 4,
+ FEATURE_FAST_DENORMAL_F32 = 1 << 5
+};
+
+StringRef getArchNameAMDGCN(GPUKind AK);
+StringRef getArchNameR600(GPUKind AK);
+StringRef getCanonicalArchName(StringRef Arch);
+GPUKind parseArchAMDGCN(StringRef CPU);
+GPUKind parseArchR600(StringRef CPU);
+unsigned getArchAttrAMDGCN(GPUKind AK);
+unsigned getArchAttrR600(GPUKind AK);
+
+void fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values);
+void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
+
+IsaVersion getIsaVersion(StringRef GPU);
+
+} // namespace AMDGPU
+
} // namespace llvm
#endif
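A sketch of the new AMDGPU helpers: map a -mcpu string to its canonical gfx name and ISA version. The printed format is illustrative only.

#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void printAMDGCNInfo(StringRef CPU) {
  AMDGPU::GPUKind Kind = AMDGPU::parseArchAMDGCN(CPU);
  if (Kind == AMDGPU::GK_NONE) {
    errs() << "unknown amdgcn processor: " << CPU << "\n";
    return;
  }
  StringRef Canonical = AMDGPU::getArchNameAMDGCN(Kind);
  AMDGPU::IsaVersion V = AMDGPU::getIsaVersion(Canonical);
  outs() << Canonical << " -> ISA v" << V.Major << "." << V.Minor << "."
         << V.Stepping << "\n";
}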
diff --git a/contrib/llvm/include/llvm/Support/Threading.h b/contrib/llvm/include/llvm/Support/Threading.h
index e8021f648b0d..ba7ece5e72ba 100644
--- a/contrib/llvm/include/llvm/Support/Threading.h
+++ b/contrib/llvm/include/llvm/Support/Threading.h
@@ -27,7 +27,8 @@
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) && \
(defined(_LIBCPP_VERSION) || \
- !(defined(__NetBSD__) || defined(__OpenBSD__) || defined(__ppc__)))
+ !(defined(__NetBSD__) || defined(__OpenBSD__) || \
+ (defined(__ppc__) || defined(__PPC__))))
// std::call_once from libc++ is used on all Unix platforms. Other
// implementations like libstdc++ are known to have problems on NetBSD,
// OpenBSD and PowerPC.
diff --git a/contrib/llvm/include/llvm/Support/Timer.h b/contrib/llvm/include/llvm/Support/Timer.h
index bfffbc3157b1..a11c3ce3ff22 100644
--- a/contrib/llvm/include/llvm/Support/Timer.h
+++ b/contrib/llvm/include/llvm/Support/Timer.h
@@ -206,15 +206,23 @@ public:
Description.assign(NewDescription.begin(), NewDescription.end());
}
- /// Print any started timers in this group and zero them.
+ /// Print any started timers in this group.
void print(raw_ostream &OS);
- /// This static method prints all timers and clears them all out.
+ /// Clear all timers in this group.
+ void clear();
+
+ /// This static method prints all timers.
static void printAll(raw_ostream &OS);
+ /// Clear out all timers. This is mostly used to disable automatic
+ /// printing on shutdown, when timers have already been printed explicitly
+ /// using \c printAll or \c printJSONValues.
+ static void clearAll();
+
const char *printJSONValues(raw_ostream &OS, const char *delim);
- /// Prints all timers as JSON key/value pairs, and clears them all out.
+ /// Prints all timers as JSON key/value pairs.
static const char *printAllJSONValues(raw_ostream &OS, const char *delim);
/// Ensure global timer group lists are initialized. This function is mostly
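A sketch of the new print/clear split described above: print every timer group once, then clear them so the automatic report at shutdown stays silent.

#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void reportTimersOnce() {
  TimerGroup::printAll(errs()); // Printing no longer zeroes the timers.
  TimerGroup::clearAll();       // Suppress the automatic print on shutdown.
}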
diff --git a/contrib/llvm/include/llvm/Support/VirtualFileSystem.h b/contrib/llvm/include/llvm/Support/VirtualFileSystem.h
new file mode 100644
index 000000000000..61c3d2f46e9c
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -0,0 +1,764 @@
+//===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines the virtual file system interface vfs::FileSystem.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_VIRTUALFILESYSTEM_H
+#define LLVM_SUPPORT_VIRTUALFILESYSTEM_H
+
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Chrono.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include <cassert>
+#include <cstdint>
+#include <ctime>
+#include <memory>
+#include <stack>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class MemoryBuffer;
+
+namespace vfs {
+
+/// The result of a \p status operation.
+class Status {
+ std::string Name;
+ llvm::sys::fs::UniqueID UID;
+ llvm::sys::TimePoint<> MTime;
+ uint32_t User;
+ uint32_t Group;
+ uint64_t Size;
+ llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::status_error;
+ llvm::sys::fs::perms Perms;
+
+public:
+ // FIXME: remove when files support multiple names
+ bool IsVFSMapped = false;
+
+ Status() = default;
+ Status(const llvm::sys::fs::file_status &Status);
+ Status(StringRef Name, llvm::sys::fs::UniqueID UID,
+ llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group,
+ uint64_t Size, llvm::sys::fs::file_type Type,
+ llvm::sys::fs::perms Perms);
+
+ /// Get a copy of a Status with a different name.
+ static Status copyWithNewName(const Status &In, StringRef NewName);
+ static Status copyWithNewName(const llvm::sys::fs::file_status &In,
+ StringRef NewName);
+
+ /// Returns the name that should be used for this file or directory.
+ StringRef getName() const { return Name; }
+
+ /// @name Status interface from llvm::sys::fs
+ /// @{
+ llvm::sys::fs::file_type getType() const { return Type; }
+ llvm::sys::fs::perms getPermissions() const { return Perms; }
+ llvm::sys::TimePoint<> getLastModificationTime() const { return MTime; }
+ llvm::sys::fs::UniqueID getUniqueID() const { return UID; }
+ uint32_t getUser() const { return User; }
+ uint32_t getGroup() const { return Group; }
+ uint64_t getSize() const { return Size; }
+ /// @}
+ /// @name Status queries
+ /// These are static queries in llvm::sys::fs.
+ /// @{
+ bool equivalent(const Status &Other) const;
+ bool isDirectory() const;
+ bool isRegularFile() const;
+ bool isOther() const;
+ bool isSymlink() const;
+ bool isStatusKnown() const;
+ bool exists() const;
+ /// @}
+};
+
+/// Represents an open file.
+class File {
+public:
+ /// Destroy the file after closing it (if open).
+ /// Sub-classes should generally call close() inside their destructors. We
+ /// cannot do that from the base class, since close is virtual.
+ virtual ~File();
+
+ /// Get the status of the file.
+ virtual llvm::ErrorOr<Status> status() = 0;
+
+ /// Get the name of the file
+ virtual llvm::ErrorOr<std::string> getName() {
+ if (auto Status = status())
+ return Status->getName().str();
+ else
+ return Status.getError();
+ }
+
+ /// Get the contents of the file as a \p MemoryBuffer.
+ virtual llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
+ getBuffer(const Twine &Name, int64_t FileSize = -1,
+ bool RequiresNullTerminator = true, bool IsVolatile = false) = 0;
+
+ /// Closes the file.
+ virtual std::error_code close() = 0;
+};
+
+/// A member of a directory, yielded by a directory_iterator.
+/// Only information available on most platforms is included.
+class directory_entry {
+ std::string Path;
+ llvm::sys::fs::file_type Type;
+
+public:
+ directory_entry() = default;
+ directory_entry(std::string Path, llvm::sys::fs::file_type Type)
+ : Path(std::move(Path)), Type(Type) {}
+
+ llvm::StringRef path() const { return Path; }
+ llvm::sys::fs::file_type type() const { return Type; }
+};
+
+namespace detail {
+
+/// An interface for virtual file systems to provide an iterator over the
+/// (non-recursive) contents of a directory.
+struct DirIterImpl {
+ virtual ~DirIterImpl();
+
+ /// Sets \c CurrentEntry to the next entry in the directory on success,
+ /// to directory_entry() at end, or returns a system-defined \c error_code.
+ virtual std::error_code increment() = 0;
+
+ directory_entry CurrentEntry;
+};
+
+} // namespace detail
+
+/// An input iterator over the entries in a virtual path, similar to
+/// llvm::sys::fs::directory_iterator.
+class directory_iterator {
+ std::shared_ptr<detail::DirIterImpl> Impl; // Input iterator semantics on copy
+
+public:
+ directory_iterator(std::shared_ptr<detail::DirIterImpl> I)
+ : Impl(std::move(I)) {
+ assert(Impl.get() != nullptr && "requires non-null implementation");
+ if (Impl->CurrentEntry.path().empty())
+ Impl.reset(); // Normalize the end iterator to Impl == nullptr.
+ }
+
+ /// Construct an 'end' iterator.
+ directory_iterator() = default;
+
+ /// Equivalent to operator++, with an error code.
+ directory_iterator &increment(std::error_code &EC) {
+ assert(Impl && "attempting to increment past end");
+ EC = Impl->increment();
+ if (Impl->CurrentEntry.path().empty())
+ Impl.reset(); // Normalize the end iterator to Impl == nullptr.
+ return *this;
+ }
+
+ const directory_entry &operator*() const { return Impl->CurrentEntry; }
+ const directory_entry *operator->() const { return &Impl->CurrentEntry; }
+
+ bool operator==(const directory_iterator &RHS) const {
+ if (Impl && RHS.Impl)
+ return Impl->CurrentEntry.path() == RHS.Impl->CurrentEntry.path();
+ return !Impl && !RHS.Impl;
+ }
+ bool operator!=(const directory_iterator &RHS) const {
+ return !(*this == RHS);
+ }
+};
+
+class FileSystem;
+
+namespace detail {
+
+/// Keeps state for the recursive_directory_iterator.
+struct RecDirIterState {
+ std::stack<directory_iterator, std::vector<directory_iterator>> Stack;
+ bool HasNoPushRequest = false;
+};
+
+} // end namespace detail
+
+/// An input iterator over the recursive contents of a virtual path,
+/// similar to llvm::sys::fs::recursive_directory_iterator.
+class recursive_directory_iterator {
+ FileSystem *FS;
+ std::shared_ptr<detail::RecDirIterState>
+ State; // Input iterator semantics on copy.
+
+public:
+ recursive_directory_iterator(FileSystem &FS, const Twine &Path,
+ std::error_code &EC);
+
+ /// Construct an 'end' iterator.
+ recursive_directory_iterator() = default;
+
+ /// Equivalent to operator++, with an error code.
+ recursive_directory_iterator &increment(std::error_code &EC);
+
+ const directory_entry &operator*() const { return *State->Stack.top(); }
+ const directory_entry *operator->() const { return &*State->Stack.top(); }
+
+ bool operator==(const recursive_directory_iterator &Other) const {
+ return State == Other.State; // identity
+ }
+ bool operator!=(const recursive_directory_iterator &RHS) const {
+ return !(*this == RHS);
+ }
+
+ /// Gets the current level. Starting path is at level 0.
+ int level() const {
+ assert(!State->Stack.empty() &&
+ "Cannot get level without any iteration state");
+ return State->Stack.size() - 1;
+ }
+
+ void no_push() { State->HasNoPushRequest = true; }
+};
+
+/// The virtual file system interface.
+class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem> {
+public:
+ virtual ~FileSystem();
+
+ /// Get the status of the entry at \p Path, if one exists.
+ virtual llvm::ErrorOr<Status> status(const Twine &Path) = 0;
+
+ /// Get a \p File object for the file at \p Path, if one exists.
+ virtual llvm::ErrorOr<std::unique_ptr<File>>
+ openFileForRead(const Twine &Path) = 0;
+
+ /// This is a convenience method that opens a file, gets its content and then
+ /// closes the file.
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
+ getBufferForFile(const Twine &Name, int64_t FileSize = -1,
+ bool RequiresNullTerminator = true, bool IsVolatile = false);
+
+ /// Get a directory_iterator for \p Dir.
+ /// \note The 'end' iterator is directory_iterator().
+ virtual directory_iterator dir_begin(const Twine &Dir,
+ std::error_code &EC) = 0;
+
+ /// Set the working directory. This will affect all following operations on
+ /// this file system and may propagate down for nested file systems.
+ virtual std::error_code setCurrentWorkingDirectory(const Twine &Path) = 0;
+
+ /// Get the working directory of this file system.
+ virtual llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const = 0;
+
+  /// Gets the real path of \p Path, e.g. collapses all . and .. patterns and
+  /// resolves symlinks. For the real file system this uses
+  /// `llvm::sys::fs::real_path`. Returns errc::operation_not_permitted if not
+  /// implemented by the subclass.
+ virtual std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const;
+
+ /// Check whether a file exists. Provided for convenience.
+ bool exists(const Twine &Path);
+
+ /// Is the file mounted on a local filesystem?
+ virtual std::error_code isLocal(const Twine &Path, bool &Result);
+
+ /// Make \a Path an absolute path.
+ ///
+ /// Makes \a Path absolute using the current directory if it is not already.
+ /// An empty \a Path will result in the current directory.
+ ///
+ /// /absolute/path => /absolute/path
+ /// relative/../path => <current-directory>/relative/../path
+ ///
+ /// \param Path A path that is modified to be an absolute path.
+ /// \returns success if \a path has been made absolute, otherwise a
+ /// platform-specific error_code.
+ std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const;
+};
+
+/// Gets a \p vfs::FileSystem for the 'real' file system, as seen by
+/// the operating system.
+IntrusiveRefCntPtr<FileSystem> getRealFileSystem();
+
+/// A file system that allows overlaying one \p AbstractFileSystem on top
+/// of another.
+///
+/// Consists of a stack of >=1 \p FileSystem objects, which are treated as being
+/// one merged file system. When there is a directory that exists in more than
+/// one file system, the \p OverlayFileSystem contains a directory containing
+/// the union of their contents. The attributes (permissions, etc.) of the
+/// top-most (most recently added) directory are used. When there is a file
+/// that exists in more than one file system, the file in the top-most file
+/// system overrides the other(s).
+class OverlayFileSystem : public FileSystem {
+ using FileSystemList = SmallVector<IntrusiveRefCntPtr<FileSystem>, 1>;
+
+ /// The stack of file systems, implemented as a list in order of
+ /// their addition.
+ FileSystemList FSList;
+
+public:
+ OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> Base);
+
+ /// Pushes a file system on top of the stack.
+ void pushOverlay(IntrusiveRefCntPtr<FileSystem> FS);
+
+ llvm::ErrorOr<Status> status(const Twine &Path) override;
+ llvm::ErrorOr<std::unique_ptr<File>>
+ openFileForRead(const Twine &Path) override;
+ directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
+ llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
+ std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
+ std::error_code isLocal(const Twine &Path, bool &Result) override;
+ std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const override;
+
+ using iterator = FileSystemList::reverse_iterator;
+ using const_iterator = FileSystemList::const_reverse_iterator;
+
+ /// Get an iterator pointing to the most recently added file system.
+ iterator overlays_begin() { return FSList.rbegin(); }
+ const_iterator overlays_begin() const { return FSList.rbegin(); }
+
+ /// Get an iterator pointing one-past the least recently added file
+ /// system.
+ iterator overlays_end() { return FSList.rend(); }
+ const_iterator overlays_end() const { return FSList.rend(); }
+};
+
+/// By default, this delegates all calls to the underlying file system. This
+/// is useful when derived file systems want to override some calls and still
+/// proxy other calls.
+class ProxyFileSystem : public FileSystem {
+public:
+ explicit ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)
+ : FS(std::move(FS)) {}
+
+ llvm::ErrorOr<Status> status(const Twine &Path) override {
+ return FS->status(Path);
+ }
+ llvm::ErrorOr<std::unique_ptr<File>>
+ openFileForRead(const Twine &Path) override {
+ return FS->openFileForRead(Path);
+ }
+ directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override {
+ return FS->dir_begin(Dir, EC);
+ }
+ llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
+ return FS->getCurrentWorkingDirectory();
+ }
+ std::error_code setCurrentWorkingDirectory(const Twine &Path) override {
+ return FS->setCurrentWorkingDirectory(Path);
+ }
+ std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const override {
+ return FS->getRealPath(Path, Output);
+ }
+ std::error_code isLocal(const Twine &Path, bool &Result) override {
+ return FS->isLocal(Path, Result);
+ }
+
+protected:
+ FileSystem &getUnderlyingFS() { return *FS; }
+
+private:
+ IntrusiveRefCntPtr<FileSystem> FS;
+
+ virtual void anchor();
+};
+
+namespace detail {
+
+class InMemoryDirectory;
+class InMemoryFile;
+
+} // namespace detail
+
+/// An in-memory file system.
+class InMemoryFileSystem : public FileSystem {
+ std::unique_ptr<detail::InMemoryDirectory> Root;
+ std::string WorkingDirectory;
+ bool UseNormalizedPaths = true;
+
+  /// If HardLinkTarget is non-null, a hard link is created to the To path,
+  /// which must be a file. If it is null, this behaves like the public addFile.
+ bool addFile(const Twine &Path, time_t ModificationTime,
+ std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ Optional<uint32_t> User, Optional<uint32_t> Group,
+ Optional<llvm::sys::fs::file_type> Type,
+ Optional<llvm::sys::fs::perms> Perms,
+ const detail::InMemoryFile *HardLinkTarget);
+
+public:
+ explicit InMemoryFileSystem(bool UseNormalizedPaths = true);
+ ~InMemoryFileSystem() override;
+
+ /// Add a file containing a buffer or a directory to the VFS with a
+ /// path. The VFS owns the buffer. If present, User, Group, Type
+ /// and Perms apply to the newly-created file or directory.
+ /// \return true if the file or directory was successfully added,
+ /// false if the file or directory already exists in the file system with
+ /// different contents.
+ bool addFile(const Twine &Path, time_t ModificationTime,
+ std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ Optional<uint32_t> User = None, Optional<uint32_t> Group = None,
+ Optional<llvm::sys::fs::file_type> Type = None,
+ Optional<llvm::sys::fs::perms> Perms = None);
+
+ /// Add a hard link to a file.
+ /// Here hard links are not intended to be fully equivalent to the classical
+ /// filesystem. Both the hard link and the file share the same buffer and
+ /// status (and thus have the same UniqueID). Because of this there is no way
+ /// to distinguish between the link and the file after the link has been
+ /// added.
+ ///
+ /// The To path must be an existing file or a hardlink. The From file must not
+ /// have been added before. The To Path must not be a directory. The From Node
+ /// is added as a hard link which points to the resolved file of To Node.
+ /// \return true if the above condition is satisfied and hardlink was
+ /// successfully created, false otherwise.
+ bool addHardLink(const Twine &From, const Twine &To);
+
+ /// Add a buffer to the VFS with a path. The VFS does not own the buffer.
+ /// If present, User, Group, Type and Perms apply to the newly-created file
+ /// or directory.
+ /// \return true if the file or directory was successfully added,
+ /// false if the file or directory already exists in the file system with
+ /// different contents.
+ bool addFileNoOwn(const Twine &Path, time_t ModificationTime,
+ llvm::MemoryBuffer *Buffer, Optional<uint32_t> User = None,
+ Optional<uint32_t> Group = None,
+ Optional<llvm::sys::fs::file_type> Type = None,
+ Optional<llvm::sys::fs::perms> Perms = None);
+
+ std::string toString() const;
+
+ /// Return true if this file system normalizes . and .. in paths.
+ bool useNormalizedPaths() const { return UseNormalizedPaths; }
+
+ llvm::ErrorOr<Status> status(const Twine &Path) override;
+ llvm::ErrorOr<std::unique_ptr<File>>
+ openFileForRead(const Twine &Path) override;
+ directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
+
+ llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
+ return WorkingDirectory;
+ }
+ /// Canonicalizes \p Path by combining with the current working
+ /// directory and normalizing the path (e.g. remove dots). If the current
+ /// working directory is not set, this returns errc::operation_not_permitted.
+ ///
+  /// This doesn't resolve symlinks, as they are not supported by the in-memory
+  /// file system.
+ std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const override;
+ std::error_code isLocal(const Twine &Path, bool &Result) override;
+ std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
+};
+
+/// Get a globally unique ID for a virtual file or directory.
+llvm::sys::fs::UniqueID getNextVirtualUniqueID();
+
+/// Gets a \p FileSystem for a virtual file system described in YAML
+/// format.
+IntrusiveRefCntPtr<FileSystem>
+getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ llvm::SourceMgr::DiagHandlerTy DiagHandler,
+ StringRef YAMLFilePath, void *DiagContext = nullptr,
+ IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
+
+struct YAMLVFSEntry {
+ template <typename T1, typename T2>
+ YAMLVFSEntry(T1 &&VPath, T2 &&RPath)
+ : VPath(std::forward<T1>(VPath)), RPath(std::forward<T2>(RPath)) {}
+ std::string VPath;
+ std::string RPath;
+};
+
+class VFSFromYamlDirIterImpl;
+class RedirectingFileSystemParser;
+
+/// A virtual file system parsed from a YAML file.
+///
+/// Currently, this class allows creating virtual directories and mapping
+/// virtual file paths to existing external files, available in \c ExternalFS.
+///
+/// The basic structure of the parsed file is:
+/// \verbatim
+/// {
+/// 'version': <version number>,
+/// <optional configuration>
+/// 'roots': [
+/// <directory entries>
+/// ]
+/// }
+/// \endverbatim
+///
+/// All configuration options are optional.
+/// 'case-sensitive': <boolean, default=true>
+/// 'use-external-names': <boolean, default=true>
+/// 'overlay-relative': <boolean, default=false>
+/// 'fallthrough': <boolean, default=true>
+///
+/// Virtual directories are represented as
+/// \verbatim
+/// {
+/// 'type': 'directory',
+/// 'name': <string>,
+/// 'contents': [ <file or directory entries> ]
+/// }
+/// \endverbatim
+///
+/// The default attributes for virtual directories are:
+/// \verbatim
+/// MTime = now() when created
+/// Perms = 0777
+/// User = Group = 0
+/// Size = 0
+/// UniqueID = unspecified unique value
+/// \endverbatim
+///
+/// Re-mapped files are represented as
+/// \verbatim
+/// {
+/// 'type': 'file',
+/// 'name': <string>,
+/// 'use-external-name': <boolean> # Optional
+/// 'external-contents': <path to external file>
+/// }
+/// \endverbatim
+///
+/// and inherit their attributes from the external contents.
+///
+/// In both cases, the 'name' field may contain multiple path components (e.g.
+/// /path/to/file). However, any directory that contains more than one child
+/// must be uniquely represented by a directory entry.
+class RedirectingFileSystem : public vfs::FileSystem {
+public:
+ enum EntryKind { EK_Directory, EK_File };
+
+ /// A single file or directory in the VFS.
+ class Entry {
+ EntryKind Kind;
+ std::string Name;
+
+ public:
+ Entry(EntryKind K, StringRef Name) : Kind(K), Name(Name) {}
+ virtual ~Entry() = default;
+
+ StringRef getName() const { return Name; }
+ EntryKind getKind() const { return Kind; }
+ };
+
+ class RedirectingDirectoryEntry : public Entry {
+ std::vector<std::unique_ptr<Entry>> Contents;
+ Status S;
+
+ public:
+ RedirectingDirectoryEntry(StringRef Name,
+ std::vector<std::unique_ptr<Entry>> Contents,
+ Status S)
+ : Entry(EK_Directory, Name), Contents(std::move(Contents)),
+ S(std::move(S)) {}
+ RedirectingDirectoryEntry(StringRef Name, Status S)
+ : Entry(EK_Directory, Name), S(std::move(S)) {}
+
+ Status getStatus() { return S; }
+
+ void addContent(std::unique_ptr<Entry> Content) {
+ Contents.push_back(std::move(Content));
+ }
+
+ Entry *getLastContent() const { return Contents.back().get(); }
+
+ using iterator = decltype(Contents)::iterator;
+
+ iterator contents_begin() { return Contents.begin(); }
+ iterator contents_end() { return Contents.end(); }
+
+ static bool classof(const Entry *E) { return E->getKind() == EK_Directory; }
+ };
+
+ class RedirectingFileEntry : public Entry {
+ public:
+ enum NameKind { NK_NotSet, NK_External, NK_Virtual };
+
+ private:
+ std::string ExternalContentsPath;
+ NameKind UseName;
+
+ public:
+ RedirectingFileEntry(StringRef Name, StringRef ExternalContentsPath,
+ NameKind UseName)
+ : Entry(EK_File, Name), ExternalContentsPath(ExternalContentsPath),
+ UseName(UseName) {}
+
+ StringRef getExternalContentsPath() const { return ExternalContentsPath; }
+
+    /// Whether to use the external path as the name for this file.
+ bool useExternalName(bool GlobalUseExternalName) const {
+ return UseName == NK_NotSet ? GlobalUseExternalName
+ : (UseName == NK_External);
+ }
+
+ NameKind getUseName() const { return UseName; }
+
+ static bool classof(const Entry *E) { return E->getKind() == EK_File; }
+ };
+
+private:
+ friend class VFSFromYamlDirIterImpl;
+ friend class RedirectingFileSystemParser;
+
+ /// The root(s) of the virtual file system.
+ std::vector<std::unique_ptr<Entry>> Roots;
+
+ /// The file system to use for external references.
+ IntrusiveRefCntPtr<FileSystem> ExternalFS;
+
+ /// If IsRelativeOverlay is set, this represents the directory
+ /// path that should be prefixed to each 'external-contents' entry
+ /// when reading from YAML files.
+ std::string ExternalContentsPrefixDir;
+
+ /// @name Configuration
+ /// @{
+
+ /// Whether to perform case-sensitive comparisons.
+ ///
+ /// Currently, case-insensitive matching only works correctly with ASCII.
+ bool CaseSensitive = true;
+
+ /// IsRelativeOverlay marks whether a ExternalContentsPrefixDir path must
+ /// be prefixed in every 'external-contents' when reading from YAML files.
+ bool IsRelativeOverlay = false;
+
+  /// Whether to use the value of 'external-contents' for the
+ /// names of files. This global value is overridable on a per-file basis.
+ bool UseExternalNames = true;
+
+ /// Whether to attempt a file lookup in external file system after it wasn't
+ /// found in VFS.
+ bool IsFallthrough = true;
+ /// @}
+
+  /// Virtual file paths and external files may be canonicalized, i.e. stored
+  /// without "..", "." and "./" in their paths. FIXME: some unittests currently
+  /// fail on win32 when using remove_dots and remove_leading_dotslash on paths.
+ bool UseCanonicalizedPaths =
+#ifdef _WIN32
+ false;
+#else
+ true;
+#endif
+
+ RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS)
+ : ExternalFS(std::move(ExternalFS)) {}
+
+ /// Looks up the path <tt>[Start, End)</tt> in \p From, possibly
+ /// recursing into the contents of \p From if it is a directory.
+ ErrorOr<Entry *> lookupPath(llvm::sys::path::const_iterator Start,
+ llvm::sys::path::const_iterator End,
+ Entry *From) const;
+
+  /// Get the status of a given \c Entry.
+ ErrorOr<Status> status(const Twine &Path, Entry *E);
+
+public:
+ /// Looks up \p Path in \c Roots.
+ ErrorOr<Entry *> lookupPath(const Twine &Path) const;
+
+ /// Parses \p Buffer, which is expected to be in YAML format and
+ /// returns a virtual file system representing its contents.
+ static RedirectingFileSystem *
+ create(std::unique_ptr<MemoryBuffer> Buffer,
+ SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
+ void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS);
+
+ ErrorOr<Status> status(const Twine &Path) override;
+ ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;
+
+ std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const override;
+
+ llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
+
+ std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
+
+ std::error_code isLocal(const Twine &Path, bool &Result) override;
+
+ directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
+
+ void setExternalContentsPrefixDir(StringRef PrefixDir);
+
+ StringRef getExternalContentsPrefixDir() const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const;
+ LLVM_DUMP_METHOD void dumpEntry(Entry *E, int NumSpaces = 0) const;
+#endif
+};
+
+/// Collect all pairs of <virtual path, real path> entries from the
+/// \p YAMLFilePath. This is used by the module dependency collector to forward
+/// the entries into the reproducer output VFS YAML file.
+void collectVFSFromYAML(
+ std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ llvm::SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
+ SmallVectorImpl<YAMLVFSEntry> &CollectedEntries,
+ void *DiagContext = nullptr,
+ IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
+
+class YAMLVFSWriter {
+ std::vector<YAMLVFSEntry> Mappings;
+ Optional<bool> IsCaseSensitive;
+ Optional<bool> IsOverlayRelative;
+ Optional<bool> UseExternalNames;
+ std::string OverlayDir;
+
+public:
+ YAMLVFSWriter() = default;
+
+ void addFileMapping(StringRef VirtualPath, StringRef RealPath);
+
+ void setCaseSensitivity(bool CaseSensitive) {
+ IsCaseSensitive = CaseSensitive;
+ }
+
+ void setUseExternalNames(bool UseExtNames) { UseExternalNames = UseExtNames; }
+
+ void setOverlayDir(StringRef OverlayDirectory) {
+ IsOverlayRelative = true;
+ OverlayDir.assign(OverlayDirectory.str());
+ }
+
+ const std::vector<YAMLVFSEntry> &getMappings() const { return Mappings; }
+
+ void write(llvm::raw_ostream &OS);
+};
+
+} // namespace vfs
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_VIRTUALFILESYSTEM_H
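Editor's note: a hedged usage sketch for the interfaces declared in this new header: put an InMemoryFileSystem on top of the real file system via OverlayFileSystem and read a file through the combined view. The path and file contents are invented for illustration; the calls themselves are the ones declared above. RedirectingFileSystem instances are typically built from a YAML overlay description via getVFSFromYAML rather than constructed directly.

    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/VirtualFileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    void demoOverlayVFS() {
      using namespace llvm;
      // In-memory layer holding a single synthetic file.
      IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMem(
          new vfs::InMemoryFileSystem());
      InMem->addFile("/virtual/hello.txt", /*ModificationTime=*/0,
                     MemoryBuffer::getMemBuffer("hello from the VFS\n"));

      // The in-memory layer is pushed on top of the real file system; lookups
      // consult the most recently added layer first.
      IntrusiveRefCntPtr<vfs::OverlayFileSystem> Overlay(
          new vfs::OverlayFileSystem(vfs::getRealFileSystem()));
      Overlay->pushOverlay(InMem);

      if (auto Buf = Overlay->getBufferForFile("/virtual/hello.txt"))
        outs() << (*Buf)->getBuffer();
      else
        errs() << "lookup failed: " << Buf.getError().message() << "\n";
    }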
diff --git a/contrib/llvm/include/llvm/Support/Win64EH.h b/contrib/llvm/include/llvm/Support/Win64EH.h
index 928eb906de0c..e27bf1b3a1a5 100644
--- a/contrib/llvm/include/llvm/Support/Win64EH.h
+++ b/contrib/llvm/include/llvm/Support/Win64EH.h
@@ -33,7 +33,24 @@ enum UnwindOpcodes {
UOP_SaveNonVolBig,
UOP_SaveXMM128 = 8,
UOP_SaveXMM128Big,
- UOP_PushMachFrame
+ UOP_PushMachFrame,
+ // The following set of unwind opcodes is for ARM64. They are documented at
+ // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+ UOP_AllocMedium,
+ UOP_SaveFPLRX,
+ UOP_SaveFPLR,
+ UOP_SaveReg,
+ UOP_SaveRegX,
+ UOP_SaveRegP,
+ UOP_SaveRegPX,
+ UOP_SaveFReg,
+ UOP_SaveFRegX,
+ UOP_SaveFRegP,
+ UOP_SaveFRegPX,
+ UOP_SetFP,
+ UOP_AddFP,
+ UOP_Nop,
+ UOP_End
};
/// UnwindCode - This union describes a single operation in a function prolog,
diff --git a/contrib/llvm/include/llvm/Support/WithColor.h b/contrib/llvm/include/llvm/Support/WithColor.h
index 85fc5fa0cf14..76842d1c3dc8 100644
--- a/contrib/llvm/include/llvm/Support/WithColor.h
+++ b/contrib/llvm/include/llvm/Support/WithColor.h
@@ -29,23 +29,49 @@ enum class HighlightColor {
Macro,
Error,
Warning,
- Note
+ Note,
+ Remark
};
/// An RAII object that temporarily switches an output stream to a specific
/// color.
class WithColor {
raw_ostream &OS;
- /// Determine whether colors should be displayed.
- bool colorsEnabled(raw_ostream &OS);
+ bool DisableColors;
public:
/// To be used like this: WithColor(OS, HighlightColor::String) << "text";
- WithColor(raw_ostream &OS, HighlightColor S);
+ /// @param OS The output stream
+ /// @param S Symbolic name for syntax element to color
+ /// @param DisableColors Whether to ignore color changes regardless of -color
+ /// and support in OS
+ WithColor(raw_ostream &OS, HighlightColor S, bool DisableColors = false);
+ /// To be used like this: WithColor(OS, raw_ostream::Black) << "text";
+ /// @param OS The output stream
+ /// @param Color ANSI color to use, the special SAVEDCOLOR can be used to
+ /// change only the bold attribute, and keep colors untouched
+ /// @param Bold Bold/brighter text, default false
+ /// @param BG If true, change the background, default: change foreground
+ /// @param DisableColors Whether to ignore color changes regardless of -color
+ /// and support in OS
+ WithColor(raw_ostream &OS,
+ raw_ostream::Colors Color = raw_ostream::SAVEDCOLOR,
+ bool Bold = false, bool BG = false, bool DisableColors = false)
+ : OS(OS), DisableColors(DisableColors) {
+ changeColor(Color, Bold, BG);
+ }
~WithColor();
raw_ostream &get() { return OS; }
operator raw_ostream &() { return OS; }
+ template <typename T> WithColor &operator<<(T &O) {
+ OS << O;
+ return *this;
+ }
+ template <typename T> WithColor &operator<<(const T &O) {
+ OS << O;
+ return *this;
+ }
/// Convenience method for printing "error: " to stderr.
static raw_ostream &error();
@@ -53,13 +79,36 @@ public:
static raw_ostream &warning();
/// Convenience method for printing "note: " to stderr.
static raw_ostream &note();
+ /// Convenience method for printing "remark: " to stderr.
+ static raw_ostream &remark();
/// Convenience method for printing "error: " to the given stream.
- static raw_ostream &error(raw_ostream &OS, StringRef Prefix = "");
+ static raw_ostream &error(raw_ostream &OS, StringRef Prefix = "",
+ bool DisableColors = false);
/// Convenience method for printing "warning: " to the given stream.
- static raw_ostream &warning(raw_ostream &OS, StringRef Prefix = "");
+ static raw_ostream &warning(raw_ostream &OS, StringRef Prefix = "",
+ bool DisableColors = false);
/// Convenience method for printing "note: " to the given stream.
- static raw_ostream &note(raw_ostream &OS, StringRef Prefix = "");
+ static raw_ostream &note(raw_ostream &OS, StringRef Prefix = "",
+ bool DisableColors = false);
+ /// Convenience method for printing "remark: " to the given stream.
+ static raw_ostream &remark(raw_ostream &OS, StringRef Prefix = "",
+ bool DisableColors = false);
+
+ /// Determine whether colors are displayed.
+ bool colorsEnabled();
+
+ /// Change the color of text that will be output from this point forward.
+ /// @param Color ANSI color to use, the special SAVEDCOLOR can be used to
+ /// change only the bold attribute, and keep colors untouched
+ /// @param Bold Bold/brighter text, default false
+ /// @param BG If true, change the background, default: change foreground
+ WithColor &changeColor(raw_ostream::Colors Color, bool Bold = false,
+ bool BG = false);
+
+ /// Reset the colors to terminal defaults. Call this when you are done
+ /// outputting colored text, or before program exit.
+ WithColor &resetColor();
};
} // end namespace llvm
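Editor's note: a small sketch exercising the WithColor additions in this hunk, i.e. the new remark() helper, the raw_ostream-style constructor, the streaming operators, and explicit changeColor/resetColor. The message text is illustrative only.

    #include "llvm/Support/WithColor.h"
    #include "llvm/Support/raw_ostream.h"

    void printDiagnostics() {
      // Prefix helpers write a colored prefix and hand back the stream for the
      // rest of the message.
      llvm::WithColor::remark() << "loop vectorized\n";
      llvm::WithColor::error(llvm::errs(), "mytool") << "something went wrong\n";

      // RAII form: the color is active for the lifetime of the object, and the
      // new operator<< overloads let it be used like a raw_ostream.
      llvm::WithColor Highlight(llvm::outs(), llvm::HighlightColor::Warning);
      Highlight << "highlighted text";
      Highlight.changeColor(llvm::raw_ostream::GREEN, /*Bold=*/true);
      Highlight << " now green and bold";
      Highlight.resetColor();
      llvm::outs() << "\n";
    }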
diff --git a/contrib/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/contrib/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index 185b357efef5..466dd309909a 100644
--- a/contrib/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/contrib/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -414,7 +414,7 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_R16, "2-byte") \
ENUM_ENTRY(TYPE_R32, "4-byte") \
ENUM_ENTRY(TYPE_R64, "8-byte") \
- ENUM_ENTRY(TYPE_IMM, "immediate operand") \
+ ENUM_ENTRY(TYPE_IMM, "immediate operand") \
ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \
ENUM_ENTRY(TYPE_AVX512ICC, "1-byte immediate operand for AVX512 icmp") \
diff --git a/contrib/llvm/include/llvm/Support/X86TargetParser.def b/contrib/llvm/include/llvm/Support/X86TargetParser.def
index e4af0657a350..e9bede545d3f 100644
--- a/contrib/llvm/include/llvm/Support/X86TargetParser.def
+++ b/contrib/llvm/include/llvm/Support/X86TargetParser.def
@@ -34,17 +34,20 @@ X86_VENDOR(VENDOR_AMD, "amd")
#ifndef X86_CPU_TYPE
#define X86_CPU_TYPE(ARCHNAME, ENUM)
#endif
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("bonnell", INTEL_BONNELL, "bonnell", "atom")
-X86_CPU_TYPE_COMPAT ("core2", INTEL_CORE2, "core2")
-X86_CPU_TYPE_COMPAT ("nehalem", INTEL_COREI7, "corei7")
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("amdfam10", AMDFAM10H, "amdfam10h", "amdfam10")
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("bdver1", AMDFAM15H, "amdfam15h", "amdfam15")
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("silvermont", INTEL_SILVERMONT, "silvermont", "slm")
-X86_CPU_TYPE_COMPAT ("knl", INTEL_KNL, "knl")
-X86_CPU_TYPE_COMPAT ("btver1", AMD_BTVER1, "btver1")
-X86_CPU_TYPE_COMPAT ("btver2", AMD_BTVER2, "btver2")
-X86_CPU_TYPE_COMPAT ("znver1", AMDFAM17H, "amdfam17h")
-X86_CPU_TYPE_COMPAT ("knm", INTEL_KNM, "knm")
+X86_CPU_TYPE_COMPAT_WITH_ALIAS("bonnell", INTEL_BONNELL, "bonnell", "atom")
+X86_CPU_TYPE_COMPAT ("core2", INTEL_CORE2, "core2")
+X86_CPU_TYPE_COMPAT ("nehalem", INTEL_COREI7, "corei7")
+X86_CPU_TYPE_COMPAT_WITH_ALIAS("amdfam10", AMDFAM10H, "amdfam10h", "amdfam10")
+X86_CPU_TYPE_COMPAT_WITH_ALIAS("bdver1", AMDFAM15H, "amdfam15h", "amdfam15")
+X86_CPU_TYPE_COMPAT_WITH_ALIAS("silvermont", INTEL_SILVERMONT, "silvermont", "slm")
+X86_CPU_TYPE_COMPAT ("knl", INTEL_KNL, "knl")
+X86_CPU_TYPE_COMPAT ("btver1", AMD_BTVER1, "btver1")
+X86_CPU_TYPE_COMPAT ("btver2", AMD_BTVER2, "btver2")
+X86_CPU_TYPE_COMPAT ("znver1", AMDFAM17H, "amdfam17h")
+X86_CPU_TYPE_COMPAT ("knm", INTEL_KNM, "knm")
+X86_CPU_TYPE_COMPAT ("goldmont", INTEL_GOLDMONT, "goldmont")
+X86_CPU_TYPE_COMPAT ("goldmont-plus", INTEL_GOLDMONT_PLUS, "goldmont-plus")
+X86_CPU_TYPE_COMPAT ("tremont", INTEL_TREMONT, "tremont")
// Entries below this are not in libgcc/compiler-rt.
X86_CPU_TYPE ("i386", INTEL_i386)
X86_CPU_TYPE ("i486", INTEL_i486)
@@ -64,9 +67,6 @@ X86_CPU_TYPE ("athlon", AMD_ATHLON)
X86_CPU_TYPE ("athlon-xp", AMD_ATHLON_XP)
X86_CPU_TYPE ("k8", AMD_K8)
X86_CPU_TYPE ("k8-sse3", AMD_K8SSE3)
-X86_CPU_TYPE ("goldmont", INTEL_GOLDMONT)
-X86_CPU_TYPE ("goldmont-plus", INTEL_GOLDMONT_PLUS)
-X86_CPU_TYPE ("tremont", INTEL_TREMONT)
#undef X86_CPU_TYPE_COMPAT_WITH_ALIAS
#undef X86_CPU_TYPE_COMPAT
#undef X86_CPU_TYPE
@@ -97,9 +97,12 @@ X86_CPU_SUBTYPE_COMPAT("broadwell", INTEL_COREI7_BROADWELL, "broadwell
X86_CPU_SUBTYPE_COMPAT("skylake", INTEL_COREI7_SKYLAKE, "skylake")
X86_CPU_SUBTYPE_COMPAT("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512, "skylake-avx512")
X86_CPU_SUBTYPE_COMPAT("cannonlake", INTEL_COREI7_CANNONLAKE, "cannonlake")
+X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client")
+X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server")
// Entries below this are not in libgcc/compiler-rt.
X86_CPU_SUBTYPE ("core2", INTEL_CORE2_65)
X86_CPU_SUBTYPE ("penryn", INTEL_CORE2_45)
+X86_CPU_SUBTYPE ("cascadelake", INTEL_COREI7_CASCADELAKE)
X86_CPU_SUBTYPE ("k6", AMDPENTIUM_K6)
X86_CPU_SUBTYPE ("k6-2", AMDPENTIUM_K62)
X86_CPU_SUBTYPE ("k6-3", AMDPENTIUM_K63)
@@ -147,11 +150,16 @@ X86_FEATURE_COMPAT(27, FEATURE_AVX512IFMA, "avx512ifma")
X86_FEATURE_COMPAT(28, FEATURE_AVX5124VNNIW, "avx5124vnniw")
X86_FEATURE_COMPAT(29, FEATURE_AVX5124FMAPS, "avx5124fmaps")
X86_FEATURE_COMPAT(30, FEATURE_AVX512VPOPCNTDQ, "avx512vpopcntdq")
+X86_FEATURE_COMPAT(31, FEATURE_AVX512VBMI2, "avx512vbmi2")
+X86_FEATURE_COMPAT(32, FEATURE_GFNI, "gfni")
+X86_FEATURE_COMPAT(33, FEATURE_VPCLMULQDQ, "vpclmulqdq")
+X86_FEATURE_COMPAT(34, FEATURE_AVX512VNNI, "avx512vnni")
+X86_FEATURE_COMPAT(35, FEATURE_AVX512BITALG, "avx512bitalg")
// Features below here are not in libgcc/compiler-rt.
-X86_FEATURE (32, FEATURE_MOVBE)
-X86_FEATURE (33, FEATURE_ADX)
-X86_FEATURE (34, FEATURE_EM64T)
-X86_FEATURE (35, FEATURE_CLFLUSHOPT)
-X86_FEATURE (36, FEATURE_SHA)
+X86_FEATURE (64, FEATURE_MOVBE)
+X86_FEATURE (65, FEATURE_ADX)
+X86_FEATURE (66, FEATURE_EM64T)
+X86_FEATURE (67, FEATURE_CLFLUSHOPT)
+X86_FEATURE (68, FEATURE_SHA)
#undef X86_FEATURE_COMPAT
#undef X86_FEATURE
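Editor's note: X86TargetParser.def is an X-macro table; a consumer defines the X86_CPU_TYPE* macros it cares about, includes the file, and each entry above expands in place (the #undef lines at the end reset the macros). A hedged sketch of one way such a table is consumed; the enum and namespace names below are invented, only the macro arities come from the entries above.

    // Hypothetical consumer: build an enum of every CPU type in the table.
    namespace demo {
    enum CPUType {
    #define X86_CPU_TYPE(ARCHNAME, ENUM) ENUM,
    #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) ENUM,
    #define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) ENUM,
    #include "llvm/Support/X86TargetParser.def"
      CPU_TYPE_MAX
    };
    } // namespace demo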
diff --git a/contrib/llvm/include/llvm/Support/YAMLTraits.h b/contrib/llvm/include/llvm/Support/YAMLTraits.h
index 4b8c4e958288..3d790e96fff7 100644
--- a/contrib/llvm/include/llvm/Support/YAMLTraits.h
+++ b/contrib/llvm/include/llvm/Support/YAMLTraits.h
@@ -27,6 +27,7 @@
#include <cctype>
#include <cstddef>
#include <cstdint>
+#include <iterator>
#include <map>
#include <memory>
#include <new>
@@ -38,6 +39,12 @@
namespace llvm {
namespace yaml {
+enum class NodeKind : uint8_t {
+ Scalar,
+ Map,
+ Sequence,
+};
+
struct EmptyContext {};
/// This class should be specialized by any type that needs to be converted
@@ -144,14 +151,14 @@ struct ScalarTraits {
// Must provide:
//
// Function to write the value as a string:
- //static void output(const T &value, void *ctxt, llvm::raw_ostream &out);
+ // static void output(const T &value, void *ctxt, llvm::raw_ostream &out);
//
// Function to convert a string to a value. Returns the empty
// StringRef on success or an error string if string is malformed:
- //static StringRef input(StringRef scalar, void *ctxt, T &value);
+ // static StringRef input(StringRef scalar, void *ctxt, T &value);
//
// Function to determine if the value should be quoted.
- //static QuotingType mustQuote(StringRef);
+ // static QuotingType mustQuote(StringRef);
};
/// This class should be specialized by type that requires custom conversion
@@ -162,7 +169,7 @@ struct ScalarTraits {
/// static void output(const MyType &Value, void*, llvm::raw_ostream &Out)
/// {
/// // stream out custom formatting
-/// Out << Val;
+/// Out << Value;
/// }
/// static StringRef input(StringRef Scalar, void*, MyType &Value) {
/// // parse scalar and set `value`
@@ -180,6 +187,47 @@ struct BlockScalarTraits {
// Function to convert a string to a value. Returns the empty
// StringRef on success or an error string if string is malformed:
// static StringRef input(StringRef Scalar, void *ctxt, T &Value);
+ //
+ // Optional:
+ // static StringRef inputTag(T &Val, std::string Tag)
+ // static void outputTag(const T &Val, raw_ostream &Out)
+};
+
+/// This class should be specialized by type that requires custom conversion
+/// to/from a YAML scalar with optional tags. For example:
+///
+/// template <>
+/// struct TaggedScalarTraits<MyType> {
+/// static void output(const MyType &Value, void*, llvm::raw_ostream
+/// &ScalarOut, llvm::raw_ostream &TagOut)
+/// {
+/// // stream out custom formatting including optional Tag
+///     ScalarOut << Value;
+/// }
+/// static StringRef input(StringRef Scalar, StringRef Tag, void*, MyType
+/// &Value) {
+/// // parse scalar and set `value`
+/// // return empty string on success, or error string
+/// return StringRef();
+/// }
+/// static QuotingType mustQuote(const MyType &Value, StringRef) {
+/// return QuotingType::Single;
+/// }
+/// };
+template <typename T> struct TaggedScalarTraits {
+ // Must provide:
+ //
+ // Function to write the value and tag as strings:
+ // static void output(const T &Value, void *ctx, llvm::raw_ostream &ScalarOut,
+ // llvm::raw_ostream &TagOut);
+ //
+ // Function to convert a string to a value. Returns the empty
+ // StringRef on success or an error string if string is malformed:
+ // static StringRef input(StringRef Scalar, StringRef Tag, void *ctxt, T
+ // &Value);
+ //
+ // Function to determine if the value should be quoted.
+ // static QuotingType mustQuote(const T &Value, StringRef Scalar);
};
/// This class should be specialized by any type that needs to be converted
@@ -233,6 +281,31 @@ struct CustomMappingTraits {
// static void output(IO &io, T &elem);
};
+/// This class should be specialized by any type that can be represented as
+/// a scalar, map, or sequence, decided dynamically. For example:
+///
+/// typedef std::unique_ptr<MyBase> MyPoly;
+///
+/// template<>
+/// struct PolymorphicTraits<MyPoly> {
+/// static NodeKind getKind(const MyPoly &poly) {
+/// return poly->getKind();
+/// }
+/// static MyScalar& getAsScalar(MyPoly &poly) {
+/// if (!poly || !isa<MyScalar>(poly))
+/// poly.reset(new MyScalar());
+/// return *cast<MyScalar>(poly.get());
+/// }
+/// // ...
+/// };
+template <typename T> struct PolymorphicTraits {
+ // Must provide:
+ // static NodeKind getKind(const T &poly);
+ // static scalar_type &getAsScalar(T &poly);
+ // static map_type &getAsMap(T &poly);
+ // static sequence_type &getAsSequence(T &poly);
+};
+
// Only used for better diagnostics of missing traits
template <typename T>
struct MissingTrait;
@@ -249,7 +322,6 @@ struct has_ScalarEnumerationTraits
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<ScalarEnumerationTraits<T>>(nullptr)) == 1);
};
@@ -266,7 +338,6 @@ struct has_ScalarBitSetTraits
template <typename U>
static double test(...);
-public:
static bool const value = (sizeof(test<ScalarBitSetTraits<T>>(nullptr)) == 1);
};
@@ -286,7 +357,6 @@ struct has_ScalarTraits
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<ScalarTraits<T>>(nullptr, nullptr, nullptr)) == 1);
};
@@ -305,11 +375,28 @@ struct has_BlockScalarTraits
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<BlockScalarTraits<T>>(nullptr, nullptr)) == 1);
};
+// Test if TaggedScalarTraits<T> is defined on type T.
+template <class T> struct has_TaggedScalarTraits {
+ using Signature_input = StringRef (*)(StringRef, StringRef, void *, T &);
+ using Signature_output = void (*)(const T &, void *, raw_ostream &,
+ raw_ostream &);
+ using Signature_mustQuote = QuotingType (*)(const T &, StringRef);
+
+ template <typename U>
+ static char test(SameType<Signature_input, &U::input> *,
+ SameType<Signature_output, &U::output> *,
+ SameType<Signature_mustQuote, &U::mustQuote> *);
+
+ template <typename U> static double test(...);
+
+ static bool const value =
+ (sizeof(test<TaggedScalarTraits<T>>(nullptr, nullptr, nullptr)) == 1);
+};
+
// Test if MappingContextTraits<T> is defined on type T.
template <class T, class Context> struct has_MappingTraits {
using Signature_mapping = void (*)(class IO &, T &, Context &);
@@ -320,7 +407,6 @@ template <class T, class Context> struct has_MappingTraits {
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<MappingContextTraits<T, Context>>(nullptr)) == 1);
};
@@ -334,7 +420,6 @@ template <class T> struct has_MappingTraits<T, EmptyContext> {
template <typename U> static double test(...);
-public:
static bool const value = (sizeof(test<MappingTraits<T>>(nullptr)) == 1);
};
@@ -348,7 +433,6 @@ template <class T, class Context> struct has_MappingValidateTraits {
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<MappingContextTraits<T, Context>>(nullptr)) == 1);
};
@@ -362,7 +446,6 @@ template <class T> struct has_MappingValidateTraits<T, EmptyContext> {
template <typename U> static double test(...);
-public:
static bool const value = (sizeof(test<MappingTraits<T>>(nullptr)) == 1);
};
@@ -378,7 +461,6 @@ struct has_SequenceMethodTraits
template <typename U>
static double test(...);
-public:
static bool const value = (sizeof(test<SequenceTraits<T>>(nullptr)) == 1);
};
@@ -394,7 +476,6 @@ struct has_CustomMappingTraits
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<CustomMappingTraits<T>>(nullptr)) == 1);
};
@@ -424,7 +505,6 @@ struct has_FlowTraits<T, true>
template<typename C>
static char (&f(...))[2];
-public:
static bool const value = sizeof(f<Derived>(nullptr)) == 2;
};
@@ -445,50 +525,114 @@ struct has_DocumentListTraits
template <typename U>
static double test(...);
-public:
static bool const value = (sizeof(test<DocumentListTraits<T>>(nullptr))==1);
};
-inline bool isNumber(StringRef S) {
- static const char OctalChars[] = "01234567";
- if (S.startswith("0") &&
- S.drop_front().find_first_not_of(OctalChars) == StringRef::npos)
- return true;
+template <class T> struct has_PolymorphicTraits {
+ using Signature_getKind = NodeKind (*)(const T &);
- if (S.startswith("0o") &&
- S.drop_front(2).find_first_not_of(OctalChars) == StringRef::npos)
- return true;
+ template <typename U>
+ static char test(SameType<Signature_getKind, &U::getKind> *);
- static const char HexChars[] = "0123456789abcdefABCDEF";
- if (S.startswith("0x") &&
- S.drop_front(2).find_first_not_of(HexChars) == StringRef::npos)
- return true;
+ template <typename U> static double test(...);
- static const char DecChars[] = "0123456789";
- if (S.find_first_not_of(DecChars) == StringRef::npos)
- return true;
+ static bool const value = (sizeof(test<PolymorphicTraits<T>>(nullptr)) == 1);
+};
- if (S.equals(".inf") || S.equals(".Inf") || S.equals(".INF"))
- return true;
+inline bool isNumeric(StringRef S) {
+ const static auto skipDigits = [](StringRef Input) {
+ return Input.drop_front(
+ std::min(Input.find_first_not_of("0123456789"), Input.size()));
+ };
- Regex FloatMatcher("^(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
- if (FloatMatcher.match(S))
+ // Make S.front() and S.drop_front().front() (if S.front() is [+-]) calls
+ // safe.
+ if (S.empty() || S.equals("+") || S.equals("-"))
+ return false;
+
+ if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
return true;
- return false;
-}
+ // Infinity and decimal numbers can be prefixed with sign.
+ StringRef Tail = (S.front() == '-' || S.front() == '+') ? S.drop_front() : S;
-inline bool isNumeric(StringRef S) {
- if ((S.front() == '-' || S.front() == '+') && isNumber(S.drop_front()))
+ // Check for infinity first, because checking for hex and oct numbers is more
+ // expensive.
+ if (Tail.equals(".inf") || Tail.equals(".Inf") || Tail.equals(".INF"))
return true;
- if (isNumber(S))
- return true;
+ // Section 10.3.2 Tag Resolution
+ // YAML 1.2 Specification prohibits Base 8 and Base 16 numbers prefixed with
+ // [-+], so S should be used instead of Tail.
+ if (S.startswith("0o"))
+ return S.size() > 2 &&
+ S.drop_front(2).find_first_not_of("01234567") == StringRef::npos;
+
+ if (S.startswith("0x"))
+ return S.size() > 2 && S.drop_front(2).find_first_not_of(
+ "0123456789abcdefABCDEF") == StringRef::npos;
+
+ // Parse float: [-+]? (\. [0-9]+ | [0-9]+ (\. [0-9]* )?) ([eE] [-+]? [0-9]+)?
+ S = Tail;
+
+  // Handle the case where the number starts with '.' and hence needs at least
+  // one digit after the dot (as opposed to a number with digits before the
+  // dot), but doesn't have one.
+ if (S.startswith(".") &&
+ (S.equals(".") ||
+ (S.size() > 1 && std::strchr("0123456789", S[1]) == nullptr)))
+ return false;
+
+ if (S.startswith("E") || S.startswith("e"))
+ return false;
+
+ enum ParseState {
+ Default,
+ FoundDot,
+ FoundExponent,
+ };
+ ParseState State = Default;
- if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
+ S = skipDigits(S);
+
+ // Accept decimal integer.
+ if (S.empty())
return true;
- return false;
+ if (S.front() == '.') {
+ State = FoundDot;
+ S = S.drop_front();
+ } else if (S.front() == 'e' || S.front() == 'E') {
+ State = FoundExponent;
+ S = S.drop_front();
+ } else {
+ return false;
+ }
+
+ if (State == FoundDot) {
+ S = skipDigits(S);
+ if (S.empty())
+ return true;
+
+ if (S.front() == 'e' || S.front() == 'E') {
+ State = FoundExponent;
+ S = S.drop_front();
+ } else {
+ return false;
+ }
+ }
+
+ assert(State == FoundExponent && "Should have found exponent at this point.");
+ if (S.empty())
+ return false;
+
+ if (S.front() == '+' || S.front() == '-') {
+ S = S.drop_front();
+ if (S.empty())
+ return false;
+ }
+
+ return skipDigits(S).empty();
}
inline bool isNull(StringRef S) {
@@ -535,7 +679,6 @@ inline QuotingType needsQuotes(StringRef S) {
// Safe scalar characters.
case '_':
case '-':
- case '/':
case '^':
case '.':
case ',':
@@ -552,6 +695,12 @@ inline QuotingType needsQuotes(StringRef S) {
// DEL (0x7F) are excluded from the allowed character range.
case 0x7F:
return QuotingType::Double;
+ // Forward slash is allowed to be unquoted, but we quote it anyway. We have
+ // many tests that use FileCheck against YAML output, and this output often
+ // contains paths. If we quote backslashes but not forward slashes then
+ // paths will come out either quoted or unquoted depending on which platform
+ // the test is run on, making FileCheck comparisons difficult.
+ case '/':
default: {
// C0 control block (0x0 - 0x1F) is excluded from the allowed character
// range.
@@ -578,10 +727,12 @@ struct missingTraits
!has_ScalarBitSetTraits<T>::value &&
!has_ScalarTraits<T>::value &&
!has_BlockScalarTraits<T>::value &&
+ !has_TaggedScalarTraits<T>::value &&
!has_MappingTraits<T, Context>::value &&
!has_SequenceTraits<T>::value &&
!has_CustomMappingTraits<T>::value &&
- !has_DocumentListTraits<T>::value> {};
+ !has_DocumentListTraits<T>::value &&
+ !has_PolymorphicTraits<T>::value> {};
template <typename T, typename Context>
struct validatedMappingTraits
@@ -635,6 +786,9 @@ public:
virtual void scalarString(StringRef &, QuotingType) = 0;
virtual void blockScalarString(StringRef &) = 0;
+ virtual void scalarTag(std::string &) = 0;
+
+ virtual NodeKind getNodeKind() = 0;
virtual void setError(const Twine &) = 0;
@@ -869,6 +1023,31 @@ yamlize(IO &YamlIO, T &Val, bool, EmptyContext &Ctx) {
}
}
+template <typename T>
+typename std::enable_if<has_TaggedScalarTraits<T>::value, void>::type
+yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
+ if (io.outputting()) {
+ std::string ScalarStorage, TagStorage;
+ raw_string_ostream ScalarBuffer(ScalarStorage), TagBuffer(TagStorage);
+ TaggedScalarTraits<T>::output(Val, io.getContext(), ScalarBuffer,
+ TagBuffer);
+ io.scalarTag(TagBuffer.str());
+ StringRef ScalarStr = ScalarBuffer.str();
+ io.scalarString(ScalarStr,
+ TaggedScalarTraits<T>::mustQuote(Val, ScalarStr));
+ } else {
+ std::string Tag;
+ io.scalarTag(Tag);
+ StringRef Str;
+ io.scalarString(Str, QuotingType::None);
+ StringRef Result =
+ TaggedScalarTraits<T>::input(Str, Tag, io.getContext(), Val);
+ if (!Result.empty()) {
+ io.setError(Twine(Result));
+ }
+ }
+}
+
template <typename T, typename Context>
typename std::enable_if<validatedMappingTraits<T, Context>::value, void>::type
yamlize(IO &io, T &Val, bool, Context &Ctx) {
@@ -925,6 +1104,20 @@ yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
}
template <typename T>
+typename std::enable_if<has_PolymorphicTraits<T>::value, void>::type
+yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
+ switch (io.outputting() ? PolymorphicTraits<T>::getKind(Val)
+ : io.getNodeKind()) {
+ case NodeKind::Scalar:
+ return yamlize(io, PolymorphicTraits<T>::getAsScalar(Val), true, Ctx);
+ case NodeKind::Map:
+ return yamlize(io, PolymorphicTraits<T>::getAsMap(Val), true, Ctx);
+ case NodeKind::Sequence:
+ return yamlize(io, PolymorphicTraits<T>::getAsSequence(Val), true, Ctx);
+ }
+}
+
+template <typename T>
typename std::enable_if<missingTraits<T, EmptyContext>::value, void>::type
yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
char missing_yaml_trait_for_type[sizeof(MissingTrait<T>)];
@@ -1202,6 +1395,8 @@ private:
void endBitSetScalar() override;
void scalarString(StringRef &, QuotingType) override;
void blockScalarString(StringRef &) override;
+ void scalarTag(std::string &) override;
+ NodeKind getNodeKind() override;
void setError(const Twine &message) override;
bool canElideEmptySequence() override;
@@ -1347,6 +1542,8 @@ public:
void endBitSetScalar() override;
void scalarString(StringRef &, QuotingType) override;
void blockScalarString(StringRef &) override;
+ void scalarTag(std::string &) override;
+ NodeKind getNodeKind() override;
void setError(const Twine &message) override;
bool canElideEmptySequence() override;
@@ -1366,14 +1563,21 @@ private:
void flowKey(StringRef Key);
enum InState {
- inSeq,
- inFlowSeq,
+ inSeqFirstElement,
+ inSeqOtherElement,
+ inFlowSeqFirstElement,
+ inFlowSeqOtherElement,
inMapFirstKey,
inMapOtherKey,
inFlowMapFirstKey,
inFlowMapOtherKey
};
+ static bool inSeqAnyElement(InState State);
+ static bool inFlowSeqAnyElement(InState State);
+ static bool inMapAnyKey(InState State);
+ static bool inFlowMapAnyKey(InState State);
+
raw_ostream &Out;
int WrapColumn;
SmallVector<InState, 8> StateStack;
@@ -1509,6 +1713,16 @@ operator>>(Input &In, T &Val) {
return In;
}
+// Define non-member operator>> so that Input can stream in a polymorphic type.
+template <typename T>
+inline typename std::enable_if<has_PolymorphicTraits<T>::value, Input &>::type
+operator>>(Input &In, T &Val) {
+ EmptyContext Ctx;
+ if (In.setCurrentDocument())
+ yamlize(In, Val, true, Ctx);
+ return In;
+}
+
// Provide better error message about types missing a trait specialization
template <typename T>
inline typename std::enable_if<missingTraits<T, EmptyContext>::value,
@@ -1597,6 +1811,24 @@ operator<<(Output &Out, T &Val) {
return Out;
}
+// Define non-member operator<< so that Output can stream out a polymorphic
+// type.
+template <typename T>
+inline typename std::enable_if<has_PolymorphicTraits<T>::value, Output &>::type
+operator<<(Output &Out, T &Val) {
+ EmptyContext Ctx;
+ Out.beginDocuments();
+ if (Out.preflightDocument(0)) {
+ // FIXME: The parser does not support explicit documents terminated with a
+ // plain scalar; the end-marker is included as part of the scalar token.
+    assert(PolymorphicTraits<T>::getKind(Val) != NodeKind::Scalar &&
+           "plain scalar documents are not supported");
+ yamlize(Out, Val, true, Ctx);
+ Out.postflightDocument();
+ }
+ Out.endDocuments();
+ return Out;
+}
+
// Provide better error message about types missing a trait specialization
template <typename T>
inline typename std::enable_if<missingTraits<T, EmptyContext>::value,
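Editor's note: a hedged sketch of a TaggedScalarTraits specialization matching the entry points documented in this hunk. The Port type, the "!port" tag, and the error strings are invented for illustration; a value with this trait can then appear wherever yamlize handles elements, e.g. inside mapped or sequenced types.

    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"

    // Toy value that round-trips as a YAML scalar carrying a custom tag.
    struct Port { unsigned Number = 0; };

    namespace llvm {
    namespace yaml {

    template <> struct TaggedScalarTraits<Port> {
      static void output(const Port &Value, void *, raw_ostream &ScalarOut,
                         raw_ostream &TagOut) {
        TagOut << "!port";
        ScalarOut << Value.Number;
      }
      static StringRef input(StringRef Scalar, StringRef Tag, void *,
                             Port &Value) {
        if (!Tag.empty() && Tag != "!port")
          return "unexpected tag";
        if (Scalar.getAsInteger(10, Value.Number))
          return "expected an unsigned integer";
        return StringRef(); // empty result means success
      }
      static QuotingType mustQuote(const Port &, StringRef) {
        return QuotingType::None;
      }
    };

    } // namespace yaml
    } // namespace llvm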
diff --git a/contrib/llvm/include/llvm/Support/raw_ostream.h b/contrib/llvm/include/llvm/Support/raw_ostream.h
index b9ea9b5817f2..d062e716209d 100644
--- a/contrib/llvm/include/llvm/Support/raw_ostream.h
+++ b/contrib/llvm/include/llvm/Support/raw_ostream.h
@@ -367,12 +367,18 @@ class raw_fd_ostream : public raw_pwrite_stream {
int FD;
bool ShouldClose;
+ bool SupportsSeeking;
+
+#ifdef _WIN32
+ /// True if this fd refers to a Windows console device. Mintty and other
+ /// terminal emulators are TTYs, but they are not consoles.
+ bool IsWindowsConsole = false;
+#endif
+
std::error_code EC;
uint64_t pos;
- bool SupportsSeeking;
-
/// See raw_ostream::write_impl.
void write_impl(const char *Ptr, size_t Size) override;
@@ -548,6 +554,8 @@ class buffer_ostream : public raw_svector_ostream {
raw_ostream &OS;
SmallVector<char, 0> Buffer;
+ virtual void anchor() override;
+
public:
buffer_ostream(raw_ostream &OS) : raw_svector_ostream(Buffer), OS(OS) {}
~buffer_ostream() override { OS << str(); }
diff --git a/contrib/llvm/include/llvm/Support/type_traits.h b/contrib/llvm/include/llvm/Support/type_traits.h
index 55d84f138f07..e7b8f2517b8a 100644
--- a/contrib/llvm/include/llvm/Support/type_traits.h
+++ b/contrib/llvm/include/llvm/Support/type_traits.h
@@ -30,9 +30,10 @@ namespace llvm {
template <typename T>
struct isPodLike {
// std::is_trivially_copyable is available in libc++ with clang, libstdc++
- // that comes with GCC 5.
+ // that comes with GCC 5. MSVC 2015 and newer also have
+ // std::is_trivially_copyable.
#if (__has_feature(is_trivially_copyable) && defined(_LIBCPP_VERSION)) || \
- (defined(__GNUC__) && __GNUC__ >= 5)
+ (defined(__GNUC__) && __GNUC__ >= 5) || defined(_MSC_VER)
// If the compiler supports the is_trivially_copyable trait use it, as it
// matches the definition of isPodLike closely.
static const bool value = std::is_trivially_copyable<T>::value;
diff --git a/contrib/llvm/include/llvm/TableGen/StringMatcher.h b/contrib/llvm/include/llvm/TableGen/StringMatcher.h
index 09d2092d43b0..3aa3540d616d 100644
--- a/contrib/llvm/include/llvm/TableGen/StringMatcher.h
+++ b/contrib/llvm/include/llvm/TableGen/StringMatcher.h
@@ -23,12 +23,11 @@ namespace llvm {
class raw_ostream;
-/// StringMatcher - Given a list of strings and code to execute when they match,
-/// output a simple switch tree to classify the input string.
+/// Given a list of strings and code to execute when they match, output a
+/// simple switch tree to classify the input string.
///
-/// If a match is found, the code in Vals[i].second is executed; control must
+/// If a match is found, the code in Matches[i].second is executed; control must
/// not exit this code fragment. If nothing matches, execution falls through.
-///
class StringMatcher {
public:
using StringPair = std::pair<std::string, std::string>;
diff --git a/contrib/llvm/include/llvm/Target/CodeGenCWrappers.h b/contrib/llvm/include/llvm/Target/CodeGenCWrappers.h
index e9a990569d36..3ad77c5d5e00 100644
--- a/contrib/llvm/include/llvm/Target/CodeGenCWrappers.h
+++ b/contrib/llvm/include/llvm/Target/CodeGenCWrappers.h
@@ -31,6 +31,8 @@ inline Optional<CodeModel::Model> unwrap(LLVMCodeModel Model, bool &JIT) {
LLVM_FALLTHROUGH;
case LLVMCodeModelDefault:
return None;
+ case LLVMCodeModelTiny:
+ return CodeModel::Tiny;
case LLVMCodeModelSmall:
return CodeModel::Small;
case LLVMCodeModelKernel:
@@ -45,6 +47,8 @@ inline Optional<CodeModel::Model> unwrap(LLVMCodeModel Model, bool &JIT) {
inline LLVMCodeModel wrap(CodeModel::Model Model) {
switch (Model) {
+ case CodeModel::Tiny:
+ return LLVMCodeModelTiny;
case CodeModel::Small:
return LLVMCodeModelSmall;
case CodeModel::Kernel:
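Editor's note: a minimal sketch of the new Tiny code-model mapping through the C-API wrappers changed above; the include paths are the usual locations for these declarations and are an assumption here.

    #include "llvm-c/TargetMachine.h"
    #include "llvm/Target/CodeGenCWrappers.h"
    #include <cassert>

    void checkTinyRoundTrip() {
      bool JIT = false;
      // LLVMCodeModelTiny now unwraps to CodeModel::Tiny ...
      llvm::Optional<llvm::CodeModel::Model> CM =
          llvm::unwrap(LLVMCodeModelTiny, JIT);
      assert(CM && *CM == llvm::CodeModel::Tiny);
      // ... and wraps back to the same C enumerator.
      assert(llvm::wrap(*CM) == LLVMCodeModelTiny);
    }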
diff --git a/contrib/llvm/include/llvm/Target/GenericOpcodes.td b/contrib/llvm/include/llvm/Target/GenericOpcodes.td
index 79cc1e4d9eee..045fe2520047 100644
--- a/contrib/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/contrib/llvm/include/llvm/Target/GenericOpcodes.td
@@ -120,6 +120,36 @@ def G_VAARG : GenericInstruction {
let mayStore = 1;
}
+def G_CTLZ : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_CTLZ_ZERO_UNDEF : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_CTTZ : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_CTTZ_ZERO_UNDEF : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_CTPOP : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
def G_BSWAP : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src);
@@ -281,6 +311,14 @@ def G_PTR_MASK : GenericInstruction {
// Overflow ops
//------------------------------------------------------------------------------
+// Generic unsigned addition producing a carry flag.
+def G_UADDO : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
// Generic unsigned addition consuming and producing a carry flag.
def G_UADDE : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
@@ -296,6 +334,19 @@ def G_SADDO : GenericInstruction {
let isCommutable = 1;
}
+// Generic signed addition consuming and producing a carry flag.
+def G_SADDE : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in);
+ let hasSideEffects = 0;
+}
+
+// Generic unsigned subtraction producing a carry flag.
+def G_USUBO : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
// Generic unsigned subtraction consuming and producing a carry flag.
def G_USUBE : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
@@ -303,13 +354,20 @@ def G_USUBE : GenericInstruction {
let hasSideEffects = 0;
}
-// Generic unsigned subtraction producing a carry flag.
+// Generic signed subtraction producing a carry flag.
def G_SSUBO : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2);
let hasSideEffects = 0;
}
+// Generic signed subtraction consuming and producing a carry flag.
+def G_SSUBE : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in);
+ let hasSideEffects = 0;
+}
+
// Generic unsigned multiplication producing a carry flag.
def G_UMULO : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
@@ -482,6 +540,35 @@ def G_FLOG2 : GenericInstruction {
let hasSideEffects = 0;
}
+// Floating point base-10 logarithm of a value.
+def G_FLOG10 : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+// Floating point ceiling of a value.
+def G_FCEIL : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+//------------------------------------------------------------------------------
+// Opcodes for LLVM Intrinsics
+//------------------------------------------------------------------------------
+def G_INTRINSIC_TRUNC : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+def G_INTRINSIC_ROUND : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
//------------------------------------------------------------------------------
// Memory ops
//------------------------------------------------------------------------------
@@ -576,6 +663,9 @@ def G_EXTRACT : GenericInstruction {
// Extract multiple registers specified size, starting from blocks given by
// indexes. This will almost certainly be mapped to sub-register COPYs after
// register banks have been selected.
+// The output operands are always ordered from lowest bits to highest:
+// %bits_0_7:(s8), %bits_8_15:(s8),
+// %bits_16_23:(s8), %bits_24_31:(s8) = G_UNMERGE_VALUES %0:(s32)
def G_UNMERGE_VALUES : GenericInstruction {
let OutOperandList = (outs type0:$dst0, variable_ops);
let InOperandList = (ins type1:$src);
@@ -589,13 +679,38 @@ def G_INSERT : GenericInstruction {
let hasSideEffects = 0;
}
-/// Concatenate multiple registers of the same size into a wider register.
+// Concatenate multiple registers of the same size into a wider register.
+// The input operands are always ordered from lowest bits to highest:
+// %0:(s32) = G_MERGE_VALUES %bits_0_7:(s8), %bits_8_15:(s8),
+// %bits_16_23:(s8), %bits_24_31:(s8)
def G_MERGE_VALUES : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src0, variable_ops);
let hasSideEffects = 0;
}
+/// Create a vector from multiple scalar registers.
+def G_BUILD_VECTOR : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src0, variable_ops);
+ let hasSideEffects = 0;
+}
+
+/// Like G_BUILD_VECTOR, but truncates the larger operand types to fit the
+/// destination vector elt type.
+def G_BUILD_VECTOR_TRUNC : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src0, variable_ops);
+ let hasSideEffects = 0;
+}
+
+/// Create a vector by concatenating vectors together.
+def G_CONCAT_VECTORS : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src0, variable_ops);
+ let hasSideEffects = 0;
+}
+
// Intrinsic without side effects.
def G_INTRINSIC : GenericInstruction {
let OutOperandList = (outs);
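The operand ordering documented above for G_UNMERGE_VALUES and G_MERGE_VALUES (pieces always listed from the lowest bits to the highest) corresponds to the following standalone round-trip sketch:

    #include <array>
    #include <cassert>
    #include <cstdint>

    // Split an s32 into four s8 pieces, lowest bits first, as G_UNMERGE_VALUES does.
    std::array<uint8_t, 4> unmergeS32(uint32_t V) {
      return {static_cast<uint8_t>(V),          // bits 0-7
              static_cast<uint8_t>(V >> 8),     // bits 8-15
              static_cast<uint8_t>(V >> 16),    // bits 16-23
              static_cast<uint8_t>(V >> 24)};   // bits 24-31
    }

    // Reassemble the pieces in the same order, as G_MERGE_VALUES does.
    uint32_t mergeS32(const std::array<uint8_t, 4> &Pieces) {
      return static_cast<uint32_t>(Pieces[0]) |
             (static_cast<uint32_t>(Pieces[1]) << 8) |
             (static_cast<uint32_t>(Pieces[2]) << 16) |
             (static_cast<uint32_t>(Pieces[3]) << 24);
    }

    int main() { assert(mergeS32(unmergeS32(0x12345678u)) == 0x12345678u); }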
diff --git a/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index d487759a4852..31d26361260d 100644
--- a/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -83,6 +83,13 @@ def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>;
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_w_chain>;
def : GINodeEquiv<G_BR, br>;
def : GINodeEquiv<G_BSWAP, bswap>;
+def : GINodeEquiv<G_CTLZ, ctlz>;
+def : GINodeEquiv<G_CTTZ, cttz>;
+def : GINodeEquiv<G_CTLZ_ZERO_UNDEF, ctlz_zero_undef>;
+def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
+def : GINodeEquiv<G_CTPOP, ctpop>;
+def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+def : GINodeEquiv<G_FCEIL, fceil>;
// Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some
// complications that tablegen must take care of. For example, Predicates such
diff --git a/contrib/llvm/include/llvm/Target/Target.td b/contrib/llvm/include/llvm/Target/Target.td
index b746505d2a45..e4b827babb92 100644
--- a/contrib/llvm/include/llvm/Target/Target.td
+++ b/contrib/llvm/include/llvm/Target/Target.td
@@ -439,6 +439,7 @@ class Instruction {
// instruction.
bit isReturn = 0; // Is this instruction a return instruction?
bit isBranch = 0; // Is this instruction a branch instruction?
+ bit isEHScopeReturn = 0; // Does this instruction end an EH scope?
bit isIndirectBranch = 0; // Is this instruction an indirect branch?
bit isCompare = 0; // Is this instruction a comparison instruction?
bit isMoveImm = 0; // Is this instruction a move immediate instruction?
@@ -478,6 +479,7 @@ class Instruction {
bit isInsertSubreg = 0; // Is this instruction a kind of insert subreg?
// If so, make sure to override
// TargetInstrInfo::getInsertSubregLikeInputs.
+ bit variadicOpsAreDefs = 0; // Are variadic operands definitions?
// Does the instruction have side effects that are not captured by any
// operands of the instruction or other flags?
@@ -1103,7 +1105,7 @@ def FAULTING_OP : StandardPseudoInstruction {
let isBranch = 1;
}
def PATCHABLE_OP : StandardPseudoInstruction {
- let OutOperandList = (outs unknown:$dst);
+ let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let usesCustomInserter = 1;
let mayLoad = 1;
@@ -1163,8 +1165,8 @@ def PATCHABLE_TYPED_EVENT_CALL : StandardPseudoInstruction {
let hasSideEffects = 1;
}
def FENTRY_CALL : StandardPseudoInstruction {
- let OutOperandList = (outs unknown:$dst);
- let InOperandList = (ins variable_ops);
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
let AsmString = "# FEntry call";
let usesCustomInserter = 1;
let mayLoad = 1;
@@ -1554,3 +1556,8 @@ include "llvm/Target/GlobalISel/Target.td"
// Pull in the common support for the Global ISel DAG-based selector generation.
//
include "llvm/Target/GlobalISel/SelectionDAGCompat.td"
+
+//===----------------------------------------------------------------------===//
+// Pull in the common support for Pfm Counters generation.
+//
+include "llvm/Target/TargetPfmCounters.td"
diff --git a/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td b/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td
index 8d57cae02d22..4b2c57b34c2e 100644
--- a/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td
+++ b/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td
@@ -7,29 +7,39 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines MCInstPredicate classes and its subclasses.
+// This file defines class MCInstPredicate and its subclasses.
//
-// MCInstPredicate is used to describe constraints on the opcode/operand(s) of
-// an instruction. Each MCInstPredicate class has a well-known semantic, and it
-// is used by a PredicateExpander to generate code for MachineInstr and/or
-// MCInst.
-//
-// MCInstPredicate definitions can be used to construct MCSchedPredicate
-// definitions. An MCSchedPredicate can be used in place of a SchedPredicate
-// when defining SchedReadVariant and SchedWriteVariant used by a processor
-// scheduling model.
+// MCInstPredicate definitions are used by target scheduling models to describe
+// constraints on instructions.
//
-// Here is an example of MCInstPredicate definition:
+// Here is an example of an MCInstPredicate definition in tablegen:
//
// def MCInstPredicateExample : CheckAll<[
// CheckOpcode<[BLR]>,
// CheckIsRegOperand<0>,
// CheckNot<CheckRegOperand<0, LR>>]>;
//
-// Predicate `MCInstPredicateExample` checks that the machine instruction in
-// input is a BLR, and that operand at index 0 is register `LR`.
+// The syntax for MCInstPredicate is declarative, and predicate definitions can
+// be composed together in order to generate more complex constraints.
+//
+// The `CheckAll` from the example defines a composition of three different
+// predicates. Definition `MCInstPredicateExample` identifies instructions
+// whose opcode is BLR, and whose first operand is a register different from
+// register `LR`.
+//
+// Every MCInstPredicate class has a well-known semantic in tablegen. For
+// example, `CheckOpcode` is a special type of predicate used to describe a
+// constraint on the value of an instruction opcode.
//
-// That predicate could be used to rewrite the following definition (from
+// MCInstPredicate definitions are typically used by scheduling models to
+// construct MCSchedPredicate definitions (see the definition of class
+// MCSchedPredicate in llvm/Target/TargetSchedule.td).
+// In particular, an MCSchedPredicate can be used instead of a SchedPredicate
+// when defining the set of SchedReadVariant and SchedWriteVariant of a
+// processor scheduling model.
+//
+// The `MCInstPredicateExample` definition above is equivalent to (and could
+// therefore replace) the following definition from a previous ExynosM3 model (see
// AArch64SchedExynosM3.td):
//
// def M3BranchLinkFastPred : SchedPredicate<[{
@@ -37,22 +47,13 @@
// MI->getOperand(0).isReg() &&
// MI->getOperand(0).getReg() != AArch64::LR}]>;
//
-// MCInstPredicate definitions are used to construct MCSchedPredicate (see the
-// definition of class MCSchedPredicate in llvm/Target/TargetSchedule.td). An
-// MCSchedPredicate can be used by a `SchedVar` to associate a predicate with a
-// list of SchedReadWrites. Note that `SchedVar` are used to create SchedVariant
-// definitions.
-//
-// Each MCInstPredicate class has a well known semantic. For example,
-// `CheckOpcode` is only used to check the instruction opcode value.
-//
-// MCInstPredicate classes allow the definition of predicates in a declarative
-// way. These predicates don't require a custom block of C++, and can be used
-// to define conditions on instructions without being bound to a particular
+// The main advantage of using MCInstPredicate instead of SchedPredicate is
+// portability: users don't need to specify predicates in C++. As a consequence
+// of this, MCInstPredicate definitions are not bound to a particular
// representation (i.e. MachineInstr vs MCInst).
//
-// It also means that tablegen backends must know how to parse and expand them
-// into code that works on MCInst (or MachineInst).
+// Tablegen backends know how to expand MCInstPredicate definitions into actual
+// C++ code that works on MachineInstr (and/or MCInst).
//
// Instances of class PredicateExpander (see utils/Tablegen/PredicateExpander.h)
// know how to expand a predicate. For each MCInstPredicate class, there must be
@@ -68,6 +69,7 @@
// Forward declarations.
class Instruction;
+class SchedMachineModel;
// A generic machine instruction predicate.
class MCInstPredicate;
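As a reading aid for the header comment above: a CheckAll composition behaves like a logical AND over its sub-predicates. A standalone C++ sketch of MCInstPredicateExample, using simplified stand-in types and placeholder opcode/register numbers (this is not PredicateExpander output):

    #include <functional>
    #include <vector>

    struct FakeInst {
      unsigned Opcode;
      bool Op0IsReg;
      unsigned Op0Reg;
    };

    using Pred = std::function<bool(const FakeInst &)>;

    // CheckAll: every sub-check must hold.
    Pred checkAll(std::vector<Pred> Seq) {
      return [Seq](const FakeInst &MI) {
        for (const Pred &P : Seq)
          if (!P(MI))
            return false;
        return true;
      };
    }

    constexpr unsigned BLR = 1, LR = 30; // placeholder encodings
    const Pred ExamplePred = checkAll({
        [](const FakeInst &MI) { return MI.Opcode == BLR; }, // CheckOpcode<[BLR]>
        [](const FakeInst &MI) { return MI.Op0IsReg; },      // CheckIsRegOperand<0>
        [](const FakeInst &MI) { return MI.Op0Reg != LR; }});// CheckNot<CheckRegOperand<0, LR>>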
@@ -104,28 +106,50 @@ class CheckSameRegOperand<int First, int Second> : MCInstPredicate {
int SecondIndex = Second;
}
+// Base class for checks on register/immediate operands.
+// It allows users to define checks like:
+// MyFunction(MI->getOperand(Index).getImm()) == Val;
+//
+// In the example above, `MyFunction` is a function that takes as input an
+// immediate operand value, and returns another value. Field `FunctionMapper` is
+// the name of the function to call on the operand value.
+class CheckOperandBase<int Index, string Fn = ""> : MCOperandPredicate<Index> {
+ string FunctionMapper = Fn;
+}
+
// Check that the machine register operand at position `Index` references
// register R. This predicate assumes that we already checked that the machine
// operand at position `Index` is a register operand.
-class CheckRegOperand<int Index, Register R> : MCOperandPredicate<Index> {
+class CheckRegOperand<int Index, Register R> : CheckOperandBase<Index> {
Register Reg = R;
}
// Check if register operand at index `Index` is the invalid register.
-class CheckInvalidRegOperand<int Index> : MCOperandPredicate<Index>;
+class CheckInvalidRegOperand<int Index> : CheckOperandBase<Index>;
// Check that the operand at position `Index` is immediate `Imm`.
-class CheckImmOperand<int Index, int Imm> : MCOperandPredicate<Index> {
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against `Imm`.
+class CheckImmOperand<int Index, int Imm> : CheckOperandBase<Index> {
int ImmVal = Imm;
}
// Similar to CheckImmOperand, however the immediate is not a literal number.
// This is useful when we want to compare the value of an operand against an
// enum value, and we know the actual integer value of that enum.
-class CheckImmOperand_s<int Index, string Value> : MCOperandPredicate<Index> {
+class CheckImmOperand_s<int Index, string Value> : CheckOperandBase<Index> {
string ImmVal = Value;
}
+// Expands to a call to `FunctionMapper` if field `FunctionMapper` is set.
+// Otherwise, it expands to a CheckNot<CheckInvalidRegOperand<Index>>.
+class CheckRegOperandSimple<int Index> : CheckOperandBase<Index>;
+
+// Expands to a call to `FunctionMapper` if field `FunctionMapper` is set.
+// Otherwise, it simply evaluates to TruePred.
+class CheckImmOperandSimple<int Index> : CheckOperandBase<Index>;
+
// Check that the operand at position `Index` is immediate value zero.
class CheckZeroOperand<int Index> : CheckImmOperand<Index, 0>;
@@ -169,18 +193,53 @@ class CheckAll<list<MCInstPredicate> Sequence>
class CheckAny<list<MCInstPredicate> Sequence>
: CheckPredicateSequence<Sequence>;
-// Check that a call to method `Name` in class "XXXGenInstrInfo" (where XXX is
-// the `Target` name) returns true.
+
+// Used to expand the body of a function predicate. See the definition of
+// TIIPredicate below.
+class MCStatement;
+
+// Expands to a return statement. The return expression is a boolean expression
+// described by a MCInstPredicate.
+class MCReturnStatement<MCInstPredicate predicate> : MCStatement {
+ MCInstPredicate Pred = predicate;
+}
+
+// Used to automatically construct cases of a switch statement where the switch
+// variable is an instruction opcode. There is a 'case' for every opcode in the
+// `opcodes` list, and each case is associated with MCStatement `caseStmt`.
+class MCOpcodeSwitchCase<list<Instruction> opcodes, MCStatement caseStmt> {
+ list<Instruction> Opcodes = opcodes;
+ MCStatement CaseStmt = caseStmt;
+}
+
+// Expands to a switch statement. The switch variable is an instruction opcode.
+// The auto-generated switch is populated by a number of cases based on the
+// input `cases` list. A default case is automatically generated, and it
+// evaluates to `default`.
+class MCOpcodeSwitchStatement<list<MCOpcodeSwitchCase> cases,
+ MCStatement default> : MCStatement {
+ list<MCOpcodeSwitchCase> Cases = cases;
+ MCStatement DefaultCase = default;
+}
+
+// Base class for function predicates.
+class FunctionPredicateBase<string name, MCStatement body> {
+ string FunctionName = name;
+ MCStatement Body = body;
+}
+
+// Check that a call to method `Name` in class "XXXInstrInfo" (where XXX is
+// the name of a target) returns true.
//
// TIIPredicate definitions are used to model calls to the target-specific
// InstrInfo. A TIIPredicate is treated specially by the InstrInfoEmitter
// tablegen backend, which will use it to automatically generate a definition in
-// the target specific `GenInstrInfo` class.
-class TIIPredicate<string Target, string Name, MCInstPredicate P> : MCInstPredicate {
- string TargetName = Target;
- string FunctionName = Name;
- MCInstPredicate Pred = P;
-}
+// the target specific `InstrInfo` class.
+//
+// There cannot be multiple TIIPredicate definitions with the same name for the
+// same target.
+class TIIPredicate<string Name, MCStatement body>
+ : FunctionPredicateBase<Name, body>, MCInstPredicate;
// A function predicate that takes as input a machine instruction, and returns
// a boolean value.
@@ -195,3 +254,106 @@ class CheckFunctionPredicate<string MCInstFn, string MachineInstrFn> : MCInstPre
string MCInstFnName = MCInstFn;
string MachineInstrFnName = MachineInstrFn;
}
+
+// Used to classify machine instructions based on a machine instruction
+// predicate.
+//
+// Let IC be an InstructionEquivalenceClass definition, and MI a machine
+// instruction. We say that MI belongs to the equivalence class described by IC
+// if and only if the following two conditions are met:
+// a) MI's opcode is in the `opcodes` set, and
+// b) `Predicate` evaluates to true when applied to MI.
+//
+// Instances of this class can be used by processor scheduling models to
+// describe instructions that have a property in common. For example,
+// InstructionEquivalenceClass definitions can be used to identify the set of
+// dependency breaking instructions for a processor model.
+//
+// An (optional) list of operand indices can be used to further describe
+// properties that apply to instruction operands. For example, it can be used to
+// identify register uses of a dependency breaking instruction that are not in
+// a RAW dependency.
+class InstructionEquivalenceClass<list<Instruction> opcodes,
+ MCInstPredicate pred,
+ list<int> operands = []> {
+ list<Instruction> Opcodes = opcodes;
+ MCInstPredicate Predicate = pred;
+ list<int> OperandIndices = operands;
+}
+
+// Used by processor models to describe dependency breaking instructions.
+//
+// This is mainly an alias for InstructionEquivalenceClass. Input operand
+// `BrokenDeps` identifies the set of "broken dependencies". There is one bit
+// for each implicit and explicit input operand. An empty set of broken
+// dependencies means: "explicit input register operands are independent."
+class DepBreakingClass<list<Instruction> opcodes, MCInstPredicate pred,
+ list<int> BrokenDeps = []>
+ : InstructionEquivalenceClass<opcodes, pred, BrokenDeps>;
+
+// A function descriptor used to describe the signature of a predicate method
+// which will be expanded by the STIPredicateExpander into a tablegen'd
+// XXXGenSubtargetInfo class member definition (here, XXX is a target name).
+//
+// It describes the signature of a TargetSubtarget hook, as well as a few extra
+// properties. Examples of extra properties are:
+// - The default return value for the auto-generated function hook.
+// - A list of subtarget hooks (Delegates) that are called from this function.
+//
+class STIPredicateDecl<string name, MCInstPredicate default = FalsePred,
+ bit overrides = 1, bit expandForMC = 1,
+ bit updatesOpcodeMask = 0,
+ list<STIPredicateDecl> delegates = []> {
+ string Name = name;
+
+ MCInstPredicate DefaultReturnValue = default;
+
+ // True if this method is declared as virtual in class TargetSubtargetInfo.
+ bit OverridesBaseClassMember = overrides;
+
+ // True if we need an equivalent predicate function in the MC layer.
+ bit ExpandForMC = expandForMC;
+
+ // True if the autogenerated method has an extra in/out APInt param used as a
+ // mask of operands.
+ bit UpdatesOpcodeMask = updatesOpcodeMask;
+
+ // A list of STIPredicates used by this definition to delegate part of the
+ // computation. For example, STIPredicateFunction `isDependencyBreaking()`
+ // delegates to `isZeroIdiom()` part of its computation.
+ list<STIPredicateDecl> Delegates = delegates;
+}
+
+// A predicate function definition member of class `XXXGenSubtargetInfo`.
+//
+// If `Declaration.ExpandForMC` is true, then SubtargetEmitter
+// will also expand another definition of this method that accepts an MCInst.
+class STIPredicate<STIPredicateDecl declaration,
+ list<InstructionEquivalenceClass> classes> {
+ STIPredicateDecl Declaration = declaration;
+ list<InstructionEquivalenceClass> Classes = classes;
+ SchedMachineModel SchedModel = ?;
+}
+
+// Convenience classes and definitions used by processor scheduling models to
+// describe dependency breaking instructions and move elimination candidates.
+let UpdatesOpcodeMask = 1 in {
+
+def IsZeroIdiomDecl : STIPredicateDecl<"isZeroIdiom">;
+
+let Delegates = [IsZeroIdiomDecl] in
+def IsDepBreakingDecl : STIPredicateDecl<"isDependencyBreaking">;
+
+} // UpdatesOpcodeMask
+
+def IsOptimizableRegisterMoveDecl
+ : STIPredicateDecl<"isOptimizableRegisterMove">;
+
+class IsZeroIdiomFunction<list<DepBreakingClass> classes>
+ : STIPredicate<IsZeroIdiomDecl, classes>;
+
+class IsDepBreakingFunction<list<DepBreakingClass> classes>
+ : STIPredicate<IsDepBreakingDecl, classes>;
+
+class IsOptimizableRegisterMove<list<InstructionEquivalenceClass> classes>
+ : STIPredicate<IsOptimizableRegisterMoveDecl, classes>;
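A standalone sketch of the membership rule stated for InstructionEquivalenceClass above (both the opcode-set test and the predicate must hold; the types are stand-ins for the tablegen records, not LLVM classes):

    #include <functional>
    #include <set>

    struct SketchInst {
      unsigned Opcode;
    };

    struct EquivalenceClassSketch {
      std::set<unsigned> Opcodes;                        // the `opcodes` list
      std::function<bool(const SketchInst &)> Predicate; // the `pred` check

      // MI belongs to the class iff conditions a) and b) from the comment hold.
      bool contains(const SketchInst &MI) const {
        return Opcodes.count(MI.Opcode) && Predicate(MI);
      }
    };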
diff --git a/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index dbdfd4139a0f..e80f2bf82f26 100644
--- a/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -45,6 +45,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
protected:
bool SupportIndirectSymViaGOTPCRel = false;
bool SupportGOTPCRelWithOffset = true;
+ bool SupportDebugThreadLocalLocation = true;
+
+ /// PersonalityEncoding, LSDAEncoding, TTypeEncoding - Some encoding values
+ /// for EH.
+ unsigned PersonalityEncoding = 0;
+ unsigned LSDAEncoding = 0;
+ unsigned TTypeEncoding = 0;
/// This section contains the static constructor pointer list.
MCSection *StaticCtorSection = nullptr;
@@ -135,6 +142,10 @@ public:
const TargetMachine &TM,
MachineModuleInfo *MMI) const;
+ unsigned getPersonalityEncoding() const { return PersonalityEncoding; }
+ unsigned getLSDAEncoding() const { return LSDAEncoding; }
+ unsigned getTTypeEncoding() const { return TTypeEncoding; }
+
const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
MCStreamer &Streamer) const;
@@ -170,6 +181,11 @@ public:
return SupportGOTPCRelWithOffset;
}
+ /// Target supports TLS offset relocation in debug section?
+ bool supportDebugThreadLocalLocation() const {
+ return SupportDebugThreadLocalLocation;
+ }
+
/// Get the target specific PC relative GOT entry relocation
virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
const MCValue &MV,
@@ -185,6 +201,12 @@ public:
virtual void emitLinkerFlagsForUsed(raw_ostream &OS,
const GlobalValue *GV) const {}
+ /// If supported, return the section to use for the llvm.commandline
+ /// metadata. Otherwise, return nullptr.
+ virtual MCSection *getSectionForCommandLines() const {
+ return nullptr;
+ }
+
protected:
virtual MCSection *SelectSectionForGlobal(const GlobalObject *GO,
SectionKind Kind,
diff --git a/contrib/llvm/include/llvm/Target/TargetMachine.h b/contrib/llvm/include/llvm/Target/TargetMachine.h
index 1ca68c8df63a..3eafcc25583a 100644
--- a/contrib/llvm/include/llvm/Target/TargetMachine.h
+++ b/contrib/llvm/include/llvm/Target/TargetMachine.h
@@ -84,11 +84,10 @@ protected: // Can only create subclasses.
CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
/// Contains target specific asm information.
- const MCAsmInfo *AsmInfo;
-
- const MCRegisterInfo *MRI;
- const MCInstrInfo *MII;
- const MCSubtargetInfo *STI;
+ std::unique_ptr<const MCAsmInfo> AsmInfo;
+ std::unique_ptr<const MCRegisterInfo> MRI;
+ std::unique_ptr<const MCInstrInfo> MII;
+ std::unique_ptr<const MCSubtargetInfo> STI;
unsigned RequireStructuredCFG : 1;
unsigned O0WantsFastISel : 1;
@@ -160,11 +159,11 @@ public:
void resetTargetOptions(const Function &F) const;
/// Return target specific asm information.
- const MCAsmInfo *getMCAsmInfo() const { return AsmInfo; }
+ const MCAsmInfo *getMCAsmInfo() const { return AsmInfo.get(); }
- const MCRegisterInfo *getMCRegisterInfo() const { return MRI; }
- const MCInstrInfo *getMCInstrInfo() const { return MII; }
- const MCSubtargetInfo *getMCSubtargetInfo() const { return STI; }
+ const MCRegisterInfo *getMCRegisterInfo() const { return MRI.get(); }
+ const MCInstrInfo *getMCInstrInfo() const { return MII.get(); }
+ const MCSubtargetInfo *getMCSubtargetInfo() const { return STI.get(); }
/// If intrinsic information is available, return it. If not, return null.
virtual const TargetIntrinsicInfo *getIntrinsicInfo() const {
@@ -202,6 +201,9 @@ public:
bool getO0WantsFastISel() { return O0WantsFastISel; }
void setO0WantsFastISel(bool Enable) { O0WantsFastISel = Enable; }
void setGlobalISel(bool Enable) { Options.EnableGlobalISel = Enable; }
+ void setGlobalISelAbort(GlobalISelAbortMode Mode) {
+ Options.GlobalISelAbort = Mode;
+ }
void setMachineOutliner(bool Enable) {
Options.EnableMachineOutliner = Enable;
}
@@ -285,18 +287,6 @@ public:
void getNameWithPrefix(SmallVectorImpl<char> &Name, const GlobalValue *GV,
Mangler &Mang, bool MayAlwaysUsePrivate = false) const;
MCSymbol *getSymbol(const GlobalValue *GV) const;
-
- /// True if the target uses physical regs at Prolog/Epilog insertion
- /// time. If true (most machines), all vregs must be allocated before
- /// PEI. If false (virtual-register machines), then callee-save register
- /// spilling and scavenging are not needed or used.
- virtual bool usesPhysRegsForPEI() const { return true; }
-
- /// True if the target wants to use interprocedural register allocation by
- /// default. The -enable-ipra flag can be used to override this.
- virtual bool useIPRA() const {
- return false;
- }
};
/// This class describes a target machine that is implemented with the LLVM
@@ -350,8 +340,37 @@ public:
bool addAsmPrinter(PassManagerBase &PM, raw_pwrite_stream &Out,
raw_pwrite_stream *DwoOut, CodeGenFileType FileTYpe,
MCContext &Context);
+
+ /// True if the target uses physical regs at Prolog/Epilog insertion
+ /// time. If true (most machines), all vregs must be allocated before
+ /// PEI. If false (virtual-register machines), then callee-save register
+ /// spilling and scavenging are not needed or used.
+ virtual bool usesPhysRegsForPEI() const { return true; }
+
+ /// True if the target wants to use interprocedural register allocation by
+ /// default. The -enable-ipra flag can be used to override this.
+ virtual bool useIPRA() const {
+ return false;
+ }
};
+/// Helper method for getting the code model, returning Default if
+/// CM does not have a value. The tiny and kernel models will produce
+/// an error, so targets that support them or require more complex code model
+/// selection logic should implement and call their own getEffectiveCodeModel.
+inline CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
+ CodeModel::Model Default) {
+ if (CM) {
+ // By default, targets do not support the tiny and kernel models.
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
+ return *CM;
+ }
+ return Default;
+}
+
} // end namespace llvm
#endif // LLVM_TARGET_TARGETMACHINE_H
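The helper's dispatch can be mirrored in a self-contained sketch (std::optional and an exception stand in here for llvm::Optional and report_fatal_error):

    #include <optional>
    #include <stdexcept>

    enum class Model { Tiny, Small, Kernel, Medium, Large };

    Model getEffectiveCodeModelSketch(std::optional<Model> CM, Model Default) {
      if (CM) {
        // Generic targets reject the tiny and kernel models.
        if (*CM == Model::Tiny)
          throw std::runtime_error("Target does not support the tiny CodeModel");
        if (*CM == Model::Kernel)
          throw std::runtime_error("Target does not support the kernel CodeModel");
        return *CM;
      }
      return Default;
    }

A target's TargetMachine constructor would then typically pass the optional code model it was handed together with its own default, along the lines of getEffectiveCodeModel(CM, CodeModel::Small).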
diff --git a/contrib/llvm/include/llvm/Target/TargetOptions.h b/contrib/llvm/include/llvm/Target/TargetOptions.h
index 07ed773de55e..b18101d92833 100644
--- a/contrib/llvm/include/llvm/Target/TargetOptions.h
+++ b/contrib/llvm/include/llvm/Target/TargetOptions.h
@@ -96,6 +96,14 @@ namespace llvm {
SCE // Tune debug info for SCE targets (e.g. PS4).
};
+ /// Enable abort calls when global instruction selection fails to lower/select
+ /// an instruction.
+ enum class GlobalISelAbortMode {
+ Disable, // Disable the abort.
+ Enable, // Enable the abort.
+ DisableWithDiag // Disable the abort but emit a diagnostic on failure.
+ };
+
class TargetOptions {
public:
TargetOptions()
@@ -192,6 +200,10 @@ namespace llvm {
/// EnableGlobalISel - This flag enables global instruction selection.
unsigned EnableGlobalISel : 1;
+ /// EnableGlobalISelAbort - Control abort behaviour when global instruction
+ /// selection fails to lower/select an instruction.
+ GlobalISelAbortMode GlobalISelAbort = GlobalISelAbortMode::Enable;
+
/// UseInitArray - Use .init_array instead of .ctors for static
/// constructors.
unsigned UseInitArray : 1;
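A minimal sketch of how a selector might react to a lowering failure under each mode; the diagnostic text and the fall-back-to-another-selector behaviour are assumptions for illustration, not the actual GlobalISel failure path:

    #include <cstdio>
    #include <cstdlib>

    enum class GlobalISelAbortModeSketch { Disable, Enable, DisableWithDiag };

    void handleSelectionFailure(GlobalISelAbortModeSketch Mode) {
      switch (Mode) {
      case GlobalISelAbortModeSketch::Enable:
        std::fprintf(stderr, "unable to lower/select instruction\n");
        std::abort(); // Abort compilation on failure.
      case GlobalISelAbortModeSketch::DisableWithDiag:
        std::fprintf(stderr, "GlobalISel failed; another selector will run\n");
        break; // Diagnose, but keep going.
      case GlobalISelAbortModeSketch::Disable:
        break; // Silently let the pipeline fall back.
      }
    }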
diff --git a/contrib/llvm/include/llvm/Target/TargetPfmCounters.td b/contrib/llvm/include/llvm/Target/TargetPfmCounters.td
new file mode 100644
index 000000000000..dac150f03445
--- /dev/null
+++ b/contrib/llvm/include/llvm/Target/TargetPfmCounters.td
@@ -0,0 +1,50 @@
+//===- TargetPfmCounters.td - Target Pfm Counters -*- tablegen ----------*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target-independent interfaces for performance counters.
+
+// Definition of a hardware counter from libpfm identifiers.
+class PfmCounter<string counter> {
+ // The name of the counter that measures events.
+ // The name can be "some_counter + some_other_counter", in which case the
+ // measured value is the sum of events on these counters.
+ string Counter = counter;
+}
+
+// Issue counters can be tied to a ProcResource
+class PfmIssueCounter<string resource_name, string counter>
+ : PfmCounter<counter> {
+ // The name of the ProcResource on which uops are issued. This is used by
+ // llvm-exegesis to compare measurements with values in the SchedModels.
+ // If the CPU has a sched model, this should correspond to the name of a
+ // ProcResource.
+ string ResourceName = resource_name;
+}
+
+def NoPfmCounter : PfmCounter <""> {}
+
+// Set of PfmCounters for measuring sched model characteristics.
+class ProcPfmCounters {
+ // Processors can define how to measure cycles by defining a CycleCounter.
+ PfmCounter CycleCounter = NoPfmCounter;
+ // Processors can define how to measure uops by defining a UopsCounter.
+ PfmCounter UopsCounter = NoPfmCounter;
+ // Processors can define how to measure issued uops by defining IssueCounters.
+ list<PfmIssueCounter> IssueCounters = [];
+}
+
+// A binding of a set of counters to a CPU.
+class PfmCountersBinding<string cpu_name, ProcPfmCounters counters> {
+ string CpuName = cpu_name;
+ ProcPfmCounters Counters = counters;
+}
+
+// Declares the default binding for unbound CPUs for the target.
+class PfmCountersDefaultBinding<ProcPfmCounters counters>
+ : PfmCountersBinding<"", counters> {}
diff --git a/contrib/llvm/include/llvm/Target/TargetSchedule.td b/contrib/llvm/include/llvm/Target/TargetSchedule.td
index 6fd2d5b78e54..808e183f5a5f 100644
--- a/contrib/llvm/include/llvm/Target/TargetSchedule.td
+++ b/contrib/llvm/include/llvm/Target/TargetSchedule.td
@@ -182,8 +182,7 @@ class ProcResourceKind;
//
// SchedModel ties these units to a processor for any stand-alone defs
// of this class.
-class ProcResourceUnits<ProcResourceKind kind, int num,
- list<string> pfmCounters> {
+class ProcResourceUnits<ProcResourceKind kind, int num> {
ProcResourceKind Kind = kind;
int NumUnits = num;
ProcResourceKind Super = ?;
@@ -198,8 +197,8 @@ def EponymousProcResourceKind : ProcResourceKind;
// Subtargets typically define processor resource kind and number of
// units in one place.
-class ProcResource<int num, list<string> pfmCounters = []> : ProcResourceKind,
- ProcResourceUnits<EponymousProcResourceKind, num, pfmCounters>;
+class ProcResource<int num> : ProcResourceKind,
+ ProcResourceUnits<EponymousProcResourceKind, num>;
class ProcResGroup<list<ProcResource> resources> : ProcResourceKind {
list<ProcResource> Resources = resources;
@@ -374,7 +373,11 @@ class SchedPredicate<code pred> : SchedPredicateBase {
SchedMachineModel SchedModel = ?;
code Predicate = pred;
}
-def NoSchedPred : SchedPredicate<[{true}]>;
+
+// Define a predicate to be typically used as the default case in a
+// SchedVariant. If the SchedVariant does not use any other predicate based on
+// MCSchedPredicate, this is the default scheduling case used by llvm-mca.
+def NoSchedPred : MCSchedPredicate<TruePred>;
// Associate a predicate with a list of SchedReadWrites. By default,
// the selected SchedReadWrites are still associated with a single
@@ -461,6 +464,10 @@ class SchedAlias<SchedReadWrite match, SchedReadWrite alias> {
// - The number of physical registers which can be used for register renaming
// purpose.
// - The cost of a register rename.
+// - The set of registers that allow move elimination.
+// - The maximum number of moves that can be eliminated every cycle.
+// - Whether move elimination is limited to register moves whose input
+// is known to be zero.
//
// The cost of a rename is the number of physical registers allocated by the
// register alias table to map the new definition. By default, register can be
@@ -507,11 +514,35 @@ class SchedAlias<SchedReadWrite match, SchedReadWrite alias> {
// partial write is combined with the previous super-register definition. We
// should add support for these cases, and correctly model merge problems with
// partial register accesses.
+//
+// Field MaxMovesEliminatedPerCycle specifies how many moves can be eliminated
+// every cycle. A default value of zero for that field means: there is no limit
+// to the number of moves that can be eliminated by this register file.
+//
+// An instruction MI is a candidate for move elimination if a call to
+// method TargetSubtargetInfo::isOptimizableRegisterMove(MI) returns true (see
+// llvm/CodeGen/TargetSubtargetInfo.h, and llvm/MC/MCInstrAnalysis.h).
+//
+// Subtargets can instantiate tablegen class IsOptimizableRegisterMove (see
+// llvm/Target/TargetInstrPredicate.td) to customize the set of move elimination
+// candidates. By default, no instruction is a valid move elimination candidate.
+//
+// A register move MI is eliminated only if:
+// - MI is a move elimination candidate.
+// - The destination register is from a register class that allows move
+// elimination (see field `AllowMoveElimination` below).
+// - Constraints on the move kind, and the maximum number of moves that can be
+// eliminated per cycle are all met.
+
class RegisterFile<int numPhysRegs, list<RegisterClass> Classes = [],
- list<int> Costs = []> {
+ list<int> Costs = [], list<bit> AllowMoveElim = [],
+ int MaxMoveElimPerCy = 0, bit AllowZeroMoveElimOnly = 0> {
list<RegisterClass> RegClasses = Classes;
list<int> RegCosts = Costs;
+ list<bit> AllowMoveElimination = AllowMoveElim;
int NumPhysRegs = numPhysRegs;
+ int MaxMovesEliminatedPerCycle = MaxMoveElimPerCy;
+ bit AllowZeroMoveEliminationOnly = AllowZeroMoveElimOnly;
SchedMachineModel SchedModel = ?;
}
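The elimination constraints listed in the comment above can be summarised by a small standalone check (simplified stand-in types; the actual implementation does not take this form):

    // Per-instruction facts needed by the check.
    struct MoveElimCandidate {
      bool IsCandidate;     // i.e. isOptimizableRegisterMove(MI) returned true
      bool ClassAllowsElim; // AllowMoveElimination bit for the destination class
      bool InputKnownZero;  // only relevant when AllowZeroMoveEliminationOnly is set
    };

    bool canEliminate(const MoveElimCandidate &MI, unsigned EliminatedThisCycle,
                      unsigned MaxPerCycle, bool ZeroMoveOnly) {
      if (!MI.IsCandidate || !MI.ClassAllowsElim)
        return false;
      if (ZeroMoveOnly && !MI.InputKnownZero)
        return false;
      // MaxMovesEliminatedPerCycle == 0 means "no per-cycle limit".
      return MaxPerCycle == 0 || EliminatedThisCycle < MaxPerCycle;
    }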
@@ -531,23 +562,12 @@ class RetireControlUnit<int bufferSize, int retirePerCycle> {
SchedMachineModel SchedModel = ?;
}
-// Allow the definition of hardware counters.
-class PfmCounter {
+// Base class for Load/StoreQueue. It is used to identify processor resources
+// which describe load/store queues in the LS unit.
+class MemoryQueue<ProcResource PR> {
+ ProcResource QueueDescriptor = PR;
SchedMachineModel SchedModel = ?;
}
-// Each processor can define how to measure cycles by defining a
-// PfmCycleCounter.
-class PfmCycleCounter<string counter> : PfmCounter {
- string Counter = counter;
-}
-
-// Each ProcResourceUnits can define how to measure issued uops by defining
-// a PfmIssueCounter.
-class PfmIssueCounter<ProcResourceUnits resource, list<string> counters>
- : PfmCounter{
- // The resource units on which uops are issued.
- ProcResourceUnits Resource = resource;
- // The list of counters that measure issue events.
- list<string> Counters = counters;
-}
+class LoadQueue<ProcResource LDQueue> : MemoryQueue<LDQueue>;
+class StoreQueue<ProcResource STQueue> : MemoryQueue<STQueue>;
diff --git a/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td b/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
index 4ba4d821225d..eb5a14bd21b8 100644
--- a/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -116,12 +116,18 @@ def SDTIntBinOp : SDTypeProfile<1, 2, [ // add, and, or, xor, udiv, etc.
def SDTIntShiftOp : SDTypeProfile<1, 2, [ // shl, sra, srl
SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>
]>;
+def SDTIntShiftDOp: SDTypeProfile<1, 3, [ // fshl, fshr
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
def SDTIntSatNoShOp : SDTypeProfile<1, 2, [ // ssat with no shift
SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;
def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // mulhi, mullo, sdivrem, udivrem
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,SDTCisInt<0>
]>;
+def SDTIntScaledBinOp : SDTypeProfile<1, 3, [ // smulfix
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
def SDTFPBinOp : SDTypeProfile<1, 2, [ // fadd, fmul, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>
@@ -162,7 +168,7 @@ def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg
]>;
def SDTExtInvec : SDTypeProfile<1, 1, [ // sext_invec
SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>, SDTCisVec<1>,
- SDTCisOpSmallerThanOp<1, 0>, SDTCisSameSizeAs<0,1>
+ SDTCisOpSmallerThanOp<1, 0>
]>;
def SDTSetCC : SDTypeProfile<1, 3, [ // setcc
@@ -217,7 +223,7 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store
]>;
def SDTMaskedStore: SDTypeProfile<0, 3, [ // masked store
- SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<1, 2>
+ SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>
]>;
def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load
@@ -225,16 +231,6 @@ def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load
SDTCisSameNumEltsAs<0, 2>
]>;
-def SDTMaskedGather: SDTypeProfile<2, 3, [ // masked gather
- SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<1, 3>,
- SDTCisPtrTy<4>, SDTCVecEltisVT<1, i1>, SDTCisSameNumEltsAs<0, 1>
-]>;
-
-def SDTMaskedScatter: SDTypeProfile<1, 3, [ // masked scatter
- SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameNumEltsAs<0, 1>,
- SDTCVecEltisVT<0, i1>, SDTCisPtrTy<3>
-]>;
-
def SDTVecShuffle : SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
]>;
@@ -360,6 +356,8 @@ def sra : SDNode<"ISD::SRA" , SDTIntShiftOp>;
def shl : SDNode<"ISD::SHL" , SDTIntShiftOp>;
def rotl : SDNode<"ISD::ROTL" , SDTIntShiftOp>;
def rotr : SDNode<"ISD::ROTR" , SDTIntShiftOp>;
+def fshl : SDNode<"ISD::FSHL" , SDTIntShiftDOp>;
+def fshr : SDNode<"ISD::FSHR" , SDTIntShiftDOp>;
def and : SDNode<"ISD::AND" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
def or : SDNode<"ISD::OR" , SDTIntBinOp,
@@ -383,6 +381,12 @@ def umin : SDNode<"ISD::UMIN" , SDTIntBinOp,
def umax : SDNode<"ISD::UMAX" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
+def saddsat : SDNode<"ISD::SADDSAT" , SDTIntBinOp, [SDNPCommutative]>;
+def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>;
+def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>;
+def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>;
+def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
+
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>;
@@ -416,8 +420,14 @@ def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def fmaxnum : SDNode<"ISD::FMAXNUM" , SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
-def fminnan : SDNode<"ISD::FMINNAN" , SDTFPBinOp>;
-def fmaxnan : SDNode<"ISD::FMAXNAN" , SDTFPBinOp>;
+def fminnum_ieee : SDNode<"ISD::FMINNUM_IEEE", SDTFPBinOp,
+ [SDNPCommutative]>;
+def fmaxnum_ieee : SDNode<"ISD::FMAXNUM_IEEE", SDTFPBinOp,
+ [SDNPCommutative]>;
+def fminimum : SDNode<"ISD::FMINIMUM" , SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def fmaximum : SDNode<"ISD::FMAXIMUM" , SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
def fgetsign : SDNode<"ISD::FGETSIGN" , SDTFPToIntOp>;
def fcanonicalize : SDNode<"ISD::FCANONICALIZE", SDTFPUnaryOp>;
def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>;
@@ -510,10 +520,6 @@ def masked_store : SDNode<"ISD::MSTORE", SDTMaskedStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def masked_load : SDNode<"ISD::MLOAD", SDTMaskedLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def masked_scatter : SDNode<"ISD::MSCATTER", SDTMaskedScatter,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def masked_gather : SDNode<"ISD::MGATHER", SDTMaskedGather,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Do not use ld, st directly. Use load, extload, sextload, zextload, store,
// and truncst (see below).
@@ -630,6 +636,15 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
code ImmediateCode = [{}];
SDNodeXForm OperandTransform = xform;
+ // When this is set, the PredicateCode may refer to a constant Operands
+ // vector which contains the captured nodes of the DAG, in the order listed
+ // by the Operands field above.
+ //
+ // This is useful when Fragments involves associative / commutative
+ // operators: a single piece of code can easily refer to all operands even
+ // when re-associated / commuted variants of the fragment are matched.
+ bit PredicateCodeUsesOperands = 0;
+
// Define a few pre-packaged predicates. This helps GlobalISel import
// existing rules from SelectionDAG for many common cases.
// They will be tested prior to the code in pred and must not be used in
@@ -1067,6 +1082,15 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
let MemoryVT = f32;
}
+def nonvolatile_load : PatFrag<(ops node:$ptr),
+ (load node:$ptr), [{
+ return !cast<LoadSDNode>(N)->isVolatile();
+}]>;
+def nonvolatile_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return !cast<StoreSDNode>(N)->isVolatile();
+}]>;
+
// nontemporal store fragments.
def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
diff --git a/contrib/llvm/include/llvm/Testing/Support/SupportHelpers.h b/contrib/llvm/include/llvm/Testing/Support/SupportHelpers.h
index 96264ac81dc4..b2975ec395d5 100644
--- a/contrib/llvm/include/llvm/Testing/Support/SupportHelpers.h
+++ b/contrib/llvm/include/llvm/Testing/Support/SupportHelpers.h
@@ -10,10 +10,13 @@
#ifndef LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H
#define LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H
-#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_os_ostream.h"
#include "gtest/gtest-printers.h"
+#include <string>
+
namespace llvm {
namespace detail {
struct ErrorHolder {
@@ -52,6 +55,10 @@ void PrintTo(const ExpectedHolder<T> &Item, std::ostream *Out) {
}
}
} // namespace detail
+
+namespace unittest {
+SmallString<128> getInputFileDirectory(const char *Argv0);
+}
} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/TextAPI/ELF/ELFStub.h b/contrib/llvm/include/llvm/TextAPI/ELF/ELFStub.h
new file mode 100644
index 000000000000..fa54e6f8b711
--- /dev/null
+++ b/contrib/llvm/include/llvm/TextAPI/ELF/ELFStub.h
@@ -0,0 +1,69 @@
+//===- ELFStub.h ------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-----------------------------------------------------------------------===/
+///
+/// \file
+/// This file defines an internal representation of an ELF stub.
+///
+//===-----------------------------------------------------------------------===/
+
+#ifndef LLVM_TEXTAPI_ELF_ELFSTUB_H
+#define LLVM_TEXTAPI_ELF_ELFSTUB_H
+
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/VersionTuple.h"
+#include <vector>
+#include <set>
+
+namespace llvm {
+namespace elfabi {
+
+typedef uint16_t ELFArch;
+
+enum class ELFSymbolType {
+ NoType = ELF::STT_NOTYPE,
+ Object = ELF::STT_OBJECT,
+ Func = ELF::STT_FUNC,
+ TLS = ELF::STT_TLS,
+
+ // Type information is 4 bits, so 16 is safely out of range.
+ Unknown = 16,
+};
+
+struct ELFSymbol {
+ ELFSymbol(std::string SymbolName) : Name(SymbolName) {}
+ std::string Name;
+ uint64_t Size;
+ ELFSymbolType Type;
+ bool Undefined;
+ bool Weak;
+ Optional<std::string> Warning;
+ bool operator<(const ELFSymbol &RHS) const {
+ return Name < RHS.Name;
+ }
+};
+
+// A cumulative representation of ELF stubs.
+// Both textual and binary stubs are read into and written from this object.
+class ELFStub {
+// TODO: Add support for symbol versioning.
+public:
+ VersionTuple TbeVersion;
+ Optional<std::string> SoName;
+ ELFArch Arch;
+ std::vector<std::string> NeededLibs;
+ std::set<ELFSymbol> Symbols;
+
+ ELFStub() {}
+ ELFStub(const ELFStub &Stub);
+ ELFStub(ELFStub &&Stub);
+};
+} // end namespace elfabi
+} // end namespace llvm
+
+#endif // LLVM_TEXTAPI_ELF_ELFSTUB_H
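A hypothetical usage sketch of this representation; it assumes the LLVM headers and the TextAPI library are available, the field names come from the class above, and the soname, symbol, and architecture values are made up:

    #include "llvm/TextAPI/ELF/ELFStub.h"
    #include <string>

    using namespace llvm::elfabi;

    ELFStub makeExampleStub() {
      ELFStub Stub;
      Stub.TbeVersion = llvm::VersionTuple(1, 0);
      Stub.SoName = std::string("libexample.so.1"); // hypothetical soname
      Stub.Arch = llvm::ELF::EM_X86_64;             // ELFArch is a raw e_machine value
      Stub.NeededLibs.push_back("libc.so.6");

      ELFSymbol Sym("foo");                         // hypothetical exported function
      Sym.Size = 0;
      Sym.Type = ELFSymbolType::Func;
      Sym.Undefined = false;
      Sym.Weak = false;
      Stub.Symbols.insert(Sym);
      return Stub;
    }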
diff --git a/contrib/llvm/include/llvm/TextAPI/ELF/TBEHandler.h b/contrib/llvm/include/llvm/TextAPI/ELF/TBEHandler.h
new file mode 100644
index 000000000000..91521c656fa2
--- /dev/null
+++ b/contrib/llvm/include/llvm/TextAPI/ELF/TBEHandler.h
@@ -0,0 +1,45 @@
+//===- TBEHandler.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-----------------------------------------------------------------------===/
+///
+/// \file
+/// This file declares an interface for reading and writing .tbe (text-based
+/// ELF) files.
+///
+//===-----------------------------------------------------------------------===/
+
+#ifndef LLVM_TEXTAPI_ELF_TBEHANDLER_H
+#define LLVM_TEXTAPI_ELF_TBEHANDLER_H
+
+#include "llvm/Support/VersionTuple.h"
+#include "llvm/Support/Error.h"
+#include <memory>
+
+namespace llvm {
+
+class raw_ostream;
+class Error;
+class StringRef;
+class VersionTuple;
+
+namespace elfabi {
+
+class ELFStub;
+
+const VersionTuple TBEVersionCurrent(1, 0);
+
+/// Attempts to read an ELF interface file from a StringRef buffer.
+Expected<std::unique_ptr<ELFStub>> readTBEFromBuffer(StringRef Buf);
+
+/// Attempts to write an ELF interface file to a raw_ostream.
+Error writeTBEToOutputStream(raw_ostream &OS, const ELFStub &Stub);
+
+} // end namespace elfabi
+} // end namespace llvm
+
+#endif // LLVM_TEXTAPI_ELF_TBEHANDLER_H
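A hypothetical round-trip sketch using the two entry points declared above; it assumes the LLVM headers and libraries are available and only uses the declared Expected/Error signatures:

    #include "llvm/TextAPI/ELF/ELFStub.h"
    #include "llvm/TextAPI/ELF/TBEHandler.h"
    #include "llvm/Support/raw_ostream.h"

    // Parse a .tbe buffer and immediately re-emit it to stdout.
    llvm::Error dumpStub(llvm::StringRef Buf) {
      llvm::Expected<std::unique_ptr<llvm::elfabi::ELFStub>> StubOrErr =
          llvm::elfabi::readTBEFromBuffer(Buf);
      if (!StubOrErr)
        return StubOrErr.takeError();
      return llvm::elfabi::writeTBEToOutputStream(llvm::outs(), **StubOrErr);
    }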
diff --git a/contrib/llvm/include/llvm/Transforms/IPO.h b/contrib/llvm/include/llvm/Transforms/IPO.h
index ebc76bf82118..11d363b1200b 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO.h
@@ -202,6 +202,11 @@ Pass *createReversePostOrderFunctionAttrsPass();
ModulePass *createMergeFunctionsPass();
//===----------------------------------------------------------------------===//
+/// createHotColdSplittingPass - This pass outlines cold blocks into separate
+/// functions.
+ModulePass *createHotColdSplittingPass();
+
+//===----------------------------------------------------------------------===//
/// createPartialInliningPass - This pass inlines parts of functions.
///
ModulePass *createPartialInliningPass();
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
index dc9f18c79410..901fed7a0fa4 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -32,7 +32,8 @@ class Pass;
enum MemoryAccessKind {
MAK_ReadNone = 0,
MAK_ReadOnly = 1,
- MAK_MayWrite = 2
+ MAK_MayWrite = 2,
+ MAK_WriteOnly = 3
};
/// Returns the memory access properties of this copy of the function.
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index 120a34e15933..c2103b637266 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -37,13 +37,61 @@ public:
/// containing all the GUIDs of all functions to import for a source module.
using FunctionsToImportTy = std::unordered_set<GlobalValue::GUID>;
+ /// The different reasons selectCallee will choose not to import a
+ /// candidate.
+ enum ImportFailureReason {
+ None,
+ // We can encounter a global variable instead of a function in rare
+ // situations with SamplePGO. See comments where this failure type is
+ // set for more details.
+ GlobalVar,
+ // Found to be globally dead, so we don't bother importing.
+ NotLive,
+ // Instruction count over the current threshold.
+ TooLarge,
+ // Don't import something with interposable linkage as we can't inline it
+ // anyway.
+ InterposableLinkage,
+ // Generally we won't end up failing due to this reason, as we expect
+ // to find at least one summary for the GUID that is global or a local
+ // in the referenced module for direct calls.
+ LocalLinkageNotInModule,
+ // This corresponds to the NotEligibleToImport being set on the summary,
+ // which can happen in a few different cases (e.g. local that can't be
+ // renamed or promoted because it is referenced on a llvm*.used variable).
+ NotEligible,
+ // This corresponds to NoInline being set on the function summary,
+ // which will happen if it is known that the inliner will not be able
+ // to inline the function (e.g. it is marked with a NoInline attribute).
+ NoInline
+ };
+
+ /// Information optionally tracked for candidates the importer decided
+ /// not to import. Used for optional stat printing.
+ struct ImportFailureInfo {
+ // The ValueInfo corresponding to the candidate. We save an index hash
+ // table lookup for each GUID by stashing this here.
+ ValueInfo VI;
+ // The maximum call edge hotness for all failed imports of this candidate.
+ CalleeInfo::HotnessType MaxHotness;
+ // The most recent reason for failing to import (doesn't necessarily correspond
+ // to the attempt with the maximum hotness).
+ ImportFailureReason Reason;
+ // The number of times we tried to import candidate but failed.
+ unsigned Attempts;
+ ImportFailureInfo(ValueInfo VI, CalleeInfo::HotnessType MaxHotness,
+ ImportFailureReason Reason, unsigned Attempts)
+ : VI(VI), MaxHotness(MaxHotness), Reason(Reason), Attempts(Attempts) {}
+ };
+
/// Map of callee GUID considered for import into a given module to a pair
/// consisting of the largest threshold applied when deciding whether to
/// import it and, if we decided to import, a pointer to the summary instance
/// imported. If we decided not to import, the summary will be nullptr.
using ImportThresholdsTy =
DenseMap<GlobalValue::GUID,
- std::pair<unsigned, const GlobalValueSummary *>>;
+ std::tuple<unsigned, const GlobalValueSummary *,
+ std::unique_ptr<ImportFailureInfo>>>;
/// The map contains an entry for every module to import from, the key being
/// the module identifier to pass to the ModuleLoader. The value is the set of
@@ -128,6 +176,14 @@ void computeDeadSymbols(
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing);
+/// Compute dead symbols and run constant propagation in combined index
+/// after that.
+void computeDeadSymbolsWithConstProp(
+ ModuleSummaryIndex &Index,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
+ function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
+ bool ImportEnabled);
+
/// Converts value \p GV to declaration, or replaces with a declaration if
/// it is an alias. Returns true if converted, false if replaced.
bool convertToDeclaration(GlobalValue &GV);
@@ -153,10 +209,10 @@ std::error_code EmitImportsFiles(
StringRef ModulePath, StringRef OutputFilename,
const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
-/// Resolve WeakForLinker values in \p TheModule based on the information
+/// Resolve prevailing symbol linkages in \p TheModule based on the information
/// recorded in the summaries during global summary-based analysis.
-void thinLTOResolveWeakForLinkerModule(Module &TheModule,
- const GVSummaryMapTy &DefinedGlobals);
+void thinLTOResolvePrevailingInModule(Module &TheModule,
+ const GVSummaryMapTy &DefinedGlobals);
/// Internalize \p TheModule based on the information recorded in the summaries
/// during global summary-based analysis.
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h b/contrib/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
new file mode 100644
index 000000000000..57e9a9e69187
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
@@ -0,0 +1,31 @@
+//===- HotColdSplitting.h ---- Outline Cold Regions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// This pass outlines cold regions to a separate function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_HOTCOLDSPLITTING_H
+#define LLVM_TRANSFORMS_IPO_HOTCOLDSPLITTING_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+/// Pass to outline cold regions.
+class HotColdSplittingPass : public PassInfoMixin<HotColdSplittingPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_HOTCOLDSPLITTING_H
+
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/contrib/llvm/include/llvm/Transforms/IPO/SampleProfile.h
index cd5a0563898e..af4a933ec1f6 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/SampleProfile.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/SampleProfile.h
@@ -25,13 +25,16 @@ class Module;
/// The sample profiler data loader pass.
class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> {
public:
- SampleProfileLoaderPass(std::string File = "", bool IsThinLTOPreLink = false)
- : ProfileFileName(File), IsThinLTOPreLink(IsThinLTOPreLink) {}
+ SampleProfileLoaderPass(std::string File = "", std::string RemappingFile = "",
+ bool IsThinLTOPreLink = false)
+ : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
+ IsThinLTOPreLink(IsThinLTOPreLink) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
std::string ProfileFileName;
+ std::string ProfileRemappingFileName;
bool IsThinLTOPreLink;
};
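For illustration, a minimal sketch of how the extended constructor might be wired into a new-pass-manager pipeline; the profile and remapping file names, and the pre-built ModuleAnalysisManager MAM, are placeholders rather than part of this change:

// Hedged sketch: assumes M is a Module and MAM is a ModuleAnalysisManager
// already populated (e.g. via PassBuilder::registerModuleAnalyses).
ModulePassManager MPM;
MPM.addPass(SampleProfileLoaderPass("app.prof", "app.remap",
                                    /*IsThinLTOPreLink=*/false));
MPM.run(M, MAM);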
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation.h b/contrib/llvm/include/llvm/Transforms/Instrumentation.h
index 4a346c8d7450..017cab0a7750 100644
--- a/contrib/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation.h
@@ -24,9 +24,11 @@
namespace llvm {
+class Triple;
class FunctionPass;
class ModulePass;
class OptimizationRemarkEmitter;
+class Comdat;
/// Instrumentation passes often insert conditional checks into entry blocks.
/// Call this function before splitting the entry block to move instructions
@@ -36,6 +38,17 @@ class OptimizationRemarkEmitter;
BasicBlock::iterator PrepareToSplitEntryBlock(BasicBlock &BB,
BasicBlock::iterator IP);
+// Create a constant for Str so that we can pass it to the run-time lib.
+GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
+ bool AllowMerging,
+ const char *NamePrefix = "");
+
+// Returns F.getComdat() if it exists.
+// Otherwise creates a new comdat, sets F's comdat, and returns it.
+// Returns nullptr on failure.
+Comdat *GetOrCreateFunctionComdat(Function &F, Triple &T,
+ const std::string &ModuleId);
+
// Insert GCOV profiling instrumentation
struct GCOVOptions {
static GCOVOptions getDefault();
@@ -64,6 +77,12 @@ struct GCOVOptions {
// Emit the exit block immediately after the start block, rather than after
// all of the function body's blocks.
bool ExitBlockBeforeBody;
+
+ // Regexes separated by a semi-colon to filter the files to instrument.
+ std::string Filter;
+
+ // Regexes separated by a semi-colon to filter the files to not instrument.
+ std::string Exclude;
};
ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
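The new Filter/Exclude fields hold semicolon-separated regexes. A hypothetical configuration (patterns invented purely for illustration) might look like:

GCOVOptions Opts = GCOVOptions::getDefault();
Opts.Filter = "src/.*\\.c;src/.*\\.cpp";  // instrument only these sources
Opts.Exclude = ".*third_party.*";         // never instrument vendored code
ModulePass *Profiler = createGCOVProfilerPass(Opts);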
@@ -111,6 +130,9 @@ struct InstrProfOptions {
// Do counter register promotion
bool DoCounterPromotion = false;
+ // Use atomic profile counter increments.
+ bool Atomic = false;
+
// Name of the profile file to use as output
std::string InstrProfileOutput;
@@ -127,18 +149,12 @@ FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false,
bool UseAfterScope = false);
ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false,
bool Recover = false,
- bool UseGlobalsGC = true);
-
-// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
-FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0,
- bool Recover = false);
+ bool UseGlobalsGC = true,
+ bool UseOdrIndicator = true);
FunctionPass *createHWAddressSanitizerPass(bool CompileKernel = false,
bool Recover = false);
-// Insert ThreadSanitizer (race detection) instrumentation
-FunctionPass *createThreadSanitizerPass();
-
// Insert DataFlowSanitizer (dynamic data flow analysis) instrumentation
ModulePass *createDataFlowSanitizerPass(
const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
@@ -206,7 +222,6 @@ static inline uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale) {
assert(Scaled <= std::numeric_limits<uint32_t>::max() && "overflow 32-bits");
return Scaled;
}
-
} // end namespace llvm
#endif // LLVM_TRANSFORMS_INSTRUMENTATION_H
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h b/contrib/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
new file mode 100644
index 000000000000..460342d1631b
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
@@ -0,0 +1,31 @@
+//===- ControlHeightReduction.h - Control Height Reduction ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges conditional blocks of code and reduces the number of
+// conditional branches in the hot paths based on profiles.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class ControlHeightReductionPass :
+ public PassInfoMixin<ControlHeightReductionPass> {
+public:
+ ControlHeightReductionPass();
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/contrib/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
new file mode 100644
index 000000000000..54f0e2f78230
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
@@ -0,0 +1,48 @@
+//===- Transforms/Instrumentation/MemorySanitizer.h - MSan Pass -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the memory sanitizer pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMORYSANITIZER_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_MEMORYSANITIZER_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
+FunctionPass *createMemorySanitizerLegacyPassPass(int TrackOrigins = 0,
+ bool Recover = false,
+ bool EnableKmsan = false);
+
+/// A function pass for msan instrumentation.
+///
+/// Instruments functions to detect uninitialized reads. This function pass
+/// inserts calls to runtime library functions. If the functions aren't declared
+/// yet, the pass inserts the declarations. Otherwise the existing globals are
+/// used.
+struct MemorySanitizerPass : public PassInfoMixin<MemorySanitizerPass> {
+ MemorySanitizerPass(int TrackOrigins = 0, bool Recover = false,
+ bool EnableKmsan = false)
+ : TrackOrigins(TrackOrigins), Recover(Recover), EnableKmsan(EnableKmsan) {
+ }
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+
+private:
+ int TrackOrigins;
+ bool Recover;
+ bool EnableKmsan;
+};
+}
+
+#endif /* LLVM_TRANSFORMS_INSTRUMENTATION_MEMORYSANITIZER_H */
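A rough sketch of using both entry points; the parameter values are examples only, and M plus the pass managers are assumed to exist in the caller:

// New pass manager: origin tracking level 2, no error recovery, userspace MSan.
FunctionPassManager FPM;
FPM.addPass(MemorySanitizerPass(/*TrackOrigins=*/2, /*Recover=*/false,
                                /*EnableKmsan=*/false));

// Legacy pass manager equivalent via the renamed factory function.
legacy::FunctionPassManager LegacyFPM(&M);
LegacyFPM.add(createMemorySanitizerLegacyPassPass(/*TrackOrigins=*/2));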
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/contrib/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
index c0b37c470b74..fdc5df68a669 100644
--- a/contrib/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
@@ -36,12 +36,14 @@ public:
/// The profile annotation (profile-instr-use) pass for IR based PGO.
class PGOInstrumentationUse : public PassInfoMixin<PGOInstrumentationUse> {
public:
- PGOInstrumentationUse(std::string Filename = "");
+ PGOInstrumentationUse(std::string Filename = "",
+ std::string RemappingFilename = "");
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
std::string ProfileFileName;
+ std::string ProfileRemappingFileName;
};
/// The indirect function call promotion pass.
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/contrib/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
new file mode 100644
index 000000000000..701e2e6ec89e
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
@@ -0,0 +1,33 @@
+//===- Transforms/Instrumentation/ThreadSanitizer.h - TSan Pass -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the thread sanitizer pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+// Insert ThreadSanitizer (race detection) instrumentation
+FunctionPass *createThreadSanitizerLegacyPassPass();
+
+/// A function pass for tsan instrumentation.
+///
+/// Instruments functions to detect data races. This function pass inserts
+/// calls to runtime library functions. If the functions aren't declared yet,
+/// the pass inserts the declarations. Otherwise the existing globals are used.
+struct ThreadSanitizerPass : public PassInfoMixin<ThreadSanitizerPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+} // namespace llvm
+#endif /* LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H */
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar.h b/contrib/llvm/include/llvm/Transforms/Scalar.h
index 9491e1bbac93..8fcf9296ba47 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar.h
@@ -26,7 +26,6 @@ class ModulePass;
class Pass;
class GetElementPtrInst;
class PassInfo;
-class TerminatorInst;
class TargetLowering;
class TargetMachine;
@@ -184,11 +183,12 @@ Pass *createLoopInstSimplifyPass();
//
// LoopUnroll - This pass is a simple loop unrolling pass.
//
-Pass *createLoopUnrollPass(int OptLevel = 2, int Threshold = -1, int Count = -1,
+Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
+ int Threshold = -1, int Count = -1,
int AllowPartial = -1, int Runtime = -1,
int UpperBound = -1, int AllowPeeling = -1);
// Create an unrolling pass for full unrolling that uses exact trip count only.
-Pass *createSimpleLoopUnrollPass(int OptLevel = 2);
+Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false);
//===----------------------------------------------------------------------===//
//
@@ -394,12 +394,6 @@ FunctionPass *createPartiallyInlineLibCallsPass();
//===----------------------------------------------------------------------===//
//
-// ScalarizerPass - Converts vector operations into scalar operations
-//
-FunctionPass *createScalarizerPass();
-
-//===----------------------------------------------------------------------===//
-//
// SeparateConstOffsetFromGEP - Split GEPs for better CSE
//
FunctionPass *createSeparateConstOffsetFromGEPPass(bool LowerGEP = false);
@@ -477,6 +471,7 @@ FunctionPass *createLoopDataPrefetchPass();
///===---------------------------------------------------------------------===//
ModulePass *createNameAnonGlobalPass();
+ModulePass *createCanonicalizeAliasesPass();
//===----------------------------------------------------------------------===//
//
@@ -491,6 +486,13 @@ FunctionPass *createLibCallsShrinkWrapPass();
// primarily to help other loop passes.
//
Pass *createLoopSimplifyCFGPass();
+
+//===----------------------------------------------------------------------===//
+//
+// WarnMissedTransformations - This pass emits warnings for leftover forced
+// transformations.
+//
+Pass *createWarnMissedTransformationsPass();
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h b/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
index 84589bf4db99..ba32e122fa10 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
@@ -38,6 +38,7 @@
#define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
@@ -50,8 +51,10 @@ class BasicBlock;
class BlockFrequencyInfo;
class Constant;
class ConstantInt;
+class ConstantExpr;
class DominatorTree;
class Function;
+class GlobalVariable;
class Instruction;
class TargetTransformInfo;
@@ -74,10 +77,15 @@ using ConstantUseListType = SmallVector<ConstantUser, 8>;
/// Keeps track of a constant candidate and its uses.
struct ConstantCandidate {
ConstantUseListType Uses;
+ // If the candidate is a ConstantExpr (currently only constant GEP expressions
+ // whose base pointers are GlobalVariables are supported), ConstInt records
+ // its offset from the base GV, and ConstExpr tracks the candidate GEP expr.
ConstantInt *ConstInt;
+ ConstantExpr *ConstExpr;
unsigned CumulativeCost = 0;
- ConstantCandidate(ConstantInt *ConstInt) : ConstInt(ConstInt) {}
+ ConstantCandidate(ConstantInt *ConstInt, ConstantExpr *ConstExpr=nullptr) :
+ ConstInt(ConstInt), ConstExpr(ConstExpr) {}
/// Add the user to the use list and update the cost.
void addUser(Instruction *Inst, unsigned Idx, unsigned Cost) {
@@ -91,16 +99,21 @@ struct ConstantCandidate {
struct RebasedConstantInfo {
ConstantUseListType Uses;
Constant *Offset;
+ Type *Ty;
- RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset)
- : Uses(std::move(Uses)), Offset(Offset) {}
+ RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset,
+ Type *Ty=nullptr) : Uses(std::move(Uses)), Offset(Offset), Ty(Ty) {}
};
using RebasedConstantListType = SmallVector<RebasedConstantInfo, 4>;
/// A base constant and all its rebased constants.
struct ConstantInfo {
- ConstantInt *BaseConstant;
+ // If the candidate is a ConstantExpr (currently only constant GEP expressions
+ // whose base pointers are GlobalVariables are supported), BaseInt records
+ // its offset from the base GV, and BaseExpr tracks the candidate GEP expr.
+ ConstantInt *BaseInt;
+ ConstantExpr *BaseExpr;
RebasedConstantListType RebasedConstants;
};
@@ -115,29 +128,43 @@ public:
BlockFrequencyInfo *BFI, BasicBlock &Entry);
void releaseMemory() {
- ConstantVec.clear();
ClonedCastMap.clear();
- ConstCandVec.clear();
+ ConstIntCandVec.clear();
+ for (auto MapEntry : ConstGEPCandMap)
+ MapEntry.second.clear();
+ ConstGEPCandMap.clear();
+ ConstIntInfoVec.clear();
+ for (auto MapEntry : ConstGEPInfoMap)
+ MapEntry.second.clear();
+ ConstGEPInfoMap.clear();
}
private:
- using ConstCandMapType = DenseMap<ConstantInt *, unsigned>;
- using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;
+ using ConstPtrUnionType = PointerUnion<ConstantInt *, ConstantExpr *>;
+ using ConstCandMapType = DenseMap<ConstPtrUnionType, unsigned>;
const TargetTransformInfo *TTI;
DominatorTree *DT;
BlockFrequencyInfo *BFI;
+ LLVMContext *Ctx;
+ const DataLayout *DL;
BasicBlock *Entry;
/// Keeps track of constant candidates found in the function.
- ConstCandVecType ConstCandVec;
+ using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;
+ using GVCandVecMapType = DenseMap<GlobalVariable *, ConstCandVecType>;
+ ConstCandVecType ConstIntCandVec;
+ GVCandVecMapType ConstGEPCandMap;
+
+ /// These are the final constants we decided to hoist.
+ using ConstInfoVecType = SmallVector<consthoist::ConstantInfo, 8>;
+ using GVInfoVecMapType = DenseMap<GlobalVariable *, ConstInfoVecType>;
+ ConstInfoVecType ConstIntInfoVec;
+ GVInfoVecMapType ConstGEPInfoMap;
/// Keep track of cast instructions we already cloned.
SmallDenseMap<Instruction *, Instruction *> ClonedCastMap;
- /// These are the final constants we decided to hoist.
- SmallVector<consthoist::ConstantInfo, 8> ConstantVec;
-
Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const;
SmallPtrSet<Instruction *, 8>
findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const;
@@ -145,19 +172,27 @@ private:
Instruction *Inst, unsigned Idx,
ConstantInt *ConstInt);
void collectConstantCandidates(ConstCandMapType &ConstCandMap,
+ Instruction *Inst, unsigned Idx,
+ ConstantExpr *ConstExpr);
+ void collectConstantCandidates(ConstCandMapType &ConstCandMap,
Instruction *Inst, unsigned Idx);
void collectConstantCandidates(ConstCandMapType &ConstCandMap,
Instruction *Inst);
void collectConstantCandidates(Function &Fn);
void findAndMakeBaseConstant(ConstCandVecType::iterator S,
- ConstCandVecType::iterator E);
+ ConstCandVecType::iterator E,
+ SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec);
unsigned maximizeConstantsInRange(ConstCandVecType::iterator S,
ConstCandVecType::iterator E,
ConstCandVecType::iterator &MaxCostItr);
- void findBaseConstants();
- void emitBaseConstants(Instruction *Base, Constant *Offset,
+ // If BaseGV is nullptr, find base among Constant Integer candidates;
+ // otherwise find base among constant GEPs sharing BaseGV as base pointer.
+ void findBaseConstants(GlobalVariable *BaseGV);
+ void emitBaseConstants(Instruction *Base, Constant *Offset, Type *Ty,
const consthoist::ConstantUser &ConstUser);
- bool emitBaseConstants();
+ // If BaseGV is nullptr, emit Constant Integer base; otherwise emit
+ // constant GEP base.
+ bool emitBaseConstants(GlobalVariable *BaseGV);
void deleteDeadCastInst() const;
bool optimizeConstants(Function &Fn);
};
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h b/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h
index b9de07ec9279..9827678b89f2 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -22,13 +22,14 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <cstdint>
#include <utility>
#include <vector>
@@ -158,11 +159,8 @@ private:
AssumptionCache *AC;
SetVector<BasicBlock *> DeadBlocks;
OptimizationRemarkEmitter *ORE;
- // Maps a block to the topmost instruction with implicit control flow in it.
- DenseMap<const BasicBlock *, const Instruction *>
- FirstImplicitControlFlowInsts;
+ ImplicitControlFlowTracking *ICF;
- OrderedInstructions *OI;
ValueTable VN;
/// A mapping from value numbers to lists of Value*'s that
@@ -183,7 +181,12 @@ private:
// Map the block to reversed postorder traversal number. It is used to
// find back edge easily.
- DenseMap<const BasicBlock *, uint32_t> BlockRPONumber;
+ DenseMap<AssertingVH<BasicBlock>, uint32_t> BlockRPONumber;
+
+ // This is set 'true' initially and also when new blocks have been added to
+ // the function being analyzed. This boolean is used to control the updating
+ // of BlockRPONumber prior to accessing the contents of BlockRPONumber.
+ bool InvalidBlockRPONumbers = true;
using LoadDepVect = SmallVector<NonLocalDepResult, 64>;
using AvailValInBlkVect = SmallVector<gvn::AvailableValueInBlock, 64>;
@@ -240,7 +243,7 @@ private:
}
// List of critical edges to be split between iterations.
- SmallVector<std::pair<TerminatorInst *, unsigned>, 4> toSplit;
+ SmallVector<std::pair<Instruction *, unsigned>, 4> toSplit;
// Helper functions of redundant load elimination
bool processLoad(LoadInst *L);
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index b3493a292498..9894345645a1 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -23,6 +23,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/ValueHandle.h"
#include <memory>
#include <utility>
@@ -34,7 +35,7 @@ class BinaryOperator;
class BranchInst;
class CmpInst;
class Constant;
-class DeferredDominance;
+class DomTreeUpdater;
class Function;
class Instruction;
class IntrinsicInst;
@@ -78,7 +79,7 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
AliasAnalysis *AA;
- DeferredDominance *DDT;
+ DomTreeUpdater *DTU;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData = false;
@@ -88,29 +89,16 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
#else
SmallSet<AssertingVH<const BasicBlock>, 16> LoopHeaders;
#endif
- DenseSet<std::pair<Value *, BasicBlock *>> RecursionSet;
unsigned BBDupThreshold;
- // RAII helper for updating the recursion stack.
- struct RecursionSetRemover {
- DenseSet<std::pair<Value *, BasicBlock *>> &TheSet;
- std::pair<Value *, BasicBlock *> ThePair;
-
- RecursionSetRemover(DenseSet<std::pair<Value *, BasicBlock *>> &S,
- std::pair<Value *, BasicBlock *> P)
- : TheSet(S), ThePair(P) {}
-
- ~RecursionSetRemover() { TheSet.erase(ThePair); }
- };
-
public:
JumpThreadingPass(int T = -1);
// Glue for old PM.
bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_,
- AliasAnalysis *AA_, DeferredDominance *DDT_,
- bool HasProfileData_, std::unique_ptr<BlockFrequencyInfo> BFI_,
+ AliasAnalysis *AA_, DomTreeUpdater *DTU_, bool HasProfileData_,
+ std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -127,11 +115,21 @@ public:
bool DuplicateCondBranchOnPHIIntoPred(
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs);
+ bool ComputeValueKnownInPredecessorsImpl(
+ Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result,
+ jumpthreading::ConstantPreference Preference,
+ DenseSet<std::pair<Value *, BasicBlock *>> &RecursionSet,
+ Instruction *CxtI = nullptr);
bool
ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
jumpthreading::PredValueInfo &Result,
jumpthreading::ConstantPreference Preference,
- Instruction *CxtI = nullptr);
+ Instruction *CxtI = nullptr) {
+ DenseSet<std::pair<Value *, BasicBlock *>> RecursionSet;
+ return ComputeValueKnownInPredecessorsImpl(V, BB, Result, Preference,
+ RecursionSet, CxtI);
+ }
+
bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
jumpthreading::ConstantPreference Preference,
Instruction *CxtI = nullptr);
@@ -141,7 +139,11 @@ public:
bool ProcessImpliedCondition(BasicBlock *BB);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
+ void UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI,
+ PHINode *SIUse, unsigned Idx);
+
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
+ bool TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB);
bool TryToUnfoldSelectInCurrBB(BasicBlock *BB);
bool ProcessGuards(BasicBlock *BB);
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
index 5f61c39b5530..46ebb74c413c 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -276,7 +276,15 @@ public:
// pass pipeline to put loops into their canonical form. Note that we can
// directly build up function analyses after this as the function pass
// manager handles all the invalidation at that layer.
- PreservedAnalyses PA = LoopCanonicalizationFPM.run(F, AM);
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(F);
+
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // canonicalization pipeline.
+ if (PI.runBeforePass<Function>(LoopCanonicalizationFPM, F)) {
+ PA = LoopCanonicalizationFPM.run(F, AM);
+ PI.runAfterPass<Function>(LoopCanonicalizationFPM, F);
+ }
// Get the loop structure for this function
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
@@ -337,8 +345,19 @@ public:
assert(L->isRecursivelyLCSSAForm(LAR.DT, LI) &&
"Loops must remain in LCSSA form!");
#endif
-
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, skip its execution completely if asked to (callback returns
+ // false).
+ if (!PI.runBeforePass<Loop>(Pass, *L))
+ continue;
PreservedAnalyses PassPA = Pass.run(*L, LAM, LAR, Updater);
+
+ // Do not pass deleted Loop into the instrumentation.
+ if (Updater.skipCurrentLoop())
+ PI.runAfterPassInvalidated<Loop>(Pass);
+ else
+ PI.runAfterPass<Loop>(Pass, *L);
+
// FIXME: We should verify the set of analyses relevant to Loop passes
// are preserved.
@@ -364,8 +383,8 @@ public:
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
PA.preserve<ScalarEvolutionAnalysis>();
- // FIXME: Uncomment this when all loop passes preserve MemorySSA
- // PA.preserve<MemorySSAAnalysis>();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
// FIXME: What we really want to do here is preserve an AA category, but
// that concept doesn't exist yet.
PA.preserve<AAManager>();
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
index 9848e0d54f2b..e38e983cc9eb 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -10,6 +10,7 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
#define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
+#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
@@ -23,23 +24,90 @@ class LPMUpdater;
class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> {
const int OptLevel;
+ /// If false, use a cost model to determine whether unrolling of a loop is
+ /// profitable. If true, only loops that explicitly request unrolling via
+ /// metadata are considered. All other loops are skipped.
+ const bool OnlyWhenForced;
+
public:
- explicit LoopFullUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {}
+ explicit LoopFullUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false)
+ : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
+/// A set of parameters used to control various transforms performed by the
+/// LoopUnroll pass. Each of the boolean parameters can be set to:
+/// true - enabling the transformation.
+/// false - disabling the transformation.
+/// None - relying on a global default.
+///
+/// There is also an OptLevel parameter, which is used for additional loop unroll
+/// tuning.
+///
+/// Intended use is to create a default object, modify parameters with
+/// additional setters and then pass it to LoopUnrollPass.
+///
+struct LoopUnrollOptions {
+ Optional<bool> AllowPartial;
+ Optional<bool> AllowPeeling;
+ Optional<bool> AllowRuntime;
+ Optional<bool> AllowUpperBound;
+ int OptLevel;
+
+ /// If false, use a cost model to determine whether unrolling of a loop is
+ /// profitable. If true, only loops that explicitly request unrolling via
+ /// metadata are considered. All other loops are skipped.
+ bool OnlyWhenForced;
+
+ LoopUnrollOptions(int OptLevel = 2, bool OnlyWhenForced = false)
+ : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
+
+ /// Enables or disables partial unrolling. When disabled only full unrolling
+ /// is allowed.
+ LoopUnrollOptions &setPartial(bool Partial) {
+ AllowPartial = Partial;
+ return *this;
+ }
+
+ /// Enables or disables unrolling of loops with runtime trip count.
+ LoopUnrollOptions &setRuntime(bool Runtime) {
+ AllowRuntime = Runtime;
+ return *this;
+ }
+
+ /// Enables or disables loop peeling.
+ LoopUnrollOptions &setPeeling(bool Peeling) {
+ AllowPeeling = Peeling;
+ return *this;
+ }
+
+ /// Enables or disables the use of trip count upper bound
+ /// in loop unrolling.
+ LoopUnrollOptions &setUpperBound(bool UpperBound) {
+ AllowUpperBound = UpperBound;
+ return *this;
+ }
+
+ // Sets "optimization level" tuning parameter for loop unrolling.
+ LoopUnrollOptions &setOptLevel(int O) {
+ OptLevel = O;
+ return *this;
+ }
+};
+
/// Loop unroll pass that will support both full and partial unrolling.
/// It is a function pass to have access to function and module analyses.
/// It will also put loops into canonical form (simplified and LCSSA).
class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> {
- const int OptLevel;
+ LoopUnrollOptions UnrollOpts;
public:
/// This uses the target information (or flags) to control the thresholds for
/// different unrolling strategies but supports all of them.
- explicit LoopUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {}
+ explicit LoopUnrollPass(LoopUnrollOptions UnrollOpts = {})
+ : UnrollOpts(UnrollOpts) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
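LoopUnrollOptions is a small builder, so a caller can chain the setters before handing it to the pass. A sketch with illustrative (non-default) values; FPM is an assumed FunctionPassManager:

LoopUnrollOptions UnrollOpts(/*OptLevel=*/3, /*OnlyWhenForced=*/true);
UnrollOpts.setPartial(true).setRuntime(false).setPeeling(true);

FunctionPassManager FPM;
FPM.addPass(LoopUnrollPass(UnrollOpts));  // supports full and partial unrolling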
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h b/contrib/llvm/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h
new file mode 100644
index 000000000000..41b4aada2baa
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h
@@ -0,0 +1,47 @@
+//===-- MakeGuardsExplicit.h - Turn guard intrinsics into guard branches --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the @llvm.experimental.guard intrinsic to the new form of
+// guard represented as widenable explicit branch to the deopt block. The
+// difference between this pass and LowerGuardIntrinsic is that after this pass
+// a guard represented as the intrinsic:
+//
+// call void(i1, ...) @llvm.experimental.guard(i1 %old_cond) [ "deopt"() ]
+//
+// is transformed into a guard represented as a widenable explicit branch:
+//
+// %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+// br i1 (%old_cond & %widenable_cond), label %guarded, label %deopt
+//
+// Here:
+// - The semantics of @llvm.experimental.widenable.condition allows replacing
+// %widenable_cond with the construction (%widenable_cond & %any_other_cond)
+// without loss of correctness;
+// - %guarded is the lower part of old guard intrinsic's parent block split by
+// the intrinsic call;
+// - %deopt is a block containing a sole call to @llvm.experimental.deoptimize
+// intrinsic.
+//
+// Therefore, this branch preserves the property of widenability.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_SCALAR_MAKEGUARDSEXPLICIT_H
+#define LLVM_TRANSFORMS_SCALAR_MAKEGUARDSEXPLICIT_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct MakeGuardsExplicitPass : public PassInfoMixin<MakeGuardsExplicitPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif //LLVM_TRANSFORMS_SCALAR_MAKEGUARDSEXPLICIT_H
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h b/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h
index 2a294c95a17b..0abbb32fde6a 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h
@@ -21,15 +21,17 @@
#ifndef LLVM_TRANSFORMS_SCALAR_SCCP_H
#define LLVM_TRANSFORMS_SCALAR_SCCP_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Utils/PredicateInfo.h"
namespace llvm {
-class Function;
+class PostDominatorTree;
/// This pass performs function-level constant propagation and merging.
class SCCPPass : public PassInfoMixin<SCCPPass> {
@@ -37,7 +39,15 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-bool runIPSCCP(Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI);
+/// Helper struct for bundling up the analysis results per function for IPSCCP.
+struct AnalysisResultsForFn {
+ std::unique_ptr<PredicateInfo> PredInfo;
+ DominatorTree *DT;
+ PostDominatorTree *PDT;
+};
+
+bool runIPSCCP(Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ function_ref<AnalysisResultsForFn(Function &)> getAnalysis);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_SCALAR_SCCP_H
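The new runIPSCCP signature pushes analysis construction onto the caller. A hedged sketch of the callback, assuming a new-pass-manager caller with a FunctionAnalysisManager FAM and a TargetLibraryInfo TLI; the exact analysis plumbing is an assumption, not part of this header:

auto GetAnalysis = [&](Function &F) -> AnalysisResultsForFn {
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  return {llvm::make_unique<PredicateInfo>(
              F, DT, FAM.getResult<AssumptionAnalysis>(F)),
          &DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F)};
};
bool Changed = runIPSCCP(M, M.getDataLayout(), &TLI, GetAnalysis);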
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/contrib/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
new file mode 100644
index 000000000000..1a0b9a2b638c
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -0,0 +1,35 @@
+//===- Scalarizer.h --- Scalarize vector operations -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass converts vector operations into scalar operations, in order
+/// to expose optimization opportunities on the individual scalar operations.
+/// It is mainly intended for targets that do not have vector units, but it
+/// may also be useful for revectorizing code to different vector widths.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_SCALARIZER_H
+#define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class ScalarizerPass : public PassInfoMixin<ScalarizerPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Create a legacy pass manager instance of the Scalarizer pass
+FunctionPass *createScalarizerPass();
+
+}
+
+#endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h b/contrib/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
new file mode 100644
index 000000000000..018b22a932e6
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
@@ -0,0 +1,38 @@
+//===- WarnMissedTransforms.h -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit warnings if forced code transformations have not been performed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
+#define LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Function;
+class Loop;
+class LPMUpdater;
+
+// New pass manager boilerplate.
+class WarnMissedTransformationsPass
+ : public PassInfoMixin<WarnMissedTransformationsPass> {
+public:
+ explicit WarnMissedTransformationsPass() {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+// Legacy pass manager boilerplate.
+Pass *createWarnMissedTransformationsPass();
+void initializeWarnMissedTransformationsLegacyPass(PassRegistry &);
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils.h b/contrib/llvm/include/llvm/Transforms/Utils.h
index 0d997ce17b83..378552775c77 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils.h
@@ -113,6 +113,13 @@ extern char &LoopSimplifyID;
/// This function returns a new pass that downgrades the debug info in the
/// module to line tables only.
ModulePass *createStripNonLineTableDebugInfoPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ControlHeightReduction - Merges conditional blocks of code and reduces the
+// number of conditional branches in the hot paths based on profiles.
+//
+FunctionPass *createControlHeightReductionLegacyPass();
}
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 3dfc73b64842..5b16a2c0d0b1 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/InstrTypes.h"
#include <cassert>
@@ -27,19 +28,27 @@ namespace llvm {
class BlockFrequencyInfo;
class BranchProbabilityInfo;
-class DeferredDominance;
class DominatorTree;
+class DomTreeUpdater;
class Function;
class Instruction;
class LoopInfo;
class MDNode;
class MemoryDependenceResults;
+class MemorySSAUpdater;
class ReturnInst;
class TargetLibraryInfo;
class Value;
/// Delete the specified block, which must have no predecessors.
-void DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT = nullptr);
+void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
+
+/// Delete the specified blocks \p BBs. The set of deleted blocks must have no
+/// predecessors that are not being deleted themselves. \p BBs must contain no
+/// duplicate blocks. If there are loops among this set of blocks, all relevant
+/// loop info updates should be done before this function is called.
+void DeleteDeadBlocks(SmallVectorImpl<BasicBlock *> &BBs,
+ DomTreeUpdater *DTU = nullptr);
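A small sketch (assumed caller context: a Function F and a DominatorTree DT) of batching unreachable blocks and letting a lazy DomTreeUpdater apply the queued updates:

DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
SmallVector<BasicBlock *, 8> DeadBBs;
for (BasicBlock &BB : F)
  if (!DT.isReachableFromEntry(&BB))
    DeadBBs.push_back(&BB);  // predecessors of unreachable blocks are also
                             // unreachable, so the precondition holds
DeleteDeadBlocks(DeadBBs, &DTU);
DTU.flush();                 // apply the pending dominator tree updates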
/// We know that BB has one predecessor. If there are any single-entry PHI nodes
/// in it, fold them away. This handles the case when all entries to the PHI
@@ -56,10 +65,10 @@ bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI = nullptr);
/// Attempts to merge a block into its predecessor, if possible. The return
/// value indicates success or failure.
-bool MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT = nullptr,
+bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU = nullptr,
LoopInfo *LI = nullptr,
- MemoryDependenceResults *MemDep = nullptr,
- DeferredDominance *DDT = nullptr);
+ MemorySSAUpdater *MSSAU = nullptr,
+ MemoryDependenceResults *MemDep = nullptr);
/// Replace all uses of an instruction (specified by BI) with a value, then
/// remove and delete the original instruction.
@@ -84,13 +93,15 @@ void ReplaceInstWithInst(Instruction *From, Instruction *To);
struct CriticalEdgeSplittingOptions {
DominatorTree *DT;
LoopInfo *LI;
+ MemorySSAUpdater *MSSAU;
bool MergeIdenticalEdges = false;
bool DontDeleteUselessPHIs = false;
bool PreserveLCSSA = false;
CriticalEdgeSplittingOptions(DominatorTree *DT = nullptr,
- LoopInfo *LI = nullptr)
- : DT(DT), LI(LI) {}
+ LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr)
+ : DT(DT), LI(LI), MSSAU(MSSAU) {}
CriticalEdgeSplittingOptions &setMergeIdenticalEdges() {
MergeIdenticalEdges = true;
@@ -124,7 +135,7 @@ struct CriticalEdgeSplittingOptions {
/// IndirectBrInst. Splitting these edges will almost always create an invalid
/// program because the address of the new block won't be the one that is jumped
/// to.
-BasicBlock *SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+BasicBlock *SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
const CriticalEdgeSplittingOptions &Options =
CriticalEdgeSplittingOptions());
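Sketch of the extended options struct in use; DT, LI, MSSAU, BB and SuccIdx are assumed from the caller and the example is illustrative only:

CriticalEdgeSplittingOptions Options(DT, LI, MSSAU);
Options.setMergeIdenticalEdges();
BasicBlock *NewBB =
    SplitCriticalEdge(BB->getTerminator(), SuccIdx, Options);
// NewBB (null if the edge could not be split) sits on the formerly critical
// edge, and MemorySSA has been kept in sync through MSSAU.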
@@ -144,7 +155,7 @@ inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI,
const CriticalEdgeSplittingOptions &Options =
CriticalEdgeSplittingOptions()) {
bool MadeChange = false;
- TerminatorInst *TI = (*PI)->getTerminator();
+ Instruction *TI = (*PI)->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (TI->getSuccessor(i) == Succ)
MadeChange |= !!SplitCriticalEdge(TI, i, Options);
@@ -158,7 +169,7 @@ inline BasicBlock *
SplitCriticalEdge(BasicBlock *Src, BasicBlock *Dst,
const CriticalEdgeSplittingOptions &Options =
CriticalEdgeSplittingOptions()) {
- TerminatorInst *TI = Src->getTerminator();
+ Instruction *TI = Src->getTerminator();
unsigned i = 0;
while (true) {
assert(i != TI->getNumSuccessors() && "Edge doesn't exist!");
@@ -176,14 +187,16 @@ unsigned SplitAllCriticalEdges(Function &F,
/// Split the edge connecting specified block.
BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To,
- DominatorTree *DT = nullptr, LoopInfo *LI = nullptr);
+ DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr);
/// Split the specified block at the specified instruction - everything before
/// SplitPt stays in Old and everything starting with SplitPt moves to a new
/// block. The two blocks are joined by an unconditional branch and the loop
/// info is updated.
BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt,
- DominatorTree *DT = nullptr, LoopInfo *LI = nullptr);
+ DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr);
/// This method introduces at least one new basic block into the function and
/// moves some of the predecessors of BB to be predecessors of the new block.
@@ -203,6 +216,7 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
const char *Suffix,
DominatorTree *DT = nullptr,
LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr,
bool PreserveLCSSA = false);
/// This method transforms the landing pad, OrigBB, by introducing two new basic
@@ -216,20 +230,19 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
/// no other analyses. In particular, it does not preserve LoopSimplify
/// (because it's complicated to handle the case where one of the edges being
/// split is an exit of a loop with other exits).
-void SplitLandingPadPredecessors(BasicBlock *OrigBB,
- ArrayRef<BasicBlock *> Preds,
- const char *Suffix, const char *Suffix2,
- SmallVectorImpl<BasicBlock *> &NewBBs,
- DominatorTree *DT = nullptr,
- LoopInfo *LI = nullptr,
- bool PreserveLCSSA = false);
+void SplitLandingPadPredecessors(
+ BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix,
+ const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
+ DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr, bool PreserveLCSSA = false);
/// This method duplicates the specified return instruction into a predecessor
/// which ends in an unconditional branch. If the return instruction returns a
/// value defined by a PHI, propagate the right value into the return. It
/// returns the new return instruction in the predecessor.
ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
- BasicBlock *Pred);
+ BasicBlock *Pred,
+ DomTreeUpdater *DTU = nullptr);
/// Split the containing block at the specified instruction - everything before
/// SplitBefore stays in the old basic block, and the rest of the instructions
@@ -251,11 +264,11 @@ ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
/// Returns the NewBasicBlock's terminator.
///
/// Updates DT and LI if given.
-TerminatorInst *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
- bool Unreachable,
- MDNode *BranchWeights = nullptr,
- DominatorTree *DT = nullptr,
- LoopInfo *LI = nullptr);
+Instruction *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights = nullptr,
+ DominatorTree *DT = nullptr,
+ LoopInfo *LI = nullptr);
/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen,
/// but also creates the ElseBlock.
@@ -272,8 +285,8 @@ TerminatorInst *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
/// SplitBefore
/// Tail
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
- TerminatorInst **ThenTerm,
- TerminatorInst **ElseTerm,
+ Instruction **ThenTerm,
+ Instruction **ElseTerm,
MDNode *BranchWeights = nullptr);
/// Check whether BB is the merge point of a if-region.
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index eafe07f49284..28efce6ac3fb 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -37,6 +37,12 @@ namespace llvm {
LibFunc DoubleFn, LibFunc FloatFn,
LibFunc LongDoubleFn);
+ /// Get the name of the overloaded unary floating point function
+ /// corresponding to \a Ty.
+ StringRef getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn);
+
/// Return V if it is an i8*, otherwise cast it to i8*.
Value *castToCStr(Value *V, IRBuilder<> &B);
@@ -94,6 +100,13 @@ namespace llvm {
Value *emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
const AttributeList &Attrs);
+ /// Emit a call to the unary function DoubleFn, FloatFn or LongDoubleFn,
+ /// depending on the type of Op.
+ Value *emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn, IRBuilder<> &B,
+ const AttributeList &Attrs);
+
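Illustrative only: emitting the correctly-suffixed libm call for an operand's type with the new overload; Op, TLI, B and Attrs are assumed from the caller's context.

// Picks sin, sinf or sinl based on Op's floating point type.
Value *Sin = emitUnaryFloatFnCall(Op, TLI, LibFunc_sin, LibFunc_sinf,
                                  LibFunc_sinl, B, Attrs);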
/// Emit a call to the binary function named 'Name' (e.g. 'fmin'). This
/// function is known to take type matching 'Op1' and 'Op2' and return one
/// value with the same type. If 'Op1/Op2' are long double, 'l' is added as
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h b/contrib/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h
new file mode 100644
index 000000000000..f23263783fec
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h
@@ -0,0 +1,32 @@
+//===-- CanonicalizeAliases.h - Alias Canonicalization Pass -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file canonicalizes aliases.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_CANONICALIZE_ALIASES_H
+#define LLVM_TRANSFORMS_UTILS_CANONICALIZE_ALIASES_H
+
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Simple pass that canonicalizes aliases.
+class CanonicalizeAliasesPass : public PassInfoMixin<CanonicalizeAliasesPass> {
+public:
+ CanonicalizeAliasesPass() = default;
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_CANONICALIZE_ALIASES_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
index 7531fb2d69b3..f5e997324fc8 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -46,9 +47,9 @@ class LoopInfo;
class Module;
class ProfileSummaryInfo;
class ReturnInst;
+class DomTreeUpdater;
/// Return an exact copy of the specified module
-///
std::unique_ptr<Module> CloneModule(const Module &M);
std::unique_ptr<Module> CloneModule(const Module &M, ValueToValueMapTy &VMap);
@@ -60,17 +61,15 @@ std::unique_ptr<Module>
CloneModule(const Module &M, ValueToValueMapTy &VMap,
function_ref<bool(const GlobalValue *)> ShouldCloneDefinition);
-/// ClonedCodeInfo - This struct can be used to capture information about code
+/// This struct can be used to capture information about code
/// being cloned, while it is being cloned.
struct ClonedCodeInfo {
- /// ContainsCalls - This is set to true if the cloned code contains a normal
- /// call instruction.
+ /// This is set to true if the cloned code contains a normal call instruction.
bool ContainsCalls = false;
- /// ContainsDynamicAllocas - This is set to true if the cloned code contains
- /// a 'dynamic' alloca. Dynamic allocas are allocas that are either not in
- /// the entry block or they are in the entry block but are not a constant
- /// size.
+ /// This is set to true if the cloned code contains a 'dynamic' alloca.
+ /// Dynamic allocas are allocas that are either not in the entry block or they
+ /// are in the entry block but are not a constant size.
bool ContainsDynamicAllocas = false;
/// All cloned call sites that have operand bundles attached are appended to
@@ -81,7 +80,7 @@ struct ClonedCodeInfo {
ClonedCodeInfo() = default;
};
-/// CloneBasicBlock - Return a copy of the specified basic block, but without
+/// Return a copy of the specified basic block, but without
/// embedding the block into a particular function. The block returned is an
/// exact copy of the specified basic block, without any remapping having been
/// performed. Because of this, this is only suitable for applications where
@@ -108,13 +107,12 @@ struct ClonedCodeInfo {
/// If you would like to collect additional information about the cloned
/// function, you can specify a ClonedCodeInfo object with the optional fifth
/// parameter.
-///
BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix = "", Function *F = nullptr,
ClonedCodeInfo *CodeInfo = nullptr,
DebugInfoFinder *DIFinder = nullptr);
-/// CloneFunction - Return a copy of the specified function and add it to that
+/// Return a copy of the specified function and add it to that
/// function's module. Also, any references specified in the VMap are changed
/// to refer to their mapped value instead of the original one. If any of the
/// arguments to the function are in the VMap, the arguments are deleted from
@@ -153,7 +151,7 @@ void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = nullptr);
-/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
/// effect of this is to copy significantly less code in cases where (for
/// example) a function call with constant arguments is inlined, and those
@@ -171,8 +169,8 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ClonedCodeInfo *CodeInfo = nullptr,
Instruction *TheCall = nullptr);
-/// InlineFunctionInfo - This class captures the data input to the
-/// InlineFunction call, and records the auxiliary results produced by it.
+/// This class captures the data input to the InlineFunction call, and records
+/// the auxiliary results produced by it.
class InlineFunctionInfo {
public:
explicit InlineFunctionInfo(CallGraph *cg = nullptr,
@@ -184,19 +182,19 @@ public:
: CG(cg), GetAssumptionCache(GetAssumptionCache), PSI(PSI),
CallerBFI(CallerBFI), CalleeBFI(CalleeBFI) {}
- /// CG - If non-null, InlineFunction will update the callgraph to reflect the
+ /// If non-null, InlineFunction will update the callgraph to reflect the
/// changes it makes.
CallGraph *CG;
std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
ProfileSummaryInfo *PSI;
BlockFrequencyInfo *CallerBFI, *CalleeBFI;
- /// StaticAllocas - InlineFunction fills this in with all static allocas that
- /// get copied into the caller.
+ /// InlineFunction fills this in with all static allocas that get copied into
+ /// the caller.
SmallVector<AllocaInst *, 4> StaticAllocas;
- /// InlinedCalls - InlineFunction fills this in with callsites that were
- /// inlined from the callee. This is only filled in if CG is non-null.
+ /// InlineFunction fills this in with callsites that were inlined from the
+ /// callee. This is only filled in if CG is non-null.
SmallVector<WeakTrackingVH, 8> InlinedCalls;
/// All of the new call sites inlined into the caller.
@@ -213,7 +211,7 @@ public:
}
};
-/// InlineFunction - This function inlines the called function into the basic
+/// This function inlines the called function into the basic
/// block of the caller. This returns false if it is not possible to inline
/// this call. The program is still in a well defined state if this occurs
/// though.
@@ -232,13 +230,16 @@ public:
/// and all varargs at the callsite will be passed to any calls to
/// ForwardVarArgsTo. The caller of InlineFunction has to make sure any varargs
/// are only used by ForwardVarArgsTo.
-bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
-bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
-bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR = nullptr, bool InsertLifetime = true,
- Function *ForwardVarArgsTo = nullptr);
+InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR = nullptr,
+ bool InsertLifetime = true);
+InlineResult InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR = nullptr,
+ bool InsertLifetime = true);
+InlineResult InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR = nullptr,
+ bool InsertLifetime = true,
+ Function *ForwardVarArgsTo = nullptr);
/// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
/// Blocks.
@@ -262,11 +263,12 @@ void remapInstructionsInBlocks(const SmallVectorImpl<BasicBlock *> &Blocks,
/// we replace them with the uses of corresponding Phi inputs. ValueMapping
/// is used to map the original instructions from BB to their newly-created
/// copies. Returns the split block.
-BasicBlock *
-DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
- Instruction *StopAt,
- ValueToValueMapTy &ValueMapping,
- DominatorTree *DT = nullptr);
+BasicBlock *DuplicateInstructionsInSplitBetween(BasicBlock *BB,
+ BasicBlock *PredBB,
+ Instruction *StopAt,
+ ValueToValueMapTy &ValueMapping,
+ DomTreeUpdater &DTU);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CLONING_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 0e5254acb0d3..fee79fdc3bff 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include <limits>
namespace llvm {
@@ -26,6 +27,7 @@ class BasicBlock;
class BlockFrequency;
class BlockFrequencyInfo;
class BranchProbabilityInfo;
+class CallInst;
class DominatorTree;
class Function;
class Instruction;
@@ -64,6 +66,11 @@ class Value;
unsigned NumExitBlocks = std::numeric_limits<unsigned>::max();
Type *RetTy;
+ // Suffix to use when creating the extracted function (appended to the
+ // original function name + "."). If empty, the default is to use the entry
+ // block label if it is non-empty, otherwise "extracted".
+ std::string Suffix;
+
public:
/// Create a code extractor for a sequence of blocks.
///
@@ -78,7 +85,8 @@ class Value;
CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
BranchProbabilityInfo *BPI = nullptr,
- bool AllowVarArgs = false, bool AllowAlloca = false);
+ bool AllowVarArgs = false, bool AllowAlloca = false,
+ std::string Suffix = "");
/// Create a code extractor for a loop body.
///
@@ -86,7 +94,8 @@ class Value;
/// block sequence of the loop.
CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs = false,
BlockFrequencyInfo *BFI = nullptr,
- BranchProbabilityInfo *BPI = nullptr);
+ BranchProbabilityInfo *BPI = nullptr,
+ std::string Suffix = "");
/// Perform the extraction, returning the new function.
///
@@ -139,7 +148,8 @@ class Value;
BasicBlock *findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock);
private:
- void severSplitPHINodes(BasicBlock *&Header);
+ void severSplitPHINodesOfEntry(BasicBlock *&Header);
+ void severSplitPHINodesOfExits(const SmallPtrSetImpl<BasicBlock *> &Exits);
void splitReturnBlocks();
Function *constructFunction(const ValueSet &inputs,
@@ -155,10 +165,9 @@ class Value;
DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
BranchProbabilityInfo *BPI);
- void emitCallAndSwitchStatement(Function *newFunction,
- BasicBlock *newHeader,
- ValueSet &inputs,
- ValueSet &outputs);
+ CallInst *emitCallAndSwitchStatement(Function *newFunction,
+ BasicBlock *newHeader,
+ ValueSet &inputs, ValueSet &outputs);
};
} // end namespace llvm
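As a rough illustration of the new Suffix parameter, the sketch below outlines a region into a function whose name ends in ".cold"; the helper name, the choice of flags, and the extractCodeRegion() entry point are assumptions made for the example rather than part of this change.

#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"

using namespace llvm;

// Outline the blocks in `Region` into a new function whose name ends with
// ".cold" instead of the default entry-block-label / "extracted" suffix.
static Function *outlineColdRegion(ArrayRef<BasicBlock *> Region,
                                   DominatorTree &DT) {
  CodeExtractor CE(Region, &DT, /*AggregateArgs=*/false, /*BFI=*/nullptr,
                   /*BPI=*/nullptr, /*AllowVarArgs=*/false,
                   /*AllowAlloca=*/false, /*Suffix=*/"cold");
  // The extraction entry point may return nullptr if the region cannot be
  // extracted.
  return CE.extractCodeRegion();
}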
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
index b9fbef04cdc3..e24398b90012 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -114,6 +114,9 @@ bool renameModuleForThinLTO(
Module &M, const ModuleSummaryIndex &Index,
SetVector<GlobalValue *> *GlobalsToImport = nullptr);
+/// Compute synthetic function entry counts.
+void computeSyntheticCounts(ModuleSummaryIndex &Index);
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/GuardUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/GuardUtils.h
new file mode 100644
index 000000000000..537045edafe4
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/GuardUtils.h
@@ -0,0 +1,30 @@
+//===-- GuardUtils.h - Utils for work with guards ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform transformations related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_GUARDUTILS_H
+#define LLVM_TRANSFORMS_UTILS_GUARDUTILS_H
+
+namespace llvm {
+
+class CallInst;
+class Function;
+
+/// Splits control flow at the point of \p Guard, replacing it with an explicit
+/// branch on the condition of the guard's first argument. The taken branch
+/// then goes to the block that contains \p Guard's successors, and the
+/// non-taken branch goes to a newly-created deopt block that contains a sole
+/// call to the deoptimize function \p DeoptIntrinsic.
+void makeGuardControlFlowExplicit(Function *DeoptIntrinsic, CallInst *Guard);
+
+} // llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_GUARDUTILS_H
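To illustrate the contract documented above, the following hedged sketch lowers a single guard call; how the deoptimize intrinsic declaration is obtained (here via Intrinsic::getDeclaration for the void overload) is an assumption of the example.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/GuardUtils.h"

using namespace llvm;

// Lower one guard intrinsic call into explicit control flow.
static void lowerGuard(CallInst *Guard) {
  Module *M = Guard->getModule();
  // Declaration of the deoptimize intrinsic (assumed here to be the void
  // overload) that the newly created deopt block will call.
  Function *DeoptIntrinsic = Intrinsic::getDeclaration(
      M, Intrinsic::experimental_deoptimize,
      {Type::getVoidTy(M->getContext())});
  // Replaces the guard with a conditional branch on its first argument; the
  // non-taken edge jumps to a block containing a sole deoptimize call.
  makeGuardControlFlowExplicit(DeoptIntrinsic, Guard);
}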
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Local.h b/contrib/llvm/include/llvm/Transforms/Utils/Local.h
index b8df32565723..ec8b0eda3641 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/Local.h
@@ -26,6 +26,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
@@ -43,7 +44,7 @@ class AssumptionCache;
class BasicBlock;
class BranchInst;
class CallInst;
-class DbgInfoIntrinsic;
+class DbgVariableIntrinsic;
class DbgValueInst;
class DIBuilder;
class Function;
@@ -51,6 +52,7 @@ class Instruction;
class LazyValueInfo;
class LoadInst;
class MDNode;
+class MemorySSAUpdater;
class PHINode;
class StoreInst;
class TargetLibraryInfo;
@@ -120,7 +122,7 @@ struct SimplifyCFGOptions {
/// DeleteDeadConditions is true.
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions = false,
const TargetLibraryInfo *TLI = nullptr,
- DeferredDominance *DDT = nullptr);
+ DomTreeUpdater *DTU = nullptr);
//===----------------------------------------------------------------------===//
// Local dead code elimination.
@@ -140,8 +142,9 @@ bool wouldInstructionBeTriviallyDead(Instruction *I,
/// If the specified value is a trivially dead instruction, delete it.
/// If that makes any of its operands trivially dead, delete them too,
/// recursively. Return true if any instructions were deleted.
-bool RecursivelyDeleteTriviallyDeadInstructions(Value *V,
- const TargetLibraryInfo *TLI = nullptr);
+bool RecursivelyDeleteTriviallyDeadInstructions(
+ Value *V, const TargetLibraryInfo *TLI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr);
/// Delete all of the instructions in `DeadInsts`, and all other instructions
/// that deleting these in turn causes to be trivially dead.
@@ -153,7 +156,7 @@ bool RecursivelyDeleteTriviallyDeadInstructions(Value *V,
/// empty afterward.
void RecursivelyDeleteTriviallyDeadInstructions(
SmallVectorImpl<Instruction *> &DeadInsts,
- const TargetLibraryInfo *TLI = nullptr);
+ const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr);
/// If the specified value is an effectively dead PHI node, due to being a
/// def-use chain of single-use nodes that either forms a cycle or is terminated
@@ -171,6 +174,12 @@ bool RecursivelyDeleteDeadPHINode(PHINode *PN,
bool SimplifyInstructionsInBlock(BasicBlock *BB,
const TargetLibraryInfo *TLI = nullptr);
+/// Replace all the uses of an SSA value in @llvm.dbg intrinsics with
+/// undef. This is useful for signaling, e.g., that a variable has been
+/// found dead and hence is unavailable at a given program point.
+/// Returns true if the dbg values have been changed.
+bool replaceDbgUsesWithUndef(Instruction *I);
+
//===----------------------------------------------------------------------===//
// Control Flow Graph Restructuring.
//
@@ -187,20 +196,19 @@ bool SimplifyInstructionsInBlock(BasicBlock *BB,
/// .. and delete the predecessor corresponding to the '1', this will attempt to
/// recursively fold the 'and' to 0.
void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DeferredDominance *DDT = nullptr);
+ DomTreeUpdater *DTU = nullptr);
/// BB is a block with one predecessor and its predecessor is known to have one
/// successor (BB!). Eliminate the edge between them, moving the instructions in
/// the predecessor into BB. This deletes the predecessor block.
-void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DominatorTree *DT = nullptr,
- DeferredDominance *DDT = nullptr);
+void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
/// BB is known to contain an unconditional branch, and contains no instructions
/// other than PHI nodes, potential debug intrinsics and the branch. If
/// possible, eliminate BB by rewriting all the predecessors to branch to the
/// successor block and return true. If we can't transform, return false.
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
- DeferredDominance *DDT = nullptr);
+ DomTreeUpdater *DTU = nullptr);
/// Check for and eliminate duplicate PHI nodes in this block. This doesn't try
/// to be clever about PHI nodes which differ only in the order of the incoming
@@ -270,17 +278,17 @@ inline unsigned getKnownAlignment(Value *V, const DataLayout &DL,
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
StoreInst *SI, DIBuilder &Builder);
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
LoadInst *LI, DIBuilder &Builder);
/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
/// llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
PHINode *LI, DIBuilder &Builder);
/// Lowers llvm.dbg.declare intrinsics into appropriate set of
@@ -294,13 +302,13 @@ void insertDebugValuesForPHIs(BasicBlock *BB,
/// Finds all intrinsics declaring local variables as living in the memory that
/// 'V' points to. This may include a mix of dbg.declare and
/// dbg.addr intrinsics.
-TinyPtrVector<DbgInfoIntrinsic *> FindDbgAddrUses(Value *V);
+TinyPtrVector<DbgVariableIntrinsic *> FindDbgAddrUses(Value *V);
/// Finds the llvm.dbg.value intrinsics describing a value.
void findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V);
/// Finds the debug info intrinsics describing a value.
-void findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgInsts, Value *V);
+void findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgInsts, Value *V);
/// Replaces llvm.dbg.declare instruction when the address it
/// describes is replaced with a new value. If Deref is true, an
@@ -359,7 +367,7 @@ unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB);
/// instruction, making it and the rest of the code in the block dead.
unsigned changeToUnreachable(Instruction *I, bool UseLLVMTrap,
bool PreserveLCSSA = false,
- DeferredDominance *DDT = nullptr);
+ DomTreeUpdater *DTU = nullptr);
/// Convert the CallInst to InvokeInst with the specified unwind edge basic
/// block. This also splits the basic block where CI is located, because
@@ -374,24 +382,36 @@ BasicBlock *changeToInvokeAndSplitBasicBlock(CallInst *CI,
///
/// \param BB Block whose terminator will be replaced. Its terminator must
/// have an unwind successor.
-void removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT = nullptr);
+void removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
/// Remove all blocks that can not be reached from the function's entry.
///
/// Returns true if any basic block was removed.
bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr,
- DeferredDominance *DDT = nullptr);
+ DomTreeUpdater *DTU = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr);
-/// Combine the metadata of two instructions so that K can replace J
+/// Combine the metadata of two instructions so that K can replace J. Some
+/// metadata kinds can only be kept if K does not move, meaning it dominated
+/// J in the original IR.
///
/// Metadata not listed as known via KnownIDs is removed
-void combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> KnownIDs);
+void combineMetadata(Instruction *K, const Instruction *J,
+ ArrayRef<unsigned> KnownIDs, bool DoesKMove);
/// Combine the metadata of two instructions so that K can replace J. This
-/// specifically handles the case of CSE-like transformations.
+/// specifically handles the case of CSE-like transformations. Some
+/// metadata can only be kept if K dominates J. For this to be correct,
+/// K cannot be hoisted.
///
/// Unknown metadata is removed.
-void combineMetadataForCSE(Instruction *K, const Instruction *J);
+void combineMetadataForCSE(Instruction *K, const Instruction *J,
+ bool DoesKMove);
+
+/// Patch the replacement so that it is not more restrictive than the value
+/// being replaced. It assumes that the replacement does not get moved from
+/// its original position.
+void patchReplacementInstruction(Instruction *I, Value *Repl);
// Replace each use of 'From' with 'To', if that use does not belong to basic
// block where 'From' is defined. Returns the number of replacements made.
@@ -429,6 +449,18 @@ void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI);
void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N,
LoadInst &NewLI);
+/// Remove the debug intrinsic instructions for the given instruction.
+void dropDebugUsers(Instruction &I);
+
+/// Hoist all of the instructions in block \p BB to the dominating block
+/// \p DomBlock, by moving its instructions to the insertion point \p InsertPt.
+///
+/// The moved instructions receive the insertion point debug location values
+/// (DILocations) and their debug intrinsic instructions (dbg.values) are
+/// removed.
+void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
+ BasicBlock *BB);
+
//===----------------------------------------------------------------------===//
// Intrinsic pattern matching
//
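The utilities above now take a DomTreeUpdater (and, in some cases, a MemorySSAUpdater) instead of the old DeferredDominance. A minimal sketch of driving two of them with a lazily flushed updater follows; the helper name and the decision not to maintain MemorySSA are assumptions of the example.

#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

static bool foldAndCleanUp(Function &F, DominatorTree &DT) {
  // Batch CFG updates and apply them to the dominator tree lazily.
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  bool Changed = false;
  for (BasicBlock &BB : F)
    Changed |= ConstantFoldTerminator(&BB, /*DeleteDeadConditions=*/true,
                                      /*TLI=*/nullptr, &DTU);
  // Remove whatever became unreachable; no MemorySSA is maintained here.
  Changed |= removeUnreachableBlocks(F, /*LVI=*/nullptr, &DTU,
                                     /*MSSAU=*/nullptr);
  DTU.flush();
  return Changed;
}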
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
index 231e5bbb6dee..cd5bc4301018 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
@@ -20,6 +20,7 @@ class AssumptionCache;
class DominatorTree;
class Loop;
class LoopInfo;
+class MemorySSAUpdater;
class ScalarEvolution;
struct SimplifyQuery;
class TargetTransformInfo;
@@ -32,8 +33,8 @@ class TargetTransformInfo;
/// LoopRotation. If it is true, the profitability heuristic will be ignored.
bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE,
- const SimplifyQuery &SQ, bool RotationOnly,
- unsigned Threshold, bool IsUtilMode);
+ MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ,
+ bool RotationOnly, unsigned Threshold, bool IsUtilMode);
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index eb4c99102a63..8c2527b6ae68 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -23,6 +23,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
@@ -40,6 +41,7 @@ class BasicBlock;
class DataLayout;
class Loop;
class LoopInfo;
+class MemorySSAUpdater;
class OptimizationRemarkEmitter;
class PredicatedScalarEvolution;
class PredIteratorCache;
@@ -48,318 +50,6 @@ class SCEV;
class TargetLibraryInfo;
class TargetTransformInfo;
-
-/// The RecurrenceDescriptor is used to identify recurrence variables in a
-/// loop. Reduction is a special case of recurrence that has uses of the
-/// recurrence variable outside the loop. The method isReductionPHI identifies
-/// reductions that are basic recurrences.
-///
-/// Basic recurrences are defined as the summation, product, OR, AND, XOR, min,
-/// or max of a set of terms. For example: for(i=0; i<n; i++) { total +=
-/// array[i]; } is a summation of array elements. Basic recurrences are a
-/// special case of chains of recurrences (CR). See ScalarEvolution for CR
-/// references.
-
-/// This struct holds information about recurrence variables.
-class RecurrenceDescriptor {
-public:
- /// This enum represents the kinds of recurrences that we support.
- enum RecurrenceKind {
- RK_NoRecurrence, ///< Not a recurrence.
- RK_IntegerAdd, ///< Sum of integers.
- RK_IntegerMult, ///< Product of integers.
- RK_IntegerOr, ///< Bitwise or logical OR of numbers.
- RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
- RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
- RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
- RK_FloatAdd, ///< Sum of floats.
- RK_FloatMult, ///< Product of floats.
- RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
- };
-
- // This enum represents the kind of minmax recurrence.
- enum MinMaxRecurrenceKind {
- MRK_Invalid,
- MRK_UIntMin,
- MRK_UIntMax,
- MRK_SIntMin,
- MRK_SIntMax,
- MRK_FloatMin,
- MRK_FloatMax
- };
-
- RecurrenceDescriptor() = default;
-
- RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
- MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT,
- bool Signed, SmallPtrSetImpl<Instruction *> &CI)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),
- UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
- CastInsts.insert(CI.begin(), CI.end());
- }
-
- /// This POD struct holds information about a potential recurrence operation.
- class InstDesc {
- public:
- InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr)
- : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid),
- UnsafeAlgebraInst(UAI) {}
-
- InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr)
- : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K),
- UnsafeAlgebraInst(UAI) {}
-
- bool isRecurrence() { return IsRecurrence; }
-
- bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
-
- Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
-
- MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; }
-
- Instruction *getPatternInst() { return PatternLastInst; }
-
- private:
- // Is this instruction a recurrence candidate.
- bool IsRecurrence;
- // The last instruction in a min/max pattern (select of the select(icmp())
- // pattern), or the current recurrence instruction otherwise.
- Instruction *PatternLastInst;
- // If this is a min/max pattern the comparison predicate.
- MinMaxRecurrenceKind MinMaxKind;
- // Recurrence has unsafe algebra.
- Instruction *UnsafeAlgebraInst;
- };
-
- /// Returns a struct describing if the instruction 'I' can be a recurrence
- /// variable of type 'Kind'. If the recurrence is a min/max pattern of
- /// select(icmp()) this function advances the instruction pointer 'I' from the
- /// compare instruction to the select instruction and stores this pointer in
- /// 'PatternLastInst' member of the returned struct.
- static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
- InstDesc &Prev, bool HasFunNoNaNAttr);
-
- /// Returns true if instruction I has multiple uses in Insts
- static bool hasMultipleUsesOf(Instruction *I,
- SmallPtrSetImpl<Instruction *> &Insts);
-
- /// Returns true if all uses of the instruction I are within the Set.
- static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set);
-
- /// Returns a struct describing if the instruction is a
- /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
- /// or max(X, Y).
- static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev);
-
- /// Returns identity corresponding to the RecurrenceKind.
- static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp);
-
- /// Returns the opcode of binary operation corresponding to the
- /// RecurrenceKind.
- static unsigned getRecurrenceBinOp(RecurrenceKind Kind);
-
- /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
- static Value *createMinMaxOp(IRBuilder<> &Builder, MinMaxRecurrenceKind RK,
- Value *Left, Value *Right);
-
- /// Returns true if Phi is a reduction of type Kind and adds it to the
- /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
- /// non-null, the minimal bit width needed to compute the reduction will be
- /// computed.
- static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop,
- bool HasFunNoNaNAttr,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB = nullptr,
- AssumptionCache *AC = nullptr,
- DominatorTree *DT = nullptr);
-
- /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
- /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
- /// non-null, the minimal bit width needed to compute the reduction will be
- /// computed.
- static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB = nullptr,
- AssumptionCache *AC = nullptr,
- DominatorTree *DT = nullptr);
-
- /// Returns true if Phi is a first-order recurrence. A first-order recurrence
- /// is a non-reduction recurrence relation in which the value of the
- /// recurrence in the current loop iteration equals a value defined in the
- /// previous iteration. \p SinkAfter includes pairs of instructions where the
- /// first will be rescheduled to appear after the second if/when the loop is
- /// vectorized. It may be augmented with additional pairs if needed in order
- /// to handle Phi as a first-order recurrence.
- static bool
- isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
- DenseMap<Instruction *, Instruction *> &SinkAfter,
- DominatorTree *DT);
-
- RecurrenceKind getRecurrenceKind() { return Kind; }
-
- MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
-
- TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }
-
- Instruction *getLoopExitInstr() { return LoopExitInstr; }
-
- /// Returns true if the recurrence has unsafe algebra which requires a relaxed
- /// floating-point model.
- bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
-
- /// Returns first unsafe algebra instruction in the PHI node's use-chain.
- Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
-
- /// Returns true if the recurrence kind is an integer kind.
- static bool isIntegerRecurrenceKind(RecurrenceKind Kind);
-
- /// Returns true if the recurrence kind is a floating point kind.
- static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind);
-
- /// Returns true if the recurrence kind is an arithmetic kind.
- static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);
-
- /// Returns the type of the recurrence. This type can be narrower than the
- /// actual type of the Phi if the recurrence has been type-promoted.
- Type *getRecurrenceType() { return RecurrenceType; }
-
- /// Returns a reference to the instructions used for type-promoting the
- /// recurrence.
- SmallPtrSet<Instruction *, 8> &getCastInsts() { return CastInsts; }
-
- /// Returns true if all source operands of the recurrence are SExtInsts.
- bool isSigned() { return IsSigned; }
-
-private:
- // The starting value of the recurrence.
- // It does not have to be zero!
- TrackingVH<Value> StartValue;
- // The instruction whose value is used outside the loop.
- Instruction *LoopExitInstr = nullptr;
- // The kind of the recurrence.
- RecurrenceKind Kind = RK_NoRecurrence;
- // If this a min/max recurrence the kind of recurrence.
- MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
- // First occurrence of unsafe algebra in the PHI's use-chain.
- Instruction *UnsafeAlgebraInst = nullptr;
- // The type of the recurrence.
- Type *RecurrenceType = nullptr;
- // True if all source operands of the recurrence are SExtInsts.
- bool IsSigned = false;
- // Instructions used for type-promoting the recurrence.
- SmallPtrSet<Instruction *, 8> CastInsts;
-};
-
-/// A struct for saving information about induction variables.
-class InductionDescriptor {
-public:
- /// This enum represents the kinds of inductions that we support.
- enum InductionKind {
- IK_NoInduction, ///< Not an induction variable.
- IK_IntInduction, ///< Integer induction variable. Step = C.
- IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem).
- IK_FpInduction ///< Floating point induction variable.
- };
-
-public:
- /// Default constructor - creates an invalid induction.
- InductionDescriptor() = default;
-
- /// Get the consecutive direction. Returns:
- /// 0 - unknown or non-consecutive.
- /// 1 - consecutive and increasing.
- /// -1 - consecutive and decreasing.
- int getConsecutiveDirection() const;
-
- /// Compute the transformed value of Index at offset StartValue using step
- /// StepValue.
- /// For integer induction, returns StartValue + Index * StepValue.
- /// For pointer induction, returns StartValue[Index * StepValue].
- /// FIXME: The newly created binary instructions should contain nsw/nuw
- /// flags, which can be found from the original scalar operations.
- Value *transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE,
- const DataLayout& DL) const;
-
- Value *getStartValue() const { return StartValue; }
- InductionKind getKind() const { return IK; }
- const SCEV *getStep() const { return Step; }
- ConstantInt *getConstIntStepValue() const;
-
- /// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an
- /// induction, the induction descriptor \p D will contain the data describing
- /// this induction. If by some other means the caller has a better SCEV
- /// expression for \p Phi than the one returned by the ScalarEvolution
- /// analysis, it can be passed through \p Expr. If the def-use chain
- /// associated with the phi includes casts (that we know we can ignore
- /// under proper runtime checks), they are passed through \p CastsToIgnore.
- static bool
- isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE,
- InductionDescriptor &D, const SCEV *Expr = nullptr,
- SmallVectorImpl<Instruction *> *CastsToIgnore = nullptr);
-
- /// Returns true if \p Phi is a floating point induction in the loop \p L.
- /// If \p Phi is an induction, the induction descriptor \p D will contain
- /// the data describing this induction.
- static bool isFPInductionPHI(PHINode *Phi, const Loop* L,
- ScalarEvolution *SE, InductionDescriptor &D);
-
- /// Returns true if \p Phi is a loop \p L induction, in the context associated
- /// with the run-time predicate of PSE. If \p Assume is true, this can add
- /// further SCEV predicates to \p PSE in order to prove that \p Phi is an
- /// induction.
- /// If \p Phi is an induction, \p D will contain the data describing this
- /// induction.
- static bool isInductionPHI(PHINode *Phi, const Loop* L,
- PredicatedScalarEvolution &PSE,
- InductionDescriptor &D, bool Assume = false);
-
- /// Returns true if the induction type is FP and the binary operator does
- /// not have the "fast-math" property. Such operation requires a relaxed FP
- /// mode.
- bool hasUnsafeAlgebra() {
- return InductionBinOp && !cast<FPMathOperator>(InductionBinOp)->isFast();
- }
-
- /// Returns induction operator that does not have "fast-math" property
- /// and requires FP unsafe mode.
- Instruction *getUnsafeAlgebraInst() {
- if (!InductionBinOp || cast<FPMathOperator>(InductionBinOp)->isFast())
- return nullptr;
- return InductionBinOp;
- }
-
- /// Returns binary opcode of the induction operator.
- Instruction::BinaryOps getInductionOpcode() const {
- return InductionBinOp ? InductionBinOp->getOpcode() :
- Instruction::BinaryOpsEnd;
- }
-
- /// Returns a reference to the type cast instructions in the induction
- /// update chain, that are redundant when guarded with a runtime
- /// SCEV overflow check.
- const SmallVectorImpl<Instruction *> &getCastInsts() const {
- return RedundantCasts;
- }
-
-private:
- /// Private constructor - used by \c isInductionPHI.
- InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
- BinaryOperator *InductionBinOp = nullptr,
- SmallVectorImpl<Instruction *> *Casts = nullptr);
-
- /// Start value.
- TrackingVH<Value> StartValue;
- /// Induction kind.
- InductionKind IK = IK_NoInduction;
- /// Step value.
- const SCEV *Step = nullptr;
- // Instruction that advances induction variable.
- BinaryOperator *InductionBinOp = nullptr;
- // Instructions used for type-casts of the induction variable,
- // that are redundant when guarded with a runtime SCEV overflow check.
- SmallVector<Instruction *, 2> RedundantCasts;
-};
-
BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA);
@@ -420,7 +110,7 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, TargetTransformInfo *, Loop *,
- AliasSetTracker *, LoopSafetyInfo *,
+ AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *ORE);
/// Walk the specified region of the CFG (defined by all blocks
@@ -433,7 +123,8 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
/// ORE. It returns changed status.
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, Loop *, AliasSetTracker *,
- LoopSafetyInfo *, OptimizationRemarkEmitter *ORE);
+ MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ OptimizationRemarkEmitter *ORE);
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead.
@@ -462,7 +153,8 @@ bool promoteLoopAccessesToScalars(const SmallSetVector<Value *, 8> &,
SmallVectorImpl<Instruction *> &,
PredIteratorCache &, LoopInfo *,
DominatorTree *, const TargetLibraryInfo *,
- Loop *, AliasSetTracker *, LoopSafetyInfo *,
+ Loop *, AliasSetTracker *,
+ ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *);
/// Does a BFS from a given node to all of its children inside a given loop.
@@ -478,9 +170,80 @@ SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);
/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
-Optional<const MDOperand *> findStringMetadataForLoop(Loop *TheLoop,
+Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
StringRef Name);
+/// Find named metadata for a loop with an integer value.
+llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);
+
+/// Create a new loop identifier for a loop created from a loop transformation.
+///
+/// @param OrigLoopID The loop ID of the loop before the transformation.
+/// @param FollowupAttrs List of attribute names that contain attributes to be
+/// added to the new loop ID.
+/// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited
+/// from the original loop. The following values
+/// are considered:
+/// nullptr : Inherit all attributes from @p OrigLoopID.
+/// "" : Do not inherit any attribute from @p OrigLoopID; only use
+/// those specified by a followup attribute.
+/// "<prefix>": Inherit all attributes except those which start with
+/// <prefix>; commonly used to remove metadata for the
+/// applied transformation.
+/// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return
+/// None.
+///
+/// @return The loop ID for the after-transformation loop. The following values
+/// can be returned:
+/// None : No followup attribute was found; it is up to the
+/// transformation to choose attributes that make sense.
+/// @p OrigLoopID: The original identifier can be reused.
+/// nullptr : The new loop has no attributes.
+/// MDNode* : A new unique loop identifier.
+Optional<MDNode *>
+makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs,
+ const char *InheritOptionsAttrsPrefix = "",
+ bool AlwaysNew = false);
+
+/// Look for the loop attribute that disables all transformation heuristics.
+bool hasDisableAllTransformsHint(const Loop *L);
+
+/// The mode sets how eager a transformation should be applied.
+enum TransformationMode {
+ /// The pass can use heuristics to determine whether a transformation should
+ /// be applied.
+ TM_Unspecified,
+
+ /// The transformation should be applied without considering a cost model.
+ TM_Enable,
+
+ /// The transformation should not be applied.
+ TM_Disable,
+
+ /// Force is a flag and should not be used alone.
+ TM_Force = 0x04,
+
+ /// The transformation was directed by the user, e.g. by a #pragma in
+ /// the source code. If the transformation could not be applied, a
+ /// warning should be emitted.
+ TM_ForcedByUser = TM_Enable | TM_Force,
+
+ /// The transformation must not be applied. For instance, `#pragma clang loop
+ /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
+ /// general loop metadata, it must not be dropped. Most passes should not
+ /// behave differently under TM_Disable and TM_SuppressedByUser.
+ TM_SuppressedByUser = TM_Disable | TM_Force
+};
+
+/// @{
+/// Get the mode for LLVM's supported loop transformations.
+TransformationMode hasUnrollTransformation(Loop *L);
+TransformationMode hasUnrollAndJamTransformation(Loop *L);
+TransformationMode hasVectorizeTransformation(Loop *L);
+TransformationMode hasDistributeTransformation(Loop *L);
+TransformationMode hasLICMVersioningTransformation(Loop *L);
+/// @}
+
/// Set input string into loop metadata by keeping other values intact.
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
unsigned V = 0);
@@ -490,6 +253,11 @@ void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
/// estimate can not be made.
Optional<unsigned> getLoopEstimatedTripCount(Loop *L);
+/// Check whether the inner loop (L) backedge count is known to be invariant on
+/// all iterations of its outer loop. If the loop has no parent, this is
+/// trivially true.
+bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE);
+
/// Helper to consistently add the set of standard passes to a loop pass's \c
/// AnalysisUsage.
///
@@ -497,18 +265,25 @@ Optional<unsigned> getLoopEstimatedTripCount(Loop *L);
/// getAnalysisUsage.
void getLoopAnalysisUsage(AnalysisUsage &AU);
-/// Returns true if the hoister and sinker can handle this instruction.
-/// If SafetyInfo is null, we are checking for sinking instructions from
-/// preheader to loop body (no speculation).
-/// If SafetyInfo is not null, we are checking for hoisting/sinking
-/// instructions from loop body to preheader/exit. Check if the instruction
-/// can execute speculatively.
+/// Returns true if it is legal to hoist or sink this instruction disregarding the
+/// possible introduction of faults. Reasoning about potential faulting
+/// instructions is the responsibility of the caller since it is challenging to
+/// do efficiently from within this routine.
+/// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the
+/// target executes at most once per execution of the loop body. This is used
+/// to assess the legality of duplicating atomic loads. Generally, this is
+/// true when moving out of loop and not true when moving into loops.
/// If \p ORE is set use it to emit optimization remarks.
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
- LoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
OptimizationRemarkEmitter *ORE = nullptr);
+/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
+Value *createMinMaxOp(IRBuilder<> &Builder,
+ RecurrenceDescriptor::MinMaxRecurrenceKind RK,
+ Value *Left, Value *Right);
+
/// Generates an ordered vector reduction using extracts to reduce the value.
Value *
getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, unsigned Op,
@@ -527,12 +302,12 @@ Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
/// additional information supplied in \p Flags.
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
-Value *
-createSimpleTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,
- unsigned Opcode, Value *Src,
- TargetTransformInfo::ReductionFlags Flags =
- TargetTransformInfo::ReductionFlags(),
- ArrayRef<Value *> RedOps = None);
+Value *createSimpleTargetReduction(IRBuilder<> &B,
+ const TargetTransformInfo *TTI,
+ unsigned Opcode, Value *Src,
+ TargetTransformInfo::ReductionFlags Flags =
+ TargetTransformInfo::ReductionFlags(),
+ ArrayRef<Value *> RedOps = None);
/// Create a generic target reduction using a recurrence descriptor \p Desc
/// The target is queried to determine if intrinsics or shuffle sequences are
@@ -548,6 +323,23 @@ Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,
/// Flag set: NSW, NUW, exact, and all of fast-math.
void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);
+/// Returns true if we can prove that \p S is defined and always negative in
+/// loop \p L.
+bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);
+
+/// Returns true if we can prove that \p S is defined and always non-negative in
+/// loop \p L.
+bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE);
+
+/// Returns true if \p S is defined and is never equal to signed/unsigned max.
+bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed);
+
+/// Returns true if \p S is defined and is never equal to signed/unsigned min.
+bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
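A sketch of how a transformation might combine makeFollowupLoopID with the unroll followup attribute names declared further down in UnrollLoop.h; the helper name and the chosen inherit prefix are illustrative assumptions.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"

using namespace llvm;

static void tagUnrolledLoop(Loop &Unrolled, MDNode *OrigLoopID) {
  // Inherit all attributes except the just-consumed "llvm.loop.unroll."
  // options, and append whatever followup_unrolled requested.
  Optional<MDNode *> NewLoopID = makeFollowupLoopID(
      OrigLoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupUnrolled},
      /*InheritOptionsAttrsPrefix=*/"llvm.loop.unroll.");
  // None means no followup attribute was found and the pass may pick its own
  // metadata; nullptr means the new loop carries no attributes at all.
  if (NewLoopID && *NewLoopID)
    Unrolled.setLoopID(*NewLoopID);
}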
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
index 14615c25d093..fee492be2a90 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -58,6 +58,24 @@ std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions(
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
StringRef VersionCheckName = StringRef());
+/// Creates the sanitizer constructor and init functions lazily. If they
+/// already exist, this function returns them. Otherwise it calls \c
+/// createSanitizerCtorAndInitFunctions; in that case the
+/// FunctionsCreatedCallback is invoked, passing the new Ctor and Init function.
+///
+/// \return Returns pair of pointers to constructor, and init functions
+/// respectively.
+std::pair<Function *, Function *> getOrCreateSanitizerCtorAndInitFunctions(
+ Module &M, StringRef CtorName, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+ function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
+ StringRef VersionCheckName = StringRef());
+
+// Creates and returns a sanitizer init function with no arguments if it doesn't
+// exist, and adds it to the global constructors list. Otherwise it returns the
+// existing function.
+Function *getOrCreateInitFunction(Module &M, StringRef Name);
+
/// Rename all the anon globals in the module using a hash computed from
/// the list of public globals in the module.
bool nameUnamedGlobals(Module &M);
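A hedged sketch of the lazy variant in use; the ctor/init names, the priority, and the callback body are assumptions of the example, not values implied by this change.

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <tuple>

using namespace llvm;

static void ensureRuntimeInit(Module &M) {
  Function *Ctor = nullptr, *Init = nullptr;
  std::tie(Ctor, Init) = getOrCreateSanitizerCtorAndInitFunctions(
      M, /*CtorName=*/"mysan.module_ctor", /*InitName=*/"__mysan_init",
      /*InitArgTypes=*/{}, /*InitArgs=*/{},
      // Runs only when the ctor/init pair had to be created just now.
      [&](Function *NewCtor, Function *NewInit) {
        appendToGlobalCtors(M, NewCtor, /*Priority=*/0);
      });
  (void)Ctor;
  (void)Init;
}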
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/contrib/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
index b53eda7e5a42..2fc38089f3f1 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -60,6 +60,7 @@
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/OrderedInstructions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -76,7 +77,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index d007f909c6a4..025bcd44e310 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -77,21 +77,34 @@ private:
OptimizationRemarkEmitter &ORE;
bool UnsafeFPShrink;
function_ref<void(Instruction *, Value *)> Replacer;
+ function_ref<void(Instruction *)> Eraser;
/// Internal wrapper for RAUW that is the default implementation.
///
/// Other users may provide an alternate function with this signature instead
/// of this one.
- static void replaceAllUsesWithDefault(Instruction *I, Value *With);
+ static void replaceAllUsesWithDefault(Instruction *I, Value *With) {
+ I->replaceAllUsesWith(With);
+ }
+
+ /// Internal wrapper for eraseFromParent that is the default implementation.
+ static void eraseFromParentDefault(Instruction *I) { I->eraseFromParent(); }
/// Replace an instruction's uses with a value using our replacer.
void replaceAllUsesWith(Instruction *I, Value *With);
+ /// Erase an instruction from its parent with our eraser.
+ void eraseFromParent(Instruction *I);
+
+ Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B);
+
public:
- LibCallSimplifier(const DataLayout &DL, const TargetLibraryInfo *TLI,
- OptimizationRemarkEmitter &ORE,
- function_ref<void(Instruction *, Value *)> Replacer =
- &replaceAllUsesWithDefault);
+ LibCallSimplifier(
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ OptimizationRemarkEmitter &ORE,
+ function_ref<void(Instruction *, Value *)> Replacer =
+ &replaceAllUsesWithDefault,
+ function_ref<void(Instruction *)> Eraser = &eraseFromParentDefault);
/// optimizeCall - Take the given call instruction and return a more
/// optimal value to replace the instruction with or 0 if a more
@@ -131,8 +144,8 @@ private:
// Math Library Optimizations
Value *optimizeCAbs(CallInst *CI, IRBuilder<> &B);
- Value *optimizeCos(CallInst *CI, IRBuilder<> &B);
Value *optimizePow(CallInst *CI, IRBuilder<> &B);
+ Value *replacePowWithExp(CallInst *Pow, IRBuilder<> &B);
Value *replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B);
Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
Value *optimizeFMinFMax(CallInst *CI, IRBuilder<> &B);
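The new Eraser hook mirrors the existing Replacer hook. A sketch of a caller that defers both replacement and erasure into its own dead-instruction list follows; the helper signature and the exact optimizeCall entry point are assumed for illustration.

#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <vector>

using namespace llvm;

static Value *simplifyOneCall(CallInst *CI, const DataLayout &DL,
                              const TargetLibraryInfo *TLI,
                              OptimizationRemarkEmitter &ORE,
                              std::vector<Instruction *> &DeadInsts) {
  // Defer both RAUW and erasure so the caller's worklist stays consistent.
  auto Replacer = [&](Instruction *I, Value *With) {
    I->replaceAllUsesWith(With);
    DeadInsts.push_back(I);
  };
  auto Eraser = [&](Instruction *I) { DeadInsts.push_back(I); };
  LibCallSimplifier Simplifier(DL, TLI, ORE, Replacer, Eraser);
  return Simplifier.optimizeCall(CI);
}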
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index a6b84af068a5..70e936d75008 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -35,6 +35,15 @@ class ScalarEvolution;
using NewLoopsMap = SmallDenseMap<const Loop *, Loop *, 4>;
+/// @{
+/// Metadata attribute names
+const char *const LLVMLoopUnrollFollowupAll = "llvm.loop.unroll.followup_all";
+const char *const LLVMLoopUnrollFollowupUnrolled =
+ "llvm.loop.unroll.followup_unrolled";
+const char *const LLVMLoopUnrollFollowupRemainder =
+ "llvm.loop.unroll.followup_remainder";
+/// @}
+
const Loop* addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
BasicBlock *ClonedBB, LoopInfo *LI,
NewLoopsMap &NewLoops);
@@ -61,15 +70,16 @@ LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
unsigned PeelCount, bool UnrollRemainder,
LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, bool PreserveLCSSA);
+ OptimizationRemarkEmitter *ORE, bool PreserveLCSSA,
+ Loop **RemainderLoop = nullptr);
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder, bool UnrollRemainder,
- LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC,
- bool PreserveLCSSA);
+ LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, AssumptionCache *AC,
+ bool PreserveLCSSA,
+ Loop **ResultLoop = nullptr);
void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
@@ -84,7 +94,8 @@ LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
unsigned TripMultiple, bool UnrollRemainder,
LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE);
+ OptimizationRemarkEmitter *ORE,
+ Loop **EpilogueLoop = nullptr);
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
DependenceInfo &DI);
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize.h b/contrib/llvm/include/llvm/Transforms/Vectorize.h
index 950af7ffe05f..70f9a2e0741b 100644
--- a/contrib/llvm/include/llvm/Transforms/Vectorize.h
+++ b/contrib/llvm/include/llvm/Transforms/Vectorize.h
@@ -110,8 +110,8 @@ struct VectorizeConfig {
//
// LoopVectorize - Create a loop vectorization pass.
//
-Pass *createLoopVectorizePass(bool NoUnrolling = false,
- bool AlwaysVectorize = true);
+Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced = false,
+ bool VectorizeOnlyWhenForced = false);
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h b/contrib/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
new file mode 100644
index 000000000000..6b37d7093c44
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
@@ -0,0 +1,27 @@
+//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_LOADSTOREVECTORIZER_H
+#define LLVM_TRANSFORMS_VECTORIZE_LOADSTOREVECTORIZER_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class LoadStoreVectorizerPass : public PassInfoMixin<LoadStoreVectorizerPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Create a legacy pass manager instance of the LoadStoreVectorizer pass
+Pass *createLoadStoreVectorizerPass();
+
+}
+
+#endif /* LLVM_TRANSFORMS_VECTORIZE_LOADSTOREVECTORIZER_H */
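A minimal sketch of running the new-pass-manager wrapper declared here; building the FunctionAnalysisManager is assumed to happen elsewhere.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"

using namespace llvm;

static PreservedAnalyses vectorizeLoadsAndStores(Function &F,
                                                 FunctionAnalysisManager &FAM) {
  FunctionPassManager FPM;
  // Merge adjacent loads and stores into vector accesses where profitable.
  FPM.addPass(LoadStoreVectorizerPass());
  return FPM.run(F, FAM);
}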
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 224879cdba52..5c7bba048607 100644
--- a/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -95,7 +95,7 @@ public:
FK_Enabled = 1, ///< Forcing enabled.
};
- LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
+ LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
OptimizationRemarkEmitter &ORE);
/// Mark the loop L as already vectorized by setting the width to 1.
@@ -105,7 +105,8 @@ public:
writeHintsToMetadata(Hints);
}
- bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const;
+ bool allowVectorization(Function *F, Loop *L,
+ bool VectorizeOnlyWhenForced) const;
/// Dumps all the hint information.
void emitRemarkWithHints() const;
@@ -113,7 +114,12 @@ public:
unsigned getWidth() const { return Width.Value; }
unsigned getInterleave() const { return Interleave.Value; }
unsigned getIsVectorized() const { return IsVectorized.Value; }
- enum ForceKind getForce() const { return (ForceKind)Force.Value; }
+ enum ForceKind getForce() const {
+ if ((ForceKind)Force.Value == FK_Undefined &&
+ hasDisableAllTransformsHint(TheLoop))
+ return FK_Disabled;
+ return (ForceKind)Force.Value;
+ }
/// If hints are provided that force vectorization, use the AlwaysPrint
/// pass name to force the frontend to print the diagnostic.
@@ -241,6 +247,10 @@ public:
/// If false, good old LV code.
bool canVectorize(bool UseVPlanNativePath);
+ /// Return true if we can vectorize this loop while folding its tail by
+ /// masking.
+ bool canFoldTailByMasking();
+
/// Returns the primary induction variable.
PHINode *getPrimaryInduction() { return PrimaryInduction; }
@@ -332,6 +342,11 @@ private:
/// If false, good old LV code.
bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
+ /// Set up outer loop inductions by checking Phis in outer loop header for
+ /// supported inductions (int inductions). Return false if any of these Phis
+ /// is not a supported induction or if we fail to find an induction.
+ bool setupOuterLoopInductions();
+
/// Return true if the pre-header, exiting and latch blocks of \p Lp
/// (non-recursive) are considered legal for vectorization.
/// Temporarily taking UseVPlanNativePath parameter. If true, take
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index d79d84691803..d9c4f7b023c1 100644
--- a/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -78,12 +78,13 @@ class TargetTransformInfo;
/// The LoopVectorize Pass.
struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
- bool DisableUnrolling = false;
+ /// If false, consider all loops for interleaving.
+ /// If true, only loops that explicitly request interleaving are considered.
+ bool InterleaveOnlyWhenForced = false;
- /// If true, consider all loops for vectorization.
- /// If false, only loops that explicitly request vectorization are
- /// considered.
- bool AlwaysVectorize = true;
+ /// If false, consider all loops for vectorization.
+ /// If true, only loops that explicitly request vectorization are considered.
+ bool VectorizeOnlyWhenForced = false;
ScalarEvolution *SE;
LoopInfo *LI;
diff --git a/contrib/llvm/include/llvm/XRay/BlockIndexer.h b/contrib/llvm/include/llvm/XRay/BlockIndexer.h
new file mode 100644
index 000000000000..b42fa17f3fb7
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/BlockIndexer.h
@@ -0,0 +1,69 @@
+//===- BlockIndexer.h - FDR Block Indexing Visitor ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the RecordVisitor which generates a mapping between a
+// thread and a range of records representing a block.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_XRAY_BLOCKINDEXER_H_
+#define LLVM_LIB_XRAY_BLOCKINDEXER_H_
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/XRay/FDRRecords.h"
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+namespace xray {
+
+// The BlockIndexer will gather all related records associated with a
+// process+thread and group them by 'Block'.
+class BlockIndexer : public RecordVisitor {
+public:
+ struct Block {
+ uint64_t ProcessID;
+ int32_t ThreadID;
+ WallclockRecord *WallclockTime;
+ std::vector<Record *> Records;
+ };
+
+ // This maps the process + thread combination to a sequence of blocks.
+ using Index = DenseMap<std::pair<uint64_t, int32_t>, std::vector<Block>>;
+
+private:
+ Index &Indices;
+
+ Block CurrentBlock{0, 0, nullptr, {}};
+
+public:
+ explicit BlockIndexer(Index &I) : RecordVisitor(), Indices(I) {}
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+
+ /// The flush() function will clear out the current state of the visitor, to
+ /// allow for explicitly flushing a block's records to the currently
+ /// recognized thread and process combination.
+ Error flush();
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_LIB_XRAY_BLOCKINDEXER_H_
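A sketch of indexing an already-loaded record sequence; the Record::apply() dispatch used here is assumed from the visitor interface in FDRRecords.h rather than shown in this diff.

#include "llvm/Support/Error.h"
#include "llvm/XRay/BlockIndexer.h"
#include "llvm/XRay/FDRRecords.h"
#include <memory>
#include <vector>

using namespace llvm;
using namespace llvm::xray;

static Error indexLog(std::vector<std::unique_ptr<Record>> &Records,
                      BlockIndexer::Index &Index) {
  BlockIndexer Indexer(Index);
  for (auto &R : Records)
    // Each record dispatches into the matching visit() overload and gets
    // grouped under the current (process, thread) block.
    if (auto E = R->apply(Indexer))
      return E;
  // Flush the trailing block, which has no explicit end marker of its own.
  return Indexer.flush();
}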
diff --git a/contrib/llvm/include/llvm/XRay/BlockPrinter.h b/contrib/llvm/include/llvm/XRay/BlockPrinter.h
new file mode 100644
index 000000000000..bfb21e239517
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/BlockPrinter.h
@@ -0,0 +1,62 @@
+//===- BlockPrinter.h - FDR Block Pretty Printer -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the RecordVisitor which formats a block of records for
+// easier human consumption.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_BLOCKPRINTER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_BLOCKPRINTER_H_
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/RecordPrinter.h"
+
+namespace llvm {
+namespace xray {
+
+class BlockPrinter : public RecordVisitor {
+ enum class State {
+ Start,
+ Preamble,
+ Metadata,
+ Function,
+ Arg,
+ CustomEvent,
+ End,
+ };
+
+ raw_ostream &OS;
+ RecordPrinter &RP;
+ State CurrentState = State::Start;
+
+public:
+ explicit BlockPrinter(raw_ostream &O, RecordPrinter &P)
+ : RecordVisitor(), OS(O), RP(P) {}
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+
+ void reset() { CurrentState = State::Start; }
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_BLOCKPRINTER_H_
diff --git a/contrib/llvm/include/llvm/XRay/BlockVerifier.h b/contrib/llvm/include/llvm/XRay/BlockVerifier.h
new file mode 100644
index 000000000000..46371c13891a
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/BlockVerifier.h
@@ -0,0 +1,72 @@
+//===- BlockVerifier.h - FDR Block Verifier -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the RecordVisitor which verifies a sequence of records
+// associated with a block, following the FDR mode log format's specifications.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_BLOCKVERIFIER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_BLOCKVERIFIER_H_
+
+#include "llvm/XRay/FDRRecords.h"
+#include <array>
+#include <bitset>
+
+namespace llvm {
+namespace xray {
+
+class BlockVerifier : public RecordVisitor {
+public:
+ // We force State elements to be size_t, to be used as indices for containers.
+ enum class State : std::size_t {
+ Unknown,
+ BufferExtents,
+ NewBuffer,
+ WallClockTime,
+ PIDEntry,
+ NewCPUId,
+ TSCWrap,
+ CustomEvent,
+ TypedEvent,
+ Function,
+ CallArg,
+ EndOfBuffer,
+ StateMax,
+ };
+
+private:
+ // We keep track of the current record seen by the verifier.
+ State CurrentRecord = State::Unknown;
+
+ // Transitions the current record to the new record, records an error on
+ // invalid transitions.
+ Error transition(State To);
+
+public:
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+
+ Error verify();
+ void reset();
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_BLOCKVERIFIER_H_
diff --git a/contrib/llvm/include/llvm/XRay/FDRLogBuilder.h b/contrib/llvm/include/llvm/XRay/FDRLogBuilder.h
new file mode 100644
index 000000000000..b5e9ed5c406b
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRLogBuilder.h
@@ -0,0 +1,41 @@
+//===- FDRLogBuilder.h - XRay FDR Log Building Utility --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_FDRLOGBUILDER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_FDRLOGBUILDER_H_
+
+#include "llvm/XRay/FDRRecords.h"
+
+namespace llvm {
+namespace xray {
+
+/// The LogBuilder class allows for creating ad-hoc collections of records
+/// through the `add<...>(...)` function. An example use of this API is in
+/// crafting arbitrary sequences of records:
+///
+/// auto Records = LogBuilder()
+/// .add<BufferExtents>(256)
+/// .add<NewBufferRecord>(1)
+/// .consume();
+///
+class LogBuilder {
+ std::vector<std::unique_ptr<Record>> Records;
+
+public:
+ template <class R, class... T> LogBuilder &add(T &&... A) {
+ Records.emplace_back(new R(std::forward<T>(A)...));
+ return *this;
+ }
+
+ std::vector<std::unique_ptr<Record>> consume() { return std::move(Records); }
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_FDRLOGBUILDER_H_
diff --git a/contrib/llvm/include/llvm/XRay/FDRRecordConsumer.h b/contrib/llvm/include/llvm/XRay/FDRRecordConsumer.h
new file mode 100644
index 000000000000..e856e1540558
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRRecordConsumer.h
@@ -0,0 +1,55 @@
+//===- FDRRecordConsumer.h - XRay Flight Data Recorder Mode Records -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_FDRRECORDCONSUMER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_FDRRECORDCONSUMER_H_
+
+#include "llvm/Support/Error.h"
+#include "llvm/XRay/FDRRecords.h"
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+namespace llvm {
+namespace xray {
+
+class RecordConsumer {
+public:
+ virtual Error consume(std::unique_ptr<Record> R) = 0;
+ virtual ~RecordConsumer() = default;
+};
+
+// This consumer will collect all the records into a vector of records, in
+// arrival order.
+class LogBuilderConsumer : public RecordConsumer {
+ std::vector<std::unique_ptr<Record>> &Records;
+
+public:
+ explicit LogBuilderConsumer(std::vector<std::unique_ptr<Record>> &R)
+ : RecordConsumer(), Records(R) {}
+
+ Error consume(std::unique_ptr<Record> R) override;
+};
+
+// A PipelineConsumer applies a set of visitors to every consumed Record, in
+// the order in which the visitors were added to the pipeline.
+class PipelineConsumer : public RecordConsumer {
+ std::vector<RecordVisitor *> Visitors;
+
+public:
+ PipelineConsumer(std::initializer_list<RecordVisitor *> V)
+ : RecordConsumer(), Visitors(V) {}
+
+ Error consume(std::unique_ptr<Record> R) override;
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_FDRRECORDCONSUMER_H_
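
A sketch of how the two consumers are meant to be combined with visitors added elsewhere in this change (RecordPrinter and BlockVerifier come from the other new headers; llvm::outs() is from llvm/Support/raw_ostream.h):

    // Collect records verbatim, in arrival order.
    std::vector<std::unique_ptr<llvm::xray::Record>> Collected;
    llvm::xray::LogBuilderConsumer Collector(Collected);

    // Or fan each record out to several visitors, applied in this order.
    llvm::xray::RecordPrinter Printer(llvm::outs(), "\n");
    llvm::xray::BlockVerifier Verifier;
    llvm::xray::PipelineConsumer Pipeline({&Printer, &Verifier});
    // if (auto E = Pipeline.consume(std::move(R)))
    //   ... handle the error ...
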
diff --git a/contrib/llvm/include/llvm/XRay/FDRRecordProducer.h b/contrib/llvm/include/llvm/XRay/FDRRecordProducer.h
new file mode 100644
index 000000000000..efdba2a67b7b
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRRecordProducer.h
@@ -0,0 +1,51 @@
+//===- FDRRecordProducer.h - XRay FDR Mode Record Producer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_FDRRECORDPRODUCER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_FDRRECORDPRODUCER_H_
+
+#include "llvm/Support/Error.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/XRayRecord.h"
+#include <memory>
+
+namespace llvm {
+namespace xray {
+
+class RecordProducer {
+public:
+ /// All producer implementations must yield either an Error or a non-nullptr
+ /// unique_ptr<Record>.
+ virtual Expected<std::unique_ptr<Record>> produce() = 0;
+ virtual ~RecordProducer() = default;
+};
+
+class FileBasedRecordProducer : public RecordProducer {
+ const XRayFileHeader &Header;
+ DataExtractor &E;
+ uint32_t &OffsetPtr;
+ uint32_t CurrentBufferBytes = 0;
+
+ // Helper function which gets the next record by speculatively reading
+ // through the log until it finds a buffer extents record.
+ Expected<std::unique_ptr<Record>> findNextBufferExtent();
+
+public:
+ FileBasedRecordProducer(const XRayFileHeader &FH, DataExtractor &DE,
+ uint32_t &OP)
+ : Header(FH), E(DE), OffsetPtr(OP) {}
+
+ /// This producer encapsulates the logic for loading a File-backed
+ /// RecordProducer hidden behind a DataExtractor.
+ Expected<std::unique_ptr<Record>> produce() override;
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_FDRRECORDPRODUCER_H_
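
A sketch of the intended produce/consume loop, under the assumption that Data holds the raw bytes of an FDR log; readBinaryFormatHeader is declared in FileHeaderReader.h below, and the DataExtractor calls come from llvm/Support/DataExtractor.h:

    // A sketch only: drive the producer until the extractor is exhausted.
    llvm::Error loadRecords(llvm::StringRef Data, llvm::xray::RecordConsumer &C) {
      llvm::DataExtractor DE(Data, /*IsLittleEndian=*/true, /*AddressSize=*/8);
      uint32_t OffsetPtr = 0;
      auto HeaderOrErr = llvm::xray::readBinaryFormatHeader(DE, OffsetPtr);
      if (!HeaderOrErr)
        return HeaderOrErr.takeError();
      llvm::xray::FileBasedRecordProducer P(*HeaderOrErr, DE, OffsetPtr);
      while (DE.isValidOffset(OffsetPtr)) {
        auto R = P.produce();  // yields a non-null record or an Error
        if (!R)
          return R.takeError();
        if (auto E = C.consume(std::move(*R)))
          return E;
      }
      return llvm::Error::success();
    }
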
diff --git a/contrib/llvm/include/llvm/XRay/FDRRecords.h b/contrib/llvm/include/llvm/XRay/FDRRecords.h
new file mode 100644
index 000000000000..8a84f4d0c1fb
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRRecords.h
@@ -0,0 +1,450 @@
+//===- FDRRecords.h - XRay Flight Data Recorder Mode Records --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define types and operations on these types that represent the different kinds
+// of records we encounter in XRay flight data recorder mode traces.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_XRAY_FDRRECORDS_H_
+#define LLVM_LIB_XRAY_FDRRECORDS_H_
+
+#include <cstdint>
+#include <string>
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Error.h"
+#include "llvm/XRay/XRayRecord.h"
+
+namespace llvm {
+namespace xray {
+
+class RecordVisitor;
+class RecordInitializer;
+
+class Record {
+public:
+ enum class RecordKind {
+ RK_Metadata,
+ RK_Metadata_BufferExtents,
+ RK_Metadata_WallClockTime,
+ RK_Metadata_NewCPUId,
+ RK_Metadata_TSCWrap,
+ RK_Metadata_CustomEvent,
+ RK_Metadata_CustomEventV5,
+ RK_Metadata_CallArg,
+ RK_Metadata_PIDEntry,
+ RK_Metadata_NewBuffer,
+ RK_Metadata_EndOfBuffer,
+ RK_Metadata_TypedEvent,
+ RK_Metadata_LastMetadata,
+ RK_Function,
+ };
+
+ static StringRef kindToString(RecordKind K);
+
+private:
+ const RecordKind T;
+
+public:
+ Record(const Record &) = delete;
+ Record(Record &&) = delete;
+ Record &operator=(const Record &) = delete;
+ Record &operator=(Record &&) = delete;
+ explicit Record(RecordKind T) : T(T) {}
+
+ RecordKind getRecordType() const { return T; }
+
+ // Each Record should be able to apply an abstract visitor, and choose the
+ // appropriate function in the visitor to invoke, given its own type.
+ virtual Error apply(RecordVisitor &V) = 0;
+
+ virtual ~Record() = default;
+};
+
+class MetadataRecord : public Record {
+public:
+ enum class MetadataType : unsigned {
+ Unknown,
+ BufferExtents,
+ WallClockTime,
+ NewCPUId,
+ TSCWrap,
+ CustomEvent,
+ CallArg,
+ PIDEntry,
+ NewBuffer,
+ EndOfBuffer,
+ TypedEvent,
+ };
+
+protected:
+ static constexpr int kMetadataBodySize = 15;
+ friend class RecordInitializer;
+
+private:
+ const MetadataType MT;
+
+public:
+ explicit MetadataRecord(RecordKind T, MetadataType M) : Record(T), MT(M) {}
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() >= RecordKind::RK_Metadata &&
+ R->getRecordType() <= RecordKind::RK_Metadata_LastMetadata;
+ }
+
+ MetadataType metadataType() const { return MT; }
+
+ virtual ~MetadataRecord() = default;
+};
+
+// What follows are the specific metadata record types, each of which
+// encapsulates the information associated with one kind of metadata record in
+// an FDR mode log.
+class BufferExtents : public MetadataRecord {
+ uint64_t Size = 0;
+ friend class RecordInitializer;
+
+public:
+ BufferExtents()
+ : MetadataRecord(RecordKind::RK_Metadata_BufferExtents,
+ MetadataType::BufferExtents) {}
+
+ explicit BufferExtents(uint64_t S)
+ : MetadataRecord(RecordKind::RK_Metadata_BufferExtents,
+ MetadataType::BufferExtents),
+ Size(S) {}
+
+ uint64_t size() const { return Size; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_BufferExtents;
+ }
+};
+
+class WallclockRecord : public MetadataRecord {
+ uint64_t Seconds = 0;
+ uint32_t Nanos = 0;
+ friend class RecordInitializer;
+
+public:
+ WallclockRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_WallClockTime,
+ MetadataType::WallClockTime) {}
+
+ explicit WallclockRecord(uint64_t S, uint32_t N)
+ : MetadataRecord(RecordKind::RK_Metadata_WallClockTime,
+ MetadataType::WallClockTime),
+ Seconds(S), Nanos(N) {}
+
+ uint64_t seconds() const { return Seconds; }
+ uint32_t nanos() const { return Nanos; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_WallClockTime;
+ }
+};
+
+class NewCPUIDRecord : public MetadataRecord {
+ uint16_t CPUId = 0;
+ uint64_t TSC = 0;
+ friend class RecordInitializer;
+
+public:
+ NewCPUIDRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_NewCPUId,
+ MetadataType::NewCPUId) {}
+
+ NewCPUIDRecord(uint16_t C, uint64_t T)
+ : MetadataRecord(RecordKind::RK_Metadata_NewCPUId,
+ MetadataType::NewCPUId),
+ CPUId(C), TSC(T) {}
+
+ uint16_t cpuid() const { return CPUId; }
+
+ uint64_t tsc() const { return TSC; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_NewCPUId;
+ }
+};
+
+class TSCWrapRecord : public MetadataRecord {
+ uint64_t BaseTSC = 0;
+ friend class RecordInitializer;
+
+public:
+ TSCWrapRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_TSCWrap, MetadataType::TSCWrap) {
+ }
+
+ explicit TSCWrapRecord(uint64_t B)
+ : MetadataRecord(RecordKind::RK_Metadata_TSCWrap, MetadataType::TSCWrap),
+ BaseTSC(B) {}
+
+ uint64_t tsc() const { return BaseTSC; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_TSCWrap;
+ }
+};
+
+class CustomEventRecord : public MetadataRecord {
+ int32_t Size = 0;
+ uint64_t TSC = 0;
+ uint16_t CPU = 0;
+ std::string Data{};
+ friend class RecordInitializer;
+
+public:
+ CustomEventRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_CustomEvent,
+ MetadataType::CustomEvent) {}
+
+ explicit CustomEventRecord(uint64_t S, uint64_t T, uint16_t C, std::string D)
+ : MetadataRecord(RecordKind::RK_Metadata_CustomEvent,
+ MetadataType::CustomEvent),
+ Size(S), TSC(T), CPU(C), Data(std::move(D)) {}
+
+ int32_t size() const { return Size; }
+ uint64_t tsc() const { return TSC; }
+ uint16_t cpu() const { return CPU; }
+ StringRef data() const { return Data; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_CustomEvent;
+ }
+};
+
+class CustomEventRecordV5 : public MetadataRecord {
+ int32_t Size = 0;
+ int32_t Delta = 0;
+ std::string Data{};
+ friend class RecordInitializer;
+
+public:
+ CustomEventRecordV5()
+ : MetadataRecord(RecordKind::RK_Metadata_CustomEventV5,
+ MetadataType::CustomEvent) {}
+
+ explicit CustomEventRecordV5(int32_t S, int32_t D, std::string P)
+ : MetadataRecord(RecordKind::RK_Metadata_CustomEventV5,
+ MetadataType::CustomEvent),
+ Size(S), Delta(D), Data(std::move(P)) {}
+
+ int32_t size() const { return Size; }
+ int32_t delta() const { return Delta; }
+ StringRef data() const { return Data; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_CustomEventV5;
+ }
+};
+
+class TypedEventRecord : public MetadataRecord {
+ int32_t Size = 0;
+ int32_t Delta = 0;
+ uint16_t EventType = 0;
+ std::string Data{};
+ friend class RecordInitializer;
+
+public:
+ TypedEventRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_TypedEvent,
+ MetadataType::TypedEvent) {}
+
+ explicit TypedEventRecord(int32_t S, int32_t D, uint16_t E, std::string P)
+ : MetadataRecord(RecordKind::RK_Metadata_TypedEvent,
+ MetadataType::TypedEvent),
+ Size(S), Delta(D), Data(std::move(P)) {}
+
+ int32_t size() const { return Size; }
+ int32_t delta() const { return Delta; }
+ uint16_t eventType() const { return EventType; }
+ StringRef data() const { return Data; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_TypedEvent;
+ }
+};
+
+class CallArgRecord : public MetadataRecord {
+ uint64_t Arg;
+ friend class RecordInitializer;
+
+public:
+ CallArgRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_CallArg, MetadataType::CallArg) {
+ }
+
+ explicit CallArgRecord(uint64_t A)
+ : MetadataRecord(RecordKind::RK_Metadata_CallArg, MetadataType::CallArg),
+ Arg(A) {}
+
+ uint64_t arg() const { return Arg; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_CallArg;
+ }
+};
+
+class PIDRecord : public MetadataRecord {
+ int32_t PID = 0;
+ friend class RecordInitializer;
+
+public:
+ PIDRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_PIDEntry,
+ MetadataType::PIDEntry) {}
+
+ explicit PIDRecord(int32_t P)
+ : MetadataRecord(RecordKind::RK_Metadata_PIDEntry,
+ MetadataType::PIDEntry),
+ PID(P) {}
+
+ int32_t pid() const { return PID; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_PIDEntry;
+ }
+};
+
+class NewBufferRecord : public MetadataRecord {
+ int32_t TID = 0;
+ friend class RecordInitializer;
+
+public:
+ NewBufferRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_NewBuffer,
+ MetadataType::NewBuffer) {}
+
+ explicit NewBufferRecord(int32_t T)
+ : MetadataRecord(RecordKind::RK_Metadata_NewBuffer,
+ MetadataType::NewBuffer),
+ TID(T) {}
+
+ int32_t tid() const { return TID; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_NewBuffer;
+ }
+};
+
+class EndBufferRecord : public MetadataRecord {
+public:
+ EndBufferRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_EndOfBuffer,
+ MetadataType::EndOfBuffer) {}
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_EndOfBuffer;
+ }
+};
+
+class FunctionRecord : public Record {
+ RecordTypes Kind;
+ int32_t FuncId;
+ uint32_t Delta;
+ friend class RecordInitializer;
+
+ static constexpr unsigned kFunctionRecordSize = 8;
+
+public:
+ FunctionRecord() : Record(RecordKind::RK_Function) {}
+
+ explicit FunctionRecord(RecordTypes K, int32_t F, uint32_t D)
+ : Record(RecordKind::RK_Function), Kind(K), FuncId(F), Delta(D) {}
+
+ // A function record is a concrete record type which has a number of common
+ // properties.
+ RecordTypes recordType() const { return Kind; }
+ int32_t functionId() const { return FuncId; }
+ uint32_t delta() const { return Delta; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Function;
+ }
+};
+
+class RecordVisitor {
+public:
+ virtual ~RecordVisitor() = default;
+
+ // Support all specific kinds of records:
+ virtual Error visit(BufferExtents &) = 0;
+ virtual Error visit(WallclockRecord &) = 0;
+ virtual Error visit(NewCPUIDRecord &) = 0;
+ virtual Error visit(TSCWrapRecord &) = 0;
+ virtual Error visit(CustomEventRecord &) = 0;
+ virtual Error visit(CallArgRecord &) = 0;
+ virtual Error visit(PIDRecord &) = 0;
+ virtual Error visit(NewBufferRecord &) = 0;
+ virtual Error visit(EndBufferRecord &) = 0;
+ virtual Error visit(FunctionRecord &) = 0;
+ virtual Error visit(CustomEventRecordV5 &) = 0;
+ virtual Error visit(TypedEventRecord &) = 0;
+};
+
+class RecordInitializer : public RecordVisitor {
+ DataExtractor &E;
+ uint32_t &OffsetPtr;
+ uint16_t Version;
+
+public:
+ static constexpr uint16_t DefaultVersion = 5u;
+
+ explicit RecordInitializer(DataExtractor &DE, uint32_t &OP, uint16_t V)
+ : RecordVisitor(), E(DE), OffsetPtr(OP), Version(V) {}
+
+ explicit RecordInitializer(DataExtractor &DE, uint32_t &OP)
+ : RecordInitializer(DE, OP, DefaultVersion) {}
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_LIB_XRAY_FDRRECORDS_H_
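
Because the hierarchy carries LLVM-style RTTI through the classof hooks above, records can also be inspected with isa/dyn_cast instead of a full RecordVisitor; a small sketch:

    // Count function-entry records in an already-loaded block.
    size_t countEntryRecords(
        const std::vector<std::unique_ptr<llvm::xray::Record>> &Records) {
      size_t Entries = 0;
      for (const auto &R : Records)
        if (const auto *F = llvm::dyn_cast<llvm::xray::FunctionRecord>(R.get()))
          if (F->recordType() == llvm::xray::RecordTypes::ENTER)
            ++Entries;
      return Entries;
    }
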
diff --git a/contrib/llvm/include/llvm/XRay/FDRTraceExpander.h b/contrib/llvm/include/llvm/XRay/FDRTraceExpander.h
new file mode 100644
index 000000000000..02a21bed5ce9
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRTraceExpander.h
@@ -0,0 +1,63 @@
+//===- FDRTraceExpander.h - XRay FDR Mode Log Expander --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// We define an FDR record visitor which can reconstitute XRayRecord instances
+// from a sequence of FDR mode records presented in arrival order, handing each
+// reconstituted record to a caller-provided callback.
+//
+//===----------------------------------------------------------------------===//
+#ifndef INCLUDE_LLVM_XRAY_FDRTRACEEXPANDER_H_
+#define INCLUDE_LLVM_XRAY_FDRTRACEEXPANDER_H_
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/XRayRecord.h"
+
+namespace llvm {
+namespace xray {
+
+class TraceExpander : public RecordVisitor {
+ // Type-erased callback for handling individual XRayRecord instances.
+ function_ref<void(const XRayRecord &)> C;
+ int32_t PID = 0;
+ int32_t TID = 0;
+ uint64_t BaseTSC = 0;
+ XRayRecord CurrentRecord{0, 0, RecordTypes::ENTER, 0, 0, 0, 0, {}, {}};
+ uint16_t CPUId = 0;
+ uint16_t LogVersion = 0;
+ bool BuildingRecord = false;
+ bool IgnoringRecords = false;
+
+ void resetCurrentRecord();
+
+public:
+ explicit TraceExpander(function_ref<void(const XRayRecord &)> F, uint16_t L)
+ : RecordVisitor(), C(std::move(F)), LogVersion(L) {}
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+
+ // Must be called after all the records have been processed, to handle the
+ // most recent record generated.
+ Error flush();
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // INCLUDE_LLVM_XRAY_FDRTRACEEXPANDER_H_
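
A sketch of expanding one block's records into XRayRecord values via a callback; the log version and the accumulating lambda are illustrative assumptions:

    std::vector<llvm::xray::XRayRecord> Out;
    auto Accumulate = [&](const llvm::xray::XRayRecord &R) { Out.push_back(R); };
    llvm::xray::TraceExpander Expander(Accumulate, /*LogVersion=*/5);
    // for (auto &R : Records)
    //   if (auto E = R->apply(Expander)) ... handle the error ...
    // if (auto E = Expander.flush()) ... handle the error ...  // last record
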
diff --git a/contrib/llvm/include/llvm/XRay/FDRTraceWriter.h b/contrib/llvm/include/llvm/XRay/FDRTraceWriter.h
new file mode 100644
index 000000000000..7b3b5fa25eff
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRTraceWriter.h
@@ -0,0 +1,56 @@
+//===- FDRTraceWriter.h - XRay FDR Trace Writer -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A utility that can write out XRay FDR Mode formatted trace files.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_FDRTRACEWRITER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_FDRTRACEWRITER_H_
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/XRayRecord.h"
+
+namespace llvm {
+namespace xray {
+
+/// The FDRTraceWriter allows us to hand-craft an XRay Flight Data Recorder
+/// (FDR) mode log file. This is used primarily for testing, generating
+/// sequences of FDR records that can be read/processed. It can also be used to
+/// generate various kinds of execution traces without using the XRay runtime.
+/// Note that this writer does not do any validation, but uses the types of
+/// records defined in the FDRRecords.h file.
+class FDRTraceWriter : public RecordVisitor {
+public:
+ // Construct an FDRTraceWriter associated with an output stream.
+ explicit FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H);
+ ~FDRTraceWriter();
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+
+private:
+ support::endian::Writer OS;
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_FDRTRACEWRITER_H_
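
A sketch of serializing hand-crafted records into an in-memory buffer; the XRayFileHeader field values are illustrative assumptions, and raw_string_ostream comes from llvm/Support/raw_ostream.h:

    std::string Buffer;
    llvm::raw_string_ostream OS(Buffer);
    llvm::xray::XRayFileHeader H;
    H.Version = 5;
    H.Type = 1;              // FDR mode log (illustrative)
    H.ConstantTSC = true;
    H.NonstopTSC = true;
    llvm::xray::FDRTraceWriter Writer(OS, H);
    // for (auto &R : Records)
    //   if (auto E = R->apply(Writer)) ... handle the error ...
    OS.flush();              // Buffer now holds an FDR-formatted log
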
diff --git a/contrib/llvm/include/llvm/XRay/FileHeaderReader.h b/contrib/llvm/include/llvm/XRay/FileHeaderReader.h
new file mode 100644
index 000000000000..3b8809bdbb34
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FileHeaderReader.h
@@ -0,0 +1,33 @@
+//===- FileHeaderReader.h - XRay Trace File Header Reading Function -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares functions that can load an XRay log header from various
+// sources.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_XRAY_FILEHEADERREADER_H_
+#define LLVM_LIB_XRAY_FILEHEADERREADER_H_
+
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Error.h"
+#include "llvm/XRay/XRayRecord.h"
+#include <cstdint>
+
+namespace llvm {
+namespace xray {
+
+/// Convenience function for loading the file header given a data extractor at a
+/// specified offset.
+Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
+ uint32_t &OffsetPtr);
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_LIB_XRAY_FILEHEADERREADER_H_
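
A minimal sketch of reading the header out of a raw buffer, assuming Data holds the contents of an XRay log:

    llvm::Expected<llvm::xray::XRayFileHeader> readHeader(llvm::StringRef Data) {
      llvm::DataExtractor HeaderExtractor(Data, /*IsLittleEndian=*/true,
                                          /*AddressSize=*/8);
      uint32_t OffsetPtr = 0;
      // On success, OffsetPtr is advanced to just past the header.
      return llvm::xray::readBinaryFormatHeader(HeaderExtractor, OffsetPtr);
    }
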
diff --git a/contrib/llvm/include/llvm/XRay/Profile.h b/contrib/llvm/include/llvm/XRay/Profile.h
new file mode 100644
index 000000000000..9365630358e8
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/Profile.h
@@ -0,0 +1,150 @@
+//===- Profile.h - XRay Profile Abstraction -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the XRay Profile class representing the latency profile generated by
+// XRay's profiling mode.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_XRAY_PROFILE_H
+#define LLVM_XRAY_PROFILE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include <list>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+namespace xray {
+
+class Profile;
+
+// We forward declare the Trace type for turning a Trace into a Profile.
+class Trace;
+
+/// This function will attempt to load an XRay Profiling Mode profile from the
+/// provided |Filename|.
+///
+/// For any errors encountered while loading the profile data from |Filename|,
+/// this function will return an Error describing the failure.
+Expected<Profile> loadProfile(StringRef Filename);
+
+/// This algorithm will merge two Profile instances into a single Profile
+/// instance, aggregating blocks by Thread ID.
+Profile mergeProfilesByThread(const Profile &L, const Profile &R);
+
+/// This algorithm will merge two Profile instances into a single Profile
+/// instance, aggregating blocks by function call stack.
+Profile mergeProfilesByStack(const Profile &L, const Profile &R);
+
+/// This function takes a Trace and creates a Profile instance from it.
+Expected<Profile> profileFromTrace(const Trace &T);
+
+/// Profile instances are thread-compatible.
+class Profile {
+public:
+ using ThreadID = uint64_t;
+ using PathID = unsigned;
+ using FuncID = int32_t;
+
+ struct Data {
+ uint64_t CallCount;
+ uint64_t CumulativeLocalTime;
+ };
+
+ struct Block {
+ ThreadID Thread;
+ std::vector<std::pair<PathID, Data>> PathData;
+ };
+
+ /// Provides a sequence of function IDs from a previously interned PathID.
+ ///
+ /// Returns an error if |P| has not previously been interned into the Profile.
+ ///
+ Expected<std::vector<FuncID>> expandPath(PathID P) const;
+
+ /// The stack represented in |P| must be in stack order (leaf to root). Calls
+ /// with the same sequence of function IDs always return the same PathID.
+ PathID internPath(ArrayRef<FuncID> P);
+
+ /// Appends a fully-formed Block instance into the Profile.
+ ///
+ /// Returns an error condition in the following cases:
+ ///
+ /// - The PathData component of the Block is empty
+ ///
+ Error addBlock(Block &&B);
+
+ Profile() = default;
+ ~Profile() = default;
+
+ Profile(Profile &&O) noexcept
+ : Blocks(std::move(O.Blocks)), NodeStorage(std::move(O.NodeStorage)),
+ Roots(std::move(O.Roots)), PathIDMap(std::move(O.PathIDMap)),
+ NextID(O.NextID) {}
+
+ Profile &operator=(Profile &&O) noexcept {
+ Blocks = std::move(O.Blocks);
+ NodeStorage = std::move(O.NodeStorage);
+ Roots = std::move(O.Roots);
+ PathIDMap = std::move(O.PathIDMap);
+ NextID = O.NextID;
+ return *this;
+ }
+
+ Profile(const Profile &);
+ Profile &operator=(const Profile &);
+
+ friend void swap(Profile &L, Profile &R) {
+ using std::swap;
+ swap(L.Blocks, R.Blocks);
+ swap(L.NodeStorage, R.NodeStorage);
+ swap(L.Roots, R.Roots);
+ swap(L.PathIDMap, R.PathIDMap);
+ swap(L.NextID, R.NextID);
+ }
+
+private:
+ using BlockList = std::list<Block>;
+
+ struct TrieNode {
+ FuncID Func = 0;
+ std::vector<TrieNode *> Callees{};
+ TrieNode *Caller = nullptr;
+ PathID ID = 0;
+ };
+
+ // List of blocks associated with a Profile.
+ BlockList Blocks;
+
+ // List of TrieNode elements we've seen.
+ std::list<TrieNode> NodeStorage;
+
+ // List of call stack roots.
+ SmallVector<TrieNode *, 4> Roots;
+
+ // Reverse mapping between a PathID to a TrieNode*.
+ DenseMap<PathID, TrieNode *> PathIDMap;
+
+ // Used to identify paths.
+ PathID NextID = 1;
+
+public:
+ using const_iterator = BlockList::const_iterator;
+ const_iterator begin() const { return Blocks.begin(); }
+ const_iterator end() const { return Blocks.end(); }
+ bool empty() const { return Blocks.empty(); }
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif
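
A sketch of the intended flow: intern a leaf-to-root call stack, attach data to a block, then expand the path back into function IDs; the IDs and counters are made up:

    llvm::Error buildProfile() {
      llvm::xray::Profile P;
      // Leaf-to-root: function 3 was called by 2, which was called by 1.
      auto Path = P.internPath({3, 2, 1});
      llvm::xray::Profile::Block B;
      B.Thread = 42;
      llvm::xray::Profile::Data D{/*CallCount=*/1, /*CumulativeLocalTime=*/100};
      B.PathData.emplace_back(Path, D);
      if (auto E = P.addBlock(std::move(B)))
        return E;                             // rejected if PathData were empty
      auto FuncsOrErr = P.expandPath(Path);   // yields {3, 2, 1} again on success
      if (!FuncsOrErr)
        return FuncsOrErr.takeError();
      return llvm::Error::success();
    }
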
diff --git a/contrib/llvm/include/llvm/XRay/RecordPrinter.h b/contrib/llvm/include/llvm/XRay/RecordPrinter.h
new file mode 100644
index 000000000000..649c64ab6f5c
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/RecordPrinter.h
@@ -0,0 +1,50 @@
+//===- RecordPrinter.h - FDR Record Printer -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the RecordVisitor which prints an individual record's
+// data in an ad-hoc format, suitable for human inspection.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_RECORDPRINTER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_RECORDPRINTER_H_
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/XRay/FDRRecords.h"
+
+namespace llvm {
+namespace xray {
+
+class RecordPrinter : public RecordVisitor {
+ raw_ostream &OS;
+ std::string Delim;
+
+public:
+ explicit RecordPrinter(raw_ostream &O, std::string D)
+ : RecordVisitor(), OS(O), Delim(std::move(D)) {}
+
+ explicit RecordPrinter(raw_ostream &O) : RecordPrinter(O, "") {}
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_RECORDPRINTER_H_
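
A short sketch of printing a single hand-built record to standard output; llvm::outs() and consumeError come from LLVM's Support library:

    llvm::xray::RecordPrinter Printer(llvm::outs(), "\n");
    llvm::xray::WallclockRecord WC(/*Seconds=*/1, /*Nanos=*/2);
    if (auto E = WC.apply(Printer))
      llvm::consumeError(std::move(E));  // printing failures are non-fatal here
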
diff --git a/contrib/llvm/include/llvm/XRay/Trace.h b/contrib/llvm/include/llvm/XRay/Trace.h
index 6b033d686b06..924addd1560d 100644
--- a/contrib/llvm/include/llvm/XRay/Trace.h
+++ b/contrib/llvm/include/llvm/XRay/Trace.h
@@ -17,8 +17,8 @@
#include <vector>
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/FileSystem.h"
#include "llvm/XRay/XRayRecord.h"
namespace llvm {
@@ -46,25 +46,35 @@ namespace xray {
///
class Trace {
XRayFileHeader FileHeader;
- std::vector<XRayRecord> Records;
+ using RecordVector = std::vector<XRayRecord>;
+ RecordVector Records;
typedef std::vector<XRayRecord>::const_iterator citerator;
- friend Expected<Trace> loadTraceFile(StringRef, bool);
+ friend Expected<Trace> loadTrace(const DataExtractor &, bool);
public:
+ using size_type = RecordVector::size_type;
+ using value_type = RecordVector::value_type;
+ using const_iterator = RecordVector::const_iterator;
+
/// Provides access to the loaded XRay trace file header.
const XRayFileHeader &getFileHeader() const { return FileHeader; }
- citerator begin() const { return Records.begin(); }
- citerator end() const { return Records.end(); }
- size_t size() const { return Records.size(); }
+ const_iterator begin() const { return Records.begin(); }
+ const_iterator end() const { return Records.end(); }
+ bool empty() const { return Records.empty(); }
+ size_type size() const { return Records.size(); }
};
/// This function will attempt to load XRay trace records from the provided
/// |Filename|.
Expected<Trace> loadTraceFile(StringRef Filename, bool Sort = false);
+/// This function will attempt to load XRay trace records from the provided
+/// DataExtractor.
+Expected<Trace> loadTrace(const DataExtractor &Extractor, bool Sort = false);
+
} // namespace xray
} // namespace llvm
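
The file-based entry point keeps working as before; a sketch of loading a trace and walking it with the new iterator typedefs (the counting logic is illustrative):

    llvm::Expected<size_t> countFunctionEntries(llvm::StringRef Filename) {
      auto TraceOrErr = llvm::xray::loadTraceFile(Filename, /*Sort=*/true);
      if (!TraceOrErr)
        return TraceOrErr.takeError();
      size_t Entries = 0;
      for (const auto &R : *TraceOrErr)
        if (R.Type == llvm::xray::RecordTypes::ENTER)
          ++Entries;
      return Entries;
    }
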
diff --git a/contrib/llvm/include/llvm/XRay/XRayRecord.h b/contrib/llvm/include/llvm/XRay/XRayRecord.h
index 76873447f170..7685ec95838a 100644
--- a/contrib/llvm/include/llvm/XRay/XRayRecord.h
+++ b/contrib/llvm/include/llvm/XRay/XRayRecord.h
@@ -17,6 +17,7 @@
#include <cstdint>
#include <vector>
+#include <string>
namespace llvm {
namespace xray {
@@ -54,10 +55,23 @@ struct XRayFileHeader {
/// This may or may not correspond to actual record types in the raw trace (as
/// the loader implementation may synthesize this information in the process of
/// of loading).
-enum class RecordTypes { ENTER, EXIT, TAIL_EXIT, ENTER_ARG };
+enum class RecordTypes {
+ ENTER,
+ EXIT,
+ TAIL_EXIT,
+ ENTER_ARG,
+ CUSTOM_EVENT,
+ TYPED_EVENT
+};
+/// An XRayRecord is the denormalized view of data associated with a trace.
+/// These records may not correspond to actual entries in the raw traces, but
+/// they are the logical representation of records in a higher-level event log.
struct XRayRecord {
- /// The type of record.
+ /// RecordType values are used as "sub-types" which have meaning in the
+ /// context of the `Type` below. For function call and custom event records,
+ /// the RecordType is always 0, while for typed events we store the type in
+ /// the RecordType field.
uint16_t RecordType;
/// The CPU where the thread is running. We assume number of CPUs <= 65536.
@@ -66,7 +80,7 @@ struct XRayRecord {
/// Identifies the type of record.
RecordTypes Type;
- /// The function ID for the record.
+ /// The function ID for the record, if this is a function call record.
int32_t FuncId;
/// Get the full 8 bytes of the TSC when we get the log record.
@@ -80,6 +94,9 @@ struct XRayRecord {
/// The function call arguments.
std::vector<uint64_t> CallArgs;
+
+ /// For custom and typed events, we provide the raw data from the trace.
+ std::string Data;
};
} // namespace xray
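
With the new enumerators, a typed event can be expressed directly in the denormalized form; a sketch using the member order shown in FDRTraceExpander.h, with made-up values:

    // Member order mirrors the XRayRecord initialization in FDRTraceExpander.h.
    llvm::xray::XRayRecord R{/*RecordType=*/7, /*CPU=*/0,
                             llvm::xray::RecordTypes::TYPED_EVENT,
                             /*FuncId=*/0, /*TSC=*/10001, /*TId=*/42,
                             /*PId=*/1, /*CallArgs=*/{}, /*Data=*/"payload"};
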
diff --git a/contrib/llvm/include/llvm/XRay/YAMLXRayRecord.h b/contrib/llvm/include/llvm/XRay/YAMLXRayRecord.h
index 0de9ea0968e6..6150196ed98d 100644
--- a/contrib/llvm/include/llvm/XRay/YAMLXRayRecord.h
+++ b/contrib/llvm/include/llvm/XRay/YAMLXRayRecord.h
@@ -39,6 +39,7 @@ struct YAMLXRayRecord {
uint32_t TId;
uint32_t PId;
std::vector<uint64_t> CallArgs;
+ std::string Data;
};
struct YAMLXRayTrace {
@@ -58,6 +59,8 @@ template <> struct ScalarEnumerationTraits<xray::RecordTypes> {
IO.enumCase(Type, "function-exit", xray::RecordTypes::EXIT);
IO.enumCase(Type, "function-tail-exit", xray::RecordTypes::TAIL_EXIT);
IO.enumCase(Type, "function-enter-arg", xray::RecordTypes::ENTER_ARG);
+ IO.enumCase(Type, "custom-event", xray::RecordTypes::CUSTOM_EVENT);
+ IO.enumCase(Type, "typed-event", xray::RecordTypes::TYPED_EVENT);
}
};
@@ -73,16 +76,16 @@ template <> struct MappingTraits<xray::YAMLXRayFileHeader> {
template <> struct MappingTraits<xray::YAMLXRayRecord> {
static void mapping(IO &IO, xray::YAMLXRayRecord &Record) {
- // FIXME: Make this type actually be descriptive
IO.mapRequired("type", Record.RecordType);
- IO.mapRequired("func-id", Record.FuncId);
+ IO.mapOptional("func-id", Record.FuncId);
IO.mapOptional("function", Record.Function);
IO.mapOptional("args", Record.CallArgs);
IO.mapRequired("cpu", Record.CPU);
- IO.mapRequired("thread", Record.TId);
+ IO.mapOptional("thread", Record.TId, 0U);
IO.mapOptional("process", Record.PId, 0U);
IO.mapRequired("kind", Record.Type);
IO.mapRequired("tsc", Record.TSC);
+ IO.mapOptional("data", Record.Data);
}
static constexpr bool flow = true;
diff --git a/contrib/llvm/include/llvm/module.extern.modulemap b/contrib/llvm/include/llvm/module.extern.modulemap
new file mode 100644
index 000000000000..8acda137e044
--- /dev/null
+++ b/contrib/llvm/include/llvm/module.extern.modulemap
@@ -0,0 +1,5 @@
+module LLVM_Extern_Config_Def {}
+module LLVM_Extern_IR_Attributes_Gen {}
+module LLVM_Extern_IR_Intrinsics_Gen {}
+module LLVM_Extern_IR_Intrinsics_Enum {}
+module LLVM_Extern_Utils_DataTypes {}
diff --git a/contrib/llvm/include/llvm/module.install.modulemap b/contrib/llvm/include/llvm/module.install.modulemap
new file mode 100644
index 000000000000..ac73a8612326
--- /dev/null
+++ b/contrib/llvm/include/llvm/module.install.modulemap
@@ -0,0 +1,27 @@
+
+module LLVM_Extern_Config_Def {
+ textual header "Config/AsmParsers.def"
+ textual header "Config/AsmPrinters.def"
+ textual header "Config/Disassemblers.def"
+ textual header "Config/Targets.def"
+ export *
+}
+
+module LLVM_Extern_IR_Attributes_Gen {
+ textual header "IR/Attributes.gen"
+ textual header "IR/Attributes.inc"
+}
+
+module LLVM_Extern_IR_Intrinsics_Gen {
+ textual header "IR/Intrinsics.gen"
+ textual header "IR/Intrinsics.inc"
+}
+
+module LLVM_Extern_IR_Intrinsics_Enum {
+ textual header "IR/IntrinsicEnums.inc"
+}
+
+module LLVM_Extern_Utils_DataTypes {
+ header "Support/DataTypes.h"
+ export *
+}
diff --git a/contrib/llvm/include/llvm/module.modulemap b/contrib/llvm/include/llvm/module.modulemap
index 649cdf3b0a89..bcc12534ec85 100644
--- a/contrib/llvm/include/llvm/module.modulemap
+++ b/contrib/llvm/include/llvm/module.modulemap
@@ -7,7 +7,11 @@ module LLVM_Analysis {
textual header "Analysis/TargetLibraryInfo.def"
}
-module LLVM_AsmParser { requires cplusplus umbrella "AsmParser" module * { export * } }
+module LLVM_AsmParser {
+ requires cplusplus
+ umbrella "AsmParser"
+ module * { export * }
+}
// A module covering CodeGen/ and Target/. These are intertwined
// and codependent, and thus notionally form a single module.
@@ -27,14 +31,21 @@ module LLVM_Backend {
textual header "CodeGen/CommandFlags.inc"
textual header "CodeGen/DIEValue.def"
}
+}
- module Target {
- umbrella "Target"
- module * { export * }
- }
+// FIXME: Make this as a submodule of LLVM_Backend again.
+// Doing so causes a linker error in clang-format.
+module LLVM_Backend_Target {
+ umbrella "Target"
+ module * { export * }
+}
+
+module LLVM_Bitcode {
+ requires cplusplus
+ umbrella "Bitcode"
+ module * { export * }
}
-module LLVM_Bitcode { requires cplusplus umbrella "Bitcode" module * { export * } }
module LLVM_BinaryFormat {
requires cplusplus
@@ -52,6 +63,7 @@ module LLVM_BinaryFormat {
textual header "BinaryFormat/ELFRelocs/i386.def"
textual header "BinaryFormat/ELFRelocs/Lanai.def"
textual header "BinaryFormat/ELFRelocs/Mips.def"
+ textual header "BinaryFormat/ELFRelocs/MSP430.def"
textual header "BinaryFormat/ELFRelocs/PowerPC64.def"
textual header "BinaryFormat/ELFRelocs/PowerPC.def"
textual header "BinaryFormat/ELFRelocs/RISCV.def"
@@ -59,9 +71,15 @@ module LLVM_BinaryFormat {
textual header "BinaryFormat/ELFRelocs/SystemZ.def"
textual header "BinaryFormat/ELFRelocs/x86_64.def"
textual header "BinaryFormat/WasmRelocs.def"
+ textual header "BinaryFormat/MsgPack.def"
}
-module LLVM_Config { requires cplusplus umbrella "Config" module * { export * } }
+module LLVM_Config {
+ requires cplusplus
+ umbrella "Config"
+ extern module LLVM_Extern_Config_Def "module.extern.modulemap"
+ module * { export * }
+}
module LLVM_DebugInfo {
requires cplusplus
@@ -87,12 +105,14 @@ module LLVM_DebugInfo_PDB {
// FIXME: There should be a better way to specify this.
exclude header "DebugInfo/PDB/DIA/DIADataStream.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumDebugStreams.h"
+ exclude header "DebugInfo/PDB/DIA/DIAEnumFrameData.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumInjectedSources.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumLineNumbers.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumSectionContribs.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumSourceFiles.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumSymbols.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumTables.h"
+ exclude header "DebugInfo/PDB/DIA/DIAFrameData.h"
exclude header "DebugInfo/PDB/DIA/DIAInjectedSource.h"
exclude header "DebugInfo/PDB/DIA/DIALineNumber.h"
exclude header "DebugInfo/PDB/DIA/DIARawSymbol.h"
@@ -177,7 +197,11 @@ module LLVM_intrinsic_gen {
// Attributes.h
module IR_Argument { header "IR/Argument.h" export * }
- module IR_Attributes { header "IR/Attributes.h" export * }
+ module IR_Attributes {
+ header "IR/Attributes.h"
+ extern module LLVM_Extern_IR_Attributes_Gen "module.extern.modulemap"
+ export *
+ }
module IR_CallSite { header "IR/CallSite.h" export * }
module IR_ConstantFolder { header "IR/ConstantFolder.h" export * }
module IR_GlobalVariable { header "IR/GlobalVariable.h" export * }
@@ -192,6 +216,7 @@ module LLVM_intrinsic_gen {
// Intrinsics.h
module IR_CFG { header "IR/CFG.h" export * }
+ module IR_CFGDiff { header "IR/CFGDiff.h" export * }
module IR_ConstantRange { header "IR/ConstantRange.h" export * }
module IR_Dominators { header "IR/Dominators.h" export * }
module Analysis_PostDominators { header "Analysis/PostDominators.h" export * }
@@ -202,7 +227,12 @@ module LLVM_intrinsic_gen {
module IR_Verifier { header "IR/Verifier.h" export * }
module IR_InstIterator { header "IR/InstIterator.h" export * }
module IR_InstVisitor { header "IR/InstVisitor.h" export * }
- module IR_Intrinsics { header "IR/Intrinsics.h" export * }
+ module IR_Intrinsics {
+ header "IR/Intrinsics.h"
+ extern module LLVM_Extern_IR_Intrinsics_Gen "module.extern.modulemap"
+ extern module LLVM_Extern_IR_Intrinsics_Enum "module.extern.modulemap"
+ export *
+ }
module IR_IntrinsicInst { header "IR/IntrinsicInst.h" export * }
module IR_PatternMatch { header "IR/PatternMatch.h" export * }
module IR_Statepoint { header "IR/Statepoint.h" export * }
@@ -224,9 +254,23 @@ module LLVM_IR {
textual header "IR/RuntimeLibcalls.def"
}
-module LLVM_IRReader { requires cplusplus umbrella "IRReader" module * { export * } }
-module LLVM_LineEditor { requires cplusplus umbrella "LineEditor" module * { export * } }
-module LLVM_LTO { requires cplusplus umbrella "LTO" module * { export * } }
+module LLVM_IRReader {
+ requires cplusplus
+ umbrella "IRReader"
+ module * { export * }
+}
+
+module LLVM_LineEditor {
+ requires cplusplus
+ umbrella "LineEditor"
+ module * { export * }
+}
+
+module LLVM_LTO {
+ requires cplusplus
+ umbrella "LTO"
+ module * { export * }
+}
module LLVM_MC {
requires cplusplus
@@ -253,7 +297,11 @@ module LLVM_Object {
module * { export * }
}
-module LLVM_Option { requires cplusplus umbrella "Option" module * { export * } }
+module LLVM_Option {
+ requires cplusplus
+ umbrella "Option"
+ module * { export * }
+}
module LLVM_ProfileData {
requires cplusplus
@@ -271,7 +319,11 @@ module LLVM_Support_TargetRegistry {
export *
}
-module LLVM_TableGen { requires cplusplus umbrella "TableGen" module * { export * } }
+module LLVM_TableGen {
+ requires cplusplus
+ umbrella "TableGen"
+ module * { export * }
+}
module LLVM_Transforms {
requires cplusplus
@@ -279,6 +331,8 @@ module LLVM_Transforms {
module * { export * }
}
+extern module LLVM_Extern_Utils_DataTypes "module.extern.modulemap"
+
// A module covering ADT/ and Support/. These are intertwined and
// codependent, and notionally form a single module.
module LLVM_Utils {
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index a6585df949f8..3446aef39938 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -40,7 +40,6 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -118,11 +117,11 @@ bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
return false;
}
-ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ModRefInfo AAResults::getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getArgModRefInfo(CS, ArgIdx));
+ Result = intersectModRef(Result, AA->getArgModRefInfo(Call, ArgIdx));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -132,11 +131,11 @@ ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
return Result;
}
-ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
// We may have two calls.
- if (auto CS = ImmutableCallSite(I)) {
+ if (const auto *Call1 = dyn_cast<CallBase>(I)) {
// Check if the two calls modify the same memory.
- return getModRefInfo(CS, Call);
+ return getModRefInfo(Call1, Call2);
} else if (I->isFenceLike()) {
// If this is a fence, just return ModRef.
return ModRefInfo::ModRef;
@@ -146,19 +145,19 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
// is that if the call references what this instruction
// defines, it must be clobbered by this location.
const MemoryLocation DefLoc = MemoryLocation::get(I);
- ModRefInfo MR = getModRefInfo(Call, DefLoc);
+ ModRefInfo MR = getModRefInfo(Call2, DefLoc);
if (isModOrRefSet(MR))
return setModAndRef(MR);
}
return ModRefInfo::NoModRef;
}
-ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getModRefInfo(CS, Loc));
+ Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -167,7 +166,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
// Try to refine the mod-ref info further using other API entry points to the
// aggregate set of AA results.
- auto MRB = getModRefBehavior(CS);
+ auto MRB = getModRefBehavior(Call);
if (MRB == FMRB_DoesNotAccessMemory ||
MRB == FMRB_OnlyAccessesInaccessibleMem)
return ModRefInfo::NoModRef;
@@ -178,20 +177,19 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
Result = clearRef(Result);
if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) {
- bool DoesAlias = false;
bool IsMustAlias = true;
ModRefInfo AllArgsMask = ModRefInfo::NoModRef;
if (doesAccessArgPointees(MRB)) {
- for (auto AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) {
+ for (auto AI = Call->arg_begin(), AE = Call->arg_end(); AI != AE; ++AI) {
const Value *Arg = *AI;
if (!Arg->getType()->isPointerTy())
continue;
- unsigned ArgIdx = std::distance(CS.arg_begin(), AI);
- MemoryLocation ArgLoc = MemoryLocation::getForArgument(CS, ArgIdx, TLI);
+ unsigned ArgIdx = std::distance(Call->arg_begin(), AI);
+ MemoryLocation ArgLoc =
+ MemoryLocation::getForArgument(Call, ArgIdx, TLI);
AliasResult ArgAlias = alias(ArgLoc, Loc);
if (ArgAlias != NoAlias) {
- ModRefInfo ArgMask = getArgModRefInfo(CS, ArgIdx);
- DoesAlias = true;
+ ModRefInfo ArgMask = getArgModRefInfo(Call, ArgIdx);
AllArgsMask = unionModRef(AllArgsMask, ArgMask);
}
// Conservatively clear IsMustAlias unless only MustAlias is found.
@@ -199,7 +197,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
}
}
// Return NoModRef if no alias found with any argument.
- if (!DoesAlias)
+ if (isNoModRef(AllArgsMask))
return ModRefInfo::NoModRef;
// Logical & between other AA analyses and argument analysis.
Result = intersectModRef(Result, AllArgsMask);
@@ -215,12 +213,12 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
return Result;
}
-ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getModRefInfo(CS1, CS2));
+ Result = intersectModRef(Result, AA->getModRefInfo(Call1, Call2));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -230,59 +228,61 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
// Try to refine the mod-ref info further using other API entry points to the
// aggregate set of AA results.
- // If CS1 or CS2 are readnone, they don't interact.
- auto CS1B = getModRefBehavior(CS1);
- if (CS1B == FMRB_DoesNotAccessMemory)
+ // If Call1 or Call2 are readnone, they don't interact.
+ auto Call1B = getModRefBehavior(Call1);
+ if (Call1B == FMRB_DoesNotAccessMemory)
return ModRefInfo::NoModRef;
- auto CS2B = getModRefBehavior(CS2);
- if (CS2B == FMRB_DoesNotAccessMemory)
+ auto Call2B = getModRefBehavior(Call2);
+ if (Call2B == FMRB_DoesNotAccessMemory)
return ModRefInfo::NoModRef;
// If they both only read from memory, there is no dependence.
- if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
+ if (onlyReadsMemory(Call1B) && onlyReadsMemory(Call2B))
return ModRefInfo::NoModRef;
- // If CS1 only reads memory, the only dependence on CS2 can be
- // from CS1 reading memory written by CS2.
- if (onlyReadsMemory(CS1B))
+ // If Call1 only reads memory, the only dependence on Call2 can be
+ // from Call1 reading memory written by Call2.
+ if (onlyReadsMemory(Call1B))
Result = clearMod(Result);
- else if (doesNotReadMemory(CS1B))
+ else if (doesNotReadMemory(Call1B))
Result = clearRef(Result);
- // If CS2 only access memory through arguments, accumulate the mod/ref
- // information from CS1's references to the memory referenced by
- // CS2's arguments.
- if (onlyAccessesArgPointees(CS2B)) {
- if (!doesAccessArgPointees(CS2B))
+ // If Call2 only access memory through arguments, accumulate the mod/ref
+ // information from Call1's references to the memory referenced by
+ // Call2's arguments.
+ if (onlyAccessesArgPointees(Call2B)) {
+ if (!doesAccessArgPointees(Call2B))
return ModRefInfo::NoModRef;
ModRefInfo R = ModRefInfo::NoModRef;
bool IsMustAlias = true;
- for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+ for (auto I = Call2->arg_begin(), E = Call2->arg_end(); I != E; ++I) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
continue;
- unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I);
- auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI);
-
- // ArgModRefCS2 indicates what CS2 might do to CS2ArgLoc, and the
- // dependence of CS1 on that location is the inverse:
- // - If CS2 modifies location, dependence exists if CS1 reads or writes.
- // - If CS2 only reads location, dependence exists if CS1 writes.
- ModRefInfo ArgModRefCS2 = getArgModRefInfo(CS2, CS2ArgIdx);
+ unsigned Call2ArgIdx = std::distance(Call2->arg_begin(), I);
+ auto Call2ArgLoc =
+ MemoryLocation::getForArgument(Call2, Call2ArgIdx, TLI);
+
+ // ArgModRefC2 indicates what Call2 might do to Call2ArgLoc, and the
+ // dependence of Call1 on that location is the inverse:
+ // - If Call2 modifies location, dependence exists if Call1 reads or
+ // writes.
+ // - If Call2 only reads location, dependence exists if Call1 writes.
+ ModRefInfo ArgModRefC2 = getArgModRefInfo(Call2, Call2ArgIdx);
ModRefInfo ArgMask = ModRefInfo::NoModRef;
- if (isModSet(ArgModRefCS2))
+ if (isModSet(ArgModRefC2))
ArgMask = ModRefInfo::ModRef;
- else if (isRefSet(ArgModRefCS2))
+ else if (isRefSet(ArgModRefC2))
ArgMask = ModRefInfo::Mod;
- // ModRefCS1 indicates what CS1 might do to CS2ArgLoc, and we use
+ // ModRefC1 indicates what Call1 might do to Call2ArgLoc, and we use
// above ArgMask to update dependence info.
- ModRefInfo ModRefCS1 = getModRefInfo(CS1, CS2ArgLoc);
- ArgMask = intersectModRef(ArgMask, ModRefCS1);
+ ModRefInfo ModRefC1 = getModRefInfo(Call1, Call2ArgLoc);
+ ArgMask = intersectModRef(ArgMask, ModRefC1);
// Conservatively clear IsMustAlias unless only MustAlias is found.
- IsMustAlias &= isMustSet(ModRefCS1);
+ IsMustAlias &= isMustSet(ModRefC1);
R = intersectModRef(unionModRef(R, ArgMask), Result);
if (R == Result) {
@@ -300,31 +300,32 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
return IsMustAlias ? setMust(R) : clearMust(R);
}
- // If CS1 only accesses memory through arguments, check if CS2 references
- // any of the memory referenced by CS1's arguments. If not, return NoModRef.
- if (onlyAccessesArgPointees(CS1B)) {
- if (!doesAccessArgPointees(CS1B))
+ // If Call1 only accesses memory through arguments, check if Call2 references
+ // any of the memory referenced by Call1's arguments. If not, return NoModRef.
+ if (onlyAccessesArgPointees(Call1B)) {
+ if (!doesAccessArgPointees(Call1B))
return ModRefInfo::NoModRef;
ModRefInfo R = ModRefInfo::NoModRef;
bool IsMustAlias = true;
- for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
+ for (auto I = Call1->arg_begin(), E = Call1->arg_end(); I != E; ++I) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
continue;
- unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I);
- auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI);
-
- // ArgModRefCS1 indicates what CS1 might do to CS1ArgLoc; if CS1 might
- // Mod CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If
- // CS1 might Ref, then we care only about a Mod by CS2.
- ModRefInfo ArgModRefCS1 = getArgModRefInfo(CS1, CS1ArgIdx);
- ModRefInfo ModRefCS2 = getModRefInfo(CS2, CS1ArgLoc);
- if ((isModSet(ArgModRefCS1) && isModOrRefSet(ModRefCS2)) ||
- (isRefSet(ArgModRefCS1) && isModSet(ModRefCS2)))
- R = intersectModRef(unionModRef(R, ArgModRefCS1), Result);
+ unsigned Call1ArgIdx = std::distance(Call1->arg_begin(), I);
+ auto Call1ArgLoc =
+ MemoryLocation::getForArgument(Call1, Call1ArgIdx, TLI);
+
+ // ArgModRefC1 indicates what Call1 might do to Call1ArgLoc; if Call1
+ // might Mod Call1ArgLoc, then we care about either a Mod or a Ref by
+ // Call2. If Call1 might Ref, then we care only about a Mod by Call2.
+ ModRefInfo ArgModRefC1 = getArgModRefInfo(Call1, Call1ArgIdx);
+ ModRefInfo ModRefC2 = getModRefInfo(Call2, Call1ArgLoc);
+ if ((isModSet(ArgModRefC1) && isModOrRefSet(ModRefC2)) ||
+ (isRefSet(ArgModRefC1) && isModSet(ModRefC2)))
+ R = intersectModRef(unionModRef(R, ArgModRefC1), Result);
// Conservatively clear IsMustAlias unless only MustAlias is found.
- IsMustAlias &= isMustSet(ModRefCS2);
+ IsMustAlias &= isMustSet(ModRefC2);
if (R == Result) {
// On early exit, not all args were checked, cannot set Must.
@@ -344,11 +345,11 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
return Result;
}
-FunctionModRefBehavior AAResults::getModRefBehavior(ImmutableCallSite CS) {
+FunctionModRefBehavior AAResults::getModRefBehavior(const CallBase *Call) {
FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior;
for (const auto &AA : AAs) {
- Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(CS));
+ Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(Call));
// Early-exit the moment we reach the bottom of the lattice.
if (Result == FMRB_DoesNotAccessMemory)
@@ -560,8 +561,8 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
isa<Constant>(Object))
return ModRefInfo::ModRef;
- ImmutableCallSite CS(I);
- if (!CS.getInstruction() || CS.getInstruction() == Object)
+ const auto *Call = dyn_cast<CallBase>(I);
+ if (!Call || Call == Object)
return ModRefInfo::ModRef;
if (PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
@@ -574,14 +575,14 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
ModRefInfo R = ModRefInfo::NoModRef;
bool IsMustAlias = true;
// Set flag only if no May found and all operands processed.
- for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
+ for (auto CI = Call->data_operands_begin(), CE = Call->data_operands_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture or byval pointer arguments. If this
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(ArgNo) &&
- ArgNo < CS.getNumArgOperands() && !CS.isByValArgument(ArgNo)))
+ (!Call->doesNotCapture(ArgNo) && ArgNo < Call->getNumArgOperands() &&
+ !Call->isByValArgument(ArgNo)))
continue;
AliasResult AR = alias(MemoryLocation(*CI), MemoryLocation(Object));
@@ -593,9 +594,9 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
IsMustAlias = false;
if (AR == NoAlias)
continue;
- if (CS.doesNotAccessMemory(ArgNo))
+ if (Call->doesNotAccessMemory(ArgNo))
continue;
- if (CS.onlyReadsMemory(ArgNo)) {
+ if (Call->onlyReadsMemory(ArgNo)) {
R = ModRefInfo::Ref;
continue;
}
@@ -642,28 +643,6 @@ AnalysisKey AAManager::Key;
namespace {
-/// A wrapper pass for external alias analyses. This just squirrels away the
-/// callback used to run any analyses and register their results.
-struct ExternalAAWrapperPass : ImmutablePass {
- using CallbackT = std::function<void(Pass &, Function &, AAResults &)>;
-
- CallbackT CB;
-
- static char ID;
-
- ExternalAAWrapperPass() : ImmutablePass(ID) {
- initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
-
- explicit ExternalAAWrapperPass(CallbackT CB)
- : ImmutablePass(ID), CB(std::move(CB)) {
- initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-};
} // end anonymous namespace
@@ -799,8 +778,8 @@ AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
}
bool llvm::isNoAliasCall(const Value *V) {
- if (auto CS = ImmutableCallSite(V))
- return CS.hasRetAttr(Attribute::NoAlias);
+ if (const auto *Call = dyn_cast<CallBase>(V))
+ return Call->hasRetAttr(Attribute::NoAlias);
return false;
}
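
The mechanical pattern behind most of the hunks above is the replacement of ImmutableCallSite with a direct dyn_cast to CallBase; a sketch, assuming I is an Instruction pointer in scope (not code taken verbatim from the change):

    // Before: ImmutableCallSite CS(I); if (CS) { ... CS.onlyReadsMemory(ArgNo) ... }
    // After:
    if (const auto *Call = llvm::dyn_cast<llvm::CallBase>(I)) {
      for (unsigned ArgNo = 0; ArgNo < Call->getNumArgOperands(); ++ArgNo) {
        if (Call->doesNotAccessMemory(ArgNo))
          continue;                  // this argument is not accessed at all
        if (Call->onlyReadsMemory(ArgNo))
          continue;                  // may only be read through this call
        // otherwise the call may read or write through this argument
      }
    }
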
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index 764ae9160350..85dd4fe95b33 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -66,11 +66,10 @@ static inline void PrintModRefResults(const char *Msg, bool P, Instruction *I,
}
}
-static inline void PrintModRefResults(const char *Msg, bool P, CallSite CSA,
- CallSite CSB, Module *M) {
+static inline void PrintModRefResults(const char *Msg, bool P, CallBase *CallA,
+ CallBase *CallB, Module *M) {
if (PrintAll || P) {
- errs() << " " << Msg << ": " << *CSA.getInstruction() << " <-> "
- << *CSB.getInstruction() << '\n';
+ errs() << " " << Msg << ": " << *CallA << " <-> " << *CallB << '\n';
}
}
@@ -98,7 +97,7 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
++FunctionCount;
SetVector<Value *> Pointers;
- SmallSetVector<CallSite, 16> CallSites;
+ SmallSetVector<CallBase *, 16> Calls;
SetVector<Value *> Loads;
SetVector<Value *> Stores;
@@ -114,16 +113,16 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
if (EvalAAMD && isa<StoreInst>(&*I))
Stores.insert(&*I);
Instruction &Inst = *I;
- if (auto CS = CallSite(&Inst)) {
- Value *Callee = CS.getCalledValue();
+ if (auto *Call = dyn_cast<CallBase>(&Inst)) {
+ Value *Callee = Call->getCalledValue();
// Skip actual functions for direct function calls.
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
Pointers.insert(Callee);
// Consider formals.
- for (Use &DataOp : CS.data_ops())
+ for (Use &DataOp : Call->data_ops())
if (isInterestingPointer(DataOp))
Pointers.insert(DataOp);
- CallSites.insert(CS);
+ Calls.insert(Call);
} else {
// Consider all operands.
for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
@@ -136,19 +135,21 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
if (PrintAll || PrintNoAlias || PrintMayAlias || PrintPartialAlias ||
PrintMustAlias || PrintNoModRef || PrintMod || PrintRef || PrintModRef)
errs() << "Function: " << F.getName() << ": " << Pointers.size()
- << " pointers, " << CallSites.size() << " call sites\n";
+ << " pointers, " << Calls.size() << " call sites\n";
// iterate over the worklist, and run the full (n^2)/2 disambiguations
for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
I1 != E; ++I1) {
- uint64_t I1Size = MemoryLocation::UnknownSize;
+ auto I1Size = LocationSize::unknown();
Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
- if (I1ElTy->isSized()) I1Size = DL.getTypeStoreSize(I1ElTy);
+ if (I1ElTy->isSized())
+ I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy));
for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
- uint64_t I2Size = MemoryLocation::UnknownSize;
- Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
- if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy);
+ auto I2Size = LocationSize::unknown();
+ Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
+ if (I2ElTy->isSized())
+ I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy));
AliasResult AR = AA.alias(*I1, I1Size, *I2, I2Size);
switch (AR) {
@@ -228,49 +229,48 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
}
// Mod/ref alias analysis: compare all pairs of calls and values
- for (CallSite C : CallSites) {
- Instruction *I = C.getInstruction();
-
+ for (CallBase *Call : Calls) {
for (auto Pointer : Pointers) {
- uint64_t Size = MemoryLocation::UnknownSize;
+ auto Size = LocationSize::unknown();
Type *ElTy = cast<PointerType>(Pointer->getType())->getElementType();
- if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy);
+ if (ElTy->isSized())
+ Size = LocationSize::precise(DL.getTypeStoreSize(ElTy));
- switch (AA.getModRefInfo(C, Pointer, Size)) {
+ switch (AA.getModRefInfo(Call, Pointer, Size)) {
case ModRefInfo::NoModRef:
- PrintModRefResults("NoModRef", PrintNoModRef, I, Pointer,
+ PrintModRefResults("NoModRef", PrintNoModRef, Call, Pointer,
F.getParent());
++NoModRefCount;
break;
case ModRefInfo::Mod:
- PrintModRefResults("Just Mod", PrintMod, I, Pointer, F.getParent());
+ PrintModRefResults("Just Mod", PrintMod, Call, Pointer, F.getParent());
++ModCount;
break;
case ModRefInfo::Ref:
- PrintModRefResults("Just Ref", PrintRef, I, Pointer, F.getParent());
+ PrintModRefResults("Just Ref", PrintRef, Call, Pointer, F.getParent());
++RefCount;
break;
case ModRefInfo::ModRef:
- PrintModRefResults("Both ModRef", PrintModRef, I, Pointer,
+ PrintModRefResults("Both ModRef", PrintModRef, Call, Pointer,
F.getParent());
++ModRefCount;
break;
case ModRefInfo::Must:
- PrintModRefResults("Must", PrintMust, I, Pointer, F.getParent());
+ PrintModRefResults("Must", PrintMust, Call, Pointer, F.getParent());
++MustCount;
break;
case ModRefInfo::MustMod:
- PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, I, Pointer,
+ PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, Call, Pointer,
F.getParent());
++MustModCount;
break;
case ModRefInfo::MustRef:
- PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, I, Pointer,
+ PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, Call, Pointer,
F.getParent());
++MustRefCount;
break;
case ModRefInfo::MustModRef:
- PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, I,
+ PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, Call,
Pointer, F.getParent());
++MustModRefCount;
break;
@@ -279,44 +279,46 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
}
// Mod/ref alias analysis: compare all pairs of calls
- for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
- for (auto D = CallSites.begin(); D != Ce; ++D) {
- if (D == C)
+ for (CallBase *CallA : Calls) {
+ for (CallBase *CallB : Calls) {
+ if (CallA == CallB)
continue;
- switch (AA.getModRefInfo(*C, *D)) {
+ switch (AA.getModRefInfo(CallA, CallB)) {
case ModRefInfo::NoModRef:
- PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
+ PrintModRefResults("NoModRef", PrintNoModRef, CallA, CallB,
+ F.getParent());
++NoModRefCount;
break;
case ModRefInfo::Mod:
- PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
+ PrintModRefResults("Just Mod", PrintMod, CallA, CallB, F.getParent());
++ModCount;
break;
case ModRefInfo::Ref:
- PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
+ PrintModRefResults("Just Ref", PrintRef, CallA, CallB, F.getParent());
++RefCount;
break;
case ModRefInfo::ModRef:
- PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
+ PrintModRefResults("Both ModRef", PrintModRef, CallA, CallB,
+ F.getParent());
++ModRefCount;
break;
case ModRefInfo::Must:
- PrintModRefResults("Must", PrintMust, *C, *D, F.getParent());
+ PrintModRefResults("Must", PrintMust, CallA, CallB, F.getParent());
++MustCount;
break;
case ModRefInfo::MustMod:
- PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, *C, *D,
+ PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, CallA, CallB,
F.getParent());
++MustModCount;
break;
case ModRefInfo::MustRef:
- PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, *C, *D,
+ PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, CallA, CallB,
F.getParent());
++MustRefCount;
break;
case ModRefInfo::MustModRef:
- PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, *C, *D,
- F.getParent());
+ PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, CallA,
+ CallB, F.getParent());
++MustModRefCount;
break;
}
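
As a side note on the LocationSize conversions in the evaluator hunks above, a minimal sketch of the pattern now used to size a pointer operand (the helper name is hypothetical; only the DataLayout and MemoryLocation APIs this file already includes are assumed):

// Sized pointee types get a precise byte count from the DataLayout; unsized
// (e.g. opaque) pointee types fall back to an unknown location size.
static LocationSize getPointerOperandSize(const Value *Ptr,
                                          const DataLayout &DL) {
  Type *ElTy = cast<PointerType>(Ptr->getType())->getElementType();
  return ElTy->isSized() ? LocationSize::precise(DL.getTypeStoreSize(ElTy))
                         : LocationSize::unknown();
}
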
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 8f903fa4f1e8..f6ad704cc914 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -13,9 +13,9 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -24,6 +24,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -55,7 +56,6 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
// Update the alias and access types of this set...
Access |= AS.Access;
Alias |= AS.Alias;
- Volatile |= AS.Volatile;
if (Alias == SetMustAlias) {
// Check that these two merged sets really are must aliases. Since both
@@ -113,10 +113,9 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) {
if (AliasSet *Fwd = AS->Forward) {
Fwd->dropRef(*this);
AS->Forward = nullptr;
- }
-
- if (AS->Alias == AliasSet::SetMayAlias)
- TotalMayAliasSetSize -= AS->size();
+ } else // Update TotalMayAliasSetSize only if not forwarding.
+ if (AS->Alias == AliasSet::SetMayAlias)
+ TotalMayAliasSetSize -= AS->size();
AliasSets.erase(AS);
}
@@ -169,7 +168,12 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
addRef();
UnknownInsts.emplace_back(I);
- if (!I->mayWriteToMemory()) {
+ // Guards are marked as modifying memory for control flow modelling purposes,
+ // but don't actually modify any specific memory location.
+ using namespace PatternMatch;
+ bool MayWriteMemory = I->mayWriteToMemory() && !isGuard(I) &&
+ !(I->use_empty() && match(I, m_Intrinsic<Intrinsic::invariant_start>()));
+ if (!MayWriteMemory) {
Alias = SetMayAlias;
Access |= RefAccess;
return;
@@ -226,12 +230,13 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
if (AliasAny)
return true;
- if (!Inst->mayReadOrWriteMemory())
- return false;
+ assert(Inst->mayReadOrWriteMemory() &&
+ "Instruction must either read or write memory.");
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
if (auto *UnknownInst = getUnknownInst(i)) {
- ImmutableCallSite C1(UnknownInst), C2(Inst);
+ const auto *C1 = dyn_cast<CallBase>(UnknownInst);
+ const auto *C2 = dyn_cast<CallBase>(Inst);
if (!C1 || !C2 || isModOrRefSet(AA.getModRefInfo(C1, C2)) ||
isModOrRefSet(AA.getModRefInfo(C2, C1)))
return true;
@@ -246,6 +251,31 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
return false;
}
+Instruction* AliasSet::getUniqueInstruction() {
+ if (AliasAny)
+ // May have collapsed alias set
+ return nullptr;
+ if (begin() != end()) {
+ if (!UnknownInsts.empty())
+ // Another instruction found
+ return nullptr;
+ if (std::next(begin()) != end())
+ // Another instruction found
+ return nullptr;
+ Value *Addr = begin()->getValue();
+ assert(!Addr->user_empty() &&
+ "where's the instruction which added this pointer?");
+ if (std::next(Addr->user_begin()) != Addr->user_end())
+ // Another instruction found -- this is really restrictive
+ // TODO: generalize!
+ return nullptr;
+ return cast<Instruction>(*(Addr->user_begin()));
+ }
+ if (1 != UnknownInsts.size())
+ return nullptr;
+ return cast<Instruction>(UnknownInsts[0]);
+}
+
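
A hypothetical caller of the new AliasSet::getUniqueInstruction() helper (none appears in this diff; AS is assumed to be an AliasSet reference in scope) might use it along these lines:

// If the alias set is known to be touched by exactly one instruction, a
// client can reason about that single access directly instead of summarizing
// the whole set; here we merely check that it is a simple store.
if (Instruction *UniqueI = AS.getUniqueInstruction())
  if (auto *SI = dyn_cast<StoreInst>(UniqueI))
    if (SI->isSimple())
      ; // candidate for further, per-instruction reasoning
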
void AliasSetTracker::clear() {
// Delete all the PointerRec entries.
for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
@@ -280,13 +310,6 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
return FoundSet;
}
-bool AliasSetTracker::containsUnknown(const Instruction *Inst) const {
- for (const AliasSet &AS : *this)
- if (!AS.Forward && AS.aliasesUnknownInst(Inst, AA))
- return true;
- return false;
-}
-
AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
AliasSet *FoundSet = nullptr;
for (iterator I = begin(), E = end(); I != E;) {
@@ -295,17 +318,18 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
continue;
if (!FoundSet) // If this is the first alias set ptr can go into.
FoundSet = &*Cur; // Remember it.
- else if (!Cur->Forward) // Otherwise, we must merge the sets.
+ else // Otherwise, we must merge the sets.
FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
}
return FoundSet;
}
-/// getAliasSetForPointer - Return the alias set that the specified pointer
-/// lives in.
-AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer,
- LocationSize Size,
- const AAMDNodes &AAInfo) {
+AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
+
+ Value * const Pointer = const_cast<Value*>(MemLoc.Ptr);
+ const LocationSize Size = MemLoc.Size;
+ const AAMDNodes &AAInfo = MemLoc.AATags;
+
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
if (AliasAnyAS) {
@@ -351,83 +375,32 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer,
void AliasSetTracker::add(Value *Ptr, LocationSize Size,
const AAMDNodes &AAInfo) {
- addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess);
+ addPointer(MemoryLocation(Ptr, Size, AAInfo), AliasSet::NoAccess);
}
void AliasSetTracker::add(LoadInst *LI) {
- if (isStrongerThanMonotonic(LI->getOrdering())) return addUnknown(LI);
-
- AAMDNodes AAInfo;
- LI->getAAMetadata(AAInfo);
-
- AliasSet::AccessLattice Access = AliasSet::RefAccess;
- const DataLayout &DL = LI->getModule()->getDataLayout();
- AliasSet &AS = addPointer(LI->getOperand(0),
- DL.getTypeStoreSize(LI->getType()), AAInfo, Access);
- if (LI->isVolatile()) AS.setVolatile();
+ if (isStrongerThanMonotonic(LI->getOrdering()))
+ return addUnknown(LI);
+ addPointer(MemoryLocation::get(LI), AliasSet::RefAccess);
}
void AliasSetTracker::add(StoreInst *SI) {
- if (isStrongerThanMonotonic(SI->getOrdering())) return addUnknown(SI);
-
- AAMDNodes AAInfo;
- SI->getAAMetadata(AAInfo);
-
- AliasSet::AccessLattice Access = AliasSet::ModAccess;
- const DataLayout &DL = SI->getModule()->getDataLayout();
- Value *Val = SI->getOperand(0);
- AliasSet &AS = addPointer(
- SI->getOperand(1), DL.getTypeStoreSize(Val->getType()), AAInfo, Access);
- if (SI->isVolatile()) AS.setVolatile();
+ if (isStrongerThanMonotonic(SI->getOrdering()))
+ return addUnknown(SI);
+ addPointer(MemoryLocation::get(SI), AliasSet::ModAccess);
}
void AliasSetTracker::add(VAArgInst *VAAI) {
- AAMDNodes AAInfo;
- VAAI->getAAMetadata(AAInfo);
-
- addPointer(VAAI->getOperand(0), MemoryLocation::UnknownSize, AAInfo,
- AliasSet::ModRefAccess);
+ addPointer(MemoryLocation::get(VAAI), AliasSet::ModRefAccess);
}
void AliasSetTracker::add(AnyMemSetInst *MSI) {
- AAMDNodes AAInfo;
- MSI->getAAMetadata(AAInfo);
-
- uint64_t Len;
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(MSI->getLength()))
- Len = C->getZExtValue();
- else
- Len = MemoryLocation::UnknownSize;
-
- AliasSet &AS =
- addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
- auto *MS = dyn_cast<MemSetInst>(MSI);
- if (MS && MS->isVolatile())
- AS.setVolatile();
+ addPointer(MemoryLocation::getForDest(MSI), AliasSet::ModAccess);
}
void AliasSetTracker::add(AnyMemTransferInst *MTI) {
- AAMDNodes AAInfo;
- MTI->getAAMetadata(AAInfo);
-
- uint64_t Len;
- if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
- Len = C->getZExtValue();
- else
- Len = MemoryLocation::UnknownSize;
-
- AliasSet &ASSrc =
- addPointer(MTI->getRawSource(), Len, AAInfo, AliasSet::RefAccess);
-
- AliasSet &ASDst =
- addPointer(MTI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
-
- auto* MT = dyn_cast<MemTransferInst>(MTI);
- if (MT && MT->isVolatile()) {
- ASSrc.setVolatile();
- ASDst.setVolatile();
- }
+ addPointer(MemoryLocation::getForDest(MTI), AliasSet::ModAccess);
+ addPointer(MemoryLocation::getForSource(MTI), AliasSet::RefAccess);
}
void AliasSetTracker::addUnknown(Instruction *Inst) {
@@ -471,6 +444,46 @@ void AliasSetTracker::add(Instruction *I) {
return add(MSI);
if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(I))
return add(MTI);
+
+ // Handle all calls with known mod/ref sets generically
+ if (auto *Call = dyn_cast<CallBase>(I))
+ if (Call->onlyAccessesArgMemory()) {
+ auto getAccessFromModRef = [](ModRefInfo MRI) {
+ if (isRefSet(MRI) && isModSet(MRI))
+ return AliasSet::ModRefAccess;
+ else if (isModSet(MRI))
+ return AliasSet::ModAccess;
+ else if (isRefSet(MRI))
+ return AliasSet::RefAccess;
+ else
+ return AliasSet::NoAccess;
+ };
+
+ ModRefInfo CallMask = createModRefInfo(AA.getModRefBehavior(Call));
+
+ // Some intrinsics are marked as modifying memory for control flow
+ // modelling purposes, but don't actually modify any specific memory
+ // location.
+ using namespace PatternMatch;
+ if (Call->use_empty() &&
+ match(Call, m_Intrinsic<Intrinsic::invariant_start>()))
+ CallMask = clearMod(CallMask);
+
+ for (auto IdxArgPair : enumerate(Call->args())) {
+ int ArgIdx = IdxArgPair.index();
+ const Value *Arg = IdxArgPair.value();
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ MemoryLocation ArgLoc =
+ MemoryLocation::getForArgument(Call, ArgIdx, nullptr);
+ ModRefInfo ArgMask = AA.getArgModRefInfo(Call, ArgIdx);
+ ArgMask = intersectModRef(CallMask, ArgMask);
+ if (!isNoModRef(ArgMask))
+ addPointer(ArgLoc, getAccessFromModRef(ArgMask));
+ }
+ return;
+ }
+
return addUnknown(I);
}
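
To make the per-argument mask intersection in the hunk above concrete, a small worked sketch with purely illustrative attribute choices: a call that only writes argument memory, combined with a parameter that is only read, contributes nothing to the alias sets.

// CallMask comes from the call's overall behavior (write-only here), ArgMask
// from the individual parameter (read-only here); their intersection carries
// no mod/ref bits, so addPointer() is skipped for that argument.
ModRefInfo CallMask = ModRefInfo::Mod; // e.g. a writeonly, argmemonly callee
ModRefInfo ArgMask = ModRefInfo::Ref;  // e.g. a readonly pointer parameter
ModRefInfo Combined = intersectModRef(CallMask, ArgMask);
assert(isNoModRef(Combined) && "write-only call x read-only arg adds nothing");
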
@@ -496,12 +509,10 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
add(Inst);
// Loop over all of the pointers in this alias set.
- for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
- AliasSet &NewAS =
- addPointer(ASI.getPointer(), ASI.getSize(), ASI.getAAInfo(),
- (AliasSet::AccessLattice)AS.Access);
- if (AS.isVolatile()) NewAS.setVolatile();
- }
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI)
+ addPointer(
+ MemoryLocation(ASI.getPointer(), ASI.getSize(), ASI.getAAInfo()),
+ (AliasSet::AccessLattice)AS.Access);
}
}
@@ -594,10 +605,9 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() {
return *AliasAnyAS;
}
-AliasSet &AliasSetTracker::addPointer(Value *P, LocationSize Size,
- const AAMDNodes &AAInfo,
+AliasSet &AliasSetTracker::addPointer(MemoryLocation Loc,
AliasSet::AccessLattice E) {
- AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo);
+ AliasSet &AS = getAliasSetFor(Loc);
AS.Access |= E;
if (!AliasAnyAS && (TotalMayAliasSetSize > SaturationThreshold)) {
@@ -623,7 +633,6 @@ void AliasSet::print(raw_ostream &OS) const {
case ModRefAccess: OS << "Mod/Ref "; break;
default: llvm_unreachable("Bad value for Access!");
}
- if (isVolatile()) OS << "[volatile] ";
if (Forward)
OS << " forwarding to " << (void*)Forward;
@@ -632,7 +641,10 @@ void AliasSet::print(raw_ostream &OS) const {
for (iterator I = begin(), E = end(); I != E; ++I) {
if (I != begin()) OS << ", ";
I.getPointer()->printAsOperand(OS << "(");
- OS << ", " << I.getSize() << ")";
+ if (I.getSize() == LocationSize::unknown())
+ OS << ", unknown)";
+ else
+ OS << ", " << I.getSize() << ")";
}
}
if (!UnknownInsts.empty()) {
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index 30576cf1ae10..bb8742123a0f 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -39,7 +39,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeDependenceAnalysisWrapperPassPass(Registry);
initializeDelinearizationPass(Registry);
initializeDemandedBitsWrapperPassPass(Registry);
- initializeDivergenceAnalysisPass(Registry);
initializeDominanceFrontierWrapperPassPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
@@ -58,6 +57,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
initializeLazyValueInfoPrinterPass(Registry);
+ initializeLegacyDivergenceAnalysisPass(Registry);
initializeLintPass(Registry);
initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
@@ -77,6 +77,8 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeRegionOnlyPrinterPass(Registry);
initializeSCEVAAWrapperPassPass(Registry);
initializeScalarEvolutionWrapperPassPass(Registry);
+ initializeStackSafetyGlobalInfoWrapperPassPass(Registry);
+ initializeStackSafetyInfoWrapperPassPass(Registry);
initializeTargetTransformInfoWrapperPassPass(Registry);
initializeTypeBasedAAWrapperPassPass(Registry);
initializeScopedNoAliasAAWrapperPassPass(Registry);
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index f9ecbc043261..332eeaa00e73 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -31,7 +31,6 @@
#include "llvm/Analysis/PhiValues.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -68,6 +67,16 @@ using namespace llvm;
/// Enable analysis of recursive PHI nodes.
static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden,
cl::init(false));
+
+/// By default, even on 32-bit architectures we use 64-bit integers for
+/// calculations. This will allow us to more-aggressively decompose indexing
+/// expressions calculated using i64 values (e.g., long long in C) which is
+/// common enough to worry about.
+static cl::opt<bool> ForceAtLeast64Bits("basicaa-force-at-least-64b",
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> DoubleCalcBits("basicaa-double-calc-bits",
+ cl::Hidden, cl::init(false));
+
/// SearchLimitReached / SearchTimes shows how often the limit of
/// to decompose GEPs is reached. It will affect the precision
/// of basic alias analysis.
@@ -134,7 +143,7 @@ static bool isNonEscapingLocalObject(const Value *V) {
/// Returns true if the pointer is one which would have been considered an
/// escape by isNonEscapingLocalObject.
static bool isEscapeSource(const Value *V) {
- if (ImmutableCallSite(V))
+ if (isa<CallBase>(V))
return true;
if (isa<Argument>(V))
@@ -381,13 +390,22 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
}
/// To ensure a pointer offset fits in an integer of size PointerSize
-/// (in bits) when that size is smaller than 64. This is an issue in
-/// particular for 32b programs with negative indices that rely on two's
-/// complement wrap-arounds for precise alias information.
-static int64_t adjustToPointerSize(int64_t Offset, unsigned PointerSize) {
- assert(PointerSize <= 64 && "Invalid PointerSize!");
- unsigned ShiftBits = 64 - PointerSize;
- return (int64_t)((uint64_t)Offset << ShiftBits) >> ShiftBits;
+/// (in bits) when that size is smaller than the maximum pointer size. This is
+/// an issue in particular for 32b pointers with negative indices that rely on
+/// two's complement wrap-arounds for precise alias information when the
+/// maximum pointer size is 64b.
+static APInt adjustToPointerSize(APInt Offset, unsigned PointerSize) {
+ assert(PointerSize <= Offset.getBitWidth() && "Invalid PointerSize!");
+ unsigned ShiftBits = Offset.getBitWidth() - PointerSize;
+ return (Offset << ShiftBits).ashr(ShiftBits);
+}
+
+static unsigned getMaxPointerSize(const DataLayout &DL) {
+ unsigned MaxPointerSize = DL.getMaxPointerSizeInBits();
+ if (MaxPointerSize < 64 && ForceAtLeast64Bits) MaxPointerSize = 64;
+ if (DoubleCalcBits) MaxPointerSize *= 2;
+
+ return MaxPointerSize;
}
/// If V is a symbolic pointer expression, decompose it into a base pointer
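
A small worked sketch of the sign-extension trick adjustToPointerSize() relies on, with made-up values: a 32-bit index that wrapped to 0xFFFFFFFF during 64-bit GEP math is recovered as the -1 it represents on a 32-bit target.

// Shift the excess high bits out and arithmetic-shift them back in, so the
// value is re-interpreted as a signed PointerSize-bit quantity.
APInt Offset(/*numBits=*/64, 0xFFFFFFFFULL); // "-1" seen through 32-bit math
unsigned PointerSize = 32;
unsigned ShiftBits = Offset.getBitWidth() - PointerSize; // 32
APInt Adjusted = (Offset << ShiftBits).ashr(ShiftBits);
assert(Adjusted.getSExtValue() == -1 && "wrapped 32-bit offset recovered");
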
@@ -410,8 +428,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
unsigned MaxLookup = MaxLookupSearchDepth;
SearchTimes++;
- Decomposed.StructOffset = 0;
- Decomposed.OtherOffset = 0;
+ unsigned MaxPointerSize = getMaxPointerSize(DL);
Decomposed.VarIndices.clear();
do {
// See if this is a bitcast or GEP.
@@ -436,7 +453,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
if (!GEPOp) {
- if (auto CS = ImmutableCallSite(V)) {
+ if (const auto *Call = dyn_cast<CallBase>(V)) {
// CaptureTracking can know about special capturing properties of some
// intrinsics like launder.invariant.group, that can't be expressed with
// the attributes, but have properties like returning aliasing pointer.
@@ -446,7 +463,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// because it should be in sync with CaptureTracking. Not using it may
// cause weird miscompilations where 2 aliasing pointers are assumed to
// noalias.
- if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) {
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
V = RP;
continue;
}
@@ -501,13 +518,15 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
if (CIdx->isZero())
continue;
Decomposed.OtherOffset +=
- DL.getTypeAllocSize(GTI.getIndexedType()) * CIdx->getSExtValue();
+ (DL.getTypeAllocSize(GTI.getIndexedType()) *
+ CIdx->getValue().sextOrSelf(MaxPointerSize))
+ .sextOrTrunc(MaxPointerSize);
continue;
}
GepHasConstantOffset = false;
- uint64_t Scale = DL.getTypeAllocSize(GTI.getIndexedType());
+ APInt Scale(MaxPointerSize, DL.getTypeAllocSize(GTI.getIndexedType()));
unsigned ZExtBits = 0, SExtBits = 0;
// If the integer type is smaller than the pointer size, it is implicitly
@@ -519,20 +538,34 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
bool NSW = true, NUW = true;
+ const Value *OrigIndex = Index;
Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits,
SExtBits, DL, 0, AC, DT, NSW, NUW);
- // All GEP math happens in the width of the pointer type,
- // so we can truncate the value to 64-bits as we don't handle
- // currently pointers larger than 64 bits and we would crash
- // later. TODO: Make `Scale` an APInt to avoid this problem.
- if (IndexScale.getBitWidth() > 64)
- IndexScale = IndexScale.sextOrTrunc(64);
-
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
- Decomposed.OtherOffset += IndexOffset.getSExtValue() * Scale;
- Scale *= IndexScale.getSExtValue();
+
+ // It can be the case that, even though C1*V+C2 does not overflow for
+ // relevant values of V, (C2*Scale) can overflow. In that case, we cannot
+ // decompose the expression in this way.
+ //
+ // FIXME: C1*Scale and the other operations in the decomposed
+ // (C1*Scale)*V+C2*Scale can also overflow. We should check for this
+ // possibility.
+ APInt WideScaledOffset = IndexOffset.sextOrTrunc(MaxPointerSize*2) *
+ Scale.sext(MaxPointerSize*2);
+ if (WideScaledOffset.getMinSignedBits() > MaxPointerSize) {
+ Index = OrigIndex;
+ IndexScale = 1;
+ IndexOffset = 0;
+
+ ZExtBits = SExtBits = 0;
+ if (PointerSize > Width)
+ SExtBits += PointerSize - Width;
+ } else {
+ Decomposed.OtherOffset += IndexOffset.sextOrTrunc(MaxPointerSize) * Scale;
+ Scale *= IndexScale.sextOrTrunc(MaxPointerSize);
+ }
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
@@ -552,9 +585,8 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// pointer size.
Scale = adjustToPointerSize(Scale, PointerSize);
- if (Scale) {
- VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,
- static_cast<int64_t>(Scale)};
+ if (!!Scale) {
+ VariableGEPIndex Entry = {Index, ZExtBits, SExtBits, Scale};
Decomposed.VarIndices.push_back(Entry);
}
}
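
The overflow fallback added a hunk earlier (the WideScaledOffset check) can be illustrated with hypothetical numbers: an index whose constant part is 2^62, scaled by an 8-byte element size, needs more than 64 signed bits, so the linear decomposition is abandoned for that index.

// The product is formed at twice MaxPointerSize so the overflow itself is
// representable; if it needs more than MaxPointerSize signed bits, the
// decomposed form (C1*Scale)*V + C2*Scale would be meaningless.
unsigned MaxPointerSize = 64;
APInt IndexOffset(MaxPointerSize, uint64_t(1) << 62); // C2
APInt Scale(MaxPointerSize, 8);                       // element size in bytes
APInt WideScaledOffset = IndexOffset.sextOrTrunc(MaxPointerSize * 2) *
                         Scale.sext(MaxPointerSize * 2);
assert(WideScaledOffset.getMinSignedBits() > MaxPointerSize &&
       "constant part of the scaled index no longer fits the offset width");
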
@@ -640,8 +672,8 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
}
/// Returns the behavior when calling the given call site.
-FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
- if (CS.doesNotAccessMemory())
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(const CallBase *Call) {
+ if (Call->doesNotAccessMemory())
// Can't do better than this.
return FMRB_DoesNotAccessMemory;
@@ -649,23 +681,23 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
// If the callsite knows it only reads memory, don't return worse
// than that.
- if (CS.onlyReadsMemory())
+ if (Call->onlyReadsMemory())
Min = FMRB_OnlyReadsMemory;
- else if (CS.doesNotReadMemory())
+ else if (Call->doesNotReadMemory())
Min = FMRB_DoesNotReadMemory;
- if (CS.onlyAccessesArgMemory())
+ if (Call->onlyAccessesArgMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
- else if (CS.onlyAccessesInaccessibleMemory())
+ else if (Call->onlyAccessesInaccessibleMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleMem);
- else if (CS.onlyAccessesInaccessibleMemOrArgMem())
+ else if (Call->onlyAccessesInaccessibleMemOrArgMem())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleOrArgMem);
- // If CS has operand bundles then aliasing attributes from the function it
- // calls do not directly apply to the CallSite. This can be made more
- // precise in the future.
- if (!CS.hasOperandBundles())
- if (const Function *F = CS.getCalledFunction())
+ // If the call has operand bundles then aliasing attributes from the function
+ // it calls do not directly apply to the call. This can be made more precise
+ // in the future.
+ if (!Call->hasOperandBundles())
+ if (const Function *F = Call->getCalledFunction())
Min =
FunctionModRefBehavior(Min & getBestAAResults().getModRefBehavior(F));
@@ -698,9 +730,9 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
}
/// Returns true if this is a writeonly (i.e Mod only) parameter.
-static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
+static bool isWriteOnlyParam(const CallBase *Call, unsigned ArgIdx,
const TargetLibraryInfo &TLI) {
- if (CS.paramHasAttr(ArgIdx, Attribute::WriteOnly))
+ if (Call->paramHasAttr(ArgIdx, Attribute::WriteOnly))
return true;
// We can bound the aliasing properties of memset_pattern16 just as we can
@@ -710,7 +742,8 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
// FIXME Consider handling this in InferFunctionAttr.cpp together with other
// attributes.
LibFunc F;
- if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
+ if (Call->getCalledFunction() &&
+ TLI.getLibFunc(*Call->getCalledFunction(), F) &&
F == LibFunc_memset_pattern16 && TLI.has(F))
if (ArgIdx == 0)
return true;
@@ -722,23 +755,23 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
return false;
}
-ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
+ModRefInfo BasicAAResult::getArgModRefInfo(const CallBase *Call,
unsigned ArgIdx) {
// Checking for known builtin intrinsics and target library functions.
- if (isWriteOnlyParam(CS, ArgIdx, TLI))
+ if (isWriteOnlyParam(Call, ArgIdx, TLI))
return ModRefInfo::Mod;
- if (CS.paramHasAttr(ArgIdx, Attribute::ReadOnly))
+ if (Call->paramHasAttr(ArgIdx, Attribute::ReadOnly))
return ModRefInfo::Ref;
- if (CS.paramHasAttr(ArgIdx, Attribute::ReadNone))
+ if (Call->paramHasAttr(ArgIdx, Attribute::ReadNone))
return ModRefInfo::NoModRef;
- return AAResultBase::getArgModRefInfo(CS, ArgIdx);
+ return AAResultBase::getArgModRefInfo(Call, ArgIdx);
}
-static bool isIntrinsicCall(ImmutableCallSite CS, Intrinsic::ID IID) {
- const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+static bool isIntrinsicCall(const CallBase *Call, Intrinsic::ID IID) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Call);
return II && II->getIntrinsicID() == IID;
}
@@ -794,9 +827,9 @@ AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
/// Since we only look at local properties of this function, we really can't
/// say much about this query. We do, however, use simple "address taken"
/// analysis on local objects.
-ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
- assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
+ assert(notDifferentParent(Call, Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
@@ -807,15 +840,21 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// contents of the alloca into argument registers or stack slots, so there is
// no lifetime issue.
if (isa<AllocaInst>(Object))
- if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+ if (const CallInst *CI = dyn_cast<CallInst>(Call))
if (CI->isTailCall() &&
!CI->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return ModRefInfo::NoModRef;
+ // Stack restore is able to modify unescaped dynamic allocas. Assume it may
+ // modify them even though the alloca is not escaped.
+ if (auto *AI = dyn_cast<AllocaInst>(Object))
+ if (!AI->isStaticAlloca() && isIntrinsicCall(Call, Intrinsic::stackrestore))
+ return ModRefInfo::Mod;
+
// If the pointer is to a locally allocated object that does not escape,
// then the call can not mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
- if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
+ if (!isa<Constant>(Object) && Call != Object &&
isNonEscapingLocalObject(Object)) {
// Optimistically assume that call doesn't touch Object and check this
@@ -824,19 +863,20 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
bool IsMustAlias = true;
unsigned OperandNo = 0;
- for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
+ for (auto CI = Call->data_operands_begin(), CE = Call->data_operands_end();
CI != CE; ++CI, ++OperandNo) {
// Only look at the no-capture or byval pointer arguments. If this
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(OperandNo) &&
- OperandNo < CS.getNumArgOperands() && !CS.isByValArgument(OperandNo)))
+ (!Call->doesNotCapture(OperandNo) &&
+ OperandNo < Call->getNumArgOperands() &&
+ !Call->isByValArgument(OperandNo)))
continue;
// Call doesn't access memory through this operand, so we don't care
// if it aliases with Object.
- if (CS.doesNotAccessMemory(OperandNo))
+ if (Call->doesNotAccessMemory(OperandNo))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -850,12 +890,12 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
continue;
// Operand aliases 'Object', but call doesn't modify it. Strengthen
// initial assumption and keep looking in case if there are more aliases.
- if (CS.onlyReadsMemory(OperandNo)) {
+ if (Call->onlyReadsMemory(OperandNo)) {
Result = setRef(Result);
continue;
}
// Operand aliases 'Object' but call only writes into it.
- if (CS.doesNotReadMemory(OperandNo)) {
+ if (Call->doesNotReadMemory(OperandNo)) {
Result = setMod(Result);
continue;
}
@@ -879,17 +919,16 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
}
}
- // If the CallSite is to malloc or calloc, we can assume that it doesn't
+ // If the call is to malloc or calloc, we can assume that it doesn't
// modify any IR visible value. This is only valid because we assume these
// routines do not read values visible in the IR. TODO: Consider special
// casing realloc and strdup routines which access only their arguments as
// well. Or alternatively, replace all of this with inaccessiblememonly once
// that's implemented fully.
- auto *Inst = CS.getInstruction();
- if (isMallocOrCallocLikeFn(Inst, &TLI)) {
+ if (isMallocOrCallocLikeFn(Call, &TLI)) {
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
- if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias)
+ if (getBestAAResults().alias(MemoryLocation(Call), Loc) == NoAlias)
return ModRefInfo::NoModRef;
}
@@ -897,7 +936,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// operands, i.e., source and destination of any given memcpy must no-alias.
// If Loc must-aliases either one of these two locations, then it necessarily
// no-aliases the other.
- if (auto *Inst = dyn_cast<AnyMemCpyInst>(CS.getInstruction())) {
+ if (auto *Inst = dyn_cast<AnyMemCpyInst>(Call)) {
AliasResult SrcAA, DestAA;
if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
@@ -921,7 +960,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
- if (isIntrinsicCall(CS, Intrinsic::assume))
+ if (isIntrinsicCall(Call, Intrinsic::assume))
return ModRefInfo::NoModRef;
// Like assumes, guard intrinsics are also marked as arbitrarily writing so
@@ -931,7 +970,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// *Unlike* assumes, guard intrinsics are modeled as reading memory since the
// heap state at the point the guard is issued needs to be consistent in case
// the guard invokes the "deopt" continuation.
- if (isIntrinsicCall(CS, Intrinsic::experimental_guard))
+ if (isIntrinsicCall(Call, Intrinsic::experimental_guard))
return ModRefInfo::Ref;
// Like assumes, invariant.start intrinsics were also marked as arbitrarily
@@ -957,20 +996,20 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// The transformation will cause the second store to be ignored (based on
// rules of invariant.start) and print 40, while the first program always
// prints 50.
- if (isIntrinsicCall(CS, Intrinsic::invariant_start))
+ if (isIntrinsicCall(Call, Intrinsic::invariant_start))
return ModRefInfo::Ref;
// The AAResultBase base class has some smarts, lets use them.
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
-ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
- if (isIntrinsicCall(CS1, Intrinsic::assume) ||
- isIntrinsicCall(CS2, Intrinsic::assume))
+ if (isIntrinsicCall(Call1, Intrinsic::assume) ||
+ isIntrinsicCall(Call2, Intrinsic::assume))
return ModRefInfo::NoModRef;
// Like assumes, guard intrinsics are also marked as arbitrarily writing so
@@ -984,26 +1023,26 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
// NB! This function is *not* commutative, so we special-case two
// possibilities for guard intrinsics.
- if (isIntrinsicCall(CS1, Intrinsic::experimental_guard))
- return isModSet(createModRefInfo(getModRefBehavior(CS2)))
+ if (isIntrinsicCall(Call1, Intrinsic::experimental_guard))
+ return isModSet(createModRefInfo(getModRefBehavior(Call2)))
? ModRefInfo::Ref
: ModRefInfo::NoModRef;
- if (isIntrinsicCall(CS2, Intrinsic::experimental_guard))
- return isModSet(createModRefInfo(getModRefBehavior(CS1)))
+ if (isIntrinsicCall(Call2, Intrinsic::experimental_guard))
+ return isModSet(createModRefInfo(getModRefBehavior(Call1)))
? ModRefInfo::Mod
: ModRefInfo::NoModRef;
// The AAResultBase base class has some smarts, lets use them.
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
}
/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
/// both having the exact same pointer operand.
static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
- LocationSize V1Size,
+ LocationSize MaybeV1Size,
const GEPOperator *GEP2,
- LocationSize V2Size,
+ LocationSize MaybeV2Size,
const DataLayout &DL) {
assert(GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() ==
GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() &&
@@ -1019,10 +1058,13 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// If we don't know the size of the accesses through both GEPs, we can't
// determine whether the struct fields accessed can't alias.
- if (V1Size == MemoryLocation::UnknownSize ||
- V2Size == MemoryLocation::UnknownSize)
+ if (MaybeV1Size == LocationSize::unknown() ||
+ MaybeV2Size == LocationSize::unknown())
return MayAlias;
+ const uint64_t V1Size = MaybeV1Size.getValue();
+ const uint64_t V2Size = MaybeV2Size.getValue();
+
ConstantInt *C1 =
dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
ConstantInt *C2 =
@@ -1030,8 +1072,12 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// If the last (struct) indices are constants and are equal, the other indices
// might be also be dynamically equal, so the GEPs can alias.
- if (C1 && C2 && C1->getSExtValue() == C2->getSExtValue())
- return MayAlias;
+ if (C1 && C2) {
+ unsigned BitWidth = std::max(C1->getBitWidth(), C2->getBitWidth());
+ if (C1->getValue().sextOrSelf(BitWidth) ==
+ C2->getValue().sextOrSelf(BitWidth))
+ return MayAlias;
+ }
// Find the last-indexed type of the GEP, i.e., the type you'd get if
// you stripped the last index.
@@ -1114,6 +1160,10 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
return MayAlias;
}
+ if (C1->getValue().getActiveBits() > 64 ||
+ C2->getValue().getActiveBits() > 64)
+ return MayAlias;
+
// We know that:
// - both GEPs begin indexing from the exact same pointer;
// - the last indices in both GEPs are constants, indexing into a struct;
@@ -1179,11 +1229,13 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// than (%alloca - 1), and so is not inbounds, a contradiction.
bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
- LocationSize ObjectAccessSize) {
+ LocationSize MaybeObjectAccessSize) {
// If the object access size is unknown, or the GEP isn't inbounds, bail.
- if (ObjectAccessSize == MemoryLocation::UnknownSize || !GEPOp->isInBounds())
+ if (MaybeObjectAccessSize == LocationSize::unknown() || !GEPOp->isInBounds())
return false;
+ const uint64_t ObjectAccessSize = MaybeObjectAccessSize.getValue();
+
// We need the object to be an alloca or a globalvariable, and want to know
// the offset of the pointer from the object precisely, so no variable
// indices are allowed.
@@ -1192,8 +1244,8 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
!DecompObject.VarIndices.empty())
return false;
- int64_t ObjectBaseOffset = DecompObject.StructOffset +
- DecompObject.OtherOffset;
+ APInt ObjectBaseOffset = DecompObject.StructOffset +
+ DecompObject.OtherOffset;
// If the GEP has no variable indices, we know the precise offset
// from the base, then use it. If the GEP has variable indices,
@@ -1201,10 +1253,11 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
// false in that case.
if (!DecompGEP.VarIndices.empty())
return false;
- int64_t GEPBaseOffset = DecompGEP.StructOffset;
+
+ APInt GEPBaseOffset = DecompGEP.StructOffset;
GEPBaseOffset += DecompGEP.OtherOffset;
- return (GEPBaseOffset >= ObjectBaseOffset + (int64_t)ObjectAccessSize);
+ return GEPBaseOffset.sge(ObjectBaseOffset + (int64_t)ObjectAccessSize);
}
/// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
@@ -1219,13 +1272,17 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
const Value *UnderlyingV1, const Value *UnderlyingV2) {
DecomposedGEP DecompGEP1, DecompGEP2;
+ unsigned MaxPointerSize = getMaxPointerSize(DL);
+ DecompGEP1.StructOffset = DecompGEP1.OtherOffset = APInt(MaxPointerSize, 0);
+ DecompGEP2.StructOffset = DecompGEP2.OtherOffset = APInt(MaxPointerSize, 0);
+
bool GEP1MaxLookupReached =
DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT);
bool GEP2MaxLookupReached =
DecomposeGEPExpression(V2, DecompGEP2, DL, &AC, DT);
- int64_t GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
- int64_t GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;
+ APInt GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
+ APInt GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;
assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 &&
"DecomposeGEPExpression returned a result different from "
@@ -1248,8 +1305,8 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
return NoAlias;
// Do the base pointers alias?
AliasResult BaseAlias =
- aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, AAMDNodes(),
- UnderlyingV2, MemoryLocation::UnknownSize, AAMDNodes());
+ aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(),
+ UnderlyingV2, LocationSize::unknown(), AAMDNodes());
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
@@ -1308,13 +1365,12 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// pointer, we know they cannot alias.
// If both accesses are unknown size, we can't do anything useful here.
- if (V1Size == MemoryLocation::UnknownSize &&
- V2Size == MemoryLocation::UnknownSize)
+ if (V1Size == LocationSize::unknown() && V2Size == LocationSize::unknown())
return MayAlias;
- AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
- AAMDNodes(), V2, MemoryLocation::UnknownSize,
- V2AAInfo, nullptr, UnderlyingV2);
+ AliasResult R =
+ aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(), V2,
+ LocationSize::unknown(), V2AAInfo, nullptr, UnderlyingV2);
if (R != MustAlias) {
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -1344,9 +1400,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// that the objects are partially overlapping. If the difference is
// greater, we know they do not overlap.
if (GEP1BaseOffset != 0 && DecompGEP1.VarIndices.empty()) {
- if (GEP1BaseOffset >= 0) {
- if (V2Size != MemoryLocation::UnknownSize) {
- if ((uint64_t)GEP1BaseOffset < V2Size)
+ if (GEP1BaseOffset.sge(0)) {
+ if (V2Size != LocationSize::unknown()) {
+ if (GEP1BaseOffset.ult(V2Size.getValue()))
return PartialAlias;
return NoAlias;
}
@@ -1359,9 +1415,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// GEP1 V2
// We need to know that V2Size is not unknown, otherwise we might have
// stripped a gep with negative index ('gep <ptr>, -1, ...).
- if (V1Size != MemoryLocation::UnknownSize &&
- V2Size != MemoryLocation::UnknownSize) {
- if (-(uint64_t)GEP1BaseOffset < V1Size)
+ if (V1Size != LocationSize::unknown() &&
+ V2Size != LocationSize::unknown()) {
+ if ((-GEP1BaseOffset).ult(V1Size.getValue()))
return PartialAlias;
return NoAlias;
}
@@ -1369,7 +1425,7 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
}
if (!DecompGEP1.VarIndices.empty()) {
- uint64_t Modulo = 0;
+ APInt Modulo(MaxPointerSize, 0);
bool AllPositive = true;
for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
@@ -1377,7 +1433,7 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// Grab the least significant bit set in any of the scales. We
// don't need std::abs here (even if the scale's negative) as we'll
// be ^'ing Modulo with itself later.
- Modulo |= (uint64_t)DecompGEP1.VarIndices[i].Scale;
+ Modulo |= DecompGEP1.VarIndices[i].Scale;
if (AllPositive) {
// If the Value could change between cycles, then any reasoning about
@@ -1398,9 +1454,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// If the variable begins with a zero then we know it's
// positive, regardless of whether the value is signed or
// unsigned.
- int64_t Scale = DecompGEP1.VarIndices[i].Scale;
+ APInt Scale = DecompGEP1.VarIndices[i].Scale;
AllPositive =
- (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0);
+ (SignKnownZero && Scale.sge(0)) || (SignKnownOne && Scale.slt(0));
}
}
@@ -1409,16 +1465,18 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// We can compute the difference between the two addresses
// mod Modulo. Check whether that difference guarantees that the
// two locations do not alias.
- uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
- if (V1Size != MemoryLocation::UnknownSize &&
- V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
- V1Size <= Modulo - ModOffset)
+ APInt ModOffset = GEP1BaseOffset & (Modulo - 1);
+ if (V1Size != LocationSize::unknown() &&
+ V2Size != LocationSize::unknown() && ModOffset.uge(V2Size.getValue()) &&
+ (Modulo - ModOffset).uge(V1Size.getValue()))
return NoAlias;
// If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
// If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
// don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
- if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset)
+ if (AllPositive && GEP1BaseOffset.sgt(0) &&
+ V2Size != LocationSize::unknown() &&
+ GEP1BaseOffset.uge(V2Size.getValue()))
return NoAlias;
if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size,
@@ -1598,7 +1656,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
// unknown to represent all the possible values the GEP could advance the
// pointer to.
if (isRecursive)
- PNSize = MemoryLocation::UnknownSize;
+ PNSize = LocationSize::unknown();
AliasResult Alias =
aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0],
@@ -1632,7 +1690,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
const Value *O1, const Value *O2) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
- if (V1Size == 0 || V2Size == 0)
+ if (V1Size.isZero() || V2Size.isZero())
return NoAlias;
// Strip off any casts if they exist.
@@ -1706,10 +1764,10 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
bool NullIsValidLocation = NullPointerIsDefined(&F);
- if ((V1Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O2, V1Size, DL, TLI, NullIsValidLocation)) ||
- (V2Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O1, V2Size, DL, TLI, NullIsValidLocation)))
+ if ((V1Size.isPrecise() && isObjectSmallerThan(O2, V1Size.getValue(), DL, TLI,
+ NullIsValidLocation)) ||
+ (V2Size.isPrecise() && isObjectSmallerThan(O1, V2Size.getValue(), DL, TLI,
+ NullIsValidLocation)))
return NoAlias;
// Check the cache before climbing up use-def chains. This also terminates
@@ -1767,10 +1825,9 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// If both pointers are pointing into the same object and one of them
// accesses the entire object, then the accesses must overlap in some way.
if (O1 == O2)
- if (V1Size != MemoryLocation::UnknownSize &&
- V2Size != MemoryLocation::UnknownSize &&
- (isObjectSize(O1, V1Size, DL, TLI, NullIsValidLocation) ||
- isObjectSize(O2, V2Size, DL, TLI, NullIsValidLocation)))
+ if (V1Size.isPrecise() && V2Size.isPrecise() &&
+ (isObjectSize(O1, V1Size.getValue(), DL, TLI, NullIsValidLocation) ||
+ isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation)))
return AliasCache[Locs] = PartialAlias;
// Recurse back into the best AA results we have, potentially with refined
@@ -1825,7 +1882,7 @@ void BasicAAResult::GetIndexDifference(
for (unsigned i = 0, e = Src.size(); i != e; ++i) {
const Value *V = Src[i].V;
unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
- int64_t Scale = Src[i].Scale;
+ APInt Scale = Src[i].Scale;
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
@@ -1845,7 +1902,7 @@ void BasicAAResult::GetIndexDifference(
}
// If we didn't consume this entry, add it to the end of the Dest list.
- if (Scale) {
+ if (!!Scale) {
VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
Dest.push_back(Entry);
}
@@ -1853,13 +1910,16 @@ void BasicAAResult::GetIndexDifference(
}
bool BasicAAResult::constantOffsetHeuristic(
- const SmallVectorImpl<VariableGEPIndex> &VarIndices, LocationSize V1Size,
- LocationSize V2Size, int64_t BaseOffset, AssumptionCache *AC,
- DominatorTree *DT) {
- if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
- V2Size == MemoryLocation::UnknownSize)
+ const SmallVectorImpl<VariableGEPIndex> &VarIndices,
+ LocationSize MaybeV1Size, LocationSize MaybeV2Size, APInt BaseOffset,
+ AssumptionCache *AC, DominatorTree *DT) {
+ if (VarIndices.size() != 2 || MaybeV1Size == LocationSize::unknown() ||
+ MaybeV2Size == LocationSize::unknown())
return false;
+ const uint64_t V1Size = MaybeV1Size.getValue();
+ const uint64_t V2Size = MaybeV2Size.getValue();
+
const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];
if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
@@ -1896,14 +1956,15 @@ bool BasicAAResult::constantOffsetHeuristic(
// the minimum distance between %i and %i + 5 is 3.
APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
MinDiff = APIntOps::umin(MinDiff, Wrapped);
- uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale);
+ APInt MinDiffBytes =
+ MinDiff.zextOrTrunc(Var0.Scale.getBitWidth()) * Var0.Scale.abs();
// We can't definitely say whether GEP1 is before or after V2 due to wrapping
// arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other
// values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and
// V2Size can fit in the MinDiffBytes gap.
- return V1Size + std::abs(BaseOffset) <= MinDiffBytes &&
- V2Size + std::abs(BaseOffset) <= MinDiffBytes;
+ return MinDiffBytes.uge(V1Size + BaseOffset.abs()) &&
+ MinDiffBytes.uge(V2Size + BaseOffset.abs());
}
//===----------------------------------------------------------------------===//
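
For the constantOffsetHeuristic() hunk above, a worked sketch of the wrap-aware distance with a hypothetical 3-bit index and 4-byte scale: offsets 0 and 5 are only min(5, 8 - 5) = 3 index steps apart, i.e. 12 bytes after scaling.

// MinDiff is the smaller of the forward and the wrapped-around distance
// between the two constant offsets, measured in the index's own bit width.
APInt V0Offset(3, 5), V1Offset(3, 0);
APInt MinDiff = V0Offset - V1Offset;        // 5
APInt Wrapped = -MinDiff;                   // 3 in 3-bit arithmetic
MinDiff = APIntOps::umin(MinDiff, Wrapped); // 3
APInt Scale(64, 4);                         // bytes per index step
APInt MinDiffBytes =
    MinDiff.zextOrTrunc(Scale.getBitWidth()) * Scale.abs();
assert(MinDiffBytes == 12 && "guaranteed gap between the two accesses");
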
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 41c295895213..ef27c36517ea 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -252,8 +252,8 @@ void BlockFrequencyInfo::setBlockFreqAndScale(
/// Pop up a ghostview window with the current block frequency propagation
/// rendered using dot.
-void BlockFrequencyInfo::view() const {
- ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs");
+void BlockFrequencyInfo::view(StringRef title) const {
+ ViewGraph(const_cast<BlockFrequencyInfo *>(this), title);
}
const Function *BlockFrequencyInfo::getFunction() const {
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 3d095068e7ff..08ebcc47a807 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -156,9 +156,9 @@ static void combineWeight(Weight &W, const Weight &OtherW) {
static void combineWeightsBySorting(WeightList &Weights) {
// Sort so edges to the same node are adjacent.
- llvm::sort(Weights.begin(), Weights.end(),
- [](const Weight &L,
- const Weight &R) { return L.TargetNode < R.TargetNode; });
+ llvm::sort(Weights, [](const Weight &L, const Weight &R) {
+ return L.TargetNode < R.TargetNode;
+ });
// Combine adjacent edges.
WeightList::iterator O = Weights.begin();
@@ -573,7 +573,9 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
APInt BlockFreq(128, Freq);
APInt EntryFreq(128, getEntryFreq());
BlockCount *= BlockFreq;
- BlockCount = BlockCount.udiv(EntryFreq);
+ // Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned
+ // lshr by 1 gives EntryFreq/2.
+ BlockCount = (BlockCount + EntryFreq.lshr(1)).udiv(EntryFreq);
return BlockCount.getLimitedValue();
}
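
The change above is plain add-half-then-divide rounding; with made-up numbers, a scaled count of 7 against an entry frequency of 2 now reports 4 instead of the previously truncated 3.

// Adding EntryFreq/2 (lshr by 1, since EntryFreq is unsigned) before the
// unsigned division rounds to nearest instead of toward zero.
APInt ScaledCount(128, 7);
APInt EntryFreq(128, 2);
APInt Rounded = (ScaledCount + EntryFreq.lshr(1)).udiv(EntryFreq);
assert(Rounded == 4 && "7 / 2 rounds to nearest, not toward zero");
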
@@ -705,7 +707,7 @@ static void findIrreducibleHeaders(
"Expected irreducible CFG; -loop-info is likely invalid");
if (Headers.size() == InSCC.size()) {
// Every block is a header.
- llvm::sort(Headers.begin(), Headers.end());
+ llvm::sort(Headers);
return;
}
@@ -740,8 +742,8 @@ static void findIrreducibleHeaders(
Others.push_back(Irr.Node);
LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
}
- llvm::sort(Headers.begin(), Headers.end());
- llvm::sort(Others.begin(), Others.end());
+ llvm::sort(Headers);
+ llvm::sort(Others);
}
static void createIrreducibleLoop(
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 54a657073f0f..7f544b27fe9d 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -135,7 +135,7 @@ static const uint32_t IH_NONTAKEN_WEIGHT = 1;
/// Add \p BB to PostDominatedByUnreachable set if applicable.
void
BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0) {
if (isa<UnreachableInst>(TI) ||
// If this block is terminated by a call to
@@ -167,7 +167,7 @@ BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
void
BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
assert(!PostDominatedByColdCall.count(BB));
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0)
return;
@@ -202,7 +202,7 @@ BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
/// Predict that a successor which leads necessarily to an
/// unreachable-terminated block as extremely unlikely.
bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
(void) TI;
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
assert(!isa<InvokeInst>(TI) &&
@@ -246,7 +246,7 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
// heuristic. The probability of the edge coming to unreachable block is
// set to min of metadata and unreachable heuristic.
bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI)))
return false;
@@ -348,7 +348,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
/// Return true if we could compute the weights for cold edges.
/// Return false, otherwise.
bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
(void) TI;
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
assert(!isa<InvokeInst>(TI) &&
diff --git a/contrib/llvm/lib/Analysis/CFG.cpp b/contrib/llvm/lib/Analysis/CFG.cpp
index a319be8092f9..aa880a62b754 100644
--- a/contrib/llvm/lib/Analysis/CFG.cpp
+++ b/contrib/llvm/lib/Analysis/CFG.cpp
@@ -71,7 +71,7 @@ void llvm::FindFunctionBackedges(const Function &F,
/// successor.
unsigned llvm::GetSuccessorNumber(const BasicBlock *BB,
const BasicBlock *Succ) {
- const TerminatorInst *Term = BB->getTerminator();
+ const Instruction *Term = BB->getTerminator();
#ifndef NDEBUG
unsigned e = Term->getNumSuccessors();
#endif
@@ -85,8 +85,9 @@ unsigned llvm::GetSuccessorNumber(const BasicBlock *BB,
/// isCriticalEdge - Return true if the specified edge is a critical edge.
/// Critical edges are edges from a block with multiple successors to a block
/// with multiple predecessors.
-bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+bool llvm::isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges) {
+ assert(TI->isTerminator() && "Must be a terminator to have successors!");
assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
if (TI->getNumSuccessors() == 1) return false;
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
index 5b170dfa7903..6d01e9d5d447 100644
--- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -7,9 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a '-dot-cfg' analysis pass, which emits the
-// cfg.<fnname>.dot file for each function in the program, with a graph of the
-// CFG for that function.
+// This file defines a `-dot-cfg` analysis pass, which emits the
+// `<prefix>.<fnname>.dot` file for each function in the program, with a graph
+// of the CFG for that function. The default value for `<prefix>` is `cfg` but
+// can be customized as needed.
//
// The other main feature of this file is that it implements the
// Function::viewCFG method, which is useful for debugging passes which operate
@@ -27,6 +28,10 @@ static cl::opt<std::string> CFGFuncName(
cl::desc("The name of a function (or its substring)"
" whose CFG is viewed/printed."));
+static cl::opt<std::string> CFGDotFilenamePrefix(
+ "cfg-dot-filename-prefix", cl::Hidden,
+ cl::desc("The prefix used for the CFG dot file names."));
+
namespace {
struct CFGViewerLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
@@ -90,7 +95,8 @@ PreservedAnalyses CFGOnlyViewerPass::run(Function &F,
static void writeCFGToDotFile(Function &F, bool CFGOnly = false) {
if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
return;
- std::string Filename = ("cfg." + F.getName() + ".dot").str();
+ std::string Filename =
+ (CFGDotFilenamePrefix + "." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
diff --git a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 194983418b08..1c61dd369a05 100644
--- a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -395,7 +395,7 @@ populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap,
}
// Sort AliasList for faster lookup
- llvm::sort(AliasList.begin(), AliasList.end());
+ llvm::sort(AliasList);
}
}
@@ -479,7 +479,7 @@ static void populateExternalRelations(
}
// Remove duplicates in ExtRelations
- llvm::sort(ExtRelations.begin(), ExtRelations.end());
+ llvm::sort(ExtRelations);
ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()),
ExtRelations.end());
}
@@ -515,10 +515,9 @@ CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
return None;
}
-bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
- LocationSize LHSSize,
- const Value *RHS,
- LocationSize RHSSize) const {
+bool CFLAndersAAResult::FunctionInfo::mayAlias(
+ const Value *LHS, LocationSize MaybeLHSSize, const Value *RHS,
+ LocationSize MaybeRHSSize) const {
assert(LHS && RHS);
// Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created
@@ -557,11 +556,14 @@ bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
OffsetValue{RHS, 0}, Comparator);
if (RangePair.first != RangePair.second) {
- // Be conservative about UnknownSize
- if (LHSSize == MemoryLocation::UnknownSize ||
- RHSSize == MemoryLocation::UnknownSize)
+ // Be conservative about unknown sizes
+ if (MaybeLHSSize == LocationSize::unknown() ||
+ MaybeRHSSize == LocationSize::unknown())
return true;
+ const uint64_t LHSSize = MaybeLHSSize.getValue();
+ const uint64_t RHSSize = MaybeRHSSize.getValue();
+
for (const auto &OVal : make_range(RangePair)) {
// Be conservative about UnknownOffset
if (OVal.Offset == UnknownOffset)
diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h
index 86812009da7c..12121d717433 100644
--- a/contrib/llvm/lib/Analysis/CFLGraph.h
+++ b/contrib/llvm/lib/Analysis/CFLGraph.h
@@ -594,7 +594,7 @@ template <typename CFLAA> class CFLGraphBuilder {
// Determines whether or not an instruction is useless to us (e.g.
// FenceInst)
static bool hasUsefulEdges(Instruction *Inst) {
- bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
+ bool IsNonInvokeRetTerminator = Inst->isTerminator() &&
!isa<InvokeInst>(Inst) &&
!isa<ReturnInst>(Inst);
return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
diff --git a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
index b325afb8e7c5..fd2292ced017 100644
--- a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -54,6 +54,11 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM,
LazyCallGraph &G, CGSCCUpdateResult &UR) {
+ // Request PassInstrumentation from analysis manager, will use it to run
+ // instrumenting callbacks for the passes later.
+ PassInstrumentation PI =
+ AM.getResult<PassInstrumentationAnalysis>(InitialC, G);
+
PreservedAnalyses PA = PreservedAnalyses::all();
if (DebugLogging)
@@ -67,8 +72,18 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
if (DebugLogging)
dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n";
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, skip its execution completely if asked to (callback returns false).
+ if (!PI.runBeforePass(*Pass, *C))
+ continue;
+
PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR);
+ if (UR.InvalidatedSCCs.count(C))
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass);
+ else
+ PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C);
+
// Update the SCC if necessary.
C = UR.UpdatedC ? UR.UpdatedC : C;
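A hedged sketch of how a client could use the BeforePass hook this hunk starts honouring: the callback receives the pass name and the IR unit (as llvm::Any), and returning false asks the manager to skip the pass. The filter condition below is purely illustrative, and wiring the callbacks object into a pass builder is omitted.

#include "llvm/ADT/Any.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/PassInstrumentation.h"
using namespace llvm;

void installSkipFilter(PassInstrumentationCallbacks &PIC) {
  PIC.registerBeforePassCallback([](StringRef PassID, Any) {
    // Returning false here makes the CGSCC pass manager above 'continue'
    // past the pass without running it.
    return !PassID.contains("NoOpCGSCCPass");
  });
}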
diff --git a/contrib/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp
index cbdf5f63c557..0da678e1611b 100644
--- a/contrib/llvm/lib/Analysis/CallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraph.cpp
@@ -97,8 +97,7 @@ void CallGraph::print(raw_ostream &OS) const {
for (const auto &I : *this)
Nodes.push_back(I.second.get());
- llvm::sort(Nodes.begin(), Nodes.end(),
- [](CallGraphNode *LHS, CallGraphNode *RHS) {
+ llvm::sort(Nodes, [](CallGraphNode *LHS, CallGraphNode *RHS) {
if (Function *LF = LHS->getFunction())
if (Function *RF = RHS->getFunction())
return LF->getName() < RF->getName();
diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
index 4c33c420b65d..0aed57a39387 100644
--- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -22,11 +22,13 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OptBisect.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -123,24 +125,34 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
Module &M = CG.getModule();
if (!PM) {
- CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
+ CallGraphSCCPass *CGSP = (CallGraphSCCPass *)P;
if (!CallGraphUpToDate) {
DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
CallGraphUpToDate = true;
}
{
- unsigned InstrCount = 0;
+ unsigned InstrCount, SCCCount = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
TimeRegion PassTimer(getPassTimer(CGSP));
if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
Changed = CGSP->runOnSCC(CurSCC);
- // If the pass modified the module, it may have modified the instruction
- // count of the module. Try emitting a remark.
- if (EmitICRemark)
- emitInstrCountChangedRemark(P, M, InstrCount);
+ if (EmitICRemark) {
+ // FIXME: Add getInstructionCount to CallGraphSCC.
+ SCCCount = M.getInstructionCount();
+ // Is there a difference in the number of instructions in the module?
+ if (SCCCount != InstrCount) {
+ // Yep. Emit a remark and update InstrCount.
+ int64_t Delta =
+ static_cast<int64_t>(SCCCount) - static_cast<int64_t>(InstrCount);
+ emitInstrCountChangedRemark(P, M, Delta, InstrCount,
+ FunctionToInstrCount);
+ InstrCount = SCCCount;
+ }
+ }
}
// After the CGSCCPass is done, when assertions are enabled, use
@@ -621,23 +633,40 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override {
bool BannerPrinted = false;
- auto PrintBannerOnce = [&] () {
+ auto PrintBannerOnce = [&]() {
if (BannerPrinted)
return;
OS << Banner;
BannerPrinted = true;
- };
+ };
+
+ bool NeedModule = llvm::forcePrintModuleIR();
+ if (isFunctionInPrintList("*") && NeedModule) {
+ PrintBannerOnce();
+ OS << "\n";
+ SCC.getCallGraph().getModule().print(OS, nullptr);
+ return false;
+ }
+ bool FoundFunction = false;
for (CallGraphNode *CGN : SCC) {
if (Function *F = CGN->getFunction()) {
if (!F->isDeclaration() && isFunctionInPrintList(F->getName())) {
- PrintBannerOnce();
- F->print(OS);
+ FoundFunction = true;
+ if (!NeedModule) {
+ PrintBannerOnce();
+ F->print(OS);
+ }
}
} else if (isFunctionInPrintList("*")) {
PrintBannerOnce();
OS << "\nPrinting <null> Function\n";
}
}
+ if (NeedModule && FoundFunction) {
+ PrintBannerOnce();
+ OS << "\n";
+ SCC.getCallGraph().getModule().print(OS, nullptr);
+ }
return false;
}
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
index d4f73bdb4361..669f4f2835fa 100644
--- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -23,7 +23,6 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -158,7 +157,8 @@ namespace {
/// storing the value (or part of it) into memory anywhere automatically
/// counts as capturing it or not.
bool llvm::PointerMayBeCaptured(const Value *V,
- bool ReturnCaptures, bool StoreCaptures) {
+ bool ReturnCaptures, bool StoreCaptures,
+ unsigned MaxUsesToExplore) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
@@ -169,7 +169,7 @@ bool llvm::PointerMayBeCaptured(const Value *V,
(void)StoreCaptures;
SimpleCaptureTracker SCT(ReturnCaptures);
- PointerMayBeCaptured(V, &SCT);
+ PointerMayBeCaptured(V, &SCT, MaxUsesToExplore);
return SCT.Captured;
}
@@ -186,13 +186,15 @@ bool llvm::PointerMayBeCaptured(const Value *V,
bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
bool StoreCaptures, const Instruction *I,
const DominatorTree *DT, bool IncludeI,
- OrderedBasicBlock *OBB) {
+ OrderedBasicBlock *OBB,
+ unsigned MaxUsesToExplore) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
bool UseNewOBB = OBB == nullptr;
if (!DT)
- return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures);
+ return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures,
+ MaxUsesToExplore);
if (UseNewOBB)
OBB = new OrderedBasicBlock(I->getParent());
@@ -200,29 +202,25 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
// with StoreCaptures.
CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, OBB);
- PointerMayBeCaptured(V, &CB);
+ PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
if (UseNewOBB)
delete OBB;
return CB.Captured;
}
-/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
-/// a cache. Then we can move the code from BasicAliasAnalysis into
-/// that path, and remove this threshold.
-static int const Threshold = 20;
-
-void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
+void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
+ unsigned MaxUsesToExplore) {
assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
- SmallVector<const Use *, Threshold> Worklist;
- SmallSet<const Use *, Threshold> Visited;
+ SmallVector<const Use *, DefaultMaxUsesToExplore> Worklist;
+ SmallSet<const Use *, DefaultMaxUsesToExplore> Visited;
auto AddUses = [&](const Value *V) {
- int Count = 0;
+ unsigned Count = 0;
for (const Use &U : V->uses()) {
// If there are lots of uses, conservatively say that the value
// is captured to avoid taking too much compile time.
- if (Count++ >= Threshold)
+ if (Count++ >= MaxUsesToExplore)
return Tracker->tooManyUses();
if (!Visited.insert(&U).second)
continue;
@@ -241,11 +239,12 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
switch (I->getOpcode()) {
case Instruction::Call:
case Instruction::Invoke: {
- CallSite CS(I);
+ auto *Call = cast<CallBase>(I);
// Not captured if the callee is readonly, doesn't return a copy through
// its return value and doesn't unwind (a readonly function can leak bits
// by throwing an exception or not depending on the input value).
- if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy())
+ if (Call->onlyReadsMemory() && Call->doesNotThrow() &&
+ Call->getType()->isVoidTy())
break;
// The pointer is not captured if returned pointer is not captured.
@@ -253,14 +252,14 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
// marked with nocapture do not capture. This means that places like
// GetUnderlyingObject in ValueTracking or DecomposeGEPExpression
// in BasicAA also need to know about this property.
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CS)) {
- AddUses(I);
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) {
+ AddUses(Call);
break;
}
// Volatile operations effectively capture the memory location that they
// load and store to.
- if (auto *MI = dyn_cast<MemIntrinsic>(I))
+ if (auto *MI = dyn_cast<MemIntrinsic>(Call))
if (MI->isVolatile())
if (Tracker->captured(U))
return;
@@ -272,13 +271,14 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
// that loading a value from a pointer does not cause the pointer to be
// captured, even though the loaded value might be the pointer itself
// (think of self-referential objects).
- CallSite::data_operand_iterator B =
- CS.data_operands_begin(), E = CS.data_operands_end();
- for (CallSite::data_operand_iterator A = B; A != E; ++A)
- if (A->get() == V && !CS.doesNotCapture(A - B))
+ for (auto IdxOpPair : enumerate(Call->data_ops())) {
+ int Idx = IdxOpPair.index();
+ Value *A = IdxOpPair.value();
+ if (A == V && !Call->doesNotCapture(Idx))
// The parameter is not marked 'nocapture' - captured.
if (Tracker->captured(U))
return;
+ }
break;
}
case Instruction::Load:
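A hedged sketch of calling the extended capture-tracking entry point from this hunk; the wrapper name and the use limit are illustrative, only the signature comes from the change above.

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/Value.h"
using namespace llvm;

bool mayBeCapturedCheaply(const Value *Ptr) {
  // Count returns and stores as captures, but inspect at most 16 uses
  // before giving up and conservatively answering "captured".
  return PointerMayBeCaptured(Ptr, /*ReturnCaptures=*/true,
                              /*StoreCaptures=*/true,
                              /*MaxUsesToExplore=*/16);
}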
diff --git a/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp b/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp
index 159c1a2d135a..27071babec5c 100644
--- a/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp
@@ -40,28 +40,28 @@ unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
}
}
-Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
- CmpInst::Predicate &NewICmpPred) {
+Constant *llvm::getPredForICmpCode(unsigned Code, bool Sign, Type *OpTy,
+ CmpInst::Predicate &Pred) {
switch (Code) {
default: llvm_unreachable("Illegal ICmp code!");
case 0: // False.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
- case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
- case 2: NewICmpPred = ICmpInst::ICMP_EQ; break;
- case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
- case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
- case 5: NewICmpPred = ICmpInst::ICMP_NE; break;
- case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 0);
+ case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: Pred = ICmpInst::ICMP_EQ; break;
+ case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: Pred = ICmpInst::ICMP_NE; break;
+ case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
case 7: // True.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 1);
}
return nullptr;
}
-bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
- return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
- (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
- (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+bool llvm::predicatesFoldable(ICmpInst::Predicate P1, ICmpInst::Predicate P2) {
+ return (CmpInst::isSigned(P1) == CmpInst::isSigned(P2)) ||
+ (CmpInst::isSigned(P1) && ICmpInst::isEquality(P2)) ||
+ (CmpInst::isSigned(P2) && ICmpInst::isEquality(P1));
}
bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS,
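A hedged sketch of the renamed helper above: decoding ICmp code 3 with a signed interpretation yields ICMP_SGE, while codes 0 and 7 fold directly to false/true constants. The surrounding function and context are illustrative.

#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

void decodeGreaterOrEqual(LLVMContext &Ctx) {
  CmpInst::Predicate Pred;
  Constant *Folded = getPredForICmpCode(/*Code=*/3, /*Sign=*/true,
                                        Type::getInt32Ty(Ctx), Pred);
  // Folded is nullptr for codes 1..6; Pred now holds ICmpInst::ICMP_SGE.
  (void)Folded;
  (void)Pred;
}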
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index c5281c57bc19..5da29d6d2372 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -347,9 +347,20 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
// We're simulating a load through a pointer that was bitcast to point to
// a different type, so we can try to walk down through the initial
- // elements of an aggregate to see if some part of th e aggregate is
+ // elements of an aggregate to see if some part of the aggregate is
// castable to implement the "load" semantic model.
- C = C->getAggregateElement(0u);
+ if (SrcTy->isStructTy()) {
+ // Struct types might have leading zero-length elements like [0 x i32],
+ // which are certainly not what we are looking for, so skip them.
+ unsigned Elem = 0;
+ Constant *ElemC;
+ do {
+ ElemC = C->getAggregateElement(Elem++);
+ } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()) == 0);
+ C = ElemC;
+ } else {
+ C = C->getAggregateElement(0u);
+ }
} while (C);
return nullptr;
@@ -960,10 +971,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
NewIdxs.size() > *LastIRIndex) {
InRangeIndex = LastIRIndex;
for (unsigned I = 0; I <= *LastIRIndex; ++I)
- if (NewIdxs[I] != InnermostGEP->getOperand(I + 1)) {
- InRangeIndex = None;
- break;
- }
+ if (NewIdxs[I] != InnermostGEP->getOperand(I + 1))
+ return nullptr;
}
// Create a GEP.
@@ -985,11 +994,6 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
/// returned, if not, null is returned. Note that this function can fail when
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
-///
-/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/inrange
-/// etc information, due to only being passed an opcode and operands. Constant
-/// folding using this function strips this information.
-///
Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
@@ -1370,6 +1374,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::fabs:
case Intrinsic::minnum:
case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum:
case Intrinsic::log:
case Intrinsic::log2:
case Intrinsic::log10:
@@ -1389,6 +1395,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::copysign:
@@ -1402,6 +1410,10 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::usub_with_overflow:
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::usub_sat:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
case Intrinsic::bitreverse:
@@ -1413,6 +1425,23 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::x86_sse2_cvtsd2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
+ case Intrinsic::x86_avx512_vcvtss2si32:
+ case Intrinsic::x86_avx512_vcvtss2si64:
+ case Intrinsic::x86_avx512_cvttss2si:
+ case Intrinsic::x86_avx512_cvttss2si64:
+ case Intrinsic::x86_avx512_vcvtsd2si32:
+ case Intrinsic::x86_avx512_vcvtsd2si64:
+ case Intrinsic::x86_avx512_cvttsd2si:
+ case Intrinsic::x86_avx512_cvttsd2si64:
+ case Intrinsic::x86_avx512_vcvtss2usi32:
+ case Intrinsic::x86_avx512_vcvtss2usi64:
+ case Intrinsic::x86_avx512_cvttss2usi:
+ case Intrinsic::x86_avx512_cvttss2usi64:
+ case Intrinsic::x86_avx512_vcvtsd2usi32:
+ case Intrinsic::x86_avx512_vcvtsd2usi64:
+ case Intrinsic::x86_avx512_cvttsd2usi:
+ case Intrinsic::x86_avx512_cvttsd2usi64:
+ case Intrinsic::is_constant:
return true;
default:
return false;
@@ -1553,7 +1582,7 @@ Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V,
/// result. Returns null if the conversion cannot be performed, otherwise
/// returns the Constant value resulting from the conversion.
Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
- Type *Ty) {
+ Type *Ty, bool IsSigned) {
// All of these conversion intrinsics form an integer of at most 64bits.
unsigned ResultWidth = Ty->getIntegerBitWidth();
assert(ResultWidth <= 64 &&
@@ -1565,11 +1594,11 @@ Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
: APFloat::rmNearestTiesToEven;
APFloat::opStatus status =
Val.convertToInteger(makeMutableArrayRef(UIntVal), ResultWidth,
- /*isSigned=*/true, mode, &isExact);
+ IsSigned, mode, &isExact);
if (status != APFloat::opOK &&
(!roundTowardZero || status != APFloat::opInexact))
return nullptr;
- return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
+ return ConstantInt::get(Ty, UIntVal, IsSigned);
}
double getValueAsDouble(ConstantFP *Op) {
@@ -1587,14 +1616,49 @@ double getValueAsDouble(ConstantFP *Op) {
return APF.convertToDouble();
}
+static bool isManifestConstant(const Constant *c) {
+ if (isa<ConstantData>(c)) {
+ return true;
+ } else if (isa<ConstantAggregate>(c) || isa<ConstantExpr>(c)) {
+ for (const Value *subc : c->operand_values()) {
+ if (!isManifestConstant(cast<Constant>(subc)))
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
+ if (auto *CI = dyn_cast<ConstantInt>(Op)) {
+ C = &CI->getValue();
+ return true;
+ }
+ if (isa<UndefValue>(Op)) {
+ C = nullptr;
+ return true;
+ }
+ return false;
+}
+
Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI,
ImmutableCallSite CS) {
if (Operands.size() == 1) {
+ if (IntrinsicID == Intrinsic::is_constant) {
+ // We know we have a "Constant" argument. But we want to only
+ // return true for manifest constants, not those that depend on
+ // constants with unknowable values, e.g. GlobalValue or BlockAddress.
+ if (isManifestConstant(Operands[0]))
+ return ConstantInt::getTrue(Ty->getContext());
+ return nullptr;
+ }
if (isa<UndefValue>(Operands[0])) {
- // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN
- if (IntrinsicID == Intrinsic::cos)
+ // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
+ // ctpop() is between 0 and bitwidth, pick 0 for undef.
+ if (IntrinsicID == Intrinsic::cos ||
+ IntrinsicID == Intrinsic::ctpop)
return Constant::getNullValue(Ty);
if (IntrinsicID == Intrinsic::bswap ||
IntrinsicID == Intrinsic::bitreverse ||
@@ -1849,7 +1913,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/false, Ty);
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/true);
break;
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
@@ -1858,7 +1923,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/true, Ty);
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/true);
break;
}
}
@@ -1899,6 +1965,18 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
}
+ if (IntrinsicID == Intrinsic::minimum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), minimum(C1, C2));
+ }
+
+ if (IntrinsicID == Intrinsic::maximum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), maximum(C1, C2));
+ }
+
if (!TLI)
return nullptr;
if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
@@ -1931,58 +2009,149 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
return nullptr;
}
- if (auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
- if (auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ if (Operands[0]->getType()->isIntegerTy() &&
+ Operands[1]->getType()->isIntegerTy()) {
+ const APInt *C0, *C1;
+ if (!getConstIntOrUndef(Operands[0], C0) ||
+ !getConstIntOrUndef(Operands[1], C1))
+ return nullptr;
+
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ // Even if both operands are undef, we cannot fold muls to undef
+ // in the general case. For example, on i2 there are no inputs
+ // that would produce { i2 -1, i1 true } as the result.
+ if (!C0 || !C1)
+ return Constant::getNullValue(Ty);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow: {
+ if (!C0 || !C1)
+ return UndefValue::get(Ty);
+
+ APInt Res;
+ bool Overflow;
switch (IntrinsicID) {
- default: break;
+ default: llvm_unreachable("Invalid case");
case Intrinsic::sadd_with_overflow:
+ Res = C0->sadd_ov(*C1, Overflow);
+ break;
case Intrinsic::uadd_with_overflow:
+ Res = C0->uadd_ov(*C1, Overflow);
+ break;
case Intrinsic::ssub_with_overflow:
+ Res = C0->ssub_ov(*C1, Overflow);
+ break;
case Intrinsic::usub_with_overflow:
+ Res = C0->usub_ov(*C1, Overflow);
+ break;
case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow: {
- APInt Res;
- bool Overflow;
- switch (IntrinsicID) {
- default: llvm_unreachable("Invalid case");
- case Intrinsic::sadd_with_overflow:
- Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::uadd_with_overflow:
- Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::ssub_with_overflow:
- Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::usub_with_overflow:
- Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::smul_with_overflow:
- Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::umul_with_overflow:
- Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow);
- break;
- }
- Constant *Ops[] = {
- ConstantInt::get(Ty->getContext(), Res),
- ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
- };
- return ConstantStruct::get(cast<StructType>(Ty), Ops);
- }
- case Intrinsic::cttz:
- if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef.
- return UndefValue::get(Ty);
- return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
- case Intrinsic::ctlz:
- if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef.
- return UndefValue::get(Ty);
- return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
+ Res = C0->smul_ov(*C1, Overflow);
+ break;
+ case Intrinsic::umul_with_overflow:
+ Res = C0->umul_ov(*C1, Overflow);
+ break;
}
+ Constant *Ops[] = {
+ ConstantInt::get(Ty->getContext(), Res),
+ ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
+ };
+ return ConstantStruct::get(cast<StructType>(Ty), Ops);
+ }
+ case Intrinsic::uadd_sat:
+ case Intrinsic::sadd_sat:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return Constant::getAllOnesValue(Ty);
+ if (IntrinsicID == Intrinsic::uadd_sat)
+ return ConstantInt::get(Ty, C0->uadd_sat(*C1));
+ else
+ return ConstantInt::get(Ty, C0->sadd_sat(*C1));
+ case Intrinsic::usub_sat:
+ case Intrinsic::ssub_sat:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::usub_sat)
+ return ConstantInt::get(Ty, C0->usub_sat(*C1));
+ else
+ return ConstantInt::get(Ty, C0->ssub_sat(*C1));
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz:
+ assert(C1 && "Must be constant int");
+
+ // cttz(0, 1) and ctlz(0, 1) are undef.
+ if (C1->isOneValue() && (!C0 || C0->isNullValue()))
+ return UndefValue::get(Ty);
+ if (!C0)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::cttz)
+ return ConstantInt::get(Ty, C0->countTrailingZeros());
+ else
+ return ConstantInt::get(Ty, C0->countLeadingZeros());
}
return nullptr;
}
+
+ // Support ConstantVector in case we have an Undef in the top.
+ if ((isa<ConstantVector>(Operands[0]) ||
+ isa<ConstantDataVector>(Operands[0])) &&
+ // Check for default rounding mode.
+ // FIXME: Support other rounding modes?
+ isa<ConstantInt>(Operands[1]) &&
+ cast<ConstantInt>(Operands[1])->getValue() == 4) {
+ auto *Op = cast<Constant>(Operands[0]);
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::x86_avx512_vcvtss2si32:
+ case Intrinsic::x86_avx512_vcvtss2si64:
+ case Intrinsic::x86_avx512_vcvtsd2si32:
+ case Intrinsic::x86_avx512_vcvtsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_avx512_vcvtss2usi32:
+ case Intrinsic::x86_avx512_vcvtss2usi64:
+ case Intrinsic::x86_avx512_vcvtsd2usi32:
+ case Intrinsic::x86_avx512_vcvtsd2usi64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/false);
+ break;
+ case Intrinsic::x86_avx512_cvttss2si:
+ case Intrinsic::x86_avx512_cvttss2si64:
+ case Intrinsic::x86_avx512_cvttsd2si:
+ case Intrinsic::x86_avx512_cvttsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_avx512_cvttss2usi:
+ case Intrinsic::x86_avx512_cvttss2usi64:
+ case Intrinsic::x86_avx512_cvttsd2usi:
+ case Intrinsic::x86_avx512_cvttsd2usi64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/false);
+ break;
+ }
+ }
return nullptr;
}
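A small standalone sketch of the APInt arithmetic the new folding paths above rely on, with concrete i8 values (nothing here depends on IR objects):

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt A(8, 200), B(8, 100);
  bool Overflow;
  // uadd_with_overflow: 200 + 100 wraps to 44 in i8 and reports overflow.
  APInt Sum = A.uadd_ov(B, Overflow);
  assert(Overflow && Sum.getZExtValue() == 44);
  // uadd_sat: the same addition saturates at the unsigned maximum, 255.
  assert(A.uadd_sat(B).getZExtValue() == 255);
  // cttz with a non-zero argument folds to countTrailingZeros(): 8 -> 3.
  assert(APInt(8, 8).countTrailingZeros() == 3);
  return 0;
}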
@@ -2010,6 +2179,36 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
}
}
+ if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
+ const APInt *C0, *C1, *C2;
+ if (!getConstIntOrUndef(Operands[0], C0) ||
+ !getConstIntOrUndef(Operands[1], C1) ||
+ !getConstIntOrUndef(Operands[2], C2))
+ return nullptr;
+
+ bool IsRight = IntrinsicID == Intrinsic::fshr;
+ if (!C2)
+ return Operands[IsRight ? 1 : 0];
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+
+ // The shift amount is interpreted as modulo the bitwidth. If the shift
+ // amount is effectively 0, avoid UB due to oversized inverse shift below.
+ unsigned BitWidth = C2->getBitWidth();
+ unsigned ShAmt = C2->urem(BitWidth);
+ if (!ShAmt)
+ return Operands[IsRight ? 1 : 0];
+
+ // (C0 << ShlAmt) | (C1 >> LshrAmt)
+ unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
+ unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
+ if (!C0)
+ return ConstantInt::get(Ty, C1->lshr(LshrAmt));
+ if (!C1)
+ return ConstantInt::get(Ty, C0->shl(ShlAmt));
+ return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
+ }
+
return nullptr;
}
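A worked example of the funnel-shift folding above, again as a standalone sketch over APInt with concrete i8 values:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt C0(8, 0xAB), C1(8, 0xCD);
  unsigned BitWidth = 8;
  unsigned ShAmt = 4 % BitWidth; // the shift amount is taken modulo the width
  // fshl(C0, C1, 4) == (C0 << 4) | (C1 >> (8 - 4)) == 0xB0 | 0x0C == 0xBC
  APInt Res = C0.shl(ShAmt) | C1.lshr(BitWidth - ShAmt);
  assert(Res.getZExtValue() == 0xBC);
  return 0;
}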
diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp
index e7637cd88327..34f785fb02be 100644
--- a/contrib/llvm/lib/Analysis/DemandedBits.cpp
+++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp
@@ -21,8 +21,7 @@
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -39,6 +38,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
@@ -50,6 +50,7 @@
#include <cstdint>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "demanded-bits"
@@ -78,13 +79,14 @@ void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const {
}
static bool isAlwaysLive(Instruction *I) {
- return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
- I->isEHPad() || I->mayHaveSideEffects();
+ return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() ||
+ I->mayHaveSideEffects();
}
void DemandedBits::determineLiveOperandBits(
- const Instruction *UserI, const Instruction *I, unsigned OperandNo,
- const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2) {
+ const Instruction *UserI, const Value *Val, unsigned OperandNo,
+ const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2,
+ bool &KnownBitsComputed) {
unsigned BitWidth = AB.getBitWidth();
// We're called once per operand, but for some instructions, we need to
@@ -95,7 +97,11 @@ void DemandedBits::determineLiveOperandBits(
// provided here.
auto ComputeKnownBits =
[&](unsigned BitWidth, const Value *V1, const Value *V2) {
- const DataLayout &DL = I->getModule()->getDataLayout();
+ if (KnownBitsComputed)
+ return;
+ KnownBitsComputed = true;
+
+ const DataLayout &DL = UserI->getModule()->getDataLayout();
Known = KnownBits(BitWidth);
computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT);
@@ -127,7 +133,7 @@ void DemandedBits::determineLiveOperandBits(
// We need some output bits, so we need all bits of the
// input to the left of, and including, the leftmost bit
// known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
+ ComputeKnownBits(BitWidth, Val, nullptr);
AB = APInt::getHighBitsSet(BitWidth,
std::min(BitWidth, Known.countMaxLeadingZeros()+1));
}
@@ -137,11 +143,33 @@ void DemandedBits::determineLiveOperandBits(
// We need some output bits, so we need all bits of the
// input to the right of, and including, the rightmost bit
// known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
+ ComputeKnownBits(BitWidth, Val, nullptr);
AB = APInt::getLowBitsSet(BitWidth,
std::min(BitWidth, Known.countMaxTrailingZeros()+1));
}
break;
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ const APInt *SA;
+ if (OperandNo == 2) {
+ // Shift amount is modulo the bitwidth. For powers of two we have
+ // SA % BW == SA & (BW - 1).
+ if (isPowerOf2_32(BitWidth))
+ AB = BitWidth - 1;
+ } else if (match(II->getOperand(2), m_APInt(SA))) {
+ // Normalize to funnel shift left. APInt shifts of BitWidth are well-
+ // defined, so no need to special-case zero shifts here.
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
+
+ if (OperandNo == 0)
+ AB = AOut.lshr(ShiftAmt);
+ else if (OperandNo == 1)
+ AB = AOut.shl(BitWidth - ShiftAmt);
+ }
+ break;
+ }
}
break;
case Instruction::Add:
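A hedged sketch of the operand-demand rule above for a concrete fshl on i8 with a constant shift amount of 3; the values are illustrative. Note how operand 1 ends up with no demanded bits, which is exactly the situation the new DeadUses set records.

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  unsigned BitWidth = 8, ShiftAmt = 3; // already normalized for fshl
  // Suppose only the top four output bits are demanded downstream.
  APInt AOut = APInt::getHighBitsSet(BitWidth, 4); // 0xF0
  // Operand 0 produces output bits [7:3], so its demand shifts right by 3.
  APInt AB0 = AOut.lshr(ShiftAmt); // 0x1E
  // Operand 1 only produces output bits [2:0], none of which are demanded,
  // so it has no demanded bits at all (a dead use).
  APInt AB1 = AOut.shl(BitWidth - ShiftAmt); // 0x00
  assert(AB0.getZExtValue() == 0x1E && AB1.isNullValue());
  return 0;
}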
@@ -153,8 +181,9 @@ void DemandedBits::determineLiveOperandBits(
AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
break;
case Instruction::Shl:
- if (OperandNo == 0)
- if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ if (OperandNo == 0) {
+ const APInt *ShiftAmtC;
+ if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.lshr(ShiftAmt);
@@ -166,10 +195,12 @@ void DemandedBits::determineLiveOperandBits(
else if (S->hasNoUnsignedWrap())
AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
}
+ }
break;
case Instruction::LShr:
- if (OperandNo == 0)
- if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ if (OperandNo == 0) {
+ const APInt *ShiftAmtC;
+ if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.shl(ShiftAmt);
@@ -178,10 +209,12 @@ void DemandedBits::determineLiveOperandBits(
if (cast<LShrOperator>(UserI)->isExact())
AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
}
+ }
break;
case Instruction::AShr:
- if (OperandNo == 0)
- if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ if (OperandNo == 0) {
+ const APInt *ShiftAmtC;
+ if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.shl(ShiftAmt);
// Because the high input bit is replicated into the
@@ -196,6 +229,7 @@ void DemandedBits::determineLiveOperandBits(
if (cast<AShrOperator>(UserI)->isExact())
AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
}
+ }
break;
case Instruction::And:
AB = AOut;
@@ -204,14 +238,11 @@ void DemandedBits::determineLiveOperandBits(
// other operand are dead (unless they're both zero, in which
// case they can't both be dead, so just mark the LHS bits as
// dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
+ if (OperandNo == 0)
AB &= ~Known2.Zero;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ else
AB &= ~(Known.Zero & ~Known2.Zero);
- }
break;
case Instruction::Or:
AB = AOut;
@@ -220,14 +251,11 @@ void DemandedBits::determineLiveOperandBits(
// other operand are dead (unless they're both one, in which
// case they can't both be dead, so just mark the LHS bits as
// dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
+ if (OperandNo == 0)
AB &= ~Known2.One;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ else
AB &= ~(Known.One & ~Known2.One);
- }
break;
case Instruction::Xor:
case Instruction::PHI:
@@ -253,6 +281,15 @@ void DemandedBits::determineLiveOperandBits(
if (OperandNo != 0)
AB = AOut;
break;
+ case Instruction::ExtractElement:
+ if (OperandNo == 0)
+ AB = AOut;
+ break;
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ if (OperandNo == 0 || OperandNo == 1)
+ AB = AOut;
+ break;
}
}
@@ -275,8 +312,9 @@ void DemandedBits::performAnalysis() {
Visited.clear();
AliveBits.clear();
+ DeadUses.clear();
- SmallVector<Instruction*, 128> Worklist;
+ SmallSetVector<Instruction*, 16> Worklist;
// Collect the set of "root" instructions that are known live.
for (Instruction &I : instructions(F)) {
@@ -288,9 +326,10 @@ void DemandedBits::performAnalysis() {
// bits and add the instruction to the work list. For other instructions
// add their operands to the work list (for integer values operands, mark
// all bits as live).
- if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- if (AliveBits.try_emplace(&I, IT->getBitWidth(), 0).second)
- Worklist.push_back(&I);
+ Type *T = I.getType();
+ if (T->isIntOrIntVectorTy()) {
+ if (AliveBits.try_emplace(&I, T->getScalarSizeInBits(), 0).second)
+ Worklist.insert(&I);
continue;
}
@@ -298,9 +337,10 @@ void DemandedBits::performAnalysis() {
// Non-integer-typed instructions...
for (Use &OI : I.operands()) {
if (Instruction *J = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
- AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
- Worklist.push_back(J);
+ Type *T = J->getType();
+ if (T->isIntOrIntVectorTy())
+ AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
+ Worklist.insert(J);
}
}
// To save memory, we don't add I to the Visited set here. Instead, we
@@ -315,35 +355,51 @@ void DemandedBits::performAnalysis() {
LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
APInt AOut;
- if (UserI->getType()->isIntegerTy()) {
+ if (UserI->getType()->isIntOrIntVectorTy()) {
AOut = AliveBits[UserI];
- LLVM_DEBUG(dbgs() << " Alive Out: " << AOut);
+ LLVM_DEBUG(dbgs() << " Alive Out: 0x"
+ << Twine::utohexstr(AOut.getLimitedValue()));
}
LLVM_DEBUG(dbgs() << "\n");
- if (!UserI->getType()->isIntegerTy())
+ if (!UserI->getType()->isIntOrIntVectorTy())
Visited.insert(UserI);
KnownBits Known, Known2;
+ bool KnownBitsComputed = false;
// Compute the set of alive bits for each operand. These are anded into the
// existing set, if any, and if that changes the set of alive bits, the
// operand is added to the work-list.
for (Use &OI : UserI->operands()) {
- if (Instruction *I = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
- unsigned BitWidth = IT->getBitWidth();
- APInt AB = APInt::getAllOnesValue(BitWidth);
- if (UserI->getType()->isIntegerTy() && !AOut &&
- !isAlwaysLive(UserI)) {
- AB = APInt(BitWidth, 0);
- } else {
- // If all bits of the output are dead, then all bits of the input
- // Bits of each operand that are used to compute alive bits of the
- // output are alive, all others are dead.
- determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
- Known, Known2);
- }
+ // We also want to detect dead uses of arguments, but will only store
+ // demanded bits for instructions.
+ Instruction *I = dyn_cast<Instruction>(OI);
+ if (!I && !isa<Argument>(OI))
+ continue;
+
+ Type *T = OI->getType();
+ if (T->isIntOrIntVectorTy()) {
+ unsigned BitWidth = T->getScalarSizeInBits();
+ APInt AB = APInt::getAllOnesValue(BitWidth);
+ if (UserI->getType()->isIntOrIntVectorTy() && !AOut &&
+ !isAlwaysLive(UserI)) {
+ // If all bits of the output are dead, then all bits of the input
+ // are also dead.
+ AB = APInt(BitWidth, 0);
+ } else {
+ // Bits of each operand that are used to compute alive bits of the
+ // output are alive, all others are dead.
+ determineLiveOperandBits(UserI, OI, OI.getOperandNo(), AOut, AB,
+ Known, Known2, KnownBitsComputed);
+
+ // Keep track of uses which have no demanded bits.
+ if (AB.isNullValue())
+ DeadUses.insert(&OI);
+ else
+ DeadUses.erase(&OI);
+ }
+ if (I) {
// If we've added to the set of alive bits (or the operand has not
// been previously visited), then re-queue the operand to be visited
// again.
@@ -355,11 +411,11 @@ void DemandedBits::performAnalysis() {
APInt ABNew = AB | ABPrev;
if (ABNew != ABPrev || ABI == AliveBits.end()) {
AliveBits[I] = std::move(ABNew);
- Worklist.push_back(I);
+ Worklist.insert(I);
}
- } else if (!Visited.count(I)) {
- Worklist.push_back(I);
}
+ } else if (I && !Visited.count(I)) {
+ Worklist.insert(I);
}
}
}
@@ -368,11 +424,13 @@ void DemandedBits::performAnalysis() {
APInt DemandedBits::getDemandedBits(Instruction *I) {
performAnalysis();
- const DataLayout &DL = I->getModule()->getDataLayout();
auto Found = AliveBits.find(I);
if (Found != AliveBits.end())
return Found->second;
- return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
+
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ return APInt::getAllOnesValue(
+ DL.getTypeSizeInBits(I->getType()->getScalarType()));
}
bool DemandedBits::isInstructionDead(Instruction *I) {
@@ -382,6 +440,31 @@ bool DemandedBits::isInstructionDead(Instruction *I) {
!isAlwaysLive(I);
}
+bool DemandedBits::isUseDead(Use *U) {
+ // We only track integer uses, everything else is assumed live.
+ if (!(*U)->getType()->isIntOrIntVectorTy())
+ return false;
+
+ // Uses by always-live instructions are never dead.
+ Instruction *UserI = cast<Instruction>(U->getUser());
+ if (isAlwaysLive(UserI))
+ return false;
+
+ performAnalysis();
+ if (DeadUses.count(U))
+ return true;
+
+ // If no output bits are demanded, no input bits are demanded and the use
+ // is dead. These uses might not be explicitly present in the DeadUses map.
+ if (UserI->getType()->isIntOrIntVectorTy()) {
+ auto Found = AliveBits.find(UserI);
+ if (Found != AliveBits.end() && Found->second.isNullValue())
+ return true;
+ }
+
+ return false;
+}
+
void DemandedBits::print(raw_ostream &OS) {
performAnalysis();
for (auto &KV : AliveBits) {
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
index 79c2728d5620..3f4dfa52e1da 100644
--- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -194,6 +194,13 @@ void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
dumpExampleDependence(OS, info.get());
}
+PreservedAnalyses
+DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
+ OS << "'Dependence Analysis' for function '" << F.getName() << "':\n";
+ dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F));
+ return PreservedAnalyses::all();
+}
+
//===----------------------------------------------------------------------===//
// Dependence methods
@@ -633,8 +640,8 @@ static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
const MemoryLocation &LocB) {
// Check the original locations (minus size) for noalias, which can happen for
// tbaa, incompatible underlying object locations, etc.
- MemoryLocation LocAS(LocA.Ptr, MemoryLocation::UnknownSize, LocA.AATags);
- MemoryLocation LocBS(LocB.Ptr, MemoryLocation::UnknownSize, LocB.AATags);
+ MemoryLocation LocAS(LocA.Ptr, LocationSize::unknown(), LocA.AATags);
+ MemoryLocation LocBS(LocB.Ptr, LocationSize::unknown(), LocB.AATags);
if (AA->alias(LocAS, LocBS) == NoAlias)
return NoAlias;
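A hedged sketch of the LocationSize usage the hunk above switches to; the wrapper function is illustrative and mirrors how underlyingObjectsAlias() drops the size before querying alias analysis.

#include "llvm/Analysis/MemoryLocation.h"
using namespace llvm;

MemoryLocation withoutSize(const MemoryLocation &Loc) {
  // Keep the pointer and AA metadata but replace the size with "unknown",
  // which takes the place of the raw MemoryLocation::UnknownSize constant.
  return MemoryLocation(Loc.Ptr, LocationSize::unknown(), Loc.AATags);
}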
diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
index f5f1874c9303..7ba23854a3cc 100644
--- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements divergence analysis which determines whether a branch
-// in a GPU program is divergent.It can help branch optimizations such as jump
+// This file implements a general divergence analysis for loop vectorization
+// and GPU programs. It determines which branches and values in a loop or GPU
+// program are divergent. It can help branch optimizations such as jump
// threading and loop unswitching to make better decisions.
//
// GPU programs typically use the SIMD execution model, where multiple threads
@@ -16,25 +17,29 @@
// code contains divergent branches (i.e., threads in a group do not agree on
// which path of the branch to take), the group of threads has to execute all
// the paths from that branch with different subsets of threads enabled until
-// they converge at the immediately post-dominating BB of the paths.
+// they re-converge.
//
// Due to this execution model, some optimizations such as jump
-// threading and loop unswitching can be unfortunately harmful when performed on
-// divergent branches. Therefore, an analysis that computes which branches in a
-// GPU program are divergent can help the compiler to selectively run these
-// optimizations.
+// threading and loop unswitching can interfere with thread re-convergence.
+// Therefore, an analysis that computes which branches in a GPU program are
+// divergent can help the compiler to selectively run these optimizations.
//
-// This file defines divergence analysis which computes a conservative but
-// non-trivial approximation of all divergent branches in a GPU program. It
-// partially implements the approach described in
+// This implementation is derived from the Vectorization Analysis of the
+// Region Vectorizer (RV). That implementation in turn is based on the approach
+// described in
//
-// Divergence Analysis
-// Sampaio, Souza, Collange, Pereira
-// TOPLAS '13
+// Improving Performance of OpenCL on CPUs
+// Ralf Karrenberg and Sebastian Hack
+// CC '12
//
-// The divergence analysis identifies the sources of divergence (e.g., special
-// variables that hold the thread ID), and recursively marks variables that are
-// data or sync dependent on a source of divergence as divergent.
+// This DivergenceAnalysis implementation is generic in the sense that it does
+// not itself identify original sources of divergence.
+// Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and
+// (GPUDivergenceAnalysis) for GPU programs, identify the sources of divergence
+// (e.g., special variables that hold the thread ID or the iteration variable).
+//
+// The generic implementation propagates divergence to variables that are data
+// or sync dependent on a source of divergence.
//
// While data dependency is a well-known concept, the notion of sync dependency
// is worth more explanation. Sync dependence characterizes the control flow
@@ -54,287 +59,399 @@
// because the branch "br i1 %cond" depends on %tid and affects which value %a
// is assigned to.
//
-// The current implementation has the following limitations:
+// The sync dependence detection (which branch induces divergence in which join
+// points) is implemented in the SyncDependenceAnalysis.
+//
+// The current DivergenceAnalysis implementation has the following limitations:
// 1. intra-procedural. It conservatively considers the arguments of a
// non-kernel-entry function and the return value of a function call as
// divergent.
// 2. memory as black box. It conservatively considers values loaded from
// generic or local address as divergent. This can be improved by leveraging
-// pointer analysis.
+// pointer analysis and/or by modelling non-escaping memory objects in SSA
+// as done in RV.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
+
using namespace llvm;
-#define DEBUG_TYPE "divergence"
-
-namespace {
-
-class DivergencePropagator {
-public:
- DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
- PostDominatorTree &PDT, DenseSet<const Value *> &DV)
- : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
- void populateWithSourcesOfDivergence();
- void propagate();
-
-private:
- // A helper function that explores data dependents of V.
- void exploreDataDependency(Value *V);
- // A helper function that explores sync dependents of TI.
- void exploreSyncDependency(TerminatorInst *TI);
- // Computes the influence region from Start to End. This region includes all
- // basic blocks on any simple path from Start to End.
- void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion);
- // Finds all users of I that are outside the influence region, and add these
- // users to Worklist.
- void findUsersOutsideInfluenceRegion(
- Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
-
- Function &F;
- TargetTransformInfo &TTI;
- DominatorTree &DT;
- PostDominatorTree &PDT;
- std::vector<Value *> Worklist; // Stack for DFS.
- DenseSet<const Value *> &DV; // Stores all divergent values.
-};
-
-void DivergencePropagator::populateWithSourcesOfDivergence() {
- Worklist.clear();
- DV.clear();
- for (auto &I : instructions(F)) {
- if (TTI.isSourceOfDivergence(&I)) {
- Worklist.push_back(&I);
- DV.insert(&I);
- }
+#define DEBUG_TYPE "divergence-analysis"
+
+// class DivergenceAnalysis
+DivergenceAnalysis::DivergenceAnalysis(
+ const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
+ const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
+ : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
+ IsLCSSAForm(IsLCSSAForm) {}
+
+void DivergenceAnalysis::markDivergent(const Value &DivVal) {
+ assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
+ assert(!isAlwaysUniform(DivVal) && "cannot be a divergent");
+ DivergentValues.insert(&DivVal);
+}
+
+void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
+ UniformOverrides.insert(&UniVal);
+}
+
+bool DivergenceAnalysis::updateTerminator(const Instruction &Term) const {
+ if (Term.getNumSuccessors() <= 1)
+ return false;
+ if (auto *BranchTerm = dyn_cast<BranchInst>(&Term)) {
+ assert(BranchTerm->isConditional());
+ return isDivergent(*BranchTerm->getCondition());
}
- for (auto &Arg : F.args()) {
- if (TTI.isSourceOfDivergence(&Arg)) {
- Worklist.push_back(&Arg);
- DV.insert(&Arg);
- }
+ if (auto *SwitchTerm = dyn_cast<SwitchInst>(&Term)) {
+ return isDivergent(*SwitchTerm->getCondition());
+ }
+ if (isa<InvokeInst>(Term)) {
+ return false; // ignore abnormal executions through landingpad
}
+
+ llvm_unreachable("unexpected terminator");
}
-void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
- // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
- // immediate post dominator are divergent. This rule handles if-then-else
- // patterns. For example,
- //
- // if (tid < 5)
- // a1 = 1;
- // else
- // a2 = 2;
- // a = phi(a1, a2); // sync dependent on (tid < 5)
- BasicBlock *ThisBB = TI->getParent();
-
- // Unreachable blocks may not be in the dominator tree.
- if (!DT.isReachableFromEntry(ThisBB))
- return;
+bool DivergenceAnalysis::updateNormalInstruction(const Instruction &I) const {
+ // TODO function calls with side effects, etc
+ for (const auto &Op : I.operands()) {
+ if (isDivergent(*Op))
+ return true;
+ }
+ return false;
+}
- // If the function has no exit blocks or doesn't reach any exit blocks, the
- // post dominator may be null.
- DomTreeNode *ThisNode = PDT.getNode(ThisBB);
- if (!ThisNode)
- return;
+bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
+ const Value &Val) const {
+ const auto *Inst = dyn_cast<const Instruction>(&Val);
+ if (!Inst)
+ return false;
+ // check whether any divergent loop carrying Val terminates before control
+ // proceeds to ObservingBlock
+ for (const auto *Loop = LI.getLoopFor(Inst->getParent());
+ Loop != RegionLoop && !Loop->contains(&ObservingBlock);
+ Loop = Loop->getParentLoop()) {
+ if (DivergentLoops.find(Loop) != DivergentLoops.end())
+ return true;
+ }
- BasicBlock *IPostDom = ThisNode->getIDom()->getBlock();
- if (IPostDom == nullptr)
- return;
+ return false;
+}
- for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
- // A PHINode is uniform if it returns the same value no matter which path is
- // taken.
- if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second)
- Worklist.push_back(&*I);
+bool DivergenceAnalysis::updatePHINode(const PHINode &Phi) const {
+ // Join of divergent disjoint paths in Phi's parent block
+ if (!Phi.hasConstantOrUndefValue() && isJoinDivergent(*Phi.getParent())) {
+ return true;
}
- // Propagation rule 2: if a value defined in a loop is used outside, the user
- // is sync dependent on the condition of the loop exits that dominate the
- // user. For example,
- //
- // int i = 0;
- // do {
- // i++;
- // if (foo(i)) ... // uniform
- // } while (i < tid);
- // if (bar(i)) ... // divergent
+ // An incoming value could be divergent by itself.
+ // Otherwise, an incoming value could be uniform within the loop
+ // that carries its definition but still appear divergent when
+ // observed from outside the loop. This happens when divergent loop
+ // exits drop definitions of that uniform value in different iterations.
//
- // A program may contain unstructured loops. Therefore, we cannot leverage
- // LoopInfo, which only recognizes natural loops.
- //
- // The algorithm used here handles both natural and unstructured loops. Given
- // a branch TI, we first compute its influence region, the union of all simple
- // paths from TI to its immediate post dominator (IPostDom). Then, we search
- // for all the values defined in the influence region but used outside. All
- // these users are sync dependent on TI.
- DenseSet<BasicBlock *> InfluenceRegion;
- computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
- // An insight that can speed up the search process is that all the in-region
- // values that are used outside must dominate TI. Therefore, instead of
- // searching every basic blocks in the influence region, we search all the
- // dominators of TI until it is outside the influence region.
- BasicBlock *InfluencedBB = ThisBB;
- while (InfluenceRegion.count(InfluencedBB)) {
- for (auto &I : *InfluencedBB)
- findUsersOutsideInfluenceRegion(I, InfluenceRegion);
- DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
- if (IDomNode == nullptr)
- break;
- InfluencedBB = IDomNode->getBlock();
+ // for (int i = 0; i < n; ++i) { // 'i' is uniform inside the loop
+ // if (i % thread_id == 0) break; // divergent loop exit
+ // }
+ // int divI = i; // divI is divergent
+ for (size_t i = 0; i < Phi.getNumIncomingValues(); ++i) {
+ const auto *InVal = Phi.getIncomingValue(i);
+ if (isDivergent(*Phi.getIncomingValue(i)) ||
+ isTemporalDivergent(*Phi.getParent(), *InVal)) {
+ return true;
+ }
}
+ return false;
}
-void DivergencePropagator::findUsersOutsideInfluenceRegion(
- Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
- for (User *U : I.users()) {
- Instruction *UserInst = cast<Instruction>(U);
- if (!InfluenceRegion.count(UserInst->getParent())) {
- if (DV.insert(UserInst).second)
- Worklist.push_back(UserInst);
+bool DivergenceAnalysis::inRegion(const Instruction &I) const {
+ return I.getParent() && inRegion(*I.getParent());
+}
+
+bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
+ return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB);
+}
+
+// marks all users of loop-carried values of the loop headed by LoopHeader as
+// divergent
+void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) {
+ auto *DivLoop = LI.getLoopFor(&LoopHeader);
+ assert(DivLoop && "loopHeader is not actually part of a loop");
+
+ SmallVector<BasicBlock *, 8> TaintStack;
+ DivLoop->getExitBlocks(TaintStack);
+
+ // Beyond the exit blocks, potential users of loop-carried values can be
+ // anywhere in the dominance region of DivLoop (including its fringes for
+ // phi nodes).
+ DenseSet<const BasicBlock *> Visited;
+ for (auto *Block : TaintStack) {
+ Visited.insert(Block);
+ }
+ Visited.insert(&LoopHeader);
+
+ while (!TaintStack.empty()) {
+ auto *UserBlock = TaintStack.back();
+ TaintStack.pop_back();
+
+ // don't spread divergence beyond the region
+ if (!inRegion(*UserBlock))
+ continue;
+
+ assert(!DivLoop->contains(UserBlock) &&
+ "irreducible control flow detected");
+
+ // phi nodes at the fringes of the dominance region
+ if (!DT.dominates(&LoopHeader, UserBlock)) {
+ // all PHI nodes of UserBlock become divergent
+ for (auto &Phi : UserBlock->phis()) {
+ Worklist.push_back(&Phi);
+ }
+ continue;
+ }
+
+ // taint outside users of values carried by DivLoop
+ for (auto &I : *UserBlock) {
+ if (isAlwaysUniform(I))
+ continue;
+ if (isDivergent(I))
+ continue;
+
+ for (auto &Op : I.operands()) {
+ auto *OpInst = dyn_cast<Instruction>(&Op);
+ if (!OpInst)
+ continue;
+ if (DivLoop->contains(OpInst->getParent())) {
+ markDivergent(I);
+ pushUsers(I);
+ break;
+ }
+ }
+ }
+
+ // visit all blocks in the dominance region
+ for (auto *SuccBlock : successors(UserBlock)) {
+ if (!Visited.insert(SuccBlock).second) {
+ continue;
+ }
+ TaintStack.push_back(SuccBlock);
}
}
}
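+
+// Illustrative sketch (hypothetical block/value names, not taken from the
+// sources above): if the loop below is divergent, the outside use of %v is
+// tainted even though %v is uniform inside the loop, because threads may
+// leave the loop in different iterations:
+//   loop:                              ; divergent loop
+//     %v = add i32 %v.prev, 1          ; uniform within the loop
+//     br i1 %exit.cond, label %exit, label %loop
+//   exit:
+//     %u = mul i32 %v, 2               ; outside user becomes divergent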
-// A helper function for computeInfluenceRegion that adds successors of "ThisBB"
-// to the influence region.
-static void
-addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion,
- std::vector<BasicBlock *> &InfluenceStack) {
- for (BasicBlock *Succ : successors(ThisBB)) {
- if (Succ != End && InfluenceRegion.insert(Succ).second)
- InfluenceStack.push_back(Succ);
+void DivergenceAnalysis::pushPHINodes(const BasicBlock &Block) {
+ for (const auto &Phi : Block.phis()) {
+ if (isDivergent(Phi))
+ continue;
+ Worklist.push_back(&Phi);
}
}
-void DivergencePropagator::computeInfluenceRegion(
- BasicBlock *Start, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion) {
- assert(PDT.properlyDominates(End, Start) &&
- "End does not properly dominate Start");
-
- // The influence region starts from the end of "Start" to the beginning of
- // "End". Therefore, "Start" should not be in the region unless "Start" is in
- // a loop that doesn't contain "End".
- std::vector<BasicBlock *> InfluenceStack;
- addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
- while (!InfluenceStack.empty()) {
- BasicBlock *BB = InfluenceStack.back();
- InfluenceStack.pop_back();
- addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
+void DivergenceAnalysis::pushUsers(const Value &V) {
+ for (const auto *User : V.users()) {
+ const auto *UserInst = dyn_cast<const Instruction>(User);
+ if (!UserInst)
+ continue;
+
+ if (isDivergent(*UserInst))
+ continue;
+
+ // only compute divergence inside the region of interest
+ if (!inRegion(*UserInst))
+ continue;
+ Worklist.push_back(UserInst);
}
}
-void DivergencePropagator::exploreDataDependency(Value *V) {
- // Follow def-use chains of V.
- for (User *U : V->users()) {
- Instruction *UserInst = cast<Instruction>(U);
- if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second)
- Worklist.push_back(UserInst);
+bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock,
+ const Loop *BranchLoop) {
+ LLVM_DEBUG(dbgs() << "\tpropJoinDiv " << JoinBlock.getName() << "\n");
+
+ // ignore divergence outside the region
+ if (!inRegion(JoinBlock)) {
+ return false;
+ }
+
+ // push non-divergent phi nodes in JoinBlock to the worklist
+ pushPHINodes(JoinBlock);
+
+ // JoinBlock is a divergent loop exit
+ if (BranchLoop && !BranchLoop->contains(&JoinBlock)) {
+ return true;
}
+
+ // disjoint-paths divergent at JoinBlock
+ markBlockJoinDivergent(JoinBlock);
+ return false;
}
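+
+// Illustrative sketch (hypothetical names): for a divergent branch, both
+// disjoint paths meet again at %join, so %join is marked join divergent and
+// its phi nodes are pushed to the worklist:
+//   br i1 %div.cond, label %then, label %else
+//   ...
+//   join:
+//     %p = phi i32 [ %a, %then ], [ %b, %else ]   ; becomes divergent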
-void DivergencePropagator::propagate() {
- // Traverse the dependency graph using DFS.
- while (!Worklist.empty()) {
- Value *V = Worklist.back();
- Worklist.pop_back();
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) {
- // Terminators with less than two successors won't introduce sync
- // dependency. Ignore them.
- if (TI->getNumSuccessors() > 1)
- exploreSyncDependency(TI);
+void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) {
+ LLVM_DEBUG(dbgs() << "propBranchDiv " << Term.getParent()->getName() << "\n");
+
+ markDivergent(Term);
+
+ const auto *BranchLoop = LI.getLoopFor(Term.getParent());
+
+ // whether there is a divergent loop exit from BranchLoop (if any)
+ bool IsBranchLoopDivergent = false;
+
+ // iterate over all blocks reachable by disjoint paths from Term within the
+ // loop; this also covers loop exits that become divergent due to Term.
+ for (const auto *JoinBlock : SDA.join_blocks(Term)) {
+ IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
+ }
+
+ // The branch's loop becomes a divergent loop due to the divergent branch Term
+ if (IsBranchLoopDivergent) {
+ assert(BranchLoop);
+ if (!DivergentLoops.insert(BranchLoop).second) {
+ return;
}
- exploreDataDependency(V);
+ propagateLoopDivergence(*BranchLoop);
}
}
-} /// end namespace anonymous
+void DivergenceAnalysis::propagateLoopDivergence(const Loop &ExitingLoop) {
+ LLVM_DEBUG(dbgs() << "propLoopDiv " << ExitingLoop.getName() << "\n");
+
+ // don't propagate beyond region
+ if (!inRegion(*ExitingLoop.getHeader()))
+ return;
-// Register this pass.
-char DivergenceAnalysis::ID = 0;
-INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
- false, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
- false, true)
+ const auto *BranchLoop = ExitingLoop.getParentLoop();
-FunctionPass *llvm::createDivergenceAnalysisPass() {
- return new DivergenceAnalysis();
-}
+ // Uses of loop-carried values could occur anywhere
+ // within the dominance region of the definition. All loop-carried
+ // definitions are dominated by the loop header (reducible control).
+ // Thus all users have to be in the dominance region of the loop header,
+ // except PHI nodes that can also live at the fringes of the dom region
+ // (incoming defining value).
+ if (!IsLCSSAForm)
+ taintLoopLiveOuts(*ExitingLoop.getHeader());
+
+ // whether there is a divergent loop exit from BranchLoop (if any)
+ bool IsBranchLoopDivergent = false;
+
+ // iterate over all blocks reachable by disjoint paths from the exits of
+ // ExitingLoop; this also covers loop exits (of BranchLoop) that in turn
+ // become divergent.
+ for (const auto *JoinBlock : SDA.join_blocks(ExitingLoop)) {
+ IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
+ }
-void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.setPreservesAll();
+ // The branch loop is divergent due to the divergent loop exit in ExitingLoop
+ if (IsBranchLoopDivergent) {
+ assert(BranchLoop);
+ if (!DivergentLoops.insert(BranchLoop).second) {
+ return;
+ }
+ propagateLoopDivergence(*BranchLoop);
+ }
}
-bool DivergenceAnalysis::runOnFunction(Function &F) {
- auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
- if (TTIWP == nullptr)
- return false;
+void DivergenceAnalysis::compute() {
+ for (auto *DivVal : DivergentValues) {
+ pushUsers(*DivVal);
+ }
- TargetTransformInfo &TTI = TTIWP->getTTI(F);
- // Fast path: if the target does not have branch divergence, we do not mark
- // any branch as divergent.
- if (!TTI.hasBranchDivergence())
- return false;
+ // propagate divergence
+ while (!Worklist.empty()) {
+ const Instruction &I = *Worklist.back();
+ Worklist.pop_back();
- DivergentValues.clear();
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- DivergencePropagator DP(F, TTI,
- getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- PDT, DivergentValues);
- DP.populateWithSourcesOfDivergence();
- DP.propagate();
- LLVM_DEBUG(
- dbgs() << "\nAfter divergence analysis on " << F.getName() << ":\n";
- print(dbgs(), F.getParent())
- );
- return false;
+ // maintain uniformity of overrides
+ if (isAlwaysUniform(I))
+ continue;
+
+ bool WasDivergent = isDivergent(I);
+ if (WasDivergent)
+ continue;
+
+ // propagate divergence caused by terminator
+ if (I.isTerminator()) {
+ if (updateTerminator(I)) {
+ // propagate control divergence to affected instructions
+ propagateBranchDivergence(I);
+ continue;
+ }
+ }
+
+ // update divergence of I due to divergent operands
+ bool DivergentUpd = false;
+ const auto *Phi = dyn_cast<const PHINode>(&I);
+ if (Phi) {
+ DivergentUpd = updatePHINode(*Phi);
+ } else {
+ DivergentUpd = updateNormalInstruction(I);
+ }
+
+ // propagate value divergence to users
+ if (DivergentUpd) {
+ markDivergent(I);
+ pushUsers(I);
+ }
+ }
+}
+
+bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
+ return UniformOverrides.find(&V) != UniformOverrides.end();
+}
+
+bool DivergenceAnalysis::isDivergent(const Value &V) const {
+ return DivergentValues.find(&V) != DivergentValues.end();
}
void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if (DivergentValues.empty())
return;
- const Value *FirstDivergentValue = *DivergentValues.begin();
- const Function *F;
- if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
- F = Arg->getParent();
- } else if (const Instruction *I =
- dyn_cast<Instruction>(FirstDivergentValue)) {
- F = I->getParent()->getParent();
- } else {
- llvm_unreachable("Only arguments and instructions can be divergent");
+ // Iterate instructions using instructions() to ensure a deterministic order.
+ for (auto &I : instructions(F)) {
+ if (isDivergent(I))
+ OS << "DIVERGENT:" << I << '\n';
}
+}
- // Dumps all divergent values in F, arguments and then instructions.
- for (auto &Arg : F->args()) {
- OS << (DivergentValues.count(&Arg) ? "DIVERGENT: " : " ");
- OS << Arg << "\n";
+// class GPUDivergenceAnalysis
+GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ const LoopInfo &LI,
+ const TargetTransformInfo &TTI)
+ : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, false) {
+ for (auto &I : instructions(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ DA.markDivergent(I);
+ } else if (TTI.isAlwaysUniform(&I)) {
+ DA.addUniformOverride(I);
+ }
}
- // Iterate instructions using instructions() to ensure a deterministic order.
- for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) {
- auto &BB = *BI;
- OS << "\n " << BB.getName() << ":\n";
- for (auto &I : BB.instructionsWithoutDebug()) {
- OS << (DivergentValues.count(&I) ? "DIVERGENT: " : " ");
- OS << I << "\n";
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ DA.markDivergent(Arg);
}
}
- OS << "\n";
+
+ DA.compute();
+}
+
+bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
+ return DA.isDivergent(val);
+}
+
+void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
+ OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
+ DA.print(OS, mod);
+ OS << "}\n";
}
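+
+// Minimal usage sketch (illustrative only; the surrounding pass boilerplate
+// and variable names are assumed, not part of this file):
+//   GPUDivergenceAnalysis GDA(F, DT, PDT, LI, TTI);
+//   for (auto &I : instructions(F))
+//     if (GDA.isDivergent(I))
+//       dbgs() << "divergent: " << I << "\n";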
diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
index 2d35a3fa9118..0df73aeebbdc 100644
--- a/contrib/llvm/lib/Analysis/EHPersonalities.cpp
+++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
@@ -120,7 +120,7 @@ DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
<< "\'.\n");
BasicBlock *SuccColor = Color;
- TerminatorInst *Terminator = Visiting->getTerminator();
+ Instruction *Terminator = Visiting->getTerminator();
if (auto *CatchRet = dyn_cast<CatchReturnInst>(Terminator)) {
Value *ParentPad = CatchRet->getCatchSwitchParentPad();
if (isa<ConstantTokenNone>(ParentPad))
diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
index 2c503609d96b..b28abcadca4a 100644
--- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -255,11 +255,11 @@ FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) {
}
FunctionModRefBehavior
-GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
+GlobalsAAResult::getModRefBehavior(const CallBase *Call) {
FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
- if (!CS.hasOperandBundles())
- if (const Function *F = CS.getCalledFunction())
+ if (!Call->hasOperandBundles())
+ if (const Function *F = Call->getCalledFunction())
if (FunctionInfo *FI = getFunctionInfo(F)) {
if (!isModOrRefSet(FI->getModRefInfo()))
Min = FMRB_DoesNotAccessMemory;
@@ -267,7 +267,7 @@ GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
Min = FMRB_OnlyReadsMemory;
}
- return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(Call) & Min);
}
/// Returns the function info for the function, or null if we don't have
@@ -366,14 +366,14 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
} else if (Operator::getOpcode(I) == Instruction::BitCast) {
if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
return true;
- } else if (auto CS = CallSite(I)) {
+ } else if (auto *Call = dyn_cast<CallBase>(I)) {
// Make sure that this is just the function being called, not that it is
// passing into the function.
- if (CS.isDataOperand(&U)) {
+ if (Call->isDataOperand(&U)) {
// Detect calls to free.
- if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) {
+ if (Call->isArgOperand(&U) && isFreeCall(I, &TLI)) {
if (Writers)
- Writers->insert(CS->getParent()->getParent());
+ Writers->insert(Call->getParent()->getParent());
} else {
return true; // Argument of an unknown call.
}
@@ -576,15 +576,15 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// We handle calls specially because the graph-relevant aspects are
// handled above.
- if (auto CS = CallSite(&I)) {
- if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) {
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) {
// FIXME: It is completely unclear why this is necessary and not
// handled by the above graph code.
FI.addModRefInfo(ModRefInfo::ModRef);
- } else if (Function *Callee = CS.getCalledFunction()) {
+ } else if (Function *Callee = Call->getCalledFunction()) {
// The callgraph doesn't include intrinsic calls.
if (Callee->isIntrinsic()) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (isa<DbgInfoIntrinsic>(Call))
// Don't let dbg intrinsics affect alias info.
continue;
@@ -885,16 +885,16 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
return AAResultBase::alias(LocA, LocB);
}
-ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
+ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
const GlobalValue *GV) {
- if (CS.doesNotAccessMemory())
+ if (Call->doesNotAccessMemory())
return ModRefInfo::NoModRef;
ModRefInfo ConservativeResult =
- CS.onlyReadsMemory() ? ModRefInfo::Ref : ModRefInfo::ModRef;
+ Call->onlyReadsMemory() ? ModRefInfo::Ref : ModRefInfo::ModRef;
// Iterate through all the arguments to the called function. If any argument
// is based on GV, return the conservative result.
- for (auto &A : CS.args()) {
+ for (auto &A : Call->args()) {
SmallVector<Value*, 4> Objects;
GetUnderlyingObjects(A, Objects, DL);
@@ -914,7 +914,7 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
return ModRefInfo::NoModRef;
}
-ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
ModRefInfo Known = ModRefInfo::ModRef;
@@ -923,15 +923,15 @@ ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
if (const GlobalValue *GV =
dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
if (GV->hasLocalLinkage())
- if (const Function *F = CS.getCalledFunction())
+ if (const Function *F = Call->getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
if (const FunctionInfo *FI = getFunctionInfo(F))
Known = unionModRef(FI->getModRefInfoForGlobal(*GV),
- getModRefInfoForArgument(CS, GV));
+ getModRefInfoForArgument(Call, GV));
if (!isModOrRefSet(Known))
return ModRefInfo::NoModRef; // No need to query other mod/ref analyses
- return intersectModRef(Known, AAResultBase::getModRefInfo(CS, Loc));
+ return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc));
}
GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
diff --git a/contrib/llvm/lib/Analysis/GuardUtils.cpp b/contrib/llvm/lib/Analysis/GuardUtils.cpp
new file mode 100644
index 000000000000..08fa6abeafb5
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/GuardUtils.cpp
@@ -0,0 +1,21 @@
+//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform analyses related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/IR/PatternMatch.h"
+
+using namespace llvm;
+
+bool llvm::isGuard(const User *U) {
+ using namespace llvm::PatternMatch;
+ return match(U, m_Intrinsic<Intrinsic::experimental_guard>());
+}
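+
+// Illustrative note (hypothetical value names): isGuard matches calls to the
+// guard intrinsic, e.g.
+//   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]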
diff --git a/contrib/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm/lib/Analysis/IVDescriptors.cpp
new file mode 100644
index 000000000000..aaebc4a481ec
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IVDescriptors.cpp
@@ -0,0 +1,1089 @@
+//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file "describes" induction and recurrence variables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "iv-descriptors"
+
+bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
+ SmallPtrSetImpl<Instruction *> &Set) {
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
+ if (!Set.count(dyn_cast<Instruction>(*Use)))
+ return false;
+ return true;
+}
+
+bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_IntegerOr:
+ case RK_IntegerAnd:
+ case RK_IntegerXor:
+ case RK_IntegerMinMax:
+ return true;
+ }
+ return false;
+}
+
+bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
+ return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
+}
+
+bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_FloatAdd:
+ case RK_FloatMult:
+ return true;
+ }
+ return false;
+}
+
+/// Determines if Phi may have been type-promoted. If Phi has a single user
+/// that ANDs the Phi with a type mask, return the user. RT is updated to
+/// account for the narrower bit width represented by the mask, and the AND
+/// instruction is added to CI.
+static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI) {
+ if (!Phi->hasOneUse())
+ return Phi;
+
+ const APInt *M = nullptr;
+ Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
+
+ // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
+ // with a new integer type of the corresponding bit width.
+ if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
+ int32_t Bits = (*M + 1).exactLogBase2();
+ if (Bits > 0) {
+ RT = IntegerType::get(Phi->getContext(), Bits);
+ Visited.insert(Phi);
+ CI.insert(J);
+ return J;
+ }
+ }
+ return Phi;
+}
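+
+// Illustrative example (hypothetical names): a phi that InstCombine has
+// type-promoted to i32 but that is masked back to 8 bits by its single user:
+//   %p    = phi i32 [ 0, %ph ], [ %next, %loop ]
+//   %and  = and i32 %p, 255            ; single user of %p; mask 2^8-1 -> i8
+//   %next = add i32 %and, %val
+// lookThroughAnd returns %and, sets RT to i8, and records %and in CI.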
+
+/// Compute the minimal bit width needed to represent a reduction whose exit
+/// instruction is given by Exit.
+static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
+ DemandedBits *DB,
+ AssumptionCache *AC,
+ DominatorTree *DT) {
+ bool IsSigned = false;
+ const DataLayout &DL = Exit->getModule()->getDataLayout();
+ uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
+
+ if (DB) {
+ // Use the demanded bits analysis to determine the bits that are live out
+ // of the exit instruction, rounding up to the nearest power of two. If the
+ // use of demanded bits results in a smaller bit width, we know the value
+ // must be positive (i.e., IsSigned = false), because if this were not the
+ // case, the sign bit would have been demanded.
+ auto Mask = DB->getDemandedBits(Exit);
+ MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
+ }
+
+ if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
+ // If demanded bits wasn't able to limit the bit width, we can try to use
+ // value tracking instead. This can be the case, for example, if the value
+ // may be negative.
+ auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
+ auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
+ MaxBitWidth = NumTypeBits - NumSignBits;
+ KnownBits Bits = computeKnownBits(Exit, DL);
+ if (!Bits.isNonNegative()) {
+ // If the value is not known to be non-negative, we set IsSigned to true,
+ // meaning that we will use sext instructions instead of zext
+ // instructions to restore the original type.
+ IsSigned = true;
+ if (!Bits.isNegative())
+ // If the value is not known to be negative, we don't know what the
+ // upper bit is, and therefore, we don't know what kind of extend we
+ // will need. In this case, just increase the bit width by one bit and
+ // use sext.
+ ++MaxBitWidth;
+ }
+ }
+ if (!isPowerOf2_64(MaxBitWidth))
+ MaxBitWidth = NextPowerOf2(MaxBitWidth);
+
+ return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
+ IsSigned);
+}
+
+/// Collect cast instructions that can be ignored in the vectorizer's cost
+/// model, given a reduction exit value and the minimal type in which the
+/// reduction can be represented.
+static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
+ Type *RecurrenceType,
+ SmallPtrSetImpl<Instruction *> &Casts) {
+
+ SmallVector<Instruction *, 8> Worklist;
+ SmallPtrSet<Instruction *, 8> Visited;
+ Worklist.push_back(Exit);
+
+ while (!Worklist.empty()) {
+ Instruction *Val = Worklist.pop_back_val();
+ Visited.insert(Val);
+ if (auto *Cast = dyn_cast<CastInst>(Val))
+ if (Cast->getSrcTy() == RecurrenceType) {
+ // If the source type of a cast instruction is equal to the recurrence
+ // type, it will be eliminated, and should be ignored in the vectorizer
+ // cost model.
+ Casts.insert(Cast);
+ continue;
+ }
+
+ // Add all operands to the work list if they are loop-varying values that
+ // we haven't yet visited.
+ for (Value *O : cast<User>(Val)->operands())
+ if (auto *I = dyn_cast<Instruction>(O))
+ if (TheLoop->contains(I) && !Visited.count(I))
+ Worklist.push_back(I);
+ }
+}
+
+bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
+ Loop *TheLoop, bool HasFunNoNaNAttr,
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB,
+ AssumptionCache *AC,
+ DominatorTree *DT) {
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Reduction variables are only found in the loop header block.
+ if (Phi->getParent() != TheLoop->getHeader())
+ return false;
+
+ // Obtain the reduction start value from the value that comes from the loop
+ // preheader.
+ Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
+
+ // ExitInstruction is the single value which is used outside the loop.
+ // We only allow for a single reduction value to be used outside the loop.
+ // This includes users of the reduction variables, which form a cycle
+ // that ends in the phi node.
+ Instruction *ExitInstruction = nullptr;
+ // Indicates that we found a reduction operation in our scan.
+ bool FoundReduxOp = false;
+
+ // We start with the PHI node and scan for all of the users of this
+ // instruction. All users must be instructions that can be used as reduction
+ // variables (such as ADD). We must have a single out-of-block user. The cycle
+ // must include the original PHI.
+ bool FoundStartPHI = false;
+
+ // To recognize min/max patterns formed by an icmp/select sequence, we store
+ // the number of instructions we saw from the recognized min/max pattern,
+ // to make sure we only see exactly the two instructions.
+ unsigned NumCmpSelectPatternInst = 0;
+ InstDesc ReduxDesc(false, nullptr);
+
+ // Data used for determining if the recurrence has been type-promoted.
+ Type *RecurrenceType = Phi->getType();
+ SmallPtrSet<Instruction *, 4> CastInsts;
+ Instruction *Start = Phi;
+ bool IsSigned = false;
+
+ SmallPtrSet<Instruction *, 8> VisitedInsts;
+ SmallVector<Instruction *, 8> Worklist;
+
+ // Return early if the recurrence kind does not match the type of Phi. If the
+ // recurrence kind is arithmetic, we attempt to look through AND operations
+ // resulting from the type promotion performed by InstCombine. Vector
+ // operations are not limited to the legal integer widths, so we may be able
+ // to evaluate the reduction in the narrower width.
+ if (RecurrenceType->isFloatingPointTy()) {
+ if (!isFloatingPointRecurrenceKind(Kind))
+ return false;
+ } else {
+ if (!isIntegerRecurrenceKind(Kind))
+ return false;
+ if (isArithmeticRecurrenceKind(Kind))
+ Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
+ }
+
+ Worklist.push_back(Start);
+ VisitedInsts.insert(Start);
+
+ // A value in the reduction can be used:
+ // - By the reduction:
+ // - Reduction operation:
+ // - One use of reduction value (safe).
+ // - Multiple use of reduction value (not safe).
+ // - PHI:
+ // - All uses of the PHI must be the reduction (safe).
+ // - Otherwise, not safe.
+ // - By instructions outside of the loop (safe).
+ // * One value may have several outside users, but all outside
+ // uses must be of the same value.
+ // - By an instruction that is not part of the reduction (not safe).
+ // This is either:
+ // * An instruction type other than PHI or the reduction operation.
+ // * A PHI in the header other than the initial PHI.
+ while (!Worklist.empty()) {
+ Instruction *Cur = Worklist.back();
+ Worklist.pop_back();
+
+ // No Users.
+ // If the instruction has no users then this is a broken chain and can't be
+ // a reduction variable.
+ if (Cur->use_empty())
+ return false;
+
+ bool IsAPhi = isa<PHINode>(Cur);
+
+ // A header PHI use other than the original PHI.
+ if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
+ return false;
+
+ // Reductions of instructions such as Div and Sub are only possible if the
+ // LHS is the reduction variable.
+ if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
+ !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
+ !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
+ return false;
+
+ // Any reduction instruction must be of one of the allowed kinds. We ignore
+ // the starting value (the Phi or an AND instruction if the Phi has been
+ // type-promoted).
+ if (Cur != Start) {
+ ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
+ if (!ReduxDesc.isRecurrence())
+ return false;
+ }
+
+ bool IsASelect = isa<SelectInst>(Cur);
+
+ // A conditional reduction operation must only have 2 or fewer uses in
+ // VisitedInsts.
+ if (IsASelect && (Kind == RK_FloatAdd || Kind == RK_FloatMult) &&
+ hasMultipleUsesOf(Cur, VisitedInsts, 2))
+ return false;
+
+ // A reduction operation must only have one use of the reduction value.
+ if (!IsAPhi && !IsASelect && Kind != RK_IntegerMinMax &&
+ Kind != RK_FloatMinMax && hasMultipleUsesOf(Cur, VisitedInsts, 1))
+ return false;
+
+ // All inputs to a PHI node must be a reduction value.
+ if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
+ return false;
+
+ if (Kind == RK_IntegerMinMax &&
+ (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+
+ // Check whether we found a reduction operator.
+ FoundReduxOp |= !IsAPhi && Cur != Start;
+
+ // Process users of current instruction. Push non-PHI nodes after PHI nodes
+ // onto the stack. This way we are going to have seen all inputs to PHI
+ // nodes once we get to them.
+ SmallVector<Instruction *, 8> NonPHIs;
+ SmallVector<Instruction *, 8> PHIs;
+ for (User *U : Cur->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ // Check if we found the exit user.
+ BasicBlock *Parent = UI->getParent();
+ if (!TheLoop->contains(Parent)) {
+ // If we already know this instruction is used externally, move on to
+ // the next user.
+ if (ExitInstruction == Cur)
+ continue;
+
+ // Exit if you find multiple values used outside or if the header phi
+ // node is being used. In this case the user uses the value of the
+ // previous iteration, in which case we would lose "VF-1" iterations of
+ // the reduction operation if we vectorize.
+ if (ExitInstruction != nullptr || Cur == Phi)
+ return false;
+
+ // The instruction used by an outside user must be the last instruction
+ // before we feed back to the reduction phi. Otherwise, we lose VF-1
+ // operations on the value.
+ if (!is_contained(Phi->operands(), Cur))
+ return false;
+
+ ExitInstruction = Cur;
+ continue;
+ }
+
+ // Process instructions only once (termination). Each reduction cycle
+ // value must only be used once, except by phi nodes and min/max
+ // reductions which are represented as a cmp followed by a select.
+ InstDesc IgnoredVal(false, nullptr);
+ if (VisitedInsts.insert(UI).second) {
+ if (isa<PHINode>(UI))
+ PHIs.push_back(UI);
+ else
+ NonPHIs.push_back(UI);
+ } else if (!isa<PHINode>(UI) &&
+ ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
+ !isa<SelectInst>(UI)) ||
+ (!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
+ !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())))
+ return false;
+
+ // Remember that we completed the cycle.
+ if (UI == Phi)
+ FoundStartPHI = true;
+ }
+ Worklist.append(PHIs.begin(), PHIs.end());
+ Worklist.append(NonPHIs.begin(), NonPHIs.end());
+ }
+
+ // This means we have seen one but not the other instruction of the
+ // pattern or more than just a select and cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
+ return false;
+
+ if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
+ return false;
+
+ if (Start != Phi) {
+ // If the starting value is not the same as the phi node, we speculatively
+ // looked through an 'and' instruction when evaluating a potential
+ // arithmetic reduction to determine if it may have been type-promoted.
+ //
+ // We now compute the minimal bit width that is required to represent the
+ // reduction. If this is the same width that was indicated by the 'and', we
+ // can represent the reduction in the smaller type. The 'and' instruction
+ // will be eliminated since it will essentially be a cast instruction that
+ // can be ignored in the cost model. If we compute a different type than we
+ // did when evaluating the 'and', the 'and' will not be eliminated, and we
+ // will end up with different kinds of operations in the recurrence
+ // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
+ // the case.
+ //
+ // The vectorizer relies on InstCombine to perform the actual
+ // type-shrinking. It does this by inserting instructions to truncate the
+ // exit value of the reduction to the width indicated by RecurrenceType and
+ // then extend this value back to the original width. If IsSigned is false,
+ // a 'zext' instruction will be generated; otherwise, a 'sext' will be
+ // used.
+ //
+ // TODO: We should not rely on InstCombine to rewrite the reduction in the
+ // smaller type. We should just generate a correctly typed expression
+ // to begin with.
+ Type *ComputedType;
+ std::tie(ComputedType, IsSigned) =
+ computeRecurrenceType(ExitInstruction, DB, AC, DT);
+ if (ComputedType != RecurrenceType)
+ return false;
+
+ // The recurrence expression will be represented in a narrower type. If
+ // there are any cast instructions that will be unnecessary, collect them
+ // in CastInsts. Note that the 'and' instruction was already included in
+ // this list.
+ //
+ // TODO: A better way to represent this may be to tag in some way all the
+ // instructions that are a part of the reduction. The vectorizer cost
+ // model could then apply the recurrence type to these instructions,
+ // without needing a white list of instructions to ignore.
+ collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
+ }
+
+ // We found a reduction var if we have reached the original phi node and we
+ // only have a single instruction with out-of-loop users.
+
+ // The ExitInstruction (the instruction allowed to have out-of-loop users)
+ // is saved as part of the RecurrenceDescriptor.
+
+ // Save the description of this reduction variable.
+ RecurrenceDescriptor RD(
+ RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+ ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
+ RedDes = RD;
+
+ return true;
+}
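+
+// Illustrative example (hypothetical names) of an integer add reduction that
+// this scan accepts:
+//   loop:
+//     %sum      = phi i32 [ 0, %ph ], [ %sum.next, %loop ]
+//     %sum.next = add i32 %sum, %val        ; the single reduction operation
+//   exit:
+//     ... single out-of-loop user of %sum.next (the ExitInstruction)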
+
+/// Returns a recurrence descriptor for the instruction if it is a
+/// Select(ICmp(X, Y), X, Y) pattern corresponding to a min(X, Y) or max(X, Y).
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
+
+ assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
+ "Expect a select instruction");
+ Instruction *Cmp = nullptr;
+ SelectInst *Select = nullptr;
+
+ // We must handle the select(cmp()) as a single instruction. Advance to the
+ // select.
+ if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
+ if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
+ return InstDesc(false, I);
+ return InstDesc(Select, Prev.getMinMaxKind());
+ }
+
+ // Only handle single use cases for now.
+ if (!(Select = dyn_cast<SelectInst>(I)))
+ return InstDesc(false, I);
+ if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+ !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
+ return InstDesc(false, I);
+ if (!Cmp->hasOneUse())
+ return InstDesc(false, I);
+
+ Value *CmpLeft;
+ Value *CmpRight;
+
+ // Look for a min/max pattern.
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_UIntMin);
+ else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_UIntMax);
+ else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_SIntMax);
+ else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_SIntMin);
+ else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMin);
+ else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMax);
+ else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMin);
+ else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMax);
+
+ return InstDesc(false, I);
+}
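+
+// Illustrative example (hypothetical names): a signed-max step matched by the
+// pattern above:
+//   %cmp      = icmp sgt i32 %x, %max
+//   %max.next = select i1 %cmp, i32 %x, i32 %max
+// which yields InstDesc(%max.next, MRK_SIntMax).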
+
+/// Returns a recurrence descriptor if the select instruction has users in the
+/// compare-and-add reduction pattern below. The select instruction argument is
+/// the last one in the sequence.
+///
+/// %sum.1 = phi ...
+/// ...
+/// %cmp = fcmp pred %0, %CFP
+/// %add = fadd %0, %sum.1
+/// %sum.2 = select %cmp, %add, %sum.1
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isConditionalRdxPattern(
+ RecurrenceKind Kind, Instruction *I) {
+ SelectInst *SI = dyn_cast<SelectInst>(I);
+ if (!SI)
+ return InstDesc(false, I);
+
+ CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
+ // Only handle single use cases for now.
+ if (!CI || !CI->hasOneUse())
+ return InstDesc(false, I);
+
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+ // For now, handle only the case where exactly one of the select's operands
+ // is a PHI node.
+ if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) ||
+ (!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal)))
+ return InstDesc(false, I);
+
+ Instruction *I1 =
+ isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal)
+ : dyn_cast<Instruction>(TrueVal);
+ if (!I1 || !I1->isBinaryOp())
+ return InstDesc(false, I);
+
+ Value *Op1, *Op2;
+ if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
+ m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
+ I1->isFast())
+ return InstDesc(Kind == RK_FloatAdd, SI);
+
+ if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
+ return InstDesc(Kind == RK_FloatMult, SI);
+
+ return InstDesc(false, I);
+}
+
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
+ InstDesc &Prev, bool HasFunNoNaNAttr) {
+ bool FP = I->getType()->isFloatingPointTy();
+ Instruction *UAI = Prev.getUnsafeAlgebraInst();
+ if (!UAI && FP && !I->isFast())
+ UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
+
+ switch (I->getOpcode()) {
+ default:
+ return InstDesc(false, I);
+ case Instruction::PHI:
+ return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
+ case Instruction::Sub:
+ case Instruction::Add:
+ return InstDesc(Kind == RK_IntegerAdd, I);
+ case Instruction::Mul:
+ return InstDesc(Kind == RK_IntegerMult, I);
+ case Instruction::And:
+ return InstDesc(Kind == RK_IntegerAnd, I);
+ case Instruction::Or:
+ return InstDesc(Kind == RK_IntegerOr, I);
+ case Instruction::Xor:
+ return InstDesc(Kind == RK_IntegerXor, I);
+ case Instruction::FMul:
+ return InstDesc(Kind == RK_FloatMult, I, UAI);
+ case Instruction::FSub:
+ case Instruction::FAdd:
+ return InstDesc(Kind == RK_FloatAdd, I, UAI);
+ case Instruction::Select:
+ if (Kind == RK_FloatAdd || Kind == RK_FloatMult)
+ return isConditionalRdxPattern(Kind, I);
+ LLVM_FALLTHROUGH;
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ if (Kind != RK_IntegerMinMax &&
+ (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
+ return InstDesc(false, I);
+ return isMinMaxSelectCmpPattern(I, Prev);
+ }
+}
+
+bool RecurrenceDescriptor::hasMultipleUsesOf(
+ Instruction *I, SmallPtrSetImpl<Instruction *> &Insts,
+ unsigned MaxNumUses) {
+ unsigned NumUses = 0;
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
+ ++Use) {
+ if (Insts.count(dyn_cast<Instruction>(*Use)))
+ ++NumUses;
+ if (NumUses > MaxNumUses)
+ return true;
+ }
+
+ return false;
+}
+bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB, AssumptionCache *AC,
+ DominatorTree *DT) {
+
+ BasicBlock *Header = TheLoop->getHeader();
+ Function &F = *Header->getParent();
+ bool HasFunNoNaNAttr =
+ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
+
+ if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
+ DB, AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi
+ << "\n");
+ return true;
+ }
+ // Not a reduction of known type.
+ return false;
+}
+
+bool RecurrenceDescriptor::isFirstOrderRecurrence(
+ PHINode *Phi, Loop *TheLoop,
+ DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
+
+ // Ensure the phi node is in the loop header and has two incoming values.
+ if (Phi->getParent() != TheLoop->getHeader() ||
+ Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Ensure the loop has a preheader and a single latch block. The loop
+ // vectorizer will need the latch to set up the next iteration of the loop.
+ auto *Preheader = TheLoop->getLoopPreheader();
+ auto *Latch = TheLoop->getLoopLatch();
+ if (!Preheader || !Latch)
+ return false;
+
+ // Ensure the phi node's incoming blocks are the loop preheader and latch.
+ if (Phi->getBasicBlockIndex(Preheader) < 0 ||
+ Phi->getBasicBlockIndex(Latch) < 0)
+ return false;
+
+ // Get the previous value. The previous value comes from the latch edge while
+ // the initial value comes from the preheader edge.
+ auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
+ if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
+ SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
+ return false;
+
+ // Ensure every user of the phi node is dominated by the previous value.
+ // The dominance requirement ensures the loop vectorizer will not need to
+ // vectorize the initial value prior to the first iteration of the loop.
+ // TODO: Consider extending this sinking to handle other kinds of instructions
+ // and expressions, beyond sinking a single cast past Previous.
+ if (Phi->hasOneUse()) {
+ auto *I = Phi->user_back();
+ if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
+ DT->dominates(Previous, I->user_back())) {
+ if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
+ SinkAfter[I] = Previous;
+ return true;
+ }
+ }
+
+ for (User *U : Phi->users())
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (!DT->dominates(Previous, I))
+ return false;
+ }
+
+ return true;
+}
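+
+// Illustrative example (hypothetical names) of a first-order recurrence,
+// where each iteration uses the value produced by the previous one:
+//   for.body:
+//     %prev = phi i32 [ %init, %ph ], [ %cur, %for.body ]
+//     %cur  = load i32, i32* %ptr
+//     %out  = add i32 %cur, %prev     ; user of %prev dominated by %cur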
+
+/// This function returns the identity element (or neutral element) for
+/// the operation K.
+Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
+ Type *Tp) {
+ switch (K) {
+ case RK_IntegerXor:
+ case RK_IntegerAdd:
+ case RK_IntegerOr:
+ // Adding, Xoring, Oring zero to a number does not change it.
+ return ConstantInt::get(Tp, 0);
+ case RK_IntegerMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantInt::get(Tp, 1);
+ case RK_IntegerAnd:
+ // AND-ing a number with an all-1 value does not change it.
+ return ConstantInt::get(Tp, -1, true);
+ case RK_FloatMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantFP::get(Tp, 1.0L);
+ case RK_FloatAdd:
+ // Adding zero to a number does not change it.
+ return ConstantFP::get(Tp, 0.0L);
+ default:
+ llvm_unreachable("Unknown recurrence kind");
+ }
+}
+
+/// This function translates the recurrence kind to an LLVM binary operator.
+unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
+ switch (Kind) {
+ case RK_IntegerAdd:
+ return Instruction::Add;
+ case RK_IntegerMult:
+ return Instruction::Mul;
+ case RK_IntegerOr:
+ return Instruction::Or;
+ case RK_IntegerAnd:
+ return Instruction::And;
+ case RK_IntegerXor:
+ return Instruction::Xor;
+ case RK_FloatMult:
+ return Instruction::FMul;
+ case RK_FloatAdd:
+ return Instruction::FAdd;
+ case RK_IntegerMinMax:
+ return Instruction::ICmp;
+ case RK_FloatMinMax:
+ return Instruction::FCmp;
+ default:
+ llvm_unreachable("Unknown recurrence operation");
+ }
+}
+
+InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
+ const SCEV *Step, BinaryOperator *BOp,
+ SmallVectorImpl<Instruction *> *Casts)
+ : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
+ assert(IK != IK_NoInduction && "Not an induction");
+
+ // Start value type should match the induction kind and the value
+ // itself should not be null.
+ assert(StartValue && "StartValue is null");
+ assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
+ "StartValue is not a pointer for pointer induction");
+ assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
+ "StartValue is not an integer for integer induction");
+
+ // Check the Step Value. It should be a non-zero integer value.
+ assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
+ "Step value is zero");
+
+ assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
+ "Step value should be constant for pointer induction");
+ assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
+ "StepValue is not an integer");
+
+ assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
+ "StepValue is not FP for FpInduction");
+ assert((IK != IK_FpInduction ||
+ (InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub))) &&
+ "Binary opcode should be specified for FP induction");
+
+ if (Casts) {
+ for (auto &Inst : *Casts) {
+ RedundantCasts.push_back(Inst);
+ }
+ }
+}
+
+int InductionDescriptor::getConsecutiveDirection() const {
+ ConstantInt *ConstStep = getConstIntStepValue();
+ if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
+ return ConstStep->getSExtValue();
+ return 0;
+}
+
+ConstantInt *InductionDescriptor::getConstIntStepValue() const {
+ if (isa<SCEVConstant>(Step))
+ return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
+ return nullptr;
+}
+
+bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
+ ScalarEvolution *SE,
+ InductionDescriptor &D) {
+
+ // Here we only handle FP induction variables.
+ assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
+
+ if (TheLoop->getHeader() != Phi->getParent())
+ return false;
+
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi if it has a unique entry value and a unique backedge value.
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+ Value *BEValue = nullptr, *StartValue = nullptr;
+ if (TheLoop->contains(Phi->getIncomingBlock(0))) {
+ BEValue = Phi->getIncomingValue(0);
+ StartValue = Phi->getIncomingValue(1);
+ } else {
+ assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
+ "Unexpected Phi node in the loop");
+ BEValue = Phi->getIncomingValue(1);
+ StartValue = Phi->getIncomingValue(0);
+ }
+
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
+ if (!BOp)
+ return false;
+
+ Value *Addend = nullptr;
+ if (BOp->getOpcode() == Instruction::FAdd) {
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+ else if (BOp->getOperand(1) == Phi)
+ Addend = BOp->getOperand(0);
+ } else if (BOp->getOpcode() == Instruction::FSub)
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+
+ if (!Addend)
+ return false;
+
+ // The addend should be loop invariant
+ if (auto *I = dyn_cast<Instruction>(Addend))
+ if (TheLoop->contains(I))
+ return false;
+
+ // FP Step has unknown SCEV
+ const SCEV *Step = SE->getUnknown(Addend);
+ D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
+ return true;
+}
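+
+// Illustrative example (hypothetical names) of a float induction recognized
+// here, with a loop-invariant step:
+//   %x      = phi float [ 0.0, %ph ], [ %x.next, %loop ]
+//   %x.next = fadd float %x, %step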
+
+/// This function is called when we suspect that the update-chain of a phi node
+/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts,
+/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
+/// predicate P under which the SCEV expression for the phi can be the
+/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
+/// cast instructions that are involved in the update-chain of this induction.
+/// A caller that adds the required runtime predicate can be free to drop these
+/// cast instructions, and compute the phi using \p AR (instead of some scev
+/// expression with casts).
+///
+/// For example, without a predicate the scev expression can take the following
+/// form:
+/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
+///
+/// It corresponds to the following IR sequence:
+/// %for.body:
+/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
+/// %casted_phi = "ExtTrunc i64 %x"
+/// %add = add i64 %casted_phi, %step
+///
+/// where %x is given in \p PN,
+/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
+/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
+/// several forms, for example, such as:
+/// ExtTrunc1: %casted_phi = and %x, 2^n-1
+/// or:
+/// ExtTrunc2: %t = shl %x, m
+/// %casted_phi = ashr %t, m
+///
+/// If we are able to find such sequence, we return the instructions
+/// we found, namely %casted_phi and the instructions on its use-def chain up
+/// to the phi (not including the phi).
+static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
+ const SCEVUnknown *PhiScev,
+ const SCEVAddRecExpr *AR,
+ SmallVectorImpl<Instruction *> &CastInsts) {
+
+ assert(CastInsts.empty() && "CastInsts is expected to be empty.");
+ auto *PN = cast<PHINode>(PhiScev->getValue());
+ assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
+ const Loop *L = AR->getLoop();
+
+ // Find any cast instructions that participate in the def-use chain of
+ // PhiScev in the loop.
+ // FORNOW/TODO: We currently expect the def-use chain to include only
+ // two-operand instructions, where one of the operands is an invariant.
+ // createAddRecFromPHIWithCasts() currently does not support anything more
+ // involved than that, so we keep the search simple. This can be
+ // extended/generalized as needed.
+
+ auto getDef = [&](const Value *Val) -> Value * {
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
+ if (!BinOp)
+ return nullptr;
+ Value *Op0 = BinOp->getOperand(0);
+ Value *Op1 = BinOp->getOperand(1);
+ Value *Def = nullptr;
+ if (L->isLoopInvariant(Op0))
+ Def = Op1;
+ else if (L->isLoopInvariant(Op1))
+ Def = Op0;
+ return Def;
+ };
+
+ // Look for the instruction that defines the induction via the
+ // loop backedge.
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+ Value *Val = PN->getIncomingValueForBlock(Latch);
+ if (!Val)
+ return false;
+
+ // Follow the def-use chain until the induction phi is reached.
+ // If on the way we encounter a Value that has the same SCEV Expr as the
+ // phi node, we can consider the instructions we visit from that point
+ // as part of the cast-sequence that can be ignored.
+ bool InCastSequence = false;
+ auto *Inst = dyn_cast<Instruction>(Val);
+ while (Val != PN) {
+ // If we encountered a phi node other than PN, or if we left the loop,
+ // we bail out.
+ if (!Inst || !L->contains(Inst)) {
+ return false;
+ }
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
+ if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
+ InCastSequence = true;
+ if (InCastSequence) {
+ // Only the last instruction in the cast sequence is expected to have
+ // uses outside the induction def-use chain.
+ if (!CastInsts.empty())
+ if (!Inst->hasOneUse())
+ return false;
+ CastInsts.push_back(Inst);
+ }
+ Val = getDef(Val);
+ if (!Val)
+ return false;
+ Inst = dyn_cast<Instruction>(Val);
+ }
+
+ return InCastSequence;
+}
+
+bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
+ PredicatedScalarEvolution &PSE,
+ InductionDescriptor &D, bool Assume) {
+ Type *PhiTy = Phi->getType();
+
+ // Handle integer and pointer induction variables.
+ // We also handle FP induction, but we do not try to build a recurrence
+ // expression from the PHI node in place.
+
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
+ !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
+ return false;
+
+ if (PhiTy->isFloatingPointTy())
+ return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
+
+ const SCEV *PhiScev = PSE.getSCEV(Phi);
+ const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
+ // We need this expression to be an AddRecExpr.
+ if (Assume && !AR)
+ AR = PSE.getAsAddRec(Phi);
+
+ if (!AR) {
+ LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ // Record any Cast instructions that participate in the induction update
+ const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
+ // If we started from an UnknownSCEV, and managed to build an addRecurrence
+ // only after enabling Assume with PSCEV, this means we may have encountered
+ // cast instructions that required adding a runtime check in order to
+ // guarantee the correctness of the AddRecurrence representation of the
+ // induction.
+ if (PhiScev != AR && SymbolicPhi) {
+ SmallVector<Instruction *, 2> Casts;
+ if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
+ return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
+ }
+
+ return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
+}
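For orientation (not part of the patch), a minimal sketch of how a loop transform might use the predicated overload above; the pass state (PSE, the loop L) is assumed to be set up already and the surrounding code is hypothetical:

// Hypothetical caller: classify header PHIs, letting PSE add runtime
// predicates (Assume = true); casts recorded in the descriptor become
// droppable once the caller emits that predicate.
for (PHINode &Phi : L->getHeader()->phis()) {
  InductionDescriptor D;
  if (InductionDescriptor::isInductionPHI(&Phi, L, PSE, D, /*Assume=*/true)) {
    // D now describes the start value, kind (e.g. IK_IntInduction or
    // IK_PtrInduction) and step of the induction.
  }
}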
+
+bool InductionDescriptor::isInductionPHI(
+ PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
+ InductionDescriptor &D, const SCEV *Expr,
+ SmallVectorImpl<Instruction *> *CastsToIgnore) {
+ Type *PhiTy = Phi->getType();
+ // We only handle integer and pointer induction variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return false;
+
+ // Check that the PHI is consecutive.
+ const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
+ if (!AR) {
+ LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ if (AR->getLoop() != TheLoop) {
+ // FIXME: We should treat this as a uniform. Unfortunately, we
+ // don't currently know how to handle uniform PHIs.
+ LLVM_DEBUG(
+ dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
+ return false;
+ }
+
+ Value *StartValue =
+ Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+ // Calculate the pointer stride and check if it is consecutive.
+ // The stride may be a constant or a loop invariant integer value.
+ const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+ if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
+ return false;
+
+ if (PhiTy->isIntegerTy()) {
+ D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/nullptr,
+ CastsToIgnore);
+ return true;
+ }
+
+ assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+ // The step of a pointer induction must be a constant.
+ if (!ConstStep)
+ return false;
+
+ ConstantInt *CV = ConstStep->getValue();
+ Type *PointerElementType = PhiTy->getPointerElementType();
+ // The pointer stride cannot be determined if the pointer element type is not
+ // sized.
+ if (!PointerElementType->isSized())
+ return false;
+
+ const DataLayout &DL = Phi->getModule()->getDataLayout();
+ int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
+ if (!Size)
+ return false;
+
+ int64_t CVSize = CV->getSExtValue();
+ if (CVSize % Size)
+ return false;
+ auto *StepValue =
+ SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
+ D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
+ return true;
+}
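A standalone sketch of the stride arithmetic performed just above (illustrative only): the byte step must be an exact multiple of the element allocation size, and the recorded stride is expressed in elements.

#include <cstdint>

// E.g. an i32* phi stepped by 8 bytes per iteration yields a stride of 2;
// a step of 6 bytes fails the divisibility check and the phi is rejected.
static bool strideFromByteStep(int64_t CVSize, int64_t Size, int64_t &Stride) {
  if (!Size || CVSize % Size) // unsized element or non-multiple byte step
    return false;
  Stride = CVSize / Size;     // stride in elements, may be negative
  return true;
}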
diff --git a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index 4659c0a00629..d6e6e76af03c 100644
--- a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -15,7 +15,7 @@
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/IndirectCallSiteVisitor.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
index a6cccc3b5910..6ddb3cbc01a3 100644
--- a/contrib/llvm/lib/Analysis/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -30,6 +31,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
@@ -137,7 +139,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool HasReturn;
bool HasIndirectBr;
bool HasUninlineableIntrinsic;
- bool UsesVarArgs;
+ bool InitsVargArgs;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -227,7 +229,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
BlockFrequencyInfo *CallerBFI);
// Custom analysis routines.
- bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
+ InlineResult analyzeBlock(BasicBlock *BB,
+ SmallPtrSetImpl<const Value *> &EphValues);
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
@@ -282,7 +285,7 @@ public:
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- HasUninlineableIntrinsic(false), UsesVarArgs(false), AllocatedSize(0),
+ HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
@@ -290,7 +293,7 @@ public:
NumInstructionsSimplified(0), SROACostSavings(0),
SROACostSavingsLost(0) {}
- bool analyzeCall(CallSite CS);
+ InlineResult analyzeCall(CallSite CS);
int getThreshold() { return Threshold; }
int getCost() { return Cost; }
@@ -719,6 +722,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
case Instruction::FPToSI:
if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
Cost += InlineConstants::CallPenalty;
+ break;
default:
break;
}
@@ -1238,8 +1242,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
HasUninlineableIntrinsic = true;
return false;
case Intrinsic::vastart:
- case Intrinsic::vaend:
- UsesVarArgs = true;
+ InitsVargArgs = true;
return false;
}
}
@@ -1541,8 +1544,9 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
-bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
- SmallPtrSetImpl<const Value *> &EphValues) {
+InlineResult
+CallAnalyzer::analyzeBlock(BasicBlock *BB,
+ SmallPtrSetImpl<const Value *> &EphValues) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// FIXME: Currently, the number of instructions in a function regardless of
// our ability to simplify them during inline to constants or dead code,
@@ -1574,16 +1578,29 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
using namespace ore;
// If visiting this instruction detected an uninlinable pattern, abort.
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr || HasUninlineableIntrinsic || UsesVarArgs) {
+ InlineResult IR;
+ if (IsRecursiveCall)
+ IR = "recursive";
+ else if (ExposesReturnsTwice)
+ IR = "exposes returns twice";
+ else if (HasDynamicAlloca)
+ IR = "dynamic alloca";
+ else if (HasIndirectBr)
+ IR = "indirect branch";
+ else if (HasUninlineableIntrinsic)
+ IR = "uninlinable intrinsic";
+ else if (InitsVargArgs)
+ IR = "varargs";
+ if (!IR) {
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
CandidateCS.getInstruction())
- << NV("Callee", &F)
- << " has uninlinable pattern and cost is not fully computed";
+ << NV("Callee", &F) << " has uninlinable pattern ("
+ << NV("InlineResult", IR.message)
+ << ") and cost is not fully computed";
});
- return false;
+ return IR;
}
// If the caller is a recursive function then we don't want to inline
@@ -1591,15 +1608,15 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
// the caller stack usage dramatically.
if (IsCallerRecursive &&
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
+ InlineResult IR = "recursive and allocates too much stack space";
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
CandidateCS.getInstruction())
- << NV("Callee", &F)
- << " is recursive and allocates too much stack space. Cost is "
- "not fully computed";
+ << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
+ << ". Cost is not fully computed";
});
- return false;
+ return IR;
}
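The InlineResult used in these hunks converts from a message string (failure) and tests false on failure, as the code above shows. A tiny standalone analogue of that idiom, with names that are ours rather than LLVM's:

#include <cstdio>

struct Result {                      // analogue of InlineResult
  const char *message = nullptr;     // nullptr means success
  Result(const char *M = nullptr) : message(M) {}
  explicit operator bool() const { return message == nullptr; }
};

static Result checkBlock(bool HasDynamicAlloca) {
  if (HasDynamicAlloca)
    return "dynamic alloca";         // failure carrying a reason
  return {};                         // success
}

int main() {
  Result R = checkBlock(true);
  if (!R)
    std::printf("not inlining: %s\n", R.message);
}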
// Check if we've past the maximum possible threshold so we don't spin in
@@ -1695,7 +1712,7 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
-bool CallAnalyzer::analyzeCall(CallSite CS) {
+InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
++NumCallsAnalyzed;
// Perform some tweaks to the cost and threshold based on the direct
@@ -1714,6 +1731,13 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Update the threshold based on callsite properties
updateThreshold(CS, F);
+ // While Threshold depends on commandline options that can take negative
+ // values, we want to enforce the invariant that the computed threshold and
+ // bonuses are non-negative.
+ assert(Threshold >= 0);
+ assert(SingleBBBonus >= 0);
+ assert(VectorBonus >= 0);
+
// Speculatively apply all possible bonuses to Threshold. If cost exceeds
// this Threshold any time, and cost cannot decrease, we can stop processing
// the rest of the function body.
@@ -1730,7 +1754,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Check if we're done. This can happen due to bonuses and penalties.
if (Cost >= Threshold && !ComputeFullInlineCost)
- return false;
+ return "high cost";
if (F.empty())
return true;
@@ -1809,14 +1833,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// site. If the blockaddress escapes the function, e.g., via a global
// variable, inlining may lead to an invalid cross-function reference.
if (BB->hasAddressTaken())
- return false;
+ return "blockaddress";
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail out.
- if (!analyzeBlock(BB, EphValues))
- return false;
+ InlineResult IR = analyzeBlock(BB, EphValues);
+ if (!IR)
+ return IR;
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
@@ -1867,7 +1892,25 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// inlining this would cause the removal of the caller (so the instruction
// is not actually duplicated, just moved).
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
- return false;
+ return "noduplicate";
+
+ // Loops generally behave a lot like calls: they act as barriers to code
+ // motion, require a certain amount of setup, etc. So when optimising for
+ // size, we penalise any call sites whose callee contains loops. We do this
+ // after all other costs here, so we will likely only be dealing with
+ // relatively small functions (and hence DT and LI will hopefully be cheap).
+ if (Caller->optForMinSize()) {
+ DominatorTree DT(F);
+ LoopInfo LI(DT);
+ int NumLoops = 0;
+ for (Loop *L : LI) {
+ // Ignore loops that will not be executed
+ if (DeadBlocks.count(L->getHeader()))
+ continue;
+ NumLoops++;
+ }
+ Cost += NumLoops * InlineConstants::CallPenalty;
+ }
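As a quick worked example of the penalty just added: a callee with three loops whose headers are all live adds 3 * CallPenalty to the cost when the caller is optimising for size; assuming the usual value of 25 for InlineConstants::CallPenalty (an assumption, the constant's value is not shown in this diff), that is an extra 75.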
// We applied the maximum possible vector bonus at the beginning. Now,
// subtract the excess bonus, if any, from the Threshold before
@@ -1961,7 +2004,7 @@ InlineCost llvm::getInlineCost(
// Cannot inline indirect calls.
if (!Callee)
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("indirect call");
// Never inline calls with byval arguments that do not have the alloca
// address space. Since byval arguments can be replaced with a copy to an
@@ -1973,54 +2016,59 @@ InlineCost llvm::getInlineCost(
if (CS.isByValArgument(I)) {
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
if (PTy->getAddressSpace() != AllocaAS)
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("byval arguments without alloca"
+ " address space");
}
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
if (CS.hasFnAttr(Attribute::AlwaysInline)) {
if (isInlineViable(*Callee))
- return llvm::InlineCost::getAlways();
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getAlways("always inline attribute");
+ return llvm::InlineCost::getNever("inapplicable always inline attribute");
}
// Never inline functions with conflicting attributes (unless callee has
// always-inline attribute).
Function *Caller = CS.getCaller();
if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("conflicting attributes");
// Don't inline this call if the caller has the optnone attribute.
if (Caller->hasFnAttribute(Attribute::OptimizeNone))
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("optnone attribute");
// Don't inline a function that treats null pointer as valid into a caller
// that does not have this attribute.
if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("nullptr definitions incompatible");
+
+ // Don't inline functions which can be interposed at link-time.
+ if (Callee->isInterposable())
+ return llvm::InlineCost::getNever("interposable");
+
+ // Don't inline functions marked noinline.
+ if (Callee->hasFnAttribute(Attribute::NoInline))
+ return llvm::InlineCost::getNever("noinline function attribute");
- // Don't inline functions which can be interposed at link-time. Don't inline
- // functions marked noinline or call sites marked noinline.
- // Note: inlining non-exact non-interposable functions is fine, since we know
- // we have *a* correct implementation of the source level function.
- if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
- CS.isNoInline())
- return llvm::InlineCost::getNever();
+ // Don't inline call sites marked noinline.
+ if (CS.isNoInline())
+ return llvm::InlineCost::getNever("noinline call site attribute");
LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "... (caller:" << Caller->getName() << ")\n");
CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
Params);
- bool ShouldInline = CA.analyzeCall(CS);
+ InlineResult ShouldInline = CA.analyzeCall(CS);
LLVM_DEBUG(CA.dump());
// Check if there was a reason to force inlining or no inlining.
if (!ShouldInline && CA.getCost() < CA.getThreshold())
- return InlineCost::getNever();
+ return InlineCost::getNever(ShouldInline.message);
if (ShouldInline && CA.getCost() >= CA.getThreshold())
- return InlineCost::getAlways();
+ return InlineCost::getAlways("empty function");
return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
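For context, a sketch of how an inliner-style client might consume the result now that forced decisions carry a reason. This is a non-compilable fragment: the analysis arguments are the author's guess at the surrounding caller, and attemptInlining/reportSkip are placeholders.

InlineCost IC = getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache,
                              GetBFI, PSI, /*ORE=*/nullptr);
if (IC.isAlways())
  attemptInlining(CS);            // forced, e.g. always_inline
else if (IC.isNever())
  reportSkip(CS);                 // the reason travels with the InlineCost
else if (IC.getCost() < IC.getThreshold())
  attemptInlining(CS);            // profitable under the current threshold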
@@ -2058,9 +2106,8 @@ bool llvm::isInlineViable(Function &F) {
// Disallow inlining functions that call @llvm.localescape. Doing this
// correctly would require major changes to the inliner.
case llvm::Intrinsic::localescape:
- // Disallow inlining of functions that access VarArgs.
+ // Disallow inlining of functions that initialize VarArgs with va_start.
case llvm::Intrinsic::vastart:
- case llvm::Intrinsic::vaend:
return false;
}
}
diff --git a/contrib/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/contrib/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
new file mode 100644
index 000000000000..816126f407ca
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -0,0 +1,157 @@
+//===-- InstructionPrecedenceTracking.cpp -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Implements a class that can mark certain instructions as "special"
+// (e.g. as having implicit control flow, writing memory, or having another
+// interesting property) and then efficiently answer queries of the form:
+// 1. Are there any special instructions in the block of interest?
+// 2. Return the first special instruction in the given block;
+// 3. Check if the given instruction is preceded by the first special
+// instruction in the same block.
+// The class provides caching that allows these queries to be answered
+// quickly. The user must make sure that the cached data is invalidated
+// properly whenever the contents of a tracked block change.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InstructionPrecedenceTracking.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool> ExpensiveAsserts(
+ "ipt-expensive-asserts",
+ cl::desc("Perform expensive assert validation on every query to Instruction"
+ " Precedence Tracking"),
+ cl::init(false), cl::Hidden);
+#endif
+
+const Instruction *InstructionPrecedenceTracking::getFirstSpecialInstruction(
+ const BasicBlock *BB) {
+#ifndef NDEBUG
+ // If there is a bug connected to an invalid cache, turn on ExpensiveAsserts
+ // to catch the situation as early as possible.
+ if (ExpensiveAsserts)
+ validateAll();
+ else
+ validate(BB);
+#endif
+
+ if (FirstSpecialInsts.find(BB) == FirstSpecialInsts.end()) {
+ fill(BB);
+ assert(FirstSpecialInsts.find(BB) != FirstSpecialInsts.end() && "Must be!");
+ }
+ return FirstSpecialInsts[BB];
+}
+
+bool InstructionPrecedenceTracking::hasSpecialInstructions(
+ const BasicBlock *BB) {
+ return getFirstSpecialInstruction(BB) != nullptr;
+}
+
+bool InstructionPrecedenceTracking::isPreceededBySpecialInstruction(
+ const Instruction *Insn) {
+ const Instruction *MaybeFirstSpecial =
+ getFirstSpecialInstruction(Insn->getParent());
+ return MaybeFirstSpecial && OI.dominates(MaybeFirstSpecial, Insn);
+}
+
+void InstructionPrecedenceTracking::fill(const BasicBlock *BB) {
+ FirstSpecialInsts.erase(BB);
+ for (auto &I : *BB)
+ if (isSpecialInstruction(&I)) {
+ FirstSpecialInsts[BB] = &I;
+ return;
+ }
+
+ // Mark this block as having no special instructions.
+ FirstSpecialInsts[BB] = nullptr;
+}
+
+#ifndef NDEBUG
+void InstructionPrecedenceTracking::validate(const BasicBlock *BB) const {
+ auto It = FirstSpecialInsts.find(BB);
+ // Bail if we don't have anything cached for this block.
+ if (It == FirstSpecialInsts.end())
+ return;
+
+ for (const Instruction &Insn : *BB)
+ if (isSpecialInstruction(&Insn)) {
+ assert(It->second == &Insn &&
+ "Cached first special instruction is wrong!");
+ return;
+ }
+
+ assert(It->second == nullptr &&
+ "Block is marked as having special instructions but in fact it has "
+ "none!");
+}
+
+void InstructionPrecedenceTracking::validateAll() const {
+ // Check that for every known block the cached value is correct.
+ for (auto &It : FirstSpecialInsts)
+ validate(It.first);
+}
+#endif
+
+void InstructionPrecedenceTracking::insertInstructionTo(const Instruction *Inst,
+ const BasicBlock *BB) {
+ if (isSpecialInstruction(Inst))
+ FirstSpecialInsts.erase(BB);
+ OI.invalidateBlock(BB);
+}
+
+void InstructionPrecedenceTracking::removeInstruction(const Instruction *Inst) {
+ if (isSpecialInstruction(Inst))
+ FirstSpecialInsts.erase(Inst->getParent());
+ OI.invalidateBlock(Inst->getParent());
+}
+
+void InstructionPrecedenceTracking::clear() {
+ for (auto It : FirstSpecialInsts)
+ OI.invalidateBlock(It.first);
+ FirstSpecialInsts.clear();
+#ifndef NDEBUG
+ // The map should be valid after clearing (at least empty).
+ validateAll();
+#endif
+}
+
+bool ImplicitControlFlowTracking::isSpecialInstruction(
+ const Instruction *Insn) const {
+ // If a block's instruction doesn't always pass control to its successor
+ // instruction, mark the block as having implicit control flow. We use this
+ // to avoid wrong assumptions of the sort "if A is executed and B
+ // post-dominates A, then B is also executed". This is not true if there is
+ // an implicit control flow instruction (e.g. a guard) between them.
+ //
+ // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
+ // for volatile stores and loads because they can trap. The discussion on
+ // whether or not it is correct is still ongoing. We might want to get rid
+ // of this logic in the future. Anyway, trapping instructions shouldn't
+ // introduce implicit control flow, so we explicitly allow them here. This
+ // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
+ if (isGuaranteedToTransferExecutionToSuccessor(Insn))
+ return false;
+ if (isa<LoadInst>(Insn)) {
+ assert(cast<LoadInst>(Insn)->isVolatile() &&
+ "Non-volatile load should transfer execution to successor!");
+ return false;
+ }
+ if (isa<StoreInst>(Insn)) {
+ assert(cast<StoreInst>(Insn)->isVolatile() &&
+ "Non-volatile store should transfer execution to successor!");
+ return false;
+ }
+ return true;
+}
+
+bool MemoryWriteTracking::isSpecialInstruction(
+ const Instruction *Insn) const {
+ return Insn->mayWriteToMemory();
+}
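MemoryWriteTracking above shows how thin a client of this cache can be. As a purely hypothetical further client (not in the patch, with the constructor plumbing to the base class elided because it is not shown in this diff), one could track the first call in each block the same way:

class CallTracking : public InstructionPrecedenceTracking {
public:
  // Any call or invoke counts as "special"; the base class then answers
  // "first call in block" / "is I preceded by a call" queries with caching.
  virtual bool isSpecialInstruction(const Instruction *Insn) const {
    return isa<CallInst>(Insn) || isa<InvokeInst>(Insn);
  }
};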
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index 5e72798d459a..ccf907c144f0 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -861,8 +861,10 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// (X / Y) * Y -> X if the division is exact.
Value *X = nullptr;
- if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
- match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y)
+ if (Q.IIQ.UseInstrInfo &&
+ (match(Op0,
+ m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
+ match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0)))))) // Y * (X / Y)
return X;
// i1 mul -> and.
@@ -1035,8 +1037,8 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (match(Op0, m_c_Mul(m_Value(X), m_Specific(Op1)))) {
auto *Mul = cast<OverflowingBinaryOperator>(Op0);
// If the Mul does not overflow, then we are good to go.
- if ((IsSigned && Mul->hasNoSignedWrap()) ||
- (!IsSigned && Mul->hasNoUnsignedWrap()))
+ if ((IsSigned && Q.IIQ.hasNoSignedWrap(Mul)) ||
+ (!IsSigned && Q.IIQ.hasNoUnsignedWrap(Mul)))
return X;
// If X has the form X = A / Y, then X * Y cannot overflow.
if ((IsSigned && match(X, m_SDiv(m_Value(), m_Specific(Op1)))) ||
@@ -1094,10 +1096,11 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
return Op0;
// (X << Y) % X -> 0
- if ((Opcode == Instruction::SRem &&
- match(Op0, m_NSWShl(m_Specific(Op1), m_Value()))) ||
- (Opcode == Instruction::URem &&
- match(Op0, m_NUWShl(m_Specific(Op1), m_Value()))))
+ if (Q.IIQ.UseInstrInfo &&
+ ((Opcode == Instruction::SRem &&
+ match(Op0, m_NSWShl(m_Specific(Op1), m_Value()))) ||
+ (Opcode == Instruction::URem &&
+ match(Op0, m_NUWShl(m_Specific(Op1), m_Value())))))
return Constant::getNullValue(Op0->getType());
// If the operation is with the result of a select instruction, check whether
@@ -1295,7 +1298,8 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// (X >> A) << A -> X
Value *X;
- if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
+ if (Q.IIQ.UseInstrInfo &&
+ match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
return X;
// shl nuw i8 C, %x -> C iff C has sign bit set.
@@ -1365,7 +1369,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
// (X << A) >> A -> X
Value *X;
- if (match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1))))
+ if (Q.IIQ.UseInstrInfo && match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1))))
return X;
// Arithmetic shifting an all-sign-bit value is a no-op.
@@ -1552,7 +1556,8 @@ static Value *simplifyAndOrOfICmpsWithZero(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
}
-static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
// (icmp (add V, C0), C1) & (icmp V, C0)
ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
@@ -1563,13 +1568,13 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
return nullptr;
- auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ auto *AddInst = cast<OverflowingBinaryOperator>(Op0->getOperand(0));
if (AddInst->getOperand(1) != Op1->getOperand(1))
return nullptr;
Type *ITy = Op0->getType();
- bool isNSW = AddInst->hasNoSignedWrap();
- bool isNUW = AddInst->hasNoUnsignedWrap();
+ bool isNSW = IIQ.hasNoSignedWrap(AddInst);
+ bool isNUW = IIQ.hasNoUnsignedWrap(AddInst);
const APInt Delta = *C1 - *C0;
if (C0->isStrictlyPositive()) {
@@ -1598,7 +1603,8 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
return X;
if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true))
@@ -1615,15 +1621,16 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
return X;
- if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1))
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, IIQ))
return X;
- if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0))
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, IIQ))
return X;
return nullptr;
}
-static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
// (icmp (add V, C0), C1) | (icmp V, C0)
ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
@@ -1639,8 +1646,8 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
Type *ITy = Op0->getType();
- bool isNSW = AddInst->hasNoSignedWrap();
- bool isNUW = AddInst->hasNoUnsignedWrap();
+ bool isNSW = IIQ.hasNoSignedWrap(AddInst);
+ bool isNUW = IIQ.hasNoUnsignedWrap(AddInst);
const APInt Delta = *C1 - *C0;
if (C0->isStrictlyPositive()) {
@@ -1669,7 +1676,8 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
return X;
if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false))
@@ -1686,15 +1694,16 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
return X;
- if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1))
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, IIQ))
return X;
- if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0))
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, IIQ))
return X;
return nullptr;
}
-static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
+static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI,
+ FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
if (LHS0->getType() != RHS0->getType())
@@ -1711,8 +1720,8 @@ static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
// (fcmp uno NNAN, X) | (fcmp uno Y, X) --> fcmp uno Y, X
// (fcmp uno X, NNAN) | (fcmp uno X, Y) --> fcmp uno X, Y
// (fcmp uno X, NNAN) | (fcmp uno Y, X) --> fcmp uno Y, X
- if ((isKnownNeverNaN(LHS0) && (LHS1 == RHS0 || LHS1 == RHS1)) ||
- (isKnownNeverNaN(LHS1) && (LHS0 == RHS0 || LHS0 == RHS1)))
+ if ((isKnownNeverNaN(LHS0, TLI) && (LHS1 == RHS0 || LHS1 == RHS1)) ||
+ (isKnownNeverNaN(LHS1, TLI) && (LHS0 == RHS0 || LHS0 == RHS1)))
return RHS;
// (fcmp ord X, Y) & (fcmp ord NNAN, X) --> fcmp ord X, Y
@@ -1723,15 +1732,16 @@ static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
// (fcmp uno Y, X) | (fcmp uno NNAN, X) --> fcmp uno Y, X
// (fcmp uno X, Y) | (fcmp uno X, NNAN) --> fcmp uno X, Y
// (fcmp uno Y, X) | (fcmp uno X, NNAN) --> fcmp uno Y, X
- if ((isKnownNeverNaN(RHS0) && (RHS1 == LHS0 || RHS1 == LHS1)) ||
- (isKnownNeverNaN(RHS1) && (RHS0 == LHS0 || RHS0 == LHS1)))
+ if ((isKnownNeverNaN(RHS0, TLI) && (RHS1 == LHS0 || RHS1 == LHS1)) ||
+ (isKnownNeverNaN(RHS1, TLI) && (RHS0 == LHS0 || RHS0 == LHS1)))
return LHS;
}
return nullptr;
}
-static Value *simplifyAndOrOfCmps(Value *Op0, Value *Op1, bool IsAnd) {
+static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
+ Value *Op0, Value *Op1, bool IsAnd) {
// Look through casts of the 'and' operands to find compares.
auto *Cast0 = dyn_cast<CastInst>(Op0);
auto *Cast1 = dyn_cast<CastInst>(Op1);
@@ -1745,13 +1755,13 @@ static Value *simplifyAndOrOfCmps(Value *Op0, Value *Op1, bool IsAnd) {
auto *ICmp0 = dyn_cast<ICmpInst>(Op0);
auto *ICmp1 = dyn_cast<ICmpInst>(Op1);
if (ICmp0 && ICmp1)
- V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1) :
- simplifyOrOfICmps(ICmp0, ICmp1);
+ V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ)
+ : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ);
auto *FCmp0 = dyn_cast<FCmpInst>(Op0);
auto *FCmp1 = dyn_cast<FCmpInst>(Op1);
if (FCmp0 && FCmp1)
- V = simplifyAndOrOfFCmps(FCmp0, FCmp1, IsAnd);
+ V = simplifyAndOrOfFCmps(Q.TLI, FCmp0, FCmp1, IsAnd);
if (!V)
return nullptr;
@@ -1831,7 +1841,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op1;
}
- if (Value *V = simplifyAndOrOfCmps(Op0, Op1, true))
+ if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true))
return V;
// Try some generic simplifications for associative operations.
@@ -1981,7 +1991,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
return Op0;
- if (Value *V = simplifyAndOrOfCmps(Op0, Op1, false))
+ if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
// Try some generic simplifications for associative operations.
@@ -2142,13 +2152,15 @@ static Constant *
computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, CmpInst::Predicate Pred,
AssumptionCache *AC, const Instruction *CxtI,
- Value *LHS, Value *RHS) {
+ const InstrInfoQuery &IIQ, Value *LHS, Value *RHS) {
// First, skip past any trivial no-ops.
LHS = LHS->stripPointerCasts();
RHS = RHS->stripPointerCasts();
// A non-null pointer is not equal to a null pointer.
- if (llvm::isKnownNonZero(LHS, DL) && isa<ConstantPointerNull>(RHS) &&
+ if (llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr,
+ IIQ.UseInstrInfo) &&
+ isa<ConstantPointerNull>(RHS) &&
(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
@@ -2413,12 +2425,12 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo))
return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT: {
@@ -2463,17 +2475,18 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
/// Many binary operators with a constant operand have an easy-to-compute
/// range of outputs. This can be used to fold a comparison to always true or
/// always false.
-static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
+static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper,
+ const InstrInfoQuery &IIQ) {
unsigned Width = Lower.getBitWidth();
const APInt *C;
switch (BO.getOpcode()) {
case Instruction::Add:
if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
// FIXME: If we have both nuw and nsw, we should reduce the range further.
- if (BO.hasNoUnsignedWrap()) {
+ if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
// 'add nuw x, C' produces [C, UINT_MAX].
Lower = *C;
- } else if (BO.hasNoSignedWrap()) {
+ } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
if (C->isNegative()) {
// 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
Lower = APInt::getSignedMinValue(Width);
@@ -2506,7 +2519,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
} else if (match(BO.getOperand(0), m_APInt(C))) {
unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && BO.isExact())
+ if (!C->isNullValue() && IIQ.isExact(&BO))
ShiftAmount = C->countTrailingZeros();
if (C->isNegative()) {
// 'ashr C, x' produces [C, C >> (Width-1)]
@@ -2527,7 +2540,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
} else if (match(BO.getOperand(0), m_APInt(C))) {
// 'lshr C, x' produces [C >> (Width-1), C].
unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && BO.isExact())
+ if (!C->isNullValue() && IIQ.isExact(&BO))
ShiftAmount = C->countTrailingZeros();
Lower = C->lshr(ShiftAmount);
Upper = *C + 1;
@@ -2536,7 +2549,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
case Instruction::Shl:
if (match(BO.getOperand(0), m_APInt(C))) {
- if (BO.hasNoUnsignedWrap()) {
+ if (IIQ.hasNoUnsignedWrap(&BO)) {
// 'shl nuw C, x' produces [C, C << CLZ(C)]
Lower = *C;
Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
@@ -2617,8 +2630,72 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
}
}
+/// Some intrinsics with a constant operand have an easy-to-compute range of
+/// outputs. This can be used to fold a comparison to always true or always
+/// false.
+static void setLimitsForIntrinsic(IntrinsicInst &II, APInt &Lower,
+ APInt &Upper) {
+ unsigned Width = Lower.getBitWidth();
+ const APInt *C;
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::uadd_sat:
+ // uadd.sat(x, C) produces [C, UINT_MAX].
+ if (match(II.getOperand(0), m_APInt(C)) ||
+ match(II.getOperand(1), m_APInt(C)))
+ Lower = *C;
+ break;
+ case Intrinsic::sadd_sat:
+ if (match(II.getOperand(0), m_APInt(C)) ||
+ match(II.getOperand(1), m_APInt(C))) {
+ if (C->isNegative()) {
+ // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+ } else {
+ // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
+ Lower = APInt::getSignedMinValue(Width) + *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ }
+ break;
+ case Intrinsic::usub_sat:
+ // usub.sat(C, x) produces [0, C].
+ if (match(II.getOperand(0), m_APInt(C)))
+ Upper = *C + 1;
+ // usub.sat(x, C) produces [0, UINT_MAX - C].
+ else if (match(II.getOperand(1), m_APInt(C)))
+ Upper = APInt::getMaxValue(Width) - *C + 1;
+ break;
+ case Intrinsic::ssub_sat:
+ if (match(II.getOperand(0), m_APInt(C))) {
+ if (C->isNegative()) {
+ // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = *C - APInt::getSignedMinValue(Width) + 1;
+ } else {
+ // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
+ Lower = *C - APInt::getSignedMaxValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ } else if (match(II.getOperand(1), m_APInt(C))) {
+ if (C->isNegative()) {
+ // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
+ Lower = APInt::getSignedMinValue(Width) - *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ } else {
+ // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) - *C + 1;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
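A standalone brute-force check of the first of these ranges (illustrative, i8 only): uadd.sat(x, C) always lands in [C, UINT_MAX], which is what lets a later comparison against a disjoint range fold to a constant.

#include <cassert>
#include <cstdint>

static uint8_t uadd_sat8(uint8_t X, uint8_t C) {
  unsigned Sum = unsigned(X) + unsigned(C);
  return Sum > 255 ? 255 : uint8_t(Sum);
}

int main() {
  const uint8_t C = 200;
  for (unsigned X = 0; X <= 255; ++X) {
    uint8_t R = uadd_sat8(uint8_t(X), C);
    assert(R >= C);     // lower bound recorded by setLimitsForIntrinsic
    assert(!(R < 100)); // so "icmp ult (uadd.sat x, 200), 100" is always false
  }
}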
+
static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS) {
+ Value *RHS, const InstrInfoQuery &IIQ) {
Type *ITy = GetCompareTy(RHS); // The return type.
Value *X;
@@ -2649,13 +2726,15 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
APInt Lower = APInt(Width, 0);
APInt Upper = APInt(Width, 0);
if (auto *BO = dyn_cast<BinaryOperator>(LHS))
- setLimitsForBinOp(*BO, Lower, Upper);
+ setLimitsForBinOp(*BO, Lower, Upper, IIQ);
+ else if (auto *II = dyn_cast<IntrinsicInst>(LHS))
+ setLimitsForIntrinsic(*II, Lower, Upper);
ConstantRange LHS_CR =
Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true);
if (auto *I = dyn_cast<Instruction>(LHS))
- if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ if (auto *Ranges = IIQ.getMetadata(I, LLVMContext::MD_range))
LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
if (!LHS_CR.isFullSet()) {
@@ -2688,16 +2767,20 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
B = LBO->getOperand(1);
NoLHSWrapProblem =
ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ (CmpInst::isUnsigned(Pred) &&
+ Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO))) ||
+ (CmpInst::isSigned(Pred) &&
+ Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)));
}
if (RBO && RBO->getOpcode() == Instruction::Add) {
C = RBO->getOperand(0);
D = RBO->getOperand(1);
NoRHSWrapProblem =
ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ (CmpInst::isUnsigned(Pred) &&
+ Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(RBO))) ||
+ (CmpInst::isSigned(Pred) &&
+ Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(RBO)));
}
// icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
@@ -2915,7 +2998,8 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// - The shift is nuw, we can't shift out the one bit.
// - CI2 is one
// - CI isn't zero
- if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() ||
+ if (Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
+ Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
CI2Val->isOneValue() || !CI->isZero()) {
if (Pred == ICmpInst::ICMP_EQ)
return ConstantInt::getFalse(RHS->getContext());
@@ -2939,29 +3023,31 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
break;
case Instruction::UDiv:
case Instruction::LShr:
- if (ICmpInst::isSigned(Pred) || !LBO->isExact() || !RBO->isExact())
+ if (ICmpInst::isSigned(Pred) || !Q.IIQ.isExact(LBO) ||
+ !Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::SDiv:
- if (!ICmpInst::isEquality(Pred) || !LBO->isExact() || !RBO->isExact())
+ if (!ICmpInst::isEquality(Pred) || !Q.IIQ.isExact(LBO) ||
+ !Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::AShr:
- if (!LBO->isExact() || !RBO->isExact())
+ if (!Q.IIQ.isExact(LBO) || !Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::Shl: {
- bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
- bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+ bool NUW = Q.IIQ.hasNoUnsignedWrap(LBO) && Q.IIQ.hasNoUnsignedWrap(RBO);
+ bool NSW = Q.IIQ.hasNoSignedWrap(LBO) && Q.IIQ.hasNoSignedWrap(RBO);
if (!NUW && !NSW)
break;
if (!NSW && ICmpInst::isSigned(Pred))
@@ -2976,6 +3062,44 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
+static Value *simplifyICmpWithAbsNabs(CmpInst::Predicate Pred, Value *Op0,
+ Value *Op1) {
+ // We need a comparison with a constant.
+ const APInt *C;
+ if (!match(Op1, m_APInt(C)))
+ return nullptr;
+
+ // matchSelectPattern returns the negation part of an abs pattern in SP1.
+ // If the negate has an NSW flag, abs(INT_MIN) is undefined. Without that
+ // constraint, we can't make a contiguous range for the result of abs.
+ ICmpInst::Predicate AbsPred = ICmpInst::BAD_ICMP_PREDICATE;
+ Value *SP0, *SP1;
+ SelectPatternFlavor SPF = matchSelectPattern(Op0, SP0, SP1).Flavor;
+ if (SPF == SelectPatternFlavor::SPF_ABS &&
+ cast<Instruction>(SP1)->hasNoSignedWrap())
+ // The result of abs(X) is >= 0 (with nsw).
+ AbsPred = ICmpInst::ICMP_SGE;
+ if (SPF == SelectPatternFlavor::SPF_NABS)
+ // The result of -abs(X) is <= 0.
+ AbsPred = ICmpInst::ICMP_SLE;
+
+ if (AbsPred == ICmpInst::BAD_ICMP_PREDICATE)
+ return nullptr;
+
+ // If there is no intersection between abs/nabs and the range of this icmp,
+ // the icmp must be false. If the abs/nabs range is a subset of the icmp
+ // range, the icmp must be true.
+ APInt Zero = APInt::getNullValue(C->getBitWidth());
+ ConstantRange AbsRange = ConstantRange::makeExactICmpRegion(AbsPred, Zero);
+ ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(Pred, *C);
+ if (AbsRange.intersectWith(CmpRange).isEmptySet())
+ return getFalse(GetCompareTy(Op0));
+ if (CmpRange.contains(AbsRange))
+ return getTrue(GetCompareTy(Op0));
+
+ return nullptr;
+}
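A standalone illustration of the range fact this relies on: when the negate carries nsw, abs(X) can never be INT_MIN, so its result is always non-negative and a comparison like "icmp slt (abs X), 0" folds to false.

#include <cassert>
#include <cstdint>

int main() {
  // INT64_MIN is excluded here, matching the nsw requirement discussed above.
  for (int64_t X = -100000; X <= 100000; ++X) {
    int64_t A = X < 0 ? -X : X; // the abs select pattern
    assert(A >= 0);             // hence the abs range is [0, SINT_MAX]
  }
}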
+
/// Simplify integer comparisons where at least one operand of the compare
/// matches an integer min/max idiom.
static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
@@ -3209,7 +3333,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = simplifyICmpWithZero(Pred, LHS, RHS, Q))
return V;
- if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS))
+ if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS, Q.IIQ))
return V;
// If both operands have range metadata, use the metadata
@@ -3218,8 +3342,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
auto RHS_Instr = cast<Instruction>(RHS);
auto LHS_Instr = cast<Instruction>(LHS);
- if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
- LHS_Instr->getMetadata(LLVMContext::MD_range)) {
+ if (Q.IIQ.getMetadata(RHS_Instr, LLVMContext::MD_range) &&
+ Q.IIQ.getMetadata(LHS_Instr, LLVMContext::MD_range)) {
auto RHS_CR = getConstantRangeFromMetadata(
*RHS_Instr->getMetadata(LLVMContext::MD_range));
auto LHS_CR = getConstantRangeFromMetadata(
@@ -3397,7 +3521,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// icmp eq|ne X, Y -> false|true if X != Y
if (ICmpInst::isEquality(Pred) &&
- isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
+ isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo)) {
return Pred == ICmpInst::ICMP_NE ? getTrue(ITy) : getFalse(ITy);
}
@@ -3407,11 +3531,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse))
return V;
+ if (Value *V = simplifyICmpWithAbsNabs(Pred, LHS, RHS))
+ return V;
+
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available..
if (LHS->getType()->isPointerTy())
- if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI, LHS,
- RHS))
+ if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI,
+ Q.IIQ, LHS, RHS))
return C;
if (auto *CLHS = dyn_cast<PtrToIntOperator>(LHS))
if (auto *CRHS = dyn_cast<PtrToIntOperator>(RHS))
@@ -3420,7 +3547,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) ==
Q.DL.getTypeSizeInBits(CRHS->getType()))
if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI,
- CLHS->getPointerOperand(),
+ Q.IIQ, CLHS->getPointerOperand(),
CRHS->getPointerOperand()))
return C;
@@ -3491,13 +3618,11 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Pred == FCmpInst::FCMP_TRUE)
return getTrue(RetTy);
- // UNO/ORD predicates can be trivially folded if NaNs are ignored.
- if (FMF.noNaNs()) {
- if (Pred == FCmpInst::FCMP_UNO)
- return getFalse(RetTy);
- if (Pred == FCmpInst::FCMP_ORD)
- return getTrue(RetTy);
- }
+ // Fold (un)ordered comparison if we can determine there are no NaNs.
+ if (Pred == FCmpInst::FCMP_UNO || Pred == FCmpInst::FCMP_ORD)
+ if (FMF.noNaNs() ||
+ (isKnownNeverNaN(LHS, Q.TLI) && isKnownNeverNaN(RHS, Q.TLI)))
+ return ConstantInt::get(RetTy, Pred == FCmpInst::FCMP_ORD);
// NaN is unordered; NaN is not ordered.
assert((FCmpInst::isOrdered(Pred) || FCmpInst::isUnordered(Pred)) &&
@@ -3552,12 +3677,19 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
if (C->isZero()) {
switch (Pred) {
+ case FCmpInst::FCMP_OGE:
+ if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return getTrue(RetTy);
+ break;
case FCmpInst::FCMP_UGE:
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
return getTrue(RetTy);
break;
+ case FCmpInst::FCMP_ULT:
+ if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return getFalse(RetTy);
+ break;
case FCmpInst::FCMP_OLT:
- // X < 0
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
return getFalse(RetTy);
break;
@@ -3634,11 +3766,10 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
//
// We can't replace %sel with %add unless we strip away the flags.
if (isa<OverflowingBinaryOperator>(B))
- if (B->hasNoSignedWrap() || B->hasNoUnsignedWrap())
- return nullptr;
- if (isa<PossiblyExactOperator>(B))
- if (B->isExact())
+ if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B))
return nullptr;
+ if (isa<PossiblyExactOperator>(B) && Q.IIQ.isExact(B))
+ return nullptr;
if (MaxRecurse) {
if (B->getOperand(0) == Op)
@@ -3772,6 +3903,28 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y,
Pred == ICmpInst::ICMP_EQ))
return V;
+
+ // Test for zero-shift-guard-ops around funnel shifts. These are used to
+ // avoid UB from oversized shifts in raw IR rotate patterns, but the
+ // intrinsics do not have that problem.
+ Value *ShAmt;
+ auto isFsh = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(),
+ m_Value(ShAmt)),
+ m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X),
+ m_Value(ShAmt)));
+ // (ShAmt != 0) ? fshl(X, *, ShAmt) : X --> fshl(X, *, ShAmt)
+ // (ShAmt != 0) ? fshr(*, X, ShAmt) : X --> fshr(*, X, ShAmt)
+ // (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X
+ // (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X
+ if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt)
+ return Pred == ICmpInst::ICMP_NE ? TrueVal : X;
+
+ // (ShAmt == 0) ? X : fshl(X, *, ShAmt) --> fshl(X, *, ShAmt)
+ // (ShAmt == 0) ? X : fshr(*, X, ShAmt) --> fshr(*, X, ShAmt)
+ // (ShAmt != 0) ? X : fshl(X, *, ShAmt) --> X
+ // (ShAmt != 0) ? X : fshr(*, X, ShAmt) --> X
+ if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt)
+ return Pred == ICmpInst::ICMP_EQ ? FalseVal : X;
}
// Check for other compares that behave like bit test.
@@ -3809,6 +3962,34 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
return nullptr;
}
+/// Try to simplify a select instruction when its condition operand is a
+/// floating-point comparison.
+static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F) {
+ FCmpInst::Predicate Pred;
+ if (!match(Cond, m_FCmp(Pred, m_Specific(T), m_Specific(F))) &&
+ !match(Cond, m_FCmp(Pred, m_Specific(F), m_Specific(T))))
+ return nullptr;
+
+ // TODO: The transform may not be valid with -0.0. An incomplete way of
+ // testing for that possibility is to check if at least one operand is a
+ // non-zero constant.
+ const APFloat *C;
+ if ((match(T, m_APFloat(C)) && C->isNonZero()) ||
+ (match(F, m_APFloat(C)) && C->isNonZero())) {
+ // (T == F) ? T : F --> F
+ // (F == T) ? T : F --> F
+ if (Pred == FCmpInst::FCMP_OEQ)
+ return F;
+
+ // (T != F) ? T : F --> T
+ // (F != T) ? T : F --> T
+ if (Pred == FCmpInst::FCMP_UNE)
+ return T;
+ }
+
+ return nullptr;
+}
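A standalone illustration of the -0.0 caveat mentioned in the TODO above: with T = +0.0 and F = -0.0 the compare is true, so the original select yields +0.0, while folding to F would flip the sign of zero; requiring a non-zero constant operand sidesteps this.

#include <cassert>
#include <cmath>

int main() {
  double T = +0.0, F = -0.0;
  assert(T == F);                // an oeq compare of T and F is satisfied
  double Sel = (T == F) ? T : F; // the un-folded select produces +0.0
  assert(!std::signbit(Sel) && std::signbit(F));
}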
+
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
@@ -3845,9 +4026,16 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse))
return V;
+ if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal))
+ return V;
+
if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal))
return V;
+ Optional<bool> Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL);
+ if (Imp)
+ return *Imp ? TrueVal : FalseVal;
+
return nullptr;
}
@@ -4359,6 +4547,14 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0)))))
return ConstantFP::getNullValue(Op0->getType());
+ // (X - Y) + Y --> X
+ // Y + (X - Y) --> X
+ Value *X;
+ if (FMF.noSignedZeros() && FMF.allowReassoc() &&
+ (match(Op0, m_FSub(m_Value(X), m_Specific(Op1))) ||
+ match(Op1, m_FSub(m_Value(X), m_Specific(Op0)))))
+ return X;
+
return nullptr;
}
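A standalone illustration of why the new (X - Y) + Y --> X fold is gated on nsz (reassoc covers the intermediate-rounding case): with X = -0.0 and Y = +0.0 the unfused computation produces +0.0, so folding to X would change the sign of zero.

#include <cassert>
#include <cmath>

int main() {
  double X = -0.0, Y = +0.0;
  double Exact = (X - Y) + Y;    // (-0.0 - 0.0) + 0.0 == +0.0
  assert(std::signbit(X) && !std::signbit(Exact));
}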
@@ -4396,6 +4592,13 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (FMF.noNaNs() && Op0 == Op1)
return Constant::getNullValue(Op0->getType());
+ // Y - (Y - X) --> X
+ // (X + Y) - Y --> X
+ if (FMF.noSignedZeros() && FMF.allowReassoc() &&
+ (match(Op1, m_FSub(m_Specific(Op0), m_Value(X))) ||
+ match(Op0, m_c_FAdd(m_Specific(Op1), m_Value(X)))))
+ return X;
+
return nullptr;
}
@@ -4476,10 +4679,8 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
// -X / X -> -1.0 and
// X / -X -> -1.0 are legal when NaNs are ignored.
// We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored.
- if ((BinaryOperator::isFNeg(Op0, /*IgnoreZeroSign=*/true) &&
- BinaryOperator::getFNegArgument(Op0) == Op1) ||
- (BinaryOperator::isFNeg(Op1, /*IgnoreZeroSign=*/true) &&
- BinaryOperator::getFNegArgument(Op1) == Op0))
+ if (match(Op0, m_FNegNSZ(m_Specific(Op1))) ||
+ match(Op1, m_FNegNSZ(m_Specific(Op0))))
return ConstantFP::get(Op0->getType(), -1.0);
}
@@ -4781,6 +4982,40 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
return Constant::getNullValue(ReturnType);
break;
+ case Intrinsic::uadd_sat:
+ // sat(MAX + X) -> MAX
+ // sat(X + MAX) -> MAX
+ if (match(Op0, m_AllOnes()) || match(Op1, m_AllOnes()))
+ return Constant::getAllOnesValue(ReturnType);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::sadd_sat:
+ // sat(X + undef) -> -1
+ // sat(undef + X) -> -1
+ // For unsigned: Assume undef is MAX, thus we saturate to MAX (-1).
+ // For signed: Assume undef is ~X, in which case X + ~X = -1.
+ if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return Constant::getAllOnesValue(ReturnType);
+
+ // X + 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+ // 0 + X -> X
+ if (match(Op0, m_Zero()))
+ return Op1;
+ break;
+ case Intrinsic::usub_sat:
+ // sat(0 - X) -> 0, sat(X - MAX) -> 0
+ if (match(Op0, m_Zero()) || match(Op1, m_AllOnes()))
+ return Constant::getNullValue(ReturnType);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::ssub_sat:
+ // X - X -> 0, X - undef -> 0, undef - X -> 0
+ if (Op0 == Op1 || match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return Constant::getNullValue(ReturnType);
+ // X - 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+ break;
case Intrinsic::load_relative:
if (auto *C0 = dyn_cast<Constant>(Op0))
if (auto *C1 = dyn_cast<Constant>(Op1))
@@ -4798,10 +5033,51 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
break;
case Intrinsic::maxnum:
case Intrinsic::minnum:
- // If one argument is NaN, return the other argument.
- if (match(Op0, m_NaN())) return Op1;
- if (match(Op1, m_NaN())) return Op0;
+ case Intrinsic::maximum:
+ case Intrinsic::minimum: {
+ // If the arguments are the same, this is a no-op.
+ if (Op0 == Op1) return Op0;
+
+ // If one argument is undef, return the other argument.
+ if (match(Op0, m_Undef()))
+ return Op1;
+ if (match(Op1, m_Undef()))
+ return Op0;
+
+ // If one argument is NaN, return other or NaN appropriately.
+ bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum;
+ if (match(Op0, m_NaN()))
+ return PropagateNaN ? Op0 : Op1;
+ if (match(Op1, m_NaN()))
+ return PropagateNaN ? Op1 : Op0;
+
+ // Min/max of the same operation with common operand:
+ // m(m(X, Y)), X --> m(X, Y) (4 commuted variants)
+ if (auto *M0 = dyn_cast<IntrinsicInst>(Op0))
+ if (M0->getIntrinsicID() == IID &&
+ (M0->getOperand(0) == Op1 || M0->getOperand(1) == Op1))
+ return Op0;
+ if (auto *M1 = dyn_cast<IntrinsicInst>(Op1))
+ if (M1->getIntrinsicID() == IID &&
+ (M1->getOperand(0) == Op0 || M1->getOperand(1) == Op0))
+ return Op1;
+
+ // min(X, -Inf) --> -Inf (and commuted variant)
+ // max(X, +Inf) --> +Inf (and commuted variant)
+ bool UseNegInf = IID == Intrinsic::minnum || IID == Intrinsic::minimum;
+ const APFloat *C;
+ if ((match(Op0, m_APFloat(C)) && C->isInfinity() &&
+ C->isNegative() == UseNegInf) ||
+ (match(Op1, m_APFloat(C)) && C->isInfinity() &&
+ C->isNegative() == UseNegInf))
+ return ConstantFP::getInfinity(ReturnType, UseNegInf);
+
+ // TODO: minnum(nnan x, inf) -> x
+ // TODO: minnum(nnan ninf x, flt_max) -> x
+ // TODO: maxnum(nnan x, -inf) -> x
+ // TODO: maxnum(nnan ninf x, -flt_max) -> x
break;
+ }
default:
break;
}
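A standalone sketch of the NaN rules these cases implement: minnum/maxnum follow IEEE-754 fmin/fmax and return the non-NaN operand, whereas the newer minimum/maximum intrinsics propagate the NaN.

#include <cassert>
#include <cmath>
#include <limits>

// Propagating semantics, as the "maximum" intrinsic is documented to behave.
static double maximumSemantics(double A, double B) {
  if (std::isnan(A) || std::isnan(B))
    return std::numeric_limits<double>::quiet_NaN();
  return A > B ? A : B;
}

int main() {
  double QNaN = std::nan("");
  assert(std::fmax(QNaN, 1.0) == 1.0);             // maxnum-style: NaN dropped
  assert(std::isnan(maximumSemantics(QNaN, 1.0))); // maximum-style: NaN wins
}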
@@ -4836,7 +5112,16 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
- Value *ShAmtArg = ArgBegin[2];
+ Value *Op0 = ArgBegin[0], *Op1 = ArgBegin[1], *ShAmtArg = ArgBegin[2];
+
+ // If both operands are undef, the result is undef.
+ if (match(Op0, m_Undef()) && match(Op1, m_Undef()))
+ return UndefValue::get(F->getReturnType());
+
+ // If shift amount is undef, assume it is zero.
+ if (match(ShAmtArg, m_Undef()))
+ return ArgBegin[IID == Intrinsic::fshl ? 0 : 1];
+
const APInt *ShAmtC;
if (match(ShAmtArg, m_APInt(ShAmtC))) {
// If there's effectively no shift, return the 1st arg or 2nd arg.
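A standalone reference model of i32 fshl (an illustration, not LLVM's implementation) showing why a zero shift amount, which is how an undef amount is treated above, simply returns the first operand:

#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t A, uint32_t B, uint32_t Sh) {
  Sh %= 32;                         // the intrinsic uses the amount mod width
  if (Sh == 0)
    return A;                       // no shift: the high word of A:B is just A
  return (A << Sh) | (B >> (32 - Sh));
}

int main() {
  assert(fshl32(0xAABBCCDD, 0x11223344, 0) == 0xAABBCCDD);
  assert(fshl32(0xAABBCCDD, 0x11223344, 8) == 0xBBCCDD11);
}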
@@ -4923,18 +5208,20 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
I->getFastMathFlags(), Q);
break;
case Instruction::Add:
- Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
+ Result =
+ SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+ Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::FSub:
Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
I->getFastMathFlags(), Q);
break;
case Instruction::Sub:
- Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
+ Result =
+ SimplifySubInst(I->getOperand(0), I->getOperand(1),
+ Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::FMul:
Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
@@ -4964,17 +5251,18 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
I->getFastMathFlags(), Q);
break;
case Instruction::Shl:
- Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
+ Result =
+ SimplifyShlInst(I->getOperand(0), I->getOperand(1),
+ Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(), Q);
+ Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(), Q);
+ Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
break;
case Instruction::And:
Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), Q);
@@ -5100,7 +5388,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
- if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
+ if (I->getParent() && !I->isEHPad() && !I->isTerminator() &&
!I->mayHaveSideEffects())
I->eraseFromParent();
} else {
@@ -5129,7 +5417,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
- if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
+ if (I->getParent() && !I->isEHPad() && !I->isTerminator() &&
!I->mayHaveSideEffects())
I->eraseFromParent();
}
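
The simplifyBinaryIntrinsic hunk above leans on the different NaN rules of the four floating-point min/max intrinsics: minnum/maxnum return the non-NaN operand, while the new minimum/maximum intrinsics propagate the NaN. A standalone sketch of that rule in plain C++ (illustrative only, not LLVM code):

#include <cmath>
#include <cstdio>

enum class MinMaxKind { MinNum, MaxNum, Minimum, Maximum };

// Folds a min/max of two doubles following the intrinsic semantics assumed by
// the patch: minimum/maximum propagate NaN, minnum/maxnum drop it.
double fold(MinMaxKind K, double A, double B) {
  bool PropagateNaN = K == MinMaxKind::Minimum || K == MinMaxKind::Maximum;
  if (std::isnan(A))
    return PropagateNaN ? A : B;
  if (std::isnan(B))
    return PropagateNaN ? B : A;
  bool IsMin = K == MinMaxKind::MinNum || K == MinMaxKind::Minimum;
  return IsMin ? (A < B ? A : B) : (A > B ? A : B);
}

int main() {
  double NaN = std::nan("");
  std::printf("minnum(NaN, 2)  = %f\n", fold(MinMaxKind::MinNum, NaN, 2.0));  // 2.000000
  std::printf("minimum(NaN, 2) = %f\n", fold(MinMaxKind::Minimum, NaN, 2.0)); // nan
  return 0;
}
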
diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
index e7751d32aab3..000fe5ddad54 100644
--- a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -17,6 +17,7 @@
#include <queue>
namespace llvm {
+
template <class NodeTy, bool IsPostDom>
void IDFCalculator<NodeTy, IsPostDom>::calculate(
SmallVectorImpl<BasicBlock *> &PHIBlocks) {
@@ -61,29 +62,39 @@ void IDFCalculator<NodeTy, IsPostDom>::calculate(
BasicBlock *BB = Node->getBlock();
// Succ is the successor in the direction we are calculating IDF, so it is
// successor for IDF, and predecessor for Reverse IDF.
- for (auto *Succ : children<NodeTy>(BB)) {
+ auto DoWork = [&](BasicBlock *Succ) {
DomTreeNode *SuccNode = DT.getNode(Succ);
// Quickly skip all CFG edges that are also dominator tree edges instead
// of catching them below.
if (SuccNode->getIDom() == Node)
- continue;
+ return;
const unsigned SuccLevel = SuccNode->getLevel();
if (SuccLevel > RootLevel)
- continue;
+ return;
if (!VisitedPQ.insert(SuccNode).second)
- continue;
+ return;
BasicBlock *SuccBB = SuccNode->getBlock();
if (useLiveIn && !LiveInBlocks->count(SuccBB))
- continue;
+ return;
PHIBlocks.emplace_back(SuccBB);
if (!DefBlocks->count(SuccBB))
PQ.push(std::make_pair(
SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn())));
+ };
+
+ if (GD) {
+ for (auto Pair : children<
+ std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, NodeTy>>(
+ {GD, BB}))
+ DoWork(Pair.second);
+ } else {
+ for (auto *Succ : children<NodeTy>(BB))
+ DoWork(Succ);
}
for (auto DomChild : *Node) {
diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
index b1d585bfc683..3f22ada803c9 100644
--- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -619,7 +619,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(
// If the merge range is empty, then adding the edge didn't actually form any
// new cycles. We're done.
- if (MergeRange.begin() == MergeRange.end()) {
+ if (empty(MergeRange)) {
// Now that the SCC structure is finalized, flip the kind to call.
SourceN->setEdgeKind(TargetN, Edge::Call);
return false; // No new cycle.
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index ee0148e0d795..110c085d3f35 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -420,6 +421,8 @@ namespace {
BasicBlock *BB);
bool solveBlockValueSelect(ValueLatticeElement &BBLV, SelectInst *S,
BasicBlock *BB);
+ Optional<ConstantRange> getRangeForOperand(unsigned Op, Instruction *I,
+ BasicBlock *BB);
bool solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BBI,
BasicBlock *BB);
bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI,
@@ -634,8 +637,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
if (auto *CI = dyn_cast<CastInst>(BBI))
return solveBlockValueCast(Res, CI, BB);
- BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
- if (BO && isa<ConstantInt>(BO->getOperand(1)))
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
return solveBlockValueBinaryOp(Res, BO, BB);
}
@@ -951,6 +953,25 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV,
return true;
}
+Optional<ConstantRange> LazyValueInfoImpl::getRangeForOperand(unsigned Op,
+ Instruction *I,
+ BasicBlock *BB) {
+ if (!hasBlockValue(I->getOperand(Op), BB))
+ if (pushBlockValue(std::make_pair(BB, I->getOperand(Op))))
+ return None;
+
+ const unsigned OperandBitWidth =
+ DL.getTypeSizeInBits(I->getOperand(Op)->getType());
+ ConstantRange Range = ConstantRange(OperandBitWidth);
+ if (hasBlockValue(I->getOperand(Op), BB)) {
+ ValueLatticeElement Val = getBlockValue(I->getOperand(Op), BB);
+ intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I);
+ if (Val.isConstantRange())
+ Range = Val.getConstantRange();
+ }
+ return Range;
+}
+
bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
CastInst *CI,
BasicBlock *BB) {
@@ -981,21 +1002,11 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
// Figure out the range of the LHS. If that fails, we still apply the
// transfer rule on the full set since we may be able to locally infer
// interesting facts.
- if (!hasBlockValue(CI->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, CI->getOperand(0))))
- // More work to do before applying this transfer rule.
- return false;
-
- const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(CI->getOperand(0)->getType());
- ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(CI->getOperand(0), BB)) {
- ValueLatticeElement LHSVal = getBlockValue(CI->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal,
- CI);
- if (LHSVal.isConstantRange())
- LHSRange = LHSVal.getConstantRange();
- }
+ Optional<ConstantRange> LHSRes = getRangeForOperand(0, CI, BB);
+ if (!LHSRes.hasValue())
+ // More work to do before applying this transfer rule.
+ return false;
+ ConstantRange LHSRange = LHSRes.getValue();
const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth();
@@ -1037,27 +1048,19 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
return true;
};
- // Figure out the range of the LHS. If that fails, use a conservative range,
- // but apply the transfer rule anyways. This lets us pick up facts from
- // expressions like "and i32 (call i32 @foo()), 32"
- if (!hasBlockValue(BO->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, BO->getOperand(0))))
- // More work to do before applying this transfer rule.
- return false;
+ // Figure out the ranges of the operands. If that fails, use a
+ // conservative range, but apply the transfer rule anyways. This
+ // lets us pick up facts from expressions like "and i32 (call i32
+ // @foo()), 32"
+ Optional<ConstantRange> LHSRes = getRangeForOperand(0, BO, BB);
+ Optional<ConstantRange> RHSRes = getRangeForOperand(1, BO, BB);
- const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(BO->getOperand(0)->getType());
- ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(BO->getOperand(0), BB)) {
- ValueLatticeElement LHSVal = getBlockValue(BO->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal,
- BO);
- if (LHSVal.isConstantRange())
- LHSRange = LHSVal.getConstantRange();
- }
+ if (!LHSRes.hasValue() || !RHSRes.hasValue())
+ // More work to do before applying this transfer rule.
+ return false;
- ConstantInt *RHS = cast<ConstantInt>(BO->getOperand(1));
- ConstantRange RHSRange = ConstantRange(RHS->getValue());
+ ConstantRange LHSRange = LHSRes.getValue();
+ ConstantRange RHSRange = RHSRes.getValue();
// NOTE: We're currently limited by the set of operations that ConstantRange
  // can evaluate symbolically. Enhancing that set will allow us to analyze
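
The LazyValueInfo hunks above factor the per-operand range computation into getRangeForOperand: it returns None while block values still need to be computed, and otherwise a conservative full range that is narrowed when a cached block value exists; with both operands going through the helper, solveBlockValueBinaryOp no longer requires a ConstantInt right-hand side. A rough standalone sketch of that control flow, in plain C++17 with made-up types rather than the LLVM API:

#include <algorithm>
#include <cstdint>
#include <map>
#include <optional>

struct Range { int64_t Lo, Hi; };                  // stand-in for ConstantRange

struct Solver {
  std::map<int, Range> BlockValues;                // operand index -> known range
  bool MoreWorkQueued = false;

  // Mirrors getRangeForOperand: nullopt means "retry after more propagation".
  std::optional<Range> rangeForOperand(int Op) {
    auto It = BlockValues.find(Op);
    if (It == BlockValues.end() && MoreWorkQueued)
      return std::nullopt;
    Range R{INT64_MIN, INT64_MAX};                 // conservative default
    if (It != BlockValues.end())
      R = It->second;                              // narrow with the cached value
    return R;
  }

  // Mirrors solveBlockValueBinaryOp: both operands now use the helper.
  bool solveBinaryOp(Range &Out) {
    std::optional<Range> LHS = rangeForOperand(0);
    std::optional<Range> RHS = rangeForOperand(1);
    if (!LHS || !RHS)
      return false;                                // more work to do first
    // Toy combine rule; the real code applies the opcode's ConstantRange
    // transfer function instead.
    Out = {std::min(LHS->Lo, RHS->Lo), std::max(LHS->Hi, RHS->Hi)};
    return true;
  }
};
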
diff --git a/contrib/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
new file mode 100644
index 000000000000..5540859ebdda
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -0,0 +1,391 @@
+//===- LegacyDivergenceAnalysis.cpp - Legacy Divergence Analysis Impl ----===//
+//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements divergence analysis which determines whether a branch
+// in a GPU program is divergent. It can help branch optimizations such as jump
+// threading and loop unswitching to make better decisions.
+//
+// GPU programs typically use the SIMD execution model, where multiple threads
+// in the same execution group have to execute in lock-step. Therefore, if the
+// code contains divergent branches (i.e., threads in a group do not agree on
+// which path of the branch to take), the group of threads has to execute all
+// the paths from that branch with different subsets of threads enabled until
+// they converge at the immediately post-dominating BB of the paths.
+//
+// Due to this execution model, some optimizations such as jump
+// threading and loop unswitching can be unfortunately harmful when performed on
+// divergent branches. Therefore, an analysis that computes which branches in a
+// GPU program are divergent can help the compiler to selectively run these
+// optimizations.
+//
+// This file defines divergence analysis which computes a conservative but
+// non-trivial approximation of all divergent branches in a GPU program. It
+// partially implements the approach described in
+//
+// Divergence Analysis
+// Sampaio, Souza, Collange, Pereira
+// TOPLAS '13
+//
+// The divergence analysis identifies the sources of divergence (e.g., special
+// variables that hold the thread ID), and recursively marks variables that are
+// data or sync dependent on a source of divergence as divergent.
+//
+// While data dependency is a well-known concept, the notion of sync dependency
+// is worth more explanation. Sync dependence characterizes the control flow
+// aspect of the propagation of branch divergence. For example,
+//
+// %cond = icmp slt i32 %tid, 10
+// br i1 %cond, label %then, label %else
+// then:
+// br label %merge
+// else:
+// br label %merge
+// merge:
+// %a = phi i32 [ 0, %then ], [ 1, %else ]
+//
+// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
+// because %tid is not on its use-def chains, %a is sync dependent on %tid
+// because the branch "br i1 %cond" depends on %tid and affects which value %a
+// is assigned to.
+//
+// The current implementation has the following limitations:
+// 1. intra-procedural. It conservatively considers the arguments of a
+// non-kernel-entry function and the return value of a function call as
+// divergent.
+// 2. memory as black box. It conservatively considers values loaded from
+// generic or local address as divergent. This can be improved by leveraging
+// pointer analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+using namespace llvm;
+
+#define DEBUG_TYPE "divergence"
+
+// transparently use the GPUDivergenceAnalysis
+static cl::opt<bool> UseGPUDA("use-gpu-divergence-analysis", cl::init(false),
+ cl::Hidden,
+ cl::desc("turn the LegacyDivergenceAnalysis into "
+ "a wrapper for GPUDivergenceAnalysis"));
+
+namespace {
+
+class DivergencePropagator {
+public:
+ DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
+ PostDominatorTree &PDT, DenseSet<const Value *> &DV)
+ : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
+ void populateWithSourcesOfDivergence();
+ void propagate();
+
+private:
+ // A helper function that explores data dependents of V.
+ void exploreDataDependency(Value *V);
+ // A helper function that explores sync dependents of TI.
+ void exploreSyncDependency(Instruction *TI);
+ // Computes the influence region from Start to End. This region includes all
+ // basic blocks on any simple path from Start to End.
+ void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion);
+ // Finds all users of I that are outside the influence region, and adds these
+ // users to Worklist.
+ void findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
+
+ Function &F;
+ TargetTransformInfo &TTI;
+ DominatorTree &DT;
+ PostDominatorTree &PDT;
+ std::vector<Value *> Worklist; // Stack for DFS.
+ DenseSet<const Value *> &DV; // Stores all divergent values.
+};
+
+void DivergencePropagator::populateWithSourcesOfDivergence() {
+ Worklist.clear();
+ DV.clear();
+ for (auto &I : instructions(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ Worklist.push_back(&I);
+ DV.insert(&I);
+ }
+ }
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ Worklist.push_back(&Arg);
+ DV.insert(&Arg);
+ }
+ }
+}
+
+void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
+ // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
+ // immediate post dominator are divergent. This rule handles if-then-else
+ // patterns. For example,
+ //
+ // if (tid < 5)
+ // a1 = 1;
+ // else
+ // a2 = 2;
+ // a = phi(a1, a2); // sync dependent on (tid < 5)
+ BasicBlock *ThisBB = TI->getParent();
+
+ // Unreachable blocks may not be in the dominator tree.
+ if (!DT.isReachableFromEntry(ThisBB))
+ return;
+
+ // If the function has no exit blocks or doesn't reach any exit blocks, the
+ // post dominator may be null.
+ DomTreeNode *ThisNode = PDT.getNode(ThisBB);
+ if (!ThisNode)
+ return;
+
+ BasicBlock *IPostDom = ThisNode->getIDom()->getBlock();
+ if (IPostDom == nullptr)
+ return;
+
+ for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
+ // A PHINode is uniform if it returns the same value no matter which path is
+ // taken.
+ if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second)
+ Worklist.push_back(&*I);
+ }
+
+ // Propagation rule 2: if a value defined in a loop is used outside, the user
+ // is sync dependent on the condition of the loop exits that dominate the
+ // user. For example,
+ //
+ // int i = 0;
+ // do {
+ // i++;
+ // if (foo(i)) ... // uniform
+ // } while (i < tid);
+ // if (bar(i)) ... // divergent
+ //
+ // A program may contain unstructured loops. Therefore, we cannot leverage
+ // LoopInfo, which only recognizes natural loops.
+ //
+ // The algorithm used here handles both natural and unstructured loops. Given
+ // a branch TI, we first compute its influence region, the union of all simple
+ // paths from TI to its immediate post dominator (IPostDom). Then, we search
+ // for all the values defined in the influence region but used outside. All
+ // these users are sync dependent on TI.
+ DenseSet<BasicBlock *> InfluenceRegion;
+ computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
+ // An insight that can speed up the search process is that all the in-region
+ // values that are used outside must dominate TI. Therefore, instead of
+ // searching every basic block in the influence region, we search all the
+ // dominators of TI until it is outside the influence region.
+ BasicBlock *InfluencedBB = ThisBB;
+ while (InfluenceRegion.count(InfluencedBB)) {
+ for (auto &I : *InfluencedBB)
+ findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
+ if (IDomNode == nullptr)
+ break;
+ InfluencedBB = IDomNode->getBlock();
+ }
+}
+
+void DivergencePropagator::findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
+ for (User *U : I.users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (!InfluenceRegion.count(UserInst->getParent())) {
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+ }
+}
+
+// A helper function for computeInfluenceRegion that adds successors of "ThisBB"
+// to the influence region.
+static void
+addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion,
+ std::vector<BasicBlock *> &InfluenceStack) {
+ for (BasicBlock *Succ : successors(ThisBB)) {
+ if (Succ != End && InfluenceRegion.insert(Succ).second)
+ InfluenceStack.push_back(Succ);
+ }
+}
+
+void DivergencePropagator::computeInfluenceRegion(
+ BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion) {
+ assert(PDT.properlyDominates(End, Start) &&
+ "End does not properly dominate Start");
+
+ // The influence region starts from the end of "Start" to the beginning of
+ // "End". Therefore, "Start" should not be in the region unless "Start" is in
+ // a loop that doesn't contain "End".
+ std::vector<BasicBlock *> InfluenceStack;
+ addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
+ while (!InfluenceStack.empty()) {
+ BasicBlock *BB = InfluenceStack.back();
+ InfluenceStack.pop_back();
+ addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
+ }
+}
+
+void DivergencePropagator::exploreDataDependency(Value *V) {
+ // Follow def-use chains of V.
+ for (User *U : V->users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+}
+
+void DivergencePropagator::propagate() {
+ // Traverse the dependency graph using DFS.
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // Terminators with fewer than two successors won't introduce sync
+ // dependency. Ignore them.
+ if (I->isTerminator() && I->getNumSuccessors() > 1)
+ exploreSyncDependency(I);
+ }
+ exploreDataDependency(V);
+ }
+}
+
+} // namespace
+
+// Register this pass.
+char LegacyDivergenceAnalysis::ID = 0;
+INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence",
+ "Legacy Divergence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LegacyDivergenceAnalysis, "divergence",
+ "Legacy Divergence Analysis", false, true)
+
+FunctionPass *llvm::createLegacyDivergenceAnalysisPass() {
+ return new LegacyDivergenceAnalysis();
+}
+
+void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ if (UseGPUDA)
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
+ const Function &F) const {
+ if (!UseGPUDA)
+ return false;
+
+ // GPUDivergenceAnalysis requires a reducible CFG.
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ using RPOTraversal = ReversePostOrderTraversal<const Function *>;
+ RPOTraversal FuncRPOT(&F);
+ return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
+ const LoopInfo>(FuncRPOT, LI);
+}
+
+bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ if (TTIWP == nullptr)
+ return false;
+
+ TargetTransformInfo &TTI = TTIWP->getTTI(F);
+ // Fast path: if the target does not have branch divergence, we do not mark
+ // any branch as divergent.
+ if (!TTI.hasBranchDivergence())
+ return false;
+
+ DivergentValues.clear();
+ gpuDA = nullptr;
+
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+
+ if (shouldUseGPUDivergenceAnalysis(F)) {
+ // run the new GPU divergence analysis
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ gpuDA = llvm::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
+
+ } else {
+ // run LLVM's existing DivergenceAnalysis
+ DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues);
+ DP.populateWithSourcesOfDivergence();
+ DP.propagate();
+ }
+
+ LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
+ << ":\n";
+ print(dbgs(), F.getParent()));
+
+ return false;
+}
+
+bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
+ if (gpuDA) {
+ return gpuDA->isDivergent(*V);
+ }
+ return DivergentValues.count(V);
+}
+
+void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
+ if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
+ return;
+
+ const Function *F = nullptr;
+ if (!DivergentValues.empty()) {
+ const Value *FirstDivergentValue = *DivergentValues.begin();
+ if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
+ F = Arg->getParent();
+ } else if (const Instruction *I =
+ dyn_cast<Instruction>(FirstDivergentValue)) {
+ F = I->getParent()->getParent();
+ } else {
+ llvm_unreachable("Only arguments and instructions can be divergent");
+ }
+ } else if (gpuDA) {
+ F = &gpuDA->getFunction();
+ }
+ if (!F)
+ return;
+
+ // Dumps all divergent values in F, arguments and then instructions.
+ for (auto &Arg : F->args()) {
+ OS << (isDivergent(&Arg) ? "DIVERGENT: " : " ");
+ OS << Arg << "\n";
+ }
+ // Walk the basic blocks and their instructions in program order so the output is deterministic.
+ for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) {
+ auto &BB = *BI;
+ OS << "\n " << BB.getName() << ":\n";
+ for (auto &I : BB.instructionsWithoutDebug()) {
+ OS << (isDivergent(&I) ? "DIVERGENT: " : " ");
+ OS << I << "\n";
+ }
+ }
+ OS << "\n";
+}
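
For reference, the heart of the pass added above is a worklist DFS: seed it with the sources of divergence, then mark everything reachable along def-use edges as divergent. The sync-dependence propagation through control flow is what the rest of the file adds on top. A standalone sketch of just the data-dependence part, using illustrative graph types rather than the LLVM classes:

#include <set>
#include <vector>

struct Value {
  std::vector<Value *> Users;        // def-use edges
  bool SourceOfDivergence = false;   // e.g. a thread-ID read
};

std::set<const Value *> computeDivergentValues(const std::vector<Value *> &AllValues) {
  std::set<const Value *> DV;
  std::vector<Value *> Worklist;
  // populateWithSourcesOfDivergence
  for (Value *V : AllValues)
    if (V->SourceOfDivergence) {
      DV.insert(V);
      Worklist.push_back(V);
    }
  // propagate: follow def-use chains, DFS via an explicit stack
  while (!Worklist.empty()) {
    Value *V = Worklist.back();
    Worklist.pop_back();
    for (Value *U : V->Users)
      if (DV.insert(U).second)       // newly divergent -> keep exploring
        Worklist.push_back(U);
  }
  return DV;
}
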
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index db919bd233bf..5d0a627f8426 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -330,12 +330,12 @@ void Lint::visitCallSite(CallSite CS) {
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
// isn't expressive enough for what we really want to do. Known partial
// overlap is not distinguished from the case where nothing is known.
- uint64_t Size = 0;
+ auto Size = LocationSize::unknown();
if (const ConstantInt *Len =
dyn_cast<ConstantInt>(findValue(MCI->getLength(),
/*OffsetOk=*/false)))
if (Len->getValue().isIntN(32))
- Size = Len->getValue().getZExtValue();
+ Size = LocationSize::precise(Len->getValue().getZExtValue());
Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
MustAlias,
"Undefined behavior: memcpy source and destination overlap", &I);
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
index d319d4c249d3..8129795bc0c1 100644
--- a/contrib/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -107,8 +107,8 @@ static bool isDereferenceableAndAlignedPointer(
return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, Size,
DL, CtxI, DT, Visited);
- if (auto CS = ImmutableCallSite(V))
- if (auto *RP = getArgumentAliasingToReturnedPointer(CS))
+ if (const auto *Call = dyn_cast<CallBase>(V))
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call))
return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT,
Visited);
@@ -345,7 +345,7 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
const DataLayout &DL = ScanBB->getModule()->getDataLayout();
// Try to get the store size for the type.
- uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
+ auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy));
Value *StrippedPtr = Ptr->stripPointerCasts();
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index a24d66011b8d..7f3480f512ab 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -342,7 +342,7 @@ void RuntimePointerChecking::groupChecks(
//
// The above case requires that we have an UnknownDependence between
// accesses to the same underlying object. This cannot happen unless
- // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
+ // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
// is also false. In this case we will use the fallback path and create
// separate checking groups for all pointers.
@@ -420,7 +420,7 @@ void RuntimePointerChecking::groupChecks(
// We've computed the grouped checks for this partition.
// Save the results and continue with the next one.
- std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
+ llvm::copy(Groups, std::back_inserter(CheckingGroups));
}
}
@@ -509,7 +509,7 @@ public:
/// Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
+ AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, false));
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
@@ -518,7 +518,7 @@ public:
/// Register a store.
void addStore(MemoryLocation &Loc) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
+ AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, true));
}
@@ -556,7 +556,7 @@ public:
/// perform dependency checking.
///
/// Note that this can later be cleared if we retry memcheck analysis without
- /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
+ /// dependency checking (i.e. FoundNonConstantDistanceDependence).
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
/// We decided that no dependence analysis would be used. Reset the state.
@@ -604,8 +604,8 @@ private:
///
/// Note that, this is different from isDependencyCheckNeeded. When we retry
/// memcheck analysis without dependency checking
- /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
- /// while this remains set if we have potentially dependent accesses.
+ /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
+ /// cleared while this remains set if we have potentially dependent accesses.
bool IsRTCheckAnalysisNeeded;
/// The SCEV predicate containing all the SCEV-related assumptions.
@@ -1221,18 +1221,20 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return X == PtrSCEVB;
}
-bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
+MemoryDepChecker::VectorizationSafetyStatus
+MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
switch (Type) {
case NoDep:
case Forward:
case BackwardVectorizable:
- return true;
+ return VectorizationSafetyStatus::Safe;
case Unknown:
+ return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
case ForwardButPreventsForwarding:
case Backward:
case BackwardVectorizableButPreventsForwarding:
- return false;
+ return VectorizationSafetyStatus::Unsafe;
}
llvm_unreachable("unexpected DepType!");
}
@@ -1317,6 +1319,11 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
return false;
}
+void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
+ if (Status < S)
+ Status = S;
+}
+
/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
@@ -1485,7 +1492,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::NoDep;
LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
- ShouldRetryWithRuntimeCheck = true;
+ FoundNonConstantDistanceDependence = true;
return Dependence::Unknown;
}
@@ -1652,7 +1659,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
Dependence::DepType Type =
isDependent(*A.first, A.second, *B.first, B.second, Strides);
- SafeForVectorization &= Dependence::isSafeForVectorization(Type);
+ mergeInStatus(Dependence::isSafeForVectorization(Type));
// Gather dependences unless we accumulated MaxDependences
// dependences. In that case return as soon as we find the first
@@ -1669,7 +1676,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
<< "Too many dependences, stopped recording\n");
}
}
- if (!RecordDependences && !SafeForVectorization)
+ if (!RecordDependences && !isSafeForVectorization())
return false;
}
++OI;
@@ -1679,7 +1686,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
}
LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
- return SafeForVectorization;
+ return isSafeForVectorization();
}
SmallVector<Instruction *, 4>
@@ -1862,10 +1869,17 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
// writes and between reads and writes, but not between reads and reads.
ValueSet Seen;
+ // Record uniform store addresses to identify if we have multiple stores
+ // to the same address.
+ ValueSet UniformStores;
+
for (StoreInst *ST : Stores) {
Value *Ptr = ST->getPointerOperand();
- // Check for store to loop invariant address.
- StoreToLoopInvariantAddress |= isUniform(Ptr);
+
+ if (isUniform(Ptr))
+ HasDependenceInvolvingLoopInvariantAddress |=
+ !UniformStores.insert(Ptr).second;
+
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr).second) {
@@ -1907,6 +1921,14 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
IsReadOnlyPtr = true;
}
+ // See if there is an unsafe dependency between a load from a uniform address
+ // and a store to the same uniform address.
+ if (UniformStores.count(Ptr)) {
+ LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
+ "load and uniform store to the same address!\n");
+ HasDependenceInvolvingLoopInvariantAddress = true;
+ }
+
MemoryLocation Loc = MemoryLocation::get(LD);
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
@@ -2265,7 +2287,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
- StoreToLoopInvariantAddress(false) {
+ HasDependenceInvolvingLoopInvariantAddress(false) {
if (canAnalyzeLoop())
analyzeLoop(AA, LI, TLI, DT);
}
@@ -2297,8 +2319,8 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
PtrRtChecking->print(OS, Depth);
OS << "\n";
- OS.indent(Depth) << "Store to invariant address was "
- << (StoreToLoopInvariantAddress ? "" : "not ")
+ OS.indent(Depth) << "Non vectorizable stores to invariant address were "
+ << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
<< "found in loop.\n";
OS.indent(Depth) << "SCEV assumptions:\n";
diff --git a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
index 074023a7e1e2..2a3b29d7fbca 100644
--- a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
@@ -147,8 +147,8 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PA.preserve<LoopAnalysis>();
PA.preserve<LoopAnalysisManagerFunctionProxy>();
PA.preserve<ScalarEvolutionAnalysis>();
- // FIXME: Uncomment this when all loop passes preserve MemorySSA
- // PA.preserve<MemorySSAAnalysis>();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
// FIXME: What we really want to do here is preserve an AA category, but that
// concept doesn't exist yet.
PA.preserve<AAManager>();
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index 3f78456b3586..ef2b1257015c 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -213,33 +214,21 @@ bool Loop::isSafeToClone() const {
MDNode *Loop::getLoopID() const {
MDNode *LoopID = nullptr;
- if (BasicBlock *Latch = getLoopLatch()) {
- LoopID = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
- } else {
- assert(!getLoopLatch() &&
- "The loop should have no single latch at this point");
- // Go through each predecessor of the loop header and check the
- // terminator for the metadata.
- BasicBlock *H = getHeader();
- for (BasicBlock *BB : this->blocks()) {
- TerminatorInst *TI = BB->getTerminator();
- MDNode *MD = nullptr;
-
- // Check if this terminator branches to the loop header.
- for (BasicBlock *Successor : TI->successors()) {
- if (Successor == H) {
- MD = TI->getMetadata(LLVMContext::MD_loop);
- break;
- }
- }
- if (!MD)
- return nullptr;
- if (!LoopID)
- LoopID = MD;
- else if (MD != LoopID)
- return nullptr;
- }
+ // Go through the latch blocks and check the terminator for the metadata.
+ SmallVector<BasicBlock *, 4> LatchesBlocks;
+ getLoopLatches(LatchesBlocks);
+ for (BasicBlock *BB : LatchesBlocks) {
+ Instruction *TI = BB->getTerminator();
+ MDNode *MD = TI->getMetadata(LLVMContext::MD_loop);
+
+ if (!MD)
+ return nullptr;
+
+ if (!LoopID)
+ LoopID = MD;
+ else if (MD != LoopID)
+ return nullptr;
}
if (!LoopID || LoopID->getNumOperands() == 0 ||
LoopID->getOperand(0) != LoopID)
@@ -248,23 +237,19 @@ MDNode *Loop::getLoopID() const {
}
void Loop::setLoopID(MDNode *LoopID) const {
- assert(LoopID && "Loop ID should not be null");
- assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself");
+ assert((!LoopID || LoopID->getNumOperands() > 0) &&
+ "Loop ID needs at least one operand");
+ assert((!LoopID || LoopID->getOperand(0) == LoopID) &&
+ "Loop ID should refer to itself");
- if (BasicBlock *Latch = getLoopLatch()) {
- Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
- return;
- }
-
- assert(!getLoopLatch() &&
- "The loop should have no single latch at this point");
BasicBlock *H = getHeader();
for (BasicBlock *BB : this->blocks()) {
- TerminatorInst *TI = BB->getTerminator();
- for (BasicBlock *Successor : TI->successors()) {
- if (Successor == H)
+ Instruction *TI = BB->getTerminator();
+ for (BasicBlock *Successor : successors(TI)) {
+ if (Successor == H) {
TI->setMetadata(LLVMContext::MD_loop, LoopID);
+ break;
+ }
}
}
}
@@ -308,16 +293,50 @@ bool Loop::isAnnotatedParallel() const {
if (!DesiredLoopIdMetadata)
return false;
+ MDNode *ParallelAccesses =
+ findOptionMDForLoop(this, "llvm.loop.parallel_accesses");
+ SmallPtrSet<MDNode *, 4>
+ ParallelAccessGroups; // For scalable 'contains' check.
+ if (ParallelAccesses) {
+ for (const MDOperand &MD : drop_begin(ParallelAccesses->operands(), 1)) {
+ MDNode *AccGroup = cast<MDNode>(MD.get());
+ assert(isValidAsAccessGroup(AccGroup) &&
+ "List item must be an access group");
+ ParallelAccessGroups.insert(AccGroup);
+ }
+ }
+
// The loop branch contains the parallel loop metadata. In order to ensure
// that any parallel-loop-unaware optimization pass hasn't added loop-carried
// dependencies (thus converted the loop back to a sequential loop), check
- // that all the memory instructions in the loop contain parallelism metadata
- // that point to the same unique "loop id metadata" the loop branch does.
+ // that all the memory instructions in the loop belong to an access group that
+ // is parallel to this loop.
for (BasicBlock *BB : this->blocks()) {
for (Instruction &I : *BB) {
if (!I.mayReadOrWriteMemory())
continue;
+ if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
+ auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
+ if (AG->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(AG) && "Item must be an access group");
+ return ParallelAccessGroups.count(AG);
+ }
+
+ for (const MDOperand &AccessListItem : AG->operands()) {
+ MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
+ assert(isValidAsAccessGroup(AccGroup) &&
+ "List item must be an access group");
+ if (ParallelAccessGroups.count(AccGroup))
+ return true;
+ }
+ return false;
+ };
+
+ if (ContainsAccessGroup(AccessGroup))
+ continue;
+ }
+
// The memory instruction can refer to the loop identifier metadata
// directly or indirectly through another list metadata (in case of
// nested parallel loops). The loop identifier metadata refers to
@@ -708,6 +727,40 @@ void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) {
}
}
+MDNode *llvm::findOptionMDForLoopID(MDNode *LoopID, StringRef Name) {
+ // No loop metadata node, no loop properties.
+ if (!LoopID)
+ return nullptr;
+
+ // First operand should refer to the metadata node itself, for legacy reasons.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ // Iterate over the metadata node operands and look for MDString metadata.
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD || MD->getNumOperands() < 1)
+ continue;
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S)
+ continue;
+ // Return the operand node if MDString holds expected metadata.
+ if (Name.equals(S->getString()))
+ return MD;
+ }
+
+ // Loop property not found.
+ return nullptr;
+}
+
+MDNode *llvm::findOptionMDForLoop(const Loop *TheLoop, StringRef Name) {
+ return findOptionMDForLoopID(TheLoop->getLoopID(), Name);
+}
+
+bool llvm::isValidAsAccessGroup(MDNode *Node) {
+ return Node->getNumOperands() == 0 && Node->isDistinct();
+}
+
//===----------------------------------------------------------------------===//
// LoopInfo implementation
//
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index 07a151ce0fce..a68f114b83a0 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -193,8 +194,14 @@ bool LPPassManager::runOnFunction(Function &F) {
}
// Walk Loops
- unsigned InstrCount = 0;
+ unsigned InstrCount, FunctionSize = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
+ // Collect the initial size of the module and the function we're looking at.
+ if (EmitICRemark) {
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
+ FunctionSize = F.getInstructionCount();
+ }
while (!LQ.empty()) {
CurrentLoopDeleted = false;
CurrentLoop = LQ.back();
@@ -209,17 +216,28 @@ bool LPPassManager::runOnFunction(Function &F) {
initializeAnalysisImpl(P);
+ bool LocalChanged = false;
{
PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
TimeRegion PassTimer(getPassTimer(P));
- if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
- Changed |= P->runOnLoop(CurrentLoop, *this);
- if (EmitICRemark)
- emitInstrCountChangedRemark(P, M, InstrCount);
+ LocalChanged = P->runOnLoop(CurrentLoop, *this);
+ Changed |= LocalChanged;
+ if (EmitICRemark) {
+ unsigned NewSize = F.getInstructionCount();
+ // Update the size of the function, emit a remark, and update the
+ // size of the module.
+ if (NewSize != FunctionSize) {
+ int64_t Delta = static_cast<int64_t>(NewSize) -
+ static_cast<int64_t>(FunctionSize);
+ emitInstrCountChangedRemark(P, M, Delta, InstrCount,
+ FunctionToInstrCount, &F);
+ InstrCount = static_cast<int64_t>(InstrCount) + Delta;
+ FunctionSize = NewSize;
+ }
+ }
}
- if (Changed)
+ if (LocalChanged)
dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
CurrentLoopDeleted ? "<deleted loop>"
: CurrentLoop->getName());
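
The LPPassManager hunk above also changes how instruction-count remarks are produced: rather than re-counting the whole module around every pass, it snapshots the function size once and, after each pass, emits a remark only when the size actually changed, adjusting the running module total by the delta. A rough standalone sketch of that bookkeeping (plain C++; printf stands in for the remark emission):

#include <cstdint>
#include <cstdio>

struct SizeRemarks {
  int64_t ModuleCount;      // running instruction count for the module
  unsigned FunctionSize;    // last observed size of the current function

  void afterPass(const char *PassName, unsigned NewSize) {
    if (NewSize == FunctionSize)
      return;               // pass did not change the function; no remark
    int64_t Delta = int64_t(NewSize) - int64_t(FunctionSize);
    std::printf("%s changed instruction count by %lld (module total now %lld)\n",
                PassName, (long long)Delta, (long long)(ModuleCount + Delta));
    ModuleCount += Delta;
    FunctionSize = NewSize;
  }
};
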
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
index 5a6bbd7b2ac6..907b321b231a 100644
--- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -13,7 +13,6 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
@@ -106,9 +105,9 @@ bool MemDepPrinter::runOnFunction(Function &F) {
if (!Res.isNonLocal()) {
Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
static_cast<BasicBlock *>(nullptr)));
- } else if (auto CS = CallSite(Inst)) {
+ } else if (auto *Call = dyn_cast<CallBase>(Inst)) {
const MemoryDependenceResults::NonLocalDepInfo &NLDI =
- MDA.getNonLocalCallDependency(CS);
+ MDA.getNonLocalCallDependency(Call);
DepSet &InstDeps = Deps[Inst];
for (const NonLocalDepEntry &I : NLDI) {
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index feae53c54ecb..e22182b99e11 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -31,7 +31,6 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -182,8 +181,8 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
}
/// Private helper for finding the local dependencies of a call site.
-MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
- CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
+MemDepResult MemoryDependenceResults::getCallDependencyFrom(
+ CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
BasicBlock *BB) {
unsigned Limit = BlockScanLimit;
@@ -205,21 +204,21 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
ModRefInfo MR = GetLocation(Inst, Loc, TLI);
if (Loc.Ptr) {
// A simple instruction.
- if (isModOrRefSet(AA.getModRefInfo(CS, Loc)))
+ if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
return MemDepResult::getClobber(Inst);
continue;
}
- if (auto InstCS = CallSite(Inst)) {
+ if (auto *CallB = dyn_cast<CallBase>(Inst)) {
// If these two calls do not interfere, look past it.
- if (isNoModRef(AA.getModRefInfo(CS, InstCS))) {
- // If the two calls are the same, return InstCS as a Def, so that
- // CS can be found redundant and eliminated.
+ if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
+ // If the two calls are the same, return Inst as a Def, so that
+ // Call can be found redundant and eliminated.
if (isReadOnlyCall && !isModSet(MR) &&
- CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+ Call->isIdenticalToWhenDefined(CallB))
return MemDepResult::getDef(Inst);
- // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+ // Otherwise if the two calls don't interact (e.g. CallB is readnone)
// keep scanning.
continue;
} else
@@ -750,11 +749,10 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
LocalCache = getPointerDependencyFrom(
MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
- } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
- CallSite QueryCS(QueryInst);
- bool isReadOnly = AA.onlyReadsMemory(QueryCS);
- LocalCache = getCallSiteDependencyFrom(
- QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
+ } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
+ bool isReadOnly = AA.onlyReadsMemory(QueryCall);
+ LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
+ ScanPos->getIterator(), QueryParent);
} else
// Non-memory instruction.
LocalCache = MemDepResult::getUnknown();
@@ -780,11 +778,11 @@ static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
#endif
const MemoryDependenceResults::NonLocalDepInfo &
-MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
- assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
+MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
+ assert(getDependency(QueryCall).isNonLocal() &&
"getNonLocalCallDependency should only be used on calls with "
"non-local deps!");
- PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
+ PerInstNLInfo &CacheP = NonLocalDeps[QueryCall];
NonLocalDepInfo &Cache = CacheP.first;
// This is the set of blocks that need to be recomputed. In the cached case,
@@ -807,21 +805,21 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
DirtyBlocks.push_back(Entry.getBB());
// Sort the cache so that we can do fast binary search lookups below.
- llvm::sort(Cache.begin(), Cache.end());
+ llvm::sort(Cache);
++NumCacheDirtyNonLocal;
// cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
// << Cache.size() << " cached: " << *QueryInst;
} else {
// Seed DirtyBlocks with each of the preds of QueryInst's block.
- BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+ BasicBlock *QueryBB = QueryCall->getParent();
for (BasicBlock *Pred : PredCache.get(QueryBB))
DirtyBlocks.push_back(Pred);
++NumUncacheNonLocal;
}
// isReadonlyCall - If this is a read-only call, we can be more aggressive.
- bool isReadonlyCall = AA.onlyReadsMemory(QueryCS);
+ bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
SmallPtrSet<BasicBlock *, 32> Visited;
@@ -865,8 +863,8 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
if (Instruction *Inst = ExistingResult->getResult().getInst()) {
ScanPos = Inst->getIterator();
// We're removing QueryInst's use of Inst.
- RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
- QueryCS.getInstruction());
+ RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
+ QueryCall);
}
}
@@ -874,8 +872,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
MemDepResult Dep;
if (ScanPos != DirtyBB->begin()) {
- Dep =
- getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
+ Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
} else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
// No dependence found. If this is the entry block of the function, it is
// a clobber, otherwise it is unknown.
@@ -897,7 +894,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
// Keep the ReverseNonLocalDeps map up to date so we can efficiently
// update this when we remove instructions.
if (Instruction *Inst = Dep.getInst())
- ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+ ReverseNonLocalDeps[Inst].insert(QueryCall);
} else {
// If the block *is* completely transparent to the load, we need to check
@@ -1070,7 +1067,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
break;
default:
// Added many values, do a full scale sort.
- llvm::sort(Cache.begin(), Cache.end());
+ llvm::sort(Cache);
break;
}
}
@@ -1113,21 +1110,36 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// If we already have a cache entry for this CacheKey, we may need to do some
// work to reconcile the cache entry and the current query.
if (!Pair.second) {
- if (CacheInfo->Size < Loc.Size) {
- // The query's Size is greater than the cached one. Throw out the
- // cached data and proceed with the query at the greater size.
- CacheInfo->Pair = BBSkipFirstBlockPair();
- CacheInfo->Size = Loc.Size;
- for (auto &Entry : CacheInfo->NonLocalDeps)
- if (Instruction *Inst = Entry.getResult().getInst())
- RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
- CacheInfo->NonLocalDeps.clear();
- } else if (CacheInfo->Size > Loc.Size) {
- // This query's Size is less than the cached one. Conservatively restart
- // the query using the greater size.
- return getNonLocalPointerDepFromBB(
- QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
- StartBB, Result, Visited, SkipFirstBlock);
+ if (CacheInfo->Size != Loc.Size) {
+ bool ThrowOutEverything;
+ if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
+ // FIXME: We may be able to do better in the face of results with mixed
+ // precision. We don't appear to get them in practice, though, so just
+ // be conservative.
+ ThrowOutEverything =
+ CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
+ CacheInfo->Size.getValue() < Loc.Size.getValue();
+ } else {
+ // For our purposes, unknown size > all others.
+ ThrowOutEverything = !Loc.Size.hasValue();
+ }
+
+ if (ThrowOutEverything) {
+ // The query's Size is greater than the cached one. Throw out the
+ // cached data and proceed with the query at the greater size.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->Size = Loc.Size;
+ for (auto &Entry : CacheInfo->NonLocalDeps)
+ if (Instruction *Inst = Entry.getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ } else {
+ // This query's Size is less than the cached one. Conservatively restart
+ // the query using the greater size.
+ return getNonLocalPointerDepFromBB(
+ QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
+ StartBB, Result, Visited, SkipFirstBlock);
+ }
}
// If the query's AATags are inconsistent with the cached one,
@@ -1572,7 +1584,7 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
// RemInst can't be the terminator if it has local stuff depending on it.
- assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
+ assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
"Nothing can locally depend on a terminator");
for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
@@ -1662,7 +1674,7 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
// Re-sort the NonLocalDepInfo. Changing the dirty entry to its
// subsequent value may invalidate the sortedness.
- llvm::sort(NLPDI.begin(), NLPDI.end());
+ llvm::sort(NLPDI);
}
ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
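
With LocationSize in the non-local pointer cache, the code above has to reconcile a cached size with the queried one: when they differ, it discards the cached entries if the query is effectively larger (or the precision flags disagree), and otherwise reruns the query at the larger cached size. A standalone sketch of just that predicate, using std::optional as a stand-in for "unknown" (plain C++17, not the LLVM types):

#include <cstdint>
#include <optional>

struct Sz {
  std::optional<uint64_t> Bytes;  // nullopt = unknown size
  bool Precise = false;
};

// True when the cached data must be thrown away and recomputed at the query's
// size; false when the query should be restarted at the larger cached size.
bool shouldThrowOutCache(const Sz &Cached, const Sz &Query) {
  if (Cached.Bytes && Query.Bytes)
    // Mixed precision is handled conservatively; otherwise compare the values.
    return Cached.Precise != Query.Precise || *Cached.Bytes < *Query.Bytes;
  // An unknown size ranks above every known size.
  return !Query.Bytes;
}
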
diff --git a/contrib/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
index 55924db284ec..27e8d72b8e89 100644
--- a/contrib/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
@@ -18,13 +18,28 @@
#include "llvm/IR/Type.h"
using namespace llvm;
+void LocationSize::print(raw_ostream &OS) const {
+ OS << "LocationSize::";
+ if (*this == unknown())
+ OS << "unknown";
+ else if (*this == mapEmpty())
+ OS << "mapEmpty";
+ else if (*this == mapTombstone())
+ OS << "mapTombstone";
+ else if (isPrecise())
+ OS << "precise(" << getValue() << ')';
+ else
+ OS << "upperBound(" << getValue() << ')';
+}
+
MemoryLocation MemoryLocation::get(const LoadInst *LI) {
AAMDNodes AATags;
LI->getAAMetadata(AATags);
const auto &DL = LI->getModule()->getDataLayout();
- return MemoryLocation(LI->getPointerOperand(),
- DL.getTypeStoreSize(LI->getType()), AATags);
+ return MemoryLocation(
+ LI->getPointerOperand(),
+ LocationSize::precise(DL.getTypeStoreSize(LI->getType())), AATags);
}
MemoryLocation MemoryLocation::get(const StoreInst *SI) {
@@ -33,7 +48,8 @@ MemoryLocation MemoryLocation::get(const StoreInst *SI) {
const auto &DL = SI->getModule()->getDataLayout();
return MemoryLocation(SI->getPointerOperand(),
- DL.getTypeStoreSize(SI->getValueOperand()->getType()),
+ LocationSize::precise(DL.getTypeStoreSize(
+ SI->getValueOperand()->getType())),
AATags);
}
@@ -41,7 +57,8 @@ MemoryLocation MemoryLocation::get(const VAArgInst *VI) {
AAMDNodes AATags;
VI->getAAMetadata(AATags);
- return MemoryLocation(VI->getPointerOperand(), UnknownSize, AATags);
+ return MemoryLocation(VI->getPointerOperand(), LocationSize::unknown(),
+ AATags);
}
MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
@@ -49,9 +66,10 @@ MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
CXI->getAAMetadata(AATags);
const auto &DL = CXI->getModule()->getDataLayout();
- return MemoryLocation(
- CXI->getPointerOperand(),
- DL.getTypeStoreSize(CXI->getCompareOperand()->getType()), AATags);
+ return MemoryLocation(CXI->getPointerOperand(),
+ LocationSize::precise(DL.getTypeStoreSize(
+ CXI->getCompareOperand()->getType())),
+ AATags);
}
MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
@@ -60,7 +78,8 @@ MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
const auto &DL = RMWI->getModule()->getDataLayout();
return MemoryLocation(RMWI->getPointerOperand(),
- DL.getTypeStoreSize(RMWI->getValOperand()->getType()),
+ LocationSize::precise(DL.getTypeStoreSize(
+ RMWI->getValOperand()->getType())),
AATags);
}
@@ -73,9 +92,9 @@ MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) {
}
MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
- uint64_t Size = UnknownSize;
+ auto Size = LocationSize::unknown();
if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
- Size = C->getValue().getZExtValue();
+ Size = LocationSize::precise(C->getValue().getZExtValue());
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
@@ -94,9 +113,9 @@ MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) {
}
MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
- uint64_t Size = UnknownSize;
+ auto Size = LocationSize::unknown();
if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength()))
- Size = C->getValue().getZExtValue();
+ Size = LocationSize::precise(C->getValue().getZExtValue());
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
@@ -106,15 +125,15 @@ MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
return MemoryLocation(MI->getRawDest(), Size, AATags);
}
-MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
+MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
unsigned ArgIdx,
- const TargetLibraryInfo &TLI) {
+ const TargetLibraryInfo *TLI) {
AAMDNodes AATags;
- CS->getAAMetadata(AATags);
- const Value *Arg = CS.getArgument(ArgIdx);
+ Call->getAAMetadata(AATags);
+ const Value *Arg = Call->getArgOperand(ArgIdx);
// We may be able to produce an exact size for known intrinsics.
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Call)) {
const DataLayout &DL = II->getModule()->getDataLayout();
switch (II->getIntrinsicID()) {
@@ -126,7 +145,8 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memory intrinsic");
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
- return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
+ return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
+ AATags);
break;
case Intrinsic::lifetime_start:
@@ -134,23 +154,37 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
case Intrinsic::invariant_start:
assert(ArgIdx == 1 && "Invalid argument index");
return MemoryLocation(
- Arg, cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AATags);
+ Arg,
+ LocationSize::precise(
+ cast<ConstantInt>(II->getArgOperand(0))->getZExtValue()),
+ AATags);
case Intrinsic::invariant_end:
+ // The first argument to an invariant.end is a "descriptor" type (e.g. a
+ // pointer to an empty struct) which is never actually dereferenced.
+ if (ArgIdx == 0)
+ return MemoryLocation(Arg, LocationSize::precise(0), AATags);
assert(ArgIdx == 2 && "Invalid argument index");
return MemoryLocation(
- Arg, cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AATags);
+ Arg,
+ LocationSize::precise(
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue()),
+ AATags);
case Intrinsic::arm_neon_vld1:
assert(ArgIdx == 0 && "Invalid argument index");
// LLVM's vld1 and vst1 intrinsics currently only support a single
// vector register.
- return MemoryLocation(Arg, DL.getTypeStoreSize(II->getType()), AATags);
+ return MemoryLocation(
+ Arg, LocationSize::precise(DL.getTypeStoreSize(II->getType())),
+ AATags);
case Intrinsic::arm_neon_vst1:
assert(ArgIdx == 0 && "Invalid argument index");
- return MemoryLocation(
- Arg, DL.getTypeStoreSize(II->getArgOperand(1)->getType()), AATags);
+ return MemoryLocation(Arg,
+ LocationSize::precise(DL.getTypeStoreSize(
+ II->getArgOperand(1)->getType())),
+ AATags);
}
}
@@ -159,16 +193,20 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
LibFunc F;
- if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
- F == LibFunc_memset_pattern16 && TLI.has(F)) {
+ if (TLI && Call->getCalledFunction() &&
+ TLI->getLibFunc(*Call->getCalledFunction(), F) &&
+ F == LibFunc_memset_pattern16 && TLI->has(F)) {
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
if (ArgIdx == 1)
- return MemoryLocation(Arg, 16, AATags);
- if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
- return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
+ return MemoryLocation(Arg, LocationSize::precise(16), AATags);
+ if (const ConstantInt *LenCI =
+ dyn_cast<ConstantInt>(Call->getArgOperand(2)))
+ return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
+ AATags);
}
// FIXME: Handle memset_pattern4 and memset_pattern8 also.
- return MemoryLocation(CS.getArgument(ArgIdx), UnknownSize, AATags);
+ return MemoryLocation(Call->getArgOperand(ArgIdx), LocationSize::unknown(),
+ AATags);
}
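As an aside (not part of the patch), the LocationSize values threaded through above replace raw uint64_t sizes; a minimal sketch of how a caller collapses one back to a byte count, assuming the LLVM headers imported in this tree:

#include "llvm/Analysis/MemoryLocation.h"
using namespace llvm;

// Collapse a LocationSize back to a byte count, falling back to
// MemoryLocation::UnknownSize when nothing is known statically.
static uint64_t sizeOrUnknown(LocationSize Size) {
  // hasValue() guards getValue(), which would assert on the unknown case.
  return Size.hasValue() ? Size.getValue() : MemoryLocation::UnknownSize;
}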
diff --git a/contrib/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm/lib/Analysis/MemorySSA.cpp
index 6e49a39926a2..6a5567ed765b 100644
--- a/contrib/llvm/lib/Analysis/MemorySSA.cpp
+++ b/contrib/llvm/lib/Analysis/MemorySSA.cpp
@@ -30,7 +30,6 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
@@ -77,9 +76,15 @@ static cl::opt<unsigned> MaxCheckLimit(
cl::desc("The maximum number of stores/phis MemorySSA"
"will consider trying to walk past (default = 100)"));
-static cl::opt<bool>
- VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
- cl::desc("Verify MemorySSA in legacy printer pass."));
+// Always verify MemorySSA if expensive checking is enabled.
+#ifdef EXPENSIVE_CHECKS
+bool llvm::VerifyMemorySSA = true;
+#else
+bool llvm::VerifyMemorySSA = false;
+#endif
+static cl::opt<bool, true>
+ VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA),
+ cl::Hidden, cl::desc("Enable verification of MemorySSA."));
namespace llvm {
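For context, a hedged sketch of the cl::location pattern used for VerifyMemorySSA above: the option writes through to an ordinary global, so code (here, the EXPENSIVE_CHECKS default) can set it independently of the command line. The names MyVerifyFlag and -my-verify-flag below are purely illustrative.

#include "llvm/Support/CommandLine.h"

bool MyVerifyFlag = false; // external storage; can also be toggled from code
static llvm::cl::opt<bool, true>
    MyVerifyFlagOpt("my-verify-flag", llvm::cl::location(MyVerifyFlag),
                    llvm::cl::Hidden,
                    llvm::cl::desc("Illustrative verification toggle."));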
@@ -125,9 +130,9 @@ public:
: MemoryLocOrCall(MUD->getMemoryInst()) {}
MemoryLocOrCall(Instruction *Inst) {
- if (ImmutableCallSite(Inst)) {
+ if (auto *C = dyn_cast<CallBase>(Inst)) {
IsCall = true;
- CS = ImmutableCallSite(Inst);
+ Call = C;
} else {
IsCall = false;
// There is no such thing as a memorylocation for a fence inst, and it is
@@ -139,9 +144,9 @@ public:
explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {}
- ImmutableCallSite getCS() const {
+ const CallBase *getCall() const {
assert(IsCall);
- return CS;
+ return Call;
}
MemoryLocation getLoc() const {
@@ -156,16 +161,17 @@ public:
if (!IsCall)
return Loc == Other.Loc;
- if (CS.getCalledValue() != Other.CS.getCalledValue())
+ if (Call->getCalledValue() != Other.Call->getCalledValue())
return false;
- return CS.arg_size() == Other.CS.arg_size() &&
- std::equal(CS.arg_begin(), CS.arg_end(), Other.CS.arg_begin());
+ return Call->arg_size() == Other.Call->arg_size() &&
+ std::equal(Call->arg_begin(), Call->arg_end(),
+ Other.Call->arg_begin());
}
private:
union {
- ImmutableCallSite CS;
+ const CallBase *Call;
MemoryLocation Loc;
};
};
@@ -191,9 +197,9 @@ template <> struct DenseMapInfo<MemoryLocOrCall> {
hash_code hash =
hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue(
- MLOC.getCS().getCalledValue()));
+ MLOC.getCall()->getCalledValue()));
- for (const Value *Arg : MLOC.getCS().args())
+ for (const Value *Arg : MLOC.getCall()->args())
hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg));
return hash;
}
@@ -246,13 +252,13 @@ struct ClobberAlias {
// Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being
// ignored if IsClobber = false.
-static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
+static ClobberAlias instructionClobbersQuery(const MemoryDef *MD,
const MemoryLocation &UseLoc,
const Instruction *UseInst,
AliasAnalysis &AA) {
Instruction *DefInst = MD->getMemoryInst();
assert(DefInst && "Defining instruction not actually an instruction");
- ImmutableCallSite UseCS(UseInst);
+ const auto *UseCall = dyn_cast<CallBase>(UseInst);
Optional<AliasResult> AR;
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
@@ -265,7 +271,7 @@ static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
// context.
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
- if (UseCS)
+ if (UseCall)
return {false, NoAlias};
AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc);
return {AR != NoAlias, AR};
@@ -279,8 +285,8 @@ static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
}
}
- if (UseCS) {
- ModRefInfo I = AA.getModRefInfo(DefInst, UseCS);
+ if (UseCall) {
+ ModRefInfo I = AA.getModRefInfo(DefInst, UseCall);
AR = isMustSet(I) ? MustAlias : MayAlias;
return {isModOrRefSet(I), AR};
}
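Sketch of the query made above (assumptions: AA, DefInst and Call come from a walk like this one): alias analysis is asked whether the call-site user reads or writes memory touched by the defining instruction.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

static bool callMayClobberOrRead(AliasAnalysis &AA, const Instruction *DefInst,
                                 const CallBase *Call) {
  // Any Mod or Ref bit set means the call matters for the clobber query.
  ModRefInfo MRI = AA.getModRefInfo(DefInst, Call);
  return isModOrRefSet(MRI);
}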
@@ -326,11 +332,12 @@ struct UpwardsMemoryQuery {
// The MemoryAccess we actually got called with, used to test local domination
const MemoryAccess *OriginalAccess = nullptr;
Optional<AliasResult> AR = MayAlias;
+ bool SkipSelfAccess = false;
UpwardsMemoryQuery() = default;
UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
- : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
+ : IsCall(isa<CallBase>(Inst)), Inst(Inst), OriginalAccess(Access) {
if (!IsCall)
StartingLoc = MemoryLocation::get(Inst);
}
@@ -370,13 +377,15 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
/// \param Start The MemoryAccess that we want to walk from.
/// \param ClobberAt A clobber for Start.
/// \param StartLoc The MemoryLocation for Start.
-/// \param MSSA The MemorySSA isntance that Start and ClobberAt belong to.
+/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
/// \param Query The UpwardsMemoryQuery we used for our search.
/// \param AA The AliasAnalysis we used for our search.
-static void LLVM_ATTRIBUTE_UNUSED
-checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
+/// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
+static void
+checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
const MemoryLocation &StartLoc, const MemorySSA &MSSA,
- const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
+ const UpwardsMemoryQuery &Query, AliasAnalysis &AA,
+ bool AllowImpreciseClobber = false) {
assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
if (MSSA.isLiveOnEntryDef(Start)) {
@@ -386,21 +395,21 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
}
bool FoundClobber = false;
- DenseSet<MemoryAccessPair> VisitedPhis;
- SmallVector<MemoryAccessPair, 8> Worklist;
+ DenseSet<ConstMemoryAccessPair> VisitedPhis;
+ SmallVector<ConstMemoryAccessPair, 8> Worklist;
Worklist.emplace_back(Start, StartLoc);
// Walk all paths from Start to ClobberAt, while looking for clobbers. If one
// is found, complain.
while (!Worklist.empty()) {
- MemoryAccessPair MAP = Worklist.pop_back_val();
+ auto MAP = Worklist.pop_back_val();
// All we care about is that nothing from Start to ClobberAt clobbers Start.
// We learn nothing from revisiting nodes.
if (!VisitedPhis.insert(MAP).second)
continue;
- for (MemoryAccess *MA : def_chain(MAP.first)) {
+ for (const auto *MA : def_chain(MAP.first)) {
if (MA == ClobberAt) {
- if (auto *MD = dyn_cast<MemoryDef>(MA)) {
+ if (const auto *MD = dyn_cast<MemoryDef>(MA)) {
// instructionClobbersQuery isn't essentially free, so don't use `|=`,
// since it won't let us short-circuit.
//
@@ -422,19 +431,39 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
// We should never hit liveOnEntry, unless it's the clobber.
assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?");
- if (auto *MD = dyn_cast<MemoryDef>(MA)) {
- (void)MD;
+ if (const auto *MD = dyn_cast<MemoryDef>(MA)) {
+ // If Start is a Def, skip self.
+ if (MD == Start)
+ continue;
+
assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA)
.IsClobber &&
"Found clobber before reaching ClobberAt!");
continue;
}
+ if (const auto *MU = dyn_cast<MemoryUse>(MA)) {
+ (void)MU;
+ assert(MU == Start &&
+ "Can only find use in def chain if Start is a use");
+ continue;
+ }
+
assert(isa<MemoryPhi>(MA));
- Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end());
+ Worklist.append(
+ upward_defs_begin({const_cast<MemoryAccess *>(MA), MAP.second}),
+ upward_defs_end());
}
}
+ // If the verify is done following an optimization, it's possible that
+ // ClobberAt was a conservative clobber which we can now infer is not a
+ // true clobbering access. Don't fail the verify if that's the case.
+ // We do have accesses that claim they're optimized, but could be optimized
+ // further. Updating all these can be expensive, so allow it for now (FIXME).
+ if (AllowImpreciseClobber)
+ return;
+
// If ClobberAt is a MemoryPhi, we can assume something above it acted as a
// clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point.
assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) &&
@@ -507,13 +536,13 @@ class ClobberWalker {
///
/// This does not test for whether StopAt is a clobber
UpwardsWalkResult
- walkToPhiOrClobber(DefPath &Desc,
- const MemoryAccess *StopAt = nullptr) const {
+ walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr,
+ const MemoryAccess *SkipStopAt = nullptr) const {
assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
for (MemoryAccess *Current : def_chain(Desc.Last)) {
Desc.Last = Current;
- if (Current == StopAt)
+ if (Current == StopAt || Current == SkipStopAt)
return {Current, false, MayAlias};
if (auto *MD = dyn_cast<MemoryDef>(Current)) {
@@ -591,9 +620,16 @@ class ClobberWalker {
if (!VisitedPhis.insert({Node.Last, Node.Loc}).second)
continue;
- UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere);
+ const MemoryAccess *SkipStopWhere = nullptr;
+ if (Query->SkipSelfAccess && Node.Loc == Query->StartingLoc) {
+ assert(isa<MemoryDef>(Query->OriginalAccess));
+ SkipStopWhere = Query->OriginalAccess;
+ }
+
+ UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere,
+ /*SkipStopAt=*/SkipStopWhere);
if (Res.IsKnownClobber) {
- assert(Res.Result != StopWhere);
+ assert(Res.Result != StopWhere && Res.Result != SkipStopWhere);
// If this wasn't a cache hit, we hit a clobber when walking. That's a
// failure.
TerminatedPath Term{Res.Result, PathIndex};
@@ -605,10 +641,13 @@ class ClobberWalker {
continue;
}
- if (Res.Result == StopWhere) {
+ if (Res.Result == StopWhere || Res.Result == SkipStopWhere) {
// We've hit our target. Save this path off for if we want to continue
- // walking.
- NewPaused.push_back(PathIndex);
+ // walking. If we are in the mode of skipping the OriginalAccess, and
+ // we've reached back to the OriginalAccess, do not save path, we've
+ // just looped back to self.
+ if (Res.Result != SkipStopWhere)
+ NewPaused.push_back(PathIndex);
continue;
}
@@ -879,7 +918,8 @@ public:
}
#ifdef EXPENSIVE_CHECKS
- checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
+ if (!Q.SkipSelfAccess)
+ checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
#endif
return Result;
}
@@ -907,28 +947,76 @@ struct RenamePassData {
namespace llvm {
+class MemorySSA::ClobberWalkerBase {
+ ClobberWalker Walker;
+ MemorySSA *MSSA;
+
+public:
+ ClobberWalkerBase(MemorySSA *M, AliasAnalysis *A, DominatorTree *D)
+ : Walker(*M, *A, *D), MSSA(M) {}
+
+ MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *,
+ const MemoryLocation &);
+ // Second argument (bool) defines whether the clobber search should skip the
+ // original queried access. If true, there will be a follow-up query searching
+ // for a clobber access past "self". Note that the Optimized access is not
+ // updated if a new clobber is found by this SkipSelf search. If this
+ // additional query becomes heavily used we may decide to cache the result.
+ // Walker instantiations will decide how to set the SkipSelf bool.
+ MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, bool);
+ void verify(const MemorySSA *MSSA) { Walker.verify(MSSA); }
+};
+
/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
/// longer does caching on its own, but the name has been retained for the
/// moment.
class MemorySSA::CachingWalker final : public MemorySSAWalker {
- ClobberWalker Walker;
-
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
+ ClobberWalkerBase *Walker;
public:
- CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *);
+ CachingWalker(MemorySSA *M, ClobberWalkerBase *W)
+ : MemorySSAWalker(M), Walker(W) {}
~CachingWalker() override = default;
using MemorySSAWalker::getClobberingMemoryAccess;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
- const MemoryLocation &) override;
- void invalidateInfo(MemoryAccess *) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) override;
+
+ void invalidateInfo(MemoryAccess *MA) override {
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
+ MUD->resetOptimized();
+ }
void verify(const MemorySSA *MSSA) override {
MemorySSAWalker::verify(MSSA);
- Walker.verify(MSSA);
+ Walker->verify(MSSA);
+ }
+};
+
+class MemorySSA::SkipSelfWalker final : public MemorySSAWalker {
+ ClobberWalkerBase *Walker;
+
+public:
+ SkipSelfWalker(MemorySSA *M, ClobberWalkerBase *W)
+ : MemorySSAWalker(M), Walker(W) {}
+ ~SkipSelfWalker() override = default;
+
+ using MemorySSAWalker::getClobberingMemoryAccess;
+
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) override;
+
+ void invalidateInfo(MemoryAccess *MA) override {
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
+ MUD->resetOptimized();
+ }
+
+ void verify(const MemorySSA *MSSA) override {
+ MemorySSAWalker::verify(MSSA);
+ Walker->verify(MSSA);
}
};
@@ -1067,7 +1155,7 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
: AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
- NextID(0) {
+ SkipWalker(nullptr), NextID(0) {
buildMemorySSA();
}
@@ -1398,10 +1486,25 @@ MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
if (Walker)
return Walker.get();
- Walker = llvm::make_unique<CachingWalker>(this, AA, DT);
+ if (!WalkerBase)
+ WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+
+ Walker = llvm::make_unique<CachingWalker>(this, WalkerBase.get());
return Walker.get();
}
+MemorySSAWalker *MemorySSA::getSkipSelfWalker() {
+ if (SkipWalker)
+ return SkipWalker.get();
+
+ if (!WalkerBase)
+ WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+
+ SkipWalker = llvm::make_unique<SkipSelfWalker>(this, WalkerBase.get());
+ return SkipWalker.get();
+ }
+
+
// This is a helper function used by the creation routines. It places NewAccess
// into the access and defs lists for a given basic block, at the given
// insertion point.
@@ -1465,15 +1568,25 @@ void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
BlockNumberingValid.erase(BB);
}
+void MemorySSA::prepareForMoveTo(MemoryAccess *What, BasicBlock *BB) {
+ // Keep it in the lookup tables, remove from the lists
+ removeFromLists(What, false);
+
+ // Note that moving should implicitly invalidate the optimized state of a
+ // MemoryUse (and Phis can't be optimized). However, it doesn't do so for a
+ // MemoryDef.
+ if (auto *MD = dyn_cast<MemoryDef>(What))
+ MD->resetOptimized();
+ What->setBlock(BB);
+}
+
// Move What before Where in the IR. The end result is that What will belong to
// the right lists and have the right Block set, but will not otherwise be
// correct. It will not have the right defining access, and if it is a def,
// things below it will not properly be updated.
void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
AccessList::iterator Where) {
- // Keep it in the lookup tables, remove from the lists
- removeFromLists(What, false);
- What->setBlock(BB);
+ prepareForMoveTo(What, BB);
insertIntoListsBefore(What, BB, Where);
}
@@ -1489,8 +1602,7 @@ void MemorySSA::moveTo(MemoryAccess *What, BasicBlock *BB,
assert(Inserted && "Cannot move a Phi to a block that already has one");
}
- removeFromLists(What, false);
- What->setBlock(BB);
+ prepareForMoveTo(What, BB);
insertIntoListsForBlock(What, BB, Point);
}
@@ -1504,9 +1616,10 @@ MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
}
MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
- MemoryAccess *Definition) {
+ MemoryAccess *Definition,
+ const MemoryUseOrDef *Template) {
assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
- MemoryUseOrDef *NewAccess = createNewAccess(I);
+ MemoryUseOrDef *NewAccess = createNewAccess(I, Template);
assert(
NewAccess != nullptr &&
"Tried to create a memory access for a non-memory touching instruction");
@@ -1529,7 +1642,8 @@ static inline bool isOrdered(const Instruction *I) {
}
/// Helper function to create new memory accesses
-MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
+MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
+ const MemoryUseOrDef *Template) {
// The assume intrinsic has a control dependency which we model by claiming
// that it writes arbitrarily. Ignore that fake memory dependency here.
// FIXME: Replace this special casing with a more accurate modelling of
@@ -1538,18 +1652,31 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
if (II->getIntrinsicID() == Intrinsic::assume)
return nullptr;
- // Find out what affect this instruction has on memory.
- ModRefInfo ModRef = AA->getModRefInfo(I, None);
- // The isOrdered check is used to ensure that volatiles end up as defs
- // (atomics end up as ModRef right now anyway). Until we separate the
- // ordering chain from the memory chain, this enables people to see at least
- // some relative ordering to volatiles. Note that getClobberingMemoryAccess
- // will still give an answer that bypasses other volatile loads. TODO:
- // Separate memory aliasing and ordering into two different chains so that we
- // can precisely represent both "what memory will this read/write/is clobbered
- // by" and "what instructions can I move this past".
- bool Def = isModSet(ModRef) || isOrdered(I);
- bool Use = isRefSet(ModRef);
+ bool Def, Use;
+ if (Template) {
+ Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
+ Use = dyn_cast_or_null<MemoryUse>(Template) != nullptr;
+#if !defined(NDEBUG)
+ ModRefInfo ModRef = AA->getModRefInfo(I, None);
+ bool DefCheck, UseCheck;
+ DefCheck = isModSet(ModRef) || isOrdered(I);
+ UseCheck = isRefSet(ModRef);
+ assert(Def == DefCheck && (Def || Use == UseCheck) && "Invalid template");
+#endif
+ } else {
+ // Find out what effect this instruction has on memory.
+ ModRefInfo ModRef = AA->getModRefInfo(I, None);
+ // The isOrdered check is used to ensure that volatiles end up as defs
+ // (atomics end up as ModRef right now anyway). Until we separate the
+ // ordering chain from the memory chain, this enables people to see at least
+ // some relative ordering to volatiles. Note that getClobberingMemoryAccess
+ // will still give an answer that bypasses other volatile loads. TODO:
+ // Separate memory aliasing and ordering into two different chains so that
+ // we can precisely represent both "what memory will this read/write/is
+ // clobbered by" and "what instructions can I move this past".
+ Def = isModSet(ModRef) || isOrdered(I);
+ Use = isRefSet(ModRef);
+ }
// It's possible for an instruction to not modify memory at all. During
// construction, we ignore them.
@@ -1652,6 +1779,34 @@ void MemorySSA::verifyMemorySSA() const {
verifyOrdering(F);
verifyDominationNumbers(F);
Walker->verify(this);
+ verifyClobberSanity(F);
+}
+
+/// Check sanity of the clobbering instruction for access MA.
+void MemorySSA::checkClobberSanityAccess(const MemoryAccess *MA) const {
+ if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
+ if (!MUD->isOptimized())
+ return;
+ auto *I = MUD->getMemoryInst();
+ auto Loc = MemoryLocation::getOrNone(I);
+ if (Loc == None)
+ return;
+ auto *Clobber = MUD->getOptimized();
+ UpwardsMemoryQuery Q(I, MUD);
+ checkClobberSanity(MUD, Clobber, *Loc, *this, Q, *AA, true);
+ }
+}
+
+void MemorySSA::verifyClobberSanity(const Function &F) const {
+#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
+ for (const BasicBlock &BB : F) {
+ const AccessList *Accesses = getBlockAccesses(&BB);
+ if (!Accesses)
+ continue;
+ for (const MemoryAccess &MA : *Accesses)
+ checkClobberSanityAccess(&MA);
+ }
+#endif
}
/// Verify that all of the blocks we believe to have valid domination numbers
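A small usage sketch (assuming MSSA is the analysis result for the current function and that the header declares the VerifyMemorySSA flag defined earlier in this file): passes typically gate full verification behind the flag, since it is expensive.

#include "llvm/Analysis/MemorySSA.h"
using namespace llvm;

static void maybeVerify(MemorySSA &MSSA) {
  // True by default only in EXPENSIVE_CHECKS builds, or with -verify-memoryssa.
  if (VerifyMemorySSA)
    MSSA.verifyMemorySSA();
}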
@@ -1695,6 +1850,7 @@ void MemorySSA::verifyDominationNumbers(const Function &F) const {
/// Verify that the order and existence of MemoryAccesses matches the
/// order and existence of memory affecting instructions.
void MemorySSA::verifyOrdering(Function &F) const {
+#ifndef NDEBUG
// Walk all the blocks, comparing what the lookups think and what the access
// lists think, as well as the order in the blocks vs the order in the access
// lists.
@@ -1753,6 +1909,7 @@ void MemorySSA::verifyOrdering(Function &F) const {
}
ActualDefs.clear();
}
+#endif
}
/// Verify the domination properties of MemorySSA by checking that each
@@ -1795,6 +1952,7 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
/// accesses and verifying that, for each use, it appears in the
/// appropriate def's use list
void MemorySSA::verifyDefUses(Function &F) const {
+#ifndef NDEBUG
for (BasicBlock &B : F) {
// Phi nodes are attached to basic blocks
if (MemoryPhi *Phi = getMemoryAccess(&B)) {
@@ -1815,14 +1973,7 @@ void MemorySSA::verifyDefUses(Function &F) const {
}
}
}
-}
-
-MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
- return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
-}
-
-MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
- return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
+#endif
}
/// Perform a local numbering on blocks so that instruction ordering can be
@@ -2055,25 +2206,11 @@ void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
-MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
- DominatorTree *D)
- : MemorySSAWalker(M), Walker(*M, *A, *D) {}
-
-void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
- if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
- MUD->resetOptimized();
-}
-
-/// Walk the use-def chains starting at \p MA and find
+/// Walk the use-def chains starting at \p StartingAccess and find
/// the MemoryAccess that actually clobbers Loc.
///
/// \returns our clobbering memory access
-MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
- MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
- return Walker.findClobber(StartingAccess, Q);
-}
-
-MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
+MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
if (isa<MemoryPhi>(StartingAccess))
return StartingAccess;
@@ -2086,7 +2223,7 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
// Conservatively, fences are always clobbers, so don't perform the walk if we
// hit a fence.
- if (!ImmutableCallSite(I) && I->isFenceLike())
+ if (!isa<CallBase>(I) && I->isFenceLike())
return StartingUseOrDef;
UpwardsMemoryQuery Q;
@@ -2097,11 +2234,12 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
// Unlike the other function, do not walk to the def of a def, because we are
// handed something we already believe is the clobbering access.
+ // We never set SkipSelf to true in Q in this method.
MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
? StartingUseOrDef->getDefiningAccess()
: StartingUseOrDef;
- MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
+ MemoryAccess *Clobber = Walker.findClobber(DefiningAccess, Q);
LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n");
LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
@@ -2110,26 +2248,33 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
}
MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
+ bool SkipSelf) {
auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
// If this is a MemoryPhi, we can't do anything.
if (!StartingAccess)
return MA;
+ bool IsOptimized = false;
+
// If this is an already optimized use or def, return the optimized result.
// Note: Currently, we store the optimized def result in a separate field,
// since we can't use the defining access.
- if (StartingAccess->isOptimized())
- return StartingAccess->getOptimized();
+ if (StartingAccess->isOptimized()) {
+ if (!SkipSelf || !isa<MemoryDef>(StartingAccess))
+ return StartingAccess->getOptimized();
+ IsOptimized = true;
+ }
const Instruction *I = StartingAccess->getMemoryInst();
- UpwardsMemoryQuery Q(I, StartingAccess);
// We can't sanely do anything with a fence, since they conservatively clobber
// all memory, and have no locations to get pointers from to try to
// disambiguate.
- if (!Q.IsCall && I->isFenceLike())
+ if (!isa<CallBase>(I) && I->isFenceLike())
return StartingAccess;
+ UpwardsMemoryQuery Q(I, StartingAccess);
+
if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
StartingAccess->setOptimized(LiveOnEntry);
@@ -2137,33 +2282,71 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
return LiveOnEntry;
}
- // Start with the thing we already think clobbers this location
- MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
+ MemoryAccess *OptimizedAccess;
+ if (!IsOptimized) {
+ // Start with the thing we already think clobbers this location
+ MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
+
+ // At this point, DefiningAccess may be the live on entry def.
+ // If it is, we will not get a better result.
+ if (MSSA->isLiveOnEntryDef(DefiningAccess)) {
+ StartingAccess->setOptimized(DefiningAccess);
+ StartingAccess->setOptimizedAccessType(None);
+ return DefiningAccess;
+ }
- // At this point, DefiningAccess may be the live on entry def.
- // If it is, we will not get a better result.
- if (MSSA->isLiveOnEntryDef(DefiningAccess)) {
- StartingAccess->setOptimized(DefiningAccess);
- StartingAccess->setOptimizedAccessType(None);
- return DefiningAccess;
- }
+ OptimizedAccess = Walker.findClobber(DefiningAccess, Q);
+ StartingAccess->setOptimized(OptimizedAccess);
+ if (MSSA->isLiveOnEntryDef(OptimizedAccess))
+ StartingAccess->setOptimizedAccessType(None);
+ else if (Q.AR == MustAlias)
+ StartingAccess->setOptimizedAccessType(MustAlias);
+ } else
+ OptimizedAccess = StartingAccess->getOptimized();
- MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
- LLVM_DEBUG(dbgs() << *DefiningAccess << "\n");
- LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
- LLVM_DEBUG(dbgs() << *Result << "\n");
-
- StartingAccess->setOptimized(Result);
- if (MSSA->isLiveOnEntryDef(Result))
- StartingAccess->setOptimizedAccessType(None);
- else if (Q.AR == MustAlias)
- StartingAccess->setOptimizedAccessType(MustAlias);
+ LLVM_DEBUG(dbgs() << *StartingAccess << "\n");
+ LLVM_DEBUG(dbgs() << "Optimized Memory SSA clobber for " << *I << " is ");
+ LLVM_DEBUG(dbgs() << *OptimizedAccess << "\n");
+
+ MemoryAccess *Result;
+ if (SkipSelf && isa<MemoryPhi>(OptimizedAccess) &&
+ isa<MemoryDef>(StartingAccess)) {
+ assert(isa<MemoryDef>(Q.OriginalAccess));
+ Q.SkipSelfAccess = true;
+ Result = Walker.findClobber(OptimizedAccess, Q);
+ } else
+ Result = OptimizedAccess;
+
+ LLVM_DEBUG(dbgs() << "Result Memory SSA clobber [SkipSelf = " << SkipSelf);
+ LLVM_DEBUG(dbgs() << "] for " << *I << " is " << *Result << "\n");
return Result;
}
MemoryAccess *
+MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+ return Walker->getClobberingMemoryAccessBase(MA, false);
+}
+
+MemoryAccess *
+MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) {
+ return Walker->getClobberingMemoryAccessBase(MA, Loc);
+}
+
+MemoryAccess *
+MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+ return Walker->getClobberingMemoryAccessBase(MA, true);
+}
+
+MemoryAccess *
+MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) {
+ return Walker->getClobberingMemoryAccessBase(MA, Loc);
+}
+
+MemoryAccess *
DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
return Use->getDefiningAccess();
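Illustrative only (not from the patch): with the wiring above, a client holding a MemorySSA instance can ask either walker for a clobber; the skip-self variant is the new piece and will walk past the queried MemoryDef when the ordinary answer loops back to it. MSSA and MD are assumed to be a valid analysis result and one of its defs.

#include "llvm/Analysis/MemorySSA.h"
using namespace llvm;

static void queryBothWalkers(MemorySSA &MSSA, MemoryDef *MD) {
  // Default walker: caches its answer on MD as the "optimized" access.
  MemoryAccess *Clobber = MSSA.getWalker()->getClobberingMemoryAccess(MD);
  // Skip-self walker: may continue past MD; the extra result is not cached.
  MemoryAccess *PastSelf =
      MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MD);
  (void)Clobber;
  (void)PastSelf;
}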
diff --git a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
index abe2b3c25a58..6c817d203684 100644
--- a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------===//
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -91,7 +93,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
// FIXME: Figure out whether this is dead code and if so remove it.
if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
// These will have been filled in by the recursive read we did above.
- std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin());
+ llvm::copy(PhiOps, Phi->op_begin());
std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
}
} else {
@@ -264,16 +266,15 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end();
UI != UE;) {
Use &U = *UI++;
- // Leave the uses alone
- if (isa<MemoryUse>(U.getUser()))
+ // Leave the MemoryUses alone.
+ // Also make sure we skip ourselves to avoid self references.
+ if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD)
continue;
U.set(MD);
}
}
// and that def is now our defining access.
- // We change them in this order otherwise we will appear in the use list
- // above and reset ourselves.
MD->setDefiningAccess(DefBefore);
SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end());
@@ -392,6 +393,522 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) {
}
}
+void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) {
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
+ MPhi->unorderedDeleteIncomingBlock(From);
+ if (MPhi->getNumIncomingValues() == 1)
+ removeMemoryAccess(MPhi);
+ }
+}
+
+void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From,
+ BasicBlock *To) {
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
+ bool Found = false;
+ MPhi->unorderedDeleteIncomingIf([&](const MemoryAccess *, BasicBlock *B) {
+ if (From != B)
+ return false;
+ if (Found)
+ return true;
+ Found = true;
+ return false;
+ });
+ if (MPhi->getNumIncomingValues() == 1)
+ removeMemoryAccess(MPhi);
+ }
+}
+
+void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
+ const ValueToValueMapTy &VMap,
+ PhiToDefMap &MPhiMap) {
+ auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * {
+ MemoryAccess *InsnDefining = MA;
+ if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) {
+ if (!MSSA->isLiveOnEntryDef(DefMUD)) {
+ Instruction *DefMUDI = DefMUD->getMemoryInst();
+ assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
+ if (Instruction *NewDefMUDI =
+ cast_or_null<Instruction>(VMap.lookup(DefMUDI)))
+ InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
+ }
+ } else {
+ MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining);
+ if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi))
+ InsnDefining = NewDefPhi;
+ }
+ assert(InsnDefining && "Defining instruction cannot be nullptr.");
+ return InsnDefining;
+ };
+
+ const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
+ if (!Acc)
+ return;
+ for (const MemoryAccess &MA : *Acc) {
+ if (const MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&MA)) {
+ Instruction *Insn = MUD->getMemoryInst();
+ // Entry does not exist if the clone of the block did not clone all
+ // instructions. This occurs in LoopRotate when cloning instructions
+ // from the old header to the old preheader. The cloned instruction may
+ // also be a simplified Value, not an Instruction (see LoopRotate).
+ if (Instruction *NewInsn =
+ dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) {
+ MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
+ NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), MUD);
+ MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
+ }
+ }
+ }
+}
+
+void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
+ ArrayRef<BasicBlock *> ExitBlocks,
+ const ValueToValueMapTy &VMap,
+ bool IgnoreIncomingWithNoClones) {
+ PhiToDefMap MPhiMap;
+
+ auto FixPhiIncomingValues = [&](MemoryPhi *Phi, MemoryPhi *NewPhi) {
+ assert(Phi && NewPhi && "Invalid Phi nodes.");
+ BasicBlock *NewPhiBB = NewPhi->getBlock();
+ SmallPtrSet<BasicBlock *, 4> NewPhiBBPreds(pred_begin(NewPhiBB),
+ pred_end(NewPhiBB));
+ for (unsigned It = 0, E = Phi->getNumIncomingValues(); It < E; ++It) {
+ MemoryAccess *IncomingAccess = Phi->getIncomingValue(It);
+ BasicBlock *IncBB = Phi->getIncomingBlock(It);
+
+ if (BasicBlock *NewIncBB = cast_or_null<BasicBlock>(VMap.lookup(IncBB)))
+ IncBB = NewIncBB;
+ else if (IgnoreIncomingWithNoClones)
+ continue;
+
+ // Now we have IncBB, and will need to add incoming from it to NewPhi.
+
+ // If IncBB is not a predecessor of NewPhiBB, then do not add it.
+ // NewPhiBB was cloned without that edge.
+ if (!NewPhiBBPreds.count(IncBB))
+ continue;
+
+ // Determine incoming value and add it as incoming from IncBB.
+ if (MemoryUseOrDef *IncMUD = dyn_cast<MemoryUseOrDef>(IncomingAccess)) {
+ if (!MSSA->isLiveOnEntryDef(IncMUD)) {
+ Instruction *IncI = IncMUD->getMemoryInst();
+ assert(IncI && "Found MemoryUseOrDef with no Instruction.");
+ if (Instruction *NewIncI =
+ cast_or_null<Instruction>(VMap.lookup(IncI))) {
+ IncMUD = MSSA->getMemoryAccess(NewIncI);
+ assert(IncMUD &&
+ "MemoryUseOrDef cannot be null, all preds processed.");
+ }
+ }
+ NewPhi->addIncoming(IncMUD, IncBB);
+ } else {
+ MemoryPhi *IncPhi = cast<MemoryPhi>(IncomingAccess);
+ if (MemoryAccess *NewDefPhi = MPhiMap.lookup(IncPhi))
+ NewPhi->addIncoming(NewDefPhi, IncBB);
+ else
+ NewPhi->addIncoming(IncPhi, IncBB);
+ }
+ }
+ };
+
+ auto ProcessBlock = [&](BasicBlock *BB) {
+ BasicBlock *NewBlock = cast_or_null<BasicBlock>(VMap.lookup(BB));
+ if (!NewBlock)
+ return;
+
+ assert(!MSSA->getWritableBlockAccesses(NewBlock) &&
+ "Cloned block should have no accesses");
+
+ // Add MemoryPhi.
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB)) {
+ MemoryPhi *NewPhi = MSSA->createMemoryPhi(NewBlock);
+ MPhiMap[MPhi] = NewPhi;
+ }
+ // Update Uses and Defs.
+ cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap);
+ };
+
+ for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+ ProcessBlock(BB);
+
+ for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
+ if (MemoryAccess *NewPhi = MPhiMap.lookup(MPhi))
+ FixPhiIncomingValues(MPhi, cast<MemoryPhi>(NewPhi));
+}
+
+void MemorySSAUpdater::updateForClonedBlockIntoPred(
+ BasicBlock *BB, BasicBlock *P1, const ValueToValueMapTy &VM) {
+ // All defs/phis from outside BB that are used in BB, are valid uses in P1.
+ // Since those defs/phis must have dominated BB, and also dominate P1.
+ // Defs from BB being used in BB will be replaced with the cloned defs from
+ // VM. The uses of BB's Phi (if it exists) in BB will be replaced by the
+ // incoming def into the Phi from P1.
+ PhiToDefMap MPhiMap;
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
+ MPhiMap[MPhi] = MPhi->getIncomingValueForBlock(P1);
+ cloneUsesAndDefs(BB, P1, VM, MPhiMap);
+}
+
+template <typename Iter>
+void MemorySSAUpdater::privateUpdateExitBlocksForClonedLoop(
+ ArrayRef<BasicBlock *> ExitBlocks, Iter ValuesBegin, Iter ValuesEnd,
+ DominatorTree &DT) {
+ SmallVector<CFGUpdate, 4> Updates;
+ // Update/insert phis in all successors of exit blocks.
+ for (auto *Exit : ExitBlocks)
+ for (const ValueToValueMapTy *VMap : make_range(ValuesBegin, ValuesEnd))
+ if (BasicBlock *NewExit = cast_or_null<BasicBlock>(VMap->lookup(Exit))) {
+ BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
+ Updates.push_back({DT.Insert, NewExit, ExitSucc});
+ }
+ applyInsertUpdates(Updates, DT);
+}
+
+void MemorySSAUpdater::updateExitBlocksForClonedLoop(
+ ArrayRef<BasicBlock *> ExitBlocks, const ValueToValueMapTy &VMap,
+ DominatorTree &DT) {
+ const ValueToValueMapTy *const Arr[] = {&VMap};
+ privateUpdateExitBlocksForClonedLoop(ExitBlocks, std::begin(Arr),
+ std::end(Arr), DT);
+}
+
+void MemorySSAUpdater::updateExitBlocksForClonedLoop(
+ ArrayRef<BasicBlock *> ExitBlocks,
+ ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT) {
+ auto GetPtr = [&](const std::unique_ptr<ValueToValueMapTy> &I) {
+ return I.get();
+ };
+ using MappedIteratorType =
+ mapped_iterator<const std::unique_ptr<ValueToValueMapTy> *,
+ decltype(GetPtr)>;
+ auto MapBegin = MappedIteratorType(VMaps.begin(), GetPtr);
+ auto MapEnd = MappedIteratorType(VMaps.end(), GetPtr);
+ privateUpdateExitBlocksForClonedLoop(ExitBlocks, MapBegin, MapEnd, DT);
+}
+
+void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
+ DominatorTree &DT) {
+ SmallVector<CFGUpdate, 4> RevDeleteUpdates;
+ SmallVector<CFGUpdate, 4> InsertUpdates;
+ for (auto &Update : Updates) {
+ if (Update.getKind() == DT.Insert)
+ InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
+ else
+ RevDeleteUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
+ }
+
+ if (!RevDeleteUpdates.empty()) {
+ // Update for inserted edges: use NewDT and snapshot the CFG as if the
+ // deletes had not occurred.
+ // FIXME: This creates a new DT, so it's more expensive to mix
+ // deletes/inserts than to do just inserts. We can do an incremental update
+ // on the DT to revert the deletes, then re-delete the edges. Teaching DT to
+ // do this is part of a pending cleanup.
+ DominatorTree NewDT(DT, RevDeleteUpdates);
+ GraphDiff<BasicBlock *> GD(RevDeleteUpdates);
+ applyInsertUpdates(InsertUpdates, NewDT, &GD);
+ } else {
+ GraphDiff<BasicBlock *> GD;
+ applyInsertUpdates(InsertUpdates, DT, &GD);
+ }
+
+ // Update for deleted edges
+ for (auto &Update : RevDeleteUpdates)
+ removeEdge(Update.getFrom(), Update.getTo());
+}
+
+void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
+ DominatorTree &DT) {
+ GraphDiff<BasicBlock *> GD;
+ applyInsertUpdates(Updates, DT, &GD);
+}
+
+void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
+ DominatorTree &DT,
+ const GraphDiff<BasicBlock *> *GD) {
+ // Get recursive last Def, assuming well formed MSSA and updated DT.
+ auto GetLastDef = [&](BasicBlock *BB) -> MemoryAccess * {
+ while (true) {
+ MemorySSA::DefsList *Defs = MSSA->getWritableBlockDefs(BB);
+ // Return last Def or Phi in BB, if it exists.
+ if (Defs)
+ return &*(--Defs->end());
+
+ // Check number of predecessors, we only care if there's more than one.
+ unsigned Count = 0;
+ BasicBlock *Pred = nullptr;
+ for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
+ Pred = Pair.second;
+ Count++;
+ if (Count == 2)
+ break;
+ }
+
+ // If BB has multiple predecessors, get last definition from IDom.
+ if (Count != 1) {
+ // [SimpleLoopUnswitch] If BB is a dead block, about to be deleted, its
+ // DT is invalidated. Return LoE as its last def. This will be added to
+ // MemoryPhi node, and later deleted when the block is deleted.
+ if (!DT.getNode(BB))
+ return MSSA->getLiveOnEntryDef();
+ if (auto *IDom = DT.getNode(BB)->getIDom())
+ if (IDom->getBlock() != BB) {
+ BB = IDom->getBlock();
+ continue;
+ }
+ return MSSA->getLiveOnEntryDef();
+ } else {
+ // Single predecessor, BB cannot be dead. GetLastDef of Pred.
+ assert(Count == 1 && Pred && "Single predecessor expected.");
+ BB = Pred;
+ }
+ };
+ llvm_unreachable("Unable to get last definition.");
+ };
+
+ // Get nearest IDom given a set of blocks.
+ // TODO: this can be optimized by starting the search at the node with the
+ // lowest level (highest in the tree).
+ auto FindNearestCommonDominator =
+ [&](const SmallSetVector<BasicBlock *, 2> &BBSet) -> BasicBlock * {
+ BasicBlock *PrevIDom = *BBSet.begin();
+ for (auto *BB : BBSet)
+ PrevIDom = DT.findNearestCommonDominator(PrevIDom, BB);
+ return PrevIDom;
+ };
+
+ // Get all blocks that dominate PrevIDom, stop when reaching CurrIDom. Do not
+ // include CurrIDom.
+ auto GetNoLongerDomBlocks =
+ [&](BasicBlock *PrevIDom, BasicBlock *CurrIDom,
+ SmallVectorImpl<BasicBlock *> &BlocksPrevDom) {
+ if (PrevIDom == CurrIDom)
+ return;
+ BlocksPrevDom.push_back(PrevIDom);
+ BasicBlock *NextIDom = PrevIDom;
+ while (BasicBlock *UpIDom =
+ DT.getNode(NextIDom)->getIDom()->getBlock()) {
+ if (UpIDom == CurrIDom)
+ break;
+ BlocksPrevDom.push_back(UpIDom);
+ NextIDom = UpIDom;
+ }
+ };
+
+ // Map a BB to its predecessors: added + previously existing. To get a
+ // deterministic order, store predecessors as SetVectors. The order in each
+ // will be defined by the order in Updates (fixed) and the order given by
+ // children<> (also fixed). Since we further iterate over these ordered sets,
+ // we lose the information of multiple edges possibly existing between two
+ // blocks, so we'll keep an EdgeCount map for that.
+ // An alternate implementation could keep an unordered set for the predecessors,
+ // traverse either Updates or children<> each time to get the deterministic
+ // order, and drop the usage of EdgeCount. This alternate approach would still
+ // require querying the maps for each predecessor, and the children<> call has
+ // additional computation inside for creating the snapshot-graph predecessors.
+ // As such, we favor using a little additional storage and less compute time.
+ // This decision can be revisited if we find the alternative more favorable.
+
+ struct PredInfo {
+ SmallSetVector<BasicBlock *, 2> Added;
+ SmallSetVector<BasicBlock *, 2> Prev;
+ };
+ SmallDenseMap<BasicBlock *, PredInfo> PredMap;
+
+ for (auto &Edge : Updates) {
+ BasicBlock *BB = Edge.getTo();
+ auto &AddedBlockSet = PredMap[BB].Added;
+ AddedBlockSet.insert(Edge.getFrom());
+ }
+
+ // Store all existing predecessors for each BB; at least one must exist.
+ SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, int> EdgeCountMap;
+ SmallPtrSet<BasicBlock *, 2> NewBlocks;
+ for (auto &BBPredPair : PredMap) {
+ auto *BB = BBPredPair.first;
+ const auto &AddedBlockSet = BBPredPair.second.Added;
+ auto &PrevBlockSet = BBPredPair.second.Prev;
+ for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
+ BasicBlock *Pi = Pair.second;
+ if (!AddedBlockSet.count(Pi))
+ PrevBlockSet.insert(Pi);
+ EdgeCountMap[{Pi, BB}]++;
+ }
+
+ if (PrevBlockSet.empty()) {
+ assert(pred_size(BB) == AddedBlockSet.size() && "Duplicate edges added.");
+ LLVM_DEBUG(
+ dbgs()
+ << "Adding a predecessor to a block with no predecessors. "
+ "This must be an edge added to a new, likely cloned, block. "
+ "Its memory accesses must be already correct, assuming completed "
+ "via the updateExitBlocksForClonedLoop API. "
+ "Assert a single such edge is added so no phi addition or "
+ "additional processing is required.\n");
+ assert(AddedBlockSet.size() == 1 &&
+ "Can only handle adding one predecessor to a new block.");
+ // Need to remove new blocks from PredMap. Remove below to not invalidate
+ // iterator here.
+ NewBlocks.insert(BB);
+ }
+ }
+ // Nothing to process for new/cloned blocks.
+ for (auto *BB : NewBlocks)
+ PredMap.erase(BB);
+
+ SmallVector<BasicBlock *, 8> BlocksToProcess;
+ SmallVector<BasicBlock *, 16> BlocksWithDefsToReplace;
+
+ // First create MemoryPhis in all blocks that don't have one. Create in the
+ // order found in Updates, not in PredMap, to get deterministic numbering.
+ for (auto &Edge : Updates) {
+ BasicBlock *BB = Edge.getTo();
+ if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB))
+ MSSA->createMemoryPhi(BB);
+ }
+
+ // Now we'll fill in the MemoryPhis with the right incoming values.
+ for (auto &BBPredPair : PredMap) {
+ auto *BB = BBPredPair.first;
+ const auto &PrevBlockSet = BBPredPair.second.Prev;
+ const auto &AddedBlockSet = BBPredPair.second.Added;
+ assert(!PrevBlockSet.empty() &&
+ "At least one previous predecessor must exist.");
+
+ // TODO: if this becomes a bottleneck, we can save on GetLastDef calls by
+ // keeping this map before the loop. We can reuse already populated entries
+ // if an edge is added from the same predecessor to two different blocks,
+ // and this does happen in rotate. Note that the map needs to be updated
+ // when deleting non-necessary phis below, if the phi is in the map by
+ // replacing the value with DefP1.
+ SmallDenseMap<BasicBlock *, MemoryAccess *> LastDefAddedPred;
+ for (auto *AddedPred : AddedBlockSet) {
+ auto *DefPn = GetLastDef(AddedPred);
+ assert(DefPn != nullptr && "Unable to find last definition.");
+ LastDefAddedPred[AddedPred] = DefPn;
+ }
+
+ MemoryPhi *NewPhi = MSSA->getMemoryAccess(BB);
+ // If Phi is not empty, add an incoming edge from each added pred. Must
+ // still compute blocks with defs to replace for this block below.
+ if (NewPhi->getNumOperands()) {
+ for (auto *Pred : AddedBlockSet) {
+ auto *LastDefForPred = LastDefAddedPred[Pred];
+ for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+ NewPhi->addIncoming(LastDefForPred, Pred);
+ }
+ } else {
+ // Pick any existing predecessor and get its definition. All other
+ // existing predecessors should have the same one, since no phi existed.
+ auto *P1 = *PrevBlockSet.begin();
+ MemoryAccess *DefP1 = GetLastDef(P1);
+
+ // Check DefP1 against all Defs in LastDefPredPair. If all the same,
+ // nothing to add.
+ bool InsertPhi = false;
+ for (auto LastDefPredPair : LastDefAddedPred)
+ if (DefP1 != LastDefPredPair.second) {
+ InsertPhi = true;
+ break;
+ }
+ if (!InsertPhi) {
+ // Since NewPhi may be used in other newly added Phis, replace all uses
+ // of NewPhi with the definition coming from all predecessors (DefP1),
+ // before deleting it.
+ NewPhi->replaceAllUsesWith(DefP1);
+ removeMemoryAccess(NewPhi);
+ continue;
+ }
+
+ // Update Phi with new values for new predecessors and old value for all
+ // other predecessors. Since AddedBlockSet and PrevBlockSet are ordered
+ // sets, the order of entries in NewPhi is deterministic.
+ for (auto *Pred : AddedBlockSet) {
+ auto *LastDefForPred = LastDefAddedPred[Pred];
+ for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+ NewPhi->addIncoming(LastDefForPred, Pred);
+ }
+ for (auto *Pred : PrevBlockSet)
+ for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+ NewPhi->addIncoming(DefP1, Pred);
+
+ // Insert BB in the set of blocks that now have definition. We'll use this
+ // to compute IDF and add Phis there next.
+ BlocksToProcess.push_back(BB);
+ }
+
+ // Get all blocks that used to dominate BB and no longer do after adding
+ // AddedBlockSet, where PrevBlockSet are the previously known predecessors.
+ assert(DT.getNode(BB)->getIDom() && "BB does not have valid idom");
+ BasicBlock *PrevIDom = FindNearestCommonDominator(PrevBlockSet);
+ assert(PrevIDom && "Previous IDom should exist");
+ BasicBlock *NewIDom = DT.getNode(BB)->getIDom()->getBlock();
+ assert(NewIDom && "BB should have a new valid idom");
+ assert(DT.dominates(NewIDom, PrevIDom) &&
+ "New idom should dominate old idom");
+ GetNoLongerDomBlocks(PrevIDom, NewIDom, BlocksWithDefsToReplace);
+ }
+
+ // Compute IDF and add Phis in all IDF blocks that do not have one.
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ if (!BlocksToProcess.empty()) {
+ ForwardIDFCalculator IDFs(DT);
+ SmallPtrSet<BasicBlock *, 16> DefiningBlocks(BlocksToProcess.begin(),
+ BlocksToProcess.end());
+ IDFs.setDefiningBlocks(DefiningBlocks);
+ IDFs.calculate(IDFBlocks);
+ for (auto *BBIDF : IDFBlocks) {
+ if (auto *IDFPhi = MSSA->getMemoryAccess(BBIDF)) {
+ // Update existing Phi.
+ // FIXME: some updates may be redundant, try to optimize and skip some.
+ for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I)
+ IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I)));
+ } else {
+ IDFPhi = MSSA->createMemoryPhi(BBIDF);
+ for (auto &Pair : children<GraphDiffInvBBPair>({GD, BBIDF})) {
+ BasicBlock *Pi = Pair.second;
+ IDFPhi->addIncoming(GetLastDef(Pi), Pi);
+ }
+ }
+ }
+ }
+
+ // Now for all defs in BlocksWithDefsToReplace, if there are uses they no
+ // longer dominate, replace those with the closest dominating def.
+ // This will also update optimized accesses, as they're also uses.
+ for (auto *BlockWithDefsToReplace : BlocksWithDefsToReplace) {
+ if (auto DefsList = MSSA->getWritableBlockDefs(BlockWithDefsToReplace)) {
+ for (auto &DefToReplaceUses : *DefsList) {
+ BasicBlock *DominatingBlock = DefToReplaceUses.getBlock();
+ Value::use_iterator UI = DefToReplaceUses.use_begin(),
+ E = DefToReplaceUses.use_end();
+ for (; UI != E;) {
+ Use &U = *UI;
+ ++UI;
+ MemoryAccess *Usr = dyn_cast<MemoryAccess>(U.getUser());
+ if (MemoryPhi *UsrPhi = dyn_cast<MemoryPhi>(Usr)) {
+ BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U);
+ if (!DT.dominates(DominatingBlock, DominatedBlock))
+ U.set(GetLastDef(DominatedBlock));
+ } else {
+ BasicBlock *DominatedBlock = Usr->getBlock();
+ if (!DT.dominates(DominatingBlock, DominatedBlock)) {
+ if (auto *DomBlPhi = MSSA->getMemoryAccess(DominatedBlock))
+ U.set(DomBlPhi);
+ else {
+ auto *IDom = DT.getNode(DominatedBlock)->getIDom();
+ assert(IDom && "Block must have a valid IDom.");
+ U.set(GetLastDef(IDom->getBlock()));
+ }
+ cast<MemoryUseOrDef>(Usr)->resetOptimized();
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
// Move What before Where in the MemorySSA IR.
template <class WhereType>
void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
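A hedged usage sketch of the new update entry points (assumptions: MSSAU wraps this function's MemorySSA, DT already reflects the freshly inserted edge From->To, and DominatorTree::UpdateType is the same cfg::Update<BasicBlock *> type as the updater's CFGUpdate):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

static void notifyEdgeInserted(MemorySSAUpdater &MSSAU, DominatorTree &DT,
                               BasicBlock *From, BasicBlock *To) {
  SmallVector<DominatorTree::UpdateType, 1> Updates;
  Updates.push_back({DT.Insert, From, To});
  // applyInsertUpdates expects DT to already contain the inserted edges;
  // applyUpdates also accepts a mix of insertions and deletions.
  MSSAU.applyInsertUpdates(Updates, DT);
}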
@@ -498,13 +1015,14 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
}
void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
- BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds) {
+ BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds,
+ bool IdenticalEdgesWereMerged) {
assert(!MSSA->getWritableBlockAccesses(New) &&
"Access list should be null for a new block.");
MemoryPhi *Phi = MSSA->getMemoryAccess(Old);
if (!Phi)
return;
- if (pred_size(Old) == 1) {
+ if (Old->hasNPredecessors(1)) {
assert(pred_size(New) == Preds.size() &&
"Should have moved all predecessors.");
MSSA->moveTo(Phi, New, MemorySSA::Beginning);
@@ -513,9 +1031,17 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
"new immediate predecessor.");
MemoryPhi *NewPhi = MSSA->createMemoryPhi(New);
SmallPtrSet<BasicBlock *, 16> PredsSet(Preds.begin(), Preds.end());
+ // Currently only support the case of removing a single incoming edge when
+ // identical edges were not merged.
+ if (!IdenticalEdgesWereMerged)
+ assert(PredsSet.size() == Preds.size() &&
+ "If identical edges were not merged, we cannot have duplicate "
+ "blocks in the predecessors");
Phi->unorderedDeleteIncomingIf([&](MemoryAccess *MA, BasicBlock *B) {
if (PredsSet.count(B)) {
NewPhi->addIncoming(MA, B);
+ if (!IdenticalEdgesWereMerged)
+ PredsSet.erase(B);
return true;
}
return false;
@@ -578,9 +1104,9 @@ void MemorySSAUpdater::removeBlocks(
const SmallPtrSetImpl<BasicBlock *> &DeadBlocks) {
// First delete all uses of BB in MemoryPhis.
for (BasicBlock *BB : DeadBlocks) {
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
assert(TI && "Basic block expected to have a terminator instruction");
- for (BasicBlock *Succ : TI->successors())
+ for (BasicBlock *Succ : successors(TI))
if (!DeadBlocks.count(Succ))
if (MemoryPhi *MP = MSSA->getMemoryAccess(Succ)) {
MP->unorderedDeleteIncomingBlock(BB);
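Finally, an illustrative caller for the edge-removal helper added in this file (assuming MSSAU tracks the function whose branch From->To has just been removed from the IR):

#include "llvm/Analysis/MemorySSAUpdater.h"
using namespace llvm;

static void onEdgeDeleted(MemorySSAUpdater &MSSAU, BasicBlock *From,
                          BasicBlock *To) {
  // Drops From's incoming value from To's MemoryPhi; if only one incoming
  // value remains, the Phi itself is removed.
  MSSAU.removeEdge(From, To);
}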
diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index ce099ed2f391..87f76d43bb1e 100644
--- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -158,7 +158,8 @@ static void addIntrinsicToSummary(
SetVector<FunctionSummary::VFuncId> &TypeTestAssumeVCalls,
SetVector<FunctionSummary::VFuncId> &TypeCheckedLoadVCalls,
SetVector<FunctionSummary::ConstVCall> &TypeTestAssumeConstVCalls,
- SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls) {
+ SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls,
+ DominatorTree &DT) {
switch (CI->getCalledFunction()->getIntrinsicID()) {
case Intrinsic::type_test: {
auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
@@ -183,7 +184,7 @@ static void addIntrinsicToSummary(
SmallVector<DevirtCallSite, 4> DevirtCalls;
SmallVector<CallInst *, 4> Assumes;
- findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI);
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
for (auto &Call : DevirtCalls)
addVCallToSet(Call, Guid, TypeTestAssumeVCalls,
TypeTestAssumeConstVCalls);
@@ -203,7 +204,7 @@ static void addIntrinsicToSummary(
SmallVector<Instruction *, 4> Preds;
bool HasNonCallUses = false;
findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
- HasNonCallUses, CI);
+ HasNonCallUses, CI, DT);
// Any non-call uses of the result of llvm.type.checked.load will
// prevent us from optimizing away the llvm.type.test.
if (HasNonCallUses)
@@ -219,11 +220,19 @@ static void addIntrinsicToSummary(
}
}
-static void
-computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
- const Function &F, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, bool HasLocalsInUsedOrAsm,
- DenseSet<GlobalValue::GUID> &CantBePromoted) {
+static bool isNonVolatileLoad(const Instruction *I) {
+ if (const auto *LI = dyn_cast<LoadInst>(I))
+ return !LI->isVolatile();
+
+ return false;
+}
+
+static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
+ const Function &F, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, DominatorTree &DT,
+ bool HasLocalsInUsedOrAsm,
+ DenseSet<GlobalValue::GUID> &CantBePromoted,
+ bool IsThinLTO) {
// Summary not currently supported for anonymous functions, they should
// have been named.
assert(F.hasName());
@@ -244,6 +253,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// Add personality function, prefix data and prologue data to function's ref
// list.
findRefEdges(Index, &F, RefEdges, Visited);
+ std::vector<const Instruction *> NonVolatileLoads;
bool HasInlineAsmMaybeReferencingInternal = false;
for (const BasicBlock &BB : F)
@@ -251,6 +261,13 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
if (isa<DbgInfoIntrinsic>(I))
continue;
++NumInsts;
+ if (isNonVolatileLoad(&I)) {
+ // Postpone processing of non-volatile load instructions;
+ // see the comments below.
+ Visited.insert(&I);
+ NonVolatileLoads.push_back(&I);
+ continue;
+ }
findRefEdges(Index, &I, RefEdges, Visited);
auto CS = ImmutableCallSite(&I);
if (!CS)
@@ -284,7 +301,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
if (CI && CalledFunction->isIntrinsic()) {
addIntrinsicToSummary(
CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls,
- TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls);
+ TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT);
continue;
}
// We should have named any anonymous globals
@@ -340,6 +357,24 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
}
}
+ // By now we have processed all instructions in the function except
+ // non-volatile loads. All new refs we add in the loop below
+ // are obviously constant. All constant refs are grouped at the
+ // end of the RefEdges vector, so we can use a single integer value
+ // to identify them.
+ unsigned RefCnt = RefEdges.size();
+ for (const Instruction *I : NonVolatileLoads) {
+ Visited.erase(I);
+ findRefEdges(Index, I, RefEdges, Visited);
+ }
+ std::vector<ValueInfo> Refs = RefEdges.takeVector();
+ // A regular LTO module doesn't participate in ThinLTO importing,
+ // so no reference from it can be read-only, since that would
+ // require importing the variable as a local copy.
+ if (IsThinLTO)
+ for (; RefCnt < Refs.size(); ++RefCnt)
+ Refs[RefCnt].setReadOnly();
+
// Explicitly add hot edges to enforce importing for designated GUIDs for
// sample PGO, to enable the same inlines as the profiled optimized binary.
for (auto &I : F.getImportGUIDs())
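The hunk above postpones non-volatile loads, appends their references after every other reference, and then marks only that trailing segment read-only, and only for ThinLTO modules. A hedged sketch of the same process-defer-append-tag-the-tail shape, using plain containers instead of the ModuleSummaryIndex types (Ref and buildRefs are invented names):

    #include <cstddef>
    #include <string>
    #include <vector>

    struct Ref {
      std::string Target;
      bool ReadOnly;
    };

    // Ordinary references are collected first; references coming only from
    // non-volatile loads are deferred and appended afterwards, so the
    // read-only candidates form a contiguous tail of the vector.
    std::vector<Ref> buildRefs(const std::vector<std::string> &Ordinary,
                               const std::vector<std::string> &FromNonVolatileLoads,
                               bool IsThinLTO) {
      std::vector<Ref> Refs;
      for (const auto &T : Ordinary)
        Refs.push_back({T, false});
      std::size_t RefCnt = Refs.size();      // First index of the deferred tail.
      for (const auto &T : FromNonVolatileLoads)
        Refs.push_back({T, false});
      // Regular LTO modules never import variables, so the read-only marking
      // only makes sense for ThinLTO, as in the hunk above.
      if (IsThinLTO)
        for (; RefCnt < Refs.size(); ++RefCnt)
          Refs[RefCnt].ReadOnly = true;
      return Refs;
    }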
@@ -350,22 +385,18 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport =
- NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
- // Inliner doesn't handle variadic functions.
- // FIXME: refactor this to use the same code that inliner is using.
- F.isVarArg() ||
- // Don't try to import functions with noinline attribute.
- F.getAttributes().hasFnAttribute(Attribute::NoInline);
+ NonRenamableLocal || HasInlineAsmMaybeReferencingInternal;
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
/* Live = */ false, F.isDSOLocal());
FunctionSummary::FFlags FunFlags{
F.hasFnAttribute(Attribute::ReadNone),
F.hasFnAttribute(Attribute::ReadOnly),
- F.hasFnAttribute(Attribute::NoRecurse),
- F.returnDoesNotAlias(),
- };
+ F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(),
+ // FIXME: refactor this to use the same code that inliner is using.
+ // Don't try to import functions with noinline attribute.
+ F.getAttributes().hasFnAttribute(Attribute::NoInline)};
auto FuncSummary = llvm::make_unique<FunctionSummary>(
- Flags, NumInsts, FunFlags, RefEdges.takeVector(),
+ Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
CallGraphEdges.takeVector(), TypeTests.takeVector(),
TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
TypeTestAssumeConstVCalls.takeVector(),
@@ -384,8 +415,13 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
bool NonRenamableLocal = isNonRenamableLocal(V);
GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
/* Live = */ false, V.isDSOLocal());
- auto GVarSummary =
- llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
+
+ // Don't mark variables we won't be able to internalize as read-only.
+ GlobalVarSummary::GVarFlags VarFlags(
+ !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
+ !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass());
+ auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags,
+ RefEdges.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(V.getGUID());
if (HasBlockAddress)
@@ -421,7 +457,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
ProfileSummaryInfo *PSI) {
assert(PSI);
- ModuleSummaryIndex Index(/*HaveGVs=*/true);
+ bool EnableSplitLTOUnit = false;
+ if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("EnableSplitLTOUnit")))
+ EnableSplitLTOUnit = MD->getZExtValue();
+ ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit);
// Identify the local values in the llvm.used and llvm.compiler.used sets,
// which should not be exported as they would then require renaming and
@@ -473,13 +513,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (Function *F = dyn_cast<Function>(GV)) {
std::unique_ptr<FunctionSummary> Summary =
llvm::make_unique<FunctionSummary>(
- GVFlags, 0,
+ GVFlags, /*InstCount=*/0,
FunctionSummary::FFlags{
F->hasFnAttribute(Attribute::ReadNone),
F->hasFnAttribute(Attribute::ReadOnly),
F->hasFnAttribute(Attribute::NoRecurse),
- F->returnDoesNotAlias()},
- ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{},
+ F->returnDoesNotAlias(),
+ /* NoInline = */ false},
+ /*EntryCount=*/0, ArrayRef<ValueInfo>{},
+ ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{},
ArrayRef<FunctionSummary::VFuncId>{},
ArrayRef<FunctionSummary::VFuncId>{},
@@ -488,33 +530,40 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
Index.addGlobalValueSummary(*GV, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
- llvm::make_unique<GlobalVarSummary>(GVFlags,
- ArrayRef<ValueInfo>{});
+ llvm::make_unique<GlobalVarSummary>(
+ GVFlags, GlobalVarSummary::GVarFlags(),
+ ArrayRef<ValueInfo>{});
Index.addGlobalValueSummary(*GV, std::move(Summary));
}
});
}
+ bool IsThinLTO = true;
+ if (auto *MD =
+ mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
+ IsThinLTO = MD->getZExtValue();
+
// Compute summaries for all functions defined in module, and save in the
// index.
for (auto &F : M) {
if (F.isDeclaration())
continue;
+ DominatorTree DT(const_cast<Function &>(F));
BlockFrequencyInfo *BFI = nullptr;
std::unique_ptr<BlockFrequencyInfo> BFIPtr;
if (GetBFICallback)
BFI = GetBFICallback(F);
else if (F.hasProfileData()) {
- LoopInfo LI{DominatorTree(const_cast<Function &>(F))};
+ LoopInfo LI{DT};
BranchProbabilityInfo BPI{F, LI};
BFIPtr = llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI);
BFI = BFIPtr.get();
}
- computeFunctionSummary(Index, M, F, BFI, PSI,
+ computeFunctionSummary(Index, M, F, BFI, PSI, DT,
!LocalsUsed.empty() || HasLocalInlineAsmSymbol,
- CantBePromoted);
+ CantBePromoted, IsThinLTO);
}
// Compute summaries for all variables defined in module, and save in the
@@ -545,11 +594,6 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
setLiveRoot(Index, "llvm.global_dtors");
setLiveRoot(Index, "llvm.global.annotations");
- bool IsThinLTO = true;
- if (auto *MD =
- mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
- IsThinLTO = MD->getZExtValue();
-
for (auto &GlobalList : Index) {
// Ignore entries for references that are undefined in the current module.
if (GlobalList.second.SummaryList.empty())
@@ -619,7 +663,7 @@ ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass()
}
bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
- auto &PSI = *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
Index.emplace(buildModuleSummaryIndex(
M,
[this](const Function &F) {
@@ -627,7 +671,7 @@ bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
*const_cast<Function *>(&F))
.getBFI());
},
- &PSI));
+ PSI));
return false;
}
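Both EnableSplitLTOUnit and ThinLTO are read with the same module-flag idiom in the hunks above. A small sketch of that idiom as a helper; getIntModuleFlagOrDefault is an invented name, while mdconst::extract_or_null, Module::getModuleFlag and ConstantInt are the APIs the diff itself uses:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"
    #include <cstdint>

    using namespace llvm;

    // Read an integer module flag, falling back to Default when the flag is
    // absent or is not a constant integer.
    static uint64_t getIntModuleFlagOrDefault(const Module &M, StringRef Name,
                                              uint64_t Default) {
      if (auto *MD =
              mdconst::extract_or_null<ConstantInt>(M.getModuleFlag(Name)))
        return MD->getZExtValue();
      return Default;
    }

    // Usage matching the hunks above:
    //   bool IsThinLTO = getIntModuleFlagOrDefault(M, "ThinLTO", /*Default=*/1);
    //   bool EnableSplitLTOUnit =
    //       getIntModuleFlagOrDefault(M, "EnableSplitLTOUnit", /*Default=*/0);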
diff --git a/contrib/llvm/lib/Analysis/MustExecute.cpp b/contrib/llvm/lib/Analysis/MustExecute.cpp
index 8e85366b4618..180c38ddacc2 100644
--- a/contrib/llvm/lib/Analysis/MustExecute.cpp
+++ b/contrib/llvm/lib/Analysis/MustExecute.cpp
@@ -22,20 +22,32 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-/// Computes loop safety information, checks loop body & header
-/// for the possibility of may throw exception.
-///
-void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
+const DenseMap<BasicBlock *, ColorVector> &
+LoopSafetyInfo::getBlockColors() const {
+ return BlockColors;
+}
+
+void LoopSafetyInfo::copyColors(BasicBlock *New, BasicBlock *Old) {
+ ColorVector &ColorsForNewBlock = BlockColors[New];
+ ColorVector &ColorsForOldBlock = BlockColors[Old];
+ ColorsForNewBlock = ColorsForOldBlock;
+}
+
+bool SimpleLoopSafetyInfo::blockMayThrow(const BasicBlock *BB) const {
+ (void)BB;
+ return anyBlockMayThrow();
+}
+
+bool SimpleLoopSafetyInfo::anyBlockMayThrow() const {
+ return MayThrow;
+}
+
+void SimpleLoopSafetyInfo::computeLoopSafetyInfo(const Loop *CurLoop) {
assert(CurLoop != nullptr && "CurLoop can't be null");
BasicBlock *Header = CurLoop->getHeader();
- // Setting default safety values.
- SafetyInfo->MayThrow = false;
- SafetyInfo->HeaderMayThrow = false;
// Iterate over header and compute safety info.
- SafetyInfo->HeaderMayThrow =
- !isGuaranteedToTransferExecutionToSuccessor(Header);
-
- SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
+ HeaderMayThrow = !isGuaranteedToTransferExecutionToSuccessor(Header);
+ MayThrow = HeaderMayThrow;
// Iterate over loop instructions and compute safety info.
// Skip header as it has been computed and stored in HeaderMayThrow.
// The first block in loopinfo.Blocks is guaranteed to be the header.
@@ -43,23 +55,59 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
"First block must be header");
for (Loop::block_iterator BB = std::next(CurLoop->block_begin()),
BBE = CurLoop->block_end();
- (BB != BBE) && !SafetyInfo->MayThrow; ++BB)
- SafetyInfo->MayThrow |=
- !isGuaranteedToTransferExecutionToSuccessor(*BB);
+ (BB != BBE) && !MayThrow; ++BB)
+ MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(*BB);
+
+ computeBlockColors(CurLoop);
+}
+bool ICFLoopSafetyInfo::blockMayThrow(const BasicBlock *BB) const {
+ return ICF.hasICF(BB);
+}
+
+bool ICFLoopSafetyInfo::anyBlockMayThrow() const {
+ return MayThrow;
+}
+
+void ICFLoopSafetyInfo::computeLoopSafetyInfo(const Loop *CurLoop) {
+ assert(CurLoop != nullptr && "CurLoop can't be null");
+ ICF.clear();
+ MW.clear();
+ MayThrow = false;
+ // Figure out the fact that at least one block may throw.
+ for (auto &BB : CurLoop->blocks())
+ if (ICF.hasICF(&*BB)) {
+ MayThrow = true;
+ break;
+ }
+ computeBlockColors(CurLoop);
+}
+
+void ICFLoopSafetyInfo::insertInstructionTo(const Instruction *Inst,
+ const BasicBlock *BB) {
+ ICF.insertInstructionTo(Inst, BB);
+ MW.insertInstructionTo(Inst, BB);
+}
+
+void ICFLoopSafetyInfo::removeInstruction(const Instruction *Inst) {
+ ICF.removeInstruction(Inst);
+ MW.removeInstruction(Inst);
+}
+
+void LoopSafetyInfo::computeBlockColors(const Loop *CurLoop) {
// Compute funclet colors if we might sink/hoist in a function with a funclet
// personality routine.
Function *Fn = CurLoop->getHeader()->getParent();
if (Fn->hasPersonalityFn())
if (Constant *PersonalityFn = Fn->getPersonalityFn())
if (isScopedEHPersonality(classifyEHPersonality(PersonalityFn)))
- SafetyInfo->BlockColors = colorEHFunclets(*Fn);
+ BlockColors = colorEHFunclets(*Fn);
}
/// Return true if we can prove that the given ExitBlock is not reached on the
/// first iteration of the given loop. That is, the backedge of the loop must
/// be executed before the ExitBlock is executed in any dynamic execution trace.
-static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock,
+static bool CanProveNotTakenFirstIteration(const BasicBlock *ExitBlock,
const DominatorTree *DT,
const Loop *CurLoop) {
auto *CondExitBlock = ExitBlock->getSinglePredecessor();
@@ -99,15 +147,94 @@ static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock,
return SimpleCst->isAllOnesValue();
}
+/// Collect all blocks from \p CurLoop which lie on all possible paths from
+/// the header of \p CurLoop (inclusive) to BB (exclusive) into the set
+/// \p Predecessors. If \p BB is the header, \p Predecessors will be empty.
+static void collectTransitivePredecessors(
+ const Loop *CurLoop, const BasicBlock *BB,
+ SmallPtrSetImpl<const BasicBlock *> &Predecessors) {
+ assert(Predecessors.empty() && "Garbage in predecessors set?");
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+ if (BB == CurLoop->getHeader())
+ return;
+ SmallVector<const BasicBlock *, 4> WorkList;
+ for (auto *Pred : predecessors(BB)) {
+ Predecessors.insert(Pred);
+ WorkList.push_back(Pred);
+ }
+ while (!WorkList.empty()) {
+ auto *Pred = WorkList.pop_back_val();
+ assert(CurLoop->contains(Pred) && "Should only reach loop blocks!");
+ // We are not interested in backedges, and we don't want to leave the loop.
+ if (Pred == CurLoop->getHeader())
+ continue;
+ // TODO: If BB lies in an inner loop of CurLoop, this will traverse over all
+ // blocks of this inner loop, even those that are always executed AFTER the
+ // BB. It may make our analysis more conservative than it could be, see test
+ // @nested and @nested_no_throw in test/Analysis/MustExecute/loop-header.ll.
+ // We can ignore the backedges of all loops containing BB to get a slightly
+ // more optimistic result.
+ for (auto *PredPred : predecessors(Pred))
+ if (Predecessors.insert(PredPred).second)
+ WorkList.push_back(PredPred);
+ }
+}
+
+bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
+ const BasicBlock *BB,
+ const DominatorTree *DT) const {
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+
+ // Fast path: header is always reached once the loop is entered.
+ if (BB == CurLoop->getHeader())
+ return true;
+
+ // Collect all transitive predecessors of BB in the same loop. This set will
+ // be a subset of the blocks within the loop.
+ SmallPtrSet<const BasicBlock *, 4> Predecessors;
+ collectTransitivePredecessors(CurLoop, BB, Predecessors);
+
+ // Make sure that all successors of all predecessors of BB are either:
+ // 1) BB,
+ // 2) Also predecessors of BB,
+ // 3) Exit blocks which are not taken on 1st iteration.
+ // Memoize blocks we've already checked.
+ SmallPtrSet<const BasicBlock *, 4> CheckedSuccessors;
+ for (auto *Pred : Predecessors) {
+ // Predecessor block may throw, so it has a side exit.
+ if (blockMayThrow(Pred))
+ return false;
+ for (auto *Succ : successors(Pred))
+ if (CheckedSuccessors.insert(Succ).second &&
+ Succ != BB && !Predecessors.count(Succ))
+ // By discharging conditions that are not executed on the 1st iteration,
+ // we guarantee that *at least* on the first iteration all paths from
+ // header that *may* execute will lead us to the block of interest. So
+ // that if we had virtually peeled one iteration away, in this peeled
+ // iteration the set of predecessors would contain only paths from
+ // header to BB without any exiting edges that may execute.
+ //
+ // TODO: We only do it for exiting edges currently. We could use the
+ // same function to skip some of the edges within the loop if we know
+ // that they will not be taken on the 1st iteration.
+ //
+ // TODO: If we somehow know the number of iterations in loop, the same
+ // check may be done for any arbitrary N-th iteration as long as N is
+ // not greater than minimum number of iterations in this loop.
+ if (CurLoop->contains(Succ) ||
+ !CanProveNotTakenFirstIteration(Succ, DT, CurLoop))
+ return false;
+ }
+
+ // All predecessors can only lead us to BB.
+ return true;
+}
+
/// Returns true if the instruction in a loop is guaranteed to execute at least
/// once.
-bool llvm::isGuaranteedToExecute(const Instruction &Inst,
- const DominatorTree *DT, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo) {
- // We have to check to make sure that the instruction dominates all
- // of the exit blocks. If it doesn't, then there is a path out of the loop
- // which does not execute this instruction, so we can't hoist it.
-
+bool SimpleLoopSafetyInfo::isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const {
// If the instruction is in the header block for the loop (which is very
// common), it is always guaranteed to dominate the exit blocks. Since this
// is a common case, and can save some work, check it now.
@@ -116,52 +243,48 @@ bool llvm::isGuaranteedToExecute(const Instruction &Inst,
// Inst unless we can prove that Inst comes before the potential implicit
// exit. At the moment, we use a (cheap) hack for the common case where
// the instruction of interest is the first one in the block.
- return !SafetyInfo->HeaderMayThrow ||
- Inst.getParent()->getFirstNonPHIOrDbg() == &Inst;
+ return !HeaderMayThrow ||
+ Inst.getParent()->getFirstNonPHIOrDbg() == &Inst;
- // Somewhere in this loop there is an instruction which may throw and make us
- // exit the loop.
- if (SafetyInfo->MayThrow)
- return false;
+ // If there is a path from header to exit or latch that doesn't lead to our
+ // instruction's block, return false.
+ return allLoopPathsLeadToBlock(CurLoop, Inst.getParent(), DT);
+}
- // Note: There are two styles of reasoning intermixed below for
- // implementation efficiency reasons. They are:
- // 1) If we can prove that the instruction dominates all exit blocks, then we
- // know the instruction must have executed on *some* iteration before we
- // exit. We do not prove *which* iteration the instruction must execute on.
- // 2) If we can prove that the instruction dominates the latch and all exits
- // which might be taken on the first iteration, we know the instruction must
- // execute on the first iteration. This second style allows a conditional
- // exit before the instruction of interest which is provably not taken on the
- // first iteration. This is a quite common case for range check like
- // patterns. TODO: support loops with multiple latches.
-
- const bool InstDominatesLatch =
- CurLoop->getLoopLatch() != nullptr &&
- DT->dominates(Inst.getParent(), CurLoop->getLoopLatch());
-
- // Get the exit blocks for the current loop.
- SmallVector<BasicBlock *, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
-
- // Verify that the block dominates each of the exit blocks of the loop.
- for (BasicBlock *ExitBlock : ExitBlocks)
- if (!DT->dominates(Inst.getParent(), ExitBlock))
- if (!InstDominatesLatch ||
- !CanProveNotTakenFirstIteration(ExitBlock, DT, CurLoop))
- return false;
-
- // As a degenerate case, if the loop is statically infinite then we haven't
- // proven anything since there are no exit blocks.
- if (ExitBlocks.empty())
- return false;
+bool ICFLoopSafetyInfo::isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const {
+ return !ICF.isDominatedByICFIFromSameBlock(&Inst) &&
+ allLoopPathsLeadToBlock(CurLoop, Inst.getParent(), DT);
+}
+
+bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const BasicBlock *BB,
+ const Loop *CurLoop) const {
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
- // FIXME: In general, we have to prove that the loop isn't an infinite loop.
- // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is
- // just a special case of this.)
+ // Fast path: there are no instructions before header.
+ if (BB == CurLoop->getHeader())
+ return true;
+
+ // Collect all transitive predecessors of BB in the same loop. This set will
+ // be a subset of the blocks within the loop.
+ SmallPtrSet<const BasicBlock *, 4> Predecessors;
+ collectTransitivePredecessors(CurLoop, BB, Predecessors);
+ // Find out whether any instruction in any of the predecessors could write
+ // to memory.
+ for (auto *Pred : Predecessors)
+ if (MW.mayWriteToMemory(Pred))
+ return false;
return true;
}
+bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const Instruction &I,
+ const Loop *CurLoop) const {
+ auto *BB = I.getParent();
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+ return !MW.isDominatedByMemoryWriteFromSameBlock(&I) &&
+ doesNotWriteMemoryBefore(BB, CurLoop);
+}
namespace {
struct MustExecutePrinter : public FunctionPass {
@@ -195,9 +318,9 @@ static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) {
// TODO: merge these two routines. For the moment, we display the best
// result obtained by *either* implementation. This is a bit unfair since no
// caller actually gets the full power at the moment.
- LoopSafetyInfo LSI;
- computeLoopSafetyInfo(&LSI, L);
- return isGuaranteedToExecute(I, DT, L, &LSI) ||
+ SimpleLoopSafetyInfo LSI;
+ LSI.computeLoopSafetyInfo(L);
+ return LSI.isGuaranteedToExecute(I, DT, L) ||
isGuaranteedToExecuteForEveryIteration(&I, L);
}
diff --git a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 096ea661ecb6..95ae1a6e744f 100644
--- a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -106,12 +106,12 @@ FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) {
return AAResultBase::getModRefBehavior(F);
}
-ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
if (!EnableARCOpts)
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
- switch (GetBasicARCInstKind(CS.getInstruction())) {
+ switch (GetBasicARCInstKind(Call)) {
case ARCInstKind::Retain:
case ARCInstKind::RetainRV:
case ARCInstKind::Autorelease:
@@ -128,7 +128,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
break;
}
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) {
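The ObjCARCAAResult hunk is one instance of the tree-wide move from ImmutableCallSite to CallBase: alias queries now take the call instruction directly. A hedged caller-side sketch; queryCallModRef is an invented helper, while CallBase, dyn_cast and AAResults::getModRefInfo are the interfaces the hunk itself relies on:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // If I is a call of any kind, ask alias analysis what it may do to Loc;
    // otherwise answer conservatively.
    static ModRefInfo queryCallModRef(AAResults &AA, const Instruction &I,
                                      const MemoryLocation &Loc) {
      if (const auto *Call = dyn_cast<CallBase>(&I))
        return AA.getModRefInfo(Call, Loc);
      return ModRefInfo::ModRef;
    }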
diff --git a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
index f268e2a9abdd..31c432711834 100644
--- a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -85,97 +85,73 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
}
ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
- Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- // No (mandatory) arguments.
- if (AI == AE)
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_autoreleasePoolPush", ARCInstKind::AutoreleasepoolPush)
- .Case("clang.arc.use", ARCInstKind::IntrinsicUser)
- .Default(ARCInstKind::CallOrUser);
-
- // One argument.
- const Argument *A0 = &*AI++;
- if (AI == AE) {
- // Argument is a pointer.
- PointerType *PTy = dyn_cast<PointerType>(A0->getType());
- if (!PTy)
- return ARCInstKind::CallOrUser;
-
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_retain", ARCInstKind::Retain)
- .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
- .Case("objc_unsafeClaimAutoreleasedReturnValue", ARCInstKind::ClaimRV)
- .Case("objc_retainBlock", ARCInstKind::RetainBlock)
- .Case("objc_release", ARCInstKind::Release)
- .Case("objc_autorelease", ARCInstKind::Autorelease)
- .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
- .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
- .Case("objc_retainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
- .Case("objc_retain_autorelease", ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutoreleaseReturnValue",
- ARCInstKind::FusedRetainAutoreleaseRV)
- .Case("objc_sync_enter", ARCInstKind::User)
- .Case("objc_sync_exit", ARCInstKind::User)
- .Default(ARCInstKind::CallOrUser);
-
- // Argument is i8**
- if (PointerType *Pte = dyn_cast<PointerType>(ETy))
- if (Pte->getElementType()->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
- .Case("objc_loadWeak", ARCInstKind::LoadWeak)
- .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
- .Default(ARCInstKind::CallOrUser);
-
- // Anything else with one argument.
+ Intrinsic::ID ID = F->getIntrinsicID();
+ switch (ID) {
+ default:
return ARCInstKind::CallOrUser;
+ case Intrinsic::objc_autorelease:
+ return ARCInstKind::Autorelease;
+ case Intrinsic::objc_autoreleasePoolPop:
+ return ARCInstKind::AutoreleasepoolPop;
+ case Intrinsic::objc_autoreleasePoolPush:
+ return ARCInstKind::AutoreleasepoolPush;
+ case Intrinsic::objc_autoreleaseReturnValue:
+ return ARCInstKind::AutoreleaseRV;
+ case Intrinsic::objc_copyWeak:
+ return ARCInstKind::CopyWeak;
+ case Intrinsic::objc_destroyWeak:
+ return ARCInstKind::DestroyWeak;
+ case Intrinsic::objc_initWeak:
+ return ARCInstKind::InitWeak;
+ case Intrinsic::objc_loadWeak:
+ return ARCInstKind::LoadWeak;
+ case Intrinsic::objc_loadWeakRetained:
+ return ARCInstKind::LoadWeakRetained;
+ case Intrinsic::objc_moveWeak:
+ return ARCInstKind::MoveWeak;
+ case Intrinsic::objc_release:
+ return ARCInstKind::Release;
+ case Intrinsic::objc_retain:
+ return ARCInstKind::Retain;
+ case Intrinsic::objc_retainAutorelease:
+ return ARCInstKind::FusedRetainAutorelease;
+ case Intrinsic::objc_retainAutoreleaseReturnValue:
+ return ARCInstKind::FusedRetainAutoreleaseRV;
+ case Intrinsic::objc_retainAutoreleasedReturnValue:
+ return ARCInstKind::RetainRV;
+ case Intrinsic::objc_retainBlock:
+ return ARCInstKind::RetainBlock;
+ case Intrinsic::objc_storeStrong:
+ return ARCInstKind::StoreStrong;
+ case Intrinsic::objc_storeWeak:
+ return ARCInstKind::StoreWeak;
+ case Intrinsic::objc_clang_arc_use:
+ return ARCInstKind::IntrinsicUser;
+ case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
+ return ARCInstKind::ClaimRV;
+ case Intrinsic::objc_retainedObject:
+ return ARCInstKind::NoopCast;
+ case Intrinsic::objc_unretainedObject:
+ return ARCInstKind::NoopCast;
+ case Intrinsic::objc_unretainedPointer:
+ return ARCInstKind::NoopCast;
+ case Intrinsic::objc_retain_autorelease:
+ return ARCInstKind::FusedRetainAutorelease;
+ case Intrinsic::objc_sync_enter:
+ return ARCInstKind::User;
+ case Intrinsic::objc_sync_exit:
+ return ARCInstKind::User;
+ case Intrinsic::objc_arc_annotation_topdown_bbstart:
+ case Intrinsic::objc_arc_annotation_topdown_bbend:
+ case Intrinsic::objc_arc_annotation_bottomup_bbstart:
+ case Intrinsic::objc_arc_annotation_bottomup_bbend:
+ // Ignore annotation calls. This is important to stop the
+ // optimizer from treating annotations as uses, which would
+ // make the state of the pointers they are attempting to
+ // elucidate incorrect.
+ return ARCInstKind::None;
}
-
- // Two arguments, first is i8**.
- const Argument *A1 = &*AI++;
- if (AI == AE)
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
- if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
- if (Pte->getElementType()->isIntegerTy(8))
- if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
- Type *ETy1 = PTy1->getElementType();
- // Second argument is i8*
- if (ETy1->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_storeWeak", ARCInstKind::StoreWeak)
- .Case("objc_initWeak", ARCInstKind::InitWeak)
- .Case("objc_storeStrong", ARCInstKind::StoreStrong)
- .Default(ARCInstKind::CallOrUser);
- // Second argument is i8**.
- if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
- if (Pte1->getElementType()->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_moveWeak", ARCInstKind::MoveWeak)
- .Case("objc_copyWeak", ARCInstKind::CopyWeak)
- // Ignore annotation calls. This is important to stop the
- // optimizer from treating annotations as uses which would
- // make the state of the pointers they are attempting to
- // elucidate to be incorrect.
- .Case("llvm.arc.annotation.topdown.bbstart",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.topdown.bbend",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.bottomup.bbstart",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.bottomup.bbend",
- ARCInstKind::None)
- .Default(ARCInstKind::CallOrUser);
- }
-
- // Anything else.
- return ARCInstKind::CallOrUser;
}
// A whitelist of intrinsics that we know do not use objc pointers or decrement
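GetFunctionClass now keys off Function::getIntrinsicID() instead of matching runtime-function names and argument shapes with StringSwitch, since the objc_* runtime calls are modelled as intrinsics in this import. A minimal sketch of the same classification shape; isAutoreleasePoolIntrinsic is an invented helper, the intrinsic IDs are ones listed above:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Intrinsics.h"

    using namespace llvm;

    // Look up the intrinsic ID once and switch on it, with a conservative
    // default for anything that is not a recognized runtime intrinsic.
    static bool isAutoreleasePoolIntrinsic(const Function *F) {
      switch (F->getIntrinsicID()) {
      case Intrinsic::objc_autoreleasePoolPush:
      case Intrinsic::objc_autoreleasePoolPop:
        return true;
      default:
        return false; // Treat unknown callees conservatively.
      }
    }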
diff --git a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
index 6c47651eae9e..5f4fe0f7dda2 100644
--- a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
+++ b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
@@ -37,6 +37,8 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A,
const Instruction *Inst = nullptr;
assert(!(LastInstFound == BB->end() && NextInstPos != 0) &&
"Instruction supposed to be in NumberedInsts");
+ assert(A->getParent() == BB && "Instruction supposed to be in the block!");
+ assert(B->getParent() == BB && "Instruction supposed to be in the block!");
// Start the search with the instruction found in the last lookup round.
auto II = BB->begin();
@@ -65,6 +67,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A,
bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) {
assert(A->getParent() == B->getParent() &&
"Instructions must be in the same basic block!");
+ assert(A->getParent() == BB && "Instructions must be in the tracked block!");
// First we lookup the instructions. If they don't exist, lookup will give us
// back ::end(). If they both exist, we compare the numbers. Otherwise, if NA
diff --git a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp b/contrib/llvm/lib/Analysis/OrderedInstructions.cpp
index 6d0b96f6aa8a..7b155208c02e 100644
--- a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp
+++ b/contrib/llvm/lib/Analysis/OrderedInstructions.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/OrderedInstructions.h"
+#include "llvm/Analysis/OrderedInstructions.h"
using namespace llvm;
bool OrderedInstructions::localDominates(const Instruction *InstA,
diff --git a/contrib/llvm/lib/Analysis/PhiValues.cpp b/contrib/llvm/lib/Analysis/PhiValues.cpp
index ef121815d2cf..729227c86697 100644
--- a/contrib/llvm/lib/Analysis/PhiValues.cpp
+++ b/contrib/llvm/lib/Analysis/PhiValues.cpp
@@ -14,6 +14,16 @@
using namespace llvm;
+void PhiValues::PhiValuesCallbackVH::deleted() {
+ PV->invalidateValue(getValPtr());
+}
+
+void PhiValues::PhiValuesCallbackVH::allUsesReplacedWith(Value *) {
+ // We could potentially update the cached values we have with the new value,
+ // but it's simpler to just treat the old value as invalidated.
+ PV->invalidateValue(getValPtr());
+}
+
bool PhiValues::invalidate(Function &, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
// PhiValues is invalidated if it isn't preserved.
@@ -46,6 +56,7 @@ void PhiValues::processPhi(const PHINode *Phi,
DepthMap[Phi] = DepthNumber;
// Recursively process the incoming phis of this phi.
+ TrackedValues.insert(PhiValuesCallbackVH(const_cast<PHINode *>(Phi), this));
for (Value *PhiOp : Phi->incoming_values()) {
if (PHINode *PhiPhiOp = dyn_cast<PHINode>(PhiOp)) {
// Recurse if the phi has not yet been visited.
@@ -56,6 +67,8 @@ void PhiValues::processPhi(const PHINode *Phi,
// phi are part of the same component, so adjust the depth number.
if (!ReachableMap.count(DepthMap[PhiPhiOp]))
DepthMap[Phi] = std::min(DepthMap[Phi], DepthMap[PhiPhiOp]);
+ } else {
+ TrackedValues.insert(PhiValuesCallbackVH(PhiOp, this));
}
}
@@ -122,6 +135,10 @@ void PhiValues::invalidateValue(const Value *V) {
NonPhiReachableMap.erase(N);
ReachableMap.erase(N);
}
+ // This value is no longer tracked
+ auto It = TrackedValues.find_as(V);
+ if (It != TrackedValues.end())
+ TrackedValues.erase(It);
}
void PhiValues::releaseMemory() {
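PhiValuesCallbackVH above is the standard CallbackVH idiom: the analysis keeps one handle per tracked Value and drops cached results when that value is deleted or replaced (RAUW). A hedged sketch of the idiom with an invented CachingAnalysis owner; CallbackVH, deleted() and allUsesReplacedWith() are the real hooks:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/Value.h"
    #include "llvm/IR/ValueHandle.h"

    using namespace llvm;

    struct CachingAnalysis {
      DenseMap<const Value *, int> ResultCache;

      class InvalidatingVH final : public CallbackVH {
        CachingAnalysis *Owner;

        void deleted() override {
          // The value is going away: forget everything we cached about it.
          Owner->ResultCache.erase(getValPtr());
          setValPtr(nullptr);
        }
        void allUsesReplacedWith(Value *) override {
          // Treating the old value as invalidated is simpler than rewriting
          // the cache to refer to the replacement.
          Owner->ResultCache.erase(getValPtr());
        }

      public:
        InvalidatingVH(Value *V, CachingAnalysis *O) : CallbackVH(V), Owner(O) {}
      };
    };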
diff --git a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index fb591f5d6a69..1d70c75f2e1c 100644
--- a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -39,11 +39,6 @@ static cl::opt<int> ProfileSummaryCutoffCold(
cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));
-static cl::opt<bool> ProfileSampleAccurate(
- "profile-sample-accurate", cl::Hidden, cl::init(false),
- cl::desc("If the sample profile is accurate, we will mark all un-sampled "
- "callsite as cold. Otherwise, treat un-sampled callsites as if "
- "we have no profile."));
static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
cl::init(15000), cl::ZeroOrMore,
@@ -51,6 +46,18 @@ static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
" blocks required to reach the -profile-summary-cutoff-hot"
" percentile exceeds this count."));
+// The next two options override the counts derived from summary computation and
+// are useful for debugging purposes.
+static cl::opt<int> ProfileSummaryHotCount(
+ "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
+ cl::desc("A fixed hot count that overrides the count derived from"
+ " profile-summary-cutoff-hot"));
+
+static cl::opt<int> ProfileSummaryColdCount(
+ "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
+ cl::desc("A fixed cold count that overrides the count derived from"
+ " profile-summary-cutoff-cold"));
+
// Find the summary entry for a desired percentile of counts.
static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
uint64_t Percentile) {
@@ -139,7 +146,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F,
return true;
}
for (const auto &BB : *F)
- if (isHotBB(&BB, &BFI))
+ if (isHotBlock(&BB, &BFI))
return true;
return false;
}
@@ -168,7 +175,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F,
return false;
}
for (const auto &BB : *F)
- if (!isColdBB(&BB, &BFI))
+ if (!isColdBlock(&BB, &BFI))
return false;
return true;
}
@@ -198,9 +205,15 @@ void ProfileSummaryInfo::computeThresholds() {
auto &HotEntry =
getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffHot);
HotCountThreshold = HotEntry.MinCount;
+ if (ProfileSummaryHotCount.getNumOccurrences() > 0)
+ HotCountThreshold = ProfileSummaryHotCount;
auto &ColdEntry =
getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffCold);
ColdCountThreshold = ColdEntry.MinCount;
+ if (ProfileSummaryColdCount.getNumOccurrences() > 0)
+ ColdCountThreshold = ProfileSummaryColdCount;
+ assert(ColdCountThreshold <= HotCountThreshold &&
+ "Cold count threshold cannot exceed hot count threshold!");
HasHugeWorkingSetSize =
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
}
@@ -226,23 +239,23 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) {
uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() {
if (!HotCountThreshold)
computeThresholds();
- return HotCountThreshold && HotCountThreshold.getValue();
+ return HotCountThreshold ? HotCountThreshold.getValue() : UINT64_MAX;
}
uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() {
if (!ColdCountThreshold)
computeThresholds();
- return ColdCountThreshold && ColdCountThreshold.getValue();
+ return ColdCountThreshold ? ColdCountThreshold.getValue() : 0;
}
-bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) {
- auto Count = BFI->getBlockProfileCount(B);
+bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) {
+ auto Count = BFI->getBlockProfileCount(BB);
return Count && isHotCount(*Count);
}
-bool ProfileSummaryInfo::isColdBB(const BasicBlock *B,
+bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
BlockFrequencyInfo *BFI) {
- auto Count = BFI->getBlockProfileCount(B);
+ auto Count = BFI->getBlockProfileCount(BB);
return Count && isColdCount(*Count);
}
@@ -260,11 +273,7 @@ bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
// In SamplePGO, if the caller has been sampled, and there is no profile
// annotated on the callsite, we consider the callsite as cold.
- // If there is no profile for the caller, and we know the profile is
- // accurate, we consider the callsite as cold.
- return (hasSampleProfile() &&
- (CS.getCaller()->hasProfileData() || ProfileSampleAccurate ||
- CS.getCaller()->hasFnAttribute("profile-sample-accurate")));
+ return hasSampleProfile() && CS.getCaller()->hasProfileData();
}
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
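The new profile-summary-hot-count and profile-summary-cold-count flags only take effect when they were actually passed on the command line, which getNumOccurrences() distinguishes from the default value. A hedged sketch of that pattern with an invented flag name:

    #include "llvm/Support/CommandLine.h"
    #include <cstdint>

    using namespace llvm;

    static cl::opt<int> HotCountOverride(
        "example-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
        cl::desc("Fixed hot count overriding the summary-derived value"));

    // Only replace the summary-derived threshold when the flag was given.
    static uint64_t applyOverride(uint64_t Computed) {
      if (HotCountOverride.getNumOccurrences() > 0)
        return static_cast<uint64_t>(HotCountOverride);
      return Computed;
    }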
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
index ed17df2e7e93..a101ff109199 100644
--- a/contrib/llvm/lib/Analysis/RegionPass.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/OptBisect.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index 0e715b8814ff..e5134f2eeda9 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -112,6 +112,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -162,6 +163,11 @@ static cl::opt<bool>
cl::desc("Verify no dangling value in ScalarEvolution's "
"ExprValueMap (slow)"));
+static cl::opt<bool> VerifyIR(
+ "scev-verify-ir", cl::Hidden,
+ cl::desc("Verify IR correctness when making sensitive SCEV queries (slow)"),
+ cl::init(false));
+
static cl::opt<unsigned> MulOpsInlineThreshold(
"scev-mulops-inline-threshold", cl::Hidden,
cl::desc("Threshold for inlining multiplication operands into a SCEV"),
@@ -204,7 +210,7 @@ static cl::opt<unsigned>
static cl::opt<unsigned>
MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
cl::desc("Max coefficients in AddRec during evolving"),
- cl::init(16));
+ cl::init(8));
//===----------------------------------------------------------------------===//
// SCEV class definitions
@@ -692,10 +698,6 @@ static int CompareSCEVComplexity(
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
- // Compare NoWrap flags.
- if (LA->getNoWrapFlags() != RA->getNoWrapFlags())
- return (int)LA->getNoWrapFlags() - (int)RA->getNoWrapFlags();
-
// Lexicographically compare.
for (unsigned i = 0; i != LNumOps; ++i) {
int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
@@ -720,10 +722,6 @@ static int CompareSCEVComplexity(
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
- // Compare NoWrap flags.
- if (LC->getNoWrapFlags() != RC->getNoWrapFlags())
- return (int)LC->getNoWrapFlags() - (int)RC->getNoWrapFlags();
-
for (unsigned i = 0; i != LNumOps; ++i) {
int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
LC->getOperand(i), RC->getOperand(i), DT,
@@ -2767,6 +2765,29 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
const SCEV *
+ScalarEvolution::getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L, SCEV::NoWrapFlags Flags) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ ID.AddPointer(L);
+ void *IP = nullptr;
+ SCEVAddRecExpr *S =
+ static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator)
+ SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L);
+ UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+const SCEV *
ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
@@ -3045,7 +3066,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV*, 7> AddRecOps;
for (int x = 0, xe = AddRec->getNumOperands() +
OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
- const SCEV *Term = getZero(Ty);
+ SmallVector <const SCEV *, 7> SumOps;
for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
@@ -3060,12 +3081,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEV *CoeffTerm = getConstant(Ty, Coeff);
const SCEV *Term1 = AddRec->getOperand(y-z);
const SCEV *Term2 = OtherAddRec->getOperand(z);
- Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2,
- SCEV::FlagAnyWrap, Depth + 1),
- SCEV::FlagAnyWrap, Depth + 1);
+ SumOps.push_back(getMulExpr(CoeffTerm, Term1, Term2,
+ SCEV::FlagAnyWrap, Depth + 1));
}
}
- AddRecOps.push_back(Term);
+ if (SumOps.empty())
+ SumOps.push_back(getZero(Ty));
+ AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
}
if (!Overflow) {
const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
@@ -3416,24 +3438,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// Okay, it looks like we really DO need an addrec expr. Check to see if we
// already have one, otherwise create a new one.
- FoldingSetNodeID ID;
- ID.AddInteger(scAddRecExpr);
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- ID.AddPointer(Operands[i]);
- ID.AddPointer(L);
- void *IP = nullptr;
- SCEVAddRecExpr *S =
- static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
- if (!S) {
- const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
- std::uninitialized_copy(Operands.begin(), Operands.end(), O);
- S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
- O, Operands.size(), L);
- UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
- }
- S->setNoWrapFlags(Flags);
- return S;
+ return getOrCreateAddRecExpr(Operands, L, Flags);
}
const SCEV *
@@ -7080,7 +7085,7 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
return getCouldNotCompute();
bool IsOnlyExit = (L->getExitingBlock() != nullptr);
- TerminatorInst *Term = ExitingBlock->getTerminator();
+ Instruction *Term = ExitingBlock->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
assert(BI->isConditional() && "If unconditional, it can't be in loop!");
bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
@@ -8344,69 +8349,273 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
}
-/// Find the roots of the quadratic equation for the given quadratic chrec
-/// {L,+,M,+,N}. This returns either the two roots (which might be the same) or
-/// two SCEVCouldNotCompute objects.
-static Optional<std::pair<const SCEVConstant *,const SCEVConstant *>>
-SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+/// For a given quadratic addrec, generate coefficients of the corresponding
+/// quadratic equation, multiplied by a common value to ensure that they are
+/// integers.
+/// The returned value is a tuple { A, B, C, M, BitWidth }, where
+/// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C
+/// were multiplied by, and BitWidth is the bit width of the original addrec
+/// coefficients.
+/// This function returns None if the addrec coefficients are not compile-
+/// time constants.
+static Optional<std::tuple<APInt, APInt, APInt, APInt, unsigned>>
+GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
+ LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: "
+ << *AddRec << '\n');
// We currently can only solve this if the coefficients are constants.
- if (!LC || !MC || !NC)
+ if (!LC || !MC || !NC) {
+ LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n");
return None;
+ }
- uint32_t BitWidth = LC->getAPInt().getBitWidth();
- const APInt &L = LC->getAPInt();
- const APInt &M = MC->getAPInt();
- const APInt &N = NC->getAPInt();
- APInt Two(BitWidth, 2);
-
- // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
+ APInt L = LC->getAPInt();
+ APInt M = MC->getAPInt();
+ APInt N = NC->getAPInt();
+ assert(!N.isNullValue() && "This is not a quadratic addrec");
+
+ unsigned BitWidth = LC->getAPInt().getBitWidth();
+ unsigned NewWidth = BitWidth + 1;
+ LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: "
+ << BitWidth << '\n');
+ // The sign-extension (as opposed to a zero-extension) here matches the
+ // extension used in SolveQuadraticEquationWrap (with the same motivation).
+ N = N.sext(NewWidth);
+ M = M.sext(NewWidth);
+ L = L.sext(NewWidth);
+
+ // The increments are M, M+N, M+2N, ..., so the accumulated values are
+ // L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is,
+ // L+M, L+2M+N, L+3M+3N, ...
+ // After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N.
+ //
+ // The equation Acc = 0 is then
+ // L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0.
+ // In a quadratic form it becomes:
+ // N n^2 + (2M-N) n + 2L = 0.
+
+ APInt A = N;
+ APInt B = 2 * M - A;
+ APInt C = 2 * L;
+ APInt T = APInt(NewWidth, 2);
+ LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B
+ << "x + " << C << ", coeff bw: " << NewWidth
+ << ", multiplied by " << T << '\n');
+ return std::make_tuple(A, B, C, T, BitWidth);
+}
+
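The comment block in GetQuadraticEquation derives the coefficients from the chrec {L,+,M,+,N}: after n iterations the value is L + nM + n(n-1)/2*N, and clearing the fraction gives N*n^2 + (2M-N)*n + 2L = 0, i.e. A = N, B = 2M-N, C = 2L with multiplier 2. A tiny self-contained check of that algebra on plain integers (illustrative values, not the APInt code):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Chrec {L,+,M,+,N}: value after n iterations is L + n*M + n*(n-1)/2 * N.
      const int64_t L = 6, M = -5, N = 2;
      // Quadratic form derived above, everything multiplied by 2:
      // A*n^2 + B*n + C with A = N, B = 2M - N, C = 2L.
      const int64_t A = N, B = 2 * M - N, C = 2 * L;
      for (int64_t n = 0; n < 10; ++n) {
        int64_t Chrec = L + n * M + n * (n - 1) / 2 * N;
        assert(2 * Chrec == A * n * n + B * n + C);
      }
      return 0;
    }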
+/// Helper function to compare optional APInts:
+/// (a) if X and Y both exist, return min(X, Y),
+/// (b) if neither X nor Y exist, return None,
+/// (c) if exactly one of X and Y exists, return that value.
+static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) {
+ if (X.hasValue() && Y.hasValue()) {
+ unsigned W = std::max(X->getBitWidth(), Y->getBitWidth());
+ APInt XW = X->sextOrSelf(W);
+ APInt YW = Y->sextOrSelf(W);
+ return XW.slt(YW) ? *X : *Y;
+ }
+ if (!X.hasValue() && !Y.hasValue())
+ return None;
+ return X.hasValue() ? *X : *Y;
+}
- // The A coefficient is N/2
- APInt A = N.sdiv(Two);
+/// Helper function to truncate an optional APInt to a given BitWidth.
+/// When solving addrec-related equations, it is preferable to return a value
+/// that has the same bit width as the original addrec's coefficients. If the
+/// solution fits in the original bit width, truncate it (except for i1).
+/// Returning a value of a different bit width may inhibit some optimizations.
+///
+/// In general, a solution to a quadratic equation generated from an addrec
+/// may require BW+1 bits, where BW is the bit width of the addrec's
+/// coefficients. The reason is that the coefficients of the quadratic
+/// equation are BW+1 bits wide (to avoid truncation when converting from
+/// the addrec to the equation).
+static Optional<APInt> TruncIfPossible(Optional<APInt> X, unsigned BitWidth) {
+ if (!X.hasValue())
+ return None;
+ unsigned W = X->getBitWidth();
+ if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth))
+ return X->trunc(BitWidth);
+ return X;
+}
+
+/// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n
+/// iterations. The values L, M, N are assumed to be signed, and they
+/// should all have the same bit widths.
+/// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW,
+/// where BW is the bit width of the addrec's coefficients.
+/// If the calculated value is a BW-bit integer (for BW > 1), it will be
+/// returned as such, otherwise the bit width of the returned value may
+/// be greater than BW.
+///
+/// This function returns None if
+/// (a) the addrec coefficients are not constant, or
+/// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases
+/// like x^2 = 5, no integer solutions exist, in other cases an integer
+/// solution may exist, but SolveQuadraticEquationWrap may fail to find it.
+static Optional<APInt>
+SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+ APInt A, B, C, M;
+ unsigned BitWidth;
+ auto T = GetQuadraticEquation(AddRec);
+ if (!T.hasValue())
+ return None;
- // The B coefficient is M-N/2
- APInt B = M;
- B -= A; // A is the same as N/2.
+ std::tie(A, B, C, M, BitWidth) = *T;
+ LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n");
+ Optional<APInt> X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1);
+ if (!X.hasValue())
+ return None;
- // The C coefficient is L.
- const APInt& C = L;
+ ConstantInt *CX = ConstantInt::get(SE.getContext(), *X);
+ ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE);
+ if (!V->isZero())
+ return None;
- // Compute the B^2-4ac term.
- APInt SqrtTerm = B;
- SqrtTerm *= B;
- SqrtTerm -= 4 * (A * C);
+ return TruncIfPossible(X, BitWidth);
+}
- if (SqrtTerm.isNegative()) {
- // The loop is provably infinite.
+/// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n
+/// iterations. The values M, N are assumed to be signed, and they
+/// should all have the same bit widths.
+/// Find the least n such that c(n) does not belong to the given range,
+/// while c(n-1) does.
+///
+/// This function returns None if
+/// (a) the addrec coefficients are not constant, or
+/// (b) SolveQuadraticEquationWrap was unable to find a solution for the
+/// bounds of the range.
+static Optional<APInt>
+SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
+ const ConstantRange &Range, ScalarEvolution &SE) {
+ assert(AddRec->getOperand(0)->isZero() &&
+ "Starting value of addrec should be 0");
+ LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range "
+ << Range << ", addrec " << *AddRec << '\n');
+ // This case is handled in getNumIterationsInRange. Here we can assume that
+ // we start in the range.
+ assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) &&
+ "Addrec's initial value should be in range");
+
+ APInt A, B, C, M;
+ unsigned BitWidth;
+ auto T = GetQuadraticEquation(AddRec);
+ if (!T.hasValue())
return None;
- }
- // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
- // integer value or else APInt::sqrt() will assert.
- APInt SqrtVal = SqrtTerm.sqrt();
+ // Be careful about the return value: there can be two reasons for not
+ // returning an actual number. First, if no solutions to the equations
+ // were found, and second, if the solutions don't leave the given range.
+ // The first case means that the actual solution is "unknown", the second
+ // means that it's known, but not valid. If the solution is unknown, we
+ // cannot make any conclusions.
+ // Return a pair: the optional solution and a flag indicating if the
+ // solution was found.
+ auto SolveForBoundary = [&](APInt Bound) -> std::pair<Optional<APInt>,bool> {
+ // Solve for signed overflow and unsigned overflow, pick the lower
+ // solution.
+ LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary "
+ << Bound << " (before multiplying by " << M << ")\n");
+ Bound *= M; // The quadratic equation multiplier.
+
+ Optional<APInt> SO = None;
+ if (BitWidth > 1) {
+ LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
+ "signed overflow\n");
+ SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth);
+ }
+ LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
+ "unsigned overflow\n");
+ Optional<APInt> UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound,
+ BitWidth+1);
+
+ auto LeavesRange = [&] (const APInt &X) {
+ ConstantInt *C0 = ConstantInt::get(SE.getContext(), X);
+ ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE);
+ if (Range.contains(V0->getValue()))
+ return false;
+ // X should be at least 1, so X-1 is non-negative.
+ ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1);
+ ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE);
+ if (Range.contains(V1->getValue()))
+ return true;
+ return false;
+ };
- // Compute the two solutions for the quadratic formula.
- // The divisions must be performed as signed divisions.
- APInt NegB = -std::move(B);
- APInt TwoA = std::move(A);
- TwoA <<= 1;
- if (TwoA.isNullValue())
- return None;
+ // If SolveQuadraticEquationWrap returns None, it means that there can
+ // be a solution, but the function failed to find it. We cannot treat it
+ // as "no solution".
+ if (!SO.hasValue() || !UO.hasValue())
+ return { None, false };
+
+ // Check the smaller value first to see if it leaves the range.
+ // At this point, both SO and UO must have values.
+ Optional<APInt> Min = MinOptional(SO, UO);
+ if (LeavesRange(*Min))
+ return { Min, true };
+ Optional<APInt> Max = Min == SO ? UO : SO;
+ if (LeavesRange(*Max))
+ return { Max, true };
+
+ // Solutions were found, but were eliminated, hence the "true".
+ return { None, true };
+ };
- LLVMContext &Context = SE.getContext();
+ std::tie(A, B, C, M, BitWidth) = *T;
+ // Lower bound is inclusive, subtract 1 to represent the exiting value.
+ APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1;
+ APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth());
+ auto SL = SolveForBoundary(Lower);
+ auto SU = SolveForBoundary(Upper);
+ // If either of the solutions was unknown, no meaningful conclusions can
+ // be made.
+ if (!SL.second || !SU.second)
+ return None;
- ConstantInt *Solution1 =
- ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
- ConstantInt *Solution2 =
- ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
+ // Claim: The correct solution is not some value between Min and Max.
+ //
+ // Justification: Assuming that Min and Max are different values, one of
+ // them is when the first signed overflow happens, the other is when the
+ // first unsigned overflow happens. Crossing the range boundary is only
+ // possible via an overflow (treating 0 as a special case of it, modeling
+ // an overflow as crossing k*2^W for some k).
+ //
+ // The interesting case here is when Min was eliminated as an invalid
+ // solution, but Max was not. The argument is that if there was another
+ // overflow between Min and Max, it would also have been eliminated if
+ // it was considered.
+ //
+ // For a given boundary, it is possible to have two overflows of the same
+ // type (signed/unsigned) without having the other type in between: this
+ // can happen when the vertex of the parabola is between the iterations
+ // corresponding to the overflows. This is only possible when the two
+ // overflows cross k*2^W for the same k. In such case, if the second one
+ // left the range (and was the first one to do so), the first overflow
+ // would have to enter the range, which would mean that either we had left
+ // the range before or that we started outside of it. Both of these cases
+ // are contradictions.
+ //
+ // Claim: In the case where SolveForBoundary returns None, the correct
+ // solution is not some value between the Max for this boundary and the
+ // Min of the other boundary.
+ //
+ // Justification: Assume that we had such Max_A and Min_B corresponding
+ // to range boundaries A and B and such that Max_A < Min_B. If there was
+ // a solution between Max_A and Min_B, it would have to be caused by an
+ // overflow corresponding to either A or B. It cannot correspond to B,
+ // since Min_B is the first occurrence of such an overflow. If it
+ // corresponded to A, it would have to be either a signed or an unsigned
+ // overflow that is larger than both eliminated overflows for A. But
+ // between the eliminated overflows and this overflow, the values would
+ // cover the entire value space, thus crossing the other boundary, which
+ // is a contradiction.
- return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)),
- cast<SCEVConstant>(SE.getConstant(Solution2)));
+ return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth);
}
ScalarEvolution::ExitLimit
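The boundary-crossing argument in the comments above can be made concrete with a small standalone sketch (illustrative only: plain 64-bit integers and brute force instead of APInt and the closed-form SolveQuadraticEquationWrap; the function names are invented). It finds the first iteration at which a wrapping quadratic chrec {L,+,M,+,N} leaves a half-open range, which is the quantity SolveForBoundary approximates from the signed and unsigned overflow sides:

#include <cstdint>
#include <optional>

// Value of the chrec {L,+,M,+,N} at iteration I, reduced mod 2^W (W < 64).
uint64_t chrecAt(uint64_t L, uint64_t M, uint64_t N, uint64_t I, unsigned W) {
  uint64_t Mask = (1ull << W) - 1;
  return (L + M * I + N * (I * (I - 1) / 2)) & Mask;
}

// First iteration whose value lies outside [Lo, Hi). Brute force, so only
// feasible for tiny W, but it makes "first boundary crossing" concrete.
std::optional<uint64_t> firstExit(uint64_t L, uint64_t M, uint64_t N,
                                  uint64_t Lo, uint64_t Hi, unsigned W) {
  for (uint64_t I = 0, E = 1ull << W; I != E; ++I) {
    uint64_t V = chrecAt(L, M, N, I, W);
    if (V < Lo || V >= Hi)
      return I;
  }
  return std::nullopt;
}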
@@ -8441,23 +8650,12 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
// the quadratic equation to solve it.
if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
- if (auto Roots = SolveQuadraticEquation(AddRec, *this)) {
- const SCEVConstant *R1 = Roots->first;
- const SCEVConstant *R2 = Roots->second;
- // Pick the smallest positive root value.
- if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
- CmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
- if (!CB->getZExtValue())
- std::swap(R1, R2); // R1 is the minimum root now.
-
- // We can only use this value if the chrec ends up with an exact zero
- // value at this index. When solving for "X*X != 5", for example, we
- // should not accept a root of 2.
- const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
- if (Val->isZero())
- // We found a quadratic root!
- return ExitLimit(R1, R1, false, Predicates);
- }
+ // We can only use this value if the chrec ends up with an exact zero
+ // value at this index. When solving for "X*X != 5", for example, we
+ // should not accept a root of 2.
+ if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) {
+ const auto *R = cast<SCEVConstant>(getConstant(S.getValue()));
+ return ExitLimit(R, R, false, Predicates);
}
return getCouldNotCompute();
}
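A quick standalone check of the "X*X != 5" example from the comment (ordinary signed arithmetic here, not APInt) shows why no root is accepted: the chrec for X*X - 5 is {-5,+,1,+,2}, and it is never exactly zero at an integer iteration, so the exit count cannot be computed:

#include <cstdio>

int main() {
  // {-5,+,1,+,2}(I) = -5 + I + 2*I*(I-1)/2 = I*I - 5
  auto Chrec = [](long I) { return -5 + 1 * I + 2 * (I * (I - 1) / 2); };
  for (long I = 0; I < 4; ++I)
    std::printf("iter %ld -> %ld\n", I, Chrec(I)); // prints -5 -4 -1 4; never 0
  return 0;
}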
@@ -8617,7 +8815,13 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
const SCEV *&LHS, const SCEV *&RHS,
unsigned Depth) {
bool Changed = false;
-
+ // Simplifies ICMP to trivial true or false by turning it into '0 == 0' or
+ // '0 != 0'.
+ auto TrivialCase = [&](bool TriviallyTrue) {
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = TriviallyTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+ return true;
+ };
// If we hit the max recursion limit bail out.
if (Depth >= 3)
return false;
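A hedged sketch of the canonical form produced by the TrivialCase helper above (the enum and struct here are made up, not the SCEV/ICmpInst types): both operands become the constant zero, and only the predicate distinguishes a provably-true comparison from a provably-false one:

#include <cstdint>

enum class Pred { EQ, NE, ULT };

struct Cmp { Pred P; uint64_t LHS, RHS; };

Cmp canonicalizeTrivial(bool TriviallyTrue) {
  // Both operands become the constant 0; only the predicate differs.
  return {TriviallyTrue ? Pred::EQ : Pred::NE, 0, 0};
}

// Example: "x <u 0" can never be true, so it is canonicalized to
// canonicalizeTrivial(false), i.e. the comparison 0 != 0.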
@@ -8629,9 +8833,9 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
if (ConstantExpr::getICmp(Pred,
LHSC->getValue(),
RHSC->getValue())->isNullValue())
- goto trivially_false;
+ return TrivialCase(false);
else
- goto trivially_true;
+ return TrivialCase(true);
}
// Otherwise swap the operands to put the constant on the right.
std::swap(LHS, RHS);
@@ -8661,9 +8865,9 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
if (!ICmpInst::isEquality(Pred)) {
ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
if (ExactCR.isFullSet())
- goto trivially_true;
+ return TrivialCase(true);
else if (ExactCR.isEmptySet())
- goto trivially_false;
+ return TrivialCase(false);
APInt NewRHS;
CmpInst::Predicate NewPred;
@@ -8699,7 +8903,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// The "Should have been caught earlier!" messages refer to the fact
// that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
// should have fired on the corresponding cases, and canonicalized the
- // check to trivially_true or trivially_false.
+ // check to a trivial case.
case ICmpInst::ICMP_UGE:
assert(!RA.isMinValue() && "Should have been caught earlier!");
@@ -8732,9 +8936,9 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// Check for obvious equality.
if (HasSameValue(LHS, RHS)) {
if (ICmpInst::isTrueWhenEqual(Pred))
- goto trivially_true;
+ return TrivialCase(true);
if (ICmpInst::isFalseWhenEqual(Pred))
- goto trivially_false;
+ return TrivialCase(false);
}
// If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
@@ -8802,18 +9006,6 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
return Changed;
-
-trivially_true:
- // Return 0 == 0.
- LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
- Pred = ICmpInst::ICMP_EQ;
- return true;
-
-trivially_false:
- // Return 0 != 0.
- LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
- Pred = ICmpInst::ICMP_NE;
- return true;
}
bool ScalarEvolution::isKnownNegative(const SCEV *S) {
@@ -9184,6 +9376,11 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return true;
+ if (VerifyIR)
+ assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
+ "This cannot be done on broken IR!");
+
+
if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
return true;
@@ -9289,6 +9486,10 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return false;
+ if (VerifyIR)
+ assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
+ "This cannot be done on broken IR!");
+
// Both LHS and RHS must be available at loop entry.
assert(isAvailableAtLoopEntry(LHS, L) &&
"LHS is not available at Loop Entry");
@@ -10565,52 +10766,11 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) &&
"Linear scev computation is off in a bad way!");
return SE.getConstant(ExitValue);
- } else if (isQuadratic()) {
- // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
- // quadratic equation to solve it. To do this, we must frame our problem in
- // terms of figuring out when zero is crossed, instead of when
- // Range.getUpper() is crossed.
- SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
- NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
- const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap);
-
- // Next, solve the constructed addrec
- if (auto Roots =
- SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE)) {
- const SCEVConstant *R1 = Roots->first;
- const SCEVConstant *R2 = Roots->second;
- // Pick the smallest positive root value.
- if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
- ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
- if (!CB->getZExtValue())
- std::swap(R1, R2); // R1 is the minimum root now.
-
- // Make sure the root is not off by one. The returned iteration should
- // not be in the range, but the previous one should be. When solving
- // for "X*X < 5", for example, we should not return a root of 2.
- ConstantInt *R1Val =
- EvaluateConstantChrecAtConstant(this, R1->getValue(), SE);
- if (Range.contains(R1Val->getValue())) {
- // The next iteration must be out of the range...
- ConstantInt *NextVal =
- ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);
-
- R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
- if (!Range.contains(R1Val->getValue()))
- return SE.getConstant(NextVal);
- return SE.getCouldNotCompute(); // Something strange happened
- }
+ }
- // If R1 was not in the range, then it is a good return value. Make
- // sure that R1-1 WAS in the range though, just in case.
- ConstantInt *NextVal =
- ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
- R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
- if (Range.contains(R1Val->getValue()))
- return R1;
- return SE.getCouldNotCompute(); // Something strange happened
- }
- }
+ if (isQuadratic()) {
+ if (auto S = SolveQuadraticAddRecRange(this, Range, SE))
+ return SE.getConstant(S.getValue());
}
return SE.getCouldNotCompute();
@@ -10920,7 +11080,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
// Put larger terms first.
- llvm::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
+ llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
return numberOfTerms(LHS) > numberOfTerms(RHS);
});
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 7bea994121c8..289d4f8ae49a 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -27,7 +27,7 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
// If either of the memory references is empty, it doesn't matter what the
// pointer values are. This allows the code below to ignore this special
// case.
- if (LocA.Size == 0 || LocB.Size == 0)
+ if (LocA.Size.isZero() || LocB.Size.isZero())
return NoAlias;
// This is SCEVAAResult. Get the SCEVs!
@@ -43,8 +43,12 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
if (SE.getEffectiveSCEVType(AS->getType()) ==
SE.getEffectiveSCEVType(BS->getType())) {
unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
- APInt ASizeInt(BitWidth, LocA.Size);
- APInt BSizeInt(BitWidth, LocB.Size);
+ APInt ASizeInt(BitWidth, LocA.Size.hasValue()
+ ? LocA.Size.getValue()
+ : MemoryLocation::UnknownSize);
+ APInt BSizeInt(BitWidth, LocB.Size.hasValue()
+ ? LocB.Size.getValue()
+ : MemoryLocation::UnknownSize);
// Compute the difference between the two pointers.
const SCEV *BA = SE.getMinusSCEV(BS, AS);
@@ -78,10 +82,10 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
Value *BO = GetBaseValue(BS);
if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
if (alias(MemoryLocation(AO ? AO : LocA.Ptr,
- AO ? +MemoryLocation::UnknownSize : LocA.Size,
+ AO ? LocationSize::unknown() : LocA.Size,
AO ? AAMDNodes() : LocA.AATags),
MemoryLocation(BO ? BO : LocB.Ptr,
- BO ? +MemoryLocation::UnknownSize : LocB.Size,
+ BO ? LocationSize::unknown() : LocB.Size,
BO ? AAMDNodes() : LocB.AATags)) == NoAlias)
return NoAlias;
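The hunk above is part of the switch from raw sizes to LocationSize. A toy stand-in (not the LLVM type) shows why the code now has to ask hasValue() before building an APInt: an unknown size no longer compares equal to any integer, so the unknown case must be mapped to MemoryLocation::UnknownSize explicitly:

#include <cstdint>
#include <optional>

struct ToyLocationSize {
  std::optional<uint64_t> Value; // nullopt means "unknown"
  bool hasValue() const { return Value.has_value(); }
  uint64_t getValue() const { return *Value; }
  bool isZero() const { return Value && *Value == 0; }
};

// Mirrors the ?: expressions above: known sizes pass through, unknown sizes
// become an explicit sentinel value.
uint64_t toRawSize(const ToyLocationSize &S, uint64_t UnknownSize) {
  return S.hasValue() ? S.getValue() : UnknownSize;
}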
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index 8f89389c4b5d..ca5cf1663b83 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1867,7 +1867,7 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
Phis.push_back(&PN);
if (TTI)
- llvm::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
+ llvm::sort(Phis, [](Value *LHS, Value *RHS) {
// Put pointers at the back and make sure pointer < pointer = false.
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index f12275aff387..9a581fe46afc 100644
--- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -95,39 +95,36 @@ AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
return AAResultBase::alias(LocA, LocB);
}
-ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
- if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
- LLVMContext::MD_noalias)))
+ if (!mayAliasInScopes(Loc.AATags.Scope,
+ Call->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- if (!mayAliasInScopes(
- CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- Loc.AATags.NoAlias))
+ if (!mayAliasInScopes(Call->getMetadata(LLVMContext::MD_alias_scope),
+ Loc.AATags.NoAlias))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
-ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
- if (!mayAliasInScopes(
- CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ if (!mayAliasInScopes(Call1->getMetadata(LLVMContext::MD_alias_scope),
+ Call2->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- if (!mayAliasInScopes(
- CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ if (!mayAliasInScopes(Call2->getMetadata(LLVMContext::MD_alias_scope),
+ Call1->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
}
static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
diff --git a/contrib/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/contrib/llvm/lib/Analysis/StackSafetyAnalysis.cpp
new file mode 100644
index 000000000000..66b03845864f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -0,0 +1,673 @@
+//===- StackSafetyAnalysis.cpp - Stack memory safety analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/StackSafetyAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-safety"
+
+static cl::opt<int> StackSafetyMaxIterations("stack-safety-max-iterations",
+ cl::init(20), cl::Hidden);
+
+namespace {
+
+/// Rewrite an SCEV expression for a memory access address to an expression that
+/// represents offset from the given alloca.
+class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
+ const Value *AllocaPtr;
+
+public:
+ AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
+ : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
+
+ const SCEV *visit(const SCEV *Expr) {
+ // Only re-write the expression if the alloca is used in an addition
+ // expression (it can be used in other types of expressions if it's cast to
+ // an int and passed as an argument).
+ if (!isa<SCEVAddRecExpr>(Expr) && !isa<SCEVAddExpr>(Expr) &&
+ !isa<SCEVUnknown>(Expr))
+ return Expr;
+ return SCEVRewriteVisitor<AllocaOffsetRewriter>::visit(Expr);
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ // FIXME: look through one or several levels of definitions?
+ // This can be inttoptr(AllocaPtr) and SCEV would not unwrap
+ // it for us.
+ if (Expr->getValue() == AllocaPtr)
+ return SE.getZero(Expr->getType());
+ return Expr;
+ }
+};
+
+/// Describes use of an address as a function call argument.
+struct PassAsArgInfo {
+ /// Function being called.
+ const GlobalValue *Callee = nullptr;
+ /// Index of the argument which passes the address.
+ size_t ParamNo = 0;
+ // Offset range of the address from the base address (alloca or calling
+ // function argument).
+ // Range should never be set to the empty set; that is an invalid access
+ // range that can cause the empty set to be propagated with ConstantRange::add.
+ ConstantRange Offset;
+ PassAsArgInfo(const GlobalValue *Callee, size_t ParamNo, ConstantRange Offset)
+ : Callee(Callee), ParamNo(ParamNo), Offset(Offset) {}
+
+ StringRef getName() const { return Callee->getName(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const PassAsArgInfo &P) {
+ return OS << "@" << P.getName() << "(arg" << P.ParamNo << ", " << P.Offset
+ << ")";
+}
+
+/// Describe uses of address (alloca or parameter) inside of the function.
+struct UseInfo {
+ // Access range of the address (alloca or parameter).
+ // It is allowed to be the empty set when there are no known accesses.
+ ConstantRange Range;
+
+ // List of calls which pass the address as an argument.
+ SmallVector<PassAsArgInfo, 4> Calls;
+
+ explicit UseInfo(unsigned PointerSize) : Range{PointerSize, false} {}
+
+ void updateRange(ConstantRange R) { Range = Range.unionWith(R); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const UseInfo &U) {
+ OS << U.Range;
+ for (auto &Call : U.Calls)
+ OS << ", " << Call;
+ return OS;
+}
+
+struct AllocaInfo {
+ const AllocaInst *AI = nullptr;
+ uint64_t Size = 0;
+ UseInfo Use;
+
+ AllocaInfo(unsigned PointerSize, const AllocaInst *AI, uint64_t Size)
+ : AI(AI), Size(Size), Use(PointerSize) {}
+
+ StringRef getName() const { return AI->getName(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const AllocaInfo &A) {
+ return OS << A.getName() << "[" << A.Size << "]: " << A.Use;
+}
+
+struct ParamInfo {
+ const Argument *Arg = nullptr;
+ UseInfo Use;
+
+ explicit ParamInfo(unsigned PointerSize, const Argument *Arg)
+ : Arg(Arg), Use(PointerSize) {}
+
+ StringRef getName() const { return Arg ? Arg->getName() : "<N/A>"; }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const ParamInfo &P) {
+ return OS << P.getName() << "[]: " << P.Use;
+}
+
+/// Calculate the allocation size of a given alloca. Returns 0 if the
+/// size cannot be statically determined.
+uint64_t getStaticAllocaAllocationSize(const AllocaInst *AI) {
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ auto C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C)
+ return 0;
+ Size *= C->getZExtValue();
+ }
+ return Size;
+}
+
+} // end anonymous namespace
+
+/// Describes uses of allocas and parameters inside of a single function.
+struct StackSafetyInfo::FunctionInfo {
+ // May be a Function or a GlobalAlias
+ const GlobalValue *GV = nullptr;
+ // Information about alloca uses.
+ SmallVector<AllocaInfo, 4> Allocas;
+ // Information about parameter uses.
+ SmallVector<ParamInfo, 4> Params;
+ // TODO: describe return value as depending on one or more of its arguments.
+
+ // StackSafetyDataFlowAnalysis counter stored here for faster access.
+ int UpdateCount = 0;
+
+ FunctionInfo(const StackSafetyInfo &SSI) : FunctionInfo(*SSI.Info) {}
+
+ explicit FunctionInfo(const Function *F) : GV(F){};
+ // Creates FunctionInfo that forwards all the parameters to the aliasee.
+ explicit FunctionInfo(const GlobalAlias *A);
+
+ FunctionInfo(FunctionInfo &&) = default;
+
+ bool IsDSOLocal() const { return GV->isDSOLocal(); };
+
+ bool IsInterposable() const { return GV->isInterposable(); };
+
+ StringRef getName() const { return GV->getName(); }
+
+ void print(raw_ostream &O) const {
+ // TODO: Consider different printout format after
+ // StackSafetyDataFlowAnalysis. Calls and parameters are irrelevant then.
+ O << " @" << getName() << (IsDSOLocal() ? "" : " dso_preemptable")
+ << (IsInterposable() ? " interposable" : "") << "\n";
+ O << " args uses:\n";
+ for (auto &P : Params)
+ O << " " << P << "\n";
+ O << " allocas uses:\n";
+ for (auto &AS : Allocas)
+ O << " " << AS << "\n";
+ }
+
+private:
+ FunctionInfo(const FunctionInfo &) = default;
+};
+
+StackSafetyInfo::FunctionInfo::FunctionInfo(const GlobalAlias *A) : GV(A) {
+ unsigned PointerSize = A->getParent()->getDataLayout().getPointerSizeInBits();
+ const GlobalObject *Aliasee = A->getBaseObject();
+ const FunctionType *Type = cast<FunctionType>(Aliasee->getValueType());
+ // 'Forward' all parameters of this alias to the aliasee
+ for (unsigned ArgNo = 0; ArgNo < Type->getNumParams(); ArgNo++) {
+ Params.emplace_back(PointerSize, nullptr);
+ UseInfo &US = Params.back().Use;
+ US.Calls.emplace_back(Aliasee, ArgNo, ConstantRange(APInt(PointerSize, 0)));
+ }
+}
+
+namespace {
+
+class StackSafetyLocalAnalysis {
+ const Function &F;
+ const DataLayout &DL;
+ ScalarEvolution &SE;
+ unsigned PointerSize = 0;
+
+ const ConstantRange UnknownRange;
+
+ ConstantRange offsetFromAlloca(Value *Addr, const Value *AllocaPtr);
+ ConstantRange getAccessRange(Value *Addr, const Value *AllocaPtr,
+ uint64_t AccessSize);
+ ConstantRange getMemIntrinsicAccessRange(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr);
+
+ bool analyzeAllUses(const Value *Ptr, UseInfo &AS);
+
+ ConstantRange getRange(uint64_t Lower, uint64_t Upper) const {
+ return ConstantRange(APInt(PointerSize, Lower), APInt(PointerSize, Upper));
+ }
+
+public:
+ StackSafetyLocalAnalysis(const Function &F, ScalarEvolution &SE)
+ : F(F), DL(F.getParent()->getDataLayout()), SE(SE),
+ PointerSize(DL.getPointerSizeInBits()),
+ UnknownRange(PointerSize, true) {}
+
+ // Run the transformation on the associated function.
+ StackSafetyInfo run();
+};
+
+ConstantRange
+StackSafetyLocalAnalysis::offsetFromAlloca(Value *Addr,
+ const Value *AllocaPtr) {
+ if (!SE.isSCEVable(Addr->getType()))
+ return UnknownRange;
+
+ AllocaOffsetRewriter Rewriter(SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr));
+ ConstantRange Offset = SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize);
+ assert(!Offset.isEmptySet());
+ return Offset;
+}
+
+ConstantRange StackSafetyLocalAnalysis::getAccessRange(Value *Addr,
+ const Value *AllocaPtr,
+ uint64_t AccessSize) {
+ if (!SE.isSCEVable(Addr->getType()))
+ return UnknownRange;
+
+ AllocaOffsetRewriter Rewriter(SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr));
+
+ ConstantRange AccessStartRange =
+ SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize);
+ ConstantRange SizeRange = getRange(0, AccessSize);
+ ConstantRange AccessRange = AccessStartRange.add(SizeRange);
+ assert(!AccessRange.isEmptySet());
+ return AccessRange;
+}
+
+ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange(
+ const MemIntrinsic *MI, const Use &U, const Value *AllocaPtr) {
+ if (auto MTI = dyn_cast<MemTransferInst>(MI)) {
+ if (MTI->getRawSource() != U && MTI->getRawDest() != U)
+ return getRange(0, 1);
+ } else {
+ if (MI->getRawDest() != U)
+ return getRange(0, 1);
+ }
+ const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
+ // Non-constant size => unsafe. FIXME: try SCEV getRange.
+ if (!Len)
+ return UnknownRange;
+ ConstantRange AccessRange = getAccessRange(U, AllocaPtr, Len->getZExtValue());
+ return AccessRange;
+}
+
+/// The function analyzes all local uses of Ptr (alloca or argument) and
+/// calculates local access range and all function calls where it was used.
+bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) {
+ SmallPtrSet<const Value *, 16> Visited;
+ SmallVector<const Value *, 8> WorkList;
+ WorkList.push_back(Ptr);
+
+ // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
+ while (!WorkList.empty()) {
+ const Value *V = WorkList.pop_back_val();
+ for (const Use &UI : V->uses()) {
+ auto I = cast<const Instruction>(UI.getUser());
+ assert(V == UI.get());
+
+ switch (I->getOpcode()) {
+ case Instruction::Load: {
+ US.updateRange(
+ getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType())));
+ break;
+ }
+
+ case Instruction::VAArg:
+ // "va-arg" from a pointer is safe.
+ break;
+ case Instruction::Store: {
+ if (V == I->getOperand(0)) {
+ // Stored the pointer - conservatively assume it may be unsafe.
+ US.updateRange(UnknownRange);
+ return false;
+ }
+ US.updateRange(getAccessRange(
+ UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType())));
+ break;
+ }
+
+ case Instruction::Ret:
+ // Information leak.
+ // FIXME: Process parameters correctly. This is a leak only if we return
+ // alloca.
+ US.updateRange(UnknownRange);
+ return false;
+
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ ImmutableCallSite CS(I);
+
+ if (I->isLifetimeStartOrEnd())
+ break;
+
+ if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ US.updateRange(getMemIntrinsicAccessRange(MI, UI, Ptr));
+ break;
+ }
+
+ // FIXME: consult devirt?
+ // Do not follow aliases, otherwise we could inadvertently follow
+ // dso_preemptable aliases or aliases with interposable linkage.
+ const GlobalValue *Callee = dyn_cast<GlobalValue>(
+ CS.getCalledValue()->stripPointerCastsNoFollowAliases());
+ if (!Callee) {
+ US.updateRange(UnknownRange);
+ return false;
+ }
+
+ assert(isa<Function>(Callee) || isa<GlobalAlias>(Callee));
+
+ ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) {
+ if (A->get() == V) {
+ ConstantRange Offset = offsetFromAlloca(UI, Ptr);
+ US.Calls.emplace_back(Callee, A - B, Offset);
+ }
+ }
+
+ break;
+ }
+
+ default:
+ if (Visited.insert(I).second)
+ WorkList.push_back(cast<const Instruction>(I));
+ }
+ }
+ }
+
+ return true;
+}
+
+StackSafetyInfo StackSafetyLocalAnalysis::run() {
+ StackSafetyInfo::FunctionInfo Info(&F);
+ assert(!F.isDeclaration() &&
+ "Can't run StackSafety on a function declaration");
+
+ LLVM_DEBUG(dbgs() << "[StackSafety] " << F.getName() << "\n");
+
+ for (auto &I : instructions(F)) {
+ if (auto AI = dyn_cast<AllocaInst>(&I)) {
+ Info.Allocas.emplace_back(PointerSize, AI,
+ getStaticAllocaAllocationSize(AI));
+ AllocaInfo &AS = Info.Allocas.back();
+ analyzeAllUses(AI, AS.Use);
+ }
+ }
+
+ for (const Argument &A : make_range(F.arg_begin(), F.arg_end())) {
+ Info.Params.emplace_back(PointerSize, &A);
+ ParamInfo &PS = Info.Params.back();
+ analyzeAllUses(&A, PS.Use);
+ }
+
+ LLVM_DEBUG(dbgs() << "[StackSafety] done\n");
+ LLVM_DEBUG(Info.print(dbgs()));
+ return StackSafetyInfo(std::move(Info));
+}
+
+class StackSafetyDataFlowAnalysis {
+ using FunctionMap =
+ std::map<const GlobalValue *, StackSafetyInfo::FunctionInfo>;
+
+ FunctionMap Functions;
+ // Callee-to-Caller multimap.
+ DenseMap<const GlobalValue *, SmallVector<const GlobalValue *, 4>> Callers;
+ SetVector<const GlobalValue *> WorkList;
+
+ unsigned PointerSize = 0;
+ const ConstantRange UnknownRange;
+
+ ConstantRange getArgumentAccessRange(const GlobalValue *Callee,
+ unsigned ParamNo) const;
+ bool updateOneUse(UseInfo &US, bool UpdateToFullSet);
+ void updateOneNode(const GlobalValue *Callee,
+ StackSafetyInfo::FunctionInfo &FS);
+ void updateOneNode(const GlobalValue *Callee) {
+ updateOneNode(Callee, Functions.find(Callee)->second);
+ }
+ void updateAllNodes() {
+ for (auto &F : Functions)
+ updateOneNode(F.first, F.second);
+ }
+ void runDataFlow();
+ void verifyFixedPoint();
+
+public:
+ StackSafetyDataFlowAnalysis(
+ Module &M, std::function<const StackSafetyInfo &(Function &)> FI);
+ StackSafetyGlobalInfo run();
+};
+
+StackSafetyDataFlowAnalysis::StackSafetyDataFlowAnalysis(
+ Module &M, std::function<const StackSafetyInfo &(Function &)> FI)
+ : PointerSize(M.getDataLayout().getPointerSizeInBits()),
+ UnknownRange(PointerSize, true) {
+ // Without ThinLTO, run the local analysis for every function in the TU and
+ // then run the DFA.
+ for (auto &F : M.functions())
+ if (!F.isDeclaration())
+ Functions.emplace(&F, FI(F));
+ for (auto &A : M.aliases())
+ if (isa<Function>(A.getBaseObject()))
+ Functions.emplace(&A, StackSafetyInfo::FunctionInfo(&A));
+}
+
+ConstantRange
+StackSafetyDataFlowAnalysis::getArgumentAccessRange(const GlobalValue *Callee,
+ unsigned ParamNo) const {
+ auto IT = Functions.find(Callee);
+ // Unknown callee (outside of LTO domain or an indirect call).
+ if (IT == Functions.end())
+ return UnknownRange;
+ const StackSafetyInfo::FunctionInfo &FS = IT->second;
+ // The definition of this symbol may not be the definition in this linkage
+ // unit.
+ if (!FS.IsDSOLocal() || FS.IsInterposable())
+ return UnknownRange;
+ if (ParamNo >= FS.Params.size()) // possibly vararg
+ return UnknownRange;
+ return FS.Params[ParamNo].Use.Range;
+}
+
+bool StackSafetyDataFlowAnalysis::updateOneUse(UseInfo &US,
+ bool UpdateToFullSet) {
+ bool Changed = false;
+ for (auto &CS : US.Calls) {
+ assert(!CS.Offset.isEmptySet() &&
+ "Param range can't be empty-set, invalid offset range");
+
+ ConstantRange CalleeRange = getArgumentAccessRange(CS.Callee, CS.ParamNo);
+ CalleeRange = CalleeRange.add(CS.Offset);
+ if (!US.Range.contains(CalleeRange)) {
+ Changed = true;
+ if (UpdateToFullSet)
+ US.Range = UnknownRange;
+ else
+ US.Range = US.Range.unionWith(CalleeRange);
+ }
+ }
+ return Changed;
+}
+
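A toy sketch of the propagation step updateOneUse performs (the interval type and helper names are invented; the real code uses ConstantRange, which also models wrapping): the caller's known access range is widened by the callee's argument range shifted by the call-site offset, and the returned flag plays the role of "Changed" in the data flow:

#include <algorithm>
#include <cstdint>

struct Interval { uint64_t Lo = 0, Hi = 0; }; // [Lo, Hi)

Interval shift(Interval I, uint64_t Offset) { return {I.Lo + Offset, I.Hi + Offset}; }

// Grow Caller until it covers CalleeShifted; report whether anything changed,
// which is what puts callers back on the work list.
bool growToCover(Interval &Caller, Interval CalleeShifted) {
  Interval Old = Caller;
  Caller.Lo = std::min(Caller.Lo, CalleeShifted.Lo);
  Caller.Hi = std::max(Caller.Hi, CalleeShifted.Hi);
  return Caller.Lo != Old.Lo || Caller.Hi != Old.Hi;
}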
+void StackSafetyDataFlowAnalysis::updateOneNode(
+ const GlobalValue *Callee, StackSafetyInfo::FunctionInfo &FS) {
+ bool UpdateToFullSet = FS.UpdateCount > StackSafetyMaxIterations;
+ bool Changed = false;
+ for (auto &AS : FS.Allocas)
+ Changed |= updateOneUse(AS.Use, UpdateToFullSet);
+ for (auto &PS : FS.Params)
+ Changed |= updateOneUse(PS.Use, UpdateToFullSet);
+
+ if (Changed) {
+ LLVM_DEBUG(dbgs() << "=== update [" << FS.UpdateCount
+ << (UpdateToFullSet ? ", full-set" : "") << "] "
+ << FS.getName() << "\n");
+ // Callers of this function may need updating.
+ for (auto &CallerID : Callers[Callee])
+ WorkList.insert(CallerID);
+
+ ++FS.UpdateCount;
+ }
+}
+
+void StackSafetyDataFlowAnalysis::runDataFlow() {
+ Callers.clear();
+ WorkList.clear();
+
+ SmallVector<const GlobalValue *, 16> Callees;
+ for (auto &F : Functions) {
+ Callees.clear();
+ StackSafetyInfo::FunctionInfo &FS = F.second;
+ for (auto &AS : FS.Allocas)
+ for (auto &CS : AS.Use.Calls)
+ Callees.push_back(CS.Callee);
+ for (auto &PS : FS.Params)
+ for (auto &CS : PS.Use.Calls)
+ Callees.push_back(CS.Callee);
+
+ llvm::sort(Callees);
+ Callees.erase(std::unique(Callees.begin(), Callees.end()), Callees.end());
+
+ for (auto &Callee : Callees)
+ Callers[Callee].push_back(F.first);
+ }
+
+ updateAllNodes();
+
+ while (!WorkList.empty()) {
+ const GlobalValue *Callee = WorkList.back();
+ WorkList.pop_back();
+ updateOneNode(Callee);
+ }
+}
+
+void StackSafetyDataFlowAnalysis::verifyFixedPoint() {
+ WorkList.clear();
+ updateAllNodes();
+ assert(WorkList.empty());
+}
+
+StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() {
+ runDataFlow();
+ LLVM_DEBUG(verifyFixedPoint());
+
+ StackSafetyGlobalInfo SSI;
+ for (auto &F : Functions)
+ SSI.emplace(F.first, std::move(F.second));
+ return SSI;
+}
+
+void print(const StackSafetyGlobalInfo &SSI, raw_ostream &O, const Module &M) {
+ size_t Count = 0;
+ for (auto &F : M.functions())
+ if (!F.isDeclaration()) {
+ SSI.find(&F)->second.print(O);
+ O << "\n";
+ ++Count;
+ }
+ for (auto &A : M.aliases()) {
+ SSI.find(&A)->second.print(O);
+ O << "\n";
+ ++Count;
+ }
+ assert(Count == SSI.size() && "Unexpected functions in the result");
+}
+
+} // end anonymous namespace
+
+StackSafetyInfo::StackSafetyInfo() = default;
+StackSafetyInfo::StackSafetyInfo(StackSafetyInfo &&) = default;
+StackSafetyInfo &StackSafetyInfo::operator=(StackSafetyInfo &&) = default;
+
+StackSafetyInfo::StackSafetyInfo(FunctionInfo &&Info)
+ : Info(new FunctionInfo(std::move(Info))) {}
+
+StackSafetyInfo::~StackSafetyInfo() = default;
+
+void StackSafetyInfo::print(raw_ostream &O) const { Info->print(O); }
+
+AnalysisKey StackSafetyAnalysis::Key;
+
+StackSafetyInfo StackSafetyAnalysis::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ StackSafetyLocalAnalysis SSLA(F, AM.getResult<ScalarEvolutionAnalysis>(F));
+ return SSLA.run();
+}
+
+PreservedAnalyses StackSafetyPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ OS << "'Stack Safety Local Analysis' for function '" << F.getName() << "'\n";
+ AM.getResult<StackSafetyAnalysis>(F).print(OS);
+ return PreservedAnalyses::all();
+}
+
+char StackSafetyInfoWrapperPass::ID = 0;
+
+StackSafetyInfoWrapperPass::StackSafetyInfoWrapperPass() : FunctionPass(ID) {
+ initializeStackSafetyInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+void StackSafetyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.setPreservesAll();
+}
+
+void StackSafetyInfoWrapperPass::print(raw_ostream &O, const Module *M) const {
+ SSI.print(O);
+}
+
+bool StackSafetyInfoWrapperPass::runOnFunction(Function &F) {
+ StackSafetyLocalAnalysis SSLA(
+ F, getAnalysis<ScalarEvolutionWrapperPass>().getSE());
+ SSI = StackSafetyInfo(SSLA.run());
+ return false;
+}
+
+AnalysisKey StackSafetyGlobalAnalysis::Key;
+
+StackSafetyGlobalInfo
+StackSafetyGlobalAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ StackSafetyDataFlowAnalysis SSDFA(
+ M, [&FAM](Function &F) -> const StackSafetyInfo & {
+ return FAM.getResult<StackSafetyAnalysis>(F);
+ });
+ return SSDFA.run();
+}
+
+PreservedAnalyses StackSafetyGlobalPrinterPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ OS << "'Stack Safety Analysis' for module '" << M.getName() << "'\n";
+ print(AM.getResult<StackSafetyGlobalAnalysis>(M), OS, M);
+ return PreservedAnalyses::all();
+}
+
+char StackSafetyGlobalInfoWrapperPass::ID = 0;
+
+StackSafetyGlobalInfoWrapperPass::StackSafetyGlobalInfoWrapperPass()
+ : ModulePass(ID) {
+ initializeStackSafetyGlobalInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+void StackSafetyGlobalInfoWrapperPass::print(raw_ostream &O,
+ const Module *M) const {
+ ::print(SSI, O, *M);
+}
+
+void StackSafetyGlobalInfoWrapperPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<StackSafetyInfoWrapperPass>();
+}
+
+bool StackSafetyGlobalInfoWrapperPass::runOnModule(Module &M) {
+ StackSafetyDataFlowAnalysis SSDFA(
+ M, [this](Function &F) -> const StackSafetyInfo & {
+ return getAnalysis<StackSafetyInfoWrapperPass>(F).getResult();
+ });
+ SSI = SSDFA.run();
+ return false;
+}
+
+static const char LocalPassArg[] = "stack-safety-local";
+static const char LocalPassName[] = "Stack Safety Local Analysis";
+INITIALIZE_PASS_BEGIN(StackSafetyInfoWrapperPass, LocalPassArg, LocalPassName,
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(StackSafetyInfoWrapperPass, LocalPassArg, LocalPassName,
+ false, true)
+
+static const char GlobalPassName[] = "Stack Safety Analysis";
+INITIALIZE_PASS_BEGIN(StackSafetyGlobalInfoWrapperPass, DEBUG_TYPE,
+ GlobalPassName, false, false)
+INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass)
+INITIALIZE_PASS_END(StackSafetyGlobalInfoWrapperPass, DEBUG_TYPE,
+ GlobalPassName, false, false)
diff --git a/contrib/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
new file mode 100644
index 000000000000..e1a7e4476d12
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -0,0 +1,380 @@
+//===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
+//--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an algorithm that returns for a divergent branch
+// the set of basic blocks whose phi nodes become divergent due to divergent
+// control. These are the blocks that are reachable by two disjoint paths from
+// the branch or loop exits that have a reaching path that is disjoint from a
+// path to the loop latch.
+//
+// The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
+// control-induced divergence in phi nodes.
+//
+// -- Summary --
+// The SyncDependenceAnalysis lazily computes sync dependences [3].
+// The analysis evaluates the disjoint path criterion [2] by a reduction
+// to SSA construction. The SSA construction algorithm is implemented as
+// a simple data-flow analysis [1].
+//
+// [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy
+// [2] "Efficiently Computing Static Single Assignment Form
+// and the Control Dependence Graph", TOPLAS '91,
+// Cytron, Ferrante, Rosen, Wegman and Zadeck
+// [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack
+// [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira
+//
+// -- Sync dependence --
+// Sync dependence [4] characterizes the control flow aspect of the
+// propagation of branch divergence. For example,
+//
+// %cond = icmp slt i32 %tid, 10
+// br i1 %cond, label %then, label %else
+// then:
+// br label %merge
+// else:
+// br label %merge
+// merge:
+// %a = phi i32 [ 0, %then ], [ 1, %else ]
+//
+// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
+// because %tid is not on its use-def chains, %a is sync dependent on %tid
+// because the branch "br i1 %cond" depends on %tid and affects which value %a
+// is assigned to.
+//
+// -- Reduction to SSA construction --
+// There are two disjoint paths from A to X, if a certain variant of SSA
+// construction places a phi node in X under the following set-up scheme [2].
+//
+// This variant of SSA construction ignores incoming undef values.
+// That is paths from the entry without a definition do not result in
+// phi nodes.
+//
+// entry
+// / \
+// A \
+// / \ Y
+// B C /
+// \ / \ /
+// D E
+// \ /
+// F
+// Assume that A contains a divergent branch. We are interested
+// in the set of all blocks where each block is reachable from A
+// via two disjoint paths. This would be the set {D, F} in this
+// case.
+// To generally reduce this query to SSA construction we introduce
+// a virtual variable x and assign to x different values in each
+// successor block of A.
+// entry
+// / \
+// A \
+// / \ Y
+// x = 0 x = 1 /
+// \ / \ /
+// D E
+// \ /
+// F
+// Our flavor of SSA construction for x will construct the following
+// entry
+// / \
+// A \
+// / \ Y
+// x0 = 0 x1 = 1 /
+// \ / \ /
+// x2=phi E
+// \ /
+// x3=phi
+// The blocks D and F contain phi nodes and are thus each reachable
+// by two disjoint paths from A.
+//
+// -- Remarks --
+// In case of loop exits we need to check the disjoint path criterion for loops
+// [2]. To this end, we check whether the definition of x differs between the
+// loop exit and the loop header (_after_ SSA construction).
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/SyncDependenceAnalysis.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+
+#include <stack>
+#include <unordered_set>
+
+#define DEBUG_TYPE "sync-dependence"
+
+namespace llvm {
+
+ConstBlockSet SyncDependenceAnalysis::EmptyBlockSet;
+
+SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ const LoopInfo &LI)
+ : FuncRPOT(DT.getRoot()->getParent()), DT(DT), PDT(PDT), LI(LI) {}
+
+SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
+
+using FunctionRPOT = ReversePostOrderTraversal<const Function *>;
+
+// divergence propagator for reducible CFGs
+struct DivergencePropagator {
+ const FunctionRPOT &FuncRPOT;
+ const DominatorTree &DT;
+ const PostDominatorTree &PDT;
+ const LoopInfo &LI;
+
+ // identified join points
+ std::unique_ptr<ConstBlockSet> JoinBlocks;
+
+ // reached loop exits (by a path disjoint to a path to the loop header)
+ SmallPtrSet<const BasicBlock *, 4> ReachedLoopExits;
+
+ // if DefMap[B] == C then C is the dominating definition at block B
+ // if DefMap[B] ~ undef then we haven't seen B yet
+ // if DefMap[B] == B then B is a join point of disjoint paths from X or B is
+ // an immediate successor of X (initial value).
+ using DefiningBlockMap = std::map<const BasicBlock *, const BasicBlock *>;
+ DefiningBlockMap DefMap;
+
+ // all blocks with pending visits
+ std::unordered_set<const BasicBlock *> PendingUpdates;
+
+ DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT,
+ const PostDominatorTree &PDT, const LoopInfo &LI)
+ : FuncRPOT(FuncRPOT), DT(DT), PDT(PDT), LI(LI),
+ JoinBlocks(new ConstBlockSet) {}
+
+ // set the definition at @block and mark @block as pending for a visit
+ void addPending(const BasicBlock &Block, const BasicBlock &DefBlock) {
+ bool WasAdded = DefMap.emplace(&Block, &DefBlock).second;
+ if (WasAdded)
+ PendingUpdates.insert(&Block);
+ }
+
+ void printDefs(raw_ostream &Out) {
+ Out << "Propagator::DefMap {\n";
+ for (const auto *Block : FuncRPOT) {
+ auto It = DefMap.find(Block);
+ Out << Block->getName() << " : ";
+ if (It == DefMap.end()) {
+ Out << "\n";
+ } else {
+ const auto *DefBlock = It->second;
+ Out << (DefBlock ? DefBlock->getName() : "<null>") << "\n";
+ }
+ }
+ Out << "}\n";
+ }
+
+ // process @succBlock with reaching definition @defBlock
+ // the original divergent branch was in @parentLoop (if any)
+ void visitSuccessor(const BasicBlock &SuccBlock, const Loop *ParentLoop,
+ const BasicBlock &DefBlock) {
+
+ // @succBlock is a loop exit
+ if (ParentLoop && !ParentLoop->contains(&SuccBlock)) {
+ DefMap.emplace(&SuccBlock, &DefBlock);
+ ReachedLoopExits.insert(&SuccBlock);
+ return;
+ }
+
+ // first reaching def?
+ auto ItLastDef = DefMap.find(&SuccBlock);
+ if (ItLastDef == DefMap.end()) {
+ addPending(SuccBlock, DefBlock);
+ return;
+ }
+
+ // a join of at least two definitions
+ if (ItLastDef->second != &DefBlock) {
+ // do we know this join already?
+ if (!JoinBlocks->insert(&SuccBlock).second)
+ return;
+
+ // update the definition
+ addPending(SuccBlock, SuccBlock);
+ }
+ }
+
+ // find all blocks reachable by two disjoint paths from @rootTerm.
+ // This method works for both divergent terminators and loops with
+ // divergent exits.
+ // @rootBlock is either the block containing the branch or the header of the
+ // divergent loop.
+ // @nodeSuccessors is the set of successors of the node (Loop or Terminator)
+ // headed by @rootBlock.
+ // @parentLoop is the parent loop of the Loop or the loop that contains the
+ // Terminator.
+ template <typename SuccessorIterable>
+ std::unique_ptr<ConstBlockSet>
+ computeJoinPoints(const BasicBlock &RootBlock,
+ SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
+ assert(JoinBlocks);
+
+ // immediate post dominator (no join block beyond that block)
+ const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
+ const auto *IpdNode = PdNode->getIDom();
+ const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+
+ // bootstrap with branch targets
+ for (const auto *SuccBlock : NodeSuccessors) {
+ DefMap.emplace(SuccBlock, SuccBlock);
+
+ if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
+ // immediate loop exit from node.
+ ReachedLoopExits.insert(SuccBlock);
+ continue;
+ } else {
+ // regular successor
+ PendingUpdates.insert(SuccBlock);
+ }
+ }
+
+ auto ItBeginRPO = FuncRPOT.begin();
+
+ // skip until term (TODO RPOT won't let us start at @term directly)
+ for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) {}
+
+ auto ItEndRPO = FuncRPOT.end();
+ assert(ItBeginRPO != ItEndRPO);
+
+ // propagate definitions at the immediate successors of the node in RPO
+ auto ItBlockRPO = ItBeginRPO;
+ while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) {
+ const auto *Block = *ItBlockRPO;
+
+ // skip @block if not pending update
+ auto ItPending = PendingUpdates.find(Block);
+ if (ItPending == PendingUpdates.end())
+ continue;
+ PendingUpdates.erase(ItPending);
+
+ // propagate definition at @block to its successors
+ auto ItDef = DefMap.find(Block);
+ const auto *DefBlock = ItDef->second;
+ assert(DefBlock);
+
+ auto *BlockLoop = LI.getLoopFor(Block);
+ if (ParentLoop &&
+ (ParentLoop != BlockLoop && ParentLoop->contains(BlockLoop))) {
+ // if the successor is the header of a nested loop pretend it's a
+ // single node with the loop's exits as successors
+ SmallVector<BasicBlock *, 4> BlockLoopExits;
+ BlockLoop->getExitBlocks(BlockLoopExits);
+ for (const auto *BlockLoopExit : BlockLoopExits) {
+ visitSuccessor(*BlockLoopExit, ParentLoop, *DefBlock);
+ }
+
+ } else {
+ // the successors are either on the same loop level or loop exits
+ for (const auto *SuccBlock : successors(Block)) {
+ visitSuccessor(*SuccBlock, ParentLoop, *DefBlock);
+ }
+ }
+ }
+
+ // We need to know the definition at the parent loop header to decide
+ // whether the definition at the header is different from the definition at
+ // the loop exits, which would indicate divergent loop exits.
+ //
+ // A // loop header
+ // |
+ // B // nested loop header
+ // |
+ // C -> X (exit from B loop) -..-> (A latch)
+ // |
+ // D -> back to B (B latch)
+ // |
+ // proper exit from both loops
+ //
+ // D post-dominates B as it is the only proper exit from the "A loop".
+ // If C has a divergent branch, propagation will therefore stop at D.
+ // That implies that B will never receive a definition.
+ // But that definition can only be the same as at D (D itself in this case)
+ // because all paths to anywhere have to pass through D.
+ //
+ const BasicBlock *ParentLoopHeader =
+ ParentLoop ? ParentLoop->getHeader() : nullptr;
+ if (ParentLoop && ParentLoop->contains(PdBoundBlock)) {
+ DefMap[ParentLoopHeader] = DefMap[PdBoundBlock];
+ }
+
+ // analyze reached loop exits
+ if (!ReachedLoopExits.empty()) {
+ assert(ParentLoop);
+ const auto *HeaderDefBlock = DefMap[ParentLoopHeader];
+ LLVM_DEBUG(printDefs(dbgs()));
+ assert(HeaderDefBlock && "no definition in header of carrying loop");
+
+ for (const auto *ExitBlock : ReachedLoopExits) {
+ auto ItExitDef = DefMap.find(ExitBlock);
+ assert((ItExitDef != DefMap.end()) &&
+ "no reaching def at reachable loop exit");
+ if (ItExitDef->second != HeaderDefBlock) {
+ JoinBlocks->insert(ExitBlock);
+ }
+ }
+ }
+
+ return std::move(JoinBlocks);
+ }
+};
+
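The SSA reduction described in the header comment can be sketched standalone (simplifying assumptions: an acyclic CFG, a precomputed topological order, and string block names; the real propagator uses the function's RPO and handles nested loops and loop exits): each successor of the divergent branch starts with its own definition, definitions are forwarded along edges, and any block where two different definitions meet is a join block:

#include <map>
#include <set>
#include <string>
#include <vector>

using CFG = std::map<std::string, std::vector<std::string>>; // block -> successors

// Topo lists every block in topological order; every block needs a map entry
// (possibly empty). Returns the blocks reachable from Branch by two disjoint
// paths.
std::set<std::string> joinBlocks(const CFG &Succs,
                                 const std::vector<std::string> &Topo,
                                 const std::string &Branch) {
  std::map<std::string, std::string> Def; // reaching "definition" per block
  std::set<std::string> Joins;
  for (const std::string &S : Succs.at(Branch))
    Def[S] = S;                               // distinct value per branch target
  for (const std::string &B : Topo) {
    auto ItB = Def.find(B);
    if (ItB == Def.end())
      continue;                               // not reached from the branch
    for (const std::string &S : Succs.at(B)) {
      auto ItS = Def.find(S);
      if (ItS == Def.end())
        Def[S] = ItB->second;                 // first reaching definition
      else if (ItS->second != ItB->second) {
        Joins.insert(S);                      // two different defs meet here
        ItS->second = S;                      // the join acts as a fresh def
      }
    }
  }
  return Joins;
}

// For the diamond A -> {B, C}, B -> D, C -> D (Topo = A, B, C, D) this
// returns {D}, matching the phi placement in the example above.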
+const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
+ using LoopExitVec = SmallVector<BasicBlock *, 4>;
+ LoopExitVec LoopExits;
+ Loop.getExitBlocks(LoopExits);
+ if (LoopExits.size() < 1) {
+ return EmptyBlockSet;
+ }
+
+ // already available in cache?
+ auto ItCached = CachedLoopExitJoins.find(&Loop);
+ if (ItCached != CachedLoopExitJoins.end())
+ return *ItCached->second;
+
+ // compute all join points
+ DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
+ auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
+ *Loop.getHeader(), LoopExits, Loop.getParentLoop());
+
+ auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
+ assert(ItInserted.second);
+ return *ItInserted.first->second;
+}
+
+const ConstBlockSet &
+SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
+ // trivial case
+ if (Term.getNumSuccessors() < 1) {
+ return EmptyBlockSet;
+ }
+
+ // already available in cache?
+ auto ItCached = CachedBranchJoins.find(&Term);
+ if (ItCached != CachedBranchJoins.end())
+ return *ItCached->second;
+
+ // compute all join points
+ DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
+ const auto &TermBlock = *Term.getParent();
+ auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
+ TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
+
+ auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
+ assert(ItInserted.second);
+ return *ItInserted.first->second;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp
index b085fa274d7f..c2d7bb11a4cf 100644
--- a/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp
+++ b/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp
@@ -14,22 +14,21 @@
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
using namespace llvm;
// Given an SCC, propagate entry counts along the edge of the SCC nodes.
template <typename CallGraphType>
void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
- const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount,
- AddCountTy AddCount) {
+ const SccTy &SCC, GetProfCountTy GetProfCount, AddCountTy AddCount) {
- SmallPtrSet<NodeRef, 8> SCCNodes;
+ DenseSet<NodeRef> SCCNodes;
SmallVector<std::pair<NodeRef, EdgeRef>, 8> SCCEdges, NonSCCEdges;
for (auto &Node : SCC)
@@ -54,17 +53,13 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
// This ensures that the order of
// traversal of nodes within the SCC doesn't affect the final result.
- DenseMap<NodeRef, uint64_t> AdditionalCounts;
+ DenseMap<NodeRef, Scaled64> AdditionalCounts;
for (auto &E : SCCEdges) {
- auto OptRelFreq = GetRelBBFreq(E.second);
- if (!OptRelFreq)
+ auto OptProfCount = GetProfCount(E.first, E.second);
+ if (!OptProfCount)
continue;
- Scaled64 RelFreq = OptRelFreq.getValue();
- auto Caller = E.first;
auto Callee = CGT::edge_dest(E.second);
- RelFreq *= Scaled64(GetCount(Caller), 0);
- uint64_t AdditionalCount = RelFreq.toInt<uint64_t>();
- AdditionalCounts[Callee] += AdditionalCount;
+ AdditionalCounts[Callee] += OptProfCount.getValue();
}
// Update the counts for the nodes in the SCC.
@@ -73,14 +68,11 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
// Now update the counts for nodes outside the SCC.
for (auto &E : NonSCCEdges) {
- auto OptRelFreq = GetRelBBFreq(E.second);
- if (!OptRelFreq)
+ auto OptProfCount = GetProfCount(E.first, E.second);
+ if (!OptProfCount)
continue;
- Scaled64 RelFreq = OptRelFreq.getValue();
- auto Caller = E.first;
auto Callee = CGT::edge_dest(E.second);
- RelFreq *= Scaled64(GetCount(Caller), 0);
- AddCount(Callee, RelFreq.toInt<uint64_t>());
+ AddCount(Callee, OptProfCount.getValue());
}
}
@@ -94,8 +86,7 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
template <typename CallGraphType>
void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG,
- GetRelBBFreqTy GetRelBBFreq,
- GetCountTy GetCount,
+ GetProfCountTy GetProfCount,
AddCountTy AddCount) {
std::vector<SccTy> SCCs;
@@ -107,7 +98,8 @@ void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG,
// The scc iterator returns the scc in bottom-up order, so reverse the SCCs
// and call propagateFromSCC.
for (auto &SCC : reverse(SCCs))
- propagateFromSCC(SCC, GetRelBBFreq, GetCount, AddCount);
+ propagateFromSCC(SCC, GetProfCount, AddCount);
}
template class llvm::SyntheticCountsUtils<const CallGraph *>;
+template class llvm::SyntheticCountsUtils<ModuleSummaryIndex *>;
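A hedged sketch of the arithmetic the new GetProfCount callback is expected to fold together (names and the double stand-in for ScaledNumber are assumptions): where the old interface multiplied a relative block frequency by the caller's count at every edge, the callback now returns that product directly:

#include <optional>

using Scaled = double; // stand-in for ScaledNumber<uint64_t>

std::optional<Scaled> getProfCount(Scaled CallerCount,
                                   std::optional<Scaled> RelBBFreq) {
  if (!RelBBFreq)
    return std::nullopt;           // no frequency info for this edge
  return *RelBBFreq * CallerCount; // what AdditionalCounts accumulates
}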
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 102135fbf313..4643f75da42d 100644
--- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -413,17 +413,17 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_flsll);
}
- // The following functions are available on Linux,
- // but Android uses bionic instead of glibc.
- if (!T.isOSLinux() || T.isAndroid()) {
+ // The following functions are only available on GNU/Linux (using glibc).
+ // Linux variants without glibc (e.g., bionic, musl) may have some subset.
+ if (!T.isOSLinux() || !T.isGNUEnvironment()) {
TLI.setUnavailable(LibFunc_dunder_strdup);
TLI.setUnavailable(LibFunc_dunder_strtok_r);
TLI.setUnavailable(LibFunc_dunder_isoc99_scanf);
TLI.setUnavailable(LibFunc_dunder_isoc99_sscanf);
TLI.setUnavailable(LibFunc_under_IO_getc);
TLI.setUnavailable(LibFunc_under_IO_putc);
- // But, Android has memalign.
- if (!T.isAndroid())
+ // But, Android and musl have memalign.
+ if (!T.isAndroid() && !T.isMusl())
TLI.setUnavailable(LibFunc_memalign);
TLI.setUnavailable(LibFunc_fopen64);
TLI.setUnavailable(LibFunc_fseeko64);
@@ -613,6 +613,24 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
unsigned NumParams = FTy.getNumParams();
switch (F) {
+ case LibFunc_execl:
+ case LibFunc_execlp:
+ case LibFunc_execle:
+ return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32));
+ case LibFunc_execv:
+ case LibFunc_execvp:
+ return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32));
+ case LibFunc_execvP:
+ case LibFunc_execvpe:
+ case LibFunc_execve:
+ return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getParamType(2)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32));
case LibFunc_strlen:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy());
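For reference, the C declarations these parameter-count checks correspond to (standard POSIX forms, with execvP the BSD variant and execvpe the GNU extension; listed only to make the NumParams tests above concrete):

// Variadic forms: at least two pointer parameters, then the NULL-terminated list.
int execl(const char *path, const char *arg, ...);
int execlp(const char *file, const char *arg, ...);
int execle(const char *path, const char *arg, ...);
// Exactly two pointer parameters.
int execv(const char *path, char *const argv[]);
int execvp(const char *file, char *const argv[]);
// Exactly three pointer parameters.
int execve(const char *path, char *const argv[], char *const envp[]);
int execvpe(const char *file, char *const argv[], char *const envp[]);
int execvP(const char *file, const char *search_path, char *const argv[]);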
@@ -863,6 +881,8 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
+ case LibFunc_fork:
+ return (NumParams == 0 && FTy.getReturnType()->isIntegerTy(32));
case LibFunc_fdopen:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
@@ -1399,10 +1419,10 @@ static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end());
- llvm::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName);
+ llvm::sort(VectorDescs, compareByScalarFnName);
ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end());
- llvm::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
+ llvm::sort(ScalarDescs, compareByVectorFnName);
}
void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index 7233a86e5daf..9151d46c6cce 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -268,6 +268,10 @@ bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
return TTIImpl->enableInterleavedAccessVectorization();
}
+bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
+ return TTIImpl->enableMaskedInterleavedAccessVectorization();
+}
+
bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
@@ -384,6 +388,55 @@ unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
+TargetTransformInfo::OperandValueKind
+TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
+ OperandValueKind OpInfo = OK_AnyValue;
+ OpProps = OP_None;
+
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().isPowerOf2())
+ OpProps = OP_PowerOf2;
+ return OK_UniformConstantValue;
+ }
+
+ // A broadcast shuffle creates a uniform value.
+ // TODO: Add support for non-zero index broadcasts.
+ // TODO: Add support for different source vector width.
+ if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
+ if (ShuffleInst->isZeroEltSplat())
+ OpInfo = OK_UniformValue;
+
+ const Value *Splat = getSplatValue(V);
+
+ // Check for a splat of a constant or for a non uniform vector of constants
+ // and check if the constant(s) are all powers of two.
+ if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
+ OpInfo = OK_NonUniformConstantValue;
+ if (Splat) {
+ OpInfo = OK_UniformConstantValue;
+ if (auto *CI = dyn_cast<ConstantInt>(Splat))
+ if (CI->getValue().isPowerOf2())
+ OpProps = OP_PowerOf2;
+ } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ OpProps = OP_PowerOf2;
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
+ if (CI->getValue().isPowerOf2())
+ continue;
+ OpProps = OP_None;
+ break;
+ }
+ }
+ }
+
+ // Check for a splat of a uniform value. This is not loop aware, so return
+ // true only for the obviously uniform cases (argument, globalvalue)
+ if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
+ OpInfo = OK_UniformValue;
+
+ return OpInfo;
+}
+
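A toy classifier mirroring the decisions made by getOperandInfo above for a plain vector of integers (the enum names are invented, not the TargetTransformInfo values): a splatted constant is a uniform constant, mixed constants are non-uniform, and the power-of-two property survives only if every element has it:

#include <cstdint>
#include <vector>

enum class Kind { AnyValue, UniformConstant, NonUniformConstant };

struct OperandInfo { Kind K = Kind::AnyValue; bool AllPowerOfTwo = false; };

bool isPow2(uint64_t V) { return V && !(V & (V - 1)); }

OperandInfo classify(const std::vector<uint64_t> &Elts) {
  OperandInfo Info;
  if (Elts.empty())
    return Info;
  bool Uniform = true, AllPow2 = true;
  for (uint64_t V : Elts) {
    Uniform = Uniform && (V == Elts.front());
    AllPow2 = AllPow2 && isPow2(V);
  }
  Info.K = Uniform ? Kind::UniformConstant : Kind::NonUniformConstant;
  Info.AllPowerOfTwo = AllPow2;
  return Info;
}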
int TargetTransformInfo::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
@@ -472,9 +525,12 @@ int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
int TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace) const {
+ unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
+ bool UseMaskForGaps) const {
int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond,
+ UseMaskForGaps);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -569,6 +625,12 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
return TTIImpl->areInlineCompatible(Caller, Callee);
}
+bool TargetTransformInfo::areFunctionArgsABICompatible(
+ const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const {
+ return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
+}
+
bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
Type *Ty) const {
return TTIImpl->isIndexedLoadLegal(Mode, Ty);
@@ -630,49 +692,6 @@ int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
return TTIImpl->getInstructionLatency(I);
}
-static TargetTransformInfo::OperandValueKind
-getOperandInfo(Value *V, TargetTransformInfo::OperandValueProperties &OpProps) {
- TargetTransformInfo::OperandValueKind OpInfo =
- TargetTransformInfo::OK_AnyValue;
- OpProps = TargetTransformInfo::OP_None;
-
- if (auto *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->getValue().isPowerOf2())
- OpProps = TargetTransformInfo::OP_PowerOf2;
- return TargetTransformInfo::OK_UniformConstantValue;
- }
-
- const Value *Splat = getSplatValue(V);
-
- // Check for a splat of a constant or for a non uniform vector of constants
- // and check if the constant(s) are all powers of two.
- if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
- OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
- if (Splat) {
- OpInfo = TargetTransformInfo::OK_UniformConstantValue;
- if (auto *CI = dyn_cast<ConstantInt>(Splat))
- if (CI->getValue().isPowerOf2())
- OpProps = TargetTransformInfo::OP_PowerOf2;
- } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
- OpProps = TargetTransformInfo::OP_PowerOf2;
- for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
- if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
- if (CI->getValue().isPowerOf2())
- continue;
- OpProps = TargetTransformInfo::OP_None;
- break;
- }
- }
- }
-
- // Check for a splat of a uniform value. This is not loop aware, so return
- // true only for the obviously uniform cases (argument, globalvalue)
- if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
- OpInfo = TargetTransformInfo::OK_UniformValue;
-
- return OpInfo;
-}
-
static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
unsigned Level) {
// We don't need a shuffle if we just want to have element 0 in position 0 of
@@ -1101,14 +1120,20 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
}
case Instruction::ShuffleVector: {
const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
- // TODO: Identify and add costs for insert/extract subvector, etc.
+ Type *Ty = Shuffle->getType();
+ Type *SrcTy = Shuffle->getOperand(0)->getType();
+
+ // TODO: Identify and add costs for insert subvector, etc.
+ int SubIndex;
+ if (Shuffle->isExtractSubvectorMask(SubIndex))
+ return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);
+
if (Shuffle->changesLength())
return -1;
if (Shuffle->isIdentity())
return 0;
- Type *Ty = Shuffle->getType();
if (Shuffle->isReverse())
return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);
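As an illustration (not part of the commit): the new TargetTransformInfo::getOperandInfo hook above distinguishes uniform constants, non-uniform constant vectors and uniform run-time values, and separately tracks a power-of-two property for the arithmetic cost model. A minimal standalone sketch of that classification in plain C++, using stand-in types rather than LLVM's:

#include <cstdint>
#include <vector>

// Illustrative only: mirrors the OperandValueKind / OperandValueProperties
// split that getOperandInfo reports to the cost model.
enum Kind { AnyValue, UniformConstant, NonUniformConstant };
enum Props { None, PowerOf2 };

static bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

// Classify a constant vector operand given as its lanes (empty = unknown).
Kind classify(const std::vector<uint64_t> &Lanes, Props &P) {
  P = None;
  if (Lanes.empty())
    return AnyValue;                        // nothing known about the operand
  bool AllEqual = true, AllPow2 = true;
  for (uint64_t L : Lanes) {
    AllEqual &= (L == Lanes.front());
    AllPow2 &= isPow2(L);
  }
  if (AllEqual) {                           // splat, e.g. <8, 8, 8, 8>
    if (isPow2(Lanes.front()))
      P = PowerOf2;
    return UniformConstant;
  }
  if (AllPow2)                              // e.g. <2, 4, 8, 16>
    P = PowerOf2;
  return NonUniformConstant;
}

Under this scheme a splat like <8, 8, 8, 8> reports a uniform power-of-two constant, while <2, 4, 8, 16> is non-uniform but keeps the power-of-two property.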
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 25a154edf4ac..83974da30a54 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -399,20 +399,20 @@ bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
}
FunctionModRefBehavior
-TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
+TypeBasedAAResult::getModRefBehavior(const CallBase *Call) {
if (!EnableTBAA)
- return AAResultBase::getModRefBehavior(CS);
+ return AAResultBase::getModRefBehavior(Call);
FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
- if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
(isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
Min = FMRB_OnlyReadsMemory;
- return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(Call) & Min);
}
FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
@@ -420,33 +420,30 @@ FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
return AAResultBase::getModRefBehavior(F);
}
-ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
if (const MDNode *L = Loc.AATags.TBAA)
- if (const MDNode *M =
- CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(L, M))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
-ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
- if (const MDNode *M1 =
- CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
- if (const MDNode *M2 =
- CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M1 = Call1->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M2 = Call2->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(M1, M2))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
}
bool MDNode::isTBAAVtableAccess() const {
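As an illustration (not part of the commit): the CallBase-based getModRefBehavior above still combines the TBAA-derived bound with the base analysis by a bitwise AND of behavior masks, keeping only what both agree on. A tiny sketch with made-up flag values, not LLVM's real FunctionModRefBehavior enumerators:

#include <cstdio>

// Made-up flag layout; the real FunctionModRefBehavior bits are defined in
// AliasAnalysis.h.
enum Behavior : unsigned {
  OnlyReadsMemory = 0x1,                   // may read, never writes
  OnlyAccessesArgs = 0x2,                  // touches argument memory only
  UnknownBehavior = 0x3                    // all bits set: no information
};

int main() {
  unsigned Base = UnknownBehavior;         // what the base analysis reports
  unsigned Min = OnlyReadsMemory;          // what !tbaa on the call implies
  unsigned Result = Base & Min;            // intersect: keep the stronger claim
  std::printf("%#x\n", Result);            // 0x1 -> the call only reads memory
  return 0;
}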
diff --git a/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp b/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
index 6871e4887c9e..bd13a43b8d46 100644
--- a/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
@@ -22,11 +23,21 @@ using namespace llvm;
// Search for virtual calls that call FPtr and add them to DevirtCalls.
static void
findCallsAtConstantOffset(SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- bool *HasNonCallUses, Value *FPtr, uint64_t Offset) {
+ bool *HasNonCallUses, Value *FPtr, uint64_t Offset,
+ const CallInst *CI, DominatorTree &DT) {
for (const Use &U : FPtr->uses()) {
- Value *User = U.getUser();
+ Instruction *User = cast<Instruction>(U.getUser());
+ // Ignore this instruction if it is not dominated by the type intrinsic
+ // being analyzed. Otherwise we may transform a call sharing the same
+ // vtable pointer incorrectly. Specifically, this situation can arise
+ // after indirect call promotion and inlining, where we may have uses
+ // of the vtable pointer guarded by a function pointer check, and a fallback
+ // indirect call.
+ if (!DT.dominates(CI, User))
+ continue;
if (isa<BitCastInst>(User)) {
- findCallsAtConstantOffset(DevirtCalls, HasNonCallUses, User, Offset);
+ findCallsAtConstantOffset(DevirtCalls, HasNonCallUses, User, Offset, CI,
+ DT);
} else if (auto CI = dyn_cast<CallInst>(User)) {
DevirtCalls.push_back({Offset, CI});
} else if (auto II = dyn_cast<InvokeInst>(User)) {
@@ -38,23 +49,23 @@ findCallsAtConstantOffset(SmallVectorImpl<DevirtCallSite> &DevirtCalls,
}
// Search for virtual calls that load from VPtr and add them to DevirtCalls.
-static void
-findLoadCallsAtConstantOffset(const Module *M,
- SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- Value *VPtr, int64_t Offset) {
+static void findLoadCallsAtConstantOffset(
+ const Module *M, SmallVectorImpl<DevirtCallSite> &DevirtCalls, Value *VPtr,
+ int64_t Offset, const CallInst *CI, DominatorTree &DT) {
for (const Use &U : VPtr->uses()) {
Value *User = U.getUser();
if (isa<BitCastInst>(User)) {
- findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset);
+ findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset, CI, DT);
} else if (isa<LoadInst>(User)) {
- findCallsAtConstantOffset(DevirtCalls, nullptr, User, Offset);
+ findCallsAtConstantOffset(DevirtCalls, nullptr, User, Offset, CI, DT);
} else if (auto GEP = dyn_cast<GetElementPtrInst>(User)) {
// Take into account the GEP offset.
if (VPtr == GEP->getPointerOperand() && GEP->hasAllConstantIndices()) {
SmallVector<Value *, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
int64_t GEPOffset = M->getDataLayout().getIndexedOffsetInType(
GEP->getSourceElementType(), Indices);
- findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset + GEPOffset);
+ findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset + GEPOffset,
+ CI, DT);
}
}
}
@@ -62,7 +73,8 @@ findLoadCallsAtConstantOffset(const Module *M,
void llvm::findDevirtualizableCallsForTypeTest(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI) {
+ SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI,
+ DominatorTree &DT) {
assert(CI->getCalledFunction()->getIntrinsicID() == Intrinsic::type_test);
const Module *M = CI->getParent()->getParent()->getParent();
@@ -79,15 +91,15 @@ void llvm::findDevirtualizableCallsForTypeTest(
// If we found any, search for virtual calls based on %p and add them to
// DevirtCalls.
if (!Assumes.empty())
- findLoadCallsAtConstantOffset(M, DevirtCalls,
- CI->getArgOperand(0)->stripPointerCasts(), 0);
+ findLoadCallsAtConstantOffset(
+ M, DevirtCalls, CI->getArgOperand(0)->stripPointerCasts(), 0, CI, DT);
}
void llvm::findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
SmallVectorImpl<Instruction *> &LoadedPtrs,
SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
- const CallInst *CI) {
+ const CallInst *CI, DominatorTree &DT) {
assert(CI->getCalledFunction()->getIntrinsicID() ==
Intrinsic::type_checked_load);
@@ -114,5 +126,5 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad(
for (Value *LoadedPtr : LoadedPtrs)
findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr,
- Offset->getZExtValue());
+ Offset->getZExtValue(), CI, DT);
}
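As a hedged usage sketch (not part of the commit): both entry points above now require a DominatorTree so that only uses dominated by the type intrinsic are considered. Assuming the updated signatures shown in this diff and standard LLVM headers, a caller would look roughly like this; collectDevirtCandidates is a made-up name:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// TypeTestCI is assumed to be a call to llvm.type.test inside F.
void collectDevirtCandidates(Function &F, const CallInst *TypeTestCI) {
  DominatorTree DT(F);                         // dominance info for F
  SmallVector<DevirtCallSite, 4> DevirtCalls;  // virtual calls we may rewrite
  SmallVector<CallInst *, 4> Assumes;          // llvm.assume users of the test
  findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, TypeTestCI, DT);
  // Uses not dominated by TypeTestCI were skipped above, so a fallback
  // indirect call that merely shares the vtable pointer is no longer
  // misreported as a devirtualization candidate.
}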
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index edd46c5fe362..0446426c0e66 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -118,14 +119,18 @@ struct Query {
/// (all of which can call computeKnownBits), and so on.
std::array<const Value *, MaxDepth> Excluded;
+ /// If true, it is safe to use metadata during simplification.
+ InstrInfoQuery IIQ;
+
unsigned NumExcluded = 0;
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr)
- : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE) {}
+ const DominatorTree *DT, bool UseInstrInfo,
+ OptimizationRemarkEmitter *ORE = nullptr)
+ : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}
Query(const Query &Q, const Value *NewExcl)
- : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE),
+ : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ),
NumExcluded(Q.NumExcluded) {
Excluded = Q.Excluded;
Excluded[NumExcluded++] = NewExcl;
@@ -165,9 +170,9 @@ void llvm::computeKnownBits(const Value *V, KnownBits &Known,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
::computeKnownBits(V, Known, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
+ Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}
static KnownBits computeKnownBits(const Value *V, unsigned Depth,
@@ -177,15 +182,16 @@ KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT,
- OptimizationRemarkEmitter *ORE) {
- return ::computeKnownBits(V, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
+ OptimizationRemarkEmitter *ORE,
+ bool UseInstrInfo) {
+ return ::computeKnownBits(
+ V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}
bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
+ const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
assert(LHS->getType() == RHS->getType() &&
"LHS and RHS should have the same type");
assert(LHS->getType()->isIntOrIntVectorTy() &&
@@ -201,8 +207,8 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
KnownBits LHSKnown(IT->getBitWidth());
KnownBits RHSKnown(IT->getBitWidth());
- computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT);
- computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT);
+ computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
+ computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
}
@@ -222,69 +228,71 @@ static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q);
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
- bool OrZero,
- unsigned Depth, AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT));
+ bool OrZero, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::isKnownToBeAPowerOfTwo(
+ V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);
bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::isKnownNonZero(V, Depth,
+ Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
- unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
+ KnownBits Known =
+ computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
return Known.isNonNegative();
}
bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
+ const DominatorTree *DT, bool UseInstrInfo) {
if (auto *CI = dyn_cast<ConstantInt>(V))
return CI->getValue().isStrictlyPositive();
// TODO: We're doing two recursive queries here. We should factor this such
// that only a single query is needed.
- return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT) &&
- isKnownNonZero(V, DL, Depth, AC, CxtI, DT);
+ return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) &&
+ isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
}
bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
+ const DominatorTree *DT, bool UseInstrInfo) {
+ KnownBits Known =
+ computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
return Known.isNegative();
}
static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);
bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
- const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isKnownNonEqual(V1, V2, Query(DL, AC,
- safeCxtI(V1, safeCxtI(V2, CxtI)),
- DT));
+ const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
+ return ::isKnownNonEqual(V1, V2,
+ Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT,
+ UseInstrInfo, /*ORE=*/nullptr));
}
static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
const Query &Q);
bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
- const DataLayout &DL,
- unsigned Depth, AssumptionCache *AC,
- const Instruction *CxtI, const DominatorTree *DT) {
- return ::MaskedValueIsZero(V, Mask, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT));
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::MaskedValueIsZero(
+ V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
@@ -293,8 +301,9 @@ static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::ComputeNumSignBits(
+ V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
@@ -965,7 +974,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
switch (I->getOpcode()) {
default: break;
case Instruction::Load:
- if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
+ if (MDNode *MD =
+ Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
computeKnownBitsFromRangeMetadata(*MD, Known);
break;
case Instruction::And: {
@@ -1014,7 +1024,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
break;
}
case Instruction::Mul: {
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known,
Known2, Depth, Q);
break;
@@ -1082,7 +1092,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// RHS from matchSelectPattern returns the negation part of abs pattern.
// If the negate has an NSW flag we can assume the sign bit of the result
// will be 0 because that makes abs(INT_MIN) undefined.
- if (cast<Instruction>(RHS)->hasNoSignedWrap())
+ if (Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
MaxHighZeros = 1;
}
@@ -1151,7 +1161,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
}
case Instruction::Shl: {
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
APInt KZResult = KnownZero << ShiftAmt;
KZResult.setLowBits(ShiftAmt); // Low bits known 0.
@@ -1202,13 +1212,13 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
break;
}
case Instruction::Sub: {
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
Known, Known2, Depth, Q);
break;
}
case Instruction::Add: {
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
Known, Known2, Depth, Q);
break;
@@ -1369,7 +1379,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
Known3.countMinTrailingZeros()));
auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
- if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
+ if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
// If initial value of recurrence is nonnegative, and we are adding
// a nonnegative number with nsw, the result can only be nonnegative
// or poison value regardless of the number of times we execute the
@@ -1442,7 +1452,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// If range metadata is attached to this call, set known bits from that,
// and then intersect with known bits based on other properties of the
// function.
- if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
+ if (MDNode *MD =
+ Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
computeKnownBitsFromRangeMetadata(*MD, Known);
if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
computeKnownBits(RV, Known2, Depth + 1, Q);
@@ -1495,6 +1506,27 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// of bits which might be set provided by popcnt KnownOne2.
break;
}
+ case Intrinsic::fshr:
+ case Intrinsic::fshl: {
+ const APInt *SA;
+ if (!match(I->getOperand(2), m_APInt(SA)))
+ break;
+
+ // Normalize to funnel shift left.
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
+
+ KnownBits Known3(Known);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q);
+
+ Known.Zero =
+ Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
+ Known.One =
+ Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
+ break;
+ }
case Intrinsic::x86_sse42_crc32_64_64:
Known.Zero.setBitsFrom(32);
break;
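A worked example (not part of the commit) of the new fshl/fshr known-bits case above, in plain C++ so it stands alone: a funnel shift left takes the high word of the concatenation a:b, so the operands' known masks shift and OR together exactly as the code computes them.

#include <cstdint>
#include <cstdio>

int main() {
  // fshl(a, b, 8) on i32 == (a << 8) | (b >> 24).
  // Suppose we only know: a's low 16 bits are zero, b's top 8 bits are one.
  uint32_t KnownZeroA = 0x0000FFFFu;       // bits of 'a' known to be 0
  uint32_t KnownOneB = 0xFF000000u;        // bits of 'b' known to be 1
  unsigned Shift = 8, Bits = 32;           // nonzero shift keeps >> in range

  uint32_t KnownZero = KnownZeroA << Shift;          // 0x00FFFF00
  uint32_t KnownOne = KnownOneB >> (Bits - Shift);   // 0x000000FF
  std::printf("zero=%#010x one=%#010x\n", KnownZero, KnownOne);
  return 0;
}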
@@ -1722,7 +1754,8 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
// either the original power-of-two, a larger power-of-two or zero.
if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
- if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
+ if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
+ Q.IIQ.hasNoSignedWrap(VOBO)) {
if (match(X, m_And(m_Specific(Y), m_Value())) ||
match(X, m_And(m_Value(), m_Specific(Y))))
if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
@@ -1860,19 +1893,41 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
(Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE))
continue;
+ SmallVector<const User *, 4> WorkList;
+ SmallPtrSet<const User *, 4> Visited;
for (auto *CmpU : U->users()) {
- if (const BranchInst *BI = dyn_cast<BranchInst>(CmpU)) {
- assert(BI->isConditional() && "uses a comparison!");
+ assert(WorkList.empty() && "Should be!");
+ if (Visited.insert(CmpU).second)
+ WorkList.push_back(CmpU);
+
+ while (!WorkList.empty()) {
+ auto *Curr = WorkList.pop_back_val();
+
+ // If a user is an AND, add all its users to the work list. We only
+ // propagate "pred != null" condition through AND because it is only
+ // correct to assume that all conditions of AND are met in true branch.
+ // TODO: Support similar logic of OR and EQ predicate?
+ if (Pred == ICmpInst::ICMP_NE)
+ if (auto *BO = dyn_cast<BinaryOperator>(Curr))
+ if (BO->getOpcode() == Instruction::And) {
+ for (auto *BOU : BO->users())
+ if (Visited.insert(BOU).second)
+ WorkList.push_back(BOU);
+ continue;
+ }
- BasicBlock *NonNullSuccessor =
- BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
- BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
- if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
+ if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
+ assert(BI->isConditional() && "uses a comparison!");
+
+ BasicBlock *NonNullSuccessor =
+ BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
+ BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
+ if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
+ return true;
+ } else if (Pred == ICmpInst::ICMP_NE && isGuard(Curr) &&
+ DT->dominates(cast<Instruction>(Curr), CtxI)) {
return true;
- } else if (Pred == ICmpInst::ICMP_NE &&
- match(CmpU, m_Intrinsic<Intrinsic::experimental_guard>()) &&
- DT->dominates(cast<Instruction>(CmpU), CtxI)) {
- return true;
+ }
}
}
}
@@ -1937,7 +1992,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
if (auto *I = dyn_cast<Instruction>(V)) {
- if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) {
+ if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) {
// If the possible ranges don't contain zero, then the value is
// definitely non-zero.
if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
@@ -1965,13 +2020,13 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
// A Load tagged with nonnull metadata is never null.
if (const LoadInst *LI = dyn_cast<LoadInst>(V))
- if (LI->getMetadata(LLVMContext::MD_nonnull))
+ if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull))
return true;
- if (auto CS = ImmutableCallSite(V)) {
- if (CS.isReturnNonNull())
+ if (const auto *Call = dyn_cast<CallBase>(V)) {
+ if (Call->isReturnNonNull())
return true;
- if (const auto *RP = getArgumentAliasingToReturnedPointer(CS))
+ if (const auto *RP = getArgumentAliasingToReturnedPointer(Call))
return isKnownNonZero(RP, Depth, Q);
}
}
@@ -2003,7 +2058,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (match(V, m_Shl(m_Value(X), m_Value(Y)))) {
// shl nuw can't remove any non-zero bits.
const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
- if (BO->hasNoUnsignedWrap())
+ if (Q.IIQ.hasNoUnsignedWrap(BO))
return isKnownNonZero(X, Depth, Q);
KnownBits Known(BitWidth);
@@ -2078,7 +2133,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
- if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
+ if ((Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) &&
isKnownNonZero(X, Depth, Q) && isKnownNonZero(Y, Depth, Q))
return true;
}
@@ -2100,7 +2155,8 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) {
if (!C->isZero() && !C->isNegative()) {
ConstantInt *X;
- if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
+ if (Q.IIQ.UseInstrInfo &&
+ (match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) &&
!X->isNegative())
return true;
@@ -2174,6 +2230,36 @@ bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
return Mask.isSubsetOf(Known.Zero);
}
+// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
+// Returns the input and lower/upper bounds.
+static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
+ const APInt *&CLow, const APInt *&CHigh) {
+ assert(isa<Operator>(Select) &&
+ cast<Operator>(Select)->getOpcode() == Instruction::Select &&
+ "Input should be a Select!");
+
+ const Value *LHS, *RHS, *LHS2, *RHS2;
+ SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
+ if (SPF != SPF_SMAX && SPF != SPF_SMIN)
+ return false;
+
+ if (!match(RHS, m_APInt(CLow)))
+ return false;
+
+ SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
+ if (getInverseMinMaxFlavor(SPF) != SPF2)
+ return false;
+
+ if (!match(RHS2, m_APInt(CHigh)))
+ return false;
+
+ if (SPF == SPF_SMIN)
+ std::swap(CLow, CHigh);
+
+ In = LHS2;
+ return CLow->sle(*CHigh);
+}
+
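A worked example (not part of the commit) for the clamp matcher above and its use in ComputeNumSignBits in a later hunk: for smax(smin(x, 127), -128) on i32, both bounds have 25 identical leading bits, so the select is known to have at least 25 sign bits. A standalone check of that arithmetic:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Number of identical top bits (the "sign bits") of a 32-bit value.
static unsigned numSignBits(int32_t V) {
  uint32_t U = static_cast<uint32_t>(V);
  uint32_t Sign = U >> 31;
  unsigned N = 1;
  for (int Bit = 30; Bit >= 0 && ((U >> Bit) & 1u) == Sign; --Bit)
    ++N;
  return N;
}

int main() {
  int32_t CLow = -128, CHigh = 127;        // clamp bounds
  unsigned Min = std::min(numSignBits(CLow), numSignBits(CHigh));
  std::printf("%u\n", Min);                // prints 25
  return 0;
}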
/// For vector constants, loop over the elements and find the constant with the
/// minimum number of sign bits. Return 0 if the value is not a vector constant
/// or if any element was not analyzed; otherwise, return the count for the
@@ -2335,11 +2421,19 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
}
break;
- case Instruction::Select:
+ case Instruction::Select: {
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
+ const Value *X;
+ const APInt *CLow, *CHigh;
+ if (isSignedMinMaxClamp(U, X, CLow, CHigh))
+ return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
+
Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
if (Tmp == 1) break;
Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
return std::min(Tmp, Tmp2);
+ }
case Instruction::Add:
// Add can have at most one carry bit. Thus we know that the output
@@ -2437,6 +2531,44 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// valid for all elements of the vector (for example if vector is sign
// extended, shifted, etc).
return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+
+ case Instruction::ShuffleVector: {
+ // TODO: This is copied almost directly from the SelectionDAG version of
+ // ComputeNumSignBits. It would be better if we could share common
+ // code. If not, make sure that changes are translated to the DAG.
+
+ // Collect the minimum number of sign bits that are shared by every vector
+ // element referenced by the shuffle.
+ auto *Shuf = cast<ShuffleVectorInst>(U);
+ int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
+ int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
+ APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ for (int i = 0; i != NumMaskElts; ++i) {
+ int M = Shuf->getMaskValue(i);
+ assert(M < NumElts * 2 && "Invalid shuffle mask constant");
+ // For undef elements, we don't know anything about the common state of
+ // the shuffle result.
+ if (M == -1)
+ return 1;
+ if (M < NumElts)
+ DemandedLHS.setBit(M % NumElts);
+ else
+ DemandedRHS.setBit(M % NumElts);
+ }
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (!!DemandedLHS)
+ Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q);
+ if (!!DemandedRHS) {
+ Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ // If we don't know anything, early out and try computeKnownBits fall-back.
+ if (Tmp == 1)
+ break;
+ assert(Tmp <= V->getType()->getScalarSizeInBits() &&
+ "Failed to determine minimum sign bits");
+ return Tmp;
+ }
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
@@ -2722,6 +2854,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
break;
// sqrt(-0.0) = -0.0, no other negative results are possible.
case Intrinsic::sqrt:
+ case Intrinsic::canonicalize:
return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
// fabs(x) != -0.0
case Intrinsic::fabs:
@@ -2817,14 +2950,20 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
default:
break;
case Intrinsic::maxnum:
- return (isKnownNeverNaN(I->getOperand(0)) &&
+ return (isKnownNeverNaN(I->getOperand(0), TLI) &&
cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI,
SignBitOnly, Depth + 1)) ||
- (isKnownNeverNaN(I->getOperand(1)) &&
+ (isKnownNeverNaN(I->getOperand(1), TLI) &&
cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI,
SignBitOnly, Depth + 1));
+ case Intrinsic::maximum:
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+ Depth + 1) ||
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+ Depth + 1);
case Intrinsic::minnum:
+ case Intrinsic::minimum:
return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
Depth + 1) &&
cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
@@ -2885,7 +3024,8 @@ bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
}
-bool llvm::isKnownNeverNaN(const Value *V) {
+bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
+ unsigned Depth) {
assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type");
// If we're told that NaNs won't happen, assume they won't.
@@ -2893,13 +3033,60 @@ bool llvm::isKnownNeverNaN(const Value *V) {
if (FPMathOp->hasNoNaNs())
return true;
- // TODO: Handle instructions and potentially recurse like other 'isKnown'
- // functions. For example, the result of sitofp is never NaN.
-
// Handle scalar constants.
if (auto *CFP = dyn_cast<ConstantFP>(V))
return !CFP->isNaN();
+ if (Depth == MaxDepth)
+ return false;
+
+ if (auto *Inst = dyn_cast<Instruction>(V)) {
+ switch (Inst->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::FSub:
+ case Instruction::FDiv:
+ case Instruction::FRem: {
+ // TODO: Need isKnownNeverInfinity
+ return false;
+ }
+ case Instruction::Select: {
+ return isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
+ isKnownNeverNaN(Inst->getOperand(2), TLI, Depth + 1);
+ }
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ return true;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1);
+ default:
+ break;
+ }
+ }
+
+ if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::canonicalize:
+ case Intrinsic::fabs:
+ case Intrinsic::copysign:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1);
+ case Intrinsic::sqrt:
+ return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) &&
+ CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI);
+ default:
+ return false;
+ }
+ }
+
// Bail out for constant expressions, but try to handle vector constants.
if (!V->getType()->isVectorTy() || !isa<Constant>(V))
return false;
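A toy mirror (not part of the commit) of the recursion added to isKnownNeverNaN above: integer-to-FP conversions can never produce NaN, a select is NaN-free only if both arms are, and a plain fadd cannot yet be proven NaN-free because inf + -inf is NaN (hence the isKnownNeverInfinity TODO).

// Toy expression mirror of the recursion above; illustrative only.
struct Expr {
  enum Op { IntToFP, FPExt, Select, FAdd, Unknown } Kind;
  const Expr *A = nullptr, *B = nullptr;   // operands where relevant
};

static bool neverNaN(const Expr *E, unsigned Depth = 0) {
  if (!E || Depth == 6)                    // mirror of the MaxDepth cut-off
    return false;
  switch (E->Kind) {
  case Expr::IntToFP: return true;                        // sitofp/uitofp
  case Expr::FPExt:   return neverNaN(E->A, Depth + 1);   // widening keeps non-NaN
  case Expr::Select:  return neverNaN(E->A, Depth + 1) && // both arms must be
                             neverNaN(E->B, Depth + 1);   // NaN-free
  default:            return false;        // e.g. FAdd: inf + -inf could be NaN
  }
}

int main() {
  Expr X{Expr::IntToFP}, Y{Expr::IntToFP};
  Expr S{Expr::Select, &X, &Y};
  return neverNaN(&S) ? 0 : 1;             // 0: the select is NaN-free
}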
@@ -2920,62 +3107,92 @@ bool llvm::isKnownNeverNaN(const Value *V) {
return true;
}
-/// If the specified value can be set by repeating the same byte in memory,
-/// return the i8 value that it is represented with. This is
-/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
-/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
-/// byte store (e.g. i16 0x1234), return null.
Value *llvm::isBytewiseValue(Value *V) {
+
// All byte-wide stores are splatable, even of arbitrary variables.
- if (V->getType()->isIntegerTy(8)) return V;
+ if (V->getType()->isIntegerTy(8))
+ return V;
+
+ LLVMContext &Ctx = V->getContext();
+
+ // Undef don't care.
+ auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
+ if (isa<UndefValue>(V))
+ return UndefInt8;
+
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C) {
+ // Conceptually, we could handle things like:
+ // %a = zext i8 %X to i16
+ // %b = shl i16 %a, 8
+ // %c = or i16 %a, %b
+ // but until there is an example that actually needs this, it doesn't seem
+ // worth worrying about.
+ return nullptr;
+ }
// Handle 'null' ConstantArrayZero etc.
- if (Constant *C = dyn_cast<Constant>(V))
- if (C->isNullValue())
- return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+ if (C->isNullValue())
+ return Constant::getNullValue(Type::getInt8Ty(Ctx));
- // Constant float and double values can be handled as integer values if the
+ // Constant floating-point values can be handled as integer values if the
// corresponding integer value is "byteable". An important case is 0.0.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- if (CFP->getType()->isFloatTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
- if (CFP->getType()->isDoubleTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ Type *Ty = nullptr;
+ if (CFP->getType()->isHalfTy())
+ Ty = Type::getInt16Ty(Ctx);
+ else if (CFP->getType()->isFloatTy())
+ Ty = Type::getInt32Ty(Ctx);
+ else if (CFP->getType()->isDoubleTy())
+ Ty = Type::getInt64Ty(Ctx);
// Don't handle long double formats, which have strange constraints.
+ return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr;
}
// We can handle constant integers that are multiple of 8 bits.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
if (CI->getBitWidth() % 8 == 0) {
assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
-
if (!CI->getValue().isSplat(8))
return nullptr;
- return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
+ return ConstantInt::get(Ctx, CI->getValue().trunc(8));
}
}
- // A ConstantDataArray/Vector is splatable if all its members are equal and
- // also splatable.
- if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
- Value *Elt = CA->getElementAsConstant(0);
- Value *Val = isBytewiseValue(Elt);
- if (!Val)
+ auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
+ if (LHS == RHS)
+ return LHS;
+ if (!LHS || !RHS)
return nullptr;
+ if (LHS == UndefInt8)
+ return RHS;
+ if (RHS == UndefInt8)
+ return LHS;
+ return nullptr;
+ };
- for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
- if (CA->getElementAsConstant(I) != Elt)
+ if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
+ Value *Val = UndefInt8;
+ for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
+ if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I)))))
return nullptr;
+ return Val;
+ }
+
+ if (isa<ConstantVector>(C)) {
+ Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
+ return Splat ? isBytewiseValue(Splat) : nullptr;
+ }
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ Value *Val = UndefInt8;
+ for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
+ if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I)))))
+ return nullptr;
return Val;
}
- // Conceptually, we could handle things like:
- // %a = zext i8 %X to i16
- // %b = shl i16 %a, 8
- // %c = or i16 %a, %b
- // but until there is an example that actually needs this, it doesn't seem
- // worth worrying about.
+ // Don't try to handle the handful of other constants.
return nullptr;
}
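A toy mirror (not part of the commit) of the Merge lambda above: undef acts as a wildcard byte, two equal bytes merge to themselves, and two different concrete bytes make the whole value non-bytewise. Here -1 stands in for the undef byte:

#include <cstdio>
#include <initializer_list>
#include <optional>

// Toy mirror of the Merge lambda; an empty optional means "not bytewise".
static std::optional<int> merge(std::optional<int> L, std::optional<int> R) {
  if (!L || !R) return std::nullopt;       // one side already failed
  if (*L == *R) return L;                  // same byte on both sides
  if (*L == -1) return R;                  // undef adopts the other side
  if (*R == -1) return L;
  return std::nullopt;                     // two different concrete bytes
}

int main() {
  // <i8 7, i8 undef, i8 7> merges lane by lane down to the byte 7.
  std::optional<int> V = -1;               // start from "undef don't care"
  for (int Byte : {7, -1, 7})
    V = merge(V, Byte);
  std::printf("%d\n", V ? *V : 256);       // prints 7
  return 0;
}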
@@ -3172,7 +3389,14 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
break;
- ByteOffset += GEPOffset.getSExtValue();
+ APInt OrigByteOffset(ByteOffset);
+ ByteOffset += GEPOffset.sextOrTrunc(ByteOffset.getBitWidth());
+ if (ByteOffset.getMinSignedBits() > 64) {
+ // Stop traversal if the pointer offset wouldn't fit into int64_t
+ // (this should be removed if Offset is updated to an APInt)
+ ByteOffset = OrigByteOffset;
+ break;
+ }
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
@@ -3400,21 +3624,21 @@ uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
return Len == ~0ULL ? 1 : Len;
}
-const Value *llvm::getArgumentAliasingToReturnedPointer(ImmutableCallSite CS) {
- assert(CS &&
- "getArgumentAliasingToReturnedPointer only works on nonnull CallSite");
- if (const Value *RV = CS.getReturnedArgOperand())
+const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) {
+ assert(Call &&
+ "getArgumentAliasingToReturnedPointer only works on nonnull calls");
+ if (const Value *RV = Call->getReturnedArgOperand())
return RV;
// This can be used only as an aliasing property.
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CS))
- return CS.getArgOperand(0);
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call))
+ return Call->getArgOperand(0);
return nullptr;
}
bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
- ImmutableCallSite CS) {
- return CS.getIntrinsicID() == Intrinsic::launder_invariant_group ||
- CS.getIntrinsicID() == Intrinsic::strip_invariant_group;
+ const CallBase *Call) {
+ return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
+ Call->getIntrinsicID() == Intrinsic::strip_invariant_group;
}
/// \p PN defines a loop-variant pointer to an object. Check if the
@@ -3462,7 +3686,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
// An alloca can't be further simplified.
return V;
} else {
- if (auto CS = CallSite(V)) {
+ if (auto *Call = dyn_cast<CallBase>(V)) {
// CaptureTracking can know about special capturing properties of some
// intrinsics like launder.invariant.group, that can't be expressed with
// the attributes, but have properties like returning aliasing pointer.
@@ -3472,7 +3696,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
// because it should be in sync with CaptureTracking. Not using it may
// cause weird miscompilations where 2 aliasing pointers are assumed to
// noalias.
- if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) {
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
V = RP;
continue;
}
@@ -3605,8 +3829,7 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
if (!II) return false;
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
}
return true;
@@ -3703,12 +3926,10 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) {
return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
}
-OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
- const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
+OverflowResult llvm::computeOverflowForUnsignedMul(
+ const Value *LHS, const Value *RHS, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -3718,8 +3939,10 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
KnownBits LHSKnown(BitWidth);
KnownBits RHSKnown(BitWidth);
- computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
- computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
+ computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
+ UseInstrInfo);
+ computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
+ UseInstrInfo);
// Note that underestimating the number of zero bits gives a more
// conservative answer.
unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
@@ -3750,12 +3973,11 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
return OverflowResult::MayOverflow;
}
-OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
- const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
+OverflowResult
+llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
+ const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT, bool UseInstrInfo) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -3784,33 +4006,33 @@ OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
// product is exactly the minimum negative number.
// E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
// For simplicity we just check if at least one side is not negative.
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
return OverflowResult::NeverOverflows;
}
return OverflowResult::MayOverflow;
}
-OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
- const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
+OverflowResult llvm::computeOverflowForUnsignedAdd(
+ const Value *LHS, const Value *RHS, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
// The sign bit is set in both cases: this MUST overflow.
- // Create a simple add instruction, and insert it into the struct.
return OverflowResult::AlwaysOverflows;
}
if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
// The sign bit is clear in both cases: this CANNOT overflow.
- // Create a simple add instruction, and insert it into the struct.
return OverflowResult::NeverOverflows;
}
}
@@ -3927,11 +4149,18 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
- // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
- if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
- return OverflowResult::NeverOverflows;
+ if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+
+ // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
+ if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
+ return OverflowResult::NeverOverflows;
+
+ // If the LHS is non-negative and the RHS negative, we always wrap.
+ if (LHSKnown.isNonNegative() && RHSKnown.isNegative())
+ return OverflowResult::AlwaysOverflows;
+ }
return OverflowResult::MayOverflow;
}
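A quick numeric check (not part of the commit) of the extra case added above: when the LHS sign bit is known set and the RHS sign bit is known clear, LHS >= 2^31 > RHS, so the unsigned subtraction cannot wrap; with the roles swapped it always wraps.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t LHS = 0x80000001u;              // sign bit set  -> at least 2^31
  uint32_t RHS = 0x7FFFFFFFu;              // sign bit clear -> below 2^31
  std::printf("never wraps: %u\n", LHS - RHS);   // small positive result

  // Swap the known signs and the subtraction must wrap around.
  std::printf("always wraps: %u\n", RHS - LHS);  // huge value: wrapped
  return 0;
}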
@@ -4244,12 +4473,34 @@ static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
if (auto *C = dyn_cast<ConstantFP>(V))
return !C->isNaN();
+
+ if (auto *C = dyn_cast<ConstantDataVector>(V)) {
+ if (!C->getElementType()->isFloatingPointTy())
+ return false;
+ for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
+ if (C->getElementAsAPFloat(I).isNaN())
+ return false;
+ }
+ return true;
+ }
+
return false;
}
static bool isKnownNonZero(const Value *V) {
if (auto *C = dyn_cast<ConstantFP>(V))
return !C->isZero();
+
+ if (auto *C = dyn_cast<ConstantDataVector>(V)) {
+ if (!C->getElementType()->isFloatingPointTy())
+ return false;
+ for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
+ if (C->getElementAsAPFloat(I).isZero())
+ return false;
+ }
+ return true;
+ }
+
return false;
}
@@ -4541,6 +4792,27 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
Value *TrueVal, Value *FalseVal,
Value *&LHS, Value *&RHS,
unsigned Depth) {
+ if (CmpInst::isFPPredicate(Pred)) {
+ // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
+ // 0.0 operand, set the compare's 0.0 operands to that same value for the
+ // purpose of identifying min/max. Disregard vector constants with undefined
+ // elements because those can not be back-propagated for analysis.
+ Value *OutputZeroVal = nullptr;
+ if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) &&
+ !cast<Constant>(TrueVal)->containsUndefElement())
+ OutputZeroVal = TrueVal;
+ else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) &&
+ !cast<Constant>(FalseVal)->containsUndefElement())
+ OutputZeroVal = FalseVal;
+
+ if (OutputZeroVal) {
+ if (match(CmpLHS, m_AnyZeroFP()))
+ CmpLHS = OutputZeroVal;
+ if (match(CmpRHS, m_AnyZeroFP()))
+ CmpRHS = OutputZeroVal;
+ }
+ }
+
LHS = CmpLHS;
RHS = CmpRHS;
@@ -4970,21 +5242,16 @@ static bool isMatchingOps(const Value *ALHS, const Value *ARHS,
return IsMatchingOps || IsSwappedOps;
}
-/// Return true if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS BRHS" is
-/// true. Return false if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS
-/// BRHS" is false. Otherwise, return None if we can't infer anything.
+/// Return true if "icmp1 APred X, Y" implies "icmp2 BPred X, Y" is true.
+/// Return false if "icmp1 APred X, Y" implies "icmp2 BPred X, Y" is false.
+/// Otherwise, return None if we can't infer anything.
static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
- const Value *ALHS,
- const Value *ARHS,
CmpInst::Predicate BPred,
- const Value *BLHS,
- const Value *BRHS,
- bool IsSwappedOps) {
- // Canonicalize the operands so they're matching.
- if (IsSwappedOps) {
- std::swap(BLHS, BRHS);
+ bool AreSwappedOps) {
+ // Canonicalize the predicate as if the operands were not commuted.
+ if (AreSwappedOps)
BPred = ICmpInst::getSwappedPredicate(BPred);
- }
+
if (CmpInst::isImpliedTrueByMatchingCmp(APred, BPred))
return true;
if (CmpInst::isImpliedFalseByMatchingCmp(APred, BPred))
@@ -4993,15 +5260,14 @@ static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
return None;
}
-/// Return true if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS C2" is
-/// true. Return false if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS
-/// C2" is false. Otherwise, return None if we can't infer anything.
+/// Return true if "icmp APred X, C1" implies "icmp BPred X, C2" is true.
+/// Return false if "icmp APred X, C1" implies "icmp BPred X, C2" is false.
+/// Otherwise, return None if we can't infer anything.
static Optional<bool>
-isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS,
+isImpliedCondMatchingImmOperands(CmpInst::Predicate APred,
const ConstantInt *C1,
CmpInst::Predicate BPred,
- const Value *BLHS, const ConstantInt *C2) {
- assert(ALHS == BLHS && "LHS operands must match.");
+ const ConstantInt *C2) {
ConstantRange DomCR =
ConstantRange::makeExactICmpRegion(APred, C1->getValue());
ConstantRange CR =
@@ -5033,10 +5299,10 @@ static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
ICmpInst::Predicate BPred = RHS->getPredicate();
// Can we infer anything when the two compares have matching operands?
- bool IsSwappedOps;
- if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, IsSwappedOps)) {
+ bool AreSwappedOps;
+ if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, AreSwappedOps)) {
if (Optional<bool> Implication = isImpliedCondMatchingOperands(
- APred, ALHS, ARHS, BPred, BLHS, BRHS, IsSwappedOps))
+ APred, BPred, AreSwappedOps))
return Implication;
// No amount of additional analysis will infer the second condition, so
// early exit.
@@ -5047,8 +5313,7 @@ static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
// constants (not necessarily matching)?
if (ALHS == BLHS && isa<ConstantInt>(ARHS) && isa<ConstantInt>(BRHS)) {
if (Optional<bool> Implication = isImpliedCondMatchingImmOperands(
- APred, ALHS, cast<ConstantInt>(ARHS), BPred, BLHS,
- cast<ConstantInt>(BRHS)))
+ APred, cast<ConstantInt>(ARHS), BPred, cast<ConstantInt>(BRHS)))
return Implication;
// No amount of additional analysis will infer the second condition, so
// early exit.
@@ -5133,3 +5398,35 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
}
return None;
}
+
+Optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
+ const Instruction *ContextI,
+ const DataLayout &DL) {
+ assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
+ if (!ContextI || !ContextI->getParent())
+ return None;
+
+ // TODO: This is a poor/cheap way to determine dominance. Should we use a
+ // dominator tree (eg, from a SimplifyQuery) instead?
+ const BasicBlock *ContextBB = ContextI->getParent();
+ const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
+ if (!PredBB)
+ return None;
+
+ // We need a conditional branch in the predecessor.
+ Value *PredCond;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB)))
+ return None;
+
+ // The branch should get simplified. Don't bother simplifying this condition.
+ if (TrueBB == FalseBB)
+ return None;
+
+ assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
+ "Predecessor block does not point to successor?");
+
+ // Is this condition implied by the predecessor condition?
+ bool CondIsTrue = TrueBB == ContextBB;
+ return isImpliedCondition(PredCond, Cond, DL, CondIsTrue);
+}
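A hedged usage sketch (not part of the commit) of the new isImpliedByDomCondition helper above; foldRedundantCondition and its logic are made up for illustration, only the helper's signature comes from this diff:

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Replace a conditional branch whose condition is already decided by the
// unique predecessor's branch.
static bool foldRedundantCondition(BranchInst *BI, const DataLayout &DL) {
  if (!BI->isConditional())
    return false;
  Value *Cond = BI->getCondition();
  if (Optional<bool> Known = isImpliedByDomCondition(Cond, BI, DL)) {
    BI->setCondition(ConstantInt::getBool(BI->getContext(), *Known));
    return true;
  }
  return false;
}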
diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp
index d73d24736439..5656a19d7e0d 100644
--- a/contrib/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -25,16 +26,30 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
+#define DEBUG_TYPE "vectorutils"
+
using namespace llvm;
using namespace llvm::PatternMatch;
-/// Identify if the intrinsic is trivially vectorizable.
-/// This method returns true if the intrinsic's argument types are all
-/// scalars for the scalar form of the intrinsic and all vectors for
-/// the vector form of the intrinsic.
+/// Maximum factor for an interleaved memory access.
+static cl::opt<unsigned> MaxInterleaveGroupFactor(
+ "max-interleave-group-factor", cl::Hidden,
+ cl::desc("Maximum factor for an interleaved access group (default = 8)"),
+ cl::init(8));
+
+/// Return true if all of the intrinsic's arguments and return type are scalars
+/// for the scalar form of the intrinsic and vectors for the vector form of the
+/// intrinsic.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::sqrt:
+ case Intrinsic::bswap: // Begin integer bit-manipulation.
+ case Intrinsic::bitreverse:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::exp:
@@ -45,6 +60,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::fabs:
case Intrinsic::minnum:
case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
@@ -52,15 +69,15 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
- case Intrinsic::bswap:
- case Intrinsic::bitreverse:
- case Intrinsic::ctpop:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
case Intrinsic::powi:
+ case Intrinsic::canonicalize:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat:
return true;
default:
return false;
@@ -270,9 +287,10 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
}
// Extract a value from a vector add operation with a constant zero.
- Value *Val = nullptr; Constant *Con = nullptr;
- if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
- if (Constant *Elt = Con->getAggregateElement(EltNo))
+ // TODO: Use getBinOpIdentity() to generalize this.
+ Value *Val; Constant *C;
+ if (match(V, m_Add(m_Value(Val), m_Constant(C))))
+ if (Constant *Elt = C->getAggregateElement(EltNo))
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
@@ -450,16 +468,100 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
return MinBWs;
}
+/// Add all access groups in @p AccGroups to @p List.
+template <typename ListT>
+static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
+ // Interpret an access group as a list containing itself.
+ if (AccGroups->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
+ List.insert(AccGroups);
+ return;
+ }
+
+ for (auto &AccGroupListOp : AccGroups->operands()) {
+ auto *Item = cast<MDNode>(AccGroupListOp.get());
+ assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+ List.insert(Item);
+ }
+}
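Illustrative sketch (not part of the patch): the two metadata shapes this helper accepts, built with the C++ API. A lone access group is a distinct, operand-less MDNode; a list node's operands are themselves access groups. All names below are placeholders.

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    LLVMContext Ctx;
    MDNode *GroupA = MDNode::getDistinct(Ctx, {});        // one access group
    MDNode *GroupB = MDNode::getDistinct(Ctx, {});        // another access group
    MDNode *List   = MDNode::get(Ctx, {GroupA, GroupB});  // a list of two groups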
+
+MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
+ if (!AccGroups1)
+ return AccGroups2;
+ if (!AccGroups2)
+ return AccGroups1;
+ if (AccGroups1 == AccGroups2)
+ return AccGroups1;
+
+ SmallSetVector<Metadata *, 4> Union;
+ addToAccessGroupList(Union, AccGroups1);
+ addToAccessGroupList(Union, AccGroups2);
+
+ if (Union.size() == 0)
+ return nullptr;
+ if (Union.size() == 1)
+ return cast<MDNode>(Union.front());
+
+ LLVMContext &Ctx = AccGroups1->getContext();
+ return MDNode::get(Ctx, Union.getArrayRef());
+}
+
+MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
+ const Instruction *Inst2) {
+ bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
+ bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
+
+ if (!MayAccessMem1 && !MayAccessMem2)
+ return nullptr;
+ if (!MayAccessMem1)
+ return Inst2->getMetadata(LLVMContext::MD_access_group);
+ if (!MayAccessMem2)
+ return Inst1->getMetadata(LLVMContext::MD_access_group);
+
+ MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
+ MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
+ if (!MD1 || !MD2)
+ return nullptr;
+ if (MD1 == MD2)
+ return MD1;
+
+ // Use set for scalable 'contains' check.
+ SmallPtrSet<Metadata *, 4> AccGroupSet2;
+ addToAccessGroupList(AccGroupSet2, MD2);
+
+ SmallVector<Metadata *, 4> Intersection;
+ if (MD1->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
+ if (AccGroupSet2.count(MD1))
+ Intersection.push_back(MD1);
+ } else {
+ for (const MDOperand &Node : MD1->operands()) {
+ auto *Item = cast<MDNode>(Node.get());
+ assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+ if (AccGroupSet2.count(Item))
+ Intersection.push_back(Item);
+ }
+ }
+
+ if (Intersection.size() == 0)
+ return nullptr;
+ if (Intersection.size() == 1)
+ return cast<MDNode>(Intersection.front());
+
+ LLVMContext &Ctx = Inst1->getContext();
+ return MDNode::get(Ctx, Intersection);
+}
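Illustrative sketch (not part of the patch) of how a transform that merges two memory instructions might use the new helper; Inst1, Inst2 and NewInst are placeholder pointers, not names from this change:

    // Keep parallel-access metadata only if both originals carried it and the
    // intersection of their access groups is non-empty.
    if (MDNode *Common = intersectAccessGroups(Inst1, Inst2))
      NewInst->setMetadata(LLVMContext::MD_access_group, Common);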
+
/// \returns \p I after propagating metadata from \p VL.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
Instruction *I0 = cast<Instruction>(VL[0]);
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
I0->getAllMetadataOtherThanDebugLoc(Metadata);
- for (auto Kind :
- {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
- LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load}) {
+ for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
+ LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_access_group}) {
MDNode *MD = I0->getMetadata(Kind);
for (int J = 1, E = VL.size(); MD && J != E; ++J) {
@@ -480,6 +582,9 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
case LLVMContext::MD_invariant_load:
MD = MDNode::intersect(MD, IMD);
break;
+ case LLVMContext::MD_access_group:
+ MD = intersectAccessGroups(Inst, IJ);
+ break;
default:
llvm_unreachable("unhandled metadata");
}
@@ -491,6 +596,36 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
return Inst;
}
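Illustrative usage sketch (not part of the patch): a vectorizer that folded the scalar instructions in ScalarLoads into a single WideLoad would typically forward their metadata, now including !llvm.access.group, like this (both names are placeholders):

    SmallVector<Value *, 8> VL(ScalarLoads.begin(), ScalarLoads.end());
    propagateMetadata(WideLoad, VL);  // conservatively merges the kinds listed above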
+Constant *
+llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
+ const InterleaveGroup<Instruction> &Group) {
+ // All 1's means mask is not needed.
+ if (Group.getNumMembers() == Group.getFactor())
+ return nullptr;
+
+ // TODO: support reversed access.
+ assert(!Group.isReverse() && "Reversed group not supported.");
+
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < Group.getFactor(); ++j) {
+ unsigned HasMember = Group.getMember(j) ? 1 : 0;
+ Mask.push_back(Builder.getInt1(HasMember));
+ }
+
+ return ConstantVector::get(Mask);
+}
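Worked example of the mask produced above: with VF = 4 and an interleave group of factor 3 whose members sit at indices 0 and 2 (index 1 is a gap), the per-member pattern <1, 0, 1> is emitted once per lane, giving the i1 mask <1,0,1, 1,0,1, 1,0,1, 1,0,1>.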
+
+Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
+ unsigned ReplicationFactor, unsigned VF) {
+ SmallVector<Constant *, 16> MaskVec;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < ReplicationFactor; j++)
+ MaskVec.push_back(Builder.getInt32(i));
+
+ return ConstantVector::get(MaskVec);
+}
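For comparison, createReplicatedMask() with ReplicationFactor = 3 and VF = 2 yields the shuffle mask <0, 0, 0, 1, 1, 1>: each lane index of the VF-wide source vector is repeated ReplicationFactor times.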
+
Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
unsigned NumVecs) {
SmallVector<Constant *, 16> Mask;
@@ -575,3 +710,364 @@ Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
return ResList[0];
}
+
+bool InterleavedAccessInfo::isStrided(int Stride) {
+ unsigned Factor = std::abs(Stride);
+ return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
+}
+
+void InterleavedAccessInfo::collectConstStrideAccesses(
+ MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
+ const ValueToValueMap &Strides) {
+ auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
+
+ // Since it's desired that the load/store instructions be maintained in
+ // "program order" for the interleaved access analysis, we have to visit the
+ // blocks in the loop in reverse postorder (i.e., in a topological order).
+ // Such an ordering will ensure that any load/store that may be executed
+ // before a second load/store will precede the second load/store in
+ // AccessStrideInfo.
+ LoopBlocksDFS DFS(TheLoop);
+ DFS.perform(LI);
+ for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
+ for (auto &I : *BB) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ auto *SI = dyn_cast<StoreInst>(&I);
+ if (!LI && !SI)
+ continue;
+
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ // We don't check wrapping here because we don't know yet if Ptr will be
+ // part of a full group or a group with gaps. Checking wrapping for all
+ // pointers (even those that end up in groups with no gaps) will be overly
+ // conservative. For full groups, wrapping should be ok since if we would
+ // wrap around the address space we would do a memory access at nullptr
+ // even without the transformation. The wrapping checks are therefore
+ // deferred until after we've formed the interleaved groups.
+ int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+ /*Assume=*/true, /*ShouldCheckWrap=*/false);
+
+ const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+
+ // An alignment of 0 means target ABI alignment.
+ unsigned Align = getLoadStoreAlignment(&I);
+ if (!Align)
+ Align = DL.getABITypeAlignment(PtrTy->getElementType());
+
+ AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
+ }
+}
+
+// Analyze interleaved accesses and collect them into interleaved load and
+// store groups.
+//
+// When generating code for an interleaved load group, we effectively hoist all
+// loads in the group to the location of the first load in program order. When
+// generating code for an interleaved store group, we sink all stores to the
+// location of the last store. This code motion can change the order of load
+// and store instructions and may break dependences.
+//
+// The code generation strategy mentioned above ensures that we won't violate
+// any write-after-read (WAR) dependences.
+//
+// E.g., for the WAR dependence:  a = A[i];  // (1)
+//                                A[i] = b;  // (2)
+//
+// The store group of (2) is always inserted at or below (2), and the load
+// group of (1) is always inserted at or above (1). Thus, the instructions will
+// never be reordered. All other dependences are checked to ensure the
+// correctness of the instruction reordering.
+//
+// The algorithm visits all memory accesses in the loop in bottom-up program
+// order. Program order is established by traversing the blocks in the loop in
+// reverse postorder when collecting the accesses.
+//
+// We visit the memory accesses in bottom-up order because it can simplify the
+// construction of store groups in the presence of write-after-write (WAW)
+// dependences.
+//
+// E.g., for the WAW dependence:  A[i] = a;      // (1)
+//                                A[i] = b;      // (2)
+//                                A[i + 1] = c;  // (3)
+//
+// We will first create a store group with (3) and (2). (1) can't be added to
+// this group because it and (2) are dependent. However, (1) can be grouped
+// with other accesses that may precede it in program order. Note that a
+// bottom-up order does not imply that WAW dependences should not be checked.
+void InterleavedAccessInfo::analyzeInterleaving(
+ bool EnablePredicatedInterleavedMemAccesses) {
+ LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
+ const ValueToValueMap &Strides = LAI->getSymbolicStrides();
+
+ // Holds all accesses with a constant stride.
+ MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
+ collectConstStrideAccesses(AccessStrideInfo, Strides);
+
+ if (AccessStrideInfo.empty())
+ return;
+
+ // Collect the dependences in the loop.
+ collectDependences();
+
+ // Holds all interleaved store groups temporarily.
+ SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
+ // Holds all interleaved load groups temporarily.
+ SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
+
+ // Search in bottom-up program order for pairs of accesses (A and B) that can
+ // form interleaved load or store groups. In the algorithm below, access A
+ // precedes access B in program order. We initialize a group for B in the
+ // outer loop of the algorithm, and then in the inner loop, we attempt to
+ // insert each A into B's group if:
+ //
+ // 1. A and B have the same stride,
+ // 2. A and B have the same memory object size, and
+ // 3. A belongs in B's group according to its distance from B.
+ //
+ // Special care is taken to ensure group formation will not break any
+ // dependences.
+ for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
+ BI != E; ++BI) {
+ Instruction *B = BI->first;
+ StrideDescriptor DesB = BI->second;
+
+ // Initialize a group for B if it has an allowable stride. Even if we don't
+ // create a group for B, we continue with the bottom-up algorithm to ensure
+ // we don't break any of B's dependences.
+ InterleaveGroup<Instruction> *Group = nullptr;
+ if (isStrided(DesB.Stride) &&
+ (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
+ Group = getInterleaveGroup(B);
+ if (!Group) {
+ LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
+ << '\n');
+ Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
+ }
+ if (B->mayWriteToMemory())
+ StoreGroups.insert(Group);
+ else
+ LoadGroups.insert(Group);
+ }
+
+ for (auto AI = std::next(BI); AI != E; ++AI) {
+ Instruction *A = AI->first;
+ StrideDescriptor DesA = AI->second;
+
+ // Our code motion strategy implies that we can't have dependences
+ // between accesses in an interleaved group and other accesses located
+ // between the first and last member of the group. Note that this also
+ // means that a group can't have more than one member at a given offset.
+ // The accesses in a group can have dependences with other accesses, but
+ // we must ensure we don't extend the boundaries of the group such that
+ // we encompass those dependent accesses.
+ //
+ // For example, assume we have the sequence of accesses shown below in a
+ // stride-2 loop:
+ //
+ //      (1, 2) is a group | A[i]   = a;  // (1)
+ //                        | A[i-1] = b;  // (2) |
+ //                                        A[i-3] = c;  // (3)
+ //                          A[i]   = d;  // (4) | (2, 4) is not a group
+ //
+ // Because accesses (2) and (3) are dependent, we can group (2) with (1)
+ // but not with (4). If we did, the dependent access (3) would be within
+ // the boundaries of the (2, 4) group.
+ if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
+ // If a dependence exists and A is already in a group, we know that A
+ // must be a store since A precedes B and WAR dependences are allowed.
+ // Thus, A would be sunk below B. We release A's group to prevent this
+ // illegal code motion. A will then be free to form another group with
+ // instructions that precede it.
+ if (isInterleaved(A)) {
+ InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
+ StoreGroups.remove(StoreGroup);
+ releaseGroup(StoreGroup);
+ }
+
+ // If a dependence exists and A is not already in a group (or it was
+ // and we just released it), B might be hoisted above A (if B is a
+ // load) or another store might be sunk below A (if B is a store). In
+ // either case, we can't add additional instructions to B's group. B
+ // will only form a group with instructions that it precedes.
+ break;
+ }
+
+ // At this point, we've checked for illegal code motion. If either A or B
+ // isn't strided, there's nothing left to do.
+ if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
+ continue;
+
+ // Ignore A if it's already in a group or isn't the same kind of memory
+ // operation as B.
+ // Note that mayReadFromMemory() isn't mutually exclusive with
+ // mayWriteToMemory() in the case of atomic loads. We shouldn't see those
+ // here; canVectorizeMemory() should have returned false, unless we asked
+ // for optimization remarks.
+ if (isInterleaved(A) ||
+ (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
+ (A->mayWriteToMemory() != B->mayWriteToMemory()))
+ continue;
+
+ // Check rules 1 and 2. Ignore A if its stride or size is different from
+ // that of B.
+ if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
+ continue;
+
+ // Ignore A if the memory objects of A and B don't belong to the same
+ // address space.
+ if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
+ continue;
+
+ // Calculate the distance from A to B.
+ const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
+ if (!DistToB)
+ continue;
+ int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
+
+ // Check rule 3. Ignore A if its distance to B is not a multiple of the
+ // size.
+ if (DistanceToB % static_cast<int64_t>(DesB.Size))
+ continue;
+
+ // All members of a predicated interleave-group must have the same predicate,
+ // and currently must reside in the same BB.
+ BasicBlock *BlockA = A->getParent();
+ BasicBlock *BlockB = B->getParent();
+ if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
+ (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
+ continue;
+
+ // The index of A is the index of B plus A's distance to B in multiples
+ // of the size.
+ int IndexA =
+ Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
+
+ // Try to insert A into B's group.
+ if (Group->insertMember(A, IndexA, DesA.Align)) {
+ LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
+ << " into the interleave group with" << *B
+ << '\n');
+ InterleaveGroupMap[A] = Group;
+
+ // Set the first load in program order as the insert position.
+ if (A->mayReadFromMemory())
+ Group->setInsertPos(A);
+ }
+ } // Iteration over A accesses.
+ } // Iteration over B accesses.
+
+ // Remove interleaved store groups with gaps.
+ for (auto *Group : StoreGroups)
+ if (Group->getNumMembers() != Group->getFactor()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved store group due "
+ "to gaps.\n");
+ releaseGroup(Group);
+ }
+ // Remove interleaved groups with gaps (currently only loads) whose memory
+ // accesses may wrap around. We have to revisit the getPtrStride analysis,
+ // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
+ // not check wrapping (see documentation there).
+ // FORNOW we use Assume=false;
+ // TODO: Change to Assume=true but making sure we don't exceed the threshold
+ // of runtime SCEV assumptions checks (thereby potentially failing to
+ // vectorize altogether).
+ // Additional optional optimizations:
+ // TODO: If we are peeling the loop and we know that the first pointer doesn't
+ // wrap then we can deduce that all pointers in the group don't wrap.
+ // This means that we can forcefully peel the loop in order to only have to
+ // check the first pointer for no-wrap. Once we change to Assume=true we will
+ // only need at most one runtime check per interleaved group.
+ for (auto *Group : LoadGroups) {
+ // Case 1: A full group. We can skip the checks; for full groups, if the wide
+ // load would wrap around the address space we would do a memory access at
+ // nullptr even without the transformation.
+ if (Group->getNumMembers() == Group->getFactor())
+ continue;
+
+ // Case 2: If the first and last members of the group don't wrap, this implies
+ // that all the pointers in the group don't wrap.
+ // So we check only group member 0 (which is always guaranteed to exist),
+ // and group member Factor - 1; if the latter doesn't exist we rely on
+ // peeling (if it is a non-reversed access -- see Case 3).
+ Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
+ if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "first group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
+ if (LastMember) {
+ Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
+ if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "last group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ }
+ } else {
+ // Case 3: A non-reversed interleaved load group with gaps: We need
+ // to execute at least one scalar epilogue iteration. This will ensure
+ // we don't speculatively access memory out-of-bounds. We only need
+ // to look for a member at index factor - 1, since every group must have
+ // a member at index zero.
+ if (Group->isReverse()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "a reverse access with gaps.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ LLVM_DEBUG(
+ dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
+ RequiresScalarEpilogue = true;
+ }
+ }
+}
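Illustrative example (not part of the patch) of a loop this analysis is meant to recognize, assuming it is otherwise vectorizable: both loads have stride 2 and equal size and sit one element apart, so they can form a full factor-2 load group, while the unit-stride store is not interleaved.

    void sum_pairs(int *a, const int *b, int n) {
      for (int i = 0; i < n; ++i)
        a[i] = b[2 * i] + b[2 * i + 1];  // candidate interleave group of factor 2
    }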
+
+void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
+ // If no group had triggered the requirement to create an epilogue loop,
+ // there is nothing to do.
+ if (!requiresScalarEpilogue())
+ return;
+
+ // Avoid releasing a Group twice.
+ SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
+ for (auto &I : InterleaveGroupMap) {
+ InterleaveGroup<Instruction> *Group = I.second;
+ if (Group->requiresScalarEpilogue())
+ DelSet.insert(Group);
+ }
+ for (auto *Ptr : DelSet) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Invalidate candidate interleaved group due to gaps that "
+ "require a scalar epilogue (not allowed under optsize) and cannot "
+ "be masked (not enabled). \n");
+ releaseGroup(Ptr);
+ }
+
+ RequiresScalarEpilogue = false;
+}
+
+template <typename InstT>
+void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
+ llvm_unreachable("addMetadata can only be used for Instruction");
+}
+
+namespace llvm {
+template <>
+void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
+ SmallVector<Value *, 4> VL;
+ std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
+ [](std::pair<int, Instruction *> p) { return p.second; });
+ propagateMetadata(NewInst, VL);
+}
+}
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp
index da9855ff630b..eab7ec819536 100644
--- a/contrib/llvm/lib/AsmParser/LLLexer.cpp
+++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp
@@ -592,6 +592,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(arm_apcscc);
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
+ KEYWORD(aarch64_vector_pcs);
KEYWORD(msp430_intrcc);
KEYWORD(avr_intrcc);
KEYWORD(avr_signalcc);
@@ -678,6 +679,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(sanitize_hwaddress);
KEYWORD(sanitize_thread);
KEYWORD(sanitize_memory);
+ KEYWORD(speculative_load_hardening);
KEYWORD(swifterror);
KEYWORD(swiftself);
KEYWORD(uwtable);
@@ -738,6 +740,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(readOnly);
KEYWORD(noRecurse);
KEYWORD(returnDoesNotAlias);
+ KEYWORD(noInline);
KEYWORD(calls);
KEYWORD(callee);
KEYWORD(hotness);
@@ -785,6 +788,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(info);
KEYWORD(byte);
KEYWORD(bit);
+ KEYWORD(varFlags);
#undef KEYWORD
@@ -820,6 +824,8 @@ lltok::Kind LLLexer::LexIdentifier() {
} \
} while (false)
+ INSTKEYWORD(fneg, FNeg);
+
INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
@@ -899,17 +905,27 @@ lltok::Kind LLLexer::LexIdentifier() {
return lltok::DIFlag;
}
+ if (Keyword.startswith("DISPFlag")) {
+ StrVal.assign(Keyword.begin(), Keyword.end());
+ return lltok::DISPFlag;
+ }
+
if (Keyword.startswith("CSK_")) {
StrVal.assign(Keyword.begin(), Keyword.end());
return lltok::ChecksumKind;
}
if (Keyword == "NoDebug" || Keyword == "FullDebug" ||
- Keyword == "LineTablesOnly") {
+ Keyword == "LineTablesOnly" || Keyword == "DebugDirectivesOnly") {
StrVal.assign(Keyword.begin(), Keyword.end());
return lltok::EmissionKind;
}
+ if (Keyword == "GNU" || Keyword == "None" || Keyword == "Default") {
+ StrVal.assign(Keyword.begin(), Keyword.end());
+ return lltok::NameTableKind;
+ }
+
// Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
// the CFE to avoid forcing it to deal with 64-bit numbers.
if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
index 7cf74dd16f5a..ee634505581e 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -1276,6 +1276,9 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
B.addAttribute(Attribute::SanitizeThread); break;
case lltok::kw_sanitize_memory:
B.addAttribute(Attribute::SanitizeMemory); break;
+ case lltok::kw_speculative_load_hardening:
+ B.addAttribute(Attribute::SpeculativeLoadHardening);
+ break;
case lltok::kw_strictfp: B.addAttribute(Attribute::StrictFP); break;
case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break;
case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break;
@@ -1317,7 +1320,8 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy,
const std::string &Name) {
if (auto *FT = dyn_cast<FunctionType>(PTy->getElementType()))
- return Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
+ return Function::Create(FT, GlobalValue::ExternalWeakLinkage,
+ PTy->getAddressSpace(), Name, M);
else
return new GlobalVariable(*M, PTy->getElementType(), false,
GlobalValue::ExternalWeakLinkage, nullptr, Name,
@@ -1325,11 +1329,33 @@ static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy,
PTy->getAddressSpace());
}
+Value *LLParser::checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
+ Value *Val, bool IsCall) {
+ if (Val->getType() == Ty)
+ return Val;
+ // For calls we also accept variables in the program address space.
+ Type *SuggestedTy = Ty;
+ if (IsCall && isa<PointerType>(Ty)) {
+ Type *TyInProgAS = cast<PointerType>(Ty)->getElementType()->getPointerTo(
+ M->getDataLayout().getProgramAddressSpace());
+ SuggestedTy = TyInProgAS;
+ if (Val->getType() == TyInProgAS)
+ return Val;
+ }
+ if (Ty->isLabelTy())
+ Error(Loc, "'" + Name + "' is not a basic block");
+ else
+ Error(Loc, "'" + Name + "' defined with type '" +
+ getTypeString(Val->getType()) + "' but expected '" +
+ getTypeString(SuggestedTy) + "'");
+ return nullptr;
+}
+
/// GetGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
- LocTy Loc) {
+ LocTy Loc, bool IsCall) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
if (!PTy) {
Error(Loc, "global variable reference must have pointer type");
@@ -1349,12 +1375,9 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
}
// If we have the value in the symbol table or fwd-ref table, return it.
- if (Val) {
- if (Val->getType() == Ty) return Val;
- Error(Loc, "'@" + Name + "' defined with type '" +
- getTypeString(Val->getType()) + "'");
- return nullptr;
- }
+ if (Val)
+ return cast_or_null<GlobalValue>(
+ checkValidVariableType(Loc, "@" + Name, Ty, Val, IsCall));
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, Name);
@@ -1362,7 +1385,8 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
return FwdVal;
}
-GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
+GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc,
+ bool IsCall) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
if (!PTy) {
Error(Loc, "global variable reference must have pointer type");
@@ -1380,12 +1404,9 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
}
// If we have the value in the symbol table or fwd-ref table, return it.
- if (Val) {
- if (Val->getType() == Ty) return Val;
- Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
- getTypeString(Val->getType()) + "'");
- return nullptr;
- }
+ if (Val)
+ return cast_or_null<GlobalValue>(
+ checkValidVariableType(Loc, "@" + Twine(ID), Ty, Val, IsCall));
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, "");
@@ -1500,8 +1521,8 @@ bool LLParser::ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM) {
/// ParseOptionalAddrSpace
/// := /*empty*/
/// := 'addrspace' '(' uint32 ')'
-bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
- AddrSpace = 0;
+bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace, unsigned DefaultAS) {
+ AddrSpace = DefaultAS;
if (!EatIfPresent(lltok::kw_addrspace))
return false;
return ParseToken(lltok::lparen, "expected '(' in address space") ||
@@ -1601,6 +1622,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_sanitize_hwaddress:
case lltok::kw_sanitize_memory:
case lltok::kw_sanitize_thread:
+ case lltok::kw_speculative_load_hardening:
case lltok::kw_ssp:
case lltok::kw_sspreq:
case lltok::kw_sspstrong:
@@ -1697,6 +1719,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_sanitize_hwaddress:
case lltok::kw_sanitize_memory:
case lltok::kw_sanitize_thread:
+ case lltok::kw_speculative_load_hardening:
case lltok::kw_ssp:
case lltok::kw_sspreq:
case lltok::kw_sspstrong:
@@ -1851,6 +1874,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'arm_apcscc'
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
+/// ::= 'aarch64_vector_pcs'
/// ::= 'msp430_intrcc'
/// ::= 'avr_intrcc'
/// ::= 'avr_signalcc'
@@ -1894,6 +1918,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
+ case lltok::kw_aarch64_vector_pcs:CC = CallingConv::AArch64_VectorCall; break;
case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
case lltok::kw_avr_intrcc: CC = CallingConv::AVR_INTR; break;
case lltok::kw_avr_signalcc: CC = CallingConv::AVR_SIGNAL; break;
@@ -2741,19 +2766,6 @@ bool LLParser::PerFunctionState::FinishFunction() {
return false;
}
-static bool isValidVariableType(Module *M, Type *Ty, Value *Val, bool IsCall) {
- if (Val->getType() == Ty)
- return true;
- // For calls we also accept variables in the program address space
- if (IsCall && isa<PointerType>(Ty)) {
- Type *TyInProgAS = cast<PointerType>(Ty)->getElementType()->getPointerTo(
- M->getDataLayout().getProgramAddressSpace());
- if (Val->getType() == TyInProgAS)
- return true;
- }
- return false;
-}
-
/// GetVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
@@ -2771,16 +2783,8 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, Type *Ty,
}
// If we have the value in the symbol table or fwd-ref table, return it.
- if (Val) {
- if (isValidVariableType(P.M, Ty, Val, IsCall))
- return Val;
- if (Ty->isLabelTy())
- P.Error(Loc, "'%" + Name + "' is not a basic block");
- else
- P.Error(Loc, "'%" + Name + "' defined with type '" +
- getTypeString(Val->getType()) + "'");
- return nullptr;
- }
+ if (Val)
+ return P.checkValidVariableType(Loc, "%" + Name, Ty, Val, IsCall);
// Don't make placeholders with invalid type.
if (!Ty->isFirstClassType()) {
@@ -2814,16 +2818,8 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, LocTy Loc,
}
// If we have the value in the symbol table or fwd-ref table, return it.
- if (Val) {
- if (isValidVariableType(P.M, Ty, Val, IsCall))
- return Val;
- if (Ty->isLabelTy())
- P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block");
- else
- P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
- getTypeString(Val->getType()) + "'");
- return nullptr;
- }
+ if (Val)
+ return P.checkValidVariableType(Loc, "%" + Twine(ID), Ty, Val, IsCall);
if (!Ty->isFirstClassType()) {
P.Error(Loc, "invalid use of a non-first-class type");
@@ -3299,7 +3295,31 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ID.Kind = ValID::t_Constant;
return false;
}
-
+
+ // Unary Operators.
+ case lltok::kw_fneg: {
+ unsigned Opc = Lex.getUIntVal();
+ Constant *Val;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' in unary constantexpr") ||
+ ParseGlobalTypeAndValue(Val) ||
+ ParseToken(lltok::rparen, "expected ')' in unary constantexpr"))
+ return true;
+
+ // Check that the type is valid for the operator.
+ switch (Opc) {
+ case Instruction::FNeg:
+ if (!Val->getType()->isFPOrFPVectorTy())
+ return Error(ID.Loc, "constexpr requires fp operands");
+ break;
+ default: llvm_unreachable("Unknown unary operator!");
+ }
+ unsigned Flags = 0;
+ Constant *C = ConstantExpr::get(Opc, Val, Flags);
+ ID.ConstantVal = C;
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
// Binary Operators.
case lltok::kw_add:
case lltok::kw_fadd:
@@ -3718,10 +3738,21 @@ struct EmissionKindField : public MDUnsignedField {
EmissionKindField() : MDUnsignedField(0, DICompileUnit::LastEmissionKind) {}
};
+struct NameTableKindField : public MDUnsignedField {
+ NameTableKindField()
+ : MDUnsignedField(
+ 0, (unsigned)
+ DICompileUnit::DebugNameTableKind::LastDebugNameTableKind) {}
+};
+
struct DIFlagField : public MDFieldImpl<DINode::DIFlags> {
DIFlagField() : MDFieldImpl(DINode::FlagZero) {}
};
+struct DISPFlagField : public MDFieldImpl<DISubprogram::DISPFlags> {
+ DISPFlagField() : MDFieldImpl(DISubprogram::SPFlagZero) {}
+};
+
struct MDSignedField : public MDFieldImpl<int64_t> {
int64_t Min;
int64_t Max;
@@ -3938,6 +3969,25 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, EmissionKindField &Result
template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+ NameTableKindField &Result) {
+ if (Lex.getKind() == lltok::APSInt)
+ return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+ if (Lex.getKind() != lltok::NameTableKind)
+ return TokError("expected nameTable kind");
+
+ auto Kind = DICompileUnit::getNameTableKind(Lex.getStrVal());
+ if (!Kind)
+ return TokError("invalid nameTable kind" + Twine(" '") + Lex.getStrVal() +
+ "'");
+ assert(((unsigned)*Kind) <= Result.Max && "Expected valid nameTable kind");
+ Result.assign((unsigned)*Kind);
+ Lex.Lex();
+ return false;
+}
+
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
DwarfAttEncodingField &Result) {
if (Lex.getKind() == lltok::APSInt)
return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
@@ -3995,6 +4045,46 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) {
return false;
}
+/// DISPFlagField
+/// ::= uint32
+/// ::= DISPFlagVector
+/// ::= DISPFlagVector '|' DISPFlag* '|' uint32
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DISPFlagField &Result) {
+
+ // Parser for a single flag.
+ auto parseFlag = [&](DISubprogram::DISPFlags &Val) {
+ if (Lex.getKind() == lltok::APSInt && !Lex.getAPSIntVal().isSigned()) {
+ uint32_t TempVal = static_cast<uint32_t>(Val);
+ bool Res = ParseUInt32(TempVal);
+ Val = static_cast<DISubprogram::DISPFlags>(TempVal);
+ return Res;
+ }
+
+ if (Lex.getKind() != lltok::DISPFlag)
+ return TokError("expected debug info flag");
+
+ Val = DISubprogram::getFlag(Lex.getStrVal());
+ if (!Val)
+ return TokError(Twine("invalid subprogram debug info flag '") +
+ Lex.getStrVal() + "'");
+ Lex.Lex();
+ return false;
+ };
+
+ // Parse the flags and combine them together.
+ DISubprogram::DISPFlags Combined = DISubprogram::SPFlagZero;
+ do {
+ DISubprogram::DISPFlags Val;
+ if (parseFlag(Val))
+ return true;
+ Combined |= Val;
+ } while (EatIfPresent(lltok::bar));
+
+ Result.assign(Combined);
+ return false;
+}
+
template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
MDSignedField &Result) {
@@ -4206,18 +4296,21 @@ bool LLParser::ParseSpecializedMDNode(MDNode *&N, bool IsDistinct) {
(IsDistinct ? CLASS::getDistinct ARGS : CLASS::get ARGS)
/// ParseDILocationFields:
-/// ::= !DILocation(line: 43, column: 8, scope: !5, inlinedAt: !6)
+/// ::= !DILocation(line: 43, column: 8, scope: !5, inlinedAt: !6,
+/// isImplicitCode: true)
bool LLParser::ParseDILocation(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(line, LineField, ); \
OPTIONAL(column, ColumnField, ); \
REQUIRED(scope, MDField, (/* AllowNull */ false)); \
- OPTIONAL(inlinedAt, MDField, );
+ OPTIONAL(inlinedAt, MDField, ); \
+ OPTIONAL(isImplicitCode, MDBoolField, (false));
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- Result = GET_OR_DISTINCT(
- DILocation, (Context, line.Val, column.Val, scope.Val, inlinedAt.Val));
+ Result =
+ GET_OR_DISTINCT(DILocation, (Context, line.Val, column.Val, scope.Val,
+ inlinedAt.Val, isImplicitCode.Val));
return false;
}
@@ -4281,19 +4374,21 @@ bool LLParser::ParseDIEnumerator(MDNode *&Result, bool IsDistinct) {
}
/// ParseDIBasicType:
-/// ::= !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32)
+/// ::= !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32,
+/// encoding: DW_ATE_encoding, flags: 0)
bool LLParser::ParseDIBasicType(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_base_type)); \
OPTIONAL(name, MDStringField, ); \
OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \
OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \
- OPTIONAL(encoding, DwarfAttEncodingField, );
+ OPTIONAL(encoding, DwarfAttEncodingField, ); \
+ OPTIONAL(flags, DIFlagField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
Result = GET_OR_DISTINCT(DIBasicType, (Context, tag.Val, name.Val, size.Val,
- align.Val, encoding.Val));
+ align.Val, encoding.Val, flags.Val));
return false;
}
@@ -4446,7 +4541,8 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
OPTIONAL(dwoId, MDUnsignedField, ); \
OPTIONAL(splitDebugInlining, MDBoolField, = true); \
OPTIONAL(debugInfoForProfiling, MDBoolField, = false); \
- OPTIONAL(gnuPubnames, MDBoolField, = false);
+ OPTIONAL(nameTableKind, NameTableKindField, ); \
+ OPTIONAL(debugBaseAddress, MDBoolField, = false);
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4454,7 +4550,8 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
Context, language.Val, file.Val, producer.Val, isOptimized.Val, flags.Val,
runtimeVersion.Val, splitDebugFilename.Val, emissionKind.Val, enums.Val,
retainedTypes.Val, globals.Val, imports.Val, macros.Val, dwoId.Val,
- splitDebugInlining.Val, debugInfoForProfiling.Val, gnuPubnames.Val);
+ splitDebugInlining.Val, debugInfoForProfiling.Val, nameTableKind.Val,
+ debugBaseAddress.Val);
return false;
}
@@ -4464,8 +4561,8 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
/// isDefinition: true, scopeLine: 8, containingType: !3,
/// virtuality: DW_VIRTUALTIY_pure_virtual,
/// virtualIndex: 10, thisAdjustment: 4, flags: 11,
-/// isOptimized: false, templateParams: !4, declaration: !5,
-/// retainedNodes: !6, thrownTypes: !7)
+/// spFlags: 10, isOptimized: false, templateParams: !4,
+/// declaration: !5, retainedNodes: !6, thrownTypes: !7)
bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
auto Loc = Lex.getLoc();
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
@@ -4483,26 +4580,31 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
OPTIONAL(virtualIndex, MDUnsignedField, (0, UINT32_MAX)); \
OPTIONAL(thisAdjustment, MDSignedField, (0, INT32_MIN, INT32_MAX)); \
OPTIONAL(flags, DIFlagField, ); \
+ OPTIONAL(spFlags, DISPFlagField, ); \
OPTIONAL(isOptimized, MDBoolField, ); \
OPTIONAL(unit, MDField, ); \
OPTIONAL(templateParams, MDField, ); \
OPTIONAL(declaration, MDField, ); \
- OPTIONAL(retainedNodes, MDField, ); \
+ OPTIONAL(retainedNodes, MDField, ); \
OPTIONAL(thrownTypes, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- if (isDefinition.Val && !IsDistinct)
+ // An explicit spFlags field takes precedence over individual fields in
+ // older IR versions.
+ DISubprogram::DISPFlags SPFlags =
+ spFlags.Seen ? spFlags.Val
+ : DISubprogram::toSPFlags(isLocal.Val, isDefinition.Val,
+ isOptimized.Val, virtuality.Val);
+ if ((SPFlags & DISubprogram::SPFlagDefinition) && !IsDistinct)
return Lex.Error(
Loc,
- "missing 'distinct', required for !DISubprogram when 'isDefinition'");
-
+ "missing 'distinct', required for !DISubprogram that is a Definition");
Result = GET_OR_DISTINCT(
DISubprogram,
(Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val,
- type.Val, isLocal.Val, isDefinition.Val, scopeLine.Val,
- containingType.Val, virtuality.Val, virtualIndex.Val, thisAdjustment.Val,
- flags.Val, isOptimized.Val, unit.Val, templateParams.Val,
+ type.Val, scopeLine.Val, containingType.Val, virtualIndex.Val,
+ thisAdjustment.Val, flags.Val, SPFlags, unit.Val, templateParams.Val,
declaration.Val, retainedNodes.Val, thrownTypes.Val));
return false;
}
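With this change a textual !DISubprogram can either spell the packed field directly, e.g. spFlags: DISPFlagDefinition | DISPFlagOptimized, or keep the legacy booleans (isDefinition: true, isOptimized: true), from which the parser derives the same value via DISubprogram::toSPFlags(); when both appear, the explicit spFlags wins. The flag spellings follow the DISPFlag prefix recognized by the lexer change above.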
@@ -4637,7 +4739,8 @@ bool LLParser::ParseDITemplateValueParameter(MDNode *&Result, bool IsDistinct) {
/// ParseDIGlobalVariable:
/// ::= !DIGlobalVariable(scope: !0, name: "foo", linkageName: "foo",
/// file: !1, line: 7, type: !2, isLocal: false,
-/// isDefinition: true, declaration: !3, align: 8)
+/// isDefinition: true, templateParams: !3,
+/// declaration: !4, align: 8)
bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(name, MDStringField, (/* AllowEmpty */ false)); \
@@ -4648,15 +4751,17 @@ bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
OPTIONAL(type, MDField, ); \
OPTIONAL(isLocal, MDBoolField, ); \
OPTIONAL(isDefinition, MDBoolField, (true)); \
+ OPTIONAL(templateParams, MDField, ); \
OPTIONAL(declaration, MDField, ); \
OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX));
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- Result = GET_OR_DISTINCT(DIGlobalVariable,
- (Context, scope.Val, name.Val, linkageName.Val,
- file.Val, line.Val, type.Val, isLocal.Val,
- isDefinition.Val, declaration.Val, align.Val));
+ Result =
+ GET_OR_DISTINCT(DIGlobalVariable,
+ (Context, scope.Val, name.Val, linkageName.Val, file.Val,
+ line.Val, type.Val, isLocal.Val, isDefinition.Val,
+ declaration.Val, templateParams.Val, align.Val));
return false;
}
@@ -4912,10 +5017,10 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return false;
}
case ValID::t_GlobalName:
- V = GetGlobalVal(ID.StrVal, Ty, ID.Loc);
+ V = GetGlobalVal(ID.StrVal, Ty, ID.Loc, IsCall);
return V == nullptr;
case ValID::t_GlobalID:
- V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc);
+ V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc, IsCall);
return V == nullptr;
case ValID::t_APSInt:
if (!Ty->isIntegerTy())
@@ -5058,8 +5163,8 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
/// FunctionHeader
/// ::= OptionalLinkage OptionalPreemptionSpecifier OptionalVisibility
/// OptionalCallingConv OptRetAttrs OptUnnamedAddr Type GlobalName
-/// '(' ArgList ')' OptFuncAttrs OptSection OptionalAlign OptGC
-/// OptionalPrefix OptionalPrologue OptPersonalityFn
+/// '(' ArgList ')' OptAddrSpace OptFuncAttrs OptSection OptionalAlign
+/// OptGC OptionalPrefix OptionalPrologue OptPersonalityFn
bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
// Parse the linkage.
LocTy LinkageLoc = Lex.getLoc();
@@ -5137,6 +5242,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
unsigned Alignment;
std::string GC;
GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None;
+ unsigned AddrSpace = 0;
Constant *Prefix = nullptr;
Constant *Prologue = nullptr;
Constant *PersonalityFn = nullptr;
@@ -5144,6 +5250,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (ParseArgumentList(ArgList, isVarArg) ||
ParseOptionalUnnamedAddr(UnnamedAddr) ||
+ ParseOptionalProgramAddrSpace(AddrSpace) ||
ParseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false,
BuiltinLoc) ||
(EatIfPresent(lltok::kw_section) &&
@@ -5188,7 +5295,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
FunctionType *FT =
FunctionType::get(RetType, ParamTypeList, isVarArg);
- PointerType *PFT = PointerType::getUnqual(FT);
+ PointerType *PFT = PointerType::get(FT, AddrSpace);
Fn = nullptr;
if (!FunctionName.empty()) {
@@ -5202,8 +5309,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
"function as global value!");
if (Fn->getType() != PFT)
return Error(FRVI->second.second, "invalid forward reference to "
- "function '" + FunctionName + "' with wrong type!");
-
+ "function '" + FunctionName + "' with wrong type: "
+ "expected '" + getTypeString(PFT) + "' but was '" +
+ getTypeString(Fn->getType()) + "'");
ForwardRefVals.erase(FRVI);
} else if ((Fn = M->getFunction(FunctionName))) {
// Reject redefinitions.
@@ -5221,16 +5329,21 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Fn = cast<Function>(I->second.first);
if (Fn->getType() != PFT)
return Error(NameLoc, "type of definition and forward reference of '@" +
- Twine(NumberedVals.size()) + "' disagree");
+ Twine(NumberedVals.size()) + "' disagree: "
+ "expected '" + getTypeString(PFT) + "' but was '" +
+ getTypeString(Fn->getType()) + "'");
ForwardRefValIDs.erase(I);
}
}
if (!Fn)
- Fn = Function::Create(FT, GlobalValue::ExternalLinkage, FunctionName, M);
+ Fn = Function::Create(FT, GlobalValue::ExternalLinkage, AddrSpace,
+ FunctionName, M);
else // Move the forward-reference to the correct spot in the module.
M->getFunctionList().splice(M->end(), M->getFunctionList(), Fn);
+ assert(Fn->getAddressSpace() == AddrSpace && "Created function in wrong AS");
+
if (FunctionName.empty())
NumberedVals.push_back(Fn);
@@ -5419,7 +5532,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// Set the name on the instruction.
if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true;
- } while (!isa<TerminatorInst>(Inst));
+ } while (!Inst->isTerminator());
return false;
}
@@ -5454,6 +5567,16 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_catchswitch: return ParseCatchSwitch(Inst, PFS);
case lltok::kw_catchpad: return ParseCatchPad(Inst, PFS);
case lltok::kw_cleanuppad: return ParseCleanupPad(Inst, PFS);
+ // Unary Operators.
+ case lltok::kw_fneg: {
+ FastMathFlags FMF = EatFastMathFlagsIfPresent();
+ int Res = ParseUnaryOp(Inst, PFS, KeywordVal, 2);
+ if (Res != 0)
+ return Res;
+ if (FMF.any())
+ Inst->setFastMathFlags(FMF);
+ return false;
+ }
// Binary Operators.
case lltok::kw_add:
case lltok::kw_sub:
@@ -5749,6 +5872,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
std::vector<unsigned> FwdRefAttrGrps;
LocTy NoBuiltinLoc;
unsigned CC;
+ unsigned InvokeAddrSpace;
Type *RetType = nullptr;
LocTy RetTypeLoc;
ValID CalleeID;
@@ -5757,6 +5881,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
BasicBlock *NormalBB, *UnwindBB;
if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
+ ParseOptionalProgramAddrSpace(InvokeAddrSpace) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) ||
ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
@@ -5788,8 +5913,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS,
- /*IsCall=*/true))
+ if (ConvertValIDToValue(PointerType::get(Ty, InvokeAddrSpace), CalleeID,
+ Callee, &PFS, /*IsCall=*/true))
return true;
// Set up the Attribute for the function.
@@ -6024,6 +6149,43 @@ bool LLParser::ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS) {
}
//===----------------------------------------------------------------------===//
+// Unary Operators.
+//===----------------------------------------------------------------------===//
+
+/// ParseUnaryOp
+/// ::= UnaryOp TypeAndValue
+///
+/// If OperandType is 0, then any FP or integer operand is allowed. If it is 1,
+/// then any integer operand is allowed; if it is 2, any FP operand is allowed.
+bool LLParser::ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc, unsigned OperandType) {
+ LocTy Loc; Value *LHS;
+ if (ParseTypeAndValue(LHS, Loc, PFS))
+ return true;
+
+ bool Valid;
+ switch (OperandType) {
+ default: llvm_unreachable("Unknown operand type!");
+ case 0: // int or FP.
+ Valid = LHS->getType()->isIntOrIntVectorTy() ||
+ LHS->getType()->isFPOrFPVectorTy();
+ break;
+ case 1:
+ Valid = LHS->getType()->isIntOrIntVectorTy();
+ break;
+ case 2:
+ Valid = LHS->getType()->isFPOrFPVectorTy();
+ break;
+ }
+
+ if (!Valid)
+ return Error(Loc, "invalid operand type for instruction");
+
+ Inst = UnaryOperator::Create((Instruction::UnaryOps)Opc, LHS);
+ return false;
+}
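Illustrative sketch (not part of the patch) of what this path builds for input such as fneg fast float %x, using the same UnaryOperator API the code above calls; buildFNeg is a made-up helper name:

    static Instruction *buildFNeg(Value *X) {  // X: any FP or FP-vector value
      Instruction *Neg = UnaryOperator::Create(Instruction::FNeg, X);
      FastMathFlags FMF;
      FMF.setFast();
      Neg->setFastMathFlags(FMF);  // mirrors the 'fast' handling in ParseInstruction
      return Neg;
    }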
+
+//===----------------------------------------------------------------------===//
// Binary Operators.
//===----------------------------------------------------------------------===//
@@ -6332,6 +6494,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
AttrBuilder RetAttrs, FnAttrs;
std::vector<unsigned> FwdRefAttrGrps;
LocTy BuiltinLoc;
+ unsigned CallAddrSpace;
unsigned CC;
Type *RetType = nullptr;
LocTy RetTypeLoc;
@@ -6348,6 +6511,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
FastMathFlags FMF = EatFastMathFlagsIfPresent();
if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
+ ParseOptionalProgramAddrSpace(CallAddrSpace) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
ParseValID(CalleeID) ||
ParseParameterList(ArgList, PFS, TCK == CallInst::TCK_MustTail,
@@ -6380,8 +6544,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS,
- /*IsCall=*/true))
+ if (ConvertValIDToValue(PointerType::get(Ty, CallAddrSpace), CalleeID, Callee,
+ &PFS, /*IsCall=*/true))
return true;
// Set up the Attribute for the function.
@@ -6685,8 +6849,13 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
return Error(PtrLoc, "atomicrmw operand must be a pointer");
if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
return Error(ValLoc, "atomicrmw value and pointer type do not match");
- if (!Val->getType()->isIntegerTy())
- return Error(ValLoc, "atomicrmw operand must be an integer");
+
+ if (!Val->getType()->isIntegerTy()) {
+ return Error(ValLoc, "atomicrmw " +
+ AtomicRMWInst::getOperationName(Operation) +
+ " operand must be an integer");
+ }
+
unsigned Size = Val->getType()->getPrimitiveSizeInBits();
if (Size < 8 || (Size & (Size - 1)))
return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
@@ -7350,8 +7519,14 @@ bool LLParser::ParseArgs(std::vector<uint64_t> &Args) {
return false;
}
-static ValueInfo EmptyVI =
- ValueInfo(false, (GlobalValueSummaryMapTy::value_type *)-8);
+static const auto FwdVIRef = (GlobalValueSummaryMapTy::value_type *)-8;
+
+static void resolveFwdRef(ValueInfo *Fwd, ValueInfo &Resolved) {
+ bool ReadOnly = Fwd->isReadOnly();
+ *Fwd = Resolved;
+ if (ReadOnly)
+ Fwd->setReadOnly();
+}
/// Stores the given Name/GUID and associated summary into the Index.
/// Also updates any forward references to the associated entry ID.
@@ -7387,9 +7562,9 @@ void LLParser::AddGlobalValueToIndex(
auto FwdRefVIs = ForwardRefValueInfos.find(ID);
if (FwdRefVIs != ForwardRefValueInfos.end()) {
for (auto VIRef : FwdRefVIs->second) {
- assert(*VIRef.first == EmptyVI &&
+ assert(VIRef.first->getRef() == FwdVIRef &&
"Forward referenced ValueInfo expected to be empty");
- *VIRef.first = VI;
+ resolveFwdRef(VIRef.first, VI);
}
ForwardRefValueInfos.erase(FwdRefVIs);
}
@@ -7552,8 +7727,8 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
return true;
auto FS = llvm::make_unique<FunctionSummary>(
- GVFlags, InstCount, FFlags, std::move(Refs), std::move(Calls),
- std::move(TypeIdInfo.TypeTests),
+ GVFlags, InstCount, FFlags, /*EntryCount=*/0, std::move(Refs),
+ std::move(Calls), std::move(TypeIdInfo.TypeTests),
std::move(TypeIdInfo.TypeTestAssumeVCalls),
std::move(TypeIdInfo.TypeCheckedLoadVCalls),
std::move(TypeIdInfo.TypeTestAssumeConstVCalls),
@@ -7579,11 +7754,14 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags(
/*Linkage=*/GlobalValue::ExternalLinkage, /*NotEligibleToImport=*/false,
/*Live=*/false, /*IsLocal=*/false);
+ GlobalVarSummary::GVarFlags GVarFlags(/*ReadOnly*/ false);
std::vector<ValueInfo> Refs;
if (ParseToken(lltok::colon, "expected ':' here") ||
ParseToken(lltok::lparen, "expected '(' here") ||
ParseModuleReference(ModulePath) ||
- ParseToken(lltok::comma, "expected ',' here") || ParseGVFlags(GVFlags))
+ ParseToken(lltok::comma, "expected ',' here") || ParseGVFlags(GVFlags) ||
+ ParseToken(lltok::comma, "expected ',' here") ||
+ ParseGVarFlags(GVarFlags))
return true;
// Parse optional refs field
@@ -7595,7 +7773,8 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
if (ParseToken(lltok::rparen, "expected ')' here"))
return true;
- auto GS = llvm::make_unique<GlobalVarSummary>(GVFlags, std::move(Refs));
+ auto GS =
+ llvm::make_unique<GlobalVarSummary>(GVFlags, GVarFlags, std::move(Refs));
GS->setModulePath(ModulePath);
@@ -7640,7 +7819,7 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID,
AS->setModulePath(ModulePath);
// Record forward reference if the aliasee is not parsed yet.
- if (AliaseeVI == EmptyVI) {
+ if (AliaseeVI.getRef() == FwdVIRef) {
auto FwdRef = ForwardRefAliasees.insert(
std::make_pair(GVId, std::vector<std::pair<AliasSummary *, LocTy>>()));
FwdRef.first->second.push_back(std::make_pair(AS.get(), Loc));
@@ -7667,6 +7846,7 @@ bool LLParser::ParseFlag(unsigned &Val) {
/// := 'funcFlags' ':' '(' ['readNone' ':' Flag]?
/// [',' 'readOnly' ':' Flag]? [',' 'noRecurse' ':' Flag]?
/// [',' 'returnDoesNotAlias' ':' Flag]? ')'
+/// [',' 'noInline' ':' Flag]? ')'
bool LLParser::ParseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
assert(Lex.getKind() == lltok::kw_funcFlags);
Lex.Lex();
@@ -7702,6 +7882,12 @@ bool LLParser::ParseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
return true;
FFlags.ReturnDoesNotAlias = Val;
break;
+ case lltok::kw_noInline:
+ Lex.Lex();
+ if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Val))
+ return true;
+ FFlags.NoInline = Val;
+ break;
default:
return Error(Lex.getLoc(), "expected function flag type");
}
@@ -7755,7 +7941,7 @@ bool LLParser::ParseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
// Keep track of the Call array index needing a forward reference.
// We will save the location of the ValueInfo needing an update, but
// can only do so once the std::vector is finalized.
- if (VI == EmptyVI)
+ if (VI.getRef() == FwdVIRef)
IdToIndexMap[GVId].push_back(std::make_pair(Calls.size(), Loc));
Calls.push_back(FunctionSummary::EdgeTy{VI, CalleeInfo(Hotness, RelBF)});
@@ -7767,7 +7953,7 @@ bool LLParser::ParseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
// of any forward GV references that need updating later.
for (auto I : IdToIndexMap) {
for (auto P : I.second) {
- assert(Calls[P.first].first == EmptyVI &&
+ assert(Calls[P.first].first.getRef() == FwdVIRef &&
"Forward referenced ValueInfo expected to be empty");
auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
@@ -7818,28 +8004,42 @@ bool LLParser::ParseOptionalRefs(std::vector<ValueInfo> &Refs) {
ParseToken(lltok::lparen, "expected '(' in refs"))
return true;
- IdToIndexMapType IdToIndexMap;
- // Parse each ref edge
- do {
+ struct ValueContext {
ValueInfo VI;
- LocTy Loc = Lex.getLoc();
unsigned GVId;
- if (ParseGVReference(VI, GVId))
+ LocTy Loc;
+ };
+ std::vector<ValueContext> VContexts;
+ // Parse each ref edge
+ do {
+ ValueContext VC;
+ VC.Loc = Lex.getLoc();
+ if (ParseGVReference(VC.VI, VC.GVId))
return true;
+ VContexts.push_back(VC);
+ } while (EatIfPresent(lltok::comma));
+
+ // Sort value contexts so that ones with readonly ValueInfo are at the end
+ // of the VContexts vector. This is needed to match immutableRefCount() behavior.
+ llvm::sort(VContexts, [](const ValueContext &VC1, const ValueContext &VC2) {
+ return VC1.VI.isReadOnly() < VC2.VI.isReadOnly();
+ });
+ IdToIndexMapType IdToIndexMap;
+ for (auto &VC : VContexts) {
// Keep track of the Refs array index needing a forward reference.
// We will save the location of the ValueInfo needing an update, but
// can only do so once the std::vector is finalized.
- if (VI == EmptyVI)
- IdToIndexMap[GVId].push_back(std::make_pair(Refs.size(), Loc));
- Refs.push_back(VI);
- } while (EatIfPresent(lltok::comma));
+ if (VC.VI.getRef() == FwdVIRef)
+ IdToIndexMap[VC.GVId].push_back(std::make_pair(Refs.size(), VC.Loc));
+ Refs.push_back(VC.VI);
+ }
// Now that the Refs vector is finalized, it is safe to save the locations
// of any forward GV references that need updating later.
for (auto I : IdToIndexMap) {
for (auto P : I.second) {
- assert(Refs[P.first] == EmptyVI &&
+ assert(Refs[P.first].getRef() == FwdVIRef &&
"Forward referenced ValueInfo expected to be empty");
auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
@@ -8027,12 +8227,18 @@ bool LLParser::ParseConstVCallList(
}
/// ConstVCall
-/// ::= VFuncId, Args
+/// ::= '(' VFuncId ',' Args ')'
bool LLParser::ParseConstVCall(FunctionSummary::ConstVCall &ConstVCall,
IdToIndexMapType &IdToIndexMap, unsigned Index) {
- if (ParseVFuncId(ConstVCall.VFunc, IdToIndexMap, Index) ||
- ParseToken(lltok::comma, "expected ',' here") ||
- ParseArgs(ConstVCall.Args))
+ if (ParseToken(lltok::lparen, "expected '(' here") ||
+ ParseVFuncId(ConstVCall.VFunc, IdToIndexMap, Index))
+ return true;
+
+ if (EatIfPresent(lltok::comma))
+ if (ParseArgs(ConstVCall.Args))
+ return true;
+
+ if (ParseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -8119,6 +8325,27 @@ bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
return false;
}
+/// GVarFlags
+/// ::= 'varFlags' ':' '(' 'readonly' ':' Flag ')'
+bool LLParser::ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags) {
+ assert(Lex.getKind() == lltok::kw_varFlags);
+ Lex.Lex();
+
+ unsigned Flag;
+ if (ParseToken(lltok::colon, "expected ':' here") ||
+ ParseToken(lltok::lparen, "expected '(' here") ||
+ ParseToken(lltok::kw_readonly, "expected 'readonly' here") ||
+ ParseToken(lltok::colon, "expected ':' here"))
+ return true;
+
+ ParseFlag(Flag);
+ GVarFlags.ReadOnly = Flag;
+
+ if (ParseToken(lltok::rparen, "expected ')' here"))
+ return true;
+ return false;
+}
+
/// ModuleReference
/// ::= 'module' ':' UInt
bool LLParser::ParseModuleReference(StringRef &ModulePath) {
@@ -8139,18 +8366,20 @@ bool LLParser::ParseModuleReference(StringRef &ModulePath) {
/// GVReference
/// ::= SummaryID
bool LLParser::ParseGVReference(ValueInfo &VI, unsigned &GVId) {
+ bool ReadOnly = EatIfPresent(lltok::kw_readonly);
if (ParseToken(lltok::SummaryID, "expected GV ID"))
return true;
GVId = Lex.getUIntVal();
-
// Check if we already have a VI for this GV
if (GVId < NumberedValueInfos.size()) {
- assert(NumberedValueInfos[GVId] != EmptyVI);
+ assert(NumberedValueInfos[GVId].getRef() != FwdVIRef);
VI = NumberedValueInfos[GVId];
} else
// We will create a forward reference to the stored location.
- VI = EmptyVI;
+ VI = ValueInfo(false, FwdVIRef);
+ if (ReadOnly)
+ VI.setReadOnly();
return false;
}
diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h
index 811f96418fa5..5a0fc297265d 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.h
+++ b/contrib/llvm/lib/AsmParser/LLParser.h
@@ -202,8 +202,9 @@ namespace llvm {
/// GetGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
- GlobalValue *GetGlobalVal(const std::string &Name, Type *Ty, LocTy Loc);
- GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
+ GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc,
+ bool IsCall);
+ GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall);
/// Get a Comdat with the specified name, creating a forward reference
/// record if needed.
@@ -267,7 +268,11 @@ namespace llvm {
bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalUnnamedAddr(GlobalVariable::UnnamedAddr &UnnamedAddr);
- bool ParseOptionalAddrSpace(unsigned &AddrSpace);
+ bool ParseOptionalAddrSpace(unsigned &AddrSpace, unsigned DefaultAS = 0);
+ bool ParseOptionalProgramAddrSpace(unsigned &AddrSpace) {
+ return ParseOptionalAddrSpace(
+ AddrSpace, M->getDataLayout().getProgramAddressSpace());
+ };
bool ParseOptionalParamAttrs(AttrBuilder &B);
bool ParseOptionalReturnAttrs(AttrBuilder &B);
bool ParseOptionalLinkage(unsigned &Res, bool &HasLinkage,
@@ -347,6 +352,7 @@ namespace llvm {
bool ParseVariableSummary(std::string Name, GlobalValue::GUID, unsigned ID);
bool ParseAliasSummary(std::string Name, GlobalValue::GUID, unsigned ID);
bool ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags);
+ bool ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags);
bool ParseOptionalFFlags(FunctionSummary::FFlags &FFlags);
bool ParseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls);
bool ParseHotness(CalleeInfo::HotnessType &Hotness);
@@ -448,6 +454,9 @@ namespace llvm {
bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
PerFunctionState *PFS, bool IsCall);
+ Value *checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
+ Value *Val, bool IsCall);
+
bool parseConstantValue(Type *Ty, Constant *&C);
bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
@@ -563,6 +572,8 @@ namespace llvm {
bool ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS);
bool ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc,
+ unsigned OperandType);
bool ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc,
unsigned OperandType);
bool ParseLogical(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h
index 8d8c7e99656e..c2e2795a9467 100644
--- a/contrib/llvm/lib/AsmParser/LLToken.h
+++ b/contrib/llvm/lib/AsmParser/LLToken.h
@@ -139,6 +139,7 @@ enum Kind {
kw_arm_apcscc,
kw_arm_aapcscc,
kw_arm_aapcs_vfpcc,
+ kw_aarch64_vector_pcs,
kw_msp430_intrcc,
kw_avr_intrcc,
kw_avr_signalcc,
@@ -219,6 +220,7 @@ enum Kind {
kw_sret,
kw_sanitize_thread,
kw_sanitize_memory,
+ kw_speculative_load_hardening,
kw_strictfp,
kw_swifterror,
kw_swiftself,
@@ -268,6 +270,7 @@ enum Kind {
kw_umin,
// Instruction Opcodes (Opcode in UIntVal).
+ kw_fneg,
kw_add,
kw_fadd,
kw_sub,
@@ -367,6 +370,7 @@ enum Kind {
kw_readOnly,
kw_noRecurse,
kw_returnDoesNotAlias,
+ kw_noInline,
kw_calls,
kw_callee,
kw_hotness,
@@ -414,6 +418,7 @@ enum Kind {
kw_info,
kw_byte,
kw_bit,
+ kw_varFlags,
// Unsigned Valued tokens (UIntVal).
GlobalID, // @42
@@ -434,8 +439,10 @@ enum Kind {
DwarfLang, // DW_LANG_foo
DwarfCC, // DW_CC_foo
EmissionKind, // lineTablesOnly
+ NameTableKind, // GNU
DwarfOp, // DW_OP_foo
DIFlag, // DIFlagFoo
+ DISPFlag, // DISPFlagFoo
DwarfMacinfo, // DW_MACINFO_foo
ChecksumKind, // CSK_foo
diff --git a/contrib/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/contrib/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
new file mode 100644
index 000000000000..b789f646b5f6
--- /dev/null
+++ b/contrib/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -0,0 +1,324 @@
+//===- AMDGPUMetadataVerifier.cpp - MsgPack Types ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Implements a verifier for AMDGPU HSA metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+#include "llvm/Support/AMDGPUMetadata.h"
+
+namespace llvm {
+namespace AMDGPU {
+namespace HSAMD {
+namespace V3 {
+
+bool MetadataVerifier::verifyScalar(
+ msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
+ auto ScalarPtr = dyn_cast<msgpack::ScalarNode>(&Node);
+ if (!ScalarPtr)
+ return false;
+ auto &Scalar = *ScalarPtr;
+ // Do not output extraneous tags for types we know from the spec.
+ Scalar.IgnoreTag = true;
+ if (Scalar.getScalarKind() != SKind) {
+ if (Strict)
+ return false;
+ // If we are not strict, we interpret string values as "implicitly typed"
+ // and attempt to coerce them to the expected type here.
+ if (Scalar.getScalarKind() != msgpack::ScalarNode::SK_String)
+ return false;
+ std::string StringValue = Scalar.getString();
+ Scalar.setScalarKind(SKind);
+ if (Scalar.inputYAML(StringValue) != StringRef())
+ return false;
+ }
+ if (verifyValue)
+ return verifyValue(Scalar);
+ return true;
+}
+
+bool MetadataVerifier::verifyInteger(msgpack::Node &Node) {
+ if (!verifyScalar(Node, msgpack::ScalarNode::SK_UInt))
+ if (!verifyScalar(Node, msgpack::ScalarNode::SK_Int))
+ return false;
+ return true;
+}
+
+bool MetadataVerifier::verifyArray(
+ msgpack::Node &Node, function_ref<bool(msgpack::Node &)> verifyNode,
+ Optional<size_t> Size) {
+ auto ArrayPtr = dyn_cast<msgpack::ArrayNode>(&Node);
+ if (!ArrayPtr)
+ return false;
+ auto &Array = *ArrayPtr;
+ if (Size && Array.size() != *Size)
+ return false;
+ for (auto &Item : Array)
+ if (!verifyNode(*Item.get()))
+ return false;
+
+ return true;
+}
+
+bool MetadataVerifier::verifyEntry(
+ msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ function_ref<bool(msgpack::Node &)> verifyNode) {
+ auto Entry = MapNode.find(Key);
+ if (Entry == MapNode.end())
+ return !Required;
+ return verifyNode(*Entry->second.get());
+}
+
+bool MetadataVerifier::verifyScalarEntry(
+ msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
+ return verifyEntry(MapNode, Key, Required, [=](msgpack::Node &Node) {
+ return verifyScalar(Node, SKind, verifyValue);
+ });
+}
+
+bool MetadataVerifier::verifyIntegerEntry(msgpack::MapNode &MapNode,
+ StringRef Key, bool Required) {
+ return verifyEntry(MapNode, Key, Required, [this](msgpack::Node &Node) {
+ return verifyInteger(Node);
+ });
+}
+
+bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
+ auto ArgsMapPtr = dyn_cast<msgpack::MapNode>(&Node);
+ if (!ArgsMapPtr)
+ return false;
+ auto &ArgsMap = *ArgsMapPtr;
+
+ if (!verifyScalarEntry(ArgsMap, ".name", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".type_name", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyIntegerEntry(ArgsMap, ".size", true))
+ return false;
+ if (!verifyIntegerEntry(ArgsMap, ".offset", true))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".value_kind", true,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("by_value", true)
+ .Case("global_buffer", true)
+ .Case("dynamic_shared_pointer", true)
+ .Case("sampler", true)
+ .Case("image", true)
+ .Case("pipe", true)
+ .Case("queue", true)
+ .Case("hidden_global_offset_x", true)
+ .Case("hidden_global_offset_y", true)
+ .Case("hidden_global_offset_z", true)
+ .Case("hidden_none", true)
+ .Case("hidden_printf_buffer", true)
+ .Case("hidden_default_queue", true)
+ .Case("hidden_completion_action", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".value_type", true,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("struct", true)
+ .Case("i8", true)
+ .Case("u8", true)
+ .Case("i16", true)
+ .Case("u16", true)
+ .Case("f16", true)
+ .Case("i32", true)
+ .Case("u32", true)
+ .Case("f32", true)
+ .Case("i64", true)
+ .Case("u64", true)
+ .Case("f64", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyIntegerEntry(ArgsMap, ".pointee_align", false))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".address_space", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("private", true)
+ .Case("global", true)
+ .Case("constant", true)
+ .Case("local", true)
+ .Case("generic", true)
+ .Case("region", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".access", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("read_only", true)
+ .Case("write_only", true)
+ .Case("read_write", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".actual_access", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("read_only", true)
+ .Case("write_only", true)
+ .Case("read_write", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_const", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_restrict", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_volatile", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_pipe", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+
+ return true;
+}
+
+bool MetadataVerifier::verifyKernel(msgpack::Node &Node) {
+ auto KernelMapPtr = dyn_cast<msgpack::MapNode>(&Node);
+ if (!KernelMapPtr)
+ return false;
+ auto &KernelMap = *KernelMapPtr;
+
+ if (!verifyScalarEntry(KernelMap, ".name", true,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".symbol", true,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".language", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("OpenCL C", true)
+ .Case("OpenCL C++", true)
+ .Case("HCC", true)
+ .Case("HIP", true)
+ .Case("OpenMP", true)
+ .Case("Assembler", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyEntry(
+ KernelMap, ".language_version", false, [this](msgpack::Node &Node) {
+ return verifyArray(
+ Node,
+ [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
+ }))
+ return false;
+ if (!verifyEntry(KernelMap, ".args", false, [this](msgpack::Node &Node) {
+ return verifyArray(Node, [this](msgpack::Node &Node) {
+ return verifyKernelArgs(Node);
+ });
+ }))
+ return false;
+ if (!verifyEntry(KernelMap, ".reqd_workgroup_size", false,
+ [this](msgpack::Node &Node) {
+ return verifyArray(Node,
+ [this](msgpack::Node &Node) {
+ return verifyInteger(Node);
+ },
+ 3);
+ }))
+ return false;
+ if (!verifyEntry(KernelMap, ".workgroup_size_hint", false,
+ [this](msgpack::Node &Node) {
+ return verifyArray(Node,
+ [this](msgpack::Node &Node) {
+ return verifyInteger(Node);
+ },
+ 3);
+ }))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".vec_type_hint", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".device_enqueue_symbol", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".group_segment_fixed_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".private_segment_fixed_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".sgpr_count", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".vgpr_count", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".max_flat_workgroup_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".sgpr_spill_count", false))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".vgpr_spill_count", false))
+ return false;
+
+ return true;
+}
+
+bool MetadataVerifier::verify(msgpack::Node &HSAMetadataRoot) {
+ auto RootMapPtr = dyn_cast<msgpack::MapNode>(&HSAMetadataRoot);
+ if (!RootMapPtr)
+ return false;
+ auto &RootMap = *RootMapPtr;
+
+ if (!verifyEntry(
+ RootMap, "amdhsa.version", true, [this](msgpack::Node &Node) {
+ return verifyArray(
+ Node,
+ [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
+ }))
+ return false;
+ if (!verifyEntry(
+ RootMap, "amdhsa.printf", false, [this](msgpack::Node &Node) {
+ return verifyArray(Node, [this](msgpack::Node &Node) {
+ return verifyScalar(Node, msgpack::ScalarNode::SK_String);
+ });
+ }))
+ return false;
+ if (!verifyEntry(RootMap, "amdhsa.kernels", true,
+ [this](msgpack::Node &Node) {
+ return verifyArray(Node, [this](msgpack::Node &Node) {
+ return verifyKernel(Node);
+ });
+ }))
+ return false;
+
+ return true;
+}
+
+} // end namespace V3
+} // end namespace HSAMD
+} // end namespace AMDGPU
+} // end namespace llvm
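
The verifier above consumes a msgpack::Node document tree, the in-memory MessagePack model that MsgPackTypes.cpp adds further down in this change. A minimal driver sketch, not part of the patch; the MetadataVerifier constructor taking a single strictness flag is an assumption inferred from the Strict member used in verifyScalar():

    // Sketch only; needs AMDGPUMetadataVerifier.h, MsgPackReader.h and
    // MsgPackTypes.h (the latter two are introduced later in this change).
    static bool verifyHSABlob(llvm::StringRef Blob) {
      llvm::msgpack::Reader MPReader(Blob);
      auto RootOrErr = llvm::msgpack::Node::read(MPReader);
      if (!RootOrErr) {
        llvm::consumeError(RootOrErr.takeError()); // malformed MessagePack
        return false;
      }
      if (!*RootOrErr)                             // empty document
        return false;
      auto Root = std::move(**RootOrErr);          // shared_ptr to the root node
      // Assumed constructor: MetadataVerifier(bool Strict).
      llvm::AMDGPU::HSAMD::V3::MetadataVerifier Verifier(/*Strict=*/false);
      return Verifier.verify(*Root);               // expects the amdhsa.* root map
    }
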
diff --git a/contrib/llvm/lib/BinaryFormat/Dwarf.cpp b/contrib/llvm/lib/BinaryFormat/Dwarf.cpp
index 5984de73ae63..46f8056774b7 100644
--- a/contrib/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/contrib/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -13,6 +13,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -300,7 +301,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) {
switch (Language) {
default:
return StringRef();
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
case DW_LANG_##NAME: \
return "DW_LANG_" #NAME;
#include "llvm/BinaryFormat/Dwarf.def"
@@ -309,7 +310,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) {
unsigned llvm::dwarf::getLanguage(StringRef LanguageString) {
return StringSwitch<unsigned>(LanguageString)
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
.Case("DW_LANG_" #NAME, DW_LANG_##NAME)
#include "llvm/BinaryFormat/Dwarf.def"
.Default(0);
@@ -319,7 +320,7 @@ unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) {
switch (Lang) {
default:
return 0;
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
case DW_LANG_##NAME: \
return VERSION;
#include "llvm/BinaryFormat/Dwarf.def"
@@ -330,13 +331,24 @@ unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) {
switch (Lang) {
default:
return 0;
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
case DW_LANG_##NAME: \
return DWARF_VENDOR_##VENDOR;
#include "llvm/BinaryFormat/Dwarf.def"
}
}
+Optional<unsigned> llvm::dwarf::LanguageLowerBound(dwarf::SourceLanguage Lang) {
+ switch (Lang) {
+ default:
+ return None;
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
+ case DW_LANG_##NAME: \
+ return LOWER_BOUND;
+#include "llvm/BinaryFormat/Dwarf.def"
+ }
+}
+
StringRef llvm::dwarf::CaseString(unsigned Case) {
switch (Case) {
case DW_ID_case_sensitive:
@@ -455,14 +467,32 @@ StringRef llvm::dwarf::RangeListEncodingString(unsigned Encoding) {
}
}
-StringRef llvm::dwarf::CallFrameString(unsigned Encoding) {
+StringRef llvm::dwarf::CallFrameString(unsigned Encoding,
+ Triple::ArchType Arch) {
+ assert(Arch != llvm::Triple::ArchType::UnknownArch);
+#define SELECT_AARCH64 (Arch == llvm::Triple::aarch64_be || Arch == llvm::Triple::aarch64)
+#define SELECT_MIPS64 Arch == llvm::Triple::mips64
+#define SELECT_SPARC (Arch == llvm::Triple::sparc || Arch == llvm::Triple::sparcv9)
+#define SELECT_X86 (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
+#define HANDLE_DW_CFA(ID, NAME)
+#define HANDLE_DW_CFA_PRED(ID, NAME, PRED) \
+ if (ID == Encoding && PRED) \
+ return "DW_CFA_" #NAME;
+#include "llvm/BinaryFormat/Dwarf.def"
+
switch (Encoding) {
default:
return StringRef();
+#define HANDLE_DW_CFA_PRED(ID, NAME, PRED)
#define HANDLE_DW_CFA(ID, NAME) \
case DW_CFA_##NAME: \
return "DW_CFA_" #NAME;
#include "llvm/BinaryFormat/Dwarf.def"
+
+#undef SELECT_X86
+#undef SELECT_SPARC
+#undef SELECT_MIPS64
+#undef SELECT_AARCH64
}
}
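
CallFrameString() now takes the target architecture so that the new HANDLE_DW_CFA_PRED entries can disambiguate vendor-specific encodings (DW_CFA_AARCH64_negate_ra_state shares its opcode with the SPARC-only DW_CFA_GNU_window_save). A caller-side sketch, not part of the patch, using the arch-independent DW_CFA_def_cfa:

    // Needs llvm/BinaryFormat/Dwarf.h and llvm/ADT/Triple.h.
    static llvm::StringRef nameDefCFA() {
      return llvm::dwarf::CallFrameString(llvm::dwarf::DW_CFA_def_cfa,
                                          llvm::Triple::x86_64); // "DW_CFA_def_cfa"
    }
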
diff --git a/contrib/llvm/lib/BinaryFormat/Magic.cpp b/contrib/llvm/lib/BinaryFormat/Magic.cpp
index 5a339583fca1..78efa6ec87be 100644
--- a/contrib/llvm/lib/BinaryFormat/Magic.cpp
+++ b/contrib/llvm/lib/BinaryFormat/Magic.cpp
@@ -206,7 +206,7 @@ file_magic llvm::identify_magic(StringRef Magic) {
}
std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
- auto FileOrError = MemoryBuffer::getFile(Path);
+ auto FileOrError = MemoryBuffer::getFile(Path, -1LL, false);
if (!FileOrError)
return FileOrError.getError();
diff --git a/contrib/llvm/lib/BinaryFormat/MsgPackReader.cpp b/contrib/llvm/lib/BinaryFormat/MsgPackReader.cpp
new file mode 100644
index 000000000000..b510fdba9608
--- /dev/null
+++ b/contrib/llvm/lib/BinaryFormat/MsgPackReader.cpp
@@ -0,0 +1,255 @@
+//===- MsgPackReader.cpp - Simple MsgPack reader ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a MessagePack reader.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackReader.h"
+#include "llvm/BinaryFormat/MsgPack.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::support;
+using namespace msgpack;
+
+Reader::Reader(MemoryBufferRef InputBuffer)
+ : InputBuffer(InputBuffer), Current(InputBuffer.getBufferStart()),
+ End(InputBuffer.getBufferEnd()) {}
+
+Reader::Reader(StringRef Input) : Reader({Input, "MsgPack"}) {}
+
+Expected<bool> Reader::read(Object &Obj) {
+ if (Current == End)
+ return false;
+
+ uint8_t FB = static_cast<uint8_t>(*Current++);
+
+ switch (FB) {
+ case FirstByte::Nil:
+ Obj.Kind = Type::Nil;
+ return true;
+ case FirstByte::True:
+ Obj.Kind = Type::Boolean;
+ Obj.Bool = true;
+ return true;
+ case FirstByte::False:
+ Obj.Kind = Type::Boolean;
+ Obj.Bool = false;
+ return true;
+ case FirstByte::Int8:
+ Obj.Kind = Type::Int;
+ return readInt<int8_t>(Obj);
+ case FirstByte::Int16:
+ Obj.Kind = Type::Int;
+ return readInt<int16_t>(Obj);
+ case FirstByte::Int32:
+ Obj.Kind = Type::Int;
+ return readInt<int32_t>(Obj);
+ case FirstByte::Int64:
+ Obj.Kind = Type::Int;
+ return readInt<int64_t>(Obj);
+ case FirstByte::UInt8:
+ Obj.Kind = Type::UInt;
+ return readUInt<uint8_t>(Obj);
+ case FirstByte::UInt16:
+ Obj.Kind = Type::UInt;
+ return readUInt<uint16_t>(Obj);
+ case FirstByte::UInt32:
+ Obj.Kind = Type::UInt;
+ return readUInt<uint32_t>(Obj);
+ case FirstByte::UInt64:
+ Obj.Kind = Type::UInt;
+ return readUInt<uint64_t>(Obj);
+ case FirstByte::Float32:
+ Obj.Kind = Type::Float;
+ if (sizeof(float) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Float32 with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Float = BitsToFloat(endian::read<uint32_t, Endianness>(Current));
+ Current += sizeof(float);
+ return true;
+ case FirstByte::Float64:
+ Obj.Kind = Type::Float;
+ if (sizeof(double) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Float64 with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Float = BitsToDouble(endian::read<uint64_t, Endianness>(Current));
+ Current += sizeof(double);
+ return true;
+ case FirstByte::Str8:
+ Obj.Kind = Type::String;
+ return readRaw<uint8_t>(Obj);
+ case FirstByte::Str16:
+ Obj.Kind = Type::String;
+ return readRaw<uint16_t>(Obj);
+ case FirstByte::Str32:
+ Obj.Kind = Type::String;
+ return readRaw<uint32_t>(Obj);
+ case FirstByte::Bin8:
+ Obj.Kind = Type::Binary;
+ return readRaw<uint8_t>(Obj);
+ case FirstByte::Bin16:
+ Obj.Kind = Type::Binary;
+ return readRaw<uint16_t>(Obj);
+ case FirstByte::Bin32:
+ Obj.Kind = Type::Binary;
+ return readRaw<uint32_t>(Obj);
+ case FirstByte::Array16:
+ Obj.Kind = Type::Array;
+ return readLength<uint16_t>(Obj);
+ case FirstByte::Array32:
+ Obj.Kind = Type::Array;
+ return readLength<uint32_t>(Obj);
+ case FirstByte::Map16:
+ Obj.Kind = Type::Map;
+ return readLength<uint16_t>(Obj);
+ case FirstByte::Map32:
+ Obj.Kind = Type::Map;
+ return readLength<uint32_t>(Obj);
+ case FirstByte::FixExt1:
+ Obj.Kind = Type::Extension;
+ return createExt(Obj, FixLen::Ext1);
+ case FirstByte::FixExt2:
+ Obj.Kind = Type::Extension;
+ return createExt(Obj, FixLen::Ext2);
+ case FirstByte::FixExt4:
+ Obj.Kind = Type::Extension;
+ return createExt(Obj, FixLen::Ext4);
+ case FirstByte::FixExt8:
+ Obj.Kind = Type::Extension;
+ return createExt(Obj, FixLen::Ext8);
+ case FirstByte::FixExt16:
+ Obj.Kind = Type::Extension;
+ return createExt(Obj, FixLen::Ext16);
+ case FirstByte::Ext8:
+ Obj.Kind = Type::Extension;
+ return readExt<uint8_t>(Obj);
+ case FirstByte::Ext16:
+ Obj.Kind = Type::Extension;
+ return readExt<uint16_t>(Obj);
+ case FirstByte::Ext32:
+ Obj.Kind = Type::Extension;
+ return readExt<uint32_t>(Obj);
+ }
+
+ if ((FB & FixBitsMask::NegativeInt) == FixBits::NegativeInt) {
+ Obj.Kind = Type::Int;
+ int8_t I;
+ static_assert(sizeof(I) == sizeof(FB), "Unexpected type sizes");
+ memcpy(&I, &FB, sizeof(FB));
+ Obj.Int = I;
+ return true;
+ }
+
+ if ((FB & FixBitsMask::PositiveInt) == FixBits::PositiveInt) {
+ Obj.Kind = Type::UInt;
+ Obj.UInt = FB;
+ return true;
+ }
+
+ if ((FB & FixBitsMask::String) == FixBits::String) {
+ Obj.Kind = Type::String;
+ uint8_t Size = FB & ~FixBitsMask::String;
+ return createRaw(Obj, Size);
+ }
+
+ if ((FB & FixBitsMask::Array) == FixBits::Array) {
+ Obj.Kind = Type::Array;
+ Obj.Length = FB & ~FixBitsMask::Array;
+ return true;
+ }
+
+ if ((FB & FixBitsMask::Map) == FixBits::Map) {
+ Obj.Kind = Type::Map;
+ Obj.Length = FB & ~FixBitsMask::Map;
+ return true;
+ }
+
+ return make_error<StringError>(
+ "Invalid first byte", std::make_error_code(std::errc::invalid_argument));
+}
+
+template <class T> Expected<bool> Reader::readRaw(Object &Obj) {
+ if (sizeof(T) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Raw with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ T Size = endian::read<T, Endianness>(Current);
+ Current += sizeof(T);
+ return createRaw(Obj, Size);
+}
+
+template <class T> Expected<bool> Reader::readInt(Object &Obj) {
+ if (sizeof(T) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Int with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Int = static_cast<int64_t>(endian::read<T, Endianness>(Current));
+ Current += sizeof(T);
+ return true;
+}
+
+template <class T> Expected<bool> Reader::readUInt(Object &Obj) {
+ if (sizeof(T) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Int with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.UInt = static_cast<uint64_t>(endian::read<T, Endianness>(Current));
+ Current += sizeof(T);
+ return true;
+}
+
+template <class T> Expected<bool> Reader::readLength(Object &Obj) {
+ if (sizeof(T) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Map/Array with invalid length",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Length = static_cast<size_t>(endian::read<T, Endianness>(Current));
+ Current += sizeof(T);
+ return true;
+}
+
+template <class T> Expected<bool> Reader::readExt(Object &Obj) {
+ if (sizeof(T) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Ext with invalid length",
+ std::make_error_code(std::errc::invalid_argument));
+ T Size = endian::read<T, Endianness>(Current);
+ Current += sizeof(T);
+ return createExt(Obj, Size);
+}
+
+Expected<bool> Reader::createRaw(Object &Obj, uint32_t Size) {
+ if (Size > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Raw with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Raw = StringRef(Current, Size);
+ Current += Size;
+ return true;
+}
+
+Expected<bool> Reader::createExt(Object &Obj, uint32_t Size) {
+ if (Current == End)
+ return make_error<StringError>(
+ "Invalid Ext with no type",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Extension.Type = *Current++;
+ if (Size > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Ext with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Extension.Bytes = StringRef(Current, Size);
+ Current += Size;
+ return true;
+}
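
A usage sketch for the reader above, not part of the patch. Reader::read() returns false at end of buffer and an Error on malformed input; for Map and Array it only reports the length, with the elements delivered by subsequent read() calls, so the loop below simply counts every map header in the stream:

    // Needs llvm/BinaryFormat/MsgPackReader.h.
    static llvm::Expected<unsigned> countMapHeaders(llvm::StringRef Blob) {
      llvm::msgpack::Reader MPReader(Blob);
      llvm::msgpack::Object Obj;
      unsigned Maps = 0;
      while (true) {
        auto ContinueOrErr = MPReader.read(Obj);
        if (!ContinueOrErr)
          return ContinueOrErr.takeError(); // propagate "Invalid ..." errors
        if (!*ContinueOrErr)
          return Maps;                      // clean end of buffer
        if (Obj.Kind == llvm::msgpack::Type::Map)
          ++Maps;                           // Obj.Length holds the entry count
      }
    }
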
diff --git a/contrib/llvm/lib/BinaryFormat/MsgPackTypes.cpp b/contrib/llvm/lib/BinaryFormat/MsgPackTypes.cpp
new file mode 100644
index 000000000000..4a8f70b10fb8
--- /dev/null
+++ b/contrib/llvm/lib/BinaryFormat/MsgPackTypes.cpp
@@ -0,0 +1,303 @@
+//===- MsgPackTypes.cpp - MsgPack Types -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Implementation of types representing MessagePack "documents".
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackTypes.h"
+#include "llvm/Support/Error.h"
+
+using namespace llvm;
+using namespace msgpack;
+
+namespace llvm {
+namespace msgpack {
+void ScalarNode::anchor() {}
+void ArrayNode::anchor() {}
+void MapNode::anchor() {}
+}
+}
+
+Expected<OptNodePtr> Node::readArray(Reader &MPReader, size_t Length) {
+ auto A = std::make_shared<ArrayNode>();
+ for (size_t I = 0; I < Length; ++I) {
+ auto OptNodeOrErr = Node::read(MPReader);
+ if (auto Err = OptNodeOrErr.takeError())
+ return std::move(Err);
+ if (!*OptNodeOrErr)
+ return make_error<StringError>(
+ "Insufficient array elements",
+ std::make_error_code(std::errc::invalid_argument));
+ A->push_back(std::move(**OptNodeOrErr));
+ }
+ return OptNodePtr(std::move(A));
+}
+
+Expected<OptNodePtr> Node::readMap(Reader &MPReader, size_t Length) {
+ auto M = std::make_shared<MapNode>();
+ for (size_t I = 0; I < Length; ++I) {
+ auto OptKeyOrErr = Node::read(MPReader);
+ if (auto Err = OptKeyOrErr.takeError())
+ return std::move(Err);
+ if (!*OptKeyOrErr)
+ return make_error<StringError>(
+ "Insufficient map elements",
+ std::make_error_code(std::errc::invalid_argument));
+ auto OptValOrErr = Node::read(MPReader);
+ if (auto Err = OptValOrErr.takeError())
+ return std::move(Err);
+ if (!*OptValOrErr)
+ return make_error<StringError>(
+ "Insufficient map elements",
+ std::make_error_code(std::errc::invalid_argument));
+ auto *Key = dyn_cast<ScalarNode>((*OptKeyOrErr)->get());
+ if (!Key)
+ return make_error<StringError>(
+ "Only string map keys are supported",
+ std::make_error_code(std::errc::invalid_argument));
+ if (Key->getScalarKind() != ScalarNode::SK_String)
+ return make_error<StringError>(
+ "Only string map keys are supported",
+ std::make_error_code(std::errc::invalid_argument));
+ M->try_emplace(Key->getString(), std::move(**OptValOrErr));
+ }
+ return OptNodePtr(std::move(M));
+}
+
+Expected<OptNodePtr> Node::read(Reader &MPReader) {
+ Object Obj;
+
+ auto ContinueOrErr = MPReader.read(Obj);
+ if (auto Err = ContinueOrErr.takeError())
+ return std::move(Err);
+ if (!*ContinueOrErr)
+ return None;
+
+ switch (Obj.Kind) {
+ case Type::Int:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.Int));
+ case Type::UInt:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.UInt));
+ case Type::Nil:
+ return OptNodePtr(std::make_shared<ScalarNode>());
+ case Type::Boolean:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.Bool));
+ case Type::Float:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.Float));
+ case Type::String:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.Raw));
+ case Type::Binary:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.Raw));
+ case Type::Array:
+ return Node::readArray(MPReader, Obj.Length);
+ case Type::Map:
+ return Node::readMap(MPReader, Obj.Length);
+ case Type::Extension:
+ return make_error<StringError>(
+ "Extension types are not supported",
+ std::make_error_code(std::errc::invalid_argument));
+ }
+ llvm_unreachable("msgpack::Type not handled");
+}
+
+void ScalarNode::destroy() {
+ switch (SKind) {
+ case SK_String:
+ case SK_Binary:
+ StringValue.~basic_string();
+ break;
+ default:
+ // POD types do not require destruction
+ break;
+ }
+}
+
+ScalarNode::ScalarNode(int64_t IntValue)
+ : Node(NK_Scalar), SKind(SK_Int), IntValue(IntValue) {}
+
+ScalarNode::ScalarNode(int32_t IntValue)
+ : ScalarNode(static_cast<int64_t>(IntValue)) {}
+
+ScalarNode::ScalarNode(uint64_t UIntValue)
+ : Node(NK_Scalar), SKind(SK_UInt), UIntValue(UIntValue) {}
+
+ScalarNode::ScalarNode(uint32_t IntValue)
+ : ScalarNode(static_cast<uint64_t>(IntValue)) {}
+
+ScalarNode::ScalarNode() : Node(NK_Scalar), SKind(SK_Nil) {}
+
+ScalarNode::ScalarNode(bool BoolValue)
+ : Node(NK_Scalar), SKind(SK_Boolean), BoolValue(BoolValue) {}
+
+ScalarNode::ScalarNode(double FloatValue)
+ : Node(NK_Scalar), SKind(SK_Float), FloatValue(FloatValue) {}
+
+ScalarNode::ScalarNode(StringRef StringValue)
+ : Node(NK_Scalar), SKind(SK_String) {
+ new (&this->StringValue) std::string(StringValue);
+}
+
+ScalarNode::ScalarNode(const char *StringValue)
+ : ScalarNode(StringRef(StringValue)) {}
+
+ScalarNode::ScalarNode(std::string &&StringValue)
+ : Node(NK_Scalar), SKind(SK_String) {
+ new (&this->StringValue) std::string(StringValue);
+}
+
+ScalarNode::ScalarNode(MemoryBufferRef BinaryValue)
+ : Node(NK_Scalar), SKind(SK_Binary) {
+ new (&StringValue) std::string(BinaryValue.getBuffer());
+}
+
+ScalarNode::~ScalarNode() { destroy(); }
+
+ScalarNode &ScalarNode::operator=(ScalarNode &&RHS) {
+ destroy();
+ switch (SKind = RHS.SKind) {
+ case SK_Int:
+ IntValue = RHS.IntValue;
+ break;
+ case SK_UInt:
+ UIntValue = RHS.UIntValue;
+ break;
+ case SK_Boolean:
+ BoolValue = RHS.BoolValue;
+ break;
+ case SK_Float:
+ FloatValue = RHS.FloatValue;
+ break;
+ case SK_String:
+ case SK_Binary:
+ new (&StringValue) std::string(std::move(RHS.StringValue));
+ break;
+ case SK_Nil:
+ // pass
+ break;
+ }
+ return *this;
+}
+
+StringRef ScalarNode::inputYAML(StringRef ScalarStr) {
+ switch (SKind) {
+ case SK_Int:
+ return yaml::ScalarTraits<int64_t>::input(ScalarStr, nullptr, IntValue);
+ case SK_UInt:
+ return yaml::ScalarTraits<uint64_t>::input(ScalarStr, nullptr, UIntValue);
+ case SK_Nil:
+ return StringRef();
+ case SK_Boolean:
+ return yaml::ScalarTraits<bool>::input(ScalarStr, nullptr, BoolValue);
+ case SK_Float:
+ return yaml::ScalarTraits<double>::input(ScalarStr, nullptr, FloatValue);
+ case SK_Binary:
+ case SK_String:
+ return yaml::ScalarTraits<std::string>::input(ScalarStr, nullptr,
+ StringValue);
+ }
+ llvm_unreachable("unrecognized ScalarKind");
+}
+
+void ScalarNode::outputYAML(raw_ostream &OS) const {
+ switch (SKind) {
+ case SK_Int:
+ yaml::ScalarTraits<int64_t>::output(IntValue, nullptr, OS);
+ break;
+ case SK_UInt:
+ yaml::ScalarTraits<uint64_t>::output(UIntValue, nullptr, OS);
+ break;
+ case SK_Nil:
+ yaml::ScalarTraits<StringRef>::output("", nullptr, OS);
+ break;
+ case SK_Boolean:
+ yaml::ScalarTraits<bool>::output(BoolValue, nullptr, OS);
+ break;
+ case SK_Float:
+ yaml::ScalarTraits<double>::output(FloatValue, nullptr, OS);
+ break;
+ case SK_Binary:
+ case SK_String:
+ yaml::ScalarTraits<std::string>::output(StringValue, nullptr, OS);
+ break;
+ }
+}
+
+yaml::QuotingType ScalarNode::mustQuoteYAML(StringRef ScalarStr) const {
+ switch (SKind) {
+ case SK_Int:
+ return yaml::ScalarTraits<int64_t>::mustQuote(ScalarStr);
+ case SK_UInt:
+ return yaml::ScalarTraits<uint64_t>::mustQuote(ScalarStr);
+ case SK_Nil:
+ return yaml::ScalarTraits<StringRef>::mustQuote(ScalarStr);
+ case SK_Boolean:
+ return yaml::ScalarTraits<bool>::mustQuote(ScalarStr);
+ case SK_Float:
+ return yaml::ScalarTraits<double>::mustQuote(ScalarStr);
+ case SK_Binary:
+ case SK_String:
+ return yaml::ScalarTraits<std::string>::mustQuote(ScalarStr);
+ }
+ llvm_unreachable("unrecognized ScalarKind");
+}
+
+const char *ScalarNode::IntTag = "!int";
+const char *ScalarNode::NilTag = "!nil";
+const char *ScalarNode::BooleanTag = "!bool";
+const char *ScalarNode::FloatTag = "!float";
+const char *ScalarNode::StringTag = "!str";
+const char *ScalarNode::BinaryTag = "!bin";
+
+StringRef ScalarNode::getYAMLTag() const {
+ switch (SKind) {
+ case SK_Int:
+ return IntTag;
+ case SK_UInt:
+ return IntTag;
+ case SK_Nil:
+ return NilTag;
+ case SK_Boolean:
+ return BooleanTag;
+ case SK_Float:
+ return FloatTag;
+ case SK_String:
+ return StringTag;
+ case SK_Binary:
+ return BinaryTag;
+ }
+ llvm_unreachable("unrecognized ScalarKind");
+}
+
+void ScalarNode::write(Writer &MPWriter) {
+ switch (SKind) {
+ case SK_Int:
+ MPWriter.write(IntValue);
+ break;
+ case SK_UInt:
+ MPWriter.write(UIntValue);
+ break;
+ case SK_Nil:
+ MPWriter.writeNil();
+ break;
+ case SK_Boolean:
+ MPWriter.write(BoolValue);
+ break;
+ case SK_Float:
+ MPWriter.write(FloatValue);
+ break;
+ case SK_String:
+ MPWriter.write(StringValue);
+ break;
+ case SK_Binary:
+ MPWriter.write(MemoryBufferRef(StringValue, ""));
+ break;
+ }
+}
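
To connect the document model to the writer added next, a minimal sketch that builds a one-entry map and serializes it. It is not part of the patch and makes two assumptions: that MapNode::write is defined inline in MsgPackTypes.h (only ScalarNode::write needs the out-of-line definition above), and that the writer's Compatible flag is best passed explicitly rather than relying on a default argument:

    static void emitOneEntryMap(llvm::raw_ostream &OS) {
      auto Map = std::make_shared<llvm::msgpack::MapNode>();
      Map->try_emplace("wavefront_size",
                       std::make_shared<llvm::msgpack::ScalarNode>(uint64_t(64)));
      llvm::msgpack::Writer MPWriter(OS, /*Compatible=*/false);
      Map->write(MPWriter); // assumed to emit writeMapSize(1), the key, then the value
    }
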
diff --git a/contrib/llvm/lib/BinaryFormat/MsgPackWriter.cpp b/contrib/llvm/lib/BinaryFormat/MsgPackWriter.cpp
new file mode 100644
index 000000000000..d024bb0fcdb2
--- /dev/null
+++ b/contrib/llvm/lib/BinaryFormat/MsgPackWriter.cpp
@@ -0,0 +1,209 @@
+//===- MsgPackWriter.cpp - Simple MsgPack writer ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a MessagePack writer.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackWriter.h"
+#include "llvm/BinaryFormat/MsgPack.h"
+
+using namespace llvm;
+using namespace msgpack;
+
+Writer::Writer(raw_ostream &OS, bool Compatible)
+ : EW(OS, Endianness), Compatible(Compatible) {}
+
+void Writer::writeNil() { EW.write(FirstByte::Nil); }
+
+void Writer::write(bool b) { EW.write(b ? FirstByte::True : FirstByte::False); }
+
+void Writer::write(int64_t i) {
+ if (i >= 0) {
+ write(static_cast<uint64_t>(i));
+ return;
+ }
+
+ if (i >= FixMin::NegativeInt) {
+ EW.write(static_cast<int8_t>(i));
+ return;
+ }
+
+ if (i >= INT8_MIN) {
+ EW.write(FirstByte::Int8);
+ EW.write(static_cast<int8_t>(i));
+ return;
+ }
+
+ if (i >= INT16_MIN) {
+ EW.write(FirstByte::Int16);
+ EW.write(static_cast<int16_t>(i));
+ return;
+ }
+
+ if (i >= INT32_MIN) {
+ EW.write(FirstByte::Int32);
+ EW.write(static_cast<int32_t>(i));
+ return;
+ }
+
+ EW.write(FirstByte::Int64);
+ EW.write(i);
+}
+
+void Writer::write(uint64_t u) {
+ if (u <= FixMax::PositiveInt) {
+ EW.write(static_cast<uint8_t>(u));
+ return;
+ }
+
+ if (u <= UINT8_MAX) {
+ EW.write(FirstByte::UInt8);
+ EW.write(static_cast<uint8_t>(u));
+ return;
+ }
+
+ if (u <= UINT16_MAX) {
+ EW.write(FirstByte::UInt16);
+ EW.write(static_cast<uint16_t>(u));
+ return;
+ }
+
+ if (u <= UINT32_MAX) {
+ EW.write(FirstByte::UInt32);
+ EW.write(static_cast<uint32_t>(u));
+ return;
+ }
+
+ EW.write(FirstByte::UInt64);
+ EW.write(u);
+}
+
+void Writer::write(double d) {
+ // Encode as a Float32 when |d| lies within float's normal range; otherwise use Float64.
+ double a = std::fabs(d);
+ if (a >= std::numeric_limits<float>::min() &&
+ a <= std::numeric_limits<float>::max()) {
+ EW.write(FirstByte::Float32);
+ EW.write(static_cast<float>(d));
+ } else {
+ EW.write(FirstByte::Float64);
+ EW.write(d);
+ }
+}
+
+void Writer::write(StringRef s) {
+ size_t Size = s.size();
+
+ if (Size <= FixMax::String)
+ EW.write(static_cast<uint8_t>(FixBits::String | Size));
+ else if (!Compatible && Size <= UINT8_MAX) {
+ EW.write(FirstByte::Str8);
+ EW.write(static_cast<uint8_t>(Size));
+ } else if (Size <= UINT16_MAX) {
+ EW.write(FirstByte::Str16);
+ EW.write(static_cast<uint16_t>(Size));
+ } else {
+ assert(Size <= UINT32_MAX && "String object too long to be encoded");
+ EW.write(FirstByte::Str32);
+ EW.write(static_cast<uint32_t>(Size));
+ }
+
+ EW.OS << s;
+}
+
+void Writer::write(MemoryBufferRef Buffer) {
+ assert(!Compatible && "Attempt to write Bin format in compatible mode");
+
+ size_t Size = Buffer.getBufferSize();
+
+ if (Size <= UINT8_MAX) {
+ EW.write(FirstByte::Bin8);
+ EW.write(static_cast<uint8_t>(Size));
+ } else if (Size <= UINT16_MAX) {
+ EW.write(FirstByte::Bin16);
+ EW.write(static_cast<uint16_t>(Size));
+ } else {
+ assert(Size <= UINT32_MAX && "Binary object too long to be encoded");
+ EW.write(FirstByte::Bin32);
+ EW.write(static_cast<uint32_t>(Size));
+ }
+
+ EW.OS.write(Buffer.getBufferStart(), Size);
+}
+
+void Writer::writeArraySize(uint32_t Size) {
+ if (Size <= FixMax::Array) {
+ EW.write(static_cast<uint8_t>(FixBits::Array | Size));
+ return;
+ }
+
+ if (Size <= UINT16_MAX) {
+ EW.write(FirstByte::Array16);
+ EW.write(static_cast<uint16_t>(Size));
+ return;
+ }
+
+ EW.write(FirstByte::Array32);
+ EW.write(Size);
+}
+
+void Writer::writeMapSize(uint32_t Size) {
+ if (Size <= FixMax::Map) {
+ EW.write(static_cast<uint8_t>(FixBits::Map | Size));
+ return;
+ }
+
+ if (Size <= UINT16_MAX) {
+ EW.write(FirstByte::Map16);
+ EW.write(static_cast<uint16_t>(Size));
+ return;
+ }
+
+ EW.write(FirstByte::Map32);
+ EW.write(Size);
+}
+
+void Writer::writeExt(int8_t Type, MemoryBufferRef Buffer) {
+ size_t Size = Buffer.getBufferSize();
+
+ switch (Size) {
+ case FixLen::Ext1:
+ EW.write(FirstByte::FixExt1);
+ break;
+ case FixLen::Ext2:
+ EW.write(FirstByte::FixExt2);
+ break;
+ case FixLen::Ext4:
+ EW.write(FirstByte::FixExt4);
+ break;
+ case FixLen::Ext8:
+ EW.write(FirstByte::FixExt8);
+ break;
+ case FixLen::Ext16:
+ EW.write(FirstByte::FixExt16);
+ break;
+ default:
+ if (Size <= UINT8_MAX) {
+ EW.write(FirstByte::Ext8);
+ EW.write(static_cast<uint8_t>(Size));
+ } else if (Size <= UINT16_MAX) {
+ EW.write(FirstByte::Ext16);
+ EW.write(static_cast<uint16_t>(Size));
+ } else {
+ assert(Size <= UINT32_MAX && "Ext size too large to be encoded");
+ EW.write(FirstByte::Ext32);
+ EW.write(static_cast<uint32_t>(Size));
+ }
+ }
+
+ EW.write(Type);
+ EW.OS.write(Buffer.getBufferStart(), Size);
+}
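
The same data can be emitted without the Node layer by driving the writer directly; every call below appears in the implementation above. A sketch, not part of the patch:

    // Needs llvm/BinaryFormat/MsgPackWriter.h and llvm/Support/raw_ostream.h.
    static std::string encodeExample() {
      std::string Out;
      llvm::raw_string_ostream OS(Out);
      llvm::msgpack::Writer MPWriter(OS, /*Compatible=*/false);
      MPWriter.writeMapSize(2);
      MPWriter.write(llvm::StringRef(".name"));
      MPWriter.write(llvm::StringRef("kernel0"));
      MPWriter.write(llvm::StringRef(".wavefront_size"));
      MPWriter.write(uint64_t(64)); // small enough for a single fixuint byte
      return OS.str();
    }
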
diff --git a/contrib/llvm/lib/BinaryFormat/Wasm.cpp b/contrib/llvm/lib/BinaryFormat/Wasm.cpp
index 35360d0ae4f0..94d40bf02a39 100644
--- a/contrib/llvm/lib/BinaryFormat/Wasm.cpp
+++ b/contrib/llvm/lib/BinaryFormat/Wasm.cpp
@@ -19,13 +19,17 @@ std::string llvm::wasm::toString(wasm::WasmSymbolType type) {
return "WASM_SYMBOL_TYPE_DATA";
case wasm::WASM_SYMBOL_TYPE_SECTION:
return "WASM_SYMBOL_TYPE_SECTION";
+ case wasm::WASM_SYMBOL_TYPE_EVENT:
+ return "WASM_SYMBOL_TYPE_EVENT";
}
llvm_unreachable("unknown symbol type");
}
std::string llvm::wasm::relocTypetoString(uint32_t type) {
switch (type) {
-#define WASM_RELOC(NAME, VALUE) case VALUE: return #NAME;
+#define WASM_RELOC(NAME, VALUE) \
+ case VALUE: \
+ return #NAME;
#include "llvm/BinaryFormat/WasmRelocs.def"
#undef WASM_RELOC
default:
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index c45b441238bc..fe051e7a9125 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -876,6 +876,7 @@ static FunctionSummary::FFlags getDecodedFFlags(uint64_t RawFlags) {
Flags.ReadOnly = (RawFlags >> 1) & 0x1;
Flags.NoRecurse = (RawFlags >> 2) & 0x1;
Flags.ReturnDoesNotAlias = (RawFlags >> 3) & 0x1;
+ Flags.NoInline = (RawFlags >> 4) & 0x1;
return Flags;
}
@@ -897,6 +898,11 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live, Local);
}
+// Decode the flags for GlobalVariable in the summary
+static GlobalVarSummary::GVarFlags getDecodedGVarFlags(uint64_t RawFlags) {
+ return GlobalVarSummary::GVarFlags((RawFlags & 0x1) ? true : false);
+}
+
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
switch (Val) {
default: // Map unknown visibilities to default.
@@ -963,6 +969,20 @@ static int getDecodedCastOpcode(unsigned Val) {
}
}
+static int getDecodedUnaryOpcode(unsigned Val, Type *Ty) {
+ bool IsFP = Ty->isFPOrFPVectorTy();
+ // UnOps are only valid for int/fp or vector of int/fp types
+ if (!IsFP && !Ty->isIntOrIntVectorTy())
+ return -1;
+
+ switch (Val) {
+ default:
+ return -1;
+ case bitc::UNOP_NEG:
+ return IsFP ? Instruction::FNeg : -1;
+ }
+}
+
static int getDecodedBinaryOpcode(unsigned Val, Type *Ty) {
bool IsFP = Ty->isFPOrFPVectorTy();
// BinOps are only valid for int/fp or vector of int/fp types
@@ -1165,6 +1185,8 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) {
case Attribute::NoCfCheck: return 1ULL << 57;
case Attribute::OptForFuzzing: return 1ULL << 58;
case Attribute::ShadowCallStack: return 1ULL << 59;
+ case Attribute::SpeculativeLoadHardening:
+ return 1ULL << 60;
case Attribute::Dereferenceable:
llvm_unreachable("dereferenceable attribute not supported in raw format");
break;
@@ -1389,6 +1411,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::SanitizeThread;
case bitc::ATTR_KIND_SANITIZE_MEMORY:
return Attribute::SanitizeMemory;
+ case bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING:
+ return Attribute::SpeculativeLoadHardening;
case bitc::ATTR_KIND_SWIFT_ERROR:
return Attribute::SwiftError;
case bitc::ATTR_KIND_SWIFT_SELF:
@@ -2312,6 +2336,19 @@ Error BitcodeReader::parseConstants() {
}
break;
}
+ case bitc::CST_CODE_CE_UNOP: { // CE_UNOP: [opcode, opval]
+ if (Record.size() < 2)
+ return error("Invalid record");
+ int Opc = getDecodedUnaryOpcode(Record[0], CurTy);
+ if (Opc < 0) {
+ V = UndefValue::get(CurTy); // Unknown unop.
+ } else {
+ Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy);
+ unsigned Flags = 0;
+ V = ConstantExpr::get(Opc, LHS, Flags);
+ }
+ break;
+ }
case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
if (Record.size() < 3)
return error("Invalid record");
@@ -2938,7 +2975,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
// v1: [type, callingconv, isproto, linkage, paramattr, alignment, section,
// visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat,
- // prefixdata, personalityfn, preemption specifier] (name in VST)
+ // prefixdata, personalityfn, preemption specifier, addrspace] (name in VST)
// v2: [strtab_offset, strtab_size, v1]
StringRef Name;
std::tie(Name, Record) = readNameFromStrtab(Record);
@@ -2957,8 +2994,12 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
if (CC & ~CallingConv::MaxID)
return error("Invalid calling convention ID");
- Function *Func =
- Function::Create(FTy, GlobalValue::ExternalLinkage, Name, TheModule);
+ unsigned AddrSpace = TheModule->getDataLayout().getProgramAddressSpace();
+ if (Record.size() > 16)
+ AddrSpace = Record[16];
+
+ Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ AddrSpace, Name, TheModule);
Func->setCallingConv(CC);
bool isProto = Record[2];
@@ -3508,24 +3549,47 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
unsigned Line = Record[0], Col = Record[1];
unsigned ScopeID = Record[2], IAID = Record[3];
+ bool isImplicitCode = Record.size() == 5 && Record[4];
MDNode *Scope = nullptr, *IA = nullptr;
if (ScopeID) {
- Scope = MDLoader->getMDNodeFwdRefOrNull(ScopeID - 1);
+ Scope = dyn_cast_or_null<MDNode>(
+ MDLoader->getMetadataFwdRefOrLoad(ScopeID - 1));
if (!Scope)
return error("Invalid record");
}
if (IAID) {
- IA = MDLoader->getMDNodeFwdRefOrNull(IAID - 1);
+ IA = dyn_cast_or_null<MDNode>(
+ MDLoader->getMetadataFwdRefOrLoad(IAID - 1));
if (!IA)
return error("Invalid record");
}
- LastLoc = DebugLoc::get(Line, Col, Scope, IA);
+ LastLoc = DebugLoc::get(Line, Col, Scope, IA, isImplicitCode);
I->setDebugLoc(LastLoc);
I = nullptr;
continue;
}
+ case bitc::FUNC_CODE_INST_UNOP: { // UNOP: [opval, ty, opcode]
+ unsigned OpNum = 0;
+ Value *LHS;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ OpNum+1 > Record.size())
+ return error("Invalid record");
+ int Opc = getDecodedUnaryOpcode(Record[OpNum++], LHS->getType());
+ if (Opc == -1)
+ return error("Invalid record");
+ I = UnaryOperator::Create((Instruction::UnaryOps)Opc, LHS);
+ InstructionList.push_back(I);
+ if (OpNum < Record.size()) {
+ if (isa<FPMathOperator>(I)) {
+ FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]);
+ if (FMF.any())
+ I->setFastMathFlags(FMF);
+ }
+ }
+ break;
+ }
case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode]
unsigned OpNum = 0;
Value *LHS, *RHS;
@@ -3656,16 +3720,16 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("EXTRACTVAL: Invalid type");
if ((unsigned)Index != Index)
return error("Invalid value");
- if (IsStruct && Index >= CurTy->subtypes().size())
+ if (IsStruct && Index >= CurTy->getStructNumElements())
return error("EXTRACTVAL: Invalid struct index");
if (IsArray && Index >= CurTy->getArrayNumElements())
return error("EXTRACTVAL: Invalid array index");
EXTRACTVALIdx.push_back((unsigned)Index);
if (IsStruct)
- CurTy = CurTy->subtypes()[Index];
+ CurTy = CurTy->getStructElementType(Index);
else
- CurTy = CurTy->subtypes()[0];
+ CurTy = CurTy->getArrayElementType();
}
I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
@@ -3698,16 +3762,16 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("INSERTVAL: Invalid type");
if ((unsigned)Index != Index)
return error("Invalid value");
- if (IsStruct && Index >= CurTy->subtypes().size())
+ if (IsStruct && Index >= CurTy->getStructNumElements())
return error("INSERTVAL: Invalid struct index");
if (IsArray && Index >= CurTy->getArrayNumElements())
return error("INSERTVAL: Invalid array index");
INSERTVALIdx.push_back((unsigned)Index);
if (IsStruct)
- CurTy = CurTy->subtypes()[Index];
+ CurTy = CurTy->getStructElementType(Index);
else
- CurTy = CurTy->subtypes()[0];
+ CurTy = CurTy->getArrayElementType();
}
if (CurTy != Val->getType())
@@ -4616,7 +4680,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
CurBB->getInstList().push_back(I);
// If this was a terminator instruction, move to the next block.
- if (isa<TerminatorInst>(I)) {
+ if (I->isTerminator()) {
++CurBBNo;
CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : nullptr;
}
@@ -4854,7 +4918,7 @@ void ModuleSummaryIndexBitcodeReader::setValueGUID(
ValueIdToValueInfoMap[ValueID] = std::make_pair(
TheIndex.getOrInsertValueInfo(
ValueGUID,
- UseStrtab ? ValueName : TheIndex.saveString(ValueName.str())),
+ UseStrtab ? ValueName : TheIndex.saveString(ValueName)),
OriginalNameID);
}
@@ -5160,6 +5224,12 @@ static void parseTypeIdSummaryRecord(ArrayRef<uint64_t> Record,
parseWholeProgramDevirtResolution(Record, Strtab, Slot, TypeId);
}
+static void setImmutableRefs(std::vector<ValueInfo> &Refs, unsigned Count) {
+ // Read-only refs are at the end of the refs list.
+ for (unsigned RefNo = Refs.size() - Count; RefNo < Refs.size(); ++RefNo)
+ Refs[RefNo].setReadOnly();
+}
+
// Eagerly parse the entire summary block. This populates the GlobalValueSummary
// objects in the index.
Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
@@ -5177,9 +5247,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
}
const uint64_t Version = Record[0];
const bool IsOldProfileFormat = Version == 1;
- if (Version < 1 || Version > 4)
+ if (Version < 1 || Version > 6)
return error("Invalid summary version " + Twine(Version) +
- ", 1, 2, 3 or 4 expected");
+ ". Version should be in the range [1-6].");
Record.clear();
// Keep around the last seen summary to be used when we see an optional
@@ -5224,15 +5294,30 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
break;
case bitc::FS_FLAGS: { // [flags]
uint64_t Flags = Record[0];
- // Scan flags (set only on the combined index).
- assert(Flags <= 0x3 && "Unexpected bits in flag");
+ // Scan flags.
+ assert(Flags <= 0x1f && "Unexpected bits in flag");
// 1 bit: WithGlobalValueDeadStripping flag.
+ // Set on combined index only.
if (Flags & 0x1)
TheIndex.setWithGlobalValueDeadStripping();
// 1 bit: SkipModuleByDistributedBackend flag.
+ // Set on combined index only.
if (Flags & 0x2)
TheIndex.setSkipModuleByDistributedBackend();
+ // 1 bit: HasSyntheticEntryCounts flag.
+ // Set on combined index only.
+ if (Flags & 0x4)
+ TheIndex.setHasSyntheticEntryCounts();
+ // 1 bit: DisableSplitLTOUnit flag.
+ // Set on per module indexes. It is up to the client to validate
+ // the consistency of this flag across modules being linked.
+ if (Flags & 0x8)
+ TheIndex.setEnableSplitLTOUnit();
+ // 1 bit: PartiallySplitLTOUnits flag.
+ // Set on combined index only.
+ if (Flags & 0x10)
+ TheIndex.setPartiallySplitLTOUnits();
break;
}
case bitc::FS_VALUE_GUID: { // [valueid, refguid]
@@ -5258,11 +5343,16 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
unsigned InstCount = Record[2];
uint64_t RawFunFlags = 0;
unsigned NumRefs = Record[3];
+ unsigned NumImmutableRefs = 0;
int RefListStartIndex = 4;
if (Version >= 4) {
RawFunFlags = Record[3];
NumRefs = Record[4];
RefListStartIndex = 5;
+ if (Version >= 5) {
+ NumImmutableRefs = Record[5];
+ RefListStartIndex = 6;
+ }
}
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
@@ -5281,9 +5371,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
std::vector<FunctionSummary::EdgeTy> Calls = makeCallList(
ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
IsOldProfileFormat, HasProfile, HasRelBF);
+ setImmutableRefs(Refs, NumImmutableRefs);
auto FS = llvm::make_unique<FunctionSummary>(
- Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
- std::move(Calls), std::move(PendingTypeTests),
+ Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0,
+ std::move(Refs), std::move(Calls), std::move(PendingTypeTests),
std::move(PendingTypeTestAssumeVCalls),
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
@@ -5329,14 +5420,21 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
TheIndex.addGlobalValueSummary(GUID.first, std::move(AS));
break;
}
- // FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid]
+ // FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, varflags, n x valueid]
case bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS: {
unsigned ValueID = Record[0];
uint64_t RawFlags = Record[1];
+ unsigned RefArrayStart = 2;
+ GlobalVarSummary::GVarFlags GVF;
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
+ if (Version >= 5) {
+ GVF = getDecodedGVarFlags(Record[2]);
+ RefArrayStart = 3;
+ }
std::vector<ValueInfo> Refs =
- makeRefList(ArrayRef<uint64_t>(Record).slice(2));
- auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs));
+ makeRefList(ArrayRef<uint64_t>(Record).slice(RefArrayStart));
+ auto FS =
+ llvm::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
FS->setModulePath(getThisModule()->first());
auto GUID = getValueInfoFromValueId(ValueID);
FS->setOriginalName(GUID.second);
@@ -5354,13 +5452,25 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
uint64_t RawFlags = Record[2];
unsigned InstCount = Record[3];
uint64_t RawFunFlags = 0;
+ uint64_t EntryCount = 0;
unsigned NumRefs = Record[4];
+ unsigned NumImmutableRefs = 0;
int RefListStartIndex = 5;
if (Version >= 4) {
RawFunFlags = Record[4];
- NumRefs = Record[5];
RefListStartIndex = 6;
+ size_t NumRefsIndex = 5;
+ if (Version >= 5) {
+ RefListStartIndex = 7;
+ if (Version >= 6) {
+ NumRefsIndex = 6;
+ EntryCount = Record[5];
+ RefListStartIndex = 8;
+ }
+ NumImmutableRefs = Record[RefListStartIndex - 1];
+ }
+ NumRefs = Record[NumRefsIndex];
}
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
@@ -5374,9 +5484,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
IsOldProfileFormat, HasProfile, false);
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
+ setImmutableRefs(Refs, NumImmutableRefs);
auto FS = llvm::make_unique<FunctionSummary>(
- Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
- std::move(Edges), std::move(PendingTypeTests),
+ Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount,
+ std::move(Refs), std::move(Edges), std::move(PendingTypeTests),
std::move(PendingTypeTestAssumeVCalls),
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
@@ -5422,10 +5533,17 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
unsigned ValueID = Record[0];
uint64_t ModuleId = Record[1];
uint64_t RawFlags = Record[2];
+ unsigned RefArrayStart = 3;
+ GlobalVarSummary::GVarFlags GVF;
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
+ if (Version >= 5) {
+ GVF = getDecodedGVarFlags(Record[3]);
+ RefArrayStart = 4;
+ }
std::vector<ValueInfo> Refs =
- makeRefList(ArrayRef<uint64_t>(Record).slice(3));
- auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs));
+ makeRefList(ArrayRef<uint64_t>(Record).slice(RefArrayStart));
+ auto FS =
+ llvm::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
LastSeenSummary = FS.get();
FS->setModulePath(ModuleIdMap[ModuleId]);
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
@@ -5811,6 +5929,46 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
return std::move(Index);
}
+static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
+ unsigned ID) {
+ if (Stream.EnterSubBlock(ID))
+ return error("Invalid record");
+ SmallVector<uint64_t, 64> Record;
+
+ while (true) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ // If no flags record found, conservatively return true to mimic
+ // behavior before this flag was added.
+ return true;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Look for the FS_FLAGS record.
+ Record.clear();
+ auto BitCode = Stream.readRecord(Entry.ID, Record);
+ switch (BitCode) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::FS_FLAGS: { // [flags]
+ uint64_t Flags = Record[0];
+ // Scan flags.
+ assert(Flags <= 0x1f && "Unexpected bits in flag");
+
+ return Flags & 0x8;
+ }
+ }
+ }
+ llvm_unreachable("Exit infinite loop");
+}
+
// Check if the given bitcode buffer contains a global value summary block.
Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
BitstreamCursor Stream(Buffer);
@@ -5826,14 +5984,27 @@ Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false};
+ return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false,
+ /*EnableSplitLTOUnit=*/false};
case BitstreamEntry::SubBlock:
- if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID)
- return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true};
+ if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) {
+ Expected<bool> EnableSplitLTOUnit =
+ getEnableSplitLTOUnitFlag(Stream, Entry.ID);
+ if (!EnableSplitLTOUnit)
+ return EnableSplitLTOUnit.takeError();
+ return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true,
+ *EnableSplitLTOUnit};
+ }
- if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID)
- return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true};
+ if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) {
+ Expected<bool> EnableSplitLTOUnit =
+ getEnableSplitLTOUnitFlag(Stream, Entry.ID);
+ if (!EnableSplitLTOUnit)
+ return EnableSplitLTOUnit.takeError();
+ return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true,
+ *EnableSplitLTOUnit};
+ }
// Ignore other sub-blocks.
if (Stream.SkipBlock())
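getEnableSplitLTOUnitFlag() extracts a single bit of the FS_FLAGS word so that getLTOInfo() can report it without parsing the whole summary block. A sketch of the full flag layout, pieced together from the assert above and the writer changes further down in BitcodeWriter.cpp (the meaning of bit 0x1 is an assumption; its condition is outside this diff):

    // FS_FLAGS bit layout (summary index flags):
    //   0x01  combined index: dead stripping performed        (assumed, pre-existing)
    //   0x02  combined index: skipModuleByDistributedBackend  (pre-existing)
    //   0x04  combined index: hasSyntheticEntryCounts         (new)
    //   0x08  enableSplitLTOUnit                              (new, the bit read here)
    //   0x10  combined index: partiallySplitLTOUnits          (new)
    bool EnableSplitLTOUnit = (Flags & 0x8) != 0;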
diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 011c41e2cecd..3289aa0acddd 100644
--- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -650,10 +650,6 @@ public:
return MetadataList.getMetadataFwdRef(ID);
}
- MDNode *getMDNodeFwdRefOrNull(unsigned Idx) {
- return MetadataList.getMDNodeFwdRefOrNull(Idx);
- }
-
DISubprogram *lookupSubprogramForFunction(Function *F) {
return FunctionsWithSPs.lookup(F);
}
@@ -772,7 +768,7 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
// It is acknowledged by 'TODO: Inherit from Metadata' in the
// NamedMDNode class definition.
MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[i]);
- assert(MD && "Invalid record");
+ assert(MD && "Invalid metadata: expect fwd ref to MDNode");
NMD->addOperand(MD);
}
break;
@@ -1049,7 +1045,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
for (unsigned i = 0; i != Size; ++i) {
MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[i]);
if (!MD)
- return error("Invalid record");
+ return error("Invalid named metadata: expect fwd ref to MDNode");
NMD->addOperand(MD);
}
break;
@@ -1139,7 +1135,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_LOCATION: {
- if (Record.size() != 5)
+ if (Record.size() != 5 && Record.size() != 6)
return error("Invalid record");
IsDistinct = Record[0];
@@ -1147,8 +1143,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
unsigned Column = Record[2];
Metadata *Scope = getMD(Record[3]);
Metadata *InlinedAt = getMDOrNull(Record[4]);
+ bool ImplicitCode = Record.size() == 6 && Record[5];
MetadataList.assignValue(
- GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt)),
+ GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt,
+ ImplicitCode)),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -1211,14 +1209,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_BASIC_TYPE: {
- if (Record.size() != 6)
+ if (Record.size() < 6 || Record.size() > 7)
return error("Invalid record");
IsDistinct = Record[0];
+ DINode::DIFlags Flags = (Record.size() > 6) ?
+ static_cast<DINode::DIFlags>(Record[6]) : DINode::FlagZero;
+
MetadataList.assignValue(
GET_OR_DISTINCT(DIBasicType,
(Context, Record[1], getMDString(Record[2]), Record[3],
- Record[4], Record[5])),
+ Record[4], Record[5], Flags)),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -1308,7 +1309,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
(Context, Tag, Name, File, Line, Scope, BaseType,
SizeInBits, AlignInBits, OffsetInBits, Flags,
Elements, RuntimeLang, VTableHolder, TemplateParams,
- Identifier));
+ Identifier, Discriminator));
if (!IsNotUsedInTypeRef && Identifier)
MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT));
@@ -1390,7 +1391,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Record.size() <= 14 ? 0 : Record[14],
Record.size() <= 16 ? true : Record[16],
Record.size() <= 17 ? false : Record[17],
- Record.size() <= 18 ? false : Record[18]);
+ Record.size() <= 18 ? 0 : Record[18],
+ Record.size() <= 19 ? 0 : Record[19]);
MetadataList.assignValue(CU, NextMetadataNo);
NextMetadataNo++;
@@ -1404,20 +1406,43 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Record.size() < 18 || Record.size() > 21)
return error("Invalid record");
- IsDistinct =
- (Record[0] & 1) || Record[8]; // All definitions should be distinct.
+ bool HasSPFlags = Record[0] & 4;
+ DISubprogram::DISPFlags SPFlags =
+ HasSPFlags
+ ? static_cast<DISubprogram::DISPFlags>(Record[9])
+ : DISubprogram::toSPFlags(
+ /*IsLocalToUnit=*/Record[7], /*IsDefinition=*/Record[8],
+ /*IsOptimized=*/Record[14], /*Virtuality=*/Record[11]);
+
+ // All definitions should be distinct.
+ IsDistinct = (Record[0] & 1) || (SPFlags & DISubprogram::SPFlagDefinition);
// Version 1 has a Function as Record[15].
// Version 2 has removed Record[15].
// Version 3 has the Unit as Record[15].
// Version 4 added thisAdjustment.
- bool HasUnit = Record[0] >= 2;
- if (HasUnit && Record.size() < 19)
+ // Version 5 repacked flags into DISPFlags, changing many element numbers.
+ bool HasUnit = Record[0] & 2;
+ if (!HasSPFlags && HasUnit && Record.size() < 19)
return error("Invalid record");
- Metadata *CUorFn = getMDOrNull(Record[15]);
- unsigned Offset = Record.size() >= 19 ? 1 : 0;
- bool HasFn = Offset && !HasUnit;
- bool HasThisAdj = Record.size() >= 20;
- bool HasThrownTypes = Record.size() >= 21;
+ if (HasSPFlags && !HasUnit)
+ return error("Invalid record");
+ // Accommodate older formats.
+ bool HasFn = false;
+ bool HasThisAdj = true;
+ bool HasThrownTypes = true;
+ unsigned OffsetA = 0;
+ unsigned OffsetB = 0;
+ if (!HasSPFlags) {
+ OffsetA = 2;
+ OffsetB = 2;
+ if (Record.size() >= 19) {
+ HasFn = !HasUnit;
+ OffsetB++;
+ }
+ HasThisAdj = Record.size() >= 20;
+ HasThrownTypes = Record.size() >= 21;
+ }
+ Metadata *CUorFn = getMDOrNull(Record[12 + OffsetB]);
DISubprogram *SP = GET_OR_DISTINCT(
DISubprogram,
(Context,
@@ -1427,20 +1452,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getMDOrNull(Record[4]), // file
Record[5], // line
getMDOrNull(Record[6]), // type
- Record[7], // isLocal
- Record[8], // isDefinition
- Record[9], // scopeLine
- getDITypeRefOrNull(Record[10]), // containingType
- Record[11], // virtuality
- Record[12], // virtualIndex
- HasThisAdj ? Record[19] : 0, // thisAdjustment
- static_cast<DINode::DIFlags>(Record[13]), // flags
- Record[14], // isOptimized
+ Record[7 + OffsetA], // scopeLine
+ getDITypeRefOrNull(Record[8 + OffsetA]), // containingType
+ Record[10 + OffsetA], // virtualIndex
+ HasThisAdj ? Record[16 + OffsetB] : 0, // thisAdjustment
+ static_cast<DINode::DIFlags>(Record[11 + OffsetA]),// flags
+ SPFlags, // SPFlags
HasUnit ? CUorFn : nullptr, // unit
- getMDOrNull(Record[15 + Offset]), // templateParams
- getMDOrNull(Record[16 + Offset]), // declaration
- getMDOrNull(Record[17 + Offset]), // retainedNodes
- HasThrownTypes ? getMDOrNull(Record[20]) : nullptr // thrownTypes
+ getMDOrNull(Record[13 + OffsetB]), // templateParams
+ getMDOrNull(Record[14 + OffsetB]), // declaration
+ getMDOrNull(Record[15 + OffsetB]), // retainedNodes
+ HasThrownTypes ? getMDOrNull(Record[17 + OffsetB])
+ : nullptr // thrownTypes
));
MetadataList.assignValue(SP, NextMetadataNo);
NextMetadataNo++;
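The offset juggling above is easier to read as two layouts. When Record[0] & 4 (HasSPFlags) is set, isLocalToUnit/isDefinition/virtuality/isOptimized are packed into one DISPFlags word and everything after the type field shifts down; the writer in BitcodeWriter.cpp below now always sets that bit. Reconstructed from the reads above:

    // HasSPFlags layout:
    //   [1] scope [2] name [3] linkageName [4] file [5] line [6] type
    //   [7] scopeLine [8] containingType [9] SPFlags [10] virtualIndex
    //   [11] flags [12] unit [13] templateParams [14] declaration
    //   [15] retainedNodes [16] thisAdjustment [17] thrownTypes
    // Legacy layout (no HasSPFlags):
    //   [7] isLocalToUnit [8] isDefinition [9] scopeLine [10] containingType
    //   [11] virtuality [12] virtualIndex [13] flags [14] isOptimized
    //   [15] unit (or function in version 1), then templateParams, declaration,
    //   retainedNodes, [19] thisAdjustment and [20] thrownTypes when present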
@@ -1557,21 +1580,35 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_GLOBAL_VAR: {
- if (Record.size() < 11 || Record.size() > 12)
+ if (Record.size() < 11 || Record.size() > 13)
return error("Invalid record");
IsDistinct = Record[0] & 1;
unsigned Version = Record[0] >> 1;
- if (Version == 1) {
+ if (Version == 2) {
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(
+ DIGlobalVariable,
+ (Context, getMDOrNull(Record[1]), getMDString(Record[2]),
+ getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
+ getDITypeRefOrNull(Record[6]), Record[7], Record[8],
+ getMDOrNull(Record[9]), getMDOrNull(Record[10]), Record[11])),
+ NextMetadataNo);
+
+ NextMetadataNo++;
+ } else if (Version == 1) {
+ // No upgrade necessary. A null field will be introduced to indicate
+ // that no parameter information is available.
MetadataList.assignValue(
GET_OR_DISTINCT(DIGlobalVariable,
(Context, getMDOrNull(Record[1]),
getMDString(Record[2]), getMDString(Record[3]),
getMDOrNull(Record[4]), Record[5],
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- getMDOrNull(Record[10]), Record[11])),
+ getMDOrNull(Record[10]), nullptr, Record[11])),
NextMetadataNo);
+
NextMetadataNo++;
} else if (Version == 0) {
// Upgrade old metadata, which stored a global variable reference or a
@@ -1602,7 +1639,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
(Context, getMDOrNull(Record[1]), getMDString(Record[2]),
getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- getMDOrNull(Record[10]), AlignInBits));
+ getMDOrNull(Record[10]), nullptr, AlignInBits));
DIGlobalVariableExpression *DGVE = nullptr;
if (Attach || Expr)
@@ -1814,7 +1851,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseGlobalObjectAttachment(
return error("Invalid ID");
MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[I + 1]);
if (!MD)
- return error("Invalid metadata attachment");
+ return error("Invalid metadata attachment: expect fwd ref to MDNode");
GO.addMetadata(K->second, *MD);
}
return Error::success();
@@ -1984,10 +2021,6 @@ Metadata *MetadataLoader::getMetadataFwdRefOrLoad(unsigned Idx) {
return Pimpl->getMetadataFwdRefOrLoad(Idx);
}
-MDNode *MetadataLoader::getMDNodeFwdRefOrNull(unsigned Idx) {
- return Pimpl->getMDNodeFwdRefOrNull(Idx);
-}
-
DISubprogram *MetadataLoader::lookupSubprogramForFunction(Function *F) {
return Pimpl->lookupSubprogramForFunction(F);
}
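For METADATA_GLOBAL_VAR the versions decoded above share fields [1]..[8] (scope, name, linkageName, file, line, type, isLocalToUnit, isDefinition) and differ only in the tail:

    // Version 1: [9] unused expression slot  [10] staticDataMemberDeclaration  [11] alignInBits
    // Version 2: [9] staticDataMemberDeclaration  [10] templateParams (new)    [11] alignInBits

Version-1 records are read with a null templateParams, and version-0 records still go through the old upgrade path that splits off a DIGlobalVariableExpression.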
diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h
index f23dcc06cc94..07a77a086f32 100644
--- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h
+++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h
@@ -65,9 +65,7 @@ public:
/// necessary.
Metadata *getMetadataFwdRefOrLoad(unsigned Idx);
- MDNode *getMDNodeFwdRefOrNull(unsigned Idx);
-
- /// Return the DISubprogra metadata for a Function if any, null otherwise.
+ /// Return the DISubprogram metadata for a Function if any, null otherwise.
DISubprogram *lookupSubprogramForFunction(Function *F);
/// Parse a `METADATA_ATTACHMENT` block for a function.
diff --git a/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp b/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp
index 1ab22b5cc3d1..b3945a37408f 100644
--- a/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp
@@ -144,7 +144,7 @@ Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
void BitcodeReaderValueList::resolveConstantForwardRefs() {
// Sort the values by-pointer so that they are efficient to look up with a
// binary search.
- llvm::sort(ResolveConstants.begin(), ResolveConstants.end());
+ llvm::sort(ResolveConstants);
SmallVector<Constant *, 64> NewOps;
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 87b47dc354b5..ba4f932e2e6d 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -112,6 +112,8 @@ enum {
// FUNCTION_BLOCK abbrev id's.
FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ FUNCTION_INST_UNOP_ABBREV,
+ FUNCTION_INST_UNOP_FLAGS_ABBREV,
FUNCTION_INST_BINOP_ABBREV,
FUNCTION_INST_BINOP_FLAGS_ABBREV,
FUNCTION_INST_CAST_ABBREV,
@@ -513,6 +515,13 @@ static unsigned getEncodedCastOpcode(unsigned Opcode) {
}
}
+static unsigned getEncodedUnaryOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown binary instruction!");
+ case Instruction::FNeg: return bitc::UNOP_NEG;
+ }
+}
+
static unsigned getEncodedBinaryOpcode(unsigned Opcode) {
switch (Opcode) {
default: llvm_unreachable("Unknown binary instruction!");
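getEncodedUnaryOpcode() is the writer-side half of the new unary operator (FNeg) bitcode support; UNOP_NEG is the only encoding so far. The llvm_unreachable text still says "binary", which looks like a copy-paste from getEncodedBinaryOpcode below. For orientation, the reader needs the inverse mapping, roughly as follows (hypothetical sketch, not part of this diff):

    static int getDecodedUnaryOpcode(unsigned Val) {
      switch (Val) {
      default:
        return -1; // unknown encoding
      case bitc::UNOP_NEG:
        return Instruction::FNeg;
      }
    }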
@@ -690,6 +699,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_SANITIZE_THREAD;
case Attribute::SanitizeMemory:
return bitc::ATTR_KIND_SANITIZE_MEMORY;
+ case Attribute::SpeculativeLoadHardening:
+ return bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING;
case Attribute::SwiftError:
return bitc::ATTR_KIND_SWIFT_ERROR;
case Attribute::SwiftSelf:
@@ -969,6 +980,7 @@ static uint64_t getEncodedFFlags(FunctionSummary::FFlags Flags) {
RawFlags |= (Flags.ReadOnly << 1);
RawFlags |= (Flags.NoRecurse << 2);
RawFlags |= (Flags.ReturnDoesNotAlias << 3);
+ RawFlags |= (Flags.NoInline << 4);
return RawFlags;
}
@@ -988,6 +1000,11 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
return RawFlags;
}
+static uint64_t getEncodedGVarFlags(GlobalVarSummary::GVarFlags Flags) {
+ uint64_t RawFlags = Flags.ReadOnly;
+ return RawFlags;
+}
+
static unsigned getEncodedVisibility(const GlobalValue &GV) {
switch (GV.getVisibility()) {
case GlobalValue::DefaultVisibility: return 0;
@@ -1264,7 +1281,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// FUNCTION: [strtab offset, strtab size, type, callingconv, isproto,
// linkage, paramattrs, alignment, section, visibility, gc,
// unnamed_addr, prologuedata, dllstorageclass, comdat,
- // prefixdata, personalityfn, DSO_Local]
+ // prefixdata, personalityfn, DSO_Local, addrspace]
Vals.push_back(addToStrtab(F.getName()));
Vals.push_back(F.getName().size());
Vals.push_back(VE.getTypeID(F.getFunctionType()));
@@ -1287,6 +1304,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
F.hasPersonalityFn() ? (VE.getValueID(F.getPersonalityFn()) + 1) : 0);
Vals.push_back(F.isDSOLocal());
+ Vals.push_back(F.getAddressSpace());
+
unsigned AbbrevToUse = 0;
Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
Vals.clear();
@@ -1399,6 +1418,7 @@ unsigned ModuleBitcodeWriter::createDILocationAbbrev() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
return Stream.EmitAbbrev(std::move(Abbv));
}
@@ -1413,6 +1433,7 @@ void ModuleBitcodeWriter::writeDILocation(const DILocation *N,
Record.push_back(N->getColumn());
Record.push_back(VE.getMetadataID(N->getScope()));
Record.push_back(VE.getMetadataOrNullID(N->getInlinedAt()));
+ Record.push_back(N->isImplicitCode());
Stream.EmitRecord(bitc::METADATA_LOCATION, Record, Abbrev);
Record.clear();
@@ -1486,6 +1507,7 @@ void ModuleBitcodeWriter::writeDIBasicType(const DIBasicType *N,
Record.push_back(N->getSizeInBits());
Record.push_back(N->getAlignInBits());
Record.push_back(N->getEncoding());
+ Record.push_back(N->getFlags());
Stream.EmitRecord(bitc::METADATA_BASIC_TYPE, Record, Abbrev);
Record.clear();
@@ -1602,7 +1624,7 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
Record.push_back(VE.getMetadataOrNullID(N->getMacros().get()));
Record.push_back(N->getSplitDebugInlining());
Record.push_back(N->getDebugInfoForProfiling());
- Record.push_back(N->getGnuPubnames());
+ Record.push_back((unsigned)N->getNameTableKind());
Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
Record.clear();
@@ -1611,22 +1633,20 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
void ModuleBitcodeWriter::writeDISubprogram(const DISubprogram *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
- uint64_t HasUnitFlag = 1 << 1;
- Record.push_back(N->isDistinct() | HasUnitFlag);
+ const uint64_t HasUnitFlag = 1 << 1;
+ const uint64_t HasSPFlagsFlag = 1 << 2;
+ Record.push_back(uint64_t(N->isDistinct()) | HasUnitFlag | HasSPFlagsFlag);
Record.push_back(VE.getMetadataOrNullID(N->getScope()));
Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName()));
Record.push_back(VE.getMetadataOrNullID(N->getFile()));
Record.push_back(N->getLine());
Record.push_back(VE.getMetadataOrNullID(N->getType()));
- Record.push_back(N->isLocalToUnit());
- Record.push_back(N->isDefinition());
Record.push_back(N->getScopeLine());
Record.push_back(VE.getMetadataOrNullID(N->getContainingType()));
- Record.push_back(N->getVirtuality());
+ Record.push_back(N->getSPFlags());
Record.push_back(N->getVirtualIndex());
Record.push_back(N->getFlags());
- Record.push_back(N->isOptimized());
Record.push_back(VE.getMetadataOrNullID(N->getRawUnit()));
Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get()));
Record.push_back(VE.getMetadataOrNullID(N->getDeclaration()));
@@ -1738,7 +1758,7 @@ void ModuleBitcodeWriter::writeDITemplateValueParameter(
void ModuleBitcodeWriter::writeDIGlobalVariable(
const DIGlobalVariable *N, SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
- const uint64_t Version = 1 << 1;
+ const uint64_t Version = 2 << 1;
Record.push_back((uint64_t)N->isDistinct() | Version);
Record.push_back(VE.getMetadataOrNullID(N->getScope()));
Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
@@ -1748,8 +1768,8 @@ void ModuleBitcodeWriter::writeDIGlobalVariable(
Record.push_back(VE.getMetadataOrNullID(N->getType()));
Record.push_back(N->isLocalToUnit());
Record.push_back(N->isDefinition());
- Record.push_back(/* expr */ 0);
Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration()));
+ Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams()));
Record.push_back(N->getAlignInBits());
Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev);
@@ -2376,6 +2396,16 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
Record.push_back(Flags);
}
break;
+ case Instruction::FNeg: {
+ assert(CE->getNumOperands() == 1 && "Unknown constant expr!");
+ Code = bitc::CST_CODE_CE_UNOP;
+ Record.push_back(getEncodedUnaryOpcode(CE->getOpcode()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ uint64_t Flags = getOptimizationFlags(CE);
+ if (Flags != 0)
+ Record.push_back(Flags);
+ break;
+ }
case Instruction::GetElementPtr: {
Code = bitc::CST_CODE_CE_GEP;
const auto *GO = cast<GEPOperator>(C);
@@ -2548,7 +2578,19 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
}
}
break;
-
+ case Instruction::FNeg: {
+ Code = bitc::FUNC_CODE_INST_UNOP;
+ if (!pushValueAndType(I.getOperand(0), InstID, Vals))
+ AbbrevToUse = FUNCTION_INST_UNOP_ABBREV;
+ Vals.push_back(getEncodedUnaryOpcode(I.getOpcode()));
+ uint64_t Flags = getOptimizationFlags(&I);
+ if (Flags != 0) {
+ if (AbbrevToUse == FUNCTION_INST_UNOP_ABBREV)
+ AbbrevToUse = FUNCTION_INST_UNOP_FLAGS_ABBREV;
+ Vals.push_back(Flags);
+ }
+ break;
+ }
case Instruction::GetElementPtr: {
Code = bitc::FUNC_CODE_INST_GEP;
AbbrevToUse = FUNCTION_INST_GEP_ABBREV;
@@ -3088,6 +3130,7 @@ void ModuleBitcodeWriter::writeFunction(
Vals.push_back(DL->getColumn());
Vals.push_back(VE.getMetadataOrNullID(DL->getScope()));
Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt()));
+ Vals.push_back(DL->isImplicitCode());
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
Vals.clear();
@@ -3208,6 +3251,25 @@ void ModuleBitcodeWriter::writeBlockInfo() {
FUNCTION_INST_LOAD_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
}
+ { // INST_UNOP abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
+ FUNCTION_INST_UNOP_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_UNOP_FLAGS abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // flags
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
+ FUNCTION_INST_UNOP_FLAGS_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
{ // INST_BINOP abbrev for FUNCTION_BLOCK.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
@@ -3353,14 +3415,10 @@ void IndexBitcodeWriter::writeModStrings() {
/// Write the function type metadata related records that need to appear before
/// a function summary entry (whether per-module or combined).
-static void writeFunctionTypeMetadataRecords(
- BitstreamWriter &Stream, FunctionSummary *FS,
- std::set<GlobalValue::GUID> &ReferencedTypeIds) {
- if (!FS->type_tests().empty()) {
+static void writeFunctionTypeMetadataRecords(BitstreamWriter &Stream,
+ FunctionSummary *FS) {
+ if (!FS->type_tests().empty())
Stream.EmitRecord(bitc::FS_TYPE_TESTS, FS->type_tests());
- for (auto &TT : FS->type_tests())
- ReferencedTypeIds.insert(TT);
- }
SmallVector<uint64_t, 64> Record;
@@ -3372,7 +3430,6 @@ static void writeFunctionTypeMetadataRecords(
for (auto &VF : VFs) {
Record.push_back(VF.GUID);
Record.push_back(VF.Offset);
- ReferencedTypeIds.insert(VF.GUID);
}
Stream.EmitRecord(Ty, Record);
};
@@ -3387,7 +3444,6 @@ static void writeFunctionTypeMetadataRecords(
for (auto &VC : VCs) {
Record.clear();
Record.push_back(VC.VFunc.GUID);
- ReferencedTypeIds.insert(VC.VFunc.GUID);
Record.push_back(VC.VFunc.Offset);
Record.insert(Record.end(), VC.Args.begin(), VC.Args.end());
Stream.EmitRecord(Ty, Record);
@@ -3400,6 +3456,33 @@ static void writeFunctionTypeMetadataRecords(
FS->type_checked_load_const_vcalls());
}
+/// Collect type IDs from type tests used by function.
+static void
+getReferencedTypeIds(FunctionSummary *FS,
+ std::set<GlobalValue::GUID> &ReferencedTypeIds) {
+ if (!FS->type_tests().empty())
+ for (auto &TT : FS->type_tests())
+ ReferencedTypeIds.insert(TT);
+
+ auto GetReferencedTypesFromVFuncIdVec =
+ [&](ArrayRef<FunctionSummary::VFuncId> VFs) {
+ for (auto &VF : VFs)
+ ReferencedTypeIds.insert(VF.GUID);
+ };
+
+ GetReferencedTypesFromVFuncIdVec(FS->type_test_assume_vcalls());
+ GetReferencedTypesFromVFuncIdVec(FS->type_checked_load_vcalls());
+
+ auto GetReferencedTypesFromConstVCallVec =
+ [&](ArrayRef<FunctionSummary::ConstVCall> VCs) {
+ for (auto &VC : VCs)
+ ReferencedTypeIds.insert(VC.VFunc.GUID);
+ };
+
+ GetReferencedTypesFromConstVCallVec(FS->type_test_assume_const_vcalls());
+ GetReferencedTypesFromConstVCallVec(FS->type_checked_load_const_vcalls());
+}
+
static void writeWholeProgramDevirtResolutionByArg(
SmallVector<uint64_t, 64> &NameVals, const std::vector<uint64_t> &args,
const WholeProgramDevirtResolution::ByArg &ByArg) {
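Type-id bookkeeping is now split from emission: writeFunctionTypeMetadataRecords() only writes the FS_TYPE_* records, and the new getReferencedTypeIds() collects the referenced GUIDs. The per-module writer only needs the former; the combined writer calls both, and can also collect type ids from an alias's aliasee function summary (see the FS_COMBINED_ALIAS hunk further down).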
@@ -3453,13 +3536,13 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
- std::set<GlobalValue::GUID> ReferencedTypeIds;
- writeFunctionTypeMetadataRecords(Stream, FS, ReferencedTypeIds);
+ writeFunctionTypeMetadataRecords(Stream, FS);
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
NameVals.push_back(getEncodedFFlags(FS->fflags()));
NameVals.push_back(FS->refs().size());
+ NameVals.push_back(FS->immutableRefCount());
for (auto &RI : FS->refs())
NameVals.push_back(VE.getValueID(RI.getValue()));
@@ -3501,6 +3584,7 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
NameVals.push_back(VE.getValueID(&V));
GlobalVarSummary *VS = cast<GlobalVarSummary>(Summary);
NameVals.push_back(getEncodedGVSummaryFlags(VS->flags()));
+ NameVals.push_back(getEncodedGVarFlags(VS->varflags()));
unsigned SizeBeforeRefs = NameVals.size();
for (auto &RI : VS->refs())
@@ -3517,7 +3601,7 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
// Current version for the summary.
// This is bumped whenever we introduce changes in the way some record are
// interpreted, like flags for instance.
-static const uint64_t INDEX_VERSION = 4;
+static const uint64_t INDEX_VERSION = 6;
/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
@@ -3534,6 +3618,13 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
+ // Write the index flags.
+ uint64_t Flags = 0;
+ // Bits 1-3 are set only in the combined index, skip them.
+ if (Index->enableSplitLTOUnit())
+ Flags |= 0x8;
+ Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
+
if (Index->begin() == Index->end()) {
Stream.ExitBlock();
return;
@@ -3552,6 +3643,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3568,6 +3660,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
// numrefs x valueid, n x (valueid [, rel_block_freq])
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3646,6 +3739,12 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Flags |= 0x1;
if (Index.skipModuleByDistributedBackend())
Flags |= 0x2;
+ if (Index.hasSyntheticEntryCounts())
+ Flags |= 0x4;
+ if (Index.enableSplitLTOUnit())
+ Flags |= 0x8;
+ if (Index.partiallySplitLTOUnits())
+ Flags |= 0x10;
Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
for (const auto &GVI : valueIds()) {
@@ -3661,7 +3760,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // entrycount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
// numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3676,6 +3777,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3748,6 +3850,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.push_back(*ValueId);
NameVals.push_back(Index.getModuleId(VS->modulePath()));
NameVals.push_back(getEncodedGVSummaryFlags(VS->flags()));
+ NameVals.push_back(getEncodedGVarFlags(VS->varflags()));
for (auto &RI : VS->refs()) {
auto RefValueId = getValueId(RI.getGUID());
if (!RefValueId)
@@ -3764,25 +3867,32 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
}
auto *FS = cast<FunctionSummary>(S);
- writeFunctionTypeMetadataRecords(Stream, FS, ReferencedTypeIds);
+ writeFunctionTypeMetadataRecords(Stream, FS);
+ getReferencedTypeIds(FS, ReferencedTypeIds);
NameVals.push_back(*ValueId);
NameVals.push_back(Index.getModuleId(FS->modulePath()));
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
NameVals.push_back(getEncodedFFlags(FS->fflags()));
+ NameVals.push_back(FS->entryCount());
+
// Fill in below
- NameVals.push_back(0);
+ NameVals.push_back(0); // numrefs
+ NameVals.push_back(0); // immutablerefcnt
- unsigned Count = 0;
+ unsigned Count = 0, ImmutableRefCnt = 0;
for (auto &RI : FS->refs()) {
auto RefValueId = getValueId(RI.getGUID());
if (!RefValueId)
continue;
NameVals.push_back(*RefValueId);
+ if (RI.isReadOnly())
+ ImmutableRefCnt++;
Count++;
}
- NameVals[5] = Count;
+ NameVals[6] = Count;
+ NameVals[7] = ImmutableRefCnt;
bool HasProfileData = false;
for (auto &EI : FS->calls()) {
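The two placeholders pushed above live at fixed offsets because the combined function summary prefix now has eight fixed fields; this is why the patch-up below moves from NameVals[5] to indices 6 and 7:

    // NameVals prefix written above:
    //   [0] valueid  [1] modid  [2] flags  [3] instcount
    //   [4] fflags   [5] entrycount
    //   [6] numrefs           (patched after walking the refs)
    //   [7] numimmutablerefs  (patched after walking the refs)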
@@ -3851,6 +3961,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.EmitRecord(bitc::FS_COMBINED_ALIAS, NameVals, FSAliasAbbrev);
NameVals.clear();
MaybeEmitOriginalName(*AS);
+
+ if (auto *FS = dyn_cast<FunctionSummary>(&AS->getAliasee()))
+ getReferencedTypeIds(FS, ReferencedTypeIds);
}
if (!Index.cfiFunctionDefs().empty()) {
@@ -3871,12 +3984,13 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.clear();
}
- if (!Index.typeIds().empty()) {
- for (auto &S : Index.typeIds()) {
- // Skip if not referenced in any GV summary within this index file.
- if (!ReferencedTypeIds.count(GlobalValue::getGUID(S.first)))
- continue;
- writeTypeIdSummaryRecord(NameVals, StrtabBuilder, S.first, S.second);
+ // Walk the GUIDs that were referenced, and write the
+ // corresponding type id records.
+ for (auto &T : ReferencedTypeIds) {
+ auto TidIter = Index.typeIds().equal_range(T);
+ for (auto It = TidIter.first; It != TidIter.second; ++It) {
+ writeTypeIdSummaryRecord(NameVals, StrtabBuilder, It->second.first,
+ It->second.second);
Stream.EmitRecord(bitc::FS_TYPE_ID, NameVals);
NameVals.clear();
}
@@ -3926,7 +4040,7 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
if (ModHash)
// Save the written hash value.
- std::copy(std::begin(Vals), std::end(Vals), std::begin(*ModHash));
+ llvm::copy(Vals, std::begin(*ModHash));
}
}
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index d473741e8ceb..deb04f1bb36c 100644
--- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -184,7 +184,7 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
return;
bool IsGlobalValue = OM.isGlobalValue(ID);
- llvm::sort(List.begin(), List.end(), [&](const Entry &L, const Entry &R) {
+ llvm::sort(List, [&](const Entry &L, const Entry &R) {
const Use *LU = L.first;
const Use *RU = R.first;
if (LU == RU)
@@ -745,7 +745,7 @@ void ValueEnumerator::organizeMetadata() {
// and then sort by the original/current ID. Since the IDs are guaranteed to
// be unique, the result of std::sort will be deterministic. There's no need
// for std::stable_sort.
- llvm::sort(Order.begin(), Order.end(), [this](MDIndex LHS, MDIndex RHS) {
+ llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) {
return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) <
std::make_tuple(RHS.F, getMetadataTypeOrder(RHS.get(MDs)), RHS.ID);
});
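Several call sites in this import move from the iterator-pair form to the range form of the STLExtras helpers; the range form simply forwards begin()/end(), so behavior is unchanged. For instance:

    llvm::sort(Order.begin(), Order.end(), Cmp);   // before
    llvm::sort(Order, Cmp);                        // after
    std::copy(std::begin(Vals), std::end(Vals), std::begin(*ModHash));  // before
    llvm::copy(Vals, std::begin(*ModHash));                             // after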
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index 79f11def38f7..797f05ee5cf3 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -471,7 +471,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
- const TerminatorInst *Term = ExitBB->getTerminator();
+ const Instruction *Term = ExitBB->getTerminator();
const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
// The block must end in a return statement or unreachable.
@@ -496,6 +496,10 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
// Debug info intrinsics do not get in the way of tail call optimization.
if (isa<DbgInfoIntrinsic>(BBI))
continue;
+ // A lifetime end intrinsic should not stop tail call optimization.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
+ if (II->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
!isSafeToSpeculativelyExecute(&*BBI))
return false;
@@ -519,10 +523,12 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
AttributeList::ReturnIndex);
- // Noalias is completely benign as far as calling convention goes, it
- // shouldn't affect whether the call is a tail call.
+ // NoAlias and NonNull are completely benign as far as calling convention
+ // goes, they shouldn't affect whether the call is a tail call.
CallerAttrs.removeAttribute(Attribute::NoAlias);
CalleeAttrs.removeAttribute(Attribute::NoAlias);
+ CallerAttrs.removeAttribute(Attribute::NonNull);
+ CalleeAttrs.removeAttribute(Attribute::NonNull);
if (CallerAttrs.contains(Attribute::ZExt)) {
if (!CalleeAttrs.contains(Attribute::ZExt))
@@ -540,6 +546,21 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
CalleeAttrs.removeAttribute(Attribute::SExt);
}
+ // Drop sext and zext return attributes if the result is not used.
+ // This enables tail calls for code like:
+ //
+ // define void @caller() {
+ // entry:
+ // %unused_result = tail call zeroext i1 @callee()
+ // br label %retlabel
+ // retlabel:
+ // ret void
+ // }
+ if (I->use_empty()) {
+ CalleeAttrs.removeAttribute(Attribute::SExt);
+ CalleeAttrs.removeAttribute(Attribute::ZExt);
+ }
+
// If they're still different, there's some facet we don't understand
// (currently only "inreg", but in future who knows). It may be OK but the
// only safe option is to reject the tail call.
@@ -650,7 +671,7 @@ static void collectEHScopeMembers(
// Returns are boundaries where scope transfer can occur, don't follow
// successors.
- if (Visiting->isReturnBlock())
+ if (Visiting->isEHScopeReturnBlock())
continue;
for (const MachineBasicBlock *Succ : Visiting->successors())
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 20b0b8d3feab..95875ccb8a0b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
@@ -553,19 +554,31 @@ void llvm::emitDWARF5AccelTable(
AsmPrinter *Asm, AccelTable<DWARF5AccelTableData> &Contents,
const DwarfDebug &DD, ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) {
std::vector<MCSymbol *> CompUnits;
+ SmallVector<unsigned, 1> CUIndex(CUs.size());
+ int Count = 0;
for (const auto &CU : enumerate(CUs)) {
+ if (CU.value()->getCUNode()->getNameTableKind() ==
+ DICompileUnit::DebugNameTableKind::None)
+ continue;
+ CUIndex[CU.index()] = Count++;
assert(CU.index() == CU.value()->getUniqueID());
const DwarfCompileUnit *MainCU =
DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get();
CompUnits.push_back(MainCU->getLabelBegin());
}
+ if (CompUnits.empty())
+ return;
+
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfDebugNamesSection());
+
Contents.finalize(Asm, "names");
Dwarf5AccelTableWriter<DWARF5AccelTableData>(
Asm, Contents, CompUnits,
- [&DD](const DWARF5AccelTableData &Entry) {
+ [&](const DWARF5AccelTableData &Entry) {
const DIE *CUDie = Entry.getDie().getUnitDie();
- return DD.lookupCU(CUDie)->getUniqueID();
+ return CUIndex[DD.lookupCU(CUDie)->getUniqueID()];
})
.emit();
}
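emitDWARF5AccelTable() now skips compile units whose DICompileUnit opted out of a name table (DebugNameTableKind::None), renumbers the remaining units densely through CUIndex so the per-entry CU index stays valid, switches to the .debug_names section itself, and emits nothing if no unit participates. In IR this is driven by the nameTableKind: field, e.g. (illustrative metadata, not taken from this diff):

    !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1,
                                 emissionKind: FullDebug, nameTableKind: None)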
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index c8305ad9c547..042243b79259 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -27,29 +27,35 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize();
- Asm.OutStreamer->SwitchSection(Section);
-
uint64_t Length = sizeof(uint16_t) // version
+ sizeof(uint8_t) // address_size
+ sizeof(uint8_t) // segment_selector_size
+ AddrSize * Pool.size(); // entries
+ Asm.OutStreamer->AddComment("Length of contribution");
Asm.emitInt32(Length); // TODO: Support DWARF64 format.
+ Asm.OutStreamer->AddComment("DWARF version number");
Asm.emitInt16(Asm.getDwarfVersion());
+ Asm.OutStreamer->AddComment("Address size");
Asm.emitInt8(AddrSize);
+ Asm.OutStreamer->AddComment("Segment selector size");
Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size.
}
// Emit addresses into the section given.
void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
- if (Asm.getDwarfVersion() >= 5)
- emitHeader(Asm, AddrSection);
-
- if (Pool.empty())
+ if (isEmpty())
return;
// Start the dwarf addr section.
Asm.OutStreamer->SwitchSection(AddrSection);
+ if (Asm.getDwarfVersion() >= 5)
+ emitHeader(Asm, AddrSection);
+
+ // Define the symbol that marks the start of the contribution.
+ // It is referenced via DW_AT_addr_base.
+ Asm.OutStreamer->EmitLabel(AddressTableBaseSym);
+
// Order the address pool entries by ID
SmallVector<const MCExpr *, 64> Entries(Pool.size());
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
index d5008fab5563..2209c7eb50ed 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -51,8 +51,14 @@ public:
void resetUsedFlag() { HasBeenUsed = false; }
+ MCSymbol *getLabel() { return AddressTableBaseSym; }
+ void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; }
+
private:
void emitHeader(AsmPrinter &Asm, MCSection *Section);
+
+ /// Symbol designates the start of the contribution to the address table.
+ MCSymbol *AddressTableBaseSym = nullptr;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 9bbc77b3056b..7070451e3330 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/AsmPrinter.h"
-#include "AsmPrinterHandler.h"
#include "CodeViewDebug.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
+#include "WasmException.h"
#include "WinCFGuard.h"
#include "WinException.h"
#include "llvm/ADT/APFloat.h"
@@ -32,8 +32,10 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
@@ -52,6 +54,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -260,7 +263,7 @@ bool AsmPrinter::doInitialization(Module &M) {
// use the directive, where it would need the same conditionalization
// anyway.
const Triple &Target = TM.getTargetTriple();
- OutStreamer->EmitVersionForTarget(Target);
+ OutStreamer->EmitVersionForTarget(Target, M.getSDKVersion());
// Allow the target to emit any magic that it wants at the start of the file.
EmitStartOfAsmFile(M);
@@ -355,7 +358,7 @@ bool AsmPrinter::doInitialization(Module &M) {
}
break;
case ExceptionHandling::Wasm:
- // TODO to prevent warning
+ ES = new WasmException(this);
break;
}
if (ES)
@@ -363,7 +366,7 @@ bool AsmPrinter::doInitialization(Module &M) {
DWARFGroupName, DWARFGroupDescription));
if (mdconst::extract_or_null<ConstantInt>(
- MMI->getModule()->getModuleFlag("cfguard")))
+ MMI->getModule()->getModuleFlag("cfguardtable")))
Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName,
CFGuardDescription, DWARFGroupName,
DWARFGroupDescription));
@@ -627,8 +630,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
///
/// \p Value - The value to emit.
/// \p Size - The size of the integer (in bytes) to emit.
-void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
- unsigned Size) const {
+void AsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const {
OutStreamer->EmitValue(Value, Size);
}
@@ -749,18 +751,30 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
const MachineFrameInfo &MFI = MF->getFrameInfo();
bool Commented = false;
+ auto getSize =
+ [&MFI](const SmallVectorImpl<const MachineMemOperand *> &Accesses) {
+ unsigned Size = 0;
+ for (auto A : Accesses)
+ if (MFI.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
+ ->getFrameIndex()))
+ Size += A->getSize();
+ return Size;
+ };
+
// We assume a single instruction only has a spill or reload, not
// both.
const MachineMemOperand *MMO;
+ SmallVector<const MachineMemOperand *, 2> Accesses;
if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Reload";
Commented = true;
}
- } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI)) {
- CommentOS << MMO->getSize() << "-byte Folded Reload";
+ } else if (TII->hasLoadFromStackSlot(MI, Accesses)) {
+ if (auto Size = getSize(Accesses)) {
+ CommentOS << Size << "-byte Folded Reload";
Commented = true;
}
} else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
@@ -769,9 +783,9 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
CommentOS << MMO->getSize() << "-byte Spill";
Commented = true;
}
- } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI)) {
- CommentOS << MMO->getSize() << "-byte Folded Spill";
+ } else if (TII->hasStoreToStackSlot(MI, Accesses)) {
+ if (auto Size = getSize(Accesses)) {
+ CommentOS << Size << "-byte Folded Spill";
Commented = true;
}
}
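The spill/reload comment logic now asks hasLoadFromStackSlot/hasStoreToStackSlot for the full list of memory accesses instead of a single memoperand and frame index, and sums the sizes of those that hit spill slots, so a folded access touching several slots is reported with its combined byte count.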
@@ -1066,6 +1080,10 @@ void AsmPrinter::EmitFunctionBody() {
++NumInstsInFunction;
}
+ // If there is a pre-instruction symbol, emit a label for it here.
+ if (MCSymbol *S = MI.getPreInstrSymbol())
+ OutStreamer->EmitLabel(S);
+
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
@@ -1117,6 +1135,10 @@ void AsmPrinter::EmitFunctionBody() {
break;
}
+ // If there is a post-instruction symbol, emit a label for it here.
+ if (MCSymbol *S = MI.getPostInstrSymbol())
+ OutStreamer->EmitLabel(S);
+
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
@@ -1394,6 +1416,33 @@ bool AsmPrinter::doFinalization(Module &M) {
}
}
+ if (TM.getTargetTriple().isOSBinFormatCOFF()) {
+ MachineModuleInfoCOFF &MMICOFF =
+ MMI->getObjFileInfo<MachineModuleInfoCOFF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoCOFF::SymbolListTy Stubs = MMICOFF.GetGVStubList();
+ if (!Stubs.empty()) {
+ const DataLayout &DL = M.getDataLayout();
+
+ for (const auto &Stub : Stubs) {
+ SmallString<256> SectionName = StringRef(".rdata$");
+ SectionName += Stub.first->getName();
+ OutStreamer->SwitchSection(OutContext.getCOFFSection(
+ SectionName,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_LNK_COMDAT,
+ SectionKind::getReadOnly(), Stub.first->getName(),
+ COFF::IMAGE_COMDAT_SELECT_ANY));
+ EmitAlignment(Log2_32(DL.getPointerSize()));
+ OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global);
+ OutStreamer->EmitLabel(Stub.first);
+ OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
+ DL.getPointerSize());
+ }
+ }
+ }
+
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
@@ -1450,6 +1499,9 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit llvm.ident metadata in an '.ident' directive.
EmitModuleIdents(M);
+ // Emit bytes for llvm.commandline metadata.
+ EmitModuleCommandLines(M);
+
// Emit __morestack address if needed for indirect calls.
if (MMI->usesMorestackAddr()) {
unsigned Align = 1;
@@ -1534,7 +1586,8 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit address-significance attributes for all globals.
OutStreamer->EmitAddrsig();
for (const GlobalValue &GV : M.global_values())
- if (!GV.isThreadLocal() && !GV.getName().startswith("llvm.") &&
+ if (!GV.use_empty() && !GV.isThreadLocal() &&
+ !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") &&
!GV.hasAtLeastLocalUnnamedAddr())
OutStreamer->EmitAddrsigSym(getSymbol(&GV));
}
@@ -1958,6 +2011,29 @@ void AsmPrinter::EmitModuleIdents(Module &M) {
}
}
+void AsmPrinter::EmitModuleCommandLines(Module &M) {
+ MCSection *CommandLine = getObjFileLowering().getSectionForCommandLines();
+ if (!CommandLine)
+ return;
+
+ const NamedMDNode *NMD = M.getNamedMetadata("llvm.commandline");
+ if (!NMD || !NMD->getNumOperands())
+ return;
+
+ OutStreamer->PushSection();
+ OutStreamer->SwitchSection(CommandLine);
+ OutStreamer->EmitZeros(1);
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ assert(N->getNumOperands() == 1 &&
+ "llvm.commandline metadata entry can have only one operand");
+ const MDString *S = cast<MDString>(N->getOperand(0));
+ OutStreamer->EmitBytes(S->getString());
+ OutStreamer->EmitZeros(1);
+ }
+ OutStreamer->PopSection();
+}
+
//===--------------------------------------------------------------------===//
// Emission and print routines
//
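EmitModuleCommandLines() mirrors EmitModuleIdents(): it copies each string from the !llvm.commandline named metadata into the command-line section provided by the object-file lowering (on ELF this is .GCC.command.line), separating entries with NUL bytes. The metadata it consumes looks roughly like this and is typically produced by clang's -frecord-command-line (illustrative, not from this diff):

    !llvm.commandline = !{!0}
    !0 = !{!"clang -O2 -g -frecord-command-line foo.c"}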
@@ -2927,11 +3003,6 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
if (!S.usesMetadata())
return nullptr;
- assert(!S.useStatepoints() && "statepoints do not currently support custom"
- " stackmap formats, please see the documentation for a description of"
- " the default format. If you really need a custom serialized format,"
- " please file a bug");
-
gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
gcp_map_type::iterator GCPI = GCMap.find(&S);
if (GCPI != GCMap.end())
@@ -2952,6 +3023,27 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
}
+void AsmPrinter::emitStackMaps(StackMaps &SM) {
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ bool NeedsDefault = false;
+ if (MI->begin() == MI->end())
+ // No GC strategy, use the default format.
+ NeedsDefault = true;
+ else
+ for (auto &I : *MI) {
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ if (MP->emitStackMaps(SM, *this))
+ continue;
+ // The strategy doesn't have printer or doesn't emit custom stack maps.
+ // Use the default format.
+ NeedsDefault = true;
+ }
+
+ if (NeedsDefault)
+ SM.serializeToStackMapSection();
+}
+
/// Pin vtable to this file.
AsmPrinterHandler::~AsmPrinterHandler() = default;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 605588470670..afce3ad3133b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -212,6 +212,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpWindowSave:
OutStreamer->EmitCFIWindowSave();
break;
+ case MCCFIInstruction::OpNegateRAState:
+ OutStreamer->EmitCFINegateRAState();
+ break;
case MCCFIInstruction::OpSameValue:
OutStreamer->EmitCFISameValue(Inst.getRegister());
break;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 4159eb19423a..62103e3107c0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -71,6 +71,42 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
}
+unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr,
+ const MDNode *LocMDNode) const {
+ if (!DiagInfo) {
+ DiagInfo = make_unique<SrcMgrDiagInfo>();
+
+ MCContext &Context = MMI->getContext();
+ Context.setInlineSourceManager(&DiagInfo->SrcMgr);
+
+ LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+ if (LLVMCtx.getInlineAsmDiagnosticHandler()) {
+ DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get());
+ }
+ }
+
+ SourceMgr &SrcMgr = DiagInfo->SrcMgr;
+
+ std::unique_ptr<MemoryBuffer> Buffer;
+ // The inline asm source manager will outlive AsmStr, so make a copy of the
+ // string for SourceMgr to own.
+ Buffer = MemoryBuffer::getMemBufferCopy(AsmStr, "<inline asm>");
+
+ // Tell SrcMgr about this buffer, it takes ownership of the buffer.
+ unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
+
+ // Store LocMDNode in DiagInfo, using BufNum as an identifier.
+ if (LocMDNode) {
+ DiagInfo->LocInfos.resize(BufNum);
+ DiagInfo->LocInfos[BufNum - 1] = LocMDNode;
+ }
+
+ return BufNum;
+}
+
+
/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
const MCTargetOptions &MCOptions,
@@ -98,39 +134,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
return;
}
- if (!DiagInfo) {
- DiagInfo = make_unique<SrcMgrDiagInfo>();
+ unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode);
+ DiagInfo->SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
- MCContext &Context = MMI->getContext();
- Context.setInlineSourceManager(&DiagInfo->SrcMgr);
-
- LLVMContext &LLVMCtx = MMI->getModule()->getContext();
- if (LLVMCtx.getInlineAsmDiagnosticHandler()) {
- DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
- DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
- DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get());
- }
- }
-
- SourceMgr &SrcMgr = DiagInfo->SrcMgr;
- SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
-
- std::unique_ptr<MemoryBuffer> Buffer;
- // The inline asm source manager will outlive Str, so make a copy of the
- // string for SourceMgr to own.
- Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
-
- // Tell SrcMgr about this buffer, it takes ownership of the buffer.
- unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
-
- // Store LocMDNode in DiagInfo, using BufNum as an identifier.
- if (LocMDNode) {
- DiagInfo->LocInfos.resize(BufNum);
- DiagInfo->LocInfos[BufNum-1] = LocMDNode;
- }
-
- std::unique_ptr<MCAsmParser> Parser(
- createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
+ std::unique_ptr<MCAsmParser> Parser(createMCAsmParser(
+ DiagInfo->SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
// Do not use assembler-level information for parsing inline assembly.
OutStreamer->setUseAssemblerInfoForParsing(false);
@@ -148,9 +156,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
+ // Enable lexing Masm binary and hex integer literals in intel inline
+ // assembly.
if (Dialect == InlineAsm::AD_Intel)
- // We need this flag to be able to parse numbers like "0bH"
- Parser->setParsingInlineAsm(true);
+ Parser->getLexer().setLexMasmIntegers(true);
if (MF) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
@@ -519,6 +528,44 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
MCOptions.SanitizeAddress =
MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress);
+ // Emit warnings if we use reserved registers on the clobber list, as
+ // that might give surprising results.
+ std::vector<std::string> RestrRegs;
+ // Start with the first operand descriptor, and iterate over them.
+ for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands();
+ I < NumOps; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (MO.isImm()) {
+ unsigned Flags = MO.getImm();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber &&
+ !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) {
+ RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg()));
+ }
+ // Skip to one before the next operand descriptor, if it exists.
+ I += InlineAsm::getNumOperandRegisters(Flags);
+ }
+ }
+
+ if (!RestrRegs.empty()) {
+ unsigned BufNum = addInlineAsmDiagBuffer(OS.str(), LocMD);
+ auto &SrcMgr = DiagInfo->SrcMgr;
+ SMLoc Loc = SMLoc::getFromPointer(
+ SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin());
+
+ std::string Msg = "inline asm clobber list contains reserved registers: ";
+ for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) {
+ if(I != RestrRegs.begin())
+ Msg += ", ";
+ Msg += *I;
+ }
+ std::string Note = "Reserved registers on the clobber list may not be "
+ "preserved across the asm statement, and clobbering them may "
+ "lead to undefined behaviour.";
+ SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg);
+ SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note);
+ }
+
EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD,
MI->getInlineAsmDialect());
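The new check walks the inline-asm operand descriptors, collects every clobbered register that TargetRegisterInfo::isAsmClobberable() rejects (typically reserved registers such as the stack or frame pointer), and reports them through the inline-asm SourceMgr as a warning plus an explanatory note. On targets that implement the hook, something like the following may now warn (illustrative example, not from this diff):

    void f() {
      // clobbering a reserved register such as the stack pointer
      __asm__ volatile("" ::: "sp");
    }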
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 8c5c5478d01a..8cabad4ad312 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -31,6 +31,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -43,6 +44,7 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
@@ -72,6 +74,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -90,8 +93,20 @@
using namespace llvm;
using namespace llvm::codeview;
-static cl::opt<bool> EmitDebugGlobalHashes("emit-codeview-ghash-section",
- cl::ReallyHidden, cl::init(false));
+static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
+ switch (Type) {
+ case Triple::ArchType::x86:
+ return CPUType::Pentium3;
+ case Triple::ArchType::x86_64:
+ return CPUType::X64;
+ case Triple::ArchType::thumb:
+ return CPUType::Thumb;
+ case Triple::ArchType::aarch64:
+ return CPUType::ARM64;
+ default:
+ report_fatal_error("target architecture doesn't map to a CodeView CPUType");
+ }
+}
CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
: DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {
@@ -100,11 +115,21 @@ CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
!AP->getObjFileLowering().getCOFFDebugSymbolsSection()) {
Asm = nullptr;
+ MMI->setDebugInfoAvailability(false);
return;
}
-
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
+
+ TheCPU =
+ mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch());
+
+ collectGlobalVariableInfo();
+
+ // Check if we should emit type record hashes.
+ ConstantInt *GH = mdconst::extract_or_null<ConstantInt>(
+ MMI->getModule()->getModuleFlag("CodeViewGHash"));
+ EmitDebugGlobalHashes = GH && !GH->isZero();
}
StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
@@ -116,7 +141,9 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
// If this is a Unix-style path, just use it as is. Don't try to canonicalize
// it textually because one of the path components could be a symlink.
- if (!Dir.empty() && Dir[0] == '/') {
+ if (Dir.startswith("/") || Filename.startswith("/")) {
+ if (llvm::sys::path::is_absolute(Filename, llvm::sys::path::Style::posix))
+ return Filename;
Filepath = Dir;
if (Dir.back() != '/')
Filepath += '/';
@@ -337,6 +364,36 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
return recordTypeIndexForDINode(SP, TI);
}
+static bool isTrivial(const DICompositeType *DCTy) {
+ return ((DCTy->getFlags() & DINode::FlagTrivial) == DINode::FlagTrivial);
+}
+
+static FunctionOptions
+getFunctionOptions(const DISubroutineType *Ty,
+ const DICompositeType *ClassTy = nullptr,
+ StringRef SPName = StringRef("")) {
+ FunctionOptions FO = FunctionOptions::None;
+ const DIType *ReturnTy = nullptr;
+ if (auto TypeArray = Ty->getTypeArray()) {
+ if (TypeArray.size())
+ ReturnTy = TypeArray[0].resolve();
+ }
+
+ if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) {
+ if (!isTrivial(ReturnDCTy))
+ FO |= FunctionOptions::CxxReturnUdt;
+ }
+
+ // DISubroutineType is unnamed. Use the DISubprogram's name, i.e. SPName, in the comparison.
+ if (ClassTy && !isTrivial(ClassTy) && SPName == ClassTy->getName()) {
+ FO |= FunctionOptions::Constructor;
+
+ // TODO: Set the FunctionOptions::ConstructorWithVirtualBases flag when applicable.
+
+ }
+ return FO;
+}
+
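As a rough illustration of when getFunctionOptions sets these flags (the types and names below are made up, not taken from the change): a method returning a non-trivial class picks up CxxReturnUdt, and a member function named after its own non-trivial class picks up Constructor.

struct NonTrivial {
  NonTrivial();        // user-provided constructor, so DINode::FlagTrivial is absent
  ~NonTrivial();
  int Value;
};

struct Holder {
  Holder();            // name matches the enclosing non-trivial class
                       //   => FunctionOptions::Constructor
  NonTrivial get();    // returns a non-trivial UDT
                       //   => FunctionOptions::CxxReturnUdt
};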
TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP,
const DICompositeType *Class) {
// Always use the method declaration as the key for the function type. The
@@ -356,8 +413,10 @@ TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP,
// member function type.
TypeLoweringScope S(*this);
const bool IsStaticMethod = (SP->getFlags() & DINode::FlagStaticMember) != 0;
+
+ FunctionOptions FO = getFunctionOptions(SP->getType(), Class, SP->getName());
TypeIndex TI = lowerTypeMemberFunction(
- SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod);
+ SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod, FO);
return recordTypeIndexForDINode(SP, TI, Class);
}
@@ -508,6 +567,11 @@ void CodeViewDebug::endModule() {
OS.AddComment("String table");
OS.EmitCVStringTableDirective();
+ // Emit S_BUILDINFO, which points to LF_BUILDINFO. Put this in its own symbol
+ // subsection in the generic .debug$S section at the end. There is no
+ // particular reason for this ordering other than to match MSVC.
+ emitBuildInfo();
+
// Emit type information and hashes last, so that any types we translate while
// emitting function info are included.
emitTypeInformation();
@@ -669,30 +733,8 @@ static Version parseVersion(StringRef Name) {
return V;
}
-static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
- switch (Type) {
- case Triple::ArchType::x86:
- return CPUType::Pentium3;
- case Triple::ArchType::x86_64:
- return CPUType::X64;
- case Triple::ArchType::thumb:
- return CPUType::Thumb;
- case Triple::ArchType::aarch64:
- return CPUType::ARM64;
- default:
- report_fatal_error("target architecture doesn't map to a CodeView CPUType");
- }
-}
-
void CodeViewDebug::emitCompilerInformation() {
- MCContext &Context = MMI->getContext();
- MCSymbol *CompilerBegin = Context.createTempSymbol(),
- *CompilerEnd = Context.createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(CompilerEnd, CompilerBegin, 2);
- OS.EmitLabel(CompilerBegin);
- OS.AddComment("Record kind: S_COMPILE3");
- OS.EmitIntValue(SymbolKind::S_COMPILE3, 2);
+ MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3);
uint32_t Flags = 0;
NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
@@ -707,9 +749,7 @@ void CodeViewDebug::emitCompilerInformation() {
OS.EmitIntValue(Flags, 4);
OS.AddComment("CPUType");
- CPUType CPU =
- mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch());
- OS.EmitIntValue(static_cast<uint64_t>(CPU), 2);
+ OS.EmitIntValue(static_cast<uint64_t>(TheCPU), 2);
StringRef CompilerVersion = CU->getProducer();
Version FrontVer = parseVersion(CompilerVersion);
@@ -733,7 +773,48 @@ void CodeViewDebug::emitCompilerInformation() {
OS.AddComment("Null-terminated compiler version string");
emitNullTerminatedSymbolName(OS, CompilerVersion);
- OS.EmitLabel(CompilerEnd);
+ endSymbolRecord(CompilerEnd);
+}
+
+static TypeIndex getStringIdTypeIdx(GlobalTypeTableBuilder &TypeTable,
+ StringRef S) {
+ StringIdRecord SIR(TypeIndex(0x0), S);
+ return TypeTable.writeLeafType(SIR);
+}
+
+void CodeViewDebug::emitBuildInfo() {
+ // First, make LF_BUILDINFO. It's a sequence of strings with various bits of
+ // build info. The known prefix is:
+ // - Absolute path of current directory
+ // - Compiler path
+ // - Main source file path, relative to CWD or absolute
+ // - Type server PDB file
+ // - Canonical compiler command line
+ // If frontend and backend compilation are separated (think llc or LTO), it's
+ // not clear if the compiler path should refer to the executable for the
+ // frontend or the backend. Leave it blank for now.
+ TypeIndex BuildInfoArgs[BuildInfoRecord::MaxArgs] = {};
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin(); // FIXME: Multiple CUs.
+ const auto *CU = cast<DICompileUnit>(Node);
+ const DIFile *MainSourceFile = CU->getFile();
+ BuildInfoArgs[BuildInfoRecord::CurrentDirectory] =
+ getStringIdTypeIdx(TypeTable, MainSourceFile->getDirectory());
+ BuildInfoArgs[BuildInfoRecord::SourceFile] =
+ getStringIdTypeIdx(TypeTable, MainSourceFile->getFilename());
+ // FIXME: Path to compiler and command line. PDB is intentionally blank unless
+ // we implement /Zi type servers.
+ BuildInfoRecord BIR(BuildInfoArgs);
+ TypeIndex BuildInfoIndex = TypeTable.writeLeafType(BIR);
+
+ // Make a new .debug$S subsection for the S_BUILDINFO record, which points
+ // from the module symbols into the type stream.
+ MCSymbol *BISubsecEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
+ MCSymbol *BIEnd = beginSymbolRecord(SymbolKind::S_BUILDINFO);
+ OS.AddComment("LF_BUILDINFO index");
+ OS.EmitIntValue(BuildInfoIndex.getIndex(), 4);
+ endSymbolRecord(BIEnd);
+ endCVSubsection(BISubsecEnd);
}
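For a hypothetical compile of /home/user/foo.c, the record built here would conceptually hold:

// LF_BUILDINFO slots as populated by this change (paths are hypothetical):
//   BuildInfoArgs[BuildInfoRecord::CurrentDirectory] -> LF_STRING_ID "/home/user"
//   BuildInfoArgs[BuildInfoRecord::SourceFile]       -> LF_STRING_ID "foo.c"
//   The compiler path, PDB path, and command line stay as empty TypeIndex
//   values until the FIXMEs above are addressed.
// The S_BUILDINFO symbol then simply stores the TypeIndex of this LF_BUILDINFO.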
void CodeViewDebug::emitInlineeLinesSubsection() {
@@ -773,18 +854,11 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
const DILocation *InlinedAt,
const InlineSite &Site) {
- MCSymbol *InlineBegin = MMI->getContext().createTempSymbol(),
- *InlineEnd = MMI->getContext().createTempSymbol();
-
assert(TypeIndices.count({Site.Inlinee, nullptr}));
TypeIndex InlineeIdx = TypeIndices[{Site.Inlinee, nullptr}];
// SymbolRecord
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(InlineEnd, InlineBegin, 2); // RecordLength
- OS.EmitLabel(InlineBegin);
- OS.AddComment("Record kind: S_INLINESITE");
- OS.EmitIntValue(SymbolKind::S_INLINESITE, 2); // RecordKind
+ MCSymbol *InlineEnd = beginSymbolRecord(SymbolKind::S_INLINESITE);
OS.AddComment("PtrParent");
OS.EmitIntValue(0, 4);
@@ -799,9 +873,9 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum,
FI.Begin, FI.End);
- OS.EmitLabel(InlineEnd);
+ endSymbolRecord(InlineEnd);
- emitLocalVariableList(Site.InlinedLocals);
+ emitLocalVariableList(FI, Site.InlinedLocals);
// Recurse on child inlined call sites before closing the scope.
for (const DILocation *ChildSite : Site.ChildSites) {
@@ -812,10 +886,7 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
}
// Close the scope.
- OS.AddComment("Record length");
- OS.EmitIntValue(2, 2); // RecordLength
- OS.AddComment("Record kind: S_INLINESITE_END");
- OS.EmitIntValue(SymbolKind::S_INLINESITE_END, 2); // RecordKind
+ emitEndSymbolRecord(SymbolKind::S_INLINESITE_END);
}
void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
@@ -850,13 +921,7 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
// Emit S_THUNK32
- MCSymbol *ThunkRecordBegin = MMI->getContext().createTempSymbol(),
- *ThunkRecordEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(ThunkRecordEnd, ThunkRecordBegin, 2);
- OS.EmitLabel(ThunkRecordBegin);
- OS.AddComment("Record kind: S_THUNK32");
- OS.EmitIntValue(unsigned(SymbolKind::S_THUNK32), 2);
+ MCSymbol *ThunkRecordEnd = beginSymbolRecord(SymbolKind::S_THUNK32);
OS.AddComment("PtrParent");
OS.EmitIntValue(0, 4);
OS.AddComment("PtrEnd");
@@ -874,17 +939,13 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
OS.AddComment("Function name");
emitNullTerminatedSymbolName(OS, FuncName);
// Additional fields specific to the thunk ordinal would go here.
- OS.EmitLabel(ThunkRecordEnd);
+ endSymbolRecord(ThunkRecordEnd);
// Local variables/inlined routines are purposely omitted here. The point of
// marking this as a thunk is so Visual Studio will NOT stop in this routine.
// Emit S_PROC_ID_END
- const unsigned RecordLengthForSymbolEnd = 2;
- OS.AddComment("Record length");
- OS.EmitIntValue(RecordLengthForSymbolEnd, 2);
- OS.AddComment("Record kind: S_PROC_ID_END");
- OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2);
+ emitEndSymbolRecord(SymbolKind::S_PROC_ID_END);
endCVSubsection(SymbolsEnd);
}
@@ -927,19 +988,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Symbol subsection for " + Twine(FuncName));
MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
{
- MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(),
- *ProcRecordEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(ProcRecordEnd, ProcRecordBegin, 2);
- OS.EmitLabel(ProcRecordBegin);
-
- if (GV->hasLocalLinkage()) {
- OS.AddComment("Record kind: S_LPROC32_ID");
- OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2);
- } else {
- OS.AddComment("Record kind: S_GPROC32_ID");
- OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2);
- }
+ SymbolKind ProcKind = GV->hasLocalLinkage() ? SymbolKind::S_LPROC32_ID
+ : SymbolKind::S_GPROC32_ID;
+ MCSymbol *ProcRecordEnd = beginSymbolRecord(ProcKind);
// These fields are filled in by tools like CVPACK which run after the fact.
OS.AddComment("PtrParent");
@@ -968,9 +1019,28 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Function name");
// Truncate the name so we won't overflow the record length field.
emitNullTerminatedSymbolName(OS, FuncName);
- OS.EmitLabel(ProcRecordEnd);
+ endSymbolRecord(ProcRecordEnd);
- emitLocalVariableList(FI.Locals);
+ MCSymbol *FrameProcEnd = beginSymbolRecord(SymbolKind::S_FRAMEPROC);
+ // Subtract out the CSR size since MSVC excludes that and we include it.
+ OS.AddComment("FrameSize");
+ OS.EmitIntValue(FI.FrameSize - FI.CSRSize, 4);
+ OS.AddComment("Padding");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Offset of padding");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Bytes of callee saved registers");
+ OS.EmitIntValue(FI.CSRSize, 4);
+ OS.AddComment("Exception handler offset");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Exception handler section");
+ OS.EmitIntValue(0, 2);
+ OS.AddComment("Flags (defines frame register)");
+ OS.EmitIntValue(uint32_t(FI.FrameProcOpts), 4);
+ endSymbolRecord(FrameProcEnd);
+
+ emitLocalVariableList(FI, FI.Locals);
+ emitGlobalVariableList(FI.Globals);
emitLexicalBlockList(FI.ChildBlocks, FI);
// Emit inlined call site information. Only emit functions inlined directly
@@ -986,13 +1056,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
for (auto Annot : FI.Annotations) {
MCSymbol *Label = Annot.first;
MDTuple *Strs = cast<MDTuple>(Annot.second);
- MCSymbol *AnnotBegin = MMI->getContext().createTempSymbol(),
- *AnnotEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(AnnotEnd, AnnotBegin, 2);
- OS.EmitLabel(AnnotBegin);
- OS.AddComment("Record kind: S_ANNOTATION");
- OS.EmitIntValue(SymbolKind::S_ANNOTATION, 2);
+ MCSymbol *AnnotEnd = beginSymbolRecord(SymbolKind::S_ANNOTATION);
OS.EmitCOFFSecRel32(Label, /*Offset=*/0);
// FIXME: Make sure we don't overflow the max record size.
OS.EmitCOFFSectionIndex(Label);
@@ -1004,17 +1068,14 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
assert(Str.data()[Str.size()] == '\0' && "non-nullterminated MDString");
OS.EmitBytes(StringRef(Str.data(), Str.size() + 1));
}
- OS.EmitLabel(AnnotEnd);
+ endSymbolRecord(AnnotEnd);
}
if (SP != nullptr)
emitDebugInfoForUDTs(LocalUDTs);
// We're done with this function.
- OS.AddComment("Record length");
- OS.EmitIntValue(0x0002, 2);
- OS.AddComment("Record kind: S_PROC_ID_END");
- OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2);
+ emitEndSymbolRecord(SymbolKind::S_PROC_ID_END);
}
endCVSubsection(SymbolsEnd);
@@ -1034,21 +1095,8 @@ CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) {
return DR;
}
-CodeViewDebug::LocalVarDefRange
-CodeViewDebug::createDefRangeGeneral(uint16_t CVRegister, bool InMemory,
- int Offset, bool IsSubfield,
- uint16_t StructOffset) {
- LocalVarDefRange DR;
- DR.InMemory = InMemory;
- DR.DataOffset = Offset;
- DR.IsSubfield = IsSubfield;
- DR.StructOffset = StructOffset;
- DR.CVRegister = CVRegister;
- return DR;
-}
-
void CodeViewDebug::collectVariableInfoFromMFTable(
- DenseSet<InlinedVariable> &Processed) {
+ DenseSet<InlinedEntity> &Processed) {
const MachineFunction &MF = *Asm->MF;
const TargetSubtargetInfo &TSI = MF.getSubtarget();
const TargetFrameLowering *TFI = TSI.getFrameLowering();
@@ -1060,7 +1108,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
"Expected inlined-at fields to agree");
- Processed.insert(InlinedVariable(VI.Var, VI.Loc->getInlinedAt()));
+ Processed.insert(InlinedEntity(VI.Var, VI.Loc->getInlinedAt()));
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
@@ -1196,15 +1244,15 @@ void CodeViewDebug::calculateRanges(
}
void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
- DenseSet<InlinedVariable> Processed;
+ DenseSet<InlinedEntity> Processed;
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(Processed);
for (const auto &I : DbgValues) {
- InlinedVariable IV = I.first;
+ InlinedEntity IV = I.first;
if (Processed.count(IV))
continue;
- const DILocalVariable *DIVar = IV.first;
+ const DILocalVariable *DIVar = cast<DILocalVariable>(IV.first);
const DILocation *InlinedAt = IV.second;
// Instruction ranges, specifying where IV is accessible.
@@ -1228,6 +1276,9 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
}
void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
+ const TargetSubtargetInfo &TSI = MF->getSubtarget();
+ const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
const Function &GV = MF->getFunction();
auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique<FunctionInfo>()});
assert(Insertion.second && "function already has info");
@@ -1235,6 +1286,66 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn->FuncId = NextFuncId++;
CurFn->Begin = Asm->getFunctionBegin();
+ // The S_FRAMEPROC record reports the stack size, and how many bytes of
+ // callee-saved registers were used. For targets that don't use a PUSH
+ // instruction (AArch64), this will be zero.
+ CurFn->CSRSize = MFI.getCVBytesOfCalleeSavedRegisters();
+ CurFn->FrameSize = MFI.getStackSize();
+ CurFn->OffsetAdjustment = MFI.getOffsetAdjustment();
+ CurFn->HasStackRealignment = TRI->needsStackRealignment(*MF);
+
+ // For this function S_FRAMEPROC record, figure out which codeview register
+ // will be the frame pointer.
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::None; // None.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::None; // None.
+ if (CurFn->FrameSize > 0) {
+ if (!TSI.getFrameLowering()->hasFP(*MF)) {
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr;
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::StackPtr;
+ } else {
+ // If there is an FP, parameters are always relative to it.
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::FramePtr;
+ if (CurFn->HasStackRealignment) {
+ // If the stack needs realignment, locals are relative to SP or VFRAME.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr;
+ } else {
+ // Otherwise, locals are relative to EBP, and we probably have VLAs or
+ // other stack adjustments.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::FramePtr;
+ }
+ }
+ }
+
+ // Compute other frame procedure options.
+ FrameProcedureOptions FPO = FrameProcedureOptions::None;
+ if (MFI.hasVarSizedObjects())
+ FPO |= FrameProcedureOptions::HasAlloca;
+ if (MF->exposesReturnsTwice())
+ FPO |= FrameProcedureOptions::HasSetJmp;
+ // FIXME: Set HasLongJmp if we ever track that info.
+ if (MF->hasInlineAsm())
+ FPO |= FrameProcedureOptions::HasInlineAssembly;
+ if (GV.hasPersonalityFn()) {
+ if (isAsynchronousEHPersonality(
+ classifyEHPersonality(GV.getPersonalityFn())))
+ FPO |= FrameProcedureOptions::HasStructuredExceptionHandling;
+ else
+ FPO |= FrameProcedureOptions::HasExceptionHandling;
+ }
+ if (GV.hasFnAttribute(Attribute::InlineHint))
+ FPO |= FrameProcedureOptions::MarkedInline;
+ if (GV.hasFnAttribute(Attribute::Naked))
+ FPO |= FrameProcedureOptions::Naked;
+ if (MFI.hasStackProtectorIndex())
+ FPO |= FrameProcedureOptions::SecurityChecks;
+ FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U);
+ FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U);
+ if (Asm->TM.getOptLevel() != CodeGenOpt::None && !GV.optForSize() &&
+ !GV.hasFnAttribute(Attribute::OptimizeNone))
+ FPO |= FrameProcedureOptions::OptimizedForSpeed;
+ // FIXME: Set GuardCfg when it is implemented.
+ CurFn->FrameProcOpts = FPO;
+
OS.EmitCVFuncIdDirective(CurFn->FuncId);
// Find the end of the function prolog. First known non-DBG_VALUE and
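The two shifted ORs above place the chosen frame-pointer registers into bits 14-15 (locals) and 16-17 (parameters) of the S_FRAMEPROC flags word. A standalone sketch of that packing with plain integers, outside the codeview enums (the 0-3 encoding of none / stack pointer / frame pointer / base pointer is an assumption mirroring EncodedFramePtrReg):

#include <cstdint>

uint32_t packFramePtrRegs(uint32_t Flags, uint32_t LocalFP, uint32_t ParamFP) {
  Flags |= (LocalFP & 0x3u) << 14; // local-variable frame pointer field
  Flags |= (ParamFP & 0x3u) << 16; // parameter frame pointer field
  return Flags;
}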
@@ -1358,6 +1469,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
case dwarf::DW_TAG_union_type:
return lowerTypeUnion(cast<DICompositeType>(Ty));
case dwarf::DW_TAG_unspecified_type:
+ if (Ty->getName() == "decltype(nullptr)")
+ return TypeIndex::NullptrT();
return TypeIndex::None();
default:
// Use the null type index.
@@ -1552,6 +1665,9 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty,
break;
}
+ if (Ty->isObjectPointer())
+ PO |= PointerOptions::Const;
+
PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
return TypeTable.writeLeafType(PR);
}
@@ -1702,49 +1818,54 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
- ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None,
- ArgTypeIndices.size(), ArgListIndex);
+ FunctionOptions FO = getFunctionOptions(Ty);
+ ProcedureRecord Procedure(ReturnTypeIndex, CC, FO, ArgTypeIndices.size(),
+ ArgListIndex);
return TypeTable.writeLeafType(Procedure);
}
TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
const DIType *ClassTy,
int ThisAdjustment,
- bool IsStaticMethod) {
+ bool IsStaticMethod,
+ FunctionOptions FO) {
// Lower the containing class type.
TypeIndex ClassType = getTypeIndex(ClassTy);
- SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices;
- for (DITypeRef ArgTypeRef : Ty->getTypeArray())
- ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+ DITypeRefArray ReturnAndArgs = Ty->getTypeArray();
- // MSVC uses type none for variadic argument.
- if (ReturnAndArgTypeIndices.size() > 1 &&
- ReturnAndArgTypeIndices.back() == TypeIndex::Void()) {
- ReturnAndArgTypeIndices.back() = TypeIndex::None();
- }
- TypeIndex ReturnTypeIndex = TypeIndex::Void();
- ArrayRef<TypeIndex> ArgTypeIndices = None;
- if (!ReturnAndArgTypeIndices.empty()) {
- auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices);
- ReturnTypeIndex = ReturnAndArgTypesRef.front();
- ArgTypeIndices = ReturnAndArgTypesRef.drop_front();
- }
+ unsigned Index = 0;
+ SmallVector<TypeIndex, 8> ArgTypeIndices;
+ TypeIndex ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+
+ // If the first argument is a pointer type and this isn't a static method,
+ // treat it as the special 'this' parameter, which is encoded separately from
+ // the arguments.
TypeIndex ThisTypeIndex;
- if (!IsStaticMethod && !ArgTypeIndices.empty()) {
- ThisTypeIndex = ArgTypeIndices.front();
- ArgTypeIndices = ArgTypeIndices.drop_front();
+ if (!IsStaticMethod && ReturnAndArgs.size() > Index) {
+ if (const DIDerivedType *PtrTy =
+ dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index].resolve())) {
+ if (PtrTy->getTag() == dwarf::DW_TAG_pointer_type) {
+ ThisTypeIndex = getTypeIndexForThisPtr(PtrTy, Ty);
+ Index++;
+ }
+ }
}
+ while (Index < ReturnAndArgs.size())
+ ArgTypeIndices.push_back(getTypeIndex(ReturnAndArgs[Index++]));
+
+ // MSVC uses type none for variadic argument.
+ if (!ArgTypeIndices.empty() && ArgTypeIndices.back() == TypeIndex::Void())
+ ArgTypeIndices.back() = TypeIndex::None();
+
ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec);
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
- // TODO: Need to use the correct values for FunctionOptions.
- MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC,
- FunctionOptions::None, ArgTypeIndices.size(),
- ArgListIndex, ThisAdjustment);
+ MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FO,
+ ArgTypeIndices.size(), ArgListIndex, ThisAdjustment);
return TypeTable.writeLeafType(MFR);
}
@@ -1825,12 +1946,20 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
if (ImmediateScope && isa<DICompositeType>(ImmediateScope))
CO |= ClassOptions::Nested;
- // Put the Scoped flag on function-local types.
- for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
- Scope = Scope->getScope().resolve()) {
- if (isa<DISubprogram>(Scope)) {
+ // Put the Scoped flag on function-local types. MSVC sets this flag for an
+ // enum type only when it has an immediate function scope. Clang never puts enums
+ // inside DILexicalBlock scopes. Enum types, as generated by clang, are
+ // always in function, class, or file scopes.
+ if (Ty->getTag() == dwarf::DW_TAG_enumeration_type) {
+ if (ImmediateScope && isa<DISubprogram>(ImmediateScope))
CO |= ClassOptions::Scoped;
- break;
+ } else {
+ for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
+ Scope = Scope->getScope().resolve()) {
+ if (isa<DISubprogram>(Scope)) {
+ CO |= ClassOptions::Scoped;
+ break;
+ }
}
}
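In source terms, the distinction drawn above is roughly (illustrative C++):

void f() {
  enum E { A, B };      // immediate scope is the DISubprogram for f()
                        //   => ClassOptions::Scoped under the new enum rule
  struct S { int X; };  // non-enum local type: the old walk up the scope
                        //   chain still finds f() and sets Scoped
}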
@@ -1930,6 +2059,7 @@ void CodeViewDebug::clear() {
GlobalUDTs.clear();
TypeIndices.clear();
CompleteTypeIndices.clear();
+ ScopeGlobals.clear();
}
void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
@@ -2275,6 +2405,32 @@ TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) {
return recordTypeIndexForDINode(Ty, TI, ClassTy);
}
+codeview::TypeIndex
+CodeViewDebug::getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
+ const DISubroutineType *SubroutineTy) {
+ assert(PtrTy->getTag() == dwarf::DW_TAG_pointer_type &&
+ "this type must be a pointer type");
+
+ PointerOptions Options = PointerOptions::None;
+ if (SubroutineTy->getFlags() & DINode::DIFlags::FlagLValueReference)
+ Options = PointerOptions::LValueRefThisPointer;
+ else if (SubroutineTy->getFlags() & DINode::DIFlags::FlagRValueReference)
+ Options = PointerOptions::RValueRefThisPointer;
+
+ // Check if we've already translated this type. If there is no ref qualifier
+ // on the function then we look up this pointer type with no associated class
+ // so that the TypeIndex for the this pointer can be shared with the type
+ // index for other pointers to this class type. If there is a ref qualifier
+ // then we lookup the pointer using the subroutine as the parent type.
+ auto I = TypeIndices.find({PtrTy, SubroutineTy});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ TypeLoweringScope S(*this);
+ TypeIndex TI = lowerTypePointer(PtrTy, Options);
+ return recordTypeIndexForDINode(PtrTy, TI, SubroutineTy);
+}
+
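The ref-qualifier cases handled above correspond to C++ member functions declared with & or &&, e.g. (illustrative):

struct S {
  void plain();   // no ref-qualifier: shares the plain this-pointer type
  void lref() &;  // FlagLValueReference => PointerOptions::LValueRefThisPointer
  void rref() &&; // FlagRValueReference => PointerOptions::RValueRefThisPointer
};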
TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) {
DIType *Ty = TypeRef.resolve();
PointerRecord PR(getTypeIndex(Ty),
@@ -2292,6 +2448,14 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
if (!Ty)
return TypeIndex::Void();
+ // Look through typedefs when getting the complete type index. Call
+ // getTypeIndex on the typedef to ensure that any UDTs are accumulated and are
+ // emitted only once.
+ if (Ty->getTag() == dwarf::DW_TAG_typedef)
+ (void)getTypeIndex(Ty);
+ while (Ty->getTag() == dwarf::DW_TAG_typedef)
+ Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve();
+
// If this is a non-record type, the complete type index is the same as the
// normal type index. Just call getTypeIndex.
switch (Ty->getTag()) {
@@ -2360,35 +2524,40 @@ void CodeViewDebug::emitDeferredCompleteTypes() {
}
}
-void CodeViewDebug::emitLocalVariableList(ArrayRef<LocalVariable> Locals) {
+void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI,
+ ArrayRef<LocalVariable> Locals) {
// Get the sorted list of parameters and emit them first.
SmallVector<const LocalVariable *, 6> Params;
for (const LocalVariable &L : Locals)
if (L.DIVar->isParameter())
Params.push_back(&L);
- llvm::sort(Params.begin(), Params.end(),
- [](const LocalVariable *L, const LocalVariable *R) {
- return L->DIVar->getArg() < R->DIVar->getArg();
- });
+ llvm::sort(Params, [](const LocalVariable *L, const LocalVariable *R) {
+ return L->DIVar->getArg() < R->DIVar->getArg();
+ });
for (const LocalVariable *L : Params)
- emitLocalVariable(*L);
+ emitLocalVariable(FI, *L);
// Next emit all non-parameters in the order that we found them.
for (const LocalVariable &L : Locals)
if (!L.DIVar->isParameter())
- emitLocalVariable(L);
+ emitLocalVariable(FI, L);
}
-void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
- // LocalSym record, see SymbolRecord.h for more info.
- MCSymbol *LocalBegin = MMI->getContext().createTempSymbol(),
- *LocalEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(LocalEnd, LocalBegin, 2);
- OS.EmitLabel(LocalBegin);
+/// Only call this on endian-specific types like ulittle16_t and little32_t, or
+/// structs composed of them.
+template <typename T>
+static void copyBytesForDefRange(SmallString<20> &BytePrefix,
+ SymbolKind SymKind, const T &DefRangeHeader) {
+ BytePrefix.resize(2 + sizeof(T));
+ ulittle16_t SymKindLE = ulittle16_t(SymKind);
+ memcpy(&BytePrefix[0], &SymKindLE, 2);
+ memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T));
+}
- OS.AddComment("Record kind: S_LOCAL");
- OS.EmitIntValue(unsigned(SymbolKind::S_LOCAL), 2);
+void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
+ const LocalVariable &Var) {
+ // LocalSym record, see SymbolRecord.h for more info.
+ MCSymbol *LocalEnd = beginSymbolRecord(SymbolKind::S_LOCAL);
LocalSymFlags Flags = LocalSymFlags::None;
if (Var.DIVar->isParameter())
@@ -2405,7 +2574,7 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
OS.EmitIntValue(static_cast<uint16_t>(Flags), 2);
// Truncate the name so we won't overflow the record length field.
emitNullTerminatedSymbolName(OS, Var.DIVar->getName());
- OS.EmitLabel(LocalEnd);
+ endSymbolRecord(LocalEnd);
// Calculate the on disk prefix of the appropriate def range record. The
// records and on disk formats are described in SymbolRecords.h. BytePrefix
@@ -2414,45 +2583,53 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
for (const LocalVarDefRange &DefRange : Var.DefRanges) {
BytePrefix.clear();
if (DefRange.InMemory) {
- uint16_t RegRelFlags = 0;
- if (DefRange.IsSubfield) {
- RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag |
- (DefRange.StructOffset
- << DefRangeRegisterRelSym::OffsetInParentShift);
+ int Offset = DefRange.DataOffset;
+ unsigned Reg = DefRange.CVRegister;
+
+ // 32-bit x86 call sequences often use PUSH instructions, which disrupt
+ // ESP-relative offsets. Use the virtual frame pointer, VFRAME or $T0,
+ // instead. In frames without stack realignment, $T0 will be the CFA.
+ if (RegisterId(Reg) == RegisterId::ESP) {
+ Reg = unsigned(RegisterId::VFRAME);
+ Offset += FI.OffsetAdjustment;
+ }
+
+ // If we can use the chosen frame pointer for the frame and this isn't a
+ // sliced aggregate, use the smaller S_DEFRANGE_FRAMEPOINTER_REL record.
+ // Otherwise, use S_DEFRANGE_REGISTER_REL.
+ EncodedFramePtrReg EncFP = encodeFramePtrReg(RegisterId(Reg), TheCPU);
+ if (!DefRange.IsSubfield && EncFP != EncodedFramePtrReg::None &&
+ (bool(Flags & LocalSymFlags::IsParameter)
+ ? (EncFP == FI.EncodedParamFramePtrReg)
+ : (EncFP == FI.EncodedLocalFramePtrReg))) {
+ little32_t FPOffset = little32_t(Offset);
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_FRAMEPOINTER_REL, FPOffset);
+ } else {
+ uint16_t RegRelFlags = 0;
+ if (DefRange.IsSubfield) {
+ RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag |
+ (DefRange.StructOffset
+ << DefRangeRegisterRelSym::OffsetInParentShift);
+ }
+ DefRangeRegisterRelSym::Header DRHdr;
+ DRHdr.Register = Reg;
+ DRHdr.Flags = RegRelFlags;
+ DRHdr.BasePointerOffset = Offset;
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER_REL, DRHdr);
}
- DefRangeRegisterRelSym Sym(S_DEFRANGE_REGISTER_REL);
- Sym.Hdr.Register = DefRange.CVRegister;
- Sym.Hdr.Flags = RegRelFlags;
- Sym.Hdr.BasePointerOffset = DefRange.DataOffset;
- ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER_REL);
- BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind));
- BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&Sym.Hdr), sizeof(Sym.Hdr));
} else {
assert(DefRange.DataOffset == 0 && "unexpected offset into register");
if (DefRange.IsSubfield) {
- // Unclear what matters here.
- DefRangeSubfieldRegisterSym Sym(S_DEFRANGE_SUBFIELD_REGISTER);
- Sym.Hdr.Register = DefRange.CVRegister;
- Sym.Hdr.MayHaveNoName = 0;
- Sym.Hdr.OffsetInParent = DefRange.StructOffset;
-
- ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_SUBFIELD_REGISTER);
- BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind),
- sizeof(SymKind));
- BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr),
- sizeof(Sym.Hdr));
+ DefRangeSubfieldRegisterSym::Header DRHdr;
+ DRHdr.Register = DefRange.CVRegister;
+ DRHdr.MayHaveNoName = 0;
+ DRHdr.OffsetInParent = DefRange.StructOffset;
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_SUBFIELD_REGISTER, DRHdr);
} else {
- // Unclear what matters here.
- DefRangeRegisterSym Sym(S_DEFRANGE_REGISTER);
- Sym.Hdr.Register = DefRange.CVRegister;
- Sym.Hdr.MayHaveNoName = 0;
- ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER);
- BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind),
- sizeof(SymKind));
- BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr),
- sizeof(Sym.Hdr));
+ DefRangeRegisterSym::Header DRHdr;
+ DRHdr.Register = DefRange.CVRegister;
+ DRHdr.MayHaveNoName = 0;
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER, DRHdr);
}
}
OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix);
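The ESP-to-VFRAME rewrite above amounts to rebasing an ESP-relative offset by the per-function offset adjustment before emitting the def range. As a standalone sketch (the numbers and sign conventions are hypothetical; the real adjustment comes from MachineFrameInfo):

int rebaseToVFrame(int ESPOffset, int OffsetAdjustment) {
  // Mirror of "Offset += FI.OffsetAdjustment" above, with Reg switched to VFRAME.
  return ESPOffset + OffsetAdjustment;
}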
@@ -2469,15 +2646,7 @@ void CodeViewDebug::emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks,
/// lexical block scope.
void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
const FunctionInfo& FI) {
- MCSymbol *RecordBegin = MMI->getContext().createTempSymbol(),
- *RecordEnd = MMI->getContext().createTempSymbol();
-
- // Lexical block symbol record.
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(RecordEnd, RecordBegin, 2); // Record Length
- OS.EmitLabel(RecordBegin);
- OS.AddComment("Record kind: S_BLOCK32");
- OS.EmitIntValue(SymbolKind::S_BLOCK32, 2); // Record Kind
+ MCSymbol *RecordEnd = beginSymbolRecord(SymbolKind::S_BLOCK32);
OS.AddComment("PtrParent");
OS.EmitIntValue(0, 4); // PtrParent
OS.AddComment("PtrEnd");
@@ -2490,19 +2659,17 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol
OS.AddComment("Lexical block name");
emitNullTerminatedSymbolName(OS, Block.Name); // Name
- OS.EmitLabel(RecordEnd);
+ endSymbolRecord(RecordEnd);
// Emit variables local to this lexical block.
- emitLocalVariableList(Block.Locals);
+ emitLocalVariableList(FI, Block.Locals);
+ emitGlobalVariableList(Block.Globals);
// Emit lexical blocks contained within this block.
emitLexicalBlockList(Block.Children, FI);
// Close the lexical block scope.
- OS.AddComment("Record length");
- OS.EmitIntValue(2, 2); // Record Length
- OS.AddComment("Record kind: S_END");
- OS.EmitIntValue(SymbolKind::S_END, 2); // Record Kind
+ emitEndSymbolRecord(SymbolKind::S_END);
}
/// Convenience routine for collecting lexical block information for a list
@@ -2510,9 +2677,10 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
void CodeViewDebug::collectLexicalBlockInfo(
SmallVectorImpl<LexicalScope *> &Scopes,
SmallVectorImpl<LexicalBlock *> &Blocks,
- SmallVectorImpl<LocalVariable> &Locals) {
+ SmallVectorImpl<LocalVariable> &Locals,
+ SmallVectorImpl<CVGlobalVariable> &Globals) {
for (LexicalScope *Scope : Scopes)
- collectLexicalBlockInfo(*Scope, Blocks, Locals);
+ collectLexicalBlockInfo(*Scope, Blocks, Locals, Globals);
}
/// Populate the lexical blocks and local variable lists of the parent with
@@ -2520,45 +2688,58 @@ void CodeViewDebug::collectLexicalBlockInfo(
void CodeViewDebug::collectLexicalBlockInfo(
LexicalScope &Scope,
SmallVectorImpl<LexicalBlock *> &ParentBlocks,
- SmallVectorImpl<LocalVariable> &ParentLocals) {
+ SmallVectorImpl<LocalVariable> &ParentLocals,
+ SmallVectorImpl<CVGlobalVariable> &ParentGlobals) {
if (Scope.isAbstractScope())
return;
- auto LocalsIter = ScopeVariables.find(&Scope);
- if (LocalsIter == ScopeVariables.end()) {
- // This scope does not contain variables and can be eliminated.
- collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
- return;
- }
- SmallVectorImpl<LocalVariable> &Locals = LocalsIter->second;
-
+ // Gather information about the lexical scope including local variables,
+ // global variables, and address ranges.
+ bool IgnoreScope = false;
+ auto LI = ScopeVariables.find(&Scope);
+ SmallVectorImpl<LocalVariable> *Locals =
+ LI != ScopeVariables.end() ? &LI->second : nullptr;
+ auto GI = ScopeGlobals.find(Scope.getScopeNode());
+ SmallVectorImpl<CVGlobalVariable> *Globals =
+ GI != ScopeGlobals.end() ? GI->second.get() : nullptr;
const DILexicalBlock *DILB = dyn_cast<DILexicalBlock>(Scope.getScopeNode());
- if (!DILB) {
- // This scope is not a lexical block and can be eliminated, but keep any
- // local variables it contains.
- ParentLocals.append(Locals.begin(), Locals.end());
- collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
- return;
- }
-
const SmallVectorImpl<InsnRange> &Ranges = Scope.getRanges();
- if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second)) {
- // This lexical block scope has too many address ranges to represent in the
- // current CodeView format or does not have a valid address range.
- // Eliminate this lexical scope and promote any locals it contains to the
- // parent scope.
- //
- // For lexical scopes with multiple address ranges you may be tempted to
- // construct a single range covering every instruction where the block is
- // live and everything in between. Unfortunately, Visual Studio only
- // displays variables from the first matching lexical block scope. If the
- // first lexical block contains exception handling code or cold code which
- // is moved to the bottom of the routine creating a single range covering
- // nearly the entire routine, then it will hide all other lexical blocks
- // and the variables they contain.
- //
- ParentLocals.append(Locals.begin(), Locals.end());
- collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
+
+ // Ignore lexical scopes which do not contain variables.
+ if (!Locals && !Globals)
+ IgnoreScope = true;
+
+ // Ignore lexical scopes which are not lexical blocks.
+ if (!DILB)
+ IgnoreScope = true;
+
+ // Ignore scopes which have too many address ranges to represent in the
+ // current CodeView format or do not have a valid address range.
+ //
+ // For lexical scopes with multiple address ranges you may be tempted to
+ // construct a single range covering every instruction where the block is
+ // live and everything in between. Unfortunately, Visual Studio only
+ // displays variables from the first matching lexical block scope. If the
+ // first lexical block contains exception handling code or cold code which
+ // is moved to the bottom of the routine creating a single range covering
+ // nearly the entire routine, then it will hide all other lexical blocks
+ // and the variables they contain.
+ if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second))
+ IgnoreScope = true;
+
+ if (IgnoreScope) {
+ // This scope can be safely ignored and eliminating it will reduce the
+ // size of the debug information. Be sure to collect any variable and scope
+ // information from this scope or any of its children and collapse them
+ // into the parent scope.
+ if (Locals)
+ ParentLocals.append(Locals->begin(), Locals->end());
+ if (Globals)
+ ParentGlobals.append(Globals->begin(), Globals->end());
+ collectLexicalBlockInfo(Scope.getChildren(),
+ ParentBlocks,
+ ParentLocals,
+ ParentGlobals);
return;
}
@@ -2569,8 +2750,8 @@ void CodeViewDebug::collectLexicalBlockInfo(
if (!BlockInsertion.second)
return;
- // Create a lexical block containing the local variables and collect the
- // the lexical block information for the children.
+ // Create a lexical block containing the variables and collect the
+ // lexical block information for the children.
const InsnRange &Range = Ranges.front();
assert(Range.first && Range.second);
LexicalBlock &Block = BlockInsertion.first->second;
@@ -2579,9 +2760,15 @@ void CodeViewDebug::collectLexicalBlockInfo(
assert(Block.Begin && "missing label for scope begin");
assert(Block.End && "missing label for scope end");
Block.Name = DILB->getName();
- Block.Locals = std::move(Locals);
+ if (Locals)
+ Block.Locals = std::move(*Locals);
+ if (Globals)
+ Block.Globals = std::move(*Globals);
ParentBlocks.push_back(&Block);
- collectLexicalBlockInfo(Scope.getChildren(), Block.Children, Block.Locals);
+ collectLexicalBlockInfo(Scope.getChildren(),
+ Block.Children,
+ Block.Locals,
+ Block.Globals);
}
void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
@@ -2593,7 +2780,10 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
// Build the lexical block structure to emit for this routine.
if (LexicalScope *CFS = LScopes.getCurrentFunctionScope())
- collectLexicalBlockInfo(*CFS, CurFn->ChildBlocks, CurFn->Locals);
+ collectLexicalBlockInfo(*CFS,
+ CurFn->ChildBlocks,
+ CurFn->Locals,
+ CurFn->Globals);
// Clear the scope and variable information from the map which will not be
// valid after we have finished processing this routine. This also prepares
@@ -2660,30 +2850,57 @@ void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) {
OS.EmitValueToAlignment(4);
}
+static StringRef getSymbolName(SymbolKind SymKind) {
+ for (const EnumEntry<SymbolKind> &EE : getSymbolTypeNames())
+ if (EE.Value == SymKind)
+ return EE.Name;
+ return "";
+}
+
+MCSymbol *CodeViewDebug::beginSymbolRecord(SymbolKind SymKind) {
+ MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(),
+ *EndLabel = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
+ OS.EmitLabel(BeginLabel);
+ if (OS.isVerboseAsm())
+ OS.AddComment("Record kind: " + getSymbolName(SymKind));
+ OS.EmitIntValue(unsigned(SymKind), 2);
+ return EndLabel;
+}
+
+void CodeViewDebug::endSymbolRecord(MCSymbol *SymEnd) {
+ // MSVC does not pad out symbol records to four bytes, but LLVM does to avoid
+ // an extra copy of every symbol record in LLD. This increases object file
+ // size by less than 1% in the clang build, and is compatible with the Visual
+ // C++ linker.
+ OS.EmitValueToAlignment(4);
+ OS.EmitLabel(SymEnd);
+}
+
+void CodeViewDebug::emitEndSymbolRecord(SymbolKind EndKind) {
+ OS.AddComment("Record length");
+ OS.EmitIntValue(2, 2);
+ if (OS.isVerboseAsm())
+ OS.AddComment("Record kind: " + getSymbolName(EndKind));
+ OS.EmitIntValue(unsigned(EndKind), 2); // Record Kind
+}
+
void CodeViewDebug::emitDebugInfoForUDTs(
ArrayRef<std::pair<std::string, const DIType *>> UDTs) {
for (const auto &UDT : UDTs) {
const DIType *T = UDT.second;
assert(shouldEmitUdt(T));
- MCSymbol *UDTRecordBegin = MMI->getContext().createTempSymbol(),
- *UDTRecordEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(UDTRecordEnd, UDTRecordBegin, 2);
- OS.EmitLabel(UDTRecordBegin);
-
- OS.AddComment("Record kind: S_UDT");
- OS.EmitIntValue(unsigned(SymbolKind::S_UDT), 2);
-
+ MCSymbol *UDTRecordEnd = beginSymbolRecord(SymbolKind::S_UDT);
OS.AddComment("Type");
OS.EmitIntValue(getCompleteTypeIndex(T).getIndex(), 4);
-
emitNullTerminatedSymbolName(OS, UDT.first);
- OS.EmitLabel(UDTRecordEnd);
+ endSymbolRecord(UDTRecordEnd);
}
}
-void CodeViewDebug::emitDebugInfoForGlobals() {
+void CodeViewDebug::collectGlobalVariableInfo() {
DenseMap<const DIGlobalVariableExpression *, const GlobalVariable *>
GlobalMap;
for (const GlobalVariable &GV : MMI->getModule()->globals()) {
@@ -2696,42 +2913,56 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
for (const MDNode *Node : CUs->operands()) {
const auto *CU = cast<DICompileUnit>(Node);
-
- // First, emit all globals that are not in a comdat in a single symbol
- // substream. MSVC doesn't like it if the substream is empty, so only open
- // it if we have at least one global to emit.
- switchToDebugSectionForSymbol(nullptr);
- MCSymbol *EndLabel = nullptr;
for (const auto *GVE : CU->getGlobalVariables()) {
- if (const auto *GV = GlobalMap.lookup(GVE))
- if (!GV->hasComdat() && !GV->isDeclarationForLinker()) {
- if (!EndLabel) {
- OS.AddComment("Symbol subsection for globals");
- EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
- }
- // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
- emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV));
- }
+ const auto *GV = GlobalMap.lookup(GVE);
+ if (!GV || GV->isDeclarationForLinker())
+ continue;
+ const DIGlobalVariable *DIGV = GVE->getVariable();
+ DIScope *Scope = DIGV->getScope();
+ SmallVector<CVGlobalVariable, 1> *VariableList;
+ if (Scope && isa<DILocalScope>(Scope)) {
+ // Locate a global variable list for this scope, creating one if
+ // necessary.
+ auto Insertion = ScopeGlobals.insert(
+ {Scope, std::unique_ptr<GlobalVariableList>()});
+ if (Insertion.second)
+ Insertion.first->second = llvm::make_unique<GlobalVariableList>();
+ VariableList = Insertion.first->second.get();
+ } else if (GV->hasComdat())
+ // Emit this global variable into a COMDAT section.
+ VariableList = &ComdatVariables;
+ else
+ // Emit this global variable in a single global symbol section.
+ VariableList = &GlobalVariables;
+ CVGlobalVariable CVGV = {DIGV, GV};
+ VariableList->emplace_back(std::move(CVGV));
}
- if (EndLabel)
- endCVSubsection(EndLabel);
+ }
+}
- // Second, emit each global that is in a comdat into its own .debug$S
- // section along with its own symbol substream.
- for (const auto *GVE : CU->getGlobalVariables()) {
- if (const auto *GV = GlobalMap.lookup(GVE)) {
- if (GV->hasComdat()) {
- MCSymbol *GVSym = Asm->getSymbol(GV);
- OS.AddComment("Symbol subsection for " +
- Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
- switchToDebugSectionForSymbol(GVSym);
- EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
- // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
- emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym);
- endCVSubsection(EndLabel);
- }
- }
- }
+void CodeViewDebug::emitDebugInfoForGlobals() {
+ // First, emit all globals that are not in a comdat in a single symbol
+ // substream. MSVC doesn't like it if the substream is empty, so only open
+ // it if we have at least one global to emit.
+ switchToDebugSectionForSymbol(nullptr);
+ if (!GlobalVariables.empty()) {
+ OS.AddComment("Symbol subsection for globals");
+ MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
+ emitGlobalVariableList(GlobalVariables);
+ endCVSubsection(EndLabel);
+ }
+
+ // Second, emit each global that is in a comdat into its own .debug$S
+ // section along with its own symbol substream.
+ for (const CVGlobalVariable &CVGV : ComdatVariables) {
+ MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
+ OS.AddComment("Symbol subsection for " +
+ Twine(GlobalValue::dropLLVMManglingEscape(CVGV.GV->getName())));
+ switchToDebugSectionForSymbol(GVSym);
+ MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
+ endCVSubsection(EndLabel);
}
}
@@ -2747,34 +2978,26 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() {
}
}
+// Emit each global variable in the specified array.
+void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) {
+ for (const CVGlobalVariable &CVGV : Globals) {
+ MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
+ }
+}
+
void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
const GlobalVariable *GV,
MCSymbol *GVSym) {
- // DataSym record, see SymbolRecord.h for more info.
- // FIXME: Thread local data, etc
- MCSymbol *DataBegin = MMI->getContext().createTempSymbol(),
- *DataEnd = MMI->getContext().createTempSymbol();
- const unsigned FixedLengthOfThisRecord = 12;
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2);
- OS.EmitLabel(DataBegin);
- if (DIGV->isLocalToUnit()) {
- if (GV->isThreadLocal()) {
- OS.AddComment("Record kind: S_LTHREAD32");
- OS.EmitIntValue(unsigned(SymbolKind::S_LTHREAD32), 2);
- } else {
- OS.AddComment("Record kind: S_LDATA32");
- OS.EmitIntValue(unsigned(SymbolKind::S_LDATA32), 2);
- }
- } else {
- if (GV->isThreadLocal()) {
- OS.AddComment("Record kind: S_GTHREAD32");
- OS.EmitIntValue(unsigned(SymbolKind::S_GTHREAD32), 2);
- } else {
- OS.AddComment("Record kind: S_GDATA32");
- OS.EmitIntValue(unsigned(SymbolKind::S_GDATA32), 2);
- }
- }
+ // DataSym record, see SymbolRecord.h for more info. Thread local data
+ // happens to have the same format as global data.
+ SymbolKind DataSym = GV->isThreadLocal()
+ ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32
+ : SymbolKind::S_GTHREAD32)
+ : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32
+ : SymbolKind::S_GDATA32);
+ MCSymbol *DataEnd = beginSymbolRecord(DataSym);
OS.AddComment("Type");
OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
OS.AddComment("DataOffset");
@@ -2782,6 +3005,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
OS.AddComment("Segment");
OS.EmitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
- emitNullTerminatedSymbolName(OS, DIGV->getName(), FixedLengthOfThisRecord);
- OS.EmitLabel(DataEnd);
+ const unsigned LengthOfDataRecord = 12;
+ emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord);
+ endSymbolRecord(DataEnd);
}
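The four-way kind selection above maps the linkage and TLS-ness of a global onto the CodeView data symbols, e.g. for a translation unit like (illustrative):

int GlobalVar;                    // external, non-TLS      -> S_GDATA32
static int FileLocalVar;          // internal, non-TLS      -> S_LDATA32
thread_local int TlsVar;          // external, thread-local -> S_GTHREAD32
static thread_local int TlsLocal; // internal, thread-local -> S_LTHREAD32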
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 6a0da5f993d0..21557ed1be35 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -14,14 +14,14 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
-#include "DbgValueHistoryCalculator.h"
-#include "DebugHandlerBase.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
@@ -54,6 +54,12 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
BumpPtrAllocator Allocator;
codeview::GlobalTypeTableBuilder TypeTable;
+ /// Whether to emit type record hashes into .debug$H.
+ bool EmitDebugGlobalHashes = false;
+
+ /// The codeview CPU type used by the translation unit.
+ codeview::CPUType TheCPU;
+
/// Represents the most general definition range.
struct LocalVarDefRange {
/// Indicates that variable data is stored in memory relative to the
@@ -85,10 +91,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
};
static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset);
- static LocalVarDefRange createDefRangeGeneral(uint16_t CVRegister,
- bool InMemory, int Offset,
- bool IsSubfield,
- uint16_t StructOffset);
/// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
struct LocalVariable {
@@ -97,6 +99,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
bool UseReferenceType = false;
};
+ struct CVGlobalVariable {
+ const DIGlobalVariable *DIGV;
+ const GlobalVariable *GV;
+ };
+
struct InlineSite {
SmallVector<LocalVariable, 1> InlinedLocals;
SmallVector<const DILocation *, 1> ChildSites;
@@ -110,6 +117,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
// Combines information from DILexicalBlock and LexicalScope.
struct LexicalBlock {
SmallVector<LocalVariable, 1> Locals;
+ SmallVector<CVGlobalVariable, 1> Globals;
SmallVector<LexicalBlock *, 1> Children;
const MCSymbol *Begin;
const MCSymbol *End;
@@ -132,6 +140,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<const DILocation *, 1> ChildSites;
SmallVector<LocalVariable, 1> Locals;
+ SmallVector<CVGlobalVariable, 1> Globals;
std::unordered_map<const DILexicalBlockBase*, LexicalBlock> LexicalBlocks;
@@ -144,6 +153,33 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
const MCSymbol *End = nullptr;
unsigned FuncId = 0;
unsigned LastFileId = 0;
+
+ /// Number of bytes allocated in the prologue for all local stack objects.
+ unsigned FrameSize = 0;
+
+ /// Number of bytes of parameters on the stack.
+ unsigned ParamSize = 0;
+
+ /// Number of bytes pushed to save CSRs.
+ unsigned CSRSize = 0;
+
+ /// Adjustment to apply on x86 when using the VFRAME frame pointer.
+ int OffsetAdjustment = 0;
+
+ /// Two-bit value indicating which register is the designated frame pointer
+ /// register for local variables. Included in S_FRAMEPROC.
+ codeview::EncodedFramePtrReg EncodedLocalFramePtrReg =
+ codeview::EncodedFramePtrReg::None;
+
+ /// Two-bit value indicating which register is the designated frame pointer
+ /// register for stack parameters. Included in S_FRAMEPROC.
+ codeview::EncodedFramePtrReg EncodedParamFramePtrReg =
+ codeview::EncodedFramePtrReg::None;
+
+ codeview::FrameProcedureOptions FrameProcOpts;
+
+ bool HasStackRealignment = false;
+
bool HaveLineInfo = false;
};
FunctionInfo *CurFn = nullptr;
@@ -154,6 +190,17 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
// and LexicalBlocks.
DenseMap<const LexicalScope *, SmallVector<LocalVariable, 1>> ScopeVariables;
+ // Map to separate global variables according to the lexical scope they
+ // belong in. A null local scope represents the global scope.
+ typedef SmallVector<CVGlobalVariable, 1> GlobalVariableList;
+ DenseMap<const DIScope*, std::unique_ptr<GlobalVariableList> > ScopeGlobals;
+
+ // Array of global variables which need to be emitted into a COMDAT section.
+ SmallVector<CVGlobalVariable, 1> ComdatVariables;
+
+ // Array of non-COMDAT global variables.
+ SmallVector<CVGlobalVariable, 1> GlobalVariables;
+
/// The set of comdat .debug$S sections that we've seen so far. Each section
/// must start with a magic version number that must only be emitted once.
/// This set tracks which sections we've already opened.
@@ -249,6 +296,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitCompilerInformation();
+ void emitBuildInfo();
+
void emitInlineeLinesSubsection();
void emitDebugInfoForThunk(const Function *GV,
@@ -257,13 +306,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI);
- void emitDebugInfoForGlobals();
-
void emitDebugInfoForRetainedTypes();
void
emitDebugInfoForUDTs(ArrayRef<std::pair<std::string, const DIType *>> UDTs);
+ void emitDebugInfoForGlobals();
+ void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals);
void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
const GlobalVariable *GV, MCSymbol *GVSym);
@@ -271,36 +320,49 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// Returns an end label for use with endCVSubsection when the subsection is
/// finished.
MCSymbol *beginCVSubsection(codeview::DebugSubsectionKind Kind);
-
void endCVSubsection(MCSymbol *EndLabel);
+ /// Opens a symbol record of the given kind. Returns an end label for use with
+ /// endSymbolRecord.
+ MCSymbol *beginSymbolRecord(codeview::SymbolKind Kind);
+ void endSymbolRecord(MCSymbol *SymEnd);
+
+ /// Emits an S_END, S_INLINESITE_END, or S_PROC_ID_END record. These records
+ /// are empty, so we emit them with a simpler assembly sequence that doesn't
+ /// involve labels.
+ void emitEndSymbolRecord(codeview::SymbolKind EndKind);
+
void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt,
const InlineSite &Site);
- using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+ void collectGlobalVariableInfo();
void collectVariableInfo(const DISubprogram *SP);
- void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &Processed);
+ void collectVariableInfoFromMFTable(DenseSet<InlinedEntity> &Processed);
// Construct the lexical block tree for a routine, pruning empty lexical
// scopes, and populate it with local variables.
void collectLexicalBlockInfo(SmallVectorImpl<LexicalScope *> &Scopes,
SmallVectorImpl<LexicalBlock *> &Blocks,
- SmallVectorImpl<LocalVariable> &Locals);
+ SmallVectorImpl<LocalVariable> &Locals,
+ SmallVectorImpl<CVGlobalVariable> &Globals);
void collectLexicalBlockInfo(LexicalScope &Scope,
SmallVectorImpl<LexicalBlock *> &ParentBlocks,
- SmallVectorImpl<LocalVariable> &ParentLocals);
+ SmallVectorImpl<LocalVariable> &ParentLocals,
+ SmallVectorImpl<CVGlobalVariable> &ParentGlobals);
/// Records information about a local variable in the appropriate scope. In
/// particular, locals from inlined code live inside the inlining site.
void recordLocalVariable(LocalVariable &&Var, const LexicalScope *LS);
/// Emits local variables in the appropriate order.
- void emitLocalVariableList(ArrayRef<LocalVariable> Locals);
+ void emitLocalVariableList(const FunctionInfo &FI,
+ ArrayRef<LocalVariable> Locals);
/// Emits an S_LOCAL record and its associated defined ranges.
- void emitLocalVariable(const LocalVariable &Var);
+ void emitLocalVariable(const FunctionInfo &FI, const LocalVariable &Var);
/// Emits a sequence of lexical block scopes and their children.
void emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks,
@@ -314,6 +376,10 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex getTypeIndex(DITypeRef TypeRef,
DITypeRef ClassTyRef = DITypeRef());
+ codeview::TypeIndex
+ getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
+ const DISubroutineType *SubroutineTy);
+
codeview::TypeIndex getTypeIndexForReferenceTo(DITypeRef TypeRef);
codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP,
@@ -340,10 +406,10 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty);
codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty);
- codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty,
- const DIType *ClassTy,
- int ThisAdjustment,
- bool IsStaticMethod);
+ codeview::TypeIndex lowerTypeMemberFunction(
+ const DISubroutineType *Ty, const DIType *ClassTy, int ThisAdjustment,
+ bool IsStaticMethod,
+ codeview::FunctionOptions FO = codeview::FunctionOptions::None);
codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty);
codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty);
codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty);
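
The CodeViewDebug.h hunk above pairs beginSymbolRecord/endSymbolRecord because CodeView symbol records begin with a length field that is only known once the payload has been written; the real emitter brackets the record with MC labels and lets the assembler compute the difference. As a rough, self-contained sketch of that idea — an in-memory buffer with a backpatched length, simplified names and layout that are assumptions rather than the LLVM implementation:

#include <cstddef>
#include <cstdint>
#include <vector>

// Simplified sketch: emit a length-prefixed "symbol record" into a byte
// buffer, backpatching the 16-bit length once the payload size is known.
// The real emitter uses begin/end labels so the assembler does this.
class RecordWriter {
  std::vector<uint8_t> Buf;
  std::size_t LenFixup = 0;   // position of the 2-byte length placeholder

public:
  void beginRecord(uint16_t Kind) {
    LenFixup = Buf.size();
    Buf.push_back(0); Buf.push_back(0);   // length placeholder
    Buf.push_back(uint8_t(Kind));         // record kind, little-endian
    Buf.push_back(uint8_t(Kind >> 8));
  }
  void writeByte(uint8_t B) { Buf.push_back(B); }
  void endRecord() {
    // The length covers everything after the length field itself.
    uint16_t Len = uint16_t(Buf.size() - LenFixup - 2);
    Buf[LenFixup] = uint8_t(Len);
    Buf[LenFixup + 1] = uint8_t(Len >> 8);
  }
  const std::vector<uint8_t> &bytes() const { return Buf; }
};

emitEndSymbolRecord can avoid this machinery entirely because, as the comment in the hunk notes, S_END-style records carry no payload, so their size is a constant.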
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 570424a79c81..e27659494f08 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -414,6 +414,8 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_GNU_addr_index:
case dwarf::DW_FORM_ref_udata:
case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_addrx:
+ case dwarf::DW_FORM_rnglistx:
case dwarf::DW_FORM_udata:
Asm->EmitULEB128(Integer);
return;
@@ -440,6 +442,8 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_GNU_addr_index:
case dwarf::DW_FORM_ref_udata:
case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_addrx:
+ case dwarf::DW_FORM_rnglistx:
case dwarf::DW_FORM_udata:
return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata:
@@ -461,7 +465,7 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->EmitDebugThreadLocal(Expr, SizeOf(AP, Form));
+ AP->EmitDebugValue(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
@@ -585,8 +589,7 @@ void DIEString::print(raw_ostream &O) const {
//===----------------------------------------------------------------------===//
void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_string) {
- for (char ch : S)
- AP->emitInt8(ch);
+ AP->OutStreamer->EmitBytes(S);
AP->emitInt8(0);
return;
}
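
The DIE.cpp hunks add DW_FORM_addrx and DW_FORM_rnglistx to the set of forms emitted and sized as ULEB128 values. For reference, ULEB128 encoding and its size calculation look roughly like this standalone sketch (not LLVM's LEB128 helpers):

#include <cstdint>
#include <vector>

// Encode an unsigned value as ULEB128: 7 bits per byte, high bit set on
// every byte except the last.
static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;           // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out;
}

// Number of bytes the encoding above produces; this is what SizeOf() needs
// for forms such as DW_FORM_addrx, DW_FORM_rnglistx, and DW_FORM_udata.
static unsigned uleb128Size(uint64_t Value) {
  unsigned Size = 0;
  do {
    ++Size;
    Value >>= 7;
  } while (Value != 0);
  return Size;
}

Because the encoding is variable-length, the size must be recomputed from the value rather than taken from a fixed form width.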
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 25518a339c61..09867822c30a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp --------------===//
+//===- llvm/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "DbgValueHistoryCalculator.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -42,7 +42,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) {
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
}
-void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
+void DbgValueHistoryMap::startInstrRange(InlinedEntity Var,
const MachineInstr &MI) {
// Instruction range should start with a DBG_VALUE instruction for the
// variable.
@@ -57,7 +57,7 @@ void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
Ranges.push_back(std::make_pair(&MI, nullptr));
}
-void DbgValueHistoryMap::endInstrRange(InlinedVariable Var,
+void DbgValueHistoryMap::endInstrRange(InlinedEntity Var,
const MachineInstr &MI) {
auto &Ranges = VarInstrRanges[Var];
// Verify that the current instruction range is not yet closed.
@@ -68,7 +68,7 @@ void DbgValueHistoryMap::endInstrRange(InlinedVariable Var,
Ranges.back().second = &MI;
}
-unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const {
+unsigned DbgValueHistoryMap::getRegisterForVar(InlinedEntity Var) const {
const auto &I = VarInstrRanges.find(Var);
if (I == VarInstrRanges.end())
return 0;
@@ -78,17 +78,22 @@ unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const {
return isDescribedByReg(*Ranges.back().first);
}
+void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) {
+ assert(MI.isDebugLabel() && "not a DBG_LABEL");
+ LabelInstr[Label] = &MI;
+}
+
namespace {
// Maps physreg numbers to the variables they describe.
-using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
-using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedVariable, 1>>;
+using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedEntity, 1>>;
} // end anonymous namespace
// Claim that @Var is not described by @RegNo anymore.
static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
- InlinedVariable Var) {
+ InlinedEntity Var) {
const auto &I = RegVars.find(RegNo);
assert(RegNo != 0U && I != RegVars.end());
auto &VarSet = I->second;
@@ -102,7 +107,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
// Claim that @Var is now described by @RegNo.
static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
- InlinedVariable Var) {
+ InlinedEntity Var) {
assert(RegNo != 0U);
auto &VarSet = RegVars[RegNo];
assert(!is_contained(VarSet, Var));
@@ -187,9 +192,10 @@ static void collectChangingRegs(const MachineFunction *MF,
}
}
-void llvm::calculateDbgValueHistory(const MachineFunction *MF,
- const TargetRegisterInfo *TRI,
- DbgValueHistoryMap &Result) {
+void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &DbgValues,
+ DbgLabelInstrMap &DbgLabels) {
BitVector ChangingRegs(TRI->getNumRegs());
collectChangingRegs(MF, TRI, ChangingRegs);
@@ -210,14 +216,14 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// If this is a virtual register, only clobber it since it doesn't
// have aliases.
if (TRI->isVirtualRegister(MO.getReg()))
- clobberRegisterUses(RegVars, MO.getReg(), Result, MI);
+ clobberRegisterUses(RegVars, MO.getReg(), DbgValues, MI);
// If this is a register def operand, it may end a debug value
// range.
else {
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
++AI)
if (ChangingRegs.test(*AI))
- clobberRegisterUses(RegVars, *AI, Result, MI);
+ clobberRegisterUses(RegVars, *AI, DbgValues, MI);
}
} else if (MO.isRegMask()) {
// If this is a register mask operand, clobber all debug values in
@@ -226,7 +232,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// Don't consider SP to be clobbered by register masks.
if (unsigned(I) != SP && TRI->isPhysicalRegister(I) &&
MO.clobbersPhysReg(I)) {
- clobberRegisterUses(RegVars, I, Result, MI);
+ clobberRegisterUses(RegVars, I, DbgValues, MI);
}
}
}
@@ -234,26 +240,34 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
continue;
}
- // Skip DBG_LABEL instructions.
- if (MI.isDebugLabel())
- continue;
-
- assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
- // Use the base variable (without any DW_OP_piece expressions)
- // as index into History. The full variables including the
- // piece expressions are attached to the MI.
- const DILocalVariable *RawVar = MI.getDebugVariable();
- assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
- "Expected inlined-at fields to agree");
- InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt());
-
- if (unsigned PrevReg = Result.getRegisterForVar(Var))
- dropRegDescribedVar(RegVars, PrevReg, Var);
-
- Result.startInstrRange(Var, MI);
-
- if (unsigned NewReg = isDescribedByReg(MI))
- addRegDescribedVar(RegVars, NewReg, Var);
+ if (MI.isDebugValue()) {
+ assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
+ // Use the base variable (without any DW_OP_piece expressions)
+ // as index into History. The full variables including the
+ // piece expressions are attached to the MI.
+ const DILocalVariable *RawVar = MI.getDebugVariable();
+ assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ InlinedEntity Var(RawVar, MI.getDebugLoc()->getInlinedAt());
+
+ if (unsigned PrevReg = DbgValues.getRegisterForVar(Var))
+ dropRegDescribedVar(RegVars, PrevReg, Var);
+
+ DbgValues.startInstrRange(Var, MI);
+
+ if (unsigned NewReg = isDescribedByReg(MI))
+ addRegDescribedVar(RegVars, NewReg, Var);
+ } else if (MI.isDebugLabel()) {
+ assert(MI.getNumOperands() == 1 && "Invalid DBG_LABEL instruction!");
+ const DILabel *RawLabel = MI.getDebugLabel();
+ assert(RawLabel->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ // When collecting debug information for labels, there is no MCSymbol
+ // generated for it. So, we keep MachineInstr in DbgLabels in order
+ // to query MCSymbol afterward.
+ InlinedEntity L(RawLabel, MI.getDebugLoc()->getInlinedAt());
+ DbgLabels.addInstr(L, MI);
+ }
}
// Make sure locations for register-described variables are valid only
@@ -264,7 +278,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
auto CurElem = I++; // CurElem can be erased below.
if (TRI->isVirtualRegister(CurElem->first) ||
ChangingRegs.test(CurElem->first))
- clobberRegisterUses(RegVars, CurElem, Result, MBB.back());
+ clobberRegisterUses(RegVars, CurElem, DbgValues, MBB.back());
}
}
}
@@ -274,10 +288,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
dbgs() << "DbgValueHistoryMap:\n";
for (const auto &VarRangePair : *this) {
- const InlinedVariable &Var = VarRangePair.first;
+ const InlinedEntity &Var = VarRangePair.first;
const InstrRanges &Ranges = VarRangePair.second;
- const DILocalVariable *LocalVar = Var.first;
+ const DILocalVariable *LocalVar = cast<DILocalVariable>(Var.first);
const DILocation *Location = Var.second;
dbgs() << " - " << LocalVar->getName() << " at ";
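
The renaming from InlinedVariable to InlinedEntity in the hunks above widens the history key from a local variable to any debug node, so one key type can serve both DBG_VALUE ranges and DBG_LABEL instructions. A rough shape of that key/value arrangement, with placeholder types standing in for the LLVM metadata and machine-instruction classes:

#include <map>
#include <utility>
#include <vector>

// Placeholder types standing in for DINode / DILocation / MachineInstr.
struct Node {};        // base of both "variable" and "label" metadata
struct Location {};    // inlined-at location
struct Instr {};       // a machine instruction

// Key: the debug entity plus the inlined-at location it was instantiated at.
using InlinedEntity = std::pair<const Node *, const Location *>;

// DBG_VALUE history: per entity, a list of [start, end) instruction ranges.
using InstrRange   = std::pair<const Instr *, const Instr *>;
using InstrRanges  = std::vector<InstrRange>;
using ValueHistory = std::map<InlinedEntity, InstrRanges>;

// DBG_LABEL map: per label entity, the instruction that defines it, kept so
// an MCSymbol can be queried for it later.
using LabelInstrMap = std::map<InlinedEntity, const Instr *>;

Keeping the MachineInstr for labels, rather than a symbol, matches the comment in the hunk: no MCSymbol exists yet when the history is collected, so it is requested and looked up afterwards.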
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 82e14dc13cb1..551cd36d1984 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "DebugHandlerBase.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -125,6 +125,21 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) {
return LabelsAfterInsn.lookup(MI);
}
+// Return the function-local offset of an instruction.
+const MCExpr *
+DebugHandlerBase::getFunctionLocalOffsetAfterInsn(const MachineInstr *MI) {
+ MCContext &MC = Asm->OutContext;
+
+ MCSymbol *Start = Asm->getFunctionBegin();
+ const auto *StartRef = MCSymbolRefExpr::create(Start, MC);
+
+ MCSymbol *AfterInsn = getLabelAfterInsn(MI);
+ assert(AfterInsn && "Expected label after instruction");
+ const auto *AfterRef = MCSymbolRefExpr::create(AfterInsn, MC);
+
+ return MCBinaryExpr::createSub(AfterRef, StartRef, MC);
+}
+
/// If this type is derived from a base type then return base type size.
uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
DIType *Ty = TyRef.resolve();
@@ -190,8 +205,9 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
// Calculate history for local variables.
assert(DbgValues.empty() && "DbgValues map wasn't cleaned!");
- calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
- DbgValues);
+ assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!");
+ calculateDbgEntityHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
+ DbgValues, DbgLabels);
LLVM_DEBUG(DbgValues.dump());
// Request labels for the full history.
@@ -229,6 +245,12 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
}
}
+ // Ensure there is a symbol before DBG_LABEL.
+ for (const auto &I : DbgLabels) {
+ const MachineInstr *MI = I.second;
+ requestLabelBeforeInsn(MI);
+ }
+
PrevInstLoc = DebugLoc();
PrevLabel = Asm->getFunctionBegin();
beginFunctionImpl(MF);
@@ -296,6 +318,7 @@ void DebugHandlerBase::endFunction(const MachineFunction *MF) {
if (hasDebugInfo(MMI, MF))
endFunctionImpl(MF);
DbgValues.clear();
+ DbgLabels.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
}
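
getFunctionLocalOffsetAfterInsn, added above, returns the position of an instruction inside its function as a symbolic difference between two labels; the value only becomes a number once the assembler has laid out the section. A toy model of that deferred subtraction (the real code builds an MCBinaryExpr over two MCSymbolRefExprs):

#include <cassert>
#include <cstdint>
#include <optional>

// Toy symbol whose address is only filled in at "layout" time.
struct Symbol {
  std::optional<uint64_t> Address;
};

// Deferred After - Start over two symbols; evaluable once both are laid out.
struct LabelDiff {
  const Symbol *After;
  const Symbol *Start;
  uint64_t evaluate() const {
    assert(After->Address && Start->Address && "symbols not laid out yet");
    return *After->Address - *Start->Address;
  }
};

// Usage sketch:
//   Symbol FuncBegin, AfterCall;
//   LabelDiff ReturnPC{&AfterCall, &FuncBegin};
//   ... layout assigns addresses ...
//   uint64_t Off = ReturnPC.evaluate();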
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index ac49657b68fa..befa4b941c8d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -139,7 +139,7 @@ public:
// Sort the pieces by offset.
// Remove any duplicate entries by dropping all but the first.
void sortUniqueValues() {
- llvm::sort(Values.begin(), Values.end());
+ llvm::sort(Values);
Values.erase(
std::unique(
Values.begin(), Values.end(), [](const Value &A, const Value &B) {
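
The DebugLocEntry.h change only switches to llvm::sort's range overload; the surrounding sort-then-unique idiom is the standard one, shown here with plain std:: algorithms on an illustrative value type that orders and deduplicates by fragment offset:

#include <algorithm>
#include <vector>

struct Piece {
  unsigned Offset;   // fragment offset in bits
  int Payload;       // whatever the entry carries
  bool operator<(const Piece &RHS) const { return Offset < RHS.Offset; }
};

// Sort by offset, then drop all but the first entry at each offset.
static void sortUniquePieces(std::vector<Piece> &Values) {
  std::sort(Values.begin(), Values.end());
  Values.erase(std::unique(Values.begin(), Values.end(),
                           [](const Piece &A, const Piece &B) {
                             return A.Offset == B.Offset;
                           }),
               Values.end());
}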
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 32271a0ef24a..1dca3f0fce5b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -69,14 +69,16 @@ void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
// pool from the skeleton - maybe even in non-fission (possibly fewer
// relocations by sharing them in the pool, but we have other ideas about how
// to reduce the number of relocations as well/instead).
- if (!DD->useSplitDwarf() || !Skeleton)
+ if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5)
return addLocalLabelAddress(Die, Attribute, Label);
if (Label)
DD->addArangeLabel(SymbolCU(this, Label));
unsigned idx = DD->getAddressPool().getIndex(Label);
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_GNU_addr_index,
+ Die.addValue(DIEValueAllocator, Attribute,
+ DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx
+ : dwarf::DW_FORM_GNU_addr_index,
DIEInteger(idx));
}
@@ -160,6 +162,9 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
+ if (MDTuple *TP = GV->getTemplateParams())
+ addTemplateParams(*VariableDIE, DINodeArray(TP));
+
// Add location.
bool addToAccelTable = false;
DIELoc *Loc = nullptr;
@@ -186,6 +191,10 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
if (!Global && (!Expr || !Expr->isConstant()))
continue;
+ if (Global && Global->isThreadLocal() &&
+ !Asm->getObjFileLowering().supportDebugThreadLocalLocation())
+ continue;
+
if (!Loc) {
addToAccelTable = true;
Loc = new (DIEValueAllocator) DIELoc;
@@ -245,13 +254,13 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
addLinkageName(*VariableDIE, GV->getLinkageName());
if (addToAccelTable) {
- DD->addAccelName(GV->getName(), *VariableDIE);
+ DD->addAccelName(*CUNode, GV->getName(), *VariableDIE);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() &&
DD->useAllLinkageNames())
- DD->addAccelName(GV->getLinkageName(), *VariableDIE);
+ DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE);
}
return VariableDIE;
@@ -268,6 +277,7 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
(&CURanges.back().getEnd()->getSection() !=
&Range.getEnd()->getSection())) {
CURanges.push_back(Range);
+ DD->addSectionLabel(Range.getStart());
return;
}
@@ -275,6 +285,9 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
}
void DwarfCompileUnit::initStmtList() {
+ if (CUNode->isDebugDirectivesOnly())
+ return;
+
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym;
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
@@ -341,7 +354,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_subprogram nodes.
- DD->addSubprogramNames(SP, *SPDie);
+ DD->addSubprogramNames(*CUNode, SP, *SPDie);
return *SPDie;
}
@@ -412,24 +425,29 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
? TLOF.getDwarfRnglistsSection()->getBeginSymbol()
: TLOF.getDwarfRangesSection()->getBeginSymbol();
- RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range));
+ HasRangeLists = true;
+
+ // Add the range list to the set of ranges to be emitted.
+ auto IndexAndList =
+ (DD->getDwarfVersion() < 5 && Skeleton ? Skeleton->DU : DU)
+ ->addRange(*(Skeleton ? Skeleton : this), std::move(Range));
+
+ uint32_t Index = IndexAndList.first;
+ auto &List = *IndexAndList.second;
// Under fission, ranges are specified by constant offsets relative to the
// CU's DW_AT_GNU_ranges_base.
// FIXME: For DWARF v5, do not generate the DW_AT_ranges attribute under
// fission until we support the forms using the .debug_addr section
// (DW_RLE_startx_endx etc.).
- if (isDwoUnit()) {
- if (DD->getDwarfVersion() < 5)
- addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
- RangeSectionSym);
- } else {
+ if (DD->getDwarfVersion() >= 5)
+ addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index);
+ else if (isDwoUnit())
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+ else
addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
RangeSectionSym);
- }
-
- // Add the range list to the set of ranges to be emitted.
- (Skeleton ? Skeleton : this)->CURangeLists.push_back(std::move(List));
}
void DwarfCompileUnit::attachRangesOrLowHighPC(
@@ -479,7 +497,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
- DD->addSubprogramNames(InlinedSP, *ScopeDIE);
+ DD->addSubprogramNames(*CUNode, InlinedSP, *ScopeDIE);
return ScopeDIE;
}
@@ -506,6 +524,18 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) {
return D;
}
+DIE *DwarfCompileUnit::constructLabelDIE(DbgLabel &DL,
+ const LexicalScope &Scope) {
+ auto LabelDie = DIE::get(DIEValueAllocator, DL.getTag());
+ insertDIE(DL.getLabel(), LabelDie);
+ DL.setDIE(*LabelDie);
+
+ if (Scope.isAbstractScope())
+ applyLabelAttributes(DL, *LabelDie);
+
+ return LabelDie;
+}
+
DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
bool Abstract) {
// Define variable debug information entry.
@@ -699,13 +729,17 @@ DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
if (HasNonScopeChildren)
*HasNonScopeChildren = !Children.empty();
+ for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
+ Children.push_back(constructLabelDIE(*DL, *Scope));
+
for (LexicalScope *LS : Scope->getChildren())
constructScopeDIE(LS, Children);
return ObjectPointer;
}
-void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope) {
+DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
+ LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
if (Scope) {
@@ -728,6 +762,8 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, Lexi
!includeMinimalInlineScopes())
ScopeDIE.addChild(
DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters));
+
+ return ScopeDIE;
}
DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
@@ -782,6 +818,32 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
+DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
+ const DISubprogram &CalleeSP,
+ bool IsTail,
+ const MCExpr *PCOffset) {
+ // Insert a call site entry DIE within ScopeDIE.
+ DIE &CallSiteDIE =
+ createAndAddDIE(dwarf::DW_TAG_call_site, ScopeDIE, nullptr);
+
+ // For the purposes of showing tail call frames in backtraces, a key piece of
+ // information is DW_AT_call_origin, a pointer to the callee DIE.
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(&CalleeSP);
+ assert(CalleeDIE && "Could not create DIE for call site entry origin");
+ addDIEEntry(CallSiteDIE, dwarf::DW_AT_call_origin, *CalleeDIE);
+
+ if (IsTail) {
+ // Attach DW_AT_call_tail_call to tail calls for standards compliance.
+ addFlag(CallSiteDIE, dwarf::DW_AT_call_tail_call);
+ } else {
+ // Attach the return PC to allow the debugger to disambiguate call paths
+ // from one function to another.
+ assert(PCOffset && "Missing return PC information for a call");
+ addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset);
+ }
+ return CallSiteDIE;
+}
+
DIE *DwarfCompileUnit::constructImportedEntityDIE(
const DIImportedEntity *Module) {
DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
@@ -824,40 +886,51 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
}
}
-void DwarfCompileUnit::finishVariableDefinition(const DbgVariable &Var) {
- DbgVariable *AbsVar = getExistingAbstractVariable(
- InlinedVariable(Var.getVariable(), Var.getInlinedAt()));
- auto *VariableDie = Var.getDIE();
- if (AbsVar && AbsVar->getDIE()) {
- addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
- *AbsVar->getDIE());
- } else
- applyVariableAttributes(Var, *VariableDie);
-}
+void DwarfCompileUnit::finishEntityDefinition(const DbgEntity *Entity) {
+ DbgEntity *AbsEntity = getExistingAbstractEntity(Entity->getEntity());
-DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(InlinedVariable IV) {
- const DILocalVariable *Cleansed;
- return getExistingAbstractVariable(IV, Cleansed);
+ auto *Die = Entity->getDIE();
+ /// Label may be used to generate DW_AT_low_pc, so put it outside
+ /// if/else block.
+ const DbgLabel *Label = nullptr;
+ if (AbsEntity && AbsEntity->getDIE()) {
+ addDIEEntry(*Die, dwarf::DW_AT_abstract_origin, *AbsEntity->getDIE());
+ Label = dyn_cast<const DbgLabel>(Entity);
+ } else {
+ if (const DbgVariable *Var = dyn_cast<const DbgVariable>(Entity))
+ applyVariableAttributes(*Var, *Die);
+ else if ((Label = dyn_cast<const DbgLabel>(Entity)))
+ applyLabelAttributes(*Label, *Die);
+ else
+ llvm_unreachable("DbgEntity must be DbgVariable or DbgLabel.");
+ }
+
+ if (Label)
+ if (const auto *Sym = Label->getSymbol())
+ addLabelAddress(*Die, dwarf::DW_AT_low_pc, Sym);
}
-// Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(
- InlinedVariable IV, const DILocalVariable *&Cleansed) {
- // More then one inlined variable corresponds to one abstract variable.
- Cleansed = IV.first;
- auto &AbstractVariables = getAbstractVariables();
- auto I = AbstractVariables.find(Cleansed);
- if (I != AbstractVariables.end())
+DbgEntity *DwarfCompileUnit::getExistingAbstractEntity(const DINode *Node) {
+ auto &AbstractEntities = getAbstractEntities();
+ auto I = AbstractEntities.find(Node);
+ if (I != AbstractEntities.end())
return I->second.get();
return nullptr;
}
-void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var,
- LexicalScope *Scope) {
+void DwarfCompileUnit::createAbstractEntity(const DINode *Node,
+ LexicalScope *Scope) {
assert(Scope && Scope->isAbstractScope());
- auto AbsDbgVariable = llvm::make_unique<DbgVariable>(Var, /* IA */ nullptr);
- DU->addScopeVariable(Scope, AbsDbgVariable.get());
- getAbstractVariables()[Var] = std::move(AbsDbgVariable);
+ auto &Entity = getAbstractEntities()[Node];
+ if (isa<const DILocalVariable>(Node)) {
+ Entity = llvm::make_unique<DbgVariable>(
+ cast<const DILocalVariable>(Node), nullptr /* IA */);;
+ DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get()));
+ } else if (isa<const DILabel>(Node)) {
+ Entity = llvm::make_unique<DbgLabel>(
+ cast<const DILabel>(Node), nullptr /* IA */);
+ DU->addScopeLabel(Scope, cast<DbgLabel>(Entity.get()));
+ }
}
void DwarfCompileUnit::emitHeader(bool UseOffsets) {
@@ -876,13 +949,18 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {
}
bool DwarfCompileUnit::hasDwarfPubSections() const {
- // Opting in to GNU Pubnames/types overrides the default to ensure these are
- // generated for things like Gold's gdb_index generation.
- if (CUNode->getGnuPubnames())
+ switch (CUNode->getNameTableKind()) {
+ case DICompileUnit::DebugNameTableKind::None:
+ return false;
+ // Opting in to GNU Pubnames/types overrides the default to ensure these are
+ // generated for things like Gold's gdb_index generation.
+ case DICompileUnit::DebugNameTableKind::GNU:
return true;
-
- return DD->tuneForGDB() && DD->usePubSections() &&
- !includeMinimalInlineScopes();
+ case DICompileUnit::DebugNameTableKind::Default:
+ return DD->tuneForGDB() && !includeMinimalInlineScopes() &&
+ !CUNode->isDebugDirectivesOnly();
+ }
+ llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum");
}
/// addGlobalName - Add a new global name to the compile unit.
@@ -939,8 +1017,6 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
"block byref variable without a complex expression");
if (DV.hasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
- else if (DV.isBlockByrefVariable())
- addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
else
addAddress(Die, dwarf::DW_AT_location, Location);
}
@@ -1012,12 +1088,27 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
addFlag(VariableDie, dwarf::DW_AT_artificial);
}
+void DwarfCompileUnit::applyLabelAttributes(const DbgLabel &Label,
+ DIE &LabelDie) {
+ StringRef Name = Label.getName();
+ if (!Name.empty())
+ addString(LabelDie, dwarf::DW_AT_name, Name);
+ const auto *DILabel = Label.getLabel();
+ addSourceLine(LabelDie, DILabel);
+}
+
/// Add a Dwarf expression attribute data and value.
void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
const MCExpr *Expr) {
Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr));
}
+void DwarfCompileUnit::addAddressExpr(DIE &Die, dwarf::Attribute Attribute,
+ const MCExpr *Expr) {
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
+ DIEExpr(Expr));
+}
+
void DwarfCompileUnit::applySubprogramAttributesToDefinition(
const DISubprogram *SP, DIE &SPDie) {
auto *SPDecl = SP->getDeclaration();
@@ -1034,3 +1125,12 @@ bool DwarfCompileUnit::includeMinimalInlineScopes() const {
return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
}
+
+void DwarfCompileUnit::addAddrTableBase() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ MCSymbol *Label = DD->getAddressPool().getLabel();
+ addSectionLabel(getUnitDie(),
+ getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base
+ : dwarf::DW_AT_GNU_addr_base,
+ Label, TLOF.getDwarfAddrSection()->getBeginSymbol());
+}
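
Several of the DwarfCompileUnit.cpp hunks follow the same pattern: use the standardized DWARF v5 attribute or form when the emitted version is at least 5, and fall back to the GNU extension otherwise. Sketched as a standalone helper using the published constant values from the DWARF v5 and GNU specifications:

#include <cstdint>

// DW_FORM_GNU_addr_index is the pre-v5 GNU extension; DW_FORM_addrx is the
// standardized DWARF v5 equivalent.
enum Form : uint16_t {
  DW_FORM_addrx          = 0x1b,
  DW_FORM_GNU_addr_index = 0x1f01,
};

enum Attribute : uint16_t {
  DW_AT_addr_base     = 0x73,
  DW_AT_GNU_addr_base = 0x2133,
};

// Version-dependent selection, mirroring the pattern used for address-pool
// references and for the address table base attribute.
static Form addrIndexForm(unsigned DwarfVersion) {
  return DwarfVersion >= 5 ? DW_FORM_addrx : DW_FORM_GNU_addr_index;
}
static Attribute addrBaseAttr(unsigned DwarfVersion) {
  return DwarfVersion >= 5 ? DW_AT_addr_base : DW_AT_GNU_addr_base;
}

The same version check is what selects DW_FORM_rnglistx in addScopeRangeList instead of the older section-relative range forms.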
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 51e1558fe4a3..9ec22f68c12f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -14,7 +14,6 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
-#include "DbgValueHistoryCalculator.h"
#include "DwarfDebug.h"
#include "DwarfUnit.h"
#include "llvm/ADT/ArrayRef.h"
@@ -23,6 +22,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -44,6 +44,7 @@ class MDNode;
class DwarfCompileUnit final : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
unsigned UniqueID;
+ bool HasRangeLists = false;
/// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
/// the need to search for it in applyStmtList.
@@ -69,10 +70,6 @@ class DwarfCompileUnit final : public DwarfUnit {
/// GlobalTypes - A map of globally visible types for this unit.
StringMap<const DIE *> GlobalTypes;
- // List of range lists for a given compile unit, separate from the ranges for
- // the CU itself.
- SmallVector<RangeSpanList, 1> CURangeLists;
-
// List of ranges for a given compile unit.
SmallVector<RangeSpan, 2> CURanges;
@@ -81,7 +78,7 @@ class DwarfCompileUnit final : public DwarfUnit {
const MCSymbol *BaseAddress = nullptr;
DenseMap<const MDNode *, DIE *> AbstractSPDies;
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// DWO ID for correlating skeleton and split units.
uint64_t DWOId = 0;
@@ -98,16 +95,17 @@ class DwarfCompileUnit final : public DwarfUnit {
return DU->getAbstractSPDies();
}
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
- return AbstractVariables;
- return DU->getAbstractVariables();
+ return AbstractEntities;
+ return DU->getAbstractEntities();
}
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
+ bool hasRangeLists() const { return HasRangeLists; }
unsigned getUniqueID() const { return UniqueID; }
DwarfCompileUnit *getSkeleton() const {
@@ -194,30 +192,39 @@ public:
DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope,
DIE *&ObjectPointer);
+ /// Construct a DIE for the given DbgLabel.
+ DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope);
+
/// A helper function to create children of a Scope DIE.
DIE *createScopeChildrenDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &Children,
bool *HasNonScopeChildren = nullptr);
/// Construct a DIE for this subprogram scope.
- void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope);
+ DIE &constructSubprogramScopeDIE(const DISubprogram *Sub,
+ LexicalScope *Scope);
DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+ /// Construct a call site entry DIE describing a call within \p Scope to a
+ /// callee described by \p CalleeSP. \p IsTail specifies whether the call is
+ /// a tail call. \p PCOffset must be non-zero for non-tail calls or be the
+ /// function-local offset to PC value after the call instruction.
+ DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram &CalleeSP,
+ bool IsTail, const MCExpr *PCOffset);
+
/// Construct import_module DIE.
DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
void finishSubprogramDefinition(const DISubprogram *SP);
- void finishVariableDefinition(const DbgVariable &Var);
+ void finishEntityDefinition(const DbgEntity *Entity);
/// Find abstract variable associated with Var.
- using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
- DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
- const DILocalVariable *&Cleansed);
- DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
- void createAbstractVariable(const DILocalVariable *Var, LexicalScope *Scope);
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+ DbgEntity *getExistingAbstractEntity(const DINode *Node);
+ void createAbstractEntity(const DINode *Node, LexicalScope *Scope);
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
@@ -236,6 +243,9 @@ public:
void emitHeader(bool UseOffsets) override;
+ /// Add the DW_AT_addr_base attribute to the unit DIE.
+ void addAddrTableBase();
+
MCSymbol *getLabelBegin() const {
assert(getSection());
return LabelBegin;
@@ -285,13 +295,13 @@ public:
/// Add a Dwarf expression attribute data and value.
void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
+ /// Add an attribute containing an address expression to \p Die.
+ void addAddressExpr(DIE &Die, dwarf::Attribute Attribute, const MCExpr *Expr);
+
void applySubprogramAttributesToDefinition(const DISubprogram *SP,
DIE &SPDie);
- /// getRangeLists - Get the vector of range lists.
- const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
- return (Skeleton ? Skeleton : this)->CURangeLists;
- }
+ void applyLabelAttributes(const DbgLabel &Label, DIE &LabelDie);
/// getRanges - Get the list of ranges for this unit.
const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
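
The header changes above replace the DbgVariable-only maps with DbgEntity ones so that variables and labels share a single ownership and lookup path, with dyn_cast distinguishing them in the .cpp hunks. The class shape behind that, reduced to a self-contained kind-tagged hierarchy (LLVM uses its own isa/dyn_cast support rather than this hand-rolled version):

#include <memory>
#include <vector>

// Minimal stand-in for the DbgEntity / DbgVariable / DbgLabel split.
class DbgEntity {
public:
  enum Kind { DbgVariableKind, DbgLabelKind };
  explicit DbgEntity(Kind K) : EntityKind(K) {}
  virtual ~DbgEntity() = default;
  Kind getKind() const { return EntityKind; }

private:
  Kind EntityKind;
};

class DbgVariable : public DbgEntity {
public:
  DbgVariable() : DbgEntity(DbgVariableKind) {}
  static bool classof(const DbgEntity *E) {
    return E->getKind() == DbgVariableKind;
  }
};

class DbgLabel : public DbgEntity {
public:
  DbgLabel() : DbgEntity(DbgLabelKind) {}
  static bool classof(const DbgEntity *E) {
    return E->getKind() == DbgLabelKind;
  }
};

// One container owns both kinds; callers branch on the kind when emitting.
using ConcreteEntities = std::vector<std::unique_ptr<DbgEntity>>;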
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 19b3afef34b5..1de2ffb6cfa1 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
@@ -130,11 +131,6 @@ DwarfInlinedStrings("dwarf-inlined-strings", cl::Hidden,
cl::init(Default));
static cl::opt<bool>
- NoDwarfPubSections("no-dwarf-pub-sections", cl::Hidden,
- cl::desc("Disable emission of DWARF pub sections."),
- cl::init(false));
-
-static cl::opt<bool>
NoDwarfRangesSection("no-dwarf-ranges-section", cl::Hidden,
cl::desc("Disable emission .debug_ranges section."),
cl::init(false));
@@ -188,12 +184,12 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
}
bool DbgVariable::isBlockByrefVariable() const {
- assert(Var && "Invalid complex DbgVariable!");
- return Var->getType().resolve()->isBlockByrefStruct();
+ assert(getVariable() && "Invalid complex DbgVariable!");
+ return getVariable()->getType().resolve()->isBlockByrefStruct();
}
const DIType *DbgVariable::getType() const {
- DIType *Ty = Var->getType().resolve();
+ DIType *Ty = getVariable()->getType().resolve();
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
if (Ty->isBlockByrefStruct()) {
@@ -246,7 +242,7 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
return A.Expr->isFragment();
}) &&
"multiple FI expressions without DW_OP_LLVM_fragment");
- llvm::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
+ llvm::sort(FrameIndexExprs,
[](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
return A.Expr->getFragmentInfo()->OffsetInBits <
B.Expr->getFragmentInfo()->OffsetInBits;
@@ -258,8 +254,8 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
void DbgVariable::addMMIEntry(const DbgVariable &V) {
assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
- assert(V.Var == Var && "conflicting variable");
- assert(V.IA == IA && "conflicting inlined-at location");
+ assert(V.getVariable() == getVariable() && "conflicting variable");
+ assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location");
assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
@@ -355,7 +351,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfVersion =
TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);
- UsePubSections = !NoDwarfPubSections && !TT.isNVPTX();
UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();
// Use sections as references. Force for NVPTX.
@@ -421,30 +416,35 @@ static StringRef getObjCMethodName(StringRef In) {
}
// Add the various names to the Dwarf accelerator table names.
-void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
+void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
+ const DISubprogram *SP, DIE &Die) {
+ if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
+ return;
+
if (!SP->isDefinition())
return;
if (SP->getName() != "")
- addAccelName(SP->getName(), Die);
+ addAccelName(CU, SP->getName(), Die);
// If the linkage name is different than the name, go ahead and output that as
// well into the name table. Only do that if we are going to actually emit
// that name.
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() &&
(useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP)))
- addAccelName(SP->getLinkageName(), Die);
+ addAccelName(CU, SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
// too.
if (isObjCClass(SP->getName())) {
StringRef Class, Category;
getObjCClassCategory(SP->getName(), Class, Category);
- addAccelObjC(Class, Die);
+ addAccelObjC(CU, Class, Die);
if (Category != "")
- addAccelObjC(Category, Die);
+ addAccelObjC(CU, Category, Die);
// Also add the base method name to the name table.
- addAccelName(getObjCMethodName(SP->getName()), Die);
+ addAccelName(CU, getObjCMethodName(SP->getName()), Die);
}
}
@@ -503,6 +503,64 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
+void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
+ DwarfCompileUnit &CU, DIE &ScopeDIE,
+ const MachineFunction &MF) {
+ // Add a call site-related attribute (DWARF5, Sec. 3.3.1.3). Do this only if
+ // the subprogram is required to have one.
+ if (!SP.areAllCallsDescribed() || !SP.isDefinition())
+ return;
+
+ // Use DW_AT_call_all_calls to express that call site entries are present
+ // for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls
+ // because one of its requirements is not met: call site entries for
+ // optimized-out calls are elided.
+ CU.addFlag(ScopeDIE, dwarf::DW_AT_call_all_calls);
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "TargetInstrInfo not found: cannot label tail calls");
+
+ // Emit call site entries for each call or tail call in the function.
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB.instrs()) {
+ // Skip instructions which aren't calls. Both calls and tail-calling jump
+ // instructions (e.g TAILJMPd64) are classified correctly here.
+ if (!MI.isCall())
+ continue;
+
+ // TODO: Add support for targets with delay slots (see: beginInstruction).
+ if (MI.hasDelaySlot())
+ return;
+
+ // If this is a direct call, find the callee's subprogram.
+ const MachineOperand &CalleeOp = MI.getOperand(0);
+ if (!CalleeOp.isGlobal())
+ continue;
+ const Function *CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
+ if (!CalleeDecl || !CalleeDecl->getSubprogram())
+ continue;
+
+ // TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
+ // TODO: Add support for indirect calls.
+
+ bool IsTail = TII->isTailCall(MI);
+
+ // For tail calls, no return PC information is needed. For regular calls,
+ // the return PC is needed to disambiguate paths in the call graph which
+ // could lead to some target function.
+ const MCExpr *PCOffset =
+ IsTail ? nullptr : getFunctionLocalOffsetAfterInsn(&MI);
+
+ assert((IsTail || PCOffset) && "Call without return PC information");
+ LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> "
+ << CalleeDecl->getName() << (IsTail ? " [tail]" : "")
+ << "\n");
+ CU.constructCallSiteEntryDIE(ScopeDIE, *CalleeDecl->getSubprogram(),
+ IsTail, PCOffset);
+ }
+ }
+}
+
void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
if (!U.hasDwarfPubSections())
return;
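
The DwarfDebug::constructCallSiteEntryDIEs function added in the hunk above scans each function for direct calls and records, per call, the callee, whether it is a tail call, and (for non-tail calls) the function-local return-PC offset. Reduced to a standalone sketch over placeholder instruction records, with names that are illustrative assumptions only:

#include <cstdint>
#include <string>
#include <vector>

// Reduced model of the call-site scan.
struct Insn {
  bool IsCall = false;
  bool IsTailCall = false;
  std::string Callee;        // empty for indirect calls
  uint64_t OffsetAfter = 0;  // function-local offset just past the insn
};

struct CallSiteEntry {
  std::string Callee;
  bool IsTail;
  uint64_t ReturnPC;         // meaningful only when !IsTail
};

static std::vector<CallSiteEntry>
collectCallSites(const std::vector<Insn> &Fn) {
  std::vector<CallSiteEntry> Out;
  for (const Insn &I : Fn) {
    if (!I.IsCall || I.Callee.empty())   // skip non-calls, indirect calls
      continue;
    Out.push_back({I.Callee, I.IsTailCall,
                   I.IsTailCall ? 0 : I.OffsetAfter});
  }
  return Out;
}

Tail calls get DW_AT_call_tail_call instead of a return PC in the real code, which is why the offset is left unset for them here.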
@@ -510,41 +568,14 @@ void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
}
-// Create new DwarfCompileUnit for the given metadata node with tag
-// DW_TAG_compile_unit.
-DwarfCompileUnit &
-DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
- if (auto *CU = CUMap.lookup(DIUnit))
- return *CU;
- StringRef FN = DIUnit->getFilename();
- CompilationDir = DIUnit->getDirectory();
-
- auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
- InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
- DwarfCompileUnit &NewCU = *OwnedUnit;
+void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
+ DwarfCompileUnit &NewCU) {
DIE &Die = NewCU.getUnitDie();
- InfoHolder.addUnit(std::move(OwnedUnit));
- if (useSplitDwarf()) {
- NewCU.setSkeleton(constructSkeletonCU(NewCU));
- NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
- Asm->TM.Options.MCOptions.SplitDwarfFile);
- }
-
- for (auto *IE : DIUnit->getImportedEntities())
- NewCU.addImportedEntity(IE);
-
- // LTO with assembly output shares a single line table amongst multiple CUs.
- // To avoid the compilation directory being ambiguous, let the line table
- // explicitly describe the directory of all files, never relying on the
- // compilation directory.
- if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
- Asm->OutStreamer->emitDwarfFile0Directive(
- CompilationDir, FN, NewCU.getMD5AsBytes(DIUnit->getFile()),
- DIUnit->getSource(), NewCU.getUniqueID());
+ StringRef FN = DIUnit->getFilename();
StringRef Producer = DIUnit->getProducer();
StringRef Flags = DIUnit->getFlags();
- if (!Flags.empty()) {
+ if (!Flags.empty() && !useAppleExtensionAttributes()) {
std::string ProducerWithFlags = Producer.str() + " " + Flags.str();
NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags);
} else
@@ -582,11 +613,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
dwarf::DW_FORM_data1, RVer);
}
- if (useSplitDwarf())
- NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
- else
- NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
-
if (DIUnit->getDWOId()) {
// This CU is either a clang module DWO or a skeleton CU.
NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
@@ -596,9 +622,44 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
DIUnit->getSplitDebugFilename());
}
+}
+// Create new DwarfCompileUnit for the given metadata node with tag
+// DW_TAG_compile_unit.
+DwarfCompileUnit &
+DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
+ if (auto *CU = CUMap.lookup(DIUnit))
+ return *CU;
+
+ CompilationDir = DIUnit->getDirectory();
+
+ auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
+ InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
+ DwarfCompileUnit &NewCU = *OwnedUnit;
+ InfoHolder.addUnit(std::move(OwnedUnit));
+
+ for (auto *IE : DIUnit->getImportedEntities())
+ NewCU.addImportedEntity(IE);
+
+ // LTO with assembly output shares a single line table amongst multiple CUs.
+ // To avoid the compilation directory being ambiguous, let the line table
+ // explicitly describe the directory of all files, never relying on the
+ // compilation directory.
+ if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
+ Asm->OutStreamer->emitDwarfFile0Directive(
+ CompilationDir, DIUnit->getFilename(),
+ NewCU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource(),
+ NewCU.getUniqueID());
+
+ if (useSplitDwarf()) {
+ NewCU.setSkeleton(constructSkeletonCU(NewCU));
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
+ } else {
+ finishUnitAttributes(DIUnit, NewCU);
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
+ }
CUMap.insert({DIUnit, &NewCU});
- CUDieMap.insert({&Die, &NewCU});
+ CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
}
@@ -613,22 +674,21 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
/// Sort and unique GVEs by comparing their fragment offset.
static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
- llvm::sort(GVEs.begin(), GVEs.end(),
- [](DwarfCompileUnit::GlobalExpr A,
- DwarfCompileUnit::GlobalExpr B) {
- // Sort order: first null exprs, then exprs without fragment
- // info, then sort by fragment offset in bits.
- // FIXME: Come up with a more comprehensive comparator so
- // the sorting isn't non-deterministic, and so the following
- // std::unique call works correctly.
- if (!A.Expr || !B.Expr)
- return !!B.Expr;
- auto FragmentA = A.Expr->getFragmentInfo();
- auto FragmentB = B.Expr->getFragmentInfo();
- if (!FragmentA || !FragmentB)
- return !!FragmentB;
- return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
- });
+ llvm::sort(
+ GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
+ // Sort order: first null exprs, then exprs without fragment
+ // info, then sort by fragment offset in bits.
+ // FIXME: Come up with a more comprehensive comparator so
+ // the sorting isn't non-deterministic, and so the following
+ // std::unique call works correctly.
+ if (!A.Expr || !B.Expr)
+ return !!B.Expr;
+ auto FragmentA = A.Expr->getFragmentInfo();
+ auto FragmentB = B.Expr->getFragmentInfo();
+ if (!FragmentA || !FragmentB)
+ return !!FragmentB;
+ return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
+ });
GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
[](DwarfCompileUnit::GlobalExpr A,
DwarfCompileUnit::GlobalExpr B) {
@@ -644,15 +704,18 @@ sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
void DwarfDebug::beginModule() {
NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName,
DWARFGroupDescription, TimePassesIsEnabled);
- if (DisableDebugInfoPrinting)
+ if (DisableDebugInfoPrinting) {
+ MMI->setDebugInfoAvailability(false);
return;
+ }
const Module *M = MMI->getModule();
unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
// Tell MMI whether we have debug info.
- MMI->setDebugInfoAvailability(NumDebugCUs > 0);
+ assert(MMI->hasDebugInfo() == (NumDebugCUs > 0) &&
+ "DebugInfoAvailabilty initialized unexpectedly");
SingleCU = NumDebugCUs == 1;
DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
GVMap;
@@ -670,11 +733,24 @@ void DwarfDebug::beginModule() {
(useSplitDwarf() ? SkeletonHolder : InfoHolder)
.setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base"));
- // Create the symbol that designates the start of the DWARF v5 range list
- // table. It is located past the header and before the offsets table.
- if (getDwarfVersion() >= 5)
- (useSplitDwarf() ? SkeletonHolder : InfoHolder)
- .setRnglistsTableBaseSym(Asm->createTempSymbol("rnglists_table_base"));
+
+ // Create the symbols that designates the start of the DWARF v5 range list
+ // and locations list tables. They are located past the table headers.
+ if (getDwarfVersion() >= 5) {
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.setRnglistsTableBaseSym(
+ Asm->createTempSymbol("rnglists_table_base"));
+ Holder.setLoclistsTableBaseSym(
+ Asm->createTempSymbol("loclists_table_base"));
+
+ if (useSplitDwarf())
+ InfoHolder.setRnglistsTableBaseSym(
+ Asm->createTempSymbol("rnglists_dwo_table_base"));
+ }
+
+ // Create the symbol that points to the first entry following the debug
+ // address table (.debug_addr) header.
+ AddrPool.setLabel(Asm->createTempSymbol("addr_table_base"));
for (DICompileUnit *CUNode : M->debug_compile_units()) {
// FIXME: Move local imported entities into a list attached to the
@@ -728,16 +804,16 @@ void DwarfDebug::beginModule() {
}
}
-void DwarfDebug::finishVariableDefinitions() {
- for (const auto &Var : ConcreteVariables) {
- DIE *VariableDie = Var->getDIE();
- assert(VariableDie);
+void DwarfDebug::finishEntityDefinitions() {
+ for (const auto &Entity : ConcreteEntities) {
+ DIE *Die = Entity->getDIE();
+ assert(Die);
// FIXME: Consider the time-space tradeoff of just storing the unit pointer
- // in the ConcreteVariables list, rather than looking it up again here.
+ // in the ConcreteEntities list, rather than looking it up again here.
// DIE::getUnit isn't simple - it walks parent pointers, etc.
- DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie());
+ DwarfCompileUnit *Unit = CUDieMap.lookup(Die->getUnitDie());
assert(Unit);
- Unit->finishVariableDefinition(*Var);
+ Unit->finishEntityDefinition(Entity.get());
}
}
@@ -755,7 +831,7 @@ void DwarfDebug::finalizeModuleInfo() {
finishSubprogramDefinitions();
- finishVariableDefinitions();
+ finishEntityDefinitions();
// Include the DWO file name in the hash if there's more than one CU.
// This handles ThinLTO's situation where imported CUs may very easily be
@@ -768,6 +844,8 @@ void DwarfDebug::finalizeModuleInfo() {
// all other generation.
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
+ if (TheCU.getCUNode()->isDebugDirectivesOnly())
+ continue;
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
TheCU.constructContainingTypeDIEs();
@@ -776,7 +854,12 @@ void DwarfDebug::finalizeModuleInfo() {
// If we're splitting the dwarf out now that we've got the entire
// CU then add the dwo id to it.
auto *SkCU = TheCU.getSkeleton();
- if (useSplitDwarf()) {
+ if (useSplitDwarf() && !empty(TheCU.getUnitDie().children())) {
+ finishUnitAttributes(TheCU.getCUNode(), TheCU);
+ TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
+ SkCU->addString(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
// Emit a unique identifier for this CU.
uint64_t ID =
DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie());
@@ -789,18 +872,14 @@ void DwarfDebug::finalizeModuleInfo() {
SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
}
- // We don't keep track of which addresses are used in which CU so this
- // is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty()) {
- const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol();
- SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
- Sym, Sym);
- }
- if (getDwarfVersion() < 5 && !SkCU->getRangeLists().empty()) {
+
+ if (getDwarfVersion() < 5 && !SkeletonHolder.getRangeLists().empty()) {
const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
Sym, Sym);
}
+ } else if (SkCU) {
+ finishUnitAttributes(SkCU->getCUNode(), *SkCU);
}
// If we have code split among multiple sections or non-contiguous
@@ -810,6 +889,14 @@ void DwarfDebug::finalizeModuleInfo() {
// .subsections_via_symbols in mach-o. This would mean turning on
// ranges for all subprogram DIEs for mach-o.
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+
+ // We don't keep track of which addresses are used in which CU so this
+ // is a bit pessimistic under LTO.
+ if (!AddrPool.isEmpty() &&
+ (getDwarfVersion() >= 5 ||
+ (SkCU && !empty(TheCU.getUnitDie().children()))))
+ U.addAddrTableBase();
+
if (unsigned NumRanges = TheCU.getRanges().size()) {
if (NumRanges > 1 && useRangesSection())
// A DW_AT_low_pc attribute may also be specified in combination with
@@ -822,9 +909,13 @@ void DwarfDebug::finalizeModuleInfo() {
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
- if (getDwarfVersion() >= 5 && !useSplitDwarf() &&
- !U.getRangeLists().empty())
- U.addRnglistsBase();
+ if (getDwarfVersion() >= 5) {
+ if (U.hasRangeLists())
+ U.addRnglistsBase();
+
+ if (!DebugLocs.getLists().empty() && !useSplitDwarf())
+ U.addLoclistsBase();
+ }
auto *CUNode = cast<DICompileUnit>(P.first);
// If compile Unit has macros, emit "DW_AT_macro_info" attribute.
@@ -888,9 +979,11 @@ void DwarfDebug::endModule() {
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
- emitDebugAddr();
+ emitDebugRangesDWO();
}
+ emitDebugAddr();
+
// Emit info into the dwarf accelerator table sections.
switch (getAccelTableKind()) {
case AccelTableKind::Apple:
@@ -915,38 +1008,37 @@ void DwarfDebug::endModule() {
// FIXME: AbstractVariables.clear();
}
-void DwarfDebug::ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV,
- const MDNode *ScopeNode) {
- const DILocalVariable *Cleansed = nullptr;
- if (CU.getExistingAbstractVariable(IV, Cleansed))
+void DwarfDebug::ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
+ const DINode *Node,
+ const MDNode *ScopeNode) {
+ if (CU.getExistingAbstractEntity(Node))
return;
- CU.createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
+ CU.createAbstractEntity(Node, LScopes.getOrCreateAbstractScope(
cast<DILocalScope>(ScopeNode)));
}
-void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU,
- InlinedVariable IV, const MDNode *ScopeNode) {
- const DILocalVariable *Cleansed = nullptr;
- if (CU.getExistingAbstractVariable(IV, Cleansed))
+void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
+ const DINode *Node, const MDNode *ScopeNode) {
+ if (CU.getExistingAbstractEntity(Node))
return;
if (LexicalScope *Scope =
LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
- CU.createAbstractVariable(Cleansed, Scope);
+ CU.createAbstractEntity(Node, Scope);
}
// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
- DwarfCompileUnit &TheCU, DenseSet<InlinedVariable> &Processed) {
- SmallDenseMap<InlinedVariable, DbgVariable *> MFVars;
+ DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) {
+ SmallDenseMap<InlinedEntity, DbgVariable *> MFVars;
for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
"Expected inlined-at fields to agree");
- InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt());
+ InlinedEntity Var(VI.Var, VI.Loc->getInlinedAt());
Processed.insert(Var);
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
@@ -954,14 +1046,15 @@ void DwarfDebug::collectVariableInfoFromMFTable(
if (!Scope)
continue;
- ensureAbstractVariableIsCreatedIfScoped(TheCU, Var, Scope->getScopeNode());
- auto RegVar = llvm::make_unique<DbgVariable>(Var.first, Var.second);
+ ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
+ auto RegVar = llvm::make_unique<DbgVariable>(
+ cast<DILocalVariable>(Var.first), Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
if (DbgVariable *DbgVar = MFVars.lookup(Var))
DbgVar->addMMIEntry(*RegVar);
else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
MFVars.insert({Var, RegVar.get()});
- ConcreteVariables.push_back(std::move(RegVar));
+ ConcreteEntities.push_back(std::move(RegVar));
}
}
}
@@ -1087,6 +1180,18 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
auto Value = getDebugLocValue(Begin);
+
+ // Omit entries with empty ranges as they do not have any effect in DWARF.
+ if (StartLabel == EndLabel) {
+ // If this is a fragment, we must still add the value to the list of
+ // open ranges, since it may describe non-overlapping parts of the
+ // variable.
+ if (DIExpr->isFragment())
+ OpenRanges.push_back(Value);
+ LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n");
+ continue;
+ }
+
DebugLocEntry Loc(StartLabel, EndLabel, Value);
bool couldMerge = false;
@@ -1126,14 +1231,26 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
}
}
-DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU,
- LexicalScope &Scope,
- InlinedVariable IV) {
- ensureAbstractVariableIsCreatedIfScoped(TheCU, IV, Scope.getScopeNode());
- ConcreteVariables.push_back(
- llvm::make_unique<DbgVariable>(IV.first, IV.second));
- InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get());
- return ConcreteVariables.back().get();
+DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope,
+ const DINode *Node,
+ const DILocation *Location,
+ const MCSymbol *Sym) {
+ ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode());
+ if (isa<const DILocalVariable>(Node)) {
+ ConcreteEntities.push_back(
+ llvm::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
+ Location));
+ InfoHolder.addScopeVariable(&Scope,
+ cast<DbgVariable>(ConcreteEntities.back().get()));
+ } else if (isa<const DILabel>(Node)) {
+ ConcreteEntities.push_back(
+ llvm::make_unique<DbgLabel>(cast<const DILabel>(Node),
+ Location, Sym));
+ InfoHolder.addScopeLabel(&Scope,
+ cast<DbgLabel>(ConcreteEntities.back().get()));
+ }
+ return ConcreteEntities.back().get();
}
/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
@@ -1195,14 +1312,14 @@ static bool validThroughout(LexicalScopes &LScopes,
}
// Find variables for each lexical scope.
-void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
- const DISubprogram *SP,
- DenseSet<InlinedVariable> &Processed) {
+void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
+ const DISubprogram *SP,
+ DenseSet<InlinedEntity> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(TheCU, Processed);
for (const auto &I : DbgValues) {
- InlinedVariable IV = I.first;
+ InlinedEntity IV = I.first;
if (Processed.count(IV))
continue;
@@ -1212,16 +1329,18 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
continue;
LexicalScope *Scope = nullptr;
+ const DILocalVariable *LocalVar = cast<DILocalVariable>(IV.first);
if (const DILocation *IA = IV.second)
- Scope = LScopes.findInlinedScope(IV.first->getScope(), IA);
+ Scope = LScopes.findInlinedScope(LocalVar->getScope(), IA);
else
- Scope = LScopes.findLexicalScope(IV.first->getScope());
+ Scope = LScopes.findLexicalScope(LocalVar->getScope());
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
Processed.insert(IV);
- DbgVariable *RegVar = createConcreteVariable(TheCU, *Scope, IV);
+ DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU,
+ *Scope, LocalVar, IV.second));
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
@@ -1247,20 +1366,53 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
// unique identifiers, so don't bother resolving the type with the
// identifier map.
const DIBasicType *BT = dyn_cast<DIBasicType>(
- static_cast<const Metadata *>(IV.first->getType()));
+ static_cast<const Metadata *>(LocalVar->getType()));
// Finalize the entry by lowering it into a DWARF bytestream.
for (auto &Entry : Entries)
Entry.finalize(*Asm, List, BT);
}
- // Collect info for variables that were optimized out.
+ // For each InlinedEntity collected from DBG_LABEL instructions, convert it
+ // to a DWARF-related DbgLabel.
+ for (const auto &I : DbgLabels) {
+ InlinedEntity IL = I.first;
+ const MachineInstr *MI = I.second;
+ if (MI == nullptr)
+ continue;
+
+ LexicalScope *Scope = nullptr;
+ const DILabel *Label = cast<DILabel>(IL.first);
+ // Get the inlined DILocation if this is an inlined label.
+ if (const DILocation *IA = IL.second)
+ Scope = LScopes.findInlinedScope(Label->getScope(), IA);
+ else
+ Scope = LScopes.findLexicalScope(Label->getScope());
+ // If label scope is not found then skip this label.
+ if (!Scope)
+ continue;
+
+ Processed.insert(IL);
+ /// At this point, the temporary label has been created.
+ /// Save the temporary label in the DbgLabel entity so the
+ /// actual address can be emitted when generating the DWARF DIE.
+ MCSymbol *Sym = getLabelBeforeInsn(MI);
+ createConcreteEntity(TheCU, *Scope, Label, IL.second, Sym);
+ }
+
+ // Collect info for variables/labels that were optimized out.
for (const DINode *DN : SP->getRetainedNodes()) {
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
+ continue;
+ LexicalScope *Scope = nullptr;
if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
- if (Processed.insert(InlinedVariable(DV, nullptr)).second)
- if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
- createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr));
+ Scope = LScopes.findLexicalScope(DV->getScope());
+ } else if (auto *DL = dyn_cast<DILabel>(DN)) {
+ Scope = LScopes.findLexicalScope(DL->getScope());
}
+
+ if (Scope)
+ createConcreteEntity(TheCU, *Scope, DN, nullptr);
}
}
@@ -1284,6 +1436,11 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
unsigned LastAsmLine =
Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
+ // Request a label after the call in order to emit AT_return_pc information
+ // in call site entries. TODO: Add support for targets with delay slots.
+ if (SP->areAllCallsDescribed() && MI->isCall() && !MI->hasDelaySlot())
+ requestLabelAfterInsn(MI);
+
if (DL == PrevInstLoc) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
@@ -1416,9 +1573,14 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
assert(!FnScope || SP == FnScope->getScopeNode());
DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
+ if (TheCU.getCUNode()->isDebugDirectivesOnly()) {
+ PrevLabel = nullptr;
+ CurFn = nullptr;
+ return;
+ }
- DenseSet<InlinedVariable> ProcessedVars;
- collectVariableInfo(TheCU, SP, ProcessedVars);
+ DenseSet<InlinedEntity> Processed;
+ collectEntityInfo(TheCU, SP, Processed);
// Add the range of this function to the list of ranges for the CU.
TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
@@ -1442,31 +1604,41 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
auto *SP = cast<DISubprogram>(AScope->getScopeNode());
for (const DINode *DN : SP->getRetainedNodes()) {
- if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
- // Collect info for variables that were optimized out.
- if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second)
- continue;
- ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr),
- DV->getScope());
- assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
- && "ensureAbstractVariableIsCreated inserted abstract scopes");
- }
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
+ continue;
+
+ const MDNode *Scope = nullptr;
+ if (auto *DV = dyn_cast<DILocalVariable>(DN))
+ Scope = DV->getScope();
+ else if (auto *DL = dyn_cast<DILabel>(DN))
+ Scope = DL->getScope();
+ else
+ llvm_unreachable("Unexpected DI type!");
+
+ // Collect info for variables/labels that were optimized out.
+ ensureAbstractEntityIsCreated(TheCU, DN, Scope);
+ assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
+ && "ensureAbstractEntityIsCreated inserted abstract scopes");
}
constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
ProcessedSPNodes.insert(SP);
- TheCU.constructSubprogramScopeDIE(SP, FnScope);
+ DIE &ScopeDIE = TheCU.constructSubprogramScopeDIE(SP, FnScope);
if (auto *SkelCU = TheCU.getSkeleton())
if (!LScopes.getAbstractScopesList().empty() &&
TheCU.getCUNode()->getSplitDebugInlining())
SkelCU->constructSubprogramScopeDIE(SP, FnScope);
+ // Construct call site entries.
+ constructCallSiteEntryDIEs(*SP, TheCU, ScopeDIE, *MF);
+
// Clear debug info
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
// DbgVariables except those that are also in AbstractVariables (since they
// can be used cross-function)
InfoHolder.getScopeVariables().clear();
+ InfoHolder.getScopeLabels().clear();
PrevLabel = nullptr;
CurFn = nullptr;
}
@@ -1530,8 +1702,6 @@ void DwarfDebug::emitAccelDebugNames() {
if (getUnits().empty())
return;
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfDebugNamesSection());
emitDWARF5AccelTable(Asm, AccelDebugNames, *this, getUnits());
}
@@ -1636,7 +1806,8 @@ void DwarfDebug::emitDebugPubSections() {
if (!TheU->hasDwarfPubSections())
continue;
- bool GnuStyle = TheU->getCUNode()->getGnuPubnames();
+ bool GnuStyle = TheU->getCUNode()->getNameTableKind() ==
+ DICompileUnit::DebugNameTableKind::GNU;
Asm->OutStreamer->SwitchSection(
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
@@ -1692,8 +1863,8 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
if (GnuStyle) {
dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
Asm->OutStreamer->AddComment(
- Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
- dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
+ Twine("Attributes: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) +
+ ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
Asm->emitInt8(Desc.toBits());
}
@@ -1759,6 +1930,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
void DebugLocEntry::finalize(const AsmPrinter &AP,
DebugLocStream::ListBuilder &List,
const DIBasicType *BT) {
+ assert(Begin != End && "unexpected location list entry with empty range");
DebugLocStream::EntryBuilder Entry(List, Begin, End);
BufferByteStreamer Streamer = Entry.getStreamer();
DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer);
@@ -1791,25 +1963,119 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
emitDebugLocEntry(Streamer, Entry);
}
-// Emit locations into the debug loc section.
+// Emit the common part of the DWARF 5 range/locations list tables header.
+static void emitListsTableHeaderStart(AsmPrinter *Asm, const DwarfFile &Holder,
+ MCSymbol *TableStart,
+ MCSymbol *TableEnd) {
+ // Build the table header, which starts with the length field.
+ Asm->OutStreamer->AddComment("Length");
+ Asm->EmitLabelDifference(TableEnd, TableStart, 4);
+ Asm->OutStreamer->EmitLabel(TableStart);
+ // Version number (DWARF v5 and later).
+ Asm->OutStreamer->AddComment("Version");
+ Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion());
+ // Address size.
+ Asm->OutStreamer->AddComment("Address size");
+ Asm->emitInt8(Asm->MAI->getCodePointerSize());
+ // Segment selector size.
+ Asm->OutStreamer->AddComment("Segment selector size");
+ Asm->emitInt8(0);
+}
+
+// Emit the header of a DWARF 5 range list table. Returns the symbol
+// that designates the end of the table for the caller to emit when the table is
+// complete.
+static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
+ const DwarfFile &Holder) {
+ MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start");
+ MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end");
+ emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd);
+
+ Asm->OutStreamer->AddComment("Offset entry count");
+ Asm->emitInt32(Holder.getRangeLists().size());
+ Asm->OutStreamer->EmitLabel(Holder.getRnglistsTableBaseSym());
+
+ for (const RangeSpanList &List : Holder.getRangeLists())
+ Asm->EmitLabelDifference(List.getSym(), Holder.getRnglistsTableBaseSym(),
+ 4);
+
+ return TableEnd;
+}
+
+// Emit the header of a DWARF 5 locations list table. Returns the symbol that
+// designates the end of the table for the caller to emit when the table is
+// complete.
+static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
+ const DwarfFile &Holder) {
+ MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start");
+ MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end");
+ emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd);
+
+ // FIXME: Generate the offsets table and use DW_FORM_loclistx with the
+ // DW_AT_loclists_base attribute. Until then set the number of offsets to 0.
+ Asm->OutStreamer->AddComment("Offset entry count");
+ Asm->emitInt32(0);
+ Asm->OutStreamer->EmitLabel(Holder.getLoclistsTableBaseSym());
+
+ return TableEnd;
+}
+
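The two helpers above share a contract with their callers: the header emits the Length field as a label difference, and the returned end symbol must be emitted after the table body for that difference to resolve. A minimal sketch of the pairing, not part of the patch (the Holder name is illustrative):

  MCSymbol *TableEnd = emitLoclistsTableHeader(Asm, Holder); // Length, version, sizes
  // ... emit each location list and its DW_LLE_* entries here ...
  Asm->OutStreamer->EmitLabel(TableEnd); // resolves the Length at the top of the table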
+// Emit locations into the .debug_loc/.debug_loclists section.
void DwarfDebug::emitDebugLoc() {
if (DebugLocs.getLists().empty())
return;
- // Start the dwarf loc section.
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLocSection());
+ bool IsLocLists = getDwarfVersion() >= 5;
+ MCSymbol *TableEnd = nullptr;
+ if (IsLocLists) {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLoclistsSection());
+ TableEnd = emitLoclistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder
+ : InfoHolder);
+ } else {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+ }
+
unsigned char Size = Asm->MAI->getCodePointerSize();
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->EmitLabel(List.Label);
+
const DwarfCompileUnit *CU = List.CU;
+ const MCSymbol *Base = CU->getBaseAddress();
for (const auto &Entry : DebugLocs.getEntries(List)) {
- // Set up the range. This range is relative to the entry point of the
- // compile unit. This is a hard coded 0 for low_pc when we're emitting
- // ranges, or the DW_AT_low_pc on the compile unit otherwise.
- if (auto *Base = CU->getBaseAddress()) {
- Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
- Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
+ if (Base) {
+ // Set up the range. This range is relative to the entry point of the
+ // compile unit. This is a hard coded 0 for low_pc when we're emitting
+ // ranges, or the DW_AT_low_pc on the compile unit otherwise.
+ if (IsLocLists) {
+ Asm->OutStreamer->AddComment("DW_LLE_offset_pair");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_offset_pair, 1);
+ Asm->OutStreamer->AddComment(" starting offset");
+ Asm->EmitLabelDifferenceAsULEB128(Entry.BeginSym, Base);
+ Asm->OutStreamer->AddComment(" ending offset");
+ Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Base);
+ } else {
+ Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
+ Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
+ }
+
+ emitDebugLocEntryLocation(Entry);
+ continue;
+ }
+
+ // We have no base address.
+ if (IsLocLists) {
+ // TODO: Use DW_LLE_base_addressx + DW_LLE_offset_pair, or
+ // DW_LLE_startx_length if there is only a single range.
+ // That should reduce the size of the debug data emitted.
+ // For now just use the DW_LLE_startx_length for all cases.
+ Asm->OutStreamer->AddComment("DW_LLE_startx_length");
+ Asm->emitInt8(dwarf::DW_LLE_startx_length);
+ Asm->OutStreamer->AddComment(" start idx");
+ Asm->EmitULEB128(AddrPool.getIndex(Entry.BeginSym));
+ Asm->OutStreamer->AddComment(" length");
+ Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Entry.BeginSym);
} else {
Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size);
Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size);
@@ -1817,9 +2083,20 @@ void DwarfDebug::emitDebugLoc() {
emitDebugLocEntryLocation(Entry);
}
- Asm->OutStreamer->EmitIntValue(0, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
+
+ if (IsLocLists) {
+ // .debug_loclists section ends with DW_LLE_end_of_list.
+ Asm->OutStreamer->AddComment("DW_LLE_end_of_list");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_end_of_list, 1);
+ } else {
+ // Terminate the .debug_loc list with two 0 values.
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ }
}
+
+ if (TableEnd)
+ Asm->OutStreamer->EmitLabel(TableEnd);
}
void DwarfDebug::emitDebugLocDWO() {
@@ -1828,10 +2105,13 @@ void DwarfDebug::emitDebugLocDWO() {
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->EmitLabel(List.Label);
for (const auto &Entry : DebugLocs.getEntries(List)) {
- // Just always use start_length for now - at least that's one address
- // rather than two. We could get fancier and try to, say, reuse an
- // address we know we've emitted elsewhere (the start of the function?
- // The start of the CU or CU subrange that encloses this range?)
+ // GDB only supports startx_length in pre-standard split-DWARF.
+ // (in v5 standard loclists, it currently* /only/ supports base_address +
+ // offset_pair, so the implementations can't really share much since they
+ // need to use different representations)
+ // * as of October 2018, at least
+ // Ideally/in v5, this could use SectionLabels to reuse existing addresses
+ // in the address pool to minimize object size/relocations.
Asm->emitInt8(dwarf::DW_LLE_startx_length);
unsigned idx = AddrPool.getIndex(Entry.BeginSym);
Asm->EmitULEB128(idx);
@@ -1939,10 +2219,9 @@ void DwarfDebug::emitDebugARanges() {
}
// Sort the CU list (again, to ensure consistent output order).
- llvm::sort(CUs.begin(), CUs.end(),
- [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
- return A->getUniqueID() < B->getUniqueID();
- });
+ llvm::sort(CUs, [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
+ return A->getUniqueID() < B->getUniqueID();
+ });
// Emit an arange table for each CU we used.
for (DwarfCompileUnit *CU : CUs) {
@@ -2006,10 +2285,10 @@ void DwarfDebug::emitDebugARanges() {
}
/// Emit a single range list. We handle both DWARF v5 and earlier.
-static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
+static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm,
const RangeSpanList &List) {
- auto DwarfVersion = CU->getDwarfVersion();
+ auto DwarfVersion = DD.getDwarfVersion();
// Emit our symbol so we can find the beginning of the range.
Asm->OutStreamer->EmitLabel(List.getSym());
// Gather all the ranges that apply to the same section so they can share
@@ -2021,7 +2300,8 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
for (const RangeSpan &Range : List.getRanges())
SectionRanges[&Range.getStart()->getSection()].push_back(&Range);
- auto *CUBase = CU->getBaseAddress();
+ const DwarfCompileUnit &CU = List.getCU();
+ const MCSymbol *CUBase = CU.getBaseAddress();
bool BaseIsSet = false;
for (const auto &P : SectionRanges) {
// Don't bother with a base address entry if there's only one range in
@@ -2031,19 +2311,23 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
// or optnone where there may be holes in a single CU's section
// contributions.
auto *Base = CUBase;
- if (!Base && P.second.size() > 1 &&
- (UseDwarfRangesBaseAddressSpecifier || DwarfVersion >= 5)) {
+ if (!Base && (P.second.size() > 1 || DwarfVersion < 5) &&
+ (CU.getCUNode()->getRangesBaseAddress() || DwarfVersion >= 5)) {
BaseIsSet = true;
// FIXME/use care: This may not be a useful base address if it's not
// the lowest address/range in this object.
Base = P.second.front()->getStart();
if (DwarfVersion >= 5) {
- Asm->OutStreamer->AddComment("DW_RLE_base_address");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_address, 1);
- } else
+ Base = DD.getSectionLabel(&Base->getSection());
+ Asm->OutStreamer->AddComment("DW_RLE_base_addressx");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_addressx, 1);
+ Asm->OutStreamer->AddComment(" base address index");
+ Asm->EmitULEB128(DD.getAddressPool().getIndex(Base));
+ } else {
Asm->OutStreamer->EmitIntValue(-1, Size);
- Asm->OutStreamer->AddComment(" base address");
- Asm->OutStreamer->EmitSymbolValue(Base, Size);
+ Asm->OutStreamer->AddComment(" base address");
+ Asm->OutStreamer->EmitSymbolValue(Base, Size);
+ }
} else if (BaseIsSet && DwarfVersion < 5) {
BaseIsSet = false;
assert(!Base);
@@ -2070,10 +2354,10 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
Asm->EmitLabelDifference(End, Base, Size);
}
} else if (DwarfVersion >= 5) {
- Asm->OutStreamer->AddComment("DW_RLE_start_length");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_start_length, 1);
- Asm->OutStreamer->AddComment(" start");
- Asm->OutStreamer->EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer->AddComment("DW_RLE_startx_length");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_startx_length, 1);
+ Asm->OutStreamer->AddComment(" start index");
+ Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin));
Asm->OutStreamer->AddComment(" length");
Asm->EmitLabelDifferenceAsULEB128(End, Begin);
} else {
@@ -2092,31 +2376,13 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
}
}
-// Emit the header of a DWARF 5 range list table. Returns the symbol that
-// designates the end of the table for the caller to emit when the table is
-// complete.
-static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, DwarfFile &Holder) {
- // The length is described by a starting label right after the length field
- // and an end label.
- MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start");
- MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end");
- // Build the range table header, which starts with the length field.
- Asm->EmitLabelDifference(TableEnd, TableStart, 4);
- Asm->OutStreamer->EmitLabel(TableStart);
- // Version number (DWARF v5 and later).
- Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion());
- // Address size.
- Asm->emitInt8(Asm->MAI->getCodePointerSize());
- // Segment selector size.
- Asm->emitInt8(0);
-
- MCSymbol *RnglistTableBaseSym = Holder.getRnglistsTableBaseSym();
+static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm,
+ const DwarfFile &Holder, MCSymbol *TableEnd) {
+ for (const RangeSpanList &List : Holder.getRangeLists())
+ emitRangeList(DD, Asm, List);
- // FIXME: Generate the offsets table and use DW_FORM_rnglistx with the
- // DW_AT_ranges attribute. Until then set the number of offsets to 0.
- Asm->emitInt32(0);
- Asm->OutStreamer->EmitLabel(RnglistTableBaseSym);
- return TableEnd;
+ if (TableEnd)
+ Asm->OutStreamer->EmitLabel(TableEnd);
}
/// Emit address ranges into the .debug_ranges section or into the DWARF v5
@@ -2125,46 +2391,52 @@ void DwarfDebug::emitDebugRanges() {
if (CUMap.empty())
return;
- auto NoRangesPresent = [this]() {
- return llvm::all_of(
- CUMap, [](const decltype(CUMap)::value_type &Pair) {
- return Pair.second->getRangeLists().empty();
- });
- };
+ const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- if (!useRangesSection()) {
- assert(NoRangesPresent() && "No debug ranges expected.");
+ if (Holder.getRangeLists().empty())
return;
- }
- if (NoRangesPresent())
- return;
+ assert(useRangesSection());
+ assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }));
// Start the dwarf ranges section.
MCSymbol *TableEnd = nullptr;
if (getDwarfVersion() >= 5) {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfRnglistsSection());
- TableEnd = emitRnglistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder
- : InfoHolder);
+ TableEnd = emitRnglistsTableHeader(Asm, Holder);
} else
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
- // Grab the specific ranges for the compile units in the module.
- for (const auto &I : CUMap) {
- DwarfCompileUnit *TheCU = I.second;
+ emitDebugRangesImpl(*this, Asm, Holder, TableEnd);
+}
- if (auto *Skel = TheCU->getSkeleton())
- TheCU = Skel;
+void DwarfDebug::emitDebugRangesDWO() {
+ assert(useSplitDwarf());
- // Iterate over the misc ranges for the compile units in the module.
- for (const RangeSpanList &List : TheCU->getRangeLists())
- emitRangeList(Asm, TheCU, List);
- }
+ if (CUMap.empty())
+ return;
- if (TableEnd)
- Asm->OutStreamer->EmitLabel(TableEnd);
+ const auto &Holder = InfoHolder;
+
+ if (Holder.getRangeLists().empty())
+ return;
+
+ assert(getDwarfVersion() >= 5);
+ assert(useRangesSection());
+ assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }));
+
+ // Start the dwarf ranges section.
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
+ MCSymbol *TableEnd = emitRnglistsTableHeader(Asm, Holder);
+
+ emitDebugRangesImpl(*this, Asm, Holder, TableEnd);
}
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
@@ -2206,12 +2478,19 @@ void DwarfDebug::emitDebugMacinfo() {
if (CUMap.empty())
return;
+ if (llvm::all_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }))
+ return;
+
// Start the dwarf macinfo section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfMacinfoSection());
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
+ if (TheCU.getCUNode()->isDebugDirectivesOnly())
+ continue;
auto *SkCU = TheCU.getSkeleton();
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
auto *CUNode = cast<DICompileUnit>(P.first);
@@ -2229,8 +2508,6 @@ void DwarfDebug::emitDebugMacinfo() {
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfCompileUnit> NewU) {
- NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
- Asm->TM.Options.MCOptions.SplitDwarfFile);
if (!CompilationDir.empty())
NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
@@ -2298,9 +2575,8 @@ void DwarfDebug::emitDebugStrDWO() {
OffSec, /* UseRelativeOffsets = */ false);
}
-// Emit DWO addresses.
+// Emit address pool.
void DwarfDebug::emitDebugAddr() {
- assert(useSplitDwarf() && "No split dwarf?");
AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
}
@@ -2356,10 +2632,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
NewTU.setTypeSignature(Signature);
Ins.first->second = Signature;
- if (useSplitDwarf())
- NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
- else {
- NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+ if (useSplitDwarf()) {
+ MCSection *Section =
+ getDwarfVersion() <= 4
+ ? Asm->getObjFileLowering().getDwarfTypesDWOSection()
+ : Asm->getObjFileLowering().getDwarfInfoDWOSection();
+ NewTU.setSection(Section);
+ } else {
+ MCSection *Section =
+ getDwarfVersion() <= 4
+ ? Asm->getObjFileLowering().getDwarfTypesSection(Signature)
+ : Asm->getObjFileLowering().getDwarfInfoSection(Signature);
+ NewTU.setSection(Section);
// Non-split type units reuse the compile unit's line table.
CU.applyStmtList(UnitDie);
}
@@ -2408,14 +2692,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// AccelTableKind::Apple, we use the table we got as an argument). If
// accelerator tables are disabled, this function does nothing.
template <typename DataT>
-void DwarfDebug::addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name,
+void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
+ AccelTable<DataT> &AppleAccel, StringRef Name,
const DIE &Die) {
if (getAccelTableKind() == AccelTableKind::None)
return;
+ if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
+ return;
+
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- DwarfStringPoolEntryRef Ref =
- Holder.getStringPool().getEntry(*Asm, Name);
+ DwarfStringPoolEntryRef Ref = Holder.getStringPool().getEntry(*Asm, Name);
switch (getAccelTableKind()) {
case AccelTableKind::Apple:
@@ -2431,24 +2719,36 @@ void DwarfDebug::addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name,
}
}
-void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) {
- addAccelNameImpl(AccelNames, Name, Die);
+void DwarfDebug::addAccelName(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
+ addAccelNameImpl(CU, AccelNames, Name, Die);
}
-void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) {
+void DwarfDebug::addAccelObjC(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
// ObjC names go only into the Apple accelerator tables.
if (getAccelTableKind() == AccelTableKind::Apple)
- addAccelNameImpl(AccelObjC, Name, Die);
+ addAccelNameImpl(CU, AccelObjC, Name, Die);
}
-void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) {
- addAccelNameImpl(AccelNamespace, Name, Die);
+void DwarfDebug::addAccelNamespace(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
+ addAccelNameImpl(CU, AccelNamespace, Name, Die);
}
-void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) {
- addAccelNameImpl(AccelTypes, Name, Die);
+void DwarfDebug::addAccelType(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die, char Flags) {
+ addAccelNameImpl(CU, AccelTypes, Name, Die);
}
uint16_t DwarfDebug::getDwarfVersion() const {
return Asm->OutStreamer->getContext().getDwarfVersion();
}
+
+void DwarfDebug::addSectionLabel(const MCSymbol *Sym) {
+ SectionLabels.insert(std::make_pair(&Sym->getSection(), Sym));
+}
+
+const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
+ return SectionLabels.find(S)->second;
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index abf2e43b1312..8a31e989b289 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -15,8 +15,6 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#include "AddressPool.h"
-#include "DbgValueHistoryCalculator.h"
-#include "DebugHandlerBase.h"
#include "DebugLocStream.h"
#include "DwarfFile.h"
#include "llvm/ADT/ArrayRef.h"
@@ -31,6 +29,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AccelTable.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
@@ -62,6 +62,47 @@ class MDNode;
class Module;
//===----------------------------------------------------------------------===//
+/// This class is defined as the common parent of DbgVariable and DbgLabel
+/// so that polymorphism can be leveraged to factor out code common to
+/// DbgVariable and DbgLabel.
+class DbgEntity {
+ const DINode *Entity;
+ const DILocation *InlinedAt;
+ DIE *TheDIE = nullptr;
+ unsigned SubclassID;
+
+public:
+ enum DbgEntityKind {
+ DbgVariableKind,
+ DbgLabelKind
+ };
+
+ DbgEntity(const DINode *N, const DILocation *IA, unsigned ID)
+ : Entity(N), InlinedAt(IA), SubclassID(ID) {}
+ virtual ~DbgEntity() {}
+
+ /// Accessors.
+ /// @{
+ const DINode *getEntity() const { return Entity; }
+ const DILocation *getInlinedAt() const { return InlinedAt; }
+ DIE *getDIE() const { return TheDIE; }
+ unsigned getDbgEntityID() const { return SubclassID; }
+ /// @}
+
+ void setDIE(DIE &D) { TheDIE = &D; }
+
+ static bool classof(const DbgEntity *N) {
+ switch (N->getDbgEntityID()) {
+ default:
+ return false;
+ case DbgVariableKind:
+ case DbgLabelKind:
+ return true;
+ }
+ }
+};
+
+//===----------------------------------------------------------------------===//
/// This class is used to track local variable information.
///
/// Variables can be created from allocas, in which case they're generated from
@@ -73,10 +114,7 @@ class Module;
/// single instruction use \a MInsn and (optionally) a single entry of \a Expr.
///
/// Variables that have been optimized out use none of these fields.
-class DbgVariable {
- const DILocalVariable *Var; /// Variable Descriptor.
- const DILocation *IA; /// Inlined at location.
- DIE *TheDIE = nullptr; /// Variable DIE.
+class DbgVariable : public DbgEntity {
unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs.
const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction.
@@ -93,7 +131,7 @@ public:
/// Creates a variable without any DW_AT_location. Call \a initializeMMI()
/// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions.
DbgVariable(const DILocalVariable *V, const DILocation *IA)
- : Var(V), IA(IA) {}
+ : DbgEntity(V, IA, DbgVariableKind) {}
/// Initialize from the MMI table.
void initializeMMI(const DIExpression *E, int FI) {
@@ -111,8 +149,9 @@ public:
assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!MInsn && "Already initialized?");
- assert(Var == DbgValue->getDebugVariable() && "Wrong variable");
- assert(IA == DbgValue->getDebugLoc()->getInlinedAt() && "Wrong inlined-at");
+ assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
+ assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
+ "Wrong inlined-at");
MInsn = DbgValue;
if (auto *E = DbgValue->getDebugExpression())
@@ -121,19 +160,18 @@ public:
}
// Accessors.
- const DILocalVariable *getVariable() const { return Var; }
- const DILocation *getInlinedAt() const { return IA; }
+ const DILocalVariable *getVariable() const {
+ return cast<DILocalVariable>(getEntity());
+ }
const DIExpression *getSingleExpression() const {
assert(MInsn && FrameIndexExprs.size() <= 1);
return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
}
- void setDIE(DIE &D) { TheDIE = &D; }
- DIE *getDIE() const { return TheDIE; }
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
- StringRef getName() const { return Var->getName(); }
+ StringRef getName() const { return getVariable()->getName(); }
const MachineInstr *getMInsn() const { return MInsn; }
/// Get the FI entries, sorted by fragment offset.
ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
@@ -143,7 +181,7 @@ public:
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
// FIXME: Why don't we just infer this tag and store it all along?
- if (Var->isParameter())
+ if (getVariable()->isParameter())
return dwarf::DW_TAG_formal_parameter;
return dwarf::DW_TAG_variable;
@@ -151,7 +189,7 @@ public:
/// Return true if DbgVariable is artificial.
bool isArtificial() const {
- if (Var->isArtificial())
+ if (getVariable()->isArtificial())
return true;
if (getType()->isArtificial())
return true;
@@ -159,7 +197,7 @@ public:
}
bool isObjectPointer() const {
- if (Var->isObjectPointer())
+ if (getVariable()->isObjectPointer())
return true;
if (getType()->isObjectPointer())
return true;
@@ -178,6 +216,45 @@ public:
bool isBlockByrefVariable() const;
const DIType *getType() const;
+ static bool classof(const DbgEntity *N) {
+ return N->getDbgEntityID() == DbgVariableKind;
+ }
+
+private:
+ template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
+ return Ref.resolve();
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// This class is used to track label information.
+///
+/// Labels are collected from \c DBG_LABEL instructions.
+class DbgLabel : public DbgEntity {
+ const MCSymbol *Sym; /// Symbol before DBG_LABEL instruction.
+
+public:
+ /// We need MCSymbol information to generate DW_AT_low_pc.
+ DbgLabel(const DILabel *L, const DILocation *IA, const MCSymbol *Sym = nullptr)
+ : DbgEntity(L, IA, DbgLabelKind), Sym(Sym) {}
+
+ /// Accessors.
+ /// @{
+ const DILabel *getLabel() const { return cast<DILabel>(getEntity()); }
+ const MCSymbol *getSymbol() const { return Sym; }
+
+ StringRef getName() const { return getLabel()->getName(); }
+ /// @}
+
+ /// Translate tag to proper Dwarf tag.
+ dwarf::Tag getTag() const {
+ return dwarf::DW_TAG_label;
+ }
+
+ static bool classof(const DbgEntity *N) {
+ return N->getDbgEntityID() == DbgLabelKind;
+ }
+
private:
template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
return Ref.resolve();
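The new hierarchy is discriminated with LLVM-style RTTI through the classof hooks above. A minimal sketch of how a consumer branches on the two subclasses; visitEntity is a hypothetical helper, not part of this change:

  void visitEntity(const DbgEntity *Entity) {
    if (const auto *Var = dyn_cast<DbgVariable>(Entity)) {
      // Becomes DW_TAG_variable or DW_TAG_formal_parameter.
      (void)Var->getVariable();
    } else if (const auto *Label = dyn_cast<DbgLabel>(Entity)) {
      // Becomes DW_TAG_label; its MCSymbol is later used for DW_AT_low_pc.
      (void)Label->getSymbol();
    }
  }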
@@ -217,8 +294,8 @@ class DwarfDebug : public DebugHandlerBase {
/// Size of each symbol emitted (for those symbols that have a specific size).
DenseMap<const MCSymbol *, uint64_t> SymSize;
- /// Collection of abstract variables.
- SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
+ /// Collection of concrete variables/labels.
+ SmallVector<std::unique_ptr<DbgEntity>, 64> ConcreteEntities;
/// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
/// can refer to them in spite of insertions into this list.
@@ -250,6 +327,8 @@ class DwarfDebug : public DebugHandlerBase {
/// used to keep track of which types we have emitted type units for.
DenseMap<const MDNode *, uint64_t> TypeSignatures;
+ DenseMap<const MCSection *, const MCSymbol *> SectionLabels;
+
SmallVector<
std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
TypeUnitsUnderConstruction;
@@ -266,9 +345,6 @@ class DwarfDebug : public DebugHandlerBase {
/// Use inlined strings.
bool UseInlineStrings = false;
- /// Whether to emit DWARF pub sections or not.
- bool UsePubSections = true;
-
/// Allow emission of .debug_ranges section.
bool UseRangesSection = true;
@@ -332,24 +408,33 @@ class DwarfDebug : public DebugHandlerBase {
return InfoHolder.getUnits();
}
- using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
- void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV,
- const MDNode *Scope);
- void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable IV,
- const MDNode *Scope);
+ void ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
+ const DINode *Node,
+ const MDNode *Scope);
+ void ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
+ const DINode *Node,
+ const MDNode *Scope);
- DbgVariable *createConcreteVariable(DwarfCompileUnit &TheCU,
- LexicalScope &Scope, InlinedVariable IV);
+ DbgEntity *createConcreteEntity(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope,
+ const DINode *Node,
+ const DILocation *Location,
+ const MCSymbol *Sym = nullptr);
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
+ /// Construct DIEs for call site entries describing the calls in \p MF.
+ void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
+ DIE &ScopeDIE, const MachineFunction &MF);
+
template <typename DataT>
- void addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name,
- const DIE &Die);
+ void addAccelNameImpl(const DICompileUnit &CU, AccelTable<DataT> &AppleAccel,
+ StringRef Name, const DIE &Die);
- void finishVariableDefinitions();
+ void finishEntityDefinitions();
void finishSubprogramDefinitions();
@@ -407,9 +492,7 @@ class DwarfDebug : public DebugHandlerBase {
/// Emit address ranges into a debug ranges section.
void emitDebugRanges();
-
- /// Emit range lists into a DWARF v5 debug rnglists section.
- void emitDebugRnglists();
+ void emitDebugRangesDWO();
/// Emit macros into a debug macinfo section.
void emitDebugMacinfo();
@@ -457,6 +540,8 @@ class DwarfDebug : public DebugHandlerBase {
/// Create new DwarfCompileUnit for the given metadata node with tag
/// DW_TAG_compile_unit.
DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit);
+ void finishUnitAttributes(const DICompileUnit *DIUnit,
+ DwarfCompileUnit &NewCU);
/// Construct imported_module or imported_declaration DIE.
void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
@@ -469,8 +554,8 @@ class DwarfDebug : public DebugHandlerBase {
unsigned Flags);
/// Populate LexicalScope entries with variables' info.
- void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
- DenseSet<InlinedVariable> &ProcessedVars);
+ void collectEntityInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
+ DenseSet<InlinedEntity> &ProcessedVars);
/// Build the location list for all DBG_VALUEs in the
/// function that describe the same variable.
@@ -479,7 +564,7 @@ class DwarfDebug : public DebugHandlerBase {
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
- DenseSet<InlinedVariable> &P);
+ DenseSet<InlinedEntity> &P);
/// Emit the reference to the section.
void emitSectionReference(const DwarfCompileUnit &CU);
@@ -543,9 +628,6 @@ public:
/// Returns whether to use inline strings.
bool useInlineStrings() const { return UseInlineStrings; }
- /// Returns whether GNU pub sections should be emitted.
- bool usePubSections() const { return UsePubSections; }
-
/// Returns whether ranges section should be emitted.
bool useRangesSection() const { return UseRangesSection; }
@@ -608,17 +690,20 @@ public:
return Ref.resolve();
}
- void addSubprogramNames(const DISubprogram *SP, DIE &Die);
+ void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP,
+ DIE &Die);
AddressPool &getAddressPool() { return AddrPool; }
- void addAccelName(StringRef Name, const DIE &Die);
+ void addAccelName(const DICompileUnit &CU, StringRef Name, const DIE &Die);
- void addAccelObjC(StringRef Name, const DIE &Die);
+ void addAccelObjC(const DICompileUnit &CU, StringRef Name, const DIE &Die);
- void addAccelNamespace(StringRef Name, const DIE &Die);
+ void addAccelNamespace(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die);
- void addAccelType(StringRef Name, const DIE &Die, char Flags);
+ void addAccelType(const DICompileUnit &CU, StringRef Name, const DIE &Die,
+ char Flags);
const MachineFunction *getCurrentFunction() const { return CurFn; }
@@ -640,6 +725,9 @@ public:
bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
/// @}
+
+ void addSectionLabel(const MCSymbol *Sym);
+ const MCSymbol *getSectionLabel(const MCSection *S);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index d8d1a5e8f841..19c350afbf17 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -24,6 +24,20 @@
using namespace llvm;
+void DwarfExpression::emitConstu(uint64_t Value) {
+ if (Value < 32)
+ emitOp(dwarf::DW_OP_lit0 + Value);
+ else if (Value == std::numeric_limits<uint64_t>::max()) {
+ // Only do this for 64-bit values as the DWARF expression stack uses
+ // target-address-size values.
+ emitOp(dwarf::DW_OP_lit0);
+ emitOp(dwarf::DW_OP_not);
+ } else {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Value);
+ }
+}
+
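The values below are only examples; they illustrate which encodings the branches in emitConstu select.

  // Not part of the patch: expected lowering for a few sample inputs.
  //   emitConstu(5)          -> DW_OP_lit5
  //   emitConstu(31)         -> DW_OP_lit31
  //   emitConstu(32)         -> DW_OP_constu 32   (ULEB128 operand)
  //   emitConstu(UINT64_MAX) -> DW_OP_lit0, DW_OP_not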
void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
assert((LocationKind == Unknown || LocationKind == Register) &&
@@ -72,14 +86,12 @@ void DwarfExpression::addOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
}
void DwarfExpression::addShr(unsigned ShiftBy) {
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(ShiftBy);
+ emitConstu(ShiftBy);
emitOp(dwarf::DW_OP_shr);
}
void DwarfExpression::addAnd(unsigned Mask) {
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(Mask);
+ emitConstu(Mask);
emitOp(dwarf::DW_OP_and);
}
@@ -181,8 +193,7 @@ void DwarfExpression::addSignedConstant(int64_t Value) {
void DwarfExpression::addUnsignedConstant(uint64_t Value) {
assert(LocationKind == Implicit || LocationKind == Unknown);
LocationKind = Implicit;
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(Value);
+ emitConstu(Value);
}
void DwarfExpression::addUnsignedConstant(const APInt &Value) {
@@ -243,10 +254,9 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// Don't emit locations that cannot be expressed without DW_OP_stack_value.
if (DwarfVersion < 4)
- if (std::any_of(ExprCursor.begin(), ExprCursor.end(),
- [](DIExpression::ExprOperand Op) -> bool {
- return Op.getOp() == dwarf::DW_OP_stack_value;
- })) {
+ if (any_of(ExprCursor, [](DIExpression::ExprOperand Op) -> bool {
+ return Op.getOp() == dwarf::DW_OP_stack_value;
+ })) {
DwarfRegs.clear();
LocationKind = Unknown;
return false;
@@ -373,8 +383,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
break;
case dwarf::DW_OP_constu:
assert(LocationKind != Register);
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(Op->getArg(0));
+ emitConstu(Op->getArg(0));
break;
case dwarf::DW_OP_stack_value:
LocationKind = Implicit;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 0637d952eba4..91568ba6d107 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -138,6 +138,9 @@ protected:
/// Emit a raw unsigned value.
virtual void emitUnsigned(uint64_t Value) = 0;
+ /// Emit a normalized unsigned constant.
+ void emitConstu(uint64_t Value);
+
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
@@ -187,7 +190,7 @@ protected:
/// DW_OP_stack_value. Unfortunately, DW_OP_stack_value was not available
/// until DWARF 4, so we will continue to generate DW_OP_constu <const> for
/// DWARF 2 and DWARF 3. Technically, this is incorrect since DW_OP_const
- /// <const> actually describes a value at a constant addess, not a constant
+ /// <const> actually describes a value at a constant address, not a constant
/// value. However, in the past there was no better way to describe a
/// constant value, so the producers and consumers started to rely on
/// heuristics to disambiguate the value vs. location status of the
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 049f349b009a..78ccad481411 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -36,13 +36,20 @@ void DwarfFile::emitUnits(bool UseOffsets) {
}
void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
- DIE &Die = TheU->getUnitDie();
- MCSection *USection = TheU->getSection();
- Asm->OutStreamer->SwitchSection(USection);
+ if (TheU->getCUNode()->isDebugDirectivesOnly())
+ return;
+ MCSection *S = TheU->getSection();
+
+ if (!S)
+ return;
+
+ Asm->OutStreamer->SwitchSection(S);
TheU->emitHeader(UseOffsets);
+ Asm->emitDwarfDIE(TheU->getUnitDie());
- Asm->emitDwarfDIE(Die);
+ if (MCSymbol *EndLabel = TheU->getEndLabel())
+ Asm->OutStreamer->EmitLabel(EndLabel);
}
// Compute the size and offset for each DIE.
@@ -53,6 +60,9 @@ void DwarfFile::computeSizeAndOffsets() {
// Iterate over each compile unit and set the size and offsets for each
// DIE within each compile unit. All offsets are CU relative.
for (const auto &TheU : CUs) {
+ if (TheU->getCUNode()->isDebugDirectivesOnly())
+ continue;
+
TheU->setDebugSectionOffset(SecOffset);
SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
}
@@ -98,3 +108,15 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
}
return true;
}
+
+void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) {
+ SmallVectorImpl<DbgLabel *> &Labels = ScopeLabels[LS];
+ Labels.push_back(Label);
+}
+
+std::pair<uint32_t, RangeSpanList *>
+DwarfFile::addRange(const DwarfCompileUnit &CU, SmallVector<RangeSpan, 2> R) {
+ CURangeLists.push_back(
+ RangeSpanList(Asm->createTempSymbol("debug_ranges"), CU, std::move(R)));
+ return std::make_pair(CURangeLists.size() - 1, &CURangeLists.back());
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 8dfbc4e1c434..51acca8c1e53 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -24,12 +24,44 @@
namespace llvm {
class AsmPrinter;
+class DbgEntity;
class DbgVariable;
+class DbgLabel;
class DwarfCompileUnit;
class DwarfUnit;
class LexicalScope;
class MCSection;
+// Data structure to hold a range for range lists.
+class RangeSpan {
+public:
+ RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {}
+ const MCSymbol *getStart() const { return Start; }
+ const MCSymbol *getEnd() const { return End; }
+ void setEnd(const MCSymbol *E) { End = E; }
+
+private:
+ const MCSymbol *Start, *End;
+};
+
+class RangeSpanList {
+private:
+ // Symbol used to locate this particular span within the debug_range section.
+ MCSymbol *RangeSym;
+ const DwarfCompileUnit *CU;
+ // List of ranges.
+ SmallVector<RangeSpan, 2> Ranges;
+
+public:
+ RangeSpanList(MCSymbol *Sym, const DwarfCompileUnit &CU,
+ SmallVector<RangeSpan, 2> Ranges)
+ : RangeSym(Sym), CU(&CU), Ranges(std::move(Ranges)) {}
+ MCSymbol *getSym() const { return RangeSym; }
+ const DwarfCompileUnit &getCU() const { return *CU; }
+ const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
+ void addRange(RangeSpan Range) { Ranges.push_back(Range); }
+};
+
class DwarfFile {
// Target of Dwarf emission, used for sizing of abbreviations.
AsmPrinter *Asm;
@@ -44,6 +76,10 @@ class DwarfFile {
DwarfStringPool StrPool;
+ // List of range lists for a given compile unit, separate from the ranges for
+ // the CU itself.
+ SmallVector<RangeSpanList, 1> CURangeLists;
+
/// DWARF v5: The symbol that designates the start of the contribution to
/// the string offsets table. The contribution is shared by all units.
MCSymbol *StringOffsetsStartSym = nullptr;
@@ -52,6 +88,10 @@ class DwarfFile {
/// The table is shared by all units.
MCSymbol *RnglistsTableBaseSym = nullptr;
+ /// DWARF v5: The symbol that designates the base of the locations list table.
+ /// The table is shared by all units.
+ MCSymbol *LoclistsTableBaseSym = nullptr;
+
/// The variables of a lexical scope.
struct ScopeVars {
/// We need to sort Args by ArgNo and check for duplicates. This could also
@@ -62,9 +102,13 @@ class DwarfFile {
/// Collection of DbgVariables of each lexical scope.
DenseMap<LexicalScope *, ScopeVars> ScopeVariables;
+ /// Collection of DbgLabels of each lexical scope.
+ using LabelList = SmallVector<DbgLabel *, 4>;
+ DenseMap<LexicalScope *, LabelList> ScopeLabels;
+
// Collection of abstract subprogram DIEs.
DenseMap<const MDNode *, DIE *> AbstractSPDies;
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
/// be shared across CUs, that is why we keep the map here instead
@@ -78,6 +122,14 @@ public:
return CUs;
}
+ std::pair<uint32_t, RangeSpanList *> addRange(const DwarfCompileUnit &CU,
+ SmallVector<RangeSpan, 2> R);
+
+ /// getRangeLists - Get the vector of range lists.
+ const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
+ return CURangeLists;
+ }
+
/// Compute the size and offset of a DIE given an incoming Offset.
unsigned computeSizeAndOffset(DIE &Die, unsigned Offset);
@@ -112,26 +164,33 @@ public:
DwarfStringPool &getStringPool() { return StrPool; }
MCSymbol *getStringOffsetsStartSym() const { return StringOffsetsStartSym; }
-
void setStringOffsetsStartSym(MCSymbol *Sym) { StringOffsetsStartSym = Sym; }
MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; }
-
void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; }
+ MCSymbol *getLoclistsTableBaseSym() const { return LoclistsTableBaseSym; }
+ void setLoclistsTableBaseSym(MCSymbol *Sym) { LoclistsTableBaseSym = Sym; }
+
/// \returns false if the variable was merged with a previous one.
bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+ void addScopeLabel(LexicalScope *LS, DbgLabel *Label);
+
DenseMap<LexicalScope *, ScopeVars> &getScopeVariables() {
return ScopeVariables;
}
+ DenseMap<LexicalScope *, LabelList> &getScopeLabels() {
+ return ScopeLabels;
+ }
+
DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
return AbstractSPDies;
}
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
- return AbstractVariables;
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
+ return AbstractEntities;
}
void insertDIE(const MDNode *TypeMD, DIE *Die) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index a61fa83cfb03..02016534a774 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -24,25 +24,39 @@ DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm,
: Pool(A), Prefix(Prefix),
ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {}
-DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
- StringRef Str) {
+StringMapEntry<DwarfStringPool::EntryTy> &
+DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) {
auto I = Pool.insert(std::make_pair(Str, EntryTy()));
+ auto &Entry = I.first->second;
if (I.second) {
- auto &Entry = I.first->second;
- Entry.Index = Pool.size() - 1;
+ Entry.Index = EntryTy::NotIndexed;
Entry.Offset = NumBytes;
Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr;
NumBytes += Str.size() + 1;
assert(NumBytes > Entry.Offset && "Unexpected overflow");
}
- return EntryRef(*I.first);
+ return *I.first;
+}
+
+DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
+ StringRef Str) {
+ auto &MapEntry = getEntryImpl(Asm, Str);
+ return EntryRef(MapEntry, false);
+}
+
+DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm,
+ StringRef Str) {
+ auto &MapEntry = getEntryImpl(Asm, Str);
+ if (!MapEntry.getValue().isIndexed())
+ MapEntry.getValue().Index = NumIndexedStrings++;
+ return EntryRef(MapEntry, true);
}
void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
MCSection *Section,
MCSymbol *StartSym) {
- if (empty())
+ if (getNumIndexedStrings() == 0)
return;
Asm.OutStreamer->SwitchSection(Section);
unsigned EntrySize = 4;
@@ -51,7 +65,7 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
// table. The header consists of an entry with the contribution's
// size (not including the size of the length field), the DWARF version and
// 2 bytes of padding.
- Asm.emitInt32(size() * EntrySize + 4);
+ Asm.emitInt32(getNumIndexedStrings() * EntrySize + 4);
Asm.emitInt16(Asm.getDwarfVersion());
Asm.emitInt16(0);
// Define the symbol that marks the start of the contribution. It is
@@ -69,12 +83,17 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
// Start the dwarf str section.
Asm.OutStreamer->SwitchSection(StrSection);
- // Get all of the string pool entries and put them in an array by their ID so
- // we can sort them.
- SmallVector<const StringMapEntry<EntryTy> *, 64> Entries(Pool.size());
+ // Get all of the string pool entries and sort them by their offset.
+ SmallVector<const StringMapEntry<EntryTy> *, 64> Entries;
+ Entries.reserve(Pool.size());
for (const auto &E : Pool)
- Entries[E.getValue().Index] = &E;
+ Entries.push_back(&E);
+
+ llvm::sort(Entries, [](const StringMapEntry<EntryTy> *A,
+ const StringMapEntry<EntryTy> *B) {
+ return A->getValue().Offset < B->getValue().Offset;
+ });
for (const auto &Entry : Entries) {
assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) &&
@@ -93,6 +112,14 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
// If we've got an offset section go ahead and emit that now as well.
if (OffsetSection) {
+ // Now only take the indexed entries and put them in an array by their ID so
+ // we can emit them in order.
+ Entries.resize(NumIndexedStrings);
+ for (const auto &Entry : Pool) {
+ if (Entry.getValue().isIndexed())
+ Entries[Entry.getValue().Index] = &Entry;
+ }
+
Asm.OutStreamer->SwitchSection(OffsetSection);
unsigned size = 4; // FIXME: DWARF64 is 8.
for (const auto &Entry : Entries)
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index 6e6988ea4ad4..f484540d8d37 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -30,8 +30,11 @@ class DwarfStringPool {
StringMap<EntryTy, BumpPtrAllocator &> Pool;
StringRef Prefix;
unsigned NumBytes = 0;
+ unsigned NumIndexedStrings = 0;
bool ShouldCreateSymbols;
+ StringMapEntry<EntryTy> &getEntryImpl(AsmPrinter &Asm, StringRef Str);
+
public:
using EntryRef = DwarfStringPoolEntryRef;
@@ -48,8 +51,15 @@ public:
unsigned size() const { return Pool.size(); }
+ unsigned getNumIndexedStrings() const { return NumIndexedStrings; }
+
/// Get a reference to an entry in the string pool.
EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
+
+ /// Same as getEntry, except that you can use EntryRef::getIndex to obtain a
+ /// unique ID of this entry (e.g., for use in indexed forms like
+ /// DW_FORM_strx).
+ EntryRef getIndexedEntry(AsmPrinter &Asm, StringRef Str);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 600f4a78fda0..80b365f1aa43 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -234,15 +234,23 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
+ if (CUNode->isDebugDirectivesOnly())
+ return;
+
if (DD->useInlineStrings()) {
Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_string,
new (DIEValueAllocator)
DIEInlineString(String, DIEValueAllocator));
return;
}
- auto StringPoolEntry = DU->getStringPool().getEntry(*Asm, String);
dwarf::Form IxForm =
isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp;
+
+ auto StringPoolEntry =
+ useSegmentedStringOffsetsTable() || IxForm == dwarf::DW_FORM_GNU_str_index
+ ? DU->getStringPool().getIndexedEntry(*Asm, String)
+ : DU->getStringPool().getEntry(*Asm, String);
+
// For DWARF v5 and beyond, use the smallest strx? form possible.
if (useSegmentedStringOffsetsTable()) {
IxForm = dwarf::DW_FORM_strx1;
@@ -307,14 +315,21 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
}
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
- if (!DD->useSplitDwarf()) {
- addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Die, dwarf::DW_FORM_udata, Sym);
- } else {
+ if (DD->getDwarfVersion() >= 5) {
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx);
+ addUInt(Die, dwarf::DW_FORM_addrx, DD->getAddressPool().getIndex(Sym));
+ return;
+ }
+
+ if (DD->useSplitDwarf()) {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
addUInt(Die, dwarf::DW_FORM_GNU_addr_index,
DD->getAddressPool().getIndex(Sym));
+ return;
}
+
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Die, dwarf::DW_FORM_udata, Sym);
}
void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
@@ -401,6 +416,12 @@ void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) {
addSourceLine(Die, SP->getLine(), SP->getFile());
}
+void DwarfUnit::addSourceLine(DIE &Die, const DILabel *L) {
+ assert(L);
+
+ addSourceLine(Die, L->getLine(), L->getFile());
+}
+
void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) {
assert(Ty);
@@ -413,138 +434,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
addSourceLine(Die, Ty->getLine(), Ty->getFile());
}
-/* Byref variables, in Blocks, are declared by the programmer as "SomeType
- VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
- gives the variable VarName either the struct, or a pointer to the struct, as
- its type. This is necessary for various behind-the-scenes things the
- compiler needs to do with by-reference variables in Blocks.
-
- However, as far as the original *programmer* is concerned, the variable
- should still have type 'SomeType', as originally declared.
-
- The function getBlockByrefType dives into the __Block_byref_x_VarName
- struct to find the original type of the variable, which is then assigned to
- the variable's Debug Information Entry as its real type. So far, so good.
- However now the debugger will expect the variable VarName to have the type
- SomeType. So we need the location attribute for the variable to be an
- expression that explains to the debugger how to navigate through the
- pointers and struct to find the actual variable of type SomeType.
-
- The following function does just that. We start by getting
- the "normal" location for the variable. This will be the location
- of either the struct __Block_byref_x_VarName or the pointer to the
- struct __Block_byref_x_VarName.
-
- The struct will look something like:
-
- struct __Block_byref_x_VarName {
- ... <various fields>
- struct __Block_byref_x_VarName *forwarding;
- ... <various other fields>
- SomeType VarName;
- ... <maybe more fields>
- };
-
- If we are given the struct directly (as our starting point) we
- need to tell the debugger to:
-
- 1). Add the offset of the forwarding field.
-
- 2). Follow that pointer to get the real __Block_byref_x_VarName
- struct to use (the real one may have been copied onto the heap).
-
- 3). Add the offset for the field VarName, to find the actual variable.
-
- If we started with a pointer to the struct, then we need to
- dereference that pointer first, before the other steps.
- Translating this into DWARF ops, we will need to append the following
- to the current location description for the variable:
-
- DW_OP_deref -- optional, if we start with a pointer
- DW_OP_plus_uconst <forward_fld_offset>
- DW_OP_deref
- DW_OP_plus_uconst <varName_fld_offset>
-
- That is what this function does. */
-
-void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
- dwarf::Attribute Attribute,
- const MachineLocation &Location) {
- const DIType *Ty = DV.getType();
- const DIType *TmpTy = Ty;
- uint16_t Tag = Ty->getTag();
- bool isPointer = false;
-
- StringRef varName = DV.getName();
-
- if (Tag == dwarf::DW_TAG_pointer_type) {
- auto *DTy = cast<DIDerivedType>(Ty);
- TmpTy = resolve(DTy->getBaseType());
- isPointer = true;
- }
-
- // Find the __forwarding field and the variable field in the __Block_byref
- // struct.
- DINodeArray Fields = cast<DICompositeType>(TmpTy)->getElements();
- const DIDerivedType *varField = nullptr;
- const DIDerivedType *forwardingField = nullptr;
-
- for (unsigned i = 0, N = Fields.size(); i < N; ++i) {
- auto *DT = cast<DIDerivedType>(Fields[i]);
- StringRef fieldName = DT->getName();
- if (fieldName == "__forwarding")
- forwardingField = DT;
- else if (fieldName == varName)
- varField = DT;
- }
-
- // Get the offsets for the forwarding field and the variable field.
- unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3;
- unsigned varFieldOffset = varField->getOffsetInBits() >> 2;
-
- // Decode the original location, and use that as the start of the byref
- // variable's location.
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- if (Location.isIndirect())
- DwarfExpr.setMemoryLocationKind();
-
- SmallVector<uint64_t, 6> Ops;
- // If we started with a pointer to the __Block_byref... struct, then
- // the first thing we need to do is dereference the pointer (DW_OP_deref).
- if (isPointer)
- Ops.push_back(dwarf::DW_OP_deref);
-
- // Next add the offset for the '__forwarding' field:
- // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
- // adding the offset if it's 0.
- if (forwardingFieldOffset > 0) {
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(forwardingFieldOffset);
- }
-
- // Now dereference the __forwarding field to get to the real __Block_byref
- // struct: DW_OP_deref.
- Ops.push_back(dwarf::DW_OP_deref);
-
- // Now that we've got the real __Block_byref... struct, add the offset
- // for the variable's field to get to the location of the actual variable:
- // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
- if (varFieldOffset > 0) {
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(varFieldOffset);
- }
-
- DIExpressionCursor Cursor(Ops);
- const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
- if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
- return;
- DwarfExpr.addExpression(std::move(Cursor));
-
- // Now attach the location information to the DIE.
- addBlock(Die, Attribute, DwarfExpr.finalize());
-}
-
/// Return true if type encoding is unsigned.
static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
@@ -787,7 +676,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
}
unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
- DD->addAccelType(Ty->getName(), TyDIE, Flags);
+ DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags);
if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
isa<DINamespace>(Context))
@@ -851,6 +740,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
uint64_t Size = BTy->getSizeInBits() >> 3;
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+
+ if (BTy->isBigEndian())
+ addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_big);
+ else if (BTy->isLittleEndian())
+ addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little);
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
@@ -1155,7 +1049,7 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
addString(NDie, dwarf::DW_AT_name, NS->getName());
else
Name = "(anonymous namespace)";
- DD->addAccelNamespace(Name, NDie);
+ DD->addAccelNamespace(*CUNode, Name, NDie);
addGlobalName(Name, NDie, NS->getScope());
if (NS->getExportSymbols())
addFlag(NDie, dwarf::DW_AT_export_symbols);
@@ -1404,7 +1298,7 @@ DIE *DwarfUnit::getIndexTyDie() {
addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_unsigned);
- DD->addAccelType(Name, *IndexTyDie, /*Flags*/ 0);
+ DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0);
return IndexTyDie;
}
@@ -1467,7 +1361,7 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (DTy) {
if (DD->getDwarfVersion() >= 3)
addType(Buffer, DTy);
- if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagFixedEnum))
+ if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagEnumClass))
addFlag(Buffer, dwarf::DW_AT_enum_class);
}
@@ -1659,7 +1553,14 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
Asm->OutStreamer->AddComment("Length of Unit");
- Asm->emitInt32(getHeaderSize() + getUnitDie().getSize());
+ if (!DD->useSectionsAsReferences()) {
+ StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_";
+ MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start");
+ EndLabel = Asm->createTempSymbol(Prefix + "end");
+ Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+ Asm->OutStreamer->EmitLabel(BeginLabel);
+ } else
+ Asm->emitInt32(getHeaderSize() + getUnitDie().getSize());
Asm->OutStreamer->AddComment("DWARF version number");
unsigned Version = DD->getDwarfVersion();
@@ -1761,3 +1662,12 @@ void DwarfUnit::addRnglistsBase() {
DU->getRnglistsTableBaseSym(),
TLOF.getDwarfRnglistsSection()->getBeginSymbol());
}
+
+void DwarfUnit::addLoclistsBase() {
+ assert(DD->getDwarfVersion() >= 5 &&
+ "DW_AT_loclists_base requires DWARF version 5 or later");
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ addSectionLabel(getUnitDie(), dwarf::DW_AT_loclists_base,
+ DU->getLoclistsTableBaseSym(),
+ TLOF.getDwarfLoclistsSection()->getBeginSymbol());
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 69696f626536..a59ebb7c1465 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -35,33 +35,6 @@ class ConstantFP;
class DbgVariable;
class DwarfCompileUnit;
-// Data structure to hold a range for range lists.
-class RangeSpan {
-public:
- RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {}
- const MCSymbol *getStart() const { return Start; }
- const MCSymbol *getEnd() const { return End; }
- void setEnd(const MCSymbol *E) { End = E; }
-
-private:
- const MCSymbol *Start, *End;
-};
-
-class RangeSpanList {
-private:
- // Index for locating within the debug_range section this particular span.
- MCSymbol *RangeSym;
- // List of ranges.
- SmallVector<RangeSpan, 2> Ranges;
-
-public:
- RangeSpanList(MCSymbol *Sym, SmallVector<RangeSpan, 2> Ranges)
- : RangeSym(Sym), Ranges(std::move(Ranges)) {}
- MCSymbol *getSym() const { return RangeSym; }
- const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
- void addRange(RangeSpan Range) { Ranges.push_back(Range); }
-};
-
//===----------------------------------------------------------------------===//
/// This dwarf writer support class manages information associated with a
/// source file.
@@ -76,6 +49,9 @@ protected:
/// Target of Dwarf emission.
AsmPrinter *Asm;
+ /// Emitted at the end of the CU and used to compute the CU Length field.
+ MCSymbol *EndLabel = nullptr;
+
// Holders for some common dwarf information.
DwarfDebug *DD;
DwarfFile *DU;
@@ -109,6 +85,7 @@ protected:
public:
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
+ MCSymbol *getEndLabel() const { return EndLabel; }
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
const DICompileUnit *getCUNode() const { return CUNode; }
@@ -213,6 +190,7 @@ public:
void addSourceLine(DIE &Die, const DILocalVariable *V);
void addSourceLine(DIE &Die, const DIGlobalVariable *G);
void addSourceLine(DIE &Die, const DISubprogram *SP);
+ void addSourceLine(DIE &Die, const DILabel *L);
void addSourceLine(DIE &Die, const DIType *Ty);
void addSourceLine(DIE &Die, const DIObjCProperty *Ty);
@@ -298,6 +276,9 @@ public:
/// Add the DW_AT_rnglists_base attribute to the unit DIE.
void addRnglistsBase();
+ /// Add the DW_AT_loclists_base attribute to the unit DIE.
+ void addLoclistsBase();
+
virtual DwarfCompileUnit &getCU() = 0;
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 65de9d7e65a4..7599121de2b0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -99,7 +99,7 @@ void EHStreamer::computeActionsTable(
FirstActions.reserve(LandingPads.size());
int FirstAction = 0;
- unsigned SizeActions = 0;
+ unsigned SizeActions = 0; // Total size of all action entries for a function
const LandingPadInfo *PrevLPI = nullptr;
for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
@@ -107,23 +107,24 @@ void EHStreamer::computeActionsTable(
const LandingPadInfo *LPI = *I;
const std::vector<int> &TypeIds = LPI->TypeIds;
unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0;
- unsigned SizeSiteActions = 0;
+ unsigned SizeSiteActions = 0; // Total size of all entries for a landingpad
if (NumShared < TypeIds.size()) {
- unsigned SizeAction = 0;
+ // Size of one action entry (typeid + next action)
+ unsigned SizeActionEntry = 0;
unsigned PrevAction = (unsigned)-1;
if (NumShared) {
unsigned SizePrevIds = PrevLPI->TypeIds.size();
assert(Actions.size());
PrevAction = Actions.size() - 1;
- SizeAction = getSLEB128Size(Actions[PrevAction].NextAction) +
- getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeActionEntry = getSLEB128Size(Actions[PrevAction].NextAction) +
+ getSLEB128Size(Actions[PrevAction].ValueForTypeID);
for (unsigned j = NumShared; j != SizePrevIds; ++j) {
assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
- SizeAction -= getSLEB128Size(Actions[PrevAction].ValueForTypeID);
- SizeAction += -Actions[PrevAction].NextAction;
+ SizeActionEntry -= getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeActionEntry += -Actions[PrevAction].NextAction;
PrevAction = Actions[PrevAction].Previous;
}
}
@@ -136,9 +137,9 @@ void EHStreamer::computeActionsTable(
isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID;
unsigned SizeTypeID = getSLEB128Size(ValueForTypeID);
- int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
- SizeAction = SizeTypeID + getSLEB128Size(NextAction);
- SizeSiteActions += SizeAction;
+ int NextAction = SizeActionEntry ? -(SizeActionEntry + SizeTypeID) : 0;
+ SizeActionEntry = SizeTypeID + getSLEB128Size(NextAction);
+ SizeSiteActions += SizeActionEntry;
ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
Actions.push_back(Action);
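A hedged sketch of the size bookkeeping in this hunk, with a local slebSize helper standing in for llvm::getSLEB128Size and an illustrative list of type ids: each action record is a typeid plus a self-relative next-action offset, and NextAction points backwards by the size of the record appended just before it.

    #include <cstdint>
    #include <cstdio>

    // Number of bytes a value occupies when encoded as SLEB128
    // (assumes arithmetic right shift of negative values).
    static unsigned slebSize(int64_t Value) {
      unsigned Size = 0;
      bool More;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        More = !((Value == 0 && (Byte & 0x40) == 0) ||
                 (Value == -1 && (Byte & 0x40) != 0));
        ++Size;
      } while (More);
      return Size;
    }

    int main() {
      int TypeIds[] = {1, 2, 3};     // catch clauses of one landing pad
      unsigned SizeActionEntry = 0;  // size of the previously appended record
      unsigned SizeSiteActions = 0;  // total size for this landing pad
      for (int ValueForTypeID : TypeIds) {
        unsigned SizeTypeID = slebSize(ValueForTypeID);
        // 0 terminates the chain; otherwise hop back over the previous record.
        int NextAction = SizeActionEntry ? -int(SizeActionEntry + SizeTypeID) : 0;
        SizeActionEntry = SizeTypeID + slebSize(NextAction);
        SizeSiteActions += SizeActionEntry;
        std::printf("typeid=%d next=%d entry=%u total=%u\n", ValueForTypeID,
                    NextAction, SizeActionEntry, SizeSiteActions);
      }
    }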
@@ -146,7 +147,7 @@ void EHStreamer::computeActionsTable(
}
// Record the first action of the landing pad site.
- FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ FirstAction = SizeActions + SizeSiteActions - SizeActionEntry + 1;
} // else identical - re-use previous FirstAction
// Information used when creating the call-site table. The action record
@@ -344,7 +345,9 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
/// unwound and handling continues.
/// 3. Type ID table contains references to all the C++ typeinfo for all
/// catches in the function. This table is reverse indexed base 1.
-void EHStreamer::emitExceptionTable() {
+///
+/// Returns the starting symbol of an exception table.
+MCSymbol *EHStreamer::emitExceptionTable() {
const MachineFunction *MF = Asm->MF;
const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
const std::vector<unsigned> &FilterIds = MF->getFilterIds();
@@ -359,9 +362,9 @@ void EHStreamer::emitExceptionTable() {
LandingPads.push_back(&PadInfos[i]);
// Order landing pads lexicographically by type id.
- llvm::sort(LandingPads.begin(), LandingPads.end(),
- [](const LandingPadInfo *L,
- const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; });
+ llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) {
+ return L->TypeIds < R->TypeIds;
+ });
// Compute the actions table and gather the first action index for each
// landing pad site.
@@ -374,6 +377,7 @@ void EHStreamer::emitExceptionTable() {
computeCallSiteTable(CallSites, LandingPads, FirstActions);
bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm;
unsigned CallSiteEncoding =
IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128;
bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
@@ -456,8 +460,8 @@ void EHStreamer::emitExceptionTable() {
Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
Asm->OutStreamer->EmitLabel(CstBeginLabel);
- // SjLj Exception handling
- if (IsSJLJ) {
+ // SjLj / Wasm Exception handling
+ if (IsSJLJ || IsWasm) {
unsigned idx = 0;
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
@@ -603,6 +607,7 @@ void EHStreamer::emitExceptionTable() {
}
Asm->EmitAlignment(2);
+ return GCCETSym;
}
void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index b89421a1e067..ce912d032c6d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -14,8 +14,8 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
-#include "AsmPrinterHandler.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -85,9 +85,10 @@ protected:
/// zero for the landing pad and the action. Calls marked 'nounwind' have
/// no entry and must not be contained in the try-range of any entry - they
/// form gaps in the table. Entries must be ordered by try-range address.
- void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
- const SmallVectorImpl<unsigned> &FirstActions);
+ virtual void computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions);
/// Emit landing pads and actions.
///
@@ -108,7 +109,9 @@ protected:
/// found the frame is unwound and handling continues.
/// 3. Type id table contains references to all the C++ typeinfo for all
/// catches in the function. This table is reverse indexed base 1.
- void emitExceptionTable();
+ ///
+ /// Returns the starting symbol of an exception table.
+ MCSymbol *emitExceptionTable();
virtual void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 49cc376fcc98..34677ecc9e69 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -15,10 +15,10 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 59a57ed30d10..3479a00def23 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -15,9 +15,9 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
-#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Mangler.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
new file mode 100644
index 000000000000..527e5ae50146
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -0,0 +1,97 @@
+//===-- CodeGen/AsmPrinter/WasmException.cpp - Wasm Exception Impl --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing WebAssembly exception info into asm
+// files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "WasmException.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+using namespace llvm;
+
+void WasmException::endModule() {
+ // This is the symbol used in 'throw' and 'if_except' instructions to denote
+ // this is a C++ exception. This symbol has to be emitted somewhere once in
+ // the module. Check if the symbol has already been created, i.e., we have at
+ // least one 'throw' or 'if_except' instruction in the module, and emit the
+ // symbol only if so.
+ SmallString<60> NameStr;
+ Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
+ if (Asm->OutContext.lookupSymbol(NameStr)) {
+ MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception");
+ Asm->OutStreamer->EmitLabel(ExceptionSym);
+ }
+}
+
+void WasmException::markFunctionEnd() {
+ // Get rid of any dead landing pads.
+ if (!Asm->MF->getLandingPads().empty()) {
+ auto *NonConstMF = const_cast<MachineFunction *>(Asm->MF);
+ // Wasm does not set BeginLabel and EndLabel information for landing pads,
+ // so we should set the second argument to false.
+ NonConstMF->tidyLandingPads(nullptr, /* TidyIfNoBeginLabels */ false);
+ }
+}
+
+void WasmException::endFunction(const MachineFunction *MF) {
+ bool ShouldEmitExceptionTable = false;
+ for (const LandingPadInfo &Info : MF->getLandingPads()) {
+ if (MF->hasWasmLandingPadIndex(Info.LandingPadBlock)) {
+ ShouldEmitExceptionTable = true;
+ break;
+ }
+ }
+ if (!ShouldEmitExceptionTable)
+ return;
+ MCSymbol *LSDALabel = emitExceptionTable();
+ assert(LSDALabel && ".GCC_exception_table has not been emitted!");
+
+ // Wasm requires every data section symbol to have a .size set. So we emit an
+ // end marker and set the size as the difference between the start and the end
+ // marker.
+ MCSymbol *LSDAEndLabel = Asm->createTempSymbol("GCC_except_table_end");
+ Asm->OutStreamer->EmitLabel(LSDAEndLabel);
+ MCContext &OutContext = Asm->OutStreamer->getContext();
+ const MCExpr *SizeExp = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(LSDAEndLabel, OutContext),
+ MCSymbolRefExpr::create(LSDALabel, OutContext), OutContext);
+ Asm->OutStreamer->emitELFSize(LSDALabel, SizeExp);
+}
+
+// Compute the call-site table for wasm EH. Even though we use the same function
+// name to share the common routines, a call site entry in the table corresponds
+// not to a call site for possibly-throwing functions but to a landing pad. In
+// wasm EH the VM is responsible for stack unwinding. After an exception occurs
+// and the stack is unwound, control is transferred to the wasm 'catch'
+// instruction by the VM, after which the personality function is called from
+// the compiler-generated code. Refer to the WasmEHPrepare pass for more
+// information.
+void WasmException::computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ MachineFunction &MF = *Asm->MF;
+ for (unsigned I = 0, N = LandingPads.size(); I < N; ++I) {
+ const LandingPadInfo *Info = LandingPads[I];
+ MachineBasicBlock *LPad = Info->LandingPadBlock;
+ // We don't emit LSDA for single catch (...).
+ if (!MF.hasWasmLandingPadIndex(LPad))
+ continue;
+ // Wasm EH must maintain the EH pads in the order assigned to them by the
+ // WasmEHPrepare pass.
+ unsigned LPadIndex = MF.getWasmLandingPadIndex(LPad);
+ CallSiteEntry Site = {nullptr, nullptr, Info, FirstActions[I]};
+ if (CallSites.size() < LPadIndex + 1)
+ CallSites.resize(LPadIndex + 1);
+ CallSites[LPadIndex] = Site;
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.h
new file mode 100644
index 000000000000..cbdb42457cf8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.h
@@ -0,0 +1,42 @@
+//===-- WasmException.h - Wasm Exception Framework -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing WebAssembly exception info into asm
+// files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H
+
+#include "EHStreamer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+
+namespace llvm {
+
+class LLVM_LIBRARY_VISIBILITY WasmException : public EHStreamer {
+public:
+ WasmException(AsmPrinter *A) : EHStreamer(A) {}
+
+ void endModule() override;
+ void beginFunction(const MachineFunction *MF) override {}
+ virtual void markFunctionEnd() override;
+ void endFunction(const MachineFunction *MF) override;
+
+protected:
+ // Compute the call site table for wasm EH.
+ void computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) override;
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
index 124e8f04bfad..28f119e35966 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -14,7 +14,7 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H
-#include "AsmPrinterHandler.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index eff73a58d8d2..cf8e8c69bc2a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -42,6 +42,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
// MSVC's EH tables are always composed of 32-bit words. All known 64-bit
// platforms use an imagerel32 relocation to refer to symbols.
useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64);
+ isAArch64 = Asm->TM.getTargetTriple().isAArch64();
}
WinException::~WinException() {}
@@ -242,6 +243,17 @@ void WinException::endFunclet() {
if (F.hasPersonalityFn())
Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
+ // On funclet exit, we emit a fake "function" end marker, so that the call
+ // to EmitWinEHHandlerData below can calculate the size of the funclet or
+ // function.
+ if (isAArch64) {
+ Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
+ Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+ MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
+ Asm->OutStreamer->getCurrentSectionOnly());
+ Asm->OutStreamer->SwitchSection(XData);
+ }
+
// Emit an UNWIND_INFO struct describing the prologue.
Asm->OutStreamer->EmitWinEHHandlerData();
@@ -286,7 +298,10 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
return create32bitRef(Asm->getSymbol(GV));
}
-const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) {
+const MCExpr *WinException::getLabel(const MCSymbol *Label) {
+ if (isAArch64)
+ return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32,
+ Asm->OutContext);
return MCBinaryExpr::createAdd(create32bitRef(Label),
MCConstantExpr::create(1, Asm->OutContext),
Asm->OutContext);
@@ -531,7 +546,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
};
// Emit a label assignment with the SEH frame offset so we can use it for
- // llvm.x86.seh.recoverfp.
+ // llvm.eh.recoverfp.
StringRef FLinkageName =
GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
MCSymbol *ParentFrameOffset =
@@ -588,7 +603,6 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
const MCSymbol *EndLabel, int State) {
auto &OS = *Asm->OutStreamer;
MCContext &Ctx = Asm->OutContext;
-
bool VerboseAsm = OS.isVerboseAsm();
auto AddComment = [&](const Twine &Comment) {
if (VerboseAsm)
@@ -613,9 +627,9 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
}
AddComment("LabelStart");
- OS.EmitValue(getLabelPlusOne(BeginLabel), 4);
+ OS.EmitValue(getLabel(BeginLabel), 4);
AddComment("LabelEnd");
- OS.EmitValue(getLabelPlusOne(EndLabel), 4);
+ OS.EmitValue(getLabel(EndLabel), 4);
AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"
: "CatchAll");
OS.EmitValue(FilterOrFinally, 4);
@@ -799,7 +813,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// TypeDescriptor *Type;
// int32_t CatchObjOffset;
// void (*Handler)();
- // int32_t ParentFrameOffset; // x64 only
+ // int32_t ParentFrameOffset; // x64 and AArch64 only
// };
OS.EmitLabel(HandlerMapXData);
for (const WinEHHandlerType &HT : TBME.HandlerArray) {
@@ -901,7 +915,7 @@ void WinException::computeIP2StateTable(
ChangeLabel = StateChange.PreviousEndLabel;
// Emit an entry indicating that PCs after 'Label' have this EH state.
IPToStateTable.push_back(
- std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState));
+ std::make_pair(getLabel(ChangeLabel), StateChange.NewState));
// FIXME: assert that NewState is between CatchLow and CatchHigh.
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
index eed3c4453ffc..37c796f89765 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -38,6 +38,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// True if this is a 64-bit target and we should use image relative offsets.
bool useImageRel32 = false;
+ /// True if we are generating exception handling on Windows for ARM64.
+ bool isAArch64 = false;
+
/// Pointer to the current funclet entry BB.
const MachineBasicBlock *CurrentFuncletEntry = nullptr;
@@ -65,14 +68,14 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
const MachineFunction *MF, const WinEHFuncInfo &FuncInfo,
SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable);
- /// Emits the label used with llvm.x86.seh.recoverfp, which is used by
+ /// Emits the label used with llvm.eh.recoverfp, which is used by
/// outlined funclets.
void emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
StringRef FLinkageName);
const MCExpr *create32bitRef(const MCSymbol *Value);
const MCExpr *create32bitRef(const GlobalValue *GV);
- const MCExpr *getLabelPlusOne(const MCSymbol *Label);
+ const MCExpr *getLabel(const MCSymbol *Label);
const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
const MCSymbol *OffsetFrom);
diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
index e28fc6fb9d4f..95581c09dd1c 100644
--- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -88,7 +88,10 @@ namespace {
void expandPartwordAtomicRMW(
AtomicRMWInst *I,
TargetLoweringBase::AtomicExpansionKind ExpansionKind);
+ AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+ void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
+ void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
static Value *insertRMWCmpXchgLoop(
@@ -96,6 +99,7 @@ namespace {
AtomicOrdering MemOpOrder,
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
CreateCmpXchgInstFun CreateCmpXchg);
+ bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool isIdempotentRMW(AtomicRMWInst *RMWI);
@@ -258,7 +262,9 @@ bool AtomicExpand::runOnFunction(Function &F) {
isAcquireOrStronger(RMWI->getOrdering()))) {
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(AtomicOrdering::Monotonic);
- } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
+ } else if (CASI &&
+ TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
+ TargetLoweringBase::AtomicExpansionKind::None &&
(isReleaseOrStronger(CASI->getSuccessOrdering()) ||
isAcquireOrStronger(CASI->getSuccessOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
@@ -306,6 +312,16 @@ bool AtomicExpand::runOnFunction(Function &F) {
if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
MadeChange = true;
} else {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(RMWI);
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ if (ValueSize < MinCASSize &&
+ (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
+ Op == AtomicRMWInst::And)) {
+ RMWI = widenPartwordAtomicRMW(RMWI);
+ MadeChange = true;
+ }
+
MadeChange |= tryExpandAtomicRMW(RMWI);
}
} else if (CASI) {
@@ -322,16 +338,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
MadeChange = true;
}
- unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
- unsigned ValueSize = getAtomicOpSize(CASI);
- if (ValueSize < MinCASSize) {
- assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
- "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
- expandPartwordCmpXchg(CASI);
- } else {
- if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
- MadeChange |= expandAtomicCmpXchg(CASI);
- }
+ MadeChange |= tryExpandAtomicCmpXchg(CASI);
}
}
return MadeChange;
@@ -400,8 +407,9 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
return expandAtomicLoadToLL(LI);
case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
return expandAtomicLoadToCmpXchg(LI);
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
- llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
@@ -563,6 +571,10 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
}
return true;
}
+ case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
+ expandAtomicRMWToMaskedIntrinsic(AI);
+ return true;
+ }
default:
llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
@@ -651,6 +663,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
IRBuilder<> &Builder, Value *Loaded,
Value *Shifted_Inc, Value *Inc,
const PartwordMaskValues &PMV) {
+ // TODO: update to use
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
+ // to merge bits from two values without requiring PMV.Inv_Mask.
switch (Op) {
case AtomicRMWInst::Xchg: {
Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
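The bit hack referenced in the TODO above can be sketched as follows (standalone example, not the pass itself; maskedMerge and the literal values are illustrative): bits from the shifted operand are merged into the loaded word with one XOR/AND/XOR, so the inverse mask never has to be materialized.

    #include <cassert>
    #include <cstdint>

    // MaskedMerge: take bits from B where Mask is 1, bits from A elsewhere.
    static uint32_t maskedMerge(uint32_t A, uint32_t B, uint32_t Mask) {
      return A ^ ((A ^ B) & Mask);
    }

    int main() {
      uint32_t Loaded = 0xAABBCCDD;     // current wide word
      uint32_t ShiftedInc = 0x00EE0000; // new value already shifted into its lane
      uint32_t Mask = 0x00FF0000;       // lane occupied by the narrow atomic
      uint32_t Merged = maskedMerge(Loaded, ShiftedInc, Mask);
      // Equivalent to (Loaded & ~Mask) | (ShiftedInc & Mask), without ~Mask.
      assert(Merged == ((Loaded & ~Mask) | (ShiftedInc & Mask)));
      assert(Merged == 0xAAEECCDD);
    }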
@@ -659,12 +674,10 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
}
case AtomicRMWInst::Or:
case AtomicRMWInst::Xor:
- // Or/Xor won't affect any other bits, so can just be done
- // directly.
- return performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ case AtomicRMWInst::And:
+ llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
- case AtomicRMWInst::And:
case AtomicRMWInst::Nand: {
// The other arithmetic ops need to be masked into place.
Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
@@ -733,6 +746,41 @@ void AtomicExpand::expandPartwordAtomicRMW(
AI->eraseFromParent();
}
+// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
+AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
+ IRBuilder<> Builder(AI);
+ AtomicRMWInst::BinOp Op = AI->getOperation();
+
+ assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
+ Op == AtomicRMWInst::And) &&
+ "Unable to widen operation");
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ Value *ValOperand_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+
+ Value *NewOperand;
+
+ if (Op == AtomicRMWInst::And)
+ NewOperand =
+ Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
+ else
+ NewOperand = ValOperand_Shifted;
+
+ AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
+ NewOperand, AI->getOrdering());
+
+ Value *FinalOldResult = Builder.CreateTrunc(
+ Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+ return NewAI;
+}
+
void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
// The basic idea here is that we're expanding a cmpxchg of a
// smaller memory size up to a word-sized cmpxchg. To do this, we
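A minimal plain-integer sketch (not IR) of why the widened operand differs for 'and' in widenPartwordAtomicRMW above: or/xor only need zeros outside the narrow lane to leave the neighbouring bytes alone, whereas 'and' needs ones there, which is what OR-ing in Inv_Mask achieves. The values below are illustrative.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Word = 0x11223344;  // aligned word containing the i8 at bits 8..15
      uint32_t Mask = 0x0000FF00;  // lane of the narrow atomic
      uint32_t InvMask = ~Mask;
      uint8_t  Val = 0x0F;         // operand of the original i8 atomicrmw
      uint32_t Shifted = uint32_t(Val) << 8; // ValOperand_Shifted

      // or/xor: zeros outside the lane already leave the other bytes alone.
      assert((Word | Shifted) >> 16 == Word >> 16);
      assert((Word ^ Shifted) >> 16 == Word >> 16);

      // and: the operand must be all-ones outside the lane, i.e. InvMask | Shifted.
      uint32_t AndOperand = InvMask | Shifted;
      uint32_t After = Word & AndOperand;
      assert(After >> 16 == Word >> 16);             // neighbours preserved
      assert(((After >> 8) & 0xFF) == (0x33 & Val)); // lane got the i8 'and'
    }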
@@ -870,6 +918,62 @@ void AtomicExpand::expandAtomicOpToLLSC(
I->eraseFromParent();
}
+void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
+ IRBuilder<> Builder(AI);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ // The value operand must be sign-extended for signed min/max so that the
+ // target's signed comparison instructions can be used. Otherwise, just
+ // zero-ext.
+ Instruction::CastOps CastOp = Instruction::ZExt;
+ AtomicRMWInst::BinOp RMWOp = AI->getOperation();
+ if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
+ CastOp = Instruction::SExt;
+
+ Value *ValOperand_Shifted = Builder.CreateShl(
+ Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+ Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
+ Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
+ AI->getOrdering());
+ Value *FinalOldResult = Builder.CreateTrunc(
+ Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+}
+
+void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
+ IRBuilder<> Builder(CI);
+
+ PartwordMaskValues PMV = createMaskInstrs(
+ Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ Value *CmpVal_Shifted = Builder.CreateShl(
+ Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
+ "CmpVal_Shifted");
+ Value *NewVal_Shifted = Builder.CreateShl(
+ Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
+ "NewVal_Shifted");
+ Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
+ Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
+ CI->getSuccessOrdering());
+ Value *FinalOldVal = Builder.CreateTrunc(
+ Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
+
+ Value *Res = UndefValue::get(CI->getType());
+ Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
+ Value *Success = Builder.CreateICmpEQ(
+ CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+}
+
Value *AtomicExpand::insertRMWLLSCLoop(
IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
AtomicOrdering MemOpOrder,
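A hedged integer model of the success test in expandAtomicCmpXchgToMaskedIntrinsic above (the intrinsic's return value is modeled by a plain word; all names and values are illustrative): the returned wide word is compared against the shifted expected value only within the masked lane, so bits outside the lane cannot cause a spurious failure.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Mask = 0x0000FF00;          // lane of the i8 cmpxchg
      uint32_t CmpValShifted = 0x33u << 8; // expected value, shifted into the lane
      // Wide word returned by the masked cmpxchg intrinsic; the neighbouring
      // bytes may hold anything.
      uint32_t OldVal = 0x99AA3377;

      bool Success = (CmpValShifted == (OldVal & Mask));
      assert(Success); // lane matched even though the other bytes differ

      uint32_t FinalOldVal = (OldVal >> 8) & 0xFF; // truncated old lane value
      assert(FinalOldVal == 0x33);
    }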
@@ -1275,6 +1379,28 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
return NewLoaded;
}
+bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(CI);
+
+ switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ if (ValueSize < MinCASSize)
+ expandPartwordCmpXchg(CI);
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+ assert(ValueSize >= MinCASSize &&
+ "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
+ return expandAtomicCmpXchg(CI);
+ }
+ case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
+ expandAtomicCmpXchgToMaskedIntrinsic(CI);
+ return true;
+ }
+}
+
// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg) {
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index c7a0c6457164..efbfd5f4ab2c 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -298,7 +298,7 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
/// Whether MI should be counted as an instruction when calculating common tail.
static bool countsAsInstruction(const MachineInstr &MI) {
- return !(MI.isDebugValue() || MI.isCFIInstruction());
+ return !(MI.isDebugInstr() || MI.isCFIInstruction());
}
/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
@@ -865,7 +865,7 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
// Merge MMOs from memory operations in the common block.
if (MBBICommon->mayLoad() || MBBICommon->mayStore())
- MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI));
+ MBBICommon->cloneMergedMemRefs(*MBB->getParent(), {&*MBBICommon, &*MBBI});
// Drop undef flags if they aren't present in all merged instructions.
for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
MachineOperand &MO = MBBICommon->getOperand(I);
@@ -1363,9 +1363,9 @@ static void copyDebugInfoToPredecessor(const TargetInstrInfo *TII,
MachineBasicBlock &PredMBB) {
auto InsertBefore = PredMBB.getFirstTerminator();
for (MachineInstr &MI : MBB.instrs())
- if (MI.isDebugValue()) {
+ if (MI.isDebugInstr()) {
TII->duplicate(PredMBB, InsertBefore, MI);
- LLVM_DEBUG(dbgs() << "Copied debug value from empty block to pred: "
+ LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to pred: "
<< MI);
}
}
@@ -1375,9 +1375,9 @@ static void copyDebugInfoToSuccessor(const TargetInstrInfo *TII,
MachineBasicBlock &SuccMBB) {
auto InsertBefore = SuccMBB.SkipPHIsAndLabels(SuccMBB.begin());
for (MachineInstr &MI : MBB.instrs())
- if (MI.isDebugValue()) {
+ if (MI.isDebugInstr()) {
TII->duplicate(SuccMBB, InsertBefore, MI);
- LLVM_DEBUG(dbgs() << "Copied debug value from empty block to succ: "
+ LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to succ: "
<< MI);
}
}
diff --git a/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 7f098cb71657..210699cbf239 100644
--- a/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -162,7 +162,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
}
bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref) {
+ unsigned Pref) {
unsigned reg = MI->getOperand(OpIdx).getReg();
unsigned Clearance = RDA->getClearance(MI, reg);
LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
diff --git a/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
index 3a9b20aa661d..93939e573b7b 100644
--- a/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
+++ b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
@@ -28,10 +28,8 @@ namespace {
class ErlangGC : public GCStrategy {
public:
ErlangGC() {
- InitRoots = false;
- NeededSafePoints = 1 << GC::PostCall;
+ NeededSafePoints = true;
UsesMetadata = true;
- CustomRoots = false;
}
};
@@ -41,7 +39,7 @@ public:
class OcamlGC : public GCStrategy {
public:
OcamlGC() {
- NeededSafePoints = 1 << GC::PostCall;
+ NeededSafePoints = true;
UsesMetadata = true;
}
};
@@ -56,10 +54,7 @@ public:
/// while introducing only minor runtime overhead.
class ShadowStackGC : public GCStrategy {
public:
- ShadowStackGC() {
- InitRoots = true;
- CustomRoots = true;
- }
+ ShadowStackGC() {}
};
/// A GCStrategy which serves as an example for the usage of a statepoint based
@@ -74,10 +69,8 @@ public:
UseStatepoints = true;
// These options are all gc.root specific, we specify them so that the
// gc.root lowering code doesn't run.
- InitRoots = false;
- NeededSafePoints = 0;
+ NeededSafePoints = false;
UsesMetadata = false;
- CustomRoots = false;
}
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
@@ -108,10 +101,8 @@ public:
UseStatepoints = true;
// These options are all gc.root specific, we specify them so that the
// gc.root lowering code doesn't run.
- InitRoots = false;
- NeededSafePoints = 0;
+ NeededSafePoints = false;
UsesMetadata = false;
- CustomRoots = false;
}
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
@@ -136,9 +127,5 @@ static GCRegistry::Add<StatepointGC> D("statepoint-example",
"an example strategy for statepoint");
static GCRegistry::Add<CoreCLRGC> E("coreclr", "CoreCLR-compatible GC");
-// Provide hooks to ensure the containing library is fully loaded.
-void llvm::linkErlangGC() {}
-void llvm::linkOcamlGC() {}
-void llvm::linkShadowStackGC() {}
-void llvm::linkStatepointExampleGC() {}
-void llvm::linkCoreCLRGC() {}
+// Provide hook to ensure the containing library is fully loaded.
+void llvm::linkAllBuiltinGCs() {}
diff --git a/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 00ebf63fc174..c4799855a2b3 100644
--- a/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -207,6 +207,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
case MCCFIInstruction::OpUndefined:
case MCCFIInstruction::OpRegister:
case MCCFIInstruction::OpWindowSave:
+ case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpGnuArgsSize:
break;
}
@@ -317,6 +318,10 @@ unsigned CFIInstrInserter::verify(MachineFunction &MF) {
// outgoing offset and register values of CurrMBB
if (SuccMBBInfo.IncomingCFAOffset != CurrMBBInfo.OutgoingCFAOffset ||
SuccMBBInfo.IncomingCFARegister != CurrMBBInfo.OutgoingCFARegister) {
+ // Inconsistent offsets/registers are ok for 'noreturn' blocks because
+ // we don't generate epilogues inside such blocks.
+ if (SuccMBBInfo.MBB->succ_empty() && !SuccMBBInfo.MBB->isReturnBlock())
+ continue;
report(CurrMBBInfo, SuccMBBInfo);
ErrorNum++;
}
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 57541182cab2..02347b9f0b5c 100644
--- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -70,15 +70,6 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
return sub == hsub ? hreg : 0;
const TargetRegisterClass *rc = mri.getRegClass(reg);
- if (!tri.enableMultipleCopyHints()) {
- // Only allow physreg hints in rc.
- if (sub == 0)
- return rc->contains(hreg) ? hreg : 0;
-
- // reg:sub should match the physreg hreg.
- return tri.getMatchingSuperReg(hreg, sub, rc);
- }
-
unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
if (rc->contains(CopiedPReg))
return CopiedPReg;
@@ -199,31 +190,19 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
unsigned Reg;
float Weight;
bool IsPhys;
- unsigned HintOrder;
- CopyHint(unsigned R, float W, bool P, unsigned HR) :
- Reg(R), Weight(W), IsPhys(P), HintOrder(HR) {}
+ CopyHint(unsigned R, float W, bool P) :
+ Reg(R), Weight(W), IsPhys(P) {}
bool operator<(const CopyHint &rhs) const {
// Always prefer any physreg hint.
if (IsPhys != rhs.IsPhys)
return (IsPhys && !rhs.IsPhys);
if (Weight != rhs.Weight)
return (Weight > rhs.Weight);
-
- // This is just a temporary way to achive NFC for targets that don't
- // enable multiple copy hints. HintOrder should be removed when all
- // targets return true in enableMultipleCopyHints().
- return (HintOrder < rhs.HintOrder);
-
-#if 0 // Should replace the HintOrder check, see above.
- // (just for the purpose of maintaining the set)
- return Reg < rhs.Reg;
-#endif
+ return Reg < rhs.Reg; // Tie-breaker.
}
};
std::set<CopyHint> CopyHints;
- // Temporary: see comment for HintOrder above.
- unsigned CopyHintOrder = 0;
for (MachineRegisterInfo::reg_instr_iterator
I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
I != E; ) {
@@ -263,8 +242,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
}
// Get allocation hints from copies.
- if (!mi->isCopy() ||
- (TargetHint.first != 0 && !tri.enableMultipleCopyHints()))
+ if (!mi->isCopy())
continue;
unsigned hint = copyHint(mi, li.reg, tri, mri);
if (!hint)
@@ -275,8 +253,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
// FIXME: we probably shouldn't use floats at all.
volatile float hweight = Hint[hint] += weight;
if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint))
- CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint),
- (tri.enableMultipleCopyHints() ? hint : CopyHintOrder++)));
+ CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint)));
}
Hint.clear();
@@ -287,13 +264,13 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
if (TargetHint.first == 0 && TargetHint.second)
mri.clearSimpleHint(li.reg);
+ std::set<unsigned> HintedRegs;
for (auto &Hint : CopyHints) {
- if (TargetHint.first != 0 && Hint.Reg == TargetHint.second)
- // Don't add again the target-type hint.
+ if (!HintedRegs.insert(Hint.Reg).second ||
+ (TargetHint.first != 0 && Hint.Reg == TargetHint.second))
+ // Don't add the same reg twice or the target-type hint again.
continue;
mri.addRegAllocationHint(li.reg, Hint.Reg);
- if (!tri.enableMultipleCopyHints())
- break;
}
// Weakly boost the spill weight of hinted registers.
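A small standalone sketch of the ordering change above, using a stripped-down CopyHint with only the relevant fields: without a deterministic tie-breaker, two distinct registers with equal weight and kind compare equivalent under std::set's strict weak ordering and one of them would be silently dropped; the Reg field restores a total order, and a separate HintedRegs set deduplicates registers inserted with different weights.

    #include <cassert>
    #include <set>

    struct CopyHint {
      unsigned Reg;
      float Weight;
      bool IsPhys;
      bool operator<(const CopyHint &RHS) const {
        if (IsPhys != RHS.IsPhys)
          return IsPhys && !RHS.IsPhys; // physregs first
        if (Weight != RHS.Weight)
          return Weight > RHS.Weight;   // heavier hints first
        return Reg < RHS.Reg;           // tie-breaker keeps distinct regs distinct
      }
    };

    int main() {
      std::set<CopyHint> Hints;
      Hints.insert({5, 1.0f, false});
      Hints.insert({7, 1.0f, false}); // same weight/kind, different register
      assert(Hints.size() == 2);      // without the Reg tie-breaker this would be 1

      // The same register can still appear twice (different weights), so the
      // emission loop walks the set and skips registers it has already hinted.
      Hints.insert({5, 2.0f, false});
      std::set<unsigned> HintedRegs;
      unsigned Emitted = 0;
      for (const auto &H : Hints)
        if (HintedRegs.insert(H.Reg).second)
          ++Emitted;
      assert(Emitted == 2);
    }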
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index 2f845354c570..66166482c78b 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -42,6 +42,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
initializeIndirectBrExpandPassPass(Registry);
+ initializeInterleavedLoadCombinePass(Registry);
initializeInterleavedAccessPass(Registry);
initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index be685b26a9ea..c35f8666fa3c 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -278,7 +278,7 @@ class TypePromotionTransaction;
/// Keep track of GEPs accessing the same data structures such as structs or
/// arrays that are candidates to be split later because of their large
/// size.
- DenseMap<
+ MapVector<
AssertingVH<Value>,
SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
LargeOffsetGEPMap;
@@ -321,6 +321,24 @@ class TypePromotionTransaction;
}
private:
+ template <typename F>
+ void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
+ // Substituting can cause recursive simplifications, which can invalidate
+ // our iterator. Use a WeakTrackingVH to hold onto it in case this
+ // happens.
+ Value *CurValue = &*CurInstIterator;
+ WeakTrackingVH IterHandle(CurValue);
+
+ f();
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurValue) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
+ }
+
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -398,7 +416,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
OptSize = F.optForSize();
ProfileSummaryInfo *PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
if (PSI->isFunctionHotInCallGraph(&F, *BFI))
F.setSectionPrefix(".hot");
@@ -426,11 +444,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// unconditional branch.
EverMadeChange |= eliminateMostlyEmptyBlocks(F);
- // llvm.dbg.value is far away from the value then iSel may not be able
- // handle it properly. iSel will drop llvm.dbg.value if it can not
- // find a node corresponding to the value.
- EverMadeChange |= placeDbgValues(F);
-
if (!DisableBranchOpts)
EverMadeChange |= splitBranchCondition(F);
@@ -441,11 +454,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
- SeenChainsForSExt.clear();
- ValToSExtendedUses.clear();
- RemovedInsts.clear();
- LargeOffsetGEPMap.clear();
- LargeOffsetGEPID.clear();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -465,6 +473,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
I->deleteValue();
EverMadeChange |= MadeChange;
+ SeenChainsForSExt.clear();
+ ValToSExtendedUses.clear();
+ RemovedInsts.clear();
+ LargeOffsetGEPMap.clear();
+ LargeOffsetGEPID.clear();
}
SunkAddrs.clear();
@@ -518,6 +531,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
EverMadeChange |= simplifyOffsetableRelocate(*I);
}
+ // Do this last to clean up use-before-def scenarios introduced by other
+ // preparatory transforms.
+ EverMadeChange |= placeDbgValues(F);
+
return EverMadeChange;
}
@@ -651,7 +668,7 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
isa<IndirectBrInst>(Pred->getTerminator())))
return true;
- if (BB->getTerminator() != BB->getFirstNonPHI())
+ if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
return true;
// We use a simple cost heuristic which determine skipping merging is
@@ -1165,11 +1182,15 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+ DebugLoc Loc = CI->getDebugLoc();
auto *UAddWithOverflow =
CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+ UAddWithOverflow->setDebugLoc(Loc);
auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+ UAdd->setDebugLoc(Loc);
auto *Overflow =
ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+ Overflow->setDebugLoc(Loc);
CI->replaceAllUsesWith(Overflow);
AddI->replaceAllUsesWith(UAdd);
@@ -1402,6 +1423,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
else
InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
"", &*InsertPt);
+ InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
// Sink the trunc
BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
@@ -1410,6 +1432,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
TruncI->getType(), "", &*TruncInsertPt);
+ InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
MadeChange = true;
@@ -1501,6 +1524,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
else
InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
"", &*InsertPt);
+ InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
MadeChange = true;
}
@@ -1510,8 +1534,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
}
// If we removed all uses, nuke the shift.
- if (ShiftI->use_empty())
+ if (ShiftI->use_empty()) {
+ salvageDebugInfo(*ShiftI);
ShiftI->eraseFromParent();
+ }
return MadeChange;
}
@@ -1682,21 +1708,18 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// Lower all uses of llvm.objectsize.*
ConstantInt *RetVal =
lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
- // Substituting this can cause recursive simplifications, which can
- // invalidate our iterator. Use a WeakTrackingVH to hold onto it in case
- // this
- // happens.
- Value *CurValue = &*CurInstIterator;
- WeakTrackingVH IterHandle(CurValue);
-
- replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- if (IterHandle != CurValue) {
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
- }
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+ });
+ return true;
+ }
+ case Intrinsic::is_constant: {
+ // If is_constant hasn't folded away yet, lower it to false now.
+ Constant *RetVal = ConstantInt::get(II->getType(), 0);
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+ });
return true;
}
case Intrinsic::aarch64_stlxr:
@@ -1713,11 +1736,22 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
return true;
}
case Intrinsic::launder_invariant_group:
- case Intrinsic::strip_invariant_group:
- II->replaceAllUsesWith(II->getArgOperand(0));
+ case Intrinsic::strip_invariant_group: {
+ Value *ArgVal = II->getArgOperand(0);
+ auto it = LargeOffsetGEPMap.find(II);
+ if (it != LargeOffsetGEPMap.end()) {
+ // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
+ // Make sure not to have to deal with iterator invalidation
+ // after possibly adding ArgVal to LargeOffsetGEPMap.
+ auto GEPs = std::move(it->second);
+ LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
+ LargeOffsetGEPMap.erase(II);
+ }
+
+ II->replaceAllUsesWith(ArgVal);
II->eraseFromParent();
return true;
-
+ }
case Intrinsic::cttz:
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
@@ -1863,15 +1897,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
CallInst *CI = TailCalls[i];
CallSite CS(CI);
- // Conservatively require the attributes of the call to match those of the
- // return. Ignore noalias because it doesn't affect the call sequence.
- AttributeList CalleeAttrs = CS.getAttributes();
- if (AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
- .removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
- .removeAttribute(Attribute::NoAlias))
- continue;
-
// Make sure the call instruction is followed by an unconditional branch to
// the return block.
BasicBlock *CallBB = CI->getParent();
@@ -2337,6 +2362,8 @@ class TypePromotionTransaction {
/// Keep track of the original uses (pair Instruction, Index).
SmallVector<InstructionAndIdx, 4> OriginalUses;
+ /// Keep track of the debug users.
+ SmallVector<DbgValueInst *, 1> DbgValues;
using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
@@ -2350,6 +2377,10 @@ class TypePromotionTransaction {
Instruction *UserI = cast<Instruction>(U.getUser());
OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
}
+ // Record the debug uses separately. They are not in the instruction's
+ // use list, but they are replaced by RAUW.
+ findDbgValues(DbgValues, Inst);
+
// Now, we can replace the uses.
Inst->replaceAllUsesWith(New);
}
@@ -2362,6 +2393,15 @@ class TypePromotionTransaction {
UseIt != EndIt; ++UseIt) {
UseIt->Inst->setOperand(UseIt->Idx, Inst);
}
+ // RAUW has replaced all original uses with references to the new value,
+ // including the debug uses. Since we are undoing the replacements,
+ // the original debug uses must also be reinstated to maintain the
+ // correctness and utility of debug value instructions.
+ for (auto *DVI: DbgValues) {
+ LLVMContext &Ctx = Inst->getType()->getContext();
+ auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
+ DVI->setOperand(0, MV);
+ }
}
};
@@ -2632,15 +2672,159 @@ private:
Value *PromotedOperand) const;
};
+class PhiNodeSet;
+
+/// An iterator for PhiNodeSet.
+class PhiNodeSetIterator {
+ PhiNodeSet * const Set;
+ size_t CurrentIndex = 0;
+
+public:
+ /// The constructor. Start should point to either a valid element, or be equal
+ /// to the size of the underlying SmallVector of the PhiNodeSet.
+ PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
+ PHINode * operator*() const;
+ PhiNodeSetIterator& operator++();
+ bool operator==(const PhiNodeSetIterator &RHS) const;
+ bool operator!=(const PhiNodeSetIterator &RHS) const;
+};
+
+/// Keeps a set of PHINodes.
+///
+/// This is a minimal set implementation for a specific use case:
+/// It is very fast when there are very few elements, but also provides good
+/// performance when there are many. It is similar to SmallPtrSet, but also
+/// provides iteration by insertion order, which is deterministic and stable
+/// across runs. It is also similar to SmallSetVector, but allows removing
+/// elements in O(1) time. This is achieved by not actually removing the element
+/// from the underlying vector, so it comes at the cost of using more memory,
+/// but that is fine, since PhiNodeSets are used as short-lived objects.
+class PhiNodeSet {
+ friend class PhiNodeSetIterator;
+
+ using MapType = SmallDenseMap<PHINode *, size_t, 32>;
+ using iterator = PhiNodeSetIterator;
+
+ /// Keeps the elements in the order of their insertion in the underlying
+ /// vector. To achieve constant time removal, it never deletes any element.
+ SmallVector<PHINode *, 32> NodeList;
+
+ /// Keeps the elements in the underlying set implementation. This (and not the
+ /// NodeList defined above) is the source of truth on whether an element
+ /// is actually in the collection.
+ MapType NodeMap;
+
+ /// Points to the first valid (not deleted) element when the set is not empty
+ /// and the value is not zero. Equals the size of the underlying vector
+ /// when the set is empty. When the value is 0, as in the beginning, the
+ /// first element may or may not be valid.
+ size_t FirstValidElement = 0;
+
+public:
+ /// Inserts a new element to the collection.
+ /// \returns true if the element is actually added, i.e. was not in the
+ /// collection before the operation.
+ bool insert(PHINode *Ptr) {
+ if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
+ NodeList.push_back(Ptr);
+ return true;
+ }
+ return false;
+ }
+
+ /// Removes the element from the collection.
+ /// \returns whether the element is actually removed, i.e. was in the
+ /// collection before the operation.
+ bool erase(PHINode *Ptr) {
+ auto it = NodeMap.find(Ptr);
+ if (it != NodeMap.end()) {
+ NodeMap.erase(Ptr);
+ SkipRemovedElements(FirstValidElement);
+ return true;
+ }
+ return false;
+ }
+
+ /// Removes all elements and clears the collection.
+ void clear() {
+ NodeMap.clear();
+ NodeList.clear();
+ FirstValidElement = 0;
+ }
+
+ /// \returns an iterator that will iterate the elements in the order of
+ /// insertion.
+ iterator begin() {
+ if (FirstValidElement == 0)
+ SkipRemovedElements(FirstValidElement);
+ return PhiNodeSetIterator(this, FirstValidElement);
+ }
+
+ /// \returns an iterator that points to the end of the collection.
+ iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
+
+ /// Returns the number of elements in the collection.
+ size_t size() const {
+ return NodeMap.size();
+ }
+
+ /// \returns 1 if the given element is in the collection, and 0 if otherwise.
+ size_t count(PHINode *Ptr) const {
+ return NodeMap.count(Ptr);
+ }
+
+private:
+ /// Updates the CurrentIndex so that it will point to a valid element.
+ ///
+ /// If the element of NodeList at CurrentIndex is valid, it does not
+ /// change it. If there are no more valid elements, it updates CurrentIndex
+ /// to point to the end of the NodeList.
+ void SkipRemovedElements(size_t &CurrentIndex) {
+ while (CurrentIndex < NodeList.size()) {
+ auto it = NodeMap.find(NodeList[CurrentIndex]);
+ // If the element has been deleted and added again later, NodeMap will
+ // point to a different index, so CurrentIndex will still be invalid.
+ if (it != NodeMap.end() && it->second == CurrentIndex)
+ break;
+ ++CurrentIndex;
+ }
+ }
+};
+
+PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
+ : Set(Set), CurrentIndex(Start) {}
+
+PHINode * PhiNodeSetIterator::operator*() const {
+ assert(CurrentIndex < Set->NodeList.size() &&
+ "PhiNodeSet access out of range");
+ return Set->NodeList[CurrentIndex];
+}
+
+PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
+ assert(CurrentIndex < Set->NodeList.size() &&
+ "PhiNodeSet access out of range");
+ ++CurrentIndex;
+ Set->SkipRemovedElements(CurrentIndex);
+ return *this;
+}
+
+bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
+ return CurrentIndex == RHS.CurrentIndex;
+}
+
+bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
+ return !((*this) == RHS);
+}
+
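A minimal usage sketch, not part of the patch (P1 and P2 stand for pre-existing PHINode pointers); erasing an element only tombstones its vector slot, so the remaining elements are still visited in insertion order:

    PhiNodeSet Set;
    Set.insert(P1);            // true: newly inserted
    Set.insert(P2);
    Set.insert(P1);            // false: already present
    Set.erase(P2);             // O(1): removed from NodeMap, slot kept in NodeList
    assert(Set.size() == 1 && Set.count(P1) == 1);
    for (PHINode *P : Set)     // begin()/end() skip tombstoned slots
      (void)P;                 // visits P1 only, in insertion order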
/// Keep track of simplification of Phi nodes.
/// Accept the set of all phi nodes and erase phi node from this set
/// if it is simplified.
class SimplificationTracker {
DenseMap<Value *, Value *> Storage;
const SimplifyQuery &SQ;
- // Tracks newly created Phi nodes. We use a SetVector to get deterministic
- // order when iterating over the set in MatchPhiSet.
- SmallSetVector<PHINode *, 32> AllPhiNodes;
+ // Tracks newly created Phi nodes. The elements are iterated by insertion
+ // order.
+ PhiNodeSet AllPhiNodes;
// Tracks newly created Select nodes.
SmallPtrSet<SelectInst *, 32> AllSelectNodes;
@@ -2672,7 +2856,7 @@ public:
Put(PI, V);
PI->replaceAllUsesWith(V);
if (auto *PHI = dyn_cast<PHINode>(PI))
- AllPhiNodes.remove(PHI);
+ AllPhiNodes.erase(PHI);
if (auto *Select = dyn_cast<SelectInst>(PI))
AllSelectNodes.erase(Select);
PI->eraseFromParent();
@@ -2695,11 +2879,11 @@ public:
assert(Get(To) == To && "Replacement PHI node is already replaced.");
Put(From, To);
From->replaceAllUsesWith(To);
- AllPhiNodes.remove(From);
+ AllPhiNodes.erase(From);
From->eraseFromParent();
}
- SmallSetVector<PHINode *, 32>& newPhiNodes() { return AllPhiNodes; }
+ PhiNodeSet& newPhiNodes() { return AllPhiNodes; }
void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
@@ -2727,8 +2911,7 @@ public:
/// A helper class for combining addressing modes.
class AddressingModeCombiner {
- typedef std::pair<Value *, BasicBlock *> ValueInBB;
- typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping;
+ typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
typedef std::pair<PHINode *, PHINode *> PHIPair;
private:
@@ -2748,10 +2931,10 @@ private:
const SimplifyQuery &SQ;
/// Original Address.
- ValueInBB Original;
+ Value *Original;
public:
- AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue)
+ AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
: CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
/// Get the combined AddrMode
@@ -2847,46 +3030,40 @@ public:
}
private:
- /// Initialize Map with anchor values. For address seen in some BB
+ /// Initialize Map with anchor values. For each address seen,
/// we record the value of the differing field of that address.
- /// If address is not an instruction than basic block is set to null.
/// At the same time we find a common type for the differing fields we will
/// use to create new Phi/Select nodes. Keep it in CommonType field.
/// Return false if there is no common type found.
bool initializeMap(FoldAddrToValueMapping &Map) {
// Keep track of keys where the value is null. We will need to replace it
// with constant null when we know the common type.
- SmallVector<ValueInBB, 2> NullValue;
+ SmallVector<Value *, 2> NullValue;
Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
for (auto &AM : AddrModes) {
- BasicBlock *BB = nullptr;
- if (Instruction *I = dyn_cast<Instruction>(AM.OriginalValue))
- BB = I->getParent();
-
Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
if (DV) {
auto *Type = DV->getType();
if (CommonType && CommonType != Type)
return false;
CommonType = Type;
- Map[{ AM.OriginalValue, BB }] = DV;
+ Map[AM.OriginalValue] = DV;
} else {
- NullValue.push_back({ AM.OriginalValue, BB });
+ NullValue.push_back(AM.OriginalValue);
}
}
assert(CommonType && "At least one non-null value must be!");
- for (auto VIBB : NullValue)
- Map[VIBB] = Constant::getNullValue(CommonType);
+ for (auto *V : NullValue)
+ Map[V] = Constant::getNullValue(CommonType);
return true;
}
- /// We have mapping between value A and basic block where value A
- /// seen to other value B where B was a field in addressing mode represented
- /// by A. Also we have an original value C representing an address in some
- /// basic block. Traversing from C through phi and selects we ended up with
- /// A's in a map. This utility function tries to find a value V which is a
- /// field in addressing mode C and traversing through phi nodes and selects
- /// we will end up in corresponded values B in a map.
+ /// We have a mapping between value A and another value B, where B was a field
+ /// in the addressing mode represented by A. We also have an original value C
+ /// representing the address we start with. Traversing from C through phis and
+ /// selects we ended up with the A's in the map. This utility function tries to
+ /// find a value V which is a field in addressing mode C such that, traversing
+ /// through phi nodes and selects, we end up at the corresponding values B in
+ /// the map.
/// The utility will create new Phi/Select nodes if needed.
// The simple example looks as follows:
// BB1:
@@ -2899,22 +3076,24 @@ private:
// p = phi [p1, BB1], [p2, BB2]
// v = load p
// Map is
- // <p1, BB1> -> b1
- // <p2, BB2> -> b2
+ // p1 -> b1
+ // p2 -> b2
// Request is
- // <p, BB3> -> ?
- // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3
+ // p -> ?
+ // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
Value *findCommon(FoldAddrToValueMapping &Map) {
// Tracks the simplification of newly created phi nodes. The reason we use
// this mapping is because we will add new created Phi nodes in AddrToBase.
// Simplification of Phi nodes is recursive, so some Phi node may
- // be simplified after we added it to AddrToBase.
+ // be simplified after we added it to AddrToBase. In reality this
+ // simplification is possible only if original phi/selects were not
+ // simplified yet.
// Using this mapping we can find the current value in AddrToBase.
SimplificationTracker ST(SQ);
// First step, DFS to create PHI nodes for all intermediate blocks.
// Also fill traverse order for the second step.
- SmallVector<ValueInBB, 32> TraverseOrder;
+ SmallVector<Value *, 32> TraverseOrder;
InsertPlaceholders(Map, TraverseOrder, ST);
// Second Step, fill new nodes by merged values and simplify if possible.
@@ -2944,7 +3123,7 @@ private:
/// Matcher tracks the matched Phi nodes.
bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
SmallSetVector<PHIPair, 8> &Matcher,
- SmallSetVector<PHINode *, 32> &PhiNodesToMatch) {
+ PhiNodeSet &PhiNodesToMatch) {
SmallVector<PHIPair, 8> WorkList;
Matcher.insert({ PHI, Candidate });
WorkList.push_back({ PHI, Candidate });
@@ -2993,11 +3172,12 @@ private:
/// Returns false if this matching fails and creation of new Phi is disabled.
bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
unsigned &PhiNotMatchedCount) {
- // Use a SetVector for Matched to make sure we do replacements (ReplacePhi)
- // in a deterministic order below.
+ // Matched and PhiNodesToMatch iterate their elements in a deterministic
+ // order, so the replacements (ReplacePhi) are also done in a deterministic
+ // order.
SmallSetVector<PHIPair, 8> Matched;
SmallPtrSet<PHINode *, 8> WillNotMatch;
- SmallSetVector<PHINode *, 32> &PhiNodesToMatch = ST.newPhiNodes();
+ PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
while (PhiNodesToMatch.size()) {
PHINode *PHI = *PhiNodesToMatch.begin();
@@ -3032,129 +3212,86 @@ private:
// Just remove all seen values in matcher. They will not match anything.
PhiNotMatchedCount += WillNotMatch.size();
for (auto *P : WillNotMatch)
- PhiNodesToMatch.remove(P);
+ PhiNodesToMatch.erase(P);
}
return true;
}
- /// Fill the placeholder with values from predecessors and simplify it.
+ /// Fill the placeholders with values from predecessors and simplify them.
void FillPlaceholders(FoldAddrToValueMapping &Map,
- SmallVectorImpl<ValueInBB> &TraverseOrder,
+ SmallVectorImpl<Value *> &TraverseOrder,
SimplificationTracker &ST) {
while (!TraverseOrder.empty()) {
- auto Current = TraverseOrder.pop_back_val();
+ Value *Current = TraverseOrder.pop_back_val();
assert(Map.find(Current) != Map.end() && "No node to fill!!!");
- Value *CurrentValue = Current.first;
- BasicBlock *CurrentBlock = Current.second;
Value *V = Map[Current];
if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
// CurrentValue also must be Select.
- auto *CurrentSelect = cast<SelectInst>(CurrentValue);
+ auto *CurrentSelect = cast<SelectInst>(Current);
auto *TrueValue = CurrentSelect->getTrueValue();
- ValueInBB TrueItem = { TrueValue, isa<Instruction>(TrueValue)
- ? CurrentBlock
- : nullptr };
- assert(Map.find(TrueItem) != Map.end() && "No True Value!");
- Select->setTrueValue(ST.Get(Map[TrueItem]));
+ assert(Map.find(TrueValue) != Map.end() && "No True Value!");
+ Select->setTrueValue(ST.Get(Map[TrueValue]));
auto *FalseValue = CurrentSelect->getFalseValue();
- ValueInBB FalseItem = { FalseValue, isa<Instruction>(FalseValue)
- ? CurrentBlock
- : nullptr };
- assert(Map.find(FalseItem) != Map.end() && "No False Value!");
- Select->setFalseValue(ST.Get(Map[FalseItem]));
+ assert(Map.find(FalseValue) != Map.end() && "No False Value!");
+ Select->setFalseValue(ST.Get(Map[FalseValue]));
} else {
// Must be a Phi node then.
PHINode *PHI = cast<PHINode>(V);
+ auto *CurrentPhi = dyn_cast<PHINode>(Current);
// Fill the Phi node with values from predecessors.
- bool IsDefinedInThisBB =
- cast<Instruction>(CurrentValue)->getParent() == CurrentBlock;
- auto *CurrentPhi = dyn_cast<PHINode>(CurrentValue);
- for (auto B : predecessors(CurrentBlock)) {
- Value *PV = IsDefinedInThisBB
- ? CurrentPhi->getIncomingValueForBlock(B)
- : CurrentValue;
- ValueInBB item = { PV, isa<Instruction>(PV) ? B : nullptr };
- assert(Map.find(item) != Map.end() && "No predecessor Value!");
- PHI->addIncoming(ST.Get(Map[item]), B);
+ for (auto B : predecessors(PHI->getParent())) {
+ Value *PV = CurrentPhi->getIncomingValueForBlock(B);
+ assert(Map.find(PV) != Map.end() && "No predecessor Value!");
+ PHI->addIncoming(ST.Get(Map[PV]), B);
}
}
- // Simplify if possible.
Map[Current] = ST.Simplify(V);
}
}
- /// Starting from value recursively iterates over predecessors up to known
- /// ending values represented in a map. For each traversed block inserts
- /// a placeholder Phi or Select.
+ /// Starting from original value recursively iterates over def-use chain up to
+ /// known ending values represented in a map. For each traversed phi/select
+ /// inserts a placeholder Phi or Select.
/// Reports all new created Phi/Select nodes by adding them to set.
- /// Also reports and order in what basic blocks have been traversed.
+ /// Also reports the order in which values have been traversed.
void InsertPlaceholders(FoldAddrToValueMapping &Map,
- SmallVectorImpl<ValueInBB> &TraverseOrder,
+ SmallVectorImpl<Value *> &TraverseOrder,
SimplificationTracker &ST) {
- SmallVector<ValueInBB, 32> Worklist;
- assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) &&
+ SmallVector<Value *, 32> Worklist;
+ assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
"Address must be a Phi or Select node");
auto *Dummy = UndefValue::get(CommonType);
Worklist.push_back(Original);
while (!Worklist.empty()) {
- auto Current = Worklist.pop_back_val();
- // If value is not an instruction it is something global, constant,
- // parameter and we can say that this value is observable in any block.
- // Set block to null to denote it.
- // Also please take into account that it is how we build anchors.
- if (!isa<Instruction>(Current.first))
- Current.second = nullptr;
+ Value *Current = Worklist.pop_back_val();
// if it is already visited or it is an ending value then skip it.
if (Map.find(Current) != Map.end())
continue;
TraverseOrder.push_back(Current);
- Value *CurrentValue = Current.first;
- BasicBlock *CurrentBlock = Current.second;
// CurrentValue must be a Phi node or select. All others must be covered
// by anchors.
- Instruction *CurrentI = cast<Instruction>(CurrentValue);
- bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock;
-
- unsigned PredCount = pred_size(CurrentBlock);
- // if Current Value is not defined in this basic block we are interested
- // in values in predecessors.
- if (!IsDefinedInThisBB) {
- assert(PredCount && "Unreachable block?!");
- PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
- &CurrentBlock->front());
- Map[Current] = PHI;
- ST.insertNewPhi(PHI);
- // Add all predecessors in work list.
- for (auto B : predecessors(CurrentBlock))
- Worklist.push_back({ CurrentValue, B });
- continue;
- }
- // Value is defined in this basic block.
- if (SelectInst *OrigSelect = dyn_cast<SelectInst>(CurrentI)) {
+ if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
// Is it OK to get metadata from CurrentSelect?!
// Create a Select placeholder with dummy value.
- SelectInst *Select =
- SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy,
- OrigSelect->getName(), OrigSelect, OrigSelect);
+ SelectInst *Select = SelectInst::Create(
+ CurrentSelect->getCondition(), Dummy, Dummy,
+ CurrentSelect->getName(), CurrentSelect, CurrentSelect);
Map[Current] = Select;
ST.insertNewSelect(Select);
- // We are interested in True and False value in this basic block.
- Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock });
- Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock });
+ // We are interested in True and False values.
+ Worklist.push_back(CurrentSelect->getTrueValue());
+ Worklist.push_back(CurrentSelect->getFalseValue());
} else {
// It must be a Phi node then.
- auto *CurrentPhi = cast<PHINode>(CurrentI);
- // Create new Phi node for merge of bases.
- assert(PredCount && "Unreachable block?!");
- PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
- &CurrentBlock->front());
+ PHINode *CurrentPhi = cast<PHINode>(Current);
+ unsigned PredCount = CurrentPhi->getNumIncomingValues();
+ PHINode *PHI =
+ PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
Map[Current] = PHI;
ST.insertNewPhi(PHI);
-
- // Add all predecessors in work list.
- for (auto B : predecessors(CurrentBlock))
- Worklist.push_back({ CurrentPhi->getIncomingValueForBlock(B), B });
+ for (Value *P : CurrentPhi->incoming_values())
+ Worklist.push_back(P);
}
}
}
@@ -3843,8 +3980,13 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
} else {
uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
- ConstantOffset += CI->getSExtValue() * TypeSize;
- } else if (TypeSize) { // Scales of zero don't do anything.
+ const APInt &CVal = CI->getValue();
+ if (CVal.getMinSignedBits() <= 64) {
+ ConstantOffset += CVal.getSExtValue() * TypeSize;
+ continue;
+ }
+ }
+ if (TypeSize) { // Scales of zero don't do anything.
// We only allow one variable index at the moment.
if (VariableOperand != -1)
return false;
@@ -4368,7 +4510,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool PhiOrSelectSeen = false;
SmallVector<Instruction*, 16> AddrModeInsts;
const SimplifyQuery SQ(*DL, TLInfo);
- AddressingModeCombiner AddrModes(SQ, { Addr, MemoryInst->getParent() });
+ AddressingModeCombiner AddrModes(SQ, Addr);
TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
@@ -4985,8 +5127,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
};
// Sorting all the GEPs of the same data structures based on the offsets.
- llvm::sort(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end(),
- compareGEPOffset);
+ llvm::sort(LargeOffsetGEPs, compareGEPOffset);
LargeOffsetGEPs.erase(
std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
LargeOffsetGEPs.end());
@@ -5019,11 +5160,11 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
}
// Generate a new GEP to replace the current one.
- IRBuilder<> Builder(GEP);
+ LLVMContext &Ctx = GEP->getContext();
Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
Type *I8PtrTy =
- Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace());
- Type *I8Ty = Builder.getInt8Ty();
+ Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
+ Type *I8Ty = Type::getInt8Ty(Ctx);
if (!NewBaseGEP) {
// Create a new base if we don't have one yet. Find the insertion
@@ -5059,6 +5200,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
NewGEPBases.insert(NewBaseGEP);
}
+ IRBuilder<> Builder(GEP);
Value *NewGEP = NewBaseGEP;
if (Offset == BaseOffset) {
if (GEP->getType() != I8PtrTy)
@@ -5587,6 +5729,10 @@ static Value *getTrueOrFalseValue(
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
+ // If branch conversion isn't desirable, exit early.
+ if (DisableSelectToBranch || OptSize || !TLI)
+ return false;
+
// Find all consecutive select instructions that share the same condition.
SmallVector<SelectInst *, 2> ASI;
ASI.push_back(SI);
@@ -5608,8 +5754,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
// Can we convert the 'select' to CF ?
- if (DisableSelectToBranch || OptSize || !TLI || VectorCond ||
- SI->getMetadata(LLVMContext::MD_unpredictable))
+ if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
return false;
TargetLowering::SelectSupportKind SelectKind;
@@ -5672,6 +5817,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
EndBlock->getParent(), EndBlock);
TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ TrueBranch->setDebugLoc(SI->getDebugLoc());
}
auto *TrueInst = cast<Instruction>(SI->getTrueValue());
TrueInst->moveBefore(TrueBranch);
@@ -5681,6 +5827,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
EndBlock->getParent(), EndBlock);
FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(SI->getDebugLoc());
}
auto *FalseInst = cast<Instruction>(SI->getFalseValue());
FalseInst->moveBefore(FalseBranch);
@@ -5695,7 +5842,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
EndBlock->getParent(), EndBlock);
- BranchInst::Create(EndBlock, FalseBlock);
+ auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(SI->getDebugLoc());
}
// Insert the real conditional branch based on the original condition.
@@ -5730,6 +5878,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
PN->takeName(SI);
PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
+ PN->setDebugLoc(SI->getDebugLoc());
SI->replaceAllUsesWith(PN);
SI->eraseFromParent();
@@ -5841,6 +5990,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
ExtInst->insertBefore(SI);
+ ExtInst->setDebugLoc(SI->getDebugLoc());
SI->setCondition(ExtInst);
for (auto Case : SI->cases()) {
APInt NarrowConst = Case.getCaseValue()->getValue();
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
index cd302e78cc3e..68034afe98d5 100644
--- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -250,8 +250,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
LLVM_DEBUG({
dbgs() << "Scheduling DAG of the packetize region\n";
- for (SUnit &SU : VLIWScheduler->SUnits)
- SU.dumpAll(VLIWScheduler);
+ VLIWScheduler->dump();
});
// Generate MI -> SU map.
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 098afd885f2f..364e1f030942 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -398,6 +398,13 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
return false;
}
+ // Make sure the analyzed branch is conditional; one of the successors
+ // could be a landing pad. (Empty landing pads can be generated on Windows.)
+ if (Cond.empty()) {
+ LLVM_DEBUG(dbgs() << "AnalyzeBranch found an unconditional branch.\n");
+ return false;
+ }
+
// AnalyzeBranch doesn't set FBB on a fall-through branch.
// Make sure it is always set.
FBB = TBB == Succ0 ? Succ1 : Succ0;
diff --git a/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp
index d7562cbf1e90..ee7683adbcdd 100644
--- a/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -66,23 +66,18 @@ class MemCmpExpansion {
// Represents the decomposition in blocks of the expansion. For example,
// comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
// 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
- // TODO(courbet): Involve the target more in this computation. On X86, 7
- // bytes can be done more efficiently with two overlaping 4-byte loads than
- // covering the interval with [{4, 0},{2, 4},{1, 6}}.
struct LoadEntry {
LoadEntry(unsigned LoadSize, uint64_t Offset)
: LoadSize(LoadSize), Offset(Offset) {
- assert(Offset % LoadSize == 0 && "invalid load entry");
}
- uint64_t getGEPIndex() const { return Offset / LoadSize; }
-
// The size of the load for this block, in bytes.
- const unsigned LoadSize;
- // The offset of this load WRT the base pointer, in bytes.
- const uint64_t Offset;
+ unsigned LoadSize;
+ // The offset of this load from the base pointer, in bytes.
+ uint64_t Offset;
};
- SmallVector<LoadEntry, 8> LoadSequence;
+ using LoadEntryVector = SmallVector<LoadEntry, 8>;
+ LoadEntryVector LoadSequence;
void createLoadCmpBlocks();
void createResultBlock();
@@ -92,13 +87,23 @@ class MemCmpExpansion {
void emitLoadCompareBlock(unsigned BlockIndex);
void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
unsigned &LoadIndex);
- void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex);
+ void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase();
Value *getMemCmpEqZeroOneBlock();
Value *getMemCmpOneBlock();
+ Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType,
+ uint64_t OffsetBytes);
+
+ static LoadEntryVector
+ computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
+ unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
+ static LoadEntryVector
+ computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
+ unsigned MaxNumLoads,
+ unsigned &NumLoadsNonOneByte);
- public:
+public:
MemCmpExpansion(CallInst *CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
@@ -110,6 +115,76 @@ class MemCmpExpansion {
Value *getMemCmpExpansion();
};
+MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
+ uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
+ const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
+ NumLoadsNonOneByte = 0;
+ LoadEntryVector LoadSequence;
+ uint64_t Offset = 0;
+ while (Size && !LoadSizes.empty()) {
+ const unsigned LoadSize = LoadSizes.front();
+ const uint64_t NumLoadsForThisSize = Size / LoadSize;
+ if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
+ // Do not expand if the total number of loads is larger than what the
+ // target allows. Note that it's important that we exit before completing
+ // the expansion to avoid using a ton of memory to store the expansion for
+ // large sizes.
+ return {};
+ }
+ if (NumLoadsForThisSize > 0) {
+ for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
+ LoadSequence.push_back({LoadSize, Offset});
+ Offset += LoadSize;
+ }
+ if (LoadSize > 1)
+ ++NumLoadsNonOneByte;
+ Size = Size % LoadSize;
+ }
+ LoadSizes = LoadSizes.drop_front();
+ }
+ return LoadSequence;
+}
+
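As a worked example of the greedy decomposition (illustrative values: Size = 33, LoadSizes = {16, 8, 4, 2, 1}, MaxNumLoads large enough):

    // 33 / 16 = 2 loads of 16 bytes at offsets 0 and 16; Size becomes 33 % 16 = 1.
    // 8, 4 and 2 each divide 1 zero times; finally one 1-byte load at offset 32.
    //   LoadSequence = [{16, 0}, {16, 16}, {1, 32}],  NumLoadsNonOneByte = 1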
+MemCmpExpansion::LoadEntryVector
+MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
+ const unsigned MaxLoadSize,
+ const unsigned MaxNumLoads,
+ unsigned &NumLoadsNonOneByte) {
+ // These are already handled by the greedy approach.
+ if (Size < 2 || MaxLoadSize < 2)
+ return {};
+
+ // We try to do as many non-overlapping loads as possible starting from the
+ // beginning.
+ const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
+ assert(NumNonOverlappingLoads && "there must be at least one load");
+ // There remain 0 to (MaxLoadSize - 1) bytes to load; this will be done with
+ // an overlapping load.
+ Size = Size - NumNonOverlappingLoads * MaxLoadSize;
+ // Bail if we do not need an overlapping load; this is already handled by
+ // the greedy approach.
+ if (Size == 0)
+ return {};
+ // Bail if the number of loads (non-overlapping + potential overlapping one)
+ // is larger than the max allowed.
+ if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
+ return {};
+
+ // Add non-overlapping loads.
+ LoadEntryVector LoadSequence;
+ uint64_t Offset = 0;
+ for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
+ LoadSequence.push_back({MaxLoadSize, Offset});
+ Offset += MaxLoadSize;
+ }
+
+ // Add the last overlapping load.
+ assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
+ LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
+ NumLoadsNonOneByte = 1;
+ return LoadSequence;
+}
+
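A worked example of the overlapping variant (illustrative values: Size = 7, MaxLoadSize = 4, MaxNumLoads >= 2):

    //   NumNonOverlappingLoads = 7 / 4 = 1       -> {4, 0}, Offset becomes 4
    //   remaining Size = 7 - 4 = 3               -> last load at 4 - (4 - 3) = 3
    //   LoadSequence = [{4, 0}, {4, 3}]          (byte 3 is simply read twice)

The greedy decomposition of the same 7 bytes would need three loads ({4, 0}, {2, 4}, {1, 6}), so the two overlapping loads are preferred when the target allows them.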
// Initialize the basic block structure required for expansion of memcmp call
// with given maximum load size and memcmp size parameter.
// This structure includes:
@@ -133,38 +208,31 @@ MemCmpExpansion::MemCmpExpansion(
Builder(CI) {
assert(Size > 0 && "zero blocks");
// Scale the max size down if the target can load more bytes than we need.
- size_t LoadSizeIndex = 0;
- while (LoadSizeIndex < Options.LoadSizes.size() &&
- Options.LoadSizes[LoadSizeIndex] > Size) {
- ++LoadSizeIndex;
+ llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
+ while (!LoadSizes.empty() && LoadSizes.front() > Size) {
+ LoadSizes = LoadSizes.drop_front();
}
- this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex];
+ assert(!LoadSizes.empty() && "cannot load Size bytes");
+ MaxLoadSize = LoadSizes.front();
// Compute the decomposition.
- uint64_t CurSize = Size;
- uint64_t Offset = 0;
- while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) {
- const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex];
- assert(LoadSize > 0 && "zero load size");
- const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
- if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
- // Do not expand if the total number of loads is larger than what the
- // target allows. Note that it's important that we exit before completing
- // the expansion to avoid using a ton of memory to store the expansion for
- // large sizes.
- LoadSequence.clear();
- return;
- }
- if (NumLoadsForThisSize > 0) {
- for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
- LoadSequence.push_back({LoadSize, Offset});
- Offset += LoadSize;
- }
- if (LoadSize > 1) {
- ++NumLoadsNonOneByte;
- }
- CurSize = CurSize % LoadSize;
+ unsigned GreedyNumLoadsNonOneByte = 0;
+ LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads,
+ GreedyNumLoadsNonOneByte);
+ NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
+ assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+ // If we allow overlapping loads and the load sequence is not already optimal,
+ // use overlapping loads.
+ if (Options.AllowOverlappingLoads &&
+ (LoadSequence.empty() || LoadSequence.size() > 2)) {
+ unsigned OverlappingNumLoadsNonOneByte = 0;
+ auto OverlappingLoads = computeOverlappingLoadSequence(
+ Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte);
+ if (!OverlappingLoads.empty() &&
+ (LoadSequence.empty() ||
+ OverlappingLoads.size() < LoadSequence.size())) {
+ LoadSequence = OverlappingLoads;
+ NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
}
- ++LoadSizeIndex;
}
assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
}
@@ -189,30 +257,32 @@ void MemCmpExpansion::createResultBlock() {
EndBlock->getParent(), EndBlock);
}
+/// Return a pointer to an element of type `LoadSizeType` at offset
+/// `OffsetBytes`.
+Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,
+ Type *LoadSizeType,
+ uint64_t OffsetBytes) {
+ if (OffsetBytes > 0) {
+ auto *ByteType = Type::getInt8Ty(CI->getContext());
+ Source = Builder.CreateGEP(
+ ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
+ ConstantInt::get(ByteType, OffsetBytes));
+ }
+ return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());
+}
+
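In plain pointer terms, the helper above byte-offsets through an i8* view and then reinterprets the result as a pointer to the load type. A rough C++ analogue (hypothetical, for illustration only; a 64-bit load type is assumed):

    #include <cstdint>
    // Mirror of the emitted IR: GEP by OffsetBytes over i8, then bitcast.
    static const uint64_t *ptrToU64AtOffset(const void *Src, uint64_t OffsetBytes) {
      return reinterpret_cast<const uint64_t *>(
          static_cast<const char *>(Src) + OffsetBytes);
    }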
// This function creates the IR instructions for loading and comparing 1 byte.
// It loads 1 byte from each source of the memcmp parameters at the given
// byte offset. It then subtracts the two loaded values and adds this result to the
// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
- unsigned GEPIndex) {
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
+ unsigned OffsetBytes) {
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
- // Get the base address using the GEPIndex.
- if (GEPIndex != 0) {
- Source1 = Builder.CreateGEP(LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, GEPIndex));
- Source2 = Builder.CreateGEP(LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, GEPIndex));
- }
+ Value *Source1 =
+ getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes);
+ Value *Source2 =
+ getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes);
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
@@ -270,24 +340,10 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
IntegerType *LoadSizeType =
IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
- // Get the base address using a GEP.
- if (CurLoadEntry.Offset != 0) {
- Source1 = Builder.CreateGEP(
- LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- Source2 = Builder.CreateGEP(
- LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- }
+ Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
+ CurLoadEntry.Offset);
+ Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
+ CurLoadEntry.Offset);
// Get a constant or load a value for each source address.
Value *LoadSrc1 = nullptr;
@@ -378,8 +434,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
if (CurLoadEntry.LoadSize == 1) {
- MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex,
- CurLoadEntry.getGEPIndex());
+ MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
return;
}
@@ -388,25 +443,12 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
- // Get the base address using a GEP.
- if (CurLoadEntry.Offset != 0) {
- Source1 = Builder.CreateGEP(
- LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- Source2 = Builder.CreateGEP(
- LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- }
+ Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
+ CurLoadEntry.Offset);
+ Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
+ CurLoadEntry.Offset);
// Load LoadSizeType from the base address.
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
@@ -694,7 +736,6 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
if (SizeVal == 0) {
return false;
}
-
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index bc747fc610f8..f2a2bcbb94b1 100644
--- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -97,6 +97,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
if (MI->allDefsAreDead()) {
MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI);
return true;
}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
index fe3d29657942..1c80556dfef5 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -103,16 +103,6 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<GCModuleInfo>();
}
-static const char *DescKind(GC::PointKind Kind) {
- switch (Kind) {
- case GC::PreCall:
- return "pre-call";
- case GC::PostCall:
- return "post-call";
- }
- llvm_unreachable("Invalid point kind");
-}
-
bool Printer::runOnFunction(Function &F) {
if (F.hasGC())
return false;
@@ -129,7 +119,7 @@ bool Printer::runOnFunction(Function &F) {
for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE;
++PI) {
- OS << "\t" << PI->Label->getName() << ": " << DescKind(PI->Kind)
+ OS << "\t" << PI->Label->getName() << ": " << "post-call"
<< ", live = {";
for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
index 31ddeadbd97a..e8ccd84b0b93 100644
--- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -38,7 +38,7 @@ namespace {
/// directed by the GCStrategy. It also performs automatic root initialization
/// and custom intrinsic lowering.
class LowerIntrinsics : public FunctionPass {
- bool PerformDefaultLowering(Function &F, GCStrategy &S);
+ bool DoLowering(Function &F, GCStrategy &S);
public:
static char ID;
@@ -102,13 +102,6 @@ void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTreeWrapperPass>();
}
-static bool NeedsDefaultLoweringPass(const GCStrategy &C) {
- // Default lowering is necessary only if read or write barriers have a default
- // action. The default for roots is no action.
- return !C.customWriteBarrier() || !C.customReadBarrier() ||
- C.initializeRoots();
-}
-
/// doInitialization - If this module uses the GC intrinsics, find them now.
bool LowerIntrinsics::doInitialization(Module &M) {
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -148,8 +141,7 @@ static bool CouldBecomeSafePoint(Instruction *I) {
return true;
}
-static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
- unsigned Count) {
+static bool InsertRootInitializers(Function &F, ArrayRef<AllocaInst *> Roots) {
// Scroll past alloca instructions.
BasicBlock::iterator IP = F.getEntryBlock().begin();
while (isa<AllocaInst>(IP))
@@ -166,12 +158,12 @@ static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
// Add root initializers.
bool MadeChange = false;
- for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
- if (!InitedRoots.count(*I)) {
+ for (AllocaInst *Root : Roots)
+ if (!InitedRoots.count(Root)) {
StoreInst *SI = new StoreInst(
- ConstantPointerNull::get(cast<PointerType>((*I)->getAllocatedType())),
- *I);
- SI->insertAfter(*I);
+ ConstantPointerNull::get(cast<PointerType>(Root->getAllocatedType())),
+ Root);
+ SI->insertAfter(Root);
MadeChange = true;
}
@@ -188,64 +180,59 @@ bool LowerIntrinsics::runOnFunction(Function &F) {
GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
GCStrategy &S = FI.getStrategy();
- bool MadeChange = false;
-
- if (NeedsDefaultLoweringPass(S))
- MadeChange |= PerformDefaultLowering(F, S);
-
- return MadeChange;
+ return DoLowering(F, S);
}
-bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
- bool LowerWr = !S.customWriteBarrier();
- bool LowerRd = !S.customReadBarrier();
- bool InitRoots = S.initializeRoots();
-
+/// Lower barriers out of existence (if the associated GCStrategy hasn't
+/// already done so...), and insert initializing stores to roots as a defensive
+/// measure. Given we're going to report all roots live at all safepoints, we
+/// need to be able to ensure each root has been initialized by the point the
+/// first safepoint is reached. This really should have been done by the
+/// frontend, but the old API made this non-obvious, so we do a potentially
+/// redundant store just in case.
+bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {
SmallVector<AllocaInst *, 32> Roots;
bool MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
- Function *F = CI->getCalledFunction();
- switch (F->getIntrinsicID()) {
- case Intrinsic::gcwrite:
- if (LowerWr) {
- // Replace a write barrier with a simple store.
- Value *St =
- new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI);
- CI->replaceAllUsesWith(St);
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::gcread:
- if (LowerRd) {
- // Replace a read barrier with a simple load.
- Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
- Ld->takeName(CI);
- CI->replaceAllUsesWith(Ld);
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::gcroot:
- if (InitRoots) {
- // Initialize the GC root, but do not delete the intrinsic. The
- // backend needs the intrinsic to flag the stack slot.
- Roots.push_back(
- cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
- }
- break;
- default:
- continue;
- }
-
+ for (BasicBlock &BB : F)
+ for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) {
+ IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++);
+ if (!CI)
+ continue;
+
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::gcwrite: {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getArgOperand(0),
+ CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
MadeChange = true;
+ break;
+ }
+ case Intrinsic::gcread: {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ MadeChange = true;
+ break;
+ }
+ case Intrinsic::gcroot: {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(
+ cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ break;
+ }
}
}
- }
if (Roots.size())
- MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+ MadeChange |= InsertRootInitializers(F, Roots);
return MadeChange;
}
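In effect the rewritten loop performs the following lowering (argument positions taken from the calls above; shown schematically, not as verbatim IR):

    //   llvm.gcwrite(Value, Obj, SlotPtr) -> store Value, SlotPtr
    //   llvm.gcread(Obj, SlotPtr)         -> load SlotPtr   (takes over the call's name)
    //   llvm.gcroot(Slot, Metadata)       -> kept as-is; Slot is recorded so
    //                                        InsertRootInitializers can store null into it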
@@ -276,26 +263,18 @@ MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
}
void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
- // Find the return address (next instruction), too, so as to bracket the call
- // instruction.
+ // Find the return address (next instruction), since that's what will be on
+ // the stack when the call is suspended and we need to inspect the stack.
MachineBasicBlock::iterator RAI = CI;
++RAI;
- if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
- MCSymbol *Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
- FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
- }
-
- if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
- MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
- FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
- }
+ MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(Label, CI->getDebugLoc());
}
void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
- for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
- ++BBI)
- for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineBasicBlock::iterator MI = MBB.begin(), ME = MBB.end();
MI != ME; ++MI)
if (MI->isCall()) {
// Do not treat tail or sibling call sites as safe points. This is
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
new file mode 100644
index 000000000000..89c525c5ba15
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -0,0 +1,370 @@
+//===- CSEInfo.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define DEBUG_TYPE "cseinfo"
+
+using namespace llvm;
+char llvm::GISelCSEAnalysisWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
+ "Analysis containing CSE Info", false, true)
+INITIALIZE_PASS_END(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
+ "Analysis containing CSE Info", false, true)
+
+/// -------- UniqueMachineInstr -------------//
+
+void UniqueMachineInstr::Profile(FoldingSetNodeID &ID) {
+ GISelInstProfileBuilder(ID, MI->getMF()->getRegInfo()).addNodeID(MI);
+}
+/// -----------------------------------------
+
+/// --------- CSEConfig ---------- ///
+bool CSEConfig::shouldCSEOpc(unsigned Opc) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_TRUNC:
+ return true;
+ }
+ return false;
+}
+
+bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
+ return Opc == TargetOpcode::G_CONSTANT;
+}
+/// -----------------------------------------
+
+/// -------- GISelCSEInfo -------------//
+void GISelCSEInfo::setMF(MachineFunction &MF) {
+ this->MF = &MF;
+ this->MRI = &MF.getRegInfo();
+}
+
+GISelCSEInfo::~GISelCSEInfo() {}
+
+bool GISelCSEInfo::isUniqueMachineInstValid(
+ const UniqueMachineInstr &UMI) const {
+ // Should we check here and assert that the instruction has been fully
+ // constructed?
+ // FIXME: Any other checks required to be done here? Remove this method if
+ // none.
+ return true;
+}
+
+void GISelCSEInfo::invalidateUniqueMachineInstr(UniqueMachineInstr *UMI) {
+ bool Removed = CSEMap.RemoveNode(UMI);
+ (void)Removed;
+ assert(Removed && "Invalidation called on invalid UMI");
+ // FIXME: Should UMI be deallocated/destroyed?
+}
+
+UniqueMachineInstr *GISelCSEInfo::getNodeIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB,
+ void *&InsertPos) {
+ auto *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (Node) {
+ if (!isUniqueMachineInstValid(*Node)) {
+ invalidateUniqueMachineInstr(Node);
+ return nullptr;
+ }
+
+ if (Node->MI->getParent() != MBB)
+ return nullptr;
+ }
+ return Node;
+}
+
+void GISelCSEInfo::insertNode(UniqueMachineInstr *UMI, void *InsertPos) {
+ handleRecordedInsts();
+ assert(UMI);
+ UniqueMachineInstr *MaybeNewNode = UMI;
+ if (InsertPos)
+ CSEMap.InsertNode(UMI, InsertPos);
+ else
+ MaybeNewNode = CSEMap.GetOrInsertNode(UMI);
+ if (MaybeNewNode != UMI) {
+ // A similar node exists in the folding set. Let's ignore this one.
+ return;
+ }
+ assert(InstrMapping.count(UMI->MI) == 0 &&
+ "This instruction should not be in the map");
+ InstrMapping[UMI->MI] = MaybeNewNode;
+}
+
+UniqueMachineInstr *GISelCSEInfo::getUniqueInstrForMI(const MachineInstr *MI) {
+ assert(shouldCSE(MI->getOpcode()) && "Trying to CSE an unsupported Node");
+ auto *Node = new (UniqueInstrAllocator) UniqueMachineInstr(MI);
+ return Node;
+}
+
+void GISelCSEInfo::insertInstr(MachineInstr *MI, void *InsertPos) {
+ assert(MI);
+ // If it exists in temporary insts, remove it.
+ TemporaryInsts.remove(MI);
+ auto *Node = getUniqueInstrForMI(MI);
+ insertNode(Node, InsertPos);
+}
+
+MachineInstr *GISelCSEInfo::getMachineInstrIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB,
+ void *&InsertPos) {
+ handleRecordedInsts();
+ if (auto *Inst = getNodeIfExists(ID, MBB, InsertPos)) {
+ LLVM_DEBUG(dbgs() << "CSEInfo: Found Instr " << *Inst->MI << "\n";);
+ return const_cast<MachineInstr *>(Inst->MI);
+ }
+ return nullptr;
+}
+
+void GISelCSEInfo::countOpcodeHit(unsigned Opc) {
+#ifndef NDEBUG
+ if (OpcodeHitTable.count(Opc))
+ OpcodeHitTable[Opc] += 1;
+ else
+ OpcodeHitTable[Opc] = 1;
+#endif
+ // Else do nothing.
+}
+
+void GISelCSEInfo::recordNewInstruction(MachineInstr *MI) {
+ if (shouldCSE(MI->getOpcode())) {
+ TemporaryInsts.insert(MI);
+ LLVM_DEBUG(dbgs() << "CSEInfo: Recording new MI" << *MI << "\n";);
+ }
+}
+
+void GISelCSEInfo::handleRecordedInst(MachineInstr *MI) {
+ assert(shouldCSE(MI->getOpcode()) && "Invalid instruction for CSE");
+ auto *UMI = InstrMapping.lookup(MI);
+ LLVM_DEBUG(dbgs() << "CSEInfo: Handling recorded MI" << *MI << "\n";);
+ if (UMI) {
+ // Invalidate this MI.
+ invalidateUniqueMachineInstr(UMI);
+ InstrMapping.erase(MI);
+ }
+ /// Now insert the new instruction.
+ if (UMI) {
+ /// We'll reuse the same UniqueMachineInstr to avoid the new
+ /// allocation.
+ *UMI = UniqueMachineInstr(MI);
+ insertNode(UMI, nullptr);
+ } else {
+ /// This is a new instruction. Allocate a new UniqueMachineInstr and
+ /// Insert.
+ insertInstr(MI);
+ }
+}
+
+void GISelCSEInfo::handleRemoveInst(MachineInstr *MI) {
+ if (auto *UMI = InstrMapping.lookup(MI)) {
+ invalidateUniqueMachineInstr(UMI);
+ InstrMapping.erase(MI);
+ }
+ TemporaryInsts.remove(MI);
+}
+
+void GISelCSEInfo::handleRecordedInsts() {
+ while (!TemporaryInsts.empty()) {
+ auto *MI = TemporaryInsts.pop_back_val();
+ handleRecordedInst(MI);
+ }
+}
+
+bool GISelCSEInfo::shouldCSE(unsigned Opc) const {
+ // Only GISel opcodes are CSEable
+ if (!isPreISelGenericOpcode(Opc))
+ return false;
+ assert(CSEOpt.get() && "CSEConfig not set");
+ return CSEOpt->shouldCSEOpc(Opc);
+}
+
+void GISelCSEInfo::erasingInstr(MachineInstr &MI) { handleRemoveInst(&MI); }
+void GISelCSEInfo::createdInstr(MachineInstr &MI) { recordNewInstruction(&MI); }
+void GISelCSEInfo::changingInstr(MachineInstr &MI) {
+ // For now, perform erase, followed by insert.
+ erasingInstr(MI);
+ createdInstr(MI);
+}
+void GISelCSEInfo::changedInstr(MachineInstr &MI) { changingInstr(MI); }
+
+void GISelCSEInfo::analyze(MachineFunction &MF) {
+ setMF(MF);
+ for (auto &MBB : MF) {
+ if (MBB.empty())
+ continue;
+ for (MachineInstr &MI : MBB) {
+ if (!shouldCSE(MI.getOpcode()))
+ continue;
+ LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI << "\n";);
+ insertInstr(&MI);
+ }
+ }
+}
+
+void GISelCSEInfo::releaseMemory() {
+ // print();
+ CSEMap.clear();
+ InstrMapping.clear();
+ UniqueInstrAllocator.Reset();
+ TemporaryInsts.clear();
+ CSEOpt.reset();
+ MRI = nullptr;
+ MF = nullptr;
+#ifndef NDEBUG
+ OpcodeHitTable.clear();
+#endif
+}
+
+void GISelCSEInfo::print() {
+#ifndef NDEBUG
+ for (auto &It : OpcodeHitTable) {
+ dbgs() << "CSE Count for Opc " << It.first << " : " << It.second << "\n";
+ };
+#endif
+}
+// -----------------------------------------
+// ---- Profiling methods for FoldingSetNode --- //
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeID(const MachineInstr *MI) const {
+ addNodeIDMBB(MI->getParent());
+ addNodeIDOpcode(MI->getOpcode());
+ for (auto &Op : MI->operands())
+ addNodeIDMachineOperand(Op);
+ addNodeIDFlag(MI->getFlags());
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDOpcode(unsigned Opc) const {
+ ID.AddInteger(Opc);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const LLT &Ty) const {
+ uint64_t Val = Ty.getUniqueRAWLLTData();
+ ID.AddInteger(Val);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const TargetRegisterClass *RC) const {
+ ID.AddPointer(RC);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const RegisterBank *RB) const {
+ ID.AddPointer(RB);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDImmediate(int64_t Imm) const {
+ ID.AddInteger(Imm);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegNum(unsigned Reg) const {
+ ID.AddInteger(Reg);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const unsigned Reg) const {
+ addNodeIDMachineOperand(MachineOperand::CreateReg(Reg, false));
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDMBB(const MachineBasicBlock *MBB) const {
+ ID.AddPointer(MBB);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const {
+ if (Flag)
+ ID.AddInteger(Flag);
+ return *this;
+}
+
+const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
+ const MachineOperand &MO) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (!MO.isDef())
+ addNodeIDRegNum(Reg);
+ LLT Ty = MRI.getType(Reg);
+ if (Ty.isValid())
+ addNodeIDRegType(Ty);
+ auto *RB = MRI.getRegBankOrNull(Reg);
+ if (RB)
+ addNodeIDRegType(RB);
+ auto *RC = MRI.getRegClassOrNull(Reg);
+ if (RC)
+ addNodeIDRegType(RC);
+ assert(!MO.isImplicit() && "Unhandled case");
+ } else if (MO.isImm())
+ ID.AddInteger(MO.getImm());
+ else if (MO.isCImm())
+ ID.AddPointer(MO.getCImm());
+ else if (MO.isFPImm())
+ ID.AddPointer(MO.getFPImm());
+ else if (MO.isPredicate())
+ ID.AddInteger(MO.getPredicate());
+ else
+ llvm_unreachable("Unhandled operand type");
+ // Handle other types
+ return *this;
+}
+
+GISelCSEInfo &GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfig> CSEOpt,
+ bool Recompute) {
+ if (!AlreadyComputed || Recompute) {
+ Info.setCSEConfig(std::move(CSEOpt));
+ Info.analyze(*MF);
+ AlreadyComputed = true;
+ }
+ return Info;
+}
+void GISelCSEAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool GISelCSEAnalysisWrapperPass::runOnMachineFunction(MachineFunction &MF) {
+ releaseMemory();
+ Wrapper.setMF(MF);
+ return false;
+}
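The CSE bookkeeping added above boils down to a simple contract: profile an instruction's basic block, opcode and source operands into a key, look the key up before building anything, and reuse the existing definition on a hit. The following is only a minimal standalone sketch of that idea; the Instr and SimpleCSEMap names are invented for illustration and are not LLVM's FoldingSet/UniqueMachineInstr API.

// Standalone sketch only: invented Instr/SimpleCSEMap types.
#include <cstdint>
#include <iostream>
#include <map>
#include <tuple>
#include <vector>

struct Instr {
  int Block;                 // parent block; the CSE above is per-block
  unsigned Opcode;
  std::vector<uint64_t> Ops; // source operands, as the profile records them
};

// The key mirrors what the profile builder hashes: block, opcode, operands.
using Key = std::tuple<int, unsigned, std::vector<uint64_t>>;

class SimpleCSEMap {
  std::map<Key, const Instr *> Map; // stand-in for the folding set

public:
  // Return an equivalent, previously seen instruction if there is one;
  // otherwise memoize this instruction and return it.
  const Instr *getOrInsert(const Instr &I) {
    Key K{I.Block, I.Opcode, I.Ops};
    auto It = Map.find(K);
    if (It != Map.end())
      return It->second;          // hit: the caller reuses the old definition
    Map.emplace(std::move(K), &I); // miss: remember the new instruction
    return &I;
  }
};

int main() {
  Instr A{0, 51, {1, 2}}; // block 0, an arbitrary opcode, source vregs 1 and 2
  Instr B{0, 51, {1, 2}}; // structurally identical instruction
  SimpleCSEMap CSE;
  std::cout << (CSE.getOrInsert(A) == CSE.getOrInsert(B)) << "\n"; // prints 1
}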
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
new file mode 100644
index 000000000000..863efe0c3e34
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -0,0 +1,231 @@
+//===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.cpp - MIBuilder--*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the CSEMIRBuilder class which CSEs as it builds
+/// instructions.
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+
+using namespace llvm;
+
+bool CSEMIRBuilder::dominates(MachineBasicBlock::const_iterator A,
+ MachineBasicBlock::const_iterator B) const {
+ auto MBBEnd = getMBB().end();
+ if (B == MBBEnd)
+ return true;
+ assert(A->getParent() == B->getParent() &&
+ "Iterators should be in same block");
+ const MachineBasicBlock *BBA = A->getParent();
+ MachineBasicBlock::const_iterator I = BBA->begin();
+ for (; &*I != A && &*I != B; ++I)
+ ;
+ return &*I == A;
+}
+
+MachineInstrBuilder
+CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
+ void *&NodeInsertPos) {
+ GISelCSEInfo *CSEInfo = getCSEInfo();
+ assert(CSEInfo && "Can't get here without setting CSEInfo");
+ MachineBasicBlock *CurMBB = &getMBB();
+ MachineInstr *MI =
+ CSEInfo->getMachineInstrIfExists(ID, CurMBB, NodeInsertPos);
+ if (MI) {
+ auto CurrPos = getInsertPt();
+ if (!dominates(MI, CurrPos))
+ CurMBB->splice(CurrPos, CurMBB, MI);
+ return MachineInstrBuilder(getMF(), MI);
+ }
+ return MachineInstrBuilder();
+}
+
+bool CSEMIRBuilder::canPerformCSEForOpc(unsigned Opc) const {
+ const GISelCSEInfo *CSEInfo = getCSEInfo();
+ if (!CSEInfo || !CSEInfo->shouldCSE(Opc))
+ return false;
+ return true;
+}
+
+void CSEMIRBuilder::profileDstOp(const DstOp &Op,
+ GISelInstProfileBuilder &B) const {
+ switch (Op.getDstOpKind()) {
+ case DstOp::DstType::Ty_RC:
+ B.addNodeIDRegType(Op.getRegClass());
+ break;
+ default:
+ B.addNodeIDRegType(Op.getLLTTy(*getMRI()));
+ break;
+ }
+}
+
+void CSEMIRBuilder::profileSrcOp(const SrcOp &Op,
+ GISelInstProfileBuilder &B) const {
+ switch (Op.getSrcOpKind()) {
+ case SrcOp::SrcType::Ty_Predicate:
+ B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate()));
+ break;
+ default:
+ B.addNodeIDRegType(Op.getReg());
+ break;
+ }
+}
+
+void CSEMIRBuilder::profileMBBOpcode(GISelInstProfileBuilder &B,
+ unsigned Opc) const {
+ // First add the MBB (Local CSE).
+ B.addNodeIDMBB(&getMBB());
+ // Then add the opcode.
+ B.addNodeIDOpcode(Opc);
+}
+
+void CSEMIRBuilder::profileEverything(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flags,
+ GISelInstProfileBuilder &B) const {
+
+ profileMBBOpcode(B, Opc);
+ // Then add the DstOps.
+ profileDstOps(DstOps, B);
+ // Then add the SrcOps.
+ profileSrcOps(SrcOps, B);
+ // Add Flags if passed in.
+ if (Flags)
+ B.addNodeIDFlag(*Flags);
+}
+
+MachineInstrBuilder CSEMIRBuilder::memoizeMI(MachineInstrBuilder MIB,
+ void *NodeInsertPos) {
+ assert(canPerformCSEForOpc(MIB->getOpcode()) &&
+ "Attempting to CSE illegal op");
+ MachineInstr *MIBInstr = MIB;
+ getCSEInfo()->insertInstr(MIBInstr, NodeInsertPos);
+ return MIB;
+}
+
+bool CSEMIRBuilder::checkCopyToDefsPossible(ArrayRef<DstOp> DstOps) {
+ if (DstOps.size() == 1)
+ return true; // always possible to emit copy to just 1 vreg.
+
+ return std::all_of(DstOps.begin(), DstOps.end(), [](const DstOp &Op) {
+ DstOp::DstType DT = Op.getDstOpKind();
+ return DT == DstOp::DstType::Ty_LLT || DT == DstOp::DstType::Ty_RC;
+ });
+}
+
+MachineInstrBuilder
+CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps,
+ MachineInstrBuilder &MIB) {
+  assert(checkCopyToDefsPossible(DstOps) &&
+         "Impossible to return a single MIB with copies to multiple defs");
+ if (DstOps.size() == 1) {
+ const DstOp &Op = DstOps[0];
+ if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg)
+ return buildCopy(Op.getReg(), MIB->getOperand(0).getReg());
+ }
+ return MIB;
+}
+
+MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
+ ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flag) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SREM: {
+ // Try to constant fold these.
+ assert(SrcOps.size() == 2 && "Invalid sources");
+ assert(DstOps.size() == 1 && "Invalid dsts");
+ if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
+ SrcOps[1].getReg(), *getMRI()))
+ return buildConstant(DstOps[0], Cst->getSExtValue());
+ break;
+ }
+ }
+ bool CanCopy = checkCopyToDefsPossible(DstOps);
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+  // If we can CSE this instruction, but it involves generating copies to
+  // multiple regs, give up. This frequently happens to UNMERGEs.
+ if (!CanCopy) {
+ auto MIB = MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+ // CSEInfo would have tracked this instruction. Remove it from the temporary
+ // insts.
+ getCSEInfo()->handleRemoveInst(&*MIB);
+ return MIB;
+ }
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileEverything(Opc, DstOps, SrcOps, Flag, ProfBuilder);
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired(DstOps, MIB);
+ }
+ // This instruction does not exist in the CSEInfo. Build it and CSE it.
+ MachineInstrBuilder NewMIB =
+ MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+ return memoizeMI(NewMIB, InsertPos);
+}
+
+MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
+ const ConstantInt &Val) {
+ constexpr unsigned Opc = TargetOpcode::G_CONSTANT;
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildConstant(Res, Val);
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileMBBOpcode(ProfBuilder, Opc);
+ profileDstOp(Res, ProfBuilder);
+ ProfBuilder.addNodeIDMachineOperand(MachineOperand::CreateCImm(&Val));
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired({Res}, MIB);
+ }
+ MachineInstrBuilder NewMIB = MachineIRBuilder::buildConstant(Res, Val);
+ return memoizeMI(NewMIB, InsertPos);
+}
+
+MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res,
+ const ConstantFP &Val) {
+ constexpr unsigned Opc = TargetOpcode::G_FCONSTANT;
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildFConstant(Res, Val);
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileMBBOpcode(ProfBuilder, Opc);
+ profileDstOp(Res, ProfBuilder);
+ ProfBuilder.addNodeIDMachineOperand(MachineOperand::CreateFPImm(&Val));
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired({Res}, MIB);
+ }
+ MachineInstrBuilder NewMIB = MachineIRBuilder::buildFConstant(Res, Val);
+ return memoizeMI(NewMIB, InsertPos);
+}
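Besides looking up dominating equivalents, the new builder also folds integer binary ops whose sources are already known constants (the G_ADD..G_SREM cases above delegate to ConstantFoldBinOp). Below is a rough standalone sketch of that build-time folding; the Builder struct and its KnownConstants map are invented stand-ins for querying MachineRegisterInfo, not the real interface.

// Standalone sketch only: invented Builder/KnownConstants.
#include <cstdint>
#include <iostream>
#include <optional>
#include <unordered_map>

enum Opcode { Add, Sub, Mul };

struct Builder {
  std::unordered_map<int, int64_t> KnownConstants; // vreg -> constant value

  // Fold only when both sources are already known constants; otherwise
  // report "can't fold" and let the caller emit the instruction.
  std::optional<int64_t> constantFold(Opcode Opc, int LHS, int RHS) const {
    auto L = KnownConstants.find(LHS);
    auto R = KnownConstants.find(RHS);
    if (L == KnownConstants.end() || R == KnownConstants.end())
      return std::nullopt;
    switch (Opc) {
    case Add: return L->second + R->second;
    case Sub: return L->second - R->second;
    case Mul: return L->second * R->second;
    }
    return std::nullopt;
  }
};

int main() {
  Builder B;
  B.KnownConstants[1] = 4; // vreg 1 holds the constant 4
  B.KnownConstants[2] = 5; // vreg 2 holds the constant 5
  if (auto Cst = B.constantFold(Add, 1, 2))
    std::cout << "folded to constant " << *Cst << "\n"; // prints 9
  if (!B.constantFold(Add, 1, 3))
    std::cout << "vreg 3 is not constant, emit the add instead\n";
}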
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 07de31bec660..724ecedf3b3f 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -23,6 +23,8 @@
using namespace llvm;
+void CallLowering::anchor() {}
+
bool CallLowering::lowerCall(
MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg,
ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
@@ -164,7 +166,6 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
// nop in big-endian situations.
return ValReg;
case CCValAssign::AExt: {
- assert(!VA.getLocVT().isVector() && "unexpected vector extend");
auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg);
return MIB->getOperand(0).getReg();
}
@@ -181,3 +182,5 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
}
llvm_unreachable("unable to extend register");
}
+
+void CallLowering::ValueHandler::anchor() {}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 0bc5b87de150..45b0e36fd7d9 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -1,4 +1,4 @@
-//===-- lib/CodeGen/GlobalISel/GICombiner.cpp -----------------------===//
+//===-- lib/CodeGen/GlobalISel/Combiner.cpp -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
-#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
@@ -25,20 +28,76 @@
using namespace llvm;
+namespace {
+/// This class acts as the glue that joins the CombinerHelper to the overall
+/// Combine algorithm. The CombinerHelper is intended to report the
+/// modifications it makes to the MIR to the GISelChangeObserver and the
+/// observer subclass will act on these events. In this case, instruction
+/// erasure will cancel any future visits to the erased instruction and
+/// instruction creation will schedule that instruction for a future visit.
+/// Other Combiner implementations may require more complex behaviour from
+/// their GISelChangeObserver subclass.
+class WorkListMaintainer : public GISelChangeObserver {
+ using WorkListTy = GISelWorkList<512>;
+ WorkListTy &WorkList;
+  /// Instructions that have been created but that we only want to report once
+  /// their operands have been added. This is only maintained if debug output
+  /// is requested.
+ SmallPtrSet<const MachineInstr *, 4> CreatedInstrs;
+
+public:
+ WorkListMaintainer(WorkListTy &WorkList)
+ : GISelChangeObserver(), WorkList(WorkList) {}
+ virtual ~WorkListMaintainer() {
+ }
+
+ void erasingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Erased: " << MI << "\n");
+ WorkList.remove(&MI);
+ }
+ void createdInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Creating: " << MI << "\n");
+ WorkList.insert(&MI);
+ LLVM_DEBUG(CreatedInstrs.insert(&MI));
+ }
+ void changingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Changing: " << MI << "\n");
+ WorkList.insert(&MI);
+ }
+ void changedInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Changed: " << MI << "\n");
+ WorkList.insert(&MI);
+ }
+
+ void reportFullyCreatedInstrs() {
+ LLVM_DEBUG(for (const auto *MI
+ : CreatedInstrs) {
+ dbgs() << "Created: ";
+ MI->print(dbgs());
+ });
+ LLVM_DEBUG(CreatedInstrs.clear());
+ }
+};
+}
+
Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC)
: CInfo(Info), TPC(TPC) {
(void)this->TPC; // FIXME: Remove when used.
}
-bool Combiner::combineMachineInstrs(MachineFunction &MF) {
+bool Combiner::combineMachineInstrs(MachineFunction &MF,
+ GISelCSEInfo *CSEInfo) {
// If the ISel pipeline failed, do not bother running this pass.
// FIXME: Should this be here or in individual combiner passes.
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
+ Builder =
+ CSEInfo ? make_unique<CSEMIRBuilder>() : make_unique<MachineIRBuilder>();
MRI = &MF.getRegInfo();
- Builder.setMF(MF);
+ Builder->setMF(MF);
+ if (CSEInfo)
+ Builder->setCSEInfo(CSEInfo);
LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n');
@@ -46,6 +105,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) {
bool MFChanged = false;
bool Changed;
+ MachineIRBuilder &B = *Builder.get();
do {
// Collect all instructions. Do a post order traversal for basic blocks and
@@ -53,6 +113,11 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) {
// down RPOT.
Changed = false;
GISelWorkList<512> WorkList;
+ WorkListMaintainer Observer(WorkList);
+ GISelObserverWrapper WrapperObserver(&Observer);
+ if (CSEInfo)
+ WrapperObserver.addObserver(CSEInfo);
+ RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
for (MachineBasicBlock *MBB : post_order(&MF)) {
if (MBB->empty())
continue;
@@ -71,8 +136,9 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) {
// Main Loop. Process the instructions here.
while (!WorkList.empty()) {
MachineInstr *CurrInst = WorkList.pop_back_val();
- LLVM_DEBUG(dbgs() << "Try combining " << *CurrInst << "\n";);
- Changed |= CInfo.combine(*CurrInst, Builder);
+ LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;);
+ Changed |= CInfo.combine(WrapperObserver, *CurrInst, B);
+ Observer.reportFullyCreatedInstrs();
}
MFChanged |= Changed;
} while (Changed);
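The worklist loop above relies on the observer to keep the worklist honest: instructions a combine erases must never be revisited, and instructions a combine creates get scheduled for a later visit. The following is a small self-contained sketch of that contract; Instr, WorkList and Maintainer are invented names, not the GISelWorkList/GISelChangeObserver API.

// Standalone sketch only: invented Instr/WorkList/Maintainer types.
#include <algorithm>
#include <iostream>
#include <vector>

struct Instr { int Id; };

class WorkList {
  std::vector<Instr *> Items;

public:
  void insert(Instr *I) {
    if (std::find(Items.begin(), Items.end(), I) == Items.end())
      Items.push_back(I);
  }
  void remove(Instr *I) {
    Items.erase(std::remove(Items.begin(), Items.end(), I), Items.end());
  }
  bool empty() const { return Items.empty(); }
  Instr *pop() { Instr *I = Items.back(); Items.pop_back(); return I; }
};

// Plays the role of the WorkListMaintainer above: react to what combines did.
struct Maintainer {
  WorkList &WL;
  void erasingInstr(Instr &I) { WL.remove(&I); }  // never revisit erased MIs
  void createdInstr(Instr &I) { WL.insert(&I); }  // revisit newly built MIs
};

int main() {
  Instr A{1}, B{2}, C{3};
  WorkList WL;
  Maintainer Obs{WL};
  WL.insert(&A);
  WL.insert(&B);
  Obs.erasingInstr(A);  // a combine deleted A
  Obs.createdInstr(C);  // a combine built C
  while (!WL.empty())
    std::cout << "visit " << WL.pop()->Id << "\n"; // visits 3, then 2
}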
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 44e904a6391b..b1c5670a6dec 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1,4 +1,4 @@
-//== ---lib/CodeGen/GlobalISel/GICombinerHelper.cpp --------------------- == //
+//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,17 +7,44 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
-#define DEBUG_TYPE "gi-combine"
+#define DEBUG_TYPE "gi-combiner"
using namespace llvm;
-CombinerHelper::CombinerHelper(MachineIRBuilder &B) :
- Builder(B), MRI(Builder.getMF().getRegInfo()) {}
+CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
+ MachineIRBuilder &B)
+ : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}
+
+void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg,
+ unsigned ToReg) const {
+ Observer.changingAllUsesOfReg(MRI, FromReg);
+
+ if (MRI.constrainRegAttrs(ToReg, FromReg))
+ MRI.replaceRegWith(FromReg, ToReg);
+ else
+ Builder.buildCopy(ToReg, FromReg);
+
+ Observer.finishedChangingAllUsesOfReg();
+}
+
+void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
+ MachineOperand &FromRegOp,
+ unsigned ToReg) const {
+ assert(FromRegOp.getParent() && "Expected an operand in an MI");
+ Observer.changingInstr(*FromRegOp.getParent());
+
+ FromRegOp.setReg(ToReg);
+
+ Observer.changedInstr(*FromRegOp.getParent());
+}
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::COPY)
@@ -30,12 +57,279 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
// a(sx) = COPY b(sx) -> Replace all uses of a with b.
if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) {
MI.eraseFromParent();
- MRI.replaceRegWith(DstReg, SrcReg);
+ replaceRegWith(MRI, DstReg, SrcReg);
return true;
}
return false;
}
+namespace {
+struct PreferredTuple {
+ LLT Ty; // The result type of the extend.
+ unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT
+ MachineInstr *MI;
+};
+
+/// Select a preference between two uses. CurrentUse is the current preference
+/// while the *ForCandidate arguments describe the candidate under
+/// consideration.
+PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
+ const LLT &TyForCandidate,
+ unsigned OpcodeForCandidate,
+ MachineInstr *MIForCandidate) {
+ if (!CurrentUse.Ty.isValid()) {
+ if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
+ CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ return CurrentUse;
+ }
+
+ // We permit the extend to hoist through basic blocks but this is only
+ // sensible if the target has extending loads. If you end up lowering back
+ // into a load and extend during the legalizer then the end result is
+ // hoisting the extend up to the load.
+
+ // Prefer defined extensions to undefined extensions as these are more
+ // likely to reduce the number of instructions.
+ if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
+ CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
+ return CurrentUse;
+ else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
+ OpcodeForCandidate != TargetOpcode::G_ANYEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+
+ // Prefer sign extensions to zero extensions as sign-extensions tend to be
+ // more expensive.
+ if (CurrentUse.Ty == TyForCandidate) {
+ if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
+ OpcodeForCandidate == TargetOpcode::G_ZEXT)
+ return CurrentUse;
+ else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
+ OpcodeForCandidate == TargetOpcode::G_SEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ }
+
+ // This is potentially target specific. We've chosen the largest type
+ // because G_TRUNC is usually free. One potential catch with this is that
+ // some targets have a reduced number of larger registers than smaller
+ // registers and this choice potentially increases the live-range for the
+ // larger value.
+ if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ }
+ return CurrentUse;
+}
+
+/// Find a suitable place to insert some instructions and insert them. This
+/// function accounts for special cases like inserting before a PHI node.
+/// The current strategy for inserting before PHI's is to duplicate the
+/// instructions for each predecessor. However, while that's ok for G_TRUNC
+/// on most targets since it generally requires no code, other targets/cases may
+/// want to try harder to find a dominating block.
+static void InsertInsnsWithoutSideEffectsBeforeUse(
+ MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
+ std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator)>
+ Inserter) {
+ MachineInstr &UseMI = *UseMO.getParent();
+
+ MachineBasicBlock *InsertBB = UseMI.getParent();
+
+ // If the use is a PHI then we want the predecessor block instead.
+ if (UseMI.isPHI()) {
+ MachineOperand *PredBB = std::next(&UseMO);
+ InsertBB = PredBB->getMBB();
+ }
+
+ // If the block is the same block as the def then we want to insert just after
+ // the def instead of at the start of the block.
+ if (InsertBB == DefMI.getParent()) {
+ MachineBasicBlock::iterator InsertPt = &DefMI;
+ Inserter(InsertBB, std::next(InsertPt));
+ return;
+ }
+
+ // Otherwise we want the start of the BB
+ Inserter(InsertBB, InsertBB->getFirstNonPHI());
+}
+} // end anonymous namespace
+
+bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
+ struct InsertionPoint {
+ MachineOperand *UseMO;
+ MachineBasicBlock *InsertIntoBB;
+ MachineBasicBlock::iterator InsertBefore;
+ InsertionPoint(MachineOperand *UseMO, MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore)
+ : UseMO(UseMO), InsertIntoBB(InsertIntoBB), InsertBefore(InsertBefore) {
+ }
+ };
+
+ // We match the loads and follow the uses to the extend instead of matching
+ // the extends and following the def to the load. This is because the load
+ // must remain in the same position for correctness (unless we also add code
+ // to find a safe place to sink it) whereas the extend is freely movable.
+ // It also prevents us from duplicating the load for the volatile case or just
+ // for performance.
+
+ if (MI.getOpcode() != TargetOpcode::G_LOAD &&
+ MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
+ MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
+ return false;
+
+ auto &LoadValue = MI.getOperand(0);
+ assert(LoadValue.isReg() && "Result wasn't a register?");
+
+ LLT LoadValueTy = MRI.getType(LoadValue.getReg());
+ if (!LoadValueTy.isScalar())
+ return false;
+
+ // Find the preferred type aside from the any-extends (unless it's the only
+ // one) and non-extending ops. We'll emit an extending load to that type and
+ // and emit a variant of (extend (trunc X)) for the others according to the
+ // relative type sizes. At the same time, pick an extend to use based on the
+ // extend involved in the chosen type.
+ unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
+ ? TargetOpcode::G_ANYEXT
+ : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ZEXT;
+ PreferredTuple Preferred = {LLT(), PreferredOpcode, nullptr};
+ for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
+ if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
+ UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
+ UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
+ Preferred = ChoosePreferredUse(Preferred,
+ MRI.getType(UseMI.getOperand(0).getReg()),
+ UseMI.getOpcode(), &UseMI);
+ }
+ }
+
+ // There were no extends
+ if (!Preferred.MI)
+ return false;
+  // It should be impossible to choose an extend without selecting a different
+ // type since by definition the result of an extend is larger.
+ assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
+
+ LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
+
+ // Rewrite the load to the chosen extending load.
+ unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
+ Observer.changingInstr(MI);
+ MI.setDesc(
+ Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
+ ? TargetOpcode::G_SEXTLOAD
+ : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
+ ? TargetOpcode::G_ZEXTLOAD
+ : TargetOpcode::G_LOAD));
+
+ // Rewrite all the uses to fix up the types.
+ SmallVector<MachineInstr *, 1> ScheduleForErase;
+ SmallVector<InsertionPoint, 4> ScheduleForInsert;
+ for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
+ MachineInstr *UseMI = UseMO.getParent();
+
+ // If the extend is compatible with the preferred extend then we should fix
+ // up the type and extend so that it uses the preferred use.
+ if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
+ UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
+ unsigned UseDstReg = UseMI->getOperand(0).getReg();
+ MachineOperand &UseSrcMO = UseMI->getOperand(1);
+ const LLT &UseDstTy = MRI.getType(UseDstReg);
+ if (UseDstReg != ChosenDstReg) {
+ if (Preferred.Ty == UseDstTy) {
+ // If the use has the same type as the preferred use, then merge
+ // the vregs and erase the extend. For example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s32) = G_SEXT %1(s8)
+ // %3:_(s32) = G_ANYEXT %1(s8)
+ // ... = ... %3(s32)
+ // rewrites to:
+ // %2:_(s32) = G_SEXTLOAD ...
+ // ... = ... %2(s32)
+ replaceRegWith(MRI, UseDstReg, ChosenDstReg);
+ ScheduleForErase.push_back(UseMO.getParent());
+ } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
+ // If the preferred size is smaller, then keep the extend but extend
+ // from the result of the extending load. For example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s32) = G_SEXT %1(s8)
+ // %3:_(s64) = G_ANYEXT %1(s8)
+ // ... = ... %3(s64)
+          // rewrites to:
+ // %2:_(s32) = G_SEXTLOAD ...
+ // %3:_(s64) = G_ANYEXT %2:_(s32)
+ // ... = ... %3(s64)
+ replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
+ } else {
+ // If the preferred size is large, then insert a truncate. For
+ // example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s64) = G_SEXT %1(s8)
+ // %3:_(s32) = G_ZEXT %1(s8)
+ // ... = ... %3(s32)
+          // rewrites to:
+          //    %2:_(s64) = G_SEXTLOAD ...
+          //    %4:_(s8) = G_TRUNC %2:_(s64)
+          //    %3:_(s32) = G_ZEXT %4:_(s8)
+          //    ... = ... %3(s32)
+ InsertInsnsWithoutSideEffectsBeforeUse(
+ Builder, MI, UseMO,
+ [&](MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore) {
+ ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
+ });
+ }
+ continue;
+ }
+ // The use is (one of) the uses of the preferred use we chose earlier.
+ // We're going to update the load to def this value later so just erase
+ // the old extend.
+ ScheduleForErase.push_back(UseMO.getParent());
+ continue;
+ }
+
+ // The use isn't an extend. Truncate back to the type we originally loaded.
+ // This is free on many targets.
+ InsertInsnsWithoutSideEffectsBeforeUse(
+ Builder, MI, UseMO,
+ [&](MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore) {
+ ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
+ });
+ }
+
+ DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+ for (auto &InsertionInfo : ScheduleForInsert) {
+ MachineOperand *UseMO = InsertionInfo.UseMO;
+ MachineBasicBlock *InsertIntoBB = InsertionInfo.InsertIntoBB;
+ MachineBasicBlock::iterator InsertBefore = InsertionInfo.InsertBefore;
+
+ MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
+ if (PreviouslyEmitted) {
+ Observer.changingInstr(*UseMO->getParent());
+ UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
+ Observer.changedInstr(*UseMO->getParent());
+ continue;
+ }
+
+ Builder.setInsertPt(*InsertIntoBB, InsertBefore);
+ unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+ MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+ EmittedInsns[InsertIntoBB] = NewMI;
+ replaceRegOpWith(MRI, *UseMO, NewDstReg);
+ }
+ for (auto &EraseMI : ScheduleForErase) {
+ Observer.erasingInstr(*EraseMI);
+ EraseMI->eraseFromParent();
+ }
+ MI.getOperand(0).setReg(ChosenDstReg);
+ Observer.changedInstr(MI);
+
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
- return tryCombineCopy(MI);
+ if (tryCombineCopy(MI))
+ return true;
+ return tryCombineExtendingLoads(MI);
}
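The extending-loads combine above hinges on ChoosePreferredUse picking one extend for the load to absorb. The snippet below is only a simplified standalone rendering of those preferences (keep a real sext/zext over an anyext, keep sext over zext at equal width, otherwise take the widest type); Use and Ext are invented types, and the real routine also reasons about hoisting across blocks.

// Simplified standalone sketch only: invented Ext/Use types.
#include <iostream>
#include <vector>

enum Ext { AnyExt, ZExt, SExt };

struct Use {
  unsigned Bits; // width of the extend's result
  Ext Kind;
};

static Use choosePreferred(const std::vector<Use> &Uses) {
  Use Best{0, AnyExt};
  for (const Use &U : Uses) {
    if (Best.Bits == 0) { Best = U; continue; }
    if (U.Kind == AnyExt && Best.Kind != AnyExt) continue;              // keep defined ext
    if (Best.Kind == AnyExt && U.Kind != AnyExt) { Best = U; continue; }
    if (U.Bits == Best.Bits) {                                          // same width
      if (Best.Kind == SExt && U.Kind == ZExt) continue;                // keep sext
      if (Best.Kind == ZExt && U.Kind == SExt) { Best = U; continue; }
    }
    if (U.Bits > Best.Bits) Best = U;                                   // widest wins
  }
  return Best;
}

int main() {
  // A load whose value feeds a 32-bit zext, a 32-bit sext and a 64-bit anyext.
  std::vector<Use> Uses = {{32, ZExt}, {32, SExt}, {64, AnyExt}};
  Use P = choosePreferred(Uses);
  const char *KindStr =
      P.Kind == SExt ? "sext" : P.Kind == ZExt ? "zext" : "anyext";
  std::cout << P.Bits << " " << KindStr << "\n"; // prints "32 sext"
  // The load then becomes a 32-bit sextload and the 64-bit any-extend is
  // rewritten to extend the new 32-bit result instead.
}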
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
new file mode 100644
index 000000000000..c693acbbf10b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -0,0 +1,40 @@
+//===-- lib/CodeGen/GlobalISel/GISelChangeObserver.cpp --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common code for observing and reacting to changes made
+// to machine instructions at the generic (GlobalISel) level.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+void GISelChangeObserver::changingAllUsesOfReg(
+ const MachineRegisterInfo &MRI, unsigned Reg) {
+ for (auto &ChangingMI : MRI.use_instructions(Reg)) {
+ changingInstr(ChangingMI);
+ ChangingAllUsesOfReg.insert(&ChangingMI);
+ }
+}
+
+void GISelChangeObserver::finishedChangingAllUsesOfReg() {
+ for (auto *ChangedMI : ChangingAllUsesOfReg)
+ changedInstr(*ChangedMI);
+}
+
+RAIIDelegateInstaller::RAIIDelegateInstaller(MachineFunction &MF,
+ MachineFunction::Delegate *Del)
+ : MF(MF), Delegate(Del) {
+ // Register this as the delegate for handling insertions and deletions of
+ // instructions.
+ MF.setDelegate(Del);
+}
+
+RAIIDelegateInstaller::~RAIIDelegateInstaller() { MF.resetDelegate(Delegate); }
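RAIIDelegateInstaller is the usual RAII pattern applied to the function's delegate hook: install the observer on construction and guarantee its removal when the scope ends, even on early returns. A minimal standalone sketch of that pattern follows; Function, Delegate and Printer are invented stand-ins, not MachineFunction's interface.

// Standalone sketch only: invented Function/Delegate types.
#include <cassert>
#include <iostream>

struct Delegate {
  virtual ~Delegate() = default;
  virtual void createdInstr(int Id) = 0;
};

struct Function {
  Delegate *Del = nullptr;
  void setDelegate(Delegate *D) { assert(!Del && "delegate already set"); Del = D; }
  void resetDelegate(Delegate *D) { assert(Del == D); Del = nullptr; }
  void createInstr(int Id) { if (Del) Del->createdInstr(Id); }
};

// Install the delegate for exactly one scope; the destructor removes it.
class RAIIDelegateInstaller {
  Function &F;
  Delegate *Del;

public:
  RAIIDelegateInstaller(Function &Fn, Delegate *D) : F(Fn), Del(D) { F.setDelegate(D); }
  ~RAIIDelegateInstaller() { F.resetDelegate(Del); }
};

struct Printer : Delegate {
  void createdInstr(int Id) override { std::cout << "created " << Id << "\n"; }
};

int main() {
  Function F;
  Printer P;
  {
    RAIIDelegateInstaller Install(F, &P);
    F.createInstr(1); // observed: prints "created 1"
  }
  F.createInstr(2);   // delegate already removed: prints nothing
}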
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 75496fba0449..95f6274aa068 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -75,11 +76,16 @@
using namespace llvm;
+static cl::opt<bool>
+ EnableCSEInIRTranslator("enable-cse-in-irtranslator",
+ cl::desc("Should enable CSE in irtranslator"),
+ cl::Optional, cl::init(false));
char IRTranslator::ID = 0;
INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
@@ -104,9 +110,44 @@ IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
}
+#ifndef NDEBUG
+namespace {
+/// Verify that every instruction created has the same DILocation as the
+/// instruction being translated.
+class DILocationVerifier : public GISelChangeObserver {
+ const Instruction *CurrInst = nullptr;
+
+public:
+ DILocationVerifier() = default;
+ ~DILocationVerifier() = default;
+
+ const Instruction *getCurrentInst() const { return CurrInst; }
+ void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }
+
+ void erasingInstr(MachineInstr &MI) override {}
+ void changingInstr(MachineInstr &MI) override {}
+ void changedInstr(MachineInstr &MI) override {}
+
+ void createdInstr(MachineInstr &MI) override {
+ assert(getCurrentInst() && "Inserted instruction without a current MI");
+
+ // Only print the check message if we're actually checking it.
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
+ << " was copied to " << MI);
+#endif
+ assert(CurrInst->getDebugLoc() == MI.getDebugLoc() &&
+ "Line info was not transferred to all instructions");
+ }
+};
+} // namespace
+#endif // ifndef NDEBUG
+
+
void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<StackProtector>();
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -185,7 +226,7 @@ ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
unsigned Idx = 0;
while (auto Elt = C.getAggregateElement(Idx++)) {
auto EltRegs = getOrCreateVRegs(*Elt);
- std::copy(EltRegs.begin(), EltRegs.end(), std::back_inserter(*VRegs));
+ llvm::copy(EltRegs, std::back_inserter(*VRegs));
}
} else {
assert(SplitTys.size() == 1 && "unexpectedly split LLT");
@@ -279,7 +320,12 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
unsigned Res = getOrCreateVReg(U);
- MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+ auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+ if (isa<Instruction>(U)) {
+ MachineInstr *FBinOpMI = FBinOp.getInstr();
+ const Instruction &I = cast<Instruction>(U);
+ FBinOpMI->copyIRFlags(I);
+ }
return true;
}
@@ -295,6 +341,13 @@ bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
}
+bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(1)));
+ return true;
+}
+
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
const CmpInst *CI = dyn_cast<CmpInst>(&U);
@@ -312,8 +365,10 @@ bool IRTranslator::translateCompare(const User &U,
else if (Pred == CmpInst::FCMP_TRUE)
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
- else
- MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
+ else {
+ auto FCmp = MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
+ FCmp->copyIRFlags(*CI);
+ }
return true;
}
@@ -323,14 +378,16 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
const Value *Ret = RI.getReturnValue();
if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
Ret = nullptr;
+
+ ArrayRef<unsigned> VRegs;
+ if (Ret)
+ VRegs = getOrCreateVRegs(*Ret);
+
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
- // FIXME: this interface should simplify when CallLowering gets adapted to
- // multiple VRegs per Value.
- unsigned VReg = Ret ? packRegs(*Ret, MIRBuilder) : 0;
- return CLI->lowerReturn(MIRBuilder, Ret, VReg);
+ return CLI->lowerReturn(MIRBuilder, Ret, VRegs);
}
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -353,7 +410,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
MIRBuilder.buildBr(TgtBB);
// Link successors.
- for (const BasicBlock *Succ : BrInst.successors())
+ for (const BasicBlock *Succ : successors(&BrInst))
CurBB.addSuccessor(&getMBB(*Succ));
return true;
}
@@ -413,7 +470,7 @@ bool IRTranslator::translateIndirectBr(const User &U,
// Link successors.
MachineBasicBlock &CurBB = MIRBuilder.getMBB();
- for (const BasicBlock *Succ : BrInst.successors())
+ for (const BasicBlock *Succ : successors(&BrInst))
CurBB.addSuccessor(&getMBB(*Succ));
return true;
@@ -544,8 +601,15 @@ bool IRTranslator::translateSelect(const User &U,
ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
- for (unsigned i = 0; i < ResRegs.size(); ++i)
- MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
+ const SelectInst &SI = cast<SelectInst>(U);
+ const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition());
+ for (unsigned i = 0; i < ResRegs.size(); ++i) {
+ auto Select =
+ MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
+ if (Cmp && isa<FPMathOperator>(Cmp)) {
+ Select->copyIRFlags(*Cmp);
+ }
+ }
return true;
}
@@ -704,29 +768,22 @@ void IRTranslator::getStackGuard(unsigned DstReg,
return;
MachinePointerInfo MPInfo(Global);
- MachineInstr::mmo_iterator MemRefs = MF->allocateMemRefsArray(1);
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
- *MemRefs =
+ MachineMemOperand *MemRef =
MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
DL->getPointerABIAlignment(0));
- MIB.setMemRefs(MemRefs, MemRefs + 1);
+ MIB.setMemRefs({MemRef});
}
bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder) {
ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
- auto MIB = MIRBuilder.buildInstr(Op)
- .addDef(ResRegs[0])
- .addDef(ResRegs[1])
- .addUse(getOrCreateVReg(*CI.getOperand(0)))
- .addUse(getOrCreateVReg(*CI.getOperand(1)));
-
- if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) {
- unsigned Zero = getOrCreateVReg(
- *Constant::getNullValue(Type::getInt1Ty(CI.getContext())));
- MIB.addUse(Zero);
- }
+ MIRBuilder.buildInstr(Op)
+ .addDef(ResRegs[0])
+ .addDef(ResRegs[1])
+ .addUse(getOrCreateVReg(*CI.getOperand(0)))
+ .addUse(getOrCreateVReg(*CI.getOperand(1)));
return true;
}
@@ -763,9 +820,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// instructions (in fact, they get ignored if they *do* exist).
MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
getOrCreateFrameIndex(*AI), DI.getDebugLoc());
- } else
- MIRBuilder.buildDirectDbgValue(getOrCreateVReg(*Address),
- DI.getVariable(), DI.getExpression());
+ } else {
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
+ MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
+ DI.getVariable(), DI.getExpression());
+ }
+ return true;
+ }
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
+ assert(DI.getLabel() && "Missing label");
+
+ assert(DI.getLabel()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+
+ MIRBuilder.buildDbgLabel(DI.getLabel());
return true;
}
case Intrinsic::vaend:
@@ -807,55 +878,86 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
case Intrinsic::uadd_with_overflow:
- return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder);
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
case Intrinsic::sadd_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
case Intrinsic::usub_with_overflow:
- return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBE, MIRBuilder);
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
case Intrinsic::ssub_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
case Intrinsic::umul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
case Intrinsic::smul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
- case Intrinsic::pow:
- MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
+ case Intrinsic::pow: {
+ auto Pow = MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)))
.addUse(getOrCreateVReg(*CI.getArgOperand(1)));
+ Pow->copyIRFlags(CI);
return true;
- case Intrinsic::exp:
- MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
+ }
+ case Intrinsic::exp: {
+ auto Exp = MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Exp->copyIRFlags(CI);
return true;
- case Intrinsic::exp2:
- MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
+ }
+ case Intrinsic::exp2: {
+ auto Exp2 = MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Exp2->copyIRFlags(CI);
return true;
- case Intrinsic::log:
- MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
+ }
+ case Intrinsic::log: {
+ auto Log = MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Log->copyIRFlags(CI);
return true;
- case Intrinsic::log2:
- MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
+ }
+ case Intrinsic::log2: {
+ auto Log2 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Log2->copyIRFlags(CI);
return true;
- case Intrinsic::fabs:
- MIRBuilder.buildInstr(TargetOpcode::G_FABS)
+ }
+ case Intrinsic::log10: {
+ auto Log10 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG10)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Log10->copyIRFlags(CI);
return true;
- case Intrinsic::fma:
- MIRBuilder.buildInstr(TargetOpcode::G_FMA)
+ }
+ case Intrinsic::fabs: {
+ auto Fabs = MIRBuilder.buildInstr(TargetOpcode::G_FABS)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Fabs->copyIRFlags(CI);
+ return true;
+ }
+ case Intrinsic::trunc:
+ MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::round:
+ MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::fma: {
+ auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)))
.addUse(getOrCreateVReg(*CI.getArgOperand(1)))
.addUse(getOrCreateVReg(*CI.getArgOperand(2)));
+ FMA->copyIRFlags(CI);
return true;
+ }
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
@@ -867,11 +969,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
// TODO: Revisit this to see if we should move this part of the
// lowering to the combiner.
- MIRBuilder.buildInstr(TargetOpcode::G_FMA, Dst, Op0, Op1, Op2);
+ auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2});
+ FMA->copyIRFlags(CI);
} else {
LLT Ty = getLLTForType(*CI.getType(), *DL);
- auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, Ty, Op0, Op1);
- MIRBuilder.buildInstr(TargetOpcode::G_FADD, Dst, FMul, Op2);
+ auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1});
+ FMul->copyIRFlags(CI);
+ auto FAdd = MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2});
+ FAdd->copyIRFlags(CI);
}
return true;
}
@@ -893,6 +998,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0);
return true;
}
+ case Intrinsic::is_constant:
+ // If this wasn't constant-folded away by now, then it's not a
+ // constant.
+ MIRBuilder.buildConstant(getOrCreateVReg(CI), 0);
+ return true;
case Intrinsic::stackguard:
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
@@ -902,15 +1012,50 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getStackGuard(GuardVal, MIRBuilder);
AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
+ int FI = getOrCreateFrameIndex(*Slot);
+ MF->getFrameInfo().setStackProtectorIndex(FI);
+
MIRBuilder.buildStore(
GuardVal, getOrCreateVReg(*Slot),
- *MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(*MF,
- getOrCreateFrameIndex(*Slot)),
- MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
- PtrTy.getSizeInBits() / 8, 8));
+ *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile,
+ PtrTy.getSizeInBits() / 8, 8));
return true;
}
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz: {
+ ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
+ bool isTrailing = ID == Intrinsic::cttz;
+ unsigned Opcode = isTrailing
+ ? Cst->isZero() ? TargetOpcode::G_CTTZ
+ : TargetOpcode::G_CTTZ_ZERO_UNDEF
+ : Cst->isZero() ? TargetOpcode::G_CTLZ
+ : TargetOpcode::G_CTLZ_ZERO_UNDEF;
+ MIRBuilder.buildInstr(Opcode)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ }
+ case Intrinsic::ctpop: {
+ MIRBuilder.buildInstr(TargetOpcode::G_CTPOP)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ }
+ case Intrinsic::invariant_start: {
+ LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
+ unsigned Undef = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildUndef(Undef);
+ return true;
+ }
+ case Intrinsic::invariant_end:
+ return true;
+ case Intrinsic::ceil:
+ MIRBuilder.buildInstr(TargetOpcode::G_FCEIL)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
}
return false;
}
@@ -1101,7 +1246,6 @@ bool IRTranslator::translateLandingPad(const User &U,
const LandingPadInst &LP = cast<LandingPadInst>(U);
MachineBasicBlock &MBB = MIRBuilder.getMBB();
- addLandingPadInfo(LP, MBB);
MBB.setIsEHPad();
@@ -1279,7 +1423,22 @@ bool IRTranslator::translateExtractElement(const User &U,
}
unsigned Res = getOrCreateVReg(U);
unsigned Val = getOrCreateVReg(*U.getOperand(0));
- unsigned Idx = getOrCreateVReg(*U.getOperand(1));
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
+ unsigned Idx = 0;
+ if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
+ if (CI->getBitWidth() != PreferredVecIdxWidth) {
+ APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
+ auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
+ Idx = getOrCreateVReg(*NewIdxCI);
+ }
+ }
+ if (!Idx)
+ Idx = getOrCreateVReg(*U.getOperand(1));
+ if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
+ const LLT &VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
+ Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx)->getOperand(0).getReg();
+ }
MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
return true;
}
@@ -1299,7 +1458,7 @@ bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
SmallVector<MachineInstr *, 4> Insts;
for (auto Reg : getOrCreateVRegs(PI)) {
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg);
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
Insts.push_back(MIB.getInstr());
}
@@ -1402,9 +1561,18 @@ bool IRTranslator::translateAtomicRMW(const User &U,
}
void IRTranslator::finishPendingPhis() {
+#ifndef NDEBUG
+ DILocationVerifier Verifier;
+ GISelObserverWrapper WrapperObserver(&Verifier);
+ RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
+#endif // ifndef NDEBUG
for (auto &Phi : PendingPHIs) {
const PHINode *PI = Phi.first;
ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
+ EntryBuilder->setDebugLoc(PI->getDebugLoc());
+#ifndef NDEBUG
+ Verifier.setCurrentInst(PI);
+#endif // ifndef NDEBUG
// All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
// won't create extra control flow here, otherwise we need to find the
@@ -1442,10 +1610,12 @@ bool IRTranslator::valueIsSplit(const Value &V,
}
bool IRTranslator::translate(const Instruction &Inst) {
- CurBuilder.setDebugLoc(Inst.getDebugLoc());
+ CurBuilder->setDebugLoc(Inst.getDebugLoc());
+ EntryBuilder->setDebugLoc(Inst.getDebugLoc());
switch(Inst.getOpcode()) {
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder);
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: \
+ return translate##OPCODE(Inst, *CurBuilder.get());
#include "llvm/IR/Instruction.def"
default:
return false;
@@ -1454,11 +1624,11 @@ bool IRTranslator::translate(const Instruction &Inst) {
bool IRTranslator::translate(const Constant &C, unsigned Reg) {
if (auto CI = dyn_cast<ConstantInt>(&C))
- EntryBuilder.buildConstant(Reg, *CI);
+ EntryBuilder->buildConstant(Reg, *CI);
else if (auto CF = dyn_cast<ConstantFP>(&C))
- EntryBuilder.buildFConstant(Reg, *CF);
+ EntryBuilder->buildFConstant(Reg, *CF);
else if (isa<UndefValue>(C))
- EntryBuilder.buildUndef(Reg);
+ EntryBuilder->buildUndef(Reg);
else if (isa<ConstantPointerNull>(C)) {
// As we are trying to build a constant val of 0 into a pointer,
// insert a cast to make them correct with respect to types.
@@ -1466,35 +1636,36 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
unsigned ZeroReg = getOrCreateVReg(*ZeroVal);
- EntryBuilder.buildCast(Reg, ZeroReg);
+ EntryBuilder->buildCast(Reg, ZeroReg);
} else if (auto GV = dyn_cast<GlobalValue>(&C))
- EntryBuilder.buildGlobalValue(Reg, GV);
+ EntryBuilder->buildGlobalValue(Reg, GV);
else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
if (!CAZ->getType()->isVectorTy())
return false;
// Return the scalar if it is a <1 x Ty> vector.
if (CAZ->getNumElements() == 1)
return translate(*CAZ->getElementValue(0u), Reg);
- std::vector<unsigned> Ops;
+ SmallVector<unsigned, 4> Ops;
for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
Constant &Elt = *CAZ->getElementValue(i);
Ops.push_back(getOrCreateVReg(Elt));
}
- EntryBuilder.buildMerge(Reg, Ops);
+ EntryBuilder->buildBuildVector(Reg, Ops);
} else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
// Return the scalar if it is a <1 x Ty> vector.
if (CV->getNumElements() == 1)
return translate(*CV->getElementAsConstant(0), Reg);
- std::vector<unsigned> Ops;
+ SmallVector<unsigned, 4> Ops;
for (unsigned i = 0; i < CV->getNumElements(); ++i) {
Constant &Elt = *CV->getElementAsConstant(i);
Ops.push_back(getOrCreateVReg(Elt));
}
- EntryBuilder.buildMerge(Reg, Ops);
+ EntryBuilder->buildBuildVector(Reg, Ops);
} else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
switch(CE->getOpcode()) {
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder);
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: \
+ return translate##OPCODE(*CE, *EntryBuilder.get());
#include "llvm/IR/Instruction.def"
default:
return false;
@@ -1506,9 +1677,9 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
}
- EntryBuilder.buildMerge(Reg, Ops);
+ EntryBuilder->buildBuildVector(Reg, Ops);
} else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
- EntryBuilder.buildBlockAddress(Reg, BA);
+ EntryBuilder->buildBlockAddress(Reg, BA);
} else
return false;
@@ -1525,8 +1696,8 @@ void IRTranslator::finalizeFunction() {
// MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
// to avoid accessing free’d memory (in runOnMachineFunction) and to avoid
// destroying it twice (in ~IRTranslator() and ~LLVMContext())
- EntryBuilder = MachineIRBuilder();
- CurBuilder = MachineIRBuilder();
+ EntryBuilder.reset();
+ CurBuilder.reset();
}
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
@@ -1534,12 +1705,30 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
const Function &F = MF->getFunction();
if (F.empty())
return false;
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+ // Set the CSEConfig and run the analysis.
+ GISelCSEInfo *CSEInfo = nullptr;
+ TPC = &getAnalysis<TargetPassConfig>();
+ bool IsO0 = TPC->getOptLevel() == CodeGenOpt::Level::None;
+ // Disable CSE for O0.
+ bool EnableCSE = !IsO0 && EnableCSEInIRTranslator;
+ if (EnableCSE) {
+ EntryBuilder = make_unique<CSEMIRBuilder>(CurMF);
+ std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
+ CSEInfo = &Wrapper.get(std::move(Config));
+ EntryBuilder->setCSEInfo(CSEInfo);
+ CurBuilder = make_unique<CSEMIRBuilder>(CurMF);
+ CurBuilder->setCSEInfo(CSEInfo);
+ } else {
+ EntryBuilder = make_unique<MachineIRBuilder>();
+ CurBuilder = make_unique<MachineIRBuilder>();
+ }
CLI = MF->getSubtarget().getCallLowering();
- CurBuilder.setMF(*MF);
- EntryBuilder.setMF(*MF);
+ CurBuilder->setMF(*MF);
+ EntryBuilder->setMF(*MF);
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
- TPC = &getAnalysis<TargetPassConfig>();
ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
assert(PendingPHIs.empty() && "stale PHIs");
@@ -1558,7 +1747,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Setup a separate basic-block for the arguments and constants
MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
MF->push_back(EntryBB);
- EntryBuilder.setMBB(*EntryBB);
+ EntryBuilder->setMBB(*EntryBB);
// Create all blocks, in IR order, to preserve the layout.
for (const BasicBlock &BB: F) {
@@ -1595,7 +1784,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
}
}
- if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
+ if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
@@ -1612,38 +1801,54 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
assert(VRegs.empty() && "VRegs already populated?");
VRegs.push_back(VArg);
} else {
- unpackRegs(*ArgIt, VArg, EntryBuilder);
+ unpackRegs(*ArgIt, VArg, *EntryBuilder.get());
}
ArgIt++;
}
// Need to visit defs before uses when translating instructions.
- ReversePostOrderTraversal<const Function *> RPOT(&F);
- for (const BasicBlock *BB : RPOT) {
- MachineBasicBlock &MBB = getMBB(*BB);
- // Set the insertion point of all the following translations to
- // the end of this basic block.
- CurBuilder.setMBB(MBB);
-
- for (const Instruction &Inst : *BB) {
- if (translate(Inst))
- continue;
-
- OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
- Inst.getDebugLoc(), BB);
- R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
-
- if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
- std::string InstStrStorage;
- raw_string_ostream InstStr(InstStrStorage);
- InstStr << Inst;
+ GISelObserverWrapper WrapperObserver;
+ if (EnableCSE && CSEInfo)
+ WrapperObserver.addObserver(CSEInfo);
+ {
+ ReversePostOrderTraversal<const Function *> RPOT(&F);
+#ifndef NDEBUG
+ DILocationVerifier Verifier;
+ WrapperObserver.addObserver(&Verifier);
+#endif // ifndef NDEBUG
+ RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
+ for (const BasicBlock *BB : RPOT) {
+ MachineBasicBlock &MBB = getMBB(*BB);
+ // Set the insertion point of all the following translations to
+ // the end of this basic block.
+ CurBuilder->setMBB(MBB);
+
+ for (const Instruction &Inst : *BB) {
+#ifndef NDEBUG
+ Verifier.setCurrentInst(&Inst);
+#endif // ifndef NDEBUG
+ if (translate(Inst))
+ continue;
+
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ Inst.getDebugLoc(), BB);
+ R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
+
+ if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << Inst;
+
+ R << ": '" << InstStr.str() << "'";
+ }
- R << ": '" << InstStr.str() << "'";
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
}
-
- reportTranslationError(*MF, *TPC, *ORE, R);
- return false;
}
+#ifndef NDEBUG
+ WrapperObserver.removeObserver(&Verifier);
+#endif
}
finishPendingPhis();
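
Note: the IRTranslator changes above heap-allocate the builders (a plain MachineIRBuilder at -O0, a CSEMIRBuilder otherwise) and register a GISelObserverWrapper as the function's delegate so that the CSE info and, in asserts builds, the DILocation verifier are notified of every instruction the translator creates. As a rough, self-contained sketch of that fan-out pattern (the names Inst, Observer, ObserverWrapper and Counter are illustrative stand-ins, not the LLVM classes):

#include <algorithm>
#include <cassert>
#include <vector>

struct Inst { int Opcode; };

// Stand-in for GISelChangeObserver: gets told about every created instruction.
struct Observer {
  virtual ~Observer() = default;
  virtual void createdInstr(Inst &I) = 0;
};

// Stand-in for GISelObserverWrapper: fans one notification out to every
// registered observer (CSE info, debug-location verifier, ...).
class ObserverWrapper : public Observer {
  std::vector<Observer *> Observers;
public:
  void addObserver(Observer *O) { Observers.push_back(O); }
  void removeObserver(Observer *O) {
    Observers.erase(std::remove(Observers.begin(), Observers.end(), O),
                    Observers.end());
  }
  void createdInstr(Inst &I) override {
    for (Observer *O : Observers)
      O->createdInstr(I);
  }
};

struct Counter : Observer {
  int Seen = 0;
  void createdInstr(Inst &) override { ++Seen; }
};

int main() {
  ObserverWrapper W;
  Counter A, B;
  W.addObserver(&A);
  W.addObserver(&B);
  Inst I{1};
  W.createdInstr(I);            // both observers see the new instruction
  assert(A.Seen == 1 && B.Seen == 1);
  W.removeObserver(&B);
  W.createdInstr(I);
  assert(A.Seen == 2 && B.Seen == 1);
}
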
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 5e77fcbb0ed9..38913e4afcba 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -80,5 +80,5 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
return true;
return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
- MI.implicit_operands().begin() == MI.implicit_operands().end();
+ empty(MI.implicit_operands());
}
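
Note: the InstructionSelector hunk above only swaps an explicit begin() == end() comparison for the empty() range helper. A minimal stand-alone equivalent of such a helper (a sketch assuming member begin()/end(), not the STLExtras implementation; the name range_empty is chosen here to avoid clashing with std or llvm):

#include <cassert>
#include <vector>

// True when the range has no elements, mirroring the begin() == end()
// comparison it replaces.
template <typename RangeT>
bool range_empty(const RangeT &R) {
  return R.begin() == R.end();
}

int main() {
  std::vector<int> V;
  assert(range_empty(V));
  V.push_back(1);
  assert(!range_empty(V));
}
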
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 344f573a67f5..94eab9ae00c8 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -45,7 +45,7 @@ LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet(
SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit;
return [=](const LegalityQuery &Query) {
TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
- Query.MMODescrs[MMOIdx].Size};
+ Query.MMODescrs[MMOIdx].SizeInBits};
return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) !=
TypesAndMemSize.end();
};
@@ -82,7 +82,7 @@ LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
- return !isPowerOf2_32(Query.MMODescrs[MMOIdx].Size /* In Bytes */);
+ return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8);
};
}
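
Note: the LegalityPredicates hunk renames the MMO size field to SizeInBits, so memSizeInBytesNotPow2 now converts to bytes before the power-of-two test. A small self-contained sketch of that check (assuming 8-bit bytes; isPow2 stands in for isPowerOf2_32):

#include <cassert>
#include <cstdint>

// True when Value is a non-zero power of two (same contract as isPowerOf2_32).
static bool isPow2(uint32_t Value) {
  return Value != 0 && (Value & (Value - 1)) == 0;
}

// The memory size arrives in bits; the legality test is on the byte size.
static bool memSizeInBytesNotPow2(uint32_t SizeInBits) {
  return !isPow2(SizeInBits / 8);
}

int main() {
  assert(!memSizeInBytesNotPow2(32)); // 4 bytes: power of two
  assert(memSizeInBytesNotPow2(24));  // 3 bytes: not a power of two
}
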
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 9a2aac998a84..84131e59948c 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -16,6 +16,9 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -32,11 +35,17 @@
using namespace llvm;
+static cl::opt<bool>
+ EnableCSEInLegalizer("enable-cse-in-legalizer",
+ cl::desc("Should enable CSE in Legalizer"),
+ cl::Optional, cl::init(false));
+
char Legalizer::ID = 0;
INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
false)
@@ -47,6 +56,8 @@ Legalizer::Legalizer() : MachineFunctionPass(ID) {
void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ AU.addPreserved<GISelCSEAnalysisWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -64,9 +75,54 @@ static bool isArtifact(const MachineInstr &MI) {
case TargetOpcode::G_SEXT:
case TargetOpcode::G_MERGE_VALUES:
case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
return true;
}
}
+using InstListTy = GISelWorkList<256>;
+using ArtifactListTy = GISelWorkList<128>;
+
+namespace {
+class LegalizerWorkListManager : public GISelChangeObserver {
+ InstListTy &InstList;
+ ArtifactListTy &ArtifactList;
+
+public:
+ LegalizerWorkListManager(InstListTy &Insts, ArtifactListTy &Arts)
+ : InstList(Insts), ArtifactList(Arts) {}
+
+ void createdInstr(MachineInstr &MI) override {
+ // Only legalize pre-isel generic instructions.
+    // The legalization process could generate target-specific pseudo
+    // instructions with generic types. Don't record them.
+ if (isPreISelGenericOpcode(MI.getOpcode())) {
+ if (isArtifact(MI))
+ ArtifactList.insert(&MI);
+ else
+ InstList.insert(&MI);
+ }
+ LLVM_DEBUG(dbgs() << ".. .. New MI: " << MI);
+ }
+
+ void erasingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << ".. .. Erasing: " << MI);
+ InstList.remove(&MI);
+ ArtifactList.remove(&MI);
+ }
+
+ void changingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << ".. .. Changing MI: " << MI);
+ }
+
+ void changedInstr(MachineInstr &MI) override {
+ // When insts change, we want to revisit them to legalize them again.
+ // We'll consider them the same as created.
+ LLVM_DEBUG(dbgs() << ".. .. Changed MI: " << MI);
+ createdInstr(MI);
+ }
+};
+} // namespace
bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// If the ISel pipeline failed, do not bother running that pass.
@@ -76,15 +132,16 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
init(MF);
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
- LegalizerHelper Helper(MF);
const size_t NumBlocks = MF.size();
MachineRegisterInfo &MRI = MF.getRegInfo();
// Populate Insts
- GISelWorkList<256> InstList;
- GISelWorkList<128> ArtifactList;
+ InstListTy InstList;
+ ArtifactListTy ArtifactList;
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
// Perform legalization bottom up so we can DCE as we legalize.
// Traverse BB in RPOT and within each basic block, add insts top down,
@@ -103,24 +160,34 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
InstList.insert(&MI);
}
}
- Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
- // Only legalize pre-isel generic instructions.
- // Legalization process could generate Target specific pseudo
- // instructions with generic types. Don't record them
- if (isPreISelGenericOpcode(MI->getOpcode())) {
- if (isArtifact(*MI))
- ArtifactList.insert(MI);
- else
- InstList.insert(MI);
- }
- LLVM_DEBUG(dbgs() << ".. .. New MI: " << *MI;);
- });
+ std::unique_ptr<MachineIRBuilder> MIRBuilder;
+ GISelCSEInfo *CSEInfo = nullptr;
+ bool IsO0 = TPC.getOptLevel() == CodeGenOpt::Level::None;
+ // Disable CSE for O0.
+ bool EnableCSE = !IsO0 && EnableCSEInLegalizer;
+ if (EnableCSE) {
+ MIRBuilder = make_unique<CSEMIRBuilder>();
+ std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
+ CSEInfo = &Wrapper.get(std::move(Config));
+ MIRBuilder->setCSEInfo(CSEInfo);
+ } else
+ MIRBuilder = make_unique<MachineIRBuilder>();
+ // This observer keeps the worklist updated.
+ LegalizerWorkListManager WorkListObserver(InstList, ArtifactList);
+ // We want both WorkListObserver as well as CSEInfo to observe all changes.
+ // Use the wrapper observer.
+ GISelObserverWrapper WrapperObserver(&WorkListObserver);
+ if (EnableCSE && CSEInfo)
+ WrapperObserver.addObserver(CSEInfo);
+ // Now install the observer as the delegate to MF.
+ // This will keep all the observers notified about new insertions/deletions.
+ RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
+ LegalizerHelper Helper(MF, WrapperObserver, *MIRBuilder.get());
const LegalizerInfo &LInfo(Helper.getLegalizerInfo());
- LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(), LInfo);
- auto RemoveDeadInstFromLists = [&InstList,
- &ArtifactList](MachineInstr *DeadMI) {
- InstList.remove(DeadMI);
- ArtifactList.remove(DeadMI);
+ LegalizationArtifactCombiner ArtCombiner(*MIRBuilder.get(), MF.getRegInfo(),
+ LInfo);
+ auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) {
+ WrapperObserver.erasingInstr(*DeadMI);
};
bool Changed = false;
do {
@@ -138,7 +205,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// Error out if we couldn't legalize this instruction. We may want to
// fall back to DAG ISel instead in the future.
if (Res == LegalizerHelper::UnableToLegalize) {
- Helper.MIRBuilder.stopRecordingInsertions();
+ Helper.MIRBuilder.stopObservingChanges();
reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
"unable to legalize instruction", MI);
return false;
@@ -149,7 +216,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *ArtifactList.pop_back_val();
assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
- LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+ LLVM_DEBUG(dbgs() << MI << "Is dead\n");
RemoveDeadInstFromLists(&MI);
MI.eraseFromParentAndMarkDBGValuesForRemoval();
continue;
@@ -157,7 +224,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineInstr *, 4> DeadInstructions;
if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) {
for (auto *DeadMI : DeadInstructions) {
- LLVM_DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI);
+ LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n");
RemoveDeadInstFromLists(DeadMI);
DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
}
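
Note: the Legalizer changes above replace the insertion-recording callback with a LegalizerWorkListManager observer: newly created generic instructions are filed into either the artifact worklist or the general worklist, erased instructions are dropped from both, and changed instructions are requeued like new ones. A stripped-down sketch of that routing, using ordinary containers rather than GISelWorkList (all names here are illustrative):

#include <cassert>
#include <unordered_set>

struct Inst { bool IsArtifact; };
using WorkList = std::unordered_set<Inst *>;

// Routes change notifications into the two worklists.
struct WorkListManager {
  WorkList &InstList, &ArtifactList;
  void createdInstr(Inst &I) {
    (I.IsArtifact ? ArtifactList : InstList).insert(&I);
  }
  void erasingInstr(Inst &I) {
    InstList.erase(&I);
    ArtifactList.erase(&I);
  }
  void changedInstr(Inst &I) { createdInstr(I); } // revisit like a new inst
};

int main() {
  WorkList Insts, Arts;
  WorkListManager M{Insts, Arts};
  Inst A{true}, B{false};
  M.createdInstr(A);
  M.createdInstr(B);
  assert(Arts.count(&A) && Insts.count(&B));
  M.erasingInstr(A);
  assert(Arts.empty());
}
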
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 87086af121b7..b3fc94cdec60 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -15,24 +15,37 @@
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-
#define DEBUG_TYPE "legalizer"
using namespace llvm;
using namespace LegalizeActions;
-LegalizerHelper::LegalizerHelper(MachineFunction &MF)
- : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) {
+LegalizerHelper::LegalizerHelper(MachineFunction &MF,
+ GISelChangeObserver &Observer,
+ MachineIRBuilder &Builder)
+ : MIRBuilder(Builder), MRI(MF.getRegInfo()),
+ LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
MIRBuilder.setMF(MF);
+ MIRBuilder.setChangeObserver(Observer);
}
+LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
+ GISelChangeObserver &Observer,
+ MachineIRBuilder &B)
+ : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
+ MIRBuilder.setMF(MF);
+ MIRBuilder.setChangeObserver(Observer);
+}
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
@@ -59,8 +72,8 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
case Custom:
LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
- return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
- : UnableToLegalize;
+ return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
+ : UnableToLegalize;
default:
LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
return UnableToLegalize;
@@ -77,17 +90,20 @@ void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
switch (Opcode) {
case TargetOpcode::G_SDIV:
- assert(Size == 32 && "Unsupported size");
- return RTLIB::SDIV_I32;
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
case TargetOpcode::G_UDIV:
- assert(Size == 32 && "Unsupported size");
- return RTLIB::UDIV_I32;
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
case TargetOpcode::G_SREM:
- assert(Size == 32 && "Unsupported size");
- return RTLIB::SREM_I32;
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
case TargetOpcode::G_UREM:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
assert(Size == 32 && "Unsupported size");
- return RTLIB::UREM_I32;
+ return RTLIB::CTLZ_I32;
case TargetOpcode::G_FADD:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
@@ -184,8 +200,9 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SREM:
- case TargetOpcode::G_UREM: {
- Type *HLTy = Type::getInt32Ty(Ctx);
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
+ Type *HLTy = IntegerType::get(Ctx, Size);
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
if (Status != Legalized)
return Status;
@@ -289,7 +306,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
for (int i = 0; i < NumParts; ++i)
DstRegs.push_back(
MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -319,7 +341,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
CarryIn = CarryOut;
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -375,7 +400,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(SegReg);
}
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -436,7 +465,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
}
assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -462,12 +495,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
unsigned SrcReg = 0;
unsigned Adjustment = i * NarrowSize / 8;
+ unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
- NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
- MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
- MMO.getOrdering(), MMO.getFailureOrdering());
+ NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
+ MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
Adjustment);
@@ -477,7 +510,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(DstReg);
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
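
Note: in the narrowed load/store hunks above, every split access now derives its alignment from the original alignment and its byte offset via MinAlign, instead of letting only the first part keep the original alignment. MinAlign(A, B) is the greatest power of two dividing both arguments, i.e. the lowest set bit of A | B, which is the alignment a pointer keeps after being offset by B bytes from an A-aligned address. A sketch with the usual bit trick (an assumption-level rendering, not a copy of MathExtras.h):

#include <cassert>
#include <cstdint>

// Lowest set bit of A | B; MinAlign(A, 0) == A.
static uint64_t MinAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  assert(MinAlign(16, 0) == 16); // first split part keeps full alignment
  assert(MinAlign(16, 8) == 8);  // an 8-byte offset drops it to 8
  assert(MinAlign(16, 4) == 4);
}
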
@@ -504,12 +540,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
for (int i = 0; i < NumParts; ++i) {
unsigned DstReg = 0;
unsigned Adjustment = i * NarrowSize / 8;
+ unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
- NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
- MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
- MMO.getOrdering(), MMO.getFailureOrdering());
+ NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
+ MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
Adjustment);
@@ -537,11 +573,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(DstReg);
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_OR: {
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR: {
// Legalize bitwise operation:
// A = BinOp<Ty> B, C
// into:
@@ -580,11 +621,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// Do the operation on each small part.
for (int i = 0; i < NumParts; ++i)
- MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]);
+ MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
+ {SrcsReg1[i], SrcsReg2[i]});
// Gather the destination registers into the final destination.
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -594,7 +639,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
unsigned OpIdx, unsigned ExtOpcode) {
MachineOperand &MO = MI.getOperand(OpIdx);
- auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg());
+ auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
MO.setReg(ExtB->getOperand(0).getReg());
}
@@ -603,7 +648,7 @@ void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
MachineOperand &MO = MI.getOperand(OpIdx);
unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt);
+ MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
MO.setReg(DstExt);
}
@@ -614,6 +659,69 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_USUBO: {
+ if (TypeIdx == 1)
+ return UnableToLegalize; // TODO
+ auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
+ {MI.getOperand(2).getReg()});
+ auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
+ {MI.getOperand(3).getReg()});
+ unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
+ ? TargetOpcode::G_ADD
+ : TargetOpcode::G_SUB;
+ // Do the arithmetic in the larger type.
+ auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
+ LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
+ APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
+ auto AndOp = MIRBuilder.buildInstr(
+ TargetOpcode::G_AND, {WideTy},
+ {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
+ // There is no overflow if the AndOp is the same as NewOp.
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
+ AndOp);
+ // Now trunc the NewOp to the original result.
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTPOP: {
+ // First ZEXT the input.
+ auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
+ LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
+ if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
+ // The count is the same in the larger type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ auto TopBit =
+ APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
+ MIBSrc = MIRBuilder.buildInstr(
+ TargetOpcode::G_OR, {WideTy},
+ {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
+ }
+ // Perform the operation at the larger size.
+ auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
+ // This is already the correct result for CTPOP and CTTZs
+ if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
+ MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
+ // The correct result is NewOp - (Difference in widety and current ty).
+ unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
+ MIBNewOp = MIRBuilder.buildInstr(
+ TargetOpcode::G_SUB, {WideTy},
+ {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
+ }
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ // Make the original instruction a trunc now, and update its source.
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
+ MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
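
Note: the new G_UADDO/G_USUBO widening above zero-extends both operands, performs the arithmetic in the wider type, masks the result down to the original width, reports overflow when the masked value differs from the wide result, and truncates for the final value. The same idea in plain integer code, detecting 8-bit unsigned-add overflow with 16-bit arithmetic (an illustration of the technique, not the generated MIR):

#include <cassert>
#include <cstdint>

// Returns true if A + B overflows 8 bits; *Res receives the truncated sum.
static bool uaddo8_via_16(uint8_t A, uint8_t B, uint8_t *Res) {
  uint16_t Wide = uint16_t(A) + uint16_t(B); // the add in the wide type
  uint16_t Masked = Wide & 0xFF;             // mask with all-ones of the narrow type
  *Res = uint8_t(Wide);                      // truncate back to the result type
  return Masked != Wide;                     // overflow iff masking changed the value
}

int main() {
  uint8_t R;
  assert(!uaddo8_via_16(100, 27, &R) && R == 127);
  assert(uaddo8_via_16(200, 100, &R) && R == 44); // 300 wraps to 44 and overflows
}
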
@@ -624,87 +732,100 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
// Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SHL:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
// The "number of bits to shift" operand must preserve its value as an
// unsigned integer:
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SDIV:
case TargetOpcode::G_SREM:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ASHR:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
// The "number of bits to shift" operand must preserve its value as an
// unsigned integer:
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UDIV:
case TargetOpcode::G_UREM:
case TargetOpcode::G_LSHR:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SELECT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- // Perform operation at larger width (any extension is fine here, high bits
- // don't affect the result) and then truncate the result back to the
- // original type.
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changingInstr(MI);
+ if (TypeIdx == 0) {
+ // Perform operation at larger width (any extension is fine here, high
+ // bits don't affect the result) and then truncate the result back to the
+ // original type.
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ } else {
+ // Explicit extension is required here since high bits affect the result.
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ }
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
if (TypeIdx != 0)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SITOFP:
if (TypeIdx != 1)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UITOFP:
if (TypeIdx != 1)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_INSERT:
if (TypeIdx != 0)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_LOAD:
@@ -717,8 +838,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
LLVM_FALLTHROUGH;
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
+ Observer.changingInstr(MI);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_STORE: {
@@ -726,18 +848,20 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
WideTy != LLT::scalar(8))
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_CONSTANT: {
MachineOperand &SrcMO = MI.getOperand(1);
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
+ Observer.changingInstr(MI);
SrcMO.setCImm(ConstantInt::get(Ctx, Val));
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FCONSTANT: {
@@ -755,28 +879,38 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
default:
llvm_unreachable("Unhandled fp widen type");
}
+ Observer.changingInstr(MI);
SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_BRCOND:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FCMP:
+ Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else {
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
}
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ICMP:
+ Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else {
@@ -787,18 +921,20 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarSrc(MI, WideTy, 2, ExtOpcode);
widenScalarSrc(MI, WideTy, 3, ExtOpcode);
}
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_GEP:
assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PHI: {
assert(TypeIdx == 0 && "Expecting only Idx 0");
+ Observer.changingInstr(MI);
for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
@@ -808,9 +944,25 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MachineBasicBlock &MBB = *MI.getParent();
MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ if (TypeIdx != 2)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_FCEIL:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
}
}
@@ -984,6 +1136,30 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTPOP:
+ return lowerBitCount(MI, TypeIdx, Ty);
+ case G_UADDE: {
+ unsigned Res = MI.getOperand(0).getReg();
+ unsigned CarryOut = MI.getOperand(1).getReg();
+ unsigned LHS = MI.getOperand(2).getReg();
+ unsigned RHS = MI.getOperand(3).getReg();
+ unsigned CarryIn = MI.getOperand(4).getReg();
+
+ unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
+ unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
+
+ MIRBuilder.buildAdd(TmpRes, LHS, RHS);
+ MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
+ MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
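
Note: the new G_UADDE lowering above computes LHS + RHS, adds the zero-extended carry-in, and derives the carry-out from an unsigned compare against LHS. For reference, a conventional full-width add-with-carry in plain C++ that tracks a carry out of either addition is sketched below; it illustrates the general operation, not a transcription of the MIR sequence above:

#include <cassert>
#include <cstdint>

// 32-bit add with carry-in; returns the sum and sets *CarryOut.
static uint32_t adde32(uint32_t A, uint32_t B, bool CarryIn, bool *CarryOut) {
  uint32_t Tmp = A + B;                  // first addition
  bool C1 = Tmp < A;                     // carried out of the first addition?
  uint32_t Res = Tmp + (CarryIn ? 1 : 0);
  bool C2 = Res < Tmp;                   // carried out of adding the carry-in?
  *CarryOut = C1 || C2;                  // at most one of the two can carry
  return Res;
}

int main() {
  bool C;
  assert(adde32(0xFFFFFFFFu, 0, true, &C) == 0 && C);
  assert(adde32(1, 2, false, &C) == 3 && !C);
}
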
@@ -993,10 +1169,14 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
// FIXME: Don't know how to handle secondary types yet.
if (TypeIdx != 0)
return UnableToLegalize;
+
+ MIRBuilder.setInstr(MI);
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
- case TargetOpcode::G_ADD: {
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ SmallVector<unsigned, 2> DstRegs;
+
unsigned NarrowSize = NarrowTy.getSizeInBits();
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Size = MRI.getType(DstReg).getSizeInBits();
@@ -1006,7 +1186,29 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
if (Size % NarrowSize != 0)
return UnableToLegalize;
- MIRBuilder.setInstr(MI);
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUndef(TmpReg);
+ DstRegs.push_back(TmpReg);
+ }
+
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_ADD: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned Size = MRI.getType(DstReg).getSizeInBits();
+ int NumParts = Size / NarrowSize;
+ // FIXME: Don't know how to handle the situation where the small vectors
+ // aren't all the same size yet.
+ if (Size % NarrowSize != 0)
+ return UnableToLegalize;
SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
@@ -1018,9 +1220,164 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
DstRegs.push_back(DstReg);
}
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE: {
+ bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
+ unsigned ValReg = MI.getOperand(0).getReg();
+ unsigned AddrReg = MI.getOperand(1).getReg();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ unsigned Size = MRI.getType(ValReg).getSizeInBits();
+ unsigned NumParts = Size / NarrowSize;
+
+ SmallVector<unsigned, 8> NarrowRegs;
+ if (!IsLoad)
+ extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
+
+ const LLT OffsetTy =
+ LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
+ MachineFunction &MF = *MI.getMF();
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
+ unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
+ unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
+ unsigned NewAddrReg = 0;
+ MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
+ MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
+ MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
+ NarrowTy.getSizeInBits() / 8, Alignment);
+ if (IsLoad) {
+ unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
+ NarrowRegs.push_back(Dst);
+ MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
+ } else {
+ MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
+ }
+ }
+ if (IsLoad) {
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
+ else
+ MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
+ }
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ unsigned Opc = MI.getOpcode();
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ auto isSupported = [this](const LegalityQuery &Q) {
+ auto QAction = LI.getAction(Q).Action;
+ return QAction == Legal || QAction == Libcall || QAction == Custom;
+ };
+ switch (Opc) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
+ // This trivially expands to CTLZ.
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_CTLZ: {
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Len = Ty.getSizeInBits();
+ if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
+ // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
+ auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
+ {Ty}, {SrcReg});
+ auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
+ auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
+ auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+ SrcReg, MIBZero);
+ MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
+ MIBCtlzZU);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // for now, we do this:
+ // NewLen = NextPowerOf2(Len);
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+    // Up to NewLen/2
+ // return Len - popcount(x);
+ //
+ // Ref: "Hacker's Delight" by Henry Warren
+ unsigned Op = SrcReg;
+ unsigned NewLen = PowerOf2Ceil(Len);
+ for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
+ auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
+ auto MIBOp = MIRBuilder.buildInstr(
+ TargetOpcode::G_OR, {Ty},
+ {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
+ {Op, MIBShiftAmt})});
+ Op = MIBOp->getOperand(0).getReg();
+ }
+ auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
+ MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
+ {MIRBuilder.buildConstant(Ty, Len), MIBPop});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
+ // This trivially expands to CTTZ.
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_CTTZ: {
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Len = Ty.getSizeInBits();
+ if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
+ // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
+ // zero.
+ auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
+ {Ty}, {SrcReg});
+ auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
+ auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
+ auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+ SrcReg, MIBZero);
+ MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
+ MIBCttzZU);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // Ref: "Hacker's Delight" by Henry Warren
+ auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
+ auto MIBNot =
+ MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
+ auto MIBTmp = MIRBuilder.buildInstr(
+ TargetOpcode::G_AND, {Ty},
+ {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
+ {SrcReg, MIBCstNeg1})});
+ if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
+ isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
+ auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
+ MIRBuilder.buildInstr(
+ TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
+ {MIBCstLen,
+ MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
+ MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
+ return Legalized;
+ }
}
}
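
Note: lowerBitCount above falls back to the classic expansions when no better instruction is available: CTLZ is lowered by smearing the highest set bit downwards with an or/shift cascade and subtracting a population count, and CTTZ by taking popcount(~x & (x - 1)), both per the cited Hacker's Delight. The same expansions for 32-bit values in plain C++ (a self-contained sketch with a naive popcount):

#include <cassert>
#include <cstdint>

static unsigned popcount32(uint32_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1) // clear the lowest set bit each iteration
    ++N;
  return N;
}

// Count leading zeros: smear the highest set bit down, then count set bits.
static unsigned ctlz32(uint32_t X) {
  X |= X >> 1;
  X |= X >> 2;
  X |= X >> 4;
  X |= X >> 8;
  X |= X >> 16;
  return 32 - popcount32(X);
}

// Count trailing zeros: ~x & (x - 1) keeps exactly the bits below the lowest set bit.
static unsigned cttz32(uint32_t X) {
  return popcount32(~X & (X - 1));
}

int main() {
  assert(ctlz32(1) == 31 && ctlz32(0x80000000u) == 0 && ctlz32(0) == 32);
  assert(cttz32(8) == 3 && cttz32(1) == 0 && cttz32(0) == 32);
}
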
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index ae061b64a38c..fa36ede5b976 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -51,7 +52,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
OS << Opcode << ", MMOs={";
for (const auto &MMODescr : MMODescrs) {
- OS << MMODescr.Size << ", ";
+ OS << MMODescr.SizeInBits << ", ";
}
OS << "}";
@@ -219,7 +220,7 @@ void LegalizerInfo::computeTables() {
Opcode, TypeIdx, ElementSize,
moreToWiderTypesAndLessToWidest(NumElementsActions));
}
- llvm::sort(ElementSizesSeen.begin(), ElementSizesSeen.end());
+ llvm::sort(ElementSizesSeen);
SizeChangeStrategy VectorElementSizeChangeStrategy =
&unsupportedForDifferentSizes;
if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
@@ -298,8 +299,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(
std::initializer_list<unsigned> Opcodes) {
unsigned Representative = *Opcodes.begin();
- assert(Opcodes.begin() != Opcodes.end() &&
- Opcodes.begin() + 1 != Opcodes.end() &&
+ assert(!empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() &&
"Initializer list must have at least two opcodes");
for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I)
@@ -376,7 +376,8 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI,
}
bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
return false;
}
@@ -584,7 +585,7 @@ const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) {
for (const MachineBasicBlock &MBB : MF)
for (const MachineInstr &MI : MBB)
if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
- return &MI;
+ return &MI;
}
return nullptr;
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 3271b54aa830..1f5611061994 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -10,6 +10,7 @@
/// This file implements the MachineIRBuidler class.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -22,73 +23,72 @@
using namespace llvm;
-void MachineIRBuilderBase::setMF(MachineFunction &MF) {
+void MachineIRBuilder::setMF(MachineFunction &MF) {
State.MF = &MF;
State.MBB = nullptr;
State.MRI = &MF.getRegInfo();
State.TII = MF.getSubtarget().getInstrInfo();
State.DL = DebugLoc();
State.II = MachineBasicBlock::iterator();
- State.InsertedInstr = nullptr;
+ State.Observer = nullptr;
}
-void MachineIRBuilderBase::setMBB(MachineBasicBlock &MBB) {
+void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) {
State.MBB = &MBB;
State.II = MBB.end();
assert(&getMF() == MBB.getParent() &&
"Basic block is in a different function");
}
-void MachineIRBuilderBase::setInstr(MachineInstr &MI) {
+void MachineIRBuilder::setInstr(MachineInstr &MI) {
assert(MI.getParent() && "Instruction is not part of a basic block");
setMBB(*MI.getParent());
State.II = MI.getIterator();
}
-void MachineIRBuilderBase::setInsertPt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator II) {
+void MachineIRBuilder::setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; }
+
+void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II) {
assert(MBB.getParent() == &getMF() &&
"Basic block is in a different function");
State.MBB = &MBB;
State.II = II;
}
-void MachineIRBuilderBase::recordInsertion(MachineInstr *InsertedInstr) const {
- if (State.InsertedInstr)
- State.InsertedInstr(InsertedInstr);
+void MachineIRBuilder::recordInsertion(MachineInstr *InsertedInstr) const {
+ if (State.Observer)
+ State.Observer->createdInstr(*InsertedInstr);
}
-void MachineIRBuilderBase::recordInsertions(
- std::function<void(MachineInstr *)> Inserted) {
- State.InsertedInstr = std::move(Inserted);
+void MachineIRBuilder::setChangeObserver(GISelChangeObserver &Observer) {
+ State.Observer = &Observer;
}
-void MachineIRBuilderBase::stopRecordingInsertions() {
- State.InsertedInstr = nullptr;
-}
+void MachineIRBuilder::stopObservingChanges() { State.Observer = nullptr; }
//------------------------------------------------------------------------------
// Build instruction variants.
//------------------------------------------------------------------------------
-MachineInstrBuilder MachineIRBuilderBase::buildInstr(unsigned Opcode) {
+MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) {
return insertInstr(buildInstrNoInsert(Opcode));
}
-MachineInstrBuilder MachineIRBuilderBase::buildInstrNoInsert(unsigned Opcode) {
+MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) {
MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode));
return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::insertInstr(MachineInstrBuilder MIB) {
+MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
getMBB().insert(getInsertPt(), MIB);
recordInsertion(MIB);
return MIB;
}
MachineInstrBuilder
-MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
- const MDNode *Expr) {
+MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -99,8 +99,9 @@ MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
/*IsIndirect*/ false, Reg, Variable, Expr));
}
-MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue(
- unsigned Reg, const MDNode *Variable, const MDNode *Expr) {
+MachineInstrBuilder
+MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -111,9 +112,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue(
/*IsIndirect*/ true, Reg, Variable, Expr));
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable,
- const MDNode *Expr) {
+MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
+ const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -126,8 +127,9 @@ MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable,
.addMetadata(Expr);
}
-MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue(
- const Constant &C, const MDNode *Variable, const MDNode *Expr) {
+MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
+ const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -149,16 +151,24 @@ MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue(
return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
}
-MachineInstrBuilder MachineIRBuilderBase::buildFrameIndex(unsigned Res,
- int Idx) {
+MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
+ assert(isa<DILabel>(Label) && "not a label");
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(State.DL) &&
+ "Expected inlined-at fields to agree");
+ auto MIB = buildInstr(TargetOpcode::DBG_LABEL);
+
+ return MIB.addMetadata(Label);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
return buildInstr(TargetOpcode::G_FRAME_INDEX)
.addDef(Res)
.addFrameIndex(Idx);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) {
+MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
+ const GlobalValue *GV) {
assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
assert(getMRI()->getType(Res).getAddressSpace() ==
GV->getType()->getAddressSpace() &&
@@ -169,17 +179,14 @@ MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) {
.addGlobalAddress(GV);
}
-void MachineIRBuilderBase::validateBinaryOp(unsigned Res, unsigned Op0,
- unsigned Op1) {
- assert((getMRI()->getType(Res).isScalar() ||
- getMRI()->getType(Res).isVector()) &&
- "invalid operand type");
- assert(getMRI()->getType(Res) == getMRI()->getType(Op0) &&
- getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch");
+void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0,
+ const LLT &Op1) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0 && Res == Op1) && "type mismatch");
}
-MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0,
- unsigned Op1) {
+MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
+ unsigned Op1) {
assert(getMRI()->getType(Res).isPointer() &&
getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
assert(getMRI()->getType(Op1).isScalar() && "invalid offset type");
@@ -191,8 +198,8 @@ MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0,
}
Optional<MachineInstrBuilder>
-MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0,
- const LLT &ValueTy, uint64_t Value) {
+MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
+ const LLT &ValueTy, uint64_t Value) {
assert(Res == 0 && "Res is a result argument");
assert(ValueTy.isScalar() && "invalid offset type");
@@ -208,9 +215,8 @@ MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0,
return buildGEP(Res, Op0, TmpReg);
}
-MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res,
- unsigned Op0,
- uint32_t NumBits) {
+MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
+ uint32_t NumBits) {
assert(getMRI()->getType(Res).isPointer() &&
getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
@@ -220,24 +226,23 @@ MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res,
.addImm(NumBits);
}
-MachineInstrBuilder MachineIRBuilderBase::buildBr(MachineBasicBlock &Dest) {
+MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilderBase::buildBrIndirect(unsigned Tgt) {
+MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination");
return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
}
-MachineInstrBuilder MachineIRBuilderBase::buildCopy(unsigned Res, unsigned Op) {
- assert(getMRI()->getType(Res) == LLT() || getMRI()->getType(Op) == LLT() ||
- getMRI()->getType(Res) == getMRI()->getType(Op));
- return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::COPY, Res, Op);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) {
- LLT Ty = getMRI()->getType(Res);
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+ const ConstantInt &Val) {
+ LLT Ty = Res.getLLTTy(*getMRI());
assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type");
@@ -246,48 +251,55 @@ MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) {
NewVal = ConstantInt::get(getMF().getFunction().getContext(),
Val.getValue().sextOrTrunc(Ty.getSizeInBits()));
- return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal);
+ auto MIB = buildInstr(TargetOpcode::G_CONSTANT);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addCImm(NewVal);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::buildConstant(unsigned Res,
- int64_t Val) {
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+ int64_t Val) {
auto IntN = IntegerType::get(getMF().getFunction().getContext(),
- getMRI()->getType(Res).getSizeInBits());
+ Res.getLLTTy(*getMRI()).getSizeInBits());
ConstantInt *CI = ConstantInt::get(IntN, Val, true);
return buildConstant(Res, *CI);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildFConstant(unsigned Res, const ConstantFP &Val) {
- assert(getMRI()->getType(Res).isScalar() && "invalid operand type");
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+ const ConstantFP &Val) {
+ assert(Res.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
- return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val);
+ auto MIB = buildInstr(TargetOpcode::G_FCONSTANT);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addFPImm(&Val);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::buildFConstant(unsigned Res,
- double Val) {
- LLT DstTy = getMRI()->getType(Res);
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+ double Val) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
auto &Ctx = getMF().getFunction().getContext();
auto *CFP =
ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits()));
return buildFConstant(Res, *CFP);
}
-MachineInstrBuilder MachineIRBuilderBase::buildBrCond(unsigned Tst,
- MachineBasicBlock &Dest) {
+MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst,
+ MachineBasicBlock &Dest) {
assert(getMRI()->getType(Tst).isScalar() && "invalid operand type");
return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilderBase::buildLoad(unsigned Res, unsigned Addr,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr,
+ MachineMemOperand &MMO) {
return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res,
- unsigned Addr, MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
+ unsigned Res,
+ unsigned Addr,
+ MachineMemOperand &MMO) {
assert(getMRI()->getType(Res).isValid() && "invalid operand type");
assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
@@ -297,9 +309,8 @@ MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res,
.addMemOperand(&MMO);
}
-MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val,
- unsigned Addr,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr,
+ MachineMemOperand &MMO) {
assert(getMRI()->getType(Val).isValid() && "invalid operand type");
assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
@@ -309,83 +320,73 @@ MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val,
.addMemOperand(&MMO);
}
-MachineInstrBuilder MachineIRBuilderBase::buildUAdde(unsigned Res,
- unsigned CarryOut,
- unsigned Op0, unsigned Op1,
- unsigned CarryIn) {
- assert(getMRI()->getType(Res).isScalar() && "invalid operand type");
- assert(getMRI()->getType(Res) == getMRI()->getType(Op0) &&
- getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch");
- assert(getMRI()->getType(CarryOut).isScalar() && "invalid operand type");
- assert(getMRI()->getType(CarryOut) == getMRI()->getType(CarryIn) &&
- "type mismatch");
-
- return buildInstr(TargetOpcode::G_UADDE)
- .addDef(Res)
- .addDef(CarryOut)
- .addUse(Op0)
- .addUse(Op1)
- .addUse(CarryIn);
+MachineInstrBuilder MachineIRBuilder::buildUAdde(const DstOp &Res,
+ const DstOp &CarryOut,
+ const SrcOp &Op0,
+ const SrcOp &Op1,
+ const SrcOp &CarryIn) {
+ return buildInstr(TargetOpcode::G_UADDE, {Res, CarryOut},
+ {Op0, Op1, CarryIn});
}
-MachineInstrBuilder MachineIRBuilderBase::buildAnyExt(unsigned Res,
- unsigned Op) {
- validateTruncExt(Res, Op, true);
- return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_ANYEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildSExt(unsigned Res, unsigned Op) {
- validateTruncExt(Res, Op, true);
- return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildSExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_SEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildZExt(unsigned Res, unsigned Op) {
- validateTruncExt(Res, Op, true);
- return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_ZEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildExtOrTrunc(unsigned ExtOpc,
- unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc,
+ const DstOp &Res,
+ const SrcOp &Op) {
assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc ||
TargetOpcode::G_SEXT == ExtOpc) &&
"Expecting Extending Opc");
- assert(getMRI()->getType(Res).isScalar() ||
- getMRI()->getType(Res).isVector());
- assert(getMRI()->getType(Res).isScalar() == getMRI()->getType(Op).isScalar());
+ assert(Res.getLLTTy(*getMRI()).isScalar() ||
+ Res.getLLTTy(*getMRI()).isVector());
+ assert(Res.getLLTTy(*getMRI()).isScalar() ==
+ Op.getLLTTy(*getMRI()).isScalar());
unsigned Opcode = TargetOpcode::COPY;
- if (getMRI()->getType(Res).getSizeInBits() >
- getMRI()->getType(Op).getSizeInBits())
+ if (Res.getLLTTy(*getMRI()).getSizeInBits() >
+ Op.getLLTTy(*getMRI()).getSizeInBits())
Opcode = ExtOpc;
- else if (getMRI()->getType(Res).getSizeInBits() <
- getMRI()->getType(Op).getSizeInBits())
+ else if (Res.getLLTTy(*getMRI()).getSizeInBits() <
+ Op.getLLTTy(*getMRI()).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
else
- assert(getMRI()->getType(Res) == getMRI()->getType(Op));
+ assert(Res.getLLTTy(*getMRI()) == Op.getLLTTy(*getMRI()));
- return buildInstr(Opcode).addDef(Res).addUse(Op);
+ return buildInstr(Opcode, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildSExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildZExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
return buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildAnyExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst,
- unsigned Src) {
- LLT SrcTy = getMRI()->getType(Src);
- LLT DstTy = getMRI()->getType(Dst);
+MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
+ const SrcOp &Src) {
+ LLT SrcTy = Src.getLLTTy(*getMRI());
+ LLT DstTy = Dst.getLLTTy(*getMRI());
if (SrcTy == DstTy)
return buildCopy(Dst, Src);
@@ -399,11 +400,11 @@ MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst,
Opcode = TargetOpcode::G_BITCAST;
}
- return buildInstr(Opcode).addDef(Dst).addUse(Src);
+ return buildInstr(Opcode, Dst, Src);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) {
+MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
+ uint64_t Index) {
#ifndef NDEBUG
assert(getMRI()->getType(Src).isValid() && "invalid operand type");
assert(getMRI()->getType(Res).isValid() && "invalid operand type");
@@ -424,8 +425,8 @@ MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) {
.addImm(Index);
}
-void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
- ArrayRef<uint64_t> Indices) {
+void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+ ArrayRef<uint64_t> Indices) {
#ifndef NDEBUG
assert(Ops.size() == Indices.size() && "incompatible args");
assert(!Ops.empty() && "invalid trivial sequence");
@@ -465,56 +466,67 @@ void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
}
}
-MachineInstrBuilder MachineIRBuilderBase::buildUndef(unsigned Res) {
- return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res);
+MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) {
+ return buildInstr(TargetOpcode::G_IMPLICIT_DEF, {Res}, {});
}
-MachineInstrBuilder MachineIRBuilderBase::buildMerge(unsigned Res,
- ArrayRef<unsigned> Ops) {
-
-#ifndef NDEBUG
- assert(!Ops.empty() && "invalid trivial sequence");
- LLT Ty = getMRI()->getType(Ops[0]);
- for (auto Reg : Ops)
- assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list");
- assert(Ops.size() * getMRI()->getType(Ops[0]).getSizeInBits() ==
- getMRI()->getType(Res).getSizeInBits() &&
- "input operands do not cover output register");
-#endif
+MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
+ ArrayRef<unsigned> Ops) {
+  // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec);
+}
- if (Ops.size() == 1)
- return buildCast(Res, Ops[0]);
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
+ const SrcOp &Op) {
+ // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<DstOp>,
+ // we need some temporary storage for the DstOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
+}
- MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES);
- MIB.addDef(Res);
- for (unsigned i = 0; i < Ops.size(); ++i)
- MIB.addUse(Ops[i]);
- return MIB;
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
+ const SrcOp &Op) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<DstOp>,
+ // we need some temporary storage for the DstOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildUnmerge(ArrayRef<unsigned> Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res,
+ ArrayRef<unsigned> Ops) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
-#ifndef NDEBUG
- assert(!Res.empty() && "invalid trivial sequence");
- LLT Ty = getMRI()->getType(Res[0]);
- for (auto Reg : Res)
- assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list");
- assert(Res.size() * getMRI()->getType(Res[0]).getSizeInBits() ==
- getMRI()->getType(Op).getSizeInBits() &&
- "input operands do not cover output register");
-#endif
+MachineInstrBuilder
+MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
+ ArrayRef<unsigned> Ops) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec);
+}
- MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_UNMERGE_VALUES);
- for (unsigned i = 0; i < Res.size(); ++i)
- MIB.addDef(Res[i]);
- MIB.addUse(Op);
- return MIB;
+MachineInstrBuilder
+MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<unsigned> Ops) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_CONCAT_VECTORS, Res, TmpVec);
}
-MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res,
- unsigned Src, unsigned Op,
- unsigned Index) {
+MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
+ unsigned Op, unsigned Index) {
assert(Index + getMRI()->getType(Op).getSizeInBits() <=
getMRI()->getType(Res).getSizeInBits() &&
"insertion past the end of a register");
@@ -531,9 +543,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res,
.addImm(Index);
}
-MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID,
- unsigned Res,
- bool HasSideEffects) {
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ unsigned Res,
+ bool HasSideEffects) {
auto MIB =
buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
: TargetOpcode::G_INTRINSIC);
@@ -543,133 +555,52 @@ MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID,
return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::buildTrunc(unsigned Res,
- unsigned Op) {
- validateTruncExt(Res, Op, false);
- return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_TRUNC, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildFPTrunc(unsigned Res,
- unsigned Op) {
- validateTruncExt(Res, Op, false);
- return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildICmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0,
- unsigned Op1) {
-#ifndef NDEBUG
- assert(getMRI()->getType(Op0) == getMRI()->getType(Op0) && "type mismatch");
- assert(CmpInst::isIntPredicate(Pred) && "invalid predicate");
- if (getMRI()->getType(Op0).isScalar() || getMRI()->getType(Op0).isPointer())
- assert(getMRI()->getType(Res).isScalar() && "type mismatch");
- else
- assert(getMRI()->getType(Res).isVector() &&
- getMRI()->getType(Res).getNumElements() ==
- getMRI()->getType(Op0).getNumElements() &&
- "type mismatch");
-#endif
-
- return buildInstr(TargetOpcode::G_ICMP)
- .addDef(Res)
- .addPredicate(Pred)
- .addUse(Op0)
- .addUse(Op1);
+MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
+ const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1});
}
-MachineInstrBuilder MachineIRBuilderBase::buildFCmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0,
- unsigned Op1) {
-#ifndef NDEBUG
- assert((getMRI()->getType(Op0).isScalar() ||
- getMRI()->getType(Op0).isVector()) &&
- "invalid operand type");
- assert(getMRI()->getType(Op0) == getMRI()->getType(Op1) && "type mismatch");
- assert(CmpInst::isFPPredicate(Pred) && "invalid predicate");
- if (getMRI()->getType(Op0).isScalar())
- assert(getMRI()->getType(Res).isScalar() && "type mismatch");
- else
- assert(getMRI()->getType(Res).isVector() &&
- getMRI()->getType(Res).getNumElements() ==
- getMRI()->getType(Op0).getNumElements() &&
- "type mismatch");
-#endif
+MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
+ const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
- return buildInstr(TargetOpcode::G_FCMP)
- .addDef(Res)
- .addPredicate(Pred)
- .addUse(Op0)
- .addUse(Op1);
+ return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1});
}
-MachineInstrBuilder MachineIRBuilderBase::buildSelect(unsigned Res,
- unsigned Tst,
- unsigned Op0,
- unsigned Op1) {
-#ifndef NDEBUG
- LLT ResTy = getMRI()->getType(Res);
- assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
- "invalid operand type");
- assert(ResTy == getMRI()->getType(Op0) && ResTy == getMRI()->getType(Op1) &&
- "type mismatch");
- if (ResTy.isScalar() || ResTy.isPointer())
- assert(getMRI()->getType(Tst).isScalar() && "type mismatch");
- else
- assert((getMRI()->getType(Tst).isScalar() ||
- (getMRI()->getType(Tst).isVector() &&
- getMRI()->getType(Tst).getNumElements() ==
- getMRI()->getType(Op0).getNumElements())) &&
- "type mismatch");
-#endif
+MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res,
+ const SrcOp &Tst,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
- return buildInstr(TargetOpcode::G_SELECT)
- .addDef(Res)
- .addUse(Tst)
- .addUse(Op0)
- .addUse(Op1);
+ return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1});
}
MachineInstrBuilder
-MachineIRBuilderBase::buildInsertVectorElement(unsigned Res, unsigned Val,
- unsigned Elt, unsigned Idx) {
-#ifndef NDEBUG
- LLT ResTy = getMRI()->getType(Res);
- LLT ValTy = getMRI()->getType(Val);
- LLT EltTy = getMRI()->getType(Elt);
- LLT IdxTy = getMRI()->getType(Idx);
- assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type");
- assert(IdxTy.isScalar() && "invalid operand type");
- assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch");
- assert(ResTy.getElementType() == EltTy && "type mismatch");
-#endif
-
- return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT)
- .addDef(Res)
- .addUse(Val)
- .addUse(Elt)
- .addUse(Idx);
+MachineIRBuilder::buildInsertVectorElement(const DstOp &Res, const SrcOp &Val,
+ const SrcOp &Elt, const SrcOp &Idx) {
+ return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT, Res, {Val, Elt, Idx});
}
MachineInstrBuilder
-MachineIRBuilderBase::buildExtractVectorElement(unsigned Res, unsigned Val,
- unsigned Idx) {
-#ifndef NDEBUG
- LLT ResTy = getMRI()->getType(Res);
- LLT ValTy = getMRI()->getType(Val);
- LLT IdxTy = getMRI()->getType(Idx);
- assert(ValTy.isVector() && "invalid operand type");
- assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type");
- assert(IdxTy.isScalar() && "invalid operand type");
- assert(ValTy.getElementType() == ResTy && "type mismatch");
-#endif
-
- return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT)
- .addDef(Res)
- .addUse(Val)
- .addUse(Idx);
+MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val,
+ const SrcOp &Idx) {
+ return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT, Res, {Val, Idx});
}
-MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess(
+MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal,
unsigned NewVal, MachineMemOperand &MMO) {
#ifndef NDEBUG
@@ -697,9 +628,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess(
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
- unsigned CmpVal, unsigned NewVal,
- MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
+ unsigned CmpVal, unsigned NewVal,
+ MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
LLT AddrTy = getMRI()->getType(Addr);
@@ -721,10 +652,11 @@ MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
.addMemOperand(&MMO);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes,
- unsigned Addr, unsigned Val,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
+ unsigned OldValRes,
+ unsigned Addr,
+ unsigned Val,
+ MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
LLT AddrTy = getMRI()->getType(Addr);
@@ -743,74 +675,75 @@ MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes,
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val,
MMO);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWOr(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(unsigned OldValRes,
+ unsigned Addr,
+ unsigned Val,
+ MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
+MachineIRBuilder::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
#ifndef NDEBUG
assert(getMRI()->getType(Res).isPointer() && "invalid res type");
#endif
@@ -818,12 +751,9 @@ MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
return buildInstr(TargetOpcode::G_BLOCK_ADDR).addDef(Res).addBlockAddress(BA);
}
-void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src,
- bool IsExtend) {
+void MachineIRBuilder::validateTruncExt(const LLT &DstTy, const LLT &SrcTy,
+ bool IsExtend) {
#ifndef NDEBUG
- LLT SrcTy = getMRI()->getType(Src);
- LLT DstTy = getMRI()->getType(Dst);
-
if (DstTy.isVector()) {
assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");
assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
@@ -839,3 +769,236 @@ void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src,
"invalid widening trunc");
#endif
}
+
+void MachineIRBuilder::validateSelectOp(const LLT &ResTy, const LLT &TstTy,
+ const LLT &Op0Ty, const LLT &Op1Ty) {
+#ifndef NDEBUG
+ assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
+ "invalid operand type");
+ assert((ResTy == Op0Ty && ResTy == Op1Ty) && "type mismatch");
+ if (ResTy.isScalar() || ResTy.isPointer())
+ assert(TstTy.isScalar() && "type mismatch");
+ else
+ assert((TstTy.isScalar() ||
+ (TstTy.isVector() &&
+ TstTy.getNumElements() == Op0Ty.getNumElements())) &&
+ "type mismatch");
+#endif
+}
+
+MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
+ ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flags) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_SELECT: {
+ assert(DstOps.size() == 1 && "Invalid select");
+ assert(SrcOps.size() == 3 && "Invalid select");
+ validateSelectOp(
+ DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getLLTTy(*getMRI()),
+ SrcOps[1].getLLTTy(*getMRI()), SrcOps[2].getLLTTy(*getMRI()));
+ break;
+ }
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+  case TargetOpcode::G_SREM: {
+    // All these are binary ops.
+    assert(DstOps.size() == 1 && "Invalid Dst");
+    assert(SrcOps.size() == 2 && "Invalid Srcs");
+    validateBinaryOp(DstOps[0].getLLTTy(*getMRI()),
+                     SrcOps[0].getLLTTy(*getMRI()),
+                     SrcOps[1].getLLTTy(*getMRI()));
+    break;
+  }
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_ZEXT:
+  case TargetOpcode::G_ANYEXT:
+    assert(DstOps.size() == 1 && "Invalid Dst");
+    assert(SrcOps.size() == 1 && "Invalid Srcs");
+    validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
+                     SrcOps[0].getLLTTy(*getMRI()), true);
+    break;
+  case TargetOpcode::G_TRUNC:
+  case TargetOpcode::G_FPTRUNC:
+    assert(DstOps.size() == 1 && "Invalid Dst");
+    assert(SrcOps.size() == 1 && "Invalid Srcs");
+    validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
+                     SrcOps[0].getLLTTy(*getMRI()), false);
+    break;
+ case TargetOpcode::COPY:
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ assert(DstOps[0].getLLTTy(*getMRI()) == LLT() ||
+ SrcOps[0].getLLTTy(*getMRI()) == LLT() ||
+ DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI()));
+ break;
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_ICMP: {
+ assert(DstOps.size() == 1 && "Invalid Dst Operands");
+ assert(SrcOps.size() == 3 && "Invalid Src Operands");
+ // For F/ICMP, the first src operand is the predicate, followed by
+ // the two comparands.
+ assert(SrcOps[0].getSrcOpKind() == SrcOp::SrcType::Ty_Predicate &&
+ "Expecting predicate");
+ assert([&]() -> bool {
+ CmpInst::Predicate Pred = SrcOps[0].getPredicate();
+ return Opc == TargetOpcode::G_ICMP ? CmpInst::isIntPredicate(Pred)
+ : CmpInst::isFPPredicate(Pred);
+ }() && "Invalid predicate");
+ assert(SrcOps[1].getLLTTy(*getMRI()) == SrcOps[2].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ assert([&]() -> bool {
+ LLT Op0Ty = SrcOps[1].getLLTTy(*getMRI());
+ LLT DstTy = DstOps[0].getLLTTy(*getMRI());
+ if (Op0Ty.isScalar() || Op0Ty.isPointer())
+ return DstTy.isScalar();
+ else
+ return DstTy.isVector() &&
+ DstTy.getNumElements() == Op0Ty.getNumElements();
+ }() && "Type Mismatch");
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ assert(!DstOps.empty() && "Invalid trivial sequence");
+ assert(SrcOps.size() == 1 && "Invalid src for Unmerge");
+ assert(std::all_of(DstOps.begin(), DstOps.end(),
+ [&, this](const DstOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ DstOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in output list");
+ assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input operands do not cover output register");
+ break;
+ }
+ case TargetOpcode::G_MERGE_VALUES: {
+ assert(!SrcOps.empty() && "invalid trivial sequence");
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input operands do not cover output register");
+ if (SrcOps.size() == 1)
+ return buildCast(DstOps[0], SrcOps[0]);
+ if (DstOps[0].getLLTTy(*getMRI()).isVector())
+ return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps);
+ break;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
+ assert(DstOps.size() == 1 && "Invalid Dst size");
+ assert(SrcOps.size() == 2 && "Invalid Src size");
+ assert(SrcOps[0].getLLTTy(*getMRI()).isVector() && "Invalid operand type");
+ assert((DstOps[0].getLLTTy(*getMRI()).isScalar() ||
+ DstOps[0].getLLTTy(*getMRI()).isPointer()) &&
+ "Invalid operand type");
+ assert(SrcOps[1].getLLTTy(*getMRI()).isScalar() && "Invalid operand type");
+ assert(SrcOps[0].getLLTTy(*getMRI()).getElementType() ==
+ DstOps[0].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ break;
+ }
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ assert(DstOps.size() == 1 && "Invalid dst size");
+ assert(SrcOps.size() == 3 && "Invalid src size");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ SrcOps[0].getLLTTy(*getMRI()).isVector() && "Invalid operand type");
+ assert(DstOps[0].getLLTTy(*getMRI()).getElementType() ==
+ SrcOps[1].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ assert(SrcOps[2].getLLTTy(*getMRI()).isScalar() && "Invalid index");
+ assert(DstOps[0].getLLTTy(*getMRI()).getNumElements() ==
+ SrcOps[0].getLLTTy(*getMRI()).getNumElements() &&
+ "Type mismatch");
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR: {
+    assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ "Res type must be a vector");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+           "input scalars do not exactly cover the output vector register");
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
+    assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ "Res type must be a vector");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits())
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps);
+ break;
+ }
+ case TargetOpcode::G_CONCAT_VECTORS: {
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+    assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return (Op.getLLTTy(*getMRI()).isVector() &&
+ Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI()));
+ }) &&
+ "type mismatch in input list");
+ assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+           "input vectors do not exactly cover the output vector register");
+ break;
+ }
+ case TargetOpcode::G_UADDE: {
+ assert(DstOps.size() == 2 && "Invalid no of dst operands");
+ assert(SrcOps.size() == 3 && "Invalid no of src operands");
+ assert(DstOps[0].getLLTTy(*getMRI()).isScalar() && "Invalid operand");
+ assert((DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI())) &&
+ (DstOps[0].getLLTTy(*getMRI()) == SrcOps[1].getLLTTy(*getMRI())) &&
+ "Invalid operand");
+ assert(DstOps[1].getLLTTy(*getMRI()).isScalar() && "Invalid operand");
+ assert(DstOps[1].getLLTTy(*getMRI()) == SrcOps[2].getLLTTy(*getMRI()) &&
+ "type mismatch");
+ break;
+ }
+ }
+
+ auto MIB = buildInstr(Opc);
+ for (const DstOp &Op : DstOps)
+ Op.addDefToMIB(*getMRI(), MIB);
+ for (const SrcOp &Op : SrcOps)
+ Op.addSrcToMIB(MIB);
+ if (Flags)
+ MIB->setFlags(*Flags);
+ return MIB;
+}
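
For illustration, a minimal usage sketch of the DstOp/SrcOp-based buildInstr()
introduced above. The helper function and virtual register parameters below are
hypothetical and not part of this patch; they only show how the convenience
wrappers earlier in this file funnel into this one entry point.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Hypothetical helper: emit a compare and an add through the new interface.
// DstOp/SrcOp are constructed implicitly from the vreg numbers, and
// buildInstr() runs the per-opcode operand checks from the switch above.
static void emitCmpAndAdd(MachineIRBuilder &B, unsigned DstCmp, unsigned DstAdd,
                          unsigned Src0, unsigned Src1) {
  // G_ICMP takes a predicate SrcOp followed by the two comparands.
  B.buildInstr(TargetOpcode::G_ICMP, {DstCmp}, {CmpInst::ICMP_EQ, Src0, Src1});
  // Binary ops are checked by validateBinaryOp() before being emitted.
  B.buildInstr(TargetOpcode::G_ADD, {DstAdd}, {Src0, Src1});
}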
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 9e2d48d1dc42..dcc8b7cc23c5 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -115,8 +115,8 @@ bool RegBankSelect::assignmentMatch(
// By default we assume we will have to repair something.
OnlyAssign = false;
// Each part of a break down needs to end up in a different register.
- // In other word, Reg assignement does not match.
- if (ValMapping.NumBreakDowns > 1)
+  // In other words, Reg assignment does not match.
+ if (ValMapping.NumBreakDowns != 1)
return false;
const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
@@ -140,7 +140,7 @@ bool RegBankSelect::repairReg(
return false;
assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented");
// An empty range of new register means no repairing.
- assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair");
+ assert(!empty(NewVRegs) && "We should not have to repair");
// Assume we are repairing a use and thus, the original reg will be
// the source of the repairing.
@@ -528,7 +528,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
bool RegBankSelect::applyMapping(
MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) {
- // OpdMapper will hold all the information needed for the rewritting.
+ // OpdMapper will hold all the information needed for the rewriting.
RegisterBankInfo::OperandsMapper OpdMapper(MI, InstrMapping, *MRI);
// First, place the repairing code.
@@ -714,18 +714,23 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
// - Terminators must be the last instructions:
// * Before, move the insert point before the first terminator.
  //              * After, we have to split the outgoing edges.
- unsigned Reg = MO.getReg();
if (Before) {
// Check whether Reg is defined by any terminator.
- MachineBasicBlock::iterator It = MI;
- for (auto Begin = MI.getParent()->begin();
- --It != Begin && It->isTerminator();)
- if (It->modifiesRegister(Reg, &TRI)) {
- // Insert the repairing code right after the definition.
- addInsertPoint(*It, /*Before*/ false);
- return;
- }
- addInsertPoint(*It, /*Before*/ true);
+ MachineBasicBlock::reverse_iterator It = MI;
+ auto REnd = MI.getParent()->rend();
+
+ for (; It != REnd && It->isTerminator(); ++It) {
+ assert(!It->modifiesRegister(MO.getReg(), &TRI) &&
+ "copy insertion in middle of terminators not handled");
+ }
+
+ if (It == REnd) {
+ addInsertPoint(*MI.getParent()->begin(), true);
+ return;
+ }
+
+ // We are sure to be right before the first terminator.
+ addInsertPoint(*It, /*Before*/ false);
return;
}
// Make sure Reg is not redefined by other terminators, otherwise
@@ -733,7 +738,8 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
for (MachineBasicBlock::iterator It = MI, End = MI.getParent()->end();
++It != End;)
// The machine verifier should reject this kind of code.
- assert(It->modifiesRegister(Reg, &TRI) && "Do not know where to split");
+    assert(!It->modifiesRegister(MO.getReg(), &TRI) &&
+           "Do not know where to split");
  // Split each outgoing edge.
MachineBasicBlock &Src = *MI.getParent();
for (auto &Succ : Src.successors())
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index dd15567ef1c1..28404e52d6ea 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -426,7 +426,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
"This mapping is too complex for this function");
iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
OpdMapper.getVRegs(OpIdx);
- if (NewRegs.begin() == NewRegs.end()) {
+ if (empty(NewRegs)) {
LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
continue;
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1a5f88743d5f..59cbf93e7cd1 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -137,7 +137,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
// If we can move an instruction, we can remove it. Otherwise, it has
// a side-effect of some sort.
bool SawStore = false;
- if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore))
+ if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore) && !MI.isPHI())
return false;
// Instructions without side-effects are dead iff they only define dead vregs.
@@ -235,6 +235,57 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
return APF;
}
+Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
+ const unsigned Op2,
+ const MachineRegisterInfo &MRI) {
+ auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI);
+ if (MaybeOp1Cst && MaybeOp2Cst) {
+ LLT Ty = MRI.getType(Op1);
+ APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
+ APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true);
+ switch (Opcode) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ return C1 + C2;
+ case TargetOpcode::G_AND:
+ return C1 & C2;
+ case TargetOpcode::G_ASHR:
+ return C1.ashr(C2);
+ case TargetOpcode::G_LSHR:
+ return C1.lshr(C2);
+ case TargetOpcode::G_MUL:
+ return C1 * C2;
+ case TargetOpcode::G_OR:
+ return C1 | C2;
+ case TargetOpcode::G_SHL:
+ return C1 << C2;
+ case TargetOpcode::G_SUB:
+ return C1 - C2;
+ case TargetOpcode::G_XOR:
+ return C1 ^ C2;
+ case TargetOpcode::G_UDIV:
+ if (!C2.getBoolValue())
+ break;
+ return C1.udiv(C2);
+ case TargetOpcode::G_SDIV:
+ if (!C2.getBoolValue())
+ break;
+ return C1.sdiv(C2);
+ case TargetOpcode::G_UREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.urem(C2);
+ case TargetOpcode::G_SREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.srem(C2);
+ }
+ }
+ return None;
+}
+
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
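
For illustration, a small usage sketch of the ConstantFoldBinOp() helper added
above. The combine function below is hypothetical (not part of this patch) and
only demonstrates the intended call pattern, assuming the declaration lives in
the GlobalISel Utils header.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Hypothetical combine: if both inputs of a G_ADD are constant vregs, fold
// them with ConstantFoldBinOp() and rewrite the result as a G_CONSTANT.
static bool foldConstantAdd(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) {
  if (MI.getOpcode() != TargetOpcode::G_ADD)
    return false;
  Optional<APInt> Folded = ConstantFoldBinOp(MI.getOpcode(),
                                             MI.getOperand(1).getReg(),
                                             MI.getOperand(2).getReg(), MRI);
  if (!Folded)
    return false;
  // Insert the replacement right before the old instruction, then drop it.
  B.setInstr(MI);
  B.buildConstant(MI.getOperand(0).getReg(), Folded->getSExtValue());
  MI.eraseFromParent();
  return true;
}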
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index 9f7f5e392a9a..d3364952f244 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -461,6 +461,8 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
unsigned CurIdx = 0;
for (j = i; j != -1; j = GlobalSet.find_next(j)) {
Type *Ty = Globals[j]->getValueType();
+
+ // Make sure we use the same alignment AsmPrinter would use.
unsigned Align = DL.getPreferredAlignment(Globals[j]);
unsigned Padding = alignTo(MergedSize, Align) - MergedSize;
MergedSize += Padding;
@@ -516,6 +518,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
GlobalVariable::NotThreadLocal, AddrSpace);
MergedGV->setAlignment(MaxAlign);
+ MergedGV->setSection(Globals[i]->getSection());
const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
@@ -599,16 +602,15 @@ bool GlobalMerge::doInitialization(Module &M) {
IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
auto &DL = M.getDataLayout();
- DenseMap<unsigned, SmallVector<GlobalVariable *, 16>> Globals, ConstGlobals,
- BSSGlobals;
+ DenseMap<std::pair<unsigned, StringRef>, SmallVector<GlobalVariable *, 16>>
+ Globals, ConstGlobals, BSSGlobals;
bool Changed = false;
setMustKeepGlobalVariables(M);
// Grab all non-const globals.
for (auto &GV : M.globals()) {
// Merge is safe for "normal" internal or external globals only
- if (GV.isDeclaration() || GV.isThreadLocal() ||
- GV.hasSection() || GV.hasImplicitSection())
+ if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
continue;
// It's not safe to merge globals that may be preempted
@@ -623,6 +625,7 @@ bool GlobalMerge::doInitialization(Module &M) {
assert(PT && "Global variable is not a pointer!");
unsigned AddressSpace = PT->getAddressSpace();
+ StringRef Section = GV.getSection();
// Ignore all 'special' globals.
if (GV.getName().startswith("llvm.") ||
@@ -636,27 +639,27 @@ bool GlobalMerge::doInitialization(Module &M) {
Type *Ty = GV.getValueType();
if (DL.getTypeAllocSize(Ty) < MaxOffset) {
if (TM &&
- TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal())
- BSSGlobals[AddressSpace].push_back(&GV);
+ TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSS())
+ BSSGlobals[{AddressSpace, Section}].push_back(&GV);
else if (GV.isConstant())
- ConstGlobals[AddressSpace].push_back(&GV);
+ ConstGlobals[{AddressSpace, Section}].push_back(&GV);
else
- Globals[AddressSpace].push_back(&GV);
+ Globals[{AddressSpace, Section}].push_back(&GV);
}
}
for (auto &P : Globals)
if (P.second.size() > 1)
- Changed |= doMerge(P.second, M, false, P.first);
+ Changed |= doMerge(P.second, M, false, P.first.first);
for (auto &P : BSSGlobals)
if (P.second.size() > 1)
- Changed |= doMerge(P.second, M, false, P.first);
+ Changed |= doMerge(P.second, M, false, P.first.first);
if (EnableGlobalMergeOnConst)
for (auto &P : ConstGlobals)
if (P.second.size() > 1)
- Changed |= doMerge(P.second, M, true, P.first);
+ Changed |= doMerge(P.second, M, true, P.first.first);
return Changed;
}
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index f12d00071b24..ceeba639ee09 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -273,7 +273,7 @@ namespace {
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> *LaterRedefs = nullptr);
+ SmallSet<MCPhysReg, 4> *LaterRedefs = nullptr);
void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
bool IgnoreBr = false);
@@ -1366,12 +1366,12 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
// Before stepping forward past MI, remember which regs were live
// before MI. This is needed to set the Undef flag only when reg is
// dead.
- SparseSet<unsigned> LiveBeforeMI;
+ SparseSet<MCPhysReg, identity<MCPhysReg>> LiveBeforeMI;
LiveBeforeMI.setUniverse(TRI->getNumRegs());
for (unsigned Reg : Redefs)
LiveBeforeMI.insert(Reg);
- SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>, 4> Clobbers;
Redefs.stepForward(MI, Clobbers);
// Now add the implicit uses for each of the clobbered values.
@@ -1444,7 +1444,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
Redefs.init(*TRI);
if (MRI->tracksLiveness()) {
- // Initialize liveins to the first BB. These are potentiall redefined by
+ // Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.addLiveIns(CvtMBB);
Redefs.addLiveIns(NextMBB);
@@ -1740,7 +1740,7 @@ bool IfConverter::IfConvertDiamondCommon(
if (MRI->tracksLiveness()) {
for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
- SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>, 4> Dummy;
Redefs.stepForward(MI, Dummy);
}
}
@@ -1806,13 +1806,13 @@ bool IfConverter::IfConvertDiamondCommon(
// generate:
// sub r0, r1, #1
// addne r0, r1, #1
- SmallSet<unsigned, 4> RedefsByFalse;
- SmallSet<unsigned, 4> ExtUses;
+ SmallSet<MCPhysReg, 4> RedefsByFalse;
+ SmallSet<MCPhysReg, 4> ExtUses;
if (TII->isProfitableToUnpredicate(MBB1, MBB2)) {
for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) {
if (FI.isDebugInstr())
continue;
- SmallVector<unsigned, 4> Defs;
+ SmallVector<MCPhysReg, 4> Defs;
for (const MachineOperand &MO : FI.operands()) {
if (!MO.isReg())
continue;
@@ -1830,7 +1830,7 @@ bool IfConverter::IfConvertDiamondCommon(
}
}
- for (unsigned Reg : Defs) {
+ for (MCPhysReg Reg : Defs) {
if (!ExtUses.count(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
@@ -1976,7 +1976,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
}
static bool MaySpeculate(const MachineInstr &MI,
- SmallSet<unsigned, 4> &LaterRedefs) {
+ SmallSet<MCPhysReg, 4> &LaterRedefs) {
bool SawStore = true;
if (!MI.isSafeToMove(nullptr, SawStore))
return false;
@@ -1999,7 +1999,7 @@ static bool MaySpeculate(const MachineInstr &MI,
void IfConverter::PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> *LaterRedefs) {
+ SmallSet<MCPhysReg, 4> *LaterRedefs) {
bool AnyUnpred = false;
bool MaySpec = LaterRedefs != nullptr;
for (MachineInstr &I : make_range(BBI.BB->begin(), E)) {
@@ -2148,7 +2148,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// Calculate the edge probability for the edge from ToBBI.BB to Succ,
// which is a portion of the edge probability from FromMBB to Succ. The
// portion ratio is the edge probability from ToBBI.BB to FromMBB (if
- // FromBBI is a successor of ToBBI.BB. See comment below for excepion).
+ // FromBBI is a successor of ToBBI.BB. See comment below for exception).
NewProb = MBPI->getEdgeProbability(&FromMBB, Succ);
// To2FromProb is 0 when FromMBB is not a successor of ToBBI.BB. This
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 0a447bc613b1..f411ee6745d0 100644
--- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -90,7 +90,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
/// A data type for representing the result computed by \c
/// computeDependence. States whether it is okay to reorder the
/// instruction passed to \c computeDependence with at most one
- /// depednency.
+ /// dependency.
struct DependenceResult {
/// Can we actually re-order \p MI with \p Insts (see \c
/// computeDependence).
@@ -344,11 +344,11 @@ ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
return AR_MayAlias;
continue;
}
- llvm::AliasResult AAResult = AA->alias(
- MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
- MMO1->getAAInfo()),
- MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
- MMO2->getAAInfo()));
+ llvm::AliasResult AAResult =
+ AA->alias(MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
+ MMO1->getAAInfo()),
+ MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
+ MMO2->getAAInfo()));
if (AAResult != NoAlias)
return AR_MayAlias;
}
@@ -360,10 +360,10 @@ ImplicitNullChecks::SuitabilityResult
ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts) {
int64_t Offset;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
- if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) ||
- BaseReg != PointerReg)
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) ||
+ !BaseOp->isReg() || BaseOp->getReg() != PointerReg)
return SR_Unsuitable;
// We want the mem access to be issued at a sane offset from PointerReg,
@@ -651,7 +651,7 @@ MachineInstr *ImplicitNullChecks::insertFaultingInstr(
}
}
- MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
return MIB;
}
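
A brief sketch of the getMemOperandWithOffset() query that isSuitableMemoryOp()
now relies on above: the base comes back as a MachineOperand (it may be a frame
index rather than a register), so callers check isReg() before comparing
registers. The helper function and its name below are hypothetical, not part of
this patch.

#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// Returns true if MI is a memory access whose base operand is PointerReg.
static bool accessesViaReg(MachineInstr &MI, unsigned PointerReg,
                           const TargetInstrInfo *TII,
                           const TargetRegisterInfo *TRI) {
  MachineOperand *BaseOp;
  int64_t Offset;
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
    return false;
  return BaseOp->isReg() && BaseOp->getReg() == PointerReg;
}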
diff --git a/contrib/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
new file mode 100644
index 000000000000..989fa164ad2d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -0,0 +1,1359 @@
+//===- InterleavedLoadCombine.cpp - Combine Interleaved Loads ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+//
+// This file defines the interleaved-load-combine pass. The pass searches for
+// ShuffleVectorInst instructions that perform interleaving loads. If a matching
+// pattern is found, it adds a combined load and further instructions in a
+// pattern that is detectable by InterleavedAccessPass. The old instructions are
+// left dead to be removed later. The pass is specifically designed to be
+// executed just before InterleavedAccessPass to find any left-over instances
+// that are not detected by earlier passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <algorithm>
+#include <cassert>
+#include <list>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "interleaved-load-combine"
+
+namespace {
+
+/// Statistic counter
+STATISTIC(NumInterleavedLoadCombine, "Number of combined loads");
+
+/// Option to disable the pass
+static cl::opt<bool> DisableInterleavedLoadCombine(
+ "disable-" DEBUG_TYPE, cl::init(false), cl::Hidden,
+ cl::desc("Disable combining of interleaved loads"));
+
+struct VectorInfo;
+
+struct InterleavedLoadCombineImpl {
+public:
+ InterleavedLoadCombineImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA,
+ TargetMachine &TM)
+ : F(F), DT(DT), MSSA(MSSA),
+ TLI(*TM.getSubtargetImpl(F)->getTargetLowering()),
+ TTI(TM.getTargetTransformInfo(F)) {}
+
+ /// Scan the function for interleaved load candidates and execute the
+ /// replacement if applicable.
+ bool run();
+
+private:
+ /// Function this pass is working on
+ Function &F;
+
+ /// Dominator Tree Analysis
+ DominatorTree &DT;
+
+ /// Memory Alias Analyses
+ MemorySSA &MSSA;
+
+ /// Target Lowering Information
+ const TargetLowering &TLI;
+
+ /// Target Transform Information
+ const TargetTransformInfo TTI;
+
+  /// Find the instruction in the set LIs that dominates all others; return nullptr
+ /// if there is none.
+ LoadInst *findFirstLoad(const std::set<LoadInst *> &LIs);
+
+ /// Replace interleaved load candidates. It does additional
+ /// analyses if this makes sense. Returns true on success and false
+  /// if nothing has been changed.
+ bool combine(std::list<VectorInfo> &InterleavedLoad,
+ OptimizationRemarkEmitter &ORE);
+
+ /// Given a set of VectorInfo containing candidates for a given interleave
+ /// factor, find a set that represents a 'factor' interleaved load.
+ bool findPattern(std::list<VectorInfo> &Candidates,
+ std::list<VectorInfo> &InterleavedLoad, unsigned Factor,
+ const DataLayout &DL);
+}; // InterleavedLoadCombine
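
A minimal sketch of how the implementation struct above is driven. The actual
pass wrapper presumably appears later in this file and is not reproduced in
this excerpt, so the driver function below is made up for illustration; it only
uses the constructor, run() method, and cl::opt shown above.

// Hypothetical driver: bail out when the pass is disabled, otherwise let the
// per-function implementation scan for interleaved load candidates.
static bool runImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA,
                    TargetMachine &TM) {
  if (DisableInterleavedLoadCombine)
    return false;
  return InterleavedLoadCombineImpl(F, DT, MSSA, TM).run();
}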
+
+/// First Order Polynomial on an n-Bit Integer Value
+///
+/// Polynomial(Value) = Value * B + A + E*2^(n-e)
+///
+/// A and B are the coefficients. E*2^(n-e) is an error within 'e' most
+/// significant bits. It is introduced if an exact computation cannot be proven
+/// (e.g. division by 2).
+///
+/// As part of this optimization multiple loads will be combined. It is necessary
+/// to prove that loads are within some relative offset of each other. This
+/// class is used to prove relative offsets of values loaded from memory.
+///
+/// Representing an integer in this form is sound since addition in two's
+/// complement is associative (trivial) and multiplication distributes over the
+/// addition (see Proof(1) in Polynomial::mul). Further, both operations
+/// commute.
+//
+// Example:
+// declare @fn(i64 %IDX, <4 x float>* %PTR) {
+// %Pa1 = add i64 %IDX, 2
+// %Pa2 = lshr i64 %Pa1, 1
+// %Pa3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pa2
+// %Va = load <4 x float>, <4 x float>* %Pa3
+//
+// %Pb1 = add i64 %IDX, 4
+// %Pb2 = lshr i64 %Pb1, 1
+// %Pb3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pb2
+// %Vb = load <4 x float>, <4 x float>* %Pb3
+// ... }
+//
+// The goal is to prove that two loads load consecutive addresses.
+//
+// In this case the polynomials are constructed by the following
+// steps.
+//
+// The number tag #e specifies the error bits.
+//
+// Pa_0 = %IDX #0
+// Pa_1 = %IDX + 2 #0 | add 2
+// Pa_2 = %IDX/2 + 1 #1 | lshr 1
+// Pa_3 = %IDX/2 + 1 #1 | GEP, step signext to i64
+// Pa_4 = (%IDX/2)*16 + 16 #0 | GEP, multiply index by sizeof(4) for floats
+// Pa_5 = (%IDX/2)*16 + 16 #0 | GEP, add offset of leading components
+//
+// Pb_0 = %IDX #0
+//  Pb_1 = %IDX + 4            #0 | add 4
+// Pb_2 = %IDX/2 + 2 #1 | lshr 1
+// Pb_3 = %IDX/2 + 2 #1 | GEP, step signext to i64
+// Pb_4 = (%IDX/2)*16 + 32 #0 | GEP, multiply index by sizeof(4) for floats
+//  Pb_5 = (%IDX/2)*16 + 32    #0 | GEP, add offset of leading components
+//
+// Pb_5 - Pa_5 = 16 #0 | subtract to get the offset
+//
+// Remark: %PTR is not maintained within this class. So in this instance the
+// offset of 16 can only be assumed if the pointers are equal.
+//
+class Polynomial {
+ /// Operations on B
+ enum BOps {
+ LShr,
+ Mul,
+ SExt,
+ Trunc,
+ };
+
+ /// Number of Error Bits e
+ unsigned ErrorMSBs;
+
+ /// Value
+ Value *V;
+
+ /// Coefficient B
+ SmallVector<std::pair<BOps, APInt>, 4> B;
+
+ /// Coefficient A
+ APInt A;
+
+public:
+ Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V), B(), A() {
+ IntegerType *Ty = dyn_cast<IntegerType>(V->getType());
+ if (Ty) {
+ ErrorMSBs = 0;
+ this->V = V;
+ A = APInt(Ty->getBitWidth(), 0);
+ }
+ }
+
+ Polynomial(const APInt &A, unsigned ErrorMSBs = 0)
+ : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(A) {}
+
+ Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0)
+ : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(BitWidth, A) {}
+
+ Polynomial() : ErrorMSBs((unsigned)-1), V(NULL), B(), A() {}
+
+ /// Increment and clamp the number of undefined bits.
+ void incErrorMSBs(unsigned amt) {
+ if (ErrorMSBs == (unsigned)-1)
+ return;
+
+ ErrorMSBs += amt;
+ if (ErrorMSBs > A.getBitWidth())
+ ErrorMSBs = A.getBitWidth();
+ }
+
+ /// Decrement and clamp the number of undefined bits.
+ void decErrorMSBs(unsigned amt) {
+ if (ErrorMSBs == (unsigned)-1)
+ return;
+
+ if (ErrorMSBs > amt)
+ ErrorMSBs -= amt;
+ else
+ ErrorMSBs = 0;
+ }
+
+ /// Apply an add on the polynomial
+ Polynomial &add(const APInt &C) {
+ // Note: Addition is associative in two's complement even when in case of
+ // signed overflow.
+ //
+ // Error bits can only propagate into higher significant bits. As these are
+ // already regarded as undefined, there is no change.
+ //
+ // Theorem: Adding a constant to a polynomial does not change the error
+ // term.
+ //
+ // Proof:
+ //
+ // Since the addition is associative and commutes:
+ //
+ // (B + A + E*2^(n-e)) + C = B + (A + C) + E*2^(n-e)
+ // [qed]
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ A += C;
+ return *this;
+ }
+
+ /// Apply a multiplication onto the polynomial.
+ Polynomial &mul(const APInt &C) {
+ // Note: Multiplication distributes over the addition
+ //
+ // Theorem: Multiplication distributes over the addition
+ //
+ // Proof(1):
+ //
+    //  (B+A)*C =
+ // = (B + A) + (B + A) + .. {C Times}
+ // addition is associative and commutes, hence
+ // = B + B + .. {C Times} .. + A + A + .. {C times}
+ // = B*C + A*C
+ // (see (function add) for signed values and overflows)
+ // [qed]
+ //
+    // Theorem: If C has c trailing zeros, error bits in A or B are shifted out
+ // to the left.
+ //
+ // Proof(2):
+ //
+ // Let B' and A' be the n-Bit inputs with some unknown errors EA,
+ // EB at e leading bits. B' and A' can be written down as:
+ //
+ // B' = B + 2^(n-e)*EB
+ // A' = A + 2^(n-e)*EA
+ //
+ // Let C' be an input with c trailing zero bits. C' can be written as
+ //
+ // C' = C*2^c
+ //
+ // Therefore we can compute the result by using distributivity and
+ // commutativity.
+ //
+ // (B'*C' + A'*C') = [B + 2^(n-e)*EB] * C' + [A + 2^(n-e)*EA] * C' =
+ // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' =
+ // = (B'+A') * C' =
+ // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' =
+ // = [B + A + 2^(n-e)*EB + 2^(n-e)*EA] * C' =
+ // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C' =
+ // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C*2^c =
+ // = (B + A) * C' + C*(EB + EA)*2^(n-e)*2^c =
+ //
+ // Let EC be the final error with EC = C*(EB + EA)
+ //
+ // = (B + A)*C' + EC*2^(n-e)*2^c =
+ // = (B + A)*C' + EC*2^(n-(e-c))
+ //
+ // Since EC is multiplied by 2^(n-(e-c)) the resulting error contains c
+ // less error bits than the input. c bits are shifted out to the left.
+ // [qed]
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ // Multiplying by one is a no-op.
+ if (C.isOneValue()) {
+ return *this;
+ }
+
+ // Multiplying by zero removes the coefficient B and defines all bits.
+ if (C.isNullValue()) {
+ ErrorMSBs = 0;
+ deleteB();
+ }
+
+ // See Proof(2): Trailing zero bits indicate a left shift. This removes
+ // leading bits from the result even if they are undefined.
+ decErrorMSBs(C.countTrailingZeros());
+
+ A *= C;
+ pushBOperation(Mul, C);
+ return *this;
+ }
+
+ /// Apply a logical shift right on the polynomial
+ Polynomial &lshr(const APInt &C) {
+ // Theorem(1): (B + A + E*2^(n-e)) >> 1 => (B >> 1) + (A >> 1) + E'*2^(n-e')
+ // where
+ // e' = e + 1,
+ // E is a e-bit number,
+ // E' is a e'-bit number,
+ // holds under the following precondition:
+ // pre(1): A % 2 = 0
+ // pre(2): e < n, (see Theorem(2) for the trivial case with e=n)
+ // where >> expresses a logical shift to the right, with adding zeros.
+ //
+    // We need to show that for every E there is an E'
+ //
+ // B = b_h * 2^(n-1) + b_m * 2 + b_l
+ // A = a_h * 2^(n-1) + a_m * 2 (pre(1))
+ //
+ // where a_h, b_h, b_l are single bits, and a_m, b_m are (n-2) bit numbers
+ //
+ // Let X = (B + A + E*2^(n-e)) >> 1
+ // Let Y = (B >> 1) + (A >> 1) + E*2^(n-e) >> 1
+ //
+ // X = [B + A + E*2^(n-e)] >> 1 =
+ // = [ b_h * 2^(n-1) + b_m * 2 + b_l +
+ // + a_h * 2^(n-1) + a_m * 2 +
+ // + E * 2^(n-e) ] >> 1 =
+ //
+    //   The sum is built by putting the overflow of [a_m + b_m] into the term
+ // 2^(n-1). As there are no more bits beyond 2^(n-1) the overflow within
+ // this bit is discarded. This is expressed by % 2.
+ //
+ // The bit in position 0 cannot overflow into the term (b_m + a_m).
+ //
+ // = [ ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-1) +
+ // + ((b_m + a_m) % 2^(n-2)) * 2 +
+ // + b_l + E * 2^(n-e) ] >> 1 =
+ //
+ // The shift is computed by dividing the terms by 2 and by cutting off
+ // b_l.
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-(e+1)) =
+ //
+ // by the definition in the Theorem e+1 = e'
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-e') =
+ //
+ // Compute Y by applying distributivity first
+ //
+ // Y = (B >> 1) + (A >> 1) + E*2^(n-e') =
+ // = (b_h * 2^(n-1) + b_m * 2 + b_l) >> 1 +
+ // + (a_h * 2^(n-1) + a_m * 2) >> 1 +
+ // + E * 2^(n-e) >> 1 =
+ //
+ // Again, the shift is computed by dividing the terms by 2 and by cutting
+ // off b_l.
+ //
+ // = b_h * 2^(n-2) + b_m +
+ // + a_h * 2^(n-2) + a_m +
+ // + E * 2^(n-(e+1)) =
+ //
+    //   Again, the sum is built by putting the overflow of [a_m + b_m] into
+    //   the term 2^(n-1). But this time there is room for a second bit in the
+    //   term 2^(n-2); we add this bit to a new term and denote it o_h in a
+ // second step.
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] >> 1) * 2^(n-1) +
+ // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-(e+1)) =
+ //
+ // Let o_h = [b_h + a_h + (b_m + a_m) >> (n-2)] >> 1
+ // Further replace e+1 by e'.
+ //
+ // = o_h * 2^(n-1) +
+ // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-e') =
+ //
+ // Move o_h into the error term and construct E'. To ensure that there is
+ // no 2^x with negative x, this step requires pre(2) (e < n).
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + o_h * 2^(e'-1) * 2^(n-e') + | pre(2), move 2^(e'-1)
+ // | out of the old exponent
+ // + E * 2^(n-e') =
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + [o_h * 2^(e'-1) + E] * 2^(n-e') + | move 2^(e'-1) out of
+ // | the old exponent
+ //
+ // Let E' = o_h * 2^(e'-1) + E
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E' * 2^(n-e')
+ //
+    // Because X and Y differ only in their error terms, and E' can be
+    // constructed as shown, the theorem holds.
+ // [qed]
+ //
+    // For completeness, in the case e=n it is also required to show that
+ // distributivity can be applied.
+ //
+ // In this case Theorem(1) transforms to (the pre-condition on A can also be
+ // dropped)
+ //
+ // Theorem(2): (B + A + E) >> 1 => (B >> 1) + (A >> 1) + E'
+ // where
+ // A, B, E, E' are two's complement numbers with the same bit
+ // width
+ //
+ // Let A + B + E = X
+ // Let (B >> 1) + (A >> 1) = Y
+ //
+ // Therefore we need to show that for every X and Y there is an E' which
+ // makes the equation
+ //
+ // X = Y + E'
+ //
+ // hold. This is trivially the case for E' = X - Y.
+ //
+ // [qed]
+ //
+    // Remark: Distributing lshr with an arbitrary number n can be expressed as
+ // ((((B + A) lshr 1) lshr 1) ... ) {n times}.
+ // This construction induces n additional error bits at the left.
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ if (C.isNullValue())
+ return *this;
+
+ // Test if the result will be zero
+ unsigned shiftAmt = C.getZExtValue();
+ if (shiftAmt >= C.getBitWidth())
+ return mul(APInt(C.getBitWidth(), 0));
+
+    // The proof that the shiftAmt LSBs are zero for at least one summand is
+    // only possible for the constant summand A.
+    //
+    // If this can be proven, add shiftAmt to the error counter
+    // `ErrorMSBs`. Otherwise set all bits as undefined.
+ if (A.countTrailingZeros() < shiftAmt)
+ ErrorMSBs = A.getBitWidth();
+ else
+ incErrorMSBs(shiftAmt);
+
+ // Apply the operation.
+ pushBOperation(LShr, C);
+ A = A.lshr(shiftAmt);
+
+ return *this;
+ }
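+  // Worked example for lshr() above (illustrative, hypothetical values):
+  // with n = 8, A = 4, e = 1 and a shift by 1, pre(1) and pre(2) of
+  // Theorem(1) hold; A.countTrailingZeros() = 2 >= 1, so A becomes 2 and
+  // incErrorMSBs(1) widens the error term from one to two undefined MSBs,
+  // i.e. e' = e + 1 as the proof predicts.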
+
+ /// Apply a sign-extend or truncate operation on the polynomial.
+ Polynomial &sextOrTrunc(unsigned n) {
+ if (n < A.getBitWidth()) {
+      // Truncate: Clearly undefined bits on the MSB side are removed
+      // if there are any.
+ decErrorMSBs(A.getBitWidth() - n);
+ A = A.trunc(n);
+ pushBOperation(Trunc, APInt(sizeof(n) * 8, n));
+ }
+ if (n > A.getBitWidth()) {
+      // Extend: Clearly extending first and adding later is different
+      // from adding first and extending later in all extended bits.
+ incErrorMSBs(n - A.getBitWidth());
+ A = A.sext(n);
+ pushBOperation(SExt, APInt(sizeof(n) * 8, n));
+ }
+
+ return *this;
+ }
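+  // Example for sextOrTrunc() above (illustrative, hypothetical values):
+  // truncating an i64 polynomial with ErrorMSBs = 3 to i32 cuts 32 bits off
+  // the MSB side, so decErrorMSBs(32) saturates ErrorMSBs at 0. Sign-extending
+  // back to i64 then marks the 32 newly created MSBs as undefined again via
+  // incErrorMSBs(32).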
+
+ /// Test if there is a coefficient B.
+ bool isFirstOrder() const { return V != nullptr; }
+
+  /// Test whether the coefficients B of two Polynomials are equal.
+ bool isCompatibleTo(const Polynomial &o) const {
+    // The polynomials use different bit widths.
+ if (A.getBitWidth() != o.A.getBitWidth())
+ return false;
+
+    // If neither Polynomial has a coefficient B, they are compatible.
+ if (!isFirstOrder() && !o.isFirstOrder())
+ return true;
+
+ // The index variable is different.
+ if (V != o.V)
+ return false;
+
+ // Check the operations.
+ if (B.size() != o.B.size())
+ return false;
+
+ auto ob = o.B.begin();
+ for (auto &b : B) {
+ if (b != *ob)
+ return false;
+ ob++;
+ }
+
+ return true;
+ }
+
+ /// Subtract two polynomials, return an undefined polynomial if
+ /// subtraction is not possible.
+ Polynomial operator-(const Polynomial &o) const {
+ // Return an undefined polynomial if incompatible.
+ if (!isCompatibleTo(o))
+ return Polynomial();
+
+ // If the polynomials are compatible (meaning they have the same
+    // coefficient B), B is eliminated. Thus a polynomial solely
+    // containing A is returned.
+ return Polynomial(A - o.A, std::max(ErrorMSBs, o.ErrorMSBs));
+ }
+
+  /// Subtract a constant from a polynomial.
+ Polynomial operator-(uint64_t C) const {
+ Polynomial Result(*this);
+ Result.A -= C;
+ return Result;
+ }
+
+  /// Add a constant to a polynomial.
+ Polynomial operator+(uint64_t C) const {
+ Polynomial Result(*this);
+ Result.A += C;
+ return Result;
+ }
+
+ /// Returns true if it can be proven that two Polynomials are equal.
+ bool isProvenEqualTo(const Polynomial &o) {
+ // Subtract both polynomials and test if it is fully defined and zero.
+ Polynomial r = *this - o;
+ return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isNullValue());
+ }
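+  // Example for isProvenEqualTo() above (illustrative, hypothetical
+  // polynomials): (%i Mul 4) + 8 compared with itself subtracts to a fully
+  // defined constant 0 and is proven equal; compared with (%j Mul 4) + 8 the
+  // index values differ, the subtraction returns an undefined polynomial and
+  // the proof fails.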
+
+ /// Print the polynomial into a stream.
+ void print(raw_ostream &OS) const {
+ OS << "[{#ErrBits:" << ErrorMSBs << "} ";
+
+ if (V) {
+ for (auto b : B)
+ OS << "(";
+ OS << "(" << *V << ") ";
+
+ for (auto b : B) {
+ switch (b.first) {
+ case LShr:
+ OS << "LShr ";
+ break;
+ case Mul:
+ OS << "Mul ";
+ break;
+ case SExt:
+ OS << "SExt ";
+ break;
+ case Trunc:
+ OS << "Trunc ";
+ break;
+ }
+
+ OS << b.second << ") ";
+ }
+ }
+
+ OS << "+ " << A << "]";
+ }
+
+private:
+ void deleteB() {
+ V = nullptr;
+ B.clear();
+ }
+
+ void pushBOperation(const BOps Op, const APInt &C) {
+ if (isFirstOrder()) {
+ B.push_back(std::make_pair(Op, C));
+ return;
+ }
+ }
+};
+
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS, const Polynomial &S) {
+ S.print(OS);
+ return OS;
+}
+#endif
+
+/// VectorInfo stores the following abstract information for each vector
+/// element:
+///
+/// 1) the memory address loaded into the element as a Polynomial,
+/// 2) a set of load instructions necessary to construct the vector,
+/// 3) a set of all other instructions that are necessary to create the
+///    vector, and
+/// 4) a pointer value that can be used as a relative base for all elements.
+struct VectorInfo {
+private:
+ VectorInfo(const VectorInfo &c) : VTy(c.VTy) {
+ llvm_unreachable(
+ "Copying VectorInfo is neither implemented nor necessary,");
+ }
+
+public:
+ /// Information of a Vector Element
+ struct ElementInfo {
+ /// Offset Polynomial.
+ Polynomial Ofs;
+
+    /// The load instruction used to load the entry. LI is null if the pointer
+    /// of the load instruction does not point to the entry.
+ LoadInst *LI;
+
+ ElementInfo(Polynomial Offset = Polynomial(), LoadInst *LI = nullptr)
+ : Ofs(Offset), LI(LI) {}
+ };
+
+ /// Basic-block the load instructions are within
+ BasicBlock *BB;
+
+  /// Pointer value of all participating load instructions
+ Value *PV;
+
+ /// Participating load instructions
+ std::set<LoadInst *> LIs;
+
+ /// Participating instructions
+ std::set<Instruction *> Is;
+
+ /// Final shuffle-vector instruction
+ ShuffleVectorInst *SVI;
+
+ /// Information of the offset for each vector element
+ ElementInfo *EI;
+
+ /// Vector Type
+ VectorType *const VTy;
+
+ VectorInfo(VectorType *VTy)
+ : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) {
+ EI = new ElementInfo[VTy->getNumElements()];
+ }
+
+ virtual ~VectorInfo() { delete[] EI; }
+
+ unsigned getDimension() const { return VTy->getNumElements(); }
+
+ /// Test if the VectorInfo can be part of an interleaved load with the
+ /// specified factor.
+ ///
+  /// \param Factor Factor of the interleave
+  /// \param DL The target's DataLayout
+ ///
+ /// \returns true if this is possible and false if not
+ bool isInterleaved(unsigned Factor, const DataLayout &DL) const {
+ unsigned Size = DL.getTypeAllocSize(VTy->getElementType());
+ for (unsigned i = 1; i < getDimension(); i++) {
+ if (!EI[i].Ofs.isProvenEqualTo(EI[0].Ofs + i * Factor * Size)) {
+ return false;
+ }
+ }
+ return true;
+ }
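+  // Example for isInterleaved() above (illustrative, hypothetical values):
+  // for a <4 x i32> VectorInfo and Factor = 2, Size = 4, so the element
+  // offsets must be provably Ofs0, Ofs0 + 8, Ofs0 + 16 and Ofs0 + 24, i.e.
+  // the elements are loaded with a stride of Factor * Size bytes.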
+
+ /// Recursively computes the vector information stored in V.
+ ///
+ /// This function delegates the work to specialized implementations
+ ///
+ /// \param V Value to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool compute(Value *V, VectorInfo &Result, const DataLayout &DL) {
+ ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
+ if (SVI)
+ return computeFromSVI(SVI, Result, DL);
+ LoadInst *LI = dyn_cast<LoadInst>(V);
+ if (LI)
+ return computeFromLI(LI, Result, DL);
+ BitCastInst *BCI = dyn_cast<BitCastInst>(V);
+ if (BCI)
+ return computeFromBCI(BCI, Result, DL);
+ return false;
+ }
+
+ /// BitCastInst specialization to compute the vector information.
+ ///
+ /// \param BCI BitCastInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromBCI(BitCastInst *BCI, VectorInfo &Result,
+ const DataLayout &DL) {
+ Instruction *Op = dyn_cast<Instruction>(BCI->getOperand(0));
+
+ if (!Op)
+ return false;
+
+ VectorType *VTy = dyn_cast<VectorType>(Op->getType());
+ if (!VTy)
+ return false;
+
+    // We can only cast from larger to smaller vectors.
+ if (Result.VTy->getNumElements() % VTy->getNumElements())
+ return false;
+
+ unsigned Factor = Result.VTy->getNumElements() / VTy->getNumElements();
+ unsigned NewSize = DL.getTypeAllocSize(Result.VTy->getElementType());
+ unsigned OldSize = DL.getTypeAllocSize(VTy->getElementType());
+
+ if (NewSize * Factor != OldSize)
+ return false;
+
+ VectorInfo Old(VTy);
+ if (!compute(Op, Old, DL))
+ return false;
+
+ for (unsigned i = 0; i < Result.VTy->getNumElements(); i += Factor) {
+ for (unsigned j = 0; j < Factor; j++) {
+ Result.EI[i + j] =
+ ElementInfo(Old.EI[i / Factor].Ofs + j * NewSize,
+ j == 0 ? Old.EI[i / Factor].LI : nullptr);
+ }
+ }
+
+ Result.BB = Old.BB;
+ Result.PV = Old.PV;
+ Result.LIs.insert(Old.LIs.begin(), Old.LIs.end());
+ Result.Is.insert(Old.Is.begin(), Old.Is.end());
+ Result.Is.insert(BCI);
+ Result.SVI = nullptr;
+
+ return true;
+ }
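+  // Example for computeFromBCI() above (illustrative, hypothetical types):
+  // a bitcast from <2 x i32> to <4 x i16> has Factor = 2, OldSize = 4 and
+  // NewSize = 2; each old element is split into two ElementInfos whose
+  // offsets differ by NewSize bytes, and only the first of each pair keeps
+  // the originating LoadInst.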
+
+ /// ShuffleVectorInst specialization to compute vector information.
+ ///
+ /// \param SVI ShuffleVectorInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// Compute the left and the right side vector information and merge them by
+ /// applying the shuffle operation. This function also ensures that the left
+  /// and right side have compatible loads. This means that all loads are
+  /// within the same basic block and are based on the same pointer.
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result,
+ const DataLayout &DL) {
+ VectorType *ArgTy = dyn_cast<VectorType>(SVI->getOperand(0)->getType());
+ assert(ArgTy && "ShuffleVector Operand is not a VectorType");
+
+ // Compute the left hand vector information.
+ VectorInfo LHS(ArgTy);
+ if (!compute(SVI->getOperand(0), LHS, DL))
+ LHS.BB = nullptr;
+
+ // Compute the right hand vector information.
+ VectorInfo RHS(ArgTy);
+ if (!compute(SVI->getOperand(1), RHS, DL))
+ RHS.BB = nullptr;
+
+ // Neither operand produced sensible results?
+ if (!LHS.BB && !RHS.BB)
+ return false;
+ // Only RHS produced sensible results?
+ else if (!LHS.BB) {
+ Result.BB = RHS.BB;
+ Result.PV = RHS.PV;
+ }
+ // Only LHS produced sensible results?
+ else if (!RHS.BB) {
+ Result.BB = LHS.BB;
+ Result.PV = LHS.PV;
+ }
+ // Both operands produced sensible results?
+ else if ((LHS.BB == RHS.BB) && (LHS.PV == RHS.PV)) {
+ Result.BB = LHS.BB;
+ Result.PV = LHS.PV;
+ }
+ // Both operands produced sensible results but they are incompatible.
+ else {
+ return false;
+ }
+
+ // Merge and apply the operation on the offset information.
+ if (LHS.BB) {
+ Result.LIs.insert(LHS.LIs.begin(), LHS.LIs.end());
+ Result.Is.insert(LHS.Is.begin(), LHS.Is.end());
+ }
+ if (RHS.BB) {
+ Result.LIs.insert(RHS.LIs.begin(), RHS.LIs.end());
+ Result.Is.insert(RHS.Is.begin(), RHS.Is.end());
+ }
+ Result.Is.insert(SVI);
+ Result.SVI = SVI;
+
+ int j = 0;
+ for (int i : SVI->getShuffleMask()) {
+ assert((i < 2 * (signed)ArgTy->getNumElements()) &&
+ "Invalid ShuffleVectorInst (index out of bounds)");
+
+ if (i < 0)
+ Result.EI[j] = ElementInfo();
+ else if (i < (signed)ArgTy->getNumElements()) {
+ if (LHS.BB)
+ Result.EI[j] = LHS.EI[i];
+ else
+ Result.EI[j] = ElementInfo();
+ } else {
+ if (RHS.BB)
+ Result.EI[j] = RHS.EI[i - ArgTy->getNumElements()];
+ else
+ Result.EI[j] = ElementInfo();
+ }
+ j++;
+ }
+
+ return true;
+ }
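+  // Example for computeFromSVI() above (illustrative, hypothetical mask):
+  // shuffling two <2 x i32> operands with the mask <0, 2, 1, 3> picks
+  // LHS.EI[0], RHS.EI[0], LHS.EI[1], RHS.EI[1]; a negative (undef) mask
+  // entry produces a default ElementInfo with an undefined offset.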
+
+ /// LoadInst specialization to compute vector information.
+ ///
+ /// This function also acts as abort condition to the recursion.
+ ///
+ /// \param LI LoadInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromLI(LoadInst *LI, VectorInfo &Result,
+ const DataLayout &DL) {
+ Value *BasePtr;
+ Polynomial Offset;
+
+ if (LI->isVolatile())
+ return false;
+
+ if (LI->isAtomic())
+ return false;
+
+ // Get the base polynomial
+ computePolynomialFromPointer(*LI->getPointerOperand(), Offset, BasePtr, DL);
+
+ Result.BB = LI->getParent();
+ Result.PV = BasePtr;
+ Result.LIs.insert(LI);
+ Result.Is.insert(LI);
+
+ for (unsigned i = 0; i < Result.getDimension(); i++) {
+ Value *Idx[2] = {
+ ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(LI->getContext()), i),
+ };
+ int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, makeArrayRef(Idx, 2));
+ Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr);
+ }
+
+ return true;
+ }
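+  // Example for computeFromLI() above (illustrative, hypothetical IR): for
+  // `load <4 x i32>, <4 x i32>* %p` the polynomial of %p is extended with
+  // the per-element byte offsets 0, 4, 8 and 12 computed by
+  // getIndexedOffsetInType, and only element 0 records the LoadInst itself.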
+
+ /// Recursively compute polynomial of a value.
+ ///
+ /// \param BO Input binary operation
+ /// \param Result Result polynomial
+ static void computePolynomialBinOp(BinaryOperator &BO, Polynomial &Result) {
+ Value *LHS = BO.getOperand(0);
+ Value *RHS = BO.getOperand(1);
+
+ // Find the RHS Constant if any
+ ConstantInt *C = dyn_cast<ConstantInt>(RHS);
+ if ((!C) && BO.isCommutative()) {
+ C = dyn_cast<ConstantInt>(LHS);
+ if (C)
+ std::swap(LHS, RHS);
+ }
+
+ switch (BO.getOpcode()) {
+ case Instruction::Add:
+ if (!C)
+ break;
+
+ computePolynomial(*LHS, Result);
+ Result.add(C->getValue());
+ return;
+
+ case Instruction::LShr:
+ if (!C)
+ break;
+
+ computePolynomial(*LHS, Result);
+ Result.lshr(C->getValue());
+ return;
+
+ default:
+ break;
+ }
+
+ Result = Polynomial(&BO);
+ }
+
+ /// Recursively compute polynomial of a value
+ ///
+ /// \param V input value
+ /// \param Result result polynomial
+ static void computePolynomial(Value &V, Polynomial &Result) {
+ if (isa<BinaryOperator>(&V))
+ computePolynomialBinOp(*dyn_cast<BinaryOperator>(&V), Result);
+ else
+ Result = Polynomial(&V);
+ }
+
+ /// Compute the Polynomial representation of a Pointer type.
+ ///
+ /// \param Ptr input pointer value
+ /// \param Result result polynomial
+ /// \param BasePtr pointer the polynomial is based on
+ /// \param DL Datalayout of the target machine
+ static void computePolynomialFromPointer(Value &Ptr, Polynomial &Result,
+ Value *&BasePtr,
+ const DataLayout &DL) {
+ // Not a pointer type? Return an undefined polynomial
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr.getType());
+    if (!PtrTy) {
+      Result = Polynomial();
+      BasePtr = nullptr;
+      return;
+    }
+ unsigned PointerBits =
+ DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace());
+
+    // Skip pointer casts. Return a zero polynomial otherwise.
+ if (isa<CastInst>(&Ptr)) {
+ CastInst &CI = *cast<CastInst>(&Ptr);
+ switch (CI.getOpcode()) {
+ case Instruction::BitCast:
+ computePolynomialFromPointer(*CI.getOperand(0), Result, BasePtr, DL);
+ break;
+ default:
+ BasePtr = &Ptr;
+        Result = Polynomial(PointerBits, 0);
+ break;
+ }
+ }
+    // Resolve GetElementPtrInst.
+ else if (isa<GetElementPtrInst>(&Ptr)) {
+ GetElementPtrInst &GEP = *cast<GetElementPtrInst>(&Ptr);
+
+ APInt BaseOffset(PointerBits, 0);
+
+ // Check if we can compute the Offset with accumulateConstantOffset
+ if (GEP.accumulateConstantOffset(DL, BaseOffset)) {
+ Result = Polynomial(BaseOffset);
+ BasePtr = GEP.getPointerOperand();
+ return;
+ } else {
+ // Otherwise we allow that the last index operand of the GEP is
+ // non-constant.
+ unsigned idxOperand, e;
+ SmallVector<Value *, 4> Indices;
+ for (idxOperand = 1, e = GEP.getNumOperands(); idxOperand < e;
+ idxOperand++) {
+ ConstantInt *IDX = dyn_cast<ConstantInt>(GEP.getOperand(idxOperand));
+ if (!IDX)
+ break;
+ Indices.push_back(IDX);
+ }
+
+ // It must also be the last operand.
+ if (idxOperand + 1 != e) {
+ Result = Polynomial();
+ BasePtr = nullptr;
+ return;
+ }
+
+ // Compute the polynomial of the index operand.
+ computePolynomial(*GEP.getOperand(idxOperand), Result);
+
+ // Compute base offset from zero based index, excluding the last
+ // variable operand.
+ BaseOffset =
+ DL.getIndexedOffsetInType(GEP.getSourceElementType(), Indices);
+
+ // Apply the operations of GEP to the polynomial.
+ unsigned ResultSize = DL.getTypeAllocSize(GEP.getResultElementType());
+ Result.sextOrTrunc(PointerBits);
+ Result.mul(APInt(PointerBits, ResultSize));
+ Result.add(BaseOffset);
+ BasePtr = GEP.getPointerOperand();
+ }
+ }
+ // All other instructions are handled by using the value as base pointer and
+ // a zero polynomial.
+ else {
+ BasePtr = &Ptr;
+    Result =
+        Polynomial(DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()), 0);
+ }
+ }
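+  // Example for computePolynomialFromPointer() above (illustrative,
+  // hypothetical IR): for `%q = getelementptr i32, i32* %p, i64 %i` the only
+  // index is non-constant and also the last operand, so the polynomial of %i
+  // is sign-extended/truncated to the index width, multiplied by the element
+  // size 4 and combined with BasePtr = %p; a GEP with all-constant indices is
+  // folded into a constant offset via accumulateConstantOffset instead.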
+
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const {
+ if (PV)
+ OS << *PV;
+ else
+ OS << "(none)";
+ OS << " + ";
+ for (unsigned i = 0; i < getDimension(); i++)
+ OS << ((i == 0) ? "[" : ", ") << EI[i].Ofs;
+ OS << "]";
+ }
+#endif
+};
+
+} // anonymous namespace
+
+bool InterleavedLoadCombineImpl::findPattern(
+ std::list<VectorInfo> &Candidates, std::list<VectorInfo> &InterleavedLoad,
+ unsigned Factor, const DataLayout &DL) {
+ for (auto C0 = Candidates.begin(), E0 = Candidates.end(); C0 != E0; ++C0) {
+ unsigned i;
+    // Try to find an interleaved load using candidate C0 as the first line.
+ unsigned Size = DL.getTypeAllocSize(C0->VTy->getElementType());
+
+ // List containing iterators pointing to the VectorInfos of the candidates
+ std::vector<std::list<VectorInfo>::iterator> Res(Factor, Candidates.end());
+
+ for (auto C = Candidates.begin(), E = Candidates.end(); C != E; C++) {
+ if (C->VTy != C0->VTy)
+ continue;
+ if (C->BB != C0->BB)
+ continue;
+ if (C->PV != C0->PV)
+ continue;
+
+      // Check if the current candidate matches any of the Factor - 1
+      // remaining lines.
+ for (i = 1; i < Factor; i++) {
+ if (C->EI[0].Ofs.isProvenEqualTo(C0->EI[0].Ofs + i * Size)) {
+ Res[i] = C;
+ }
+ }
+
+ for (i = 1; i < Factor; i++) {
+ if (Res[i] == Candidates.end())
+ break;
+ }
+ if (i == Factor) {
+ Res[0] = C0;
+ break;
+ }
+ }
+
+ if (Res[0] != Candidates.end()) {
+ // Move the result into the output
+ for (unsigned i = 0; i < Factor; i++) {
+ InterleavedLoad.splice(InterleavedLoad.end(), Candidates, Res[i]);
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
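+
+// Example for findPattern() above (illustrative, hypothetical candidates):
+// with Factor = 2 and <4 x i32> candidates, Size = 4, so a candidate whose
+// first-element offset is provably Ofs0 + 4 fills line i = 1 relative to a
+// candidate starting at Ofs0; once all Factor lines are found the group is
+// spliced into InterleavedLoad.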
+
+LoadInst *
+InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) {
+ assert(!LIs.empty() && "No load instructions given.");
+
+ // All LIs are within the same BB. Select the first for a reference.
+ BasicBlock *BB = (*LIs.begin())->getParent();
+ BasicBlock::iterator FLI =
+ std::find_if(BB->begin(), BB->end(), [&LIs](Instruction &I) -> bool {
+ return is_contained(LIs, &I);
+ });
+ assert(FLI != BB->end());
+
+ return cast<LoadInst>(FLI);
+}
+
+bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
+ OptimizationRemarkEmitter &ORE) {
+ LLVM_DEBUG(dbgs() << "Checking interleaved load\n");
+
+ // The insertion point is the LoadInst which loads the first values. The
+  // following tests are used to prove that the combined load can be inserted
+ // just before InsertionPoint.
+ LoadInst *InsertionPoint = InterleavedLoad.front().EI[0].LI;
+
+ // Test if the offset is computed
+ if (!InsertionPoint)
+ return false;
+
+ std::set<LoadInst *> LIs;
+ std::set<Instruction *> Is;
+ std::set<Instruction *> SVIs;
+
+ unsigned InterleavedCost;
+ unsigned InstructionCost = 0;
+
+ // Get the interleave factor
+ unsigned Factor = InterleavedLoad.size();
+
+ // Merge all input sets used in analysis
+ for (auto &VI : InterleavedLoad) {
+ // Generate a set of all load instructions to be combined
+ LIs.insert(VI.LIs.begin(), VI.LIs.end());
+
+    // Generate a set of all instructions taking part in the interleaved
+    // load. This list excludes the instructions necessary for the
+    // polynomial construction.
+ Is.insert(VI.Is.begin(), VI.Is.end());
+
+ // Generate the set of the final ShuffleVectorInst.
+ SVIs.insert(VI.SVI);
+ }
+
+ // There is nothing to combine.
+ if (LIs.size() < 2)
+ return false;
+
+  // Test if all participating instructions will be dead after the
+  // transformation. If intermediate results are used, no performance gain can
+  // be expected. Also sum the cost of the instructions being left dead.
+ for (auto &I : Is) {
+ // Compute the old cost
+ InstructionCost +=
+ TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+
+ // The final SVIs are allowed not to be dead, all uses will be replaced
+ if (SVIs.find(I) != SVIs.end())
+ continue;
+
+ // If there are users outside the set to be eliminated, we abort the
+ // transformation. No gain can be expected.
+ for (const auto &U : I->users()) {
+ if (Is.find(dyn_cast<Instruction>(U)) == Is.end())
+ return false;
+ }
+ }
+
+ // We know that all LoadInst are within the same BB. This guarantees that
+ // either everything or nothing is loaded.
+ LoadInst *First = findFirstLoad(LIs);
+
+  // To be sure that the loads can be combined, iterate over all loads and
+  // test that the corresponding defining access dominates the first LI. This
+  // guarantees that there are no aliasing stores in between the loads.
+ auto FMA = MSSA.getMemoryAccess(First);
+ for (auto LI : LIs) {
+ auto MADef = MSSA.getMemoryAccess(LI)->getDefiningAccess();
+ if (!MSSA.dominates(MADef, FMA))
+ return false;
+ }
+ assert(!LIs.empty() && "There are no LoadInst to combine");
+
+  // It is necessary that the insertion point dominates all final
+  // ShuffleVectorInsts.
+ for (auto &VI : InterleavedLoad) {
+ if (!DT.dominates(InsertionPoint, VI.SVI))
+ return false;
+ }
+
+  // All checks are done. Add instructions detectable by InterleavedAccessPass.
+  // The old instructions are left dead.
+ IRBuilder<> Builder(InsertionPoint);
+ Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType();
+ unsigned ElementsPerSVI =
+ InterleavedLoad.front().SVI->getType()->getNumElements();
+ VectorType *ILTy = VectorType::get(ETy, Factor * ElementsPerSVI);
+
+ SmallVector<unsigned, 4> Indices;
+ for (unsigned i = 0; i < Factor; i++)
+ Indices.push_back(i);
+ InterleavedCost = TTI.getInterleavedMemoryOpCost(
+ Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlignment(),
+ InsertionPoint->getPointerAddressSpace());
+
+ if (InterleavedCost >= InstructionCost) {
+ return false;
+ }
+
+ // Create a pointer cast for the wide load.
+ auto CI = Builder.CreatePointerCast(InsertionPoint->getOperand(0),
+ ILTy->getPointerTo(),
+ "interleaved.wide.ptrcast");
+
+ // Create the wide load and update the MemorySSA.
+ auto LI = Builder.CreateAlignedLoad(CI, InsertionPoint->getAlignment(),
+ "interleaved.wide.load");
+ auto MSSAU = MemorySSAUpdater(&MSSA);
+ MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
+ LI, nullptr, MSSA.getMemoryAccess(InsertionPoint)));
+ MSSAU.insertUse(MSSALoad);
+
+ // Create the final SVIs and replace all uses.
+ int i = 0;
+ for (auto &VI : InterleavedLoad) {
+ SmallVector<uint32_t, 4> Mask;
+ for (unsigned j = 0; j < ElementsPerSVI; j++)
+ Mask.push_back(i + j * Factor);
+
+ Builder.SetInsertPoint(VI.SVI);
+ auto SVI = Builder.CreateShuffleVector(LI, UndefValue::get(LI->getType()),
+ Mask, "interleaved.shuffle");
+ VI.SVI->replaceAllUsesWith(SVI);
+ i++;
+ }
+
+ NumInterleavedLoadCombine++;
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Combined Interleaved Load", LI)
+ << "Load interleaved combined with factor "
+ << ore::NV("Factor", Factor);
+ });
+
+ return true;
+}
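+
+// Example for combine() above (illustrative, hypothetical shapes): combining
+// two <4 x i32> candidates with Factor = 2 emits one wide <8 x i32> load
+// plus two shufflevectors with masks <0,2,4,6> and <1,3,5,7>; the rewrite is
+// only done when the TTI cost of the interleaved load is below the summed
+// cost of the instructions it leaves dead.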
+
+bool InterleavedLoadCombineImpl::run() {
+ OptimizationRemarkEmitter ORE(&F);
+ bool changed = false;
+ unsigned MaxFactor = TLI.getMaxSupportedInterleaveFactor();
+
+ auto &DL = F.getParent()->getDataLayout();
+
+ // Start with the highest factor to avoid combining and recombining.
+ for (unsigned Factor = MaxFactor; Factor >= 2; Factor--) {
+ std::list<VectorInfo> Candidates;
+
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto SVI = dyn_cast<ShuffleVectorInst>(&I)) {
+
+ Candidates.emplace_back(SVI->getType());
+
+ if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) {
+ Candidates.pop_back();
+ continue;
+ }
+
+ if (!Candidates.back().isInterleaved(Factor, DL)) {
+ Candidates.pop_back();
+ }
+ }
+ }
+ }
+
+ std::list<VectorInfo> InterleavedLoad;
+ while (findPattern(Candidates, InterleavedLoad, Factor, DL)) {
+ if (combine(InterleavedLoad, ORE)) {
+ changed = true;
+ } else {
+ // Remove the first element of the Interleaved Load but put the others
+ // back on the list and continue searching
+ Candidates.splice(Candidates.begin(), InterleavedLoad,
+ std::next(InterleavedLoad.begin()),
+ InterleavedLoad.end());
+ }
+ InterleavedLoad.clear();
+ }
+ }
+
+ return changed;
+}
+
+namespace {
+/// This pass combines interleaved loads into a pattern detectable by
+/// InterleavedAccessPass.
+struct InterleavedLoadCombine : public FunctionPass {
+ static char ID;
+
+ InterleavedLoadCombine() : FunctionPass(ID) {
+ initializeInterleavedLoadCombinePass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Interleaved Load Combine Pass";
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (DisableInterleavedLoadCombine)
+ return false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName()
+ << "\n");
+
+ return InterleavedLoadCombineImpl(
+ F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<MemorySSAWrapperPass>().getMSSA(),
+ TPC->getTM<TargetMachine>())
+ .run();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+};
+} // anonymous namespace
+
+char InterleavedLoadCombine::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+ InterleavedLoadCombine, DEBUG_TYPE,
+ "Combine interleaved loads into wide loads and shufflevector instructions",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_END(
+ InterleavedLoadCombine, DEBUG_TYPE,
+ "Combine interleaved loads into wide loads and shufflevector instructions",
+ false, false)
+
+FunctionPass *
+llvm::createInterleavedLoadCombinePass() {
+ auto P = new InterleavedLoadCombine();
+ return P;
+}
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 2cd389ce2c11..52e832cc38c1 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -40,14 +40,14 @@ static cl::opt<bool> EnableTrapUnreachable("trap-unreachable",
cl::desc("Enable generating trap for unreachable"));
void LLVMTargetMachine::initAsmInfo() {
- MRI = TheTarget.createMCRegInfo(getTargetTriple().str());
- MII = TheTarget.createMCInstrInfo();
+ MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str()));
+ MII.reset(TheTarget.createMCInstrInfo());
// FIXME: Having an MCSubtargetInfo on the target machine is a hack due
// to some backends having subtarget feature dependent module level
// code generation. This is similar to the hack in the AsmPrinter for
// module level assembly etc.
- STI = TheTarget.createMCSubtargetInfo(getTargetTriple().str(), getTargetCPU(),
- getTargetFeatureString());
+ STI.reset(TheTarget.createMCSubtargetInfo(
+ getTargetTriple().str(), getTargetCPU(), getTargetFeatureString()));
MCAsmInfo *TmpAsmInfo =
TheTarget.createMCAsmInfo(*MRI, getTargetTriple().str());
@@ -71,7 +71,7 @@ void LLVMTargetMachine::initAsmInfo() {
if (Options.ExceptionModel != ExceptionHandling::None)
TmpAsmInfo->setExceptionsType(Options.ExceptionModel);
- AsmInfo = TmpAsmInfo;
+ AsmInfo.reset(TmpAsmInfo);
}
LLVMTargetMachine::LLVMTargetMachine(const Target &T,
@@ -95,29 +95,22 @@ LLVMTargetMachine::getTargetTransformInfo(const Function &F) {
}
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
-static MCContext *
-addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
- bool DisableVerify, bool &WillCompleteCodeGenPipeline,
- raw_pwrite_stream &Out, MachineModuleInfo *MMI) {
+static TargetPassConfig *
+addPassesToGenerateCode(LLVMTargetMachine &TM, PassManagerBase &PM,
+ bool DisableVerify, MachineModuleInfo &MMI) {
// Targets may override createPassConfig to provide a target-specific
// subclass.
- TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+ TargetPassConfig *PassConfig = TM.createPassConfig(PM);
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
- WillCompleteCodeGenPipeline = PassConfig->willCompleteCodeGenPipeline();
PM.add(PassConfig);
- if (!MMI)
- MMI = new MachineModuleInfo(TM);
- PM.add(MMI);
+ PM.add(&MMI);
if (PassConfig->addISelPasses())
return nullptr;
PassConfig->addMachinePasses();
PassConfig->setInitialized();
- if (!WillCompleteCodeGenPipeline)
- PM.add(createPrintMIRPass(Out));
-
- return &MMI->getContext();
+ return PassConfig;
}
bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
@@ -201,14 +194,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
bool DisableVerify,
MachineModuleInfo *MMI) {
// Add common CodeGen passes.
- bool WillCompleteCodeGenPipeline = true;
- MCContext *Context = addPassesToGenerateCode(
- this, PM, DisableVerify, WillCompleteCodeGenPipeline, Out, MMI);
- if (!Context)
+ if (!MMI)
+ MMI = new MachineModuleInfo(this);
+ TargetPassConfig *PassConfig =
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMI);
+ if (!PassConfig)
return true;
- if (WillCompleteCodeGenPipeline &&
- addAsmPrinter(PM, Out, DwoOut, FileType, *Context))
+ if (!TargetPassConfig::willCompleteCodeGenPipeline()) {
+ PM.add(createPrintMIRPass(Out));
+ } else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext()))
return true;
PM.add(createFreeMachineFunctionPass());
@@ -224,14 +219,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
raw_pwrite_stream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
- bool WillCompleteCodeGenPipeline = true;
- Ctx = addPassesToGenerateCode(this, PM, DisableVerify,
- WillCompleteCodeGenPipeline, Out,
- /*MachineModuleInfo*/ nullptr);
- if (!Ctx)
+ MachineModuleInfo *MMI = new MachineModuleInfo(this);
+ TargetPassConfig *PassConfig =
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMI);
+ if (!PassConfig)
return true;
- assert(WillCompleteCodeGenPipeline && "CodeGen pipeline has been altered");
+ assert(TargetPassConfig::willCompleteCodeGenPipeline() &&
+ "Cannot emit MC with limited codegen pipeline");
+ Ctx = &MMI->getContext();
if (Options.MCOptions.MCSaveTempLabels)
Ctx->setAllowTemporaryLabels(false);
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
index 5dbce841cfd5..f9f33a98a9d1 100644
--- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -145,9 +145,9 @@ void LatencyPriorityQueue::remove(SUnit *SU) {
LLVM_DUMP_METHOD void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
dbgs() << "Latency Priority Queue\n";
dbgs() << " Number of Queue Entries: " << Queue.size() << "\n";
- for (auto const &SU : Queue) {
+ for (const SUnit *SU : Queue) {
dbgs() << " ";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
}
}
#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
index 417bd9d5aebe..fc0ebea2d36c 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -258,7 +258,8 @@ private:
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited);
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks);
bool ExtendRanges(MachineFunction &MF);
@@ -323,8 +324,10 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
raw_ostream &Out) const {
Out << '\n' << msg << '\n';
for (const MachineBasicBlock &BB : MF) {
- const auto &L = V.lookup(&BB);
- Out << "MBB: " << BB.getName() << ":\n";
+ const VarLocSet &L = V.lookup(&BB);
+ if (L.empty())
+ continue;
+ Out << "MBB: " << BB.getNumber() << ":\n";
for (unsigned VLL : L) {
const VarLoc &VL = VarLocIDs[VLL];
Out << " Var: " << VL.Var.getVar()->getName();
@@ -470,16 +473,21 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
MachineFunction *MF, unsigned &Reg) {
const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
int FI;
- const MachineMemOperand *MMO;
+ SmallVector<const MachineMemOperand*, 1> Accesses;
// TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
return false;
// To identify a spill instruction, use the same criteria as in AsmPrinter.
- if (!((TII->isStoreToStackSlotPostFE(MI, FI) ||
- TII->hasStoreToStackSlot(MI, MMO, FI)) &&
- FrameInfo.isSpillSlotObjectIndex(FI)))
+ if (!((TII->isStoreToStackSlotPostFE(MI, FI) &&
+ FrameInfo.isSpillSlotObjectIndex(FI)) ||
+ (TII->hasStoreToStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, [&FrameInfo](const MachineMemOperand *MMO) {
+ return FrameInfo.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(MMO->getPseudoValue())
+ ->getFrameIndex());
+ }))))
return false;
auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
@@ -599,7 +607,7 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
LLVM_DEBUG(for (unsigned ID
: OpenRanges.getVarLocs()) {
// Copy OpenRanges to OutLocs, if not already present.
- dbgs() << "Add to OutLocs: ";
+ dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": ";
VarLocIDs[ID].dump();
});
VarLocSet &VLS = OutLocs[CurMBB];
@@ -626,10 +634,12 @@ bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
/// This routine joins the analysis results of all incoming edges in @MBB by
/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same
/// source variable in all the predecessors of @MBB reside in the same location.
-bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
- VarLocInMBB &InLocs, const VarLocMap &VarLocIDs,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited) {
- LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
+bool LiveDebugValues::join(
+ MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
+ const VarLocMap &VarLocIDs,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks) {
+ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
VarLocSet InLocsT; // Temporary incoming locations.
@@ -641,8 +651,11 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
// Ignore unvisited predecessor blocks. As we are processing
// the blocks in reverse post-order any unvisited block can
// be considered to not remove any incoming values.
- if (!Visited.count(p))
+ if (!Visited.count(p)) {
+ LLVM_DEBUG(dbgs() << " ignoring unvisited pred MBB: " << p->getNumber()
+ << "\n");
continue;
+ }
auto OL = OutLocs.find(p);
// Join is null in case of empty OutLocs from any of the pred.
if (OL == OutLocs.end())
@@ -654,14 +667,32 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
InLocsT = OL->second;
else
InLocsT &= OL->second;
+
+ LLVM_DEBUG({
+ if (!InLocsT.empty()) {
+ for (auto ID : InLocsT)
+ dbgs() << " gathered candidate incoming var: "
+ << VarLocIDs[ID].Var.getVar()->getName() << "\n";
+ }
+ });
+
NumVisited++;
}
// Filter out DBG_VALUES that are out of scope.
VarLocSet KillSet;
- for (auto ID : InLocsT)
- if (!VarLocIDs[ID].dominates(MBB))
- KillSet.set(ID);
+ bool IsArtificial = ArtificialBlocks.count(&MBB);
+ if (!IsArtificial) {
+ for (auto ID : InLocsT) {
+ if (!VarLocIDs[ID].dominates(MBB)) {
+ KillSet.set(ID);
+ LLVM_DEBUG({
+ auto Name = VarLocIDs[ID].Var.getVar()->getName();
+ dbgs() << " killing " << Name << ", it doesn't dominate MBB\n";
+ });
+ }
+ }
+ }
InLocsT.intersectWithComplement(KillSet);
// As we are processing blocks in reverse post-order we
@@ -712,6 +743,10 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
VarLocInMBB InLocs; // Ranges that are incoming after joining.
TransferMap Transfers; // DBG_VALUEs associated with spills.
+ // Blocks which are artificial, i.e. blocks which exclusively contain
+ // instructions without locations, or with line 0 locations.
+ SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+
DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
std::priority_queue<unsigned int, std::vector<unsigned int>,
@@ -733,6 +768,15 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
dontTransferChanges);
+ auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
+ if (const DebugLoc &DL = MI.getDebugLoc())
+ return DL.getLine() != 0;
+ return false;
+ };
+ for (auto &MBB : MF)
+ if (none_of(MBB.instrs(), hasNonArtificialLocation))
+ ArtificialBlocks.insert(&MBB);
+
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
"OutLocs after initialization", dbgs()));
@@ -758,7 +802,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
while (!Worklist.empty()) {
MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
Worklist.pop();
- MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited);
+ MBBJoined =
+ join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, ArtificialBlocks);
Visited.insert(MBB);
if (MBBJoined) {
MBBJoined = false;
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 3ff03ec4a7ee..d0d889782a35 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -132,14 +132,18 @@ private:
unsigned WasIndirect : 1;
};
-/// LocMap - Map of where a user value is live, and its location.
+/// Map of where a user value is live, and its location.
using LocMap = IntervalMap<SlotIndex, DbgValueLocation, 4>;
+/// Map of stack slot offsets for spilled locations.
+/// Non-spilled locations are not added to the map.
+using SpillOffsetMap = DenseMap<unsigned, unsigned>;
+
namespace {
class LDVImpl;
-/// UserValue - A user value is a part of a debug info user variable.
+/// A user value is a part of a debug info user variable.
///
/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
/// holds part of a user variable. The part is identified by a byte offset.
@@ -166,26 +170,26 @@ class UserValue {
/// lexical scope.
SmallSet<SlotIndex, 2> trimmedDefs;
- /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+ /// Insert a DBG_VALUE into MBB at Idx for LocNo.
void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
- SlotIndex StopIdx,
- DbgValueLocation Loc, bool Spilled, LiveIntervals &LIS,
+ SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled,
+ unsigned SpillOffset, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI);
- /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
+ /// Replace OldLocNo ranges with NewRegs ranges where NewRegs
/// is live. Returns true if any changes were made.
bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
LiveIntervals &LIS);
public:
- /// UserValue - Create a new UserValue.
+ /// Create a new UserValue.
UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L,
LocMap::Allocator &alloc)
: Variable(var), Expression(expr), dl(std::move(L)), leader(this),
locInts(alloc) {}
- /// getLeader - Get the leader of this value's equivalence class.
+ /// Get the leader of this value's equivalence class.
UserValue *getLeader() {
UserValue *l = leader;
while (l != l->leader)
@@ -193,10 +197,10 @@ public:
return leader = l;
}
- /// getNext - Return the next UserValue in the equivalence class.
+ /// Return the next UserValue in the equivalence class.
UserValue *getNext() const { return next; }
- /// match - Does this UserValue match the parameters?
+ /// Does this UserValue match the parameters?
bool match(const DILocalVariable *Var, const DIExpression *Expr,
const DILocation *IA) const {
// FIXME: The fragment should be part of the equivalence class, but not
@@ -204,7 +208,7 @@ public:
return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA;
}
- /// merge - Merge equivalence classes.
+ /// Merge equivalence classes.
static UserValue *merge(UserValue *L1, UserValue *L2) {
L2 = L2->getLeader();
if (!L1)
@@ -256,10 +260,10 @@ public:
return locations.size() - 1;
}
- /// mapVirtRegs - Ensure that all virtual register locations are mapped.
+ /// Ensure that all virtual register locations are mapped.
void mapVirtRegs(LDVImpl *LDV);
- /// addDef - Add a definition point to this value.
+ /// Add a definition point to this value.
void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) {
DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect);
// Add a singular (Idx,Idx) -> Loc mapping.
@@ -271,63 +275,71 @@ public:
I.setValue(Loc);
}
- /// extendDef - Extend the current definition as far as possible down.
+ /// Extend the current definition as far as possible down.
+ ///
/// Stop when meeting an existing def or when leaving the live
- /// range of VNI.
- /// End points where VNI is no longer live are added to Kills.
- /// @param Idx Starting point for the definition.
- /// @param Loc Location number to propagate.
- /// @param LR Restrict liveness to where LR has the value VNI. May be null.
- /// @param VNI When LR is not null, this is the value to restrict to.
- /// @param Kills Append end points of VNI's live range to Kills.
- /// @param LIS Live intervals analysis.
+ /// range of VNI. End points where VNI is no longer live are added to Kills.
+ ///
+ /// We only propagate DBG_VALUES locally here. LiveDebugValues performs a
+ /// data-flow analysis to propagate them beyond basic block boundaries.
+ ///
+ /// \param Idx Starting point for the definition.
+ /// \param Loc Location number to propagate.
+ /// \param LR Restrict liveness to where LR has the value VNI. May be null.
+ /// \param VNI When LR is not null, this is the value to restrict to.
+ /// \param [out] Kills Append end points of VNI's live range to Kills.
+ /// \param LIS Live intervals analysis.
void extendDef(SlotIndex Idx, DbgValueLocation Loc,
LiveRange *LR, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS);
- /// addDefsFromCopies - The value in LI/LocNo may be copies to other
- /// registers. Determine if any of the copies are available at the kill
- /// points, and add defs if possible.
- /// @param LI Scan for copies of the value in LI->reg.
- /// @param LocNo Location number of LI->reg.
- /// @param WasIndirect Indicates if the original use of LI->reg was indirect
- /// @param Kills Points where the range of LocNo could be extended.
- /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
+  /// The value in LI/LocNo may be copied to other registers. Determine if
+ /// any of the copies are available at the kill points, and add defs if
+ /// possible.
+ ///
+ /// \param LI Scan for copies of the value in LI->reg.
+ /// \param LocNo Location number of LI->reg.
+ /// \param WasIndirect Indicates if the original use of LI->reg was indirect
+ /// \param Kills Points where the range of LocNo could be extended.
+ /// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here.
void addDefsFromCopies(
LiveInterval *LI, unsigned LocNo, bool WasIndirect,
const SmallVectorImpl<SlotIndex> &Kills,
SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS);
- /// computeIntervals - Compute the live intervals of all locations after
- /// collecting all their def points.
+ /// Compute the live intervals of all locations after collecting all their
+ /// def points.
void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
LiveIntervals &LIS, LexicalScopes &LS);
- /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
+ /// Replace OldReg ranges with NewRegs ranges where NewRegs is
/// live. Returns true if any changes were made.
bool splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
LiveIntervals &LIS);
- /// rewriteLocations - Rewrite virtual register locations according to the
- /// provided virtual register map. Record which locations were spilled.
- void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
- BitVector &SpilledLocations);
+ /// Rewrite virtual register locations according to the provided virtual
+ /// register map. Record the stack slot offsets for the locations that
+ /// were spilled.
+ void rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ SpillOffsetMap &SpillOffsets);
- /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
+ /// Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
- const BitVector &SpilledLocations);
+ const SpillOffsetMap &SpillOffsets);
- /// getDebugLoc - Return DebugLoc of this UserValue.
+ /// Return DebugLoc of this UserValue.
DebugLoc getDebugLoc() { return dl;}
void print(raw_ostream &, const TargetRegisterInfo *);
};
-/// LDVImpl - Implementation of the LiveDebugVariables pass.
+/// Implementation of the LiveDebugVariables pass.
class LDVImpl {
LiveDebugVariables &pass;
LocMap::Allocator allocator;
@@ -341,7 +353,7 @@ class LDVImpl {
/// Whether the machine function is modified during the pass.
bool ModifiedMF = false;
- /// userValues - All allocated UserValue instances.
+ /// All allocated UserValue instances.
SmallVector<std::unique_ptr<UserValue>, 8> userValues;
/// Map virtual register to eq class leader.
@@ -352,27 +364,31 @@ class LDVImpl {
using UVMap = DenseMap<const DILocalVariable *, UserValue *>;
UVMap userVarMap;
- /// getUserValue - Find or create a UserValue.
+ /// Find or create a UserValue.
UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr,
const DebugLoc &DL);
- /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ /// Find the EC leader for VirtReg or null.
UserValue *lookupVirtReg(unsigned VirtReg);
- /// handleDebugValue - Add DBG_VALUE instruction to our maps.
- /// @param MI DBG_VALUE instruction
- /// @param Idx Last valid SLotIndex before instruction.
- /// @return True if the DBG_VALUE instruction should be deleted.
+ /// Add DBG_VALUE instruction to our maps.
+ ///
+ /// \param MI DBG_VALUE instruction
+  /// \param Idx Last valid SlotIndex before instruction.
+ ///
+ /// \returns True if the DBG_VALUE instruction should be deleted.
bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
- /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
- /// a UserValue def for each instruction.
- /// @param mf MachineFunction to be scanned.
- /// @return True if any debug values were found.
+ /// Collect and erase all DBG_VALUE instructions, adding a UserValue def
+ /// for each instruction.
+ ///
+ /// \param mf MachineFunction to be scanned.
+ ///
+ /// \returns True if any debug values were found.
bool collectDebugValues(MachineFunction &mf);
- /// computeIntervals - Compute the live intervals of all user values after
- /// collecting all their def points.
+ /// Compute the live intervals of all user values after collecting all
+ /// their def points.
void computeIntervals();
public:
@@ -380,7 +396,7 @@ public:
bool runOnMachineFunction(MachineFunction &mf);
- /// clear - Release all memory.
+ /// Release all memory.
void clear() {
MF = nullptr;
userValues.clear();
@@ -393,13 +409,13 @@ public:
ModifiedMF = false;
}
- /// mapVirtReg - Map virtual register to an equivalence class.
+ /// Map virtual register to an equivalence class.
void mapVirtReg(unsigned VirtReg, UserValue *EC);
- /// splitRegister - Replace all references to OldReg with NewRegs.
+ /// Replace all references to OldReg with NewRegs.
void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
- /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
+ /// Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM);
void print(raw_ostream&);
@@ -578,30 +594,33 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
MachineBasicBlock *MBB = &*MFI;
for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
MBBI != MBBE;) {
- if (!MBBI->isDebugValue()) {
+ // Use the first debug instruction in the sequence to get a SlotIndex
+ // for following consecutive debug instructions.
+ if (!MBBI->isDebugInstr()) {
++MBBI;
continue;
}
- // DBG_VALUE has no slot index, use the previous instruction instead.
+      // Debug instructions have no slot index. Use the previous
+ // non-debug instruction's SlotIndex as its SlotIndex.
SlotIndex Idx =
MBBI == MBB->begin()
? LIS->getMBBStartIdx(MBB)
: LIS->getInstructionIndex(*std::prev(MBBI)).getRegSlot();
- // Handle consecutive DBG_VALUE instructions with the same slot index.
+ // Handle consecutive debug instructions with the same slot index.
do {
- if (handleDebugValue(*MBBI, Idx)) {
+ // Only handle DBG_VALUE in handleDebugValue(). Skip all other
+ // kinds of debug instructions.
+ if (MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) {
MBBI = MBB->erase(MBBI);
Changed = true;
} else
++MBBI;
- } while (MBBI != MBBE && MBBI->isDebugValue());
+ } while (MBBI != MBBE && MBBI->isDebugInstr());
}
}
return Changed;
}
-/// We only propagate DBG_VALUES locally here. LiveDebugValues performs a
-/// data-flow analysis to propagate them beyond basic block boundaries.
void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS) {
@@ -752,7 +771,15 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
}
SmallVector<SlotIndex, 16> Kills;
extendDef(Idx, Loc, LI, VNI, &Kills, LIS);
- if (LI)
+ // FIXME: Handle sub-registers in addDefsFromCopies. The problem is that
+ // if the original location for example is %vreg0:sub_hi, and we find a
+ // full register copy in addDefsFromCopies (at the moment it only handles
+ // full register copies), then we must add the sub1 sub-register index to
+ // the new location. However, that is only possible if the new virtual
+ // register is of the same regclass (or if there is an equivalent
+ // sub-register in that regclass). For now, simply skip handling copies if
+ // a sub-register is involved.
+ if (LI && !LocMO.getSubReg())
addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI,
LIS);
continue;
@@ -1039,8 +1066,10 @@ splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) {
static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
}
-void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
- BitVector &SpilledLocations) {
+void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ SpillOffsetMap &SpillOffsets) {
// Build a set of new locations with new numbers so we can coalesce our
// IntervalMap if two vreg intervals collapse to the same physical location.
// Use MapVector instead of SetVector because MapVector::insert returns the
@@ -1049,10 +1078,11 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
// FIXME: This will be problematic if we ever support direct and indirect
// frame index locations, i.e. expressing both variables in memory and
// 'int x, *px = &x'. The "spilled" bit must become part of the location.
- MapVector<MachineOperand, bool> NewLocations;
+ MapVector<MachineOperand, std::pair<bool, unsigned>> NewLocations;
SmallVector<unsigned, 4> LocNoMap(locations.size());
for (unsigned I = 0, E = locations.size(); I != E; ++I) {
bool Spilled = false;
+ unsigned SpillOffset = 0;
MachineOperand Loc = locations[I];
// Only virtual registers are rewritten.
if (Loc.isReg() && Loc.getReg() &&
@@ -1065,7 +1095,16 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
// non-existent sub-register, and %noreg is exactly what we want.
Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
} else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
- // FIXME: Translate SubIdx to a stackslot offset.
+ // Retrieve the stack slot offset.
+ unsigned SpillSize;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterClass *TRC = MRI.getRegClass(VirtReg);
+ bool Success = TII.getStackSlotRange(TRC, Loc.getSubReg(), SpillSize,
+ SpillOffset, MF);
+
+ // FIXME: Invalidate the location if the offset couldn't be calculated.
+ (void)Success;
+
Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
Spilled = true;
} else {
@@ -1076,20 +1115,22 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
// Insert this location if it doesn't already exist and record a mapping
// from the old number to the new number.
- auto InsertResult = NewLocations.insert({Loc, Spilled});
+ auto InsertResult = NewLocations.insert({Loc, {Spilled, SpillOffset}});
unsigned NewLocNo = std::distance(NewLocations.begin(), InsertResult.first);
LocNoMap[I] = NewLocNo;
}
- // Rewrite the locations and record which ones were spill slots.
+ // Rewrite the locations and record the stack slot offsets for spills.
locations.clear();
- SpilledLocations.clear();
- SpilledLocations.resize(NewLocations.size());
+ SpillOffsets.clear();
for (auto &Pair : NewLocations) {
+ bool Spilled;
+ unsigned SpillOffset;
+ std::tie(Spilled, SpillOffset) = Pair.second;
locations.push_back(Pair.first);
- if (Pair.second) {
+ if (Spilled) {
unsigned NewLocNo = std::distance(&*NewLocations.begin(), &Pair);
- SpilledLocations.set(NewLocNo);
+ SpillOffsets[NewLocNo] = SpillOffset;
}
}
@@ -1158,10 +1199,9 @@ findNextInsertLocation(MachineBasicBlock *MBB,
}
void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
- SlotIndex StopIdx,
- DbgValueLocation Loc, bool Spilled,
- LiveIntervals &LIS,
- const TargetInstrInfo &TII,
+ SlotIndex StopIdx, DbgValueLocation Loc,
+ bool Spilled, unsigned SpillOffset,
+ LiveIntervals &LIS, const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI) {
SlotIndex MBBEndIdx = LIS.getMBBEndIdx(&*MBB);
// Only search within the current MBB.
@@ -1184,12 +1224,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
// If the location was spilled, the new DBG_VALUE will be indirect. If the
// original DBG_VALUE was indirect, we need to add DW_OP_deref to indicate
- // that the original virtual register was a pointer.
+ // that the original virtual register was a pointer. Also, add the stack slot
+ // offset for the spilled register to the expression.
const DIExpression *Expr = Expression;
bool IsIndirect = Loc.wasIndirect();
if (Spilled) {
- if (IsIndirect)
- Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
+ auto Deref = IsIndirect ? DIExpression::WithDeref : DIExpression::NoDeref;
+ Expr =
+ DIExpression::prepend(Expr, DIExpression::NoDeref, SpillOffset, Deref);
IsIndirect = true;
}
@@ -1208,14 +1250,17 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
- const BitVector &SpilledLocations) {
+ const SpillOffsetMap &SpillOffsets) {
MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
SlotIndex Start = I.start();
SlotIndex Stop = I.stop();
DbgValueLocation Loc = I.value();
- bool Spilled = !Loc.isUndef() ? SpilledLocations.test(Loc.locNo()) : false;
+ auto SpillIt =
+ !Loc.isUndef() ? SpillOffsets.find(Loc.locNo()) : SpillOffsets.end();
+ bool Spilled = SpillIt != SpillOffsets.end();
+ unsigned SpillOffset = Spilled ? SpillIt->second : 0;
// If the interval start was trimmed to the lexical scope insert the
// DBG_VALUE at the previous index (otherwise it appears after the
@@ -1228,7 +1273,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
+ insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII,
+ TRI);
// This interval may span multiple basic blocks.
// Insert a DBG_VALUE into each one.
while (Stop > MBBEnd) {
@@ -1238,7 +1284,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
break;
MBBEnd = LIS.getMBBEndIdx(&*MBB);
LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
+ insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII,
+ TRI);
}
LLVM_DEBUG(dbgs() << '\n');
if (MBB == MFEnd)
@@ -1253,11 +1300,11 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
if (!MF)
return;
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- BitVector SpilledLocations;
+ SpillOffsetMap SpillOffsets;
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
LLVM_DEBUG(userValues[i]->print(dbgs(), TRI));
- userValues[i]->rewriteLocations(*VRM, *TRI, SpilledLocations);
- userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpilledLocations);
+ userValues[i]->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets);
}
EmitDone = true;
}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
index aa35880b063a..0060399c2b04 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -39,13 +39,6 @@ public:
LiveDebugVariables();
~LiveDebugVariables() override;
- /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
- /// @param OldReg Old virtual register that is going away.
- /// @param NewReg New register holding the user variables.
- /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
- /// register.
- void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
-
/// splitRegister - Move any user variables in OldReg to the live ranges in
/// NewRegs where they are live. Mark the values as unavailable where no new
/// register is live.
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index 83dd982587c6..2340b6abd87c 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -1310,17 +1310,17 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
MachineOperand &MO = *RI;
MachineInstr *MI = RI->getParent();
++RI;
- // DBG_VALUE instructions don't have slot indexes, so get the index of the
- // instruction before them.
- // Normally, DBG_VALUE instructions are removed before this function is
- // called, but it is not a requirement.
- SlotIndex Idx;
- if (MI->isDebugValue())
- Idx = LIS.getSlotIndexes()->getIndexBefore(*MI);
- else
- Idx = LIS.getInstructionIndex(*MI);
- LiveQueryResult LRQ = LI.Query(Idx);
- const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ const VNInfo *VNI;
+ if (MI->isDebugValue()) {
+ // DBG_VALUE instructions don't have slot indexes, so get the index of
+ // the instruction before them. The value is defined there too.
+ SlotIndex Idx = LIS.getSlotIndexes()->getIndexBefore(*MI);
+ VNI = LI.Query(Idx).valueOut();
+ } else {
+ SlotIndex Idx = LIS.getInstructionIndex(*MI);
+ LiveQueryResult LRQ = LI.Query(Idx);
+ VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ }
// In the case of an <undef> use that isn't tied to any def, VNI will be
// NULL. If the use is tied to a def, VNI will be the defined value.
if (!VNI)
diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
index 86c6c8e29f9a..619643acb6d3 100644
--- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -29,8 +29,8 @@ using namespace llvm;
/// The clobbers set will be the list of live registers clobbered
/// by the regmask.
void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
- SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers) {
- SparseSet<unsigned>::iterator LRI = LiveRegs.begin();
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> *Clobbers) {
+ RegisterSet::iterator LRI = LiveRegs.begin();
while (LRI != LiveRegs.end()) {
if (MO.clobbersPhysReg(*LRI)) {
if (Clobbers)
@@ -83,7 +83,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
/// on accurate kill flags. If possible use stepBackward() instead of this
/// function.
void LivePhysRegs::stepForward(const MachineInstr &MI,
- SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers) {
// Remove killed registers from the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg() && !O->isDebug()) {
@@ -142,7 +142,7 @@ LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
#endif
bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
- unsigned Reg) const {
+ MCPhysReg Reg) const {
if (LiveRegs.count(Reg))
return false;
if (MRI.isReserved(Reg))
@@ -157,7 +157,7 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
/// Add live-in registers of basic block \p MBB to \p LiveRegs.
void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) {
for (const auto &LI : MBB.liveins()) {
- unsigned Reg = LI.PhysReg;
+ MCPhysReg Reg = LI.PhysReg;
LaneBitmask Mask = LI.LaneMask;
MCSubRegIndexIterator S(Reg, TRI);
assert(Mask.any() && "Invalid livein mask");
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 04324943dfad..70e135ab1aff 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -364,7 +364,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
#ifndef NDEBUG
if (MBB->pred_empty()) {
MBB->getParent()->verify();
- errs() << "Use of " << printReg(PhysReg)
+ errs() << "Use of " << printReg(PhysReg, MRI->getTargetRegisterInfo())
<< " does not have a corresponding definition on every path:\n";
const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
if (MI != nullptr)
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index f90ce0c8cd2a..795028e97929 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -328,7 +328,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Sort the frame references by local offset.
// Use frame index as a tie-breaker in case MI's have the same offset.
- llvm::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+ llvm::sort(FrameReferenceInsns);
MachineBasicBlock *Entry = &Fn.front();
diff --git a/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index fa43d13b1b85..f17c23619ed5 100644
--- a/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -134,10 +134,10 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions,
StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
}
- llvm::sort(StringInstrMap.begin(), StringInstrMap.end(),
- [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
- return (a.first < b.first);
- });
+ llvm::sort(StringInstrMap,
+ [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
+ return (a.first < b.first);
+ });
for (auto &II : StringInstrMap) {
@@ -677,8 +677,7 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
std::vector<MachineInstr *> VisitedMIs;
- std::copy(Candidates.begin(), Candidates.end(),
- std::back_inserter(VisitedMIs));
+ llvm::copy(Candidates, std::back_inserter(VisitedMIs));
std::vector<TypedVReg> VRegs;
for (auto candidate : Candidates) {
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index da05c9a22785..265877c2f5b4 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -202,6 +202,9 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("contract", MIToken::kw_contract)
.Case("afn", MIToken::kw_afn)
.Case("reassoc", MIToken::kw_reassoc)
+ .Case("nuw" , MIToken::kw_nuw)
+ .Case("nsw" , MIToken::kw_nsw)
+ .Case("exact" , MIToken::kw_exact)
.Case("debug-location", MIToken::kw_debug_location)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
@@ -217,6 +220,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("undefined", MIToken::kw_cfi_undefined)
.Case("register", MIToken::kw_cfi_register)
.Case("window_save", MIToken::kw_cfi_window_save)
+ .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state)
.Case("blockaddress", MIToken::kw_blockaddress)
.Case("intrinsic", MIToken::kw_intrinsic)
.Case("target-index", MIToken::kw_target_index)
@@ -245,6 +249,9 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("successors", MIToken::kw_successors)
.Case("floatpred", MIToken::kw_floatpred)
.Case("intpred", MIToken::kw_intpred)
+ .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
+ .Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
+ .Case("unknown-size", MIToken::kw_unknown_size)
.Default(MIToken::Identifier);
}
@@ -460,6 +467,53 @@ static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
ErrorCallback);
}
+static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ const StringRef Rule = "<mcsymbol ";
+ if (!C.remaining().startswith(Rule))
+ return None;
+ auto Start = C;
+ C.advance(Rule.size());
+
+ // Try a simple unquoted name.
+ if (C.peek() != '"') {
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ StringRef String = Start.upto(C).drop_front(Rule.size());
+ if (C.peek() != '>') {
+ ErrorCallback(C.location(),
+ "expected the '<mcsymbol ...' to be closed by a '>'");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ C.advance();
+
+ Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String);
+ return C;
+ }
+
+ // Otherwise lex out a quoted name.
+ Cursor R = lexStringConstant(C, ErrorCallback);
+ if (!R) {
+ ErrorCallback(C.location(),
+ "unable to parse quoted string from opening quote");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ StringRef String = Start.upto(R).drop_front(Rule.size());
+ if (R.peek() != '>') {
+ ErrorCallback(R.location(),
+ "expected the '<mcsymbol ...' to be closed by a '>'");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ R.advance();
+
+ Token.reset(MIToken::MCSymbol, Start.upto(R))
+ .setOwnedStringValue(unescapeQuotedString(String));
+ return R;
+}
+
static bool isValidHexFloatingPointPrefix(char C) {
return C == 'H' || C == 'K' || C == 'L' || C == 'M';
}
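
Examples of inputs the maybeLexMCSymbol rule added above accepts (symbol names invented, not taken from an existing test): an unquoted name made of identifier characters, or a quoted name for anything else, both closed by '>':

    // Hedged examples, assuming llvm::StringRef.
    StringRef Simple = "<mcsymbol .Lpre_sym>";            // unquoted identifier form
    StringRef Quoted = "<mcsymbol \"sym with spaces\">";  // quoted form
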
@@ -523,6 +577,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
.Case("!noalias", MIToken::md_noalias)
.Case("!range", MIToken::md_range)
.Case("!DIExpression", MIToken::md_diexpr)
+ .Case("!DILocation", MIToken::md_dilocation)
.Default(MIToken::Error);
}
@@ -657,6 +712,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return R.remaining();
if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
return R.remaining();
+ if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback))
+ return R.remaining();
if (Cursor R = maybeLexHexadecimalLiteral(C, Token))
return R.remaining();
if (Cursor R = maybeLexNumericalLiteral(C, Token))
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
index e21c71532f79..ceff79087d81 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -71,6 +71,9 @@ struct MIToken {
kw_contract,
kw_afn,
kw_reassoc,
+ kw_nuw,
+ kw_nsw,
+ kw_exact,
kw_debug_location,
kw_cfi_same_value,
kw_cfi_offset,
@@ -86,6 +89,7 @@ struct MIToken {
kw_cfi_restore_state,
kw_cfi_undefined,
kw_cfi_window_save,
+ kw_cfi_aarch64_negate_ra_sign_state,
kw_blockaddress,
kw_intrinsic,
kw_target_index,
@@ -113,6 +117,9 @@ struct MIToken {
kw_successors,
kw_floatpred,
kw_intpred,
+ kw_pre_instr_symbol,
+ kw_post_instr_symbol,
+ kw_unknown_size,
// Named metadata keywords
md_tbaa,
@@ -120,6 +127,7 @@ struct MIToken {
md_noalias,
md_range,
md_diexpr,
+ md_dilocation,
// Identifier tokens
Identifier,
@@ -132,6 +140,7 @@ struct MIToken {
NamedGlobalValue,
GlobalValue,
ExternalSymbol,
+ MCSymbol,
// Other tokens
IntegerLiteral,
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index a61e7872f1ae..6f2d8bb53ac8 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/MIRPrinter.h"
@@ -54,6 +55,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -221,8 +223,10 @@ public:
bool parseSubRegisterIndexOperand(MachineOperand &Dest);
bool parseJumpTableIndexOperand(MachineOperand &Dest);
bool parseExternalSymbolOperand(MachineOperand &Dest);
+ bool parseMCSymbolOperand(MachineOperand &Dest);
bool parseMDNode(MDNode *&Node);
bool parseDIExpression(MDNode *&Expr);
+ bool parseDILocation(MDNode *&Expr);
bool parseMetadataOperand(MachineOperand &Dest);
bool parseCFIOffset(int &Offset);
bool parseCFIRegister(unsigned &Reg);
@@ -250,6 +254,7 @@ public:
bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID);
bool parseOptionalAtomicOrdering(AtomicOrdering &Order);
bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
+ bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol);
private:
/// Convert the integer literal in the current token into an unsigned integer.
@@ -346,6 +351,9 @@ private:
/// Return true if the name isn't a name of a target MMO flag.
bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag);
+ /// Get or create an MCSymbol for a given name.
+ MCSymbol *getOrCreateMCSymbol(StringRef Name);
+
/// parseStringConstant
/// ::= StringConstant
bool parseStringConstant(std::string &Result);
@@ -737,12 +745,16 @@ bool MIParser::parse(MachineInstr *&MI) {
return true;
// Parse the remaining machine operands.
- while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
+ while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) &&
+ Token.isNot(MIToken::kw_post_instr_symbol) &&
+ Token.isNot(MIToken::kw_debug_location) &&
Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
auto Loc = Token.location();
Optional<unsigned> TiedDefIdx;
if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx))
return true;
+ if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg())
+ MO.setIsDebug();
Operands.push_back(
ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
@@ -753,14 +765,29 @@ bool MIParser::parse(MachineInstr *&MI) {
lex();
}
+ MCSymbol *PreInstrSymbol = nullptr;
+ if (Token.is(MIToken::kw_pre_instr_symbol))
+ if (parsePreOrPostInstrSymbol(PreInstrSymbol))
+ return true;
+ MCSymbol *PostInstrSymbol = nullptr;
+ if (Token.is(MIToken::kw_post_instr_symbol))
+ if (parsePreOrPostInstrSymbol(PostInstrSymbol))
+ return true;
+
DebugLoc DebugLocation;
if (Token.is(MIToken::kw_debug_location)) {
lex();
- if (Token.isNot(MIToken::exclaim))
- return error("expected a metadata node after 'debug-location'");
MDNode *Node = nullptr;
- if (parseMDNode(Node))
- return true;
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(Node))
+ return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(Node))
+ return true;
+ } else
+ return error("expected a metadata node after 'debug-location'");
+ if (!isa<DILocation>(Node))
+ return error("referenced metadata is not a DILocation");
DebugLocation = DebugLoc(Node);
}
@@ -795,12 +822,12 @@ bool MIParser::parse(MachineInstr *&MI) {
MI->addOperand(MF, Operand.Operand);
if (assignRegisterTies(*MI, Operands))
return true;
- if (MemOperands.empty())
- return false;
- MachineInstr::mmo_iterator MemRefs =
- MF.allocateMemRefsArray(MemOperands.size());
- std::copy(MemOperands.begin(), MemOperands.end(), MemRefs);
- MI->setMemRefs(MemRefs, MemRefs + MemOperands.size());
+ if (PreInstrSymbol)
+ MI->setPreInstrSymbol(MF, PreInstrSymbol);
+ if (PostInstrSymbol)
+ MI->setPostInstrSymbol(MF, PostInstrSymbol);
+ if (!MemOperands.empty())
+ MI->setMemRefs(MF, MemOperands);
return false;
}
@@ -876,6 +903,9 @@ bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
} else if (Token.is(MIToken::md_diexpr)) {
if (parseDIExpression(Node))
return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(Node))
+ return true;
} else
return error("expected a metadata node");
if (Token.isNot(MIToken::Eof))
@@ -945,7 +975,10 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_arcp) ||
Token.is(MIToken::kw_contract) ||
Token.is(MIToken::kw_afn) ||
- Token.is(MIToken::kw_reassoc)) {
+ Token.is(MIToken::kw_reassoc) ||
+ Token.is(MIToken::kw_nuw) ||
+ Token.is(MIToken::kw_nsw) ||
+ Token.is(MIToken::kw_exact)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
@@ -965,6 +998,12 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::FmAfn;
if (Token.is(MIToken::kw_reassoc))
Flags |= MachineInstr::FmReassoc;
+ if (Token.is(MIToken::kw_nuw))
+ Flags |= MachineInstr::NoUWrap;
+ if (Token.is(MIToken::kw_nsw))
+ Flags |= MachineInstr::NoSWrap;
+ if (Token.is(MIToken::kw_exact))
+ Flags |= MachineInstr::IsExact;
lex();
}
@@ -1573,6 +1612,16 @@ bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseMCSymbolOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::MCSymbol));
+ MCSymbol *Symbol = getOrCreateMCSymbol(Token.stringValue());
+ lex();
+ Dest = MachineOperand::CreateMCSymbol(Symbol);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::SubRegisterIndex));
StringRef Name = Token.stringValue();
@@ -1643,6 +1692,109 @@ bool MIParser::parseDIExpression(MDNode *&Expr) {
return false;
}
+bool MIParser::parseDILocation(MDNode *&Loc) {
+ assert(Token.is(MIToken::md_dilocation));
+ lex();
+
+ bool HaveLine = false;
+ unsigned Line = 0;
+ unsigned Column = 0;
+ MDNode *Scope = nullptr;
+ MDNode *InlinedAt = nullptr;
+ bool ImplicitCode = false;
+
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+
+ if (Token.isNot(MIToken::rparen)) {
+ do {
+ if (Token.is(MIToken::Identifier)) {
+ if (Token.stringValue() == "line") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+ Line = Token.integerValue().getZExtValue();
+ HaveLine = true;
+ lex();
+ continue;
+ }
+ if (Token.stringValue() == "column") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+ Column = Token.integerValue().getZExtValue();
+ lex();
+ continue;
+ }
+ if (Token.stringValue() == "scope") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (parseMDNode(Scope))
+ return error("expected metadata node");
+ if (!isa<DIScope>(Scope))
+ return error("expected DIScope node");
+ continue;
+ }
+ if (Token.stringValue() == "inlinedAt") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(InlinedAt))
+ return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(InlinedAt))
+ return true;
+ } else
+ return error("expected metadata node");
+ if (!isa<DILocation>(InlinedAt))
+ return error("expected DILocation node");
+ continue;
+ }
+ if (Token.stringValue() == "isImplicitCode") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (!Token.is(MIToken::Identifier))
+ return error("expected true/false");
+ // As far as I can see, we don't have any existing need for parsing
+ // true/false in MIR yet. Do it ad-hoc until there's something else
+ // that needs it.
+ if (Token.stringValue() == "true")
+ ImplicitCode = true;
+ else if (Token.stringValue() == "false")
+ ImplicitCode = false;
+ else
+ return error("expected true/false");
+ lex();
+ continue;
+ }
+ }
+ return error(Twine("invalid DILocation argument '") +
+ Token.stringValue() + "'");
+ } while (consumeIfPresent(MIToken::comma));
+ }
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ if (!HaveLine)
+ return error("DILocation requires line number");
+ if (!Scope)
+ return error("DILocation requires a scope");
+
+ Loc = DILocation::get(MF.getFunction().getContext(), Line, Column, Scope,
+ InlinedAt, ImplicitCode);
+ return false;
+}
+
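
Putting the pieces together (all values invented): a MIR line such as "debug-location !DILocation(line: 3, column: 7, scope: !5)" is accepted by this parser and lowered through the DILocation::get call above, the equivalent of:

    // Hedged sketch; Ctx and Scope stand in for the parsed context and scope node.
    MDNode *Loc = DILocation::get(Ctx, /*Line=*/3, /*Column=*/7, Scope,
                                  /*InlinedAt=*/nullptr,
                                  /*ImplicitCode=*/false);
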
bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
MDNode *Node = nullptr;
if (Token.is(MIToken::exclaim)) {
@@ -1779,6 +1931,9 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
case MIToken::kw_cfi_window_save:
CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
break;
+ case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ break;
case MIToken::kw_cfi_escape: {
std::string Values;
if (parseCFIEscapeValues(Values))
@@ -2050,6 +2205,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
return parseJumpTableIndexOperand(Dest);
case MIToken::ExternalSymbol:
return parseExternalSymbolOperand(Dest);
+ case MIToken::MCSymbol:
+ return parseMCSymbolOperand(Dest);
case MIToken::SubRegisterIndex:
return parseSubRegisterIndexOperand(Dest);
case MIToken::md_diexpr:
@@ -2069,6 +2226,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
case MIToken::kw_cfi_restore_state:
case MIToken::kw_cfi_undefined:
case MIToken::kw_cfi_window_save:
+ case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
return parseCFIOperand(Dest);
case MIToken::kw_blockaddress:
return parseBlockAddressOperand(Dest);
@@ -2423,7 +2581,7 @@ bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) {
return false;
}
- return error("expected an atomic scope, ordering or a size integer literal");
+ return error("expected an atomic scope, ordering or a size specification");
}
bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
@@ -2462,11 +2620,17 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseOptionalAtomicOrdering(FailureOrder))
return true;
- if (Token.isNot(MIToken::IntegerLiteral))
- return error("expected the size integer literal after memory operation");
+ if (Token.isNot(MIToken::IntegerLiteral) &&
+ Token.isNot(MIToken::kw_unknown_size))
+ return error("expected the size integer literal or 'unknown-size' after "
+ "memory operation");
uint64_t Size;
- if (getUint64(Size))
- return true;
+ if (Token.is(MIToken::IntegerLiteral)) {
+ if (getUint64(Size))
+ return true;
+ } else if (Token.is(MIToken::kw_unknown_size)) {
+ Size = MemoryLocation::UnknownSize;
+ }
lex();
MachinePointerInfo Ptr = MachinePointerInfo();
@@ -2483,7 +2647,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseMachinePointerInfo(Ptr))
return true;
}
- unsigned BaseAlignment = Size;
+ unsigned BaseAlignment = (Size != MemoryLocation::UnknownSize ? Size : 1);
AAMDNodes AAInfo;
MDNode *Range = nullptr;
while (consumeIfPresent(MIToken::comma)) {
@@ -2529,6 +2693,24 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
return false;
}
+bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) {
+ assert((Token.is(MIToken::kw_pre_instr_symbol) ||
+ Token.is(MIToken::kw_post_instr_symbol)) &&
+ "Invalid token for a pre- post-instruction symbol!");
+ lex();
+ if (Token.isNot(MIToken::MCSymbol))
+ return error("expected a symbol after 'pre-instr-symbol'");
+ Symbol = getOrCreateMCSymbol(Token.stringValue());
+ lex();
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
+ return false;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine operand");
+ lex();
+ return false;
+}
+
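
A sketch of the round trip this enables (symbol name invented): after lexing "<mcsymbol .Lfoo>" following 'pre-instr-symbol', the parser recreates the symbol through MCContext and re-attaches it to the instruction, which the printer later emits in the same form:

    // Hedged sketch; MF and MI are the function and instruction being parsed.
    MCSymbol *Sym = MF.getContext().getOrCreateSymbol(".Lfoo");
    MI->setPreInstrSymbol(MF, Sym);  // printed back as "pre-instr-symbol <mcsymbol .Lfoo>"
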
void MIParser::initNames2InstrOpCodes() {
if (!Names2InstrOpCodes.empty())
return;
@@ -2759,6 +2941,15 @@ bool MIParser::getMMOTargetFlag(StringRef Name,
return false;
}
+MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
+ // FIXME: Currently we can't recognize temporary or local symbols and call all
+ // of the appropriate forms to create them. However, this handles basic cases
+ // well as most of the special aspects are recognized by a prefix on their
+ // name, and the input names should already be unique. For test cases, keeping
+ // the symbol name out of the symbol table isn't terribly important.
+ return MF.getContext().getOrCreateSymbol(Name);
+}
+
bool MIParser::parseStringConstant(std::string &Result) {
if (Token.isNot(MIToken::StringConstant))
return error("expected string constant");
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 3d2db97acb48..00da92a92ec6 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -355,6 +355,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (YamlMF.Alignment)
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
+ MF.setHasWinCFI(YamlMF.HasWinCFI);
if (YamlMF.Legalized)
MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
@@ -580,6 +581,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
MFI.setHasCalls(YamlMFI.HasCalls);
if (YamlMFI.MaxCallFrameSize != ~0u)
MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
+ MFI.setCVBytesOfCalleeSavedRegisters(YamlMFI.CVBytesOfCalleeSavedRegisters);
MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
MFI.setHasVAStart(YamlMFI.HasVAStart);
MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
index bf8cd1489ec5..d9dcc428943f 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -50,6 +50,7 @@
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -195,6 +196,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.Name = MF.getName();
YamlMF.Alignment = MF.getAlignment();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
+ YamlMF.HasWinCFI = MF.hasWinCFI();
YamlMF.Legalized = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Legalized);
@@ -327,6 +329,8 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
YamlMFI.HasCalls = MFI.hasCalls();
YamlMFI.MaxCallFrameSize = MFI.isMaxCallFrameSizeComputed()
? MFI.getMaxCallFrameSize() : ~0u;
+ YamlMFI.CVBytesOfCalleeSavedRegisters =
+ MFI.getCVBytesOfCalleeSavedRegisters();
YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
YamlMFI.HasVAStart = MFI.hasVAStart();
YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
@@ -397,18 +401,20 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
yaml::StringValue Reg;
printRegMIR(CSInfo.getReg(), Reg, TRI);
- auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
- assert(StackObjectInfo != StackObjectOperandMapping.end() &&
- "Invalid stack object index");
- const FrameIndexOperand &StackObject = StackObjectInfo->second;
- if (StackObject.IsFixed) {
- YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
- YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
- CSInfo.isRestored();
- } else {
- YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
- YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
- CSInfo.isRestored();
+ if (!CSInfo.isSpilledToReg()) {
+ auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ if (StackObject.IsFixed) {
+ YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
+ CSInfo.isRestored();
+ } else {
+ YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
+ CSInfo.isRestored();
+ }
}
}
for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
@@ -694,6 +700,12 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "afn ";
if (MI.getFlag(MachineInstr::FmReassoc))
OS << "reassoc ";
+ if (MI.getFlag(MachineInstr::NoUWrap))
+ OS << "nuw ";
+ if (MI.getFlag(MachineInstr::NoSWrap))
+ OS << "nsw ";
+ if (MI.getFlag(MachineInstr::IsExact))
+ OS << "exact ";
OS << TII->getName(MI.getOpcode());
if (I < E)
@@ -708,6 +720,23 @@ void MIPrinter::print(const MachineInstr &MI) {
NeedComma = true;
}
+ // Print any optional symbols attached to this instruction as-if they were
+ // operands.
+ if (MCSymbol *PreInstrSymbol = MI.getPreInstrSymbol()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " pre-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PreInstrSymbol);
+ NeedComma = true;
+ }
+ if (MCSymbol *PostInstrSymbol = MI.getPostInstrSymbol()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " post-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PostInstrSymbol);
+ NeedComma = true;
+ }
+
if (const DebugLoc &DL = MI.getDebugLoc()) {
if (NeedComma)
OS << ',';
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 38e8369dc739..03771bc5dae1 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -110,6 +110,7 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
// use/def lists.
MachineFunction *MF = Parent->getParent();
N->AddRegOperandsToUseLists(MF->getRegInfo());
+ MF->handleInsertion(*N);
}
/// When we remove an instruction from a basic block list, we update its parent
@@ -118,8 +119,10 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
assert(N->getParent() && "machine instruction not in a basic block");
// Remove from the use/def lists.
- if (MachineFunction *MF = N->getMF())
+ if (MachineFunction *MF = N->getMF()) {
+ MF->handleRemoval(*N);
N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
+ }
N->setParent(nullptr);
}
@@ -359,7 +362,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Print human readable probabilities as comments.
OS << "; ";
for (auto I = succ_begin(), E = succ_end(); I != E; ++I) {
- const BranchProbability &BP = *getProbabilityIterator(I);
+ const BranchProbability &BP = getSuccProbability(I);
if (I != succ_begin())
OS << ", ";
OS << printMBBReference(**I) << '('
@@ -458,7 +461,7 @@ bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
}
void MachineBasicBlock::sortUniqueLiveIns() {
- llvm::sort(LiveIns.begin(), LiveIns.end(),
+ llvm::sort(LiveIns,
[](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) {
return LI0.PhysReg < LI1.PhysReg;
});
@@ -1375,13 +1378,53 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
unsigned Neighborhood) const {
unsigned N = Neighborhood;
- // Start by searching backwards from Before, looking for kills, reads or defs.
+ // Try searching forwards from Before, looking for reads or defs.
const_iterator I(Before);
+ for (; I != end() && N > 0; ++I) {
+ if (I->isDebugInstr())
+ continue;
+
+ --N;
+
+ MachineOperandIteratorBase::PhysRegInfo Info =
+ ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
+
+ // Register is live when we read it here.
+ if (Info.Read)
+ return LQR_Live;
+ // Register is dead if we can fully overwrite or clobber it here.
+ if (Info.FullyDefined || Info.Clobbered)
+ return LQR_Dead;
+ }
+
+ // If we reached the end, it is safe to clobber Reg at the end of a block if
+ // no successor has it live in.
+ if (I == end()) {
+ for (MachineBasicBlock *S : successors()) {
+ for (const MachineBasicBlock::RegisterMaskPair &LI : S->liveins()) {
+ if (TRI->regsOverlap(LI.PhysReg, Reg))
+ return LQR_Live;
+ }
+ }
+
+ return LQR_Dead;
+ }
+
+
+ N = Neighborhood;
+
+ // Start by searching backwards from Before, looking for kills, reads or defs.
+ I = const_iterator(Before);
// If this is the first insn in the block, don't search backwards.
if (I != begin()) {
do {
--I;
+ if (I->isDebugInstr())
+ continue;
+
+ --N;
+
MachineOperandIteratorBase::PhysRegInfo Info =
ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
@@ -1406,39 +1449,20 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
// Register must be live if we read it.
if (Info.Read)
return LQR_Live;
- } while (I != begin() && --N > 0);
+
+ } while (I != begin() && N > 0);
}
// Did we get to the start of the block?
if (I == begin()) {
// If so, the register's state is definitely defined by the live-in state.
- for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid();
- ++RAI)
- if (isLiveIn(*RAI))
+ for (const MachineBasicBlock::RegisterMaskPair &LI : liveins())
+ if (TRI->regsOverlap(LI.PhysReg, Reg))
return LQR_Live;
return LQR_Dead;
}
- N = Neighborhood;
-
- // Try searching forwards from Before, looking for reads or defs.
- I = const_iterator(Before);
- // If this is the last insn in the block, don't search forwards.
- if (I != end()) {
- for (++I; I != end() && N > 0; ++I, --N) {
- MachineOperandIteratorBase::PhysRegInfo Info =
- ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
-
- // Register is live when we read it here.
- if (Info.Read)
- return LQR_Live;
- // Register is dead if we can fully overwrite or clobber it here.
- if (Info.FullyDefined || Info.Clobbered)
- return LQR_Dead;
- }
- }
-
// At this point we have no idea of the liveness of the register.
return LQR_Unknown;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 21350df624e7..4fee9c4ea027 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -316,7 +316,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// A type for a block filter set.
using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
- /// Pair struct containing basic block and taildup profitiability
+ /// Pair struct containing basic block and taildup profitability
struct BlockAndTailDupResult {
MachineBasicBlock *BB;
bool ShouldTailDup;
@@ -2497,7 +2497,8 @@ void MachineBlockPlacement::alignBlocks() {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F->getFunction().optForSize())
+ if (F->getFunction().optForMinSize() ||
+ (F->getFunction().optForSize() && !TLI->alignLoopsWithOptSize()))
return;
BlockChain &FunctionChain = *BlockToChain[&F->front()];
if (FunctionChain.begin() == FunctionChain.end())
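
With this change, block alignment is still performed under plain -Os when the target opts in through the hook consulted above; a hedged sketch of such an override (the target class name is invented):

    // Sketch only; assumes the hook name used in the call above is a virtual
    // member of the target lowering base class.
    class MyTargetLowering : public TargetLowering {
    public:
      bool alignLoopsWithOptSize() const override { return true; }
    };
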
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 6c92b1d426d6..6ee8571c28aa 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -180,6 +180,10 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
continue;
LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
LLVM_DEBUG(dbgs() << "*** to: " << *MI);
+
+ // Update matching debug values.
+ DefMI->changeDebugValuesDefReg(SrcReg);
+
// Propagate SrcReg of copies to MI.
MO.setReg(SrcReg);
MRI->clearKillFlags(SrcReg);
@@ -231,6 +235,21 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
return false;
}
+static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
+ const MachineFunction &MF,
+ const TargetRegisterInfo &TRI) {
+ // MachineRegisterInfo::isConstantPhysReg directly called by
+ // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
+ // reserved registers to be frozen. That doesn't cause a problem post-ISel as
+ // most (if not all) targets freeze reserved registers right after ISel.
+ //
+ // It does cause issues mid-GlobalISel, however, hence the additional
+ // reservedRegsFrozen check.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return TRI.isCallerPreservedPhysReg(Reg, MF) ||
+ (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
+}
+
/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
/// physical registers (except for dead defs of physical registers). It also
/// returns the physical register def by reference if it's the only one and the
@@ -250,7 +269,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
// Reading either caller preserved or constant physregs is ok.
- if (!MRI->isCallerPreservedOrConstPhysReg(Reg))
+ if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
}
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
index 0c6efff7bb40..f51b482e20e3 100644
--- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -231,6 +231,8 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
// Get the first instruction that uses MO
MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
RI++;
+ if (RI == MRI->reg_end())
+ continue;
MachineInstr *UseMO = RI->getParent();
unsigned LatencyOp = 0;
if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 3bf8147a06c3..19879fe89007 100644
--- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -74,58 +74,154 @@ DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
namespace {
-using RegList = SmallVector<unsigned, 4>;
-using SourceMap = DenseMap<unsigned, RegList>;
-using Reg2MIMap = DenseMap<unsigned, MachineInstr *>;
-
- class MachineCopyPropagation : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- const MachineRegisterInfo *MRI;
-
- public:
- static char ID; // Pass identification, replacement for typeid
+class CopyTracker {
+ struct CopyInfo {
+ MachineInstr *MI;
+ SmallVector<unsigned, 4> DefRegs;
+ bool Avail;
+ };
- MachineCopyPropagation() : MachineFunctionPass(ID) {
- initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ DenseMap<unsigned, CopyInfo> Copies;
+
+public:
+ /// Mark all of the given registers and their subregisters as unavailable for
+ /// copying.
+ void markRegsUnavailable(ArrayRef<unsigned> Regs,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned Reg : Regs) {
+ // Source of copy is no longer available for propagation.
+ for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
+ auto CI = Copies.find(*RUI);
+ if (CI != Copies.end())
+ CI->second.Avail = false;
+ }
}
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
+ /// Clobber a single register, removing it from the tracker's copy maps.
+ void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
+ for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
+ auto I = Copies.find(*RUI);
+ if (I != Copies.end()) {
+ // When we clobber the source of a copy, we need to clobber everything
+ // it defined.
+ markRegsUnavailable(I->second.DefRegs, TRI);
+ // When we clobber the destination of a copy, we need to clobber the
+ // whole register it defined.
+ if (MachineInstr *MI = I->second.MI)
+ markRegsUnavailable({MI->getOperand(0).getReg()}, TRI);
+ // Now we can erase the copy.
+ Copies.erase(I);
+ }
}
+ }
+
+ /// Add this copy's registers into the tracker's copy maps.
+ void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) {
+ assert(MI->isCopy() && "Tracking non-copy?");
+
+ unsigned Def = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
- bool runOnMachineFunction(MachineFunction &MF) override;
+ // Remember Def is defined by the copy.
+ for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
+ Copies[*RUI] = {MI, {}, true};
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
+ // Remember source that's copied to Def. Once it's clobbered, then
+ // it's no longer available for copy propagation.
+ for (MCRegUnitIterator RUI(Src, &TRI); RUI.isValid(); ++RUI) {
+ auto I = Copies.insert({*RUI, {nullptr, {}, false}});
+ auto &Copy = I.first->second;
+ if (!is_contained(Copy.DefRegs, Def))
+ Copy.DefRegs.push_back(Def);
}
+ }
+
+ bool hasAnyCopies() {
+ return !Copies.empty();
+ }
- private:
- void ClobberRegister(unsigned Reg);
- void ReadRegister(unsigned Reg);
- void CopyPropagateBlock(MachineBasicBlock &MBB);
- bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
- void forwardUses(MachineInstr &MI);
- bool isForwardableRegClassCopy(const MachineInstr &Copy,
- const MachineInstr &UseI, unsigned UseIdx);
- bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
+ MachineInstr *findCopyForUnit(unsigned RegUnit, const TargetRegisterInfo &TRI,
+ bool MustBeAvailable = false) {
+ auto CI = Copies.find(RegUnit);
+ if (CI == Copies.end())
+ return nullptr;
+ if (MustBeAvailable && !CI->second.Avail)
+ return nullptr;
+ return CI->second.MI;
+ }
- /// Candidates for deletion.
- SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;
+ MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg,
+ const TargetRegisterInfo &TRI) {
+ // We check the first RegUnit here, since we'll only be interested in the
+ // copy if it copies the entire register anyway.
+ MCRegUnitIterator RUI(Reg, &TRI);
+ MachineInstr *AvailCopy =
+ findCopyForUnit(*RUI, TRI, /*MustBeAvailable=*/true);
+ if (!AvailCopy ||
+ !TRI.isSubRegisterEq(AvailCopy->getOperand(0).getReg(), Reg))
+ return nullptr;
+
+ // Check that the available copy isn't clobbered by any regmasks between
+ // itself and the destination.
+ unsigned AvailSrc = AvailCopy->getOperand(1).getReg();
+ unsigned AvailDef = AvailCopy->getOperand(0).getReg();
+ for (const MachineInstr &MI :
+ make_range(AvailCopy->getIterator(), DestCopy.getIterator()))
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isRegMask())
+ if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef))
+ return nullptr;
+
+ return AvailCopy;
+ }
- /// Def -> available copies map.
- Reg2MIMap AvailCopyMap;
+ void clear() {
+ Copies.clear();
+ }
+};
- /// Def -> copies map.
- Reg2MIMap CopyMap;
+class MachineCopyPropagation : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ const MachineRegisterInfo *MRI;
- /// Src -> Def map
- SourceMap SrcMap;
+public:
+ static char ID; // Pass identification, replacement for typeid
- bool Changed;
- };
+ MachineCopyPropagation() : MachineFunctionPass(ID) {
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ void ClobberRegister(unsigned Reg);
+ void ReadRegister(unsigned Reg);
+ void CopyPropagateBlock(MachineBasicBlock &MBB);
+ bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
+ void forwardUses(MachineInstr &MI);
+ bool isForwardableRegClassCopy(const MachineInstr &Copy,
+ const MachineInstr &UseI, unsigned UseIdx);
+ bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
+
+ /// Candidates for deletion.
+ SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
+
+ CopyTracker Tracker;
+
+ bool Changed;
+};
} // end anonymous namespace
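
The register-unit-based CopyTracker above replaces the three per-register maps; a hedged usage sketch mirroring how CopyPropagateBlock drives it further down in this diff (instruction and register names invented):

    // Not part of the diff; illustrates the tracker's life cycle per block.
    CopyTracker Tracker;
    Tracker.trackCopy(CopyMI, *TRI);        // CopyMI must be a COPY, e.g. $ebx = COPY $eax
    if (MachineInstr *Avail = Tracker.findAvailCopy(UseMI, UseReg, *TRI)) {
      // forward Avail->getOperand(1).getReg() into UseMI
    }
    Tracker.clobberRegister(DefReg, *TRI);  // any later def invalidates matching entries
    Tracker.clear();                        // reset at the end of the block
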
@@ -136,54 +232,13 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
"Machine Copy Propagation Pass", false, false)
-/// Remove any entry in \p Map where the register is a subregister or equal to
-/// a register contained in \p Regs.
-static void removeRegsFromMap(Reg2MIMap &Map, const RegList &Regs,
- const TargetRegisterInfo &TRI) {
- for (unsigned Reg : Regs) {
- // Source of copy is no longer available for propagation.
- for (MCSubRegIterator SR(Reg, &TRI, true); SR.isValid(); ++SR)
- Map.erase(*SR);
- }
-}
-
-/// Remove any entry in \p Map that is marked clobbered in \p RegMask.
-/// The map will typically have a lot fewer entries than the regmask clobbers,
-/// so this is more efficient than iterating the clobbered registers and calling
-/// ClobberRegister() on them.
-static void removeClobberedRegsFromMap(Reg2MIMap &Map,
- const MachineOperand &RegMask) {
- for (Reg2MIMap::iterator I = Map.begin(), E = Map.end(), Next; I != E;
- I = Next) {
- Next = std::next(I);
- unsigned Reg = I->first;
- if (RegMask.clobbersPhysReg(Reg))
- Map.erase(I);
- }
-}
-
-void MachineCopyPropagation::ClobberRegister(unsigned Reg) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- CopyMap.erase(*AI);
- AvailCopyMap.erase(*AI);
-
- SourceMap::iterator SI = SrcMap.find(*AI);
- if (SI != SrcMap.end()) {
- removeRegsFromMap(AvailCopyMap, SI->second, *TRI);
- SrcMap.erase(SI);
- }
- }
-}
-
void MachineCopyPropagation::ReadRegister(unsigned Reg) {
// If 'Reg' is defined by a copy, the copy is no longer a candidate
// for elimination.
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- Reg2MIMap::iterator CI = CopyMap.find(*AI);
- if (CI != CopyMap.end()) {
- LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: ";
- CI->second->dump());
- MaybeDeadCopies.remove(CI->second);
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
+ if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI)) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump());
+ MaybeDeadCopies.remove(Copy);
}
}
}
@@ -219,15 +274,14 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
return false;
// Search for an existing copy.
- Reg2MIMap::iterator CI = AvailCopyMap.find(Def);
- if (CI == AvailCopyMap.end())
+ MachineInstr *PrevCopy = Tracker.findAvailCopy(Copy, Def, *TRI);
+ if (!PrevCopy)
return false;
// Check that the existing copy uses the correct sub registers.
- MachineInstr &PrevCopy = *CI->second;
- if (PrevCopy.getOperand(0).isDead())
+ if (PrevCopy->getOperand(0).isDead())
return false;
- if (!isNopCopy(PrevCopy, Src, Def, TRI))
+ if (!isNopCopy(*PrevCopy, Src, Def, TRI))
return false;
LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());
@@ -238,7 +292,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
unsigned CopyDef = Copy.getOperand(0).getReg();
assert(CopyDef == Src || CopyDef == Def);
for (MachineInstr &MI :
- make_range(PrevCopy.getIterator(), Copy.getIterator()))
+ make_range(PrevCopy->getIterator(), Copy.getIterator()))
MI.clearRegisterKills(CopyDef, TRI);
Copy.eraseFromParent();
@@ -314,7 +368,7 @@ bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
/// Look for available copies whose destination register is used by \p MI and
/// replace the use in \p MI with the copy's source register.
void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
- if (AvailCopyMap.empty())
+ if (!Tracker.hasAnyCopies())
return;
// Look for non-tied explicit vreg uses that have an active COPY
@@ -341,13 +395,12 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (!MOUse.isRenamable())
continue;
- auto CI = AvailCopyMap.find(MOUse.getReg());
- if (CI == AvailCopyMap.end())
+ MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg(), *TRI);
+ if (!Copy)
continue;
- MachineInstr &Copy = *CI->second;
- unsigned CopyDstReg = Copy.getOperand(0).getReg();
- const MachineOperand &CopySrc = Copy.getOperand(1);
+ unsigned CopyDstReg = Copy->getOperand(0).getReg();
+ const MachineOperand &CopySrc = Copy->getOperand(1);
unsigned CopySrcReg = CopySrc.getReg();
// FIXME: Don't handle partial uses of wider COPYs yet.
@@ -362,7 +415,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg))
continue;
- if (!isForwardableRegClassCopy(Copy, MI, OpIdx))
+ if (!isForwardableRegClassCopy(*Copy, MI, OpIdx))
continue;
if (hasImplicitOverlap(MI, MOUse))
@@ -376,7 +429,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI)
<< "\n with " << printReg(CopySrcReg, TRI)
- << "\n in " << MI << " from " << Copy);
+ << "\n in " << MI << " from " << *Copy);
MOUse.setReg(CopySrcReg);
if (!CopySrc.isRenamable())
@@ -386,7 +439,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
// Clear kill markers that may have been invalidated.
for (MachineInstr &KMI :
- make_range(Copy.getIterator(), std::next(MI.getIterator())))
+ make_range(Copy->getIterator(), std::next(MI.getIterator())))
KMI.clearRegisterKills(CopySrcReg, TRI);
++NumCopyForwards;
@@ -459,28 +512,17 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// %xmm2 = copy %xmm0
// ...
// %xmm2 = copy %xmm9
- ClobberRegister(Def);
+ Tracker.clobberRegister(Def, *TRI);
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- ClobberRegister(Reg);
+ Tracker.clobberRegister(Reg, *TRI);
}
- // Remember Def is defined by the copy.
- for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid();
- ++SR) {
- CopyMap[*SR] = MI;
- AvailCopyMap[*SR] = MI;
- }
-
- // Remember source that's copied to Def. Once it's clobbered, then
- // it's no longer available for copy propagation.
- RegList &DestList = SrcMap[Src];
- if (!is_contained(DestList, Def))
- DestList.push_back(Def);
+ Tracker.trackCopy(MI, *TRI);
continue;
}
@@ -494,7 +536,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// later.
if (MO.isTied())
ReadRegister(Reg);
- ClobberRegister(Reg);
+ Tracker.clobberRegister(Reg, *TRI);
}
forwardUses(*MI);
@@ -541,6 +583,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
MaybeDead->dump());
+ // Make sure we invalidate any entries in the copy maps before erasing
+ // the instruction.
+ Tracker.clobberRegister(Reg, *TRI);
+
// erase() will return the next valid iterator pointing to the next
// element after the erased one.
DI = MaybeDeadCopies.erase(DI);
@@ -548,22 +594,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
Changed = true;
++NumDeletes;
}
-
- removeClobberedRegsFromMap(AvailCopyMap, *RegMask);
- removeClobberedRegsFromMap(CopyMap, *RegMask);
- for (SourceMap::iterator I = SrcMap.begin(), E = SrcMap.end(), Next;
- I != E; I = Next) {
- Next = std::next(I);
- if (RegMask->clobbersPhysReg(I->first)) {
- removeRegsFromMap(AvailCopyMap, I->second, *TRI);
- SrcMap.erase(I);
- }
- }
}
// Any previous copy definition or reading the Defs is no longer available.
for (unsigned Reg : Defs)
- ClobberRegister(Reg);
+ Tracker.clobberRegister(Reg, *TRI);
}
// If MBB doesn't have successors, delete the copies whose defs are not used.
@@ -574,6 +609,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
MaybeDead->dump());
assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
+
+ // Update matching debug values.
+ assert(MaybeDead->isCopy());
+ MaybeDead->changeDebugValuesDefReg(MaybeDead->getOperand(1).getReg());
+
MaybeDead->eraseFromParent();
Changed = true;
++NumDeletes;
@@ -581,9 +621,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
}
MaybeDeadCopies.clear();
- AvailCopyMap.clear();
- CopyMap.clear();
- SrcMap.clear();
+ Tracker.clear();
}
bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index dd668bcf6193..3495319670a5 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -99,6 +99,9 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
llvm_unreachable("Invalid machine function property");
}
+// Pin the vtable to this file.
+void MachineFunction::Delegate::anchor() {}
+
void MachineFunctionProperties::print(raw_ostream &OS) const {
const char *Separator = "";
for (BitVector::size_type I = 0; I < Properties.size(); ++I) {
@@ -127,7 +130,8 @@ static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
return STI->getFrameLowering()->getStackAlignment();
}
-MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target,
+MachineFunction::MachineFunction(const Function &F,
+ const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI,
unsigned FunctionNum, MachineModuleInfo &mmi)
: F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) {
@@ -135,6 +139,16 @@ MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target,
init();
}
+void MachineFunction::handleInsertion(MachineInstr &MI) {
+ if (TheDelegate)
+ TheDelegate->MF_HandleInsertion(MI);
+}
+
+void MachineFunction::handleRemoval(MachineInstr &MI) {
+ if (TheDelegate)
+ TheDelegate->MF_HandleRemoval(MI);
+}
+
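
The new delegate hooks let a pass observe every instruction insertion and removal; a minimal sketch, assuming the MF_HandleInsertion/MF_HandleRemoval callbacks invoked above are virtual members of MachineFunction::Delegate and that some registration call (not shown in this hunk) installs the delegate:

    // Hedged sketch; the registration step is assumed, not part of this diff.
    struct InstrCounter : public MachineFunction::Delegate {
      unsigned NumInserted = 0, NumRemoved = 0;
      void MF_HandleInsertion(MachineInstr &MI) override { (void)MI; ++NumInserted; }
      void MF_HandleRemoval(MachineInstr &MI) override { (void)MI; ++NumRemoved; }
    };
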
void MachineFunction::init() {
// Assume the function starts in SSA form with correct liveness.
Properties.set(MachineFunctionProperties::Property::IsSSA);
@@ -233,6 +247,11 @@ void MachineFunction::clear() {
WinEHInfo->~WinEHFuncInfo();
Allocator.Deallocate(WinEHInfo);
}
+
+ if (WasmEHInfo) {
+ WasmEHInfo->~WasmEHFuncInfo();
+ Allocator.Deallocate(WasmEHInfo);
+ }
}
const DataLayout &MachineFunction::getDataLayout() const {
@@ -406,82 +425,17 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MMO->getOrdering(), MMO->getFailureOrdering());
}
-MachineInstr::mmo_iterator
-MachineFunction::allocateMemRefsArray(unsigned long Num) {
- return Allocator.Allocate<MachineMemOperand *>(Num);
-}
-
-std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
-MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
- MachineInstr::mmo_iterator End) {
- // Count the number of load mem refs.
- unsigned Num = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
- if ((*I)->isLoad())
- ++Num;
-
- // Allocate a new array and populate it with the load information.
- MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
- unsigned Index = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
- if ((*I)->isLoad()) {
- if (!(*I)->isStore())
- // Reuse the MMO.
- Result[Index] = *I;
- else {
- // Clone the MMO and unset the store flag.
- MachineMemOperand *JustLoad =
- getMachineMemOperand((*I)->getPointerInfo(),
- (*I)->getFlags() & ~MachineMemOperand::MOStore,
- (*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getAAInfo(), nullptr,
- (*I)->getSyncScopeID(), (*I)->getOrdering(),
- (*I)->getFailureOrdering());
- Result[Index] = JustLoad;
- }
- ++Index;
- }
- }
- return std::make_pair(Result, Result + Num);
-}
-
-std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
-MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
- MachineInstr::mmo_iterator End) {
- // Count the number of load mem refs.
- unsigned Num = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
- if ((*I)->isStore())
- ++Num;
-
- // Allocate a new array and populate it with the store information.
- MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
- unsigned Index = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
- if ((*I)->isStore()) {
- if (!(*I)->isLoad())
- // Reuse the MMO.
- Result[Index] = *I;
- else {
- // Clone the MMO and unset the load flag.
- MachineMemOperand *JustStore =
- getMachineMemOperand((*I)->getPointerInfo(),
- (*I)->getFlags() & ~MachineMemOperand::MOLoad,
- (*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getAAInfo(), nullptr,
- (*I)->getSyncScopeID(), (*I)->getOrdering(),
- (*I)->getFailureOrdering());
- Result[Index] = JustStore;
- }
- ++Index;
- }
- }
- return std::make_pair(Result, Result + Num);
+MachineInstr::ExtraInfo *
+MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
+ MCSymbol *PreInstrSymbol,
+ MCSymbol *PostInstrSymbol) {
+ return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol,
+ PostInstrSymbol);
}
const char *MachineFunction::createExternalSymbolName(StringRef Name) {
char *Dest = Allocator.Allocate<char>(Name.size() + 1);
- std::copy(Name.begin(), Name.end(), Dest);
+ llvm::copy(Name, Dest);
Dest[Name.size()] = 0;
return Dest;
}
@@ -678,6 +632,46 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
MCSymbol *LandingPadLabel = Ctx.createTempSymbol();
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
LP.LandingPadLabel = LandingPadLabel;
+
+ const Instruction *FirstI = LandingPad->getBasicBlock()->getFirstNonPHI();
+ if (const auto *LPI = dyn_cast<LandingPadInst>(FirstI)) {
+ if (const auto *PF =
+ dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()))
+ getMMI().addPersonality(PF);
+
+ if (LPI->isCleanup())
+ addCleanup(LandingPad);
+
+ // FIXME: New EH - Add the clauses in reverse order. This isn't 100%
+ // correct, but we need to do it this way because of how the DWARF EH
+ // emitter processes the clauses.
+ for (unsigned I = LPI->getNumClauses(); I != 0; --I) {
+ Value *Val = LPI->getClause(I - 1);
+ if (LPI->isCatch(I - 1)) {
+ addCatchTypeInfo(LandingPad,
+ dyn_cast<GlobalValue>(Val->stripPointerCasts()));
+ } else {
+ // Add filters in a list.
+ auto *CVal = cast<Constant>(Val);
+ SmallVector<const GlobalValue *, 4> FilterList;
+ for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
+ II != IE; ++II)
+ FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
+
+ addFilterTypeInfo(LandingPad, FilterList);
+ }
+ }
+
+ } else if (const auto *CPI = dyn_cast<CatchPadInst>(FirstI)) {
+ for (unsigned I = CPI->getNumArgOperands(); I != 0; --I) {
+ Value *TypeInfo = CPI->getArgOperand(I - 1)->stripPointerCasts();
+ addCatchTypeInfo(LandingPad, dyn_cast<GlobalValue>(TypeInfo));
+ }
+
+ } else {
+ assert(isa<CleanupPadInst>(FirstI) && "Invalid landingpad!");
+ }
+
return LandingPadLabel;
}
@@ -697,7 +691,8 @@ void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad,
LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
}
-void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+void MachineFunction::tidyLandingPads(DenseMap<MCSymbol *, uintptr_t> *LPMap,
+ bool TidyIfNoBeginLabels) {
for (unsigned i = 0; i != LandingPads.size(); ) {
LandingPadInfo &LandingPad = LandingPads[i];
if (LandingPad.LandingPadLabel &&
@@ -712,24 +707,25 @@ void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
continue;
}
- for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
- MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
- MCSymbol *EndLabel = LandingPad.EndLabels[j];
- if ((BeginLabel->isDefined() ||
- (LPMap && (*LPMap)[BeginLabel] != 0)) &&
- (EndLabel->isDefined() ||
- (LPMap && (*LPMap)[EndLabel] != 0))) continue;
-
- LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
- LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
- --j;
- --e;
- }
+ if (TidyIfNoBeginLabels) {
+ for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+ MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad.EndLabels[j];
+ if ((BeginLabel->isDefined() || (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() || (LPMap && (*LPMap)[EndLabel] != 0)))
+ continue;
+
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ --j;
+ --e;
+ }
- // Remove landing pads with no try-ranges.
- if (LandingPads[i].BeginLabels.empty()) {
- LandingPads.erase(LandingPads.begin() + i);
- continue;
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
}
// If there is no landing pad, ensure that the list of typeids is empty.
@@ -806,36 +802,6 @@ try_next:;
return FilterID;
}
-void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) {
- MachineFunction &MF = *MBB.getParent();
- if (const auto *PF = dyn_cast<Function>(
- I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()))
- MF.getMMI().addPersonality(PF);
-
- if (I.isCleanup())
- MF.addCleanup(&MBB);
-
- // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
- // but we need to do it this way because of how the DWARF EH emitter
- // processes the clauses.
- for (unsigned i = I.getNumClauses(); i != 0; --i) {
- Value *Val = I.getClause(i - 1);
- if (I.isCatch(i - 1)) {
- MF.addCatchTypeInfo(&MBB,
- dyn_cast<GlobalValue>(Val->stripPointerCasts()));
- } else {
- // Add filters in a list.
- Constant *CVal = cast<Constant>(Val);
- SmallVector<const GlobalValue *, 4> FilterList;
- for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
- II != IE; ++II)
- FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
-
- MF.addFilterTypeInfo(&MBB, FilterList);
- }
- }
-}
-
/// \}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 67ac95740e3e..5db4e299fa70 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -23,11 +23,13 @@
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
using namespace llvm;
+using namespace ore;
Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
const std::string &Banner) const {
@@ -57,9 +59,43 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
llvm_unreachable("MachineFunctionProperties check failed");
}
#endif
+ // Collect the MI count of the function before the pass.
+ unsigned CountBefore, CountAfter;
+
+ // Check if the user asked for size remarks.
+ bool ShouldEmitSizeRemarks =
+ F.getParent()->shouldEmitInstrCountChangedRemark();
+
+ // If we want size remarks, collect the number of MachineInstrs in our
+ // MachineFunction before the pass runs.
+ if (ShouldEmitSizeRemarks)
+ CountBefore = MF.getInstructionCount();
bool RV = runOnMachineFunction(MF);
+ if (ShouldEmitSizeRemarks) {
+ // We wanted size remarks. Check if there was a change to the number of
+ // MachineInstrs in the module. Emit a remark if there was a change.
+ CountAfter = MF.getInstructionCount();
+ if (CountBefore != CountAfter) {
+ MachineOptimizationRemarkEmitter MORE(MF, nullptr);
+ MORE.emit([&]() {
+ int64_t Delta = static_cast<int64_t>(CountAfter) -
+ static_cast<int64_t>(CountBefore);
+ MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
+ MF.getFunction().getSubprogram(),
+ &MF.front());
+ R << NV("Pass", getPassName())
+ << ": Function: " << NV("Function", F.getName()) << ": "
+ << "MI Instruction count changed from "
+ << NV("MIInstrsBefore", CountBefore) << " to "
+ << NV("MIInstrsAfter", CountAfter)
+ << "; Delta: " << NV("Delta", Delta);
+ return R;
+ });
+ }
+ }
+
MFProps.set(SetProperties);
MFProps.reset(ClearedProperties);
return RV;
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 55d9defced3a..9c96ba748778 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -38,6 +39,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
+ AU.addUsedIfAvailable<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 96fcfdb72ad7..764a84c7e132 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -52,6 +52,7 @@
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/Operator.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -131,8 +132,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
- debugLoc(MI.getDebugLoc()) {
+ : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -315,71 +315,201 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
--NumOperands;
}
-/// addMemOperand - Add a MachineMemOperand to the machine instruction.
-/// This function should be used only occasionally. The setMemRefs function
-/// is the primary method for setting up a MachineInstr's MemRefs list.
+void MachineInstr::dropMemRefs(MachineFunction &MF) {
+ if (memoperands_empty())
+ return;
+
+ // See if we can just drop all of our extra info.
+ if (!getPreInstrSymbol() && !getPostInstrSymbol()) {
+ Info.clear();
+ return;
+ }
+ if (!getPostInstrSymbol()) {
+ Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol());
+ return;
+ }
+ if (!getPreInstrSymbol()) {
+ Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol());
+ return;
+ }
+
+ // Otherwise allocate a fresh extra info with just these symbols.
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo({}, getPreInstrSymbol(), getPostInstrSymbol()));
+}
+
+void MachineInstr::setMemRefs(MachineFunction &MF,
+ ArrayRef<MachineMemOperand *> MMOs) {
+ if (MMOs.empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ // Try to store a single MMO inline.
+ if (MMOs.size() == 1 && !getPreInstrSymbol() && !getPostInstrSymbol()) {
+ Info.set<EIIK_MMO>(MMOs[0]);
+ return;
+ }
+
+ // Otherwise create an extra info struct with all of our info.
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo(MMOs, getPreInstrSymbol(), getPostInstrSymbol()));
+}
+
void MachineInstr::addMemOperand(MachineFunction &MF,
MachineMemOperand *MO) {
- mmo_iterator OldMemRefs = MemRefs;
- unsigned OldNumMemRefs = NumMemRefs;
+ SmallVector<MachineMemOperand *, 2> MMOs;
+ MMOs.append(memoperands_begin(), memoperands_end());
+ MMOs.push_back(MO);
+ setMemRefs(MF, MMOs);
+}
- unsigned NewNum = NumMemRefs + 1;
- mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) {
+ if (this == &MI)
+ // Nothing to do for a self-clone!
+ return;
- std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
- NewMemRefs[NewNum - 1] = MO;
- setMemRefs(NewMemRefs, NewMemRefs + NewNum);
+ assert(&MF == MI.getMF() &&
+ "Invalid machine functions when cloning memory refrences!");
+ // See if we can just steal the extra info already allocated for the
+ // instruction. We can do this whenever the pre- and post-instruction symbols
+ // are the same (including null).
+ if (getPreInstrSymbol() == MI.getPreInstrSymbol() &&
+ getPostInstrSymbol() == MI.getPostInstrSymbol()) {
+ Info = MI.Info;
+ return;
+ }
+
+ // Otherwise, fall back on a copy-based clone.
+ setMemRefs(MF, MI.memoperands());
}
/// Check to see if the MMOs pointed to by the two MemRefs arrays are
/// identical.
-static bool hasIdenticalMMOs(const MachineInstr &MI1, const MachineInstr &MI2) {
- auto I1 = MI1.memoperands_begin(), E1 = MI1.memoperands_end();
- auto I2 = MI2.memoperands_begin(), E2 = MI2.memoperands_end();
- if ((E1 - I1) != (E2 - I2))
+static bool hasIdenticalMMOs(ArrayRef<MachineMemOperand *> LHS,
+ ArrayRef<MachineMemOperand *> RHS) {
+ if (LHS.size() != RHS.size())
return false;
- for (; I1 != E1; ++I1, ++I2) {
- if (**I1 != **I2)
- return false;
+
+ auto LHSPointees = make_pointee_range(LHS);
+ auto RHSPointees = make_pointee_range(RHS);
+ return std::equal(LHSPointees.begin(), LHSPointees.end(),
+ RHSPointees.begin());
+}
+
+void MachineInstr::cloneMergedMemRefs(MachineFunction &MF,
+ ArrayRef<const MachineInstr *> MIs) {
+ // Try handling easy numbers of MIs with simpler mechanisms.
+ if (MIs.empty()) {
+ dropMemRefs(MF);
+ return;
}
- return true;
+ if (MIs.size() == 1) {
+ cloneMemRefs(MF, *MIs[0]);
+ return;
+ }
+ // Because an empty memoperands list provides *no* information and must be
+ // handled conservatively (assuming the instruction can do anything), the only
+ // way to merge with it is to drop all other memoperands.
+ if (MIs[0]->memoperands_empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ // Handle the general case.
+ SmallVector<MachineMemOperand *, 2> MergedMMOs;
+ // Start with the first instruction.
+ assert(&MF == MIs[0]->getMF() &&
+ "Invalid machine functions when cloning memory references!");
+ MergedMMOs.append(MIs[0]->memoperands_begin(), MIs[0]->memoperands_end());
+ // Now walk all the other instructions and accumulate any different MMOs.
+ for (const MachineInstr &MI : make_pointee_range(MIs.slice(1))) {
+ assert(&MF == MI.getMF() &&
+ "Invalid machine functions when cloning memory references!");
+
+ // Skip MIs with identical operands to the first. This is a somewhat
+ // arbitrary hack but will catch common cases without being quadratic.
+ // TODO: We could fully implement merge semantics here if needed.
+ if (hasIdenticalMMOs(MIs[0]->memoperands(), MI.memoperands()))
+ continue;
+
+ // Because an empty memoperands list provides *no* information and must be
+ // handled conservatively (assuming the instruction can do anything), the
+ // only way to merge with it is to drop all other memoperands.
+ if (MI.memoperands_empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ // Otherwise accumulate these into our temporary buffer of the merged state.
+ MergedMMOs.append(MI.memoperands_begin(), MI.memoperands_end());
+ }
+
+ setMemRefs(MF, MergedMMOs);
}
-std::pair<MachineInstr::mmo_iterator, unsigned>
-MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
+void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
+ MCSymbol *OldSymbol = getPreInstrSymbol();
+ if (OldSymbol == Symbol)
+ return;
+ if (OldSymbol && !Symbol) {
+ // We're removing a symbol rather than adding one. Try to clean up any
+ // extra info carried around.
+ if (Info.is<EIIK_PreInstrSymbol>()) {
+ Info.clear();
+ return;
+ }
- // If either of the incoming memrefs are empty, we must be conservative and
- // treat this as if we've exhausted our space for memrefs and dropped them.
- if (memoperands_empty() || Other.memoperands_empty())
- return std::make_pair(nullptr, 0);
+ if (memoperands_empty()) {
+ assert(getPostInstrSymbol() &&
+ "Should never have only a single symbol allocated out-of-line!");
+ Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol());
+ return;
+ }
- // If both instructions have identical memrefs, we don't need to merge them.
- // Since many instructions have a single memref, and we tend to merge things
- // like pairs of loads from the same location, this catches a large number of
- // cases in practice.
- if (hasIdenticalMMOs(*this, Other))
- return std::make_pair(MemRefs, NumMemRefs);
+ // Otherwise fall back on the generic update.
+ } else if (!Info || Info.is<EIIK_PreInstrSymbol>()) {
+ // If we don't have any other extra info, we can store this inline.
+ Info.set<EIIK_PreInstrSymbol>(Symbol);
+ return;
+ }
- // TODO: consider uniquing elements within the operand lists to reduce
- // space usage and fall back to conservative information less often.
- size_t CombinedNumMemRefs = NumMemRefs + Other.NumMemRefs;
+ // Otherwise, allocate a full new set of extra info.
+ // FIXME: Maybe we should make the symbols in the extra info mutable?
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo(memoperands(), Symbol, getPostInstrSymbol()));
+}
- // If we don't have enough room to store this many memrefs, be conservative
- // and drop them. Otherwise, we'd fail asserts when trying to add them to
- // the new instruction.
- if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs))
- return std::make_pair(nullptr, 0);
+void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
+ MCSymbol *OldSymbol = getPostInstrSymbol();
+ if (OldSymbol == Symbol)
+ return;
+ if (OldSymbol && !Symbol) {
+ // We're removing a symbol rather than adding one. Try to clean up any
+ // extra info carried around.
+ if (Info.is<EIIK_PostInstrSymbol>()) {
+ Info.clear();
+ return;
+ }
+
+ if (memoperands_empty()) {
+ assert(getPreInstrSymbol() &&
+ "Should never have only a single symbol allocated out-of-line!");
+ Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol());
+ return;
+ }
- MachineFunction *MF = getMF();
- mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
- mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(),
- MemBegin);
- MemEnd = std::copy(Other.memoperands_begin(), Other.memoperands_end(),
- MemEnd);
- assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs &&
- "missing memrefs");
+ // Otherwise fall back on the generic update.
+ } else if (!Info || Info.is<EIIK_PostInstrSymbol>()) {
+ // If we don't have any other extra info, we can store this inline.
+ Info.set<EIIK_PostInstrSymbol>(Symbol);
+ return;
+ }
- return std::make_pair(MemBegin, CombinedNumMemRefs);
+ // Otherwise, allocate a full new set of extra info.
+ // FIXME: Maybe we should make the symbols in the extra info mutable?
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol));
}
uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
@@ -388,7 +518,42 @@ uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
return getFlags() | Other.getFlags();
}
-bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
+void MachineInstr::copyIRFlags(const Instruction &I) {
+ // Copy the wrapping flags.
+ if (const OverflowingBinaryOperator *OB =
+ dyn_cast<OverflowingBinaryOperator>(&I)) {
+ if (OB->hasNoSignedWrap())
+ setFlag(MachineInstr::MIFlag::NoSWrap);
+ if (OB->hasNoUnsignedWrap())
+ setFlag(MachineInstr::MIFlag::NoUWrap);
+ }
+
+ // Copy the exact flag.
+ if (const PossiblyExactOperator *PE = dyn_cast<PossiblyExactOperator>(&I))
+ if (PE->isExact())
+ setFlag(MachineInstr::MIFlag::IsExact);
+
+ // Copy the fast-math flags.
+ if (const FPMathOperator *FP = dyn_cast<FPMathOperator>(&I)) {
+ const FastMathFlags Flags = FP->getFastMathFlags();
+ if (Flags.noNaNs())
+ setFlag(MachineInstr::MIFlag::FmNoNans);
+ if (Flags.noInfs())
+ setFlag(MachineInstr::MIFlag::FmNoInfs);
+ if (Flags.noSignedZeros())
+ setFlag(MachineInstr::MIFlag::FmNsz);
+ if (Flags.allowReciprocal())
+ setFlag(MachineInstr::MIFlag::FmArcp);
+ if (Flags.allowContract())
+ setFlag(MachineInstr::MIFlag::FmContract);
+ if (Flags.approxFunc())
+ setFlag(MachineInstr::MIFlag::FmAfn);
+ if (Flags.allowReassoc())
+ setFlag(MachineInstr::MIFlag::FmReassoc);
+ }
+}
+
+bool MachineInstr::hasPropertyInBundle(uint64_t Mask, QueryType Type) const {
assert(!isBundledWithPred() && "Must be called on bundle header");
for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
if (MII->getDesc().getFlags() & Mask) {
@@ -768,9 +933,7 @@ int MachineInstr::findRegisterUseOperandIdx(
unsigned MOReg = MO.getReg();
if (!MOReg)
continue;
- if (MOReg == Reg || (TRI && TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- TargetRegisterInfo::isPhysicalRegister(Reg) &&
- TRI->isSubRegister(MOReg, Reg)))
+ if (MOReg == Reg || (TRI && Reg && MOReg && TRI->regsOverlap(MOReg, Reg)))
if (!isKill || MO.isKill())
return i;
}
@@ -1050,10 +1213,13 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
int64_t OffsetA = MMOa->getOffset();
int64_t OffsetB = MMOb->getOffset();
-
int64_t MinOffset = std::min(OffsetA, OffsetB);
- int64_t WidthA = MMOa->getSize();
- int64_t WidthB = MMOb->getSize();
+
+ uint64_t WidthA = MMOa->getSize();
+ uint64_t WidthB = MMOb->getSize();
+ bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
+ bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
+
const Value *ValA = MMOa->getValue();
const Value *ValB = MMOb->getValue();
bool SameVal = (ValA && ValB && (ValA == ValB));
@@ -1069,6 +1235,8 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
}
if (SameVal) {
+ if (!KnownWidthA || !KnownWidthB)
+ return true;
int64_t MaxOffset = std::max(OffsetA, OffsetB);
int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
return (MinOffset + LowWidth > MaxOffset);
@@ -1083,13 +1251,15 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
- int64_t Overlapa = WidthA + OffsetA - MinOffset;
- int64_t Overlapb = WidthB + OffsetB - MinOffset;
+ int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
+ : MemoryLocation::UnknownSize;
+ int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
+ : MemoryLocation::UnknownSize;
AliasResult AAResult = AA->alias(
- MemoryLocation(ValA, Overlapa,
+ MemoryLocation(ValA, OverlapA,
UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
- MemoryLocation(ValB, Overlapb,
+ MemoryLocation(ValB, OverlapB,
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
return (AAResult != NoAlias);
@@ -1294,7 +1464,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
assert(getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
SmallBitVector PrintedTypes(8);
- bool ShouldPrintRegisterTies = hasComplexRegisterTies();
+ bool ShouldPrintRegisterTies = IsStandalone || hasComplexRegisterTies();
auto getTiedOperandIdx = [&](unsigned OpIdx) {
if (!ShouldPrintRegisterTies)
return 0U;
@@ -1343,6 +1513,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "afn ";
if (getFlag(MachineInstr::FmReassoc))
OS << "reassoc ";
+ if (getFlag(MachineInstr::NoUWrap))
+ OS << "nuw ";
+ if (getFlag(MachineInstr::NoSWrap))
+ OS << "nsw ";
+ if (getFlag(MachineInstr::IsExact))
+ OS << "exact ";
// Print the opcode name.
if (TII)
@@ -1486,6 +1662,25 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
}
+ // Print any optional symbols attached to this instruction as if they were
+ // operands.
+ if (MCSymbol *PreInstrSymbol = getPreInstrSymbol()) {
+ if (!FirstOp) {
+ FirstOp = false;
+ OS << ',';
+ }
+ OS << " pre-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PreInstrSymbol);
+ }
+ if (MCSymbol *PostInstrSymbol = getPostInstrSymbol()) {
+ if (!FirstOp) {
+ FirstOp = false;
+ OS << ',';
+ }
+ OS << " post-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PostInstrSymbol);
+ }
+
if (!SkipDebugLoc) {
if (const DebugLoc &DL = getDebugLoc()) {
if (!FirstOp)
@@ -1605,7 +1800,8 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
// Trim unneeded kill operands.
while (!DeadOps.empty()) {
unsigned OpIdx = DeadOps.back();
- if (getOperand(OpIdx).isImplicit())
+ if (getOperand(OpIdx).isImplicit() &&
+ (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
RemoveOperand(OpIdx);
else
getOperand(OpIdx).setIsKill(false);
@@ -1669,7 +1865,8 @@ bool MachineInstr::addRegisterDead(unsigned Reg,
// Trim unneeded dead operands.
while (!DeadOps.empty()) {
unsigned OpIdx = DeadOps.back();
- if (getOperand(OpIdx).isImplicit())
+ if (getOperand(OpIdx).isImplicit() &&
+ (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
RemoveOperand(OpIdx);
else
getOperand(OpIdx).setIsDead(false);
@@ -1876,3 +2073,30 @@ void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex) {
Orig.getOperand(1).ChangeToImmediate(0U);
Orig.getOperand(3).setMetadata(Expr);
}
+
+void MachineInstr::collectDebugValues(
+ SmallVectorImpl<MachineInstr *> &DbgValues) {
+ MachineInstr &MI = *this;
+ if (!MI.getOperand(0).isReg())
+ return;
+
+ MachineBasicBlock::iterator DI = MI; ++DI;
+ for (MachineBasicBlock::iterator DE = MI.getParent()->end();
+ DI != DE; ++DI) {
+ if (!DI->isDebugValue())
+ return;
+ if (DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg() == MI.getOperand(0).getReg())
+ DbgValues.push_back(&*DI);
+ }
+}
+
+void MachineInstr::changeDebugValuesDefReg(unsigned Reg) {
+ // Collect matching debug values.
+ SmallVector<MachineInstr *, 2> DbgValues;
+ collectDebugValues(DbgValues);
+
+ // Propagate Reg to debug value instructions.
+ for (auto *DBI : DbgValues)
+ DBI->getOperand(0).setReg(Reg);
+}
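The memoperand hunks above replace the old allocate-and-copy array interface with ArrayRef-based setters that decide between inline and out-of-line storage internally. A rough usage sketch only (MF, MI, OtherMI and NewMMO are placeholder names, not taken from this patch):

    SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                             MI.memoperands_end());
    MMOs.push_back(NewMMO);        // extra operand to attach
    MI.setMemRefs(MF, MMOs);       // stored inline for a single MMO, out-of-line otherwise
    OtherMI.cloneMemRefs(MF, MI);  // reuses MI's ExtraInfo when the pre/post symbols match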
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
index ed16a2b6084c..ae378cc8c464 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -105,6 +105,16 @@ bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
return llvm::finalizeBundles(MF);
}
+/// Return the first found DebugLoc that has a DILocation, given a range of
+/// instructions. The search range is from FirstMI to LastMI (exclusive). If no
+/// DILocation is found, then an empty location is returned.
+static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ for (auto MII = FirstMI; MII != LastMI; ++MII)
+ if (MII->getDebugLoc().get())
+ return MII->getDebugLoc();
+ return DebugLoc();
+}
/// finalizeBundle - Finalize a machine instruction bundle which includes
/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
@@ -123,7 +133,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MachineInstrBuilder MIB =
- BuildMI(MF, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE));
+ BuildMI(MF, getDebugLoc(FirstMI, LastMI), TII->get(TargetOpcode::BUNDLE));
Bundle.prepend(MIB);
SmallVector<unsigned, 32> LocalDefs;
@@ -135,9 +145,9 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
SmallSet<unsigned, 8> KilledUseSet;
SmallSet<unsigned, 8> UndefUseSet;
SmallVector<MachineOperand*, 4> Defs;
- for (; FirstMI != LastMI; ++FirstMI) {
- for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = FirstMI->getOperand(i);
+ for (auto MII = FirstMI; MII != LastMI; ++MII) {
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
if (!MO.isReg())
continue;
if (MO.isDef()) {
@@ -215,6 +225,15 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
getImplRegState(true));
}
+
+ // Set FrameSetup/FrameDestroy for the bundle. If any of the instructions got
+ // the property, then also set it on the bundle.
+ for (auto MII = FirstMI; MII != LastMI; ++MII) {
+ if (MII->getFlag(MachineInstr::FrameSetup))
+ MIB.setMIFlag(MachineInstr::FrameSetup);
+ if (MII->getFlag(MachineInstr::FrameDestroy))
+ MIB.setMIFlag(MachineInstr::FrameDestroy);
+ }
}
/// finalizeBundle - Same functionality as the previous finalizeBundle except
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index 7332b7162030..58fd1f238420 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -463,8 +463,12 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI,
for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
if (PhysRegDefs.test(*AS))
PhysRegClobbers.set(*AS);
- PhysRegDefs.set(*AS);
}
+ // Need a second loop because MCRegAliasIterator can visit the same
+ // register twice.
+ for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
+ PhysRegDefs.set(*AS);
+
if (PhysRegClobbers.test(Reg))
// MI defined register is seen defined by another instruction in
// the loop, it cannot be a LICM candidate.
@@ -497,8 +501,7 @@ void MachineLICMBase::HoistRegionPostRA() {
// Walk the entire region, count number of defs for each register, and
// collect potential LICM candidates.
- const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
- for (MachineBasicBlock *BB : Blocks) {
+ for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
// If the header of the loop containing this basic block is a landing pad,
// then don't try to hoist instructions out of this loop.
const MachineLoop *ML = MLI->getLoopFor(BB);
@@ -570,8 +573,7 @@ void MachineLICMBase::HoistRegionPostRA() {
/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
/// sure it is not killed by any instructions in the loop.
void MachineLICMBase::AddToLiveIns(unsigned Reg) {
- const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
- for (MachineBasicBlock *BB : Blocks) {
+ for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
if (!BB->isLiveIn(Reg))
BB->addLiveIn(Reg);
for (MachineInstr &MI : *BB) {
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 639cd80768fc..6ef8de88f8b1 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -194,7 +194,7 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
}
-MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM)
+MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
: ImmutablePass(ID), TM(*TM),
Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
TM->getObjFileLowering(), nullptr, false) {
@@ -206,10 +206,11 @@ MachineModuleInfo::~MachineModuleInfo() = default;
bool MachineModuleInfo::doInitialization(Module &M) {
ObjFileMMI = nullptr;
CurCallSite = 0;
- DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
+ UsesVAFloatArgument = UsesMorestackAddr = false;
HasSplitStack = HasNosplitStack = false;
AddrLabelSymbols = nullptr;
TheModule = &M;
+ DbgInfoAvailable = !empty(M.debug_compile_units());
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
index 07b173bc94f8..7b4f64bfe60d 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -25,6 +25,7 @@ using namespace llvm;
// Out of line virtual method.
void MachineModuleInfoMachO::anchor() {}
void MachineModuleInfoELF::anchor() {}
+void MachineModuleInfoCOFF::anchor() {}
using PairTy = std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy>;
static int SortSymbolPair(const PairTy *LHS, const PairTy *RHS) {
diff --git a/contrib/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm/lib/CodeGen/MachineOperand.cpp
index 8098333832b4..05e51e1873cf 100644
--- a/contrib/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineOperand.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -460,7 +461,8 @@ static void printIRValueReference(raw_ostream &OS, const Value &V,
printLLVMNameWithoutPrefix(OS, V.getName());
return;
}
- MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V));
+ int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1;
+ MachineOperand::printIRSlotNumber(OS, Slot);
}
static void printSyncScope(raw_ostream &OS, const LLVMContext &Context,
@@ -695,6 +697,11 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI,
if (MCSymbol *Label = CFI.getLabel())
MachineOperand::printSymbol(OS, *Label);
break;
+ case MCCFIInstruction::OpNegateRAState:
+ OS << "negate_ra_sign_state ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
default:
// TODO: Print the other CFI Operations.
OS << "<unserializable cfi directive>";
@@ -742,10 +749,10 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "undef ";
if (isEarlyClobber())
OS << "early-clobber ";
- if (isDebug())
- OS << "debug-use ";
if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable())
OS << "renamable ";
+ // isDebug() is exactly true for register operands of a DBG_VALUE. So we
+ // simply infer it when parsing and do not need to print it.
const MachineRegisterInfo *MRI = nullptr;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -1078,7 +1085,11 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (getFailureOrdering() != AtomicOrdering::NotAtomic)
OS << toIRString(getFailureOrdering()) << ' ';
- OS << getSize();
+ if (getSize() == MemoryLocation::UnknownSize)
+ OS << "unknown-size";
+ else
+ OS << getSize();
+
if (const Value *Val = getValue()) {
OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
printIRValueReference(OS, *Val, MST);
diff --git a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp
index a712afec0959..ad96c0e579e4 100644
--- a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -128,9 +128,6 @@ struct SuffixTreeNode {
/// mapping by tacking that character on the end of the current string.
DenseMap<unsigned, SuffixTreeNode *> Children;
- /// A flag set to false if the node has been pruned from the tree.
- bool IsInTree = true;
-
/// The start index of this node's substring in the main string.
unsigned StartIdx = EmptyIdx;
@@ -167,15 +164,6 @@ struct SuffixTreeNode {
/// construction algorithm O(N^2) rather than O(N).
SuffixTreeNode *Link = nullptr;
- /// The parent of this node. Every node except for the root has a parent.
- SuffixTreeNode *Parent = nullptr;
-
- /// The number of times this node's string appears in the tree.
- ///
- /// This is equal to the number of leaf children of the string. It represents
- /// the number of suffixes that the node's string is a prefix of.
- unsigned OccurrenceCount = 0;
-
/// The length of the string formed by concatenating the edge labels from the
/// root to this node.
unsigned ConcatLen = 0;
@@ -200,9 +188,8 @@ struct SuffixTreeNode {
return *EndIdx - StartIdx + 1;
}
- SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link,
- SuffixTreeNode *Parent)
- : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
+ SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link)
+ : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {}
SuffixTreeNode() {}
};
@@ -231,14 +218,18 @@ struct SuffixTreeNode {
/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
class SuffixTree {
public:
- /// Stores each leaf node in the tree.
- ///
- /// This is used for finding outlining candidates.
- std::vector<SuffixTreeNode *> LeafVector;
-
/// Each element is an integer representing an instruction in the module.
ArrayRef<unsigned> Str;
+ /// A repeated substring in the tree.
+ struct RepeatedSubstring {
+ /// The length of the string.
+ unsigned Length;
+
+ /// The start indices of each occurrence.
+ std::vector<unsigned> StartIndices;
+ };
+
private:
/// Maintains each node in the tree.
SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
@@ -291,7 +282,7 @@ private:
assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
SuffixTreeNode *N = new (NodeAllocator.Allocate())
- SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent);
+ SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr);
Parent.Children[Edge] = N;
return N;
@@ -314,7 +305,7 @@ private:
unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
SuffixTreeNode *N = new (NodeAllocator.Allocate())
- SuffixTreeNode(StartIdx, E, Root, Parent);
+ SuffixTreeNode(StartIdx, E, Root);
if (Parent)
Parent->Children[Edge] = N;
@@ -322,41 +313,27 @@ private:
}
/// Set the suffix indices of the leaves to the start indices of their
- /// respective suffixes. Also stores each leaf in \p LeafVector at its
- /// respective suffix index.
+ /// respective suffixes.
///
/// \param[in] CurrNode The node currently being visited.
- /// \param CurrIdx The current index of the string being visited.
- void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) {
+ /// \param CurrNodeLen The concatenation of all node sizes from the root to
+ /// this node. Used to produce suffix indices.
+ void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) {
bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
- // Store the length of the concatenation of all strings from the root to
- // this node.
- if (!CurrNode.isRoot()) {
- if (CurrNode.ConcatLen == 0)
- CurrNode.ConcatLen = CurrNode.size();
-
- if (CurrNode.Parent)
- CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
- }
-
+ // Store the concatenation of lengths down from the root.
+ CurrNode.ConcatLen = CurrNodeLen;
// Traverse the tree depth-first.
for (auto &ChildPair : CurrNode.Children) {
assert(ChildPair.second && "Node had a null child!");
- setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size());
+ setSuffixIndices(*ChildPair.second,
+ CurrNodeLen + ChildPair.second->size());
}
- // Is this node a leaf?
- if (IsLeaf) {
- // If yes, give it a suffix index and bump its parent's occurrence count.
- CurrNode.SuffixIdx = Str.size() - CurrIdx;
- assert(CurrNode.Parent && "CurrNode had no parent!");
- CurrNode.Parent->OccurrenceCount++;
-
- // Store the leaf in the leaf vector for pruning later.
- LeafVector[CurrNode.SuffixIdx] = &CurrNode;
- }
+ // Is this node a leaf? If it is, give it a suffix index.
+ if (IsLeaf)
+ CurrNode.SuffixIdx = Str.size() - CurrNodeLen;
}
/// Construct the suffix tree for the prefix of the input ending at
@@ -461,7 +438,6 @@ private:
// Make the old node a child of the split node and update its start
// index. This is the node n from the diagram.
NextNode->StartIdx += Active.Len;
- NextNode->Parent = SplitNode;
SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
// SplitNode is an internal node, update the suffix link.
@@ -495,9 +471,7 @@ public:
/// \param Str The string to construct the suffix tree for.
SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
- Root->IsInTree = true;
Active.Node = Root;
- LeafVector = std::vector<SuffixTreeNode *>(Str.size());
// Keep track of the number of suffixes we have to add of the current
// prefix.
@@ -518,6 +492,117 @@ public:
assert(Root && "Root node can't be nullptr!");
setSuffixIndices(*Root, 0);
}
+
+
+ /// Iterator for finding all repeated substrings in the suffix tree.
+ struct RepeatedSubstringIterator {
+ private:
+ /// The current node we're visiting.
+ SuffixTreeNode *N = nullptr;
+
+ /// The repeated substring associated with this node.
+ RepeatedSubstring RS;
+
+ /// The nodes left to visit.
+ std::vector<SuffixTreeNode *> ToVisit;
+
+ /// The minimum length of a repeated substring to find.
+ /// Since we're outlining, we want at least two instructions in the range.
+ /// FIXME: This may not be true for targets like X86 which support many
+ /// instruction lengths.
+ const unsigned MinLength = 2;
+
+ /// Move the iterator to the next repeated substring.
+ void advance() {
+ // Clear the current state. If we're at the end of the range, then this
+ // is the state we want to be in.
+ RS = RepeatedSubstring();
+ N = nullptr;
+
+ // Each leaf node represents a repeat of a string.
+ std::vector<SuffixTreeNode *> LeafChildren;
+
+ // Continue visiting nodes until we find one which repeats more than once.
+ while (!ToVisit.empty()) {
+ SuffixTreeNode *Curr = ToVisit.back();
+ ToVisit.pop_back();
+ LeafChildren.clear();
+
+ // Keep track of the length of the string associated with the node. If
+ // it's too short, we'll quit.
+ unsigned Length = Curr->ConcatLen;
+
+ // Iterate over each child, saving internal nodes for visiting, and
+ // leaf nodes in LeafChildren. Internal nodes represent individual
+ // strings, which may repeat.
+ for (auto &ChildPair : Curr->Children) {
+ // Save all of this node's children for processing.
+ if (!ChildPair.second->isLeaf())
+ ToVisit.push_back(ChildPair.second);
+
+ // It's not an internal node, so it must be a leaf. If we have a
+ // long enough string, then save the leaf children.
+ else if (Length >= MinLength)
+ LeafChildren.push_back(ChildPair.second);
+ }
+
+ // The root never represents a repeated substring. If we're looking at
+ // that, then skip it.
+ if (Curr->isRoot())
+ continue;
+
+ // Do we have any repeated substrings?
+ if (LeafChildren.size() >= 2) {
+ // Yes. Update the state to reflect this, and then bail out.
+ N = Curr;
+ RS.Length = Length;
+ for (SuffixTreeNode *Leaf : LeafChildren)
+ RS.StartIndices.push_back(Leaf->SuffixIdx);
+ break;
+ }
+ }
+
+ // At this point, either RS is an empty RepeatedSubstring, or it was
+ // set in the above loop. Similarly, N is either nullptr, or the node
+ // associated with RS.
+ }
+
+ public:
+ /// Return the current repeated substring.
+ RepeatedSubstring &operator*() { return RS; }
+
+ RepeatedSubstringIterator &operator++() {
+ advance();
+ return *this;
+ }
+
+ RepeatedSubstringIterator operator++(int I) {
+ RepeatedSubstringIterator It(*this);
+ advance();
+ return It;
+ }
+
+ bool operator==(const RepeatedSubstringIterator &Other) {
+ return N == Other.N;
+ }
+ bool operator!=(const RepeatedSubstringIterator &Other) {
+ return !(*this == Other);
+ }
+
+ RepeatedSubstringIterator(SuffixTreeNode *N) : N(N) {
+ // Do we have a non-null node?
+ if (N) {
+ // Yes. At the first step, we need to visit all of N's children.
+ // Note: This means that we visit N last.
+ ToVisit.push_back(N);
+ advance();
+ }
+ }
+};
+
+ typedef RepeatedSubstringIterator iterator;
+ iterator begin() { return iterator(Root); }
+ iterator end() { return iterator(nullptr); }
};
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
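A rough consumption sketch for the iterator added above; handleOccurrence is a hypothetical consumer, while the construction line mirrors the one used further down in this patch:

    // Walk every repeated substring the tree exposes and hand each
    // occurrence (start index plus length) to a consumer.
    SuffixTree ST(Mapper.UnsignedVec);
    for (SuffixTree::RepeatedSubstring &RS : ST)
      for (unsigned StartIdx : RS.StartIndices)
        handleOccurrence(StartIdx, RS.Length);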
@@ -537,9 +622,8 @@ struct InstructionMapper {
DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
InstructionIntegerMap;
- /// Corresponcence from unsigned integers to \p MachineInstrs.
- /// Inverse of \p InstructionIntegerMap.
- DenseMap<unsigned, MachineInstr *> IntegerInstructionMap;
+ /// Correspondence between \p MachineBasicBlocks and target-defined flags.
+ DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
/// The vector of unsigned integers that the module is mapped to.
std::vector<unsigned> UnsignedVec;
@@ -548,17 +632,39 @@ struct InstructionMapper {
/// at index i in \p UnsignedVec for each index i.
std::vector<MachineBasicBlock::iterator> InstrList;
+ // Set if we added an illegal number in the previous step.
+ // Since each illegal number is unique, we only need one of them between
+ // each range of legal numbers. This lets us make sure we don't add more
+ // than one illegal number per range.
+ bool AddedIllegalLastTime = false;
+
/// Maps \p *It to a legal integer.
///
- /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,
- /// \p IntegerInstructionMap, and \p LegalInstrNumber.
+ /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
+ /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
///
/// \returns The integer that \p *It was mapped to.
- unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) {
+ unsigned mapToLegalUnsigned(
+ MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
+ bool &HaveLegalRange, unsigned &NumLegalInBlock,
+ std::vector<unsigned> &UnsignedVecForMBB,
+ std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ // We added something legal, so we should unset the AddedIllegalLastTime
+ // flag.
+ AddedIllegalLastTime = false;
+
+ // If we have at least two adjacent legal instructions (which may have
+ // invisible instructions in between), remember that.
+ if (CanOutlineWithPrevInstr)
+ HaveLegalRange = true;
+ CanOutlineWithPrevInstr = true;
+
+ // Keep track of the number of legal instructions we insert.
+ NumLegalInBlock++;
// Get the integer for this instruction or give it the current
// LegalInstrNumber.
- InstrList.push_back(It);
+ InstrListForMBB.push_back(It);
MachineInstr &MI = *It;
bool WasInserted;
DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
@@ -568,12 +674,10 @@ struct InstructionMapper {
unsigned MINumber = ResultIt->second;
// There was an insertion.
- if (WasInserted) {
+ if (WasInserted)
LegalInstrNumber++;
- IntegerInstructionMap.insert(std::make_pair(MINumber, &MI));
- }
- UnsignedVec.push_back(MINumber);
+ UnsignedVecForMBB.push_back(MINumber);
// Make sure we don't overflow or use any integers reserved by the DenseMap.
if (LegalInstrNumber >= IllegalInstrNumber)
@@ -589,14 +693,26 @@ struct InstructionMapper {
/// Maps \p *It to an illegal integer.
///
- /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber.
+ /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
+ /// IllegalInstrNumber.
///
/// \returns The integer that \p *It was mapped to.
- unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) {
+ unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It,
+ bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB,
+ std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ // Can't outline an illegal instruction. Set the flag.
+ CanOutlineWithPrevInstr = false;
+
+ // Only add one illegal number per range of legal numbers.
+ if (AddedIllegalLastTime)
+ return IllegalInstrNumber;
+
+ // Remember that we added an illegal number last time.
+ AddedIllegalLastTime = true;
unsigned MINumber = IllegalInstrNumber;
- InstrList.push_back(It);
- UnsignedVec.push_back(IllegalInstrNumber);
+ InstrListForMBB.push_back(It);
+ UnsignedVecForMBB.push_back(IllegalInstrNumber);
IllegalInstrNumber--;
assert(LegalInstrNumber < IllegalInstrNumber &&
@@ -623,40 +739,78 @@ struct InstructionMapper {
/// \param TII \p TargetInstrInfo for the function.
void convertToUnsignedVec(MachineBasicBlock &MBB,
const TargetInstrInfo &TII) {
- unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB);
+ unsigned Flags = 0;
+
+ // Don't even map in this case.
+ if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
+ return;
+
+ // Store info for the MBB for later outlining.
+ MBBFlagsMap[&MBB] = Flags;
+
+ MachineBasicBlock::iterator It = MBB.begin();
- for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;
- It++) {
+ // The number of instructions in this block that will be considered for
+ // outlining.
+ unsigned NumLegalInBlock = 0;
+ // True if we have at least two legal instructions which aren't separated
+ // by an illegal instruction.
+ bool HaveLegalRange = false;
+
+ // True if we can perform outlining given the last mapped (non-invisible)
+ // instruction. This lets us know if we have a legal range.
+ bool CanOutlineWithPrevInstr = false;
+
+ // FIXME: Should this all just be handled in the target, rather than using
+ // repeated calls to getOutliningType?
+ std::vector<unsigned> UnsignedVecForMBB;
+ std::vector<MachineBasicBlock::iterator> InstrListForMBB;
+
+ for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; It++) {
// Keep track of where this instruction is in the module.
switch (TII.getOutliningType(It, Flags)) {
case InstrType::Illegal:
- mapToIllegalUnsigned(It);
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr,
+ UnsignedVecForMBB, InstrListForMBB);
break;
case InstrType::Legal:
- mapToLegalUnsigned(It);
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
break;
case InstrType::LegalTerminator:
- mapToLegalUnsigned(It);
- InstrList.push_back(It);
- UnsignedVec.push_back(IllegalInstrNumber);
- IllegalInstrNumber--;
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
+ // The instruction also acts as a terminator, so we have to record that
+ // in the string.
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
break;
case InstrType::Invisible:
+ // Normally this is set by mapTo(Blah)Unsigned, but we just want to
+ // skip this instruction. So, unset the flag here.
+ AddedIllegalLastTime = false;
break;
}
}
- // After we're done every insertion, uniquely terminate this part of the
- // "string". This makes sure we won't match across basic block or function
- // boundaries since the "end" is encoded uniquely and thus appears in no
- // repeated substring.
- InstrList.push_back(MBB.end());
- UnsignedVec.push_back(IllegalInstrNumber);
- IllegalInstrNumber--;
+ // Are there enough legal instructions in the block for outlining to be
+ // possible?
+ if (HaveLegalRange) {
+ // After we're done every insertion, uniquely terminate this part of the
+ // "string". This makes sure we won't match across basic block or function
+ // boundaries since the "end" is encoded uniquely and thus appears in no
+ // repeated substring.
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
+ InstrList.insert(InstrList.end(), InstrListForMBB.begin(),
+ InstrListForMBB.end());
+ UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(),
+ UnsignedVecForMBB.end());
+ }
}
InstructionMapper() {
@@ -692,9 +846,6 @@ struct MachineOutliner : public ModulePass {
/// Set when the pass is constructed in TargetPassConfig.
bool RunOnAllFunctions = true;
- // Collection of IR functions created by the outliner.
- std::vector<Function *> CreatedIRFunctions;
-
StringRef getPassName() const override { return "Machine Outliner"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -717,7 +868,8 @@ struct MachineOutliner : public ModulePass {
/// Remark output explaining that a function was outlined.
void emitOutlinedFunctionRemark(OutlinedFunction &OF);
- /// Find all repeated substrings that satisfy the outlining cost model.
+ /// Find all repeated substrings that satisfy the outlining cost model by
+ /// constructing a suffix tree.
///
/// If a substring appears at least twice, then it must be represented by
/// an internal node which appears in at least two suffixes. Each suffix
@@ -726,73 +878,25 @@ struct MachineOutliner : public ModulePass {
/// internal node represents a beneficial substring, then we use each of
/// its leaf children to find the locations of its substring.
///
- /// \param ST A suffix tree to query.
/// \param Mapper Contains outlining mapping information.
- /// \param[out] CandidateList Filled with candidates representing each
- /// beneficial substring.
/// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
/// each type of candidate.
- ///
- /// \returns The length of the longest candidate found.
- unsigned
- findCandidates(SuffixTree &ST,
- InstructionMapper &Mapper,
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList);
-
- /// Replace the sequences of instructions represented by the
- /// \p Candidates in \p CandidateList with calls to \p MachineFunctions
- /// described in \p FunctionList.
+ void findCandidates(InstructionMapper &Mapper,
+ std::vector<OutlinedFunction> &FunctionList);
+
+ /// Replace the sequences of instructions represented by \p OutlinedFunctions
+ /// with calls to functions.
///
/// \param M The module we are outlining from.
- /// \param CandidateList A list of candidates to be outlined.
/// \param FunctionList A list of functions to be inserted into the module.
/// \param Mapper Contains the instruction mappings for the module.
- bool outline(Module &M,
- const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
+ bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
InstructionMapper &Mapper);
/// Creates a function for \p OF and inserts it into the module.
- MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF,
- InstructionMapper &Mapper);
-
- /// Find potential outlining candidates and store them in \p CandidateList.
- ///
- /// For each type of potential candidate, also build an \p OutlinedFunction
- /// struct containing the information to build the function for that
- /// candidate.
- ///
- /// \param[out] CandidateList Filled with outlining candidates for the module.
- /// \param[out] FunctionList Filled with functions corresponding to each type
- /// of \p Candidate.
- /// \param ST The suffix tree for the module.
- ///
- /// \returns The length of the longest candidate found. 0 if there are none.
- unsigned
- buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
- SuffixTree &ST, InstructionMapper &Mapper);
-
- /// Helper function for pruneOverlaps.
- /// Removes \p C from the candidate list, and updates its \p OutlinedFunction.
- void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList);
-
- /// Remove any overlapping candidates that weren't handled by the
- /// suffix tree's pruning method.
- ///
- /// Pruning from the suffix tree doesn't necessarily remove all overlaps.
- /// If a short candidate is chosen for outlining, then a longer candidate
- /// which has that short candidate as a suffix is chosen, the tree's pruning
- /// method will not find it. Thus, we need to prune before outlining as well.
- ///
- /// \param[in,out] CandidateList A list of outlining candidates.
- /// \param[in,out] FunctionList A list of functions to be outlined.
- /// \param Mapper Contains instruction mapping info for outlining.
- /// \param MaxCandidateLen The length of the longest candidate.
- void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper, unsigned MaxCandidateLen);
+ MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
+ InstructionMapper &Mapper,
+ unsigned Name);
/// Construct a suffix tree on the instructions in \p M and outline repeated
/// strings from that tree.
@@ -802,13 +906,31 @@ struct MachineOutliner : public ModulePass {
/// function for remark emission.
DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
DISubprogram *SP;
- for (const std::shared_ptr<Candidate> &C : OF.Candidates)
- if (C && C->getMF() && (SP = C->getMF()->getFunction().getSubprogram()))
+ for (const Candidate &C : OF.Candidates)
+ if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram()))
return SP;
return nullptr;
}
-};
+ /// Populate an \p InstructionMapper with instruction-to-integer mappings.
+ /// These are used to construct a suffix tree.
+ void populateMapper(InstructionMapper &Mapper, Module &M,
+ MachineModuleInfo &MMI);
+
+ /// Initialize information necessary to output a size remark.
+ /// FIXME: This should be handled by the pass manager, not the outliner.
+ /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
+ /// pass manager.
+ void initSizeRemarkInfo(
+ const Module &M, const MachineModuleInfo &MMI,
+ StringMap<unsigned> &FunctionToInstrCount);
+
+ /// Emit the remark.
+ // FIXME: This should be handled by the pass manager, not the outliner.
+ void emitInstrCountChangedRemark(
+ const Module &M, const MachineModuleInfo &MMI,
+ const StringMap<unsigned> &FunctionToInstrCount);
+};
} // Anonymous namespace.
char MachineOutliner::ID = 0;
@@ -828,6 +950,10 @@ INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
void MachineOutliner::emitNotOutliningCheaperRemark(
unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
OutlinedFunction &OF) {
+ // FIXME: Right now, we arbitrarily choose some Candidate from the
+ // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
+ // We should probably sort these by function name or something to make sure
+ // the remarks are stable.
Candidate &C = CandidatesForRepeatedSeq.front();
MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
MORE.emit([&]() {
@@ -861,7 +987,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
MBB->findDebugLoc(MBB->begin()), MBB);
R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
- << "outlining " << NV("Length", OF.Sequence.size()) << " instructions "
+ << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
<< "from " << NV("NumOccurrences", OF.getOccurrenceCount())
<< " locations. "
<< "(Found at: ";
@@ -869,12 +995,8 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
// Tell the user the other places the candidate was found.
for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
- // Skip over things that were pruned.
- if (!OF.Candidates[i]->InCandidateList)
- continue;
-
R << NV((Twine("StartLoc") + Twine(i)).str(),
- OF.Candidates[i]->front()->getDebugLoc());
+ OF.Candidates[i].front()->getDebugLoc());
if (i != e - 1)
R << ", ";
}
@@ -884,95 +1006,65 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
MORE.emit(R);
}
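Editorial sketch (not part of the patch): the "Saved ... bytes" figure in the remark above comes from OutlinedFunction::getBenefit(). The snippet below shows the arithmetic it roughly implements, assuming the usual cost model of one copy of the sequence plus a frame overhead plus a per-call overhead; all byte counts are invented for illustration and the exact fields are target-filled by getOutliningCandidateInfo.

#include <cstdio>

int main() {
  unsigned SequenceSize = 24;  // bytes of the repeated sequence
  unsigned Occurrences = 3;    // how many places it appears
  unsigned CallOverhead = 4;   // bytes of the call left at each site
  unsigned FrameOverhead = 8;  // bytes of prologue/epilogue in the new function

  unsigned NotOutlinedCost = Occurrences * SequenceSize;
  unsigned OutlinedCost =
      SequenceSize + FrameOverhead + Occurrences * CallOverhead;
  unsigned Benefit =
      NotOutlinedCost < OutlinedCost ? 0 : NotOutlinedCost - OutlinedCost;
  std::printf("Saved %u bytes by outlining from %u locations\n", Benefit,
              Occurrences);
  return 0;
}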
-unsigned MachineOutliner::findCandidates(
- SuffixTree &ST, InstructionMapper &Mapper,
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList) {
- CandidateList.clear();
+void
+MachineOutliner::findCandidates(InstructionMapper &Mapper,
+ std::vector<OutlinedFunction> &FunctionList) {
FunctionList.clear();
- unsigned MaxLen = 0;
-
- // FIXME: Visit internal nodes instead of leaves.
- for (SuffixTreeNode *Leaf : ST.LeafVector) {
- assert(Leaf && "Leaves in LeafVector cannot be null!");
- if (!Leaf->IsInTree)
- continue;
-
- assert(Leaf->Parent && "All leaves must have parents!");
- SuffixTreeNode &Parent = *(Leaf->Parent);
-
- // If it doesn't appear enough, or we already outlined from it, skip it.
- if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
- continue;
-
- // Figure out if this candidate is beneficial.
- unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size();
-
- // Too short to be beneficial; skip it.
- // FIXME: This isn't necessarily true for, say, X86. If we factor in
- // instruction lengths we need more information than this.
- if (StringLen < 2)
- continue;
-
- // If this is a beneficial class of candidate, then every one is stored in
- // this vector.
- std::vector<Candidate> CandidatesForRepeatedSeq;
-
- // Figure out the call overhead for each instance of the sequence.
- for (auto &ChildPair : Parent.Children) {
- SuffixTreeNode *M = ChildPair.second;
-
- if (M && M->IsInTree && M->isLeaf()) {
- // Never visit this leaf again.
- M->IsInTree = false;
- unsigned StartIdx = M->SuffixIdx;
- unsigned EndIdx = StartIdx + StringLen - 1;
+ SuffixTree ST(Mapper.UnsignedVec);
- // Trick: Discard some candidates that would be incompatible with the
- // ones we've already found for this sequence. This will save us some
- // work in candidate selection.
- //
- // If two candidates overlap, then we can't outline them both. This
- // happens when we have candidates that look like, say
- //
- // AA (where each "A" is an instruction).
- //
- // We might have some portion of the module that looks like this:
- // AAAAAA (6 A's)
- //
- // In this case, there are 5 different copies of "AA" in this range, but
- // at most 3 can be outlined. If only outlining 3 of these is going to
- // be unbeneficial, then we ought to not bother.
- //
- // Note that two things DON'T overlap when they look like this:
- // start1...end1 .... start2...end2
- // That is, one must either
- // * End before the other starts
- // * Start after the other ends
- if (std::all_of(CandidatesForRepeatedSeq.begin(),
- CandidatesForRepeatedSeq.end(),
- [&StartIdx, &EndIdx](const Candidate &C) {
- return (EndIdx < C.getStartIdx() ||
- StartIdx > C.getEndIdx());
- })) {
- // It doesn't overlap with anything, so we can outline it.
- // Each sequence is over [StartIt, EndIt].
- // Save the candidate and its location.
-
- MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
- MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
-
- CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
- EndIt, StartIt->getParent(),
- FunctionList.size());
- }
+  // First, find all of the repeated substrings in the tree of minimum
+  // length 2.
+ std::vector<Candidate> CandidatesForRepeatedSeq;
+ for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) {
+ CandidatesForRepeatedSeq.clear();
+ SuffixTree::RepeatedSubstring RS = *It;
+ unsigned StringLen = RS.Length;
+ for (const unsigned &StartIdx : RS.StartIndices) {
+ unsigned EndIdx = StartIdx + StringLen - 1;
+ // Trick: Discard some candidates that would be incompatible with the
+ // ones we've already found for this sequence. This will save us some
+ // work in candidate selection.
+ //
+ // If two candidates overlap, then we can't outline them both. This
+ // happens when we have candidates that look like, say
+ //
+ // AA (where each "A" is an instruction).
+ //
+ // We might have some portion of the module that looks like this:
+ // AAAAAA (6 A's)
+ //
+ // In this case, there are 5 different copies of "AA" in this range, but
+ // at most 3 can be outlined. If only outlining 3 of these is going to
+ // be unbeneficial, then we ought to not bother.
+ //
+ // Note that two things DON'T overlap when they look like this:
+ // start1...end1 .... start2...end2
+ // That is, one must either
+ // * End before the other starts
+ // * Start after the other ends
+ if (std::all_of(
+ CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(),
+ [&StartIdx, &EndIdx](const Candidate &C) {
+ return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
+ })) {
+ // It doesn't overlap with anything, so we can outline it.
+ // Each sequence is over [StartIt, EndIt].
+ // Save the candidate and its location.
+
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
+ MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
+ MachineBasicBlock *MBB = StartIt->getParent();
+
+ CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
+ EndIt, MBB, FunctionList.size(),
+ Mapper.MBBFlagsMap[MBB]);
}
}
// We've found something we might want to outline.
// Create an OutlinedFunction to store it and check if it'd be beneficial
// to outline.
- if (CandidatesForRepeatedSeq.empty())
+ if (CandidatesForRepeatedSeq.size() < 2)
continue;
// Arbitrarily choose a TII from the first candidate.
@@ -983,179 +1075,33 @@ unsigned MachineOutliner::findCandidates(
OutlinedFunction OF =
TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
- // If we deleted every candidate, then there's nothing to outline.
- if (OF.Candidates.empty())
+ // If we deleted too many candidates, then there's nothing worth outlining.
+ // FIXME: This should take target-specified instruction sizes into account.
+ if (OF.Candidates.size() < 2)
continue;
- std::vector<unsigned> Seq;
- for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
- Seq.push_back(ST.Str[i]);
- OF.Sequence = Seq;
- OF.Name = FunctionList.size();
-
// Is it better to outline this candidate than not?
if (OF.getBenefit() < 1) {
emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF);
continue;
}
- if (StringLen > MaxLen)
- MaxLen = StringLen;
-
- // The function is beneficial. Save its candidates to the candidate list
- // for pruning.
- for (std::shared_ptr<Candidate> &C : OF.Candidates)
- CandidateList.push_back(C);
FunctionList.push_back(OF);
-
- // Move to the next function.
- Parent.IsInTree = false;
- }
-
- return MaxLen;
-}
-
-// Remove C from the candidate space, and update its OutlinedFunction.
-void MachineOutliner::prune(Candidate &C,
- std::vector<OutlinedFunction> &FunctionList) {
- // Get the OutlinedFunction associated with this Candidate.
- OutlinedFunction &F = FunctionList[C.FunctionIdx];
-
- // Update C's associated function's occurrence count.
- F.decrement();
-
- // Remove C from the CandidateList.
- C.InCandidateList = false;
-
- LLVM_DEBUG(dbgs() << "- Removed a Candidate \n";
- dbgs() << "--- Num fns left for candidate: "
- << F.getOccurrenceCount() << "\n";
- dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit()
- << "\n";);
-}
-
-void MachineOutliner::pruneOverlaps(
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper,
- unsigned MaxCandidateLen) {
-
- // Return true if this candidate became unbeneficial for outlining in a
- // previous step.
- auto ShouldSkipCandidate = [&FunctionList, this](Candidate &C) {
-
- // Check if the candidate was removed in a previous step.
- if (!C.InCandidateList)
- return true;
-
- // C must be alive. Check if we should remove it.
- if (FunctionList[C.FunctionIdx].getBenefit() < 1) {
- prune(C, FunctionList);
- return true;
- }
-
- // C is in the list, and F is still beneficial.
- return false;
- };
-
- // TODO: Experiment with interval trees or other interval-checking structures
- // to lower the time complexity of this function.
- // TODO: Can we do better than the simple greedy choice?
- // Check for overlaps in the range.
- // This is O(MaxCandidateLen * CandidateList.size()).
- for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
- It++) {
- Candidate &C1 = **It;
-
- // If C1 was already pruned, or its function is no longer beneficial for
- // outlining, move to the next candidate.
- if (ShouldSkipCandidate(C1))
- continue;
-
- // The minimum start index of any candidate that could overlap with this
- // one.
- unsigned FarthestPossibleIdx = 0;
-
- // Either the index is 0, or it's at most MaxCandidateLen indices away.
- if (C1.getStartIdx() > MaxCandidateLen)
- FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen;
-
- // Compare against the candidates in the list that start at most
- // FarthestPossibleIdx indices away from C1. There are at most
- // MaxCandidateLen of these.
- for (auto Sit = It + 1; Sit != Et; Sit++) {
- Candidate &C2 = **Sit;
-
- // Is this candidate too far away to overlap?
- if (C2.getStartIdx() < FarthestPossibleIdx)
- break;
-
- // If C2 was already pruned, or its function is no longer beneficial for
- // outlining, move to the next candidate.
- if (ShouldSkipCandidate(C2))
- continue;
-
- // Do C1 and C2 overlap?
- //
- // Not overlapping:
- // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices
- //
- // We sorted our candidate list so C2Start <= C1Start. We know that
- // C2End > C2Start since each candidate has length >= 2. Therefore, all we
- // have to check is C2End < C2Start to see if we overlap.
- if (C2.getEndIdx() < C1.getStartIdx())
- continue;
-
- // C1 and C2 overlap.
- // We need to choose the better of the two.
- //
- // Approximate this by picking the one which would have saved us the
- // most instructions before any pruning.
-
- // Is C2 a better candidate?
- if (C2.Benefit > C1.Benefit) {
- // Yes, so prune C1. Since C1 is dead, we don't have to compare it
- // against anything anymore, so break.
- prune(C1, FunctionList);
- break;
- }
-
- // Prune C2 and move on to the next candidate.
- prune(C2, FunctionList);
- }
}
}
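Editorial sketch (not part of the patch): the disjointness filter in findCandidates above is easier to picture on bare indices. For a length-2 sequence repeated at five overlapping start indices (the "AAAAAA" case from the comment), only every other start survives. Plain standalone C++, not the outliner's own types; Range and the index values are made up for illustration.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Range { unsigned Start, End; };

int main() {
  const std::vector<unsigned> Starts = {0, 1, 2, 3, 4}; // 5 copies of "AA"
  const unsigned Len = 2;
  std::vector<Range> Kept;
  for (unsigned S : Starts) {
    unsigned E = S + Len - 1;
    // Same predicate as in findCandidates: keep the range only if it
    // overlaps nothing we already kept.
    bool Disjoint = std::all_of(Kept.begin(), Kept.end(), [&](const Range &R) {
      return E < R.Start || S > R.End;
    });
    if (Disjoint)
      Kept.push_back({S, E});
  }
  std::printf("kept %zu of %zu copies\n", Kept.size(), Starts.size()); // 3 of 5
  return 0;
}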
-unsigned MachineOutliner::buildCandidateList(
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST,
- InstructionMapper &Mapper) {
-
- std::vector<unsigned> CandidateSequence; // Current outlining candidate.
- unsigned MaxCandidateLen = 0; // Length of the longest candidate.
-
- MaxCandidateLen =
- findCandidates(ST, Mapper, CandidateList, FunctionList);
-
- // Sort the candidates in decending order. This will simplify the outlining
- // process when we have to remove the candidates from the mapping by
- // allowing us to cut them out without keeping track of an offset.
- std::stable_sort(
- CandidateList.begin(), CandidateList.end(),
- [](const std::shared_ptr<Candidate> &LHS,
- const std::shared_ptr<Candidate> &RHS) { return *LHS < *RHS; });
-
- return MaxCandidateLen;
-}
-
MachineFunction *
-MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
- InstructionMapper &Mapper) {
+MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
+ InstructionMapper &Mapper,
+ unsigned Name) {
// Create the function name. This should be unique. For now, just hash the
// module name and include it in the function name plus the number of this
// function.
std::ostringstream NameStream;
- NameStream << "OUTLINED_FUNCTION_" << OF.Name;
+ // FIXME: We should have a better naming scheme. This should be stable,
+ // regardless of changes to the outliner's cost model/traversal order.
+ NameStream << "OUTLINED_FUNCTION_" << Name;
// Create the function using an IR-level function.
LLVMContext &C = M.getContext();
@@ -1176,8 +1122,14 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
F->addFnAttr(Attribute::OptimizeForSize);
F->addFnAttr(Attribute::MinSize);
- // Save F so that we can add debug info later if we need to.
- CreatedIRFunctions.push_back(F);
+ // Include target features from an arbitrary candidate for the outlined
+ // function. This makes sure the outlined function knows what kinds of
+ // instructions are going into it. This is fine, since all parent functions
+ // must necessarily support the instructions that are in the outlined region.
+ Candidate &FirstCand = OF.Candidates.front();
+ const Function &ParentFn = FirstCand.getMF()->getFunction();
+ if (ParentFn.hasFnAttribute("target-features"))
+ F->addFnAttr(ParentFn.getFnAttribute("target-features"));
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
@@ -1192,12 +1144,10 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
// Insert the new function into the module.
MF.insert(MF.begin(), &MBB);
- // Copy over the instructions for the function using the integer mappings in
- // its sequence.
- for (unsigned Str : OF.Sequence) {
- MachineInstr *NewMI =
- MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second);
- NewMI->dropMemRefs();
+ for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
+ ++I) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ NewMI->dropMemRefs(MF);
// Don't keep debug information for outlined instructions.
NewMI->setDebugLoc(DebugLoc());
@@ -1206,6 +1156,10 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
TII.buildOutlinedFrame(MBB, MF, OF);
+ // Outlined functions shouldn't preserve liveness.
+ MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
+ MF.getRegInfo().freezeReservedRegs(MF);
+
// If there's a DISubprogram associated with this outlined function, then
// emit debug info for the outlined function.
if (DISubprogram *SP = getSubprogramOrNull(OF)) {
@@ -1214,118 +1168,127 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
DIBuilder DB(M, true, CU);
DIFile *Unit = SP->getFile();
Mangler Mg;
-
- // Walk over each IR function we created in the outliner and create
- // DISubprograms for each function.
- for (Function *F : CreatedIRFunctions) {
- // Get the mangled name of the function for the linkage name.
- std::string Dummy;
- llvm::raw_string_ostream MangledNameStream(Dummy);
- Mg.getNameWithPrefix(MangledNameStream, F, false);
-
- DISubprogram *SP = DB.createFunction(
- Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
- Unit /* File */,
- 0 /* Line 0 is reserved for compiler-generated code. */,
- DB.createSubroutineType(
- DB.getOrCreateTypeArray(None)), /* void type */
- false, true, 0, /* Line 0 is reserved for compiler-generated code. */
- DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
- true /* Outlined code is optimized code by definition. */);
-
- // Don't add any new variables to the subprogram.
- DB.finalizeSubprogram(SP);
-
- // Attach subprogram to the function.
- F->setSubprogram(SP);
- }
-
+ // Get the mangled name of the function for the linkage name.
+ std::string Dummy;
+ llvm::raw_string_ostream MangledNameStream(Dummy);
+ Mg.getNameWithPrefix(MangledNameStream, F, false);
+
+ DISubprogram *OutlinedSP = DB.createFunction(
+ Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
+ Unit /* File */,
+ 0 /* Line 0 is reserved for compiler-generated code. */,
+ DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
+ 0, /* Line 0 is reserved for compiler-generated code. */
+ DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
+ /* Outlined code is optimized code by definition. */
+ DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
+
+ // Don't add any new variables to the subprogram.
+ DB.finalizeSubprogram(OutlinedSP);
+
+ // Attach subprogram to the function.
+ F->setSubprogram(OutlinedSP);
// We're done with the DIBuilder.
DB.finalize();
}
- // Outlined functions shouldn't preserve liveness.
- MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
- MF.getRegInfo().freezeReservedRegs(MF);
return &MF;
}
-bool MachineOutliner::outline(
- Module &M, const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper) {
+bool MachineOutliner::outline(Module &M,
+ std::vector<OutlinedFunction> &FunctionList,
+ InstructionMapper &Mapper) {
bool OutlinedSomething = false;
- // Replace the candidates with calls to their respective outlined functions.
- for (const std::shared_ptr<Candidate> &Cptr : CandidateList) {
- Candidate &C = *Cptr;
- // Was the candidate removed during pruneOverlaps?
- if (!C.InCandidateList)
- continue;
- // If not, then look at its OutlinedFunction.
- OutlinedFunction &OF = FunctionList[C.FunctionIdx];
+ // Number to append to the current outlined function.
+ unsigned OutlinedFunctionNum = 0;
- // Was its OutlinedFunction made unbeneficial during pruneOverlaps?
+ // Sort by benefit. The most beneficial functions should be outlined first.
+ std::stable_sort(
+ FunctionList.begin(), FunctionList.end(),
+ [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
+ return LHS.getBenefit() > RHS.getBenefit();
+ });
+
+ // Walk over each function, outlining them as we go along. Functions are
+ // outlined greedily, based off the sort above.
+ for (OutlinedFunction &OF : FunctionList) {
+ // If we outlined something that overlapped with a candidate in a previous
+ // step, then we can't outline from it.
+ erase_if(OF.Candidates, [&Mapper](Candidate &C) {
+ return std::any_of(
+ Mapper.UnsignedVec.begin() + C.getStartIdx(),
+ Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
+ [](unsigned I) { return (I == static_cast<unsigned>(-1)); });
+ });
+
+ // If we made it unbeneficial to outline this function, skip it.
if (OF.getBenefit() < 1)
continue;
- // Does this candidate have a function yet?
- if (!OF.MF) {
- OF.MF = createOutlinedFunction(M, OF, Mapper);
- emitOutlinedFunctionRemark(OF);
- FunctionsCreated++;
- }
-
+ // It's beneficial. Create the function and outline its sequence's
+ // occurrences.
+ OF.MF = createOutlinedFunction(M, OF, Mapper, OutlinedFunctionNum);
+ emitOutlinedFunctionRemark(OF);
+ FunctionsCreated++;
+ OutlinedFunctionNum++; // Created a function, move to the next name.
MachineFunction *MF = OF.MF;
- MachineBasicBlock &MBB = *C.getMBB();
- MachineBasicBlock::iterator StartIt = C.front();
- MachineBasicBlock::iterator EndIt = C.back();
- assert(StartIt != C.getMBB()->end() && "StartIt out of bounds!");
- assert(EndIt != C.getMBB()->end() && "EndIt out of bounds!");
-
const TargetSubtargetInfo &STI = MF->getSubtarget();
const TargetInstrInfo &TII = *STI.getInstrInfo();
- // Insert a call to the new function and erase the old sequence.
- auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C);
-
- // If the caller tracks liveness, then we need to make sure that anything
- // we outline doesn't break liveness assumptions.
- // The outlined functions themselves currently don't track liveness, but
- // we should make sure that the ranges we yank things out of aren't
- // wrong.
- if (MBB.getParent()->getProperties().hasProperty(
- MachineFunctionProperties::Property::TracksLiveness)) {
- // Helper lambda for adding implicit def operands to the call instruction.
- auto CopyDefs = [&CallInst](MachineInstr &MI) {
- for (MachineOperand &MOP : MI.operands()) {
- // Skip over anything that isn't a register.
- if (!MOP.isReg())
- continue;
-
- // If it's a def, add it to the call instruction.
- if (MOP.isDef())
- CallInst->addOperand(
- MachineOperand::CreateReg(MOP.getReg(), true, /* isDef = true */
- true /* isImp = true */));
- }
- };
+ // Replace occurrences of the sequence with calls to the new function.
+ for (Candidate &C : OF.Candidates) {
+ MachineBasicBlock &MBB = *C.getMBB();
+ MachineBasicBlock::iterator StartIt = C.front();
+ MachineBasicBlock::iterator EndIt = C.back();
+
+ // Insert the call.
+ auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C);
+
+ // If the caller tracks liveness, then we need to make sure that
+ // anything we outline doesn't break liveness assumptions. The outlined
+ // functions themselves currently don't track liveness, but we should
+ // make sure that the ranges we yank things out of aren't wrong.
+ if (MBB.getParent()->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksLiveness)) {
+ // Helper lambda for adding implicit def operands to the call
+ // instruction.
+ auto CopyDefs = [&CallInst](MachineInstr &MI) {
+ for (MachineOperand &MOP : MI.operands()) {
+ // Skip over anything that isn't a register.
+ if (!MOP.isReg())
+ continue;
+
+ // If it's a def, add it to the call instruction.
+ if (MOP.isDef())
+ CallInst->addOperand(MachineOperand::CreateReg(
+ MOP.getReg(), true, /* isDef = true */
+ true /* isImp = true */));
+ }
+ };
+        // Copy over the defs in the outlined range.
+        // First inst in outlined range <-- Anything that's defined in this
+        // ...                           .. range has to be added as an implicit
+        // Last inst in outlined range  <-- def to the call instruction.
+ std::for_each(CallInst, std::next(EndIt), CopyDefs);
+ }
- // Copy over the defs in the outlined range.
- // First inst in outlined range <-- Anything that's defined in this
- // ... .. range has to be added as an implicit
- // Last inst in outlined range <-- def to the call instruction.
- std::for_each(CallInst, std::next(EndIt), CopyDefs);
- }
+ // Erase from the point after where the call was inserted up to, and
+ // including, the final instruction in the sequence.
+ // Erase needs one past the end, so we need std::next there too.
+ MBB.erase(std::next(StartIt), std::next(EndIt));
- // Erase from the point after where the call was inserted up to, and
- // including, the final instruction in the sequence.
- // Erase needs one past the end, so we need std::next there too.
- MBB.erase(std::next(StartIt), std::next(EndIt));
- OutlinedSomething = true;
+ // Keep track of what we removed by marking them all as -1.
+ std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(),
+ Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
+ [](unsigned &I) { I = static_cast<unsigned>(-1); });
+ OutlinedSomething = true;
- // Statistics.
- NumOutlined++;
+ // Statistics.
+ NumOutlined++;
+ }
}
LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
@@ -1333,34 +1296,8 @@ bool MachineOutliner::outline(
return OutlinedSomething;
}
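Editorial sketch (not part of the patch): the interaction between the benefit sort, the erase_if over already-consumed indices, and the -1 markers in outline() above is easier to see in isolation. The sketch below replays the same greedy bookkeeping over a plain vector of indices; the types and names are simplified stand-ins, not the pass's own, and the size check is a simplified stand-in for the getBenefit() < 1 recheck.

#include <algorithm>
#include <vector>

struct CandSketch { unsigned Start, End; };
struct FnSketch { unsigned Benefit; std::vector<CandSketch> Cands; };

void greedyOutlineSketch(std::vector<FnSketch> &Fns,
                         std::vector<unsigned> &Mapped) {
  // Most beneficial functions first, as in outline() above.
  std::stable_sort(Fns.begin(), Fns.end(),
                   [](const FnSketch &L, const FnSketch &R) {
                     return L.Benefit > R.Benefit;
                   });
  for (FnSketch &F : Fns) {
    // Drop candidates overlapping anything already outlined (marked -1).
    F.Cands.erase(
        std::remove_if(F.Cands.begin(), F.Cands.end(),
                       [&](const CandSketch &C) {
                         return std::any_of(
                             Mapped.begin() + C.Start,
                             Mapped.begin() + C.End + 1,
                             [](unsigned V) { return V == unsigned(-1); });
                       }),
        F.Cands.end());
    if (F.Cands.size() < 2)
      continue; // simplified stand-in for the benefit recheck
    // "Outline" the survivors: consume their ranges so later, less
    // beneficial functions can't reuse them.
    for (const CandSketch &C : F.Cands)
      std::fill(Mapped.begin() + C.Start, Mapped.begin() + C.End + 1,
                unsigned(-1));
  }
}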
-bool MachineOutliner::runOnModule(Module &M) {
- // Check if there's anything in the module. If it's empty, then there's
- // nothing to outline.
- if (M.empty())
- return false;
-
- MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
-
- // If the user passed -enable-machine-outliner=always or
- // -enable-machine-outliner, the pass will run on all functions in the module.
- // Otherwise, if the target supports default outlining, it will run on all
- // functions deemed by the target to be worth outlining from by default. Tell
- // the user how the outliner is running.
- LLVM_DEBUG(
- dbgs() << "Machine Outliner: Running on ";
- if (RunOnAllFunctions)
- dbgs() << "all functions";
- else
- dbgs() << "target-default functions";
- dbgs() << "\n"
- );
-
- // If the user specifies that they want to outline from linkonceodrs, set
- // it here.
- OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
-
- InstructionMapper Mapper;
-
+void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
+ MachineModuleInfo &MMI) {
// Build instruction mappings for each function in the module. Start by
// iterating over each Function in M.
for (Function &F : M) {
@@ -1395,7 +1332,11 @@ bool MachineOutliner::runOnModule(Module &M) {
for (MachineBasicBlock &MBB : *MF) {
// If there isn't anything in MBB, then there's no point in outlining from
// it.
- if (MBB.empty())
+ // If there are fewer than 2 instructions in the MBB, then it can't ever
+ // contain something worth outlining.
+      // FIXME: This should be based off of the maximum size in bytes of an
+      // outlined call versus the size in bytes of the MBB.
+ if (MBB.empty() || MBB.size() < 2)
continue;
// Check if MBB could be the target of an indirect branch. If it is, then
@@ -1407,21 +1348,133 @@ bool MachineOutliner::runOnModule(Module &M) {
Mapper.convertToUnsignedVec(MBB, *TII);
}
}
+}
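Editorial sketch (not part of the patch): InstructionMapper turns each MachineInstr into an integer so the suffix tree can treat the whole module as one long string. Legal, identical instructions share an id; illegal instructions each get a fresh, never-reused id so they can never participate in a match. The standalone sketch below illustrates that idea only; hashing real MachineInstrs is more involved, the string keys are placeholders, and the starting sentinel value is illustrative.

#include <string>
#include <unordered_map>
#include <vector>

struct MapperSketch {
  std::unordered_map<std::string, unsigned> InstrToId;
  std::vector<unsigned> UnsignedVec; // the "string" the suffix tree consumes
  unsigned NextLegalId = 0;
  unsigned NextIllegalId = static_cast<unsigned>(-3); // counts downward

  void mapLegal(const std::string &Instr) {
    auto Ins = InstrToId.emplace(Instr, NextLegalId);
    if (Ins.second)
      ++NextLegalId;                            // first time we've seen this
    UnsignedVec.push_back(Ins.first->second);   // identical instrs share an id
  }

  void mapIllegal() {
    // Unique id per illegal instruction: it can never match anything else,
    // so no candidate will ever straddle it.
    UnsignedVec.push_back(NextIllegalId--);
  }
};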
- // Construct a suffix tree, use it to find candidates, and then outline them.
- SuffixTree ST(Mapper.UnsignedVec);
- std::vector<std::shared_ptr<Candidate>> CandidateList;
+void MachineOutliner::initSizeRemarkInfo(
+ const Module &M, const MachineModuleInfo &MMI,
+ StringMap<unsigned> &FunctionToInstrCount) {
+ // Collect instruction counts for every function. We'll use this to emit
+ // per-function size remarks later.
+ for (const Function &F : M) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+
+ // We only care about MI counts here. If there's no MachineFunction at this
+ // point, then there won't be after the outliner runs, so let's move on.
+ if (!MF)
+ continue;
+ FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
+ }
+}
+
+void MachineOutliner::emitInstrCountChangedRemark(
+ const Module &M, const MachineModuleInfo &MMI,
+ const StringMap<unsigned> &FunctionToInstrCount) {
+ // Iterate over each function in the module and emit remarks.
+ // Note that we won't miss anything by doing this, because the outliner never
+ // deletes functions.
+ for (const Function &F : M) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+
+ // The outliner never deletes functions. If we don't have a MF here, then we
+ // didn't have one prior to outlining either.
+ if (!MF)
+ continue;
+
+ std::string Fname = F.getName();
+ unsigned FnCountAfter = MF->getInstructionCount();
+ unsigned FnCountBefore = 0;
+
+ // Check if the function was recorded before.
+ auto It = FunctionToInstrCount.find(Fname);
+
+ // Did we have a previously-recorded size? If yes, then set FnCountBefore
+ // to that.
+ if (It != FunctionToInstrCount.end())
+ FnCountBefore = It->second;
+
+ // Compute the delta and emit a remark if there was a change.
+ int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
+ static_cast<int64_t>(FnCountBefore);
+ if (FnDelta == 0)
+ continue;
+
+ MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
+ MORE.emit([&]() {
+ MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
+ DiagnosticLocation(),
+ &MF->front());
+ R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
+ << ": Function: "
+ << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
+ << ": MI instruction count changed from "
+ << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
+ FnCountBefore)
+ << " to "
+ << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
+ FnCountAfter)
+ << "; Delta: "
+ << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
+ return R;
+ });
+ }
+}
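Editorial sketch (not part of the patch): the before/after bookkeeping driving these size remarks is a map lookup plus a signed subtraction. A small standalone illustration; std::map stands in for StringMap, and the function names and counts below are invented.

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

int main() {
  std::map<std::string, unsigned> Before = {{"foo", 120}, {"bar", 40}};
  std::map<std::string, unsigned> After = {
      {"foo", 95}, {"bar", 40}, {"OUTLINED_FUNCTION_0", 12}};

  for (const auto &KV : After) {
    unsigned CountBefore = 0; // functions created by the outliner start at 0
    auto It = Before.find(KV.first);
    if (It != Before.end())
      CountBefore = It->second;
    int64_t Delta =
        static_cast<int64_t>(KV.second) - static_cast<int64_t>(CountBefore);
    if (Delta == 0)
      continue; // only remark on functions whose MI count actually changed
    std::printf("%s: %u -> %u (delta %lld)\n", KV.first.c_str(), CountBefore,
                KV.second, static_cast<long long>(Delta));
  }
  return 0;
}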
+
+bool MachineOutliner::runOnModule(Module &M) {
+ // Check if there's anything in the module. If it's empty, then there's
+ // nothing to outline.
+ if (M.empty())
+ return false;
+
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+
+ // If the user passed -enable-machine-outliner=always or
+ // -enable-machine-outliner, the pass will run on all functions in the module.
+ // Otherwise, if the target supports default outlining, it will run on all
+ // functions deemed by the target to be worth outlining from by default. Tell
+ // the user how the outliner is running.
+ LLVM_DEBUG(
+ dbgs() << "Machine Outliner: Running on ";
+ if (RunOnAllFunctions)
+ dbgs() << "all functions";
+ else
+ dbgs() << "target-default functions";
+ dbgs() << "\n"
+ );
+
+ // If the user specifies that they want to outline from linkonceodrs, set
+ // it here.
+ OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
+ InstructionMapper Mapper;
+
+ // Prepare instruction mappings for the suffix tree.
+ populateMapper(Mapper, M, MMI);
std::vector<OutlinedFunction> FunctionList;
// Find all of the outlining candidates.
- unsigned MaxCandidateLen =
- buildCandidateList(CandidateList, FunctionList, ST, Mapper);
-
- // Remove candidates that overlap with other candidates.
- pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen);
+ findCandidates(Mapper, FunctionList);
+
+ // If we've requested size remarks, then collect the MI counts of every
+ // function before outlining, and the MI counts after outlining.
+ // FIXME: This shouldn't be in the outliner at all; it should ultimately be
+ // the pass manager's responsibility.
+  // This could easily be placed in outline() instead, but since we ultimately
+  // don't want it in the outliner at all, it's kept separate here for now.
+
+ // Check if we want size remarks.
+ bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
+ StringMap<unsigned> FunctionToInstrCount;
+ if (ShouldEmitSizeRemarks)
+ initSizeRemarkInfo(M, MMI, FunctionToInstrCount);
// Outline each of the candidates and return true if something was outlined.
- bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper);
+ bool OutlinedSomething = outline(M, FunctionList, Mapper);
+
+ // If we outlined something, we definitely changed the MI count of the
+ // module. If we've asked for size remarks, then output them.
+ // FIXME: This should be in the pass manager.
+ if (ShouldEmitSizeRemarks && OutlinedSomething)
+ emitInstrCountChangedRemark(M, MMI, FunctionToInstrCount);
return OutlinedSomething;
}
diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
deleted file mode 100644
index 3ee3e40b27e2..000000000000
--- a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- CodeGen/MachineInstr.cpp ------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the machine function pass registry for register allocators
-// and instruction schedulers.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachinePassRegistry.h"
-
-using namespace llvm;
-
-void MachinePassRegistryListener::anchor() { }
-
-/// setDefault - Set the default constructor by name.
-void MachinePassRegistry::setDefault(StringRef Name) {
- MachinePassCtor Ctor = nullptr;
- for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) {
- if (R->getName() == Name) {
- Ctor = R->getCtor();
- break;
- }
- }
- assert(Ctor && "Unregistered pass name");
- setDefault(Ctor);
-}
-
-/// Add - Adds a function pass to the registration list.
-///
-void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
- Node->setNext(List);
- List = Node;
- if (Listener) Listener->NotifyAdd(Node->getName(),
- Node->getCtor(),
- Node->getDescription());
-}
-
-
-/// Remove - Removes a function pass from the registration list.
-///
-void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
- for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
- if (*I == Node) {
- if (Listener) Listener->NotifyRemove(Node->getName());
- *I = (*I)->getNext();
- break;
- }
- }
-}
diff --git a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
index 9bb00aaef86d..4d451bdd7f69 100644
--- a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -9,34 +9,6 @@
//
// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
//
-// Software pipelining (SWP) is an instruction scheduling technique for loops
-// that overlap loop iterations and exploits ILP via a compiler transformation.
-//
-// Swing Modulo Scheduling is an implementation of software pipelining
-// that generates schedules that are near optimal in terms of initiation
-// interval, register requirements, and stage count. See the papers:
-//
-// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
-// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
-// Conference on Parallel Architectures and Compilation Techiniques.
-//
-// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
-// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
-// Transactions on Computers, Vol. 50, No. 3, 2001.
-//
-// "An Implementation of Swing Modulo Scheduling With Extensions for
-// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
-// Urbana-Chambpain, 2005.
-//
-//
-// The SMS algorithm consists of three main steps after computing the minimal
-// initiation interval (MII).
-// 1) Analyze the dependence graph and compute information about each
-// instruction in the graph.
-// 2) Order the nodes (instructions) by priority based upon the heuristics
-// described in the algorithm.
-// 3) Attempt to schedule the nodes in the specified order using the MII.
-//
// This SMS implementation is a target-independent back-end pass. When enabled,
// the pass runs just prior to the register allocation pass, while the machine
// IR is in SSA form. If software pipelining is successful, then the original
@@ -83,13 +55,11 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -171,552 +141,15 @@ static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
cl::ReallyHidden, cl::init(false),
cl::ZeroOrMore, cl::desc("Ignore RecMII"));
-namespace {
-
-class NodeSet;
-class SMSchedule;
-
-/// The main class in the implementation of the target independent
-/// software pipeliner pass.
-class MachinePipeliner : public MachineFunctionPass {
-public:
- MachineFunction *MF = nullptr;
- const MachineLoopInfo *MLI = nullptr;
- const MachineDominatorTree *MDT = nullptr;
- const InstrItineraryData *InstrItins;
- const TargetInstrInfo *TII = nullptr;
- RegisterClassInfo RegClassInfo;
-
-#ifndef NDEBUG
- static int NumTries;
-#endif
-
- /// Cache the target analysis information about the loop.
- struct LoopInfo {
- MachineBasicBlock *TBB = nullptr;
- MachineBasicBlock *FBB = nullptr;
- SmallVector<MachineOperand, 4> BrCond;
- MachineInstr *LoopInductionVar = nullptr;
- MachineInstr *LoopCompare = nullptr;
- };
- LoopInfo LI;
-
- static char ID;
-
- MachinePipeliner() : MachineFunctionPass(ID) {
- initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<LiveIntervals>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
-private:
- void preprocessPhiNodes(MachineBasicBlock &B);
- bool canPipelineLoop(MachineLoop &L);
- bool scheduleLoop(MachineLoop &L);
- bool swingModuloScheduler(MachineLoop &L);
-};
-
-/// This class builds the dependence graph for the instructions in a loop,
-/// and attempts to schedule the instructions using the SMS algorithm.
-class SwingSchedulerDAG : public ScheduleDAGInstrs {
- MachinePipeliner &Pass;
- /// The minimum initiation interval between iterations for this schedule.
- unsigned MII = 0;
- /// Set to true if a valid pipelined schedule is found for the loop.
- bool Scheduled = false;
- MachineLoop &Loop;
- LiveIntervals &LIS;
- const RegisterClassInfo &RegClassInfo;
-
- /// A toplogical ordering of the SUnits, which is needed for changing
- /// dependences and iterating over the SUnits.
- ScheduleDAGTopologicalSort Topo;
-
- struct NodeInfo {
- int ASAP = 0;
- int ALAP = 0;
- int ZeroLatencyDepth = 0;
- int ZeroLatencyHeight = 0;
-
- NodeInfo() = default;
- };
- /// Computed properties for each node in the graph.
- std::vector<NodeInfo> ScheduleInfo;
-
- enum OrderKind { BottomUp = 0, TopDown = 1 };
- /// Computed node ordering for scheduling.
- SetVector<SUnit *> NodeOrder;
-
- using NodeSetType = SmallVector<NodeSet, 8>;
- using ValueMapTy = DenseMap<unsigned, unsigned>;
- using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
- using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
-
- /// Instructions to change when emitting the final schedule.
- DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
-
- /// We may create a new instruction, so remember it because it
- /// must be deleted when the pass is finished.
- SmallPtrSet<MachineInstr *, 4> NewMIs;
-
- /// Ordered list of DAG postprocessing steps.
- std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
-
- /// Helper class to implement Johnson's circuit finding algorithm.
- class Circuits {
- std::vector<SUnit> &SUnits;
- SetVector<SUnit *> Stack;
- BitVector Blocked;
- SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
- SmallVector<SmallVector<int, 4>, 16> AdjK;
- unsigned NumPaths;
- static unsigned MaxPaths;
-
- public:
- Circuits(std::vector<SUnit> &SUs)
- : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {}
-
- /// Reset the data structures used in the circuit algorithm.
- void reset() {
- Stack.clear();
- Blocked.reset();
- B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
- NumPaths = 0;
- }
-
- void createAdjacencyStructure(SwingSchedulerDAG *DAG);
- bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
- void unblock(int U);
- };
-
-public:
- SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
- const RegisterClassInfo &rci)
- : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
- RegClassInfo(rci), Topo(SUnits, &ExitSU) {
- P.MF->getSubtarget().getSMSMutations(Mutations);
- }
-
- void schedule() override;
- void finishBlock() override;
-
- /// Return true if the loop kernel has been scheduled.
- bool hasNewSchedule() { return Scheduled; }
-
- /// Return the earliest time an instruction may be scheduled.
- int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
-
- /// Return the latest time an instruction my be scheduled.
- int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
-
- /// The mobility function, which the number of slots in which
- /// an instruction may be scheduled.
- int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
-
- /// The depth, in the dependence graph, for a node.
- unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
-
- /// The maximum unweighted length of a path from an arbitrary node to the
- /// given node in which each edge has latency 0
- int getZeroLatencyDepth(SUnit *Node) {
- return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
- }
-
- /// The height, in the dependence graph, for a node.
- unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
-
- /// The maximum unweighted length of a path from the given node to an
- /// arbitrary node in which each edge has latency 0
- int getZeroLatencyHeight(SUnit *Node) {
- return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
- }
-
- /// Return true if the dependence is a back-edge in the data dependence graph.
- /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
- /// using an anti dependence from a Phi to an instruction.
- bool isBackedge(SUnit *Source, const SDep &Dep) {
- if (Dep.getKind() != SDep::Anti)
- return false;
- return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
- }
-
- bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
-
- /// The distance function, which indicates that operation V of iteration I
- /// depends on operations U of iteration I-distance.
- unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
- // Instructions that feed a Phi have a distance of 1. Computing larger
- // values for arrays requires data dependence information.
- if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
- return 1;
- return 0;
- }
-
- /// Set the Minimum Initiation Interval for this schedule attempt.
- void setMII(unsigned mii) { MII = mii; }
-
- void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
-
- void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
-
- /// Return the new base register that was stored away for the changed
- /// instruction.
- unsigned getInstrBaseReg(SUnit *SU) {
- DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
- InstrChanges.find(SU);
- if (It != InstrChanges.end())
- return It->second.first;
- return 0;
- }
-
- void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
- Mutations.push_back(std::move(Mutation));
- }
-
-private:
- void addLoopCarriedDependences(AliasAnalysis *AA);
- void updatePhiDependences();
- void changeDependences();
- unsigned calculateResMII();
- unsigned calculateRecMII(NodeSetType &RecNodeSets);
- void findCircuits(NodeSetType &NodeSets);
- void fuseRecs(NodeSetType &NodeSets);
- void removeDuplicateNodes(NodeSetType &NodeSets);
- void computeNodeFunctions(NodeSetType &NodeSets);
- void registerPressureFilter(NodeSetType &NodeSets);
- void colocateNodeSets(NodeSetType &NodeSets);
- void checkNodeSets(NodeSetType &NodeSets);
- void groupRemainingNodes(NodeSetType &NodeSets);
- void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
- SetVector<SUnit *> &NodesAdded);
- void computeNodeOrder(NodeSetType &NodeSets);
- void checkValidNodeOrder(const NodeSetType &Circuits) const;
- bool schedulePipeline(SMSchedule &Schedule);
- void generatePipelinedLoop(SMSchedule &Schedule);
- void generateProlog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
- MBBVectorTy &PrologBBs);
- void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
- MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
- void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
- MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum,
- unsigned CurStageNum, bool IsLast);
- void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
- MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum,
- unsigned CurStageNum, bool IsLast);
- void removeDeadInstructions(MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs);
- void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
- SMSchedule &Schedule);
- void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
- ValueMapTy *VRMap);
- bool computeDelta(MachineInstr &MI, unsigned &Delta);
- void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
- unsigned Num);
- MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
- unsigned InstStageNum);
- MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
- unsigned InstStageNum,
- SMSchedule &Schedule);
- void updateInstruction(MachineInstr *NewMI, bool LastDef,
- unsigned CurStageNum, unsigned InstrStageNum,
- SMSchedule &Schedule, ValueMapTy *VRMap);
- MachineInstr *findDefInLoop(unsigned Reg);
- unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
- unsigned LoopStage, ValueMapTy *VRMap,
- MachineBasicBlock *BB);
- void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap);
- void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
- InstrMapTy &InstrMap, unsigned CurStageNum,
- unsigned PhiNum, MachineInstr *Phi,
- unsigned OldReg, unsigned NewReg,
- unsigned PrevReg = 0);
- bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
- unsigned &OffsetPos, unsigned &NewBase,
- int64_t &NewOffset);
- void postprocessDAG();
-};
-
-/// A NodeSet contains a set of SUnit DAG nodes with additional information
-/// that assigns a priority to the set.
-class NodeSet {
- SetVector<SUnit *> Nodes;
- bool HasRecurrence = false;
- unsigned RecMII = 0;
- int MaxMOV = 0;
- unsigned MaxDepth = 0;
- unsigned Colocate = 0;
- SUnit *ExceedPressure = nullptr;
- unsigned Latency = 0;
-
-public:
- using iterator = SetVector<SUnit *>::const_iterator;
-
- NodeSet() = default;
- NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
- Latency = 0;
- for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
- for (const SDep &Succ : Nodes[i]->Succs)
- if (Nodes.count(Succ.getSUnit()))
- Latency += Succ.getLatency();
- }
-
- bool insert(SUnit *SU) { return Nodes.insert(SU); }
-
- void insert(iterator S, iterator E) { Nodes.insert(S, E); }
-
- template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
- return Nodes.remove_if(P);
- }
-
- unsigned count(SUnit *SU) const { return Nodes.count(SU); }
-
- bool hasRecurrence() { return HasRecurrence; };
-
- unsigned size() const { return Nodes.size(); }
-
- bool empty() const { return Nodes.empty(); }
-
- SUnit *getNode(unsigned i) const { return Nodes[i]; };
-
- void setRecMII(unsigned mii) { RecMII = mii; };
-
- void setColocate(unsigned c) { Colocate = c; };
-
- void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
-
- bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
-
- int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
-
- int getRecMII() { return RecMII; }
-
- /// Summarize node functions for the entire node set.
- void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
- for (SUnit *SU : *this) {
- MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
- MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
- }
- }
-
- unsigned getLatency() { return Latency; }
-
- unsigned getMaxDepth() { return MaxDepth; }
-
- void clear() {
- Nodes.clear();
- RecMII = 0;
- HasRecurrence = false;
- MaxMOV = 0;
- MaxDepth = 0;
- Colocate = 0;
- ExceedPressure = nullptr;
- }
-
- operator SetVector<SUnit *> &() { return Nodes; }
-
- /// Sort the node sets by importance. First, rank them by recurrence MII,
- /// then by mobility (least mobile done first), and finally by depth.
- /// Each node set may contain a colocate value which is used as the first
- /// tie breaker, if it's set.
- bool operator>(const NodeSet &RHS) const {
- if (RecMII == RHS.RecMII) {
- if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
- return Colocate < RHS.Colocate;
- if (MaxMOV == RHS.MaxMOV)
- return MaxDepth > RHS.MaxDepth;
- return MaxMOV < RHS.MaxMOV;
- }
- return RecMII > RHS.RecMII;
- }
-
- bool operator==(const NodeSet &RHS) const {
- return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
- MaxDepth == RHS.MaxDepth;
- }
-
- bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
-
- iterator begin() { return Nodes.begin(); }
- iterator end() { return Nodes.end(); }
-
- void print(raw_ostream &os) const {
- os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
- << " depth " << MaxDepth << " col " << Colocate << "\n";
- for (const auto &I : Nodes)
- os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
- os << "\n";
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
-#endif
-};
-
-/// This class represents the scheduled code. The main data structure is a
-/// map from scheduled cycle to instructions. During scheduling, the
-/// data structure explicitly represents all stages/iterations. When
-/// the algorithm finshes, the schedule is collapsed into a single stage,
-/// which represents instructions from different loop iterations.
-///
-/// The SMS algorithm allows negative values for cycles, so the first cycle
-/// in the schedule is the smallest cycle value.
-class SMSchedule {
-private:
- /// Map from execution cycle to instructions.
- DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
-
- /// Map from instruction to execution cycle.
- std::map<SUnit *, int> InstrToCycle;
-
- /// Map for each register and the max difference between its uses and def.
- /// The first element in the pair is the max difference in stages. The
- /// second is true if the register defines a Phi value and loop value is
- /// scheduled before the Phi.
- std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
-
- /// Keep track of the first cycle value in the schedule. It starts
- /// as zero, but the algorithm allows negative values.
- int FirstCycle = 0;
-
- /// Keep track of the last cycle value in the schedule.
- int LastCycle = 0;
-
- /// The initiation interval (II) for the schedule.
- int InitiationInterval = 0;
-
- /// Target machine information.
- const TargetSubtargetInfo &ST;
-
- /// Virtual register information.
- MachineRegisterInfo &MRI;
-
- std::unique_ptr<DFAPacketizer> Resources;
-
-public:
- SMSchedule(MachineFunction *mf)
- : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
- Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
-
- void reset() {
- ScheduledInstrs.clear();
- InstrToCycle.clear();
- RegToStageDiff.clear();
- FirstCycle = 0;
- LastCycle = 0;
- InitiationInterval = 0;
- }
-
- /// Set the initiation interval for this schedule.
- void setInitiationInterval(int ii) { InitiationInterval = ii; }
-
- /// Return the first cycle in the completed schedule. This
- /// can be a negative value.
- int getFirstCycle() const { return FirstCycle; }
-
- /// Return the last cycle in the finalized schedule.
- int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
-
- /// Return the cycle of the earliest scheduled instruction in the dependence
- /// chain.
- int earliestCycleInChain(const SDep &Dep);
-
- /// Return the cycle of the latest scheduled instruction in the dependence
- /// chain.
- int latestCycleInChain(const SDep &Dep);
+namespace llvm {
- void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
- int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
- bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
+// A command line option to enable the CopyToPhi DAG mutation.
+cl::opt<bool>
+ SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
+ cl::init(true), cl::ZeroOrMore,
+ cl::desc("Enable CopyToPhi DAG Mutation"));
- /// Iterators for the cycle to instruction map.
- using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
- using const_sched_iterator =
- DenseMap<int, std::deque<SUnit *>>::const_iterator;
-
- /// Return true if the instruction is scheduled at the specified stage.
- bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
- return (stageScheduled(SU) == (int)StageNum);
- }
-
- /// Return the stage for a scheduled instruction. Return -1 if
- /// the instruction has not been scheduled.
- int stageScheduled(SUnit *SU) const {
- std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
- if (it == InstrToCycle.end())
- return -1;
- return (it->second - FirstCycle) / InitiationInterval;
- }
-
- /// Return the cycle for a scheduled instruction. This function normalizes
- /// the first cycle to be 0.
- unsigned cycleScheduled(SUnit *SU) const {
- std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
- assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
- return (it->second - FirstCycle) % InitiationInterval;
- }
-
- /// Return the maximum stage count needed for this schedule.
- unsigned getMaxStageCount() {
- return (LastCycle - FirstCycle) / InitiationInterval;
- }
-
- /// Return the max. number of stages/iterations that can occur between a
- /// register definition and its uses.
- unsigned getStagesForReg(int Reg, unsigned CurStage) {
- std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
- if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
- return 1;
- return Stages.first;
- }
-
- /// The number of stages for a Phi is a little different than other
- /// instructions. The minimum value computed in RegToStageDiff is 1
- /// because we assume the Phi is needed for at least 1 iteration.
- /// This is not the case if the loop value is scheduled prior to the
- /// Phi in the same stage. This function returns the number of stages
- /// or iterations needed between the Phi definition and any uses.
- unsigned getStagesForPhi(int Reg) {
- std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
- if (Stages.second)
- return Stages.first;
- return Stages.first - 1;
- }
-
- /// Return the instructions that are scheduled at the specified cycle.
- std::deque<SUnit *> &getInstructions(int cycle) {
- return ScheduledInstrs[cycle];
- }
-
- bool isValidSchedule(SwingSchedulerDAG *SSD);
- void finalizeSchedule(SwingSchedulerDAG *SSD);
- void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
- std::deque<SUnit *> &Insts);
- bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
- bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
- MachineOperand &MO);
- void print(raw_ostream &os) const;
- void dump() const;
-};
-
-} // end anonymous namespace
+} // end namespace llvm
unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
char MachinePipeliner::ID = 0;
@@ -884,12 +317,9 @@ void SwingSchedulerDAG::schedule() {
addLoopCarriedDependences(AA);
updatePhiDependences();
Topo.InitDAGTopologicalSorting();
- postprocessDAG();
changeDependences();
- LLVM_DEBUG({
- for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this);
- });
+ postprocessDAG();
+ LLVM_DEBUG(dump());
NodeSetType NodeSets;
findCircuits(NodeSets);
@@ -1101,11 +531,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
// First, perform the cheaper check that compares the base register.
// If they are the same and the load offset is less than the store
// offset, then mark the dependence as loop carried potentially.
- unsigned BaseReg1, BaseReg2;
+ MachineOperand *BaseOp1, *BaseOp2;
int64_t Offset1, Offset2;
- if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) &&
- TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
- if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
+ if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
+ TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
+ if (BaseOp1->isIdenticalTo(*BaseOp2) &&
+ (int)Offset1 < (int)Offset2) {
assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
"What happened to the chain edge?");
SDep Dep(Load, SDep::Barrier);
@@ -1139,9 +570,9 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
continue;
}
AliasResult AAResult = AA->alias(
- MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
+ MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
MMO1->getAAInfo()),
- MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
+ MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
MMO2->getAAInfo()));
if (AAResult != NoAlias) {
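For reference, a short sketch of the updated aliasing query shown above: LocationSize::unknown() replaces the raw MemoryLocation::UnknownSize constant. AA, the pointers and the AATags values are assumed to be in scope; this mirrors the call in the hunk rather than defining new API.
    AliasResult AR = AA->alias(
        MemoryLocation(Ptr1, LocationSize::unknown(), AATags1),
        MemoryLocation(Ptr2, LocationSize::unknown(), AATags2));
    if (AR != NoAlias) {
      // Conservatively treat the two accesses as possibly overlapping.
    }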
@@ -1298,6 +729,7 @@ void SwingSchedulerDAG::changeDependences() {
// Add a dependence between the new instruction and the instruction
// that defines the new base.
SDep Dep(&I, SDep::Anti, NewBase);
+ Topo.AddPred(LastSU, &I);
LastSU->addPred(Dep);
// Remember the base and offset information so that we can update the
@@ -1509,9 +941,9 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
}
OutputDeps[N] = BackEdge;
}
- // Do not process a boundary node and a back-edge is processed only
- // if it goes to a Phi.
- if (SI.getSUnit()->isBoundaryNode() ||
+ // Do not process a boundary node or an artificial node.
+ // A back-edge is processed only if it goes to a Phi.
+ if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
(SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
continue;
int N = SI.getSUnit()->NodeNum;
@@ -1535,7 +967,7 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
}
}
}
- // Add back-eges in the adjacency matrix for the output dependences.
+ // Add back-edges in the adjacency matrix for the output dependences.
for (auto &OD : OutputDeps)
if (!Added.test(OD.second)) {
AdjK[OD.first].push_back(OD.second);
@@ -1564,7 +996,8 @@ bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
++NumPaths;
break;
} else if (!Blocked.test(W)) {
- if (circuit(W, S, NodeSets, W < V ? true : HasBackedge))
+ if (circuit(W, S, NodeSets,
+ Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
F = true;
}
}
@@ -1604,7 +1037,7 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
// but we do this to find the circuits, and then change them back.
swapAntiDependences(SUnits);
- Circuits Cir(SUnits);
+ Circuits Cir(SUnits, Topo);
// Create the adjacency structure.
Cir.createAdjacencyStructure(this);
for (int i = 0, e = SUnits.size(); i != e; ++i) {
@@ -1616,6 +1049,85 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
swapAntiDependences(SUnits);
}
+// Create artificial dependencies between the source of a COPY/REG_SEQUENCE
+// that is loop-carried to the USE in the next iteration. This helps the
+// pipeliner avoid additional copies across iterations. An artificial
+// dependence edge is added from USE to the SOURCE of the COPY/REG_SEQUENCE.
+
+// PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
+// SRCOfCopy------True-Dep---> COPY/REG_SEQUENCE
+// PHI-------True-Dep------> USEOfPhi
+
+// The mutation creates
+// USEOfPHI -------Artificial-Dep---> SRCOfCopy
+
+// Overall this ensures that USEOfPHI is scheduled before SRCOfCopy (since
+// USE is now a predecessor), which in turn means the COPY/REG_SEQUENCE is
+// scheduled late enough to avoid additional copies across iterations. The
+// resulting scheduling order would be
+// USEOfPHI --- SRCOfCopy --- COPY/REG_SEQUENCE.
+
+void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
+ for (SUnit &SU : DAG->SUnits) {
+ // Find the COPY/REG_SEQUENCE instruction.
+ if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
+ continue;
+
+ // Record the loop carried PHIs.
+ SmallVector<SUnit *, 4> PHISUs;
+ // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
+ SmallVector<SUnit *, 4> SrcSUs;
+
+ for (auto &Dep : SU.Preds) {
+ SUnit *TmpSU = Dep.getSUnit();
+ MachineInstr *TmpMI = TmpSU->getInstr();
+ SDep::Kind DepKind = Dep.getKind();
+ // Save the loop carried PHI.
+ if (DepKind == SDep::Anti && TmpMI->isPHI())
+ PHISUs.push_back(TmpSU);
+ // Save the source of COPY/REG_SEQUENCE.
+ // If the source has no predecessors, we will end up creating cycles.
+ else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
+ SrcSUs.push_back(TmpSU);
+ }
+
+ if (PHISUs.size() == 0 || SrcSUs.size() == 0)
+ continue;
+
+ // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
+ // SUnit to the container.
+ SmallVector<SUnit *, 8> UseSUs;
+ for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
+ for (auto &Dep : (*I)->Succs) {
+ if (Dep.getKind() != SDep::Data)
+ continue;
+
+ SUnit *TmpSU = Dep.getSUnit();
+ MachineInstr *TmpMI = TmpSU->getInstr();
+ if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
+ PHISUs.push_back(TmpSU);
+ continue;
+ }
+ UseSUs.push_back(TmpSU);
+ }
+ }
+
+ if (UseSUs.size() == 0)
+ continue;
+
+ SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
+ // Add the artificial dependencies if it does not form a cycle.
+ for (auto I : UseSUs) {
+ for (auto Src : SrcSUs) {
+ if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
+ Src->addPred(SDep(I, SDep::Artificial));
+ SDAG->Topo.AddPred(Src, I);
+ }
+ }
+ }
+ }
+}
+
/// Return true for DAG nodes that we ignore when computing the cost functions.
/// We ignore the back-edge recurrence in order to avoid unbounded recursion
/// in the calculation of the ASAP, ALAP, etc functions.
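A hedged sketch of how a mutation such as the CopyToPhiMutation added above is typically attached to the DAG. The actual registration point is not part of this hunk, so this only illustrates the usual ScheduleDAGInstrs::addMutation idiom with the llvm::make_unique helper of this LLVM version.
    // Attach the post-processing mutation once, before scheduling; it then
    // runs as part of postprocessDAG().
    addMutation(llvm::make_unique<CopyToPhiMutation>());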
@@ -1638,8 +1150,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
E = Topo.end();
I != E; ++I) {
- SUnit *SU = &SUnits[*I];
- SU->dump(this);
+ const SUnit &SU = SUnits[*I];
+ dumpNode(SU);
}
});
@@ -1864,8 +1376,7 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
RecRPTracker.closeBottom();
std::vector<SUnit *> SUnits(NS.begin(), NS.end());
- llvm::sort(SUnits.begin(), SUnits.end(),
- [](const SUnit *A, const SUnit *B) {
+ llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
return A->NodeNum > B->NodeNum;
});
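The range-based llvm::sort overload used in this hunk (declared in llvm/ADT/STLExtras.h) takes the container directly instead of an iterator pair; a tiny sketch of both forms, with NodeNums as a hypothetical container.
    SmallVector<unsigned, 8> NodeNums = {3, 1, 2};
    llvm::sort(NodeNums);                                                 // Ascending, uses operator<.
    llvm::sort(NodeNums, [](unsigned A, unsigned B) { return A > B; });   // Custom comparator.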
@@ -2672,7 +2183,7 @@ void SwingSchedulerDAG::generateExistingPhis(
else if (PrologStage >= AccessStage + StageDiff + np &&
VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
- // Check if the Phi has already been scheduled, but the loop intruction
+ // Check if the Phi has already been scheduled, but the loop instruction
// is either another Phi, or doesn't occur in the loop.
else if (PrologStage >= AccessStage + StageDiff + np) {
// If the Phi references another Phi, we need to examine the other
@@ -2725,7 +2236,7 @@ void SwingSchedulerDAG::generateExistingPhis(
VRMap[PrevStage - np + 1].count(Def))
PhiOp2 = VRMap[PrevStage - np + 1][Def];
// Use the loop value defined in the kernel.
- else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 &&
+ else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
// Use the value defined by the Phi, unless we're generating the first
@@ -2739,35 +2250,38 @@ void SwingSchedulerDAG::generateExistingPhis(
// references another Phi, and the other Phi is scheduled in an
// earlier stage. We can try to reuse an existing Phi up until the last
// stage of the current Phi.
- if (LoopDefIsPhi && (int)(PrologStage - np) >= StageScheduled) {
- int LVNumStages = Schedule.getStagesForPhi(LoopVal);
- int StageDiff = (StageScheduled - LoopValStage);
- LVNumStages -= StageDiff;
- // Make sure the loop value Phi has been processed already.
- if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
- NewReg = PhiOp2;
- unsigned ReuseStage = CurStageNum;
- if (Schedule.isLoopCarried(this, *PhiInst))
- ReuseStage -= LVNumStages;
- // Check if the Phi to reuse has been generated yet. If not, then
- // there is nothing to reuse.
- if (VRMap[ReuseStage - np].count(LoopVal)) {
- NewReg = VRMap[ReuseStage - np][LoopVal];
-
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
- &*BBI, Def, NewReg);
- // Update the map with the new Phi name.
- VRMap[CurStageNum - np][Def] = NewReg;
- PhiOp2 = NewReg;
- if (VRMap[LastStageNum - np - 1].count(LoopVal))
- PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
-
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
- continue;
+ if (LoopDefIsPhi) {
+ if (static_cast<int>(PrologStage - np) >= StageScheduled) {
+ int LVNumStages = Schedule.getStagesForPhi(LoopVal);
+ int StageDiff = (StageScheduled - LoopValStage);
+ LVNumStages -= StageDiff;
+ // Make sure the loop value Phi has been processed already.
+ if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
+ NewReg = PhiOp2;
+ unsigned ReuseStage = CurStageNum;
+ if (Schedule.isLoopCarried(this, *PhiInst))
+ ReuseStage -= LVNumStages;
+ // Check if the Phi to reuse has been generated yet. If not, then
+ // there is nothing to reuse.
+ if (VRMap[ReuseStage - np].count(LoopVal)) {
+ NewReg = VRMap[ReuseStage - np][LoopVal];
+
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
+ &*BBI, Def, NewReg);
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ PhiOp2 = NewReg;
+ if (VRMap[LastStageNum - np - 1].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
+
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ continue;
+ }
}
- } else if (InKernel && StageDiff > 0 &&
- VRMap[CurStageNum - StageDiff - np].count(LoopVal))
+ }
+ if (InKernel && StageDiff > 0 &&
+ VRMap[CurStageNum - StageDiff - np].count(LoopVal))
PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
}
@@ -3143,11 +2657,16 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
/// during each iteration. Set Delta to the amount of the change.
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
return false;
+ if (!BaseOp->isReg())
+ return false;
+
+ unsigned BaseReg = BaseOp->getReg();
+
MachineRegisterInfo &MRI = MF.getRegInfo();
// Check if there is a Phi. If so, get the definition in the loop.
MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
@@ -3175,28 +2694,26 @@ void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
return;
// If the instruction has memory operands, then adjust the offset
// when the instruction appears in different stages.
- unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin();
- if (NumRefs == 0)
+ if (NewMI.memoperands_empty())
return;
- MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs);
- unsigned Refs = 0;
+ SmallVector<MachineMemOperand *, 2> NewMMOs;
for (MachineMemOperand *MMO : NewMI.memoperands()) {
if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
(!MMO->getValue())) {
- NewMemRefs[Refs++] = MMO;
+ NewMMOs.push_back(MMO);
continue;
}
unsigned Delta;
if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
int64_t AdjOffset = Delta * Num;
- NewMemRefs[Refs++] =
- MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
} else {
- NewMI.dropMemRefs();
- return;
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
}
}
- NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
+ NewMI.setMemRefs(MF, NewMMOs);
}
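A compact sketch of the new memory-operand update idiom used above: the adjusted operands are collected in a SmallVector and handed to setMemRefs(), which now takes the MachineFunction (for allocation) and an ArrayRef rather than an iterator pair. MI and MF are assumed to be in scope.
    SmallVector<MachineMemOperand *, 2> NewMMOs;
    for (MachineMemOperand *MMO : MI.memoperands())
      NewMMOs.push_back(MF.getMachineMemOperand(MMO, /*Offset=*/0, MMO->getSize()));
    MI.setMemRefs(MF, NewMMOs);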
/// Clone the instruction for the new pipelined loop and update the
@@ -3552,19 +3069,19 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
return true;
- unsigned BaseRegS, BaseRegD;
+ MachineOperand *BaseOpS, *BaseOpD;
int64_t OffsetS, OffsetD;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
- !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
+ if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
+ !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
return true;
- if (BaseRegS != BaseRegD)
+ if (!BaseOpS->isIdenticalTo(*BaseOpD))
return true;
// Check that the base register is incremented by a constant value for each
// iteration.
- MachineInstr *Def = MRI.getVRegDef(BaseRegS);
+ MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
if (!Def || !Def->isPHI())
return true;
unsigned InitVal = 0;
@@ -3983,7 +3500,7 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
};
// sort, so that we can perform a binary search
- llvm::sort(Indices.begin(), Indices.end(), CompareKey);
+ llvm::sort(Indices, CompareKey);
bool Valid = true;
(void)Valid;
@@ -4193,6 +3710,14 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
LLVM_DEBUG(dump(););
}
+void NodeSet::print(raw_ostream &os) const {
+ os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
+ << " depth " << MaxDepth << " col " << Colocate << "\n";
+ for (const auto &I : Nodes)
+ os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
+ os << "\n";
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the schedule information to the given output.
void SMSchedule::print(raw_ostream &os) const {
@@ -4211,4 +3736,9 @@ void SMSchedule::print(raw_ostream &os) const {
/// Utility function used for debugging to print the schedule.
LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
+
#endif
+
+
+
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index f632a9bd457f..6e5ca45d5e5e 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -93,36 +93,29 @@ bool
MachineRegisterInfo::constrainRegAttrs(unsigned Reg,
unsigned ConstrainingReg,
unsigned MinNumRegs) {
- auto const *OldRC = getRegClassOrNull(Reg);
- auto const *RC = getRegClassOrNull(ConstrainingReg);
- // A virtual register at any point must have either a low-level type
- // or a class assigned, but not both. The only exception is the internals of
- // GlobalISel's instruction selection pass, which is allowed to temporarily
- // introduce registers with types and classes both.
- assert((OldRC || getType(Reg).isValid()) && "Reg has neither class nor type");
- assert((!OldRC || !getType(Reg).isValid()) && "Reg has class and type both");
- assert((RC || getType(ConstrainingReg).isValid()) &&
- "ConstrainingReg has neither class nor type");
- assert((!RC || !getType(ConstrainingReg).isValid()) &&
- "ConstrainingReg has class and type both");
- if (OldRC && RC)
- return ::constrainRegClass(*this, Reg, OldRC, RC, MinNumRegs);
- // If one of the virtual registers is generic (used in generic machine
- // instructions, has a low-level type, doesn't have a class), and the other is
- // concrete (used in target specific instructions, doesn't have a low-level
- // type, has a class), we can not unify them.
- if (OldRC || RC)
+ const LLT RegTy = getType(Reg);
+ const LLT ConstrainingRegTy = getType(ConstrainingReg);
+ if (RegTy.isValid() && ConstrainingRegTy.isValid() &&
+ RegTy != ConstrainingRegTy)
return false;
- // At this point, both registers are guaranteed to have a valid low-level
- // type, and they must agree.
- if (getType(Reg) != getType(ConstrainingReg))
- return false;
- auto const *OldRB = getRegBankOrNull(Reg);
- auto const *RB = getRegBankOrNull(ConstrainingReg);
- if (OldRB)
- return !RB || RB == OldRB;
- if (RB)
- setRegBank(Reg, *RB);
+ const auto ConstrainingRegCB = getRegClassOrRegBank(ConstrainingReg);
+ if (!ConstrainingRegCB.isNull()) {
+ const auto RegCB = getRegClassOrRegBank(Reg);
+ if (RegCB.isNull())
+ setRegClassOrRegBank(Reg, ConstrainingRegCB);
+ else if (RegCB.is<const TargetRegisterClass *>() !=
+ ConstrainingRegCB.is<const TargetRegisterClass *>())
+ return false;
+ else if (RegCB.is<const TargetRegisterClass *>()) {
+ if (!::constrainRegClass(
+ *this, Reg, RegCB.get<const TargetRegisterClass *>(),
+ ConstrainingRegCB.get<const TargetRegisterClass *>(), MinNumRegs))
+ return false;
+ } else if (RegCB != ConstrainingRegCB)
+ return false;
+ }
+ if (ConstrainingRegTy.isValid())
+ setType(Reg, ConstrainingRegTy);
return true;
}
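The rewritten constrainRegAttrs leans on the class-or-bank PointerUnion returned by getRegClassOrRegBank(); a hedged sketch of how such a value is usually inspected, with Reg and MRI assumed to be in scope.
    const auto &RCB = MRI.getRegClassOrRegBank(Reg);
    if (RCB.isNull()) {
      // Generic virtual register: only a low-level type (LLT) is recorded.
    } else if (RCB.is<const TargetRegisterClass *>()) {
      const TargetRegisterClass *RC = RCB.get<const TargetRegisterClass *>();
      (void)RC;  // Concrete register class.
    } else {
      const RegisterBank *RB = RCB.get<const RegisterBank *>();
      (void)RB;  // GlobalISel register bank.
    }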
@@ -177,11 +170,17 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
return Reg;
}
+unsigned MachineRegisterInfo::cloneVirtualRegister(unsigned VReg,
+ StringRef Name) {
+ unsigned Reg = createIncompleteVirtualRegister(Name);
+ VRegInfo[Reg].first = VRegInfo[VReg].first;
+ setType(Reg, getType(VReg));
+ if (TheDelegate)
+ TheDelegate->MRI_NoteNewVirtualRegister(Reg);
+ return Reg;
+}
+
void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
- // Check that VReg doesn't have a class.
- assert((getRegClassOrRegBank(VReg).isNull() ||
- !getRegClassOrRegBank(VReg).is<const TargetRegisterClass *>()) &&
- "Can't set the size of a non-generic virtual register");
VRegToType.grow(VReg);
VRegToType[VReg] = Ty;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index 502d18f08f93..90dad9d399fe 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -41,6 +41,7 @@
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -100,8 +101,11 @@ static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
cl::desc("Only schedule this MBB#"));
+static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
+ cl::desc("Print schedule DAGs"));
#else
-static bool ViewMISchedDAGs = false;
+static const bool ViewMISchedDAGs = false;
+static const bool PrintDAGs = false;
#endif // NDEBUG
/// Avoid quadratic complexity in unusually large basic blocks by limiting the
@@ -237,7 +241,8 @@ void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-MachinePassRegistry MachineSchedRegistry::Registry;
+MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
+ MachineSchedRegistry::Registry;
/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
@@ -633,7 +638,7 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
+ dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -670,7 +675,7 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- PredSU->dump(this);
+ dumpNode(*PredSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -764,10 +769,8 @@ void ScheduleDAGMI::schedule() {
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
- LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
- for (const SUnit &SU
- : SUnits) SU.dumpAll(this);
- if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
+ LLVM_DEBUG(dump());
+ if (PrintDAGs) dump();
if (ViewMISchedDAGs) viewGraph();
// Initialize the strategy before modifying the DAG.
@@ -920,7 +923,7 @@ void ScheduleDAGMI::placeDebugValues() {
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
if (SUnit *SU = getSUnit(&(*MI)))
- SU->dump(this);
+ dumpNode(*SU);
else
dbgs() << "Missing SUnit\n";
}
@@ -1171,6 +1174,29 @@ void ScheduleDAGMILive::updatePressureDiffs(
}
}
+void ScheduleDAGMILive::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (EntrySU.getInstr() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits) {
+ dumpNodeAll(SU);
+ if (ShouldTrackPressure) {
+ dbgs() << " Pressure Diff : ";
+ getPressureDiff(&SU).dump(*TRI);
+ }
+ dbgs() << " Single Issue : ";
+ if (SchedModel.mustBeginGroup(SU.getInstr()) &&
+ SchedModel.mustEndGroup(SU.getInstr()))
+ dbgs() << "true;";
+ else
+ dbgs() << "false;";
+ dbgs() << '\n';
+ }
+ if (ExitSU.getInstr() != nullptr)
+ dumpNodeAll(ExitSU);
+#endif
+}
+
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
@@ -1197,22 +1223,8 @@ void ScheduleDAGMILive::schedule() {
// This may initialize a DFSResult to be used for queue priority.
SchedImpl->initialize(this);
- LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
- for (const SUnit &SU
- : SUnits) {
- SU.dumpAll(this);
- if (ShouldTrackPressure) {
- dbgs() << " Pressure Diff : ";
- getPressureDiff(&SU).dump(*TRI);
- }
- dbgs() << " Single Issue : ";
- if (SchedModel.mustBeginGroup(SU.getInstr()) &&
- SchedModel.mustEndGroup(SU.getInstr()))
- dbgs() << "true;";
- else
- dbgs() << "false;";
- dbgs() << '\n';
- } if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
+ LLVM_DEBUG(dump());
+ if (PrintDAGs) dump();
if (ViewMISchedDAGs) viewGraph();
// Initialize ready queues now that the DAG and priority data are finalized.
@@ -1472,15 +1484,40 @@ namespace {
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
struct MemOpInfo {
SUnit *SU;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
- : SU(su), BaseReg(reg), Offset(ofs) {}
+ MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs)
+ : SU(su), BaseOp(Op), Offset(ofs) {}
+
+ bool operator<(const MemOpInfo &RHS) const {
+ if (BaseOp->getType() != RHS.BaseOp->getType())
+ return BaseOp->getType() < RHS.BaseOp->getType();
+
+ if (BaseOp->isReg())
+ return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
+ std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
+ RHS.SU->NodeNum);
+ if (BaseOp->isFI()) {
+ const MachineFunction &MF =
+ *BaseOp->getParent()->getParent()->getParent();
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ bool StackGrowsDown = TFI.getStackGrowthDirection() ==
+ TargetFrameLowering::StackGrowsDown;
+ // Can't use tuple comparison here since we might need to use a
+ // different order when the stack grows down.
+ if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
+ return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
+ : BaseOp->getIndex() < RHS.BaseOp->getIndex();
+
+ if (Offset != RHS.Offset)
+ return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
+
+ return SU->NodeNum < RHS.SU->NodeNum;
+ }
- bool operator<(const MemOpInfo&RHS) const {
- return std::tie(BaseReg, Offset, SU->NodeNum) <
- std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
+ llvm_unreachable("MemOpClusterMutation only supports register or frame "
+ "index bases.");
}
};
@@ -1536,21 +1573,21 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
for (SUnit *SU : MemOps) {
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
- MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
+ if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
+ MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
}
if (MemOpRecords.size() < 2)
return;
- llvm::sort(MemOpRecords.begin(), MemOpRecords.end());
+ llvm::sort(MemOpRecords);
unsigned ClusterLength = 1;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
SUnit *SUa = MemOpRecords[Idx].SU;
SUnit *SUb = MemOpRecords[Idx+1].SU;
- if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
- *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
+ if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
+ *MemOpRecords[Idx + 1].BaseOp,
ClusterLength) &&
DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
@@ -2397,6 +2434,52 @@ initResourceDelta(const ScheduleDAGMI *DAG,
}
}
+/// Compute remaining latency. We need this both to determine whether the
+/// overall schedule has become latency-limited and whether the instructions
+/// outside this zone are resource or latency limited.
+///
+/// The "dependent" latency is updated incrementally during scheduling as the
+/// max height/depth of scheduled nodes minus the cycles since it was
+/// scheduled:
+/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
+///
+/// The "independent" latency is the max ready queue depth:
+/// ILat = max N.depth for N in Available|Pending
+///
+/// RemainingLatency is the greater of independent and dependent latency.
+///
+/// These computations are expensive, especially in DAGs with many edges, so
+/// only do them if necessary.
+static unsigned computeRemLatency(SchedBoundary &CurrZone) {
+ unsigned RemLatency = CurrZone.getDependentLatency();
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Available.elements()));
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Pending.elements()));
+ return RemLatency;
+}
+
+/// Returns true if the current cycle plus remaining latency is greater than
+/// the critical path in the scheduling region.
+bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
+ SchedBoundary &CurrZone,
+ bool ComputeRemLatency,
+ unsigned &RemLatency) const {
+ // The current cycle is already greater than the critical path, so we are
+ // already latency limited and don't need to compute the remaining latency.
+ if (CurrZone.getCurrCycle() > Rem.CriticalPath)
+ return true;
+
+ // If we haven't scheduled anything yet, then we aren't latency limited.
+ if (CurrZone.getCurrCycle() == 0)
+ return false;
+
+ if (ComputeRemLatency)
+ RemLatency = computeRemLatency(CurrZone);
+
+ return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
+}
+
/// Set the CandPolicy given a scheduling zone given the current resources and
/// latencies inside and outside the zone.
void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
@@ -2406,46 +2489,32 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
// inside and outside this zone. Potential stalls should be considered before
// following this policy.
- // Compute remaining latency. We need this both to determine whether the
- // overall schedule has become latency-limited and whether the instructions
- // outside this zone are resource or latency limited.
- //
- // The "dependent" latency is updated incrementally during scheduling as the
- // max height/depth of scheduled nodes minus the cycles since it was
- // scheduled:
- // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
- //
- // The "independent" latency is the max ready queue depth:
- // ILat = max N.depth for N in Available|Pending
- //
- // RemainingLatency is the greater of independent and dependent latency.
- unsigned RemLatency = CurrZone.getDependentLatency();
- RemLatency = std::max(RemLatency,
- CurrZone.findMaxLatency(CurrZone.Available.elements()));
- RemLatency = std::max(RemLatency,
- CurrZone.findMaxLatency(CurrZone.Pending.elements()));
-
// Compute the critical resource outside the zone.
unsigned OtherCritIdx = 0;
unsigned OtherCount =
OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
bool OtherResLimited = false;
- if (SchedModel->hasInstrSchedModel())
+ unsigned RemLatency = 0;
+ bool RemLatencyComputed = false;
+ if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
+ RemLatency = computeRemLatency(CurrZone);
+ RemLatencyComputed = true;
OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
OtherCount, RemLatency);
+ }
// Schedule aggressively for latency in PostRA mode. We don't check for
// acyclic latency during PostRA, and highly out-of-order processors will
// skip PostRA scheduling.
- if (!OtherResLimited) {
- if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
- Policy.ReduceLatency |= true;
- LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
- << " RemainingLatency " << RemLatency << " + "
- << CurrZone.getCurrCycle() << "c > CritPath "
- << Rem.CriticalPath << "\n");
- }
+ if (!OtherResLimited &&
+ (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
+ RemLatency))) {
+ Policy.ReduceLatency |= true;
+ LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
+ << " RemainingLatency " << RemLatency << " + "
+ << CurrZone.getCurrCycle() << "c > CritPath "
+ << Rem.CriticalPath << "\n");
}
// If the same resource is limiting inside and outside the zone, do nothing.
if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
@@ -2473,7 +2542,7 @@ const char *GenericSchedulerBase::getReasonStr(
switch (Reason) {
case NoCand: return "NOCAND ";
case Only1: return "ONLY1 ";
- case PhysRegCopy: return "PREG-COPY ";
+ case PhysReg: return "PHYS-REG ";
case RegExcess: return "REG-EXCESS";
case RegCritical: return "REG-CRIT ";
case Stall: return "STALL ";
@@ -2809,24 +2878,41 @@ unsigned getWeakLeft(const SUnit *SU, bool isTop) {
/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
/// with the operation that produces or consumes the physreg. We'll do this when
/// regalloc has support for parallel copies.
-int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+int biasPhysReg(const SUnit *SU, bool isTop) {
const MachineInstr *MI = SU->getInstr();
- if (!MI->isCopy())
- return 0;
- unsigned ScheduledOper = isTop ? 1 : 0;
- unsigned UnscheduledOper = isTop ? 0 : 1;
- // If we have already scheduled the physreg produce/consumer, immediately
- // schedule the copy.
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(ScheduledOper).getReg()))
- return 1;
- // If the physreg is at the boundary, defer it. Otherwise schedule it
- // immediately to free the dependent. We can hoist the copy later.
- bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(UnscheduledOper).getReg()))
- return AtBoundary ? -1 : 1;
+ if (MI->isCopy()) {
+ unsigned ScheduledOper = isTop ? 1 : 0;
+ unsigned UnscheduledOper = isTop ? 0 : 1;
+ // If we have already scheduled the physreg produce/consumer, immediately
+ // schedule the copy.
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(ScheduledOper).getReg()))
+ return 1;
+ // If the physreg is at the boundary, defer it. Otherwise schedule it
+ // immediately to free the dependent. We can hoist the copy later.
+ bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(UnscheduledOper).getReg()))
+ return AtBoundary ? -1 : 1;
+ }
+
+ if (MI->isMoveImmediate()) {
+ // If we have a move immediate and all successors have been assigned, bias
+ // towards scheduling this later. Make sure all register defs are to
+ // physical registers.
+ bool DoBias = true;
+ for (const MachineOperand &Op : MI->defs()) {
+ if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) {
+ DoBias = false;
+ break;
+ }
+ }
+
+ if (DoBias)
+ return isTop ? -1 : 1;
+ }
+
return 0;
}
} // end namespace llvm
@@ -2887,9 +2973,9 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
return;
}
- if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
- biasPhysRegCopy(Cand.SU, Cand.AtTop),
- TryCand, Cand, PhysRegCopy))
+ // Bias physreg defs and copies toward their uses and definitions, respectively.
+ if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+ biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
return;
// Avoid exceeding the target's limit.
@@ -3136,7 +3222,7 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
return SU;
}
-void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
MachineBasicBlock::iterator InsertPos = SU->getInstr();
if (!isTop)
++InsertPos;
@@ -3151,10 +3237,10 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
continue;
MachineInstr *Copy = DepSU->getInstr();
- if (!Copy->isCopy())
+ if (!Copy->isCopy() && !Copy->isMoveImmediate())
continue;
LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
- Dep.getSUnit()->dump(DAG));
+ DAG->dumpNode(*Dep.getSUnit()));
DAG->moveInstruction(Copy, InsertPos);
}
}
@@ -3165,18 +3251,18 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
/// does.
///
/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
-/// them here. See comments in biasPhysRegCopy.
+/// them here. See comments in biasPhysReg.
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
Top.bumpNode(SU);
if (SU->hasPhysRegUses)
- reschedulePhysRegCopies(SU, true);
+ reschedulePhysReg(SU, true);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
Bot.bumpNode(SU);
if (SU->hasPhysRegDefs)
- reschedulePhysRegCopies(SU, false);
+ reschedulePhysReg(SU, false);
}
}
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index 1fd40f757351..cdc597db6401 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -513,25 +513,6 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
return true;
}
-/// collectDebgValues - Scan instructions following MI and collect any
-/// matching DBG_VALUEs.
-static void collectDebugValues(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DbgValues) {
- DbgValues.clear();
- if (!MI.getOperand(0).isReg())
- return;
-
- MachineBasicBlock::iterator DI = MI; ++DI;
- for (MachineBasicBlock::iterator DE = MI.getParent()->end();
- DI != DE; ++DI) {
- if (!DI->isDebugValue())
- return;
- if (DI->getOperand(0).isReg() &&
- DI->getOperand(0).getReg() == MI.getOperand(0).getReg())
- DbgValues.push_back(&*DI);
- }
-}
-
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
@@ -735,9 +716,12 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
!PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
return false;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+ return false;
+
+ if (!BaseOp->isReg())
return false;
if (!(MI.mayLoad() && !MI.isPredicable()))
@@ -750,15 +734,21 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
(MBP.Predicate == MachineBranchPredicate::PRED_NE ||
MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
- MBP.LHS.getReg() == BaseReg;
+ MBP.LHS.getReg() == BaseOp->getReg();
}
-/// Sink an instruction and its associated debug instructions.
+/// Sink an instruction and its associated debug instructions. If the debug
+/// instructions to be sunk are already known, they can be provided in DbgVals.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
- MachineBasicBlock::iterator InsertPos) {
- // Collect matching debug values.
+ MachineBasicBlock::iterator InsertPos,
+ SmallVectorImpl<MachineInstr *> *DbgVals = nullptr) {
+ // If debug values are provided, use those; otherwise call collectDebugValues.
SmallVector<MachineInstr *, 2> DbgValuesToSink;
- collectDebugValues(MI, DbgValuesToSink);
+ if (DbgVals)
+ DbgValuesToSink.insert(DbgValuesToSink.begin(),
+ DbgVals->begin(), DbgVals->end());
+ else
+ MI.collectDebugValues(DbgValuesToSink);
// If we cannot find a location to use (merge with), then we erase the debug
// location to prevent debug-info driven tools from potentially reporting
@@ -970,6 +960,9 @@ private:
/// Track which register units have been modified and used.
LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+ /// Track DBG_VALUEs of (unmodified) register units.
+ DenseMap<unsigned, TinyPtrVector<MachineInstr*>> SeenDbgInstrs;
+
/// Sink Copy instructions unused in the same block close to their uses in
/// successors.
bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
@@ -1056,8 +1049,11 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
SmallVectorImpl<unsigned> &UsedOpsInCopy,
SmallVectorImpl<unsigned> &DefedRegsInCopy) {
- for (auto DefReg : DefedRegsInCopy)
- SuccBB->removeLiveIn(DefReg);
+ MachineFunction &MF = *SuccBB->getParent();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (unsigned DefReg : DefedRegsInCopy)
+ for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
+ SuccBB->removeLiveIn(*S);
for (auto U : UsedOpsInCopy) {
unsigned Reg = MI->getOperand(U).getReg();
if (!SuccBB->isLiveIn(Reg))
@@ -1121,11 +1117,34 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// block and the current instruction.
ModifiedRegUnits.clear();
UsedRegUnits.clear();
+ SeenDbgInstrs.clear();
for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
MachineInstr *MI = &*I;
++I;
+ // Track the operand index for use in Copy.
+ SmallVector<unsigned, 2> UsedOpsInCopy;
+ // Track the register number defed in Copy.
+ SmallVector<unsigned, 2> DefedRegsInCopy;
+
+ // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
+ // for DBG_VALUEs later, record them when they're encountered.
+ if (MI->isDebugValue()) {
+ auto &MO = MI->getOperand(0);
+ if (MO.isReg() && TRI->isPhysicalRegister(MO.getReg())) {
+ // Bail if we can already tell the sink would be rejected, rather
+ // than needlessly accumulating lots of DBG_VALUEs.
+ if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+ ModifiedRegUnits, UsedRegUnits))
+ continue;
+
+ // Record debug use of this register.
+ SeenDbgInstrs[MO.getReg()].push_back(MI);
+ }
+ continue;
+ }
+
if (MI->isDebugInstr())
continue;
@@ -1139,11 +1158,6 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
continue;
}
- // Track the operand index for use in Copy.
- SmallVector<unsigned, 2> UsedOpsInCopy;
- // Track the register number defed in Copy.
- SmallVector<unsigned, 2> DefedRegsInCopy;
-
// Don't sink the COPY if it would violate a register dependency.
if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
ModifiedRegUnits, UsedRegUnits)) {
@@ -1165,11 +1179,21 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
"Unexpected predecessor");
+ // Collect DBG_VALUEs that must sink with this copy.
+ SmallVector<MachineInstr *, 4> DbgValsToSink;
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned reg = MO.getReg();
+ for (auto *MI : SeenDbgInstrs.lookup(reg))
+ DbgValsToSink.push_back(MI);
+ }
+
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
- performSink(*MI, *SuccBB, InsertPos);
+ performSink(*MI, *SuccBB, InsertPos, &DbgValsToSink);
updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
Changed = true;
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 79ca6adf95c4..e62ed3094651 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -218,8 +218,7 @@ computeHeightResources(const MachineBasicBlock *MBB) {
// The trace tail is done.
if (!TBI->Succ) {
TBI->Tail = MBB->getNumber();
- std::copy(PRCycles.begin(), PRCycles.end(),
- ProcResourceHeights.begin() + PROffset);
+ llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
return;
}
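llvm::copy, used in this hunk, is the range-based counterpart of std::copy from llvm/ADT/STLExtras.h; an equivalent sketch with hypothetical containers.
    std::vector<unsigned> Src = {1, 2, 3};
    std::vector<unsigned> Dest(Src.size());
    llvm::copy(Src, Dest.begin());  // Same as std::copy(Src.begin(), Src.end(), Dest.begin()).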
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index 318776136e24..534d3699db29 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -23,6 +23,7 @@
// the verifier errors.
//===----------------------------------------------------------------------===//
+#include "LiveRangeCalc.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -108,6 +109,7 @@ namespace {
using RegMap = DenseMap<unsigned, const MachineInstr *>;
using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>;
+ const MachineInstr *FirstNonPHI;
const MachineInstr *FirstTerminator;
BlockSet FunctionBlocks;
@@ -248,6 +250,7 @@ namespace {
void report_context(const LiveRange::Segment &S) const;
void report_context(const VNInfo &VNI) const;
void report_context(SlotIndex Pos) const;
+ void report_context(MCPhysReg PhysReg) const;
void report_context_liverange(const LiveRange &LR) const;
void report_context_lanemask(LaneBitmask LaneMask) const;
void report_context_vreg(unsigned VReg) const;
@@ -261,6 +264,7 @@ namespace {
LaneBitmask LaneMask = LaneBitmask::getNone());
void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
+ bool SubRangeCheck = false,
LaneBitmask LaneMask = LaneBitmask::getNone());
void markReachable(const MachineBasicBlock *MBB);
@@ -362,6 +366,13 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
const bool isFunctionFailedISel = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel);
+
+ // If we're mid-GlobalISel and we already triggered the fallback path then
+ // it's expected that the MIR is somewhat broken but that's ok since we'll
+ // reset it and clear the FailedISel attribute in ResetMachineFunctions.
+ if (isFunctionFailedISel)
+ return foundErrors;
+
isFunctionRegBankSelected =
!isFunctionFailedISel &&
MF.getProperties().hasProperty(
@@ -530,6 +541,10 @@ void MachineVerifier::report_context_liverange(const LiveRange &LR) const {
errs() << "- liverange: " << LR << '\n';
}
+void MachineVerifier::report_context(MCPhysReg PReg) const {
+ errs() << "- p. register: " << printReg(PReg, TRI) << '\n';
+}
+
void MachineVerifier::report_context_vreg(unsigned VReg) const {
errs() << "- v. register: " << printReg(VReg, TRI) << '\n';
}
@@ -599,6 +614,7 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i,
void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = nullptr;
+ FirstNonPHI = nullptr;
if (!MF->getProperties().hasProperty(
MachineFunctionProperties::Property::NoPHIs) && MRI->tracksLiveness()) {
@@ -608,6 +624,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
MBB->getIterator() != MBB->getParent()->begin()) {
report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB);
+ report_context(LI.PhysReg);
}
}
}
@@ -666,7 +683,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
// out the bottom of the function.
} else if (MBB->succ_size() == LandingPadSuccs.size()) {
// It's possible that the block legitimately ends with a noreturn
- // call or an unreachable, in which case it won't actuall fall
+ // call or an unreachable, in which case it won't actually fall
// out of the block.
} else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
report("MBB exits via unconditional fall-through but doesn't have "
@@ -767,7 +784,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
"isn't a terminator instruction!", MBB);
}
if (Cond.empty()) {
- report("MBB exits via conditinal branch/branch but there's no "
+ report("MBB exits via conditional branch/branch but there's no "
"condition!", MBB);
}
} else {
@@ -880,9 +897,15 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
<< MI->getNumOperands() << " given.\n";
}
- if (MI->isPHI() && MF->getProperties().hasProperty(
- MachineFunctionProperties::Property::NoPHIs))
- report("Found PHI instruction with NoPHIs property set", MI);
+ if (MI->isPHI()) {
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs))
+ report("Found PHI instruction with NoPHIs property set", MI);
+
+ if (FirstNonPHI)
+ report("Found PHI instruction after non-PHI", MI);
+ } else if (FirstNonPHI == nullptr)
+ FirstNonPHI = MI;
// Check the tied operands.
if (MI->isInlineAsm())
@@ -1038,6 +1061,89 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
break;
}
+ case TargetOpcode::G_MERGE_VALUES: {
+ // G_MERGE_VALUES should only be used to merge scalars into a larger scalar,
+ // e.g. s2N = MERGE sN, sN
+ // Merging multiple scalars into a vector is not allowed, should use
+ // G_BUILD_VECTOR for that.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (DstTy.isVector() || SrcTy.isVector())
+ report("G_MERGE_VALUES cannot operate on vectors", MI);
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(MI->getNumOperands()-1).getReg());
+ // For now G_UNMERGE can split vectors.
+ for (unsigned i = 0; i < MI->getNumOperands()-1; ++i) {
+ if (MRI->getType(MI->getOperand(i).getReg()) != DstTy)
+ report("G_UNMERGE_VALUES destination types do not match", MI);
+ }
+ if (SrcTy.getSizeInBits() !=
+ (DstTy.getSizeInBits() * (MI->getNumOperands() - 1))) {
+ report("G_UNMERGE_VALUES source operand does not cover dest operands",
+ MI);
+ }
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR: {
+ // Source types must be scalars, dest type a vector. Total size of scalars
+ // must match the dest vector size.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || SrcEltTy.isVector())
+ report("G_BUILD_VECTOR must produce a vector from scalar operands", MI);
+ for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
+ if (MRI->getType(MI->getOperand(1).getReg()) !=
+ MRI->getType(MI->getOperand(i).getReg()))
+ report("G_BUILD_VECTOR source operand types are not homogeneous", MI);
+ }
+ if (DstTy.getSizeInBits() !=
+ SrcEltTy.getSizeInBits() * (MI->getNumOperands() - 1))
+ report("G_BUILD_VECTOR src operands total size don't match dest "
+ "size.",
+ MI);
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
+ // Source types must be scalars, dest type a vector. Scalar types must be
+ // larger than the dest vector elt type, as this is a truncating operation.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || SrcEltTy.isVector())
+ report("G_BUILD_VECTOR_TRUNC must produce a vector from scalar operands",
+ MI);
+ for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
+ if (MRI->getType(MI->getOperand(1).getReg()) !=
+ MRI->getType(MI->getOperand(i).getReg()))
+ report("G_BUILD_VECTOR_TRUNC source operand types are not homogeneous",
+ MI);
+ }
+ if (SrcEltTy.getSizeInBits() <= DstTy.getElementType().getSizeInBits())
+ report("G_BUILD_VECTOR_TRUNC source operand types are not larger than "
+ "dest elt type",
+ MI);
+ break;
+ }
+ case TargetOpcode::G_CONCAT_VECTORS: {
+ // Source types should be vectors, and total size should match the dest
+ // vector size.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || !SrcTy.isVector())
+ report("G_CONCAT_VECTOR requires vector source and destination operands",
+ MI);
+ for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
+ if (MRI->getType(MI->getOperand(1).getReg()) !=
+ MRI->getType(MI->getOperand(i).getReg()))
+ report("G_CONCAT_VECTOR source operand types are not homogeneous", MI);
+ }
+ if (DstTy.getNumElements() !=
+ SrcTy.getNumElements() * (MI->getNumOperands() - 1))
+ report("G_CONCAT_VECTOR num dest and source elements should match", MI);
+ break;
+ }
case TargetOpcode::COPY: {
if (foundErrors)
break;
@@ -1395,7 +1501,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
- LaneBitmask LaneMask) {
+ bool SubRangeCheck, LaneBitmask LaneMask) {
if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) {
assert(VNI && "NULL valno is not allowed");
if (VNI->def != DefIdx) {
@@ -1419,25 +1525,14 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
if (MO->isDead()) {
LiveQueryResult LRQ = LR.Query(DefIdx);
if (!LRQ.isDeadDef()) {
- // In case of physregs we can have a non-dead definition on another
- // operand.
- bool otherDef = false;
- if (!TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) {
- const MachineInstr &MI = *MO->getParent();
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() || MO.isDead())
- continue;
- unsigned Reg = MO.getReg();
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- if (*Units == VRegOrUnit) {
- otherDef = true;
- break;
- }
- }
- }
- }
-
- if (!otherDef) {
+ assert(TargetRegisterInfo::isVirtualRegister(VRegOrUnit) &&
+ "Expecting a virtual register.");
+ // A dead subreg def only tells us that the specific subreg is dead. There
+ // could be other non-dead defs of other subregs, or we could have other
+ // parts of the register being live through the instruction. So unless we
+ // are checking liveness for a subrange it is ok for the live range to
+ // continue, given that we have a dead def of a subregister.
+ if (SubRangeCheck || MO->getSubReg() == 0) {
report("Live range continues after dead def flag", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
@@ -1532,10 +1627,12 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// get a report for its operand.
if (Bad) {
for (const MachineOperand &MOP : MI->uses()) {
- if (!MOP.isReg())
+ if (!MOP.isReg() || !MOP.isImplicit())
continue;
- if (!MOP.isImplicit())
+
+ if (!TargetRegisterInfo::isPhysicalRegister(MOP.getReg()))
continue;
+
for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid();
++SubRegs) {
if (*SubRegs == Reg) {
@@ -1593,7 +1690,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
for (const LiveInterval::SubRange &SR : LI.subranges()) {
if ((SR.LaneMask & MOMask).none())
continue;
- checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, SR.LaneMask);
+ checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
}
}
} else {
@@ -2116,6 +2213,13 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Skip this block.
++MFI;
}
+
+ SmallVector<SlotIndex, 4> Undefs;
+ if (LaneMask.any()) {
+ LiveInterval &OwnerLI = LiveInts->getInterval(Reg);
+ OwnerLI.computeSubRangeUndefs(Undefs, LaneMask, *MRI, *Indexes);
+ }
+
while (true) {
assert(LiveInts->isLiveInToMBB(LR, &*MFI));
// We don't know how to track physregs into a landing pad.
@@ -2141,7 +2245,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// instruction with subregister intervals
// only one of the subregisters (not necessarily the current one) needs to
// be defined.
- if (!PVNI && (LaneMask.none() || !IsPHI) ) {
+ if (!PVNI && (LaneMask.none() || !IsPHI)) {
+ if (LiveRangeCalc::isJointlyDominated(*PI, Undefs, *Indexes))
+ continue;
report("Register not marked live out of predecessor", *PI);
report_context(LR, Reg, LaneMask);
report_context(*VNI);
diff --git a/contrib/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm/lib/CodeGen/MacroFusion.cpp
index 62dadbba0c1a..82b6d642c73b 100644
--- a/contrib/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/contrib/llvm/lib/CodeGen/MacroFusion.cpp
@@ -67,8 +67,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
SI.setLatency(0);
LLVM_DEBUG(
- dbgs() << "Macro fuse: "; FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
- SecondSU.print(dbgs(), &DAG); dbgs() << " / ";
+ dbgs() << "Macro fuse: "; DAG.dumpNodeName(FirstSU); dbgs() << " - ";
+ DAG.dumpNodeName(SecondSU); dbgs() << " / ";
dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - "
<< DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';);
@@ -80,8 +80,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
if (SI.isWeak() || isHazard(SI) ||
SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU))
continue;
- LLVM_DEBUG(dbgs() << " Bind "; SecondSU.print(dbgs(), &DAG);
- dbgs() << " - "; SU->print(dbgs(), &DAG); dbgs() << '\n';);
+ LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(SecondSU);
+ dbgs() << " - "; DAG.dumpNodeName(*SU); dbgs() << '\n';);
DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial));
}
@@ -92,8 +92,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
SUnit *SU = SI.getSUnit();
if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU))
continue;
- LLVM_DEBUG(dbgs() << " Bind "; SU->print(dbgs(), &DAG); dbgs() << " - ";
- FirstSU.print(dbgs(), &DAG); dbgs() << '\n';);
+ LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(*SU); dbgs() << " - ";
+ DAG.dumpNodeName(FirstSU); dbgs() << '\n';);
DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial));
}
// ExitSU comes last by design, which acts like an implicit dependency
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
index befa8422d399..770f6c5b0403 100644
--- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -90,10 +90,10 @@ bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
}
/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
-/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
/// SingleValReg is zero on entry, it is set to the register with the single
-/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
-/// have been scanned.
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
+/// have been scanned. PHIs may form a single cycle, several cycles, or chains.
bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
unsigned &SingleValReg,
InstrSet &PHIsInCycle) {
@@ -119,8 +119,10 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
if (SrcMI && SrcMI->isCopy() &&
!SrcMI->getOperand(0).getSubReg() &&
!SrcMI->getOperand(1).getSubReg() &&
- TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
- SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) {
+ SrcReg = SrcMI->getOperand(1).getReg();
+ SrcMI = MRI->getVRegDef(SrcReg);
+ }
if (!SrcMI)
return false;
@@ -129,7 +131,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
return false;
} else {
// Fail if there is more than one non-phi/non-move register.
- if (SingleValReg != 0)
+ if (SingleValReg != 0 && SingleValReg != SrcReg)
return false;
SingleValReg = SrcReg;
}
@@ -180,6 +182,9 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
continue;
+ // Clear kill flags on SingleValReg in case it was taken from a copy instruction.
+ MRI->clearKillFlags(SingleValReg);
+
MRI->replaceRegWith(OldReg, SingleValReg);
MI->eraseFromParent();
++NumPHICycles;
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index 7a5c20000066..b9801c6fd97b 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -153,8 +153,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
// This pass takes the function out of SSA form.
MRI->leaveSSA();
- // Split critical edges to help the coalescer. This does not yet support
- // updating LiveIntervals, so we disable it.
+ // Split critical edges to help the coalescer.
if (!DisableEdgeSplitting && (LV || LIS)) {
MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
for (auto &MBB : MF)
@@ -197,12 +196,11 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
- MachineBasicBlock &MBB) {
+ MachineBasicBlock &MBB) {
if (MBB.empty() || !MBB.front().isPHI())
return false; // Quick exit for basic blocks without PHIs.
- // Get an iterator to the first instruction after the last PHI node (this may
- // also be the end of the basic block).
+ // Get an iterator to the last PHI node.
MachineBasicBlock::iterator LastPHIIt =
std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
@@ -212,26 +210,26 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
return true;
}
-/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs.
+/// Return true if all defs of VirtReg are implicit-defs.
/// This includes registers with no defs.
static bool isImplicitlyDefined(unsigned VirtReg,
- const MachineRegisterInfo *MRI) {
- for (MachineInstr &DI : MRI->def_instructions(VirtReg))
+ const MachineRegisterInfo &MRI) {
+ for (MachineInstr &DI : MRI.def_instructions(VirtReg))
if (!DI.isImplicitDef())
return false;
return true;
}
-/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
-/// are implicit_def's.
-static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
- const MachineRegisterInfo *MRI) {
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
- if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI))
+/// Return true if all sources of the phi node are implicit_def's, or undef's.
+static bool allPhiOperandsUndefined(const MachineInstr &MPhi,
+ const MachineRegisterInfo &MRI) {
+ for (unsigned I = 1, E = MPhi.getNumOperands(); I != E; I += 2) {
+ const MachineOperand &MO = MPhi.getOperand(I);
+ if (!isImplicitlyDefined(MO.getReg(), MRI) && !MO.isUndef())
return false;
+ }
return true;
}
-
/// LowerPHINode - Lower the PHI node at the top of the specified block.
void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
MachineBasicBlock::iterator LastPHIIt) {
@@ -256,8 +254,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// after any remaining phi nodes) which copies the new incoming register
// into the phi node destination.
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- if (isSourceDefinedByImplicitDef(MPhi, MRI))
- // If all sources of a PHI node are implicit_def, just emit an
+ if (allPhiOperandsUndefined(*MPhi, *MRI))
+ // If all sources of a PHI node are implicit_def or undef uses, just emit an
// implicit_def instead of a copy.
BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
@@ -374,7 +372,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
- isImplicitlyDefined(SrcReg, MRI);
+ isImplicitlyDefined(SrcReg, *MRI);
assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
"Machine PHI Operands must all be virtual registers!");
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 215da630caf4..dd0a5fe1b39d 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -256,7 +256,7 @@ void SchedulePostRATDList::exitRegion() {
LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
- SU->dump(this);
+ dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
}
@@ -414,11 +414,7 @@ void SchedulePostRATDList::schedule() {
postprocessDAG();
LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
- LLVM_DEBUG(for (const SUnit &SU
- : SUnits) {
- SU.dumpAll(this);
- dbgs() << '\n';
- });
+ LLVM_DEBUG(dump());
AvailableQueue.initNodes(SUnits);
ListScheduleTopDown();
@@ -465,7 +461,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
+ dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -502,7 +498,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() &&
diff --git a/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 8f88ef78828a..b0e9ac03612d 100644
--- a/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -7,13 +7,15 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass implements IR lowering for the llvm.load.relative intrinsic.
+// This pass implements IR lowering for the llvm.load.relative and llvm.objc.*
+// intrinsics.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -55,11 +57,129 @@ static bool lowerLoadRelative(Function &F) {
return Changed;
}
+static bool lowerObjCCall(Function &F, const char *NewFn,
+ bool setNonLazyBind = false) {
+ if (F.use_empty())
+ return false;
+
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = F.getParent();
+ Constant* FCache = M->getOrInsertFunction(NewFn, F.getFunctionType());
+
+ if (Function* Fn = dyn_cast<Function>(FCache)) {
+ Fn->setLinkage(F.getLinkage());
+ if (setNonLazyBind && !Fn->isWeakForLinker()) {
+ // If we have Native ARC, set nonlazybind attribute for these APIs for
+ // performance.
+ Fn->addFnAttr(Attribute::NonLazyBind);
+ }
+ }
+
+ for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
+ auto *CI = dyn_cast<CallInst>(I->getUser());
+ assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
+ ++I;
+
+ IRBuilder<> Builder(CI->getParent(), CI->getIterator());
+ SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
+ CallInst *NewCI = Builder.CreateCall(FCache, Args);
+ NewCI->setName(CI->getName());
+ NewCI->setTailCallKind(CI->getTailCallKind());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ CI->eraseFromParent();
+ }
+
+ return true;
+}
+
static bool lowerIntrinsics(Module &M) {
bool Changed = false;
for (Function &F : M) {
- if (F.getName().startswith("llvm.load.relative."))
+ if (F.getName().startswith("llvm.load.relative.")) {
Changed |= lowerLoadRelative(F);
+ continue;
+ }
+ switch (F.getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::objc_autorelease:
+ Changed |= lowerObjCCall(F, "objc_autorelease");
+ break;
+ case Intrinsic::objc_autoreleasePoolPop:
+ Changed |= lowerObjCCall(F, "objc_autoreleasePoolPop");
+ break;
+ case Intrinsic::objc_autoreleasePoolPush:
+ Changed |= lowerObjCCall(F, "objc_autoreleasePoolPush");
+ break;
+ case Intrinsic::objc_autoreleaseReturnValue:
+ Changed |= lowerObjCCall(F, "objc_autoreleaseReturnValue");
+ break;
+ case Intrinsic::objc_copyWeak:
+ Changed |= lowerObjCCall(F, "objc_copyWeak");
+ break;
+ case Intrinsic::objc_destroyWeak:
+ Changed |= lowerObjCCall(F, "objc_destroyWeak");
+ break;
+ case Intrinsic::objc_initWeak:
+ Changed |= lowerObjCCall(F, "objc_initWeak");
+ break;
+ case Intrinsic::objc_loadWeak:
+ Changed |= lowerObjCCall(F, "objc_loadWeak");
+ break;
+ case Intrinsic::objc_loadWeakRetained:
+ Changed |= lowerObjCCall(F, "objc_loadWeakRetained");
+ break;
+ case Intrinsic::objc_moveWeak:
+ Changed |= lowerObjCCall(F, "objc_moveWeak");
+ break;
+ case Intrinsic::objc_release:
+ Changed |= lowerObjCCall(F, "objc_release", true);
+ break;
+ case Intrinsic::objc_retain:
+ Changed |= lowerObjCCall(F, "objc_retain", true);
+ break;
+ case Intrinsic::objc_retainAutorelease:
+ Changed |= lowerObjCCall(F, "objc_retainAutorelease");
+ break;
+ case Intrinsic::objc_retainAutoreleaseReturnValue:
+ Changed |= lowerObjCCall(F, "objc_retainAutoreleaseReturnValue");
+ break;
+ case Intrinsic::objc_retainAutoreleasedReturnValue:
+ Changed |= lowerObjCCall(F, "objc_retainAutoreleasedReturnValue");
+ break;
+ case Intrinsic::objc_retainBlock:
+ Changed |= lowerObjCCall(F, "objc_retainBlock");
+ break;
+ case Intrinsic::objc_storeStrong:
+ Changed |= lowerObjCCall(F, "objc_storeStrong");
+ break;
+ case Intrinsic::objc_storeWeak:
+ Changed |= lowerObjCCall(F, "objc_storeWeak");
+ break;
+ case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
+ Changed |= lowerObjCCall(F, "objc_unsafeClaimAutoreleasedReturnValue");
+ break;
+ case Intrinsic::objc_retainedObject:
+ Changed |= lowerObjCCall(F, "objc_retainedObject");
+ break;
+ case Intrinsic::objc_unretainedObject:
+ Changed |= lowerObjCCall(F, "objc_unretainedObject");
+ break;
+ case Intrinsic::objc_unretainedPointer:
+ Changed |= lowerObjCCall(F, "objc_unretainedPointer");
+ break;
+ case Intrinsic::objc_retain_autorelease:
+ Changed |= lowerObjCCall(F, "objc_retain_autorelease");
+ break;
+ case Intrinsic::objc_sync_enter:
+ Changed |= lowerObjCCall(F, "objc_sync_enter");
+ break;
+ case Intrinsic::objc_sync_exit:
+ Changed |= lowerObjCCall(F, "objc_sync_exit");
+ break;
+ }
}
return Changed;
}
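
For readers skimming the hunk above, the per-call-site rewrite done by lowerObjCCall can be pictured with the short sketch below. It is illustrative only and not part of the patch: rewriteObjCCallSite is a hypothetical name, RuntimeFn stands for the Constant returned by getOrInsertFunction in the code above, and the function-lookup and nonlazybind handling are omitted.

static void rewriteObjCCallSite(CallInst *CI, Constant *RuntimeFn) {
  // Build the replacement call right before the old intrinsic call.
  IRBuilder<> Builder(CI->getParent(), CI->getIterator());
  SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
  CallInst *NewCI = Builder.CreateCall(RuntimeFn, Args);
  NewCI->setName(CI->getName());
  NewCI->setTailCallKind(CI->getTailCallKind()); // preserve the tail-call kind
  // Redirect users, e.g. a call to llvm.objc.retain now uses objc_retain.
  if (!CI->use_empty())
    CI->replaceAllUsesWith(NewCI);
  CI->eraseFromParent();
}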
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index fc62c8caf59e..23754e487a18 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -75,6 +75,10 @@ using namespace llvm;
using MBBVector = SmallVector<MachineBasicBlock *, 4>;
+STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
+STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
+
+
namespace {
class PEI : public MachineFunctionPass {
@@ -168,6 +172,7 @@ using StackObjSet = SmallSetVector<int, 8>;
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
bool PEI::runOnMachineFunction(MachineFunction &MF) {
+ NumFuncSeen++;
const Function &F = MF.getFunction();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
@@ -357,6 +362,11 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
// Now that we know which registers need to be saved and restored, allocate
// stack slots for them.
for (auto &CS : CSI) {
+ // If the target has spilled this register to another register, we don't
+ // need to allocate a stack slot.
+ if (CS.isSpilledToReg())
+ continue;
+
unsigned Reg = CS.getReg();
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
@@ -454,7 +464,22 @@ static void updateLiveness(MachineFunction &MF) {
if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
MBB->addLiveIn(Reg);
}
+    // If a callee-saved register is spilled to another register rather than
+    // to the stack, the destination register has to be marked as live for
+ // each MBB between the prologue and epilogue so that it is not clobbered
+ // before it is reloaded in the epilogue. The Visited set contains all
+ // blocks outside of the region delimited by prologue/epilogue.
+ if (CSI[i].isSpilledToReg()) {
+ for (MachineBasicBlock &MBB : MF) {
+ if (Visited.count(&MBB))
+ continue;
+ MCPhysReg DstReg = CSI[i].getDstReg();
+ if (!MBB.isLiveIn(DstReg))
+ MBB.addLiveIn(DstReg);
+ }
+ }
}
+
}
/// Insert restore code for the callee-saved registers used in the function.
@@ -530,6 +555,9 @@ void PEI::spillCalleeSavedRegs(MachineFunction &MF) {
std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (!CSI.empty()) {
+ if (!MFI.hasCalls())
+ NumLeafFuncWithSpills++;
+
for (MachineBasicBlock *SaveBlock : SaveBlocks) {
insertCSRSaves(*SaveBlock, CSI);
// Update the live-in information of all the blocks up to the save
@@ -1090,7 +1118,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
MachineOperand &Offset = MI.getOperand(i + 1);
int refOffset = TFI->getFrameIndexReferencePreferSP(
MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
- Offset.setImm(Offset.getImm() + refOffset);
+ Offset.setImm(Offset.getImm() + refOffset + SPAdj);
MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
continue;
}
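
As a side note on the callee-saved-register hunks above, the new isSpilledToReg/getDstReg handling can be summarized by the sketch below. handleCSR, Size and Align are made-up names used only for illustration, and the real updateLiveness additionally skips the blocks recorded in the Visited set; this is not code from the patch.

static void handleCSR(MachineFunction &MF, MachineFrameInfo &MFI,
                      CalleeSavedInfo &CS, unsigned Size, unsigned Align) {
  if (CS.isSpilledToReg()) {
    // Saved in another register: no frame object is needed, but the
    // destination register must stay live between prologue and epilogue.
    MCPhysReg DstReg = CS.getDstReg();
    for (MachineBasicBlock &MBB : MF)
      if (!MBB.isLiveIn(DstReg))
        MBB.addLiveIn(DstReg);
    return;
  }
  // Saved on the stack: allocate a spill slot as before.
  CS.setFrameIdx(MFI.CreateSpillStackObject(Size, Align));
}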
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
index 86fd87450521..6ca8d86e3f8e 100644
--- a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -25,7 +25,7 @@ static const char *const PSVNames[] = {
"Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
"GlobalValueCallEntry", "ExternalSymbolCallEntry"};
-PseudoSourceValue::PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII)
+PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
: Kind(Kind) {
AddressSpace = TII.getAddressSpaceForPseudoSourceKind(Kind);
}
@@ -81,7 +81,7 @@ void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
}
CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(
- PSVKind Kind, const TargetInstrInfo &TII)
+ unsigned Kind, const TargetInstrInfo &TII)
: PseudoSourceValue(Kind, TII) {}
bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
diff --git a/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 050fef5d25ed..a9f0a9387297 100644
--- a/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -157,7 +157,7 @@ bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) {
  // Sorting all reaching defs found for a certain reg unit in a given BB.
for (MBBDefsInfo &MBBDefs : MBBReachingDefs) {
for (MBBRegUnitDefs &RegUnitDefs : MBBDefs)
- llvm::sort(RegUnitDefs.begin(), RegUnitDefs.end());
+ llvm::sort(RegUnitDefs);
}
return false;
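
The ReachingDefAnalysis hunk is a mechanical switch to the range overload of llvm::sort. A minimal sketch of the two equivalent spellings, using a made-up vector name:

SmallVector<int, 8> Defs = {3, 1, 2};
llvm::sort(Defs.begin(), Defs.end()); // iterator form, as before the change
llvm::sort(Defs);                     // range form used after the change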
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index 7b57c6cbcdb8..eb3a4e481f5d 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -54,7 +54,7 @@ using namespace llvm;
STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads , "Number of loads added");
-STATISTIC(NumCopies, "Number of copies coalesced");
+STATISTIC(NumCoalesced, "Number of copies coalesced");
static RegisterRegAlloc
fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
@@ -88,7 +88,7 @@ namespace {
unsigned short LastOpNum = 0; ///< OpNum on LastUse.
bool Dirty = false; ///< Register needs spill.
- explicit LiveReg(unsigned v) : VirtReg(v) {}
+ explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {}
unsigned getSparseSetIndex() const {
return TargetRegisterInfo::virtReg2Index(VirtReg);
@@ -96,14 +96,13 @@ namespace {
};
using LiveRegMap = SparseSet<LiveReg>;
-
/// This map contains entries for each virtual register that is currently
/// available in a physical register.
LiveRegMap LiveVirtRegs;
- DenseMap<unsigned, SmallVector<MachineInstr *, 4>> LiveDbgValueMap;
+ DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
- /// Track the state of a physical register.
+ /// State of a physical register.
enum RegState {
/// A disabled register is not available for allocation, but an alias may
/// be in use. A register can only be moved out of the disabled state if
@@ -123,18 +122,18 @@ namespace {
/// register. In that case, LiveVirtRegs contains the inverse mapping.
};
- /// One of the RegState enums, or a virtreg.
+ /// Maps each physical register to a RegState enum or a virtual register.
std::vector<unsigned> PhysRegState;
SmallVector<unsigned, 16> VirtDead;
SmallVector<MachineInstr *, 32> Coalesced;
- /// Set of register units.
- using UsedInInstrSet = SparseSet<unsigned>;
-
+ using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
/// Set of register units that are used in the current instruction, and so
/// cannot be allocated.
- UsedInInstrSet UsedInInstr;
+ RegUnitSet UsedInInstr;
+
+ void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
/// Mark a physreg as used in this instruction.
void markRegUsedInInstr(MCPhysReg PhysReg) {
@@ -150,12 +149,8 @@ namespace {
return false;
}
- /// This flag is set when LiveRegMap will be cleared completely after
- /// spilling all live registers. LiveRegMap entries should not be erased.
- bool isBulkSpilling = false;
-
enum : unsigned {
- spillClean = 1,
+ spillClean = 50,
spillDirty = 100,
spillImpossible = ~0u
};
@@ -180,16 +175,18 @@ namespace {
private:
bool runOnMachineFunction(MachineFunction &MF) override;
+
void allocateBasicBlock(MachineBasicBlock &MBB);
+ void allocateInstruction(MachineInstr &MI);
+ void handleDebugValue(MachineInstr &MI);
void handleThroughOperands(MachineInstr &MI,
SmallVectorImpl<unsigned> &VirtDead);
- int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass &RC);
bool isLastUseOfLocalReg(const MachineOperand &MO) const;
void addKillFlag(const LiveReg &LRI);
- void killVirtReg(LiveRegMap::iterator LRI);
+ void killVirtReg(LiveReg &LR);
void killVirtReg(unsigned VirtReg);
- void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+ void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
void usePhysReg(MachineOperand &MO);
@@ -206,15 +203,19 @@ namespace {
return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
}
- LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg);
- LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator,
- unsigned Hint);
- LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint);
- LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint);
+ void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
+ MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
+ unsigned Hint);
+ LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
+ unsigned Hint);
void spillAll(MachineBasicBlock::iterator MI);
- bool setPhysReg(MachineInstr &MI, unsigned OpNum, MCPhysReg PhysReg);
+ bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+
+ int getStackSpaceFor(unsigned VirtReg);
+ void spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg AssignedReg, bool Kill);
+ void reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg PhysReg);
void dumpState();
};
@@ -226,10 +227,13 @@ char RegAllocFast::ID = 0;
INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
false)
+void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
+ PhysRegState[PhysReg] = NewState;
+}
+
/// This allocates space for the specified virtual register to be held on the
/// stack.
-int RegAllocFast::getStackSpaceFor(unsigned VirtReg,
- const TargetRegisterClass &RC) {
+int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
// Find the location Reg would belong...
int SS = StackSlotForVirtReg[VirtReg];
// Already has space allocated?
@@ -237,6 +241,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg,
return SS;
// Allocate a new stack object for this spill location...
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
unsigned Size = TRI->getSpillSize(RC);
unsigned Align = TRI->getSpillAlignment(RC);
int FrameIdx = MFI->CreateSpillStackObject(Size, Align);
@@ -246,6 +251,46 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg,
return FrameIdx;
}
+/// Insert spill instruction for \p AssignedReg before \p Before. Update
+/// DBG_VALUEs with \p VirtReg operands with the stack slot.
+void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg AssignedReg, bool Kill) {
+ LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI)
+ << " in " << printReg(AssignedReg, TRI));
+ int FI = getStackSpaceFor(VirtReg);
+ LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
+
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI);
+ ++NumStores;
+
+ // If this register is used by DBG_VALUE then insert new DBG_VALUE to
+ // identify spilled location as the place to find corresponding variable's
+ // value.
+ SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[VirtReg];
+ for (MachineInstr *DBG : LRIDbgValues) {
+ MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI);
+ assert(NewDV->getParent() == MBB && "dangling parent pointer");
+ (void)NewDV;
+ LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV);
+ }
+  // Now that this register has been spilled, there should not be any DBG_VALUE
+  // pointing to this register, because they all point to the spilled value
+  // now.
+ LRIDbgValues.clear();
+}
+
+/// Insert reload instruction for \p PhysReg before \p Before.
+void RegAllocFast::reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg PhysReg) {
+ LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
+ << printReg(PhysReg, TRI) << '\n');
+ int FI = getStackSpaceFor(VirtReg);
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI);
+ ++NumLoads;
+}
+
/// Return true if MO is the only remaining reference to its virtual register,
/// and it is guaranteed to be a block-local register.
bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const {
@@ -281,14 +326,12 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) {
}
/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(LiveRegMap::iterator LRI) {
- addKillFlag(*LRI);
- assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
+void RegAllocFast::killVirtReg(LiveReg &LR) {
+ addKillFlag(LR);
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg &&
"Broken RegState mapping");
- PhysRegState[LRI->PhysReg] = regFree;
- // Erase from LiveVirtRegs unless we're spilling in bulk.
- if (!isBulkSpilling)
- LiveVirtRegs.erase(LRI);
+ setPhysRegState(LR.PhysReg, regFree);
+ LR.PhysReg = 0;
}
/// Mark virtreg as no longer available.
@@ -296,8 +339,8 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- if (LRI != LiveVirtRegs.end())
- killVirtReg(LRI);
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg)
+ killVirtReg(*LRI);
}
/// This method spills the value specified by VirtReg into the corresponding
@@ -307,63 +350,41 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
- spillVirtReg(MI, LRI);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Spilling unmapped virtual register");
+ spillVirtReg(MI, *LRI);
}
/// Do the actual work of spilling.
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
- LiveRegMap::iterator LRI) {
- LiveReg &LR = *LRI;
- assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
+void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping");
if (LR.Dirty) {
// If this physreg is used by the instruction, we want to kill it on the
// instruction, not on the spill.
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
LR.Dirty = false;
- LLVM_DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI) << " in "
- << printReg(LR.PhysReg, TRI));
- const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg);
- int FI = getStackSpaceFor(LRI->VirtReg, RC);
- LLVM_DEBUG(dbgs() << " to stack slot #" << FI << "\n");
- TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI);
- ++NumStores; // Update statistics
-
- // If this register is used by DBG_VALUE then insert new DBG_VALUE to
- // identify spilled location as the place to find corresponding variable's
- // value.
- SmallVectorImpl<MachineInstr *> &LRIDbgValues =
- LiveDbgValueMap[LRI->VirtReg];
- for (MachineInstr *DBG : LRIDbgValues) {
- MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI);
- assert(NewDV->getParent() == MBB && "dangling parent pointer");
- (void)NewDV;
- LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:"
- << "\n"
- << *NewDV);
- }
- // Now this register is spilled there is should not be any DBG_VALUE
- // pointing to this register because they are all pointing to spilled value
- // now.
- LRIDbgValues.clear();
+
+ spill(MI, LR.VirtReg, LR.PhysReg, SpillKill);
+
if (SpillKill)
LR.LastUse = nullptr; // Don't kill register again
}
- killVirtReg(LRI);
+ killVirtReg(LR);
}
/// Spill all dirty virtregs without killing them.
void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
- if (LiveVirtRegs.empty()) return;
- isBulkSpilling = true;
+ if (LiveVirtRegs.empty())
+ return;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
// of spilling here is deterministic, if arbitrary.
- for (LiveRegMap::iterator I = LiveVirtRegs.begin(), E = LiveVirtRegs.end();
- I != E; ++I)
- spillVirtReg(MI, I);
+ for (LiveReg &LR : LiveVirtRegs) {
+ if (!LR.PhysReg)
+ continue;
+ spillVirtReg(MI, LR);
+ }
LiveVirtRegs.clear();
- isBulkSpilling = false;
}
/// Handle the direct use of a physical register. Check that the register is
@@ -417,12 +438,12 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
- PhysRegState[Alias] = regFree;
+ setPhysRegState(Alias, regFree);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
}
// Some other alias was in the working set - clear it.
- PhysRegState[Alias] = regDisabled;
+ setPhysRegState(Alias, regDisabled);
break;
default:
llvm_unreachable("Instruction uses an alias of an allocated register");
@@ -430,7 +451,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
}
// All aliases are disabled, bring register into working set.
- PhysRegState[PhysReg] = regFree;
+ setPhysRegState(PhysReg, regFree);
MO.setIsKill();
}
@@ -448,12 +469,12 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
LLVM_FALLTHROUGH;
case regFree:
case regReserved:
- PhysRegState[PhysReg] = NewState;
+ setPhysRegState(PhysReg, NewState);
return;
}
// This is a disabled register, disable all aliases.
- PhysRegState[PhysReg] = NewState;
+ setPhysRegState(PhysReg, NewState);
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
MCPhysReg Alias = *AI;
switch (unsigned VirtReg = PhysRegState[Alias]) {
@@ -464,7 +485,7 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
LLVM_FALLTHROUGH;
case regFree:
case regReserved:
- PhysRegState[Alias] = regDisabled;
+ setPhysRegState(Alias, regDisabled);
if (TRI->isSuperRegister(PhysReg, Alias))
return;
break;
@@ -472,9 +493,9 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
}
}
-/// Return the cost of spilling clearing out PhysReg and aliases so it is
-/// free for allocation. Returns 0 when PhysReg is free or disabled with all
-/// aliases disabled - it can be allocated directly.
+/// Return the cost of spilling clearing out PhysReg and aliases so it is free
+/// for allocation. Returns 0 when PhysReg is free or disabled with all aliases
+/// disabled - it can be allocated directly.
/// \returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
if (isRegUsedInInstr(PhysReg)) {
@@ -492,9 +513,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
<< printReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default: {
- LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- return I->Dirty ? spillDirty : spillClean;
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ return LRI->Dirty ? spillDirty : spillClean;
}
}
@@ -512,9 +534,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
case regReserved:
return spillImpossible;
default: {
- LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- Cost += I->Dirty ? spillDirty : spillClean;
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ Cost += LRI->Dirty ? spillDirty : spillClean;
break;
}
}
@@ -526,31 +549,27 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
/// proper container for VirtReg now. The physical register must not be used
/// for anything else when this is called.
void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
- LLVM_DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to "
- << printReg(PhysReg, TRI) << "\n");
- PhysRegState[PhysReg] = LR.VirtReg;
- assert(!LR.PhysReg && "Already assigned a physreg");
+ unsigned VirtReg = LR.VirtReg;
+ LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to "
+ << printReg(PhysReg, TRI) << '\n');
+ assert(LR.PhysReg == 0 && "Already assigned a physreg");
+ assert(PhysReg != 0 && "Trying to assign no register");
LR.PhysReg = PhysReg;
-}
-
-RegAllocFast::LiveRegMap::iterator
-RegAllocFast::assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg) {
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
+ setPhysRegState(PhysReg, VirtReg);
}
/// Allocates a physical register for VirtReg.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
- LiveRegMap::iterator LRI, unsigned Hint) {
- const unsigned VirtReg = LRI->VirtReg;
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
+ const unsigned VirtReg = LR.VirtReg;
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
- // Take hint when possible.
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
+ << " in class " << TRI->getRegClassName(&RC) << '\n');
+
+ // Take hint when possible.
if (TargetRegisterInfo::isPhysicalRegister(Hint) &&
MRI->isAllocatable(Hint) && RC.contains(Hint)) {
// Ignore the hint if we would have to spill a dirty register.
@@ -558,67 +577,62 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
if (Cost < spillDirty) {
if (Cost)
definePhysReg(MI, Hint, regFree);
- // definePhysReg may kill virtual registers and modify LiveVirtRegs.
- // That invalidates LRI, so run a new lookup for VirtReg.
- return assignVirtToPhysReg(VirtReg, Hint);
+ assignVirtToPhysReg(LR, Hint);
+ return;
}
}
// First try to find a completely free register.
- ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(&RC);
- for (MCPhysReg PhysReg : AO) {
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ for (MCPhysReg PhysReg : AllocationOrder) {
if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
+ assignVirtToPhysReg(LR, PhysReg);
+ return;
}
}
- LLVM_DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from "
- << TRI->getRegClassName(&RC) << "\n");
-
- unsigned BestReg = 0;
+ MCPhysReg BestReg = 0;
unsigned BestCost = spillImpossible;
- for (MCPhysReg PhysReg : AO) {
+ for (MCPhysReg PhysReg : AllocationOrder) {
+ LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' ');
unsigned Cost = calcSpillCost(PhysReg);
- LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n");
- LLVM_DEBUG(dbgs() << "\tCost: " << Cost << "\n");
- LLVM_DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
- // Cost is 0 when all aliases are already disabled.
+ LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n');
+    // Immediately take a register with cost 0.
if (Cost == 0) {
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
+ assignVirtToPhysReg(LR, PhysReg);
+ return;
+ }
+ if (Cost < BestCost) {
+ BestReg = PhysReg;
+ BestCost = Cost;
}
- if (Cost < BestCost)
- BestReg = PhysReg, BestCost = Cost;
}
- if (BestReg) {
- definePhysReg(MI, BestReg, regFree);
- // definePhysReg may kill virtual registers and modify LiveVirtRegs.
- // That invalidates LRI, so run a new lookup for VirtReg.
- return assignVirtToPhysReg(VirtReg, BestReg);
+ if (!BestReg) {
+ // Nothing we can do: Report an error and keep going with an invalid
+ // allocation.
+ if (MI.isInlineAsm())
+ MI.emitError("inline assembly requires more registers than available");
+ else
+ MI.emitError("ran out of registers during register allocation");
+ definePhysReg(MI, *AllocationOrder.begin(), regFree);
+ assignVirtToPhysReg(LR, *AllocationOrder.begin());
+ return;
}
- // Nothing we can do. Report an error and keep going with a bad allocation.
- if (MI.isInlineAsm())
- MI.emitError("inline assembly requires more registers than available");
- else
- MI.emitError("ran out of registers during register allocation");
- definePhysReg(MI, *AO.begin(), regFree);
- return assignVirtToPhysReg(VirtReg, *AO.begin());
+ definePhysReg(MI, BestReg, regFree);
+ assignVirtToPhysReg(LR, BestReg);
}
/// Allocates a register for VirtReg and mark it as dirty.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
- unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
- if (New) {
+ if (!LRI->PhysReg) {
// If there is no hint, peek at the only use of this register.
if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
MRI->hasOneNonDBGUse(VirtReg)) {
@@ -627,7 +641,7 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
if (UseMI.isCopyLike())
Hint = UseMI.getOperand(0).getReg();
}
- LRI = allocVirtReg(MI, LRI, Hint);
+ allocVirtReg(MI, *LRI, Hint);
} else if (LRI->LastUse) {
// Redefining a live register - kill at the last use, unless it is this
// instruction defining VirtReg multiple times.
@@ -639,40 +653,35 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
LRI->LastOpNum = OpNum;
LRI->Dirty = true;
markRegUsedInInstr(LRI->PhysReg);
- return LRI;
+ return LRI->PhysReg;
}
/// Make sure VirtReg is available in a physreg and return it.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
- unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ unsigned VirtReg,
+ unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
MachineOperand &MO = MI.getOperand(OpNum);
- if (New) {
- LRI = allocVirtReg(MI, LRI, Hint);
- const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
- int FrameIndex = getStackSpaceFor(VirtReg, RC);
- LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
- << printReg(LRI->PhysReg, TRI) << "\n");
- TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI);
- ++NumLoads;
+ if (!LRI->PhysReg) {
+ allocVirtReg(MI, *LRI, Hint);
+ reload(MI, VirtReg, LRI->PhysReg);
} else if (LRI->Dirty) {
if (isLastUseOfLocalReg(MO)) {
- LLVM_DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Killing last use: " << MO << '\n');
if (MO.isUse())
MO.setIsKill();
else
MO.setIsDead();
} else if (MO.isKill()) {
- LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << '\n');
MO.setIsKill(false);
} else if (MO.isDead()) {
- LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << '\n');
MO.setIsDead(false);
}
} else if (MO.isKill()) {
@@ -680,25 +689,24 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
// register would be killed immediately, and there might be a second use:
// %foo = OR killed %x, %x
// This would cause a second reload of %x into a different register.
- LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << '\n');
MO.setIsKill(false);
} else if (MO.isDead()) {
- LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << '\n');
MO.setIsDead(false);
}
assert(LRI->PhysReg && "Register not assigned");
LRI->LastUse = &MI;
LRI->LastOpNum = OpNum;
markRegUsedInInstr(LRI->PhysReg);
- return LRI;
+ return *LRI;
}
/// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This
/// may invalidate any operand pointers. Return true if the operand kills its
/// register.
-bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum,
+bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
MCPhysReg PhysReg) {
- MachineOperand &MO = MI.getOperand(OpNum);
bool Dead = MO.isDead();
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
@@ -761,7 +769,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
SmallVector<unsigned, 8> PartialDefs;
LLVM_DEBUG(dbgs() << "Allocating tied uses.\n");
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
+ MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
@@ -770,17 +778,17 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO
<< ") is tied to operand " << MI.findTiedOperandIdx(I)
<< ".\n");
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
- MCPhysReg PhysReg = LRI->PhysReg;
- setPhysReg(MI, I, PhysReg);
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
+ MCPhysReg PhysReg = LR.PhysReg;
+ setPhysReg(MI, MO, PhysReg);
// Note: we don't update the def operand yet. That would cause the normal
// def-scan to attempt spilling.
} else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) {
- LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n');
// Reload the register, but don't assign to the operand just yet.
// That would confuse the later phys-def processing pass.
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
- PartialDefs.push_back(LRI->PhysReg);
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
+ PartialDefs.push_back(LR.PhysReg);
}
}
@@ -793,9 +801,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
if (!MO.isEarlyClobber())
continue;
// Note: defineVirtReg may invalidate MO.
- LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, 0);
- MCPhysReg PhysReg = LRI->PhysReg;
- if (setPhysReg(MI, I, PhysReg))
+ MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0);
+ if (setPhysReg(MI, MI.getOperand(I), PhysReg))
VirtDead.push_back(Reg);
}
@@ -828,11 +835,12 @@ void RegAllocFast::dumpState() {
break;
default: {
dbgs() << '=' << printReg(PhysRegState[Reg]);
- LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- if (I->Dirty)
+ LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ if (LRI->Dirty)
dbgs() << "*";
- assert(I->PhysReg == Reg && "Bad inverse map");
+ assert(LRI->PhysReg == Reg && "Bad inverse map");
break;
}
}
@@ -841,6 +849,8 @@ void RegAllocFast::dumpState() {
// Check that LiveVirtRegs is the inverse.
for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
e = LiveVirtRegs.end(); i != e; ++i) {
+ if (!i->PhysReg)
+ continue;
assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
"Bad map key");
assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
@@ -850,6 +860,199 @@ void RegAllocFast::dumpState() {
}
#endif
+void RegAllocFast::allocateInstruction(MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // If this is a copy, we may be able to coalesce.
+ unsigned CopySrcReg = 0;
+ unsigned CopyDstReg = 0;
+ unsigned CopySrcSub = 0;
+ unsigned CopyDstSub = 0;
+ if (MI.isCopy()) {
+ CopyDstReg = MI.getOperand(0).getReg();
+ CopySrcReg = MI.getOperand(1).getReg();
+ CopyDstSub = MI.getOperand(0).getSubReg();
+ CopySrcSub = MI.getOperand(1).getSubReg();
+ }
+
+ // Track registers used by instruction.
+ UsedInInstr.clear();
+
+ // First scan.
+ // Mark physreg uses and early clobbers as used.
+ // Find the end of the virtreg operands
+ unsigned VirtOpEnd = 0;
+ bool hasTiedOps = false;
+ bool hasEarlyClobbers = false;
+ bool hasPartialRedefs = false;
+ bool hasPhysDefs = false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask()) {
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+ continue;
+ }
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg) continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ VirtOpEnd = i+1;
+ if (MO.isUse()) {
+ hasTiedOps = hasTiedOps ||
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
+ } else {
+ if (MO.isEarlyClobber())
+ hasEarlyClobbers = true;
+ if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
+ hasPartialRedefs = true;
+ }
+ continue;
+ }
+ if (!MRI->isAllocatable(Reg)) continue;
+ if (MO.isUse()) {
+ usePhysReg(MO);
+ } else if (MO.isEarlyClobber()) {
+ definePhysReg(MI, Reg,
+ (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
+ hasEarlyClobbers = true;
+ } else
+ hasPhysDefs = true;
+ }
+
+ // The instruction may have virtual register operands that must be allocated
+ // the same register at use-time and def-time: early clobbers and tied
+ // operands. If there are also physical defs, these registers must avoid
+ // both physical defs and uses, making them more constrained than normal
+ // operands.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
+ // We didn't detect inline asm tied operands above, so just make this extra
+ // pass for all inline asm.
+ if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
+ handleThroughOperands(MI, VirtDead);
+ // Don't attempt coalescing when we have funny stuff going on.
+ CopyDstReg = 0;
+ // Pretend we have early clobbers so the use operands get marked below.
+ // This is not necessary for the common case of a single tied use.
+ hasEarlyClobbers = true;
+ }
+
+ // Second scan.
+ // Allocate virtreg uses.
+ for (unsigned I = 0; I != VirtOpEnd; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
+ MCPhysReg PhysReg = LR.PhysReg;
+ CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
+ if (setPhysReg(MI, MO, PhysReg))
+ killVirtReg(LR);
+ }
+ }
+
+ // Track registers defined by instruction - early clobbers and tied uses at
+ // this point.
+ UsedInInstr.clear();
+ if (hasEarlyClobbers) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ // Look for physreg defs and tied uses.
+ if (!MO.isDef() && !MO.isTied()) continue;
+ markRegUsedInInstr(Reg);
+ }
+ }
+
+ unsigned DefOpEnd = MI.getNumOperands();
+ if (MI.isCall()) {
+ // Spill all virtregs before a call. This serves one purpose: If an
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots.
+ // Note: although this is appealing to just consider all definitions
+ // as call-clobbered, this is not correct because some of those
+ // definitions may be used later on and we do not want to reuse
+ // those for virtual registers in between.
+ LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ spillAll(MI);
+ }
+
+ // Third scan.
+ // Allocate defs and collect dead defs.
+ for (unsigned I = 0; I != DefOpEnd; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!MRI->isAllocatable(Reg)) continue;
+ definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+ continue;
+ }
+ MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
+ if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
+ VirtDead.push_back(Reg);
+ CopyDstReg = 0; // cancel coalescing;
+ } else
+ CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
+ }
+
+ // Kill dead defs after the scan to ensure that multiple defs of the same
+ // register are allocated identically. We didn't need to do this for uses
+  // because we are creating our own kill flags, and they are always at the
+ // last use.
+ for (unsigned VirtReg : VirtDead)
+ killVirtReg(VirtReg);
+ VirtDead.clear();
+
+ LLVM_DEBUG(dbgs() << "<< " << MI);
+ if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
+ LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n");
+ Coalesced.push_back(&MI);
+ }
+}
+
+void RegAllocFast::handleDebugValue(MachineInstr &MI) {
+ MachineOperand &MO = MI.getOperand(0);
+
+ // Ignore DBG_VALUEs that aren't based on virtual registers. These are
+ // mostly constants and frame indices.
+ if (!MO.isReg())
+ return;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return;
+
+ // See if this virtual register has already been allocated to a physical
+ // register or spilled to a stack slot.
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
+ setPhysReg(MI, MO, LRI->PhysReg);
+ } else {
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS != -1) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ updateDbgValueForSpill(MI, SS);
+ LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << MI);
+ return;
+ }
+
+ // We can't allocate a physreg for a DebugValue, sorry!
+ LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+
+ // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
+ // that future spills of Reg will have DBG_VALUEs.
+ LiveDbgValueMap[Reg].push_back(&MI);
+}
+
void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
@@ -869,206 +1072,19 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// Otherwise, sequentially allocate each instruction in the MBB.
for (MachineInstr &MI : MBB) {
- const MCInstrDesc &MCID = MI.getDesc();
- LLVM_DEBUG(dbgs() << "\n>> " << MI << "Regs:"; dumpState());
+ LLVM_DEBUG(
+ dbgs() << "\n>> " << MI << "Regs:";
+ dumpState()
+ );
- // Debug values are not allowed to change codegen in any way.
+ // Special handling for debug values. Note that they are not allowed to
+ // affect codegen of the other instructions in any way.
if (MI.isDebugValue()) {
- MachineInstr *DebugMI = &MI;
- MachineOperand &MO = DebugMI->getOperand(0);
-
- // Ignore DBG_VALUEs that aren't based on virtual registers. These are
- // mostly constants and frame indices.
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- // See if this virtual register has already been allocated to a physical
- // register or spilled to a stack slot.
- LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
- if (LRI != LiveVirtRegs.end())
- setPhysReg(*DebugMI, 0, LRI->PhysReg);
- else {
- int SS = StackSlotForVirtReg[Reg];
- if (SS != -1) {
- // Modify DBG_VALUE now that the value is in a spill slot.
- updateDbgValueForSpill(*DebugMI, SS);
- LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:"
- << "\t" << *DebugMI);
- continue;
- }
-
- // We can't allocate a physreg for a DebugValue, sorry!
- LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
- MO.setReg(0);
- }
-
- // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
- // that future spills of Reg will have DBG_VALUEs.
- LiveDbgValueMap[Reg].push_back(DebugMI);
+ handleDebugValue(MI);
continue;
}
- if (MI.isDebugLabel())
- continue;
-
- // If this is a copy, we may be able to coalesce.
- unsigned CopySrcReg = 0;
- unsigned CopyDstReg = 0;
- unsigned CopySrcSub = 0;
- unsigned CopyDstSub = 0;
- if (MI.isCopy()) {
- CopyDstReg = MI.getOperand(0).getReg();
- CopySrcReg = MI.getOperand(1).getReg();
- CopyDstSub = MI.getOperand(0).getSubReg();
- CopySrcSub = MI.getOperand(1).getSubReg();
- }
-
- // Track registers used by instruction.
- UsedInInstr.clear();
-
- // First scan.
- // Mark physreg uses and early clobbers as used.
- // Find the end of the virtreg operands
- unsigned VirtOpEnd = 0;
- bool hasTiedOps = false;
- bool hasEarlyClobbers = false;
- bool hasPartialRedefs = false;
- bool hasPhysDefs = false;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- // Make sure MRI knows about registers clobbered by regmasks.
- if (MO.isRegMask()) {
- MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
- continue;
- }
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg) continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- VirtOpEnd = i+1;
- if (MO.isUse()) {
- hasTiedOps = hasTiedOps ||
- MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
- } else {
- if (MO.isEarlyClobber())
- hasEarlyClobbers = true;
- if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
- hasPartialRedefs = true;
- }
- continue;
- }
- if (!MRI->isAllocatable(Reg)) continue;
- if (MO.isUse()) {
- usePhysReg(MO);
- } else if (MO.isEarlyClobber()) {
- definePhysReg(MI, Reg,
- (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
- hasEarlyClobbers = true;
- } else
- hasPhysDefs = true;
- }
-
- // The instruction may have virtual register operands that must be allocated
- // the same register at use-time and def-time: early clobbers and tied
- // operands. If there are also physical defs, these registers must avoid
- // both physical defs and uses, making them more constrained than normal
- // operands.
- // Similarly, if there are multiple defs and tied operands, we must make
- // sure the same register is allocated to uses and defs.
- // We didn't detect inline asm tied operands above, so just make this extra
- // pass for all inline asm.
- if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
- (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
- handleThroughOperands(MI, VirtDead);
- // Don't attempt coalescing when we have funny stuff going on.
- CopyDstReg = 0;
- // Pretend we have early clobbers so the use operands get marked below.
- // This is not necessary for the common case of a single tied use.
- hasEarlyClobbers = true;
- }
-
- // Second scan.
- // Allocate virtreg uses.
- for (unsigned I = 0; I != VirtOpEnd; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
- if (MO.isUse()) {
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, CopyDstReg);
- MCPhysReg PhysReg = LRI->PhysReg;
- CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
- if (setPhysReg(MI, I, PhysReg))
- killVirtReg(LRI);
- }
- }
-
- // Track registers defined by instruction - early clobbers and tied uses at
- // this point.
- UsedInInstr.clear();
- if (hasEarlyClobbers) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- // Look for physreg defs and tied uses.
- if (!MO.isDef() && !MO.isTied()) continue;
- markRegUsedInInstr(Reg);
- }
- }
-
- unsigned DefOpEnd = MI.getNumOperands();
- if (MI.isCall()) {
- // Spill all virtregs before a call. This serves one purpose: If an
- // exception is thrown, the landing pad is going to expect to find
- // registers in their spill slots.
- // Note: although this is appealing to just consider all definitions
- // as call-clobbered, this is not correct because some of those
- // definitions may be used later on and we do not want to reuse
- // those for virtual registers in between.
- LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
- spillAll(MI);
- }
-
- // Third scan.
- // Allocate defs and collect dead defs.
- for (unsigned I = 0; I != DefOpEnd; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
- continue;
- unsigned Reg = MO.getReg();
-
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (!MRI->isAllocatable(Reg)) continue;
- definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
- continue;
- }
- LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, CopySrcReg);
- MCPhysReg PhysReg = LRI->PhysReg;
- if (setPhysReg(MI, I, PhysReg)) {
- VirtDead.push_back(Reg);
- CopyDstReg = 0; // cancel coalescing;
- } else
- CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
- }
-
- // Kill dead defs after the scan to ensure that multiple defs of the same
- // register are allocated identically. We didn't need to do this for uses
- // because we are crerating our own kill flags, and they are always at the
- // last use.
- for (unsigned VirtReg : VirtDead)
- killVirtReg(VirtReg);
- VirtDead.clear();
-
- if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
- LLVM_DEBUG(dbgs() << "-- coalescing: " << MI);
- Coalesced.push_back(&MI);
- } else {
- LLVM_DEBUG(dbgs() << "<< " << MI);
- }
+ allocateInstruction(MI);
}
// Spill all physical registers holding virtual registers now.
@@ -1079,12 +1095,11 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// LiveVirtRegs might refer to the instrs.
for (MachineInstr *MI : Coalesced)
MBB.erase(MI);
- NumCopies += Coalesced.size();
+ NumCoalesced += Coalesced.size();
LLVM_DEBUG(MBB.dump());
}
-/// Allocates registers for a function.
bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
<< "********** Function: " << MF.getName() << '\n');
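
To summarize the reshuffled selection logic in the RegAllocFast changes above: allocVirtReg now tries the hint, then the first completely free register in allocation order, and finally the occupied register that is cheapest to spill. A hypothetical condensation of that last part follows; pickPhysReg, IsFree and CalcSpillCost are made-up names standing in for the checks in the real code, and the hint handling is omitted.

static MCPhysReg pickPhysReg(ArrayRef<MCPhysReg> AllocationOrder,
                             llvm::function_ref<bool(MCPhysReg)> IsFree,
                             llvm::function_ref<unsigned(MCPhysReg)> CalcSpillCost) {
  // First pass: take the first register that is completely free.
  for (MCPhysReg PhysReg : AllocationOrder)
    if (IsFree(PhysReg))
      return PhysReg;
  // Second pass: pick the cheapest register to evict.
  MCPhysReg BestReg = 0;
  unsigned BestCost = ~0u; // corresponds to spillImpossible
  for (MCPhysReg PhysReg : AllocationOrder) {
    unsigned Cost = CalcSpillCost(PhysReg);
    if (Cost == 0)
      return PhysReg; // nothing live there, take it immediately
    if (Cost < BestCost) {
      BestReg = PhysReg;
      BestCost = Cost;
    }
  }
  return BestReg; // 0 means no register could be found
}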
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 3333e1f2fb8b..81b21b442437 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -318,7 +318,7 @@ class RAGreedy : public MachineFunctionPass,
/// Track new eviction.
/// The Evictor vreg has evicted the Evictee vreg from Physreg.
- /// \param PhysReg The phisical register Evictee was evicted from.
+ /// \param PhysReg The physical register Evictee was evicted from.
/// \param Evictor The evictor Vreg that evicted Evictee.
/// \param Evictee The evictee Vreg.
void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
@@ -449,8 +449,8 @@ private:
BlockFrequency calcSpillCost();
bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
- void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
- void growRegion(GlobalSplitCandidate &Cand);
+ bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ bool growRegion(GlobalSplitCandidate &Cand);
bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order);
@@ -1183,7 +1183,10 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
BC.Number = BI.MBB->getNumber();
Intf.moveToBlock(BC.Number);
BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
- BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = (BI.LiveOut &&
+ !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
+ ? SpillPlacement::PrefReg
+ : SpillPlacement::DontCare;
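// Note: when the last instruction touching the register in this block is an
// IMPLICIT_DEF, the live-out value is undefined, so there is no benefit in
// asking the spill placer to prefer a register at the block exit.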
BC.ChangesValue = BI.FirstDef.isValid();
if (!Intf.hasInterference())
@@ -1203,6 +1206,13 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
} else if (Intf.first() < BI.LastInstr) {
++Ins;
}
+
+ // Abort if the spill cannot be inserted at the MBB's start.
+ if (((BC.Entry == SpillPlacement::MustSpill) ||
+ (BC.Entry == SpillPlacement::PrefSpill)) &&
+ SlotIndex::isEarlierInstr(BI.FirstInstr,
+ SA->getFirstSplitPoint(BC.Number)))
+ return false;
}
// Interference for the live-out value.
@@ -1232,7 +1242,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
-void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
ArrayRef<unsigned> Blocks) {
const unsigned GroupSize = 8;
SpillPlacement::BlockConstraint BCS[GroupSize];
@@ -1256,6 +1266,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
assert(B < GroupSize && "Array overflow");
BCS[B].Number = Number;
+ // Abort if the spill cannot be inserted at the MBB's start.
+ MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
+ if (!MBB->empty() &&
+ SlotIndex::isEarlierInstr(LIS->getInstructionIndex(MBB->instr_front()),
+ SA->getFirstSplitPoint(Number)))
+ return false;
// Interference for the live-in value.
if (Intf.first() <= Indexes->getMBBStartIdx(Number))
BCS[B].Entry = SpillPlacement::MustSpill;
@@ -1276,9 +1292,10 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
SpillPlacer->addConstraints(makeArrayRef(BCS, B));
SpillPlacer->addLinks(makeArrayRef(TBS, T));
+ return true;
}
-void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Keep track of through blocks that have not been added to SpillPlacer.
BitVector Todo = SA->getThroughBlocks();
SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
@@ -1314,9 +1331,10 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Compute through constraints from the interference, or assume that all
// through blocks prefer spilling when forming compact regions.
auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
- if (Cand.PhysReg)
- addThroughConstraints(Cand.Intf, NewBlocks);
- else
+ if (Cand.PhysReg) {
+ if (!addThroughConstraints(Cand.Intf, NewBlocks))
+ return false;
+ } else
// Provide a strong negative bias on through blocks to prevent unwanted
// liveness on loop backedges.
SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
@@ -1326,6 +1344,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
SpillPlacer->iterate();
}
LLVM_DEBUG(dbgs() << ", v=" << Visited);
+ return true;
}
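// Note: addThroughConstraints() and growRegion() now report failure by
// returning false when a required spill would have to land before a block's
// first valid split point; the callers below (calcCompactRegion and
// calculateRegionSplitCost) simply give up on that split candidate.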
/// calcCompactRegion - Compute the set of edge bundles that should be live
@@ -1356,7 +1375,11 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
return false;
}
- growRegion(Cand);
+ if (!growRegion(Cand)) {
+ LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+ return false;
+ }
+
SpillPlacer->finish();
if (!Cand.LiveBundles.any()) {
@@ -1886,7 +1909,10 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
});
continue;
}
- growRegion(Cand);
+ if (!growRegion(Cand)) {
+ LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+ continue;
+ }
SpillPlacer->finish();
@@ -2188,7 +2214,11 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs) {
- assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ // TODO: the function currently only handles a single UseBlock; it should be
+ // possible to generalize.
+ if (SA->getUseBlocks().size() != 1)
+ return 0;
+
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
// Note that it is possible to have an interval that is live-in or live-out
@@ -3120,18 +3150,23 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
// Handle blocks that were not included in subloops.
if (Loops->getLoopFor(MBB) == L)
for (MachineInstr &MI : *MBB) {
- const MachineMemOperand *MMO;
+ SmallVector<const MachineMemOperand *, 2> Accesses;
+ auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
+ return MFI.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
+ ->getFrameIndex());
+ };
if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI))
++Reloads;
- else if (TII->hasLoadFromStackSlot(MI, MMO, FI) &&
- MFI.isSpillSlotObjectIndex(FI))
+ else if (TII->hasLoadFromStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, isSpillSlotAccess))
++FoldedReloads;
else if (TII->isStoreToStackSlot(MI, FI) &&
MFI.isSpillSlotObjectIndex(FI))
++Spills;
- else if (TII->hasStoreToStackSlot(MI, MMO, FI) &&
- MFI.isSpillSlotObjectIndex(FI))
+ else if (TII->hasStoreToStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, isSpillSlotAccess))
++FoldedSpills;
}
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index f1c442ac38ae..66c7c5cd7dbf 100644
--- a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -81,7 +81,7 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- const TargetMachine &TM = MF.getTarget();
+ const LLVMTargetMachine &TM = MF.getTarget();
LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
<< " -------------------- \n");
@@ -166,28 +166,27 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
}
// Insert any register fully saved via subregisters.
- for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
- if (SavedRegs.test(PReg))
- continue;
-
- // Check if PReg is fully covered by its subregs.
- bool CoveredBySubRegs = false;
- for (const TargetRegisterClass *RC : TRI.regclasses())
- if (RC->CoveredBySubRegs && RC->contains(PReg)) {
- CoveredBySubRegs = true;
- break;
- }
- if (!CoveredBySubRegs)
- continue;
-
- // Add PReg to SavedRegs if all subregs are saved.
- bool AllSubRegsSaved = true;
- for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR)
- if (!SavedRegs.test(*SR)) {
- AllSubRegsSaved = false;
- break;
- }
- if (AllSubRegsSaved)
- SavedRegs.set(PReg);
+ for (const TargetRegisterClass *RC : TRI.regclasses()) {
+ if (!RC->CoveredBySubRegs)
+ continue;
+
+ for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
+ if (SavedRegs.test(PReg))
+ continue;
+
+ // Check if PReg is fully covered by its subregs.
+ if (!RC->contains(PReg))
+ continue;
+
+ // Add PReg to SavedRegs if all subregs are saved.
+ bool AllSubRegsSaved = true;
+ for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR)
+ if (!SavedRegs.test(*SR)) {
+ AllSubRegsSaved = false;
+ break;
+ }
+ if (AllSubRegsSaved)
+ SavedRegs.set(PReg);
+ }
}
}
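// Note: the loop nest above is inverted relative to the old code; register
// classes without the CoveredBySubRegs property are now skipped once up
// front instead of being re-scanned for every physical register.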
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index cad13a60efd2..2a06d5e95fbb 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -16,6 +16,7 @@
#include "RegisterCoalescer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -69,6 +70,7 @@ STATISTIC(NumReMats , "Number of instructions re-materialized");
STATISTIC(NumInflated , "Number of register classes inflated");
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
+STATISTIC(NumShrinkToUses, "Number of shrinkToUses called");
static cl::opt<bool> EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
@@ -94,6 +96,15 @@ VerifyCoalescing("verify-coalescing",
cl::desc("Verify machine instrs before and after register coalescing"),
cl::Hidden);
+static cl::opt<unsigned> LateRematUpdateThreshold(
+ "late-remat-update-threshold", cl::Hidden,
+ cl::desc("During rematerialization for a copy, if the def instruction has "
+ "many other copy uses to be rematerialized, delay the multiple "
+ "separate live interval update work and do them all at once after "
+ "all those rematerialization are done. It will save a lot of "
+ "repeated work. "),
+ cl::init(100));
+
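// Note: as a cl::opt this threshold can be tuned from the command line (an
// invocation roughly like "llc -late-remat-update-threshold=10 ..." is an
// illustration only, not part of this change); lowering it makes the batched
// late update kick in for defs with fewer rematerialized copy uses.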
namespace {
class RegisterCoalescer : public MachineFunctionPass,
@@ -137,6 +148,11 @@ namespace {
/// Virtual registers to be considered for register class inflation.
SmallVector<unsigned, 8> InflateRegs;
+ /// The collection of live intervals which should have been updated
+ /// immediately after rematerialization but delayed until
+ /// lateLiveIntervalUpdate is called.
+ DenseSet<unsigned> ToBeUpdated;
+
/// Recursively eliminate dead defs in DeadDefs.
void eliminateDeadDefs();
@@ -157,6 +173,13 @@ namespace {
/// was made.
bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
+ /// If one def has many copy-like uses, and those copy uses are all
+ /// rematerialized, the live interval updates needed for those
+ /// rematerializations will be delayed and done all at once instead
+ /// of being done multiple times. This is to save compile time because
+ /// live interval updates are costly.
+ void lateLiveIntervalUpdate();
+
/// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
/// src/dst of the copy instruction CopyMI. This returns true if the copy
/// was successfully coalesced away. If it is not currently possible to
@@ -203,8 +226,12 @@ namespace {
/// If the source value number is defined by a commutable instruction and
/// its other operand is coalesced to the copy dest register, see if we
/// can transform the copy into a noop by commuting the definition.
- /// This returns true if an interval was modified.
- bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+ /// This returns a pair of two flags:
+ /// - the first element is true if an interval was modified,
+ /// - the second element is true if the destination interval needs
+ /// to be shrunk after deleting the copy.
+ std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI);
/// We found a copy which can be moved to its less frequent predecessor.
bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
@@ -258,6 +285,7 @@ namespace {
/// mentioned method returns true.
void shrinkToUses(LiveInterval *LI,
SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
+ NumShrinkToUses++;
if (LIS->shrinkToUses(LI, Dead)) {
/// Check whether or not \p LI is composed by multiple connected
/// components and if that is the case, fix that.
@@ -662,17 +690,32 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
/// Copy segments with value number @p SrcValNo from liverange @p Src to live
/// range @Dst and use value number @p DstValNo there.
-static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
- const LiveRange &Src, const VNInfo *SrcValNo) {
+static std::pair<bool,bool>
+addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
+ const VNInfo *SrcValNo) {
+ bool Changed = false;
+ bool MergedWithDead = false;
for (const LiveRange::Segment &S : Src.segments) {
if (S.valno != SrcValNo)
continue;
- Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo));
- }
+ // This is adding a segment from Src that ends in a copy that is about
+ // to be removed. This segment is going to be merged with a pre-existing
+ // segment in Dst. This works, except in cases when the corresponding
+ // segment in Dst is dead. For example: adding [192r,208r:1) from Src
+ // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
+ // Recognize such cases, so that the segments can be shrunk.
+ LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
+ LiveRange::Segment &Merged = *Dst.addSegment(Added);
+ if (Merged.end.isDead())
+ MergedWithDead = true;
+ Changed = true;
+ }
+ return std::make_pair(Changed, MergedWithDead);
}
-bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
- MachineInstr *CopyMI) {
+std::pair<bool,bool>
+RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
assert(!CP.isPhys());
LiveInterval &IntA =
@@ -710,19 +753,19 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
assert(AValNo && !AValNo->isUnused() && "COPY source not live");
if (AValNo->isPHIDef())
- return false;
+ return { false, false };
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
- return false;
+ return { false, false };
if (!DefMI->isCommutable())
- return false;
+ return { false, false };
// If DefMI is a two-address instruction then commuting it will change the
// destination register.
int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
assert(DefIdx != -1);
unsigned UseOpIdx;
if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
- return false;
+ return { false, false };
// FIXME: The code below tries to commute 'UseOpIdx' operand with some other
// commutable operand which is expressed by 'CommuteAnyOperandIndex' value
@@ -735,17 +778,17 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// op#2<->op#3) of commute transformation should be considered/tried here.
unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
- return false;
+ return { false, false };
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
unsigned NewReg = NewDstMO.getReg();
if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
- return false;
+ return { false, false };
// Make sure there are no other definitions of IntB that would reach the
// uses which the new definition can reach.
if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
- return false;
+ return { false, false };
// If some of the uses of IntA.reg are already coalesced away, return false.
// It's not possible to determine whether it's safe to perform the coalescing.
@@ -758,7 +801,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
continue;
// If this use is tied to a def, we can't rewrite the register.
if (UseMI->isRegTiedToDefOperand(OpNo))
- return false;
+ return { false, false };
}
LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
@@ -770,11 +813,11 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *NewMI =
TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
if (!NewMI)
- return false;
+ return { false, false };
if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
!MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
- return false;
+ return { false, false };
if (NewMI != DefMI) {
LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
MachineBasicBlock::iterator Pos = DefMI;
@@ -848,37 +891,58 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// Extend BValNo by merging in IntA live segments of AValNo. Val# definition
// is updated.
+ bool ShrinkB = false;
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- if (IntB.hasSubRanges()) {
+ if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
if (!IntA.hasSubRanges()) {
LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
IntA.createSubRangeFrom(Allocator, Mask, IntA);
+ } else if (!IntB.hasSubRanges()) {
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg);
+ IntB.createSubRangeFrom(Allocator, Mask, IntB);
}
SlotIndex AIdx = CopyIdx.getRegSlot(true);
+ LaneBitmask MaskA;
for (LiveInterval::SubRange &SA : IntA.subranges()) {
VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
assert(ASubValNo != nullptr);
+ MaskA |= SA.LaneMask;
IntB.refineSubRanges(Allocator, SA.LaneMask,
- [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) {
+ [&Allocator,&SA,CopyIdx,ASubValNo,&ShrinkB]
+ (LiveInterval::SubRange &SR) {
VNInfo *BSubValNo = SR.empty()
? SR.getNextValue(CopyIdx, Allocator)
: SR.getVNInfoAt(CopyIdx);
assert(BSubValNo != nullptr);
- addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ ShrinkB |= P.second;
+ if (P.first)
+ BSubValNo->def = ASubValNo->def;
});
}
+ // Go over all subranges of IntB that have not been covered by IntA,
+ // and delete the segments starting at CopyIdx. This can happen if
+ // IntA has undef lanes that are defined in IntB.
+ for (LiveInterval::SubRange &SB : IntB.subranges()) {
+ if ((SB.LaneMask & MaskA).any())
+ continue;
+ if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
+ if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
+ SB.removeSegment(*S, true);
+ }
}
BValNo->def = AValNo->def;
- addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
+ auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
+ ShrinkB |= P.second;
LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
LIS->removeVRegDefAt(IntA, AValNo->def);
LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
++numCommutes;
- return true;
+ return { true, ShrinkB };
}
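// Note: the second element of the returned pair tells the caller that the
// destination interval may now contain dead segments; the joinCopy() hunk
// further down unpacks it with std::tie and calls shrinkToUses on the
// destination interval when it is set.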
/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
@@ -1067,6 +1131,20 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
assert(BValNo && "All sublanes should be live");
LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
BValNo->markUnused();
+ // We can have a situation where the result of the original copy is live,
+ // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
+ // the copy appear as an endpoint from pruneValue(), but we don't want it
+ // to because the copy has been removed. We can go ahead and remove that
+ // endpoint; there is no other situation here where there could be a use at
+ // the same place, as we know that the copy is a full copy.
+ for (unsigned I = 0; I != EndPoints.size(); ) {
+ if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
+ EndPoints[I] = EndPoints.back();
+ EndPoints.pop_back();
+ continue;
+ }
+ ++I;
+ }
LIS->extendToIndices(SR, EndPoints);
}
// If any dead defs were extended, truncate them.
@@ -1107,7 +1185,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LiveInterval &SrcInt = LIS->getInterval(SrcReg);
SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
- assert(ValNo && "CopyMI input register not live");
+ if (!ValNo)
+ return false;
if (ValNo->isPHIDef() || ValNo->isUnused())
return false;
MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
@@ -1365,24 +1444,40 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
++NumReMats;
- // The source interval can become smaller because we removed a use.
- shrinkToUses(&SrcInt, &DeadDefs);
- if (!DeadDefs.empty()) {
- // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
- // to describe DstReg instead.
+ // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
+ // to describe DstReg instead.
+ if (MRI->use_nodbg_empty(SrcReg)) {
for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
MachineInstr *UseMI = UseMO.getParent();
if (UseMI->isDebugValue()) {
- UseMO.setReg(DstReg);
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ UseMO.substPhysReg(DstReg, *TRI);
+ else
+ UseMO.setReg(DstReg);
// Move the debug value directly after the def of the rematerialized
// value in DstReg.
MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
}
}
- eliminateDeadDefs();
}
+ if (ToBeUpdated.count(SrcReg))
+ return true;
+
+ unsigned NumCopyUses = 0;
+ for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
+ if (UseMO.getParent()->isCopyLike())
+ NumCopyUses++;
+ }
+ if (NumCopyUses < LateRematUpdateThreshold) {
+ // The source interval can become smaller because we removed a use.
+ shrinkToUses(&SrcInt, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs();
+ } else {
+ ToBeUpdated.insert(SrcReg);
+ }
return true;
}
@@ -1751,9 +1846,18 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// If we can eliminate the copy without merging the live segments, do so
// now.
if (!CP.isPartial() && !CP.isPhys()) {
- if (adjustCopiesBackFrom(CP, CopyMI) ||
- removeCopyByCommutingDef(CP, CopyMI)) {
+ bool Changed = adjustCopiesBackFrom(CP, CopyMI);
+ bool Shrink = false;
+ if (!Changed)
+ std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
+ if (Changed) {
deleteInstr(CopyMI);
+ if (Shrink) {
+ unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ LiveInterval &DstLI = LIS->getInterval(DstReg);
+ shrinkToUses(&DstLI);
+ LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n');
+ }
LLVM_DEBUG(dbgs() << "\tTrivial!\n");
return true;
}
@@ -1806,6 +1910,13 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
LI.removeEmptySubRanges();
}
+
+ // CP.getSrcReg()'s live interval has been merged into CP.getDstReg()'s live
+ // interval. Since CP.getSrcReg() is in the ToBeUpdated set and its live
+ // interval is not up-to-date, we need to update the merged live interval here.
+ if (ToBeUpdated.count(CP.getSrcReg()))
+ ShrinkMainRange = true;
+
if (ShrinkMainRange) {
LiveInterval &LI = LIS->getInterval(CP.getDstReg());
shrinkToUses(&LI);
@@ -2397,8 +2508,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// We normally expect IMPLICIT_DEF values to be live only until the end
// of their block. If the value is really live longer and gets pruned in
// another block, this flag is cleared again.
+ //
+ // Clearing the valid lanes is deferred until we are sure this value can
+ // be erased.
V.ErasableImplicitDef = true;
- V.ValidLanes &= ~V.WriteLanes;
}
}
}
@@ -2453,20 +2566,25 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
Other.computeAssignment(V.OtherVNI->id, *this);
Val &OtherV = Other.Vals[V.OtherVNI->id];
- // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
- // This shouldn't normally happen, but ProcessImplicitDefs can leave such
- // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
- // technically.
- //
- // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
- // to erase the IMPLICIT_DEF instruction.
- if (OtherV.ErasableImplicitDef && DefMI &&
- DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
- LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
- << " extends into "
- << printMBBReference(*DefMI->getParent())
- << ", keeping it.\n");
- OtherV.ErasableImplicitDef = false;
+ if (OtherV.ErasableImplicitDef) {
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+ // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // to erase the IMPLICIT_DEF instruction.
+ if (DefMI &&
+ DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into "
+ << printMBBReference(*DefMI->getParent())
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ } else {
+ // We deferred clearing these lanes in case we needed to save them.
+ OtherV.ValidLanes &= ~OtherV.WriteLanes;
+ }
}
// Allow overlapping PHI values. Any real interference would show up in a
@@ -2509,6 +2627,12 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
return CR_Erase;
}
+ // The remaining checks apply to the lanes, which aren't tracked here. This
+ // was already decided to be OK via the following CR_Replace condition.
+ if (SubRangeJoin)
+ return CR_Replace;
+
// If the lanes written by this instruction were all undef in OtherVNI, it is
// still safe to join the live ranges. This can't be done with a simple value
// mapping, though - OtherVNI will map to multiple values:
@@ -2590,8 +2714,18 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
Val &OtherV = Other.Vals[V.OtherVNI->id];
// We cannot erase an IMPLICIT_DEF if we don't have valid values for all
// its lanes.
- if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness)
+ if (OtherV.ErasableImplicitDef &&
+ TrackSubRegLiveness &&
+ (OtherV.WriteLanes & ~V.ValidLanes).any()) {
+ LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
+
OtherV.ErasableImplicitDef = false;
+ // The valid lanes written by the implicit_def were speculatively cleared
+ // before, so make this more conservative. It may be better to track this,
+ // but I haven't found a testcase where it matters.
+ OtherV.ValidLanes = LaneBitmask::getAll();
+ }
+
OtherV.Pruned = true;
LLVM_FALLTHROUGH;
}
@@ -3290,6 +3424,18 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
|| LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
}
+void RegisterCoalescer::lateLiveIntervalUpdate() {
+ for (unsigned reg : ToBeUpdated) {
+ if (!LIS->hasInterval(reg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(reg);
+ shrinkToUses(&LI, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs();
+ }
+ ToBeUpdated.clear();
+}
+
bool RegisterCoalescer::
copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
bool Progress = false;
@@ -3459,12 +3605,14 @@ void RegisterCoalescer::joinAllIntervals() {
}
copyCoalesceInMBB(MBBs[i].MBB);
}
+ lateLiveIntervalUpdate();
coalesceLocals();
// Joining intervals can allow other intervals to be joined. Iteratively join
// until we make no progress.
while (copyCoalesceWorkList(WorkList))
/* empty */ ;
+ lateLiveIntervalUpdate();
}
void RegisterCoalescer::releaseMemory() {
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index 51414de518fd..1099e468e885 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -681,8 +681,7 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
PressureDiff::iterator J;
for (J = std::next(I); J != E && J->isValid(); ++J, ++I)
*I = *J;
- if (J != E)
- *I = *J;
+ *I = PressureChange();
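// Note: after shifting the remaining valid entries down by one, the vacated
// trailing slot is reset to an invalid PressureChange; the old code skipped
// this when the array was completely full, leaving a stale duplicate entry.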
}
}
}
diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
index 6a31118cc562..6b9880a8913f 100644
--- a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -40,7 +40,7 @@ INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
char PhysicalRegisterUsageInfo::ID = 0;
-void PhysicalRegisterUsageInfo::setTargetMachine(const TargetMachine &TM) {
+void PhysicalRegisterUsageInfo::setTargetMachine(const LLVMTargetMachine &TM) {
this->TM = &TM;
}
@@ -81,7 +81,7 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
// sort the vector to print analysis in alphabetical order of function name.
llvm::sort(
- FPRMPairVector.begin(), FPRMPairVector.end(),
+ FPRMPairVector,
[](const FuncPtrRegMaskPair *A, const FuncPtrRegMaskPair *B) -> bool {
return A->first->getName() < B->first->getName();
});
diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp
index cbbbf7c385aa..c356fb57ac6d 100644
--- a/contrib/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp
@@ -260,8 +260,14 @@ bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
const Value *AllocaPtr,
uint64_t AllocaSize) {
- // All MemIntrinsics have destination address in Arg0 and size in Arg2.
- if (MI->getRawDest() != U) return true;
+ if (auto MTI = dyn_cast<MemTransferInst>(MI)) {
+ if (MTI->getRawSource() != U && MTI->getRawDest() != U)
+ return true;
+ } else {
+ if (MI->getRawDest() != U)
+ return true;
+ }
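// Note: for memcpy/memmove (MemTransferInst) the access is now also checked
// when the unsafe alloca is only the source operand; previously any use that
// was not the destination pointer was unconditionally treated as safe.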
+
const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
// Non-constant size => unsafe. FIXME: try SCEV getRange.
if (!Len) return false;
@@ -318,11 +324,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
case Instruction::Invoke: {
ImmutableCallSite CS(I);
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end)
- continue;
- }
+ if (I->isLifetimeStartOrEnd())
+ continue;
if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) {
@@ -775,6 +778,10 @@ bool SafeStack::run() {
++NumUnsafeStackRestorePointsFunctions;
IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
+ // Calls must always have a debug location, or else inlining breaks. So
+ // we explicitly set an artificial debug location here.
+ if (DISubprogram *SP = F.getSubprogram())
+ IRB.SetCurrentDebugLocation(DebugLoc::get(SP->getScopeLine(), 0, SP));
if (SafeStackUsePointerAddress) {
Value *Fn = F.getParent()->getOrInsertFunction(
"__safestack_pointer_address", StackPtrTy->getPointerTo(0));
diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
index 329458778a98..726c38002817 100644
--- a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
@@ -46,11 +46,10 @@ const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) {
}
bool StackColoring::readMarker(Instruction *I, bool *IsStart) {
- auto *II = dyn_cast<IntrinsicInst>(I);
- if (!II || (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end))
+ if (!I->isLifetimeStartOrEnd())
return false;
+ auto *II = cast<IntrinsicInst>(I);
*IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start;
return true;
}
@@ -172,7 +171,9 @@ void StackColoring::calculateLocalLiveness() {
BitVector LocalLiveIn;
for (auto *PredBB : predecessors(BB)) {
LivenessMap::const_iterator I = BlockLiveness.find(PredBB);
- assert(I != BlockLiveness.end() && "Predecessor not found");
+ // If a predecessor is unreachable, ignore it.
+ if (I == BlockLiveness.end())
+ continue;
LocalLiveIn |= I->second.LiveOut;
}
diff --git a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 9387722bfebd..2684f92b3a93 100644
--- a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -77,6 +77,21 @@ FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
return new ScalarizeMaskedMemIntrin();
}
+static bool isConstantIntVector(Value *Mask) {
+ Constant *C = dyn_cast<Constant>(Mask);
+ if (!C)
+ return false;
+
+ unsigned NumElts = Mask->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *CElt = C->getAggregateElement(i);
+ if (!CElt || !isa<ConstantInt>(CElt))
+ return false;
+ }
+
+ return true;
+}
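// Note: unlike the old isa<ConstantVector> test, this helper also accepts
// constant masks that are not ConstantVector nodes (e.g. an all-zero
// ConstantAggregateZero), while conservatively rejecting masks that contain
// undef elements, since those are not ConstantInts.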
+
// Translate a masked load intrinsic like
// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
// <16 x i1> %mask, <16 x i32> %passthru)
@@ -85,32 +100,29 @@ FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
//
// %1 = bitcast i8* %addr to i32*
// %2 = extractelement <16 x i1> %mask, i32 0
-// %3 = icmp eq i1 %2, true
-// br i1 %3, label %cond.load, label %else
+// br i1 %2, label %cond.load, label %else
//
// cond.load: ; preds = %0
-// %4 = getelementptr i32* %1, i32 0
-// %5 = load i32* %4
-// %6 = insertelement <16 x i32> undef, i32 %5, i32 0
+// %3 = getelementptr i32* %1, i32 0
+// %4 = load i32* %3
+// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
// br label %else
//
// else: ; preds = %0, %cond.load
-// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
-// %7 = extractelement <16 x i1> %mask, i32 1
-// %8 = icmp eq i1 %7, true
-// br i1 %8, label %cond.load1, label %else2
+// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
+// %6 = extractelement <16 x i1> %mask, i32 1
+// br i1 %6, label %cond.load1, label %else2
//
// cond.load1: ; preds = %else
-// %9 = getelementptr i32* %1, i32 1
-// %10 = load i32* %9
-// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
+// %7 = getelementptr i32* %1, i32 1
+// %8 = load i32* %7
+// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
// br label %else2
//
// else2: ; preds = %else, %cond.load1
-// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
-// %12 = extractelement <16 x i1> %mask, i32 2
-// %13 = icmp eq i1 %12, true
-// br i1 %13, label %cond.load4, label %else5
+// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
+// %10 = extractelement <16 x i1> %mask, i32 2
+// br i1 %10, label %cond.load4, label %else5
//
static void scalarizeMaskedLoad(CallInst *CI) {
Value *Ptr = CI->getArgOperand(0);
@@ -119,25 +131,19 @@ static void scalarizeMaskedLoad(CallInst *CI) {
Value *Src0 = CI->getArgOperand(3);
unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
- assert(VecType && "Unexpected return type of masked load intrinsic");
+ VectorType *VecType = cast<VectorType>(CI->getType());
- Type *EltTy = CI->getType()->getVectorElementType();
+ Type *EltTy = VecType->getElementType();
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
- BasicBlock *CondBlock = nullptr;
- BasicBlock *PrevIfBlock = CI->getParent();
Builder.SetInsertPoint(InsertPt);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
// Short-cut if the mask is all-true.
- bool IsAllOnesMask =
- isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
-
- if (IsAllOnesMask) {
+ if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
CI->replaceAllUsesWith(NewI);
CI->eraseFromParent();
@@ -145,21 +151,19 @@ static void scalarizeMaskedLoad(CallInst *CI) {
}
// Adjust alignment for the scalar instruction.
- AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8);
+ AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
- Value *UndefVal = UndefValue::get(VecType);
-
// The result vector
- Value *VResult = UndefVal;
+ Value *VResult = Src0;
- if (isa<ConstantVector>(Mask)) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
@@ -167,35 +171,21 @@ static void scalarizeMaskedLoad(CallInst *CI) {
VResult =
Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
}
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
return;
}
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // %to_load = icmp eq i1 %mask_1, true
- // br i1 %to_load, label %cond.load, label %else
+ // br i1 %mask_1, label %cond.load, label %else
//
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
Value *Predicate =
Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1));
// Create "cond" block
//
@@ -203,30 +193,34 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// %Elt = load i32* %EltAddr
// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
//
- CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
+ "cond.load");
Builder.SetInsertPoint(InsertPt);
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock =
CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
- PrevIfBlock = IfBlock;
+ BasicBlock *PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
+
+ // Create the phi to join the new and previous value.
+ PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(NewVResult, CondBlock);
+ Phi->addIncoming(VResult, PrevIfBlock);
+ VResult = Phi;
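// Note: because VResult is now seeded with the passthru value and a phi is
// created at the top of every new "else" block, the final select on the mask
// that the old code emitted after the loop is no longer needed.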
}
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
}
@@ -238,24 +232,22 @@ static void scalarizeMaskedLoad(CallInst *CI) {
//
// %1 = bitcast i8* %addr to i32*
// %2 = extractelement <16 x i1> %mask, i32 0
-// %3 = icmp eq i1 %2, true
-// br i1 %3, label %cond.store, label %else
+// br i1 %2, label %cond.store, label %else
//
// cond.store: ; preds = %0
-// %4 = extractelement <16 x i32> %val, i32 0
-// %5 = getelementptr i32* %1, i32 0
-// store i32 %4, i32* %5
+// %3 = extractelement <16 x i32> %val, i32 0
+// %4 = getelementptr i32* %1, i32 0
+// store i32 %3, i32* %4
// br label %else
//
// else: ; preds = %0, %cond.store
-// %6 = extractelement <16 x i1> %mask, i32 1
-// %7 = icmp eq i1 %6, true
-// br i1 %7, label %cond.store1, label %else2
+// %5 = extractelement <16 x i1> %mask, i32 1
+// br i1 %5, label %cond.store1, label %else2
//
// cond.store1: ; preds = %else
-// %8 = extractelement <16 x i32> %val, i32 1
-// %9 = getelementptr i32* %1, i32 1
-// store i32 %8, i32* %9
+// %6 = extractelement <16 x i32> %val, i32 1
+// %7 = getelementptr i32* %1, i32 1
+// store i32 %6, i32* %7
// br label %else2
// . . .
static void scalarizeMaskedStore(CallInst *CI) {
@@ -265,8 +257,7 @@ static void scalarizeMaskedStore(CallInst *CI) {
Value *Mask = CI->getArgOperand(3);
unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = dyn_cast<VectorType>(Src->getType());
- assert(VecType && "Unexpected data type in masked store intrinsic");
+ VectorType *VecType = cast<VectorType>(Src->getType());
Type *EltTy = VecType->getElementType();
@@ -277,26 +268,23 @@ static void scalarizeMaskedStore(CallInst *CI) {
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
// Short-cut if the mask is all-true.
- bool IsAllOnesMask =
- isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
-
- if (IsAllOnesMask) {
+ if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
Builder.CreateAlignedStore(Src, Ptr, AlignVal);
CI->eraseFromParent();
return;
}
// Adjust alignment for the scalar instruction.
- AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits() / 8);
+ AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
- if (isa<ConstantVector>(Mask)) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
Value *Gep =
@@ -311,13 +299,10 @@ static void scalarizeMaskedStore(CallInst *CI) {
// Fill the "else" block, created in the previous iteration
//
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // %to_store = icmp eq i1 %mask_1, true
- // br i1 %to_store, label %cond.store, label %else
+ // br i1 %mask_1, label %cond.store, label %else
//
Value *Predicate =
Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1));
// Create "cond" block
//
@@ -339,7 +324,7 @@ static void scalarizeMaskedStore(CallInst *CI) {
CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
IfBlock = NewIfBlock;
}
@@ -352,30 +337,28 @@ static void scalarizeMaskedStore(CallInst *CI) {
// to a chain of basic blocks, loading elements one-by-one if
// the appropriate mask bit is set
//
-// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
-// % Mask0 = extractelement <16 x i1> %Mask, i32 0
-// % ToLoad0 = icmp eq i1 % Mask0, true
-// br i1 % ToLoad0, label %cond.load, label %else
+// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
+// %Mask0 = extractelement <16 x i1> %Mask, i32 0
+// br i1 %Mask0, label %cond.load, label %else
//
// cond.load:
-// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// % Load0 = load i32, i32* % Ptr0, align 4
-// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
+// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// %Load0 = load i32, i32* %Ptr0, align 4
+// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
// br label %else
//
// else:
-// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
-// % Mask1 = extractelement <16 x i1> %Mask, i32 1
-// % ToLoad1 = icmp eq i1 % Mask1, true
-// br i1 % ToLoad1, label %cond.load1, label %else2
+// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
+// %Mask1 = extractelement <16 x i1> %Mask, i32 1
+// br i1 %Mask1, label %cond.load1, label %else2
//
// cond.load1:
-// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// % Load1 = load i32, i32* % Ptr1, align 4
-// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1
+// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// %Load1 = load i32, i32* %Ptr1, align 4
+// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
// br label %else2
// . . .
-// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
+// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
static void scalarizeMaskedGather(CallInst *CI) {
Value *Ptrs = CI->getArgOperand(0);
@@ -383,32 +366,24 @@ static void scalarizeMaskedGather(CallInst *CI) {
Value *Mask = CI->getArgOperand(2);
Value *Src0 = CI->getArgOperand(3);
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
-
- assert(VecType && "Unexpected return type of masked load intrinsic");
+ VectorType *VecType = cast<VectorType>(CI->getType());
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
- BasicBlock *CondBlock = nullptr;
- BasicBlock *PrevIfBlock = CI->getParent();
Builder.SetInsertPoint(InsertPt);
unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
- Value *UndefVal = UndefValue::get(VecType);
-
// The result vector
- Value *VResult = UndefVal;
+ Value *VResult = Src0;
unsigned VectorWidth = VecType->getNumElements();
// Shorten the way if the mask is a vector of constants.
- bool IsConstMask = isa<ConstantVector>(Mask);
-
- if (IsConstMask) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
"Ptr" + Twine(Idx));
@@ -417,35 +392,20 @@ static void scalarizeMaskedGather(CallInst *CI) {
VResult = Builder.CreateInsertElement(
VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
}
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
return;
}
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
- // %ToLoad1 = icmp eq i1 %Mask1, true
- // br i1 %ToLoad1, label %cond.load, label %else
+ // br i1 %Mask1, label %cond.load, label %else
//
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
"Mask" + Twine(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1),
- "ToLoad" + Twine(Idx));
// Create "cond" block
//
@@ -453,31 +413,33 @@ static void scalarizeMaskedGather(CallInst *CI) {
// %Elt = load i32* %EltAddr
// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
//
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Builder.SetInsertPoint(InsertPt);
Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
"Ptr" + Twine(Idx));
LoadInst *Load =
Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
- "Res" + Twine(Idx));
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
- PrevIfBlock = IfBlock;
+ BasicBlock *PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
+
+ PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(NewVResult, CondBlock);
+ Phi->addIncoming(VResult, PrevIfBlock);
+ VResult = Phi;
}
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
}
@@ -487,26 +449,24 @@ static void scalarizeMaskedGather(CallInst *CI) {
// to a chain of basic blocks, that stores elements one-by-one if
// the appropriate mask bit is set.
//
-// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
-// % Mask0 = extractelement <16 x i1> % Mask, i32 0
-// % ToStore0 = icmp eq i1 % Mask0, true
-// br i1 %ToStore0, label %cond.store, label %else
+// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+// %Mask0 = extractelement <16 x i1> %Mask, i32 0
+// br i1 %Mask0, label %cond.store, label %else
//
// cond.store:
-// % Elt0 = extractelement <16 x i32> %Src, i32 0
-// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// store i32 %Elt0, i32* % Ptr0, align 4
+// %Elt0 = extractelement <16 x i32> %Src, i32 0
+// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// store i32 %Elt0, i32* %Ptr0, align 4
// br label %else
//
// else:
-// % Mask1 = extractelement <16 x i1> % Mask, i32 1
-// % ToStore1 = icmp eq i1 % Mask1, true
-// br i1 % ToStore1, label %cond.store1, label %else2
+// %Mask1 = extractelement <16 x i1> %Mask, i32 1
+// br i1 %Mask1, label %cond.store1, label %else2
//
// cond.store1:
-// % Elt1 = extractelement <16 x i32> %Src, i32 1
-// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// store i32 % Elt1, i32* % Ptr1, align 4
+// %Elt1 = extractelement <16 x i32> %Src, i32 1
+// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 %Elt1, i32* %Ptr1, align 4
// br label %else2
// . . .
static void scalarizeMaskedScatter(CallInst *CI) {
@@ -531,11 +491,9 @@ static void scalarizeMaskedScatter(CallInst *CI) {
unsigned VectorWidth = Src->getType()->getVectorNumElements();
// Shorten the way if the mask is a vector of constants.
- bool IsConstMask = isa<ConstantVector>(Mask);
-
- if (IsConstMask) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
"Elt" + Twine(Idx));
@@ -546,24 +504,21 @@ static void scalarizeMaskedScatter(CallInst *CI) {
CI->eraseFromParent();
return;
}
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
- // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
- // % ToStore = icmp eq i1 % Mask1, true
- // br i1 % ToStore, label %cond.store, label %else
+ // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
+ // br i1 %Mask1, label %cond.store, label %else
//
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
"Mask" + Twine(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1),
- "ToStore" + Twine(Idx));
// Create "cond" block
//
- // % Elt1 = extractelement <16 x i32> %Src, i32 1
- // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
- // %store i32 % Elt1, i32* % Ptr1
+ // %Elt1 = extractelement <16 x i32> %Src, i32 1
+ // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+ // %store i32 %Elt1, i32* %Ptr1
//
BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
Builder.SetInsertPoint(InsertPt);
@@ -578,7 +533,7 @@ static void scalarizeMaskedScatter(CallInst *CI) {
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
IfBlock = NewIfBlock;
}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 46064012d9d8..6c135b3d69d6 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -68,39 +68,36 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
return &TII->get(Node->getMachineOpcode());
}
-LLVM_DUMP_METHOD
-raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+LLVM_DUMP_METHOD void SDep::dump(const TargetRegisterInfo *TRI) const {
switch (getKind()) {
- case Data: OS << "Data"; break;
- case Anti: OS << "Anti"; break;
- case Output: OS << "Out "; break;
- case Order: OS << "Ord "; break;
+ case Data: dbgs() << "Data"; break;
+ case Anti: dbgs() << "Anti"; break;
+ case Output: dbgs() << "Out "; break;
+ case Order: dbgs() << "Ord "; break;
}
switch (getKind()) {
case Data:
- OS << " Latency=" << getLatency();
+ dbgs() << " Latency=" << getLatency();
if (TRI && isAssignedRegDep())
- OS << " Reg=" << printReg(getReg(), TRI);
+ dbgs() << " Reg=" << printReg(getReg(), TRI);
break;
case Anti:
case Output:
- OS << " Latency=" << getLatency();
+ dbgs() << " Latency=" << getLatency();
break;
case Order:
- OS << " Latency=" << getLatency();
+ dbgs() << " Latency=" << getLatency();
switch(Contents.OrdKind) {
- case Barrier: OS << " Barrier"; break;
+ case Barrier: dbgs() << " Barrier"; break;
case MayAliasMem:
- case MustAliasMem: OS << " Memory"; break;
- case Artificial: OS << " Artificial"; break;
- case Weak: OS << " Weak"; break;
- case Cluster: OS << " Cluster"; break;
+ case MustAliasMem: dbgs() << " Memory"; break;
+ case Artificial: dbgs() << " Artificial"; break;
+ case Weak: dbgs() << " Weak"; break;
+ case Cluster: dbgs() << " Cluster"; break;
}
break;
}
-
- return OS;
}
bool SUnit::addPred(const SDep &D, bool Required) {
@@ -337,33 +334,7 @@ void SUnit::biasCriticalPath() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
-raw_ostream &SUnit::print(raw_ostream &OS,
- const SUnit *Entry, const SUnit *Exit) const {
- if (this == Entry)
- OS << "EntrySU";
- else if (this == Exit)
- OS << "ExitSU";
- else
- OS << "SU(" << NodeNum << ")";
- return OS;
-}
-
-LLVM_DUMP_METHOD
-raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const {
- return print(OS, &G->EntrySU, &G->ExitSU);
-}
-
-LLVM_DUMP_METHOD
-void SUnit::dump(const ScheduleDAG *G) const {
- print(dbgs(), G);
- dbgs() << ": ";
- G->dumpNode(this);
-}
-
-LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const {
- dump(G);
-
+LLVM_DUMP_METHOD void SUnit::dumpAttributes() const {
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
if (WeakPredsLeft)
@@ -374,21 +345,38 @@ LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " Latency : " << Latency << "\n";
dbgs() << " Depth : " << getDepth() << "\n";
dbgs() << " Height : " << getHeight() << "\n";
+}
+
+LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeName(const SUnit &SU) const {
+ if (&SU == &EntrySU)
+ dbgs() << "EntrySU";
+ else if (&SU == &ExitSU)
+ dbgs() << "ExitSU";
+ else
+ dbgs() << "SU(" << SU.NodeNum << ")";
+}
- if (Preds.size() != 0) {
+LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeAll(const SUnit &SU) const {
+ dumpNode(SU);
+ SU.dumpAttributes();
+ if (SU.Preds.size() > 0) {
dbgs() << " Predecessors:\n";
- for (const SDep &Dep : Preds) {
+ for (const SDep &Dep : SU.Preds) {
dbgs() << " ";
- Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
- Dep.print(dbgs(), G->TRI); dbgs() << '\n';
+ dumpNodeName(*Dep.getSUnit());
+ dbgs() << ": ";
+ Dep.dump(TRI);
+ dbgs() << '\n';
}
}
- if (Succs.size() != 0) {
+ if (SU.Succs.size() > 0) {
dbgs() << " Successors:\n";
- for (const SDep &Dep : Succs) {
+ for (const SDep &Dep : SU.Succs) {
dbgs() << " ";
- Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
- Dep.print(dbgs(), G->TRI); dbgs() << '\n';
+ dumpNodeName(*Dep.getSUnit());
+ dbgs() << ": ";
+ Dep.dump(TRI);
+ dbgs() << '\n';
}
}
}
@@ -406,7 +394,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
}
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has not been scheduled!\n";
AnyNotSched = true;
}
@@ -415,7 +403,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
unsigned(std::numeric_limits<int>::max())) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has an unexpected "
<< (isBottomUp ? "Height" : "Depth") << " value!\n";
AnyNotSched = true;
@@ -424,7 +412,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
if (SUnit.NumSuccsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has successors left!\n";
AnyNotSched = true;
}
@@ -432,7 +420,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
if (SUnit.NumPredsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has predecessors left!\n";
AnyNotSched = true;
}
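
The hunks above move per-node dumping out of SUnit (print/dump/dumpAll) and into ScheduleDAG itself (dumpNodeName/dumpNodeAll plus a node-local dumpAttributes), so a node no longer needs a back-pointer to its graph just to be printed. A minimal standalone sketch of the same refactoring shape, using hypothetical Node/Graph types rather than LLVM's classes:

#include <cstdio>
#include <vector>

// Toy stand-ins for SUnit/ScheduleDAG; names are illustrative only.
struct Node {
  unsigned Num;
  std::vector<unsigned> Preds, Succs;
  // Node-local attributes can still be printed by the node itself...
  void dumpAttributes() const {
    std::printf("  # preds left : %zu\n  # succs left : %zu\n",
                Preds.size(), Succs.size());
  }
};

struct Graph {
  std::vector<Node> Nodes;
  // ...but naming and full dumps live on the graph, which is what knows the
  // entry/exit sentinels and the target info needed to print edges.
  void dumpNodeName(const Node &N) const { std::printf("SU(%u)", N.Num); }
  void dumpNodeAll(const Node &N) const {
    dumpNodeName(N);
    std::printf(":\n");
    N.dumpAttributes();
  }
  void dump() const {
    for (const Node &N : Nodes)
      dumpNodeAll(N);
  }
};

int main() {
  Graph G{{{0, {}, {1}}, {1, {0}, {}}}};
  G.dump();
}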
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index d1c5ddabb975..99406ed1496a 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -234,6 +234,11 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
// Ask the target if address-backscheduling is desirable, and if so how much.
const TargetSubtargetInfo &ST = MF.getSubtarget();
+ // Only use any non-zero latency for real defs/uses, in contrast to
+ // "fake" operands added by regalloc.
+ const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc();
+ bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() &&
+ !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
if (!Uses.contains(*Alias))
@@ -257,11 +262,18 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
Dep = SDep(SU, SDep::Data, *Alias);
RegUse = UseSU->getInstr();
}
- Dep.setLatency(
- SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
- UseOp));
+ const MCInstrDesc *UseMIDesc =
+ (RegUse ? &UseSU->getInstr()->getDesc() : nullptr);
+ bool ImplicitPseudoUse =
+ (UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) &&
+ !UseMIDesc->hasImplicitUseOfPhysReg(*Alias));
+ if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
+ Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp));
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ } else
+ Dep.setLatency(0);
- ST.adjustSchedDependency(SU, UseSU, Dep);
UseSU->addPred(Dep);
}
}
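
The added check suppresses scheduling latency on physical-register dependencies whose def or use is only an implicit operand appended after the instruction's declared operands, i.e. added by the register allocator rather than listed in the instruction descriptor. A rough standalone model of that test, with made-up types (InstrDesc here is hypothetical, not LLVM's MCInstrDesc):

#include <algorithm>
#include <vector>

// Hypothetical stand-in for an instruction descriptor: the statically
// declared operand count plus the physical registers it implicitly defines.
struct InstrDesc {
  unsigned NumDeclaredOperands;
  std::vector<unsigned> ImplicitDefs;
  bool hasImplicitDefOfPhysReg(unsigned Reg) const {
    return std::find(ImplicitDefs.begin(), ImplicitDefs.end(), Reg) !=
           ImplicitDefs.end();
  }
};

// An operand is a "pseudo" implicit def if it sits past the declared
// operands and the descriptor does not know about it either; in that case
// the patch sets the edge latency to 0 instead of querying the sched model.
bool isImplicitPseudoDef(const InstrDesc &Desc, unsigned OperIdx,
                         unsigned Reg) {
  return OperIdx >= Desc.NumDeclaredOperands &&
         !Desc.hasImplicitDefOfPhysReg(Reg);
}

int main() {
  InstrDesc D{2, {7}};
  // Operand index 3 on a register the descriptor does not implicitly define:
  return isImplicitPseudoDef(D, 3, 8) ? 0 : 1;
}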
@@ -996,7 +1008,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
for (auto &I : loads)
for (auto *SU : I.second)
NodeNums.push_back(SU->NodeNum);
- llvm::sort(NodeNums.begin(), NodeNums.end());
+ llvm::sort(NodeNums);
// The N last elements in NodeNums will be removed, and the SU with
// the lowest NodeNum of them will become the new BarrierChain to
@@ -1097,10 +1109,22 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
}
}
-void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
- // Cannot completely remove virtual function even in release mode.
+void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dumpNodeName(SU);
+ dbgs() << ": ";
+ SU.getInstr()->dump();
+#endif
+}
+
+void ScheduleDAGInstrs::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- SU->getInstr()->dump();
+ if (EntrySU.getInstr() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits)
+ dumpNodeAll(SU);
+ if (ExitSU.getInstr() != nullptr)
+ dumpNodeAll(ExitSU);
#endif
}
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index b8bfe69a76e1..4301372179b8 100644
--- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -157,8 +157,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (!freeUnits) {
LLVM_DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", ");
- LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
- LLVM_DEBUG(DAG->dumpNode(SU));
+ LLVM_DEBUG(DAG->dumpNode(*SU));
return Hazard;
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a5c0b7750410..ff5505c97721 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
@@ -83,6 +84,7 @@ STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
+STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
@@ -249,6 +251,11 @@ namespace {
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
+ // Scalars have size 0 to distinguish from singleton vectors.
+ SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
+ bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
+ bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
+
/// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
/// load.
///
@@ -257,8 +264,9 @@ namespace {
/// \param EltNo index of the vector element to load.
/// \param OriginalLoad load that EVE came from to be replaced.
/// \returns EVE on success SDValue() on failure.
- SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
- SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
+ SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
+ SDValue EltNo,
+ LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -285,6 +293,8 @@ namespace {
SDValue visitADD(SDNode *N);
SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
+ SDValue visitADDSAT(SDNode *N);
+ SDValue visitSUBSAT(SDNode *N);
SDValue visitADDC(SDNode *N);
SDValue visitUADDO(SDNode *N);
SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
@@ -318,6 +328,7 @@ namespace {
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
+ SDValue visitFunnelShift(SDNode *N);
SDValue visitRotate(SDNode *N);
SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
@@ -350,6 +361,7 @@ namespace {
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitFPOW(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
SDValue visitFP_TO_SINT(SDNode *N);
@@ -364,6 +376,8 @@ namespace {
SDValue visitFFLOOR(SDNode *N);
SDValue visitFMINNUM(SDNode *N);
SDValue visitFMAXNUM(SDNode *N);
+ SDValue visitFMINIMUM(SDNode *N);
+ SDValue visitFMAXIMUM(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
@@ -393,7 +407,7 @@ namespace {
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
- SDValue N1);
+ SDValue N1, SDNodeFlags Flags);
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
@@ -401,11 +415,14 @@ namespace {
SDValue foldVSelectOfConstants(SDNode *N);
SDValue foldBinOpIntoSelect(SDNode *BO);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
- SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
+ SDValue convertSelectOfFPConstantsToLoadOffset(
+ const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
@@ -455,7 +472,6 @@ namespace {
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue convertBuildVecZextToZext(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
- SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
@@ -482,6 +498,10 @@ namespace {
/// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
+ // Helper for findBetterNeighborChains. Walk up the store chain, adding
+ // additional chained stores that do not overlap and can be parallelized.
+ bool parallelizeChainedStores(StoreSDNode *St);
+
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@@ -515,7 +535,7 @@ namespace {
EVT &MemVT, unsigned ShAmt = 0);
/// Used by BackwardsPropagateMask to find suitable loads.
- bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
+ bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
SmallPtrSetImpl<SDNode*> &NodesWithConsts,
ConstantSDNode *Mask, SDNode *&NodeToMask);
/// Attempt to propagate a given AND node back to load leaves so that they
@@ -865,12 +885,6 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
return false;
}
-static SDValue peekThroughBitcast(SDValue V) {
- while (V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
- return V;
-}
-
// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
@@ -901,50 +915,23 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
return true;
}
-// Determines if it is a constant null integer or a splatted vector of a
-// constant null integer (with no undefs).
-// Build vector implicit truncation is not an issue for null values.
-static bool isNullConstantOrNullSplatConstant(SDValue N) {
- // TODO: may want to use peekThroughBitcast() here.
- if (ConstantSDNode *Splat = isConstOrConstSplat(N))
- return Splat->isNullValue();
- return false;
-}
-
-// Determines if it is a constant integer of one or a splatted vector of a
-// constant integer of one (with no undefs).
-// Do not permit build vector implicit truncation.
-static bool isOneConstantOrOneSplatConstant(SDValue N) {
- // TODO: may want to use peekThroughBitcast() here.
- unsigned BitWidth = N.getScalarValueSizeInBits();
- if (ConstantSDNode *Splat = isConstOrConstSplat(N))
- return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
- return false;
-}
-
-// Determines if it is a constant integer of all ones or a splatted vector of a
-// constant integer of all ones (with no undefs).
-// Do not permit build vector implicit truncation.
-static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
- N = peekThroughBitcast(N);
- unsigned BitWidth = N.getScalarValueSizeInBits();
- if (ConstantSDNode *Splat = isConstOrConstSplat(N))
- return Splat->isAllOnesValue() &&
- Splat->getAPIntValue().getBitWidth() == BitWidth;
- return false;
-}
-
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
-static bool isAnyConstantBuildVector(const SDNode *N) {
- return ISD::isBuildVectorOfConstantSDNodes(N) ||
- ISD::isBuildVectorOfConstantFPSDNodes(N);
+static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ return isConstantOrConstantVector(V, NoOpaques) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
- SDValue N1) {
+ SDValue N1, SDNodeFlags Flags) {
+ // Don't reassociate reductions.
+ if (Flags.hasVectorReduction())
+ return SDValue();
+
EVT VT = N0.getValueType();
- if (N0.getOpcode() == Opc) {
+ if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
@@ -964,7 +951,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
}
}
- if (N1.getOpcode() == Opc) {
+ if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
@@ -1501,6 +1488,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
+ case ISD::SADDSAT:
+ case ISD::UADDSAT: return visitADDSAT(N);
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: return visitSUBSAT(N);
case ISD::ADDC: return visitADDC(N);
case ISD::UADDO: return visitUADDO(N);
case ISD::SUBC: return visitSUBC(N);
@@ -1532,6 +1523,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
+ case ISD::FSHL:
+ case ISD::FSHR: return visitFunnelShift(N);
case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
@@ -1564,6 +1557,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::FPOW: return visitFPOW(N);
case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
@@ -1576,6 +1570,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FFLOOR: return visitFFLOOR(N);
case ISD::FMINNUM: return visitFMINNUM(N);
case ISD::FMAXNUM: return visitFMAXNUM(N);
+ case ISD::FMINIMUM: return visitFMINIMUM(N);
+ case ISD::FMAXIMUM: return visitFMAXIMUM(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
@@ -1855,8 +1851,11 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
// can be tried again once they have new operands.
AddUsersToWorklist(N);
do {
+ // Do as a single replacement to avoid rewalking use lists.
+ SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
+ Ops.push_back(N->getOperand(i));
+ DAG.ReplaceAllUsesWith(N, Ops.data());
} while (!N->use_empty());
deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -1870,17 +1869,7 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
}
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
- auto BinOpcode = BO->getOpcode();
- assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
- BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
- BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
- BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
- BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
- BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
- BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
- BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
- BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
- "Unexpected binary operator");
+ assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
@@ -1910,11 +1899,11 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
// propagate non constant operands into select. I.e.:
// and (select Cond, 0, -1), X --> select Cond, 0, X
// or X, (select Cond, -1, 0) --> select Cond, -1, X
- bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
- (isNullConstantOrNullSplatConstant(CT) ||
- isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
- (isNullConstantOrNullSplatConstant(CF) ||
- isAllOnesConstantOrAllOnesSplatConstant(CF));
+ auto BinOpcode = BO->getOpcode();
+ bool CanFoldNonConst =
+ (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+ (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
+ (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!CanFoldNonConst &&
@@ -2009,10 +1998,8 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
return SDValue();
// The shift must be of a 'not' value.
- // TODO: Use isBitwiseNot() if it works with vectors.
SDValue Not = ShiftOp.getOperand(0);
- if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
- !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
+ if (!Not.hasOneUse() || !isBitwiseNot(Not))
return SDValue();
// The shift must be moving the sign bit to the least-significant-bit.
@@ -2085,7 +2072,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// add (zext i1 X), -1 -> sext (not i1 X)
// because most (?) targets generate better code for the zext form.
if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
- isOneConstantOrOneSplatConstant(N1)) {
+ isOneOrOneSplat(N1)) {
SDValue X = N0.getOperand(0);
if ((!LegalOperations ||
(TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
@@ -2110,17 +2097,15 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return NewSel;
// reassociate add
- if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
+ if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
return RADD;
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N0.getOperand(0)))
+ if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+ if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
@@ -2178,7 +2163,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
// fold (add (xor a, -1), 1) -> (sub 0, a)
- if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
N0.getOperand(0));
@@ -2191,6 +2176,49 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitADDSAT(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ // TODO SimplifyVBinOp
+
+ // fold (add_sat x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ }
+
+ // fold (add_sat x, undef) -> -1
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getAllOnesConstant(DL, VT);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+ // canonicalize constant to RHS
+ if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, VT, N1, N0);
+ // fold (add_sat c1, c2) -> c3
+ return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
+ N1.getNode());
+ }
+
+ // fold (add_sat x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ // If it cannot overflow, transform into an add.
+ if (Opcode == ISD::UADDSAT)
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+
+ return SDValue();
+}
+
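
The new visitADDSAT folds are easiest to see on plain saturating arithmetic: add_sat with a zero operand is the identity, and if the add provably cannot overflow it is just an ordinary add. (The undef fold above relies on the combiner being free to pick the undef value so that the result saturates to all-ones.) A small self-contained illustration of the unsigned case, independent of LLVM:

#include <cassert>
#include <cstdint>

// Unsigned saturating add on 8 bits.
uint8_t uadd_sat(uint8_t a, uint8_t b) {
  unsigned s = unsigned(a) + unsigned(b);
  return s > 0xFF ? 0xFF : uint8_t(s);
}

int main() {
  // fold (add_sat x, 0) -> x
  for (unsigned x = 0; x <= 0xFF; ++x)
    assert(uadd_sat(uint8_t(x), 0) == x);

  // If the add provably cannot overflow, saturating add == plain add.
  // Here both inputs are <= 0x7F, so the sum always fits in 8 bits.
  for (unsigned x = 0; x <= 0x7F; ++x)
    for (unsigned y = 0; y <= 0x7F; ++y)
      assert(uadd_sat(uint8_t(x), uint8_t(y)) == uint8_t(x + y));
  return 0;
}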
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
bool Masked = false;
@@ -2235,7 +2263,7 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
+ isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N0,
DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
@@ -2248,8 +2276,7 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits &&
- isOneConstantOrOneSplatConstant(N1->getOperand(1)))
+ if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
}
@@ -2380,7 +2407,7 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
// fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
- if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
DAG.getConstant(0, DL, VT),
N0.getOperand(0));
@@ -2539,8 +2566,7 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
- SelectionDAG &DAG, bool LegalOperations,
- bool LegalTypes) {
+ SelectionDAG &DAG, bool LegalOperations) {
if (!VT.isVector())
return DAG.getConstant(0, DL, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
@@ -2567,7 +2593,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
- return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// fold (sub c1, c2) -> c1-c2
@@ -2586,7 +2612,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
- if (isNullConstantOrNullSplatConstant(N0)) {
+ if (isNullOrNullSplat(N0)) {
unsigned BitWidth = VT.getScalarSizeInBits();
// Right-shifting everything out but the sign bit followed by negation is
// the same as flipping arithmetic/logical shift type without the negation:
@@ -2617,12 +2643,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
- if (isAllOnesConstantOrAllOnesSplatConstant(N0))
+ if (isAllOnesOrAllOnesSplat(N0))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold (A - (0-B)) -> A+B
- if (N1.getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+ if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
// fold A-(A-B) -> B
@@ -2676,14 +2701,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold (X - (-Y * Z)) -> (X + (Y * Z))
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
if (N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
+ isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1));
return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
}
if (N1.getOperand(1).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
+ isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
N1.getOperand(0),
N1.getOperand(1).getOperand(1));
@@ -2756,6 +2781,43 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ // TODO SimplifyVBinOp
+
+ // fold (sub_sat x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (sub_sat x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (sub_sat x, x) -> 0
+ if (N0 == N1)
+ return DAG.getConstant(0, DL, VT);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // fold (sub_sat c1, c2) -> c3
+ return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
+ N1.getNode());
+ }
+
+ // fold (sub_sat x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ return SDValue();
+}
+
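
visitSUBSAT mirrors the add case, but its undef fold differs: sub_sat with an undef operand becomes 0 (undef can always be chosen equal to the other operand), whereas add_sat with undef becomes all-ones. A quick sanity check of the x - x and x - 0 identities for unsigned saturating subtract, again just plain arithmetic:

#include <cassert>
#include <cstdint>

// Unsigned saturating subtract on 8 bits: clamps at 0 instead of wrapping.
uint8_t usub_sat(uint8_t a, uint8_t b) { return a > b ? uint8_t(a - b) : 0; }

int main() {
  for (unsigned x = 0; x <= 0xFF; ++x) {
    assert(usub_sat(uint8_t(x), uint8_t(x)) == 0); // (sub_sat x, x) -> 0
    assert(usub_sat(uint8_t(x), 0) == x);          // (sub_sat x, 0) -> x
  }
  return 0;
}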
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2931,6 +2993,39 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
getShiftAmountTy(N0.getValueType()))));
}
+ // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
+ // mul x, (2^N + 1) --> add (shl x, N), x
+ // mul x, (2^N - 1) --> sub (shl x, N), x
+ // Examples: x * 33 --> (x << 5) + x
+ // x * 15 --> (x << 4) - x
+ // x * -33 --> -((x << 5) + x)
+ // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
+ if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+ // TODO: We could handle more general decomposition of any constant by
+ // having the target set a limit on number of ops and making a
+ // callback to determine that sequence (similar to sqrt expansion).
+ unsigned MathOp = ISD::DELETED_NODE;
+ APInt MulC = ConstValue1.abs();
+ if ((MulC - 1).isPowerOf2())
+ MathOp = ISD::ADD;
+ else if ((MulC + 1).isPowerOf2())
+ MathOp = ISD::SUB;
+
+ if (MathOp != ISD::DELETED_NODE) {
+ unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
+ : (MulC + 1).logBase2();
+ assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
+ "Not expecting multiply-by-constant that could have simplified");
+ SDLoc DL(N);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(ShAmt, DL, VT));
+ SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
+ if (ConstValue1.isNegative())
+ R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
+ return R;
+ }
+ }
+
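
The decomposition above only needs the constant's absolute value to be a power of two plus or minus one; for a negative constant the result is negated afterwards. A standalone check of the identities listed in the comment (nothing LLVM-specific is assumed):

#include <cassert>
#include <cstdint>

// mul x, (2^N + 1) --> (x << N) + x
// mul x, (2^N - 1) --> (x << N) - x
// For a negative constant, decompose |C| and negate the result.
int32_t mulByConst(int32_t x, int32_t c, unsigned shAmt, bool isSub) {
  int32_t r = isSub ? (x << shAmt) - x : (x << shAmt) + x;
  return c < 0 ? -r : r;
}

int main() {
  for (int32_t x = 0; x <= 100; ++x) {
    assert(mulByConst(x, 33, 5, /*isSub=*/false) == x * 33);  // 33 = 2^5 + 1
    assert(mulByConst(x, 15, 4, /*isSub=*/true) == x * 15);   // 15 = 2^4 - 1
    assert(mulByConst(x, -33, 5, /*isSub=*/false) == x * -33);
    assert(mulByConst(x, -15, 4, /*isSub=*/true) == x * -15);
  }
  return 0;
}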
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
if (N0.getOpcode() == ISD::SHL &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
@@ -2974,7 +3069,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getOperand(1), N1));
// reassociate mul
- if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
+ if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
return SDValue();
@@ -3076,7 +3171,16 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- if (DAG.isUndef(N->getOpcode(), {N0, N1}))
+ unsigned Opc = N->getOpcode();
+ bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // X / undef -> undef
+ // X % undef -> undef
+ // X / 0 -> undef
+ // X % 0 -> undef
+ // NOTE: This includes vectors where any divisor element is zero/undef.
+ if (DAG.isUndef(Opc, {N0, N1}))
return DAG.getUNDEF(VT);
// undef / X -> 0
@@ -3084,6 +3188,26 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
+ // 0 / X -> 0
+ // 0 % X -> 0
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ if (N0C && N0C->isNullValue())
+ return N0;
+
+ // X / X -> 1
+ // X % X -> 0
+ if (N0 == N1)
+ return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
+
+ // X / 1 -> X
+ // X % 1 -> 0
+ // If this is a boolean op (single-bit element type), we can't have
+ // division-by-zero or remainder-by-zero, so assume the divisor is 1.
+ // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
+ // it's a 1.
+ if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
+ return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
+
return SDValue();
}
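
The identities newly handled by simplifyDivRem hold for any non-zero divisor; a zero divisor is undefined for div/rem, so the combiner is free to produce the same results in that case as well. A plain-arithmetic restatement:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 1; x <= 1000; ++x) {
    assert(0u / x == 0u && 0u % x == 0u); // 0 / X -> 0,  0 % X -> 0
    assert(x / x == 1u && x % x == 0u);   // X / X -> 1,  X % X -> 0
    assert(x / 1u == x && x % 1u == 0u);  // X / 1 -> X,  X % 1 -> 0
  }
  // An i1 (boolean) divisor can only be 0 or 1; division by zero is
  // undefined, so the combiner may assume such a divisor is 1.
  return 0;
}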
@@ -3105,9 +3229,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
- // fold (sdiv X, 1) -> X
- if (N1C && N1C->isOne())
- return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
@@ -3128,8 +3249,19 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
- if (SDValue V = visitSDIVLike(N0, N1, N))
+ if (SDValue V = visitSDIVLike(N0, N1, N)) {
+ // If the corresponding remainder node exists, update its users with
+ // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
+ { N0, N1 })) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
return V;
+ }
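
When the optimized divide is produced and a matching srem node already exists in the DAG, the remainder users are rewritten in terms of the quotient. That relies on the usual truncating-division identity, which C++ integer division also guarantees for both signs:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t a = -50; a <= 50; ++a)
    for (int32_t b = -50; b <= 50; ++b) {
      if (b == 0)
        continue;
      int32_t quotient = a / b;
      // Remainder = Dividend - (Quotient * Divisor), for both signs.
      assert(a % b == a - quotient * b);
    }
  return 0;
}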
// sdiv, srem -> sdivrem
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
@@ -3148,8 +3280,6 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT CCVT = getSetCCResultType(VT);
unsigned BitWidth = VT.getScalarSizeInBits();
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
// Helper for determining whether a value is a power-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
@@ -3166,8 +3296,7 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// FIXME: We check for the exact bit here because the generic lowering gives
// better results in that case. The target-specific lowering should learn how
// to handle exact sdivs efficiently.
- if (!N->getFlags().hasExact() &&
- ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {
+ if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
// Target-specific implementation of sdiv x, pow2.
if (SDValue Res = BuildSDIVPow2(N))
return Res;
@@ -3218,7 +3347,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
@@ -3245,9 +3375,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
- // fold (udiv X, 1) -> X
- if (N1C && N1C->isOne())
- return N0;
// fold (udiv X, -1) -> select(X == -1, 1, 0)
if (N1C && N1C->getAPIntValue().isAllOnesValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
@@ -3260,8 +3387,19 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (SDValue V = visitUDIVLike(N0, N1, N))
+ if (SDValue V = visitUDIVLike(N0, N1, N)) {
+ // If the corresponding remainder node exists, update its users with
+ // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
+ { N0, N1 })) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
return V;
+ }
// sdiv, srem -> sdivrem
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
@@ -3278,8 +3416,6 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1)) {
@@ -3311,7 +3447,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -3380,8 +3517,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
SDValue OptimizedDiv =
isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
- if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
- OptimizedDiv.getOpcode() != ISD::SDIVREM) {
+ if (OptimizedDiv.getNode()) {
+ // If the equivalent Div node also exists, update its users.
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
+ { N0, N1 }))
+ CombineTo(DivNode, OptimizedDiv);
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(OptimizedDiv.getNode());
@@ -3468,6 +3609,19 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
+ // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
+ SDLoc DL(N);
+ unsigned NumEltBits = VT.getScalarSizeInBits();
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ SDValue SRLAmt = DAG.getNode(
+ ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
+
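
The new mulhu fold uses the fact that the high half of x * 2^c is just x shifted right by (bitwidth - c). A quick 32-bit check; the c = 0 case (multiply by 1) is skipped here since its high half is simply zero and is presumably simplified by other folds:

#include <cassert>
#include <cstdint>

// High 32 bits of a 32x32 -> 64 unsigned multiply.
uint32_t mulhu32(uint32_t a, uint32_t b) {
  return uint32_t((uint64_t(a) * uint64_t(b)) >> 32);
}

int main() {
  uint32_t samples[] = {0u, 1u, 0x1234u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t x : samples)
    for (unsigned c = 1; c < 32; ++c)
      // mulhu x, (1 << c)  ==  x >> (32 - c)
      assert(mulhu32(x, 1u << c) == x >> (32 - c));
  return 0;
}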
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
@@ -3495,18 +3649,16 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp) {
// If the high half is not needed, just compute the low half.
bool HiExists = N->hasAnyUseOfValue(1);
- if (!HiExists &&
- (!LegalOperations ||
- TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
+ if (!HiExists && (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
return CombineTo(N, Res, Res);
}
// If the low half is not needed, just compute the high half.
bool LoExists = N->hasAnyUseOfValue(0);
- if (!LoExists &&
- (!LegalOperations ||
- TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+ if (!LoExists && (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
return CombineTo(N, Res, Res);
}
@@ -3522,7 +3674,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
SDValue LoOpt = combine(Lo.getNode());
if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
(!LegalOperations ||
- TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+ TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
return CombineTo(N, LoOpt, LoOpt);
}
@@ -3532,7 +3684,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
SDValue HiOpt = combine(Hi.getNode());
if (HiOpt.getNode() && HiOpt != Hi &&
(!LegalOperations ||
- TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+ TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
return CombineTo(N, HiOpt, HiOpt);
}
@@ -3664,59 +3816,94 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return SDValue();
}
-/// If this is a binary operator with two operands of the same opcode, try to
-/// simplify it.
-SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+/// If this is a bitwise logic instruction and both operands have the same
+/// opcode, try to sink the other opcode after the logic instruction.
+SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+ unsigned LogicOpcode = N->getOpcode();
+ unsigned HandOpcode = N0.getOpcode();
+ assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+ LogicOpcode == ISD::XOR) && "Expected logic opcode");
+ assert(HandOpcode == N1.getOpcode() && "Bad input!");
// Bail early if none of these transforms apply.
- if (N0.getNumOperands() == 0) return SDValue();
-
- // For each of OP in AND/OR/XOR:
- // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
- // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
- // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
- // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
- // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
- //
- // do not sink logical op inside of a vector extend, since it may combine
- // into a vsetcc.
- EVT Op0VT = N0.getOperand(0).getValueType();
- if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
- N0.getOpcode() == ISD::SIGN_EXTEND ||
- N0.getOpcode() == ISD::BSWAP ||
- // Avoid infinite looping with PromoteIntBinOp.
- (N0.getOpcode() == ISD::ANY_EXTEND &&
- (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
- (N0.getOpcode() == ISD::TRUNCATE &&
- (!TLI.isZExtFree(VT, Op0VT) ||
- !TLI.isTruncateFree(Op0VT, VT)) &&
- TLI.isTypeLegal(Op0VT))) &&
- !VT.isVector() &&
- Op0VT == N1.getOperand(0).getValueType() &&
- (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
- SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
- N0.getOperand(0).getValueType(),
- N0.getOperand(0), N1.getOperand(0));
- AddToWorklist(ORNode.getNode());
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
- }
-
- // For each of OP in SHL/SRL/SRA/AND...
- // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
- // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
- // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
- if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
- N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ if (N0.getNumOperands() == 0)
+ return SDValue();
+
+ // FIXME: We should check number of uses of the operands to not increase
+ // the instruction count for all transforms.
+
+ // Handle size-changing casts.
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N1.getOperand(0);
+ EVT XVT = X.getValueType();
+ SDLoc DL(N);
+ if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
+ HandOpcode == ISD::SIGN_EXTEND) {
+ // If both operands have other uses, this transform would create extra
+ // instructions without eliminating anything.
+ if (!N0.hasOneUse() && !N1.hasOneUse())
+ return SDValue();
+ // We need matching integer source types.
+ if (XVT != Y.getValueType())
+ return SDValue();
+ // Don't create an illegal op during or after legalization. Don't ever
+ // create an unsupported vector op.
+ if ((VT.isVector() || LegalOperations) &&
+ !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
+ return SDValue();
+ // Avoid infinite looping with PromoteIntBinOp.
+ // TODO: Should we apply desirable/legal constraints to all opcodes?
+ if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
+ !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
+ return SDValue();
+ // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+
+ // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
+ if (HandOpcode == ISD::TRUNCATE) {
+ // If both operands have other uses, this transform would create extra
+ // instructions without eliminating anything.
+ if (!N0.hasOneUse() && !N1.hasOneUse())
+ return SDValue();
+ // We need matching source types.
+ if (XVT != Y.getValueType())
+ return SDValue();
+ // Don't create an illegal op during or after legalization.
+ if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
+ return SDValue();
+ // Be extra careful sinking truncate. If it's free, there's no benefit in
+ // widening a binop. Also, don't create a logic op on an illegal type.
+ if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
+ return SDValue();
+ if (!TLI.isTypeLegal(XVT))
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+
+ // For binops SHL/SRL/SRA/AND:
+ // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
+ if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
+ HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
N0.getOperand(1) == N1.getOperand(1)) {
- SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
- N0.getOperand(0).getValueType(),
- N0.getOperand(0), N1.getOperand(0));
- AddToWorklist(ORNode.getNode());
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- ORNode, N0.getOperand(1));
+ // If either operand has other uses, this transform is not an improvement.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
+ }
+
+ // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
+ if (HandOpcode == ISD::BSWAP) {
+ // If either operand has other uses, this transform is not an improvement.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
}
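
The rewritten hoistLogicOpWithSameOpcodeHands handles several shapes, but the underlying bit identities are independent of the DAG machinery: zero- and sign-extension commute with the bitwise logic ops, and a logic op of two shifts by the same amount can be done before the shift. A standalone spot check of the zero-extend and shift cases:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t xs[] = {0x00, 0x0F, 0x5A, 0xFF};
  for (uint8_t x : xs)
    for (uint8_t y : xs) {
      // logic_op (zext x), (zext y) --> zext (logic_op x, y)
      assert((uint32_t(x) & uint32_t(y)) == uint32_t(uint8_t(x & y)));
      assert((uint32_t(x) | uint32_t(y)) == uint32_t(uint8_t(x | y)));
      assert((uint32_t(x) ^ uint32_t(y)) == uint32_t(uint8_t(x ^ y)));
      // logic_op (x << z), (y << z) --> (logic_op x, y) << z
      for (unsigned z = 0; z < 8; ++z)
        assert(uint8_t((x << z) & (y << z)) == uint8_t(uint8_t(x & y) << z));
    }
  return 0;
}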
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
@@ -3726,21 +3913,12 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// we don't want to undo this promotion.
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
- if ((N0.getOpcode() == ISD::BITCAST ||
- N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+ if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
Level <= AfterLegalizeTypes) {
- SDValue In0 = N0.getOperand(0);
- SDValue In1 = N1.getOperand(0);
- EVT In0Ty = In0.getValueType();
- EVT In1Ty = In1.getValueType();
- SDLoc DL(N);
- // If both incoming values are integers, and the original types are the
- // same.
- if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
- SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
- SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
- AddToWorklist(Op.getNode());
- return BC;
+ // Input types must be integer and the same.
+ if (XVT.isInteger() && XVT == Y.getValueType()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
}
}
@@ -3756,61 +3934,44 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// If both shuffles use the same mask, and both shuffles have the same first
// or second operand, then it might still be profitable to move the shuffle
// after the xor/and/or operation.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
- ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
- ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
-
- assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
+ auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
+ assert(X.getValueType() == Y.getValueType() &&
"Inputs to shuffles are not the same type");
// Check that both shuffles use the same mask. The masks are known to be of
// the same length because the result vector type is the same.
// Check also that shuffles have only one use to avoid introducing extra
// instructions.
- if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
- SVN0->getMask().equals(SVN1->getMask())) {
- SDValue ShOp = N0->getOperand(1);
-
- // Don't try to fold this node if it requires introducing a
- // build vector of all zeros that might be illegal at this stage.
- if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
- if (!LegalTypes)
- ShOp = DAG.getConstant(0, SDLoc(N), VT);
- else
- ShOp = SDValue();
- }
+ if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
+ !SVN0->getMask().equals(SVN1->getMask()))
+ return SDValue();
- // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
- // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C)
- // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
- if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
- SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- N0->getOperand(0), N1->getOperand(0));
- AddToWorklist(NewNode.getNode());
- return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
- SVN0->getMask());
- }
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ SDValue ShOp = N0.getOperand(1);
+ if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
+ ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
- // Don't try to fold this node if it requires introducing a
- // build vector of all zeros that might be illegal at this stage.
- ShOp = N0->getOperand(0);
- if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
- if (!LegalTypes)
- ShOp = DAG.getConstant(0, SDLoc(N), VT);
- else
- ShOp = SDValue();
- }
+ // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
+ if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
+ }
- // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
- // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B))
- // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
- if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
- SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- N0->getOperand(1), N1->getOperand(1));
- AddToWorklist(NewNode.getNode());
- return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
- SVN0->getMask());
- }
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ ShOp = N0.getOperand(0);
+ if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
+ ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
+ if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
+ N1.getOperand(1));
+ return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
}
}
@@ -3846,8 +4007,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
bool IsInteger = OpVT.isInteger();
if (LR == RR && CC0 == CC1 && IsInteger) {
- bool IsZero = isNullConstantOrNullSplatConstant(LR);
- bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
+ bool IsZero = isNullOrNullSplat(LR);
+ bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
// All bits clear?
bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
@@ -4149,7 +4310,7 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
}
bool DAGCombiner::SearchForAndLoads(SDNode *N,
- SmallPtrSetImpl<LoadSDNode*> &Loads,
+ SmallVectorImpl<LoadSDNode*> &Loads,
SmallPtrSetImpl<SDNode*> &NodesWithConsts,
ConstantSDNode *Mask,
SDNode *&NodeToMask) {
@@ -4186,7 +4347,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
// Use LE to convert equal sized loads to zext.
if (ExtVT.bitsLE(Load->getMemoryVT()))
- Loads.insert(Load);
+ Loads.push_back(Load);
continue;
}
@@ -4251,7 +4412,7 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
if (isa<LoadSDNode>(N->getOperand(0)))
return false;
- SmallPtrSet<LoadSDNode*, 8> Loads;
+ SmallVector<LoadSDNode*, 8> Loads;
SmallPtrSet<SDNode*, 2> NodesWithConsts;
SDNode *FixupNode = nullptr;
if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
@@ -4399,7 +4560,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
if (isAllOnesConstant(N1))
@@ -4414,7 +4575,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return NewSel;
// reassociate and
- if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
+ if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
// Try to convert a constant mask AND into a shuffle clear mask.
@@ -4563,9 +4724,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Res = ReduceLoadWidth(N)) {
LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
-
AddToWorklist(N);
- CombineTo(LN0, Res, Res.getValue(1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
return SDValue(N, 0);
}
}
@@ -4585,8 +4745,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
if (N0.getOpcode() == N1.getOpcode())
- if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
- return Tmp;
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
// Masking the negated extension of a boolean is just the zero-extended
// boolean:
@@ -4596,7 +4756,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Note: the SimplifyDemandedBits fold below can make an information-losing
// transform, and then we have no way to find this better fold.
if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
- if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
+ if (isNullOrNullSplat(N0.getOperand(0))) {
SDValue SubRHS = N0.getOperand(1);
if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
@@ -5124,16 +5284,16 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return BSwap;
// reassociate or
- if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
+ if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
- // iff (c1 & c2) != 0.
- auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
- return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
+ // iff (c1 & c2) != 0 or c1/c2 are undef.
+ auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
+ return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
};
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
- ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
+ ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
if (SDValue COR = DAG.FoldConstantArithmetic(
ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
@@ -5144,8 +5304,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode())
- if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
- return Tmp;
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
@@ -5257,9 +5417,9 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
// Compute the shift amount we need to extract to complete the rotate.
const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
- APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
- if (NeededShiftAmt.isNegative())
+ if (OppShiftCst->getAPIntValue().ugt(VTWidth))
return SDValue();
+ APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
// Normalize the bitwidth of the two mul/udiv/shift constant operands.
APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
APInt OppLHSAmt = OppLHSCst->getAPIntValue();
@@ -5340,8 +5500,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
unsigned MaskLoBits = 0;
if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
- KnownBits Known;
- DAG.computeKnownBits(Neg.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
unsigned Bits = Log2_64(EltSize);
if (NegC->getAPIntValue().getActiveBits() <= Bits &&
((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
@@ -5363,8 +5522,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// Pos'. The truncation is redundant for the purpose of the equality.
if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
- KnownBits Known;
- DAG.computeKnownBits(Pos.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
MaskLoBits))
@@ -5894,7 +6052,7 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
assert(N->getOpcode() == ISD::XOR);
// Don't touch 'not' (i.e. where y = -1).
- if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
+ if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
return SDValue();
EVT VT = N->getValueType(0);
@@ -5911,7 +6069,7 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
SDValue Xor0 = Xor.getOperand(0);
SDValue Xor1 = Xor.getOperand(1);
// Don't touch 'not' (i.e. where y = -1).
- if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
+ if (isAllOnesOrAllOnesSplat(Xor1))
return false;
if (Other == Xor0)
std::swap(Xor0, Xor1);
@@ -5977,8 +6135,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// fold (xor x, undef) -> undef
if (N0.isUndef())
return N0;
@@ -5988,11 +6147,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -6001,19 +6160,18 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return NewSel;
// reassociate xor
- if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
+ if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
// fold !(x cc y) -> (x !cc y)
+ unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
- bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
- isInt);
-
+ LHS.getValueType().isInteger());
if (!LegalOperations ||
TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
- switch (N0.getOpcode()) {
+ switch (N0Opcode) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
@@ -6026,54 +6184,74 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
- if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
- N0.getNode()->hasOneUse() &&
+ if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
SDValue V = N0.getOperand(0);
- SDLoc DL(N0);
- V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
- DAG.getConstant(1, DL, V.getValueType()));
+ SDLoc DL0(N0);
+ V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
+ DAG.getConstant(1, DL0, V.getValueType()));
AddToWorklist(V.getNode());
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
- (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
- unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
+ return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
if (isAllOnesConstant(N1) && N0.hasOneUse() &&
- (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
- unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
+ return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
}
}
// fold (xor (and x, y), y) -> (and (not x), y)
- if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
- N0->getOperand(1) == N1) {
- SDValue X = N0->getOperand(0);
+ if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
+ SDValue X = N0.getOperand(0);
SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
AddToWorklist(NotX.getNode());
- return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
+ return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
+ }
+
+ if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
+ ConstantSDNode *XorC = isConstOrConstSplat(N1);
+ ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ if (XorC && ShiftC) {
+ // Don't crash on an oversized shift. We can't guarantee that a bogus
+ // shift has been simplified to undef.
+ uint64_t ShiftAmt = ShiftC->getLimitedValue();
+ if (ShiftAmt < BitWidth) {
+ APInt Ones = APInt::getAllOnesValue(BitWidth);
+ Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
+ if (XorC->getAPIntValue() == Ones) {
+ // If the xor constant is a shifted -1, do a 'not' before the shift:
+ // xor (X << ShiftC), XorC --> (not X) << ShiftC
+ // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
+ SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
+ return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
+ }
+ }
+ }
}
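
A quick way to convince yourself of the shifted-not identity used in the block above: xor'ing a shifted value with the correspondingly shifted all-ones constant is the same as shifting the complemented value. The snippet below is a standalone C++ spot check of that identity, not LLVM code.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 1u, 0xdeadbeefu, 0xffffffffu};
  for (uint32_t X : Vals) {
    for (unsigned S = 0; S < 32; ++S) {
      uint32_t Ones = ~0u;
      assert(((X << S) ^ (Ones << S)) == (~X << S)); // shl form of the fold
      assert(((X >> S) ^ (Ones >> S)) == (~X >> S)); // srl form of the fold
    }
  }
  return 0;
}
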
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
- SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
- SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
+ SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
+ SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
SDValue S0 = S.getOperand(0);
@@ -6081,14 +6259,14 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
unsigned OpSizeInBits = VT.getScalarSizeInBits();
if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
if (C->getAPIntValue() == (OpSizeInBits - 1))
- return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+ return DAG.getNode(ISD::ABS, DL, VT, S0);
}
}
}
// fold (xor x, x) -> 0
if (N0 == N1)
- return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// fold (xor (shl 1, x), -1) -> (rotl ~1, x)
// Here is a concrete example of this equivalence:
@@ -6108,17 +6286,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// consistent result.
// - Pushing the zero left requires shifting one bits in from the right.
// A rotate left of ~1 is a nice way of achieving the desired result.
- if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
- && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
- SDLoc DL(N);
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
+ isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
N0.getOperand(1));
}
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
- if (N0.getOpcode() == N1.getOpcode())
- if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
- return Tmp;
+ if (N0Opcode == N1.getOpcode())
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
// Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
if (SDValue MM = unfoldMaskedMerge(N))
@@ -6134,6 +6311,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+ // Do not turn a 'not' into a regular xor.
+ if (isBitwiseNot(N->getOperand(0)))
+ return SDValue();
+
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
@@ -6191,7 +6372,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
return SDValue();
}
- if (!TLI.isDesirableToCommuteWithShift(LHS))
+ if (!TLI.isDesirableToCommuteWithShift(N, Level))
return SDValue();
// Fold the constants, shifting the binop RHS by the shift amount.
@@ -6239,9 +6420,16 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
unsigned Bitsize = VT.getScalarSizeInBits();
// fold (rot x, 0) -> x
- if (isNullConstantOrNullSplatConstant(N1))
+ if (isNullOrNullSplat(N1))
return N0;
+ // fold (rot x, c) -> x iff (c % BitSize) == 0
+ if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
+ APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
+ if (DAG.MaskedValueIsZero(N1, ModuloMask))
+ return N0;
+ }
+
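
The new modulo fold above relies on a rotate by any multiple of the bit width being an identity. A minimal scalar sketch with a hand-rolled helper (the name Rotl32 is ours, not an LLVM API):

#include <cassert>
#include <cstdint>

// Hand-rolled 32-bit rotate-left.
static uint32_t Rotl32(uint32_t X, unsigned C) {
  C &= 31;
  return C ? (X << C) | (X >> (32 - C)) : X;
}

int main() {
  const uint32_t X = 0x12345678u;
  const unsigned Amts[] = {0u, 32u, 64u, 96u}; // multiples of the bit width
  for (unsigned C : Amts)
    assert(Rotl32(X, C) == X); // rot by a multiple of BitSize is an identity
  return 0;
}
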
// fold (rot x, c) -> (rot x, c % BitSize)
if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
if (Cst->getAPIntValue().uge(Bitsize)) {
@@ -6284,6 +6472,9 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -6318,22 +6509,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
- // fold (shl 0, x) -> 0
- if (isNullConstantOrNullSplatConstant(N0))
- return N0;
- // fold (shl x, c >= size(x)) -> undef
- // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
- auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
- return Val->getAPIntValue().uge(OpSizeInBits);
- };
- if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
- return DAG.getUNDEF(VT);
- // fold (shl x, 0) -> x
- if (N1C && N1C->isNullValue())
- return N0;
- // fold (shl undef, x) -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -6454,7 +6629,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+ TLI.shouldFoldShiftPairToMask(N, Level)) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N0C1->getZExtValue();
if (c1 < OpSizeInBits) {
@@ -6495,7 +6671,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
N0.getNode()->hasOneUse() &&
isConstantOrConstantVector(N1, /* No Opaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
+ TLI.isDesirableToCommuteWithShift(N, Level)) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
AddToWorklist(Shl0.getNode());
@@ -6522,6 +6699,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -6542,16 +6722,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
- // fold (sra x, c >= size(x)) -> undef
- // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
- auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
- return Val->getAPIntValue().uge(OpSizeInBits);
- };
- if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
- return DAG.getUNDEF(VT);
- // fold (sra x, 0) -> x
- if (N1C && N1C->isNullValue())
- return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -6571,31 +6741,30 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ // clamp (add c1, c2) to max shift.
if (N0.getOpcode() == ISD::SRA) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
+ EVT ShiftSVT = ShiftVT.getScalarType();
+ SmallVector<SDValue, 16> ShiftValues;
- auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
- ConstantSDNode *RHS) {
+ auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
- return (c1 + c2).uge(OpSizeInBits);
- };
- if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
- return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
- DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
-
- auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
- ConstantSDNode *RHS) {
- APInt c1 = LHS->getAPIntValue();
- APInt c2 = RHS->getAPIntValue();
- zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
- return (c1 + c2).ult(OpSizeInBits);
+ APInt Sum = c1 + c2;
+ unsigned ShiftSum =
+ Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
+ ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
+ return true;
};
- if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
- SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
- return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
+ SDValue ShiftValue;
+ if (VT.isVector())
+ ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
+ else
+ ShiftValue = ShiftValues[0];
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
}
}
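
The rewritten SRA-of-SRA case folds two arithmetic shifts into one and clamps the combined amount at BitWidth - 1 instead of producing undef. A small standalone check of that clamping identity (plain C++, relying on signed >> being an arithmetic shift, as it is on all common targets):

#include <cassert>
#include <cstdint>

// Arithmetic right shift with the same clamp the combine applies.
static int32_t Sra32(int32_t X, unsigned Amt) {
  return X >> (Amt > 31 ? 31 : Amt);
}

int main() {
  const int32_t Vals[] = {0, 1, -1, 123456789, INT32_MIN};
  for (int32_t X : Vals)
    for (unsigned C1 = 0; C1 < 32; ++C1)
      for (unsigned C2 = 0; C2 < 32; ++C2)
        assert(Sra32(Sra32(X, C1), C2) == Sra32(X, C1 + C2));
  return 0;
}
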
@@ -6689,6 +6858,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -6703,19 +6875,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
- // fold (srl 0, x) -> 0
- if (isNullConstantOrNullSplatConstant(N0))
- return N0;
- // fold (srl x, c >= size(x)) -> undef
- // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
- auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
- return Val->getAPIntValue().uge(OpSizeInBits);
- };
- if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
- return DAG.getUNDEF(VT);
- // fold (srl x, 0) -> x
- if (N1C && N1C->isNullValue())
- return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -6819,8 +6978,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
- KnownBits Known;
- DAG.computeKnownBits(N0.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
@@ -6906,6 +7064,41 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ bool IsFSHL = N->getOpcode() == ISD::FSHL;
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ // fold (fshl N0, N1, 0) -> N0
+ // fold (fshr N0, N1, 0) -> N1
+ if (isPowerOf2_32(BitWidth))
+ if (DAG.MaskedValueIsZero(
+ N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
+ return IsFSHL ? N0 : N1;
+
+ // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+ if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+ if (Cst->getAPIntValue().uge(BitWidth)) {
+ uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
+ DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+ }
+ }
+
+ // fold (fshl N0, N0, N2) -> (rotl N0, N2)
+ // fold (fshr N0, N0, N2) -> (rotr N0, N2)
+ // TODO: Investigate flipping this rotate if only one rotate direction is
+ // legal; if funnel shift is legal as well, we might be better off avoiding
+ // the non-constant (BW - N2) shift amount.
+ unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (N0 == N1 && hasOperation(RotOpc, VT))
+ return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
+
+ return SDValue();
+}
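
For readers unfamiliar with ISD::FSHL/FSHR, the helper below is a rough scalar model of the semantics the new visitor assumes: the concatenation of the two operands is shifted by the amount modulo the bit width, and with both operands equal the operation degenerates to a rotate. Fshl32 and Rotl32 are our illustrations, not LLVM functions.

#include <cassert>
#include <cstdint>

// Rough scalar model of ISD::FSHL for i32: shift the concatenation A:B left
// by C % 32 and keep the upper half.
static uint32_t Fshl32(uint32_t A, uint32_t B, unsigned C) {
  C &= 31;
  if (C == 0)
    return A; // fold (fshl A, B, 0) -> A
  return (A << C) | (B >> (32 - C));
}

static uint32_t Rotl32(uint32_t X, unsigned C) {
  C &= 31;
  return C ? (X << C) | (X >> (32 - C)) : X;
}

int main() {
  const uint32_t X = 0x80000001u;
  for (unsigned C = 0; C < 64; ++C)
    assert(Fshl32(X, X, C) == Rotl32(X, C)); // fshl X, X, C == rotl X, C
  return 0;
}
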
+
SDValue DAGCombiner::visitABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7012,6 +7205,16 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
return SDValue();
}
+// FIXME: This should be checking for no signed zeros on individual operands, as
+// well as no nans.
+static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
+ const TargetOptions &Options = DAG.getTarget().Options;
+ EVT VT = LHS.getValueType();
+
+ return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+ DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
+}
+
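
The guard above matters because select-of-compare and fminnum/fmaxnum only agree when no NaN can appear: an ordered compare is false for NaNs, so the select falls through to the other operand. A tiny scalar illustration (plain C++, using libm's fmin semantics):

#include <cassert>
#include <cmath>

// "select (x < y), x, y" written out as plain C++.
static double SelectLT(double X, double Y) { return X < Y ? X : Y; }

int main() {
  assert(SelectLT(1.0, 2.0) == std::fmin(1.0, 2.0));
  assert(SelectLT(-3.5, 2.0) == std::fmin(-3.5, 2.0));
  // With a NaN operand the two forms diverge, which is why the combine is
  // guarded by the known-never-NaN checks above.
  double N = std::nan("");
  assert(std::isnan(SelectLT(7.0, N))); // compare is false, select yields NaN
  assert(std::fmin(7.0, N) == 7.0);     // fminnum-style semantics drop it
  return 0;
}
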
/// Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
@@ -7020,6 +7223,7 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
+ EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
switch (CC) {
case ISD::SETOLT:
case ISD::SETOLE:
@@ -7027,8 +7231,15 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
case ISD::SETLE:
case ISD::SETULT:
case ISD::SETULE: {
+ // Since it's known never NaN to get here already, either fminnum or
+ // fminnum_ieee are OK. Try the IEEE version first, since fminnum is
+ // expanded in terms of it.
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
- if (TLI.isOperationLegal(Opcode, VT))
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
return SDValue();
}
@@ -7038,8 +7249,12 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
case ISD::SETGE:
case ISD::SETUGT:
case ISD::SETUGE: {
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
- if (TLI.isOperationLegal(Opcode, VT))
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
return SDValue();
}
@@ -7150,15 +7365,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
EVT VT0 = N0.getValueType();
SDLoc DL(N);
- // fold (select C, X, X) -> X
- if (N1 == N2)
- return N1;
-
- if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
- // fold (select true, X, Y) -> X
- // fold (select false, X, Y) -> Y
- return !N0C->isNullValue() ? N1 : N2;
- }
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
// fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or C, Y)
@@ -7264,32 +7472,54 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
}
- // fold selects based on a setcc into other things, such as min/max/abs
+ // Fold selects based on a setcc into other things, such as min/max/abs.
if (N0.getOpcode() == ISD::SETCC) {
- // select x, y (fcmp lt x, y) -> fminnum x, y
- // select x, y (fcmp gt x, y) -> fmaxnum x, y
- //
- // This is OK if we don't care about what happens if either operand is a
- // NaN.
- //
-
- // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
- // no signed zeros as well as no nans.
- const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
- DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- if (SDValue FMinMax = combineMinNumMaxNum(
- DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ // select (fcmp lt x, y), x, y -> fminnum x, y
+ // select (fcmp gt x, y), x, y -> fmaxnum x, y
+ //
+ // This is OK if we don't care what happens if either operand is a NaN.
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
+ if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
+ CC, TLI, DAG))
return FMinMax;
+
+ // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
+ // This is conservatively limited to pre-legal-operations to give targets
+ // a chance to reverse the transform if they want to do that. Also, it is
+ // unlikely that the pattern would be formed late, so it's probably not
+ // worth going through the other checks.
+ if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
+ CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
+ N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
+ auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
+ auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
+ if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
+ // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
+ // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
+ //
+ // The IR equivalent of this transform would have this form:
+ // %a = add %x, C
+ // %c = icmp ugt %x, ~C
+ // %r = select %c, -1, %a
+ // =>
+ // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
+ // %u0 = extractvalue %u, 0
+ // %u1 = extractvalue %u, 1
+ // %r = select %u1, -1, %u0
+ SDVTList VTs = DAG.getVTList(VT, VT0);
+ SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
+ return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
+ }
}
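
The IR-level comment above already sketches the transform; the standalone snippet below checks the underlying arithmetic fact that "x > ~C ? -1 : x + C" is exactly an unsigned add that clamps to all-ones on overflow, which is what the uaddo + select pair computes. Helper names are ours.

#include <cassert>
#include <cstdint>

// The select form matched by the combine.
static uint32_t SatAddSelect(uint32_t X, uint32_t C) {
  return X > ~C ? 0xffffffffu : X + C;
}

// The uaddo + select form it is rewritten into.
static uint32_t SatAddOverflow(uint32_t X, uint32_t C) {
  uint32_t Sum = X + C;          // uaddo.0
  bool Ov = Sum < X;             // uaddo.1 (carry out)
  return Ov ? 0xffffffffu : Sum; // select uaddo.1, -1, uaddo.0
}

int main() {
  const uint32_t Vals[] = {0u, 1u, 0x7fffffffu, 0xfffffff0u, 0xffffffffu};
  for (uint32_t X : Vals)
    for (uint32_t C : Vals)
      assert(SatAddSelect(X, C) == SatAddOverflow(X, C));
  return 0;
}
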
- if ((!LegalOperations &&
- TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
- TLI.isOperationLegal(ISD::SELECT_CC, VT))
- return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
- N0.getOperand(1), N1, N2, N0.getOperand(2));
+ if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
+ (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
+ N0.getOperand(2));
+
return SimplifySelect(DL, N0, N1, N2);
}
@@ -7388,7 +7618,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
TargetLowering::TypeSplitVector)
return SDValue();
- SDValue MaskLo, MaskHi, Lo, Hi;
+ SDValue MaskLo, MaskHi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
EVT LoVT, HiVT;
@@ -7416,17 +7646,15 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
Alignment, MSC->getAAInfo(), MSC->getRanges());
SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
- Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
- DL, OpsLo, MMO);
+ SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
+ DataLo.getValueType(), DL, OpsLo, MMO);
- SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };
- Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO);
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ // The order of the scatter operations after the split is well defined: the
+ // "Hi" part comes after the "Lo" part, so the two operations are chained
+ // one after another.
+ SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO);
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
@@ -7525,9 +7753,9 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- SDValue Src0 = MGT->getValue();
- SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+ SDValue PassThru = MGT->getPassThru();
+ SDValue PassThruLo, PassThruHi;
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
@@ -7550,11 +7778,11 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale };
+ SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
MMO);
- SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale };
+ SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
MMO);
@@ -7599,9 +7827,9 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- SDValue Src0 = MLD->getSrc0();
- SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+ SDValue PassThru = MLD->getPassThru();
+ SDValue PassThruLo, PassThruHi;
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
@@ -7625,8 +7853,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
- Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
- ISD::NON_EXTLOAD, MLD->isExpandingLoad());
+ Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
+ MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
MLD->isExpandingLoad());
@@ -7637,8 +7865,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
MLD->getAAInfo(), MLD->getRanges());
- Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
- ISD::NON_EXTLOAD, MLD->isExpandingLoad());
+ Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
+ MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -7717,9 +7945,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N2 = N->getOperand(2);
SDLoc DL(N);
- // fold (vselect C, X, X) -> X
- if (N1 == N2)
- return N1;
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
@@ -7754,12 +7981,26 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
}
+ // vselect x, y (fcmp lt x, y) -> fminnum x, y
+ // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
+ //
+ // This is OK if we don't care about what happens if either operand is a
+ // NaN.
+ //
+ EVT VT = N->getValueType(0);
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (SDValue FMinMax = combineMinNumMaxNum(
+ DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ return FMinMax;
+ }
+
// If this select has a condition (setcc) with narrower operands than the
// select, try to widen the compare to match the select width.
// TODO: This should be extended to handle any constant.
// TODO: This could be extended to handle non-loading patterns, but that
// requires thorough testing to avoid regressions.
- if (isNullConstantOrNullSplatConstant(RHS)) {
+ if (isNullOrNullSplat(RHS)) {
EVT NarrowVT = LHS.getValueType();
EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
EVT SetCCVT = getSetCCResultType(LHS.getValueType());
@@ -7902,9 +8143,8 @@ SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
-static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
- SelectionDAG &DAG, bool LegalTypes,
- bool LegalOperations) {
+static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG, bool LegalTypes) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7918,16 +8158,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
// fold (zext c1) -> c1
// fold (aext c1) -> c1
if (isa<ConstantSDNode>(N0))
- return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
+ return DAG.getNode(Opcode, SDLoc(N), VT, N0);
// fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
// fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
// fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
EVT SVT = VT.getScalarType();
- if (!(VT.isVector() &&
- (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
+ if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
- return nullptr;
+ return SDValue();
// We can fold this node into a build_vector.
unsigned VTBits = SVT.getSizeInBits();
@@ -7936,10 +8175,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned NumElts = VT.getVectorNumElements();
SDLoc DL(N);
- for (unsigned i=0; i != NumElts; ++i) {
- SDValue Op = N0->getOperand(i);
- if (Op->isUndef()) {
- Elts.push_back(DAG.getUNDEF(SVT));
+ // For zero-extensions, UNDEF elements are still guaranteed to have their
+ // upper bits set to zero.
+ bool IsZext =
+ Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = N0.getOperand(i);
+ if (Op.isUndef()) {
+ Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
continue;
}
@@ -7953,7 +8197,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
}
- return DAG.getBuildVector(VT, DL, Elts).getNode();
+ return DAG.getBuildVector(VT, DL, Elts);
}
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
@@ -8269,7 +8513,7 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- if ((LegalOperations || LN0->isVolatile()) &&
+ if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
return {};
@@ -8359,9 +8603,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
@@ -8498,21 +8741,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// if this is the case.
EVT SVT = getSetCCResultType(N00VT);
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- if (VT.getSizeInBits() == SVT.getSizeInBits())
- return DAG.getSetCC(DL, VT, N00, N01, CC);
-
- // If the desired elements are smaller or larger than the source
- // elements, we can use a matching integer vector type and then
- // truncate/sign extend.
- EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
- if (SVT == MatchingVecType) {
- SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
- return DAG.getSExtOrTrunc(VsetCC, DL, VT);
+ // If we already have the desired type, don't change it.
+ if (SVT != N0.getValueType()) {
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
+ return DAG.getSetCC(DL, VT, N00, N01, CC);
+
+ // If the desired elements are smaller or larger than the source
+ // elements, we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+ if (SVT == MatchingVecType) {
+ SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
+ return DAG.getSExtOrTrunc(VsetCC, DL, VT);
+ }
}
}
@@ -8569,40 +8815,37 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
KnownBits &Known) {
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
- DAG.computeKnownBits(Op, Known);
+ Known = DAG.computeKnownBits(Op);
return true;
}
- if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
- cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
+ if (N.getOpcode() != ISD::SETCC ||
+ N.getValueType().getScalarType() != MVT::i1 ||
+ cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
return false;
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
assert(Op0.getValueType() == Op1.getValueType());
- if (isNullConstant(Op0))
+ if (isNullOrNullSplat(Op0))
Op = Op1;
- else if (isNullConstant(Op1))
+ else if (isNullOrNullSplat(Op1))
Op = Op0;
else
return false;
- DAG.computeKnownBits(Op, Known);
+ Known = DAG.computeKnownBits(Op);
- if (!(Known.Zero | 1).isAllOnesValue())
- return false;
-
- return true;
+ return (Known.Zero | 1).isAllOnesValue();
}
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
// fold (zext (zext x)) -> (zext x)
// fold (zext (aext x)) -> (zext x)
@@ -8613,17 +8856,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate x)) -> (zext x) or
// (zext (truncate x)) -> (truncate x)
// This is valid when the truncated bits of x are already zero.
- // FIXME: We should extend this to work for vectors too.
SDValue Op;
KnownBits Known;
- if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
+ if (isTruncateOf(DAG, N0, Op, Known)) {
APInt TruncatedBits =
- (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
- APInt(Op.getValueSizeInBits(), 0) :
- APInt::getBitsSet(Op.getValueSizeInBits(),
- N0.getValueSizeInBits(),
- std::min(Op.getValueSizeInBits(),
- VT.getSizeInBits()));
+ (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
+ APInt(Op.getScalarValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getScalarValueSizeInBits(),
+ N0.getScalarValueSizeInBits(),
+ std::min(Op.getScalarValueSizeInBits(),
+ VT.getScalarSizeInBits()));
if (TruncatedBits.isSubsetOf(Known.Zero))
return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
}
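
The vector-enabled fold above replaces zext(trunc(x)) with x when the bits dropped by the truncate are already known zero. A trivial scalar sketch of why that is sound:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 0x7fu, 0xffu}; // high 24 bits known zero
  for (uint32_t X : Vals) {
    uint32_t ZextTrunc = uint32_t(uint8_t(X)); // trunc to i8, zext to i32
    assert(ZextTrunc == X);                    // fold back to X itself
  }
  return 0;
}
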
@@ -8851,9 +9093,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
@@ -8968,17 +9209,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/any extend
- else {
- EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
- SDValue VsetCC =
- DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
- }
+ EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
+ SDValue VsetCC =
+ DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
}
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
@@ -9025,6 +9265,26 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
}
+ // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
+ // than X, just move the AssertZext in front of the truncate and drop the
+ // AssertSext.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::AssertSext &&
+ Opcode == ISD::AssertZext) {
+ SDValue BigA = N0.getOperand(0);
+ EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+ assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
+ "Asserting zero/sign-extended bits to a type larger than the "
+ "truncated destination does not provide information");
+
+ if (AssertVT.bitsLT(BigA_AssertVT)) {
+ SDLoc DL(N);
+ SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
+ BigA.getOperand(0), N1);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
+ }
+ }
+
return SDValue();
}
@@ -9046,6 +9306,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (VT.isVector())
return SDValue();
+ unsigned ShAmt = 0;
+ bool HasShiftedOffset = false;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
@@ -9073,15 +9335,25 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!AndC || !AndC->getAPIntValue().isMask())
+ if (!AndC)
+ return SDValue();
+
+ const APInt &Mask = AndC->getAPIntValue();
+ unsigned ActiveBits = 0;
+ if (Mask.isMask()) {
+ ActiveBits = Mask.countTrailingOnes();
+ } else if (Mask.isShiftedMask()) {
+ ShAmt = Mask.countTrailingZeros();
+ APInt ShiftedMask = Mask.lshr(ShAmt);
+ ActiveBits = ShiftedMask.countTrailingOnes();
+ HasShiftedOffset = true;
+ } else
return SDValue();
- unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
ExtType = ISD::ZEXTLOAD;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
}
- unsigned ShAmt = 0;
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
SDValue SRL = N0;
if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
@@ -9150,13 +9422,16 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
- // For big endian targets, we need to adjust the offset to the pointer to
- // load the correct bytes.
- if (DAG.getDataLayout().isBigEndian()) {
+ auto AdjustBigEndianShift = [&](unsigned ShAmt) {
unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
- ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
- }
+ return LVTStoreBits - EVTStoreBits - ShAmt;
+ };
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (DAG.getDataLayout().isBigEndian())
+ ShAmt = AdjustBigEndianShift(ShAmt);
EVT PtrType = N0.getOperand(1).getValueType();
uint64_t PtrOff = ShAmt / 8;
@@ -9204,6 +9479,21 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
}
+ if (HasShiftedOffset) {
+ // Recalculate the shift amount after it has been altered to calculate
+ // the offset.
+ if (DAG.getDataLayout().isBigEndian())
+ ShAmt = AdjustBigEndianShift(ShAmt);
+
+ // We're using a shifted mask, so the load now has an offset. This means
+ // that the data has been loaded into lower bits of the register than it
+ // would have been before, so shl the loaded data back into the correct
+ // position in the register.
+ SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
+ Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ }
+
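
The shifted-mask case added to ReduceLoadWidth rests on a simple bit identity: masking with a shifted mask equals extracting the narrow field (what the narrowed zero-extending load produces) and shifting it back into position. A standalone check, unrelated to LLVM's own tests:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Mask = 0x00ffff00u; // a shifted mask (isShiftedMask)
  const unsigned ShAmt = 8;          // its count of trailing zeros
  const uint32_t Narrow = 0xffffu;   // ActiveBits ones
  const uint32_t Vals[] = {0u, 0x12345678u, 0xffffffffu, 0xdead00ffu};
  for (uint32_t X : Vals) {
    uint32_t Field = (X >> ShAmt) & Narrow; // the narrowed, zero-extended load
    assert((X & Mask) == (Field << ShAmt)); // shl back into position
  }
  return 0;
}
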
// Return the new loaded value.
return Result;
}
@@ -9235,12 +9525,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (sext x)) -> (sext x)
// fold (sext_in_reg (aext x)) -> (sext x)
- // if x is small enough.
+ // if x is small enough or if we know that x has more than 1 sign bit and the
+ // sign_extend_inreg is extending from one of them.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getScalarValueSizeInBits() <= EVTBits &&
+ unsigned N00Bits = N00.getScalarValueSizeInBits();
+ if ((N00Bits <= EVTBits ||
+ (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
// fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
@@ -9250,7 +9543,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
- return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
+ N0.getOperand(0));
}
// fold (sext_in_reg (zext x)) -> (sext x)
@@ -9345,9 +9639,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
if (N0.isUndef())
return DAG.getUNDEF(VT);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
return SDValue();
}
@@ -9359,9 +9655,11 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
if (N0.isUndef())
return DAG.getUNDEF(VT);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
return SDValue();
}
@@ -9458,8 +9756,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
SDValue Amt = N0.getOperand(1);
- KnownBits Known;
- DAG.computeKnownBits(Amt, Known);
+ KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
SDLoc SL(N);
@@ -9636,6 +9933,32 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
+ // Narrow a suitable binary operation with a non-opaque constant operand by
+ // moving it ahead of the truncate. This is limited to pre-legalization
+ // because targets may prefer a wider type during later combines and invert
+ // this transform.
+ switch (N0.getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ if (!LegalOperations && N0.hasOneUse() &&
+ (isConstantOrConstantVector(N0.getOperand(0), true) ||
+ isConstantOrConstantVector(N0.getOperand(1), true))) {
+ // TODO: We already restricted this to pre-legalization, but for vectors
+ // we are extra cautious to not create an unsupported operation.
+ // Target-specific changes are likely needed to avoid regressions here.
+ if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
+ SDLoc DL(N);
+ SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
+ SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
+ return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
+ }
+ }
+ }
+
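
The narrowing above is sound because truncation distributes over these six operations: the low bits of a sum, difference, product, or bitwise op depend only on the low bits of the inputs. A quick scalar check (the math is kept in uint32_t to sidestep C++ integer-promotion pitfalls):

#include <cassert>
#include <cstdint>

// Truncation to 16 bits, kept in uint32_t on purpose.
static uint32_t Trunc16(uint32_t V) { return V & 0xffffu; }

int main() {
  const uint32_t As[] = {0u, 0x1234u, 0xffffabcdu};
  const uint32_t Bs[] = {1u, 0x00ff00ffu, 0xffffffffu};
  for (uint32_t A : As)
    for (uint32_t B : Bs) {
      assert(Trunc16(A + B) == Trunc16(Trunc16(A) + Trunc16(B)));
      assert(Trunc16(A - B) == Trunc16(Trunc16(A) - Trunc16(B)));
      assert(Trunc16(A * B) == Trunc16(Trunc16(A) * Trunc16(B)));
      assert(Trunc16(A & B) == Trunc16(Trunc16(A) & Trunc16(B)));
      assert(Trunc16(A | B) == Trunc16(Trunc16(A) | Trunc16(B)));
      assert(Trunc16(A ^ B) == Trunc16(Trunc16(A) ^ Trunc16(B)));
    }
  return 0;
}
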
return SDValue();
}
@@ -9694,11 +10017,11 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
return SDValue();
- // TODO: Use splat values for the constant-checking below and remove this
- // restriction.
+ // TODO: Handle cases where the integer constant is a different scalar
+ // bitwidth than the FP type.
SDValue N0 = N->getOperand(0);
EVT SourceVT = N0.getValueType();
- if (SourceVT.isVector())
+ if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
return SDValue();
unsigned FPOpcode;
@@ -9706,25 +10029,35 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
switch (N0.getOpcode()) {
case ISD::AND:
FPOpcode = ISD::FABS;
- SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
+ SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
case ISD::XOR:
FPOpcode = ISD::FNEG;
- SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
+ SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
+ break;
+ case ISD::OR:
+ FPOpcode = ISD::FABS;
+ SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
- // TODO: ISD::OR --> ISD::FNABS?
default:
return SDValue();
}
// Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
// Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
+ // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
+ // fneg (fabs X)
SDValue LogicOp0 = N0.getOperand(0);
- ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
LogicOp0.getOpcode() == ISD::BITCAST &&
- LogicOp0->getOperand(0).getValueType() == VT)
- return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
+ LogicOp0.getOperand(0).getValueType() == VT) {
+ SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
+ NumFPLogicOpsConv++;
+ if (N0.getOpcode() == ISD::OR)
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
+ return FPOp;
+ }
return SDValue();
}
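
The new ISD::OR case maps OR-of-sign-mask onto fneg(fabs x). A scalar demonstration of that bit-pattern identity, using memcpy for the bitcasts so the example stays well-defined C++ (the helper is ours):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// OR the IEEE-754 sign bit into a float's bit pattern.
static float OrSignBit(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits |= 0x80000000u; // the SignMask used by the combine
  std::memcpy(&X, &Bits, sizeof(Bits));
  return X;
}

int main() {
  const float Vals[] = {0.0f, 1.5f, -2.25f, 123456.0f};
  for (float X : Vals)
    assert(OrSignBit(X) == -std::fabs(X)); // or(SignMask) == fneg(fabs X)
  return 0;
}
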
@@ -9737,33 +10070,32 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return DAG.getUNDEF(VT);
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
- // Only do this before legalize, since afterward the target may be depending
- // on the bitconvert.
+ // Only do this before legalize types, since we might create an illegal
+ // scalar type. Even if we knew we wouldn't create an illegal scalar type
+ // we can only do this before legalize ops, since the target may be
+ // depending on the bitcast.
// First check to see if this is all constant.
if (!LegalTypes &&
N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
- VT.isVector()) {
- bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
-
- EVT DestEltVT = N->getValueType(0).getVectorElementType();
- assert(!DestEltVT.isVector() &&
- "Element type of vector ValueType must not be vector!");
- if (isSimple)
- return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
- }
+ VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
+ VT.getVectorElementType());
// If the input is a constant, let getNode fold it.
- // We always need to check that this is just a fp -> int or int -> conversion
- // otherwise we will get back N which will confuse the caller into thinking
- // we used CombineTo. This can block target combines from running. If we can't
- // allowed legal operations, we need to ensure the resulting operation will be
- // legal.
- // TODO: Maybe we should check that the return value isn't N explicitly?
- if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
- (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
- (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
- (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
- return DAG.getBitcast(VT, N0);
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ // If we can't allow illegal operations, we need to check that this is just
+ // an fp -> int or int -> fp conversion and that the resulting operation
+ // will be legal.
+ if (!LegalOperations ||
+ (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::Constant, VT))) {
+ SDValue C = DAG.getBitcast(VT, N0);
+ if (C.getNode() != N)
+ return C;
+ }
+ }
// (conv (conv x, t1), t2) -> (conv x, t2)
if (N0.getOpcode() == ISD::BITCAST)
@@ -9772,12 +10104,16 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (conv (load x)) -> (load (conv*)x)
// If the resultant load doesn't need a higher alignment than the original!
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- // Do not change the width of a volatile load.
- !cast<LoadSDNode>(N0)->isVolatile() &&
// Do not remove the cast if the types differ in endian layout.
TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ // If the load is volatile, we only want to change the load type if the
+ // resulting load is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
unsigned OrigAlign = LN0->getAlignment();
@@ -9934,7 +10270,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// float vectors bitcast to integer vectors) into shuffles.
// bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
- N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
!(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
@@ -10000,15 +10336,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// If this is a conversion of N elements of one type to N elements of another
// type, convert each element. This handles FP<->INT cases.
if (SrcBitSize == DstBitSize) {
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
- BV->getValueType(0).getVectorNumElements());
-
- // Due to the FP element handling below calling this routine recursively,
- // we can end up with a scalar-to-vector node here.
- if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
- DAG.getBitcast(DstEltVT, BV->getOperand(0)));
-
SmallVector<SDValue, 8> Ops;
for (SDValue Op : BV->op_values()) {
// If the vector element type is not legal, the BUILD_VECTOR operands
@@ -10018,6 +10345,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
Ops.push_back(DAG.getBitcast(DstEltVT, Op));
AddToWorklist(Ops.back().getNode());
}
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
return DAG.getBuildVector(VT, SDLoc(BV), Ops);
}
@@ -10651,17 +10980,18 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
- // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
- // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
+ // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
+ // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
- auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
- if (XC1 && XC1->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
- if (XC1 && XC1->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
+ if (C->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
+ if (C->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ }
}
return SDValue();
};
@@ -10671,29 +11001,30 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
if (SDValue FMA = FuseFADD(N1, N0, Flags))
return FMA;
- // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
- // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
- // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
- // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
+ // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
+ // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
+ // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
+ // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
- auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
- if (XC0 && XC0->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- Y, Flags);
- if (XC0 && XC0->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
-
- auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
- if (XC1 && XC1->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
- if (XC1 && XC1->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
+ if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
+ if (C0->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ Y, Flags);
+ if (C0->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ }
+ if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
+ if (C1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ if (C1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
+ }
}
return SDValue();
};
@@ -10706,14 +11037,6 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
return SDValue();
}
-static bool isFMulNegTwo(SDValue &N) {
- if (N.getOpcode() != ISD::FMUL)
- return false;
- if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
- return CFP->isExactlyValue(-2.0);
- return false;
-}
-
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -10737,6 +11060,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+ // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
+ ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
+ if (N1C && N1C->isZero())
+ if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
+ return N0;
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -10752,23 +11081,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FSUB, DL, VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations), Flags);
- // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
- // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
- if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
- (isFMulNegTwo(N1) && N1.hasOneUse())) {
- bool N1IsFMul = isFMulNegTwo(N1);
- SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
- SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
- return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
- }
+ auto isFMulNegTwo = [](SDValue FMul) {
+ if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
+ return false;
+ auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
+ return C && C->isExactlyValue(-2.0);
+ };
- ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
- if (N1C && N1C->isZero()) {
- if (N1C->isNegative() || Options.UnsafeFPMath ||
- Flags.hasNoSignedZeros()) {
- // fold (fadd A, 0) -> A
- return N0;
- }
+ // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
+ if (isFMulNegTwo(N0)) {
+ SDValue B = N0.getOperand(0);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
+ }
+ // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
+ if (isFMulNegTwo(N1)) {
+ SDValue B = N1.getOperand(0);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
}
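
Unlike many FP combines here, the fmul-by--2.0 rewrite needs no fast-math flags because it is exact in IEEE arithmetic: multiplying by -2.0 and adding a value to itself are both exact, so A + B*-2.0 and A - (B + B) round identically. A small spot check:

#include <cassert>

int main() {
  const double As[] = {0.0, 1.0, 3.141592653589793, -1e300};
  const double Bs[] = {0.0, 0.1, -2.5, 1e-300};
  for (double A : As)
    for (double B : Bs)
      assert(A + B * -2.0 == A - (B + B)); // both sides round identically
  return 0;
}
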
// No FP constant should be created after legalization as Instruction
@@ -10887,8 +11217,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -10920,9 +11250,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return DAG.getConstantFP(0.0f, DL, VT);
}
- // (fsub 0, B) -> -B
+ // (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
- if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
+ if (N0CFP->isNegative() ||
+ (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
@@ -10930,27 +11261,22 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
+ if ((Options.UnsafeFPMath ||
+ (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
+ && N1.getOpcode() == ISD::FADD) {
+ // X - (X + Y) -> -Y
+ if (N0 == N1->getOperand(0))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
+ // X - (Y + X) -> -Y
+ if (N0 == N1->getOperand(1))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
+ }
+
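
In contrast, the X - (X + Y) -> -Y rewrite above is only performed under UnsafeFPMath or reassoc+nsz because it is not an exact IEEE identity; the rounding of X + Y can swallow Y entirely. A one-line counterexample:

#include <cassert>

int main() {
  double X = 1e16, Y = 1.0;
  assert(X - (X + Y) == 0.0); // X + Y rounds back to X, so the fsub gives 0.0
  // The rewritten form -Y would give -1.0 instead; accepting that difference
  // is exactly what the reassociation / no-signed-zeros gate permits.
  return 0;
}
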
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations), Flags);
- // If 'unsafe math' is enabled, fold lots of things.
- if (Options.UnsafeFPMath) {
- // (fsub x, (fadd x, y)) -> (fneg y)
- // (fsub x, (fadd y, x)) -> (fneg y)
- if (N1.getOpcode() == ISD::FADD) {
- SDValue N10 = N1->getOperand(0);
- SDValue N11 = N1->getOperand(1);
-
- if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
- return GetNegatedExpression(N11, DAG, LegalOperations);
-
- if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
- return GetNegatedExpression(N10, DAG, LegalOperations);
- }
- }
-
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
AddToWorklist(Fused.getNode());
@@ -10963,8 +11289,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -11002,26 +11328,16 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
- if (N0.getOpcode() == ISD::FMUL) {
- // Fold scalars or any vector constants (not just splats).
- // This fold is done in general by InstCombine, but extra fmul insts
- // may have been generated during lowering.
+ if (isConstantFPBuildVectorOrConstantFP(N1) &&
+ N0.getOpcode() == ISD::FMUL) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
- auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
- auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
- auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
-
- // Check 1: Make sure that the first operand of the inner multiply is NOT
- // a constant. Otherwise, we may induce infinite looping.
- if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
- // Check 2: Make sure that the second operand of the inner multiply and
- // the second operand of the outer multiply are constants.
- if ((N1CFP && isConstOrConstSplatFP(N01)) ||
- (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
- }
+ // Avoid an infinite loop by making sure that N00 is not a constant
+ // (the inner multiply has not been constant folded yet).
+ if (isConstantFPBuildVectorOrConstantFP(N01) &&
+ !isConstantFPBuildVectorOrConstantFP(N00)) {
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
}
}
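Folding the two constants together is only valid under UnsafeFPMath/reassoc because FP multiplication is not associative. A standalone example where the two evaluation orders differ (the intermediate product overflows):

#include <cstdio>
int main() {
  double X = 1e308;
  printf("%g\n", (X * 10.0) * 0.1); // inf: the inner product overflows
  printf("%g\n", X * (10.0 * 0.1)); // 1e+308: the constants fold to 1.0 first
  return 0;
}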
@@ -11445,15 +11761,15 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
if (N0CFP && N1CFP) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
- if (N1CFP) {
- const APFloat &V = N1CFP->getValueAPF();
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
+ const APFloat &V = N1C->getValueAPF();
// copysign(x, c1) -> fabs(x) iff ispos(c1)
// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
if (!V.isNegative()) {
@@ -11489,6 +11805,72 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFPOW(SDNode *N) {
+ ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
+ if (!ExponentC)
+ return SDValue();
+
+ // Try to convert x ** (1/3) into cube root.
+ // TODO: Handle the various flavors of long double.
+ // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
+ // Some range near 1/3 should be fine.
+ EVT VT = N->getValueType(0);
+ if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
+ (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
+ // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
+ // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
+ // pow(-val, 1/3) = nan; cbrt(-val) = -num.
+ // For regular numbers, rounding may cause the results to differ.
+ // Therefore, we require { nsz ninf nnan afn } for this transform.
+ // TODO: We could select out the special cases if we don't have nsz/ninf.
+ SDNodeFlags Flags = N->getFlags();
+ if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ // Do not create a cbrt() libcall if the target does not have it, and do not
+ // turn a pow that has lowering support into a cbrt() libcall.
+ if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
+ (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
+ DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
+ return SDValue();
+
+ return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
+ }
+
+ // Try to convert x ** (1/4) into square roots.
+ // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
+ // TODO: This could be extended (using a target hook) to handle smaller
+ // power-of-2 fractional exponents.
+ if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
+ // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
+ // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
+ // For regular numbers, rounding may cause the results to differ.
+ // Therefore, we require { nsz ninf afn } for this transform.
+ // TODO: We could select out the special cases if we don't have nsz/ninf.
+ SDNodeFlags Flags = N->getFlags();
+ if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ // Don't double the number of libcalls. We are trying to inline fast code.
+ if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
+ return SDValue();
+
+ // Assume that libcalls are the smallest code.
+ // TODO: This restriction should probably be lifted for vectors.
+ if (DAG.getMachineFunction().getFunction().optForSize())
+ return SDValue();
+
+ // pow(X, 0.25) --> sqrt(sqrt(X))
+ SDLoc DL(N);
+ SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
+ return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ }
+
+ return SDValue();
+}
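For positive, finite inputs the folded forms agree with pow; the required flags exist because -0.0, -inf, NaN, and negative inputs behave differently, as the comments above note. A minimal check against the C math library (assumed available on the host):

#include <cmath>
#include <cstdio>
int main() {
  double x = 64.0;
  printf("%g %g\n", pow(x, 1.0 / 3.0), cbrt(x));  // both print 4
  printf("%g %g\n", pow(x, 0.25), sqrt(sqrt(x))); // both print 2.82843
  return 0;
}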
+
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
// This optimization is guarded by a function attribute because it may produce
@@ -11538,8 +11920,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
// but UINT_TO_FP is legal on this target, try to convert.
- if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
- TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+ if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
+ hasOperation(ISD::UINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to UINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
@@ -11595,8 +11977,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
// If the input is a legal type, and UINT_TO_FP is not legal on this target,
// but SINT_TO_FP is legal on this target, try to convert.
- if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
- TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+ if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
+ hasOperation(ISD::SINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to SINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
@@ -11917,7 +12299,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
+static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
+ APFloat (*Op)(const APFloat &, const APFloat &)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
@@ -11927,36 +12310,31 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
+ return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
}
// Canonicalize to constant on RHS.
if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
return SDValue();
}
-SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- EVT VT = N->getValueType(0);
- const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
+ return visitFMinMax(DAG, N, minnum);
+}
- if (N0CFP && N1CFP) {
- const APFloat &C0 = N0CFP->getValueAPF();
- const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
- }
+SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
+ return visitFMinMax(DAG, N, maxnum);
+}
- // Canonicalize to constant on RHS.
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
+SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
+ return visitFMinMax(DAG, N, minimum);
+}
- return SDValue();
+SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
+ return visitFMinMax(DAG, N, maximum);
}
SDValue DAGCombiner::visitFABS(SDNode *N) {
@@ -11976,11 +12354,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
- // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
- // constant pool values.
- if (!TLI.isFAbsFree(VT) &&
- N0.getOpcode() == ISD::BITCAST &&
- N0.getNode()->hasOneUse()) {
+ // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
+ if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
@@ -12512,8 +12887,15 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (TryNext)
continue;
- // Check for #2
- if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+ // Check for #2.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ // Ptr is predecessor to both N and Op.
+ Visited.insert(Ptr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(Op);
+ if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
+ !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
SDValue Result = isLoad
? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
BasePtr, Offset, AM)
@@ -12571,6 +12953,157 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
+static inline int numVectorEltsOrZero(EVT T) {
+ return T.isVector() ? T.getVectorNumElements() : 0;
+}
+
+bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
+ Val = ST->getValue();
+ EVT STType = Val.getValueType();
+ EVT STMemType = ST->getMemoryVT();
+ if (STType == STMemType)
+ return true;
+ if (isTypeLegal(STMemType))
+ return false; // fail.
+ if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
+ TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
+ Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
+ STType.isInteger() && STMemType.isInteger()) {
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
+ Val = DAG.getBitcast(STMemType, Val);
+ return true;
+ }
+ return false; // fail.
+}
+
+bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
+ EVT LDMemType = LD->getMemoryVT();
+ EVT LDType = LD->getValueType(0);
+ assert(Val.getValueType() == LDMemType &&
+ "Attempting to extend value of non-matching type");
+ if (LDType == LDMemType)
+ return true;
+ if (LDMemType.isInteger() && LDType.isInteger()) {
+ switch (LD->getExtensionType()) {
+ case ISD::NON_EXTLOAD:
+ Val = DAG.getBitcast(LDType, Val);
+ return true;
+ case ISD::EXTLOAD:
+ Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::SEXTLOAD:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::ZEXTLOAD:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ }
+ }
+ return false;
+}
+
+SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
+ if (OptLevel == CodeGenOpt::None || LD->isVolatile())
+ return SDValue();
+ SDValue Chain = LD->getOperand(0);
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
+ if (!ST || ST->isVolatile())
+ return SDValue();
+
+ EVT LDType = LD->getValueType(0);
+ EVT LDMemType = LD->getMemoryVT();
+ EVT STMemType = ST->getMemoryVT();
+ EVT STType = ST->getValue().getValueType();
+
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
+ int64_t Offset;
+ if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ return SDValue();
+
+ // Normalize for endianness. After this, Offset=0 will denote that the least
+ // significant bit in the loaded value maps to the least significant bit in
+ // the stored value. With Offset=n (for n > 0) the loaded value starts at the
+ // nth least significant byte of the stored value.
+ if (DAG.getDataLayout().isBigEndian())
+ Offset = (STMemType.getStoreSizeInBits() -
+ LDMemType.getStoreSizeInBits()) / 8 - Offset;
+
+ // Check that the stored value covers all bits that are loaded.
+ bool STCoversLD =
+ (Offset >= 0) &&
+ (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
+
+ auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
+ if (LD->isIndexed()) {
+ bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
+ LD->getAddressingMode() == ISD::POST_DEC);
+ unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
+ SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
+ LD->getOperand(1), LD->getOperand(2));
+ SDValue Ops[] = {Val, Idx, Chain};
+ return CombineTo(LD, Ops, 3);
+ }
+ return CombineTo(LD, Val, Chain);
+ };
+
+ if (!STCoversLD)
+ return SDValue();
+
+ // Memory as copy space (potentially masked).
+ if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
+ // Simple case: Direct non-truncating forwarding
+ if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
+ return ReplaceLd(LD, ST->getValue(), Chain);
+ // Can we model the truncate and extension with an and mask?
+ if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
+ !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
+ // Mask to size of LDMemType
+ auto Mask =
+ DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
+ STMemType.getSizeInBits()),
+ SDLoc(ST), STType);
+ auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
+ return ReplaceLd(LD, Val, Chain);
+ }
+ }
+
+ // TODO: Deal with nonzero offset.
+ if (LD->getBasePtr().isUndef() || Offset != 0)
+ return SDValue();
+ // Model necessary truncations / extensions.
+ SDValue Val;
+ // Truncate Value To Stored Memory Size.
+ do {
+ if (!getTruncatedStoreValue(ST, Val))
+ continue;
+ if (!isTypeLegal(LDMemType))
+ continue;
+ if (STMemType != LDMemType) {
+ // TODO: Support vectors? This requires extract_subvector/bitcast.
+ if (!STMemType.isVector() && !LDMemType.isVector() &&
+ STMemType.isInteger() && LDMemType.isInteger())
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
+ else
+ continue;
+ }
+ if (!extendLoadedValueToExtension(LD, Val))
+ continue;
+ return ReplaceLd(LD, Val, Chain);
+ } while (false);
+
+ // On failure, cleanup dead nodes we may have created.
+ if (Val->use_empty())
+ deleteAndRecombine(Val.getNode());
+ return SDValue();
+}
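A sketch of the masked forwarding path with assumed types: an i32 value stored through a truncating i16 store and read back with a zero-extending i16 load can be replaced by an AND with the low 16 bits of the stored value.

#include <cassert>
#include <cstdint>
int main() {
  uint32_t Stored = 0xAABBCCDD;        // value fed to the truncating i16 store
  uint32_t Mask = (1u << 16) - 1;      // APInt::getLowBitsSet(32, 16)
  uint32_t Forwarded = Stored & Mask;  // stands in for the zextload i16
  assert(Forwarded == 0x0000CCDDu);
  return 0;
}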
+
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
@@ -12637,17 +13170,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// If this load is directly stored, replace the load value with the stored
// value.
- // TODO: Handle store large -> read small portion.
- // TODO: Handle TRUNCSTORE/LOADEXT
- if (OptLevel != CodeGenOpt::None &&
- ISD::isNormalLoad(N) && !LD->isVolatile()) {
- if (ISD::isNON_TRUNCStore(Chain.getNode())) {
- StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
- if (PrevST->getBasePtr() == Ptr &&
- PrevST->getValue().getValueType() == N->getValueType(0))
- return CombineTo(N, PrevST->getOperand(1), Chain);
- }
- }
+ if (auto V = ForwardStoreValueToDirectLoad(LD))
+ return V;
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
@@ -13055,8 +13579,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
// Sort the slices so that elements that are likely to be next to each
// other in memory are next to each other in the list.
- llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
- [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
});
@@ -13689,7 +14212,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SDValue Val = St->getValue();
// If constant is of the wrong type, convert it now.
if (MemVT != Val.getValueType()) {
- Val = peekThroughBitcast(Val);
+ Val = peekThroughBitcasts(Val);
// Deal with constants of wrong size.
if (ElementSizeBits != Val.getValueSizeInBits()) {
EVT IntMemVT =
@@ -13715,7 +14238,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue Val = peekThroughBitcast(St->getValue());
+ SDValue Val = peekThroughBitcasts(St->getValue());
// All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
// type MemVT. If the underlying value is not the correct
// type, but it is an extraction of an appropriate vector we
@@ -13725,26 +14248,17 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
if ((MemVT != Val.getValueType()) &&
(Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
- SDValue Vec = Val.getOperand(0);
EVT MemVTScalarTy = MemVT.getScalarType();
- SDValue Idx = Val.getOperand(1);
// We may need to add a bitcast here to get types to line up.
- if (MemVTScalarTy != Vec.getValueType()) {
- unsigned Elts = Vec.getValueType().getSizeInBits() /
- MemVTScalarTy.getSizeInBits();
- if (Val.getValueType().isVector() && MemVT.isVector()) {
- unsigned IdxC = cast<ConstantSDNode>(Idx)->getZExtValue();
- unsigned NewIdx =
- ((uint64_t)IdxC * MemVT.getVectorNumElements()) / Elts;
- Idx = DAG.getConstant(NewIdx, SDLoc(Val), Idx.getValueType());
- }
- EVT NewVecTy =
- EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
- Vec = DAG.getBitcast(NewVecTy, Vec);
+ if (MemVTScalarTy != Val.getValueType().getScalarType()) {
+ Val = DAG.getBitcast(MemVT, Val);
+ } else {
+ unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
+ : ISD::EXTRACT_VECTOR_ELT;
+ SDValue Vec = Val.getOperand(0);
+ SDValue Idx = Val.getOperand(1);
+ Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
}
- auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
- : ISD::EXTRACT_VECTOR_ELT;
- Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
}
Ops.push_back(Val);
}
@@ -13769,7 +14283,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
- Val = peekThroughBitcast(Val);
+ Val = peekThroughBitcasts(Val);
StoreInt <<= ElementSizeBits;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
StoreInt |= C->getAPIntValue()
@@ -13832,7 +14346,7 @@ void DAGCombiner::getStoreMergeCandidates(
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
EVT MemVT = St->getMemoryVT();
- SDValue Val = peekThroughBitcast(St->getValue());
+ SDValue Val = peekThroughBitcasts(St->getValue());
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
return;
@@ -13866,7 +14380,7 @@ void DAGCombiner::getStoreMergeCandidates(
int64_t &Offset) -> bool {
if (Other->isVolatile() || Other->isIndexed())
return false;
- SDValue Val = peekThroughBitcast(Other->getValue());
+ SDValue Val = peekThroughBitcasts(Other->getValue());
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
@@ -13973,11 +14487,12 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
Worklist.push_back(RootNode);
while (!Worklist.empty()) {
auto N = Worklist.pop_back_val();
+ if (!Visited.insert(N).second)
+ continue; // Already present in Visited.
if (N->getOpcode() == ISD::TokenFactor) {
for (SDValue Op : N->ops())
Worklist.push_back(Op.getNode());
}
- Visited.insert(N);
}
// Don't count pruning nodes towards max.
@@ -13990,14 +14505,14 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
// in candidate selection and can be
// safely ignored
// * Value (Op 1) -> Cycles may happen (e.g. through load chains)
- // * Address (Op 2) -> Merged addresses may only vary by a fixed constant
- // and so no cycles are possible.
- // * (Op 3) -> appears to always be undef. Cannot be source of cycle.
- //
- // Thus we need only check predecessors of the value operands.
- auto *Op = N->getOperand(1).getNode();
- if (Visited.insert(Op).second)
- Worklist.push_back(Op);
+ // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
+ // but aren't necessarily from the same base node, so
+ // cycles are possible (e.g. via indexed store).
+ // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
+ // non-indexed stores). Not constant on all targets (e.g. ARM)
+ // and so can participate in a cycle.
+ for (unsigned j = 1; j < N->getNumOperands(); ++j)
+ Worklist.push_back(N->getOperand(j).getNode());
}
// Search through DAG. We can stop early if we find a store node.
for (unsigned i = 0; i < NumStores; ++i)
@@ -14030,7 +14545,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Perform an early exit check. Do not bother looking at stored values that
// are not constants, loads, or extracted vector elements.
- SDValue StoredVal = peekThroughBitcast(St->getValue());
+ SDValue StoredVal = peekThroughBitcasts(St->getValue());
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
@@ -14051,10 +14566,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Sort the memory operands according to their distance from the
// base pointer.
- llvm::sort(StoreNodes.begin(), StoreNodes.end(),
- [](MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase;
- });
+ llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
// Store Merge attempts to merge the lowest stores. This generally
// works out as if successful, as the remaining stores are checked
@@ -14299,7 +14813,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue Val = peekThroughBitcast(St->getValue());
+ SDValue Val = peekThroughBitcasts(St->getValue());
LoadSDNode *Ld = cast<LoadSDNode>(Val);
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
@@ -14647,8 +15161,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
EVT SVT = Value.getOperand(0).getValueType();
+ // If the store is volatile, we only want to change the store type if the
+ // resulting store is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
if (((!LegalOperations && !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
+ TLI.isOperationLegal(ISD::STORE, SVT)) &&
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
unsigned OrigAlign = ST->getAlignment();
bool Fast = false;
@@ -14699,7 +15218,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// FIXME: is there such a thing as a truncating indexed store?
if (ST->isTruncatingStore() && ST->isUnindexed() &&
- Value.getValueType().isInteger()) {
+ Value.getValueType().isInteger() &&
+ (!isa<ConstantSDNode>(Value) ||
+ !cast<ConstantSDNode>(Value)->isOpaque())) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
@@ -14983,6 +15504,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return InVec;
EVT VT = InVec.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
// Remove redundant insertions:
// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
@@ -14990,12 +15512,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
return InVec;
- // We must know which element is being inserted for folds below here.
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
- if (!IndexC)
+ if (!IndexC) {
+ // If this is variable insert to undef vector, it might be better to splat:
+ // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
+ if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
+ SmallVector<SDValue, 8> Ops(NumElts, InVal);
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
return SDValue();
- unsigned Elt = IndexC->getZExtValue();
+ }
+ // We must know which element is being inserted for folds below here.
+ unsigned Elt = IndexC->getZExtValue();
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
return Shuf;
@@ -15033,11 +15562,11 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
Ops.append(InVec.getNode()->op_begin(),
InVec.getNode()->op_end());
} else if (InVec.isUndef()) {
- unsigned NElts = VT.getVectorNumElements();
- Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
+ Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
} else {
return SDValue();
}
+ assert(Ops.size() == NumElts && "Unexpected vector size");
// Insert the element
if (Elt < Ops.size()) {
@@ -15051,8 +15580,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return DAG.getBuildVector(VT, DL, Ops);
}
-SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
- SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
+ SDValue EltNo,
+ LoadSDNode *OriginalLoad) {
assert(!OriginalLoad->isVolatile());
EVT ResultVT = EVE->getValueType(0);
@@ -15134,70 +15664,132 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
return SDValue(EVE, 0);
}
+/// Transform a vector binary operation into a scalar binary operation by moving
+/// the math/logic after an extract element of a vector.
+static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
+ bool LegalOperations) {
+ SDValue Vec = ExtElt->getOperand(0);
+ SDValue Index = ExtElt->getOperand(1);
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
+ return SDValue();
+
+ // Targets may want to avoid this to prevent an expensive register transfer.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldScalarizeBinop(Vec))
+ return SDValue();
+
+ // Extracting an element of a vector constant is constant-folded, so this
+ // transform is just replacing a vector op with a scalar op while moving the
+ // extract.
+ SDValue Op0 = Vec.getOperand(0);
+ SDValue Op1 = Vec.getOperand(1);
+ if (isAnyConstantBuildVector(Op0, true) ||
+ isAnyConstantBuildVector(Op1, true)) {
+ // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
+ // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
+ SDLoc DL(ExtElt);
+ EVT VT = ExtElt->getValueType(0);
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
+ return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
+ }
+
+ return SDValue();
+}
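The transform pays off because extracting a lane of the constant operand constant-folds away, leaving a single scalar op. A scalar model of the fold with assumed values:

#include <cassert>
int main() {
  int X[4] = {10, 20, 30, 40};
  int C[4] = {1, 2, 3, 4};
  int Wide[4];
  for (int i = 0; i != 4; ++i)
    Wide[i] = X[i] + C[i];        // the vector binop
  // extractelt (add X, C), 2 --> add (extractelt X, 2), C[2]
  assert(Wide[2] == X[2] + C[2]);
  return 0;
}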
+
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
- // (vextract (scalar_to_vector val, 0) -> val
- SDValue InVec = N->getOperand(0);
- EVT VT = InVec.getValueType();
- EVT NVT = N->getValueType(0);
+ SDValue VecOp = N->getOperand(0);
+ SDValue Index = N->getOperand(1);
+ EVT ScalarVT = N->getValueType(0);
+ EVT VecVT = VecOp.getValueType();
+ if (VecOp.isUndef())
+ return DAG.getUNDEF(ScalarVT);
- if (InVec.isUndef())
- return DAG.getUNDEF(NVT);
+ // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
+ //
+ // This only really matters if the index is non-constant since other combines
+ // on the constant elements already work.
+ SDLoc DL(N);
+ if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ Index == VecOp.getOperand(2)) {
+ SDValue Elt = VecOp.getOperand(1);
+ return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
+ }
- if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // (vextract (scalar_to_vector val, 0) -> val
+ if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
- SDValue InOp = InVec.getOperand(0);
- if (InOp.getValueType() != NVT) {
- assert(InOp.getValueType().isInteger() && NVT.isInteger());
- return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
+ SDValue InOp = VecOp.getOperand(0);
+ if (InOp.getValueType() != ScalarVT) {
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
}
return InOp;
}
- SDValue EltNo = N->getOperand(1);
- ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
-
// extract_vector_elt of out-of-bounds element -> UNDEF
- if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
- return DAG.getUNDEF(NVT);
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (IndexC && IndexC->getAPIntValue().uge(NumElts))
+ return DAG.getUNDEF(ScalarVT);
// extract_vector_elt (build_vector x, y), 1 -> y
- if (ConstEltNo &&
- InVec.getOpcode() == ISD::BUILD_VECTOR &&
- TLI.isTypeLegal(VT) &&
- (InVec.hasOneUse() ||
- TLI.aggressivelyPreferBuildVectorSources(VT))) {
- SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
+ if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
+ TLI.isTypeLegal(VecVT) &&
+ (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
+ SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
EVT InEltVT = Elt.getValueType();
// Sometimes build_vector's scalar input types do not match result type.
- if (NVT == InEltVT)
+ if (ScalarVT == InEltVT)
return Elt;
// TODO: It may be useful to truncate if free if the build_vector implicitly
// converts.
}
- // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
- bool isLE = DAG.getDataLayout().isLittleEndian();
- unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
- if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
- ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
- SDValue BCSrc = InVec.getOperand(0);
- if (BCSrc.getValueType().isScalarInteger())
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
+ // TODO: These transforms should not require the 'hasOneUse' restriction, but
+ // there are regressions on multiple targets without it. We can end up with a
+ // mess of scalar and vector code if we reduce only part of the DAG to scalar.
+ if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
+ VecOp.hasOneUse()) {
+ // The vector index of the LSBs of the source depends on the endianness.
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ unsigned ExtractIndex = IndexC->getZExtValue();
+ // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
+ unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
+ SDValue BCSrc = VecOp.getOperand(0);
+ if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
+ return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
+
+ if (LegalTypes && BCSrc.getValueType().isInteger() &&
+ BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
+ // trunc i64 X to i32
+ SDValue X = BCSrc.getOperand(0);
+ assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
+ "Extract element and scalar to vector can't change element type "
+ "from FP to integer.");
+ unsigned XBitWidth = X.getValueSizeInBits();
+ unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+ BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
+
+ // An extract element return value type can be wider than its vector
+ // operand element type. In that case, the high bits are undefined, so
+ // it's possible that we may need to extend rather than truncate.
+ if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
+ assert(XBitWidth % VecEltBitWidth == 0 &&
+ "Scalar bitwidth must be a multiple of vector element bitwidth");
+ return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
+ }
+ }
}
- // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
- //
- // This only really matters if the index is non-constant since other combines
- // on the constant elements already work.
- if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
- EltNo == InVec.getOperand(2)) {
- SDValue Elt = InVec.getOperand(1);
- return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
- }
+ if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
+ return BO;
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
@@ -15205,30 +15797,29 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// patterns. For example on AVX, extracting elements from a wide vector
// without using extract_subvector. However, if we can find an underlying
// scalar value, then we can always use that.
- if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
- int NumElem = VT.getVectorNumElements();
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+ if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
// Find the new index to extract from.
- int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
+ int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
// Extracting an undef index is undef.
if (OrigElt == -1)
- return DAG.getUNDEF(NVT);
+ return DAG.getUNDEF(ScalarVT);
// Select the right vector half to extract from.
SDValue SVInVec;
- if (OrigElt < NumElem) {
- SVInVec = InVec->getOperand(0);
+ if (OrigElt < (int)NumElts) {
+ SVInVec = VecOp.getOperand(0);
} else {
- SVInVec = InVec->getOperand(1);
- OrigElt -= NumElem;
+ SVInVec = VecOp.getOperand(1);
+ OrigElt -= NumElts;
}
if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
SDValue InOp = SVInVec.getOperand(OrigElt);
- if (InOp.getValueType() != NVT) {
- assert(InOp.getValueType().isInteger() && NVT.isInteger());
- InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
+ if (InOp.getValueType() != ScalarVT) {
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+ InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
}
return InOp;
@@ -15239,136 +15830,131 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (!LegalOperations ||
// FIXME: Should really be just isOperationLegalOrCustom.
- TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
- TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
+ TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
- DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
+ DAG.getConstant(OrigElt, DL, IndexTy));
}
}
// If only EXTRACT_VECTOR_ELT nodes use the source vector we can
// simplify it based on the (valid) extraction indices.
- if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
+ if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- Use->getOperand(0) == InVec &&
+ Use->getOperand(0) == VecOp &&
isa<ConstantSDNode>(Use->getOperand(1));
})) {
- APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
- for (SDNode *Use : InVec->uses()) {
+ APInt DemandedElts = APInt::getNullValue(NumElts);
+ for (SDNode *Use : VecOp->uses()) {
auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
- if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
+ if (CstElt->getAPIntValue().ult(NumElts))
DemandedElts.setBit(CstElt->getZExtValue());
}
- if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
+ if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
+ // We simplified the vector operand of this extract element. If this
+ // extract is not dead, visit it again so it is folded properly.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
return SDValue(N, 0);
+ }
}
- bool BCNumEltsChanged = false;
- EVT ExtVT = VT.getVectorElementType();
- EVT LVT = ExtVT;
-
+ // Everything under here is trying to match an extract of a loaded value.
// If the result of load has to be truncated, then it's not necessarily
// profitable.
- if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VecVT.getVectorElementType();
+ EVT LVT = ExtVT;
+ if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
return SDValue();
- if (InVec.getOpcode() == ISD::BITCAST) {
+ if (VecOp.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
+ if (!VecOp.hasOneUse())
return SDValue();
- EVT BCVT = InVec.getOperand(0).getValueType();
+ EVT BCVT = VecOp.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
- if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ if (NumElts != BCVT.getVectorNumElements())
BCNumEltsChanged = true;
- InVec = InVec.getOperand(0);
+ VecOp = VecOp.getOperand(0);
ExtVT = BCVT.getVectorElementType();
}
- // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
- if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
- ISD::isNormalLoad(InVec.getNode()) &&
- !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
- SDValue Index = N->getOperand(1);
- if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
- if (!OrigLoad->isVolatile()) {
- return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
- OrigLoad);
- }
- }
+ // extract (vector load $addr), i --> load $addr + i * size
+ if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
+ ISD::isNormalLoad(VecOp.getNode()) &&
+ !Index->hasPredecessor(VecOp.getNode())) {
+ auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
+ if (VecLoad && !VecLoad->isVolatile())
+ return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
}
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
- if (!LegalOperations) return SDValue();
+ if (!LegalOperations || !IndexC)
+ return SDValue();
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+ int Elt = IndexC->getZExtValue();
+ LoadSDNode *LN0 = nullptr;
+ if (ISD::isNormalLoad(VecOp.getNode())) {
+ LN0 = cast<LoadSDNode>(VecOp);
+ } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ VecOp.getOperand(0).getValueType() == ExtVT &&
+ ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
- if (ConstEltNo) {
- int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
+ }
+ if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
- LoadSDNode *LN0 = nullptr;
- const ShuffleVectorSDNode *SVN = nullptr;
- if (ISD::isNormalLoad(InVec.getNode())) {
- LN0 = cast<LoadSDNode>(InVec);
- } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- InVec.getOperand(0).getValueType() == ExtVT &&
- ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
- LN0 = cast<LoadSDNode>(InVec.getOperand(0));
- } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
- // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
- // =>
- // (load $addr+1*size)
+ // Select the input vector, guarding against out of range extract vector.
+ int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
+ VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
+ if (VecOp.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- // If the bit convert changed the number of elements, it is unsafe
- // to examine the mask.
- if (BCNumEltsChanged)
+ if (!VecOp.hasOneUse())
return SDValue();
- // Select the input vector, guarding against out of range extract vector.
- unsigned NumElems = VT.getVectorNumElements();
- int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
- InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
-
- if (InVec.getOpcode() == ISD::BITCAST) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- InVec = InVec.getOperand(0);
- }
- if (ISD::isNormalLoad(InVec.getNode())) {
- LN0 = cast<LoadSDNode>(InVec);
- Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
- EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
- }
+ VecOp = VecOp.getOperand(0);
}
+ if (ISD::isNormalLoad(VecOp.getNode())) {
+ LN0 = cast<LoadSDNode>(VecOp);
+ Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
+ Index = DAG.getConstant(Elt, DL, Index.getValueType());
+ }
+ }
- // Make sure we found a non-volatile load and the extractelement is
- // the only use.
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
- return SDValue();
-
- // If Idx was -1 above, Elt is going to be -1, so just return undef.
- if (Elt == -1)
- return DAG.getUNDEF(LVT);
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+ return SDValue();
- return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
- }
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LVT);
- return SDValue();
+ return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
}
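The bitcast folds above pick the element holding the low bits of the scalar source (BCTruncElt), which is lane 0 only on little-endian targets. A standalone sketch, assuming a little-endian host:

#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  uint64_t X = 0x1122334455667788ULL;
  uint32_t Elts[2];
  std::memcpy(Elts, &X, sizeof(X));            // models bitcast i64 -> v2i32
  assert(Elts[0] == static_cast<uint32_t>(X)); // lane 0 == trunc on little-endian
  return 0;
}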
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
@@ -15484,77 +16070,6 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
return DAG.getBitcast(VT, BV);
}
-SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
- EVT VT = N->getValueType(0);
-
- unsigned NumInScalars = N->getNumOperands();
- SDLoc DL(N);
-
- EVT SrcVT = MVT::Other;
- unsigned Opcode = ISD::DELETED_NODE;
- unsigned NumDefs = 0;
-
- for (unsigned i = 0; i != NumInScalars; ++i) {
- SDValue In = N->getOperand(i);
- unsigned Opc = In.getOpcode();
-
- if (Opc == ISD::UNDEF)
- continue;
-
- // If all scalar values are floats and converted from integers.
- if (Opcode == ISD::DELETED_NODE &&
- (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
- Opcode = Opc;
- }
-
- if (Opc != Opcode)
- return SDValue();
-
- EVT InVT = In.getOperand(0).getValueType();
-
- // If all scalar values are typed differently, bail out. It's chosen to
- // simplify BUILD_VECTOR of integer types.
- if (SrcVT == MVT::Other)
- SrcVT = InVT;
- if (SrcVT != InVT)
- return SDValue();
- NumDefs++;
- }
-
- // If the vector has just one element defined, it's not worth to fold it into
- // a vectorized one.
- if (NumDefs < 2)
- return SDValue();
-
- assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
- && "Should only handle conversion from integer to float.");
- assert(SrcVT != MVT::Other && "Cannot determine source type!");
-
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
-
- if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
- return SDValue();
-
- // Just because the floating-point vector type is legal does not necessarily
- // mean that the corresponding integer vector type is.
- if (!isTypeLegal(NVT))
- return SDValue();
-
- SmallVector<SDValue, 8> Opnds;
- for (unsigned i = 0; i != NumInScalars; ++i) {
- SDValue In = N->getOperand(i);
-
- if (In.isUndef())
- Opnds.push_back(DAG.getUNDEF(SrcVT));
- else
- Opnds.push_back(In.getOperand(0));
- }
- SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
- AddToWorklist(BV.getNode());
-
- return DAG.getNode(Opcode, DL, VT, BV);
-}
-
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
@@ -15676,6 +16191,78 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
return Shuffle;
}
+static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
+ assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
+
+ // First, determine where the build vector is not undef.
+ // TODO: We could extend this to handle zero elements as well as undefs.
+ int NumBVOps = BV->getNumOperands();
+ int ZextElt = -1;
+ for (int i = 0; i != NumBVOps; ++i) {
+ SDValue Op = BV->getOperand(i);
+ if (Op.isUndef())
+ continue;
+ if (ZextElt == -1)
+ ZextElt = i;
+ else
+ return SDValue();
+ }
+ // Bail out if there's no non-undef element.
+ if (ZextElt == -1)
+ return SDValue();
+
+ // The build vector contains some number of undef elements and exactly
+ // one other element. That other element must be a zero-extended scalar
+ // extracted from a vector at a constant index to turn this into a shuffle.
+ // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+ SDValue Zext = BV->getOperand(ZextElt);
+ if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
+ Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+ return SDValue();
+
+ // The zero-extended width must be a multiple of the source width.
+ SDValue Extract = Zext.getOperand(0);
+ unsigned DestSize = Zext.getValueSizeInBits();
+ unsigned SrcSize = Extract.getValueSizeInBits();
+ if (DestSize % SrcSize != 0)
+ return SDValue();
+
+ // Create a shuffle mask that will combine the extracted element with zeros
+ // and undefs.
+ int ZextRatio = DestSize / SrcSize;
+ int NumMaskElts = NumBVOps * ZextRatio;
+ SmallVector<int, 32> ShufMask(NumMaskElts, -1);
+ for (int i = 0; i != NumMaskElts; ++i) {
+ if (i / ZextRatio == ZextElt) {
+ // The low bits of the (potentially translated) extracted element map to
+ // the source vector. The high bits map to zero. We will use a zero vector
+ // as the 2nd source operand of the shuffle, so use the 1st element of
+ // that vector (mask value is number-of-elements) for the high bits.
+ if (i % ZextRatio == 0)
+ ShufMask[i] = Extract.getConstantOperandVal(1);
+ else
+ ShufMask[i] = NumMaskElts;
+ }
+
+ // Undef elements of the build vector remain undef because we initialize
+ // the shuffle mask with -1.
+ }
+
+ // Turn this into a shuffle with zero if that's legal.
+ EVT VecVT = Extract.getOperand(0).getValueType();
+ if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
+ return SDValue();
+
+ // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
+ // bitcast (shuffle V, ZeroVec, VectorMask)
+ SDLoc DL(BV);
+ SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
+ SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
+ ShufMask);
+ return DAG.getBitcast(BV->getValueType(0), Shuf);
+}
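A worked example of the mask construction under assumed types: BV is v4i64 whose only defined operand (ZextElt = 0) is zext i32 to i64 of (extractelt v8i32 V, 5), so ZextRatio = 2 and the mask takes lane 5 of V for the low half of that element and lane 0 of the zero vector (mask value NumMaskElts) for the high half:

#include <cstdio>
#include <vector>
int main() {
  int NumBVOps = 4, ZextElt = 0, ZextRatio = 64 / 32, ExtractIdx = 5;
  int NumMaskElts = NumBVOps * ZextRatio;
  std::vector<int> ShufMask(NumMaskElts, -1);
  for (int i = 0; i != NumMaskElts; ++i)
    if (i / ZextRatio == ZextElt)
      ShufMask[i] = (i % ZextRatio == 0) ? ExtractIdx : NumMaskElts;
  for (int M : ShufMask)
    printf("%d ", M);   // prints: 5 8 -1 -1 -1 -1 -1 -1
  printf("\n");
  return 0;
}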
+
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
@@ -15687,6 +16274,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
if (!isTypeLegal(VT))
return SDValue();
+ if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
+ return V;
+
// May only combine to shuffle after legalize if shuffle is legal.
if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
return SDValue();
@@ -15950,7 +16540,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// TODO: Maybe this is useful for non-splat too?
if (!LegalOperations) {
if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
- Splat = peekThroughBitcast(Splat);
+ Splat = peekThroughBitcasts(Splat);
EVT SrcVT = Splat.getValueType();
if (SrcVT.isVector()) {
unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
@@ -16001,9 +16591,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
- if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
- return V;
-
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
@@ -16085,8 +16672,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
SmallVector<int, 8> Mask;
for (SDValue Op : N->ops()) {
- // Peek through any bitcast.
- Op = peekThroughBitcast(Op);
+ Op = peekThroughBitcasts(Op);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (Op.isUndef()) {
@@ -16103,9 +16689,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
// We want the EVT of the original extraction to correctly scale the
// extraction index.
EVT ExtVT = ExtVec.getValueType();
-
- // Peek through any bitcast.
- ExtVec = peekThroughBitcast(ExtVec);
+ ExtVec = peekThroughBitcasts(ExtVec);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (ExtVec.isUndef()) {
@@ -16169,11 +16753,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
- // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
- if (In->getOpcode() == ISD::BITCAST &&
- !In->getOperand(0).getValueType().isVector()) {
- SDValue Scalar = In->getOperand(0);
+ SDValue Scalar = peekThroughOneUseBitcasts(In);
+ // concat_vectors(scalar_to_vector(scalar), undef) ->
+ // scalar_to_vector(scalar)
+ if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ Scalar.hasOneUse()) {
+ EVT SVT = Scalar.getValueType().getVectorElementType();
+ if (SVT == Scalar.getOperand(0).getValueType())
+ Scalar = Scalar.getOperand(0);
+ }
+
+ // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
+ if (!Scalar.getValueType().isVector()) {
// If the bitcast type isn't legal, it might be a trunc of a legal type;
// look through the trunc so we can still do the transform:
// concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
@@ -16182,7 +16774,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
Scalar = Scalar->getOperand(0);
- EVT SclTy = Scalar->getValueType(0);
+ EVT SclTy = Scalar.getValueType();
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
@@ -16310,60 +16902,93 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
-/// If we are extracting a subvector produced by a wide binary operator with at
-/// at least one operand that was the result of a vector concatenation, then try
-/// to use the narrow vector operands directly to avoid the concatenation and
-/// extraction.
+/// If we are extracting a subvector produced by a wide binary operator try
+/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
// The extract index must be a constant, so we can map it to a concat operand.
- auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!ExtractIndex)
- return SDValue();
-
- // Only handle the case where we are doubling and then halving. A larger ratio
- // may require more than two narrow binops to replace the wide binop.
- EVT VT = Extract->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
- "Extract index is not a multiple of the vector length.");
- if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
+ auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+ if (!ExtractIndexC)
return SDValue();
// We are looking for an optionally bitcasted wide vector binary operator
// feeding an extract subvector.
- SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
-
- // TODO: The motivating case for this transform is an x86 AVX1 target. That
- // target has temptingly almost legal versions of bitwise logic ops in 256-bit
- // flavors, but no other 256-bit integer support. This could be extended to
- // handle any binop, but that may require fixing/adding other folds to avoid
- // codegen regressions.
- unsigned BOpcode = BinOp.getOpcode();
- if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
+ SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
+ if (!ISD::isBinaryOp(BinOp.getNode()))
return SDValue();
- // The binop must be a vector type, so we can chop it in half.
+ // The binop must be a vector type, so we can extract some fraction of it.
EVT WideBVT = BinOp.getValueType();
if (!WideBVT.isVector())
return SDValue();
+ EVT VT = Extract->getValueType(0);
+ unsigned ExtractIndex = ExtractIndexC->getZExtValue();
+ assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
+ "Extract index is not a multiple of the vector length.");
+
+ // Bail out if this is not a proper multiple width extraction.
+ unsigned WideWidth = WideBVT.getSizeInBits();
+ unsigned NarrowWidth = VT.getSizeInBits();
+ if (WideWidth % NarrowWidth != 0)
+ return SDValue();
+
+ // Bail out if we are extracting a fraction of a single operation. This can
+ // occur because we potentially looked through a bitcast of the binop.
+ unsigned NarrowingRatio = WideWidth / NarrowWidth;
+ unsigned WideNumElts = WideBVT.getVectorNumElements();
+ if (WideNumElts % NarrowingRatio != 0)
+ return SDValue();
+
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
- WideBVT.getVectorNumElements() / 2);
+ WideNumElts / NarrowingRatio);
+ unsigned BOpcode = BinOp.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
return SDValue();
- // Peek through bitcasts of the binary operator operands if needed.
- SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
- SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
+ // If extraction is cheap, we don't need to look at the binop operands
+ // for concat ops. The narrow binop alone makes this transform profitable.
+ // We can't just reuse the original extract index operand because we may have
+ // bitcasted.
+ unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
+ unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
+ EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
+ if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
+ BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
+ // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
+ SDLoc DL(Extract);
+ SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0), NewExtIndex);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1), NewExtIndex);
+ SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
+ BinOp.getNode()->getFlags());
+ return DAG.getBitcast(VT, NarrowBinOp);
+ }
+
+ // Only handle the case where we are doubling and then halving. A larger ratio
+ // may require more than two narrow binops to replace the wide binop.
+ if (NarrowingRatio != 2)
+ return SDValue();
+
+ // TODO: The motivating case for this transform is an x86 AVX1 target. That
+ // target has temptingly almost legal versions of bitwise logic ops in 256-bit
+ // flavors, but no other 256-bit integer support. This could be extended to
+ // handle any binop, but that may require fixing/adding other folds to avoid
+ // codegen regressions.
+ if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
+ return SDValue();
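+  // For example (editorial note): AVX1 provides 256-bit logic ops through the
+  // floating-point domain (vandps/vorps/vxorps on ymm registers) while other
+  // 256-bit integer arithmetic must be split, so only AND/OR/XOR are narrowed
+  // here.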
// We need at least one concatenation operation of a binop operand to make
// this transform worthwhile. The concat must double the input vector sizes.
// TODO: Should we also handle INSERT_SUBVECTOR patterns?
+ SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
+ SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
bool ConcatL =
LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
bool ConcatR =
@@ -16372,11 +16997,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
// If one of the binop operands was not the result of a concat, we must
- // extract a half-sized operand for our new narrow binop. We can't just reuse
- // the original extract index operand because we may have bitcasted.
- unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
- unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
- EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
+ // extract a half-sized operand for our new narrow binop.
SDLoc DL(Extract);
// extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
@@ -16404,17 +17025,19 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
if (DAG.getDataLayout().isBigEndian())
return SDValue();
- // TODO: The one-use check is overly conservative. Check the cost of the
- // extract instead or remove that condition entirely.
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
- !ExtIdx)
+ if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
+ return SDValue();
+
+ // Allow targets to opt-out.
+ EVT VT = Extract->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
return SDValue();
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
- EVT VT = Extract->getValueType(0);
SDLoc DL(Extract);
SDValue BaseAddr = Ld->getOperand(1);
unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
@@ -16447,9 +17070,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// Vi if possible
// Only operand 0 is checked as 'concat' assumes all inputs of the same
// type.
- if (V->getOpcode() == ISD::CONCAT_VECTORS &&
+ if (V.getOpcode() == ISD::CONCAT_VECTORS &&
isa<ConstantSDNode>(N->getOperand(1)) &&
- V->getOperand(0).getValueType() == NVT) {
+ V.getOperand(0).getValueType() == NVT) {
unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
@@ -16457,13 +17080,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return V->getOperand(Idx / NumElems);
}
- // Skip bitcasting
- V = peekThroughBitcast(V);
+ V = peekThroughBitcasts(V);
// If the input is a build vector. Try to make a smaller build vector.
- if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ if (V.getOpcode() == ISD::BUILD_VECTOR) {
if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
- EVT InVT = V->getValueType(0);
+ EVT InVT = V.getValueType();
unsigned ExtractSize = NVT.getSizeInBits();
unsigned EltSize = InVT.getScalarSizeInBits();
// Only do this if we won't split any elements.
@@ -16496,16 +17118,16 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
}
- if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
// Handle only simple case where vector being inserted and vector
// being extracted are of same size.
- EVT SmallVT = V->getOperand(1).getValueType();
+ EVT SmallVT = V.getOperand(1).getValueType();
if (!NVT.bitsEq(SmallVT))
return SDValue();
// Only handle cases where both indexes are constants.
- ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
- ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+ auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
if (InsIdx && ExtIdx) {
// Combine:
@@ -16515,11 +17137,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// otherwise => (extract_subvec V1, ExtIdx)
if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
- return DAG.getBitcast(NVT, V->getOperand(1));
+ return DAG.getBitcast(NVT, V.getOperand(1));
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
- DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
- N->getOperand(1));
+ DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
+ N->getOperand(1));
}
}
@@ -16620,14 +17242,17 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SDValue N0 = SVN->getOperand(0);
SDValue N1 = SVN->getOperand(1);
- if (!N0->hasOneUse() || !N1->hasOneUse())
+ if (!N0->hasOneUse())
return SDValue();
// If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
// discussed above.
if (!N1.isUndef()) {
- bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
- bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
+ if (!N1->hasOneUse())
+ return SDValue();
+
+ bool N0AnyConst = isAnyConstantBuildVector(N0);
+ bool N1AnyConst = isAnyConstantBuildVector(N1);
if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
return SDValue();
if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
@@ -16693,8 +17318,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
const TargetLowering &TLI,
- bool LegalOperations,
- bool LegalTypes) {
+ bool LegalOperations) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
@@ -16730,11 +17354,14 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
- if (!LegalTypes || TLI.isTypeLegal(OutVT))
+ // Never create an illegal type. Only create unsupported operations if we
+ // are pre-legalization.
+ if (TLI.isTypeLegal(OutVT))
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
return DAG.getBitcast(VT,
- DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
+ DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
+ SDLoc(SVN), OutVT, N0));
}
return SDValue();
@@ -16754,7 +17381,7 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
if (!VT.isInteger() || IsBigEndian)
return SDValue();
- SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
+ SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
unsigned Opcode = N0.getOpcode();
if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
@@ -17039,7 +17666,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue(N, 0);
// Match shuffles that can be converted to any_vector_extend_in_reg.
- if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
+ if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
return V;
// Combine "truncate_vector_in_reg" style shuffles.
@@ -17057,7 +17684,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
return Res;
@@ -17067,15 +17694,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N1.isUndef() && Level < AfterLegalizeVectorOps &&
TLI.isTypeLegal(VT)) {
-
- // Peek through the bitcast only if there is one user.
- SDValue BC0 = N0;
- while (BC0.getOpcode() == ISD::BITCAST) {
- if (!BC0.hasOneUse())
- break;
- BC0 = BC0.getOperand(0);
- }
-
auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
if (Scale == 1)
return SmallVector<int, 8>(Mask.begin(), Mask.end());
@@ -17086,7 +17704,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
NewMask.push_back(M < 0 ? -1 : Scale * M + s);
return NewMask;
};
-
+
+ SDValue BC0 = peekThroughOneUseBitcasts(N0);
if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
EVT SVT = VT.getScalarType();
EVT InnerVT = BC0->getValueType(0);
@@ -17329,12 +17948,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N1.isUndef())
return N0;
- // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
- // us to pull BITCASTs from input to output.
- if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
- if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
- return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
-
// If this is an insert of an extracted vector into an undef vector, we can
// just use the input to the extract.
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
@@ -17382,6 +17995,14 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
N1, N2);
+ // Eliminate an intermediate insert into an undef vector:
+ // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
+ // insert_subvector undef, X, N2
+ if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
+ N1.getOperand(1), N2);
+
if (!isa<ConstantSDNode>(N2))
return SDValue();
@@ -17417,6 +18038,10 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
+ // Simplify source operands based on insertion.
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -17454,7 +18079,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
- SDValue RHS = peekThroughBitcast(N->getOperand(1));
+ SDValue RHS = peekThroughBitcasts(N->getOperand(1));
SDLoc DL(N);
// Make sure we're not running after operation legalization where it
@@ -17684,31 +18309,64 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
LLD->getBasePtr().getValueType()))
return false;
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
+ return false;
+
// Check that the select condition doesn't reach either load. If so,
// folding this will induce a cycle into the DAG. If not, this is safe to
// xform, so create a select of the addresses.
+
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
+  // Always fail if LLD and RLD are not independent. TheSelect is a
+  // predecessor to all nodes in question, so we need not search past it.
+
+ Visited.insert(TheSelect);
+ Worklist.push_back(LLD);
+ Worklist.push_back(RLD);
+
+ if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
+ return false;
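+  // Editorial note: seeding Visited with TheSelect stops the search at the
+  // select itself, and the Visited/Worklist state is intentionally kept alive
+  // so the later checks against the condition operands only traverse nodes
+  // that have not been visited yet.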
+
SDValue Addr;
if (TheSelect->getOpcode() == ISD::SELECT) {
+    // We cannot do this optimization if any node in {LLD, RLD} is a
+    // predecessor of any node in {LLD, RLD, CondNode}. As we've already
+    // compared the loads, we only need to check whether CondNode is a
+    // successor of one of the loads. We can further skip the check when a
+    // load's chain value has no uses.
SDNode *CondNode = TheSelect->getOperand(0).getNode();
- if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
- (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
- return false;
- // The loads must not depend on one another.
- if (LLD->isPredecessorOf(RLD) ||
- RLD->isPredecessorOf(LLD))
+ Worklist.push_back(CondNode);
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
+ (RLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
return false;
+
Addr = DAG.getSelect(SDLoc(TheSelect),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
RLD->getBasePtr());
} else { // Otherwise SELECT_CC
+    // We cannot do this optimization if any node in {LLD, RLD} is a
+    // predecessor of any node in {LLD, RLD, CondLHS, CondRHS}. As we've
+    // already compared the loads, we only need to check whether CondLHS or
+    // CondRHS is a successor of one of the loads. We can further skip the
+    // check when a load's chain value has no uses.
+
SDNode *CondLHS = TheSelect->getOperand(0).getNode();
SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+ Worklist.push_back(CondLHS);
+ Worklist.push_back(CondRHS);
if ((LLD->hasAnyUseOfValue(1) &&
- (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+ SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
(RLD->hasAnyUseOfValue(1) &&
- (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
return false;
Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
@@ -17823,6 +18481,63 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
+/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
+/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+/// in it. This may be a win when the constant is not otherwise available
+/// because it replaces two constant pool loads with one.
+SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
+ const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
+ return SDValue();
+
+ // If we are before legalize types, we want the other legalization to happen
+ // first (for example, to avoid messing with soft float).
+ auto *TV = dyn_cast<ConstantFPSDNode>(N2);
+ auto *FV = dyn_cast<ConstantFPSDNode>(N3);
+ EVT VT = N2.getValueType();
+ if (!TV || !FV || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // If a constant can be materialized without loads, this does not make sense.
+ if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
+ TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
+ TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
+ return SDValue();
+
+ // If both constants have multiple uses, then we won't need to do an extra
+ // load. The values are likely around in registers for other users.
+ if (!TV->hasOneUse() && !FV->hasOneUse())
+ return SDValue();
+
+ Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue()) };
+ Type *FPTy = Elts[0]->getType();
+ const DataLayout &TD = DAG.getDataLayout();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get offsets to the 0 and 1 elements of the array, so we can select between
+ // them.
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
+ SDValue Cond =
+ DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
+ AddToWorklist(Cond.getNode());
+ SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
+ AddToWorklist(CstOffset.getNode());
+ CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
+ AddToWorklist(CPIdx.getNode());
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(
+ DAG.getMachineFunction()), Alignment);
+}
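+// Example of the intended lowering (editorial illustration): for
+// "select_cc a, b, 1.0f, 2.0f, cc" this builds the constant pool array
+// {2.0f, 1.0f}, selects an offset of (a cc b) ? 4 : 0, and issues a single
+// load from the pool base plus that offset, replacing two FP constant loads.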
+
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
@@ -17831,75 +18546,26 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// (x ? y : y) -> y.
if (N2 == N3) return N2;
+ EVT CmpOpVT = N0.getValueType();
EVT VT = N2.getValueType();
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
- // Determine if the condition we're dealing with is constant
- SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
- N0, N1, CC, DL, false);
+ // Determine if the condition we're dealing with is constant.
+ SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
+ false);
if (SCC.getNode()) AddToWorklist(SCC.getNode());
- if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
// fold select_cc true, x, y -> x
// fold select_cc false, x, y -> y
return !SCCC->isNullValue() ? N2 : N3;
}
- // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
- // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
- // in it. This is a win when the constant is not otherwise available because
- // it replaces two constant pool loads with one. We only do this if the FP
- // type is known to be legal, because if it isn't, then we are before legalize
- // types an we want the other legalization to happen first (e.g. to avoid
- // messing with soft float) and if the ConstantFP is not legal, because if
- // it is legal, we may not need to store the FP constant in a constant pool.
- if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
- if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
- if (TLI.isTypeLegal(N2.getValueType()) &&
- (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
- TargetLowering::Legal &&
- !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
- !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
- // If both constants have multiple uses, then we won't need to do an
- // extra load, they are likely around in registers for other users.
- (TV->hasOneUse() || FV->hasOneUse())) {
- Constant *Elts[] = {
- const_cast<ConstantFP*>(FV->getConstantFPValue()),
- const_cast<ConstantFP*>(TV->getConstantFPValue())
- };
- Type *FPTy = Elts[0]->getType();
- const DataLayout &TD = DAG.getDataLayout();
-
- // Create a ConstantArray of the two constants.
- Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
- SDValue CPIdx =
- DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
- TD.getPrefTypeAlignment(FPTy));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
-
- // Get the offsets to the 0 and 1 element of the array so that we can
- // select between them.
- SDValue Zero = DAG.getIntPtrConstant(0, DL);
- unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
- SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
-
- SDValue Cond = DAG.getSetCC(DL,
- getSetCCResultType(N0.getValueType()),
- N0, N1, CC);
- AddToWorklist(Cond.getNode());
- SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
- Cond, One, Zero);
- AddToWorklist(CstOffset.getNode());
- CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
- CstOffset);
- AddToWorklist(CPIdx.getNode());
- return DAG.getLoad(
- TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- Alignment);
- }
- }
+ if (SDValue V =
+ convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
+ return V;
if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
return V;
@@ -17913,7 +18579,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
- ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
@@ -17934,48 +18600,48 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
}
// fold select C, 16, 0 -> shl C, 4
- if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
- TLI.getBooleanContents(N0.getValueType()) ==
- TargetLowering::ZeroOrOneBooleanContent) {
+ bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
+ bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
+
+ if ((Fold || Swap) &&
+ TLI.getBooleanContents(CmpOpVT) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
+
+ if (Swap) {
+ CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
+ std::swap(N2C, N3C);
+ }
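+    // Editorial example: "select_cc a, b, 0, 16, eq" is handled by inverting
+    // the condition to ne and swapping the constants, after which the
+    // "select C, 16, 0 -> shl C, 4" logic below applies unchanged.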
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
if (NotExtCompare && N2C->isOne())
return SDValue();
- // Get a SetCC of the condition
- // NOTE: Don't create a SETCC if it's not legal on this target.
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
- SDValue Temp, SCC;
- // cast from setcc result type to select result type
- if (LegalTypes) {
- SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
- N0, N1, CC);
- if (N2.getValueType().bitsLT(SCC.getValueType()))
- Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
- N2.getValueType());
- else
- Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
- N2.getValueType(), SCC);
- } else {
- SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
- Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
- N2.getValueType(), SCC);
- }
+ SDValue Temp, SCC;
+ // zext (setcc n0, n1)
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
+ if (VT.bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ } else {
+ SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ }
- AddToWorklist(SCC.getNode());
- AddToWorklist(Temp.getNode());
+ AddToWorklist(SCC.getNode());
+ AddToWorklist(Temp.getNode());
- if (N2C->isOne())
- return Temp;
+ if (N2C->isOne())
+ return Temp;
- // shl setcc result by log2 n2c
- return DAG.getNode(
- ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
- getShiftAmountTy(Temp.getValueType())));
- }
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ SDLoc(Temp),
+ getShiftAmountTy(Temp.getValueType())));
}
// Check to see if this is an integer abs.
@@ -17995,18 +18661,16 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
- EVT XType = N0.getValueType();
- if (SubC && SubC->isNullValue() && XType.isInteger()) {
+ if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
SDLoc DL(N0);
- SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
- N0,
- DAG.getConstant(XType.getSizeInBits() - 1, DL,
- getShiftAmountTy(N0.getValueType())));
- SDValue Add = DAG.getNode(ISD::ADD, DL,
- XType, N0, Shift);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
+ DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
+ DL,
+ getShiftAmountTy(CmpOpVT)));
+ SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
}
}
@@ -18067,21 +18731,14 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
if (DAG.getMachineFunction().getFunction().optForMinSize())
return SDValue();
- ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
- if (!C)
- return SDValue();
-
- // Avoid division by zero.
- if (C->isNullValue())
- return SDValue();
-
SmallVector<SDNode *, 8> Built;
- SDValue S =
- TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
+ if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
- for (SDNode *N : Built)
- AddToWorklist(N);
- return S;
+ return SDValue();
}
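+// Editorial note: the divisor validation (non-zero constant or constant
+// splat) that used to live here is assumed to have moved into
+// TargetLowering::BuildSDIV/BuildUDIV, which now take the node directly and
+// report any helper nodes they create through the Built vector.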
/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
@@ -18096,11 +18753,13 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
return SDValue();
SmallVector<SDNode *, 8> Built;
- SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built);
+ if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
- for (SDNode *N : Built)
- AddToWorklist(N);
- return S;
+ return SDValue();
}
/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
@@ -18113,21 +18772,14 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
if (DAG.getMachineFunction().getFunction().optForMinSize())
return SDValue();
- ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
- if (!C)
- return SDValue();
-
- // Avoid division by zero.
- if (C->isNullValue())
- return SDValue();
-
SmallVector<SDNode *, 8> Built;
- SDValue S =
- TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
+ if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
- for (SDNode *N : Built)
- AddToWorklist(N);
- return S;
+ return SDValue();
}
/// Determines the LogBase2 value for a non-null input value using the
@@ -18583,6 +19235,11 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+// TODO: Replace with std::monostate when we move to C++17.
+struct UnitT { } Unit;
+bool operator==(const UnitT &, const UnitT &) { return true; }
+bool operator!=(const UnitT &, const UnitT &) { return false; }
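+// Editorial note: UnitT is a payload-free stand-in value (hence the TODO about
+// std::monostate) so the IntervalMap below can serve as a plain interval set;
+// the trivial equality operators let adjacent intervals holding the same value
+// coalesce on insertion.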
+
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
@@ -18595,13 +19252,22 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
-bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None)
- return false;
+
+bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
+ SmallVector<StoreSDNode *, 8> ChainedStores;
+ StoreSDNode *STChain = St;
+  // Intervals records which offsets from BaseIndex have been covered. In the
+  // common case, every store writes to the address range immediately adjacent
+  // to the covered one and is thus merged with the existing interval on insert.
+
+ using IMap =
+ llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
+ IMap::Allocator A;
+ IMap Intervals(A);
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@@ -18611,76 +19277,114 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (BasePtr.getBase().isUndef())
return false;
- SmallVector<StoreSDNode *, 8> ChainedStores;
- ChainedStores.push_back(St);
+ // Add ST's interval.
+ Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
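+  // Editorial example: an i32 store at BaseIndex covers the half-open byte
+  // interval [0, 4); a chained i32 store found below at offset 4 extends the
+  // coverage to [0, 8), while one overlapping, say, [2, 6) stops the walk.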
- // Walk up the chain and look for nodes with offsets from the same
- // base pointer. Stop when reaching an instruction with a different kind
- // or instruction which has a different base pointer.
- StoreSDNode *Index = St;
- while (Index) {
+ while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
// If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 0)->hasOneUse())
+ if (!SDValue(Chain, 0)->hasOneUse())
break;
-
- if (Index->isVolatile() || Index->isIndexed())
+ if (Chain->isVolatile() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
-
+ const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
// Check that the base pointer is the same as the original one.
- if (!BasePtr.equalBaseIndex(Ptr, DAG))
+ int64_t Offset;
+ if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
+ break;
+ int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
+ // Make sure we don't overlap with other intervals by checking the ones to
+ // the left or right before inserting.
+ auto I = Intervals.find(Offset);
+ // If there's a next interval, we should end before it.
+ if (I != Intervals.end() && I.start() < (Offset + Length))
+ break;
+ // If there's a previous interval, we should start after it.
+ if (I != Intervals.begin() && (--I).stop() <= Offset)
break;
+ Intervals.insert(Offset, Offset + Length, Unit);
- // Walk up the chain to find the next store node, ignoring any
- // intermediate loads. Any other kind of node will halt the loop.
- SDNode *NextInChain = Index->getChain().getNode();
- while (true) {
- if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
- // We found a store node. Use it for the next iteration.
- if (STn->isVolatile() || STn->isIndexed()) {
- Index = nullptr;
- break;
- }
- ChainedStores.push_back(STn);
- Index = STn;
- break;
- } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
- NextInChain = Ldn->getChain().getNode();
- continue;
- } else {
- Index = nullptr;
- break;
- }
- }// end while
+ ChainedStores.push_back(Chain);
+ STChain = Chain;
}
- // At this point, ChainedStores lists all of the Store nodes
- // reachable by iterating up through chain nodes matching the above
- // conditions. For each such store identified, try to find an
- // earlier chain to attach the store to which won't violate the
- // required ordering.
- bool MadeChangeToSt = false;
- SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
+ // If we didn't find a chained store, exit.
+ if (ChainedStores.size() == 0)
+ return false;
+
+ // Improve all chained stores (St and ChainedStores members) starting from
+  // where the store chain ended and return a single TokenFactor.
+ SDValue NewChain = STChain->getChain();
+ SmallVector<SDValue, 8> TFOps;
+ for (unsigned I = ChainedStores.size(); I;) {
+ StoreSDNode *S = ChainedStores[--I];
+ SDValue BetterChain = FindBetterChain(S, NewChain);
+ S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
+ S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
+ TFOps.push_back(SDValue(S, 0));
+ ChainedStores[I] = S;
+ }
+
+ // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
+ SDValue BetterChain = FindBetterChain(St, NewChain);
+ SDValue NewST;
+ if (St->isTruncatingStore())
+ NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemoryVT(),
+ St->getMemOperand());
+ else
+ NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemOperand());
- for (StoreSDNode *ChainedStore : ChainedStores) {
- SDValue Chain = ChainedStore->getChain();
- SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
+ TFOps.push_back(NewST);
- if (Chain != BetterChain) {
- if (ChainedStore == St)
- MadeChangeToSt = true;
- BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
- }
- }
+ // If we improved every element of TFOps, then we've lost the dependence on
+ // NewChain to successors of St and we need to add it back to TFOps. Do so at
+ // the beginning to keep relative order consistent with FindBetterChains.
+ auto hasImprovedChain = [&](SDValue ST) -> bool {
+ return ST->getOperand(0) != NewChain;
+ };
+ bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
+ if (AddNewChain)
+ TFOps.insert(TFOps.begin(), NewChain);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
+ CombineTo(St, TF);
+
+ AddToWorklist(STChain);
+  // Add TF operands to the worklist in reverse order.
+ for (auto I = TF->getNumOperands(); I;)
+ AddToWorklist(TF->getOperand(--I).getNode());
+ AddToWorklist(TF.getNode());
+ return true;
+}
+
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
- // Do all replacements after finding the replacements to make to avoid making
- // the chains more complicated by introducing new TokenFactors.
- for (auto Replacement : BetterChains)
- replaceStoreChain(Replacement.first, Replacement.second);
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
- return MadeChangeToSt;
+ // We must have a base and an offset.
+ if (!BasePtr.getBase().getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.getBase().isUndef())
+ return false;
+
+ // Directly improve a chain of disjoint stores starting at St.
+ if (parallelizeChainedStores(St))
+ return true;
+
+  // Improve St's chain.
+ SDValue BetterChain = FindBetterChain(St, St->getChain());
+ if (St->getChain() != BetterChain) {
+ replaceStoreChain(St, BetterChain);
+ return true;
+ }
+ return false;
}
/// This is the entry point for the file.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 795ade588b8f..a9a3c44ea0c9 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -89,6 +89,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -110,6 +111,7 @@
#include <utility>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "isel"
@@ -545,6 +547,15 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 &&
"Invalid iterator!");
while (I != E) {
+ if (LastFlushPoint == I)
+ LastFlushPoint = E;
+ if (SavedInsertPt == I)
+ SavedInsertPt = E;
+ if (EmitStartPt == I)
+ EmitStartPt = E.isValid() ? &*E : nullptr;
+ if (LastLocalValue == I)
+ LastLocalValue = E.isValid() ? &*E : nullptr;
+
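+    // Editorial note: the updates above keep FastISel's cached flush and
+    // insert points from dangling once the instruction they referenced is
+    // erased below.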
MachineInstr *Dead = &*I;
++I;
Dead->eraseFromParent();
@@ -1426,6 +1437,18 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
}
return true;
}
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst *DI = cast<DbgLabelInst>(II);
+ assert(DI->getLabel() && "Missing label");
+ if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
+ return true;
+ }
case Intrinsic::objectsize: {
ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
unsigned long long Res = CI->isZero() ? -1ULL : 0;
@@ -1436,6 +1459,14 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
+ case Intrinsic::is_constant: {
+ Constant *ResCI = ConstantInt::get(II->getType(), 0);
+ unsigned ResultReg = getRegForValue(ResCI);
+ if (!ResultReg)
+ return false;
+ updateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
@@ -1565,7 +1596,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
MachineInstr *SavedLastLocalValue = getLastLocalValue();
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
- if (isa<TerminatorInst>(I)) {
+ if (I->isTerminator()) {
if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
// PHI node handling may have generated local value instructions,
// even though it failed to handle all PHI nodes.
@@ -1629,7 +1660,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
DbgLoc = DebugLoc();
// Undo phi node updates, because they will be added again by SelectionDAG.
- if (isa<TerminatorInst>(I)) {
+ if (I->isTerminator()) {
// PHI node handling may have generated local value instructions.
// We remove them because SelectionDAGISel will generate them again.
removeDeadLocalValueCode(SavedLastLocalValue);
@@ -1680,7 +1711,10 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB,
/// Emit an FNeg operation.
bool FastISel::selectFNeg(const User *I) {
- unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+ Value *X;
+ if (!match(I, m_FNeg(m_Value(X))))
+ return false;
+ unsigned OpReg = getRegForValue(X);
if (!OpReg)
return false;
bool OpRegIsKill = hasTrivialKill(I);
@@ -1770,11 +1804,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return selectBinaryOp(I, ISD::FADD);
case Instruction::Sub:
return selectBinaryOp(I, ISD::SUB);
- case Instruction::FSub:
+ case Instruction::FSub:
// FNeg is currently represented in LLVM IR as a special case of FSub.
- if (BinaryOperator::isFNeg(I))
- return selectFNeg(I);
- return selectBinaryOp(I, ISD::FSUB);
+ return selectFNeg(I) || selectBinaryOp(I, ISD::FSUB);
case Instruction::Mul:
return selectBinaryOp(I, ISD::MUL);
case Instruction::FMul:
@@ -2211,7 +2243,7 @@ unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
/// might result in multiple MBB's for one BB. As such, the start of the
/// BB might correspond to a different MBB than the end.
bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
- const TerminatorInst *TI = LLVMBB->getTerminator();
+ const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index d3c31911d677..fba728625b07 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -579,9 +579,18 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const
const Value *
FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
if (VirtReg2Value.empty()) {
+ SmallVector<EVT, 4> ValueVTs;
for (auto &P : ValueMap) {
- VirtReg2Value[P.second] = P.first;
+ ValueVTs.clear();
+ ComputeValueVTs(*TLI, Fn->getParent()->getDataLayout(),
+ P.first->getType(), ValueVTs);
+ unsigned Reg = P.second;
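+      // Editorial example: an IR value of type i128 on a 64-bit target yields
+      // two i64 registers, so both Reg and Reg + 1 must map back to P.first;
+      // the previous code recorded only the first virtual register.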
+ for (EVT VT : ValueVTs) {
+ unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ VirtReg2Value[Reg++] = P.first;
+ }
}
}
- return VirtReg2Value[Vreg];
+ return VirtReg2Value.lookup(Vreg);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index d6171f3177d7..6a6114677cc2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -524,7 +524,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
Reg = R->getReg();
DefMI = nullptr;
} else {
- Reg = getVR(Node->getOperand(0), VRBaseMap);
+ Reg = R ? R->getReg() : getVR(Node->getOperand(0), VRBaseMap);
DefMI = MRI->getVRegDef(Reg);
}
@@ -652,6 +652,12 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
unsigned NumOps = Node->getNumOperands();
+ // If the input pattern has a chain, then the root of the corresponding
+ // output pattern will get a chain as well. This can happen to be a
+ // REG_SEQUENCE (which is not "guarded" by countOperands/CountResults).
+ if (NumOps && Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ --NumOps; // Ignore chain if it exists.
+
assert((NumOps & 1) == 1 &&
"REG_SEQUENCE must have an odd number of operands!");
for (unsigned i = 1; i != NumOps; ++i) {
@@ -694,6 +700,20 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
+ SD->setIsEmitted();
+
+ if (SD->isInvalidated()) {
+ // An invalidated SDNode must generate an undef DBG_VALUE: although the
+    // original value is no longer computed, earlier DBG_VALUEs' live ranges
+ // must not leak into later code.
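+    // Editorial note: register operand 0 is assumed to print as $noreg, so the
+    // instruction built below reads "DBG_VALUE $noreg, $noreg, Var, Expr",
+    // i.e. an explicitly undefined location for the variable.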
+ auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
+ MIB.addReg(0U);
+ MIB.addReg(0U, RegState::Debug);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+ return &*MIB;
+ }
+
if (SD->getKind() == SDDbgValue::FRAMEIX) {
// Stack address; this needs to be lowered in target-dependent fashion.
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
@@ -735,6 +755,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
MIB.addImm(CI->getSExtValue());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
MIB.addFPImm(CF);
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Note: This assumes that all nullptr constants are zero-valued.
+ MIB.addImm(0);
} else {
// Could be an Undef. In any case insert an Undef so we can see what we
// dropped.
@@ -868,6 +891,15 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Flags.hasAllowReassociation())
MI->setFlag(MachineInstr::MIFlag::FmReassoc);
+
+ if (Flags.hasNoUnsignedWrap())
+ MI->setFlag(MachineInstr::MIFlag::NoUWrap);
+
+ if (Flags.hasNoSignedWrap())
+ MI->setFlag(MachineInstr::MIFlag::NoSWrap);
+
+ if (Flags.hasExact())
+ MI->setFlag(MachineInstr::MIFlag::IsExact);
}
// Emit all of the actual operands of this instruction, adding them to the
@@ -886,9 +918,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
RegState::EarlyClobber);
- // Transfer all of the memory reference descriptions of this instruction.
- MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
- cast<MachineSDNode>(Node)->memoperands_end());
+ // Set the memory reference descriptions of this instruction now that it is
+ // part of the function.
+ MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands());
// Insert the instruction into position in the block. This needs to
// happen before any custom inserter hook is called so that the
@@ -950,7 +982,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
// Finally mark unused registers as dead.
- if (!UsedRegs.empty() || II.getImplicitDefs())
+ if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 36c436918916..d3aea37f944d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -176,7 +176,6 @@ private:
SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
- SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
SDValue ExpandInsertToVectorThroughStack(SDValue Op);
@@ -239,7 +238,7 @@ public:
} // end anonymous namespace
/// Return a vector shuffle operation which
-/// performs the same shuffe in terms of order or result bytes, but on a type
+/// performs the same shuffle in terms of order or result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(
@@ -1060,6 +1059,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
case ISD::ADDROFRETURNADDR:
+ case ISD::SPONENTRY:
// These operations lie about being legal: when they claim to be legal,
// they should actually be custom-lowered.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -1094,6 +1094,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -1107,6 +1108,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -1114,6 +1121,27 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
Node->getValueType(0));
break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ break;
+ }
+ case ISD::SMULFIX: {
+ unsigned Scale = Node->getConstantOperandVal(2);
+ Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
+ Node->getValueType(0), Scale);
+ break;
+ }
+ case ISD::MSCATTER:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::MSTORE:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
+ break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -1148,6 +1176,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
}
break;
+ case ISD::FSHL:
+ case ISD::FSHR:
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
case ISD::SHL_PARTS: {
@@ -1247,6 +1277,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// Caches for hasPredecessorHelper
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Op.getNode());
Worklist.push_back(Idx.getNode());
SDValue StackPtr, Ch;
for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
@@ -2299,9 +2330,11 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
EVT DestVT,
const SDLoc &dl) {
+ EVT SrcVT = Op0.getValueType();
+
// TODO: Should any fast-math-flags be set for the created nodes?
LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
- if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+ if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
"expansion\n");
@@ -2346,116 +2379,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// subtract the bias
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
// final result
- SDValue Result;
- // handle final rounding
- if (DestVT == MVT::f64) {
- // do nothing
- Result = Sub;
- } else if (DestVT.bitsLT(MVT::f64)) {
- Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
- DAG.getIntPtrConstant(0, dl));
- } else if (DestVT.bitsGT(MVT::f64)) {
- Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
- }
+ SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
return Result;
}
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
// Code below here assumes !isSigned without checking again.
- // Implementation of unsigned i64 to f64 following the algorithm in
- // __floatundidf in compiler_rt. This implementation has the advantage
- // of performing rounding correctly, both in the default rounding mode
- // and in all alternate rounding modes.
- // TODO: Generalize this for use with other types.
- if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
- LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f64\n");
- SDValue TwoP52 =
- DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64);
- SDValue TwoP84PlusTwoP52 =
- DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl,
- MVT::f64);
- SDValue TwoP84 =
- DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64);
-
- SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
- SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
- DAG.getConstant(32, dl, MVT::i64));
- SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
- SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
- SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
- SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
- SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
- TwoP84PlusTwoP52);
- return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
- }
-
- // TODO: Generalize this for use with other types.
- if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
- LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f32\n");
- // For unsigned conversions, convert them to signed conversions using the
- // algorithm from the x86_64 __floatundidf in compiler_rt.
- if (!isSigned) {
- SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
-
- SDValue ShiftConst = DAG.getConstant(
- 1, dl, TLI.getShiftAmountTy(Op0.getValueType(), DAG.getDataLayout()));
- SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
- SDValue AndConst = DAG.getConstant(1, dl, MVT::i64);
- SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
-
- SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
- SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
-
- // TODO: This really should be implemented using a branch rather than a
- // select. We happen to get lucky and machinesink does the right
- // thing most of the time. This would be a good candidate for a
- //pseudo-op, or, even better, for whole-function isel.
- SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
- Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
- return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast);
- }
-
- // Otherwise, implement the fully general conversion.
-
- SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
- DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64));
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
- DAG.getConstant(UINT64_C(0x800), dl, MVT::i64));
- SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
- DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64));
- SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2,
- DAG.getConstant(UINT64_C(0), dl, MVT::i64),
- ISD::SETNE);
- SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0);
- SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0,
- DAG.getConstant(UINT64_C(0x0020000000000000), dl,
- MVT::i64),
- ISD::SETUGE);
- SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0);
- EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType(), DAG.getDataLayout());
-
- SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
- DAG.getConstant(32, dl, SHVT));
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
- SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
- SDValue TwoP32 =
- DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl,
- MVT::f64);
- SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
- SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
- SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
- SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
- return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
- DAG.getIntPtrConstant(0, dl));
- }
-
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
- SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()),
- Op0,
- DAG.getConstant(0, dl, Op0.getValueType()),
- ISD::SETLT);
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
+ DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
SDValue Zero = DAG.getIntPtrConstant(0, dl),
Four = DAG.getIntPtrConstant(4, dl);
SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(),
@@ -2465,7 +2398,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// as a negative number. To counteract this, the dynamic code adds an
// offset depending on the data type.
uint64_t FF;
- switch (Op0.getSimpleValueType().SimpleTy) {
+ switch (SrcVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
@@ -2614,22 +2547,22 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
// swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, VT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
// swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, VT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
// swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, VT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
return Tmp;
}
@@ -2705,126 +2638,6 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
}
}
-/// Expand the specified bitcount instruction into operations.
-SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
- const SDLoc &dl) {
- switch (Opc) {
- default: llvm_unreachable("Cannot expand this yet!");
- case ISD::CTPOP: {
- EVT VT = Op.getValueType();
- EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- unsigned Len = VT.getSizeInBits();
-
- assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
- "CTPOP not implemented for this type.");
-
- // This is the "best" algorithm from
- // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-
- SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)),
- dl, VT);
- SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)),
- dl, VT);
- SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)),
- dl, VT);
- SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)),
- dl, VT);
-
- // v = v - ((v >> 1) & 0x55555555...)
- Op = DAG.getNode(ISD::SUB, dl, VT, Op,
- DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(1, dl, ShVT)),
- Mask55));
- // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
- Op = DAG.getNode(ISD::ADD, dl, VT,
- DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
- DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(2, dl, ShVT)),
- Mask33));
- // v = (v + (v >> 4)) & 0x0F0F0F0F...
- Op = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::ADD, dl, VT, Op,
- DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(4, dl, ShVT))),
- Mask0F);
- // v = (v * 0x01010101...) >> (Len - 8)
- Op = DAG.getNode(ISD::SRL, dl, VT,
- DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
- DAG.getConstant(Len - 8, dl, ShVT));
-
- return Op;
- }
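// The removed CTPOP path is the classic parallel bit count; the same steps for
// a scalar 32-bit value look like this (sketch, not part of the patch):
#include <cstdint>
static unsigned PopCount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // sum adjacent bit pairs
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // sum nibbles
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // sum bytes
  return (V * 0x01010101u) >> 24;                   // add bytes; Len - 8 == 24
}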
- case ISD::CTLZ_ZERO_UNDEF:
- // This trivially expands to CTLZ.
- return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
- case ISD::CTLZ: {
- EVT VT = Op.getValueType();
- unsigned Len = VT.getSizeInBits();
-
- if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
- EVT SetCCVT = getSetCCResultType(VT);
- SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
- SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
- DAG.getConstant(Len, dl, VT), CTLZ);
- }
-
- // for now, we do this:
- // x = x | (x >> 1);
- // x = x | (x >> 2);
- // ...
- // x = x | (x >>16);
- // x = x | (x >>32); // for 64-bit input
- // return popcount(~x);
- //
- // Ref: "Hacker's Delight" by Henry Warren
- EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) {
- SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
- Op = DAG.getNode(ISD::OR, dl, VT, Op,
- DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
- }
- Op = DAG.getNOT(dl, Op, VT);
- return DAG.getNode(ISD::CTPOP, dl, VT, Op);
- }
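// The CTLZ fallback above smears the leading one bit into every lower position
// and then counts the zeros that remain; for a 32-bit scalar (sketch only,
// using the compiler builtin in place of the CTPOP node):
#include <cstdint>
static unsigned CountLeadingZeros32(uint32_t X) {
  X |= X >> 1;  X |= X >> 2;  X |= X >> 4;
  X |= X >> 8;  X |= X >> 16;        // all bits below the MSB are now set
  return __builtin_popcount(~X);     // 32 when X == 0, as required here
}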
- case ISD::CTTZ_ZERO_UNDEF:
- // This trivially expands to CTTZ.
- return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
- case ISD::CTTZ: {
- EVT VT = Op.getValueType();
- unsigned Len = VT.getSizeInBits();
-
- if (TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
- EVT SetCCVT = getSetCCResultType(VT);
- SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
- SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
- DAG.getConstant(Len, dl, VT), CTTZ);
- }
-
- // for now, we use: { return popcount(~x & (x - 1)); }
- // unless the target has ctlz but not ctpop, in which case we use:
- // { return 32 - nlz(~x & (x-1)); }
- // Ref: "Hacker's Delight" by Henry Warren
- SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNOT(dl, Op, VT),
- DAG.getNode(ISD::SUB, dl, VT, Op,
- DAG.getConstant(1, dl, VT)));
- // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
- if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
- return DAG.getNode(ISD::SUB, dl, VT,
- DAG.getConstant(VT.getSizeInBits(), dl, VT),
- DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
- return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
- }
- }
-}
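// Likewise the removed CTTZ fallback: ~x & (x - 1) builds a mask of exactly
// the trailing zero bits, which is then population-counted (giving the bit
// width for x == 0). A 32-bit sketch using the compiler builtin:
#include <cstdint>
static unsigned CountTrailingZeros32(uint32_t X) {
  return __builtin_popcount(~X & (X - 1));
}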
-
bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Trying to expand node\n");
SmallVector<SDValue, 8> Results;
@@ -2832,13 +2645,23 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
bool NeedInvert;
switch (Node->getOpcode()) {
+ case ISD::ABS:
+ if (TLI.expandABS(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTPOP:
+ if (TLI.expandCTPOP(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
+ if (TLI.expandCTLZ(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
- Results.push_back(Tmp1);
+ if (TLI.expandCTTZ(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
break;
case ISD::BITREVERSE:
Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl));
@@ -3033,8 +2856,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
- case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
+ Results.push_back(Tmp1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SINT_TO_FP:
Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
Node->getOperand(0), Node->getValueType(0), dl);
Results.push_back(Tmp1);
@@ -3043,29 +2871,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
Results.push_back(Tmp1);
break;
- case ISD::FP_TO_UINT: {
- SDValue True, False;
- EVT VT = Node->getOperand(0).getValueType();
- EVT NVT = Node->getValueType(0);
- APFloat apf(DAG.EVTToAPFloatSemantics(VT),
- APInt::getNullValue(VT.getSizeInBits()));
- APInt x = APInt::getSignMask(NVT.getSizeInBits());
- (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
- Tmp1 = DAG.getConstantFP(apf, dl, VT);
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT),
- Node->getOperand(0),
- Tmp1, ISD::SETLT);
- True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
- // TODO: Should any fast-math-flags be set for the FSUB?
- False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
- DAG.getNode(ISD::FSUB, dl, VT,
- Node->getOperand(0), Tmp1));
- False = DAG.getNode(ISD::XOR, dl, NVT, False,
- DAG.getConstant(x, dl, NVT));
- Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False);
- Results.push_back(Tmp1);
+ case ISD::FP_TO_UINT:
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
break;
- }
case ISD::VAARG:
Results.push_back(DAG.expandVAArg(Node));
Results.push_back(Results[0].getValue(1));
@@ -3252,7 +3061,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
-
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG))
+ Results.push_back(Expanded);
+ break;
+ }
case ISD::FSIN:
case ISD::FCOS: {
EVT VT = Node->getValueType(0);
@@ -3460,6 +3274,25 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
+ case ISD::FSHL:
+ case ISD::FSHR:
+ if (TLI.expandFunnelShift(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (TLI.expandROT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ Results.push_back(TLI.expandAddSubSat(Node, DAG));
+ break;
+ case ISD::SMULFIX:
+ Results.push_back(TLI.getExpandedFixedPointMultiplication(Node, DAG));
+ break;
case ISD::SADDO:
case ISD::SSUBO: {
SDValue LHS = Node->getOperand(0);
@@ -3852,10 +3685,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
- // If we expanded the SETCC by inverting the condition code, then wrap
- // the existing SETCC in a NOT to restore the intended condition.
- if (NeedInvert)
- Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0));
+ assert(!NeedInvert && "Don't know how to invert BR_CC!");
// If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
// node.
@@ -3899,46 +3729,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ReplaceNode(SDValue(Node, 0), Result);
break;
}
- case ISD::ROTL:
- case ISD::ROTR: {
- bool IsLeft = Node->getOpcode() == ISD::ROTL;
- SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1);
- EVT ResVT = Node->getValueType(0);
- EVT OpVT = Op0.getValueType();
- assert(OpVT == ResVT &&
- "The result and the operand types of rotate should match");
- EVT ShVT = Op1.getValueType();
- SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT);
-
- // If a rotate in the other direction is legal, use it.
- unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
- if (TLI.isOperationLegal(RevRot, ResVT)) {
- SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
- Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub));
- break;
- }
-
- // Otherwise,
- // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
- // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
- //
- assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) &&
- "Expecting the type bitwidth to be a power of 2");
- unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
- unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
- SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT,
- Width, DAG.getConstant(1, dl, ShVT));
- SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
- SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1);
- SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1);
-
- SDValue Or = DAG.getNode(ISD::OR, dl, ResVT,
- DAG.getNode(ShOpc, dl, ResVT, Op0, And0),
- DAG.getNode(HsOpc, dl, ResVT, Op0, And1));
- Results.push_back(Or);
- break;
- }
-
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
@@ -3958,7 +3748,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
return false;
}
- LLVM_DEBUG(dbgs() << "Succesfully expanded node\n");
+ LLVM_DEBUG(dbgs() << "Successfully expanded node\n");
ReplaceNode(Node, Results.data());
return true;
}
@@ -4031,11 +3821,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
}
case ISD::FMINNUM:
+ case ISD::STRICT_FMINNUM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
RTLIB::FMIN_F80, RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128));
break;
case ISD::FMAXNUM:
+ case ISD::STRICT_FMAXNUM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
RTLIB::FMAX_F80, RTLIB::FMAX_F128,
RTLIB::FMAX_PPCF128));
@@ -4046,6 +3838,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128));
break;
+ case ISD::FCBRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80, RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128));
+ break;
case ISD::FSIN:
case ISD::STRICT_FSIN:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
@@ -4128,16 +3925,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::EXP2_PPCF128));
break;
case ISD::FTRUNC:
+ case ISD::STRICT_FTRUNC:
Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128));
break;
case ISD::FFLOOR:
+ case ISD::STRICT_FFLOOR:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128));
break;
case ISD::FCEIL:
+ case ISD::STRICT_FCEIL:
Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
RTLIB::CEIL_F80, RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128));
@@ -4157,6 +3957,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::NEARBYINT_PPCF128));
break;
case ISD::FROUND:
+ case ISD::STRICT_FROUND:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
RTLIB::ROUND_F64,
RTLIB::ROUND_F80,
@@ -4188,6 +3989,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::DIV_PPCF128));
break;
case ISD::FREM:
+ case ISD::STRICT_FREM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
RTLIB::REM_F80, RTLIB::REM_F128,
RTLIB::REM_PPCF128));
@@ -4260,6 +4062,21 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::MUL_I16, RTLIB::MUL_I32,
RTLIB::MUL_I64, RTLIB::MUL_I128));
break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("LibCall explicitly requested, but not available");
+ case MVT::i32:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false));
+ break;
+ case MVT::i64:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false));
+ break;
+ case MVT::i128:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false));
+ break;
+ }
+ break;
}
// Replace the original node with the legalized result.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2e6f6edbce55..4644e9588e7b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1750,6 +1750,11 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
SDValue R = SDValue();
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ return false;
+ }
+
// Nodes that use a promotion-requiring floating point operand, but don't
// produce a promotion-requiring floating point result, need to be legalized
// to use the promoted float operand. Nodes that produce at least one
@@ -1905,8 +1910,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
// Binary FP Operations
case ISD::FADD:
case ISD::FDIV:
- case ISD::FMAXNAN:
- case ISD::FMINNAN:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM:
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMUL:
@@ -2138,9 +2143,9 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) {
SDValue TrueVal = GetPromotedFloat(N->getOperand(2));
SDValue FalseVal = GetPromotedFloat(N->getOperand(3));
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
- N->getOperand(0), N->getOperand(1), TrueVal, FalseVal,
- N->getOperand(4));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ TrueVal.getNode()->getValueType(0), N->getOperand(0),
+ N->getOperand(1), TrueVal, FalseVal, N->getOperand(4));
}
// Construct a SDNode that transforms the SINT or UINT operand to the promoted
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 133831fa76fb..5fbc70fce60d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -118,6 +118,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
+ case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break;
+
case ISD::AND:
case ISD::OR:
case ISD::XOR:
@@ -138,9 +140,17 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SMULO:
case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+ case ISD::ADDE:
+ case ISD::SUBE:
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
+ case ISD::SMULFIX: Res = PromoteIntRes_SMULFIX(N); break;
+
case ISD::ATOMIC_LOAD:
Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
@@ -305,12 +315,45 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
// make us bitcast between two vectors which are legalized in different ways.
if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+ // If the output type is also a vector and widening it to the same size
+ // as the widened input type would be a legal type, we can widen the bitcast
+ // and handle the promotion after.
+ if (NOutVT.isVector()) {
+ unsigned WidenInSize = NInVT.getSizeInBits();
+ unsigned OutSize = OutVT.getSizeInBits();
+ if (WidenInSize % OutSize == 0) {
+ unsigned Scale = WidenInSize / OutSize;
+ EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(),
+ OutVT.getVectorElementType(),
+ OutVT.getVectorNumElements() * Scale);
+ if (isTypeLegal(WideOutVT)) {
+ InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
+ DAG.getConstant(0, dl, IdxTy));
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp);
+ }
+ }
+ }
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
CreateStackStoreLoad(InOp, OutVT));
}
+// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount
+// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
+static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ // If the value won't fit in the preferred type, just use something safe. It
+ // will be legalized when the shift is expanded.
+ if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits())
+ ShiftVT = MVT::i32;
+ return ShiftVT;
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
@@ -318,10 +361,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(
- ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
- DAG.getConstant(DiffBits, dl,
- TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+ EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl, ShiftVT));
}
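// Promoted BSWAP swaps in the wider type and then shifts the bytes of interest
// back down by DiffBits; e.g. an i16 byte swap performed in i32 (sketch using
// the GCC/Clang builtin for the wide BSWAP):
#include <cstdint>
static uint16_t BSwap16ViaI32(uint16_t V) {
  uint32_t Wide = V;                              // promoted operand
  return uint16_t(__builtin_bswap32(Wide) >> 16); // DiffBits == 32 - 16
}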
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@@ -331,10 +373,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(
- ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
- DAG.getConstant(DiffBits, dl,
- TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+ EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ return DAG.getNode(ISD::SRL, dl, NVT,
+ DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl, ShiftVT));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
@@ -399,8 +441,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
- N->getOperand(1));
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // If the input also needs to be promoted, do that first so we can get a
+ // good idea for the output type.
+ if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue In = GetPromotedInteger(Op0);
+
+ // If the new type is larger than NVT, use it. We probably won't need to
+ // promote it again.
+ EVT SVT = In.getValueType().getScalarType();
+ if (SVT.bitsGE(NVT)) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1);
+ return DAG.getAnyExtOrTrunc(Ext, dl, NVT);
+ }
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1);
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
@@ -438,6 +498,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ return DAG.getNode(N->getOpcode(), dl, NVT);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -483,11 +550,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
+ SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
- N->getMask(), ExtSrc0, N->getMemoryVT(),
+ N->getMask(), ExtPassThru, N->getMemoryVT(),
N->getMemOperand(), ISD::SEXTLOAD);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -497,12 +564,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue ExtSrc0 = GetPromotedInteger(N->getValue());
- assert(NVT == ExtSrc0.getValueType() &&
+ SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
+ assert(NVT == ExtPassThru.getValueType() &&
"Gather result type and the passThru agrument type should be the same");
SDLoc dl(N);
- SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(),
+ SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(),
N->getIndex(), N->getScale() };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
@@ -534,6 +601,61 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
return SDValue(Res.getNode(), 1);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
+ // For promoting iN -> iM, this can be expanded by
+ // 1. ANY_EXTEND iN to iM
+ // 2. SHL by M-N
+ // 3. [US][ADD|SUB]SAT
+ // 4. L/ASHR by M-N
+ SDLoc dl(N);
+ SDValue Op1 = N->getOperand(0);
+ SDValue Op2 = N->getOperand(1);
+ unsigned OldBits = Op1.getScalarValueSizeInBits();
+
+ unsigned Opcode = N->getOpcode();
+ unsigned ShiftOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ ShiftOp = ISD::SRA;
+ break;
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ ShiftOp = ISD::SRL;
+ break;
+ default:
+ llvm_unreachable("Expected opcode to be signed or unsigned saturation "
+ "addition or subtraction");
+ }
+
+ SDValue Op1Promoted = GetPromotedInteger(Op1);
+ SDValue Op2Promoted = GetPromotedInteger(Op2);
+
+ EVT PromotedType = Op1Promoted.getValueType();
+ unsigned NewBits = PromotedType.getScalarSizeInBits();
+ unsigned SHLAmount = NewBits - OldBits;
+ EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
+ Op2Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+
+ SDValue Result =
+ DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+}
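// The point of the SHL/SRA pair above is that saturating in the wide type at
// its own boundaries and then shifting back is equivalent to saturating at the
// narrow boundaries. A scalar sketch for i8 SADDSAT carried out in i32; both
// helpers are illustrative only, and the wide saturating add is written out by
// hand since C++ has no such primitive:
#include <cstdint>
#include <limits>
static int32_t SAddSat32(int32_t A, int32_t B) {
  int32_t R;
  if (__builtin_add_overflow(A, B, &R))
    return A < 0 ? std::numeric_limits<int32_t>::min()
                 : std::numeric_limits<int32_t>::max();
  return R;
}
static int8_t SAddSat8(int8_t A, int8_t B) {
  int32_t WA = int32_t(uint32_t(A) << 24);  // ANY_EXTEND + SHL by 24
  int32_t WB = int32_t(uint32_t(B) << 24);
  return int8_t(SAddSat32(WA, WB) >> 24);   // saturate wide, then SRA by 24
}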
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SMULFIX(SDNode *N) {
+ // Can just promote the operands then continue with operation.
+ SDLoc dl(N);
+ SDValue Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+ SDValue Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ EVT PromotedType = Op1Promoted.getValueType();
+ return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
+ N->getOperand(2));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
@@ -763,6 +885,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
+// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that
+// the third operand of ADDE/SUBE nodes is a carry flag, which differs from
+// the ADDCARRY/SUBCARRY nodes in that their third operand is a carry Boolean.
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
@@ -960,6 +1085,13 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
+
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
+
+ case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
+
+ case ISD::SMULFIX: Res = PromoteIntOp_SMULFIX(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -981,9 +1113,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
/// shared among BR_CC, SELECT_CC, and SETCC handlers.
void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
ISD::CondCode CCCode) {
- // We have to insert explicit sign or zero extends. Note that we could
- // insert sign extends for ALL conditions, but zero extend is cheaper on
- // many machines (an AND instead of two shifts), so prefer it.
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions. For those operations where either
+ // zero or sign extension would be valid, use SExtOrZExtPromotedInteger
+ // which will choose the cheapest for the target.
switch (CCCode) {
default: llvm_unreachable("Unknown integer comparison!");
case ISD::SETEQ:
@@ -994,7 +1127,7 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
// We would prefer to promote the comparison operand with sign extension.
// If the width of OpL/OpR excluding the duplicated sign bits is no greater
// than the width of NewLHS/NewRHS, we can avoid inserting a real truncate
- // instruction, which is redudant eventually.
+ // instruction, which is redundant eventually.
unsigned OpLEffectiveBits =
OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
unsigned OpREffectiveBits =
@@ -1004,8 +1137,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
NewLHS = OpL;
NewRHS = OpR;
} else {
- NewLHS = ZExtPromotedInteger(NewLHS);
- NewRHS = ZExtPromotedInteger(NewRHS);
+ NewLHS = SExtOrZExtPromotedInteger(NewLHS);
+ NewRHS = SExtOrZExtPromotedInteger(NewRHS);
}
break;
}
@@ -1013,11 +1146,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
case ISD::SETUGT:
case ISD::SETULE:
case ISD::SETULT:
- // ALL of these operations will work if we either sign or zero extend
- // the operands (including the unsigned comparisons!). Zero extend is
- // usually a simpler/cheaper operation, so prefer it.
- NewLHS = ZExtPromotedInteger(NewLHS);
- NewRHS = ZExtPromotedInteger(NewRHS);
+ NewLHS = SExtOrZExtPromotedInteger(NewLHS);
+ NewRHS = SExtOrZExtPromotedInteger(NewRHS);
break;
case ISD::SETGE:
case ISD::SETGT:
@@ -1219,28 +1349,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
SDLoc dl(N);
bool TruncateStore = false;
- if (OpNo == 2) {
- // Mask comes before the data operand. If the data operand is legal, we just
- // promote the mask.
- // When the data operand has illegal type, we should legalize the data
- // operand first. The mask will be promoted/splitted/widened according to
- // the data operand type.
- if (TLI.isTypeLegal(DataVT)) {
- Mask = PromoteTargetBoolean(Mask, DataVT);
- // Update in place.
- SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
- NewOps[2] = Mask;
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
- }
-
- if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
- return PromoteIntOp_MSTORE(N, 3);
- if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
- return WidenVecOp_MSTORE(N, 3);
- assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
- return SplitVecOp_MSTORE(N, 3);
+ if (OpNo == 3) {
+ Mask = PromoteTargetBoolean(Mask, DataVT);
+ // Update in place.
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[3] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
} else { // Data operand
- assert(OpNo == 3 && "Unexpected operand for promotion");
+ assert(OpNo == 1 && "Unexpected operand for promotion");
DataOp = GetPromotedInteger(DataOp);
TruncateStore = true;
}
@@ -1274,14 +1390,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
- SDValue Res = SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
- // updated in place.
- if (Res.getNode() == N)
- return Res;
-
- ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
- ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- return SDValue();
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
@@ -1342,6 +1451,30 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_SMULFIX(SDNode *N) {
+ SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
+ return SDValue(
+ DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) {
+ // Promote the RETURNADDR/FRAMEADDR argument to a supported integer width.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1 && "Don't know how to promote this operand!");
+ // Promote the rw, locality, and cache type arguments to a supported integer
+ // width.
+ SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
+ SDValue Op3 = ZExtPromotedInteger(N->getOperand(3));
+ SDValue Op4 = ZExtPromotedInteger(N->getOperand(4));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
+ Op2, Op3, Op4),
+ 0);
+}
+
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
@@ -1475,6 +1608,12 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
case ISD::UMULO:
case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
+
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
+ case ISD::SMULFIX: ExpandIntRes_SMULFIX(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1595,8 +1734,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
- KnownBits Known;
- DAG.computeKnownBits(N->getOperand(1), Known);
+ KnownBits Known = DAG.computeKnownBits(N->getOperand(1));
// If we don't know anything about the high bits, exit.
if (((Known.Zero|Known.One) & HighBitMask) == 0)
@@ -2437,6 +2575,101 @@ void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
ReplaceValueWith(SDValue(N, 1), R.getValue(2));
}
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Result = TLI.expandAddSubSat(N, DAG);
+ SplitInteger(Result, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ uint64_t Scale = N->getConstantOperandVal(2);
+ if (!Scale) {
+ SDValue Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(LHS, LL, LH);
+ GetExpandedInteger(RHS, RL, RH);
+ SmallVector<SDValue, 4> Result;
+
+ if (!TLI.expandMUL_LOHI(ISD::SMUL_LOHI, VT, dl, LHS, RHS, Result, NVT, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
+ LL, LH, RL, RH)) {
+ report_fatal_error("Unable to expand SMUL_FIX using SMUL_LOHI.");
+ return;
+ }
+
+ unsigned VTSize = VT.getScalarSizeInBits();
+ unsigned NVTSize = NVT.getScalarSizeInBits();
+ EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+
+ // Shift whole amount by scale.
+ SDValue ResultLL = Result[0];
+ SDValue ResultLH = Result[1];
+ SDValue ResultHL = Result[2];
+ SDValue ResultHH = Result[3];
+
+ // After getting the multiplication result in 4 parts, we need to perform a
+ // shift right by the amount of the scale to get the result in that scale.
+ // Let's say we multiply two 64-bit numbers. The resulting value can be held in
+ // 128 bits that are cut into 4 32-bit parts:
+ //
+ // HH HL LH LL
+ // |---32---|---32---|---32---|---32---|
+ // 128 96 64 32 0
+ //
+ // |------VTSize-----|
+ //
+ // |NVTSize-|
+ //
+ // The resulting Lo and Hi will only need to be one of these 32-bit parts
+ // after shifting.
+ if (Scale < NVTSize) {
+ // If the scale is less than the size of the VT we expand to, the Hi and
+ // Lo of the result will be in the first 2 parts of the result after
+ // shifting right. This only requires shifting by the scale as far as the
+ // third part in the result (ResultHL).
+ SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy);
+ SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy);
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt);
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt));
+ Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
+ } else if (Scale == NVTSize) {
+ // If the scales are equal, Lo and Hi are ResultLH and ResultHL,
+ // respectively. Avoid shifting to prevent undefined behavior.
+ Lo = ResultLH;
+ Hi = ResultHL;
+ } else if (Scale < VTSize) {
+ // If the scale is instead less than the old VT size, but greater than or
+ // equal to the expanded VT size, the first part of the result (ResultLL) is
+ // no longer a part of Lo because it would be scaled out anyway. Instead we
+ // can start shifting right from the fourth part (ResultHH) to the second
+ // part (ResultLH), and ResultLH will be the new Lo.
+ SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy);
+ SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy);
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
+ Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
+ } else {
+ llvm_unreachable(
+ "Expected the scale to be less than the width of the operands");
+ }
+}
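// For reference, the value SMULFIX computes is simply the full-width signed
// product shifted right by Scale; in scalar C++ for 32-bit operands (the code
// above reconstructs the same thing from the four NVT-sized product pieces):
#include <cstdint>
static int32_t SMulFix32(int32_t A, int32_t B, unsigned Scale) {
  int64_t Product = int64_t(A) * int64_t(B); // full 64-bit product
  return int32_t(Product >> Scale);          // drop Scale fractional bits
}
// With Scale == 16 this is the usual Q16.16 fixed-point multiply.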
+
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue &Lo, SDValue &Hi) {
SDValue LHS = Node->getOperand(0);
@@ -2705,25 +2938,56 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
EVT VT = N->getValueType(0);
SDLoc dl(N);
- // A divide for UMULO should be faster than a function call.
if (N->getOpcode() == ISD::UMULO) {
+ // This section expands the operation into the following sequence of
+ // instructions. `iNh` here refers to a type which has half the bit width of
+ // the type the original operation operated on.
+ //
+ // %0 = %LHS.HI != 0 && %RHS.HI != 0
+ // %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
+ // %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
+ // %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN)
+ // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh
+ // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 )
+ //
+ // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 }
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
-
- SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
- SplitInteger(MUL, Lo, Hi);
-
- // A divide for UMULO will be faster than a function call. Select to
- // make sure we aren't using 0.
- SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT),
- RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ);
- SDValue NotZero = DAG.getSelect(dl, VT, isZero,
- DAG.getConstant(1, dl, VT), RHS);
- SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
- SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
- ISD::SETNE);
- Overflow = DAG.getSelect(dl, N->getValueType(1), isZero,
- DAG.getConstant(0, dl, N->getValueType(1)),
- Overflow);
+ SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
+ SplitInteger(LHS, LHSLow, LHSHigh);
+ SplitInteger(RHS, RHSLow, RHSHigh);
+ EVT HalfVT = LHSLow.getValueType()
+ , BitVT = N->getValueType(1);
+ SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT);
+ SDVTList VTFullAddO = DAG.getVTList(VT, BitVT);
+
+ SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
+ SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
+ DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
+ DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
+
+ SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
+ SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+ One.getValue(0));
+
+ SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
+ SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+ Two.getValue(0));
+
+ // Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
+ // know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn’t this
+ // operation recursively legalized?).
+ //
+ // Many backends understand this pattern and will convert into LOHI
+ // themselves, if applicable.
+ SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
+ SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh);
+ SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1));
+ SplitInteger(Five, Lo, Hi);
ReplaceValueWith(SDValue(N, 1), Overflow);
return;
}
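// The %0..%5 recipe above, transcribed for a 64-bit UMULO built out of 32-bit
// halves (a sketch only; the legalizer emits DAG nodes rather than calls, and
// __builtin_add_overflow stands in for the final UADDO):
#include <cstdint>
static bool UMulO64(uint64_t L, uint64_t R, uint64_t &Out) {
  uint32_t LL = uint32_t(L), LH = uint32_t(L >> 32);
  uint32_t RL = uint32_t(R), RH = uint32_t(R >> 32);
  bool Ovf = LH != 0 && RH != 0;                    // %0
  uint64_t One = uint64_t(LH) * RL;                 // %1
  Ovf |= (One >> 32) != 0;
  uint64_t Two = uint64_t(RH) * LL;                 // %2
  Ovf |= (Two >> 32) != 0;
  uint64_t Three = uint64_t(LL) * RL;               // %3
  uint64_t Four = (One << 32) + (Two << 32);        // %4; a wrap here implies %0
  Ovf |= __builtin_add_overflow(Three, Four, &Out); // %5
  return Ovf;
}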
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 135922d6f267..032000f6cb79 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -281,6 +281,20 @@ private:
return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
}
+ // Get a promoted operand and sign or zero extend it to the final size
+ // (depending on TargetLoweringInfo::isSExtCheaperThanZExt). For a given
+ // subtarget and type, the choice of sign or zero-extension will be
+ // consistent.
+ SDValue SExtOrZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ SDLoc DL(Op);
+ Op = GetPromotedInteger(Op);
+ if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ return DAG.getZeroExtendInReg(Op, DL, OldVT.getScalarType());
+ }
+
// Integer Result Promotion.
void PromoteIntegerResult(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
@@ -330,6 +344,9 @@ private:
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
+ SDValue PromoteIntRes_SMULFIX(SDNode *N);
+ SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -360,6 +377,9 @@ private:
SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
+ SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SMULFIX(SDNode *N);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -414,6 +434,8 @@ private:
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SMULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -650,6 +672,7 @@ private:
SDValue ScalarizeVecRes_BinOp(SDNode *N);
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_StrictFPOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
@@ -668,6 +691,8 @@ private:
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_SMULFIX(SDNode *N);
+
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
@@ -703,6 +728,8 @@ private:
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
+
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -780,6 +807,7 @@ private:
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
+ SDValue WidenVecRes_StrictFP(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
@@ -796,6 +824,7 @@ private:
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
@@ -844,9 +873,6 @@ private:
/// MaskVT to ToMaskVT if needed with vector extension or truncation.
SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT);
- /// Get the target mask VT, and widen if needed.
- EVT getSETCCWidenedResultTy(SDValue SetCC);
-
//===--------------------------------------------------------------------===//
// Generic Splitting: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index df3134828af5..b9d370441c3e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -327,7 +327,7 @@ void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
NumElements >>= 1;
SplitInteger(Op, Parts[0], Parts[1]);
if (DAG.getDataLayout().isBigEndian())
- std::swap(Parts[0], Parts[1]);
+ std::swap(Parts[0], Parts[1]);
IntegerToVector(Parts[0], NumElements, Ops, EltVT);
IntegerToVector(Parts[1], NumElements, Ops, EltVT);
} else {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3a98a7a904cb..4923a529c21b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -86,9 +86,10 @@ class VectorLegalizer {
/// operations to legalize them.
SDValue Expand(SDValue Op);
- /// Implements expansion for FNEG; falls back to UnrollVectorOp if
- /// FSUB isn't legal.
- ///
+ /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
+ /// FP_TO_SINT isn't legal.
+ SDValue ExpandFP_TO_UINT(SDValue Op);
+
/// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
/// SINT_TO_FLOAT and SHR on vectors isn't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
@@ -116,6 +117,12 @@ class VectorLegalizer {
/// the remaining lanes, finally bitcasting to the proper type.
SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
+ /// Implement expand-based legalization of ABS vector operations.
+ /// If the following expansion is legal/custom then do it:
+ /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1))
+ /// else unroll the operation.
+ SDValue ExpandABS(SDValue Op);
+
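// The ABS expansion named above is the familiar branch-free absolute value;
// for a scalar int32_t it reads (sketch only, the vector path applies it
// lane-wise and falls back to unrolling when the expansion is not legal):
#include <cstdint>
static int32_t Abs32(int32_t X) {
  int32_t Sign = X >> 31;      // SRA x, sizeof(x)*8 - 1: 0 or -1
  return (X + Sign) ^ Sign;    // (XOR (ADD x, sign), sign)
}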
/// Expand bswap of vectors into a shuffle if legal.
SDValue ExpandBSWAP(SDValue Op);
@@ -128,8 +135,13 @@ class VectorLegalizer {
SDValue ExpandFNEG(SDValue Op);
SDValue ExpandFSUB(SDValue Op);
SDValue ExpandBITREVERSE(SDValue Op);
+ SDValue ExpandCTPOP(SDValue Op);
SDValue ExpandCTLZ(SDValue Op);
- SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
+ SDValue ExpandCTTZ(SDValue Op);
+ SDValue ExpandFunnelShift(SDValue Op);
+ SDValue ExpandROT(SDValue Op);
+ SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
+ SDValue ExpandAddSubSat(SDValue Op);
SDValue ExpandStrictFPOp(SDValue Op);
/// Implements vector promotion.
@@ -226,7 +238,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
Op.getResNo());
- bool HasVectorValue = false;
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -240,16 +251,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom:
if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
- if (Lowered == Result)
- return TranslateLegalizeResults(Op, Lowered);
- Changed = true;
- if (Lowered->getNumValues() != Op->getNumValues()) {
- // This expanded to something other than the load. Assume the
- // lowering code took care of any chain values, and just handle the
- // returned value.
- assert(Result.getValue(1).use_empty() &&
- "There are still live users of the old chain!");
- return LegalizeOp(Lowered);
+ assert(Lowered->getNumValues() == Op->getNumValues() &&
+ "Unexpected number of results");
+ if (Lowered != Result) {
+ // Make sure the new code is also legal.
+ Lowered = LegalizeOp(Lowered);
+ Changed = true;
}
return TranslateLegalizeResults(Op, Lowered);
}
@@ -272,7 +279,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom: {
SDValue Lowered = TLI.LowerOperation(Result, DAG);
- Changed = Lowered != Result;
+ if (Lowered != Result) {
+ // Make sure the new code is also legal.
+ Lowered = LegalizeOp(Lowered);
+ Changed = true;
+ }
return TranslateLegalizeResults(Op, Lowered);
}
case TargetLowering::Expand:
@@ -280,9 +291,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return LegalizeOp(ExpandStore(Op));
}
}
- } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
- HasVectorValue = true;
+ }
+ bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
J != E;
++J)
@@ -298,6 +309,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -311,6 +323,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -321,6 +339,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
@@ -338,8 +358,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
+ case ISD::FSHL:
+ case ISD::FSHR:
case ISD::ROTL:
case ISD::ROTR:
+ case ISD::ABS:
case ISD::BSWAP:
case ISD::BITREVERSE:
case ISD::CTLZ:
@@ -361,8 +384,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FABS:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
@@ -394,8 +419,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
case ISD::FCANONICALIZE:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
+ case ISD::SMULFIX: {
+ unsigned Scale = Node->getConstantOperandVal(2);
+ Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
+ Node->getValueType(0), Scale);
+ break;
+ }
case ISD::FP_ROUND_INREG:
Action = TLI.getOperationAction(Node->getOpcode(),
cast<VTSDNode>(Node->getOperand(1))->getVT());
@@ -405,14 +440,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
- case ISD::MSCATTER:
- Action = TLI.getOperationAction(Node->getOpcode(),
- cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
- break;
- case ISD::MSTORE:
- Action = TLI.getOperationAction(Node->getOpcode(),
- cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
- break;
}
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
@@ -720,6 +747,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandVSELECT(Op);
case ISD::SELECT:
return ExpandSELECT(Op);
+ case ISD::FP_TO_UINT:
+ return ExpandFP_TO_UINT(Op);
case ISD::UINT_TO_FP:
return ExpandUINT_TO_FLOAT(Op);
case ISD::FNEG:
@@ -728,17 +757,37 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandFSUB(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
+ case ISD::ABS:
+ return ExpandABS(Op);
case ISD::BITREVERSE:
return ExpandBITREVERSE(Op);
+ case ISD::CTPOP:
+ return ExpandCTPOP(Op);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
return ExpandCTLZ(Op);
+ case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- return ExpandCTTZ_ZERO_UNDEF(Op);
+ return ExpandCTTZ(Op);
+ case ISD::FSHL:
+ case ISD::FSHR:
+ return ExpandFunnelShift(Op);
+ case ISD::ROTL:
+ case ISD::ROTR:
+ return ExpandROT(Op);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ return ExpandFMINNUM_FMAXNUM(Op);
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ return ExpandAddSubSat(Op);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -752,6 +801,12 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
return ExpandStrictFPOp(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
@@ -866,7 +921,7 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
// First build an any-extend node which can be legalized above when we
// recurse through it.
- Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
+ Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
// Now we need sign extend. Do this by shifting the elements. Even if these
// aren't legal operations, they have a better chance of being legalized
@@ -1024,10 +1079,35 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
}
+SDValue VectorLegalizer::ExpandABS(SDValue Op) {
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandABS(Op.getNode(), Result, DAG))
+ return Result;
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG))
+ return Result;
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
EVT VT = Op.getOperand(0).getValueType();
SDLoc DL(Op);
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG))
+ return Result;
+
// Make sure that the SINT_TO_FP and SRL instructions are available.
if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
@@ -1086,56 +1166,55 @@ SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandCTPOP(Op.getNode(), Result, DAG))
+ return Result;
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
- EVT VT = Op.getValueType();
- unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+ SDValue Result;
+ if (TLI.expandCTLZ(Op.getNode(), Result, DAG))
+ return Result;
- // If the non-ZERO_UNDEF version is supported we can use that instead.
- if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
- TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
- SDLoc DL(Op);
- return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
- }
+ return DAG.UnrollVectorOp(Op.getNode());
+}
- // If CTPOP is available we can lower with a CTPOP based method:
- // u16 ctlz(u16 x) {
- // x |= (x >> 1);
- // x |= (x >> 2);
- // x |= (x >> 4);
- // x |= (x >> 8);
- // return ctpop(~x);
- // }
- // Ref: "Hacker's Delight" by Henry Warren
- if (isPowerOf2_32(NumBitsPerElt) &&
- TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
- TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
- TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
- SDLoc DL(Op);
- SDValue Res = Op.getOperand(0);
- EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandCTTZ(Op.getNode(), Result, DAG))
+ return Result;
- for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
- Res = DAG.getNode(
- ISD::OR, DL, VT, Res,
- DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
+ return DAG.UnrollVectorOp(Op.getNode());
+}
- Res = DAG.getNOT(DL, Res, VT);
- return DAG.getNode(ISD::CTPOP, DL, VT, Res);
- }
+SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandFunnelShift(Op.getNode(), Result, DAG))
+ return Result;
- // Otherwise go ahead and unroll.
return DAG.UnrollVectorOp(Op.getNode());
}
-SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
- // If the non-ZERO_UNDEF version is supported we can use that instead.
- if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) {
- SDLoc DL(Op);
- return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0));
- }
+SDValue VectorLegalizer::ExpandROT(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandROT(Op.getNode(), Result, DAG))
+ return Result;
- // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG))
+ return Expanded;
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) {
+ if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG))
+ return Expanded;
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -1183,7 +1262,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
AddLegalizedOperand(Op.getValue(0), Result);
AddLegalizedOperand(Op.getValue(1), NewChain);
- return NewChain;
+ return Op.getResNo() ? NewChain : Result;
}
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index b21249d01ef9..f367e9358576 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -113,13 +113,20 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+
case ISD::FPOW:
case ISD::FREM:
case ISD::FSUB:
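The SADDSAT/UADDSAT/SSUBSAT/USUBSAT cases added above scalarize to the corresponding saturating integer operations. A rough scalar model, shown here for a signed 8-bit element (illustrative only; the nodes accept any integer element type):

    #include <cstdint>

    // Signed saturating add: compute in a wider type, then clamp to the
    // element's representable range instead of wrapping.
    int8_t saddsat8(int8_t a, int8_t b) {
      int sum = int(a) + int(b);
      if (sum > INT8_MAX) return INT8_MAX;
      if (sum < INT8_MIN) return INT8_MIN;
      return (int8_t)sum;
    }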
@@ -139,6 +146,35 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMA:
R = ScalarizeVecRes_TernaryOp(N);
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
+ R = ScalarizeVecRes_StrictFPOp(N);
+ break;
+ case ISD::SMULFIX:
+ R = ScalarizeVecRes_SMULFIX(N);
+ break;
}
// If R is null, the sub-method took care of registering the result.
@@ -161,6 +197,44 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
Op0.getValueType(), Op0, Op1, Op2);
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SMULFIX(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = N->getOperand(2);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
+ Op2);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
+ EVT VT = N->getValueType(0).getVectorElementType();
+ unsigned NumOpers = N->getNumOperands();
+ SDValue Chain = N->getOperand(0);
+ EVT ValueVTs[] = {VT, MVT::Other};
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 4> Opers;
+
+ // The Chain is the first operand.
+ Opers.push_back(Chain);
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOpers; ++i) {
+ SDValue Oper = N->getOperand(i);
+
+ if (Oper.getValueType().isVector())
+ Oper = GetScalarizedVector(Oper);
+
+ Opers.push_back(Oper);
+ }
+
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers);
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
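ScalarizeVecRes_SMULFIX above re-emits the node on the scalarized operands and passes the scale operand through unchanged. Per element, the fixed-point multiply it represents computes roughly the following (a scalar sketch for a 32-bit element; the real node carries the scale as a constant operand):

    #include <cstdint>

    // Fixed-point signed multiply: multiply in a double-width type, then
    // arithmetically shift right by the scale to drop the fractional bits.
    int32_t smulfix32(int32_t a, int32_t b, unsigned scale) {
      return (int32_t)((int64_t(a) * int64_t(b)) >> scale);
    }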
@@ -731,8 +805,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::FDIV:
@@ -750,6 +824,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
@@ -759,6 +837,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -772,8 +851,17 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
+ case ISD::SMULFIX:
+ SplitVecRes_SMULFIX(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -811,6 +899,20 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
Op0Hi, Op1Hi, Op2Hi);
}
+void DAGTypeLegalizer::SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ SDLoc dl(N);
+ SDValue Op2 = N->getOperand(2);
+
+ unsigned Opcode = N->getOpcode();
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2);
+}
+
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
@@ -1238,7 +1340,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue Ch = MLD->getChain();
SDValue Ptr = MLD->getBasePtr();
SDValue Mask = MLD->getMask();
- SDValue Src0 = MLD->getSrc0();
+ SDValue PassThru = MLD->getPassThru();
unsigned Alignment = MLD->getOriginalAlignment();
ISD::LoadExtType ExtType = MLD->getExtensionType();
@@ -1259,18 +1361,18 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0Lo, Src0Hi;
- if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Src0, Src0Lo, Src0Hi);
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
else
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
- Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+ Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO,
ExtType, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
@@ -1282,7 +1384,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(),
MLD->getRanges());
- Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO,
ExtType, MLD->isExpandingLoad());
// Build a factor node to remember that this load is independent of the
@@ -1305,7 +1407,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Mask = MGT->getMask();
- SDValue Src0 = MGT->getValue();
+ SDValue PassThru = MGT->getPassThru();
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
unsigned Alignment = MGT->getOriginalAlignment();
@@ -1322,11 +1424,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
// Split MemoryVT
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0Lo, Src0Hi;
- if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Src0, Src0Lo, Src0Hi);
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
else
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
@@ -1339,11 +1441,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};
+ SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
MMO);
- SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};
+ SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
MMO);
@@ -1620,13 +1722,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
- Res = SplitVecOp_TruncateHelper(N);
- else
- Res = SplitVecOp_UnaryOp(N);
- break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
@@ -1634,6 +1729,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
else
Res = SplitVecOp_UnaryOp(N);
break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -1746,10 +1843,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break;
case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break;
case ISD::VECREDUCE_FMAX:
- CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXNAN;
+ CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
break;
case ISD::VECREDUCE_FMIN:
- CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINNAN;
+ CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
break;
default:
llvm_unreachable("Unexpected reduce ISD node");
@@ -1860,6 +1957,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Load back the required element.
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+
+ // FIXME: This is to handle i1 vectors with elements promoted to i8.
+ // i1 vector handling needs general improvement.
+ if (N->getValueType(0).bitsLT(EltVT)) {
+ SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0));
+ }
+
return DAG.getExtLoad(
ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
@@ -1886,7 +1992,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
SDValue Mask = MGT->getMask();
- SDValue Src0 = MGT->getValue();
+ SDValue PassThru = MGT->getPassThru();
unsigned Alignment = MGT->getOriginalAlignment();
SDValue MaskLo, MaskHi;
@@ -1900,11 +2006,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0Lo, Src0Hi;
- if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Src0, Src0Lo, Src0Hi);
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
else
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
@@ -1917,7 +2023,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};
+ SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
OpsLo, MMO);
@@ -1927,7 +2033,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
Alignment, MGT->getAAInfo(),
MGT->getRanges());
- SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};
+ SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
OpsHi, MMO);
@@ -2164,16 +2270,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
unsigned InElementSize = InVT.getScalarSizeInBits();
unsigned OutElementSize = OutVT.getScalarSizeInBits();
+  // Determine the split output VT. If it's legal we can just split directly.
+ EVT LoOutVT, HiOutVT;
+ std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
+ assert(LoOutVT == HiOutVT && "Unequal split?");
+
// If the input elements are only 1/2 the width of the result elements,
// just use the normal splitting. Our trick only works if there's room
// to split more than once.
- if (InElementSize <= OutElementSize * 2)
+ if (isTypeLegal(LoOutVT) ||
+ InElementSize <= OutElementSize * 2)
return SplitVecOp_UnaryOp(N);
SDLoc DL(N);
+ // Don't touch if this will be scalarized.
+ EVT FinalVT = InVT;
+ while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
+ FinalVT = FinalVT.getHalfNumVectorElementsVT(*DAG.getContext());
+
+ if (getTypeAction(FinalVT) == TargetLowering::TypeScalarizeVector)
+ return SplitVecOp_UnaryOp(N);
+
// Get the split input vector.
SDValue InLoVec, InHiVec;
GetSplitVector(InVec, InLoVec, InHiVec);
+
// Truncate them to 1/2 the element size.
EVT HalfElementVT = IsFloat ?
EVT::getFloatingPointVT(InElementSize/2) :
@@ -2298,12 +2419,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::XOR:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
Res = WidenVecRes_Binary(N);
break;
@@ -2320,6 +2445,33 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryCanTrap(N);
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
+ Res = WidenVecRes_StrictFP(N);
+ break;
+
case ISD::FCOPYSIGN:
Res = WidenVecRes_FCOPYSIGN(N);
break;
@@ -2353,11 +2505,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
- case ISD::BITREVERSE:
- case ISD::BSWAP:
- case ISD::CTLZ:
- case ISD::CTPOP:
- case ISD::CTTZ:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -2368,12 +2515,37 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
- case ISD::FNEG:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
- case ISD::FTRUNC:
+ case ISD::FTRUNC: {
+ // We're going to widen this vector op to a legal type by padding with undef
+ // elements. If the wide vector op is eventually going to be expanded to
+ // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
+ // libcalls on the undef elements. We are assuming that if the scalar op
+ // requires expanding, then the vector op needs expanding too.
+ EVT VT = N->getValueType(0);
+ if (TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
+ EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+ "Target supports vector op, but scalar requires expansion?");
+ Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+ break;
+ }
+ }
+ // If the target has custom/legal support for the scalar FP intrinsic ops
+ // (they are probably not destined to become libcalls), then widen those like
+ // any other unary ops.
+ LLVM_FALLTHROUGH;
+
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FNEG:
+ case ISD::FCANONICALIZE:
Res = WidenVecRes_Unary(N);
break;
case ISD::FMA:
@@ -2405,6 +2577,88 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
+// Given a vector of operations that have been broken up to widen, see
+// if we can collect them together into the next widest legal VT. This
+// implementation is trap-safe.
+static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
+ SmallVectorImpl<SDValue> &ConcatOps,
+ unsigned ConcatEnd, EVT VT, EVT MaxVT,
+ EVT WidenVT) {
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ SDLoc dl(ConcatOps[0]);
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ int Idx = 0;
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+      // Scalar type, create an INSERT_VECTOR_ELT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, SubConcatOps);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd ) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ makeArrayRef(ConcatOps.data(), NumOps));
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// Binary op widening for operations that can trap.
unsigned Opcode = N->getOpcode();
@@ -2477,75 +2731,119 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
}
}
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
+ return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
+ // StrictFP op widening for operations that can trap.
+ unsigned NumOpers = N->getNumOperands();
+ unsigned Opcode = N->getOpcode();
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
- // while (Some element of ConcatOps is not of type MaxVT) {
- // From the end of ConcatOps, collect elements of the same type and put
- // them into an op of the next larger supported type
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SmallVector<SDValue, 4> InOps;
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ SmallVector<SDValue, 16> Chains;
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // The Chain is the first operand.
+ InOps.push_back(N->getOperand(0));
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOpers; ++i) {
+ SDValue Oper = N->getOperand(i);
+
+ if (Oper.getValueType().isVector()) {
+ assert(Oper.getValueType() == N->getValueType(0) &&
+ "Invalid operand type to widen!");
+ Oper = GetWidenedVector(Oper);
+ }
+
+ InOps.push_back(Oper);
+ }
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
// }
- while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
- Idx = ConcatEnd - 1;
- VT = ConcatOps[Idx--].getValueType();
- while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
- Idx--;
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SmallVector<SDValue, 4> EOps;
+
+ for (unsigned i = 0; i < NumOpers; ++i) {
+ SDValue Op = InOps[i];
+
+ if (Op.getValueType().isVector())
+ Op = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
- EVT NextVT;
+ EOps.push_back(Op);
+ }
+
+ EVT OperVT[] = {VT, MVT::Other};
+ SDValue Oper = DAG.getNode(Opcode, dl, OperVT, EOps);
+ ConcatOps[ConcatEnd++] = Oper;
+ Chains.push_back(Oper.getValue(1));
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
do {
- NextSize *= 2;
- NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
- } while (!TLI.isTypeLegal(NextVT));
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
- if (!VT.isVector()) {
- // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
- SDValue VecOp = DAG.getUNDEF(NextVT);
- unsigned NumToInsert = ConcatEnd - Idx - 1;
- for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
- VecOp = DAG.getNode(
- ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SmallVector<SDValue, 4> EOps;
+
+ for (unsigned i = 0; i < NumOpers; ++i) {
+ SDValue Op = InOps[i];
+
+ if (Op.getValueType().isVector())
+ Op = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op,
+ DAG.getConstant(Idx, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ EOps.push_back(Op);
+ }
+
+ EVT WidenVT[] = {WidenEltVT, MVT::Other};
+ SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps);
+ ConcatOps[ConcatEnd++] = Oper;
+ Chains.push_back(Oper.getValue(1));
}
- ConcatOps[Idx+1] = VecOp;
- ConcatEnd = Idx + 2;
- } else {
- // Vector type, create a CONCAT_VECTORS of type NextVT
- SDValue undefVec = DAG.getUNDEF(VT);
- unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
- SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
- unsigned RealVals = ConcatEnd - Idx - 1;
- unsigned SubConcatEnd = 0;
- unsigned SubConcatIdx = Idx + 1;
- while (SubConcatEnd < RealVals)
- SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
- while (SubConcatEnd < OpsToConcat)
- SubConcatOps[SubConcatEnd++] = undefVec;
- ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
- NextVT, SubConcatOps);
- ConcatEnd = SubConcatIdx + 1;
+ CurNumElts = 0;
}
}
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
- }
+ // Build a factor node to remember all the Ops that have been created.
+ SDValue NewChain;
+ if (Chains.size() == 1)
+ NewChain = Chains[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
- // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
- unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
- if (NumOps != ConcatEnd ) {
- SDValue UndefVal = DAG.getUNDEF(MaxVT);
- for (unsigned j = ConcatEnd; j < NumOps; ++j)
- ConcatOps[j] = UndefVal;
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- makeArrayRef(ConcatOps.data(), NumOps));
+ return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
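WidenVecRes_StrictFP above cannot simply pad the vector with undef lanes, because a trapping operation on the padding could raise spurious exceptions; instead it processes the original elements in the largest legal chunks first and only then falls back to smaller pieces. Stripped of the DAG plumbing, the control flow looks roughly like the sketch below, where process() is a hypothetical stand-in for emitting one sub-operation and 8 lanes is assumed to be the widest legal chunk:

    #include <cstdio>

    // Hypothetical stand-in for emitting one strict sub-operation on N lanes.
    static void process(const float *Elts, unsigned N) {
      (void)Elts;
      std::printf("emit sub-op on %u lane(s)\n", N);
    }

    // Take munches of the largest assumed-legal size from the front, then halve.
    void chunked(const float *In, unsigned NumElts) {
      unsigned Chunk = 8;              // assumed widest legal piece
      unsigned Idx = 0;
      while (NumElts != 0) {
        while (NumElts >= Chunk) {
          process(In + Idx, Chunk);
          Idx += Chunk;
          NumElts -= Chunk;
        }
        if (Chunk == 1)
          break;                       // single-lane tail already handled above
        Chunk /= 2;                    // fall back to the next smaller size
      }
    }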
@@ -2575,10 +2873,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// If both input and result vector types are of same width, extend
// operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
// accepts fewer elements in the result than in the input.
+ if (Opcode == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
if (Opcode == ISD::SIGN_EXTEND)
- return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
if (Opcode == ISD::ZERO_EXTEND)
- return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
}
}
@@ -2591,11 +2891,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (WidenNumElts % InVTNumElts == 0) {
// Widen the input and call convert on the widened input vector.
unsigned NumConcat = WidenNumElts/InVTNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = InOp;
- SDValue UndefVal = DAG.getUNDEF(InVT);
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = UndefVal;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVec);
@@ -2614,11 +2911,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
// Otherwise unroll into some nasty scalar code and rebuild the vector.
- SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = WidenVT.getVectorElementType();
- unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
- unsigned i;
- for (i=0; i < MinElts; ++i) {
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+  // Use the original element count so we don't do more scalar ops than
+ // necessary.
+ unsigned MinElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
SDValue Val = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
@@ -2628,10 +2926,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
}
- SDValue UndefVal = DAG.getUNDEF(EltVT);
- for (; i < WidenNumElts; ++i)
- Ops[i] = UndefVal;
-
return DAG.getBuildVector(WidenVT, DL, Ops);
}
@@ -2654,11 +2948,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
switch (Opcode) {
case ISD::ANY_EXTEND_VECTOR_INREG:
- return DAG.getAnyExtendVectorInReg(InOp, DL, WidenVT);
case ISD::SIGN_EXTEND_VECTOR_INREG:
- return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
case ISD::ZERO_EXTEND_VECTOR_INREG:
- return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
}
}
}
@@ -2810,22 +3102,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
}
if (TLI.isTypeLegal(NewInVT)) {
- // Because the result and the input are different vector types, widening
- // the result could create a legal type but widening the input might make
- // it an illegal type that might lead to repeatedly splitting the input
- // and then widening it. To avoid this, we widen the input only if
- // it results in a legal type.
- SmallVector<SDValue, 16> Ops(NewNumElts);
- SDValue UndefVal = DAG.getUNDEF(InVT);
- Ops[0] = InOp;
- for (unsigned i = 1; i < NewNumElts; ++i)
- Ops[i] = UndefVal;
-
SDValue NewVec;
- if (InVT.isVector())
+ if (InVT.isVector()) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts, DAG.getUNDEF(InVT));
+ Ops[0] = InOp;
+
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
- else
- NewVec = DAG.getBuildVector(NewInVT, dl, Ops);
+ } else {
+ NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp);
+ }
return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
@@ -3003,7 +3293,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
- SDValue Src0 = GetWidenedVector(N->getSrc0());
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
ISD::LoadExtType ExtType = N->getExtensionType();
SDLoc dl(N);
@@ -3014,9 +3304,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
Mask = ModifyToType(Mask, WideMaskVT, true);
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
- Mask, Src0, N->getMemoryVT(),
+ Mask, PassThru, N->getMemoryVT(),
N->getMemOperand(), ExtType,
- N->isExpandingLoad());
+ N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -3028,7 +3318,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
- SDValue Src0 = GetWidenedVector(N->getValue());
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
SDValue Scale = N->getScale();
unsigned NumElts = WideVT.getVectorNumElements();
SDLoc dl(N);
@@ -3045,7 +3335,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
Index.getValueType().getScalarType(),
NumElts);
Index = ModifyToType(Index, WideIndexVT);
- SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, Scale };
+ SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
+ Scale };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand());
@@ -3155,16 +3446,6 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
return Mask;
}
-// Get the target mask VT, and widen if needed.
-EVT DAGTypeLegalizer::getSETCCWidenedResultTy(SDValue SetCC) {
- assert(SetCC->getOpcode() == ISD::SETCC);
- LLVMContext &Ctx = *DAG.getContext();
- EVT MaskVT = getSetCCResultType(SetCC->getOperand(0).getValueType());
- if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
- MaskVT = TLI.getTypeToTransformTo(Ctx, MaskVT);
- return MaskVT;
-}
-
// This method tries to handle VSELECT and its mask by legalizing operands
// (which may require widening) and if needed adjusting the mask vector type
// to match that of the VSELECT. Without it, many cases end up with
@@ -3232,7 +3513,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
SDValue Mask;
if (Cond->getOpcode() == ISD::SETCC) {
- EVT MaskVT = getSETCCWidenedResultTy(Cond);
+ EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType());
Mask = convertMask(Cond, MaskVT, ToMaskVT);
} else if (isLogicalMaskOp(Cond->getOpcode()) &&
Cond->getOperand(0).getOpcode() == ISD::SETCC &&
@@ -3240,8 +3521,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
// Cond is (AND/OR/XOR (SETCC, SETCC))
SDValue SETCC0 = Cond->getOperand(0);
SDValue SETCC1 = Cond->getOperand(1);
- EVT VT0 = getSETCCWidenedResultTy(SETCC0);
- EVT VT1 = getSETCCWidenedResultTy(SETCC1);
+ EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType());
+ EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType());
unsigned ScalarBits0 = VT0.getScalarSizeInBits();
unsigned ScalarBits1 = VT1.getScalarSizeInBits();
unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
@@ -3414,6 +3695,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
+ case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
@@ -3503,11 +3785,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
default:
llvm_unreachable("Extend legalization on extend operation!");
case ISD::ANY_EXTEND:
- return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, InOp);
case ISD::SIGN_EXTEND:
- return DAG.getSignExtendVectorInReg(InOp, DL, VT);
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, InOp);
case ISD::ZERO_EXTEND:
- return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, InOp);
}
}
@@ -3537,8 +3819,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
InVT.getVectorNumElements());
if (TLI.isTypeLegal(WideVT)) {
SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
- DAG.getIntPtrConstant(0, dl));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
EVT InEltVT = InVT.getVectorElementType();
@@ -3580,20 +3863,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
- // If the input vector is not legal, it is likely that we will not find a
- // legal vector of the same size. Replace the concatenate vector with a
- // nasty build vector.
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
+ EVT InVT = N->getOperand(0).getValueType();
SDLoc dl(N);
+
+ // If the widen width for this operand is the same as the width of the concat
+ // and all but the first operand is undef, just use the widened operand.
+ unsigned NumOperands = N->getNumOperands();
+ if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ unsigned i;
+ for (i = 1; i < NumOperands; ++i)
+ if (!N->getOperand(i).isUndef())
+ break;
+
+ if (i == NumOperands)
+ return GetWidenedVector(N->getOperand(0));
+ }
+
+ // Otherwise, fall back to a nasty build vector.
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(NumElts);
- EVT InVT = N->getOperand(0).getValueType();
unsigned NumInElts = InVT.getVectorNumElements();
unsigned Idx = 0;
- unsigned NumOperands = N->getNumOperands();
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
assert(getTypeAction(InOp.getValueType()) ==
@@ -3641,7 +3935,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
- assert((OpNo == 2 || OpNo == 3) &&
+ assert((OpNo == 1 || OpNo == 3) &&
"Can widen only data or mask operand of mstore");
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
@@ -3649,8 +3943,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
SDValue StVal = MST->getValue();
SDLoc dl(N);
- if (OpNo == 3) {
- // Widen the value
+ if (OpNo == 1) {
+ // Widen the value.
StVal = GetWidenedVector(StVal);
// The mask should be widened as well.
@@ -3660,18 +3954,15 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
WideVT.getVectorNumElements());
Mask = ModifyToType(Mask, WideMaskVT, true);
} else {
+ // Widen the mask.
EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
Mask = ModifyToType(Mask, WideMaskVT, true);
EVT ValueVT = StVal.getValueType();
- if (getTypeAction(ValueVT) == TargetLowering::TypeWidenVector)
- StVal = GetWidenedVector(StVal);
- else {
- EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
- ValueVT.getVectorElementType(),
- WideMaskVT.getVectorNumElements());
- StVal = ModifyToType(StVal, WideVT);
- }
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+ ValueVT.getVectorElementType(),
+ WideMaskVT.getVectorNumElements());
+ StVal = ModifyToType(StVal, WideVT);
}
assert(Mask.getValueType().getVectorNumElements() ==
@@ -3682,36 +3973,59 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
false, MST->isCompressingStore());
}
+SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 4 && "Can widen only the index of mgather");
+ auto *MG = cast<MaskedGatherSDNode>(N);
+ SDValue DataOp = MG->getPassThru();
+ SDValue Mask = MG->getMask();
+ SDValue Scale = MG->getScale();
+
+ // Just widen the index. It's allowed to have extra elements.
+ SDValue Index = GetWidenedVector(MG->getIndex());
+
+ SDLoc dl(N);
+ SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
+ Scale};
+ SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
+ MG->getMemOperand());
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ return SDValue();
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
- assert(OpNo == 1 && "Can widen only data operand of mscatter");
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue DataOp = MSC->getValue();
SDValue Mask = MSC->getMask();
- EVT MaskVT = Mask.getValueType();
+ SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
- // Widen the value.
- SDValue WideVal = GetWidenedVector(DataOp);
- EVT WideVT = WideVal.getValueType();
- unsigned NumElts = WideVT.getVectorNumElements();
- SDLoc dl(N);
+ unsigned NumElts;
+ if (OpNo == 1) {
+ DataOp = GetWidenedVector(DataOp);
+ NumElts = DataOp.getValueType().getVectorNumElements();
- // The mask should be widened as well.
- EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
- MaskVT.getVectorElementType(), NumElts);
- Mask = ModifyToType(Mask, WideMaskVT, true);
+ // Widen index.
+ EVT IndexVT = Index.getValueType();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ IndexVT.getVectorElementType(), NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
- // Widen index.
- SDValue Index = MSC->getIndex();
- EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
- Index.getValueType().getScalarType(),
- NumElts);
- Index = ModifyToType(Index, WideIndexVT);
+ // The mask should be widened as well.
+ EVT MaskVT = Mask.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(), NumElts);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+ } else if (OpNo == 4) {
+ // Just widen the index. It's allowed to have extra elements.
+ Index = GetWidenedVector(Index);
+ } else
+ llvm_unreachable("Can't widen this operand of mscatter");
- SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index,
+ SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index,
Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
- MSC->getMemoryVT(), dl, Ops,
+ MSC->getMemoryVT(), SDLoc(N), Ops,
MSC->getMemOperand());
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 7e6b57426338..f7566b246f32 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -24,6 +24,7 @@ class DIVariable;
class DIExpression;
class SDNode;
class Value;
+class raw_ostream;
/// Holds the information from a dbg_value node through SDISel.
/// We do not use SDValue here to avoid including its header.
@@ -52,6 +53,7 @@ private:
enum DbgValueKind kind;
bool IsIndirect;
bool Invalid = false;
+ bool Emitted = false;
public:
/// Constructor for non-constants.
@@ -124,6 +126,17 @@ public:
/// deleted.
void setIsInvalidated() { Invalid = true; }
bool isInvalidated() const { return Invalid; }
+
+ /// setIsEmitted / isEmitted - Getter/Setter for flag indicating that this
+ /// SDDbgValue has been emitted to an MBB.
+ void setIsEmitted() { Emitted = true; }
+ bool isEmitted() const { return Emitted; }
+
+ /// clearIsEmitted - Reset Emitted flag, for certain special cases where
+ /// dbg.addr is emitted twice.
+ void clearIsEmitted() { Emitted = false; }
+
+ LLVM_DUMP_METHOD void dump(raw_ostream &OS) const;
};
/// Holds the information from a dbg_label node through SDISel.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 3944d7df286d..90e109b022fd 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -125,8 +125,7 @@ void ScheduleDAGFast::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(nullptr);
- LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su]
- .dumpAll(this));
+ LLVM_DEBUG(dump());
// Execute the actual scheduling loop.
ListScheduleBottomUp();
@@ -144,7 +143,7 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- PredSU->dump(this);
+ dumpNode(*PredSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -182,7 +181,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
/// the Available queue.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
SU->setHeightToAtLeast(CurCycle);
@@ -777,11 +776,9 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (N->getHasDebugValue()) {
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
for (auto DV : DAG->GetDbgValues(N)) {
- if (DV->isInvalidated())
- continue;
- if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
- BB->insert(InsertPos, DbgMI);
- DV->setIsInvalidated();
+ if (!DV->isEmitted())
+ if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
+ BB->insert(InsertPos, DbgMI);
}
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 43e8ffd3839c..8d75b8133a30 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -365,7 +365,7 @@ void ScheduleDAGRRList::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(nullptr);
- LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
+ LLVM_DEBUG(dump());
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
@@ -396,7 +396,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- PredSU->dump(this);
+ dumpNode(*PredSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -729,7 +729,7 @@ static void resetVRegCycle(SUnit *SU);
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
#ifndef NDEBUG
if (CurCycle < SU->getHeight())
@@ -828,7 +828,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
for (SDep &Pred : SU->Preds) {
CapturePred(&Pred);
@@ -1130,7 +1130,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
return nullptr;
LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
if (N->getGluedNode() &&
!TII->canCopyGluedNodeDuringSchedule(N)) {
@@ -1888,7 +1888,7 @@ public:
while (!DumpQueue.empty()) {
SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
dbgs() << "Height " << SU->getHeight() << ": ";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
}
}
#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 430d8fb34476..e258f0a218a5 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -145,20 +145,18 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
Ops.push_back(ExtraOper);
SDVTList VTList = DAG->getVTList(VTs);
- MachineSDNode::mmo_iterator Begin = nullptr, End = nullptr;
MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
// Store memory references.
- if (MN) {
- Begin = MN->memoperands_begin();
- End = MN->memoperands_end();
- }
+ SmallVector<MachineMemOperand *, 2> MMOs;
+ if (MN)
+ MMOs.assign(MN->memoperands_begin(), MN->memoperands_end());
DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);
// Reset the memory references
if (MN)
- MN->setMemRefs(Begin, End);
+ DAG->setNodeMemRefs(MN, MMOs);
}
static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
@@ -244,7 +242,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
return;
// Sort them in increasing order.
- llvm::sort(Offsets.begin(), Offsets.end());
+ llvm::sort(Offsets);
// Check if the loads are close enough.
SmallVector<SDNode*, 4> Loads;
@@ -650,18 +648,20 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
dep.setLatency(Latency);
}
-void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
- // Cannot completely remove virtual function even in release mode.
+void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- if (!SU->getNode()) {
+ dumpNodeName(SU);
+ dbgs() << ": ";
+
+ if (!SU.getNode()) {
dbgs() << "PHYS REG COPY\n";
return;
}
- SU->getNode()->dump(DAG);
+ SU.getNode()->dump(DAG);
dbgs() << "\n";
SmallVector<SDNode *, 4> GluedNodes;
- for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode())
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
dbgs() << " ";
@@ -672,11 +672,22 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
#endif
}
+void ScheduleDAGSDNodes::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (EntrySU.getNode() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits)
+ dumpNodeAll(SU);
+ if (ExitSU.getNode() != nullptr)
+ dumpNodeAll(ExitSU);
+#endif
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGSDNodes::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
- SU->dump(this);
+ dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
}
@@ -711,7 +722,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
for (auto DV : DAG->GetDbgValues(N)) {
- if (DV->isInvalidated())
+ if (DV->isEmitted())
continue;
unsigned DVOrder = DV->getOrder();
if (!Order || DVOrder == Order) {
@@ -720,7 +731,6 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
Orders.push_back({DVOrder, DbgMI});
BB->insert(InsertPos, DbgMI);
}
- DV->setIsInvalidated();
}
}
}
@@ -811,8 +821,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
for (; PDI != PDE; ++PDI) {
MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
- if (DbgMI)
+ if (DbgMI) {
BB->insert(InsertPos, DbgMI);
+ // We re-emit the dbg_value closer to its use, too, after instructions
+ // are emitted to the BB.
+ (*PDI)->clearIsEmitted();
+ }
}
}
@@ -878,7 +892,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
for (; DI != DE; ++DI) {
if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
break;
- if ((*DI)->isInvalidated())
+ if ((*DI)->isEmitted())
continue;
MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
@@ -900,7 +914,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// some of them before one or more conditional branches?
SmallVector<MachineInstr*, 8> DbgMIs;
for (; DI != DE; ++DI) {
- if ((*DI)->isInvalidated())
+ if ((*DI)->isEmitted())
continue;
assert((*DI)->getOrder() >= LastOrder &&
"emitting DBG_VALUE out of order");
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 6417e16bd0fd..3fa7ad895725 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -122,8 +122,8 @@ class InstrItineraryData;
virtual MachineBasicBlock*
EmitSchedule(MachineBasicBlock::iterator &InsertPos);
- void dumpNode(const SUnit *SU) const override;
-
+ void dumpNode(const SUnit &SU) const override;
+ void dump() const override;
void dumpSchedule() const;
std::string getGraphNodeLabel(const SUnit *SU) const override;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 84055f8ecc1a..416061475b1a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -118,7 +118,7 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
+ dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -152,7 +152,7 @@ void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
/// the Available queue.
void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9e38e675d13a..647496c1afcb 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -87,6 +87,8 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+void SelectionDAG::DAGNodeDeletedListener::anchor() {}
+
#define DEBUG_TYPE "selectiondag"
static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
@@ -269,15 +271,24 @@ bool ISD::allOperandsUndef(const SDNode *N) {
}
bool ISD::matchUnaryPredicate(SDValue Op,
- std::function<bool(ConstantSDNode *)> Match) {
+ std::function<bool(ConstantSDNode *)> Match,
+ bool AllowUndefs) {
+ // FIXME: Add support for scalar UNDEF cases?
if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
return Match(Cst);
+ // FIXME: Add support for vector UNDEF cases?
if (ISD::BUILD_VECTOR != Op.getOpcode())
return false;
EVT SVT = Op.getValueType().getScalarType();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ if (AllowUndefs && Op.getOperand(i).isUndef()) {
+ if (!Match(nullptr))
+ return false;
+ continue;
+ }
+
auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
return false;
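With the new AllowUndefs flag above, undef elements of a BUILD_VECTOR are presented to the predicate as a null ConstantSDNode*, so callers that want to tolerate undef lanes must accept nullptr explicitly. A hypothetical caller (a sketch, not code from this change) might look like:

    // Accept a build_vector whose defined lanes are all-ones, treating undef
    // lanes as matching (the helper passes nullptr for them).
    bool AllOnesOrUndef = ISD::matchUnaryPredicate(
        Op,
        [](ConstantSDNode *C) { return !C || C->isAllOnesValue(); },
        /*AllowUndefs=*/true);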
@@ -287,26 +298,33 @@ bool ISD::matchUnaryPredicate(SDValue Op,
bool ISD::matchBinaryPredicate(
SDValue LHS, SDValue RHS,
- std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
+ bool AllowUndefs) {
if (LHS.getValueType() != RHS.getValueType())
return false;
+ // TODO: Add support for scalar UNDEF cases?
if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
return Match(LHSCst, RHSCst);
+ // TODO: Add support for vector UNDEF cases?
if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
ISD::BUILD_VECTOR != RHS.getOpcode())
return false;
EVT SVT = LHS.getValueType().getScalarType();
for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
- auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
- auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
- if (!LHSCst || !RHSCst)
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ bool LHSUndef = AllowUndefs && LHSOp.isUndef();
+ bool RHSUndef = AllowUndefs && RHSOp.isUndef();
+ auto *LHSCst = dyn_cast<ConstantSDNode>(LHSOp);
+ auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp);
+ if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef))
return false;
- if (LHSCst->getValueType(0) != SVT ||
- LHSCst->getValueType(0) != RHSCst->getValueType(0))
+ if (LHSOp.getValueType() != SVT ||
+ LHSOp.getValueType() != RHSOp.getValueType())
return false;
if (!Match(LHSCst, RHSCst))
return false;
@@ -984,7 +1002,7 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE,
Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- DivergenceAnalysis * Divergence) {
+ LegacyDivergenceAnalysis * Divergence) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
@@ -1118,39 +1136,6 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
getConstant(Imm, DL, Op.getValueType()));
}
-SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL,
- EVT VT) {
- assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
- "The sizes of the input and result must match in order to perform the "
- "extend in-register.");
- assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
- "The destination vector type must have fewer lanes than the input.");
- return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op);
-}
-
-SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL,
- EVT VT) {
- assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
- "The sizes of the input and result must match in order to perform the "
- "extend in-register.");
- assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
- "The destination vector type must have fewer lanes than the input.");
- return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op);
-}
-
-SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
- EVT VT) {
- assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
- "The sizes of the input and result must match in order to perform the "
- "extend in-register.");
- assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
- "The destination vector type must have fewer lanes than the input.");
- return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op);
-}
-
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
@@ -1718,7 +1703,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
// SDNode doesn't have access to it. This memory will be "leaked" when
// the node is deallocated, but recovered when the NodeAllocator is released.
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
- std::copy(MaskVec.begin(), MaskVec.end(), MaskAlloc);
+ llvm::copy(MaskVec, MaskAlloc);
auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(),
dl.getDebugLoc(), MaskAlloc);
@@ -2135,6 +2120,15 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
break;
}
+ case ISD::SIGN_EXTEND_INREG:
+ EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT();
+ unsigned ExVTBits = ExVT.getScalarSizeInBits();
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if (Mask.getActiveBits() <= ExVTBits)
+ return V.getOperand(0);
+
+ break;
}
return SDValue();
}
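The new SIGN_EXTEND_INREG case only fires when every demanded bit lies within the narrower source type, so the in-register extension is a no-op for the bits the caller cares about. A worked illustration with assumed types:

// Assumed example: V = sign_extend_inreg T, ValueType:i8, where T is i32.
//   GetDemandedBits(V, APInt(32, 0x7f))  -->  T
// Mask.getActiveBits() == 7 <= ExVTBits == 8, so the extended bits are
// never observed and the source can be used directly.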
@@ -2151,9 +2145,103 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
/// for bits that V cannot have.
bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
unsigned Depth) const {
- KnownBits Known;
- computeKnownBits(Op, Known, Depth);
- return Mask.isSubsetOf(Known.Zero);
+ return Mask.isSubsetOf(computeKnownBits(Op, Depth).Zero);
+}
+
+/// isSplatValue - Return true if the vector V has the same value
+/// across all DemandedElts.
+bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
+ APInt &UndefElts) {
+ if (!DemandedElts)
+ return false; // No demanded elts, better to assume we don't know anything.
+
+ EVT VT = V.getValueType();
+ assert(VT.isVector() && "Vector type expected");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
+ UndefElts = APInt::getNullValue(NumElts);
+
+ switch (V.getOpcode()) {
+ case ISD::BUILD_VECTOR: {
+ SDValue Scl;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = V.getOperand(i);
+ if (Op.isUndef()) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (!DemandedElts[i])
+ continue;
+ if (Scl && Scl != Op)
+ return false;
+ Scl = Op;
+ }
+ return true;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ // Check if this is a shuffle node doing a splat.
+ // TODO: Do we need to handle shuffle(splat, undef, mask)?
+ int SplatIndex = -1;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (!DemandedElts[i])
+ continue;
+ if (0 <= SplatIndex && SplatIndex != M)
+ return false;
+ SplatIndex = M;
+ }
+ return true;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Src = V.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt UndefSrcElts;
+ APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) {
+ UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
+ return true;
+ }
+ }
+ break;
+ }
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND: {
+ APInt UndefLHS, UndefRHS;
+ SDValue LHS = V.getOperand(0);
+ SDValue RHS = V.getOperand(1);
+ if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
+ isSplatValue(RHS, DemandedElts, UndefRHS)) {
+ UndefElts = UndefLHS | UndefRHS;
+ return true;
+ }
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// Helper wrapper to main isSplatValue function.
+bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
+ EVT VT = V.getValueType();
+ assert(VT.isVector() && "Vector type expected");
+ unsigned NumElts = VT.getVectorNumElements();
+
+ APInt UndefElts;
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return isSplatValue(V, DemandedElts, UndefElts) &&
+ (AllowUndefs || !UndefElts);
}
/// Helper function that checks to see if a node is a constant or a
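isSplatValue gives combines a generic way to ask whether all demanded lanes of a vector carry the same value, optionally tolerating undef lanes. A sketch of two hypothetical wrappers over the new API (the helper names are assumptions):

// Hypothetical helpers illustrating both overloads added above.
static bool isBroadcastLike(SelectionDAG &DAG, SDValue V) {
  // All-lanes query; undef lanes are allowed to participate in the splat.
  return DAG.isSplatValue(V, /*AllowUndefs=*/true);
}

static bool isSplatOfDemandedLanes(SelectionDAG &DAG, SDValue V,
                                   const APInt &DemandedElts) {
  // Finer-grained form: only the demanded lanes must agree, and the caller
  // learns which of those lanes were undef.
  APInt UndefElts;
  return DAG.isSplatValue(V, DemandedElts, UndefElts);
}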
@@ -2195,60 +2283,59 @@ static const APInt *getValidShiftAmountConstant(SDValue V) {
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. For vectors, the known bits are those that are shared by
/// every vector element.
-void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
- unsigned Depth) const {
+KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
- computeKnownBits(Op, Known, DemandedElts, Depth);
+ return computeKnownBits(Op, DemandedElts, Depth);
}
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. The DemandedElts argument allows us to only collect the known
/// bits that are shared by the requested vector elements.
-void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
- const APInt &DemandedElts,
- unsigned Depth) const {
+KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth) const {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- Known = KnownBits(BitWidth); // Don't know anything.
+ KnownBits Known(BitWidth); // Don't know anything.
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
// We know all of the bits for a constant!
Known.One = C->getAPIntValue();
Known.Zero = ~Known.One;
- return;
+ return Known;
}
if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) {
// We know all of the bits for a constant fp!
Known.One = C->getValueAPF().bitcastToAPInt();
Known.Zero = ~Known.One;
- return;
+ return Known;
}
if (Depth == 6)
- return; // Limit search depth.
+ return Known; // Limit search depth.
KnownBits Known2;
unsigned NumElts = DemandedElts.getBitWidth();
+ assert((!Op.getValueType().isVector() ||
+ NumElts == Op.getValueType().getVectorNumElements()) &&
+ "Unexpected vector size");
if (!DemandedElts)
- return; // No demanded elts, better to assume we don't know anything.
+ return Known; // No demanded elts, better to assume we don't know anything.
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded vector element.
- assert(NumElts == Op.getValueType().getVectorNumElements() &&
- "Unexpected vector size");
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
if (!DemandedElts[i])
continue;
SDValue SrcOp = Op.getOperand(i);
- computeKnownBits(SrcOp, Known2, Depth + 1);
+ Known2 = computeKnownBits(SrcOp, Depth + 1);
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
if (SrcOp.getValueSizeInBits() != BitWidth) {
@@ -2295,7 +2382,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Known bits are the values that are shared by every demanded element.
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
- computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1);
+ Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2304,7 +2391,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
- computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1);
+ Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2321,7 +2408,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
DemandedSub = DemandedSub.trunc(NumSubVectorElts);
if (!!DemandedSub) {
SDValue Sub = Op.getOperand(i);
- computeKnownBits(Sub, Known2, DemandedSub, Depth + 1);
+ Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2344,22 +2431,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
uint64_t Idx = SubIdx->getZExtValue();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
if (!!DemandedSubElts) {
- computeKnownBits(Sub, Known, DemandedSubElts, Depth + 1);
+ Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
if (Known.isUnknown())
break; // early-out.
}
APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
APInt DemandedSrcElts = DemandedElts & ~SubMask;
if (!!DemandedSrcElts) {
- computeKnownBits(Src, Known2, DemandedSrcElts, Depth + 1);
+ Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
} else {
- computeKnownBits(Sub, Known, Depth + 1);
+ Known = computeKnownBits(Sub, Depth + 1);
if (Known.isUnknown())
break; // early-out.
- computeKnownBits(Src, Known2, Depth + 1);
+ Known2 = computeKnownBits(Src, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2375,12 +2462,25 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- computeKnownBits(Src, Known, DemandedSrc, Depth + 1);
+ Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
} else {
- computeKnownBits(Src, Known, Depth + 1);
+ Known = computeKnownBits(Src, Depth + 1);
}
break;
}
+ case ISD::SCALAR_TO_VECTOR: {
+ // We know as much about scalar_to_vector as we know about its source,
+ // which becomes the first element of an otherwise unknown vector.
+ if (DemandedElts != 1)
+ break;
+
+ SDValue N0 = Op.getOperand(0);
+ Known = computeKnownBits(N0, Depth + 1);
+ if (N0.getValueSizeInBits() != BitWidth)
+ Known = Known.trunc(BitWidth);
+
+ break;
+ }
case ISD::BITCAST: {
SDValue N0 = Op.getOperand(0);
EVT SubVT = N0.getValueType();
@@ -2392,7 +2492,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Fast handling of 'identity' bitcasts.
if (BitWidth == SubBitWidth) {
- computeKnownBits(N0, Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(N0, DemandedElts, Depth + 1);
break;
}
@@ -2413,7 +2513,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
SubDemandedElts.setBit(i * SubScale);
for (unsigned i = 0; i != SubScale; ++i) {
- computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
+ Known2 = computeKnownBits(N0, SubDemandedElts.shl(i),
Depth + 1);
unsigned Shifts = IsLE ? i : SubScale - 1 - i;
Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts);
@@ -2434,7 +2534,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
if (DemandedElts[i])
SubDemandedElts.setBit(i / SubScale);
- computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1);
+ Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1);
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i)
@@ -2452,8 +2552,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// Output known-1 bits are only known if set in both the LHS & RHS.
Known.One &= Known2.One;
@@ -2461,8 +2561,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero |= Known2.Zero;
break;
case ISD::OR:
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// Output known-0 bits are only known if clear in both the LHS & RHS.
Known.Zero &= Known2.Zero;
@@ -2470,8 +2570,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.One |= Known2.One;
break;
case ISD::XOR: {
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
@@ -2481,8 +2581,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::MUL: {
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If low bits are zero in either operand, output low known-0 bits.
// Also compute a conservative estimate for high known-0 bits.
@@ -2503,10 +2603,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned LeadZ = Known2.countMinLeadingZeros();
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
if (RHSMaxLeadingZeros != BitWidth)
LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
@@ -2516,22 +2616,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::SELECT:
case ISD::VSELECT:
- computeKnownBits(Op.getOperand(2), Known, DemandedElts, Depth+1);
+ Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth+1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
- computeKnownBits(Op.getOperand(3), Known, DemandedElts, Depth+1);
+ Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
- computeKnownBits(Op.getOperand(2), Known2, DemandedElts, Depth+1);
+ Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
@@ -2560,7 +2660,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SHL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
Known.Zero <<= Shift;
Known.One <<= Shift;
@@ -2570,7 +2670,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SRL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
Known.Zero.lshrInPlace(Shift);
Known.One.lshrInPlace(Shift);
@@ -2599,13 +2699,46 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SRA:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
// Sign extend known zero/one bit (else is unknown).
Known.Zero.ashrInPlace(Shift);
Known.One.ashrInPlace(Shift);
}
break;
+ case ISD::FSHL:
+ case ISD::FSHR:
+ if (ConstantSDNode *C =
+ isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) {
+ unsigned Amt = C->getAPIntValue().urem(BitWidth);
+
+ // For fshl, 0-shift returns the 1st arg.
+ // For fshr, 0-shift returns the 2nd arg.
+ if (Amt == 0) {
+ Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 0 : 1),
+ DemandedElts, Depth + 1);
+ break;
+ }
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (Opcode == ISD::FSHL) {
+ Known.One <<= Amt;
+ Known.Zero <<= Amt;
+ Known2.One.lshrInPlace(BitWidth - Amt);
+ Known2.Zero.lshrInPlace(BitWidth - Amt);
+ } else {
+ Known.One <<= BitWidth - Amt;
+ Known.Zero <<= BitWidth - Amt;
+ Known2.One.lshrInPlace(Amt);
+ Known2.Zero.lshrInPlace(Amt);
+ }
+ Known.One |= Known2.One;
+ Known.Zero |= Known2.Zero;
+ }
+ break;
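// Worked illustration with assumed values: BitWidth = 8 and a constant
// shift amount Z = 11 give Amt = 11 % 8 = 3. For fshl the result is
// (X << 3) | (Y >> 5): known bits of X supply result bits [7:3] and known
// bits of Y supply result bits [2:0], which is exactly what the shifted
// Known/Known2 masks OR'd together above describe.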
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned EBits = EVT.getScalarSizeInBits();
@@ -2623,7 +2756,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
if (NewBits.getBoolValue())
InputDemandedBits |= InSignMask;
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.One &= InputDemandedBits;
Known.Zero &= InputDemandedBits;
@@ -2643,7 +2776,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleTZ = Known2.countMaxTrailingZeros();
unsigned LowBits = Log2_32(PossibleTZ) + 1;
@@ -2652,7 +2785,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleLZ = Known2.countMaxLeadingZeros();
unsigned LowBits = Log2_32(PossibleLZ) + 1;
@@ -2660,7 +2793,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::CTPOP: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we know some of the bits are zero, they can't be one.
unsigned PossibleOnes = Known2.countMaxPopulation();
Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
@@ -2681,41 +2814,49 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::ZERO_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
- APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
- computeKnownBits(Op.getOperand(0), Known, InDemandedElts, Depth + 1);
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
break;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
break;
}
- // TODO ISD::SIGN_EXTEND_VECTOR_INREG
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
+ // If the sign bit is known to be zero or one, then sext will extend
+ // it to the top bits, else it will just zext.
+ Known = Known.sext(BitWidth);
+ break;
+ }
case ISD::SIGN_EXTEND: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the sign bit is known to be zero or one, then sext will extend
// it to the top bits, else it will just zext.
Known = Known.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known = Known.trunc(BitWidth);
break;
}
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
Known.Zero |= (~InMask);
Known.One &= (~Known.Zero);
break;
@@ -2745,7 +2886,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts,
Depth + 1);
// If all of the MaskV bits are known to be zero, then we know the
@@ -2762,12 +2903,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// If low bits are known to be zero in both operands, then we know they are
// going to be 0 in the result. Both addition and complement operations
// preserve the low zero bits.
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned KnownZeroLow = Known2.countMinTrailingZeros();
if (KnownZeroLow == 0)
break;
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
Known.Zero.setLowBits(KnownZeroLow);
break;
@@ -2794,12 +2935,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// known to be clear. For example, if one input has the top 10 bits clear
// and the other has the top 8 bits clear, we know the top 7 bits of the
// output must be clear.
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
unsigned KnownZeroLow = Known2.countMinTrailingZeros();
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
- Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
@@ -2823,7 +2963,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
const APInt &RA = Rem->getAPIntValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// The low bits of the first operand are unchanged by the srem.
Known.Zero = Known2.Zero & LowBits;
@@ -2847,7 +2987,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// The upper bits are all zero, the lower ones are unchanged.
Known.Zero = Known2.Zero | ~LowBits;
@@ -2858,8 +2998,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
uint32_t Leaders =
std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
@@ -2868,7 +3008,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::EXTRACT_ELEMENT: {
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
const unsigned Index = Op.getConstantOperandVal(1);
const unsigned BitWidth = Op.getValueSizeInBits();
@@ -2896,10 +3036,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// If we know the element index, just demand that vector element.
unsigned Idx = ConstEltNo->getZExtValue();
APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
- computeKnownBits(InVec, Known, DemandedElt, Depth + 1);
+ Known = computeKnownBits(InVec, DemandedElt, Depth + 1);
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
- computeKnownBits(InVec, Known, Depth + 1);
+ Known = computeKnownBits(InVec, Depth + 1);
}
if (BitWidth > EltBitWidth)
Known = Known.zext(BitWidth);
@@ -2919,7 +3059,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// If we demand the inserted element then add its common known bits.
if (DemandedElts[EltIdx]) {
- computeKnownBits(InVal, Known2, Depth + 1);
+ Known2 = computeKnownBits(InVal, Depth + 1);
Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
@@ -2928,33 +3068,33 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// that we don't demand the inserted element.
APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
if (!!VectorElts) {
- computeKnownBits(InVec, Known2, VectorElts, Depth + 1);
+ Known2 = computeKnownBits(InVec, VectorElts, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
- computeKnownBits(InVec, Known, Depth + 1);
- computeKnownBits(InVal, Known2, Depth + 1);
+ Known = computeKnownBits(InVec, Depth + 1);
+ Known2 = computeKnownBits(InVal, Depth + 1);
Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
break;
}
case ISD::BITREVERSE: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.Zero = Known2.Zero.reverseBits();
Known.One = Known2.One.reverseBits();
break;
}
case ISD::BSWAP: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.Zero = Known2.Zero.byteSwap();
Known.One = Known2.One.byteSwap();
break;
}
case ISD::ABS: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the source's MSB is zero then we know the rest of the bits already.
if (Known2.isNonNegative()) {
@@ -2973,8 +3113,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::UMIN: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// UMIN - we know that the result will have the maximum of the
// known zero leading bits of the inputs.
@@ -2987,9 +3127,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::UMAX: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts,
- Depth + 1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// UMAX - we know that the result will have the maximum of the
// known one leading bits of the inputs.
@@ -3033,9 +3172,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
// Fallback - just get the shared known bits of the operands.
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Known.isUnknown()) break; // Early-out
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
@@ -3058,6 +3197,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ return Known;
}
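computeKnownBits now returns the KnownBits value instead of filling an out-parameter, so call sites can compose the query directly. A sketch of the new call style (the helper name is an assumption):

// Old style:  KnownBits Known; DAG.computeKnownBits(Op, Known);
// New style:  use the returned value in place.
static bool signBitKnownZero(const SelectionDAG &DAG, SDValue Op) {
  return DAG.computeKnownBits(Op).isNonNegative();
}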
SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
@@ -3066,11 +3206,9 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
if (isNullConstant(N1))
return OFK_Never;
- KnownBits N1Known;
- computeKnownBits(N1, N1Known);
+ KnownBits N1Known = computeKnownBits(N1);
if (N1Known.Zero.getBoolValue()) {
- KnownBits N0Known;
- computeKnownBits(N0, N0Known);
+ KnownBits N0Known = computeKnownBits(N0);
bool overflow;
(void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow);
@@ -3084,8 +3222,7 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
return OFK_Never;
if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
- KnownBits N0Known;
- computeKnownBits(N0, N0Known);
+ KnownBits N0Known = computeKnownBits(N0);
if ((~N0Known.Zero & 0x01) == ~N0Known.Zero)
return OFK_Never;
@@ -3131,8 +3268,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// to handle some common cases.
// Fall back to computeKnownBits to catch other known cases.
- KnownBits Known;
- computeKnownBits(Val, Known);
+ KnownBits Known = computeKnownBits(Val);
return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
}
@@ -3240,14 +3376,35 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (VTBits == SrcBits)
return ComputeNumSignBits(N0, DemandedElts, Depth + 1);
+ bool IsLE = getDataLayout().isLittleEndian();
+
// Bitcast 'large element' scalar/vector to 'small element' vector.
- // TODO: Handle cases other than 'sign splat' when we have a use case.
- // Requires handling of DemandedElts and Endianness.
if ((SrcBits % VTBits) == 0) {
- assert(Op.getValueType().isVector() && "Expected bitcast to vector");
- Tmp = ComputeNumSignBits(N0, Depth + 1);
+ assert(VT.isVector() && "Expected bitcast to vector");
+
+ unsigned Scale = SrcBits / VTBits;
+ APInt SrcDemandedElts(NumElts / Scale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBit(i / Scale);
+
+ // Fast case - sign splat can be simply split across the small elements.
+ Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
if (Tmp == SrcBits)
return VTBits;
+
+ // Slow case - determine how far the sign extends into each sub-element.
+ Tmp2 = VTBits;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned SubOffset = i % Scale;
+ SubOffset = (IsLE ? ((Scale - 1) - SubOffset) : SubOffset);
+ SubOffset = SubOffset * VTBits;
+ if (Tmp <= SubOffset)
+ return 1;
+ Tmp2 = std::min(Tmp2, Tmp - SubOffset);
+ }
+ return Tmp2;
}
break;
}
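The slow path works out, per demanded lane, how much of the source element's sign run reaches that sub-element. A worked illustration with assumed numbers:

// Assumed: a v2i64 source with Tmp = 40 sign bits, bitcast to v4i32 on a
// little-endian target (Scale = 2, VTBits = 32).
//   high halves (elements 1, 3): SubOffset = 0  -> min(32, 40 - 0)  = 32
//   low halves  (elements 0, 2): SubOffset = 32 -> min(32, 40 - 32) = 8
// The result is the minimum over the demanded lanes: 8 whenever a low half
// is demanded, 32 if only high halves are.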
@@ -3264,7 +3421,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::SIGN_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
- APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(SrcVT.getVectorNumElements());
Tmp = VTBits - SrcVT.getScalarSizeInBits();
return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
}
@@ -3361,7 +3518,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If setcc returns 0/-1, all bits are sign bits.
// We know that we have an integer-based boolean since these operations
// are only available for integer.
- if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ if (TLI->getBooleanContents(VT.isVector(), false) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
@@ -3396,8 +3553,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Special case decrementing a value (ADD X, -1):
if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
if (CRHS->isAllOnesValue()) {
- KnownBits Known;
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
@@ -3421,8 +3577,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Handle NEG.
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
if (CLHS->isNullValue()) {
- KnownBits Known;
- computeKnownBits(Op.getOperand(1), Known, Depth+1);
+ KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((Known.Zero | 1).isAllOnesValue())
@@ -3538,7 +3693,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
return ComputeNumSignBits(Src, Depth + 1);
}
- case ISD::CONCAT_VECTORS:
+ case ISD::CONCAT_VECTORS: {
// Determine the minimum number of sign bits across all demanded
// elts of the input vectors. Early out if the result is already 1.
Tmp = std::numeric_limits<unsigned>::max();
@@ -3556,6 +3711,40 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
+ case ISD::INSERT_SUBVECTOR: {
+ // If we know the element index, demand any elements from the subvector and
+ // the remainder from the src it is inserted into; otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
+ Tmp = std::numeric_limits<unsigned>::max();
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ if (!!DemandedSubElts) {
+ Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
+ if (Tmp == 1) return 1; // early-out
+ }
+ APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
+ APInt DemandedSrcElts = DemandedElts & ~SubMask;
+ if (!!DemandedSrcElts) {
+ Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+
+ // Not able to determine the index so just assume worst case.
+ Tmp = ComputeNumSignBits(Sub, Depth + 1);
+ if (Tmp == 1) return 1; // early-out
+ Tmp2 = ComputeNumSignBits(Src, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+ }
// If we are looking at the loaded value of the SDNode.
if (Op.getResNo() == 0) {
@@ -3587,8 +3776,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
- KnownBits Known;
- computeKnownBits(Op, Known, DemandedElts, Depth);
+ KnownBits Known = computeKnownBits(Op, DemandedElts, Depth);
APInt Mask;
if (Known.isNonNegative()) { // sign bit is 0
@@ -3622,21 +3810,121 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
return true;
}
-bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
// If we're told that NaNs won't happen, assume they won't.
- if (getTarget().Options.NoNaNsFPMath)
+ if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
return true;
- if (Op->getFlags().hasNoNaNs())
- return true;
+ if (Depth == 6)
+ return false; // Limit search depth.
+ // TODO: Handle vectors.
// If the value is a constant, we can obviously see if it is a NaN or not.
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
- return !C->getValueAPF().isNaN();
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
+ return !C->getValueAPF().isNaN() ||
+ (SNaN && !C->getValueAPF().isSignaling());
+ }
- // TODO: Recognize more cases here.
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FSIN:
+ case ISD::FCOS: {
+ if (SNaN)
+ return true;
+ // TODO: Need isKnownNeverInfinity
+ return false;
+ }
+ case ISD::FCANONICALIZE:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FROUND:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::FABS:
+ case ISD::FNEG:
+ case ISD::FCOPYSIGN: {
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::SELECT:
+ return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return true;
+ case ISD::FMA:
+ case ISD::FMAD: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ }
+ case ISD::FSQRT: // Needs the operand to be known positive
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FPOWI:
+ case ISD::FPOW: {
+ if (SNaN)
+ return true;
+ // TODO: Refine on operand
+ return false;
+ }
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Only one needs to be known not-nan, since it will be returned if the
+ // other ends up being one.
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE: {
+ if (SNaN)
+ return true;
+ // This can return a NaN if either operand is an sNaN, or if both operands
+ // are NaN.
+ return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) ||
+ (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(0), Depth + 1));
+ }
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: {
+ // TODO: Does this quiet or return the original NaN as-is?
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ default:
+ if (Opcode >= ISD::BUILTIN_OP_END ||
+ Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::INTRINSIC_VOID) {
+ return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth);
+ }
- return false;
+ return false;
+ }
}
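The added SNaN flag asks the weaker question "can this ever be a signaling NaN?", which far more nodes can answer than full never-NaN. A sketch of the two query strengths, with DAG and Op assumed to be in scope:

// Sketch only: two strengths of the same query.
bool NeverAnyNaN = DAG.isKnownNeverNaN(Op);                 // never a NaN at all
bool NeverSNaN   = DAG.isKnownNeverNaN(Op, /*SNaN=*/true);  // never a signaling NaN
// The arithmetic nodes above (FADD, FMUL, ...) quiet their inputs, so the
// second query succeeds for them even when the first cannot be proven.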
bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
@@ -3690,10 +3978,39 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
- KnownBits AKnown, BKnown;
- computeKnownBits(A, AKnown);
- computeKnownBits(B, BKnown);
- return (AKnown.Zero | BKnown.Zero).isAllOnesValue();
+ return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue();
+}
+
+static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops,
+ SelectionDAG &DAG) {
+ int NumOps = Ops.size();
+ assert(NumOps != 0 && "Can't build an empty vector!");
+ assert(VT.getVectorNumElements() == (unsigned)NumOps &&
+ "Incorrect element count in BUILD_VECTOR!");
+
+ // BUILD_VECTOR of UNDEFs is UNDEF.
+ if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
+ return DAG.getUNDEF(VT);
+
+ // BUILD_VECTOR of sequential extracts from the same vector of matching type is an identity.
+ SDValue IdentitySrc;
+ bool IsIdentity = true;
+ for (int i = 0; i != NumOps; ++i) {
+ if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Ops[i].getOperand(0).getValueType() != VT ||
+ (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) ||
+ !isa<ConstantSDNode>(Ops[i].getOperand(1)) ||
+ cast<ConstantSDNode>(Ops[i].getOperand(1))->getAPIntValue() != i) {
+ IsIdentity = false;
+ break;
+ }
+ IdentitySrc = Ops[i].getOperand(0);
+ }
+ if (IsIdentity)
+ return IdentitySrc;
+
+ return SDValue();
}
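The identity check recognizes a BUILD_VECTOR that merely re-extracts every lane of one source vector in order. An assumed illustration of the shape it now folds:

// Assumed example of the fold above:
//   t0: v4i32 = ...
//   BUILD_VECTOR (extract_vector_elt t0, 0), (extract_vector_elt t0, 1),
//                (extract_vector_elt t0, 2), (extract_vector_elt t0, 3)
// collapses back to t0, since rebuilding every lane in order is a no-op;
// a BUILD_VECTOR whose operands are all undef likewise folds to UNDEF.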
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
@@ -3779,9 +4096,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SIGN_EXTEND:
return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
+ case ISD::TRUNCATE:
+ if (C->isOpaque())
+ break;
+ LLVM_FALLTHROUGH;
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::TRUNCATE:
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
case ISD::UINT_TO_FP:
@@ -3947,6 +4267,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::MERGE_VALUES:
case ISD::CONCAT_VECTORS:
return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {Operand};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
case ISD::FP_EXTEND:
assert(VT.isFloatingPoint() &&
@@ -4045,6 +4372,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(Operand.getValueType().bitsLE(VT) &&
+ "The input must be the same size or smaller than the result.");
+ assert(VT.getVectorNumElements() <
+ Operand.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ break;
case ISD::ABS:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid ABS!");
@@ -4151,6 +4488,10 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
+ case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true);
+ case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true);
+ case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true);
+ case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
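The saturating opcodes constant-fold through APInt's saturating helpers, clamping to the type's extremes instead of wrapping. A small assumed example for i8:

// Assumed i8 example of the saturating folds added above.
APInt A(8, 100), B(8, 100);
APInt S = A.sadd_sat(B);              // signed 100 + 100 clamps to 127, not -56
APInt U = A.uadd_sat(B);              // unsigned 200 fits in 8 bits, unchanged
APInt D = A.usub_sat(APInt(8, 200));  // unsigned 100 - 200 clamps to 0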
@@ -4258,14 +4599,20 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
return FoldSymbolOffset(Opcode, VT, GA, Cst1);
- // For vectors extract each constant element into Inputs so we can constant
- // fold them individually.
- BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
- BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
- if (!BV1 || !BV2)
+ // For vectors, extract each constant element and fold them individually.
+ // Either input may be an undef value.
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ if (!BV1 && !Cst1->isUndef())
+ return SDValue();
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV2 && !Cst2->isUndef())
+ return SDValue();
+ // If both operands are undef, that's handled the same way as scalars.
+ if (!BV1 && !BV2)
return SDValue();
- assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+ assert((!BV1 || !BV2 || BV1->getNumOperands() == BV2->getNumOperands()) &&
+ "Vector binop with different number of elements in operands?");
EVT SVT = VT.getScalarType();
EVT LegalSVT = SVT;
@@ -4275,15 +4622,15 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
return SDValue();
}
SmallVector<SDValue, 4> Outputs;
- for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
- SDValue V1 = BV1->getOperand(I);
- SDValue V2 = BV2->getOperand(I);
-
+ unsigned NumOps = BV1 ? BV1->getNumOperands() : BV2->getNumOperands();
+ for (unsigned I = 0; I != NumOps; ++I) {
+ SDValue V1 = BV1 ? BV1->getOperand(I) : getUNDEF(SVT);
+ SDValue V2 = BV2 ? BV2->getOperand(I) : getUNDEF(SVT);
if (SVT.isInteger()) {
- if (V1->getValueType(0).bitsGT(SVT))
- V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
- if (V2->getValueType(0).bitsGT(SVT))
- V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+ if (V1->getValueType(0).bitsGT(SVT))
+ V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
+ if (V2->getValueType(0).bitsGT(SVT))
+ V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
}
if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
@@ -4436,6 +4783,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N2.getOpcode() == ISD::EntryToken) return N1;
if (N1 == N2) return N1;
break;
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {N1, N2};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2};
@@ -4477,6 +4831,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
@@ -4499,6 +4857,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
+ if (SDValue V = simplifyShift(N1, N2))
+ return V;
+ LLVM_FALLTHROUGH;
case ISD::ROTL:
case ISD::ROTR:
assert(VT == N1.getValueType() &&
@@ -4507,7 +4868,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Shifts only work on integers");
assert((!VT.isVector() || VT == N2.getValueType()) &&
"Vector shift amounts must be in the same as their first arg");
- // Verify that the shift amount VT is bit enough to hold valid shift
+ // Verify that the shift amount VT is big enough to hold valid shift
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
// TLI.getShiftAmount().
@@ -4555,8 +4916,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(!EVT.isVector() &&
"AssertSExt/AssertZExt type should be the vector element type "
"rather than the vector type!");
- assert(EVT.bitsLE(VT) && "Not extending!");
- if (VT == EVT) return N1; // noop assertion.
+ assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
+ if (VT.getScalarType() == EVT) return N1; // noop assertion.
break;
}
case ISD::SIGN_EXTEND_INREG: {
@@ -4793,14 +5154,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
}
- // Any FP binop with an undef operand is folded to NaN. This matches the
- // behavior of the IR optimizer.
switch (Opcode) {
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
+ // If both operands are undef, the result is undef. If one operand is undef,
+ // the result is NaN. This should match the behavior of the IR optimizer.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
if (N1.isUndef() || N2.isUndef())
return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
}
@@ -4819,9 +5182,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
}
}
@@ -4837,21 +5199,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getConstant(0, DL, VT);
LLVM_FALLTHROUGH;
case ISD::ADD:
- case ISD::ADDC:
- case ISD::ADDE:
case ISD::SUB:
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL:
return getUNDEF(VT); // fold op(arg1, undef) -> undef
case ISD::MUL:
case ISD::AND:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
case ISD::OR:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
return getAllOnesConstant(DL, VT);
}
}
@@ -4907,6 +5268,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
break;
}
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2, N3};
@@ -4915,6 +5283,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::SETCC: {
+ assert(VT.isInteger() && "SETCC result type must be an integer!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ "SETCC operands must have the same type!");
+ assert(VT.isVector() == N1.getValueType().isVector() &&
+ "SETCC type should be vector iff the operand type is vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) &&
+ "SETCC vector element counts must match!");
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
return V;
@@ -4927,13 +5303,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::SELECT:
- if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
- if (N1C->getZExtValue())
- return N2; // select true, X, Y -> X
- return N3; // select false, X, Y -> Y
- }
-
- if (N2 == N3) return N2; // select C, X, X -> X
+ case ISD::VSELECT:
+ if (SDValue V = simplifySelect(N1, N2, N3))
+ return V;
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
@@ -5048,8 +5420,11 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
assert(C->getAPIntValue().getBitWidth() == 8);
APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
- if (VT.isInteger())
- return DAG.getConstant(Val, dl, VT);
+ if (VT.isInteger()) {
+ bool IsOpaque = VT.getSizeInBits() > 64 ||
+ !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue());
+ return DAG.getConstant(Val, dl, VT, false, IsOpaque);
+ }
return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl,
VT);
}
@@ -5229,12 +5604,10 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
- // FIXME: Only does this for 64-bit or more since we don't have proper
- // cost model for unaligned load / store.
bool Fast;
- if (NumMemOps && AllowOverlap &&
- VTSize >= 8 && NewVTSize < Size &&
- TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast)
+ if (NumMemOps && AllowOverlap && NewVTSize < Size &&
+ TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
+ Fast)
VTSize = Size;
else {
VT = NewVT;
@@ -6495,11 +6868,11 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
}
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
- SDValue Ptr, SDValue Mask, SDValue Src0,
+ SDValue Ptr, SDValue Mask, SDValue PassThru,
EVT MemVT, MachineMemOperand *MMO,
ISD::LoadExtType ExtTy, bool isExpanding) {
SDVTList VTs = getVTList(VT, MVT::Other);
- SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
+ SDValue Ops[] = { Chain, Ptr, Mask, PassThru };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(VT.getRawBits());
@@ -6530,7 +6903,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
"Invalid chain type");
EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
- SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ SDValue Ops[] = { Chain, Val, Ptr, Mask };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
@@ -6574,12 +6947,12 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
VTs, VT, MMO);
createOperands(N, Ops);
- assert(N->getValue().getValueType() == N->getValueType(0) &&
+ assert(N->getPassThru().getValueType() == N->getValueType(0) &&
"Incompatible type of the PassThru value in MaskedGatherSDNode");
assert(N->getMask().getValueType().getVectorNumElements() ==
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between mask and data");
- assert(N->getIndex().getValueType().getVectorNumElements() ==
+ assert(N->getIndex().getValueType().getVectorNumElements() >=
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
@@ -6616,7 +6989,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
assert(N->getMask().getValueType().getVectorNumElements() ==
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between mask and data");
- assert(N->getIndex().getValueType().getVectorNumElements() ==
+ assert(N->getIndex().getValueType().getVectorNumElements() >=
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
@@ -6630,6 +7003,60 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
+ // select undef, T, F --> T (if T is a constant), otherwise F
+ // select ?, undef, F --> F
+ // select ?, T, undef --> T
+ if (Cond.isUndef())
+ return isConstantValueOfAnyType(T) ? T : F;
+ if (T.isUndef())
+ return F;
+ if (F.isUndef())
+ return T;
+
+ // select true, T, F --> T
+ // select false, T, F --> F
+ if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
+ return CondC->isNullValue() ? F : T;
+
+ // TODO: This should simplify VSELECT with constant condition using something
+ // like this (but check boolean contents to be complete?):
+ // if (ISD::isBuildVectorAllOnes(Cond.getNode()))
+ // return T;
+ // if (ISD::isBuildVectorAllZeros(Cond.getNode()))
+ // return F;
+
+ // select ?, T, T --> T
+ if (T == F)
+ return T;
+
+ return SDValue();
+}
+
+SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
+ // shift undef, Y --> 0 (can always assume that the undef value is 0)
+ if (X.isUndef())
+ return getConstant(0, SDLoc(X.getNode()), X.getValueType());
+ // shift X, undef --> undef (because it may shift by the bitwidth)
+ if (Y.isUndef())
+ return getUNDEF(X.getValueType());
+
+ // shift 0, Y --> 0
+ // shift X, 0 --> X
+ if (isNullOrNullSplat(X) || isNullOrNullSplat(Y))
+ return X;
+
+ // shift X, C >= bitwidth(X) --> undef
+ // All vector elements must be too big (or undef) to avoid partial undefs.
+ auto isShiftTooBig = [X](ConstantSDNode *Val) {
+ return !Val || Val->getAPIntValue().uge(X.getScalarValueSizeInBits());
+ };
+ if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true))
+ return getUNDEF(X.getValueType());
+
+ return SDValue();
+}
+
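These helpers centralize the undef and identity folds that getNode now applies before building a shift or select node. A couple of the resulting folds on assumed values (X is an i32 SDValue, DL a debug location):

// Sketch only: folds performed by getNode via simplifyShift.
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
SDValue Same = DAG.getNode(ISD::SRL, DL, MVT::i32, X, Zero); // shift by 0 -> X
SDValue Big  = DAG.getConstant(32, DL, MVT::i32);
SDValue Und  = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Big);  // amount >= bitwidth -> undef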
SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue SV, unsigned Align) {
SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
@@ -6659,12 +7086,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, Ops[0], Flags);
case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
- case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2], Flags);
default: break;
}
switch (Opcode) {
default: break;
+ case ISD::BUILD_VECTOR:
+ // Attempt to simplify BUILD_VECTOR.
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
case ISD::CONCAT_VECTORS:
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
@@ -6880,7 +7312,7 @@ SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) {
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(NumVTs);
- std::copy(VTs.begin(), VTs.end(), Array);
+ llvm::copy(VTs, Array);
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
VTListMap.InsertNode(Result, IP);
}
@@ -7010,6 +7442,27 @@ void SDNode::DropOperands() {
}
}
+void SelectionDAG::setNodeMemRefs(MachineSDNode *N,
+ ArrayRef<MachineMemOperand *> NewMemRefs) {
+ if (NewMemRefs.empty()) {
+ N->clearMemRefs();
+ return;
+ }
+
+ // Check if we can avoid allocating by storing a single reference directly.
+ if (NewMemRefs.size() == 1) {
+ N->MemRefs = NewMemRefs[0];
+ N->NumMemRefs = 1;
+ return;
+ }
+
+ MachineMemOperand **MemRefsBuffer =
+ Allocator.template Allocate<MachineMemOperand *>(NewMemRefs.size());
+ llvm::copy(NewMemRefs, MemRefsBuffer);
+ N->MemRefs = MemRefsBuffer;
+ N->NumMemRefs = static_cast<int>(NewMemRefs.size());
+}
+
/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
/// machine opcode.
///
@@ -7152,7 +7605,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
// For MachineNode, initialize the memory references information.
if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N))
- MN->setMemRefs(nullptr, nullptr);
+ MN->clearMemRefs();
// Swap for an appropriately sized array from the recycler.
removeOperands(N);
@@ -7202,6 +7655,12 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
NewOpc = ISD::FNEARBYINT;
IsUnary = true;
break;
+ case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
+ case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
+ case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break;
+ case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
+ case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
+ case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
}
// We're taking this node out of the chain, so we need to re-link things.
@@ -7488,8 +7947,11 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
Dbg->getDebugLoc(), Dbg->getOrder());
ClonedDVs.push_back(Clone);
- if (InvalidateDbg)
+ if (InvalidateDbg) {
+ // Invalidate value and indicate the SDDbgValue should not be emitted.
Dbg->setIsInvalidated();
+ Dbg->setIsEmitted();
+ }
}
for (SDDbgValue *Dbg : ClonedDVs)
@@ -7526,6 +7988,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
+ DV->setIsEmitted();
LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting";
N0.getNode()->dumprFull(this);
dbgs() << " into " << *DIExpr << '\n');
@@ -7688,7 +8151,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
// Preserve Debug Info.
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
- transferDbgValues(SDValue(From, i), *To);
+ transferDbgValues(SDValue(From, i), To[i]);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -7700,18 +8163,22 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
- // A user can appear in a use list multiple times, and when this
- // happens the uses are usually next to each other in the list.
- // To help reduce the number of CSE recomputations, process all
- // the uses of this user that we can find this way.
+ // A user can appear in a use list multiple times, and when this happens the
+ // uses are usually next to each other in the list. To help reduce the
+ // number of CSE and divergence recomputations, process all the uses of this
+ // user that we can find this way.
+ bool To_IsDivergent = false;
do {
SDUse &Use = UI.getUse();
const SDValue &ToOp = To[Use.getResNo()];
++UI;
Use.set(ToOp);
- if (To->getNode()->isDivergent() != From->isDivergent())
- updateDivergence(User);
+ To_IsDivergent |= ToOp->isDivergent();
} while (UI != UE && *UI == User);
+
+ if (To_IsDivergent != From->isDivergent())
+ updateDivergence(User);
+
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
@@ -7842,6 +8309,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
}
}
+#ifndef NDEBUG
void SelectionDAG::VerifyDAGDiverence()
{
std::vector<SDNode*> TopoOrder;
@@ -7868,6 +8336,7 @@ void SelectionDAG::VerifyDAGDiverence()
"Divergence bit inconsistency detected\n");
}
}
+#endif
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
@@ -7901,7 +8370,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
}
// Sort the uses, so that all the uses from a given User are together.
- llvm::sort(Uses.begin(), Uses.end());
+ llvm::sort(Uses);
for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
UseIndex != UseIndexEnd; ) {
@@ -8053,6 +8522,32 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
return TokenFactor;
}
+SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
+ Function **OutFunction) {
+ assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol");
+
+ auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ auto *Module = MF->getFunction().getParent();
+ auto *Function = Module->getFunction(Symbol);
+
+ if (OutFunction != nullptr)
+ *OutFunction = Function;
+
+ if (Function != nullptr) {
+ auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace());
+ return getGlobalAddress(Function, SDLoc(Op), PtrTy);
+ }
+
+ std::string ErrorStr;
+ raw_string_ostream ErrorFormatter(ErrorStr);
+
+ ErrorFormatter << "Undefined external symbol ";
+ ErrorFormatter << '"' << Symbol << '"';
+ ErrorFormatter.flush();
+
+ report_fatal_error(ErrorStr);
+}
+
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
@@ -8077,11 +8572,26 @@ bool llvm::isOneConstant(SDValue V) {
return Const != nullptr && Const->isOne();
}
+SDValue llvm::peekThroughBitcasts(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ return V;
+}
+
+SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST && V.getOperand(0).hasOneUse())
+ V = V.getOperand(0);
+ return V;
+}
+
bool llvm::isBitwiseNot(SDValue V) {
- return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1));
+ if (V.getOpcode() != ISD::XOR)
+ return false;
+ ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(V.getOperand(1)));
+ return C && C->isAllOnesValue();
}
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
@@ -8090,9 +8600,7 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
- // FIXME: We blindly ignore splats which include undef which is overly
- // pessimistic.
- if (CN && UndefElements.none() &&
+ if (CN && (UndefElements.none() || AllowUndefs) &&
CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
@@ -8100,21 +8608,40 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
return nullptr;
}
-ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
-
- if (CN && UndefElements.none())
+ if (CN && (UndefElements.none() || AllowUndefs))
return CN;
}
return nullptr;
}
+bool llvm::isNullOrNullSplat(SDValue N) {
+ // TODO: may want to use peekThroughBitcasts() here.
+ ConstantSDNode *C = isConstOrConstSplat(N);
+ return C && C->isNullValue();
+}
+
+bool llvm::isOneOrOneSplat(SDValue N) {
+ // TODO: may want to use peekThroughBitcasts() here.
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ ConstantSDNode *C = isConstOrConstSplat(N);
+ return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth;
+}
+
+bool llvm::isAllOnesOrAllOnesSplat(SDValue N) {
+ N = peekThroughBitcasts(N);
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ ConstantSDNode *C = isConstOrConstSplat(N);
+ return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
+}
+
HandleSDNode::~HandleSDNode() {
DropOperands();
}
@@ -8318,6 +8845,64 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
this->Flags.intersectWith(Flags);
}
+SDValue
+SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
+ ArrayRef<ISD::NodeType> CandidateBinOps) {
+ // The pattern must end in an extract from index 0.
+ if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isNullConstant(Extract->getOperand(1)))
+ return SDValue();
+
+ SDValue Op = Extract->getOperand(0);
+ unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
+
+ // Match against one of the candidate binary ops.
+ if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) {
+ return Op.getOpcode() == unsigned(BinOp);
+ }))
+ return SDValue();
+
+ // At each stage, we're looking for something that looks like:
+ // %s = shufflevector <8 x i32> %op, <8 x i32> undef,
+ // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
+ // i32 undef, i32 undef, i32 undef, i32 undef>
+ // %a = binop <8 x i32> %op, %s
+ // Where the mask changes according to the stage. E.g. for a 3-stage pyramid,
+ // we expect something like:
+ // <4,5,6,7,u,u,u,u>
+ // <2,3,u,u,u,u,u,u>
+ // <1,u,u,u,u,u,u,u>
+ unsigned CandidateBinOp = Op.getOpcode();
+ for (unsigned i = 0; i < Stages; ++i) {
+ if (Op.getOpcode() != CandidateBinOp)
+ return SDValue();
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(Op0);
+ if (Shuffle) {
+ Op = Op1;
+ } else {
+ Shuffle = dyn_cast<ShuffleVectorSDNode>(Op1);
+ Op = Op0;
+ }
+
+ // The first operand of the shuffle should be the same as the other operand
+ // of the binop.
+ if (!Shuffle || Shuffle->getOperand(0) != Op)
+ return SDValue();
+
+ // Verify the shuffle has the expected (at this stage of the pyramid) mask.
+ for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
+ if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
+ return SDValue();
+ }
+
+ BinOp = (ISD::NodeType)CandidateBinOp;
+ return Op;
+}
+
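The shuffle masks that matchBinOpReduction expects at each stage of the pyramid can be reproduced with a tiny standalone program (an assumed illustration only, using eight lanes as in the comment above):

#include <cstdio>

int main() {
  const unsigned NumElts = 8;                 // three reduction stages
  for (unsigned MaskEnd = NumElts / 2; MaskEnd >= 1; MaskEnd /= 2) {
    std::printf("<");
    for (unsigned Index = 0; Index < NumElts; ++Index) {
      if (Index < MaskEnd)
        std::printf("%u", MaskEnd + Index);   // required mask element
      else
        std::printf("u");                     // undef / don't-care lane
      if (Index + 1 != NumElts)
        std::printf(",");
    }
    std::printf(">\n");  // prints <4,5,6,7,u,u,u,u>, <2,3,u,...>, <1,u,...>
  }
  return 0;
}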
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -8681,8 +9266,11 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
assert(!Node->OperandList && "Node already has operands");
+ assert(std::numeric_limits<decltype(SDNode::NumOperands)>::max() >=
+ Vals.size() &&
+ "too many operands to fit into SDNode");
SDUse *Ops = OperandRecycler.allocate(
- ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
+ ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
bool IsDivergent = false;
for (unsigned I = 0; I != Vals.size(); ++I) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index c859f16e74fe..488bac1a9a80 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -19,8 +19,9 @@
using namespace llvm;
-bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
- const SelectionDAG &DAG, int64_t &Off) {
+bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
+ const SelectionDAG &DAG,
+ int64_t &Off) const {
// Conservatively fail if a match failed.
if (!Base.getNode() || !Other.Base.getNode())
return false;
@@ -75,7 +76,7 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
}
/// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
+BaseIndexOffset BaseIndexOffset::match(const LSBaseSDNode *N,
const SelectionDAG &DAG) {
SDValue Ptr = N->getBasePtr();
@@ -106,14 +107,14 @@ BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1)))
if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) {
Offset += C->getSExtValue();
- Base = Base->getOperand(0);
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
continue;
}
break;
case ISD::ADD:
if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
Offset += C->getSExtValue();
- Base = Base->getOperand(0);
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
continue;
}
break;
@@ -129,7 +130,7 @@ BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
Offset -= Off;
else
Offset += Off;
- Base = LSBase->getBasePtr();
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(LSBase->getBasePtr());
continue;
}
break;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c1c15514c09a..871ab9b29881 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -88,6 +88,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -121,6 +122,7 @@
#include <vector>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "isel"
@@ -614,6 +616,32 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
std::reverse(Parts, Parts + OrigNumParts);
}
+static SDValue widenVectorToPartType(SelectionDAG &DAG,
+ SDValue Val, const SDLoc &DL, EVT PartVT) {
+ if (!PartVT.isVector())
+ return SDValue();
+
+ EVT ValueVT = Val.getValueType();
+ unsigned PartNumElts = PartVT.getVectorNumElements();
+ unsigned ValueNumElts = ValueVT.getVectorNumElements();
+ if (PartNumElts > ValueNumElts &&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ DAG.ExtractVectorElements(Val, Ops);
+ SDValue EltUndef = DAG.getUNDEF(ElementVT);
+ for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i)
+ Ops.push_back(EltUndef);
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+ return DAG.getBuildVector(PartVT, DL, Ops);
+ }
+
+ return SDValue();
+}
+
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
@@ -632,28 +660,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
- } else if (PartVT.isVector() &&
- PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
- PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
- EVT ElementVT = PartVT.getVectorElementType();
- // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
- // undef elements.
- SmallVector<SDValue, 16> Ops;
- for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
- Ops.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
-
- for (unsigned i = ValueVT.getVectorNumElements(),
- e = PartVT.getVectorNumElements(); i != e; ++i)
- Ops.push_back(DAG.getUNDEF(ElementVT));
-
- Val = DAG.getBuildVector(PartVT, DL, Ops);
-
- // FIXME: Use CONCAT for 2x -> 4x.
-
- //SDValue UndefElts = DAG.getUNDEF(VectorTy);
- //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
+ Val = Widened;
} else if (PartVT.isVector() &&
PartEVT.getVectorElementType().bitsGE(
ValueVT.getVectorElementType()) &&
@@ -695,33 +703,38 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
}
- unsigned NumElements = ValueVT.getVectorNumElements();
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ unsigned IntermediateNumElts = IntermediateVT.isVector() ?
+ IntermediateVT.getVectorNumElements() : 1;
+
// Convert the vector to the appropriate type if necessary.
- unsigned DestVectorNoElts =
- NumIntermediates *
- (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
+ unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
+
EVT BuiltVectorTy = EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
- if (Val.getValueType() != BuiltVectorTy)
+ MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ if (ValueVT != BuiltVectorTy) {
+ if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
+ Val = Widened;
+
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+ }
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
- if (IntermediateVT.isVector())
- Ops[i] =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
- DAG.getConstant(i * (NumElements / NumIntermediates), DL,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
- else
+ if (IntermediateVT.isVector()) {
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
+ DAG.getConstant(i * IntermediateNumElts, DL, IdxVT));
+ } else {
Ops[i] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ DAG.getConstant(i, DL, IdxVT));
+ }
}
// Split the intermediate operands into legal parts.
@@ -810,7 +823,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// If the source register was virtual and if we know something about it,
// add an assert node.
if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
- !RegisterVT.isInteger() || RegisterVT.isVector())
+ !RegisterVT.isInteger())
continue;
const FunctionLoweringInfo::LiveOutInfo *LOI =
@@ -818,7 +831,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
if (!LOI)
continue;
- unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned RegSize = RegisterVT.getScalarSizeInBits();
unsigned NumSignBits = LOI->NumSignBits;
unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
@@ -1019,8 +1032,19 @@ SDValue SelectionDAGBuilder::getRoot() {
}
// Otherwise, we have to make a token factor node.
- SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
- PendingLoads);
+ // If we have >= 2^16 loads then split across multiple token factors as
+ // there's a 64k limit on the number of SDNode operands.
+ SDValue Root;
+ size_t Limit = (1 << 16) - 1;
+ while (PendingLoads.size() > Limit) {
+ unsigned SliceIdx = PendingLoads.size() - Limit;
+ auto ExtractedTFs = ArrayRef<SDValue>(PendingLoads).slice(SliceIdx, Limit);
+ SDValue NewTF =
+ DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, ExtractedTFs);
+ PendingLoads.erase(PendingLoads.begin() + SliceIdx, PendingLoads.end());
+ PendingLoads.emplace_back(NewTF);
+ }
+ Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads);
PendingLoads.clear();
DAG.setRoot(Root);
return Root;
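A minimal sketch of the chunking idea used above (standalone code under assumed names, not the DAG API): keep folding the trailing Limit entries into one combined node until the final node fits under the operand limit.

#include <numeric>
#include <vector>

// Stand-in for DAG.getNode(ISD::TokenFactor, ...): combine a slice of chains.
static int makeTokenFactor(const std::vector<int> &Ops) {
  return std::accumulate(Ops.begin(), Ops.end(), 0);
}

int buildRoot(std::vector<int> Pending) {
  const size_t Limit = (1 << 16) - 1;           // max operands per SDNode
  while (Pending.size() > Limit) {
    size_t SliceIdx = Pending.size() - Limit;
    std::vector<int> Slice(Pending.begin() + SliceIdx, Pending.end());
    Pending.erase(Pending.begin() + SliceIdx, Pending.end());
    Pending.push_back(makeTokenFactor(Slice));  // re-queue the combined node
  }
  return makeTokenFactor(Pending);              // final root token factor
}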
@@ -1054,7 +1078,7 @@ SDValue SelectionDAGBuilder::getControlRoot() {
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
- if (isa<TerminatorInst>(&I)) {
+ if (I.isTerminator()) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
@@ -1082,7 +1106,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
}
}
- if (!isa<TerminatorInst>(&I) && !HasTailCall &&
+ if (!I.isTerminator() && !HasTailCall &&
!isStatepoint(&I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
@@ -1178,7 +1202,8 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), InReg, Ty, getABIRegCopyCC(V));
+ DAG.getDataLayout(), InReg, Ty,
+ None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
V);
@@ -1437,8 +1462,11 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
// Don't emit any special code for the cleanuppad instruction. It just marks
// the start of an EH scope/funclet.
FuncInfo.MBB->setIsEHScopeEntry();
- FuncInfo.MBB->setIsEHFuncletEntry();
- FuncInfo.MBB->setIsCleanupFuncletEntry();
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ if (Pers != EHPersonality::Wasm_CXX) {
+ FuncInfo.MBB->setIsEHFuncletEntry();
+ FuncInfo.MBB->setIsCleanupFuncletEntry();
+ }
}
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
@@ -1458,6 +1486,7 @@ static void findUnwindDestinations(
classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
bool IsSEH = isAsynchronousEHPersonality(Personality);
while (EHPadBB) {
@@ -1472,7 +1501,8 @@ static void findUnwindDestinations(
// personalities.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
- UnwindDests.back().first->setIsEHFuncletEntry();
+ if (!IsWasmCXX)
+ UnwindDests.back().first->setIsEHFuncletEntry();
break;
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations.
@@ -1807,7 +1837,6 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
SwitchCases.push_back(CB);
}
-/// FindMergedConditions - If Cond is an expression like
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -1819,13 +1848,12 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
bool InvertCond) {
// Skip over nodes that are not part of the tree and remember to invert op and
// operands at the next level.
- if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
- const Value *CondOp = BinaryOperator::getNotArgument(Cond);
- if (InBlock(CondOp, CurBB->getBasicBlock())) {
- FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
- !InvertCond);
- return;
- }
+ Value *NotCond;
+ if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+ InBlock(NotCond, CurBB->getBasicBlock())) {
+ FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
}
const Instruction *BOp = dyn_cast<Instruction>(Cond);
@@ -2193,12 +2221,11 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
if (Global) {
MachinePointerInfo MPInfo(Global);
- MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
- *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
- DAG.getEVTAlignment(PtrTy));
- Node->setMemRefs(MemRefs, MemRefs + 1);
+ MachineMemOperand *MemRef = MF.getMachineMemOperand(
+ MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
+ DAG.setNodeMemRefs(Node, {MemRef});
}
return SDValue(Node, 0);
}
@@ -2514,9 +2541,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
assert(FuncInfo.MBB->isEHPad() &&
"Call to landingpad not in landing pad!");
- MachineBasicBlock *MBB = FuncInfo.MBB;
- addLandingPadInfo(LP, *MBB);
-
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -2567,8 +2591,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
assert(CC.Low == CC.High && "Input clusters must be single-case");
#endif
- llvm::sort(Clusters.begin(), Clusters.end(),
- [](const CaseCluster &a, const CaseCluster &b) {
+ llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
return a.Low->getValue().slt(b.Low->getValue());
});
@@ -2789,6 +2812,15 @@ static bool isVectorReductionOp(const User *I) {
return ReduxExtracted;
}
+void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
+ SDNodeFlags Flags;
+
+ SDValue Op = getValue(I.getOperand(0));
+ SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
+ Op, Flags);
+ setValue(&I, UnNodeValue);
+}
+
void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
@@ -2815,7 +2847,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op2 = getValue(I.getOperand(1));
EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
- Op2.getValueType(), DAG.getDataLayout());
+ Op1.getValueType(), DAG.getDataLayout());
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
@@ -2932,7 +2964,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
ISD::VSELECT : ISD::SELECT;
// Min/max matching is only viable if all output VTs are the same.
- if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
+ if (is_splat(ValueVTs)) {
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
@@ -2960,16 +2992,16 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_FMINNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
+ case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
case SPNB_RETURNS_ANY: {
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
Opc = ISD::FMINNUM;
- else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
- Opc = ISD::FMINNAN;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
+ Opc = ISD::FMINIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
- ISD::FMINNUM : ISD::FMINNAN;
+ ISD::FMINNUM : ISD::FMINIMUM;
break;
}
}
@@ -2977,17 +3009,17 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_FMAXNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
+ case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
Opc = ISD::FMAXNUM;
- else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
- Opc = ISD::FMAXNAN;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
+ Opc = ISD::FMAXIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
- ISD::FMAXNUM : ISD::FMAXNAN;
+ ISD::FMAXNUM : ISD::FMAXIMUM;
break;
}
break;
@@ -3662,8 +3694,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA && AA->pointsToConstantMemory(MemoryLocation(
- SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
+ else if (AA &&
+ AA->pointsToConstantMemory(MemoryLocation(
+ SV,
+ LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -3774,9 +3809,12 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
Type *Ty = I.getType();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
- SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
- "load_from_swift_error should not be constant memory");
+ assert(
+ (!AA ||
+ !AA->pointsToConstantMemory(MemoryLocation(
+ SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
+ AAInfo))) &&
+ "load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -4063,8 +4101,12 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
- bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
- PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
+ bool AddToChain =
+ !AA || !AA->pointsToConstantMemory(MemoryLocation(
+ PtrOperand,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(I.getType())),
+ AAInfo));
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO =
@@ -4105,10 +4147,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
const Value *BasePtr = Ptr;
bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
bool ConstantMemory = false;
- if (UniformBase &&
- AA && AA->pointsToConstantMemory(MemoryLocation(
- BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
- AAInfo))) {
+ if (UniformBase && AA &&
+ AA->pointsToConstantMemory(
+ MemoryLocation(BasePtr,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(I.getType())),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -5038,6 +5082,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout())));
return nullptr;
+ case Intrinsic::sponentry:
+ setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ return nullptr;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
@@ -5176,7 +5224,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
- const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I);
+ const auto &DI = cast<DbgVariableIntrinsic>(I);
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
@@ -5276,7 +5324,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
SDDbgValue *SDV;
- if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
+ isa<ConstantPointerNull>(V)) {
SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, nullptr, false);
return nullptr;
@@ -5553,8 +5602,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::minnum: {
auto VT = getValue(I.getArgOperand(0)).getValueType();
unsigned Opc =
- I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)
- ? ISD::FMINNAN
+ I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT)
+ ? ISD::FMINIMUM
: ISD::FMINNUM;
setValue(&I, DAG.getNode(Opc, sdl, VT,
getValue(I.getArgOperand(0)),
@@ -5564,14 +5613,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::maxnum: {
auto VT = getValue(I.getArgOperand(0)).getValueType();
unsigned Opc =
- I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)
- ? ISD::FMAXNAN
+ I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT)
+ ? ISD::FMAXIMUM
: ISD::FMAXNUM;
setValue(&I, DAG.getNode(Opc, sdl, VT,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
return nullptr;
}
+ case Intrinsic::minimum:
+ setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
+ case Intrinsic::maximum:
+ setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5603,6 +5664,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return nullptr;
case Intrinsic::fmuladd: {
@@ -5693,43 +5760,94 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Y = getValue(I.getArgOperand(1));
SDValue Z = getValue(I.getArgOperand(2));
EVT VT = X.getValueType();
+ SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
+ SDValue Zero = DAG.getConstant(0, sdl, VT);
+ SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
- // When X == Y, this is rotate. Create the node directly if legal.
- // TODO: This should also be done if the operation is custom, but we have
- // to make sure targets are handling the modulo shift amount as expected.
- // TODO: If the rotate direction (left or right) corresponding to the shift
- // is not available, adjust the shift value and invert the direction.
- auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
- if (X == Y && TLI.isOperationLegal(RotateOpcode, VT)) {
- setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
+ if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) {
+ setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
return nullptr;
}
- // Get the shift amount and inverse shift amount, modulo the bit-width.
- SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
- SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
- SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, Z);
- SDValue InvShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
+ // When X == Y, this is a rotate. If the data type has a power-of-2 size, we
+ // avoid the select that is necessary in the general case to filter out
+ // the 0-shift possibility that leads to UB.
+ if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) {
+ auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ return nullptr;
+ }
- // fshl: (X << (Z % BW)) | (Y >> ((BW - Z) % BW))
- // fshr: (X << ((BW - Z) % BW)) | (Y >> (Z % BW))
+ // Some targets only rotate one way. Try the opposite direction.
+ RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL;
+ if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
+ // Negate the shift amount because it is safe to ignore the high bits.
+ SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
+ return nullptr;
+ }
+
+ // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
+ // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW))
+ SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
+ SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
+ SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt);
+ SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt);
+ setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
+ return nullptr;
+ }
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt);
SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
- SDValue Res = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
-
- // If (Z % BW == 0), then (BW - Z) % BW is also zero, so the result would
- // be X | Y. If X == Y (rotate), that's fine. If not, we have to select.
- if (X != Y) {
- SDValue Zero = DAG.getConstant(0, sdl, VT);
- EVT CCVT = MVT::i1;
- if (VT.isVector())
- CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
- // For fshl, 0 shift returns the 1st arg (X).
- // For fshr, 0 shift returns the 2nd arg (Y).
- SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
- Res = DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Res);
- }
- setValue(&I, Res);
+ SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
+
+ // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
+ // and that is undefined. We must compare and select to avoid UB.
+ EVT CCVT = MVT::i1;
+ if (VT.isVector())
+ CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
+
+ // For fshl, 0-shift returns the 1st arg (X).
+ // For fshr, 0-shift returns the 2nd arg (Y).
+ SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
+ setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
+ return nullptr;
+ }
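For reference, a hedged standalone sketch of the generic expansion above for 32-bit scalars (plain C++ rather than SDNodes; the zero-shift guard mirrors the compare-and-select emitted above):

#include <cstdint>

uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  const uint32_t BW = 32;
  uint32_t ShAmt = Z % BW;
  if (ShAmt == 0)
    return X;                                 // 0-shift returns the 1st arg (X)
  return (X << ShAmt) | (Y >> (BW - ShAmt));  // fshl formula from the comment
}

uint32_t fshr32(uint32_t X, uint32_t Y, uint32_t Z) {
  const uint32_t BW = 32;
  uint32_t ShAmt = Z % BW;
  if (ShAmt == 0)
    return Y;                                 // 0-shift returns the 2nd arg (Y)
  return (X << (BW - ShAmt)) | (Y >> ShAmt);  // fshr formula from the comment
}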
+ case Intrinsic::sadd_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::uadd_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::ssub_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::usub_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::smul_fix: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I,
+ DAG.getNode(ISD::SMULFIX, sdl, Op1.getValueType(), Op1, Op2, Op3));
return nullptr;
}
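The saturating intrinsics added above map directly onto the new ISD opcodes; as a quick semantic reminder, a hedged scalar sketch (illustrative helper names, not LLVM code):

#include <cstdint>
#include <limits>

int32_t sadd_sat32(int32_t A, int32_t B) {
  int64_t Sum = int64_t(A) + int64_t(B);           // widen to avoid overflow UB
  if (Sum > std::numeric_limits<int32_t>::max())
    return std::numeric_limits<int32_t>::max();    // clamp high
  if (Sum < std::numeric_limits<int32_t>::min())
    return std::numeric_limits<int32_t>::min();    // clamp low
  return int32_t(Sum);
}

uint32_t uadd_sat32(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;                            // unsigned wrap is defined
  return Sum < A ? std::numeric_limits<uint32_t>::max() : Sum;
}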
case Intrinsic::stacksave: {
@@ -5824,6 +5942,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return nullptr;
}
+
+ case Intrinsic::is_constant:
+ // If this wasn't constant-folded away by now, then it's not a
+ // constant.
+ setValue(&I, DAG.getConstant(0, sdl, MVT::i1));
+ return nullptr;
+
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
case Intrinsic::launder_invariant_group:
@@ -6224,7 +6349,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
GA->getGlobal(), getCurSDLoc(),
Val.getValueType(), GA->getOffset())});
}
- llvm::sort(Targets.begin(), Targets.end(),
+ llvm::sort(Targets,
[](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
return T1.Offset < T2.Offset;
});
@@ -6243,12 +6368,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
- case Intrinsic::wasm_landingpad_index: {
- // TODO store landing pad index in a map, which will be used when generating
- // LSDA information
+ case Intrinsic::wasm_landingpad_index:
+ // The information this intrinsic contained has been transferred to
+ // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
+ // delete it now.
return nullptr;
}
- }
}
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
@@ -6311,6 +6436,24 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_nearbyint:
Opcode = ISD::STRICT_FNEARBYINT;
break;
+ case Intrinsic::experimental_constrained_maxnum:
+ Opcode = ISD::STRICT_FMAXNUM;
+ break;
+ case Intrinsic::experimental_constrained_minnum:
+ Opcode = ISD::STRICT_FMINNUM;
+ break;
+ case Intrinsic::experimental_constrained_ceil:
+ Opcode = ISD::STRICT_FCEIL;
+ break;
+ case Intrinsic::experimental_constrained_floor:
+ Opcode = ISD::STRICT_FFLOOR;
+ break;
+ case Intrinsic::experimental_constrained_round:
+ Opcode = ISD::STRICT_FROUND;
+ break;
+ case Intrinsic::experimental_constrained_trunc:
+ Opcode = ISD::STRICT_FTRUNC;
+ break;
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain = getRoot();
@@ -6405,7 +6548,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
BeginLabel, EndLabel);
- } else {
+ } else if (!isScopedEHPersonality(Pers)) {
MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
}
}
@@ -7200,10 +7343,11 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
-static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
- const SDLoc &DL, SDISelAsmOperandInfo &OpInfo,
+static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
+ SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
@@ -7211,13 +7355,21 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
- std::pair<unsigned, const TargetRegisterClass *> PhysReg =
- TLI.getRegForInlineAsmConstraint(&TRI, RefOpInfo.ConstraintCode,
- RefOpInfo.ConstraintVT);
+ unsigned AssignedReg;
+ const TargetRegisterClass *RC;
+ std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
+ &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
+ // RC is unset only on failure. Return immediately.
+ if (!RC)
+ return;
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
- unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other) {
- // If this is a FP operand in an integer register (or visa versa), or more
+ // If this is an FP operand in an integer register (or vice versa), or more
// generally if the operand value disagrees with the register class we plan
// to stick it in, fix the operand type.
//
@@ -7225,34 +7377,30 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// Bitcast for output value is done at the end of visitInlineAsm().
if ((OpInfo.Type == InlineAsm::isOutput ||
OpInfo.Type == InlineAsm::isInput) &&
- PhysReg.second &&
- !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) {
+ !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types). Note: output bitcast is done at the end of
// visitInlineAsm().
- MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second);
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
// Exclude indirect inputs while they are unsupported because the code
// to perform the load is missing and thus OpInfo.CallOperand still
- // refer to the input address rather than the pointed-to value.
+ // refers to the input address rather than the pointed-to value.
if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
OpInfo.CallOperand =
DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
- // If the operand is a FP value and we want it in integer registers,
+ // If the operand is an FP value and we want it in integer registers,
// use the corresponding integer type. This turns an f64 value into
// i64, which can be passed with two i32 values on a 32-bit machine.
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
- RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+ MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
if (OpInfo.Type == InlineAsm::isInput)
OpInfo.CallOperand =
- DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
- OpInfo.ConstraintVT = RegVT;
+ DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = VT;
}
}
-
- NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
}
// No need to allocate a matching input constraint since the constraint it's
@@ -7260,59 +7408,38 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
if (OpInfo.isMatchingInputConstraint())
return;
- MVT RegVT;
EVT ValueVT = OpInfo.ConstraintVT;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Initialize NumRegs.
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other)
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
// If this is a constraint for a specific physical register, like {r17},
// assign it now.
- if (unsigned AssignedReg = PhysReg.first) {
- const TargetRegisterClass *RC = PhysReg.second;
- if (OpInfo.ConstraintVT == MVT::Other)
- ValueVT = *TRI.legalclasstypes_begin(*RC);
-
- // Get the actual register value type. This is important, because the user
- // may have asked for (e.g.) the AX register in i32 type. We need to
- // remember that AX is actually i16 to get the right extension.
- RegVT = *TRI.legalclasstypes_begin(*RC);
-
- // This is a explicit reference to a physical register.
- Regs.push_back(AssignedReg);
-
- // If this is an expanded reference, add the rest of the regs to Regs.
- if (NumRegs != 1) {
- TargetRegisterClass::iterator I = RC->begin();
- for (; *I != AssignedReg; ++I)
- assert(I != RC->end() && "Didn't find reg!");
-
- // Already added the first reg.
- --NumRegs; ++I;
- for (; NumRegs; --NumRegs, ++I) {
- assert(I != RC->end() && "Ran out of registers to allocate!");
- Regs.push_back(*I);
- }
- }
- OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- return;
- }
+ // If this is associated with a specific register, initialize the iterator to
+ // the correct place. If virtual, make sure we have enough registers.
- // Otherwise, if this was a reference to an LLVM register class, create vregs
- // for this reference.
- if (const TargetRegisterClass *RC = PhysReg.second) {
- RegVT = *TRI.legalclasstypes_begin(*RC);
- if (OpInfo.ConstraintVT == MVT::Other)
- ValueVT = RegVT;
+ // Initialize iterator if necessary
+ TargetRegisterClass::iterator I = RC->begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
- // Create the appropriate number of virtual registers.
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- for (; NumRegs; --NumRegs)
- Regs.push_back(RegInfo.createVirtualRegister(RC));
+ // Do not check for single registers.
+ if (AssignedReg) {
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "AssignedReg should be member of RC");
+ }
- OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- return;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ auto R = (AssignedReg) ? *I : RegInfo.createVirtualRegister(RC);
+ Regs.push_back(R);
}
- // Otherwise, we couldn't allocate enough registers for this.
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
}
static unsigned
@@ -7333,21 +7460,6 @@ findMatchingInlineAsmOperand(unsigned OperandNo,
return CurOp;
}
-/// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT
-/// \return true if it has succeeded, false otherwise
-static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs,
- MVT RegVT, SelectionDAG &DAG) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0, e = NumRegs; i != e; ++i) {
- if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
- Regs.push_back(RegInfo.createVirtualRegister(RC));
- else
- return false;
- }
- return true;
-}
-
namespace {
class ExtraFlags {
@@ -7404,12 +7516,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
+ for (auto &T : TargetConstraints) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
- MVT OpVT = MVT::Other;
-
// Compute the value type for each operand.
if (OpInfo.Type == InlineAsm::isInput ||
(OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
@@ -7423,39 +7533,37 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpVT =
+ OpInfo.ConstraintVT =
OpInfo
.getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
.getSimpleVT();
- }
-
- if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
+ } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
- OpVT = TLI.getSimpleValueType(DAG.getDataLayout(),
- STy->getElementType(ResNo));
+ OpInfo.ConstraintVT = TLI.getSimpleValueType(
+ DAG.getDataLayout(), STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
+ OpInfo.ConstraintVT =
+ TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
}
++ResNo;
+ } else {
+ OpInfo.ConstraintVT = MVT::Other;
}
- OpInfo.ConstraintVT = OpVT;
-
if (!hasMemory)
hasMemory = OpInfo.hasMemory(TLI);
// Determine if this InlineAsm MayLoad or MayStore based on the constraints.
- // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]?
- auto TargetConstraint = TargetConstraints[i];
+ // FIXME: Could we compute this on OpInfo rather than T?
// Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(TargetConstraint, SDValue());
+ TLI.ComputeConstraintToUse(T, SDValue());
- ExtraInfo.update(TargetConstraint);
+ ExtraInfo.update(T);
}
SDValue Chain, Flag;
@@ -7469,9 +7577,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Second pass over the constraints: compute which constraint option to use
// and assign registers to constraints that want a specific physreg.
- for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
- SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
-
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
// other is floating point, or their sizes are different, flag it as an
@@ -7511,24 +7617,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &RefOpInfo =
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
- : ConstraintOperands[i];
+ : OpInfo;
if (RefOpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
+ GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
}
// Third pass - Loop over all of the operands, assigning virtual or physregs
// to register class operands.
- for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
- SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
SDISelAsmOperandInfo &RefOpInfo =
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
- : ConstraintOperands[i];
+ : OpInfo;
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
+ GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -7555,9 +7660,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// IndirectStoresToEmit - The set of stores to emit after the inline asm node.
std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
- for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
- SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
-
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
@@ -7635,9 +7738,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
SmallVector<unsigned, 4> Regs;
- if (!createVirtualRegs(Regs,
- InlineAsm::getNumOperandRegisters(OpFlag),
- RegVT, DAG)) {
+ if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) {
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
+ MachineRegisterInfo &RegInfo =
+ DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+ } else {
emitInlineAsmError(CS, "inline asm error: This value type register "
"class is not natively supported!");
return;
@@ -7772,19 +7879,19 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
unsigned numRet;
ArrayRef<Type *> ResultTypes;
SmallVector<SDValue, 1> ResultValues(1);
- if (CSResultType->isSingleValueType()) {
- numRet = 1;
- ResultValues[0] = Val;
- ResultTypes = makeArrayRef(CSResultType);
- } else {
- numRet = CSResultType->getNumContainedTypes();
+ if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) {
+ numRet = StructResult->getNumElements();
assert(Val->getNumOperands() == numRet &&
"Mismatch in number of output operands in asm result");
- ResultTypes = CSResultType->subtypes();
+ ResultTypes = StructResult->elements();
ArrayRef<SDUse> ValueUses = Val->ops();
ResultValues.resize(numRet);
std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(),
[](const SDUse &u) -> SDValue { return u.get(); });
+ } else {
+ numRet = 1;
+ ResultValues[0] = Val;
+ ResultTypes = makeArrayRef(CSResultType);
}
SmallVector<EVT, 1> ResultVTs(numRet);
for (unsigned i = 0; i < numRet; i++) {
@@ -7922,7 +8029,8 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
return Op;
APInt Hi = CR.getUnsignedMax();
- unsigned Bits = Hi.getActiveBits();
+ unsigned Bits = std::max(Hi.getActiveBits(),
+ static_cast<unsigned>(IntegerType::MIN_INT_BITS));
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
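
The hunk above clamps the computed width to at least IntegerType::MIN_INT_BITS; the static_cast is what lets std::max compile, since std::max deduces a single type for both arguments. A stand-alone sketch of the same idiom (kMinIntBits is an illustrative placeholder, not the LLVM constant):

#include <algorithm>

enum { kMinIntBits = 1 };  // placeholder for IntegerType::MIN_INT_BITS

unsigned clampBitWidth(unsigned activeBits) {
  // Without the cast, std::max(unsigned, enum) fails to deduce a common type.
  return std::max(activeBits, static_cast<unsigned>(kMinIntBits));
}
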
@@ -8677,7 +8785,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
// notional registers required by the type.
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
- getABIRegCopyCC(V));
+ None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@@ -9210,7 +9318,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
/// the end.
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
- const TerminatorInst *TI = LLVMBB->getTerminator();
+ const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
@@ -9642,7 +9750,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
}
BitTestInfo BTI;
- llvm::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
+ llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) {
// Sort by probability first, number of bits second, bit mask third.
if (a.ExtraProb != b.ExtraProb)
return a.ExtraProb > b.ExtraProb;
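
The llvm::sort call above takes a comparator that orders by several keys in priority order, falling through to the next key only on ties. A generic stand-alone version of that shape (the Case struct and its fields are illustrative, not LLVM's CaseBits):

#include <algorithm>
#include <vector>

struct Case { double prob; unsigned bits; unsigned mask; };

void sortCases(std::vector<Case> &cases) {
  std::sort(cases.begin(), cases.end(), [](const Case &a, const Case &b) {
    if (a.prob != b.prob)
      return a.prob > b.prob;  // higher probability first
    if (a.bits != b.bits)
      return a.bits > b.bits;  // then more covered bits
    return a.mask < b.mask;    // finally a deterministic tie-break
  });
}
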
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 4b5dda982f1b..5f9cdb69daf7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -854,6 +854,9 @@ private:
void visitInvoke(const InvokeInst &I);
void visitResume(const ResumeInst &I);
+ void visitUnary(const User &I, unsigned Opcode);
+ void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
+
void visitBinary(const User &I, unsigned Opcode);
void visitShift(const User &I, unsigned Opcode);
void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index fa341e8b5fa5..43df2abb674b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -46,6 +46,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "SDNodeDbgValue.h"
#include <cstdint>
#include <iterator>
@@ -123,6 +124,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::SPONENTRY: return "SPONENTRY";
case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
case ISD::READ_REGISTER: return "READ_REGISTER";
case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
@@ -174,25 +176,34 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
// Unary operators
case ISD::FABS: return "fabs";
case ISD::FMINNUM: return "fminnum";
+ case ISD::STRICT_FMINNUM: return "strict_fminnum";
case ISD::FMAXNUM: return "fmaxnum";
- case ISD::FMINNAN: return "fminnan";
- case ISD::FMAXNAN: return "fmaxnan";
+ case ISD::STRICT_FMAXNUM: return "strict_fmaxnum";
+ case ISD::FMINNUM_IEEE: return "fminnum_ieee";
+ case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee";
+ case ISD::FMINIMUM: return "fminimum";
+ case ISD::FMAXIMUM: return "fmaximum";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::STRICT_FSQRT: return "strict_fsqrt";
+ case ISD::FCBRT: return "fcbrt";
case ISD::FSIN: return "fsin";
case ISD::STRICT_FSIN: return "strict_fsin";
case ISD::FCOS: return "fcos";
case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
case ISD::FTRUNC: return "ftrunc";
+ case ISD::STRICT_FTRUNC: return "strict_ftrunc";
case ISD::FFLOOR: return "ffloor";
+ case ISD::STRICT_FFLOOR: return "strict_ffloor";
case ISD::FCEIL: return "fceil";
+ case ISD::STRICT_FCEIL: return "strict_fceil";
case ISD::FRINT: return "frint";
case ISD::STRICT_FRINT: return "strict_frint";
case ISD::FNEARBYINT: return "fnearbyint";
case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint";
case ISD::FROUND: return "fround";
+ case ISD::STRICT_FROUND: return "strict_fround";
case ISD::FEXP: return "fexp";
case ISD::STRICT_FEXP: return "strict_fexp";
case ISD::FEXP2: return "fexp2";
@@ -226,6 +237,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SRL: return "srl";
case ISD::ROTL: return "rotl";
case ISD::ROTR: return "rotr";
+ case ISD::FSHL: return "fshl";
+ case ISD::FSHR: return "fshr";
case ISD::FADD: return "fadd";
case ISD::STRICT_FADD: return "strict_fadd";
case ISD::FSUB: return "fsub";
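
FSHL and FSHR, whose printable names are added above, are the new funnel-shift nodes. A rough reference for their semantics on 32-bit values (a sketch, not the DAG legalization):

#include <cstdint>

uint32_t fshl32(uint32_t hi, uint32_t lo, uint32_t amt) {
  amt %= 32;                     // shift amount is taken modulo the width
  if (amt == 0) return hi;       // avoid an undefined shift by 32
  return (hi << amt) | (lo >> (32 - amt));
}

uint32_t fshr32(uint32_t hi, uint32_t lo, uint32_t amt) {
  amt %= 32;
  if (amt == 0) return lo;
  return (lo >> amt) | (hi << (32 - amt));
}
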
@@ -280,6 +293,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";
+ case ISD::SADDSAT: return "saddsat";
+ case ISD::UADDSAT: return "uaddsat";
+ case ISD::SSUBSAT: return "ssubsat";
+ case ISD::USUBSAT: return "usubsat";
+ case ISD::SMULFIX: return "smulfix";
+
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
case ISD::ZERO_EXTEND: return "zero_extend";
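
The saturating arithmetic nodes listed above clamp to the edge of the value range instead of wrapping. Minimal reference versions of the unsigned pair (illustrative only):

#include <cstdint>
#include <limits>

uint32_t uaddsat32(uint32_t a, uint32_t b) {
  uint32_t s = a + b;
  return s < a ? std::numeric_limits<uint32_t>::max() : s;  // clamp on wrap
}

uint32_t usubsat32(uint32_t a, uint32_t b) {
  return a > b ? a - b : 0;  // clamp at zero instead of wrapping
}
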
@@ -681,9 +700,26 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ':' << L->getLine();
if (unsigned C = L->getColumn())
OS << ':' << C;
+
+ for (SDDbgValue *Dbg : G->GetDbgValues(this)) {
+ if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated())
+ continue;
+ Dbg->dump(OS);
+ }
}
}
+LLVM_DUMP_METHOD void SDDbgValue::dump(raw_ostream &OS) const {
+ OS << " DbgVal";
+ if (kind == SDNODE)
+ OS << '(' << u.s.ResNo << ')';
+ OS << ":\"" << Var->getName() << '"';
+#ifndef NDEBUG
+ if (Expr->getNumElements())
+ Expr->dump();
+#endif
+}
+
/// Return true if this node is so simple that we should just print it inline
/// if it appears as an operand.
static bool shouldPrintInline(const SDNode &Node) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index f7bd8847bee3..af5c2433fa2f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -176,7 +177,8 @@ static const bool ViewDAGCombine1 = false,
/// RegisterScheduler class - Track the registration of instruction schedulers.
///
//===---------------------------------------------------------------------===//
-MachinePassRegistry RegisterScheduler::Registry;
+MachinePassRegistry<RegisterScheduler::FunctionPassCtor>
+ RegisterScheduler::Registry;
//===---------------------------------------------------------------------===//
///
@@ -417,7 +419,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
CurDAG->init(*MF, *ORE, this, LibInfo,
- getAnalysisIfAvailable<DivergenceAnalysis>());
+ getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
FuncInfo->set(Fn, *MF, CurDAG);
// Now get the optional analyzes if we want to.
@@ -451,7 +453,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (!succ_empty(&BB))
continue;
- const TerminatorInst *Term = BB.getTerminator();
+ const Instruction *Term = BB.getTerminator();
if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term))
continue;
@@ -695,14 +697,14 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
if (!TargetRegisterInfo::isVirtualRegister(DestReg))
continue;
- // Ignore non-scalar or non-integer values.
+ // Ignore non-integer values.
SDValue Src = N->getOperand(2);
EVT SrcVT = Src.getValueType();
- if (!SrcVT.isInteger() || SrcVT.isVector())
+ if (!SrcVT.isInteger())
continue;
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
- CurDAG->computeKnownBits(Src, Known);
+ Known = CurDAG->computeKnownBits(Src);
FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known);
} while (!Worklist.empty());
}
@@ -714,8 +716,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
int BlockNumber = -1;
(void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
+#ifndef NDEBUG
TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);
+#endif
// Pre-type legalization allow creation of any node types.
CurDAG->NewNodesMustHaveLegalTypes = false;
@@ -750,8 +754,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -770,8 +776,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -792,8 +800,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -839,8 +849,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
}
if (ViewLegalizeDAGs && MatchFilterBB)
@@ -852,8 +864,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize();
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -870,8 +884,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -1114,6 +1130,37 @@ static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) {
return false;
}
+// The wasm.landingpad.index intrinsic associates a landing pad index number
+// with a catchpad instruction. Retrieve the landing pad index from the
+// intrinsic and store the mapping in the function.
+static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
+ const CatchPadInst *CPI) {
+ MachineFunction *MF = MBB->getParent();
+ // In case of single catch (...), we don't emit LSDA, so we don't need
+ // this information.
+ bool IsSingleCatchAllClause =
+ CPI->getNumArgOperands() == 1 &&
+ cast<Constant>(CPI->getArgOperand(0))->isNullValue();
+ if (!IsSingleCatchAllClause) {
+ // Create a mapping from landing pad label to landing pad index.
+ bool IntrFound = false;
+ for (const User *U : CPI->users()) {
+ if (const auto *Call = dyn_cast<IntrinsicInst>(U)) {
+ Intrinsic::ID IID = Call->getIntrinsicID();
+ if (IID == Intrinsic::wasm_landingpad_index) {
+ Value *IndexArg = Call->getArgOperand(1);
+ int Index = cast<ConstantInt>(IndexArg)->getZExtValue();
+ MF->setWasmLandingPadIndex(MBB, Index);
+ IntrFound = true;
+ break;
+ }
+ }
+ }
+ assert(IntrFound && "wasm.landingpad.index intrinsic not found!");
+ (void)IntrFound;
+ }
+}
+
/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
/// do other setup for EH landing-pad blocks.
bool SelectionDAGISel::PrepareEHLandingPad() {
@@ -1123,44 +1170,48 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
const TargetRegisterClass *PtrRC =
TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout()));
+ auto Pers = classifyEHPersonality(PersonalityFn);
+
// Catchpads have one live-in register, which typically holds the exception
// pointer or code.
- if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
- if (hasExceptionPointerOrCodeUser(CPI)) {
- // Get or create the virtual register to hold the pointer or code. Mark
- // the live in physreg and copy into the vreg.
- MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
- assert(EHPhysReg && "target lacks exception pointer register");
- MBB->addLiveIn(EHPhysReg);
- unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
- BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
- TII->get(TargetOpcode::COPY), VReg)
- .addReg(EHPhysReg, RegState::Kill);
+ if (isFuncletEHPersonality(Pers)) {
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
+ if (hasExceptionPointerOrCodeUser(CPI)) {
+ // Get or create the virtual register to hold the pointer or code. Mark
+ // the live in physreg and copy into the vreg.
+ MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
+ assert(EHPhysReg && "target lacks exception pointer register");
+ MBB->addLiveIn(EHPhysReg);
+ unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::COPY), VReg)
+ .addReg(EHPhysReg, RegState::Kill);
+ }
}
return true;
}
- if (!LLVMBB->isLandingPad())
- return true;
-
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->addLandingPad(MBB);
- // Assign the call site to the landing pad's begin label.
- MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
-
const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
- // Mark exception register as live in.
- if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
- FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
-
- // Mark exception selector register as live in.
- if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
- FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ if (Pers == EHPersonality::Wasm_CXX) {
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI()))
+ mapWasmLandingPadIndex(MBB, CPI);
+ } else {
+ // Assign the call site to the landing pad's begin label.
+ MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+ // Mark exception register as live in.
+ if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
+ FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ // Mark exception selector register as live in.
+ if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
+ FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ }
return true;
}
@@ -1171,7 +1222,7 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
static bool isFoldedOrDeadInstruction(const Instruction *I,
FunctionLoweringInfo *FuncInfo) {
return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
- !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !I->isTerminator() && // Terminators aren't folded.
!isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
!I->isEHPad() && // EH pad instructions aren't folded.
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
@@ -1688,7 +1739,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
Inst->getDebugLoc(), LLVMBB);
bool ShouldAbort = EnableFastISelAbort;
- if (isa<TerminatorInst>(Inst)) {
+ if (Inst->isTerminator()) {
// Use a different message for terminator misses.
R << "FastISel missed terminator";
// Don't abort for terminator unless the level is really high
@@ -2160,9 +2211,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
// Otherwise, the DAG Combiner may have proven that the value coming in is
// either already zero or is not demanded. Check for known zero input bits.
APInt NeededMask = DesiredMask & ~ActualMask;
-
- KnownBits Known;
- CurDAG->computeKnownBits(LHS, Known);
+ KnownBits Known = CurDAG->computeKnownBits(LHS);
// If all the missing bits in the or are already known to be set, match!
if (NeededMask.isSubsetOf(Known.One))
@@ -3156,6 +3205,18 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
N.getNode()))
break;
continue;
+ case OPC_CheckPredicateWithOperands: {
+ unsigned OpNum = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Operands;
+
+ for (unsigned i = 0; i < OpNum; ++i)
+ Operands.push_back(RecordedNodes[MatcherTable[MatcherIndex++]].first);
+
+ unsigned PredNo = MatcherTable[MatcherIndex++];
+ if (!CheckNodePredicateWithOperands(N.getNode(), PredNo, Operands))
+ break;
+ continue;
+ }
case OPC_CheckComplexPat: {
unsigned CPNum = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
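
The new OPC_CheckPredicateWithOperands case above consumes a variable-length record from the matcher byte table: an operand count, that many recorded-node indices, then a predicate number. A toy reader of the same shape (the single-byte encoding and all names here are assumptions for illustration):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

bool checkPredicateWithOperands(
    const std::vector<uint8_t> &table, size_t &idx,
    const std::vector<int> &recorded,
    const std::function<bool(unsigned, const std::vector<int> &)> &pred) {
  unsigned opNum = table[idx++];
  std::vector<int> operands;
  for (unsigned i = 0; i < opNum; ++i)
    operands.push_back(recorded[table[idx++]]);  // look up recorded operands
  unsigned predNo = table[idx++];
  return pred(predNo, operands);
}
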
@@ -3598,38 +3659,22 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
bool mayLoad = MCID.mayLoad();
bool mayStore = MCID.mayStore();
- unsigned NumMemRefs = 0;
- for (SmallVectorImpl<MachineMemOperand *>::const_iterator I =
- MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
- if ((*I)->isLoad()) {
- if (mayLoad)
- ++NumMemRefs;
- } else if ((*I)->isStore()) {
- if (mayStore)
- ++NumMemRefs;
- } else {
- ++NumMemRefs;
- }
- }
-
- MachineSDNode::mmo_iterator MemRefs =
- MF->allocateMemRefsArray(NumMemRefs);
-
- MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
- for (SmallVectorImpl<MachineMemOperand *>::const_iterator I =
- MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
- if ((*I)->isLoad()) {
+ // We expect to have relatively few of these so just filter them into a
+ // temporary buffer so that we can easily add them to the instruction.
+ SmallVector<MachineMemOperand *, 4> FilteredMemRefs;
+ for (MachineMemOperand *MMO : MatchedMemRefs) {
+ if (MMO->isLoad()) {
if (mayLoad)
- *MemRefsPos++ = *I;
- } else if ((*I)->isStore()) {
+ FilteredMemRefs.push_back(MMO);
+ } else if (MMO->isStore()) {
if (mayStore)
- *MemRefsPos++ = *I;
+ FilteredMemRefs.push_back(MMO);
} else {
- *MemRefsPos++ = *I;
+ FilteredMemRefs.push_back(MMO);
}
}
- Res->setMemRefs(MemRefs, MemRefs + NumMemRefs);
+ CurDAG->setNodeMemRefs(Res, FilteredMemRefs);
}
LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs()
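
The comment in the hunk above ("filter them into a temporary buffer") replaces the old count-then-fill logic with a single pass. The same shape in stand-alone form (MemRef is a plain illustrative struct, not MachineMemOperand):

#include <vector>

struct MemRef { bool isLoad; bool isStore; };

std::vector<MemRef> filterMemRefs(const std::vector<MemRef> &refs,
                                  bool mayLoad, bool mayStore) {
  std::vector<MemRef> filtered;
  for (const MemRef &r : refs) {
    if (r.isLoad) {
      if (mayLoad) filtered.push_back(r);    // keep loads only if allowed
    } else if (r.isStore) {
      if (mayStore) filtered.push_back(r);   // keep stores only if allowed
    } else {
      filtered.push_back(r);                 // everything else is kept
    }
  }
  return filtered;
}
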
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 54cbd6859f70..90a1b350fc94 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -522,7 +522,16 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// The vm state arguments are lowered in an opaque manner. We do not know
// what type of values are contained within.
for (const Value *V : SI.DeoptState) {
- SDValue Incoming = Builder.getValue(V);
+ SDValue Incoming;
+ // If this is a function argument at a static frame index, generate it as
+ // the frame index.
+ if (const Argument *Arg = dyn_cast<Argument>(V)) {
+ int FI = Builder.FuncInfo.getArgumentFrameIndex(Arg);
+ if (FI != INT_MAX)
+ Incoming = Builder.DAG.getFrameIndex(FI, Builder.getFrameIndexTy());
+ }
+ if (!Incoming.getNode())
+ Incoming = Builder.getValue(V);
const bool LiveInValue = LiveInDeopt && !isGCValue(V);
lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e317268fa5f4..a2f05c1e3cef 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -55,10 +55,12 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
const Function &F = DAG.getMachineFunction().getFunction();
// Conservatively require the attributes of the call to match those of
- // the return. Ignore noalias because it doesn't affect the call sequence.
+ // the return. Ignore NoAlias and NonNull because they don't affect the
+ // call sequence.
AttributeList CallerAttrs = F.getAttributes();
if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
.removeAttribute(Attribute::NoAlias)
+ .removeAttribute(Attribute::NonNull)
.hasAttributes())
return false;
@@ -429,87 +431,56 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
return false;
}
-bool
-TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
- const APInt &Demanded,
- DAGCombinerInfo &DCI,
- TargetLoweringOpt &TLO) const {
- SDValue Op = User->getOperand(OpIdx);
- KnownBits Known;
-
- if (!SimplifyDemandedBits(Op, Demanded, Known, TLO, 0, true))
- return false;
-
-
- // Old will not always be the same as Op. For example:
- //
- // Demanded = 0xffffff
- // Op = i64 truncate (i32 and x, 0xffffff)
- // In this case simplify demand bits will want to replace the 'and' node
- // with the value 'x', which will give us:
- // Old = i32 and x, 0xffffff
- // New = x
- if (TLO.Old.hasOneUse()) {
- // For the one use case, we just commit the change.
- DCI.CommitTargetLoweringOpt(TLO);
- return true;
- }
-
- // If Old has more than one use then it must be Op, because the
- // AssumeSingleUse flag is not propogated to recursive calls of
- // SimplifyDemanded bits, so the only node with multiple use that
- // it will attempt to combine will be Op.
- assert(TLO.Old == Op);
-
- SmallVector <SDValue, 4> NewOps;
- for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
- if (i == OpIdx) {
- NewOps.push_back(TLO.New);
- continue;
- }
- NewOps.push_back(User->getOperand(i));
- }
- User = TLO.DAG.UpdateNodeOperands(User, NewOps);
- // Op has less users now, so we may be able to perform additional combines
- // with it.
- DCI.AddToWorklist(Op.getNode());
- // User's operands have been updated, so we may be able to do new combines
- // with it.
- DCI.AddToWorklist(User);
- return true;
-}
-
-bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
DAGCombinerInfo &DCI) const {
-
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
KnownBits Known;
- bool Simplified = SimplifyDemandedBits(Op, DemandedMask, Known, TLO);
- if (Simplified)
+ bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
+ }
return Simplified;
}
-/// Look at Op. At this point, we know that only the DemandedMask bits of the
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth,
+ bool AssumeSingleUse) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
+ AssumeSingleUse);
+}
+
+/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
-/// DemandedMask.
-bool TargetLowering::SimplifyDemandedBits(SDValue Op,
- const APInt &DemandedMask,
- KnownBits &Known,
- TargetLoweringOpt &TLO,
- unsigned Depth,
- bool AssumeSingleUse) const {
- unsigned BitWidth = DemandedMask.getBitWidth();
+/// OriginalDemandedBits and OriginalDemandedElts.
+bool TargetLowering::SimplifyDemandedBits(
+ SDValue Op, const APInt &OriginalDemandedBits,
+ const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+ unsigned Depth, bool AssumeSingleUse) const {
+ unsigned BitWidth = OriginalDemandedBits.getBitWidth();
assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
- APInt NewMask = DemandedMask;
+
+ unsigned NumElts = OriginalDemandedElts.getBitWidth();
+ assert((!Op.getValueType().isVector() ||
+ NumElts == Op.getValueType().getVectorNumElements()) &&
+ "Unexpected vector size");
+
+ APInt DemandedBits = OriginalDemandedBits;
+ APInt DemandedElts = OriginalDemandedElts;
SDLoc dl(Op);
auto &DL = TLO.DAG.getDataLayout();
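
The rewritten documentation comment above states the demanded-bits contract: only OriginalDemandedBits of Op's result are ever observed downstream, so constants feeding into it may be shrunk. A toy model of that idea on plain integers (uint32_t stands in for APInt; not the DAG code):

#include <cstdint>

struct MaskedValue { uint32_t value; uint32_t mask; };  // models (value & mask)

MaskedValue shrinkDemandedConstant(MaskedValue m, uint32_t demandedBits) {
  // Bits of the mask outside demandedBits can never be observed, so clearing
  // them is safe; if every demanded bit is already set in the mask, the AND
  // is a no-op for that user and could be dropped entirely.
  m.mask &= demandedBits;
  return m;
}
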
@@ -529,18 +500,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Depth != 0) {
// If not at the root, Just compute the Known bits to
// simplify things downstream.
- TLO.DAG.computeKnownBits(Op, Known, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
- // just set the NewMask to all bits.
- NewMask = APInt::getAllOnesValue(BitWidth);
- } else if (DemandedMask == 0) {
- // Not demanding any bits from Op.
+ // just set the DemandedBits/Elts to all bits.
+ DemandedBits = APInt::getAllOnesValue(BitWidth);
+ DemandedElts = APInt::getAllOnesValue(NumElts);
+ } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
+ // Not demanding any bits/elts from Op.
if (!Op.isUndef())
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
return false;
- } else if (Depth == 6) { // Limit search depth.
+ } else if (Depth == 6) { // Limit search depth.
return false;
}
@@ -570,24 +542,90 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
- return false; // Don't fall through, will infinitely loop.
- case ISD::AND:
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::CONCAT_VECTORS: {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ EVT SubVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVecs = Op.getNumOperands();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ APInt DemandedSubElts =
+ DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ // Known bits are shared by every demanded subvector element.
+ if (!!DemandedSubElts) {
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ }
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+ // Collect demanded elements from shuffle operands.
+ APInt DemandedLHS(NumElts, 0);
+ APInt DemandedRHS(NumElts, 0);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ int M = ShuffleMask[i];
+ if (M < 0) {
+ // For UNDEF elements, we don't know anything about the common state of
+ // the shuffle result.
+ DemandedLHS.clearAllBits();
+ DemandedRHS.clearAllBits();
+ break;
+ }
+ assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
+ }
+
+ if (!!DemandedLHS || !!DemandedRHS) {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ if (!!DemandedLHS) {
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS,
+ Known2, TLO, Depth + 1))
+ return true;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ if (!!DemandedRHS) {
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS,
+ Known2, TLO, Depth + 1))
+ return true;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ }
+ break;
+ }
+ case ISD::AND: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
// If the RHS is a constant, check to see if the LHS would be zero without
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS, here we're using information from the LHS to simplify
// the RHS.
- if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) {
- SDValue Op0 = Op.getOperand(0);
- KnownBits LHSKnown;
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
+ KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
- if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ if ((LHSKnown.Zero & DemandedBits) ==
+ (~RHSC->getAPIntValue() & DemandedBits))
return TLO.CombineTo(Op, Op0);
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
- if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
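
The VECTOR_SHUFFLE case added in the hunk above splits the demanded result elements between the two shuffle operands according to the mask, and gives up on both sides when it meets an undef entry. A plain-integer sketch of that split (bitmasks stand in for APInt and assume at most 32 elements):

#include <cstdint>
#include <vector>

void splitDemandedElts(const std::vector<int> &shuffleMask,
                       uint32_t demandedElts, unsigned numElts,
                       uint32_t &demandedLHS, uint32_t &demandedRHS) {
  demandedLHS = demandedRHS = 0;
  for (unsigned i = 0; i != numElts; ++i) {
    if (!(demandedElts & (1u << i)))
      continue;                           // this result element is never read
    int m = shuffleMask[i];
    if (m < 0) {                          // undef entry: source is unknown
      demandedLHS = demandedRHS = 0;
      return;
    }
    if (static_cast<unsigned>(m) < numElts)
      demandedLHS |= 1u << m;             // pulls from the first operand
    else
      demandedRHS |= 1u << (m - numElts); // pulls from the second operand
  }
}
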
@@ -597,34 +635,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
LHSKnown.One == ~RHSC->getAPIntValue()) {
- SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0),
- Op.getOperand(1));
+ SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
return TLO.CombineTo(Op, Xor);
}
}
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask,
- Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
- if (NewMask.isSubsetOf(Known2.Zero | Known.One))
- return TLO.CombineTo(Op, Op.getOperand(0));
- if (NewMask.isSubsetOf(Known.Zero | Known2.One))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
+ return TLO.CombineTo(Op, Op1);
// If all of the demanded bits in the inputs are known zeros, return zero.
- if (NewMask.isSubsetOf(Known.Zero | Known2.Zero))
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
return true;
// If the operation can be done in a smaller type, do so.
- if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
@@ -632,26 +669,30 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Output known-0 are known to be clear if zero in either the LHS | RHS.
Known.Zero |= Known2.Zero;
break;
- case ISD::OR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
+ }
+ case ISD::OR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask,
- Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
- if (NewMask.isSubsetOf(Known2.One | Known.Zero))
- return TLO.CombineTo(Op, Op.getOperand(0));
- if (NewMask.isSubsetOf(Known.One | Known2.Zero))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// If the operation can be done in a smaller type, do so.
- if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -659,78 +700,81 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Output known-1 are known to be set if set in either the LHS | RHS.
Known.One |= Known2.One;
break;
+ }
case ISD::XOR: {
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
- if (NewMask.isSubsetOf(Known.Zero))
- return TLO.CombineTo(Op, Op.getOperand(0));
- if (NewMask.isSubsetOf(Known2.Zero))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (DemandedBits.isSubsetOf(Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
// If the operation can be done in a smaller type, do so.
- if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// If all of the unknown bits are known to be zero on one side or the other
// (but not both) turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
- if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT,
- Op.getOperand(0),
- Op.getOperand(1)));
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
// Output known-0 bits are known if clear or set in both the LHS & RHS.
KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
- // If all of the demanded bits on one side are known, and all of the set
- // bits on that side are also known to be set on the other side, turn this
- // into an AND, as we know the bits will be cleared.
- // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
- // NB: it is okay if more bits are known than are requested
- if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side
- if (Known.One == Known2.One) { // set bits are the same on both sides
- SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
- Op.getOperand(0), ANDC));
+ if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
+ // If one side is a constant, and all of the known set bits on the other
+ // side are also set in the constant, turn this into an AND, as we know
+ // the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ // NB: it is okay if more bits are known than are requested
+ if (C->getAPIntValue() == Known2.One) {
+ SDValue ANDC =
+ TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
}
- }
- // If the RHS is a constant, see if we can change it. Don't alter a -1
- // constant because that's a 'not' op, and that is better for combining and
- // codegen.
- ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1));
- if (C && !C->isAllOnesValue()) {
- if (NewMask.isSubsetOf(C->getAPIntValue())) {
- // We're flipping all demanded bits. Flip the undemanded bits too.
- SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT);
- return TLO.CombineTo(Op, New);
+ // If the RHS is a constant, see if we can change it. Don't alter a -1
+ // constant because that's a 'not' op, and that is better for combining
+ // and codegen.
+ if (!C->isAllOnesValue()) {
+ if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ // We're flipping all demanded bits. Flip the undemanded bits too.
+ SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
+ return TLO.CombineTo(Op, New);
+ }
+ // If we can't turn this into a 'not', try to shrink the constant.
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ return true;
}
- // If we can't turn this into a 'not', try to shrink the constant.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
- return true;
}
Known = std::move(KnownOut);
break;
}
case ISD::SELECT:
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// Only known if known in both the LHS and RHS.
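
The XOR case above cites two rewrites in its comments; quick compile-time spot checks of both on concrete constants (illustrative values only):

#include <cstdint>

// (A & C1) ^ (B & C2) == (A & C1) | (B & C2) when C1 & C2 == 0, because the
// two masked values can never have a set bit in common.
constexpr uint32_t A = 0xA5, B = 0x5A, C1 = 0x0F, C2 = 0xF0;
static_assert(((A & C1) ^ (B & C2)) == ((A & C1) | (B & C2)),
              "xor of disjointly masked values is an inclusive or");

// (X | C1) ^ C2 == (X | C1) & ~C2 when (C1 & C2) == C2: every bit of C2 is
// already forced to one by the OR, so the xor simply clears those bits.
constexpr uint32_t X = 0xA5, D1 = 0x3C, D2 = 0x0C;
static_assert(((X | D1) ^ D2) == ((X | D1) & ~D2),
              "xor with bits known to be one acts as an and-not");
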
@@ -738,15 +782,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
- if (SimplifyDemandedBits(Op.getOperand(3), NewMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// Only known if known in both the LHS and RHS.
@@ -760,7 +806,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
- if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth &&
+ if (DemandedBits.isSignMask() &&
+ Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(VT) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
@@ -780,10 +827,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.Zero.setBitsFrom(1);
break;
}
- case ISD::SHL:
- if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
- SDValue InOp = Op.getOperand(0);
+ case ISD::SHL: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
@@ -793,90 +841,91 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
- if (InOp.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
- if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (Op0.getOpcode() == ISD::SRL) {
+ if (ShAmt &&
+ (DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SHL;
- int Diff = ShAmt-C1;
+ int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
}
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0),
- NewSA));
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
}
- if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, Known, TLO,
+ Depth + 1))
return true;
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
- if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
- SDValue InnerOp = InOp.getOperand(0);
+ if (Op0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = Op0.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
unsigned InnerBits = InnerVT.getScalarSizeInBits();
- if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
+ if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
ShTy = InnerVT;
SDValue NarrowShl =
- TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
- TLO.DAG.getConstant(ShAmt, dl, ShTy));
- return
- TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
+ TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getConstant(ShAmt, dl, ShTy));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
- if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
+ if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
InnerOp.hasOneUse()) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(InnerOp.getOperand(1))) {
unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
- if (InnerShAmt < ShAmt &&
- InnerShAmt < InnerBits &&
- NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
- NewMask.countTrailingZeros() >= ShAmt) {
- SDValue NewSA =
- TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
- Op.getOperand(1).getValueType());
+ if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
+ DemandedBits.getActiveBits() <=
+ (InnerBits - InnerShAmt + ShAmt) &&
+ DemandedBits.countTrailingZeros() >= ShAmt) {
+ SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
+ Op1.getValueType());
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
- NewExt, NewSA));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
}
}
}
}
Known.Zero <<= ShAmt;
- Known.One <<= ShAmt;
+ Known.One <<= ShAmt;
// low bits known zero.
Known.Zero.setLowBits(ShAmt);
}
break;
- case ISD::SRL:
- if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
- SDValue InOp = Op.getOperand(0);
+ }
+ case ISD::SRL: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
- APInt InDemandedMask = (NewMask << ShAmt);
+ APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
@@ -886,56 +935,56 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
- if (InOp.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
+ if (Op0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
if (ShAmt &&
- (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ (DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
- int Diff = ShAmt-C1;
+ int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
}
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0),
- NewSA));
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
}
// Compute the new bits that are at the top now.
- if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
- Known.Zero.setHighBits(ShAmt); // High bits known zero.
+ Known.Zero.setHighBits(ShAmt); // High bits known zero.
}
break;
- case ISD::SRA:
+ }
+ case ISD::SRA: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
// If this is an arithmetic shift right and only the low-bit is set, we can
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (NewMask.isOneValue())
- return TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
- Op.getOperand(1)));
+ if (DemandedBits.isOneValue())
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
- if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
- APInt InDemandedMask = (NewMask << ShAmt);
+ APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
@@ -944,11 +993,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
- if (NewMask.countLeadingZeros() < ShAmt)
+ if (DemandedBits.countLeadingZeros() < ShAmt)
InDemandedMask.setSignBit();
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO,
- Depth+1))
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
@@ -957,22 +1005,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
- NewMask.countLeadingZeros() >= ShAmt) {
+ DemandedBits.countLeadingZeros() >= ShAmt) {
SDNodeFlags Flags;
Flags.setExact(Op->getFlags().hasExact());
- return TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
- Op.getOperand(1), Flags));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
}
- int Log2 = NewMask.exactLogBase2();
+ int Log2 = DemandedBits.exactLogBase2();
if (Log2 >= 0) {
// The bit must come from the sign.
SDValue NewSA =
- TLO.DAG.getConstant(BitWidth - 1 - Log2, dl,
- Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
- Op.getOperand(0), NewSA));
+ TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
}
if (Known.One[BitWidth - ShAmt - 1])
@@ -980,15 +1025,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.One.setHighBits(ShAmt);
}
break;
+ }
case ISD::SIGN_EXTEND_INREG: {
+ SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExVTBits = ExVT.getScalarSizeInBits();
// If we only care about the highest bit, don't bother shifting right.
- if (NewMask.isSignMask()) {
- SDValue InOp = Op.getOperand(0);
+ if (DemandedBits.isSignMask()) {
bool AlreadySignExtended =
- TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1;
+ TLO.DAG.ComputeNumSignBits(Op0) >= BitWidth - ExVTBits + 1;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
@@ -998,25 +1044,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
- SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
- ShiftAmtTy);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp,
- ShiftAmt));
+ SDValue ShiftAmt =
+ TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
}
}
// If none of the extended bits are demanded, eliminate the sextinreg.
- if (NewMask.getActiveBits() <= ExVTBits)
- return TLO.CombineTo(Op, Op.getOperand(0));
+ if (DemandedBits.getActiveBits() <= ExVTBits)
+ return TLO.CombineTo(Op, Op0);
- APInt InputDemandedBits = NewMask.getLoBits(ExVTBits);
+ APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits.setBit(ExVTBits - 1);
- if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
- Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -1025,14 +1070,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known zero, convert this into a zero extension.
if (Known.Zero[ExVTBits - 1])
- return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
- Op.getOperand(0), dl, ExVT.getScalarType()));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
- if (Known.One[ExVTBits - 1]) { // Input sign bit known set
+ if (Known.One[ExVTBits - 1]) { // Input sign bit known set
Known.One.setBitsFrom(ExVTBits);
Known.Zero &= Mask;
- } else { // Input sign bit unknown
+ } else { // Input sign bit unknown
Known.Zero &= Mask;
Known.One &= Mask;
}
@@ -1042,8 +1087,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
EVT HalfVT = Op.getOperand(0).getValueType();
unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
- APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
- APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
+ APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
+ APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
KnownBits KnownLo, KnownHi;
@@ -1061,36 +1106,35 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::ZERO_EXTEND: {
- unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
// If none of the top bits are demanded, convert this into an any_extend.
- if (NewMask.getActiveBits() <= OperandBitWidth)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
- Op.getOperand(0)));
+ if (DemandedBits.getActiveBits() <= InBits)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
- APInt InMask = NewMask.trunc(OperandBitWidth);
- if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(OperandBitWidth);
+ Known.Zero.setBitsFrom(InBits);
break;
}
case ISD::SIGN_EXTEND: {
- unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits();
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
// If none of the top bits are demanded, convert this into an any_extend.
- if (NewMask.getActiveBits() <= InBits)
- return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
- Op.getOperand(0)));
+ if (DemandedBits.getActiveBits() <= InBits)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
- APInt InDemandedBits = NewMask.trunc(InBits);
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
InDemandedBits.setBit(InBits - 1);
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO,
- Depth+1))
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit is known one, the top bits match.
@@ -1098,34 +1142,55 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the sign bit is known zero, convert this to a zero extend.
if (Known.isNonNegative())
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT,
- Op.getOperand(0)));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src));
+ break;
+ }
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ // TODO - merge this with SIGN_EXTEND above?
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
+
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+
+ // If some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if (InBits < DemandedBits.getActiveBits())
+ InDemandedBits.setBit(InBits - 1);
+
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ // If the sign bit is known one, the top bits match.
+ Known = Known.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
- unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
- APInt InMask = NewMask.trunc(OperandBitWidth);
- if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
+ SDValue Src = Op.getOperand(0);
+
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
- unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
- APInt TruncMask = NewMask.zext(OperandBitWidth);
- if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1))
+ unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
+ APInt TruncMask = DemandedBits.zext(OperandBitWidth);
+ if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
return true;
Known = Known.trunc(BitWidth);
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
- if (Op.getOperand(0).getNode()->hasOneUse()) {
- SDValue In = Op.getOperand(0);
- switch (In.getOpcode()) {
- default: break;
+ if (Src.getNode()->hasOneUse()) {
+ switch (Src.getOpcode()) {
+ default:
+ break;
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
@@ -1133,10 +1198,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
- ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
if (!ShAmt)
break;
- SDValue Shift = In.getOperand(1);
+ SDValue Shift = Src.getOperand(1);
if (TLO.LegalTypes()) {
uint64_t ShVal = ShAmt->getZExtValue();
Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
@@ -1148,13 +1213,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
HighBits.lshrInPlace(ShAmt->getZExtValue());
HighBits = HighBits.trunc(BitWidth);
- if (!(HighBits & NewMask)) {
+ if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
- SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT,
- In.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc,
- Shift));
+ SDValue NewTrunc =
+ TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
}
}
break;
@@ -1169,7 +1234,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// demanded by its users.
EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
- if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits,
Known, TLO, Depth+1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -1177,50 +1242,111 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.Zero |= ~InMask;
break;
}
- case ISD::BITCAST:
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Src = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ unsigned EltBitWidth = Src.getScalarValueSizeInBits();
+
+ // Demand the bits from every vector element without a constant index.
+ APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
+ if (CIdx->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
+
+ // If BitWidth > EltBitWidth the value is any-extended, so we do not know
+ // anything about the extended bits.
+ APInt DemandedSrcBits = DemandedBits;
+ if (BitWidth > EltBitWidth)
+ DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
+
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
+ Depth + 1))
+ return true;
+
+ Known = Known2;
+ if (BitWidth > EltBitWidth)
+ Known = Known.zext(BitWidth);
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (!TLO.LegalOperations() && !VT.isVector() &&
- !Op.getOperand(0).getValueType().isVector() &&
- NewMask == APInt::getSignMask(Op.getValueSizeInBits()) &&
- Op.getOperand(0).getValueType().isFloatingPoint()) {
+ if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
+ DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
+ SrcVT.isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
- bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
- if ((OpVTLegal || i32Legal) && VT.isSimple() &&
- Op.getOperand(0).getValueType() != MVT::f16 &&
- Op.getOperand(0).getValueType() != MVT::f128) {
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
+ SrcVT != MVT::f128) {
// Cannot eliminate/lower SHL for f128 yet.
EVT Ty = OpVTLegal ? VT : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
- SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
unsigned OpVTSizeInBits = Op.getValueSizeInBits();
if (!OpVTLegal && OpVTSizeInBits > 32)
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
unsigned ShVal = Op.getValueSizeInBits() - 1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
+ }
+ }
+ // If bitcast from a vector, see if we can use SimplifyDemandedVectorElts by
+ // demanding the element if any bits from it are demanded.
+ // TODO - bigendian once we have test coverage.
+ // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
+ if (SrcVT.isVector() && NumSrcEltBits > 1 &&
+ (BitWidth % NumSrcEltBits) == 0 &&
+ TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = BitWidth / NumSrcEltBits;
+ auto GetDemandedSubMask = [&](APInt &DemandedSubElts) -> bool {
+ DemandedSubElts = APInt::getNullValue(Scale);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned Offset = i * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ if (!Sub.isNullValue())
+ DemandedSubElts.setBit(i);
+ }
+ return true;
+ };
+
+ APInt DemandedSubElts;
+ if (GetDemandedSubMask(DemandedSubElts)) {
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedElts = APInt::getSplat(NumSrcElts, DemandedSubElts);
+
+ APInt KnownUndef, KnownZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
+ return true;
}
}
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
// recursive call where Known may be useful to the caller.
if (Depth > 0) {
- TLO.DAG.computeKnownBits(Op, Known, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, Depth);
return false;
}
break;
+ }
case ISD::ADD:
case ISD::MUL:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
- unsigned NewMaskLZ = NewMask.countLeadingZeros();
- APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ);
- if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) ||
- SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) ||
+ unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
+ APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
+ if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
+ SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
- ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) {
+ ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
SDNodeFlags Flags = Op.getNode()->getFlags();
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
@@ -1240,7 +1366,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
// is probably not useful (and could be detrimental).
ConstantSDNode *C = isConstOrConstSplat(Op1);
- APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ);
+ APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
if (C && !C->isAllOnesValue() && !C->isOne() &&
(C->getAPIntValue() | HighMask).isAllOnesValue()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
@@ -1257,24 +1383,34 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
LLVM_FALLTHROUGH;
}
default:
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
+ Known, TLO, Depth))
+ return true;
+ break;
+ }
+
// Just use computeKnownBits to compute output bits.
- TLO.DAG.computeKnownBits(Op, Known, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
// If we know the value of all of the demanded bits, return this as a
// constant.
- if (NewMask.isSubsetOf(Known.Zero|Known.One)) {
+ if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
for (SDNodeIterator I = SDNodeIterator::begin(N),
- E = SDNodeIterator::end(N); I != E; ++I) {
+ E = SDNodeIterator::end(N);
+ I != E; ++I) {
SDNode *Op = *I;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
if (C->isOpaque())
return false;
}
- return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
+ // TODO: Handle float bits as well.
+ if (VT.isInteger())
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
}
return false;
@@ -1291,8 +1427,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
bool Simplified =
SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
- if (Simplified)
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
+ }
return Simplified;
}
@@ -1371,6 +1509,23 @@ bool TargetLowering::SimplifyDemandedVectorElts(
TLO, Depth + 1))
return true;
+ // Try calling SimplifyDemandedBits, converting demanded elts to the bits
+ // of the large element.
+ // TODO - bigendian once we have test coverage.
+ if (TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
+ APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Ofs = (i % Scale) * EltSizeInBits;
+ SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
+ }
+
+ KnownBits Known;
+ if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
+ return true;
+ }
+
// If the src element is zero/undef then all the output elements will be -
// only demanded elements are guaranteed to be correct.
for (unsigned i = 0; i != NumSrcElts; ++i) {
@@ -1463,7 +1618,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
EVT SubVT = Sub.getValueType();
unsigned NumSubElts = SubVT.getVectorNumElements();
const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
- if (Idx.uge(NumElts - NumSubElts))
+ if (Idx.ugt(NumElts - NumSubElts))
break;
unsigned SubIdx = Idx.getZExtValue();
APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
@@ -1481,22 +1636,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::EXTRACT_SUBVECTOR: {
- if (!isa<ConstantSDNode>(Op.getOperand(1)))
- break;
SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
- if (Idx.uge(NumSrcElts - NumElts))
- break;
- // Offset the demanded elts by the subvector index.
- uint64_t SubIdx = Idx.getZExtValue();
- APInt SrcElts = DemandedElts.zext(NumSrcElts).shl(SubIdx);
- APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
- Depth + 1))
- return true;
- KnownUndef = SrcUndef.extractBits(NumElts, SubIdx);
- KnownZero = SrcZero.extractBits(NumElts, SubIdx);
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef = SrcUndef.extractBits(NumElts, Idx);
+ KnownZero = SrcZero.extractBits(NumElts, Idx);
+ }
break;
}
case ISD::INSERT_VECTOR_ELT: {
@@ -1510,9 +1663,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned Idx = CIdx->getZExtValue();
if (!DemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
- DemandedElts.clearBit(Idx);
- if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef,
+ APInt DemandedVecElts(DemandedElts);
+ DemandedVecElts.clearBit(Idx);
+ if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
@@ -1534,12 +1688,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::VSELECT: {
- APInt DemandedLHS(DemandedElts);
- APInt DemandedRHS(DemandedElts);
-
- // TODO - add support for constant vselect masks.
+ // Try to transform the select condition based on the current demanded
+ // elements.
+ // TODO: If a condition element is undef, we can choose from one arm of the
+ // select (and if one arm is undef, then we can propagate that to the
+ // result).
+ // TODO - add support for constant vselect masks (see IR version of this).
+ APInt UnusedUndef, UnusedZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
+ UnusedZero, TLO, Depth + 1))
+ return true;
// See if we can simplify either vselect operand.
+ APInt DemandedLHS(DemandedElts);
+ APInt DemandedRHS(DemandedElts);
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
@@ -1624,8 +1786,35 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
break;
}
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ APInt SrcUndef, SrcZero;
+ SDValue Src = Op.getOperand(0);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ KnownZero = SrcZero.zextOrTrunc(NumElts);
+ KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+ }
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
case ISD::ADD:
- case ISD::SUB: {
+ case ISD::SUB:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM: {
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
SrcZero, TLO, Depth + 1))
@@ -1637,21 +1826,58 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownUndef &= SrcUndef;
break;
}
+ case ISD::AND: {
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+
+ // If either side has a zero element, then the result element is zero, even
+ // if the other is an UNDEF.
+ KnownZero |= SrcZero;
+ KnownUndef &= SrcUndef;
+ KnownUndef &= ~KnownZero;
+ break;
+ }
case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+ }
break;
default: {
- if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
KnownZero, TLO, Depth))
return true;
+ } else {
+ KnownBits Known;
+ APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
+ if (SimplifyDemandedBits(Op, DemandedBits, DemandedEltMask, Known, TLO,
+ Depth, AssumeSingleUse))
+ return true;
+ }
break;
}
}
-
assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
+
+ // Constant fold all undef cases.
+ // TODO: Handle zero cases as well.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+
return false;
}
@@ -1711,6 +1937,32 @@ bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return false;
}
+bool TargetLowering::SimplifyDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyDemandedBits if you don't know whether Op"
+ " is a target node!");
+ computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
+ return false;
+}
+
+bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use isKnownNeverNaN if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
@@ -1901,10 +2153,24 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
} else
return SDValue();
- const APInt &I01 = C01->getAPIntValue();
- // Both of them must be power-of-two, and the constant from setcc is bigger.
- if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2()))
- return SDValue();
+ APInt I01 = C01->getAPIntValue();
+
+ auto checkConstants = [&I1, &I01]() -> bool {
+ // Both of them must be power-of-two, and the constant from setcc is bigger.
+ return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
+ };
+
+ if (checkConstants()) {
+ // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
+ } else {
+ // What if we invert constants? (and the target predicate)
+ I1.negate();
+ I01.negate();
+ NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
+ if (!checkConstants())
+ return SDValue();
+ // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
+ }
// They are power-of-two, so which bit is set?
const unsigned KeptBits = I1.logBase2();
@@ -2141,7 +2407,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (bestWidth) {
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
- if (newVT.isRound()) {
+ if (newVT.isRound() &&
+ shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
EVT PtrType = Lod->getOperand(1).getValueType();
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
@@ -2819,8 +3086,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
-bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
+bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
int64_t &Offset) const {
+
+ SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
+
if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
GA = GASD->getGlobal();
Offset += GASD->getOffset();
@@ -3419,34 +3689,63 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
-static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
+static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) {
- assert(d != 0 && "Division by zero!");
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+
+ bool UseSRA = false;
+ SmallVector<SDValue, 16> Shifts, Factors;
+
+ auto BuildSDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+ APInt Divisor = C->getAPIntValue();
+ unsigned Shift = Divisor.countTrailingZeros();
+ if (Shift) {
+ Divisor.ashrInPlace(Shift);
+ UseSRA = true;
+ }
+ // Calculate the multiplicative inverse, using Newton's method.
+ APInt t;
+ APInt Factor = Divisor;
+ while ((t = Divisor * Factor) != 1)
+ Factor *= APInt(Divisor.getBitWidth(), 2) - t;
+ Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
+ Factors.push_back(DAG.getConstant(Factor, dl, SVT));
+ return true;
+ };
+
+ // Collect all magic values from the build vector.
+ if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
+ return SDValue();
+
+ SDValue Shift, Factor;
+ if (VT.isVector()) {
+ Shift = DAG.getBuildVector(ShVT, dl, Shifts);
+ Factor = DAG.getBuildVector(VT, dl, Factors);
+ } else {
+ Shift = Shifts[0];
+ Factor = Factors[0];
+ }
+
+ SDValue Res = Op0;
// Shift the value upfront if it is even, so the LSB is one.
- unsigned ShAmt = d.countTrailingZeros();
- if (ShAmt) {
+ if (UseSRA) {
// TODO: For UDIV use SRL instead of SRA.
- SDValue Amt =
- DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType(),
- DAG.getDataLayout()));
SDNodeFlags Flags;
Flags.setExact(true);
- Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, Flags);
- Created.push_back(Op1.getNode());
- d.ashrInPlace(ShAmt);
+ Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
+ Created.push_back(Res.getNode());
}
- // Calculate the multiplicative inverse, using Newton's method.
- APInt t, xn = d;
- while ((t = d*xn) != 1)
- xn *= APInt(d.getBitWidth(), 2) - t;
-
- SDValue Op2 = DAG.getConstant(xn, dl, Op1.getValueType());
- SDValue Mul = DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
- Created.push_back(Mul.getNode());
- return Mul;
+ return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
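As a hedged illustration of the loop above (plain uint8_t standing in for APInt, names invented here), the same Newton-Raphson iteration recovers the multiplicative inverse of an odd divisor modulo 2^8:

#include <cstdint>

// Sketch only: each x <- x * (2 - d*x) step doubles the number of correct
// low bits, so a few iterations reach d^-1 (mod 2^8) for any odd d.
constexpr uint8_t inverseMod256(uint8_t d) {
  uint8_t x = d;                                   // odd d: d*d == 1 (mod 8)
  while (static_cast<uint8_t>(d * x) != 1)
    x = static_cast<uint8_t>(x * (2 - d * x));
  return x;
}
static_assert(inverseMod256(3) == 171, "3 * 171 == 513 == 1 (mod 256)");
static_assert(static_cast<uint8_t>(21 * inverseMod256(3)) == 7,
              "an exact 21/3 becomes a single multiply (plus SRA for even divisors)");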
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
@@ -3463,11 +3762,15 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG, bool IsAfterLegalization,
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const {
- EVT VT = N->getValueType(0);
SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
@@ -3476,50 +3779,90 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
// If the sdiv has an 'exact' bit we can use a simpler lowering.
if (N->getFlags().hasExact())
- return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, Created);
+ return BuildExactSDIV(*this, N, dl, DAG, Created);
+
+ SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
+
+ auto BuildSDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+
+ const APInt &Divisor = C->getAPIntValue();
+ APInt::ms magics = Divisor.magic();
+ int NumeratorFactor = 0;
+ int ShiftMask = -1;
+
+ if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
+ // If d is +1/-1, we just multiply the numerator by +1/-1.
+ NumeratorFactor = Divisor.getSExtValue();
+ magics.m = 0;
+ magics.s = 0;
+ ShiftMask = 0;
+ } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
+ // If d > 0 and m < 0, add the numerator.
+ NumeratorFactor = 1;
+ } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
+ // If d < 0 and m > 0, subtract the numerator.
+ NumeratorFactor = -1;
+ }
+
+ MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
+ Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
+ Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
+ ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Collect the shifts / magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
+ return SDValue();
- APInt::ms magics = Divisor.magic();
+ SDValue MagicFactor, Factor, Shift, ShiftMask;
+ if (VT.isVector()) {
+ MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+ Factor = DAG.getBuildVector(VT, dl, Factors);
+ Shift = DAG.getBuildVector(ShVT, dl, Shifts);
+ ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
+ } else {
+ MagicFactor = MagicFactors[0];
+ Factor = Factors[0];
+ Shift = Shifts[0];
+ ShiftMask = ShiftMasks[0];
+ }
- // Multiply the numerator (operand 0) by the magic value
- // FIXME: We should support doing a MUL in a wider type
+ // Multiply the numerator (operand 0) by the magic value.
+ // FIXME: We should support doing a MUL in a wider type.
SDValue Q;
- if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
- isOperationLegalOrCustom(ISD::MULHS, VT))
- Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
- DAG.getConstant(magics.m, dl, VT));
- else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
- isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
- Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
- N->getOperand(0),
- DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
- else
- return SDValue(); // No mulhs or equvialent
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
+ : isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
+ : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
+ SDValue LoHi =
+ DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
+ Q = SDValue(LoHi.getNode(), 1);
+ } else
+ return SDValue(); // No mulhs or equivalent.
+ Created.push_back(Q.getNode());
+ // (Optionally) Add/subtract the numerator using Factor.
+ Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
+ Created.push_back(Factor.getNode());
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
Created.push_back(Q.getNode());
- // If d > 0 and m < 0, add the numerator
- if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
- Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
- Created.push_back(Q.getNode());
- }
- // If d < 0 and m > 0, subtract the numerator.
- if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
- Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
- Created.push_back(Q.getNode());
- }
- auto &DL = DAG.getDataLayout();
- // Shift right algebraic if shift value is nonzero
- if (magics.s > 0) {
- Q = DAG.getNode(
- ISD::SRA, dl, VT, Q,
- DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
- Created.push_back(Q.getNode());
- }
- // Extract the sign bit and add it to the quotient
- SDValue T =
- DAG.getNode(ISD::SRL, dl, VT, Q,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,
- getShiftAmountTy(Q.getValueType(), DL)));
+ // Shift right algebraic by shift value.
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
+ Created.push_back(Q.getNode());
+
+ // Extract the sign bit, mask it and add it to the quotient.
+ SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
+ SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
+ Created.push_back(T.getNode());
+ T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
Created.push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
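A minimal scalar sketch of the sequence now emitted, specialised to a signed divide by 3 on i32; the magic constant and shift are taken from the Hacker's Delight tables cited above, and the int64_t multiply stands in for ISD::MULHS:

#include <cstdint>

// Divisor 3: magic = 0x55555556, shift = 0, and the magic is positive, so no
// add/sub of the numerator (Factor == 0) and no post-shift are needed.
constexpr int32_t sdiv3(int32_t n) {
  int32_t q = static_cast<int32_t>((int64_t{n} * 0x55555556LL) >> 32); // MULHS (assumes arithmetic >>)
  // Add the sign bit of the quotient to round toward zero (ShiftMask == -1).
  return q + static_cast<int32_t>(static_cast<uint32_t>(q) >> 31);
}
static_assert(sdiv3(7) == 2 && sdiv3(-7) == -2, "matches truncating division");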
@@ -3528,72 +3871,133 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG, bool IsAfterLegalization,
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const {
- EVT VT = N->getValueType(0);
SDLoc dl(N);
- auto &DL = DAG.getDataLayout();
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT))
return SDValue();
- // FIXME: We should use a narrower constant when the upper
- // bits are known to be zero.
- APInt::mu magics = Divisor.magicu();
+ bool UseNPQ = false;
+ SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
- SDValue Q = N->getOperand(0);
+ auto BuildUDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ APInt Divisor = C->getAPIntValue();
+ APInt::mu magics = Divisor.magicu();
+ unsigned PreShift = 0, PostShift = 0;
+
+ // If the divisor is even, we can avoid using the expensive fixup by
+ // shifting the divided value upfront.
+ if (magics.a != 0 && !Divisor[0]) {
+ PreShift = Divisor.countTrailingZeros();
+ // Get magic number for the shifted divisor.
+ magics = Divisor.lshr(PreShift).magicu(PreShift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
- // If the divisor is even, we can avoid using the expensive fixup by shifting
- // the divided value upfront.
- if (magics.a != 0 && !Divisor[0]) {
- unsigned Shift = Divisor.countTrailingZeros();
- Q = DAG.getNode(
- ISD::SRL, dl, VT, Q,
- DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL)));
- Created.push_back(Q.getNode());
+ APInt Magic = magics.m;
+
+ bool SelNPQ;
+ if (magics.a == 0 || Divisor.isOneValue()) {
+ assert(magics.s < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ PostShift = magics.s;
+ SelNPQ = false;
+ } else {
+ PostShift = magics.s - 1;
+ SelNPQ = true;
+ }
+
+ PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
+ MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
+ NPQFactors.push_back(
+ DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getNullValue(EltBits),
+ dl, SVT));
+ PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
+ UseNPQ |= SelNPQ;
+ return true;
+ };
- // Get magic number for the shifted divisor.
- magics = Divisor.lshr(Shift).magicu(Shift);
- assert(magics.a == 0 && "Should use cheap fixup now");
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Collect the shifts/magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+ return SDValue();
+
+ SDValue PreShift, PostShift, MagicFactor, NPQFactor;
+ if (VT.isVector()) {
+ PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
+ MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+ NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
+ PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
+ } else {
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+ PostShift = PostShifts[0];
}
- // Multiply the numerator (operand 0) by the magic value
- // FIXME: We should support doing a MUL in a wider type
- if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
- isOperationLegalOrCustom(ISD::MULHU, VT))
- Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT));
- else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
- isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
- Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
- DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
- else
- return SDValue(); // No mulhu or equivalent
+ SDValue Q = N0;
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
+ Created.push_back(Q.getNode());
+
+ // FIXME: We should support doing a MUL in a wider type.
+ auto GetMULHU = [&](SDValue X, SDValue Y) {
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
+ : isOperationLegalOrCustom(ISD::MULHU, VT))
+ return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
+ if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
+ : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
+ SDValue LoHi =
+ DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
+ return SDValue(LoHi.getNode(), 1);
+ }
+ return SDValue(); // No mulhu or equivalent
+ };
+
+ // Multiply the numerator (operand 0) by the magic value.
+ Q = GetMULHU(Q, MagicFactor);
+ if (!Q)
+ return SDValue();
Created.push_back(Q.getNode());
- if (magics.a == 0) {
- assert(magics.s < Divisor.getBitWidth() &&
- "We shouldn't generate an undefined shift!");
- return DAG.getNode(
- ISD::SRL, dl, VT, Q,
- DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
- } else {
- SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
- Created.push_back(NPQ.getNode());
- NPQ = DAG.getNode(
- ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL)));
+ if (UseNPQ) {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
Created.push_back(NPQ.getNode());
- NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
+ if (VT.isVector())
+ NPQ = GetMULHU(NPQ, NPQFactor);
+ else
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
+
Created.push_back(NPQ.getNode());
- return DAG.getNode(
- ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(magics.s - 1, dl,
- getShiftAmountTy(NPQ.getValueType(), DL)));
+
+ Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ Created.push_back(Q.getNode());
}
+
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
+ Created.push_back(Q.getNode());
+
+ SDValue One = DAG.getConstant(1, dl, VT);
+ SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
+ return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
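Likewise, a hedged scalar sketch of the UseNPQ path for an unsigned divide by 7 on i32; magicu(7) gives m = 0x24924925 with the "add" fixup (a != 0), so the post-shift is s - 1 = 2, and the uint64_t multiply stands in for ISD::MULHU:

#include <cstdint>

constexpr uint32_t udiv7(uint32_t n) {
  uint32_t q   = static_cast<uint32_t>((uint64_t{n} * 0x24924925ULL) >> 32);
  uint32_t npq = (n - q) >> 1;      // scalar NPQ fixup: SRL-by-1 of (n - q)
  return (npq + q) >> 2;            // post-shift (s - 1)
}
static_assert(udiv7(14) == 2 && udiv7(0xFFFFFFFFu) == 613566756u,
              "matches n / 7 at the endpoints checked here");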
bool TargetLowering::
@@ -3750,8 +4154,17 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
return false;
- Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
- Merge(Lo, Hi));
+ SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
+ EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
+ isOperationLegalOrCustom(ISD::ADDE, VT));
+ if (UseGlue)
+ Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
+ Merge(Lo, Hi));
+ else
+ Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
+ Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
SDValue Carry = Next.getValue(1);
Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
@@ -3760,9 +4173,13 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
return false;
- SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
- Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
- Carry);
+ if (UseGlue)
+ Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
+ Carry);
+ else
+ Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
+ Zero, Carry);
+
Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
if (Opcode == ISD::SMUL_LOHI) {
@@ -3797,66 +4214,525 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
return Ok;
}
+bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
+ return false;
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ SDValue X = Node->getOperand(0);
+ SDValue Y = Node->getOperand(1);
+ SDValue Z = Node->getOperand(2);
+
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsFSHL = Node->getOpcode() == ISD::FSHL;
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Z.getValueType();
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+ SDValue Zero = DAG.getConstant(0, DL, ShVT);
+
+ SDValue ShAmt;
+ if (isPowerOf2_32(EltSizeInBits)) {
+ SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
+ } else {
+ ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
+ }
+
+ SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
+ SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
+ SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
+
+ // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
+ // and that is undefined. We must compare and select to avoid UB.
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
+
+ // For fshl, 0-shift returns the 1st arg (X).
+ // For fshr, 0-shift returns the 2nd arg (Y).
+ SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
+ Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
+ return true;
+}
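A small constexpr sketch of the fshl expansion above for an assumed i8 scalar, including the compare+select that sidesteps the undefined shift when Z % BW == 0:

#include <cstdint>

constexpr uint8_t fshl8(uint8_t x, uint8_t y, uint8_t z) {
  uint8_t amt = z & 7;                                   // BW is a power of 2
  uint8_t orr = static_cast<uint8_t>((x << amt) | (y >> (8 - amt)));
  return amt == 0 ? x : orr;                             // 0-shift returns X
}
static_assert(fshl8(0xAB, 0xCD, 3) == 0x5E, "(0xAB << 3) | (0xCD >> 5)");
static_assert(fshl8(0xAB, 0xCD, 8) == 0xAB, "shift of BW wraps to 0 and returns X");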
+
+// TODO: Merge with expandFunnelShift.
+bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsLeft = Node->getOpcode() == ISD::ROTL;
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Op1.getValueType();
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+
+ // If a rotate in the other direction is legal, use it.
+ unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
+ if (isOperationLegal(RevRot, VT)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
+ return true;
+ }
+
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
+ return false;
+
+ // Otherwise,
+ // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
+ // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
+ //
+ assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
+ "Expecting the type bitwidth to be a power of 2");
+ unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
+ unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
+ SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
+ SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
+ SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
+ Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
+ DAG.getNode(HsOpc, DL, VT, Op0, And1));
+ return true;
+}
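The rotate-through-shifts pattern above, written out for an assumed i8 scalar so the masked shift amounts are easy to check; both stay in [0, 7], so no out-of-range shift is produced:

#include <cstdint>

constexpr uint8_t rotl8(uint8_t x, uint8_t c) {
  return static_cast<uint8_t>((x << (c & 7)) | (x >> ((8 - c) & 7)));
}
static_assert(rotl8(0x81, 1) == 0x03, "1000'0001 rotated left once");
static_assert(rotl8(0x81, 8) == 0x81, "rotating by the bit width is a no-op");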
+
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
- EVT VT = Node->getOperand(0).getValueType();
- EVT NVT = Node->getValueType(0);
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
SDLoc dl(SDValue(Node, 0));
// FIXME: Only f32 to i64 conversions are supported.
- if (VT != MVT::f32 || NVT != MVT::i64)
+ if (SrcVT != MVT::f32 || DstVT != MVT::i64)
return false;
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
// https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
- EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits());
+ unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
+ EVT IntVT = SrcVT.changeTypeToInteger();
+ EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
+
SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
SDValue Bias = DAG.getConstant(127, dl, IntVT);
- SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl,
- IntVT);
- SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
+ SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
- SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
- auto &DL = DAG.getDataLayout();
SDValue ExponentBits = DAG.getNode(
ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
- DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL)));
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
- SDValue Sign = DAG.getNode(
- ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
- DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL)));
- Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
+ Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
- DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
- DAG.getConstant(0x00800000, dl, IntVT));
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+ DAG.getConstant(0x00800000, dl, IntVT));
- R = DAG.getZExtOrTrunc(R, dl, NVT);
+ R = DAG.getZExtOrTrunc(R, dl, DstVT);
R = DAG.getSelectCC(
dl, Exponent, ExponentLoBit,
- DAG.getNode(ISD::SHL, dl, NVT, R,
+ DAG.getNode(ISD::SHL, dl, DstVT, R,
DAG.getZExtOrTrunc(
DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
- dl, getShiftAmountTy(IntVT, DL))),
- DAG.getNode(ISD::SRL, dl, NVT, R,
+ dl, IntShVT)),
+ DAG.getNode(ISD::SRL, dl, DstVT, R,
DAG.getZExtOrTrunc(
DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
- dl, getShiftAmountTy(IntVT, DL))),
+ dl, IntShVT)),
ISD::SETGT);
- SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
- DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
- Sign);
+ SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
+ DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
- DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
+ DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
+ return true;
+}
+
+bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(SDValue(Node, 0));
+ SDValue Src = Node->getOperand(0);
+
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
+ return false;
+
+ // If the maximum float value is smaller than the signed integer range,
+ // the destination signmask can't be represented by the float, so we can
+ // just use FP_TO_SINT directly.
+ const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
+ APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
+ APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
+ if (APFloat::opOverflow &
+ APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
+ Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ return true;
+ }
+
+ SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
+ SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+
+ bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+ if (Strict) {
+ // Expand based on the maximum range of FP_TO_SINT; if the value exceeds
+ // the signmask, offset it first (the result should then be fully
+ // representable).
+ // Sel = Src < 0x8000000000000000
+ // Val = select Sel, Src, Src - 0x8000000000000000
+ // Ofs = select Sel, 0, 0x8000000000000000
+ // Result = fp_to_sint(Val) ^ Ofs
+
+ // TODO: Should any fast-math-flags be set for the FSUB?
+ SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
+ DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+ SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
+ DAG.getConstant(SignMask, dl, DstVT));
+ Result = DAG.getNode(ISD::XOR, dl, DstVT,
+ DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
+ } else {
+ // Expand based on maximum range of FP_TO_SINT:
+ // True = fp_to_sint(Src)
+ // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
+ // Result = select (Src < 0x8000000000000000), True, False
+
+ SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ // TODO: Should any fast-math-flags be set for the FSUB?
+ SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
+ DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+ False = DAG.getNode(ISD::XOR, dl, DstVT, False,
+ DAG.getConstant(SignMask, dl, DstVT));
+ Result = DAG.getSelect(dl, DstVT, Sel, True, False);
+ }
+ return true;
+}
+
+bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+
+ if (SrcVT.getScalarType() != MVT::i64)
+ return false;
+
+ SDLoc dl(SDValue(Node, 0));
+ EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
+
+ if (DstVT.getScalarType() == MVT::f32) {
+ // Only expand vector types if we have the appropriate vector bit
+ // operations.
+ if (SrcVT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
+ !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
+ !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
+ return false;
+
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundidf in compiler_rt.
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
+
+ SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
+ SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
+ SDValue SignBitTest = DAG.getSetCC(
+ dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+ Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
+ return true;
+ }
+
+ if (DstVT.getScalarType() == MVT::f64) {
+ // Only expand vector types if we have the appropriate vector bit
+ // operations.
+ if (SrcVT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
+ !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
+ !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
+ return false;
+
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
+ SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
+ SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
+ BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
+ SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
+ SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
+ SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
+
+ SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
+ SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
+ SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ return true;
+ }
+
+ return false;
+}
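A hedged C++ rendering of the f64 path above, mirroring compiler-rt's __floatundidf; the memcpy calls play the role of the ISD::BITCAST nodes and the constants are the same ones built just above:

#include <cstdint>
#include <cstring>

double u64ToF64(uint64_t x) {
  uint64_t loBits = (x & 0xFFFFFFFFu) | 0x4330000000000000ull; // 2^52 + lo32
  uint64_t hiBits = (x >> 32)         | 0x4530000000000000ull; // 2^84 + hi32*2^32
  double lo, hi;
  std::memcpy(&lo, &loBits, sizeof(lo));
  std::memcpy(&hi, &hiBits, sizeof(hi));
  const double TwoP84PlusTwoP52 = 0x1.00000001p+84;            // 2^84 + 2^52
  // (hi - (2^84 + 2^52)) + lo == hi32*2^32 + lo32 == x, correctly rounded.
  return (hi - TwoP84PlusTwoP52) + lo;
}
// e.g. u64ToF64(~0ull) rounds to 18446744073709551616.0 (2^64), as expected.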
+
+SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
+ ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ EVT VT = Node->getValueType(0);
+ if (isOperationLegalOrCustom(NewOp, VT)) {
+ SDValue Quiet0 = Node->getOperand(0);
+ SDValue Quiet1 = Node->getOperand(1);
+
+ if (!Node->getFlags().hasNoNaNs()) {
+ // Insert canonicalizes if it's possible we need to quiet to get correct
+ // sNaN behavior.
+ if (!DAG.isKnownNeverSNaN(Quiet0)) {
+ Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
+ Node->getFlags());
+ }
+ if (!DAG.isKnownNeverSNaN(Quiet1)) {
+ Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
+ Node->getFlags());
+ }
+ }
+
+ return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
+ }
+
+ return SDValue();
+}
+
+bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ unsigned Len = VT.getScalarSizeInBits();
+ assert(VT.isInteger() && "CTPOP not implemented for this type.");
+
+ // TODO: Add support for irregular type lengths.
+ if (!(Len <= 128 && Len % 8 == 0))
+ return false;
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
+ return false;
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ SDValue Mask55 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
+ SDValue Mask33 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
+ SDValue Mask0F =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, dl, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, dl, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, dl, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ if (Len > 8)
+ Op =
+ DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, dl, ShVT));
+
+ Result = Op;
+ return true;
+}
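The same bit-counting steps as a scalar u32 constexpr sketch, which makes the role of the final 0x01010101 multiply and the Len - 8 shift easy to verify (each byte of the product holds a running sum; the top byte is the total):

#include <cstdint>

constexpr uint32_t popcount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);                 // 2-bit partial sums
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // 4-bit partial sums
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
  return (v * 0x01010101u) >> 24;                   // sum bytes, take top byte
}
static_assert(popcount32(0xF0F00001u) == 9, "eight bits in 0xF0F0 plus one");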
+
+bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
+ isOperationLegalOrCustom(ISD::CTLZ, VT)) {
+ Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
+ return true;
+ }
+
+ // If the ZERO_UNDEF version is supported use that and handle the zero case.
+ if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
+ return true;
+ }
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+ !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
+ return false;
+
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // Ref: "Hacker's Delight" by Henry Warren
+ for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
+ SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ return true;
+}
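A scalar u32 sketch of the smear-then-popcount fallback above; the loop-based popcount helper is only an illustrative stand-in for the ISD::CTPOP node:

#include <cstdint>

constexpr uint32_t popcount(uint32_t v) {
  uint32_t n = 0;
  for (; v != 0; v &= v - 1) ++n;   // clear the lowest set bit each round
  return n;
}
constexpr uint32_t ctlz32(uint32_t x) {
  x |= x >> 1;  x |= x >> 2;  x |= x >> 4;  x |= x >> 8;  x |= x >> 16;
  return popcount(~x);              // one bit set per leading zero of x
}
static_assert(ctlz32(0x00010000u) == 15, "bit 16 set leaves 15 leading zeros");
static_assert(ctlz32(0) == 32, "the all-zero input is well defined here");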
+
+bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue Op = Node->getOperand(0);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
+ isOperationLegalOrCustom(ISD::CTTZ, VT)) {
+ Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
+ return true;
+ }
+
+ // If the ZERO_UNDEF version is supported use that and handle the zero case.
+ if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
+ return true;
+ }
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ return false;
+
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // Ref: "Hacker's Delight" by Henry Warren
+ SDValue Tmp = DAG.getNode(
+ ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
+
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
+ Result =
+ DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
+ return true;
+ }
+
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
+ return true;
+}
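And the cttz fallback sketched the same way: ~x & (x - 1) leaves a one in every trailing-zero position, so its popcount (or bitwidth minus its ctlz) is the count:

#include <cstdint>

constexpr uint32_t popcount(uint32_t v) {
  uint32_t n = 0;
  for (; v != 0; v &= v - 1) ++n;
  return n;
}
constexpr uint32_t cttz32(uint32_t x) { return popcount(~x & (x - 1)); }
static_assert(cttz32(0x00000100u) == 8, "bit 8 set has 8 trailing zeros");
static_assert(cttz32(0) == 32, "x == 0 yields an all-ones mask, i.e. 32");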
+
+bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = N->getOperand(0);
+
+ // Only expand vector types if we have the appropriate vector operations.
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
+ !isOperationLegalOrCustom(ISD::ADD, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ return false;
+
+ SDValue Shift =
+ DAG.getNode(ISD::SRA, dl, VT, Op,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
+ Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
return true;
}
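The SRA/ADD/XOR sequence is the branch-free absolute-value idiom: the arithmetic shift yields 0 for non-negative inputs and -1 for negative ones, so the add/xor pair is either a no-op or a two's-complement negation. A scalar sketch, assuming a 32-bit int and an arithmetic right shift (guaranteed for the ISD::SRA node; implementation-defined in C++ before C++20):

    #include <cstdint>

    static int32_t abs32(int32_t X) {
      int32_t Sign = X >> 31;     // 0 for X >= 0, -1 for X < 0
      return (X + Sign) ^ Sign;   // identity, or ~(X - 1) == -X
    }

As with the DAG expansion, INT32_MIN maps to itself, since its negation is not representable.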
@@ -3876,8 +4752,6 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
unsigned Stride = SrcEltVT.getSizeInBits() / 8;
assert(SrcEltVT.isByteSized());
- EVT PtrVT = BasePTR.getValueType();
-
SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
@@ -3888,8 +4762,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
- BasePTR = DAG.getNode(ISD::ADD, SL, PtrVT, BasePTR,
- DAG.getConstant(Stride, SL, PtrVT));
+ BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -3989,7 +4862,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
- if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {
+ if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
+ LoadedVT.isVector()) {
// Scalarize the load and let the individual components be handled.
SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
@@ -4139,13 +5013,14 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
auto &MF = DAG.getMachineFunction();
+ EVT MemVT = ST->getMemoryVT();
SDLoc dl(ST);
- if (ST->getMemoryVT().isFloatingPoint() ||
- ST->getMemoryVT().isVector()) {
+ if (MemVT.isFloatingPoint() || MemVT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
if (isTypeLegal(intVT)) {
- if (!isOperationLegalOrCustom(ISD::STORE, intVT)) {
+ if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
+ MemVT.isVector()) {
// Scalarize the store and let the individual components be handled.
SDValue Result = scalarizeVectorStore(ST, DAG);
@@ -4399,3 +5274,134 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
}
return SDValue();
}
+
+SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+ SDLoc dl(Node);
+
+ // usub.sat(a, b) -> umax(a, b) - b
+ if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
+ SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
+ return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
+ }
+
+ if (VT.isVector()) {
+ // TODO: Consider not scalarizing here.
+ return SDValue();
+ }
+
+ unsigned OverflowOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ OverflowOp = ISD::SADDO;
+ break;
+ case ISD::UADDSAT:
+ OverflowOp = ISD::UADDO;
+ break;
+ case ISD::SSUBSAT:
+ OverflowOp = ISD::SSUBO;
+ break;
+ case ISD::USUBSAT:
+ OverflowOp = ISD::USUBO;
+ break;
+ default:
+ llvm_unreachable("Expected method to receive signed or unsigned saturation "
+ "addition or subtraction node.");
+ }
+
+ assert(LHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(RHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Expected both operands to be the same type");
+
+ unsigned BitWidth = LHS.getValueSizeInBits();
+ EVT ResultType = LHS.getValueType();
+ EVT BoolVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResultType);
+ SDValue Result =
+ DAG.getNode(OverflowOp, dl, DAG.getVTList(ResultType, BoolVT), LHS, RHS);
+ SDValue SumDiff = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue Zero = DAG.getConstant(0, dl, ResultType);
+
+ if (Opcode == ISD::UADDSAT) {
+ // Just need to check overflow for SatMax.
+ APInt MaxVal = APInt::getMaxValue(BitWidth);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
+ return DAG.getSelect(dl, ResultType, Overflow, SatMax, SumDiff);
+ } else if (Opcode == ISD::USUBSAT) {
+ // Just need to check overflow for SatMin.
+ APInt MinVal = APInt::getMinValue(BitWidth);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
+ return DAG.getSelect(dl, ResultType, Overflow, SatMin, SumDiff);
+ } else {
+ // SatMax -> Overflow && SumDiff < 0
+ // SatMin -> Overflow && SumDiff >= 0
+ APInt MinVal = APInt::getSignedMinValue(BitWidth);
+ APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
+ SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, ResultType, SumNeg, SatMax, SatMin);
+ return DAG.getSelect(dl, ResultType, Overflow, Result, SumDiff);
+ }
+}
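The scalar path expresses saturation in terms of the matching overflow node: on overflow, the sign of the wrapped sum tells which bound was crossed. A sketch in plain C++, assuming a compiler that provides __builtin_add_overflow (GCC/Clang); the unsigned subtraction case mirrors the UMAX fast path at the top of the function:

    #include <cstdint>

    static int32_t saddsat32(int32_t A, int32_t B) {
      int32_t Sum;
      if (!__builtin_add_overflow(A, B, &Sum))
        return Sum;                              // no overflow: plain sum
      // A negative wrapped sum means the true sum exceeded INT32_MAX.
      return Sum < 0 ? INT32_MAX : INT32_MIN;
    }

    static uint32_t usubsat32(uint32_t A, uint32_t B) {
      return (A > B ? A : B) - B;                // usub.sat(a, b) = umax(a, b) - b
    }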
+
+SDValue
+TargetLowering::getExpandedFixedPointMultiplication(SDNode *Node,
+ SelectionDAG &DAG) const {
+ assert(Node->getOpcode() == ISD::SMULFIX && "Expected opcode to be SMULFIX.");
+ assert(Node->getNumOperands() == 3 &&
+ "Expected signed fixed point multiplication to have 3 operands.");
+
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ assert(LHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(RHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Expected both operands to be the same type");
+
+ unsigned Scale = Node->getConstantOperandVal(2);
+ EVT VT = LHS.getValueType();
+ assert(Scale < VT.getScalarSizeInBits() &&
+ "Expected scale to be less than the number of bits.");
+
+ if (!Scale)
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+
+ // Get the upper and lower bits of the result.
+ SDValue Lo, Hi;
+ if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ Lo = Result.getValue(0);
+ Hi = Result.getValue(1);
+ } else if (isOperationLegalOrCustom(ISD::MULHS, VT)) {
+ Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ Hi = DAG.getNode(ISD::MULHS, dl, VT, LHS, RHS);
+ } else {
+ report_fatal_error("Unable to expand signed fixed point multiplication.");
+ }
+
+ // The result will need to be shifted right by the scale since both operands
+ // are scaled. The result is given to us in 2 halves, so we only want part of
+ // both in the result.
+ EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ Lo = DAG.getNode(ISD::SRL, dl, VT, Lo, DAG.getConstant(Scale, dl, ShiftTy));
+ Hi = DAG.getNode(
+ ISD::SHL, dl, VT, Hi,
+ DAG.getConstant(VT.getScalarSizeInBits() - Scale, dl, ShiftTy));
+ return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
+}
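Combining the right-shifted low half with the left-shifted high half simply selects the middle bits of the double-width product; for a 32-bit SMULFIX this is equivalent to a 64-bit multiply followed by a shift by the scale. A sketch with a worked Q16.16 example (helper name is illustrative):

    #include <cstdint>

    static int32_t smulfix32(int32_t A, int32_t B, unsigned Scale) {
      int64_t Prod = (int64_t)A * (int64_t)B;   // the Lo/Hi pair as one value
      return (int32_t)(Prod >> Scale);          // drop Scale fractional bits
    }

    // Q16.16: 1.5 (0x00018000) * 2.0 (0x00020000) with Scale = 16
    // yields 0x00030000, i.e. 3.0.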
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index ed74b3e4fa19..fccbb8ec91cb 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -95,7 +95,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
}
// Sort the Idx2MBBMap
- llvm::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+ llvm::sort(idx2MBBMap, Idx2MBBCompare());
LLVM_DEBUG(mf->print(dbgs(), this));
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index 8fbe724045e6..bcc8f8cf18bc 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SlotIndexes.h"
@@ -76,6 +77,18 @@ public:
/// Returns the last insert point as an iterator for \p CurLI in \p MBB.
MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI,
MachineBasicBlock &MBB);
+
+ /// Return the base index of the first insert point in \p MBB.
+ SlotIndex getFirstInsertPoint(MachineBasicBlock &MBB) {
+ SlotIndex Res = LIS.getMBBStartIdx(&MBB);
+ if (!MBB.empty()) {
+ MachineBasicBlock::iterator MII = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+ if (MII != MBB.end())
+ Res = LIS.getInstructionIndex(*MII);
+ }
+ return Res;
+ }
+
};
/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
@@ -225,6 +238,10 @@ public:
MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) {
return IPA.getLastInsertPointIter(*CurLI, *BB);
}
+
+ SlotIndex getFirstSplitPoint(unsigned Num) {
+ return IPA.getFirstInsertPoint(*MF.getBlockNumbered(Num));
+ }
};
/// SplitEditor - Edit machine code and LiveIntervals for live range
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
index 81a41970f9e2..eb8552915e2a 100644
--- a/contrib/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -1022,9 +1022,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
// We adjust AliasAnalysis information for merged stack slots.
- MachineSDNode::mmo_iterator NewMemOps =
- MF->allocateMemRefsArray(I.getNumMemOperands());
- unsigned MemOpIdx = 0;
+ SmallVector<MachineMemOperand *, 2> NewMMOs;
bool ReplaceMemOps = false;
for (MachineMemOperand *MMO : I.memoperands()) {
// If this memory location can be a slot remapped here,
@@ -1051,17 +1049,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
}
if (MayHaveConflictingAAMD) {
- NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes());
+ NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes()));
ReplaceMemOps = true;
+ } else {
+ NewMMOs.push_back(MMO);
}
- else
- NewMemOps[MemOpIdx++] = MMO;
}
// If any memory operand is updated, set memory references of
// this instruction.
if (ReplaceMemOps)
- I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands()));
+ I.setMemRefs(*MF, NewMMOs);
}
// Update the location of C++ catch objects for the MSVC personality routine.
@@ -1233,7 +1231,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
});
for (auto &s : LiveStarts)
- llvm::sort(s.begin(), s.end());
+ llvm::sort(s);
bool Changed = true;
while (Changed) {
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
index 19a191c01db9..0676fa2421e8 100644
--- a/contrib/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -268,11 +268,10 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
// in the list. Merge entries that refer to the same dwarf register and use
// the maximum size that needs to be spilled.
- llvm::sort(LiveOuts.begin(), LiveOuts.end(),
- [](const LiveOutReg &LHS, const LiveOutReg &RHS) {
- // Only sort by the dwarf register number.
- return LHS.DwarfRegNum < RHS.DwarfRegNum;
- });
+ llvm::sort(LiveOuts, [](const LiveOutReg &LHS, const LiveOutReg &RHS) {
+ // Only sort by the dwarf register number.
+ return LHS.DwarfRegNum < RHS.DwarfRegNum;
+ });
for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) {
for (auto II = std::next(I); II != E; ++II) {
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index cb12c7ce6e82..3b578c7391da 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -157,14 +157,6 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return NeedsProtector;
}
-static bool isLifetimeInst(const Instruction *I) {
- if (const auto Intrinsic = dyn_cast<IntrinsicInst>(I)) {
- const auto Id = Intrinsic->getIntrinsicID();
- return Id == Intrinsic::lifetime_start || Id == Intrinsic::lifetime_end;
- }
- return false;
-}
-
bool StackProtector::HasAddressTaken(const Instruction *AI) {
for (const User *U : AI->users()) {
if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
@@ -175,7 +167,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
return true;
} else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
// Ignore intrinsics that are not calls. TODO: Use isLoweredToCall().
- if (!isa<DbgInfoIntrinsic>(CI) && !isLifetimeInst(CI))
+ if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
return true;
} else if (isa<InvokeInst>(U)) {
return true;
@@ -199,6 +191,18 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
return false;
}
+/// Search for the first call to the llvm.stackprotector intrinsic and return it
+/// if present.
+static const CallInst *findStackProtectorIntrinsic(Function &F) {
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->getCalledFunction() ==
+ Intrinsic::getDeclaration(F.getParent(), Intrinsic::stackprotector))
+ return CI;
+ return nullptr;
+}
+
/// Check whether or not this function needs a stack protector based
/// upon the stack protector level.
///
@@ -215,13 +219,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
bool StackProtector::RequiresStackProtector() {
bool Strong = false;
bool NeedsProtector = false;
- for (const BasicBlock &BB : *F)
- for (const Instruction &I : BB)
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (CI->getCalledFunction() ==
- Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::stackprotector))
- HasPrologue = true;
+ HasPrologue = findStackProtectorIntrinsic(*F);
if (F->hasFnAttribute(Attribute::SafeStack))
return false;
@@ -379,7 +377,8 @@ bool StackProtector::InsertStackProtectors() {
// protection in SDAG.
bool SupportsSelectionDAGSP =
TLI->useStackGuardXorFP() ||
- (EnableSelectionDAGSP && !TM->Options.EnableFastISel);
+ (EnableSelectionDAGSP && !TM->Options.EnableFastISel &&
+ !TM->Options.EnableGlobalISel);
AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
@@ -399,6 +398,14 @@ bool StackProtector::InsertStackProtectors() {
if (SupportsSelectionDAGSP)
break;
+ // Find the stack guard slot if the prologue was not created by this pass
+ // itself via a previous call to CreatePrologue().
+ if (!AI) {
+ const CallInst *SPCall = findStackProtectorIntrinsic(*F);
+ assert(SPCall && "Call to llvm.stackprotector is missing");
+ AI = cast<AllocaInst>(SPCall->getArgOperand(1));
+ }
+
// Set HasIRCheck to true, so that SelectionDAG will not generate its own
// version. SelectionDAG called 'shouldEmitSDCheck' to check whether
// instrumentation has already been generated.
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index eb15b15a24a6..d8c6a249e4da 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -214,7 +214,7 @@ void StackSlotColoring::InitializeSlots() {
Intervals.reserve(LS->getNumIntervals());
for (auto &I : *LS)
Intervals.push_back(&I);
- llvm::sort(Intervals.begin(), Intervals.end(),
+ llvm::sort(Intervals,
[](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
// Gather all spill slots into a list.
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index f0cfa2fbe4fd..cf78fb5a1f12 100644
--- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -30,12 +30,6 @@ using namespace llvm;
TargetFrameLowering::~TargetFrameLowering() = default;
-/// The default implementation just looks at attribute "no-frame-pointer-elim".
-bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
- auto Attr = MF.getFunction().getFnAttribute("no-frame-pointer-elim");
- return Attr.getValueAsString() == "true";
-}
-
bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 963f8178b509..2a17af391105 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -339,42 +339,32 @@ bool TargetInstrInfo::PredicateInstruction(
return MadeChange;
}
-bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool TargetInstrInfo::hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
+ size_t StartSize = Accesses.size();
for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
oe = MI.memoperands_end();
o != oe; ++o) {
- if ((*o)->isLoad()) {
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast_or_null<FixedStackPseudoSourceValue>(
- (*o)->getPseudoValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
+ if ((*o)->isLoad() &&
+ dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ Accesses.push_back(*o);
}
- return false;
+ return Accesses.size() != StartSize;
}
-bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool TargetInstrInfo::hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
+ size_t StartSize = Accesses.size();
for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
oe = MI.memoperands_end();
o != oe; ++o) {
- if ((*o)->isStore()) {
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast_or_null<FixedStackPseudoSourceValue>(
- (*o)->getPseudoValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
+ if ((*o)->isStore() &&
+ dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ Accesses.push_back(*o);
}
- return false;
+ return Accesses.size() != StartSize;
}
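With the new signatures, callers receive every frame-index memory operand rather than a single MMO/frame-index pair. A sketch of caller-side usage (the wrapper function is illustrative, not part of the patch):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"

    static bool touchesFixedStackSlot(const llvm::MachineInstr &MI,
                                      const llvm::TargetInstrInfo &TII) {
      llvm::SmallVector<const llvm::MachineMemOperand *, 2> Accesses;
      return TII.hasLoadFromStackSlot(MI, Accesses) ||
             TII.hasStoreToStackSlot(MI, Accesses);
    }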
bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
@@ -388,8 +378,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
return true;
}
unsigned BitSize = TRI->getSubRegIdxSize(SubIdx);
- // Convert bit size to byte size to be consistent with
- // MCRegisterClass::getSize().
+ // Convert bit size to byte size.
if (BitSize % 8)
return false;
@@ -584,7 +573,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
}
if (NewMI) {
- NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ NewMI->setMemRefs(MF, MI.memoperands());
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
assert((!(Flags & MachineMemOperand::MOStore) ||
NewMI->mayStore()) &&
@@ -654,10 +643,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
// Copy the memoperands from the load to the folded instruction.
if (MI.memoperands_empty()) {
- NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end());
+ NewMI->setMemRefs(MF, LoadMI.memoperands());
} else {
// Handle the rare case of folding multiple loads.
- NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ NewMI->setMemRefs(MF, MI.memoperands());
for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(),
E = LoadMI.memoperands_end();
I != E; ++I) {
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 7b1b76821daa..e86190375642 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -161,7 +161,8 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee");
}
- if (TT.isGNUEnvironment() || TT.isOSFuchsia()) {
+ if (TT.isGNUEnvironment() || TT.isOSFuchsia() ||
+ (TT.isAndroid() && !TT.isAndroidVersionLT(9))) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
setLibcallName(RTLIB::SINCOS_F80, "sincosl");
@@ -599,14 +600,23 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Expand);
setOperationAction(ISD::FMAXNUM, VT, Expand);
- setOperationAction(ISD::FMINNAN, VT, Expand);
- setOperationAction(ISD::FMAXNAN, VT, Expand);
+ setOperationAction(ISD::FMINNUM_IEEE, VT, Expand);
+ setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand);
+ setOperationAction(ISD::FMINIMUM, VT, Expand);
+ setOperationAction(ISD::FMAXIMUM, VT, Expand);
setOperationAction(ISD::FMAD, VT, Expand);
setOperationAction(ISD::SMIN, VT, Expand);
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
setOperationAction(ISD::ABS, VT, Expand);
+ setOperationAction(ISD::FSHL, VT, Expand);
+ setOperationAction(ISD::FSHR, VT, Expand);
+ setOperationAction(ISD::SADDSAT, VT, Expand);
+ setOperationAction(ISD::UADDSAT, VT, Expand);
+ setOperationAction(ISD::SSUBSAT, VT, Expand);
+ setOperationAction(ISD::USUBSAT, VT, Expand);
+ setOperationAction(ISD::SMULFIX, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -666,6 +676,7 @@ void TargetLoweringBase::initActions() {
// These library functions default to expand.
for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
+ setOperationAction(ISD::FCBRT, VT, Expand);
setOperationAction(ISD::FLOG , VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
@@ -968,7 +979,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
MIB.add(MI->getOperand(i));
// Inherit previous memory operands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.cloneMemRefs(*MI);
assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
// Add a new memory operand for this FI.
@@ -1096,7 +1107,7 @@ void TargetLoweringBase::computeRegisterProperties(
LegalIntReg = IntReg;
} else {
RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
- (const MVT::SimpleValueType)LegalIntReg;
+ (MVT::SimpleValueType)LegalIntReg;
ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
}
}
@@ -1443,6 +1454,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case CatchPad: return 0;
case CatchSwitch: return 0;
case CleanupPad: return 0;
+ case FNeg: return ISD::FNEG;
case Add: return ISD::ADD;
case FAdd: return ISD::FADD;
case Sub: return ISD::SUB;
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 16140f0b12be..cb2fe691d702 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -95,6 +95,161 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
const TargetMachine &TgtM) {
TargetLoweringObjectFile::Initialize(Ctx, TgtM);
TM = &TgtM;
+
+ CodeModel::Model CM = TgtM.getCodeModel();
+
+ switch (TgtM.getTargetTriple().getArch()) {
+ case Triple::arm:
+ case Triple::armeb:
+ case Triple::thumb:
+ case Triple::thumbeb:
+ if (Ctx.getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM)
+ break;
+ // Fallthrough if not using EHABI
+ LLVM_FALLTHROUGH;
+ case Triple::ppc:
+ case Triple::x86:
+ PersonalityEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_indirect |
+ dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ break;
+ case Triple::x86_64:
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel |
+ (CM == CodeModel::Small
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
+ } else {
+ PersonalityEncoding =
+ (CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = (CM == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = (CM == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::hexagon:
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ if (isPositionIndependent()) {
+ PersonalityEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
+ LSDAEncoding |= dwarf::DW_EH_PE_pcrel;
+ TTypeEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
+ }
+ break;
+ case Triple::aarch64:
+ case Triple::aarch64_be:
+ // The small model guarantees static code/data size < 4GB, but not where it
+ // will be in memory. Most of these could end up >2GB away so even a signed
+ // pc-relative 32-bit address is insufficient, theoretically.
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::lanai:
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ break;
+ case Triple::mips:
+ case Triple::mipsel:
+ case Triple::mips64:
+ case Triple::mips64el:
+ // MIPS uses indirect pointers to refer to personality functions and types, so
+ // that the eh_frame section can be read-only. DW.ref.personality will be
+ // generated for relocation.
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect;
+ // FIXME: The N64 ABI probably ought to use DW_EH_PE_sdata8 but we can't
+ // identify N64 from just a triple.
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ // We don't support PC-relative LSDA references in GAS so we use the default
+ // DW_EH_PE_absptr for those.
+
+ // FreeBSD must be explicit about the data size and use pcrel since its
+ // assembler/linker won't do the automatic conversion that the Linux tools
+ // do.
+ if (TgtM.getTargetTriple().isOSFreeBSD()) {
+ PersonalityEncoding |= dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ }
+ break;
+ case Triple::ppc64:
+ case Triple::ppc64le:
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
+ break;
+ case Triple::sparcel:
+ case Triple::sparc:
+ if (isPositionIndependent()) {
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::sparcv9:
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::systemz:
+ // All currently-defined code models guarantee that 4-byte PC-relative
+ // values will be in range.
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ default:
+ break;
+ }
}
void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
@@ -351,6 +506,30 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO,
return OtherGO ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGO)) : nullptr;
}
+static unsigned getEntrySizeForKind(SectionKind Kind) {
+ if (Kind.isMergeable1ByteCString())
+ return 1;
+ else if (Kind.isMergeable2ByteCString())
+ return 2;
+ else if (Kind.isMergeable4ByteCString())
+ return 4;
+ else if (Kind.isMergeableConst4())
+ return 4;
+ else if (Kind.isMergeableConst8())
+ return 8;
+ else if (Kind.isMergeableConst16())
+ return 16;
+ else if (Kind.isMergeableConst32())
+ return 32;
+ else {
+ // We shouldn't have mergeable C strings or mergeable constants that we
+ // didn't handle above.
+ assert(!Kind.isMergeableCString() && "unknown string width");
+ assert(!Kind.isMergeableConst() && "unknown data width");
+ return 0;
+ }
+}
+
MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
StringRef SectionName = GO->getSection();
@@ -395,7 +574,7 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
MCSectionELF *Section = getContext().getELFSection(
SectionName, getELFSectionType(SectionName, Kind), Flags,
- /*EntrySize=*/0, Group, UniqueID, AssociatedSymbol);
+ getEntrySizeForKind(Kind), Group, UniqueID, AssociatedSymbol);
// Make sure that we did not get some other section with incompatible sh_link.
// This should not be possible due to UniqueID code above.
assert(Section->getAssociatedSymbol() == AssociatedSymbol &&
@@ -422,30 +601,6 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
-static unsigned getEntrySizeForKind(SectionKind Kind) {
- if (Kind.isMergeable1ByteCString())
- return 1;
- else if (Kind.isMergeable2ByteCString())
- return 2;
- else if (Kind.isMergeable4ByteCString())
- return 4;
- else if (Kind.isMergeableConst4())
- return 4;
- else if (Kind.isMergeableConst8())
- return 8;
- else if (Kind.isMergeableConst16())
- return 16;
- else if (Kind.isMergeableConst32())
- return 32;
- else {
- // We shouldn't have mergeable C strings or mergeable constants that we
- // didn't handle above.
- assert(!Kind.isMergeableCString() && "unknown string width");
- assert(!Kind.isMergeableConst() && "unknown data width");
- return 0;
- }
-}
-
static MCSectionELF *selectELFSectionForGlobal(
MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,
@@ -640,6 +795,14 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
}
+MCSection *TargetLoweringObjectFileELF::getSectionForCommandLines() const {
+ // Use ".GCC.command.line" since this feature is to support clang's
+ // -frecord-gcc-switches which in turn attempts to mimic GCC's switch of the
+ // same name.
+ return getContext().getELFSection(".GCC.command.line", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
+}
+
void
TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
UseInitArray = UseInitArray_;
@@ -684,6 +847,12 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
MachO::S_MOD_TERM_FUNC_POINTERS,
SectionKind::getData());
}
+
+ PersonalityEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel;
+ TTypeEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
}
void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
@@ -939,6 +1108,22 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
// .indirect_symbol _extfoo
// .long 0
//
+ // The indirect symbol table (and sections of non_lazy_symbol_pointers type)
+ // may point to both local (same translation unit) and global (other
+ // translation units) symbols. Example:
+ //
+ // .section __DATA,__pointers,non_lazy_symbol_pointers
+ // L1:
+ // .indirect_symbol _myGlobal
+ // .long 0
+ // L2:
+ // .indirect_symbol _myLocal
+ // .long _myLocal
+ //
+ // If the symbol is local, instead of the symbol's index, the assembler
+ // places the constant INDIRECT_SYMBOL_LOCAL into the indirect symbol table.
+ // Then the linker will notice the constant in the table and will look at the
+ // content of the symbol.
MachineModuleInfoMachO &MachOMMI =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
MCContext &Ctx = getContext();
@@ -958,9 +1143,12 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
MCSymbol *Stub = Ctx.getOrCreateSymbol(Name);
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
- if (!StubSym.getPointer())
- StubSym = MachineModuleInfoImpl::
- StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */);
+ if (!StubSym.getPointer()) {
+ bool IsIndirectLocal = Sym->isDefined() && !Sym->isExternal();
+ // With the assumption that IsIndirectLocal == GV->hasLocalLinkage().
+ StubSym = MachineModuleInfoImpl::StubValueTy(const_cast<MCSymbol *>(Sym),
+ !IsIndirectLocal);
+ }
const MCExpr *BSymExpr =
MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
@@ -1296,8 +1484,25 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
unsigned Priority,
const MCSymbol *KeySym,
MCSectionCOFF *Default) {
- if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment())
- return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
+ if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+ // If the priority is the default, use .CRT$XCU, possibly associative.
+ if (Priority == 65535)
+ return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
+
+ // Otherwise, we need to compute a new section name. Low priorities should
+ // run earlier. The linker will sort sections ASCII-betically, and we need a
+ // string that sorts between .CRT$XCA and .CRT$XCU. In the general case, we
+ // make a name like ".CRT$XCT12345", since that runs before .CRT$XCU. Really
+ // low priorities need to sort before 'L', since the CRT uses that
+ // internally, so we use ".CRT$XCA00001" for them.
+ SmallString<24> Name;
+ raw_svector_ostream OS(Name);
+ OS << ".CRT$XC" << (Priority < 200 ? 'A' : 'T') << format("%05u", Priority);
+ MCSectionCOFF *Sec = Ctx.getCOFFSection(
+ Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ return Ctx.getAssociativeCOFFSection(Sec, KeySym, 0);
+ }
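The encoded name places each priority into the MSVC linker's lexical sort between .CRT$XCA and .CRT$XCU. A small sketch of the naming rule described in the comment (helper name assumed):

    #include <cstdio>
    #include <string>

    static std::string crtSectionForPriority(unsigned Priority) {
      // Priorities below 200 use 'A' so they sort before the CRT-internal 'L'.
      char Buf[32];
      std::snprintf(Buf, sizeof(Buf), ".CRT$XC%c%05u",
                    Priority < 200 ? 'A' : 'T', Priority);
      return Buf;
    }

    // crtSectionForPriority(101) == ".CRT$XCA00101"
    // crtSectionForPriority(400) == ".CRT$XCT00400"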
std::string Name = IsCtor ? ".ctors" : ".dtors";
if (Priority != 65535)
@@ -1571,6 +1776,10 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
void TargetLoweringObjectFileWasm::InitializeWasm() {
StaticCtorSection =
getContext().getWasmSection(".init_array", SectionKind::getData());
+
+ // We don't use PersonalityEncoding and LSDAEncoding because we don't emit
+ // .cfi directives. We use TTypeEncoding to encode typeinfo global variables.
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection(
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 853e71d0efa5..3c133fb8594e 100644
--- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -23,15 +23,34 @@ using namespace llvm;
/// DisableFramePointerElim - This returns true if frame pointer elimination
/// optimization should be disabled for the given machine function.
bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
- // Check to see if we should eliminate all frame pointers.
- if (MF.getSubtarget().getFrameLowering()->noFramePointerElim(MF))
+ // Check to see if the target wants to forcibly keep the frame pointer.
+ if (MF.getSubtarget().getFrameLowering()->keepFramePointer(MF))
return true;
- // Check to see if we should eliminate non-leaf frame pointers.
- if (MF.getFunction().hasFnAttribute("no-frame-pointer-elim-non-leaf"))
- return MF.getFrameInfo().hasCalls();
+ const Function &F = MF.getFunction();
+
+ // TODO: Remove support for the old `fp elim` function attributes after fully
+ // migrating to "frame-pointer".
+ if (!F.hasFnAttribute("frame-pointer")) {
+ // Check to see if we should eliminate all frame pointers.
+ if (F.getFnAttribute("no-frame-pointer-elim").getValueAsString() == "true")
+ return true;
+
+ // Check to see if we should eliminate non-leaf frame pointers.
+ if (F.hasFnAttribute("no-frame-pointer-elim-non-leaf"))
+ return MF.getFrameInfo().hasCalls();
- return false;
+ return false;
+ }
+
+ StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString();
+ if (FP == "all")
+ return true;
+ if (FP == "non-leaf")
+ return MF.getFrameInfo().hasCalls();
+ if (FP == "none")
+ return false;
+ llvm_unreachable("unknown frame pointer flag");
}
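The patch replaces the boolean "no-frame-pointer-elim*" string attributes with a single "frame-pointer" attribute taking "all", "non-leaf", or "none". A minimal sketch of how a front end might set it, assuming the attribute spelling shown above:

    #include "llvm/IR/Function.h"

    // Mode must be one of "all", "non-leaf", "none" to satisfy the check above.
    static void setFramePointerMode(llvm::Function &F, llvm::StringRef Mode) {
      F.addFnAttr("frame-pointer", Mode);
    }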
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
index 2db03288f2ac..28126fcf766d 100644
--- a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
@@ -107,10 +108,10 @@ static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
cl::desc("Print LLVM IR input to isel pass"));
static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
cl::desc("Dump garbage collector data"));
-static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
- cl::desc("Verify generated machine code"),
- cl::init(false),
- cl::ZeroOrMore);
+static cl::opt<cl::boolOrDefault>
+ VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::ZeroOrMore);
enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault };
// Enable or disable the MachineOutliner.
static cl::opt<RunOutliner> EnableMachineOutliner(
@@ -136,13 +137,15 @@ static cl::opt<std::string> PrintMachineInstrs(
"print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"), cl::Hidden);
-static cl::opt<int> EnableGlobalISelAbort(
+static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
"global-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"global\" instruction selection "
- "fails to lower/select an instruction: 0 disable the abort, "
- "1 enable the abort, and "
- "2 disable the abort but emit a diagnostic on failure"),
- cl::init(1));
+ "fails to lower/select an instruction"),
+ cl::values(
+ clEnumValN(GlobalISelAbortMode::Disable, "0", "Disable the abort"),
+ clEnumValN(GlobalISelAbortMode::Enable, "1", "Enable the abort"),
+ clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2",
+ "Disable the abort but emit a diagnostic on failure")));
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
@@ -342,11 +345,39 @@ static AnalysisID getPassIDFromName(StringRef PassName) {
return PI ? PI->getTypeInfo() : nullptr;
}
+static std::pair<StringRef, unsigned>
+getPassNameAndInstanceNum(StringRef PassName) {
+ StringRef Name, InstanceNumStr;
+ std::tie(Name, InstanceNumStr) = PassName.split(',');
+
+ unsigned InstanceNum = 0;
+ if (!InstanceNumStr.empty() && InstanceNumStr.getAsInteger(10, InstanceNum))
+ report_fatal_error("invalid pass instance specifier " + PassName);
+
+ return std::make_pair(Name, InstanceNum);
+}
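The specifier now optionally carries an instance number, so repeated passes can be addressed individually; counting starts at zero, so "machine-cp,1" names the second run of machine-cp. A self-contained sketch that mirrors (but does not call) the file-local helper above:

    #include <cstdlib>
    #include <string>
    #include <utility>

    static std::pair<std::string, unsigned> parseSpec(const std::string &Spec) {
      std::string::size_type Comma = Spec.find(',');
      if (Comma == std::string::npos)
        return {Spec, 0};                    // no instance: first occurrence
      return {Spec.substr(0, Comma),
              (unsigned)std::strtoul(Spec.c_str() + Comma + 1, nullptr, 10)};
    }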
+
void TargetPassConfig::setStartStopPasses() {
- StartBefore = getPassIDFromName(StartBeforeOpt);
- StartAfter = getPassIDFromName(StartAfterOpt);
- StopBefore = getPassIDFromName(StopBeforeOpt);
- StopAfter = getPassIDFromName(StopAfterOpt);
+ StringRef StartBeforeName;
+ std::tie(StartBeforeName, StartBeforeInstanceNum) =
+ getPassNameAndInstanceNum(StartBeforeOpt);
+
+ StringRef StartAfterName;
+ std::tie(StartAfterName, StartAfterInstanceNum) =
+ getPassNameAndInstanceNum(StartAfterOpt);
+
+ StringRef StopBeforeName;
+ std::tie(StopBeforeName, StopBeforeInstanceNum)
+ = getPassNameAndInstanceNum(StopBeforeOpt);
+
+ StringRef StopAfterName;
+ std::tie(StopAfterName, StopAfterInstanceNum)
+ = getPassNameAndInstanceNum(StopAfterOpt);
+
+ StartBefore = getPassIDFromName(StartBeforeName);
+ StartAfter = getPassIDFromName(StartAfterName);
+ StopBefore = getPassIDFromName(StopBeforeName);
+ StopAfter = getPassIDFromName(StopAfterName);
if (StartBefore && StartAfter)
report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") +
Twine(StartAfterOptName) + Twine(" specified!"));
@@ -383,6 +414,9 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
if (TM.Options.EnableIPRA)
setRequiresCodeGenSCCOrder();
+ if (EnableGlobalISelAbort.getNumOccurrences())
+ TM.Options.GlobalISelAbort = EnableGlobalISelAbort;
+
setStartStopPasses();
}
@@ -418,8 +452,13 @@ TargetPassConfig::TargetPassConfig()
"triple set?");
}
-bool TargetPassConfig::hasLimitedCodeGenPipeline() const {
- return StartBefore || StartAfter || StopBefore || StopAfter;
+bool TargetPassConfig::willCompleteCodeGenPipeline() {
+ return StopBeforeOpt.empty() && StopAfterOpt.empty();
+}
+
+bool TargetPassConfig::hasLimitedCodeGenPipeline() {
+ return !StartBeforeOpt.empty() || !StartAfterOpt.empty() ||
+ !willCompleteCodeGenPipeline();
}
std::string
@@ -482,9 +521,9 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
// and shouldn't reference it.
AnalysisID PassID = P->getPassID();
- if (StartBefore == PassID)
+ if (StartBefore == PassID && StartBeforeCount++ == StartBeforeInstanceNum)
Started = true;
- if (StopBefore == PassID)
+ if (StopBefore == PassID && StopBeforeCount++ == StopBeforeInstanceNum)
Stopped = true;
if (Started && !Stopped) {
std::string Banner;
@@ -507,9 +546,11 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
} else {
delete P;
}
- if (StopAfter == PassID)
+
+ if (StopAfter == PassID && StopAfterCount++ == StopAfterInstanceNum)
Stopped = true;
- if (StartAfter == PassID)
+
+ if (StartAfter == PassID && StartAfterCount++ == StartAfterInstanceNum)
Started = true;
if (Stopped && !Started)
report_fatal_error("Cannot stop compilation after pass that is not run");
@@ -552,7 +593,7 @@ void TargetPassConfig::addPrintPass(const std::string &Banner) {
}
void TargetPassConfig::addVerifyPass(const std::string &Banner) {
- bool Verify = VerifyMachineCode;
+ bool Verify = VerifyMachineCode == cl::BOU_TRUE;
#ifdef EXPENSIVE_CHECKS
if (VerifyMachineCode == cl::BOU_UNSET)
Verify = TM->isMachineVerifierClean();
@@ -714,18 +755,34 @@ void TargetPassConfig::addISelPrepare() {
bool TargetPassConfig::addCoreISelPasses() {
// Enable FastISel with -fast-isel, but allow that to be overridden.
TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
- if (EnableFastISelOption == cl::BOU_TRUE ||
- (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel()))
- TM->setFastISel(true);
- // Ask the target for an instruction selector.
- // Explicitly enabling fast-isel should override implicitly enabled
- // global-isel.
- if (EnableGlobalISelOption == cl::BOU_TRUE ||
- (EnableGlobalISelOption == cl::BOU_UNSET &&
- TM->Options.EnableGlobalISel && EnableFastISelOption != cl::BOU_TRUE)) {
+ // Determine an instruction selector.
+ enum class SelectorType { SelectionDAG, FastISel, GlobalISel };
+ SelectorType Selector;
+
+ if (EnableFastISelOption == cl::BOU_TRUE)
+ Selector = SelectorType::FastISel;
+ else if (EnableGlobalISelOption == cl::BOU_TRUE ||
+ (TM->Options.EnableGlobalISel &&
+ EnableGlobalISelOption != cl::BOU_FALSE))
+ Selector = SelectorType::GlobalISel;
+ else if (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel())
+ Selector = SelectorType::FastISel;
+ else
+ Selector = SelectorType::SelectionDAG;
+
+ // Consistently set TM->Options.EnableFastISel and EnableGlobalISel.
+ if (Selector == SelectorType::FastISel) {
+ TM->setFastISel(true);
+ TM->setGlobalISel(false);
+ } else if (Selector == SelectorType::GlobalISel) {
TM->setFastISel(false);
+ TM->setGlobalISel(true);
+ }
+ // Add instruction selector passes.
+ if (Selector == SelectorType::GlobalISel) {
+ SaveAndRestore<bool> SavedAddingMachinePasses(AddingMachinePasses, true);
if (addIRTranslator())
return true;
@@ -804,15 +861,17 @@ void TargetPassConfig::addMachinePasses() {
AddingMachinePasses = true;
// Insert a machine instr printer pass after the specified pass.
- if (!StringRef(PrintMachineInstrs.getValue()).equals("") &&
- !StringRef(PrintMachineInstrs.getValue()).equals("option-unspecified")) {
- const PassRegistry *PR = PassRegistry::getPassRegistry();
- const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
- const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer"));
- assert (TPI && IPI && "Pass ID not registered!");
- const char *TID = (const char *)(TPI->getTypeInfo());
- const char *IID = (const char *)(IPI->getTypeInfo());
- insertPass(TID, IID);
+ StringRef PrintMachineInstrsPassName = PrintMachineInstrs.getValue();
+ if (!PrintMachineInstrsPassName.equals("") &&
+ !PrintMachineInstrsPassName.equals("option-unspecified")) {
+ if (const PassInfo *TPI = getPassInfo(PrintMachineInstrsPassName)) {
+ const PassRegistry *PR = PassRegistry::getPassRegistry();
+ const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer"));
+ assert(IPI && "failed to get \"machineinstr-printer\" PassInfo!");
+ const char *TID = (const char *)(TPI->getTypeInfo());
+ const char *IID = (const char *)(IPI->getTypeInfo());
+ insertPass(TID, IID);
+ }
}
// Print the instruction selected machine code...
@@ -981,7 +1040,8 @@ bool TargetPassConfig::getOptimizeRegAlloc() const {
}
/// RegisterRegAlloc's global Registry tracks allocator registration.
-MachinePassRegistry RegisterRegAlloc::Registry;
+MachinePassRegistry<RegisterRegAlloc::FunctionPassCtor>
+ RegisterRegAlloc::Registry;
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
@@ -1155,14 +1215,9 @@ void TargetPassConfig::addBlockPlacement() {
/// GlobalISel Configuration
//===---------------------------------------------------------------------===//
bool TargetPassConfig::isGlobalISelAbortEnabled() const {
- if (EnableGlobalISelAbort.getNumOccurrences() > 0)
- return EnableGlobalISelAbort == 1;
-
- // When no abort behaviour is specified, we don't abort if the target says
- // that GISel is enabled.
- return !TM->Options.EnableGlobalISel;
+ return TM->Options.GlobalISelAbort == GlobalISelAbortMode::Enable;
}
bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
- return EnableGlobalISelAbort == 2;
+ return TM->Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag;
}
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 0ca435016ead..4b72f6a84ca1 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -592,17 +592,17 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// the two-address register.
// e.g.
// %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
- // %reg1029 = MOV8rr %reg1028
+ // %reg1029 = COPY %reg1028
// %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
- // insert => %reg1030 = MOV8rr %reg1028
+ // insert => %reg1030 = COPY %reg1028
// %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags
- // In this case, it might not be possible to coalesce the second MOV8rr
+ // In this case, it might not be possible to coalesce the second COPY
// instruction if the first one is coalesced. So it would be profitable to
// commute it:
// %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
- // %reg1029 = MOV8rr %reg1028
+ // %reg1029 = COPY %reg1028
// %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
- // insert => %reg1030 = MOV8rr %reg1029
+ // insert => %reg1030 = COPY %reg1029
// %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags
if (!isPlainlyKilled(MI, regC, LIS))
@@ -929,9 +929,12 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator Begin = MI;
MachineBasicBlock::iterator AfterMI = std::next(Begin);
MachineBasicBlock::iterator End = AfterMI;
- while (End->isCopy() &&
- regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) {
- Defs.push_back(End->getOperand(0).getReg());
+ while (End != MBB->end()) {
+ End = skipDebugInstructionsForward(End, MBB->end());
+ if (End->isCopy() && regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI))
+ Defs.push_back(End->getOperand(0).getReg());
+ else
+ break;
++End;
}
@@ -1608,23 +1611,28 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
if (AllUsesCopied) {
+ bool ReplacedAllUntiedUses = true;
if (!IsEarlyClobber) {
// Replace other (un-tied) uses of regB with LastCopiedReg.
for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB &&
- MO.isUse()) {
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.getSubReg() == SubRegB) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ MO.setSubReg(0);
+ } else {
+ ReplacedAllUntiedUses = false;
}
- MO.setReg(LastCopiedReg);
- MO.setSubReg(MO.getSubReg());
}
}
}
// Update live variables for regB.
- if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(*MI)) {
+ if (RemovedKillFlag && ReplacedAllUntiedUses &&
+ LV && LV->getVarInfo(RegB).removeKill(*MI)) {
MachineBasicBlock::iterator PrevMI = MI;
--PrevMI;
LV->addVirtualRegisterKilled(RegB, *PrevMI);
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 0ead2b8340ab..ed7bef667e77 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -525,7 +525,7 @@ void VirtRegRewriter::rewrite() {
// Preserve semantics of sub-register operands.
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
- if (NoSubRegLiveness) {
+ if (NoSubRegLiveness || !MRI->shouldTrackSubRegLiveness(VirtReg)) {
// A virtual register kill refers to the whole register, so we may
// have to add implicit killed operands for the super-register. A
// partial redef always kills and redefines the super-register.
diff --git a/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 83d04da5dd0c..e5002eb95346 100644
--- a/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -137,6 +137,7 @@ class WasmEHPrepare : public FunctionPass {
Value *LSDAField = nullptr; // lsda field
Value *SelectorField = nullptr; // selector
+ Function *ThrowF = nullptr; // wasm.throw() intrinsic
Function *CatchF = nullptr; // wasm.catch.extract() intrinsic
Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic
Function *LSDAF = nullptr; // wasm.lsda() intrinsic
@@ -145,6 +146,9 @@ class WasmEHPrepare : public FunctionPass {
Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper
Function *ClangCallTermF = nullptr; // __clang_call_terminate() function
+ bool prepareEHPads(Function &F);
+ bool prepareThrows(Function &F);
+
void prepareEHPad(BasicBlock *BB, unsigned Index);
void prepareTerminateCleanupPad(BasicBlock *BB);
@@ -177,7 +181,62 @@ bool WasmEHPrepare::doInitialization(Module &M) {
return false;
}
+// Erase each of the specified BBs if it has no remaining predecessors, and
+// also erase all of its dead children.
+template <typename Container>
+static void eraseDeadBBsAndChildren(const Container &BBs) {
+ SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end());
+ while (!WL.empty()) {
+ auto *BB = WL.pop_back_val();
+ if (pred_begin(BB) != pred_end(BB))
+ continue;
+ WL.append(succ_begin(BB), succ_end(BB));
+ DeleteDeadBlock(BB);
+ }
+}
+
bool WasmEHPrepare::runOnFunction(Function &F) {
+ bool Changed = false;
+ Changed |= prepareThrows(F);
+ Changed |= prepareEHPads(F);
+ return Changed;
+}
+
+bool WasmEHPrepare::prepareThrows(Function &F) {
+ Module &M = *F.getParent();
+ IRBuilder<> IRB(F.getContext());
+ bool Changed = false;
+
+ // wasm.throw() intrinsic, which will be lowered to the wasm 'throw' instruction.
+ ThrowF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
+
+ // Insert an unreachable instruction after a call to @llvm.wasm.throw and
+ // delete all following instructions within the BB, and delete all the dead
+ // children of the BB as well.
+ for (User *U : ThrowF->users()) {
+ // A call to @llvm.wasm.throw() is only generated from a
+ // __builtin_wasm_throw() builtin call within libcxxabi, and cannot be an
+ // InvokeInst.
+ auto *ThrowI = cast<CallInst>(U);
+ if (ThrowI->getFunction() != &F)
+ continue;
+ Changed = true;
+ auto *BB = ThrowI->getParent();
+ SmallVector<BasicBlock *, 4> Succs(succ_begin(BB), succ_end(BB));
+ auto &InstList = BB->getInstList();
+ InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end());
+ IRB.SetInsertPoint(BB);
+ IRB.CreateUnreachable();
+ eraseDeadBBsAndChildren(Succs);
+ }
+
+ return Changed;
+}
+
+bool WasmEHPrepare::prepareEHPads(Function &F) {
+ Module &M = *F.getParent();
+ IRBuilder<> IRB(F.getContext());
+
SmallVector<BasicBlock *, 16> CatchPads;
SmallVector<BasicBlock *, 16> CleanupPads;
for (BasicBlock &BB : F) {
@@ -194,9 +253,6 @@ bool WasmEHPrepare::runOnFunction(Function &F) {
return false;
assert(F.hasPersonalityFn() && "Personality function not found");
- Module &M = *F.getParent();
- IRBuilder<> IRB(F.getContext());
-
// __wasm_lpad_context global variable
LPadContextGV = cast<GlobalVariable>(
M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy));
@@ -300,7 +356,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
// This is to create a map of <landingpad EH label, landingpad index> in
// SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
// Pseudocode: wasm.landingpad.index(Index);
- IRB.CreateCall(LPadIndexF, IRB.getInt32(Index));
+ IRB.CreateCall(LPadIndexF, {FPI, IRB.getInt32(Index)});
// Pseudocode: __wasm_lpad_context.lpad_index = index;
IRB.CreateStore(IRB.getInt32(Index), LPadIndexField);
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 65d0a7a774fe..6a15240fa6e0 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -218,7 +218,7 @@ static void calculateStateNumbersForInvokes(const Function *Fn,
// to. If the unwind edge came from an invoke, return null.
static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
Value *ParentPad) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
if (isa<InvokeInst>(TI))
return nullptr;
if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
@@ -977,7 +977,7 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
break;
}
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
// CatchPadInst and CleanupPadInst can't transfer control to a ReturnInst.
bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad;
// The token consumed by a CatchReturnInst must match the funclet token.
@@ -1074,7 +1074,7 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
AllocaInst *SpillSlot = nullptr;
Instruction *EHPad = PHIBlock->getFirstNonPHI();
- if (!isa<TerminatorInst>(EHPad)) {
+ if (!EHPad->isTerminator()) {
// If the EHPad isn't a terminator, then we can insert a load in this block
// that will dominate all uses.
SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
@@ -1148,8 +1148,7 @@ void WinEHPrepare::insertPHIStore(
BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) {
- if (PredBlock->isEHPad() &&
- isa<TerminatorInst>(PredBlock->getFirstNonPHI())) {
+ if (PredBlock->isEHPad() && PredBlock->getFirstNonPHI()->isTerminator()) {
// Pred is unsplittable, so we need to queue it on the worklist.
Worklist.push_back({PredBlock, PredVal});
return;
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
index 44a67743169e..cbcaa5692828 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
@@ -75,7 +75,7 @@ Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols) {
Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols,
uint32_t InitialOffset) {
for (auto I : Symbols) {
- if (auto EC = visitSymbolRecord(I, InitialOffset))
+ if (auto EC = visitSymbolRecord(I, InitialOffset + Symbols.skew()))
return EC;
InitialOffset += I.length();
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp b/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
index 8de266b836b4..2a9753add311 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
@@ -14,25 +14,23 @@
using namespace llvm;
using namespace llvm::codeview;
-namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class CodeViewErrorCategory : public std::error_category {
public:
const char *name() const noexcept override { return "llvm.codeview"; }
-
std::string message(int Condition) const override {
switch (static_cast<cv_error_code>(Condition)) {
case cv_error_code::unspecified:
- return "An unknown error has occurred.";
+ return "An unknown CodeView error has occurred.";
case cv_error_code::insufficient_buffer:
return "The buffer is not large enough to read the requested number of "
"bytes.";
case cv_error_code::corrupt_record:
return "The CodeView record is corrupted.";
case cv_error_code::no_records:
- return "There are no records";
+ return "There are no records.";
case cv_error_code::operation_unsupported:
return "The requested operation is not supported.";
case cv_error_code::unknown_member_record:
@@ -41,31 +39,10 @@ public:
llvm_unreachable("Unrecognized cv_error_code");
}
};
-} // end anonymous namespace
-
-static ManagedStatic<CodeViewErrorCategory> Category;
-
-char CodeViewError::ID = 0;
-
-CodeViewError::CodeViewError(cv_error_code C) : CodeViewError(C, "") {}
-CodeViewError::CodeViewError(const std::string &Context)
- : CodeViewError(cv_error_code::unspecified, Context) {}
-
-CodeViewError::CodeViewError(cv_error_code C, const std::string &Context)
- : Code(C) {
- ErrMsg = "CodeView Error: ";
- std::error_code EC = convertToErrorCode();
- if (Code != cv_error_code::unspecified)
- ErrMsg += EC.message() + " ";
- if (!Context.empty())
- ErrMsg += Context;
+static llvm::ManagedStatic<CodeViewErrorCategory> CodeViewErrCategory;
+const std::error_category &llvm::codeview::CVErrorCategory() {
+ return *CodeViewErrCategory;
}
-void CodeViewError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-
-const std::string &CodeViewError::getErrorMessage() const { return ErrMsg; }
-
-std::error_code CodeViewError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(Code), *Category);
-}
+char CodeViewError::ID;
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp b/contrib/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
index bf9dd7c86862..4001741f560a 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
@@ -79,7 +79,7 @@ Error DebugCrossModuleImportsSubsection::commit(
for (const auto &M : Mappings)
Ids.push_back(&M);
- llvm::sort(Ids.begin(), Ids.end(), [this](const T &L1, const T &L2) {
+ llvm::sort(Ids, [this](const T &L1, const T &L2) {
return Strings.getIdForString(L1->getKey()) <
Strings.getIdForString(L2->getKey());
});
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp b/contrib/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
index fd558aa9cc8a..5881bf177a55 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
@@ -14,8 +14,11 @@ using namespace llvm;
using namespace llvm::codeview;
Error DebugFrameDataSubsectionRef::initialize(BinaryStreamReader Reader) {
- if (auto EC = Reader.readObject(RelocPtr))
- return EC;
+ if (Reader.bytesRemaining() % sizeof(FrameData) != 0) {
+ if (auto EC = Reader.readObject(RelocPtr))
+ return EC;
+ }
+
if (Reader.bytesRemaining() % sizeof(FrameData) != 0)
return make_error<CodeViewError>(cv_error_code::corrupt_record,
"Invalid frame data record format!");
@@ -26,15 +29,30 @@ Error DebugFrameDataSubsectionRef::initialize(BinaryStreamReader Reader) {
return Error::success();
}
+Error DebugFrameDataSubsectionRef::initialize(BinaryStreamRef Section) {
+ BinaryStreamReader Reader(Section);
+ return initialize(Reader);
+}
+
uint32_t DebugFrameDataSubsection::calculateSerializedSize() const {
- return 4 + sizeof(FrameData) * Frames.size();
+ uint32_t Size = sizeof(FrameData) * Frames.size();
+ if (IncludeRelocPtr)
+ Size += sizeof(uint32_t);
+ return Size;
}
Error DebugFrameDataSubsection::commit(BinaryStreamWriter &Writer) const {
- if (auto EC = Writer.writeInteger<uint32_t>(0))
- return EC;
-
- if (auto EC = Writer.writeArray(makeArrayRef(Frames)))
+ if (IncludeRelocPtr) {
+ if (auto EC = Writer.writeInteger<uint32_t>(0))
+ return EC;
+ }
+
+ std::vector<FrameData> SortedFrames(Frames.begin(), Frames.end());
+ std::sort(SortedFrames.begin(), SortedFrames.end(),
+ [](const FrameData &LHS, const FrameData &RHS) {
+ return LHS.RvaStart < RHS.RvaStart;
+ });
+ if (auto EC = Writer.writeArray(makeArrayRef(SortedFrames)))
return EC;
return Error::success();
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/contrib/llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
index d2acc9a21003..9b251f5931b3 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
@@ -91,7 +91,7 @@ std::vector<uint32_t> DebugStringTableSubsection::sortedIds() const {
Result.reserve(IdToString.size());
for (const auto &Entry : IdToString)
Result.push_back(Entry.first);
- llvm::sort(Result.begin(), Result.end());
+ llvm::sort(Result);
return Result;
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp b/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
index d8301cab1657..ef4e42f79ebc 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -200,6 +200,8 @@ static const EnumEntry<uint32_t> FrameProcSymFlagNames[] = {
CV_ENUM_CLASS_ENT(FrameProcedureOptions, Inlined),
CV_ENUM_CLASS_ENT(FrameProcedureOptions, StrictSecurityChecks),
CV_ENUM_CLASS_ENT(FrameProcedureOptions, SafeBuffers),
+ CV_ENUM_CLASS_ENT(FrameProcedureOptions, EncodedLocalBasePointerMask),
+ CV_ENUM_CLASS_ENT(FrameProcedureOptions, EncodedParamBasePointerMask),
CV_ENUM_CLASS_ENT(FrameProcedureOptions, ProfileGuidedOptimization),
CV_ENUM_CLASS_ENT(FrameProcedureOptions, ValidProfileCounts),
CV_ENUM_CLASS_ENT(FrameProcedureOptions, OptimizedForSpeed),
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/contrib/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
index ca8007411cad..ddcad8c631d7 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -89,6 +89,8 @@ uint32_t LazyRandomTypeCollection::getOffsetOfType(TypeIndex Index) {
}
CVType LazyRandomTypeCollection::getType(TypeIndex Index) {
+ assert(!Index.isSimple());
+
auto EC = ensureTypeExists(Index);
error(std::move(EC));
assert(contains(Index));
@@ -97,6 +99,9 @@ CVType LazyRandomTypeCollection::getType(TypeIndex Index) {
}
Optional<CVType> LazyRandomTypeCollection::tryGetType(TypeIndex Index) {
+ if (Index.isSimple())
+ return None;
+
if (auto EC = ensureTypeExists(Index)) {
consumeError(std::move(EC));
return None;
@@ -151,6 +156,7 @@ Error LazyRandomTypeCollection::ensureTypeExists(TypeIndex TI) {
}
void LazyRandomTypeCollection::ensureCapacityFor(TypeIndex Index) {
+ assert(!Index.isSimple());
uint32_t MinSize = Index.toArrayIndex() + 1;
if (MinSize <= capacity())
@@ -163,6 +169,7 @@ void LazyRandomTypeCollection::ensureCapacityFor(TypeIndex Index) {
}
Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) {
+ assert(!TI.isSimple());
if (PartialOffsets.empty())
return fullScanForType(TI);
@@ -217,6 +224,7 @@ Optional<TypeIndex> LazyRandomTypeCollection::getNext(TypeIndex Prev) {
}
Error LazyRandomTypeCollection::fullScanForType(TypeIndex TI) {
+ assert(!TI.isSimple());
assert(PartialOffsets.empty());
TypeIndex CurrentTI = TypeIndex::fromArrayIndex(0);
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
index f8bf961f22a1..04e0bab745d3 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -32,8 +32,8 @@ namespace {
class CVSymbolDumperImpl : public SymbolVisitorCallbacks {
public:
CVSymbolDumperImpl(TypeCollection &Types, SymbolDumpDelegate *ObjDelegate,
- ScopedPrinter &W, bool PrintRecordBytes)
- : Types(Types), ObjDelegate(ObjDelegate), W(W),
+ ScopedPrinter &W, CPUType CPU, bool PrintRecordBytes)
+ : Types(Types), ObjDelegate(ObjDelegate), W(W), CompilationCPUType(CPU),
PrintRecordBytes(PrintRecordBytes), InFunctionScope(false) {}
/// CVSymbolVisitor overrides.
@@ -46,6 +46,8 @@ public:
Error visitSymbolEnd(CVSymbol &Record) override;
Error visitUnknownSymbol(CVSymbol &Record) override;
+ CPUType getCompilationCPUType() const { return CompilationCPUType; }
+
private:
void printLocalVariableAddrRange(const LocalVariableAddrRange &Range,
uint32_t RelocationOffset);
@@ -56,6 +58,9 @@ private:
SymbolDumpDelegate *ObjDelegate;
ScopedPrinter &W;
+ /// Save the machine or CPU type when dumping a compile symbol.
+ CPUType CompilationCPUType = CPUType::X64;
+
bool PrintRecordBytes;
bool InFunctionScope;
};
@@ -235,6 +240,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
W.printEnum("Language", Compile2.getLanguage(), getSourceLanguageNames());
W.printFlags("Flags", Compile2.getFlags(), getCompileSym2FlagNames());
W.printEnum("Machine", unsigned(Compile2.Machine), getCPUTypeNames());
+ CompilationCPUType = Compile2.Machine;
std::string FrontendVersion;
{
raw_string_ostream Out(FrontendVersion);
@@ -255,9 +261,11 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Compile3Sym &Compile3) {
- W.printEnum("Language", Compile3.getLanguage(), getSourceLanguageNames());
- W.printFlags("Flags", Compile3.getFlags(), getCompileSym3FlagNames());
+ W.printEnum("Language", uint8_t(Compile3.getLanguage()), getSourceLanguageNames());
+ W.printFlags("Flags", uint32_t(Compile3.getFlags()),
+ getCompileSym3FlagNames());
W.printEnum("Machine", unsigned(Compile3.Machine), getCPUTypeNames());
+ CompilationCPUType = Compile3.Machine;
std::string FrontendVersion;
{
raw_string_ostream Out(FrontendVersion);
@@ -415,6 +423,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
FrameProc.SectionIdOfExceptionHandler);
W.printFlags("Flags", static_cast<uint32_t>(FrameProc.Flags),
getFrameProcSymFlagNames());
+ W.printEnum("LocalFramePtrReg",
+ uint16_t(FrameProc.getLocalFramePtrReg(CompilationCPUType)),
+ getRegisterNames());
+ W.printEnum("ParamFramePtrReg",
+ uint16_t(FrameProc.getParamFramePtrReg(CompilationCPUType)),
+ getRegisterNames());
return Error::success();
}
@@ -625,21 +639,27 @@ Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
Error CVSymbolDumper::dump(CVRecord<SymbolKind> &Record) {
SymbolVisitorCallbackPipeline Pipeline;
SymbolDeserializer Deserializer(ObjDelegate.get(), Container);
- CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes);
+ CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, CompilationCPUType,
+ PrintRecordBytes);
Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(Dumper);
CVSymbolVisitor Visitor(Pipeline);
- return Visitor.visitSymbolRecord(Record);
+ auto Err = Visitor.visitSymbolRecord(Record);
+ CompilationCPUType = Dumper.getCompilationCPUType();
+ return Err;
}
Error CVSymbolDumper::dump(const CVSymbolArray &Symbols) {
SymbolVisitorCallbackPipeline Pipeline;
SymbolDeserializer Deserializer(ObjDelegate.get(), Container);
- CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes);
+ CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, CompilationCPUType,
+ PrintRecordBytes);
Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(Dumper);
CVSymbolVisitor Visitor(Pipeline);
- return Visitor.visitSymbolStream(Symbols);
+ auto Err = Visitor.visitSymbolStream(Symbols);
+ CompilationCPUType = Dumper.getCompilationCPUType();
+ return Err;
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
new file mode 100644
index 000000000000..01746138ad1f
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
@@ -0,0 +1,94 @@
+//===- SymbolRecordHelpers.cpp ----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+template <typename RecordT> RecordT createRecord(const CVSymbol &sym) {
+ RecordT record(static_cast<SymbolRecordKind>(sym.kind()));
+ cantFail(SymbolDeserializer::deserializeAs<RecordT>(sym, record));
+ return record;
+}
+
+uint32_t llvm::codeview::getScopeEndOffset(const CVSymbol &Sym) {
+ assert(symbolOpensScope(Sym.kind()));
+ switch (Sym.kind()) {
+ case SymbolKind::S_GPROC32:
+ case SymbolKind::S_LPROC32:
+ case SymbolKind::S_GPROC32_ID:
+ case SymbolKind::S_LPROC32_ID:
+ case SymbolKind::S_LPROC32_DPC:
+ case SymbolKind::S_LPROC32_DPC_ID: {
+ ProcSym Proc = createRecord<ProcSym>(Sym);
+ return Proc.End;
+ }
+ case SymbolKind::S_BLOCK32: {
+ BlockSym Block = createRecord<BlockSym>(Sym);
+ return Block.End;
+ }
+ case SymbolKind::S_THUNK32: {
+ Thunk32Sym Thunk = createRecord<Thunk32Sym>(Sym);
+ return Thunk.End;
+ }
+ case SymbolKind::S_INLINESITE: {
+ InlineSiteSym Site = createRecord<InlineSiteSym>(Sym);
+ return Site.End;
+ }
+ default:
+ assert(false && "Unknown record type");
+ return 0;
+ }
+}
+
+uint32_t
+llvm::codeview::getScopeParentOffset(const llvm::codeview::CVSymbol &Sym) {
+ assert(symbolOpensScope(Sym.kind()));
+ switch (Sym.kind()) {
+ case SymbolKind::S_GPROC32:
+ case SymbolKind::S_LPROC32:
+ case SymbolKind::S_GPROC32_ID:
+ case SymbolKind::S_LPROC32_ID:
+ case SymbolKind::S_LPROC32_DPC:
+ case SymbolKind::S_LPROC32_DPC_ID: {
+ ProcSym Proc = createRecord<ProcSym>(Sym);
+ return Proc.Parent;
+ }
+ case SymbolKind::S_BLOCK32: {
+ BlockSym Block = createRecord<BlockSym>(Sym);
+ return Block.Parent;
+ }
+ case SymbolKind::S_THUNK32: {
+ Thunk32Sym Thunk = createRecord<Thunk32Sym>(Sym);
+ return Thunk.Parent;
+ }
+ case SymbolKind::S_INLINESITE: {
+ InlineSiteSym Site = createRecord<InlineSiteSym>(Sym);
+ return Site.Parent;
+ }
+ default:
+ assert(false && "Unknown record type");
+ return 0;
+ }
+}
+
+CVSymbolArray
+llvm::codeview::limitSymbolArrayToScope(const CVSymbolArray &Symbols,
+ uint32_t ScopeBegin) {
+ CVSymbol Opener = *Symbols.at(ScopeBegin);
+ assert(symbolOpensScope(Opener.kind()));
+ uint32_t EndOffset = getScopeEndOffset(Opener);
+ CVSymbol Closer = *Symbols.at(EndOffset);
+ EndOffset += Closer.RecordData.size();
+ return Symbols.substream(ScopeBegin, EndOffset);
+}
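
A minimal usage sketch for the new helpers above; `Syms` and `ProcOffset` are placeholder names for a symbol stream and the offset of a scope-opening record (for example S_GPROC32), not part of the change:

#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"

using namespace llvm;
using namespace llvm::codeview;

// Visit only the records that belong to one function scope, with the opener
// and its matching end record included in the substream.
void visitProcScope(const CVSymbolArray &Syms, uint32_t ProcOffset) {
  CVSymbolArray Scope = limitSymbolArrayToScope(Syms, ProcOffset);
  for (const CVSymbol &Sym : Scope) {
    // getScopeEndOffset()/getScopeParentOffset() can be queried on any
    // scope-opening record met while walking this substream.
    (void)Sym;
  }
}
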
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index e77c8e8f02f5..2af8205cebc3 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -471,3 +471,77 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
return Error::success();
}
+
+RegisterId codeview::decodeFramePtrReg(EncodedFramePtrReg EncodedReg,
+ CPUType CPU) {
+ assert(unsigned(EncodedReg) < 4);
+ switch (CPU) {
+ // FIXME: Add ARM and AArch64 variants here.
+ default:
+ break;
+ case CPUType::Intel8080:
+ case CPUType::Intel8086:
+ case CPUType::Intel80286:
+ case CPUType::Intel80386:
+ case CPUType::Intel80486:
+ case CPUType::Pentium:
+ case CPUType::PentiumPro:
+ case CPUType::Pentium3:
+ switch (EncodedReg) {
+ case EncodedFramePtrReg::None: return RegisterId::NONE;
+ case EncodedFramePtrReg::StackPtr: return RegisterId::VFRAME;
+ case EncodedFramePtrReg::FramePtr: return RegisterId::EBP;
+ case EncodedFramePtrReg::BasePtr: return RegisterId::EBX;
+ }
+ llvm_unreachable("bad encoding");
+ case CPUType::X64:
+ switch (EncodedReg) {
+ case EncodedFramePtrReg::None: return RegisterId::NONE;
+ case EncodedFramePtrReg::StackPtr: return RegisterId::RSP;
+ case EncodedFramePtrReg::FramePtr: return RegisterId::RBP;
+ case EncodedFramePtrReg::BasePtr: return RegisterId::R13;
+ }
+ llvm_unreachable("bad encoding");
+ }
+ return RegisterId::NONE;
+}
+
+EncodedFramePtrReg codeview::encodeFramePtrReg(RegisterId Reg, CPUType CPU) {
+ switch (CPU) {
+ // FIXME: Add ARM and AArch64 variants here.
+ default:
+ break;
+ case CPUType::Intel8080:
+ case CPUType::Intel8086:
+ case CPUType::Intel80286:
+ case CPUType::Intel80386:
+ case CPUType::Intel80486:
+ case CPUType::Pentium:
+ case CPUType::PentiumPro:
+ case CPUType::Pentium3:
+ switch (Reg) {
+ case RegisterId::VFRAME:
+ return EncodedFramePtrReg::StackPtr;
+ case RegisterId::EBP:
+ return EncodedFramePtrReg::FramePtr;
+ case RegisterId::EBX:
+ return EncodedFramePtrReg::BasePtr;
+ default:
+ break;
+ }
+ break;
+ case CPUType::X64:
+ switch (Reg) {
+ case RegisterId::RSP:
+ return EncodedFramePtrReg::StackPtr;
+ case RegisterId::RBP:
+ return EncodedFramePtrReg::FramePtr;
+ case RegisterId::R13:
+ return EncodedFramePtrReg::BasePtr;
+ default:
+ break;
+ }
+ break;
+ }
+ return EncodedFramePtrReg::None;
+}
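
A minimal sketch of how the new encoding helpers round-trip; the header that declares decodeFramePtrReg()/encodeFramePtrReg() is not shown in this diff, so the include is assumed:

// Assumed: the CodeView header declaring decodeFramePtrReg/encodeFramePtrReg,
// CPUType, RegisterId and EncodedFramePtrReg is already included.
using namespace llvm::codeview;

// The two-bit frame-pointer encoding in S_FRAMEPROC is CPU-relative: the same
// encoded value names EBP on 32-bit x86 but RBP on x64.
void framePtrRegExample() {
  RegisterId R86 = decodeFramePtrReg(EncodedFramePtrReg::FramePtr,
                                     CPUType::Pentium3); // RegisterId::EBP
  RegisterId R64 = decodeFramePtrReg(EncodedFramePtrReg::FramePtr,
                                     CPUType::X64);      // RegisterId::RBP
  EncodedFramePtrReg E = encodeFramePtrReg(R64, CPUType::X64);
  (void)R86;
  (void)E; // E == EncodedFramePtrReg::FramePtr
}
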
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
index 7c68c9167c98..f5d3bea43a14 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -361,7 +361,6 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) {
Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) {
printTypeIndex("PointeeType", Ptr.getReferentType());
- W->printHex("PointerAttributes", uint32_t(Ptr.getOptions()));
W->printEnum("PtrType", unsigned(Ptr.getPointerKind()),
makeArrayRef(PtrKindNames));
W->printEnum("PtrMode", unsigned(Ptr.getMode()), makeArrayRef(PtrModeNames));
@@ -371,6 +370,8 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) {
W->printNumber("IsVolatile", Ptr.isVolatile());
W->printNumber("IsUnaligned", Ptr.isUnaligned());
W->printNumber("IsRestrict", Ptr.isRestrict());
+ W->printNumber("IsThisPtr&", Ptr.isLValueReferenceThisPtr());
+ W->printNumber("IsThisPtr&&", Ptr.isRValueReferenceThisPtr());
W->printNumber("SizeOf", Ptr.getSize());
if (Ptr.isPointerToMember()) {
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp
index 24fe5fcb28d4..332d67470da5 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp
@@ -74,6 +74,9 @@ StringRef TypeIndex::simpleTypeName(TypeIndex TI) {
if (TI.isNoneType())
return "<no type>";
+ if (TI == TypeIndex::NullptrT())
+ return "std::nullptr_t";
+
// This is a simple type.
for (const auto &SimpleTypeName : SimpleTypeNames) {
if (SimpleTypeName.Kind == TI.getSimpleKind()) {
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
new file mode 100644
index 000000000000..2a66474cf5b6
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
@@ -0,0 +1,53 @@
+//===- TypeRecordHelpers.cpp ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+template <typename RecordT> static ClassOptions getUdtOptions(CVType CVT) {
+ RecordT Record;
+ if (auto EC = TypeDeserializer::deserializeAs<RecordT>(CVT, Record)) {
+ consumeError(std::move(EC));
+ return ClassOptions::None;
+ }
+ return Record.getOptions();
+}
+
+bool llvm::codeview::isUdtForwardRef(CVType CVT) {
+ ClassOptions UdtOptions = ClassOptions::None;
+ switch (CVT.kind()) {
+ case LF_STRUCTURE:
+ case LF_CLASS:
+ case LF_INTERFACE:
+ UdtOptions = getUdtOptions<ClassRecord>(std::move(CVT));
+ break;
+ case LF_ENUM:
+ UdtOptions = getUdtOptions<EnumRecord>(std::move(CVT));
+ break;
+ case LF_UNION:
+ UdtOptions = getUdtOptions<UnionRecord>(std::move(CVT));
+ break;
+ default:
+ return false;
+ }
+ return (UdtOptions & ClassOptions::ForwardReference) != ClassOptions::None;
+}
+
+TypeIndex llvm::codeview::getModifiedType(const CVType &CVT) {
+ assert(CVT.kind() == LF_MODIFIER);
+ SmallVector<TypeIndex, 1> Refs;
+ discoverTypeIndices(CVT, Refs);
+ return Refs.front();
+}
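
A minimal caller-side sketch for the new type-record helpers above; `CVT` stands for any deserialized CVType record and is not part of the change:

#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"

using namespace llvm::codeview;

// Peel an LF_MODIFIER (const/volatile/unaligned) wrapper back to the type it
// modifies; for anything else return the "no type" sentinel.
TypeIndex underlyingType(const CVType &CVT) {
  if (CVT.kind() == LF_MODIFIER)
    return getModifiedType(CVT);
  return TypeIndex::None();
}

// Forward references carry no member list; the full definition appears later
// in the stream, so callers usually skip them here.
bool isCompleteUdt(const CVType &CVT) {
  return !isUdtForwardRef(CVT);
}
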
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index 2e29c9d7dfa0..bae11ce6a6a1 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
@@ -63,7 +64,12 @@ class TypeStreamMerger {
public:
explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
: IndexMap(SourceToDest) {
- SourceToDest.clear();
+ // When dealing with precompiled headers objects, all data in SourceToDest
+ // belongs to the precompiled headers object, and is assumed to be already
+ // remapped to the target PDB. Any forthcoming type that will be merged in
+ // might potentially back-reference this data. We also don't want to resolve
+ // the types in the precompiled object twice.
+ CurIndex += SourceToDest.size();
}
static const TypeIndex Untranslated;
@@ -71,7 +77,7 @@ public:
// Local hashing entry points
Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
MergingTypeTableBuilder &DestTypes,
- const CVTypeArray &IdsAndTypes);
+ const CVTypeArray &IdsAndTypes, Optional<uint32_t> &S);
Error mergeIdRecords(MergingTypeTableBuilder &Dest,
ArrayRef<TypeIndex> TypeSourceToDest,
const CVTypeArray &Ids);
@@ -82,13 +88,15 @@ public:
Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
GlobalTypeTableBuilder &DestTypes,
const CVTypeArray &IdsAndTypes,
- ArrayRef<GloballyHashedType> Hashes);
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &S);
Error mergeIdRecords(GlobalTypeTableBuilder &Dest,
ArrayRef<TypeIndex> TypeSourceToDest,
const CVTypeArray &Ids,
ArrayRef<GloballyHashedType> Hashes);
Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types,
- ArrayRef<GloballyHashedType> Hashes);
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &S);
private:
Error doit(const CVTypeArray &Types);
@@ -156,6 +164,8 @@ private:
return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
}
+ Expected<bool> shouldRemapType(const CVType &Type);
+
Optional<Error> LastError;
bool UseGlobalHashes = false;
@@ -185,6 +195,8 @@ private:
/// Temporary storage that we use to copy a record's data while re-writing
/// its type indices.
SmallVector<uint8_t, 256> RemapStorage;
+
+ Optional<uint32_t> PCHSignature;
};
} // end anonymous namespace
@@ -261,22 +273,27 @@ Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest,
Error TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
MergingTypeTableBuilder &DestTypes,
- const CVTypeArray &IdsAndTypes) {
+ const CVTypeArray &IdsAndTypes,
+ Optional<uint32_t> &S) {
DestIdStream = &DestIds;
DestTypeStream = &DestTypes;
UseGlobalHashes = false;
- return doit(IdsAndTypes);
+ auto Err = doit(IdsAndTypes);
+ S = PCHSignature;
+ return Err;
}
// Global hashing entry points
Error TypeStreamMerger::mergeTypeRecords(GlobalTypeTableBuilder &Dest,
const CVTypeArray &Types,
- ArrayRef<GloballyHashedType> Hashes) {
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &S) {
DestGlobalTypeStream = &Dest;
UseGlobalHashes = true;
GlobalHashes = Hashes;
-
- return doit(Types);
+ auto Err = doit(Types);
+ S = PCHSignature;
+ return Err;
}
Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest,
@@ -294,12 +311,15 @@ Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest,
Error TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
GlobalTypeTableBuilder &DestTypes,
const CVTypeArray &IdsAndTypes,
- ArrayRef<GloballyHashedType> Hashes) {
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &S) {
DestGlobalIdStream = &DestIds;
DestGlobalTypeStream = &DestTypes;
UseGlobalHashes = true;
GlobalHashes = Hashes;
- return doit(IdsAndTypes);
+ auto Err = doit(IdsAndTypes);
+ S = PCHSignature;
+ return Err;
}
Error TypeStreamMerger::doit(const CVTypeArray &Types) {
@@ -326,7 +346,7 @@ Error TypeStreamMerger::doit(const CVTypeArray &Types) {
"second pass found more bad indices");
if (!LastError && NumBadIndices == BadIndicesRemaining) {
return llvm::make_error<CodeViewError>(
- cv_error_code::corrupt_record, "input type graph contains cycles");
+ cv_error_code::corrupt_record, "Input type graph contains cycles");
}
}
@@ -345,25 +365,30 @@ Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
}
Error TypeStreamMerger::remapType(const CVType &Type) {
- auto DoSerialize =
- [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
- return remapIndices(Type, Storage);
- };
+ auto R = shouldRemapType(Type);
+ if (!R)
+ return R.takeError();
TypeIndex DestIdx = Untranslated;
- if (LLVM_LIKELY(UseGlobalHashes)) {
- GlobalTypeTableBuilder &Dest =
- isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
- GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
- DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize);
- } else {
- MergingTypeTableBuilder &Dest =
- isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
-
- RemapStorage.resize(Type.RecordData.size());
- ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
- if (!Result.empty())
- DestIdx = Dest.insertRecordBytes(Result);
+ if (*R) {
+ auto DoSerialize =
+ [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
+ return remapIndices(Type, Storage);
+ };
+ if (LLVM_LIKELY(UseGlobalHashes)) {
+ GlobalTypeTableBuilder &Dest =
+ isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
+ GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
+ DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize);
+ } else {
+ MergingTypeTableBuilder &Dest =
+ isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
+
+ RemapStorage.resize(Type.RecordData.size());
+ ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
+ if (!Result.empty())
+ DestIdx = Dest.insertRecordBytes(Result);
+ }
}
addMapping(DestIdx);
@@ -418,25 +443,28 @@ Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest,
Error llvm::codeview::mergeTypeAndIdRecords(
MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes,
- SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes) {
+ SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
+ Optional<uint32_t> &PCHSignature) {
TypeStreamMerger M(SourceToDest);
- return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes);
+ return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHSignature);
}
Error llvm::codeview::mergeTypeAndIdRecords(
GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
- ArrayRef<GloballyHashedType> Hashes) {
+ ArrayRef<GloballyHashedType> Hashes, Optional<uint32_t> &PCHSignature) {
TypeStreamMerger M(SourceToDest);
- return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes);
+ return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes,
+ PCHSignature);
}
Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest,
SmallVectorImpl<TypeIndex> &SourceToDest,
const CVTypeArray &Types,
- ArrayRef<GloballyHashedType> Hashes) {
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &PCHSignature) {
TypeStreamMerger M(SourceToDest);
- return M.mergeTypeRecords(Dest, Types, Hashes);
+ return M.mergeTypeRecords(Dest, Types, Hashes, PCHSignature);
}
Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest,
@@ -447,3 +475,20 @@ Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest,
TypeStreamMerger M(SourceToDest);
return M.mergeIdRecords(Dest, Types, Ids, Hashes);
}
+
+Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) {
+ // For object files containing precompiled types, we need to extract the
+ // signature, through EndPrecompRecord. This is done here for performance
+ // reasons, to avoid re-parsing the Types stream.
+ if (Type.kind() == LF_ENDPRECOMP) {
+ EndPrecompRecord EP;
+ if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type),
+ EP))
+ return joinErrors(std::move(EC), errorCorruptRecord());
+ if (PCHSignature.hasValue())
+ return errorCorruptRecord();
+ PCHSignature.emplace(EP.getSignature());
+ return false;
+ }
+ return true;
+}
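
A minimal caller-side sketch of the new Optional<uint32_t> out-parameter, assuming the destination builders, the SourceToDest map, and the IdsAndTypes array are set up elsewhere; registerPrecompSignature() is a hypothetical consumer, not part of the change:

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"

using namespace llvm;
using namespace llvm::codeview;

// Stand-in for whatever the consumer does with the signature.
static void registerPrecompSignature(uint32_t Sig) { (void)Sig; }

Error mergeOneObject(MergingTypeTableBuilder &DestIds,
                     MergingTypeTableBuilder &DestTypes,
                     SmallVectorImpl<TypeIndex> &SourceToDest,
                     const CVTypeArray &IdsAndTypes) {
  Optional<uint32_t> PCHSignature;
  if (auto Err = mergeTypeAndIdRecords(DestIds, DestTypes, SourceToDest,
                                       IdsAndTypes, PCHSignature))
    return Err; // propagate a merge failure to the caller
  // Set only when the input carried an LF_ENDPRECOMP record, i.e. the object
  // was built against a precompiled-types object.
  if (PCHSignature)
    registerPrecompSignature(*PCHSignature);
  return Error::success();
}
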
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 4582e036f9fc..54daf34ff253 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -14,6 +14,7 @@
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DJB.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -45,9 +46,9 @@ llvm::Error AppleAcceleratorTable::extract() {
uint32_t Offset = 0;
// Check that we can at least read the header.
- if (!AccelSection.isValidOffset(offsetof(Header, HeaderDataLength)+4))
- return make_error<StringError>("Section too small: cannot read header.",
- inconvertibleErrorCode());
+ if (!AccelSection.isValidOffset(offsetof(Header, HeaderDataLength) + 4))
+ return createStringError(errc::illegal_byte_sequence,
+ "Section too small: cannot read header.");
Hdr.Magic = AccelSection.getU32(&Offset);
Hdr.Version = AccelSection.getU16(&Offset);
@@ -62,9 +63,9 @@ llvm::Error AppleAcceleratorTable::extract() {
// equal to the size for an empty table and hence pointer after the section.
if (!AccelSection.isValidOffset(sizeof(Hdr) + Hdr.HeaderDataLength +
Hdr.BucketCount * 4 + Hdr.HashCount * 8 - 1))
- return make_error<StringError>(
- "Section too small: cannot read buckets and hashes.",
- inconvertibleErrorCode());
+ return createStringError(
+ errc::illegal_byte_sequence,
+ "Section too small: cannot read buckets and hashes.");
HdrData.DIEOffsetBase = AccelSection.getU32(&Offset);
uint32_t NumAtoms = AccelSection.getU32(&Offset);
@@ -380,8 +381,8 @@ llvm::Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS,
uint32_t *Offset) {
// Check that we can read the fixed-size part.
if (!AS.isValidOffset(*Offset + sizeof(HeaderPOD) - 1))
- return make_error<StringError>("Section too small: cannot read header.",
- inconvertibleErrorCode());
+ return createStringError(errc::illegal_byte_sequence,
+ "Section too small: cannot read header.");
UnitLength = AS.getU32(Offset);
Version = AS.getU16(Offset);
@@ -395,9 +396,9 @@ llvm::Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS,
AugmentationStringSize = alignTo(AS.getU32(Offset), 4);
if (!AS.isValidOffsetForDataOfSize(*Offset, AugmentationStringSize))
- return make_error<StringError>(
- "Section too small: cannot read header augmentation.",
- inconvertibleErrorCode());
+ return createStringError(
+ errc::illegal_byte_sequence,
+ "Section too small: cannot read header augmentation.");
AugmentationString.resize(AugmentationStringSize);
AS.getU8(Offset, reinterpret_cast<uint8_t *>(AugmentationString.data()),
AugmentationStringSize);
@@ -439,8 +440,8 @@ DWARFDebugNames::Abbrev DWARFDebugNames::AbbrevMapInfo::getTombstoneKey() {
Expected<DWARFDebugNames::AttributeEncoding>
DWARFDebugNames::NameIndex::extractAttributeEncoding(uint32_t *Offset) {
if (*Offset >= EntriesBase) {
- return make_error<StringError>("Incorrectly terminated abbreviation table.",
- inconvertibleErrorCode());
+ return createStringError(errc::illegal_byte_sequence,
+ "Incorrectly terminated abbreviation table.");
}
uint32_t Index = Section.AccelSection.getULEB128(Offset);
@@ -465,8 +466,8 @@ DWARFDebugNames::NameIndex::extractAttributeEncodings(uint32_t *Offset) {
Expected<DWARFDebugNames::Abbrev>
DWARFDebugNames::NameIndex::extractAbbrev(uint32_t *Offset) {
if (*Offset >= EntriesBase) {
- return make_error<StringError>("Incorrectly terminated abbreviation table.",
- inconvertibleErrorCode());
+ return createStringError(errc::illegal_byte_sequence,
+ "Incorrectly terminated abbreviation table.");
}
uint32_t Code = Section.AccelSection.getULEB128(Offset);
@@ -501,9 +502,8 @@ Error DWARFDebugNames::NameIndex::extract() {
Offset += Hdr.NameCount * 4;
if (!AS.isValidOffsetForDataOfSize(Offset, Hdr.AbbrevTableSize))
- return make_error<StringError>(
- "Section too small: cannot read abbreviations.",
- inconvertibleErrorCode());
+ return createStringError(errc::illegal_byte_sequence,
+ "Section too small: cannot read abbreviations.");
EntriesBase = Offset + Hdr.AbbrevTableSize;
@@ -514,10 +514,9 @@ Error DWARFDebugNames::NameIndex::extract() {
if (isSentinel(*AbbrevOr))
return Error::success();
- if (!Abbrevs.insert(std::move(*AbbrevOr)).second) {
- return make_error<StringError>("Duplicate abbreviation code.",
- inconvertibleErrorCode());
- }
+ if (!Abbrevs.insert(std::move(*AbbrevOr)).second)
+ return createStringError(errc::invalid_argument,
+ "Duplicate abbreviation code.");
}
}
DWARFDebugNames::Entry::Entry(const NameIndex &NameIdx, const Abbrev &Abbr)
@@ -600,8 +599,8 @@ Expected<DWARFDebugNames::Entry>
DWARFDebugNames::NameIndex::getEntry(uint32_t *Offset) const {
const DWARFDataExtractor &AS = Section.AccelSection;
if (!AS.isValidOffset(*Offset))
- return make_error<StringError>("Incorrectly terminated entry list.",
- inconvertibleErrorCode());
+ return createStringError(errc::illegal_byte_sequence,
+ "Incorrectly terminated entry list.");
uint32_t AbbrevCode = AS.getULEB128(Offset);
if (AbbrevCode == 0)
@@ -609,16 +608,15 @@ DWARFDebugNames::NameIndex::getEntry(uint32_t *Offset) const {
const auto AbbrevIt = Abbrevs.find_as(AbbrevCode);
if (AbbrevIt == Abbrevs.end())
- return make_error<StringError>("Invalid abbreviation.",
- inconvertibleErrorCode());
+ return createStringError(errc::invalid_argument, "Invalid abbreviation.");
Entry E(*this, *AbbrevIt);
dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32};
for (auto &Value : E.Values) {
if (!Value.extractValue(AS, Offset, FormParams))
- return make_error<StringError>("Error extracting index attribute values.",
- inconvertibleErrorCode());
+ return createStringError(errc::io_error,
+ "Error extracting index attribute values.");
}
return std::move(E);
}
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 9d2554ff9e2e..e6620ee3dd1d 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -99,22 +99,18 @@ using ContributionCollection =
// Collect all the contributions to the string offsets table from all units,
// sort them by their starting offsets and remove duplicates.
static ContributionCollection
-collectContributionData(DWARFContext::cu_iterator_range CUs,
- DWARFContext::tu_section_iterator_range TUSs) {
+collectContributionData(DWARFContext::unit_iterator_range Units) {
ContributionCollection Contributions;
- for (const auto &CU : CUs)
- Contributions.push_back(CU->getStringOffsetsTableContribution());
- for (const auto &TUS : TUSs)
- for (const auto &TU : TUS)
- Contributions.push_back(TU->getStringOffsetsTableContribution());
-
+ for (const auto &U : Units)
+ Contributions.push_back(U->getStringOffsetsTableContribution());
// Sort the contributions so that any invalid ones are placed at
// the start of the contributions vector. This way they are reported
// first.
- llvm::sort(Contributions.begin(), Contributions.end(),
+ llvm::sort(Contributions,
[](const Optional<StrOffsetsContributionDescriptor> &L,
const Optional<StrOffsetsContributionDescriptor> &R) {
- if (L && R) return L->Base < R->Base;
+ if (L && R)
+ return L->Base < R->Base;
return R.hasValue();
});
@@ -136,9 +132,8 @@ collectContributionData(DWARFContext::cu_iterator_range CUs,
static void dumpDWARFv5StringOffsetsSection(
raw_ostream &OS, StringRef SectionName, const DWARFObject &Obj,
const DWARFSection &StringOffsetsSection, StringRef StringSection,
- DWARFContext::cu_iterator_range CUs,
- DWARFContext::tu_section_iterator_range TUSs, bool LittleEndian) {
- auto Contributions = collectContributionData(CUs, TUSs);
+ DWARFContext::unit_iterator_range Units, bool LittleEndian) {
+ auto Contributions = collectContributionData(Units);
DWARFDataExtractor StrOffsetExt(Obj, StringOffsetsSection, LittleEndian, 0);
DataExtractor StrData(StringSection, LittleEndian, 0);
uint64_t SectionSize = StringOffsetsSection.Data.size();
@@ -215,18 +210,18 @@ static void dumpDWARFv5StringOffsetsSection(
// a header containing size and version number. Alternatively, it may be a
// monolithic series of string offsets, as generated by the pre-DWARF v5
// implementation of split DWARF.
-static void dumpStringOffsetsSection(
- raw_ostream &OS, StringRef SectionName, const DWARFObject &Obj,
- const DWARFSection &StringOffsetsSection, StringRef StringSection,
- DWARFContext::cu_iterator_range CUs,
- DWARFContext::tu_section_iterator_range TUSs, bool LittleEndian,
- unsigned MaxVersion) {
+static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName,
+ const DWARFObject &Obj,
+ const DWARFSection &StringOffsetsSection,
+ StringRef StringSection,
+ DWARFContext::unit_iterator_range Units,
+ bool LittleEndian, unsigned MaxVersion) {
// If we have at least one (compile or type) unit with DWARF v5 or greater,
// we assume that the section is formatted like a DWARF v5 string offsets
// section.
if (MaxVersion >= 5)
dumpDWARFv5StringOffsetsSection(OS, SectionName, Obj, StringOffsetsSection,
- StringSection, CUs, TUSs, LittleEndian);
+ StringSection, Units, LittleEndian);
else {
DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0);
uint32_t offset = 0;
@@ -254,19 +249,12 @@ static void dumpStringOffsetsSection(
static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData,
DIDumpOptions DumpOpts, uint16_t Version,
uint8_t AddrSize) {
- // TODO: Make this more general: add callback types to Error.h, create
- // implementation and make all DWARF classes use them.
- static auto WarnCallback = [](Error Warn) {
- handleAllErrors(std::move(Warn), [](ErrorInfoBase &Info) {
- WithColor::warning() << Info.message() << '\n';
- });
- };
uint32_t Offset = 0;
while (AddrData.isValidOffset(Offset)) {
DWARFDebugAddrTable AddrTable;
uint32_t TableOffset = Offset;
- if (Error Err = AddrTable.extract(AddrData, &Offset, Version,
- AddrSize, WarnCallback)) {
+ if (Error Err = AddrTable.extract(AddrData, &Offset, Version, AddrSize,
+ DWARFContext::dumpWarning)) {
WithColor::error() << toString(std::move(Err)) << '\n';
// Keep going after an error, if we can, assuming that the length field
// could be read. If it couldn't, stop reading the section.
@@ -281,9 +269,11 @@ static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData,
}
// Dump the .debug_rnglists or .debug_rnglists.dwo section (DWARF v5).
-static void dumpRnglistsSection(raw_ostream &OS,
- DWARFDataExtractor &rnglistData,
- DIDumpOptions DumpOpts) {
+static void
+dumpRnglistsSection(raw_ostream &OS, DWARFDataExtractor &rnglistData,
+ llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ LookupPooledAddress,
+ DIDumpOptions DumpOpts) {
uint32_t Offset = 0;
while (rnglistData.isValidOffset(Offset)) {
llvm::DWARFDebugRnglistTable Rnglists;
@@ -297,16 +287,36 @@ static void dumpRnglistsSection(raw_ostream &OS,
break;
Offset = TableOffset + Length;
} else {
- Rnglists.dump(OS, DumpOpts);
+ Rnglists.dump(OS, LookupPooledAddress, DumpOpts);
}
}
}
+static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts,
+ DWARFDataExtractor Data,
+ const MCRegisterInfo *MRI,
+ Optional<uint64_t> DumpOffset) {
+ uint32_t Offset = 0;
+ DWARFDebugLoclists Loclists;
+
+ DWARFListTableHeader Header(".debug_loclists", "locations");
+ if (Error E = Header.extract(Data, &Offset)) {
+ WithColor::error() << toString(std::move(E)) << '\n';
+ return;
+ }
+
+ Header.dump(OS, DumpOpts);
+ DataExtractor LocData(Data.getData().drop_front(Offset),
+ Data.isLittleEndian(), Header.getAddrSize());
+
+ Loclists.parse(LocData, Header.getVersion());
+ Loclists.dump(OS, 0, MRI, DumpOffset);
+}
+
void DWARFContext::dump(
raw_ostream &OS, DIDumpOptions DumpOpts,
std::array<Optional<uint64_t>, DIDT_ID_Count> DumpOffsets) {
- Optional<uint64_t> DumpOffset;
uint64_t DumpType = DumpOpts.DumpType;
StringRef Extension = sys::path::extension(DObj->getFileName());
@@ -323,13 +333,13 @@ void DWARFContext::dump(
bool Explicit = DumpType != DIDT_All && !IsDWO;
bool ExplicitDWO = Explicit && IsDWO;
auto shouldDump = [&](bool Explicit, const char *Name, unsigned ID,
- StringRef Section) {
- DumpOffset = DumpOffsets[ID];
+ StringRef Section) -> Optional<uint64_t> * {
unsigned Mask = 1U << ID;
bool Should = (DumpType & Mask) && (Explicit || !Section.empty());
- if (Should)
- OS << "\n" << Name << " contents:\n";
- return Should;
+ if (!Should)
+ return nullptr;
+ OS << "\n" << Name << " contents:\n";
+ return &DumpOffsets[ID];
};
// Dump individual sections.
@@ -340,57 +350,63 @@ void DWARFContext::dump(
DObj->getAbbrevDWOSection()))
getDebugAbbrevDWO()->dump(OS);
- auto dumpDebugInfo = [&](bool IsExplicit, const char *Name,
- DWARFSection Section, cu_iterator_range CUs) {
- if (shouldDump(IsExplicit, Name, DIDT_ID_DebugInfo, Section.Data)) {
- if (DumpOffset)
- getDIEForOffset(DumpOffset.getValue())
+ auto dumpDebugInfo = [&](const char *Name, unit_iterator_range Units) {
+ OS << '\n' << Name << " contents:\n";
+ if (auto DumpOffset = DumpOffsets[DIDT_ID_DebugInfo])
+ for (const auto &U : Units)
+ U->getDIEForOffset(DumpOffset.getValue())
.dump(OS, 0, DumpOpts.noImplicitRecursion());
- else
- for (const auto &CU : CUs)
- CU->dump(OS, DumpOpts);
- }
+ else
+ for (const auto &U : Units)
+ U->dump(OS, DumpOpts);
};
- dumpDebugInfo(Explicit, ".debug_info", DObj->getInfoSection(),
- compile_units());
- dumpDebugInfo(ExplicitDWO, ".debug_info.dwo", DObj->getInfoDWOSection(),
- dwo_compile_units());
+ if ((DumpType & DIDT_DebugInfo)) {
+ if (Explicit || getNumCompileUnits())
+ dumpDebugInfo(".debug_info", info_section_units());
+ if (ExplicitDWO || getNumDWOCompileUnits())
+ dumpDebugInfo(".debug_info.dwo", dwo_info_section_units());
+ }
- auto dumpDebugType = [&](const char *Name,
- tu_section_iterator_range TUSections) {
+ auto dumpDebugType = [&](const char *Name, unit_iterator_range Units) {
OS << '\n' << Name << " contents:\n";
- DumpOffset = DumpOffsets[DIDT_ID_DebugTypes];
- for (const auto &TUS : TUSections)
- for (const auto &TU : TUS)
- if (DumpOffset)
- TU->getDIEForOffset(*DumpOffset)
- .dump(OS, 0, DumpOpts.noImplicitRecursion());
- else
- TU->dump(OS, DumpOpts);
+ for (const auto &U : Units)
+ if (auto DumpOffset = DumpOffsets[DIDT_ID_DebugTypes])
+ U->getDIEForOffset(*DumpOffset)
+ .dump(OS, 0, DumpOpts.noImplicitRecursion());
+ else
+ U->dump(OS, DumpOpts);
};
if ((DumpType & DIDT_DebugTypes)) {
if (Explicit || getNumTypeUnits())
- dumpDebugType(".debug_types", type_unit_sections());
+ dumpDebugType(".debug_types", types_section_units());
if (ExplicitDWO || getNumDWOTypeUnits())
- dumpDebugType(".debug_types.dwo", dwo_type_unit_sections());
+ dumpDebugType(".debug_types.dwo", dwo_types_section_units());
}
- if (shouldDump(Explicit, ".debug_loc", DIDT_ID_DebugLoc,
- DObj->getLocSection().Data)) {
- getDebugLoc()->dump(OS, getRegisterInfo(), DumpOffset);
+ if (const auto *Off = shouldDump(Explicit, ".debug_loc", DIDT_ID_DebugLoc,
+ DObj->getLocSection().Data)) {
+ getDebugLoc()->dump(OS, getRegisterInfo(), *Off);
+ }
+ if (const auto *Off =
+ shouldDump(Explicit, ".debug_loclists", DIDT_ID_DebugLoclists,
+ DObj->getLoclistsSection().Data)) {
+ DWARFDataExtractor Data(*DObj, DObj->getLoclistsSection(), isLittleEndian(),
+ 0);
+ dumpLoclistsSection(OS, DumpOpts, Data, getRegisterInfo(), *Off);
}
- if (shouldDump(ExplicitDWO, ".debug_loc.dwo", DIDT_ID_DebugLoc,
- DObj->getLocDWOSection().Data)) {
- getDebugLocDWO()->dump(OS, getRegisterInfo(), DumpOffset);
+ if (const auto *Off =
+ shouldDump(ExplicitDWO, ".debug_loc.dwo", DIDT_ID_DebugLoc,
+ DObj->getLocDWOSection().Data)) {
+ getDebugLocDWO()->dump(OS, 0, getRegisterInfo(), *Off);
}
- if (shouldDump(Explicit, ".debug_frame", DIDT_ID_DebugFrame,
- DObj->getDebugFrameSection()))
- getDebugFrame()->dump(OS, getRegisterInfo(), DumpOffset);
+ if (const auto *Off = shouldDump(Explicit, ".debug_frame", DIDT_ID_DebugFrame,
+ DObj->getDebugFrameSection()))
+ getDebugFrame()->dump(OS, getRegisterInfo(), *Off);
- if (shouldDump(Explicit, ".eh_frame", DIDT_ID_DebugFrame,
- DObj->getEHFrameSection()))
- getEHFrame()->dump(OS, getRegisterInfo(), DumpOffset);
+ if (const auto *Off = shouldDump(Explicit, ".eh_frame", DIDT_ID_DebugFrame,
+ DObj->getEHFrameSection()))
+ getEHFrame()->dump(OS, getRegisterInfo(), *Off);
if (DumpType & DIDT_DebugMacro) {
if (Explicit || !getDebugMacro()->empty()) {
@@ -409,38 +425,41 @@ void DWARFContext::dump(
}
auto DumpLineSection = [&](DWARFDebugLine::SectionParser Parser,
- DIDumpOptions DumpOpts) {
+ DIDumpOptions DumpOpts,
+ Optional<uint64_t> DumpOffset) {
while (!Parser.done()) {
if (DumpOffset && Parser.getOffset() != *DumpOffset) {
- Parser.skip();
+ Parser.skip(dumpWarning);
continue;
}
OS << "debug_line[" << format("0x%8.8x", Parser.getOffset()) << "]\n";
if (DumpOpts.Verbose) {
- Parser.parseNext(DWARFDebugLine::warn, DWARFDebugLine::warn, &OS);
+ Parser.parseNext(dumpWarning, dumpWarning, &OS);
} else {
- DWARFDebugLine::LineTable LineTable = Parser.parseNext();
+ DWARFDebugLine::LineTable LineTable =
+ Parser.parseNext(dumpWarning, dumpWarning);
LineTable.dump(OS, DumpOpts);
}
}
};
- if (shouldDump(Explicit, ".debug_line", DIDT_ID_DebugLine,
- DObj->getLineSection().Data)) {
+ if (const auto *Off = shouldDump(Explicit, ".debug_line", DIDT_ID_DebugLine,
+ DObj->getLineSection().Data)) {
DWARFDataExtractor LineData(*DObj, DObj->getLineSection(), isLittleEndian(),
0);
DWARFDebugLine::SectionParser Parser(LineData, *this, compile_units(),
- type_unit_sections());
- DumpLineSection(Parser, DumpOpts);
+ type_units());
+ DumpLineSection(Parser, DumpOpts, *Off);
}
- if (shouldDump(ExplicitDWO, ".debug_line.dwo", DIDT_ID_DebugLine,
- DObj->getLineDWOSection().Data)) {
+ if (const auto *Off =
+ shouldDump(ExplicitDWO, ".debug_line.dwo", DIDT_ID_DebugLine,
+ DObj->getLineDWOSection().Data)) {
DWARFDataExtractor LineData(*DObj, DObj->getLineDWOSection(),
isLittleEndian(), 0);
DWARFDebugLine::SectionParser Parser(LineData, *this, dwo_compile_units(),
- dwo_type_unit_sections());
- DumpLineSection(Parser, DumpOpts);
+ dwo_type_units());
+ DumpLineSection(Parser, DumpOpts, *Off);
}
if (shouldDump(Explicit, ".debug_cu_index", DIDT_ID_DebugCUIndex,
@@ -509,56 +528,64 @@ void DWARFContext::dump(
}
}
+ auto LookupPooledAddress = [&](uint32_t Index) -> Optional<SectionedAddress> {
+ const auto &CUs = compile_units();
+ auto I = CUs.begin();
+ if (I == CUs.end())
+ return None;
+ return (*I)->getAddrOffsetSectionItem(Index);
+ };
+
if (shouldDump(Explicit, ".debug_rnglists", DIDT_ID_DebugRnglists,
DObj->getRnglistsSection().Data)) {
DWARFDataExtractor RnglistData(*DObj, DObj->getRnglistsSection(),
isLittleEndian(), 0);
- dumpRnglistsSection(OS, RnglistData, DumpOpts);
+ dumpRnglistsSection(OS, RnglistData, LookupPooledAddress, DumpOpts);
}
if (shouldDump(ExplicitDWO, ".debug_rnglists.dwo", DIDT_ID_DebugRnglists,
DObj->getRnglistsDWOSection().Data)) {
DWARFDataExtractor RnglistData(*DObj, DObj->getRnglistsDWOSection(),
isLittleEndian(), 0);
- dumpRnglistsSection(OS, RnglistData, DumpOpts);
+ dumpRnglistsSection(OS, RnglistData, LookupPooledAddress, DumpOpts);
}
if (shouldDump(Explicit, ".debug_pubnames", DIDT_ID_DebugPubnames,
- DObj->getPubNamesSection()))
- DWARFDebugPubTable(DObj->getPubNamesSection(), isLittleEndian(), false)
+ DObj->getPubNamesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getPubNamesSection(), isLittleEndian(), false)
.dump(OS);
if (shouldDump(Explicit, ".debug_pubtypes", DIDT_ID_DebugPubtypes,
- DObj->getPubTypesSection()))
- DWARFDebugPubTable(DObj->getPubTypesSection(), isLittleEndian(), false)
+ DObj->getPubTypesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getPubTypesSection(), isLittleEndian(), false)
.dump(OS);
if (shouldDump(Explicit, ".debug_gnu_pubnames", DIDT_ID_DebugGnuPubnames,
- DObj->getGnuPubNamesSection()))
- DWARFDebugPubTable(DObj->getGnuPubNamesSection(), isLittleEndian(),
+ DObj->getGnuPubNamesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getGnuPubNamesSection(), isLittleEndian(),
true /* GnuStyle */)
.dump(OS);
if (shouldDump(Explicit, ".debug_gnu_pubtypes", DIDT_ID_DebugGnuPubtypes,
- DObj->getGnuPubTypesSection()))
- DWARFDebugPubTable(DObj->getGnuPubTypesSection(), isLittleEndian(),
+ DObj->getGnuPubTypesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getGnuPubTypesSection(), isLittleEndian(),
true /* GnuStyle */)
.dump(OS);
if (shouldDump(Explicit, ".debug_str_offsets", DIDT_ID_DebugStrOffsets,
DObj->getStringOffsetSection().Data))
- dumpStringOffsetsSection(
- OS, "debug_str_offsets", *DObj, DObj->getStringOffsetSection(),
- DObj->getStringSection(), compile_units(), type_unit_sections(),
- isLittleEndian(), getMaxVersion());
+ dumpStringOffsetsSection(OS, "debug_str_offsets", *DObj,
+ DObj->getStringOffsetSection(),
+ DObj->getStringSection(), normal_units(),
+ isLittleEndian(), getMaxVersion());
if (shouldDump(ExplicitDWO, ".debug_str_offsets.dwo", DIDT_ID_DebugStrOffsets,
DObj->getStringOffsetDWOSection().Data))
- dumpStringOffsetsSection(
- OS, "debug_str_offsets.dwo", *DObj, DObj->getStringOffsetDWOSection(),
- DObj->getStringDWOSection(), dwo_compile_units(),
- dwo_type_unit_sections(), isLittleEndian(), getMaxVersion());
+ dumpStringOffsetsSection(OS, "debug_str_offsets.dwo", *DObj,
+ DObj->getStringOffsetDWOSection(),
+ DObj->getStringDWOSection(), dwo_units(),
+ isLittleEndian(), getMaxDWOVersion());
- if (shouldDump(Explicit, ".gnu_index", DIDT_ID_GdbIndex,
+ if (shouldDump(Explicit, ".gdb_index", DIDT_ID_GdbIndex,
DObj->getGdbIndexSection())) {
getGdbIndex().dump(OS);
}
@@ -584,11 +611,12 @@ void DWARFContext::dump(
}
DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
- DWOCUs.parseDWO(*this, DObj->getInfoDWOSection(), true);
+ parseDWOUnits(LazyParse);
if (const auto &CUI = getCUIndex()) {
if (const auto *R = CUI.getFromHash(Hash))
- return DWOCUs.getUnitForIndexEntry(*R);
+ return dyn_cast_or_null<DWARFCompileUnit>(
+ DWOUnits.getUnitForIndexEntry(*R));
return nullptr;
}
@@ -607,14 +635,14 @@ DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
continue;
}
if (DWOCU->getDWOId() == Hash)
- return DWOCU.get();
+ return dyn_cast<DWARFCompileUnit>(DWOCU.get());
}
return nullptr;
}
DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) {
- parseCompileUnits();
- if (auto *CU = CUs.getUnitForOffset(Offset))
+ parseNormalUnits();
+ if (auto *CU = NormalUnits.getUnitForOffset(Offset))
return CU->getDIEForOffset(Offset);
return DWARFDie();
}
@@ -690,26 +718,28 @@ const DWARFDebugLoc *DWARFContext::getDebugLoc() {
return Loc.get();
Loc.reset(new DWARFDebugLoc);
- // Assume all compile units have the same address byte size.
+ // Assume all units have the same address byte size.
if (getNumCompileUnits()) {
DWARFDataExtractor LocData(*DObj, DObj->getLocSection(), isLittleEndian(),
- getCompileUnitAtIndex(0)->getAddressByteSize());
+ getUnitAtIndex(0)->getAddressByteSize());
Loc->parse(LocData);
}
return Loc.get();
}
-const DWARFDebugLocDWO *DWARFContext::getDebugLocDWO() {
+const DWARFDebugLoclists *DWARFContext::getDebugLocDWO() {
if (LocDWO)
return LocDWO.get();
- LocDWO.reset(new DWARFDebugLocDWO());
+ LocDWO.reset(new DWARFDebugLoclists());
// Assume all compile units have the same address byte size.
- if (getNumCompileUnits()) {
- DataExtractor LocData(DObj->getLocDWOSection().Data, isLittleEndian(),
- getCompileUnitAtIndex(0)->getAddressByteSize());
- LocDWO->parse(LocData);
- }
+ // FIXME: We don't need AddressSize for split DWARF since relocatable
+ // addresses cannot appear there. At the moment DWARFExpression requires it.
+ DataExtractor LocData(DObj->getLocDWOSection().Data, isLittleEndian(), 4);
+ // Use version 4. DWO does not support the DWARF v5 .debug_loclists yet and
+ // that means we are parsing the new style .debug_loc (pre-standatized version
+ // of the .debug_loclists).
+ LocDWO->parse(LocData, 4 /* Version */);
return LocDWO.get();
}
@@ -737,7 +767,7 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() {
// http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html
DWARFDataExtractor debugFrameData(DObj->getDebugFrameSection(),
isLittleEndian(), DObj->getAddressSize());
- DebugFrame.reset(new DWARFDebugFrame(false /* IsEH */));
+ DebugFrame.reset(new DWARFDebugFrame(getArch(), false /* IsEH */));
DebugFrame->parse(debugFrameData);
return DebugFrame.get();
}
@@ -748,7 +778,7 @@ const DWARFDebugFrame *DWARFContext::getEHFrame() {
DWARFDataExtractor debugFrameData(DObj->getEHFrameSection(), isLittleEndian(),
DObj->getAddressSize());
- DebugFrame.reset(new DWARFDebugFrame(true /* IsEH */));
+ DebugFrame.reset(new DWARFDebugFrame(getArch(), true /* IsEH */));
DebugFrame->parse(debugFrameData);
return DebugFrame.get();
}
@@ -806,9 +836,9 @@ const AppleAcceleratorTable &DWARFContext::getAppleObjC() {
const DWARFDebugLine::LineTable *
DWARFContext::getLineTableForUnit(DWARFUnit *U) {
Expected<const DWARFDebugLine::LineTable *> ExpectedLineTable =
- getLineTableForUnit(U, DWARFDebugLine::warn);
+ getLineTableForUnit(U, dumpWarning);
if (!ExpectedLineTable) {
- DWARFDebugLine::warn(ExpectedLineTable.takeError());
+ dumpWarning(ExpectedLineTable.takeError());
return nullptr;
}
return *ExpectedLineTable;
@@ -843,35 +873,34 @@ Expected<const DWARFDebugLine::LineTable *> DWARFContext::getLineTableForUnit(
RecoverableErrorCallback);
}
-void DWARFContext::parseCompileUnits() {
- CUs.parse(*this, DObj->getInfoSection());
-}
-
-void DWARFContext::parseTypeUnits() {
- if (!TUs.empty())
+void DWARFContext::parseNormalUnits() {
+ if (!NormalUnits.empty())
return;
+ DObj->forEachInfoSections([&](const DWARFSection &S) {
+ NormalUnits.addUnitsForSection(*this, S, DW_SECT_INFO);
+ });
+ NormalUnits.finishedInfoUnits();
DObj->forEachTypesSections([&](const DWARFSection &S) {
- TUs.emplace_back();
- TUs.back().parse(*this, S);
+ NormalUnits.addUnitsForSection(*this, S, DW_SECT_TYPES);
});
}
-void DWARFContext::parseDWOCompileUnits() {
- DWOCUs.parseDWO(*this, DObj->getInfoDWOSection());
-}
-
-void DWARFContext::parseDWOTypeUnits() {
- if (!DWOTUs.empty())
+void DWARFContext::parseDWOUnits(bool Lazy) {
+ if (!DWOUnits.empty())
return;
+ DObj->forEachInfoDWOSections([&](const DWARFSection &S) {
+ DWOUnits.addUnitsForDWOSection(*this, S, DW_SECT_INFO, Lazy);
+ });
+ DWOUnits.finishedInfoUnits();
DObj->forEachTypesDWOSections([&](const DWARFSection &S) {
- DWOTUs.emplace_back();
- DWOTUs.back().parseDWO(*this, S);
+ DWOUnits.addUnitsForDWOSection(*this, S, DW_SECT_TYPES, Lazy);
});
}
DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) {
- parseCompileUnits();
- return CUs.getUnitForOffset(Offset);
+ parseNormalUnits();
+ return dyn_cast_or_null<DWARFCompileUnit>(
+ NormalUnits.getUnitForOffset(Offset));
}
DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
@@ -1213,19 +1242,20 @@ class DWARFObjInMemory final : public DWARFObject {
const object::ObjectFile *Obj = nullptr;
std::vector<SectionName> SectionNames;
- using TypeSectionMap = MapVector<object::SectionRef, DWARFSectionMap,
+ using InfoSectionMap = MapVector<object::SectionRef, DWARFSectionMap,
std::map<object::SectionRef, unsigned>>;
- TypeSectionMap TypesSections;
- TypeSectionMap TypesDWOSections;
+ InfoSectionMap InfoSections;
+ InfoSectionMap TypesSections;
+ InfoSectionMap InfoDWOSections;
+ InfoSectionMap TypesDWOSections;
- DWARFSectionMap InfoSection;
DWARFSectionMap LocSection;
+ DWARFSectionMap LocListsSection;
DWARFSectionMap LineSection;
DWARFSectionMap RangeSection;
DWARFSectionMap RnglistsSection;
DWARFSectionMap StringOffsetSection;
- DWARFSectionMap InfoDWOSection;
DWARFSectionMap LineDWOSection;
DWARFSectionMap LocDWOSection;
DWARFSectionMap StringOffsetDWOSection;
@@ -1237,16 +1267,19 @@ class DWARFObjInMemory final : public DWARFObject {
DWARFSectionMap AppleNamespacesSection;
DWARFSectionMap AppleObjCSection;
DWARFSectionMap DebugNamesSection;
+ DWARFSectionMap PubNamesSection;
+ DWARFSectionMap PubTypesSection;
+ DWARFSectionMap GnuPubNamesSection;
+ DWARFSectionMap GnuPubTypesSection;
DWARFSectionMap *mapNameToDWARFSection(StringRef Name) {
return StringSwitch<DWARFSectionMap *>(Name)
- .Case("debug_info", &InfoSection)
.Case("debug_loc", &LocSection)
+ .Case("debug_loclists", &LocListsSection)
.Case("debug_line", &LineSection)
.Case("debug_str_offsets", &StringOffsetSection)
.Case("debug_ranges", &RangeSection)
.Case("debug_rnglists", &RnglistsSection)
- .Case("debug_info.dwo", &InfoDWOSection)
.Case("debug_loc.dwo", &LocDWOSection)
.Case("debug_line.dwo", &LineDWOSection)
.Case("debug_names", &DebugNamesSection)
@@ -1254,6 +1287,10 @@ class DWARFObjInMemory final : public DWARFObject {
.Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
.Case("debug_addr", &AddrSection)
.Case("apple_names", &AppleNamesSection)
+ .Case("debug_pubnames", &PubNamesSection)
+ .Case("debug_pubtypes", &PubTypesSection)
+ .Case("debug_gnu_pubnames", &GnuPubNamesSection)
+ .Case("debug_gnu_pubtypes", &GnuPubTypesSection)
.Case("apple_types", &AppleTypesSection)
.Case("apple_namespaces", &AppleNamespacesSection)
.Case("apple_namespac", &AppleNamespacesSection)
@@ -1267,12 +1304,8 @@ class DWARFObjInMemory final : public DWARFObject {
StringRef EHFrameSection;
StringRef StringSection;
StringRef MacinfoSection;
- StringRef PubNamesSection;
- StringRef PubTypesSection;
- StringRef GnuPubNamesSection;
StringRef AbbrevDWOSection;
StringRef StringDWOSection;
- StringRef GnuPubTypesSection;
StringRef CUIndexSection;
StringRef GdbIndexSection;
StringRef TUIndexSection;
@@ -1292,10 +1325,6 @@ class DWARFObjInMemory final : public DWARFObject {
.Case("eh_frame", &EHFrameSection)
.Case("debug_str", &StringSection)
.Case("debug_macinfo", &MacinfoSection)
- .Case("debug_pubnames", &PubNamesSection)
- .Case("debug_pubtypes", &PubTypesSection)
- .Case("debug_gnu_pubnames", &GnuPubNamesSection)
- .Case("debug_gnu_pubtypes", &GnuPubTypesSection)
.Case("debug_abbrev.dwo", &AbbrevDWOSection)
.Case("debug_str.dwo", &StringDWOSection)
.Case("debug_cu_index", &CUIndexSection)
@@ -1335,6 +1364,16 @@ public:
for (const auto &SecIt : Sections) {
if (StringRef *SectionData = mapSectionToMember(SecIt.first()))
*SectionData = SecIt.second->getBuffer();
+ else if (SecIt.first() == "debug_info")
+ // Find debug_info and debug_types data by section rather than name as
+ // there are multiple, comdat-grouped copies of these sections.
+ InfoSections[SectionRef()].Data = SecIt.second->getBuffer();
+ else if (SecIt.first() == "debug_info.dwo")
+ InfoDWOSections[SectionRef()].Data = SecIt.second->getBuffer();
+ else if (SecIt.first() == "debug_types")
+ TypesSections[SectionRef()].Data = SecIt.second->getBuffer();
+ else if (SecIt.first() == "debug_types.dwo")
+ TypesDWOSections[SectionRef()].Data = SecIt.second->getBuffer();
}
}
DWARFObjInMemory(const object::ObjectFile &Obj, const LoadedObjectInfo *L,
@@ -1389,9 +1428,13 @@ public:
// FIXME: Use the other dwo range section when we emit it.
RangeDWOSection.Data = Data;
}
+ } else if (Name == "debug_info") {
+ // Find debug_info and debug_types data by section rather than name as
+ // there are multiple, comdat-grouped copies of these sections.
+ InfoSections[Section].Data = Data;
+ } else if (Name == "debug_info.dwo") {
+ InfoDWOSections[Section].Data = Data;
} else if (Name == "debug_types") {
- // Find debug_types data by section rather than name as there are
- // multiple, comdat grouped, debug_types sections.
TypesSections[Section].Data = Data;
} else if (Name == "debug_types.dwo") {
TypesDWOSections[Section].Data = Data;
@@ -1426,9 +1469,16 @@ public:
DWARFSectionMap *Sec = mapNameToDWARFSection(RelSecName);
RelocAddrMap *Map = Sec ? &Sec->Relocs : nullptr;
if (!Map) {
- // Find debug_types relocs by section rather than name as there are
- // multiple, comdat grouped, debug_types sections.
- if (RelSecName == "debug_types")
+ // Find debug_info and debug_types relocs by section rather than name
+ // as there are multiple, comdat-grouped copies of these sections.
+ if (RelSecName == "debug_info")
+ Map = &static_cast<DWARFSectionMap &>(InfoSections[*RelocatedSection])
+ .Relocs;
+ else if (RelSecName == "debug_info.dwo")
+ Map = &static_cast<DWARFSectionMap &>(
+ InfoDWOSections[*RelocatedSection])
+ .Relocs;
+ else if (RelSecName == "debug_types")
Map =
&static_cast<DWARFSectionMap &>(TypesSections[*RelocatedSection])
.Relocs;
@@ -1526,8 +1576,10 @@ public:
StringRef getLineStringSection() const override { return LineStringSection; }
// Sections for DWARF5 split dwarf proposal.
- const DWARFSection &getInfoDWOSection() const override {
- return InfoDWOSection;
+ void forEachInfoDWOSections(
+ function_ref<void(const DWARFSection &)> F) const override {
+ for (auto &P : InfoDWOSections)
+ F(P.second);
}
void forEachTypesDWOSections(
function_ref<void(const DWARFSection &)> F) const override {
@@ -1537,6 +1589,7 @@ public:
StringRef getAbbrevSection() const override { return AbbrevSection; }
const DWARFSection &getLocSection() const override { return LocSection; }
+ const DWARFSection &getLoclistsSection() const override { return LocListsSection; }
StringRef getARangeSection() const override { return ARangeSection; }
StringRef getDebugFrameSection() const override { return DebugFrameSection; }
StringRef getEHFrameSection() const override { return EHFrameSection; }
@@ -1547,12 +1600,12 @@ public:
return RnglistsSection;
}
StringRef getMacinfoSection() const override { return MacinfoSection; }
- StringRef getPubNamesSection() const override { return PubNamesSection; }
- StringRef getPubTypesSection() const override { return PubTypesSection; }
- StringRef getGnuPubNamesSection() const override {
+ const DWARFSection &getPubNamesSection() const override { return PubNamesSection; }
+ const DWARFSection &getPubTypesSection() const override { return PubTypesSection; }
+ const DWARFSection &getGnuPubNamesSection() const override {
return GnuPubNamesSection;
}
- StringRef getGnuPubTypesSection() const override {
+ const DWARFSection &getGnuPubTypesSection() const override {
return GnuPubTypesSection;
}
const DWARFSection &getAppleNamesSection() const override {
@@ -1573,7 +1626,11 @@ public:
StringRef getFileName() const override { return FileName; }
uint8_t getAddressSize() const override { return AddressSize; }
- const DWARFSection &getInfoSection() const override { return InfoSection; }
+ void forEachInfoSections(
+ function_ref<void(const DWARFSection &)> F) const override {
+ for (auto &P : InfoSections)
+ F(P.second);
+ }
void forEachTypesSections(
function_ref<void(const DWARFSection &)> F) const override {
for (auto &P : TypesSections)
@@ -1609,7 +1666,8 @@ Error DWARFContext::loadRegisterInfo(const object::ObjectFile &Obj) {
const Target *TheTarget =
TargetRegistry::lookupTarget(TT.str(), TargetLookupError);
if (!TargetLookupError.empty())
- return make_error<StringError>(TargetLookupError, inconvertibleErrorCode());
+ return createStringError(errc::invalid_argument,
+ TargetLookupError.c_str());
RegInfo.reset(TheTarget->createMCRegInfo(TT.str()));
return Error::success();
}
@@ -1627,3 +1685,9 @@ uint8_t DWARFContext::getCUAddrSize() {
}
return Addr;
}
+
+void DWARFContext::dumpWarning(Error Warning) {
+ handleAllErrors(std::move(Warning), [](ErrorInfoBase &Info) {
+ WithColor::warning() << Info.message() << '\n';
+ });
+}
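
A recurring pattern in the DWARFContext hunks above is replacing
make_error<StringError>(Msg, inconvertibleErrorCode()) with the printf-style
createStringError helper and a concrete llvm::errc value. A minimal sketch of
that idiom, using only the llvm/Support/Error.h and llvm/Support/Errc.h APIs
already visible in the diff (the function and message below are hypothetical):

    #include "llvm/Support/Errc.h"
    #include "llvm/Support/Error.h"
    #include <cinttypes>
    #include <cstdint>

    using namespace llvm;

    // Return a formatted StringError that carries a meaningful error code
    // instead of inconvertibleErrorCode().
    static Error checkOffset(uint32_t Offset, uint32_t SectionSize) {
      if (Offset >= SectionSize)
        return createStringError(errc::invalid_argument,
                                 "offset 0x%8.8" PRIx32 " is past the end of"
                                 " the section (size 0x%8.8" PRIx32 ")",
                                 Offset, SectionSize);
      return Error::success();
    }

Errors produced this way can still be funneled through a warning sink such as
the new DWARFContext::dumpWarning above, which prints each message through
WithColor::warning().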
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
index 7085ca067ba6..22759bfac26c 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
@@ -148,7 +148,7 @@ void DWARFDebugAddrTable::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
HeaderData.Length, HeaderData.Version, HeaderData.AddrSize,
HeaderData.SegSize);
- static const char *Fmt32 = "0x%8.8" PRIx32;
+ static const char *Fmt32 = "0x%8.8" PRIx64;
static const char *Fmt64 = "0x%16.16" PRIx64;
std::string AddrFmt = "\n";
std::string AddrFmtVerbose = " => ";
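
The one-character-looking Fmt32 change above matters because the address
values handed to format() in this dumper are uint64_t; pairing a 64-bit
argument with a PRIx32 conversion in a varargs call is undefined behaviour. A
hedged sketch of the corrected pairing (the helper and variable are made up):

    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cinttypes>
    #include <cstdint>

    void printAddr(llvm::raw_ostream &OS, uint64_t Addr) {
      // The conversion specifier must match the promoted argument type: a
      // uint64_t needs PRIx64 even when only 8 hex digits end up printed.
      OS << llvm::format("0x%8.8" PRIx64, Addr);
    }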
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
index 19bfcaed2021..e8c5dec821b4 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
@@ -12,6 +12,7 @@
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/WithColor.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -53,10 +54,12 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) {
for (const auto &CU : CTX->compile_units()) {
uint32_t CUOffset = CU->getOffset();
if (ParsedCUOffsets.insert(CUOffset).second) {
- DWARFAddressRangesVector CURanges;
- CU->collectAddressRanges(CURanges);
- for (const auto &R : CURanges)
- appendRange(CUOffset, R.LowPC, R.HighPC);
+ Expected<DWARFAddressRangesVector> CURanges = CU->collectAddressRanges();
+ if (!CURanges)
+ WithColor::error() << toString(CURanges.takeError()) << '\n';
+ else
+ for (const auto &R : *CURanges)
+ appendRange(CUOffset, R.LowPC, R.HighPC);
}
}
@@ -80,7 +83,7 @@ void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC,
void DWARFDebugAranges::construct() {
std::multiset<uint32_t> ValidCUs; // Maintain the set of CUs describing
// a current address range.
- llvm::sort(Endpoints.begin(), Endpoints.end());
+ llvm::sort(Endpoints);
uint64_t PrevAddress = -1ULL;
for (const auto &E : Endpoints) {
if (PrevAddress < E.Address && !ValidCUs.empty()) {
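
collectAddressRanges() now reports malformed range data through Expected<>
instead of silently filling a caller-provided vector, so every caller has to
check the result before using it, as the .debug_aranges generator above now
does. A minimal sketch of that checking pattern (the helper is hypothetical;
only the Expected-returning shape shown in the hunk is assumed):

    #include "llvm/Support/Error.h"
    #include "llvm/Support/WithColor.h"

    template <typename UnitT> void visitRanges(UnitT &CU) {
      auto Ranges = CU.collectAddressRanges();
      if (!Ranges) {
        // An Expected in the error state must be consumed exactly once.
        llvm::WithColor::error() << llvm::toString(Ranges.takeError()) << '\n';
        return;
      }
      for (const auto &R : *Ranges)
        (void)R; // use R.LowPC / R.HighPC here
    }

The llvm::sort(Endpoints) change in the same hunk is simply the range overload
of llvm::sort; it forwards to the iterator form.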
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 73333395f4c1..ba55ffc28174 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -47,9 +48,9 @@ Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset,
uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK;
switch (Primary) {
default:
- return make_error<StringError>(
- "Invalid primary CFI opcode",
- std::make_error_code(std::errc::illegal_byte_sequence));
+ return createStringError(errc::illegal_byte_sequence,
+ "Invalid primary CFI opcode 0x%" PRIx8,
+ Primary);
case DW_CFA_advance_loc:
case DW_CFA_restore:
addInstruction(Primary, Op1);
@@ -62,9 +63,9 @@ Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset,
// Extended opcode - its value is Opcode itself.
switch (Opcode) {
default:
- return make_error<StringError>(
- "Invalid extended CFI opcode",
- std::make_error_code(std::errc::illegal_byte_sequence));
+ return createStringError(errc::illegal_byte_sequence,
+ "Invalid extended CFI opcode 0x%" PRIx8,
+ Opcode);
case DW_CFA_nop:
case DW_CFA_remember_state:
case DW_CFA_restore_state:
@@ -224,7 +225,7 @@ void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI,
switch (Type) {
case OT_Unset: {
OS << " Unsupported " << (OperandIdx ? "second" : "first") << " operand to";
- auto OpcodeName = CallFrameString(Opcode);
+ auto OpcodeName = CallFrameString(Opcode, Arch);
if (!OpcodeName.empty())
OS << " " << OpcodeName;
else
@@ -278,7 +279,7 @@ void CFIProgram::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH,
if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
OS.indent(2 * IndentLevel);
- OS << CallFrameString(Opcode) << ":";
+ OS << CallFrameString(Opcode, Arch) << ":";
for (unsigned i = 0; i < Instr.Ops.size(); ++i)
printOperand(OS, MRI, IsEH, Instr, i, Instr.Ops[i]);
OS << '\n';
@@ -324,8 +325,9 @@ void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
OS << "\n";
}
-DWARFDebugFrame::DWARFDebugFrame(bool IsEH, uint64_t EHFrameAddress)
- : IsEH(IsEH), EHFrameAddress(EHFrameAddress) {}
+DWARFDebugFrame::DWARFDebugFrame(Triple::ArchType Arch,
+ bool IsEH, uint64_t EHFrameAddress)
+ : Arch(Arch), IsEH(IsEH), EHFrameAddress(EHFrameAddress) {}
DWARFDebugFrame::~DWARFDebugFrame() = default;
@@ -395,7 +397,8 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
uint8_t SegmentDescriptorSize = Version < 4 ? 0 : Data.getU8(&Offset);
uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset);
int64_t DataAlignmentFactor = Data.getSLEB128(&Offset);
- uint64_t ReturnAddressRegister = Data.getULEB128(&Offset);
+ uint64_t ReturnAddressRegister =
+ Version == 1 ? Data.getU8(&Offset) : Data.getULEB128(&Offset);
// Parse the augmentation data for EH CIEs
StringRef AugmentationData("");
@@ -443,6 +446,11 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
StartAugmentationOffset = Offset;
EndAugmentationOffset = Offset +
static_cast<uint32_t>(*AugmentationLength);
+ break;
+ case 'B':
+ // B-Key is used for signing functions associated with this
+ // augmentation string
+ break;
}
}
@@ -459,7 +467,7 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
StartOffset, Length, Version, AugmentationString, AddressSize,
SegmentDescriptorSize, CodeAlignmentFactor, DataAlignmentFactor,
ReturnAddressRegister, AugmentationData, FDEPointerEncoding,
- LSDAPointerEncoding, Personality, PersonalityEncoding);
+ LSDAPointerEncoding, Personality, PersonalityEncoding, Arch);
CIEs[StartOffset] = Cie.get();
Entries.emplace_back(std::move(Cie));
} else {
@@ -511,7 +519,7 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
Entries.emplace_back(new FDE(StartOffset, Length, CIEPointer,
InitialLocation, AddressRange,
- Cie, LSDAAddress));
+ Cie, LSDAAddress, Arch));
}
if (Error E =
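
DWARFDebugFrame now carries the target architecture so that CallFrameString()
can resolve vendor-specific CFI opcode names, and CIE parsing reads the return
address register as a single byte for version 1 tables. A sketch of the
updated construction, assuming the constructor shown above (with
EHFrameAddress defaulted) and a caller-supplied extractor:

    #include "llvm/ADT/Triple.h"
    #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
    #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"

    // Hypothetical helper: parse a .debug_frame payload for a known target.
    void parseDebugFrame(const llvm::DWARFDataExtractor &Data,
                         llvm::Triple::ArchType Arch) {
      // IsEH = false selects .debug_frame (not .eh_frame) semantics; the
      // architecture lets dumping pick target-specific opcode names.
      llvm::DWARFDebugFrame DF(Arch, /*IsEH=*/false);
      DF.parse(Data);
    }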
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 53a8e193ef56..1d621ff244f3 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -15,6 +15,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/WithColor.h"
@@ -273,24 +274,6 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
return true;
}
-template <typename... Ts>
-static std::string formatErrorString(char const *Fmt, const Ts &... Vals) {
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
- Stream << format(Fmt, Vals...);
- return Stream.str();
-}
-
-template <typename... Ts>
-static Error createError(char const *Fmt, const Ts &... Vals) {
- return make_error<StringError>(formatErrorString(Fmt, Vals...),
- inconvertibleErrorCode());
-}
-
-static Error createError(char const *Msg) {
- return make_error<StringError>(Msg, inconvertibleErrorCode());
-}
-
Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
uint32_t *OffsetPtr,
const DWARFContext &Ctx,
@@ -303,14 +286,15 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
FormParams.Format = dwarf::DWARF64;
TotalLength = DebugLineData.getU64(OffsetPtr);
} else if (TotalLength >= 0xffffff00) {
- return createError(
+ return createStringError(errc::invalid_argument,
"parsing line table prologue at offset 0x%8.8" PRIx64
" unsupported reserved unit length found of value 0x%8.8" PRIx64,
PrologueOffset, TotalLength);
}
FormParams.Version = DebugLineData.getU16(OffsetPtr);
if (getVersion() < 2)
- return createError("parsing line table prologue at offset 0x%8.8" PRIx64
+ return createStringError(errc::not_supported,
+ "parsing line table prologue at offset 0x%8.8" PRIx64
" found unsupported version 0x%2.2" PRIx16,
PrologueOffset, getVersion());
@@ -342,7 +326,7 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset,
FormParams, Ctx, U, ContentTypes,
IncludeDirectories, FileNames)) {
- return createError(
+ return createStringError(errc::invalid_argument,
"parsing line table prologue at 0x%8.8" PRIx64
" found an invalid directory or file table description at"
" 0x%8.8" PRIx64,
@@ -353,7 +337,8 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
ContentTypes, IncludeDirectories, FileNames);
if (*OffsetPtr != EndPrologueOffset)
- return createError("parsing line table prologue at 0x%8.8" PRIx64
+ return createStringError(errc::invalid_argument,
+ "parsing line table prologue at 0x%8.8" PRIx64
" should have ended at 0x%8.8" PRIx64
" but it ended at 0x%8.8" PRIx64,
PrologueOffset, EndPrologueOffset, (uint64_t)*OffsetPtr);
@@ -470,7 +455,7 @@ Expected<const DWARFDebugLine::LineTable *> DWARFDebugLine::getOrParseLineTable(
DWARFDataExtractor &DebugLineData, uint32_t Offset, const DWARFContext &Ctx,
const DWARFUnit *U, std::function<void(Error)> RecoverableErrorCallback) {
if (!DebugLineData.isValidOffset(Offset))
- return createError("offset 0x%8.8" PRIx32
+ return createStringError(errc::invalid_argument, "offset 0x%8.8" PRIx32
" is not a valid debug line section offset",
Offset);
@@ -575,7 +560,8 @@ Error DWARFDebugLine::LineTable::parse(
if (DebugLineData.getAddressSize() == 0)
DebugLineData.setAddressSize(Len - 1);
else if (DebugLineData.getAddressSize() != Len - 1) {
- return createError("mismatching address size at offset 0x%8.8" PRIx32
+ return createStringError(errc::invalid_argument,
+ "mismatching address size at offset 0x%8.8" PRIx32
" expected 0x%2.2" PRIx8 " found 0x%2.2" PRIx64,
ExtOffset, DebugLineData.getAddressSize(),
Len - 1);
@@ -640,7 +626,8 @@ Error DWARFDebugLine::LineTable::parse(
// Make sure the stated and parsed lengths are the same.
// Otherwise we have an unparseable line-number program.
if (*OffsetPtr - ExtOffset != Len)
- return createError("unexpected line op length at offset 0x%8.8" PRIx32
+ return createStringError(errc::illegal_byte_sequence,
+ "unexpected line op length at offset 0x%8.8" PRIx32
" expected 0x%2.2" PRIx64 " found 0x%2.2" PRIx32,
ExtOffset, Len, *OffsetPtr - ExtOffset);
} else if (Opcode < Prologue.OpcodeBase) {
@@ -847,11 +834,12 @@ Error DWARFDebugLine::LineTable::parse(
if (!State.Sequence.Empty)
RecoverableErrorCallback(
- createError("last sequence in debug line table is not terminated!"));
+ createStringError(errc::illegal_byte_sequence,
+ "last sequence in debug line table is not terminated!"));
// Sort all sequences so that address lookup will work faster.
if (!Sequences.empty()) {
- llvm::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC);
+ llvm::sort(Sequences, Sequence::orderByLowPC);
// Note: actually, instruction address ranges of sequences should not
// overlap (in shared objects and executables). If they do, the address
// lookup would still work, though, but result would be ambiguous.
@@ -1047,17 +1035,16 @@ bool DWARFDebugLine::LineTable::getFileLineInfoForAddress(
// line-table section.
static DWARFDebugLine::SectionParser::LineToUnitMap
buildLineToUnitMap(DWARFDebugLine::SectionParser::cu_range CUs,
- DWARFDebugLine::SectionParser::tu_range TUSections) {
+ DWARFDebugLine::SectionParser::tu_range TUs) {
DWARFDebugLine::SectionParser::LineToUnitMap LineToUnit;
for (const auto &CU : CUs)
if (auto CUDIE = CU->getUnitDIE())
if (auto StmtOffset = toSectionOffset(CUDIE.find(DW_AT_stmt_list)))
LineToUnit.insert(std::make_pair(*StmtOffset, &*CU));
- for (const auto &TUS : TUSections)
- for (const auto &TU : TUS)
- if (auto TUDIE = TU->getUnitDIE())
- if (auto StmtOffset = toSectionOffset(TUDIE.find(DW_AT_stmt_list)))
- LineToUnit.insert(std::make_pair(*StmtOffset, &*TU));
+ for (const auto &TU : TUs)
+ if (auto TUDIE = TU->getUnitDIE())
+ if (auto StmtOffset = toSectionOffset(TUDIE.find(DW_AT_stmt_list)))
+ LineToUnit.insert(std::make_pair(*StmtOffset, &*TU));
return LineToUnit;
}
@@ -1125,9 +1112,3 @@ void DWARFDebugLine::SectionParser::moveToNextTable(uint32_t OldOffset,
Done = true;
}
}
-
-void DWARFDebugLine::warn(Error Err) {
- handleAllErrors(std::move(Err), [](ErrorInfoBase &Info) {
- WithColor::warning() << Info.message() << '\n';
- });
-}
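
With DWARFDebugLine::warn gone (its role is taken by DWARFContext::dumpWarning
above), recoverable line-table problems reach whatever callback the caller
passes to the Expected-returning overload. A sketch of a client supplying its
own callback, assuming the getLineTableForUnit(U, RecoverableErrorCallback)
signature visible in the earlier DWARFContext hunk (names are illustrative):

    #include "llvm/DebugInfo/DWARF/DWARFContext.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpLineTable(llvm::DWARFContext &Ctx, llvm::DWARFUnit *U) {
      auto Table = Ctx.getLineTableForUnit(U, [](llvm::Error E) {
        // Recoverable problems (e.g. an unterminated final sequence) land
        // here instead of being printed unconditionally by the parser.
        llvm::errs() << "line table warning: "
                     << llvm::toString(std::move(E)) << '\n';
      });
      if (!Table) {
        llvm::consumeError(Table.takeError());
        return;
      }
      // (*Table)->dump(...) or address lookups can follow here.
    }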
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index 617b914ecce9..f8b5ff6ec8fb 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -124,7 +124,7 @@ DWARFDebugLoc::parseOneLocationList(DWARFDataExtractor Data, unsigned *Offset) {
StringRef str = Data.getData().substr(*Offset, Bytes);
*Offset += Bytes;
E.Loc.reserve(str.size());
- std::copy(str.begin(), str.end(), std::back_inserter(E.Loc));
+ llvm::copy(str, std::back_inserter(E.Loc));
LL.Entries.push_back(std::move(E));
}
}
@@ -144,51 +144,74 @@ void DWARFDebugLoc::parse(const DWARFDataExtractor &data) {
WithColor::error() << "failed to consume entire .debug_loc section\n";
}
-Optional<DWARFDebugLocDWO::LocationList>
-DWARFDebugLocDWO::parseOneLocationList(DataExtractor Data, unsigned *Offset) {
+Optional<DWARFDebugLoclists::LocationList>
+DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset,
+ unsigned Version) {
LocationList LL;
LL.Offset = *Offset;
// dwarf::DW_LLE_end_of_list_entry is 0 and indicates the end of the list.
while (auto Kind =
static_cast<dwarf::LocationListEntry>(Data.getU8(Offset))) {
- if (Kind != dwarf::DW_LLE_startx_length) {
+
+ Entry E;
+ E.Kind = Kind;
+ switch (Kind) {
+ case dwarf::DW_LLE_startx_length:
+ E.Value0 = Data.getULEB128(Offset);
+ // Pre-DWARF v5 has a different interpretation of the length field. We have
+ // to support both the pre-v5 and the standardized styles for compatibility.
+ if (Version < 5)
+ E.Value1 = Data.getU32(Offset);
+ else
+ E.Value1 = Data.getULEB128(Offset);
+ break;
+ case dwarf::DW_LLE_start_length:
+ E.Value0 = Data.getAddress(Offset);
+ E.Value1 = Data.getULEB128(Offset);
+ break;
+ case dwarf::DW_LLE_offset_pair:
+ E.Value0 = Data.getULEB128(Offset);
+ E.Value1 = Data.getULEB128(Offset);
+ break;
+ case dwarf::DW_LLE_base_address:
+ E.Value0 = Data.getAddress(Offset);
+ break;
+ default:
WithColor::error() << "dumping support for LLE of kind " << (int)Kind
<< " not implemented\n";
return None;
}
- Entry E;
- E.Start = Data.getULEB128(Offset);
- E.Length = Data.getU32(Offset);
-
- unsigned Bytes = Data.getU16(Offset);
- // A single location description describing the location of the object...
- StringRef str = Data.getData().substr(*Offset, Bytes);
- *Offset += Bytes;
- E.Loc.resize(str.size());
- std::copy(str.begin(), str.end(), E.Loc.begin());
+ if (Kind != dwarf::DW_LLE_base_address) {
+ unsigned Bytes = Data.getU16(Offset);
+ // A single location description describing the location of the object...
+ StringRef str = Data.getData().substr(*Offset, Bytes);
+ *Offset += Bytes;
+ E.Loc.resize(str.size());
+ llvm::copy(str, E.Loc.begin());
+ }
LL.Entries.push_back(std::move(E));
}
return LL;
}
-void DWARFDebugLocDWO::parse(DataExtractor data) {
+void DWARFDebugLoclists::parse(DataExtractor data, unsigned Version) {
IsLittleEndian = data.isLittleEndian();
AddressSize = data.getAddressSize();
uint32_t Offset = 0;
while (data.isValidOffset(Offset)) {
- if (auto LL = parseOneLocationList(data, &Offset))
+ if (auto LL = parseOneLocationList(data, &Offset, Version))
Locations.push_back(std::move(*LL));
else
return;
}
}
-DWARFDebugLocDWO::LocationList const *
-DWARFDebugLocDWO::getLocationListAtOffset(uint64_t Offset) const {
+DWARFDebugLoclists::LocationList const *
+DWARFDebugLoclists::getLocationListAtOffset(uint64_t Offset) const {
auto It = std::lower_bound(
Locations.begin(), Locations.end(), Offset,
[](const LocationList &L, uint64_t Offset) { return L.Offset < Offset; });
@@ -197,23 +220,49 @@ DWARFDebugLocDWO::getLocationListAtOffset(uint64_t Offset) const {
return nullptr;
}
-void DWARFDebugLocDWO::LocationList::dump(raw_ostream &OS, bool IsLittleEndian,
- unsigned AddressSize,
- const MCRegisterInfo *MRI,
- unsigned Indent) const {
+void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr,
+ bool IsLittleEndian,
+ unsigned AddressSize,
+ const MCRegisterInfo *MRI,
+ unsigned Indent) const {
for (const Entry &E : Entries) {
- OS << '\n';
- OS.indent(Indent);
- OS << "Addr idx " << E.Start << " (w/ length " << E.Length << "): ";
+ switch (E.Kind) {
+ case dwarf::DW_LLE_startx_length:
+ OS << '\n';
+ OS.indent(Indent);
+ OS << "Addr idx " << E.Value0 << " (w/ length " << E.Value1 << "): ";
+ break;
+ case dwarf::DW_LLE_start_length:
+ OS << '\n';
+ OS.indent(Indent);
+ OS << format("[0x%*.*" PRIx64 ", 0x%*.*" PRIx64 "): ", AddressSize * 2,
+ AddressSize * 2, E.Value0, AddressSize * 2, AddressSize * 2,
+ E.Value0 + E.Value1);
+ break;
+ case dwarf::DW_LLE_offset_pair:
+ OS << '\n';
+ OS.indent(Indent);
+ OS << format("[0x%*.*" PRIx64 ", 0x%*.*" PRIx64 "): ", AddressSize * 2,
+ AddressSize * 2, BaseAddr + E.Value0, AddressSize * 2,
+ AddressSize * 2, BaseAddr + E.Value1);
+ break;
+ case dwarf::DW_LLE_base_address:
+ BaseAddr = E.Value0;
+ break;
+ default:
+ llvm_unreachable("unreachable locations list kind");
+ }
+
dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI);
}
}
-void DWARFDebugLocDWO::dump(raw_ostream &OS, const MCRegisterInfo *MRI,
- Optional<uint64_t> Offset) const {
+void DWARFDebugLoclists::dump(raw_ostream &OS, uint64_t BaseAddr,
+ const MCRegisterInfo *MRI,
+ Optional<uint64_t> Offset) const {
auto DumpLocationList = [&](const LocationList &L) {
OS << format("0x%8.8x: ", L.Offset);
- L.dump(OS, IsLittleEndian, AddressSize, MRI, /*Indent=*/12);
+ L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, /*Indent=*/12);
OS << "\n\n";
};
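
DWARFDebugLoclists has to be told which encoding generation it is reading
because DW_LLE_startx_length carries a fixed 32-bit length before DWARF v5 and
a ULEB128 length from v5 on. A sketch of driving the parser, assuming the
parse(DataExtractor, Version) signature above (section plumbing omitted, names
hypothetical):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
    #include "llvm/Support/DataExtractor.h"

    void parseLocLists(llvm::StringRef Contents, bool IsLittleEndian,
                       uint8_t AddrSize, unsigned DwarfVersion) {
      llvm::DataExtractor Data(Contents, IsLittleEndian, AddrSize);
      llvm::DWARFDebugLoclists Loclists;
      // Version < 5 selects the pre-standardized .debug_loc.dwo reading of
      // DW_LLE_startx_length; 5 and later select the DWARF v5 rules.
      Loclists.parse(Data, DwarfVersion < 5 ? 4 : DwarfVersion);
    }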
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
index de8b6e543fab..abd1ad59a9c1 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Support/DataExtractor.h"
@@ -18,10 +19,11 @@
using namespace llvm;
using namespace dwarf;
-DWARFDebugPubTable::DWARFDebugPubTable(StringRef Data, bool LittleEndian,
- bool GnuStyle)
+DWARFDebugPubTable::DWARFDebugPubTable(const DWARFObject &Obj,
+ const DWARFSection &Sec,
+ bool LittleEndian, bool GnuStyle)
: GnuStyle(GnuStyle) {
- DataExtractor PubNames(Data, LittleEndian, 0);
+ DWARFDataExtractor PubNames(Obj, Sec, LittleEndian, 0);
uint32_t Offset = 0;
while (PubNames.isValidOffset(Offset)) {
Sets.push_back({});
@@ -29,10 +31,10 @@ DWARFDebugPubTable::DWARFDebugPubTable(StringRef Data, bool LittleEndian,
SetData.Length = PubNames.getU32(&Offset);
SetData.Version = PubNames.getU16(&Offset);
- SetData.Offset = PubNames.getU32(&Offset);
+ SetData.Offset = PubNames.getRelocatedValue(4, &Offset);
SetData.Size = PubNames.getU32(&Offset);
- while (Offset < Data.size()) {
+ while (Offset < Sec.Data.size()) {
uint32_t DieRef = PubNames.getU32(&Offset);
if (DieRef == 0)
break;
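
Constructing the pub table from a DWARFSection rather than a raw StringRef
lets getRelocatedValue() apply relocations to the CU offset field when the
input is an unlinked object file. A sketch of the new constructor in use,
assuming the signature above and the DWARFObject accessors from the earlier
DWARFContext hunks (the helper itself is hypothetical):

    #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
    #include "llvm/DebugInfo/DWARF/DWARFObject.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpPubNames(const llvm::DWARFObject &Obj, llvm::raw_ostream &OS) {
      // GnuStyle = false selects plain .debug_pubnames semantics.
      llvm::DWARFDebugPubTable Table(Obj, Obj.getPubNamesSection(),
                                     Obj.isLittleEndian(), /*GnuStyle=*/false);
      Table.dump(OS);
    }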
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index a565718debd0..dfb913000a46 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cinttypes>
@@ -16,15 +17,6 @@
using namespace llvm;
-// FIXME: There are several versions of this. Consolidate them.
-template <typename... Ts>
-static Error createError(char const *Fmt, const Ts &... Vals) {
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
- Stream << format(Fmt, Vals...);
- return make_error<StringError>(Stream.str(), inconvertibleErrorCode());
-}
-
void DWARFDebugRangeList::clear() {
Offset = -1U;
AddressSize = 0;
@@ -35,11 +27,13 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
uint32_t *offset_ptr) {
clear();
if (!data.isValidOffset(*offset_ptr))
- return createError("invalid range list offset 0x%" PRIx32, *offset_ptr);
+ return createStringError(errc::invalid_argument,
+ "invalid range list offset 0x%" PRIx32, *offset_ptr);
AddressSize = data.getAddressSize();
if (AddressSize != 4 && AddressSize != 8)
- return createError("invalid address size: %d", AddressSize);
+ return createStringError(errc::invalid_argument,
+ "invalid address size: %" PRIu8, AddressSize);
Offset = *offset_ptr;
while (true) {
RangeListEntry Entry;
@@ -53,7 +47,8 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
// Check that both values were extracted correctly.
if (*offset_ptr != prev_offset + 2 * AddressSize) {
clear();
- return createError("invalid range list entry at offset 0x%" PRIx32,
+ return createStringError(errc::invalid_argument,
+ "invalid range list entry at offset 0x%" PRIx32,
prev_offset);
}
if (Entry.isEndOfListEntry())
@@ -74,7 +69,7 @@ void DWARFDebugRangeList::dump(raw_ostream &OS) const {
}
DWARFAddressRangesVector DWARFDebugRangeList::getAbsoluteRanges(
- llvm::Optional<BaseAddress> BaseAddr) const {
+ llvm::Optional<SectionedAddress> BaseAddr) const {
DWARFAddressRangesVector Res;
for (const RangeListEntry &RLE : Entries) {
if (RLE.isBaseAddressSelectionEntry(AddressSize)) {
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
index b19c808a8fb3..60c6eb30857f 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
@@ -10,28 +10,13 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-template <typename... Ts>
-static Error createError(char const *Fmt, const Ts &... Vals) {
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
- Stream << format(Fmt, Vals...);
- return make_error<StringError>(Stream.str(), inconvertibleErrorCode());
-}
-
-namespace llvm { // workaround for gcc bug
-template <>
-Error DWARFListType<RangeListEntry>::createError(const char *Fmt, const char *s,
- uint32_t Val) {
- return ::createError(Fmt, s, Val);
-}
-}
-
Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
uint32_t *OffsetPtr) {
Offset = *OffsetPtr;
@@ -47,31 +32,49 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
Value0 = Value1 = 0;
break;
// TODO: Support other encodings.
- case dwarf::DW_RLE_base_addressx:
- return createError("unsupported rnglists encoding DW_RLE_base_addressx "
- "at offset 0x%" PRIx32,
- *OffsetPtr - 1);
+ case dwarf::DW_RLE_base_addressx: {
+ uint32_t PreviousOffset = *OffsetPtr - 1;
+ Value0 = Data.getULEB128(OffsetPtr);
+ if (End < *OffsetPtr)
+ return createStringError(
+ errc::invalid_argument,
+ "read past end of table when reading "
+ "DW_RLE_base_addressx encoding at offset 0x%" PRIx32,
+ PreviousOffset);
+ break;
+ }
case dwarf::DW_RLE_startx_endx:
- return createError("unsupported rnglists encoding DW_RLE_startx_endx at "
+ return createStringError(errc::not_supported,
+ "unsupported rnglists encoding DW_RLE_startx_endx at "
"offset 0x%" PRIx32,
*OffsetPtr - 1);
- case dwarf::DW_RLE_startx_length:
- return createError("unsupported rnglists encoding DW_RLE_startx_length "
- "at offset 0x%" PRIx32,
- *OffsetPtr - 1);
+ case dwarf::DW_RLE_startx_length: {
+ uint32_t PreviousOffset = *OffsetPtr - 1;
+ Value0 = Data.getULEB128(OffsetPtr);
+ Value1 = Data.getULEB128(OffsetPtr);
+ if (End < *OffsetPtr)
+ return createStringError(
+ errc::invalid_argument,
+ "read past end of table when reading "
+ "DW_RLE_startx_length encoding at offset 0x%" PRIx32,
+ PreviousOffset);
+ break;
+ }
case dwarf::DW_RLE_offset_pair: {
uint32_t PreviousOffset = *OffsetPtr - 1;
Value0 = Data.getULEB128(OffsetPtr);
Value1 = Data.getULEB128(OffsetPtr);
if (End < *OffsetPtr)
- return createError("read past end of table when reading "
+ return createStringError(errc::invalid_argument,
+ "read past end of table when reading "
"DW_RLE_offset_pair encoding at offset 0x%" PRIx32,
PreviousOffset);
break;
}
case dwarf::DW_RLE_base_address: {
if ((End - *OffsetPtr) < Data.getAddressSize())
- return createError("insufficient space remaining in table for "
+ return createStringError(errc::invalid_argument,
+ "insufficient space remaining in table for "
"DW_RLE_base_address encoding at offset 0x%" PRIx32,
*OffsetPtr - 1);
Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex);
@@ -79,7 +82,8 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
}
case dwarf::DW_RLE_start_end: {
if ((End - *OffsetPtr) < unsigned(Data.getAddressSize() * 2))
- return createError("insufficient space remaining in table for "
+ return createStringError(errc::invalid_argument,
+ "insufficient space remaining in table for "
"DW_RLE_start_end encoding "
"at offset 0x%" PRIx32,
*OffsetPtr - 1);
@@ -92,13 +96,15 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex);
Value1 = Data.getULEB128(OffsetPtr);
if (End < *OffsetPtr)
- return createError("read past end of table when reading "
+ return createStringError(errc::invalid_argument,
+ "read past end of table when reading "
"DW_RLE_start_length encoding at offset 0x%" PRIx32,
PreviousOffset);
break;
}
default:
- return createError("unknown rnglists encoding 0x%" PRIx32
+ return createStringError(errc::not_supported,
+ "unknown rnglists encoding 0x%" PRIx32
" at offset 0x%" PRIx32,
uint32_t(Encoding), *OffsetPtr - 1);
}
@@ -107,12 +113,19 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
return Error::success();
}
-DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
- llvm::Optional<BaseAddress> BaseAddr) const {
+DWARFAddressRangesVector
+DWARFDebugRnglist::getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr,
+ DWARFUnit &U) const {
DWARFAddressRangesVector Res;
for (const RangeListEntry &RLE : Entries) {
if (RLE.EntryKind == dwarf::DW_RLE_end_of_list)
break;
+ if (RLE.EntryKind == dwarf::DW_RLE_base_addressx) {
+ BaseAddr = U.getAddrOffsetSectionItem(RLE.Value0);
+ if (!BaseAddr)
+ BaseAddr = {RLE.Value0, -1ULL};
+ continue;
+ }
if (RLE.EntryKind == dwarf::DW_RLE_base_address) {
BaseAddr = {RLE.Value0, RLE.SectionIndex};
continue;
@@ -140,6 +153,15 @@ DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
E.LowPC = RLE.Value0;
E.HighPC = E.LowPC + RLE.Value1;
break;
+ case dwarf::DW_RLE_startx_length: {
+ auto Start = U.getAddrOffsetSectionItem(RLE.Value0);
+ if (!Start)
+ Start = {0, -1ULL};
+ E.SectionIndex = Start->SectionIndex;
+ E.LowPC = Start->Address;
+ E.HighPC = E.LowPC + RLE.Value1;
+ break;
+ }
default:
// Unsupported encodings should have been reported during extraction,
// so we should not run into any here.
@@ -150,9 +172,11 @@ DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
return Res;
}
-void RangeListEntry::dump(raw_ostream &OS, uint8_t AddrSize,
- uint8_t MaxEncodingStringLength,
- uint64_t &CurrentBase, DIDumpOptions DumpOpts) const {
+void RangeListEntry::dump(
+ raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength,
+ uint64_t &CurrentBase, DIDumpOptions DumpOpts,
+ llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ LookupPooledAddress) const {
auto PrintRawEntry = [](raw_ostream &OS, const RangeListEntry &Entry,
uint8_t AddrSize, DIDumpOptions DumpOpts) {
if (DumpOpts.Verbose) {
@@ -179,6 +203,17 @@ void RangeListEntry::dump(raw_ostream &OS, uint8_t AddrSize,
case dwarf::DW_RLE_end_of_list:
OS << (DumpOpts.Verbose ? "" : "<End of list>");
break;
+ // case dwarf::DW_RLE_base_addressx:
+ case dwarf::DW_RLE_base_addressx: {
+ if (auto SA = LookupPooledAddress(Value0))
+ CurrentBase = SA->Address;
+ else
+ CurrentBase = Value0;
+ if (!DumpOpts.Verbose)
+ return;
+ OS << format(" 0x%*.*" PRIx64, AddrSize * 2, AddrSize * 2, Value0);
+ break;
+ }
case dwarf::DW_RLE_base_address:
// In non-verbose mode we do not print anything for this entry.
CurrentBase = Value0;
@@ -198,6 +233,14 @@ void RangeListEntry::dump(raw_ostream &OS, uint8_t AddrSize,
case dwarf::DW_RLE_start_end:
DWARFAddressRange(Value0, Value1).dump(OS, AddrSize, DumpOpts);
break;
+ case dwarf::DW_RLE_startx_length: {
+ PrintRawEntry(OS, *this, AddrSize, DumpOpts);
+ uint64_t Start = 0;
+ if (auto SA = LookupPooledAddress(Value0))
+ Start = SA->Address;
+ DWARFAddressRange(Start, Start + Value1).dump(OS, AddrSize, DumpOpts);
+ break;
+ } break;
default:
llvm_unreachable("Unsupported range list encoding");
}
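
The indexed rnglist forms are resolved through the unit's address table,
which is why getAbsoluteRanges() now takes the owning DWARFUnit as well as an
optional base address. A sketch of the call, assuming only the signature in
the hunk above (the helper is hypothetical):

    #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
    #include "llvm/DebugInfo/DWARF/DWARFUnit.h"

    void collectRanges(const llvm::DWARFDebugRnglist &List, llvm::DWARFUnit &U) {
      // DW_RLE_base_addressx and DW_RLE_startx_length entries look their
      // operands up in U's .debug_addr slice via getAddrOffsetSectionItem().
      llvm::DWARFAddressRangesVector Ranges =
          List.getAbsoluteRanges(U.getBaseAddress(), U);
      for (const auto &R : Ranges)
        (void)R; // R.LowPC, R.HighPC and R.SectionIndex are absolute here.
    }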
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 904ceab7b286..81ef0c8c7aec 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -59,25 +59,19 @@ static void dumpRanges(const DWARFObject &Obj, raw_ostream &OS,
const DWARFAddressRangesVector &Ranges,
unsigned AddressSize, unsigned Indent,
const DIDumpOptions &DumpOpts) {
+ if (!DumpOpts.ShowAddresses)
+ return;
+
ArrayRef<SectionName> SectionNames;
if (DumpOpts.Verbose)
SectionNames = Obj.getSectionNames();
for (const DWARFAddressRange &R : Ranges) {
-
OS << '\n';
OS.indent(Indent);
R.dump(OS, AddressSize);
- if (SectionNames.empty() || R.SectionIndex == -1ULL)
- continue;
-
- StringRef Name = SectionNames[R.SectionIndex].Name;
- OS << " \"" << Name << '\"';
-
- // Print section index if name is not unique.
- if (!SectionNames[R.SectionIndex].IsNameUnique)
- OS << format(" [%" PRIu64 "]", R.SectionIndex);
+ DWARFFormValue::dumpAddressSection(Obj, OS, DumpOpts, R.SectionIndex);
}
}
@@ -99,27 +93,45 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
FormValue.dump(OS, DumpOpts);
if (FormValue.isFormClass(DWARFFormValue::FC_SectionOffset)) {
- const DWARFSection &LocSection = Obj.getLocSection();
- const DWARFSection &LocDWOSection = Obj.getLocDWOSection();
uint32_t Offset = *FormValue.getAsSectionOffset();
- if (!LocSection.Data.empty()) {
+ if (!U->isDWOUnit() && !U->getLocSection()->Data.empty()) {
DWARFDebugLoc DebugLoc;
- DWARFDataExtractor Data(Obj, LocSection, Ctx.isLittleEndian(),
+ DWARFDataExtractor Data(Obj, *U->getLocSection(), Ctx.isLittleEndian(),
Obj.getAddressSize());
auto LL = DebugLoc.parseOneLocationList(Data, &Offset);
if (LL) {
uint64_t BaseAddr = 0;
- if (Optional<BaseAddress> BA = U->getBaseAddress())
+ if (Optional<SectionedAddress> BA = U->getBaseAddress())
BaseAddr = BA->Address;
LL->dump(OS, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, BaseAddr,
Indent);
} else
OS << "error extracting location list.";
- } else if (!LocDWOSection.Data.empty()) {
- DataExtractor Data(LocDWOSection.Data, Ctx.isLittleEndian(), 0);
- auto LL = DWARFDebugLocDWO::parseOneLocationList(Data, &Offset);
+ return;
+ }
+
+ bool UseLocLists = !U->isDWOUnit();
+ StringRef LoclistsSectionData =
+ UseLocLists ? Obj.getLoclistsSection().Data : U->getLocSectionData();
+
+ if (!LoclistsSectionData.empty()) {
+ DataExtractor Data(LoclistsSectionData, Ctx.isLittleEndian(),
+ Obj.getAddressSize());
+
+ // Old-style location lists were used in DWARF v4 (.debug_loc.dwo section).
+ // Modern location lists (.debug_loclists) are used starting with v5.
+ // Ideally we should take the version from the .debug_loclists section
+ // header, but we use the CU's version for simplicity.
+ auto LL = DWARFDebugLoclists::parseOneLocationList(
+ Data, &Offset, UseLocLists ? U->getVersion() : 4);
+
+ uint64_t BaseAddr = 0;
+ if (Optional<SectionedAddress> BA = U->getBaseAddress())
+ BaseAddr = BA->Address;
+
if (LL)
- LL->dump(OS, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, Indent);
+ LL->dump(OS, BaseAddr, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI,
+ Indent);
else
OS << "error extracting location list.";
}
@@ -134,10 +146,54 @@ static void dumpTypeTagName(raw_ostream &OS, dwarf::Tag T) {
OS << TagStr.substr(7, TagStr.size() - 12) << " ";
}
-/// Recursively dump the DIE type name when applicable.
-static void dumpTypeName(raw_ostream &OS, const DWARFDie &Die) {
- DWARFDie D = Die.getAttributeValueAsReferencedDie(DW_AT_type);
+static void dumpArrayType(raw_ostream &OS, const DWARFDie &D) {
+ Optional<uint64_t> Bound;
+ for (const DWARFDie &C : D.children())
+ if (C.getTag() == DW_TAG_subrange_type) {
+ Optional<uint64_t> LB;
+ Optional<uint64_t> Count;
+ Optional<uint64_t> UB;
+ Optional<unsigned> DefaultLB;
+ if (Optional<DWARFFormValue> L = C.find(DW_AT_lower_bound))
+ LB = L->getAsUnsignedConstant();
+ if (Optional<DWARFFormValue> CountV = C.find(DW_AT_count))
+ Count = CountV->getAsUnsignedConstant();
+ if (Optional<DWARFFormValue> UpperV = C.find(DW_AT_upper_bound))
+ UB = UpperV->getAsUnsignedConstant();
+ if (Optional<DWARFFormValue> LV =
+ D.getDwarfUnit()->getUnitDIE().find(DW_AT_language))
+ if (Optional<uint64_t> LC = LV->getAsUnsignedConstant())
+ if ((DefaultLB =
+ LanguageLowerBound(static_cast<dwarf::SourceLanguage>(*LC))))
+ if (LB && *LB == *DefaultLB)
+ LB = None;
+ if (!LB && !Count && !UB)
+ OS << "[]";
+ else if (!LB && (Count || UB) && DefaultLB)
+ OS << '[' << (Count ? *Count : *UB - *DefaultLB + 1) << ']';
+ else {
+ OS << "[[";
+ if (LB)
+ OS << *LB;
+ else
+ OS << '?';
+ OS << ", ";
+ if (Count)
+ if (LB)
+ OS << *LB + *Count;
+ else
+ OS << "? + " << *Count;
+ else if (UB)
+ OS << *UB + 1;
+ else
+ OS << '?';
+ OS << ")]";
+ }
+ }
+}
+/// Recursively dump the DIE type name when applicable.
+static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) {
if (!D.isValid())
return;
@@ -155,22 +211,46 @@ static void dumpTypeName(raw_ostream &OS, const DWARFDie &Die) {
case DW_TAG_ptr_to_member_type:
case DW_TAG_reference_type:
case DW_TAG_rvalue_reference_type:
+ case DW_TAG_subroutine_type:
break;
default:
dumpTypeTagName(OS, T);
}
// Follow the DW_AT_type if possible.
- dumpTypeName(OS, D);
+ DWARFDie TypeDie = D.getAttributeValueAsReferencedDie(DW_AT_type);
+ dumpTypeName(OS, TypeDie);
switch (T) {
- case DW_TAG_array_type:
- OS << "[]";
+ case DW_TAG_subroutine_type: {
+ if (!TypeDie)
+ OS << "void";
+ OS << '(';
+ bool First = true;
+ for (const DWARFDie &C : D.children()) {
+ if (C.getTag() == DW_TAG_formal_parameter) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ dumpTypeName(OS, C.getAttributeValueAsReferencedDie(DW_AT_type));
+ }
+ }
+ OS << ')';
+ break;
+ }
+ case DW_TAG_array_type: {
+ dumpArrayType(OS, D);
break;
+ }
case DW_TAG_pointer_type:
OS << '*';
break;
case DW_TAG_ptr_to_member_type:
+ if (DWARFDie Cont =
+ D.getAttributeValueAsReferencedDie(DW_AT_containing_type)) {
+ dumpTypeName(OS << ' ', Cont);
+ OS << "::";
+ }
OS << '*';
break;
case DW_TAG_reference_type:
@@ -244,16 +324,19 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
else
formValue.dump(OS, DumpOpts);
+ std::string Space = DumpOpts.ShowAddresses ? " " : "";
+
// We have dumped the attribute raw value. For some attributes
// having both the raw value and the pretty-printed value is
// interesting. These attributes are handled below.
if (Attr == DW_AT_specification || Attr == DW_AT_abstract_origin) {
- if (const char *Name = Die.getAttributeValueAsReferencedDie(Attr).getName(
- DINameKind::LinkageName))
- OS << " \"" << Name << '\"';
+ if (const char *Name =
+ Die.getAttributeValueAsReferencedDie(formValue).getName(
+ DINameKind::LinkageName))
+ OS << Space << "\"" << Name << '\"';
} else if (Attr == DW_AT_type) {
- OS << " \"";
- dumpTypeName(OS, Die);
+ OS << Space << "\"";
+ dumpTypeName(OS, Die.getAttributeValueAsReferencedDie(formValue));
OS << '"';
} else if (Attr == DW_AT_APPLE_property_attribute) {
if (Optional<uint64_t> OptVal = formValue.getAsUnsignedConstant())
@@ -262,10 +345,9 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
const DWARFObject &Obj = Die.getDwarfUnit()->getContext().getDWARFObj();
// For DW_FORM_rnglistx we need to dump the offset separately, since
// we have only dumped the index so far.
- Optional<DWARFFormValue> Value = Die.find(DW_AT_ranges);
- if (Value && Value->getForm() == DW_FORM_rnglistx)
+ if (formValue.getForm() == DW_FORM_rnglistx)
if (auto RangeListOffset =
- U->getRnglistOffset(*Value->getAsSectionOffset())) {
+ U->getRnglistOffset(*formValue.getAsSectionOffset())) {
DWARFFormValue FV(dwarf::DW_FORM_sec_offset);
FV.setUValue(*RangeListOffset);
FV.dump(OS, DumpOpts);
@@ -349,8 +431,15 @@ DWARFDie::findRecursively(ArrayRef<dwarf::Attribute> Attrs) const {
DWARFDie
DWARFDie::getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const {
- if (auto SpecRef = toReference(find(Attr))) {
- if (auto SpecUnit = U->getUnitSection().getUnitForOffset(*SpecRef))
+ if (Optional<DWARFFormValue> F = find(Attr))
+ return getAttributeValueAsReferencedDie(*F);
+ return DWARFDie();
+}
+
+DWARFDie
+DWARFDie::getAttributeValueAsReferencedDie(const DWARFFormValue &V) const {
+ if (auto SpecRef = toReference(V)) {
+ if (auto SpecUnit = U->getUnitVector().getUnitForOffset(*SpecRef))
return SpecUnit->getDIEForOffset(*SpecRef);
}
return DWARFDie();
@@ -377,13 +466,13 @@ Optional<uint64_t> DWARFDie::getHighPC(uint64_t LowPC) const {
bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC,
uint64_t &SectionIndex) const {
auto F = find(DW_AT_low_pc);
- auto LowPcAddr = toAddress(F);
+ auto LowPcAddr = toSectionedAddress(F);
if (!LowPcAddr)
return false;
- if (auto HighPcAddr = getHighPC(*LowPcAddr)) {
- LowPC = *LowPcAddr;
+ if (auto HighPcAddr = getHighPC(LowPcAddr->Address)) {
+ LowPC = LowPcAddr->Address;
HighPC = *HighPcAddr;
- SectionIndex = F->getSectionIndex();
+ SectionIndex = LowPcAddr->SectionIndex;
return true;
}
return false;
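
Attribute dumping now resolves referenced DIEs from the form value that was
just extracted instead of re-searching the DIE, which keeps DW_AT_type and
DW_AT_specification output consistent with the raw value already printed. A
sketch of the new overload in use (function name and context are made up):

    #include "llvm/DebugInfo/DIContext.h"
    #include "llvm/DebugInfo/DWARF/DWARFDie.h"
    #include "llvm/Support/raw_ostream.h"

    // Print the linkage name behind an already-extracted reference-class
    // attribute value, if the referenced DIE has one.
    void printSpecName(const llvm::DWARFDie &Die,
                       const llvm::DWARFFormValue &FormValue,
                       llvm::raw_ostream &OS) {
      llvm::DWARFDie Ref = Die.getAttributeValueAsReferencedDie(FormValue);
      if (const char *Name = Ref.getName(llvm::DINameKind::LinkageName))
        OS << '"' << Name << '"';
    }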
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
index a9ea26c476ca..2df4456053fb 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -94,6 +94,7 @@ static DescVector getDescriptions() {
Desc(Op::Dwarf3, Op::SizeLEB, Op::SizeBlock);
Descriptions[DW_OP_stack_value] = Desc(Op::Dwarf3);
Descriptions[DW_OP_GNU_push_tls_address] = Desc(Op::Dwarf3);
+ Descriptions[DW_OP_addrx] = Desc(Op::Dwarf4, Op::SizeLEB);
Descriptions[DW_OP_GNU_addr_index] = Desc(Op::Dwarf4, Op::SizeLEB);
Descriptions[DW_OP_GNU_const_index] = Desc(Op::Dwarf4, Op::SizeLEB);
return Descriptions;
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 1aa43c6b6517..7719fea63120 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -308,6 +308,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
break;
case DW_FORM_GNU_addr_index:
case DW_FORM_GNU_str_index:
+ case DW_FORM_addrx:
case DW_FORM_strx:
Value.uval = Data.getULEB128(OffsetPtr);
break;
@@ -330,6 +331,29 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
return true;
}
+void DWARFFormValue::dumpSectionedAddress(raw_ostream &OS,
+ DIDumpOptions DumpOpts,
+ SectionedAddress SA) const {
+ OS << format("0x%016" PRIx64, SA.Address);
+ dumpAddressSection(U->getContext().getDWARFObj(), OS, DumpOpts,
+ SA.SectionIndex);
+}
+
+void DWARFFormValue::dumpAddressSection(const DWARFObject &Obj, raw_ostream &OS,
+ DIDumpOptions DumpOpts,
+ uint64_t SectionIndex) {
+ if (!DumpOpts.Verbose || SectionIndex == -1ULL)
+ return;
+ ArrayRef<SectionName> SectionNames = Obj.getSectionNames();
+ const auto &SecRef = SectionNames[SectionIndex];
+
+ OS << " \"" << SecRef.Name << '\"';
+
+ // Print section index if name is not unique.
+ if (!SecRef.IsNameUnique)
+ OS << format(" [%" PRIu64 "]", SectionIndex);
+}
+
void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
uint64_t UValue = Value.uval;
bool CURelativeOffset = false;
@@ -338,15 +362,21 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
: nulls();
switch (Form) {
case DW_FORM_addr:
- AddrOS << format("0x%016" PRIx64, UValue);
+ dumpSectionedAddress(AddrOS, DumpOpts, {Value.uval, Value.SectionIndex});
break;
+ case DW_FORM_addrx:
+ case DW_FORM_addrx1:
+ case DW_FORM_addrx2:
+ case DW_FORM_addrx3:
+ case DW_FORM_addrx4:
case DW_FORM_GNU_addr_index: {
- AddrOS << format(" indexed (%8.8x) address = ", (uint32_t)UValue);
- uint64_t Address;
+ Optional<SectionedAddress> A = U->getAddrOffsetSectionItem(UValue);
+ if (!A || DumpOpts.Verbose)
+ AddrOS << format("indexed (%8.8x) address = ", (uint32_t)UValue);
if (U == nullptr)
OS << "<invalid dwarf unit>";
- else if (U->getAddrOffsetSectionItem(UValue, Address))
- AddrOS << format("0x%016" PRIx64, Address);
+ else if (A)
+ dumpSectionedAddress(AddrOS, DumpOpts, *A);
else
OS << "<no .debug_addr section>";
break;
@@ -387,16 +417,16 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
switch (Form) {
case DW_FORM_exprloc:
case DW_FORM_block:
- OS << format("<0x%" PRIx64 "> ", UValue);
+ AddrOS << format("<0x%" PRIx64 "> ", UValue);
break;
case DW_FORM_block1:
- OS << format("<0x%2.2x> ", (uint8_t)UValue);
+ AddrOS << format("<0x%2.2x> ", (uint8_t)UValue);
break;
case DW_FORM_block2:
- OS << format("<0x%4.4x> ", (uint16_t)UValue);
+ AddrOS << format("<0x%4.4x> ", (uint16_t)UValue);
break;
case DW_FORM_block4:
- OS << format("<0x%8.8x> ", (uint32_t)UValue);
+ AddrOS << format("<0x%8.8x> ", (uint32_t)UValue);
break;
default:
break;
@@ -407,7 +437,7 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
// UValue contains size of block
const uint8_t *EndDataPtr = DataPtr + UValue;
while (DataPtr < EndDataPtr) {
- OS << format("%2.2x ", *DataPtr);
+ AddrOS << format("%2.2x ", *DataPtr);
++DataPtr;
}
} else
@@ -438,7 +468,7 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
case DW_FORM_strx4:
case DW_FORM_GNU_str_index:
if (DumpOpts.Verbose)
- OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue);
+ OS << format("indexed (%8.8x) string = ", (uint32_t)UValue);
dumpString(OS);
break;
case DW_FORM_GNU_strp_alt:
@@ -501,8 +531,9 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
if (CURelativeOffset) {
if (DumpOpts.Verbose)
OS << " => {";
- WithColor(OS, HighlightColor::Address).get()
- << format("0x%8.8" PRIx64, UValue + (U ? U->getOffset() : 0));
+ if (DumpOpts.ShowAddresses)
+ WithColor(OS, HighlightColor::Address).get()
+ << format("0x%8.8" PRIx64, UValue + (U ? U->getOffset() : 0));
if (DumpOpts.Verbose)
OS << "}";
}
@@ -536,10 +567,12 @@ Optional<const char *> DWARFFormValue::getAsCString() const {
if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx ||
Form == DW_FORM_strx1 || Form == DW_FORM_strx2 || Form == DW_FORM_strx3 ||
Form == DW_FORM_strx4) {
- uint64_t StrOffset;
- if (!U || !U->getStringOffsetSectionItem(Offset, StrOffset))
+ if (!U)
+ return None;
+ Optional<uint64_t> StrOffset = U->getStringOffsetSectionItem(Offset);
+ if (!StrOffset)
return None;
- Offset = StrOffset;
+ Offset = *StrOffset;
}
// Prefer the Unit's string extractor, because for .dwo it will point to
// .debug_str.dwo, while the Context's extractor always uses .debug_str.
@@ -554,16 +587,23 @@ Optional<const char *> DWARFFormValue::getAsCString() const {
}
Optional<uint64_t> DWARFFormValue::getAsAddress() const {
+ if (auto SA = getAsSectionedAddress())
+ return SA->Address;
+ return None;
+}
+Optional<SectionedAddress> DWARFFormValue::getAsSectionedAddress() const {
if (!isFormClass(FC_Address))
return None;
- if (Form == DW_FORM_GNU_addr_index) {
+ if (Form == DW_FORM_GNU_addr_index || Form == DW_FORM_addrx) {
uint32_t Index = Value.uval;
- uint64_t Result;
- if (!U || !U->getAddrOffsetSectionItem(Index, Result))
+ if (!U)
+ return None;
+ Optional<SectionedAddress> SA = U->getAddrOffsetSectionItem(Index);
+ if (!SA)
return None;
- return Result;
+ return SA;
}
- return Value.uval;
+ return {{Value.uval, Value.SectionIndex}};
}
Optional<uint64_t> DWARFFormValue::getAsReference() const {
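
Address-class form values now expose the section they were relocated against,
so consumers that need to disambiguate identically named sections no longer
have to consult the relocation map themselves. A sketch, assuming the
getAsSectionedAddress() accessor added above (the helper name is hypothetical):

    #include "llvm/BinaryFormat/Dwarf.h"
    #include "llvm/DebugInfo/DWARF/DWARFDie.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cinttypes>

    void printLowPC(const llvm::DWARFDie &Die, llvm::raw_ostream &OS) {
      if (auto FV = Die.find(llvm::dwarf::DW_AT_low_pc))
        if (auto SA = FV->getAsSectionedAddress()) {
          // SA->Address is the (possibly relocated) address; SA->SectionIndex
          // is -1ULL when no object section applies.
          OS << llvm::format("0x%016" PRIx64, SA->Address);
          if (SA->SectionIndex != -1ULL)
            OS << " [section " << SA->SectionIndex << "]";
          OS << '\n';
        }
    }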
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
index ebd6104ab878..1abd931e3b8b 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -33,6 +34,16 @@ void DWARFGdbIndex::dumpCUList(raw_ostream &OS) const {
CU.Length);
}
+void DWARFGdbIndex::dumpTUList(raw_ostream &OS) const {
+ OS << formatv("\n Types CU list offset = {0:x}, has {1} entries:\n",
+ TuListOffset, TuList.size());
+ uint32_t I = 0;
+ for (const TypeUnitEntry &TU : TuList)
+ OS << formatv(" {0}: offset = {1:x8}, type_offset = {2:x8}, "
+ "type_signature = {3:x16}\n",
+ I++, TU.Offset, TU.TypeOffset, TU.TypeSignature);
+}
+
void DWARFGdbIndex::dumpAddressArea(raw_ostream &OS) const {
OS << format("\n Address area offset = 0x%x, has %" PRId64 " entries:",
AddressAreaOffset, (uint64_t)AddressArea.size())
@@ -94,6 +105,7 @@ void DWARFGdbIndex::dump(raw_ostream &OS) {
if (HasContent) {
OS << " Version = " << Version << '\n';
dumpCUList(OS);
+ dumpTUList(OS);
dumpAddressArea(OS);
dumpSymbolTable(OS);
dumpConstantPool(OS);
@@ -127,9 +139,14 @@ bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
// CU Types are no longer needed as DWARF skeleton type units never made it
// into the standard.
- uint32_t CuTypesListSize = (AddressAreaOffset - CuTypesOffset) / 24;
- if (CuTypesListSize != 0)
- return false;
+ uint32_t TuListSize = (AddressAreaOffset - CuTypesOffset) / 24;
+ TuList.resize(TuListSize);
+ for (uint32_t I = 0; I < TuListSize; ++I) {
+ uint64_t CuOffset = Data.getU64(&Offset);
+ uint64_t TypeOffset = Data.getU64(&Offset);
+ uint64_t Signature = Data.getU64(&Offset);
+ TuList[I] = {CuOffset, TypeOffset, Signature};
+ }
uint32_t AddressAreaSize = (SymbolTableOffset - AddressAreaOffset) / 20;
AddressArea.reserve(AddressAreaSize);
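
The new dumpTUList and the rewritten parseImpl above treat the .gdb_index type-unit list as a sequence of 24-byte records, three unsigned 64-bit values each (unit offset, type offset, type signature), where the old code simply rejected a non-empty list. A self-contained sketch of reading such fixed-size little-endian records from a raw buffer; the struct and function names are illustrative, not from the patch:

    #include <cstdint>
    #include <vector>

    struct TypeUnitEntry {
      uint64_t Offset;
      uint64_t TypeOffset;
      uint64_t TypeSignature;
    };

    // Decode a little-endian 64-bit value from 8 bytes.
    static uint64_t readU64LE(const uint8_t *P) {
      uint64_t V = 0;
      for (int I = 7; I >= 0; --I)
        V = (V << 8) | P[I];
      return V;
    }

    // Parse Count 24-byte records starting at Offset; returns an empty vector
    // if the buffer is too small to hold them all.
    std::vector<TypeUnitEntry> parseTuList(const std::vector<uint8_t> &Data,
                                           size_t Offset, size_t Count) {
      std::vector<TypeUnitEntry> TuList;
      if (Data.size() < Offset + Count * 24)
        return TuList;
      for (size_t I = 0; I < Count; ++I) {
        const uint8_t *P = Data.data() + Offset + I * 24;
        TuList.push_back({readU64LE(P), readU64LE(P + 8), readU64LE(P + 16)});
      }
      return TuList;
    }

    int main() { return parseTuList({}, 0, 0).empty() ? 0 : 1; }
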
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
index 559afc7559bd..462c036d73ad 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
@@ -9,42 +9,39 @@
#include "llvm/DebugInfo/DWARF/DWARFListTable.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-template <typename... Ts>
-static Error createError(char const *Fmt, const Ts &... Vals) {
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
- Stream << format(Fmt, Vals...);
- return make_error<StringError>(Stream.str(), inconvertibleErrorCode());
-}
-
Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
uint32_t *OffsetPtr) {
HeaderOffset = *OffsetPtr;
// Read and verify the length field.
if (!Data.isValidOffsetForDataOfSize(*OffsetPtr, sizeof(uint32_t)))
- return createError("section is not large enough to contain a "
+ return createStringError(errc::invalid_argument,
+ "section is not large enough to contain a "
"%s table length at offset 0x%" PRIx32,
SectionName.data(), *OffsetPtr);
// TODO: Add support for DWARF64.
HeaderData.Length = Data.getU32(OffsetPtr);
if (HeaderData.Length == 0xffffffffu)
- return createError("DWARF64 is not supported in %s at offset 0x%" PRIx32,
+ return createStringError(errc::not_supported,
+ "DWARF64 is not supported in %s at offset 0x%" PRIx32,
SectionName.data(), HeaderOffset);
Format = dwarf::DwarfFormat::DWARF32;
if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header))
- return createError("%s table at offset 0x%" PRIx32
+ return createStringError(errc::invalid_argument,
+ "%s table at offset 0x%" PRIx32
" has too small length (0x%" PRIx32
") to contain a complete header",
SectionName.data(), HeaderOffset, length());
uint32_t End = HeaderOffset + length();
if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset))
- return createError("section is not large enough to contain a %s table "
+ return createStringError(errc::invalid_argument,
+ "section is not large enough to contain a %s table "
"of length 0x%" PRIx32 " at offset 0x%" PRIx32,
SectionName.data(), length(), HeaderOffset);
@@ -55,20 +52,23 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
// Perform basic validation of the remaining header fields.
if (HeaderData.Version != 5)
- return createError("unrecognised %s table version %" PRIu16
+ return createStringError(errc::invalid_argument,
+ "unrecognised %s table version %" PRIu16
" in table at offset 0x%" PRIx32,
SectionName.data(), HeaderData.Version, HeaderOffset);
if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)
- return createError("%s table at offset 0x%" PRIx32
- " has unsupported address size %hhu",
+ return createStringError(errc::not_supported,
+ "%s table at offset 0x%" PRIx32
+ " has unsupported address size %" PRIu8,
SectionName.data(), HeaderOffset, HeaderData.AddrSize);
if (HeaderData.SegSize != 0)
- return createError("%s table at offset 0x%" PRIx32
+ return createStringError(errc::not_supported,
+ "%s table at offset 0x%" PRIx32
" has unsupported segment selector size %" PRIu8,
SectionName.data(), HeaderOffset, HeaderData.SegSize);
if (End < HeaderOffset + sizeof(HeaderData) +
HeaderData.OffsetEntryCount * sizeof(uint32_t))
- return createError(
+ return createStringError(errc::invalid_argument,
"%s table at offset 0x%" PRIx32 " has more offset entries (%" PRIu32
") than there is space for",
SectionName.data(), HeaderOffset, HeaderData.OffsetEntryCount);
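
The DWARFListTable hunks above retire the file-local createError helper in favor of createStringError, which pairs an errc value with a printf-style message. A rough standalone analogue of such a helper, shown only to illustrate the shape of the call, not the LLVM implementation:

    #include <cstdarg>
    #include <cstdio>
    #include <string>
    #include <system_error>

    struct StringError {
      std::error_code EC;
      std::string Msg;
    };

    // Format a message printf-style and attach an error code, roughly the role
    // createStringError plays for llvm::Error in the patch above.
    StringError createStringError(std::errc Code, const char *Fmt, ...) {
      char Buf[512];
      va_list Args;
      va_start(Args, Fmt);
      vsnprintf(Buf, sizeof(Buf), Fmt, Args);
      va_end(Args);
      return {std::make_error_code(Code), Buf};
    }

    int main() {
      StringError E = createStringError(std::errc::invalid_argument,
                                        "table at offset 0x%x is truncated", 16u);
      return E.Msg.empty() ? 1 : 0;
    }
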
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 3b408857d29f..80234665bdeb 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -11,13 +11,16 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/WithColor.h"
#include <algorithm>
@@ -31,34 +34,161 @@
using namespace llvm;
using namespace dwarf;
-void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
+void DWARFUnitVector::addUnitsForSection(DWARFContext &C,
+ const DWARFSection &Section,
+ DWARFSectionKind SectionKind) {
const DWARFObject &D = C.getDWARFObj();
- parseImpl(C, D, Section, C.getDebugAbbrev(), &D.getRangeSection(),
- D.getStringSection(), D.getStringOffsetSection(),
- &D.getAddrSection(), D.getLineSection(), D.isLittleEndian(), false,
- false);
+ addUnitsImpl(C, D, Section, C.getDebugAbbrev(), &D.getRangeSection(),
+ &D.getLocSection(), D.getStringSection(),
+ D.getStringOffsetSection(), &D.getAddrSection(),
+ D.getLineSection(), D.isLittleEndian(), false, false,
+ SectionKind);
}
-void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
- const DWARFSection &DWOSection, bool Lazy) {
+void DWARFUnitVector::addUnitsForDWOSection(DWARFContext &C,
+ const DWARFSection &DWOSection,
+ DWARFSectionKind SectionKind,
+ bool Lazy) {
const DWARFObject &D = C.getDWARFObj();
- parseImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), &D.getRangeDWOSection(),
- D.getStringDWOSection(), D.getStringOffsetDWOSection(),
- &D.getAddrSection(), D.getLineDWOSection(), C.isLittleEndian(),
- true, Lazy);
+ addUnitsImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), &D.getRangeDWOSection(),
+ &D.getLocDWOSection(), D.getStringDWOSection(),
+ D.getStringOffsetDWOSection(), &D.getAddrSection(),
+ D.getLineDWOSection(), C.isLittleEndian(), true, Lazy,
+ SectionKind);
+}
+
+void DWARFUnitVector::addUnitsImpl(
+ DWARFContext &Context, const DWARFObject &Obj, const DWARFSection &Section,
+ const DWARFDebugAbbrev *DA, const DWARFSection *RS,
+ const DWARFSection *LocSection, StringRef SS, const DWARFSection &SOS,
+ const DWARFSection *AOS, const DWARFSection &LS, bool LE, bool IsDWO,
+ bool Lazy, DWARFSectionKind SectionKind) {
+ DWARFDataExtractor Data(Obj, Section, LE, 0);
+ // Lazy initialization of Parser, now that we have all section info.
+ if (!Parser) {
+ Parser = [=, &Context, &Obj, &Section, &SOS,
+ &LS](uint32_t Offset, DWARFSectionKind SectionKind,
+ const DWARFSection *CurSection,
+ const DWARFUnitIndex::Entry *IndexEntry)
+ -> std::unique_ptr<DWARFUnit> {
+ const DWARFSection &InfoSection = CurSection ? *CurSection : Section;
+ DWARFDataExtractor Data(Obj, InfoSection, LE, 0);
+ if (!Data.isValidOffset(Offset))
+ return nullptr;
+ const DWARFUnitIndex *Index = nullptr;
+ if (IsDWO)
+ Index = &getDWARFUnitIndex(Context, SectionKind);
+ DWARFUnitHeader Header;
+ if (!Header.extract(Context, Data, &Offset, SectionKind, Index,
+ IndexEntry))
+ return nullptr;
+ std::unique_ptr<DWARFUnit> U;
+ if (Header.isTypeUnit())
+ U = llvm::make_unique<DWARFTypeUnit>(Context, InfoSection, Header, DA,
+ RS, LocSection, SS, SOS, AOS, LS,
+ LE, IsDWO, *this);
+ else
+ U = llvm::make_unique<DWARFCompileUnit>(Context, InfoSection, Header,
+ DA, RS, LocSection, SS, SOS,
+ AOS, LS, LE, IsDWO, *this);
+ return U;
+ };
+ }
+ if (Lazy)
+ return;
+ // Find a reasonable insertion point within the vector. We skip over
+ // (a) units from a different section, (b) units from the same section
+ // but with lower offset-within-section. This keeps units in order
+ // within a section, although not necessarily within the object file,
+ // even if we do lazy parsing.
+ auto I = this->begin();
+ uint32_t Offset = 0;
+ while (Data.isValidOffset(Offset)) {
+ if (I != this->end() &&
+ (&(*I)->getInfoSection() != &Section || (*I)->getOffset() == Offset)) {
+ ++I;
+ continue;
+ }
+ auto U = Parser(Offset, SectionKind, &Section, nullptr);
+ // If parsing failed, we're done with this section.
+ if (!U)
+ break;
+ Offset = U->getNextUnitOffset();
+ I = std::next(this->insert(I, std::move(U)));
+ }
+}
+
+DWARFUnit *DWARFUnitVector::addUnit(std::unique_ptr<DWARFUnit> Unit) {
+ auto I = std::upper_bound(begin(), end(), Unit,
+ [](const std::unique_ptr<DWARFUnit> &LHS,
+ const std::unique_ptr<DWARFUnit> &RHS) {
+ return LHS->getOffset() < RHS->getOffset();
+ });
+ return this->insert(I, std::move(Unit))->get();
+}
+
+DWARFUnit *DWARFUnitVector::getUnitForOffset(uint32_t Offset) const {
+ auto end = begin() + getNumInfoUnits();
+ auto *CU =
+ std::upper_bound(begin(), end, Offset,
+ [](uint32_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
+ return LHS < RHS->getNextUnitOffset();
+ });
+ if (CU != end && (*CU)->getOffset() <= Offset)
+ return CU->get();
+ return nullptr;
+}
+
+DWARFUnit *
+DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
+ const auto *CUOff = E.getOffset(DW_SECT_INFO);
+ if (!CUOff)
+ return nullptr;
+
+ auto Offset = CUOff->Offset;
+ auto end = begin() + getNumInfoUnits();
+
+ auto *CU =
+ std::upper_bound(begin(), end, CUOff->Offset,
+ [](uint32_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
+ return LHS < RHS->getNextUnitOffset();
+ });
+ if (CU != end && (*CU)->getOffset() <= Offset)
+ return CU->get();
+
+ if (!Parser)
+ return nullptr;
+
+ auto U = Parser(Offset, DW_SECT_INFO, nullptr, &E);
+ if (!U)
+ U = nullptr;
+
+ auto *NewCU = U.get();
+ this->insert(CU, std::move(U));
+ ++NumInfoUnits;
+ return NewCU;
}
DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
- const DWARFUnitHeader &Header,
- const DWARFDebugAbbrev *DA, const DWARFSection *RS,
+ const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA,
+ const DWARFSection *RS, const DWARFSection *LocSection,
StringRef SS, const DWARFSection &SOS,
const DWARFSection *AOS, const DWARFSection &LS, bool LE,
- bool IsDWO, const DWARFUnitSectionBase &UnitSection)
+ bool IsDWO, const DWARFUnitVector &UnitVector)
: Context(DC), InfoSection(Section), Header(Header), Abbrev(DA),
- RangeSection(RS), LineSection(LS), StringSection(SS),
- StringOffsetSection(SOS), AddrOffsetSection(AOS), isLittleEndian(LE),
- isDWO(IsDWO), UnitSection(UnitSection) {
+ RangeSection(RS), LocSection(LocSection), LineSection(LS),
+ StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS),
+ isLittleEndian(LE), IsDWO(IsDWO), UnitVector(UnitVector) {
clear();
+ // For split DWARF we only need to keep track of the location list section's
+ // data (no relocations), and if we are reading a package file, we need to
+ // adjust the location list data based on the index entries.
+ if (IsDWO) {
+ LocSectionData = LocSection->Data;
+ if (auto *IndexEntry = Header.getIndexEntry())
+ if (const auto *C = IndexEntry->getOffset(DW_SECT_LOC))
+ LocSectionData = LocSectionData.substr(C->Offset, C->Length);
+ }
}
DWARFUnit::~DWARFUnit() = default;
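
DWARFUnitVector keeps its units sorted by offset and resolves an offset to the owning unit with std::upper_bound against each unit's next-unit offset, as in getUnitForOffset above. The same idiom in a self-contained form; Unit here is a stand-in struct, not the LLVM class:

    #include <algorithm>
    #include <cstdint>
    #include <memory>
    #include <vector>

    struct Unit {
      uint32_t Offset;
      uint32_t NextUnitOffset; // one past the end of this unit
    };

    // Find the unit whose [Offset, NextUnitOffset) range contains Target in a
    // vector kept sorted by Offset.
    const Unit *getUnitForOffset(const std::vector<std::unique_ptr<Unit>> &Units,
                                 uint32_t Target) {
      auto It = std::upper_bound(
          Units.begin(), Units.end(), Target,
          [](uint32_t LHS, const std::unique_ptr<Unit> &RHS) {
            return LHS < RHS->NextUnitOffset;
          });
      if (It != Units.end() && (*It)->Offset <= Target)
        return It->get();
      return nullptr;
    }

    int main() {
      std::vector<std::unique_ptr<Unit>> Units;
      Units.push_back(std::make_unique<Unit>(Unit{0, 100}));
      Units.push_back(std::make_unique<Unit>(Unit{100, 180}));
      const Unit *U = getUnitForOffset(Units, 120);
      return (U && U->Offset == 100) ? 0 : 1;
    }
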
@@ -68,38 +198,50 @@ DWARFDataExtractor DWARFUnit::getDebugInfoExtractor() const {
getAddressByteSize());
}
-bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index,
- uint64_t &Result) const {
+Optional<SectionedAddress>
+DWARFUnit::getAddrOffsetSectionItem(uint32_t Index) const {
+ if (IsDWO) {
+ auto R = Context.info_section_units();
+ auto I = R.begin();
+ // Surprising if a DWO file has more than one skeleton unit in it - this
+ // probably shouldn't be valid, but if a use case is found, here's where to
+ // support it (probably have to linearly search for the matching skeleton CU
+ // here)
+ if (I != R.end() && std::next(I) == R.end())
+ return (*I)->getAddrOffsetSectionItem(Index);
+ }
uint32_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize();
if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize())
- return false;
+ return None;
DWARFDataExtractor DA(Context.getDWARFObj(), *AddrOffsetSection,
isLittleEndian, getAddressByteSize());
- Result = DA.getRelocatedAddress(&Offset);
- return true;
+ uint64_t Section;
+ uint64_t Address = DA.getRelocatedAddress(&Offset, &Section);
+ return {{Address, Section}};
}
-bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index,
- uint64_t &Result) const {
+Optional<uint64_t> DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const {
if (!StringOffsetsTableContribution)
- return false;
+ return None;
unsigned ItemSize = getDwarfStringOffsetsByteSize();
uint32_t Offset = getStringOffsetsBase() + Index * ItemSize;
if (StringOffsetSection.Data.size() < Offset + ItemSize)
- return false;
+ return None;
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
isLittleEndian, 0);
- Result = DA.getRelocatedValue(ItemSize, &Offset);
- return true;
+ return DA.getRelocatedValue(ItemSize, &Offset);
}
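
getAddrOffsetSectionItem and getStringOffsetSectionItem above compute the entry's position as base plus index times item size and return None when the read would run past the section. A minimal standalone version of that bounds-checked table read; the function name and the little-endian decoding are illustrative:

    #include <cstdint>
    #include <optional>
    #include <vector>

    // Read entry Index of a table of fixed-size little-endian values starting
    // at Base inside Data, or nullopt if the read would run off the end.
    std::optional<uint64_t> readTableEntry(const std::vector<uint8_t> &Data,
                                           uint64_t Base, uint32_t Index,
                                           unsigned ItemSize) {
      uint64_t Offset = Base + uint64_t(Index) * ItemSize;
      if (Data.size() < Offset + ItemSize)
        return std::nullopt;
      uint64_t Value = 0;
      for (unsigned I = 0; I < ItemSize; ++I)
        Value |= uint64_t(Data[Offset + I]) << (8 * I);
      return Value;
    }

    int main() {
      std::vector<uint8_t> Data = {0x78, 0x56, 0x34, 0x12};
      auto V = readTableEntry(Data, 0, 0, 4);
      return (V && *V == 0x12345678) ? 0 : 1;
    }
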
bool DWARFUnitHeader::extract(DWARFContext &Context,
const DWARFDataExtractor &debug_info,
uint32_t *offset_ptr,
DWARFSectionKind SectionKind,
- const DWARFUnitIndex *Index) {
+ const DWARFUnitIndex *Index,
+ const DWARFUnitIndex::Entry *Entry) {
Offset = *offset_ptr;
- IndexEntry = Index ? Index->getFromOffset(*offset_ptr) : nullptr;
+ IndexEntry = Entry;
+ if (!IndexEntry && Index)
+ IndexEntry = Index->getFromOffset(*offset_ptr);
Length = debug_info.getU32(offset_ptr);
// FIXME: Support DWARF64.
unsigned SizeOfLength = 4;
@@ -166,13 +308,10 @@ parseRngListTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
// We are expected to be called with Offset 0 or pointing just past the table
// header, which is 12 bytes long for DWARF32.
if (Offset > 0) {
- if (Offset < 12U) {
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
- Stream << format(
- "Did not detect a valid range list table with base = 0x%x", Offset);
- return make_error<StringError>(Stream.str(), inconvertibleErrorCode());
- }
+ if (Offset < 12U)
+ return createStringError(errc::invalid_argument, "Did not detect a valid"
+ " range list table with base = 0x%" PRIu32,
+ Offset);
Offset -= 12U;
}
llvm::DWARFDebugRnglistTable Table;
@@ -274,11 +413,13 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
DWARFDie UnitDie = getUnitDIE();
if (Optional<uint64_t> DWOId = toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
Header.setDWOId(*DWOId);
- if (!isDWO) {
+ if (!IsDWO) {
assert(AddrOffsetSectionBase == 0);
assert(RangeSectionBase == 0);
- AddrOffsetSectionBase =
- toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0);
+ AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base), 0);
+ if (!AddrOffsetSectionBase)
+ AddrOffsetSectionBase =
+ toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0);
RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
}
@@ -289,27 +430,19 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
// offsets table starting at offset 0 of the debug_str_offsets.dwo section.
// In both cases we need to determine the format of the contribution,
// which may differ from the unit's format.
- uint64_t StringOffsetsContributionBase =
- isDWO ? 0 : toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0);
- auto IndexEntry = Header.getIndexEntry();
- if (IndexEntry)
- if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
- StringOffsetsContributionBase += C->Offset;
-
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
isLittleEndian, 0);
- if (isDWO)
+ if (IsDWO)
StringOffsetsTableContribution =
- determineStringOffsetsTableContributionDWO(
- DA, StringOffsetsContributionBase);
+ determineStringOffsetsTableContributionDWO(DA);
else if (getVersion() >= 5)
- StringOffsetsTableContribution = determineStringOffsetsTableContribution(
- DA, StringOffsetsContributionBase);
+ StringOffsetsTableContribution =
+ determineStringOffsetsTableContribution(DA);
// DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
// describe address ranges.
if (getVersion() >= 5) {
- if (isDWO)
+ if (IsDWO)
setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0);
else
setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
@@ -329,20 +462,20 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
// In a split dwarf unit, there is no DW_AT_rnglists_base attribute.
// Adjust RangeSectionBase to point past the table header.
- if (isDWO && RngListTable)
+ if (IsDWO && RngListTable)
RangeSectionBase = RngListTable->getHeaderSize();
}
}
// Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
// skeleton CU DIE, so that DWARF users not aware of it are not broken.
- }
+ }
return DieArray.size();
}
bool DWARFUnit::parseDWO() {
- if (isDWO)
+ if (IsDWO)
return false;
if (DWO.get())
return false;
@@ -412,12 +545,12 @@ DWARFUnit::findRnglistFromOffset(uint32_t Offset) {
isLittleEndian, RngListTable->getAddrSize());
auto RangeListOrError = RngListTable->findList(RangesData, Offset);
if (RangeListOrError)
- return RangeListOrError.get().getAbsoluteRanges(getBaseAddress());
+ return RangeListOrError.get().getAbsoluteRanges(getBaseAddress(), *this);
return RangeListOrError.takeError();
}
- return make_error<StringError>("missing or invalid range list table",
- inconvertibleErrorCode());
+ return createStringError(errc::invalid_argument,
+ "missing or invalid range list table");
}
Expected<DWARFAddressRangesVector>
@@ -425,51 +558,26 @@ DWARFUnit::findRnglistFromIndex(uint32_t Index) {
if (auto Offset = getRnglistOffset(Index))
return findRnglistFromOffset(*Offset + RangeSectionBase);
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
if (RngListTable)
- Stream << format("invalid range list table index %d", Index);
+ return createStringError(errc::invalid_argument,
+ "invalid range list table index %d", Index);
else
- Stream << "missing or invalid range list table";
- return make_error<StringError>(Stream.str(), inconvertibleErrorCode());
+ return createStringError(errc::invalid_argument,
+ "missing or invalid range list table");
}
-void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) {
+Expected<DWARFAddressRangesVector> DWARFUnit::collectAddressRanges() {
DWARFDie UnitDie = getUnitDIE();
if (!UnitDie)
- return;
+ return createStringError(errc::invalid_argument, "No unit DIE");
+
// First, check if unit DIE describes address ranges for the whole unit.
auto CUDIERangesOrError = UnitDie.getAddressRanges();
- if (CUDIERangesOrError) {
- if (!CUDIERangesOrError.get().empty()) {
- CURanges.insert(CURanges.end(), CUDIERangesOrError.get().begin(),
- CUDIERangesOrError.get().end());
- return;
- }
- } else
- WithColor::error() << "decoding address ranges: "
- << toString(CUDIERangesOrError.takeError()) << '\n';
-
- // This function is usually called if there in no .debug_aranges section
- // in order to produce a compile unit level set of address ranges that
- // is accurate. If the DIEs weren't parsed, then we don't want all dies for
- // all compile units to stay loaded when they weren't needed. So we can end
- // up parsing the DWARF and then throwing them all away to keep memory usage
- // down.
- const bool ClearDIEs = extractDIEsIfNeeded(false) > 1;
- getUnitDIE().collectChildrenAddressRanges(CURanges);
-
- // Collect address ranges from DIEs in .dwo if necessary.
- bool DWOCreated = parseDWO();
- if (DWO)
- DWO->collectAddressRanges(CURanges);
- if (DWOCreated)
- DWO.reset();
-
- // Keep memory down by clearing DIEs if this generate function
- // caused them to be parsed.
- if (ClearDIEs)
- clearDIEs(true);
+ if (!CUDIERangesOrError)
+ return createStringError(errc::invalid_argument,
+ "decoding address ranges: %s",
+ toString(CUDIERangesOrError.takeError()).c_str());
+ return *CUDIERangesOrError;
}
void DWARFUnit::updateAddressDieMap(DWARFDie Die) {
@@ -637,15 +745,13 @@ const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
return Abbrevs;
}
-llvm::Optional<BaseAddress> DWARFUnit::getBaseAddress() {
+llvm::Optional<SectionedAddress> DWARFUnit::getBaseAddress() {
if (BaseAddr)
return BaseAddr;
DWARFDie UnitDie = getUnitDIE();
Optional<DWARFFormValue> PC = UnitDie.find({DW_AT_low_pc, DW_AT_entry_pc});
- if (Optional<uint64_t> Addr = toAddress(PC))
- BaseAddr = {*Addr, PC->getSectionIndex()};
-
+ BaseAddr = toSectionedAddress(PC);
return BaseAddr;
}
@@ -660,7 +766,7 @@ StrOffsetsContributionDescriptor::validateContributionSize(
if (ValidationSize >= Size)
if (DA.isValidOffsetForDataOfSize((uint32_t)Base, ValidationSize))
return *this;
- return Optional<StrOffsetsContributionDescriptor>();
+ return None;
}
// Look for a DWARF64-formatted contribution to the string offsets table
@@ -668,18 +774,17 @@ StrOffsetsContributionDescriptor::validateContributionSize(
static Optional<StrOffsetsContributionDescriptor>
parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
if (!DA.isValidOffsetForDataOfSize(Offset, 16))
- return Optional<StrOffsetsContributionDescriptor>();
+ return None;
if (DA.getU32(&Offset) != 0xffffffff)
- return Optional<StrOffsetsContributionDescriptor>();
+ return None;
uint64_t Size = DA.getU64(&Offset);
uint8_t Version = DA.getU16(&Offset);
(void)DA.getU16(&Offset); // padding
// The encoded length includes the 2-byte version field and the 2-byte
// padding, so we need to subtract them out when we populate the descriptor.
- return StrOffsetsContributionDescriptor(Offset, Size - 4, Version, DWARF64);
- //return Optional<StrOffsetsContributionDescriptor>(Descriptor);
+ return {{Offset, Size - 4, Version, DWARF64}};
}
// Look for a DWARF32-formatted contribution to the string offsets table
@@ -687,22 +792,20 @@ parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
static Optional<StrOffsetsContributionDescriptor>
parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
if (!DA.isValidOffsetForDataOfSize(Offset, 8))
- return Optional<StrOffsetsContributionDescriptor>();
+ return None;
uint32_t ContributionSize = DA.getU32(&Offset);
if (ContributionSize >= 0xfffffff0)
- return Optional<StrOffsetsContributionDescriptor>();
+ return None;
uint8_t Version = DA.getU16(&Offset);
(void)DA.getU16(&Offset); // padding
// The encoded length includes the 2-byte version field and the 2-byte
// padding, so we need to subtract them out when we populate the descriptor.
- return StrOffsetsContributionDescriptor(Offset, ContributionSize - 4, Version,
- DWARF32);
- //return Optional<StrOffsetsContributionDescriptor>(Descriptor);
+ return {{Offset, ContributionSize - 4, Version, DWARF32}};
}
Optional<StrOffsetsContributionDescriptor>
-DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA,
- uint64_t Offset) {
+DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA) {
+ auto Offset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base), 0);
Optional<StrOffsetsContributionDescriptor> Descriptor;
// Attempt to find a DWARF64 contribution 16 bytes before the base.
if (Offset >= 16)
@@ -715,8 +818,13 @@ DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA,
}
Optional<StrOffsetsContributionDescriptor>
-DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA,
- uint64_t Offset) {
+DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) {
+ uint64_t Offset = 0;
+ auto IndexEntry = Header.getIndexEntry();
+ const auto *C =
+ IndexEntry ? IndexEntry->getOffset(DW_SECT_STR_OFFSETS) : nullptr;
+ if (C)
+ Offset = C->Offset;
if (getVersion() >= 5) {
// Look for a valid contribution at the given offset.
auto Descriptor =
@@ -728,15 +836,9 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA,
// Prior to DWARF v5, we derive the contribution size from the
// index table (in a package file). In a .dwo file it is simply
// the length of the string offsets section.
- uint64_t Size = 0;
- auto IndexEntry = Header.getIndexEntry();
if (!IndexEntry)
- Size = StringOffsetSection.Data.size();
- else if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
- Size = C->Length;
- // Return a descriptor with the given offset as base, version 4 and
- // DWARF32 format.
- //return Optional<StrOffsetsContributionDescriptor>(
- //StrOffsetsContributionDescriptor(Offset, Size, 4, DWARF32));
- return StrOffsetsContributionDescriptor(Offset, Size, 4, DWARF32);
+ return {{0, StringOffsetSection.Data.size(), 4, DWARF32}};
+ if (C)
+ return {{C->Offset, C->Length, 4, DWARF32}};
+ return None;
}
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
index 17f17572a309..84b6c4b81817 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -164,14 +164,27 @@ DWARFUnitIndex::Entry::getOffset() const {
const DWARFUnitIndex::Entry *
DWARFUnitIndex::getFromOffset(uint32_t Offset) const {
- for (uint32_t i = 0; i != Header.NumBuckets; ++i)
- if (const auto &Contribs = Rows[i].Contributions) {
- const auto &InfoContrib = Contribs[InfoColumn];
- if (InfoContrib.Offset <= Offset &&
- Offset < (InfoContrib.Offset + InfoContrib.Length))
- return &Rows[i];
- }
- return nullptr;
+ if (OffsetLookup.empty()) {
+ for (uint32_t i = 0; i != Header.NumBuckets; ++i)
+ if (Rows[i].Contributions)
+ OffsetLookup.push_back(&Rows[i]);
+ llvm::sort(OffsetLookup, [&](Entry *E1, Entry *E2) {
+ return E1->Contributions[InfoColumn].Offset <
+ E2->Contributions[InfoColumn].Offset;
+ });
+ }
+ auto I =
+ llvm::upper_bound(OffsetLookup, Offset, [&](uint32_t Offset, Entry *E2) {
+ return Offset < E2->Contributions[InfoColumn].Offset;
+ });
+ if (I == OffsetLookup.begin())
+ return nullptr;
+ --I;
+ const auto *E = *I;
+ const auto &InfoContrib = E->Contributions[InfoColumn];
+ if ((InfoContrib.Offset + InfoContrib.Length) <= Offset)
+ return nullptr;
+ return E;
}
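
getFromOffset now builds a lookup table of entries sorted by their DW_SECT_INFO contribution offset, then answers queries with upper_bound: step back one entry and check that the offset still falls inside that entry's contribution. The floor-search idiom in isolation; this sketch sorts eagerly for brevity, whereas the patch caches the sorted table lazily:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Entry {
      uint32_t Offset;
      uint32_t Length;
    };

    // Find the entry whose [Offset, Offset + Length) range contains Target.
    const Entry *findByOffset(std::vector<Entry> &Entries, uint32_t Target) {
      std::sort(Entries.begin(), Entries.end(),
                [](const Entry &A, const Entry &B) { return A.Offset < B.Offset; });
      auto It = std::upper_bound(
          Entries.begin(), Entries.end(), Target,
          [](uint32_t LHS, const Entry &E) { return LHS < E.Offset; });
      if (It == Entries.begin())
        return nullptr;
      --It; // greatest entry with Offset <= Target
      if (Target >= It->Offset + It->Length)
        return nullptr;
      return &*It;
    }

    int main() {
      std::vector<Entry> Entries = {{0, 16}, {16, 32}, {64, 8}};
      const Entry *E = findByOffset(Entries, 20);
      return (E && E->Offset == 16) ? 0 : 1;
    }
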
const DWARFUnitIndex::Entry *DWARFUnitIndex::getFromHash(uint64_t S) const {
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 82d52c467bc0..f8370178b627 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -6,7 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-
#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
@@ -93,7 +92,7 @@ bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const {
auto End = Ranges.end();
auto Iter = findRange(RHS.Ranges.front());
for (const auto &R : RHS.Ranges) {
- if(Iter == End)
+ if (Iter == End)
return false;
if (R.HighPC <= Iter->LowPC)
continue;
@@ -156,14 +155,14 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
OffsetStart);
if (!ValidLength)
note() << "The length for this unit is too "
- "large for the .debug_info provided.\n";
+ "large for the .debug_info provided.\n";
if (!ValidVersion)
note() << "The 16 bit unit header version is not valid.\n";
if (!ValidType)
note() << "The unit type encoding is not valid.\n";
if (!ValidAbbrevOffset)
note() << "The offset into the .debug_abbrev section is "
- "not valid.\n";
+ "not valid.\n";
if (!ValidAddrSize)
note() << "The address size is unsupported.\n";
}
@@ -171,24 +170,38 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
return Success;
}
-bool DWARFVerifier::verifyUnitContents(DWARFUnit &Unit, uint8_t UnitType) {
- uint32_t NumUnitErrors = 0;
+unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
+ unsigned NumUnitErrors = 0;
unsigned NumDies = Unit.getNumDIEs();
for (unsigned I = 0; I < NumDies; ++I) {
auto Die = Unit.getDIEAtIndex(I);
+
if (Die.getTag() == DW_TAG_null)
continue;
+
+ bool HasTypeAttr = false;
for (auto AttrValue : Die.attributes()) {
NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
NumUnitErrors += verifyDebugInfoForm(Die, AttrValue);
+ HasTypeAttr |= (AttrValue.Attr == DW_AT_type);
+ }
+
+ if (!HasTypeAttr && (Die.getTag() == DW_TAG_formal_parameter ||
+ Die.getTag() == DW_TAG_variable ||
+ Die.getTag() == DW_TAG_array_type)) {
+ error() << "DIE with tag " << TagString(Die.getTag())
+ << " is missing type attribute:\n";
+ dump(Die) << '\n';
+ NumUnitErrors++;
}
+ NumUnitErrors += verifyDebugInfoCallSite(Die);
}
DWARFDie Die = Unit.getUnitDIE(/* ExtractUnitDIEOnly = */ false);
if (!Die) {
error() << "Compilation unit without DIE.\n";
NumUnitErrors++;
- return NumUnitErrors == 0;
+ return NumUnitErrors;
}
if (!dwarf::isUnitType(Die.getTag())) {
@@ -197,8 +210,8 @@ bool DWARFVerifier::verifyUnitContents(DWARFUnit &Unit, uint8_t UnitType) {
NumUnitErrors++;
}
- if (UnitType != 0 &&
- !DWARFUnit::isMatchingUnitTypeAndTag(UnitType, Die.getTag())) {
+ uint8_t UnitType = Unit.getUnitType();
+ if (!DWARFUnit::isMatchingUnitTypeAndTag(UnitType, Die.getTag())) {
error() << "Compilation unit type (" << dwarf::UnitTypeString(UnitType)
<< ") and root DIE (" << dwarf::TagString(Die.getTag())
<< ") do not match.\n";
@@ -208,7 +221,39 @@ bool DWARFVerifier::verifyUnitContents(DWARFUnit &Unit, uint8_t UnitType) {
DieRangeInfo RI;
NumUnitErrors += verifyDieRanges(Die, RI);
- return NumUnitErrors == 0;
+ return NumUnitErrors;
+}
+
+unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) {
+ if (Die.getTag() != DW_TAG_call_site)
+ return 0;
+
+ DWARFDie Curr = Die.getParent();
+ for (; Curr.isValid() && !Curr.isSubprogramDIE(); Curr = Die.getParent()) {
+ if (Curr.getTag() == DW_TAG_inlined_subroutine) {
+ error() << "Call site entry nested within inlined subroutine:";
+ Curr.dump(OS);
+ return 1;
+ }
+ }
+
+ if (!Curr.isValid()) {
+ error() << "Call site entry not nested within a valid subprogram:";
+ Die.dump(OS);
+ return 1;
+ }
+
+ Optional<DWARFFormValue> CallAttr =
+ Curr.find({DW_AT_call_all_calls, DW_AT_call_all_source_calls,
+ DW_AT_call_all_tail_calls});
+ if (!CallAttr) {
+ error() << "Subprogram with call site entry has no DW_AT_call attribute:";
+ Curr.dump(OS);
+ Die.dump(OS, /*indent*/ 1);
+ return 1;
+ }
+
+ return 0;
}
unsigned DWARFVerifier::verifyAbbrevSection(const DWARFDebugAbbrev *Abbrev) {
@@ -252,20 +297,18 @@ bool DWARFVerifier::handleDebugAbbrev() {
return NumErrors == 0;
}
-bool DWARFVerifier::handleDebugInfo() {
- OS << "Verifying .debug_info Unit Header Chain...\n";
-
+unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
+ DWARFSectionKind SectionKind) {
const DWARFObject &DObj = DCtx.getDWARFObj();
- DWARFDataExtractor DebugInfoData(DObj, DObj.getInfoSection(),
- DCtx.isLittleEndian(), 0);
- uint32_t NumDebugInfoErrors = 0;
+ DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0);
+ unsigned NumDebugInfoErrors = 0;
uint32_t OffsetStart = 0, Offset = 0, UnitIdx = 0;
uint8_t UnitType = 0;
bool isUnitDWARF64 = false;
bool isHeaderChainValid = true;
bool hasDIE = DebugInfoData.isValidOffset(Offset);
- DWARFUnitSection<DWARFTypeUnit> TUSection{};
- DWARFUnitSection<DWARFCompileUnit> CUSection{};
+ DWARFUnitVector TypeUnitVector;
+ DWARFUnitVector CompileUnitVector;
while (hasDIE) {
OffsetStart = Offset;
if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType,
@@ -275,46 +318,64 @@ bool DWARFVerifier::handleDebugInfo() {
break;
} else {
DWARFUnitHeader Header;
- Header.extract(DCtx, DebugInfoData, &OffsetStart);
- std::unique_ptr<DWARFUnit> Unit;
+ Header.extract(DCtx, DebugInfoData, &OffsetStart, SectionKind);
+ DWARFUnit *Unit;
switch (UnitType) {
case dwarf::DW_UT_type:
case dwarf::DW_UT_split_type: {
- Unit.reset(new DWARFTypeUnit(
- DCtx, DObj.getInfoSection(), Header, DCtx.getDebugAbbrev(),
- &DObj.getRangeSection(), DObj.getStringSection(),
+ Unit = TypeUnitVector.addUnit(llvm::make_unique<DWARFTypeUnit>(
+ DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(),
+ &DObj.getLocSection(), DObj.getStringSection(),
DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(),
- DObj.getLineSection(), DCtx.isLittleEndian(), false, TUSection));
+ DObj.getLineSection(), DCtx.isLittleEndian(), false,
+ TypeUnitVector));
break;
}
case dwarf::DW_UT_skeleton:
case dwarf::DW_UT_split_compile:
case dwarf::DW_UT_compile:
case dwarf::DW_UT_partial:
- // UnitType = 0 means that we are
- // verifying a compile unit in DWARF v4.
+ // UnitType = 0 means that we are verifying a compile unit in DWARF v4.
case 0: {
- Unit.reset(new DWARFCompileUnit(
- DCtx, DObj.getInfoSection(), Header, DCtx.getDebugAbbrev(),
- &DObj.getRangeSection(), DObj.getStringSection(),
+ Unit = CompileUnitVector.addUnit(llvm::make_unique<DWARFCompileUnit>(
+ DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(),
+ &DObj.getLocSection(), DObj.getStringSection(),
DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(),
- DObj.getLineSection(), DCtx.isLittleEndian(), false, CUSection));
+ DObj.getLineSection(), DCtx.isLittleEndian(), false,
+ CompileUnitVector));
break;
}
default: { llvm_unreachable("Invalid UnitType."); }
}
- if (!verifyUnitContents(*Unit, UnitType))
- ++NumDebugInfoErrors;
+ NumDebugInfoErrors += verifyUnitContents(*Unit);
}
hasDIE = DebugInfoData.isValidOffset(Offset);
++UnitIdx;
}
if (UnitIdx == 0 && !hasDIE) {
- warn() << ".debug_info is empty.\n";
+ warn() << "Section is empty.\n";
isHeaderChainValid = true;
}
+ if (!isHeaderChainValid)
+ ++NumDebugInfoErrors;
NumDebugInfoErrors += verifyDebugInfoReferences();
- return (isHeaderChainValid && NumDebugInfoErrors == 0);
+ return NumDebugInfoErrors;
+}
+
+bool DWARFVerifier::handleDebugInfo() {
+ const DWARFObject &DObj = DCtx.getDWARFObj();
+ unsigned NumErrors = 0;
+
+ OS << "Verifying .debug_info Unit Header Chain...\n";
+ DObj.forEachInfoSections([&](const DWARFSection &S) {
+ NumErrors += verifyUnitSection(S, DW_SECT_INFO);
+ });
+
+ OS << "Verifying .debug_types Unit Header Chain...\n";
+ DObj.forEachTypesSections([&](const DWARFSection &S) {
+ NumErrors += verifyUnitSection(S, DW_SECT_TYPES);
+ });
+ return NumErrors == 0;
}
unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
@@ -336,20 +397,42 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
// Build RI for this DIE and check that ranges within this DIE do not
// overlap.
DieRangeInfo RI(Die);
- for (auto Range : Ranges) {
- if (!Range.valid()) {
- ++NumErrors;
- error() << "Invalid address range " << Range << "\n";
- continue;
- }
- // Verify that ranges don't intersect.
- const auto IntersectingRange = RI.insert(Range);
- if (IntersectingRange != RI.Ranges.end()) {
- ++NumErrors;
- error() << "DIE has overlapping address ranges: " << Range << " and "
- << *IntersectingRange << "\n";
- break;
+ // TODO support object files better
+ //
+ // Some object file formats (i.e. non-MachO) support COMDAT. ELF in
+ // particular does so by placing each function into a section. The DWARF data
+ // for the function at that point uses a section relative DW_FORM_addrp for
+ // the DW_AT_low_pc and a DW_FORM_data4 for the offset as the DW_AT_high_pc.
+ // In such a case, when the Die is the CU, the ranges will overlap, and we
+ // will flag valid conflicting ranges as invalid.
+ //
+ // For such targets, we should read the ranges from the CU and partition them
+ // by the section id. The ranges within a particular section should be
+ // disjoint, although the ranges across sections may overlap. We would map
+ // the child die to the entity that it references and the section with which
+ // it is associated. The child would then be checked against the range
+ // information for the associated section.
+ //
+ // For now, simply elide the range verification for the CU DIEs if we are
+ // processing an object file.
+
+ if (!IsObjectFile || IsMachOObject || Die.getTag() != DW_TAG_compile_unit) {
+ for (auto Range : Ranges) {
+ if (!Range.valid()) {
+ ++NumErrors;
+ error() << "Invalid address range " << Range << "\n";
+ continue;
+ }
+
+ // Verify that ranges don't intersect.
+ const auto IntersectingRange = RI.insert(Range);
+ if (IntersectingRange != RI.Ranges.end()) {
+ ++NumErrors;
+ error() << "DIE has overlapping address ranges: " << Range << " and "
+ << *IntersectingRange << "\n";
+ break;
+ }
}
}
@@ -358,9 +441,8 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
if (IntersectingChild != ParentRI.Children.end()) {
++NumErrors;
error() << "DIEs have overlapping address ranges:";
- Die.dump(OS, 0);
- IntersectingChild->Die.dump(OS, 0);
- OS << "\n";
+ dump(Die);
+ dump(IntersectingChild->Die) << '\n';
}
// Verify that ranges are contained within their parent.
@@ -370,9 +452,8 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
if (ShouldBeContained && !ParentRI.contains(RI)) {
++NumErrors;
error() << "DIE address ranges are not contained in its parent's ranges:";
- ParentRI.Die.dump(OS, 0);
- Die.dump(OS, 2);
- OS << "\n";
+ dump(ParentRI.Die);
+ dump(Die, 2) << '\n';
}
// Recursively check children.
@@ -388,8 +469,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
auto ReportError = [&](const Twine &TitleMsg) {
++NumErrors;
error() << TitleMsg << '\n';
- Die.dump(OS, 0, DumpOpts);
- OS << "\n";
+ dump(Die) << '\n';
};
const DWARFObject &DObj = DCtx.getDWARFObj();
@@ -438,7 +518,33 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
}
break;
}
-
+ case DW_AT_specification:
+ case DW_AT_abstract_origin: {
+ if (auto ReferencedDie = Die.getAttributeValueAsReferencedDie(Attr)) {
+ auto DieTag = Die.getTag();
+ auto RefTag = ReferencedDie.getTag();
+ if (DieTag == RefTag)
+ break;
+ if (DieTag == DW_TAG_inlined_subroutine && RefTag == DW_TAG_subprogram)
+ break;
+ if (DieTag == DW_TAG_variable && RefTag == DW_TAG_member)
+ break;
+ ReportError("DIE with tag " + TagString(DieTag) + " has " +
+ AttributeString(Attr) +
+ " that points to DIE with "
+ "incompatible tag " +
+ TagString(RefTag));
+ }
+ break;
+ }
+ case DW_AT_type: {
+ DWARFDie TypeDie = Die.getAttributeValueAsReferencedDie(DW_AT_type);
+ if (TypeDie && !isType(TypeDie.getTag())) {
+ ReportError("DIE has " + AttributeString(Attr) +
+ " with incompatible tag " + TagString(TypeDie.getTag()));
+ }
+ break;
+ }
default:
break;
}
@@ -448,6 +554,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
DWARFAttribute &AttrValue) {
const DWARFObject &DObj = DCtx.getDWARFObj();
+ auto DieCU = Die.getDwarfUnit();
unsigned NumErrors = 0;
const auto Form = AttrValue.Value.getForm();
switch (Form) {
@@ -460,7 +567,6 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
assert(RefVal);
if (RefVal) {
- auto DieCU = Die.getDwarfUnit();
auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset();
auto CUOffset = AttrValue.Value.getRawUValue();
if (CUOffset >= CUSize) {
@@ -470,7 +576,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
<< " is invalid (must be less than CU size of "
<< format("0x%08" PRIx32, CUSize) << "):\n";
Die.dump(OS, 0, DumpOpts);
- OS << "\n";
+ dump(Die) << '\n';
} else {
// Valid reference, but we will verify it points to an actual
// DIE later.
@@ -485,12 +591,11 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
assert(RefVal);
if (RefVal) {
- if (*RefVal >= DObj.getInfoSection().Data.size()) {
+ if (*RefVal >= DieCU->getInfoSection().Data.size()) {
++NumErrors;
error() << "DW_FORM_ref_addr offset beyond .debug_info "
"bounds:\n";
- Die.dump(OS, 0, DumpOpts);
- OS << "\n";
+ dump(Die) << '\n';
} else {
// Valid reference, but we will verify it points to an actual
// DIE later.
@@ -505,8 +610,46 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
if (SecOffset && *SecOffset >= DObj.getStringSection().size()) {
++NumErrors;
error() << "DW_FORM_strp offset beyond .debug_str bounds:\n";
- Die.dump(OS, 0, DumpOpts);
- OS << "\n";
+ dump(Die) << '\n';
+ }
+ break;
+ }
+ case DW_FORM_strx:
+ case DW_FORM_strx1:
+ case DW_FORM_strx2:
+ case DW_FORM_strx3:
+ case DW_FORM_strx4: {
+ auto Index = AttrValue.Value.getRawUValue();
+ auto DieCU = Die.getDwarfUnit();
+ // Check that we have a valid DWARF v5 string offsets table.
+ if (!DieCU->getStringOffsetsTableContribution()) {
+ ++NumErrors;
+ error() << FormEncodingString(Form)
+ << " used without a valid string offsets table:\n";
+ dump(Die) << '\n';
+ break;
+ }
+ // Check that the index is within the bounds of the section.
+ unsigned ItemSize = DieCU->getDwarfStringOffsetsByteSize();
+ // Use a 64-bit type to calculate the offset to guard against overflow.
+ uint64_t Offset =
+ (uint64_t)DieCU->getStringOffsetsBase() + Index * ItemSize;
+ if (DObj.getStringOffsetSection().Data.size() < Offset + ItemSize) {
+ ++NumErrors;
+ error() << FormEncodingString(Form) << " uses index "
+ << format("%" PRIu64, Index) << ", which is too large:\n";
+ dump(Die) << '\n';
+ break;
+ }
+ // Check that the string offset is valid.
+ uint64_t StringOffset = *DieCU->getStringOffsetSectionItem(Index);
+ if (StringOffset >= DObj.getStringSection().size()) {
+ ++NumErrors;
+ error() << FormEncodingString(Form) << " uses index "
+ << format("%" PRIu64, Index)
+ << ", but the referenced string"
+ " offset is beyond .debug_str bounds:\n";
+ dump(Die) << '\n';
}
break;
}
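
The new DW_FORM_strx* verification computes the string-offsets entry position as base plus index times entry size, deliberately widening to 64 bits so a huge index cannot wrap the offset back into bounds. A small standalone illustration of why that widening matters; the values are chosen only to trigger 32-bit wrap-around:

    #include <cstdint>
    #include <cstdio>

    // base + index * item_size can overflow in 32-bit arithmetic, making an
    // out-of-range index look valid; widening to 64 bits first keeps the
    // bounds check meaningful.
    bool indexInBounds(uint32_t Base, uint32_t Index, unsigned ItemSize,
                       uint64_t SectionSize) {
      uint64_t Offset = uint64_t(Base) + uint64_t(Index) * ItemSize;
      return SectionSize >= Offset + ItemSize;
    }

    int main() {
      // With 32-bit arithmetic 0x40000000 * 4 wraps to 0, so this index would
      // incorrectly appear to be in bounds.
      bool InBounds = indexInBounds(/*Base=*/8, /*Index=*/0x40000000,
                                    /*ItemSize=*/4, /*SectionSize=*/1024);
      std::printf("%s\n", InBounds ? "in bounds" : "out of bounds");
      return InBounds ? 1 : 0;
    }
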
@@ -528,11 +671,8 @@ unsigned DWARFVerifier::verifyDebugInfoReferences() {
++NumErrors;
error() << "invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
<< ". Offset is in between DIEs:\n";
- for (auto Offset : Pair.second) {
- auto ReferencingDie = DCtx.getDIEForOffset(Offset);
- ReferencingDie.dump(OS, 0, DumpOpts);
- OS << "\n";
- }
+ for (auto Offset : Pair.second)
+ dump(DCtx.getDIEForOffset(Offset)) << '\n';
OS << "\n";
}
return NumErrors;
@@ -555,8 +695,7 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() {
++NumDebugLineErrors;
error() << ".debug_line[" << format("0x%08" PRIx32, LineTableOffset)
<< "] was not able to be parsed for CU:\n";
- Die.dump(OS, 0, DumpOpts);
- OS << '\n';
+ dump(Die) << '\n';
continue;
}
} else {
@@ -573,9 +712,8 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() {
<< format("0x%08" PRIx32, Iter->second.getOffset()) << " and "
<< format("0x%08" PRIx32, Die.getOffset())
<< ", have the same DW_AT_stmt_list section offset:\n";
- Iter->second.dump(OS, 0, DumpOpts);
- Die.dump(OS, 0, DumpOpts);
- OS << '\n';
+ dump(Iter->second);
+ dump(Die) << '\n';
// Already verified this line table before, no need to do it again.
continue;
}
@@ -671,6 +809,16 @@ void DWARFVerifier::verifyDebugLineRows() {
}
}
+DWARFVerifier::DWARFVerifier(raw_ostream &S, DWARFContext &D,
+ DIDumpOptions DumpOpts)
+ : OS(S), DCtx(D), DumpOpts(std::move(DumpOpts)), IsObjectFile(false),
+ IsMachOObject(false) {
+ if (const auto *F = DCtx.getDWARFObj().getFile()) {
+ IsObjectFile = F->isRelocatableObject();
+ IsMachOObject = F->isMachO();
+ }
+}
+
bool DWARFVerifier::handleDebugLine() {
NumDebugLineErrors = 0;
OS << "Verifying .debug_line...\n";
@@ -816,8 +964,8 @@ DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) {
if (Iter->second != NotIndexed) {
error() << formatv("Name Index @ {0:x} references a CU @ {1:x}, but "
- "this CU is already indexed by Name Index @ {2:x}\n",
- NI.getUnitOffset(), Offset, Iter->second);
+ "this CU is already indexed by Name Index @ {2:x}\n",
+ NI.getUnitOffset(), Offset, Iter->second);
continue;
}
Iter->second = NI.getUnitOffset();
@@ -1048,16 +1196,19 @@ DWARFVerifier::verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI) {
return NumErrors;
}
-static SmallVector<StringRef, 2> getNames(const DWARFDie &DIE) {
+static SmallVector<StringRef, 2> getNames(const DWARFDie &DIE,
+ bool IncludeLinkageName = true) {
SmallVector<StringRef, 2> Result;
if (const char *Str = DIE.getName(DINameKind::ShortName))
Result.emplace_back(Str);
else if (DIE.getTag() == dwarf::DW_TAG_namespace)
Result.emplace_back("(anonymous namespace)");
- if (const char *Str = DIE.getName(DINameKind::LinkageName)) {
- if (Result.empty() || Result[0] != Str)
- Result.emplace_back(Str);
+ if (IncludeLinkageName) {
+ if (const char *Str = DIE.getName(DINameKind::LinkageName)) {
+ if (Result.empty() || Result[0] != Str)
+ Result.emplace_back(Str);
+ }
}
return Result;
@@ -1200,7 +1351,9 @@ unsigned DWARFVerifier::verifyNameIndexCompleteness(
// "If a subprogram or inlined subroutine is included, and has a
// DW_AT_linkage_name attribute, there will be an additional index entry for
// the linkage name."
- auto EntryNames = getNames(Die);
+ auto IncludeLinkageName = Die.getTag() == DW_TAG_subprogram ||
+ Die.getTag() == DW_TAG_inlined_subroutine;
+ auto EntryNames = getNames(Die, IncludeLinkageName);
if (EntryNames.empty())
return 0;
@@ -1212,8 +1365,9 @@ unsigned DWARFVerifier::verifyNameIndexCompleteness(
// make sure we catch any missing items, we instead blacklist all TAGs that we
// know shouldn't be indexed.
switch (Die.getTag()) {
- // Compile unit has a name but it shouldn't be indexed.
+ // Compile units and modules have names but shouldn't be indexed.
case DW_TAG_compile_unit:
+ case DW_TAG_module:
return 0;
// Function and template parameters are not globally visible, so we shouldn't
@@ -1315,11 +1469,12 @@ unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
if (NumErrors > 0)
return NumErrors;
- for (const std::unique_ptr<DWARFCompileUnit> &CU : DCtx.compile_units()) {
+ for (const std::unique_ptr<DWARFUnit> &U : DCtx.compile_units()) {
if (const DWARFDebugNames::NameIndex *NI =
- AccelTable.getCUNameIndex(CU->getOffset())) {
+ AccelTable.getCUNameIndex(U->getOffset())) {
+ auto *CU = cast<DWARFCompileUnit>(U.get());
for (const DWARFDebugInfoEntry &Die : CU->dies())
- NumErrors += verifyNameIndexCompleteness(DWARFDie(CU.get(), &Die), *NI);
+ NumErrors += verifyNameIndexCompleteness(DWARFDie(CU, &Die), *NI);
}
}
return NumErrors;
@@ -1330,17 +1485,17 @@ bool DWARFVerifier::handleAccelTables() {
DataExtractor StrData(D.getStringSection(), DCtx.isLittleEndian(), 0);
unsigned NumErrors = 0;
if (!D.getAppleNamesSection().Data.empty())
- NumErrors +=
- verifyAppleAccelTable(&D.getAppleNamesSection(), &StrData, ".apple_names");
+ NumErrors += verifyAppleAccelTable(&D.getAppleNamesSection(), &StrData,
+ ".apple_names");
if (!D.getAppleTypesSection().Data.empty())
- NumErrors +=
- verifyAppleAccelTable(&D.getAppleTypesSection(), &StrData, ".apple_types");
+ NumErrors += verifyAppleAccelTable(&D.getAppleTypesSection(), &StrData,
+ ".apple_types");
if (!D.getAppleNamespacesSection().Data.empty())
NumErrors += verifyAppleAccelTable(&D.getAppleNamespacesSection(), &StrData,
- ".apple_namespaces");
+ ".apple_namespaces");
if (!D.getAppleObjCSection().Data.empty())
- NumErrors +=
- verifyAppleAccelTable(&D.getAppleObjCSection(), &StrData, ".apple_objc");
+ NumErrors += verifyAppleAccelTable(&D.getAppleObjCSection(), &StrData,
+ ".apple_objc");
if (!D.getDebugNamesSection().Data.empty())
NumErrors += verifyDebugNames(D.getDebugNamesSection(), StrData);
@@ -1352,3 +1507,8 @@ raw_ostream &DWARFVerifier::error() const { return WithColor::error(OS); }
raw_ostream &DWARFVerifier::warn() const { return WithColor::warning(OS); }
raw_ostream &DWARFVerifier::note() const { return WithColor::note(OS); }
+
+raw_ostream &DWARFVerifier::dump(const DWARFDie &Die, unsigned indent) const {
+ Die.dump(OS, indent, DumpOpts);
+ return OS;
+}
diff --git a/contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp b/contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp
index 1b8294e47e75..bfac6bebba3f 100644
--- a/contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp
+++ b/contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp
@@ -14,14 +14,12 @@
using namespace llvm;
using namespace llvm::msf;
-namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class MSFErrorCategory : public std::error_category {
public:
const char *name() const noexcept override { return "llvm.msf"; }
-
std::string message(int Condition) const override {
switch (static_cast<msf_error_code>(Condition)) {
case msf_error_code::unspecified:
@@ -41,30 +39,8 @@ public:
llvm_unreachable("Unrecognized msf_error_code");
}
};
-} // end anonymous namespace
-
-static ManagedStatic<MSFErrorCategory> Category;
-
-char MSFError::ID = 0;
-
-MSFError::MSFError(msf_error_code C) : MSFError(C, "") {}
-
-MSFError::MSFError(const std::string &Context)
- : MSFError(msf_error_code::unspecified, Context) {}
-
-MSFError::MSFError(msf_error_code C, const std::string &Context) : Code(C) {
- ErrMsg = "MSF Error: ";
- std::error_code EC = convertToErrorCode();
- if (Code != msf_error_code::unspecified)
- ErrMsg += EC.message() + " ";
- if (!Context.empty())
- ErrMsg += Context;
-}
-
-void MSFError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-const std::string &MSFError::getErrorMessage() const { return ErrMsg; }
+static llvm::ManagedStatic<MSFErrorCategory> MSFCategory;
+const std::error_category &llvm::msf::MSFErrCategory() { return *MSFCategory; }
-std::error_code MSFError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(Code), *Category);
-}
+char MSFError::ID;
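
The MSFError rewrite above (and the matching DIAError change further down) strips the hand-rolled constructors and message plumbing, leaving just a std::error_category subclass behind a ManagedStatic accessor. A minimal standalone example of defining such a category in plain standard C++; the enum and names are invented for the example:

    #include <iostream>
    #include <string>
    #include <system_error>

    enum class demo_error_code { unspecified = 1, insufficient_buffer };

    class DemoErrorCategory : public std::error_category {
    public:
      const char *name() const noexcept override { return "demo"; }
      std::string message(int Condition) const override {
        switch (static_cast<demo_error_code>(Condition)) {
        case demo_error_code::unspecified:
          return "An unknown error has occurred.";
        case demo_error_code::insufficient_buffer:
          return "The buffer is not large enough to read the requested data.";
        }
        return "Unrecognized error code.";
      }
    };

    // Single category instance, playing the role ManagedStatic plays in LLVM.
    const std::error_category &demoCategory() {
      static DemoErrorCategory Category;
      return Category;
    }

    std::error_code make_error_code(demo_error_code E) {
      return {static_cast<int>(E), demoCategory()};
    }

    int main() {
      std::error_code EC = make_error_code(demo_error_code::insufficient_buffer);
      std::cout << EC.category().name() << ": " << EC.message() << '\n';
      return 0;
    }
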
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
index 7eabed8cad48..6a10513fad97 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
@@ -8,8 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/ConvertUTF.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAUtils.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -23,16 +22,7 @@ uint32_t DIADataStream::getRecordCount() const {
}
std::string DIADataStream::getName() const {
- CComBSTR Name16;
- if (S_OK != StreamData->get_name(&Name16))
- return std::string();
-
- std::string Name8;
- llvm::ArrayRef<char> Name16Bytes(reinterpret_cast<char *>(Name16.m_str),
- Name16.ByteLength());
- if (!llvm::convertUTF16ToUTF8String(Name16Bytes, Name8))
- return std::string();
- return Name8;
+ return invokeBstrMethod(*StreamData, &IDiaEnumDebugStreamData::get_name);
}
llvm::Optional<DIADataStream::RecordType>
@@ -65,11 +55,3 @@ bool DIADataStream::getNext(RecordType &Record) {
}
void DIADataStream::reset() { StreamData->Reset(); }
-
-DIADataStream *DIADataStream::clone() const {
- CComPtr<IDiaEnumDebugStreamData> EnumeratorClone;
- if (S_OK != StreamData->Clone(&EnumeratorClone))
- return nullptr;
-
- return new DIADataStream(EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
index f62c4991fe33..d2451f13e6cb 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
@@ -45,10 +45,3 @@ std::unique_ptr<IPDBDataStream> DIAEnumDebugStreams::getNext() {
}
void DIAEnumDebugStreams::reset() { Enumerator->Reset(); }
-
-DIAEnumDebugStreams *DIAEnumDebugStreams::clone() const {
- CComPtr<IDiaEnumDebugStreams> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumDebugStreams(EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp
new file mode 100644
index 000000000000..f873f3525df5
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp
@@ -0,0 +1,42 @@
+//==- DIAEnumFrameData.cpp ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAFrameData.h"
+#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+
+using namespace llvm::pdb;
+
+DIAEnumFrameData::DIAEnumFrameData(CComPtr<IDiaEnumFrameData> DiaEnumerator)
+ : Enumerator(DiaEnumerator) {}
+
+uint32_t DIAEnumFrameData::getChildCount() const {
+ LONG Count = 0;
+ return (S_OK == Enumerator->get_Count(&Count)) ? Count : 0;
+}
+
+std::unique_ptr<IPDBFrameData>
+DIAEnumFrameData::getChildAtIndex(uint32_t Index) const {
+ CComPtr<IDiaFrameData> Item;
+ if (S_OK != Enumerator->Item(Index, &Item))
+ return nullptr;
+
+ return std::unique_ptr<IPDBFrameData>(new DIAFrameData(Item));
+}
+
+std::unique_ptr<IPDBFrameData> DIAEnumFrameData::getNext() {
+ CComPtr<IDiaFrameData> Item;
+ ULONG NumFetched = 0;
+ if (S_OK != Enumerator->Next(1, &Item, &NumFetched))
+ return nullptr;
+
+ return std::unique_ptr<IPDBFrameData>(new DIAFrameData(Item));
+}
+
+void DIAEnumFrameData::reset() { Enumerator->Reset(); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
index d7c908e04593..6c361b81e33d 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
@@ -15,9 +15,8 @@ using namespace llvm;
using namespace llvm::pdb;
DIAEnumInjectedSources::DIAEnumInjectedSources(
- const DIASession &PDBSession,
CComPtr<IDiaEnumInjectedSources> DiaEnumerator)
- : Session(PDBSession), Enumerator(DiaEnumerator) {}
+ : Enumerator(DiaEnumerator) {}
uint32_t DIAEnumInjectedSources::getChildCount() const {
LONG Count = 0;
@@ -43,10 +42,3 @@ std::unique_ptr<IPDBInjectedSource> DIAEnumInjectedSources::getNext() {
}
void DIAEnumInjectedSources::reset() { Enumerator->Reset(); }
-
-DIAEnumInjectedSources *DIAEnumInjectedSources::clone() const {
- CComPtr<IDiaEnumInjectedSources> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumInjectedSources(Session, EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
index 796ce214b383..0820d9dc7c9f 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
@@ -42,10 +42,3 @@ std::unique_ptr<IPDBLineNumber> DIAEnumLineNumbers::getNext() {
}
void DIAEnumLineNumbers::reset() { Enumerator->Reset(); }
-
-DIAEnumLineNumbers *DIAEnumLineNumbers::clone() const {
- CComPtr<IDiaEnumLineNumbers> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumLineNumbers(EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
index 1f405f049198..90c857aa5713 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
@@ -45,10 +45,3 @@ std::unique_ptr<IPDBSectionContrib> DIAEnumSectionContribs::getNext() {
}
void DIAEnumSectionContribs::reset() { Enumerator->Reset(); }
-
-DIAEnumSectionContribs *DIAEnumSectionContribs::clone() const {
- CComPtr<IDiaEnumSectionContribs> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumSectionContribs(Session, EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
index b9311d060128..06595e7ec1c8 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
@@ -42,10 +42,3 @@ std::unique_ptr<IPDBSourceFile> DIAEnumSourceFiles::getNext() {
}
void DIAEnumSourceFiles::reset() { Enumerator->Reset(); }
-
-DIAEnumSourceFiles *DIAEnumSourceFiles::clone() const {
- CComPtr<IDiaEnumSourceFiles> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumSourceFiles(Session, EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
index 266638530c2f..48bc32767e6c 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
@@ -46,10 +46,3 @@ std::unique_ptr<PDBSymbol> DIAEnumSymbols::getNext() {
}
void DIAEnumSymbols::reset() { Enumerator->Reset(); }
-
-DIAEnumSymbols *DIAEnumSymbols::clone() const {
- CComPtr<IDiaEnumSymbols> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumSymbols(Session, EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
index 511b55585ebd..6fa096156d48 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
@@ -13,9 +13,8 @@
using namespace llvm;
using namespace llvm::pdb;
-DIAEnumTables::DIAEnumTables(
- CComPtr<IDiaEnumTables> DiaEnumerator)
- : Enumerator(DiaEnumerator) {}
+DIAEnumTables::DIAEnumTables(CComPtr<IDiaEnumTables> DiaEnumerator)
+ : Enumerator(DiaEnumerator) {}
uint32_t DIAEnumTables::getChildCount() const {
LONG Count = 0;
@@ -44,10 +43,3 @@ std::unique_ptr<IPDBTable> DIAEnumTables::getNext() {
}
void DIAEnumTables::reset() { Enumerator->Reset(); }
-
-DIAEnumTables *DIAEnumTables::clone() const {
- CComPtr<IDiaEnumTables> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumTables(EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
index 0da877b0fbad..819651f77787 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
@@ -11,14 +11,13 @@ using namespace llvm::pdb;
class DIAErrorCategory : public std::error_category {
public:
const char *name() const noexcept override { return "llvm.pdb.dia"; }
-
std::string message(int Condition) const override {
switch (static_cast<dia_error_code>(Condition)) {
case dia_error_code::could_not_create_impl:
- return "Failed to connect to DIA at runtime. Verify that Visual Studio "
+ return "Failed to connect to DIA at runtime. Verify that Visual Studio "
"is properly installed, or that msdiaXX.dll is in your PATH.";
case dia_error_code::invalid_file_format:
- return "Unable to load PDB. The file has an unrecognized format.";
+ return "Unable to load PDB. The file has an unrecognized format.";
case dia_error_code::invalid_parameter:
return "The parameter is incorrect.";
case dia_error_code::already_loaded:
@@ -32,27 +31,7 @@ public:
}
};
-static ManagedStatic<DIAErrorCategory> Category;
-
-char DIAError::ID = 0;
-
-DIAError::DIAError(dia_error_code C) : DIAError(C, "") {}
-
-DIAError::DIAError(StringRef Context)
- : DIAError(dia_error_code::unspecified, Context) {}
-
-DIAError::DIAError(dia_error_code C, StringRef Context) : Code(C) {
- ErrMsg = "DIA Error: ";
- std::error_code EC = convertToErrorCode();
- ErrMsg += EC.message() + " ";
- if (!Context.empty())
- ErrMsg += Context;
-}
-
-void DIAError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-
-StringRef DIAError::getErrorMessage() const { return ErrMsg; }
+static llvm::ManagedStatic<DIAErrorCategory> DIACategory;
+const std::error_category &llvm::pdb::DIAErrCategory() { return *DIACategory; }
-std::error_code DIAError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(Code), *Category);
-}
+char DIAError::ID;
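For reference, the reworked DIAError is consumed through the standard llvm::Error machinery; a minimal usage sketch mirroring the make_error call sites shown later in this diff (the context string and function name are illustrative):

#include "llvm/DebugInfo/PDB/DIA/DIAError.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

llvm::Error openSession() {
  // Same construction pattern DIASession uses for E_INVALIDARG and friends.
  return llvm::make_error<llvm::pdb::DIAError>(
      llvm::pdb::dia_error_code::invalid_parameter, "illustrative context");
}

void demo() {
  if (llvm::Error E = openSession())
    llvm::errs() << llvm::toString(std::move(E)) << "\n";
}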
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp
new file mode 100644
index 000000000000..533cce7923c0
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp
@@ -0,0 +1,53 @@
+//===- DIAFrameData.cpp - DIA impl. of IPDBFrameData -------------- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/DIA/DIAFrameData.h"
+#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAUtils.h"
+
+using namespace llvm::pdb;
+
+DIAFrameData::DIAFrameData(CComPtr<IDiaFrameData> DiaFrameData)
+ : FrameData(DiaFrameData) {}
+
+template <typename ArgType>
+ArgType
+PrivateGetDIAValue(IDiaFrameData *FrameData,
+ HRESULT (__stdcall IDiaFrameData::*Method)(ArgType *)) {
+ ArgType Value;
+ if (S_OK == (FrameData->*Method)(&Value))
+ return static_cast<ArgType>(Value);
+
+ return ArgType();
+}
+
+uint32_t DIAFrameData::getAddressOffset() const {
+ return PrivateGetDIAValue(FrameData, &IDiaFrameData::get_addressOffset);
+}
+
+uint32_t DIAFrameData::getAddressSection() const {
+ return PrivateGetDIAValue(FrameData, &IDiaFrameData::get_addressSection);
+}
+
+uint32_t DIAFrameData::getLengthBlock() const {
+ return PrivateGetDIAValue(FrameData, &IDiaFrameData::get_lengthBlock);
+}
+
+std::string DIAFrameData::getProgram() const {
+ return invokeBstrMethod(*FrameData, &IDiaFrameData::get_program);
+}
+
+uint32_t DIAFrameData::getRelativeVirtualAddress() const {
+ return PrivateGetDIAValue(FrameData,
+ &IDiaFrameData::get_relativeVirtualAddress);
+}
+
+uint64_t DIAFrameData::getVirtualAddress() const {
+ return PrivateGetDIAValue(FrameData, &IDiaFrameData::get_virtualAddress);
+}
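The new file leans on a small member-pointer wrapper to call COM-style getters and fall back to a default on failure; a standalone sketch of that pattern, where Widget and get_size are hypothetical stand-ins:

// Call an accessor through a member-function pointer; return a
// default-constructed value when the call does not succeed.
#include <cstdint>

struct Widget {
  long get_size(uint32_t *Out) { *Out = 42; return 0; } // 0 plays the role of S_OK
};

template <typename ArgType>
ArgType getOrDefault(Widget *W, long (Widget::*Method)(ArgType *)) {
  ArgType Value;
  if ((W->*Method)(&Value) == 0)
    return Value;
  return ArgType();
}

// Usage: uint32_t Size = getOrDefault(&W, &Widget::get_size); // Size == 42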
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
index 7d6cb254e1d1..cd4d00a13b18 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
@@ -15,6 +15,7 @@
#include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
#include "llvm/DebugInfo/PDB/DIA/DIALineNumber.h"
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAUtils.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
@@ -115,16 +116,7 @@ RetType PrivateGetDIAValue(IDiaSymbol *Symbol,
std::string
PrivateGetDIAValue(IDiaSymbol *Symbol,
HRESULT (__stdcall IDiaSymbol::*Method)(BSTR *)) {
- CComBSTR Result16;
- if (S_OK != (Symbol->*Method)(&Result16))
- return std::string();
-
- const char *SrcBytes = reinterpret_cast<const char *>(Result16.m_str);
- llvm::ArrayRef<char> SrcByteArray(SrcBytes, Result16.ByteLength());
- std::string Result8;
- if (!llvm::convertUTF16ToUTF8String(SrcByteArray, Result8))
- return std::string();
- return Result8;
+ return invokeBstrMethod(*Symbol, Method);
}
codeview::GUID
@@ -141,16 +133,33 @@ PrivateGetDIAValue(IDiaSymbol *Symbol,
return IdResult;
}
+template <typename PrintType, typename ArgType>
+void DumpDIAValueAs(llvm::raw_ostream &OS, int Indent, StringRef Name,
+ IDiaSymbol *Symbol,
+ HRESULT (__stdcall IDiaSymbol::*Method)(ArgType *)) {
+ ArgType Value;
+ if (S_OK == (Symbol->*Method)(&Value))
+ dumpSymbolField(OS, Name, static_cast<PrintType>(Value), Indent);
+}
+
+void DumpDIAIdValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
+ IDiaSymbol *Symbol,
+ HRESULT (__stdcall IDiaSymbol::*Method)(DWORD *),
+ const IPDBSession &Session, PdbSymbolIdField FieldId,
+ PdbSymbolIdField ShowFlags, PdbSymbolIdField RecurseFlags) {
+ DWORD Value;
+ if (S_OK == (Symbol->*Method)(&Value))
+ dumpSymbolIdField(OS, Name, Value, Indent, Session, FieldId, ShowFlags,
+ RecurseFlags);
+}
+
template <typename ArgType>
void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
IDiaSymbol *Symbol,
HRESULT (__stdcall IDiaSymbol::*Method)(ArgType *)) {
ArgType Value;
- if (S_OK == (Symbol->*Method)(&Value)) {
- OS << "\n";
- OS.indent(Indent);
- OS << Name << ": " << Value;
- }
+ if (S_OK == (Symbol->*Method)(&Value))
+ dumpSymbolField(OS, Name, Value, Indent);
}
void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
@@ -162,11 +171,8 @@ void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
const char *Bytes = reinterpret_cast<const char *>(Value);
ArrayRef<char> ByteArray(Bytes, ::SysStringByteLen(Value));
std::string Result;
- if (llvm::convertUTF16ToUTF8String(ByteArray, Result)) {
- OS << "\n";
- OS.indent(Indent);
- OS << Name << ": " << Result;
- }
+ if (llvm::convertUTF16ToUTF8String(ByteArray, Result))
+ dumpSymbolField(OS, Name, Result, Indent);
::SysFreeString(Value);
}
@@ -177,12 +183,11 @@ void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
Value.vt = VT_EMPTY;
if (S_OK != (Symbol->*Method)(&Value))
return;
- OS << "\n";
- OS.indent(Indent);
Variant V = VariantFromVARIANT(Value);
- OS << Name << ": " << V;
-}
+
+ dumpSymbolField(OS, Name, V, Indent);
}
+} // namespace
namespace llvm {
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GUID &G) {
@@ -191,182 +196,203 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GUID &G) {
A.format(OS, "");
return OS;
}
-}
+} // namespace llvm
DIARawSymbol::DIARawSymbol(const DIASession &PDBSession,
CComPtr<IDiaSymbol> DiaSymbol)
: Session(PDBSession), Symbol(DiaSymbol) {}
+#define RAW_ID_METHOD_DUMP(Stream, Method, Session, FieldId, ShowFlags, \
+ RecurseFlags) \
+ DumpDIAIdValue(Stream, Indent, StringRef{#Method}, Symbol, \
+ &IDiaSymbol::get_##Method, Session, FieldId, ShowFlags, \
+ RecurseFlags);
+
#define RAW_METHOD_DUMP(Stream, Method) \
- DumpDIAValue(Stream, Indent, StringRef(#Method), Symbol, &IDiaSymbol::Method);
-
-void DIARawSymbol::dump(raw_ostream &OS, int Indent) const {
- RAW_METHOD_DUMP(OS, get_access)
- RAW_METHOD_DUMP(OS, get_addressOffset)
- RAW_METHOD_DUMP(OS, get_addressSection)
- RAW_METHOD_DUMP(OS, get_age)
- RAW_METHOD_DUMP(OS, get_arrayIndexTypeId)
- RAW_METHOD_DUMP(OS, get_backEndMajor)
- RAW_METHOD_DUMP(OS, get_backEndMinor)
- RAW_METHOD_DUMP(OS, get_backEndBuild)
- RAW_METHOD_DUMP(OS, get_backEndQFE)
- RAW_METHOD_DUMP(OS, get_baseDataOffset)
- RAW_METHOD_DUMP(OS, get_baseDataSlot)
- RAW_METHOD_DUMP(OS, get_baseSymbolId)
- RAW_METHOD_DUMP(OS, get_baseType)
- RAW_METHOD_DUMP(OS, get_bitPosition)
- RAW_METHOD_DUMP(OS, get_callingConvention)
- RAW_METHOD_DUMP(OS, get_classParentId)
- RAW_METHOD_DUMP(OS, get_compilerName)
- RAW_METHOD_DUMP(OS, get_count)
- RAW_METHOD_DUMP(OS, get_countLiveRanges)
- RAW_METHOD_DUMP(OS, get_frontEndMajor)
- RAW_METHOD_DUMP(OS, get_frontEndMinor)
- RAW_METHOD_DUMP(OS, get_frontEndBuild)
- RAW_METHOD_DUMP(OS, get_frontEndQFE)
- RAW_METHOD_DUMP(OS, get_lexicalParentId)
- RAW_METHOD_DUMP(OS, get_libraryName)
- RAW_METHOD_DUMP(OS, get_liveRangeStartAddressOffset)
- RAW_METHOD_DUMP(OS, get_liveRangeStartAddressSection)
- RAW_METHOD_DUMP(OS, get_liveRangeStartRelativeVirtualAddress)
- RAW_METHOD_DUMP(OS, get_localBasePointerRegisterId)
- RAW_METHOD_DUMP(OS, get_lowerBoundId)
- RAW_METHOD_DUMP(OS, get_memorySpaceKind)
- RAW_METHOD_DUMP(OS, get_name)
- RAW_METHOD_DUMP(OS, get_numberOfAcceleratorPointerTags)
- RAW_METHOD_DUMP(OS, get_numberOfColumns)
- RAW_METHOD_DUMP(OS, get_numberOfModifiers)
- RAW_METHOD_DUMP(OS, get_numberOfRegisterIndices)
- RAW_METHOD_DUMP(OS, get_numberOfRows)
- RAW_METHOD_DUMP(OS, get_objectFileName)
- RAW_METHOD_DUMP(OS, get_oemId)
- RAW_METHOD_DUMP(OS, get_oemSymbolId)
- RAW_METHOD_DUMP(OS, get_offsetInUdt)
- RAW_METHOD_DUMP(OS, get_platform)
- RAW_METHOD_DUMP(OS, get_rank)
- RAW_METHOD_DUMP(OS, get_registerId)
- RAW_METHOD_DUMP(OS, get_registerType)
- RAW_METHOD_DUMP(OS, get_relativeVirtualAddress)
- RAW_METHOD_DUMP(OS, get_samplerSlot)
- RAW_METHOD_DUMP(OS, get_signature)
- RAW_METHOD_DUMP(OS, get_sizeInUdt)
- RAW_METHOD_DUMP(OS, get_slot)
- RAW_METHOD_DUMP(OS, get_sourceFileName)
- RAW_METHOD_DUMP(OS, get_stride)
- RAW_METHOD_DUMP(OS, get_subTypeId)
- RAW_METHOD_DUMP(OS, get_symbolsFileName)
- RAW_METHOD_DUMP(OS, get_symIndexId)
- RAW_METHOD_DUMP(OS, get_targetOffset)
- RAW_METHOD_DUMP(OS, get_targetRelativeVirtualAddress)
- RAW_METHOD_DUMP(OS, get_targetVirtualAddress)
- RAW_METHOD_DUMP(OS, get_targetSection)
- RAW_METHOD_DUMP(OS, get_textureSlot)
- RAW_METHOD_DUMP(OS, get_timeStamp)
- RAW_METHOD_DUMP(OS, get_token)
- RAW_METHOD_DUMP(OS, get_typeId)
- RAW_METHOD_DUMP(OS, get_uavSlot)
- RAW_METHOD_DUMP(OS, get_undecoratedName)
- RAW_METHOD_DUMP(OS, get_unmodifiedTypeId)
- RAW_METHOD_DUMP(OS, get_upperBoundId)
- RAW_METHOD_DUMP(OS, get_virtualBaseDispIndex)
- RAW_METHOD_DUMP(OS, get_virtualBaseOffset)
- RAW_METHOD_DUMP(OS, get_virtualTableShapeId)
- RAW_METHOD_DUMP(OS, get_dataKind)
- RAW_METHOD_DUMP(OS, get_symTag)
- RAW_METHOD_DUMP(OS, get_guid)
- RAW_METHOD_DUMP(OS, get_offset)
- RAW_METHOD_DUMP(OS, get_thisAdjust)
- RAW_METHOD_DUMP(OS, get_virtualBasePointerOffset)
- RAW_METHOD_DUMP(OS, get_locationType)
- RAW_METHOD_DUMP(OS, get_machineType)
- RAW_METHOD_DUMP(OS, get_thunkOrdinal)
- RAW_METHOD_DUMP(OS, get_length)
- RAW_METHOD_DUMP(OS, get_liveRangeLength)
- RAW_METHOD_DUMP(OS, get_virtualAddress)
- RAW_METHOD_DUMP(OS, get_udtKind)
- RAW_METHOD_DUMP(OS, get_constructor)
- RAW_METHOD_DUMP(OS, get_customCallingConvention)
- RAW_METHOD_DUMP(OS, get_farReturn)
- RAW_METHOD_DUMP(OS, get_code)
- RAW_METHOD_DUMP(OS, get_compilerGenerated)
- RAW_METHOD_DUMP(OS, get_constType)
- RAW_METHOD_DUMP(OS, get_editAndContinueEnabled)
- RAW_METHOD_DUMP(OS, get_function)
- RAW_METHOD_DUMP(OS, get_stride)
- RAW_METHOD_DUMP(OS, get_noStackOrdering)
- RAW_METHOD_DUMP(OS, get_hasAlloca)
- RAW_METHOD_DUMP(OS, get_hasAssignmentOperator)
- RAW_METHOD_DUMP(OS, get_isCTypes)
- RAW_METHOD_DUMP(OS, get_hasCastOperator)
- RAW_METHOD_DUMP(OS, get_hasDebugInfo)
- RAW_METHOD_DUMP(OS, get_hasEH)
- RAW_METHOD_DUMP(OS, get_hasEHa)
- RAW_METHOD_DUMP(OS, get_hasInlAsm)
- RAW_METHOD_DUMP(OS, get_framePointerPresent)
- RAW_METHOD_DUMP(OS, get_inlSpec)
- RAW_METHOD_DUMP(OS, get_interruptReturn)
- RAW_METHOD_DUMP(OS, get_hasLongJump)
- RAW_METHOD_DUMP(OS, get_hasManagedCode)
- RAW_METHOD_DUMP(OS, get_hasNestedTypes)
- RAW_METHOD_DUMP(OS, get_noInline)
- RAW_METHOD_DUMP(OS, get_noReturn)
- RAW_METHOD_DUMP(OS, get_optimizedCodeDebugInfo)
- RAW_METHOD_DUMP(OS, get_overloadedOperator)
- RAW_METHOD_DUMP(OS, get_hasSEH)
- RAW_METHOD_DUMP(OS, get_hasSecurityChecks)
- RAW_METHOD_DUMP(OS, get_hasSetJump)
- RAW_METHOD_DUMP(OS, get_strictGSCheck)
- RAW_METHOD_DUMP(OS, get_isAcceleratorGroupSharedLocal)
- RAW_METHOD_DUMP(OS, get_isAcceleratorPointerTagLiveRange)
- RAW_METHOD_DUMP(OS, get_isAcceleratorStubFunction)
- RAW_METHOD_DUMP(OS, get_isAggregated)
- RAW_METHOD_DUMP(OS, get_intro)
- RAW_METHOD_DUMP(OS, get_isCVTCIL)
- RAW_METHOD_DUMP(OS, get_isConstructorVirtualBase)
- RAW_METHOD_DUMP(OS, get_isCxxReturnUdt)
- RAW_METHOD_DUMP(OS, get_isDataAligned)
- RAW_METHOD_DUMP(OS, get_isHLSLData)
- RAW_METHOD_DUMP(OS, get_isHotpatchable)
- RAW_METHOD_DUMP(OS, get_indirectVirtualBaseClass)
- RAW_METHOD_DUMP(OS, get_isInterfaceUdt)
- RAW_METHOD_DUMP(OS, get_intrinsic)
- RAW_METHOD_DUMP(OS, get_isLTCG)
- RAW_METHOD_DUMP(OS, get_isLocationControlFlowDependent)
- RAW_METHOD_DUMP(OS, get_isMSILNetmodule)
- RAW_METHOD_DUMP(OS, get_isMatrixRowMajor)
- RAW_METHOD_DUMP(OS, get_managed)
- RAW_METHOD_DUMP(OS, get_msil)
- RAW_METHOD_DUMP(OS, get_isMultipleInheritance)
- RAW_METHOD_DUMP(OS, get_isNaked)
- RAW_METHOD_DUMP(OS, get_nested)
- RAW_METHOD_DUMP(OS, get_isOptimizedAway)
- RAW_METHOD_DUMP(OS, get_packed)
- RAW_METHOD_DUMP(OS, get_isPointerBasedOnSymbolValue)
- RAW_METHOD_DUMP(OS, get_isPointerToDataMember)
- RAW_METHOD_DUMP(OS, get_isPointerToMemberFunction)
- RAW_METHOD_DUMP(OS, get_pure)
- RAW_METHOD_DUMP(OS, get_RValueReference)
- RAW_METHOD_DUMP(OS, get_isRefUdt)
- RAW_METHOD_DUMP(OS, get_reference)
- RAW_METHOD_DUMP(OS, get_restrictedType)
- RAW_METHOD_DUMP(OS, get_isReturnValue)
- RAW_METHOD_DUMP(OS, get_isSafeBuffers)
- RAW_METHOD_DUMP(OS, get_scoped)
- RAW_METHOD_DUMP(OS, get_isSdl)
- RAW_METHOD_DUMP(OS, get_isSingleInheritance)
- RAW_METHOD_DUMP(OS, get_isSplitted)
- RAW_METHOD_DUMP(OS, get_isStatic)
- RAW_METHOD_DUMP(OS, get_isStripped)
- RAW_METHOD_DUMP(OS, get_unalignedType)
- RAW_METHOD_DUMP(OS, get_notReached)
- RAW_METHOD_DUMP(OS, get_isValueUdt)
- RAW_METHOD_DUMP(OS, get_virtual)
- RAW_METHOD_DUMP(OS, get_virtualBaseClass)
- RAW_METHOD_DUMP(OS, get_isVirtualInheritance)
- RAW_METHOD_DUMP(OS, get_volatileType)
- RAW_METHOD_DUMP(OS, get_wasInlined)
- RAW_METHOD_DUMP(OS, get_unused)
- RAW_METHOD_DUMP(OS, get_value)
+ DumpDIAValue(Stream, Indent, StringRef{#Method}, Symbol, \
+ &IDiaSymbol::get_##Method);
+
+#define RAW_METHOD_DUMP_AS(Stream, Method, Type) \
+ DumpDIAValueAs<Type>(Stream, Indent, StringRef{#Method}, Symbol, \
+ &IDiaSymbol::get_##Method);
+
+void DIARawSymbol::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ RAW_ID_METHOD_DUMP(OS, symIndexId, Session, PdbSymbolIdField::SymIndexId,
+ ShowIdFields, RecurseIdFields);
+ RAW_METHOD_DUMP_AS(OS, symTag, PDB_SymType);
+
+ RAW_METHOD_DUMP(OS, access);
+ RAW_METHOD_DUMP(OS, addressOffset);
+ RAW_METHOD_DUMP(OS, addressSection);
+ RAW_METHOD_DUMP(OS, age);
+ RAW_METHOD_DUMP(OS, arrayIndexTypeId);
+ RAW_METHOD_DUMP(OS, backEndMajor);
+ RAW_METHOD_DUMP(OS, backEndMinor);
+ RAW_METHOD_DUMP(OS, backEndBuild);
+ RAW_METHOD_DUMP(OS, backEndQFE);
+ RAW_METHOD_DUMP(OS, baseDataOffset);
+ RAW_METHOD_DUMP(OS, baseDataSlot);
+ RAW_METHOD_DUMP(OS, baseSymbolId);
+ RAW_METHOD_DUMP_AS(OS, baseType, PDB_BuiltinType);
+ RAW_METHOD_DUMP(OS, bitPosition);
+ RAW_METHOD_DUMP_AS(OS, callingConvention, PDB_CallingConv);
+ RAW_ID_METHOD_DUMP(OS, classParentId, Session, PdbSymbolIdField::ClassParent,
+ ShowIdFields, RecurseIdFields);
+ RAW_METHOD_DUMP(OS, compilerName);
+ RAW_METHOD_DUMP(OS, count);
+ RAW_METHOD_DUMP(OS, countLiveRanges);
+ RAW_METHOD_DUMP(OS, frontEndMajor);
+ RAW_METHOD_DUMP(OS, frontEndMinor);
+ RAW_METHOD_DUMP(OS, frontEndBuild);
+ RAW_METHOD_DUMP(OS, frontEndQFE);
+ RAW_ID_METHOD_DUMP(OS, lexicalParentId, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ RAW_METHOD_DUMP(OS, libraryName);
+ RAW_METHOD_DUMP(OS, liveRangeStartAddressOffset);
+ RAW_METHOD_DUMP(OS, liveRangeStartAddressSection);
+ RAW_METHOD_DUMP(OS, liveRangeStartRelativeVirtualAddress);
+ RAW_METHOD_DUMP(OS, localBasePointerRegisterId);
+ RAW_METHOD_DUMP(OS, lowerBoundId);
+ RAW_METHOD_DUMP(OS, memorySpaceKind);
+ RAW_METHOD_DUMP(OS, name);
+ RAW_METHOD_DUMP(OS, numberOfAcceleratorPointerTags);
+ RAW_METHOD_DUMP(OS, numberOfColumns);
+ RAW_METHOD_DUMP(OS, numberOfModifiers);
+ RAW_METHOD_DUMP(OS, numberOfRegisterIndices);
+ RAW_METHOD_DUMP(OS, numberOfRows);
+ RAW_METHOD_DUMP(OS, objectFileName);
+ RAW_METHOD_DUMP(OS, oemId);
+ RAW_METHOD_DUMP(OS, oemSymbolId);
+ RAW_METHOD_DUMP(OS, offsetInUdt);
+ RAW_METHOD_DUMP(OS, platform);
+ RAW_METHOD_DUMP(OS, rank);
+ RAW_METHOD_DUMP(OS, registerId);
+ RAW_METHOD_DUMP(OS, registerType);
+ RAW_METHOD_DUMP(OS, relativeVirtualAddress);
+ RAW_METHOD_DUMP(OS, samplerSlot);
+ RAW_METHOD_DUMP(OS, signature);
+ RAW_METHOD_DUMP(OS, sizeInUdt);
+ RAW_METHOD_DUMP(OS, slot);
+ RAW_METHOD_DUMP(OS, sourceFileName);
+ RAW_METHOD_DUMP(OS, stride);
+ RAW_METHOD_DUMP(OS, subTypeId);
+ RAW_METHOD_DUMP(OS, symbolsFileName);
+ RAW_METHOD_DUMP(OS, targetOffset);
+ RAW_METHOD_DUMP(OS, targetRelativeVirtualAddress);
+ RAW_METHOD_DUMP(OS, targetVirtualAddress);
+ RAW_METHOD_DUMP(OS, targetSection);
+ RAW_METHOD_DUMP(OS, textureSlot);
+ RAW_METHOD_DUMP(OS, timeStamp);
+ RAW_METHOD_DUMP(OS, token);
+ RAW_ID_METHOD_DUMP(OS, typeId, Session, PdbSymbolIdField::Type, ShowIdFields,
+ RecurseIdFields);
+ RAW_METHOD_DUMP(OS, uavSlot);
+ RAW_METHOD_DUMP(OS, undecoratedName);
+ RAW_ID_METHOD_DUMP(OS, unmodifiedTypeId, Session,
+ PdbSymbolIdField::UnmodifiedType, ShowIdFields,
+ RecurseIdFields);
+ RAW_METHOD_DUMP(OS, upperBoundId);
+ RAW_METHOD_DUMP(OS, virtualBaseDispIndex);
+ RAW_METHOD_DUMP(OS, virtualBaseOffset);
+ RAW_METHOD_DUMP(OS, virtualTableShapeId);
+ RAW_METHOD_DUMP_AS(OS, dataKind, PDB_DataKind);
+ RAW_METHOD_DUMP(OS, guid);
+ RAW_METHOD_DUMP(OS, offset);
+ RAW_METHOD_DUMP(OS, thisAdjust);
+ RAW_METHOD_DUMP(OS, virtualBasePointerOffset);
+ RAW_METHOD_DUMP_AS(OS, locationType, PDB_LocType);
+ RAW_METHOD_DUMP(OS, machineType);
+ RAW_METHOD_DUMP(OS, thunkOrdinal);
+ RAW_METHOD_DUMP(OS, length);
+ RAW_METHOD_DUMP(OS, liveRangeLength);
+ RAW_METHOD_DUMP(OS, virtualAddress);
+ RAW_METHOD_DUMP_AS(OS, udtKind, PDB_UdtType);
+ RAW_METHOD_DUMP(OS, constructor);
+ RAW_METHOD_DUMP(OS, customCallingConvention);
+ RAW_METHOD_DUMP(OS, farReturn);
+ RAW_METHOD_DUMP(OS, code);
+ RAW_METHOD_DUMP(OS, compilerGenerated);
+ RAW_METHOD_DUMP(OS, constType);
+ RAW_METHOD_DUMP(OS, editAndContinueEnabled);
+ RAW_METHOD_DUMP(OS, function);
+ RAW_METHOD_DUMP(OS, stride);
+ RAW_METHOD_DUMP(OS, noStackOrdering);
+ RAW_METHOD_DUMP(OS, hasAlloca);
+ RAW_METHOD_DUMP(OS, hasAssignmentOperator);
+ RAW_METHOD_DUMP(OS, isCTypes);
+ RAW_METHOD_DUMP(OS, hasCastOperator);
+ RAW_METHOD_DUMP(OS, hasDebugInfo);
+ RAW_METHOD_DUMP(OS, hasEH);
+ RAW_METHOD_DUMP(OS, hasEHa);
+ RAW_METHOD_DUMP(OS, hasInlAsm);
+ RAW_METHOD_DUMP(OS, framePointerPresent);
+ RAW_METHOD_DUMP(OS, inlSpec);
+ RAW_METHOD_DUMP(OS, interruptReturn);
+ RAW_METHOD_DUMP(OS, hasLongJump);
+ RAW_METHOD_DUMP(OS, hasManagedCode);
+ RAW_METHOD_DUMP(OS, hasNestedTypes);
+ RAW_METHOD_DUMP(OS, noInline);
+ RAW_METHOD_DUMP(OS, noReturn);
+ RAW_METHOD_DUMP(OS, optimizedCodeDebugInfo);
+ RAW_METHOD_DUMP(OS, overloadedOperator);
+ RAW_METHOD_DUMP(OS, hasSEH);
+ RAW_METHOD_DUMP(OS, hasSecurityChecks);
+ RAW_METHOD_DUMP(OS, hasSetJump);
+ RAW_METHOD_DUMP(OS, strictGSCheck);
+ RAW_METHOD_DUMP(OS, isAcceleratorGroupSharedLocal);
+ RAW_METHOD_DUMP(OS, isAcceleratorPointerTagLiveRange);
+ RAW_METHOD_DUMP(OS, isAcceleratorStubFunction);
+ RAW_METHOD_DUMP(OS, isAggregated);
+ RAW_METHOD_DUMP(OS, intro);
+ RAW_METHOD_DUMP(OS, isCVTCIL);
+ RAW_METHOD_DUMP(OS, isConstructorVirtualBase);
+ RAW_METHOD_DUMP(OS, isCxxReturnUdt);
+ RAW_METHOD_DUMP(OS, isDataAligned);
+ RAW_METHOD_DUMP(OS, isHLSLData);
+ RAW_METHOD_DUMP(OS, isHotpatchable);
+ RAW_METHOD_DUMP(OS, indirectVirtualBaseClass);
+ RAW_METHOD_DUMP(OS, isInterfaceUdt);
+ RAW_METHOD_DUMP(OS, intrinsic);
+ RAW_METHOD_DUMP(OS, isLTCG);
+ RAW_METHOD_DUMP(OS, isLocationControlFlowDependent);
+ RAW_METHOD_DUMP(OS, isMSILNetmodule);
+ RAW_METHOD_DUMP(OS, isMatrixRowMajor);
+ RAW_METHOD_DUMP(OS, managed);
+ RAW_METHOD_DUMP(OS, msil);
+ RAW_METHOD_DUMP(OS, isMultipleInheritance);
+ RAW_METHOD_DUMP(OS, isNaked);
+ RAW_METHOD_DUMP(OS, nested);
+ RAW_METHOD_DUMP(OS, isOptimizedAway);
+ RAW_METHOD_DUMP(OS, packed);
+ RAW_METHOD_DUMP(OS, isPointerBasedOnSymbolValue);
+ RAW_METHOD_DUMP(OS, isPointerToDataMember);
+ RAW_METHOD_DUMP(OS, isPointerToMemberFunction);
+ RAW_METHOD_DUMP(OS, pure);
+ RAW_METHOD_DUMP(OS, RValueReference);
+ RAW_METHOD_DUMP(OS, isRefUdt);
+ RAW_METHOD_DUMP(OS, reference);
+ RAW_METHOD_DUMP(OS, restrictedType);
+ RAW_METHOD_DUMP(OS, isReturnValue);
+ RAW_METHOD_DUMP(OS, isSafeBuffers);
+ RAW_METHOD_DUMP(OS, scoped);
+ RAW_METHOD_DUMP(OS, isSdl);
+ RAW_METHOD_DUMP(OS, isSingleInheritance);
+ RAW_METHOD_DUMP(OS, isSplitted);
+ RAW_METHOD_DUMP(OS, isStatic);
+ RAW_METHOD_DUMP(OS, isStripped);
+ RAW_METHOD_DUMP(OS, unalignedType);
+ RAW_METHOD_DUMP(OS, notReached);
+ RAW_METHOD_DUMP(OS, isValueUdt);
+ RAW_METHOD_DUMP(OS, virtual);
+ RAW_METHOD_DUMP(OS, virtualBaseClass);
+ RAW_METHOD_DUMP(OS, isVirtualInheritance);
+ RAW_METHOD_DUMP(OS, volatileType);
+ RAW_METHOD_DUMP(OS, wasInlined);
+ RAW_METHOD_DUMP(OS, unused);
+ RAW_METHOD_DUMP(OS, value);
}
std::unique_ptr<IPDBEnumSymbols>
@@ -414,9 +440,8 @@ DIARawSymbol::findChildrenByAddr(PDB_SymType Type, StringRef Name,
wchar_t *Name16Str = reinterpret_cast<wchar_t *>(Name16.data());
CComPtr<IDiaEnumSymbols> DiaEnumerator;
- if (S_OK !=
- Symbol->findChildrenExByAddr(EnumVal, Name16Str, CompareFlags, Section,
- Offset, &DiaEnumerator))
+ if (S_OK != Symbol->findChildrenExByAddr(EnumVal, Name16Str, CompareFlags,
+ Section, Offset, &DiaEnumerator))
return nullptr;
return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
@@ -434,9 +459,8 @@ DIARawSymbol::findChildrenByVA(PDB_SymType Type, StringRef Name,
wchar_t *Name16Str = reinterpret_cast<wchar_t *>(Name16.data());
CComPtr<IDiaEnumSymbols> DiaEnumerator;
- if (S_OK !=
- Symbol->findChildrenExByVA(EnumVal, Name16Str, CompareFlags, VA,
- &DiaEnumerator))
+ if (S_OK != Symbol->findChildrenExByVA(EnumVal, Name16Str, CompareFlags, VA,
+ &DiaEnumerator))
return nullptr;
return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
@@ -453,9 +477,8 @@ DIARawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name,
wchar_t *Name16Str = reinterpret_cast<wchar_t *>(Name16.data());
CComPtr<IDiaEnumSymbols> DiaEnumerator;
- if (S_OK !=
- Symbol->findChildrenExByRVA(EnumVal, Name16Str, CompareFlags, RVA,
- &DiaEnumerator))
+ if (S_OK != Symbol->findChildrenExByRVA(EnumVal, Name16Str, CompareFlags, RVA,
+ &DiaEnumerator))
return nullptr;
return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
@@ -500,7 +523,8 @@ std::unique_ptr<IPDBEnumLineNumbers>
DIARawSymbol::findInlineeLinesByAddr(uint32_t Section, uint32_t Offset,
uint32_t Length) const {
CComPtr<IDiaEnumLineNumbers> DiaEnumerator;
- if (S_OK != Symbol->findInlineeLinesByAddr(Section, Offset, Length, &DiaEnumerator))
+ if (S_OK !=
+ Symbol->findInlineeLinesByAddr(Section, Offset, Length, &DiaEnumerator))
return nullptr;
return llvm::make_unique<DIAEnumLineNumbers>(DiaEnumerator);
@@ -536,8 +560,7 @@ void DIARawSymbol::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) const {
Symbol->get_dataBytes(DataSize, &DataSize, bytes.data());
}
-std::string
-DIARawSymbol::getUndecoratedNameEx(PDB_UndnameFlags Flags) const {
+std::string DIARawSymbol::getUndecoratedNameEx(PDB_UndnameFlags Flags) const {
CComBSTR Result16;
if (S_OK != Symbol->get_undecoratedNameEx((DWORD)Flags, &Result16))
return std::string();
@@ -567,7 +590,7 @@ uint32_t DIARawSymbol::getAge() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_age);
}
-uint32_t DIARawSymbol::getArrayIndexTypeId() const {
+SymIndexId DIARawSymbol::getArrayIndexTypeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_arrayIndexTypeId);
}
@@ -586,7 +609,7 @@ uint32_t DIARawSymbol::getBaseDataSlot() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_baseDataSlot);
}
-uint32_t DIARawSymbol::getBaseSymbolId() const {
+SymIndexId DIARawSymbol::getBaseSymbolId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_baseSymbolId);
}
@@ -604,7 +627,7 @@ PDB_CallingConv DIARawSymbol::getCallingConvention() const {
Symbol, &IDiaSymbol::get_callingConvention);
}
-uint32_t DIARawSymbol::getClassParentId() const {
+SymIndexId DIARawSymbol::getClassParentId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_classParentId);
}
@@ -631,7 +654,7 @@ PDB_Lang DIARawSymbol::getLanguage() const {
return PrivateGetDIAValue<DWORD, PDB_Lang>(Symbol, &IDiaSymbol::get_language);
}
-uint32_t DIARawSymbol::getLexicalParentId() const {
+SymIndexId DIARawSymbol::getLexicalParentId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_lexicalParentId);
}
@@ -659,7 +682,7 @@ codeview::RegisterId DIARawSymbol::getLocalBasePointerRegisterId() const {
Symbol, &IDiaSymbol::get_localBasePointerRegisterId);
}
-uint32_t DIARawSymbol::getLowerBoundId() const {
+SymIndexId DIARawSymbol::getLowerBoundId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_lowerBoundId);
}
@@ -700,7 +723,7 @@ uint32_t DIARawSymbol::getOemId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_oemId);
}
-uint32_t DIARawSymbol::getOemSymbolId() const {
+SymIndexId DIARawSymbol::getOemSymbolId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_oemSymbolId);
}
@@ -749,8 +772,7 @@ std::string DIARawSymbol::getSourceFileName() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_sourceFileName);
}
-std::unique_ptr<IPDBLineNumber>
-DIARawSymbol::getSrcLineOnTypeDefn() const {
+std::unique_ptr<IPDBLineNumber> DIARawSymbol::getSrcLineOnTypeDefn() const {
CComPtr<IDiaLineNumber> LineNumber;
if (FAILED(Symbol->getSrcLineOnTypeDefn(&LineNumber)) || !LineNumber)
return nullptr;
@@ -762,7 +784,7 @@ uint32_t DIARawSymbol::getStride() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_stride);
}
-uint32_t DIARawSymbol::getSubTypeId() const {
+SymIndexId DIARawSymbol::getSubTypeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_subTypeId);
}
@@ -770,7 +792,7 @@ std::string DIARawSymbol::getSymbolsFileName() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_symbolsFileName);
}
-uint32_t DIARawSymbol::getSymIndexId() const {
+SymIndexId DIARawSymbol::getSymIndexId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_symIndexId);
}
@@ -803,7 +825,7 @@ uint32_t DIARawSymbol::getToken() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_token);
}
-uint32_t DIARawSymbol::getTypeId() const {
+SymIndexId DIARawSymbol::getTypeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_typeId);
}
@@ -815,11 +837,11 @@ std::string DIARawSymbol::getUndecoratedName() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_undecoratedName);
}
-uint32_t DIARawSymbol::getUnmodifiedTypeId() const {
+SymIndexId DIARawSymbol::getUnmodifiedTypeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_unmodifiedTypeId);
}
-uint32_t DIARawSymbol::getUpperBoundId() const {
+SymIndexId DIARawSymbol::getUpperBoundId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_upperBoundId);
}
@@ -840,7 +862,7 @@ uint32_t DIARawSymbol::getVirtualBaseOffset() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualBaseOffset);
}
-uint32_t DIARawSymbol::getVirtualTableShapeId() const {
+SymIndexId DIARawSymbol::getVirtualTableShapeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualTableShapeId);
}
@@ -852,7 +874,7 @@ DIARawSymbol::getVirtualBaseTableType() const {
auto RawVT = llvm::make_unique<DIARawSymbol>(Session, TableType);
auto Pointer =
- llvm::make_unique<PDBSymbolTypePointer>(Session, std::move(RawVT));
+ PDBSymbol::createAs<PDBSymbolTypePointer>(Session, std::move(RawVT));
return unique_dyn_cast<PDBSymbolTypeBuiltin>(Pointer->getPointeeType());
}
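Given the macro definitions above, a single dump line such as RAW_METHOD_DUMP_AS(OS, udtKind, PDB_UdtType); expands, approximately, to:

DumpDIAValueAs<PDB_UdtType>(OS, Indent, StringRef{"udtKind"}, Symbol,
                            &IDiaSymbol::get_udtKind);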
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
index b7dc49f53e23..8e233ca15161 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
@@ -25,7 +25,7 @@ std::unique_ptr<PDBSymbolCompiland> DIASectionContrib::getCompiland() const {
return nullptr;
auto RawSymbol = llvm::make_unique<DIARawSymbol>(Session, Symbol);
- return llvm::make_unique<PDBSymbolCompiland>(Session, std::move(RawSymbol));
+ return PDBSymbol::createAs<PDBSymbolCompiland>(Session, std::move(RawSymbol));
}
template <typename ArgType>
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp
index d81f59400eb3..bd375e172ac0 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h"
#include "llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h"
#include "llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h"
#include "llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h"
@@ -42,7 +43,7 @@ static Error ErrorFromHResult(HRESULT Result, const char *Str, Ts &&... Args) {
switch (Result) {
case E_PDB_NOT_FOUND:
- return make_error<GenericError>(generic_error_code::invalid_path, Context);
+ return errorCodeToError(std::error_code(ENOENT, std::generic_category()));
case E_PDB_FORMAT:
return make_error<DIAError>(dia_error_code::invalid_file_format, Context);
case E_INVALIDARG:
@@ -71,8 +72,7 @@ static Error LoadDIA(CComPtr<IDiaDataSource> &DiaDataSource) {
// If the CoCreateInstance call above failed, msdia*.dll is not registered.
// Try loading the DLL corresponding to the #included DIA SDK.
#if !defined(_MSC_VER)
- return llvm::make_error<GenericError>(
- "DIA is only supported when using MSVC.");
+ return llvm::make_error<PDBError>(pdb_error_code::dia_failed_loading);
#else
const wchar_t *msdia_dll = nullptr;
#if _MSC_VER >= 1900 && _MSC_VER < 2000
@@ -104,7 +104,7 @@ Error DIASession::createFromPdb(StringRef Path,
llvm::SmallVector<UTF16, 128> Path16;
if (!llvm::convertUTF8ToUTF16String(Path, Path16))
- return make_error<GenericError>(generic_error_code::invalid_path);
+ return make_error<PDBError>(pdb_error_code::invalid_utf8_path, Path);
const wchar_t *Path16Str = reinterpret_cast<const wchar_t *>(Path16.data());
HRESULT HR;
@@ -130,7 +130,7 @@ Error DIASession::createFromExe(StringRef Path,
llvm::SmallVector<UTF16, 128> Path16;
if (!llvm::convertUTF8ToUTF16String(Path, Path16))
- return make_error<GenericError>(generic_error_code::invalid_path, Path);
+ return make_error<PDBError>(pdb_error_code::invalid_utf8_path, Path);
const wchar_t *Path16Str = reinterpret_cast<const wchar_t *>(Path16.data());
HRESULT HR;
@@ -188,7 +188,8 @@ bool DIASession::addressForRVA(uint32_t RVA, uint32_t &Section,
return false;
}
-std::unique_ptr<PDBSymbol> DIASession::getSymbolById(uint32_t SymbolId) const {
+std::unique_ptr<PDBSymbol>
+DIASession::getSymbolById(SymIndexId SymbolId) const {
CComPtr<IDiaSymbol> LocatedSymbol;
if (S_OK != Session->symbolById(SymbolId, &LocatedSymbol))
return nullptr;
@@ -407,7 +408,7 @@ DIASession::getInjectedSources() const {
if (!Files)
return nullptr;
- return llvm::make_unique<DIAEnumInjectedSources>(*this, Files);
+ return llvm::make_unique<DIAEnumInjectedSources>(Files);
}
std::unique_ptr<IPDBEnumSectionContribs>
@@ -419,3 +420,13 @@ DIASession::getSectionContribs() const {
return llvm::make_unique<DIAEnumSectionContribs>(*this, Sections);
}
+
+std::unique_ptr<IPDBEnumFrameData>
+DIASession::getFrameData() const {
+ CComPtr<IDiaEnumFrameData> FD =
+ getTableEnumerator<IDiaEnumFrameData>(*Session);
+ if (!FD)
+ return nullptr;
+
+ return llvm::make_unique<DIAEnumFrameData>(FD);
+}
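A minimal consumer of the new frame-data enumerator, assuming the matching IPDBSession and IPDBFrameData declarations from this same import; getNext() returns null once the underlying DIA enumerator is exhausted (function name is illustrative):

#include "llvm/DebugInfo/PDB/IPDBFrameData.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/Support/raw_ostream.h"

void printFrameData(const llvm::pdb::IPDBSession &Session,
                    llvm::raw_ostream &OS) {
  auto Frames = Session.getFrameData();
  if (!Frames) // the FrameData table may be absent from the PDB
    return;
  while (auto FD = Frames->getNext())
    OS << FD->getRelativeVirtualAddress() << " " << FD->getLengthBlock() << "\n";
}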
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
index 8605f55b402c..d3e408166a87 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
@@ -8,12 +8,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/DIA/DIASourceFile.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
#include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAUtils.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
-#include "llvm/Support/ConvertUTF.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -23,16 +22,7 @@ DIASourceFile::DIASourceFile(const DIASession &PDBSession,
: Session(PDBSession), SourceFile(DiaSourceFile) {}
std::string DIASourceFile::getFileName() const {
- CComBSTR FileName16;
- HRESULT Result = SourceFile->get_fileName(&FileName16);
- if (S_OK != Result)
- return std::string();
-
- std::string FileName8;
- llvm::ArrayRef<char> FileNameBytes(reinterpret_cast<char *>(FileName16.m_str),
- FileName16.ByteLength());
- llvm::convertUTF16ToUTF8String(FileNameBytes, FileName8);
- return FileName8;
+ return invokeBstrMethod(*SourceFile, &IDiaSourceFile::get_fileName);
}
uint32_t DIASourceFile::getUniqueId() const {
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIATable.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIATable.cpp
index 5705c2370dc6..6017081b2cb6 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIATable.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIATable.cpp
@@ -8,14 +8,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/DIA/DIATable.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/ConvertUTF.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAUtils.h"
using namespace llvm;
using namespace llvm::pdb;
-DIATable::DIATable(CComPtr<IDiaTable> DiaTable)
- : Table(DiaTable) {}
+DIATable::DIATable(CComPtr<IDiaTable> DiaTable) : Table(DiaTable) {}
uint32_t DIATable::getItemCount() const {
LONG Count = 0;
@@ -23,16 +21,7 @@ uint32_t DIATable::getItemCount() const {
}
std::string DIATable::getName() const {
- CComBSTR Name16;
- if (S_OK != Table->get_name(&Name16))
- return std::string();
-
- std::string Name8;
- llvm::ArrayRef<char> Name16Bytes(reinterpret_cast<char *>(Name16.m_str),
- Name16.ByteLength());
- if (!llvm::convertUTF16ToUTF8String(Name16Bytes, Name8))
- return std::string();
- return Name8;
+ return invokeBstrMethod(*Table, &IDiaTable::get_name);
}
PDB_TableType DIATable::getTableType() const {
diff --git a/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp b/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp
index 2a677b9abe2d..256952073e88 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp
@@ -14,55 +14,34 @@
using namespace llvm;
using namespace llvm::pdb;
-namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
-class GenericErrorCategory : public std::error_category {
+class PDBErrorCategory : public std::error_category {
public:
const char *name() const noexcept override { return "llvm.pdb"; }
-
std::string message(int Condition) const override {
- switch (static_cast<generic_error_code>(Condition)) {
- case generic_error_code::unspecified:
+ switch (static_cast<pdb_error_code>(Condition)) {
+ case pdb_error_code::unspecified:
return "An unknown error has occurred.";
- case generic_error_code::type_server_not_found:
- return "Type server PDB was not found.";
- case generic_error_code::dia_sdk_not_present:
- return "LLVM was not compiled with support for DIA. This usually means "
+ case pdb_error_code::dia_sdk_not_present:
+ return "LLVM was not compiled with support for DIA. This usually means "
"that you are not using MSVC, or your Visual Studio "
- "installation "
- "is corrupt.";
- case generic_error_code::invalid_path:
- return "Unable to load PDB. Make sure the file exists and is readable.";
+ "installation is corrupt.";
+ case pdb_error_code::dia_failed_loading:
+ return "DIA is only supported when using MSVC.";
+ case pdb_error_code::invalid_utf8_path:
+ return "The PDB file path is an invalid UTF8 sequence.";
+ case pdb_error_code::signature_out_of_date:
+ return "The signature does not match; the file(s) might be out of date.";
+ case pdb_error_code::external_cmdline_ref:
+ return "The path to this file must be provided on the command-line.";
}
llvm_unreachable("Unrecognized generic_error_code");
}
};
-} // end anonymous namespace
-
-static ManagedStatic<GenericErrorCategory> Category;
-
-char GenericError::ID = 0;
-
-GenericError::GenericError(generic_error_code C) : GenericError(C, "") {}
-
-GenericError::GenericError(StringRef Context)
- : GenericError(generic_error_code::unspecified, Context) {}
-
-GenericError::GenericError(generic_error_code C, StringRef Context) : Code(C) {
- ErrMsg = "PDB Error: ";
- std::error_code EC = convertToErrorCode();
- if (Code != generic_error_code::unspecified)
- ErrMsg += EC.message() + " ";
- if (!Context.empty())
- ErrMsg += Context;
-}
-
-void GenericError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-StringRef GenericError::getErrorMessage() const { return ErrMsg; }
+static llvm::ManagedStatic<PDBErrorCategory> PDBCategory;
+const std::error_category &llvm::pdb::PDBErrCategory() { return *PDBCategory; }
-std::error_code GenericError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(Code), *Category);
-}
+char PDBError::ID;
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index b97f1e90bcf8..ab93efc839a9 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -19,7 +19,6 @@
#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/Support/BinaryItemStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
using namespace llvm;
@@ -66,12 +65,22 @@ void DbiModuleDescriptorBuilder::setFirstSectionContrib(
}
void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) {
- Symbols.push_back(Symbol);
- // Symbols written to a PDB file are required to be 4 byte aligned. The same
+ // Defer to the bulk API. It does the same thing.
+ addSymbolsInBulk(Symbol.data());
+}
+
+void DbiModuleDescriptorBuilder::addSymbolsInBulk(
+ ArrayRef<uint8_t> BulkSymbols) {
+ // Do nothing for empty runs of symbols.
+ if (BulkSymbols.empty())
+ return;
+
+ Symbols.push_back(BulkSymbols);
+ // Symbols written to a PDB file are required to be 4 byte aligned. The same
// is not true of object files.
- assert(Symbol.length() % alignOf(CodeViewContainer::Pdb) == 0 &&
+ assert(BulkSymbols.size() % alignOf(CodeViewContainer::Pdb) == 0 &&
"Invalid Symbol alignment!");
- SymbolByteSize += Symbol.length();
+ SymbolByteSize += BulkSymbols.size();
}
void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) {
@@ -145,16 +154,13 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
if (auto EC =
SymbolWriter.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC))
return EC;
- BinaryItemStream<CVSymbol> Records(llvm::support::endianness::little);
- Records.setItems(Symbols);
- BinaryStreamRef RecordsRef(Records);
- if (auto EC = SymbolWriter.writeStreamRef(RecordsRef))
- return EC;
- if (auto EC = SymbolWriter.padToAlignment(4))
- return EC;
- // TODO: Write C11 Line data
+ for (ArrayRef<uint8_t> Syms : Symbols) {
+ if (auto EC = SymbolWriter.writeBytes(Syms))
+ return EC;
+ }
assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 &&
"Invalid debug section alignment!");
+ // TODO: Write C11 Line data
for (const auto &Builder : C13Builders) {
assert(Builder && "Empty C13 Fragment Builder!");
if (auto EC = Builder->commit(SymbolWriter))
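With this change the per-record addSymbol() defers to the bulk path; a minimal sketch of a caller handing over one pre-serialized, 4-byte-aligned run of records (function name is illustrative):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"

void addModuleSymbols(llvm::pdb::DbiModuleDescriptorBuilder &Mod,
                      llvm::ArrayRef<uint8_t> SerializedRecords) {
  // Empty runs are ignored inside addSymbolsInBulk, so no guard is needed;
  // the run must already be padded to 4-byte alignment (asserted above).
  Mod.addSymbolsInBulk(SerializedRecords);
}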
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
index edaa783398ca..60ac17b655a7 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -197,7 +197,7 @@ PDB_Machine DbiStream::getMachineType() const {
return static_cast<PDB_Machine>(Machine);
}
-FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() {
+FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() const {
return SectionHeaders;
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index f6043bfd7cf9..094216ea800a 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
@@ -74,10 +75,27 @@ void DbiStreamBuilder::setPublicsStreamIndex(uint32_t Index) {
PublicsStreamIndex = Index;
}
+void DbiStreamBuilder::addNewFpoData(const codeview::FrameData &FD) {
+ if (!NewFpoData.hasValue())
+ NewFpoData.emplace(false);
+
+ NewFpoData->addFrameData(FD);
+}
+
+void DbiStreamBuilder::addOldFpoData(const object::FpoData &FD) {
+ OldFpoData.push_back(FD);
+}
+
Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type,
ArrayRef<uint8_t> Data) {
+ assert(Type != DbgHeaderType::NewFPO &&
+ "NewFPO data should be written via addFrameData()!");
+
DbgStreams[(int)Type].emplace();
- DbgStreams[(int)Type]->Data = Data;
+ DbgStreams[(int)Type]->Size = Data.size();
+ DbgStreams[(int)Type]->WriteFn = [Data](BinaryStreamWriter &Writer) {
+ return Writer.writeArray(Data);
+ };
return Error::success();
}
@@ -272,10 +290,30 @@ Error DbiStreamBuilder::finalize() {
}
Error DbiStreamBuilder::finalizeMsfLayout() {
+ if (NewFpoData.hasValue()) {
+ DbgStreams[(int)DbgHeaderType::NewFPO].emplace();
+ DbgStreams[(int)DbgHeaderType::NewFPO]->Size =
+ NewFpoData->calculateSerializedSize();
+ DbgStreams[(int)DbgHeaderType::NewFPO]->WriteFn =
+ [this](BinaryStreamWriter &Writer) {
+ return NewFpoData->commit(Writer);
+ };
+ }
+
+ if (!OldFpoData.empty()) {
+ DbgStreams[(int)DbgHeaderType::FPO].emplace();
+ DbgStreams[(int)DbgHeaderType::FPO]->Size =
+ sizeof(object::FpoData) * OldFpoData.size();
+ DbgStreams[(int)DbgHeaderType::FPO]->WriteFn =
+ [this](BinaryStreamWriter &Writer) {
+ return Writer.writeArray(makeArrayRef(OldFpoData));
+ };
+ }
+
for (auto &S : DbgStreams) {
if (!S.hasValue())
continue;
- auto ExpectedIndex = Msf.addStream(S->Data.size());
+ auto ExpectedIndex = Msf.addStream(S->Size);
if (!ExpectedIndex)
return ExpectedIndex.takeError();
S->StreamNumber = *ExpectedIndex;
@@ -406,7 +444,8 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
auto WritableStream = WritableMappedBlockStream::createIndexedStream(
Layout, MsfBuffer, Stream->StreamNumber, Allocator);
BinaryStreamWriter DbgStreamWriter(*WritableStream);
- if (auto EC = DbgStreamWriter.writeArray(Stream->Data))
+
+ if (auto EC = Stream->WriteFn(DbgStreamWriter))
return EC;
}
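The debug streams now record a size plus a write callback instead of raw bytes; a standalone sketch of that deferred-write pattern with simplified stand-in types:

#include <cstdint>
#include <functional>
#include <vector>

struct StreamWriter { std::vector<uint8_t> Bytes; };

struct DeferredStream {
  uint32_t Size = 0;                           // reserved when the layout is finalized
  std::function<bool(StreamWriter &)> WriteFn; // invoked when the stream is committed
};

bool commitAll(std::vector<DeferredStream> &Streams, StreamWriter &W) {
  for (auto &S : Streams)
    if (S.WriteFn && !S.WriteFn(W))
      return false;
  return true;
}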
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 58efc2256ae1..57da7003da2b 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/DebugInfo/CodeView/RecordName.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
@@ -20,6 +21,7 @@
#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/Support/BinaryItemStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/xxhash.h"
#include <algorithm>
#include <vector>
@@ -29,8 +31,27 @@ using namespace llvm::pdb;
using namespace llvm::codeview;
struct llvm::pdb::GSIHashStreamBuilder {
+ struct UdtDenseMapInfo {
+ static inline CVSymbol getEmptyKey() {
+ static CVSymbol Empty;
+ return Empty;
+ }
+ static inline CVSymbol getTombstoneKey() {
+ static CVSymbol Tombstone(static_cast<SymbolKind>(-1),
+ ArrayRef<uint8_t>());
+ return Tombstone;
+ }
+ static unsigned getHashValue(const CVSymbol &Val) {
+ return xxHash64(Val.RecordData);
+ }
+ static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
+ return LHS.RecordData == RHS.RecordData;
+ }
+ };
+
std::vector<CVSymbol> Records;
uint32_t StreamIndex;
+ llvm::DenseSet<CVSymbol, UdtDenseMapInfo> UdtHashes;
std::vector<PSHashRecord> HashRecords;
std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
std::vector<support::ulittle32_t> HashBuckets;
@@ -42,10 +63,18 @@ struct llvm::pdb::GSIHashStreamBuilder {
template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) {
T Copy(Symbol);
- Records.push_back(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
- CodeViewContainer::Pdb));
+ addSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
+ CodeViewContainer::Pdb));
+ }
+ void addSymbol(const CVSymbol &Symbol) {
+ if (Symbol.kind() == S_UDT) {
+ auto Iter = UdtHashes.insert(Symbol);
+ if (!Iter.second)
+ return;
+ }
+
+ Records.push_back(Symbol);
}
- void addSymbol(const CVSymbol &Symbol) { Records.push_back(Symbol); }
};
uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
@@ -144,11 +173,10 @@ void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
// can properly early-out when it detects the record won't be found. The
// algorithm used here corresponds to the function
// caseInsensitiveComparePchPchCchCch in the reference implementation.
- llvm::sort(Bucket.begin(), Bucket.end(),
- [](const std::pair<StringRef, PSHashRecord> &Left,
- const std::pair<StringRef, PSHashRecord> &Right) {
- return gsiRecordLess(Left.first, Right.first);
- });
+ llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left,
+ const std::pair<StringRef, PSHashRecord> &Right) {
+ return gsiRecordLess(Left.first, Right.first);
+ });
for (const auto &Entry : Bucket)
HashRecords.push_back(Entry.second);
@@ -273,10 +301,6 @@ void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
GSH->addSymbol(Sym, Msf);
}
-void GSIStreamBuilder::addGlobalSymbol(const UDTSym &Sym) {
- GSH->addSymbol(Sym, Msf);
-}
-
void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) {
GSH->addSymbol(Sym);
}
@@ -310,13 +334,14 @@ Error GSIStreamBuilder::commitPublicsHashStream(
PublicsStreamHeader Header;
// FIXME: Fill these in. They are for incremental linking.
+ Header.SymHash = PSH->calculateSerializedLength();
+ Header.AddrMap = PSH->Records.size() * 4;
Header.NumThunks = 0;
Header.SizeOfThunk = 0;
Header.ISectThunkTable = 0;
+ memset(Header.Padding, 0, sizeof(Header.Padding));
Header.OffThunkTable = 0;
Header.NumSections = 0;
- Header.SymHash = PSH->calculateSerializedLength();
- Header.AddrMap = PSH->Records.size() * 4;
if (auto EC = Writer.writeObject(Header))
return EC;
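The S_UDT de-duplication above keys records by a hash of their raw bytes so identical UDT records are emitted only once; a standalone sketch of the same idea with simplified types (the S_UDT kind value here is an assumption for illustration):

#include <cstdint>
#include <string>
#include <unordered_set>
#include <vector>

struct Record {
  uint16_t Kind;
  std::vector<uint8_t> Data;
};

bool addRecord(std::vector<Record> &Out,
               std::unordered_set<std::string> &SeenUdts, const Record &R) {
  const uint16_t S_UDT_KIND = 0x1108; // assumed CodeView S_UDT value
  if (R.Kind == S_UDT_KIND) {
    std::string Key(R.Data.begin(), R.Data.end());
    if (!SeenUdts.insert(Key).second)
      return false; // duplicate S_UDT record, skip it
  }
  Out.push_back(R);
  return true;
}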
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
index 36076f436ad0..e36319566821 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
@@ -20,7 +20,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+
+#include "llvm/DebugInfo/CodeView/RecordName.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
#include <algorithm>
@@ -41,6 +45,43 @@ Error GlobalsStream::reload() {
return Error::success();
}
+std::vector<std::pair<uint32_t, codeview::CVSymbol>>
+GlobalsStream::findRecordsByName(StringRef Name,
+ const SymbolStream &Symbols) const {
+ std::vector<std::pair<uint32_t, codeview::CVSymbol>> Result;
+
+ // Hash the name to figure out which bucket this goes into.
+ size_t ExpandedBucketIndex = hashStringV1(Name) % IPHR_HASH;
+ int32_t CompressedBucketIndex = GlobalsTable.BucketMap[ExpandedBucketIndex];
+ if (CompressedBucketIndex == -1)
+ return Result;
+
+ uint32_t LastBucketIndex = GlobalsTable.HashBuckets.size() - 1;
+ uint32_t StartRecordIndex =
+ GlobalsTable.HashBuckets[CompressedBucketIndex] / 12;
+ uint32_t EndRecordIndex = 0;
+ if (LLVM_LIKELY(uint32_t(CompressedBucketIndex) < LastBucketIndex)) {
+ EndRecordIndex = GlobalsTable.HashBuckets[CompressedBucketIndex + 1];
+ } else {
+ // If this is the last bucket, it consists of all hash records until the end
+ // of the HashRecords array.
+ EndRecordIndex = GlobalsTable.HashRecords.size() * 12;
+ }
+
+ EndRecordIndex /= 12;
+
+ assert(EndRecordIndex <= GlobalsTable.HashRecords.size());
+ while (StartRecordIndex < EndRecordIndex) {
+ PSHashRecord PSH = GlobalsTable.HashRecords[StartRecordIndex];
+ uint32_t Off = PSH.Off - 1;
+ codeview::CVSymbol Record = Symbols.readRecord(Off);
+ if (codeview::getSymbolName(Record) == Name)
+ Result.push_back(std::make_pair(Off, std::move(Record)));
+ ++StartRecordIndex;
+ }
+ return Result;
+}
+
static Error checkHashHdrVersion(const GSIHashHeader *HashHdr) {
if (HashHdr->VerHdr != GSIHashHeader::HdrVersion)
return make_error<RawError>(
@@ -86,7 +127,9 @@ static Error readGSIHashRecords(FixedStreamArray<PSHashRecord> &HashRecords,
static Error
readGSIHashBuckets(FixedStreamArray<support::ulittle32_t> &HashBuckets,
- ArrayRef<uint8_t> &HashBitmap, const GSIHashHeader *HashHdr,
+ FixedStreamArray<support::ulittle32_t> &HashBitmap,
+ const GSIHashHeader *HashHdr,
+ MutableArrayRef<int32_t> BucketMap,
BinaryStreamReader &Reader) {
if (auto EC = checkHashHdrVersion(HashHdr))
return EC;
@@ -94,13 +137,27 @@ readGSIHashBuckets(FixedStreamArray<support::ulittle32_t> &HashBuckets,
// Before the actual hash buckets, there is a bitmap of length determined by
// IPHR_HASH.
size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
- uint32_t NumBitmapEntries = BitmapSizeInBits / 8;
- if (auto EC = Reader.readBytes(HashBitmap, NumBitmapEntries))
+ uint32_t NumBitmapEntries = BitmapSizeInBits / 32;
+ if (auto EC = Reader.readArray(HashBitmap, NumBitmapEntries))
return joinErrors(std::move(EC),
make_error<RawError>(raw_error_code::corrupt_file,
"Could not read a bitmap."));
+ uint32_t NumBuckets1 = 0;
+ uint32_t CompressedBucketIdx = 0;
+ for (uint32_t I = 0; I <= IPHR_HASH; ++I) {
+ uint8_t WordIdx = I / 32;
+ uint8_t BitIdx = I % 32;
+ bool IsSet = HashBitmap[WordIdx] & (1U << BitIdx);
+ if (IsSet) {
+ ++NumBuckets1;
+ BucketMap[I] = CompressedBucketIdx++;
+ } else {
+ BucketMap[I] = -1;
+ }
+ }
+
uint32_t NumBuckets = 0;
- for (uint8_t B : HashBitmap)
+ for (uint32_t B : HashBitmap)
NumBuckets += countPopulation(B);
// Hash buckets follow.
@@ -118,7 +175,8 @@ Error GSIHashTable::read(BinaryStreamReader &Reader) {
if (auto EC = readGSIHashRecords(HashRecords, HashHdr, Reader))
return EC;
if (HashHdr->HrSize > 0)
- if (auto EC = readGSIHashBuckets(HashBuckets, HashBitmap, HashHdr, Reader))
+ if (auto EC = readGSIHashBuckets(HashBuckets, HashBitmap, HashHdr,
+ BucketMap, Reader))
return EC;
return Error::success();
}
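A standalone sketch of the bucket-compression step used by findRecordsByName: the on-disk bitmap carries one bit per possible hash bucket, only set bits occupy a slot in the serialized bucket array, and BucketMap records either the compressed index or -1 (types simplified):

#include <cstdint>
#include <vector>

std::vector<int32_t> expandBucketMap(const std::vector<uint32_t> &Bitmap,
                                     uint32_t NumExpandedBuckets) {
  std::vector<int32_t> BucketMap(NumExpandedBuckets, -1);
  uint32_t Compressed = 0;
  for (uint32_t I = 0; I < NumExpandedBuckets; ++I) {
    bool IsSet = Bitmap[I / 32] & (1U << (I % 32));
    if (IsSet)
      BucketMap[I] = Compressed++;
  }
  return BucketMap;
}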
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
index 54d6835f1121..3b5a2accdba6 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -32,15 +32,20 @@ InfoStreamBuilder::InfoStreamBuilder(msf::MSFBuilder &Msf,
void InfoStreamBuilder::setVersion(PdbRaw_ImplVer V) { Ver = V; }
+void InfoStreamBuilder::addFeature(PdbRaw_FeatureSig Sig) {
+ Features.push_back(Sig);
+}
+
+void InfoStreamBuilder::setHashPDBContentsToGUID(bool B) {
+ HashPDBContentsToGUID = B;
+}
+
void InfoStreamBuilder::setAge(uint32_t A) { Age = A; }
void InfoStreamBuilder::setSignature(uint32_t S) { Signature = S; }
void InfoStreamBuilder::setGuid(GUID G) { Guid = G; }
-void InfoStreamBuilder::addFeature(PdbRaw_FeatureSig Sig) {
- Features.push_back(Sig);
-}
Error InfoStreamBuilder::finalizeMsfLayout() {
uint32_t Length = sizeof(InfoStreamHeader) +
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index 2e1f61c7a25d..8c97f4a012f0 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -11,7 +11,9 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
@@ -47,7 +49,8 @@ Error ModuleDebugStreamRef::reload() {
if (auto EC = Reader.readInteger(Signature))
return EC;
- if (auto EC = Reader.readSubstream(SymbolsSubstream, SymbolSize - 4))
+ Reader.setOffset(0);
+ if (auto EC = Reader.readSubstream(SymbolsSubstream, SymbolSize))
return EC;
if (auto EC = Reader.readSubstream(C11LinesSubstream, C11Size))
return EC;
@@ -55,8 +58,8 @@ Error ModuleDebugStreamRef::reload() {
return EC;
BinaryStreamReader SymbolReader(SymbolsSubstream.StreamData);
- if (auto EC =
- SymbolReader.readArray(SymbolArray, SymbolReader.bytesRemaining()))
+ if (auto EC = SymbolReader.readArray(
+ SymbolArray, SymbolReader.bytesRemaining(), sizeof(uint32_t)))
return EC;
BinaryStreamReader SubsectionsReader(C13LinesSubstream.StreamData);
@@ -76,6 +79,11 @@ Error ModuleDebugStreamRef::reload() {
return Error::success();
}
+const codeview::CVSymbolArray
+ModuleDebugStreamRef::getSymbolArrayForScope(uint32_t ScopeBegin) const {
+ return limitSymbolArrayToScope(SymbolArray, ScopeBegin);
+}
+
BinarySubstreamRef ModuleDebugStreamRef::getSymbolsSubstream() const {
return SymbolsSubstream;
}
@@ -97,6 +105,12 @@ ModuleDebugStreamRef::symbols(bool *HadError) const {
return make_range(SymbolArray.begin(HadError), SymbolArray.end());
}
+CVSymbol ModuleDebugStreamRef::readSymbolAtOffset(uint32_t Offset) const {
+ auto Iter = SymbolArray.at(Offset);
+ assert(Iter != SymbolArray.end());
+ return *Iter;
+}
+
iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator>
ModuleDebugStreamRef::subsections() const {
return make_range(Subsections.begin(), Subsections.end());
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp
deleted file mode 100644
index 4644ddcf24e3..000000000000
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-//===- NativeBuiltinSymbol.cpp ------------------------------------ C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h"
-
-
-namespace llvm {
-namespace pdb {
-
-NativeBuiltinSymbol::NativeBuiltinSymbol(NativeSession &PDBSession,
- SymIndexId Id, PDB_BuiltinType T,
- uint64_t L)
- : NativeRawSymbol(PDBSession, Id), Session(PDBSession), Type(T), Length(L) {
-}
-
-NativeBuiltinSymbol::~NativeBuiltinSymbol() {}
-
-std::unique_ptr<NativeRawSymbol> NativeBuiltinSymbol::clone() const {
- return llvm::make_unique<NativeBuiltinSymbol>(Session, SymbolId, Type, Length);
-}
-
-void NativeBuiltinSymbol::dump(raw_ostream &OS, int Indent) const {
- // TODO: Apparently nothing needs this yet.
-}
-
-PDB_SymType NativeBuiltinSymbol::getSymTag() const {
- return PDB_SymType::BuiltinType;
-}
-
-PDB_BuiltinType NativeBuiltinSymbol::getBuiltinType() const { return Type; }
-
-bool NativeBuiltinSymbol::isConstType() const { return false; }
-
-uint64_t NativeBuiltinSymbol::getLength() const { return Length; }
-
-bool NativeBuiltinSymbol::isUnalignedType() const { return false; }
-
-bool NativeBuiltinSymbol::isVolatileType() const { return false; }
-
-} // namespace pdb
-} // namespace llvm
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
index 7132a99a9f16..efa70b0e7bd8 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/ADT/STLExtras.h"
@@ -17,21 +18,31 @@ namespace pdb {
NativeCompilandSymbol::NativeCompilandSymbol(NativeSession &Session,
SymIndexId SymbolId,
DbiModuleDescriptor MI)
- : NativeRawSymbol(Session, SymbolId), Module(MI) {}
+ : NativeRawSymbol(Session, PDB_SymType::Compiland, SymbolId), Module(MI) {}
PDB_SymType NativeCompilandSymbol::getSymTag() const {
return PDB_SymType::Compiland;
}
-std::unique_ptr<NativeRawSymbol> NativeCompilandSymbol::clone() const {
- return llvm::make_unique<NativeCompilandSymbol>(Session, SymbolId, Module);
+void NativeCompilandSymbol::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "libraryName", getLibraryName(), Indent);
+ dumpSymbolField(OS, "name", getName(), Indent);
+ dumpSymbolField(OS, "editAndContinueEnabled", isEditAndContinueEnabled(),
+ Indent);
}
bool NativeCompilandSymbol::isEditAndContinueEnabled() const {
return Module.hasECInfo();
}
-uint32_t NativeCompilandSymbol::getLexicalParentId() const { return 0; }
+SymIndexId NativeCompilandSymbol::getLexicalParentId() const { return 0; }
// The usage of getObjFileName for getLibraryName and getModuleName for getName
// may seem backwards, but it is consistent with DIA, which is what this API
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
new file mode 100644
index 000000000000..6eece3df2db3
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
@@ -0,0 +1,55 @@
+//==- NativeEnumGlobals.cpp - Native Global Enumerator impl ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h"
+
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeEnumGlobals::NativeEnumGlobals(NativeSession &PDBSession,
+ std::vector<codeview::SymbolKind> Kinds)
+ : Index(0), Session(PDBSession) {
+ GlobalsStream &GS = cantFail(Session.getPDBFile().getPDBGlobalsStream());
+ SymbolStream &SS = cantFail(Session.getPDBFile().getPDBSymbolStream());
+ for (uint32_t Off : GS.getGlobalsTable()) {
+ CVSymbol S = SS.readRecord(Off);
+ if (!llvm::is_contained(Kinds, S.kind()))
+ continue;
+ MatchOffsets.push_back(Off);
+ }
+}
+
+uint32_t NativeEnumGlobals::getChildCount() const {
+ return static_cast<uint32_t>(MatchOffsets.size());
+}
+
+std::unique_ptr<PDBSymbol>
+NativeEnumGlobals::getChildAtIndex(uint32_t N) const {
+ if (N >= MatchOffsets.size())
+ return nullptr;
+
+ SymIndexId Id =
+ Session.getSymbolCache().getOrCreateGlobalSymbolByOffset(MatchOffsets[N]);
+ return Session.getSymbolCache().getSymbolById(Id);
+}
+
+std::unique_ptr<PDBSymbol> NativeEnumGlobals::getNext() {
+ return getChildAtIndex(Index++);
+}
+
+void NativeEnumGlobals::reset() { Index = 0; }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
index a65782e2d4fc..6e4d56443a07 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
@@ -10,42 +10,35 @@
#include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
-#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
namespace llvm {
namespace pdb {
-NativeEnumModules::NativeEnumModules(NativeSession &PDBSession,
- const DbiModuleList &Modules,
- uint32_t Index)
- : Session(PDBSession), Modules(Modules), Index(Index) {}
+NativeEnumModules::NativeEnumModules(NativeSession &PDBSession, uint32_t Index)
+ : Session(PDBSession), Index(Index) {}
uint32_t NativeEnumModules::getChildCount() const {
- return static_cast<uint32_t>(Modules.getModuleCount());
+ return Session.getSymbolCache().getNumCompilands();
}
std::unique_ptr<PDBSymbol>
-NativeEnumModules::getChildAtIndex(uint32_t Index) const {
- if (Index >= Modules.getModuleCount())
- return nullptr;
- return Session.createCompilandSymbol(Modules.getModuleDescriptor(Index));
+NativeEnumModules::getChildAtIndex(uint32_t N) const {
+ return Session.getSymbolCache().getOrCreateCompiland(N);
}
std::unique_ptr<PDBSymbol> NativeEnumModules::getNext() {
- if (Index >= Modules.getModuleCount())
+ if (Index >= getChildCount())
return nullptr;
return getChildAtIndex(Index++);
}
void NativeEnumModules::reset() { Index = 0; }
-NativeEnumModules *NativeEnumModules::clone() const {
- return new NativeEnumModules(Session, Modules, Index);
-}
-
}
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbol.cpp
deleted file mode 100644
index 38d65917306a..000000000000
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbol.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-//===- NativeEnumSymbol.cpp - info about enum type --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h"
-
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
-
-#include <cassert>
-
-using namespace llvm;
-using namespace llvm::pdb;
-
-NativeEnumSymbol::NativeEnumSymbol(NativeSession &Session, SymIndexId Id,
- const codeview::CVType &CVT)
- : NativeRawSymbol(Session, Id), CV(CVT),
- Record(codeview::TypeRecordKind::Enum) {
- assert(CV.kind() == codeview::TypeLeafKind::LF_ENUM);
- cantFail(visitTypeRecord(CV, *this));
-}
-
-NativeEnumSymbol::~NativeEnumSymbol() {}
-
-std::unique_ptr<NativeRawSymbol> NativeEnumSymbol::clone() const {
- return llvm::make_unique<NativeEnumSymbol>(Session, SymbolId, CV);
-}
-
-std::unique_ptr<IPDBEnumSymbols>
-NativeEnumSymbol::findChildren(PDB_SymType Type) const {
- switch (Type) {
- case PDB_SymType::Data: {
- // TODO(amccarth): Provide an actual implementation.
- return nullptr;
- }
- default:
- return nullptr;
- }
-}
-
-Error NativeEnumSymbol::visitKnownRecord(codeview::CVType &CVR,
- codeview::EnumRecord &ER) {
- Record = ER;
- return Error::success();
-}
-
-Error NativeEnumSymbol::visitKnownMember(codeview::CVMemberRecord &CVM,
- codeview::EnumeratorRecord &R) {
- return Error::success();
-}
-
-PDB_SymType NativeEnumSymbol::getSymTag() const { return PDB_SymType::Enum; }
-
-uint32_t NativeEnumSymbol::getClassParentId() const { return 0xFFFFFFFF; }
-
-uint32_t NativeEnumSymbol::getUnmodifiedTypeId() const { return 0; }
-
-bool NativeEnumSymbol::hasConstructor() const {
- return bool(Record.getOptions() &
- codeview::ClassOptions::HasConstructorOrDestructor);
-}
-
-bool NativeEnumSymbol::hasAssignmentOperator() const {
- return bool(Record.getOptions() &
- codeview::ClassOptions::HasOverloadedAssignmentOperator);
-}
-
-bool NativeEnumSymbol::hasCastOperator() const {
- return bool(Record.getOptions() &
- codeview::ClassOptions::HasConversionOperator);
-}
-
-uint64_t NativeEnumSymbol::getLength() const {
- const auto Id = Session.findSymbolByTypeIndex(Record.getUnderlyingType());
- const auto UnderlyingType =
- Session.getConcreteSymbolById<PDBSymbolTypeBuiltin>(Id);
- return UnderlyingType ? UnderlyingType->getLength() : 0;
-}
-
-std::string NativeEnumSymbol::getName() const { return Record.getName(); }
-
-bool NativeEnumSymbol::isNested() const {
- return bool(Record.getOptions() & codeview::ClassOptions::Nested);
-}
-
-bool NativeEnumSymbol::hasOverloadedOperator() const {
- return bool(Record.getOptions() &
- codeview::ClassOptions::HasOverloadedOperator);
-}
-
-bool NativeEnumSymbol::isPacked() const {
- return bool(Record.getOptions() & codeview::ClassOptions::Packed);
-}
-
-bool NativeEnumSymbol::isScoped() const {
- return bool(Record.getOptions() & codeview::ClassOptions::Scoped);
-}
-
-uint32_t NativeEnumSymbol::getTypeId() const {
- return Session.findSymbolByTypeIndex(Record.getUnderlyingType());
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
index 36a68a1c62de..288a9128147a 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
@@ -9,39 +9,58 @@
#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
-#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
-namespace llvm {
-namespace pdb {
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
NativeEnumTypes::NativeEnumTypes(NativeSession &PDBSession,
- codeview::LazyRandomTypeCollection &Types,
- codeview::TypeLeafKind Kind)
- : Matches(), Index(0), Session(PDBSession), Kind(Kind) {
- for (auto Index = Types.getFirst(); Index;
- Index = Types.getNext(Index.getValue())) {
- if (Types.getType(Index.getValue()).kind() == Kind)
- Matches.push_back(Index.getValue());
+ LazyRandomTypeCollection &Types,
+ std::vector<codeview::TypeLeafKind> Kinds)
+ : Matches(), Index(0), Session(PDBSession) {
+ Optional<TypeIndex> TI = Types.getFirst();
+ while (TI) {
+ CVType CVT = Types.getType(*TI);
+ TypeLeafKind K = CVT.kind();
+ if (llvm::is_contained(Kinds, K)) {
+ // Don't add forward refs, we'll find those later while enumerating.
+ if (!isUdtForwardRef(CVT))
+ Matches.push_back(*TI);
+ } else if (K == TypeLeafKind::LF_MODIFIER) {
+ TypeIndex ModifiedTI = getModifiedType(CVT);
+ if (!ModifiedTI.isSimple()) {
+ CVType UnmodifiedCVT = Types.getType(ModifiedTI);
+ // LF_MODIFIERs point to forward refs, but don't worry about that
+ // here. We're pushing the TypeIndex of the LF_MODIFIER itself,
+ // so we'll worry about resolving forward refs later.
+ if (llvm::is_contained(Kinds, UnmodifiedCVT.kind()))
+ Matches.push_back(*TI);
+ }
+ }
+ TI = Types.getNext(*TI);
}
}
-NativeEnumTypes::NativeEnumTypes(
- NativeSession &PDBSession, const std::vector<codeview::TypeIndex> &Matches,
- codeview::TypeLeafKind Kind)
- : Matches(Matches), Index(0), Session(PDBSession), Kind(Kind) {}
+NativeEnumTypes::NativeEnumTypes(NativeSession &PDBSession,
+ std::vector<codeview::TypeIndex> Indices)
+ : Matches(std::move(Indices)), Index(0), Session(PDBSession) {}
uint32_t NativeEnumTypes::getChildCount() const {
return static_cast<uint32_t>(Matches.size());
}
-std::unique_ptr<PDBSymbol>
-NativeEnumTypes::getChildAtIndex(uint32_t Index) const {
- if (Index < Matches.size())
- return Session.createEnumSymbol(Matches[Index]);
+std::unique_ptr<PDBSymbol> NativeEnumTypes::getChildAtIndex(uint32_t N) const {
+ if (N < Matches.size()) {
+ SymIndexId Id = Session.getSymbolCache().findSymbolByTypeIndex(Matches[N]);
+ return Session.getSymbolCache().getSymbolById(Id);
+ }
return nullptr;
}
@@ -50,10 +69,3 @@ std::unique_ptr<PDBSymbol> NativeEnumTypes::getNext() {
}
void NativeEnumTypes::reset() { Index = 0; }
-
-NativeEnumTypes *NativeEnumTypes::clone() const {
- return new NativeEnumTypes(Session, Matches, Kind);
-}
-
-} // namespace pdb
-} // namespace llvm
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
index e8b06065fc60..6dde5d08a500 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
@@ -12,34 +12,53 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
-namespace llvm {
-namespace pdb {
+using namespace llvm;
+using namespace llvm::pdb;
-NativeExeSymbol::NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId)
- : NativeRawSymbol(Session, SymbolId), File(Session.getPDBFile()) {}
+static DbiStream *getDbiStreamPtr(NativeSession &Session) {
+ Expected<DbiStream &> DbiS = Session.getPDBFile().getPDBDbiStream();
+ if (DbiS)
+ return &DbiS.get();
-std::unique_ptr<NativeRawSymbol> NativeExeSymbol::clone() const {
- return llvm::make_unique<NativeExeSymbol>(Session, SymbolId);
+ consumeError(DbiS.takeError());
+ return nullptr;
}
+NativeExeSymbol::NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId)
+ : NativeRawSymbol(Session, PDB_SymType::Exe, SymbolId),
+ Dbi(getDbiStreamPtr(Session)) {}
+
std::unique_ptr<IPDBEnumSymbols>
NativeExeSymbol::findChildren(PDB_SymType Type) const {
switch (Type) {
case PDB_SymType::Compiland: {
- auto Dbi = File.getPDBDbiStream();
- if (Dbi) {
- const DbiModuleList &Modules = Dbi->modules();
- return std::unique_ptr<IPDBEnumSymbols>(
- new NativeEnumModules(Session, Modules));
- }
- consumeError(Dbi.takeError());
+ return std::unique_ptr<IPDBEnumSymbols>(new NativeEnumModules(Session));
break;
}
+ case PDB_SymType::ArrayType:
+ return Session.getSymbolCache().createTypeEnumerator(codeview::LF_ARRAY);
case PDB_SymType::Enum:
- return Session.createTypeEnumerator(codeview::LF_ENUM);
+ return Session.getSymbolCache().createTypeEnumerator(codeview::LF_ENUM);
+ case PDB_SymType::PointerType:
+ return Session.getSymbolCache().createTypeEnumerator(codeview::LF_POINTER);
+ case PDB_SymType::UDT:
+ return Session.getSymbolCache().createTypeEnumerator(
+ {codeview::LF_STRUCTURE, codeview::LF_CLASS, codeview::LF_UNION,
+ codeview::LF_INTERFACE});
+ case PDB_SymType::VTableShape:
+ return Session.getSymbolCache().createTypeEnumerator(codeview::LF_VTSHAPE);
+ case PDB_SymType::FunctionSig:
+ return Session.getSymbolCache().createTypeEnumerator(
+ {codeview::LF_PROCEDURE, codeview::LF_MFUNCTION});
+ case PDB_SymType::Typedef:
+ return Session.getSymbolCache().createGlobalsEnumerator(codeview::S_UDT);
+
default:
break;
}
@@ -47,7 +66,7 @@ NativeExeSymbol::findChildren(PDB_SymType Type) const {
}
uint32_t NativeExeSymbol::getAge() const {
- auto IS = File.getPDBInfoStream();
+ auto IS = Session.getPDBFile().getPDBInfoStream();
if (IS)
return IS->getAge();
consumeError(IS.takeError());
@@ -55,11 +74,11 @@ uint32_t NativeExeSymbol::getAge() const {
}
std::string NativeExeSymbol::getSymbolsFileName() const {
- return File.getFilePath();
+ return Session.getPDBFile().getFilePath();
}
codeview::GUID NativeExeSymbol::getGuid() const {
- auto IS = File.getPDBInfoStream();
+ auto IS = Session.getPDBFile().getPDBInfoStream();
if (IS)
return IS->getGuid();
consumeError(IS.takeError());
@@ -67,7 +86,7 @@ codeview::GUID NativeExeSymbol::getGuid() const {
}
bool NativeExeSymbol::hasCTypes() const {
- auto Dbi = File.getPDBDbiStream();
+ auto Dbi = Session.getPDBFile().getPDBDbiStream();
if (Dbi)
return Dbi->hasCTypes();
consumeError(Dbi.takeError());
@@ -75,12 +94,9 @@ bool NativeExeSymbol::hasCTypes() const {
}
bool NativeExeSymbol::hasPrivateSymbols() const {
- auto Dbi = File.getPDBDbiStream();
+ auto Dbi = Session.getPDBFile().getPDBDbiStream();
if (Dbi)
return !Dbi->isStripped();
consumeError(Dbi.takeError());
return false;
}
-
-} // namespace pdb
-} // namespace llvm
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index a4b029596314..62950cb3e52a 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -7,82 +7,92 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
+#include "llvm/Support/FormatVariadic.h"
using namespace llvm;
using namespace llvm::pdb;
-NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, SymIndexId SymbolId)
- : Session(PDBSession), SymbolId(SymbolId) {}
+NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, PDB_SymType Tag,
+ SymIndexId SymbolId)
+ : Session(PDBSession), Tag(Tag), SymbolId(SymbolId) {}
-void NativeRawSymbol::dump(raw_ostream &OS, int Indent) const {}
+void NativeRawSymbol::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ dumpSymbolIdField(OS, "symIndexId", SymbolId, Indent, Session,
+ PdbSymbolIdField::SymIndexId, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "symTag", Tag, Indent);
+}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildren(PDB_SymType Type) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildren(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildrenByAddr(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags, uint32_t Section, uint32_t Offset) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildrenByVA(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags, uint64_t VA) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags, uint32_t RVA) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findInlineFramesByAddr(uint32_t Section,
uint32_t Offset) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findInlineFramesByRVA(uint32_t RVA) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findInlineFramesByVA(uint64_t VA) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLines() const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLinesByAddr(uint32_t Section, uint32_t Offset,
uint32_t Length) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLinesByRVA(uint32_t RVA, uint32_t Length) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLinesByVA(uint64_t VA, uint32_t Length) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
}
void NativeRawSymbol::getDataBytes(SmallVector<uint8_t, 32> &bytes) const {
@@ -105,9 +115,7 @@ uint32_t NativeRawSymbol::getAge() const {
return 0;
}
-uint32_t NativeRawSymbol::getArrayIndexTypeId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getArrayIndexTypeId() const { return 0; }
void NativeRawSymbol::getBackEndVersion(VersionInfo &Version) const {
Version.Major = 0;
@@ -124,9 +132,7 @@ uint32_t NativeRawSymbol::getBaseDataSlot() const {
return 0;
}
-uint32_t NativeRawSymbol::getBaseSymbolId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getBaseSymbolId() const { return 0; }
PDB_BuiltinType NativeRawSymbol::getBuiltinType() const {
return PDB_BuiltinType::None;
@@ -140,9 +146,7 @@ PDB_CallingConv NativeRawSymbol::getCallingConvention() const {
return PDB_CallingConv::FarStdCall;
}
-uint32_t NativeRawSymbol::getClassParentId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getClassParentId() const { return 0; }
std::string NativeRawSymbol::getCompilerName() const {
return {};
@@ -167,9 +171,7 @@ PDB_Lang NativeRawSymbol::getLanguage() const {
return PDB_Lang::Cobol;
}
-uint32_t NativeRawSymbol::getLexicalParentId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getLexicalParentId() const { return 0; }
std::string NativeRawSymbol::getLibraryName() const {
return {};
@@ -188,12 +190,10 @@ uint32_t NativeRawSymbol::getLiveRangeStartRelativeVirtualAddress() const {
}
codeview::RegisterId NativeRawSymbol::getLocalBasePointerRegisterId() const {
- return codeview::RegisterId::CVRegEAX;
+ return codeview::RegisterId::EAX;
}
-uint32_t NativeRawSymbol::getLowerBoundId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getLowerBoundId() const { return 0; }
uint32_t NativeRawSymbol::getMemorySpaceKind() const {
return 0;
@@ -231,9 +231,7 @@ uint32_t NativeRawSymbol::getOemId() const {
return 0;
}
-uint32_t NativeRawSymbol::getOemSymbolId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getOemSymbolId() const { return 0; }
uint32_t NativeRawSymbol::getOffsetInUdt() const {
return 0;
@@ -248,7 +246,7 @@ uint32_t NativeRawSymbol::getRank() const {
}
codeview::RegisterId NativeRawSymbol::getRegisterId() const {
- return codeview::RegisterId::CVRegEAX;
+ return codeview::RegisterId::EAX;
}
uint32_t NativeRawSymbol::getRegisterType() const {
@@ -288,13 +286,11 @@ uint32_t NativeRawSymbol::getStride() const {
return 0;
}
-uint32_t NativeRawSymbol::getSubTypeId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getSubTypeId() const { return 0; }
std::string NativeRawSymbol::getSymbolsFileName() const { return {}; }
-uint32_t NativeRawSymbol::getSymIndexId() const { return SymbolId; }
+SymIndexId NativeRawSymbol::getSymIndexId() const { return SymbolId; }
uint32_t NativeRawSymbol::getTargetOffset() const {
return 0;
@@ -324,9 +320,7 @@ uint32_t NativeRawSymbol::getToken() const {
return 0;
}
-uint32_t NativeRawSymbol::getTypeId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getTypeId() const { return 0; }
uint32_t NativeRawSymbol::getUavSlot() const {
return 0;
@@ -341,13 +335,9 @@ std::string NativeRawSymbol::getUndecoratedNameEx(
return {};
}
-uint32_t NativeRawSymbol::getUnmodifiedTypeId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getUnmodifiedTypeId() const { return 0; }
-uint32_t NativeRawSymbol::getUpperBoundId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getUpperBoundId() const { return 0; }
Variant NativeRawSymbol::getValue() const {
return Variant();
@@ -361,9 +351,7 @@ uint32_t NativeRawSymbol::getVirtualBaseOffset() const {
return 0;
}
-uint32_t NativeRawSymbol::getVirtualTableShapeId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getVirtualTableShapeId() const { return 0; }
std::unique_ptr<PDBSymbolTypeBuiltin>
NativeRawSymbol::getVirtualBaseTableType() const {
@@ -374,9 +362,7 @@ PDB_DataKind NativeRawSymbol::getDataKind() const {
return PDB_DataKind::Unknown;
}
-PDB_SymType NativeRawSymbol::getSymTag() const {
- return PDB_SymType::None;
-}
+PDB_SymType NativeRawSymbol::getSymTag() const { return Tag; }
codeview::GUID NativeRawSymbol::getGuid() const { return codeview::GUID{{0}}; }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 086da13135c5..7807e312365c 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -11,16 +11,16 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
-#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
@@ -40,31 +40,19 @@ using namespace llvm;
using namespace llvm::msf;
using namespace llvm::pdb;
-namespace {
-// Maps codeview::SimpleTypeKind of a built-in type to the parameters necessary
-// to instantiate a NativeBuiltinSymbol for that type.
-static const struct BuiltinTypeEntry {
- codeview::SimpleTypeKind Kind;
- PDB_BuiltinType Type;
- uint32_t Size;
-} BuiltinTypes[] = {
- {codeview::SimpleTypeKind::Int32, PDB_BuiltinType::Int, 4},
- {codeview::SimpleTypeKind::UInt32, PDB_BuiltinType::UInt, 4},
- {codeview::SimpleTypeKind::UInt32Long, PDB_BuiltinType::UInt, 4},
- {codeview::SimpleTypeKind::UInt64Quad, PDB_BuiltinType::UInt, 8},
- {codeview::SimpleTypeKind::NarrowCharacter, PDB_BuiltinType::Char, 1},
- {codeview::SimpleTypeKind::SignedCharacter, PDB_BuiltinType::Char, 1},
- {codeview::SimpleTypeKind::UnsignedCharacter, PDB_BuiltinType::UInt, 1},
- {codeview::SimpleTypeKind::UInt16Short, PDB_BuiltinType::UInt, 2},
- {codeview::SimpleTypeKind::Boolean8, PDB_BuiltinType::Bool, 1}
- // This table can be grown as necessary, but these are the only types we've
- // needed so far.
-};
-} // namespace
+static DbiStream *getDbiStreamPtr(PDBFile &File) {
+ Expected<DbiStream &> DbiS = File.getPDBDbiStream();
+ if (DbiS)
+ return &DbiS.get();
+
+ consumeError(DbiS.takeError());
+ return nullptr;
+}
NativeSession::NativeSession(std::unique_ptr<PDBFile> PdbFile,
std::unique_ptr<BumpPtrAllocator> Allocator)
- : Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)) {}
+ : Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)),
+ Cache(*this, getDbiStreamPtr(*Pdb)) {}
NativeSession::~NativeSession() = default;
@@ -92,97 +80,17 @@ Error NativeSession::createFromExe(StringRef Path,
return make_error<RawError>(raw_error_code::feature_unsupported);
}
-std::unique_ptr<PDBSymbolCompiland>
-NativeSession::createCompilandSymbol(DbiModuleDescriptor MI) {
- const auto Id = static_cast<SymIndexId>(SymbolCache.size());
- SymbolCache.push_back(
- llvm::make_unique<NativeCompilandSymbol>(*this, Id, MI));
- return llvm::make_unique<PDBSymbolCompiland>(
- *this, std::unique_ptr<IPDBRawSymbol>(SymbolCache[Id]->clone()));
-}
-
-std::unique_ptr<PDBSymbolTypeEnum>
-NativeSession::createEnumSymbol(codeview::TypeIndex Index) {
- const auto Id = findSymbolByTypeIndex(Index);
- return llvm::make_unique<PDBSymbolTypeEnum>(
- *this, std::unique_ptr<IPDBRawSymbol>(SymbolCache[Id]->clone()));
-}
-
-std::unique_ptr<IPDBEnumSymbols>
-NativeSession::createTypeEnumerator(codeview::TypeLeafKind Kind) {
- auto Tpi = Pdb->getPDBTpiStream();
- if (!Tpi) {
- consumeError(Tpi.takeError());
- return nullptr;
- }
- auto &Types = Tpi->typeCollection();
- return std::unique_ptr<IPDBEnumSymbols>(
- new NativeEnumTypes(*this, Types, codeview::LF_ENUM));
-}
-
-SymIndexId NativeSession::findSymbolByTypeIndex(codeview::TypeIndex Index) {
- // First see if it's already in our cache.
- const auto Entry = TypeIndexToSymbolId.find(Index);
- if (Entry != TypeIndexToSymbolId.end())
- return Entry->second;
-
- // Symbols for built-in types are created on the fly.
- if (Index.isSimple()) {
- // FIXME: We will eventually need to handle pointers to other simple types,
- // which are still simple types in the world of CodeView TypeIndexes.
- if (Index.getSimpleMode() != codeview::SimpleTypeMode::Direct)
- return 0;
- const auto Kind = Index.getSimpleKind();
- const auto It =
- std::find_if(std::begin(BuiltinTypes), std::end(BuiltinTypes),
- [Kind](const BuiltinTypeEntry &Builtin) {
- return Builtin.Kind == Kind;
- });
- if (It == std::end(BuiltinTypes))
- return 0;
- SymIndexId Id = SymbolCache.size();
- SymbolCache.emplace_back(
- llvm::make_unique<NativeBuiltinSymbol>(*this, Id, It->Type, It->Size));
- TypeIndexToSymbolId[Index] = Id;
- return Id;
- }
-
- // We need to instantiate and cache the desired type symbol.
- auto Tpi = Pdb->getPDBTpiStream();
- if (!Tpi) {
- consumeError(Tpi.takeError());
- return 0;
- }
- auto &Types = Tpi->typeCollection();
- const auto &I = Types.getType(Index);
- const auto Id = static_cast<SymIndexId>(SymbolCache.size());
- // TODO(amccarth): Make this handle all types, not just LF_ENUMs.
- assert(I.kind() == codeview::LF_ENUM);
- SymbolCache.emplace_back(llvm::make_unique<NativeEnumSymbol>(*this, Id, I));
- TypeIndexToSymbolId[Index] = Id;
- return Id;
-}
-
uint64_t NativeSession::getLoadAddress() const { return 0; }
bool NativeSession::setLoadAddress(uint64_t Address) { return false; }
std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() {
- const auto Id = static_cast<SymIndexId>(SymbolCache.size());
- SymbolCache.push_back(llvm::make_unique<NativeExeSymbol>(*this, Id));
- auto RawSymbol = SymbolCache[Id]->clone();
- auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol)));
- std::unique_ptr<PDBSymbolExe> ExeSymbol(
- static_cast<PDBSymbolExe *>(PdbSymbol.release()));
- return ExeSymbol;
+ return PDBSymbol::createAs<PDBSymbolExe>(*this, getNativeGlobalScope());
}
std::unique_ptr<PDBSymbol>
-NativeSession::getSymbolById(uint32_t SymbolId) const {
- // If the caller has a SymbolId, it'd better be in our SymbolCache.
- return SymbolId < SymbolCache.size()
- ? PDBSymbol::create(*this, SymbolCache[SymbolId]->clone())
- : nullptr;
+NativeSession::getSymbolById(SymIndexId SymbolId) const {
+ return Cache.getSymbolById(SymbolId);
}
bool NativeSession::addressForVA(uint64_t VA, uint32_t &Section,
@@ -291,3 +199,19 @@ std::unique_ptr<IPDBEnumSectionContribs>
NativeSession::getSectionContribs() const {
return nullptr;
}
+
+std::unique_ptr<IPDBEnumFrameData>
+NativeSession::getFrameData() const {
+ return nullptr;
+}
+
+void NativeSession::initializeExeSymbol() {
+ if (ExeSymbol == 0)
+ ExeSymbol = Cache.createSymbol<NativeExeSymbol>();
+}
+
+NativeExeSymbol &NativeSession::getNativeGlobalScope() const {
+ const_cast<NativeSession &>(*this).initializeExeSymbol();
+
+ return Cache.getNativeSymbolById<NativeExeSymbol>(ExeSymbol);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
new file mode 100644
index 000000000000..6ebb8cae3a65
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
@@ -0,0 +1,123 @@
+//===- NativeSymbolEnumerator.cpp - info about enumerators ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h"
+
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeSymbolEnumerator::NativeSymbolEnumerator(
+ NativeSession &Session, SymIndexId Id, const NativeTypeEnum &Parent,
+ codeview::EnumeratorRecord Record)
+ : NativeRawSymbol(Session, PDB_SymType::Data, Id), Parent(Parent),
+ Record(std::move(Record)) {}
+
+NativeSymbolEnumerator::~NativeSymbolEnumerator() {}
+
+void NativeSymbolEnumerator::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+ dumpSymbolIdField(OS, "classParentId", getClassParentId(), Indent, Session,
+ PdbSymbolIdField::ClassParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolIdField(OS, "lexicalParentId", getLexicalParentId(), Indent,
+ Session, PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "name", getName(), Indent);
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+ dumpSymbolField(OS, "dataKind", getDataKind(), Indent);
+ dumpSymbolField(OS, "locationType", getLocationType(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+ dumpSymbolField(OS, "value", getValue(), Indent);
+}
+
+SymIndexId NativeSymbolEnumerator::getClassParentId() const {
+ return Parent.getSymIndexId();
+}
+
+SymIndexId NativeSymbolEnumerator::getLexicalParentId() const { return 0; }
+
+std::string NativeSymbolEnumerator::getName() const { return Record.Name; }
+
+SymIndexId NativeSymbolEnumerator::getTypeId() const {
+ return Parent.getTypeId();
+}
+
+PDB_DataKind NativeSymbolEnumerator::getDataKind() const {
+ return PDB_DataKind::Constant;
+}
+
+PDB_LocType NativeSymbolEnumerator::getLocationType() const {
+ return PDB_LocType::Constant;
+}
+
+bool NativeSymbolEnumerator::isConstType() const { return false; }
+
+bool NativeSymbolEnumerator::isVolatileType() const { return false; }
+
+bool NativeSymbolEnumerator::isUnalignedType() const { return false; }
+
+Variant NativeSymbolEnumerator::getValue() const {
+ const NativeTypeBuiltin &BT = Parent.getUnderlyingBuiltinType();
+
+ switch (BT.getBuiltinType()) {
+ case PDB_BuiltinType::Int:
+ case PDB_BuiltinType::Long:
+ case PDB_BuiltinType::Char: {
+ assert(Record.Value.isSignedIntN(BT.getLength() * 8));
+ int64_t N = Record.Value.getSExtValue();
+ switch (BT.getLength()) {
+ case 1:
+ return Variant{static_cast<int8_t>(N)};
+ case 2:
+ return Variant{static_cast<int16_t>(N)};
+ case 4:
+ return Variant{static_cast<int32_t>(N)};
+ case 8:
+ return Variant{static_cast<int64_t>(N)};
+ }
+ break;
+ }
+ case PDB_BuiltinType::UInt:
+ case PDB_BuiltinType::ULong: {
+ assert(Record.Value.isIntN(BT.getLength() * 8));
+ uint64_t U = Record.Value.getZExtValue();
+ switch (BT.getLength()) {
+ case 1:
+ return Variant{static_cast<uint8_t>(U)};
+ case 2:
+ return Variant{static_cast<uint16_t>(U)};
+ case 4:
+ return Variant{static_cast<uint32_t>(U)};
+ case 8:
+ return Variant{static_cast<uint64_t>(U)};
+ }
+ break;
+ }
+ case PDB_BuiltinType::Bool: {
+ assert(Record.Value.isIntN(BT.getLength() * 8));
+ uint64_t U = Record.Value.getZExtValue();
+ return Variant{static_cast<bool>(U)};
+ }
+ default:
+ assert(false && "Invalid enumeration type");
+ break;
+ }
+
+ return Variant{Record.Value.getSExtValue()};
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
new file mode 100644
index 000000000000..a52561728a98
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
@@ -0,0 +1,67 @@
+//===- NativeTypeArray.cpp - info about arrays ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypeArray.h"
+
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeTypeArray::NativeTypeArray(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI,
+ codeview::ArrayRecord Record)
+ : NativeRawSymbol(Session, PDB_SymType::ArrayType, Id), Record(Record),
+ Index(TI) {}
+NativeTypeArray::~NativeTypeArray() {}
+
+void NativeTypeArray::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolField(OS, "arrayIndexTypeId", getArrayIndexTypeId(), Indent);
+ dumpSymbolIdField(OS, "elementTypeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "length", getLength(), Indent);
+ dumpSymbolField(OS, "count", getCount(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+SymIndexId NativeTypeArray::getArrayIndexTypeId() const {
+ return Session.getSymbolCache().findSymbolByTypeIndex(Record.getIndexType());
+}
+
+bool NativeTypeArray::isConstType() const { return false; }
+
+bool NativeTypeArray::isUnalignedType() const { return false; }
+
+bool NativeTypeArray::isVolatileType() const { return false; }
+
+uint32_t NativeTypeArray::getCount() const {
+ NativeRawSymbol &Element =
+ Session.getSymbolCache().getNativeSymbolById(getTypeId());
+ return getLength() / Element.getLength();
+}
+
+SymIndexId NativeTypeArray::getTypeId() const {
+ return Session.getSymbolCache().findSymbolByTypeIndex(
+ Record.getElementType());
+}
+
+uint64_t NativeTypeArray::getLength() const { return Record.Size; }
\ No newline at end of file
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
new file mode 100644
index 000000000000..7b0f13f3c075
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
@@ -0,0 +1,47 @@
+//===- NativeTypeBuiltin.cpp -------------------------------------- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/Support/FormatVariadic.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeTypeBuiltin::NativeTypeBuiltin(NativeSession &PDBSession, SymIndexId Id,
+ ModifierOptions Mods, PDB_BuiltinType T,
+ uint64_t L)
+ : NativeRawSymbol(PDBSession, PDB_SymType::BuiltinType, Id),
+ Session(PDBSession), Mods(Mods), Type(T), Length(L) {}
+
+NativeTypeBuiltin::~NativeTypeBuiltin() {}
+
+void NativeTypeBuiltin::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {}
+
+PDB_SymType NativeTypeBuiltin::getSymTag() const {
+ return PDB_SymType::BuiltinType;
+}
+
+PDB_BuiltinType NativeTypeBuiltin::getBuiltinType() const { return Type; }
+
+bool NativeTypeBuiltin::isConstType() const {
+ return (Mods & ModifierOptions::Const) != ModifierOptions::None;
+}
+
+uint64_t NativeTypeBuiltin::getLength() const { return Length; }
+
+bool NativeTypeBuiltin::isUnalignedType() const {
+ return (Mods & ModifierOptions::Unaligned) != ModifierOptions::None;
+}
+
+bool NativeTypeBuiltin::isVolatileType() const {
+ return (Mods & ModifierOptions::Volatile) != ModifierOptions::None;
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
new file mode 100644
index 000000000000..37176fe083b9
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
@@ -0,0 +1,382 @@
+//===- NativeTypeEnum.cpp - info about enum type ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
+
+#include "llvm/Support/FormatVariadic.h"
+
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+namespace {
+// Yea, this is a pretty terrible class name. But if we have an enum:
+//
+// enum Foo {
+// A,
+// B
+// };
+//
+// then A and B are the "enumerators" of the "enum" Foo. And we need
+// to enumerate them.
+class NativeEnumEnumEnumerators : public IPDBEnumSymbols, TypeVisitorCallbacks {
+public:
+ NativeEnumEnumEnumerators(NativeSession &Session,
+ const NativeTypeEnum &ClassParent);
+
+ uint32_t getChildCount() const override;
+ std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
+ std::unique_ptr<PDBSymbol> getNext() override;
+ void reset() override;
+
+private:
+ Error visitKnownMember(CVMemberRecord &CVM,
+ EnumeratorRecord &Record) override;
+ Error visitKnownMember(CVMemberRecord &CVM,
+ ListContinuationRecord &Record) override;
+
+ NativeSession &Session;
+ const NativeTypeEnum &ClassParent;
+ std::vector<EnumeratorRecord> Enumerators;
+ Optional<TypeIndex> ContinuationIndex;
+ uint32_t Index = 0;
+};
+} // namespace
+
+NativeEnumEnumEnumerators::NativeEnumEnumEnumerators(
+ NativeSession &Session, const NativeTypeEnum &ClassParent)
+ : Session(Session), ClassParent(ClassParent) {
+ TpiStream &Tpi = cantFail(Session.getPDBFile().getPDBTpiStream());
+ LazyRandomTypeCollection &Types = Tpi.typeCollection();
+
+ ContinuationIndex = ClassParent.getEnumRecord().FieldList;
+ while (ContinuationIndex) {
+ CVType FieldList = Types.getType(*ContinuationIndex);
+ assert(FieldList.kind() == LF_FIELDLIST);
+ ContinuationIndex.reset();
+ cantFail(visitMemberRecordStream(FieldList.data(), *this));
+ }
+}
+
+Error NativeEnumEnumEnumerators::visitKnownMember(CVMemberRecord &CVM,
+ EnumeratorRecord &Record) {
+ Enumerators.push_back(Record);
+ return Error::success();
+}
+
+Error NativeEnumEnumEnumerators::visitKnownMember(
+ CVMemberRecord &CVM, ListContinuationRecord &Record) {
+ ContinuationIndex = Record.ContinuationIndex;
+ return Error::success();
+}
+
+uint32_t NativeEnumEnumEnumerators::getChildCount() const {
+ return Enumerators.size();
+}
+
+std::unique_ptr<PDBSymbol>
+NativeEnumEnumEnumerators::getChildAtIndex(uint32_t Index) const {
+ if (Index >= getChildCount())
+ return nullptr;
+
+ SymIndexId Id = Session.getSymbolCache()
+ .getOrCreateFieldListMember<NativeSymbolEnumerator>(
+ ClassParent.getEnumRecord().FieldList, Index,
+ ClassParent, Enumerators[Index]);
+ return Session.getSymbolCache().getSymbolById(Id);
+}
+
+std::unique_ptr<PDBSymbol> NativeEnumEnumEnumerators::getNext() {
+ if (Index >= getChildCount())
+ return nullptr;
+
+ return getChildAtIndex(Index++);
+}
+
+void NativeEnumEnumEnumerators::reset() { Index = 0; }
+
+NativeTypeEnum::NativeTypeEnum(NativeSession &Session, SymIndexId Id,
+ TypeIndex Index, EnumRecord Record)
+ : NativeRawSymbol(Session, PDB_SymType::Enum, Id), Index(Index),
+ Record(std::move(Record)) {}
+
+NativeTypeEnum::NativeTypeEnum(NativeSession &Session, SymIndexId Id,
+ NativeTypeEnum &UnmodifiedType,
+ codeview::ModifierRecord Modifier)
+ : NativeRawSymbol(Session, PDB_SymType::Enum, Id),
+ UnmodifiedType(&UnmodifiedType), Modifiers(std::move(Modifier)) {}
+
+NativeTypeEnum::~NativeTypeEnum() {}
+
+void NativeTypeEnum::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolField(OS, "baseType", static_cast<uint32_t>(getBuiltinType()),
+ Indent);
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "name", getName(), Indent);
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+ if (Modifiers.hasValue())
+ dumpSymbolIdField(OS, "unmodifiedTypeId", getUnmodifiedTypeId(), Indent,
+ Session, PdbSymbolIdField::UnmodifiedType, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "length", getLength(), Indent);
+ dumpSymbolField(OS, "constructor", hasConstructor(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "hasAssignmentOperator", hasAssignmentOperator(), Indent);
+ dumpSymbolField(OS, "hasCastOperator", hasCastOperator(), Indent);
+ dumpSymbolField(OS, "hasNestedTypes", hasNestedTypes(), Indent);
+ dumpSymbolField(OS, "overloadedOperator", hasOverloadedOperator(), Indent);
+ dumpSymbolField(OS, "isInterfaceUdt", isInterfaceUdt(), Indent);
+ dumpSymbolField(OS, "intrinsic", isIntrinsic(), Indent);
+ dumpSymbolField(OS, "nested", isNested(), Indent);
+ dumpSymbolField(OS, "packed", isPacked(), Indent);
+ dumpSymbolField(OS, "isRefUdt", isRefUdt(), Indent);
+ dumpSymbolField(OS, "scoped", isScoped(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "isValueUdt", isValueUdt(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+NativeTypeEnum::findChildren(PDB_SymType Type) const {
+ if (Type != PDB_SymType::Data)
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+
+ const NativeTypeEnum *ClassParent = nullptr;
+ if (!Modifiers)
+ ClassParent = this;
+ else
+ ClassParent = UnmodifiedType;
+ return llvm::make_unique<NativeEnumEnumEnumerators>(Session, *ClassParent);
+}
+
+PDB_SymType NativeTypeEnum::getSymTag() const { return PDB_SymType::Enum; }
+
+PDB_BuiltinType NativeTypeEnum::getBuiltinType() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getBuiltinType();
+
+ Session.getSymbolCache().findSymbolByTypeIndex(Record->getUnderlyingType());
+
+ codeview::TypeIndex Underlying = Record->getUnderlyingType();
+
+ // This indicates a corrupt record.
+ if (!Underlying.isSimple() ||
+ Underlying.getSimpleMode() != SimpleTypeMode::Direct) {
+ return PDB_BuiltinType::None;
+ }
+
+ switch (Underlying.getSimpleKind()) {
+ case SimpleTypeKind::Boolean128:
+ case SimpleTypeKind::Boolean64:
+ case SimpleTypeKind::Boolean32:
+ case SimpleTypeKind::Boolean16:
+ case SimpleTypeKind::Boolean8:
+ return PDB_BuiltinType::Bool;
+ case SimpleTypeKind::NarrowCharacter:
+ case SimpleTypeKind::UnsignedCharacter:
+ case SimpleTypeKind::SignedCharacter:
+ return PDB_BuiltinType::Char;
+ case SimpleTypeKind::WideCharacter:
+ return PDB_BuiltinType::WCharT;
+ case SimpleTypeKind::Character16:
+ return PDB_BuiltinType::Char16;
+ case SimpleTypeKind::Character32:
+ return PDB_BuiltinType::Char32;
+ case SimpleTypeKind::Int128:
+ case SimpleTypeKind::Int128Oct:
+ case SimpleTypeKind::Int16:
+ case SimpleTypeKind::Int16Short:
+ case SimpleTypeKind::Int32:
+ case SimpleTypeKind::Int32Long:
+ case SimpleTypeKind::Int64:
+ case SimpleTypeKind::Int64Quad:
+ return PDB_BuiltinType::Int;
+ case SimpleTypeKind::UInt128:
+ case SimpleTypeKind::UInt128Oct:
+ case SimpleTypeKind::UInt16:
+ case SimpleTypeKind::UInt16Short:
+ case SimpleTypeKind::UInt32:
+ case SimpleTypeKind::UInt32Long:
+ case SimpleTypeKind::UInt64:
+ case SimpleTypeKind::UInt64Quad:
+ return PDB_BuiltinType::UInt;
+ case SimpleTypeKind::HResult:
+ return PDB_BuiltinType::HResult;
+ case SimpleTypeKind::Complex16:
+ case SimpleTypeKind::Complex32:
+ case SimpleTypeKind::Complex32PartialPrecision:
+ case SimpleTypeKind::Complex64:
+ case SimpleTypeKind::Complex80:
+ case SimpleTypeKind::Complex128:
+ return PDB_BuiltinType::Complex;
+ case SimpleTypeKind::Float16:
+ case SimpleTypeKind::Float32:
+ case SimpleTypeKind::Float32PartialPrecision:
+ case SimpleTypeKind::Float48:
+ case SimpleTypeKind::Float64:
+ case SimpleTypeKind::Float80:
+ case SimpleTypeKind::Float128:
+ return PDB_BuiltinType::Float;
+ default:
+ return PDB_BuiltinType::None;
+ }
+ llvm_unreachable("Unreachable");
+}
+
+SymIndexId NativeTypeEnum::getUnmodifiedTypeId() const {
+ return UnmodifiedType ? UnmodifiedType->getSymIndexId() : 0;
+}
+
+bool NativeTypeEnum::hasConstructor() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasConstructor();
+
+ return bool(Record->getOptions() &
+ codeview::ClassOptions::HasConstructorOrDestructor);
+}
+
+bool NativeTypeEnum::hasAssignmentOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasAssignmentOperator();
+
+ return bool(Record->getOptions() &
+ codeview::ClassOptions::HasOverloadedAssignmentOperator);
+}
+
+bool NativeTypeEnum::hasNestedTypes() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasNestedTypes();
+
+ return bool(Record->getOptions() &
+ codeview::ClassOptions::ContainsNestedClass);
+}
+
+bool NativeTypeEnum::isIntrinsic() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isIntrinsic();
+
+ return bool(Record->getOptions() & codeview::ClassOptions::Intrinsic);
+}
+
+bool NativeTypeEnum::hasCastOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasCastOperator();
+
+ return bool(Record->getOptions() &
+ codeview::ClassOptions::HasConversionOperator);
+}
+
+uint64_t NativeTypeEnum::getLength() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getLength();
+
+ const auto Id = Session.getSymbolCache().findSymbolByTypeIndex(
+ Record->getUnderlyingType());
+ const auto UnderlyingType =
+ Session.getConcreteSymbolById<PDBSymbolTypeBuiltin>(Id);
+ return UnderlyingType ? UnderlyingType->getLength() : 0;
+}
+
+std::string NativeTypeEnum::getName() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getName();
+
+ return Record->getName();
+}
+
+bool NativeTypeEnum::isNested() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isNested();
+
+ return bool(Record->getOptions() & codeview::ClassOptions::Nested);
+}
+
+bool NativeTypeEnum::hasOverloadedOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasOverloadedOperator();
+
+ return bool(Record->getOptions() &
+ codeview::ClassOptions::HasOverloadedOperator);
+}
+
+bool NativeTypeEnum::isPacked() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isPacked();
+
+ return bool(Record->getOptions() & codeview::ClassOptions::Packed);
+}
+
+bool NativeTypeEnum::isScoped() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isScoped();
+
+ return bool(Record->getOptions() & codeview::ClassOptions::Scoped);
+}
+
+SymIndexId NativeTypeEnum::getTypeId() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getTypeId();
+
+ return Session.getSymbolCache().findSymbolByTypeIndex(
+ Record->getUnderlyingType());
+}
+
+bool NativeTypeEnum::isRefUdt() const { return false; }
+
+bool NativeTypeEnum::isValueUdt() const { return false; }
+
+bool NativeTypeEnum::isInterfaceUdt() const { return false; }
+
+bool NativeTypeEnum::isConstType() const {
+ if (!Modifiers)
+ return false;
+ return ((Modifiers->getModifiers() & ModifierOptions::Const) !=
+ ModifierOptions::None);
+}
+
+bool NativeTypeEnum::isVolatileType() const {
+ if (!Modifiers)
+ return false;
+ return ((Modifiers->getModifiers() & ModifierOptions::Volatile) !=
+ ModifierOptions::None);
+}
+
+bool NativeTypeEnum::isUnalignedType() const {
+ if (!Modifiers)
+ return false;
+ return ((Modifiers->getModifiers() & ModifierOptions::Unaligned) !=
+ ModifierOptions::None);
+}
+
+const NativeTypeBuiltin &NativeTypeEnum::getUnderlyingBuiltinType() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getUnderlyingBuiltinType();
+
+ return Session.getSymbolCache().getNativeSymbolById<NativeTypeBuiltin>(
+ getTypeId());
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
new file mode 100644
index 000000000000..a9590fffdb87
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
@@ -0,0 +1,200 @@
+//===- NativeTypeFunctionSig.cpp - info about function signature -*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h"
+
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+namespace {
+// This is kind of a silly class, which is why we keep it private to the file.
+// Its only purpose is to wrap the real type record, so that the lexical
+// parent can point to the function instead of the global scope.
+class NativeTypeFunctionArg : public NativeRawSymbol {
+public:
+ NativeTypeFunctionArg(NativeSession &Session,
+ std::unique_ptr<PDBSymbol> RealType)
+ : NativeRawSymbol(Session, PDB_SymType::FunctionArg, 0),
+ RealType(std::move(RealType)) {}
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+ }
+
+ SymIndexId getTypeId() const override { return RealType->getSymIndexId(); }
+
+ std::unique_ptr<PDBSymbol> RealType;
+};
+
+class NativeEnumFunctionArgs : public IPDBEnumChildren<PDBSymbol> {
+public:
+ NativeEnumFunctionArgs(NativeSession &Session,
+ std::unique_ptr<NativeEnumTypes> TypeEnumerator)
+ : Session(Session), TypeEnumerator(std::move(TypeEnumerator)) {}
+
+ uint32_t getChildCount() const override {
+ return TypeEnumerator->getChildCount();
+ }
+ std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override {
+ return wrap(TypeEnumerator->getChildAtIndex(Index));
+ }
+ std::unique_ptr<PDBSymbol> getNext() override {
+ return wrap(TypeEnumerator->getNext());
+ }
+
+ void reset() override { TypeEnumerator->reset(); }
+
+private:
+ std::unique_ptr<PDBSymbol> wrap(std::unique_ptr<PDBSymbol> S) const {
+ if (!S)
+ return nullptr;
+ auto NTFA = llvm::make_unique<NativeTypeFunctionArg>(Session, std::move(S));
+ return PDBSymbol::create(Session, std::move(NTFA));
+ }
+ NativeSession &Session;
+ std::unique_ptr<NativeEnumTypes> TypeEnumerator;
+};
+} // namespace
+
+NativeTypeFunctionSig::NativeTypeFunctionSig(NativeSession &Session,
+ SymIndexId Id,
+ codeview::TypeIndex Index,
+ codeview::ProcedureRecord Proc)
+ : NativeRawSymbol(Session, PDB_SymType::FunctionSig, Id),
+ Proc(std::move(Proc)), Index(Index), IsMemberFunction(false) {}
+
+NativeTypeFunctionSig::NativeTypeFunctionSig(
+ NativeSession &Session, SymIndexId Id, codeview::TypeIndex Index,
+ codeview::MemberFunctionRecord MemberFunc)
+ : NativeRawSymbol(Session, PDB_SymType::FunctionSig, Id),
+ MemberFunc(std::move(MemberFunc)), Index(Index), IsMemberFunction(true) {}
+
+void NativeTypeFunctionSig::initialize() {
+ if (IsMemberFunction) {
+ ClassParentId =
+ Session.getSymbolCache().findSymbolByTypeIndex(MemberFunc.ClassType);
+ initializeArgList(MemberFunc.ArgumentList);
+ } else {
+ initializeArgList(Proc.ArgumentList);
+ }
+}
+
+NativeTypeFunctionSig::~NativeTypeFunctionSig() {}
+
+void NativeTypeFunctionSig::initializeArgList(codeview::TypeIndex ArgListTI) {
+ TpiStream &Tpi = cantFail(Session.getPDBFile().getPDBTpiStream());
+ CVType CVT = Tpi.typeCollection().getType(ArgListTI);
+
+ cantFail(TypeDeserializer::deserializeAs<ArgListRecord>(CVT, ArgList));
+}
+
+void NativeTypeFunctionSig::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+
+ dumpSymbolField(OS, "callingConvention", getCallingConvention(), Indent);
+ dumpSymbolField(OS, "count", getCount(), Indent);
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+ if (IsMemberFunction)
+ dumpSymbolField(OS, "thisAdjust", getThisAdjust(), Indent);
+ dumpSymbolField(OS, "constructor", hasConstructor(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "isConstructorVirtualBase", isConstructorVirtualBase(),
+ Indent);
+ dumpSymbolField(OS, "isCxxReturnUdt", isCxxReturnUdt(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+NativeTypeFunctionSig::findChildren(PDB_SymType Type) const {
+ if (Type != PDB_SymType::FunctionArg)
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+
+ auto NET = llvm::make_unique<NativeEnumTypes>(Session,
+ /* copy */ ArgList.ArgIndices);
+ return std::unique_ptr<IPDBEnumSymbols>(
+ new NativeEnumFunctionArgs(Session, std::move(NET)));
+}
+
+SymIndexId NativeTypeFunctionSig::getClassParentId() const {
+ if (!IsMemberFunction)
+ return 0;
+
+ return ClassParentId;
+}
+
+PDB_CallingConv NativeTypeFunctionSig::getCallingConvention() const {
+ return IsMemberFunction ? MemberFunc.CallConv : Proc.CallConv;
+}
+
+uint32_t NativeTypeFunctionSig::getCount() const {
+ return IsMemberFunction ? (1 + MemberFunc.getParameterCount())
+ : Proc.getParameterCount();
+}
+
+SymIndexId NativeTypeFunctionSig::getTypeId() const {
+ TypeIndex ReturnTI =
+ IsMemberFunction ? MemberFunc.getReturnType() : Proc.getReturnType();
+
+ SymIndexId Result = Session.getSymbolCache().findSymbolByTypeIndex(ReturnTI);
+ return Result;
+}
+
+int32_t NativeTypeFunctionSig::getThisAdjust() const {
+ return IsMemberFunction ? MemberFunc.getThisPointerAdjustment() : 0;
+}
+
+bool NativeTypeFunctionSig::hasConstructor() const {
+ if (!IsMemberFunction)
+ return false;
+
+ return (MemberFunc.getOptions() & FunctionOptions::Constructor) !=
+ FunctionOptions::None;
+}
+
+bool NativeTypeFunctionSig::isConstType() const { return false; }
+
+bool NativeTypeFunctionSig::isConstructorVirtualBase() const {
+ if (!IsMemberFunction)
+ return false;
+
+ return (MemberFunc.getOptions() &
+ FunctionOptions::ConstructorWithVirtualBases) !=
+ FunctionOptions::None;
+}
+
+bool NativeTypeFunctionSig::isCxxReturnUdt() const {
+ FunctionOptions Options =
+ IsMemberFunction ? MemberFunc.getOptions() : Proc.getOptions();
+ return (Options & FunctionOptions::CxxReturnUdt) != FunctionOptions::None;
+}
+
+bool NativeTypeFunctionSig::isUnalignedType() const { return false; }
+
+bool NativeTypeFunctionSig::isVolatileType() const { return false; }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
new file mode 100644
index 000000000000..bd8ecb6c4007
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
@@ -0,0 +1,194 @@
+//===- NativeTypePointer.cpp - info about pointer type ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypePointer.h"
+
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeTypePointer::NativeTypePointer(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI)
+ : NativeRawSymbol(Session, PDB_SymType::PointerType, Id), TI(TI) {
+ assert(TI.isSimple());
+ assert(TI.getSimpleMode() != SimpleTypeMode::Direct);
+}
+
+NativeTypePointer::NativeTypePointer(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI,
+ codeview::PointerRecord Record)
+ : NativeRawSymbol(Session, PDB_SymType::PointerType, Id), TI(TI),
+ Record(std::move(Record)) {}
+
+NativeTypePointer::~NativeTypePointer() {}
+
+void NativeTypePointer::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ if (isMemberPointer()) {
+ dumpSymbolIdField(OS, "classParentId", getClassParentId(), Indent, Session,
+ PdbSymbolIdField::ClassParent, ShowIdFields,
+ RecurseIdFields);
+ }
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+ dumpSymbolField(OS, "length", getLength(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "isPointerToDataMember", isPointerToDataMember(), Indent);
+ dumpSymbolField(OS, "isPointerToMemberFunction", isPointerToMemberFunction(),
+ Indent);
+ dumpSymbolField(OS, "RValueReference", isRValueReference(), Indent);
+ dumpSymbolField(OS, "reference", isReference(), Indent);
+ dumpSymbolField(OS, "restrictedType", isRestrictedType(), Indent);
+ if (isMemberPointer()) {
+ if (isSingleInheritance())
+ dumpSymbolField(OS, "isSingleInheritance", 1, Indent);
+ else if (isMultipleInheritance())
+ dumpSymbolField(OS, "isMultipleInheritance", 1, Indent);
+ else if (isVirtualInheritance())
+ dumpSymbolField(OS, "isVirtualInheritance", 1, Indent);
+ }
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+SymIndexId NativeTypePointer::getClassParentId() const {
+ if (!isMemberPointer())
+ return 0;
+
+ assert(Record);
+ const MemberPointerInfo &MPI = Record->getMemberInfo();
+ return Session.getSymbolCache().findSymbolByTypeIndex(MPI.ContainingType);
+}
+
+uint64_t NativeTypePointer::getLength() const {
+ if (Record)
+ return Record->getSize();
+
+ switch (TI.getSimpleMode()) {
+ case SimpleTypeMode::NearPointer:
+ case SimpleTypeMode::FarPointer:
+ case SimpleTypeMode::HugePointer:
+ return 2;
+ case SimpleTypeMode::NearPointer32:
+ case SimpleTypeMode::FarPointer32:
+ return 4;
+ case SimpleTypeMode::NearPointer64:
+ return 8;
+ case SimpleTypeMode::NearPointer128:
+ return 16;
+ default:
+ assert(false && "invalid simple type mode!");
+ }
+ return 0;
+}
+
+SymIndexId NativeTypePointer::getTypeId() const {
+ // This is the pointee SymIndexId.
+ TypeIndex Referent = Record ? Record->ReferentType : TI.makeDirect();
+
+ return Session.getSymbolCache().findSymbolByTypeIndex(Referent);
+}
+
+bool NativeTypePointer::isReference() const {
+ if (!Record)
+ return false;
+ return Record->getMode() == PointerMode::LValueReference;
+}
+
+bool NativeTypePointer::isRValueReference() const {
+ if (!Record)
+ return false;
+ return Record->getMode() == PointerMode::RValueReference;
+}
+
+bool NativeTypePointer::isPointerToDataMember() const {
+ if (!Record)
+ return false;
+ return Record->getMode() == PointerMode::PointerToDataMember;
+}
+
+bool NativeTypePointer::isPointerToMemberFunction() const {
+ if (!Record)
+ return false;
+ return Record->getMode() == PointerMode::PointerToMemberFunction;
+}
+
+bool NativeTypePointer::isConstType() const {
+ if (!Record)
+ return false;
+ return (Record->getOptions() & PointerOptions::Const) != PointerOptions::None;
+}
+
+bool NativeTypePointer::isRestrictedType() const {
+ if (!Record)
+ return false;
+ return (Record->getOptions() & PointerOptions::Restrict) !=
+ PointerOptions::None;
+}
+
+bool NativeTypePointer::isVolatileType() const {
+ if (!Record)
+ return false;
+ return (Record->getOptions() & PointerOptions::Volatile) !=
+ PointerOptions::None;
+}
+
+bool NativeTypePointer::isUnalignedType() const {
+ if (!Record)
+ return false;
+ return (Record->getOptions() & PointerOptions::Unaligned) !=
+ PointerOptions::None;
+}
+
+static inline bool isInheritanceKind(const MemberPointerInfo &MPI,
+ PointerToMemberRepresentation P1,
+ PointerToMemberRepresentation P2) {
+ return (MPI.getRepresentation() == P1 || MPI.getRepresentation() == P2);
+}
+
+bool NativeTypePointer::isSingleInheritance() const {
+ if (!isMemberPointer())
+ return false;
+ return isInheritanceKind(
+ Record->getMemberInfo(),
+ PointerToMemberRepresentation::SingleInheritanceData,
+ PointerToMemberRepresentation::SingleInheritanceFunction);
+}
+
+bool NativeTypePointer::isMultipleInheritance() const {
+ if (!isMemberPointer())
+ return false;
+ return isInheritanceKind(
+ Record->getMemberInfo(),
+ PointerToMemberRepresentation::MultipleInheritanceData,
+ PointerToMemberRepresentation::MultipleInheritanceFunction);
+}
+
+bool NativeTypePointer::isVirtualInheritance() const {
+ if (!isMemberPointer())
+ return false;
+ return isInheritanceKind(
+ Record->getMemberInfo(),
+ PointerToMemberRepresentation::VirtualInheritanceData,
+ PointerToMemberRepresentation::VirtualInheritanceFunction);
+}
+
+bool NativeTypePointer::isMemberPointer() const {
+ return isPointerToDataMember() || isPointerToMemberFunction();
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp
new file mode 100644
index 000000000000..60b373282267
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp
@@ -0,0 +1,27 @@
+#include "llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeTypeTypedef::NativeTypeTypedef(NativeSession &Session, SymIndexId Id,
+ codeview::UDTSym Typedef)
+ : NativeRawSymbol(Session, PDB_SymType::Typedef, Id),
+ Record(std::move(Typedef)) {}
+
+NativeTypeTypedef::~NativeTypeTypedef() {}
+
+void NativeTypeTypedef::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+ dumpSymbolField(OS, "name", getName(), Indent);
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+}
+
+std::string NativeTypeTypedef::getName() const { return Record.Name; }
+
+SymIndexId NativeTypeTypedef::getTypeId() const {
+ return Session.getSymbolCache().findSymbolByTypeIndex(Record.Type);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
new file mode 100644
index 000000000000..3abf91dcc6a3
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
@@ -0,0 +1,221 @@
+//===- NativeTypeUDT.cpp - info about class/struct type ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypeUDT.h"
+
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeTypeUDT::NativeTypeUDT(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI, codeview::ClassRecord CR)
+ : NativeRawSymbol(Session, PDB_SymType::UDT, Id), Index(TI),
+ Class(std::move(CR)), Tag(Class.getPointer()) {}
+
+NativeTypeUDT::NativeTypeUDT(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI, codeview::UnionRecord UR)
+ : NativeRawSymbol(Session, PDB_SymType::UDT, Id), Index(TI),
+ Union(std::move(UR)), Tag(Union.getPointer()) {}
+
+NativeTypeUDT::NativeTypeUDT(NativeSession &Session, SymIndexId Id,
+ NativeTypeUDT &UnmodifiedType,
+ codeview::ModifierRecord Modifier)
+ : NativeRawSymbol(Session, PDB_SymType::UDT, Id),
+ UnmodifiedType(&UnmodifiedType), Modifiers(std::move(Modifier)) {}
+
+NativeTypeUDT::~NativeTypeUDT() {}
+
+void NativeTypeUDT::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolField(OS, "name", getName(), Indent);
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ if (Modifiers.hasValue())
+ dumpSymbolIdField(OS, "unmodifiedTypeId", getUnmodifiedTypeId(), Indent,
+ Session, PdbSymbolIdField::UnmodifiedType, ShowIdFields,
+ RecurseIdFields);
+ if (getUdtKind() != PDB_UdtType::Union)
+ dumpSymbolField(OS, "virtualTableShapeId", getVirtualTableShapeId(),
+ Indent);
+ dumpSymbolField(OS, "length", getLength(), Indent);
+ dumpSymbolField(OS, "udtKind", getUdtKind(), Indent);
+ dumpSymbolField(OS, "constructor", hasConstructor(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "hasAssignmentOperator", hasAssignmentOperator(), Indent);
+ dumpSymbolField(OS, "hasCastOperator", hasCastOperator(), Indent);
+ dumpSymbolField(OS, "hasNestedTypes", hasNestedTypes(), Indent);
+ dumpSymbolField(OS, "overloadedOperator", hasOverloadedOperator(), Indent);
+ dumpSymbolField(OS, "isInterfaceUdt", isInterfaceUdt(), Indent);
+ dumpSymbolField(OS, "intrinsic", isIntrinsic(), Indent);
+ dumpSymbolField(OS, "nested", isNested(), Indent);
+ dumpSymbolField(OS, "packed", isPacked(), Indent);
+ dumpSymbolField(OS, "isRefUdt", isRefUdt(), Indent);
+ dumpSymbolField(OS, "scoped", isScoped(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "isValueUdt", isValueUdt(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+std::string NativeTypeUDT::getName() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getName();
+
+ return Tag->getName();
+}
+
+SymIndexId NativeTypeUDT::getLexicalParentId() const { return 0; }
+
+SymIndexId NativeTypeUDT::getUnmodifiedTypeId() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getSymIndexId();
+
+ return 0;
+}
+
+SymIndexId NativeTypeUDT::getVirtualTableShapeId() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getVirtualTableShapeId();
+
+ if (Class)
+ return Session.getSymbolCache().findSymbolByTypeIndex(Class->VTableShape);
+
+ return 0;
+}
+
+uint64_t NativeTypeUDT::getLength() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getLength();
+
+ if (Class)
+ return Class->getSize();
+
+ return Union->getSize();
+}
+
+PDB_UdtType NativeTypeUDT::getUdtKind() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getUdtKind();
+
+ switch (Tag->Kind) {
+ case TypeRecordKind::Class:
+ return PDB_UdtType::Class;
+ case TypeRecordKind::Union:
+ return PDB_UdtType::Union;
+ case TypeRecordKind::Struct:
+ return PDB_UdtType::Struct;
+ case TypeRecordKind::Interface:
+ return PDB_UdtType::Interface;
+ default:
+ llvm_unreachable("Unexected udt kind");
+ }
+}
+
+bool NativeTypeUDT::hasConstructor() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasConstructor();
+
+ return (Tag->Options & ClassOptions::HasConstructorOrDestructor) !=
+ ClassOptions::None;
+}
+
+bool NativeTypeUDT::isConstType() const {
+ if (!Modifiers)
+ return false;
+ return (Modifiers->Modifiers & ModifierOptions::Const) !=
+ ModifierOptions::None;
+}
+
+bool NativeTypeUDT::hasAssignmentOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasAssignmentOperator();
+
+ return (Tag->Options & ClassOptions::HasOverloadedAssignmentOperator) !=
+ ClassOptions::None;
+}
+
+bool NativeTypeUDT::hasCastOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasCastOperator();
+
+ return (Tag->Options & ClassOptions::HasConversionOperator) !=
+ ClassOptions::None;
+}
+
+bool NativeTypeUDT::hasNestedTypes() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasNestedTypes();
+
+ return (Tag->Options & ClassOptions::ContainsNestedClass) !=
+ ClassOptions::None;
+}
+
+bool NativeTypeUDT::hasOverloadedOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasOverloadedOperator();
+
+ return (Tag->Options & ClassOptions::HasOverloadedOperator) !=
+ ClassOptions::None;
+}
+
+bool NativeTypeUDT::isInterfaceUdt() const { return false; }
+
+bool NativeTypeUDT::isIntrinsic() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isIntrinsic();
+
+ return (Tag->Options & ClassOptions::Intrinsic) != ClassOptions::None;
+}
+
+bool NativeTypeUDT::isNested() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isNested();
+
+ return (Tag->Options & ClassOptions::Nested) != ClassOptions::None;
+}
+
+bool NativeTypeUDT::isPacked() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isPacked();
+
+ return (Tag->Options & ClassOptions::Packed) != ClassOptions::None;
+}
+
+bool NativeTypeUDT::isRefUdt() const { return false; }
+
+bool NativeTypeUDT::isScoped() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isScoped();
+
+ return (Tag->Options & ClassOptions::Scoped) != ClassOptions::None;
+}
+
+bool NativeTypeUDT::isValueUdt() const { return false; }
+
+bool NativeTypeUDT::isUnalignedType() const {
+ if (!Modifiers)
+ return false;
+ return (Modifiers->Modifiers & ModifierOptions::Unaligned) !=
+ ModifierOptions::None;
+}
+
+bool NativeTypeUDT::isVolatileType() const {
+ if (!Modifiers)
+ return false;
+ return (Modifiers->Modifiers & ModifierOptions::Volatile) !=
+ ModifierOptions::None;
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp
new file mode 100644
index 000000000000..837fe19ec88c
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp
@@ -0,0 +1,35 @@
+#include "llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h"
+
+using namespace llvm;
+using namespace llvm::pdb;
+
+// Create a symbol for a vtable shape record.
+NativeTypeVTShape::NativeTypeVTShape(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI,
+ codeview::VFTableShapeRecord SR)
+ : NativeRawSymbol(Session, PDB_SymType::VTableShape, Id), TI(TI),
+ Record(std::move(SR)) {}
+
+NativeTypeVTShape::~NativeTypeVTShape() {}
+
+void NativeTypeVTShape::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "count", getCount(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+bool NativeTypeVTShape::isConstType() const { return false; }
+
+bool NativeTypeVTShape::isVolatileType() const { return false; }
+
+bool NativeTypeVTShape::isUnalignedType() const { return false; }
+
+uint32_t NativeTypeVTShape::getCount() const { return Record.Slots.size(); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
index 78b11937f051..a1f8786ff12f 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -125,7 +125,7 @@ Error PDBFile::parseFileHeaders() {
if (auto EC = Reader.readObject(SB)) {
consumeError(std::move(EC));
return make_error<RawError>(raw_error_code::corrupt_file,
- "Does not contain superblock");
+ "MSF superblock is missing");
}
if (auto EC = msf::validateSuperBlock(*SB))
@@ -401,7 +401,9 @@ uint32_t PDBFile::getPointerSize() {
return 4;
}
-bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); }
+bool PDBFile::hasPDBDbiStream() const {
+ return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
+}
bool PDBFile::hasPDBGlobalsStream() {
auto DbiS = getPDBDbiStream();
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index e164e7cf1c52..e0ceb7499ee5 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -12,7 +12,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
-#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
@@ -26,6 +25,7 @@
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/JamCRC.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/xxhash.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -262,13 +262,14 @@ void PDBFileBuilder::commitInjectedSources(WritableBinaryStream &MsfBuffer,
}
}
-Error PDBFileBuilder::commit(StringRef Filename) {
+Error PDBFileBuilder::commit(StringRef Filename, codeview::GUID *Guid) {
assert(!Filename.empty());
if (auto EC = finalizeMsfLayout())
return EC;
MSFLayout Layout;
- auto ExpectedMsfBuffer = Msf->commit(Filename, Layout);
+ Expected<FileBufferByteStream> ExpectedMsfBuffer =
+ Msf->commit(Filename, Layout);
if (!ExpectedMsfBuffer)
return ExpectedMsfBuffer.takeError();
FileBufferByteStream Buffer = std::move(*ExpectedMsfBuffer);
@@ -330,11 +331,28 @@ Error PDBFileBuilder::commit(StringRef Filename) {
// Set the build id at the very end, after every other byte of the PDB
// has been written.
- // FIXME: Use a hash of the PDB rather than time(nullptr) for the signature.
- H->Age = Info->getAge();
- H->Guid = Info->getGuid();
- Optional<uint32_t> Sig = Info->getSignature();
- H->Signature = Sig.hasValue() ? *Sig : time(nullptr);
+ if (Info->hashPDBContentsToGUID()) {
+ // Compute a hash of all sections of the output file.
+ uint64_t Digest =
+ xxHash64({Buffer.getBufferStart(), Buffer.getBufferEnd()});
+
+ H->Age = 1;
+
+ memcpy(H->Guid.Guid, &Digest, 8);
+ // xxhash only gives us 8 bytes, so put some fixed data in the other half.
+ memcpy(H->Guid.Guid + 8, "LLD PDB.", 8);
+
+ // Put the hash in the Signature field too.
+ H->Signature = static_cast<uint32_t>(Digest);
+
+ // Return GUID to caller.
+ memcpy(Guid, H->Guid.Guid, 16);
+ } else {
+ H->Age = Info->getAge();
+ H->Guid = Info->getGuid();
+ Optional<uint32_t> Sig = Info->getSignature();
+ H->Signature = Sig.hasValue() ? *Sig : time(nullptr);
+ }
return Buffer.commit();
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/RawError.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/RawError.cpp
index 548289fff3df..dec9797088f2 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/RawError.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/RawError.cpp
@@ -5,14 +5,12 @@
using namespace llvm;
using namespace llvm::pdb;
-namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class RawErrorCategory : public std::error_category {
public:
const char *name() const noexcept override { return "llvm.pdb.raw"; }
-
std::string message(int Condition) const override {
switch (static_cast<raw_error_code>(Condition)) {
case raw_error_code::unspecified:
@@ -46,30 +44,8 @@ public:
llvm_unreachable("Unrecognized raw_error_code");
}
};
-} // end anonymous namespace
-
-static ManagedStatic<RawErrorCategory> Category;
-
-char RawError::ID = 0;
-
-RawError::RawError(raw_error_code C) : RawError(C, "") {}
-
-RawError::RawError(const std::string &Context)
- : RawError(raw_error_code::unspecified, Context) {}
-
-RawError::RawError(raw_error_code C, const std::string &Context) : Code(C) {
- ErrMsg = "Native PDB Error: ";
- std::error_code EC = convertToErrorCode();
- if (Code != raw_error_code::unspecified)
- ErrMsg += EC.message() + " ";
- if (!Context.empty())
- ErrMsg += Context;
-}
-
-void RawError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-const std::string &RawError::getErrorMessage() const { return ErrMsg; }
+static llvm::ManagedStatic<RawErrorCategory> RawCategory;
+const std::error_category &llvm::pdb::RawErrCategory() { return *RawCategory; }
-std::error_code RawError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(Code), *Category);
-}
+char RawError::ID;
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
new file mode 100644
index 000000000000..5cdd628312fe
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
@@ -0,0 +1,299 @@
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
+
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
+#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeArray.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypePointer.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeUDT.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+// Maps codeview::SimpleTypeKind of a built-in type to the parameters necessary
+// to instantiate a NativeBuiltinSymbol for that type.
+static const struct BuiltinTypeEntry {
+ codeview::SimpleTypeKind Kind;
+ PDB_BuiltinType Type;
+ uint32_t Size;
+} BuiltinTypes[] = {
+ {codeview::SimpleTypeKind::None, PDB_BuiltinType::None, 0},
+ {codeview::SimpleTypeKind::Void, PDB_BuiltinType::Void, 0},
+ {codeview::SimpleTypeKind::HResult, PDB_BuiltinType::HResult, 4},
+ {codeview::SimpleTypeKind::Int16Short, PDB_BuiltinType::Int, 2},
+ {codeview::SimpleTypeKind::UInt16Short, PDB_BuiltinType::UInt, 2},
+ {codeview::SimpleTypeKind::Int32, PDB_BuiltinType::Int, 4},
+ {codeview::SimpleTypeKind::UInt32, PDB_BuiltinType::UInt, 4},
+ {codeview::SimpleTypeKind::Int32Long, PDB_BuiltinType::Int, 4},
+ {codeview::SimpleTypeKind::UInt32Long, PDB_BuiltinType::UInt, 4},
+ {codeview::SimpleTypeKind::Int64Quad, PDB_BuiltinType::Int, 8},
+ {codeview::SimpleTypeKind::UInt64Quad, PDB_BuiltinType::UInt, 8},
+ {codeview::SimpleTypeKind::NarrowCharacter, PDB_BuiltinType::Char, 1},
+ {codeview::SimpleTypeKind::WideCharacter, PDB_BuiltinType::WCharT, 2},
+ {codeview::SimpleTypeKind::Character16, PDB_BuiltinType::Char16, 2},
+ {codeview::SimpleTypeKind::Character32, PDB_BuiltinType::Char32, 4},
+ {codeview::SimpleTypeKind::SignedCharacter, PDB_BuiltinType::Char, 1},
+ {codeview::SimpleTypeKind::UnsignedCharacter, PDB_BuiltinType::UInt, 1},
+ {codeview::SimpleTypeKind::Float32, PDB_BuiltinType::Float, 4},
+ {codeview::SimpleTypeKind::Float64, PDB_BuiltinType::Float, 8},
+ {codeview::SimpleTypeKind::Float80, PDB_BuiltinType::Float, 10},
+ {codeview::SimpleTypeKind::Boolean8, PDB_BuiltinType::Bool, 1},
+ // This table can be grown as necessary, but these are the only types we've
+ // needed so far.
+};
+
+SymbolCache::SymbolCache(NativeSession &Session, DbiStream *Dbi)
+ : Session(Session), Dbi(Dbi) {
+ // Id 0 is reserved for the invalid symbol.
+ Cache.push_back(nullptr);
+
+ if (Dbi)
+ Compilands.resize(Dbi->modules().getModuleCount());
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+SymbolCache::createTypeEnumerator(TypeLeafKind Kind) {
+ return createTypeEnumerator(std::vector<TypeLeafKind>{Kind});
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+SymbolCache::createTypeEnumerator(std::vector<TypeLeafKind> Kinds) {
+ auto Tpi = Session.getPDBFile().getPDBTpiStream();
+ if (!Tpi) {
+ consumeError(Tpi.takeError());
+ return nullptr;
+ }
+ auto &Types = Tpi->typeCollection();
+ return std::unique_ptr<IPDBEnumSymbols>(
+ new NativeEnumTypes(Session, Types, std::move(Kinds)));
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+SymbolCache::createGlobalsEnumerator(codeview::SymbolKind Kind) {
+ return std::unique_ptr<IPDBEnumSymbols>(
+ new NativeEnumGlobals(Session, {Kind}));
+}
+
+SymIndexId SymbolCache::createSimpleType(TypeIndex Index,
+ ModifierOptions Mods) {
+ if (Index.getSimpleMode() != codeview::SimpleTypeMode::Direct)
+ return createSymbol<NativeTypePointer>(Index);
+
+ const auto Kind = Index.getSimpleKind();
+ const auto It = std::find_if(
+ std::begin(BuiltinTypes), std::end(BuiltinTypes),
+ [Kind](const BuiltinTypeEntry &Builtin) { return Builtin.Kind == Kind; });
+ if (It == std::end(BuiltinTypes))
+ return 0;
+ return createSymbol<NativeTypeBuiltin>(Mods, It->Type, It->Size);
+}
+
+SymIndexId
+SymbolCache::createSymbolForModifiedType(codeview::TypeIndex ModifierTI,
+ codeview::CVType CVT) {
+ ModifierRecord Record;
+ if (auto EC = TypeDeserializer::deserializeAs<ModifierRecord>(CVT, Record)) {
+ consumeError(std::move(EC));
+ return 0;
+ }
+
+ if (Record.ModifiedType.isSimple())
+ return createSimpleType(Record.ModifiedType, Record.Modifiers);
+
+ // Make sure we create and cache a record for the unmodified type.
+ SymIndexId UnmodifiedId = findSymbolByTypeIndex(Record.ModifiedType);
+ NativeRawSymbol &UnmodifiedNRS = *Cache[UnmodifiedId];
+
+ switch (UnmodifiedNRS.getSymTag()) {
+ case PDB_SymType::Enum:
+ return createSymbol<NativeTypeEnum>(
+ static_cast<NativeTypeEnum &>(UnmodifiedNRS), std::move(Record));
+ case PDB_SymType::UDT:
+ return createSymbol<NativeTypeUDT>(
+ static_cast<NativeTypeUDT &>(UnmodifiedNRS), std::move(Record));
+ default:
+ // No other types can be modified. (LF_POINTER, for example, records
+ // its modifiers a different way.)
+ assert(false && "Invalid LF_MODIFIER record");
+ break;
+ }
+ return 0;
+}
+
+SymIndexId SymbolCache::findSymbolByTypeIndex(codeview::TypeIndex Index) {
+ // First see if it's already in our cache.
+ const auto Entry = TypeIndexToSymbolId.find(Index);
+ if (Entry != TypeIndexToSymbolId.end())
+ return Entry->second;
+
+ // Symbols for built-in types are created on the fly.
+ if (Index.isSimple()) {
+ SymIndexId Result = createSimpleType(Index, ModifierOptions::None);
+ assert(TypeIndexToSymbolId.count(Index) == 0);
+ TypeIndexToSymbolId[Index] = Result;
+ return Result;
+ }
+
+ // We need to instantiate and cache the desired type symbol.
+ auto Tpi = Session.getPDBFile().getPDBTpiStream();
+ if (!Tpi) {
+ consumeError(Tpi.takeError());
+ return 0;
+ }
+ codeview::LazyRandomTypeCollection &Types = Tpi->typeCollection();
+ codeview::CVType CVT = Types.getType(Index);
+
+ if (isUdtForwardRef(CVT)) {
+ Expected<TypeIndex> EFD = Tpi->findFullDeclForForwardRef(Index);
+
+ if (!EFD)
+ consumeError(EFD.takeError());
+ else if (*EFD != Index) {
+ assert(!isUdtForwardRef(Types.getType(*EFD)));
+ SymIndexId Result = findSymbolByTypeIndex(*EFD);
+ // Record a mapping from ForwardRef -> SymIndex of complete type so that
+ // we'll take the fast path next time.
+ assert(TypeIndexToSymbolId.count(Index) == 0);
+ TypeIndexToSymbolId[Index] = Result;
+ return Result;
+ }
+ }
+
+ // At this point, if we still have a forward ref UDT, it means the full decl
+ // was not in the PDB. We just have to deal with it and use the forward ref.
+ SymIndexId Id = 0;
+ switch (CVT.kind()) {
+ case codeview::LF_ENUM:
+ Id = createSymbolForType<NativeTypeEnum, EnumRecord>(Index, std::move(CVT));
+ break;
+ case codeview::LF_ARRAY:
+ Id = createSymbolForType<NativeTypeArray, ArrayRecord>(Index,
+ std::move(CVT));
+ break;
+ case codeview::LF_CLASS:
+ case codeview::LF_STRUCTURE:
+ case codeview::LF_INTERFACE:
+ Id = createSymbolForType<NativeTypeUDT, ClassRecord>(Index, std::move(CVT));
+ break;
+ case codeview::LF_UNION:
+ Id = createSymbolForType<NativeTypeUDT, UnionRecord>(Index, std::move(CVT));
+ break;
+ case codeview::LF_POINTER:
+ Id = createSymbolForType<NativeTypePointer, PointerRecord>(Index,
+ std::move(CVT));
+ break;
+ case codeview::LF_MODIFIER:
+ Id = createSymbolForModifiedType(Index, std::move(CVT));
+ break;
+ case codeview::LF_PROCEDURE:
+ Id = createSymbolForType<NativeTypeFunctionSig, ProcedureRecord>(
+ Index, std::move(CVT));
+ break;
+ case codeview::LF_MFUNCTION:
+ Id = createSymbolForType<NativeTypeFunctionSig, MemberFunctionRecord>(
+ Index, std::move(CVT));
+ break;
+ case codeview::LF_VTSHAPE:
+ Id = createSymbolForType<NativeTypeVTShape, VFTableShapeRecord>(
+ Index, std::move(CVT));
+ break;
+ default:
+ Id = createSymbolPlaceholder();
+ break;
+ }
+ if (Id != 0) {
+ assert(TypeIndexToSymbolId.count(Index) == 0);
+ TypeIndexToSymbolId[Index] = Id;
+ }
+ return Id;
+}
+
+std::unique_ptr<PDBSymbol>
+SymbolCache::getSymbolById(SymIndexId SymbolId) const {
+ assert(SymbolId < Cache.size());
+
+ // Id 0 is reserved.
+ if (SymbolId == 0 || SymbolId >= Cache.size())
+ return nullptr;
+
+ // Make sure to handle the case where we've inserted a placeholder symbol
+ // for types we don't yet support.
+ NativeRawSymbol *NRS = Cache[SymbolId].get();
+ if (!NRS)
+ return nullptr;
+
+ return PDBSymbol::create(Session, *NRS);
+}
+
+NativeRawSymbol &SymbolCache::getNativeSymbolById(SymIndexId SymbolId) const {
+ return *Cache[SymbolId];
+}
+
+uint32_t SymbolCache::getNumCompilands() const {
+ if (!Dbi)
+ return 0;
+
+ return Dbi->modules().getModuleCount();
+}
+
+SymIndexId SymbolCache::getOrCreateGlobalSymbolByOffset(uint32_t Offset) {
+ auto Iter = GlobalOffsetToSymbolId.find(Offset);
+ if (Iter != GlobalOffsetToSymbolId.end())
+ return Iter->second;
+
+ SymbolStream &SS = cantFail(Session.getPDBFile().getPDBSymbolStream());
+ CVSymbol CVS = SS.readRecord(Offset);
+ SymIndexId Id = 0;
+ switch (CVS.kind()) {
+ case SymbolKind::S_UDT: {
+ UDTSym US = cantFail(SymbolDeserializer::deserializeAs<UDTSym>(CVS));
+ Id = createSymbol<NativeTypeTypedef>(std::move(US));
+ break;
+ }
+ default:
+ Id = createSymbolPlaceholder();
+ break;
+ }
+ if (Id != 0) {
+ assert(GlobalOffsetToSymbolId.count(Offset) == 0);
+ GlobalOffsetToSymbolId[Offset] = Id;
+ }
+
+ return Id;
+}
+
+std::unique_ptr<PDBSymbolCompiland>
+SymbolCache::getOrCreateCompiland(uint32_t Index) {
+ if (!Dbi)
+ return nullptr;
+
+ if (Index >= Compilands.size())
+ return nullptr;
+
+ if (Compilands[Index] == 0) {
+ const DbiModuleList &Modules = Dbi->modules();
+ Compilands[Index] =
+ createSymbol<NativeCompilandSymbol>(Modules.getModuleDescriptor(Index));
+ }
+
+ return Session.getConcreteSymbolById<PDBSymbolCompiland>(Compilands[Index]);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
index 77a2d57a8369..18708826ffc7 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
@@ -50,6 +50,32 @@ static Expected<uint32_t> getHashForUdt(const CVType &Rec) {
}
template <typename T>
+static Expected<TagRecordHash> getTagRecordHashForUdt(const CVType &Rec) {
+ T Deserialized;
+ if (auto E = TypeDeserializer::deserializeAs(const_cast<CVType &>(Rec),
+ Deserialized))
+ return std::move(E);
+
+ ClassOptions Opts = Deserialized.getOptions();
+
+ bool ForwardRef = bool(Opts & ClassOptions::ForwardReference);
+
+ uint32_t ThisRecordHash = getHashForUdt(Deserialized, Rec.data());
+
+ // If we don't have a forward ref we can't compute the hash of it from the
+ // full record because it requires hashing the entire buffer.
+ if (!ForwardRef)
+ return TagRecordHash{std::move(Deserialized), ThisRecordHash, 0};
+
+ bool Scoped = bool(Opts & ClassOptions::Scoped);
+
+ StringRef NameToHash =
+ Scoped ? Deserialized.getUniqueName() : Deserialized.getName();
+ uint32_t FullHash = hashStringV1(NameToHash);
+ return TagRecordHash{std::move(Deserialized), FullHash, ThisRecordHash};
+}
+
+template <typename T>
static Expected<uint32_t> getSourceLineHash(const CVType &Rec) {
T Deserialized;
if (auto E = TypeDeserializer::deserializeAs(const_cast<CVType &>(Rec),
@@ -60,6 +86,23 @@ static Expected<uint32_t> getSourceLineHash(const CVType &Rec) {
return hashStringV1(StringRef(Buf, 4));
}
+Expected<TagRecordHash> llvm::pdb::hashTagRecord(const codeview::CVType &Type) {
+ switch (Type.kind()) {
+ case LF_CLASS:
+ case LF_STRUCTURE:
+ case LF_INTERFACE:
+ return getTagRecordHashForUdt<ClassRecord>(Type);
+ case LF_UNION:
+ return getTagRecordHashForUdt<UnionRecord>(Type);
+ case LF_ENUM:
+ return getTagRecordHashForUdt<EnumRecord>(Type);
+ default:
+ assert(false && "Type is not a tag record!");
+ }
+ return make_error<StringError>("Invalid record type",
+ inconvertibleErrorCode());
+}
+
Expected<uint32_t> llvm::pdb::hashTypeRecord(const CVType &Rec) {
switch (Rec.kind()) {
case LF_CLASS:
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp
index 0680b673380a..f234d446e6a0 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -11,8 +11,11 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+#include "llvm/DebugInfo/CodeView/RecordName.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
@@ -140,6 +143,88 @@ uint16_t TpiStream::getTypeHashStreamAuxIndex() const {
uint32_t TpiStream::getNumHashBuckets() const { return Header->NumHashBuckets; }
uint32_t TpiStream::getHashKeySize() const { return Header->HashKeySize; }
+void TpiStream::buildHashMap() {
+ if (!HashMap.empty())
+ return;
+ if (HashValues.empty())
+ return;
+
+ HashMap.resize(Header->NumHashBuckets);
+
+ TypeIndex TIB{Header->TypeIndexBegin};
+ TypeIndex TIE{Header->TypeIndexEnd};
+ while (TIB < TIE) {
+ uint32_t HV = HashValues[TIB.toArrayIndex()];
+ HashMap[HV].push_back(TIB++);
+ }
+}
+
+std::vector<TypeIndex> TpiStream::findRecordsByName(StringRef Name) const {
+ if (!supportsTypeLookup())
+ const_cast<TpiStream*>(this)->buildHashMap();
+
+ uint32_t Bucket = hashStringV1(Name) % Header->NumHashBuckets;
+ if (Bucket > HashMap.size())
+ return {};
+
+ std::vector<TypeIndex> Result;
+ for (TypeIndex TI : HashMap[Bucket]) {
+ std::string ThisName = computeTypeName(*Types, TI);
+ if (ThisName == Name)
+ Result.push_back(TI);
+ }
+ return Result;
+}
+
+bool TpiStream::supportsTypeLookup() const { return !HashMap.empty(); }
+
+Expected<TypeIndex>
+TpiStream::findFullDeclForForwardRef(TypeIndex ForwardRefTI) const {
+ if (!supportsTypeLookup())
+ const_cast<TpiStream*>(this)->buildHashMap();
+
+ CVType F = Types->getType(ForwardRefTI);
+ if (!isUdtForwardRef(F))
+ return ForwardRefTI;
+
+ Expected<TagRecordHash> ForwardTRH = hashTagRecord(F);
+ if (!ForwardTRH)
+ return ForwardTRH.takeError();
+
+ uint32_t BucketIdx = ForwardTRH->FullRecordHash % Header->NumHashBuckets;
+
+ for (TypeIndex TI : HashMap[BucketIdx]) {
+ CVType CVT = Types->getType(TI);
+ if (CVT.kind() != F.kind())
+ continue;
+
+ Expected<TagRecordHash> FullTRH = hashTagRecord(CVT);
+ if (!FullTRH)
+ return FullTRH.takeError();
+ if (ForwardTRH->FullRecordHash != FullTRH->FullRecordHash)
+ continue;
+ TagRecord &ForwardTR = ForwardTRH->getRecord();
+ TagRecord &FullTR = FullTRH->getRecord();
+
+ if (!ForwardTR.hasUniqueName()) {
+ if (ForwardTR.getName() == FullTR.getName())
+ return TI;
+ continue;
+ }
+
+ if (!FullTR.hasUniqueName())
+ continue;
+ if (ForwardTR.getUniqueName() == FullTR.getUniqueName())
+ return TI;
+ }
+ return ForwardRefTI;
+}
+
+codeview::CVType TpiStream::getType(codeview::TypeIndex Index) {
+ assert(!Index.isSimple());
+ return Types->getType(Index);
+}
+
BinarySubstreamRef TpiStream::getTypeRecordsSubstream() const {
return TypeRecordsSubstream;
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
index 40f5ae9ba845..fc1ad8bcd7cd 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
@@ -29,7 +29,7 @@ Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1,
/*RequiresNullTerminator=*/false);
if (!ErrorOrBuffer)
- return make_error<GenericError>(generic_error_code::invalid_path, Path);
+ return errorCodeToError(ErrorOrBuffer.getError());
return NativeSession::createFromPdb(std::move(*ErrorOrBuffer), Session);
}
@@ -37,7 +37,7 @@ Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromPdb(Path, Session);
#else
- return make_error<GenericError>("DIA is not installed on the system");
+ return make_error<PDBError>(pdb_error_code::dia_sdk_not_present);
#endif
}
@@ -50,6 +50,6 @@ Error llvm::pdb::loadDataForEXE(PDB_ReaderType Type, StringRef Path,
#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromExe(Path, Session);
#else
- return make_error<GenericError>("DIA is not installed on the system");
+ return make_error<PDBError>(pdb_error_code::dia_sdk_not_present);
#endif
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
index a4e316417f96..0d8af232cd92 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -43,6 +43,33 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
}
raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
+ const PDB_BuiltinType &Type) {
+ switch (Type) {
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, None, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Void, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, WCharT, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Int, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, UInt, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Float, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, BCD, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Bool, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Long, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, ULong, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Currency, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Date, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Variant, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Complex, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Bitfield, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, BSTR, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, HResult, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char16, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char32, OS)
+ }
+ return OS;
+}
+
+raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
const PDB_CallingConv &Conv) {
OS << "__";
switch (Conv) {
@@ -202,8 +229,20 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_SymType &Tag) {
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, CustomType, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, ManagedType, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Dimension, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, CallSite, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, InlineSite, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, BaseInterface, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, VectorType, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, MatrixType, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, HLSLType, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Caller, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Callee, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Export, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, HeapAllocationSite, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, CoffGroup, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Inlinee, OS)
default:
- OS << "Unknown";
+ OS << "Unknown SymTag " << uint32_t(Tag);
}
return OS;
}
@@ -293,7 +332,7 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const Variant &Value) {
OS << Value.Value.Single;
break;
case PDB_VariantType::UInt16:
- OS << Value.Value.Double;
+ OS << Value.Value.UInt16;
break;
case PDB_VariantType::UInt32:
OS << Value.Value.UInt32;
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
index c62796507a01..951909295d13 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/IPDBDataStream.h"
+#include "llvm/DebugInfo/PDB/IPDBFrameData.h"
#include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
@@ -35,3 +36,5 @@ IPDBTable::~IPDBTable() = default;
IPDBInjectedSource::~IPDBInjectedSource() = default;
IPDBSectionContrib::~IPDBSectionContrib() = default;
+
+IPDBFrameData::~IPDBFrameData() = default;
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
index 74010c2dd7dd..d492edafdafe 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
@@ -50,23 +50,20 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbol::PDBSymbol(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : Session(PDBSession), RawSymbol(std::move(Symbol)) {}
+PDBSymbol::PDBSymbol(const IPDBSession &PDBSession) : Session(PDBSession) {}
-PDBSymbol::PDBSymbol(PDBSymbol &Symbol)
- : Session(Symbol.Session), RawSymbol(std::move(Symbol.RawSymbol)) {}
+PDBSymbol::PDBSymbol(PDBSymbol &&Other)
+ : Session(Other.Session), RawSymbol(std::move(Other.RawSymbol)) {}
PDBSymbol::~PDBSymbol() = default;
#define FACTORY_SYMTAG_CASE(Tag, Type) \
case PDB_SymType::Tag: \
- return std::unique_ptr<PDBSymbol>(new Type(PDBSession, std::move(Symbol)));
+ return std::unique_ptr<PDBSymbol>(new Type(PDBSession));
std::unique_ptr<PDBSymbol>
-PDBSymbol::create(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol) {
- switch (Symbol->getSymTag()) {
+PDBSymbol::createSymbol(const IPDBSession &PDBSession, PDB_SymType Tag) {
+ switch (Tag) {
FACTORY_SYMTAG_CASE(Exe, PDBSymbolExe)
FACTORY_SYMTAG_CASE(Compiland, PDBSymbolCompiland)
FACTORY_SYMTAG_CASE(CompilandDetails, PDBSymbolCompilandDetails)
@@ -98,18 +95,35 @@ PDBSymbol::create(const IPDBSession &PDBSession,
FACTORY_SYMTAG_CASE(ManagedType, PDBSymbolTypeManaged)
FACTORY_SYMTAG_CASE(Dimension, PDBSymbolTypeDimension)
default:
- return std::unique_ptr<PDBSymbol>(
- new PDBSymbolUnknown(PDBSession, std::move(Symbol)));
+ return std::unique_ptr<PDBSymbol>(new PDBSymbolUnknown(PDBSession));
}
}
-void PDBSymbol::defaultDump(raw_ostream &OS, int Indent) const {
- RawSymbol->dump(OS, Indent);
+std::unique_ptr<PDBSymbol>
+PDBSymbol::create(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> RawSymbol) {
+ auto SymbolPtr = createSymbol(PDBSession, RawSymbol->getSymTag());
+ SymbolPtr->RawSymbol = RawSymbol.get();
+ SymbolPtr->OwnedRawSymbol = std::move(RawSymbol);
+ return SymbolPtr;
+}
+
+std::unique_ptr<PDBSymbol> PDBSymbol::create(const IPDBSession &PDBSession,
+ IPDBRawSymbol &RawSymbol) {
+ auto SymbolPtr = createSymbol(PDBSession, RawSymbol.getSymTag());
+ SymbolPtr->RawSymbol = &RawSymbol;
+ return SymbolPtr;
+}
+
+void PDBSymbol::defaultDump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowFlags,
+ PdbSymbolIdField RecurseFlags) const {
+ RawSymbol->dump(OS, Indent, ShowFlags, RecurseFlags);
}
void PDBSymbol::dumpProperties() const {
outs() << "\n";
- defaultDump(outs(), 0);
+ defaultDump(outs(), 0, PdbSymbolIdField::All, PdbSymbolIdField::None);
outs().flush();
}
@@ -123,10 +137,6 @@ void PDBSymbol::dumpChildStats() const {
outs().flush();
}
-std::unique_ptr<PDBSymbol> PDBSymbol::clone() const {
- return Session.getSymbolById(getSymIndexId());
-}
-
PDB_SymType PDBSymbol::getSymTag() const { return RawSymbol->getSymTag(); }
uint32_t PDBSymbol::getSymIndexId() const { return RawSymbol->getSymIndexId(); }
@@ -172,3 +182,34 @@ PDBSymbol::getChildStats(TagStats &Stats) const {
std::unique_ptr<PDBSymbol> PDBSymbol::getSymbolByIdHelper(uint32_t Id) const {
return Session.getSymbolById(Id);
}
+
+void llvm::pdb::dumpSymbolIdField(raw_ostream &OS, StringRef Name,
+ SymIndexId Value, int Indent,
+ const IPDBSession &Session,
+ PdbSymbolIdField FieldId,
+ PdbSymbolIdField ShowFlags,
+ PdbSymbolIdField RecurseFlags) {
+ if ((FieldId & ShowFlags) == PdbSymbolIdField::None)
+ return;
+
+ OS << "\n";
+ OS.indent(Indent);
+ OS << Name << ": " << Value;
+ // Don't recurse unless the user requested it.
+ if ((FieldId & RecurseFlags) == PdbSymbolIdField::None)
+ return;
+ // And obviously don't recurse on the symbol itself.
+ if (FieldId == PdbSymbolIdField::SymIndexId)
+ return;
+
+ auto Child = Session.getSymbolById(Value);
+
+ // It could have been a placeholder symbol for a type we don't yet support,
+ // so just exit in that case.
+ if (!Child)
+ return;
+
+ // Don't recurse more than once, so pass PdbSymbolIdField::None for the
+ // recurse flags.
+ Child->defaultDump(OS, Indent + 2, ShowFlags, PdbSymbolIdField::None);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
index 3648272e1d0e..cb1a9bee8024 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolAnnotation::PDBSymbolAnnotation(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Annotation);
-}
-
void PDBSymbolAnnotation::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
index 7076b4aec347..13eec9734d02 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
@@ -17,10 +17,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolBlock::PDBSymbolBlock(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Block);
-}
-
void PDBSymbolBlock::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
index 8798c7b9db88..bbc5e6dd2a17 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
@@ -22,12 +22,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolCompiland::PDBSymbolCompiland(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Compiland);
-}
-
void PDBSymbolCompiland::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
index f73cd36d057a..bdd8535a3ef3 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolCompilandDetails::PDBSymbolCompilandDetails(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::CompilandDetails);
-}
-
void PDBSymbolCompilandDetails::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
index df696fa8c5f2..f88df2df6be4 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
@@ -18,12 +18,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolCompilandEnv::PDBSymbolCompilandEnv(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::CompilandEnv);
-}
-
std::string PDBSymbolCompilandEnv::getValue() const {
Variant Value = RawSymbol->getValue();
if (Value.Type != PDB_VariantType::String)
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
index a7b69a755941..10a21806adb6 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
@@ -18,12 +18,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolCustom::PDBSymbolCustom(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> CustomSymbol)
- : PDBSymbol(PDBSession, std::move(CustomSymbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Custom);
-}
-
void PDBSymbolCustom::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) {
RawSymbol->getDataBytes(bytes);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp
index ae4a8038ccd7..7de94670bcb3 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolData::PDBSymbolData(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> DataSymbol)
- : PDBSymbol(PDBSession, std::move(DataSymbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Data);
-}
-
void PDBSymbolData::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
std::unique_ptr<IPDBEnumLineNumbers> PDBSymbolData::getLineNumbers() const {
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp
index 7417167b61ad..eb409412af59 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolExe::PDBSymbolExe(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Exe);
-}
-
void PDBSymbolExe::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
uint32_t PDBSymbolExe::getPointerByteSize() const {
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index 37ca1abe86e9..75063cb3e7f8 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -69,10 +69,6 @@ public:
void reset() override { CurIter = Args.empty() ? Args.end() : Args.begin(); }
- FunctionArgEnumerator *clone() const override {
- return new FunctionArgEnumerator(Session, Func);
- }
-
private:
typedef std::vector<std::unique_ptr<PDBSymbolData>> ArgListType;
const IPDBSession &Session;
@@ -82,12 +78,6 @@ private:
};
}
-PDBSymbolFunc::PDBSymbolFunc(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Function);
-}
-
std::unique_ptr<IPDBEnumChildren<PDBSymbolData>>
PDBSymbolFunc::getArguments() const {
return llvm::make_unique<FunctionArgEnumerator>(Session, *this);
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
index 4a4195beb4ea..af8aafa7be96 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolFuncDebugEnd::PDBSymbolFuncDebugEnd(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::FuncDebugEnd);
-}
-
void PDBSymbolFuncDebugEnd::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
index a448a404dc4a..77b510873bea 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolFuncDebugStart::PDBSymbolFuncDebugStart(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::FuncDebugStart);
-}
-
void PDBSymbolFuncDebugStart::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
index a67a20d8e352..c802b97925e6 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
@@ -16,10 +16,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolLabel::PDBSymbolLabel(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Label);
-}
-
void PDBSymbolLabel::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
index dbec16fcbaac..a2dd2ab92dd9 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolPublicSymbol::PDBSymbolPublicSymbol(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::PublicSymbol);
-}
-
void PDBSymbolPublicSymbol::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
index b2648197f9cc..d227e3a7a60c 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
@@ -16,10 +16,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolThunk::PDBSymbolThunk(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Thunk);
-}
-
void PDBSymbolThunk::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
index ba40f65ef40f..a2064d1ac1eb 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeArray::PDBSymbolTypeArray(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::ArrayType);
-}
-
void PDBSymbolTypeArray::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
index 0fdf8b6d0f77..f0376c05557f 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeBaseClass::PDBSymbolTypeBaseClass(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::BaseClass);
-}
-
void PDBSymbolTypeBaseClass::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
index 0bf563af7df5..a9f59e5f9d4d 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeBuiltin::PDBSymbolTypeBuiltin(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::BuiltinType);
-}
-
void PDBSymbolTypeBuiltin::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
index 726e7e1cdbb4..cfb347fbac55 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeCustom::PDBSymbolTypeCustom(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::CustomType);
-}
-
void PDBSymbolTypeCustom::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
index 6c84b984d210..4eb48997635a 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
@@ -17,13 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-
-PDBSymbolTypeDimension::PDBSymbolTypeDimension(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Dimension);
-}
-
void PDBSymbolTypeDimension::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
index f9c3067c20bf..2e88d9eb284a 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
@@ -17,10 +17,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeEnum::PDBSymbolTypeEnum(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Enum);
-}
-
void PDBSymbolTypeEnum::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
index c01877287888..00d2d51aa8a7 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeFriend::PDBSymbolTypeFriend(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Friend);
-}
-
void PDBSymbolTypeFriend::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
index 4d5cd63f6857..0399e110d592 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeFunctionArg::PDBSymbolTypeFunctionArg(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::FunctionArg);
-}
-
void PDBSymbolTypeFunctionArg::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
index 8fd3b49155c9..c0564d3941dd 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -56,23 +56,12 @@ public:
void reset() override { Enumerator->reset(); }
- MyType *clone() const override {
- std::unique_ptr<ArgEnumeratorType> Clone(Enumerator->clone());
- return new FunctionArgEnumerator(Session, std::move(Clone));
- }
-
private:
const IPDBSession &Session;
std::unique_ptr<ArgEnumeratorType> Enumerator;
};
}
-PDBSymbolTypeFunctionSig::PDBSymbolTypeFunctionSig(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::FunctionSig);
-}
-
std::unique_ptr<IPDBEnumSymbols>
PDBSymbolTypeFunctionSig::getArguments() const {
return llvm::make_unique<FunctionArgEnumerator>(Session, *this);
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
index 7cfba823b4fa..1faaf9c67a2c 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeManaged::PDBSymbolTypeManaged(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::ManagedType);
-}
-
void PDBSymbolTypeManaged::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
index 69819811d61f..cf5a369116a9 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypePointer::PDBSymbolTypePointer(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::PointerType);
-}
-
void PDBSymbolTypePointer::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
index 102b540e0fef..1838f1612b49 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeTypedef::PDBSymbolTypeTypedef(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Typedef);
-}
-
void PDBSymbolTypeTypedef::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
index 715ae15e1a7a..2f5222f34fe4 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
@@ -23,10 +23,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeUDT::PDBSymbolTypeUDT(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::UDT);
-}
-
void PDBSymbolTypeUDT::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
index 9a21855f57f0..0262f91e8336 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeVTable::PDBSymbolTypeVTable(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::VTable);
-}
-
void PDBSymbolTypeVTable::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
index ddc0574617c5..16c3a3606981 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeVTableShape::PDBSymbolTypeVTableShape(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::VTableShape);
-}
-
void PDBSymbolTypeVTableShape::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
index fdbe845f455a..7bcf9457a2b6 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
@@ -17,8 +17,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolUnknown::PDBSymbolUnknown(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
-
void PDBSymbolUnknown::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
index f40578f4372a..ecf2126f8802 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolUsingNamespace::PDBSymbolUsingNamespace(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::UsingNamespace);
-}
-
void PDBSymbolUsingNamespace::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 95a356d33eb4..c3e750a1b932 100644
--- a/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -411,7 +411,8 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName,
Objects.first->getFileName(), Session)) {
Modules.insert(
std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>()));
- return std::move(Err);
+    // Return along with the PDB filename to provide more context
+ return createFileError(PDBFileName, std::move(Err));
}
Context.reset(new PDBContext(*CoffObject, std::move(Session)));
#else
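
The hunk above stops discarding the PDB error's context: instead of returning Err bare, it wraps it with the PDB file name via createFileError so the caller's diagnostic identifies the offending file. A minimal sketch of that wrapping, assuming only llvm/Support/Error.h (the function name and message here are illustrative):

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static Error findPdb(StringRef PDBFileName) {
  Error Err = createStringError(inconvertibleErrorCode(),
                                "no matching PDB could be loaded");
  // Prepend the PDB path so the caller's diagnostic reads
  // "<PDBFileName>: no matching PDB could be loaded".
  return createFileError(PDBFileName, std::move(Err));
}

int main() {
  if (Error E = findPdb("example.pdb"))
    logAllUnhandledErrors(std::move(E), errs(), "symbolizer: ");
}
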
diff --git a/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp b/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp
index 72e4b56c05e3..b2de0be2b70c 100644
--- a/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp
+++ b/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -11,1761 +11,239 @@
// file does not yet support:
// - C++ modules TS
-#include "Compiler.h"
-#include "StringView.h"
-#include "Utility.h"
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/ItaniumDemangle.h"
#include <cassert>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
+#include <functional>
#include <numeric>
#include <utility>
#include <vector>
-namespace {
-// Base class of all AST nodes. The AST is built by the parser, then is
-// traversed by the printLeft/Right functions to produce a demangled string.
-class Node {
-public:
- enum Kind : unsigned char {
- KNodeArrayNode,
- KDotSuffix,
- KVendorExtQualType,
- KQualType,
- KConversionOperatorType,
- KPostfixQualifiedType,
- KElaboratedTypeSpefType,
- KNameType,
- KAbiTagAttr,
- KEnableIfAttr,
- KObjCProtoName,
- KPointerType,
- KReferenceType,
- KPointerToMemberType,
- KArrayType,
- KFunctionType,
- KNoexceptSpec,
- KDynamicExceptionSpec,
- KFunctionEncoding,
- KLiteralOperator,
- KSpecialName,
- KCtorVtableSpecialName,
- KQualifiedName,
- KNestedName,
- KLocalName,
- KVectorType,
- KParameterPack,
- KTemplateArgumentPack,
- KParameterPackExpansion,
- KTemplateArgs,
- KForwardTemplateReference,
- KNameWithTemplateArgs,
- KGlobalQualifiedName,
- KStdQualifiedName,
- KExpandedSpecialSubstitution,
- KSpecialSubstitution,
- KCtorDtorName,
- KDtorName,
- KUnnamedTypeName,
- KClosureTypeName,
- KStructuredBindingName,
- KExpr,
- KBracedExpr,
- KBracedRangeExpr,
- };
-
- Kind K;
-
- /// Three-way bool to track a cached value. Unknown is possible if this node
- /// has an unexpanded parameter pack below it that may affect this cache.
- enum class Cache : unsigned char { Yes, No, Unknown, };
-
- /// Tracks if this node has a component on its right side, in which case we
- /// need to call printRight.
- Cache RHSComponentCache;
-
- /// Track if this node is a (possibly qualified) array type. This can affect
- /// how we format the output string.
- Cache ArrayCache;
-
- /// Track if this node is a (possibly qualified) function type. This can
- /// affect how we format the output string.
- Cache FunctionCache;
-
- Node(Kind K_, Cache RHSComponentCache_ = Cache::No,
- Cache ArrayCache_ = Cache::No, Cache FunctionCache_ = Cache::No)
- : K(K_), RHSComponentCache(RHSComponentCache_), ArrayCache(ArrayCache_),
- FunctionCache(FunctionCache_) {}
-
- bool hasRHSComponent(OutputStream &S) const {
- if (RHSComponentCache != Cache::Unknown)
- return RHSComponentCache == Cache::Yes;
- return hasRHSComponentSlow(S);
- }
-
- bool hasArray(OutputStream &S) const {
- if (ArrayCache != Cache::Unknown)
- return ArrayCache == Cache::Yes;
- return hasArraySlow(S);
- }
-
- bool hasFunction(OutputStream &S) const {
- if (FunctionCache != Cache::Unknown)
- return FunctionCache == Cache::Yes;
- return hasFunctionSlow(S);
- }
-
- Kind getKind() const { return K; }
-
- virtual bool hasRHSComponentSlow(OutputStream &) const { return false; }
- virtual bool hasArraySlow(OutputStream &) const { return false; }
- virtual bool hasFunctionSlow(OutputStream &) const { return false; }
-
- // Dig through "glue" nodes like ParameterPack and ForwardTemplateReference to
- // get at a node that actually represents some concrete syntax.
- virtual const Node *getSyntaxNode(OutputStream &) const {
- return this;
- }
+using namespace llvm;
+using namespace llvm::itanium_demangle;
- void print(OutputStream &S) const {
- printLeft(S);
- if (RHSComponentCache != Cache::No)
- printRight(S);
- }
-
- // Print the "left" side of this Node into OutputStream.
- virtual void printLeft(OutputStream &) const = 0;
-
- // Print the "right". This distinction is necessary to represent C++ types
- // that appear on the RHS of their subtype, such as arrays or functions.
- // Since most types don't have such a component, provide a default
- // implementation.
- virtual void printRight(OutputStream &) const {}
-
- virtual StringView getBaseName() const { return StringView(); }
-
- // Silence compiler warnings, this dtor will never be called.
- virtual ~Node() = default;
+constexpr const char *itanium_demangle::FloatData<float>::spec;
+constexpr const char *itanium_demangle::FloatData<double>::spec;
+constexpr const char *itanium_demangle::FloatData<long double>::spec;
-#ifndef NDEBUG
- LLVM_DUMP_METHOD void dump() const {
- char *Buffer = static_cast<char*>(std::malloc(1024));
- OutputStream S(Buffer, 1024);
- print(S);
- S += '\0';
- printf("Symbol dump for %p: %s\n", (const void*)this, S.getBuffer());
- std::free(S.getBuffer());
- }
-#endif
-};
-
-class NodeArray {
- Node **Elements;
- size_t NumElements;
-
-public:
- NodeArray() : Elements(nullptr), NumElements(0) {}
- NodeArray(Node **Elements_, size_t NumElements_)
- : Elements(Elements_), NumElements(NumElements_) {}
-
- bool empty() const { return NumElements == 0; }
- size_t size() const { return NumElements; }
-
- Node **begin() const { return Elements; }
- Node **end() const { return Elements + NumElements; }
-
- Node *operator[](size_t Idx) const { return Elements[Idx]; }
-
- void printWithComma(OutputStream &S) const {
- bool FirstElement = true;
- for (size_t Idx = 0; Idx != NumElements; ++Idx) {
- size_t BeforeComma = S.getCurrentPosition();
- if (!FirstElement)
- S += ", ";
- size_t AfterComma = S.getCurrentPosition();
- Elements[Idx]->print(S);
-
- // Elements[Idx] is an empty parameter pack expansion, we should erase the
- // comma we just printed.
- if (AfterComma == S.getCurrentPosition()) {
- S.setCurrentPosition(BeforeComma);
- continue;
+// <discriminator> := _ <non-negative number> # when number < 10
+// := __ <non-negative number> _ # when number >= 10
+// extension := decimal-digit+ # at the end of string
+const char *itanium_demangle::parse_discriminator(const char *first,
+ const char *last) {
+ // parse but ignore discriminator
+ if (first != last) {
+ if (*first == '_') {
+ const char *t1 = first + 1;
+ if (t1 != last) {
+ if (std::isdigit(*t1))
+ first = t1 + 1;
+ else if (*t1 == '_') {
+ for (++t1; t1 != last && std::isdigit(*t1); ++t1)
+ ;
+ if (t1 != last && *t1 == '_')
+ first = t1 + 1;
+ }
}
-
- FirstElement = false;
+ } else if (std::isdigit(*first)) {
+ const char *t1 = first + 1;
+ for (; t1 != last && std::isdigit(*t1); ++t1)
+ ;
+ if (t1 == last)
+ first = last;
}
}
-};
-
-struct NodeArrayNode : Node {
- NodeArray Array;
- NodeArrayNode(NodeArray Array_) : Node(KNodeArrayNode), Array(Array_) {}
- void printLeft(OutputStream &S) const override {
- Array.printWithComma(S);
- }
-};
-
-class DotSuffix final : public Node {
- const Node *Prefix;
- const StringView Suffix;
-
-public:
- DotSuffix(Node *Prefix_, StringView Suffix_)
- : Node(KDotSuffix), Prefix(Prefix_), Suffix(Suffix_) {}
-
- void printLeft(OutputStream &s) const override {
- Prefix->print(s);
- s += " (";
- s += Suffix;
- s += ")";
- }
-};
-
-class VendorExtQualType final : public Node {
- const Node *Ty;
- StringView Ext;
-
-public:
- VendorExtQualType(Node *Ty_, StringView Ext_)
- : Node(KVendorExtQualType), Ty(Ty_), Ext(Ext_) {}
-
- void printLeft(OutputStream &S) const override {
- Ty->print(S);
- S += " ";
- S += Ext;
- }
-};
-
-enum FunctionRefQual : unsigned char {
- FrefQualNone,
- FrefQualLValue,
- FrefQualRValue,
-};
-
-enum Qualifiers {
- QualNone = 0,
- QualConst = 0x1,
- QualVolatile = 0x2,
- QualRestrict = 0x4,
-};
-
-void addQualifiers(Qualifiers &Q1, Qualifiers Q2) {
- Q1 = static_cast<Qualifiers>(Q1 | Q2);
+ return first;
}
-class QualType : public Node {
-protected:
- const Qualifiers Quals;
- const Node *Child;
-
- void printQuals(OutputStream &S) const {
- if (Quals & QualConst)
- S += " const";
- if (Quals & QualVolatile)
- S += " volatile";
- if (Quals & QualRestrict)
- S += " restrict";
- }
-
-public:
- QualType(Node *Child_, Qualifiers Quals_)
- : Node(KQualType, Child_->RHSComponentCache,
- Child_->ArrayCache, Child_->FunctionCache),
- Quals(Quals_), Child(Child_) {}
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Child->hasRHSComponent(S);
- }
- bool hasArraySlow(OutputStream &S) const override {
- return Child->hasArray(S);
- }
- bool hasFunctionSlow(OutputStream &S) const override {
- return Child->hasFunction(S);
- }
-
- void printLeft(OutputStream &S) const override {
- Child->printLeft(S);
- printQuals(S);
- }
-
- void printRight(OutputStream &S) const override { Child->printRight(S); }
-};
-
-class ConversionOperatorType final : public Node {
- const Node *Ty;
-
-public:
- ConversionOperatorType(Node *Ty_)
- : Node(KConversionOperatorType), Ty(Ty_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "operator ";
- Ty->print(S);
- }
-};
-
-class PostfixQualifiedType final : public Node {
- const Node *Ty;
- const StringView Postfix;
-
-public:
- PostfixQualifiedType(Node *Ty_, StringView Postfix_)
- : Node(KPostfixQualifiedType), Ty(Ty_), Postfix(Postfix_) {}
-
- void printLeft(OutputStream &s) const override {
- Ty->printLeft(s);
- s += Postfix;
- }
-};
-
-class NameType final : public Node {
- const StringView Name;
-
-public:
- NameType(StringView Name_) : Node(KNameType), Name(Name_) {}
-
- StringView getName() const { return Name; }
- StringView getBaseName() const override { return Name; }
-
- void printLeft(OutputStream &s) const override { s += Name; }
-};
-
-class ElaboratedTypeSpefType : public Node {
- StringView Kind;
- Node *Child;
-public:
- ElaboratedTypeSpefType(StringView Kind_, Node *Child_)
- : Node(KElaboratedTypeSpefType), Kind(Kind_), Child(Child_) {}
-
- void printLeft(OutputStream &S) const override {
- S += Kind;
- S += ' ';
- Child->print(S);
- }
-};
-
-struct AbiTagAttr : Node {
- Node *Base;
- StringView Tag;
-
- AbiTagAttr(Node* Base_, StringView Tag_)
- : Node(KAbiTagAttr, Base_->RHSComponentCache,
- Base_->ArrayCache, Base_->FunctionCache),
- Base(Base_), Tag(Tag_) {}
-
- void printLeft(OutputStream &S) const override {
- Base->printLeft(S);
- S += "[abi:";
- S += Tag;
- S += "]";
- }
-};
-
-class EnableIfAttr : public Node {
- NodeArray Conditions;
-public:
- EnableIfAttr(NodeArray Conditions_)
- : Node(KEnableIfAttr), Conditions(Conditions_) {}
-
- void printLeft(OutputStream &S) const override {
- S += " [enable_if:";
- Conditions.printWithComma(S);
- S += ']';
- }
-};
-
-class ObjCProtoName : public Node {
- Node *Ty;
- StringView Protocol;
-
- friend class PointerType;
-
-public:
- ObjCProtoName(Node *Ty_, StringView Protocol_)
- : Node(KObjCProtoName), Ty(Ty_), Protocol(Protocol_) {}
-
- bool isObjCObject() const {
- return Ty->getKind() == KNameType &&
- static_cast<NameType *>(Ty)->getName() == "objc_object";
- }
-
- void printLeft(OutputStream &S) const override {
- Ty->print(S);
- S += "<";
- S += Protocol;
- S += ">";
- }
-};
-
-class PointerType final : public Node {
- const Node *Pointee;
-
-public:
- PointerType(Node *Pointee_)
- : Node(KPointerType, Pointee_->RHSComponentCache),
- Pointee(Pointee_) {}
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Pointee->hasRHSComponent(S);
- }
-
- void printLeft(OutputStream &s) const override {
- // We rewrite objc_object<SomeProtocol>* into id<SomeProtocol>.
- if (Pointee->getKind() != KObjCProtoName ||
- !static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
- Pointee->printLeft(s);
- if (Pointee->hasArray(s))
- s += " ";
- if (Pointee->hasArray(s) || Pointee->hasFunction(s))
- s += "(";
- s += "*";
- } else {
- const auto *objcProto = static_cast<const ObjCProtoName *>(Pointee);
- s += "id<";
- s += objcProto->Protocol;
- s += ">";
- }
- }
-
- void printRight(OutputStream &s) const override {
- if (Pointee->getKind() != KObjCProtoName ||
- !static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
- if (Pointee->hasArray(s) || Pointee->hasFunction(s))
- s += ")";
- Pointee->printRight(s);
- }
- }
-};
-
-enum class ReferenceKind {
- LValue,
- RValue,
-};
-
-// Represents either a LValue or an RValue reference type.
-class ReferenceType : public Node {
- const Node *Pointee;
- ReferenceKind RK;
-
- mutable bool Printing = false;
+#ifndef NDEBUG
+namespace {
+struct DumpVisitor {
+ unsigned Depth = 0;
+ bool PendingNewline = false;
- // Dig through any refs to refs, collapsing the ReferenceTypes as we go. The
- // rule here is rvalue ref to rvalue ref collapses to a rvalue ref, and any
- // other combination collapses to a lvalue ref.
- std::pair<ReferenceKind, const Node *> collapse(OutputStream &S) const {
- auto SoFar = std::make_pair(RK, Pointee);
- for (;;) {
- const Node *SN = SoFar.second->getSyntaxNode(S);
- if (SN->getKind() != KReferenceType)
- break;
- auto *RT = static_cast<const ReferenceType *>(SN);
- SoFar.second = RT->Pointee;
- SoFar.first = std::min(SoFar.first, RT->RK);
- }
- return SoFar;
+ template<typename NodeT> static constexpr bool wantsNewline(const NodeT *) {
+ return true;
}
+ static bool wantsNewline(NodeArray A) { return !A.empty(); }
+ static constexpr bool wantsNewline(...) { return false; }
-public:
- ReferenceType(Node *Pointee_, ReferenceKind RK_)
- : Node(KReferenceType, Pointee_->RHSComponentCache),
- Pointee(Pointee_), RK(RK_) {}
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Pointee->hasRHSComponent(S);
+ template<typename ...Ts> static bool anyWantNewline(Ts ...Vs) {
+ for (bool B : {wantsNewline(Vs)...})
+ if (B)
+ return true;
+ return false;
}
- void printLeft(OutputStream &s) const override {
- if (Printing)
- return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
- Collapsed.second->printLeft(s);
- if (Collapsed.second->hasArray(s))
- s += " ";
- if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
- s += "(";
-
- s += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&");
+ void printStr(const char *S) { fprintf(stderr, "%s", S); }
+ void print(StringView SV) {
+ fprintf(stderr, "\"%.*s\"", (int)SV.size(), SV.begin());
}
- void printRight(OutputStream &s) const override {
- if (Printing)
- return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
- if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
- s += ")";
- Collapsed.second->printRight(s);
- }
-};
-
-class PointerToMemberType final : public Node {
- const Node *ClassType;
- const Node *MemberType;
-
-public:
- PointerToMemberType(Node *ClassType_, Node *MemberType_)
- : Node(KPointerToMemberType, MemberType_->RHSComponentCache),
- ClassType(ClassType_), MemberType(MemberType_) {}
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return MemberType->hasRHSComponent(S);
- }
-
- void printLeft(OutputStream &s) const override {
- MemberType->printLeft(s);
- if (MemberType->hasArray(s) || MemberType->hasFunction(s))
- s += "(";
+ void print(const Node *N) {
+ if (N)
+ N->visit(std::ref(*this));
else
- s += " ";
- ClassType->print(s);
- s += "::*";
- }
-
- void printRight(OutputStream &s) const override {
- if (MemberType->hasArray(s) || MemberType->hasFunction(s))
- s += ")";
- MemberType->printRight(s);
- }
-};
-
-class NodeOrString {
- const void *First;
- const void *Second;
-
-public:
- /* implicit */ NodeOrString(StringView Str) {
- const char *FirstChar = Str.begin();
- const char *SecondChar = Str.end();
- if (SecondChar == nullptr) {
- assert(FirstChar == SecondChar);
- ++FirstChar, ++SecondChar;
- }
- First = static_cast<const void *>(FirstChar);
- Second = static_cast<const void *>(SecondChar);
- }
-
- /* implicit */ NodeOrString(Node *N)
- : First(static_cast<const void *>(N)), Second(nullptr) {}
- NodeOrString() : First(nullptr), Second(nullptr) {}
-
- bool isString() const { return Second && First; }
- bool isNode() const { return First && !Second; }
- bool isEmpty() const { return !First && !Second; }
-
- StringView asString() const {
- assert(isString());
- return StringView(static_cast<const char *>(First),
- static_cast<const char *>(Second));
- }
-
- const Node *asNode() const {
- assert(isNode());
- return static_cast<const Node *>(First);
- }
-};
-
-class ArrayType final : public Node {
- Node *Base;
- NodeOrString Dimension;
-
-public:
- ArrayType(Node *Base_, NodeOrString Dimension_)
- : Node(KArrayType,
- /*RHSComponentCache=*/Cache::Yes,
- /*ArrayCache=*/Cache::Yes),
- Base(Base_), Dimension(Dimension_) {}
-
- // Incomplete array type.
- ArrayType(Node *Base_)
- : Node(KArrayType,
- /*RHSComponentCache=*/Cache::Yes,
- /*ArrayCache=*/Cache::Yes),
- Base(Base_) {}
-
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasArraySlow(OutputStream &) const override { return true; }
-
- void printLeft(OutputStream &S) const override { Base->printLeft(S); }
-
- void printRight(OutputStream &S) const override {
- if (S.back() != ']')
- S += " ";
- S += "[";
- if (Dimension.isString())
- S += Dimension.asString();
- else if (Dimension.isNode())
- Dimension.asNode()->print(S);
- S += "]";
- Base->printRight(S);
- }
-};
-
-class FunctionType final : public Node {
- Node *Ret;
- NodeArray Params;
- Qualifiers CVQuals;
- FunctionRefQual RefQual;
- Node *ExceptionSpec;
-
-public:
- FunctionType(Node *Ret_, NodeArray Params_, Qualifiers CVQuals_,
- FunctionRefQual RefQual_, Node *ExceptionSpec_)
- : Node(KFunctionType,
- /*RHSComponentCache=*/Cache::Yes, /*ArrayCache=*/Cache::No,
- /*FunctionCache=*/Cache::Yes),
- Ret(Ret_), Params(Params_), CVQuals(CVQuals_), RefQual(RefQual_),
- ExceptionSpec(ExceptionSpec_) {}
-
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasFunctionSlow(OutputStream &) const override { return true; }
-
- // Handle C++'s ... quirky decl grammar by using the left & right
- // distinction. Consider:
- // int (*f(float))(char) {}
- // f is a function that takes a float and returns a pointer to a function
- // that takes a char and returns an int. If we're trying to print f, start
- // by printing out the return types's left, then print our parameters, then
- // finally print right of the return type.
- void printLeft(OutputStream &S) const override {
- Ret->printLeft(S);
- S += " ";
- }
-
- void printRight(OutputStream &S) const override {
- S += "(";
- Params.printWithComma(S);
- S += ")";
- Ret->printRight(S);
-
- if (CVQuals & QualConst)
- S += " const";
- if (CVQuals & QualVolatile)
- S += " volatile";
- if (CVQuals & QualRestrict)
- S += " restrict";
-
- if (RefQual == FrefQualLValue)
- S += " &";
- else if (RefQual == FrefQualRValue)
- S += " &&";
-
- if (ExceptionSpec != nullptr) {
- S += ' ';
- ExceptionSpec->print(S);
- }
- }
-};
-
-class NoexceptSpec : public Node {
- Node *E;
-public:
- NoexceptSpec(Node *E_) : Node(KNoexceptSpec), E(E_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "noexcept(";
- E->print(S);
- S += ")";
+ printStr("<null>");
}
-};
-
-class DynamicExceptionSpec : public Node {
- NodeArray Types;
-public:
- DynamicExceptionSpec(NodeArray Types_)
- : Node(KDynamicExceptionSpec), Types(Types_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "throw(";
- Types.printWithComma(S);
- S += ')';
- }
-};
-
-class FunctionEncoding final : public Node {
- Node *Ret;
- Node *Name;
- NodeArray Params;
- Node *Attrs;
- Qualifiers CVQuals;
- FunctionRefQual RefQual;
-
-public:
- FunctionEncoding(Node *Ret_, Node *Name_, NodeArray Params_,
- Node *Attrs_, Qualifiers CVQuals_, FunctionRefQual RefQual_)
- : Node(KFunctionEncoding,
- /*RHSComponentCache=*/Cache::Yes, /*ArrayCache=*/Cache::No,
- /*FunctionCache=*/Cache::Yes),
- Ret(Ret_), Name(Name_), Params(Params_), Attrs(Attrs_),
- CVQuals(CVQuals_), RefQual(RefQual_) {}
-
- Qualifiers getCVQuals() const { return CVQuals; }
- FunctionRefQual getRefQual() const { return RefQual; }
- NodeArray getParams() const { return Params; }
- Node *getReturnType() const { return Ret; }
-
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasFunctionSlow(OutputStream &) const override { return true; }
-
- Node *getName() { return const_cast<Node *>(Name); }
-
- void printLeft(OutputStream &S) const override {
- if (Ret) {
- Ret->printLeft(S);
- if (!Ret->hasRHSComponent(S))
- S += " ";
+ void print(NodeOrString NS) {
+ if (NS.isNode())
+ print(NS.asNode());
+ else if (NS.isString())
+ print(NS.asString());
+ else
+ printStr("NodeOrString()");
+ }
+ void print(NodeArray A) {
+ ++Depth;
+ printStr("{");
+ bool First = true;
+ for (const Node *N : A) {
+ if (First)
+ print(N);
+ else
+ printWithComma(N);
+ First = false;
}
- Name->print(S);
- }
-
- void printRight(OutputStream &S) const override {
- S += "(";
- Params.printWithComma(S);
- S += ")";
- if (Ret)
- Ret->printRight(S);
-
- if (CVQuals & QualConst)
- S += " const";
- if (CVQuals & QualVolatile)
- S += " volatile";
- if (CVQuals & QualRestrict)
- S += " restrict";
-
- if (RefQual == FrefQualLValue)
- S += " &";
- else if (RefQual == FrefQualRValue)
- S += " &&";
-
- if (Attrs != nullptr)
- Attrs->print(S);
- }
-};
-
-class LiteralOperator : public Node {
- const Node *OpName;
-
-public:
- LiteralOperator(Node *OpName_) : Node(KLiteralOperator), OpName(OpName_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "operator\"\" ";
- OpName->print(S);
- }
-};
-
-class SpecialName final : public Node {
- const StringView Special;
- const Node *Child;
-
-public:
- SpecialName(StringView Special_, Node* Child_)
- : Node(KSpecialName), Special(Special_), Child(Child_) {}
-
- void printLeft(OutputStream &S) const override {
- S += Special;
- Child->print(S);
+ printStr("}");
+ --Depth;
}
-};
-class CtorVtableSpecialName final : public Node {
- const Node *FirstType;
- const Node *SecondType;
+ // Overload used when T is exactly 'bool', not merely convertible to 'bool'.
+ void print(bool B) { printStr(B ? "true" : "false"); }
-public:
- CtorVtableSpecialName(Node *FirstType_, Node *SecondType_)
- : Node(KCtorVtableSpecialName),
- FirstType(FirstType_), SecondType(SecondType_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "construction vtable for ";
- FirstType->print(S);
- S += "-in-";
- SecondType->print(S);
+ template <class T>
+ typename std::enable_if<std::is_unsigned<T>::value>::type print(T N) {
+ fprintf(stderr, "%llu", (unsigned long long)N);
}
-};
-struct NestedName : Node {
- Node *Qual;
- Node *Name;
-
- NestedName(Node *Qual_, Node *Name_)
- : Node(KNestedName), Qual(Qual_), Name(Name_) {}
-
- StringView getBaseName() const override { return Name->getBaseName(); }
-
- void printLeft(OutputStream &S) const override {
- Qual->print(S);
- S += "::";
- Name->print(S);
+ template <class T>
+ typename std::enable_if<std::is_signed<T>::value>::type print(T N) {
+ fprintf(stderr, "%lld", (long long)N);
}
-};
-
-struct LocalName : Node {
- Node *Encoding;
- Node *Entity;
-
- LocalName(Node *Encoding_, Node *Entity_)
- : Node(KLocalName), Encoding(Encoding_), Entity(Entity_) {}
-
- void printLeft(OutputStream &S) const override {
- Encoding->print(S);
- S += "::";
- Entity->print(S);
- }
-};
-
-class QualifiedName final : public Node {
- // qualifier::name
- const Node *Qualifier;
- const Node *Name;
-
-public:
- QualifiedName(Node* Qualifier_, Node* Name_)
- : Node(KQualifiedName), Qualifier(Qualifier_), Name(Name_) {}
- StringView getBaseName() const override { return Name->getBaseName(); }
-
- void printLeft(OutputStream &S) const override {
- Qualifier->print(S);
- S += "::";
- Name->print(S);
- }
-};
-
-class VectorType final : public Node {
- const Node *BaseType;
- const NodeOrString Dimension;
- const bool IsPixel;
-
-public:
- VectorType(NodeOrString Dimension_)
- : Node(KVectorType), BaseType(nullptr), Dimension(Dimension_),
- IsPixel(true) {}
- VectorType(Node *BaseType_, NodeOrString Dimension_)
- : Node(KVectorType), BaseType(BaseType_),
- Dimension(Dimension_), IsPixel(false) {}
-
- void printLeft(OutputStream &S) const override {
- if (IsPixel) {
- S += "pixel vector[";
- S += Dimension.asString();
- S += "]";
- } else {
- BaseType->print(S);
- S += " vector[";
- if (Dimension.isNode())
- Dimension.asNode()->print(S);
- else if (Dimension.isString())
- S += Dimension.asString();
- S += "]";
+ void print(ReferenceKind RK) {
+ switch (RK) {
+ case ReferenceKind::LValue:
+ return printStr("ReferenceKind::LValue");
+ case ReferenceKind::RValue:
+ return printStr("ReferenceKind::RValue");
}
}
-};
-
-/// An unexpanded parameter pack (either in the expression or type context). If
-/// this AST is correct, this node will have a ParameterPackExpansion node above
-/// it.
-///
-/// This node is created when some <template-args> are found that apply to an
-/// <encoding>, and is stored in the TemplateParams table. In order for this to
-/// appear in the final AST, it has to referenced via a <template-param> (ie,
-/// T_).
-class ParameterPack final : public Node {
- NodeArray Data;
-
- // Setup OutputStream for a pack expansion unless we're already expanding one.
- void initializePackExpansion(OutputStream &S) const {
- if (S.CurrentPackMax == std::numeric_limits<unsigned>::max()) {
- S.CurrentPackMax = static_cast<unsigned>(Data.size());
- S.CurrentPackIndex = 0;
- }
- }
-
-public:
- ParameterPack(NodeArray Data_) : Node(KParameterPack), Data(Data_) {
- ArrayCache = FunctionCache = RHSComponentCache = Cache::Unknown;
- if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
- return P->ArrayCache == Cache::No;
- }))
- ArrayCache = Cache::No;
- if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
- return P->FunctionCache == Cache::No;
- }))
- FunctionCache = Cache::No;
- if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
- return P->RHSComponentCache == Cache::No;
- }))
- RHSComponentCache = Cache::No;
- }
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasRHSComponent(S);
- }
- bool hasArraySlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasArray(S);
- }
- bool hasFunctionSlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasFunction(S);
- }
- const Node *getSyntaxNode(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() ? Data[Idx]->getSyntaxNode(S) : this;
- }
-
- void printLeft(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- if (Idx < Data.size())
- Data[Idx]->printLeft(S);
- }
- void printRight(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- if (Idx < Data.size())
- Data[Idx]->printRight(S);
- }
-};
-
-/// A variadic template argument. This node represents an occurrence of
-/// J<something>E in some <template-args>. It isn't itself unexpanded, unless
-/// one of it's Elements is. The parser inserts a ParameterPack into the
-/// TemplateParams table if the <template-args> this pack belongs to apply to an
-/// <encoding>.
-class TemplateArgumentPack final : public Node {
- NodeArray Elements;
-public:
- TemplateArgumentPack(NodeArray Elements_)
- : Node(KTemplateArgumentPack), Elements(Elements_) {}
-
- NodeArray getElements() const { return Elements; }
-
- void printLeft(OutputStream &S) const override {
- Elements.printWithComma(S);
- }
-};
-
-/// A pack expansion. Below this node, there are some unexpanded ParameterPacks
-/// which each have Child->ParameterPackSize elements.
-class ParameterPackExpansion final : public Node {
- const Node *Child;
-
-public:
- ParameterPackExpansion(Node* Child_)
- : Node(KParameterPackExpansion), Child(Child_) {}
-
- const Node *getChild() const { return Child; }
-
- void printLeft(OutputStream &S) const override {
- constexpr unsigned Max = std::numeric_limits<unsigned>::max();
- SwapAndRestore<unsigned> SavePackIdx(S.CurrentPackIndex, Max);
- SwapAndRestore<unsigned> SavePackMax(S.CurrentPackMax, Max);
- size_t StreamPos = S.getCurrentPosition();
-
- // Print the first element in the pack. If Child contains a ParameterPack,
- // it will set up S.CurrentPackMax and print the first element.
- Child->print(S);
-
- // No ParameterPack was found in Child. This can occur if we've found a pack
- // expansion on a <function-param>.
- if (S.CurrentPackMax == Max) {
- S += "...";
- return;
+ void print(FunctionRefQual RQ) {
+ switch (RQ) {
+ case FunctionRefQual::FrefQualNone:
+ return printStr("FunctionRefQual::FrefQualNone");
+ case FunctionRefQual::FrefQualLValue:
+ return printStr("FunctionRefQual::FrefQualLValue");
+ case FunctionRefQual::FrefQualRValue:
+ return printStr("FunctionRefQual::FrefQualRValue");
}
-
- // We found a ParameterPack, but it has no elements. Erase whatever we may
- // of printed.
- if (S.CurrentPackMax == 0) {
- S.setCurrentPosition(StreamPos);
- return;
- }
-
- // Else, iterate through the rest of the elements in the pack.
- for (unsigned I = 1, E = S.CurrentPackMax; I < E; ++I) {
- S += ", ";
- S.CurrentPackIndex = I;
- Child->print(S);
- }
- }
-};
-
-class TemplateArgs final : public Node {
- NodeArray Params;
-
-public:
- TemplateArgs(NodeArray Params_) : Node(KTemplateArgs), Params(Params_) {}
-
- NodeArray getParams() { return Params; }
-
- void printLeft(OutputStream &S) const override {
- S += "<";
- Params.printWithComma(S);
- if (S.back() == '>')
- S += " ";
- S += ">";
}
-};
-
-struct ForwardTemplateReference : Node {
- size_t Index;
- Node *Ref = nullptr;
-
- // If we're currently printing this node. It is possible (though invalid) for
- // a forward template reference to refer to itself via a substitution. This
- // creates a cyclic AST, which will stack overflow printing. To fix this, bail
- // out if more than one print* function is active.
- mutable bool Printing = false;
-
- ForwardTemplateReference(size_t Index_)
- : Node(KForwardTemplateReference, Cache::Unknown, Cache::Unknown,
- Cache::Unknown),
- Index(Index_) {}
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- if (Printing)
- return false;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasRHSComponent(S);
- }
- bool hasArraySlow(OutputStream &S) const override {
- if (Printing)
- return false;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasArray(S);
- }
- bool hasFunctionSlow(OutputStream &S) const override {
- if (Printing)
- return false;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasFunction(S);
- }
- const Node *getSyntaxNode(OutputStream &S) const override {
- if (Printing)
- return this;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->getSyntaxNode(S);
- }
-
- void printLeft(OutputStream &S) const override {
- if (Printing)
- return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- Ref->printLeft(S);
- }
- void printRight(OutputStream &S) const override {
- if (Printing)
- return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- Ref->printRight(S);
- }
-};
-
-struct NameWithTemplateArgs : Node {
- // name<template_args>
- Node *Name;
- Node *TemplateArgs;
-
- NameWithTemplateArgs(Node *Name_, Node *TemplateArgs_)
- : Node(KNameWithTemplateArgs), Name(Name_), TemplateArgs(TemplateArgs_) {}
-
- StringView getBaseName() const override { return Name->getBaseName(); }
-
- void printLeft(OutputStream &S) const override {
- Name->print(S);
- TemplateArgs->print(S);
- }
-};
-
-class GlobalQualifiedName final : public Node {
- Node *Child;
-
-public:
- GlobalQualifiedName(Node* Child_)
- : Node(KGlobalQualifiedName), Child(Child_) {}
-
- StringView getBaseName() const override { return Child->getBaseName(); }
-
- void printLeft(OutputStream &S) const override {
- S += "::";
- Child->print(S);
- }
-};
-
-struct StdQualifiedName : Node {
- Node *Child;
-
- StdQualifiedName(Node *Child_) : Node(KStdQualifiedName), Child(Child_) {}
-
- StringView getBaseName() const override { return Child->getBaseName(); }
-
- void printLeft(OutputStream &S) const override {
- S += "std::";
- Child->print(S);
- }
-};
-
-enum class SpecialSubKind {
- allocator,
- basic_string,
- string,
- istream,
- ostream,
- iostream,
-};
-
-class ExpandedSpecialSubstitution final : public Node {
- SpecialSubKind SSK;
-
-public:
- ExpandedSpecialSubstitution(SpecialSubKind SSK_)
- : Node(KExpandedSpecialSubstitution), SSK(SSK_) {}
-
- StringView getBaseName() const override {
- switch (SSK) {
- case SpecialSubKind::allocator:
- return StringView("allocator");
- case SpecialSubKind::basic_string:
- return StringView("basic_string");
- case SpecialSubKind::string:
- return StringView("basic_string");
- case SpecialSubKind::istream:
- return StringView("basic_istream");
- case SpecialSubKind::ostream:
- return StringView("basic_ostream");
- case SpecialSubKind::iostream:
- return StringView("basic_iostream");
- }
- LLVM_BUILTIN_UNREACHABLE;
- }
-
- void printLeft(OutputStream &S) const override {
- switch (SSK) {
- case SpecialSubKind::allocator:
- S += "std::basic_string<char, std::char_traits<char>, "
- "std::allocator<char> >";
- break;
- case SpecialSubKind::basic_string:
- case SpecialSubKind::string:
- S += "std::basic_string<char, std::char_traits<char>, "
- "std::allocator<char> >";
- break;
- case SpecialSubKind::istream:
- S += "std::basic_istream<char, std::char_traits<char> >";
- break;
- case SpecialSubKind::ostream:
- S += "std::basic_ostream<char, std::char_traits<char> >";
- break;
- case SpecialSubKind::iostream:
- S += "std::basic_iostream<char, std::char_traits<char> >";
- break;
- }
- }
-};
-
-class SpecialSubstitution final : public Node {
-public:
- SpecialSubKind SSK;
-
- SpecialSubstitution(SpecialSubKind SSK_)
- : Node(KSpecialSubstitution), SSK(SSK_) {}
-
- StringView getBaseName() const override {
- switch (SSK) {
- case SpecialSubKind::allocator:
- return StringView("allocator");
- case SpecialSubKind::basic_string:
- return StringView("basic_string");
- case SpecialSubKind::string:
- return StringView("string");
- case SpecialSubKind::istream:
- return StringView("istream");
- case SpecialSubKind::ostream:
- return StringView("ostream");
- case SpecialSubKind::iostream:
- return StringView("iostream");
+ void print(Qualifiers Qs) {
+ if (!Qs) return printStr("QualNone");
+ struct QualName { Qualifiers Q; const char *Name; } Names[] = {
+ {QualConst, "QualConst"},
+ {QualVolatile, "QualVolatile"},
+ {QualRestrict, "QualRestrict"},
+ };
+ for (QualName Name : Names) {
+ if (Qs & Name.Q) {
+ printStr(Name.Name);
+ Qs = Qualifiers(Qs & ~Name.Q);
+ if (Qs) printStr(" | ");
+ }
}
- LLVM_BUILTIN_UNREACHABLE;
}
-
- void printLeft(OutputStream &S) const override {
+ void print(SpecialSubKind SSK) {
switch (SSK) {
case SpecialSubKind::allocator:
- S += "std::allocator";
- break;
+ return printStr("SpecialSubKind::allocator");
case SpecialSubKind::basic_string:
- S += "std::basic_string";
- break;
+ return printStr("SpecialSubKind::basic_string");
case SpecialSubKind::string:
- S += "std::string";
- break;
+ return printStr("SpecialSubKind::string");
case SpecialSubKind::istream:
- S += "std::istream";
- break;
+ return printStr("SpecialSubKind::istream");
case SpecialSubKind::ostream:
- S += "std::ostream";
- break;
+ return printStr("SpecialSubKind::ostream");
case SpecialSubKind::iostream:
- S += "std::iostream";
- break;
- }
- }
-};
-
-class CtorDtorName final : public Node {
- const Node *Basename;
- const bool IsDtor;
-
-public:
- CtorDtorName(Node *Basename_, bool IsDtor_)
- : Node(KCtorDtorName), Basename(Basename_), IsDtor(IsDtor_) {}
-
- void printLeft(OutputStream &S) const override {
- if (IsDtor)
- S += "~";
- S += Basename->getBaseName();
- }
-};
-
-class DtorName : public Node {
- const Node *Base;
-
-public:
- DtorName(Node *Base_) : Node(KDtorName), Base(Base_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "~";
- Base->printLeft(S);
- }
-};
-
-class UnnamedTypeName : public Node {
- const StringView Count;
-
-public:
- UnnamedTypeName(StringView Count_) : Node(KUnnamedTypeName), Count(Count_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "'unnamed";
- S += Count;
- S += "\'";
- }
-};
-
-class ClosureTypeName : public Node {
- NodeArray Params;
- StringView Count;
-
-public:
- ClosureTypeName(NodeArray Params_, StringView Count_)
- : Node(KClosureTypeName), Params(Params_), Count(Count_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "\'lambda";
- S += Count;
- S += "\'(";
- Params.printWithComma(S);
- S += ")";
- }
-};
-
-class StructuredBindingName : public Node {
- NodeArray Bindings;
-public:
- StructuredBindingName(NodeArray Bindings_)
- : Node(KStructuredBindingName), Bindings(Bindings_) {}
-
- void printLeft(OutputStream &S) const override {
- S += '[';
- Bindings.printWithComma(S);
- S += ']';
- }
-};
-
-// -- Expression Nodes --
-
-struct Expr : public Node {
- Expr(Kind K = KExpr) : Node(K) {}
-};
-
-class BinaryExpr : public Expr {
- const Node *LHS;
- const StringView InfixOperator;
- const Node *RHS;
-
-public:
- BinaryExpr(Node *LHS_, StringView InfixOperator_, Node *RHS_)
- : LHS(LHS_), InfixOperator(InfixOperator_), RHS(RHS_) {}
-
- void printLeft(OutputStream &S) const override {
- // might be a template argument expression, then we need to disambiguate
- // with parens.
- if (InfixOperator == ">")
- S += "(";
-
- S += "(";
- LHS->print(S);
- S += ") ";
- S += InfixOperator;
- S += " (";
- RHS->print(S);
- S += ")";
-
- if (InfixOperator == ">")
- S += ")";
- }
-};
-
-class ArraySubscriptExpr : public Expr {
- const Node *Op1;
- const Node *Op2;
-
-public:
- ArraySubscriptExpr(Node *Op1_, Node *Op2_) : Op1(Op1_), Op2(Op2_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "(";
- Op1->print(S);
- S += ")[";
- Op2->print(S);
- S += "]";
- }
-};
-
-class PostfixExpr : public Expr {
- const Node *Child;
- const StringView Operand;
-
-public:
- PostfixExpr(Node *Child_, StringView Operand_)
- : Child(Child_), Operand(Operand_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "(";
- Child->print(S);
- S += ")";
- S += Operand;
- }
-};
-
-class ConditionalExpr : public Expr {
- const Node *Cond;
- const Node *Then;
- const Node *Else;
-
-public:
- ConditionalExpr(Node *Cond_, Node *Then_, Node *Else_)
- : Cond(Cond_), Then(Then_), Else(Else_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "(";
- Cond->print(S);
- S += ") ? (";
- Then->print(S);
- S += ") : (";
- Else->print(S);
- S += ")";
- }
-};
-
-class MemberExpr : public Expr {
- const Node *LHS;
- const StringView Kind;
- const Node *RHS;
-
-public:
- MemberExpr(Node *LHS_, StringView Kind_, Node *RHS_)
- : LHS(LHS_), Kind(Kind_), RHS(RHS_) {}
-
- void printLeft(OutputStream &S) const override {
- LHS->print(S);
- S += Kind;
- RHS->print(S);
- }
-};
-
-class EnclosingExpr : public Expr {
- const StringView Prefix;
- const Node *Infix;
- const StringView Postfix;
-
-public:
- EnclosingExpr(StringView Prefix_, Node *Infix_, StringView Postfix_)
- : Prefix(Prefix_), Infix(Infix_), Postfix(Postfix_) {}
-
- void printLeft(OutputStream &S) const override {
- S += Prefix;
- Infix->print(S);
- S += Postfix;
- }
-};
-
-class CastExpr : public Expr {
- // cast_kind<to>(from)
- const StringView CastKind;
- const Node *To;
- const Node *From;
-
-public:
- CastExpr(StringView CastKind_, Node *To_, Node *From_)
- : CastKind(CastKind_), To(To_), From(From_) {}
-
- void printLeft(OutputStream &S) const override {
- S += CastKind;
- S += "<";
- To->printLeft(S);
- S += ">(";
- From->printLeft(S);
- S += ")";
- }
-};
-
-class SizeofParamPackExpr : public Expr {
- Node *Pack;
-
-public:
- SizeofParamPackExpr(Node *Pack_) : Pack(Pack_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "sizeof...(";
- ParameterPackExpansion PPE(Pack);
- PPE.printLeft(S);
- S += ")";
- }
-};
-
-class CallExpr : public Expr {
- const Node *Callee;
- NodeArray Args;
-
-public:
- CallExpr(Node *Callee_, NodeArray Args_) : Callee(Callee_), Args(Args_) {}
-
- void printLeft(OutputStream &S) const override {
- Callee->print(S);
- S += "(";
- Args.printWithComma(S);
- S += ")";
- }
-};
-
-class NewExpr : public Expr {
- // new (expr_list) type(init_list)
- NodeArray ExprList;
- Node *Type;
- NodeArray InitList;
- bool IsGlobal; // ::operator new ?
- bool IsArray; // new[] ?
-public:
- NewExpr(NodeArray ExprList_, Node *Type_, NodeArray InitList_, bool IsGlobal_,
- bool IsArray_)
- : ExprList(ExprList_), Type(Type_), InitList(InitList_),
- IsGlobal(IsGlobal_), IsArray(IsArray_) {}
-
- void printLeft(OutputStream &S) const override {
- if (IsGlobal)
- S += "::operator ";
- S += "new";
- if (IsArray)
- S += "[]";
- S += ' ';
- if (!ExprList.empty()) {
- S += "(";
- ExprList.printWithComma(S);
- S += ")";
+ return printStr("SpecialSubKind::iostream");
}
- Type->print(S);
- if (!InitList.empty()) {
- S += "(";
- InitList.printWithComma(S);
- S += ")";
- }
-
- }
-};
-
-class DeleteExpr : public Expr {
- Node *Op;
- bool IsGlobal;
- bool IsArray;
-
-public:
- DeleteExpr(Node *Op_, bool IsGlobal_, bool IsArray_)
- : Op(Op_), IsGlobal(IsGlobal_), IsArray(IsArray_) {}
-
- void printLeft(OutputStream &S) const override {
- if (IsGlobal)
- S += "::";
- S += "delete";
- if (IsArray)
- S += "[] ";
- Op->print(S);
- }
-};
-
-class PrefixExpr : public Expr {
- StringView Prefix;
- Node *Child;
-
-public:
- PrefixExpr(StringView Prefix_, Node *Child_) : Prefix(Prefix_), Child(Child_) {}
-
- void printLeft(OutputStream &S) const override {
- S += Prefix;
- S += "(";
- Child->print(S);
- S += ")";
}
-};
-
-class FunctionParam : public Expr {
- StringView Number;
-public:
- FunctionParam(StringView Number_) : Number(Number_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "fp";
- S += Number;
+ void newLine() {
+ printStr("\n");
+ for (unsigned I = 0; I != Depth; ++I)
+ printStr(" ");
+ PendingNewline = false;
}
-};
-class ConversionExpr : public Expr {
- const Node *Type;
- NodeArray Expressions;
-
-public:
- ConversionExpr(const Node *Type_, NodeArray Expressions_)
- : Type(Type_), Expressions(Expressions_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "(";
- Type->print(S);
- S += ")(";
- Expressions.printWithComma(S);
- S += ")";
+ template<typename T> void printWithPendingNewline(T V) {
+ print(V);
+ if (wantsNewline(V))
+ PendingNewline = true;
}
-};
-
-class InitListExpr : public Expr {
- Node *Ty;
- NodeArray Inits;
-public:
- InitListExpr(Node *Ty_, NodeArray Inits_) : Ty(Ty_), Inits(Inits_) {}
-
- void printLeft(OutputStream &S) const override {
- if (Ty)
- Ty->print(S);
- S += '{';
- Inits.printWithComma(S);
- S += '}';
- }
-};
-class BracedExpr : public Expr {
- Node *Elem;
- Node *Init;
- bool IsArray;
-public:
- BracedExpr(Node *Elem_, Node *Init_, bool IsArray_)
- : Expr(KBracedExpr), Elem(Elem_), Init(Init_), IsArray(IsArray_) {}
-
- void printLeft(OutputStream &S) const override {
- if (IsArray) {
- S += '[';
- Elem->print(S);
- S += ']';
+ template<typename T> void printWithComma(T V) {
+ if (PendingNewline || wantsNewline(V)) {
+ printStr(",");
+ newLine();
} else {
- S += '.';
- Elem->print(S);
+ printStr(", ");
}
- if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
- S += " = ";
- Init->print(S);
- }
-};
-class BracedRangeExpr : public Expr {
- Node *First;
- Node *Last;
- Node *Init;
-public:
- BracedRangeExpr(Node *First_, Node *Last_, Node *Init_)
- : Expr(KBracedRangeExpr), First(First_), Last(Last_), Init(Init_) {}
-
- void printLeft(OutputStream &S) const override {
- S += '[';
- First->print(S);
- S += " ... ";
- Last->print(S);
- S += ']';
- if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
- S += " = ";
- Init->print(S);
+ printWithPendingNewline(V);
}
-};
-
-struct FoldExpr : Expr {
- Node *Pack, *Init;
- StringView OperatorName;
- bool IsLeftFold;
-
- FoldExpr(bool IsLeftFold_, StringView OperatorName_, Node *Pack_, Node *Init_)
- : Pack(Pack_), Init(Init_), OperatorName(OperatorName_),
- IsLeftFold(IsLeftFold_) {}
- void printLeft(OutputStream &S) const override {
- auto PrintPack = [&] {
- S += '(';
- ParameterPackExpansion(Pack).print(S);
- S += ')';
- };
-
- S += '(';
+ struct CtorArgPrinter {
+ DumpVisitor &Visitor;
- if (IsLeftFold) {
- // init op ... op pack
- if (Init != nullptr) {
- Init->print(S);
- S += ' ';
- S += OperatorName;
- S += ' ';
- }
- // ... op pack
- S += "... ";
- S += OperatorName;
- S += ' ';
- PrintPack();
- } else { // !IsLeftFold
- // pack op ...
- PrintPack();
- S += ' ';
- S += OperatorName;
- S += " ...";
- // pack op ... op init
- if (Init != nullptr) {
- S += ' ';
- S += OperatorName;
- S += ' ';
- Init->print(S);
- }
+ template<typename T, typename ...Rest> void operator()(T V, Rest ...Vs) {
+ if (Visitor.anyWantNewline(V, Vs...))
+ Visitor.newLine();
+ Visitor.printWithPendingNewline(V);
+ int PrintInOrder[] = { (Visitor.printWithComma(Vs), 0)..., 0 };
+ (void)PrintInOrder;
}
- S += ')';
- }
-};
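
A standalone sketch of the fold-expression layout produced by FoldExpr::printLeft above: for a left fold the optional init comes first, "(init op ... op (pack))"; for a right fold it comes last, "((pack) op ... op init)".

#include <cstdio>
#include <string>

static std::string printFold(bool IsLeftFold, const std::string &Op,
                             const std::string &Pack, const std::string &Init) {
  std::string Out = "(";
  if (IsLeftFold) {
    if (!Init.empty()) Out += Init + " " + Op + " ";
    Out += "... " + Op + " (" + Pack + ")";
  } else {
    Out += "(" + Pack + ") " + Op + " ...";
    if (!Init.empty()) Out += " " + Op + " " + Init;
  }
  return Out + ")";
}

int main() {
  std::printf("%s\n", printFold(true, "+", "args", "0").c_str()); // (0 + ... + (args))
  std::printf("%s\n", printFold(false, "+", "args", "").c_str()); // ((args) + ...)
}
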
-
-class ThrowExpr : public Expr {
- const Node *Op;
-
-public:
- ThrowExpr(Node *Op_) : Op(Op_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "throw ";
- Op->print(S);
- }
-};
-
-class BoolExpr : public Expr {
- bool Value;
-
-public:
- BoolExpr(bool Value_) : Value(Value_) {}
-
- void printLeft(OutputStream &S) const override {
- S += Value ? StringView("true") : StringView("false");
- }
-};
-
-class IntegerCastExpr : public Expr {
- // ty(integer)
- Node *Ty;
- StringView Integer;
-
-public:
- IntegerCastExpr(Node *Ty_, StringView Integer_)
- : Ty(Ty_), Integer(Integer_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "(";
- Ty->print(S);
- S += ")";
- S += Integer;
- }
-};
-
-class IntegerExpr : public Expr {
- StringView Type;
- StringView Value;
-
-public:
- IntegerExpr(StringView Type_, StringView Value_) : Type(Type_), Value(Value_) {}
+ };
- void printLeft(OutputStream &S) const override {
- if (Type.size() > 3) {
- S += "(";
- S += Type;
- S += ")";
+ template<typename NodeT> void operator()(const NodeT *Node) {
+ Depth += 2;
+ fprintf(stderr, "%s(", itanium_demangle::NodeKind<NodeT>::name());
+ Node->match(CtorArgPrinter{*this});
+ fprintf(stderr, ")");
+ Depth -= 2;
+ }
+
+ void operator()(const ForwardTemplateReference *Node) {
+ Depth += 2;
+ fprintf(stderr, "ForwardTemplateReference(");
+ if (Node->Ref && !Node->Printing) {
+ Node->Printing = true;
+ CtorArgPrinter{*this}(Node->Ref);
+ Node->Printing = false;
+ } else {
+ CtorArgPrinter{*this}(Node->Index);
}
-
- if (Value[0] == 'n') {
- S += "-";
- S += Value.dropFront(1);
- } else
- S += Value;
-
- if (Type.size() <= 3)
- S += Type;
+ fprintf(stderr, ")");
+ Depth -= 2;
}
};
+}
-template <class Float> struct FloatData;
-
-template <class Float> class FloatExpr : public Expr {
- const StringView Contents;
-
-public:
- FloatExpr(StringView Contents_) : Contents(Contents_) {}
-
- void printLeft(OutputStream &s) const override {
- const char *first = Contents.begin();
- const char *last = Contents.end() + 1;
-
- const size_t N = FloatData<Float>::mangled_size;
- if (static_cast<std::size_t>(last - first) > N) {
- last = first + N;
- union {
- Float value;
- char buf[sizeof(Float)];
- };
- const char *t = first;
- char *e = buf;
- for (; t != last; ++t, ++e) {
- unsigned d1 = isdigit(*t) ? static_cast<unsigned>(*t - '0')
- : static_cast<unsigned>(*t - 'a' + 10);
- ++t;
- unsigned d0 = isdigit(*t) ? static_cast<unsigned>(*t - '0')
- : static_cast<unsigned>(*t - 'a' + 10);
- *e = static_cast<char>((d1 << 4) + d0);
- }
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- std::reverse(buf, e);
+void itanium_demangle::Node::dump() const {
+ DumpVisitor V;
+ visit(std::ref(V));
+ V.newLine();
+}
#endif
- char num[FloatData<Float>::max_demangled_size] = {0};
- int n = snprintf(num, sizeof(num), FloatData<Float>::spec, value);
- s += StringView(num, num + n);
- }
- }
-};
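
A simplified standalone sketch of the hex-nibble decoding done by FloatExpr::printLeft above (the real code sizes the buffer from FloatData and formats with its printf spec). The input "40490fdb" is an assumed example: it is the big-endian byte image of the float value pi.

#include <algorithm>
#include <cstdio>
#include <cstring>

static float decodeMangledFloat(const char *Hex /* exactly 8 lowercase hex digits */) {
  auto Nibble = [](char C) -> unsigned {
    return C <= '9' ? unsigned(C - '0') : unsigned(C - 'a' + 10);
  };
  unsigned char Bytes[sizeof(float)];
  for (size_t I = 0; I != sizeof(float); ++I)
    Bytes[I] = static_cast<unsigned char>((Nibble(Hex[2 * I]) << 4) |
                                          Nibble(Hex[2 * I + 1]));
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  std::reverse(Bytes, Bytes + sizeof(float)); // mangled form is big-endian
#endif
  float Value;
  std::memcpy(&Value, Bytes, sizeof(float));
  return Value;
}

int main() {
  std::printf("%g\n", decodeMangledFloat("40490fdb")); // prints 3.14159
}
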
+namespace {
class BumpPointerAllocator {
struct BlockMeta {
BlockMeta* Next;
@@ -1823,3134 +301,28 @@ public:
~BumpPointerAllocator() { reset(); }
};
-template <class T, size_t N>
-class PODSmallVector {
- static_assert(std::is_pod<T>::value,
- "T is required to be a plain old data type");
-
- T* First;
- T* Last;
- T* Cap;
- T Inline[N];
-
- bool isInline() const { return First == Inline; }
-
- void clearInline() {
- First = Inline;
- Last = Inline;
- Cap = Inline + N;
- }
-
- void reserve(size_t NewCap) {
- size_t S = size();
- if (isInline()) {
- auto* Tmp = static_cast<T*>(std::malloc(NewCap * sizeof(T)));
- if (Tmp == nullptr)
- std::terminate();
- std::copy(First, Last, Tmp);
- First = Tmp;
- } else {
- First = static_cast<T*>(std::realloc(First, NewCap * sizeof(T)));
- if (First == nullptr)
- std::terminate();
- }
- Last = First + S;
- Cap = First + NewCap;
- }
+class DefaultAllocator {
+ BumpPointerAllocator Alloc;
public:
- PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {}
-
- PODSmallVector(const PODSmallVector&) = delete;
- PODSmallVector& operator=(const PODSmallVector&) = delete;
-
- PODSmallVector(PODSmallVector&& Other) : PODSmallVector() {
- if (Other.isInline()) {
- std::copy(Other.begin(), Other.end(), First);
- Last = First + Other.size();
- Other.clear();
- return;
- }
-
- First = Other.First;
- Last = Other.Last;
- Cap = Other.Cap;
- Other.clearInline();
- }
-
- PODSmallVector& operator=(PODSmallVector&& Other) {
- if (Other.isInline()) {
- if (!isInline()) {
- std::free(First);
- clearInline();
- }
- std::copy(Other.begin(), Other.end(), First);
- Last = First + Other.size();
- Other.clear();
- return *this;
- }
-
- if (isInline()) {
- First = Other.First;
- Last = Other.Last;
- Cap = Other.Cap;
- Other.clearInline();
- return *this;
- }
-
- std::swap(First, Other.First);
- std::swap(Last, Other.Last);
- std::swap(Cap, Other.Cap);
- Other.clear();
- return *this;
- }
-
- void push_back(const T& Elem) {
- if (Last == Cap)
- reserve(size() * 2);
- *Last++ = Elem;
- }
-
- void pop_back() {
- assert(Last != First && "Popping empty vector!");
- --Last;
- }
-
- void dropBack(size_t Index) {
- assert(Index <= size() && "dropBack() can't expand!");
- Last = First + Index;
- }
-
- T* begin() { return First; }
- T* end() { return Last; }
-
- bool empty() const { return First == Last; }
- size_t size() const { return static_cast<size_t>(Last - First); }
- T& back() {
- assert(Last != First && "Calling back() on empty vector!");
- return *(Last - 1);
- }
- T& operator[](size_t Index) {
- assert(Index < size() && "Invalid access!");
- return *(begin() + Index);
- }
- void clear() { Last = First; }
-
- ~PODSmallVector() {
- if (!isInline())
- std::free(First);
- }
-};
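
A minimal usage sketch for the PODSmallVector removed above; it assumes the class template is in scope and is not a standalone program. The first N pushes stay in the inline buffer; pushing past N triggers reserve() and a copy to the heap.

#include <cassert>

void podSmallVectorSketch() {
  PODSmallVector<int, 4> V;
  for (int I = 0; I != 8; ++I)
    V.push_back(I);            // grows 4 -> 8, spilling to the heap
  assert(V.size() == 8 && V.back() == 7);
  V.dropBack(2);               // keep only the first two elements
  assert(V.size() == 2 && V[1] == 1);
}
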
-
-struct Db {
- const char *First;
- const char *Last;
-
- // Name stack, this is used by the parser to hold temporary names that were
- // parsed. The parser collapses multiple names into new nodes to construct
- // the AST. Once the parser is finished, names.size() == 1.
- PODSmallVector<Node *, 32> Names;
-
- // Substitution table. Itanium supports name substitutions as a means of
- // compression. The string "S42_" refers to entry 146 (base-36) in this
- // table.
- PODSmallVector<Node *, 32> Subs;
-
- // Template parameter table. Like the above, but referenced like "T42_".
- // This has a smaller size compared to Subs and Names because it can be
- // stored on the stack.
- PODSmallVector<Node *, 8> TemplateParams;
-
- // Set of unresolved forward <template-param> references. These can occur in a
- // conversion operator's type, and are resolved in the enclosing <encoding>.
- PODSmallVector<ForwardTemplateReference *, 4> ForwardTemplateRefs;
-
- bool TryToParseTemplateArgs = true;
- bool PermitForwardTemplateReferences = false;
- bool ParsingLambdaParams = false;
-
- BumpPointerAllocator ASTAllocator;
+ void reset() { Alloc.reset(); }
- Db(const char *First_, const char *Last_) : First(First_), Last(Last_) {}
-
- void reset(const char *First_, const char *Last_) {
- First = First_;
- Last = Last_;
- Names.clear();
- Subs.clear();
- TemplateParams.clear();
- ParsingLambdaParams = false;
- TryToParseTemplateArgs = true;
- PermitForwardTemplateReferences = false;
- ASTAllocator.reset();
- }
-
- template <class T, class... Args> T *make(Args &&... args) {
- return new (ASTAllocator.allocate(sizeof(T)))
+ template<typename T, typename ...Args> T *makeNode(Args &&...args) {
+ return new (Alloc.allocate(sizeof(T)))
T(std::forward<Args>(args)...);
}
- template <class It> NodeArray makeNodeArray(It begin, It end) {
- size_t sz = static_cast<size_t>(end - begin);
- void *mem = ASTAllocator.allocate(sizeof(Node *) * sz);
- Node **data = new (mem) Node *[sz];
- std::copy(begin, end, data);
- return NodeArray(data, sz);
- }
-
- NodeArray popTrailingNodeArray(size_t FromPosition) {
- assert(FromPosition <= Names.size());
- NodeArray res =
- makeNodeArray(Names.begin() + (long)FromPosition, Names.end());
- Names.dropBack(FromPosition);
- return res;
- }
-
- bool consumeIf(StringView S) {
- if (StringView(First, Last).startsWith(S)) {
- First += S.size();
- return true;
- }
- return false;
- }
-
- bool consumeIf(char C) {
- if (First != Last && *First == C) {
- ++First;
- return true;
- }
- return false;
- }
-
- char consume() { return First != Last ? *First++ : '\0'; }
-
- char look(unsigned Lookahead = 0) {
- if (static_cast<size_t>(Last - First) <= Lookahead)
- return '\0';
- return First[Lookahead];
- }
-
- size_t numLeft() const { return static_cast<size_t>(Last - First); }
-
- StringView parseNumber(bool AllowNegative = false);
- Qualifiers parseCVQualifiers();
- bool parsePositiveInteger(size_t *Out);
- StringView parseBareSourceName();
-
- bool parseSeqId(size_t *Out);
- Node *parseSubstitution();
- Node *parseTemplateParam();
- Node *parseTemplateArgs(bool TagTemplates = false);
- Node *parseTemplateArg();
-
- /// Parse the <expr> production.
- Node *parseExpr();
- Node *parsePrefixExpr(StringView Kind);
- Node *parseBinaryExpr(StringView Kind);
- Node *parseIntegerLiteral(StringView Lit);
- Node *parseExprPrimary();
- template <class Float> Node *parseFloatingLiteral();
- Node *parseFunctionParam();
- Node *parseNewExpr();
- Node *parseConversionExpr();
- Node *parseBracedExpr();
- Node *parseFoldExpr();
-
- /// Parse the <type> production.
- Node *parseType();
- Node *parseFunctionType();
- Node *parseVectorType();
- Node *parseDecltype();
- Node *parseArrayType();
- Node *parsePointerToMemberType();
- Node *parseClassEnumType();
- Node *parseQualifiedType();
-
- Node *parseEncoding();
- bool parseCallOffset();
- Node *parseSpecialName();
-
- /// Holds some extra information about a <name> that is being parsed. This
- /// information is only pertinent if the <name> refers to an <encoding>.
- struct NameState {
- bool CtorDtorConversion = false;
- bool EndsWithTemplateArgs = false;
- Qualifiers CVQualifiers = QualNone;
- FunctionRefQual ReferenceQualifier = FrefQualNone;
- size_t ForwardTemplateRefsBegin;
-
- NameState(Db *Enclosing)
- : ForwardTemplateRefsBegin(Enclosing->ForwardTemplateRefs.size()) {}
- };
-
- bool resolveForwardTemplateRefs(NameState &State) {
- size_t I = State.ForwardTemplateRefsBegin;
- size_t E = ForwardTemplateRefs.size();
- for (; I < E; ++I) {
- size_t Idx = ForwardTemplateRefs[I]->Index;
- if (Idx >= TemplateParams.size())
- return true;
- ForwardTemplateRefs[I]->Ref = TemplateParams[Idx];
- }
- ForwardTemplateRefs.dropBack(State.ForwardTemplateRefsBegin);
- return false;
+ void *allocateNodeArray(size_t sz) {
+ return Alloc.allocate(sizeof(Node *) * sz);
}
-
- /// Parse the <name> production.
- Node *parseName(NameState *State = nullptr);
- Node *parseLocalName(NameState *State);
- Node *parseOperatorName(NameState *State);
- Node *parseUnqualifiedName(NameState *State);
- Node *parseUnnamedTypeName(NameState *State);
- Node *parseSourceName(NameState *State);
- Node *parseUnscopedName(NameState *State);
- Node *parseNestedName(NameState *State);
- Node *parseCtorDtorName(Node *&SoFar, NameState *State);
-
- Node *parseAbiTags(Node *N);
-
- /// Parse the <unresolved-name> production.
- Node *parseUnresolvedName();
- Node *parseSimpleId();
- Node *parseBaseUnresolvedName();
- Node *parseUnresolvedType();
- Node *parseDestructorName();
-
- /// Top-level entry point into the parser.
- Node *parse();
};
+} // unnamed namespace
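
A standalone sketch of the base-36 <seq-id> decoding behind the "S42_" comment in the Subs table above: digits 0-9 then uppercase A-Z, so "42" decodes to 4*36 + 2 = 146 ("S_" itself, with no seq-id, names the very first substitution).

#include <cstddef>
#include <cstdio>

static bool decodeSeqId(const char *S, size_t &Out) {
  Out = 0;
  for (; *S && *S != '_'; ++S) {
    unsigned Digit;
    if (*S >= '0' && *S <= '9')      Digit = *S - '0';
    else if (*S >= 'A' && *S <= 'Z') Digit = *S - 'A' + 10;
    else                             return false;
    Out = Out * 36 + Digit;
  }
  return *S == '_';
}

int main() {
  size_t Id;
  if (decodeSeqId("42_", Id))
    std::printf("seq-id %zu\n", Id); // prints "seq-id 146"
}
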
-const char* parse_discriminator(const char* first, const char* last);
-
-// <name> ::= <nested-name> // N
-// ::= <local-name> # See Scope Encoding below // Z
-// ::= <unscoped-template-name> <template-args>
-// ::= <unscoped-name>
-//
-// <unscoped-template-name> ::= <unscoped-name>
-// ::= <substitution>
-Node *Db::parseName(NameState *State) {
- consumeIf('L'); // extension
-
- if (look() == 'N')
- return parseNestedName(State);
- if (look() == 'Z')
- return parseLocalName(State);
-
- // ::= <unscoped-template-name> <template-args>
- if (look() == 'S' && look(1) != 't') {
- Node *S = parseSubstitution();
- if (S == nullptr)
- return nullptr;
- if (look() != 'I')
- return nullptr;
- Node *TA = parseTemplateArgs(State != nullptr);
- if (TA == nullptr)
- return nullptr;
- if (State) State->EndsWithTemplateArgs = true;
- return make<NameWithTemplateArgs>(S, TA);
- }
-
- Node *N = parseUnscopedName(State);
- if (N == nullptr)
- return nullptr;
- // ::= <unscoped-template-name> <template-args>
- if (look() == 'I') {
- Subs.push_back(N);
- Node *TA = parseTemplateArgs(State != nullptr);
- if (TA == nullptr)
- return nullptr;
- if (State) State->EndsWithTemplateArgs = true;
- return make<NameWithTemplateArgs>(N, TA);
- }
- // ::= <unscoped-name>
- return N;
-}
-
-// <local-name> := Z <function encoding> E <entity name> [<discriminator>]
-// := Z <function encoding> E s [<discriminator>]
-// := Z <function encoding> Ed [ <parameter number> ] _ <entity name>
-Node *Db::parseLocalName(NameState *State) {
- if (!consumeIf('Z'))
- return nullptr;
- Node *Encoding = parseEncoding();
- if (Encoding == nullptr || !consumeIf('E'))
- return nullptr;
-
- if (consumeIf('s')) {
- First = parse_discriminator(First, Last);
- return make<LocalName>(Encoding, make<NameType>("string literal"));
- }
-
- if (consumeIf('d')) {
- parseNumber(true);
- if (!consumeIf('_'))
- return nullptr;
- Node *N = parseName(State);
- if (N == nullptr)
- return nullptr;
- return make<LocalName>(Encoding, N);
- }
-
- Node *Entity = parseName(State);
- if (Entity == nullptr)
- return nullptr;
- First = parse_discriminator(First, Last);
- return make<LocalName>(Encoding, Entity);
-}
-
-// <unscoped-name> ::= <unqualified-name>
-// ::= St <unqualified-name> # ::std::
-// extension ::= StL<unqualified-name>
-Node *Db::parseUnscopedName(NameState *State) {
- if (consumeIf("StL") || consumeIf("St")) {
- Node *R = parseUnqualifiedName(State);
- if (R == nullptr)
- return nullptr;
- return make<StdQualifiedName>(R);
- }
- return parseUnqualifiedName(State);
-}
-
-// <unqualified-name> ::= <operator-name> [abi-tags]
-// ::= <ctor-dtor-name>
-// ::= <source-name>
-// ::= <unnamed-type-name>
-// ::= DC <source-name>+ E # structured binding declaration
-Node *Db::parseUnqualifiedName(NameState *State) {
- // <ctor-dtor-name>s are special-cased in parseNestedName().
- Node *Result;
- if (look() == 'U')
- Result = parseUnnamedTypeName(State);
- else if (look() >= '1' && look() <= '9')
- Result = parseSourceName(State);
- else if (consumeIf("DC")) {
- size_t BindingsBegin = Names.size();
- do {
- Node *Binding = parseSourceName(State);
- if (Binding == nullptr)
- return nullptr;
- Names.push_back(Binding);
- } while (!consumeIf('E'));
- Result = make<StructuredBindingName>(popTrailingNodeArray(BindingsBegin));
- } else
- Result = parseOperatorName(State);
- if (Result != nullptr)
- Result = parseAbiTags(Result);
- return Result;
-}
-
-// <unnamed-type-name> ::= Ut [<nonnegative number>] _
-// ::= <closure-type-name>
-//
-// <closure-type-name> ::= Ul <lambda-sig> E [ <nonnegative number> ] _
-//
-// <lambda-sig> ::= <parameter type>+ # Parameter types or "v" if the lambda has no parameters
-Node *Db::parseUnnamedTypeName(NameState *) {
- if (consumeIf("Ut")) {
- StringView Count = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- return make<UnnamedTypeName>(Count);
- }
- if (consumeIf("Ul")) {
- NodeArray Params;
- SwapAndRestore<bool> SwapParams(ParsingLambdaParams, true);
- if (!consumeIf("vE")) {
- size_t ParamsBegin = Names.size();
- do {
- Node *P = parseType();
- if (P == nullptr)
- return nullptr;
- Names.push_back(P);
- } while (!consumeIf('E'));
- Params = popTrailingNodeArray(ParamsBegin);
- }
- StringView Count = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- return make<ClosureTypeName>(Params, Count);
- }
- return nullptr;
-}
-
-// <source-name> ::= <positive length number> <identifier>
-Node *Db::parseSourceName(NameState *) {
- size_t Length = 0;
- if (parsePositiveInteger(&Length))
- return nullptr;
- if (numLeft() < Length || Length == 0)
- return nullptr;
- StringView Name(First, First + Length);
- First += Length;
- if (Name.startsWith("_GLOBAL__N"))
- return make<NameType>("(anonymous namespace)");
- return make<NameType>(Name);
-}
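
A standalone toy illustrating the <source-name> rule above: a decimal length followed by that many identifier characters, with the "_GLOBAL__N" prefix rewritten to "(anonymous namespace)". Error handling is reduced to returning an empty string.

#include <cctype>
#include <cstdio>
#include <string>

static std::string toySourceName(const std::string &M, size_t &I) {
  size_t Len = 0;
  while (I < M.size() && std::isdigit((unsigned char)M[I]))
    Len = Len * 10 + (M[I++] - '0');
  if (Len == 0 || I + Len > M.size())
    return "";
  std::string Name = M.substr(I, Len);
  I += Len;
  if (Name.rfind("_GLOBAL__N", 0) == 0)   // prefix check
    return "(anonymous namespace)";
  return Name;
}

int main() {
  size_t I = 0;
  std::string M = "3foo";
  std::printf("%s\n", toySourceName(M, I).c_str()); // prints "foo"
}
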
-
-// <operator-name> ::= aa # &&
-// ::= ad # & (unary)
-// ::= an # &
-// ::= aN # &=
-// ::= aS # =
-// ::= cl # ()
-// ::= cm # ,
-// ::= co # ~
-// ::= cv <type> # (cast)
-// ::= da # delete[]
-// ::= de # * (unary)
-// ::= dl # delete
-// ::= dv # /
-// ::= dV # /=
-// ::= eo # ^
-// ::= eO # ^=
-// ::= eq # ==
-// ::= ge # >=
-// ::= gt # >
-// ::= ix # []
-// ::= le # <=
-// ::= li <source-name> # operator ""
-// ::= ls # <<
-// ::= lS # <<=
-// ::= lt # <
-// ::= mi # -
-// ::= mI # -=
-// ::= ml # *
-// ::= mL # *=
-// ::= mm # -- (postfix in <expression> context)
-// ::= na # new[]
-// ::= ne # !=
-// ::= ng # - (unary)
-// ::= nt # !
-// ::= nw # new
-// ::= oo # ||
-// ::= or # |
-// ::= oR # |=
-// ::= pm # ->*
-// ::= pl # +
-// ::= pL # +=
-// ::= pp # ++ (postfix in <expression> context)
-// ::= ps # + (unary)
-// ::= pt # ->
-// ::= qu # ?
-// ::= rm # %
-// ::= rM # %=
-// ::= rs # >>
-// ::= rS # >>=
-// ::= ss # <=> C++2a
-// ::= v <digit> <source-name> # vendor extended operator
-Node *Db::parseOperatorName(NameState *State) {
- switch (look()) {
- case 'a':
- switch (look(1)) {
- case 'a':
- First += 2;
- return make<NameType>("operator&&");
- case 'd':
- case 'n':
- First += 2;
- return make<NameType>("operator&");
- case 'N':
- First += 2;
- return make<NameType>("operator&=");
- case 'S':
- First += 2;
- return make<NameType>("operator=");
- }
- return nullptr;
- case 'c':
- switch (look(1)) {
- case 'l':
- First += 2;
- return make<NameType>("operator()");
- case 'm':
- First += 2;
- return make<NameType>("operator,");
- case 'o':
- First += 2;
- return make<NameType>("operator~");
- // ::= cv <type> # (cast)
- case 'v': {
- First += 2;
- SwapAndRestore<bool> SaveTemplate(TryToParseTemplateArgs, false);
- // If we're parsing an encoding, State != nullptr and the conversion
- // operators' <type> could have a <template-param> that refers to some
- // <template-arg>s further ahead in the mangled name.
- SwapAndRestore<bool> SavePermit(PermitForwardTemplateReferences,
- PermitForwardTemplateReferences ||
- State != nullptr);
- Node* Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- if (State) State->CtorDtorConversion = true;
- return make<ConversionOperatorType>(Ty);
- }
- }
- return nullptr;
- case 'd':
- switch (look(1)) {
- case 'a':
- First += 2;
- return make<NameType>("operator delete[]");
- case 'e':
- First += 2;
- return make<NameType>("operator*");
- case 'l':
- First += 2;
- return make<NameType>("operator delete");
- case 'v':
- First += 2;
- return make<NameType>("operator/");
- case 'V':
- First += 2;
- return make<NameType>("operator/=");
- }
- return nullptr;
- case 'e':
- switch (look(1)) {
- case 'o':
- First += 2;
- return make<NameType>("operator^");
- case 'O':
- First += 2;
- return make<NameType>("operator^=");
- case 'q':
- First += 2;
- return make<NameType>("operator==");
- }
- return nullptr;
- case 'g':
- switch (look(1)) {
- case 'e':
- First += 2;
- return make<NameType>("operator>=");
- case 't':
- First += 2;
- return make<NameType>("operator>");
- }
- return nullptr;
- case 'i':
- if (look(1) == 'x') {
- First += 2;
- return make<NameType>("operator[]");
- }
- return nullptr;
- case 'l':
- switch (look(1)) {
- case 'e':
- First += 2;
- return make<NameType>("operator<=");
- // ::= li <source-name> # operator ""
- case 'i': {
- First += 2;
- Node *SN = parseSourceName(State);
- if (SN == nullptr)
- return nullptr;
- return make<LiteralOperator>(SN);
- }
- case 's':
- First += 2;
- return make<NameType>("operator<<");
- case 'S':
- First += 2;
- return make<NameType>("operator<<=");
- case 't':
- First += 2;
- return make<NameType>("operator<");
- }
- return nullptr;
- case 'm':
- switch (look(1)) {
- case 'i':
- First += 2;
- return make<NameType>("operator-");
- case 'I':
- First += 2;
- return make<NameType>("operator-=");
- case 'l':
- First += 2;
- return make<NameType>("operator*");
- case 'L':
- First += 2;
- return make<NameType>("operator*=");
- case 'm':
- First += 2;
- return make<NameType>("operator--");
- }
- return nullptr;
- case 'n':
- switch (look(1)) {
- case 'a':
- First += 2;
- return make<NameType>("operator new[]");
- case 'e':
- First += 2;
- return make<NameType>("operator!=");
- case 'g':
- First += 2;
- return make<NameType>("operator-");
- case 't':
- First += 2;
- return make<NameType>("operator!");
- case 'w':
- First += 2;
- return make<NameType>("operator new");
- }
- return nullptr;
- case 'o':
- switch (look(1)) {
- case 'o':
- First += 2;
- return make<NameType>("operator||");
- case 'r':
- First += 2;
- return make<NameType>("operator|");
- case 'R':
- First += 2;
- return make<NameType>("operator|=");
- }
- return nullptr;
- case 'p':
- switch (look(1)) {
- case 'm':
- First += 2;
- return make<NameType>("operator->*");
- case 'l':
- First += 2;
- return make<NameType>("operator+");
- case 'L':
- First += 2;
- return make<NameType>("operator+=");
- case 'p':
- First += 2;
- return make<NameType>("operator++");
- case 's':
- First += 2;
- return make<NameType>("operator+");
- case 't':
- First += 2;
- return make<NameType>("operator->");
- }
- return nullptr;
- case 'q':
- if (look(1) == 'u') {
- First += 2;
- return make<NameType>("operator?");
- }
- return nullptr;
- case 'r':
- switch (look(1)) {
- case 'm':
- First += 2;
- return make<NameType>("operator%");
- case 'M':
- First += 2;
- return make<NameType>("operator%=");
- case 's':
- First += 2;
- return make<NameType>("operator>>");
- case 'S':
- First += 2;
- return make<NameType>("operator>>=");
- }
- return nullptr;
- case 's':
- if (look(1) == 's') {
- First += 2;
- return make<NameType>("operator<=>");
- }
- return nullptr;
- // ::= v <digit> <source-name> # vendor extended operator
- case 'v':
- if (std::isdigit(look(1))) {
- First += 2;
- Node *SN = parseSourceName(State);
- if (SN == nullptr)
- return nullptr;
- return make<ConversionOperatorType>(SN);
- }
- return nullptr;
- }
- return nullptr;
-}
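
A few entries from the fixed two-character <operator-name> table handled above, as a standalone lookup; illustration only, far from the full list.

#include <cstdio>
#include <cstring>

static const char *toyOperatorName(const char *Code) {
  static const struct { const char *Code, *Name; } Table[] = {
      {"aa", "operator&&"}, {"pl", "operator+"},  {"mi", "operator-"},
      {"ml", "operator*"},  {"eq", "operator=="}, {"ix", "operator[]"},
      {"ss", "operator<=>"},
  };
  for (const auto &E : Table)
    if (std::strncmp(Code, E.Code, 2) == 0)
      return E.Name;
  return nullptr;
}

int main() {
  std::printf("%s\n", toyOperatorName("pl")); // prints "operator+"
}
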
-
-// <ctor-dtor-name> ::= C1 # complete object constructor
-// ::= C2 # base object constructor
-// ::= C3 # complete object allocating constructor
-// extension ::= C5 # ?
-// ::= D0 # deleting destructor
-// ::= D1 # complete object destructor
-// ::= D2 # base object destructor
-// extension ::= D5 # ?
-Node *Db::parseCtorDtorName(Node *&SoFar, NameState *State) {
- if (SoFar->K == Node::KSpecialSubstitution) {
- auto SSK = static_cast<SpecialSubstitution *>(SoFar)->SSK;
- switch (SSK) {
- case SpecialSubKind::string:
- case SpecialSubKind::istream:
- case SpecialSubKind::ostream:
- case SpecialSubKind::iostream:
- SoFar = make<ExpandedSpecialSubstitution>(SSK);
- default:
- break;
- }
- }
-
- if (consumeIf('C')) {
- bool IsInherited = consumeIf('I');
- if (look() != '1' && look() != '2' && look() != '3' && look() != '5')
- return nullptr;
- ++First;
- if (State) State->CtorDtorConversion = true;
- if (IsInherited) {
- if (parseName(State) == nullptr)
- return nullptr;
- }
- return make<CtorDtorName>(SoFar, false);
- }
-
- if (look() == 'D' &&
- (look(1) == '0' || look(1) == '1' || look(1) == '2' || look(1) == '5')) {
- First += 2;
- if (State) State->CtorDtorConversion = true;
- return make<CtorDtorName>(SoFar, true);
- }
-
- return nullptr;
-}
-
-// <nested-name> ::= N [<CV-Qualifiers>] [<ref-qualifier>] <prefix> <unqualified-name> E
-// ::= N [<CV-Qualifiers>] [<ref-qualifier>] <template-prefix> <template-args> E
-//
-// <prefix> ::= <prefix> <unqualified-name>
-// ::= <template-prefix> <template-args>
-// ::= <template-param>
-// ::= <decltype>
-// ::= # empty
-// ::= <substitution>
-// ::= <prefix> <data-member-prefix>
-// extension ::= L
-//
-// <data-member-prefix> := <member source-name> [<template-args>] M
-//
-// <template-prefix> ::= <prefix> <template unqualified-name>
-// ::= <template-param>
-// ::= <substitution>
-Node *Db::parseNestedName(NameState *State) {
- if (!consumeIf('N'))
- return nullptr;
-
- Qualifiers CVTmp = parseCVQualifiers();
- if (State) State->CVQualifiers = CVTmp;
-
- if (consumeIf('O')) {
- if (State) State->ReferenceQualifier = FrefQualRValue;
- } else if (consumeIf('R')) {
- if (State) State->ReferenceQualifier = FrefQualLValue;
- } else
- if (State) State->ReferenceQualifier = FrefQualNone;
-
- Node *SoFar = nullptr;
- auto PushComponent = [&](Node *Comp) {
- if (SoFar) SoFar = make<NestedName>(SoFar, Comp);
- else SoFar = Comp;
- if (State) State->EndsWithTemplateArgs = false;
- };
-
- if (consumeIf("St"))
- SoFar = make<NameType>("std");
-
- while (!consumeIf('E')) {
- consumeIf('L'); // extension
-
- // <data-member-prefix> := <member source-name> [<template-args>] M
- if (consumeIf('M')) {
- if (SoFar == nullptr)
- return nullptr;
- continue;
- }
-
- // ::= <template-param>
- if (look() == 'T') {
- Node *TP = parseTemplateParam();
- if (TP == nullptr)
- return nullptr;
- PushComponent(TP);
- Subs.push_back(SoFar);
- continue;
- }
-
- // ::= <template-prefix> <template-args>
- if (look() == 'I') {
- Node *TA = parseTemplateArgs(State != nullptr);
- if (TA == nullptr || SoFar == nullptr)
- return nullptr;
- SoFar = make<NameWithTemplateArgs>(SoFar, TA);
- if (State) State->EndsWithTemplateArgs = true;
- Subs.push_back(SoFar);
- continue;
- }
-
- // ::= <decltype>
- if (look() == 'D' && (look(1) == 't' || look(1) == 'T')) {
- Node *DT = parseDecltype();
- if (DT == nullptr)
- return nullptr;
- PushComponent(DT);
- Subs.push_back(SoFar);
- continue;
- }
-
- // ::= <substitution>
- if (look() == 'S' && look(1) != 't') {
- Node *S = parseSubstitution();
- if (S == nullptr)
- return nullptr;
- PushComponent(S);
- if (SoFar != S)
- Subs.push_back(S);
- continue;
- }
-
- // Parse an <unqualified-name> that's actually a <ctor-dtor-name>.
- if (look() == 'C' || (look() == 'D' && look(1) != 'C')) {
- if (SoFar == nullptr)
- return nullptr;
- Node *CtorDtor = parseCtorDtorName(SoFar, State);
- if (CtorDtor == nullptr)
- return nullptr;
- PushComponent(CtorDtor);
- SoFar = parseAbiTags(SoFar);
- if (SoFar == nullptr)
- return nullptr;
- Subs.push_back(SoFar);
- continue;
- }
-
- // ::= <prefix> <unqualified-name>
- Node *N = parseUnqualifiedName(State);
- if (N == nullptr)
- return nullptr;
- PushComponent(N);
- Subs.push_back(SoFar);
- }
-
- if (SoFar == nullptr || Subs.empty())
- return nullptr;
-
- Subs.pop_back();
- return SoFar;
-}
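
A standalone toy that mimics only the chain of <source-name>s inside a nested name, e.g. "N3foo3barE" -> "foo::bar". Templates, substitutions and ctor/dtor names are handled by the real parseNestedName above; this only shows the shape of the walk.

#include <cctype>
#include <cstdio>
#include <string>

static std::string toyNestedName(const std::string &M) {
  size_t I = 0;
  if (I == M.size() || M[I] != 'N')
    return "";
  ++I;
  std::string Out;
  while (I < M.size() && M[I] != 'E') {
    size_t Len = 0;
    while (I < M.size() && std::isdigit((unsigned char)M[I]))
      Len = Len * 10 + (M[I++] - '0');
    if (Len == 0 || I + Len > M.size())
      return "";
    if (!Out.empty()) Out += "::";
    Out += M.substr(I, Len);
    I += Len;
  }
  return Out;
}

int main() {
  std::printf("%s\n", toyNestedName("N3foo3barE").c_str()); // prints "foo::bar"
}
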
-
-// <simple-id> ::= <source-name> [ <template-args> ]
-Node *Db::parseSimpleId() {
- Node *SN = parseSourceName(/*NameState=*/nullptr);
- if (SN == nullptr)
- return nullptr;
- if (look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- return make<NameWithTemplateArgs>(SN, TA);
- }
- return SN;
-}
-
-// <destructor-name> ::= <unresolved-type> # e.g., ~T or ~decltype(f())
-// ::= <simple-id> # e.g., ~A<2*N>
-Node *Db::parseDestructorName() {
- Node *Result;
- if (std::isdigit(look()))
- Result = parseSimpleId();
- else
- Result = parseUnresolvedType();
- if (Result == nullptr)
- return nullptr;
- return make<DtorName>(Result);
-}
-
-// <unresolved-type> ::= <template-param>
-// ::= <decltype>
-// ::= <substitution>
-Node *Db::parseUnresolvedType() {
- if (look() == 'T') {
- Node *TP = parseTemplateParam();
- if (TP == nullptr)
- return nullptr;
- Subs.push_back(TP);
- return TP;
- }
- if (look() == 'D') {
- Node *DT = parseDecltype();
- if (DT == nullptr)
- return nullptr;
- Subs.push_back(DT);
- return DT;
- }
- return parseSubstitution();
-}
-
-// <base-unresolved-name> ::= <simple-id> # unresolved name
-// extension ::= <operator-name> # unresolved operator-function-id
-// extension ::= <operator-name> <template-args> # unresolved operator template-id
-// ::= on <operator-name> # unresolved operator-function-id
-// ::= on <operator-name> <template-args> # unresolved operator template-id
-// ::= dn <destructor-name> # destructor or pseudo-destructor;
-// # e.g. ~X or ~X<N-1>
-Node *Db::parseBaseUnresolvedName() {
- if (std::isdigit(look()))
- return parseSimpleId();
-
- if (consumeIf("dn"))
- return parseDestructorName();
-
- consumeIf("on");
-
- Node *Oper = parseOperatorName(/*NameState=*/nullptr);
- if (Oper == nullptr)
- return nullptr;
- if (look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- return make<NameWithTemplateArgs>(Oper, TA);
- }
- return Oper;
-}
-
-// <unresolved-name>
-// extension ::= srN <unresolved-type> [<template-args>] <unresolved-qualifier-level>* E <base-unresolved-name>
-// ::= [gs] <base-unresolved-name> # x or (with "gs") ::x
-// ::= [gs] sr <unresolved-qualifier-level>+ E <base-unresolved-name>
-// # A::x, N::y, A<T>::z; "gs" means leading "::"
-// ::= sr <unresolved-type> <base-unresolved-name> # T::x / decltype(p)::x
-// extension ::= sr <unresolved-type> <template-args> <base-unresolved-name>
-//                       # T::N::x / decltype(p)::N::x
-// (ignored) ::= srN <unresolved-type> <unresolved-qualifier-level>+ E <base-unresolved-name>
-//
-// <unresolved-qualifier-level> ::= <simple-id>
-Node *Db::parseUnresolvedName() {
- Node *SoFar = nullptr;
-
- // srN <unresolved-type> [<template-args>] <unresolved-qualifier-level>* E <base-unresolved-name>
- // srN <unresolved-type> <unresolved-qualifier-level>+ E <base-unresolved-name>
- if (consumeIf("srN")) {
- SoFar = parseUnresolvedType();
- if (SoFar == nullptr)
- return nullptr;
-
- if (look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- SoFar = make<NameWithTemplateArgs>(SoFar, TA);
- }
-
- while (!consumeIf('E')) {
- Node *Qual = parseSimpleId();
- if (Qual == nullptr)
- return nullptr;
- SoFar = make<QualifiedName>(SoFar, Qual);
- }
-
- Node *Base = parseBaseUnresolvedName();
- if (Base == nullptr)
- return nullptr;
- return make<QualifiedName>(SoFar, Base);
- }
-
- bool Global = consumeIf("gs");
-
- // [gs] <base-unresolved-name> # x or (with "gs") ::x
- if (!consumeIf("sr")) {
- SoFar = parseBaseUnresolvedName();
- if (SoFar == nullptr)
- return nullptr;
- if (Global)
- SoFar = make<GlobalQualifiedName>(SoFar);
- return SoFar;
- }
-
- // [gs] sr <unresolved-qualifier-level>+ E <base-unresolved-name>
- if (std::isdigit(look())) {
- do {
- Node *Qual = parseSimpleId();
- if (Qual == nullptr)
- return nullptr;
- if (SoFar)
- SoFar = make<QualifiedName>(SoFar, Qual);
- else if (Global)
- SoFar = make<GlobalQualifiedName>(Qual);
- else
- SoFar = Qual;
- } while (!consumeIf('E'));
- }
- // sr <unresolved-type> <base-unresolved-name>
- // sr <unresolved-type> <template-args> <base-unresolved-name>
- else {
- SoFar = parseUnresolvedType();
- if (SoFar == nullptr)
- return nullptr;
-
- if (look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- SoFar = make<NameWithTemplateArgs>(SoFar, TA);
- }
- }
-
- assert(SoFar != nullptr);
-
- Node *Base = parseBaseUnresolvedName();
- if (Base == nullptr)
- return nullptr;
- return make<QualifiedName>(SoFar, Base);
-}
-
-// <abi-tags> ::= <abi-tag> [<abi-tags>]
-// <abi-tag> ::= B <source-name>
-Node *Db::parseAbiTags(Node *N) {
- while (consumeIf('B')) {
- StringView SN = parseBareSourceName();
- if (SN.empty())
- return nullptr;
- N = make<AbiTagAttr>(N, SN);
- }
- return N;
-}
-
-// <number> ::= [n] <non-negative decimal integer>
-StringView Db::parseNumber(bool AllowNegative) {
- const char *Tmp = First;
- if (AllowNegative)
- consumeIf('n');
- if (numLeft() == 0 || !std::isdigit(*First))
- return StringView();
- while (numLeft() != 0 && std::isdigit(*First))
- ++First;
- return StringView(Tmp, First);
-}
-
-// <positive length number> ::= [0-9]*
-bool Db::parsePositiveInteger(size_t *Out) {
- *Out = 0;
- if (look() < '0' || look() > '9')
- return true;
- while (look() >= '0' && look() <= '9') {
- *Out *= 10;
- *Out += static_cast<size_t>(consume() - '0');
- }
- return false;
-}
-
-StringView Db::parseBareSourceName() {
- size_t Int = 0;
- if (parsePositiveInteger(&Int) || numLeft() < Int)
- return StringView();
- StringView R(First, First + Int);
- First += Int;
- return R;
-}
-
-// <function-type> ::= [<CV-qualifiers>] [<exception-spec>] [Dx] F [Y] <bare-function-type> [<ref-qualifier>] E
-//
-// <exception-spec> ::= Do # non-throwing exception-specification (e.g., noexcept, throw())
-// ::= DO <expression> E # computed (instantiation-dependent) noexcept
-// ::= Dw <type>+ E # dynamic exception specification with instantiation-dependent types
-//
-// <ref-qualifier> ::= R # & ref-qualifier
-// <ref-qualifier> ::= O # && ref-qualifier
-Node *Db::parseFunctionType() {
- Qualifiers CVQuals = parseCVQualifiers();
-
- Node *ExceptionSpec = nullptr;
- if (consumeIf("Do")) {
- ExceptionSpec = make<NameType>("noexcept");
- } else if (consumeIf("DO")) {
- Node *E = parseExpr();
- if (E == nullptr || !consumeIf('E'))
- return nullptr;
- ExceptionSpec = make<NoexceptSpec>(E);
- } else if (consumeIf("Dw")) {
- size_t SpecsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *T = parseType();
- if (T == nullptr)
- return nullptr;
- Names.push_back(T);
- }
- ExceptionSpec =
- make<DynamicExceptionSpec>(popTrailingNodeArray(SpecsBegin));
- }
-
- consumeIf("Dx"); // transaction safe
-
- if (!consumeIf('F'))
- return nullptr;
- consumeIf('Y'); // extern "C"
- Node *ReturnType = parseType();
- if (ReturnType == nullptr)
- return nullptr;
-
- FunctionRefQual ReferenceQualifier = FrefQualNone;
- size_t ParamsBegin = Names.size();
- while (true) {
- if (consumeIf('E'))
- break;
- if (consumeIf('v'))
- continue;
- if (consumeIf("RE")) {
- ReferenceQualifier = FrefQualLValue;
- break;
- }
- if (consumeIf("OE")) {
- ReferenceQualifier = FrefQualRValue;
- break;
- }
- Node *T = parseType();
- if (T == nullptr)
- return nullptr;
- Names.push_back(T);
- }
-
- NodeArray Params = popTrailingNodeArray(ParamsBegin);
- return make<FunctionType>(ReturnType, Params, CVQuals,
- ReferenceQualifier, ExceptionSpec);
-}
-
-// extension:
-// <vector-type> ::= Dv <positive dimension number> _ <extended element type>
-// ::= Dv [<dimension expression>] _ <element type>
-// <extended element type> ::= <element type>
-// ::= p # AltiVec vector pixel
-Node *Db::parseVectorType() {
- if (!consumeIf("Dv"))
- return nullptr;
- if (look() >= '1' && look() <= '9') {
- StringView DimensionNumber = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- if (consumeIf('p'))
- return make<VectorType>(DimensionNumber);
- Node *ElemType = parseType();
- if (ElemType == nullptr)
- return nullptr;
- return make<VectorType>(ElemType, DimensionNumber);
- }
-
- if (!consumeIf('_')) {
- Node *DimExpr = parseExpr();
- if (!DimExpr)
- return nullptr;
- if (!consumeIf('_'))
- return nullptr;
- Node *ElemType = parseType();
- if (!ElemType)
- return nullptr;
- return make<VectorType>(ElemType, DimExpr);
- }
- Node *ElemType = parseType();
- if (!ElemType)
- return nullptr;
- return make<VectorType>(ElemType, StringView());
-}
-
-// <decltype> ::= Dt <expression> E # decltype of an id-expression or class member access (C++0x)
-// ::= DT <expression> E # decltype of an expression (C++0x)
-Node *Db::parseDecltype() {
- if (!consumeIf('D'))
- return nullptr;
- if (!consumeIf('t') && !consumeIf('T'))
- return nullptr;
- Node *E = parseExpr();
- if (E == nullptr)
- return nullptr;
- if (!consumeIf('E'))
- return nullptr;
- return make<EnclosingExpr>("decltype(", E, ")");
-}
-
-// <array-type> ::= A <positive dimension number> _ <element type>
-// ::= A [<dimension expression>] _ <element type>
-Node *Db::parseArrayType() {
- if (!consumeIf('A'))
- return nullptr;
-
- if (std::isdigit(look())) {
- StringView Dimension = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<ArrayType>(Ty, Dimension);
- }
-
- if (!consumeIf('_')) {
- Node *DimExpr = parseExpr();
- if (DimExpr == nullptr)
- return nullptr;
- if (!consumeIf('_'))
- return nullptr;
- Node *ElementType = parseType();
- if (ElementType == nullptr)
- return nullptr;
- return make<ArrayType>(ElementType, DimExpr);
- }
-
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<ArrayType>(Ty);
-}
-
-// <pointer-to-member-type> ::= M <class type> <member type>
-Node *Db::parsePointerToMemberType() {
- if (!consumeIf('M'))
- return nullptr;
- Node *ClassType = parseType();
- if (ClassType == nullptr)
- return nullptr;
- Node *MemberType = parseType();
- if (MemberType == nullptr)
- return nullptr;
- return make<PointerToMemberType>(ClassType, MemberType);
-}
-
-// <class-enum-type> ::= <name> # non-dependent type name, dependent type name, or dependent typename-specifier
-// ::= Ts <name> # dependent elaborated type specifier using 'struct' or 'class'
-// ::= Tu <name> # dependent elaborated type specifier using 'union'
-// ::= Te <name> # dependent elaborated type specifier using 'enum'
-Node *Db::parseClassEnumType() {
- StringView ElabSpef;
- if (consumeIf("Ts"))
- ElabSpef = "struct";
- else if (consumeIf("Tu"))
- ElabSpef = "union";
- else if (consumeIf("Te"))
- ElabSpef = "enum";
-
- Node *Name = parseName();
- if (Name == nullptr)
- return nullptr;
-
- if (!ElabSpef.empty())
- return make<ElaboratedTypeSpefType>(ElabSpef, Name);
-
- return Name;
-}
-
-// <qualified-type> ::= <qualifiers> <type>
-// <qualifiers> ::= <extended-qualifier>* <CV-qualifiers>
-// <extended-qualifier> ::= U <source-name> [<template-args>] # vendor extended type qualifier
-Node *Db::parseQualifiedType() {
- if (consumeIf('U')) {
- StringView Qual = parseBareSourceName();
- if (Qual.empty())
- return nullptr;
-
- // FIXME parse the optional <template-args> here!
-
- // extension ::= U <objc-name> <objc-type> # objc-type<identifier>
- if (Qual.startsWith("objcproto")) {
- StringView ProtoSourceName = Qual.dropFront(std::strlen("objcproto"));
- StringView Proto;
- {
- SwapAndRestore<const char *> SaveFirst(First, ProtoSourceName.begin()),
- SaveLast(Last, ProtoSourceName.end());
- Proto = parseBareSourceName();
- }
- if (Proto.empty())
- return nullptr;
- Node *Child = parseQualifiedType();
- if (Child == nullptr)
- return nullptr;
- return make<ObjCProtoName>(Child, Proto);
- }
-
- Node *Child = parseQualifiedType();
- if (Child == nullptr)
- return nullptr;
- return make<VendorExtQualType>(Child, Qual);
- }
-
- Qualifiers Quals = parseCVQualifiers();
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- if (Quals != QualNone)
- Ty = make<QualType>(Ty, Quals);
- return Ty;
-}
-
-// <type> ::= <builtin-type>
-// ::= <qualified-type>
-// ::= <function-type>
-// ::= <class-enum-type>
-// ::= <array-type>
-// ::= <pointer-to-member-type>
-// ::= <template-param>
-// ::= <template-template-param> <template-args>
-// ::= <decltype>
-// ::= P <type> # pointer
-// ::= R <type> # l-value reference
-// ::= O <type> # r-value reference (C++11)
-// ::= C <type> # complex pair (C99)
-// ::= G <type> # imaginary (C99)
-// ::= <substitution> # See Compression below
-// extension ::= U <objc-name> <objc-type> # objc-type<identifier>
-// extension ::= <vector-type> # <vector-type> starts with Dv
-//
-// <objc-name> ::= <k0 number> objcproto <k1 number> <identifier> # k0 = 9 + <number of digits in k1> + k1
-// <objc-type> ::= <source-name> # PU<11+>objcproto 11objc_object<source-name> 11objc_object -> id<source-name>
-Node *Db::parseType() {
- Node *Result = nullptr;
-
- switch (look()) {
- // ::= <qualified-type>
- case 'r':
- case 'V':
- case 'K': {
- unsigned AfterQuals = 0;
- if (look(AfterQuals) == 'r') ++AfterQuals;
- if (look(AfterQuals) == 'V') ++AfterQuals;
- if (look(AfterQuals) == 'K') ++AfterQuals;
-
- if (look(AfterQuals) == 'F' ||
- (look(AfterQuals) == 'D' &&
- (look(AfterQuals + 1) == 'o' || look(AfterQuals + 1) == 'O' ||
- look(AfterQuals + 1) == 'w' || look(AfterQuals + 1) == 'x'))) {
- Result = parseFunctionType();
- break;
- }
- LLVM_FALLTHROUGH;
- }
- case 'U': {
- Result = parseQualifiedType();
- break;
- }
- // <builtin-type> ::= v # void
- case 'v':
- ++First;
- return make<NameType>("void");
- // ::= w # wchar_t
- case 'w':
- ++First;
- return make<NameType>("wchar_t");
- // ::= b # bool
- case 'b':
- ++First;
- return make<NameType>("bool");
- // ::= c # char
- case 'c':
- ++First;
- return make<NameType>("char");
- // ::= a # signed char
- case 'a':
- ++First;
- return make<NameType>("signed char");
- // ::= h # unsigned char
- case 'h':
- ++First;
- return make<NameType>("unsigned char");
- // ::= s # short
- case 's':
- ++First;
- return make<NameType>("short");
- // ::= t # unsigned short
- case 't':
- ++First;
- return make<NameType>("unsigned short");
- // ::= i # int
- case 'i':
- ++First;
- return make<NameType>("int");
- // ::= j # unsigned int
- case 'j':
- ++First;
- return make<NameType>("unsigned int");
- // ::= l # long
- case 'l':
- ++First;
- return make<NameType>("long");
- // ::= m # unsigned long
- case 'm':
- ++First;
- return make<NameType>("unsigned long");
- // ::= x # long long, __int64
- case 'x':
- ++First;
- return make<NameType>("long long");
- // ::= y # unsigned long long, __int64
- case 'y':
- ++First;
- return make<NameType>("unsigned long long");
- // ::= n # __int128
- case 'n':
- ++First;
- return make<NameType>("__int128");
- // ::= o # unsigned __int128
- case 'o':
- ++First;
- return make<NameType>("unsigned __int128");
- // ::= f # float
- case 'f':
- ++First;
- return make<NameType>("float");
- // ::= d # double
- case 'd':
- ++First;
- return make<NameType>("double");
- // ::= e # long double, __float80
- case 'e':
- ++First;
- return make<NameType>("long double");
- // ::= g # __float128
- case 'g':
- ++First;
- return make<NameType>("__float128");
- // ::= z # ellipsis
- case 'z':
- ++First;
- return make<NameType>("...");
-
- // <builtin-type> ::= u <source-name> # vendor extended type
- case 'u': {
- ++First;
- StringView Res = parseBareSourceName();
- if (Res.empty())
- return nullptr;
- return make<NameType>(Res);
- }
- case 'D':
- switch (look(1)) {
- // ::= Dd # IEEE 754r decimal floating point (64 bits)
- case 'd':
- First += 2;
- return make<NameType>("decimal64");
- // ::= De # IEEE 754r decimal floating point (128 bits)
- case 'e':
- First += 2;
- return make<NameType>("decimal128");
- // ::= Df # IEEE 754r decimal floating point (32 bits)
- case 'f':
- First += 2;
- return make<NameType>("decimal32");
- // ::= Dh # IEEE 754r half-precision floating point (16 bits)
- case 'h':
- First += 2;
- return make<NameType>("decimal16");
- // ::= Di # char32_t
- case 'i':
- First += 2;
- return make<NameType>("char32_t");
- // ::= Ds # char16_t
- case 's':
- First += 2;
- return make<NameType>("char16_t");
- // ::= Da # auto (in dependent new-expressions)
- case 'a':
- First += 2;
- return make<NameType>("auto");
- // ::= Dc # decltype(auto)
- case 'c':
- First += 2;
- return make<NameType>("decltype(auto)");
- // ::= Dn # std::nullptr_t (i.e., decltype(nullptr))
- case 'n':
- First += 2;
- return make<NameType>("std::nullptr_t");
-
- // ::= <decltype>
- case 't':
- case 'T': {
- Result = parseDecltype();
- break;
- }
- // extension ::= <vector-type> # <vector-type> starts with Dv
- case 'v': {
- Result = parseVectorType();
- break;
- }
- // ::= Dp <type> # pack expansion (C++0x)
- case 'p': {
- First += 2;
- Node *Child = parseType();
- if (!Child)
- return nullptr;
- Result = make<ParameterPackExpansion>(Child);
- break;
- }
- // Exception specifier on a function type.
- case 'o':
- case 'O':
- case 'w':
- // Transaction safe function type.
- case 'x':
- Result = parseFunctionType();
- break;
- }
- break;
- // ::= <function-type>
- case 'F': {
- Result = parseFunctionType();
- break;
- }
- // ::= <array-type>
- case 'A': {
- Result = parseArrayType();
- break;
- }
- // ::= <pointer-to-member-type>
- case 'M': {
- Result = parsePointerToMemberType();
- break;
- }
- // ::= <template-param>
- case 'T': {
- // This could be an elaborated type specifier on a <class-enum-type>.
- if (look(1) == 's' || look(1) == 'u' || look(1) == 'e') {
- Result = parseClassEnumType();
- break;
- }
-
- Result = parseTemplateParam();
- if (Result == nullptr)
- return nullptr;
-
- // Result could be either of:
- // <type> ::= <template-param>
- // <type> ::= <template-template-param> <template-args>
- //
- // <template-template-param> ::= <template-param>
- // ::= <substitution>
- //
- // If this is followed by some <template-args>, and we're permitted to
- // parse them, take the second production.
-
- if (TryToParseTemplateArgs && look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- Result = make<NameWithTemplateArgs>(Result, TA);
- }
- break;
- }
- // ::= P <type> # pointer
- case 'P': {
- ++First;
- Node *Ptr = parseType();
- if (Ptr == nullptr)
- return nullptr;
- Result = make<PointerType>(Ptr);
- break;
- }
- // ::= R <type> # l-value reference
- case 'R': {
- ++First;
- Node *Ref = parseType();
- if (Ref == nullptr)
- return nullptr;
- Result = make<ReferenceType>(Ref, ReferenceKind::LValue);
- break;
- }
- // ::= O <type> # r-value reference (C++11)
- case 'O': {
- ++First;
- Node *Ref = parseType();
- if (Ref == nullptr)
- return nullptr;
- Result = make<ReferenceType>(Ref, ReferenceKind::RValue);
- break;
- }
- // ::= C <type> # complex pair (C99)
- case 'C': {
- ++First;
- Node *P = parseType();
- if (P == nullptr)
- return nullptr;
- Result = make<PostfixQualifiedType>(P, " complex");
- break;
- }
- // ::= G <type> # imaginary (C99)
- case 'G': {
- ++First;
- Node *P = parseType();
- if (P == nullptr)
- return P;
- Result = make<PostfixQualifiedType>(P, " imaginary");
- break;
- }
- // ::= <substitution> # See Compression below
- case 'S': {
- if (look(1) && look(1) != 't') {
- Node *Sub = parseSubstitution();
- if (Sub == nullptr)
- return nullptr;
-
- // Sub could be either of:
- // <type> ::= <substitution>
- // <type> ::= <template-template-param> <template-args>
- //
- // <template-template-param> ::= <template-param>
- // ::= <substitution>
- //
- // If this is followed by some <template-args>, and we're permitted to
- // parse them, take the second production.
-
- if (TryToParseTemplateArgs && look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- Result = make<NameWithTemplateArgs>(Sub, TA);
- break;
- }
-
- // If all we parsed was a substitution, don't re-insert into the
- // substitution table.
- return Sub;
- }
- LLVM_FALLTHROUGH;
- }
- // ::= <class-enum-type>
- default: {
- Result = parseClassEnumType();
- break;
- }
- }
-
- // If we parsed a type, insert it into the substitution table. Note that all
- // <builtin-type>s and <substitution>s have already bailed out, because they
- // don't get substitutions.
- if (Result != nullptr)
- Subs.push_back(Result);
- return Result;
-}
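
A minimal standalone lookup mirroring a few of the single-character <builtin-type> codes handled in parseType above; illustration only, not the full set.

#include <cstdio>

static const char *builtinTypeName(char C) {
  switch (C) {
  case 'v': return "void";
  case 'b': return "bool";
  case 'c': return "char";
  case 'i': return "int";
  case 'j': return "unsigned int";
  case 'l': return "long";
  case 'x': return "long long";
  case 'f': return "float";
  case 'd': return "double";
  default:  return nullptr;
  }
}

int main() {
  std::printf("'%c' -> %s\n", 'j', builtinTypeName('j')); // 'j' -> unsigned int
}
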
-
-Node *Db::parsePrefixExpr(StringView Kind) {
- Node *E = parseExpr();
- if (E == nullptr)
- return nullptr;
- return make<PrefixExpr>(Kind, E);
-}
-
-Node *Db::parseBinaryExpr(StringView Kind) {
- Node *LHS = parseExpr();
- if (LHS == nullptr)
- return nullptr;
- Node *RHS = parseExpr();
- if (RHS == nullptr)
- return nullptr;
- return make<BinaryExpr>(LHS, Kind, RHS);
-}
-
-Node *Db::parseIntegerLiteral(StringView Lit) {
- StringView Tmp = parseNumber(true);
- if (!Tmp.empty() && consumeIf('E'))
- return make<IntegerExpr>(Lit, Tmp);
- return nullptr;
-}
-
-// <CV-Qualifiers> ::= [r] [V] [K]
-Qualifiers Db::parseCVQualifiers() {
- Qualifiers CVR = QualNone;
- if (consumeIf('r'))
- addQualifiers(CVR, QualRestrict);
- if (consumeIf('V'))
- addQualifiers(CVR, QualVolatile);
- if (consumeIf('K'))
- addQualifiers(CVR, QualConst);
- return CVR;
-}
-
-// <function-param> ::= fp <top-level CV-Qualifiers> _ # L == 0, first parameter
-// ::= fp <top-level CV-Qualifiers> <parameter-2 non-negative number> _ # L == 0, second and later parameters
-// ::= fL <L-1 non-negative number> p <top-level CV-Qualifiers> _ # L > 0, first parameter
-// ::= fL <L-1 non-negative number> p <top-level CV-Qualifiers> <parameter-2 non-negative number> _ # L > 0, second and later parameters
-Node *Db::parseFunctionParam() {
- if (consumeIf("fp")) {
- parseCVQualifiers();
- StringView Num = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- return make<FunctionParam>(Num);
- }
- if (consumeIf("fL")) {
- if (parseNumber().empty())
- return nullptr;
- if (!consumeIf('p'))
- return nullptr;
- parseCVQualifiers();
- StringView Num = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- return make<FunctionParam>(Num);
- }
- return nullptr;
-}
-
-// [gs] nw <expression>* _ <type> E # new (expr-list) type
-// [gs] nw <expression>* _ <type> <initializer> # new (expr-list) type (init)
-// [gs] na <expression>* _ <type> E # new[] (expr-list) type
-// [gs] na <expression>* _ <type> <initializer> # new[] (expr-list) type (init)
-// <initializer> ::= pi <expression>* E # parenthesized initialization
-Node *Db::parseNewExpr() {
- bool Global = consumeIf("gs");
- bool IsArray = look(1) == 'a';
- if (!consumeIf("nw") && !consumeIf("na"))
- return nullptr;
- size_t Exprs = Names.size();
- while (!consumeIf('_')) {
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return nullptr;
- Names.push_back(Ex);
- }
- NodeArray ExprList = popTrailingNodeArray(Exprs);
- Node *Ty = parseType();
- if (Ty == nullptr)
- return Ty;
- if (consumeIf("pi")) {
- size_t InitsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *Init = parseExpr();
- if (Init == nullptr)
- return Init;
- Names.push_back(Init);
- }
- NodeArray Inits = popTrailingNodeArray(InitsBegin);
- return make<NewExpr>(ExprList, Ty, Inits, Global, IsArray);
- } else if (!consumeIf('E'))
- return nullptr;
- return make<NewExpr>(ExprList, Ty, NodeArray(), Global, IsArray);
-}
-
-// cv <type> <expression> # conversion with one argument
-// cv <type> _ <expression>* E # conversion with a different number of arguments
-Node *Db::parseConversionExpr() {
- if (!consumeIf("cv"))
- return nullptr;
- Node *Ty;
- {
- SwapAndRestore<bool> SaveTemp(TryToParseTemplateArgs, false);
- Ty = parseType();
- }
-
- if (Ty == nullptr)
- return nullptr;
-
- if (consumeIf('_')) {
- size_t ExprsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *E = parseExpr();
- if (E == nullptr)
- return E;
- Names.push_back(E);
- }
- NodeArray Exprs = popTrailingNodeArray(ExprsBegin);
- return make<ConversionExpr>(Ty, Exprs);
- }
-
- Node *E[1] = {parseExpr()};
- if (E[0] == nullptr)
- return nullptr;
- return make<ConversionExpr>(Ty, makeNodeArray(E, E + 1));
-}
-
-// <expr-primary> ::= L <type> <value number> E # integer literal
-// ::= L <type> <value float> E # floating literal
-// ::= L <string type> E # string literal
-// ::= L <nullptr type> E # nullptr literal (i.e., "LDnE")
-// FIXME: ::= L <type> <real-part float> _ <imag-part float> E # complex floating point literal (C 2000)
-// ::= L <mangled-name> E # external name
-Node *Db::parseExprPrimary() {
- if (!consumeIf('L'))
- return nullptr;
- switch (look()) {
- case 'w':
- ++First;
- return parseIntegerLiteral("wchar_t");
- case 'b':
- if (consumeIf("b0E"))
- return make<BoolExpr>(0);
- if (consumeIf("b1E"))
- return make<BoolExpr>(1);
- return nullptr;
- case 'c':
- ++First;
- return parseIntegerLiteral("char");
- case 'a':
- ++First;
- return parseIntegerLiteral("signed char");
- case 'h':
- ++First;
- return parseIntegerLiteral("unsigned char");
- case 's':
- ++First;
- return parseIntegerLiteral("short");
- case 't':
- ++First;
- return parseIntegerLiteral("unsigned short");
- case 'i':
- ++First;
- return parseIntegerLiteral("");
- case 'j':
- ++First;
- return parseIntegerLiteral("u");
- case 'l':
- ++First;
- return parseIntegerLiteral("l");
- case 'm':
- ++First;
- return parseIntegerLiteral("ul");
- case 'x':
- ++First;
- return parseIntegerLiteral("ll");
- case 'y':
- ++First;
- return parseIntegerLiteral("ull");
- case 'n':
- ++First;
- return parseIntegerLiteral("__int128");
- case 'o':
- ++First;
- return parseIntegerLiteral("unsigned __int128");
- case 'f':
- ++First;
- return parseFloatingLiteral<float>();
- case 'd':
- ++First;
- return parseFloatingLiteral<double>();
- case 'e':
- ++First;
- return parseFloatingLiteral<long double>();
- case '_':
- if (consumeIf("_Z")) {
- Node *R = parseEncoding();
- if (R != nullptr && consumeIf('E'))
- return R;
- }
- return nullptr;
- case 'T':
- // Invalid mangled name per
- // http://sourcerytools.com/pipermail/cxx-abi-dev/2011-August/002422.html
- return nullptr;
- default: {
- // might be named type
- Node *T = parseType();
- if (T == nullptr)
- return nullptr;
- StringView N = parseNumber();
- if (!N.empty()) {
- if (!consumeIf('E'))
- return nullptr;
- return make<IntegerCastExpr>(T, N);
- }
- if (consumeIf('E'))
- return T;
- return nullptr;
- }
- }
-}
-
-// <braced-expression> ::= <expression>
-// ::= di <field source-name> <braced-expression> # .name = expr
-// ::= dx <index expression> <braced-expression> # [expr] = expr
-// ::= dX <range begin expression> <range end expression> <braced-expression>
-Node *Db::parseBracedExpr() {
- if (look() == 'd') {
- switch (look(1)) {
- case 'i': {
- First += 2;
- Node *Field = parseSourceName(/*NameState=*/nullptr);
- if (Field == nullptr)
- return nullptr;
- Node *Init = parseBracedExpr();
- if (Init == nullptr)
- return nullptr;
- return make<BracedExpr>(Field, Init, /*isArray=*/false);
- }
- case 'x': {
- First += 2;
- Node *Index = parseExpr();
- if (Index == nullptr)
- return nullptr;
- Node *Init = parseBracedExpr();
- if (Init == nullptr)
- return nullptr;
- return make<BracedExpr>(Index, Init, /*isArray=*/true);
- }
- case 'X': {
- First += 2;
- Node *RangeBegin = parseExpr();
- if (RangeBegin == nullptr)
- return nullptr;
- Node *RangeEnd = parseExpr();
- if (RangeEnd == nullptr)
- return nullptr;
- Node *Init = parseBracedExpr();
- if (Init == nullptr)
- return nullptr;
- return make<BracedRangeExpr>(RangeBegin, RangeEnd, Init);
- }
- }
- }
- return parseExpr();
-}
-
-// (not yet in the spec)
-// <fold-expr> ::= fL <binary-operator-name> <expression> <expression>
-// ::= fR <binary-operator-name> <expression> <expression>
-// ::= fl <binary-operator-name> <expression>
-// ::= fr <binary-operator-name> <expression>
-Node *Db::parseFoldExpr() {
- if (!consumeIf('f'))
- return nullptr;
-
- char FoldKind = look();
- bool IsLeftFold, HasInitializer;
- HasInitializer = FoldKind == 'L' || FoldKind == 'R';
- if (FoldKind == 'l' || FoldKind == 'L')
- IsLeftFold = true;
- else if (FoldKind == 'r' || FoldKind == 'R')
- IsLeftFold = false;
- else
- return nullptr;
- ++First;
-
- // FIXME: This map is duplicated in parseOperatorName and parseExpr.
- StringView OperatorName;
- if (consumeIf("aa")) OperatorName = "&&";
- else if (consumeIf("an")) OperatorName = "&";
- else if (consumeIf("aN")) OperatorName = "&=";
- else if (consumeIf("aS")) OperatorName = "=";
- else if (consumeIf("cm")) OperatorName = ",";
- else if (consumeIf("ds")) OperatorName = ".*";
- else if (consumeIf("dv")) OperatorName = "/";
- else if (consumeIf("dV")) OperatorName = "/=";
- else if (consumeIf("eo")) OperatorName = "^";
- else if (consumeIf("eO")) OperatorName = "^=";
- else if (consumeIf("eq")) OperatorName = "==";
- else if (consumeIf("ge")) OperatorName = ">=";
- else if (consumeIf("gt")) OperatorName = ">";
- else if (consumeIf("le")) OperatorName = "<=";
- else if (consumeIf("ls")) OperatorName = "<<";
- else if (consumeIf("lS")) OperatorName = "<<=";
- else if (consumeIf("lt")) OperatorName = "<";
- else if (consumeIf("mi")) OperatorName = "-";
- else if (consumeIf("mI")) OperatorName = "-=";
- else if (consumeIf("ml")) OperatorName = "*";
- else if (consumeIf("mL")) OperatorName = "*=";
- else if (consumeIf("ne")) OperatorName = "!=";
- else if (consumeIf("oo")) OperatorName = "||";
- else if (consumeIf("or")) OperatorName = "|";
- else if (consumeIf("oR")) OperatorName = "|=";
- else if (consumeIf("pl")) OperatorName = "+";
- else if (consumeIf("pL")) OperatorName = "+=";
- else if (consumeIf("rm")) OperatorName = "%";
- else if (consumeIf("rM")) OperatorName = "%=";
- else if (consumeIf("rs")) OperatorName = ">>";
- else if (consumeIf("rS")) OperatorName = ">>=";
- else return nullptr;
-
- Node *Pack = parseExpr(), *Init = nullptr;
- if (Pack == nullptr)
- return nullptr;
- if (HasInitializer) {
- Init = parseExpr();
- if (Init == nullptr)
- return nullptr;
- }
-
- if (IsLeftFold && Init)
- std::swap(Pack, Init);
-
- return make<FoldExpr>(IsLeftFold, OperatorName, Pack, Init);
-}
-
-// <expression> ::= <unary operator-name> <expression>
-// ::= <binary operator-name> <expression> <expression>
-// ::= <ternary operator-name> <expression> <expression> <expression>
-// ::= cl <expression>+ E # call
-// ::= cv <type> <expression> # conversion with one argument
-// ::= cv <type> _ <expression>* E # conversion with a different number of arguments
-// ::= [gs] nw <expression>* _ <type> E # new (expr-list) type
-// ::= [gs] nw <expression>* _ <type> <initializer> # new (expr-list) type (init)
-// ::= [gs] na <expression>* _ <type> E # new[] (expr-list) type
-// ::= [gs] na <expression>* _ <type> <initializer> # new[] (expr-list) type (init)
-// ::= [gs] dl <expression> # delete expression
-// ::= [gs] da <expression> # delete[] expression
-// ::= pp_ <expression> # prefix ++
-// ::= mm_ <expression> # prefix --
-// ::= ti <type> # typeid (type)
-// ::= te <expression> # typeid (expression)
-// ::= dc <type> <expression> # dynamic_cast<type> (expression)
-// ::= sc <type> <expression> # static_cast<type> (expression)
-// ::= cc <type> <expression> # const_cast<type> (expression)
-// ::= rc <type> <expression> # reinterpret_cast<type> (expression)
-// ::= st <type> # sizeof (a type)
-// ::= sz <expression> # sizeof (an expression)
-// ::= at <type> # alignof (a type)
-// ::= az <expression> # alignof (an expression)
-// ::= nx <expression> # noexcept (expression)
-// ::= <template-param>
-// ::= <function-param>
-// ::= dt <expression> <unresolved-name> # expr.name
-// ::= pt <expression> <unresolved-name> # expr->name
-// ::= ds <expression> <expression> # expr.*expr
-// ::= sZ <template-param> # size of a parameter pack
-// ::= sZ <function-param> # size of a function parameter pack
-// ::= sP <template-arg>* E # sizeof...(T), size of a captured template parameter pack from an alias template
-// ::= sp <expression> # pack expansion
-// ::= tw <expression> # throw expression
-// ::= tr # throw with no operand (rethrow)
-// ::= <unresolved-name> # f(p), N::f(p), ::f(p),
-// # freestanding dependent name (e.g., T::x),
-// # objectless nonstatic member reference
-// ::= fL <binary-operator-name> <expression> <expression>
-// ::= fR <binary-operator-name> <expression> <expression>
-// ::= fl <binary-operator-name> <expression>
-// ::= fr <binary-operator-name> <expression>
-// ::= <expr-primary>
-Node *Db::parseExpr() {
- bool Global = consumeIf("gs");
- if (numLeft() < 2)
- return nullptr;
-
- switch (*First) {
- case 'L':
- return parseExprPrimary();
- case 'T':
- return parseTemplateParam();
- case 'f': {
- // Disambiguate a fold expression from a <function-param>.
- if (look(1) == 'p' || (look(1) == 'L' && std::isdigit(look(2))))
- return parseFunctionParam();
- return parseFoldExpr();
- }
- case 'a':
- switch (First[1]) {
- case 'a':
- First += 2;
- return parseBinaryExpr("&&");
- case 'd':
- First += 2;
- return parsePrefixExpr("&");
- case 'n':
- First += 2;
- return parseBinaryExpr("&");
- case 'N':
- First += 2;
- return parseBinaryExpr("&=");
- case 'S':
- First += 2;
- return parseBinaryExpr("=");
- case 't': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<EnclosingExpr>("alignof (", Ty, ")");
- }
- case 'z': {
- First += 2;
- Node *Ty = parseExpr();
- if (Ty == nullptr)
- return nullptr;
- return make<EnclosingExpr>("alignof (", Ty, ")");
- }
- }
- return nullptr;
- case 'c':
- switch (First[1]) {
- // cc <type> <expression> # const_cast<type>(expression)
- case 'c': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return Ty;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("const_cast", Ty, Ex);
- }
- // cl <expression>+ E # call
- case 'l': {
- First += 2;
- Node *Callee = parseExpr();
- if (Callee == nullptr)
- return Callee;
- size_t ExprsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *E = parseExpr();
- if (E == nullptr)
- return E;
- Names.push_back(E);
- }
- return make<CallExpr>(Callee, popTrailingNodeArray(ExprsBegin));
- }
- case 'm':
- First += 2;
- return parseBinaryExpr(",");
- case 'o':
- First += 2;
- return parsePrefixExpr("~");
- case 'v':
- return parseConversionExpr();
- }
- return nullptr;
- case 'd':
- switch (First[1]) {
- case 'a': {
- First += 2;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<DeleteExpr>(Ex, Global, /*is_array=*/true);
- }
- case 'c': {
- First += 2;
- Node *T = parseType();
- if (T == nullptr)
- return T;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("dynamic_cast", T, Ex);
- }
- case 'e':
- First += 2;
- return parsePrefixExpr("*");
- case 'l': {
- First += 2;
- Node *E = parseExpr();
- if (E == nullptr)
- return E;
- return make<DeleteExpr>(E, Global, /*is_array=*/false);
- }
- case 'n':
- return parseUnresolvedName();
- case 's': {
- First += 2;
- Node *LHS = parseExpr();
- if (LHS == nullptr)
- return nullptr;
- Node *RHS = parseExpr();
- if (RHS == nullptr)
- return nullptr;
- return make<MemberExpr>(LHS, ".*", RHS);
- }
- case 't': {
- First += 2;
- Node *LHS = parseExpr();
- if (LHS == nullptr)
- return LHS;
- Node *RHS = parseExpr();
- if (RHS == nullptr)
- return nullptr;
- return make<MemberExpr>(LHS, ".", RHS);
- }
- case 'v':
- First += 2;
- return parseBinaryExpr("/");
- case 'V':
- First += 2;
- return parseBinaryExpr("/=");
- }
- return nullptr;
- case 'e':
- switch (First[1]) {
- case 'o':
- First += 2;
- return parseBinaryExpr("^");
- case 'O':
- First += 2;
- return parseBinaryExpr("^=");
- case 'q':
- First += 2;
- return parseBinaryExpr("==");
- }
- return nullptr;
- case 'g':
- switch (First[1]) {
- case 'e':
- First += 2;
- return parseBinaryExpr(">=");
- case 't':
- First += 2;
- return parseBinaryExpr(">");
- }
- return nullptr;
- case 'i':
- switch (First[1]) {
- case 'x': {
- First += 2;
- Node *Base = parseExpr();
- if (Base == nullptr)
- return nullptr;
- Node *Index = parseExpr();
- if (Index == nullptr)
- return Index;
- return make<ArraySubscriptExpr>(Base, Index);
- }
- case 'l': {
- First += 2;
- size_t InitsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *E = parseBracedExpr();
- if (E == nullptr)
- return nullptr;
- Names.push_back(E);
- }
- return make<InitListExpr>(nullptr, popTrailingNodeArray(InitsBegin));
- }
- }
- return nullptr;
- case 'l':
- switch (First[1]) {
- case 'e':
- First += 2;
- return parseBinaryExpr("<=");
- case 's':
- First += 2;
- return parseBinaryExpr("<<");
- case 'S':
- First += 2;
- return parseBinaryExpr("<<=");
- case 't':
- First += 2;
- return parseBinaryExpr("<");
- }
- return nullptr;
- case 'm':
- switch (First[1]) {
- case 'i':
- First += 2;
- return parseBinaryExpr("-");
- case 'I':
- First += 2;
- return parseBinaryExpr("-=");
- case 'l':
- First += 2;
- return parseBinaryExpr("*");
- case 'L':
- First += 2;
- return parseBinaryExpr("*=");
- case 'm':
- First += 2;
- if (consumeIf('_'))
- return parsePrefixExpr("--");
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return nullptr;
- return make<PostfixExpr>(Ex, "--");
- }
- return nullptr;
- case 'n':
- switch (First[1]) {
- case 'a':
- case 'w':
- return parseNewExpr();
- case 'e':
- First += 2;
- return parseBinaryExpr("!=");
- case 'g':
- First += 2;
- return parsePrefixExpr("-");
- case 't':
- First += 2;
- return parsePrefixExpr("!");
- case 'x':
- First += 2;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<EnclosingExpr>("noexcept (", Ex, ")");
- }
- return nullptr;
- case 'o':
- switch (First[1]) {
- case 'n':
- return parseUnresolvedName();
- case 'o':
- First += 2;
- return parseBinaryExpr("||");
- case 'r':
- First += 2;
- return parseBinaryExpr("|");
- case 'R':
- First += 2;
- return parseBinaryExpr("|=");
- }
- return nullptr;
- case 'p':
- switch (First[1]) {
- case 'm':
- First += 2;
- return parseBinaryExpr("->*");
- case 'l':
- First += 2;
- return parseBinaryExpr("+");
- case 'L':
- First += 2;
- return parseBinaryExpr("+=");
- case 'p': {
- First += 2;
- if (consumeIf('_'))
- return parsePrefixExpr("++");
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<PostfixExpr>(Ex, "++");
- }
- case 's':
- First += 2;
- return parsePrefixExpr("+");
- case 't': {
- First += 2;
- Node *L = parseExpr();
- if (L == nullptr)
- return nullptr;
- Node *R = parseExpr();
- if (R == nullptr)
- return nullptr;
- return make<MemberExpr>(L, "->", R);
- }
- }
- return nullptr;
- case 'q':
- if (First[1] == 'u') {
- First += 2;
- Node *Cond = parseExpr();
- if (Cond == nullptr)
- return nullptr;
- Node *LHS = parseExpr();
- if (LHS == nullptr)
- return nullptr;
- Node *RHS = parseExpr();
- if (RHS == nullptr)
- return nullptr;
- return make<ConditionalExpr>(Cond, LHS, RHS);
- }
- return nullptr;
- case 'r':
- switch (First[1]) {
- case 'c': {
- First += 2;
- Node *T = parseType();
- if (T == nullptr)
- return T;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("reinterpret_cast", T, Ex);
- }
- case 'm':
- First += 2;
- return parseBinaryExpr("%");
- case 'M':
- First += 2;
- return parseBinaryExpr("%=");
- case 's':
- First += 2;
- return parseBinaryExpr(">>");
- case 'S':
- First += 2;
- return parseBinaryExpr(">>=");
- }
- return nullptr;
- case 's':
- switch (First[1]) {
- case 'c': {
- First += 2;
- Node *T = parseType();
- if (T == nullptr)
- return T;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("static_cast", T, Ex);
- }
- case 'p': {
- First += 2;
- Node *Child = parseExpr();
- if (Child == nullptr)
- return nullptr;
- return make<ParameterPackExpansion>(Child);
- }
- case 'r':
- return parseUnresolvedName();
- case 't': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return Ty;
- return make<EnclosingExpr>("sizeof (", Ty, ")");
- }
- case 'z': {
- First += 2;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<EnclosingExpr>("sizeof (", Ex, ")");
- }
- case 'Z':
- First += 2;
- if (look() == 'T') {
- Node *R = parseTemplateParam();
- if (R == nullptr)
- return nullptr;
- return make<SizeofParamPackExpr>(R);
- } else if (look() == 'f') {
- Node *FP = parseFunctionParam();
- if (FP == nullptr)
- return nullptr;
- return make<EnclosingExpr>("sizeof... (", FP, ")");
- }
- return nullptr;
- case 'P': {
- First += 2;
- size_t ArgsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *Arg = parseTemplateArg();
- if (Arg == nullptr)
- return nullptr;
- Names.push_back(Arg);
- }
- return make<EnclosingExpr>(
- "sizeof... (", make<NodeArrayNode>(popTrailingNodeArray(ArgsBegin)),
- ")");
- }
- }
- return nullptr;
- case 't':
- switch (First[1]) {
- case 'e': {
- First += 2;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<EnclosingExpr>("typeid (", Ex, ")");
- }
- case 'i': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return Ty;
- return make<EnclosingExpr>("typeid (", Ty, ")");
- }
- case 'l': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- size_t InitsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *E = parseBracedExpr();
- if (E == nullptr)
- return nullptr;
- Names.push_back(E);
- }
- return make<InitListExpr>(Ty, popTrailingNodeArray(InitsBegin));
- }
- case 'r':
- First += 2;
- return make<NameType>("throw");
- case 'w': {
- First += 2;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return nullptr;
- return make<ThrowExpr>(Ex);
- }
- }
- return nullptr;
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- return parseUnresolvedName();
- }
- return nullptr;
-}
-
-// <call-offset> ::= h <nv-offset> _
-// ::= v <v-offset> _
-//
-// <nv-offset> ::= <offset number>
-// # non-virtual base override
-//
-// <v-offset> ::= <offset number> _ <virtual offset number>
-// # virtual base override, with vcall offset
-bool Db::parseCallOffset() {
- // Just scan through the call offset; we never add this information into the
- // output.
- if (consumeIf('h'))
- return parseNumber(true).empty() || !consumeIf('_');
- if (consumeIf('v'))
- return parseNumber(true).empty() || !consumeIf('_') ||
- parseNumber(true).empty() || !consumeIf('_');
- return true;
-}
-
-// <special-name> ::= TV <type> # virtual table
-// ::= TT <type> # VTT structure (construction vtable index)
-// ::= TI <type> # typeinfo structure
-// ::= TS <type> # typeinfo name (null-terminated byte string)
-// ::= Tc <call-offset> <call-offset> <base encoding>
-// # base is the nominal target function of thunk
-// # first call-offset is 'this' adjustment
-// # second call-offset is result adjustment
-// ::= T <call-offset> <base encoding>
-// # base is the nominal target function of thunk
-// ::= GV <object name> # Guard variable for one-time initialization
-// # No <type>
-// ::= TW <object name> # Thread-local wrapper
-// ::= TH <object name> # Thread-local initialization
-// ::= GR <object name> _ # First temporary
-// ::= GR <object name> <seq-id> _ # Subsequent temporaries
-// extension ::= TC <first type> <number> _ <second type> # construction vtable for second-in-first
-// extension ::= GR <object name> # reference temporary for object
-Node *Db::parseSpecialName() {
- switch (look()) {
- case 'T':
- switch (look(1)) {
- // TV <type> # virtual table
- case 'V': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<SpecialName>("vtable for ", Ty);
- }
- // TT <type> # VTT structure (construction vtable index)
- case 'T': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<SpecialName>("VTT for ", Ty);
- }
- // TI <type> # typeinfo structure
- case 'I': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<SpecialName>("typeinfo for ", Ty);
- }
- // TS <type> # typeinfo name (null-terminated byte string)
- case 'S': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<SpecialName>("typeinfo name for ", Ty);
- }
- // Tc <call-offset> <call-offset> <base encoding>
- case 'c': {
- First += 2;
- if (parseCallOffset() || parseCallOffset())
- return nullptr;
- Node *Encoding = parseEncoding();
- if (Encoding == nullptr)
- return nullptr;
- return make<SpecialName>("covariant return thunk to ", Encoding);
- }
- // extension ::= TC <first type> <number> _ <second type>
- // # construction vtable for second-in-first
- case 'C': {
- First += 2;
- Node *FirstType = parseType();
- if (FirstType == nullptr)
- return nullptr;
- if (parseNumber(true).empty() || !consumeIf('_'))
- return nullptr;
- Node *SecondType = parseType();
- if (SecondType == nullptr)
- return nullptr;
- return make<CtorVtableSpecialName>(SecondType, FirstType);
- }
- // TW <object name> # Thread-local wrapper
- case 'W': {
- First += 2;
- Node *Name = parseName();
- if (Name == nullptr)
- return nullptr;
- return make<SpecialName>("thread-local wrapper routine for ", Name);
- }
- // TH <object name> # Thread-local initialization
- case 'H': {
- First += 2;
- Node *Name = parseName();
- if (Name == nullptr)
- return nullptr;
- return make<SpecialName>("thread-local initialization routine for ", Name);
- }
- // T <call-offset> <base encoding>
- default: {
- ++First;
- bool IsVirt = look() == 'v';
- if (parseCallOffset())
- return nullptr;
- Node *BaseEncoding = parseEncoding();
- if (BaseEncoding == nullptr)
- return nullptr;
- if (IsVirt)
- return make<SpecialName>("virtual thunk to ", BaseEncoding);
- else
- return make<SpecialName>("non-virtual thunk to ", BaseEncoding);
- }
- }
- case 'G':
- switch (look(1)) {
- // GV <object name> # Guard variable for one-time initialization
- case 'V': {
- First += 2;
- Node *Name = parseName();
- if (Name == nullptr)
- return nullptr;
- return make<SpecialName>("guard variable for ", Name);
- }
- // GR <object name> # reference temporary for object
- // GR <object name> _ # First temporary
- // GR <object name> <seq-id> _ # Subsequent temporaries
- case 'R': {
- First += 2;
- Node *Name = parseName();
- if (Name == nullptr)
- return nullptr;
- size_t Count;
- bool ParsedSeqId = !parseSeqId(&Count);
- if (!consumeIf('_') && ParsedSeqId)
- return nullptr;
- return make<SpecialName>("reference temporary for ", Name);
- }
- }
- }
- return nullptr;
-}
-
-// <encoding> ::= <function name> <bare-function-type>
-// ::= <data name>
-// ::= <special-name>
-Node *Db::parseEncoding() {
- if (look() == 'G' || look() == 'T')
- return parseSpecialName();
-
- auto IsEndOfEncoding = [&] {
- // The set of chars that can potentially follow an <encoding> (none of which
- // can start a <type>). Enumerating these allows us to avoid speculative
- // parsing.
- return numLeft() == 0 || look() == 'E' || look() == '.' || look() == '_';
- };
-
- NameState NameInfo(this);
- Node *Name = parseName(&NameInfo);
- if (Name == nullptr)
- return nullptr;
-
- if (resolveForwardTemplateRefs(NameInfo))
- return nullptr;
-
- if (IsEndOfEncoding())
- return Name;
-
- Node *Attrs = nullptr;
- if (consumeIf("Ua9enable_ifI")) {
- size_t BeforeArgs = Names.size();
- while (!consumeIf('E')) {
- Node *Arg = parseTemplateArg();
- if (Arg == nullptr)
- return nullptr;
- Names.push_back(Arg);
- }
- Attrs = make<EnableIfAttr>(popTrailingNodeArray(BeforeArgs));
- }
-
- Node *ReturnType = nullptr;
- if (!NameInfo.CtorDtorConversion && NameInfo.EndsWithTemplateArgs) {
- ReturnType = parseType();
- if (ReturnType == nullptr)
- return nullptr;
- }
-
- if (consumeIf('v'))
- return make<FunctionEncoding>(ReturnType, Name, NodeArray(),
- Attrs, NameInfo.CVQualifiers,
- NameInfo.ReferenceQualifier);
-
- size_t ParamsBegin = Names.size();
- do {
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- Names.push_back(Ty);
- } while (!IsEndOfEncoding());
-
- return make<FunctionEncoding>(ReturnType, Name,
- popTrailingNodeArray(ParamsBegin),
- Attrs, NameInfo.CVQualifiers,
- NameInfo.ReferenceQualifier);
-}
-
-template <class Float>
-struct FloatData;
-
-template <>
-struct FloatData<float>
-{
- static const size_t mangled_size = 8;
- static const size_t max_demangled_size = 24;
- static constexpr const char* spec = "%af";
-};
-
-constexpr const char* FloatData<float>::spec;
-
-template <>
-struct FloatData<double>
-{
- static const size_t mangled_size = 16;
- static const size_t max_demangled_size = 32;
- static constexpr const char* spec = "%a";
-};
-
-constexpr const char* FloatData<double>::spec;
-
-template <>
-struct FloatData<long double>
-{
-#if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \
- defined(__wasm__)
- static const size_t mangled_size = 32;
-#elif defined(__arm__) || defined(__mips__) || defined(__hexagon__)
- static const size_t mangled_size = 16;
-#else
- static const size_t mangled_size = 20; // May need to be adjusted to 16 or 24 on other platforms
-#endif
- static const size_t max_demangled_size = 40;
- static constexpr const char *spec = "%LaL";
-};
-
-constexpr const char *FloatData<long double>::spec;
-
-template <class Float> Node *Db::parseFloatingLiteral() {
- const size_t N = FloatData<Float>::mangled_size;
- if (numLeft() <= N)
- return nullptr;
- StringView Data(First, First + N);
- for (char C : Data)
- if (!std::isxdigit(C))
- return nullptr;
- First += N;
- if (!consumeIf('E'))
- return nullptr;
- return make<FloatExpr<Float>>(Data);
-}
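The mangled spelling accepted here is simply the value's bit pattern written as fixed-width hexadecimal, with the width given by FloatData<>::mangled_size. A minimal sketch of the float case (illustrative only, not LLVM code):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>

// Illustrative only: a float literal is mangled as its IEEE-754 bit pattern
// written as FloatData<float>::mangled_size (8) hex digits, so the
// <expr-primary> "Lf3f800000E" encodes the literal 1.0f. The parser above
// accepts either letter case (std::isxdigit); lowercase is shown here.
static std::string floatBitsAsHex(float F) {
  std::uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // bit-copy, avoids type-punning UB
  char Buf[9];
  std::snprintf(Buf, sizeof(Buf), "%08x", static_cast<unsigned>(Bits));
  return Buf;
}

int main() {
  assert(floatBitsAsHex(1.0f) == "3f800000");
  assert(floatBitsAsHex(0.0f) == "00000000");
  return 0;
}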
-
-// <seq-id> ::= <0-9A-Z>+
-bool Db::parseSeqId(size_t *Out) {
- if (!(look() >= '0' && look() <= '9') &&
- !(look() >= 'A' && look() <= 'Z'))
- return true;
-
- size_t Id = 0;
- while (true) {
- if (look() >= '0' && look() <= '9') {
- Id *= 36;
- Id += static_cast<size_t>(look() - '0');
- } else if (look() >= 'A' && look() <= 'Z') {
- Id *= 36;
- Id += static_cast<size_t>(look() - 'A') + 10;
- } else {
- *Out = Id;
- return false;
- }
- ++First;
- }
-}
-
-// <substitution> ::= S <seq-id> _
-// ::= S_
-// <substitution> ::= Sa # ::std::allocator
-// <substitution> ::= Sb # ::std::basic_string
-// <substitution> ::= Ss # ::std::basic_string < char,
-// ::std::char_traits<char>,
-// ::std::allocator<char> >
-// <substitution> ::= Si # ::std::basic_istream<char, std::char_traits<char> >
-// <substitution> ::= So # ::std::basic_ostream<char, std::char_traits<char> >
-// <substitution> ::= Sd # ::std::basic_iostream<char, std::char_traits<char> >
-Node *Db::parseSubstitution() {
- if (!consumeIf('S'))
- return nullptr;
-
- if (std::islower(look())) {
- Node *SpecialSub;
- switch (look()) {
- case 'a':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::allocator);
- break;
- case 'b':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::basic_string);
- break;
- case 's':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::string);
- break;
- case 'i':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::istream);
- break;
- case 'o':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::ostream);
- break;
- case 'd':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::iostream);
- break;
- default:
- return nullptr;
- }
- // Itanium C++ ABI 5.1.2: If a name that would use a built-in <substitution>
- // has ABI tags, the tags are appended to the substitution; the result is a
- // substitutable component.
- Node *WithTags = parseAbiTags(SpecialSub);
- if (WithTags != SpecialSub) {
- Subs.push_back(WithTags);
- SpecialSub = WithTags;
- }
- return SpecialSub;
- }
-
- // ::= S_
- if (consumeIf('_')) {
- if (Subs.empty())
- return nullptr;
- return Subs[0];
- }
-
- // ::= S <seq-id> _
- size_t Index = 0;
- if (parseSeqId(&Index))
- return nullptr;
- ++Index;
- if (!consumeIf('_') || Index >= Subs.size())
- return nullptr;
- return Subs[Index];
-}
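Worked examples of the indexing implemented by parseSeqId and parseSubstitution above: "S_" refers to Subs[0]; a non-empty <seq-id> is decoded in base 36 and biased by one, so "S0_" is Subs[1], "SA_" is Subs[11], and "S10_" (1*36 + 0 = 36) is Subs[37]. A standalone sketch of just that arithmetic (not LLVM code):

#include <cassert>
#include <cstddef>
#include <string>

// Illustrative only: decodes "S<seq-id>_" the way parseSeqId and
// parseSubstitution do together: base-36 digits ('0'-'9', then 'A'-'Z'),
// plus one when a <seq-id> is present.
static bool substitutionIndex(const std::string &S, std::size_t &Index) {
  if (S.size() < 2 || S.front() != 'S' || S.back() != '_')
    return false;
  Index = 0;
  for (std::size_t I = 1; I + 1 < S.size(); ++I) {
    char C = S[I];
    if (C >= '0' && C <= '9')
      Index = Index * 36 + static_cast<std::size_t>(C - '0');
    else if (C >= 'A' && C <= 'Z')
      Index = Index * 36 + static_cast<std::size_t>(C - 'A') + 10;
    else
      return false;
  }
  if (S.size() > 2) // a non-empty <seq-id> means "index + 1"
    ++Index;
  return true;
}

int main() {
  std::size_t I = 0;
  assert(substitutionIndex("S_", I) && I == 0);
  assert(substitutionIndex("S0_", I) && I == 1);
  assert(substitutionIndex("SA_", I) && I == 11);
  assert(substitutionIndex("S10_", I) && I == 37);
  return 0;
}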
-
-// <template-param> ::= T_ # first template parameter
-// ::= T <parameter-2 non-negative number> _
-Node *Db::parseTemplateParam() {
- if (!consumeIf('T'))
- return nullptr;
-
- size_t Index = 0;
- if (!consumeIf('_')) {
- if (parsePositiveInteger(&Index))
- return nullptr;
- ++Index;
- if (!consumeIf('_'))
- return nullptr;
- }
-
- // Itanium ABI 5.1.8: In a generic lambda, uses of auto in the parameter list
- // are mangled as the corresponding artificial template type parameter.
- if (ParsingLambdaParams)
- return make<NameType>("auto");
-
- // If we're in a context where this <template-param> refers to a
- // <template-arg> further ahead in the mangled name (currently just conversion
- // operator types), then we should only look it up in the right context.
- if (PermitForwardTemplateReferences) {
- ForwardTemplateRefs.push_back(make<ForwardTemplateReference>(Index));
- return ForwardTemplateRefs.back();
- }
-
- if (Index >= TemplateParams.size())
- return nullptr;
- return TemplateParams[Index];
-}
-
-// <template-arg> ::= <type> # type or template
-// ::= X <expression> E # expression
-// ::= <expr-primary> # simple expressions
-// ::= J <template-arg>* E # argument pack
-// ::= LZ <encoding> E # extension
-Node *Db::parseTemplateArg() {
- switch (look()) {
- case 'X': {
- ++First;
- Node *Arg = parseExpr();
- if (Arg == nullptr || !consumeIf('E'))
- return nullptr;
- return Arg;
- }
- case 'J': {
- ++First;
- size_t ArgsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *Arg = parseTemplateArg();
- if (Arg == nullptr)
- return nullptr;
- Names.push_back(Arg);
- }
- NodeArray Args = popTrailingNodeArray(ArgsBegin);
- return make<TemplateArgumentPack>(Args);
- }
- case 'L': {
- // ::= LZ <encoding> E # extension
- if (look(1) == 'Z') {
- First += 2;
- Node *Arg = parseEncoding();
- if (Arg == nullptr || !consumeIf('E'))
- return nullptr;
- return Arg;
- }
- // ::= <expr-primary> # simple expressions
- return parseExprPrimary();
- }
- default:
- return parseType();
- }
-}
-
-// <template-args> ::= I <template-arg>* E
-// extension: the ABI says <template-arg>+
-Node *Db::parseTemplateArgs(bool TagTemplates) {
- if (!consumeIf('I'))
- return nullptr;
-
- // <template-params> refer to the innermost <template-args>. Clear out any
- // outer args that we may have inserted into TemplateParams.
- if (TagTemplates)
- TemplateParams.clear();
-
- size_t ArgsBegin = Names.size();
- while (!consumeIf('E')) {
- if (TagTemplates) {
- auto OldParams = std::move(TemplateParams);
- Node *Arg = parseTemplateArg();
- TemplateParams = std::move(OldParams);
- if (Arg == nullptr)
- return nullptr;
- Names.push_back(Arg);
- Node *TableEntry = Arg;
- if (Arg->getKind() == Node::KTemplateArgumentPack) {
- TableEntry = make<ParameterPack>(
- static_cast<TemplateArgumentPack*>(TableEntry)->getElements());
- }
- TemplateParams.push_back(TableEntry);
- } else {
- Node *Arg = parseTemplateArg();
- if (Arg == nullptr)
- return nullptr;
- Names.push_back(Arg);
- }
- }
- return make<TemplateArgs>(popTrailingNodeArray(ArgsBegin));
-}
-
-// <discriminator> := _ <non-negative number> # when number < 10
-// := __ <non-negative number> _ # when number >= 10
-// extension := decimal-digit+ # at the end of string
-
-const char*
-parse_discriminator(const char* first, const char* last)
-{
- // parse but ignore discriminator
- if (first != last)
- {
- if (*first == '_')
- {
- const char* t1 = first+1;
- if (t1 != last)
- {
- if (std::isdigit(*t1))
- first = t1+1;
- else if (*t1 == '_')
- {
- for (++t1; t1 != last && std::isdigit(*t1); ++t1)
- ;
- if (t1 != last && *t1 == '_')
- first = t1 + 1;
- }
- }
- }
- else if (std::isdigit(*first))
- {
- const char* t1 = first+1;
- for (; t1 != last && std::isdigit(*t1); ++t1)
- ;
- if (t1 == last)
- first = last;
- }
- }
- return first;
-}
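A small worked example of the <discriminator> rule above (hypothetical helper, encoding direction only): a discriminator of 3 is spelled "_3", while 12 needs the long form "__12_".

#include <cassert>
#include <string>

// Hypothetical helper, not part of LLVM: emits the <discriminator> spelling
// described above. Values below 10 use "_N"; larger values use "__N_".
static std::string formatDiscriminator(unsigned N) {
  return N < 10 ? "_" + std::to_string(N)
                : "__" + std::to_string(N) + "_";
}

int main() {
  assert(formatDiscriminator(3) == "_3");
  assert(formatDiscriminator(12) == "__12_");
  return 0;
}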
-
-// <mangled-name> ::= _Z <encoding>
-// ::= <type>
-// extension ::= ___Z <encoding> _block_invoke
-// extension ::= ___Z <encoding> _block_invoke<decimal-digit>+
-// extension ::= ___Z <encoding> _block_invoke_<decimal-digit>+
-Node *Db::parse() {
- if (consumeIf("_Z")) {
- Node *Encoding = parseEncoding();
- if (Encoding == nullptr)
- return nullptr;
- if (look() == '.') {
- Encoding = make<DotSuffix>(Encoding, StringView(First, Last));
- First = Last;
- }
- if (numLeft() != 0)
- return nullptr;
- return Encoding;
- }
-
- if (consumeIf("___Z")) {
- Node *Encoding = parseEncoding();
- if (Encoding == nullptr || !consumeIf("_block_invoke"))
- return nullptr;
- bool RequireNumber = consumeIf('_');
- if (parseNumber().empty() && RequireNumber)
- return nullptr;
- if (numLeft() != 0)
- return nullptr;
- return make<SpecialName>("invocation function for block in ", Encoding);
- }
-
- Node *Ty = parseType();
- if (numLeft() != 0)
- return nullptr;
- return Ty;
-}
-
-bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S,
- size_t InitSize) {
- size_t BufferSize;
- if (Buf == nullptr) {
- Buf = static_cast<char *>(std::malloc(InitSize));
- if (Buf == nullptr)
- return true;
- BufferSize = InitSize;
- } else
- BufferSize = *N;
-
- S.reset(Buf, BufferSize);
- return false;
-}
+//===----------------------------------------------------------------------===//
+// Code beyond this point should not be synchronized with libc++abi.
+//===----------------------------------------------------------------------===//
-} // unnamed namespace
+using Demangler = itanium_demangle::ManglingParser<DefaultAllocator>;
char *llvm::itaniumDemangle(const char *MangledName, char *Buf,
size_t *N, int *Status) {
@@ -4961,14 +333,14 @@ char *llvm::itaniumDemangle(const char *MangledName, char *Buf,
}
int InternalStatus = demangle_success;
- Db Parser(MangledName, MangledName + std::strlen(MangledName));
+ Demangler Parser(MangledName, MangledName + std::strlen(MangledName));
OutputStream S;
Node *AST = Parser.parse();
if (AST == nullptr)
InternalStatus = demangle_invalid_mangled_name;
- else if (initializeOutputStream(Buf, N, S, 1024))
+ else if (!initializeOutputStream(Buf, N, S, 1024))
InternalStatus = demangle_memory_alloc_failure;
else {
assert(Parser.ForwardTemplateRefs.empty());
@@ -4984,13 +356,11 @@ char *llvm::itaniumDemangle(const char *MangledName, char *Buf,
return InternalStatus == demangle_success ? Buf : nullptr;
}
-namespace llvm {
-
ItaniumPartialDemangler::ItaniumPartialDemangler()
- : RootNode(nullptr), Context(new Db{nullptr, nullptr}) {}
+ : RootNode(nullptr), Context(new Demangler{nullptr, nullptr}) {}
ItaniumPartialDemangler::~ItaniumPartialDemangler() {
- delete static_cast<Db *>(Context);
+ delete static_cast<Demangler *>(Context);
}
ItaniumPartialDemangler::ItaniumPartialDemangler(
@@ -5008,16 +378,16 @@ operator=(ItaniumPartialDemangler &&Other) {
// Demangle MangledName into an AST, storing it into this->RootNode.
bool ItaniumPartialDemangler::partialDemangle(const char *MangledName) {
- Db *Parser = static_cast<Db *>(Context);
+ Demangler *Parser = static_cast<Demangler *>(Context);
size_t Len = std::strlen(MangledName);
Parser->reset(MangledName, MangledName + Len);
RootNode = Parser->parse();
return RootNode == nullptr;
}
-static char *printNode(Node *RootNode, char *Buf, size_t *N) {
+static char *printNode(const Node *RootNode, char *Buf, size_t *N) {
OutputStream S;
- if (initializeOutputStream(Buf, N, S, 128))
+ if (!initializeOutputStream(Buf, N, S, 128))
return nullptr;
RootNode->print(S);
S += '\0';
@@ -5030,24 +400,24 @@ char *ItaniumPartialDemangler::getFunctionBaseName(char *Buf, size_t *N) const {
if (!isFunction())
return nullptr;
- Node *Name = static_cast<FunctionEncoding *>(RootNode)->getName();
+ const Node *Name = static_cast<const FunctionEncoding *>(RootNode)->getName();
while (true) {
switch (Name->getKind()) {
case Node::KAbiTagAttr:
- Name = static_cast<AbiTagAttr *>(Name)->Base;
+ Name = static_cast<const AbiTagAttr *>(Name)->Base;
continue;
case Node::KStdQualifiedName:
- Name = static_cast<StdQualifiedName *>(Name)->Child;
+ Name = static_cast<const StdQualifiedName *>(Name)->Child;
continue;
case Node::KNestedName:
- Name = static_cast<NestedName *>(Name)->Name;
+ Name = static_cast<const NestedName *>(Name)->Name;
continue;
case Node::KLocalName:
- Name = static_cast<LocalName *>(Name)->Entity;
+ Name = static_cast<const LocalName *>(Name)->Entity;
continue;
case Node::KNameWithTemplateArgs:
- Name = static_cast<NameWithTemplateArgs *>(Name)->Name;
+ Name = static_cast<const NameWithTemplateArgs *>(Name)->Name;
continue;
default:
return printNode(Name, Buf, N);
@@ -5059,20 +429,20 @@ char *ItaniumPartialDemangler::getFunctionDeclContextName(char *Buf,
size_t *N) const {
if (!isFunction())
return nullptr;
- Node *Name = static_cast<FunctionEncoding *>(RootNode)->getName();
+ const Node *Name = static_cast<const FunctionEncoding *>(RootNode)->getName();
OutputStream S;
- if (initializeOutputStream(Buf, N, S, 128))
+ if (!initializeOutputStream(Buf, N, S, 128))
return nullptr;
KeepGoingLocalFunction:
while (true) {
if (Name->getKind() == Node::KAbiTagAttr) {
- Name = static_cast<AbiTagAttr *>(Name)->Base;
+ Name = static_cast<const AbiTagAttr *>(Name)->Base;
continue;
}
if (Name->getKind() == Node::KNameWithTemplateArgs) {
- Name = static_cast<NameWithTemplateArgs *>(Name)->Name;
+ Name = static_cast<const NameWithTemplateArgs *>(Name)->Name;
continue;
}
break;
@@ -5083,10 +453,10 @@ char *ItaniumPartialDemangler::getFunctionDeclContextName(char *Buf,
S += "std";
break;
case Node::KNestedName:
- static_cast<NestedName *>(Name)->Qual->print(S);
+ static_cast<const NestedName *>(Name)->Qual->print(S);
break;
case Node::KLocalName: {
- auto *LN = static_cast<LocalName *>(Name);
+ auto *LN = static_cast<const LocalName *>(Name);
LN->Encoding->print(S);
S += "::";
Name = LN->Entity;
@@ -5115,7 +485,7 @@ char *ItaniumPartialDemangler::getFunctionParameters(char *Buf,
NodeArray Params = static_cast<FunctionEncoding *>(RootNode)->getParams();
OutputStream S;
- if (initializeOutputStream(Buf, N, S, 128))
+ if (!initializeOutputStream(Buf, N, S, 128))
return nullptr;
S += '(';
@@ -5133,10 +503,11 @@ char *ItaniumPartialDemangler::getFunctionReturnType(
return nullptr;
OutputStream S;
- if (initializeOutputStream(Buf, N, S, 128))
+ if (!initializeOutputStream(Buf, N, S, 128))
return nullptr;
- if (Node *Ret = static_cast<FunctionEncoding *>(RootNode)->getReturnType())
+ if (const Node *Ret =
+ static_cast<const FunctionEncoding *>(RootNode)->getReturnType())
Ret->print(S);
S += '\0';
@@ -5154,12 +525,12 @@ bool ItaniumPartialDemangler::hasFunctionQualifiers() const {
assert(RootNode != nullptr && "must call partialDemangle()");
if (!isFunction())
return false;
- auto *E = static_cast<FunctionEncoding *>(RootNode);
+ auto *E = static_cast<const FunctionEncoding *>(RootNode);
return E->getCVQuals() != QualNone || E->getRefQual() != FrefQualNone;
}
bool ItaniumPartialDemangler::isCtorOrDtor() const {
- Node *N = static_cast<Node *>(RootNode);
+ const Node *N = static_cast<const Node *>(RootNode);
while (N) {
switch (N->getKind()) {
default:
@@ -5168,22 +539,22 @@ bool ItaniumPartialDemangler::isCtorOrDtor() const {
return true;
case Node::KAbiTagAttr:
- N = static_cast<AbiTagAttr *>(N)->Base;
+ N = static_cast<const AbiTagAttr *>(N)->Base;
break;
case Node::KFunctionEncoding:
- N = static_cast<FunctionEncoding *>(N)->getName();
+ N = static_cast<const FunctionEncoding *>(N)->getName();
break;
case Node::KLocalName:
- N = static_cast<LocalName *>(N)->Entity;
+ N = static_cast<const LocalName *>(N)->Entity;
break;
case Node::KNameWithTemplateArgs:
- N = static_cast<NameWithTemplateArgs *>(N)->Name;
+ N = static_cast<const NameWithTemplateArgs *>(N)->Name;
break;
case Node::KNestedName:
- N = static_cast<NestedName *>(N)->Name;
+ N = static_cast<const NestedName *>(N)->Name;
break;
case Node::KStdQualifiedName:
- N = static_cast<StdQualifiedName *>(N)->Child;
+ N = static_cast<const StdQualifiedName *>(N)->Child;
break;
}
}
@@ -5192,17 +563,16 @@ bool ItaniumPartialDemangler::isCtorOrDtor() const {
bool ItaniumPartialDemangler::isFunction() const {
assert(RootNode != nullptr && "must call partialDemangle()");
- return static_cast<Node *>(RootNode)->getKind() == Node::KFunctionEncoding;
+ return static_cast<const Node *>(RootNode)->getKind() ==
+ Node::KFunctionEncoding;
}
bool ItaniumPartialDemangler::isSpecialName() const {
assert(RootNode != nullptr && "must call partialDemangle()");
- auto K = static_cast<Node *>(RootNode)->getKind();
+ auto K = static_cast<const Node *>(RootNode)->getKind();
return K == Node::KSpecialName || K == Node::KCtorVtableSpecialName;
}
bool ItaniumPartialDemangler::isData() const {
return !isFunction() && !isSpecialName();
}
-
-}
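For context on the public entry points this hunk touches, here is a minimal usage sketch of the API as it appears above (llvm::itaniumDemangle and ItaniumPartialDemangler, declared in llvm/Demangle/Demangle.h); error handling is abbreviated and "_Z3fooi" is just an example mangled name:

#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  // One-shot interface: passing a null buffer asks the demangler to malloc()
  // one; Status reports demangle_success or one of the failure codes.
  int Status = 0;
  char *Demangled = llvm::itaniumDemangle("_Z3fooi", nullptr, nullptr, &Status);
  if (Demangled) {
    std::printf("%s\n", Demangled); // expected: foo(int)
    std::free(Demangled);
  }

  // Partial interface: parse once, then query pieces of the AST.
  // partialDemangle() returns true on error, as the diff above shows.
  llvm::ItaniumPartialDemangler D;
  if (!D.partialDemangle("_Z3fooi") && D.isFunction()) {
    char *Base = D.getFunctionBaseName(nullptr, nullptr);     // "foo"
    char *Params = D.getFunctionParameters(nullptr, nullptr); // "(int)"
    std::free(Base);
    std::free(Params);
  }
  return 0;
}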
diff --git a/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp b/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 3eac87d61011..51ffa0bff7f3 100644
--- a/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -14,334 +14,34 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Demangle/MicrosoftDemangle.h"
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/MicrosoftDemangleNodes.h"
-#include "Compiler.h"
-#include "StringView.h"
-#include "Utility.h"
+#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/Utility.h"
+#include <array>
#include <cctype>
+#include <cstdio>
#include <tuple>
-// This memory allocator is extremely fast, but it doesn't call dtors
-// for allocated objects. That means you can't use STL containers
-// (such as std::vector) with this allocator. But it pays off --
-// the demangler is 3x faster with this allocator compared to one with
-// STL containers.
-namespace {
- constexpr size_t AllocUnit = 4096;
-
-class ArenaAllocator {
- struct AllocatorNode {
- uint8_t *Buf = nullptr;
- size_t Used = 0;
- size_t Capacity = 0;
- AllocatorNode *Next = nullptr;
- };
-
- void addNode(size_t Capacity) {
- AllocatorNode *NewHead = new AllocatorNode;
- NewHead->Buf = new uint8_t[Capacity];
- NewHead->Next = Head;
- NewHead->Capacity = Capacity;
- Head = NewHead;
- NewHead->Used = 0;
- }
-
-public:
- ArenaAllocator() { addNode(AllocUnit); }
-
- ~ArenaAllocator() {
- while (Head) {
- assert(Head->Buf);
- delete[] Head->Buf;
- AllocatorNode *Next = Head->Next;
- delete Head;
- Head = Next;
- }
- }
-
- char *allocUnalignedBuffer(size_t Length) {
- uint8_t *Buf = Head->Buf + Head->Used;
-
- Head->Used += Length;
- if (Head->Used > Head->Capacity) {
- // It's possible we need a buffer which is larger than our default unit
- // size, so we need to be careful to add a node with capacity that is at
- // least as large as what we need.
- addNode(std::max(AllocUnit, Length));
- Head->Used = Length;
- Buf = Head->Buf;
- }
-
- return reinterpret_cast<char *>(Buf);
- }
-
- template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) {
-
- size_t Size = sizeof(T);
- assert(Head && Head->Buf);
-
- size_t P = (size_t)Head->Buf + Head->Used;
- uintptr_t AlignedP =
- (((size_t)P + alignof(T) - 1) & ~(size_t)(alignof(T) - 1));
- uint8_t *PP = (uint8_t *)AlignedP;
- size_t Adjustment = AlignedP - P;
-
- Head->Used += Size + Adjustment;
- if (Head->Used < Head->Capacity)
- return new (PP) T(std::forward<Args>(ConstructorArgs)...);
-
- addNode(AllocUnit);
- Head->Used = Size;
- return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...);
- }
-
-private:
- AllocatorNode *Head = nullptr;
-};
-} // namespace
+using namespace llvm;
+using namespace ms_demangle;
static bool startsWithDigit(StringView S) {
return !S.empty() && std::isdigit(S.front());
}
-// Writes a space if the last token does not end with a punctuation.
-static void outputSpaceIfNecessary(OutputStream &OS) {
- if (OS.empty())
- return;
-
- char C = OS.back();
- if (isalnum(C) || C == '>')
- OS << " ";
-}
-
-// Storage classes
-enum Qualifiers : uint8_t {
- Q_None = 0,
- Q_Const = 1 << 0,
- Q_Volatile = 1 << 1,
- Q_Far = 1 << 2,
- Q_Huge = 1 << 3,
- Q_Unaligned = 1 << 4,
- Q_Restrict = 1 << 5,
- Q_Pointer64 = 1 << 6
-};
-
-enum class StorageClass : uint8_t {
- None,
- PrivateStatic,
- ProtectedStatic,
- PublicStatic,
- Global,
- FunctionLocalStatic
-};
-
-enum class QualifierMangleMode { Drop, Mangle, Result };
-
-enum class PointerAffinity { Pointer, Reference, RValueReference };
-
-// Calling conventions
-enum class CallingConv : uint8_t {
- None,
- Cdecl,
- Pascal,
- Thiscall,
- Stdcall,
- Fastcall,
- Clrcall,
- Eabi,
- Vectorcall,
- Regcall,
-};
-
-enum class ReferenceKind : uint8_t { None, LValueRef, RValueRef };
-
-// Types
-enum class PrimTy : uint8_t {
- Unknown,
- None,
- Function,
- Ptr,
- MemberPtr,
- Array,
-
- Struct,
- Union,
- Class,
- Enum,
-
- Void,
- Bool,
- Char,
- Schar,
- Uchar,
- Char16,
- Char32,
- Short,
- Ushort,
- Int,
- Uint,
- Long,
- Ulong,
- Int64,
- Uint64,
- Wchar,
- Float,
- Double,
- Ldouble,
- Nullptr
-};
-
-// Function classes
-enum FuncClass : uint8_t {
- Public = 1 << 0,
- Protected = 1 << 1,
- Private = 1 << 2,
- Global = 1 << 3,
- Static = 1 << 4,
- Virtual = 1 << 5,
- Far = 1 << 6,
-};
-
-namespace {
-
-struct Type;
-struct Name;
-
-struct FunctionParams {
- bool IsVariadic = false;
- Type *Current = nullptr;
-
- FunctionParams *Next = nullptr;
+struct NodeList {
+ Node *N = nullptr;
+ NodeList *Next = nullptr;
};
-struct TemplateParams {
- bool IsTemplateTemplate = false;
- bool IsAliasTemplate = false;
-
- // Type can be null if this is a template template parameter. In that case
- // only Name will be valid.
- Type *ParamType = nullptr;
-
- // Name can be valid if this is a template template parameter (see above) or
- // this is a function declaration (e.g. foo<&SomeFunc>). In the latter case
- // Name contains the name of the function and Type contains the signature.
- Name *ParamName = nullptr;
-
- TemplateParams *Next = nullptr;
-};
-
-// The type class. Mangled symbols are first parsed and converted to
-// this type and then converted to string.
-struct Type {
- virtual ~Type() {}
-
- virtual Type *clone(ArenaAllocator &Arena) const;
-
- // Write the "first half" of a given type. This is a static functions to
- // give the code a chance to do processing that is common to a subset of
- // subclasses
- static void outputPre(OutputStream &OS, Type &Ty);
-
- // Write the "second half" of a given type. This is a static functions to
- // give the code a chance to do processing that is common to a subset of
- // subclasses
- static void outputPost(OutputStream &OS, Type &Ty);
-
- virtual void outputPre(OutputStream &OS);
- virtual void outputPost(OutputStream &OS);
-
- // Primitive type such as Int.
- PrimTy Prim = PrimTy::Unknown;
-
- Qualifiers Quals = Q_None;
- StorageClass Storage = StorageClass::None; // storage class
-};
-
-// Represents an identifier which may be a template.
-struct Name {
- // Name read from a MangledName string.
- StringView Str;
-
- // Overloaded operators are represented as special BackReferences in mangled
- // symbols. If this is an operator name, "op" has an operator name (e.g.
- // ">>"). Otherwise, empty.
- StringView Operator;
-
- // Template parameters. Null if not a template.
- TemplateParams *TParams = nullptr;
-
- // Nested BackReferences (e.g. "A::B::C") are represented as a linked list.
- Name *Next = nullptr;
-};
-
-struct PointerType : public Type {
- Type *clone(ArenaAllocator &Arena) const override;
- void outputPre(OutputStream &OS) override;
- void outputPost(OutputStream &OS) override;
-
- PointerAffinity Affinity;
-
- // Represents a type X in "a pointer to X", "a reference to X",
- // "an array of X", or "a function returning X".
- Type *Pointee = nullptr;
-};
-
-struct MemberPointerType : public Type {
- Type *clone(ArenaAllocator &Arena) const override;
- void outputPre(OutputStream &OS) override;
- void outputPost(OutputStream &OS) override;
-
- Name *MemberName = nullptr;
-
- // Represents a type X in "a pointer to X", "a reference to X",
- // "an array of X", or "a function returning X".
- Type *Pointee = nullptr;
-};
-
-struct FunctionType : public Type {
- Type *clone(ArenaAllocator &Arena) const override;
- void outputPre(OutputStream &OS) override;
- void outputPost(OutputStream &OS) override;
-
- // True if this FunctionType instance is the Pointee of a PointerType or
- // MemberPointerType.
- bool IsFunctionPointer = false;
-
- Type *ReturnType = nullptr;
- // If this is a reference, the type of reference.
- ReferenceKind RefKind;
-
- CallingConv CallConvention;
- FuncClass FunctionClass;
-
- FunctionParams Params;
-};
-
-struct UdtType : public Type {
- Type *clone(ArenaAllocator &Arena) const override;
- void outputPre(OutputStream &OS) override;
-
- Name *UdtName = nullptr;
-};
-
-struct ArrayType : public Type {
- Type *clone(ArenaAllocator &Arena) const override;
- void outputPre(OutputStream &OS) override;
- void outputPost(OutputStream &OS) override;
-
- // Either NextDimension or ElementType will be valid.
- ArrayType *NextDimension = nullptr;
- uint32_t ArrayDimension = 0;
-
- Type *ElementType = nullptr;
-};
-
-} // namespace
-
-static bool isMemberPointer(StringView MangledName) {
+static bool isMemberPointer(StringView MangledName, bool &Error) {
+ Error = false;
switch (MangledName.popFront()) {
case '$':
// This is probably an rvalue reference (e.g. $$Q), and you cannot have an
@@ -359,7 +59,8 @@ static bool isMemberPointer(StringView MangledName) {
// what.
break;
default:
- assert(false && "Ty is not a pointer type!");
+ Error = true;
+ return false;
}
// If it starts with a number, then 6 indicates a non-member function
@@ -390,45 +91,46 @@ static bool isMemberPointer(StringView MangledName) {
case 'T':
return true;
default:
- assert(false);
+ Error = true;
+ return false;
}
- return false;
}
-static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
- outputSpaceIfNecessary(OS);
-
- switch (CC) {
- case CallingConv::Cdecl:
- OS << "__cdecl";
- break;
- case CallingConv::Fastcall:
- OS << "__fastcall";
- break;
- case CallingConv::Pascal:
- OS << "__pascal";
- break;
- case CallingConv::Regcall:
- OS << "__regcall";
- break;
- case CallingConv::Stdcall:
- OS << "__stdcall";
- break;
- case CallingConv::Thiscall:
- OS << "__thiscall";
- break;
- case CallingConv::Eabi:
- OS << "__eabi";
- break;
- case CallingConv::Vectorcall:
- OS << "__vectorcall";
- break;
- case CallingConv::Clrcall:
- OS << "__clrcall";
- break;
- default:
- break;
- }
+static SpecialIntrinsicKind
+consumeSpecialIntrinsicKind(StringView &MangledName) {
+ if (MangledName.consumeFront("?_7"))
+ return SpecialIntrinsicKind::Vftable;
+ if (MangledName.consumeFront("?_8"))
+ return SpecialIntrinsicKind::Vbtable;
+ if (MangledName.consumeFront("?_9"))
+ return SpecialIntrinsicKind::VcallThunk;
+ if (MangledName.consumeFront("?_A"))
+ return SpecialIntrinsicKind::Typeof;
+ if (MangledName.consumeFront("?_B"))
+ return SpecialIntrinsicKind::LocalStaticGuard;
+ if (MangledName.consumeFront("?_C"))
+ return SpecialIntrinsicKind::StringLiteralSymbol;
+ if (MangledName.consumeFront("?_P"))
+ return SpecialIntrinsicKind::UdtReturning;
+ if (MangledName.consumeFront("?_R0"))
+ return SpecialIntrinsicKind::RttiTypeDescriptor;
+ if (MangledName.consumeFront("?_R1"))
+ return SpecialIntrinsicKind::RttiBaseClassDescriptor;
+ if (MangledName.consumeFront("?_R2"))
+ return SpecialIntrinsicKind::RttiBaseClassArray;
+ if (MangledName.consumeFront("?_R3"))
+ return SpecialIntrinsicKind::RttiClassHierarchyDescriptor;
+ if (MangledName.consumeFront("?_R4"))
+ return SpecialIntrinsicKind::RttiCompleteObjLocator;
+ if (MangledName.consumeFront("?_S"))
+ return SpecialIntrinsicKind::LocalVftable;
+ if (MangledName.consumeFront("?__E"))
+ return SpecialIntrinsicKind::DynamicInitializer;
+ if (MangledName.consumeFront("?__F"))
+ return SpecialIntrinsicKind::DynamicAtexitDestructor;
+ if (MangledName.consumeFront("?__J"))
+ return SpecialIntrinsicKind::LocalStaticThreadGuard;
+ return SpecialIntrinsicKind::None;
}
static bool startsWithLocalScopePattern(StringView S) {
@@ -472,519 +174,575 @@ static bool startsWithLocalScopePattern(StringView S) {
return true;
}
-static void outputName(OutputStream &OS, const Name *TheName);
-
-// Write a function or template parameter list.
-static void outputParameterList(OutputStream &OS,
- const FunctionParams &Params) {
- if (!Params.Current) {
- OS << "void";
- return;
- }
-
- const FunctionParams *Head = &Params;
- while (Head) {
- Type::outputPre(OS, *Head->Current);
- Type::outputPost(OS, *Head->Current);
-
- Head = Head->Next;
-
- if (Head)
- OS << ", ";
+static bool isTagType(StringView S) {
+ switch (S.front()) {
+ case 'T': // union
+ case 'U': // struct
+ case 'V': // class
+ case 'W': // enum
+ return true;
}
+ return false;
}
-static void outputParameterList(OutputStream &OS,
- const TemplateParams &Params) {
- if (!Params.ParamType && !Params.ParamName) {
- OS << "<>";
- return;
- }
-
- OS << "<";
- const TemplateParams *Head = &Params;
- while (Head) {
- // Type can be null if this is a template template parameter,
- // and Name can be null if this is a simple type.
-
- if (Head->ParamType && Head->ParamName) {
- // Function pointer.
- OS << "&";
- Type::outputPre(OS, *Head->ParamType);
- outputName(OS, Head->ParamName);
- Type::outputPost(OS, *Head->ParamType);
- } else if (Head->ParamType) {
- // simple type.
- Type::outputPre(OS, *Head->ParamType);
- Type::outputPost(OS, *Head->ParamType);
- } else {
- // Template alias.
- outputName(OS, Head->ParamName);
- }
+static bool isCustomType(StringView S) { return S[0] == '?'; }
- Head = Head->Next;
+static bool isPointerType(StringView S) {
+ if (S.startsWith("$$Q")) // foo &&
+ return true;
- if (Head)
- OS << ", ";
+ switch (S.front()) {
+ case 'A': // foo &
+ case 'P': // foo *
+ case 'Q': // foo *const
+ case 'R': // foo *volatile
+ case 'S': // foo *const volatile
+ return true;
}
- OS << ">";
+ return false;
}
-static void outputName(OutputStream &OS, const Name *TheName) {
- if (!TheName)
- return;
-
- outputSpaceIfNecessary(OS);
-
- const Name *Previous = nullptr;
- // Print out namespaces or outer class BackReferences.
- for (; TheName->Next; TheName = TheName->Next) {
- Previous = TheName;
- OS << TheName->Str;
- if (TheName->TParams)
- outputParameterList(OS, *TheName->TParams);
- OS << "::";
- }
-
- // Print out a regular name.
- if (TheName->Operator.empty()) {
- OS << TheName->Str;
- if (TheName->TParams)
- outputParameterList(OS, *TheName->TParams);
- return;
- }
-
- // Print out ctor or dtor.
- if (TheName->Operator == "dtor")
- OS << "~";
-
- if (TheName->Operator == "ctor" || TheName->Operator == "dtor") {
- OS << Previous->Str;
- if (Previous->TParams)
- outputParameterList(OS, *Previous->TParams);
- return;
- }
+static bool isArrayType(StringView S) { return S[0] == 'Y'; }
- // Print out an overloaded operator.
- if (!TheName->Str.empty())
- OS << TheName->Str << "::";
- OS << "operator" << TheName->Operator;
+static bool isFunctionType(StringView S) {
+ return S.startsWith("$$A8@@") || S.startsWith("$$A6");
}
-namespace {
-
-Type *Type::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<Type>(*this);
+static FunctionRefQualifier
+demangleFunctionRefQualifier(StringView &MangledName) {
+ if (MangledName.consumeFront('G'))
+ return FunctionRefQualifier::Reference;
+ else if (MangledName.consumeFront('H'))
+ return FunctionRefQualifier::RValueReference;
+ return FunctionRefQualifier::None;
}
-// Write the "first half" of a given type.
-void Type::outputPre(OutputStream &OS, Type &Ty) {
- // Function types require custom handling of const and static so we
- // handle them separately. All other types use the same decoration
- // for these modifiers, so handle them here in common code.
- if (Ty.Prim == PrimTy::Function) {
- Ty.outputPre(OS);
- return;
- }
+static std::pair<Qualifiers, PointerAffinity>
+demanglePointerCVQualifiers(StringView &MangledName) {
+ if (MangledName.consumeFront("$$Q"))
+ return std::make_pair(Q_None, PointerAffinity::RValueReference);
- switch (Ty.Storage) {
- case StorageClass::PrivateStatic:
- case StorageClass::PublicStatic:
- case StorageClass::ProtectedStatic:
- OS << "static ";
+ switch (MangledName.popFront()) {
+ case 'A':
+ return std::make_pair(Q_None, PointerAffinity::Reference);
+ case 'P':
+ return std::make_pair(Q_None, PointerAffinity::Pointer);
+ case 'Q':
+ return std::make_pair(Q_Const, PointerAffinity::Pointer);
+ case 'R':
+ return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
+ case 'S':
+ return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
+ PointerAffinity::Pointer);
default:
- break;
- }
- Ty.outputPre(OS);
-
- if (Ty.Quals & Q_Const) {
- outputSpaceIfNecessary(OS);
- OS << "const";
+ assert(false && "Ty is not a pointer type!");
}
+ return std::make_pair(Q_None, PointerAffinity::Pointer);
+}
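
An editor's illustration (not part of this change): the letter-to-qualifier table encoded by the switch above, reduced to a standalone sketch. The enum values are local stand-ins, not the real Qualifiers/PointerAffinity types.

#include <cassert>
#include <utility>

namespace sketch {
enum Qual { None = 0, Const = 1, Volatile = 2 };
enum Affinity { Pointer, Reference };

// Same letter-to-qualifier mapping as the switch in demanglePointerCVQualifiers.
std::pair<int, Affinity> decodePointerLetter(char C) {
  switch (C) {
  case 'A': return {None, Reference};           // foo &
  case 'P': return {None, Pointer};             // foo *
  case 'Q': return {Const, Pointer};            // foo *const
  case 'R': return {Volatile, Pointer};         // foo *volatile
  case 'S': return {Const | Volatile, Pointer}; // foo *const volatile
  }
  return {None, Pointer};
}
} // namespace sketch

int main() {
  assert(sketch::decodePointerLetter('Q').first == sketch::Const);
  assert(sketch::decodePointerLetter('S').first ==
         (sketch::Const | sketch::Volatile));
  return 0;
}
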
- if (Ty.Quals & Q_Volatile) {
- outputSpaceIfNecessary(OS);
- OS << "volatile";
- }
+StringView Demangler::copyString(StringView Borrowed) {
+ char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1);
+ std::strcpy(Stable, Borrowed.begin());
- if (Ty.Quals & Q_Restrict) {
- outputSpaceIfNecessary(OS);
- OS << "__restrict";
- }
+ return {Stable, Borrowed.size()};
}
-// Write the "second half" of a given type.
-void Type::outputPost(OutputStream &OS, Type &Ty) { Ty.outputPost(OS); }
-
-void Type::outputPre(OutputStream &OS) {
- switch (Prim) {
- case PrimTy::Void:
- OS << "void";
- break;
- case PrimTy::Bool:
- OS << "bool";
- break;
- case PrimTy::Char:
- OS << "char";
- break;
- case PrimTy::Schar:
- OS << "signed char";
+SpecialTableSymbolNode *
+Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
+ SpecialIntrinsicKind K) {
+ NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>();
+ switch (K) {
+ case SpecialIntrinsicKind::Vftable:
+ NI->Name = "`vftable'";
break;
- case PrimTy::Uchar:
- OS << "unsigned char";
+ case SpecialIntrinsicKind::Vbtable:
+ NI->Name = "`vbtable'";
break;
- case PrimTy::Char16:
- OS << "char16_t";
+ case SpecialIntrinsicKind::LocalVftable:
+ NI->Name = "`local vftable'";
break;
- case PrimTy::Char32:
- OS << "char32_t";
- break;
- case PrimTy::Short:
- OS << "short";
- break;
- case PrimTy::Ushort:
- OS << "unsigned short";
- break;
- case PrimTy::Int:
- OS << "int";
- break;
- case PrimTy::Uint:
- OS << "unsigned int";
- break;
- case PrimTy::Long:
- OS << "long";
- break;
- case PrimTy::Ulong:
- OS << "unsigned long";
- break;
- case PrimTy::Int64:
- OS << "__int64";
- break;
- case PrimTy::Uint64:
- OS << "unsigned __int64";
- break;
- case PrimTy::Wchar:
- OS << "wchar_t";
- break;
- case PrimTy::Float:
- OS << "float";
- break;
- case PrimTy::Double:
- OS << "double";
- break;
- case PrimTy::Ldouble:
- OS << "long double";
- break;
- case PrimTy::Nullptr:
- OS << "std::nullptr_t";
+ case SpecialIntrinsicKind::RttiCompleteObjLocator:
+ NI->Name = "`RTTI Complete Object Locator'";
break;
default:
- assert(false && "Invalid primitive type!");
+ LLVM_BUILTIN_UNREACHABLE;
}
-}
-void Type::outputPost(OutputStream &OS) {}
-
-Type *PointerType::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<PointerType>(*this);
-}
-
-static void outputPointerIndicator(OutputStream &OS, PointerAffinity Affinity,
- const Name *MemberName,
- const Type *Pointee) {
- // "[]" and "()" (for function parameters) take precedence over "*",
- // so "int *x(int)" means "x is a function returning int *". We need
- // parentheses to supercede the default precedence. (e.g. we want to
- // emit something like "int (*x)(int)".)
- if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array) {
- OS << "(";
- if (Pointee->Prim == PrimTy::Function) {
- const FunctionType *FTy = static_cast<const FunctionType *>(Pointee);
- assert(FTy->IsFunctionPointer);
- outputCallingConvention(OS, FTy->CallConvention);
- OS << " ";
- }
+ QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
+ SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
+ STSN->Name = QN;
+ bool IsMember = false;
+ char Front = MangledName.popFront();
+ if (Front != '6' && Front != '7') {
+ Error = true;
+ return nullptr;
}
- if (MemberName) {
- outputName(OS, MemberName);
- OS << "::";
+ std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
+ if (!MangledName.consumeFront('@'))
+ STSN->TargetName = demangleFullyQualifiedTypeName(MangledName);
+ return STSN;
+}
+
+LocalStaticGuardVariableNode *
+Demangler::demangleLocalStaticGuard(StringView &MangledName) {
+ LocalStaticGuardIdentifierNode *LSGI =
+ Arena.alloc<LocalStaticGuardIdentifierNode>();
+ QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
+ LocalStaticGuardVariableNode *LSGVN =
+ Arena.alloc<LocalStaticGuardVariableNode>();
+ LSGVN->Name = QN;
+
+ if (MangledName.consumeFront("4IA"))
+ LSGVN->IsVisible = false;
+ else if (MangledName.consumeFront("5"))
+ LSGVN->IsVisible = true;
+ else {
+ Error = true;
+ return nullptr;
}
- if (Affinity == PointerAffinity::Pointer)
- OS << "*";
- else if (Affinity == PointerAffinity::Reference)
- OS << "&";
- else
- OS << "&&";
+ if (!MangledName.empty())
+ LSGI->ScopeIndex = demangleUnsigned(MangledName);
+ return LSGVN;
}
-void PointerType::outputPre(OutputStream &OS) {
- Type::outputPre(OS, *Pointee);
-
- outputSpaceIfNecessary(OS);
+static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena,
+ StringView Name) {
+ NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>();
+ Id->Name = Name;
+ return Id;
+}
- if (Quals & Q_Unaligned)
- OS << "__unaligned ";
+static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
+ IdentifierNode *Identifier) {
+ QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
+ QN->Components = Arena.alloc<NodeArrayNode>();
+ QN->Components->Count = 1;
+ QN->Components->Nodes = Arena.allocArray<Node *>(1);
+ QN->Components->Nodes[0] = Identifier;
+ return QN;
+}
- outputPointerIndicator(OS, Affinity, nullptr, Pointee);
+static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
+ StringView Name) {
+ NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name);
+ return synthesizeQualifiedName(Arena, Id);
+}
- // FIXME: We should output this, but it requires updating lots of tests.
- // if (Ty.Quals & Q_Pointer64)
- // OS << " __ptr64";
+static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena,
+ TypeNode *Type,
+ StringView VariableName) {
+ VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
+ VSN->Type = Type;
+ VSN->Name = synthesizeQualifiedName(Arena, VariableName);
+ return VSN;
}
-void PointerType::outputPost(OutputStream &OS) {
- if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array)
- OS << ")";
+VariableSymbolNode *Demangler::demangleUntypedVariable(
+ ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) {
+ NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName);
+ QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
+ VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
+ VSN->Name = QN;
+ if (MangledName.consumeFront("8"))
+ return VSN;
- Type::outputPost(OS, *Pointee);
+ Error = true;
+ return nullptr;
}
-Type *MemberPointerType::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<MemberPointerType>(*this);
-}
+VariableSymbolNode *
+Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
+ StringView &MangledName) {
+ RttiBaseClassDescriptorNode *RBCDN =
+ Arena.alloc<RttiBaseClassDescriptorNode>();
+ RBCDN->NVOffset = demangleUnsigned(MangledName);
+ RBCDN->VBPtrOffset = demangleSigned(MangledName);
+ RBCDN->VBTableOffset = demangleUnsigned(MangledName);
+ RBCDN->Flags = demangleUnsigned(MangledName);
+ if (Error)
+ return nullptr;
-void MemberPointerType::outputPre(OutputStream &OS) {
- Type::outputPre(OS, *Pointee);
+ VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
+ VSN->Name = demangleNameScopeChain(MangledName, RBCDN);
+ MangledName.consumeFront('8');
+ return VSN;
+}
- outputSpaceIfNecessary(OS);
+FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
+ bool IsDestructor) {
+ DynamicStructorIdentifierNode *DSIN =
+ Arena.alloc<DynamicStructorIdentifierNode>();
+ DSIN->IsDestructor = IsDestructor;
- outputPointerIndicator(OS, PointerAffinity::Pointer, MemberName, Pointee);
+ bool IsKnownStaticDataMember = false;
+ if (MangledName.consumeFront('?'))
+ IsKnownStaticDataMember = true;
- // FIXME: We should output this, but it requires updating lots of tests.
- // if (Ty.Quals & Q_Pointer64)
- // OS << " __ptr64";
- if (Quals & Q_Restrict)
- OS << " __restrict";
-}
+ QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
-void MemberPointerType::outputPost(OutputStream &OS) {
- if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array)
- OS << ")";
+ SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
+ FunctionSymbolNode *FSN = nullptr;
+ Symbol->Name = QN;
- Type::outputPost(OS, *Pointee);
-}
+ if (Symbol->kind() == NodeKind::VariableSymbol) {
+ DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
-Type *FunctionType::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<FunctionType>(*this);
-}
+ // Older versions of clang mangled this type of symbol incorrectly. They
+ // would omit the leading ? and they would only emit a single @ at the end.
+ // The correct mangling is a leading ? and 2 trailing @ signs. Handle
+ // both cases.
+ int AtCount = IsKnownStaticDataMember ? 2 : 1;
+ for (int I = 0; I < AtCount; ++I) {
+ if (MangledName.consumeFront('@'))
+ continue;
+ Error = true;
+ return nullptr;
+ }
-void FunctionType::outputPre(OutputStream &OS) {
- if (!(FunctionClass & Global)) {
- if (FunctionClass & Static)
- OS << "static ";
- }
+ FSN = demangleFunctionEncoding(MangledName);
+ FSN->Name = synthesizeQualifiedName(Arena, DSIN);
+ } else {
+ if (IsKnownStaticDataMember) {
+ // This was supposed to be a static data member, but we got a function.
+ Error = true;
+ return nullptr;
+ }
- if (ReturnType) {
- Type::outputPre(OS, *ReturnType);
- OS << " ";
+ FSN = static_cast<FunctionSymbolNode *>(Symbol);
+ DSIN->Name = Symbol->Name;
+ FSN->Name = synthesizeQualifiedName(Arena, DSIN);
}
- // Function pointers print the calling convention as void (__cdecl *)(params)
- // rather than void __cdecl (*)(params). So we need to let the PointerType
- // class handle this.
- if (!IsFunctionPointer)
- outputCallingConvention(OS, CallConvention);
+ return FSN;
}
-void FunctionType::outputPost(OutputStream &OS) {
- OS << "(";
- outputParameterList(OS, Params);
- OS << ")";
- if (Quals & Q_Const)
- OS << " const";
- if (Quals & Q_Volatile)
- OS << " volatile";
- if (Quals & Q_Restrict)
- OS << " __restrict";
- if (Quals & Q_Unaligned)
- OS << " __unaligned";
-
- if (RefKind == ReferenceKind::LValueRef)
- OS << " &";
- else if (RefKind == ReferenceKind::RValueRef)
- OS << " &&";
+SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
+ SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);
+ if (SIK == SpecialIntrinsicKind::None)
+ return nullptr;
- if (ReturnType)
- Type::outputPost(OS, *ReturnType);
- return;
+ switch (SIK) {
+ case SpecialIntrinsicKind::StringLiteralSymbol:
+ return demangleStringLiteral(MangledName);
+ case SpecialIntrinsicKind::Vftable:
+ case SpecialIntrinsicKind::Vbtable:
+ case SpecialIntrinsicKind::LocalVftable:
+ case SpecialIntrinsicKind::RttiCompleteObjLocator:
+ return demangleSpecialTableSymbolNode(MangledName, SIK);
+ case SpecialIntrinsicKind::VcallThunk:
+ return demangleVcallThunkNode(MangledName);
+ case SpecialIntrinsicKind::LocalStaticGuard:
+ return demangleLocalStaticGuard(MangledName);
+ case SpecialIntrinsicKind::RttiTypeDescriptor: {
+ TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
+ if (Error)
+ break;
+ if (!MangledName.consumeFront("@8"))
+ break;
+ if (!MangledName.empty())
+ break;
+ return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'");
+ }
+ case SpecialIntrinsicKind::RttiBaseClassArray:
+ return demangleUntypedVariable(Arena, MangledName,
+ "`RTTI Base Class Array'");
+ case SpecialIntrinsicKind::RttiClassHierarchyDescriptor:
+ return demangleUntypedVariable(Arena, MangledName,
+ "`RTTI Class Hierarchy Descriptor'");
+ case SpecialIntrinsicKind::RttiBaseClassDescriptor:
+ return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
+ case SpecialIntrinsicKind::DynamicInitializer:
+ return demangleInitFiniStub(MangledName, false);
+ case SpecialIntrinsicKind::DynamicAtexitDestructor:
+ return demangleInitFiniStub(MangledName, true);
+ default:
+ break;
+ }
+ Error = true;
+ return nullptr;
}
-Type *UdtType::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<UdtType>(*this);
+IdentifierNode *
+Demangler::demangleFunctionIdentifierCode(StringView &MangledName) {
+ assert(MangledName.startsWith('?'));
+ MangledName = MangledName.dropFront();
+
+ if (MangledName.consumeFront("__"))
+ return demangleFunctionIdentifierCode(
+ MangledName, FunctionIdentifierCodeGroup::DoubleUnder);
+ else if (MangledName.consumeFront("_"))
+ return demangleFunctionIdentifierCode(MangledName,
+ FunctionIdentifierCodeGroup::Under);
+ return demangleFunctionIdentifierCode(MangledName,
+ FunctionIdentifierCodeGroup::Basic);
+}
+
+StructorIdentifierNode *
+Demangler::demangleStructorIdentifier(StringView &MangledName,
+ bool IsDestructor) {
+ StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>();
+ N->IsDestructor = IsDestructor;
+ return N;
+}
+
+ConversionOperatorIdentifierNode *
+Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) {
+ ConversionOperatorIdentifierNode *N =
+ Arena.alloc<ConversionOperatorIdentifierNode>();
+ return N;
+}
+
+LiteralOperatorIdentifierNode *
+Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) {
+ LiteralOperatorIdentifierNode *N =
+ Arena.alloc<LiteralOperatorIdentifierNode>();
+ N->Name = demangleSimpleString(MangledName, false);
+ return N;
+}
+
+static IntrinsicFunctionKind
+translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) {
+  // Not all ? identifiers are intrinsic *functions*. This function only maps
+  // operator codes for the special functions; all others are handled elsewhere,
+  // hence the IFK::None entries in the table.
+ using IFK = IntrinsicFunctionKind;
+ static IFK Basic[36] = {
+ IFK::None, // ?0 # Foo::Foo()
+ IFK::None, // ?1 # Foo::~Foo()
+ IFK::New, // ?2 # operator new
+ IFK::Delete, // ?3 # operator delete
+ IFK::Assign, // ?4 # operator=
+ IFK::RightShift, // ?5 # operator>>
+ IFK::LeftShift, // ?6 # operator<<
+ IFK::LogicalNot, // ?7 # operator!
+ IFK::Equals, // ?8 # operator==
+ IFK::NotEquals, // ?9 # operator!=
+ IFK::ArraySubscript, // ?A # operator[]
+ IFK::None, // ?B # Foo::operator <type>()
+ IFK::Pointer, // ?C # operator->
+ IFK::Dereference, // ?D # operator*
+ IFK::Increment, // ?E # operator++
+ IFK::Decrement, // ?F # operator--
+ IFK::Minus, // ?G # operator-
+ IFK::Plus, // ?H # operator+
+ IFK::BitwiseAnd, // ?I # operator&
+ IFK::MemberPointer, // ?J # operator->*
+ IFK::Divide, // ?K # operator/
+ IFK::Modulus, // ?L # operator%
+ IFK::LessThan, // ?M operator<
+ IFK::LessThanEqual, // ?N operator<=
+ IFK::GreaterThan, // ?O operator>
+ IFK::GreaterThanEqual, // ?P operator>=
+ IFK::Comma, // ?Q operator,
+ IFK::Parens, // ?R operator()
+ IFK::BitwiseNot, // ?S operator~
+ IFK::BitwiseXor, // ?T operator^
+ IFK::BitwiseOr, // ?U operator|
+ IFK::LogicalAnd, // ?V operator&&
+ IFK::LogicalOr, // ?W operator||
+ IFK::TimesEqual, // ?X operator*=
+ IFK::PlusEqual, // ?Y operator+=
+ IFK::MinusEqual, // ?Z operator-=
+ };
+ static IFK Under[36] = {
+ IFK::DivEqual, // ?_0 operator/=
+ IFK::ModEqual, // ?_1 operator%=
+ IFK::RshEqual, // ?_2 operator>>=
+ IFK::LshEqual, // ?_3 operator<<=
+ IFK::BitwiseAndEqual, // ?_4 operator&=
+ IFK::BitwiseOrEqual, // ?_5 operator|=
+ IFK::BitwiseXorEqual, // ?_6 operator^=
+ IFK::None, // ?_7 # vftable
+ IFK::None, // ?_8 # vbtable
+ IFK::None, // ?_9 # vcall
+ IFK::None, // ?_A # typeof
+ IFK::None, // ?_B # local static guard
+ IFK::None, // ?_C # string literal
+ IFK::VbaseDtor, // ?_D # vbase destructor
+ IFK::VecDelDtor, // ?_E # vector deleting destructor
+ IFK::DefaultCtorClosure, // ?_F # default constructor closure
+ IFK::ScalarDelDtor, // ?_G # scalar deleting destructor
+ IFK::VecCtorIter, // ?_H # vector constructor iterator
+ IFK::VecDtorIter, // ?_I # vector destructor iterator
+ IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor iterator
+ IFK::VdispMap, // ?_K # virtual displacement map
+ IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator
+ IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator
+ IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
+ IFK::CopyCtorClosure, // ?_O # copy constructor closure
+ IFK::None, // ?_P<name> # udt returning <name>
+ IFK::None, // ?_Q # <unknown>
+ IFK::None, // ?_R0 - ?_R4 # RTTI Codes
+ IFK::None, // ?_S # local vftable
+ IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure
+ IFK::ArrayNew, // ?_U operator new[]
+ IFK::ArrayDelete, // ?_V operator delete[]
+ IFK::None, // ?_W <unused>
+ IFK::None, // ?_X <unused>
+ IFK::None, // ?_Y <unused>
+ IFK::None, // ?_Z <unused>
+ };
+ static IFK DoubleUnder[36] = {
+ IFK::None, // ?__0 <unused>
+ IFK::None, // ?__1 <unused>
+ IFK::None, // ?__2 <unused>
+ IFK::None, // ?__3 <unused>
+ IFK::None, // ?__4 <unused>
+ IFK::None, // ?__5 <unused>
+ IFK::None, // ?__6 <unused>
+ IFK::None, // ?__7 <unused>
+ IFK::None, // ?__8 <unused>
+ IFK::None, // ?__9 <unused>
+ IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator
+ IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator
+ IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator
+ IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter
+ IFK::None, // ?__E dynamic initializer for `T'
+ IFK::None, // ?__F dynamic atexit destructor for `T'
+ IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter
+ IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter
+ IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor
+ // iter
+ IFK::None, // ?__J local static thread guard
+ IFK::None, // ?__K operator ""_name
+ IFK::CoAwait, // ?__L co_await
+ IFK::None, // ?__M <unused>
+ IFK::None, // ?__N <unused>
+ IFK::None, // ?__O <unused>
+ IFK::None, // ?__P <unused>
+ IFK::None, // ?__Q <unused>
+ IFK::None, // ?__R <unused>
+ IFK::None, // ?__S <unused>
+ IFK::None, // ?__T <unused>
+ IFK::None, // ?__U <unused>
+ IFK::None, // ?__V <unused>
+ IFK::None, // ?__W <unused>
+ IFK::None, // ?__X <unused>
+ IFK::None, // ?__Y <unused>
+ IFK::None, // ?__Z <unused>
+ };
+
+ int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
+ switch (Group) {
+ case FunctionIdentifierCodeGroup::Basic:
+ return Basic[Index];
+ case FunctionIdentifierCodeGroup::Under:
+ return Under[Index];
+ case FunctionIdentifierCodeGroup::DoubleUnder:
+ return DoubleUnder[Index];
+ }
+ LLVM_BUILTIN_UNREACHABLE;
}
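
For illustration only (an editor's sketch, not part of the diff): the 36-way index computed at the end of translateIntrinsicFunctionCode. '0'..'9' map to 0..9 and 'A'..'Z' to 10..35, which is why each operator table above has exactly 36 entries; the expected values in the asserts are read straight from the Basic/Under arrays.

#include <cassert>

// Same index computation as translateIntrinsicFunctionCode above.
static int operatorTableIndex(char CH) {
  return (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
}

int main() {
  assert(operatorTableIndex('4') == 4);  // "?4"  -> Basic[4]  == IFK::Assign (operator=)
  assert(operatorTableIndex('U') == 30); // "?_U" -> Under[30] == IFK::ArrayNew (operator new[])
  assert(operatorTableIndex('Z') == 35); // last slot of each table
  return 0;
}
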
-void UdtType::outputPre(OutputStream &OS) {
- switch (Prim) {
- case PrimTy::Class:
- OS << "class ";
- break;
- case PrimTy::Struct:
- OS << "struct ";
- break;
- case PrimTy::Union:
- OS << "union ";
- break;
- case PrimTy::Enum:
- OS << "enum ";
+IdentifierNode *
+Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
+ FunctionIdentifierCodeGroup Group) {
+ switch (Group) {
+ case FunctionIdentifierCodeGroup::Basic:
+ switch (char CH = MangledName.popFront()) {
+ case '0':
+ case '1':
+ return demangleStructorIdentifier(MangledName, CH == '1');
+ case 'B':
+ return demangleConversionOperatorIdentifier(MangledName);
+ default:
+ return Arena.alloc<IntrinsicFunctionIdentifierNode>(
+ translateIntrinsicFunctionCode(CH, Group));
+ }
break;
- default:
- assert(false && "Not a udt type!");
+ case FunctionIdentifierCodeGroup::Under:
+ return Arena.alloc<IntrinsicFunctionIdentifierNode>(
+ translateIntrinsicFunctionCode(MangledName.popFront(), Group));
+ case FunctionIdentifierCodeGroup::DoubleUnder:
+ switch (char CH = MangledName.popFront()) {
+ case 'K':
+ return demangleLiteralOperatorIdentifier(MangledName);
+ default:
+ return Arena.alloc<IntrinsicFunctionIdentifierNode>(
+ translateIntrinsicFunctionCode(CH, Group));
+ }
}
+ // No Mangling Yet: Spaceship, // operator<=>
- outputName(OS, UdtName);
+ return nullptr;
}
-Type *ArrayType::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<ArrayType>(*this);
-}
+SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
+ QualifiedNameNode *Name) {
+ // Read a variable.
+ switch (MangledName.front()) {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4': {
+ StorageClass SC = demangleVariableStorageClass(MangledName);
+ return demangleVariableEncoding(MangledName, SC);
+ }
+ case '8':
+ return nullptr;
+ }
+ FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
-void ArrayType::outputPre(OutputStream &OS) {
- Type::outputPre(OS, *ElementType);
+ IdentifierNode *UQN = Name->getUnqualifiedIdentifier();
+ if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
+ ConversionOperatorIdentifierNode *COIN =
+ static_cast<ConversionOperatorIdentifierNode *>(UQN);
+ COIN->TargetType = FSN->Signature->ReturnType;
+ }
+ return FSN;
}
-void ArrayType::outputPost(OutputStream &OS) {
- if (ArrayDimension > 0)
- OS << "[" << ArrayDimension << "]";
- if (NextDimension)
- Type::outputPost(OS, *NextDimension);
- else if (ElementType)
- Type::outputPost(OS, *ElementType);
-}
+// Parser entry point.
+SymbolNode *Demangler::parse(StringView &MangledName) {
+  // We can't demangle MD5 names, so just output them as-is.
+ // Also, MSVC-style mangled symbols must start with '?'.
+ if (MangledName.startsWith("??@")) {
+    // This is an MD5 mangled name. We can't demangle it, so just return the
+    // mangled name.
+ SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
+ S->Name = synthesizeQualifiedName(Arena, MangledName);
+ return S;
+ }
-struct Symbol {
- Name *SymbolName = nullptr;
- Type *SymbolType = nullptr;
-};
+ if (!MangledName.startsWith('?')) {
+ Error = true;
+ return nullptr;
+ }
-} // namespace
-
-namespace {
-
-// Demangler class takes the main role in demangling symbols.
-// It has a set of functions to parse mangled symbols into Type instances.
-// It also has a set of functions to cnovert Type instances to strings.
-class Demangler {
-public:
- Demangler() = default;
-
- // You are supposed to call parse() first and then check if error is true. If
- // it is false, call output() to write the formatted name to the given stream.
- Symbol *parse(StringView &MangledName);
- void output(const Symbol *S, OutputStream &OS);
-
- // True if an error occurred.
- bool Error = false;
-
-private:
- Type *demangleVariableEncoding(StringView &MangledName);
- Type *demangleFunctionEncoding(StringView &MangledName);
-
- Qualifiers demanglePointerExtQualifiers(StringView &MangledName);
-
- // Parser functions. This is a recursive-descent parser.
- Type *demangleType(StringView &MangledName, QualifierMangleMode QMM);
- Type *demangleBasicType(StringView &MangledName);
- UdtType *demangleClassType(StringView &MangledName);
- PointerType *demanglePointerType(StringView &MangledName);
- MemberPointerType *demangleMemberPointerType(StringView &MangledName);
- FunctionType *demangleFunctionType(StringView &MangledName, bool HasThisQuals,
- bool IsFunctionPointer);
-
- ArrayType *demangleArrayType(StringView &MangledName);
-
- TemplateParams *demangleTemplateParameterList(StringView &MangledName);
- FunctionParams demangleFunctionParameterList(StringView &MangledName);
-
- int demangleNumber(StringView &MangledName);
-
- void memorizeString(StringView s);
-
- /// Allocate a copy of \p Borrowed into memory that we own.
- StringView copyString(StringView Borrowed);
-
- Name *demangleFullyQualifiedTypeName(StringView &MangledName);
- Name *demangleFullyQualifiedSymbolName(StringView &MangledName);
-
- Name *demangleUnqualifiedTypeName(StringView &MangledName);
- Name *demangleUnqualifiedSymbolName(StringView &MangledName);
-
- Name *demangleNameScopeChain(StringView &MangledName, Name *UnqualifiedName);
- Name *demangleNameScopePiece(StringView &MangledName);
-
- Name *demangleBackRefName(StringView &MangledName);
- Name *demangleClassTemplateName(StringView &MangledName);
- Name *demangleOperatorName(StringView &MangledName);
- Name *demangleSimpleName(StringView &MangledName, bool Memorize);
- Name *demangleAnonymousNamespaceName(StringView &MangledName);
- Name *demangleLocallyScopedNamePiece(StringView &MangledName);
-
- StringView demangleSimpleString(StringView &MangledName, bool Memorize);
-
- FuncClass demangleFunctionClass(StringView &MangledName);
- CallingConv demangleCallingConvention(StringView &MangledName);
- StorageClass demangleVariableStorageClass(StringView &MangledName);
- ReferenceKind demangleReferenceKind(StringView &MangledName);
- void demangleThrowSpecification(StringView &MangledName);
-
- std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName);
-
- // Memory allocator.
- ArenaAllocator Arena;
-
- // A single type uses one global back-ref table for all function params.
- // This means back-refs can even go "into" other types. Examples:
- //
- // // Second int* is a back-ref to first.
- // void foo(int *, int*);
- //
- // // Second int* is not a back-ref to first (first is not a function param).
- // int* foo(int*);
- //
- // // Second int* is a back-ref to first (ALL function types share the same
- // // back-ref map.
- // using F = void(*)(int*);
- // F G(int *);
- Type *FunctionParamBackRefs[10];
- size_t FunctionParamBackRefCount = 0;
-
- // The first 10 BackReferences in a mangled name can be back-referenced by
- // special name @[0-9]. This is a storage for the first 10 BackReferences.
- StringView BackReferences[10];
- size_t BackRefCount = 0;
-};
-} // namespace
+ MangledName.consumeFront('?');
-StringView Demangler::copyString(StringView Borrowed) {
- char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1);
- std::strcpy(Stable, Borrowed.begin());
+ // ?$ is a template instantiation, but all other names that start with ? are
+ // operators / special names.
+ if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
+ return SI;
- return {Stable, Borrowed.size()};
-}
+ // What follows is a main symbol name. This may include namespaces or class
+ // back references.
+ QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
+ if (Error)
+ return nullptr;
-// Parser entry point.
-Symbol *Demangler::parse(StringView &MangledName) {
- Symbol *S = Arena.alloc<Symbol>();
-
- // MSVC-style mangled symbols must start with '?'.
- if (!MangledName.consumeFront("?")) {
- S->SymbolName = Arena.alloc<Name>();
- S->SymbolName->Str = MangledName;
- S->SymbolType = Arena.alloc<Type>();
- S->SymbolType->Prim = PrimTy::Unknown;
- return S;
+ SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
+ if (Symbol) {
+ Symbol->Name = QN;
}
- // What follows is a main symbol name. This may include
- // namespaces or class BackReferences.
- S->SymbolName = demangleFullyQualifiedSymbolName(MangledName);
+ if (Error)
+ return nullptr;
- // Read a variable.
- S->SymbolType = startsWithDigit(MangledName)
- ? demangleVariableEncoding(MangledName)
- : demangleFunctionEncoding(MangledName);
+ return Symbol;
+}
- return S;
+TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) {
+ if (!MangledName.consumeFront(".?A"))
+ return nullptr;
+ MangledName.consumeFront(".?A");
+ if (MangledName.empty())
+ return nullptr;
+
+ return demangleClassType(MangledName);
}
// <type-encoding> ::= <storage-class> <variable-type>
@@ -994,44 +752,41 @@ Symbol *Demangler::parse(StringView &MangledName) {
// ::= 3 # global
// ::= 4 # static local
-Type *Demangler::demangleVariableEncoding(StringView &MangledName) {
- StorageClass SC = demangleVariableStorageClass(MangledName);
+VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
+ StorageClass SC) {
+ VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
- Type *Ty = demangleType(MangledName, QualifierMangleMode::Drop);
-
- Ty->Storage = SC;
+ VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
+ VSN->SC = SC;
// <variable-type> ::= <type> <cvr-qualifiers>
// ::= <type> <pointee-cvr-qualifiers> # pointers, references
- switch (Ty->Prim) {
- case PrimTy::Ptr:
- case PrimTy::MemberPtr: {
+ switch (VSN->Type->kind()) {
+ case NodeKind::PointerType: {
+ PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type);
+
Qualifiers ExtraChildQuals = Q_None;
- Ty->Quals =
- Qualifiers(Ty->Quals | demanglePointerExtQualifiers(MangledName));
+ PTN->Quals = Qualifiers(VSN->Type->Quals |
+ demanglePointerExtQualifiers(MangledName));
bool IsMember = false;
std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
- if (Ty->Prim == PrimTy::MemberPtr) {
- assert(IsMember);
- Name *BackRefName = demangleFullyQualifiedTypeName(MangledName);
+ if (PTN->ClassParent) {
+ QualifiedNameNode *BackRefName =
+ demangleFullyQualifiedTypeName(MangledName);
(void)BackRefName;
- MemberPointerType *MPTy = static_cast<MemberPointerType *>(Ty);
- MPTy->Pointee->Quals = Qualifiers(MPTy->Pointee->Quals | ExtraChildQuals);
- } else {
- PointerType *PTy = static_cast<PointerType *>(Ty);
- PTy->Pointee->Quals = Qualifiers(PTy->Pointee->Quals | ExtraChildQuals);
}
+ PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals);
break;
}
default:
- Ty->Quals = demangleQualifiers(MangledName).first;
+ VSN->Type->Quals = demangleQualifiers(MangledName).first;
break;
}
- return Ty;
+ return VSN;
}
// Sometimes numbers are encoded in mangled symbols. For example,
@@ -1045,21 +800,21 @@ Type *Demangler::demangleVariableEncoding(StringView &MangledName) {
// ::= <hex digit>+ @ # when Number == 0 or >= 10
//
// <hex-digit> ::= [A-P] # A = 0, B = 1, ...
-int Demangler::demangleNumber(StringView &MangledName) {
- bool neg = MangledName.consumeFront("?");
+std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) {
+ bool IsNegative = MangledName.consumeFront('?');
if (startsWithDigit(MangledName)) {
- int32_t Ret = MangledName[0] - '0' + 1;
+ uint64_t Ret = MangledName[0] - '0' + 1;
MangledName = MangledName.dropFront(1);
- return neg ? -Ret : Ret;
+ return {Ret, IsNegative};
}
- int Ret = 0;
+ uint64_t Ret = 0;
for (size_t i = 0; i < MangledName.size(); ++i) {
char C = MangledName[i];
if (C == '@') {
MangledName = MangledName.dropFront(i + 1);
- return neg ? -Ret : Ret;
+ return {Ret, IsNegative};
}
if ('A' <= C && C <= 'P') {
Ret = (Ret << 4) + (C - 'A');
@@ -1069,191 +824,457 @@ int Demangler::demangleNumber(StringView &MangledName) {
}
Error = true;
- return 0;
+ return {0ULL, false};
+}
+
+uint64_t Demangler::demangleUnsigned(StringView &MangledName) {
+ bool IsNegative = false;
+ uint64_t Number = 0;
+ std::tie(Number, IsNegative) = demangleNumber(MangledName);
+ if (IsNegative)
+ Error = true;
+ return Number;
+}
+
+int64_t Demangler::demangleSigned(StringView &MangledName) {
+ bool IsNegative = false;
+ uint64_t Number = 0;
+ std::tie(Number, IsNegative) = demangleNumber(MangledName);
+ if (Number > INT64_MAX)
+ Error = true;
+ int64_t I = static_cast<int64_t>(Number);
+ return IsNegative ? -I : I;
}
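
A minimal standalone sketch (editor's illustration, not part of this change) of the number encoding parsed by demangleNumber and its demangleUnsigned/demangleSigned wrappers above, re-implemented over std::string so the arithmetic can be checked in isolation. A single decimal digit encodes value digit+1; otherwise 'A'..'P' are rebased hex nibbles (A=0 .. P=15) terminated by '@', and a leading '?' marks a negative value.

#include <cassert>
#include <string>

static long decodeMsvcNumber(const std::string &S) {
  size_t I = 0;
  bool Neg = !S.empty() && S[I] == '?';
  if (Neg)
    ++I;
  long Val = 0;
  if (S[I] >= '0' && S[I] <= '9') {
    Val = S[I] - '0' + 1;              // single decimal digit: values 1..10
  } else {
    for (; I < S.size() && S[I] != '@'; ++I)
      Val = (Val << 4) + (S[I] - 'A'); // rebased hex digits, '@'-terminated
  }
  return Neg ? -Val : Val;
}

int main() {
  assert(decodeMsvcNumber("4") == 5);    // '4' -> 4 + 1
  assert(decodeMsvcNumber("BK@") == 26); // B=1, K=10 -> 0x1A
  assert(decodeMsvcNumber("?4") == -5);  // leading '?' negates
  return 0;
}
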
// First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
// Memorize it.
void Demangler::memorizeString(StringView S) {
- if (BackRefCount >= sizeof(BackReferences) / sizeof(*BackReferences))
+ if (Backrefs.NamesCount >= BackrefContext::Max)
return;
- for (size_t i = 0; i < BackRefCount; ++i)
- if (S == BackReferences[i])
+ for (size_t i = 0; i < Backrefs.NamesCount; ++i)
+ if (S == Backrefs.Names[i]->Name)
return;
- BackReferences[BackRefCount++] = S;
+ NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>();
+ N->Name = S;
+ Backrefs.Names[Backrefs.NamesCount++] = N;
}
-Name *Demangler::demangleBackRefName(StringView &MangledName) {
+NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) {
assert(startsWithDigit(MangledName));
size_t I = MangledName[0] - '0';
- if (I >= BackRefCount) {
+ if (I >= Backrefs.NamesCount) {
Error = true;
return nullptr;
}
MangledName = MangledName.dropFront();
- Name *Node = Arena.alloc<Name>();
- Node->Str = BackReferences[I];
- return Node;
+ return Backrefs.Names[I];
}
-Name *Demangler::demangleClassTemplateName(StringView &MangledName) {
- assert(MangledName.startsWith("?$"));
- MangledName.consumeFront("?$");
-
- Name *Node = demangleSimpleName(MangledName, false);
- Node->TParams = demangleTemplateParameterList(MangledName);
-
+void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
// Render this class template name into a string buffer so that we can
// memorize it for the purpose of back-referencing.
- OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
- outputName(OS, Node);
+ OutputStream OS;
+ if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ // FIXME: Propagate out-of-memory as an error?
+ std::terminate();
+ Identifier->output(OS, OF_Default);
OS << '\0';
char *Name = OS.getBuffer();
StringView Owned = copyString(Name);
memorizeString(Owned);
std::free(Name);
-
- return Node;
}
-Name *Demangler::demangleOperatorName(StringView &MangledName) {
- assert(MangledName.startsWith('?'));
- MangledName.consumeFront('?');
+IdentifierNode *
+Demangler::demangleTemplateInstantiationName(StringView &MangledName,
+ NameBackrefBehavior NBB) {
+ assert(MangledName.startsWith("?$"));
+ MangledName.consumeFront("?$");
- auto NameString = [this, &MangledName]() -> StringView {
- switch (MangledName.popFront()) {
- case '0':
- return "ctor";
- case '1':
- return "dtor";
- case '2':
- return " new";
- case '3':
- return " delete";
- case '4':
- return "=";
- case '5':
- return ">>";
- case '6':
- return "<<";
- case '7':
- return "!";
- case '8':
- return "==";
- case '9':
- return "!=";
- case 'A':
- return "[]";
- case 'C':
- return "->";
- case 'D':
- return "*";
- case 'E':
- return "++";
- case 'F':
- return "--";
- case 'G':
- return "-";
- case 'H':
- return "+";
- case 'I':
- return "&";
- case 'J':
- return "->*";
- case 'K':
- return "/";
- case 'L':
- return "%";
- case 'M':
- return "<";
- case 'N':
- return "<=";
- case 'O':
- return ">";
- case 'P':
- return ">=";
- case 'Q':
- return ",";
- case 'R':
- return "()";
- case 'S':
- return "~";
- case 'T':
- return "^";
- case 'U':
- return "|";
- case 'V':
- return "&&";
- case 'W':
- return "||";
- case 'X':
- return "*=";
- case 'Y':
- return "+=";
- case 'Z':
- return "-=";
- case '_': {
- if (MangledName.empty())
- break;
+ BackrefContext OuterContext;
+ std::swap(OuterContext, Backrefs);
- switch (MangledName.popFront()) {
- case '0':
- return "/=";
- case '1':
- return "%=";
- case '2':
- return ">>=";
- case '3':
- return "<<=";
- case '4':
- return "&=";
- case '5':
- return "|=";
- case '6':
- return "^=";
- case 'U':
- return " new[]";
- case 'V':
- return " delete[]";
- case '_':
- if (MangledName.consumeFront("L"))
- return " co_await";
- if (MangledName.consumeFront("K")) {
- size_t EndPos = MangledName.find('@');
- if (EndPos == StringView::npos)
- break;
- StringView OpName = demangleSimpleString(MangledName, false);
- size_t FullSize = OpName.size() + 3; // <space>""OpName
- char *Buffer = Arena.allocUnalignedBuffer(FullSize);
- Buffer[0] = ' ';
- Buffer[1] = '"';
- Buffer[2] = '"';
- std::memcpy(Buffer + 3, OpName.begin(), OpName.size());
- return {Buffer, FullSize};
- }
- }
- }
- }
- Error = true;
- return "";
- };
+ IdentifierNode *Identifier =
+ demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
+ if (!Error)
+ Identifier->TemplateParams = demangleTemplateParameterList(MangledName);
- Name *Node = Arena.alloc<Name>();
- Node->Operator = NameString();
- return Node;
+ std::swap(OuterContext, Backrefs);
+ if (Error)
+ return nullptr;
+
+ if (NBB & NBB_Template)
+ memorizeIdentifier(Identifier);
+
+ return Identifier;
}
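
For reference, an editor's sketch (not part of the diff) of the swap-in/swap-out idiom used above: template arguments get a fresh back-reference table, and swapping twice restores the enclosing context even though parsing mutated the inner one. The Ctx struct is a hypothetical stand-in for BackrefContext.

#include <cassert>
#include <utility>

struct Ctx { int NamesCount = 0; };

int main() {
  Ctx Backrefs;                 // enclosing context
  Backrefs.NamesCount = 3;

  Ctx Outer;
  std::swap(Outer, Backrefs);   // enter template scope: Backrefs is now empty
  Backrefs.NamesCount = 7;      // back-refs recorded while parsing template args
  std::swap(Outer, Backrefs);   // leave template scope

  assert(Backrefs.NamesCount == 3); // enclosing table is intact
  return 0;
}
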
-Name *Demangler::demangleSimpleName(StringView &MangledName, bool Memorize) {
+NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName,
+ bool Memorize) {
StringView S = demangleSimpleString(MangledName, Memorize);
if (Error)
return nullptr;
- Name *Node = Arena.alloc<Name>();
- Node->Str = S;
- return Node;
+ NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>();
+ Name->Name = S;
+ return Name;
+}
+
+static bool isRebasedHexDigit(char C) { return (C >= 'A' && C <= 'P'); }
+
+static uint8_t rebasedHexDigitToNumber(char C) {
+ assert(isRebasedHexDigit(C));
+ return (C <= 'J') ? (C - 'A') : (10 + C - 'K');
+}
+
+uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
+ if (!MangledName.startsWith('?'))
+ return MangledName.popFront();
+
+ MangledName = MangledName.dropFront();
+ if (MangledName.empty())
+ goto CharLiteralError;
+
+ if (MangledName.consumeFront('$')) {
+ // Two hex digits
+ if (MangledName.size() < 2)
+ goto CharLiteralError;
+ StringView Nibbles = MangledName.substr(0, 2);
+ if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1]))
+ goto CharLiteralError;
+ // Don't append the null terminator.
+ uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]);
+ uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]);
+ MangledName = MangledName.dropFront(2);
+ return (C1 << 4) | C2;
+ }
+
+ if (startsWithDigit(MangledName)) {
+ const char *Lookup = ",/\\:. \n\t'-";
+ char C = Lookup[MangledName[0] - '0'];
+ MangledName = MangledName.dropFront();
+ return C;
+ }
+
+ if (MangledName[0] >= 'a' && MangledName[0] <= 'z') {
+ char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
+ '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE',
+ '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5',
+ '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'};
+ char C = Lookup[MangledName[0] - 'a'];
+ MangledName = MangledName.dropFront();
+ return C;
+ }
+
+ if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') {
+ char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
+ '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE',
+ '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5',
+ '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'};
+ char C = Lookup[MangledName[0] - 'A'];
+ MangledName = MangledName.dropFront();
+ return C;
+ }
+
+CharLiteralError:
+ Error = true;
+ return '\0';
+}
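
For illustration (an editor's sketch, not part of this change): the two simplest escape forms handled by demangleCharLiteral above. "?$XY" packs two rebased hex nibbles (A=0 .. P=15) into one byte, and "?0".."?9" index the punctuation table ",/\\:. \n\t'-"; the byte values in the asserts follow directly from those tables.

#include <cassert>

static unsigned char decodeDollarEscape(char Hi, char Lo) {
  return static_cast<unsigned char>(((Hi - 'A') << 4) | (Lo - 'A'));
}

int main() {
  assert(decodeDollarEscape('A', 'B') == 0x01); // "?$AB" -> byte 0x01
  assert(decodeDollarEscape('K', 'P') == 0xAF); // "?$KP" -> byte 0xAF
  const char Punct[] = ",/\\:. \n\t'-";
  assert(Punct[3] == ':');                      // "?3"   -> ':'
  return 0;
}
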
+
+wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) {
+ uint8_t C1, C2;
+
+ C1 = demangleCharLiteral(MangledName);
+ if (Error)
+ goto WCharLiteralError;
+ C2 = demangleCharLiteral(MangledName);
+ if (Error)
+ goto WCharLiteralError;
+
+ return ((wchar_t)C1 << 8) | (wchar_t)C2;
+
+WCharLiteralError:
+ Error = true;
+ return L'\0';
+}
+
+static void writeHexDigit(char *Buffer, uint8_t Digit) {
+ assert(Digit <= 15);
+ *Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10);
+}
+
+static void outputHex(OutputStream &OS, unsigned C) {
+ if (C == 0) {
+ OS << "\\x00";
+ return;
+ }
+ // It's easier to do the math if we can work from right to left, but we need
+ // to print the numbers from left to right. So render this into a temporary
+ // buffer first, then output the temporary buffer. Each byte is of the form
+ // \xAB, which means that each byte needs 4 characters. Since there are at
+ // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer.
+ char TempBuffer[17];
+
+ ::memset(TempBuffer, 0, sizeof(TempBuffer));
+ constexpr int MaxPos = 15;
+
+ int Pos = MaxPos - 1;
+ while (C != 0) {
+ for (int I = 0; I < 2; ++I) {
+ writeHexDigit(&TempBuffer[Pos--], C % 16);
+ C /= 16;
+ }
+ TempBuffer[Pos--] = 'x';
+ TempBuffer[Pos--] = '\\';
+ assert(Pos >= 0);
+ }
+ OS << StringView(&TempBuffer[Pos + 1]);
+}
+
+static void outputEscapedChar(OutputStream &OS, unsigned C) {
+ switch (C) {
+ case '\'': // single quote
+ OS << "\\\'";
+ return;
+ case '\"': // double quote
+ OS << "\\\"";
+ return;
+ case '\\': // backslash
+ OS << "\\\\";
+ return;
+ case '\a': // bell
+ OS << "\\a";
+ return;
+ case '\b': // backspace
+ OS << "\\b";
+ return;
+ case '\f': // form feed
+ OS << "\\f";
+ return;
+ case '\n': // new line
+ OS << "\\n";
+ return;
+ case '\r': // carriage return
+ OS << "\\r";
+ return;
+ case '\t': // tab
+ OS << "\\t";
+ return;
+ case '\v': // vertical tab
+ OS << "\\v";
+ return;
+ default:
+ break;
+ }
+
+ if (C > 0x1F && C < 0x7F) {
+ // Standard ascii char.
+ OS << (char)C;
+ return;
+ }
+
+ outputHex(OS, C);
+}
+
+static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
+ const uint8_t *End = StringBytes + Length - 1;
+ unsigned Count = 0;
+ while (Length > 0 && *End == 0) {
+ --Length;
+ --End;
+ ++Count;
+ }
+ return Count;
+}
+
+static unsigned countEmbeddedNulls(const uint8_t *StringBytes,
+ unsigned Length) {
+ unsigned Result = 0;
+ for (unsigned I = 0; I < Length; ++I) {
+ if (*StringBytes++ == 0)
+ ++Result;
+ }
+ return Result;
+}
+
+static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
+ unsigned NumBytes) {
+ assert(NumBytes > 0);
+
+ // If the number of bytes is odd, this is guaranteed to be a char string.
+ if (NumBytes % 2 == 1)
+ return 1;
+
+ // All strings can encode at most 32 bytes of data. If it's less than that,
+ // then we encoded the entire string. In this case we check for a 1-byte,
+ // 2-byte, or 4-byte null terminator.
+ if (NumBytes < 32) {
+ unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
+ if (TrailingNulls >= 4)
+ return 4;
+ if (TrailingNulls >= 2)
+ return 2;
+ return 1;
+ }
+
+ // The whole string was not able to be encoded. Try to look at embedded null
+ // terminators to guess. The heuristic is that we count all embedded null
+ // terminators. If more than 2/3 are null, it's a char32. If more than 1/3
+ // are null, it's a char16. Otherwise it's a char8. This obviously isn't
+ // perfect and is biased towards languages that have ascii alphabets, but this
+ // was always going to be best effort since the encoding is lossy.
+ unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
+ if (Nulls >= 2 * NumChars / 3)
+ return 4;
+ if (Nulls >= NumChars / 3)
+ return 2;
+ return 1;
+}
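
An editor's illustration (not part of the diff) of the short-string branch of guessCharByteSize above, using a hypothetical little-endian 2-byte encoding of "hi". With fewer than 32 bytes the whole literal, including its terminator, was encoded, so the length of the trailing run of zero bytes is what drives the character-size guess.

#include <cassert>
#include <cstdint>

int main() {
  // "hi" as 2-byte chars plus terminator: 68 00 69 00 00 00.
  const uint8_t Wide[] = {0x68, 0x00, 0x69, 0x00, 0x00, 0x00};
  unsigned Trailing = 0;
  for (int I = static_cast<int>(sizeof(Wide)) - 1; I >= 0 && Wide[I] == 0; --I)
    ++Trailing;
  assert(Trailing == 3); // >= 2 but < 4, so the heuristic guesses 2-byte chars
  return 0;
}
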
+
+static unsigned decodeMultiByteChar(const uint8_t *StringBytes,
+ unsigned CharIndex, unsigned CharBytes) {
+ assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4);
+ unsigned Offset = CharIndex * CharBytes;
+ unsigned Result = 0;
+ StringBytes = StringBytes + Offset;
+ for (unsigned I = 0; I < CharBytes; ++I) {
+ unsigned C = static_cast<unsigned>(StringBytes[I]);
+ Result |= C << (8 * I);
+ }
+ return Result;
+}
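
A small standalone sketch (editor's illustration, not part of this change) of the little-endian reassembly done by decodeMultiByteChar above: each character is rebuilt from CharBytes consecutive bytes starting at CharIndex * CharBytes.

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Bytes[] = {0x68, 0x00, 0x69, 0x00}; // "hi" as 2-byte chars
  unsigned C0 = Bytes[0] | (Bytes[1] << 8);         // char 0 -> 0x0068 'h'
  unsigned C1 = Bytes[2] | (Bytes[3] << 8);         // char 1 -> 0x0069 'i'
  assert(C0 == 0x68 && C1 == 0x69);
  return 0;
}
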
+
+FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) {
+ FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>();
+ VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>();
+ FSN->Signature = Arena.alloc<ThunkSignatureNode>();
+ FSN->Signature->FunctionClass = FC_NoParameterList;
+
+ FSN->Name = demangleNameScopeChain(MangledName, VTIN);
+ if (!Error)
+ Error = !MangledName.consumeFront("$B");
+ if (!Error)
+ VTIN->OffsetInVTable = demangleUnsigned(MangledName);
+ if (!Error)
+ Error = !MangledName.consumeFront('A');
+ if (!Error)
+ FSN->Signature->CallConvention = demangleCallingConvention(MangledName);
+ return (Error) ? nullptr : FSN;
+}
+
+EncodedStringLiteralNode *
+Demangler::demangleStringLiteral(StringView &MangledName) {
+ // This function uses goto, so declare all variables up front.
+ OutputStream OS;
+ StringView CRC;
+ uint64_t StringByteSize;
+ bool IsWcharT = false;
+ bool IsNegative = false;
+ size_t CrcEndPos = 0;
+ char *ResultBuffer = nullptr;
+
+ EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
+
+ // Prefix indicating the beginning of a string literal
+ if (!MangledName.consumeFront("@_"))
+ goto StringLiteralError;
+ if (MangledName.empty())
+ goto StringLiteralError;
+
+ // Char Type (regular or wchar_t)
+ switch (MangledName.popFront()) {
+ case '1':
+ IsWcharT = true;
+ LLVM_FALLTHROUGH;
+ case '0':
+ break;
+ default:
+ goto StringLiteralError;
+ }
+
+ // Encoded Length
+ std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName);
+ if (Error || IsNegative)
+ goto StringLiteralError;
+
+ // CRC 32 (always 8 characters plus a terminator)
+ CrcEndPos = MangledName.find('@');
+ if (CrcEndPos == StringView::npos)
+ goto StringLiteralError;
+ CRC = MangledName.substr(0, CrcEndPos);
+ MangledName = MangledName.dropFront(CrcEndPos + 1);
+ if (MangledName.empty())
+ goto StringLiteralError;
+
+ if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ // FIXME: Propagate out-of-memory as an error?
+ std::terminate();
+ if (IsWcharT) {
+ Result->Char = CharKind::Wchar;
+ if (StringByteSize > 64)
+ Result->IsTruncated = true;
+
+ while (!MangledName.consumeFront('@')) {
+ assert(StringByteSize >= 2);
+ wchar_t W = demangleWcharLiteral(MangledName);
+ if (StringByteSize != 2 || Result->IsTruncated)
+ outputEscapedChar(OS, W);
+ StringByteSize -= 2;
+ if (Error)
+ goto StringLiteralError;
+ }
+ } else {
+ // The max byte length is actually 32, but some compilers mangled strings
+ // incorrectly, so we have to assume it can go higher.
+ constexpr unsigned MaxStringByteLength = 32 * 4;
+ uint8_t StringBytes[MaxStringByteLength];
+
+ unsigned BytesDecoded = 0;
+ while (!MangledName.consumeFront('@')) {
+ assert(StringByteSize >= 1);
+ StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
+ }
+
+ if (StringByteSize > BytesDecoded)
+ Result->IsTruncated = true;
+
+ unsigned CharBytes =
+ guessCharByteSize(StringBytes, BytesDecoded, StringByteSize);
+ assert(StringByteSize % CharBytes == 0);
+ switch (CharBytes) {
+ case 1:
+ Result->Char = CharKind::Char;
+ break;
+ case 2:
+ Result->Char = CharKind::Char16;
+ break;
+ case 4:
+ Result->Char = CharKind::Char32;
+ break;
+ default:
+ LLVM_BUILTIN_UNREACHABLE;
+ }
+ const unsigned NumChars = BytesDecoded / CharBytes;
+ for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) {
+ unsigned NextChar =
+ decodeMultiByteChar(StringBytes, CharIndex, CharBytes);
+ if (CharIndex + 1 < NumChars || Result->IsTruncated)
+ outputEscapedChar(OS, NextChar);
+ }
+ }
+
+ OS << '\0';
+ ResultBuffer = OS.getBuffer();
+ Result->DecodedString = copyString(ResultBuffer);
+ std::free(ResultBuffer);
+ return Result;
+
+StringLiteralError:
+ Error = true;
+ return nullptr;
}
StringView Demangler::demangleSimpleString(StringView &MangledName,
@@ -1274,70 +1295,104 @@ StringView Demangler::demangleSimpleString(StringView &MangledName,
return {};
}
-Name *Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
+NamedIdentifierNode *
+Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
assert(MangledName.startsWith("?A"));
MangledName.consumeFront("?A");
- Name *Node = Arena.alloc<Name>();
- Node->Str = "`anonymous namespace'";
- if (MangledName.consumeFront('@'))
- return Node;
-
- Error = true;
- return nullptr;
+ NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>();
+ Node->Name = "`anonymous namespace'";
+ size_t EndPos = MangledName.find('@');
+ if (EndPos == StringView::npos) {
+ Error = true;
+ return nullptr;
+ }
+ StringView NamespaceKey = MangledName.substr(0, EndPos);
+ memorizeString(NamespaceKey);
+ MangledName = MangledName.substr(EndPos + 1);
+ return Node;
}
-Name *Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
+NamedIdentifierNode *
+Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
assert(startsWithLocalScopePattern(MangledName));
- Name *Node = Arena.alloc<Name>();
+ NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
MangledName.consumeFront('?');
- int ScopeIdentifier = demangleNumber(MangledName);
+ auto Number = demangleNumber(MangledName);
+ assert(!Number.second);
// One ? to terminate the number
MangledName.consumeFront('?');
assert(!Error);
- Symbol *Scope = parse(MangledName);
+ Node *Scope = parse(MangledName);
if (Error)
return nullptr;
// Render the parent symbol's name into a buffer.
- OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
+ OutputStream OS;
+ if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ // FIXME: Propagate out-of-memory as an error?
+ std::terminate();
OS << '`';
- output(Scope, OS);
+ Scope->output(OS, OF_Default);
OS << '\'';
- OS << "::`" << ScopeIdentifier << "'";
+ OS << "::`" << Number.first << "'";
OS << '\0';
char *Result = OS.getBuffer();
- Node->Str = copyString(Result);
+ Identifier->Name = copyString(Result);
std::free(Result);
- return Node;
+ return Identifier;
}
// Parses a type name in the form of A@B@C@@ which represents C::B::A.
-Name *Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
- Name *TypeName = demangleUnqualifiedTypeName(MangledName);
- assert(TypeName);
+QualifiedNameNode *
+Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
+ IdentifierNode *Identifier = demangleUnqualifiedTypeName(MangledName, true);
+ if (Error)
+ return nullptr;
+ assert(Identifier);
- Name *QualName = demangleNameScopeChain(MangledName, TypeName);
- assert(QualName);
- return QualName;
+ QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
+ if (Error)
+ return nullptr;
+ assert(QN);
+ return QN;
}
// Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
// Symbol names have slightly different rules regarding what can appear
// so we separate out the implementations for flexibility.
-Name *Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
- Name *SymbolName = demangleUnqualifiedSymbolName(MangledName);
- assert(SymbolName);
+QualifiedNameNode *
+Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
+  // This is the final component of a symbol name (i.e. the leftmost component
+  // of a mangled name). Since the only possible template instantiation that
+ // can appear in this context is a function template, and since those are
+ // not saved for the purposes of name backreferences, only backref simple
+ // names.
+ IdentifierNode *Identifier =
+ demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
+ if (Error)
+ return nullptr;
- Name *QualName = demangleNameScopeChain(MangledName, SymbolName);
- assert(QualName);
- return QualName;
+ QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
+ if (Error)
+ return nullptr;
+
+ if (Identifier->kind() == NodeKind::StructorIdentifier) {
+ StructorIdentifierNode *SIN =
+ static_cast<StructorIdentifierNode *>(Identifier);
+ assert(QN->Components->Count >= 2);
+ Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
+ SIN->Class = static_cast<IdentifierNode *>(ClassNode);
+ }
+ assert(QN);
+ return QN;
}
-Name *Demangler::demangleUnqualifiedTypeName(StringView &MangledName) {
+IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName,
+ bool Memorize) {
// An inner-most name can be a back-reference, because a fully-qualified name
// (e.g. Scope + Inner) can contain other fully qualified names inside of
// them (for example template parameters), and these nested parameters can
@@ -1346,27 +1401,29 @@ Name *Demangler::demangleUnqualifiedTypeName(StringView &MangledName) {
return demangleBackRefName(MangledName);
if (MangledName.startsWith("?$"))
- return demangleClassTemplateName(MangledName);
+ return demangleTemplateInstantiationName(MangledName, NBB_Template);
- return demangleSimpleName(MangledName, true);
+ return demangleSimpleName(MangledName, Memorize);
}
-Name *Demangler::demangleUnqualifiedSymbolName(StringView &MangledName) {
+IdentifierNode *
+Demangler::demangleUnqualifiedSymbolName(StringView &MangledName,
+ NameBackrefBehavior NBB) {
if (startsWithDigit(MangledName))
return demangleBackRefName(MangledName);
if (MangledName.startsWith("?$"))
- return demangleClassTemplateName(MangledName);
+ return demangleTemplateInstantiationName(MangledName, NBB);
if (MangledName.startsWith('?'))
- return demangleOperatorName(MangledName);
- return demangleSimpleName(MangledName, true);
+ return demangleFunctionIdentifierCode(MangledName);
+ return demangleSimpleName(MangledName, (NBB & NBB_Simple) != 0);
}
-Name *Demangler::demangleNameScopePiece(StringView &MangledName) {
+IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
if (startsWithDigit(MangledName))
return demangleBackRefName(MangledName);
if (MangledName.startsWith("?$"))
- return demangleClassTemplateName(MangledName);
+ return demangleTemplateInstantiationName(MangledName, NBB_Template);
if (MangledName.startsWith("?A"))
return demangleAnonymousNamespaceName(MangledName);
@@ -1377,77 +1434,130 @@ Name *Demangler::demangleNameScopePiece(StringView &MangledName) {
return demangleSimpleName(MangledName, true);
}
-Name *Demangler::demangleNameScopeChain(StringView &MangledName,
- Name *UnqualifiedName) {
- Name *Head = UnqualifiedName;
+static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
+ size_t Count) {
+ NodeArrayNode *N = Arena.alloc<NodeArrayNode>();
+ N->Count = Count;
+ N->Nodes = Arena.allocArray<Node *>(Count);
+ for (size_t I = 0; I < Count; ++I) {
+ N->Nodes[I] = Head->N;
+ Head = Head->Next;
+ }
+ return N;
+}
+
+QualifiedNameNode *
+Demangler::demangleNameScopeChain(StringView &MangledName,
+ IdentifierNode *UnqualifiedName) {
+ NodeList *Head = Arena.alloc<NodeList>();
+ Head->N = UnqualifiedName;
+
+ size_t Count = 1;
while (!MangledName.consumeFront("@")) {
+ ++Count;
+ NodeList *NewHead = Arena.alloc<NodeList>();
+ NewHead->Next = Head;
+ Head = NewHead;
+
if (MangledName.empty()) {
Error = true;
return nullptr;
}
assert(!Error);
- Name *Elem = demangleNameScopePiece(MangledName);
+ IdentifierNode *Elem = demangleNameScopePiece(MangledName);
if (Error)
return nullptr;
- Elem->Next = Head;
- Head = Elem;
+ Head->N = Elem;
}
- return Head;
+
+ QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
+ QN->Components = nodeListToNodeArray(Arena, Head, Count);
+ return QN;
}
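
For reference, an editor's sketch (not part of the diff) of why the prepend-then-flatten scheme above renders "A@B@C@@" as C::B::A: components are parsed inner-to-outer (A, then B, then C), each new scope is pushed in front of the list, and the flattened array is then printed left to right.

#include <cassert>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Components;
  for (const char *Piece : {"A", "B", "C"})        // parse order of A@B@C@@
    Components.insert(Components.begin(), Piece);  // prepend, like NewHead above

  std::string Printed;
  for (size_t I = 0; I < Components.size(); ++I)
    Printed += (I ? "::" : "") + Components[I];
  assert(Printed == "C::B::A");
  return 0;
}
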
FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
- SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName);
- RestoreOnError.shouldRestore(false);
-
switch (MangledName.popFront()) {
+ case '9':
+ return FuncClass(FC_ExternC | FC_NoParameterList);
case 'A':
- return Private;
+ return FC_Private;
case 'B':
- return FuncClass(Private | Far);
+ return FuncClass(FC_Private | FC_Far);
case 'C':
- return FuncClass(Private | Static);
+ return FuncClass(FC_Private | FC_Static);
case 'D':
- return FuncClass(Private | Static);
+ return FuncClass(FC_Private | FC_Static);
case 'E':
- return FuncClass(Private | Virtual);
+ return FuncClass(FC_Private | FC_Virtual);
case 'F':
- return FuncClass(Private | Virtual);
+ return FuncClass(FC_Private | FC_Virtual);
+ case 'G':
+ return FuncClass(FC_Private | FC_StaticThisAdjust);
+ case 'H':
+ return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far);
case 'I':
- return Protected;
+ return FuncClass(FC_Protected);
case 'J':
- return FuncClass(Protected | Far);
+ return FuncClass(FC_Protected | FC_Far);
case 'K':
- return FuncClass(Protected | Static);
+ return FuncClass(FC_Protected | FC_Static);
case 'L':
- return FuncClass(Protected | Static | Far);
+ return FuncClass(FC_Protected | FC_Static | FC_Far);
case 'M':
- return FuncClass(Protected | Virtual);
+ return FuncClass(FC_Protected | FC_Virtual);
case 'N':
- return FuncClass(Protected | Virtual | Far);
+ return FuncClass(FC_Protected | FC_Virtual | FC_Far);
+ case 'O':
+ return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust);
+ case 'P':
+ return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far);
case 'Q':
- return Public;
+ return FuncClass(FC_Public);
case 'R':
- return FuncClass(Public | Far);
+ return FuncClass(FC_Public | FC_Far);
case 'S':
- return FuncClass(Public | Static);
+ return FuncClass(FC_Public | FC_Static);
case 'T':
- return FuncClass(Public | Static | Far);
+ return FuncClass(FC_Public | FC_Static | FC_Far);
case 'U':
- return FuncClass(Public | Virtual);
+ return FuncClass(FC_Public | FC_Virtual);
case 'V':
- return FuncClass(Public | Virtual | Far);
+ return FuncClass(FC_Public | FC_Virtual | FC_Far);
+ case 'W':
+ return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust);
+ case 'X':
+ return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far);
case 'Y':
- return Global;
+ return FuncClass(FC_Global);
case 'Z':
- return FuncClass(Global | Far);
+ return FuncClass(FC_Global | FC_Far);
+ case '$': {
+ FuncClass VFlag = FC_VirtualThisAdjust;
+ if (MangledName.consumeFront('R'))
+ VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
+
+ switch (MangledName.popFront()) {
+ case '0':
+ return FuncClass(FC_Private | FC_Virtual | VFlag);
+ case '1':
+ return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far);
+ case '2':
+ return FuncClass(FC_Protected | FC_Virtual | VFlag);
+ case '3':
+ return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far);
+ case '4':
+ return FuncClass(FC_Public | FC_Virtual | VFlag);
+ case '5':
+ return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far);
+ }
+ }
}
Error = true;
- RestoreOnError.shouldRestore(true);
- return Public;
+ return FC_Public;
}
CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
@@ -1526,112 +1636,68 @@ Demangler::demangleQualifiers(StringView &MangledName) {
return std::make_pair(Q_None, false);
}
-static bool isTagType(StringView S) {
- switch (S.front()) {
- case 'T': // union
- case 'U': // struct
- case 'V': // class
- case 'W': // enum
- return true;
- }
- return false;
-}
-
-static bool isPointerType(StringView S) {
- if (S.startsWith("$$Q")) // foo &&
- return true;
-
- switch (S.front()) {
- case 'A': // foo &
- case 'P': // foo *
- case 'Q': // foo *const
- case 'R': // foo *volatile
- case 'S': // foo *const volatile
- return true;
- }
- return false;
-}
-
-static bool isArrayType(StringView S) { return S[0] == 'Y'; }
-
-static bool isFunctionType(StringView S) {
- return S.startsWith("$$A8@@") || S.startsWith("$$A6");
-}
-
// <variable-type> ::= <type> <cvr-qualifiers>
// ::= <type> <pointee-cvr-qualifiers> # pointers, references
-Type *Demangler::demangleType(StringView &MangledName,
- QualifierMangleMode QMM) {
+TypeNode *Demangler::demangleType(StringView &MangledName,
+ QualifierMangleMode QMM) {
Qualifiers Quals = Q_None;
bool IsMember = false;
- bool IsMemberKnown = false;
if (QMM == QualifierMangleMode::Mangle) {
std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
- IsMemberKnown = true;
} else if (QMM == QualifierMangleMode::Result) {
- if (MangledName.consumeFront('?')) {
+ if (MangledName.consumeFront('?'))
std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
- IsMemberKnown = true;
- }
}
- Type *Ty = nullptr;
+ TypeNode *Ty = nullptr;
if (isTagType(MangledName))
Ty = demangleClassType(MangledName);
else if (isPointerType(MangledName)) {
- if (!IsMemberKnown)
- IsMember = isMemberPointer(MangledName);
-
- if (IsMember)
+ if (isMemberPointer(MangledName, Error))
Ty = demangleMemberPointerType(MangledName);
- else
+ else if (!Error)
Ty = demanglePointerType(MangledName);
+ else
+ return nullptr;
} else if (isArrayType(MangledName))
Ty = demangleArrayType(MangledName);
else if (isFunctionType(MangledName)) {
if (MangledName.consumeFront("$$A8@@"))
- Ty = demangleFunctionType(MangledName, true, false);
+ Ty = demangleFunctionType(MangledName, true);
else {
assert(MangledName.startsWith("$$A6"));
MangledName.consumeFront("$$A6");
- Ty = demangleFunctionType(MangledName, false, false);
+ Ty = demangleFunctionType(MangledName, false);
}
+ } else if (isCustomType(MangledName)) {
+ Ty = demangleCustomType(MangledName);
} else {
- Ty = demangleBasicType(MangledName);
- assert(Ty && !Error);
- if (!Ty || Error)
- return Ty;
+ Ty = demanglePrimitiveType(MangledName);
}
+ if (!Ty || Error)
+ return Ty;
Ty->Quals = Qualifiers(Ty->Quals | Quals);
return Ty;
}
-ReferenceKind Demangler::demangleReferenceKind(StringView &MangledName) {
- if (MangledName.consumeFront('G'))
- return ReferenceKind::LValueRef;
- else if (MangledName.consumeFront('H'))
- return ReferenceKind::RValueRef;
- return ReferenceKind::None;
-}
-
-void Demangler::demangleThrowSpecification(StringView &MangledName) {
+bool Demangler::demangleThrowSpecification(StringView &MangledName) {
+ if (MangledName.consumeFront("_E"))
+ return true;
if (MangledName.consumeFront('Z'))
- return;
+ return false;
Error = true;
+ return false;
}
-FunctionType *Demangler::demangleFunctionType(StringView &MangledName,
- bool HasThisQuals,
- bool IsFunctionPointer) {
- FunctionType *FTy = Arena.alloc<FunctionType>();
- FTy->Prim = PrimTy::Function;
- FTy->IsFunctionPointer = IsFunctionPointer;
+FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
+ bool HasThisQuals) {
+ FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();
if (HasThisQuals) {
FTy->Quals = demanglePointerExtQualifiers(MangledName);
- FTy->RefKind = demangleReferenceKind(MangledName);
+ FTy->RefQualifier = demangleFunctionRefQualifier(MangledName);
FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
}
@@ -1646,70 +1712,100 @@ FunctionType *Demangler::demangleFunctionType(StringView &MangledName,
FTy->Params = demangleFunctionParameterList(MangledName);
- demangleThrowSpecification(MangledName);
+ FTy->IsNoexcept = demangleThrowSpecification(MangledName);
return FTy;
}
-Type *Demangler::demangleFunctionEncoding(StringView &MangledName) {
+FunctionSymbolNode *
+Demangler::demangleFunctionEncoding(StringView &MangledName) {
+ FuncClass ExtraFlags = FC_None;
+ if (MangledName.consumeFront("$$J0"))
+ ExtraFlags = FC_ExternC;
+
FuncClass FC = demangleFunctionClass(MangledName);
+ FC = FuncClass(ExtraFlags | FC);
+
+ FunctionSignatureNode *FSN = nullptr;
+ ThunkSignatureNode *TTN = nullptr;
+ if (FC & FC_StaticThisAdjust) {
+ TTN = Arena.alloc<ThunkSignatureNode>();
+ TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
+ } else if (FC & FC_VirtualThisAdjust) {
+ TTN = Arena.alloc<ThunkSignatureNode>();
+ if (FC & FC_VirtualThisAdjustEx) {
+ TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName);
+ TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName);
+ }
+ TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName);
+ TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
+ }
- bool HasThisQuals = !(FC & (Global | Static));
- FunctionType *FTy = demangleFunctionType(MangledName, HasThisQuals, false);
- FTy->FunctionClass = FC;
+ if (FC & FC_NoParameterList) {
+ // This is an extern "C" function whose full signature hasn't been mangled.
+ // This happens when we need to mangle a local symbol inside of an extern
+ // "C" function.
+ FSN = Arena.alloc<FunctionSignatureNode>();
+ } else {
+ bool HasThisQuals = !(FC & (FC_Global | FC_Static));
+ FSN = demangleFunctionType(MangledName, HasThisQuals);
+ }
+ if (TTN) {
+ *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
+ FSN = TTN;
+ }
+ FSN->FunctionClass = FC;
- return FTy;
+ FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>();
+ Symbol->Signature = FSN;
+ return Symbol;
}
-// Reads a primitive type.
-Type *Demangler::demangleBasicType(StringView &MangledName) {
- Type *Ty = Arena.alloc<Type>();
+CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) {
+ assert(MangledName.startsWith('?'));
+ MangledName.popFront();
- if (MangledName.consumeFront("$$T")) {
- Ty->Prim = PrimTy::Nullptr;
- return Ty;
- }
+ CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
+ CTN->Identifier = demangleUnqualifiedTypeName(MangledName, true);
+ if (!MangledName.consumeFront('@'))
+ Error = true;
+ if (Error)
+ return nullptr;
+ return CTN;
+}
+
+// Reads a primitive type.
+PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
+ if (MangledName.consumeFront("$$T"))
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr);
switch (MangledName.popFront()) {
case 'X':
- Ty->Prim = PrimTy::Void;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void);
case 'D':
- Ty->Prim = PrimTy::Char;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char);
case 'C':
- Ty->Prim = PrimTy::Schar;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar);
case 'E':
- Ty->Prim = PrimTy::Uchar;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar);
case 'F':
- Ty->Prim = PrimTy::Short;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short);
case 'G':
- Ty->Prim = PrimTy::Ushort;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort);
case 'H':
- Ty->Prim = PrimTy::Int;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int);
case 'I':
- Ty->Prim = PrimTy::Uint;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint);
case 'J':
- Ty->Prim = PrimTy::Long;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long);
case 'K':
- Ty->Prim = PrimTy::Ulong;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong);
case 'M':
- Ty->Prim = PrimTy::Float;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float);
case 'N':
- Ty->Prim = PrimTy::Double;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double);
case 'O':
- Ty->Prim = PrimTy::Ldouble;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble);
case '_': {
if (MangledName.empty()) {
Error = true;
@@ -1717,98 +1813,63 @@ Type *Demangler::demangleBasicType(StringView &MangledName) {
}
switch (MangledName.popFront()) {
case 'N':
- Ty->Prim = PrimTy::Bool;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool);
case 'J':
- Ty->Prim = PrimTy::Int64;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64);
case 'K':
- Ty->Prim = PrimTy::Uint64;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
case 'W':
- Ty->Prim = PrimTy::Wchar;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
case 'S':
- Ty->Prim = PrimTy::Char16;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
case 'U':
- Ty->Prim = PrimTy::Char32;
- break;
- default:
- Error = true;
- return nullptr;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32);
}
break;
}
- default:
- Error = true;
- return nullptr;
}
- return Ty;
+ Error = true;
+ return nullptr;
}
-UdtType *Demangler::demangleClassType(StringView &MangledName) {
- UdtType *UTy = Arena.alloc<UdtType>();
+TagTypeNode *Demangler::demangleClassType(StringView &MangledName) {
+ TagTypeNode *TT = nullptr;
switch (MangledName.popFront()) {
case 'T':
- UTy->Prim = PrimTy::Union;
+ TT = Arena.alloc<TagTypeNode>(TagKind::Union);
break;
case 'U':
- UTy->Prim = PrimTy::Struct;
+ TT = Arena.alloc<TagTypeNode>(TagKind::Struct);
break;
case 'V':
- UTy->Prim = PrimTy::Class;
+ TT = Arena.alloc<TagTypeNode>(TagKind::Class);
break;
case 'W':
if (MangledName.popFront() != '4') {
Error = true;
return nullptr;
}
- UTy->Prim = PrimTy::Enum;
+ TT = Arena.alloc<TagTypeNode>(TagKind::Enum);
break;
default:
assert(false);
}
- UTy->UdtName = demangleFullyQualifiedTypeName(MangledName);
- return UTy;
-}
-
-static std::pair<Qualifiers, PointerAffinity>
-demanglePointerCVQualifiers(StringView &MangledName) {
- if (MangledName.consumeFront("$$Q"))
- return std::make_pair(Q_None, PointerAffinity::RValueReference);
-
- switch (MangledName.popFront()) {
- case 'A':
- return std::make_pair(Q_None, PointerAffinity::Reference);
- case 'P':
- return std::make_pair(Q_None, PointerAffinity::Pointer);
- case 'Q':
- return std::make_pair(Q_Const, PointerAffinity::Pointer);
- case 'R':
- return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
- case 'S':
- return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
- PointerAffinity::Pointer);
- default:
- assert(false && "Ty is not a pointer type!");
- }
- return std::make_pair(Q_None, PointerAffinity::Pointer);
+ TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName);
+ return TT;
}
// <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
// # the E is required for 64-bit non-static pointers
-PointerType *Demangler::demanglePointerType(StringView &MangledName) {
- PointerType *Pointer = Arena.alloc<PointerType>();
+PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) {
+ PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
std::tie(Pointer->Quals, Pointer->Affinity) =
demanglePointerCVQualifiers(MangledName);
- Pointer->Prim = PrimTy::Ptr;
if (MangledName.consumeFront("6")) {
- Pointer->Pointee = demangleFunctionType(MangledName, false, true);
+ Pointer->Pointee = demangleFunctionType(MangledName, false);
return Pointer;
}
@@ -1819,27 +1880,25 @@ PointerType *Demangler::demanglePointerType(StringView &MangledName) {
return Pointer;
}
-MemberPointerType *
-Demangler::demangleMemberPointerType(StringView &MangledName) {
- MemberPointerType *Pointer = Arena.alloc<MemberPointerType>();
- Pointer->Prim = PrimTy::MemberPtr;
+PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
+ PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
- PointerAffinity Affinity;
- std::tie(Pointer->Quals, Affinity) = demanglePointerCVQualifiers(MangledName);
- assert(Affinity == PointerAffinity::Pointer);
+ std::tie(Pointer->Quals, Pointer->Affinity) =
+ demanglePointerCVQualifiers(MangledName);
+ assert(Pointer->Affinity == PointerAffinity::Pointer);
Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
if (MangledName.consumeFront("8")) {
- Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
- Pointer->Pointee = demangleFunctionType(MangledName, true, true);
+ Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
+ Pointer->Pointee = demangleFunctionType(MangledName, true);
} else {
Qualifiers PointeeQuals = Q_None;
bool IsMember = false;
std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
assert(IsMember);
- Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
+ Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
Pointer->Pointee->Quals = PointeeQuals;
@@ -1860,77 +1919,94 @@ Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) {
return Quals;
}
-ArrayType *Demangler::demangleArrayType(StringView &MangledName) {
+ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
assert(MangledName.front() == 'Y');
MangledName.popFront();
- int Dimension = demangleNumber(MangledName);
- if (Dimension <= 0) {
+ uint64_t Rank = 0;
+ bool IsNegative = false;
+ std::tie(Rank, IsNegative) = demangleNumber(MangledName);
+ if (IsNegative || Rank == 0) {
Error = true;
return nullptr;
}
- ArrayType *ATy = Arena.alloc<ArrayType>();
- ArrayType *Dim = ATy;
- for (int I = 0; I < Dimension; ++I) {
- Dim->Prim = PrimTy::Array;
- Dim->ArrayDimension = demangleNumber(MangledName);
- Dim->NextDimension = Arena.alloc<ArrayType>();
- Dim = Dim->NextDimension;
+ ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>();
+ NodeList *Head = Arena.alloc<NodeList>();
+ NodeList *Tail = Head;
+
+ for (uint64_t I = 0; I < Rank; ++I) {
+ uint64_t D = 0;
+ std::tie(D, IsNegative) = demangleNumber(MangledName);
+ if (IsNegative) {
+ Error = true;
+ return nullptr;
+ }
+ Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative);
+ if (I + 1 < Rank) {
+ Tail->Next = Arena.alloc<NodeList>();
+ Tail = Tail->Next;
+ }
}
+ ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank);
if (MangledName.consumeFront("$$C")) {
- if (MangledName.consumeFront("B"))
- ATy->Quals = Q_Const;
- else if (MangledName.consumeFront("C") || MangledName.consumeFront("D"))
- ATy->Quals = Qualifiers(Q_Const | Q_Volatile);
- else if (!MangledName.consumeFront("A"))
+ bool IsMember = false;
+ std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName);
+ if (IsMember) {
Error = true;
+ return nullptr;
+ }
}
ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
- Dim->ElementType = ATy->ElementType;
return ATy;
}
// Reads a function or a template parameters.
-FunctionParams
+NodeArrayNode *
Demangler::demangleFunctionParameterList(StringView &MangledName) {
// Empty parameter list.
if (MangledName.consumeFront('X'))
return {};
- FunctionParams *Head;
- FunctionParams **Current = &Head;
+ NodeList *Head = Arena.alloc<NodeList>();
+ NodeList **Current = &Head;
+ size_t Count = 0;
while (!Error && !MangledName.startsWith('@') &&
!MangledName.startsWith('Z')) {
+ ++Count;
if (startsWithDigit(MangledName)) {
size_t N = MangledName[0] - '0';
- if (N >= FunctionParamBackRefCount) {
+ if (N >= Backrefs.FunctionParamCount) {
Error = true;
return {};
}
MangledName = MangledName.dropFront();
- *Current = Arena.alloc<FunctionParams>();
- (*Current)->Current = FunctionParamBackRefs[N]->clone(Arena);
+ *Current = Arena.alloc<NodeList>();
+ (*Current)->N = Backrefs.FunctionParams[N];
Current = &(*Current)->Next;
continue;
}
size_t OldSize = MangledName.size();
- *Current = Arena.alloc<FunctionParams>();
- (*Current)->Current = demangleType(MangledName, QualifierMangleMode::Drop);
+ *Current = Arena.alloc<NodeList>();
+ TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop);
+ if (!TN || Error)
+ return nullptr;
+
+ (*Current)->N = TN;
size_t CharsConsumed = OldSize - MangledName.size();
assert(CharsConsumed != 0);
// Single-letter types are ignored for backreferences because memorizing
// them doesn't save anything.
- if (FunctionParamBackRefCount <= 9 && CharsConsumed > 1)
- FunctionParamBackRefs[FunctionParamBackRefCount++] = (*Current)->Current;
+ if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1)
+ Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN;
Current = &(*Current)->Next;
}
@@ -1938,99 +2014,206 @@ Demangler::demangleFunctionParameterList(StringView &MangledName) {
if (Error)
return {};
+ NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count);
// A non-empty parameter list is terminated by either 'Z' (variadic) parameter
// list or '@' (non variadic). Careful not to consume "@Z", as in that case
// the following Z could be a throw specifier.
if (MangledName.consumeFront('@'))
- return *Head;
+ return NA;
if (MangledName.consumeFront('Z')) {
- Head->IsVariadic = true;
- return *Head;
+ // This is a variadic parameter list. We probably need a variadic node to
+ // append to the end.
+ return NA;
}
Error = true;
return {};
}
-TemplateParams *
+NodeArrayNode *
Demangler::demangleTemplateParameterList(StringView &MangledName) {
- TemplateParams *Head;
- TemplateParams **Current = &Head;
- while (!Error && !MangledName.startsWith('@')) {
- // Template parameter lists don't participate in back-referencing.
- *Current = Arena.alloc<TemplateParams>();
+ NodeList *Head;
+ NodeList **Current = &Head;
+ size_t Count = 0;
- // Empty parameter pack.
+ while (!Error && !MangledName.startsWith('@')) {
if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
- MangledName.consumeFront("$$$V")) {
- if (!MangledName.startsWith('@'))
- Error = true;
+ MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) {
+ // parameter pack separator
continue;
}
+ ++Count;
+
+ // Template parameter lists don't participate in back-referencing.
+ *Current = Arena.alloc<NodeList>();
+
+ NodeList &TP = **Current;
+
+ TemplateParameterReferenceNode *TPRN = nullptr;
if (MangledName.consumeFront("$$Y")) {
- (*Current)->IsTemplateTemplate = true;
- (*Current)->IsAliasTemplate = true;
- (*Current)->ParamName = demangleFullyQualifiedTypeName(MangledName);
- } else if (MangledName.consumeFront("$1?")) {
- (*Current)->ParamName = demangleFullyQualifiedSymbolName(MangledName);
- (*Current)->ParamType = demangleFunctionEncoding(MangledName);
+ // Template alias
+ TP.N = demangleFullyQualifiedTypeName(MangledName);
+ } else if (MangledName.consumeFront("$$B")) {
+ // Array
+ TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
+ } else if (MangledName.consumeFront("$$C")) {
+ // Type has qualifiers.
+ TP.N = demangleType(MangledName, QualifierMangleMode::Mangle);
+ } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") ||
+ MangledName.startsWith("$I") || MangledName.startsWith("$J")) {
+ // Pointer to member
+ TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
+ TPRN->IsMemberPointer = true;
+
+ MangledName = MangledName.dropFront();
+ // 1 - single inheritance <name>
+ // H - multiple inheritance <name> <number>
+ // I - virtual inheritance <name> <number> <number> <number>
+ // J - unspecified inheritance <name> <number> <number> <number>
+ char InheritanceSpecifier = MangledName.popFront();
+ SymbolNode *S = nullptr;
+ if (MangledName.startsWith('?')) {
+ S = parse(MangledName);
+ memorizeIdentifier(S->Name->getUnqualifiedIdentifier());
+ }
+
+ switch (InheritanceSpecifier) {
+ case 'J':
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ LLVM_FALLTHROUGH;
+ case 'I':
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ LLVM_FALLTHROUGH;
+ case 'H':
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ LLVM_FALLTHROUGH;
+ case '1':
+ break;
+ default:
+ Error = true;
+ break;
+ }
+ TPRN->Affinity = PointerAffinity::Pointer;
+ TPRN->Symbol = S;
+ } else if (MangledName.startsWith("$E?")) {
+ MangledName.consumeFront("$E");
+ // Reference to symbol
+ TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
+ TPRN->Symbol = parse(MangledName);
+ TPRN->Affinity = PointerAffinity::Reference;
+ } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) {
+ TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
+
+ // Data member pointer.
+ MangledName = MangledName.dropFront();
+ char InheritanceSpecifier = MangledName.popFront();
+
+ switch (InheritanceSpecifier) {
+ case 'G':
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ LLVM_FALLTHROUGH;
+ case 'F':
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ LLVM_FALLTHROUGH;
+ case '0':
+ break;
+ default:
+ Error = true;
+ break;
+ }
+ TPRN->IsMemberPointer = true;
+
+ } else if (MangledName.consumeFront("$0")) {
+ // Integral non-type template parameter
+ bool IsNegative = false;
+ uint64_t Value = 0;
+ std::tie(Value, IsNegative) = demangleNumber(MangledName);
+
+ TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative);
} else {
- (*Current)->ParamType =
- demangleType(MangledName, QualifierMangleMode::Drop);
+ TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
}
+ if (Error)
+ return nullptr;
- Current = &(*Current)->Next;
+ Current = &TP.Next;
}
if (Error)
- return {};
+ return nullptr;
// Template parameter lists cannot be variadic, so it can only be terminated
// by @.
if (MangledName.consumeFront('@'))
- return Head;
+ return nodeListToNodeArray(Arena, Head, Count);
Error = true;
- return {};
+ return nullptr;
}
-void Demangler::output(const Symbol *S, OutputStream &OS) {
- // Converts an AST to a string.
- //
- // Converting an AST representing a C++ type to a string is tricky due
- // to the bad grammar of the C++ declaration inherited from C. You have
- // to construct a string from inside to outside. For example, if a type
- // X is a pointer to a function returning int, the order you create a
- // string becomes something like this:
- //
- // (1) X is a pointer: *X
- // (2) (1) is a function returning int: int (*X)()
- //
- // So you cannot construct a result just by appending strings to a result.
- //
- // To deal with this, we split the function into two. outputPre() writes
- // the "first half" of type declaration, and outputPost() writes the
- // "second half". For example, outputPre() writes a return type for a
- // function and outputPost() writes an parameter list.
- Type::outputPre(OS, *S->SymbolType);
- outputName(OS, S->SymbolName);
- Type::outputPost(OS, *S->SymbolType);
+void Demangler::dumpBackReferences() {
+ std::printf("%d function parameter backreferences\n",
+ (int)Backrefs.FunctionParamCount);
+
+ // Create an output stream so we can render each type.
+ OutputStream OS;
+ if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ std::terminate();
+ for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) {
+ OS.setCurrentPosition(0);
+
+ TypeNode *T = Backrefs.FunctionParams[I];
+ T->output(OS, OF_Default);
+
+ std::printf(" [%d] - %.*s\n", (int)I, (int)OS.getCurrentPosition(),
+ OS.getBuffer());
+ }
+ std::free(OS.getBuffer());
+
+ if (Backrefs.FunctionParamCount > 0)
+ std::printf("\n");
+ std::printf("%d name backreferences\n", (int)Backrefs.NamesCount);
+ for (size_t I = 0; I < Backrefs.NamesCount; ++I) {
+ std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(),
+ Backrefs.Names[I]->Name.begin());
+ }
+ if (Backrefs.NamesCount > 0)
+ std::printf("\n");
}
char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N,
- int *Status) {
+ int *Status, MSDemangleFlags Flags) {
+ int InternalStatus = demangle_success;
Demangler D;
+ OutputStream S;
+
StringView Name{MangledName};
- Symbol *S = D.parse(Name);
+ SymbolNode *AST = D.parse(Name);
+
+ if (Flags & MSDF_DumpBackrefs)
+ D.dumpBackReferences();
if (D.Error)
- *Status = llvm::demangle_invalid_mangled_name;
- else
- *Status = llvm::demangle_success;
+ InternalStatus = demangle_invalid_mangled_name;
+ else if (!initializeOutputStream(Buf, N, S, 1024))
+ InternalStatus = demangle_memory_alloc_failure;
+ else {
+ AST->output(S, OF_Default);
+ S += '\0';
+ if (N != nullptr)
+ *N = S.getCurrentPosition();
+ Buf = S.getBuffer();
+ }
- OutputStream OS = OutputStream::create(Buf, N, 1024);
- D.output(S, OS);
- OS << '\0';
- return OS.getBuffer();
+ if (Status)
+ *Status = InternalStatus;
+ return InternalStatus == demangle_success ? Buf : nullptr;
}
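For reference, a minimal caller of the updated entry point could look like the
sketch below. It is only an illustration: it assumes the llvm/Demangle/Demangle.h
declarations that ship with this import (MSDemangleFlags, MSDF_None,
demangle_success) and passes null Buf/N so the demangler allocates the result
buffer itself; the mangled name is an arbitrary example.

// Hedged usage sketch, not part of the imported sources.
#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  int Status = 0;
  // Buf == nullptr and N == nullptr ask microsoftDemangle() to malloc() the
  // output buffer; the returned pointer is then owned by the caller.
  char *Demangled = llvm::microsoftDemangle("?func@@YAHD@Z", nullptr, nullptr,
                                            &Status, llvm::MSDF_None);
  if (Status == llvm::demangle_success && Demangled)
    std::printf("%s\n", Demangled); // e.g. "int __cdecl func(char)"
  std::free(Demangled);
  return 0;
}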
diff --git a/contrib/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/contrib/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
new file mode 100644
index 000000000000..622f8e75e351
--- /dev/null
+++ b/contrib/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -0,0 +1,635 @@
+//===- MicrosoftDemangleNodes.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a demangler for MSVC-style mangled symbols.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/MicrosoftDemangleNodes.h"
+#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/Utility.h"
+#include <cctype>
+#include <string>
+
+using namespace llvm;
+using namespace ms_demangle;
+
+#define OUTPUT_ENUM_CLASS_VALUE(Enum, Value, Desc) \
+ case Enum::Value: \
+ OS << Desc; \
+ break;
+
+// Writes a space if the last token does not end with a punctuation character.
+static void outputSpaceIfNecessary(OutputStream &OS) {
+ if (OS.empty())
+ return;
+
+ char C = OS.back();
+ if (std::isalnum(C) || C == '>')
+ OS << " ";
+}
+
+static bool outputSingleQualifier(OutputStream &OS, Qualifiers Q) {
+ switch (Q) {
+ case Q_Const:
+ OS << "const";
+ return true;
+ case Q_Volatile:
+ OS << "volatile";
+ return true;
+ case Q_Restrict:
+ OS << "__restrict";
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static bool outputQualifierIfPresent(OutputStream &OS, Qualifiers Q,
+ Qualifiers Mask, bool NeedSpace) {
+ if (!(Q & Mask))
+ return NeedSpace;
+
+ if (NeedSpace)
+ OS << " ";
+
+ outputSingleQualifier(OS, Mask);
+ return true;
+}
+
+static void outputQualifiers(OutputStream &OS, Qualifiers Q, bool SpaceBefore,
+ bool SpaceAfter) {
+ if (Q == Q_None)
+ return;
+
+ size_t Pos1 = OS.getCurrentPosition();
+ SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Const, SpaceBefore);
+ SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Volatile, SpaceBefore);
+ SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Restrict, SpaceBefore);
+ size_t Pos2 = OS.getCurrentPosition();
+ if (SpaceAfter && Pos2 > Pos1)
+ OS << " ";
+}
+
+static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
+ outputSpaceIfNecessary(OS);
+
+ switch (CC) {
+ case CallingConv::Cdecl:
+ OS << "__cdecl";
+ break;
+ case CallingConv::Fastcall:
+ OS << "__fastcall";
+ break;
+ case CallingConv::Pascal:
+ OS << "__pascal";
+ break;
+ case CallingConv::Regcall:
+ OS << "__regcall";
+ break;
+ case CallingConv::Stdcall:
+ OS << "__stdcall";
+ break;
+ case CallingConv::Thiscall:
+ OS << "__thiscall";
+ break;
+ case CallingConv::Eabi:
+ OS << "__eabi";
+ break;
+ case CallingConv::Vectorcall:
+ OS << "__vectorcall";
+ break;
+ case CallingConv::Clrcall:
+ OS << "__clrcall";
+ break;
+ default:
+ break;
+ }
+}
+
+std::string Node::toString(OutputFlags Flags) const {
+ OutputStream OS;
+ initializeOutputStream(nullptr, nullptr, OS, 1024);
+ this->output(OS, Flags);
+ OS << '\0';
+ return {OS.getBuffer()};
+}
+
+void TypeNode::outputQuals(bool SpaceBefore, bool SpaceAfter) const {}
+
+void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ switch (PrimKind) {
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Void, "void");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Bool, "bool");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char, "char");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Schar, "signed char");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Uchar, "unsigned char");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char16, "char16_t");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char32, "char32_t");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Short, "short");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Ushort, "unsigned short");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Int, "int");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Uint, "unsigned int");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Long, "long");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Ulong, "unsigned long");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Int64, "__int64");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Uint64, "unsigned __int64");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Wchar, "wchar_t");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Float, "float");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Double, "double");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Ldouble, "long double");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Nullptr, "std::nullptr_t");
+ }
+ outputQualifiers(OS, Quals, true, false);
+}
+
+void NodeArrayNode::output(OutputStream &OS, OutputFlags Flags) const {
+ output(OS, Flags, ", ");
+}
+
+void NodeArrayNode::output(OutputStream &OS, OutputFlags Flags,
+ StringView Separator) const {
+ if (Count == 0)
+ return;
+ if (Nodes[0])
+ Nodes[0]->output(OS, Flags);
+ for (size_t I = 1; I < Count; ++I) {
+ OS << Separator;
+ Nodes[I]->output(OS, Flags);
+ }
+}
+
+void EncodedStringLiteralNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ switch (Char) {
+ case CharKind::Wchar:
+ OS << "L\"";
+ break;
+ case CharKind::Char:
+ OS << "\"";
+ break;
+ case CharKind::Char16:
+ OS << "u\"";
+ break;
+ case CharKind::Char32:
+ OS << "U\"";
+ break;
+ }
+ OS << DecodedString << "\"";
+ if (IsTruncated)
+ OS << "...";
+}
+
+void IntegerLiteralNode::output(OutputStream &OS, OutputFlags Flags) const {
+ if (IsNegative)
+ OS << '-';
+ OS << Value;
+}
+
+void TemplateParameterReferenceNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (ThunkOffsetCount > 0)
+ OS << "{";
+ else if (Affinity == PointerAffinity::Pointer)
+ OS << "&";
+
+ if (Symbol) {
+ Symbol->output(OS, Flags);
+ if (ThunkOffsetCount > 0)
+ OS << ", ";
+ }
+
+ if (ThunkOffsetCount > 0)
+ OS << ThunkOffsets[0];
+ for (int I = 1; I < ThunkOffsetCount; ++I) {
+ OS << ", " << ThunkOffsets[I];
+ }
+ if (ThunkOffsetCount > 0)
+ OS << "}";
+}
+
+void IdentifierNode::outputTemplateParameters(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (!TemplateParams)
+ return;
+ OS << "<";
+ TemplateParams->output(OS, Flags);
+ OS << ">";
+}
+
+void DynamicStructorIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (IsDestructor)
+ OS << "`dynamic atexit destructor for ";
+ else
+ OS << "`dynamic initializer for ";
+
+ if (Variable) {
+ OS << "`";
+ Variable->output(OS, Flags);
+ OS << "''";
+ } else {
+ OS << "'";
+ Name->output(OS, Flags);
+ OS << "''";
+ }
+}
+
+void NamedIdentifierNode::output(OutputStream &OS, OutputFlags Flags) const {
+ OS << Name;
+ outputTemplateParameters(OS, Flags);
+}
+
+void IntrinsicFunctionIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ switch (Operator) {
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, New, "operator new");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Delete, "operator delete");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Assign, "operator=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, RightShift, "operator>>");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LeftShift, "operator<<");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LogicalNot, "operator!");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Equals, "operator==");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, NotEquals, "operator!=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ArraySubscript,
+ "operator[]");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Pointer, "operator->");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Increment, "operator++");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Decrement, "operator--");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Minus, "operator-");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Plus, "operator+");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Dereference, "operator*");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseAnd, "operator&");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, MemberPointer,
+ "operator->*");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Divide, "operator/");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Modulus, "operator%");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LessThan, "operator<");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LessThanEqual, "operator<=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, GreaterThan, "operator>");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, GreaterThanEqual,
+ "operator>=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Comma, "operator,");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Parens, "operator()");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseNot, "operator~");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseXor, "operator^");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseOr, "operator|");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LogicalAnd, "operator&&");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LogicalOr, "operator||");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, TimesEqual, "operator*=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, PlusEqual, "operator+=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, MinusEqual, "operator-=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, DivEqual, "operator/=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ModEqual, "operator%=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, RshEqual, "operator>>=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LshEqual, "operator<<=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseAndEqual,
+ "operator&=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseOrEqual,
+ "operator|=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseXorEqual,
+ "operator^=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VbaseDtor, "`vbase dtor'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VecDelDtor,
+ "`vector deleting dtor'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, DefaultCtorClosure,
+ "`default ctor closure'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ScalarDelDtor,
+ "`scalar deleting dtor'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VecCtorIter,
+ "`vector ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VecDtorIter,
+ "`vector dtor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VecVbaseCtorIter,
+ "`vector vbase ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VdispMap,
+ "`virtual displacement map'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, EHVecCtorIter,
+ "`eh vector ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, EHVecDtorIter,
+ "`eh vector dtor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, EHVecVbaseCtorIter,
+ "`eh vector vbase ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, CopyCtorClosure,
+ "`copy ctor closure'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LocalVftableCtorClosure,
+ "`local vftable ctor closure'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ArrayNew, "operator new[]");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ArrayDelete,
+ "operator delete[]");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ManVectorCtorIter,
+ "`managed vector ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ManVectorDtorIter,
+ "`managed vector dtor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, EHVectorCopyCtorIter,
+ "`EH vector copy ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, EHVectorVbaseCopyCtorIter,
+ "`EH vector vbase copy ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VectorCopyCtorIter,
+ "`vector copy ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VectorVbaseCopyCtorIter,
+ "`vector vbase copy constructor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ManVectorVbaseCopyCtorIter,
+ "`managed vector vbase copy constructor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, CoAwait, "co_await");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Spaceship, "operator <=>");
+ case IntrinsicFunctionKind::MaxIntrinsic:
+ case IntrinsicFunctionKind::None:
+ break;
+ }
+ outputTemplateParameters(OS, Flags);
+}
+
+void LocalStaticGuardIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ OS << "`local static guard'";
+ if (ScopeIndex > 0)
+ OS << "{" << ScopeIndex << "}";
+}
+
+void ConversionOperatorIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ OS << "operator";
+ outputTemplateParameters(OS, Flags);
+ OS << " ";
+ TargetType->output(OS, Flags);
+}
+
+void StructorIdentifierNode::output(OutputStream &OS, OutputFlags Flags) const {
+ if (IsDestructor)
+ OS << "~";
+ Class->output(OS, Flags);
+ outputTemplateParameters(OS, Flags);
+}
+
+void LiteralOperatorIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ OS << "operator \"\"" << Name;
+ outputTemplateParameters(OS, Flags);
+}
+
+void FunctionSignatureNode::outputPre(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (FunctionClass & FC_Public)
+ OS << "public: ";
+ if (FunctionClass & FC_Protected)
+ OS << "protected: ";
+ if (FunctionClass & FC_Private)
+ OS << "private: ";
+
+ if (!(FunctionClass & FC_Global)) {
+ if (FunctionClass & FC_Static)
+ OS << "static ";
+ }
+ if (FunctionClass & FC_Virtual)
+ OS << "virtual ";
+
+ if (FunctionClass & FC_ExternC)
+ OS << "extern \"C\" ";
+
+ if (ReturnType) {
+ ReturnType->outputPre(OS, Flags);
+ OS << " ";
+ }
+
+ if (!(Flags & OF_NoCallingConvention))
+ outputCallingConvention(OS, CallConvention);
+}
+
+void FunctionSignatureNode::outputPost(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (!(FunctionClass & FC_NoParameterList)) {
+ OS << "(";
+ if (Params)
+ Params->output(OS, Flags);
+ else
+ OS << "void";
+ OS << ")";
+ }
+
+ if (Quals & Q_Const)
+ OS << " const";
+ if (Quals & Q_Volatile)
+ OS << " volatile";
+ if (Quals & Q_Restrict)
+ OS << " __restrict";
+ if (Quals & Q_Unaligned)
+ OS << " __unaligned";
+
+ if (IsNoexcept)
+ OS << " noexcept";
+
+ if (RefQualifier == FunctionRefQualifier::Reference)
+ OS << " &";
+ else if (RefQualifier == FunctionRefQualifier::RValueReference)
+ OS << " &&";
+
+ if (ReturnType)
+ ReturnType->outputPost(OS, Flags);
+}
+
+void ThunkSignatureNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ OS << "[thunk]: ";
+
+ FunctionSignatureNode::outputPre(OS, Flags);
+}
+
+void ThunkSignatureNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
+ if (FunctionClass & FC_StaticThisAdjust) {
+ OS << "`adjustor{" << ThisAdjust.StaticOffset << "}'";
+ } else if (FunctionClass & FC_VirtualThisAdjust) {
+ if (FunctionClass & FC_VirtualThisAdjustEx) {
+ OS << "`vtordispex{" << ThisAdjust.VBPtrOffset << ", "
+ << ThisAdjust.VBOffsetOffset << ", " << ThisAdjust.VtordispOffset
+ << ", " << ThisAdjust.StaticOffset << "}'";
+ } else {
+ OS << "`vtordisp{" << ThisAdjust.VtordispOffset << ", "
+ << ThisAdjust.StaticOffset << "}'";
+ }
+ }
+
+ FunctionSignatureNode::outputPost(OS, Flags);
+}
+
+void PointerTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ if (Pointee->kind() == NodeKind::FunctionSignature) {
+ // If this is a pointer to a function, don't output the calling convention.
+ // It needs to go inside the parentheses.
+ const FunctionSignatureNode *Sig =
+ static_cast<const FunctionSignatureNode *>(Pointee);
+ Sig->outputPre(OS, OF_NoCallingConvention);
+ } else
+ Pointee->outputPre(OS, Flags);
+
+ outputSpaceIfNecessary(OS);
+
+ if (Quals & Q_Unaligned)
+ OS << "__unaligned ";
+
+ if (Pointee->kind() == NodeKind::ArrayType) {
+ OS << "(";
+ } else if (Pointee->kind() == NodeKind::FunctionSignature) {
+ OS << "(";
+ const FunctionSignatureNode *Sig =
+ static_cast<const FunctionSignatureNode *>(Pointee);
+ outputCallingConvention(OS, Sig->CallConvention);
+ OS << " ";
+ }
+
+ if (ClassParent) {
+ ClassParent->output(OS, Flags);
+ OS << "::";
+ }
+
+ switch (Affinity) {
+ case PointerAffinity::Pointer:
+ OS << "*";
+ break;
+ case PointerAffinity::Reference:
+ OS << "&";
+ break;
+ case PointerAffinity::RValueReference:
+ OS << "&&";
+ break;
+ default:
+ assert(false);
+ }
+ outputQualifiers(OS, Quals, false, false);
+}
+
+void PointerTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
+ if (Pointee->kind() == NodeKind::ArrayType ||
+ Pointee->kind() == NodeKind::FunctionSignature)
+ OS << ")";
+
+ Pointee->outputPost(OS, Flags);
+}
+
+void TagTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ if (!(Flags & OF_NoTagSpecifier)) {
+ switch (Tag) {
+ OUTPUT_ENUM_CLASS_VALUE(TagKind, Class, "class");
+ OUTPUT_ENUM_CLASS_VALUE(TagKind, Struct, "struct");
+ OUTPUT_ENUM_CLASS_VALUE(TagKind, Union, "union");
+ OUTPUT_ENUM_CLASS_VALUE(TagKind, Enum, "enum");
+ }
+ OS << " ";
+ }
+ QualifiedName->output(OS, Flags);
+ outputQualifiers(OS, Quals, true, false);
+}
+
+void TagTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {}
+
+void ArrayTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ ElementType->outputPre(OS, Flags);
+ outputQualifiers(OS, Quals, true, false);
+}
+
+void ArrayTypeNode::outputOneDimension(OutputStream &OS, OutputFlags Flags,
+ Node *N) const {
+ assert(N->kind() == NodeKind::IntegerLiteral);
+ IntegerLiteralNode *ILN = static_cast<IntegerLiteralNode *>(N);
+ if (ILN->Value != 0)
+ ILN->output(OS, Flags);
+}
+
+void ArrayTypeNode::outputDimensionsImpl(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (Dimensions->Count == 0)
+ return;
+
+ outputOneDimension(OS, Flags, Dimensions->Nodes[0]);
+ for (size_t I = 1; I < Dimensions->Count; ++I) {
+ OS << "][";
+ outputOneDimension(OS, Flags, Dimensions->Nodes[I]);
+ }
+}
+
+void ArrayTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
+ OS << "[";
+ outputDimensionsImpl(OS, Flags);
+ OS << "]";
+
+ ElementType->outputPost(OS, Flags);
+}
+
+void SymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
+ Name->output(OS, Flags);
+}
+
+void FunctionSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
+ Signature->outputPre(OS, Flags);
+ outputSpaceIfNecessary(OS);
+ Name->output(OS, Flags);
+ Signature->outputPost(OS, Flags);
+}
+
+void VariableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
+ switch (SC) {
+ case StorageClass::PrivateStatic:
+ OS << "private: static ";
+ break;
+ case StorageClass::PublicStatic:
+ OS << "public: static ";
+ break;
+ case StorageClass::ProtectedStatic:
+ OS << "protected: static ";
+ break;
+ default:
+ break;
+ }
+
+ if (Type) {
+ Type->outputPre(OS, Flags);
+ outputSpaceIfNecessary(OS);
+ }
+ Name->output(OS, Flags);
+ if (Type)
+ Type->outputPost(OS, Flags);
+}
+
+void CustomTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ Identifier->output(OS, Flags);
+}
+void CustomTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {}
+
+void QualifiedNameNode::output(OutputStream &OS, OutputFlags Flags) const {
+ Components->output(OS, Flags, "::");
+}
+
+void RttiBaseClassDescriptorNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ OS << "`RTTI Base Class Descriptor at (";
+ OS << NVOffset << ", " << VBPtrOffset << ", " << VBTableOffset << ", "
+ << this->Flags;
+ OS << ")'";
+}
+
+void LocalStaticGuardVariableNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ Name->output(OS, Flags);
+}
+
+void VcallThunkIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ OS << "`vcall'{" << OffsetInVTable << ", {flat}}";
+}
+
+void SpecialTableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
+ outputQualifiers(OS, Quals, false, true);
+ Name->output(OS, Flags);
+ if (TargetName) {
+ OS << "{for `";
+ TargetName->output(OS, Flags);
+ OS << "'}";
+ }
+ return;
+}
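The outputPre/outputPost pairs above exist because C declarator syntax wraps
around the name, so a type string cannot be built by simple appending. A
standalone toy example (not LLVM code) of how FunctionSymbolNode::output
stitches the two halves around the symbol name:

// Illustration only: render "int (__cdecl *fp)(int)" the way the node
// classes do it -- outputPre(), then the name, then outputPost().
#include <iostream>
#include <string>

int main() {
  std::string Pre = "int (__cdecl *"; // return type + pointer introducer
  std::string Name = "fp";            // the symbol name goes in the middle
  std::string Post = ")(int)";        // parameter list closes the declarator
  std::cout << Pre << Name << Post << '\n';
  return 0;
}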
diff --git a/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
index fd4f0746f7f9..8204f5a90268 100644
--- a/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -76,8 +76,8 @@ struct RegisteredObjectInfo {
};
// Buffer for an in-memory object file in executable memory
-typedef llvm::DenseMap< const char*, RegisteredObjectInfo>
- RegisteredObjectBufferMap;
+typedef llvm::DenseMap<JITEventListener::ObjectKey, RegisteredObjectInfo>
+ RegisteredObjectBufferMap;
/// Global access point for the JIT debugging interface designed for use with a
/// singleton toolbox. Handles thread-safe registration and deregistration of
@@ -99,13 +99,13 @@ public:
/// Creates an entry in the JIT registry for the buffer @p Object,
/// which must contain an object file in executable memory with any
/// debug information for the debugger.
- void NotifyObjectEmitted(const ObjectFile &Object,
- const RuntimeDyld::LoadedObjectInfo &L) override;
+ void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) override;
/// Removes the internal registration of @p Object, and
/// frees associated resources.
/// Returns true if @p Object was found in ObjectBufferMap.
- void NotifyFreeingObject(const ObjectFile &Object) override;
+ void notifyFreeingObject(ObjectKey K) override;
private:
/// Deregister the debug info for the given object file from the debugger
@@ -147,11 +147,11 @@ GDBJITRegistrationListener::~GDBJITRegistrationListener() {
ObjectBufferMap.clear();
}
-void GDBJITRegistrationListener::NotifyObjectEmitted(
- const ObjectFile &Object,
- const RuntimeDyld::LoadedObjectInfo &L) {
+void GDBJITRegistrationListener::notifyObjectLoaded(
+ ObjectKey K, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {
- OwningBinary<ObjectFile> DebugObj = L.getObjectForDebug(Object);
+ OwningBinary<ObjectFile> DebugObj = L.getObjectForDebug(Obj);
// Bail out if debug objects aren't supported.
if (!DebugObj.getBinary())
@@ -160,11 +160,8 @@ void GDBJITRegistrationListener::NotifyObjectEmitted(
const char *Buffer = DebugObj.getBinary()->getMemoryBufferRef().getBufferStart();
size_t Size = DebugObj.getBinary()->getMemoryBufferRef().getBufferSize();
- const char *Key = Object.getMemoryBufferRef().getBufferStart();
-
- assert(Key && "Attempt to register a null object with a debugger.");
llvm::MutexGuard locked(*JITDebugLock);
- assert(ObjectBufferMap.find(Key) == ObjectBufferMap.end() &&
+ assert(ObjectBufferMap.find(K) == ObjectBufferMap.end() &&
"Second attempt to perform debug registration.");
jit_code_entry* JITCodeEntry = new jit_code_entry();
@@ -175,16 +172,15 @@ void GDBJITRegistrationListener::NotifyObjectEmitted(
JITCodeEntry->symfile_addr = Buffer;
JITCodeEntry->symfile_size = Size;
- ObjectBufferMap[Key] = RegisteredObjectInfo(Size, JITCodeEntry,
- std::move(DebugObj));
+ ObjectBufferMap[K] =
+ RegisteredObjectInfo(Size, JITCodeEntry, std::move(DebugObj));
NotifyDebugger(JITCodeEntry);
}
}
-void GDBJITRegistrationListener::NotifyFreeingObject(const ObjectFile& Object) {
- const char *Key = Object.getMemoryBufferRef().getBufferStart();
+void GDBJITRegistrationListener::notifyFreeingObject(ObjectKey K) {
llvm::MutexGuard locked(*JITDebugLock);
- RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(Key);
+ RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(K);
if (I != ObjectBufferMap.end()) {
deregisterObjectInternal(I);
diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index 211f5216811f..e9051c198506 100644
--- a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -47,7 +47,7 @@ class IntelJITEventListener : public JITEventListener {
typedef DenseMap<const void *, MethodAddressVector> ObjectMap;
ObjectMap LoadedObjectMap;
- std::map<const char*, OwningBinary<ObjectFile>> DebugObjects;
+ std::map<ObjectKey, OwningBinary<ObjectFile>> DebugObjects;
public:
IntelJITEventListener(IntelJITEventsWrapper* libraryWrapper) {
@@ -57,10 +57,10 @@ public:
~IntelJITEventListener() {
}
- void NotifyObjectEmitted(const ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) override;
+ void notifyObjectLoaded(ObjectKey Key, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) override;
- void NotifyFreeingObject(const ObjectFile &Obj) override;
+ void notifyFreeingObject(ObjectKey Key) override;
};
static LineNumberInfo DILineInfoToIntelJITFormat(uintptr_t StartAddress,
@@ -96,9 +96,9 @@ static iJIT_Method_Load FunctionDescToIntelJITFormat(
return Result;
}
-void IntelJITEventListener::NotifyObjectEmitted(
- const ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) {
+void IntelJITEventListener::notifyObjectLoaded(
+ ObjectKey Key, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {
OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
const ObjectFile *DebugObj = DebugObjOwner.getBinary();
@@ -188,17 +188,17 @@ void IntelJITEventListener::NotifyObjectEmitted(
// registered function addresses for each loaded object. We will
// use the MethodIDs map to get the registered ID for each function.
LoadedObjectMap[ObjData] = Functions;
- DebugObjects[Obj.getData().data()] = std::move(DebugObjOwner);
+ DebugObjects[Key] = std::move(DebugObjOwner);
}
-void IntelJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
+void IntelJITEventListener::notifyFreeingObject(ObjectKey Key) {
// This object may not have been registered with the listener. If it wasn't,
// bail out.
- if (DebugObjects.find(Obj.getData().data()) == DebugObjects.end())
+ if (DebugObjects.find(Key) == DebugObjects.end())
return;
// Get the address of the object image for use as a unique identifier
- const ObjectFile &DebugObj = *DebugObjects[Obj.getData().data()].getBinary();
+ const ObjectFile &DebugObj = *DebugObjects[Key].getBinary();
const void* ObjData = DebugObj.getData().data();
// Get the object's function list from LoadedObjectMap
@@ -223,7 +223,7 @@ void IntelJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
// Erase the object from LoadedObjectMap
LoadedObjectMap.erase(OI);
- DebugObjects.erase(Obj.getData().data());
+ DebugObjects.erase(Key);
}
} // anonymous namespace.
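The listener changes in this and the previous file follow the renamed
JITEventListener interface, which identifies objects by an integral
JITEventListener::ObjectKey instead of a buffer pointer. A hedged sketch of a
listener written against the new overrides (DebugLoggingListener is a made-up
name used only for illustration):

#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/Object/ObjectFile.h"
#include <cstdio>

namespace {
class DebugLoggingListener : public llvm::JITEventListener {
public:
  void notifyObjectLoaded(ObjectKey K, const llvm::object::ObjectFile &Obj,
                          const llvm::RuntimeDyld::LoadedObjectInfo &L) override {
    // Objects are keyed by an integer, so the key stays valid even after the
    // ObjectFile it was derived from has been destroyed.
    std::printf("loaded object %llu (%zu bytes)\n", (unsigned long long)K,
                (size_t)Obj.getData().size());
  }
  void notifyFreeingObject(ObjectKey K) override {
    std::printf("freeing object %llu\n", (unsigned long long)K);
  }
};
} // end anonymous namespace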
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index 39cf6d4a32a3..98dca1102759 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1778,17 +1778,14 @@ void Interpreter::visitExtractElementInst(ExtractElementInst &I) {
void Interpreter::visitInsertElementInst(InsertElementInst &I) {
ExecutionContext &SF = ECStack.back();
- Type *Ty = I.getType();
-
- if(!(Ty->isVectorTy()) )
- llvm_unreachable("Unhandled dest type for insertelement instruction");
+ VectorType *Ty = cast<VectorType>(I.getType());
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
GenericValue Dest;
- Type *TyContained = Ty->getContainedType(0);
+ Type *TyContained = Ty->getElementType();
const unsigned indx = unsigned(Src3.IntVal.getZExtValue());
Dest.AggregateVal = Src1.AggregateVal;
@@ -1814,9 +1811,7 @@ void Interpreter::visitInsertElementInst(InsertElementInst &I) {
void Interpreter::visitShuffleVectorInst(ShuffleVectorInst &I){
ExecutionContext &SF = ECStack.back();
- Type *Ty = I.getType();
- if(!(Ty->isVectorTy()))
- llvm_unreachable("Unhandled dest type for shufflevector instruction");
+ VectorType *Ty = cast<VectorType>(I.getType());
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
@@ -1827,7 +1822,7 @@ void Interpreter::visitShuffleVectorInst(ShuffleVectorInst &I){
// bytecode can't contain different types for src1 and src2 for a
// shufflevector instruction.
- Type *TyContained = Ty->getContainedType(0);
+ Type *TyContained = Ty->getElementType();
unsigned src1Size = (unsigned)Src1.AggregateVal.size();
unsigned src2Size = (unsigned)Src2.AggregateVal.size();
unsigned src3Size = (unsigned)Src3.AggregateVal.size();
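The Interpreter hunks above replace the manual isVectorTy() check plus
getContainedType(0) with cast<VectorType>, which asserts the dynamic type and
gives typed access in one step. A minimal illustration of the idiom, not taken
from the patch:

// Precondition (asserted by cast<>): T is a vector type.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"

static llvm::Type *elementTypeOf(llvm::Type *T) {
  auto *VT = llvm::cast<llvm::VectorType>(T);
  return VT->getElementType();
}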
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 64dca930722e..334fcacf8078 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -103,8 +103,9 @@ static ExFunc lookupFunction(const Function *F) {
// composite function name should be.
std::string ExtName = "lle_";
FunctionType *FT = F->getFunctionType();
- for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
- ExtName += getTypeID(FT->getContainedType(i));
+ ExtName += getTypeID(FT->getReturnType());
+ for (Type *T : FT->params())
+ ExtName += getTypeID(T);
ExtName += ("_" + F->getName()).str();
sys::ScopedLock Writer(*FunctionsLock);
@@ -227,7 +228,8 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef<GenericValue> ArgVals,
Type *RetTy = FTy->getReturnType();
ffi_type *rtype = ffiTypeFor(RetTy);
- if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) {
+ if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, args.data()) ==
+ FFI_OK) {
SmallVector<uint8_t, 128> ret;
if (RetTy->getTypeID() != Type::VoidTyID)
ret.resize(TD.getTypeStoreSize(RetTy));
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 2c663c2e1edf..ffc6707e1488 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -95,7 +95,7 @@ MCJIT::~MCJIT() {
for (auto &Obj : LoadedObjects)
if (Obj)
- NotifyFreeingObject(*Obj);
+ notifyFreeingObject(*Obj);
Archives.clear();
}
@@ -119,7 +119,7 @@ void MCJIT::addObjectFile(std::unique_ptr<object::ObjectFile> Obj) {
if (Dyld.hasError())
report_fatal_error(Dyld.getErrorString());
- NotifyObjectEmitted(*Obj, *L);
+ notifyObjectLoaded(*Obj, *L);
LoadedObjects.push_back(std::move(Obj));
}
@@ -216,7 +216,7 @@ void MCJIT::generateCodeForModule(Module *M) {
if (!LoadedObject) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(LoadedObject.takeError(), OS, "");
+ logAllUnhandledErrors(LoadedObject.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -226,7 +226,7 @@ void MCJIT::generateCodeForModule(Module *M) {
if (Dyld.hasError())
report_fatal_error(Dyld.getErrorString());
- NotifyObjectEmitted(*LoadedObject.get(), *L);
+ notifyObjectLoaded(*LoadedObject.get(), *L);
Buffers.push_back(std::move(ObjectToLoad));
LoadedObjects.push_back(std::move(*LoadedObject));
@@ -326,8 +326,9 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
return *AddrOrErr;
else
report_fatal_error(AddrOrErr.takeError());
- } else
+ } else if (auto Err = Sym.takeError())
report_fatal_error(Sym.takeError());
+ return 0;
}
JITSymbol MCJIT::findSymbol(const std::string &Name,
@@ -647,19 +648,23 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) {
}
}
-void MCJIT::NotifyObjectEmitted(const object::ObjectFile& Obj,
- const RuntimeDyld::LoadedObjectInfo &L) {
+void MCJIT::notifyObjectLoaded(const object::ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {
+ uint64_t Key =
+ static_cast<uint64_t>(reinterpret_cast<uintptr_t>(Obj.getData().data()));
MutexGuard locked(lock);
MemMgr->notifyObjectLoaded(this, Obj);
for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
- EventListeners[I]->NotifyObjectEmitted(Obj, L);
+ EventListeners[I]->notifyObjectLoaded(Key, Obj, L);
}
}
-void MCJIT::NotifyFreeingObject(const object::ObjectFile& Obj) {
+void MCJIT::notifyFreeingObject(const object::ObjectFile &Obj) {
+ uint64_t Key =
+ static_cast<uint64_t>(reinterpret_cast<uintptr_t>(Obj.getData().data()));
MutexGuard locked(lock);
for (JITEventListener *L : EventListeners)
- L->NotifyFreeingObject(Obj);
+ L->notifyFreeingObject(Key);
}
JITSymbol
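
With the renamed notifyObjectLoaded/notifyFreeingObject hooks, MCJIT identifies each object to its listeners by an integer ObjectKey derived from the object buffer's address, so listeners no longer need the ObjectFile when it is freed. A minimal listener sketch using that key (the class name and the map it keeps are illustrative, not part of this patch):

#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/Object/ObjectFile.h"
#include <map>

using namespace llvm;

class CountingListener : public JITEventListener {
  std::map<ObjectKey, uint64_t> LoadedSizes;

public:
  void notifyObjectLoaded(ObjectKey Key, const object::ObjectFile &Obj,
                          const RuntimeDyld::LoadedObjectInfo &L) override {
    // Record something about the object under its key...
    LoadedSizes[Key] = Obj.getData().size();
  }

  void notifyFreeingObject(ObjectKey Key) override {
    // ...and find it again at free time without needing the ObjectFile.
    LoadedSizes.erase(Key);
  }
};
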
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
index 943b14942a0f..1119e138720f 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -331,9 +331,9 @@ protected:
/// the future.
std::unique_ptr<MemoryBuffer> emitObject(Module *M);
- void NotifyObjectEmitted(const object::ObjectFile& Obj,
- const RuntimeDyld::LoadedObjectInfo &L);
- void NotifyFreeingObject(const object::ObjectFile& Obj);
+ void notifyObjectLoaded(const object::ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L);
+ void notifyFreeingObject(const object::ObjectFile &Obj);
JITSymbol findExistingSymbol(const std::string &Name);
Module *findModuleForSymbol(const std::string &Name, bool CheckFunctionsOnly);
diff --git a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 6f0825fb38da..21af6b585c41 100644
--- a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -40,7 +40,7 @@ class OProfileJITEventListener : public JITEventListener {
std::unique_ptr<OProfileWrapper> Wrapper;
void initialize();
- std::map<const char*, OwningBinary<ObjectFile>> DebugObjects;
+ std::map<ObjectKey, OwningBinary<ObjectFile>> DebugObjects;
public:
OProfileJITEventListener(std::unique_ptr<OProfileWrapper> LibraryWrapper)
@@ -50,10 +50,10 @@ public:
~OProfileJITEventListener();
- void NotifyObjectEmitted(const ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) override;
+ void notifyObjectLoaded(ObjectKey Key, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) override;
- void NotifyFreeingObject(const ObjectFile &Obj) override;
+ void notifyFreeingObject(ObjectKey Key) override;
};
void OProfileJITEventListener::initialize() {
@@ -78,9 +78,9 @@ OProfileJITEventListener::~OProfileJITEventListener() {
}
}
-void OProfileJITEventListener::NotifyObjectEmitted(
- const ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) {
+void OProfileJITEventListener::notifyObjectLoaded(
+ ObjectKey Key, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {
if (!Wrapper->isAgentAvailable()) {
return;
}
@@ -137,18 +137,18 @@ void OProfileJITEventListener::NotifyObjectEmitted(
}
}
- DebugObjects[Obj.getData().data()] = std::move(DebugObjOwner);
+ DebugObjects[Key] = std::move(DebugObjOwner);
}
-void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
+void OProfileJITEventListener::notifyFreeingObject(ObjectKey Key) {
if (Wrapper->isAgentAvailable()) {
// If there was no agent registered when the original object was loaded then
// we won't have created a debug object for it, so bail out.
- if (DebugObjects.find(Obj.getData().data()) == DebugObjects.end())
+ if (DebugObjects.find(Key) == DebugObjects.end())
return;
- const ObjectFile &DebugObj = *DebugObjects[Obj.getData().data()].getBinary();
+ const ObjectFile &DebugObj = *DebugObjects[Key].getBinary();
// Use symbol info to iterate functions in the object.
for (symbol_iterator I = DebugObj.symbol_begin(),
@@ -171,7 +171,7 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
}
}
- DebugObjects.erase(Obj.getData().data());
+ DebugObjects.erase(Key);
}
} // anonymous namespace.
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index d42e7b05ba67..241eb3600da7 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -8,201 +8,86 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
using namespace llvm::orc;
-namespace {
+static ThreadSafeModule extractSubModule(ThreadSafeModule &TSM,
+ StringRef Suffix,
+ GVPredicate ShouldExtract) {
-template <typename MaterializerFtor>
-class LambdaValueMaterializer final : public ValueMaterializer {
-public:
- LambdaValueMaterializer(MaterializerFtor M) : M(std::move(M)) {}
-
- Value *materialize(Value *V) final { return M(V); }
-
-private:
- MaterializerFtor M;
-};
+ auto DeleteExtractedDefs = [](GlobalValue &GV) {
+ // Bump the linkage: this global will be provided by the external module.
+ GV.setLinkage(GlobalValue::ExternalLinkage);
-template <typename MaterializerFtor>
-LambdaValueMaterializer<MaterializerFtor>
-createLambdaValueMaterializer(MaterializerFtor M) {
- return LambdaValueMaterializer<MaterializerFtor>(std::move(M));
-}
-} // namespace
-
-static void extractAliases(MaterializationResponsibility &R, Module &M,
- MangleAndInterner &Mangle) {
- SymbolAliasMap Aliases;
-
- std::vector<GlobalAlias *> ModAliases;
- for (auto &A : M.aliases())
- ModAliases.push_back(&A);
-
- for (auto *A : ModAliases) {
- Constant *Aliasee = A->getAliasee();
- assert(A->hasName() && "Anonymous alias?");
- assert(Aliasee->hasName() && "Anonymous aliasee");
- std::string AliasName = A->getName();
-
- Aliases[Mangle(AliasName)] = SymbolAliasMapEntry(
- {Mangle(Aliasee->getName()), JITSymbolFlags::fromGlobalValue(*A)});
-
- if (isa<Function>(Aliasee)) {
- auto *F = cloneFunctionDecl(M, *cast<Function>(Aliasee));
- A->replaceAllUsesWith(F);
- A->eraseFromParent();
- F->setName(AliasName);
- } else if (isa<GlobalValue>(Aliasee)) {
- auto *G = cloneGlobalVariableDecl(M, *cast<GlobalVariable>(Aliasee));
- A->replaceAllUsesWith(G);
- A->eraseFromParent();
- G->setName(AliasName);
- }
- }
-
- R.replace(symbolAliases(std::move(Aliases)));
-}
-
-static std::unique_ptr<Module>
-extractAndClone(Module &M, LLVMContext &NewContext, StringRef Suffix,
- function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
- SmallVector<char, 1> ClonedModuleBuffer;
-
- {
- std::set<GlobalValue *> ClonedDefsInSrc;
- ValueToValueMapTy VMap;
- auto Tmp = CloneModule(M, VMap, [&](const GlobalValue *GV) {
- if (ShouldCloneDefinition(GV)) {
- ClonedDefsInSrc.insert(const_cast<GlobalValue *>(GV));
- return true;
- }
- return false;
- });
-
- for (auto *GV : ClonedDefsInSrc) {
- // Delete the definition and bump the linkage in the source module.
- if (isa<Function>(GV)) {
- auto &F = *cast<Function>(GV);
- F.deleteBody();
- F.setPersonalityFn(nullptr);
- } else if (isa<GlobalVariable>(GV)) {
- cast<GlobalVariable>(GV)->setInitializer(nullptr);
+ // Delete the definition in the source module.
+ if (isa<Function>(GV)) {
+ auto &F = cast<Function>(GV);
+ F.deleteBody();
+ F.setPersonalityFn(nullptr);
+ } else if (isa<GlobalVariable>(GV)) {
+ cast<GlobalVariable>(GV).setInitializer(nullptr);
+ } else if (isa<GlobalAlias>(GV)) {
+ // We need to turn deleted aliases into function or variable decls based
+ // on the type of their aliasee.
+ auto &A = cast<GlobalAlias>(GV);
+ Constant *Aliasee = A.getAliasee();
+ assert(A.hasName() && "Anonymous alias?");
+ assert(Aliasee->hasName() && "Anonymous aliasee");
+ std::string AliasName = A.getName();
+
+ if (isa<Function>(Aliasee)) {
+ auto *F = cloneFunctionDecl(*A.getParent(), *cast<Function>(Aliasee));
+ A.replaceAllUsesWith(F);
+ A.eraseFromParent();
+ F->setName(AliasName);
+ } else if (isa<GlobalVariable>(Aliasee)) {
+ auto *G = cloneGlobalVariableDecl(*A.getParent(),
+ *cast<GlobalVariable>(Aliasee));
+ A.replaceAllUsesWith(G);
+ A.eraseFromParent();
+ G->setName(AliasName);
} else
- llvm_unreachable("Unsupported global type");
+ llvm_unreachable("Alias to unsupported type");
+ } else
+ llvm_unreachable("Unsupported global type");
+ };
- GV->setLinkage(GlobalValue::ExternalLinkage);
- }
-
- BitcodeWriter BCWriter(ClonedModuleBuffer);
-
- BCWriter.writeModule(*Tmp);
- BCWriter.writeSymtab();
- BCWriter.writeStrtab();
- }
-
- MemoryBufferRef ClonedModuleBufferRef(
- StringRef(ClonedModuleBuffer.data(), ClonedModuleBuffer.size()),
- "cloned module buffer");
+ auto NewTSMod = cloneToNewContext(TSM, ShouldExtract, DeleteExtractedDefs);
+ auto &M = *NewTSMod.getModule();
+ M.setModuleIdentifier((M.getModuleIdentifier() + Suffix).str());
- auto ClonedModule =
- cantFail(parseBitcodeFile(ClonedModuleBufferRef, NewContext));
- ClonedModule->setModuleIdentifier((M.getName() + Suffix).str());
- return ClonedModule;
-}
-
-static std::unique_ptr<Module> extractGlobals(Module &M,
- LLVMContext &NewContext) {
- return extractAndClone(M, NewContext, ".globals", [](const GlobalValue *GV) {
- return isa<GlobalVariable>(GV);
- });
+ return NewTSMod;
}
namespace llvm {
namespace orc {
-class ExtractingIRMaterializationUnit : public IRMaterializationUnit {
+class PartitioningIRMaterializationUnit : public IRMaterializationUnit {
public:
- ExtractingIRMaterializationUnit(ExecutionSession &ES,
- CompileOnDemandLayer2 &Parent,
- std::unique_ptr<Module> M)
- : IRMaterializationUnit(ES, std::move(M)), Parent(Parent) {}
-
- ExtractingIRMaterializationUnit(std::unique_ptr<Module> M,
- SymbolFlagsMap SymbolFlags,
- SymbolNameToDefinitionMap SymbolToDefinition,
- CompileOnDemandLayer2 &Parent)
- : IRMaterializationUnit(std::move(M), std::move(SymbolFlags),
+ PartitioningIRMaterializationUnit(ExecutionSession &ES, ThreadSafeModule TSM,
+ VModuleKey K, CompileOnDemandLayer &Parent)
+ : IRMaterializationUnit(ES, std::move(TSM), std::move(K)),
+ Parent(Parent) {}
+
+ PartitioningIRMaterializationUnit(
+ ThreadSafeModule TSM, SymbolFlagsMap SymbolFlags,
+ SymbolNameToDefinitionMap SymbolToDefinition,
+ CompileOnDemandLayer &Parent)
+ : IRMaterializationUnit(std::move(TSM), std::move(K),
+ std::move(SymbolFlags),
std::move(SymbolToDefinition)),
Parent(Parent) {}
private:
void materialize(MaterializationResponsibility R) override {
- // FIXME: Need a 'notify lazy-extracting/emitting' callback to tie the
- // extracted module key, extracted module, and source module key
- // together. This could be used, for example, to provide a specific
- // memory manager instance to the linking layer.
-
- auto RequestedSymbols = R.getRequestedSymbols();
-
- // Extract the requested functions into a new module.
- std::unique_ptr<Module> ExtractedFunctionsModule;
- if (!RequestedSymbols.empty()) {
- std::string Suffix;
- std::set<const GlobalValue *> FunctionsToClone;
- for (auto &Name : RequestedSymbols) {
- auto I = SymbolToDefinition.find(Name);
- assert(I != SymbolToDefinition.end() && I->second != nullptr &&
- "Should have a non-null definition");
- FunctionsToClone.insert(I->second);
- Suffix += ".";
- Suffix += *Name;
- }
-
- std::lock_guard<std::mutex> Lock(SourceModuleMutex);
- ExtractedFunctionsModule =
- extractAndClone(*M, Parent.GetAvailableContext(), Suffix,
- [&](const GlobalValue *GV) -> bool {
- return FunctionsToClone.count(GV);
- });
- }
-
- // Build a new ExtractingIRMaterializationUnit to delegate the unrequested
- // symbols to.
- SymbolFlagsMap DelegatedSymbolFlags;
- IRMaterializationUnit::SymbolNameToDefinitionMap
- DelegatedSymbolToDefinition;
- for (auto &KV : SymbolToDefinition) {
- if (RequestedSymbols.count(KV.first))
- continue;
- DelegatedSymbolFlags[KV.first] =
- JITSymbolFlags::fromGlobalValue(*KV.second);
- DelegatedSymbolToDefinition[KV.first] = KV.second;
- }
-
- if (!DelegatedSymbolFlags.empty()) {
- assert(DelegatedSymbolFlags.size() ==
- DelegatedSymbolToDefinition.size() &&
- "SymbolFlags and SymbolToDefinition should have the same number "
- "of entries");
- R.replace(llvm::make_unique<ExtractingIRMaterializationUnit>(
- std::move(M), std::move(DelegatedSymbolFlags),
- std::move(DelegatedSymbolToDefinition), Parent));
- }
-
- if (ExtractedFunctionsModule)
- Parent.emitExtractedFunctionsModule(std::move(R),
- std::move(ExtractedFunctionsModule));
+ Parent.emitPartition(std::move(R), std::move(TSM),
+ std::move(SymbolToDefinition));
}
- void discard(const VSO &V, SymbolStringPtr Name) override {
+ void discard(const JITDylib &V, const SymbolStringPtr &Name) override {
// All original symbols were materialized by the CODLayer and should be
// final. The function bodies provided by M should never be overridden.
llvm_unreachable("Discard should never be called on an "
@@ -210,44 +95,98 @@ private:
}
mutable std::mutex SourceModuleMutex;
- CompileOnDemandLayer2 &Parent;
+ CompileOnDemandLayer &Parent;
};
-CompileOnDemandLayer2::CompileOnDemandLayer2(
- ExecutionSession &ES, IRLayer &BaseLayer, JITCompileCallbackManager &CCMgr,
- IndirectStubsManagerBuilder BuildIndirectStubsManager,
- GetAvailableContextFunction GetAvailableContext)
- : IRLayer(ES), BaseLayer(BaseLayer), CCMgr(CCMgr),
- BuildIndirectStubsManager(std::move(BuildIndirectStubsManager)),
- GetAvailableContext(std::move(GetAvailableContext)) {}
-
-Error CompileOnDemandLayer2::add(VSO &V, VModuleKey K,
- std::unique_ptr<Module> M) {
- return IRLayer::add(V, K, std::move(M));
+Optional<CompileOnDemandLayer::GlobalValueSet>
+CompileOnDemandLayer::compileRequested(GlobalValueSet Requested) {
+ return std::move(Requested);
}
-void CompileOnDemandLayer2::emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) {
+Optional<CompileOnDemandLayer::GlobalValueSet>
+CompileOnDemandLayer::compileWholeModule(GlobalValueSet Requested) {
+ return None;
+}
+
+CompileOnDemandLayer::CompileOnDemandLayer(
+ ExecutionSession &ES, IRLayer &BaseLayer, LazyCallThroughManager &LCTMgr,
+ IndirectStubsManagerBuilder BuildIndirectStubsManager)
+ : IRLayer(ES), BaseLayer(BaseLayer), LCTMgr(LCTMgr),
+ BuildIndirectStubsManager(std::move(BuildIndirectStubsManager)) {}
+
+void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) {
+ this->Partition = std::move(Partition);
+}
+
+void CompileOnDemandLayer::emit(MaterializationResponsibility R,
+ ThreadSafeModule TSM) {
+ assert(TSM.getModule() && "Null module");
+
auto &ES = getExecutionSession();
- assert(M && "M should not be null");
+ auto &M = *TSM.getModule();
+
+ // First, do some cleanup on the module:
+ cleanUpModule(M);
+
+ // Now sort the callables and non-callables, build re-exports and lodge the
+ // actual module with the implementation dylib.
+ auto &PDR = getPerDylibResources(R.getTargetJITDylib());
- for (auto &GV : M->global_values())
- if (GV.hasWeakLinkage())
- GV.setLinkage(GlobalValue::ExternalLinkage);
+ MangleAndInterner Mangle(ES, M.getDataLayout());
+ SymbolAliasMap NonCallables;
+ SymbolAliasMap Callables;
+ for (auto &GV : M.global_values()) {
+ if (GV.isDeclaration() || GV.hasLocalLinkage() || GV.hasAppendingLinkage())
+ continue;
- MangleAndInterner Mangle(ES, M->getDataLayout());
+ auto Name = Mangle(GV.getName());
+ auto Flags = JITSymbolFlags::fromGlobalValue(GV);
+ if (Flags.isCallable())
+ Callables[Name] = SymbolAliasMapEntry(Name, Flags);
+ else
+ NonCallables[Name] = SymbolAliasMapEntry(Name, Flags);
+ }
+
+ // Create a partitioning materialization unit and lodge it with the
+ // implementation dylib.
+ if (auto Err = PDR.getImplDylib().define(
+ llvm::make_unique<PartitioningIRMaterializationUnit>(
+ ES, std::move(TSM), R.getVModuleKey(), *this))) {
+ ES.reportError(std::move(Err));
+ R.failMaterialization();
+ return;
+ }
- extractAliases(R, *M, Mangle);
+ R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), true));
+ R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(),
+ std::move(Callables)));
+}
- auto GlobalsModule = extractGlobals(*M, GetAvailableContext());
+CompileOnDemandLayer::PerDylibResources &
+CompileOnDemandLayer::getPerDylibResources(JITDylib &TargetD) {
+ auto I = DylibResources.find(&TargetD);
+ if (I == DylibResources.end()) {
+ auto &ImplD = getExecutionSession().createJITDylib(
+ TargetD.getName() + ".impl", false);
+ TargetD.withSearchOrderDo([&](const JITDylibSearchList &TargetSearchOrder) {
+ auto NewSearchOrder = TargetSearchOrder;
+ assert(!NewSearchOrder.empty() &&
+ NewSearchOrder.front().first == &TargetD &&
+ NewSearchOrder.front().second == true &&
+ "TargetD must be at the front of its own search order and match "
+ "non-exported symbol");
+ NewSearchOrder.insert(std::next(NewSearchOrder.begin()), {&ImplD, true});
+ ImplD.setSearchOrder(std::move(NewSearchOrder), false);
+ });
+ PerDylibResources PDR(ImplD, BuildIndirectStubsManager());
+ I = DylibResources.insert(std::make_pair(&TargetD, std::move(PDR))).first;
+ }
- // Delete the bodies of any available externally functions, rename the
- // rest, and build the compile callbacks.
- std::map<SymbolStringPtr, std::pair<JITTargetAddress, JITSymbolFlags>>
- StubCallbacksAndLinkages;
- auto &TargetVSO = R.getTargetVSO();
+ return I->second;
+}
- for (auto &F : M->functions()) {
+void CompileOnDemandLayer::cleanUpModule(Module &M) {
+ for (auto &F : M.functions()) {
if (F.isDeclaration())
continue;
@@ -256,87 +195,108 @@ void CompileOnDemandLayer2::emit(MaterializationResponsibility R, VModuleKey K,
F.setPersonalityFn(nullptr);
continue;
}
+ }
+}
- assert(F.hasName() && "Function should have a name");
- std::string StubUnmangledName = F.getName();
- F.setName(F.getName() + "$body");
- auto StubDecl = cloneFunctionDecl(*M, F);
- StubDecl->setName(StubUnmangledName);
- StubDecl->setPersonalityFn(nullptr);
- StubDecl->setLinkage(GlobalValue::ExternalLinkage);
- F.replaceAllUsesWith(StubDecl);
-
- auto StubName = Mangle(StubUnmangledName);
- auto BodyName = Mangle(F.getName());
- if (auto CallbackAddr = CCMgr.getCompileCallback(
- [BodyName, &TargetVSO, &ES]() -> JITTargetAddress {
- if (auto Sym = lookup({&TargetVSO}, BodyName))
- return Sym->getAddress();
- else {
- ES.reportError(Sym.takeError());
- return 0;
- }
- })) {
- auto Flags = JITSymbolFlags::fromGlobalValue(F);
- Flags &= ~JITSymbolFlags::Weak;
- StubCallbacksAndLinkages[std::move(StubName)] =
- std::make_pair(*CallbackAddr, Flags);
- } else {
- ES.reportError(CallbackAddr.takeError());
- R.failMaterialization();
- return;
- }
+void CompileOnDemandLayer::expandPartition(GlobalValueSet &Partition) {
+ // Expands the partition to ensure the following rules hold:
+ // (1) If any alias is in the partition, its aliasee is also in the partition.
+ // (2) If any aliasee is in the partition, its aliases are also in the
+ //     partition.
+ // (3) If any global variable is in the partition then all global variables
+ // are in the partition.
+ assert(!Partition.empty() && "Unexpected empty partition");
+
+ const Module &M = *(*Partition.begin())->getParent();
+ bool ContainsGlobalVariables = false;
+ std::vector<const GlobalValue *> GVsToAdd;
+
+ for (auto *GV : Partition)
+ if (isa<GlobalAlias>(GV))
+ GVsToAdd.push_back(
+ cast<GlobalValue>(cast<GlobalAlias>(GV)->getAliasee()));
+ else if (isa<GlobalVariable>(GV))
+ ContainsGlobalVariables = true;
+
+ for (auto &A : M.aliases())
+ if (Partition.count(cast<GlobalValue>(A.getAliasee())))
+ GVsToAdd.push_back(&A);
+
+ if (ContainsGlobalVariables)
+ for (auto &G : M.globals())
+ GVsToAdd.push_back(&G);
+
+ for (auto *GV : GVsToAdd)
+ Partition.insert(GV);
+}
+
+void CompileOnDemandLayer::emitPartition(
+ MaterializationResponsibility R, ThreadSafeModule TSM,
+ IRMaterializationUnit::SymbolNameToDefinitionMap Defs) {
+
+ // FIXME: Need a 'notify lazy-extracting/emitting' callback to tie the
+ // extracted module key, extracted module, and source module key
+ // together. This could be used, for example, to provide a specific
+ // memory manager instance to the linking layer.
+
+ auto &ES = getExecutionSession();
+
+ GlobalValueSet RequestedGVs;
+ for (auto &Name : R.getRequestedSymbols()) {
+ assert(Defs.count(Name) && "No definition for symbol");
+ RequestedGVs.insert(Defs[Name]);
}
- // Build the stub inits map.
- IndirectStubsManager::StubInitsMap StubInits;
- for (auto &KV : StubCallbacksAndLinkages)
- StubInits[*KV.first] = KV.second;
+ auto GVsToExtract = Partition(RequestedGVs);
- // Build the function-body-extracting materialization unit.
- if (auto Err = R.getTargetVSO().define(
- llvm::make_unique<ExtractingIRMaterializationUnit>(ES, *this,
- std::move(M)))) {
- ES.reportError(std::move(Err));
- R.failMaterialization();
+ // Take a 'None' partition to mean the whole module (as opposed to an empty
+ // partition, which means "materialize nothing"). Emit the whole module
+ // unmodified to the base layer.
+ if (GVsToExtract == None) {
+ Defs.clear();
+ BaseLayer.emit(std::move(R), std::move(TSM));
return;
}
- // Build the stubs.
- // FIXME: Remove function bodies materialization unit if stub creation fails.
- auto &StubsMgr = getStubsManager(TargetVSO);
- if (auto Err = StubsMgr.createStubs(StubInits)) {
- ES.reportError(std::move(Err));
- R.failMaterialization();
+ // If the partition is empty, return the whole module to the symbol table.
+ if (GVsToExtract->empty()) {
+ R.replace(llvm::make_unique<PartitioningIRMaterializationUnit>(
+ std::move(TSM), R.getSymbols(), std::move(Defs), *this));
return;
}
- // Resolve and finalize stubs.
- SymbolMap ResolvedStubs;
- for (auto &KV : StubCallbacksAndLinkages) {
- if (auto Sym = StubsMgr.findStub(*KV.first, false))
- ResolvedStubs[KV.first] = Sym;
- else
- llvm_unreachable("Stub went missing");
+ // Ok -- we actually need to partition the symbols. Promote the symbol
+ // linkages/names.
+ // FIXME: We apply this once per partitioning. It's safe, but overkill.
+ {
+ auto PromotedGlobals = PromoteSymbols(*TSM.getModule());
+ if (!PromotedGlobals.empty()) {
+ MangleAndInterner Mangle(ES, TSM.getModule()->getDataLayout());
+ SymbolFlagsMap SymbolFlags;
+ for (auto &GV : PromotedGlobals)
+ SymbolFlags[Mangle(GV->getName())] =
+ JITSymbolFlags::fromGlobalValue(*GV);
+ if (auto Err = R.defineMaterializing(SymbolFlags)) {
+ ES.reportError(std::move(Err));
+ R.failMaterialization();
+ return;
+ }
+ }
}
- R.resolve(ResolvedStubs);
+ expandPartition(*GVsToExtract);
- BaseLayer.emit(std::move(R), std::move(K), std::move(GlobalsModule));
-}
+ // Extract the requested partition (plus any necessary aliases) and
+ // put the rest back into the impl dylib.
+ auto ShouldExtract = [&](const GlobalValue &GV) -> bool {
+ return GVsToExtract->count(&GV);
+ };
-IndirectStubsManager &CompileOnDemandLayer2::getStubsManager(const VSO &V) {
- std::lock_guard<std::mutex> Lock(CODLayerMutex);
- StubManagersMap::iterator I = StubsMgrs.find(&V);
- if (I == StubsMgrs.end())
- I = StubsMgrs.insert(std::make_pair(&V, BuildIndirectStubsManager())).first;
- return *I->second;
-}
+ auto ExtractedTSM = extractSubModule(TSM, ".submodule", ShouldExtract);
+ R.replace(llvm::make_unique<PartitioningIRMaterializationUnit>(
+ ES, std::move(TSM), R.getVModuleKey(), *this));
-void CompileOnDemandLayer2::emitExtractedFunctionsModule(
- MaterializationResponsibility R, std::unique_ptr<Module> M) {
- auto K = getExecutionSession().allocateVModule();
- BaseLayer.emit(std::move(R), std::move(K), std::move(M));
+ BaseLayer.emit(std::move(R), std::move(ExtractedTSM));
}
} // end namespace orc
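
Taken together, the rewritten layer asks a user-configurable partition function which of the requested globals to extract on each materialization; the rest stays in the implementation dylib behind lazy reexports. A minimal setup sketch, assuming compileRequested and compileWholeModule are exposed as static helpers in the header as their definitions above suggest (ES, BaseLayer, LCTMgr and BuildISM are pre-existing objects, not defined here):

using namespace llvm;
using namespace llvm::orc;

// Assumes ES, BaseLayer, LCTMgr and BuildISM were created elsewhere.
CompileOnDemandLayer COD(ES, BaseLayer, LCTMgr, std::move(BuildISM));

// Default behaviour: extract exactly the symbols that were requested.
COD.setPartitionFunction(CompileOnDemandLayer::compileRequested);

// Alternative: hand whole modules to the base layer on first use.
COD.setPartitionFunction(CompileOnDemandLayer::compileWholeModule);
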
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/Core.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 4325d57f73d0..73c0bcdf7d28 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -1,4 +1,4 @@
-//===----- Core.cpp - Core ORC APIs (MaterializationUnit, VSO, etc.) ------===//
+//===--- Core.cpp - Core ORC APIs (MaterializationUnit, JITDylib, etc.) ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,6 +11,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
@@ -18,98 +19,203 @@
#include <future>
#endif
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+
+namespace {
+
+#ifndef NDEBUG
+
+cl::opt<bool> PrintHidden("debug-orc-print-hidden", cl::init(false),
+ cl::desc("debug print hidden symbols defined by "
+ "materialization units"),
+ cl::Hidden);
+
+cl::opt<bool> PrintCallable("debug-orc-print-callable", cl::init(false),
+ cl::desc("debug print callable symbols defined by "
+ "materialization units"),
+ cl::Hidden);
+
+cl::opt<bool> PrintData("debug-orc-print-data", cl::init(false),
+ cl::desc("debug print data symbols defined by "
+ "materialization units"),
+ cl::Hidden);
+
+#endif // NDEBUG
+
+// SetPrinter predicate that prints every element.
+template <typename T> struct PrintAll {
+ bool operator()(const T &E) { return true; }
+};
+
+bool anyPrintSymbolOptionSet() {
+#ifndef NDEBUG
+ return PrintHidden || PrintCallable || PrintData;
+#else
+ return false;
+#endif // NDEBUG
+}
+
+bool flagsMatchCLOpts(const JITSymbolFlags &Flags) {
+#ifndef NDEBUG
+ // Bail out early if this is a hidden symbol and we're not printing hiddens.
+ if (!PrintHidden && !Flags.isExported())
+ return false;
+
+ // Return true if this is callable and we're printing callables.
+ if (PrintCallable && Flags.isCallable())
+ return true;
+
+ // Return true if this is data and we're printing data.
+ if (PrintData && !Flags.isCallable())
+ return true;
+
+ // otherwise return false.
+ return false;
+#else
+ return false;
+#endif // NDEBUG
+}
+
+// Prints a set of items, filtered by a user-supplied predicate.
+template <typename Set, typename Pred = PrintAll<typename Set::value_type>>
+class SetPrinter {
+public:
+ SetPrinter(const Set &S, Pred ShouldPrint = Pred())
+ : S(S), ShouldPrint(std::move(ShouldPrint)) {}
+
+ void printTo(llvm::raw_ostream &OS) const {
+ bool PrintComma = false;
+ OS << "{";
+ for (auto &E : S) {
+ if (ShouldPrint(E)) {
+ if (PrintComma)
+ OS << ',';
+ OS << ' ' << E;
+ PrintComma = true;
+ }
+ }
+ OS << " }";
+ }
+
+private:
+ const Set &S;
+ mutable Pred ShouldPrint;
+};
+
+template <typename Set, typename Pred>
+SetPrinter<Set, Pred> printSet(const Set &S, Pred P = Pred()) {
+ return SetPrinter<Set, Pred>(S, std::move(P));
+}
+
+// Render a SetPrinter by delegating to its printTo method.
+template <typename Set, typename Pred>
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const SetPrinter<Set, Pred> &Printer) {
+ Printer.printTo(OS);
+ return OS;
+}
+
+struct PrintSymbolFlagsMapElemsMatchingCLOpts {
+ bool operator()(const orc::SymbolFlagsMap::value_type &KV) {
+ return flagsMatchCLOpts(KV.second);
+ }
+};
+
+struct PrintSymbolMapElemsMatchingCLOpts {
+ bool operator()(const orc::SymbolMap::value_type &KV) {
+ return flagsMatchCLOpts(KV.second.getFlags());
+ }
+};
+
+} // end anonymous namespace
+
namespace llvm {
namespace orc {
+ SymbolStringPool::PoolMapEntry SymbolStringPtr::Tombstone(0);
+
char FailedToMaterialize::ID = 0;
char SymbolsNotFound::ID = 0;
+char SymbolsCouldNotBeRemoved::ID = 0;
RegisterDependenciesFunction NoDependenciesToRegister =
RegisterDependenciesFunction();
void MaterializationUnit::anchor() {}
+raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym) {
+ return OS << *Sym;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols) {
+ return OS << printSet(Symbols, PrintAll<SymbolStringPtr>());
+}
+
raw_ostream &operator<<(raw_ostream &OS, const JITSymbolFlags &Flags) {
+ if (Flags.isCallable())
+ OS << "[Callable]";
+ else
+ OS << "[Data]";
if (Flags.isWeak())
- OS << 'W';
+ OS << "[Weak]";
else if (Flags.isCommon())
- OS << 'C';
- else
- OS << 'S';
+ OS << "[Common]";
- if (Flags.isExported())
- OS << 'E';
- else
- OS << 'H';
+ if (!Flags.isExported())
+ OS << "[Hidden]";
return OS;
}
raw_ostream &operator<<(raw_ostream &OS, const JITEvaluatedSymbol &Sym) {
- OS << format("0x%016x", Sym.getAddress()) << " " << Sym.getFlags();
- return OS;
+ return OS << format("0x%016" PRIx64, Sym.getAddress()) << " "
+ << Sym.getFlags();
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap::value_type &KV) {
+ return OS << "(\"" << KV.first << "\", " << KV.second << ")";
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolMap::value_type &KV) {
- OS << "\"" << *KV.first << "\": " << KV.second;
- return OS;
+ return OS << "(\"" << KV.first << "\": " << KV.second << ")";
}
-raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols) {
- OS << "{";
- if (!Symbols.empty()) {
- OS << " \"" << **Symbols.begin() << "\"";
- for (auto &Sym : make_range(std::next(Symbols.begin()), Symbols.end()))
- OS << ", \"" << *Sym << "\"";
- }
- OS << " }";
- return OS;
+raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap &SymbolFlags) {
+ return OS << printSet(SymbolFlags, PrintSymbolFlagsMapElemsMatchingCLOpts());
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolMap &Symbols) {
- OS << "{";
- if (!Symbols.empty()) {
- OS << " {" << *Symbols.begin() << "}";
- for (auto &Sym : make_range(std::next(Symbols.begin()), Symbols.end()))
- OS << ", {" << Sym << "}";
- }
- OS << " }";
- return OS;
+ return OS << printSet(Symbols, PrintSymbolMapElemsMatchingCLOpts());
}
-raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap &SymbolFlags) {
- OS << "{";
- if (!SymbolFlags.empty()) {
- OS << " {\"" << *SymbolFlags.begin()->first
- << "\": " << SymbolFlags.begin()->second << "}";
- for (auto &KV :
- make_range(std::next(SymbolFlags.begin()), SymbolFlags.end()))
- OS << ", {\"" << *KV.first << "\": " << KV.second << "}";
- }
- OS << " }";
- return OS;
+raw_ostream &operator<<(raw_ostream &OS,
+ const SymbolDependenceMap::value_type &KV) {
+ return OS << "(" << KV.first << ", " << KV.second << ")";
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolDependenceMap &Deps) {
- OS << "{";
- if (!Deps.empty()) {
- OS << " { " << Deps.begin()->first->getName() << ": "
- << Deps.begin()->second << " }";
- for (auto &KV : make_range(std::next(Deps.begin()), Deps.end()))
- OS << ", { " << KV.first->getName() << ": " << KV.second << " }";
- }
- OS << " }";
- return OS;
+ return OS << printSet(Deps, PrintAll<SymbolDependenceMap::value_type>());
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU) {
+ OS << "MU@" << &MU << " (\"" << MU.getName() << "\"";
+ if (anyPrintSymbolOptionSet())
+ OS << ", " << MU.getSymbols();
+ return OS << ")";
}
-raw_ostream &operator<<(raw_ostream &OS, const VSOList &VSOs) {
+raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs) {
OS << "[";
- if (!VSOs.empty()) {
- assert(VSOs.front() && "VSOList entries must not be null");
- OS << " " << VSOs.front()->getName();
- for (auto *V : make_range(std::next(VSOs.begin()), VSOs.end())) {
- assert(V && "VSOList entries must not be null");
- OS << ", " << V->getName();
+ if (!JDs.empty()) {
+ assert(JDs.front().first && "JITDylibList entries must not be null");
+ OS << " (\"" << JDs.front().first->getName() << "\", "
+ << (JDs.front().second ? "true" : "false") << ")";
+ for (auto &KV : make_range(std::next(JDs.begin()), JDs.end())) {
+ assert(KV.first && "JITDylibList entries must not be null");
+ OS << ", (\"" << KV.first->getName() << "\", "
+ << (KV.second ? "true" : "false") << ")";
}
}
OS << " ]";
@@ -142,359 +248,17 @@ void SymbolsNotFound::log(raw_ostream &OS) const {
OS << "Symbols not found: " << Symbols;
}
-void ExecutionSessionBase::legacyFailQuery(AsynchronousSymbolQuery &Q,
- Error Err) {
- assert(!!Err && "Error should be in failure state");
-
- bool SendErrorToQuery;
- runSessionLocked([&]() {
- Q.detach();
- SendErrorToQuery = Q.canStillFail();
- });
-
- if (SendErrorToQuery)
- Q.handleFailed(std::move(Err));
- else
- reportError(std::move(Err));
-}
-
-Expected<SymbolMap> ExecutionSessionBase::legacyLookup(
- ExecutionSessionBase &ES, LegacyAsyncLookupFunction AsyncLookup,
- SymbolNameSet Names, bool WaitUntilReady,
- RegisterDependenciesFunction RegisterDependencies) {
-#if LLVM_ENABLE_THREADS
- // In the threaded case we use promises to return the results.
- std::promise<SymbolMap> PromisedResult;
- std::mutex ErrMutex;
- Error ResolutionError = Error::success();
- std::promise<void> PromisedReady;
- Error ReadyError = Error::success();
- auto OnResolve = [&](Expected<SymbolMap> R) {
- if (R)
- PromisedResult.set_value(std::move(*R));
- else {
- {
- ErrorAsOutParameter _(&ResolutionError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ResolutionError = R.takeError();
- }
- PromisedResult.set_value(SymbolMap());
- }
- };
-
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- if (Err) {
- ErrorAsOutParameter _(&ReadyError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ReadyError = std::move(Err);
- }
- PromisedReady.set_value();
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- ES.reportError(std::move(Err));
- };
- }
-
-#else
- SymbolMap Result;
- Error ResolutionError = Error::success();
- Error ReadyError = Error::success();
-
- auto OnResolve = [&](Expected<SymbolMap> R) {
- ErrorAsOutParameter _(&ResolutionError);
- if (R)
- Result = std::move(*R);
- else
- ResolutionError = R.takeError();
- };
-
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- ErrorAsOutParameter _(&ReadyError);
- if (Err)
- ReadyError = std::move(Err);
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- ES.reportError(std::move(Err));
- };
- }
-#endif
-
- auto Query = std::make_shared<AsynchronousSymbolQuery>(
- Names, std::move(OnResolve), std::move(OnReady));
- // FIXME: This should be run session locked along with the registration code
- // and error reporting below.
- SymbolNameSet UnresolvedSymbols = AsyncLookup(Query, std::move(Names));
-
- // If the query was lodged successfully then register the dependencies,
- // otherwise fail it with an error.
- if (UnresolvedSymbols.empty())
- RegisterDependencies(Query->QueryRegistrations);
- else {
- bool DeliverError = runSessionLocked([&]() {
- Query->detach();
- return Query->canStillFail();
- });
- auto Err = make_error<SymbolsNotFound>(std::move(UnresolvedSymbols));
- if (DeliverError)
- Query->handleFailed(std::move(Err));
- else
- ES.reportError(std::move(Err));
- }
-
-#if LLVM_ENABLE_THREADS
- auto ResultFuture = PromisedResult.get_future();
- auto Result = ResultFuture.get();
-
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
- return std::move(ResolutionError);
- }
- }
-
- if (WaitUntilReady) {
- auto ReadyFuture = PromisedReady.get_future();
- ReadyFuture.get();
-
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ReadyError)
- return std::move(ReadyError);
- }
- } else
- cantFail(std::move(ReadyError));
-
- return std::move(Result);
-
-#else
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
- return std::move(ResolutionError);
- }
-
- if (ReadyError)
- return std::move(ReadyError);
-
- return Result;
-#endif
-}
-
-void ExecutionSessionBase::lookup(
- const VSOList &VSOs, const SymbolNameSet &Symbols,
- SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
- RegisterDependenciesFunction RegisterDependencies) {
-
- // lookup can be re-entered recursively if running on a single thread. Run any
- // outstanding MUs in case this query depends on them, otherwise the main
- // thread will starve waiting for a result from an MU that it failed to run.
- runOutstandingMUs();
-
- auto Unresolved = std::move(Symbols);
- std::map<VSO *, MaterializationUnitList> MUsMap;
- auto Q = std::make_shared<AsynchronousSymbolQuery>(
- Symbols, std::move(OnResolve), std::move(OnReady));
- bool QueryIsFullyResolved = false;
- bool QueryIsFullyReady = false;
- bool QueryFailed = false;
-
- runSessionLocked([&]() {
- for (auto *V : VSOs) {
- assert(V && "VSOList entries must not be null");
- assert(!MUsMap.count(V) &&
- "VSOList should not contain duplicate entries");
- V->lodgeQuery(Q, Unresolved, MUsMap[V]);
- }
-
- if (Unresolved.empty()) {
- // Query lodged successfully.
-
- // Record whether this query is fully ready / resolved. We will use
- // this to call handleFullyResolved/handleFullyReady outside the session
- // lock.
- QueryIsFullyResolved = Q->isFullyResolved();
- QueryIsFullyReady = Q->isFullyReady();
-
- // Call the register dependencies function.
- if (RegisterDependencies && !Q->QueryRegistrations.empty())
- RegisterDependencies(Q->QueryRegistrations);
- } else {
- // Query failed due to unresolved symbols.
- QueryFailed = true;
-
- // Disconnect the query from its dependencies.
- Q->detach();
-
- // Replace the MUs.
- for (auto &KV : MUsMap)
- for (auto &MU : KV.second)
- KV.first->replace(std::move(MU));
- }
- });
-
- if (QueryFailed) {
- Q->handleFailed(make_error<SymbolsNotFound>(std::move(Unresolved)));
- return;
- } else {
- if (QueryIsFullyResolved)
- Q->handleFullyResolved();
- if (QueryIsFullyReady)
- Q->handleFullyReady();
- }
-
- // Move the MUs to the OutstandingMUs list, then materialize.
- {
- std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
-
- for (auto &KV : MUsMap)
- for (auto &MU : KV.second)
- OutstandingMUs.push_back(std::make_pair(KV.first, std::move(MU)));
- }
-
- runOutstandingMUs();
+SymbolsCouldNotBeRemoved::SymbolsCouldNotBeRemoved(SymbolNameSet Symbols)
+ : Symbols(std::move(Symbols)) {
+ assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
}
-Expected<SymbolMap>
-ExecutionSessionBase::lookup(const VSOList &VSOs, const SymbolNameSet &Symbols,
- RegisterDependenciesFunction RegisterDependencies,
- bool WaitUntilReady) {
-#if LLVM_ENABLE_THREADS
- // In the threaded case we use promises to return the results.
- std::promise<SymbolMap> PromisedResult;
- std::mutex ErrMutex;
- Error ResolutionError = Error::success();
- std::promise<void> PromisedReady;
- Error ReadyError = Error::success();
- auto OnResolve = [&](Expected<SymbolMap> R) {
- if (R)
- PromisedResult.set_value(std::move(*R));
- else {
- {
- ErrorAsOutParameter _(&ResolutionError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ResolutionError = R.takeError();
- }
- PromisedResult.set_value(SymbolMap());
- }
- };
-
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- if (Err) {
- ErrorAsOutParameter _(&ReadyError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ReadyError = std::move(Err);
- }
- PromisedReady.set_value();
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- reportError(std::move(Err));
- };
- }
-
-#else
- SymbolMap Result;
- Error ResolutionError = Error::success();
- Error ReadyError = Error::success();
-
- auto OnResolve = [&](Expected<SymbolMap> R) {
- ErrorAsOutParameter _(&ResolutionError);
- if (R)
- Result = std::move(*R);
- else
- ResolutionError = R.takeError();
- };
-
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- ErrorAsOutParameter _(&ReadyError);
- if (Err)
- ReadyError = std::move(Err);
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- reportError(std::move(Err));
- };
- }
-#endif
-
- // Perform the asynchronous lookup.
- lookup(VSOs, Symbols, OnResolve, OnReady, RegisterDependencies);
-
-#if LLVM_ENABLE_THREADS
- auto ResultFuture = PromisedResult.get_future();
- auto Result = ResultFuture.get();
-
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
- return std::move(ResolutionError);
- }
- }
-
- if (WaitUntilReady) {
- auto ReadyFuture = PromisedReady.get_future();
- ReadyFuture.get();
-
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ReadyError)
- return std::move(ReadyError);
- }
- } else
- cantFail(std::move(ReadyError));
-
- return std::move(Result);
-
-#else
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
- return std::move(ResolutionError);
- }
-
- if (ReadyError)
- return std::move(ReadyError);
-
- return Result;
-#endif
+std::error_code SymbolsCouldNotBeRemoved::convertToErrorCode() const {
+ return orcError(OrcErrorCode::UnknownORCError);
}
-void ExecutionSessionBase::runOutstandingMUs() {
- while (1) {
- std::pair<VSO *, std::unique_ptr<MaterializationUnit>> VSOAndMU;
-
- {
- std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
- if (!OutstandingMUs.empty()) {
- VSOAndMU = std::move(OutstandingMUs.back());
- OutstandingMUs.pop_back();
- }
- }
-
- if (VSOAndMU.first) {
- assert(VSOAndMU.second && "VSO, but no MU?");
- dispatchMaterialization(*VSOAndMU.first, std::move(VSOAndMU.second));
- } else
- break;
- }
+void SymbolsCouldNotBeRemoved::log(raw_ostream &OS) const {
+ OS << "Symbols could not be removed: " << Symbols;
}
AsynchronousSymbolQuery::AsynchronousSymbolQuery(
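
The new SymbolsCouldNotBeRemoved error mirrors SymbolsNotFound and carries the offending symbol set. A hedged sketch of how a caller might consume it with the usual llvm::Error handlers (the call that produces the error is out of scope of this hunk, so only the handling side is shown):

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void reportRemovalFailure(Error Err) {
  handleAllErrors(std::move(Err),
                  [](orc::SymbolsCouldNotBeRemoved &E) {
                    // Dedicated handling: log() prints the symbol set.
                    E.log(errs());
                    errs() << "\n";
                  },
                  [](ErrorInfoBase &E) { errs() << E.message() << "\n"; });
}
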
@@ -520,23 +284,45 @@ void AsynchronousSymbolQuery::resolve(const SymbolStringPtr &Name,
void AsynchronousSymbolQuery::handleFullyResolved() {
assert(NotYetResolvedCount == 0 && "Not fully resolved?");
- assert(NotifySymbolsResolved &&
- "NotifySymbolsResolved already called or error occurred");
- NotifySymbolsResolved(std::move(ResolvedSymbols));
+
+ if (!NotifySymbolsResolved) {
+ // handleFullyResolved may be called by handleFullyReady (see comments in
+ // that method), in which case this is a no-op, so bail out.
+ assert(!NotifySymbolsReady &&
+ "NotifySymbolsResolved already called or an error occurred");
+ return;
+ }
+
+ auto TmpNotifySymbolsResolved = std::move(NotifySymbolsResolved);
NotifySymbolsResolved = SymbolsResolvedCallback();
+ TmpNotifySymbolsResolved(std::move(ResolvedSymbols));
}
void AsynchronousSymbolQuery::notifySymbolReady() {
- assert(NotYetReadyCount != 0 && "All symbols already finalized");
+ assert(NotYetReadyCount != 0 && "All symbols already emitted");
--NotYetReadyCount;
}
void AsynchronousSymbolQuery::handleFullyReady() {
+ assert(NotifySymbolsReady &&
+ "NotifySymbolsReady already called or an error occurred");
+
+ auto TmpNotifySymbolsReady = std::move(NotifySymbolsReady);
+ NotifySymbolsReady = SymbolsReadyCallback();
+
+ if (NotYetResolvedCount == 0 && NotifySymbolsResolved) {
+ // The NotifyResolved callback of one query must have caused this query to
+ // become ready (i.e. there is still a handleFullyResolved callback waiting
+ // to be made back up the stack). Fold the handleFullyResolved call into
+ // this one before proceeding. This will cause the call further up the
+ // stack to become a no-op.
+ handleFullyResolved();
+ }
+
assert(QueryRegistrations.empty() &&
"Query is still registered with some symbols");
assert(!NotifySymbolsResolved && "Resolution not applied yet");
- NotifySymbolsReady(Error::success());
- NotifySymbolsReady = SymbolsReadyCallback();
+ TmpNotifySymbolsReady(Error::success());
}
bool AsynchronousSymbolQuery::canStillFail() {
@@ -557,17 +343,19 @@ void AsynchronousSymbolQuery::handleFailed(Error Err) {
NotifySymbolsReady = SymbolsReadyCallback();
}
-void AsynchronousSymbolQuery::addQueryDependence(VSO &V, SymbolStringPtr Name) {
- bool Added = QueryRegistrations[&V].insert(std::move(Name)).second;
+void AsynchronousSymbolQuery::addQueryDependence(JITDylib &JD,
+ SymbolStringPtr Name) {
+ bool Added = QueryRegistrations[&JD].insert(std::move(Name)).second;
(void)Added;
assert(Added && "Duplicate dependence notification?");
}
void AsynchronousSymbolQuery::removeQueryDependence(
- VSO &V, const SymbolStringPtr &Name) {
- auto QRI = QueryRegistrations.find(&V);
- assert(QRI != QueryRegistrations.end() && "No dependencies registered for V");
- assert(QRI->second.count(Name) && "No dependency on Name in V");
+ JITDylib &JD, const SymbolStringPtr &Name) {
+ auto QRI = QueryRegistrations.find(&JD);
+ assert(QRI != QueryRegistrations.end() &&
+ "No dependencies registered for JD");
+ assert(QRI->second.count(Name) && "No dependency on Name in JD");
QRI->second.erase(Name);
if (QRI->second.empty())
QueryRegistrations.erase(QRI);
@@ -583,8 +371,8 @@ void AsynchronousSymbolQuery::detach() {
}
MaterializationResponsibility::MaterializationResponsibility(
- VSO &V, SymbolFlagsMap SymbolFlags)
- : V(V), SymbolFlags(std::move(SymbolFlags)) {
+ JITDylib &JD, SymbolFlagsMap SymbolFlags, VModuleKey K)
+ : JD(JD), SymbolFlags(std::move(SymbolFlags)), K(std::move(K)) {
assert(!this->SymbolFlags.empty() && "Materializing nothing?");
#ifndef NDEBUG
@@ -598,11 +386,13 @@ MaterializationResponsibility::~MaterializationResponsibility() {
"All symbols should have been explicitly materialized or failed");
}
-SymbolNameSet MaterializationResponsibility::getRequestedSymbols() {
- return V.getRequestedSymbols(SymbolFlags);
+SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const {
+ return JD.getRequestedSymbols(SymbolFlags);
}
void MaterializationResponsibility::resolve(const SymbolMap &Symbols) {
+ LLVM_DEBUG(dbgs() << "In " << JD.getName() << " resolving " << Symbols
+ << "\n");
#ifndef NDEBUG
for (auto &KV : Symbols) {
auto I = SymbolFlags.find(KV.first);
@@ -619,17 +409,17 @@ void MaterializationResponsibility::resolve(const SymbolMap &Symbols) {
}
#endif
- V.resolve(Symbols);
+ JD.resolve(Symbols);
}
-void MaterializationResponsibility::finalize() {
+void MaterializationResponsibility::emit() {
#ifndef NDEBUG
for (auto &KV : SymbolFlags)
assert(!KV.second.isMaterializing() &&
- "Failed to resolve symbol before finalization");
+ "Failed to resolve symbol before emission");
#endif // NDEBUG
- V.finalize(SymbolFlags);
+ JD.emit(SymbolFlags);
SymbolFlags.clear();
}
@@ -637,8 +427,8 @@ Error MaterializationResponsibility::defineMaterializing(
const SymbolFlagsMap &NewSymbolFlags) {
// Add the given symbols to this responsibility object.
// It's ok if we hit a duplicate here: In that case the new version will be
- // discarded, and the VSO::defineMaterializing method will return a duplicate
- // symbol error.
+ // discarded, and the JITDylib::defineMaterializing method will return a
+ // duplicate symbol error.
for (auto &KV : NewSymbolFlags) {
auto I = SymbolFlags.insert(KV).first;
(void)I;
@@ -647,7 +437,7 @@ Error MaterializationResponsibility::defineMaterializing(
#endif
}
- return V.defineMaterializing(NewSymbolFlags);
+ return JD.defineMaterializing(NewSymbolFlags);
}
void MaterializationResponsibility::failMaterialization() {
@@ -656,7 +446,7 @@ void MaterializationResponsibility::failMaterialization() {
for (auto &KV : SymbolFlags)
FailedSymbols.insert(KV.first);
- V.notifyFailed(FailedSymbols);
+ JD.notifyFailed(FailedSymbols);
SymbolFlags.clear();
}
@@ -665,11 +455,21 @@ void MaterializationResponsibility::replace(
for (auto &KV : MU->getSymbols())
SymbolFlags.erase(KV.first);
- V.replace(std::move(MU));
+ LLVM_DEBUG(JD.getExecutionSession().runSessionLocked([&]() {
+ dbgs() << "In " << JD.getName() << " replacing symbols with " << *MU
+ << "\n";
+ }););
+
+ JD.replace(std::move(MU));
}
MaterializationResponsibility
-MaterializationResponsibility::delegate(const SymbolNameSet &Symbols) {
+MaterializationResponsibility::delegate(const SymbolNameSet &Symbols,
+ VModuleKey NewKey) {
+
+ if (NewKey == VModuleKey())
+ NewKey = K;
+
SymbolFlagsMap DelegatedFlags;
for (auto &Name : Symbols) {
@@ -682,34 +482,40 @@ MaterializationResponsibility::delegate(const SymbolNameSet &Symbols) {
SymbolFlags.erase(I);
}
- return MaterializationResponsibility(V, std::move(DelegatedFlags));
+ return MaterializationResponsibility(JD, std::move(DelegatedFlags),
+ std::move(NewKey));
}
void MaterializationResponsibility::addDependencies(
const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies) {
assert(SymbolFlags.count(Name) &&
"Symbol not covered by this MaterializationResponsibility instance");
- V.addDependencies(Name, Dependencies);
+ JD.addDependencies(Name, Dependencies);
}
void MaterializationResponsibility::addDependenciesForAll(
const SymbolDependenceMap &Dependencies) {
for (auto &KV : SymbolFlags)
- V.addDependencies(KV.first, Dependencies);
+ JD.addDependencies(KV.first, Dependencies);
}
AbsoluteSymbolsMaterializationUnit::AbsoluteSymbolsMaterializationUnit(
- SymbolMap Symbols)
- : MaterializationUnit(extractFlags(Symbols)), Symbols(std::move(Symbols)) {}
+ SymbolMap Symbols, VModuleKey K)
+ : MaterializationUnit(extractFlags(Symbols), std::move(K)),
+ Symbols(std::move(Symbols)) {}
+
+StringRef AbsoluteSymbolsMaterializationUnit::getName() const {
+ return "<Absolute Symbols>";
+}
void AbsoluteSymbolsMaterializationUnit::materialize(
MaterializationResponsibility R) {
R.resolve(Symbols);
- R.finalize();
+ R.emit();
}
-void AbsoluteSymbolsMaterializationUnit::discard(const VSO &V,
- SymbolStringPtr Name) {
+void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD,
+ const SymbolStringPtr &Name) {
assert(Symbols.count(Name) && "Symbol is not part of this MU");
Symbols.erase(Name);
}
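
AbsoluteSymbolsMaterializationUnit now carries a VModuleKey and a name, and "emits" rather than "finalizes" its symbols; its materialize() simply calls R.resolve() followed by R.emit(). The usual way to create one is the absoluteSymbols() helper; a short usage sketch (JD, Mangle and FooAddr are assumed to exist already):

#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

// Define the symbol "foo" in JITDylib JD as an absolute address.
Error defineFoo(JITDylib &JD, MangleAndInterner &Mangle,
                JITTargetAddress FooAddr) {
  return JD.define(absoluteSymbols(
      {{Mangle("foo"), JITEvaluatedSymbol(FooAddr, JITSymbolFlags::Exported)}}));
}
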
@@ -723,19 +529,26 @@ AbsoluteSymbolsMaterializationUnit::extractFlags(const SymbolMap &Symbols) {
}
ReExportsMaterializationUnit::ReExportsMaterializationUnit(
- VSO *SourceVSO, SymbolAliasMap Aliases)
- : MaterializationUnit(extractFlags(Aliases)), SourceVSO(SourceVSO),
+ JITDylib *SourceJD, bool MatchNonExported, SymbolAliasMap Aliases,
+ VModuleKey K)
+ : MaterializationUnit(extractFlags(Aliases), std::move(K)),
+ SourceJD(SourceJD), MatchNonExported(MatchNonExported),
Aliases(std::move(Aliases)) {}
+StringRef ReExportsMaterializationUnit::getName() const {
+ return "<Reexports>";
+}
+
void ReExportsMaterializationUnit::materialize(
MaterializationResponsibility R) {
- auto &ES = R.getTargetVSO().getExecutionSession();
- VSO &TgtV = R.getTargetVSO();
- VSO &SrcV = SourceVSO ? *SourceVSO : TgtV;
+ auto &ES = R.getTargetJITDylib().getExecutionSession();
+ JITDylib &TgtJD = R.getTargetJITDylib();
+ JITDylib &SrcJD = SourceJD ? *SourceJD : TgtJD;
// Find the set of requested aliases and aliasees. Return any unrequested
- // aliases back to the VSO so as to not prematurely materialize any aliasees.
+ // aliases back to the JITDylib so as to not prematurely materialize any
+ // aliasees.
auto RequestedSymbols = R.getRequestedSymbols();
SymbolAliasMap RequestedAliases;
@@ -747,8 +560,8 @@ void ReExportsMaterializationUnit::materialize(
}
if (!Aliases.empty()) {
- if (SourceVSO)
- R.replace(reexports(*SourceVSO, std::move(Aliases)));
+ if (SourceJD)
+ R.replace(reexports(*SourceJD, std::move(Aliases), MatchNonExported));
else
R.replace(symbolAliases(std::move(Aliases)));
}
@@ -776,20 +589,22 @@ void ReExportsMaterializationUnit::materialize(
SymbolNameSet QuerySymbols;
SymbolAliasMap QueryAliases;
- for (auto I = RequestedAliases.begin(), E = RequestedAliases.end();
- I != E;) {
- auto Tmp = I++;
-
+ // Collect as many aliases as we can without including a chain.
+ for (auto &KV : RequestedAliases) {
// Chain detected. Skip this symbol for this round.
- if (&SrcV == &TgtV && (QueryAliases.count(Tmp->second.Aliasee) ||
- RequestedAliases.count(Tmp->second.Aliasee)))
+ if (&SrcJD == &TgtJD && (QueryAliases.count(KV.second.Aliasee) ||
+ RequestedAliases.count(KV.second.Aliasee)))
continue;
- ResponsibilitySymbols.insert(Tmp->first);
- QuerySymbols.insert(Tmp->second.Aliasee);
- QueryAliases[Tmp->first] = std::move(Tmp->second);
- RequestedAliases.erase(Tmp);
+ ResponsibilitySymbols.insert(KV.first);
+ QuerySymbols.insert(KV.second.Aliasee);
+ QueryAliases[KV.first] = std::move(KV.second);
}
+
+ // Remove the aliases collected this round from the RequestedAliases map.
+ for (auto &KV : QueryAliases)
+ RequestedAliases.erase(KV.first);
+
assert(!QuerySymbols.empty() && "Alias cycle detected!");
auto QueryInfo = std::make_shared<OnResolveInfo>(
@@ -806,21 +621,21 @@ void ReExportsMaterializationUnit::materialize(
QueryInfos.pop_back();
auto RegisterDependencies = [QueryInfo,
- &SrcV](const SymbolDependenceMap &Deps) {
+ &SrcJD](const SymbolDependenceMap &Deps) {
// If there were no materializing symbols, just bail out.
if (Deps.empty())
return;
- // Otherwise the only deps should be on SrcV.
- assert(Deps.size() == 1 && Deps.count(&SrcV) &&
+ // Otherwise the only deps should be on SrcJD.
+ assert(Deps.size() == 1 && Deps.count(&SrcJD) &&
"Unexpected dependencies for reexports");
- auto &SrcVDeps = Deps.find(&SrcV)->second;
+ auto &SrcJDDeps = Deps.find(&SrcJD)->second;
SymbolDependenceMap PerAliasDepsMap;
- auto &PerAliasDeps = PerAliasDepsMap[&SrcV];
+ auto &PerAliasDeps = PerAliasDepsMap[&SrcJD];
for (auto &KV : QueryInfo->Aliases)
- if (SrcVDeps.count(KV.second.Aliasee)) {
+ if (SrcJDDeps.count(KV.second.Aliasee)) {
PerAliasDeps = {KV.second.Aliasee};
QueryInfo->R.addDependencies(KV.first, PerAliasDepsMap);
}
@@ -836,9 +651,9 @@ void ReExportsMaterializationUnit::materialize(
(*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags);
}
QueryInfo->R.resolve(ResolutionMap);
- QueryInfo->R.finalize();
+ QueryInfo->R.emit();
} else {
- auto &ES = QueryInfo->R.getTargetVSO().getExecutionSession();
+ auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession();
ES.reportError(Result.takeError());
QueryInfo->R.failMaterialization();
}
@@ -846,12 +661,14 @@ void ReExportsMaterializationUnit::materialize(
auto OnReady = [&ES](Error Err) { ES.reportError(std::move(Err)); };
- ES.lookup({&SrcV}, QuerySymbols, std::move(OnResolve), std::move(OnReady),
+ ES.lookup(JITDylibSearchList({{&SrcJD, MatchNonExported}}), QuerySymbols,
+ std::move(OnResolve), std::move(OnReady),
std::move(RegisterDependencies));
}
}
-void ReExportsMaterializationUnit::discard(const VSO &V, SymbolStringPtr Name) {
+void ReExportsMaterializationUnit::discard(const JITDylib &JD,
+ const SymbolStringPtr &Name) {
assert(Aliases.count(Name) &&
"Symbol not covered by this MaterializationUnit");
Aliases.erase(Name);
@@ -867,8 +684,8 @@ ReExportsMaterializationUnit::extractFlags(const SymbolAliasMap &Aliases) {
}
Expected<SymbolAliasMap>
-buildSimpleReexportsAliasMap(VSO &SourceV, const SymbolNameSet &Symbols) {
- auto Flags = SourceV.lookupFlags(Symbols);
+buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) {
+ auto Flags = SourceJD.lookupFlags(Symbols);
if (Flags.size() != Symbols.size()) {
SymbolNameSet Unresolved = Symbols;
@@ -886,7 +703,33 @@ buildSimpleReexportsAliasMap(VSO &SourceV, const SymbolNameSet &Symbols) {
return Result;
}
-Error VSO::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
+ReexportsGenerator::ReexportsGenerator(JITDylib &SourceJD,
+ bool MatchNonExported,
+ SymbolPredicate Allow)
+ : SourceJD(SourceJD), MatchNonExported(MatchNonExported),
+ Allow(std::move(Allow)) {}
+
+SymbolNameSet ReexportsGenerator::operator()(JITDylib &JD,
+ const SymbolNameSet &Names) {
+ orc::SymbolNameSet Added;
+ orc::SymbolAliasMap AliasMap;
+
+ auto Flags = SourceJD.lookupFlags(Names);
+
+ for (auto &KV : Flags) {
+ if (Allow && !Allow(KV.first))
+ continue;
+ AliasMap[KV.first] = SymbolAliasMapEntry(KV.first, KV.second);
+ Added.insert(KV.first);
+ }
+
+ if (!Added.empty())
+ cantFail(JD.define(reexports(SourceJD, AliasMap, MatchNonExported)));
+
+ return Added;
+}
+
+Error JITDylib::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
return ES.runSessionLocked([&]() -> Error {
std::vector<SymbolMap::iterator> AddedSyms;
@@ -916,7 +759,7 @@ Error VSO::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
});
}
-void VSO::replace(std::unique_ptr<MaterializationUnit> MU) {
+void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
assert(MU != nullptr && "Can not replace with a null MaterializationUnit");
auto MustRunMU =
@@ -967,13 +810,14 @@ void VSO::replace(std::unique_ptr<MaterializationUnit> MU) {
ES.dispatchMaterialization(*this, std::move(MustRunMU));
}
-SymbolNameSet VSO::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) {
+SymbolNameSet
+JITDylib::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const {
return ES.runSessionLocked([&]() {
SymbolNameSet RequestedSymbols;
for (auto &KV : SymbolFlags) {
- assert(Symbols.count(KV.first) && "VSO does not cover this symbol?");
- assert(Symbols[KV.first].getFlags().isMaterializing() &&
+ assert(Symbols.count(KV.first) && "JITDylib does not cover this symbol?");
+ assert(Symbols.find(KV.first)->second.getFlags().isMaterializing() &&
"getRequestedSymbols can only be called for materializing "
"symbols");
auto I = MaterializingInfos.find(KV.first);
@@ -988,47 +832,47 @@ SymbolNameSet VSO::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) {
});
}
-void VSO::addDependencies(const SymbolStringPtr &Name,
- const SymbolDependenceMap &Dependencies) {
+void JITDylib::addDependencies(const SymbolStringPtr &Name,
+ const SymbolDependenceMap &Dependencies) {
assert(Symbols.count(Name) && "Name not in symbol table");
assert((Symbols[Name].getFlags().isLazy() ||
Symbols[Name].getFlags().isMaterializing()) &&
"Symbol is not lazy or materializing");
auto &MI = MaterializingInfos[Name];
- assert(!MI.IsFinalized && "Can not add dependencies to finalized symbol");
+ assert(!MI.IsEmitted && "Can not add dependencies to an emitted symbol");
for (auto &KV : Dependencies) {
- assert(KV.first && "Null VSO in dependency?");
- auto &OtherVSO = *KV.first;
- auto &DepsOnOtherVSO = MI.UnfinalizedDependencies[&OtherVSO];
+ assert(KV.first && "Null JITDylib in dependency?");
+ auto &OtherJITDylib = *KV.first;
+ auto &DepsOnOtherJITDylib = MI.UnemittedDependencies[&OtherJITDylib];
for (auto &OtherSymbol : KV.second) {
#ifndef NDEBUG
- // Assert that this symbol exists and has not been finalized already.
- auto SymI = OtherVSO.Symbols.find(OtherSymbol);
- assert(SymI != OtherVSO.Symbols.end() &&
+ // Assert that this symbol exists and has not been emitted already.
+ auto SymI = OtherJITDylib.Symbols.find(OtherSymbol);
+ assert(SymI != OtherJITDylib.Symbols.end() &&
(SymI->second.getFlags().isLazy() ||
SymI->second.getFlags().isMaterializing()) &&
- "Dependency on finalized symbol");
+ "Dependency on emitted symbol");
#endif
- auto &OtherMI = OtherVSO.MaterializingInfos[OtherSymbol];
+ auto &OtherMI = OtherJITDylib.MaterializingInfos[OtherSymbol];
- if (OtherMI.IsFinalized)
- transferFinalizedNodeDependencies(MI, Name, OtherMI);
- else if (&OtherVSO != this || OtherSymbol != Name) {
+ if (OtherMI.IsEmitted)
+ transferEmittedNodeDependencies(MI, Name, OtherMI);
+ else if (&OtherJITDylib != this || OtherSymbol != Name) {
OtherMI.Dependants[this].insert(Name);
- DepsOnOtherVSO.insert(OtherSymbol);
+ DepsOnOtherJITDylib.insert(OtherSymbol);
}
}
- if (DepsOnOtherVSO.empty())
- MI.UnfinalizedDependencies.erase(&OtherVSO);
+ if (DepsOnOtherJITDylib.empty())
+ MI.UnemittedDependencies.erase(&OtherJITDylib);
}
}
-void VSO::resolve(const SymbolMap &Resolved) {
+void JITDylib::resolve(const SymbolMap &Resolved) {
auto FullyResolvedQueries = ES.runSessionLocked([&, this]() {
AsynchronousSymbolQuerySet FullyResolvedQueries;
for (const auto &KV : Resolved) {
@@ -1074,11 +918,11 @@ void VSO::resolve(const SymbolMap &Resolved) {
}
}
-void VSO::finalize(const SymbolFlagsMap &Finalized) {
+void JITDylib::emit(const SymbolFlagsMap &Emitted) {
auto FullyReadyQueries = ES.runSessionLocked([&, this]() {
AsynchronousSymbolQuerySet ReadyQueries;
- for (const auto &KV : Finalized) {
+ for (const auto &KV : Emitted) {
const auto &Name = KV.first;
auto MII = MaterializingInfos.find(Name);
@@ -1087,59 +931,59 @@ void VSO::finalize(const SymbolFlagsMap &Finalized) {
auto &MI = MII->second;
- // For each dependant, transfer this node's unfinalized dependencies to
- // it. If the dependant node is fully finalized then notify any pending
- // queries.
+ // For each dependant, transfer this node's emitted dependencies to
+ // it. If the dependant node is ready (i.e. has no unemitted
+ // dependencies) then notify any pending queries.
for (auto &KV : MI.Dependants) {
- auto &DependantVSO = *KV.first;
+ auto &DependantJD = *KV.first;
for (auto &DependantName : KV.second) {
auto DependantMII =
- DependantVSO.MaterializingInfos.find(DependantName);
- assert(DependantMII != DependantVSO.MaterializingInfos.end() &&
+ DependantJD.MaterializingInfos.find(DependantName);
+ assert(DependantMII != DependantJD.MaterializingInfos.end() &&
"Dependant should have MaterializingInfo");
auto &DependantMI = DependantMII->second;
// Remove the dependant's dependency on this node.
- assert(DependantMI.UnfinalizedDependencies[this].count(Name) &&
+ assert(DependantMI.UnemittedDependencies[this].count(Name) &&
"Dependant does not count this symbol as a dependency?");
- DependantMI.UnfinalizedDependencies[this].erase(Name);
- if (DependantMI.UnfinalizedDependencies[this].empty())
- DependantMI.UnfinalizedDependencies.erase(this);
-
- // Transfer unfinalized dependencies from this node to the dependant.
- DependantVSO.transferFinalizedNodeDependencies(DependantMI,
- DependantName, MI);
-
- // If the dependant is finalized and this node was the last of its
- // unfinalized dependencies then notify any pending queries on the
- // dependant node.
- if (DependantMI.IsFinalized &&
- DependantMI.UnfinalizedDependencies.empty()) {
+ DependantMI.UnemittedDependencies[this].erase(Name);
+ if (DependantMI.UnemittedDependencies[this].empty())
+ DependantMI.UnemittedDependencies.erase(this);
+
+ // Transfer unemitted dependencies from this node to the dependant.
+ DependantJD.transferEmittedNodeDependencies(DependantMI,
+ DependantName, MI);
+
+ // If the dependant is emitted and this node was the last of its
+ // unemitted dependencies then the dependant node is now ready, so
+ // notify any pending queries on the dependant node.
+ if (DependantMI.IsEmitted &&
+ DependantMI.UnemittedDependencies.empty()) {
assert(DependantMI.Dependants.empty() &&
"Dependants should be empty by now");
for (auto &Q : DependantMI.PendingQueries) {
Q->notifySymbolReady();
if (Q->isFullyReady())
ReadyQueries.insert(Q);
- Q->removeQueryDependence(DependantVSO, DependantName);
+ Q->removeQueryDependence(DependantJD, DependantName);
}
- // If this dependant node was fully finalized we can erase its
- // MaterializingInfo and update its materializing state.
- assert(DependantVSO.Symbols.count(DependantName) &&
+ // Since this dependant is now ready, we erase its MaterializingInfo
+ // and update its materializing state.
+ assert(DependantJD.Symbols.count(DependantName) &&
"Dependant has no entry in the Symbols table");
- auto &DependantSym = DependantVSO.Symbols[DependantName];
- DependantSym.setFlags(static_cast<JITSymbolFlags::FlagNames>(
- DependantSym.getFlags() & ~JITSymbolFlags::Materializing));
- DependantVSO.MaterializingInfos.erase(DependantMII);
+ auto &DependantSym = DependantJD.Symbols[DependantName];
+ DependantSym.setFlags(DependantSym.getFlags() &
+ ~JITSymbolFlags::Materializing);
+ DependantJD.MaterializingInfos.erase(DependantMII);
}
}
}
MI.Dependants.clear();
- MI.IsFinalized = true;
+ MI.IsEmitted = true;
- if (MI.UnfinalizedDependencies.empty()) {
+ if (MI.UnemittedDependencies.empty()) {
for (auto &Q : MI.PendingQueries) {
Q->notifySymbolReady();
if (Q->isFullyReady())
@@ -1149,8 +993,7 @@ void VSO::finalize(const SymbolFlagsMap &Finalized) {
assert(Symbols.count(Name) &&
"Symbol has no entry in the Symbols table");
auto &Sym = Symbols[Name];
- Sym.setFlags(static_cast<JITSymbolFlags::FlagNames>(
- Sym.getFlags() & ~JITSymbolFlags::Materializing));
+ Sym.setFlags(Sym.getFlags() & ~JITSymbolFlags::Materializing);
MaterializingInfos.erase(MII);
}
}
@@ -1164,7 +1007,7 @@ void VSO::finalize(const SymbolFlagsMap &Finalized) {
}
}
-void VSO::notifyFailed(const SymbolNameSet &FailedSymbols) {
+void JITDylib::notifyFailed(const SymbolNameSet &FailedSymbols) {
// FIXME: This should fail any transitively dependant symbols too.
@@ -1173,7 +1016,7 @@ void VSO::notifyFailed(const SymbolNameSet &FailedSymbols) {
for (auto &Name : FailedSymbols) {
auto I = Symbols.find(Name);
- assert(I != Symbols.end() && "Symbol not present in this VSO");
+ assert(I != Symbols.end() && "Symbol not present in this JITDylib");
Symbols.erase(I);
auto MII = MaterializingInfos.find(Name);
@@ -1206,42 +1049,108 @@ void VSO::notifyFailed(const SymbolNameSet &FailedSymbols) {
Q->handleFailed(make_error<FailedToMaterialize>(FailedSymbols));
}
-void VSO::setSearchOrder(VSOList NewSearchOrder, bool SearchThisVSOFirst) {
- if (SearchThisVSOFirst && NewSearchOrder.front() != this)
- NewSearchOrder.insert(NewSearchOrder.begin(), this);
+void JITDylib::setSearchOrder(JITDylibSearchList NewSearchOrder,
+ bool SearchThisJITDylibFirst,
+ bool MatchNonExportedInThisDylib) {
+ if (SearchThisJITDylibFirst && NewSearchOrder.front().first != this)
+ NewSearchOrder.insert(NewSearchOrder.begin(),
+ {this, MatchNonExportedInThisDylib});
ES.runSessionLocked([&]() { SearchOrder = std::move(NewSearchOrder); });
}
-void VSO::addToSearchOrder(VSO &V) {
- ES.runSessionLocked([&]() { SearchOrder.push_back(&V); });
+void JITDylib::addToSearchOrder(JITDylib &JD, bool MatchNonExported) {
+ ES.runSessionLocked([&]() {
+ SearchOrder.push_back({&JD, MatchNonExported});
+ });
}
-void VSO::replaceInSearchOrder(VSO &OldV, VSO &NewV) {
+void JITDylib::replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD,
+ bool MatchNonExported) {
ES.runSessionLocked([&]() {
- auto I = std::find(SearchOrder.begin(), SearchOrder.end(), &OldV);
+ auto I = std::find_if(SearchOrder.begin(), SearchOrder.end(),
+ [&](const JITDylibSearchList::value_type &KV) {
+ return KV.first == &OldJD;
+ });
if (I != SearchOrder.end())
- *I = &NewV;
+ *I = {&NewJD, MatchNonExported};
});
}
-void VSO::removeFromSearchOrder(VSO &V) {
+void JITDylib::removeFromSearchOrder(JITDylib &JD) {
ES.runSessionLocked([&]() {
- auto I = std::find(SearchOrder.begin(), SearchOrder.end(), &V);
+ auto I = std::find_if(SearchOrder.begin(), SearchOrder.end(),
+ [&](const JITDylibSearchList::value_type &KV) {
+ return KV.first == &JD;
+ });
if (I != SearchOrder.end())
SearchOrder.erase(I);
});
}
-SymbolFlagsMap VSO::lookupFlags(const SymbolNameSet &Names) {
+Error JITDylib::remove(const SymbolNameSet &Names) {
+ return ES.runSessionLocked([&]() -> Error {
+ using SymbolMaterializerItrPair =
+ std::pair<SymbolMap::iterator, UnmaterializedInfosMap::iterator>;
+ std::vector<SymbolMaterializerItrPair> SymbolsToRemove;
+ SymbolNameSet Missing;
+ SymbolNameSet Materializing;
+
+ for (auto &Name : Names) {
+ auto I = Symbols.find(Name);
+
+ // Note symbol missing.
+ if (I == Symbols.end()) {
+ Missing.insert(Name);
+ continue;
+ }
+
+ // Note symbol materializing.
+ if (I->second.getFlags().isMaterializing()) {
+ Materializing.insert(Name);
+ continue;
+ }
+
+ auto UMII = I->second.getFlags().isLazy() ? UnmaterializedInfos.find(Name)
+ : UnmaterializedInfos.end();
+ SymbolsToRemove.push_back(std::make_pair(I, UMII));
+ }
+
+ // If any of the symbols are not defined, return an error.
+ if (!Missing.empty())
+ return make_error<SymbolsNotFound>(std::move(Missing));
+
+ // If any of the symbols are currently materializing, return an error.
+ if (!Materializing.empty())
+ return make_error<SymbolsCouldNotBeRemoved>(std::move(Materializing));
+
+ // Remove the symbols.
+ for (auto &SymbolMaterializerItrPair : SymbolsToRemove) {
+ auto UMII = SymbolMaterializerItrPair.second;
+
+ // If there is a materializer attached, call discard.
+ if (UMII != UnmaterializedInfos.end()) {
+ UMII->second->MU->doDiscard(*this, UMII->first);
+ UnmaterializedInfos.erase(UMII);
+ }
+
+ auto SymI = SymbolMaterializerItrPair.first;
+ Symbols.erase(SymI);
+ }
+
+ return Error::success();
+ });
+}
+
+SymbolFlagsMap JITDylib::lookupFlags(const SymbolNameSet &Names) {
return ES.runSessionLocked([&, this]() {
SymbolFlagsMap Result;
auto Unresolved = lookupFlagsImpl(Result, Names);
- if (FallbackDefinitionGenerator && !Unresolved.empty()) {
- auto FallbackDefs = FallbackDefinitionGenerator(*this, Unresolved);
- if (!FallbackDefs.empty()) {
- auto Unresolved2 = lookupFlagsImpl(Result, FallbackDefs);
+ if (DefGenerator && !Unresolved.empty()) {
+ auto NewDefs = DefGenerator(*this, Unresolved);
+ if (!NewDefs.empty()) {
+ auto Unresolved2 = lookupFlagsImpl(Result, NewDefs);
(void)Unresolved2;
assert(Unresolved2.empty() &&
"All fallback defs should have been found by lookupFlagsImpl");
@@ -1251,8 +1160,8 @@ SymbolFlagsMap VSO::lookupFlags(const SymbolNameSet &Names) {
});
}
-SymbolNameSet VSO::lookupFlagsImpl(SymbolFlagsMap &Flags,
- const SymbolNameSet &Names) {
+SymbolNameSet JITDylib::lookupFlagsImpl(SymbolFlagsMap &Flags,
+ const SymbolNameSet &Names) {
SymbolNameSet Unresolved;
for (auto &Name : Names) {
@@ -1270,38 +1179,43 @@ SymbolNameSet VSO::lookupFlagsImpl(SymbolFlagsMap &Flags,
return Unresolved;
}
-void VSO::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, MaterializationUnitList &MUs) {
+void JITDylib::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ SymbolNameSet &Unresolved, bool MatchNonExported,
+ MaterializationUnitList &MUs) {
assert(Q && "Query can not be null");
- lodgeQueryImpl(Q, Unresolved, MUs);
- if (FallbackDefinitionGenerator && !Unresolved.empty()) {
- auto FallbackDefs = FallbackDefinitionGenerator(*this, Unresolved);
- if (!FallbackDefs.empty()) {
- for (auto &D : FallbackDefs)
+ lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs);
+ if (DefGenerator && !Unresolved.empty()) {
+ auto NewDefs = DefGenerator(*this, Unresolved);
+ if (!NewDefs.empty()) {
+ for (auto &D : NewDefs)
Unresolved.erase(D);
- lodgeQueryImpl(Q, FallbackDefs, MUs);
- assert(FallbackDefs.empty() &&
+ lodgeQueryImpl(Q, NewDefs, MatchNonExported, MUs);
+ assert(NewDefs.empty() &&
"All fallback defs should have been found by lookupImpl");
}
}
}
-void VSO::lodgeQueryImpl(
+void JITDylib::lodgeQueryImpl(
std::shared_ptr<AsynchronousSymbolQuery> &Q, SymbolNameSet &Unresolved,
+ bool MatchNonExported,
std::vector<std::unique_ptr<MaterializationUnit>> &MUs) {
- for (auto I = Unresolved.begin(), E = Unresolved.end(); I != E;) {
- auto TmpI = I++;
- auto Name = *TmpI;
+ std::vector<SymbolStringPtr> ToRemove;
+ for (auto Name : Unresolved) {
// Search for the name in Symbols. Skip it if not found.
auto SymI = Symbols.find(Name);
if (SymI == Symbols.end())
continue;
- // If we found Name in V, remove it frome the Unresolved set and add it
- // to the added set.
- Unresolved.erase(TmpI);
+ // If this is a non exported symbol and we're skipping those then skip it.
+ if (!SymI->second.getFlags().isExported() && !MatchNonExported)
+ continue;
+
+ // If we matched against Name in JD, mark it to be removed from the Unresolved
+ // set.
+ ToRemove.push_back(Name);
// If the symbol has an address then resolve it.
if (SymI->second.getAddress() != 0)
@@ -1333,8 +1247,8 @@ void VSO::lodgeQueryImpl(
// Add MU to the list of MaterializationUnits to be materialized.
MUs.push_back(std::move(MU));
} else if (!SymI->second.getFlags().isMaterializing()) {
- // The symbol is neither lazy nor materializing. Finalize it and
- // continue.
+ // The symbol is neither lazy nor materializing, so it must be
+ // ready. Notify the query and continue.
Q->notifySymbolReady();
continue;
}
@@ -1346,10 +1260,14 @@ void VSO::lodgeQueryImpl(
MI.PendingQueries.push_back(Q);
Q->addQueryDependence(*this, Name);
}
+
+ // Remove any symbols that we found.
+ for (auto &Name : ToRemove)
+ Unresolved.erase(Name);
}
-SymbolNameSet VSO::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
- SymbolNameSet Names) {
+SymbolNameSet JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
+ SymbolNameSet Names) {
assert(Q && "Query can not be null");
ES.runOutstandingMUs();
@@ -1360,15 +1278,15 @@ SymbolNameSet VSO::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
SymbolNameSet Unresolved = std::move(Names);
ES.runSessionLocked([&, this]() {
ActionFlags = lookupImpl(Q, MUs, Unresolved);
- if (FallbackDefinitionGenerator && !Unresolved.empty()) {
+ if (DefGenerator && !Unresolved.empty()) {
assert(ActionFlags == None &&
"ActionFlags set but unresolved symbols remain?");
- auto FallbackDefs = FallbackDefinitionGenerator(*this, Unresolved);
- if (!FallbackDefs.empty()) {
- for (auto &D : FallbackDefs)
+ auto NewDefs = DefGenerator(*this, Unresolved);
+ if (!NewDefs.empty()) {
+ for (auto &D : NewDefs)
Unresolved.erase(D);
- ActionFlags = lookupImpl(Q, MUs, FallbackDefs);
- assert(FallbackDefs.empty() &&
+ ActionFlags = lookupImpl(Q, MUs, NewDefs);
+ assert(NewDefs.empty() &&
"All fallback defs should have been found by lookupImpl");
}
}
@@ -1400,24 +1318,22 @@ SymbolNameSet VSO::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
return Unresolved;
}
-VSO::LookupImplActionFlags
-VSO::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
- SymbolNameSet &Unresolved) {
+JITDylib::LookupImplActionFlags
+JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
+ SymbolNameSet &Unresolved) {
LookupImplActionFlags ActionFlags = None;
+ std::vector<SymbolStringPtr> ToRemove;
- for (auto I = Unresolved.begin(), E = Unresolved.end(); I != E;) {
- auto TmpI = I++;
- auto Name = *TmpI;
+ for (auto Name : Unresolved) {
// Search for the name in Symbols. Skip it if not found.
auto SymI = Symbols.find(Name);
if (SymI == Symbols.end())
continue;
- // If we found Name in V, remove it frome the Unresolved set and add it
- // to the dependencies set.
- Unresolved.erase(TmpI);
+ // If we found Name, mark it to be removed from the Unresolved set.
+ ToRemove.push_back(Name);
// If the symbol has an address then resolve it.
if (SymI->second.getAddress() != 0) {
@@ -1452,8 +1368,8 @@ VSO::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
// Add MU to the list of MaterializationUnits to be materialized.
MUs.push_back(std::move(MU));
} else if (!SymI->second.getFlags().isMaterializing()) {
- // The symbol is neither lazy nor materializing. Finalize it and
- // continue.
+ // The symbol is neither lazy nor materializing, so it must be ready.
+ // Notify the query and continue.
Q->notifySymbolReady();
if (Q->isFullyReady())
ActionFlags |= NotifyFullyReady;
@@ -1468,19 +1384,30 @@ VSO::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
Q->addQueryDependence(*this, Name);
}
+ // Remove any marked symbols from the Unresolved set.
+ for (auto &Name : ToRemove)
+ Unresolved.erase(Name);
+
return ActionFlags;
}
-void VSO::dump(raw_ostream &OS) {
+void JITDylib::dump(raw_ostream &OS) {
ES.runSessionLocked([&, this]() {
- OS << "VSO \"" << VSOName
- << "\" (ES: " << format("0x%016x", reinterpret_cast<uintptr_t>(&ES))
- << "):\n"
+ OS << "JITDylib \"" << JITDylibName << "\" (ES: "
+ << format("0x%016" PRIx64, reinterpret_cast<uintptr_t>(&ES)) << "):\n"
+ << "Search order: [";
+ for (auto &KV : SearchOrder)
+ OS << " (\"" << KV.first->getName() << "\", "
+ << (KV.second ? "all" : "exported only") << ")";
+ OS << " ]\n"
<< "Symbol table:\n";
for (auto &KV : Symbols) {
- OS << " \"" << *KV.first
- << "\": " << format("0x%016x", KV.second.getAddress());
+ OS << " \"" << *KV.first << "\": ";
+ if (auto Addr = KV.second.getAddress())
+ OS << format("0x%016" PRIx64, Addr) << ", " << KV.second.getFlags();
+ else
+ OS << "<not resolved>";
if (KV.second.getFlags().isLazy() ||
KV.second.getFlags().isMaterializing()) {
OS << " (";
@@ -1492,7 +1419,7 @@ void VSO::dump(raw_ostream &OS) {
}
if (KV.second.getFlags().isMaterializing())
OS << " Materializing";
- OS << " )\n";
+ OS << ", " << KV.second.getFlags() << " )\n";
} else
OS << "\n";
}
@@ -1501,7 +1428,7 @@ void VSO::dump(raw_ostream &OS) {
OS << " MaterializingInfos entries:\n";
for (auto &KV : MaterializingInfos) {
OS << " \"" << *KV.first << "\":\n"
- << " IsFinalized = " << (KV.second.IsFinalized ? "true" : "false")
+ << " IsEmitted = " << (KV.second.IsEmitted ? "true" : "false")
<< "\n"
<< " " << KV.second.PendingQueries.size()
<< " pending queries: { ";
@@ -1510,19 +1437,19 @@ void VSO::dump(raw_ostream &OS) {
OS << "}\n Dependants:\n";
for (auto &KV2 : KV.second.Dependants)
OS << " " << KV2.first->getName() << ": " << KV2.second << "\n";
- OS << " Unfinalized Dependencies:\n";
- for (auto &KV2 : KV.second.UnfinalizedDependencies)
+ OS << " Unemitted Dependencies:\n";
+ for (auto &KV2 : KV.second.UnemittedDependencies)
OS << " " << KV2.first->getName() << ": " << KV2.second << "\n";
}
});
}
-VSO::VSO(ExecutionSessionBase &ES, std::string Name)
- : ES(ES), VSOName(std::move(Name)) {
- SearchOrder.push_back(this);
+JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
+ : ES(ES), JITDylibName(std::move(Name)) {
+ SearchOrder.push_back({this, true});
}
-Error VSO::defineImpl(MaterializationUnit &MU) {
+Error JITDylib::defineImpl(MaterializationUnit &MU) {
SymbolNameSet Duplicates;
SymbolNameSet MUDefsOverridden;
@@ -1599,8 +1526,8 @@ Error VSO::defineImpl(MaterializationUnit &MU) {
return Error::success();
}
-void VSO::detachQueryHelper(AsynchronousSymbolQuery &Q,
- const SymbolNameSet &QuerySymbols) {
+void JITDylib::detachQueryHelper(AsynchronousSymbolQuery &Q,
+ const SymbolNameSet &QuerySymbols) {
for (auto &QuerySymbol : QuerySymbols) {
assert(MaterializingInfos.count(QuerySymbol) &&
"QuerySymbol does not have MaterializingInfo");
@@ -1619,53 +1546,395 @@ void VSO::detachQueryHelper(AsynchronousSymbolQuery &Q,
}
}
-void VSO::transferFinalizedNodeDependencies(
+void JITDylib::transferEmittedNodeDependencies(
MaterializingInfo &DependantMI, const SymbolStringPtr &DependantName,
- MaterializingInfo &FinalizedMI) {
- for (auto &KV : FinalizedMI.UnfinalizedDependencies) {
- auto &DependencyVSO = *KV.first;
- SymbolNameSet *UnfinalizedDependenciesOnDependencyVSO = nullptr;
+ MaterializingInfo &EmittedMI) {
+ for (auto &KV : EmittedMI.UnemittedDependencies) {
+ auto &DependencyJD = *KV.first;
+ SymbolNameSet *UnemittedDependenciesOnDependencyJD = nullptr;
for (auto &DependencyName : KV.second) {
- auto &DependencyMI = DependencyVSO.MaterializingInfos[DependencyName];
+ auto &DependencyMI = DependencyJD.MaterializingInfos[DependencyName];
// Do not add self dependencies.
if (&DependencyMI == &DependantMI)
continue;
- // If we haven't looked up the dependencies for DependencyVSO yet, do it
+ // If we haven't looked up the dependencies for DependencyJD yet, do it
// now and cache the result.
- if (!UnfinalizedDependenciesOnDependencyVSO)
- UnfinalizedDependenciesOnDependencyVSO =
- &DependantMI.UnfinalizedDependencies[&DependencyVSO];
+ if (!UnemittedDependenciesOnDependencyJD)
+ UnemittedDependenciesOnDependencyJD =
+ &DependantMI.UnemittedDependencies[&DependencyJD];
DependencyMI.Dependants[this].insert(DependantName);
- UnfinalizedDependenciesOnDependencyVSO->insert(DependencyName);
+ UnemittedDependenciesOnDependencyJD->insert(DependencyName);
}
}
}
-VSO &ExecutionSession::createVSO(std::string Name) {
- return runSessionLocked([&, this]() -> VSO & {
- VSOs.push_back(std::unique_ptr<VSO>(new VSO(*this, std::move(Name))));
- return *VSOs.back();
+ExecutionSession::ExecutionSession(std::shared_ptr<SymbolStringPool> SSP)
+ : SSP(SSP ? std::move(SSP) : std::make_shared<SymbolStringPool>()) {
+ // Construct the main dylib.
+ JDs.push_back(std::unique_ptr<JITDylib>(new JITDylib(*this, "<main>")));
+}
+
+JITDylib &ExecutionSession::getMainJITDylib() {
+ return runSessionLocked([this]() -> JITDylib & { return *JDs.front(); });
+}
+
+JITDylib &ExecutionSession::createJITDylib(std::string Name,
+ bool AddToMainDylibSearchOrder) {
+ return runSessionLocked([&, this]() -> JITDylib & {
+ JDs.push_back(
+ std::unique_ptr<JITDylib>(new JITDylib(*this, std::move(Name))));
+ if (AddToMainDylibSearchOrder)
+ JDs.front()->addToSearchOrder(*JDs.back());
+ return *JDs.back();
});
}
-Expected<SymbolMap> lookup(const VSOList &VSOs, SymbolNameSet Names) {
+void ExecutionSession::legacyFailQuery(AsynchronousSymbolQuery &Q, Error Err) {
+ assert(!!Err && "Error should be in failure state");
- if (VSOs.empty())
- return SymbolMap();
+ bool SendErrorToQuery;
+ runSessionLocked([&]() {
+ Q.detach();
+ SendErrorToQuery = Q.canStillFail();
+ });
- auto &ES = (*VSOs.begin())->getExecutionSession();
+ if (SendErrorToQuery)
+ Q.handleFailed(std::move(Err));
+ else
+ reportError(std::move(Err));
+}
- return ES.lookup(VSOs, Names, NoDependenciesToRegister, true);
+Expected<SymbolMap> ExecutionSession::legacyLookup(
+ LegacyAsyncLookupFunction AsyncLookup, SymbolNameSet Names,
+ bool WaitUntilReady, RegisterDependenciesFunction RegisterDependencies) {
+#if LLVM_ENABLE_THREADS
+ // In the threaded case we use promises to return the results.
+ std::promise<SymbolMap> PromisedResult;
+ std::mutex ErrMutex;
+ Error ResolutionError = Error::success();
+ std::promise<void> PromisedReady;
+ Error ReadyError = Error::success();
+ auto OnResolve = [&](Expected<SymbolMap> R) {
+ if (R)
+ PromisedResult.set_value(std::move(*R));
+ else {
+ {
+ ErrorAsOutParameter _(&ResolutionError);
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ ResolutionError = R.takeError();
+ }
+ PromisedResult.set_value(SymbolMap());
+ }
+ };
+
+ std::function<void(Error)> OnReady;
+ if (WaitUntilReady) {
+ OnReady = [&](Error Err) {
+ if (Err) {
+ ErrorAsOutParameter _(&ReadyError);
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ ReadyError = std::move(Err);
+ }
+ PromisedReady.set_value();
+ };
+ } else {
+ OnReady = [&](Error Err) {
+ if (Err)
+ reportError(std::move(Err));
+ };
+ }
+
+#else
+ SymbolMap Result;
+ Error ResolutionError = Error::success();
+ Error ReadyError = Error::success();
+
+ auto OnResolve = [&](Expected<SymbolMap> R) {
+ ErrorAsOutParameter _(&ResolutionError);
+ if (R)
+ Result = std::move(*R);
+ else
+ ResolutionError = R.takeError();
+ };
+
+ std::function<void(Error)> OnReady;
+ if (WaitUntilReady) {
+ OnReady = [&](Error Err) {
+ ErrorAsOutParameter _(&ReadyError);
+ if (Err)
+ ReadyError = std::move(Err);
+ };
+ } else {
+ OnReady = [&](Error Err) {
+ if (Err)
+ reportError(std::move(Err));
+ };
+ }
+#endif
+
+ auto Query = std::make_shared<AsynchronousSymbolQuery>(
+ Names, std::move(OnResolve), std::move(OnReady));
+ // FIXME: This should be run session locked along with the registration code
+ // and error reporting below.
+ SymbolNameSet UnresolvedSymbols = AsyncLookup(Query, std::move(Names));
+
+ // If the query was lodged successfully then register the dependencies,
+ // otherwise fail it with an error.
+ if (UnresolvedSymbols.empty())
+ RegisterDependencies(Query->QueryRegistrations);
+ else {
+ bool DeliverError = runSessionLocked([&]() {
+ Query->detach();
+ return Query->canStillFail();
+ });
+ auto Err = make_error<SymbolsNotFound>(std::move(UnresolvedSymbols));
+ if (DeliverError)
+ Query->handleFailed(std::move(Err));
+ else
+ reportError(std::move(Err));
+ }
+
+#if LLVM_ENABLE_THREADS
+ auto ResultFuture = PromisedResult.get_future();
+ auto Result = ResultFuture.get();
+
+ {
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ if (ResolutionError) {
+ // ReadyError will never be assigned. Consume the success value.
+ cantFail(std::move(ReadyError));
+ return std::move(ResolutionError);
+ }
+ }
+
+ if (WaitUntilReady) {
+ auto ReadyFuture = PromisedReady.get_future();
+ ReadyFuture.get();
+
+ {
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ if (ReadyError)
+ return std::move(ReadyError);
+ }
+ } else
+ cantFail(std::move(ReadyError));
+
+ return std::move(Result);
+
+#else
+ if (ResolutionError) {
+ // ReadyError will never be assigned. Consume the success value.
+ cantFail(std::move(ReadyError));
+ return std::move(ResolutionError);
+ }
+
+ if (ReadyError)
+ return std::move(ReadyError);
+
+ return Result;
+#endif
}
-/// Look up a symbol by searching a list of VSOs.
-Expected<JITEvaluatedSymbol> lookup(const VSOList &VSOs, SymbolStringPtr Name) {
+void ExecutionSession::lookup(
+ const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols,
+ SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
+ RegisterDependenciesFunction RegisterDependencies) {
+
+ // lookup can be re-entered recursively if running on a single thread. Run any
+ // outstanding MUs in case this query depends on them, otherwise this lookup
+ // will starve waiting for a result from an MU that is stuck in the queue.
+ runOutstandingMUs();
+
+ auto Unresolved = std::move(Symbols);
+ std::map<JITDylib *, MaterializationUnitList> CollectedMUsMap;
+ auto Q = std::make_shared<AsynchronousSymbolQuery>(
+ Unresolved, std::move(OnResolve), std::move(OnReady));
+ bool QueryIsFullyResolved = false;
+ bool QueryIsFullyReady = false;
+ bool QueryFailed = false;
+
+ runSessionLocked([&]() {
+ for (auto &KV : SearchOrder) {
+ assert(KV.first && "JITDylibList entries must not be null");
+ assert(!CollectedMUsMap.count(KV.first) &&
+ "JITDylibList should not contain duplicate entries");
+
+ auto &JD = *KV.first;
+ auto MatchNonExported = KV.second;
+ JD.lodgeQuery(Q, Unresolved, MatchNonExported, CollectedMUsMap[&JD]);
+ }
+
+ if (Unresolved.empty()) {
+ // Query lodged successfully.
+
+ // Record whether this query is fully ready / resolved. We will use
+ // this to call handleFullyResolved/handleFullyReady outside the session
+ // lock.
+ QueryIsFullyResolved = Q->isFullyResolved();
+ QueryIsFullyReady = Q->isFullyReady();
+
+ // Call the register dependencies function.
+ if (RegisterDependencies && !Q->QueryRegistrations.empty())
+ RegisterDependencies(Q->QueryRegistrations);
+ } else {
+ // Query failed due to unresolved symbols.
+ QueryFailed = true;
+
+ // Disconnect the query from its dependencies.
+ Q->detach();
+
+ // Replace the MUs.
+ for (auto &KV : CollectedMUsMap)
+ for (auto &MU : KV.second)
+ KV.first->replace(std::move(MU));
+ }
+ });
+
+ if (QueryFailed) {
+ Q->handleFailed(make_error<SymbolsNotFound>(std::move(Unresolved)));
+ return;
+ } else {
+ if (QueryIsFullyResolved)
+ Q->handleFullyResolved();
+ if (QueryIsFullyReady)
+ Q->handleFullyReady();
+ }
+
+ // Move the MUs to the OutstandingMUs list, then materialize.
+ {
+ std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
+
+ for (auto &KV : CollectedMUsMap)
+ for (auto &MU : KV.second)
+ OutstandingMUs.push_back(std::make_pair(KV.first, std::move(MU)));
+ }
+
+ runOutstandingMUs();
+}
+
+Expected<SymbolMap> ExecutionSession::lookup(
+ const JITDylibSearchList &SearchOrder, const SymbolNameSet &Symbols,
+ RegisterDependenciesFunction RegisterDependencies, bool WaitUntilReady) {
+#if LLVM_ENABLE_THREADS
+ // In the threaded case we use promises to return the results.
+ std::promise<SymbolMap> PromisedResult;
+ std::mutex ErrMutex;
+ Error ResolutionError = Error::success();
+ std::promise<void> PromisedReady;
+ Error ReadyError = Error::success();
+ auto OnResolve = [&](Expected<SymbolMap> R) {
+ if (R)
+ PromisedResult.set_value(std::move(*R));
+ else {
+ {
+ ErrorAsOutParameter _(&ResolutionError);
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ ResolutionError = R.takeError();
+ }
+ PromisedResult.set_value(SymbolMap());
+ }
+ };
+
+ std::function<void(Error)> OnReady;
+ if (WaitUntilReady) {
+ OnReady = [&](Error Err) {
+ if (Err) {
+ ErrorAsOutParameter _(&ReadyError);
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ ReadyError = std::move(Err);
+ }
+ PromisedReady.set_value();
+ };
+ } else {
+ OnReady = [&](Error Err) {
+ if (Err)
+ reportError(std::move(Err));
+ };
+ }
+
+#else
+ SymbolMap Result;
+ Error ResolutionError = Error::success();
+ Error ReadyError = Error::success();
+
+ auto OnResolve = [&](Expected<SymbolMap> R) {
+ ErrorAsOutParameter _(&ResolutionError);
+ if (R)
+ Result = std::move(*R);
+ else
+ ResolutionError = R.takeError();
+ };
+
+ std::function<void(Error)> OnReady;
+ if (WaitUntilReady) {
+ OnReady = [&](Error Err) {
+ ErrorAsOutParameter _(&ReadyError);
+ if (Err)
+ ReadyError = std::move(Err);
+ };
+ } else {
+ OnReady = [&](Error Err) {
+ if (Err)
+ reportError(std::move(Err));
+ };
+ }
+#endif
+
+ // Perform the asynchronous lookup.
+ lookup(SearchOrder, Symbols, OnResolve, OnReady, RegisterDependencies);
+
+#if LLVM_ENABLE_THREADS
+ auto ResultFuture = PromisedResult.get_future();
+ auto Result = ResultFuture.get();
+
+ {
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ if (ResolutionError) {
+ // ReadyError will never be assigned. Consume the success value.
+ cantFail(std::move(ReadyError));
+ return std::move(ResolutionError);
+ }
+ }
+
+ if (WaitUntilReady) {
+ auto ReadyFuture = PromisedReady.get_future();
+ ReadyFuture.get();
+
+ {
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ if (ReadyError)
+ return std::move(ReadyError);
+ }
+ } else
+ cantFail(std::move(ReadyError));
+
+ return std::move(Result);
+
+#else
+ if (ResolutionError) {
+ // ReadyError will never be assigned. Consume the success value.
+ cantFail(std::move(ReadyError));
+ return std::move(ResolutionError);
+ }
+
+ if (ReadyError)
+ return std::move(ReadyError);
+
+ return Result;
+#endif
+}
+
+Expected<JITEvaluatedSymbol>
+ExecutionSession::lookup(const JITDylibSearchList &SearchOrder,
+ SymbolStringPtr Name) {
SymbolNameSet Names({Name});
- if (auto ResultMap = lookup(VSOs, std::move(Names))) {
+
+ if (auto ResultMap = lookup(SearchOrder, std::move(Names),
+ NoDependenciesToRegister, true)) {
assert(ResultMap->size() == 1 && "Unexpected number of results");
assert(ResultMap->count(Name) && "Missing result for symbol");
return std::move(ResultMap->begin()->second);
@@ -1673,8 +1942,53 @@ Expected<JITEvaluatedSymbol> lookup(const VSOList &VSOs, SymbolStringPtr Name) {
return ResultMap.takeError();
}
-MangleAndInterner::MangleAndInterner(ExecutionSessionBase &ES,
- const DataLayout &DL)
+Expected<JITEvaluatedSymbol>
+ExecutionSession::lookup(ArrayRef<JITDylib *> SearchOrder,
+ SymbolStringPtr Name) {
+ SymbolNameSet Names({Name});
+
+ JITDylibSearchList FullSearchOrder;
+ FullSearchOrder.reserve(SearchOrder.size());
+ for (auto *JD : SearchOrder)
+ FullSearchOrder.push_back({JD, false});
+
+ return lookup(FullSearchOrder, Name);
+}
+
+Expected<JITEvaluatedSymbol>
+ExecutionSession::lookup(ArrayRef<JITDylib *> SearchOrder, StringRef Name) {
+ return lookup(SearchOrder, intern(Name));
+}
+
+void ExecutionSession::dump(raw_ostream &OS) {
+ runSessionLocked([this, &OS]() {
+ for (auto &JD : JDs)
+ JD->dump(OS);
+ });
+}
+
+void ExecutionSession::runOutstandingMUs() {
+ while (1) {
+ std::pair<JITDylib *, std::unique_ptr<MaterializationUnit>> JITDylibAndMU;
+
+ {
+ std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
+ if (!OutstandingMUs.empty()) {
+ JITDylibAndMU = std::move(OutstandingMUs.back());
+ OutstandingMUs.pop_back();
+ }
+ }
+
+ if (JITDylibAndMU.first) {
+ assert(JITDylibAndMU.second && "JITDylib, but no MU?");
+ dispatchMaterialization(*JITDylibAndMU.first,
+ std::move(JITDylibAndMU.second));
+ } else
+ break;
+ }
+}
+
+MangleAndInterner::MangleAndInterner(ExecutionSession &ES, const DataLayout &DL)
: ES(ES), DL(DL) {}
SymbolStringPtr MangleAndInterner::operator()(StringRef Name) {
@@ -1683,7 +1997,7 @@ SymbolStringPtr MangleAndInterner::operator()(StringRef Name) {
raw_string_ostream MangledNameStream(MangledName);
Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
}
- return ES.getSymbolStringPool().intern(MangledName);
+ return ES.intern(MangledName);
}
} // End namespace orc.
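
The Core.cpp hunks above rename VSO to JITDylib and funnel symbol resolution through ExecutionSession::lookup, whose JITDylibSearchList entries pair a JITDylib with a MatchNonExported flag. A minimal caller-side sketch of the renamed API, assuming the post-patch Orc headers and an already populated JITDylib (illustration only, not part of the patch):

#include "llvm/ExecutionEngine/Orc/Core.h"

// Look up "main" in a single JITDylib, matching non-exported symbols as well.
llvm::Expected<llvm::JITEvaluatedSymbol>
lookupMain(llvm::orc::ExecutionSession &ES, llvm::orc::JITDylib &JD) {
  using namespace llvm::orc;
  return ES.lookup(JITDylibSearchList({{&JD, true}}), ES.intern("main"));
}

The blocking ExecutionSession::lookup overloads shown in the hunks wrap the asynchronous form in promise/future plumbing when LLVM_ENABLE_THREADS is set, so a call like the one above returns only once the symbol is ready or the query has failed.
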
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index 6157677ce355..7c3c50b4d6e5 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -19,45 +19,6 @@
namespace llvm {
namespace orc {
-JITTargetMachineBuilder::JITTargetMachineBuilder(Triple TT)
- : TT(std::move(TT)) {}
-
-Expected<JITTargetMachineBuilder> JITTargetMachineBuilder::detectHost() {
- return JITTargetMachineBuilder(Triple(sys::getProcessTriple()));
-}
-
-Expected<std::unique_ptr<TargetMachine>>
-JITTargetMachineBuilder::createTargetMachine() {
- if (!Arch.empty()) {
- Triple::ArchType Type = Triple::getArchTypeForLLVMName(Arch);
-
- if (Type == Triple::UnknownArch)
- return make_error<StringError>(std::string("Unknown arch: ") + Arch,
- inconvertibleErrorCode());
- }
-
- std::string ErrMsg;
- auto *TheTarget = TargetRegistry::lookupTarget(TT.getTriple(), ErrMsg);
- if (!TheTarget)
- return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
-
- auto *TM =
- TheTarget->createTargetMachine(TT.getTriple(), CPU, Features.getString(),
- Options, RM, CM, OptLevel, /*JIT*/ true);
- if (!TM)
- return make_error<StringError>("Could not allocate target machine",
- inconvertibleErrorCode());
-
- return std::unique_ptr<TargetMachine>(TM);
-}
-
-JITTargetMachineBuilder &JITTargetMachineBuilder::addFeatures(
- const std::vector<std::string> &FeatureVec) {
- for (const auto &F : FeatureVec)
- Features.AddFeature(F);
- return *this;
-}
-
CtorDtorIterator::CtorDtorIterator(const GlobalVariable *GV, bool End)
: InitList(
GV ? dyn_cast_or_null<ConstantArray>(GV->getInitializer()) : nullptr),
@@ -126,18 +87,24 @@ iterator_range<CtorDtorIterator> getDestructors(const Module &M) {
CtorDtorIterator(DtorsList, true));
}
-void CtorDtorRunner2::add(iterator_range<CtorDtorIterator> CtorDtors) {
- if (CtorDtors.begin() == CtorDtors.end())
+void CtorDtorRunner::add(iterator_range<CtorDtorIterator> CtorDtors) {
+ if (empty(CtorDtors))
return;
MangleAndInterner Mangle(
- V.getExecutionSession(),
+ JD.getExecutionSession(),
(*CtorDtors.begin()).Func->getParent()->getDataLayout());
for (const auto &CtorDtor : CtorDtors) {
assert(CtorDtor.Func && CtorDtor.Func->hasName() &&
"Ctor/Dtor function must be named to be runnable under the JIT");
+ // FIXME: Maybe use a symbol promoter here instead.
+ if (CtorDtor.Func->hasLocalLinkage()) {
+ CtorDtor.Func->setLinkage(GlobalValue::ExternalLinkage);
+ CtorDtor.Func->setVisibility(GlobalValue::HiddenVisibility);
+ }
+
if (CtorDtor.Data && cast<GlobalValue>(CtorDtor.Data)->isDeclaration()) {
dbgs() << " Skipping because why now?\n";
continue;
@@ -148,7 +115,7 @@ void CtorDtorRunner2::add(iterator_range<CtorDtorIterator> CtorDtors) {
}
}
-Error CtorDtorRunner2::run() {
+Error CtorDtorRunner::run() {
using CtorDtorTy = void (*)();
SymbolNameSet Names;
@@ -161,7 +128,10 @@ Error CtorDtorRunner2::run() {
}
}
- if (auto CtorDtorMap = lookup({&V}, std::move(Names))) {
+ auto &ES = JD.getExecutionSession();
+ if (auto CtorDtorMap =
+ ES.lookup(JITDylibSearchList({{&JD, true}}), std::move(Names),
+ NoDependenciesToRegister, true)) {
for (auto &KV : CtorDtorsByPriority) {
for (auto &Name : KV.second) {
assert(CtorDtorMap->count(Name) && "No entry for Name");
@@ -195,32 +165,46 @@ int LocalCXXRuntimeOverridesBase::CXAAtExitOverride(DestructorPtr Destructor,
return 0;
}
-Error LocalCXXRuntimeOverrides2::enable(VSO &V, MangleAndInterner &Mangle) {
- SymbolMap RuntimeInterposes(
- {{Mangle("__dso_handle"),
- JITEvaluatedSymbol(toTargetAddress(&DSOHandleOverride),
- JITSymbolFlags::Exported)},
- {Mangle("__cxa_atexit"),
- JITEvaluatedSymbol(toTargetAddress(&CXAAtExitOverride),
- JITSymbolFlags::Exported)}});
+Error LocalCXXRuntimeOverrides::enable(JITDylib &JD,
+ MangleAndInterner &Mangle) {
+ SymbolMap RuntimeInterposes;
+ RuntimeInterposes[Mangle("__dso_handle")] =
+ JITEvaluatedSymbol(toTargetAddress(&DSOHandleOverride),
+ JITSymbolFlags::Exported);
+ RuntimeInterposes[Mangle("__cxa_atexit")] =
+ JITEvaluatedSymbol(toTargetAddress(&CXAAtExitOverride),
+ JITSymbolFlags::Exported);
- return V.define(absoluteSymbols(std::move(RuntimeInterposes)));
+ return JD.define(absoluteSymbols(std::move(RuntimeInterposes)));
}
-DynamicLibraryFallbackGenerator::DynamicLibraryFallbackGenerator(
+DynamicLibrarySearchGenerator::DynamicLibrarySearchGenerator(
sys::DynamicLibrary Dylib, const DataLayout &DL, SymbolPredicate Allow)
: Dylib(std::move(Dylib)), Allow(std::move(Allow)),
GlobalPrefix(DL.getGlobalPrefix()) {}
-SymbolNameSet DynamicLibraryFallbackGenerator::
-operator()(VSO &V, const SymbolNameSet &Names) {
+Expected<DynamicLibrarySearchGenerator>
+DynamicLibrarySearchGenerator::Load(const char *FileName, const DataLayout &DL,
+ SymbolPredicate Allow) {
+ std::string ErrMsg;
+ auto Lib = sys::DynamicLibrary::getPermanentLibrary(FileName, &ErrMsg);
+ if (!Lib.isValid())
+ return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
+ return DynamicLibrarySearchGenerator(std::move(Lib), DL, std::move(Allow));
+}
+
+SymbolNameSet DynamicLibrarySearchGenerator::
+operator()(JITDylib &JD, const SymbolNameSet &Names) {
orc::SymbolNameSet Added;
orc::SymbolMap NewSymbols;
bool HasGlobalPrefix = (GlobalPrefix != '\0');
for (auto &Name : Names) {
- if (!Allow(Name) || (*Name).empty())
+ if ((*Name).empty())
+ continue;
+
+ if (Allow && !Allow(Name))
continue;
if (HasGlobalPrefix && (*Name).front() != GlobalPrefix)
@@ -235,11 +219,11 @@ operator()(VSO &V, const SymbolNameSet &Names) {
}
}
- // Add any new symbols to V. Since the fallback generator is only called for
- // symbols that are not already defined, this will never trigger a duplicate
+ // Add any new symbols to JD. Since the generator is only called for symbols
+ // that are not already defined, this will never trigger a duplicate
// definition error, so we can wrap this call in a 'cantFail'.
if (!NewSymbols.empty())
- cantFail(V.define(absoluteSymbols(std::move(NewSymbols))));
+ cantFail(JD.define(absoluteSymbols(std::move(NewSymbols))));
return Added;
}
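
The ExecutionUtils.cpp hunks above turn the old fallback generator into DynamicLibrarySearchGenerator, adding a Load helper that opens a library by path. A sketch of wiring it up; the library path is a placeholder and the setGenerator call on JITDylib is assumed, since this diff only shows the DefGenerator member being consulted:

#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"

// Let unresolved names in JD fall back to symbols exported by a shared library.
llvm::Error addLibSymbols(llvm::orc::JITDylib &JD, const llvm::DataLayout &DL) {
  using namespace llvm::orc;
  auto Gen = DynamicLibrarySearchGenerator::Load(
      "/usr/lib/libm.so", DL,                        // placeholder path
      [](const SymbolStringPtr &) { return true; }); // allow every name
  if (!Gen)
    return Gen.takeError();
  JD.setGenerator(std::move(*Gen)); // assumed setter for the DefGenerator member
  return llvm::Error::success();
}
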
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
index 0c17f9b7ad49..d952d1be70da 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
@@ -12,28 +12,28 @@
namespace llvm {
namespace orc {
-IRCompileLayer2::IRCompileLayer2(ExecutionSession &ES, ObjectLayer &BaseLayer,
+IRCompileLayer::IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
CompileFunction Compile)
: IRLayer(ES), BaseLayer(BaseLayer), Compile(std::move(Compile)) {}
-void IRCompileLayer2::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) {
+void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) {
std::lock_guard<std::mutex> Lock(IRLayerMutex);
this->NotifyCompiled = std::move(NotifyCompiled);
}
-void IRCompileLayer2::emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) {
- assert(M && "Module must not be null");
+void IRCompileLayer::emit(MaterializationResponsibility R,
+ ThreadSafeModule TSM) {
+ assert(TSM.getModule() && "Module must not be null");
- if (auto Obj = Compile(*M)) {
+ if (auto Obj = Compile(*TSM.getModule())) {
{
std::lock_guard<std::mutex> Lock(IRLayerMutex);
if (NotifyCompiled)
- NotifyCompiled(K, std::move(M));
+ NotifyCompiled(R.getVModuleKey(), std::move(TSM));
else
- M = nullptr;
+ TSM = ThreadSafeModule();
}
- BaseLayer.emit(std::move(R), std::move(K), std::move(*Obj));
+ BaseLayer.emit(std::move(R), std::move(*Obj));
} else {
R.failMaterialization();
getExecutionSession().reportError(Obj.takeError());
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
index 4dd3cfdfe387..7bc0d696e3ac 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
@@ -13,20 +13,20 @@
namespace llvm {
namespace orc {
-IRTransformLayer2::IRTransformLayer2(ExecutionSession &ES,
+IRTransformLayer::IRTransformLayer(ExecutionSession &ES,
IRLayer &BaseLayer,
TransformFunction Transform)
: IRLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
-void IRTransformLayer2::emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) {
- assert(M && "Module must not be null");
+void IRTransformLayer::emit(MaterializationResponsibility R,
+ ThreadSafeModule TSM) {
+ assert(TSM.getModule() && "Module must not be null");
- if (auto TransformedMod = Transform(std::move(M)))
- BaseLayer.emit(std::move(R), std::move(K), std::move(*TransformedMod));
+ if (auto TransformedTSM = Transform(std::move(TSM), R))
+ BaseLayer.emit(std::move(R), std::move(*TransformedTSM));
else {
R.failMaterialization();
- getExecutionSession().reportError(TransformedMod.takeError());
+ getExecutionSession().reportError(TransformedTSM.takeError());
}
}
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 9ca2c5cb4a55..82000ec5b32b 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -27,19 +27,22 @@ public:
using CompileFunction = JITCompileCallbackManager::CompileFunction;
CompileCallbackMaterializationUnit(SymbolStringPtr Name,
- CompileFunction Compile)
- : MaterializationUnit(SymbolFlagsMap({{Name, JITSymbolFlags::Exported}})),
+ CompileFunction Compile, VModuleKey K)
+ : MaterializationUnit(SymbolFlagsMap({{Name, JITSymbolFlags::Exported}}),
+ std::move(K)),
Name(std::move(Name)), Compile(std::move(Compile)) {}
+ StringRef getName() const override { return "<Compile Callbacks>"; }
+
private:
- void materialize(MaterializationResponsibility R) {
+ void materialize(MaterializationResponsibility R) override {
SymbolMap Result;
Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported);
R.resolve(Result);
- R.finalize();
+ R.emit();
}
- void discard(const VSO &V, SymbolStringPtr Name) {
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {
llvm_unreachable("Discard should never occur on a LMU?");
}
@@ -52,20 +55,21 @@ private:
namespace llvm {
namespace orc {
-void JITCompileCallbackManager::anchor() {}
void IndirectStubsManager::anchor() {}
+void TrampolinePool::anchor() {}
Expected<JITTargetAddress>
JITCompileCallbackManager::getCompileCallback(CompileFunction Compile) {
- if (auto TrampolineAddr = getAvailableTrampolineAddr()) {
- auto CallbackName = ES.getSymbolStringPool().intern(
- std::string("cc") + std::to_string(++NextCallbackId));
+ if (auto TrampolineAddr = TP->getTrampoline()) {
+ auto CallbackName =
+ ES.intern(std::string("cc") + std::to_string(++NextCallbackId));
std::lock_guard<std::mutex> Lock(CCMgrMutex);
AddrToSymbol[*TrampolineAddr] = CallbackName;
- cantFail(CallbacksVSO.define(
+ cantFail(CallbacksJD.define(
llvm::make_unique<CompileCallbackMaterializationUnit>(
- std::move(CallbackName), std::move(Compile))));
+ std::move(CallbackName), std::move(Compile),
+ ES.allocateVModule())));
return *TrampolineAddr;
} else
return TrampolineAddr.takeError();
@@ -88,7 +92,7 @@ JITTargetAddress JITCompileCallbackManager::executeCompileCallback(
{
raw_string_ostream ErrMsgStream(ErrMsg);
ErrMsgStream << "No compile callback for trampoline at "
- << format("0x%016x", TrampolineAddr);
+ << format("0x%016" PRIx64, TrampolineAddr);
}
ES.reportError(
make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode()));
@@ -97,9 +101,10 @@ JITTargetAddress JITCompileCallbackManager::executeCompileCallback(
Name = I->second;
}
- if (auto Sym = lookup({&CallbacksVSO}, Name))
+ if (auto Sym = ES.lookup(JITDylibSearchList({{&CallbacksJD, true}}), Name))
return Sym->getAddress();
else {
+ llvm::dbgs() << "Didn't find callback.\n";
// If anything goes wrong materializing Sym then report it to the session
// and return the ErrorHandlerAddress;
ES.reportError(Sym.takeError());
@@ -107,29 +112,46 @@ JITTargetAddress JITCompileCallbackManager::executeCompileCallback(
}
}
-std::unique_ptr<JITCompileCallbackManager>
+Expected<std::unique_ptr<JITCompileCallbackManager>>
createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
JITTargetAddress ErrorHandlerAddress) {
switch (T.getArch()) {
- default: return nullptr;
-
- case Triple::aarch64: {
- typedef orc::LocalJITCompileCallbackManager<orc::OrcAArch64> CCMgrT;
- return llvm::make_unique<CCMgrT>(ES, ErrorHandlerAddress);
+ default:
+ return make_error<StringError>(
+ std::string("No callback manager available for ") + T.str(),
+ inconvertibleErrorCode());
+ case Triple::aarch64: {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcAArch64> CCMgrT;
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
}
case Triple::x86: {
typedef orc::LocalJITCompileCallbackManager<orc::OrcI386> CCMgrT;
- return llvm::make_unique<CCMgrT>(ES, ErrorHandlerAddress);
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
+ }
+
+ case Triple::mips: {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcMips32Be> CCMgrT;
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
+ }
+ case Triple::mipsel: {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcMips32Le> CCMgrT;
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
+ }
+
+ case Triple::mips64:
+ case Triple::mips64el: {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcMips64> CCMgrT;
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
}
case Triple::x86_64: {
if ( T.getOS() == Triple::OSType::Win32 ) {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
- return llvm::make_unique<CCMgrT>(ES, ErrorHandlerAddress);
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
} else {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_SysV> CCMgrT;
- return llvm::make_unique<CCMgrT>(ES, ErrorHandlerAddress);
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
}
}
@@ -157,6 +179,25 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
orc::LocalIndirectStubsManager<orc::OrcI386>>();
};
+ case Triple::mips:
+ return [](){
+ return llvm::make_unique<
+ orc::LocalIndirectStubsManager<orc::OrcMips32Be>>();
+ };
+
+ case Triple::mipsel:
+ return [](){
+ return llvm::make_unique<
+ orc::LocalIndirectStubsManager<orc::OrcMips32Le>>();
+ };
+
+ case Triple::mips64:
+ case Triple::mips64el:
+ return [](){
+ return llvm::make_unique<
+ orc::LocalIndirectStubsManager<orc::OrcMips64>>();
+ };
+
case Triple::x86_64:
if (T.getOS() == Triple::OSType::Win32) {
return [](){
@@ -210,57 +251,34 @@ void makeStub(Function &F, Value &ImplPointer) {
Builder.CreateRet(Call);
}
-// Utility class for renaming global values and functions during partitioning.
-class GlobalRenamer {
-public:
-
- static bool needsRenaming(const Value &New) {
- return !New.hasName() || New.getName().startswith("\01L");
- }
-
- const std::string& getRename(const Value &Orig) {
- // See if we have a name for this global.
- {
- auto I = Names.find(&Orig);
- if (I != Names.end())
- return I->second;
+std::vector<GlobalValue *> SymbolLinkagePromoter::operator()(Module &M) {
+ std::vector<GlobalValue *> PromotedGlobals;
+
+ for (auto &GV : M.global_values()) {
+ bool Promoted = true;
+
+ // Rename if necessary.
+ if (!GV.hasName())
+ GV.setName("__orc_anon." + Twine(NextId++));
+ else if (GV.getName().startswith("\01L"))
+ GV.setName("__" + GV.getName().substr(1) + "." + Twine(NextId++));
+ else if (GV.hasLocalLinkage())
+ GV.setName("__orc_lcl." + GV.getName() + "." + Twine(NextId++));
+ else
+ Promoted = false;
+
+ if (GV.hasLocalLinkage()) {
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ GV.setVisibility(GlobalValue::HiddenVisibility);
+ Promoted = true;
}
+ GV.setUnnamedAddr(GlobalValue::UnnamedAddr::None);
- // Nope. Create a new one.
- // FIXME: Use a more robust uniquing scheme. (This may blow up if the user
- // writes a "__orc_anon[[:digit:]]* method).
- unsigned ID = Names.size();
- std::ostringstream NameStream;
- NameStream << "__orc_anon" << ID++;
- auto I = Names.insert(std::make_pair(&Orig, NameStream.str()));
- return I.first->second;
+ if (Promoted)
+ PromotedGlobals.push_back(&GV);
}
-private:
- DenseMap<const Value*, std::string> Names;
-};
-
-static void raiseVisibilityOnValue(GlobalValue &V, GlobalRenamer &R) {
- if (V.hasLocalLinkage()) {
- if (R.needsRenaming(V))
- V.setName(R.getRename(V));
- V.setLinkage(GlobalValue::ExternalLinkage);
- V.setVisibility(GlobalValue::HiddenVisibility);
- }
- V.setUnnamedAddr(GlobalValue::UnnamedAddr::None);
- assert(!R.needsRenaming(V) && "Invalid global name.");
-}
-
-void makeAllSymbolsExternallyAccessible(Module &M) {
- GlobalRenamer Renamer;
-
- for (auto &F : M)
- raiseVisibilityOnValue(F, Renamer);
-
- for (auto &GV : M.globals())
- raiseVisibilityOnValue(GV, Renamer);
- for (auto &A : M.aliases())
- raiseVisibilityOnValue(A, Renamer);
+ return PromotedGlobals;
}
Function* cloneFunctionDecl(Module &Dst, const Function &F,
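
The IndirectionUtils.cpp hunks above replace the GlobalRenamer/makeAllSymbolsExternallyAccessible pair with SymbolLinkagePromoter, which renames and externalizes local globals and reports what it changed. A short usage sketch, with the calling context assumed:

#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/IR/Module.h"

// Promote local definitions so they remain referencable across JITDylib
// boundaries once the module is split up for the JIT.
std::vector<llvm::GlobalValue *> promoteForJIT(llvm::Module &M) {
  llvm::orc::SymbolLinkagePromoter PromoteSymbols;
  return PromoteSymbols(M); // returns the globals whose linkage or names changed
}
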
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
new file mode 100644
index 000000000000..4af09d196ff9
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -0,0 +1,55 @@
+//===----- JITTargetMachineBuilder.cpp - Build TargetMachines for JIT -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+
+#include "llvm/Support/TargetRegistry.h"
+
+namespace llvm {
+namespace orc {
+
+JITTargetMachineBuilder::JITTargetMachineBuilder(Triple TT)
+ : TT(std::move(TT)) {
+ Options.EmulatedTLS = true;
+ Options.ExplicitEmulatedTLS = true;
+}
+
+Expected<JITTargetMachineBuilder> JITTargetMachineBuilder::detectHost() {
+ // FIXME: getProcessTriple is bogus. It returns the host LLVM was compiled on,
+ // rather than a valid triple for the current process.
+ return JITTargetMachineBuilder(Triple(sys::getProcessTriple()));
+}
+
+Expected<std::unique_ptr<TargetMachine>>
+JITTargetMachineBuilder::createTargetMachine() {
+
+ std::string ErrMsg;
+ auto *TheTarget = TargetRegistry::lookupTarget(TT.getTriple(), ErrMsg);
+ if (!TheTarget)
+ return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
+
+ auto *TM =
+ TheTarget->createTargetMachine(TT.getTriple(), CPU, Features.getString(),
+ Options, RM, CM, OptLevel, /*JIT*/ true);
+ if (!TM)
+ return make_error<StringError>("Could not allocate target machine",
+ inconvertibleErrorCode());
+
+ return std::unique_ptr<TargetMachine>(TM);
+}
+
+JITTargetMachineBuilder &JITTargetMachineBuilder::addFeatures(
+ const std::vector<std::string> &FeatureVec) {
+ for (const auto &F : FeatureVec)
+ Features.AddFeature(F);
+ return *this;
+}
+
+} // End namespace orc.
+} // End namespace llvm.
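
JITTargetMachineBuilder now lives in its own file and enables emulated TLS by default. A sketch of the usual detect-then-build flow, with error handling abbreviated and the follow-on use of the TargetMachine left as an assumption:

#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/Target/TargetMachine.h"

// Detect the host, create a TargetMachine, and derive a matching DataLayout.
llvm::Error buildHostTargetMachine() {
  auto JTMB = llvm::orc::JITTargetMachineBuilder::detectHost();
  if (!JTMB)
    return JTMB.takeError();
  auto TM = JTMB->createTargetMachine();
  if (!TM)
    return TM.takeError();
  llvm::DataLayout DL = (*TM)->createDataLayout();
  (void)DL; // e.g. hand TM/DL to LLJIT::Create, as the LLJIT.cpp hunks below do
  return llvm::Error::success();
}
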
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 52ff4efe56b2..e2089f9106bd 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -12,49 +12,109 @@
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/Mangler.h"
+namespace {
+
+ // A SimpleCompiler that owns its TargetMachine.
+ class TMOwningSimpleCompiler : public llvm::orc::SimpleCompiler {
+ public:
+ TMOwningSimpleCompiler(std::unique_ptr<llvm::TargetMachine> TM)
+ : llvm::orc::SimpleCompiler(*TM), TM(std::move(TM)) {}
+ private:
+ // FIXME: shared because std::functions (and thus
+ // IRCompileLayer::CompileFunction) are not moveable.
+ std::shared_ptr<llvm::TargetMachine> TM;
+ };
+
+} // end anonymous namespace
+
namespace llvm {
namespace orc {
+LLJIT::~LLJIT() {
+ if (CompileThreads)
+ CompileThreads->wait();
+}
+
Expected<std::unique_ptr<LLJIT>>
-LLJIT::Create(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL) {
- return std::unique_ptr<LLJIT>(
- new LLJIT(std::move(ES), std::move(TM), std::move(DL)));
+LLJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+ unsigned NumCompileThreads) {
+
+ if (NumCompileThreads == 0) {
+ // If NumCompileThreads == 0 then create a single-threaded LLJIT instance.
+ auto TM = JTMB.createTargetMachine();
+ if (!TM)
+ return TM.takeError();
+ return std::unique_ptr<LLJIT>(new LLJIT(llvm::make_unique<ExecutionSession>(),
+ std::move(*TM), std::move(DL)));
+ }
+
+ return std::unique_ptr<LLJIT>(new LLJIT(llvm::make_unique<ExecutionSession>(),
+ std::move(JTMB), std::move(DL),
+ NumCompileThreads));
}
Error LLJIT::defineAbsolute(StringRef Name, JITEvaluatedSymbol Sym) {
- auto InternedName = ES->getSymbolStringPool().intern(Name);
+ auto InternedName = ES->intern(Name);
SymbolMap Symbols({{InternedName, Sym}});
return Main.define(absoluteSymbols(std::move(Symbols)));
}
-Error LLJIT::addIRModule(VSO &V, std::unique_ptr<Module> M) {
- assert(M && "Can not add null module");
+Error LLJIT::addIRModule(JITDylib &JD, ThreadSafeModule TSM) {
+ assert(TSM && "Can not add null module");
- if (auto Err = applyDataLayout(*M))
+ if (auto Err = applyDataLayout(*TSM.getModule()))
return Err;
- auto K = ES->allocateVModule();
- return CompileLayer.add(V, K, std::move(M));
+ return CompileLayer.add(JD, std::move(TSM), ES->allocateVModule());
+}
+
+Error LLJIT::addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj) {
+ assert(Obj && "Can not add null object");
+
+ return ObjLinkingLayer.add(JD, std::move(Obj), ES->allocateVModule());
}
-Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(VSO &V,
+Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
StringRef Name) {
- return llvm::orc::lookup({&V}, ES->getSymbolStringPool().intern(Name));
+ return ES->lookup(JITDylibSearchList({{&JD, true}}), ES->intern(Name));
}
LLJIT::LLJIT(std::unique_ptr<ExecutionSession> ES,
std::unique_ptr<TargetMachine> TM, DataLayout DL)
- : ES(std::move(ES)), Main(this->ES->createVSO("main")), TM(std::move(TM)),
- DL(std::move(DL)),
- ObjLinkingLayer(*this->ES,
- [this](VModuleKey K) { return getMemoryManager(K); }),
- CompileLayer(*this->ES, ObjLinkingLayer, SimpleCompiler(*this->TM)),
+ : ES(std::move(ES)), Main(this->ES->getMainJITDylib()), DL(std::move(DL)),
+ ObjLinkingLayer(
+ *this->ES,
+ []() { return llvm::make_unique<SectionMemoryManager>(); }),
+ CompileLayer(*this->ES, ObjLinkingLayer,
+ TMOwningSimpleCompiler(std::move(TM))),
CtorRunner(Main), DtorRunner(Main) {}
-std::shared_ptr<RuntimeDyld::MemoryManager>
-LLJIT::getMemoryManager(VModuleKey K) {
- return llvm::make_unique<SectionMemoryManager>();
+LLJIT::LLJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
+ DataLayout DL, unsigned NumCompileThreads)
+ : ES(std::move(ES)), Main(this->ES->getMainJITDylib()), DL(std::move(DL)),
+ ObjLinkingLayer(
+ *this->ES,
+ []() { return llvm::make_unique<SectionMemoryManager>(); }),
+ CompileLayer(*this->ES, ObjLinkingLayer,
+ ConcurrentIRCompiler(std::move(JTMB))),
+ CtorRunner(Main), DtorRunner(Main) {
+ assert(NumCompileThreads != 0 &&
+ "Multithreaded LLJIT instance can not be created with 0 threads");
+
+ // Move modules to new contexts when they're emitted so that we can compile
+ // them in parallel.
+ CompileLayer.setCloneToNewContextOnEmit(true);
+
+ // Create a thread pool to compile on and set the execution session
+ // dispatcher to use the thread pool.
+ CompileThreads = llvm::make_unique<ThreadPool>(NumCompileThreads);
+ this->ES->setDispatchMaterialization(
+ [this](JITDylib &JD, std::unique_ptr<MaterializationUnit> MU) {
+ // FIXME: Switch to move capture once we have c++14.
+ auto SharedMU = std::shared_ptr<MaterializationUnit>(std::move(MU));
+ auto Work = [SharedMU, &JD]() { SharedMU->doMaterialize(JD); };
+ CompileThreads->async(std::move(Work));
+ });
}
std::string LLJIT::mangle(StringRef UnmangledName) {
@@ -84,16 +144,15 @@ void LLJIT::recordCtorDtors(Module &M) {
}
Expected<std::unique_ptr<LLLazyJIT>>
-LLLazyJIT::Create(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL,
- LLVMContext &Ctx) {
- const Triple &TT = TM->getTargetTriple();
+LLLazyJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+ JITTargetAddress ErrorAddr, unsigned NumCompileThreads) {
+ auto ES = llvm::make_unique<ExecutionSession>();
- auto CCMgr = createLocalCompileCallbackManager(TT, *ES, 0);
- if (!CCMgr)
- return make_error<StringError>(
- std::string("No callback manager available for ") + TT.str(),
- inconvertibleErrorCode());
+ const Triple &TT = JTMB.getTargetTriple();
+
+ auto LCTMgr = createLocalLazyCallThroughManager(TT, *ES, ErrorAddr);
+ if (!LCTMgr)
+ return LCTMgr.takeError();
auto ISMBuilder = createLocalIndirectStubsManagerBuilder(TT);
if (!ISMBuilder)
@@ -101,34 +160,51 @@ LLLazyJIT::Create(std::unique_ptr<ExecutionSession> ES,
std::string("No indirect stubs manager builder for ") + TT.str(),
inconvertibleErrorCode());
- return std::unique_ptr<LLLazyJIT>(
- new LLLazyJIT(std::move(ES), std::move(TM), std::move(DL), Ctx,
- std::move(CCMgr), std::move(ISMBuilder)));
+ if (NumCompileThreads == 0) {
+ auto TM = JTMB.createTargetMachine();
+ if (!TM)
+ return TM.takeError();
+ return std::unique_ptr<LLLazyJIT>(
+ new LLLazyJIT(std::move(ES), std::move(*TM), std::move(DL),
+ std::move(*LCTMgr), std::move(ISMBuilder)));
+ }
+
+ return std::unique_ptr<LLLazyJIT>(new LLLazyJIT(
+ std::move(ES), std::move(JTMB), std::move(DL), NumCompileThreads,
+ std::move(*LCTMgr), std::move(ISMBuilder)));
}
-Error LLLazyJIT::addLazyIRModule(VSO &V, std::unique_ptr<Module> M) {
- assert(M && "Can not add null module");
+Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) {
+ assert(TSM && "Can not add null module");
- if (auto Err = applyDataLayout(*M))
+ if (auto Err = applyDataLayout(*TSM.getModule()))
return Err;
- makeAllSymbolsExternallyAccessible(*M);
+ recordCtorDtors(*TSM.getModule());
- recordCtorDtors(*M);
-
- auto K = ES->allocateVModule();
- return CODLayer.add(V, K, std::move(M));
+ return CODLayer.add(JD, std::move(TSM), ES->allocateVModule());
}
LLLazyJIT::LLLazyJIT(
std::unique_ptr<ExecutionSession> ES, std::unique_ptr<TargetMachine> TM,
- DataLayout DL, LLVMContext &Ctx,
- std::unique_ptr<JITCompileCallbackManager> CCMgr,
+ DataLayout DL, std::unique_ptr<LazyCallThroughManager> LCTMgr,
std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder)
: LLJIT(std::move(ES), std::move(TM), std::move(DL)),
- CCMgr(std::move(CCMgr)), TransformLayer(*this->ES, CompileLayer),
- CODLayer(*this->ES, TransformLayer, *this->CCMgr, std::move(ISMBuilder),
- [&]() -> LLVMContext & { return Ctx; }) {}
+ LCTMgr(std::move(LCTMgr)), TransformLayer(*this->ES, CompileLayer),
+ CODLayer(*this->ES, TransformLayer, *this->LCTMgr,
+ std::move(ISMBuilder)) {}
+
+LLLazyJIT::LLLazyJIT(
+ std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
+ DataLayout DL, unsigned NumCompileThreads,
+ std::unique_ptr<LazyCallThroughManager> LCTMgr,
+ std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder)
+ : LLJIT(std::move(ES), std::move(JTMB), std::move(DL), NumCompileThreads),
+ LCTMgr(std::move(LCTMgr)), TransformLayer(*this->ES, CompileLayer),
+ CODLayer(*this->ES, TransformLayer, *this->LCTMgr,
+ std::move(ISMBuilder)) {
+ CODLayer.setCloneToNewContextOnEmit(true);
+}
} // End namespace orc.
} // End namespace llvm.
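The multi-threaded constructor above wraps each move-only MaterializationUnit in a shared_ptr before queueing it, because pre-C++14 lambdas cannot move-capture. A standalone sketch of that dispatch pattern; WorkItem and dispatchOnPool are illustrative stand-ins, not part of the patch:

    #include "llvm/Support/ThreadPool.h"
    #include <memory>

    struct WorkItem {
      void run() { /* e.g. materialize one module */ }
    };

    // Queue a move-only work item on the pool via a copyable shared_ptr capture.
    void dispatchOnPool(llvm::ThreadPool &Pool, std::unique_ptr<WorkItem> WI) {
      auto SharedWI = std::shared_ptr<WorkItem>(std::move(WI));
      Pool.async([SharedWI]() { SharedWI->run(); });
    }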
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/Layer.cpp
index b9da3b7fb8d5..11af76825e9f 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/Layer.cpp
@@ -9,7 +9,9 @@
#include "llvm/ExecutionEngine/Orc/Layer.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "orc"
namespace llvm {
namespace orc {
@@ -17,17 +19,19 @@ namespace orc {
IRLayer::IRLayer(ExecutionSession &ES) : ES(ES) {}
IRLayer::~IRLayer() {}
-Error IRLayer::add(VSO &V, VModuleKey K, std::unique_ptr<Module> M) {
- return V.define(llvm::make_unique<BasicIRLayerMaterializationUnit>(
- *this, std::move(K), std::move(M)));
+Error IRLayer::add(JITDylib &JD, ThreadSafeModule TSM, VModuleKey K) {
+ return JD.define(llvm::make_unique<BasicIRLayerMaterializationUnit>(
+ *this, std::move(K), std::move(TSM)));
}
IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES,
- std::unique_ptr<Module> M)
- : MaterializationUnit(SymbolFlagsMap()), M(std::move(M)) {
+ ThreadSafeModule TSM, VModuleKey K)
+ : MaterializationUnit(SymbolFlagsMap(), std::move(K)), TSM(std::move(TSM)) {
+
+ assert(this->TSM && "Module must not be null");
- MangleAndInterner Mangle(ES, this->M->getDataLayout());
- for (auto &G : this->M->global_values()) {
+ MangleAndInterner Mangle(ES, this->TSM.getModule()->getDataLayout());
+ for (auto &G : this->TSM.getModule()->global_values()) {
if (G.hasName() && !G.isDeclaration() && !G.hasLocalLinkage() &&
!G.hasAvailableExternallyLinkage() && !G.hasAppendingLinkage()) {
auto MangledName = Mangle(G.getName());
@@ -38,12 +42,24 @@ IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES,
}
IRMaterializationUnit::IRMaterializationUnit(
- std::unique_ptr<Module> M, SymbolFlagsMap SymbolFlags,
+ ThreadSafeModule TSM, VModuleKey K, SymbolFlagsMap SymbolFlags,
SymbolNameToDefinitionMap SymbolToDefinition)
- : MaterializationUnit(std::move(SymbolFlags)), M(std::move(M)),
- SymbolToDefinition(std::move(SymbolToDefinition)) {}
+ : MaterializationUnit(std::move(SymbolFlags), std::move(K)),
+ TSM(std::move(TSM)), SymbolToDefinition(std::move(SymbolToDefinition)) {}
+
+StringRef IRMaterializationUnit::getName() const {
+ if (TSM.getModule())
+ return TSM.getModule()->getModuleIdentifier();
+ return "<null module>";
+}
+
+void IRMaterializationUnit::discard(const JITDylib &JD,
+ const SymbolStringPtr &Name) {
+ LLVM_DEBUG(JD.getExecutionSession().runSessionLocked([&]() {
+ dbgs() << "In " << JD.getName() << " discarding " << *Name << " from MU@"
+ << this << " (" << getName() << ")\n";
+ }););
-void IRMaterializationUnit::discard(const VSO &V, SymbolStringPtr Name) {
auto I = SymbolToDefinition.find(Name);
assert(I != SymbolToDefinition.end() &&
"Symbol not provided by this MU, or previously discarded");
@@ -54,53 +70,117 @@ void IRMaterializationUnit::discard(const VSO &V, SymbolStringPtr Name) {
}
BasicIRLayerMaterializationUnit::BasicIRLayerMaterializationUnit(
- IRLayer &L, VModuleKey K, std::unique_ptr<Module> M)
- : IRMaterializationUnit(L.getExecutionSession(), std::move(M)),
+ IRLayer &L, VModuleKey K, ThreadSafeModule TSM)
+ : IRMaterializationUnit(L.getExecutionSession(), std::move(TSM),
+ std::move(K)),
L(L), K(std::move(K)) {}
void BasicIRLayerMaterializationUnit::materialize(
MaterializationResponsibility R) {
- L.emit(std::move(R), std::move(K), std::move(M));
+
+ // Throw away the SymbolToDefinition map: it's not usable after we hand
+ // off the module.
+ SymbolToDefinition.clear();
+
+ // If cloneToNewContextOnEmit is set, clone the module now.
+ if (L.getCloneToNewContextOnEmit())
+ TSM = cloneToNewContext(TSM);
+
+#ifndef NDEBUG
+ auto &ES = R.getTargetJITDylib().getExecutionSession();
+#endif // NDEBUG
+
+ auto Lock = TSM.getContextLock();
+ LLVM_DEBUG(ES.runSessionLocked([&]() {
+ dbgs() << "Emitting, for " << R.getTargetJITDylib().getName() << ", "
+ << *this << "\n";
+ }););
+ L.emit(std::move(R), std::move(TSM));
+ LLVM_DEBUG(ES.runSessionLocked([&]() {
+ dbgs() << "Finished emitting, for " << R.getTargetJITDylib().getName()
+ << ", " << *this << "\n";
+ }););
}
ObjectLayer::ObjectLayer(ExecutionSession &ES) : ES(ES) {}
ObjectLayer::~ObjectLayer() {}
-Error ObjectLayer::add(VSO &V, VModuleKey K, std::unique_ptr<MemoryBuffer> O) {
- return V.define(llvm::make_unique<BasicObjectLayerMaterializationUnit>(
- *this, std::move(K), std::move(O)));
+Error ObjectLayer::add(JITDylib &JD, std::unique_ptr<MemoryBuffer> O,
+ VModuleKey K) {
+ auto ObjMU = BasicObjectLayerMaterializationUnit::Create(*this, std::move(K),
+ std::move(O));
+ if (!ObjMU)
+ return ObjMU.takeError();
+ return JD.define(std::move(*ObjMU));
+}
+
+Expected<std::unique_ptr<BasicObjectLayerMaterializationUnit>>
+BasicObjectLayerMaterializationUnit::Create(ObjectLayer &L, VModuleKey K,
+ std::unique_ptr<MemoryBuffer> O) {
+ auto SymbolFlags =
+ getObjectSymbolFlags(L.getExecutionSession(), O->getMemBufferRef());
+
+ if (!SymbolFlags)
+ return SymbolFlags.takeError();
+
+ return std::unique_ptr<BasicObjectLayerMaterializationUnit>(
+ new BasicObjectLayerMaterializationUnit(L, K, std::move(O),
+ std::move(*SymbolFlags)));
}
BasicObjectLayerMaterializationUnit::BasicObjectLayerMaterializationUnit(
- ObjectLayer &L, VModuleKey K, std::unique_ptr<MemoryBuffer> O)
- : MaterializationUnit(SymbolFlagsMap()), L(L), K(std::move(K)),
- O(std::move(O)) {
-
- auto &ES = L.getExecutionSession();
- auto Obj = cantFail(
- object::ObjectFile::createObjectFile(this->O->getMemBufferRef()));
-
- for (auto &Sym : Obj->symbols()) {
- if (!(Sym.getFlags() & object::BasicSymbolRef::SF_Undefined) &&
- (Sym.getFlags() & object::BasicSymbolRef::SF_Exported)) {
- auto InternedName =
- ES.getSymbolStringPool().intern(cantFail(Sym.getName()));
- SymbolFlags[InternedName] = JITSymbolFlags::fromObjectSymbol(Sym);
- }
- }
+ ObjectLayer &L, VModuleKey K, std::unique_ptr<MemoryBuffer> O,
+ SymbolFlagsMap SymbolFlags)
+ : MaterializationUnit(std::move(SymbolFlags), std::move(K)), L(L),
+ O(std::move(O)) {}
+
+StringRef BasicObjectLayerMaterializationUnit::getName() const {
+ if (O)
+ return O->getBufferIdentifier();
+ return "<null object>";
}
void BasicObjectLayerMaterializationUnit::materialize(
MaterializationResponsibility R) {
- L.emit(std::move(R), std::move(K), std::move(O));
+ L.emit(std::move(R), std::move(O));
}
-void BasicObjectLayerMaterializationUnit::discard(const VSO &V,
- SymbolStringPtr Name) {
+void BasicObjectLayerMaterializationUnit::discard(const JITDylib &JD,
+ const SymbolStringPtr &Name) {
// FIXME: Support object file level discard. This could be done by building a
// filter to pass to the object layer along with the object itself.
}
+Expected<SymbolFlagsMap> getObjectSymbolFlags(ExecutionSession &ES,
+ MemoryBufferRef ObjBuffer) {
+ auto Obj = object::ObjectFile::createObjectFile(ObjBuffer);
+
+ if (!Obj)
+ return Obj.takeError();
+
+ SymbolFlagsMap SymbolFlags;
+ for (auto &Sym : (*Obj)->symbols()) {
+ // Skip symbols not defined in this object file.
+ if (Sym.getFlags() & object::BasicSymbolRef::SF_Undefined)
+ continue;
+
+ // Skip symbols that are not global.
+ if (!(Sym.getFlags() & object::BasicSymbolRef::SF_Global))
+ continue;
+
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+ auto InternedName = ES.intern(*Name);
+ auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
+ if (!SymFlags)
+ return SymFlags.takeError();
+ SymbolFlags[InternedName] = std::move(*SymFlags);
+ }
+
+ return SymbolFlags;
+}
+
} // End namespace orc.
} // End namespace llvm.
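A short sketch of feeding the new ThreadSafeModule-based IRLayer::add() above from client code. The ThreadSafeContext/ThreadSafeModule constructor forms, and getContext() returning a pointer, are assumptions based on the ThreadSafeModule.h header in this import; "demo" and addDemoModule are illustrative:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ExecutionEngine/Orc/Core.h"
    #include "llvm/ExecutionEngine/Orc/Layer.h"
    #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    // Wrap a freshly built Module together with its context before adding it.
    llvm::Error addDemoModule(llvm::orc::IRLayer &L, llvm::orc::JITDylib &JD,
                              llvm::orc::ExecutionSession &ES) {
      llvm::orc::ThreadSafeContext TSCtx(llvm::make_unique<llvm::LLVMContext>());
      auto M = llvm::make_unique<llvm::Module>("demo", *TSCtx.getContext());
      llvm::orc::ThreadSafeModule TSM(std::move(M), std::move(TSCtx));
      return L.add(JD, std::move(TSM), ES.allocateVModule());
    }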
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
new file mode 100644
index 000000000000..55f4a7c5afce
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -0,0 +1,208 @@
+//===---------- LazyReexports.cpp - Utilities for lazy reexports ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/LazyReexports.h"
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+
+void LazyCallThroughManager::NotifyResolvedFunction::anchor() {}
+
+LazyCallThroughManager::LazyCallThroughManager(
+ ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr,
+ std::unique_ptr<TrampolinePool> TP)
+ : ES(ES), ErrorHandlerAddr(ErrorHandlerAddr), TP(std::move(TP)) {}
+
+Expected<JITTargetAddress> LazyCallThroughManager::getCallThroughTrampoline(
+ JITDylib &SourceJD, SymbolStringPtr SymbolName,
+ std::shared_ptr<NotifyResolvedFunction> NotifyResolved) {
+ std::lock_guard<std::mutex> Lock(LCTMMutex);
+ auto Trampoline = TP->getTrampoline();
+
+ if (!Trampoline)
+ return Trampoline.takeError();
+
+ Reexports[*Trampoline] = std::make_pair(&SourceJD, std::move(SymbolName));
+ Notifiers[*Trampoline] = std::move(NotifyResolved);
+ return *Trampoline;
+}
+
+JITTargetAddress
+LazyCallThroughManager::callThroughToSymbol(JITTargetAddress TrampolineAddr) {
+ JITDylib *SourceJD = nullptr;
+ SymbolStringPtr SymbolName;
+
+ {
+ std::lock_guard<std::mutex> Lock(LCTMMutex);
+ auto I = Reexports.find(TrampolineAddr);
+ if (I == Reexports.end())
+ return ErrorHandlerAddr;
+ SourceJD = I->second.first;
+ SymbolName = I->second.second;
+ }
+
+ auto LookupResult = ES.lookup(JITDylibSearchList({{SourceJD, true}}),
+ {SymbolName}, NoDependenciesToRegister, true);
+
+ if (!LookupResult) {
+ ES.reportError(LookupResult.takeError());
+ return ErrorHandlerAddr;
+ }
+
+ assert(LookupResult->size() == 1 && "Unexpected number of results");
+ assert(LookupResult->count(SymbolName) && "Unexpected result");
+
+ auto ResolvedAddr = LookupResult->begin()->second.getAddress();
+
+ std::shared_ptr<NotifyResolvedFunction> NotifyResolved = nullptr;
+ {
+ std::lock_guard<std::mutex> Lock(LCTMMutex);
+ auto I = Notifiers.find(TrampolineAddr);
+ if (I != Notifiers.end()) {
+ NotifyResolved = I->second;
+ Notifiers.erase(I);
+ }
+ }
+
+ if (NotifyResolved) {
+ if (auto Err = (*NotifyResolved)(*SourceJD, SymbolName, ResolvedAddr)) {
+ ES.reportError(std::move(Err));
+ return ErrorHandlerAddr;
+ }
+ }
+
+ return ResolvedAddr;
+}
+
+Expected<std::unique_ptr<LazyCallThroughManager>>
+createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddr) {
+ switch (T.getArch()) {
+ default:
+ return make_error<StringError>(
+ std::string("No callback manager available for ") + T.str(),
+ inconvertibleErrorCode());
+
+ case Triple::aarch64:
+ return LocalLazyCallThroughManager::Create<OrcAArch64>(ES,
+ ErrorHandlerAddr);
+
+ case Triple::x86:
+ return LocalLazyCallThroughManager::Create<OrcI386>(ES, ErrorHandlerAddr);
+
+ case Triple::mips:
+ return LocalLazyCallThroughManager::Create<OrcMips32Be>(ES,
+ ErrorHandlerAddr);
+
+ case Triple::mipsel:
+ return LocalLazyCallThroughManager::Create<OrcMips32Le>(ES,
+ ErrorHandlerAddr);
+
+ case Triple::mips64:
+ case Triple::mips64el:
+ return LocalLazyCallThroughManager::Create<OrcMips64>(ES, ErrorHandlerAddr);
+
+ case Triple::x86_64:
+ if (T.getOS() == Triple::OSType::Win32)
+ return LocalLazyCallThroughManager::Create<OrcX86_64_Win32>(
+ ES, ErrorHandlerAddr);
+ else
+ return LocalLazyCallThroughManager::Create<OrcX86_64_SysV>(
+ ES, ErrorHandlerAddr);
+ }
+}
+
+LazyReexportsMaterializationUnit::LazyReexportsMaterializationUnit(
+ LazyCallThroughManager &LCTManager, IndirectStubsManager &ISManager,
+ JITDylib &SourceJD, SymbolAliasMap CallableAliases, VModuleKey K)
+ : MaterializationUnit(extractFlags(CallableAliases), std::move(K)),
+ LCTManager(LCTManager), ISManager(ISManager), SourceJD(SourceJD),
+ CallableAliases(std::move(CallableAliases)),
+ NotifyResolved(LazyCallThroughManager::createNotifyResolvedFunction(
+ [&ISManager](JITDylib &JD, const SymbolStringPtr &SymbolName,
+ JITTargetAddress ResolvedAddr) {
+ return ISManager.updatePointer(*SymbolName, ResolvedAddr);
+ })) {}
+
+StringRef LazyReexportsMaterializationUnit::getName() const {
+ return "<Lazy Reexports>";
+}
+
+void LazyReexportsMaterializationUnit::materialize(
+ MaterializationResponsibility R) {
+ auto RequestedSymbols = R.getRequestedSymbols();
+
+ SymbolAliasMap RequestedAliases;
+ for (auto &RequestedSymbol : RequestedSymbols) {
+ auto I = CallableAliases.find(RequestedSymbol);
+ assert(I != CallableAliases.end() && "Symbol not found in alias map?");
+ RequestedAliases[I->first] = std::move(I->second);
+ CallableAliases.erase(I);
+ }
+
+ if (!CallableAliases.empty())
+ R.replace(lazyReexports(LCTManager, ISManager, SourceJD,
+ std::move(CallableAliases)));
+
+ IndirectStubsManager::StubInitsMap StubInits;
+ for (auto &Alias : RequestedAliases) {
+
+ auto CallThroughTrampoline = LCTManager.getCallThroughTrampoline(
+ SourceJD, Alias.second.Aliasee, NotifyResolved);
+
+ if (!CallThroughTrampoline) {
+ SourceJD.getExecutionSession().reportError(
+ CallThroughTrampoline.takeError());
+ R.failMaterialization();
+ return;
+ }
+
+ StubInits[*Alias.first] =
+ std::make_pair(*CallThroughTrampoline, Alias.second.AliasFlags);
+ }
+
+ if (auto Err = ISManager.createStubs(StubInits)) {
+ SourceJD.getExecutionSession().reportError(std::move(Err));
+ R.failMaterialization();
+ return;
+ }
+
+ SymbolMap Stubs;
+ for (auto &Alias : RequestedAliases)
+ Stubs[Alias.first] = ISManager.findStub(*Alias.first, false);
+
+ R.resolve(Stubs);
+ R.emit();
+}
+
+void LazyReexportsMaterializationUnit::discard(const JITDylib &JD,
+ const SymbolStringPtr &Name) {
+ assert(CallableAliases.count(Name) &&
+ "Symbol not covered by this MaterializationUnit");
+ CallableAliases.erase(Name);
+}
+
+SymbolFlagsMap
+LazyReexportsMaterializationUnit::extractFlags(const SymbolAliasMap &Aliases) {
+ SymbolFlagsMap SymbolFlags;
+ for (auto &KV : Aliases) {
+ assert(KV.second.AliasFlags.isCallable() &&
+ "Lazy re-exports must be callable symbols");
+ SymbolFlags[KV.first] = KV.second.AliasFlags;
+ }
+ return SymbolFlags;
+}
+
+} // End namespace orc.
+} // End namespace llvm.
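A hedged usage sketch for the lazyReexports() utility this new file provides (the same helper the replace() call above uses): make "foo" in TargetJD lazily forward to "foo_impl" in SourceJD. The SymbolAliasMapEntry constructor form and the exact lazyReexports() signature are assumptions taken from the accompanying headers; the symbol names are illustrative:

    #include "llvm/ExecutionEngine/Orc/Core.h"
    #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
    #include "llvm/ExecutionEngine/Orc/LazyReexports.h"

    llvm::Error installLazyAlias(llvm::orc::ExecutionSession &ES,
                                 llvm::orc::LazyCallThroughManager &LCTM,
                                 llvm::orc::IndirectStubsManager &ISM,
                                 llvm::orc::JITDylib &SourceJD,
                                 llvm::orc::JITDylib &TargetJD) {
      // extractFlags() above insists that lazy re-exports are callable symbols.
      llvm::JITSymbolFlags Flags(llvm::JITSymbolFlags::Exported);
      Flags |= llvm::JITSymbolFlags::Callable;
      llvm::orc::SymbolAliasMap Aliases;
      Aliases[ES.intern("foo")] =
          llvm::orc::SymbolAliasMapEntry(ES.intern("foo_impl"), Flags);
      // Calls to "foo" hit a stub; the first call takes the trampoline, resolves
      // "foo_impl", patches the stub pointer, and then jumps to the real body.
      return TargetJD.define(
          llvm::orc::lazyReexports(LCTM, ISM, SourceJD, std::move(Aliases)));
    }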
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/Legacy.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/Legacy.cpp
index 18be9a042f7f..ddb72544b770 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/Legacy.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/Legacy.cpp
@@ -18,47 +18,47 @@ JITSymbolResolverAdapter::JITSymbolResolverAdapter(
ExecutionSession &ES, SymbolResolver &R, MaterializationResponsibility *MR)
: ES(ES), R(R), MR(MR) {}
-Expected<JITSymbolResolverAdapter::LookupResult>
-JITSymbolResolverAdapter::lookup(const LookupSet &Symbols) {
+void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols,
+ OnResolvedFunction OnResolved) {
SymbolNameSet InternedSymbols;
for (auto &S : Symbols)
- InternedSymbols.insert(ES.getSymbolStringPool().intern(S));
+ InternedSymbols.insert(ES.intern(S));
- auto LookupFn = [&, this](std::shared_ptr<AsynchronousSymbolQuery> Q,
- SymbolNameSet Unresolved) {
- return R.lookup(std::move(Q), std::move(Unresolved));
- };
+ auto OnResolvedWithUnwrap = [OnResolved](Expected<SymbolMap> InternedResult) {
+ if (!InternedResult) {
+ OnResolved(InternedResult.takeError());
+ return;
+ }
- auto RegisterDependencies = [&](const SymbolDependenceMap &Deps) {
- if (MR)
- MR->addDependenciesForAll(Deps);
+ LookupResult Result;
+ for (auto &KV : *InternedResult)
+ Result[*KV.first] = std::move(KV.second);
+ OnResolved(Result);
};
- auto InternedResult =
- ES.legacyLookup(ES, std::move(LookupFn), std::move(InternedSymbols),
- false, RegisterDependencies);
-
- if (!InternedResult)
- return InternedResult.takeError();
+ auto Q = std::make_shared<AsynchronousSymbolQuery>(
+ InternedSymbols, OnResolvedWithUnwrap,
+ [this](Error Err) { ES.reportError(std::move(Err)); });
- JITSymbolResolver::LookupResult Result;
- for (auto &KV : *InternedResult)
- Result[*KV.first] = KV.second;
-
- return Result;
+ auto Unresolved = R.lookup(Q, InternedSymbols);
+ if (Unresolved.empty()) {
+ if (MR)
+ MR->addDependenciesForAll(Q->QueryRegistrations);
+ } else
+ ES.legacyFailQuery(*Q, make_error<SymbolsNotFound>(std::move(Unresolved)));
}
-Expected<JITSymbolResolverAdapter::LookupFlagsResult>
-JITSymbolResolverAdapter::lookupFlags(const LookupSet &Symbols) {
+Expected<JITSymbolResolverAdapter::LookupSet>
+JITSymbolResolverAdapter::getResponsibilitySet(const LookupSet &Symbols) {
SymbolNameSet InternedSymbols;
for (auto &S : Symbols)
- InternedSymbols.insert(ES.getSymbolStringPool().intern(S));
+ InternedSymbols.insert(ES.intern(S));
- SymbolFlagsMap SymbolFlags = R.lookupFlags(InternedSymbols);
- LookupFlagsResult Result;
- for (auto &KV : SymbolFlags) {
- ResolvedStrings.insert(KV.first);
- Result[*KV.first] = KV.second;
+ auto InternedResult = R.getResponsibilitySet(InternedSymbols);
+ LookupSet Result;
+ for (auto &S : InternedResult) {
+ ResolvedStrings.insert(S);
+ Result.insert(*S);
}
return Result;
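The adapter above turns the lookup interface inside-out: callers now pass an OnResolved callback instead of blocking on an Expected result. A purely illustrative client-side sketch of turning the callback form back into a blocking call, assuming the callback fires exactly once:

    #include "llvm/ExecutionEngine/JITSymbol.h"
    #include <future>
    #include <utility>

    llvm::Expected<llvm::JITSymbolResolver::LookupResult>
    blockingLookup(llvm::JITSymbolResolver &Resolver,
                   const llvm::JITSymbolResolver::LookupSet &Symbols) {
      std::promise<llvm::Expected<llvm::JITSymbolResolver::LookupResult>> P;
      auto F = P.get_future();
      Resolver.lookup(
          Symbols,
          [&P](llvm::Expected<llvm::JITSymbolResolver::LookupResult> R) {
            P.set_value(std::move(R));
          });
      return F.get(); // blocks until the resolver invokes the callback
    }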
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp
index 3796e3d37bc2..922fc6f021ce 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp
@@ -14,8 +14,8 @@
namespace llvm {
namespace orc {
-SymbolFlagsMap NullResolver::lookupFlags(const SymbolNameSet &Symbols) {
- return SymbolFlagsMap();
+SymbolNameSet NullResolver::getResponsibilitySet(const SymbolNameSet &Symbols) {
+ return Symbols;
}
SymbolNameSet
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
index 6980c8140fd0..825f53204736 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
@@ -13,17 +13,17 @@
namespace llvm {
namespace orc {
-ObjectTransformLayer2::ObjectTransformLayer2(ExecutionSession &ES,
- ObjectLayer &BaseLayer,
- TransformFunction Transform)
+ObjectTransformLayer::ObjectTransformLayer(ExecutionSession &ES,
+ ObjectLayer &BaseLayer,
+ TransformFunction Transform)
: ObjectLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
-void ObjectTransformLayer2::emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<MemoryBuffer> O) {
+void ObjectTransformLayer::emit(MaterializationResponsibility R,
+ std::unique_ptr<MemoryBuffer> O) {
assert(O && "Module must not be null");
if (auto TransformedObj = Transform(std::move(O)))
- BaseLayer.emit(std::move(R), std::move(K), std::move(*TransformedObj));
+ BaseLayer.emit(std::move(R), std::move(*TransformedObj));
else {
R.failMaterialization();
getExecutionSession().reportError(TransformedObj.takeError());
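For reference, a minimal TransformFunction matching the signature the renamed ObjectTransformLayer::emit() above expects; a real transform might rewrite or instrument the object buffer before handing it on. passThroughTransform is an illustrative name, not part of the patch, and would be installed as the layer's Transform argument, e.g. ObjectTransformLayer(ES, BaseLayer, passThroughTransform):

    #include "llvm/Support/Error.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <memory>

    static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
    passThroughTransform(std::unique_ptr<llvm::MemoryBuffer> Obj) {
      // Inspect or rewrite Obj here; returning an Error aborts materialization.
      return std::move(Obj);
    }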
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
index e3c968157976..aa4055542426 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
@@ -537,5 +537,448 @@ Error OrcI386::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
return Error::success();
}
+void OrcMips32_Base::writeResolverCode(uint8_t *ResolverMem,
+ JITReentryFn ReentryFn,
+ void *CallbackMgr, bool isBigEndian) {
+
+ const uint32_t ResolverCode[] = {
+ // resolver_entry:
+ 0x27bdff98, // 0x00: addiu $sp,$sp,-104
+ 0xafa20000, // 0x04: sw $v0,0($sp)
+ 0xafa30004, // 0x08: sw $v1,4($sp)
+ 0xafa40008, // 0x0c: sw $a0,8($sp)
+ 0xafa5000c, // 0x10: sw $a1,12($sp)
+ 0xafa60010, // 0x14: sw $a2,16($sp)
+ 0xafa70014, // 0x18: sw $a3,20($sp)
+ 0xafb00018, // 0x1c: sw $s0,24($sp)
+ 0xafb1001c, // 0x20: sw $s1,28($sp)
+ 0xafb20020, // 0x24: sw $s2,32($sp)
+ 0xafb30024, // 0x28: sw $s3,36($sp)
+ 0xafb40028, // 0x2c: sw $s4,40($sp)
+ 0xafb5002c, // 0x30: sw $s5,44($sp)
+ 0xafb60030, // 0x34: sw $s6,48($sp)
+ 0xafb70034, // 0x38: sw $s7,52($sp)
+ 0xafa80038, // 0x3c: sw $t0,56($sp)
+ 0xafa9003c, // 0x40: sw $t1,60($sp)
+ 0xafaa0040, // 0x44: sw $t2,64($sp)
+ 0xafab0044, // 0x48: sw $t3,68($sp)
+ 0xafac0048, // 0x4c: sw $t4,72($sp)
+ 0xafad004c, // 0x50: sw $t5,76($sp)
+ 0xafae0050, // 0x54: sw $t6,80($sp)
+ 0xafaf0054, // 0x58: sw $t7,84($sp)
+ 0xafb80058, // 0x5c: sw $t8,88($sp)
+ 0xafb9005c, // 0x60: sw $t9,92($sp)
+ 0xafbe0060, // 0x64: sw $fp,96($sp)
+ 0xafbf0064, // 0x68: sw $ra,100($sp)
+
+ // Callback manager addr.
+ 0x00000000, // 0x6c: lui $a0,callbackmgr
+ 0x00000000, // 0x70: addiu $a0,$a0,callbackmgr
+
+ 0x03e02825, // 0x74: move $a1, $ra
+ 0x24a5ffec, // 0x78: addiu $a1,$a1,-20
+
+ // JIT re-entry fn addr:
+ 0x00000000, // 0x7c: lui $t9,reentry
+ 0x00000000, // 0x80: addiu $t9,$t9,reentry
+
+ 0x0320f809, // 0x84: jalr $t9
+ 0x00000000, // 0x88: nop
+ 0x8fbf0064, // 0x8c: lw $ra,100($sp)
+ 0x8fbe0060, // 0x90: lw $fp,96($sp)
+ 0x8fb9005c, // 0x94: lw $t9,92($sp)
+ 0x8fb80058, // 0x98: lw $t8,88($sp)
+ 0x8faf0054, // 0x9c: lw $t7,84($sp)
+ 0x8fae0050, // 0xa0: lw $t6,80($sp)
+ 0x8fad004c, // 0xa4: lw $t5,76($sp)
+ 0x8fac0048, // 0xa8: lw $t4,72($sp)
+ 0x8fab0044, // 0xac: lw $t3,68($sp)
+ 0x8faa0040, // 0xb0: lw $t2,64($sp)
+ 0x8fa9003c, // 0xb4: lw $t1,60($sp)
+ 0x8fa80038, // 0xb8: lw $t0,56($sp)
+ 0x8fb70034, // 0xbc: lw $s7,52($sp)
+ 0x8fb60030, // 0xc0: lw $s6,48($sp)
+ 0x8fb5002c, // 0xc4: lw $s5,44($sp)
+ 0x8fb40028, // 0xc8: lw $s4,40($sp)
+ 0x8fb30024, // 0xcc: lw $s3,36($sp)
+ 0x8fb20020, // 0xd0: lw $s2,32($sp)
+ 0x8fb1001c, // 0xd4: lw $s1,28($sp)
+ 0x8fb00018, // 0xd8: lw $s0,24($sp)
+ 0x8fa70014, // 0xdc: lw $a3,20($sp)
+ 0x8fa60010, // 0xe0: lw $a2,16($sp)
+ 0x8fa5000c, // 0xe4: lw $a1,12($sp)
+ 0x8fa40008, // 0xe8: lw $a0,8($sp)
+ 0x27bd0068, // 0xec: addiu $sp,$sp,104
+ 0x0300f825, // 0xf0: move $ra, $t8
+ 0x03200008, // 0xf4: jr $t9
+ 0x00000000, // 0xf8: move $t9, $v0/v1
+ };
+
+ const unsigned ReentryFnAddrOffset = 0x7c; // JIT re-entry fn addr lui
+ const unsigned CallbackMgrAddrOffset = 0x6c; // Callback manager addr lui
+  const unsigned Offsett = 0xf8; // Offset of the "move $t9,$v0/$v1" placeholder patched below.
+
+ memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
+
+  // Depending on endianness, the return value will be in $v0 or $v1.
+ uint32_t MoveVxT9 = isBigEndian ? 0x0060c825 : 0x0040c825;
+ memcpy(ResolverMem + Offsett, &MoveVxT9, sizeof(MoveVxT9));
+
+ uint64_t CallMgrAddr = reinterpret_cast<uint64_t>(CallbackMgr);
+ uint32_t CallMgrLUi = 0x3c040000 | (((CallMgrAddr + 0x8000) >> 16) & 0xFFFF);
+ uint32_t CallMgrADDiu = 0x24840000 | ((CallMgrAddr) & 0xFFFF);
+ memcpy(ResolverMem + CallbackMgrAddrOffset, &CallMgrLUi, sizeof(CallMgrLUi));
+ memcpy(ResolverMem + CallbackMgrAddrOffset + 4, &CallMgrADDiu,
+ sizeof(CallMgrADDiu));
+
+ uint64_t ReentryAddr = reinterpret_cast<uint64_t>(ReentryFn);
+ uint32_t ReentryLUi = 0x3c190000 | (((ReentryAddr + 0x8000) >> 16) & 0xFFFF);
+ uint32_t ReentryADDiu = 0x27390000 | ((ReentryAddr) & 0xFFFF);
+ memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryLUi, sizeof(ReentryLUi));
+ memcpy(ResolverMem + ReentryFnAddrOffset + 4, &ReentryADDiu,
+ sizeof(ReentryADDiu));
+}
+
+void OrcMips32_Base::writeTrampolines(uint8_t *TrampolineMem,
+ void *ResolverAddr,
+ unsigned NumTrampolines) {
+
+ uint32_t *Trampolines = reinterpret_cast<uint32_t *>(TrampolineMem);
+ uint64_t ResolveAddr = reinterpret_cast<uint64_t>(ResolverAddr);
+ uint32_t RHiAddr = ((ResolveAddr + 0x8000) >> 16);
+
+ for (unsigned I = 0; I < NumTrampolines; ++I) {
+ Trampolines[5 * I + 0] = 0x03e0c025; // move $t8,$ra
+ Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF); // lui $t9,resolveAddr
+ Trampolines[5 * I + 2] = 0x27390000 | (ResolveAddr & 0xFFFF); // addiu $t9,$t9,resolveAddr
+ Trampolines[5 * I + 3] = 0x0320f809; // jalr $t9
+ Trampolines[5 * I + 4] = 0x00000000; // nop
+ }
+}
+
+Error OrcMips32_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs,
+ void *InitialPtrVal) {
+ // Stub format is:
+ //
+ // .section __orc_stubs
+ // stub1:
+ // lui $t9, ptr1
+ // lw $t9, %lo(ptr1)($t9)
+ // jr $t9
+ // stub2:
+ // lui $t9, ptr2
+ // lw $t9,%lo(ptr1)($t9)
+ // jr $t9
+ //
+ // ...
+ //
+ // .section __orc_ptrs
+ // ptr1:
+ // .word 0x0
+ // ptr2:
+ // .word 0x0
+ //
+ // ...
+
+ const unsigned StubSize = IndirectStubsInfo::StubSize;
+
+ // Emit at least MinStubs, rounded up to fill the pages allocated.
+ unsigned PageSize = sys::Process::getPageSize();
+ unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
+ unsigned NumStubs = (NumPages * PageSize) / StubSize;
+
+ // Allocate memory for stubs and pointers in one call.
+ std::error_code EC;
+ auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
+ 2 * NumPages * PageSize, nullptr,
+ sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
+
+ if (EC)
+ return errorCodeToError(EC);
+
+ // Create separate MemoryBlocks representing the stubs and pointers.
+ sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize);
+ sys::MemoryBlock PtrsBlock(static_cast<char *>(StubsMem.base()) +
+ NumPages * PageSize,
+ NumPages * PageSize);
+
+  // Populate the stubs page and mark it executable.
+ uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlock.base());
+ uint64_t PtrAddr = reinterpret_cast<uint64_t>(Stub) + NumPages * PageSize;
+
+ for (unsigned I = 0; I < NumStubs; ++I) {
+ uint32_t HiAddr = ((PtrAddr + 0x8000) >> 16);
+ Stub[4 * I + 0] = 0x3c190000 | (HiAddr & 0xFFFF); // lui $t9,ptr1
+ Stub[4 * I + 1] = 0x8f390000 | (PtrAddr & 0xFFFF); // lw $t9,%lo(ptr1)($t9)
+ Stub[4 * I + 2] = 0x03200008; // jr $t9
+ Stub[4 * I + 3] = 0x00000000; // nop
+ PtrAddr += 4;
+ }
+
+ if (auto EC = sys::Memory::protectMappedMemory(
+ StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC))
+ return errorCodeToError(EC);
+
+  // Initialize all pointers to point at InitialPtrVal.
+ void **Ptr = reinterpret_cast<void **>(PtrsBlock.base());
+ for (unsigned I = 0; I < NumStubs; ++I)
+ Ptr[I] = InitialPtrVal;
+
+ StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem));
+
+ return Error::success();
+}
+
+void OrcMips64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
+ void *CallbackMgr) {
+
+ const uint32_t ResolverCode[] = {
+ //resolver_entry:
+ 0x67bdff30, // 0x00: daddiu $sp,$sp,-208
+ 0xffa20000, // 0x04: sd v0,0(sp)
+ 0xffa30008, // 0x08: sd v1,8(sp)
+ 0xffa40010, // 0x0c: sd a0,16(sp)
+ 0xffa50018, // 0x10: sd a1,24(sp)
+ 0xffa60020, // 0x14: sd a2,32(sp)
+ 0xffa70028, // 0x18: sd a3,40(sp)
+ 0xffa80030, // 0x1c: sd a4,48(sp)
+ 0xffa90038, // 0x20: sd a5,56(sp)
+ 0xffaa0040, // 0x24: sd a6,64(sp)
+ 0xffab0048, // 0x28: sd a7,72(sp)
+ 0xffac0050, // 0x2c: sd t0,80(sp)
+ 0xffad0058, // 0x30: sd t1,88(sp)
+ 0xffae0060, // 0x34: sd t2,96(sp)
+ 0xffaf0068, // 0x38: sd t3,104(sp)
+ 0xffb00070, // 0x3c: sd s0,112(sp)
+ 0xffb10078, // 0x40: sd s1,120(sp)
+ 0xffb20080, // 0x44: sd s2,128(sp)
+ 0xffb30088, // 0x48: sd s3,136(sp)
+ 0xffb40090, // 0x4c: sd s4,144(sp)
+ 0xffb50098, // 0x50: sd s5,152(sp)
+ 0xffb600a0, // 0x54: sd s6,160(sp)
+ 0xffb700a8, // 0x58: sd s7,168(sp)
+ 0xffb800b0, // 0x5c: sd t8,176(sp)
+ 0xffb900b8, // 0x60: sd t9,184(sp)
+ 0xffbe00c0, // 0x64: sd fp,192(sp)
+ 0xffbf00c8, // 0x68: sd ra,200(sp)
+
+ // Callback manager addr.
+      0x00000000, // 0x6c: lui $a0,highest(callbackmgr)
+      0x00000000, // 0x70: daddiu $a0,$a0,higher(callbackmgr)
+ 0x00000000, // 0x74: dsll $a0,$a0,16
+ 0x00000000, // 0x78: daddiu $a0,$a0,hi(callbackmgr)
+ 0x00000000, // 0x7c: dsll $a0,$a0,16
+ 0x00000000, // 0x80: daddiu $a0,$a0,lo(callbackmgr)
+
+ 0x03e02825, // 0x84: move $a1, $ra
+ 0x64a5ffdc, // 0x88: daddiu $a1,$a1,-36
+
+ // JIT re-entry fn addr:
+      0x00000000, // 0x8c: lui $t9,highest(reentry)
+      0x00000000, // 0x90: daddiu $t9,$t9,higher(reentry)
+      0x00000000, // 0x94: dsll $t9,$t9,16
+      0x00000000, // 0x98: daddiu $t9,$t9,hi(reentry)
+      0x00000000, // 0x9c: dsll $t9,$t9,16
+      0x00000000, // 0xa0: daddiu $t9,$t9,lo(reentry)
+ 0x0320f809, // 0xa4: jalr $t9
+ 0x00000000, // 0xa8: nop
+ 0xdfbf00c8, // 0xac: ld ra, 200(sp)
+ 0xdfbe00c0, // 0xb0: ld fp, 192(sp)
+ 0xdfb900b8, // 0xb4: ld t9, 184(sp)
+ 0xdfb800b0, // 0xb8: ld t8, 176(sp)
+ 0xdfb700a8, // 0xbc: ld s7, 168(sp)
+ 0xdfb600a0, // 0xc0: ld s6, 160(sp)
+ 0xdfb50098, // 0xc4: ld s5, 152(sp)
+ 0xdfb40090, // 0xc8: ld s4, 144(sp)
+ 0xdfb30088, // 0xcc: ld s3, 136(sp)
+ 0xdfb20080, // 0xd0: ld s2, 128(sp)
+ 0xdfb10078, // 0xd4: ld s1, 120(sp)
+ 0xdfb00070, // 0xd8: ld s0, 112(sp)
+ 0xdfaf0068, // 0xdc: ld t3, 104(sp)
+ 0xdfae0060, // 0xe0: ld t2, 96(sp)
+ 0xdfad0058, // 0xe4: ld t1, 88(sp)
+ 0xdfac0050, // 0xe8: ld t0, 80(sp)
+ 0xdfab0048, // 0xec: ld a7, 72(sp)
+ 0xdfaa0040, // 0xf0: ld a6, 64(sp)
+ 0xdfa90038, // 0xf4: ld a5, 56(sp)
+ 0xdfa80030, // 0xf8: ld a4, 48(sp)
+ 0xdfa70028, // 0xfc: ld a3, 40(sp)
+ 0xdfa60020, // 0x100: ld a2, 32(sp)
+ 0xdfa50018, // 0x104: ld a1, 24(sp)
+ 0xdfa40010, // 0x108: ld a0, 16(sp)
+ 0xdfa30008, // 0x10c: ld v1, 8(sp)
+ 0x67bd00d0, // 0x110: daddiu $sp,$sp,208
+ 0x0300f825, // 0x114: move $ra, $t8
+ 0x03200008, // 0x118: jr $t9
+ 0x0040c825, // 0x11c: move $t9, $v0
+ };
+
+ const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lui
+ const unsigned CallbackMgrAddrOffset = 0x6c; // Callback manager addr lui
+
+ memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
+
+ uint64_t CallMgrAddr = reinterpret_cast<uint64_t>(CallbackMgr);
+
+ uint32_t CallMgrLUi =
+ 0x3c040000 | (((CallMgrAddr + 0x800080008000) >> 48) & 0xFFFF);
+ uint32_t CallMgrDADDiu =
+ 0x64840000 | (((CallMgrAddr + 0x80008000) >> 32) & 0xFFFF);
+ uint32_t CallMgrDSLL = 0x00042438;
+ uint32_t CallMgrDADDiu2 =
+ 0x64840000 | ((((CallMgrAddr + 0x8000) >> 16) & 0xFFFF));
+ uint32_t CallMgrDSLL2 = 0x00042438;
+ uint32_t CallMgrDADDiu3 = 0x64840000 | ((CallMgrAddr)&0xFFFF);
+
+ memcpy(ResolverMem + CallbackMgrAddrOffset, &CallMgrLUi, sizeof(CallMgrLUi));
+ memcpy(ResolverMem + (CallbackMgrAddrOffset + 4), &CallMgrDADDiu,
+ sizeof(CallMgrDADDiu));
+ memcpy(ResolverMem + (CallbackMgrAddrOffset + 8), &CallMgrDSLL,
+ sizeof(CallMgrDSLL));
+ memcpy(ResolverMem + (CallbackMgrAddrOffset + 12), &CallMgrDADDiu2,
+ sizeof(CallMgrDADDiu2));
+ memcpy(ResolverMem + (CallbackMgrAddrOffset + 16), &CallMgrDSLL2,
+ sizeof(CallMgrDSLL2));
+ memcpy(ResolverMem + (CallbackMgrAddrOffset + 20), &CallMgrDADDiu3,
+ sizeof(CallMgrDADDiu3));
+
+ uint64_t ReentryAddr = reinterpret_cast<uint64_t>(ReentryFn);
+
+ uint32_t ReentryLUi =
+ 0x3c190000 | (((ReentryAddr + 0x800080008000) >> 48) & 0xFFFF);
+
+ uint32_t ReentryDADDiu =
+ 0x67390000 | (((ReentryAddr + 0x80008000) >> 32) & 0xFFFF);
+
+ uint32_t ReentryDSLL = 0x0019cc38;
+
+ uint32_t ReentryDADDiu2 =
+ 0x67390000 | (((ReentryAddr + 0x8000) >> 16) & 0xFFFF);
+
+ uint32_t ReentryDSLL2 = 0x0019cc38;
+
+ uint32_t ReentryDADDiu3 = 0x67390000 | ((ReentryAddr)&0xFFFF);
+
+ memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryLUi, sizeof(ReentryLUi));
+ memcpy(ResolverMem + (ReentryFnAddrOffset + 4), &ReentryDADDiu,
+ sizeof(ReentryDADDiu));
+ memcpy(ResolverMem + (ReentryFnAddrOffset + 8), &ReentryDSLL,
+ sizeof(ReentryDSLL));
+ memcpy(ResolverMem + (ReentryFnAddrOffset + 12), &ReentryDADDiu2,
+ sizeof(ReentryDADDiu2));
+ memcpy(ResolverMem + (ReentryFnAddrOffset + 16), &ReentryDSLL2,
+ sizeof(ReentryDSLL2));
+ memcpy(ResolverMem + (ReentryFnAddrOffset + 20), &ReentryDADDiu3,
+ sizeof(ReentryDADDiu3));
+}
+
+void OrcMips64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+ unsigned NumTrampolines) {
+
+ uint32_t *Trampolines = reinterpret_cast<uint32_t *>(TrampolineMem);
+ uint64_t ResolveAddr = reinterpret_cast<uint64_t>(ResolverAddr);
+
+ uint64_t HeighestAddr = ((ResolveAddr + 0x800080008000) >> 48);
+ uint64_t HeigherAddr = ((ResolveAddr + 0x80008000) >> 32);
+ uint64_t HiAddr = ((ResolveAddr + 0x8000) >> 16);
+
+ for (unsigned I = 0; I < NumTrampolines; ++I) {
+ Trampolines[10 * I + 0] = 0x03e0c025; // move $t8,$ra
+ Trampolines[10 * I + 1] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,resolveAddr
+ Trampolines[10 * I + 2] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(resolveAddr)
+ Trampolines[10 * I + 3] = 0x0019cc38; // dsll $t9,$t9,16
+ Trampolines[10 * I + 4] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr)
+ Trampolines[10 * I + 5] = 0x0019cc38; // dsll $t9,$t9,16
+ Trampolines[10 * I + 6] = 0x67390000 | (ResolveAddr & 0xFFFF); // daddiu $t9,$t9,%lo(ptr)
+ Trampolines[10 * I + 7] = 0x0320f809; // jalr $t9
+ Trampolines[10 * I + 8] = 0x00000000; // nop
+ Trampolines[10 * I + 9] = 0x00000000; // nop
+ }
+}
+
+Error OrcMips64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs,
+ void *InitialPtrVal) {
+ // Stub format is:
+ //
+ // .section __orc_stubs
+ // stub1:
+ // lui $t9,ptr1
+ // dsll $t9,$t9,16
+ // daddiu $t9,$t9,%hi(ptr)
+ // dsll $t9,$t9,16
+ // ld $t9,%lo(ptr)
+ // jr $t9
+ // stub2:
+ // lui $t9,ptr1
+ // dsll $t9,$t9,16
+ // daddiu $t9,$t9,%hi(ptr)
+ // dsll $t9,$t9,16
+ // ld $t9,%lo(ptr)
+ // jr $t9
+ //
+ // ...
+ //
+ // .section __orc_ptrs
+ // ptr1:
+ // .dword 0x0
+ // ptr2:
+ // .dword 0x0
+ //
+ // ...
+ const unsigned StubSize = IndirectStubsInfo::StubSize;
+
+ // Emit at least MinStubs, rounded up to fill the pages allocated.
+ unsigned PageSize = sys::Process::getPageSize();
+ unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
+ unsigned NumStubs = (NumPages * PageSize) / StubSize;
+
+ // Allocate memory for stubs and pointers in one call.
+ std::error_code EC;
+ auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
+ 2 * NumPages * PageSize, nullptr,
+ sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
+
+ if (EC)
+ return errorCodeToError(EC);
+
+ // Create separate MemoryBlocks representing the stubs and pointers.
+ sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize);
+ sys::MemoryBlock PtrsBlock(static_cast<char *>(StubsMem.base()) +
+ NumPages * PageSize,
+ NumPages * PageSize);
+
+  // Populate the stubs page and mark it executable.
+ uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlock.base());
+ uint64_t PtrAddr = reinterpret_cast<uint64_t>(PtrsBlock.base());
+
+ for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) {
+ uint64_t HeighestAddr = ((PtrAddr + 0x800080008000) >> 48);
+ uint64_t HeigherAddr = ((PtrAddr + 0x80008000) >> 32);
+ uint64_t HiAddr = ((PtrAddr + 0x8000) >> 16);
+ Stub[8 * I + 0] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,ptr1
+ Stub[8 * I + 1] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(ptr)
+ Stub[8 * I + 2] = 0x0019cc38; // dsll $t9,$t9,16
+ Stub[8 * I + 3] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr)
+ Stub[8 * I + 4] = 0x0019cc38; // dsll $t9,$t9,16
+ Stub[8 * I + 5] = 0xdf390000 | (PtrAddr & 0xFFFF); // ld $t9,%lo(ptr)
+ Stub[8 * I + 6] = 0x03200008; // jr $t9
+ Stub[8 * I + 7] = 0x00000000; // nop
+ }
+
+ if (auto EC = sys::Memory::protectMappedMemory(
+ StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC))
+ return errorCodeToError(EC);
+
+  // Initialize all pointers to point at InitialPtrVal.
+ void **Ptr = reinterpret_cast<void **>(PtrsBlock.base());
+ for (unsigned I = 0; I < NumStubs; ++I)
+ Ptr[I] = InitialPtrVal;
+
+ StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem));
+
+ return Error::success();
+}
} // End namespace orc.
} // End namespace llvm.
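The MIPS64 immediates computed above follow the usual %highest/%higher/%hi/%lo split: daddiu sign-extends its 16-bit immediate, so each higher chunk is taken from the address plus a rounding constant. A small self-contained check (splitAndCheck and sext16 are illustrative names) that the emitted lui/daddiu/dsll sequence reconstructs the original address:

    #include <cassert>
    #include <cstdint>

    static uint64_t sext16(uint16_t V) { return (uint64_t)(int64_t)(int16_t)V; }

    void splitAndCheck(uint64_t Addr) {
      uint16_t Highest = (Addr + 0x800080008000ULL) >> 48; // %highest
      uint16_t Higher  = (Addr + 0x80008000ULL) >> 32;     // %higher
      uint16_t Hi      = (Addr + 0x8000ULL) >> 16;         // %hi
      uint16_t Lo      = Addr;                             // %lo

      // Mirror the emitted sequence: lui; daddiu; dsll 16; daddiu; dsll 16; daddiu.
      uint64_t R = sext16(Highest) << 16; // lui sign-extends its 32-bit result
      R += sext16(Higher);                // daddiu
      R <<= 16;                           // dsll $t9,$t9,16
      R += sext16(Hi);                    // daddiu
      R <<= 16;                           // dsll $t9,$t9,16
      R += sext16(Lo);                    // daddiu
      assert(R == Addr && "chunked address must round-trip");
    }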
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
index d6005d24a648..6dea64a6e78f 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
@@ -42,89 +42,110 @@ void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledName,
void LLVMOrcDisposeMangledSymbol(char *MangledName) { delete[] MangledName; }
-LLVMOrcErrorCode
-LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- LLVMOrcLazyCompileCallbackFn Callback,
- void *CallbackCtx) {
+LLVMErrorRef LLVMOrcCreateLazyCompileCallback(
+ LLVMOrcJITStackRef JITStack, LLVMOrcTargetAddress *RetAddr,
+ LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.createLazyCompileCallback(*RetAddr, Callback, CallbackCtx);
+ if (auto Addr = J.createLazyCompileCallback(Callback, CallbackCtx)) {
+ *RetAddr = *Addr;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Addr.takeError());
}
-LLVMOrcErrorCode LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress InitAddr) {
+LLVMErrorRef LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress InitAddr) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.createIndirectStub(StubName, InitAddr);
+ return wrap(J.createIndirectStub(StubName, InitAddr));
}
-LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress NewAddr) {
+LLVMErrorRef LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress NewAddr) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.setIndirectStubPointer(StubName, NewAddr);
+ return wrap(J.setIndirectStubPointer(StubName, NewAddr));
}
-LLVMOrcErrorCode
-LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle, LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx) {
+LLVMErrorRef LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
std::unique_ptr<Module> M(unwrap(Mod));
- return J.addIRModuleEager(*RetHandle, std::move(M), SymbolResolver,
- SymbolResolverCtx);
+ if (auto Handle =
+ J.addIRModuleEager(std::move(M), SymbolResolver, SymbolResolverCtx)) {
+ *RetHandle = *Handle;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Handle.takeError());
}
-LLVMOrcErrorCode
-LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle, LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx) {
+LLVMErrorRef LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
std::unique_ptr<Module> M(unwrap(Mod));
- return J.addIRModuleLazy(*RetHandle, std::move(M), SymbolResolver,
- SymbolResolverCtx);
+ if (auto Handle =
+ J.addIRModuleLazy(std::move(M), SymbolResolver, SymbolResolverCtx)) {
+ *RetHandle = *Handle;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Handle.takeError());
}
-LLVMOrcErrorCode
-LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle,
- LLVMMemoryBufferRef Obj,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx) {
+LLVMErrorRef LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMMemoryBufferRef Obj,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
std::unique_ptr<MemoryBuffer> O(unwrap(Obj));
- return J.addObject(*RetHandle, std::move(O), SymbolResolver,
- SymbolResolverCtx);
+ if (auto Handle =
+ J.addObject(std::move(O), SymbolResolver, SymbolResolverCtx)) {
+ *RetHandle = *Handle;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Handle.takeError());
}
-LLVMOrcErrorCode LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle H) {
+LLVMErrorRef LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle H) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.removeModule(H);
+ return wrap(J.removeModule(H));
}
-LLVMOrcErrorCode LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- const char *SymbolName) {
+LLVMErrorRef LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
+ LLVMOrcTargetAddress *RetAddr,
+ const char *SymbolName) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.findSymbolAddress(*RetAddr, SymbolName, true);
+ if (auto Addr = J.findSymbolAddress(SymbolName, true)) {
+ *RetAddr = *Addr;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Addr.takeError());
}
-LLVMOrcErrorCode LLVMOrcGetSymbolAddressIn(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- LLVMOrcModuleHandle H,
- const char *SymbolName) {
+LLVMErrorRef LLVMOrcGetSymbolAddressIn(LLVMOrcJITStackRef JITStack,
+ LLVMOrcTargetAddress *RetAddr,
+ LLVMOrcModuleHandle H,
+ const char *SymbolName) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.findSymbolAddressIn(*RetAddr, H, SymbolName, true);
+ if (auto Addr = J.findSymbolAddressIn(H, SymbolName, true)) {
+ *RetAddr = *Addr;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Addr.takeError());
}
-LLVMOrcErrorCode LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) {
+LLVMErrorRef LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) {
auto *J = unwrap(JITStack);
auto Err = J->shutdown();
delete J;
- return Err;
+ return wrap(std::move(Err));
}
void LLVMOrcRegisterJITEventListener(LLVMOrcJITStackRef JITStack, LLVMJITEventListenerRef L)
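A short sketch of consuming the LLVMErrorRef values the reworked C bindings above now return, assuming LLVMGetErrorMessage()/LLVMDisposeErrorMessage() are the helpers exposed by the llvm-c/Error.h header added in this import; lookupMain is an illustrative caller, not part of the patch:

    #include "llvm-c/Error.h"
    #include "llvm-c/OrcBindings.h"
    #include <cstdio>

    bool lookupMain(LLVMOrcJITStackRef JITStack, LLVMOrcTargetAddress *Addr) {
      if (LLVMErrorRef Err = LLVMOrcGetSymbolAddress(JITStack, Addr, "main")) {
        char *Msg = LLVMGetErrorMessage(Err); // takes ownership of Err
        std::fprintf(stderr, "ORC error: %s\n", Msg);
        LLVMDisposeErrorMessage(Msg);
        return false;
      }
      return true; // *Addr is 0 if "main" was not found, per the lookup code above
    }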
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index b9f8a370d2f0..817a4b89bfb0 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -77,9 +77,9 @@ public:
};
template <>
- class GenericLayerImpl<orc::RTDyldObjectLinkingLayer> : public GenericLayer {
+ class GenericLayerImpl<orc::LegacyRTDyldObjectLinkingLayer> : public GenericLayer {
private:
- using LayerT = orc::RTDyldObjectLinkingLayer;
+ using LayerT = orc::LegacyRTDyldObjectLinkingLayer;
public:
GenericLayerImpl(LayerT &Layer) : Layer(Layer) {}
@@ -107,10 +107,10 @@ class OrcCBindingsStack {
public:
using CompileCallbackMgr = orc::JITCompileCallbackManager;
- using ObjLayerT = orc::RTDyldObjectLinkingLayer;
- using CompileLayerT = orc::IRCompileLayer<ObjLayerT, orc::SimpleCompiler>;
+ using ObjLayerT = orc::LegacyRTDyldObjectLinkingLayer;
+ using CompileLayerT = orc::LegacyIRCompileLayer<ObjLayerT, orc::SimpleCompiler>;
using CODLayerT =
- orc::CompileOnDemandLayer<CompileLayerT, CompileCallbackMgr>;
+ orc::LegacyCompileOnDemandLayer<CompileLayerT, CompileCallbackMgr>;
using CallbackManagerBuilder =
std::function<std::unique_ptr<CompileCallbackMgr>()>;
@@ -129,20 +129,21 @@ private:
: Stack(Stack), ExternalResolver(std::move(ExternalResolver)),
ExternalResolverCtx(std::move(ExternalResolverCtx)) {}
- orc::SymbolFlagsMap
- lookupFlags(const orc::SymbolNameSet &Symbols) override {
- orc::SymbolFlagsMap SymbolFlags;
+ orc::SymbolNameSet
+ getResponsibilitySet(const orc::SymbolNameSet &Symbols) override {
+ orc::SymbolNameSet Result;
for (auto &S : Symbols) {
- if (auto Sym = findSymbol(*S))
- SymbolFlags[S] = Sym.getFlags();
- else if (auto Err = Sym.takeError()) {
+ if (auto Sym = findSymbol(*S)) {
+ if (!Sym.getFlags().isStrong())
+ Result.insert(S);
+ } else if (auto Err = Sym.takeError()) {
Stack.reportError(std::move(Err));
- return orc::SymbolFlagsMap();
+ return orc::SymbolNameSet();
}
}
- return SymbolFlags;
+ return Result;
}
orc::SymbolNameSet
@@ -182,10 +183,17 @@ private:
// 2. Runtime overrides.
// 3. External resolver (if present).
- if (auto Sym = Stack.CODLayer.findSymbol(Name, true))
- return Sym;
- else if (auto Err = Sym.takeError())
- return Sym.takeError();
+ if (Stack.CODLayer) {
+ if (auto Sym = Stack.CODLayer->findSymbol(Name, true))
+ return Sym;
+ else if (auto Err = Sym.takeError())
+ return Sym.takeError();
+ } else {
+ if (auto Sym = Stack.CompileLayer.findSymbol(Name, true))
+ return Sym;
+ else if (auto Err = Sym.takeError())
+ return Sym.takeError();
+ }
if (auto Sym = Stack.CXXRuntimeOverrides.searchOverrides(Name))
return Sym;
@@ -205,8 +213,8 @@ private:
public:
OrcCBindingsStack(TargetMachine &TM,
IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
- : CCMgr(createLocalCompileCallbackManager(TM.getTargetTriple(), ES, 0)),
- DL(TM.createDataLayout()), IndirectStubsMgr(IndirectStubsMgrBuilder()),
+ : CCMgr(createCompileCallbackManager(TM, ES)), DL(TM.createDataLayout()),
+ IndirectStubsMgr(IndirectStubsMgrBuilder()),
ObjectLayer(ES,
[this](orc::VModuleKey K) {
auto ResolverI = Resolvers.find(K);
@@ -226,31 +234,19 @@ public:
this->notifyFreed(K, Obj);
}),
CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
- CODLayer(ES, CompileLayer,
- [this](orc::VModuleKey K) {
- auto ResolverI = Resolvers.find(K);
- assert(ResolverI != Resolvers.end() &&
- "No resolver for module K");
- return ResolverI->second;
- },
- [this](orc::VModuleKey K,
- std::shared_ptr<orc::SymbolResolver> Resolver) {
- assert(!Resolvers.count(K) && "Resolver already present");
- Resolvers[K] = std::move(Resolver);
- },
- [](Function &F) { return std::set<Function *>({&F}); },
- *this->CCMgr, std::move(IndirectStubsMgrBuilder), false),
+ CODLayer(createCODLayer(ES, CompileLayer, CCMgr.get(),
+ std::move(IndirectStubsMgrBuilder), Resolvers)),
CXXRuntimeOverrides(
[this](const std::string &S) { return mangle(S); }) {}
- LLVMOrcErrorCode shutdown() {
+ Error shutdown() {
// Run any destructors registered with __cxa_atexit.
CXXRuntimeOverrides.runDestructors();
// Run any IR destructors.
for (auto &DtorRunner : IRStaticDestructorRunners)
if (auto Err = DtorRunner.runViaLayer(*this))
- return mapError(std::move(Err));
- return LLVMOrcErrSuccess;
+ return Err;
+ return Error::success();
}
std::string mangle(StringRef Name) {
@@ -267,35 +263,28 @@ public:
return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
}
-
- LLVMOrcErrorCode
- createLazyCompileCallback(JITTargetAddress &RetAddr,
- LLVMOrcLazyCompileCallbackFn Callback,
+ Expected<JITTargetAddress>
+ createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback,
void *CallbackCtx) {
auto WrappedCallback = [=]() -> JITTargetAddress {
return Callback(wrap(this), CallbackCtx);
};
- if (auto CCAddr = CCMgr->getCompileCallback(std::move(WrappedCallback))) {
- RetAddr = *CCAddr;
- return LLVMOrcErrSuccess;
- } else
- return mapError(CCAddr.takeError());
+ return CCMgr->getCompileCallback(std::move(WrappedCallback));
}
- LLVMOrcErrorCode createIndirectStub(StringRef StubName,
- JITTargetAddress Addr) {
- return mapError(
- IndirectStubsMgr->createStub(StubName, Addr, JITSymbolFlags::Exported));
+ Error createIndirectStub(StringRef StubName, JITTargetAddress Addr) {
+ return IndirectStubsMgr->createStub(StubName, Addr,
+ JITSymbolFlags::Exported);
}
- LLVMOrcErrorCode setIndirectStubPointer(StringRef Name,
- JITTargetAddress Addr) {
- return mapError(IndirectStubsMgr->updatePointer(Name, Addr));
+ Error setIndirectStubPointer(StringRef Name, JITTargetAddress Addr) {
+ return IndirectStubsMgr->updatePointer(Name, Addr);
}
+
template <typename LayerT>
- LLVMOrcErrorCode
- addIRModule(orc::VModuleKey &RetKey, LayerT &Layer, std::unique_ptr<Module> M,
+ Expected<orc::VModuleKey>
+ addIRModule(LayerT &Layer, std::unique_ptr<Module> M,
std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
LLVMOrcSymbolResolverFn ExternalResolver,
void *ExternalResolverCtx) {
@@ -313,79 +302,84 @@ public:
DtorNames.push_back(mangle(Dtor.Func->getName()));
// Add the module to the JIT.
- RetKey = ES.allocateVModule();
- Resolvers[RetKey] = std::make_shared<CBindingsResolver>(
- *this, ExternalResolver, ExternalResolverCtx);
- if (auto Err = Layer.addModule(RetKey, std::move(M)))
- return mapError(std::move(Err));
+ auto K = ES.allocateVModule();
+ Resolvers[K] = std::make_shared<CBindingsResolver>(*this, ExternalResolver,
+ ExternalResolverCtx);
+ if (auto Err = Layer.addModule(K, std::move(M)))
+ return std::move(Err);
- KeyLayers[RetKey] = detail::createGenericLayer(Layer);
+ KeyLayers[K] = detail::createGenericLayer(Layer);
// Run the static constructors, and save the static destructor runner for
// execution when the JIT is torn down.
- orc::CtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames),
- RetKey);
+ orc::LegacyCtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames), K);
if (auto Err = CtorRunner.runViaLayer(*this))
- return mapError(std::move(Err));
+ return std::move(Err);
- IRStaticDestructorRunners.emplace_back(std::move(DtorNames), RetKey);
+ IRStaticDestructorRunners.emplace_back(std::move(DtorNames), K);
- return LLVMOrcErrSuccess;
+ return K;
}
- LLVMOrcErrorCode addIRModuleEager(orc::VModuleKey &RetKey,
- std::unique_ptr<Module> M,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
- return addIRModule(RetKey, CompileLayer, std::move(M),
+ Expected<orc::VModuleKey>
+ addIRModuleEager(std::unique_ptr<Module> M,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
+ return addIRModule(CompileLayer, std::move(M),
llvm::make_unique<SectionMemoryManager>(),
std::move(ExternalResolver), ExternalResolverCtx);
}
- LLVMOrcErrorCode addIRModuleLazy(orc::VModuleKey &RetKey,
- std::unique_ptr<Module> M,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
- return addIRModule(RetKey, CODLayer, std::move(M),
+ Expected<orc::VModuleKey>
+ addIRModuleLazy(std::unique_ptr<Module> M,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
+ if (!CODLayer)
+ return make_error<StringError>("Can not add lazy module: No compile "
+ "callback manager available",
+ inconvertibleErrorCode());
+
+ return addIRModule(*CODLayer, std::move(M),
llvm::make_unique<SectionMemoryManager>(),
std::move(ExternalResolver), ExternalResolverCtx);
}
- LLVMOrcErrorCode removeModule(orc::VModuleKey K) {
+ Error removeModule(orc::VModuleKey K) {
// FIXME: Should error release the module key?
if (auto Err = KeyLayers[K]->removeModule(K))
- return mapError(std::move(Err));
+ return Err;
ES.releaseVModule(K);
KeyLayers.erase(K);
- return LLVMOrcErrSuccess;
+ return Error::success();
}
- LLVMOrcErrorCode addObject(orc::VModuleKey &RetKey,
- std::unique_ptr<MemoryBuffer> ObjBuffer,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
+ Expected<orc::VModuleKey> addObject(std::unique_ptr<MemoryBuffer> ObjBuffer,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
if (auto Obj = object::ObjectFile::createObjectFile(
ObjBuffer->getMemBufferRef())) {
- RetKey = ES.allocateVModule();
- Resolvers[RetKey] = std::make_shared<CBindingsResolver>(
+ auto K = ES.allocateVModule();
+ Resolvers[K] = std::make_shared<CBindingsResolver>(
*this, ExternalResolver, ExternalResolverCtx);
- if (auto Err = ObjectLayer.addObject(RetKey, std::move(ObjBuffer)))
- return mapError(std::move(Err));
+ if (auto Err = ObjectLayer.addObject(K, std::move(ObjBuffer)))
+ return std::move(Err);
- KeyLayers[RetKey] = detail::createGenericLayer(ObjectLayer);
+ KeyLayers[K] = detail::createGenericLayer(ObjectLayer);
- return LLVMOrcErrSuccess;
+ return K;
} else
- return mapError(Obj.takeError());
+ return Obj.takeError();
}
JITSymbol findSymbol(const std::string &Name,
bool ExportedSymbolsOnly) {
if (auto Sym = IndirectStubsMgr->findStub(Name, ExportedSymbolsOnly))
return Sym;
- return CODLayer.findSymbol(mangle(Name), ExportedSymbolsOnly);
+ if (CODLayer)
+ return CODLayer->findSymbol(mangle(Name), ExportedSymbolsOnly);
+ return CompileLayer.findSymbol(mangle(Name), ExportedSymbolsOnly);
}
JITSymbol findSymbolIn(orc::VModuleKey K, const std::string &Name,
@@ -394,45 +388,39 @@ public:
return KeyLayers[K]->findSymbolIn(K, mangle(Name), ExportedSymbolsOnly);
}
- LLVMOrcErrorCode findSymbolAddress(JITTargetAddress &RetAddr,
- const std::string &Name,
- bool ExportedSymbolsOnly) {
- RetAddr = 0;
+ Expected<JITTargetAddress> findSymbolAddress(const std::string &Name,
+ bool ExportedSymbolsOnly) {
if (auto Sym = findSymbol(Name, ExportedSymbolsOnly)) {
// Successful lookup, non-null symbol:
- if (auto AddrOrErr = Sym.getAddress()) {
- RetAddr = *AddrOrErr;
- return LLVMOrcErrSuccess;
- } else
- return mapError(AddrOrErr.takeError());
+ if (auto AddrOrErr = Sym.getAddress())
+ return *AddrOrErr;
+ else
+ return AddrOrErr.takeError();
} else if (auto Err = Sym.takeError()) {
// Lookup failure - report error.
- return mapError(std::move(Err));
+ return std::move(Err);
}
- // Otherwise we had a successful lookup but got a null result. We already
- // set RetAddr to '0' above, so just return success.
- return LLVMOrcErrSuccess;
+
+ // Symbol not found. Return 0.
+ return 0;
}
- LLVMOrcErrorCode findSymbolAddressIn(JITTargetAddress &RetAddr,
- orc::VModuleKey K,
- const std::string &Name,
- bool ExportedSymbolsOnly) {
- RetAddr = 0;
+ Expected<JITTargetAddress> findSymbolAddressIn(orc::VModuleKey K,
+ const std::string &Name,
+ bool ExportedSymbolsOnly) {
if (auto Sym = findSymbolIn(K, Name, ExportedSymbolsOnly)) {
// Successful lookup, non-null symbol:
- if (auto AddrOrErr = Sym.getAddress()) {
- RetAddr = *AddrOrErr;
- return LLVMOrcErrSuccess;
- } else
- return mapError(AddrOrErr.takeError());
+ if (auto AddrOrErr = Sym.getAddress())
+ return *AddrOrErr;
+ else
+ return AddrOrErr.takeError();
} else if (auto Err = Sym.takeError()) {
// Lookup failure - report error.
- return mapError(std::move(Err));
+ return std::move(Err);
}
- // Otherwise we had a successful lookup but got a null result. We already
- // set RetAddr to '0' above, so just return success.
- return LLVMOrcErrSuccess;
+
+ // Symbol not found. Return 0.
+ return 0;
}
const std::string &getErrorMessage() const { return ErrMsg; }
@@ -455,17 +443,45 @@ public:
}
private:
+ using ResolverMap =
+ std::map<orc::VModuleKey, std::shared_ptr<orc::SymbolResolver>>;
+
+ static std::unique_ptr<CompileCallbackMgr>
+ createCompileCallbackManager(TargetMachine &TM, orc::ExecutionSession &ES) {
+ auto CCMgr = createLocalCompileCallbackManager(TM.getTargetTriple(), ES, 0);
+ if (!CCMgr) {
+ // FIXME: It would be good if we could report this somewhere, but we don't
+ // have an instance yet.
+ logAllUnhandledErrors(CCMgr.takeError(), errs(), "ORC error: ");
+ return nullptr;
+ }
+ return std::move(*CCMgr);
+ }
- LLVMOrcErrorCode mapError(Error Err) {
- LLVMOrcErrorCode Result = LLVMOrcErrSuccess;
- handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) {
- // Handler of last resort.
- Result = LLVMOrcErrGeneric;
- ErrMsg = "";
- raw_string_ostream ErrStream(ErrMsg);
- EIB.log(ErrStream);
- });
- return Result;
+ static std::unique_ptr<CODLayerT>
+ createCODLayer(orc::ExecutionSession &ES, CompileLayerT &CompileLayer,
+ CompileCallbackMgr *CCMgr,
+ IndirectStubsManagerBuilder IndirectStubsMgrBuilder,
+ ResolverMap &Resolvers) {
+ // If there is no compile callback manager available we cannot create a
+ // compile-on-demand layer.
+ if (!CCMgr)
+ return nullptr;
+
+ return llvm::make_unique<CODLayerT>(
+ ES, CompileLayer,
+ [&Resolvers](orc::VModuleKey K) {
+ auto ResolverI = Resolvers.find(K);
+ assert(ResolverI != Resolvers.end() && "No resolver for module K");
+ return ResolverI->second;
+ },
+ [&Resolvers](orc::VModuleKey K,
+ std::shared_ptr<orc::SymbolResolver> Resolver) {
+ assert(!Resolvers.count(K) && "Resolver already present");
+ Resolvers[K] = std::move(Resolver);
+ },
+ [](Function &F) { return std::set<Function *>({&F}); }, *CCMgr,
+ std::move(IndirectStubsMgrBuilder), false);
}
void reportError(Error Err) {
@@ -476,13 +492,17 @@ private:
void notifyFinalized(orc::VModuleKey K,
const object::ObjectFile &Obj,
const RuntimeDyld::LoadedObjectInfo &LoadedObjInfo) {
+ uint64_t Key = static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>(Obj.getData().data()));
for (auto &Listener : EventListeners)
- Listener->NotifyObjectEmitted(Obj, LoadedObjInfo);
+ Listener->notifyObjectLoaded(Key, Obj, LoadedObjInfo);
}
void notifyFreed(orc::VModuleKey K, const object::ObjectFile &Obj) {
+ uint64_t Key = static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>(Obj.getData().data()));
for (auto &Listener : EventListeners)
- Listener->NotifyFreeingObject(Obj);
+ Listener->notifyFreeingObject(Key);
}
orc::ExecutionSession ES;
@@ -497,15 +517,15 @@ private:
ObjLayerT ObjectLayer;
CompileLayerT CompileLayer;
- CODLayerT CODLayer;
+ std::unique_ptr<CODLayerT> CODLayer;
std::map<orc::VModuleKey, std::unique_ptr<detail::GenericLayer>> KeyLayers;
- orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
- std::vector<orc::CtorDtorRunner<OrcCBindingsStack>> IRStaticDestructorRunners;
+ orc::LegacyLocalCXXRuntimeOverrides CXXRuntimeOverrides;
+ std::vector<orc::LegacyCtorDtorRunner<OrcCBindingsStack>> IRStaticDestructorRunners;
std::string ErrMsg;
- std::map<orc::VModuleKey, std::shared_ptr<orc::SymbolResolver>> Resolvers;
+ ResolverMap Resolvers;
};
} // end namespace llvm
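
The OrcCBindingsStack changes above replace the out-parameter-plus-LLVMOrcErrorCode convention with Expected<...> return values, so mapping errors to the C API now happens in the bindings rather than in the stack itself. The sketch below shows one way a C-API shim could consume the new interface; it is illustrative only (the actual OrcBindings.cpp changes are not part of this hunk), and wrapOrcError() is a hypothetical helper, not something added by the patch.

    // Hedged sketch: unwrap an Expected<orc::VModuleKey> back into the C-style
    // convention of llvm-c/OrcBindings.h. wrapOrcError() is assumed to stash the
    // error text and return LLVMOrcErrGeneric; it does not exist in this patch.
    LLVMOrcErrorCode exampleAddEagerly(OrcCBindingsStack &Stack,
                                       std::unique_ptr<Module> M,
                                       LLVMOrcSymbolResolverFn Resolver, void *Ctx,
                                       LLVMOrcModuleHandle *RetHandle) {
      auto KeyOrErr = Stack.addIRModuleEager(std::move(M), Resolver, Ctx);
      if (!KeyOrErr)
        return wrapOrcError(KeyOrErr.takeError()); // hypothetical error mapping
      *RetHandle = *KeyOrErr;                      // success: hand the key back to C
      return LLVMOrcErrSuccess;
    }
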
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
index 4def579e7097..617bc2fc64b5 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
@@ -128,7 +128,7 @@ void OrcMCJITReplacement::runStaticConstructorsDestructors(bool isDtors) {
auto &CtorDtorsMap = isDtors ? UnexecutedDestructors : UnexecutedConstructors;
for (auto &KV : CtorDtorsMap)
- cantFail(CtorDtorRunner<LazyEmitLayerT>(std::move(KV.second), KV.first)
+ cantFail(LegacyCtorDtorRunner<LazyEmitLayerT>(std::move(KV.second), KV.first)
.runViaLayer(LazyEmitLayer));
CtorDtorsMap.clear();
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index abe89ce70af9..36e7e83a8bab 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -144,26 +144,29 @@ class OrcMCJITReplacement : public ExecutionEngine {
public:
LinkingORCResolver(OrcMCJITReplacement &M) : M(M) {}
- SymbolFlagsMap lookupFlags(const SymbolNameSet &Symbols) override {
- SymbolFlagsMap SymbolFlags;
+ SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) override {
+ SymbolNameSet Result;
for (auto &S : Symbols) {
if (auto Sym = M.findMangledSymbol(*S)) {
- SymbolFlags[S] = Sym.getFlags();
+ if (!Sym.getFlags().isStrong())
+ Result.insert(S);
} else if (auto Err = Sym.takeError()) {
M.reportError(std::move(Err));
- return SymbolFlagsMap();
+ return SymbolNameSet();
} else {
if (auto Sym2 = M.ClientResolver->findSymbolInLogicalDylib(*S)) {
- SymbolFlags[S] = Sym2.getFlags();
+ if (!Sym2.getFlags().isStrong())
+ Result.insert(S);
} else if (auto Err = Sym2.takeError()) {
M.reportError(std::move(Err));
- return SymbolFlagsMap();
- }
+ return SymbolNameSet();
+ } else
+ Result.insert(S);
}
}
- return SymbolFlags;
+ return Result;
}
SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
@@ -272,14 +275,14 @@ public:
{
unsigned CtorId = 0, DtorId = 0;
for (auto Ctor : orc::getConstructors(*M)) {
- std::string NewCtorName = ("$static_ctor." + Twine(CtorId++)).str();
+ std::string NewCtorName = ("__ORCstatic_ctor." + Twine(CtorId++)).str();
Ctor.Func->setName(NewCtorName);
Ctor.Func->setLinkage(GlobalValue::ExternalLinkage);
Ctor.Func->setVisibility(GlobalValue::HiddenVisibility);
CtorNames.push_back(mangle(NewCtorName));
}
for (auto Dtor : orc::getDestructors(*M)) {
- std::string NewDtorName = ("$static_dtor." + Twine(DtorId++)).str();
+ std::string NewDtorName = ("__ORCstatic_dtor." + Twine(DtorId++)).str();
dbgs() << "Found dtor: " << NewDtorName << "\n";
Dtor.Func->setName(NewDtorName);
Dtor.Func->setLinkage(GlobalValue::ExternalLinkage);
@@ -458,8 +461,8 @@ private:
return MangledName;
}
- using ObjectLayerT = RTDyldObjectLinkingLayer;
- using CompileLayerT = IRCompileLayer<ObjectLayerT, orc::SimpleCompiler>;
+ using ObjectLayerT = LegacyRTDyldObjectLinkingLayer;
+ using CompileLayerT = LegacyIRCompileLayer<ObjectLayerT, orc::SimpleCompiler>;
using LazyEmitLayerT = LazyEmittingLayer<CompileLayerT>;
ExecutionSession ES;
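
The resolvers above are ported from lookupFlags(), which returned flags for every queried symbol, to getResponsibilitySet(), which returns only the subset of queried symbols the caller must provide itself, i.e. those with no pre-existing strong definition. A stand-alone sketch of that contract, using a toy symbol table rather than LLVM types:

    #include <map>
    #include <set>
    #include <string>

    enum class Strength { Weak, Strong };

    // A symbol lands in the result ("caller is responsible for it") when there is
    // no existing definition at all, or only a weak one that may be overridden.
    std::set<std::string>
    toyResponsibilitySet(const std::map<std::string, Strength> &ExistingDefs,
                         const std::set<std::string> &Queried) {
      std::set<std::string> Result;
      for (const auto &Name : Queried) {
        auto I = ExistingDefs.find(Name);
        if (I == ExistingDefs.end() || I->second == Strength::Weak)
          Result.insert(Name);
      }
      return Result;
    }
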
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index 71b4b73ca6d3..299d76183cd4 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -14,57 +14,56 @@ namespace {
using namespace llvm;
using namespace llvm::orc;
-class VSOSearchOrderResolver : public JITSymbolResolver {
+class JITDylibSearchOrderResolver : public JITSymbolResolver {
public:
- VSOSearchOrderResolver(MaterializationResponsibility &MR) : MR(MR) {}
+ JITDylibSearchOrderResolver(MaterializationResponsibility &MR) : MR(MR) {}
- Expected<LookupResult> lookup(const LookupSet &Symbols) {
- auto &ES = MR.getTargetVSO().getExecutionSession();
+ void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) {
+ auto &ES = MR.getTargetJITDylib().getExecutionSession();
SymbolNameSet InternedSymbols;
+ // Intern the requested symbols: lookup takes interned strings.
for (auto &S : Symbols)
- InternedSymbols.insert(ES.getSymbolStringPool().intern(S));
-
+ InternedSymbols.insert(ES.intern(S));
+
+ // Build an OnResolve callback to unwrap the interned strings and pass them
+ // to the OnResolved callback.
+ // FIXME: Switch to move capture of OnResolved once we have c++14.
+ auto OnResolvedWithUnwrap =
+ [OnResolved](Expected<SymbolMap> InternedResult) {
+ if (!InternedResult) {
+ OnResolved(InternedResult.takeError());
+ return;
+ }
+
+ LookupResult Result;
+ for (auto &KV : *InternedResult)
+ Result[*KV.first] = std::move(KV.second);
+ OnResolved(Result);
+ };
+
+ // We're not waiting for symbols to be ready. Just log any errors.
+ auto OnReady = [&ES](Error Err) { ES.reportError(std::move(Err)); };
+
+ // Register dependencies for all symbols contained in this set.
auto RegisterDependencies = [&](const SymbolDependenceMap &Deps) {
MR.addDependenciesForAll(Deps);
};
- auto InternedResult =
- MR.getTargetVSO().withSearchOrderDo([&](const VSOList &VSOs) {
- return ES.lookup(VSOs, InternedSymbols, RegisterDependencies, false);
- });
-
- if (!InternedResult)
- return InternedResult.takeError();
-
- LookupResult Result;
- for (auto &KV : *InternedResult)
- Result[*KV.first] = std::move(KV.second);
-
- return Result;
+ JITDylibSearchList SearchOrder;
+ MR.getTargetJITDylib().withSearchOrderDo(
+ [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; });
+ ES.lookup(SearchOrder, InternedSymbols, OnResolvedWithUnwrap, OnReady,
+ RegisterDependencies);
}
- Expected<LookupFlagsResult> lookupFlags(const LookupSet &Symbols) {
- auto &ES = MR.getTargetVSO().getExecutionSession();
-
- SymbolNameSet InternedSymbols;
-
- for (auto &S : Symbols)
- InternedSymbols.insert(ES.getSymbolStringPool().intern(S));
-
- SymbolFlagsMap InternedResult;
- MR.getTargetVSO().withSearchOrderDo([&](const VSOList &VSOs) {
- // An empty search order is pathalogical, but allowed.
- if (VSOs.empty())
- return;
+ Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) {
+ LookupSet Result;
- assert(VSOs.front() && "VSOList entry can not be null");
- InternedResult = VSOs.front()->lookupFlags(InternedSymbols);
- });
-
- LookupFlagsResult Result;
- for (auto &KV : InternedResult)
- Result[*KV.first] = std::move(KV.second);
+ for (auto &KV : MR.getSymbols()) {
+ if (Symbols.count(*KV.first))
+ Result.insert(*KV.first);
+ }
return Result;
}
@@ -78,52 +77,41 @@ private:
namespace llvm {
namespace orc {
-RTDyldObjectLinkingLayer2::RTDyldObjectLinkingLayer2(
+RTDyldObjectLinkingLayer::RTDyldObjectLinkingLayer(
ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager,
- NotifyLoadedFunction NotifyLoaded, NotifyFinalizedFunction NotifyFinalized)
+ NotifyLoadedFunction NotifyLoaded, NotifyEmittedFunction NotifyEmitted)
: ObjectLayer(ES), GetMemoryManager(GetMemoryManager),
NotifyLoaded(std::move(NotifyLoaded)),
- NotifyFinalized(std::move(NotifyFinalized)), ProcessAllSections(false) {}
+ NotifyEmitted(std::move(NotifyEmitted)) {}
-void RTDyldObjectLinkingLayer2::emit(MaterializationResponsibility R,
- VModuleKey K,
- std::unique_ptr<MemoryBuffer> O) {
+void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
+ std::unique_ptr<MemoryBuffer> O) {
assert(O && "Object must not be null");
- auto &ES = getExecutionSession();
-
- auto ObjFile = object::ObjectFile::createObjectFile(*O);
- if (!ObjFile) {
- getExecutionSession().reportError(ObjFile.takeError());
- R.failMaterialization();
- }
-
- auto MemoryManager = GetMemoryManager(K);
-
- VSOSearchOrderResolver Resolver(R);
- auto RTDyld = llvm::make_unique<RuntimeDyld>(*MemoryManager, Resolver);
- RTDyld->setProcessAllSections(ProcessAllSections);
+ // This method launches an asynchronous link step that will fulfill our
+ // materialization responsibility. We need to switch R to be heap
+ // allocated before that happens so it can live as long as the asynchronous
+ // link needs it to (i.e. it must be able to outlive this method).
+ auto SharedR = std::make_shared<MaterializationResponsibility>(std::move(R));
- {
- std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
+ auto &ES = getExecutionSession();
- assert(!ActiveRTDylds.count(K) &&
- "An active RTDyld already exists for this key?");
- ActiveRTDylds[K] = RTDyld.get();
+ auto Obj = object::ObjectFile::createObjectFile(*O);
- assert(!MemMgrs.count(K) &&
- "A memory manager already exists for this key?");
- MemMgrs[K] = std::move(MemoryManager);
+ if (!Obj) {
+ getExecutionSession().reportError(Obj.takeError());
+ SharedR->failMaterialization();
+ return;
}
- auto Info = RTDyld->loadObject(**ObjFile);
-
+ // Collect the internal symbols from the object file: We will need to
+ // filter these later.
+ auto InternalSymbols = std::make_shared<std::set<StringRef>>();
{
- std::set<StringRef> InternalSymbols;
- for (auto &Sym : (*ObjFile)->symbols()) {
+ for (auto &Sym : (*Obj)->symbols()) {
if (!(Sym.getFlags() & object::BasicSymbolRef::SF_Global)) {
if (auto SymName = Sym.getName())
- InternalSymbols.insert(*SymName);
+ InternalSymbols->insert(*SymName);
else {
ES.reportError(SymName.takeError());
R.failMaterialization();
@@ -131,46 +119,97 @@ void RTDyldObjectLinkingLayer2::emit(MaterializationResponsibility R,
}
}
}
-
- SymbolMap Symbols;
- for (auto &KV : RTDyld->getSymbolTable())
- if (!InternalSymbols.count(KV.first))
- Symbols[ES.getSymbolStringPool().intern(KV.first)] = KV.second;
-
- R.resolve(Symbols);
}
- if (NotifyLoaded)
- NotifyLoaded(K, **ObjFile, *Info);
-
- RTDyld->finalizeWithMemoryManagerLocking();
+ auto K = R.getVModuleKey();
+ RuntimeDyld::MemoryManager *MemMgr = nullptr;
+ // Create and record a memory manager for this object.
{
+ auto Tmp = GetMemoryManager();
std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
- ActiveRTDylds.erase(K);
+ MemMgrs.push_back(std::move(Tmp));
+ MemMgr = MemMgrs.back().get();
}
- if (RTDyld->hasError()) {
- ES.reportError(make_error<StringError>(RTDyld->getErrorString(),
- inconvertibleErrorCode()));
- R.failMaterialization();
- return;
+ JITDylibSearchOrderResolver Resolver(*SharedR);
+
+ /* Thoughts on proper cross-dylib weak symbol handling:
+ *
+ * Change selection of canonical defs to be a manually triggered process, and
+ * add a 'canonical' bit to symbol definitions. When canonical def selection
+ * is triggered, sweep the JITDylibs to mark defs as canonical, discard
+ * duplicate defs.
+ */
+ jitLinkForORC(
+ **Obj, std::move(O), *MemMgr, Resolver, ProcessAllSections,
+ [this, K, SharedR, &Obj, InternalSymbols](
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo,
+ std::map<StringRef, JITEvaluatedSymbol> ResolvedSymbols) {
+ return onObjLoad(K, *SharedR, **Obj, std::move(LoadedObjInfo),
+ ResolvedSymbols, *InternalSymbols);
+ },
+ [this, K, SharedR](Error Err) {
+ onObjEmit(K, *SharedR, std::move(Err));
+ });
+}
+
+Error RTDyldObjectLinkingLayer::onObjLoad(
+ VModuleKey K, MaterializationResponsibility &R, object::ObjectFile &Obj,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo,
+ std::map<StringRef, JITEvaluatedSymbol> Resolved,
+ std::set<StringRef> &InternalSymbols) {
+ SymbolFlagsMap ExtraSymbolsToClaim;
+ SymbolMap Symbols;
+ for (auto &KV : Resolved) {
+ // Scan the symbols and add them to the Symbols map for resolution.
+
+ // We never claim internal symbols.
+ if (InternalSymbols.count(KV.first))
+ continue;
+
+ auto InternedName = getExecutionSession().intern(KV.first);
+ auto Flags = KV.second.getFlags();
+
+ // Override object flags and claim responsibility for symbols if
+ // requested.
+ if (OverrideObjectFlags || AutoClaimObjectSymbols) {
+ auto I = R.getSymbols().find(InternedName);
+
+ if (OverrideObjectFlags && I != R.getSymbols().end())
+ Flags = JITSymbolFlags::stripTransientFlags(I->second);
+ else if (AutoClaimObjectSymbols && I == R.getSymbols().end())
+ ExtraSymbolsToClaim[InternedName] = Flags;
+ }
+
+ Symbols[InternedName] = JITEvaluatedSymbol(KV.second.getAddress(), Flags);
}
- R.finalize();
+ if (!ExtraSymbolsToClaim.empty())
+ if (auto Err = R.defineMaterializing(ExtraSymbolsToClaim))
+ return Err;
+
+ R.resolve(Symbols);
+
+ if (NotifyLoaded)
+ NotifyLoaded(K, Obj, *LoadedObjInfo);
- if (NotifyFinalized)
- NotifyFinalized(K);
+ return Error::success();
}
-void RTDyldObjectLinkingLayer2::mapSectionAddress(
- VModuleKey K, const void *LocalAddress, JITTargetAddress TargetAddr) const {
- std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
- auto ActiveRTDyldItr = ActiveRTDylds.find(K);
+void RTDyldObjectLinkingLayer::onObjEmit(VModuleKey K,
+ MaterializationResponsibility &R,
+ Error Err) {
+ if (Err) {
+ getExecutionSession().reportError(std::move(Err));
+ R.failMaterialization();
+ return;
+ }
+
+ R.emit();
- assert(ActiveRTDyldItr != ActiveRTDylds.end() &&
- "No active RTDyld instance found for key");
- ActiveRTDyldItr->second->mapSectionAddress(LocalAddress, TargetAddr);
+ if (NotifyEmitted)
+ NotifyEmitted(K);
}
} // End namespace orc.
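
The rewritten emit() above hands a MaterializationResponsibility, which is move-only, to an asynchronous link step by first promoting it to a std::shared_ptr; the FIXME notes that C++14 init-captures would make this unnecessary. A generic sketch of the pattern, with no LLVM types involved:

    #include <functional>
    #include <memory>

    struct Responsibility {
      Responsibility() = default;
      Responsibility(Responsibility &&) = default;     // movable...
      Responsibility(const Responsibility &) = delete; // ...but not copyable
      void emit() {}
    };

    void runAsync(std::function<void()> Continuation) { Continuation(); }

    // Promote the move-only object to a shared_ptr so a C++11 lambda, which can
    // only copy-capture, keeps it alive until the asynchronous step completes.
    void emitLater(Responsibility R) {
      auto SharedR = std::make_shared<Responsibility>(std::move(R));
      runAsync([SharedR]() { SharedR->emit(); });
    }
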
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
new file mode 100644
index 000000000000..9525b168fbd3
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
@@ -0,0 +1,65 @@
+//===-- ThreadSafeModule.cpp - Thread safe Module, Context, and Utilities -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+namespace llvm {
+namespace orc {
+
+ThreadSafeModule cloneToNewContext(ThreadSafeModule &TSM,
+ GVPredicate ShouldCloneDef,
+ GVModifier UpdateClonedDefSource) {
+ assert(TSM && "Can not clone null module");
+
+ if (!ShouldCloneDef)
+ ShouldCloneDef = [](const GlobalValue &) { return true; };
+
+ auto Lock = TSM.getContextLock();
+
+ SmallVector<char, 1> ClonedModuleBuffer;
+
+ {
+ std::set<GlobalValue *> ClonedDefsInSrc;
+ ValueToValueMapTy VMap;
+ auto Tmp = CloneModule(*TSM.getModule(), VMap, [&](const GlobalValue *GV) {
+ if (ShouldCloneDef(*GV)) {
+ ClonedDefsInSrc.insert(const_cast<GlobalValue *>(GV));
+ return true;
+ }
+ return false;
+ });
+
+ if (UpdateClonedDefSource)
+ for (auto *GV : ClonedDefsInSrc)
+ UpdateClonedDefSource(*GV);
+
+ BitcodeWriter BCWriter(ClonedModuleBuffer);
+
+ BCWriter.writeModule(*Tmp);
+ BCWriter.writeSymtab();
+ BCWriter.writeStrtab();
+ }
+
+ MemoryBufferRef ClonedModuleBufferRef(
+ StringRef(ClonedModuleBuffer.data(), ClonedModuleBuffer.size()),
+ "cloned module buffer");
+ ThreadSafeContext NewTSCtx(llvm::make_unique<LLVMContext>());
+
+ auto ClonedModule =
+ cantFail(parseBitcodeFile(ClonedModuleBufferRef, *NewTSCtx.getContext()));
+ ClonedModule->setModuleIdentifier(TSM.getModule()->getName());
+ return ThreadSafeModule(std::move(ClonedModule), std::move(NewTSCtx));
+}
+
+} // end namespace orc
+} // end namespace llvm
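
cloneToNewContext() above copies selected definitions into a module that owns a fresh LLVMContext by round-tripping through bitcode, so IR is never shared between contexts. A hedged usage sketch, assuming the usual ThreadSafeModule.h include; the predicate and modifier shown are illustrative, not taken from this patch:

    using namespace llvm;
    using namespace llvm::orc;

    // Move all function definitions of TSM into a module with its own context,
    // demoting the originals to declarations in the source module.
    ThreadSafeModule extractDefinitions(ThreadSafeModule &TSM) {
      return cloneToNewContext(
          TSM,
          /*ShouldCloneDef=*/[](const GlobalValue &GV) {
            return isa<Function>(GV) && !GV.isDeclaration();
          },
          /*UpdateClonedDefSource=*/[](GlobalValue &GV) {
            cast<Function>(GV).deleteBody();
          });
    }
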
diff --git a/contrib/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
index 7bf8120d23df..f195d0282998 100644
--- a/contrib/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -66,9 +66,9 @@ public:
CloseMarker();
}
- void NotifyObjectEmitted(const ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) override;
- void NotifyFreeingObject(const ObjectFile &Obj) override;
+ void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) override;
+ void notifyFreeingObject(ObjectKey K) override;
private:
bool InitDebuggingDir();
@@ -227,8 +227,9 @@ PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) {
SuccessfullyInitialized = true;
}
-void PerfJITEventListener::NotifyObjectEmitted(
- const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) {
+void PerfJITEventListener::notifyObjectLoaded(
+ ObjectKey K, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {
if (!SuccessfullyInitialized)
return;
@@ -280,7 +281,7 @@ void PerfJITEventListener::NotifyObjectEmitted(
Dumpstream->flush();
}
-void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
+void PerfJITEventListener::notifyFreeingObject(ObjectKey K) {
// perf currently doesn't have an interface for unloading. But munmap()ing the
// code section does, so that's ok.
}
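
The listener interface is renamed from NotifyObjectEmitted/NotifyFreeingObject to notifyObjectLoaded/notifyFreeingObject and is now keyed by an ObjectKey, the uint64_t that the ORC layers above derive from the object buffer's address. A minimal listener against the new hooks might look like this sketch (includes omitted; purely illustrative):

    class LoggingJITEventListener : public JITEventListener {
    public:
      void notifyObjectLoaded(ObjectKey K, const object::ObjectFile &Obj,
                              const RuntimeDyld::LoadedObjectInfo &L) override {
        errs() << "loaded object " << K << ": " << Obj.getFileName() << "\n";
      }
      void notifyFreeingObject(ObjectKey K) override {
        errs() << "freeing object " << K << "\n";
      }
    };
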
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
index 18eb0e461921..0553c217c2a2 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Object/ObjectFile.h"
using namespace llvm;
@@ -25,11 +27,18 @@ JITSymbolFlags llvm::JITSymbolFlags::fromGlobalValue(const GlobalValue &GV) {
Flags |= JITSymbolFlags::Common;
if (!GV.hasLocalLinkage() && !GV.hasHiddenVisibility())
Flags |= JITSymbolFlags::Exported;
+
+ if (isa<Function>(GV))
+ Flags |= JITSymbolFlags::Callable;
+ else if (isa<GlobalAlias>(GV) &&
+ isa<Function>(cast<GlobalAlias>(GV).getAliasee()))
+ Flags |= JITSymbolFlags::Callable;
+
return Flags;
}
-JITSymbolFlags
-llvm::JITSymbolFlags::fromObjectSymbol(const object::BasicSymbolRef &Symbol) {
+Expected<JITSymbolFlags>
+llvm::JITSymbolFlags::fromObjectSymbol(const object::SymbolRef &Symbol) {
JITSymbolFlags Flags = JITSymbolFlags::None;
if (Symbol.getFlags() & object::BasicSymbolRef::SF_Weak)
Flags |= JITSymbolFlags::Weak;
@@ -37,11 +46,19 @@ llvm::JITSymbolFlags::fromObjectSymbol(const object::BasicSymbolRef &Symbol) {
Flags |= JITSymbolFlags::Common;
if (Symbol.getFlags() & object::BasicSymbolRef::SF_Exported)
Flags |= JITSymbolFlags::Exported;
+
+ auto SymbolType = Symbol.getType();
+ if (!SymbolType)
+ return SymbolType.takeError();
+
+ if (*SymbolType & object::SymbolRef::ST_Function)
+ Flags |= JITSymbolFlags::Callable;
+
return Flags;
}
-ARMJITSymbolFlags llvm::ARMJITSymbolFlags::fromObjectSymbol(
- const object::BasicSymbolRef &Symbol) {
+ARMJITSymbolFlags
+llvm::ARMJITSymbolFlags::fromObjectSymbol(const object::SymbolRef &Symbol) {
ARMJITSymbolFlags Flags;
if (Symbol.getFlags() & object::BasicSymbolRef::SF_Thumb)
Flags |= ARMJITSymbolFlags::Thumb;
@@ -51,48 +68,64 @@ ARMJITSymbolFlags llvm::ARMJITSymbolFlags::fromObjectSymbol(
/// Performs lookup by, for each symbol, first calling
/// findSymbolInLogicalDylib and if that fails calling
/// findSymbol.
-Expected<JITSymbolResolver::LookupResult>
-LegacyJITSymbolResolver::lookup(const LookupSet &Symbols) {
+void LegacyJITSymbolResolver::lookup(const LookupSet &Symbols,
+ OnResolvedFunction OnResolved) {
JITSymbolResolver::LookupResult Result;
for (auto &Symbol : Symbols) {
std::string SymName = Symbol.str();
if (auto Sym = findSymbolInLogicalDylib(SymName)) {
if (auto AddrOrErr = Sym.getAddress())
Result[Symbol] = JITEvaluatedSymbol(*AddrOrErr, Sym.getFlags());
- else
- return AddrOrErr.takeError();
- } else if (auto Err = Sym.takeError())
- return std::move(Err);
- else {
+ else {
+ OnResolved(AddrOrErr.takeError());
+ return;
+ }
+ } else if (auto Err = Sym.takeError()) {
+ OnResolved(std::move(Err));
+ return;
+ } else {
// findSymbolInLogicalDylib failed. Lets try findSymbol.
if (auto Sym = findSymbol(SymName)) {
if (auto AddrOrErr = Sym.getAddress())
Result[Symbol] = JITEvaluatedSymbol(*AddrOrErr, Sym.getFlags());
- else
- return AddrOrErr.takeError();
- } else if (auto Err = Sym.takeError())
- return std::move(Err);
- else
- return make_error<StringError>("Symbol not found: " + Symbol,
- inconvertibleErrorCode());
+ else {
+ OnResolved(AddrOrErr.takeError());
+ return;
+ }
+ } else if (auto Err = Sym.takeError()) {
+ OnResolved(std::move(Err));
+ return;
+ } else {
+ OnResolved(make_error<StringError>("Symbol not found: " + Symbol,
+ inconvertibleErrorCode()));
+ return;
+ }
}
}
- return std::move(Result);
+ OnResolved(std::move(Result));
}
/// Performs flags lookup by calling findSymbolInLogicalDylib and
/// returning the flags value for that symbol.
-Expected<JITSymbolResolver::LookupFlagsResult>
-LegacyJITSymbolResolver::lookupFlags(const LookupSet &Symbols) {
- JITSymbolResolver::LookupFlagsResult Result;
+Expected<JITSymbolResolver::LookupSet>
+LegacyJITSymbolResolver::getResponsibilitySet(const LookupSet &Symbols) {
+ JITSymbolResolver::LookupSet Result;
for (auto &Symbol : Symbols) {
std::string SymName = Symbol.str();
- if (auto Sym = findSymbolInLogicalDylib(SymName))
- Result[Symbol] = Sym.getFlags();
- else if (auto Err = Sym.takeError())
+ if (auto Sym = findSymbolInLogicalDylib(SymName)) {
+ // If there's an existing def but it is not strong, then the caller is
+ // responsible for it.
+ if (!Sym.getFlags().isStrong())
+ Result.insert(Symbol);
+ } else if (auto Err = Sym.takeError())
return std::move(Err);
+ else {
+ // If there is no existing definition then the caller is responsible for
+ // it.
+ Result.insert(Symbol);
+ }
}
return std::move(Result);
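
LegacyJITSymbolResolver keeps the old blocking hooks and adapts them to the new asynchronous lookup() and getResponsibilitySet() shown above, so existing resolvers only need to implement findSymbolInLogicalDylib() and findSymbol(). A hedged sketch of such a resolver (the in-process fallback is illustrative, not part of this patch):

    class InProcessResolver : public LegacyJITSymbolResolver {
    public:
      JITSymbol findSymbolInLogicalDylib(const std::string &Name) override {
        return nullptr; // toy example: nothing defined in the logical dylib
      }
      JITSymbol findSymbol(const std::string &Name) override {
        if (auto Addr = RTDyldMemoryManager::getSymbolAddressInProcess(Name))
          return JITSymbol(Addr, JITSymbolFlags::Exported);
        return nullptr;
      }
    };
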
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 76f5e5ead504..53cb782c55c4 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -19,10 +19,13 @@
#include "RuntimeDyldMachO.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MutexGuard.h"
+#include <future>
+
using namespace llvm;
using namespace llvm::object;
@@ -131,6 +134,14 @@ void RuntimeDyldImpl::resolveRelocations() {
ErrorStr = toString(std::move(Err));
}
+ resolveLocalRelocations();
+
+ // Print out sections after relocation.
+ LLVM_DEBUG(for (int i = 0, e = Sections.size(); i != e; ++i)
+ dumpSectionMemory(Sections[i], "after relocations"););
+}
+
+void RuntimeDyldImpl::resolveLocalRelocations() {
// Iterate over all outstanding relocations
for (auto it = Relocations.begin(), e = Relocations.end(); it != e; ++it) {
// The Section here (Sections[i]) refers to the section in which the
@@ -143,10 +154,6 @@ void RuntimeDyldImpl::resolveRelocations() {
resolveRelocationList(it->second, Addr);
}
Relocations.clear();
-
- // Print out sections after relocation.
- LLVM_DEBUG(for (int i = 0, e = Sections.size(); i != e; ++i)
- dumpSectionMemory(Sections[i], "after relocations"););
}
void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress,
@@ -204,7 +211,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
// First, collect all weak and common symbols. We need to know if stronger
// definitions occur elsewhere.
- JITSymbolResolver::LookupFlagsResult SymbolFlags;
+ JITSymbolResolver::LookupSet ResponsibilitySet;
{
JITSymbolResolver::LookupSet Symbols;
for (auto &Sym : Obj.symbols()) {
@@ -218,10 +225,10 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
}
}
- if (auto FlagsResultOrErr = Resolver.lookupFlags(Symbols))
- SymbolFlags = std::move(*FlagsResultOrErr);
+ if (auto ResultOrErr = Resolver.getResponsibilitySet(Symbols))
+ ResponsibilitySet = std::move(*ResultOrErr);
else
- return FlagsResultOrErr.takeError();
+ return ResultOrErr.takeError();
}
// Parse symbols
@@ -249,37 +256,36 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
return NameOrErr.takeError();
// Compute JIT symbol flags.
- JITSymbolFlags JITSymFlags = getJITSymbolFlags(*I);
+ auto JITSymFlags = getJITSymbolFlags(*I);
+ if (!JITSymFlags)
+ return JITSymFlags.takeError();
// If this is a weak definition, check to see if there's a strong one.
// If there is, skip this symbol (we won't be providing it: the strong
// definition will). If there's no strong definition, make this definition
// strong.
- if (JITSymFlags.isWeak() || JITSymFlags.isCommon()) {
+ if (JITSymFlags->isWeak() || JITSymFlags->isCommon()) {
// First check whether there's already a definition in this instance.
- // FIXME: Override existing weak definitions with strong ones.
if (GlobalSymbolTable.count(Name))
continue;
- // Then check whether we found flags for an existing symbol during the
- // flags lookup earlier.
- auto FlagsI = SymbolFlags.find(Name);
- if (FlagsI == SymbolFlags.end() ||
- (JITSymFlags.isWeak() && !FlagsI->second.isStrong()) ||
- (JITSymFlags.isCommon() && FlagsI->second.isCommon())) {
- if (JITSymFlags.isWeak())
- JITSymFlags &= ~JITSymbolFlags::Weak;
- if (JITSymFlags.isCommon()) {
- JITSymFlags &= ~JITSymbolFlags::Common;
- uint32_t Align = I->getAlignment();
- uint64_t Size = I->getCommonSize();
- if (!CommonAlign)
- CommonAlign = Align;
- CommonSize = alignTo(CommonSize, Align) + Size;
- CommonSymbolsToAllocate.push_back(*I);
- }
- } else
+ // If we're not responsible for this symbol, skip it.
+ if (!ResponsibilitySet.count(Name))
continue;
+
+ // Otherwise update the flags on the symbol to make this definition
+ // strong.
+ if (JITSymFlags->isWeak())
+ *JITSymFlags &= ~JITSymbolFlags::Weak;
+ if (JITSymFlags->isCommon()) {
+ *JITSymFlags &= ~JITSymbolFlags::Common;
+ uint32_t Align = I->getAlignment();
+ uint64_t Size = I->getCommonSize();
+ if (!CommonAlign)
+ CommonAlign = Align;
+ CommonSize = alignTo(CommonSize, Align) + Size;
+ CommonSymbolsToAllocate.push_back(*I);
+ }
}
if (Flags & SymbolRef::SF_Absolute &&
@@ -296,7 +302,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
<< " SID: " << SectionID
<< " Offset: " << format("%p", (uintptr_t)Addr)
<< " flags: " << Flags << "\n");
- GlobalSymbolTable[Name] = SymbolTableEntry(SectionID, Addr, JITSymFlags);
+ GlobalSymbolTable[Name] = SymbolTableEntry(SectionID, Addr, *JITSymFlags);
} else if (SymType == object::SymbolRef::ST_Function ||
SymType == object::SymbolRef::ST_Data ||
SymType == object::SymbolRef::ST_Unknown ||
@@ -329,7 +335,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
<< " Offset: " << format("%p", (uintptr_t)SectOffset)
<< " flags: " << Flags << "\n");
GlobalSymbolTable[Name] =
- SymbolTableEntry(SectionID, SectOffset, JITSymFlags);
+ SymbolTableEntry(SectionID, SectOffset, *JITSymFlags);
}
}
@@ -642,7 +648,8 @@ void RuntimeDyldImpl::writeBytesUnaligned(uint64_t Value, uint8_t *Dst,
}
}
-JITSymbolFlags RuntimeDyldImpl::getJITSymbolFlags(const BasicSymbolRef &SR) {
+Expected<JITSymbolFlags>
+RuntimeDyldImpl::getJITSymbolFlags(const SymbolRef &SR) {
return JITSymbolFlags::fromObjectSymbol(SR);
}
@@ -683,11 +690,15 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
Addr += AlignOffset;
Offset += AlignOffset;
}
- JITSymbolFlags JITSymFlags = getJITSymbolFlags(Sym);
+ auto JITSymFlags = getJITSymbolFlags(Sym);
+
+ if (!JITSymFlags)
+ return JITSymFlags.takeError();
+
LLVM_DEBUG(dbgs() << "Allocating common symbol " << Name << " address "
<< format("%p", Addr) << "\n");
GlobalSymbolTable[Name] =
- SymbolTableEntry(SectionID, Offset, JITSymFlags);
+ SymbolTableEntry(SectionID, Offset, std::move(*JITSymFlags));
Offset += Size;
Addr += Size;
}
@@ -992,42 +1003,8 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
}
}
-Error RuntimeDyldImpl::resolveExternalSymbols() {
- StringMap<JITEvaluatedSymbol> ExternalSymbolMap;
-
- // Resolution can trigger emission of more symbols, so iterate until
- // we've resolved *everything*.
- {
- JITSymbolResolver::LookupSet ResolvedSymbols;
-
- while (true) {
- JITSymbolResolver::LookupSet NewSymbols;
-
- for (auto &RelocKV : ExternalSymbolRelocations) {
- StringRef Name = RelocKV.first();
- if (!Name.empty() && !GlobalSymbolTable.count(Name) &&
- !ResolvedSymbols.count(Name))
- NewSymbols.insert(Name);
- }
-
- if (NewSymbols.empty())
- break;
-
- auto NewResolverResults = Resolver.lookup(NewSymbols);
- if (!NewResolverResults)
- return NewResolverResults.takeError();
-
- assert(NewResolverResults->size() == NewSymbols.size() &&
- "Should have errored on unresolved symbols");
-
- for (auto &RRKV : *NewResolverResults) {
- assert(!ResolvedSymbols.count(RRKV.first) && "Redundant resolution?");
- ExternalSymbolMap.insert(RRKV);
- ResolvedSymbols.insert(RRKV.first);
- }
- }
- }
-
+void RuntimeDyldImpl::applyExternalSymbolRelocations(
+ const StringMap<JITEvaluatedSymbol> ExternalSymbolMap) {
while (!ExternalSymbolRelocations.empty()) {
StringMap<RelocationList>::iterator i = ExternalSymbolRelocations.begin();
@@ -1089,10 +1066,114 @@ Error RuntimeDyldImpl::resolveExternalSymbols() {
ExternalSymbolRelocations.erase(i);
}
+}
+
+Error RuntimeDyldImpl::resolveExternalSymbols() {
+ StringMap<JITEvaluatedSymbol> ExternalSymbolMap;
+
+ // Resolution can trigger emission of more symbols, so iterate until
+ // we've resolved *everything*.
+ {
+ JITSymbolResolver::LookupSet ResolvedSymbols;
+
+ while (true) {
+ JITSymbolResolver::LookupSet NewSymbols;
+
+ for (auto &RelocKV : ExternalSymbolRelocations) {
+ StringRef Name = RelocKV.first();
+ if (!Name.empty() && !GlobalSymbolTable.count(Name) &&
+ !ResolvedSymbols.count(Name))
+ NewSymbols.insert(Name);
+ }
+
+ if (NewSymbols.empty())
+ break;
+
+#ifdef _MSC_VER
+ using ExpectedLookupResult =
+ MSVCPExpected<JITSymbolResolver::LookupResult>;
+#else
+ using ExpectedLookupResult = Expected<JITSymbolResolver::LookupResult>;
+#endif
+
+ auto NewSymbolsP = std::make_shared<std::promise<ExpectedLookupResult>>();
+ auto NewSymbolsF = NewSymbolsP->get_future();
+ Resolver.lookup(NewSymbols,
+ [=](Expected<JITSymbolResolver::LookupResult> Result) {
+ NewSymbolsP->set_value(std::move(Result));
+ });
+
+ auto NewResolverResults = NewSymbolsF.get();
+
+ if (!NewResolverResults)
+ return NewResolverResults.takeError();
+
+ assert(NewResolverResults->size() == NewSymbols.size() &&
+ "Should have errored on unresolved symbols");
+
+ for (auto &RRKV : *NewResolverResults) {
+ assert(!ResolvedSymbols.count(RRKV.first) && "Redundant resolution?");
+ ExternalSymbolMap.insert(RRKV);
+ ResolvedSymbols.insert(RRKV.first);
+ }
+ }
+ }
+
+ applyExternalSymbolRelocations(ExternalSymbolMap);
return Error::success();
}
+void RuntimeDyldImpl::finalizeAsync(
+ std::unique_ptr<RuntimeDyldImpl> This, std::function<void(Error)> OnEmitted,
+ std::unique_ptr<MemoryBuffer> UnderlyingBuffer) {
+
+ // FIXME: Move-capture OnEmitted and UnderlyingBuffer once we have
+ // c++14.
+ auto SharedUnderlyingBuffer =
+ std::shared_ptr<MemoryBuffer>(std::move(UnderlyingBuffer));
+ auto SharedThis = std::shared_ptr<RuntimeDyldImpl>(std::move(This));
+ auto PostResolveContinuation =
+ [SharedThis, OnEmitted, SharedUnderlyingBuffer](
+ Expected<JITSymbolResolver::LookupResult> Result) {
+ if (!Result) {
+ OnEmitted(Result.takeError());
+ return;
+ }
+
+ /// Copy the result into a StringMap, where the keys are held by value.
+ StringMap<JITEvaluatedSymbol> Resolved;
+ for (auto &KV : *Result)
+ Resolved[KV.first] = KV.second;
+
+ SharedThis->applyExternalSymbolRelocations(Resolved);
+ SharedThis->resolveLocalRelocations();
+ SharedThis->registerEHFrames();
+ std::string ErrMsg;
+ if (SharedThis->MemMgr.finalizeMemory(&ErrMsg))
+ OnEmitted(make_error<StringError>(std::move(ErrMsg),
+ inconvertibleErrorCode()));
+ else
+ OnEmitted(Error::success());
+ };
+
+ JITSymbolResolver::LookupSet Symbols;
+
+ for (auto &RelocKV : SharedThis->ExternalSymbolRelocations) {
+ StringRef Name = RelocKV.first();
+ assert(!Name.empty() && "Symbol has no name?");
+ assert(!SharedThis->GlobalSymbolTable.count(Name) &&
+ "Name already processed. RuntimeDyld instances can not be re-used "
+ "when finalizing with finalizeAsync.");
+ Symbols.insert(Name);
+ }
+
+ if (!Symbols.empty()) {
+ SharedThis->Resolver.lookup(Symbols, PostResolveContinuation);
+ } else
+ PostResolveContinuation(std::map<StringRef, JITEvaluatedSymbol>());
+}
+
//===----------------------------------------------------------------------===//
// RuntimeDyld class implementation
@@ -1240,5 +1321,35 @@ void RuntimeDyld::deregisterEHFrames() {
if (Dyld)
Dyld->deregisterEHFrames();
}
+// FIXME: Kill this with fire once we have a new JIT linker: this is only here
+// so that we can re-use RuntimeDyld's implementation without twisting the
+// interface any further for ORC's purposes.
+void jitLinkForORC(object::ObjectFile &Obj,
+ std::unique_ptr<MemoryBuffer> UnderlyingBuffer,
+ RuntimeDyld::MemoryManager &MemMgr,
+ JITSymbolResolver &Resolver, bool ProcessAllSections,
+ std::function<Error(
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObj,
+ std::map<StringRef, JITEvaluatedSymbol>)>
+ OnLoaded,
+ std::function<void(Error)> OnEmitted) {
+
+ RuntimeDyld RTDyld(MemMgr, Resolver);
+ RTDyld.setProcessAllSections(ProcessAllSections);
+
+ auto Info = RTDyld.loadObject(Obj);
+
+ if (RTDyld.hasError()) {
+ OnEmitted(make_error<StringError>(RTDyld.getErrorString(),
+ inconvertibleErrorCode()));
+ return;
+ }
+
+ if (auto Err = OnLoaded(std::move(Info), RTDyld.getSymbolTable()))
+ OnEmitted(std::move(Err));
+
+ RuntimeDyldImpl::finalizeAsync(std::move(RTDyld.Dyld), std::move(OnEmitted),
+ std::move(UnderlyingBuffer));
+}
} // end namespace llvm
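
Because JITSymbolResolver::lookup() is now callback-based, the synchronous callers (resolveExternalSymbols() above, and RuntimeDyldChecker below) bridge back to blocking behaviour with a std::promise/std::future pair; MSVCPExpected exists only to satisfy MSVC's std::promise requirements. The same shape, reduced to a self-contained toy:

    #include <cstdint>
    #include <functional>
    #include <future>
    #include <memory>
    #include <string>

    // Toy stand-in for the now asynchronous, callback-based lookup.
    void asyncLookup(const std::string &Name,
                     std::function<void(uint64_t)> OnResolved) {
      OnResolved(0x1000); // pretend Name resolved to this address
    }

    // Block until the callback delivers its result via the promise/future pair.
    uint64_t blockingLookup(const std::string &Name) {
      auto ResultP = std::make_shared<std::promise<uint64_t>>();
      auto ResultF = ResultP->get_future();
      asyncLookup(Name, [ResultP](uint64_t Addr) { ResultP->set_value(Addr); });
      return ResultF.get();
    }
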
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 1c54ad6fb03f..340ddaab186d 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -66,7 +66,7 @@ RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) {
} else {
HasError = true;
raw_string_ostream ErrStream(ErrorStr);
- logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream, "");
+ logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream);
return nullptr;
}
}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index fa8906869b3a..6eb6256080ff 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -14,8 +14,10 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/Path.h"
#include <cctype>
+#include <future>
#include <memory>
#include <utility>
@@ -729,15 +731,35 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix,
return DidAllTestsPass && (NumRules != 0);
}
+Expected<JITSymbolResolver::LookupResult> RuntimeDyldCheckerImpl::lookup(
+ const JITSymbolResolver::LookupSet &Symbols) const {
+
+#ifdef _MSC_VER
+ using ExpectedLookupResult = MSVCPExpected<JITSymbolResolver::LookupResult>;
+#else
+ using ExpectedLookupResult = Expected<JITSymbolResolver::LookupResult>;
+#endif
+
+ auto ResultP = std::make_shared<std::promise<ExpectedLookupResult>>();
+ auto ResultF = ResultP->get_future();
+
+ getRTDyld().Resolver.lookup(
+ Symbols, [=](Expected<JITSymbolResolver::LookupResult> Result) {
+ ResultP->set_value(std::move(Result));
+ });
+ return ResultF.get();
+}
+
bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const {
if (getRTDyld().getSymbol(Symbol))
return true;
- JITSymbolResolver::LookupSet Symbols({Symbol});
- auto Result = getRTDyld().Resolver.lookup(Symbols);
+ auto Result = lookup({Symbol});
+
if (!Result) {
logAllUnhandledErrors(Result.takeError(), errs(), "RTDyldChecker: ");
return false;
}
+
assert(Result->count(Symbol) && "Missing symbol result");
return true;
}
@@ -751,8 +773,7 @@ uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const {
if (auto InternalSymbol = getRTDyld().getSymbol(Symbol))
return InternalSymbol.getAddress();
- JITSymbolResolver::LookupSet Symbols({Symbol});
- auto Result = getRTDyld().Resolver.lookup(Symbols);
+ auto Result = lookup({Symbol});
if (!Result) {
logAllUnhandledErrors(Result.takeError(), errs(), "RTDyldChecker: ");
return 0;
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
index b462ef2c00ce..6da1a68d06d6 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
@@ -41,6 +41,9 @@ private:
RuntimeDyldImpl &getRTDyld() const { return *RTDyld.Dyld; }
+ Expected<JITSymbolResolver::LookupResult>
+ lookup(const JITSymbolResolver::LookupSet &Symbols) const;
+
bool isSymbolValid(StringRef Symbol) const;
uint64_t getSymbolLocalAddr(StringRef Symbol) const;
uint64_t getSymbolRemoteAddr(StringRef Symbol) const;
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index f9a81c7bd1b0..226ee715e18b 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -255,7 +255,7 @@ RuntimeDyldELF::loadObject(const object::ObjectFile &O) {
else {
HasError = true;
raw_string_ostream ErrStream(ErrorStr);
- logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream, "");
+ logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream);
return nullptr;
}
}
@@ -1130,7 +1130,7 @@ RuntimeDyldELF::processRelocationRef(
if (!SymTypeOrErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymTypeOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(SymTypeOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -1151,7 +1151,7 @@ RuntimeDyldELF::processRelocationRef(
if (!SectionOrErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SectionOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(SectionOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 4d7cc36d0666..4c650e09ac1f 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -370,7 +370,7 @@ protected:
void writeBytesUnaligned(uint64_t Value, uint8_t *Dst, unsigned Size) const;
/// Generate JITSymbolFlags from a libObject symbol.
- virtual JITSymbolFlags getJITSymbolFlags(const BasicSymbolRef &Sym);
+ virtual Expected<JITSymbolFlags> getJITSymbolFlags(const SymbolRef &Sym);
/// Modify the given target address based on the given symbol flags.
/// This can be used by subclasses to tweak addresses based on symbol flags,
@@ -433,6 +433,9 @@ protected:
const ObjectFile &Obj, ObjSectionToIDMap &ObjSectionToID,
StubMap &Stubs) = 0;
+ void applyExternalSymbolRelocations(
+ const StringMap<JITEvaluatedSymbol> ExternalSymbolMap);
+
/// Resolve relocations to external symbols.
Error resolveExternalSymbols();
@@ -536,6 +539,12 @@ public:
void resolveRelocations();
+ void resolveLocalRelocations();
+
+ static void finalizeAsync(std::unique_ptr<RuntimeDyldImpl> This,
+ std::function<void(Error)> OnEmitted,
+ std::unique_ptr<MemoryBuffer> UnderlyingBuffer);
+
void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
void mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress);
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index c5a215c83331..d47fcd45be88 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -370,7 +370,7 @@ RuntimeDyldMachO::loadObject(const object::ObjectFile &O) {
else {
HasError = true;
raw_string_ostream ErrStream(ErrorStr);
- logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream, "");
+ logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream);
return nullptr;
}
}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
index 729ea1ec48a4..8723dd0fd0ea 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
@@ -28,7 +28,7 @@ static bool isThumbFunc(symbol_iterator Symbol, const ObjectFile &Obj,
if (!SymTypeOrErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymTypeOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(SymTypeOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index 2d6e5c4aea67..aee5f6dc3746 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -37,7 +37,13 @@ private:
if (!ImageBase) {
ImageBase = std::numeric_limits<uint64_t>::max();
for (const SectionEntry &Section : Sections)
- ImageBase = std::min(ImageBase, Section.getLoadAddress());
+ // The Sections list may contain sections that weren't loaded for
+ // whatever reason: they may be debug sections, and ProcessAllSections
+ // is false, or they may be sections that contain 0 bytes. If the
+ // section isn't loaded, the load address will be 0, and it should not
+ // be included in the ImageBase calculation.
+ if (Section.getLoadAddress() != 0)
+ ImageBase = std::min(ImageBase, Section.getLoadAddress());
}
return ImageBase;
}
@@ -122,6 +128,13 @@ public:
break;
}
+ case COFF::IMAGE_REL_AMD64_SECREL: {
+ assert(static_cast<int64_t>(RE.Addend) <= INT32_MAX && "Relocation overflow");
+ assert(static_cast<int64_t>(RE.Addend) >= INT32_MIN && "Relocation underflow");
+ writeBytesUnaligned(RE.Addend, Target, 4);
+ break;
+ }
+
default:
llvm_unreachable("Relocation type not implemented yet!");
break;
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index 64a6b2901819..ab7cd2bdae15 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -34,9 +34,11 @@ public:
unsigned getStubAlignment() override { return 4; }
- JITSymbolFlags getJITSymbolFlags(const BasicSymbolRef &SR) override {
+ Expected<JITSymbolFlags> getJITSymbolFlags(const SymbolRef &SR) override {
auto Flags = RuntimeDyldImpl::getJITSymbolFlags(SR);
- Flags.getTargetFlags() = ARMJITSymbolFlags::fromObjectSymbol(SR);
+ if (!Flags)
+ return Flags.takeError();
+ Flags->getTargetFlags() = ARMJITSymbolFlags::fromObjectSymbol(SR);
return Flags;
}
diff --git a/contrib/llvm/lib/FuzzMutate/IRMutator.cpp b/contrib/llvm/lib/FuzzMutate/IRMutator.cpp
index 2dc7dfb880a2..40e402cdadef 100644
--- a/contrib/llvm/lib/FuzzMutate/IRMutator.cpp
+++ b/contrib/llvm/lib/FuzzMutate/IRMutator.cpp
@@ -73,6 +73,7 @@ static void eliminateDeadCode(Function &F) {
FPM.addPass(DCEPass());
FunctionAnalysisManager FAM;
FAM.registerPass([&] { return TargetLibraryAnalysis(); });
+ FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
FPM.run(F, FAM);
}
diff --git a/contrib/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/contrib/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
index 9f5b7d608a1d..337184535558 100644
--- a/contrib/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
+++ b/contrib/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
@@ -136,7 +136,7 @@ Value *RandomIRBuilder::findPointer(BasicBlock &BB,
auto IsMatchingPtr = [&Srcs, &Pred](Instruction *Inst) {
// Invoke instructions sometimes produce valid pointers but currently
// we can't insert loads or stores from them
- if (isa<TerminatorInst>(Inst))
+ if (Inst->isTerminator())
return false;
if (auto PtrTy = dyn_cast<PointerType>(Inst->getType())) {
diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp
index 99a25a723b4a..a5dc623e1a30 100644
--- a/contrib/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm/lib/IR/AsmWriter.cpp
@@ -36,7 +36,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
@@ -199,7 +198,7 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
!isa<GlobalVariable>(V) && !isa<Function>(V) && !isa<BasicBlock>(V);
if (auto *BA = dyn_cast<BlockAddress>(V))
ID = OM.lookup(BA->getBasicBlock()).first;
- llvm::sort(List.begin(), List.end(), [&](const Entry &L, const Entry &R) {
+ llvm::sort(List, [&](const Entry &L, const Entry &R) {
const Use *LU = L.first;
const Use *RU = R.first;
if (LU == RU)
@@ -363,6 +362,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
+ case CallingConv::AArch64_VectorCall: Out << "aarch64_vector_pcs"; break;
case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break;
case CallingConv::AVR_INTR: Out << "avr_intrcc "; break;
case CallingConv::AVR_SIGNAL: Out << "avr_signalcc "; break;
@@ -704,6 +704,10 @@ private:
DenseMap<GlobalValue::GUID, unsigned> GUIDMap;
unsigned GUIDNext = 0;
+ /// TypeIdMap - The slot map for type ids used in the summary index.
+ StringMap<unsigned> TypeIdMap;
+ unsigned TypeIdNext = 0;
+
public:
/// Construct from a module.
///
@@ -735,6 +739,7 @@ public:
int getAttributeGroupSlot(AttributeSet AS);
int getModulePathSlot(StringRef Path);
int getGUIDSlot(GlobalValue::GUID GUID);
+ int getTypeIdSlot(StringRef Id);
/// If you'd like to deal with a function instead of just a module, use
/// this method to get its data into the SlotTracker.
@@ -789,6 +794,7 @@ private:
inline void CreateModulePathSlot(StringRef Path);
void CreateGUIDSlot(GlobalValue::GUID GUID);
+ void CreateTypeIdSlot(StringRef Id);
/// Add all of the module level global variables (and their initializers)
/// and function declarations, but not the contents of those functions.
@@ -991,9 +997,9 @@ void SlotTracker::processFunction() {
// We allow direct calls to any llvm.foo function here, because the
// target may not be linked into the optimizer.
- if (auto CS = ImmutableCallSite(&I)) {
+ if (const auto *Call = dyn_cast<CallBase>(&I)) {
// Add all the call attributes to the table.
- AttributeSet Attrs = CS.getAttributes().getFnAttributes();
+ AttributeSet Attrs = Call->getAttributes().getFnAttributes();
if (Attrs.hasAttributes())
CreateAttributeSetSlot(Attrs);
}
@@ -1025,8 +1031,12 @@ void SlotTracker::processIndex() {
for (auto &GlobalList : *TheIndex)
CreateGUIDSlot(GlobalList.first);
- for (auto &TId : TheIndex->typeIds())
- CreateGUIDSlot(GlobalValue::getGUID(TId.first));
+ // Start numbering the TypeIds after the GUIDs.
+ TypeIdNext = GUIDNext;
+
+ for (auto TidIter = TheIndex->typeIds().begin();
+ TidIter != TheIndex->typeIds().end(); TidIter++)
+ CreateTypeIdSlot(TidIter->second.first);
ST_DEBUG("end processIndex!\n");
}
@@ -1132,6 +1142,15 @@ int SlotTracker::getGUIDSlot(GlobalValue::GUID GUID) {
return I == GUIDMap.end() ? -1 : (int)I->second;
}
+int SlotTracker::getTypeIdSlot(StringRef Id) {
+ // Check for uninitialized state and do lazy initialization.
+ initializeIndexIfNeeded();
+
+ // Find the TypeId string in the map
+ auto I = TypeIdMap.find(Id);
+ return I == TypeIdMap.end() ? -1 : (int)I->second;
+}
+
/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
void SlotTracker::CreateModuleSlot(const GlobalValue *V) {
assert(V && "Can't insert a null Value into SlotTracker!");
@@ -1202,6 +1221,11 @@ void SlotTracker::CreateGUIDSlot(GlobalValue::GUID GUID) {
GUIDMap[GUID] = GUIDNext++;
}
+/// Create a new slot for the specified Id
+void SlotTracker::CreateTypeIdSlot(StringRef Id) {
+ TypeIdMap[Id] = TypeIdNext++;
+}
+
//===----------------------------------------------------------------------===//
// AsmWriter Implementation
//===----------------------------------------------------------------------===//
@@ -1216,24 +1240,6 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
SlotTracker *Machine, const Module *Context,
bool FromValue = false);
-static void writeAtomicRMWOperation(raw_ostream &Out,
- AtomicRMWInst::BinOp Op) {
- switch (Op) {
- default: Out << " <unknown operation " << Op << ">"; break;
- case AtomicRMWInst::Xchg: Out << " xchg"; break;
- case AtomicRMWInst::Add: Out << " add"; break;
- case AtomicRMWInst::Sub: Out << " sub"; break;
- case AtomicRMWInst::And: Out << " and"; break;
- case AtomicRMWInst::Nand: Out << " nand"; break;
- case AtomicRMWInst::Or: Out << " or"; break;
- case AtomicRMWInst::Xor: Out << " xor"; break;
- case AtomicRMWInst::Max: Out << " max"; break;
- case AtomicRMWInst::Min: Out << " min"; break;
- case AtomicRMWInst::UMax: Out << " umax"; break;
- case AtomicRMWInst::UMin: Out << " umin"; break;
- }
-}
-
static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U)) {
// 'Fast' is an abbreviation for all fast-math-flags.
@@ -1600,10 +1606,13 @@ struct MDFieldPrinter {
void printInt(StringRef Name, IntTy Int, bool ShouldSkipZero = true);
void printBool(StringRef Name, bool Value, Optional<bool> Default = None);
void printDIFlags(StringRef Name, DINode::DIFlags Flags);
+ void printDISPFlags(StringRef Name, DISubprogram::DISPFlags Flags);
template <class IntTy, class Stringifier>
void printDwarfEnum(StringRef Name, IntTy Value, Stringifier toString,
bool ShouldSkipZero = true);
void printEmissionKind(StringRef Name, DICompileUnit::DebugEmissionKind EK);
+ void printNameTableKind(StringRef Name,
+ DICompileUnit::DebugNameTableKind NTK);
};
} // end anonymous namespace
@@ -1696,11 +1705,42 @@ void MDFieldPrinter::printDIFlags(StringRef Name, DINode::DIFlags Flags) {
Out << FlagsFS << Extra;
}
+void MDFieldPrinter::printDISPFlags(StringRef Name,
+ DISubprogram::DISPFlags Flags) {
+ // Always print this field, because no flags in the IR at all will be
+ // interpreted as old-style isDefinition: true.
+ Out << FS << Name << ": ";
+
+ if (!Flags) {
+ Out << 0;
+ return;
+ }
+
+ SmallVector<DISubprogram::DISPFlags, 8> SplitFlags;
+ auto Extra = DISubprogram::splitFlags(Flags, SplitFlags);
+
+ FieldSeparator FlagsFS(" | ");
+ for (auto F : SplitFlags) {
+ auto StringF = DISubprogram::getFlagString(F);
+ assert(!StringF.empty() && "Expected valid flag");
+ Out << FlagsFS << StringF;
+ }
+ if (Extra || SplitFlags.empty())
+ Out << FlagsFS << Extra;
+}
+
void MDFieldPrinter::printEmissionKind(StringRef Name,
DICompileUnit::DebugEmissionKind EK) {
Out << FS << Name << ": " << DICompileUnit::emissionKindString(EK);
}
+void MDFieldPrinter::printNameTableKind(StringRef Name,
+ DICompileUnit::DebugNameTableKind NTK) {
+ if (NTK == DICompileUnit::DebugNameTableKind::Default)
+ return;
+ Out << FS << Name << ": " << DICompileUnit::nameTableKindString(NTK);
+}
+
template <class IntTy, class Stringifier>
void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value,
Stringifier toString, bool ShouldSkipZero) {
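printDISPFlags above always emits the field (an absent field would be read as the old-style isDefinition: true), prints 0 when no flags are set, and otherwise joins the known flag names with " | ", appending any leftover bits as a number. A minimal sketch of that splitting-and-joining pattern under a hypothetical SPFlags enum; the real DISPFlags enum and splitFlags helper live in LLVM:

#include <cstdint>
#include <iostream>
#include <string>

// Hypothetical subprogram flags; only the printing idiom mirrors the hunk.
enum SPFlags : uint32_t {
  SPFlagDefinition  = 1u << 0,
  SPFlagLocalToUnit = 1u << 1,
  SPFlagOptimized   = 1u << 2,
};

std::string spFlagName(uint32_t F) {
  switch (F) {
  case SPFlagDefinition:  return "DISPFlagDefinition";
  case SPFlagLocalToUnit: return "DISPFlagLocalToUnit";
  case SPFlagOptimized:   return "DISPFlagOptimized";
  default:                return "";
  }
}

// Split known bits, join them with " | ", then dump any unnamed leftovers.
void printSPFlags(std::ostream &Out, uint32_t Flags) {
  Out << "spFlags: ";
  if (!Flags) {
    Out << 0;
    return;
  }
  const uint32_t Known[] = {SPFlagDefinition, SPFlagLocalToUnit, SPFlagOptimized};
  bool First = true;
  uint32_t Extra = Flags;
  for (uint32_t F : Known) {
    if (!(Flags & F))
      continue;
    Out << (First ? "" : " | ") << spFlagName(F);
    First = false;
    Extra &= ~F;
  }
  if (Extra || First)
    Out << (First ? "" : " | ") << Extra;
}

int main() {
  printSPFlags(std::cout, SPFlagDefinition | SPFlagOptimized);
  std::cout << '\n'; // spFlags: DISPFlagDefinition | DISPFlagOptimized
}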
@@ -1744,6 +1784,8 @@ static void writeDILocation(raw_ostream &Out, const DILocation *DL,
Printer.printInt("column", DL->getColumn());
Printer.printMetadata("scope", DL->getRawScope(), /* ShouldSkipNull */ false);
Printer.printMetadata("inlinedAt", DL->getRawInlinedAt());
+ Printer.printBool("isImplicitCode", DL->isImplicitCode(),
+ /* Default */ false);
Out << ")";
}
@@ -1787,6 +1829,7 @@ static void writeDIBasicType(raw_ostream &Out, const DIBasicType *N,
Printer.printInt("align", N->getAlignInBits());
Printer.printDwarfEnum("encoding", N->getEncoding(),
dwarf::AttributeEncodingString);
+ Printer.printDIFlags("flags", N->getFlags());
Out << ")";
}
@@ -1890,7 +1933,8 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
Printer.printBool("splitDebugInlining", N->getSplitDebugInlining(), true);
Printer.printBool("debugInfoForProfiling", N->getDebugInfoForProfiling(),
false);
- Printer.printBool("gnuPubnames", N->getGnuPubnames(), false);
+ Printer.printNameTableKind("nameTableKind", N->getNameTableKind());
+ Printer.printBool("rangesBaseAddress", N->getRangesBaseAddress(), false);
Out << ")";
}
@@ -1905,18 +1949,14 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
Printer.printMetadata("type", N->getRawType());
- Printer.printBool("isLocal", N->isLocalToUnit());
- Printer.printBool("isDefinition", N->isDefinition());
Printer.printInt("scopeLine", N->getScopeLine());
Printer.printMetadata("containingType", N->getRawContainingType());
- Printer.printDwarfEnum("virtuality", N->getVirtuality(),
- dwarf::VirtualityString);
if (N->getVirtuality() != dwarf::DW_VIRTUALITY_none ||
N->getVirtualIndex() != 0)
Printer.printInt("virtualIndex", N->getVirtualIndex(), false);
Printer.printInt("thisAdjustment", N->getThisAdjustment());
Printer.printDIFlags("flags", N->getFlags());
- Printer.printBool("isOptimized", N->isOptimized());
+ Printer.printDISPFlags("spFlags", N->getSPFlags());
Printer.printMetadata("unit", N->getRawUnit());
Printer.printMetadata("templateParams", N->getRawTemplateParams());
Printer.printMetadata("declaration", N->getRawDeclaration());
@@ -2040,6 +2080,7 @@ static void writeDIGlobalVariable(raw_ostream &Out, const DIGlobalVariable *N,
Printer.printBool("isLocal", N->isLocalToUnit());
Printer.printBool("isDefinition", N->isDefinition());
Printer.printMetadata("declaration", N->getRawStaticDataMemberDeclaration());
+ Printer.printMetadata("templateParams", N->getRawTemplateParams());
Printer.printInt("align", N->getAlignInBits());
Out << ")";
}
@@ -2252,11 +2293,15 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
Machine = MachineStorage.get();
}
int Slot = Machine->getMetadataSlot(N);
- if (Slot == -1)
+ if (Slot == -1) {
+ if (const DILocation *Loc = dyn_cast<DILocation>(N)) {
+ writeDILocation(Out, Loc, TypePrinter, Machine, Context);
+ return;
+ }
// Give the pointer value instead of "badref", since this comes up all
// the time when debugging.
Out << "<" << N << ">";
- else
+ } else
Out << '!' << Slot;
return;
}
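With this hunk, a metadata node that never received a slot is no longer always printed as a bare pointer: a DILocation is written out inline instead, which keeps debug-location attachments readable when printing isolated instructions. A small sketch of that fallback order with hypothetical types (Node and printRef are not LLVM names):

#include <iostream>
#include <map>
#include <string>

// Numbered nodes print as "!N"; unnumbered ones print inline when we know
// how, and otherwise fall back to their address.
struct Node { std::string Inline; };

void printRef(std::ostream &Out, const Node &N,
              const std::map<const Node *, int> &Slots) {
  auto It = Slots.find(&N);
  if (It == Slots.end()) {
    if (!N.Inline.empty()) {   // analogous to the DILocation special case
      Out << N.Inline;
      return;
    }
    Out << "<" << &N << ">";   // the address is more useful than "badref"
    return;
  }
  Out << '!' << It->second;
}

int main() {
  Node A{"!DILocation(line: 3, column: 7)"}, B{""};
  std::map<const Node *, int> Slots{{&B, 0}};
  printRef(std::cout, A, Slots); std::cout << '\n'; // inline form
  printRef(std::cout, B, Slots); std::cout << '\n'; // !0
}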
@@ -2313,7 +2358,7 @@ public:
void writeOperand(const Value *Op, bool PrintType);
void writeParamOperand(const Value *Operand, AttributeSet Attrs);
- void writeOperandBundles(ImmutableCallSite CS);
+ void writeOperandBundles(const CallBase *Call);
void writeSyncScope(const LLVMContext &Context,
SyncScope::ID SSID);
void writeAtomic(const LLVMContext &Context,
@@ -2464,15 +2509,15 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
}
-void AssemblyWriter::writeOperandBundles(ImmutableCallSite CS) {
- if (!CS.hasOperandBundles())
+void AssemblyWriter::writeOperandBundles(const CallBase *Call) {
+ if (!Call->hasOperandBundles())
return;
Out << " [ ";
bool FirstBundle = true;
- for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
- OperandBundleUse BU = CS.getOperandBundleAt(i);
+ for (unsigned i = 0, e = Call->getNumOperandBundles(); i != e; ++i) {
+ OperandBundleUse BU = Call->getOperandBundleAt(i);
if (!FirstBundle)
Out << ", ";
@@ -2643,12 +2688,12 @@ void AssemblyWriter::printModuleSummaryIndex() {
}
// Print the TypeIdMap entries.
- for (auto &TId : TheIndex->typeIds()) {
- auto GUID = GlobalValue::getGUID(TId.first);
- Out << "^" << Machine.getGUIDSlot(GUID) << " = typeid: (name: \""
- << TId.first << "\"";
- printTypeIdSummary(TId.second);
- Out << ") ; guid = " << GUID << "\n";
+ for (auto TidIter = TheIndex->typeIds().begin();
+ TidIter != TheIndex->typeIds().end(); TidIter++) {
+ Out << "^" << Machine.getTypeIdSlot(TidIter->second.first)
+ << " = typeid: (name: \"" << TidIter->second.first << "\"";
+ printTypeIdSummary(TidIter->second.second);
+ Out << ") ; guid = " << TidIter->first << "\n";
}
}
@@ -2800,7 +2845,7 @@ void AssemblyWriter::printAliasSummary(const AliasSummary *AS) {
}
void AssemblyWriter::printGlobalVarSummary(const GlobalVarSummary *GS) {
- // Nothing for now
+ Out << ", varFlags: (readonly: " << GS->VarFlags.ReadOnly << ")";
}
static std::string getLinkageName(GlobalValue::LinkageTypes LT) {
@@ -2840,22 +2885,6 @@ static std::string getLinkageNameWithSpace(GlobalValue::LinkageTypes LT) {
return getLinkageName(LT) + " ";
}
-static const char *getHotnessName(CalleeInfo::HotnessType HT) {
- switch (HT) {
- case CalleeInfo::HotnessType::Unknown:
- return "unknown";
- case CalleeInfo::HotnessType::Cold:
- return "cold";
- case CalleeInfo::HotnessType::None:
- return "none";
- case CalleeInfo::HotnessType::Hot:
- return "hot";
- case CalleeInfo::HotnessType::Critical:
- return "critical";
- }
- llvm_unreachable("invalid hotness");
-}
-
void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
Out << ", insts: " << FS->instCount();
@@ -2867,6 +2896,7 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
Out << ", readOnly: " << FFlags.ReadOnly;
Out << ", noRecurse: " << FFlags.NoRecurse;
Out << ", returnDoesNotAlias: " << FFlags.ReturnDoesNotAlias;
+ Out << ", noInline: " << FFlags.NoInline;
Out << ")";
}
if (!FS->calls().empty()) {
@@ -2897,12 +2927,19 @@ void AssemblyWriter::printTypeIdInfo(
Out << "typeTests: (";
FieldSeparator FS;
for (auto &GUID : TIDInfo.TypeTests) {
- Out << FS;
- auto Slot = Machine.getGUIDSlot(GUID);
- if (Slot != -1)
- Out << "^" << Slot;
- else
+ auto TidIter = TheIndex->typeIds().equal_range(GUID);
+ if (TidIter.first == TidIter.second) {
+ Out << FS;
Out << GUID;
+ continue;
+ }
+    // Print all type ids that correspond to this GUID.
+ for (auto It = TidIter.first; It != TidIter.second; ++It) {
+ Out << FS;
+ auto Slot = Machine.getTypeIdSlot(It->second.first);
+ assert(Slot != -1);
+ Out << "^" << Slot;
+ }
}
Out << ")";
}
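This hunk replaces the GUID-keyed slot lookup with a search over the new multimap-style typeIds() container: every type-id name associated with the GUID is printed with its own slot, and only a GUID with no named entry falls back to the raw number. A standalone sketch of that equal_range pattern, with hypothetical names and a made-up slot function:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Hypothetical index: one GUID can map to several type-id names.
using TypeIdMap = std::multimap<uint64_t, std::string>;

int getTypeIdSlot(const std::string &Name) {
  // Stand-in for Machine.getTypeIdSlot(); pretend slots track name length.
  return static_cast<int>(Name.size());
}

void printTypeTest(std::ostream &Out, const TypeIdMap &Index, uint64_t GUID) {
  auto Range = Index.equal_range(GUID);
  if (Range.first == Range.second) {
    Out << GUID;               // no named type id: fall back to the raw GUID
    return;
  }
  bool First = true;
  for (auto It = Range.first; It != Range.second; ++It) {
    Out << (First ? "" : ", ") << '^' << getTypeIdSlot(It->second);
    First = false;
  }
}

int main() {
  TypeIdMap Index{{42, "_ZTS1A"}, {42, "_ZTS1B"}};
  printTypeTest(std::cout, Index, 42); std::cout << '\n'; // one ^slot per name
  printTypeTest(std::cout, Index, 7);  std::cout << '\n'; // 7
}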
@@ -2928,14 +2965,25 @@ void AssemblyWriter::printTypeIdInfo(
}
void AssemblyWriter::printVFuncId(const FunctionSummary::VFuncId VFId) {
- Out << "vFuncId: (";
- auto Slot = Machine.getGUIDSlot(VFId.GUID);
- if (Slot != -1)
- Out << "^" << Slot;
- else
+ auto TidIter = TheIndex->typeIds().equal_range(VFId.GUID);
+ if (TidIter.first == TidIter.second) {
+ Out << "vFuncId: (";
Out << "guid: " << VFId.GUID;
- Out << ", offset: " << VFId.Offset;
- Out << ")";
+ Out << ", offset: " << VFId.Offset;
+ Out << ")";
+ return;
+ }
+  // Print all type ids that correspond to this GUID.
+ FieldSeparator FS;
+ for (auto It = TidIter.first; It != TidIter.second; ++It) {
+ Out << FS;
+ Out << "vFuncId: (";
+ auto Slot = Machine.getTypeIdSlot(It->second.first);
+ assert(Slot != -1);
+ Out << "^" << Slot;
+ Out << ", offset: " << VFId.Offset;
+ Out << ")";
+ }
}
void AssemblyWriter::printNonConstVCalls(
@@ -2955,11 +3003,13 @@ void AssemblyWriter::printConstVCalls(
FieldSeparator FS;
for (auto &ConstVCall : VCallList) {
Out << FS;
+ Out << "(";
printVFuncId(ConstVCall.VFunc);
if (!ConstVCall.Args.empty()) {
Out << ", ";
printArgs(ConstVCall.Args);
}
+ Out << ")";
}
Out << ")";
}
@@ -2989,6 +3039,8 @@ void AssemblyWriter::printSummary(const GlobalValueSummary &Summary) {
FieldSeparator FS;
for (auto &Ref : RefList) {
Out << FS;
+ if (Ref.isReadOnly())
+ Out << "readonly ";
Out << "^" << Machine.getGUIDSlot(Ref.getGUID());
}
Out << ")";
@@ -3354,6 +3406,13 @@ void AssemblyWriter::printFunction(const Function *F) {
StringRef UA = getUnnamedAddrEncoding(F->getUnnamedAddr());
if (!UA.empty())
Out << ' ' << UA;
+ // We print the function address space if it is non-zero or if we are writing
+ // a module with a non-zero program address space or if there is no valid
+ // Module* so that the file can be parsed without the datalayout string.
+ const Module *Mod = F->getParent();
+ if (F->getAddressSpace() != 0 || !Mod ||
+ Mod->getDataLayout().getProgramAddressSpace() != 0)
+ Out << " addrspace(" << F->getAddressSpace() << ")";
if (Attrs.hasAttributes(AttributeList::FunctionIndex))
Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttributes());
if (F->hasSection()) {
@@ -3491,6 +3550,23 @@ void AssemblyWriter::printInfoComment(const Value &V) {
AnnotationWriter->printInfoComment(V, Out);
}
+static void maybePrintCallAddrSpace(const Value *Operand, const Instruction *I,
+ raw_ostream &Out) {
+ // We print the address space of the call if it is non-zero.
+ unsigned CallAddrSpace = Operand->getType()->getPointerAddressSpace();
+ bool PrintAddrSpace = CallAddrSpace != 0;
+ if (!PrintAddrSpace) {
+ const Module *Mod = getModuleFromVal(I);
+ // We also print it if it is zero but not equal to the program address space
+  // We also print it when it is zero but the program address space is
+  // non-zero, or when we can't find a valid Module*, so that the resulting
+  // file can still be parsed without a datalayout string.
+ PrintAddrSpace = true;
+ }
+ if (PrintAddrSpace)
+ Out << " addrspace(" << CallAddrSpace << ")";
+}
+
// This member is called for each Instruction in a function..
void AssemblyWriter::printInstruction(const Instruction &I) {
if (AnnotationWriter) AnnotationWriter->emitInstructionAnnot(&I, Out);
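Both printFunction above and the new maybePrintCallAddrSpace helper print addrspace(N) whenever leaving it out could make the output ambiguous or unparsable without a datalayout string. A hedged sketch of just that condition; the function name and parameters below are hypothetical:

#include <iostream>

// Print "addrspace(N)" unless omitting it is unambiguous: the space is the
// default 0, a module is available, and the program address space is also 0.
bool shouldPrintAddrSpace(unsigned AddrSpace, bool HaveModule,
                          unsigned ProgramAddrSpace) {
  if (AddrSpace != 0)
    return true;                  // non-default is always printed
  if (!HaveModule)
    return true;                  // no datalayout to fall back on
  return ProgramAddrSpace != 0;   // 0 differs from the module's default
}

int main() {
  std::cout << shouldPrintAddrSpace(0, true, 0) << '\n';   // 0: omitted
  std::cout << shouldPrintAddrSpace(0, true, 200) << '\n'; // 1: printed
  std::cout << shouldPrintAddrSpace(1, true, 0) << '\n';   // 1: printed
}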
@@ -3547,7 +3623,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// Print out the atomicrmw operation
if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I))
- writeAtomicRMWOperation(Out, RMWI->getOperation());
+ Out << ' ' << AtomicRMWInst::getOperationName(RMWI->getOperation());
// Print out the type of the operands...
const Value *Operand = I.getNumOperands() ? I.getOperand(0) : nullptr;
@@ -3688,6 +3764,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (PAL.hasAttributes(AttributeList::ReturnIndex))
Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
+ // Only print addrspace(N) if necessary:
+ maybePrintCallAddrSpace(Operand, &I, Out);
+
// If possible, print out the short form of the call instruction. We can
// only do this if the first argument is a pointer to a nonvararg function,
// and if the return type is not a pointer to a function.
@@ -3730,6 +3809,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (PAL.hasAttributes(AttributeList::ReturnIndex))
Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
+ // Only print addrspace(N) if necessary:
+ maybePrintCallAddrSpace(Operand, &I, Out);
+
// If possible, print out the short form of the invoke instruction. We can
// only do this if the first argument is a pointer to a nonvararg function,
// and if the return type is not a pointer to a function.
diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp
index d87187481be0..ff46debb7a9e 100644
--- a/contrib/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm/lib/IR/Attributes.cpp
@@ -323,6 +323,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "returns_twice";
if (hasAttribute(Attribute::SExt))
return "signext";
+ if (hasAttribute(Attribute::SpeculativeLoadHardening))
+ return "speculative_load_hardening";
if (hasAttribute(Attribute::Speculatable))
return "speculatable";
if (hasAttribute(Attribute::StackProtect))
@@ -637,7 +639,7 @@ LLVM_DUMP_METHOD void AttributeSet::dump() const {
AttributeSetNode::AttributeSetNode(ArrayRef<Attribute> Attrs)
: AvailableAttrs(0), NumAttrs(Attrs.size()) {
// There's memory after the node where we can store the entries in.
- std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
+ llvm::copy(Attrs, getTrailingObjects<Attribute>());
for (const auto I : *this) {
if (!I.isStringAttribute()) {
@@ -656,7 +658,7 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
FoldingSetNodeID ID;
SmallVector<Attribute, 8> SortedAttrs(Attrs.begin(), Attrs.end());
- llvm::sort(SortedAttrs.begin(), SortedAttrs.end());
+ llvm::sort(SortedAttrs);
for (const auto Attr : SortedAttrs)
Attr.Profile(ID);
@@ -807,7 +809,7 @@ AttributeListImpl::AttributeListImpl(LLVMContext &C,
assert(!Sets.empty() && "pointless AttributeListImpl");
// There's memory after the node where we can store the entries in.
- std::copy(Sets.begin(), Sets.end(), getTrailingObjects<AttributeSet>());
+ llvm::copy(Sets, getTrailingObjects<AttributeSet>());
// Initialize AvailableFunctionAttrs summary bitset.
static_assert(Attribute::EndAttrKinds <=
@@ -1683,28 +1685,32 @@ adjustCallerStackProbeSize(Function &Caller, const Function &Callee) {
}
/// If the inlined function defines a min legal vector width, then ensure
-/// the calling function has the same or larger min legal vector width. This
-/// function is called after the inlining decision has been made so we have to
-/// merge the attribute this way. Heuristics that would use
+/// the calling function has the same or larger min legal vector width. If the
+/// caller has the attribute, but the callee doesn't, we need to remove the
+/// attribute from the caller since we can't make any guarantees about the
+/// caller's requirements.
+/// This function is called after the inlining decision has been made so we have
+/// to merge the attribute this way. Heuristics that would use
/// min-legal-vector-width to determine inline compatibility would need to be
/// handled as part of inline cost analysis.
static void
adjustMinLegalVectorWidth(Function &Caller, const Function &Callee) {
- if (Callee.hasFnAttribute("min-legal-vector-width")) {
- uint64_t CalleeVectorWidth;
- Callee.getFnAttribute("min-legal-vector-width")
- .getValueAsString()
- .getAsInteger(0, CalleeVectorWidth);
- if (Caller.hasFnAttribute("min-legal-vector-width")) {
+ if (Caller.hasFnAttribute("min-legal-vector-width")) {
+ if (Callee.hasFnAttribute("min-legal-vector-width")) {
uint64_t CallerVectorWidth;
Caller.getFnAttribute("min-legal-vector-width")
.getValueAsString()
.getAsInteger(0, CallerVectorWidth);
- if (CallerVectorWidth < CalleeVectorWidth) {
+ uint64_t CalleeVectorWidth;
+ Callee.getFnAttribute("min-legal-vector-width")
+ .getValueAsString()
+ .getAsInteger(0, CalleeVectorWidth);
+ if (CallerVectorWidth < CalleeVectorWidth)
Caller.addFnAttr(Callee.getFnAttribute("min-legal-vector-width"));
- }
} else {
- Caller.addFnAttr(Callee.getFnAttribute("min-legal-vector-width"));
+ // If the callee doesn't have the attribute then we don't know anything
+ // and must drop the attribute from the caller.
+ Caller.removeFnAttr("min-legal-vector-width");
}
}
}
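The rewritten adjustMinLegalVectorWidth only acts when the caller carries "min-legal-vector-width": it widens the caller's value to cover the callee, or drops the attribute entirely when the callee makes no claim. A minimal sketch of that merge rule using plain string-to-integer maps instead of LLVM attributes (all names hypothetical):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

using Attrs = std::map<std::string, uint64_t>;

void mergeMinLegalVectorWidth(Attrs &Caller, const Attrs &Callee) {
  auto C = Caller.find("min-legal-vector-width");
  if (C == Caller.end())
    return;                 // caller makes no claim: nothing to fix up
  auto E = Callee.find("min-legal-vector-width");
  if (E == Callee.end()) {
    Caller.erase(C);        // callee unknown: drop the caller's claim
    return;
  }
  if (C->second < E->second)
    C->second = E->second;  // widen the caller to cover the inlined callee
}

int main() {
  Attrs Caller{{"min-legal-vector-width", 256}};
  Attrs Callee{{"min-legal-vector-width", 512}};
  mergeMinLegalVectorWidth(Caller, Callee);
  std::cout << Caller["min-legal-vector-width"] << '\n'; // 512
}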
diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp
index f098ad9725b6..b2eb8b09982e 100644
--- a/contrib/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp
@@ -71,7 +71,27 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// like to use this information to remove upgrade code for some older
// intrinsics. It is currently undecided how we will determine that future
// point.
- if (Name=="ssse3.pabs.b.128" || // Added in 6.0
+ if (Name == "addcarryx.u32" || // Added in 8.0
+ Name == "addcarryx.u64" || // Added in 8.0
+ Name == "addcarry.u32" || // Added in 8.0
+ Name == "addcarry.u64" || // Added in 8.0
+ Name == "subborrow.u32" || // Added in 8.0
+ Name == "subborrow.u64" || // Added in 8.0
+ Name.startswith("sse2.padds.") || // Added in 8.0
+ Name.startswith("sse2.psubs.") || // Added in 8.0
+ Name.startswith("sse2.paddus.") || // Added in 8.0
+ Name.startswith("sse2.psubus.") || // Added in 8.0
+ Name.startswith("avx2.padds.") || // Added in 8.0
+ Name.startswith("avx2.psubs.") || // Added in 8.0
+ Name.startswith("avx2.paddus.") || // Added in 8.0
+ Name.startswith("avx2.psubus.") || // Added in 8.0
+ Name.startswith("avx512.padds.") || // Added in 8.0
+ Name.startswith("avx512.psubs.") || // Added in 8.0
+ Name.startswith("avx512.mask.padds.") || // Added in 8.0
+ Name.startswith("avx512.mask.psubs.") || // Added in 8.0
+ Name.startswith("avx512.mask.paddus.") || // Added in 8.0
+ Name.startswith("avx512.mask.psubus.") || // Added in 8.0
+ Name=="ssse3.pabs.b.128" || // Added in 6.0
Name=="ssse3.pabs.w.128" || // Added in 6.0
Name=="ssse3.pabs.d.128" || // Added in 6.0
Name.startswith("fma4.vfmadd.s") || // Added in 7.0
@@ -265,6 +285,12 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
+ Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
+ Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
+ Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
+ Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
+ Name.startswith("avx512.vpshld.") || // Added in 8.0
+ Name.startswith("avx512.vpshrd.") || // Added in 8.0
Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
@@ -272,10 +298,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
- Name.startswith("avx512.mask.prorv.") || // Added in 7.0
- Name.startswith("avx512.mask.pror.") || // Added in 7.0
- Name.startswith("avx512.mask.prolv.") || // Added in 7.0
- Name.startswith("avx512.mask.prol.") || // Added in 7.0
+ Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
+ Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -340,6 +364,13 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.cvtmask2") || // Added in 5.0
(Name.startswith("xop.vpcom") && // Added in 3.2
F->arg_size() == 2) ||
+ Name.startswith("xop.vprot") || // Added in 8.0
+ Name.startswith("avx512.prol") || // Added in 8.0
+ Name.startswith("avx512.pror") || // Added in 8.0
+ Name.startswith("avx512.mask.prorv.") || // Added in 8.0
+ Name.startswith("avx512.mask.pror.") || // Added in 8.0
+ Name.startswith("avx512.mask.prolv.") || // Added in 8.0
+ Name.startswith("avx512.mask.prol.") || // Added in 8.0
Name.startswith("avx512.ptestm") || //Added in 6.0
Name.startswith("avx512.ptestnm") || //Added in 6.0
Name.startswith("sse2.pavg") || // Added in 6.0
@@ -363,6 +394,17 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
return true;
}
+ if (Name == "rdtscp") { // Added in 8.0
+ // If this intrinsic has 0 operands, it's the new version.
+ if (F->getFunctionType()->getNumParams() == 0)
+ return false;
+
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::x86_rdtscp);
+ return true;
+ }
+
// SSE4.1 ptest functions may have an old signature.
if (Name.startswith("sse41.ptest")) { // Added in 3.2
if (Name.substr(11) == "c")
@@ -456,7 +498,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// the end of the name. Change name from llvm.arm.neon.vclz.* to
// llvm.ctlz.*
FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
- NewFn = Function::Create(fType, F->getLinkage(),
+ NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
"llvm.ctlz." + Name.substr(14), F->getParent());
return true;
}
@@ -472,7 +514,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// Can't use Intrinsic::getDeclaration here as the return types might
// then only be structurally equal.
FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
- NewFn = Function::Create(fType, F->getLinkage(),
+ NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
"llvm." + Name + ".p0i8", F->getParent());
return true;
}
@@ -502,6 +544,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
+ if (Name == "x86.seh.recoverfp") {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
+ return true;
+ }
break;
}
@@ -899,6 +945,148 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
return EmitX86Select(Builder, Mask, Align, Passthru);
}
+static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
+ bool ZeroMask, bool IndexForm) {
+ Type *Ty = CI.getType();
+ unsigned VecWidth = Ty->getPrimitiveSizeInBits();
+ unsigned EltWidth = Ty->getScalarSizeInBits();
+ bool IsFloat = Ty->isFPOrFPVectorTy();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
+ else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_128;
+ else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
+ else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_128;
+ else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
+ else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_256;
+ else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
+ else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_256;
+ else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
+ else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_512;
+ else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
+ else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_512;
+ else if (VecWidth == 128 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
+ else if (VecWidth == 256 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
+ else if (VecWidth == 512 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
+ else if (VecWidth == 128 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
+ else if (VecWidth == 256 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
+ else if (VecWidth == 512 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
+ CI.getArgOperand(2) };
+
+  // If this isn't index form, we need to swap operands 0 and 1.
+ if (!IndexForm)
+ std::swap(Args[0], Args[1]);
+
+ Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
+ Args);
+ Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
+ : Builder.CreateBitCast(CI.getArgOperand(1),
+ Ty);
+ return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
+}
+
+static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
+ bool IsSigned, bool IsAddition) {
+ Type *Ty = CI.getType();
+ Value *Op0 = CI.getOperand(0);
+ Value *Op1 = CI.getOperand(1);
+
+ Intrinsic::ID IID =
+ IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
+ : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
+ Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
+ Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
+
+ if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
+ Value *VecSrc = CI.getOperand(2);
+ Value *Mask = CI.getOperand(3);
+ Res = EmitX86Select(Builder, Mask, Res, VecSrc);
+ }
+ return Res;
+}
+
+static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
+ bool IsRotateRight) {
+ Type *Ty = CI.getType();
+ Value *Src = CI.getArgOperand(0);
+ Value *Amt = CI.getArgOperand(1);
+
+  // Amount may be a scalar immediate, in which case create a splat vector.
+  // Funnel shift amounts are treated as modulo and the types are all
+  // power-of-2, so we only care about the lowest log2 bits anyway.
+ if (Amt->getType() != Ty) {
+ unsigned NumElts = Ty->getVectorNumElements();
+ Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
+ Amt = Builder.CreateVectorSplat(NumElts, Amt);
+ }
+
+ Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
+ Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
+ Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
+
+ if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
+ Value *VecSrc = CI.getOperand(2);
+ Value *Mask = CI.getOperand(3);
+ Res = EmitX86Select(Builder, Mask, Res, VecSrc);
+ }
+ return Res;
+}
+
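upgradeX86Rotate above lowers the legacy rotate intrinsics onto the generic funnel-shift intrinsics: fshl(x, x, n) is a rotate left and fshr(x, x, n) a rotate right, with the amount taken modulo the element width. A scalar sketch of why that identity holds (helper names are hypothetical):

#include <cstdint>
#include <iostream>

// Funnel shift left of (Hi, Lo) by N takes the top bits of the 64-bit
// concatenation; feeding the same value as both halves yields a rotate.
uint32_t funnelShiftLeft(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  Amt &= 31;                           // amounts are modulo the bit width
  if (Amt == 0)
    return Hi;
  return (Hi << Amt) | (Lo >> (32 - Amt));
}

uint32_t rotateLeft(uint32_t X, unsigned Amt) {
  return funnelShiftLeft(X, X, Amt);   // fshl(x, x, n) == rotl(x, n)
}

int main() {
  std::cout << std::hex << rotateLeft(0x80000001u, 4) << '\n'; // 18
}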
+static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
+ bool IsShiftRight, bool ZeroMask) {
+ Type *Ty = CI.getType();
+ Value *Op0 = CI.getArgOperand(0);
+ Value *Op1 = CI.getArgOperand(1);
+ Value *Amt = CI.getArgOperand(2);
+
+ if (IsShiftRight)
+ std::swap(Op0, Op1);
+
+  // Amount may be a scalar immediate, in which case create a splat vector.
+  // Funnel shift amounts are treated as modulo and the types are all
+  // power-of-2, so we only care about the lowest log2 bits anyway.
+ if (Amt->getType() != Ty) {
+ unsigned NumElts = Ty->getVectorNumElements();
+ Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
+ Amt = Builder.CreateVectorSplat(NumElts, Amt);
+ }
+
+ Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
+ Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
+ Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
+
+ unsigned NumArgs = CI.getNumArgOperands();
+ if (NumArgs >= 4) { // For masked intrinsics.
+ Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
+ ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
+ CI.getArgOperand(0);
+ Value *Mask = CI.getOperand(NumArgs - 1);
+ Res = EmitX86Select(Builder, Mask, Res, VecSrc);
+ }
+ return Res;
+}
+
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
Value *Ptr, Value *Data, Value *Mask,
bool Aligned) {
@@ -1265,106 +1453,13 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_dbpsadbw_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("vpshld.")) {
- if (VecWidth == 128 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshld_q_128;
- else if (VecWidth == 128 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshld_d_128;
- else if (VecWidth == 128 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshld_w_128;
- else if (VecWidth == 256 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshld_q_256;
- else if (VecWidth == 256 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshld_d_256;
- else if (VecWidth == 256 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshld_w_256;
- else if (VecWidth == 512 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshld_q_512;
- else if (VecWidth == 512 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshld_d_512;
- else if (VecWidth == 512 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshld_w_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("vpshrd.")) {
- if (VecWidth == 128 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshrd_q_128;
- else if (VecWidth == 128 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshrd_d_128;
- else if (VecWidth == 128 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshrd_w_128;
- else if (VecWidth == 256 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshrd_q_256;
- else if (VecWidth == 256 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshrd_d_256;
- else if (VecWidth == 256 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshrd_w_256;
- else if (VecWidth == 512 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshrd_q_512;
- else if (VecWidth == 512 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshrd_d_512;
- else if (VecWidth == 512 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshrd_w_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("prorv.")) {
- if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prorv_d_128;
- else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prorv_d_256;
- else if (VecWidth == 512 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prorv_d_512;
- else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prorv_q_128;
- else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prorv_q_256;
- else if (VecWidth == 512 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prorv_q_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("prolv.")) {
- if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prolv_d_128;
- else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prolv_d_256;
- else if (VecWidth == 512 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prolv_d_512;
- else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prolv_q_128;
- else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prolv_q_256;
- else if (VecWidth == 512 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prolv_q_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pror.")) {
- if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_pror_d_128;
- else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_pror_d_256;
- else if (VecWidth == 512 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_pror_d_512;
- else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_pror_q_128;
- else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_pror_q_256;
- else if (VecWidth == 512 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_pror_q_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("prol.")) {
- if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prol_d_128;
- else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prol_d_256;
- else if (VecWidth == 512 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prol_d_512;
- else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prol_q_128;
- else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prol_q_256;
- else if (VecWidth == 512 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prol_q_512;
+ } else if (Name.startswith("pmultishift.qb.")) {
+ if (VecWidth == 128)
+ IID = Intrinsic::x86_avx512_pmultishift_qb_128;
+ else if (VecWidth == 256)
+ IID = Intrinsic::x86_avx512_pmultishift_qb_256;
+ else if (VecWidth == 512)
+ IID = Intrinsic::x86_avx512_pmultishift_qb_512;
else
llvm_unreachable("Unexpected intrinsic");
} else
@@ -1654,46 +1749,44 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
C = ConstantInt::getNullValue(Builder.getInt16Ty());
Rep = Builder.CreateICmpEQ(Rep, C);
Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
- } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
- Type *I32Ty = Type::getInt32Ty(C);
- Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
- ConstantInt::get(I32Ty, 0));
- Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
- ConstantInt::get(I32Ty, 0));
- Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
- Builder.CreateFAdd(Elt0, Elt1),
- ConstantInt::get(I32Ty, 0));
- } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
+ } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
+ Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
+ Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
+ Name == "sse.div.ss" || Name == "sse2.div.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
- Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
- Builder.CreateFSub(Elt0, Elt1),
- ConstantInt::get(I32Ty, 0));
- } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
- Type *I32Ty = Type::getInt32Ty(C);
- Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
- ConstantInt::get(I32Ty, 0));
- Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
- ConstantInt::get(I32Ty, 0));
- Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
- Builder.CreateFMul(Elt0, Elt1),
- ConstantInt::get(I32Ty, 0));
- } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
- Type *I32Ty = Type::getInt32Ty(C);
- Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
- ConstantInt::get(I32Ty, 0));
- Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
- ConstantInt::get(I32Ty, 0));
- Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
- Builder.CreateFDiv(Elt0, Elt1),
+ Value *EltOp;
+ if (Name.contains(".add."))
+ EltOp = Builder.CreateFAdd(Elt0, Elt1);
+ else if (Name.contains(".sub."))
+ EltOp = Builder.CreateFSub(Elt0, Elt1);
+ else if (Name.contains(".mul."))
+ EltOp = Builder.CreateFMul(Elt0, Elt1);
+ else
+ EltOp = Builder.CreateFDiv(Elt0, Elt1);
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
// "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
bool CmpEq = Name[16] == 'e';
Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
+ } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
+ Type *OpTy = CI->getArgOperand(0)->getType();
+ unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+ Intrinsic::ID IID;
+ switch (VecWidth) {
+ default: llvm_unreachable("Unexpected intrinsic");
+ case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
+ case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
+ case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
+ }
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ { CI->getOperand(0), CI->getArgOperand(1) });
+ Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
Type *OpTy = CI->getArgOperand(0)->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
@@ -1948,6 +2041,23 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
Rep = Builder.CreateOr(Sel0, Sel1);
+ } else if (IsX86 && (Name.startswith("xop.vprot") ||
+ Name.startswith("avx512.prol") ||
+ Name.startswith("avx512.mask.prol"))) {
+ Rep = upgradeX86Rotate(Builder, *CI, false);
+ } else if (IsX86 && (Name.startswith("avx512.pror") ||
+ Name.startswith("avx512.mask.pror"))) {
+ Rep = upgradeX86Rotate(Builder, *CI, true);
+ } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
+ Name.startswith("avx512.mask.vpshld") ||
+ Name.startswith("avx512.maskz.vpshld"))) {
+ bool ZeroMask = Name[11] == 'z';
+ Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
+ } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
+ Name.startswith("avx512.mask.vpshrd") ||
+ Name.startswith("avx512.maskz.vpshrd"))) {
+ bool ZeroMask = Name[11] == 'z';
+ Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
} else if (IsX86 && Name == "sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);
@@ -2059,6 +2169,24 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
+ } else if (IsX86 && (Name.startswith("sse2.padds.") ||
+ Name.startswith("sse2.psubs.") ||
+ Name.startswith("avx2.padds.") ||
+ Name.startswith("avx2.psubs.") ||
+ Name.startswith("avx512.padds.") ||
+ Name.startswith("avx512.psubs.") ||
+ Name.startswith("avx512.mask.padds.") ||
+ Name.startswith("avx512.mask.psubs."))) {
+ bool IsAdd = Name.contains(".padds");
+ Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
+ } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
+ Name.startswith("sse2.psubus.") ||
+ Name.startswith("avx2.paddus.") ||
+ Name.startswith("avx2.psubus.") ||
+ Name.startswith("avx512.mask.paddus.") ||
+ Name.startswith("avx512.mask.psubus."))) {
+ bool IsAdd = Name.contains(".paddus");
+ Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
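The branches above rewrite the legacy x86 saturating add/subtract intrinsics onto the generic sadd_sat/ssub_sat/uadd_sat/usub_sat intrinsics, optionally followed by a masked select. Scalar sketches of what signed and unsigned saturating addition compute (16-bit lanes chosen arbitrarily; helper names hypothetical):

#include <cstdint>
#include <iostream>
#include <limits>

int16_t saddSat(int16_t A, int16_t B) {
  int32_t R = int32_t(A) + int32_t(B);          // widen, then clamp
  if (R > std::numeric_limits<int16_t>::max())
    return std::numeric_limits<int16_t>::max();
  if (R < std::numeric_limits<int16_t>::min())
    return std::numeric_limits<int16_t>::min();
  return static_cast<int16_t>(R);
}

uint16_t uaddSat(uint16_t A, uint16_t B) {
  uint32_t R = uint32_t(A) + uint32_t(B);
  return R > 0xFFFFu ? uint16_t(0xFFFFu) : static_cast<uint16_t>(R);
}

int main() {
  std::cout << saddSat(32000, 1000) << '\n'; // 32767, clamped
  std::cout << uaddSat(65000, 1000) << '\n'; // 65535, clamped
}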
@@ -2376,24 +2504,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
- Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
- Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
- CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
- Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
- Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
+ Name.startswith("avx512.mask.pand."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -2401,7 +2513,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
+ Name.startswith("avx512.mask.pandn."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
@@ -2410,7 +2523,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
+ Name.startswith("avx512.mask.por."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -2418,7 +2532,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
+ Name.startswith("avx512.mask.pxor."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -2502,26 +2617,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
+ } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
+ Name.startswith("avx512.mask.min.p")) &&
Name.drop_front(18) == ".512") {
- Intrinsic::ID IID;
- if (Name[17] == 's')
- IID = Intrinsic::x86_avx512_max_ps_512;
- else
- IID = Intrinsic::x86_avx512_max_pd_512;
-
- Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
- { CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(4) });
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
- Name.drop_front(18) == ".512") {
- Intrinsic::ID IID;
- if (Name[17] == 's')
- IID = Intrinsic::x86_avx512_min_ps_512;
- else
- IID = Intrinsic::x86_avx512_min_pd_512;
+ bool IsDouble = Name[17] == 'd';
+ bool IsMin = Name[13] == 'i';
+ static const Intrinsic::ID MinMaxTbl[2][2] = {
+ { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
+ { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
+ };
+ Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
@@ -3065,62 +3170,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.maskz.vpermt2var."))) {
bool ZeroMask = Name[11] == 'z';
bool IndexForm = Name[17] == 'i';
- unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
- unsigned EltWidth = CI->getType()->getScalarSizeInBits();
- bool IsFloat = CI->getType()->isFPOrFPVectorTy();
- Intrinsic::ID IID;
- if (VecWidth == 128 && EltWidth == 32 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
- else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_d_128;
- else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
- else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_q_128;
- else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
- else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_d_256;
- else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
- else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_q_256;
- else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
- else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_d_512;
- else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
- else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_q_512;
- else if (VecWidth == 128 && EltWidth == 16)
- IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
- else if (VecWidth == 256 && EltWidth == 16)
- IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
- else if (VecWidth == 512 && EltWidth == 16)
- IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
- else if (VecWidth == 128 && EltWidth == 8)
- IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
- else if (VecWidth == 256 && EltWidth == 8)
- IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
- else if (VecWidth == 512 && EltWidth == 8)
- IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
- else
- llvm_unreachable("Unexpected intrinsic");
-
- Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
- CI->getArgOperand(2) };
-
- // If this isn't index form we need to swap operand 0 and 1.
- if (!IndexForm)
- std::swap(Args[0], Args[1]);
-
- Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
- Args);
- Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
- : Builder.CreateBitCast(CI->getArgOperand(1),
- CI->getType());
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
+ Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
} else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
Name.startswith("avx512.maskz.vpdpbusd.") ||
Name.startswith("avx512.mask.vpdpbusds.") ||
@@ -3181,6 +3231,39 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
+ } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
+ Name == "addcarry.u32" || Name == "addcarry.u64" ||
+ Name == "subborrow.u32" || Name == "subborrow.u64")) {
+ Intrinsic::ID IID;
+ if (Name[0] == 'a' && Name.back() == '2')
+ IID = Intrinsic::x86_addcarry_32;
+ else if (Name[0] == 'a' && Name.back() == '4')
+ IID = Intrinsic::x86_addcarry_64;
+ else if (Name[0] == 's' && Name.back() == '2')
+ IID = Intrinsic::x86_subborrow_32;
+ else if (Name[0] == 's' && Name.back() == '4')
+ IID = Intrinsic::x86_subborrow_64;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ // Make a call with 3 operands.
+ Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2)};
+ Value *NewCall = Builder.CreateCall(
+ Intrinsic::getDeclaration(CI->getModule(), IID),
+ Args);
+
+ // Extract the second result and store it.
+ Value *Data = Builder.CreateExtractValue(NewCall, 1);
+ // Cast the pointer to the right type.
+ Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
+ llvm::PointerType::getUnqual(Data->getType()));
+ Builder.CreateAlignedStore(Data, Ptr, 1);
+ // Replace the original call result with the first result of the new call.
+ Value *CF = Builder.CreateExtractValue(NewCall, 0);
+
+ CI->replaceAllUsesWith(CF);
+ Rep = nullptr;
} else if (IsX86 && Name.startswith("avx512.mask.") &&
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
// Rep will be updated by the call in the condition.
@@ -3356,6 +3439,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
break;
}
+ case Intrinsic::x86_rdtscp: {
+    // This used to take 1 argument. If we have no arguments, it is already
+ // upgraded.
+ if (CI->getNumOperands() == 0)
+ return;
+
+ NewCall = Builder.CreateCall(NewFn);
+ // Extract the second result and store it.
+ Value *Data = Builder.CreateExtractValue(NewCall, 1);
+ // Cast the pointer to the right type.
+ Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
+ llvm::PointerType::getUnqual(Data->getType()));
+ Builder.CreateAlignedStore(Data, Ptr, 1);
+ // Replace the original call result with the first result of the new call.
+ Value *TSC = Builder.CreateExtractValue(NewCall, 0);
+
+ std::string Name = CI->getName();
+ if (!Name.empty()) {
+ CI->setName(Name + ".old");
+ NewCall->setName(Name);
+ }
+ CI->replaceAllUsesWith(TSC);
+ CI->eraseFromParent();
+ return;
+ }
+
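Both the addcarry/subborrow branch earlier and this rdtscp case follow the same upgrade shape: the new intrinsic returns a two-element struct, so the upgrade extracts element 1 and stores it through the old pointer argument, then lets element 0 replace the old call's result. A plain C++ sketch of that shape with hypothetical function names:

#include <cstdint>
#include <iostream>
#include <utility>

// New-style intrinsic: returns {value, extra} instead of writing through a
// pointer. Values here are made up for illustration.
std::pair<uint64_t, uint32_t> newStyleRdtscp() {
  return {123456789ull, 7u}; // {time-stamp counter, processor id}
}

// Old-style wrapper built on top of it, mirroring the upgrade code path.
uint64_t oldStyleRdtscp(uint32_t *Aux) {
  auto Result = newStyleRdtscp();
  *Aux = Result.second;      // store the second result through the pointer
  return Result.first;       // the first result replaces the old return value
}

int main() {
  uint32_t Aux = 0;
  std::cout << oldStyleRdtscp(&Aux) << ' ' << Aux << '\n'; // 123456789 7
}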
case Intrinsic::x86_sse41_insertps:
case Intrinsic::x86_sse41_dppd:
case Intrinsic::x86_sse41_dpps:
diff --git a/contrib/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm/lib/IR/BasicBlock.cpp
index 7c3e5862d1cd..375924360dda 100644
--- a/contrib/llvm/lib/IR/BasicBlock.cpp
+++ b/contrib/llvm/lib/IR/BasicBlock.cpp
@@ -135,9 +135,10 @@ const Module *BasicBlock::getModule() const {
return getParent()->getParent();
}
-const TerminatorInst *BasicBlock::getTerminator() const {
- if (InstList.empty()) return nullptr;
- return dyn_cast<TerminatorInst>(&InstList.back());
+const Instruction *BasicBlock::getTerminator() const {
+ if (InstList.empty() || !InstList.back().isTerminator())
+ return nullptr;
+ return &InstList.back();
}
const CallInst *BasicBlock::getTerminatingMustTailCall() const {
@@ -205,10 +206,8 @@ const Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() const {
if (isa<PHINode>(I) || isa<DbgInfoIntrinsic>(I))
continue;
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end)
- continue;
+ if (I.isLifetimeStartOrEnd())
+ continue;
return &I;
}
@@ -259,6 +258,14 @@ const BasicBlock *BasicBlock::getUniquePredecessor() const {
return PredBB;
}
+bool BasicBlock::hasNPredecessors(unsigned N) const {
+ return hasNItems(pred_begin(this), pred_end(this), N);
+}
+
+bool BasicBlock::hasNPredecessorsOrMore(unsigned N) const {
+ return hasNItemsOrMore(pred_begin(this), pred_end(this), N);
+}
+
const BasicBlock *BasicBlock::getSingleSuccessor() const {
succ_const_iterator SI = succ_begin(this), E = succ_end(this);
if (SI == E) return nullptr; // no successors
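hasNPredecessors and hasNPredecessorsOrMore above lean on the hasNItems/hasNItemsOrMore helpers, which advance an iterator at most N steps instead of counting the whole predecessor list. A generic sketch assuming the same exact-N / at-least-N semantics:

#include <iostream>
#include <list>

template <typename It>
bool hasNItems(It First, It Last, unsigned N) {
  for (; N; --N, ++First)
    if (First == Last)
      return false;          // fewer than N items
  return First == Last;      // exactly N when nothing is left over
}

template <typename It>
bool hasNItemsOrMore(It First, It Last, unsigned N) {
  for (; N; --N, ++First)
    if (First == Last)
      return false;
  return true;               // at least N items
}

int main() {
  std::list<int> Preds{1, 2, 3};
  std::cout << hasNItems(Preds.begin(), Preds.end(), 3) << '\n';       // 1
  std::cout << hasNItemsOrMore(Preds.begin(), Preds.end(), 2) << '\n'; // 1
  std::cout << hasNItems(Preds.begin(), Preds.end(), 2) << '\n';       // 0
}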
@@ -384,7 +391,7 @@ bool BasicBlock::isLegalToHoistInto() const {
assert(Term->getNumSuccessors() > 0);
// Instructions should not be hoisted across exception handling boundaries.
- return !Term->isExceptional();
+ return !Term->isExceptionalTerminator();
}
/// This splits a basic block into two at the specified
@@ -437,12 +444,12 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
}
void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
- TerminatorInst *TI = getTerminator();
+ Instruction *TI = getTerminator();
if (!TI)
// Cope with being called on a BasicBlock that doesn't have a terminator
// yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
return;
- for (BasicBlock *Succ : TI->successors()) {
+ for (BasicBlock *Succ : successors(TI)) {
// N.B. Succ might not be a complete BasicBlock, so don't assume
// that it ends with a non-phi instruction.
for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) {
@@ -468,7 +475,7 @@ const LandingPadInst *BasicBlock::getLandingPadInst() const {
}
Optional<uint64_t> BasicBlock::getIrrLoopHeaderWeight() const {
- const TerminatorInst *TI = getTerminator();
+ const Instruction *TI = getTerminator();
if (MDNode *MDIrrLoopHeader =
TI->getMetadata(LLVMContext::MD_irr_loop)) {
MDString *MDName = cast<MDString>(MDIrrLoopHeader->getOperand(0));
diff --git a/contrib/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm/lib/IR/ConstantFold.cpp
index 90a8366d1696..57de6b042303 100644
--- a/contrib/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm/lib/IR/ConstantFold.cpp
@@ -916,13 +916,14 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
return ConstantVector::get(Result);
}
-
-Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
- Constant *C1, Constant *C2) {
+Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
+ Constant *C2) {
assert(Instruction::isBinaryOp(Opcode) && "Non-binary instruction detected");
- // Handle UndefValue up front.
- if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ // Handle scalar UndefValue. Vectors are always evaluated per element.
+ bool HasScalarUndef = !C1->getType()->isVectorTy() &&
+ (isa<UndefValue>(C1) || isa<UndefValue>(C2));
+ if (HasScalarUndef) {
switch (static_cast<Instruction::BinaryOps>(Opcode)) {
case Instruction::Xor:
if (isa<UndefValue>(C1) && isa<UndefValue>(C2))
@@ -1024,9 +1025,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
}
- // At this point neither constant should be an UndefValue.
- assert(!isa<UndefValue>(C1) && !isa<UndefValue>(C2) &&
- "Unexpected UndefValue");
+ // Neither constant should be UndefValue, unless these are vector constants.
+ assert(!HasScalarUndef && "Unexpected UndefValue");
// Handle simplifications when the RHS is a constant int.
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
@@ -1218,7 +1218,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
}
} else if (VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
- // Perform elementwise folding.
+ // Fold each element and create a vector constant from those constants.
SmallVector<Constant*, 16> Result;
Type *Ty = IntegerType::get(VTy->getContext(), 32);
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
@@ -2052,7 +2052,7 @@ static bool isInBoundsIndices(ArrayRef<IndexTy> Idxs) {
static bool isIndexInRangeOfArrayType(uint64_t NumElements,
const ConstantInt *CI) {
// We cannot bounds check the index if it doesn't fit in an int64_t.
- if (CI->getValue().getActiveBits() > 64)
+ if (CI->getValue().getMinSignedBits() > 64)
return false;
// A negative index or an index past the end of our sequential type is
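Switching the bounds check from getActiveBits() to getMinSignedBits() matters for negative indices: a value like -1 stored in a wide APInt has every bit active, yet it needs only one signed bit and fits in an int64_t. A small sketch of the two bit-width notions on plain 64-bit integers (helper names are hypothetical):

#include <cstdint>
#include <iostream>

unsigned activeBits(uint64_t V) {     // bits needed for the unsigned value
  unsigned N = 0;
  while (V) { ++N; V >>= 1; }
  return N;
}

unsigned minSignedBits(int64_t V) {   // two's-complement width incl. sign bit
  uint64_t U = V < 0 ? ~static_cast<uint64_t>(V) : static_cast<uint64_t>(V);
  return activeBits(U) + 1;
}

int main() {
  std::cout << activeBits(static_cast<uint64_t>(int64_t(-1))) << '\n'; // 64
  std::cout << minSignedBits(-1) << '\n';                              // 1
}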
diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp
index 2351e7e4a389..d36967fdcfe1 100644
--- a/contrib/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm/lib/IR/Constants.cpp
@@ -184,18 +184,15 @@ bool Constant::isNotMinSignedValue() const {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
return !CFP->getValueAPF().bitcastToAPInt().isMinSignedValue();
- // Check for constant vectors which are splats of INT_MIN values.
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
- if (Constant *Splat = CV->getSplatValue())
- return Splat->isNotMinSignedValue();
-
- // Check for constant vectors which are splats of INT_MIN values.
- if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) {
- if (CV->isSplat()) {
- if (CV->getElementType()->isFloatingPointTy())
- return !CV->getElementAsAPFloat(0).bitcastToAPInt().isMinSignedValue();
- return !CV->getElementAsAPInt(0).isMinSignedValue();
+ // Check that vectors don't contain INT_MIN
+ if (this->getType()->isVectorTy()) {
+ unsigned NumElts = this->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = this->getAggregateElement(i);
+ if (!Elt || !Elt->isNotMinSignedValue())
+ return false;
}
+ return true;
}
// It *may* contain INT_MIN, we can't tell.
@@ -353,8 +350,12 @@ Constant *Constant::getAggregateElement(unsigned Elt) const {
Constant *Constant::getAggregateElement(Constant *Elt) const {
assert(isa<IntegerType>(Elt->getType()) && "Index must be an integer");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Elt))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Elt)) {
+    // Check if the constant fits into a uint64_t.
+ if (CI->getValue().getActiveBits() > 64)
+ return nullptr;
return getAggregateElement(CI->getZExtValue());
+ }
return nullptr;
}
@@ -722,9 +723,9 @@ Constant *ConstantFP::get(Type *Ty, StringRef Str) {
return C;
}
-Constant *ConstantFP::getNaN(Type *Ty, bool Negative, unsigned Type) {
+Constant *ConstantFP::getNaN(Type *Ty, bool Negative, uint64_t Payload) {
const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
- APFloat NaN = APFloat::getNaN(Semantics, Negative, Type);
+ APFloat NaN = APFloat::getNaN(Semantics, Negative, Payload);
Constant *C = get(Ty->getContext(), NaN);
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
@@ -733,6 +734,28 @@ Constant *ConstantFP::getNaN(Type *Ty, bool Negative, unsigned Type) {
return C;
}
+Constant *ConstantFP::getQNaN(Type *Ty, bool Negative, APInt *Payload) {
+ const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
+ APFloat NaN = APFloat::getQNaN(Semantics, Negative, Payload);
+ Constant *C = get(Ty->getContext(), NaN);
+
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+Constant *ConstantFP::getSNaN(Type *Ty, bool Negative, APInt *Payload) {
+ const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
+ APFloat NaN = APFloat::getSNaN(Semantics, Negative, Payload);
+ Constant *C = get(Ty->getContext(), NaN);
+
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
Constant *ConstantFP::getNegativeZero(Type *Ty) {
const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
APFloat NegZero = APFloat::getZero(Semantics, /*Negative=*/true);
@@ -940,7 +963,7 @@ ConstantAggregate::ConstantAggregate(CompositeType *T, ValueTy VT,
ArrayRef<Constant *> V)
: Constant(T, VT, OperandTraits<ConstantAggregate>::op_end(this) - V.size(),
V.size()) {
- std::copy(V.begin(), V.end(), op_begin());
+ llvm::copy(V, op_begin());
// Check that types match, unless this is an opaque struct.
if (auto *ST = dyn_cast<StructType>(T))
@@ -1780,6 +1803,36 @@ Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy,
return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy, OnlyIfReduced);
}
+Constant *ConstantExpr::get(unsigned Opcode, Constant *C, unsigned Flags,
+ Type *OnlyIfReducedTy) {
+ // Check the operands for consistency first.
+ assert(Instruction::isUnaryOp(Opcode) &&
+ "Invalid opcode in unary constant expression");
+
+#ifndef NDEBUG
+ switch (Opcode) {
+ case Instruction::FNeg:
+ assert(C->getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ default:
+ break;
+ }
+#endif
+
+ // TODO: Try to constant fold operation.
+
+ if (OnlyIfReducedTy == C->getType())
+ return nullptr;
+
+ Constant *ArgVec[] = { C };
+ ConstantExprKeyType Key(Opcode, ArgVec, 0, Flags);
+
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(C->getType(), Key);
+}
+
Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
unsigned Flags, Type *OnlyIfReducedTy) {
// Check the operands for consistency first.
@@ -1946,9 +1999,8 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
if (!Ty)
Ty = cast<PointerType>(C->getType()->getScalarType())->getElementType();
else
- assert(
- Ty ==
- cast<PointerType>(C->getType()->getScalarType())->getContainedType(0u));
+ assert(Ty ==
+ cast<PointerType>(C->getType()->getScalarType())->getElementType());
if (Constant *FC =
ConstantFoldGetElementPtr(Ty, C, InBounds, InRangeIndex, Idxs))
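For reference, a minimal C++ sketch of the ConstantFP::getQNaN/getSNaN entry points added above, assuming only an existing LLVMContext; a null payload requests the default NaN bit pattern, and vector types receive a splat as in the implementation:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Quiet and signaling NaN constants for float, plus a <4 x float> splat.
    Constant *makeNaNs(LLVMContext &Ctx) {
      Type *FloatTy = Type::getFloatTy(Ctx);
      Constant *QNaN = ConstantFP::getQNaN(FloatTy, /*Negative=*/false,
                                           /*Payload=*/nullptr);
      Constant *SNaN = ConstantFP::getSNaN(FloatTy, /*Negative=*/true,
                                           /*Payload=*/nullptr);
      Constant *VecQNaN =
          ConstantFP::getQNaN(VectorType::get(FloatTy, 4), false, nullptr);
      (void)SNaN;
      (void)VecQNaN;
      return QNaN;
    }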
diff --git a/contrib/llvm/lib/IR/ConstantsContext.h b/contrib/llvm/lib/IR/ConstantsContext.h
index e9f31e4ded68..eac171397084 100644
--- a/contrib/llvm/lib/IR/ConstantsContext.h
+++ b/contrib/llvm/lib/IR/ConstantsContext.h
@@ -529,7 +529,9 @@ struct ConstantExprKeyType {
ConstantExpr *create(TypeClass *Ty) const {
switch (Opcode) {
default:
- if (Instruction::isCast(Opcode))
+ if (Instruction::isCast(Opcode) ||
+ (Opcode >= Instruction::UnaryOpsBegin &&
+ Opcode < Instruction::UnaryOpsEnd))
return new UnaryConstantExpr(Opcode, Ops[0], Ty);
if ((Opcode >= Instruction::BinaryOpsBegin &&
Opcode < Instruction::BinaryOpsEnd))
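The factory change above routes unary opcodes (currently only FNeg) to UnaryConstantExpr. A hedged sketch of the matching unary ConstantExpr::get overload defined in Constants.cpp, passing the neutral Flags/OnlyIfReducedTy values explicitly:

    #include <cassert>
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Create a constant fneg expression without attempting any folding.
    Constant *makeFNegExpr(Constant *X) {
      assert(X->getType()->isFPOrFPVectorTy() && "fneg needs an FP operand");
      return ConstantExpr::get(Instruction::FNeg, X, /*Flags=*/0,
                               /*OnlyIfReducedTy=*/nullptr);
    }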
diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp
index bea4dee15c13..815797f4b7ea 100644
--- a/contrib/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm/lib/IR/Core.cpp
@@ -15,8 +15,8 @@
#include "llvm-c/Core.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
@@ -107,6 +107,14 @@ void LLVMContextSetYieldCallback(LLVMContextRef C, LLVMYieldCallback Callback,
unwrap(C)->setYieldCallback(YieldCallback, OpaqueHandle);
}
+LLVMBool LLVMContextShouldDiscardValueNames(LLVMContextRef C) {
+ return unwrap(C)->shouldDiscardValueNames();
+}
+
+void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard) {
+ unwrap(C)->setDiscardValueNames(Discard);
+}
+
void LLVMContextDispose(LLVMContextRef C) {
delete unwrap(C);
}
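A small sketch of the new value-name toggle in the C API, assuming nothing beyond llvm-c/Core.h; discarding local value names trims memory when textual IR names are not needed:

    #include <llvm-c/Core.h>
    #include <stdio.h>

    void demoDiscardNames(void) {
      LLVMContextRef Ctx = LLVMContextCreate();
      printf("discarding names by default? %d\n",
             LLVMContextShouldDiscardValueNames(Ctx));
      LLVMContextSetDiscardValueNames(Ctx, 1); /* drop local value names */
      LLVMContextDispose(Ctx);
    }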
@@ -706,6 +714,10 @@ LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy) {
return unwrap<StructType>(StructTy)->isOpaque();
}
+LLVMBool LLVMIsLiteralStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isLiteral();
+}
+
LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
return wrap(unwrap(M)->getTypeByName(Name));
}
@@ -868,6 +880,38 @@ void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef Val) {
unwrap<Instruction>(Inst)->setMetadata(KindID, N);
}
+struct LLVMOpaqueValueMetadataEntry {
+ unsigned Kind;
+ LLVMMetadataRef Metadata;
+};
+
+using MetadataEntries = SmallVectorImpl<std::pair<unsigned, MDNode *>>;
+static LLVMValueMetadataEntry *
+llvm_getMetadata(size_t *NumEntries,
+ llvm::function_ref<void(MetadataEntries &)> AccessMD) {
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MVEs;
+ AccessMD(MVEs);
+
+ LLVMOpaqueValueMetadataEntry *Result =
+ static_cast<LLVMOpaqueValueMetadataEntry *>(
+ safe_malloc(MVEs.size() * sizeof(LLVMOpaqueValueMetadataEntry)));
+ for (unsigned i = 0; i < MVEs.size(); ++i) {
+ const auto &ModuleFlag = MVEs[i];
+ Result[i].Kind = ModuleFlag.first;
+ Result[i].Metadata = wrap(ModuleFlag.second);
+ }
+ *NumEntries = MVEs.size();
+ return Result;
+}
+
+LLVMValueMetadataEntry *
+LLVMInstructionGetAllMetadataOtherThanDebugLoc(LLVMValueRef Value,
+ size_t *NumEntries) {
+ return llvm_getMetadata(NumEntries, [&Value](MetadataEntries &Entries) {
+ unwrap<Instruction>(Value)->getAllMetadata(Entries);
+ });
+}
+
/*--.. Conversion functions ................................................--*/
#define LLVM_DEFINE_VALUE_CAST(name) \
@@ -1065,6 +1109,54 @@ unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V) {
return cast<MDNode>(MD->getMetadata())->getNumOperands();
}
+LLVMNamedMDNodeRef LLVMGetFirstNamedMetadata(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::named_metadata_iterator I = Mod->named_metadata_begin();
+ if (I == Mod->named_metadata_end())
+ return nullptr;
+ return wrap(&*I);
+}
+
+LLVMNamedMDNodeRef LLVMGetLastNamedMetadata(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::named_metadata_iterator I = Mod->named_metadata_end();
+ if (I == Mod->named_metadata_begin())
+ return nullptr;
+ return wrap(&*--I);
+}
+
+LLVMNamedMDNodeRef LLVMGetNextNamedMetadata(LLVMNamedMDNodeRef NMD) {
+ NamedMDNode *NamedNode = unwrap<NamedMDNode>(NMD);
+ Module::named_metadata_iterator I(NamedNode);
+ if (++I == NamedNode->getParent()->named_metadata_end())
+ return nullptr;
+ return wrap(&*I);
+}
+
+LLVMNamedMDNodeRef LLVMGetPreviousNamedMetadata(LLVMNamedMDNodeRef NMD) {
+ NamedMDNode *NamedNode = unwrap<NamedMDNode>(NMD);
+ Module::named_metadata_iterator I(NamedNode);
+ if (I == NamedNode->getParent()->named_metadata_begin())
+ return nullptr;
+ return wrap(&*--I);
+}
+
+LLVMNamedMDNodeRef LLVMGetNamedMetadata(LLVMModuleRef M,
+ const char *Name, size_t NameLen) {
+ return wrap(unwrap(M)->getNamedMetadata(StringRef(Name, NameLen)));
+}
+
+LLVMNamedMDNodeRef LLVMGetOrInsertNamedMetadata(LLVMModuleRef M,
+ const char *Name, size_t NameLen) {
+ return wrap(unwrap(M)->getOrInsertNamedMetadata({Name, NameLen}));
+}
+
+const char *LLVMGetNamedMetadataName(LLVMNamedMDNodeRef NMD, size_t *NameLen) {
+ NamedMDNode *NamedNode = unwrap<NamedMDNode>(NMD);
+ *NameLen = NamedNode->getName().size();
+ return NamedNode->getName().data();
+}
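A sketch of iterating named metadata with the accessors above, assuming a populated LLVMModuleRef; names come back as length-delimited (pointer, length) pairs rather than NUL-terminated strings:

    #include <llvm-c/Core.h>
    #include <stdio.h>

    void listNamedMetadata(LLVMModuleRef M) {
      for (LLVMNamedMDNodeRef NMD = LLVMGetFirstNamedMetadata(M); NMD;
           NMD = LLVMGetNextNamedMetadata(NMD)) {
        size_t Len = 0;
        const char *Name = LLVMGetNamedMetadataName(NMD, &Len);
        printf("!%.*s\n", (int)Len, Name);
      }
      /* Looks the node up by name, creating it if it does not exist yet. */
      LLVMGetOrInsertNamedMetadata(M, "llvm.module.flags", 17);
    }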
+
void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest) {
auto *MD = cast<MetadataAsValue>(unwrap(V));
if (auto *MDV = dyn_cast<ValueAsMetadata>(MD->getMetadata())) {
@@ -1105,6 +1197,78 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char *Name,
N->addOperand(extractMDNode(unwrap<MetadataAsValue>(Val)));
}
+const char *LLVMGetDebugLocDirectory(LLVMValueRef Val, unsigned *Length) {
+ if (!Length) return nullptr;
+ StringRef S;
+ if (const auto *I = unwrap<Instruction>(Val)) {
+ S = I->getDebugLoc()->getDirectory();
+ } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+  // If the two locations are irreconcilable, just pick one. This is misleading,
+ GV->getDebugInfo(GVEs);
+ if (GVEs.size())
+ if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
+ S = DGV->getDirectory();
+ } else if (const auto *F = unwrap<Function>(Val)) {
+ if (const DISubprogram *DSP = F->getSubprogram())
+ S = DSP->getDirectory();
+ } else {
+ assert(0 && "Expected Instruction, GlobalVariable or Function");
+ return nullptr;
+ }
+ *Length = S.size();
+ return S.data();
+}
+
+const char *LLVMGetDebugLocFilename(LLVMValueRef Val, unsigned *Length) {
+ if (!Length) return nullptr;
+ StringRef S;
+ if (const auto *I = unwrap<Instruction>(Val)) {
+ S = I->getDebugLoc()->getFilename();
+ } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVEs;
+ GV->getDebugInfo(GVEs);
+ if (GVEs.size())
+ if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
+ S = DGV->getFilename();
+ } else if (const auto *F = unwrap<Function>(Val)) {
+ if (const DISubprogram *DSP = F->getSubprogram())
+ S = DSP->getFilename();
+ } else {
+ assert(0 && "Expected Instruction, GlobalVariable or Function");
+ return nullptr;
+ }
+ *Length = S.size();
+ return S.data();
+}
+
+unsigned LLVMGetDebugLocLine(LLVMValueRef Val) {
+ unsigned L = 0;
+ if (const auto *I = unwrap<Instruction>(Val)) {
+ L = I->getDebugLoc()->getLine();
+ } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVEs;
+ GV->getDebugInfo(GVEs);
+ if (GVEs.size())
+ if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
+ L = DGV->getLine();
+ } else if (const auto *F = unwrap<Function>(Val)) {
+ if (const DISubprogram *DSP = F->getSubprogram())
+ L = DSP->getLine();
+ } else {
+ assert(0 && "Expected Instruction, GlobalVariable or Function");
+ return -1;
+ }
+ return L;
+}
+
+unsigned LLVMGetDebugLocColumn(LLVMValueRef Val) {
+ unsigned C = 0;
+ if (const auto *I = unwrap<Instruction>(Val))
+ if (const auto &L = I->getDebugLoc())
+ C = L->getColumn();
+ return C;
+}
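A sketch of the debug-location accessors above, assuming Val is an instruction, global variable, or function that carries debug info; the returned strings are length-delimited, and the column is only meaningful for instructions:

    #include <llvm-c/Core.h>
    #include <stdio.h>

    void printDebugLoc(LLVMValueRef Val) {
      unsigned Len = 0;
      const char *File = LLVMGetDebugLocFilename(Val, &Len);
      unsigned Line = LLVMGetDebugLocLine(Val);
      unsigned Col = LLVMGetDebugLocColumn(Val); /* instructions only */
      printf("%.*s:%u:%u\n", (int)Len, File, Line, Col);
    }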
+
/*--.. Operations on scalar constants ......................................--*/
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
@@ -1453,17 +1617,21 @@ LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices, unsigned NumIndices) {
ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
NumIndices);
- return wrap(ConstantExpr::getGetElementPtr(
- nullptr, unwrap<Constant>(ConstantVal), IdxList));
+ Constant *Val = unwrap<Constant>(ConstantVal);
+ Type *Ty =
+ cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+ return wrap(ConstantExpr::getGetElementPtr(Ty, Val, IdxList));
}
LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices,
unsigned NumIndices) {
- Constant* Val = unwrap<Constant>(ConstantVal);
ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
NumIndices);
- return wrap(ConstantExpr::getInBoundsGetElementPtr(nullptr, Val, IdxList));
+ Constant *Val = unwrap<Constant>(ConstantVal);
+ Type *Ty =
+ cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+ return wrap(ConstantExpr::getInBoundsGetElementPtr(Ty, Val, IdxList));
}
LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
@@ -1792,6 +1960,10 @@ void LLVMSetUnnamedAddr(LLVMValueRef Global, LLVMBool HasUnnamedAddr) {
: GlobalValue::UnnamedAddr::None);
}
+LLVMTypeRef LLVMGlobalGetValueType(LLVMValueRef Global) {
+ return wrap(unwrap<GlobalValue>(Global)->getValueType());
+}
+
/*--.. Operations on global variables, load and store instructions .........--*/
unsigned LLVMGetAlignment(LLVMValueRef V) {
@@ -1824,6 +1996,49 @@ void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) {
"only GlobalValue, AllocaInst, LoadInst and StoreInst have alignment");
}
+LLVMValueMetadataEntry *LLVMGlobalCopyAllMetadata(LLVMValueRef Value,
+ size_t *NumEntries) {
+ return llvm_getMetadata(NumEntries, [&Value](MetadataEntries &Entries) {
+ if (Instruction *Instr = dyn_cast<Instruction>(unwrap(Value))) {
+ Instr->getAllMetadata(Entries);
+ } else {
+ unwrap<GlobalObject>(Value)->getAllMetadata(Entries);
+ }
+ });
+}
+
+unsigned LLVMValueMetadataEntriesGetKind(LLVMValueMetadataEntry *Entries,
+ unsigned Index) {
+ LLVMOpaqueValueMetadataEntry MVE =
+ static_cast<LLVMOpaqueValueMetadataEntry>(Entries[Index]);
+ return MVE.Kind;
+}
+
+LLVMMetadataRef
+LLVMValueMetadataEntriesGetMetadata(LLVMValueMetadataEntry *Entries,
+ unsigned Index) {
+ LLVMOpaqueValueMetadataEntry MVE =
+ static_cast<LLVMOpaqueValueMetadataEntry>(Entries[Index]);
+ return MVE.Metadata;
+}
+
+void LLVMDisposeValueMetadataEntries(LLVMValueMetadataEntry *Entries) {
+ free(Entries);
+}
+
+void LLVMGlobalSetMetadata(LLVMValueRef Global, unsigned Kind,
+ LLVMMetadataRef MD) {
+ unwrap<GlobalObject>(Global)->setMetadata(Kind, unwrap<MDNode>(MD));
+}
+
+void LLVMGlobalEraseMetadata(LLVMValueRef Global, unsigned Kind) {
+ unwrap<GlobalObject>(Global)->eraseMetadata(Kind);
+}
+
+void LLVMGlobalClearMetadata(LLVMValueRef Global) {
+ unwrap<GlobalObject>(Global)->clearMetadata();
+}
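A sketch of the copy-and-dispose pattern for the metadata-entry helpers above, assuming Global wraps a GlobalObject; the returned array is heap-allocated and must be released with LLVMDisposeValueMetadataEntries:

    #include <llvm-c/Core.h>
    #include <stddef.h>

    void walkGlobalMetadata(LLVMValueRef Global) {
      size_t Count = 0;
      LLVMValueMetadataEntry *Entries = LLVMGlobalCopyAllMetadata(Global, &Count);
      for (size_t I = 0; I < Count; ++I) {
        unsigned Kind = LLVMValueMetadataEntriesGetKind(Entries, (unsigned)I);
        LLVMMetadataRef MD =
            LLVMValueMetadataEntriesGetMetadata(Entries, (unsigned)I);
        LLVMGlobalSetMetadata(Global, Kind, MD); /* no-op round trip */
      }
      LLVMDisposeValueMetadataEntries(Entries);
    }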
+
/*--.. Operations on global variables ......................................--*/
LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
@@ -2076,6 +2291,50 @@ unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) {
return 0;
}
+static Intrinsic::ID llvm_map_to_intrinsic_id(unsigned ID) {
+ assert(ID < llvm::Intrinsic::num_intrinsics && "Intrinsic ID out of range");
+ return llvm::Intrinsic::ID(ID);
+}
+
+LLVMValueRef LLVMGetIntrinsicDeclaration(LLVMModuleRef Mod,
+ unsigned ID,
+ LLVMTypeRef *ParamTypes,
+ size_t ParamCount) {
+ ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
+ auto IID = llvm_map_to_intrinsic_id(ID);
+ return wrap(llvm::Intrinsic::getDeclaration(unwrap(Mod), IID, Tys));
+}
+
+const char *LLVMIntrinsicGetName(unsigned ID, size_t *NameLength) {
+ auto IID = llvm_map_to_intrinsic_id(ID);
+ auto Str = llvm::Intrinsic::getName(IID);
+ *NameLength = Str.size();
+ return Str.data();
+}
+
+LLVMTypeRef LLVMIntrinsicGetType(LLVMContextRef Ctx, unsigned ID,
+ LLVMTypeRef *ParamTypes, size_t ParamCount) {
+ auto IID = llvm_map_to_intrinsic_id(ID);
+ ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
+ return wrap(llvm::Intrinsic::getType(*unwrap(Ctx), IID, Tys));
+}
+
+const char *LLVMIntrinsicCopyOverloadedName(unsigned ID,
+ LLVMTypeRef *ParamTypes,
+ size_t ParamCount,
+ size_t *NameLength) {
+ auto IID = llvm_map_to_intrinsic_id(ID);
+ ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
+ auto Str = llvm::Intrinsic::getName(IID, Tys);
+ *NameLength = Str.length();
+ return strdup(Str.c_str());
+}
+
+LLVMBool LLVMIntrinsicIsOverloaded(unsigned ID) {
+ auto IID = llvm_map_to_intrinsic_id(ID);
+ return llvm::Intrinsic::isOverloaded(IID);
+}
+
unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn) {
return unwrap<Function>(Fn)->getCallingConv();
}
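A sketch of the intrinsic helpers above, assuming ID already holds a valid intrinsic identifier; note that LLVMIntrinsicCopyOverloadedName hands back a strdup'ed string the caller must free:

    #include <llvm-c/Core.h>
    #include <stdio.h>
    #include <stdlib.h>

    void describeIntrinsic(LLVMModuleRef M, unsigned ID, LLVMTypeRef OverloadTy) {
      size_t Len = 0;
      const char *Name = LLVMIntrinsicGetName(ID, &Len);
      printf("%.*s overloaded=%d\n", (int)Len, Name,
             LLVMIntrinsicIsOverloaded(ID));
      if (LLVMIntrinsicIsOverloaded(ID)) {
        const char *Full =
            LLVMIntrinsicCopyOverloadedName(ID, &OverloadTy, 1, &Len);
        printf("mangled name: %s\n", Full);
        free((void *)Full); /* result comes from strdup() above */
      }
      /* Finds or materialises the declaration in M. */
      LLVMGetIntrinsicDeclaration(M, ID, &OverloadTy, 1);
    }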
@@ -2277,6 +2536,11 @@ LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) {
return wrap(&*--I);
}
+LLVMBasicBlockRef LLVMCreateBasicBlockInContext(LLVMContextRef C,
+ const char *Name) {
+ return wrap(llvm::BasicBlock::Create(*unwrap(C), Name));
+}
+
LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
LLVMValueRef FnRef,
const char *Name) {
@@ -2391,47 +2655,52 @@ LLVMValueRef LLVMInstructionClone(LLVMValueRef Inst) {
return nullptr;
}
+LLVMValueRef LLVMIsATerminatorInst(LLVMValueRef Inst) {
+ Instruction *I = dyn_cast<Instruction>(unwrap(Inst));
+ return (I && I->isTerminator()) ? wrap(I) : nullptr;
+}
+
unsigned LLVMGetNumArgOperands(LLVMValueRef Instr) {
if (FuncletPadInst *FPI = dyn_cast<FuncletPadInst>(unwrap(Instr))) {
return FPI->getNumArgOperands();
}
- return CallSite(unwrap<Instruction>(Instr)).getNumArgOperands();
+ return unwrap<CallBase>(Instr)->getNumArgOperands();
}
/*--.. Call and invoke instructions ........................................--*/
unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
- return CallSite(unwrap<Instruction>(Instr)).getCallingConv();
+ return unwrap<CallBase>(Instr)->getCallingConv();
}
void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
- return CallSite(unwrap<Instruction>(Instr))
- .setCallingConv(static_cast<CallingConv::ID>(CC));
+ return unwrap<CallBase>(Instr)->setCallingConv(
+ static_cast<CallingConv::ID>(CC));
}
void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned align) {
- CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ auto *Call = unwrap<CallBase>(Instr);
Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), align);
- Call.addAttribute(index, AlignAttr);
+ Call->addAttribute(index, AlignAttr);
}
void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef A) {
- CallSite(unwrap<Instruction>(C)).addAttribute(Idx, unwrap(A));
+ unwrap<CallBase>(C)->addAttribute(Idx, unwrap(A));
}
unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C,
LLVMAttributeIndex Idx) {
- auto CS = CallSite(unwrap<Instruction>(C));
- auto AS = CS.getAttributes().getAttributes(Idx);
+ auto *Call = unwrap<CallBase>(C);
+ auto AS = Call->getAttributes().getAttributes(Idx);
return AS.getNumAttributes();
}
void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs) {
- auto CS = CallSite(unwrap<Instruction>(C));
- auto AS = CS.getAttributes().getAttributes(Idx);
+ auto *Call = unwrap<CallBase>(C);
+ auto AS = Call->getAttributes().getAttributes(Idx);
for (auto A : AS)
*Attrs++ = wrap(A);
}
@@ -2439,30 +2708,32 @@ void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
unsigned KindID) {
- return wrap(CallSite(unwrap<Instruction>(C))
- .getAttribute(Idx, (Attribute::AttrKind)KindID));
+ return wrap(
+ unwrap<CallBase>(C)->getAttribute(Idx, (Attribute::AttrKind)KindID));
}
LLVMAttributeRef LLVMGetCallSiteStringAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- return wrap(CallSite(unwrap<Instruction>(C))
- .getAttribute(Idx, StringRef(K, KLen)));
+ return wrap(unwrap<CallBase>(C)->getAttribute(Idx, StringRef(K, KLen)));
}
void LLVMRemoveCallSiteEnumAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
unsigned KindID) {
- CallSite(unwrap<Instruction>(C))
- .removeAttribute(Idx, (Attribute::AttrKind)KindID);
+ unwrap<CallBase>(C)->removeAttribute(Idx, (Attribute::AttrKind)KindID);
}
void LLVMRemoveCallSiteStringAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- CallSite(unwrap<Instruction>(C)).removeAttribute(Idx, StringRef(K, KLen));
+ unwrap<CallBase>(C)->removeAttribute(Idx, StringRef(K, KLen));
}
LLVMValueRef LLVMGetCalledValue(LLVMValueRef Instr) {
- return wrap(CallSite(unwrap<Instruction>(Instr)).getCalledValue());
+ return wrap(unwrap<CallBase>(Instr)->getCalledValue());
+}
+
+LLVMTypeRef LLVMGetCalledFunctionType(LLVMValueRef Instr) {
+ return wrap(unwrap<CallBase>(Instr)->getFunctionType());
}
/*--.. Operations on call instructions (only) ..............................--*/
@@ -2506,15 +2777,15 @@ void LLVMSetUnwindDest(LLVMValueRef Invoke, LLVMBasicBlockRef B) {
/*--.. Operations on terminators ...........................................--*/
unsigned LLVMGetNumSuccessors(LLVMValueRef Term) {
- return unwrap<TerminatorInst>(Term)->getNumSuccessors();
+ return unwrap<Instruction>(Term)->getNumSuccessors();
}
LLVMBasicBlockRef LLVMGetSuccessor(LLVMValueRef Term, unsigned i) {
- return wrap(unwrap<TerminatorInst>(Term)->getSuccessor(i));
+ return wrap(unwrap<Instruction>(Term)->getSuccessor(i));
}
void LLVMSetSuccessor(LLVMValueRef Term, unsigned i, LLVMBasicBlockRef block) {
- return unwrap<TerminatorInst>(Term)->setSuccessor(i,unwrap(block));
+ return unwrap<Instruction>(Term)->setSuccessor(i, unwrap(block));
}
/*--.. Operations on branch instructions (only) ............................--*/
@@ -2584,6 +2855,8 @@ unsigned LLVMGetNumIndices(LLVMValueRef Inst) {
return EV->getNumIndices();
if (auto *IV = dyn_cast<InsertValueInst>(I))
return IV->getNumIndices();
+ if (auto *CE = dyn_cast<ConstantExpr>(I))
+ return CE->getIndices().size();
llvm_unreachable(
"LLVMGetNumIndices applies only to extractvalue and insertvalue!");
}
@@ -2594,6 +2867,8 @@ const unsigned *LLVMGetIndices(LLVMValueRef Inst) {
return EV->getIndices().data();
if (auto *IV = dyn_cast<InsertValueInst>(I))
return IV->getIndices().data();
+ if (auto *CE = dyn_cast<ConstantExpr>(I))
+ return CE->getIndices().data();
llvm_unreachable(
"LLVMGetIndices applies only to extractvalue and insertvalue!");
}
@@ -2704,9 +2979,22 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
const char *Name) {
- return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch),
- makeArrayRef(unwrap(Args), NumArgs),
- Name));
+ Value *V = unwrap(Fn);
+ FunctionType *FnT =
+ cast<FunctionType>(cast<PointerType>(V->getType())->getElementType());
+
+ return wrap(
+ unwrap(B)->CreateInvoke(FnT, unwrap(Fn), unwrap(Then), unwrap(Catch),
+ makeArrayRef(unwrap(Args), NumArgs), Name));
+}
+
+LLVMValueRef LLVMBuildInvoke2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInvoke(
+ unwrap<FunctionType>(Ty), unwrap(Fn), unwrap(Then), unwrap(Catch),
+ makeArrayRef(unwrap(Args), NumArgs), Name));
}
LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
@@ -3021,6 +3309,30 @@ LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
}
+LLVMValueRef LLVMBuildMemSet(LLVMBuilderRef B, LLVMValueRef Ptr,
+ LLVMValueRef Val, LLVMValueRef Len,
+ unsigned Align) {
+ return wrap(unwrap(B)->CreateMemSet(unwrap(Ptr), unwrap(Val), unwrap(Len), Align));
+}
+
+LLVMValueRef LLVMBuildMemCpy(LLVMBuilderRef B,
+ LLVMValueRef Dst, unsigned DstAlign,
+ LLVMValueRef Src, unsigned SrcAlign,
+ LLVMValueRef Size) {
+ return wrap(unwrap(B)->CreateMemCpy(unwrap(Dst), DstAlign,
+ unwrap(Src), SrcAlign,
+ unwrap(Size)));
+}
+
+LLVMValueRef LLVMBuildMemMove(LLVMBuilderRef B,
+ LLVMValueRef Dst, unsigned DstAlign,
+ LLVMValueRef Src, unsigned SrcAlign,
+ LLVMValueRef Size) {
+ return wrap(unwrap(B)->CreateMemMove(unwrap(Dst), DstAlign,
+ unwrap(Src), SrcAlign,
+ unwrap(Size)));
+}
+
LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
const char *Name) {
return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), nullptr, Name));
@@ -3038,7 +3350,15 @@ LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) {
LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal,
const char *Name) {
- return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name));
+ Value *V = unwrap(PointerVal);
+ PointerType *Ty = cast<PointerType>(V->getType());
+
+ return wrap(unwrap(B)->CreateLoad(Ty->getElementType(), V, Name));
+}
+
+LLVMValueRef LLVMBuildLoad2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef PointerVal, const char *Name) {
+ return wrap(unwrap(B)->CreateLoad(unwrap(Ty), unwrap(PointerVal), Name));
}
LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val,
@@ -3093,20 +3413,50 @@ LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name) {
ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
- return wrap(unwrap(B)->CreateGEP(nullptr, unwrap(Pointer), IdxList, Name));
+ Value *Val = unwrap(Pointer);
+ Type *Ty =
+ cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+ return wrap(unwrap(B)->CreateGEP(Ty, Val, IdxList, Name));
+}
+
+LLVMValueRef LLVMBuildGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, LLVMValueRef *Indices,
+ unsigned NumIndices, const char *Name) {
+ ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
+ return wrap(unwrap(B)->CreateGEP(unwrap(Ty), unwrap(Pointer), IdxList, Name));
}
LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name) {
ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
+ Value *Val = unwrap(Pointer);
+ Type *Ty =
+ cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+ return wrap(unwrap(B)->CreateInBoundsGEP(Ty, Val, IdxList, Name));
+}
+
+LLVMValueRef LLVMBuildInBoundsGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, LLVMValueRef *Indices,
+ unsigned NumIndices, const char *Name) {
+ ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
return wrap(
- unwrap(B)->CreateInBoundsGEP(nullptr, unwrap(Pointer), IdxList, Name));
+ unwrap(B)->CreateInBoundsGEP(unwrap(Ty), unwrap(Pointer), IdxList, Name));
}
LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
unsigned Idx, const char *Name) {
- return wrap(unwrap(B)->CreateStructGEP(nullptr, unwrap(Pointer), Idx, Name));
+ Value *Val = unwrap(Pointer);
+ Type *Ty =
+ cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+ return wrap(unwrap(B)->CreateStructGEP(Ty, Val, Idx, Name));
+}
+
+LLVMValueRef LLVMBuildStructGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, unsigned Idx,
+ const char *Name) {
+ return wrap(
+ unwrap(B)->CreateStructGEP(unwrap(Ty), unwrap(Pointer), Idx, Name));
}
LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
@@ -3248,6 +3598,13 @@ LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
return wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name));
}
+LLVMValueRef LLVMBuildIntCast2(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, LLVMBool IsSigned,
+ const char *Name) {
+ return wrap(
+ unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), IsSigned, Name));
+}
+
LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name) {
return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy),
@@ -3284,9 +3641,20 @@ LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) {
LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
const char *Name) {
- return wrap(unwrap(B)->CreateCall(unwrap(Fn),
- makeArrayRef(unwrap(Args), NumArgs),
- Name));
+ Value *V = unwrap(Fn);
+ FunctionType *FnT =
+ cast<FunctionType>(cast<PointerType>(V->getType())->getElementType());
+
+ return wrap(unwrap(B)->CreateCall(FnT, unwrap(Fn),
+ makeArrayRef(unwrap(Args), NumArgs), Name));
+}
+
+LLVMValueRef LLVMBuildCall2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ const char *Name) {
+ FunctionType *FTy = unwrap<FunctionType>(Ty);
+ return wrap(unwrap(B)->CreateCall(FTy, unwrap(Fn),
+ makeArrayRef(unwrap(Args), NumArgs), Name));
}
LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If,
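The builder changes above recover pointee and callee types from the pointer operand for the legacy entry points and add explicitly typed *2 variants. A self-contained sketch using the new variants, assuming only what llvm-c/Core.h provides:

    #include <llvm-c/Core.h>

    /* Builds: define i32 @caller(i32* %p) {
         %v = load i32, i32* %p
         %r = call i32 @callee(i32 %v)
         ret i32 %r } */
    LLVMModuleRef buildExample(LLVMContextRef Ctx) {
      LLVMModuleRef M = LLVMModuleCreateWithNameInContext("demo", Ctx);
      LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
      LLVMTypeRef I32Ptr = LLVMPointerType(I32, 0);

      LLVMTypeRef CalleeTy = LLVMFunctionType(I32, &I32, 1, 0);
      LLVMValueRef Callee = LLVMAddFunction(M, "callee", CalleeTy);

      LLVMTypeRef CallerTy = LLVMFunctionType(I32, &I32Ptr, 1, 0);
      LLVMValueRef Caller = LLVMAddFunction(M, "caller", CallerTy);
      LLVMBasicBlockRef Entry = LLVMAppendBasicBlockInContext(Ctx, Caller, "entry");

      LLVMBuilderRef B = LLVMCreateBuilderInContext(Ctx);
      LLVMPositionBuilderAtEnd(B, Entry);
      /* The *2 variants take the pointee / callee type explicitly. */
      LLVMValueRef V = LLVMBuildLoad2(B, I32, LLVMGetParam(Caller, 0), "v");
      LLVMValueRef R = LLVMBuildCall2(B, CalleeTy, Callee, &V, 1, "r");
      LLVMBuildRet(B, R);
      LLVMDisposeBuilder(B);
      return M;
    }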
diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp
index 5c5477f4f40f..fb81634a2868 100644
--- a/contrib/llvm/lib/IR/DIBuilder.cpp
+++ b/contrib/llvm/lib/IR/DIBuilder.cpp
@@ -139,7 +139,8 @@ DICompileUnit *DIBuilder::createCompileUnit(
unsigned Lang, DIFile *File, StringRef Producer, bool isOptimized,
StringRef Flags, unsigned RunTimeVer, StringRef SplitName,
DICompileUnit::DebugEmissionKind Kind, uint64_t DWOId,
- bool SplitDebugInlining, bool DebugInfoForProfiling, bool GnuPubnames) {
+ bool SplitDebugInlining, bool DebugInfoForProfiling,
+ DICompileUnit::DebugNameTableKind NameTableKind, bool RangesBaseAddress) {
assert(((Lang <= dwarf::DW_LANG_Fortran08 && Lang >= dwarf::DW_LANG_C89) ||
(Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
@@ -149,7 +150,8 @@ DICompileUnit *DIBuilder::createCompileUnit(
CUNode = DICompileUnit::getDistinct(
VMContext, Lang, File, Producer, isOptimized, Flags, RunTimeVer,
SplitName, Kind, nullptr, nullptr, nullptr, nullptr, nullptr, DWOId,
- SplitDebugInlining, DebugInfoForProfiling, GnuPubnames);
+ SplitDebugInlining, DebugInfoForProfiling, NameTableKind,
+ RangesBaseAddress);
// Create a named metadata so that it is easier to find cu in a module.
NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
@@ -256,10 +258,11 @@ DIBasicType *DIBuilder::createNullPtrType() {
}
DIBasicType *DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
- unsigned Encoding) {
+ unsigned Encoding,
+ DINode::DIFlags Flags) {
assert(!Name.empty() && "Unable to create type without name");
return DIBasicType::get(VMContext, dwarf::DW_TAG_base_type, Name, SizeInBits,
- 0, Encoding);
+ 0, Encoding, Flags);
}
DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) {
@@ -345,13 +348,10 @@ static ConstantAsMetadata *getConstantOrNull(Constant *C) {
return nullptr;
}
-DIDerivedType *DIBuilder::createVariantMemberType(DIScope *Scope, StringRef Name,
- DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits,
- uint32_t AlignInBits,
- uint64_t OffsetInBits,
- Constant *Discriminant,
- DINode::DIFlags Flags, DIType *Ty) {
+DIDerivedType *DIBuilder::createVariantMemberType(
+ DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ Constant *Discriminant, DINode::DIFlags Flags, DIType *Ty) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(Scope), Ty,
SizeInBits, AlignInBits, OffsetInBits, None, Flags,
@@ -504,11 +504,11 @@ DISubroutineType *DIBuilder::createSubroutineType(DITypeRefArray ParameterTypes,
DICompositeType *DIBuilder::createEnumerationType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
uint64_t SizeInBits, uint32_t AlignInBits, DINodeArray Elements,
- DIType *UnderlyingType, StringRef UniqueIdentifier, bool IsFixed) {
+ DIType *UnderlyingType, StringRef UniqueIdentifier, bool IsScoped) {
auto *CTy = DICompositeType::get(
VMContext, dwarf::DW_TAG_enumeration_type, Name, File, LineNumber,
getNonCompileUnitScope(Scope), UnderlyingType, SizeInBits, AlignInBits, 0,
- IsFixed ? DINode::FlagFixedEnum : DINode::FlagZero, Elements, 0, nullptr,
+ IsScoped ? DINode::FlagEnumClass : DINode::FlagZero, Elements, 0, nullptr,
nullptr, UniqueIdentifier);
AllEnumTypes.push_back(CTy);
trackIfUnresolved(CTy);
@@ -640,13 +640,13 @@ static void checkGlobalVariableScope(DIScope *Context) {
DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
unsigned LineNumber, DIType *Ty, bool isLocalToUnit, DIExpression *Expr,
- MDNode *Decl, uint32_t AlignInBits) {
+ MDNode *Decl, MDTuple *templateParams, uint32_t AlignInBits) {
checkGlobalVariableScope(Context);
auto *GV = DIGlobalVariable::getDistinct(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
LineNumber, Ty, isLocalToUnit, true, cast_or_null<DIDerivedType>(Decl),
- AlignInBits);
+ templateParams, AlignInBits);
if (!Expr)
Expr = createExpression();
auto *N = DIGlobalVariableExpression::get(VMContext, GV, Expr);
@@ -657,13 +657,13 @@ DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression(
DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
unsigned LineNumber, DIType *Ty, bool isLocalToUnit, MDNode *Decl,
- uint32_t AlignInBits) {
+ MDTuple *templateParams, uint32_t AlignInBits) {
checkGlobalVariableScope(Context);
return DIGlobalVariable::getTemporary(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
LineNumber, Ty, isLocalToUnit, false,
- cast_or_null<DIDerivedType>(Decl), AlignInBits)
+ cast_or_null<DIDerivedType>(Decl), templateParams, AlignInBits)
.release();
}
@@ -751,18 +751,18 @@ static DISubprogram *getSubprogram(bool IsDistinct, Ts &&... Args) {
DISubprogram *DIBuilder::createFunction(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags,
- bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl,
+ unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine,
+ DINode::DIFlags Flags, DISubprogram::DISPFlags SPFlags,
+ DITemplateParameterArray TParams, DISubprogram *Decl,
DITypeArray ThrownTypes) {
+ bool IsDefinition = SPFlags & DISubprogram::SPFlagDefinition;
auto *Node = getSubprogram(
- /* IsDistinct = */ isDefinition, VMContext,
- getNonCompileUnitScope(Context), Name, LinkageName, File, LineNo, Ty,
- isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, 0, Flags,
- isOptimized, isDefinition ? CUNode : nullptr, TParams, Decl,
+ /*IsDistinct=*/IsDefinition, VMContext, getNonCompileUnitScope(Context),
+ Name, LinkageName, File, LineNo, Ty, ScopeLine, nullptr, 0, 0, Flags,
+ SPFlags, IsDefinition ? CUNode : nullptr, TParams, Decl,
MDTuple::getTemporary(VMContext, None).release(), ThrownTypes);
- if (isDefinition)
+ if (IsDefinition)
AllSubprograms.push_back(Node);
trackIfUnresolved(Node);
return Node;
@@ -770,35 +770,37 @@ DISubprogram *DIBuilder::createFunction(
DISubprogram *DIBuilder::createTempFunctionFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags,
- bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl,
+ unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine,
+ DINode::DIFlags Flags, DISubprogram::DISPFlags SPFlags,
+ DITemplateParameterArray TParams, DISubprogram *Decl,
DITypeArray ThrownTypes) {
- return DISubprogram::getTemporary(
- VMContext, getNonCompileUnitScope(Context), Name, LinkageName,
- File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, nullptr,
- 0, 0, 0, Flags, isOptimized, isDefinition ? CUNode : nullptr,
- TParams, Decl, nullptr, ThrownTypes)
+ bool IsDefinition = SPFlags & DISubprogram::SPFlagDefinition;
+ return DISubprogram::getTemporary(VMContext, getNonCompileUnitScope(Context),
+ Name, LinkageName, File, LineNo, Ty,
+ ScopeLine, nullptr, 0, 0, Flags, SPFlags,
+ IsDefinition ? CUNode : nullptr, TParams,
+ Decl, nullptr, ThrownTypes)
.release();
}
DISubprogram *DIBuilder::createMethod(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned VK, unsigned VIndex, int ThisAdjustment,
- DIType *VTableHolder, DINode::DIFlags Flags, bool isOptimized,
- DITemplateParameterArray TParams, DITypeArray ThrownTypes) {
+ unsigned LineNo, DISubroutineType *Ty, unsigned VIndex, int ThisAdjustment,
+ DIType *VTableHolder, DINode::DIFlags Flags,
+ DISubprogram::DISPFlags SPFlags, DITemplateParameterArray TParams,
+ DITypeArray ThrownTypes) {
assert(getNonCompileUnitScope(Context) &&
"Methods should have both a Context and a context that isn't "
"the compile unit.");
// FIXME: Do we want to use different scope/lines?
+ bool IsDefinition = SPFlags & DISubprogram::SPFlagDefinition;
auto *SP = getSubprogram(
- /* IsDistinct = */ isDefinition, VMContext, cast<DIScope>(Context), Name,
- LinkageName, F, LineNo, Ty, isLocalToUnit, isDefinition, LineNo,
- VTableHolder, VK, VIndex, ThisAdjustment, Flags, isOptimized,
- isDefinition ? CUNode : nullptr, TParams, nullptr, nullptr, ThrownTypes);
+ /*IsDistinct=*/IsDefinition, VMContext, cast<DIScope>(Context), Name,
+ LinkageName, F, LineNo, Ty, LineNo, VTableHolder, VIndex, ThisAdjustment,
+ Flags, SPFlags, IsDefinition ? CUNode : nullptr, TParams, nullptr,
+ nullptr, ThrownTypes);
- if (isDefinition)
+ if (IsDefinition)
AllSubprograms.push_back(SP);
trackIfUnresolved(SP);
return SP;
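A hedged sketch of the reworked DIBuilder::createFunction signature, assuming an existing DIBuilder DIB, DIFile *File, and DISubroutineType *FnTy; the former isLocalToUnit/isDefinition/isOptimized booleans are now packed into DISPFlags via DISubprogram::toSPFlags:

    #include "llvm/IR/DIBuilder.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    DISubprogram *emitFunctionDI(DIBuilder &DIB, DIFile *File,
                                 DISubroutineType *FnTy, unsigned Line) {
      DISubprogram::DISPFlags SPFlags = DISubprogram::toSPFlags(
          /*IsLocalToUnit=*/false, /*IsDefinition=*/true, /*IsOptimized=*/false);
      return DIB.createFunction(File, "main", /*LinkageName=*/"", File, Line,
                                FnTy, /*ScopeLine=*/Line, DINode::FlagPrototyped,
                                SPFlags);
    }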
diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp
index 62c67127276e..63c24b5ee7af 100644
--- a/contrib/llvm/lib/IR/DataLayout.cpp
+++ b/contrib/llvm/lib/IR/DataLayout.cpp
@@ -635,6 +635,14 @@ unsigned DataLayout::getPointerSize(unsigned AS) const {
return I->TypeByteWidth;
}
+unsigned DataLayout::getMaxPointerSize() const {
+ unsigned MaxPointerSize = 0;
+ for (auto &P : Pointers)
+ MaxPointerSize = std::max(MaxPointerSize, P.TypeByteWidth);
+
+ return MaxPointerSize;
+}
+
unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
assert(Ty->isPtrOrPtrVectorTy() &&
"This should only be called with a pointer or pointer vector type");
@@ -808,15 +816,29 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
/// global. This includes an explicitly requested alignment (if the global
/// has one).
unsigned DataLayout::getPreferredAlignment(const GlobalVariable *GV) const {
+ unsigned GVAlignment = GV->getAlignment();
+ // If a section is specified, always precisely honor explicit alignment,
+ // so we don't insert padding into a section we don't control.
+ if (GVAlignment && GV->hasSection())
+ return GVAlignment;
+
+ // If no explicit alignment is specified, compute the alignment based on
+ // the IR type. If an alignment is specified, increase it to match the ABI
+ // alignment of the IR type.
+ //
+ // FIXME: Not sure it makes sense to use the alignment of the type if
+ // there's already an explicit alignment specification.
Type *ElemType = GV->getValueType();
unsigned Alignment = getPrefTypeAlignment(ElemType);
- unsigned GVAlignment = GV->getAlignment();
if (GVAlignment >= Alignment) {
Alignment = GVAlignment;
} else if (GVAlignment != 0) {
Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
}
+ // If no explicit alignment is specified, and the global is large, increase
+ // the alignment to 16.
+ // FIXME: Why 16, specifically?
if (GV->hasInitializer() && GVAlignment == 0) {
if (Alignment < 16) {
// If the global is not external, see if it is large. If so, give it a
diff --git a/contrib/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm/lib/IR/DebugInfo.cpp
index 165c881c13e7..9fa31773b598 100644
--- a/contrib/llvm/lib/IR/DebugInfo.cpp
+++ b/contrib/llvm/lib/IR/DebugInfo.cpp
@@ -280,7 +280,7 @@ bool DebugInfoFinder::addScope(DIScope *Scope) {
}
static MDNode *stripDebugLocFromLoopID(MDNode *N) {
- assert(N->op_begin() != N->op_end() && "Missing self reference?");
+ assert(!empty(N->operands()) && "Missing self reference?");
// if there is no debug location, we do not have to rewrite this MDNode.
if (std::none_of(N->op_begin() + 1, N->op_end(), [](const MDOperand &Op) {
@@ -438,11 +438,10 @@ private:
auto distinctMDSubprogram = [&]() {
return DISubprogram::getDistinct(
MDS->getContext(), FileAndScope, MDS->getName(), LinkageName,
- FileAndScope, MDS->getLine(), Type, MDS->isLocalToUnit(),
- MDS->isDefinition(), MDS->getScopeLine(), ContainingType,
- MDS->getVirtuality(), MDS->getVirtualIndex(),
- MDS->getThisAdjustment(), MDS->getFlags(), MDS->isOptimized(), Unit,
- TemplateParams, Declaration, Variables);
+ FileAndScope, MDS->getLine(), Type, MDS->getScopeLine(),
+ ContainingType, MDS->getVirtualIndex(), MDS->getThisAdjustment(),
+ MDS->getFlags(), MDS->getSPFlags(), Unit, TemplateParams, Declaration,
+ Variables);
};
if (MDS->isDistinct())
@@ -450,11 +449,9 @@ private:
auto *NewMDS = DISubprogram::get(
MDS->getContext(), FileAndScope, MDS->getName(), LinkageName,
- FileAndScope, MDS->getLine(), Type, MDS->isLocalToUnit(),
- MDS->isDefinition(), MDS->getScopeLine(), ContainingType,
- MDS->getVirtuality(), MDS->getVirtualIndex(), MDS->getThisAdjustment(),
- MDS->getFlags(), MDS->isOptimized(), Unit, TemplateParams, Declaration,
- Variables);
+ FileAndScope, MDS->getLine(), Type, MDS->getScopeLine(), ContainingType,
+ MDS->getVirtualIndex(), MDS->getThisAdjustment(), MDS->getFlags(),
+ MDS->getSPFlags(), Unit, TemplateParams, Declaration, Variables);
StringRef OldLinkageName = MDS->getLinkageName();
@@ -491,7 +488,8 @@ private:
CU->getSplitDebugFilename(), DICompileUnit::LineTablesOnly, EnumTypes,
RetainedTypes, GlobalVariables, ImportedEntities, CU->getMacros(),
CU->getDWOId(), CU->getSplitDebugInlining(),
- CU->getDebugInfoForProfiling(), CU->getGnuPubnames());
+ CU->getDebugInfoForProfiling(), CU->getNameTableKind(),
+ CU->getRangesBaseAddress());
}
DILocation *getReplacementMDLocation(DILocation *MLD) {
@@ -690,8 +688,7 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
void Instruction::applyMergedLocation(const DILocation *LocA,
const DILocation *LocB) {
- setDebugLoc(DILocation::getMergedLocation(LocA, LocB,
- DILocation::WithGeneratedLocation));
+ setDebugLoc(DILocation::getMergedLocation(LocA, LocB));
}
//===----------------------------------------------------------------------===//
@@ -700,8 +697,9 @@ void Instruction::applyMergedLocation(const DILocation *LocA,
static unsigned map_from_llvmDWARFsourcelanguage(LLVMDWARFSourceLanguage lang) {
switch (lang) {
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
-case LLVMDWARFSourceLanguage##NAME: return ID;
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
+ case LLVMDWARFSourceLanguage##NAME: \
+ return ID;
#include "llvm/BinaryFormat/Dwarf.def"
#undef HANDLE_DW_LANG
}
@@ -720,6 +718,11 @@ static LLVMDIFlags map_to_llvmDIFlags(DINode::DIFlags Flags) {
return static_cast<LLVMDIFlags>(Flags);
}
+static DISubprogram::DISPFlags
+pack_into_DISPFlags(bool IsLocalToUnit, bool IsDefinition, bool IsOptimized) {
+ return DISubprogram::toSPFlags(IsLocalToUnit, IsDefinition, IsOptimized);
+}
+
unsigned LLVMDebugMetadataVersion() {
return DEBUG_METADATA_VERSION;
}
@@ -803,9 +806,10 @@ LLVMMetadataRef LLVMDIBuilderCreateFunction(
unsigned ScopeLine, LLVMDIFlags Flags, LLVMBool IsOptimized) {
return wrap(unwrap(Builder)->createFunction(
unwrapDI<DIScope>(Scope), {Name, NameLen}, {LinkageName, LinkageNameLen},
- unwrapDI<DIFile>(File), LineNo, unwrapDI<DISubroutineType>(Ty),
- IsLocalToUnit, IsDefinition, ScopeLine, map_from_llvmDIFlags(Flags),
- IsOptimized, nullptr, nullptr, nullptr));
+ unwrapDI<DIFile>(File), LineNo, unwrapDI<DISubroutineType>(Ty), ScopeLine,
+ map_from_llvmDIFlags(Flags),
+ pack_into_DISPFlags(IsLocalToUnit, IsDefinition, IsOptimized), nullptr,
+ nullptr, nullptr));
}
@@ -948,9 +952,11 @@ LLVMDIBuilderCreateVectorType(LLVMDIBuilderRef Builder, uint64_t Size,
LLVMMetadataRef
LLVMDIBuilderCreateBasicType(LLVMDIBuilderRef Builder, const char *Name,
size_t NameLen, uint64_t SizeInBits,
- LLVMDWARFTypeEncoding Encoding) {
+ LLVMDWARFTypeEncoding Encoding,
+ LLVMDIFlags Flags) {
return wrap(unwrap(Builder)->createBasicType({Name, NameLen},
- SizeInBits, Encoding));
+ SizeInBits, Encoding,
+ map_from_llvmDIFlags(Flags)));
}
LLVMMetadataRef LLVMDIBuilderCreatePointerType(
@@ -1219,23 +1225,16 @@ LLVMDIBuilderCreateConstantValueExpression(LLVMDIBuilderRef Builder,
return wrap(unwrap(Builder)->createConstantValueExpression(Value));
}
-LLVMMetadataRef
-LLVMDIBuilderCreateGlobalVariableExpression(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- const char *Name, size_t NameLen,
- const char *Linkage, size_t LinkLen,
- LLVMMetadataRef File,
- unsigned LineNo,
- LLVMMetadataRef Ty,
- LLVMBool LocalToUnit,
- LLVMMetadataRef Expr,
- LLVMMetadataRef Decl,
- uint32_t AlignInBits) {
+LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name,
+ size_t NameLen, const char *Linkage, size_t LinkLen, LLVMMetadataRef File,
+ unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit,
+ LLVMMetadataRef Expr, LLVMMetadataRef Decl, uint32_t AlignInBits) {
return wrap(unwrap(Builder)->createGlobalVariableExpression(
- unwrapDI<DIScope>(Scope), {Name, NameLen}, {Linkage, LinkLen},
- unwrapDI<DIFile>(File), LineNo, unwrapDI<DIType>(Ty),
- LocalToUnit, unwrap<DIExpression>(Expr),
- unwrapDI<MDNode>(Decl), AlignInBits));
+ unwrapDI<DIScope>(Scope), {Name, NameLen}, {Linkage, LinkLen},
+ unwrapDI<DIFile>(File), LineNo, unwrapDI<DIType>(Ty), LocalToUnit,
+ unwrap<DIExpression>(Expr), unwrapDI<MDNode>(Decl),
+ nullptr, AlignInBits));
}
LLVMMetadataRef LLVMTemporaryMDNode(LLVMContextRef Ctx, LLVMMetadataRef *Data,
@@ -1255,26 +1254,21 @@ void LLVMMetadataReplaceAllUsesWith(LLVMMetadataRef TargetMetadata,
MDNode::deleteTemporary(Node);
}
-LLVMMetadataRef
-LLVMDIBuilderCreateTempGlobalVariableFwdDecl(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- const char *Name, size_t NameLen,
- const char *Linkage, size_t LnkLen,
- LLVMMetadataRef File,
- unsigned LineNo,
- LLVMMetadataRef Ty,
- LLVMBool LocalToUnit,
- LLVMMetadataRef Decl,
- uint32_t AlignInBits) {
+LLVMMetadataRef LLVMDIBuilderCreateTempGlobalVariableFwdDecl(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name,
+ size_t NameLen, const char *Linkage, size_t LnkLen, LLVMMetadataRef File,
+ unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit,
+ LLVMMetadataRef Decl, uint32_t AlignInBits) {
return wrap(unwrap(Builder)->createTempGlobalVariableFwdDecl(
- unwrapDI<DIScope>(Scope), {Name, NameLen}, {Linkage, LnkLen},
- unwrapDI<DIFile>(File), LineNo, unwrapDI<DIType>(Ty),
- LocalToUnit, unwrapDI<MDNode>(Decl), AlignInBits));
+ unwrapDI<DIScope>(Scope), {Name, NameLen}, {Linkage, LnkLen},
+ unwrapDI<DIFile>(File), LineNo, unwrapDI<DIType>(Ty), LocalToUnit,
+ unwrapDI<MDNode>(Decl), nullptr, AlignInBits));
}
-LLVMValueRef LLVMDIBuilderInsertDeclareBefore(
- LLVMDIBuilderRef Builder, LLVMValueRef Storage, LLVMMetadataRef VarInfo,
- LLVMMetadataRef Expr, LLVMMetadataRef DL, LLVMValueRef Instr) {
+LLVMValueRef
+LLVMDIBuilderInsertDeclareBefore(LLVMDIBuilderRef Builder, LLVMValueRef Storage,
+ LLVMMetadataRef VarInfo, LLVMMetadataRef Expr,
+ LLVMMetadataRef DL, LLVMValueRef Instr) {
return wrap(unwrap(Builder)->insertDeclare(
unwrap(Storage), unwrap<DILocalVariable>(VarInfo),
unwrap<DIExpression>(Expr), unwrap<DILocation>(DL),
@@ -1353,3 +1347,14 @@ LLVMMetadataRef LLVMGetSubprogram(LLVMValueRef Func) {
void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP) {
unwrap<Function>(Func)->setSubprogram(unwrap<DISubprogram>(SP));
}
+
+LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata) {
+ switch(unwrap(Metadata)->getMetadataID()) {
+#define HANDLE_METADATA_LEAF(CLASS) \
+ case Metadata::CLASS##Kind: \
+ return (LLVMMetadataKind)LLVM##CLASS##MetadataKind;
+#include "llvm/IR/Metadata.def"
+ default:
+ return (LLVMMetadataKind)LLVMGenericDINodeMetadataKind;
+ }
+}
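A sketch of the widened C binding for basic types, assuming a builder obtained from LLVMCreateDIBuilder; the encoding is a raw DW_ATE_* value and the new trailing argument carries the DIFlags:

    #include <llvm-c/DebugInfo.h>
    #include <string.h>

    LLVMMetadataRef makeIntDIType(LLVMDIBuilderRef DIB) {
      /* 0x05 is DW_ATE_signed; flags are the new final parameter. */
      return LLVMDIBuilderCreateBasicType(DIB, "int", strlen("int"),
                                          /*SizeInBits=*/32, /*Encoding=*/0x05,
                                          LLVMDIFlagZero);
    }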
diff --git a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
index 910e8c2fb74f..92f3f21f754c 100644
--- a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -14,16 +14,19 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "LLVMContextImpl.h"
#include "MetadataImpl.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include <numeric>
+
using namespace llvm;
DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
- unsigned Column, ArrayRef<Metadata *> MDs)
+ unsigned Column, ArrayRef<Metadata *> MDs,
+ bool ImplicitCode)
: MDNode(C, DILocationKind, Storage, MDs) {
assert((MDs.size() == 1 || MDs.size() == 2) &&
"Expected a scope and optional inlined-at");
@@ -33,6 +36,8 @@ DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
SubclassData32 = Line;
SubclassData16 = Column;
+
+ setImplicitCode(ImplicitCode);
}
static void adjustColumn(unsigned &Column) {
@@ -43,15 +48,15 @@ static void adjustColumn(unsigned &Column) {
DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line,
unsigned Column, Metadata *Scope,
- Metadata *InlinedAt, StorageType Storage,
- bool ShouldCreate) {
+ Metadata *InlinedAt, bool ImplicitCode,
+ StorageType Storage, bool ShouldCreate) {
// Fixup column.
adjustColumn(Column);
if (Storage == Uniqued) {
- if (auto *N =
- getUniqued(Context.pImpl->DILocations,
- DILocationInfo::KeyTy(Line, Column, Scope, InlinedAt)))
+ if (auto *N = getUniqued(Context.pImpl->DILocations,
+ DILocationInfo::KeyTy(Line, Column, Scope,
+ InlinedAt, ImplicitCode)))
return N;
if (!ShouldCreate)
return nullptr;
@@ -63,36 +68,94 @@ DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line,
Ops.push_back(Scope);
if (InlinedAt)
Ops.push_back(InlinedAt);
- return storeImpl(new (Ops.size())
- DILocation(Context, Storage, Line, Column, Ops),
+ return storeImpl(new (Ops.size()) DILocation(Context, Storage, Line, Column,
+ Ops, ImplicitCode),
Storage, Context.pImpl->DILocations);
}
const DILocation *DILocation::getMergedLocation(const DILocation *LocA,
- const DILocation *LocB,
- bool GenerateLocation) {
+ const DILocation *LocB) {
if (!LocA || !LocB)
return nullptr;
- if (LocA == LocB || !LocA->canDiscriminate(*LocB))
+ if (LocA == LocB)
return LocA;
- if (!GenerateLocation)
- return nullptr;
-
SmallPtrSet<DILocation *, 5> InlinedLocationsA;
for (DILocation *L = LocA->getInlinedAt(); L; L = L->getInlinedAt())
InlinedLocationsA.insert(L);
+ SmallSet<std::pair<DIScope *, DILocation *>, 5> Locations;
+ DIScope *S = LocA->getScope();
+ DILocation *L = LocA->getInlinedAt();
+ while (S) {
+ Locations.insert(std::make_pair(S, L));
+ S = S->getScope().resolve();
+ if (!S && L) {
+ S = L->getScope();
+ L = L->getInlinedAt();
+ }
+ }
const DILocation *Result = LocB;
- for (DILocation *L = LocB->getInlinedAt(); L; L = L->getInlinedAt()) {
- Result = L;
- if (InlinedLocationsA.count(L))
+ S = LocB->getScope();
+ L = LocB->getInlinedAt();
+ while (S) {
+ if (Locations.count(std::make_pair(S, L)))
break;
+ S = S->getScope().resolve();
+ if (!S && L) {
+ S = L->getScope();
+ L = L->getInlinedAt();
+ }
}
- return DILocation::get(Result->getContext(), 0, 0, Result->getScope(),
- Result->getInlinedAt());
+
+ // If the two locations are irreconsilable, just pick one. This is misleading,
+ // but on the other hand, it's a "line 0" location.
+ if (!S || !isa<DILocalScope>(S))
+ S = LocA->getScope();
+ return DILocation::get(Result->getContext(), 0, 0, S, L);
}
+Optional<unsigned> DILocation::encodeDiscriminator(unsigned BD, unsigned DF, unsigned CI) {
+ SmallVector<unsigned, 3> Components = {BD, DF, CI};
+ uint64_t RemainingWork = 0U;
+ // We use RemainingWork to figure out if we have no remaining components to
+ // encode. For example: if BD != 0 but DF == 0 && CI == 0, we don't need to
+ // encode anything for the latter 2.
+ // Since any of the input components is at most 32 bits, their sum will be
+ // less than 34 bits, and thus RemainingWork won't overflow.
+ RemainingWork = std::accumulate(Components.begin(), Components.end(), RemainingWork);
+
+ int I = 0;
+ unsigned Ret = 0;
+ unsigned NextBitInsertionIndex = 0;
+ while (RemainingWork > 0) {
+ unsigned C = Components[I++];
+ RemainingWork -= C;
+ unsigned EC = encodeComponent(C);
+ Ret |= (EC << NextBitInsertionIndex);
+ NextBitInsertionIndex += encodingBits(C);
+ }
+
+ // Encoding may be unsuccessful because of overflow. We determine success by
+ // checking equivalence of components before & after encoding. Alternatively,
+ // we could determine Success during encoding, but the current alternative is
+ // simpler.
+ unsigned TBD, TDF, TCI = 0;
+ decodeDiscriminator(Ret, TBD, TDF, TCI);
+ if (TBD == BD && TDF == DF && TCI == CI)
+ return Ret;
+ return None;
+}
+
+void DILocation::decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
+ unsigned &CI) {
+ BD = getUnsignedFromPrefixEncoding(D);
+ DF = getUnsignedFromPrefixEncoding(getNextComponentInDiscriminator(D));
+ CI = getUnsignedFromPrefixEncoding(
+ getNextComponentInDiscriminator(getNextComponentInDiscriminator(D)));
+}
+
+
DINode::DIFlags DINode::getFlag(StringRef Flag) {
return StringSwitch<DIFlags>(Flag)
#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME)
@@ -274,13 +337,14 @@ DIEnumerator *DIEnumerator::getImpl(LLVMContext &Context, int64_t Value,
DIBasicType *DIBasicType::getImpl(LLVMContext &Context, unsigned Tag,
MDString *Name, uint64_t SizeInBits,
uint32_t AlignInBits, unsigned Encoding,
- StorageType Storage, bool ShouldCreate) {
+ DIFlags Flags, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIBasicType,
- (Tag, Name, SizeInBits, AlignInBits, Encoding));
+ (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags));
Metadata *Ops[] = {nullptr, nullptr, Name};
- DEFINE_GETIMPL_STORE(DIBasicType, (Tag, SizeInBits, AlignInBits, Encoding),
- Ops);
+ DEFINE_GETIMPL_STORE(DIBasicType, (Tag, SizeInBits, AlignInBits, Encoding,
+ Flags), Ops);
}
Optional<DIBasicType::Signedness> DIBasicType::getSignedness() const {
@@ -449,7 +513,8 @@ DICompileUnit *DICompileUnit::getImpl(
unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
Metadata *GlobalVariables, Metadata *ImportedEntities, Metadata *Macros,
uint64_t DWOId, bool SplitDebugInlining, bool DebugInfoForProfiling,
- bool GnuPubnames, StorageType Storage, bool ShouldCreate) {
+ unsigned NameTableKind, bool RangesBaseAddress, StorageType Storage,
+ bool ShouldCreate) {
assert(Storage != Uniqued && "Cannot unique DICompileUnit");
assert(isCanonical(Producer) && "Expected canonical MDString");
assert(isCanonical(Flags) && "Expected canonical MDString");
@@ -462,7 +527,8 @@ DICompileUnit *DICompileUnit::getImpl(
return storeImpl(new (array_lengthof(Ops)) DICompileUnit(
Context, Storage, SourceLanguage, IsOptimized,
RuntimeVersion, EmissionKind, DWOId, SplitDebugInlining,
- DebugInfoForProfiling, GnuPubnames, Ops),
+ DebugInfoForProfiling, NameTableKind, RangesBaseAddress,
+ Ops),
Storage);
}
@@ -472,6 +538,16 @@ DICompileUnit::getEmissionKind(StringRef Str) {
.Case("NoDebug", NoDebug)
.Case("FullDebug", FullDebug)
.Case("LineTablesOnly", LineTablesOnly)
+ .Case("DebugDirectivesOnly", DebugDirectivesOnly)
+ .Default(None);
+}
+
+Optional<DICompileUnit::DebugNameTableKind>
+DICompileUnit::getNameTableKind(StringRef Str) {
+ return StringSwitch<Optional<DebugNameTableKind>>(Str)
+ .Case("Default", DebugNameTableKind::Default)
+ .Case("GNU", DebugNameTableKind::GNU)
+ .Case("None", DebugNameTableKind::None)
.Default(None);
}
@@ -480,6 +556,19 @@ const char *DICompileUnit::emissionKindString(DebugEmissionKind EK) {
case NoDebug: return "NoDebug";
case FullDebug: return "FullDebug";
case LineTablesOnly: return "LineTablesOnly";
+ case DebugDirectivesOnly: return "DebugDirectivesOnly";
+ }
+ return nullptr;
+}
+
+const char *DICompileUnit::nameTableKindString(DebugNameTableKind NTK) {
+ switch (NTK) {
+ case DebugNameTableKind::Default:
+ return nullptr;
+ case DebugNameTableKind::GNU:
+ return "GNU";
+ case DebugNameTableKind::None:
+ return "None";
}
return nullptr;
}
@@ -496,21 +585,55 @@ DILocalScope *DILocalScope::getNonLexicalBlockFileScope() const {
return const_cast<DILocalScope *>(this);
}
+DISubprogram::DISPFlags DISubprogram::getFlag(StringRef Flag) {
+ return StringSwitch<DISPFlags>(Flag)
+#define HANDLE_DISP_FLAG(ID, NAME) .Case("DISPFlag" #NAME, SPFlag##NAME)
+#include "llvm/IR/DebugInfoFlags.def"
+ .Default(SPFlagZero);
+}
+
+StringRef DISubprogram::getFlagString(DISPFlags Flag) {
+ switch (Flag) {
+ // Appease a warning.
+ case SPFlagVirtuality:
+ return "";
+#define HANDLE_DISP_FLAG(ID, NAME) \
+ case SPFlag##NAME: \
+ return "DISPFlag" #NAME;
+#include "llvm/IR/DebugInfoFlags.def"
+ }
+ return "";
+}
+
+DISubprogram::DISPFlags
+DISubprogram::splitFlags(DISPFlags Flags,
+ SmallVectorImpl<DISPFlags> &SplitFlags) {
+ // Multi-bit fields can require special handling. In our case, however, the
+ // only multi-bit field is virtuality, and all its values happen to be
+ // single-bit values, so the right behavior just falls out.
+#define HANDLE_DISP_FLAG(ID, NAME) \
+ if (DISPFlags Bit = Flags & SPFlag##NAME) { \
+ SplitFlags.push_back(Bit); \
+ Flags &= ~Bit; \
+ }
+#include "llvm/IR/DebugInfoFlags.def"
+ return Flags;
+}
+
DISubprogram *DISubprogram::getImpl(
LLVMContext &Context, Metadata *Scope, MDString *Name,
MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
- Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
- int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit,
+ unsigned ScopeLine, Metadata *ContainingType, unsigned VirtualIndex,
+ int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
Metadata *TemplateParams, Metadata *Declaration, Metadata *RetainedNodes,
Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
assert(isCanonical(LinkageName) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(
- DISubprogram, (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, ScopeLine, ContainingType, Virtuality,
- VirtualIndex, ThisAdjustment, Flags, IsOptimized, Unit,
- TemplateParams, Declaration, RetainedNodes, ThrownTypes));
+ DEFINE_GETIMPL_LOOKUP(DISubprogram,
+ (Scope, Name, LinkageName, File, Line, Type, ScopeLine,
+ ContainingType, VirtualIndex, ThisAdjustment, Flags,
+ SPFlags, Unit, TemplateParams, Declaration,
+ RetainedNodes, ThrownTypes));
SmallVector<Metadata *, 11> Ops = {
File, Scope, Name, LinkageName, Type, Unit,
Declaration, RetainedNodes, ContainingType, TemplateParams, ThrownTypes};
@@ -522,11 +645,10 @@ DISubprogram *DISubprogram::getImpl(
Ops.pop_back();
}
}
- DEFINE_GETIMPL_STORE_N(DISubprogram,
- (Line, ScopeLine, Virtuality, VirtualIndex,
- ThisAdjustment, Flags, IsLocalToUnit, IsDefinition,
- IsOptimized),
- Ops, Ops.size());
+ DEFINE_GETIMPL_STORE_N(
+ DISubprogram,
+ (Line, ScopeLine, VirtualIndex, ThisAdjustment, Flags, SPFlags), Ops,
+ Ops.size());
}
bool DISubprogram::describes(const Function *F) const {
@@ -609,19 +731,24 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
MDString *LinkageName, Metadata *File, unsigned Line,
Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
Metadata *StaticDataMemberDeclaration,
- uint32_t AlignInBits, StorageType Storage,
- bool ShouldCreate) {
+ Metadata *TemplateParams, uint32_t AlignInBits,
+ StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
assert(isCanonical(LinkageName) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DIGlobalVariable,
- (Scope, Name, LinkageName, File, Line, Type,
- IsLocalToUnit, IsDefinition,
- StaticDataMemberDeclaration, AlignInBits));
- Metadata *Ops[] = {
- Scope, Name, File, Type, Name, LinkageName, StaticDataMemberDeclaration};
+ DEFINE_GETIMPL_LOOKUP(DIGlobalVariable, (Scope, Name, LinkageName, File, Line,
+ Type, IsLocalToUnit, IsDefinition,
+ StaticDataMemberDeclaration,
+ TemplateParams, AlignInBits));
+ Metadata *Ops[] = {Scope,
+ Name,
+ File,
+ Type,
+ Name,
+ LinkageName,
+ StaticDataMemberDeclaration,
+ TemplateParams};
DEFINE_GETIMPL_STORE(DIGlobalVariable,
- (Line, IsLocalToUnit, IsDefinition, AlignInBits),
- Ops);
+ (Line, IsLocalToUnit, IsDefinition, AlignInBits), Ops);
}
DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope,
diff --git a/contrib/llvm/lib/IR/DebugLoc.cpp b/contrib/llvm/lib/IR/DebugLoc.cpp
index 36f3e179a2c0..10ec98ac7e6c 100644
--- a/contrib/llvm/lib/IR/DebugLoc.cpp
+++ b/contrib/llvm/lib/IR/DebugLoc.cpp
@@ -56,15 +56,28 @@ DebugLoc DebugLoc::getFnDebugLoc() const {
return DebugLoc();
}
+bool DebugLoc::isImplicitCode() const {
+ if (DILocation *Loc = get()) {
+ return Loc->isImplicitCode();
+ }
+ return true;
+}
+
+void DebugLoc::setImplicitCode(bool ImplicitCode) {
+ if (DILocation *Loc = get()) {
+ Loc->setImplicitCode(ImplicitCode);
+ }
+}
+
DebugLoc DebugLoc::get(unsigned Line, unsigned Col, const MDNode *Scope,
- const MDNode *InlinedAt) {
+ const MDNode *InlinedAt, bool ImplicitCode) {
// If no scope is available, this is an unknown location.
if (!Scope)
return DebugLoc();
return DILocation::get(Scope->getContext(), Line, Col,
const_cast<MDNode *>(Scope),
- const_cast<MDNode *>(InlinedAt));
+ const_cast<MDNode *>(InlinedAt), ImplicitCode);
}
DebugLoc DebugLoc::appendInlinedAt(DebugLoc DL, DILocation *InlinedAt,
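A minimal sketch (not part of the patch) of how the new ImplicitCode parameter threads through DebugLoc::get; the helper name and the assumption that Scope comes from existing debug info are mine:

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
using namespace llvm;

// Build a line-0 location tagged as compiler-generated ("implicit") code.
// Scope is assumed to be a DIScope taken from existing debug info.
static DebugLoc makeImplicitLoc(DIScope *Scope) {
  DebugLoc DL = DebugLoc::get(/*Line=*/0, /*Col=*/0, Scope,
                              /*InlinedAt=*/nullptr, /*ImplicitCode=*/true);
  assert(DL.isImplicitCode() && "flag is stored on the underlying DILocation");
  return DL;
}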
diff --git a/contrib/llvm/lib/IR/DiagnosticInfo.cpp b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
index 5ddb1196b072..dc957ab7dad9 100644
--- a/contrib/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
@@ -33,9 +33,10 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
#include <atomic>
#include <cassert>
#include <memory>
@@ -103,10 +104,15 @@ void DiagnosticInfoPGOProfile::print(DiagnosticPrinter &DP) const {
DP << getMsg();
}
+void DiagnosticInfo::anchor() {}
+void DiagnosticInfoStackSize::anchor() {}
+void DiagnosticInfoWithLocationBase::anchor() {}
+void DiagnosticInfoIROptimization::anchor() {}
+
DiagnosticLocation::DiagnosticLocation(const DebugLoc &DL) {
if (!DL)
return;
- Filename = DL->getFilename();
+ File = DL->getFile();
Line = DL->getLine();
Column = DL->getColumn();
}
@@ -114,17 +120,36 @@ DiagnosticLocation::DiagnosticLocation(const DebugLoc &DL) {
DiagnosticLocation::DiagnosticLocation(const DISubprogram *SP) {
if (!SP)
return;
- Filename = SP->getFilename();
+
+ File = SP->getFile();
Line = SP->getScopeLine();
Column = 0;
}
-void DiagnosticInfoWithLocationBase::getLocation(StringRef *Filename,
- unsigned *Line,
- unsigned *Column) const {
- *Filename = Loc.getFilename();
- *Line = Loc.getLine();
- *Column = Loc.getColumn();
+StringRef DiagnosticLocation::getRelativePath() const {
+ return File->getFilename();
+}
+
+std::string DiagnosticLocation::getAbsolutePath() const {
+ StringRef Name = File->getFilename();
+ if (sys::path::is_absolute(Name))
+ return Name;
+
+ SmallString<128> Path;
+ sys::path::append(Path, File->getDirectory(), Name);
+ return sys::path::remove_leading_dotslash(Path).str();
+}
+
+std::string DiagnosticInfoWithLocationBase::getAbsolutePath() const {
+ return Loc.getAbsolutePath();
+}
+
+void DiagnosticInfoWithLocationBase::getLocation(StringRef &RelativePath,
+ unsigned &Line,
+ unsigned &Column) const {
+ RelativePath = Loc.getRelativePath();
+ Line = Loc.getLine();
+ Column = Loc.getColumn();
}
const std::string DiagnosticInfoWithLocationBase::getLocationStr() const {
@@ -132,7 +157,7 @@ const std::string DiagnosticInfoWithLocationBase::getLocationStr() const {
unsigned Line = 0;
unsigned Column = 0;
if (isLocationAvailable())
- getLocation(&Filename, &Line, &Column);
+ getLocation(Filename, Line, Column);
return (Filename + ":" + Twine(Line) + ":" + Twine(Column)).str();
}
@@ -346,6 +371,9 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
return OS.str();
}
+void OptimizationRemarkAnalysisFPCommute::anchor() {}
+void OptimizationRemarkAnalysisAliasing::anchor() {}
+
namespace llvm {
namespace yaml {
@@ -399,7 +427,7 @@ template <> struct MappingTraits<DiagnosticLocation> {
static void mapping(IO &io, DiagnosticLocation &DL) {
assert(io.outputting() && "input not yet implemented");
- StringRef File = DL.getFilename();
+ StringRef File = DL.getRelativePath();
unsigned Line = DL.getLine();
unsigned Col = DL.getColumn();
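A sketch of the reworked location accessors (printDiagLoc is illustrative, not an LLVM API):

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// getLocation() now fills references instead of pointers, and the absolute
// path (DIFile directory + filename) is available directly.
static void printDiagLoc(const DiagnosticInfoWithLocationBase &DI,
                         raw_ostream &OS) {
  if (!DI.isLocationAvailable())
    return;
  StringRef RelPath;
  unsigned Line = 0, Col = 0;
  DI.getLocation(RelPath, Line, Col);
  OS << DI.getAbsolutePath() << " (" << RelPath << ":" << Line << ":" << Col
     << ")\n";
}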
diff --git a/contrib/llvm/lib/IR/DomTreeUpdater.cpp b/contrib/llvm/lib/IR/DomTreeUpdater.cpp
index f035a86eddae..b72c1b77c2ce 100644
--- a/contrib/llvm/lib/IR/DomTreeUpdater.cpp
+++ b/contrib/llvm/lib/IR/DomTreeUpdater.cpp
@@ -152,39 +152,34 @@ bool DomTreeUpdater::forceFlushDeletedBB() {
return true;
}
-bool DomTreeUpdater::recalculate(Function &F) {
- if (!DT && !PDT)
- return false;
+void DomTreeUpdater::recalculate(Function &F) {
if (Strategy == UpdateStrategy::Eager) {
if (DT)
DT->recalculate(F);
if (PDT)
PDT->recalculate(F);
- return true;
+ return;
}
+ // There is little performance gain in deferring the recalculation under the
+ // Lazy UpdateStrategy, so we recalculate the available trees immediately.
+
// Prevent forceFlushDeletedBB() from erasing DomTree or PostDomTree nodes.
IsRecalculatingDomTree = IsRecalculatingPostDomTree = true;
// Because all trees are going to be up-to-date after recalculation,
// flush awaiting deleted BasicBlocks.
- if (forceFlushDeletedBB() || hasPendingUpdates()) {
- if (DT)
- DT->recalculate(F);
- if (PDT)
- PDT->recalculate(F);
-
- // Resume forceFlushDeletedBB() to erase DomTree or PostDomTree nodes.
- IsRecalculatingDomTree = IsRecalculatingPostDomTree = false;
- PendDTUpdateIndex = PendPDTUpdateIndex = PendUpdates.size();
- dropOutOfDateUpdates();
- return true;
- }
+ forceFlushDeletedBB();
+ if (DT)
+ DT->recalculate(F);
+ if (PDT)
+ PDT->recalculate(F);
// Resume forceFlushDeletedBB() to erase DomTree or PostDomTree nodes.
IsRecalculatingDomTree = IsRecalculatingPostDomTree = false;
- return false;
+ PendDTUpdateIndex = PendPDTUpdateIndex = PendUpdates.size();
+ dropOutOfDateUpdates();
}
bool DomTreeUpdater::hasPendingUpdates() const {
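A rough usage sketch of the now-void recalculate() (the helper name is hypothetical):

#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// recalculate() no longer reports whether anything changed: it flushes
// pending block deletions and unconditionally rebuilds every tree it holds.
static void refreshTrees(DomTreeUpdater &DTU, Function &F) {
  DTU.recalculate(F);
  assert(!DTU.hasPendingUpdates() && "queued updates are dropped as stale");
}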
diff --git a/contrib/llvm/lib/IR/Dominators.cpp b/contrib/llvm/lib/IR/Dominators.cpp
index d8971e05f476..cf9f5759ba53 100644
--- a/contrib/llvm/lib/IR/Dominators.cpp
+++ b/contrib/llvm/lib/IR/Dominators.cpp
@@ -41,7 +41,7 @@ static constexpr bool ExpensiveChecksEnabled = false;
#endif
bool BasicBlockEdge::isSingleEdge() const {
- const TerminatorInst *TI = Start->getTerminator();
+ const Instruction *TI = Start->getTerminator();
unsigned NumEdgesToEnd = 0;
for (unsigned int i = 0, n = TI->getNumSuccessors(); i < n; ++i) {
if (TI->getSuccessor(i) == End)
@@ -67,12 +67,17 @@ template class llvm::DomTreeNodeBase<BasicBlock>;
template class llvm::DominatorTreeBase<BasicBlock, false>; // DomTreeBase
template class llvm::DominatorTreeBase<BasicBlock, true>; // PostDomTreeBase
-template struct llvm::DomTreeBuilder::Update<BasicBlock *>;
+template class llvm::cfg::Update<BasicBlock *>;
template void llvm::DomTreeBuilder::Calculate<DomTreeBuilder::BBDomTree>(
DomTreeBuilder::BBDomTree &DT);
+template void
+llvm::DomTreeBuilder::CalculateWithUpdates<DomTreeBuilder::BBDomTree>(
+ DomTreeBuilder::BBDomTree &DT, BBUpdates U);
+
template void llvm::DomTreeBuilder::Calculate<DomTreeBuilder::BBPostDomTree>(
DomTreeBuilder::BBPostDomTree &DT);
+// No CalculateWithUpdates<PostDomTree> instantiation, unless a use case arises.
template void llvm::DomTreeBuilder::InsertEdge<DomTreeBuilder::BBDomTree>(
DomTreeBuilder::BBDomTree &DT, BasicBlock *From, BasicBlock *To);
@@ -372,193 +377,3 @@ void DominatorTreeWrapperPass::print(raw_ostream &OS, const Module *) const {
DT.print(OS);
}
-//===----------------------------------------------------------------------===//
-// DeferredDominance Implementation
-//===----------------------------------------------------------------------===//
-//
-// The implementation details of the DeferredDominance class which allows
-// one to queue updates to a DominatorTree.
-//
-//===----------------------------------------------------------------------===//
-
-/// Queues multiple updates and discards duplicates.
-void DeferredDominance::applyUpdates(
- ArrayRef<DominatorTree::UpdateType> Updates) {
- SmallVector<DominatorTree::UpdateType, 8> Seen;
- for (auto U : Updates)
- // Avoid duplicates to applyUpdate() to save on analysis.
- if (std::none_of(Seen.begin(), Seen.end(),
- [U](DominatorTree::UpdateType S) { return S == U; })) {
- Seen.push_back(U);
- applyUpdate(U.getKind(), U.getFrom(), U.getTo());
- }
-}
-
-/// Helper method for a single edge insertion. It's almost always better
-/// to batch updates and call applyUpdates to quickly remove duplicate edges.
-/// This is best used when there is only a single insertion needed to update
-/// Dominators.
-void DeferredDominance::insertEdge(BasicBlock *From, BasicBlock *To) {
- applyUpdate(DominatorTree::Insert, From, To);
-}
-
-/// Helper method for a single edge deletion. It's almost always better
-/// to batch updates and call applyUpdates to quickly remove duplicate edges.
-/// This is best used when there is only a single deletion needed to update
-/// Dominators.
-void DeferredDominance::deleteEdge(BasicBlock *From, BasicBlock *To) {
- applyUpdate(DominatorTree::Delete, From, To);
-}
-
-/// Delays the deletion of a basic block until a flush() event.
-void DeferredDominance::deleteBB(BasicBlock *DelBB) {
- assert(DelBB && "Invalid push_back of nullptr DelBB.");
- assert(pred_empty(DelBB) && "DelBB has one or more predecessors.");
- // DelBB is unreachable and all its instructions are dead.
- while (!DelBB->empty()) {
- Instruction &I = DelBB->back();
- // Replace used instructions with an arbitrary value (undef).
- if (!I.use_empty())
- I.replaceAllUsesWith(llvm::UndefValue::get(I.getType()));
- DelBB->getInstList().pop_back();
- }
- // Make sure DelBB has a valid terminator instruction. As long as DelBB is a
- // Child of Function F it must contain valid IR.
- new UnreachableInst(DelBB->getContext(), DelBB);
- DeletedBBs.insert(DelBB);
-}
-
-/// Returns true if DelBB is awaiting deletion at a flush() event.
-bool DeferredDominance::pendingDeletedBB(BasicBlock *DelBB) {
- if (DeletedBBs.empty())
- return false;
- return DeletedBBs.count(DelBB) != 0;
-}
-
-/// Returns true if pending DT updates are queued for a flush() event.
-bool DeferredDominance::pending() { return !PendUpdates.empty(); }
-
-/// Flushes all pending updates and block deletions. Returns a
-/// correct DominatorTree reference to be used by the caller for analysis.
-DominatorTree &DeferredDominance::flush() {
- // Updates to DT must happen before blocks are deleted below. Otherwise the
- // DT traversal will encounter badref blocks and assert.
- if (!PendUpdates.empty()) {
- DT.applyUpdates(PendUpdates);
- PendUpdates.clear();
- }
- flushDelBB();
- return DT;
-}
-
-/// Drops all internal state and forces a (slow) recalculation of the
-/// DominatorTree based on the current state of the LLVM IR in F. This should
-/// only be used in corner cases such as the Entry block of F being deleted.
-void DeferredDominance::recalculate(Function &F) {
- // flushDelBB must be flushed before the recalculation. The state of the IR
- // must be consistent before the DT traversal algorithm determines the
- // actual DT.
- if (flushDelBB() || !PendUpdates.empty()) {
- DT.recalculate(F);
- PendUpdates.clear();
- }
-}
-
-/// Debug method to help view the state of pending updates.
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void DeferredDominance::dump() const {
- raw_ostream &OS = llvm::dbgs();
- OS << "PendUpdates:\n";
- int I = 0;
- for (auto U : PendUpdates) {
- OS << " " << I << " : ";
- ++I;
- if (U.getKind() == DominatorTree::Insert)
- OS << "Insert, ";
- else
- OS << "Delete, ";
- BasicBlock *From = U.getFrom();
- if (From) {
- auto S = From->getName();
- if (!From->hasName())
- S = "(no name)";
- OS << S << "(" << From << "), ";
- } else {
- OS << "(badref), ";
- }
- BasicBlock *To = U.getTo();
- if (To) {
- auto S = To->getName();
- if (!To->hasName())
- S = "(no_name)";
- OS << S << "(" << To << ")\n";
- } else {
- OS << "(badref)\n";
- }
- }
- OS << "DeletedBBs:\n";
- I = 0;
- for (auto BB : DeletedBBs) {
- OS << " " << I << " : ";
- ++I;
- if (BB->hasName())
- OS << BB->getName() << "(";
- else
- OS << "(no_name)(";
- OS << BB << ")\n";
- }
-}
-#endif
-
-/// Apply an update (Kind, From, To) to the internal queued updates. The
-/// update is only added when determined to be necessary. Checks for
-/// self-domination, unnecessary updates, duplicate requests, and balanced
-/// pairs of requests are all performed. Returns true if the update is
-/// queued and false if it is discarded.
-bool DeferredDominance::applyUpdate(DominatorTree::UpdateKind Kind,
- BasicBlock *From, BasicBlock *To) {
- if (From == To)
- return false; // Cannot dominate self; discard update.
-
- // Discard updates by inspecting the current state of successors of From.
- // Since applyUpdate() must be called *after* the Terminator of From is
- // altered we can determine if the update is unnecessary.
- bool HasEdge = std::any_of(succ_begin(From), succ_end(From),
- [To](BasicBlock *B) { return B == To; });
- if (Kind == DominatorTree::Insert && !HasEdge)
- return false; // Unnecessary Insert: edge does not exist in IR.
- if (Kind == DominatorTree::Delete && HasEdge)
- return false; // Unnecessary Delete: edge still exists in IR.
-
- // Analyze pending updates to determine if the update is unnecessary.
- DominatorTree::UpdateType Update = {Kind, From, To};
- DominatorTree::UpdateType Invert = {Kind != DominatorTree::Insert
- ? DominatorTree::Insert
- : DominatorTree::Delete,
- From, To};
- for (auto I = PendUpdates.begin(), E = PendUpdates.end(); I != E; ++I) {
- if (Update == *I)
- return false; // Discard duplicate updates.
- if (Invert == *I) {
- // Update and Invert are both valid (equivalent to a no-op). Remove
- // Invert from PendUpdates and discard the Update.
- PendUpdates.erase(I);
- return false;
- }
- }
- PendUpdates.push_back(Update); // Save the valid update.
- return true;
-}
-
-/// Performs all pending basic block deletions. We have to defer the deletion
-/// of these blocks until after the DominatorTree updates are applied. The
-/// internal workings of the DominatorTree code expect every update's From
-/// and To blocks to exist and to be a member of the same Function.
-bool DeferredDominance::flushDelBB() {
- if (DeletedBBs.empty())
- return false;
- for (auto *BB : DeletedBBs)
- BB->eraseFromParent();
- DeletedBBs.clear();
- return true;
-}
diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp
index 72090f5bac3e..a88478b89bfc 100644
--- a/contrib/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm/lib/IR/Function.cpp
@@ -24,7 +24,6 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -195,14 +194,19 @@ LLVMContext &Function::getContext() const {
return getType()->getContext();
}
-unsigned Function::getInstructionCount() {
+unsigned Function::getInstructionCount() const {
unsigned NumInstrs = 0;
- for (BasicBlock &BB : BasicBlocks)
+ for (const BasicBlock &BB : BasicBlocks)
NumInstrs += std::distance(BB.instructionsWithoutDebug().begin(),
BB.instructionsWithoutDebug().end());
return NumInstrs;
}
+Function *Function::Create(FunctionType *Ty, LinkageTypes Linkage,
+ const Twine &N, Module &M) {
+ return Create(Ty, Linkage, M.getDataLayout().getProgramAddressSpace(), N, &M);
+}
+
void Function::removeFromParent() {
getParent()->getFunctionList().remove(getIterator());
}
@@ -215,10 +219,19 @@ void Function::eraseFromParent() {
// Function Implementation
//===----------------------------------------------------------------------===//
-Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
- Module *ParentModule)
+static unsigned computeAddrSpace(unsigned AddrSpace, Module *M) {
+ // If AS == -1 and we are passed a valid module pointer, we place the function
+ // in the program address space. Otherwise we default to AS0.
+ if (AddrSpace == static_cast<unsigned>(-1))
+ return M ? M->getDataLayout().getProgramAddressSpace() : 0;
+ return AddrSpace;
+}
+
+Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
+ const Twine &name, Module *ParentModule)
: GlobalObject(Ty, Value::FunctionVal,
- OperandTraits<Function>::op_begin(this), 0, Linkage, name),
+ OperandTraits<Function>::op_begin(this), 0, Linkage, name,
+ computeAddrSpace(AddrSpace, ParentModule)),
NumArgs(Ty->getNumParams()) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
"invalid return type");
@@ -1243,13 +1256,13 @@ bool Function::hasAddressTaken(const User* *PutOffender) const {
const User *FU = U.getUser();
if (isa<BlockAddress>(FU))
continue;
- if (!isa<CallInst>(FU) && !isa<InvokeInst>(FU)) {
+ const auto *Call = dyn_cast<CallBase>(FU);
+ if (!Call) {
if (PutOffender)
*PutOffender = FU;
return true;
}
- ImmutableCallSite CS(cast<Instruction>(FU));
- if (!CS.isCallee(&U)) {
+ if (!Call->isCallee(&U)) {
if (PutOffender)
*PutOffender = FU;
return true;
@@ -1275,12 +1288,10 @@ bool Function::isDefTriviallyDead() const {
/// callsFunctionThatReturnsTwice - Return true if the function has a call to
/// setjmp or other function that gcc recognizes as "returning twice".
bool Function::callsFunctionThatReturnsTwice() const {
- for (const_inst_iterator
- I = inst_begin(this), E = inst_end(this); I != E; ++I) {
- ImmutableCallSite CS(&*I);
- if (CS && CS.hasFnAttr(Attribute::ReturnsTwice))
- return true;
- }
+ for (const Instruction &I : instructions(this))
+ if (const auto *Call = dyn_cast<CallBase>(&I))
+ if (Call->hasFnAttr(Attribute::ReturnsTwice))
+ return true;
return false;
}
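A sketch of the added Module-reference overload of Function::Create (the function name "helper" is purely illustrative):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// The new overload inserts the function into M and places it in the module's
// program address space (the "P<n>" component of the data layout).
static Function *declareHelper(Module &M, FunctionType *FTy) {
  return Function::Create(FTy, GlobalValue::ExternalLinkage, "helper", M);
}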
diff --git a/contrib/llvm/lib/IR/Globals.cpp b/contrib/llvm/lib/IR/Globals.cpp
index 20b2334a626f..cbd6450a20c9 100644
--- a/contrib/llvm/lib/IR/Globals.cpp
+++ b/contrib/llvm/lib/IR/Globals.cpp
@@ -108,6 +108,11 @@ unsigned GlobalValue::getAlignment() const {
return cast<GlobalObject>(this)->getAlignment();
}
+unsigned GlobalValue::getAddressSpace() const {
+ PointerType *PtrTy = getType();
+ return PtrTy->getAddressSpace();
+}
+
void GlobalObject::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
assert(Align <= MaximumAlignment &&
@@ -247,7 +252,7 @@ bool GlobalValue::canIncreaseAlignment() const {
// Conservatively assume ELF if there's no parent pointer.
bool isELF =
(!Parent || Triple(Parent->getTargetTriple()).isOSBinFormatELF());
- if (isELF && hasDefaultVisibility() && !hasLocalLinkage())
+ if (isELF && !isDSOLocal())
return false;
return true;
diff --git a/contrib/llvm/lib/IR/IRBuilder.cpp b/contrib/llvm/lib/IR/IRBuilder.cpp
index 405a56bfb31d..a98189956770 100644
--- a/contrib/llvm/lib/IR/IRBuilder.cpp
+++ b/contrib/llvm/lib/IR/IRBuilder.cpp
@@ -50,6 +50,7 @@ GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str,
nullptr, GlobalVariable::NotThreadLocal,
AddressSpace);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ GV->setAlignment(1);
return GV;
}
@@ -730,28 +731,29 @@ CallInst *IRBuilderBase::CreateGCRelocate(Instruction *Statepoint,
return createCallHelper(FnGCRelocate, Args, this, Name);
}
-CallInst *IRBuilderBase::CreateBinaryIntrinsic(Intrinsic::ID ID,
- Value *LHS, Value *RHS,
- const Twine &Name) {
+CallInst *IRBuilderBase::CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
+ Instruction *FMFSource,
+ const Twine &Name) {
Module *M = BB->getModule();
- Function *Fn = Intrinsic::getDeclaration(M, ID, { LHS->getType() });
- return createCallHelper(Fn, { LHS, RHS }, this, Name);
+ Function *Fn = Intrinsic::getDeclaration(M, ID, {V->getType()});
+ return createCallHelper(Fn, {V}, this, Name, FMFSource);
}
-CallInst *IRBuilderBase::CreateIntrinsic(Intrinsic::ID ID,
- Instruction *FMFSource,
- const Twine &Name) {
+CallInst *IRBuilderBase::CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS,
+ Value *RHS,
+ Instruction *FMFSource,
+ const Twine &Name) {
Module *M = BB->getModule();
- Function *Fn = Intrinsic::getDeclaration(M, ID);
- return createCallHelper(Fn, {}, this, Name);
+ Function *Fn = Intrinsic::getDeclaration(M, ID, { LHS->getType() });
+ return createCallHelper(Fn, {LHS, RHS}, this, Name, FMFSource);
}
CallInst *IRBuilderBase::CreateIntrinsic(Intrinsic::ID ID,
+ ArrayRef<Type *> Types,
ArrayRef<Value *> Args,
Instruction *FMFSource,
const Twine &Name) {
- assert(!Args.empty() && "Expected at least one argument to intrinsic");
Module *M = BB->getModule();
- Function *Fn = Intrinsic::getDeclaration(M, ID, { Args.front()->getType() });
+ Function *Fn = Intrinsic::getDeclaration(M, ID, Types);
return createCallHelper(Fn, Args, this, Name, FMFSource);
}
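The reshuffled intrinsic-creation helpers can be exercised like this (a sketch; emitSqrt is not an LLVM API):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

// CreateIntrinsic now takes the overloaded types explicitly instead of
// inferring them from the first argument; FMFSource optionally copies
// fast-math flags from an existing instruction onto the new call.
static CallInst *emitSqrt(IRBuilder<> &B, Value *X, Instruction *FMFSource) {
  return B.CreateIntrinsic(Intrinsic::sqrt, {X->getType()}, {X}, FMFSource);
  // Shorthand for single-operand intrinsics:
  //   B.CreateUnaryIntrinsic(Intrinsic::sqrt, X, FMFSource);
}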
diff --git a/contrib/llvm/lib/IR/IRPrintingPasses.cpp b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
index befe1d9ffb1c..43010220b9f3 100644
--- a/contrib/llvm/lib/IR/IRPrintingPasses.cpp
+++ b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
@@ -27,7 +27,8 @@ PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner,
ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
PreservedAnalyses PrintModulePass::run(Module &M, ModuleAnalysisManager &) {
- OS << Banner;
+ if (!Banner.empty())
+ OS << Banner << "\n";
if (llvm::isFunctionInPrintList("*"))
M.print(OS, nullptr, ShouldPreserveUseListOrder);
else {
diff --git a/contrib/llvm/lib/IR/Instruction.cpp b/contrib/llvm/lib/IR/Instruction.cpp
index 508db9bcaf19..d861b5288592 100644
--- a/contrib/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm/lib/IR/Instruction.cpp
@@ -303,6 +303,9 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case CatchPad: return "catchpad";
case CatchSwitch: return "catchswitch";
+ // Standard unary operators...
+ case FNeg: return "fneg";
+
// Standard binary operators...
case Add: return "add";
case FAdd: return "fadd";
@@ -592,7 +595,15 @@ bool Instruction::mayThrow() const {
bool Instruction::isSafeToRemove() const {
return (!isa<CallInst>(this) || !this->mayHaveSideEffects()) &&
- !isa<TerminatorInst>(this);
+ !this->isTerminator();
+}
+
+bool Instruction::isLifetimeStartOrEnd() const {
+ auto II = dyn_cast<IntrinsicInst>(this);
+ if (!II)
+ return false;
+ Intrinsic::ID ID = II->getIntrinsicID();
+ return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end;
}
const Instruction *Instruction::getNextNonDebugInstruction() const {
@@ -602,6 +613,13 @@ const Instruction *Instruction::getNextNonDebugInstruction() const {
return nullptr;
}
+const Instruction *Instruction::getPrevNonDebugInstruction() const {
+ for (const Instruction *I = getPrevNode(); I; I = I->getPrevNode())
+ if (!isa<DbgInfoIntrinsic>(I))
+ return I;
+ return nullptr;
+}
+
bool Instruction::isAssociative() const {
unsigned Opcode = getOpcode();
if (isAssociative(Opcode))
@@ -617,6 +635,42 @@ bool Instruction::isAssociative() const {
}
}
+unsigned Instruction::getNumSuccessors() const {
+ switch (getOpcode()) {
+#define HANDLE_TERM_INST(N, OPC, CLASS) \
+ case Instruction::OPC: \
+ return static_cast<const CLASS *>(this)->getNumSuccessors();
+#include "llvm/IR/Instruction.def"
+ default:
+ break;
+ }
+ llvm_unreachable("not a terminator");
+}
+
+BasicBlock *Instruction::getSuccessor(unsigned idx) const {
+ switch (getOpcode()) {
+#define HANDLE_TERM_INST(N, OPC, CLASS) \
+ case Instruction::OPC: \
+ return static_cast<const CLASS *>(this)->getSuccessor(idx);
+#include "llvm/IR/Instruction.def"
+ default:
+ break;
+ }
+ llvm_unreachable("not a terminator");
+}
+
+void Instruction::setSuccessor(unsigned idx, BasicBlock *B) {
+ switch (getOpcode()) {
+#define HANDLE_TERM_INST(N, OPC, CLASS) \
+ case Instruction::OPC: \
+ return static_cast<CLASS *>(this)->setSuccessor(idx, B);
+#include "llvm/IR/Instruction.def"
+ default:
+ break;
+ }
+ llvm_unreachable("not a terminator");
+}
+
Instruction *Instruction::cloneImpl() const {
llvm_unreachable("Subclass of Instruction failed to implement cloneImpl");
}
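A small sketch of the successor API now that it lives on Instruction (branchesTo is an illustrative helper, not an LLVM function):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// With TerminatorInst gone, successor queries are answered by Instruction
// itself; non-terminators must be filtered out first.
static bool branchesTo(const Instruction &I, const BasicBlock *Target) {
  if (!I.isTerminator())
    return false;
  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
    if (I.getSuccessor(i) == Target)
      return true;
  return false;
}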
diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp
index 32db918dab97..06b46724a87f 100644
--- a/contrib/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm/lib/IR/Instructions.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -65,50 +66,7 @@ AllocaInst::getAllocationSizeInBits(const DataLayout &DL) const {
//===----------------------------------------------------------------------===//
User::op_iterator CallSite::getCallee() const {
- Instruction *II(getInstruction());
- return isCall()
- ? cast<CallInst>(II)->op_end() - 1 // Skip Callee
- : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Callee
-}
-
-//===----------------------------------------------------------------------===//
-// TerminatorInst Class
-//===----------------------------------------------------------------------===//
-
-unsigned TerminatorInst::getNumSuccessors() const {
- switch (getOpcode()) {
-#define HANDLE_TERM_INST(N, OPC, CLASS) \
- case Instruction::OPC: \
- return static_cast<const CLASS *>(this)->getNumSuccessors();
-#include "llvm/IR/Instruction.def"
- default:
- break;
- }
- llvm_unreachable("not a terminator");
-}
-
-BasicBlock *TerminatorInst::getSuccessor(unsigned idx) const {
- switch (getOpcode()) {
-#define HANDLE_TERM_INST(N, OPC, CLASS) \
- case Instruction::OPC: \
- return static_cast<const CLASS *>(this)->getSuccessor(idx);
-#include "llvm/IR/Instruction.def"
- default:
- break;
- }
- llvm_unreachable("not a terminator");
-}
-
-void TerminatorInst::setSuccessor(unsigned idx, BasicBlock *B) {
- switch (getOpcode()) {
-#define HANDLE_TERM_INST(N, OPC, CLASS) \
- case Instruction::OPC: \
- return static_cast<CLASS *>(this)->setSuccessor(idx, B);
-#include "llvm/IR/Instruction.def"
- default:
- break;
- }
- llvm_unreachable("not a terminator");
+ return cast<CallBase>(getInstruction())->op_end() - 1;
}
//===----------------------------------------------------------------------===//
@@ -294,6 +252,112 @@ void LandingPadInst::addClause(Constant *Val) {
}
//===----------------------------------------------------------------------===//
+// CallBase Implementation
+//===----------------------------------------------------------------------===//
+
+Function *CallBase::getCaller() { return getParent()->getParent(); }
+
+bool CallBase::isIndirectCall() const {
+ const Value *V = getCalledValue();
+ if (isa<Function>(V) || isa<Constant>(V))
+ return false;
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ if (CI->isInlineAsm())
+ return false;
+ return true;
+}
+
+Intrinsic::ID CallBase::getIntrinsicID() const {
+ if (auto *F = getCalledFunction())
+ return F->getIntrinsicID();
+ return Intrinsic::not_intrinsic;
+}
+
+bool CallBase::isReturnNonNull() const {
+ if (hasRetAttr(Attribute::NonNull))
+ return true;
+
+ if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 &&
+ !NullPointerIsDefined(getCaller(),
+ getType()->getPointerAddressSpace()))
+ return true;
+
+ return false;
+}
+
+Value *CallBase::getReturnedArgOperand() const {
+ unsigned Index;
+
+ if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
+ return getArgOperand(Index - AttributeList::FirstArgIndex);
+ if (const Function *F = getCalledFunction())
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
+ Index)
+ return getArgOperand(Index - AttributeList::FirstArgIndex);
+
+ return nullptr;
+}
+
+bool CallBase::hasRetAttr(Attribute::AttrKind Kind) const {
+ if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind))
+ return true;
+
+ // Look at the callee, if available.
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind);
+ return false;
+}
+
+/// Determine whether the argument or parameter has the given attribute.
+bool CallBase::paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
+ assert(ArgNo < getNumArgOperands() && "Param index out of bounds!");
+
+ if (Attrs.hasParamAttribute(ArgNo, Kind))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasParamAttribute(ArgNo, Kind);
+ return false;
+}
+
+bool CallBase::hasFnAttrOnCalledFunction(Attribute::AttrKind Kind) const {
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeList::FunctionIndex, Kind);
+ return false;
+}
+
+bool CallBase::hasFnAttrOnCalledFunction(StringRef Kind) const {
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeList::FunctionIndex, Kind);
+ return false;
+}
+
+CallBase::op_iterator
+CallBase::populateBundleOperandInfos(ArrayRef<OperandBundleDef> Bundles,
+ const unsigned BeginIndex) {
+ auto It = op_begin() + BeginIndex;
+ for (auto &B : Bundles)
+ It = std::copy(B.input_begin(), B.input_end(), It);
+
+ auto *ContextImpl = getContext().pImpl;
+ auto BI = Bundles.begin();
+ unsigned CurrentIndex = BeginIndex;
+
+ for (auto &BOI : bundle_op_infos()) {
+ assert(BI != Bundles.end() && "Incorrect allocation?");
+
+ BOI.Tag = ContextImpl->getOrInsertBundleTag(BI->getTag());
+ BOI.Begin = CurrentIndex;
+ BOI.End = CurrentIndex + BI->input_size();
+ CurrentIndex = BOI.End;
+ BI++;
+ }
+
+ assert(BI == Bundles.end() && "Incorrect allocation?");
+
+ return It;
+}
+
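A sketch exercising the unified CallBase queries above (the helper name and the assumption that ArgNo is a valid argument index are mine):

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// One predicate now covers both call and invoke instructions; ArgNo is
// assumed to be in range for the call site.
static bool passesKnownNonNull(const CallBase &Call, unsigned ArgNo) {
  return !Call.isIndirectCall() &&
         Call.paramHasAttr(ArgNo, Attribute::NonNull);
}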
+//===----------------------------------------------------------------------===//
// CallInst Implementation
//===----------------------------------------------------------------------===//
@@ -302,7 +366,7 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
this->FTy = FTy;
assert(getNumOperands() == Args.size() + CountBundleInputs(Bundles) + 1 &&
"NumOperands not set up?");
- Op<-1>() = Func;
+ setCalledOperand(Func);
#ifndef NDEBUG
assert((Args.size() == FTy->getNumParams() ||
@@ -315,7 +379,7 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
"Calling a function with a bad signature!");
#endif
- std::copy(Args.begin(), Args.end(), op_begin());
+ llvm::copy(Args, op_begin());
auto It = populateBundleOperandInfos(Bundles, Args.size());
(void)It;
@@ -324,43 +388,34 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
setName(NameStr);
}
-void CallInst::init(Value *Func, const Twine &NameStr) {
- FTy =
- cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+void CallInst::init(FunctionType *FTy, Value *Func, const Twine &NameStr) {
+ this->FTy = FTy;
assert(getNumOperands() == 1 && "NumOperands not set up?");
- Op<-1>() = Func;
+ setCalledOperand(Func);
assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
setName(NameStr);
}
-CallInst::CallInst(Value *Func, const Twine &Name, Instruction *InsertBefore)
- : CallBase<CallInst>(
- cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType())
- ->getReturnType(),
- Instruction::Call,
- OperandTraits<CallBase<CallInst>>::op_end(this) - 1, 1,
- InsertBefore) {
- init(Func, Name);
+CallInst::CallInst(FunctionType *Ty, Value *Func, const Twine &Name,
+ Instruction *InsertBefore)
+ : CallBase(Ty->getReturnType(), Instruction::Call,
+ OperandTraits<CallBase>::op_end(this) - 1, 1, InsertBefore) {
+ init(Ty, Func, Name);
}
-CallInst::CallInst(Value *Func, const Twine &Name, BasicBlock *InsertAtEnd)
- : CallBase<CallInst>(
- cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType())
- ->getReturnType(),
- Instruction::Call,
- OperandTraits<CallBase<CallInst>>::op_end(this) - 1, 1, InsertAtEnd) {
- init(Func, Name);
+CallInst::CallInst(FunctionType *Ty, Value *Func, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : CallBase(Ty->getReturnType(), Instruction::Call,
+ OperandTraits<CallBase>::op_end(this) - 1, 1, InsertAtEnd) {
+ init(Ty, Func, Name);
}
CallInst::CallInst(const CallInst &CI)
- : CallBase<CallInst>(CI.Attrs, CI.FTy, CI.getType(), Instruction::Call,
- OperandTraits<CallBase<CallInst>>::op_end(this) -
- CI.getNumOperands(),
- CI.getNumOperands()) {
+ : CallBase(CI.Attrs, CI.FTy, CI.getType(), Instruction::Call,
+ OperandTraits<CallBase>::op_end(this) - CI.getNumOperands(),
+ CI.getNumOperands()) {
setTailCallKind(CI.getTailCallKind());
setCallingConv(CI.getCallingConv());
@@ -600,11 +655,12 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
const Twine &NameStr) {
this->FTy = FTy;
- assert(getNumOperands() == 3 + Args.size() + CountBundleInputs(Bundles) &&
+ assert((int)getNumOperands() ==
+ ComputeNumOperands(Args.size(), CountBundleInputs(Bundles)) &&
"NumOperands not set up?");
- Op<-3>() = Fn;
- Op<-2>() = IfNormal;
- Op<-1>() = IfException;
+ setNormalDest(IfNormal);
+ setUnwindDest(IfException);
+ setCalledOperand(Fn);
#ifndef NDEBUG
assert(((Args.size() == FTy->getNumParams()) ||
@@ -617,7 +673,7 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
"Invoking a function with a bad signature!");
#endif
- std::copy(Args.begin(), Args.end(), op_begin());
+ llvm::copy(Args, op_begin());
auto It = populateBundleOperandInfos(Bundles, Args.size());
(void)It;
@@ -627,10 +683,9 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
}
InvokeInst::InvokeInst(const InvokeInst &II)
- : CallBase<InvokeInst>(II.Attrs, II.FTy, II.getType(), Instruction::Invoke,
- OperandTraits<CallBase<InvokeInst>>::op_end(this) -
- II.getNumOperands(),
- II.getNumOperands()) {
+ : CallBase(II.Attrs, II.FTy, II.getType(), Instruction::Invoke,
+ OperandTraits<CallBase>::op_end(this) - II.getNumOperands(),
+ II.getNumOperands()) {
setCallingConv(II.getCallingConv());
std::copy(II.op_begin(), II.op_end(), op_begin());
std::copy(II.bundle_op_info_begin(), II.bundle_op_info_end(),
@@ -662,55 +717,53 @@ LandingPadInst *InvokeInst::getLandingPadInst() const {
//===----------------------------------------------------------------------===//
ReturnInst::ReturnInst(const ReturnInst &RI)
- : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this) -
- RI.getNumOperands(),
- RI.getNumOperands()) {
+ : Instruction(Type::getVoidTy(RI.getContext()), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - RI.getNumOperands(),
+ RI.getNumOperands()) {
if (RI.getNumOperands())
Op<0>() = RI.Op<0>();
SubclassOptionalData = RI.SubclassOptionalData;
}
ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
- InsertBefore) {
+ : Instruction(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertBefore) {
if (retVal)
Op<0>() = retVal;
}
ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
- InsertAtEnd) {
+ : Instruction(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertAtEnd) {
if (retVal)
Op<0>() = retVal;
}
ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {
-}
+ : Instruction(Type::getVoidTy(Context), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {}
//===----------------------------------------------------------------------===//
// ResumeInst Implementation
//===----------------------------------------------------------------------===//
ResumeInst::ResumeInst(const ResumeInst &RI)
- : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Resume,
- OperandTraits<ResumeInst>::op_begin(this), 1) {
+ : Instruction(Type::getVoidTy(RI.getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1) {
Op<0>() = RI.Op<0>();
}
ResumeInst::ResumeInst(Value *Exn, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
- OperandTraits<ResumeInst>::op_begin(this), 1, InsertBefore) {
+ : Instruction(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1, InsertBefore) {
Op<0>() = Exn;
}
ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
- OperandTraits<ResumeInst>::op_begin(this), 1, InsertAtEnd) {
+ : Instruction(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1, InsertAtEnd) {
Op<0>() = Exn;
}
@@ -719,10 +772,10 @@ ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd)
//===----------------------------------------------------------------------===//
CleanupReturnInst::CleanupReturnInst(const CleanupReturnInst &CRI)
- : TerminatorInst(CRI.getType(), Instruction::CleanupRet,
- OperandTraits<CleanupReturnInst>::op_end(this) -
- CRI.getNumOperands(),
- CRI.getNumOperands()) {
+ : Instruction(CRI.getType(), Instruction::CleanupRet,
+ OperandTraits<CleanupReturnInst>::op_end(this) -
+ CRI.getNumOperands(),
+ CRI.getNumOperands()) {
setInstructionSubclassData(CRI.getSubclassDataFromInstruction());
Op<0>() = CRI.Op<0>();
if (CRI.hasUnwindDest())
@@ -740,19 +793,19 @@ void CleanupReturnInst::init(Value *CleanupPad, BasicBlock *UnwindBB) {
CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB,
unsigned Values, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(CleanupPad->getContext()),
- Instruction::CleanupRet,
- OperandTraits<CleanupReturnInst>::op_end(this) - Values,
- Values, InsertBefore) {
+ : Instruction(Type::getVoidTy(CleanupPad->getContext()),
+ Instruction::CleanupRet,
+ OperandTraits<CleanupReturnInst>::op_end(this) - Values,
+ Values, InsertBefore) {
init(CleanupPad, UnwindBB);
}
CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB,
unsigned Values, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(CleanupPad->getContext()),
- Instruction::CleanupRet,
- OperandTraits<CleanupReturnInst>::op_end(this) - Values,
- Values, InsertAtEnd) {
+ : Instruction(Type::getVoidTy(CleanupPad->getContext()),
+ Instruction::CleanupRet,
+ OperandTraits<CleanupReturnInst>::op_end(this) - Values,
+ Values, InsertAtEnd) {
init(CleanupPad, UnwindBB);
}
@@ -765,25 +818,25 @@ void CatchReturnInst::init(Value *CatchPad, BasicBlock *BB) {
}
CatchReturnInst::CatchReturnInst(const CatchReturnInst &CRI)
- : TerminatorInst(Type::getVoidTy(CRI.getContext()), Instruction::CatchRet,
- OperandTraits<CatchReturnInst>::op_begin(this), 2) {
+ : Instruction(Type::getVoidTy(CRI.getContext()), Instruction::CatchRet,
+ OperandTraits<CatchReturnInst>::op_begin(this), 2) {
Op<0>() = CRI.Op<0>();
Op<1>() = CRI.Op<1>();
}
CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB,
Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(BB->getContext()), Instruction::CatchRet,
- OperandTraits<CatchReturnInst>::op_begin(this), 2,
- InsertBefore) {
+ : Instruction(Type::getVoidTy(BB->getContext()), Instruction::CatchRet,
+ OperandTraits<CatchReturnInst>::op_begin(this), 2,
+ InsertBefore) {
init(CatchPad, BB);
}
CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB,
BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(BB->getContext()), Instruction::CatchRet,
- OperandTraits<CatchReturnInst>::op_begin(this), 2,
- InsertAtEnd) {
+ : Instruction(Type::getVoidTy(BB->getContext()), Instruction::CatchRet,
+ OperandTraits<CatchReturnInst>::op_begin(this), 2,
+ InsertAtEnd) {
init(CatchPad, BB);
}
@@ -795,8 +848,8 @@ CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
unsigned NumReservedValues,
const Twine &NameStr,
Instruction *InsertBefore)
- : TerminatorInst(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
- InsertBefore) {
+ : Instruction(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
+ InsertBefore) {
if (UnwindDest)
++NumReservedValues;
init(ParentPad, UnwindDest, NumReservedValues + 1);
@@ -806,8 +859,8 @@ CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
unsigned NumReservedValues,
const Twine &NameStr, BasicBlock *InsertAtEnd)
- : TerminatorInst(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
- InsertAtEnd) {
+ : Instruction(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
+ InsertAtEnd) {
if (UnwindDest)
++NumReservedValues;
init(ParentPad, UnwindDest, NumReservedValues + 1);
@@ -815,8 +868,8 @@ CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
}
CatchSwitchInst::CatchSwitchInst(const CatchSwitchInst &CSI)
- : TerminatorInst(CSI.getType(), Instruction::CatchSwitch, nullptr,
- CSI.getNumOperands()) {
+ : Instruction(CSI.getType(), Instruction::CatchSwitch, nullptr,
+ CSI.getNumOperands()) {
init(CSI.getParentPad(), CSI.getUnwindDest(), CSI.getNumOperands());
setNumHungOffUseOperands(ReservedSpace);
Use *OL = getOperandList();
@@ -876,7 +929,7 @@ void CatchSwitchInst::removeHandler(handler_iterator HI) {
void FuncletPadInst::init(Value *ParentPad, ArrayRef<Value *> Args,
const Twine &NameStr) {
assert(getNumOperands() == 1 + Args.size() && "NumOperands not set up?");
- std::copy(Args.begin(), Args.end(), op_begin());
+ llvm::copy(Args, op_begin());
setParentPad(ParentPad);
setName(NameStr);
}
@@ -914,13 +967,11 @@ FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
UnreachableInst::UnreachableInst(LLVMContext &Context,
Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
- nullptr, 0, InsertBefore) {
-}
+ : Instruction(Type::getVoidTy(Context), Instruction::Unreachable, nullptr,
+ 0, InsertBefore) {}
UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
- nullptr, 0, InsertAtEnd) {
-}
+ : Instruction(Type::getVoidTy(Context), Instruction::Unreachable, nullptr,
+ 0, InsertAtEnd) {}
//===----------------------------------------------------------------------===//
// BranchInst Implementation
@@ -933,18 +984,18 @@ void BranchInst::AssertOK() {
}
BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 1,
- 1, InsertBefore) {
+ : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1, 1,
+ InsertBefore) {
assert(IfTrue && "Branch destination may not be null!");
Op<-1>() = IfTrue;
}
BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 3,
- 3, InsertBefore) {
+ : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3, 3,
+ InsertBefore) {
Op<-1>() = IfTrue;
Op<-2>() = IfFalse;
Op<-3>() = Cond;
@@ -954,18 +1005,16 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
}
BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 1,
- 1, InsertAtEnd) {
+ : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1, 1, InsertAtEnd) {
assert(IfTrue && "Branch destination may not be null!");
Op<-1>() = IfTrue;
}
BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
- BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 3,
- 3, InsertAtEnd) {
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3, 3, InsertAtEnd) {
Op<-1>() = IfTrue;
Op<-2>() = IfFalse;
Op<-3>() = Cond;
@@ -974,10 +1023,10 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
#endif
}
-BranchInst::BranchInst(const BranchInst &BI) :
- TerminatorInst(Type::getVoidTy(BI.getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - BI.getNumOperands(),
- BI.getNumOperands()) {
+BranchInst::BranchInst(const BranchInst &BI)
+ : Instruction(Type::getVoidTy(BI.getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - BI.getNumOperands(),
+ BI.getNumOperands()) {
Op<-1>() = BI.Op<-1>();
if (BI.getNumOperands() != 1) {
assert(BI.getNumOperands() == 3 && "BR can have 1 or 3 operands!");
@@ -1089,28 +1138,30 @@ void LoadInst::AssertOK() {
"Alignment required for atomic load");
}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
- : LoadInst(Ptr, Name, /*isVolatile=*/false, InsertBef) {}
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name,
+ Instruction *InsertBef)
+ : LoadInst(Ty, Ptr, Name, /*isVolatile=*/false, InsertBef) {}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE)
- : LoadInst(Ptr, Name, /*isVolatile=*/false, InsertAE) {}
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name,
+ BasicBlock *InsertAE)
+ : LoadInst(Ty, Ptr, Name, /*isVolatile=*/false, InsertAE) {}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
Instruction *InsertBef)
: LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/0, InsertBef) {}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
BasicBlock *InsertAE)
- : LoadInst(Ptr, Name, isVolatile, /*Align=*/0, InsertAE) {}
+ : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/0, InsertAE) {}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, Instruction *InsertBef)
: LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic,
SyncScope::System, InsertBef) {}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, BasicBlock *InsertAE)
- : LoadInst(Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic,
+ : LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic,
SyncScope::System, InsertAE) {}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
@@ -1125,12 +1176,11 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SyncScope::ID SSID,
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, AtomicOrdering Order, SyncScope::ID SSID,
BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
+ : UnaryInstruction(Ty, Load, Ptr, InsertAE) {
+ assert(Ty == cast<PointerType>(Ptr->getType())->getElementType());
setVolatile(isVolatile);
setAlignment(Align);
setAtomic(Order, SSID);
@@ -1138,48 +1188,6 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const char *Name, Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(AtomicOrdering::NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const char *Name, BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(AtomicOrdering::NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
-LoadInst::LoadInst(Type *Ty, Value *Ptr, const char *Name, bool isVolatile,
- Instruction *InsertBef)
- : UnaryInstruction(Ty, Load, Ptr, InsertBef) {
- assert(Ty == cast<PointerType>(Ptr->getType())->getElementType());
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(AtomicOrdering::NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
- BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(AtomicOrdering::NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
void LoadInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
assert(Align <= MaximumAlignment &&
@@ -1376,6 +1384,37 @@ AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
Init(Operation, Ptr, Val, Ordering, SSID);
}
+StringRef AtomicRMWInst::getOperationName(BinOp Op) {
+ switch (Op) {
+ case AtomicRMWInst::Xchg:
+ return "xchg";
+ case AtomicRMWInst::Add:
+ return "add";
+ case AtomicRMWInst::Sub:
+ return "sub";
+ case AtomicRMWInst::And:
+ return "and";
+ case AtomicRMWInst::Nand:
+ return "nand";
+ case AtomicRMWInst::Or:
+ return "or";
+ case AtomicRMWInst::Xor:
+ return "xor";
+ case AtomicRMWInst::Max:
+ return "max";
+ case AtomicRMWInst::Min:
+ return "min";
+ case AtomicRMWInst::UMax:
+ return "umax";
+ case AtomicRMWInst::UMin:
+ return "umin";
+ case AtomicRMWInst::BAD_BINOP:
+ return "<invalid operation>";
+ }
+
+ llvm_unreachable("invalid atomicrmw operation");
+}
+
//===----------------------------------------------------------------------===//
// FenceInst Implementation
//===----------------------------------------------------------------------===//
@@ -1405,7 +1444,7 @@ void GetElementPtrInst::init(Value *Ptr, ArrayRef<Value *> IdxList,
assert(getNumOperands() == 1 + IdxList.size() &&
"NumOperands not initialized?");
Op<0>() = Ptr;
- std::copy(IdxList.begin(), IdxList.end(), op_begin() + 1);
+ llvm::copy(IdxList, op_begin() + 1);
setName(Name);
}
@@ -1700,17 +1739,17 @@ void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
}
}
-bool ShuffleVectorInst::isSingleSourceMask(ArrayRef<int> Mask) {
+static bool isSingleSourceMaskImpl(ArrayRef<int> Mask, int NumOpElts) {
assert(!Mask.empty() && "Shuffle mask must contain elements");
bool UsesLHS = false;
bool UsesRHS = false;
- for (int i = 0, NumElts = Mask.size(); i < NumElts; ++i) {
+ for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
if (Mask[i] == -1)
continue;
- assert(Mask[i] >= 0 && Mask[i] < (NumElts * 2) &&
+ assert(Mask[i] >= 0 && Mask[i] < (NumOpElts * 2) &&
"Out-of-bounds shuffle mask element");
- UsesLHS |= (Mask[i] < NumElts);
- UsesRHS |= (Mask[i] >= NumElts);
+ UsesLHS |= (Mask[i] < NumOpElts);
+ UsesRHS |= (Mask[i] >= NumOpElts);
if (UsesLHS && UsesRHS)
return false;
}
@@ -1718,18 +1757,30 @@ bool ShuffleVectorInst::isSingleSourceMask(ArrayRef<int> Mask) {
return true;
}
-bool ShuffleVectorInst::isIdentityMask(ArrayRef<int> Mask) {
- if (!isSingleSourceMask(Mask))
+bool ShuffleVectorInst::isSingleSourceMask(ArrayRef<int> Mask) {
+ // We don't have vector operand size information, so assume operands are the
+ // same size as the mask.
+ return isSingleSourceMaskImpl(Mask, Mask.size());
+}
+
+static bool isIdentityMaskImpl(ArrayRef<int> Mask, int NumOpElts) {
+ if (!isSingleSourceMaskImpl(Mask, NumOpElts))
return false;
- for (int i = 0, NumElts = Mask.size(); i < NumElts; ++i) {
+ for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
if (Mask[i] == -1)
continue;
- if (Mask[i] != i && Mask[i] != (NumElts + i))
+ if (Mask[i] != i && Mask[i] != (NumOpElts + i))
return false;
}
return true;
}
+bool ShuffleVectorInst::isIdentityMask(ArrayRef<int> Mask) {
+ // We don't have vector operand size information, so assume operands are the
+ // same size as the mask.
+ return isIdentityMaskImpl(Mask, Mask.size());
+}
+
bool ShuffleVectorInst::isReverseMask(ArrayRef<int> Mask) {
if (!isSingleSourceMask(Mask))
return false;
@@ -1801,6 +1852,79 @@ bool ShuffleVectorInst::isTransposeMask(ArrayRef<int> Mask) {
return true;
}
+bool ShuffleVectorInst::isExtractSubvectorMask(ArrayRef<int> Mask,
+ int NumSrcElts, int &Index) {
+ // Must extract from a single source.
+ if (!isSingleSourceMaskImpl(Mask, NumSrcElts))
+ return false;
+
+ // Must be smaller (else this is an Identity shuffle).
+ if (NumSrcElts <= (int)Mask.size())
+ return false;
+
+ // Find the start of the extraction, allowing that we may start with an UNDEF.
+ int SubIndex = -1;
+ for (int i = 0, e = Mask.size(); i != e; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ int Offset = (M % NumSrcElts) - i;
+ if (0 <= SubIndex && SubIndex != Offset)
+ return false;
+ SubIndex = Offset;
+ }
+
+ if (0 <= SubIndex) {
+ Index = SubIndex;
+ return true;
+ }
+ return false;
+}
+
+bool ShuffleVectorInst::isIdentityWithPadding() const {
+ int NumOpElts = Op<0>()->getType()->getVectorNumElements();
+ int NumMaskElts = getType()->getVectorNumElements();
+ if (NumMaskElts <= NumOpElts)
+ return false;
+
+ // The first part of the mask must choose elements from exactly 1 source op.
+ SmallVector<int, 16> Mask = getShuffleMask();
+ if (!isIdentityMaskImpl(Mask, NumOpElts))
+ return false;
+
+ // All extending must be with undef elements.
+ for (int i = NumOpElts; i < NumMaskElts; ++i)
+ if (Mask[i] != -1)
+ return false;
+
+ return true;
+}
+
+bool ShuffleVectorInst::isIdentityWithExtract() const {
+ int NumOpElts = Op<0>()->getType()->getVectorNumElements();
+ int NumMaskElts = getType()->getVectorNumElements();
+ if (NumMaskElts >= NumOpElts)
+ return false;
+
+ return isIdentityMaskImpl(getShuffleMask(), NumOpElts);
+}
+
+bool ShuffleVectorInst::isConcat() const {
+ // Vector concatenation is differentiated from identity with padding.
+ if (isa<UndefValue>(Op<0>()) || isa<UndefValue>(Op<1>()))
+ return false;
+
+ int NumOpElts = Op<0>()->getType()->getVectorNumElements();
+ int NumMaskElts = getType()->getVectorNumElements();
+ if (NumMaskElts != NumOpElts * 2)
+ return false;
+
+ // Use the mask length rather than the operands' vector lengths here. We
+ // already know that the shuffle returns a vector twice as long as the inputs,
+ // and neither of the inputs is an undef vector. If the mask picks consecutive
+ // elements from both inputs, then this is a concatenation of the inputs.
+ return isIdentityMaskImpl(getShuffleMask(), NumMaskElts);
+}
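A sketch showing how the new mask-classification helpers might be queried (isLowHalfExtract is illustrative, not part of the patch):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Recognize a shuffle that extracts the low half of its (wider) first operand.
static bool isLowHalfExtract(const ShuffleVectorInst &Shuf) {
  int NumSrcElts = Shuf.getOperand(0)->getType()->getVectorNumElements();
  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
  int Index;
  return ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) &&
         Index == 0;
}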
//===----------------------------------------------------------------------===//
// InsertValueInst Class
@@ -1887,6 +2011,59 @@ Type *ExtractValueInst::getIndexedType(Type *Agg,
}
//===----------------------------------------------------------------------===//
+// UnaryOperator Class
+//===----------------------------------------------------------------------===//
+
+UnaryOperator::UnaryOperator(UnaryOps iType, Value *S,
+ Type *Ty, const Twine &Name,
+ Instruction *InsertBefore)
+ : UnaryInstruction(Ty, iType, S, InsertBefore) {
+ Op<0>() = S;
+ setName(Name);
+ AssertOK();
+}
+
+UnaryOperator::UnaryOperator(UnaryOps iType, Value *S,
+ Type *Ty, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : UnaryInstruction(Ty, iType, S, InsertAtEnd) {
+ Op<0>() = S;
+ setName(Name);
+ AssertOK();
+}
+
+UnaryOperator *UnaryOperator::Create(UnaryOps Op, Value *S,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ return new UnaryOperator(Op, S, S->getType(), Name, InsertBefore);
+}
+
+UnaryOperator *UnaryOperator::Create(UnaryOps Op, Value *S,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ UnaryOperator *Res = Create(Op, S, Name);
+ InsertAtEnd->getInstList().push_back(Res);
+ return Res;
+}
+
+void UnaryOperator::AssertOK() {
+ Value *LHS = getOperand(0);
+ (void)LHS; // Silence warnings.
+#ifndef NDEBUG
+ switch (getOpcode()) {
+ case FNeg:
+ assert(getType() == LHS->getType() &&
+ "Unary operation should return same type as operand!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ default: llvm_unreachable("Invalid opcode provided");
+ }
+#endif
+}
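
A minimal sketch (not from this patch; emitFNeg is an illustrative name) of creating the new unary fneg through the Create() overloads defined above:

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Builds a unary fneg of V and inserts it before InsertPt; AssertOK() above
// rejects non-floating-point operands.
static Value *emitFNeg(Value *V, Instruction *InsertPt) {
  return UnaryOperator::Create(Instruction::FNeg, V, V->getName() + ".neg",
                               InsertPt);
}
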
+
+//===----------------------------------------------------------------------===//
// BinaryOperator Class
//===----------------------------------------------------------------------===//
@@ -2068,71 +2245,6 @@ BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
Op->getType(), Name, InsertAtEnd);
}
-// isConstantAllOnes - Helper function for several functions below
-static inline bool isConstantAllOnes(const Value *V) {
- if (const Constant *C = dyn_cast<Constant>(V))
- return C->isAllOnesValue();
- return false;
-}
-
-bool BinaryOperator::isNeg(const Value *V) {
- if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
- if (Bop->getOpcode() == Instruction::Sub)
- if (Constant *C = dyn_cast<Constant>(Bop->getOperand(0)))
- return C->isNegativeZeroValue();
- return false;
-}
-
-bool BinaryOperator::isFNeg(const Value *V, bool IgnoreZeroSign) {
- if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
- if (Bop->getOpcode() == Instruction::FSub)
- if (Constant *C = dyn_cast<Constant>(Bop->getOperand(0))) {
- if (!IgnoreZeroSign)
- IgnoreZeroSign = cast<Instruction>(V)->hasNoSignedZeros();
- return !IgnoreZeroSign ? C->isNegativeZeroValue() : C->isZeroValue();
- }
- return false;
-}
-
-bool BinaryOperator::isNot(const Value *V) {
- if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
- return (Bop->getOpcode() == Instruction::Xor &&
- (isConstantAllOnes(Bop->getOperand(1)) ||
- isConstantAllOnes(Bop->getOperand(0))));
- return false;
-}
-
-Value *BinaryOperator::getNegArgument(Value *BinOp) {
- return cast<BinaryOperator>(BinOp)->getOperand(1);
-}
-
-const Value *BinaryOperator::getNegArgument(const Value *BinOp) {
- return getNegArgument(const_cast<Value*>(BinOp));
-}
-
-Value *BinaryOperator::getFNegArgument(Value *BinOp) {
- return cast<BinaryOperator>(BinOp)->getOperand(1);
-}
-
-const Value *BinaryOperator::getFNegArgument(const Value *BinOp) {
- return getFNegArgument(const_cast<Value*>(BinOp));
-}
-
-Value *BinaryOperator::getNotArgument(Value *BinOp) {
- assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!");
- BinaryOperator *BO = cast<BinaryOperator>(BinOp);
- Value *Op0 = BO->getOperand(0);
- Value *Op1 = BO->getOperand(1);
- if (isConstantAllOnes(Op0)) return Op1;
-
- assert(isConstantAllOnes(Op1));
- return Op0;
-}
-
-const Value *BinaryOperator::getNotArgument(const Value *BinOp) {
- return getNotArgument(const_cast<Value*>(BinOp));
-}
-
// Exchange the two operands to this instruction. This instruction is safe to
// use on any binary instruction and does not modify the semantics of the
// instruction. If the instruction is order-dependent (SetLT f.e.), the opcode
@@ -2978,12 +3090,14 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
return false;
// A vector of pointers must have the same number of elements.
- if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy)) {
- if (VectorType *DstVecTy = dyn_cast<VectorType>(DstTy))
- return (SrcVecTy->getNumElements() == DstVecTy->getNumElements());
-
- return false;
- }
+ VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy);
+ VectorType *DstVecTy = dyn_cast<VectorType>(DstTy);
+ if (SrcVecTy && DstVecTy)
+ return (SrcVecTy->getNumElements() == DstVecTy->getNumElements());
+ if (SrcVecTy)
+ return SrcVecTy->getNumElements() == 1;
+ if (DstVecTy)
+ return DstVecTy->getNumElements() == 1;
return true;
}
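
A hedged sketch of querying the relaxed rule: the new code appears to accept casts between a one-element vector and a lone value of matching size, which a caller can probe through the public castIsValid helper (canBitCast is an illustrative name):

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Asks whether a bitcast of V to DstTy would be accepted; DstTy is assumed to
// have the same bit width as V's type.
static bool canBitCast(Value *V, Type *DstTy) {
  return CastInst::castIsValid(Instruction::BitCast, V, DstTy);
}
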
@@ -3171,15 +3285,18 @@ AddrSpaceCastInst::AddrSpaceCastInst(
//===----------------------------------------------------------------------===//
CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS,
- Value *RHS, const Twine &Name, Instruction *InsertBefore)
+ Value *RHS, const Twine &Name, Instruction *InsertBefore,
+ Instruction *FlagsSource)
: Instruction(ty, op,
OperandTraits<CmpInst>::op_begin(this),
OperandTraits<CmpInst>::operands(this),
InsertBefore) {
- Op<0>() = LHS;
- Op<1>() = RHS;
+ Op<0>() = LHS;
+ Op<1>() = RHS;
setPredicate((Predicate)predicate);
setName(Name);
+ if (FlagsSource)
+ copyIRFlags(FlagsSource);
}
CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS,
@@ -3518,8 +3635,8 @@ void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
/// constructor can also autoinsert before another instruction.
SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
- nullptr, 0, InsertBefore) {
+ : Instruction(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ nullptr, 0, InsertBefore) {
init(Value, Default, 2+NumCases*2);
}
@@ -3529,13 +3646,13 @@ SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
/// constructor also autoinserts at the end of the specified BasicBlock.
SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
- nullptr, 0, InsertAtEnd) {
+ : Instruction(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ nullptr, 0, InsertAtEnd) {
init(Value, Default, 2+NumCases*2);
}
SwitchInst::SwitchInst(const SwitchInst &SI)
- : TerminatorInst(SI.getType(), Instruction::Switch, nullptr, 0) {
+ : Instruction(SI.getType(), Instruction::Switch, nullptr, 0) {
init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands());
setNumHungOffUseOperands(SI.getNumOperands());
Use *OL = getOperandList();
@@ -3547,7 +3664,6 @@ SwitchInst::SwitchInst(const SwitchInst &SI)
SubclassOptionalData = SI.SubclassOptionalData;
}
-
/// addCase - Add an entry to the switch instruction...
///
void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
@@ -3626,21 +3742,21 @@ void IndirectBrInst::growOperands() {
IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
Instruction *InsertBefore)
-: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
- nullptr, 0, InsertBefore) {
+ : Instruction(Type::getVoidTy(Address->getContext()),
+ Instruction::IndirectBr, nullptr, 0, InsertBefore) {
init(Address, NumCases);
}
IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
BasicBlock *InsertAtEnd)
-: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
- nullptr, 0, InsertAtEnd) {
+ : Instruction(Type::getVoidTy(Address->getContext()),
+ Instruction::IndirectBr, nullptr, 0, InsertAtEnd) {
init(Address, NumCases);
}
IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI)
- : TerminatorInst(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
- nullptr, IBI.getNumOperands()) {
+ : Instruction(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
+ nullptr, IBI.getNumOperands()) {
allocHungoffUses(IBI.getNumOperands());
Use *OL = getOperandList();
const Use *InOL = IBI.getOperandList();
@@ -3688,6 +3804,10 @@ GetElementPtrInst *GetElementPtrInst::cloneImpl() const {
return new (getNumOperands()) GetElementPtrInst(*this);
}
+UnaryOperator *UnaryOperator::cloneImpl() const {
+ return Create(getOpcode(), Op<0>());
+}
+
BinaryOperator *BinaryOperator::cloneImpl() const {
return Create(getOpcode(), Op<0>(), Op<1>());
}
@@ -3718,7 +3838,7 @@ AllocaInst *AllocaInst::cloneImpl() const {
}
LoadInst *LoadInst::cloneImpl() const {
- return new LoadInst(getOperand(0), Twine(), isVolatile(),
+ return new LoadInst(getType(), getOperand(0), Twine(), isVolatile(),
getAlignment(), getOrdering(), getSyncScopeID());
}
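
A minimal sketch (helper name illustrative) of the type-carrying LoadInst constructor that cloneImpl now relies on, where the result type is spelled explicitly instead of being derived from the pointer operand:

#include "llvm/IR/Instructions.h"
using namespace llvm;

static LoadInst *emitLoad(Type *Ty, Value *Ptr, Instruction *InsertPt) {
  // The explicit Ty mirrors the constructor used by LoadInst::cloneImpl above.
  return new LoadInst(Ty, Ptr, "loaded", InsertPt);
}
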
diff --git a/contrib/llvm/lib/IR/IntrinsicInst.cpp b/contrib/llvm/lib/IR/IntrinsicInst.cpp
index 787889934d82..df3a38ac147f 100644
--- a/contrib/llvm/lib/IR/IntrinsicInst.cpp
+++ b/contrib/llvm/lib/IR/IntrinsicInst.cpp
@@ -32,10 +32,11 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
-/// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
+/// DbgVariableIntrinsic - This is the common base class for debug info
+/// intrinsics for variables.
///
-Value *DbgInfoIntrinsic::getVariableLocation(bool AllowNullOp) const {
+Value *DbgVariableIntrinsic::getVariableLocation(bool AllowNullOp) const {
Value *Op = getArgOperand(0);
if (AllowNullOp && !Op)
return nullptr;
@@ -45,14 +46,11 @@ Value *DbgInfoIntrinsic::getVariableLocation(bool AllowNullOp) const {
return V->getValue();
// When the value goes to null, it gets replaced by an empty MDNode.
- assert((isa<DbgLabelInst>(this)
- || !cast<MDNode>(MD)->getNumOperands())
- && "DbgValueInst Expected an empty MDNode");
-
+ assert(!cast<MDNode>(MD)->getNumOperands() && "Expected an empty MDNode");
return nullptr;
}
-Optional<uint64_t> DbgInfoIntrinsic::getFragmentSizeInBits() const {
+Optional<uint64_t> DbgVariableIntrinsic::getFragmentSizeInBits() const {
if (auto Fragment = getExpression()->getFragmentInfo())
return Fragment->SizeInBits;
return getVariable()->getSizeInBits();
@@ -154,6 +152,10 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const {
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
return true;
}
}
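
A small sketch (not from this patch) of how a client would consult isUnaryOp(), which now also reports true for the constrained ceil/floor/round/trunc intrinsics listed above:

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// True if I is a constrained floating-point intrinsic taking a single operand.
static bool isUnaryConstrainedFPOp(const Instruction &I) {
  if (const auto *CFPI = dyn_cast<ConstrainedFPIntrinsic>(&I))
    return CFPI->isUnaryOp();
  return false;
}
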
diff --git a/contrib/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm/lib/IR/LLVMContext.cpp
index 62d9e387162e..944d8265151d 100644
--- a/contrib/llvm/lib/IR/LLVMContext.cpp
+++ b/contrib/llvm/lib/IR/LLVMContext.cpp
@@ -61,6 +61,7 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
{MD_associated, "associated"},
{MD_callees, "callees"},
{MD_irr_loop, "irr_loop"},
+ {MD_access_group, "llvm.access.group"},
};
for (auto &MDKind : MDKinds) {
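
A hedged sketch assuming the matching MD_access_group enumerator was added to LLVMContext.h (not shown in this hunk): because the kind is now registered by the context constructor, the string lookup should resolve to the fixed ID.

#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;

static void checkAccessGroupKind(LLVMContext &Ctx) {
  assert(Ctx.getMDKindID("llvm.access.group") ==
             LLVMContext::MD_access_group &&
         "fixed metadata kinds are registered up front");
}
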
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h
index 3b2e1e81b1c1..2d120869860a 100644
--- a/contrib/llvm/lib/IR/LLVMContextImpl.h
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.h
@@ -280,21 +280,24 @@ template <> struct MDNodeKeyImpl<DILocation> {
unsigned Column;
Metadata *Scope;
Metadata *InlinedAt;
+ bool ImplicitCode;
MDNodeKeyImpl(unsigned Line, unsigned Column, Metadata *Scope,
- Metadata *InlinedAt)
- : Line(Line), Column(Column), Scope(Scope), InlinedAt(InlinedAt) {}
+ Metadata *InlinedAt, bool ImplicitCode)
+ : Line(Line), Column(Column), Scope(Scope), InlinedAt(InlinedAt),
+ ImplicitCode(ImplicitCode) {}
MDNodeKeyImpl(const DILocation *L)
: Line(L->getLine()), Column(L->getColumn()), Scope(L->getRawScope()),
- InlinedAt(L->getRawInlinedAt()) {}
+ InlinedAt(L->getRawInlinedAt()), ImplicitCode(L->isImplicitCode()) {}
bool isKeyOf(const DILocation *RHS) const {
return Line == RHS->getLine() && Column == RHS->getColumn() &&
- Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt();
+ Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt() &&
+ ImplicitCode == RHS->isImplicitCode();
}
unsigned getHashValue() const {
- return hash_combine(Line, Column, Scope, InlinedAt);
+ return hash_combine(Line, Column, Scope, InlinedAt, ImplicitCode);
}
};
@@ -376,20 +379,22 @@ template <> struct MDNodeKeyImpl<DIBasicType> {
uint64_t SizeInBits;
uint32_t AlignInBits;
unsigned Encoding;
+ unsigned Flags;
MDNodeKeyImpl(unsigned Tag, MDString *Name, uint64_t SizeInBits,
- uint32_t AlignInBits, unsigned Encoding)
+ uint32_t AlignInBits, unsigned Encoding, unsigned Flags)
: Tag(Tag), Name(Name), SizeInBits(SizeInBits), AlignInBits(AlignInBits),
- Encoding(Encoding) {}
+ Encoding(Encoding), Flags(Flags) {}
MDNodeKeyImpl(const DIBasicType *N)
: Tag(N->getTag()), Name(N->getRawName()), SizeInBits(N->getSizeInBits()),
- AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()) {}
+ AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()), Flags(N->getFlags()) {}
bool isKeyOf(const DIBasicType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
SizeInBits == RHS->getSizeInBits() &&
AlignInBits == RHS->getAlignInBits() &&
- Encoding == RHS->getEncoding();
+ Encoding == RHS->getEncoding() &&
+ Flags == RHS->getFlags();
}
unsigned getHashValue() const {
@@ -607,15 +612,12 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
Metadata *File;
unsigned Line;
Metadata *Type;
- bool IsLocalToUnit;
- bool IsDefinition;
unsigned ScopeLine;
Metadata *ContainingType;
- unsigned Virtuality;
unsigned VirtualIndex;
int ThisAdjustment;
unsigned Flags;
- bool IsOptimized;
+ unsigned SPFlags;
Metadata *Unit;
Metadata *TemplateParams;
Metadata *Declaration;
@@ -624,45 +626,39 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
- Metadata *ContainingType, unsigned Virtuality,
+ unsigned ScopeLine, Metadata *ContainingType,
unsigned VirtualIndex, int ThisAdjustment, unsigned Flags,
- bool IsOptimized, Metadata *Unit, Metadata *TemplateParams,
+ unsigned SPFlags, Metadata *Unit, Metadata *TemplateParams,
Metadata *Declaration, Metadata *RetainedNodes,
Metadata *ThrownTypes)
: Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
- Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit),
- IsDefinition(IsDefinition), ScopeLine(ScopeLine),
- ContainingType(ContainingType), Virtuality(Virtuality),
- VirtualIndex(VirtualIndex), ThisAdjustment(ThisAdjustment),
- Flags(Flags), IsOptimized(IsOptimized), Unit(Unit),
- TemplateParams(TemplateParams), Declaration(Declaration),
+ Line(Line), Type(Type), ScopeLine(ScopeLine),
+ ContainingType(ContainingType), VirtualIndex(VirtualIndex),
+ ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags),
+ Unit(Unit), TemplateParams(TemplateParams), Declaration(Declaration),
RetainedNodes(RetainedNodes), ThrownTypes(ThrownTypes) {}
MDNodeKeyImpl(const DISubprogram *N)
: Scope(N->getRawScope()), Name(N->getRawName()),
LinkageName(N->getRawLinkageName()), File(N->getRawFile()),
- Line(N->getLine()), Type(N->getRawType()),
- IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()),
- ScopeLine(N->getScopeLine()), ContainingType(N->getRawContainingType()),
- Virtuality(N->getVirtuality()), VirtualIndex(N->getVirtualIndex()),
+ Line(N->getLine()), Type(N->getRawType()), ScopeLine(N->getScopeLine()),
+ ContainingType(N->getRawContainingType()),
+ VirtualIndex(N->getVirtualIndex()),
ThisAdjustment(N->getThisAdjustment()), Flags(N->getFlags()),
- IsOptimized(N->isOptimized()), Unit(N->getRawUnit()),
+ SPFlags(N->getSPFlags()), Unit(N->getRawUnit()),
TemplateParams(N->getRawTemplateParams()),
- Declaration(N->getRawDeclaration()), RetainedNodes(N->getRawRetainedNodes()),
+ Declaration(N->getRawDeclaration()),
+ RetainedNodes(N->getRawRetainedNodes()),
ThrownTypes(N->getRawThrownTypes()) {}
bool isKeyOf(const DISubprogram *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
LinkageName == RHS->getRawLinkageName() &&
File == RHS->getRawFile() && Line == RHS->getLine() &&
- Type == RHS->getRawType() && IsLocalToUnit == RHS->isLocalToUnit() &&
- IsDefinition == RHS->isDefinition() &&
- ScopeLine == RHS->getScopeLine() &&
+ Type == RHS->getRawType() && ScopeLine == RHS->getScopeLine() &&
ContainingType == RHS->getRawContainingType() &&
- Virtuality == RHS->getVirtuality() &&
VirtualIndex == RHS->getVirtualIndex() &&
ThisAdjustment == RHS->getThisAdjustment() &&
- Flags == RHS->getFlags() && IsOptimized == RHS->isOptimized() &&
+ Flags == RHS->getFlags() && SPFlags == RHS->getSPFlags() &&
Unit == RHS->getUnit() &&
TemplateParams == RHS->getRawTemplateParams() &&
Declaration == RHS->getRawDeclaration() &&
@@ -670,11 +666,13 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
ThrownTypes == RHS->getRawThrownTypes();
}
+ bool isDefinition() const { return SPFlags & DISubprogram::SPFlagDefinition; }
+
unsigned getHashValue() const {
// If this is a declaration inside an ODR type, only hash the type and the
// name. Otherwise the hash will be stronger than
// MDNodeSubsetEqualImpl::isDeclarationOfODRMember().
- if (!IsDefinition && LinkageName)
+ if (!isDefinition() && LinkageName)
if (auto *CT = dyn_cast_or_null<DICompositeType>(Scope))
if (CT->getRawIdentifier())
return hash_combine(LinkageName, Scope);
@@ -691,7 +689,7 @@ template <> struct MDNodeSubsetEqualImpl<DISubprogram> {
using KeyTy = MDNodeKeyImpl<DISubprogram>;
static bool isSubsetEqual(const KeyTy &LHS, const DISubprogram *RHS) {
- return isDeclarationOfODRMember(LHS.IsDefinition, LHS.Scope,
+ return isDeclarationOfODRMember(LHS.isDefinition(), LHS.Scope,
LHS.LinkageName, LHS.TemplateParams, RHS);
}
@@ -865,23 +863,26 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
bool IsLocalToUnit;
bool IsDefinition;
Metadata *StaticDataMemberDeclaration;
+ Metadata *TemplateParams;
uint32_t AlignInBits;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition,
- Metadata *StaticDataMemberDeclaration, uint32_t AlignInBits)
+ Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams,
+ uint32_t AlignInBits)
: Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit),
IsDefinition(IsDefinition),
StaticDataMemberDeclaration(StaticDataMemberDeclaration),
- AlignInBits(AlignInBits) {}
+ TemplateParams(TemplateParams), AlignInBits(AlignInBits) {}
MDNodeKeyImpl(const DIGlobalVariable *N)
: Scope(N->getRawScope()), Name(N->getRawName()),
LinkageName(N->getRawLinkageName()), File(N->getRawFile()),
Line(N->getLine()), Type(N->getRawType()),
IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()),
StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()),
+ TemplateParams(N->getRawTemplateParams()),
AlignInBits(N->getAlignInBits()) {}
bool isKeyOf(const DIGlobalVariable *RHS) const {
@@ -892,6 +893,7 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
IsDefinition == RHS->isDefinition() &&
StaticDataMemberDeclaration ==
RHS->getRawStaticDataMemberDeclaration() &&
+ TemplateParams == RHS->getRawTemplateParams() &&
AlignInBits == RHS->getAlignInBits();
}
diff --git a/contrib/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm/lib/IR/LegacyPassManager.cpp
index 54d602d926e5..01d14f17bba5 100644
--- a/contrib/llvm/lib/IR/LegacyPassManager.cpp
+++ b/contrib/llvm/lib/IR/LegacyPassManager.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/IR/LegacyPassNameParser.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -99,27 +100,31 @@ static cl::list<std::string>
/// This is a helper to determine whether to print IR before or
/// after a pass.
-static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI,
+bool llvm::shouldPrintBeforePass() {
+ return PrintBeforeAll || !PrintBefore.empty();
+}
+
+bool llvm::shouldPrintAfterPass() {
+ return PrintAfterAll || !PrintAfter.empty();
+}
+
+static bool ShouldPrintBeforeOrAfterPass(StringRef PassID,
PassOptionList &PassesToPrint) {
for (auto *PassInf : PassesToPrint) {
if (PassInf)
- if (PassInf->getPassArgument() == PI->getPassArgument()) {
+ if (PassInf->getPassArgument() == PassID) {
return true;
}
}
return false;
}
-/// This is a utility to check whether a pass should have IR dumped
-/// before it.
-static bool ShouldPrintBeforePass(const PassInfo *PI) {
- return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PI, PrintBefore);
+bool llvm::shouldPrintBeforePass(StringRef PassID) {
+ return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PassID, PrintBefore);
}
-/// This is a utility to check whether a pass should have IR dumped
-/// after it.
-static bool ShouldPrintAfterPass(const PassInfo *PI) {
- return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter);
+bool llvm::shouldPrintAfterPass(StringRef PassID) {
+ return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PassID, PrintAfter);
}
bool llvm::forcePrintModuleIR() { return PrintModuleScope; }
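
A minimal sketch of calling the now-exported helpers from outside this file; the header is assumed to be the one that declares the IR-printing passes, and the helper name is illustrative:

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/IRPrintingPasses.h"
using namespace llvm;

// Queries with a pass's command-line argument (e.g. "instcombine") to decide
// whether to schedule IR-printer passes around it.
static bool wantIRDumpAround(StringRef PassArg) {
  return shouldPrintBeforePass(PassArg) || shouldPrintAfterPass(PassArg);
}
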
@@ -135,34 +140,32 @@ bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
return PassDebugging >= Executions;
}
-unsigned PMDataManager::initSizeRemarkInfo(Module &M) {
+unsigned PMDataManager::initSizeRemarkInfo(
+ Module &M, StringMap<std::pair<unsigned, unsigned>> &FunctionToInstrCount) {
// Only calculate getInstructionCount if the size-info remark is requested.
- return M.getInstructionCount();
-}
-
-void PMDataManager::emitInstrCountChangedRemark(Pass *P, Module &M,
- unsigned CountBefore) {
- // We need a function containing at least one basic block in order to output
- // remarks. Since it's possible that the first function in the module doesn't
- // actually contain a basic block, we have to go and find one that's suitable
- // for emitting remarks.
- auto It = std::find_if(M.begin(), M.end(),
- [](const Function &Fn) { return !Fn.empty(); });
-
- // Didn't find a function. Quit.
- if (It == M.end())
- return;
-
- // We found a function containing at least one basic block.
- Function *F = &*It;
+ unsigned InstrCount = 0;
- // How many instructions are in the module now?
- unsigned CountAfter = M.getInstructionCount();
+ // Collect instruction counts for every function. We'll use this to emit
+ // per-function size remarks later.
+ for (Function &F : M) {
+ unsigned FCount = F.getInstructionCount();
- // If there was no change, don't emit a remark.
- if (CountBefore == CountAfter)
- return;
+ // Insert a record into FunctionToInstrCount keeping track of the current
+ // size of the function as the first member of a pair. Set the second
+ // member to 0; if the function is deleted by the pass, then when we get
+ // here, we'll be able to let the user know that F no longer contributes to
+ // the module.
+ FunctionToInstrCount[F.getName().str()] =
+ std::pair<unsigned, unsigned>(FCount, 0);
+ InstrCount += FCount;
+ }
+ return InstrCount;
+}
+void PMDataManager::emitInstrCountChangedRemark(
+ Pass *P, Module &M, int64_t Delta, unsigned CountBefore,
+ StringMap<std::pair<unsigned, unsigned>> &FunctionToInstrCount,
+ Function *F) {
// If it's a pass manager, don't emit a remark. (This hinges on the assumption
// that the only passes that return non-null with getAsPMDataManager are pass
// managers.) The reason we have to do this is to avoid emitting remarks for
@@ -170,11 +173,53 @@ void PMDataManager::emitInstrCountChangedRemark(Pass *P, Module &M,
if (P->getAsPMDataManager())
return;
- // Compute a possibly negative delta between the instruction count before
- // running P, and after running P.
- int64_t Delta =
- static_cast<int64_t>(CountAfter) - static_cast<int64_t>(CountBefore);
+ // Set to true if this isn't a module pass or CGSCC pass.
+ bool CouldOnlyImpactOneFunction = (F != nullptr);
+
+ // Helper lambda that updates the changes to the size of some function.
+ auto UpdateFunctionChanges =
+ [&FunctionToInstrCount](Function &MaybeChangedFn) {
+ // Update the total module count.
+ unsigned FnSize = MaybeChangedFn.getInstructionCount();
+ auto It = FunctionToInstrCount.find(MaybeChangedFn.getName());
+
+ // If we created a new function, then we need to add it to the map and
+ // say that it changed from 0 instructions to FnSize.
+ if (It == FunctionToInstrCount.end()) {
+ FunctionToInstrCount[MaybeChangedFn.getName()] =
+ std::pair<unsigned, unsigned>(0, FnSize);
+ return;
+ }
+ // Insert the new function size into the second member of the pair. This
+ // tells us whether or not this function changed in size.
+ It->second.second = FnSize;
+ };
+
+ // We need to initially update all of the function sizes.
+ // If no function was passed in, then we're either a module pass or an
+ // CGSCC pass.
+ if (!CouldOnlyImpactOneFunction)
+ std::for_each(M.begin(), M.end(), UpdateFunctionChanges);
+ else
+ UpdateFunctionChanges(*F);
+
+ // Do we have a function we can use to emit a remark?
+ if (!CouldOnlyImpactOneFunction) {
+ // We need a function containing at least one basic block in order to output
+ // remarks. Since it's possible that the first function in the module
+ // doesn't actually contain a basic block, we have to go and find one that's
+ // suitable for emitting remarks.
+ auto It = std::find_if(M.begin(), M.end(),
+ [](const Function &Fn) { return !Fn.empty(); });
+ // Didn't find a function. Quit.
+ if (It == M.end())
+ return;
+
+ // We found a function containing at least one basic block.
+ F = &*It;
+ }
+ int64_t CountAfter = static_cast<int64_t>(CountBefore) + Delta;
BasicBlock &BB = *F->begin();
OptimizationRemarkAnalysis R("size-info", "IRSizeChange",
DiagnosticLocation(), &BB);
@@ -188,6 +233,55 @@ void PMDataManager::emitInstrCountChangedRemark(Pass *P, Module &M,
<< "; Delta: "
<< DiagnosticInfoOptimizationBase::Argument("DeltaInstrCount", Delta);
F->getContext().diagnose(R); // Not using ORE for layering reasons.
+
+ // Emit per-function size change remarks separately.
+ std::string PassName = P->getPassName().str();
+
+ // Helper lambda that emits a remark when the size of a function has changed.
+ auto EmitFunctionSizeChangedRemark = [&FunctionToInstrCount, &F, &BB,
+ &PassName](const std::string &Fname) {
+ unsigned FnCountBefore, FnCountAfter;
+ std::pair<unsigned, unsigned> &Change = FunctionToInstrCount[Fname];
+ std::tie(FnCountBefore, FnCountAfter) = Change;
+ int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
+ static_cast<int64_t>(FnCountBefore);
+
+ if (FnDelta == 0)
+ return;
+
+ // FIXME: We shouldn't use BB for the location here. Unfortunately, because
+ // the function that we're looking at could have been deleted, we can't use
+ // it for the source location. We *want* remarks when a function is deleted
+ // though, so we're kind of stuck here as is. (This remark, along with the
+ // whole-module size change remarks really ought not to have source
+ // locations at all.)
+ OptimizationRemarkAnalysis FR("size-info", "FunctionIRSizeChange",
+ DiagnosticLocation(), &BB);
+ FR << DiagnosticInfoOptimizationBase::Argument("Pass", PassName)
+ << ": Function: "
+ << DiagnosticInfoOptimizationBase::Argument("Function", Fname)
+ << ": IR instruction count changed from "
+ << DiagnosticInfoOptimizationBase::Argument("IRInstrsBefore",
+ FnCountBefore)
+ << " to "
+ << DiagnosticInfoOptimizationBase::Argument("IRInstrsAfter",
+ FnCountAfter)
+ << "; Delta: "
+ << DiagnosticInfoOptimizationBase::Argument("DeltaInstrCount", FnDelta);
+ F->getContext().diagnose(FR);
+
+ // Update the function size.
+ Change.first = FnCountAfter;
+ };
+
+ // Are we looking at more than one function? If so, emit remarks for all of
+ // the functions in the module. Otherwise, only emit one remark.
+ if (!CouldOnlyImpactOneFunction)
+ std::for_each(FunctionToInstrCount.keys().begin(),
+ FunctionToInstrCount.keys().end(),
+ EmitFunctionSizeChangedRemark);
+ else
+ EmitFunctionSizeChangedRemark(F->getName().str());
}
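
The per-function bookkeeping above, in miniature (a standalone sketch, not LLVM API): the map holds (size before pass, size after pass), and a function first seen after the pass starts from zero so deletions and creations both show up as deltas.

#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <utility>
using namespace llvm;

using FnSizeMap = StringMap<std::pair<unsigned, unsigned>>;

static int64_t recordAndDelta(FnSizeMap &Sizes, StringRef Fn,
                              unsigned SizeAfter) {
  auto &Entry = Sizes[Fn]; // value-initialized to {0, 0} for new functions
  Entry.second = SizeAfter;
  return static_cast<int64_t>(Entry.second) -
         static_cast<int64_t>(Entry.first);
}
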
void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
@@ -494,65 +588,6 @@ char PassManagerImpl::ID = 0;
} // End of legacy namespace
} // End of llvm namespace
-namespace {
-
-//===----------------------------------------------------------------------===//
-/// TimingInfo Class - This class is used to calculate information about the
-/// amount of time each pass takes to execute. This only happens when
-/// -time-passes is enabled on the command line.
-///
-
-static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
-
-class TimingInfo {
- DenseMap<Pass*, Timer*> TimingData;
- TimerGroup TG;
-public:
- // Use 'create' member to get this.
- TimingInfo() : TG("pass", "... Pass execution timing report ...") {}
-
- // TimingDtor - Print out information about timing information
- ~TimingInfo() {
- // Delete all of the timers, which accumulate their info into the
- // TimerGroup.
- for (auto &I : TimingData)
- delete I.second;
- // TimerGroup is deleted next, printing the report.
- }
-
- // createTheTimeInfo - This method either initializes the TheTimeInfo pointer
- // to a non-null value (if the -time-passes option is enabled) or it leaves it
- // null. It may be called multiple times.
- static void createTheTimeInfo();
-
- // print - Prints out timing information and then resets the timers.
- void print() {
- TG.print(*CreateInfoOutputFile());
- }
-
- /// getPassTimer - Return the timer for the specified pass if it exists.
- Timer *getPassTimer(Pass *P) {
- if (P->getAsPMDataManager())
- return nullptr;
-
- sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
- Timer *&T = TimingData[P];
- if (!T) {
- StringRef PassName = P->getPassName();
- StringRef PassArgument;
- if (const PassInfo *PI = Pass::lookupPassInfo(P->getPassID()))
- PassArgument = PI->getPassArgument();
- T = new Timer(PassArgument.empty() ? PassName : PassArgument, PassName,
- TG);
- }
- return T;
- }
-};
-
-} // End of anon namespace
-
-static TimingInfo *TheTimeInfo;
-
//===----------------------------------------------------------------------===//
// PMTopLevelManager implementation
@@ -677,6 +712,8 @@ void PMTopLevelManager::schedulePass(Pass *P) {
// available at this point.
const PassInfo *PI = findAnalysisPassInfo(P->getPassID());
if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
+ // Remove any cached AnalysisUsage information.
+ AnUsageMap.erase(P);
delete P;
return;
}
@@ -747,7 +784,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
return;
}
- if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) {
+ if (PI && !PI->isAnalysis() && shouldPrintBeforePass(PI->getPassArgument())) {
Pass *PP = P->createPrinterPass(
dbgs(), ("*** IR Dump Before " + P->getPassName() + " ***").str());
PP->assignPassManager(activeStack, getTopLevelPassManagerType());
@@ -756,7 +793,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
// Add the requested pass to the best available pass manager.
P->assignPassManager(activeStack, getTopLevelPassManagerType());
- if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) {
+ if (PI && !PI->isAnalysis() && shouldPrintAfterPass(PI->getPassArgument())) {
Pass *PP = P->createPrinterPass(
dbgs(), ("*** IR Dump After " + P->getPassName() + " ***").str());
PP->assignPassManager(activeStack, getTopLevelPassManagerType());
@@ -1343,9 +1380,16 @@ bool BBPassManager::runOnFunction(Function &F) {
bool Changed = doInitialization(F);
Module &M = *F.getParent();
- unsigned InstrCount = 0;
+ unsigned InstrCount, BBSize = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
- for (BasicBlock &BB : F)
+ if (EmitICRemark)
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
+
+ for (BasicBlock &BB : F) {
+ // Collect the initial size of the basic block.
+ if (EmitICRemark)
+ BBSize = BB.size();
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
BasicBlockPass *BP = getContainedPass(Index);
bool LocalChanged = false;
@@ -1359,11 +1403,20 @@ bool BBPassManager::runOnFunction(Function &F) {
// If the pass crashes, remember this.
PassManagerPrettyStackEntry X(BP, BB);
TimeRegion PassTimer(getPassTimer(BP));
- if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
LocalChanged |= BP->runOnBasicBlock(BB);
- if (EmitICRemark)
- emitInstrCountChangedRemark(BP, M, InstrCount);
+ if (EmitICRemark) {
+ unsigned NewSize = BB.size();
+ // Update the size of the basic block, emit a remark, and update the
+ // size of the module.
+ if (NewSize != BBSize) {
+ int64_t Delta =
+ static_cast<int64_t>(NewSize) - static_cast<int64_t>(BBSize);
+ emitInstrCountChangedRemark(BP, M, Delta, InstrCount,
+ FunctionToInstrCount, &F);
+ InstrCount = static_cast<int64_t>(InstrCount) + Delta;
+ BBSize = NewSize;
+ }
+ }
}
Changed |= LocalChanged;
@@ -1378,6 +1431,7 @@ bool BBPassManager::runOnFunction(Function &F) {
recordAvailableAnalysis(BP);
removeDeadPasses(BP, BB.getName(), ON_BASICBLOCK_MSG);
}
+ }
return doFinalization(F) || Changed;
}
@@ -1525,7 +1579,6 @@ void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
// Return true if any function is modified by a pass.
bool FunctionPassManagerImpl::run(Function &F) {
bool Changed = false;
- TimingInfo::createTheTimeInfo();
initializeAllAnalysisInfo();
for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
@@ -1567,8 +1620,15 @@ bool FPPassManager::runOnFunction(Function &F) {
// Collect inherited analysis from Module level pass manager.
populateInheritedAnalysis(TPM->activeStack);
- unsigned InstrCount = 0;
+ unsigned InstrCount, FunctionSize = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
+ // Collect the initial size of the module.
+ if (EmitICRemark) {
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
+ FunctionSize = F.getInstructionCount();
+ }
+
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
bool LocalChanged = false;
@@ -1581,11 +1641,21 @@ bool FPPassManager::runOnFunction(Function &F) {
{
PassManagerPrettyStackEntry X(FP, F);
TimeRegion PassTimer(getPassTimer(FP));
- if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
LocalChanged |= FP->runOnFunction(F);
- if (EmitICRemark)
- emitInstrCountChangedRemark(FP, M, InstrCount);
+ if (EmitICRemark) {
+ unsigned NewSize = F.getInstructionCount();
+
+ // Update the size of the function, emit a remark, and update the size
+ // of the module.
+ if (NewSize != FunctionSize) {
+ int64_t Delta = static_cast<int64_t>(NewSize) -
+ static_cast<int64_t>(FunctionSize);
+ emitInstrCountChangedRemark(FP, M, Delta, InstrCount,
+ FunctionToInstrCount, &F);
+ InstrCount = static_cast<int64_t>(InstrCount) + Delta;
+ FunctionSize = NewSize;
+ }
+ }
}
Changed |= LocalChanged;
@@ -1649,8 +1719,15 @@ MPPassManager::runOnModule(Module &M) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
Changed |= getContainedPass(Index)->doInitialization(M);
- unsigned InstrCount = 0;
+ unsigned InstrCount, ModuleCount = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
+ // Collect the initial size of the module.
+ if (EmitICRemark) {
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
+ ModuleCount = InstrCount;
+ }
+
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
ModulePass *MP = getContainedPass(Index);
bool LocalChanged = false;
@@ -1664,11 +1741,18 @@ MPPassManager::runOnModule(Module &M) {
PassManagerPrettyStackEntry X(MP, M);
TimeRegion PassTimer(getPassTimer(MP));
- if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
LocalChanged |= MP->runOnModule(M);
- if (EmitICRemark)
- emitInstrCountChangedRemark(MP, M, InstrCount);
+ if (EmitICRemark) {
+ // Update the size of the module.
+ ModuleCount = M.getInstructionCount();
+ if (ModuleCount != InstrCount) {
+ int64_t Delta = static_cast<int64_t>(ModuleCount) -
+ static_cast<int64_t>(InstrCount);
+ emitInstrCountChangedRemark(MP, M, Delta, InstrCount,
+ FunctionToInstrCount);
+ InstrCount = ModuleCount;
+ }
+ }
}
Changed |= LocalChanged;
@@ -1761,7 +1845,6 @@ Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
/// whether any of the passes modifies the module, and if so, return true.
bool PassManagerImpl::run(Module &M) {
bool Changed = false;
- TimingInfo::createTheTimeInfo();
dumpArguments();
dumpPasses();
@@ -1806,41 +1889,6 @@ bool PassManager::run(Module &M) {
}
//===----------------------------------------------------------------------===//
-// TimingInfo implementation
-
-bool llvm::TimePassesIsEnabled = false;
-static cl::opt<bool, true> EnableTiming(
- "time-passes", cl::location(TimePassesIsEnabled), cl::Hidden,
- cl::desc("Time each pass, printing elapsed time for each on exit"));
-
-// createTheTimeInfo - This method either initializes the TheTimeInfo pointer to
-// a non-null value (if the -time-passes option is enabled) or it leaves it
-// null. It may be called multiple times.
-void TimingInfo::createTheTimeInfo() {
- if (!TimePassesIsEnabled || TheTimeInfo) return;
-
- // Constructed the first time this is called, iff -time-passes is enabled.
- // This guarantees that the object will be constructed before static globals,
- // thus it will be destroyed before them.
- static ManagedStatic<TimingInfo> TTI;
- TheTimeInfo = &*TTI;
-}
-
-/// If TimingInfo is enabled then start pass timer.
-Timer *llvm::getPassTimer(Pass *P) {
- if (TheTimeInfo)
- return TheTimeInfo->getPassTimer(P);
- return nullptr;
-}
-
-/// If timing is enabled, report the times collected up to now and then reset
-/// them.
-void llvm::reportAndResetTimings() {
- if (TheTimeInfo)
- TheTimeInfo->print();
-}
-
-//===----------------------------------------------------------------------===//
// PMStack implementation
//
diff --git a/contrib/llvm/lib/IR/MDBuilder.cpp b/contrib/llvm/lib/IR/MDBuilder.cpp
index 1bb23c0330f3..3fa541f1b535 100644
--- a/contrib/llvm/lib/IR/MDBuilder.cpp
+++ b/contrib/llvm/lib/IR/MDBuilder.cpp
@@ -260,8 +260,9 @@ MDNode *MDBuilder::createMutableTBAAAccessTag(MDNode *Tag) {
}
MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) {
- SmallVector<Metadata *, 2> Vals(2);
- Vals[0] = createString("loop_header_weight");
- Vals[1] = createConstant(ConstantInt::get(Type::getInt64Ty(Context), Weight));
+ Metadata *Vals[] = {
+ createString("loop_header_weight"),
+ createConstant(ConstantInt::get(Type::getInt64Ty(Context), Weight)),
+ };
return MDNode::get(Context, Vals);
}
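
A minimal sketch (helper name illustrative) of using the rewritten builder to attach an irreducible-loop-header weight:

#include <cstdint>
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

static void tagIrrLoopHeader(Instruction *HeaderTerm, uint64_t Weight) {
  MDBuilder MDB(HeaderTerm->getContext());
  HeaderTerm->setMetadata(LLVMContext::MD_irr_loop,
                          MDB.createIrrLoopHeaderWeight(Weight));
}
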
diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp
index 83a22d95bd81..5536c2497f1e 100644
--- a/contrib/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm/lib/IR/Metadata.cpp
@@ -237,7 +237,7 @@ void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
// Copy out uses since UseMap will get touched below.
using UseTy = std::pair<void *, std::pair<OwnerTy, uint64_t>>;
SmallVector<UseTy, 8> Uses(UseMap.begin(), UseMap.end());
- llvm::sort(Uses.begin(), Uses.end(), [](const UseTy &L, const UseTy &R) {
+ llvm::sort(Uses, [](const UseTy &L, const UseTy &R) {
return L.second.second < R.second.second;
});
for (const auto &Pair : Uses) {
@@ -290,7 +290,7 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) {
// Copy out uses since UseMap could get touched below.
using UseTy = std::pair<void *, std::pair<OwnerTy, uint64_t>>;
SmallVector<UseTy, 8> Uses(UseMap.begin(), UseMap.end());
- llvm::sort(Uses.begin(), Uses.end(), [](const UseTy &L, const UseTy &R) {
+ llvm::sort(Uses, [](const UseTy &L, const UseTy &R) {
return L.second.second < R.second.second;
});
UseMap.clear();
@@ -1484,7 +1484,7 @@ void GlobalObject::copyMetadata(const GlobalObject *Other, unsigned Offset) {
std::vector<uint64_t> Elements(OrigElements.size() + 2);
Elements[0] = dwarf::DW_OP_plus_uconst;
Elements[1] = Offset;
- std::copy(OrigElements.begin(), OrigElements.end(), Elements.begin() + 2);
+ llvm::copy(OrigElements, Elements.begin() + 2);
E = DIExpression::get(getContext(), Elements);
Attachment = DIGlobalVariableExpression::get(getContext(), GV, E);
}
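
A short sketch of the range helpers these hunks switch to: llvm::sort and llvm::copy take the whole container, so the explicit begin()/end() pairs disappear.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static void sortAndCopy(SmallVectorImpl<int> &In, SmallVectorImpl<int> &Out) {
  llvm::sort(In, [](int L, int R) { return L < R; });
  Out.resize(In.size());
  llvm::copy(In, Out.begin());
}
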
diff --git a/contrib/llvm/lib/IR/Module.cpp b/contrib/llvm/lib/IR/Module.cpp
index f18024063533..93f27304424f 100644
--- a/contrib/llvm/lib/IR/Module.cpp
+++ b/contrib/llvm/lib/IR/Module.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/Module.h"
#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -45,6 +46,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/VersionTuple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -145,7 +147,8 @@ Constant *Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
GlobalValue *F = getNamedValue(Name);
if (!F) {
// Nope, add it
- Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage,
+ DL.getProgramAddressSpace(), Name);
if (!New->isIntrinsic()) // Intrinsics get attrs set on construction
New->setAttributes(AttributeList);
FunctionList.push_back(New);
@@ -154,8 +157,9 @@ Constant *Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
// If the function exists but has the wrong type, return a bitcast to the
// right type.
- if (F->getType() != PointerType::getUnqual(Ty))
- return ConstantExpr::getBitCast(F, PointerType::getUnqual(Ty));
+ auto *PTy = PointerType::get(Ty, F->getAddressSpace());
+ if (F->getType() != PTy)
+ return ConstantExpr::getBitCast(F, PTy);
// Otherwise, we just found the existing function or a prototype.
return F;
@@ -199,16 +203,14 @@ GlobalVariable *Module::getGlobalVariable(StringRef Name,
/// with a constantexpr cast to the right type.
/// 3. Finally, if the existing global is the correct declaration, return the
/// existing global.
-Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
+Constant *Module::getOrInsertGlobal(
+ StringRef Name, Type *Ty,
+ function_ref<GlobalVariable *()> CreateGlobalCallback) {
// See if we have a definition for the specified global already.
GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
- if (!GV) {
- // Nope, add it
- GlobalVariable *New =
- new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage,
- nullptr, Name);
- return New; // Return the new declaration.
- }
+ if (!GV)
+ GV = CreateGlobalCallback();
+ assert(GV && "The CreateGlobalCallback is expected to create a global");
// If the variable exists but has the wrong type, return a bitcast to the
// right type.
@@ -221,6 +223,14 @@ Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
return GV;
}
+// Overload to construct a global variable using its constructor's defaults.
+Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
+ return getOrInsertGlobal(Name, Ty, [&] {
+ return new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, Name);
+ });
+}
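
A minimal sketch (helper name illustrative) of the callback overload: the callback runs only when the global does not exist yet, so the caller controls linkage and initializer on first creation.

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static Constant *getOrCreateCounter(Module &M, StringRef Name, Type *Ty) {
  return M.getOrInsertGlobal(Name, Ty, [&] {
    return new GlobalVariable(M, Ty, /*isConstant=*/false,
                              GlobalValue::InternalLinkage,
                              Constant::getNullValue(Ty), Name);
  });
}
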
+
//===----------------------------------------------------------------------===//
// Methods for easy access to the global variables in the module.
//
@@ -505,6 +515,24 @@ void Module::setPIELevel(PIELevel::Level PL) {
addModuleFlag(ModFlagBehavior::Max, "PIE Level", PL);
}
+Optional<CodeModel::Model> Module::getCodeModel() const {
+ auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("Code Model"));
+
+ if (!Val)
+ return None;
+
+ return static_cast<CodeModel::Model>(
+ cast<ConstantInt>(Val->getValue())->getZExtValue());
+}
+
+void Module::setCodeModel(CodeModel::Model CL) {
+ // Linking object files with different code models is undefined behavior
+ // because the compiler would have to generate additional code (to span
+ // longer jumps) if a larger code model is used with a smaller one.
+ // Therefore we will treat attempts to mix code models as an error.
+ addModuleFlag(ModFlagBehavior::Error, "Code Model", CL);
+}
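
A minimal sketch of the new accessors (helper name illustrative): the code model is stored as an Error-behavior module flag, so linking modules with different models fails rather than miscompiling.

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CodeGen.h"
using namespace llvm;

static bool usesSmallCodeModel(Module &M) {
  M.setCodeModel(CodeModel::Small);
  Optional<CodeModel::Model> CM = M.getCodeModel();
  return CM && *CM == CodeModel::Small;
}
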
+
void Module::setProfileSummary(Metadata *M) {
addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M);
}
@@ -526,6 +554,45 @@ void Module::setRtLibUseGOT() {
addModuleFlag(ModFlagBehavior::Max, "RtLibUseGOT", 1);
}
+void Module::setSDKVersion(const VersionTuple &V) {
+ SmallVector<unsigned, 3> Entries;
+ Entries.push_back(V.getMajor());
+ if (auto Minor = V.getMinor()) {
+ Entries.push_back(*Minor);
+ if (auto Subminor = V.getSubminor())
+ Entries.push_back(*Subminor);
+ // Ignore the 'build' component as it can't be represented in the object
+ // file.
+ }
+ addModuleFlag(ModFlagBehavior::Warning, "SDK Version",
+ ConstantDataArray::get(Context, Entries));
+}
+
+VersionTuple Module::getSDKVersion() const {
+ auto *CM = dyn_cast_or_null<ConstantAsMetadata>(getModuleFlag("SDK Version"));
+ if (!CM)
+ return {};
+ auto *Arr = dyn_cast_or_null<ConstantDataArray>(CM->getValue());
+ if (!Arr)
+ return {};
+ auto getVersionComponent = [&](unsigned Index) -> Optional<unsigned> {
+ if (Index >= Arr->getNumElements())
+ return None;
+ return (unsigned)Arr->getElementAsInteger(Index);
+ };
+ auto Major = getVersionComponent(0);
+ if (!Major)
+ return {};
+ VersionTuple Result = VersionTuple(*Major);
+ if (auto Minor = getVersionComponent(1)) {
+ Result = VersionTuple(*Major, *Minor);
+ if (auto Subminor = getVersionComponent(2)) {
+ Result = VersionTuple(*Major, *Minor, *Subminor);
+ }
+ }
+ return Result;
+}
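
A minimal sketch (helper name illustrative) round-tripping an SDK version through the new "SDK Version" module flag; the build component, if present, is intentionally dropped.

#include "llvm/IR/Module.h"
#include "llvm/Support/VersionTuple.h"
using namespace llvm;

static VersionTuple recordSDKVersion(Module &M) {
  M.setSDKVersion(VersionTuple(10, 14, 1));
  return M.getSDKVersion(); // VersionTuple(10, 14, 1)
}
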
+
GlobalVariable *llvm::collectUsedGlobalVariables(
const Module &M, SmallPtrSetImpl<GlobalValue *> &Set, bool CompilerUsed) {
const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
diff --git a/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp b/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp
index 4c4466f9a902..46b88cd31779 100644
--- a/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -14,11 +14,17 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "module-summary-index"
+
+STATISTIC(ReadOnlyLiveGVars,
+ "Number of live global variables marked read only");
+
FunctionSummary FunctionSummary::ExternalNode =
FunctionSummary::makeDummyFunctionSummary({});
bool ValueInfo::isDSOLocal() const {
@@ -30,6 +36,17 @@ bool ValueInfo::isDSOLocal() const {
});
}
+// Gets the number of immutable refs in RefEdgeList
+unsigned FunctionSummary::immutableRefCount() const {
+ // Here we take advantage of having all readonly references
+ // located in the end of the RefEdgeList.
+ auto Refs = refs();
+ unsigned ImmutableRefCnt = 0;
+ for (int I = Refs.size() - 1; I >= 0 && Refs[I].isReadOnly(); --I)
+ ImmutableRefCnt++;
+ return ImmutableRefCnt;
+}
+
// Collect for the given module the list of function it defines
// (GUID -> Summary).
void ModuleSummaryIndex::collectDefinedFunctionsForModule(
@@ -84,6 +101,80 @@ bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const {
return false;
}
+static void propagateConstantsToRefs(GlobalValueSummary *S) {
+ // If reference is not readonly then referenced summary is not
+ // readonly either. Note that:
+ // - All references from GlobalVarSummary are conservatively considered as
+ // not readonly. Tracking them properly requires more complex analysis
+ // then we have now.
+ //
+ // - AliasSummary objects have no refs at all so this function is a no-op
+ // for them.
+ for (auto &VI : S->refs()) {
+ if (VI.isReadOnly()) {
+ // We only mark refs as readonly when computing function summaries on
+ // analysis phase.
+ assert(isa<FunctionSummary>(S));
+ continue;
+ }
+ for (auto &Ref : VI.getSummaryList())
+      // If a reference to an alias is not readonly then the aliasee is not readonly
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(Ref->getBaseObject()))
+ GVS->setReadOnly(false);
+ }
+}
+
+// Do the constant propagation in combined index.
+// The goal of constant propagation is internalization of readonly
+// variables. To determine which variables are readonly and which
+// are not we take following steps:
+// - During analysis we speculatively assign readonly attribute to
+// all variables which can be internalized. When computing function
+// summary we also assign readonly attribute to a reference if
+// function doesn't modify referenced variable.
+//
+// - After computing dead symbols in combined index we do the constant
+// propagation. During this step we clear readonly attribute from
+// all variables which:
+// a. are preserved or can't be imported
+// b. referenced by any global variable initializer
+// c. referenced by a function and reference is not readonly
+//
+// Internalization itself happens in the backend after import is finished
+// See internalizeImmutableGVs.
+void ModuleSummaryIndex::propagateConstants(
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ for (auto &P : *this)
+ for (auto &S : P.second.SummaryList) {
+ if (!isGlobalValueLive(S.get()))
+ // We don't examine references from dead objects
+ continue;
+
+ // Global variable can't be marked read only if it is not eligible
+ // to import since we need to ensure that all external references
+ // get a local (imported) copy. It also can't be marked read only
+ // if it or any alias (since alias points to the same memory) are
+ // preserved or notEligibleToImport, since either of those means
+ // there could be writes that are not visible (because preserved
+ // means it could have external to DSO writes, and notEligibleToImport
+ // means it could have writes via inline assembly leading it to be
+ // in the @llvm.*used).
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(S->getBaseObject()))
+ // Here we intentionally pass S.get() not GVS, because S could be
+ // an alias.
+ if (!canImportGlobalVar(S.get()) || GUIDPreservedSymbols.count(P.first))
+ GVS->setReadOnly(false);
+ propagateConstantsToRefs(S.get());
+ }
+ if (llvm::AreStatisticsEnabled())
+ for (auto &P : *this)
+ if (P.second.SummaryList.size())
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(
+ P.second.SummaryList[0]->getBaseObject()))
+ if (isGlobalValueLive(GVS) && GVS->isReadOnly())
+ ReadOnlyLiveGVars++;
+}
+
// TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot)
// then delete this function and update its tests
LLVM_DUMP_METHOD
@@ -108,6 +199,7 @@ namespace {
struct Attributes {
void add(const Twine &Name, const Twine &Value,
const Twine &Comment = Twine());
+ void addComment(const Twine &Comment);
std::string getAsString() const;
std::vector<std::string> Attrs;
@@ -129,6 +221,10 @@ void Attributes::add(const Twine &Name, const Twine &Value,
A += Value.str();
A += "\"";
Attrs.push_back(A);
+ addComment(Comment);
+}
+
+void Attributes::addComment(const Twine &Comment) {
if (!Comment.isTriviallyEmpty()) {
if (Comments.empty())
Comments = " // ";
@@ -182,8 +278,9 @@ static std::string linkageToString(GlobalValue::LinkageTypes LT) {
static std::string fflagsToString(FunctionSummary::FFlags F) {
auto FlagValue = [](unsigned V) { return V ? '1' : '0'; };
- char FlagRep[] = {FlagValue(F.ReadNone), FlagValue(F.ReadOnly),
- FlagValue(F.NoRecurse), FlagValue(F.ReturnDoesNotAlias), 0};
+ char FlagRep[] = {FlagValue(F.ReadNone), FlagValue(F.ReadOnly),
+ FlagValue(F.NoRecurse), FlagValue(F.ReturnDoesNotAlias),
+ FlagValue(F.NoInline), 0};
return FlagRep;
}
@@ -198,9 +295,12 @@ static std::string getSummaryAttributes(GlobalValueSummary* GVS) {
", ffl: " + fflagsToString(FS->fflags());
}
+static std::string getNodeVisualName(GlobalValue::GUID Id) {
+ return std::string("@") + std::to_string(Id);
+}
+
static std::string getNodeVisualName(const ValueInfo &VI) {
- return VI.name().empty() ? std::string("@") + std::to_string(VI.getGUID())
- : VI.name().str();
+ return VI.name().empty() ? getNodeVisualName(VI.getGUID()) : VI.name().str();
}
static std::string getNodeLabel(const ValueInfo &VI, GlobalValueSummary *GVS) {
@@ -221,13 +321,25 @@ static std::string getNodeLabel(const ValueInfo &VI, GlobalValueSummary *GVS) {
// specific module associated with it. Typically this is function
// or variable defined in native object or library.
static void defineExternalNode(raw_ostream &OS, const char *Pfx,
- const ValueInfo &VI) {
- auto StrId = std::to_string(VI.getGUID());
- OS << " " << StrId << " [label=\"" << getNodeVisualName(VI)
- << "\"]; // defined externally\n";
+ const ValueInfo &VI, GlobalValue::GUID Id) {
+ auto StrId = std::to_string(Id);
+ OS << " " << StrId << " [label=\"";
+
+ if (VI) {
+ OS << getNodeVisualName(VI);
+ } else {
+ OS << getNodeVisualName(Id);
+ }
+ OS << "\"]; // defined externally\n";
+}
+
+static bool hasReadOnlyFlag(const GlobalValueSummary *S) {
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(S))
+ return GVS->isReadOnly();
+ return false;
}
-void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const {
+void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
std::vector<Edge> CrossModuleEdges;
DenseMap<GlobalValue::GUID, std::vector<uint64_t>> NodeMap;
StringMap<GVSummaryMapTy> ModuleToDefinedGVS;
@@ -241,14 +353,18 @@ void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const {
"_" + std::to_string(Id);
};
- auto DrawEdge = [&](const char *Pfx, int SrcMod, GlobalValue::GUID SrcId,
- int DstMod, GlobalValue::GUID DstId, int TypeOrHotness) {
- // 0 corresponds to alias edge, 1 to ref edge, 2 to call with unknown
- // hotness, ...
- TypeOrHotness += 2;
+ auto DrawEdge = [&](const char *Pfx, uint64_t SrcMod, GlobalValue::GUID SrcId,
+ uint64_t DstMod, GlobalValue::GUID DstId,
+ int TypeOrHotness) {
+ // 0 - alias
+ // 1 - reference
+ // 2 - constant reference
+ // Other value: (hotness - 3).
+ TypeOrHotness += 3;
static const char *EdgeAttrs[] = {
" [style=dotted]; // alias",
" [style=dashed]; // ref",
+ " [style=dashed,color=forestgreen]; // const-ref",
" // call (hotness : Unknown)",
" [color=blue]; // call (hotness : Cold)",
" // call (hotness : None)",
@@ -291,6 +407,8 @@ void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const {
A.add("shape", "box");
} else {
A.add("shape", "Mrecord", "variable");
+ if (Flags.Live && hasReadOnlyFlag(SummaryIt.second))
+ A.addComment("immutable");
}
auto VI = getValueInfo(SummaryIt.first);
@@ -308,13 +426,20 @@ void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const {
for (auto &SummaryIt : GVSMap) {
auto *GVS = SummaryIt.second;
for (auto &R : GVS->refs())
- Draw(SummaryIt.first, R.getGUID(), -1);
+ Draw(SummaryIt.first, R.getGUID(), R.isReadOnly() ? -1 : -2);
if (auto *AS = dyn_cast_or_null<AliasSummary>(SummaryIt.second)) {
- auto AliaseeOrigId = AS->getAliasee().getOriginalName();
- auto AliaseeId = getGUIDFromOriginalID(AliaseeOrigId);
-
- Draw(SummaryIt.first, AliaseeId ? AliaseeId : AliaseeOrigId, -2);
+ GlobalValue::GUID AliaseeId;
+ if (AS->hasAliaseeGUID())
+ AliaseeId = AS->getAliaseeGUID();
+ else {
+ auto AliaseeOrigId = AS->getAliasee().getOriginalName();
+ AliaseeId = getGUIDFromOriginalID(AliaseeOrigId);
+ if (!AliaseeId)
+ AliaseeId = AliaseeOrigId;
+ }
+
+ Draw(SummaryIt.first, AliaseeId, -3);
continue;
}
@@ -330,7 +455,7 @@ void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const {
for (auto &E : CrossModuleEdges) {
auto &ModList = NodeMap[E.Dst];
if (ModList.empty()) {
- defineExternalNode(OS, " ", getValueInfo(E.Dst));
+ defineExternalNode(OS, " ", getValueInfo(E.Dst), E.Dst);
// Add fake module to the list to draw an edge to an external node
// in the loop below.
ModList.push_back(-1);
diff --git a/contrib/llvm/lib/IR/PassInstrumentation.cpp b/contrib/llvm/lib/IR/PassInstrumentation.cpp
new file mode 100644
index 000000000000..5aa2bc6d895e
--- /dev/null
+++ b/contrib/llvm/lib/IR/PassInstrumentation.cpp
@@ -0,0 +1,22 @@
+//===- PassInstrumentation.cpp - Pass Instrumentation interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the implementation of PassInstrumentation class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+AnalysisKey PassInstrumentationAnalysis::Key;
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/IR/PassTimingInfo.cpp b/contrib/llvm/lib/IR/PassTimingInfo.cpp
new file mode 100644
index 000000000000..40b3977ecbd9
--- /dev/null
+++ b/contrib/llvm/lib/IR/PassTimingInfo.cpp
@@ -0,0 +1,268 @@
+//===- PassTimingInfo.cpp - LLVM Pass Timing Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass Timing infrastructure for both
+// new and legacy pass managers.
+//
+// PassTimingInfo Class - This class is used to calculate information about the
+// amount of time each pass takes to execute. This only happens when
+// -time-passes is enabled on the command line.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "time-passes"
+
+namespace llvm {
+
+bool TimePassesIsEnabled = false;
+
+static cl::opt<bool, true> EnableTiming(
+ "time-passes", cl::location(TimePassesIsEnabled), cl::Hidden,
+ cl::desc("Time each pass, printing elapsed time for each on exit"));
+
+namespace {
+namespace legacy {
+
+//===----------------------------------------------------------------------===//
+// Legacy pass manager's PassTimingInfo implementation
+
+/// Provides an interface for collecting pass timing information.
+///
+/// It was originally intended to be generic, but the interfaces have since
+/// been split; this class is now exclusively for legacy-pass-manager use.
+class PassTimingInfo {
+public:
+ using PassInstanceID = void *;
+
+private:
+ StringMap<unsigned> PassIDCountMap; ///< Map that counts instances of passes
+ DenseMap<PassInstanceID, std::unique_ptr<Timer>> TimingData; ///< timers for pass instances
+ TimerGroup TG;
+
+public:
+ /// Default constructor for a not-yet-active timing info object.
+ /// Use \p init() to activate it.
+ PassTimingInfo();
+
+ /// Print out timing information and release timers.
+ ~PassTimingInfo();
+
+ /// Initializes the static \p TheTimeInfo member to a non-null value when
+ /// -time-passes is enabled. Leaves it null otherwise.
+ ///
+ /// This method may be called multiple times.
+ static void init();
+
+ /// Prints out timing information and then resets the timers.
+ void print();
+
+ /// Returns the timer for the specified pass if it exists.
+ Timer *getPassTimer(Pass *, PassInstanceID);
+
+ static PassTimingInfo *TheTimeInfo;
+
+private:
+ Timer *newPassTimer(StringRef PassID, StringRef PassDesc);
+};
+
+static ManagedStatic<sys::SmartMutex<true>> TimingInfoMutex;
+
+PassTimingInfo::PassTimingInfo()
+ : TG("pass", "... Pass execution timing report ...") {}
+
+PassTimingInfo::~PassTimingInfo() {
+ // Deleting the timers accumulates their info into the TG member.
+ // The TG member is then (implicitly) destroyed, which actually prints the report.
+ TimingData.clear();
+}
+
+void PassTimingInfo::init() {
+ if (!TimePassesIsEnabled || TheTimeInfo)
+ return;
+
+ // Constructed the first time this is called, iff -time-passes is enabled.
+ // This guarantees that the object will be constructed after static globals,
+ // thus it will be destroyed before them.
+ static ManagedStatic<PassTimingInfo> TTI;
+ TheTimeInfo = &*TTI;
+}
+
+/// Prints out timing information and then resets the timers.
+void PassTimingInfo::print() { TG.print(*CreateInfoOutputFile()); }
+
+Timer *PassTimingInfo::newPassTimer(StringRef PassID, StringRef PassDesc) {
+ unsigned &num = PassIDCountMap[PassID];
+ num++;
+ // Append the description with a pass-instance number for all but the first instance.
+ std::string PassDescNumbered =
+ num <= 1 ? PassDesc.str() : formatv("{0} #{1}", PassDesc, num).str();
+ return new Timer(PassID, PassDescNumbered, TG);
+}
+
+Timer *PassTimingInfo::getPassTimer(Pass *P, PassInstanceID Pass) {
+ if (P->getAsPMDataManager())
+ return nullptr;
+
+ init();
+ sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
+ std::unique_ptr<Timer> &T = TimingData[Pass];
+
+ if (!T) {
+ StringRef PassName = P->getPassName();
+ StringRef PassArgument;
+ if (const PassInfo *PI = Pass::lookupPassInfo(P->getPassID()))
+ PassArgument = PI->getPassArgument();
+ T.reset(newPassTimer(PassArgument.empty() ? PassName : PassArgument, PassName));
+ }
+ return T.get();
+}
+
+PassTimingInfo *PassTimingInfo::TheTimeInfo;
+} // namespace legacy
+} // namespace
+
+Timer *getPassTimer(Pass *P) {
+ legacy::PassTimingInfo::init();
+ if (legacy::PassTimingInfo::TheTimeInfo)
+ return legacy::PassTimingInfo::TheTimeInfo->getPassTimer(P, P);
+ return nullptr;
+}
+
+/// If timing is enabled, report the times collected up to now and then reset
+/// them.
+void reportAndResetTimings() {
+ if (legacy::PassTimingInfo::TheTimeInfo)
+ legacy::PassTimingInfo::TheTimeInfo->print();
+}
+
+//===----------------------------------------------------------------------===//
+// Pass timing handling for the New Pass Manager
+//===----------------------------------------------------------------------===//
+
+/// Returns the timer for the specified pass invocation of \p PassID.
+/// Each call creates a new timer.
+Timer &TimePassesHandler::getPassTimer(StringRef PassID) {
+ // Bump counts for each request of the timer.
+ unsigned Count = nextPassID(PassID);
+
+ // Unconditionally append the description with a pass-invocation number.
+ std::string FullDesc = formatv("{0} #{1}", PassID, Count).str();
+
+ PassInvocationID UID{PassID, Count};
+ Timer *T = new Timer(PassID, FullDesc, TG);
+ auto Pair = TimingData.try_emplace(UID, T);
+ assert(Pair.second && "should always create a new timer");
+ return *(Pair.first->second.get());
+}
+
+TimePassesHandler::TimePassesHandler(bool Enabled)
+ : TG("pass", "... Pass execution timing report ..."), Enabled(Enabled) {}
+
+void TimePassesHandler::print() { TG.print(*CreateInfoOutputFile()); }
+
+LLVM_DUMP_METHOD void TimePassesHandler::dump() const {
+ dbgs() << "Dumping timers for " << getTypeName<TimePassesHandler>()
+ << ":\n\tRunning:\n";
+ for (auto &I : TimingData) {
+ const Timer *MyTimer = I.second.get();
+ if (!MyTimer || MyTimer->isRunning())
+ dbgs() << "\tTimer " << MyTimer << " for pass " << I.first.first << "("
+ << I.first.second << ")\n";
+ }
+ dbgs() << "\tTriggered:\n";
+ for (auto &I : TimingData) {
+ const Timer *MyTimer = I.second.get();
+ if (!MyTimer || (MyTimer->hasTriggered() && !MyTimer->isRunning()))
+ dbgs() << "\tTimer " << MyTimer << " for pass " << I.first.first << "("
+ << I.first.second << ")\n";
+ }
+}
+
+void TimePassesHandler::startTimer(StringRef PassID) {
+ Timer &MyTimer = getPassTimer(PassID);
+ TimerStack.push_back(&MyTimer);
+ if (!MyTimer.isRunning())
+ MyTimer.startTimer();
+}
+
+void TimePassesHandler::stopTimer(StringRef PassID) {
+ assert(TimerStack.size() > 0 && "empty stack in popTimer");
+ Timer *MyTimer = TimerStack.pop_back_val();
+ assert(MyTimer && "timer should be present");
+ if (MyTimer->isRunning())
+ MyTimer->stopTimer();
+}
+
+static bool matchPassManager(StringRef PassID) {
+ size_t prefix_pos = PassID.find('<');
+ if (prefix_pos == StringRef::npos)
+ return false;
+ StringRef Prefix = PassID.substr(0, prefix_pos);
+ return Prefix.endswith("PassManager") || Prefix.endswith("PassAdaptor") ||
+ Prefix.endswith("AnalysisManagerProxy");
+}
+
+bool TimePassesHandler::runBeforePass(StringRef PassID) {
+ if (matchPassManager(PassID))
+ return true;
+
+ startTimer(PassID);
+
+ LLVM_DEBUG(dbgs() << "after runBeforePass(" << PassID << ")\n");
+ LLVM_DEBUG(dump());
+
+ // We are not going to skip this pass, so return true.
+ return true;
+}
+
+void TimePassesHandler::runAfterPass(StringRef PassID) {
+ if (matchPassManager(PassID))
+ return;
+
+ stopTimer(PassID);
+
+ LLVM_DEBUG(dbgs() << "after runAfterPass(" << PassID << ")\n");
+ LLVM_DEBUG(dump());
+}
+
+void TimePassesHandler::registerCallbacks(PassInstrumentationCallbacks &PIC) {
+ if (!Enabled)
+ return;
+
+ PIC.registerBeforePassCallback(
+ [this](StringRef P, Any) { return this->runBeforePass(P); });
+ PIC.registerAfterPassCallback(
+ [this](StringRef P, Any) { this->runAfterPass(P); });
+ PIC.registerAfterPassInvalidatedCallback(
+ [this](StringRef P) { this->runAfterPass(P); });
+ PIC.registerBeforeAnalysisCallback(
+ [this](StringRef P, Any) { this->runBeforePass(P); });
+ PIC.registerAfterAnalysisCallback(
+ [this](StringRef P, Any) { this->runAfterPass(P); });
+}
+
+} // namespace llvm
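The new-pass-manager half of this file keys timers by (pass name, invocation count), so every invocation gets its own timer, and it attaches itself to the pipeline through the instrumentation callbacks defined above. A sketch of the intended wiring, using only the APIs visible in this file (handing PIC to the PassBuilder and analysis managers is elided):

    #include "llvm/IR/PassInstrumentation.h"
    #include "llvm/IR/PassTimingInfo.h"

    void setUpPassTiming(llvm::PassInstrumentationCallbacks &PIC) {
      // The handler must outlive the pipeline because the registered lambdas
      // capture it; a static local is one simple way to guarantee that.
      static llvm::TimePassesHandler TimePasses(/*Enabled=*/true);
      TimePasses.registerCallbacks(PIC);
      // At shutdown: TimePasses.print(); // "... Pass execution timing report ..."
    }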
diff --git a/contrib/llvm/lib/IR/SafepointIRVerifier.cpp b/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
index 6f73126be738..12ada1320225 100644
--- a/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -92,6 +92,7 @@ public:
Listed = true;
}
}
+ (void)Listed;
assert(Listed && "basic block is not found among incoming blocks");
return false;
}
@@ -133,7 +134,7 @@ public:
// Top-down walk of the dominator tree
ReversePostOrderTraversal<const Function *> RPOT(&F);
for (const BasicBlock *BB : RPOT) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
assert(TI && "blocks must be well formed");
// For conditional branches, we can perform simple conditional propagation on
@@ -256,8 +257,7 @@ static bool containsGCPtrType(Type *Ty) {
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
- return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
- containsGCPtrType);
+ return llvm::any_of(ST->elements(), containsGCPtrType);
return false;
}
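The SafepointIRVerifier change swaps the iterator-pair std::any_of for the range-based llvm::any_of from STLExtras.h, which takes the container (or any range) directly. A self-contained illustration, not LLVM code:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    // llvm::any_of takes the whole range; no begin()/end() spelling needed.
    static bool hasNegative(const std::vector<int> &V) {
      return llvm::any_of(V, [](int X) { return X < 0; });
    }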
diff --git a/contrib/llvm/lib/IR/Type.cpp b/contrib/llvm/lib/IR/Type.cpp
index 83016496ff7e..0fb079c5ab73 100644
--- a/contrib/llvm/lib/IR/Type.cpp
+++ b/contrib/llvm/lib/IR/Type.cpp
@@ -297,20 +297,26 @@ FunctionType::FunctionType(Type *Result, ArrayRef<Type*> Params,
FunctionType *FunctionType::get(Type *ReturnType,
ArrayRef<Type*> Params, bool isVarArg) {
LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
- FunctionTypeKeyInfo::KeyTy Key(ReturnType, Params, isVarArg);
- auto I = pImpl->FunctionTypes.find_as(Key);
+ const FunctionTypeKeyInfo::KeyTy Key(ReturnType, Params, isVarArg);
FunctionType *FT;
-
- if (I == pImpl->FunctionTypes.end()) {
+ // We only want to allocate a fresh function type when none exists, and we
+ // don't want to perform two lookups (one to check for existence and one to
+ // insert the newly allocated type), so we look up by Key and, if nothing is
+ // found, update the returned slot in place with a newly allocated function
+ // type.
+ auto Insertion = pImpl->FunctionTypes.insert_as(nullptr, Key);
+ if (Insertion.second) {
+ // The function type was not found. Allocate one and update FunctionTypes
+ // in-place.
FT = (FunctionType *)pImpl->TypeAllocator.Allocate(
sizeof(FunctionType) + sizeof(Type *) * (Params.size() + 1),
alignof(FunctionType));
new (FT) FunctionType(ReturnType, Params, isVarArg);
- pImpl->FunctionTypes.insert(FT);
+ *Insertion.first = FT;
} else {
- FT = *I;
+ // The function type was found. Just return it.
+ FT = *Insertion.first;
}
-
return FT;
}
@@ -336,18 +342,25 @@ bool FunctionType::isValidArgumentType(Type *ArgTy) {
StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
bool isPacked) {
LLVMContextImpl *pImpl = Context.pImpl;
- AnonStructTypeKeyInfo::KeyTy Key(ETypes, isPacked);
- auto I = pImpl->AnonStructTypes.find_as(Key);
- StructType *ST;
+ const AnonStructTypeKeyInfo::KeyTy Key(ETypes, isPacked);
- if (I == pImpl->AnonStructTypes.end()) {
- // Value not found. Create a new type!
+ StructType *ST;
+ // We only want to allocate a fresh struct type when none exists, and we
+ // don't want to perform two lookups (one to check for existence and one to
+ // insert the newly allocated type), so we look up by Key and, if nothing is
+ // found, update the returned slot in place with a newly allocated struct
+ // type.
+ auto Insertion = pImpl->AnonStructTypes.insert_as(nullptr, Key);
+ if (Insertion.second) {
+ // The struct type was not found. Allocate one and update AnonStructTypes
+ // in-place.
ST = new (Context.pImpl->TypeAllocator) StructType(Context);
ST->setSubclassData(SCDB_IsLiteral); // Literal struct.
ST->setBody(ETypes, isPacked);
- Context.pImpl->AnonStructTypes.insert(ST);
+ *Insertion.first = ST;
} else {
- ST = *I;
+ // The struct type was found. Just return it.
+ ST = *Insertion.first;
}
return ST;
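Both Type.cpp hunks replace a find_as() followed by insert() with a single insert_as(nullptr, Key) call: the table is probed once, and the slot is filled with a freshly allocated type only when the key was absent. The same pattern with a plain standard-library map, as a generic illustration (Entry, Cache and getOrCreate are made-up names, not LLVM code):

    #include <memory>
    #include <string>
    #include <unordered_map>

    struct Entry { std::string Name; };
    std::unordered_map<std::string, std::unique_ptr<Entry>> Cache;

    Entry *getOrCreate(const std::string &Key) {
      auto Insertion = Cache.try_emplace(Key, nullptr);
      if (Insertion.second) // key was absent: allocate and fill the slot
        Insertion.first->second = std::make_unique<Entry>(Entry{Key});
      return Insertion.first->second.get();
    }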
diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp
index 295d6ecf0db0..80b993c89f7f 100644
--- a/contrib/llvm/lib/IR/Value.cpp
+++ b/contrib/llvm/lib/IR/Value.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -130,20 +129,11 @@ void Value::destroyValueName() {
}
bool Value::hasNUses(unsigned N) const {
- const_use_iterator UI = use_begin(), E = use_end();
-
- for (; N; --N, ++UI)
- if (UI == E) return false; // Too few.
- return UI == E;
+ return hasNItems(use_begin(), use_end(), N);
}
bool Value::hasNUsesOrMore(unsigned N) const {
- const_use_iterator UI = use_begin(), E = use_end();
-
- for (; N; --N, ++UI)
- if (UI == E) return false; // Too few.
-
- return true;
+ return hasNItemsOrMore(use_begin(), use_end(), N);
}
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
@@ -405,7 +395,7 @@ static bool contains(Value *Expr, Value *V) {
}
#endif // NDEBUG
-void Value::doRAUW(Value *New, bool NoMetadata) {
+void Value::doRAUW(Value *New, ReplaceMetadataUses ReplaceMetaUses) {
assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
assert(!contains(New, this) &&
"this->replaceAllUsesWith(expr(this)) is NOT valid!");
@@ -415,7 +405,7 @@ void Value::doRAUW(Value *New, bool NoMetadata) {
// Notify all ValueHandles (if present) that this value is going away.
if (HasValueHandle)
ValueHandleBase::ValueIsRAUWd(this, New);
- if (!NoMetadata && isUsedByMetadata())
+ if (ReplaceMetaUses == ReplaceMetadataUses::Yes && isUsedByMetadata())
ValueAsMetadata::handleRAUW(this, New);
while (!materialized_use_empty()) {
@@ -437,11 +427,11 @@ void Value::doRAUW(Value *New, bool NoMetadata) {
}
void Value::replaceAllUsesWith(Value *New) {
- doRAUW(New, false /* NoMetadata */);
+ doRAUW(New, ReplaceMetadataUses::Yes);
}
void Value::replaceNonMetadataUsesWith(Value *New) {
- doRAUW(New, true /* NoMetadata */);
+ doRAUW(New, ReplaceMetadataUses::No);
}
// Like replaceAllUsesWith except it does not handle constants or basic blocks.
@@ -512,8 +502,8 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
return V;
V = GA->getAliasee();
} else {
- if (auto CS = ImmutableCallSite(V)) {
- if (const Value *RV = CS.getReturnedArgOperand()) {
+ if (const auto *Call = dyn_cast<CallBase>(V)) {
+ if (const Value *RV = Call->getReturnedArgOperand()) {
V = RV;
continue;
}
@@ -521,9 +511,9 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
// but it can't be marked with returned attribute, that's why it needs
// special case.
if (StripKind == PSK_ZeroIndicesAndAliasesAndInvariantGroups &&
- (CS.getIntrinsicID() == Intrinsic::launder_invariant_group ||
- CS.getIntrinsicID() == Intrinsic::strip_invariant_group)) {
- V = CS.getArgOperand(0);
+ (Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
+ Call->getIntrinsicID() == Intrinsic::strip_invariant_group)) {
+ V = Call->getArgOperand(0);
continue;
}
}
@@ -582,8 +572,8 @@ Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
} else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
V = GA->getAliasee();
} else {
- if (auto CS = ImmutableCallSite(V))
- if (const Value *RV = CS.getReturnedArgOperand()) {
+ if (const auto *Call = dyn_cast<CallBase>(V))
+ if (const Value *RV = Call->getReturnedArgOperand()) {
V = RV;
continue;
}
@@ -617,10 +607,11 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL,
DerefBytes = A->getDereferenceableOrNullBytes();
CanBeNull = true;
}
- } else if (auto CS = ImmutableCallSite(this)) {
- DerefBytes = CS.getDereferenceableBytes(AttributeList::ReturnIndex);
+ } else if (const auto *Call = dyn_cast<CallBase>(this)) {
+ DerefBytes = Call->getDereferenceableBytes(AttributeList::ReturnIndex);
if (DerefBytes == 0) {
- DerefBytes = CS.getDereferenceableOrNullBytes(AttributeList::ReturnIndex);
+ DerefBytes =
+ Call->getDereferenceableOrNullBytes(AttributeList::ReturnIndex);
CanBeNull = true;
}
} else if (const LoadInst *LI = dyn_cast<LoadInst>(this)) {
@@ -692,8 +683,8 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const {
if (AllocatedType->isSized())
Align = DL.getPrefTypeAlignment(AllocatedType);
}
- } else if (auto CS = ImmutableCallSite(this))
- Align = CS.getAttributes().getRetAlignment();
+ } else if (const auto *Call = dyn_cast<CallBase>(this))
+ Align = Call->getAttributes().getRetAlignment();
else if (const LoadInst *LI = dyn_cast<LoadInst>(this))
if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) {
ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
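The Value.cpp changes retire ImmutableCallSite in favour of casting to CallBase, the common base class of CallInst and InvokeInst, and replace the hand-rolled use-counting loops with the hasNItems/hasNItemsOrMore helpers. The cast idiom in isolation, as a sketch that mirrors the stripping loops above (the function name is made up):

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Value.h"

    // Follow a call's 'returned' argument, if any, the way the pointer
    // stripping loops above do.
    static const llvm::Value *followReturnedArg(const llvm::Value *V) {
      if (const auto *Call = llvm::dyn_cast<llvm::CallBase>(V))
        if (const llvm::Value *RV = Call->getReturnedArgOperand())
          return RV;
      return V;
    }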
diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp
index e5231bb78a36..30e77b92009f 100644
--- a/contrib/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm/lib/IR/Verifier.cpp
@@ -65,7 +65,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
@@ -140,21 +139,20 @@ private:
}
void Write(const Value *V) {
- if (!V)
- return;
+ if (V)
+ Write(*V);
+ }
+
+ void Write(const Value &V) {
if (isa<Instruction>(V)) {
- V->print(*OS, MST);
+ V.print(*OS, MST);
*OS << '\n';
} else {
- V->printAsOperand(*OS, true, MST);
+ V.printAsOperand(*OS, true, MST);
*OS << '\n';
}
}
- void Write(ImmutableCallSite CS) {
- Write(CS.getInstruction());
- }
-
void Write(const Metadata *MD) {
if (!MD)
return;
@@ -281,13 +279,16 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// Whether the current function has a DISubprogram attached to it.
bool HasDebugInfo = false;
+ /// Whether source was present on the first DIFile encountered in each CU.
+ DenseMap<const DICompileUnit *, bool> HasSourceDebugInfo;
+
/// Stores the count of how many objects were passed to llvm.localescape for a
/// given function and the largest index passed to llvm.localrecover.
DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo;
// Maps catchswitches and cleanuppads that unwind to siblings to the
// terminators that indicate the unwind, used to detect cycles therein.
- MapVector<Instruction *, TerminatorInst *> SiblingFuncletInfo;
+ MapVector<Instruction *, Instruction *> SiblingFuncletInfo;
/// Cache of constants visited in search of ConstantExprs.
SmallPtrSet<const Constant *, 32> ConstantExprVisited;
@@ -383,6 +384,7 @@ public:
visitModuleFlags(M);
visitModuleIdents(M);
+ visitModuleCommandLines(M);
verifyCompileUnits();
@@ -405,6 +407,7 @@ private:
void visitValueAsMetadata(const ValueAsMetadata &MD, Function *F);
void visitComdat(const Comdat &C);
void visitModuleIdents(const Module &M);
+ void visitModuleCommandLines(const Module &M);
void visitModuleFlags(const Module &M);
void visitModuleFlag(const MDNode *Op,
DenseMap<const MDString *, const MDNode *> &SeenIDs,
@@ -443,6 +446,8 @@ private:
void visitBitCastInst(BitCastInst &I);
void visitAddrSpaceCastInst(AddrSpaceCastInst &I);
void visitPHINode(PHINode &PN);
+ void visitCallBase(CallBase &Call);
+ void visitUnaryOperator(UnaryOperator &U);
void visitBinaryOperator(BinaryOperator &B);
void visitICmpInst(ICmpInst &IC);
void visitFCmpInst(FCmpInst &FC);
@@ -457,7 +462,7 @@ private:
void visitStoreInst(StoreInst &SI);
void verifyDominatesUse(Instruction &I, unsigned i);
void visitInstruction(Instruction &I);
- void visitTerminatorInst(TerminatorInst &I);
+ void visitTerminator(Instruction &I);
void visitBranchInst(BranchInst &BI);
void visitReturnInst(ReturnInst &RI);
void visitSwitchInst(SwitchInst &SI);
@@ -465,9 +470,9 @@ private:
void visitSelectInst(SelectInst &SI);
void visitUserOp1(Instruction &I);
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
- void visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS);
+ void visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call);
void visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI);
- void visitDbgIntrinsic(StringRef Kind, DbgInfoIntrinsic &DII);
+ void visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII);
void visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI);
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
void visitAtomicRMWInst(AtomicRMWInst &RMWI);
@@ -485,8 +490,7 @@ private:
void visitCatchSwitchInst(CatchSwitchInst &CatchSwitch);
void visitCleanupReturnInst(CleanupReturnInst &CRI);
- void verifyCallSite(CallSite CS);
- void verifySwiftErrorCallSite(CallSite CS, const Value *SwiftErrorVal);
+ void verifySwiftErrorCall(CallBase &Call, const Value *SwiftErrorVal);
void verifySwiftErrorValue(const Value *SwiftErrorVal);
void verifyMustTailCall(CallInst &CI);
bool performTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty, int VT,
@@ -501,16 +505,16 @@ private:
void visitConstantExprsRecursively(const Constant *EntryC);
void visitConstantExpr(const ConstantExpr *CE);
- void verifyStatepoint(ImmutableCallSite CS);
+ void verifyStatepoint(const CallBase &Call);
void verifyFrameRecoverIndices();
void verifySiblingFuncletUnwinds();
- void verifyFragmentExpression(const DbgInfoIntrinsic &I);
+ void verifyFragmentExpression(const DbgVariableIntrinsic &I);
template <typename ValueOrMetadata>
void verifyFragmentExpression(const DIVariable &V,
DIExpression::FragmentInfo Fragment,
ValueOrMetadata *Desc);
- void verifyFnArgs(const DbgInfoIntrinsic &I);
+ void verifyFnArgs(const DbgVariableIntrinsic &I);
/// Module-level debug info verification...
void verifyCompileUnits();
@@ -518,6 +522,9 @@ private:
/// Module-level verification that all @llvm.experimental.deoptimize
/// declarations share the same calling convention.
void verifyDeoptimizeCallingConvs();
+
+ /// Verify all-or-nothing property of DIFile source attribute within a CU.
+ void verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F);
};
} // end anonymous namespace
@@ -632,7 +639,8 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getValueType())) {
StructType *STy = dyn_cast<StructType>(ATy->getElementType());
PointerType *FuncPtrTy =
- FunctionType::get(Type::getVoidTy(Context), false)->getPointerTo();
+ FunctionType::get(Type::getVoidTy(Context), false)->
+ getPointerTo(DL.getProgramAddressSpace());
// FIXME: Reject the 2-field form in LLVM 4.0.
Assert(STy &&
(STy->getNumElements() == 2 || STy->getNumElements() == 3) &&
@@ -886,6 +894,8 @@ void Verifier::visitDIBasicType(const DIBasicType &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_base_type ||
N.getTag() == dwarf::DW_TAG_unspecified_type,
"invalid tag", &N);
+ AssertDI(!(N.isBigEndian() && N.isLittleEndian()),
+ "has conflicting flags", &N);
}
void Verifier::visitDIDerivedType(const DIDerivedType &N) {
@@ -1028,6 +1038,8 @@ void Verifier::visitDICompileUnit(const DICompileUnit &N) {
AssertDI(!N.getFile()->getFilename().empty(), "invalid filename", &N,
N.getFile());
+ verifySourceDebugInfo(N, *N.getFile());
+
AssertDI((N.getEmissionKind() <= DICompileUnit::LastEmissionKind),
"invalid emission kind", &N);
@@ -1105,6 +1117,8 @@ void Verifier::visitDISubprogram(const DISubprogram &N) {
AssertDI(N.isDistinct(), "subprogram definitions must be distinct", &N);
AssertDI(Unit, "subprogram definitions must have a compile unit", &N);
AssertDI(isa<DICompileUnit>(Unit), "invalid unit type", &N, Unit);
+ if (N.getFile())
+ verifySourceDebugInfo(*N.getUnit(), *N.getFile());
} else {
// Subprogram declarations (part of the type hierarchy).
AssertDI(!Unit, "subprogram declarations must not have a compile unit", &N);
@@ -1117,6 +1131,10 @@ void Verifier::visitDISubprogram(const DISubprogram &N) {
AssertDI(Op && isa<DIType>(Op), "invalid thrown type", &N, ThrownTypes,
Op);
}
+
+ if (N.areAllCallsDescribed())
+ AssertDI(N.isDefinition(),
+ "DIFlagAllCallsDescribed must be attached to a definition");
}
void Verifier::visitDILexicalBlockBase(const DILexicalBlockBase &N) {
@@ -1223,6 +1241,8 @@ void Verifier::visitDILocalVariable(const DILocalVariable &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
"local variable requires a valid scope", &N, N.getRawScope());
+ if (auto Ty = N.getType())
+ AssertDI(!isa<DISubroutineType>(Ty), "invalid type", &N, N.getType());
}
void Verifier::visitDILabel(const DILabel &N) {
@@ -1295,6 +1315,24 @@ void Verifier::visitModuleIdents(const Module &M) {
}
}
+void Verifier::visitModuleCommandLines(const Module &M) {
+ const NamedMDNode *CommandLines = M.getNamedMetadata("llvm.commandline");
+ if (!CommandLines)
+ return;
+
+ // llvm.commandline takes a list of metadata entries. Each entry has exactly
+ // one string operand. Scan each llvm.commandline entry and make sure that
+ // this requirement is met.
+ for (const MDNode *N : CommandLines->operands()) {
+ Assert(N->getNumOperands() == 1,
+ "incorrect number of operands in llvm.commandline metadata", N);
+ Assert(dyn_cast_or_null<MDString>(N->getOperand(0)),
+ ("invalid value for llvm.commandline metadata entry operand"
+ "(the operand should be a string)"),
+ N->getOperand(0));
+ }
+}
+
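visitModuleCommandLines() accepts only named metadata whose operands are single-string nodes. A sketch of building metadata that passes the check (the helper name and the command-line text are made up):

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"

    void addCommandLine(llvm::Module &M) {
      llvm::LLVMContext &Ctx = M.getContext();
      llvm::NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.commandline");
      // Each operand is an MDNode holding exactly one MDString.
      NMD->addOperand(llvm::MDNode::get(
          Ctx, {llvm::MDString::get(Ctx, "clang -O2 -g example.c")}));
    }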
void Verifier::visitModuleFlags(const Module &M) {
const NamedMDNode *Flags = M.getModuleFlagsMetadata();
if (!Flags) return;
@@ -1476,6 +1514,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
case Attribute::InaccessibleMemOnly:
case Attribute::InaccessibleMemOrArgMemOnly:
case Attribute::AllocSize:
+ case Attribute::SpeculativeLoadHardening:
case Attribute::Speculatable:
case Attribute::StrictFP:
return true;
@@ -1854,127 +1893,136 @@ bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) {
}
/// Verify that statepoint intrinsic is well formed.
-void Verifier::verifyStatepoint(ImmutableCallSite CS) {
- assert(CS.getCalledFunction() &&
- CS.getCalledFunction()->getIntrinsicID() ==
- Intrinsic::experimental_gc_statepoint);
-
- const Instruction &CI = *CS.getInstruction();
+void Verifier::verifyStatepoint(const CallBase &Call) {
+ assert(Call.getCalledFunction() &&
+ Call.getCalledFunction()->getIntrinsicID() ==
+ Intrinsic::experimental_gc_statepoint);
- Assert(!CS.doesNotAccessMemory() && !CS.onlyReadsMemory() &&
- !CS.onlyAccessesArgMemory(),
+ Assert(!Call.doesNotAccessMemory() && !Call.onlyReadsMemory() &&
+ !Call.onlyAccessesArgMemory(),
"gc.statepoint must read and write all memory to preserve "
"reordering restrictions required by safepoint semantics",
- &CI);
+ Call);
- const Value *IDV = CS.getArgument(0);
+ const Value *IDV = Call.getArgOperand(0);
Assert(isa<ConstantInt>(IDV), "gc.statepoint ID must be a constant integer",
- &CI);
+ Call);
- const Value *NumPatchBytesV = CS.getArgument(1);
+ const Value *NumPatchBytesV = Call.getArgOperand(1);
Assert(isa<ConstantInt>(NumPatchBytesV),
"gc.statepoint number of patchable bytes must be a constant integer",
- &CI);
+ Call);
const int64_t NumPatchBytes =
cast<ConstantInt>(NumPatchBytesV)->getSExtValue();
assert(isInt<32>(NumPatchBytes) && "NumPatchBytesV is an i32!");
- Assert(NumPatchBytes >= 0, "gc.statepoint number of patchable bytes must be "
- "positive",
- &CI);
+ Assert(NumPatchBytes >= 0,
+ "gc.statepoint number of patchable bytes must be "
+ "positive",
+ Call);
- const Value *Target = CS.getArgument(2);
+ const Value *Target = Call.getArgOperand(2);
auto *PT = dyn_cast<PointerType>(Target->getType());
Assert(PT && PT->getElementType()->isFunctionTy(),
- "gc.statepoint callee must be of function pointer type", &CI, Target);
+ "gc.statepoint callee must be of function pointer type", Call, Target);
FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType());
- const Value *NumCallArgsV = CS.getArgument(3);
+ const Value *NumCallArgsV = Call.getArgOperand(3);
Assert(isa<ConstantInt>(NumCallArgsV),
"gc.statepoint number of arguments to underlying call "
"must be constant integer",
- &CI);
+ Call);
const int NumCallArgs = cast<ConstantInt>(NumCallArgsV)->getZExtValue();
Assert(NumCallArgs >= 0,
"gc.statepoint number of arguments to underlying call "
"must be positive",
- &CI);
+ Call);
const int NumParams = (int)TargetFuncType->getNumParams();
if (TargetFuncType->isVarArg()) {
Assert(NumCallArgs >= NumParams,
- "gc.statepoint mismatch in number of vararg call args", &CI);
+ "gc.statepoint mismatch in number of vararg call args", Call);
// TODO: Remove this limitation
Assert(TargetFuncType->getReturnType()->isVoidTy(),
"gc.statepoint doesn't support wrapping non-void "
"vararg functions yet",
- &CI);
+ Call);
} else
Assert(NumCallArgs == NumParams,
- "gc.statepoint mismatch in number of call args", &CI);
+ "gc.statepoint mismatch in number of call args", Call);
- const Value *FlagsV = CS.getArgument(4);
+ const Value *FlagsV = Call.getArgOperand(4);
Assert(isa<ConstantInt>(FlagsV),
- "gc.statepoint flags must be constant integer", &CI);
+ "gc.statepoint flags must be constant integer", Call);
const uint64_t Flags = cast<ConstantInt>(FlagsV)->getZExtValue();
Assert((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0,
- "unknown flag used in gc.statepoint flags argument", &CI);
+ "unknown flag used in gc.statepoint flags argument", Call);
// Verify that the types of the call parameter arguments match
// the type of the wrapped callee.
+ AttributeList Attrs = Call.getAttributes();
for (int i = 0; i < NumParams; i++) {
Type *ParamType = TargetFuncType->getParamType(i);
- Type *ArgType = CS.getArgument(5 + i)->getType();
+ Type *ArgType = Call.getArgOperand(5 + i)->getType();
Assert(ArgType == ParamType,
"gc.statepoint call argument does not match wrapped "
"function type",
- &CI);
+ Call);
+
+ if (TargetFuncType->isVarArg()) {
+ AttributeSet ArgAttrs = Attrs.getParamAttributes(5 + i);
+ Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
+ "Attribute 'sret' cannot be used for vararg call arguments!",
+ Call);
+ }
}
const int EndCallArgsInx = 4 + NumCallArgs;
- const Value *NumTransitionArgsV = CS.getArgument(EndCallArgsInx+1);
+ const Value *NumTransitionArgsV = Call.getArgOperand(EndCallArgsInx + 1);
Assert(isa<ConstantInt>(NumTransitionArgsV),
"gc.statepoint number of transition arguments "
"must be constant integer",
- &CI);
+ Call);
const int NumTransitionArgs =
cast<ConstantInt>(NumTransitionArgsV)->getZExtValue();
Assert(NumTransitionArgs >= 0,
- "gc.statepoint number of transition arguments must be positive", &CI);
+ "gc.statepoint number of transition arguments must be positive", Call);
const int EndTransitionArgsInx = EndCallArgsInx + 1 + NumTransitionArgs;
- const Value *NumDeoptArgsV = CS.getArgument(EndTransitionArgsInx+1);
+ const Value *NumDeoptArgsV = Call.getArgOperand(EndTransitionArgsInx + 1);
Assert(isa<ConstantInt>(NumDeoptArgsV),
"gc.statepoint number of deoptimization arguments "
"must be constant integer",
- &CI);
+ Call);
const int NumDeoptArgs = cast<ConstantInt>(NumDeoptArgsV)->getZExtValue();
- Assert(NumDeoptArgs >= 0, "gc.statepoint number of deoptimization arguments "
- "must be positive",
- &CI);
+ Assert(NumDeoptArgs >= 0,
+ "gc.statepoint number of deoptimization arguments "
+ "must be positive",
+ Call);
const int ExpectedNumArgs =
7 + NumCallArgs + NumTransitionArgs + NumDeoptArgs;
- Assert(ExpectedNumArgs <= (int)CS.arg_size(),
- "gc.statepoint too few arguments according to length fields", &CI);
+ Assert(ExpectedNumArgs <= (int)Call.arg_size(),
+ "gc.statepoint too few arguments according to length fields", Call);
// Check that the only uses of this gc.statepoint are gc.result or
// gc.relocate calls which are tied to this statepoint and thus part
// of the same statepoint sequence
- for (const User *U : CI.users()) {
- const CallInst *Call = dyn_cast<const CallInst>(U);
- Assert(Call, "illegal use of statepoint token", &CI, U);
- if (!Call) continue;
- Assert(isa<GCRelocateInst>(Call) || isa<GCResultInst>(Call),
+ for (const User *U : Call.users()) {
+ const CallInst *UserCall = dyn_cast<const CallInst>(U);
+ Assert(UserCall, "illegal use of statepoint token", Call, U);
+ if (!UserCall)
+ continue;
+ Assert(isa<GCRelocateInst>(UserCall) || isa<GCResultInst>(UserCall),
"gc.result or gc.relocate are the only value uses "
"of a gc.statepoint",
- &CI, U);
- if (isa<GCResultInst>(Call)) {
- Assert(Call->getArgOperand(0) == &CI,
- "gc.result connected to wrong gc.statepoint", &CI, Call);
+ Call, U);
+ if (isa<GCResultInst>(UserCall)) {
+ Assert(UserCall->getArgOperand(0) == &Call,
+ "gc.result connected to wrong gc.statepoint", Call, UserCall);
} else if (isa<GCRelocateInst>(Call)) {
- Assert(Call->getArgOperand(0) == &CI,
- "gc.relocate connected to wrong gc.statepoint", &CI, Call);
+ Assert(UserCall->getArgOperand(0) == &Call,
+ "gc.relocate connected to wrong gc.statepoint", Call, UserCall);
}
}
@@ -2001,7 +2049,7 @@ void Verifier::verifyFrameRecoverIndices() {
}
}
-static Instruction *getSuccPad(TerminatorInst *Terminator) {
+static Instruction *getSuccPad(Instruction *Terminator) {
BasicBlock *UnwindDest;
if (auto *II = dyn_cast<InvokeInst>(Terminator))
UnwindDest = II->getUnwindDest();
@@ -2020,7 +2068,7 @@ void Verifier::verifySiblingFuncletUnwinds() {
if (Visited.count(PredPad))
continue;
Active.insert(PredPad);
- TerminatorInst *Terminator = Pair.second;
+ Instruction *Terminator = Pair.second;
do {
Instruction *SuccPad = getSuccPad(Terminator);
if (Active.count(SuccPad)) {
@@ -2029,7 +2077,7 @@ void Verifier::verifySiblingFuncletUnwinds() {
SmallVector<Instruction *, 8> CycleNodes;
do {
CycleNodes.push_back(CyclePad);
- TerminatorInst *CycleTerminator = SiblingFuncletInfo[CyclePad];
+ Instruction *CycleTerminator = SiblingFuncletInfo[CyclePad];
if (CycleTerminator != CyclePad)
CycleNodes.push_back(CycleTerminator);
CyclePad = getSuccPad(CycleTerminator);
@@ -2262,6 +2310,10 @@ void Verifier::visitFunction(const Function &F) {
if (!Seen.insert(DL).second)
continue;
+ Metadata *Parent = DL->getRawScope();
+ AssertDI(Parent && isa<DILocalScope>(Parent),
+ "DILocation's scope must be a DILocalScope", N, &F, &I, DL,
+ Parent);
DILocalScope *Scope = DL->getInlinedAtScope();
if (Scope && !Seen.insert(Scope).second)
continue;
@@ -2293,7 +2345,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
if (isa<PHINode>(BB.front())) {
SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
- llvm::sort(Preds.begin(), Preds.end());
+ llvm::sort(Preds);
for (const PHINode &PN : BB.phis()) {
// Ensure that PHI nodes have at least one entry!
Assert(PN.getNumIncomingValues() != 0,
@@ -2311,7 +2363,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
Values.push_back(
std::make_pair(PN.getIncomingBlock(i), PN.getIncomingValue(i)));
- llvm::sort(Values.begin(), Values.end());
+ llvm::sort(Values);
for (unsigned i = 0, e = Values.size(); i != e; ++i) {
// Check to make sure that if there is more than one entry for a
@@ -2340,7 +2392,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
}
}
-void Verifier::visitTerminatorInst(TerminatorInst &I) {
+void Verifier::visitTerminator(Instruction &I) {
// Ensure that terminators only exist at the end of the basic block.
Assert(&I == I.getParent()->getTerminator(),
"Terminator found in the middle of a basic block!", I.getParent());
@@ -2352,7 +2404,7 @@ void Verifier::visitBranchInst(BranchInst &BI) {
Assert(BI.getCondition()->getType()->isIntegerTy(1),
"Branch condition is not 'i1' type!", &BI, BI.getCondition());
}
- visitTerminatorInst(BI);
+ visitTerminator(BI);
}
void Verifier::visitReturnInst(ReturnInst &RI) {
@@ -2371,7 +2423,7 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
// Check to make sure that the return value has necessary properties for
// terminators...
- visitTerminatorInst(RI);
+ visitTerminator(RI);
}
void Verifier::visitSwitchInst(SwitchInst &SI) {
@@ -2386,7 +2438,7 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
"Duplicate integer as switch case", &SI, Case.getCaseValue());
}
- visitTerminatorInst(SI);
+ visitTerminator(SI);
}
void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
@@ -2396,7 +2448,7 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
Assert(BI.getDestination(i)->getType()->isLabelTy(),
"Indirectbr destinations must all have pointer type!", &BI);
- visitTerminatorInst(BI);
+ visitTerminator(BI);
}
void Verifier::visitSelectInst(SelectInst &SI) {
@@ -2695,77 +2747,79 @@ void Verifier::visitPHINode(PHINode &PN) {
visitInstruction(PN);
}
-void Verifier::verifyCallSite(CallSite CS) {
- Instruction *I = CS.getInstruction();
-
- Assert(CS.getCalledValue()->getType()->isPointerTy(),
- "Called function must be a pointer!", I);
- PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
+void Verifier::visitCallBase(CallBase &Call) {
+ Assert(Call.getCalledValue()->getType()->isPointerTy(),
+ "Called function must be a pointer!", Call);
+ PointerType *FPTy = cast<PointerType>(Call.getCalledValue()->getType());
Assert(FPTy->getElementType()->isFunctionTy(),
- "Called function is not pointer to function type!", I);
+ "Called function is not pointer to function type!", Call);
- Assert(FPTy->getElementType() == CS.getFunctionType(),
- "Called function is not the same type as the call!", I);
+ Assert(FPTy->getElementType() == Call.getFunctionType(),
+ "Called function is not the same type as the call!", Call);
- FunctionType *FTy = CS.getFunctionType();
+ FunctionType *FTy = Call.getFunctionType();
// Verify that the correct number of arguments are being passed
if (FTy->isVarArg())
- Assert(CS.arg_size() >= FTy->getNumParams(),
- "Called function requires more parameters than were provided!", I);
+ Assert(Call.arg_size() >= FTy->getNumParams(),
+ "Called function requires more parameters than were provided!",
+ Call);
else
- Assert(CS.arg_size() == FTy->getNumParams(),
- "Incorrect number of arguments passed to called function!", I);
+ Assert(Call.arg_size() == FTy->getNumParams(),
+ "Incorrect number of arguments passed to called function!", Call);
// Verify that all arguments to the call match the function type.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Assert(CS.getArgument(i)->getType() == FTy->getParamType(i),
+ Assert(Call.getArgOperand(i)->getType() == FTy->getParamType(i),
"Call parameter type does not match function signature!",
- CS.getArgument(i), FTy->getParamType(i), I);
+ Call.getArgOperand(i), FTy->getParamType(i), Call);
- AttributeList Attrs = CS.getAttributes();
+ AttributeList Attrs = Call.getAttributes();
- Assert(verifyAttributeCount(Attrs, CS.arg_size()),
- "Attribute after last parameter!", I);
+ Assert(verifyAttributeCount(Attrs, Call.arg_size()),
+ "Attribute after last parameter!", Call);
if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Speculatable)) {
// Don't allow speculatable on call sites, unless the underlying function
// declaration is also speculatable.
- Function *Callee
- = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ Function *Callee =
+ dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts());
Assert(Callee && Callee->isSpeculatable(),
- "speculatable attribute may not apply to call sites", I);
+ "speculatable attribute may not apply to call sites", Call);
}
// Verify call attributes.
- verifyFunctionAttrs(FTy, Attrs, I);
+ verifyFunctionAttrs(FTy, Attrs, &Call);
// Conservatively check the inalloca argument.
// We have a bug if we can find that there is an underlying alloca without
// inalloca.
- if (CS.hasInAllocaArgument()) {
- Value *InAllocaArg = CS.getArgument(FTy->getNumParams() - 1);
+ if (Call.hasInAllocaArgument()) {
+ Value *InAllocaArg = Call.getArgOperand(FTy->getNumParams() - 1);
if (auto AI = dyn_cast<AllocaInst>(InAllocaArg->stripInBoundsOffsets()))
Assert(AI->isUsedWithInAlloca(),
- "inalloca argument for call has mismatched alloca", AI, I);
+ "inalloca argument for call has mismatched alloca", AI, Call);
}
// For each argument of the callsite, if it has the swifterror argument,
// make sure the underlying alloca/parameter it comes from has a swifterror as
// well.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- if (CS.paramHasAttr(i, Attribute::SwiftError)) {
- Value *SwiftErrorArg = CS.getArgument(i);
+ if (Call.paramHasAttr(i, Attribute::SwiftError)) {
+ Value *SwiftErrorArg = Call.getArgOperand(i);
if (auto AI = dyn_cast<AllocaInst>(SwiftErrorArg->stripInBoundsOffsets())) {
Assert(AI->isSwiftError(),
- "swifterror argument for call has mismatched alloca", AI, I);
+ "swifterror argument for call has mismatched alloca", AI, Call);
continue;
}
auto ArgI = dyn_cast<Argument>(SwiftErrorArg);
- Assert(ArgI, "swifterror argument should come from an alloca or parameter", SwiftErrorArg, I);
+ Assert(ArgI,
+ "swifterror argument should come from an alloca or parameter",
+ SwiftErrorArg, Call);
Assert(ArgI->hasSwiftErrorAttr(),
- "swifterror argument for call has mismatched parameter", ArgI, I);
+ "swifterror argument for call has mismatched parameter", ArgI,
+ Call);
}
if (FTy->isVarArg()) {
@@ -2781,90 +2835,97 @@ void Verifier::verifyCallSite(CallSite CS) {
}
// Check attributes on the varargs part.
- for (unsigned Idx = FTy->getNumParams(); Idx < CS.arg_size(); ++Idx) {
- Type *Ty = CS.getArgument(Idx)->getType();
+ for (unsigned Idx = FTy->getNumParams(); Idx < Call.arg_size(); ++Idx) {
+ Type *Ty = Call.getArgOperand(Idx)->getType();
AttributeSet ArgAttrs = Attrs.getParamAttributes(Idx);
- verifyParameterAttrs(ArgAttrs, Ty, I);
+ verifyParameterAttrs(ArgAttrs, Ty, &Call);
if (ArgAttrs.hasAttribute(Attribute::Nest)) {
- Assert(!SawNest, "More than one parameter has attribute nest!", I);
+ Assert(!SawNest, "More than one parameter has attribute nest!", Call);
SawNest = true;
}
if (ArgAttrs.hasAttribute(Attribute::Returned)) {
Assert(!SawReturned, "More than one parameter has attribute returned!",
- I);
+ Call);
Assert(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
"Incompatible argument and return types for 'returned' "
"attribute",
- I);
+ Call);
SawReturned = true;
}
- Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
- "Attribute 'sret' cannot be used for vararg call arguments!", I);
+ // Statepoint intrinsic is vararg but the wrapped function may be not.
+ // Allow sret here and check the wrapped function in verifyStatepoint.
+ if (!Call.getCalledFunction() ||
+ Call.getCalledFunction()->getIntrinsicID() !=
+ Intrinsic::experimental_gc_statepoint)
+ Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
+ "Attribute 'sret' cannot be used for vararg call arguments!",
+ Call);
if (ArgAttrs.hasAttribute(Attribute::InAlloca))
- Assert(Idx == CS.arg_size() - 1, "inalloca isn't on the last argument!",
- I);
+ Assert(Idx == Call.arg_size() - 1,
+ "inalloca isn't on the last argument!", Call);
}
}
// Verify that there's no metadata unless it's a direct call to an intrinsic.
- if (CS.getCalledFunction() == nullptr ||
- !CS.getCalledFunction()->getName().startswith("llvm.")) {
+ if (!Call.getCalledFunction() ||
+ !Call.getCalledFunction()->getName().startswith("llvm.")) {
for (Type *ParamTy : FTy->params()) {
Assert(!ParamTy->isMetadataTy(),
- "Function has metadata parameter but isn't an intrinsic", I);
+ "Function has metadata parameter but isn't an intrinsic", Call);
Assert(!ParamTy->isTokenTy(),
- "Function has token parameter but isn't an intrinsic", I);
+ "Function has token parameter but isn't an intrinsic", Call);
}
}
// Verify that indirect calls don't return tokens.
- if (CS.getCalledFunction() == nullptr)
+ if (!Call.getCalledFunction())
Assert(!FTy->getReturnType()->isTokenTy(),
"Return type cannot be token for indirect call!");
- if (Function *F = CS.getCalledFunction())
+ if (Function *F = Call.getCalledFunction())
if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
- visitIntrinsicCallSite(ID, CS);
+ visitIntrinsicCall(ID, Call);
// Verify that a callsite has at most one "deopt", at most one "funclet" and
// at most one "gc-transition" operand bundle.
bool FoundDeoptBundle = false, FoundFuncletBundle = false,
FoundGCTransitionBundle = false;
- for (unsigned i = 0, e = CS.getNumOperandBundles(); i < e; ++i) {
- OperandBundleUse BU = CS.getOperandBundleAt(i);
+ for (unsigned i = 0, e = Call.getNumOperandBundles(); i < e; ++i) {
+ OperandBundleUse BU = Call.getOperandBundleAt(i);
uint32_t Tag = BU.getTagID();
if (Tag == LLVMContext::OB_deopt) {
- Assert(!FoundDeoptBundle, "Multiple deopt operand bundles", I);
+ Assert(!FoundDeoptBundle, "Multiple deopt operand bundles", Call);
FoundDeoptBundle = true;
} else if (Tag == LLVMContext::OB_gc_transition) {
Assert(!FoundGCTransitionBundle, "Multiple gc-transition operand bundles",
- I);
+ Call);
FoundGCTransitionBundle = true;
} else if (Tag == LLVMContext::OB_funclet) {
- Assert(!FoundFuncletBundle, "Multiple funclet operand bundles", I);
+ Assert(!FoundFuncletBundle, "Multiple funclet operand bundles", Call);
FoundFuncletBundle = true;
Assert(BU.Inputs.size() == 1,
- "Expected exactly one funclet bundle operand", I);
+ "Expected exactly one funclet bundle operand", Call);
Assert(isa<FuncletPadInst>(BU.Inputs.front()),
"Funclet bundle operands should correspond to a FuncletPadInst",
- I);
+ Call);
}
}
// Verify that each inlinable callsite of a debug-info-bearing function in a
// debug-info-bearing function has a debug location attached to it. Failure to
// do so causes assertion failures when the inliner sets up inline scope info.
- if (I->getFunction()->getSubprogram() && CS.getCalledFunction() &&
- CS.getCalledFunction()->getSubprogram())
- AssertDI(I->getDebugLoc(), "inlinable function call in a function with "
- "debug info must have a !dbg location",
- I);
+ if (Call.getFunction()->getSubprogram() && Call.getCalledFunction() &&
+ Call.getCalledFunction()->getSubprogram())
+ AssertDI(Call.getDebugLoc(),
+ "inlinable function call in a function with "
+ "debug info must have a !dbg location",
+ Call);
- visitInstruction(*I);
+ visitInstruction(Call);
}
/// Two types are "congruent" if they are identical, or if they are both pointer
@@ -2959,14 +3020,14 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
}
void Verifier::visitCallInst(CallInst &CI) {
- verifyCallSite(&CI);
+ visitCallBase(CI);
if (CI.isMustTailCall())
verifyMustTailCall(CI);
}
void Verifier::visitInvokeInst(InvokeInst &II) {
- verifyCallSite(&II);
+ visitCallBase(II);
// Verify that the first non-PHI instruction of the unwind destination is an
// exception handling instruction.
@@ -2975,7 +3036,29 @@ void Verifier::visitInvokeInst(InvokeInst &II) {
"The unwind destination does not have an exception handling instruction!",
&II);
- visitTerminatorInst(II);
+ visitTerminator(II);
+}
+
+/// visitUnaryOperator - Check the argument to the unary operator.
+///
+void Verifier::visitUnaryOperator(UnaryOperator &U) {
+ Assert(U.getType() == U.getOperand(0)->getType(),
+ "Unary operators must have same type for"
+ "operands and result!",
+ &U);
+
+ switch (U.getOpcode()) {
+ // Check that floating-point arithmetic operators are only used with
+ // floating-point operands.
+ case Instruction::FNeg:
+ Assert(U.getType()->isFPOrFPVectorTy(),
+ "FNeg operator only works with float types!", &U);
+ break;
+ default:
+ llvm_unreachable("Unknown UnaryOperator opcode!");
+ }
+
+ visitInstruction(U);
}
/// visitBinaryOperator - Check that both arguments to the binary operator are
@@ -3131,6 +3214,12 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
"All GEP indices should be of integer type");
}
}
+
+ if (auto *PTy = dyn_cast<PointerType>(GEP.getType())) {
+ Assert(GEP.getAddressSpace() == PTy->getAddressSpace(),
+ "GEP address space doesn't match type", &GEP);
+ }
+
visitInstruction(GEP);
}
@@ -3247,16 +3336,15 @@ void Verifier::visitStoreInst(StoreInst &SI) {
}
/// Check that SwiftErrorVal is used as a swifterror argument in CS.
-void Verifier::verifySwiftErrorCallSite(CallSite CS,
- const Value *SwiftErrorVal) {
+void Verifier::verifySwiftErrorCall(CallBase &Call,
+ const Value *SwiftErrorVal) {
unsigned Idx = 0;
- for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I, ++Idx) {
+ for (auto I = Call.arg_begin(), E = Call.arg_end(); I != E; ++I, ++Idx) {
if (*I == SwiftErrorVal) {
- Assert(CS.paramHasAttr(Idx, Attribute::SwiftError),
+ Assert(Call.paramHasAttr(Idx, Attribute::SwiftError),
"swifterror value when used in a callsite should be marked "
"with swifterror attribute",
- SwiftErrorVal, CS);
+ SwiftErrorVal, Call);
}
}
}
@@ -3275,10 +3363,8 @@ void Verifier::verifySwiftErrorValue(const Value *SwiftErrorVal) {
Assert(StoreI->getOperand(1) == SwiftErrorVal,
"swifterror value should be the second operand when used "
"by stores", SwiftErrorVal, U);
- if (auto CallI = dyn_cast<CallInst>(U))
- verifySwiftErrorCallSite(const_cast<CallInst*>(CallI), SwiftErrorVal);
- if (auto II = dyn_cast<InvokeInst>(U))
- verifySwiftErrorCallSite(const_cast<InvokeInst*>(II), SwiftErrorVal);
+ if (auto *Call = dyn_cast<CallBase>(U))
+ verifySwiftErrorCall(*const_cast<CallBase *>(Call), SwiftErrorVal);
}
}
@@ -3341,17 +3427,19 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
"atomicrmw instructions must be atomic.", &RMWI);
Assert(RMWI.getOrdering() != AtomicOrdering::Unordered,
"atomicrmw instructions cannot be unordered.", &RMWI);
+ auto Op = RMWI.getOperation();
PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
Type *ElTy = PTy->getElementType();
- Assert(ElTy->isIntegerTy(), "atomicrmw operand must have integer type!",
+ Assert(ElTy->isIntegerTy(), "atomicrmw " +
+ AtomicRMWInst::getOperationName(Op) +
+ " operand must have integer type!",
&RMWI, ElTy);
checkAtomicMemAccessSize(ElTy, &RMWI);
Assert(ElTy == RMWI.getOperand(1)->getType(),
"Argument value type does not match pointer operand type!", &RMWI,
ElTy);
- Assert(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() &&
- RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP,
+ Assert(AtomicRMWInst::FIRST_BINOP <= Op && Op <= AtomicRMWInst::LAST_BINOP,
"Invalid binary operation!", &RMWI);
visitInstruction(RMWI);
}
@@ -3430,7 +3518,7 @@ void Verifier::visitEHPadPredecessors(Instruction &I) {
Instruction *ToPad = &I;
Value *ToPadParent = getParentPad(ToPad);
for (BasicBlock *PredBB : predecessors(BB)) {
- TerminatorInst *TI = PredBB->getTerminator();
+ Instruction *TI = PredBB->getTerminator();
Value *FromPad;
if (auto *II = dyn_cast<InvokeInst>(TI)) {
Assert(II->getUnwindDest() == BB && II->getNormalDest() != BB,
@@ -3518,7 +3606,7 @@ void Verifier::visitResumeInst(ResumeInst &RI) {
"inside a function.",
&RI);
- visitTerminatorInst(RI);
+ visitTerminator(RI);
}
void Verifier::visitCatchPadInst(CatchPadInst &CPI) {
@@ -3546,7 +3634,7 @@ void Verifier::visitCatchReturnInst(CatchReturnInst &CatchReturn) {
"CatchReturnInst needs to be provided a CatchPad", &CatchReturn,
CatchReturn.getOperand(0));
- visitTerminatorInst(CatchReturn);
+ visitTerminator(CatchReturn);
}
void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) {
@@ -3667,7 +3755,7 @@ void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) {
// Record cleanup sibling unwinds for verifySiblingFuncletUnwinds
if (isa<CleanupPadInst>(&FPI) && !isa<ConstantTokenNone>(UnwindPad) &&
getParentPad(UnwindPad) == getParentPad(&FPI))
- SiblingFuncletInfo[&FPI] = cast<TerminatorInst>(U);
+ SiblingFuncletInfo[&FPI] = cast<Instruction>(U);
}
}
// Make sure we visit all uses of FPI, but for nested pads stop as
@@ -3768,7 +3856,7 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
}
visitEHPadPredecessors(CatchSwitch);
- visitTerminatorInst(CatchSwitch);
+ visitTerminator(CatchSwitch);
}
void Verifier::visitCleanupReturnInst(CleanupReturnInst &CRI) {
@@ -3784,7 +3872,7 @@ void Verifier::visitCleanupReturnInst(CleanupReturnInst &CRI) {
&CRI);
}
- visitTerminatorInst(CRI);
+ visitTerminator(CRI);
}
void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
@@ -3867,6 +3955,10 @@ void Verifier::visitInstruction(Instruction &I) {
}
}
+ // Get a pointer to the call base of the instruction if it is some form of
+ // call.
+ const CallBase *CBI = dyn_cast<CallBase>(&I);
+
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
Assert(I.getOperand(i) != nullptr, "Instruction has null operand!", &I);
@@ -3879,10 +3971,9 @@ void Verifier::visitInstruction(Instruction &I) {
if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
// Check to make sure that the "address of" an intrinsic function is never
// taken.
- Assert(
- !F->isIntrinsic() ||
- i == (isa<CallInst>(I) ? e - 1 : isa<InvokeInst>(I) ? e - 3 : 0),
- "Cannot take the address of an intrinsic!", &I);
+ Assert(!F->isIntrinsic() ||
+ (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)),
+ "Cannot take the address of an intrinsic!", &I);
Assert(
!F->isIntrinsic() || isa<CallInst>(I) ||
F->getIntrinsicID() == Intrinsic::donothing ||
@@ -3908,8 +3999,7 @@ void Verifier::visitInstruction(Instruction &I) {
} else if (isa<Instruction>(I.getOperand(i))) {
verifyDominatesUse(I, i);
} else if (isa<InlineAsm>(I.getOperand(i))) {
- Assert((i + 1 == e && isa<CallInst>(I)) ||
- (i + 3 == e && isa<InvokeInst>(I)),
+ Assert(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i),
"Cannot take the address of an inline asm!", &I);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I.getOperand(i))) {
if (CE->getType()->isPtrOrPtrVectorTy() ||
@@ -3984,15 +4074,15 @@ void Verifier::visitInstruction(Instruction &I) {
visitMDNode(*N);
}
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I))
verifyFragmentExpression(*DII);
InstsInThisBlock.insert(&I);
}
/// Allow intrinsics to be verified in different ways.
-void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
- Function *IF = CS.getCalledFunction();
+void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
+ Function *IF = Call.getCalledFunction();
Assert(IF->isDeclaration(), "Intrinsic functions should never be defined!",
IF);
@@ -4038,15 +4128,15 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
// If the intrinsic takes MDNode arguments, verify that they are either global
// or are local to *this* function.
- for (Value *V : CS.args())
+ for (Value *V : Call.args())
if (auto *MD = dyn_cast<MetadataAsValue>(V))
- visitMetadataAsValue(*MD, CS.getCaller());
+ visitMetadataAsValue(*MD, Call.getCaller());
switch (ID) {
default:
break;
case Intrinsic::coro_id: {
- auto *InfoArg = CS.getArgOperand(3)->stripPointerCasts();
+ auto *InfoArg = Call.getArgOperand(3)->stripPointerCasts();
if (isa<ConstantPointerNull>(InfoArg))
break;
auto *GV = dyn_cast<GlobalVariable>(InfoArg);
@@ -4061,10 +4151,10 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
}
case Intrinsic::ctlz: // llvm.ctlz
case Intrinsic::cttz: // llvm.cttz
- Assert(isa<ConstantInt>(CS.getArgOperand(1)),
+ Assert(isa<ConstantInt>(Call.getArgOperand(1)),
"is_zero_undef argument of bit counting intrinsics must be a "
"constant int",
- CS);
+ Call);
break;
case Intrinsic::experimental_constrained_fadd:
case Intrinsic::experimental_constrained_fsub:
@@ -4084,59 +4174,64 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
- visitConstrainedFPIntrinsic(
- cast<ConstrainedFPIntrinsic>(*CS.getInstruction()));
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
+ visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(Call));
break;
case Intrinsic::dbg_declare: // llvm.dbg.declare
- Assert(isa<MetadataAsValue>(CS.getArgOperand(0)),
- "invalid llvm.dbg.declare intrinsic call 1", CS);
- visitDbgIntrinsic("declare", cast<DbgInfoIntrinsic>(*CS.getInstruction()));
+ Assert(isa<MetadataAsValue>(Call.getArgOperand(0)),
+ "invalid llvm.dbg.declare intrinsic call 1", Call);
+ visitDbgIntrinsic("declare", cast<DbgVariableIntrinsic>(Call));
break;
case Intrinsic::dbg_addr: // llvm.dbg.addr
- visitDbgIntrinsic("addr", cast<DbgInfoIntrinsic>(*CS.getInstruction()));
+ visitDbgIntrinsic("addr", cast<DbgVariableIntrinsic>(Call));
break;
case Intrinsic::dbg_value: // llvm.dbg.value
- visitDbgIntrinsic("value", cast<DbgInfoIntrinsic>(*CS.getInstruction()));
+ visitDbgIntrinsic("value", cast<DbgVariableIntrinsic>(Call));
break;
case Intrinsic::dbg_label: // llvm.dbg.label
- visitDbgLabelIntrinsic("label", cast<DbgLabelInst>(*CS.getInstruction()));
+ visitDbgLabelIntrinsic("label", cast<DbgLabelInst>(Call));
break;
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset: {
- const auto *MI = cast<MemIntrinsic>(CS.getInstruction());
+ const auto *MI = cast<MemIntrinsic>(&Call);
auto IsValidAlignment = [&](unsigned Alignment) -> bool {
return Alignment == 0 || isPowerOf2_32(Alignment);
};
Assert(IsValidAlignment(MI->getDestAlignment()),
"alignment of arg 0 of memory intrinsic must be 0 or a power of 2",
- CS);
+ Call);
if (const auto *MTI = dyn_cast<MemTransferInst>(MI)) {
Assert(IsValidAlignment(MTI->getSourceAlignment()),
"alignment of arg 1 of memory intrinsic must be 0 or a power of 2",
- CS);
+ Call);
}
- Assert(isa<ConstantInt>(CS.getArgOperand(3)),
+ Assert(isa<ConstantInt>(Call.getArgOperand(3)),
"isvolatile argument of memory intrinsics must be a constant int",
- CS);
+ Call);
break;
}
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memmove_element_unordered_atomic:
case Intrinsic::memset_element_unordered_atomic: {
- const auto *AMI = cast<AtomicMemIntrinsic>(CS.getInstruction());
+ const auto *AMI = cast<AtomicMemIntrinsic>(&Call);
ConstantInt *ElementSizeCI =
dyn_cast<ConstantInt>(AMI->getRawElementSizeInBytes());
Assert(ElementSizeCI,
"element size of the element-wise unordered atomic memory "
"intrinsic must be a constant int",
- CS);
+ Call);
const APInt &ElementSizeVal = ElementSizeCI->getValue();
Assert(ElementSizeVal.isPowerOf2(),
"element size of the element-wise atomic memory intrinsic "
"must be a power of 2",
- CS);
+ Call);
if (auto *LengthCI = dyn_cast<ConstantInt>(AMI->getLength())) {
uint64_t Length = LengthCI->getZExtValue();
@@ -4144,7 +4239,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
Assert((Length % ElementSize) == 0,
"constant length must be a multiple of the element size in the "
"element-wise atomic memory intrinsic",
- CS);
+ Call);
}
auto IsValidAlignment = [&](uint64_t Alignment) {
@@ -4152,11 +4247,11 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
};
uint64_t DstAlignment = AMI->getDestAlignment();
Assert(IsValidAlignment(DstAlignment),
- "incorrect alignment of the destination argument", CS);
+ "incorrect alignment of the destination argument", Call);
if (const auto *AMT = dyn_cast<AtomicMemTransferInst>(AMI)) {
uint64_t SrcAlignment = AMT->getSourceAlignment();
Assert(IsValidAlignment(SrcAlignment),
- "incorrect alignment of the source argument", CS);
+ "incorrect alignment of the source argument", Call);
}
break;
}
@@ -4165,76 +4260,76 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
case Intrinsic::gcread:
if (ID == Intrinsic::gcroot) {
AllocaInst *AI =
- dyn_cast<AllocaInst>(CS.getArgOperand(0)->stripPointerCasts());
- Assert(AI, "llvm.gcroot parameter #1 must be an alloca.", CS);
- Assert(isa<Constant>(CS.getArgOperand(1)),
- "llvm.gcroot parameter #2 must be a constant.", CS);
+ dyn_cast<AllocaInst>(Call.getArgOperand(0)->stripPointerCasts());
+ Assert(AI, "llvm.gcroot parameter #1 must be an alloca.", Call);
+ Assert(isa<Constant>(Call.getArgOperand(1)),
+ "llvm.gcroot parameter #2 must be a constant.", Call);
if (!AI->getAllocatedType()->isPointerTy()) {
- Assert(!isa<ConstantPointerNull>(CS.getArgOperand(1)),
+ Assert(!isa<ConstantPointerNull>(Call.getArgOperand(1)),
"llvm.gcroot parameter #1 must either be a pointer alloca, "
"or argument #2 must be a non-null constant.",
- CS);
+ Call);
}
}
- Assert(CS.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", CS);
+ Assert(Call.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", Call);
break;
case Intrinsic::init_trampoline:
- Assert(isa<Function>(CS.getArgOperand(1)->stripPointerCasts()),
+ Assert(isa<Function>(Call.getArgOperand(1)->stripPointerCasts()),
"llvm.init_trampoline parameter #2 must resolve to a function.",
- CS);
+ Call);
break;
case Intrinsic::prefetch:
- Assert(isa<ConstantInt>(CS.getArgOperand(1)) &&
- isa<ConstantInt>(CS.getArgOperand(2)) &&
- cast<ConstantInt>(CS.getArgOperand(1))->getZExtValue() < 2 &&
- cast<ConstantInt>(CS.getArgOperand(2))->getZExtValue() < 4,
- "invalid arguments to llvm.prefetch", CS);
+ Assert(isa<ConstantInt>(Call.getArgOperand(1)) &&
+ isa<ConstantInt>(Call.getArgOperand(2)) &&
+ cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
+ "invalid arguments to llvm.prefetch", Call);
break;
case Intrinsic::stackprotector:
- Assert(isa<AllocaInst>(CS.getArgOperand(1)->stripPointerCasts()),
- "llvm.stackprotector parameter #2 must resolve to an alloca.", CS);
+ Assert(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()),
+ "llvm.stackprotector parameter #2 must resolve to an alloca.", Call);
break;
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
- Assert(isa<ConstantInt>(CS.getArgOperand(0)),
+ Assert(isa<ConstantInt>(Call.getArgOperand(0)),
"size argument of memory use markers must be a constant integer",
- CS);
+ Call);
break;
case Intrinsic::invariant_end:
- Assert(isa<ConstantInt>(CS.getArgOperand(1)),
- "llvm.invariant.end parameter #2 must be a constant integer", CS);
+ Assert(isa<ConstantInt>(Call.getArgOperand(1)),
+ "llvm.invariant.end parameter #2 must be a constant integer", Call);
break;
case Intrinsic::localescape: {
- BasicBlock *BB = CS.getParent();
+ BasicBlock *BB = Call.getParent();
Assert(BB == &BB->getParent()->front(),
- "llvm.localescape used outside of entry block", CS);
+ "llvm.localescape used outside of entry block", Call);
Assert(!SawFrameEscape,
- "multiple calls to llvm.localescape in one function", CS);
- for (Value *Arg : CS.args()) {
+ "multiple calls to llvm.localescape in one function", Call);
+ for (Value *Arg : Call.args()) {
if (isa<ConstantPointerNull>(Arg))
continue; // Null values are allowed as placeholders.
auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
Assert(AI && AI->isStaticAlloca(),
- "llvm.localescape only accepts static allocas", CS);
+ "llvm.localescape only accepts static allocas", Call);
}
- FrameEscapeInfo[BB->getParent()].first = CS.getNumArgOperands();
+ FrameEscapeInfo[BB->getParent()].first = Call.getNumArgOperands();
SawFrameEscape = true;
break;
}
case Intrinsic::localrecover: {
- Value *FnArg = CS.getArgOperand(0)->stripPointerCasts();
+ Value *FnArg = Call.getArgOperand(0)->stripPointerCasts();
Function *Fn = dyn_cast<Function>(FnArg);
Assert(Fn && !Fn->isDeclaration(),
"llvm.localrecover first "
"argument must be function defined in this module",
- CS);
- auto *IdxArg = dyn_cast<ConstantInt>(CS.getArgOperand(2));
+ Call);
+ auto *IdxArg = dyn_cast<ConstantInt>(Call.getArgOperand(2));
Assert(IdxArg, "idx argument of llvm.localrecover must be a constant int",
- CS);
+ Call);
auto &Entry = FrameEscapeInfo[Fn];
Entry.second = unsigned(
std::max(uint64_t(Entry.second), IdxArg->getLimitedValue(~0U) + 1));
@@ -4242,45 +4337,46 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
}
case Intrinsic::experimental_gc_statepoint:
- Assert(!CS.isInlineAsm(),
- "gc.statepoint support for inline assembly unimplemented", CS);
- Assert(CS.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", CS);
+ if (auto *CI = dyn_cast<CallInst>(&Call))
+ Assert(!CI->isInlineAsm(),
+ "gc.statepoint support for inline assembly unimplemented", CI);
+ Assert(Call.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", Call);
- verifyStatepoint(CS);
+ verifyStatepoint(Call);
break;
case Intrinsic::experimental_gc_result: {
- Assert(CS.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", CS);
+ Assert(Call.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", Call);
// Are we tied to a statepoint properly?
- CallSite StatepointCS(CS.getArgOperand(0));
+ const auto *StatepointCall = dyn_cast<CallBase>(Call.getArgOperand(0));
const Function *StatepointFn =
- StatepointCS.getInstruction() ? StatepointCS.getCalledFunction() : nullptr;
+ StatepointCall ? StatepointCall->getCalledFunction() : nullptr;
Assert(StatepointFn && StatepointFn->isDeclaration() &&
StatepointFn->getIntrinsicID() ==
Intrinsic::experimental_gc_statepoint,
- "gc.result operand #1 must be from a statepoint", CS,
- CS.getArgOperand(0));
+ "gc.result operand #1 must be from a statepoint", Call,
+ Call.getArgOperand(0));
// Assert that result type matches wrapped callee.
- const Value *Target = StatepointCS.getArgument(2);
+ const Value *Target = StatepointCall->getArgOperand(2);
auto *PT = cast<PointerType>(Target->getType());
auto *TargetFuncType = cast<FunctionType>(PT->getElementType());
- Assert(CS.getType() == TargetFuncType->getReturnType(),
- "gc.result result type does not match wrapped callee", CS);
+ Assert(Call.getType() == TargetFuncType->getReturnType(),
+ "gc.result result type does not match wrapped callee", Call);
break;
}
case Intrinsic::experimental_gc_relocate: {
- Assert(CS.getNumArgOperands() == 3, "wrong number of arguments", CS);
+ Assert(Call.getNumArgOperands() == 3, "wrong number of arguments", Call);
- Assert(isa<PointerType>(CS.getType()->getScalarType()),
- "gc.relocate must return a pointer or a vector of pointers", CS);
+ Assert(isa<PointerType>(Call.getType()->getScalarType()),
+ "gc.relocate must return a pointer or a vector of pointers", Call);
// Check that this relocate is correctly tied to the statepoint
// This is case for relocate on the unwinding path of an invoke statepoint
if (LandingPadInst *LandingPad =
- dyn_cast<LandingPadInst>(CS.getArgOperand(0))) {
+ dyn_cast<LandingPadInst>(Call.getArgOperand(0))) {
const BasicBlock *InvokeBB =
LandingPad->getParent()->getUniquePredecessor();
@@ -4293,167 +4389,198 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
InvokeBB);
Assert(isStatepoint(InvokeBB->getTerminator()),
"gc relocate should be linked to a statepoint", InvokeBB);
- }
- else {
+ } else {
// In all other cases relocate should be tied to the statepoint directly.
// This covers relocates on a normal return path of invoke statepoint and
// relocates of a call statepoint.
- auto Token = CS.getArgOperand(0);
+ auto Token = Call.getArgOperand(0);
Assert(isa<Instruction>(Token) && isStatepoint(cast<Instruction>(Token)),
- "gc relocate is incorrectly tied to the statepoint", CS, Token);
+ "gc relocate is incorrectly tied to the statepoint", Call, Token);
}
// Verify rest of the relocate arguments.
-
- ImmutableCallSite StatepointCS(
- cast<GCRelocateInst>(*CS.getInstruction()).getStatepoint());
+ const CallBase &StatepointCall =
+ *cast<CallBase>(cast<GCRelocateInst>(Call).getStatepoint());
// Both the base and derived must be piped through the safepoint.
- Value* Base = CS.getArgOperand(1);
+ Value *Base = Call.getArgOperand(1);
Assert(isa<ConstantInt>(Base),
- "gc.relocate operand #2 must be integer offset", CS);
+ "gc.relocate operand #2 must be integer offset", Call);
- Value* Derived = CS.getArgOperand(2);
+ Value *Derived = Call.getArgOperand(2);
Assert(isa<ConstantInt>(Derived),
- "gc.relocate operand #3 must be integer offset", CS);
+ "gc.relocate operand #3 must be integer offset", Call);
const int BaseIndex = cast<ConstantInt>(Base)->getZExtValue();
const int DerivedIndex = cast<ConstantInt>(Derived)->getZExtValue();
// Check the bounds
- Assert(0 <= BaseIndex && BaseIndex < (int)StatepointCS.arg_size(),
- "gc.relocate: statepoint base index out of bounds", CS);
- Assert(0 <= DerivedIndex && DerivedIndex < (int)StatepointCS.arg_size(),
- "gc.relocate: statepoint derived index out of bounds", CS);
+ Assert(0 <= BaseIndex && BaseIndex < (int)StatepointCall.arg_size(),
+ "gc.relocate: statepoint base index out of bounds", Call);
+ Assert(0 <= DerivedIndex && DerivedIndex < (int)StatepointCall.arg_size(),
+ "gc.relocate: statepoint derived index out of bounds", Call);
// Check that BaseIndex and DerivedIndex fall within the 'gc parameters'
// section of the statepoint's argument.
- Assert(StatepointCS.arg_size() > 0,
+ Assert(StatepointCall.arg_size() > 0,
"gc.statepoint: insufficient arguments");
- Assert(isa<ConstantInt>(StatepointCS.getArgument(3)),
+ Assert(isa<ConstantInt>(StatepointCall.getArgOperand(3)),
"gc.statement: number of call arguments must be constant integer");
const unsigned NumCallArgs =
- cast<ConstantInt>(StatepointCS.getArgument(3))->getZExtValue();
- Assert(StatepointCS.arg_size() > NumCallArgs + 5,
+ cast<ConstantInt>(StatepointCall.getArgOperand(3))->getZExtValue();
+ Assert(StatepointCall.arg_size() > NumCallArgs + 5,
"gc.statepoint: mismatch in number of call arguments");
- Assert(isa<ConstantInt>(StatepointCS.getArgument(NumCallArgs + 5)),
+ Assert(isa<ConstantInt>(StatepointCall.getArgOperand(NumCallArgs + 5)),
"gc.statepoint: number of transition arguments must be "
"a constant integer");
const int NumTransitionArgs =
- cast<ConstantInt>(StatepointCS.getArgument(NumCallArgs + 5))
+ cast<ConstantInt>(StatepointCall.getArgOperand(NumCallArgs + 5))
->getZExtValue();
const int DeoptArgsStart = 4 + NumCallArgs + 1 + NumTransitionArgs + 1;
- Assert(isa<ConstantInt>(StatepointCS.getArgument(DeoptArgsStart)),
+ Assert(isa<ConstantInt>(StatepointCall.getArgOperand(DeoptArgsStart)),
"gc.statepoint: number of deoptimization arguments must be "
"a constant integer");
const int NumDeoptArgs =
- cast<ConstantInt>(StatepointCS.getArgument(DeoptArgsStart))
+ cast<ConstantInt>(StatepointCall.getArgOperand(DeoptArgsStart))
->getZExtValue();
const int GCParamArgsStart = DeoptArgsStart + 1 + NumDeoptArgs;
- const int GCParamArgsEnd = StatepointCS.arg_size();
+ const int GCParamArgsEnd = StatepointCall.arg_size();
Assert(GCParamArgsStart <= BaseIndex && BaseIndex < GCParamArgsEnd,
"gc.relocate: statepoint base index doesn't fall within the "
"'gc parameters' section of the statepoint call",
- CS);
+ Call);
Assert(GCParamArgsStart <= DerivedIndex && DerivedIndex < GCParamArgsEnd,
"gc.relocate: statepoint derived index doesn't fall within the "
"'gc parameters' section of the statepoint call",
- CS);
+ Call);
// Relocated value must be either a pointer type or vector-of-pointer type,
// but gc_relocate does not need to return the same pointer type as the
// relocated pointer. It can be casted to the correct type later if it's
// desired. However, they must have the same address space and 'vectorness'
- GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction());
+ GCRelocateInst &Relocate = cast<GCRelocateInst>(Call);
Assert(Relocate.getDerivedPtr()->getType()->isPtrOrPtrVectorTy(),
- "gc.relocate: relocated value must be a gc pointer", CS);
+ "gc.relocate: relocated value must be a gc pointer", Call);
- auto ResultType = CS.getType();
+ auto ResultType = Call.getType();
auto DerivedType = Relocate.getDerivedPtr()->getType();
Assert(ResultType->isVectorTy() == DerivedType->isVectorTy(),
"gc.relocate: vector relocates to vector and pointer to pointer",
- CS);
+ Call);
Assert(
ResultType->getPointerAddressSpace() ==
DerivedType->getPointerAddressSpace(),
"gc.relocate: relocating a pointer shouldn't change its address space",
- CS);
+ Call);
break;
}
case Intrinsic::eh_exceptioncode:
case Intrinsic::eh_exceptionpointer: {
- Assert(isa<CatchPadInst>(CS.getArgOperand(0)),
- "eh.exceptionpointer argument must be a catchpad", CS);
+ Assert(isa<CatchPadInst>(Call.getArgOperand(0)),
+ "eh.exceptionpointer argument must be a catchpad", Call);
break;
}
case Intrinsic::masked_load: {
- Assert(CS.getType()->isVectorTy(), "masked_load: must return a vector", CS);
+ Assert(Call.getType()->isVectorTy(), "masked_load: must return a vector",
+ Call);
- Value *Ptr = CS.getArgOperand(0);
- //Value *Alignment = CS.getArgOperand(1);
- Value *Mask = CS.getArgOperand(2);
- Value *PassThru = CS.getArgOperand(3);
- Assert(Mask->getType()->isVectorTy(),
- "masked_load: mask must be vector", CS);
+ Value *Ptr = Call.getArgOperand(0);
+ // Value *Alignment = Call.getArgOperand(1);
+ Value *Mask = Call.getArgOperand(2);
+ Value *PassThru = Call.getArgOperand(3);
+ Assert(Mask->getType()->isVectorTy(), "masked_load: mask must be vector",
+ Call);
// DataTy is the overloaded type
Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
- Assert(DataTy == CS.getType(),
- "masked_load: return must match pointer type", CS);
+ Assert(DataTy == Call.getType(),
+ "masked_load: return must match pointer type", Call);
Assert(PassThru->getType() == DataTy,
- "masked_load: pass through and data type must match", CS);
+ "masked_load: pass through and data type must match", Call);
Assert(Mask->getType()->getVectorNumElements() ==
- DataTy->getVectorNumElements(),
- "masked_load: vector mask must be same length as data", CS);
+ DataTy->getVectorNumElements(),
+ "masked_load: vector mask must be same length as data", Call);
break;
}
case Intrinsic::masked_store: {
- Value *Val = CS.getArgOperand(0);
- Value *Ptr = CS.getArgOperand(1);
- //Value *Alignment = CS.getArgOperand(2);
- Value *Mask = CS.getArgOperand(3);
- Assert(Mask->getType()->isVectorTy(),
- "masked_store: mask must be vector", CS);
+ Value *Val = Call.getArgOperand(0);
+ Value *Ptr = Call.getArgOperand(1);
+ // Value *Alignment = Call.getArgOperand(2);
+ Value *Mask = Call.getArgOperand(3);
+ Assert(Mask->getType()->isVectorTy(), "masked_store: mask must be vector",
+ Call);
// DataTy is the overloaded type
Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
Assert(DataTy == Val->getType(),
- "masked_store: storee must match pointer type", CS);
+ "masked_store: storee must match pointer type", Call);
Assert(Mask->getType()->getVectorNumElements() ==
- DataTy->getVectorNumElements(),
- "masked_store: vector mask must be same length as data", CS);
+ DataTy->getVectorNumElements(),
+ "masked_store: vector mask must be same length as data", Call);
break;
}
case Intrinsic::experimental_guard: {
- Assert(CS.isCall(), "experimental_guard cannot be invoked", CS);
- Assert(CS.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
+ Assert(isa<CallInst>(Call), "experimental_guard cannot be invoked", Call);
+ Assert(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
"experimental_guard must have exactly one "
"\"deopt\" operand bundle");
break;
}
case Intrinsic::experimental_deoptimize: {
- Assert(CS.isCall(), "experimental_deoptimize cannot be invoked", CS);
- Assert(CS.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
+ Assert(isa<CallInst>(Call), "experimental_deoptimize cannot be invoked",
+ Call);
+ Assert(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
"experimental_deoptimize must have exactly one "
"\"deopt\" operand bundle");
- Assert(CS.getType() == CS.getInstruction()->getFunction()->getReturnType(),
+ Assert(Call.getType() == Call.getFunction()->getReturnType(),
"experimental_deoptimize return type must match caller return type");
- if (CS.isCall()) {
- auto *DeoptCI = CS.getInstruction();
- auto *RI = dyn_cast<ReturnInst>(DeoptCI->getNextNode());
+ if (isa<CallInst>(Call)) {
+ auto *RI = dyn_cast<ReturnInst>(Call.getNextNode());
Assert(RI,
"calls to experimental_deoptimize must be followed by a return");
- if (!CS.getType()->isVoidTy() && RI)
- Assert(RI->getReturnValue() == DeoptCI,
+ if (!Call.getType()->isVoidTy() && RI)
+ Assert(RI->getReturnValue() == &Call,
"calls to experimental_deoptimize must be followed by a return "
"of the value computed by experimental_deoptimize");
}
break;
}
+ case Intrinsic::sadd_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::usub_sat: {
+ Value *Op1 = Call.getArgOperand(0);
+ Value *Op2 = Call.getArgOperand(1);
+ Assert(Op1->getType()->isIntOrIntVectorTy(),
+ "first operand of [us][add|sub]_sat must be an int type or vector "
+ "of ints");
+ Assert(Op2->getType()->isIntOrIntVectorTy(),
+ "second operand of [us][add|sub]_sat must be an int type or vector "
+ "of ints");
+ break;
+ }
+ case Intrinsic::smul_fix: {
+ Value *Op1 = Call.getArgOperand(0);
+ Value *Op2 = Call.getArgOperand(1);
+ Assert(Op1->getType()->isIntOrIntVectorTy(),
+ "first operand of smul_fix must be an int type or vector "
+ "of ints");
+ Assert(Op2->getType()->isIntOrIntVectorTy(),
+ "second operand of smul_fix must be an int type or vector "
+ "of ints");
+
+ auto *Op3 = dyn_cast<ConstantInt>(Call.getArgOperand(2));
+ Assert(Op3, "third argument of smul_fix must be a constant integer");
+ Assert(Op3->getType()->getBitWidth() <= 32,
+ "third argument of smul_fix must fit within 32 bits");
+ Assert(Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
+ "the scale of smul_fix must be less than the width of the operands");
+ break;
+ }
};
}
@@ -4491,7 +4618,7 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
"invalid exception behavior argument", &FPI);
}
-void Verifier::visitDbgIntrinsic(StringRef Kind, DbgInfoIntrinsic &DII) {
+void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) {
auto *MD = cast<MetadataAsValue>(DII.getArgOperand(0))->getMetadata();
AssertDI(isa<ValueAsMetadata>(MD) ||
(isa<MDNode>(MD) && !cast<MDNode>(MD)->getNumOperands()),
@@ -4527,13 +4654,21 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgInfoIntrinsic &DII) {
&DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc,
Loc->getScope()->getSubprogram());
+ // This check is redundant with one in visitLocalVariable().
+ AssertDI(isType(Var->getRawType()), "invalid type ref", Var,
+ Var->getRawType());
+ if (auto *Type = dyn_cast_or_null<DIType>(Var->getRawType()))
+ if (Type->isBlockByrefStruct())
+ AssertDI(DII.getExpression() && DII.getExpression()->getNumElements(),
+ "BlockByRef variable without complex expression", Var, &DII);
+
verifyFnArgs(DII);
}
void Verifier::visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI) {
- AssertDI(isa<DILabel>(DLI.getRawVariable()),
+ AssertDI(isa<DILabel>(DLI.getRawLabel()),
"invalid llvm.dbg." + Kind + " intrinsic variable", &DLI,
- DLI.getRawVariable());
+ DLI.getRawLabel());
// Ignore broken !dbg attachments; they're checked elsewhere.
if (MDNode *N = DLI.getDebugLoc().getAsMDNode())
@@ -4560,10 +4695,7 @@ void Verifier::visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI) {
Loc->getScope()->getSubprogram());
}
-void Verifier::verifyFragmentExpression(const DbgInfoIntrinsic &I) {
- if (dyn_cast<DbgLabelInst>(&I))
- return;
-
+void Verifier::verifyFragmentExpression(const DbgVariableIntrinsic &I) {
DILocalVariable *V = dyn_cast_or_null<DILocalVariable>(I.getRawVariable());
DIExpression *E = dyn_cast_or_null<DIExpression>(I.getRawExpression());
@@ -4605,7 +4737,7 @@ void Verifier::verifyFragmentExpression(const DIVariable &V,
AssertDI(FragSize != *VarSize, "fragment covers entire variable", Desc, &V);
}
-void Verifier::verifyFnArgs(const DbgInfoIntrinsic &I) {
+void Verifier::verifyFnArgs(const DbgVariableIntrinsic &I) {
// This function does not take the scope of noninlined function arguments into
// account. Don't run it if current function is nodebug, because it may
// contain inlined debug intrinsics.
@@ -4662,6 +4794,14 @@ void Verifier::verifyDeoptimizeCallingConvs() {
}
}
+void Verifier::verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F) {
+ bool HasSource = F.getSource().hasValue();
+ if (!HasSourceDebugInfo.count(&U))
+ HasSourceDebugInfo[&U] = HasSource;
+ AssertDI(HasSource == HasSourceDebugInfo[&U],
+ "inconsistent use of embedded source");
+}
+
//===----------------------------------------------------------------------===//
// Implement the public interfaces to this file...
//===----------------------------------------------------------------------===//
@@ -4718,9 +4858,10 @@ struct VerifierLegacyPass : public FunctionPass {
}
bool runOnFunction(Function &F) override {
- if (!V->verify(F) && FatalErrors)
+ if (!V->verify(F) && FatalErrors) {
+ errs() << "in function " << F.getName() << '\n';
report_fatal_error("Broken function found, compilation aborted!");
-
+ }
return false;
}
diff --git a/contrib/llvm/lib/LTO/LTO.cpp b/contrib/llvm/lib/LTO/LTO.cpp
index 68d210cb7d73..3a955060deaa 100644
--- a/contrib/llvm/lib/LTO/LTO.cpp
+++ b/contrib/llvm/lib/LTO/LTO.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/LTO/LTOBackend.h"
+#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/Error.h"
@@ -42,6 +43,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
#include <set>
@@ -56,22 +58,20 @@ static cl::opt<bool>
DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden,
cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
-// The values are (type identifier, summary) pairs.
-typedef DenseMap<
- GlobalValue::GUID,
- TinyPtrVector<const std::pair<const std::string, TypeIdSummary> *>>
- TypeIdSummariesByGuidTy;
+/// Enable global value internalization in LTO.
+cl::opt<bool> EnableLTOInternalization(
+ "enable-lto-internalization", cl::init(true), cl::Hidden,
+ cl::desc("Enable global value internalization in LTO"));
-// Returns a unique hash for the Module considering the current list of
+// Computes a unique hash for the Module considering the current list of
// export/import and other global analysis results.
// The hash is produced in \p Key.
-static void computeCacheKey(
+void llvm::computeLTOCacheKey(
SmallString<40> &Key, const Config &Conf, const ModuleSummaryIndex &Index,
StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
- const TypeIdSummariesByGuidTy &TypeIdSummariesByGuid,
const std::set<GlobalValue::GUID> &CfiFunctionDefs,
const std::set<GlobalValue::GUID> &CfiFunctionDecls) {
// Compute the unique hash for this entry.
@@ -134,6 +134,7 @@ static void computeCacheKey(
AddUnsigned(Conf.CGFileType);
AddUnsigned(Conf.OptLevel);
AddUnsigned(Conf.UseNewPM);
+ AddUnsigned(Conf.Freestanding);
AddString(Conf.OptPipeline);
AddString(Conf.AAPipeline);
AddString(Conf.OverrideTriple);
@@ -189,6 +190,8 @@ static void computeCacheKey(
AddUnsigned(VI.isDSOLocal());
AddUsedCfiGlobal(VI.getGUID());
}
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(GS))
+ AddUnsigned(GVS->isReadOnly());
if (auto *FS = dyn_cast<FunctionSummary>(GS)) {
for (auto &TT : FS->type_tests())
UsedTypeIds.insert(TT);
@@ -220,8 +223,14 @@ static void computeCacheKey(
// Imported functions may introduce new uses of type identifier resolutions,
// so we need to collect their used resolutions as well.
for (auto &ImpM : ImportList)
- for (auto &ImpF : ImpM.second)
- AddUsedThings(Index.findSummaryInModule(ImpF, ImpM.first()));
+ for (auto &ImpF : ImpM.second) {
+ GlobalValueSummary *S = Index.findSummaryInModule(ImpF, ImpM.first());
+ AddUsedThings(S);
+ // If this is an alias, we also care about any types/etc. that the aliasee
+ // may reference.
+ if (auto *AS = dyn_cast_or_null<AliasSummary>(S))
+ AddUsedThings(AS->getBaseObject());
+ }
auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
AddString(TId);
@@ -255,10 +264,9 @@ static void computeCacheKey(
// Include the hash for all type identifiers used by this module.
for (GlobalValue::GUID TId : UsedTypeIds) {
- auto SummariesI = TypeIdSummariesByGuid.find(TId);
- if (SummariesI != TypeIdSummariesByGuid.end())
- for (auto *Summary : SummariesI->second)
- AddTypeIdSummary(Summary->first, Summary->second);
+ auto TidIter = Index.typeIds().equal_range(TId);
+ for (auto It = TidIter.first; It != TidIter.second; ++It)
+ AddTypeIdSummary(It->second.first, It->second.second);
}
AddUnsigned(UsedCfiDefs.size());
@@ -271,14 +279,21 @@ static void computeCacheKey(
if (!Conf.SampleProfile.empty()) {
auto FileOrErr = MemoryBuffer::getFile(Conf.SampleProfile);
- if (FileOrErr)
+ if (FileOrErr) {
Hasher.update(FileOrErr.get()->getBuffer());
+
+ if (!Conf.ProfileRemapping.empty()) {
+ FileOrErr = MemoryBuffer::getFile(Conf.ProfileRemapping);
+ if (FileOrErr)
+ Hasher.update(FileOrErr.get()->getBuffer());
+ }
+ }
}
Key = toHex(Hasher.result());
}
-static void thinLTOResolveWeakForLinkerGUID(
+static void thinLTOResolvePrevailingGUID(
GlobalValueSummaryList &GVSummaryList, GlobalValue::GUID GUID,
DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
@@ -287,7 +302,10 @@ static void thinLTOResolveWeakForLinkerGUID(
recordNewLinkage) {
for (auto &S : GVSummaryList) {
GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
- if (!GlobalValue::isWeakForLinker(OriginalLinkage))
+ // Ignore local and appending linkage values since the linker
+ // doesn't resolve them.
+ if (GlobalValue::isLocalLinkage(OriginalLinkage) ||
+ GlobalValue::isAppendingLinkage(S->linkage()))
continue;
// We need to emit only one of these. The prevailing module will keep it,
// but turned into a weak, while the others will drop it when possible.
@@ -311,13 +329,13 @@ static void thinLTOResolveWeakForLinkerGUID(
}
}
-// Resolve Weak and LinkOnce values in the \p Index.
+/// Resolve linkage for prevailing symbols in the \p Index.
//
// We'd like to drop these functions if they are no longer referenced in the
// current module. However there is a chance that another module is still
// referencing them because of the import. We make sure we always emit at least
// one copy.
-void llvm::thinLTOResolveWeakForLinkerInIndex(
+void llvm::thinLTOResolvePrevailingInIndex(
ModuleSummaryIndex &Index,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
@@ -333,9 +351,9 @@ void llvm::thinLTOResolveWeakForLinkerInIndex(
GlobalInvolvedWithAlias.insert(&AS->getAliasee());
for (auto &I : Index)
- thinLTOResolveWeakForLinkerGUID(I.second.SummaryList, I.first,
- GlobalInvolvedWithAlias, isPrevailing,
- recordNewLinkage);
+ thinLTOResolvePrevailingGUID(I.second.SummaryList, I.first,
+ GlobalInvolvedWithAlias, isPrevailing,
+ recordNewLinkage);
}
static void thinLTOInternalizeAndPromoteGUID(
@@ -345,7 +363,14 @@ static void thinLTOInternalizeAndPromoteGUID(
if (isExported(S->modulePath(), GUID)) {
if (GlobalValue::isLocalLinkage(S->linkage()))
S->setLinkage(GlobalValue::ExternalLinkage);
- } else if (!GlobalValue::isLocalLinkage(S->linkage()))
+ } else if (EnableLTOInternalization &&
+ // Ignore local and appending linkage values since the linker
+ // doesn't resolve them.
+ !GlobalValue::isLocalLinkage(S->linkage()) &&
+ S->linkage() != GlobalValue::AppendingLinkage &&
+ // We can't internalize available_externally globals because this
+ // can break function pointer equality.
+ S->linkage() != GlobalValue::AvailableExternallyLinkage)
S->setLinkage(GlobalValue::InternalLinkage);
}
}
@@ -521,6 +546,15 @@ Error LTO::addModule(InputFile &Input, unsigned ModI,
if (!LTOInfo)
return LTOInfo.takeError();
+ if (EnableSplitLTOUnit.hasValue()) {
+ // If only some modules were split, flag this in the index so that
+ // we can skip or error on optimizations that need consistently split
+ // modules (whole program devirt and lower type tests).
+ if (EnableSplitLTOUnit.getValue() != LTOInfo->EnableSplitLTOUnit)
+ ThinLTO.CombinedIndex.setPartiallySplitLTOUnits();
+ } else
+ EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;
+
BitcodeModule BM = Input.Mods[ModI];
auto ModSyms = Input.module_symbols(ModI);
addModuleToGlobalRes(ModSyms, {ResI, ResE},
@@ -668,8 +702,12 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
}
// Set the 'local' flag based on the linker resolution for this symbol.
- if (Res.FinalDefinitionInLinkageUnit)
+ if (Res.FinalDefinitionInLinkageUnit) {
GV->setDSOLocal(true);
+ if (GV->hasDLLImportStorageClass())
+ GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
+ DefaultStorageClass);
+ }
}
// Common resolution: collect the maximum size/alignment over all commons.
// We also record if we see an instance of a common as prevailing, so that
@@ -798,7 +836,8 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
return PrevailingType::Unknown;
return It->second;
};
- computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols, isPrevailing);
+ computeDeadSymbolsWithConstProp(ThinLTO.CombinedIndex, GUIDPreservedSymbols,
+ isPrevailing, Conf.OptLevel > 0);
// Setup output file to emit statistics.
std::unique_ptr<ToolOutputFile> StatsFile = nullptr;
@@ -877,7 +916,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
continue;
GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
: GlobalValue::UnnamedAddr::None);
- if (R.second.Partition == 0)
+ if (EnableLTOInternalization && R.second.Partition == 0)
GV->setLinkage(GlobalValue::InternalLinkage);
}
@@ -917,7 +956,6 @@ class InProcessThinBackend : public ThinBackendProc {
ThreadPool BackendThreadPool;
AddStreamFn AddStream;
NativeObjectCache Cache;
- TypeIdSummariesByGuidTy TypeIdSummariesByGuid;
std::set<GlobalValue::GUID> CfiFunctionDefs;
std::set<GlobalValue::GUID> CfiFunctionDecls;
@@ -933,12 +971,6 @@ public:
: ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries),
BackendThreadPool(ThinLTOParallelismLevel),
AddStream(std::move(AddStream)), Cache(std::move(Cache)) {
- // Create a mapping from type identifier GUIDs to type identifier summaries.
- // This allows backends to use the type identifier GUIDs stored in the
- // function summaries to determine which type identifier summaries affect
- // each function without needing to compute GUIDs in each backend.
- for (auto &TId : CombinedIndex.typeIds())
- TypeIdSummariesByGuid[GlobalValue::getGUID(TId.first)].push_back(&TId);
for (auto &Name : CombinedIndex.cfiFunctionDefs())
CfiFunctionDefs.insert(
GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
@@ -954,8 +986,7 @@ public:
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
- MapVector<StringRef, BitcodeModule> &ModuleMap,
- const TypeIdSummariesByGuidTy &TypeIdSummariesByGuid) {
+ MapVector<StringRef, BitcodeModule> &ModuleMap) {
auto RunThinBackend = [&](AddStreamFn AddStream) {
LTOLLVMContext BackendContext(Conf);
Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext);
@@ -977,9 +1008,9 @@ public:
SmallString<40> Key;
// The module may be cached, this helps handling it.
- computeCacheKey(Key, Conf, CombinedIndex, ModuleID, ImportList, ExportList,
- ResolvedODR, DefinedGlobals, TypeIdSummariesByGuid,
- CfiFunctionDefs, CfiFunctionDecls);
+ computeLTOCacheKey(Key, Conf, CombinedIndex, ModuleID, ImportList,
+ ExportList, ResolvedODR, DefinedGlobals, CfiFunctionDefs,
+ CfiFunctionDecls);
if (AddStreamFn CacheAddStream = Cache(Task, Key))
return RunThinBackend(CacheAddStream);
@@ -1003,11 +1034,10 @@ public:
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
&ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
- MapVector<StringRef, BitcodeModule> &ModuleMap,
- const TypeIdSummariesByGuidTy &TypeIdSummariesByGuid) {
+ MapVector<StringRef, BitcodeModule> &ModuleMap) {
Error E = runThinLTOBackendThread(
AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList,
- ResolvedODR, DefinedGlobals, ModuleMap, TypeIdSummariesByGuid);
+ ResolvedODR, DefinedGlobals, ModuleMap);
if (E) {
std::unique_lock<std::mutex> L(ErrMu);
if (Err)
@@ -1017,8 +1047,7 @@ public:
}
},
BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList),
- std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap),
- std::ref(TypeIdSummariesByGuid));
+ std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap));
return Error::success();
}
@@ -1156,6 +1185,9 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
if (!ModuleToDefinedGVSummaries.count(Mod.first))
ModuleToDefinedGVSummaries.try_emplace(Mod.first);
+ // Synthesize entry counts for functions in the CombinedIndex.
+ computeSyntheticCounts(ThinLTO.CombinedIndex);
+
StringMap<FunctionImporter::ImportMapTy> ImportLists(
ThinLTO.ModuleMap.size());
StringMap<FunctionImporter::ExportSetTy> ExportLists(
@@ -1210,8 +1242,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
GlobalValue::LinkageTypes NewLinkage) {
ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
};
- thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing,
- recordNewLinkage);
+ thinLTOResolvePrevailingInIndex(ThinLTO.CombinedIndex, isPrevailing,
+ recordNewLinkage);
std::unique_ptr<ThinBackendProc> BackendProc =
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
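
The computeLTOCacheKey change above folds the per-backend cache key computation into one exported helper that hashes the LTO configuration, the module hash, import/export lists, resolved ODR linkages and CFI symbols. A toy sketch of the AddString/AddUnsigned accumulation pattern visible in these hunks, assuming a throwaway FNV-1a hash in place of the SHA-1 the real code uses (the inputs fed to it here are illustrative only):

#include <cstdint>
#include <cstdio>
#include <string>

struct KeyHasher {
  uint64_t H = 0xcbf29ce484222325ull;           // FNV-1a offset basis
  void addByte(uint8_t B) { H ^= B; H *= 0x100000001b3ull; }
  void addString(const std::string &S) {
    for (unsigned char C : S) addByte(C);
    addByte(0);                                 // terminator: ("a","bc") != ("ab","c")
  }
  void addUnsigned(unsigned I) {                // 4 bytes, little-endian
    for (int Shift = 0; Shift < 32; Shift += 8) addByte(uint8_t(I >> Shift));
  }
};

int main() {
  KeyHasher Hasher;
  Hasher.addString("x86-64");                   // e.g. target CPU
  Hasher.addString("+sse4.2");                  // e.g. feature string
  Hasher.addUnsigned(2);                        // e.g. optimization level
  std::printf("llvmcache-%016llx\n", (unsigned long long)Hasher.H);
}

The NUL separator after each string matters for the same reason it does in the real helper: without it, different field boundaries could hash to the same key.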
diff --git a/contrib/llvm/lib/LTO/LTOBackend.cpp b/contrib/llvm/lib/LTO/LTOBackend.cpp
index eadbb410bd5a..926c419e34a8 100644
--- a/contrib/llvm/lib/LTO/LTOBackend.cpp
+++ b/contrib/llvm/lib/LTO/LTOBackend.cpp
@@ -138,9 +138,15 @@ createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) {
RelocModel =
M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
+ Optional<CodeModel::Model> CodeModel;
+ if (Conf.CodeModel)
+ CodeModel = *Conf.CodeModel;
+ else
+ CodeModel = M.getCodeModel();
+
return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel,
- Conf.CodeModel, Conf.CGOptLevel));
+ CodeModel, Conf.CGOptLevel));
}
static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
@@ -149,13 +155,14 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
const ModuleSummaryIndex *ImportSummary) {
Optional<PGOOptions> PGOOpt;
if (!Conf.SampleProfile.empty())
- PGOOpt = PGOOptions("", "", Conf.SampleProfile, false, true);
+ PGOOpt = PGOOptions("", "", Conf.SampleProfile, Conf.ProfileRemapping,
+ false, true);
PassBuilder PB(TM, PGOOpt);
AAManager AA;
// Parse a custom AA pipeline if asked to.
- if (!PB.parseAAPipeline(AA, "default"))
+ if (auto Err = PB.parseAAPipeline(AA, "default"))
report_fatal_error("Error parsing default AA pipeline");
LoopAnalysisManager LAM(Conf.DebugPassManager);
@@ -214,9 +221,9 @@ static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
// Parse a custom AA pipeline if asked to.
if (!AAPipelineDesc.empty())
- if (!PB.parseAAPipeline(AA, AAPipelineDesc))
- report_fatal_error("unable to parse AA pipeline description: " +
- AAPipelineDesc);
+ if (auto Err = PB.parseAAPipeline(AA, AAPipelineDesc))
+ report_fatal_error("unable to parse AA pipeline description '" +
+ AAPipelineDesc + "': " + toString(std::move(Err)));
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
@@ -239,9 +246,9 @@ static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
MPM.addPass(VerifierPass());
// Now, add all the passes we've been requested to.
- if (!PB.parsePassPipeline(MPM, PipelineDesc))
- report_fatal_error("unable to parse pass pipeline description: " +
- PipelineDesc);
+ if (auto Err = PB.parsePassPipeline(MPM, PipelineDesc))
+ report_fatal_error("unable to parse pass pipeline description '" +
+ PipelineDesc + "': " + toString(std::move(Err)));
if (!DisableVerify)
MPM.addPass(VerifierPass());
@@ -483,7 +490,7 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex);
- thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals);
+ thinLTOResolvePrevailingInModule(Mod, DefinedGlobals);
if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
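
createTargetMachine above now lets an explicit Conf.CodeModel override the module's own code model and otherwise falls back to it. A minimal sketch of that fallback pattern with std::optional (the enum and function names here are illustrative, not LLVM's):

#include <cassert>
#include <optional>

enum class CodeModel { Small, Kernel, Medium, Large };

// Config-level override wins; otherwise use whatever the module requests,
// which may itself be empty and leave the target's default in effect.
std::optional<CodeModel> pickCodeModel(std::optional<CodeModel> FromConfig,
                                       std::optional<CodeModel> FromModule) {
  return FromConfig ? FromConfig : FromModule;
}

int main() {
  assert(pickCodeModel(CodeModel::Large, CodeModel::Small) == CodeModel::Large);
  assert(pickCodeModel(std::nullopt, CodeModel::Small) == CodeModel::Small);
  assert(!pickCodeModel(std::nullopt, std::nullopt));
}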
diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
index ffe9af74cdca..3b63bbc7e256 100644
--- a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/LTO/LTO.h"
diff --git a/contrib/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm/lib/LTO/LTOModule.cpp
index 20fc0943539f..0d40d49dbe39 100644
--- a/contrib/llvm/lib/LTO/LTOModule.cpp
+++ b/contrib/llvm/lib/LTO/LTOModule.cpp
@@ -73,7 +73,7 @@ bool LTOModule::isBitcodeFile(StringRef Path) {
bool LTOModule::isThinLTO() {
Expected<BitcodeLTOInfo> Result = getBitcodeLTOInfo(MBRef);
if (!Result) {
- logAllUnhandledErrors(Result.takeError(), errs(), "");
+ logAllUnhandledErrors(Result.takeError(), errs());
return false;
}
return Result->IsThinLTO;
diff --git a/contrib/llvm/lib/LTO/SummaryBasedOptimizations.cpp b/contrib/llvm/lib/LTO/SummaryBasedOptimizations.cpp
new file mode 100644
index 000000000000..bcdd984daa58
--- /dev/null
+++ b/contrib/llvm/lib/LTO/SummaryBasedOptimizations.cpp
@@ -0,0 +1,86 @@
+//==-SummaryBasedOptimizations.cpp - Optimizations based on ThinLTO summary-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements optimizations that are based on the module summaries.
+// These optimizations are performed during the thinlink phase of the
+// compilation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LTO/SummaryBasedOptimizations.h"
+#include "llvm/Analysis/SyntheticCountsUtils.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+
+using namespace llvm;
+
+cl::opt<bool> ThinLTOSynthesizeEntryCounts(
+ "thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden,
+ cl::desc("Synthesize entry counts based on the summary"));
+
+extern cl::opt<int> InitialSyntheticCount;
+
+static void initializeCounts(ModuleSummaryIndex &Index) {
+ auto Root = Index.calculateCallGraphRoot();
+ // Root is a fake node. All its successors are the actual roots of the
+ // callgraph.
+ // FIXME: This initializes the entry counts of only the root nodes. This makes
+ // sense when compiling a binary with ThinLTO, but for libraries any of the
+ // non-root nodes could be called from outside.
+ for (auto &C : Root.calls()) {
+ auto &V = C.first;
+ for (auto &GVS : V.getSummaryList()) {
+ auto S = GVS.get()->getBaseObject();
+ auto *F = cast<FunctionSummary>(S);
+ F->setEntryCount(InitialSyntheticCount);
+ }
+ }
+}
+
+void llvm::computeSyntheticCounts(ModuleSummaryIndex &Index) {
+ if (!ThinLTOSynthesizeEntryCounts)
+ return;
+
+ using Scaled64 = ScaledNumber<uint64_t>;
+ initializeCounts(Index);
+ auto GetCallSiteRelFreq = [](FunctionSummary::EdgeTy &Edge) {
+ return Scaled64(Edge.second.RelBlockFreq, -CalleeInfo::ScaleShift);
+ };
+ auto GetEntryCount = [](ValueInfo V) {
+ if (V.getSummaryList().size()) {
+ auto S = V.getSummaryList().front().get()->getBaseObject();
+ auto *F = cast<FunctionSummary>(S);
+ return F->entryCount();
+ } else {
+ return UINT64_C(0);
+ }
+ };
+ auto AddToEntryCount = [](ValueInfo V, Scaled64 New) {
+ if (!V.getSummaryList().size())
+ return;
+ for (auto &GVS : V.getSummaryList()) {
+ auto S = GVS.get()->getBaseObject();
+ auto *F = cast<FunctionSummary>(S);
+ F->setEntryCount(
+ SaturatingAdd(F->entryCount(), New.template toInt<uint64_t>()));
+ }
+ };
+
+ auto GetProfileCount = [&](ValueInfo V, FunctionSummary::EdgeTy &Edge) {
+ auto RelFreq = GetCallSiteRelFreq(Edge);
+ Scaled64 EC(GetEntryCount(V), 0);
+ return RelFreq * EC;
+ };
+ // After initializing the counts in initializeCounts above, the counts have to
+ // be propagated across the combined callgraph.
+ // SyntheticCountsUtils::propagate takes care of this propagation on any
+ // callgraph that specialized GraphTraits.
+ SyntheticCountsUtils<ModuleSummaryIndex *>::propagate(&Index, GetProfileCount,
+ AddToEntryCount);
+ Index.setHasSyntheticEntryCounts();
+}
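
The new file above seeds synthetic entry counts at the successors of the index's fake call-graph root and then propagates them across the combined call graph, scaling each edge by its relative block frequency. A hedged, self-contained sketch of that propagation on a toy graph (plain doubles and a hand-ordered edge list stand in for ScaledNumber and SyntheticCountsUtils; all names and numbers are made up):

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // Root seed: only entry points get a nonzero starting count.
  std::map<std::string, uint64_t> EntryCount = {{"main", 10}};
  struct Edge { std::string Caller, Callee; double RelFreq; };
  std::vector<Edge> Edges = {
      {"main", "foo", 2.0},   // foo runs about twice per entry to main
      {"main", "bar", 0.5},
      {"foo", "bar", 1.0},
  };
  // Propagate caller count * edge frequency into each callee. The edge list
  // is assumed to be topologically ordered so callers are finalized first.
  for (const Edge &E : Edges)
    EntryCount[E.Callee] += uint64_t(EntryCount[E.Caller] * E.RelFreq);
  for (const auto &KV : EntryCount)
    std::printf("%s: %llu\n", KV.first.c_str(), (unsigned long long)KV.second);
}

Seeding only the root nodes is the limitation the file's own FIXME calls out: for a library, non-root functions can be entered from outside, yet their synthetic counts start at zero.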
diff --git a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 642e538ecf92..d9ec68fe3eb5 100644
--- a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -29,9 +29,11 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/LTO/LTO.h"
+#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/CachePruning.h"
@@ -297,8 +299,7 @@ public:
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
- const GVSummaryMapTy &DefinedFunctions,
- const DenseSet<GlobalValue::GUID> &PreservedSymbols, unsigned OptLevel,
+ const GVSummaryMapTy &DefinedGVSummaries, unsigned OptLevel,
bool Freestanding, const TargetMachineBuilder &TMBuilder) {
if (CachePath.empty())
return;
@@ -307,87 +308,26 @@ public:
// The module does not have an entry, it can't have a hash at all
return;
- // Compute the unique hash for this entry
- // This is based on the current compiler version, the module itself, the
- // export list, the hash for every single module in the import list, the
- // list of ResolvedODR for the module, and the list of preserved symbols.
-
- // Include the hash for the current module
- auto ModHash = Index.getModuleHash(ModuleID);
-
- if (all_of(ModHash, [](uint32_t V) { return V == 0; }))
+ if (all_of(Index.getModuleHash(ModuleID),
+ [](uint32_t V) { return V == 0; }))
// No hash entry, no caching!
return;
- SHA1 Hasher;
-
- // Include the parts of the LTO configuration that affect code generation.
- auto AddString = [&](StringRef Str) {
- Hasher.update(Str);
- Hasher.update(ArrayRef<uint8_t>{0});
- };
- auto AddUnsigned = [&](unsigned I) {
- uint8_t Data[4];
- Data[0] = I;
- Data[1] = I >> 8;
- Data[2] = I >> 16;
- Data[3] = I >> 24;
- Hasher.update(ArrayRef<uint8_t>{Data, 4});
- };
-
- // Start with the compiler revision
- Hasher.update(LLVM_VERSION_STRING);
-#ifdef LLVM_REVISION
- Hasher.update(LLVM_REVISION);
-#endif
-
- // Hash the optimization level and the target machine settings.
- AddString(TMBuilder.MCpu);
- // FIXME: Hash more of Options. For now all clients initialize Options from
- // command-line flags (which is unsupported in production), but may set
- // RelaxELFRelocations. The clang driver can also pass FunctionSections,
- // DataSections and DebuggerTuning via command line flags.
- AddUnsigned(TMBuilder.Options.RelaxELFRelocations);
- AddUnsigned(TMBuilder.Options.FunctionSections);
- AddUnsigned(TMBuilder.Options.DataSections);
- AddUnsigned((unsigned)TMBuilder.Options.DebuggerTuning);
- AddString(TMBuilder.MAttr);
- if (TMBuilder.RelocModel)
- AddUnsigned(*TMBuilder.RelocModel);
- AddUnsigned(TMBuilder.CGOptLevel);
- AddUnsigned(OptLevel);
- AddUnsigned(Freestanding);
-
- Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
- for (auto F : ExportList)
- // The export list can impact the internalization, be conservative here
- Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
-
- // Include the hash for every module we import functions from
- for (auto &Entry : ImportList) {
- auto ModHash = Index.getModuleHash(Entry.first());
- Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
- }
-
- // Include the hash for the resolved ODR.
- for (auto &Entry : ResolvedODR) {
- Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
- sizeof(GlobalValue::GUID)));
- Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
- sizeof(GlobalValue::LinkageTypes)));
- }
-
- // Include the hash for the preserved symbols.
- for (auto &Entry : PreservedSymbols) {
- if (DefinedFunctions.count(Entry))
- Hasher.update(
- ArrayRef<uint8_t>((const uint8_t *)&Entry, sizeof(GlobalValue::GUID)));
- }
+ llvm::lto::Config Conf;
+ Conf.OptLevel = OptLevel;
+ Conf.Options = TMBuilder.Options;
+ Conf.CPU = TMBuilder.MCpu;
+ Conf.MAttrs.push_back(TMBuilder.MAttr);
+ Conf.RelocModel = TMBuilder.RelocModel;
+ Conf.CGOptLevel = TMBuilder.CGOptLevel;
+ Conf.Freestanding = Freestanding;
+ SmallString<40> Key;
+ computeLTOCacheKey(Key, Conf, Index, ModuleID, ImportList, ExportList,
+ ResolvedODR, DefinedGVSummaries);
// This choice of file name allows the cache to be pruned (see pruneCache()
// in include/llvm/Support/CachePruning.h).
- sys::path::append(EntryPath, CachePath,
- "llvmcache-" + toHex(Hasher.result()));
+ sys::path::append(EntryPath, CachePath, "llvmcache-" + Key);
}
// Access the path to this entry in the cache.
@@ -456,8 +396,8 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
if (!SingleModule) {
promoteModule(TheModule, Index);
- // Apply summary-based LinkOnce/Weak resolution decisions.
- thinLTOResolveWeakForLinkerModule(TheModule, DefinedGlobals);
+ // Apply summary-based prevailing-symbol resolution decisions.
+ thinLTOResolvePrevailingInModule(TheModule, DefinedGlobals);
// Save temps: after promotion.
saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc");
@@ -499,12 +439,12 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
return codegenModule(TheModule, TM);
}
-/// Resolve LinkOnce/Weak symbols. Record resolutions in the \p ResolvedODR map
+/// Resolve prevailing symbols. Record resolutions in the \p ResolvedODR map
/// for caching, and in the \p Index for application during the ThinLTO
/// backends. This is needed for correctness for exported symbols (ensure
/// at least one copy kept) and a compile-time optimization (to drop duplicate
/// copies when possible).
-static void resolveWeakForLinkerInIndex(
+static void resolvePrevailingInIndex(
ModuleSummaryIndex &Index,
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>>
&ResolvedODR) {
@@ -526,7 +466,7 @@ static void resolveWeakForLinkerInIndex(
ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
};
- thinLTOResolveWeakForLinkerInIndex(Index, isPrevailing, recordNewLinkage);
+ thinLTOResolvePrevailingInIndex(Index, isPrevailing, recordNewLinkage);
}
// Initialize the TargetMachine builder for a given Triple
@@ -645,7 +585,8 @@ static void computeDeadSymbolsInIndex(
auto isPrevailing = [&](GlobalValue::GUID G) {
return PrevailingType::Unknown;
};
- computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
+ computeDeadSymbolsWithConstProp(Index, GUIDPreservedSymbols, isPrevailing,
+ /* ImportEnabled = */ true);
}
/**
@@ -674,11 +615,11 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
ExportLists);
- // Resolve LinkOnce/Weak symbols.
+ // Resolve prevailing symbols
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
- resolveWeakForLinkerInIndex(Index, ResolvedODR);
+ resolvePrevailingInIndex(Index, ResolvedODR);
- thinLTOResolveWeakForLinkerModule(
+ thinLTOResolvePrevailingInModule(
TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
// Promote the exported values in the index, so that they are promoted
@@ -721,37 +662,52 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
* Compute the list of summaries needed for importing into module.
*/
void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
- StringRef ModulePath, ModuleSummaryIndex &Index,
+ Module &TheModule, ModuleSummaryIndex &Index,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
auto ModuleCount = Index.modulePaths().size();
+ auto ModuleIdentifier = TheModule.getModuleIdentifier();
// Collect for each module the list of function it defines (GUID -> Summary).
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Convert the preserved symbols set from string to GUID
+ auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
+ PreservedSymbols, Triple(TheModule.getTargetTriple()));
+
+ // Compute "dead" symbols, we don't want to import/export these!
+ computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
ExportLists);
- llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
- ImportLists[ModulePath],
- ModuleToSummariesForIndex);
+ llvm::gatherImportedSummariesForModule(
+ ModuleIdentifier, ModuleToDefinedGVSummaries,
+ ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
}
/**
* Emit the list of files needed for importing into module.
*/
-void ThinLTOCodeGenerator::emitImports(StringRef ModulePath,
- StringRef OutputName,
+void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
ModuleSummaryIndex &Index) {
auto ModuleCount = Index.modulePaths().size();
+ auto ModuleIdentifier = TheModule.getModuleIdentifier();
// Collect for each module the list of function it defines (GUID -> Summary).
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Convert the preserved symbols set from string to GUID
+ auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
+ PreservedSymbols, Triple(TheModule.getTargetTriple()));
+
+ // Compute "dead" symbols, we don't want to import/export these!
+ computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
@@ -759,13 +715,13 @@ void ThinLTOCodeGenerator::emitImports(StringRef ModulePath,
ExportLists);
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
- llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
- ImportLists[ModulePath],
- ModuleToSummariesForIndex);
+ llvm::gatherImportedSummariesForModule(
+ ModuleIdentifier, ModuleToDefinedGVSummaries,
+ ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
std::error_code EC;
- if ((EC =
- EmitImportsFiles(ModulePath, OutputName, ModuleToSummariesForIndex)))
+ if ((EC = EmitImportsFiles(ModuleIdentifier, OutputName,
+ ModuleToSummariesForIndex)))
report_fatal_error(Twine("Failed to open ") + OutputName +
" to save imports lists\n");
}
@@ -818,14 +774,6 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) {
optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding);
}
-/**
- * Perform ThinLTO CodeGen.
- */
-std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
- initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
- return codegenModule(TheModule, *TMBuilder.create());
-}
-
/// Write out the generated object file, either from CacheEntryPath or from
/// OutputBuffer, preferring hard-link when possible.
/// Returns the path to the generated file in SavedObjectsDirectoryPath.
@@ -893,7 +841,7 @@ void ThinLTOCodeGenerator::run() {
/*IsImporting*/ false);
// CodeGen
- auto OutputBuffer = codegen(*TheModule);
+ auto OutputBuffer = codegenModule(*TheModule, *TMBuilder.create());
if (SavedObjectsDirectoryPath.empty())
ProducedBinaries[count] = std::move(OutputBuffer);
else
@@ -936,6 +884,9 @@ void ThinLTOCodeGenerator::run() {
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols);
+ // Synthesize entry counts for functions in the combined index.
+ computeSyntheticCounts(*Index);
+
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
@@ -949,20 +900,24 @@ void ThinLTOCodeGenerator::run() {
// on the index, and nuke this map.
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
- // Resolve LinkOnce/Weak symbols, this has to be computed early because it
+ // Resolve prevailing symbols, this has to be computed early because it
// impacts the caching.
- resolveWeakForLinkerInIndex(*Index, ResolvedODR);
+ resolvePrevailingInIndex(*Index, ResolvedODR);
// Use global summary-based analysis to identify symbols that can be
// internalized (because they aren't exported or preserved as per callback).
// Changes are made in the index, consumed in the ThinLTO backends.
internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, *Index);
- // Make sure that every module has an entry in the ExportLists and
- // ResolvedODR maps to enable threaded access to these maps below.
- for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
- ExportLists[DefinedGVSummaries.first()];
- ResolvedODR[DefinedGVSummaries.first()];
+ // Make sure that every module has an entry in the ExportLists, ImportList,
+ // GVSummary and ResolvedODR maps to enable threaded access to these maps
+ // below.
+ for (auto &Module : Modules) {
+ auto ModuleIdentifier = Module.getBufferIdentifier();
+ ExportLists[ModuleIdentifier];
+ ImportLists[ModuleIdentifier];
+ ResolvedODR[ModuleIdentifier];
+ ModuleToDefinedGVSummaries[ModuleIdentifier];
}
// Compute the ordering we will process the inputs: the rough heuristic here
@@ -971,12 +926,11 @@ void ThinLTOCodeGenerator::run() {
std::vector<int> ModulesOrdering;
ModulesOrdering.resize(Modules.size());
std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
- llvm::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
- [&](int LeftIndex, int RightIndex) {
- auto LSize = Modules[LeftIndex].getBuffer().size();
- auto RSize = Modules[RightIndex].getBuffer().size();
- return LSize > RSize;
- });
+ llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
+ auto LSize = Modules[LeftIndex].getBuffer().size();
+ auto RSize = Modules[RightIndex].getBuffer().size();
+ return LSize > RSize;
+ });
// Parallel optimizer + codegen
{
@@ -987,14 +941,14 @@ void ThinLTOCodeGenerator::run() {
auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
auto &ExportList = ExportLists[ModuleIdentifier];
- auto &DefinedFunctions = ModuleToDefinedGVSummaries[ModuleIdentifier];
+ auto &DefinedGVSummaries = ModuleToDefinedGVSummaries[ModuleIdentifier];
// The module may be cached, this helps handling it.
ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
ImportLists[ModuleIdentifier], ExportList,
ResolvedODR[ModuleIdentifier],
- DefinedFunctions, GUIDPreservedSymbols,
- OptLevel, Freestanding, TMBuilder);
+ DefinedGVSummaries, OptLevel, Freestanding,
+ TMBuilder);
auto CacheEntryPath = CacheEntry.getEntryPath();
{
diff --git a/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp b/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp
index c982a5b0e5aa..00482dee6e10 100644
--- a/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp
+++ b/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp
@@ -95,12 +95,18 @@ private:
if (GV.hasPrivateLinkage())
return;
- // Conservatively append user-supplied runtime library functions to
- // llvm.compiler.used. These could be internalized and deleted by
- // optimizations like -globalopt, causing problems when later optimizations
- // add new library calls (e.g., llvm.memset => memset and printf => puts).
+ // Conservatively append user-supplied runtime library functions (supplied
+ // either directly, or via a function alias) to llvm.compiler.used. These
+ // could be internalized and deleted by optimizations like -globalopt,
+ // causing problems when later optimizations add new library calls (e.g.,
+ // llvm.memset => memset and printf => puts).
// Leave it to the linker to remove any dead code (e.g. with -dead_strip).
- if (isa<Function>(GV) && Libcalls.count(GV.getName())) {
+ GlobalValue *FuncAliasee = nullptr;
+ if (isa<GlobalAlias>(GV)) {
+ auto *A = cast<GlobalAlias>(&GV);
+ FuncAliasee = dyn_cast<Function>(A->getAliasee());
+ }
+ if ((isa<Function>(GV) || FuncAliasee) && Libcalls.count(GV.getName())) {
LLVMUsed.push_back(&GV);
return;
}
diff --git a/contrib/llvm/lib/Linker/IRMover.cpp b/contrib/llvm/lib/Linker/IRMover.cpp
index 738dec8e1f29..afbc57abfcc0 100644
--- a/contrib/llvm/lib/Linker/IRMover.cpp
+++ b/contrib/llvm/lib/Linker/IRMover.cpp
@@ -978,11 +978,14 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
// containing a GV from the source module, in which case SGV will be
// the same as DGV and NewGV, and TypeMap.get() will assert since it
// assumes it is being invoked on a type in the source module.
- if (DGV && NewGV != SGV)
- C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType()));
+ if (DGV && NewGV != SGV) {
+ C = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ NewGV, TypeMap.get(SGV->getType()));
+ }
if (DGV && NewGV != DGV) {
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType()));
+ DGV->replaceAllUsesWith(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(NewGV, DGV->getType()));
DGV->eraseFromParent();
}
@@ -1059,10 +1062,15 @@ void IRLinker::prepareCompileUnitsForImport() {
ValueMap.MD()[CU->getRawEnumTypes()].reset(nullptr);
ValueMap.MD()[CU->getRawMacros()].reset(nullptr);
ValueMap.MD()[CU->getRawRetainedTypes()].reset(nullptr);
- // We import global variables only temporarily in order for instcombine
- // and globalopt to perform constant folding and static constructor
- // evaluation. After that elim-avail-extern will covert imported globals
- // back to declarations, so we don't need debug info for them.
+ // The original definition (or at least its debug info - if the variable is
+ // internalized and optimized away) will remain in the source module, so
+ // there's no need to import them.
+ // If LLVM ever does more advanced optimizations on global variables
+ // (removing/localizing write operations, for instance) that can track
+ // through debug info, this decision may need to be revisited - but do so
+ // with care when it comes to debug info size. Emitting small CUs containing
+ // only a few imported entities into every destination module may be very
+ // size inefficient.
ValueMap.MD()[CU->getRawGlobalVariables()].reset(nullptr);
// Imported entities only need to be mapped in if they have local
@@ -1227,8 +1235,14 @@ Error IRLinker::linkModuleFlagsMetadata() {
case Module::Warning: {
// Emit a warning if the values differ.
if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
- emitWarning("linking module flags '" + ID->getString() +
- "': IDs have conflicting values");
+ std::string str;
+ raw_string_ostream(str)
+ << "linking module flags '" << ID->getString()
+ << "': IDs have conflicting values ('" << *SrcOp->getOperand(2)
+ << "' from " << SrcM->getModuleIdentifier() << " with '"
+ << *DstOp->getOperand(2) << "' from " << DstM.getModuleIdentifier()
+ << ')';
+ emitWarning(str);
}
continue;
}
diff --git a/contrib/llvm/lib/MC/ConstantPools.cpp b/contrib/llvm/lib/MC/ConstantPools.cpp
index ca5440237e49..18277a225640 100644
--- a/contrib/llvm/lib/MC/ConstantPools.cpp
+++ b/contrib/llvm/lib/MC/ConstantPools.cpp
@@ -97,16 +97,14 @@ void AssemblerConstantPools::emitAll(MCStreamer &Streamer) {
void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
MCSection *Section = Streamer.getCurrentSectionOnly();
- if (ConstantPool *CP = getConstantPool(Section)) {
+ if (ConstantPool *CP = getConstantPool(Section))
emitConstantPool(Streamer, Section, *CP);
- }
}
void AssemblerConstantPools::clearCacheForCurrentSection(MCStreamer &Streamer) {
MCSection *Section = Streamer.getCurrentSectionOnly();
- if (ConstantPool *CP = getConstantPool(Section)) {
+ if (ConstantPool *CP = getConstantPool(Section))
CP->clearCache();
- }
}
const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
index db531f75c87c..89f3b30cddd6 100644
--- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -29,6 +29,7 @@
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCFragment.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
@@ -668,6 +669,20 @@ void ELFWriter::computeSymbolTable(
} else {
const MCSectionELF &Section =
static_cast<const MCSectionELF &>(Symbol.getSection());
+
+ // We may end up with a situation where a section symbol is technically
+ // defined, but should not be. That happens because we explicitly
+ // pre-create a few .debug_* sections to have accessors.
+ // And if these sections were not really defined in the code, but were
+ // referenced, we simply error out.
+ if (!Section.isRegistered()) {
+ assert(static_cast<const MCSymbolELF &>(Symbol).getType() ==
+ ELF::STT_SECTION);
+ Ctx.reportError(SMLoc(),
+ "Undefined section reference: " + Symbol.getName());
+ continue;
+ }
+
if (Mode == NonDwoOnly && isDwoSection(Section))
continue;
MSD.SectionIndex = SectionIndexMap.lookup(&Section);
@@ -1107,6 +1122,8 @@ uint64_t ELFWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
SectionIndexMap[RelSection] = addToSectionTable(RelSection);
Relocations.push_back(RelSection);
}
+
+ OWriter.TargetObjectWriter->addTargetSectionFlags(Ctx, Section);
}
MCSectionELF *CGProfileSection = nullptr;
@@ -1273,6 +1290,8 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
for (const MCSymbol *&Sym : AddrsigSyms) {
if (const MCSymbol *R = Renames.lookup(cast<MCSymbolELF>(Sym)))
Sym = R;
+ if (Sym->isInSection() && Sym->getName().startswith(".L"))
+ Sym = Sym->getSection().getBeginSymbol();
Sym->setUsedInReloc();
}
}
diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
index d8fb875b67c6..15886eb619b9 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
@@ -25,7 +25,7 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
COMMDirectiveAlignmentIsInBytes = false;
LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
HasDotTypeDotSizeDirective = false;
- HasSingleParameterDotFile = false;
+ HasSingleParameterDotFile = true;
WeakRefDirective = "\t.weak\t";
HasLinkOnceDirective = true;
diff --git a/contrib/llvm/lib/MC/MCAsmInfoWasm.cpp b/contrib/llvm/lib/MC/MCAsmInfoWasm.cpp
index fc55059ff75d..d448664baa14 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoWasm.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoWasm.cpp
@@ -15,7 +15,7 @@
#include "llvm/MC/MCAsmInfoWasm.h"
using namespace llvm;
-void MCAsmInfoWasm::anchor() { }
+void MCAsmInfoWasm::anchor() {}
MCAsmInfoWasm::MCAsmInfoWasm() {
HasIdentDirective = true;
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
index ae02f50bf8bd..e017103070bf 100644
--- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -147,9 +147,9 @@ public:
void EmitLinkerOptions(ArrayRef<std::string> Options) override;
void EmitDataRegion(MCDataRegionType Kind) override;
void EmitVersionMin(MCVersionMinType Kind, unsigned Major, unsigned Minor,
- unsigned Update) override;
+ unsigned Update, VersionTuple SDKVersion) override;
void EmitBuildVersion(unsigned Platform, unsigned Major, unsigned Minor,
- unsigned Update) override;
+ unsigned Update, VersionTuple SDKVersion) override;
void EmitThumbFunc(MCSymbol *Func) override;
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
@@ -266,6 +266,7 @@ public:
void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc L) override;
void EmitIdent(StringRef IdentString) override;
+ void EmitCFIBKeyFrame() override;
void EmitCFISections(bool EH, bool Debug) override;
void EmitCFIDefCfa(int64_t Register, int64_t Offset) override;
void EmitCFIDefCfaOffset(int64_t Offset) override;
@@ -285,10 +286,12 @@ public:
void EmitCFIUndefined(int64_t Register) override;
void EmitCFIRegister(int64_t Register1, int64_t Register2) override;
void EmitCFIWindowSave() override;
+ void EmitCFINegateRAState() override;
void EmitCFIReturnColumn(int64_t Register) override;
void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) override;
void EmitWinCFIEndProc(SMLoc Loc) override;
+ void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) override;
void EmitWinCFIStartChained(SMLoc Loc) override;
void EmitWinCFIEndChained(SMLoc Loc) override;
void EmitWinCFIPushReg(unsigned Register, SMLoc Loc) override;
@@ -513,31 +516,51 @@ static const char *getVersionMinDirective(MCVersionMinType Type) {
llvm_unreachable("Invalid MC version min type");
}
+static void EmitSDKVersionSuffix(raw_ostream &OS,
+ const VersionTuple &SDKVersion) {
+ if (SDKVersion.empty())
+ return;
+ OS << '\t' << "sdk_version " << SDKVersion.getMajor();
+ if (auto Minor = SDKVersion.getMinor()) {
+ OS << ", " << *Minor;
+ if (auto Subminor = SDKVersion.getSubminor()) {
+ OS << ", " << *Subminor;
+ }
+ }
+}
+
void MCAsmStreamer::EmitVersionMin(MCVersionMinType Type, unsigned Major,
- unsigned Minor, unsigned Update) {
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {
OS << '\t' << getVersionMinDirective(Type) << ' ' << Major << ", " << Minor;
if (Update)
OS << ", " << Update;
+ EmitSDKVersionSuffix(OS, SDKVersion);
EmitEOL();
}
static const char *getPlatformName(MachO::PlatformType Type) {
switch (Type) {
- case MachO::PLATFORM_MACOS: return "macos";
- case MachO::PLATFORM_IOS: return "ios";
- case MachO::PLATFORM_TVOS: return "tvos";
- case MachO::PLATFORM_WATCHOS: return "watchos";
- case MachO::PLATFORM_BRIDGEOS: return "bridgeos";
+ case MachO::PLATFORM_MACOS: return "macos";
+ case MachO::PLATFORM_IOS: return "ios";
+ case MachO::PLATFORM_TVOS: return "tvos";
+ case MachO::PLATFORM_WATCHOS: return "watchos";
+ case MachO::PLATFORM_BRIDGEOS: return "bridgeos";
+ case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator";
+ case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator";
+ case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
}
llvm_unreachable("Invalid Mach-O platform type");
}
void MCAsmStreamer::EmitBuildVersion(unsigned Platform, unsigned Major,
- unsigned Minor, unsigned Update) {
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {
const char *PlatformName = getPlatformName((MachO::PlatformType)Platform);
OS << "\t.build_version " << PlatformName << ", " << Major << ", " << Minor;
if (Update)
OS << ", " << Update;
+ EmitSDKVersionSuffix(OS, SDKVersion);
EmitEOL();
}
@@ -858,10 +881,14 @@ void MCAsmStreamer::EmitBytes(StringRef Data) {
// supported, emit as vector of 8bits data.
if (Data.size() == 1 ||
!(MAI->getAscizDirective() || MAI->getAsciiDirective())) {
- const char *Directive = MAI->getData8bitsDirective();
- for (const unsigned char C : Data.bytes()) {
- OS << Directive << (unsigned)C;
- EmitEOL();
+ if (MCTargetStreamer *TS = getTargetStreamer()) {
+ TS->emitRawBytes(Data);
+ } else {
+ const char *Directive = MAI->getData8bitsDirective();
+ for (const unsigned char C : Data.bytes()) {
+ OS << Directive << (unsigned)C;
+ EmitEOL();
+ }
}
return;
}
@@ -1298,20 +1325,17 @@ void MCAsmStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
unsigned Line, unsigned Column,
bool PrologueEnd, bool IsStmt,
StringRef FileName, SMLoc Loc) {
+ // Validate the directive.
+ if (!checkCVLocSection(FunctionId, FileNo, Loc))
+ return;
+
OS << "\t.cv_loc\t" << FunctionId << " " << FileNo << " " << Line << " "
<< Column;
if (PrologueEnd)
OS << " prologue_end";
- unsigned OldIsStmt = getContext().getCVContext().getCurrentCVLoc().isStmt();
- if (IsStmt != OldIsStmt) {
- OS << " is_stmt ";
-
- if (IsStmt)
- OS << "1";
- else
- OS << "0";
- }
+ if (IsStmt)
+ OS << " is_stmt 1";
if (IsVerboseAsm) {
OS.PadToColumn(MAI->getCommentColumn());
@@ -1319,8 +1343,6 @@ void MCAsmStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
<< Column;
}
EmitEOL();
- this->MCStreamer::EmitCVLocDirective(FunctionId, FileNo, Line, Column,
- PrologueEnd, IsStmt, FileName, Loc);
}
void MCAsmStreamer::EmitCVLinetableDirective(unsigned FunctionId,
@@ -1569,12 +1591,24 @@ void MCAsmStreamer::EmitCFIWindowSave() {
EmitEOL();
}
+void MCAsmStreamer::EmitCFINegateRAState() {
+ MCStreamer::EmitCFINegateRAState();
+ OS << "\t.cfi_negate_ra_state";
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitCFIReturnColumn(int64_t Register) {
MCStreamer::EmitCFIReturnColumn(Register);
OS << "\t.cfi_return_column " << Register;
EmitEOL();
}
+void MCAsmStreamer::EmitCFIBKeyFrame() {
+ MCStreamer::EmitCFIBKeyFrame();
+ OS << "\t.cfi_b_key_frame";
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) {
MCStreamer::EmitWinCFIStartProc(Symbol, Loc);
@@ -1590,6 +1624,10 @@ void MCAsmStreamer::EmitWinCFIEndProc(SMLoc Loc) {
EmitEOL();
}
+// TODO: Implement
+void MCAsmStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) {
+}
+
void MCAsmStreamer::EmitWinCFIStartChained(SMLoc Loc) {
MCStreamer::EmitWinCFIStartChained(Loc);
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
index 1e23b6d816e8..cde6a93a1647 100644
--- a/contrib/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -111,6 +111,7 @@ void MCAssembler::reset() {
ELFHeaderEFlags = 0;
LOHContainer.reset();
VersionInfo.Major = 0;
+ VersionInfo.SDKVersion = VersionTuple();
// reset objects owned by us
if (getBackendPtr())
diff --git a/contrib/llvm/lib/MC/MCCodeView.cpp b/contrib/llvm/lib/MC/MCCodeView.cpp
index 155fd7eeb576..978ac789c31e 100644
--- a/contrib/llvm/lib/MC/MCCodeView.cpp
+++ b/contrib/llvm/lib/MC/MCCodeView.cpp
@@ -128,6 +128,14 @@ bool CodeViewContext::recordInlinedCallSiteId(unsigned FuncId, unsigned IAFunc,
return true;
}
+void CodeViewContext::recordCVLoc(MCContext &Ctx, const MCSymbol *Label,
+ unsigned FunctionId, unsigned FileNo,
+ unsigned Line, unsigned Column,
+ bool PrologueEnd, bool IsStmt) {
+ addLineEntry(MCCVLoc{
+ Label, FunctionId, FileNo, Line, Column, PrologueEnd, IsStmt});
+}
+
MCDataFragment *CodeViewContext::getStringTableFragment() {
if (!StrTabFragment) {
StrTabFragment = new MCDataFragment();
@@ -255,7 +263,7 @@ void CodeViewContext::emitFileChecksumOffset(MCObjectStreamer &OS,
OS.EmitValueImpl(SRE, 4);
}
-void CodeViewContext::addLineEntry(const MCCVLineEntry &LineEntry) {
+void CodeViewContext::addLineEntry(const MCCVLoc &LineEntry) {
size_t Offset = MCCVLines.size();
auto I = MCCVLineStartStop.insert(
{LineEntry.getFunctionId(), {Offset, Offset + 1}});
@@ -264,9 +272,9 @@ void CodeViewContext::addLineEntry(const MCCVLineEntry &LineEntry) {
MCCVLines.push_back(LineEntry);
}
-std::vector<MCCVLineEntry>
+std::vector<MCCVLoc>
CodeViewContext::getFunctionLineEntries(unsigned FuncId) {
- std::vector<MCCVLineEntry> FilteredLines;
+ std::vector<MCCVLoc> FilteredLines;
auto I = MCCVLineStartStop.find(FuncId);
if (I != MCCVLineStartStop.end()) {
MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId);
@@ -289,9 +297,9 @@ CodeViewContext::getFunctionLineEntries(unsigned FuncId) {
FilteredLines.back().getFileNum() != IA.File ||
FilteredLines.back().getLine() != IA.Line ||
FilteredLines.back().getColumn() != IA.Col) {
- FilteredLines.push_back(MCCVLineEntry(
+ FilteredLines.push_back(MCCVLoc(
MCCVLines[Idx].getLabel(),
- MCCVLoc(FuncId, IA.File, IA.Line, IA.Col, false, false)));
+ FuncId, IA.File, IA.Line, IA.Col, false, false));
}
}
}
@@ -308,7 +316,7 @@ std::pair<size_t, size_t> CodeViewContext::getLineExtent(unsigned FuncId) {
return I->second;
}
-ArrayRef<MCCVLineEntry> CodeViewContext::getLinesForExtent(size_t L, size_t R) {
+ArrayRef<MCCVLoc> CodeViewContext::getLinesForExtent(size_t L, size_t R) {
if (R <= L)
return None;
if (L >= MCCVLines.size())
@@ -331,8 +339,8 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
OS.EmitCOFFSectionIndex(FuncBegin);
// Actual line info.
- std::vector<MCCVLineEntry> Locs = getFunctionLineEntries(FuncId);
- bool HaveColumns = any_of(Locs, [](const MCCVLineEntry &LineEntry) {
+ std::vector<MCCVLoc> Locs = getFunctionLineEntries(FuncId);
+ bool HaveColumns = any_of(Locs, [](const MCCVLoc &LineEntry) {
return LineEntry.getColumn() != 0;
});
OS.EmitIntValue(HaveColumns ? int(LF_HaveColumns) : 0, 2);
@@ -342,7 +350,7 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
// Emit a file segment for the run of locations that share a file id.
unsigned CurFileNum = I->getFileNum();
auto FileSegEnd =
- std::find_if(I, E, [CurFileNum](const MCCVLineEntry &Loc) {
+ std::find_if(I, E, [CurFileNum](const MCCVLoc &Loc) {
return Loc.getFileNum() != CurFileNum;
});
unsigned EntryCount = FileSegEnd - I;
@@ -424,13 +432,13 @@ void CodeViewContext::emitInlineLineTableForFunction(MCObjectStreamer &OS,
OS.getCurrentSectionOnly());
}
-void CodeViewContext::emitDefRange(
+MCFragment *CodeViewContext::emitDefRange(
MCObjectStreamer &OS,
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion) {
// Create and insert a fragment into the current section that will be encoded
// later.
- new MCCVDefRangeFragment(Ranges, FixedSizePortion,
+ return new MCCVDefRangeFragment(Ranges, FixedSizePortion,
OS.getCurrentSectionOnly());
}
@@ -468,14 +476,14 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
if (LocBegin >= LocEnd)
return;
- ArrayRef<MCCVLineEntry> Locs = getLinesForExtent(LocBegin, LocEnd);
+ ArrayRef<MCCVLoc> Locs = getLinesForExtent(LocBegin, LocEnd);
if (Locs.empty())
return;
// Check that the locations are all in the same section.
#ifndef NDEBUG
const MCSection *FirstSec = &Locs.front().getLabel()->getSection();
- for (const MCCVLineEntry &Loc : Locs) {
+ for (const MCCVLoc &Loc : Locs) {
if (&Loc.getLabel()->getSection() != FirstSec) {
errs() << ".cv_loc " << Loc.getFunctionId() << ' ' << Loc.getFileNum()
<< ' ' << Loc.getLine() << ' ' << Loc.getColumn()
@@ -488,7 +496,8 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
// Make an artificial start location using the function start and the inlinee
// lines start location information. All deltas start relative to this
// location.
- MCCVLineEntry StartLoc(Frag.getFnStartSym(), MCCVLoc(Locs.front()));
+ MCCVLoc StartLoc = Locs.front();
+ StartLoc.setLabel(Frag.getFnStartSym());
StartLoc.setFileNum(Frag.StartFileId);
StartLoc.setLine(Frag.StartLineNum);
bool HaveOpenRange = false;
@@ -500,7 +509,7 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
SmallVectorImpl<char> &Buffer = Frag.getContents();
Buffer.clear(); // Clear old contents if we went through relaxation.
- for (const MCCVLineEntry &Loc : Locs) {
+ for (const MCCVLoc &Loc : Locs) {
// Exit early if our line table would produce an oversized InlineSiteSym
// record. Account for the ChangeCodeLength annotation emitted after the
// loop ends.
@@ -585,10 +594,10 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
unsigned EndSymLength =
computeLabelDiff(Layout, LastLabel, Frag.getFnEndSym());
unsigned LocAfterLength = ~0U;
- ArrayRef<MCCVLineEntry> LocAfter = getLinesForExtent(LocEnd, LocEnd + 1);
+ ArrayRef<MCCVLoc> LocAfter = getLinesForExtent(LocEnd, LocEnd + 1);
if (!LocAfter.empty()) {
// Only try to compute this difference if we're in the same section.
- const MCCVLineEntry &Loc = LocAfter[0];
+ const MCCVLoc &Loc = LocAfter[0];
if (&Loc.getLabel()->getSection() == &LastLabel->getSection())
LocAfterLength = computeLabelDiff(Layout, LastLabel, Loc.getLabel());
}
@@ -686,31 +695,3 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout,
}
}
}
-
-//
-// This is called when an instruction is assembled into the specified section
-// and if there is information from the last .cv_loc directive that has yet to have
-// a line entry made for it is made.
-//
-void MCCVLineEntry::Make(MCObjectStreamer *MCOS) {
- CodeViewContext &CVC = MCOS->getContext().getCVContext();
- if (!CVC.getCVLocSeen())
- return;
-
- // Create a symbol at in the current section for use in the line entry.
- MCSymbol *LineSym = MCOS->getContext().createTempSymbol();
- // Set the value of the symbol to use for the MCCVLineEntry.
- MCOS->EmitLabel(LineSym);
-
- // Get the current .loc info saved in the context.
- const MCCVLoc &CVLoc = CVC.getCurrentCVLoc();
-
- // Create a (local) line entry with the symbol and the current .loc info.
- MCCVLineEntry LineEntry(LineSym, CVLoc);
-
- // clear CVLocSeen saying the current .loc info is now used.
- CVC.clearCVLocSeen();
-
- // Add the line entry to this section's entries.
- CVC.addLineEntry(LineEntry);
-}
diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp
index 606da2526890..fab517075c5a 100644
--- a/contrib/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm/lib/MC/MCContext.cpp
@@ -592,7 +592,7 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
return !LineTable.getMCDwarfFiles()[FileNumber].Name.empty();
}
-/// Remove empty sections from SectionStartEndSyms, to avoid generating
+/// Remove empty sections from SectionsForRanges, to avoid generating
/// useless debug info for them.
void MCContext::finalizeDwarfSections(MCStreamer &MCOS) {
SectionsForRanges.remove_if(
@@ -605,11 +605,6 @@ CodeViewContext &MCContext::getCVContext() {
return *CVContext.get();
}
-void MCContext::clearCVLocSeen() {
- if (CVContext)
- CVContext->clearCVLocSeen();
-}
-
//===----------------------------------------------------------------------===//
// Error Reporting
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
index 0461c2564ccf..38b02694d81d 100644
--- a/contrib/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -463,10 +463,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym, 4), 4);
// Next 2 bytes is the Version.
- // FIXME: On Darwin we still default to V2.
unsigned LineTableVersion = context.getDwarfVersion();
- if (context.getObjectFileInfo()->getTargetTriple().isOSDarwin())
- LineTableVersion = 2;
MCOS->EmitIntValue(LineTableVersion, 2);
// Keep track of the bytes between the very start and where the header length
@@ -1335,6 +1332,10 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) {
Streamer.EmitIntValue(dwarf::DW_CFA_GNU_window_save, 1);
return;
+ case MCCFIInstruction::OpNegateRAState:
+ Streamer.EmitIntValue(dwarf::DW_CFA_AARCH64_negate_ra_state, 1);
+ return;
+
case MCCFIInstruction::OpUndefined: {
unsigned Reg = Instr.getRegister();
Streamer.EmitIntValue(dwarf::DW_CFA_undefined, 1);
@@ -1421,7 +1422,12 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) {
unsigned Reg = Instr.getRegister();
if (!IsEH)
Reg = MRI->getDwarfRegNumFromDwarfEHRegNum(Reg);
- Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
+ if (Reg < 64) {
+ Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
+ } else {
+ Streamer.EmitIntValue(dwarf::DW_CFA_restore_extended, 1);
+ Streamer.EmitULEB128IntValue(Reg);
+ }
return;
}
case MCCFIInstruction::OpGnuArgsSize:
@@ -1559,9 +1565,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCDwarfFrameInfo &Frame) {
uint8_t CIEVersion = getCIEVersion(IsEH, context.getDwarfVersion());
Streamer.EmitIntValue(CIEVersion, 1);
- // Augmentation String
- SmallString<8> Augmentation;
if (IsEH) {
+ SmallString<8> Augmentation;
Augmentation += "z";
if (Frame.Personality)
Augmentation += "P";
@@ -1570,6 +1575,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCDwarfFrameInfo &Frame) {
Augmentation += "R";
if (Frame.IsSignalFrame)
Augmentation += "S";
+ if (Frame.IsBKeyFrame)
+ Augmentation += "B";
Streamer.EmitBytes(Augmentation);
}
Streamer.EmitIntValue(0, 1);
@@ -1724,25 +1731,28 @@ namespace {
struct CIEKey {
static const CIEKey getEmptyKey() {
- return CIEKey(nullptr, 0, -1, false, false, static_cast<unsigned>(INT_MAX));
+ return CIEKey(nullptr, 0, -1, false, false, static_cast<unsigned>(INT_MAX),
+ false);
}
static const CIEKey getTombstoneKey() {
- return CIEKey(nullptr, -1, 0, false, false, static_cast<unsigned>(INT_MAX));
+ return CIEKey(nullptr, -1, 0, false, false, static_cast<unsigned>(INT_MAX),
+ false);
}
CIEKey(const MCSymbol *Personality, unsigned PersonalityEncoding,
unsigned LSDAEncoding, bool IsSignalFrame, bool IsSimple,
- unsigned RAReg)
+ unsigned RAReg, bool IsBKeyFrame)
: Personality(Personality), PersonalityEncoding(PersonalityEncoding),
LsdaEncoding(LSDAEncoding), IsSignalFrame(IsSignalFrame),
- IsSimple(IsSimple), RAReg(RAReg) {}
+ IsSimple(IsSimple), RAReg(RAReg), IsBKeyFrame(IsBKeyFrame) {}
explicit CIEKey(const MCDwarfFrameInfo &Frame)
: Personality(Frame.Personality),
PersonalityEncoding(Frame.PersonalityEncoding),
LsdaEncoding(Frame.LsdaEncoding), IsSignalFrame(Frame.IsSignalFrame),
- IsSimple(Frame.IsSimple), RAReg(Frame.RAReg) {}
+ IsSimple(Frame.IsSimple), RAReg(Frame.RAReg),
+ IsBKeyFrame(Frame.IsBKeyFrame) {}
const MCSymbol *Personality;
unsigned PersonalityEncoding;
@@ -1750,6 +1760,7 @@ struct CIEKey {
bool IsSignalFrame;
bool IsSimple;
unsigned RAReg;
+ bool IsBKeyFrame;
};
} // end anonymous namespace
@@ -1761,9 +1772,9 @@ template <> struct DenseMapInfo<CIEKey> {
static CIEKey getTombstoneKey() { return CIEKey::getTombstoneKey(); }
static unsigned getHashValue(const CIEKey &Key) {
- return static_cast<unsigned>(
- hash_combine(Key.Personality, Key.PersonalityEncoding, Key.LsdaEncoding,
- Key.IsSignalFrame, Key.IsSimple, Key.RAReg));
+ return static_cast<unsigned>(hash_combine(
+ Key.Personality, Key.PersonalityEncoding, Key.LsdaEncoding,
+ Key.IsSignalFrame, Key.IsSimple, Key.RAReg, Key.IsBKeyFrame));
}
static bool isEqual(const CIEKey &LHS, const CIEKey &RHS) {
@@ -1771,8 +1782,8 @@ template <> struct DenseMapInfo<CIEKey> {
LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
LHS.LsdaEncoding == RHS.LsdaEncoding &&
LHS.IsSignalFrame == RHS.IsSignalFrame &&
- LHS.IsSimple == RHS.IsSimple &&
- LHS.RAReg == RHS.RAReg;
+ LHS.IsSimple == RHS.IsSimple && LHS.RAReg == RHS.RAReg &&
+ LHS.IsBKeyFrame == RHS.IsBKeyFrame;
}
};
diff --git a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
index 483ee94c0db1..ff53dd7299c1 100644
--- a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
@@ -26,3 +26,6 @@ void
MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) {
}
+
+void MCELFObjectTargetWriter::addTargetSectionFlags(MCContext &Ctx,
+ MCSectionELF &Sec) {}
diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp
index ef6f0041e0c8..3c022199145f 100644
--- a/contrib/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm/lib/MC/MCExpr.cpp
@@ -304,7 +304,9 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Hexagon_IE: return "IE";
case VK_Hexagon_IE_GOT: return "IEGOT";
case VK_WebAssembly_FUNCTION: return "FUNCTION";
+ case VK_WebAssembly_GLOBAL: return "GLOBAL";
case VK_WebAssembly_TYPEINDEX: return "TYPEINDEX";
+ case VK_WebAssembly_EVENT: return "EVENT";
case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo";
case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi";
case VK_AMDGPU_REL32_LO: return "rel32@lo";
@@ -418,7 +420,9 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("hi8", VK_AVR_HI8)
.Case("hlo8", VK_AVR_HLO8)
.Case("function", VK_WebAssembly_FUNCTION)
+ .Case("global", VK_WebAssembly_GLOBAL)
.Case("typeindex", VK_WebAssembly_TYPEINDEX)
+ .Case("event", VK_WebAssembly_EVENT)
.Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO)
.Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI)
.Case("rel32@lo", VK_AMDGPU_REL32_LO)
@@ -766,7 +770,7 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
case MCBinaryExpr::NE:
Res = MCValue::get((R->isEqualTo(R)) ? 0 : -1);
return true;
- default: {}
+ default: break;
}
}
return false;
diff --git a/contrib/llvm/lib/MC/MCFragment.cpp b/contrib/llvm/lib/MC/MCFragment.cpp
index 0ebcf21a422e..d22b117972bf 100644
--- a/contrib/llvm/lib/MC/MCFragment.cpp
+++ b/contrib/llvm/lib/MC/MCFragment.cpp
@@ -237,8 +237,8 @@ MCFragment::~MCFragment() = default;
MCFragment::MCFragment(FragmentType Kind, bool HasInstructions,
MCSection *Parent)
- : Kind(Kind), HasInstructions(HasInstructions), Parent(Parent),
- Atom(nullptr), Offset(~UINT64_C(0)) {
+ : Kind(Kind), HasInstructions(HasInstructions), LayoutOrder(0),
+ Parent(Parent), Atom(nullptr), Offset(~UINT64_C(0)) {
if (Parent && !isDummy())
Parent->getFragmentList().push_back(this);
}
diff --git a/contrib/llvm/lib/MC/MCInst.cpp b/contrib/llvm/lib/MC/MCInst.cpp
index f9b71caaf91c..64f111fc7114 100644
--- a/contrib/llvm/lib/MC/MCInst.cpp
+++ b/contrib/llvm/lib/MC/MCInst.cpp
@@ -72,11 +72,17 @@ void MCInst::print(raw_ostream &OS) const {
void MCInst::dump_pretty(raw_ostream &OS, const MCInstPrinter *Printer,
StringRef Separator) const {
+ StringRef InstName = Printer ? Printer->getOpcodeName(getOpcode()) : "";
+ dump_pretty(OS, InstName, Separator);
+}
+
+void MCInst::dump_pretty(raw_ostream &OS, StringRef Name,
+ StringRef Separator) const {
OS << "<MCInst #" << getOpcode();
- // Show the instruction opcode name if we have access to a printer.
- if (Printer)
- OS << ' ' << Printer->getOpcodeName(getOpcode());
+ // Show the instruction opcode name if we have it.
+ if (!Name.empty())
+ OS << ' ' << Name;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
OS << Separator;
diff --git a/contrib/llvm/lib/MC/MCInstrAnalysis.cpp b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp
index 4d7c89116893..8223f3a5c66f 100644
--- a/contrib/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -24,11 +24,6 @@ bool MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
return false;
}
-bool MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const {
- return false;
-}
-
bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
uint64_t Size, uint64_t &Target) const {
if (Inst.getNumOperands() == 0 ||
diff --git a/contrib/llvm/lib/MC/MCInstrDesc.cpp b/contrib/llvm/lib/MC/MCInstrDesc.cpp
index ee55f3eff3ac..53cba864a85d 100644
--- a/contrib/llvm/lib/MC/MCInstrDesc.cpp
+++ b/contrib/llvm/lib/MC/MCInstrDesc.cpp
@@ -39,15 +39,6 @@ bool MCInstrDesc::mayAffectControlFlow(const MCInst &MI,
return false;
if (hasDefOfPhysReg(MI, PC, RI))
return true;
- // A variadic instruction may define PC in the variable operand list.
- // There's currently no indication of which entries in a variable
- // list are defs and which are uses. While that's the case, this function
- // needs to assume they're defs in order to be conservatively correct.
- for (int i = NumOperands, e = MI.getNumOperands(); i != e; ++i) {
- if (MI.getOperand(i).isReg() &&
- RI.isSubRegisterEq(PC, MI.getOperand(i).getReg()))
- return true;
- }
return false;
}
@@ -66,5 +57,10 @@ bool MCInstrDesc::hasDefOfPhysReg(const MCInst &MI, unsigned Reg,
if (MI.getOperand(i).isReg() &&
RI.isSubRegisterEq(Reg, MI.getOperand(i).getReg()))
return true;
+ if (variadicOpsAreDefs())
+ for (int i = NumOperands - 1, e = MI.getNumOperands(); i != e; ++i)
+ if (MI.getOperand(i).isReg() &&
+ RI.isSubRegisterEq(Reg, MI.getOperand(i).getReg()))
+ return true;
return hasImplicitDefOfPhysReg(Reg, &RI);
}
diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
index 43e69605787c..b30317e74672 100644
--- a/contrib/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
@@ -89,10 +89,10 @@ public:
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
void EmitLinkerOptions(ArrayRef<std::string> Options) override;
void EmitDataRegion(MCDataRegionType Kind) override;
- void EmitVersionMin(MCVersionMinType Kind, unsigned Major,
- unsigned Minor, unsigned Update) override;
- void EmitBuildVersion(unsigned Platform, unsigned Major,
- unsigned Minor, unsigned Update) override;
+ void EmitVersionMin(MCVersionMinType Kind, unsigned Major, unsigned Minor,
+ unsigned Update, VersionTuple SDKVersion) override;
+ void EmitBuildVersion(unsigned Platform, unsigned Major, unsigned Minor,
+ unsigned Update, VersionTuple SDKVersion) override;
void EmitThumbFunc(MCSymbol *Func) override;
bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override;
@@ -270,14 +270,16 @@ void MCMachOStreamer::EmitDataRegion(MCDataRegionType Kind) {
}
void MCMachOStreamer::EmitVersionMin(MCVersionMinType Kind, unsigned Major,
- unsigned Minor, unsigned Update) {
- getAssembler().setVersionMin(Kind, Major, Minor, Update);
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {
+ getAssembler().setVersionMin(Kind, Major, Minor, Update, SDKVersion);
}
void MCMachOStreamer::EmitBuildVersion(unsigned Platform, unsigned Major,
- unsigned Minor, unsigned Update) {
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {
getAssembler().setBuildVersion((MachO::PlatformType)Platform, Major, Minor,
- Update);
+ Update, SDKVersion);
}
void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
@@ -507,7 +509,7 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context,
new MCMachOStreamer(Context, std::move(MAB), std::move(OW), std::move(CE),
DWARFMustBeAtTheEnd, LabelSections);
const Triple &Target = Context.getObjectFileInfo()->getTargetTriple();
- S->EmitVersionForTarget(Target);
+ S->EmitVersionForTarget(Target, Context.getObjectFileInfo()->getSDKVersion());
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp
index a96dec184441..4e97e7550bcb 100644
--- a/contrib/llvm/lib/MC/MCNullStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -22,6 +23,9 @@ namespace {
/// @name MCStreamer Interface
/// @{
+ bool hasRawTextSupport() const override { return true; }
+ void EmitRawTextImpl(StringRef String) override {}
+
bool EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) override {
return true;
diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
index b88d2d801822..9e35355d06e0 100644
--- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -63,11 +63,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
if (T.isWatchABI())
OmitDwarfIfHaveCompactUnwind = true;
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
- | dwarf::DW_EH_PE_sdata4;
- LSDAEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
+ FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
// .comm doesn't support alignment before Leopard.
if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
@@ -258,9 +254,16 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
DwarfStrOffSection =
Ctx->getMachOSection("__DWARF", "__debug_str_offs", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata(), "section_str_off");
+ DwarfAddrSection =
+ Ctx->getMachOSection("__DWARF", "__debug_addr", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_info");
DwarfLocSection =
Ctx->getMachOSection("__DWARF", "__debug_loc", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata(), "section_debug_loc");
+ DwarfLoclistsSection =
+ Ctx->getMachOSection("__DWARF", "__debug_loclists", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_debug_loc");
+
DwarfARangesSection =
Ctx->getMachOSection("__DWARF", "__debug_aranges", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
@@ -295,11 +298,11 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
switch (T.getArch()) {
case Triple::mips:
case Triple::mipsel:
- FDECFIEncoding = dwarf::DW_EH_PE_sdata4;
- break;
case Triple::mips64:
case Triple::mips64el:
- FDECFIEncoding = dwarf::DW_EH_PE_sdata8;
+ FDECFIEncoding = Ctx->getAsmInfo()->getCodePointerSize() == 4
+ ? dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_sdata8;
break;
case Triple::ppc64:
case Triple::ppc64le:
@@ -311,158 +314,12 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
case Triple::bpfeb:
FDECFIEncoding = dwarf::DW_EH_PE_sdata8;
break;
- default:
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- break;
- }
-
- switch (T.getArch()) {
- case Triple::arm:
- case Triple::armeb:
- case Triple::thumb:
- case Triple::thumbeb:
- if (Ctx->getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM)
- break;
- // Fallthrough if not using EHABI
- LLVM_FALLTHROUGH;
- case Triple::ppc:
- case Triple::x86:
- PersonalityEncoding = PositionIndependent
- ? dwarf::DW_EH_PE_indirect |
- dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4
- : dwarf::DW_EH_PE_absptr;
- LSDAEncoding = PositionIndependent
- ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
- : dwarf::DW_EH_PE_absptr;
- TTypeEncoding = PositionIndependent
- ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4
- : dwarf::DW_EH_PE_absptr;
- break;
- case Triple::x86_64:
- if (PositionIndependent) {
- PersonalityEncoding =
- dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
- LSDAEncoding = dwarf::DW_EH_PE_pcrel |
- (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
- } else {
- PersonalityEncoding =
- Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
- LSDAEncoding = Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
- TTypeEncoding = Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
- }
- break;
case Triple::hexagon:
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- FDECFIEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- if (PositionIndependent) {
- PersonalityEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
- LSDAEncoding |= dwarf::DW_EH_PE_pcrel;
- FDECFIEncoding |= dwarf::DW_EH_PE_pcrel;
- TTypeEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
- }
- break;
- case Triple::aarch64:
- case Triple::aarch64_be:
- // The small model guarantees static code/data size < 4GB, but not where it
- // will be in memory. Most of these could end up >2GB away so even a signed
- // pc-relative 32-bit address is insufficient, theoretically.
- if (PositionIndependent) {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
- } else {
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- }
- break;
- case Triple::lanai:
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- break;
- case Triple::mips:
- case Triple::mipsel:
- case Triple::mips64:
- case Triple::mips64el:
- // MIPS uses indirect pointer to refer personality functions and types, so
- // that the eh_frame section can be read-only. DW.ref.personality will be
- // generated for relocation.
- PersonalityEncoding = dwarf::DW_EH_PE_indirect;
- // FIXME: The N64 ABI probably ought to use DW_EH_PE_sdata8 but we can't
- // identify N64 from just a triple.
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- // We don't support PC-relative LSDA references in GAS so we use the default
- // DW_EH_PE_absptr for those.
-
- // FreeBSD must be explicit about the data size and using pcrel since it's
- // assembler/linker won't do the automatic conversion that the Linux tools
- // do.
- if (T.isOSFreeBSD()) {
- PersonalityEncoding |= dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- }
- break;
- case Triple::ppc64:
- case Triple::ppc64le:
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_udata8;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_udata8;
- break;
- case Triple::sparcel:
- case Triple::sparc:
- if (PositionIndependent) {
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- } else {
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- }
- break;
- case Triple::sparcv9:
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- if (PositionIndependent) {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- } else {
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- }
- break;
- case Triple::systemz:
- // All currently-defined code models guarantee that 4-byte PC-relative
- // values will be in range.
- if (PositionIndependent) {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- } else {
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- }
+ FDECFIEncoding =
+ PositionIndependent ? dwarf::DW_EH_PE_pcrel : dwarf::DW_EH_PE_absptr;
break;
default:
+ FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
break;
}
@@ -582,25 +439,26 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
Ctx->getELFSection(".debug_str_offsets", DebugSecType, 0);
DwarfAddrSection = Ctx->getELFSection(".debug_addr", DebugSecType, 0);
DwarfRnglistsSection = Ctx->getELFSection(".debug_rnglists", DebugSecType, 0);
+ DwarfLoclistsSection = Ctx->getELFSection(".debug_loclists", DebugSecType, 0);
// Fission Sections
DwarfInfoDWOSection =
- Ctx->getELFSection(".debug_info.dwo", DebugSecType, 0);
+ Ctx->getELFSection(".debug_info.dwo", DebugSecType, ELF::SHF_EXCLUDE);
DwarfTypesDWOSection =
- Ctx->getELFSection(".debug_types.dwo", DebugSecType, 0);
+ Ctx->getELFSection(".debug_types.dwo", DebugSecType, ELF::SHF_EXCLUDE);
DwarfAbbrevDWOSection =
- Ctx->getELFSection(".debug_abbrev.dwo", DebugSecType, 0);
- DwarfStrDWOSection =
- Ctx->getELFSection(".debug_str.dwo", DebugSecType,
- ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
+ Ctx->getELFSection(".debug_abbrev.dwo", DebugSecType, ELF::SHF_EXCLUDE);
+ DwarfStrDWOSection = Ctx->getELFSection(
+ ".debug_str.dwo", DebugSecType,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS | ELF::SHF_EXCLUDE, 1, "");
DwarfLineDWOSection =
- Ctx->getELFSection(".debug_line.dwo", DebugSecType, 0);
+ Ctx->getELFSection(".debug_line.dwo", DebugSecType, ELF::SHF_EXCLUDE);
DwarfLocDWOSection =
- Ctx->getELFSection(".debug_loc.dwo", DebugSecType, 0);
- DwarfStrOffDWOSection =
- Ctx->getELFSection(".debug_str_offsets.dwo", DebugSecType, 0);
+ Ctx->getELFSection(".debug_loc.dwo", DebugSecType, ELF::SHF_EXCLUDE);
+ DwarfStrOffDWOSection = Ctx->getELFSection(".debug_str_offsets.dwo",
+ DebugSecType, ELF::SHF_EXCLUDE);
DwarfRnglistsDWOSection =
- Ctx->getELFSection(".debug_rnglists.dwo", DebugSecType, 0);
+ Ctx->getELFSection(".debug_rnglists.dwo", DebugSecType, ELF::SHF_EXCLUDE);
// DWP Sections
DwarfCUIndexSection =
@@ -621,10 +479,10 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
}
void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
- EHFrameSection = Ctx->getCOFFSection(
- ".eh_frame", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getData());
+ EHFrameSection =
+ Ctx->getCOFFSection(".eh_frame", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getData());
// Set the `IMAGE_SCN_MEM_16BIT` flag when compiling for thumb mode. This is
// used to indicate to the linker that the text segment contains thumb instructions
@@ -652,11 +510,7 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
".rdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
- // FIXME: We're emitting LSDA info into a readonly section on COFF, even
- // though it contains relocatable pointers. In PIC mode, this is probably a
- // big runtime hit for C++ apps. Either the contents of the LSDA need to be
- // adjusted or this should be a data section.
- if (T.getArch() == Triple::x86_64) {
+ if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::aarch64) {
// On Windows 64 with SEH, the LSDA is emitted into the .xdata section
LSDASection = nullptr;
} else {
@@ -893,6 +747,12 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
DwarfPubNamesSection = Ctx->getWasmSection(".debug_pubnames", SectionKind::getMetadata());
DwarfPubTypesSection = Ctx->getWasmSection(".debug_pubtypes", SectionKind::getMetadata());
+ // Wasm uses the data section for LSDA.
+ // TODO: Consider putting each function's exception table in a separate
+ // section, as in -function-sections, to facilitate lld's --gc-sections.
+ LSDASection = Ctx->getWasmSection(".rodata.gcc_except_table",
+ SectionKind::getReadOnlyWithRel());
+
// TODO: Define more sections.
}
@@ -908,8 +768,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
SupportsCompactUnwindWithoutEHFrame = false;
OmitDwarfIfHaveCompactUnwind = false;
- PersonalityEncoding = LSDAEncoding = FDECFIEncoding = TTypeEncoding =
- dwarf::DW_EH_PE_absptr;
+ FDECFIEncoding = dwarf::DW_EH_PE_absptr;
CompactUnwindDwarfEHFrameOnly = 0;
@@ -949,16 +808,17 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
}
}
-MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const {
+MCSection *MCObjectFileInfo::getDwarfComdatSection(const char *Name,
+ uint64_t Hash) const {
switch (TT.getObjectFormat()) {
case Triple::ELF:
- return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP,
- 0, utostr(Hash));
+ return Ctx->getELFSection(Name, ELF::SHT_PROGBITS, ELF::SHF_GROUP, 0,
+ utostr(Hash));
case Triple::MachO:
case Triple::COFF:
case Triple::Wasm:
case Triple::UnknownObjectFormat:
- report_fatal_error("Cannot get DWARF types section for this object file "
+ report_fatal_error("Cannot get DWARF comdat section for this object file "
"format: not implemented.");
break;
}
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
index 4b6dad5ce8f3..6ec705bdddb7 100644
--- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -59,11 +59,35 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
PendingLabels.clear();
}
+// When a fixup's offset is a forward-declared label, e.g.:
+//
+// .reloc 1f, R_MIPS_JALR, foo
+// 1: nop
+//
+// postpone adding it to the Fixups vector until the label is defined and its offset
+// is known.
+void MCObjectStreamer::resolvePendingFixups() {
+ for (PendingMCFixup &PendingFixup : PendingFixups) {
+ if (!PendingFixup.Sym || PendingFixup.Sym->isUndefined ()) {
+ getContext().reportError(PendingFixup.Fixup.getLoc(),
+ "unresolved relocation offset");
+ continue;
+ }
+ flushPendingLabels(PendingFixup.DF, PendingFixup.DF->getContents().size());
+ PendingFixup.Fixup.setOffset(PendingFixup.Sym->getOffset());
+ PendingFixup.DF->getFixups().push_back(PendingFixup.Fixup);
+ }
+ PendingFixups.clear();
+}
+
// As a compile-time optimization, avoid allocating and evaluating an MCExpr
// tree for (Hi - Lo) when Hi and Lo are offsets into the same fragment.
-static Optional<uint64_t> absoluteSymbolDiff(const MCSymbol *Hi,
- const MCSymbol *Lo) {
+static Optional<uint64_t>
+absoluteSymbolDiff(MCAssembler &Asm, const MCSymbol *Hi, const MCSymbol *Lo) {
assert(Hi && Lo);
+ if (Asm.getBackendPtr()->requiresDiffExpressionRelocations())
+ return None;
+
if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment() ||
Hi->isVariable() || Lo->isVariable())
return None;
@@ -74,7 +98,7 @@ static Optional<uint64_t> absoluteSymbolDiff(const MCSymbol *Hi,
void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
const MCSymbol *Lo,
unsigned Size) {
- if (Optional<uint64_t> Diff = absoluteSymbolDiff(Hi, Lo)) {
+ if (Optional<uint64_t> Diff = absoluteSymbolDiff(getAssembler(), Hi, Lo)) {
EmitIntValue(*Diff, Size);
return;
}
@@ -83,7 +107,7 @@ void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
void MCObjectStreamer::emitAbsoluteSymbolDiffAsULEB128(const MCSymbol *Hi,
const MCSymbol *Lo) {
- if (Optional<uint64_t> Diff = absoluteSymbolDiff(Hi, Lo)) {
+ if (Optional<uint64_t> Diff = absoluteSymbolDiff(getAssembler(), Hi, Lo)) {
EmitULEB128IntValue(*Diff);
return;
}
@@ -170,7 +194,6 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
- MCCVLineEntry::Make(this);
MCDwarfLineEntry::Make(this, getCurrentSectionOnly());
// Avoid fixups when possible.
@@ -267,7 +290,6 @@ bool MCObjectStreamer::changeSectionImpl(MCSection *Section,
const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
flushPendingLabels(nullptr);
- getContext().clearCVLocSeen();
getContext().clearDwarfLocSeen();
bool Created = getAssembler().registerSection(*Section);
@@ -308,7 +330,6 @@ void MCObjectStreamer::EmitInstructionImpl(const MCInst &Inst,
// Now that a machine instruction has been assembled into this section, make
// a line entry for any .loc directive that has been seen.
- MCCVLineEntry::Make(this);
MCDwarfLineEntry::Make(this, getCurrentSectionOnly());
// If this instruction doesn't need relaxation, just emit it as data.
@@ -443,12 +464,16 @@ void MCObjectStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
unsigned Line, unsigned Column,
bool PrologueEnd, bool IsStmt,
StringRef FileName, SMLoc Loc) {
- // In case we see two .cv_loc directives in a row, make sure the
- // first one gets a line entry.
- MCCVLineEntry::Make(this);
+ // Validate the directive.
+ if (!checkCVLocSection(FunctionId, FileNo, Loc))
+ return;
- this->MCStreamer::EmitCVLocDirective(FunctionId, FileNo, Line, Column,
- PrologueEnd, IsStmt, FileName, Loc);
+ // Emit a label at the current position and record it in the CodeViewContext.
+ MCSymbol *LineSym = getContext().createTempSymbol();
+ EmitLabel(LineSym);
+ getContext().getCVContext().recordCVLoc(getContext(), LineSym, FunctionId,
+ FileNo, Line, Column, PrologueEnd,
+ IsStmt);
}
void MCObjectStreamer::EmitCVLinetableDirective(unsigned FunctionId,
@@ -472,7 +497,11 @@ void MCObjectStreamer::EmitCVInlineLinetableDirective(
void MCObjectStreamer::EmitCVDefRangeDirective(
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion) {
- getContext().getCVContext().emitDefRange(*this, Ranges, FixedSizePortion);
+ MCFragment *Frag =
+ getContext().getCVContext().emitDefRange(*this, Ranges, FixedSizePortion);
+ // Attach labels that were pending before we created the defrange fragment to
+ // the beginning of the new fragment.
+ flushPendingLabels(Frag, 0);
this->MCStreamer::EmitCVDefRangeDirective(Ranges, FixedSizePortion);
}
@@ -488,11 +517,16 @@ void MCObjectStreamer::EmitCVFileChecksumOffsetDirective(unsigned FileNo) {
}
void MCObjectStreamer::EmitBytes(StringRef Data) {
- MCCVLineEntry::Make(this);
MCDwarfLineEntry::Make(this, getCurrentSectionOnly());
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
DF->getContents().append(Data.begin(), Data.end());
+
+ // EmitBytes might not cover all possible ways we emit data (or could be used
+ // to emit executable code in some cases), but is the best method we have
+ // right now for checking this.
+ MCSection *Sec = getCurrentSectionOnly();
+ Sec->setHasData(true);
}
void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment,
@@ -594,16 +628,6 @@ void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) {
bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name,
const MCExpr *Expr, SMLoc Loc,
const MCSubtargetInfo &STI) {
- int64_t OffsetValue;
- if (!Offset.evaluateAsAbsolute(OffsetValue))
- llvm_unreachable("Offset is not absolute");
-
- if (OffsetValue < 0)
- llvm_unreachable("Offset is negative");
-
- MCDataFragment *DF = getOrCreateDataFragment(&STI);
- flushPendingLabels(DF, DF->getContents().size());
-
Optional<MCFixupKind> MaybeKind = Assembler->getBackend().getFixupKind(Name);
if (!MaybeKind.hasValue())
return true;
@@ -613,7 +637,30 @@ bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name,
if (Expr == nullptr)
Expr =
MCSymbolRefExpr::create(getContext().createTempSymbol(), getContext());
- DF->getFixups().push_back(MCFixup::create(OffsetValue, Expr, Kind, Loc));
+
+ MCDataFragment *DF = getOrCreateDataFragment(&STI);
+ flushPendingLabels(DF, DF->getContents().size());
+
+ int64_t OffsetValue;
+ if (Offset.evaluateAsAbsolute(OffsetValue)) {
+ if (OffsetValue < 0)
+ llvm_unreachable(".reloc offset is negative");
+ DF->getFixups().push_back(MCFixup::create(OffsetValue, Expr, Kind, Loc));
+ return false;
+ }
+
+ if (Offset.getKind() != llvm::MCExpr::SymbolRef)
+ llvm_unreachable(".reloc offset is not absolute nor a label");
+
+ const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Offset);
+ if (SRE.getSymbol().isDefined()) {
+ DF->getFixups().push_back(MCFixup::create(SRE.getSymbol().getOffset(),
+ Expr, Kind, Loc));
+ return false;
+ }
+
+ PendingFixups.emplace_back(&SRE.getSymbol(), DF,
+ MCFixup::create(-1, Expr, Kind, Loc));
return false;
}
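The reworked EmitRelocDirective above accepts a label as the .reloc offset: a symbol that is already defined contributes its offset immediately, while an as-yet-undefined one is queued on PendingFixups and patched later in FinishImpl via resolvePendingFixups. A hedged sketch of that defer-then-resolve pattern in plain C++ (the types and names below are invented for illustration; they are not the MC classes):

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct Fixup { std::string Symbol; int64_t Offset; }; // Offset == -1: unknown

    struct Stream {
      std::unordered_map<std::string, int64_t> SymbolOffsets; // defined symbols
      std::vector<Fixup> Fixups;        // fixups whose offsets are known
      std::vector<Fixup> PendingFixups; // offsets filled in at finish time

      void reloc(const std::string &Sym) {
        auto It = SymbolOffsets.find(Sym);
        if (It != SymbolOffsets.end())
          Fixups.push_back({Sym, It->second}); // label already defined
        else
          PendingFixups.push_back({Sym, -1});  // defer until finish
      }

      void finish() {
        for (Fixup &F : PendingFixups)
          F.Offset = SymbolOffsets.at(F.Symbol); // must be defined by now
        Fixups.insert(Fixups.end(), PendingFixups.begin(), PendingFixups.end());
        PendingFixups.clear();
      }
    };

    int main() {
      Stream S;
      S.reloc("later");             // forward reference, deferred
      S.SymbolOffsets["later"] = 16;
      S.finish();
      std::cout << S.Fixups[0].Offset << "\n"; // 16
    }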
@@ -680,5 +727,6 @@ void MCObjectStreamer::FinishImpl() {
MCDwarfLineTable::Emit(this, getAssembler().getDWARFLinetableParams());
flushPendingLabels();
+ resolvePendingFixups();
getAssembler().Finish();
}
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
index 74835fd70c04..2b0d20f9b8e2 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -243,22 +243,26 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
// Look ahead to find the first character that is not a hex digit; if it is
// [hH], treat the integer as hexadecimal, possibly with leading zeroes.
-static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
- const char *FirstHex = nullptr;
+static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
+ bool LexHex) {
+ const char *FirstNonDec = nullptr;
const char *LookAhead = CurPtr;
while (true) {
if (isDigit(*LookAhead)) {
++LookAhead;
- } else if (isHexDigit(*LookAhead)) {
- if (!FirstHex)
- FirstHex = LookAhead;
- ++LookAhead;
} else {
- break;
+ if (!FirstNonDec)
+ FirstNonDec = LookAhead;
+
+      // Keep going if we are looking for an 'h' suffix.
+ if (LexHex && isHexDigit(*LookAhead))
+ ++LookAhead;
+ else
+ break;
}
}
- bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
- CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
+ bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
+ CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
if (isHex)
return 16;
return DefaultRadix;
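After the rename, doHexLookAhead only scans past extra hex digits and honours an 'h'/'H' radix suffix when MASM-style integers are enabled (LexMasmIntegers); otherwise it stops at the first non-decimal character. A standalone approximation of that scan, for illustration only:

    #include <cctype>
    #include <cstdio>

    // Mimics the lookahead: scan decimal digits, optionally scan hex digits
    // when MASM integers are enabled, and report radix 16 only for a trailing
    // 'h'/'H' suffix.
    static unsigned hexLookAhead(const char *&Cur, unsigned DefaultRadix,
                                 bool LexHex) {
      const char *FirstNonDec = nullptr;
      const char *Look = Cur;
      while (true) {
        if (isdigit((unsigned char)*Look)) {
          ++Look;
        } else {
          if (!FirstNonDec)
            FirstNonDec = Look;
          if (LexHex && isxdigit((unsigned char)*Look))
            ++Look;
          else
            break;
        }
      }
      bool IsHex = LexHex && (*Look == 'h' || *Look == 'H');
      Cur = (IsHex || !FirstNonDec) ? Look : FirstNonDec;
      return IsHex ? 16 : DefaultRadix;
    }

    int main() {
      const char *P = "0FFh ;";
      unsigned R = hexLookAhead(P, 10, /*LexHex=*/true);  // MASM mode
      printf("%u %c\n", R, *P);                           // prints: 16 h
      P = "0FFh ;";
      R = hexLookAhead(P, 10, /*LexHex=*/false);          // GAS mode
      printf("%u %c\n", R, *P);                           // prints: 10 F
    }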
@@ -281,7 +285,7 @@ static AsmToken intToken(StringRef Ref, APInt &Value)
AsmToken AsmLexer::LexDigit() {
// MASM-flavor binary integer: [01]+[bB]
// MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
- if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
+ if (LexMasmIntegers && isdigit(CurPtr[-1])) {
const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
CurPtr - 1 : nullptr;
const char *OldCurPtr = CurPtr;
@@ -320,7 +324,7 @@ AsmToken AsmLexer::LexDigit() {
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
- unsigned Radix = doLookAhead(CurPtr, 10);
+ unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
bool isHex = Radix == 16;
// Check for floating point literals.
if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
@@ -335,8 +339,8 @@ AsmToken AsmLexer::LexDigit() {
return ReturnError(TokStart, !isHex ? "invalid decimal number" :
"invalid hexadecimal number");
- // Consume the [bB][hH].
- if (Radix == 2 || Radix == 16)
+ // Consume the [hH].
+ if (LexMasmIntegers && Radix == 16)
++CurPtr;
// The darwin/x86 (and x86-64) assembler accepts and ignores type
@@ -346,7 +350,7 @@ AsmToken AsmLexer::LexDigit() {
return intToken(Result, Value);
}
- if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
+ if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
++CurPtr;
// See if we actually have "0b" as part of something like "jmp 0b\n"
if (!isDigit(CurPtr[0])) {
@@ -395,7 +399,7 @@ AsmToken AsmLexer::LexDigit() {
return ReturnError(TokStart, "invalid hexadecimal number");
// Consume the optional [hH].
- if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
+ if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
++CurPtr;
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
@@ -407,7 +411,7 @@ AsmToken AsmLexer::LexDigit() {
// Either octal or hexadecimal.
APInt Value(128, 0, true);
- unsigned Radix = doLookAhead(CurPtr, 8);
+ unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
bool isHex = Radix == 16;
StringRef Result(TokStart, CurPtr - TokStart);
if (Result.getAsInteger(Radix, Value))
@@ -623,7 +627,6 @@ AsmToken AsmLexer::LexToken() {
return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
- case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
@@ -642,6 +645,12 @@ AsmToken AsmLexer::LexToken() {
return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
}
return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
+ case '-':
+ if (*CurPtr == '>') {
+ ++CurPtr;
+ return AsmToken(AsmToken::MinusGreater, StringRef(TokStart, 2));
+ }
+ return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
case '|':
if (*CurPtr == '|') {
++CurPtr;
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
index d88c6f76826f..cf42a6f7075b 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -180,6 +180,9 @@ private:
/// Did we already inform the user about inconsistent MD5 usage?
bool ReportedInconsistentMD5 = false;
+ // Is alt macro mode enabled.
+ bool AltMacroMode = false;
+
public:
AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI, unsigned CB);
@@ -226,7 +229,9 @@ public:
void setParsingInlineAsm(bool V) override {
ParsingInlineAsm = V;
- Lexer.setParsingMSInlineAsm(V);
+ // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
+ // hex integer literals.
+ Lexer.setLexMasmIntegers(V);
}
bool isParsingInlineAsm() override { return ParsingInlineAsm; }
@@ -260,8 +265,6 @@ public:
/// }
private:
- bool isAltmacroString(SMLoc &StrLoc, SMLoc &EndLoc);
- void altMacroString(StringRef AltMacroStr, std::string &Res);
bool parseStatement(ParseStatementInfo &Info,
MCAsmParserSemaCallback *SI);
bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
@@ -467,6 +470,7 @@ private:
DK_CV_INLINE_LINETABLE,
DK_CV_DEF_RANGE,
DK_CV_STRINGTABLE,
+ DK_CV_STRING,
DK_CV_FILECHECKSUMS,
DK_CV_FILECHECKSUM_OFFSET,
DK_CV_FPO_DATA,
@@ -491,6 +495,7 @@ private:
DK_CFI_UNDEFINED,
DK_CFI_REGISTER,
DK_CFI_WINDOW_SAVE,
+ DK_CFI_B_KEY_FRAME,
DK_MACROS_ON,
DK_MACROS_OFF,
DK_ALTMACRO,
@@ -538,7 +543,7 @@ private:
bool parseDirectiveStabs();
// ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
- // ".cv_inline_linetable", ".cv_def_range"
+ // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
bool parseDirectiveCVFile();
bool parseDirectiveCVFuncId();
bool parseDirectiveCVInlineSiteId();
@@ -546,6 +551,7 @@ private:
bool parseDirectiveCVLinetable();
bool parseDirectiveCVInlineLinetable();
bool parseDirectiveCVDefRange();
+ bool parseDirectiveCVString();
bool parseDirectiveCVStringTable();
bool parseDirectiveCVFileChecksums();
bool parseDirectiveCVFileChecksumOffset();
@@ -670,6 +676,7 @@ namespace llvm {
extern MCAsmParserExtension *createDarwinAsmParser();
extern MCAsmParserExtension *createELFAsmParser();
extern MCAsmParserExtension *createCOFFAsmParser();
+extern MCAsmParserExtension *createWasmAsmParser();
} // end namespace llvm
@@ -700,10 +707,7 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
PlatformParser.reset(createELFAsmParser());
break;
case MCObjectFileInfo::IsWasm:
- // TODO: WASM will need its own MCAsmParserExtension implementation, but
- // for now we can re-use the ELF one, since the directives can be the
- // same for now.
- PlatformParser.reset(createELFAsmParser());
+ PlatformParser.reset(createWasmAsmParser());
break;
}
@@ -895,6 +899,9 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
eatToEndOfStatement();
}
+ getTargetParser().onEndOfFile();
+ printPendingErrors();
+
// All errors should have been emitted.
assert(!hasPendingError() && "unexpected error from parseStatement");
@@ -1100,7 +1107,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// This is a symbol reference.
StringRef SymbolName = Identifier;
if (SymbolName.empty())
- return true;
+ return Error(getLexer().getLoc(), "expected a symbol reference");
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
@@ -1123,7 +1130,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// semantics in the face of reassignment.
if (Sym->isVariable()) {
auto V = Sym->getVariableValue(/*SetUsed*/ false);
- bool DoInline = isa<MCConstantExpr>(V);
+ bool DoInline = isa<MCConstantExpr>(V) && !Variant;
if (auto TV = dyn_cast<MCTargetExpr>(V))
DoInline = TV->inlineAssignedExpr();
if (DoInline) {
@@ -1321,11 +1328,12 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
/// the End argument will be filled with the last location pointed to the '>'
/// character.
-/// There is a gap between the AltMacro's documentation and the single quote implementation.
-/// GCC does not fully support this feature and so we will not support it.
+/// There is a gap between the AltMacro's documentation and the single quote
+/// implementation. GCC does not fully support this feature and so we will not
+/// support it.
/// TODO: Adding single quote as a string.
-bool AsmParser::isAltmacroString(SMLoc &StrLoc, SMLoc &EndLoc) {
- assert((StrLoc.getPointer() != NULL) &&
+static bool isAltmacroString(SMLoc &StrLoc, SMLoc &EndLoc) {
+ assert((StrLoc.getPointer() != nullptr) &&
"Argument to the function cannot be a NULL value");
const char *CharPtr = StrLoc.getPointer();
while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
@@ -1342,12 +1350,14 @@ bool AsmParser::isAltmacroString(SMLoc &StrLoc, SMLoc &EndLoc) {
}
/// Creates a string without the '!' escape characters.
-void AsmParser::altMacroString(StringRef AltMacroStr,std::string &Res) {
+static std::string altMacroString(StringRef AltMacroStr) {
+ std::string Res;
for (size_t Pos = 0; Pos < AltMacroStr.size(); Pos++) {
if (AltMacroStr[Pos] == '!')
Pos++;
Res += AltMacroStr[Pos];
}
+ return Res;
}
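altMacroString strips the '!' escape character used inside altmacro <...> string arguments; the change above only turns the out-parameter into a return value. A small standalone equivalent (with an explicit bounds check that the original leaves to its caller):

    #include <cassert>
    #include <string>

    // "a!>b" -> "a>b": a '!' escapes the character that follows it.
    static std::string unescapeAltMacro(const std::string &In) {
      std::string Res;
      for (size_t Pos = 0; Pos < In.size(); ++Pos) {
        if (In[Pos] == '!' && Pos + 1 < In.size())
          ++Pos;            // skip the escape, keep the escaped character
        Res += In[Pos];
      }
      return Res;
    }

    int main() {
      assert(unescapeAltMacro("a!>b") == "a>b");
      assert(unescapeAltMacro("no escapes") == "no escapes");
    }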
/// Parse an expression and return it.
@@ -1806,6 +1816,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
Lex();
}
+ getTargetParser().doBeforeLabelEmit(Sym);
+
// Emit the label.
if (!getTargetParser().isParsingInlineAsm())
Out.EmitLabel(Sym, IDLoc);
@@ -1842,7 +1854,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// Otherwise, we have a normal instruction or directive.
// Directives start with "."
- if (IDVal[0] == '.' && IDVal != ".") {
+ if (IDVal.startswith(".") && IDVal != ".") {
// There are several entities interested in parsing directives:
//
// 1. The target-specific assembly parser. Some directives are target
@@ -2029,6 +2041,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveCVInlineLinetable();
case DK_CV_DEF_RANGE:
return parseDirectiveCVDefRange();
+ case DK_CV_STRING:
+ return parseDirectiveCVString();
case DK_CV_STRINGTABLE:
return parseDirectiveCVStringTable();
case DK_CV_FILECHECKSUMS:
@@ -2431,21 +2445,20 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
for (const AsmToken &Token : A[Index])
// For altmacro mode, you can write '%expr'.
// The prefix '%' evaluates the expression 'expr'
- // and uses the result as a string (e.g. replace %(1+2) with the string "3").
+ // and uses the result as a string (e.g. replace %(1+2) with the
+ // string "3").
// Here, we identify the integer token which is the result of the
- // absolute expression evaluation and replace it with its string representation.
- if ((Lexer.IsaAltMacroMode()) &&
- (*(Token.getString().begin()) == '%') && Token.is(AsmToken::Integer))
+ // absolute expression evaluation and replace it with its string
+ // representation.
+ if (AltMacroMode && Token.getString().front() == '%' &&
+ Token.is(AsmToken::Integer))
// Emit an integer value to the buffer.
OS << Token.getIntVal();
// Only a token that was validated as a string and begins with '<'
// is considered an altmacro string.
- else if ((Lexer.IsaAltMacroMode()) &&
- (*(Token.getString().begin()) == '<') &&
+ else if (AltMacroMode && Token.getString().front() == '<' &&
Token.is(AsmToken::String)) {
- std::string Res;
- altMacroString(Token.getStringContents(), Res);
- OS << Res;
+ OS << altMacroString(Token.getStringContents());
}
// We expect no quotes around the string's contents when
// parsing for varargs.
@@ -2627,31 +2640,33 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
SMLoc StrLoc = Lexer.getLoc();
SMLoc EndLoc;
- if (Lexer.IsaAltMacroMode() && Lexer.is(AsmToken::Percent)) {
- const MCExpr *AbsoluteExp;
- int64_t Value;
- /// Eat '%'
- Lex();
- if (parseExpression(AbsoluteExp, EndLoc))
- return false;
- if (!AbsoluteExp->evaluateAsAbsolute(Value,
- getStreamer().getAssemblerPtr()))
- return Error(StrLoc, "expected absolute expression");
- const char *StrChar = StrLoc.getPointer();
- const char *EndChar = EndLoc.getPointer();
- AsmToken newToken(AsmToken::Integer, StringRef(StrChar , EndChar - StrChar), Value);
- FA.Value.push_back(newToken);
- } else if (Lexer.IsaAltMacroMode() && Lexer.is(AsmToken::Less) &&
+ if (AltMacroMode && Lexer.is(AsmToken::Percent)) {
+ const MCExpr *AbsoluteExp;
+ int64_t Value;
+ /// Eat '%'
+ Lex();
+ if (parseExpression(AbsoluteExp, EndLoc))
+ return false;
+ if (!AbsoluteExp->evaluateAsAbsolute(Value,
+ getStreamer().getAssemblerPtr()))
+ return Error(StrLoc, "expected absolute expression");
+ const char *StrChar = StrLoc.getPointer();
+ const char *EndChar = EndLoc.getPointer();
+ AsmToken newToken(AsmToken::Integer,
+ StringRef(StrChar, EndChar - StrChar), Value);
+ FA.Value.push_back(newToken);
+ } else if (AltMacroMode && Lexer.is(AsmToken::Less) &&
isAltmacroString(StrLoc, EndLoc)) {
- const char *StrChar = StrLoc.getPointer();
- const char *EndChar = EndLoc.getPointer();
- jumpToLoc(EndLoc, CurBuffer);
- /// Eat from '<' to '>'
- Lex();
- AsmToken newToken(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
- FA.Value.push_back(newToken);
+ const char *StrChar = StrLoc.getPointer();
+ const char *EndChar = EndLoc.getPointer();
+ jumpToLoc(EndLoc, CurBuffer);
+ /// Eat from '<' to '>'
+ Lex();
+ AsmToken newToken(AsmToken::String,
+ StringRef(StrChar, EndChar - StrChar));
+ FA.Value.push_back(newToken);
} else if(parseMacroArgument(FA.Value, Vararg))
- return true;
+ return true;
unsigned PI = Parameter;
if (!FA.Name.empty()) {
@@ -2927,20 +2942,20 @@ bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) {
const MCExpr *Offset;
const MCExpr *Expr = nullptr;
-
- SMLoc OffsetLoc = Lexer.getTok().getLoc();
int64_t OffsetValue;
- // We can only deal with constant expressions at the moment.
+ SMLoc OffsetLoc = Lexer.getTok().getLoc();
if (parseExpression(Offset))
return true;
- if (check(!Offset->evaluateAsAbsolute(OffsetValue,
- getStreamer().getAssemblerPtr()),
- OffsetLoc, "expression is not a constant value") ||
- check(OffsetValue < 0, OffsetLoc, "expression is negative") ||
- parseToken(AsmToken::Comma, "expected comma") ||
- check(getTok().isNot(AsmToken::Identifier), "expected relocation name"))
+ if ((Offset->evaluateAsAbsolute(OffsetValue,
+ getStreamer().getAssemblerPtr()) &&
+ check(OffsetValue < 0, OffsetLoc, "expression is negative")) ||
+ (check(Offset->getKind() != llvm::MCExpr::Constant &&
+ Offset->getKind() != llvm::MCExpr::SymbolRef,
+ OffsetLoc, "expected non-negative number or a label")) ||
+ (parseToken(AsmToken::Comma, "expected comma") ||
+ check(getTok().isNot(AsmToken::Identifier), "expected relocation name")))
return true;
SMLoc NameLoc = Lexer.getTok().getLoc();
@@ -3348,9 +3363,12 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
}
}
- if (FileNumber == -1)
+ if (FileNumber == -1) {
+ if (!getContext().getAsmInfo()->hasSingleParameterDotFile())
+ return Error(DirectiveLoc,
+ "target does not support '.file' without a number");
getStreamer().EmitFileDirective(Filename);
- else {
+ } else {
// In case there is a -g option as well as debug info from directive .file,
// we turn off the -g option, directly use the existing debug info instead.
// Also reset any implicit ".file 0" for the assembler source.
@@ -3813,6 +3831,20 @@ bool AsmParser::parseDirectiveCVDefRange() {
return false;
}
+/// parseDirectiveCVString
+/// ::= .cv_string "string"
+bool AsmParser::parseDirectiveCVString() {
+ std::string Data;
+ if (checkForValidSection() || parseEscapedString(Data))
+ return addErrorSuffix(" in '.cv_string' directive");
+
+ // Put the string in the table and emit the offset.
+ std::pair<StringRef, unsigned> Insertion =
+ getCVContext().addToStringTable(Data);
+ getStreamer().EmitIntValue(Insertion.second, 4);
+ return false;
+}
+
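The new .cv_string directive escapes the operand, interns it in the CodeView string table, and emits the returned 32-bit offset as data. A hedged sketch of the interning idea only (a deduplicating table handing back stable offsets, with offset 0 conventionally reserved for the empty string; this is not the CodeViewContext API):

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    struct StringTable {
      std::string Data{std::string(1, '\0')};       // offset 0 holds ""
      std::map<std::string, uint32_t> Offsets{{"", 0}};

      uint32_t add(const std::string &S) {
        auto It = Offsets.find(S);
        if (It != Offsets.end())
          return It->second;                        // already interned
        uint32_t Off = (uint32_t)Data.size();
        Data += S;
        Data += '\0';
        Offsets.emplace(S, Off);
        return Off;
      }
    };

    int main() {
      StringTable T;
      std::cout << T.add("hello") << "\n"; // 1
      std::cout << T.add("world") << "\n"; // 7
      std::cout << T.add("hello") << "\n"; // 1 (deduplicated)
    }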
/// parseDirectiveCVStringTable
/// ::= .cv_stringtable
bool AsmParser::parseDirectiveCVStringTable() {
@@ -3895,7 +3927,12 @@ bool AsmParser::parseDirectiveCFIStartProc() {
return addErrorSuffix(" in '.cfi_startproc' directive");
}
- getStreamer().EmitCFIStartProc(!Simple.empty());
+ // TODO(kristina): Deal with a corner case of incorrect diagnostic context
+ // being produced if this directive is emitted as part of preprocessor macro
+ // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
+ // Tools like llvm-mc on the other hand are not affected by it, and report
+ // correct context information.
+ getStreamer().EmitCFIStartProc(!Simple.empty(), Lexer.getLoc());
return false;
}
@@ -4163,10 +4200,7 @@ bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
bool AsmParser::parseDirectiveAltmacro(StringRef Directive) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '" + Directive + "' directive");
- if (Directive == ".altmacro")
- getLexer().SetAltMacroMode(true);
- else
- getLexer().SetAltMacroMode(false);
+ AltMacroMode = (Directive == ".altmacro");
return false;
}
@@ -5238,6 +5272,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
+ DirectiveKindMap[".cv_string"] = DK_CV_STRING;
DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
@@ -5265,6 +5300,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
+ DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
DirectiveKindMap[".macros_on"] = DK_MACROS_ON;
DirectiveKindMap[".macros_off"] = DK_MACROS_OFF;
DirectiveKindMap[".macro"] = DK_MACRO;
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index e6fc1fac81ba..cd99112292a9 100644
--- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -459,7 +459,12 @@ public:
bool parseBuildVersion(StringRef Directive, SMLoc Loc);
bool parseVersionMin(StringRef Directive, SMLoc Loc, MCVersionMinType Type);
+ bool parseMajorMinorVersionComponent(unsigned *Major, unsigned *Minor,
+ const char *VersionName);
+ bool parseOptionalTrailingVersionComponent(unsigned *Component,
+ const char *ComponentName);
bool parseVersion(unsigned *Major, unsigned *Minor, unsigned *Update);
+ bool parseSDKVersion(VersionTuple &SDKVersion);
void checkVersion(StringRef Directive, StringRef Arg, SMLoc Loc,
Triple::OSType ExpectedOS);
};
@@ -1000,43 +1005,89 @@ bool DarwinAsmParser::parseDirectiveDataRegionEnd(StringRef, SMLoc) {
return false;
}
-/// parseVersion ::= major, minor [, update]
-bool DarwinAsmParser::parseVersion(unsigned *Major, unsigned *Minor,
- unsigned *Update) {
+static bool isSDKVersionToken(const AsmToken &Tok) {
+ return Tok.is(AsmToken::Identifier) && Tok.getIdentifier() == "sdk_version";
+}
+
+/// parseMajorMinorVersionComponent ::= major, minor
+bool DarwinAsmParser::parseMajorMinorVersionComponent(unsigned *Major,
+ unsigned *Minor,
+ const char *VersionName) {
// Get the major version number.
if (getLexer().isNot(AsmToken::Integer))
- return TokError("invalid OS major version number, integer expected");
+ return TokError(Twine("invalid ") + VersionName +
+ " major version number, integer expected");
int64_t MajorVal = getLexer().getTok().getIntVal();
if (MajorVal > 65535 || MajorVal <= 0)
- return TokError("invalid OS major version number");
+ return TokError(Twine("invalid ") + VersionName + " major version number");
*Major = (unsigned)MajorVal;
Lex();
if (getLexer().isNot(AsmToken::Comma))
- return TokError("OS minor version number required, comma expected");
+ return TokError(Twine(VersionName) +
+ " minor version number required, comma expected");
Lex();
// Get the minor version number.
if (getLexer().isNot(AsmToken::Integer))
- return TokError("invalid OS minor version number, integer expected");
+ return TokError(Twine("invalid ") + VersionName +
+ " minor version number, integer expected");
int64_t MinorVal = getLexer().getTok().getIntVal();
if (MinorVal > 255 || MinorVal < 0)
- return TokError("invalid OS minor version number");
+ return TokError(Twine("invalid ") + VersionName + " minor version number");
*Minor = MinorVal;
Lex();
+ return false;
+}
+
+/// parseOptionalTrailingVersionComponent ::= , version_number
+bool DarwinAsmParser::parseOptionalTrailingVersionComponent(
+ unsigned *Component, const char *ComponentName) {
+ assert(getLexer().is(AsmToken::Comma) && "comma expected");
+ Lex();
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError(Twine("invalid ") + ComponentName +
+ " version number, integer expected");
+ int64_t Val = getLexer().getTok().getIntVal();
+ if (Val > 255 || Val < 0)
+ return TokError(Twine("invalid ") + ComponentName + " version number");
+ *Component = Val;
+ Lex();
+ return false;
+}
+
+/// parseVersion ::= parseMajorMinorVersionComponent
+/// parseOptionalTrailingVersionComponent
+bool DarwinAsmParser::parseVersion(unsigned *Major, unsigned *Minor,
+ unsigned *Update) {
+ if (parseMajorMinorVersionComponent(Major, Minor, "OS"))
+ return true;
// Get the update level, if specified
*Update = 0;
- if (getLexer().is(AsmToken::EndOfStatement))
+ if (getLexer().is(AsmToken::EndOfStatement) ||
+ isSDKVersionToken(getLexer().getTok()))
return false;
if (getLexer().isNot(AsmToken::Comma))
return TokError("invalid OS update specifier, comma expected");
+ if (parseOptionalTrailingVersionComponent(Update, "OS update"))
+ return true;
+ return false;
+}
+
+bool DarwinAsmParser::parseSDKVersion(VersionTuple &SDKVersion) {
+ assert(isSDKVersionToken(getLexer().getTok()) && "expected sdk_version");
Lex();
- if (getLexer().isNot(AsmToken::Integer))
- return TokError("invalid OS update version number, integer expected");
- int64_t UpdateVal = getLexer().getTok().getIntVal();
- if (UpdateVal > 255 || UpdateVal < 0)
- return TokError("invalid OS update version number");
- *Update = UpdateVal;
- Lex();
+ unsigned Major, Minor;
+ if (parseMajorMinorVersionComponent(&Major, &Minor, "SDK"))
+ return true;
+ SDKVersion = VersionTuple(Major, Minor);
+
+ // Get the subminor version, if specified.
+ if (getLexer().is(AsmToken::Comma)) {
+ unsigned Subminor;
+ if (parseOptionalTrailingVersionComponent(&Subminor, "SDK subminor"))
+ return true;
+ SDKVersion = VersionTuple(Major, Minor, Subminor);
+ }
return false;
}
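With the refactor above, the version-min and build_version directives share parseMajorMinorVersionComponent and accept an optional trailing 'sdk_version major, minor [, subminor]' clause. A rough standalone parse of that operand shape (illustration only; the real parser works on assembler tokens and reports TokError diagnostics):

    #include <iostream>
    #include <sstream>
    #include <string>

    struct Version { unsigned Major = 0, Minor = 0, Update = 0; };

    // Parse "major, minor [, update]" from the stream.
    static bool parseVersion(std::istringstream &In, Version &V) {
      char Comma;
      if (!(In >> V.Major >> Comma >> V.Minor) || Comma != ',')
        return false;
      if (In >> std::ws && In.peek() == ',')
        In >> Comma >> V.Update;
      return true;
    }

    int main() {
      // Operands of e.g. ".build_version macos, 10, 14, 1 sdk_version 10, 15"
      std::istringstream In("10, 14, 1 sdk_version 10, 15");
      Version OS, SDK;
      bool HasSDK = false;
      if (!parseVersion(In, OS))
        return 1;
      std::string Tok;
      if (In >> Tok && Tok == "sdk_version")
        HasSDK = parseVersion(In, SDK);
      std::cout << OS.Major << "." << OS.Minor << "." << OS.Update << "\n"; // 10.14.1
      if (HasSDK)
        std::cout << SDK.Major << "." << SDK.Minor << "\n";                 // 10.15
    }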
@@ -1066,10 +1117,10 @@ static Triple::OSType getOSTypeFromMCVM(MCVersionMinType Type) {
}
/// parseVersionMin
-/// ::= .ios_version_min parseVersion
-/// | .macosx_version_min parseVersion
-/// | .tvos_version_min parseVersion
-/// | .watchos_version_min parseVersion
+/// ::= .ios_version_min parseVersion parseSDKVersion
+/// | .macosx_version_min parseVersion parseSDKVersion
+/// | .tvos_version_min parseVersion parseSDKVersion
+/// | .watchos_version_min parseVersion parseSDKVersion
bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc,
MCVersionMinType Type) {
unsigned Major;
@@ -1078,13 +1129,16 @@ bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc,
if (parseVersion(&Major, &Minor, &Update))
return true;
+ VersionTuple SDKVersion;
+ if (isSDKVersionToken(getLexer().getTok()) && parseSDKVersion(SDKVersion))
+ return true;
+
if (parseToken(AsmToken::EndOfStatement))
return addErrorSuffix(Twine(" in '") + Directive + "' directive");
Triple::OSType ExpectedOS = getOSTypeFromMCVM(Type);
checkVersion(Directive, StringRef(), Loc, ExpectedOS);
-
- getStreamer().EmitVersionMin(Type, Major, Minor, Update);
+ getStreamer().EmitVersionMin(Type, Major, Minor, Update, SDKVersion);
return false;
}
@@ -1094,13 +1148,16 @@ static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
case MachO::PLATFORM_IOS: return Triple::IOS;
case MachO::PLATFORM_TVOS: return Triple::TvOS;
case MachO::PLATFORM_WATCHOS: return Triple::WatchOS;
- case MachO::PLATFORM_BRIDGEOS: /* silence warning */break;
+ case MachO::PLATFORM_BRIDGEOS: /* silence warning */ break;
+ case MachO::PLATFORM_IOSSIMULATOR: /* silence warning */ break;
+ case MachO::PLATFORM_TVOSSIMULATOR: /* silence warning */ break;
+ case MachO::PLATFORM_WATCHOSSIMULATOR: /* silence warning */ break;
}
llvm_unreachable("Invalid mach-o platform type");
}
/// parseBuildVersion
-/// ::= .build_version (macos|ios|tvos|watchos), parseVersion
+/// ::= .build_version (macos|ios|tvos|watchos), parseVersion parseSDKVersion
bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) {
StringRef PlatformName;
SMLoc PlatformLoc = getTok().getLoc();
@@ -1126,14 +1183,17 @@ bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) {
if (parseVersion(&Major, &Minor, &Update))
return true;
+ VersionTuple SDKVersion;
+ if (isSDKVersionToken(getLexer().getTok()) && parseSDKVersion(SDKVersion))
+ return true;
+
if (parseToken(AsmToken::EndOfStatement))
return addErrorSuffix(" in '.build_version' directive");
Triple::OSType ExpectedOS
= getOSTypeFromPlatform((MachO::PlatformType)Platform);
checkVersion(Directive, PlatformName, Loc, ExpectedOS);
-
- getStreamer().EmitBuildVersion(Platform, Major, Minor, Update);
+ getStreamer().EmitBuildVersion(Platform, Major, Minor, Update, SDKVersion);
return false;
}
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 3d9590e1f9f5..d568f7a71eeb 100644
--- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -321,6 +321,9 @@ static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) {
case 'y':
flags |= ELF::SHF_ARM_PURECODE;
break;
+ case 's':
+ flags |= ELF::SHF_HEX_GPREL;
+ break;
case 'G':
flags |= ELF::SHF_GROUP;
break;
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
index 75cd318e4fa3..10960fc69633 100644
--- a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -15,7 +15,7 @@
using namespace llvm;
-MCAsmLexer::MCAsmLexer() : AltMacroMode(false) {
+MCAsmLexer::MCAsmLexer() {
CurTok.emplace_back(AsmToken::Space, StringRef());
}
@@ -85,6 +85,7 @@ void AsmToken::dump(raw_ostream &OS) const {
case AsmToken::LessGreater: OS << "LessGreater"; break;
case AsmToken::LessLess: OS << "LessLess"; break;
case AsmToken::Minus: OS << "Minus"; break;
+ case AsmToken::MinusGreater: OS << "MinusGreater"; break;
case AsmToken::Percent: OS << "Percent"; break;
case AsmToken::Pipe: OS << "Pipe"; break;
case AsmToken::PipePipe: OS << "PipePipe"; break;
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
index d439734e76fc..efedcdc5a314 100644
--- a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
-MCAsmParser::MCAsmParser() : ShowParsedOperands(0) {}
+MCAsmParser::MCAsmParser() {}
MCAsmParser::~MCAsmParser() = default;
diff --git a/contrib/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/WasmAsmParser.cpp
new file mode 100644
index 000000000000..93bb0cb3c72e
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -0,0 +1,145 @@
+//===- WasmAsmParser.cpp - Wasm Assembly Parser ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// --
+//
+// Note, this is for wasm, the binary format (analogous to ELF), not wasm,
+// the instruction set (analogous to x86), for which parsing code lives in
+// WebAssemblyAsmParser.
+//
+// This file contains processing for generic directives implemented using
+// MCTargetStreamer, the ones that depend on WebAssemblyTargetStreamer are in
+// WebAssemblyAsmParser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/Support/MachineValueType.h"
+
+using namespace llvm;
+
+namespace {
+
+class WasmAsmParser : public MCAsmParserExtension {
+ MCAsmParser *Parser;
+ MCAsmLexer *Lexer;
+
+ template<bool (WasmAsmParser::*HandlerMethod)(StringRef, SMLoc)>
+ void addDirectiveHandler(StringRef Directive) {
+ MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
+ this, HandleDirective<WasmAsmParser, HandlerMethod>);
+
+ getParser().addDirectiveHandler(Directive, Handler);
+ }
+
+public:
+ WasmAsmParser() : Parser(nullptr), Lexer(nullptr) {
+ BracketExpressionsSupported = true;
+ }
+
+ void Initialize(MCAsmParser &P) override {
+ Parser = &P;
+ Lexer = &Parser->getLexer();
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(*Parser);
+
+ addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveText>(".text");
+ addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section");
+ addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size");
+ addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type");
+ }
+
+ bool Error(const StringRef &msg, const AsmToken &tok) {
+ return Parser->Error(tok.getLoc(), msg + tok.getString());
+ }
+
+ bool IsNext(AsmToken::TokenKind Kind) {
+ auto ok = Lexer->is(Kind);
+ if (ok) Lex();
+ return ok;
+ }
+
+ bool Expect(AsmToken::TokenKind Kind, const char *KindName) {
+ if (!IsNext(Kind))
+ return Error(std::string("Expected ") + KindName + ", instead got: ",
+ Lexer->getTok());
+ return false;
+ }
+
+ bool parseSectionDirectiveText(StringRef, SMLoc) {
+ // FIXME: .text currently no-op.
+ return false;
+ }
+
+ bool parseSectionDirective(StringRef, SMLoc) {
+ // FIXME: .section currently no-op.
+ while (Lexer->isNot(AsmToken::EndOfStatement)) Parser->Lex();
+ return false;
+ }
+
+ // TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize
+ // so maybe could be shared somehow.
+ bool parseDirectiveSize(StringRef, SMLoc) {
+ StringRef Name;
+ if (Parser->parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ auto Sym = getContext().getOrCreateSymbol(Name);
+ if (Lexer->isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ const MCExpr *Expr;
+ if (Parser->parseExpression(Expr))
+ return true;
+ if (Lexer->isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+ Lex();
+ // MCWasmStreamer implements this.
+ getStreamer().emitELFSize(Sym, Expr);
+ return false;
+ }
+
+ bool parseDirectiveType(StringRef, SMLoc) {
+    // This could be the start of a function; check whether it is followed by
+    // "label,@function".
+ if (!Lexer->is(AsmToken::Identifier))
+ return Error("Expected label after .type directive, got: ",
+ Lexer->getTok());
+ auto WasmSym = cast<MCSymbolWasm>(
+ getStreamer().getContext().getOrCreateSymbol(
+ Lexer->getTok().getString()));
+ Lex();
+ if (!(IsNext(AsmToken::Comma) && IsNext(AsmToken::At) &&
+ Lexer->is(AsmToken::Identifier)))
+ return Error("Expected label,@type declaration, got: ", Lexer->getTok());
+ auto TypeName = Lexer->getTok().getString();
+ if (TypeName == "function")
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ else if (TypeName == "global")
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ else
+ return Error("Unknown WASM symbol type: ", Lexer->getTok());
+ Lex();
+ return Expect(AsmToken::EndOfStatement, "EOL");
+ }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+MCAsmParserExtension *createWasmAsmParser() {
+ return new WasmAsmParser;
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/MC/MCRegisterInfo.cpp b/contrib/llvm/lib/MC/MCRegisterInfo.cpp
index 8e47963b4418..5abae5379867 100644
--- a/contrib/llvm/lib/MC/MCRegisterInfo.cpp
+++ b/contrib/llvm/lib/MC/MCRegisterInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
@@ -127,6 +128,8 @@ int MCRegisterInfo::getCodeViewRegNum(unsigned RegNum) const {
report_fatal_error("target does not implement codeview register mapping");
const DenseMap<unsigned, int>::const_iterator I = L2CVRegs.find(RegNum);
if (I == L2CVRegs.end())
- report_fatal_error("unknown codeview register");
+ report_fatal_error("unknown codeview register " + (RegNum < getNumRegs()
+ ? getName(RegNum)
+ : Twine(RegNum)));
return I->second;
}
diff --git a/contrib/llvm/lib/MC/MCSection.cpp b/contrib/llvm/lib/MC/MCSection.cpp
index 97bc65387dd5..d4f11d10136a 100644
--- a/contrib/llvm/lib/MC/MCSection.cpp
+++ b/contrib/llvm/lib/MC/MCSection.cpp
@@ -23,7 +23,8 @@ using namespace llvm;
MCSection::MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin)
: Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false),
- IsRegistered(false), DummyFragment(this), Variant(V), Kind(K) {}
+ HasData(false), IsRegistered(false), DummyFragment(this), Variant(V),
+ Kind(K) {}
MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) {
if (!End)
diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp
index 4d77d05cc505..7ee1694ebbf7 100644
--- a/contrib/llvm/lib/MC/MCSectionELF.cpp
+++ b/contrib/llvm/lib/MC/MCSectionELF.cpp
@@ -116,6 +116,9 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
} else if (T.isARM() || T.isThumb()) {
if (Flags & ELF::SHF_ARM_PURECODE)
OS << 'y';
+ } else if (Arch == Triple::hexagon) {
+ if (Flags & ELF::SHF_HEX_GPREL)
+ OS << 's';
}
OS << '"';
diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp
index 21a9c3604cfc..6a8471bc61b4 100644
--- a/contrib/llvm/lib/MC/MCStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCStreamer.cpp
@@ -72,6 +72,18 @@ void MCTargetStreamer::emitValue(const MCExpr *Value) {
Streamer.EmitRawText(OS.str());
}
+void MCTargetStreamer::emitRawBytes(StringRef Data) {
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+ const char *Directive = MAI->getData8bitsDirective();
+ for (const unsigned char C : Data.bytes()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
+ OS << Directive << (unsigned)C;
+ Streamer.EmitRawText(OS.str());
+ }
+}
+
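emitRawBytes above is the textual fallback: it prints one 8-bit data directive per byte through EmitRawText. Assuming the target's data directive is "\t.byte\t", the formatting amounts to the following sketch:

    #include <iostream>
    #include <string>

    // Print Data as one ".byte N" line per byte, mirroring the textual fallback.
    static void emitRawBytes(const std::string &Data, const char *Directive) {
      for (unsigned char C : Data)
        std::cout << Directive << (unsigned)C << "\n";
    }

    int main() {
      emitRawBytes("\xde\xad", "\t.byte\t");
      // Output:
      //     .byte   222
      //     .byte   173
    }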
void MCTargetStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
MCStreamer::MCStreamer(MCContext &Ctx)
@@ -209,6 +221,13 @@ void MCStreamer::emitDwarfFile0Directive(StringRef Directory,
Source);
}
+void MCStreamer::EmitCFIBKeyFrame() {
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
+ if (!CurFrame)
+ return;
+ CurFrame->IsBKeyFrame = true;
+}
+
void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
unsigned Column, unsigned Flags,
unsigned Isa,
@@ -270,22 +289,28 @@ bool MCStreamer::EmitCVInlineSiteIdDirective(unsigned FunctionId,
void MCStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
unsigned Line, unsigned Column,
bool PrologueEnd, bool IsStmt,
- StringRef FileName, SMLoc Loc) {
+ StringRef FileName, SMLoc Loc) {}
+
+bool MCStreamer::checkCVLocSection(unsigned FuncId, unsigned FileNo,
+ SMLoc Loc) {
CodeViewContext &CVC = getContext().getCVContext();
- MCCVFunctionInfo *FI = CVC.getCVFunctionInfo(FunctionId);
- if (!FI)
- return getContext().reportError(
+ MCCVFunctionInfo *FI = CVC.getCVFunctionInfo(FuncId);
+ if (!FI) {
+ getContext().reportError(
Loc, "function id not introduced by .cv_func_id or .cv_inline_site_id");
+ return false;
+ }
// Track the section
if (FI->Section == nullptr)
FI->Section = getCurrentSectionOnly();
- else if (FI->Section != getCurrentSectionOnly())
- return getContext().reportError(
+ else if (FI->Section != getCurrentSectionOnly()) {
+ getContext().reportError(
Loc,
"all .cv_loc directives for a function must be in the same section");
-
- CVC.setCurrentCVLoc(FunctionId, FileNo, Line, Column, PrologueEnd, IsStmt);
+ return false;
+ }
+ return true;
}
void MCStreamer::EmitCVLinetableDirective(unsigned FunctionId,
@@ -341,10 +366,10 @@ void MCStreamer::EmitCFISections(bool EH, bool Debug) {
assert(EH || Debug);
}
-void MCStreamer::EmitCFIStartProc(bool IsSimple) {
+void MCStreamer::EmitCFIStartProc(bool IsSimple, SMLoc Loc) {
if (hasUnfinishedDwarfFrameInfo())
- getContext().reportError(
- SMLoc(), "starting new .cfi frame before finishing the previous one");
+ return getContext().reportError(
+ Loc, "starting new .cfi frame before finishing the previous one");
MCDwarfFrameInfo Frame;
Frame.IsSimple = IsSimple;
@@ -559,6 +584,15 @@ void MCStreamer::EmitCFIWindowSave() {
CurFrame->Instructions.push_back(Instruction);
}
+void MCStreamer::EmitCFINegateRAState() {
+ MCSymbol *Label = EmitCFILabel();
+ MCCFIInstruction Instruction = MCCFIInstruction::createNegateRAState(Label);
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
+ if (!CurFrame)
+ return;
+ CurFrame->Instructions.push_back(Instruction);
+}
+
void MCStreamer::EmitCFIReturnColumn(int64_t Register) {
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
@@ -609,6 +643,17 @@ void MCStreamer::EmitWinCFIEndProc(SMLoc Loc) {
CurFrame->End = Label;
}
+void MCStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) {
+ WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
+ if (!CurFrame)
+ return;
+ if (CurFrame->ChainedParent)
+ getContext().reportError(Loc, "Not all chained regions terminated!");
+
+ MCSymbol *Label = EmitCFILabel();
+ CurFrame->FuncletOrFuncEnd = Label;
+}
+
void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
@@ -820,13 +865,11 @@ void MCStreamer::EmitWinCFIEndProlog(SMLoc Loc) {
CurFrame->PrologEnd = Label;
}
-void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
-}
+void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {}
void MCStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) {}
-void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
-}
+void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {}
void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {}
@@ -836,9 +879,12 @@ void MCStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {}
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
void MCStreamer::EmitRawTextImpl(StringRef String) {
- errs() << "EmitRawText called on an MCStreamer that doesn't support it, "
- " something must not be fully mc'ized\n";
- abort();
+  // This is not llvm_unreachable for the sake of out-of-tree backend
+  // developers who may not have an assembly streamer; it serves as a reminder
+  // not to call EmitRawText accidentally when no such streamer exists.
+ report_fatal_error("EmitRawText called on an MCStreamer that doesn't support "
+ "it (target backend is likely missing an AsmStreamer "
+ "implementation)");
}
void MCStreamer::EmitRawText(const Twine &T) {
@@ -872,8 +918,9 @@ void MCStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
TS->emitAssignment(Symbol, Value);
}
-void MCTargetStreamer::prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
- const MCInst &Inst, const MCSubtargetInfo &STI) {
+void MCTargetStreamer::prettyPrintAsm(MCInstPrinter &InstPrinter,
+ raw_ostream &OS, const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
InstPrinter.printInst(&Inst, OS, "", STI);
}
@@ -1016,7 +1063,8 @@ MCSymbol *MCStreamer::endSection(MCSection *Section) {
return Sym;
}
-void MCStreamer::EmitVersionForTarget(const Triple &Target) {
+void MCStreamer::EmitVersionForTarget(const Triple &Target,
+ const VersionTuple &SDKVersion) {
if (!Target.isOSBinFormatMachO() || !Target.isOSDarwin())
return;
// Do we even know the version?
@@ -1042,5 +1090,5 @@ void MCStreamer::EmitVersionForTarget(const Triple &Target) {
Target.getiOSVersion(Major, Minor, Update);
}
if (Major != 0)
- EmitVersionMin(VersionType, Major, Minor, Update);
+ EmitVersionMin(VersionType, Major, Minor, Update, SDKVersion);
}
diff --git a/contrib/llvm/lib/MC/MCWasmStreamer.cpp b/contrib/llvm/lib/MC/MCWasmStreamer.cpp
index 0e5932214047..d2a152058b90 100644
--- a/contrib/llvm/lib/MC/MCWasmStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCWasmStreamer.cpp
@@ -61,7 +61,7 @@ void MCWasmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
void MCWasmStreamer::ChangeSection(MCSection *Section,
const MCExpr *Subsection) {
MCAssembler &Asm = getAssembler();
- auto *SectionWasm = static_cast<const MCSectionWasm *>(Section);
+ auto *SectionWasm = cast<MCSectionWasm>(Section);
const MCSymbol *Grp = SectionWasm->getGroup();
if (Grp)
Asm.registerSymbol(*Grp);
@@ -119,7 +119,6 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
break;
case MCSA_ELF_TypeObject:
- Symbol->setType(wasm::WASM_SYMBOL_TYPE_DATA);
break;
default:
diff --git a/contrib/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm/lib/MC/MCWin64EH.cpp
index 1407f25e6f2a..0724b109e1a1 100644
--- a/contrib/llvm/lib/MC/MCWin64EH.cpp
+++ b/contrib/llvm/lib/MC/MCWin64EH.cpp
@@ -11,6 +11,9 @@
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Win64EH.h"
@@ -23,6 +26,8 @@ static uint8_t CountOfUnwindCodes(std::vector<WinEH::Instruction> &Insns) {
uint8_t Count = 0;
for (const auto &I : Insns) {
switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
+ default:
+ llvm_unreachable("Unsupported unwind code");
case Win64EH::UOP_PushNonVol:
case Win64EH::UOP_AllocSmall:
case Win64EH::UOP_SetFPReg:
@@ -60,6 +65,8 @@ static void EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
uint16_t w;
b2 = (inst.Operation & 0x0F);
switch (static_cast<Win64EH::UnwindOpcodes>(inst.Operation)) {
+ default:
+ llvm_unreachable("Unsupported unwind code");
case Win64EH::UOP_PushNonVol:
EmitAbsDifference(streamer, inst.Label, begin);
b2 |= (inst.Register & 0x0F) << 4;
@@ -242,3 +249,348 @@ void llvm::Win64EH::UnwindEmitter::EmitUnwindInfo(
::EmitUnwindInfo(Streamer, info);
}
+static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
+ const MCSymbol *RHS) {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Diff =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context),
+ MCSymbolRefExpr::create(RHS, Context), Context);
+ MCObjectStreamer *OS = (MCObjectStreamer *)(&Streamer);
+ int64_t value;
+ Diff->evaluateAsAbsolute(value, OS->getAssembler());
+ return value;
+}
+
+static uint32_t
+ARM64CountOfUnwindCodes(const std::vector<WinEH::Instruction> &Insns) {
+ uint32_t Count = 0;
+ for (const auto &I : Insns) {
+ switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
+ default:
+ llvm_unreachable("Unsupported ARM64 unwind code");
+ case Win64EH::UOP_AllocSmall:
+ Count += 1;
+ break;
+ case Win64EH::UOP_AllocMedium:
+ Count += 2;
+ break;
+ case Win64EH::UOP_AllocLarge:
+ Count += 4;
+ break;
+ case Win64EH::UOP_SaveFPLRX:
+ Count += 1;
+ break;
+ case Win64EH::UOP_SaveFPLR:
+ Count += 1;
+ break;
+ case Win64EH::UOP_SaveReg:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveRegP:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveRegPX:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveRegX:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveFReg:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveFRegP:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveFRegX:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveFRegPX:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SetFP:
+ Count += 1;
+ break;
+ case Win64EH::UOP_AddFP:
+ Count += 2;
+ break;
+ case Win64EH::UOP_Nop:
+ Count += 1;
+ break;
+ case Win64EH::UOP_End:
+ Count += 1;
+ break;
+ }
+ }
+ return Count;
+}
+
+// Unwind opcode encodings and restrictions are documented at
+// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
+ WinEH::Instruction &inst) {
+ uint8_t b, reg;
+ switch (static_cast<Win64EH::UnwindOpcodes>(inst.Operation)) {
+ default:
+ llvm_unreachable("Unsupported ARM64 unwind code");
+ case Win64EH::UOP_AllocSmall:
+ b = (inst.Offset >> 4) & 0x1F;
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_AllocMedium: {
+ uint16_t hw = (inst.Offset >> 4) & 0x7FF;
+ b = 0xC0;
+ b |= (hw >> 8);
+ streamer.EmitIntValue(b, 1);
+ b = hw & 0xFF;
+ streamer.EmitIntValue(b, 1);
+ break;
+ }
+ case Win64EH::UOP_AllocLarge: {
+ uint32_t w;
+ b = 0xE0;
+ streamer.EmitIntValue(b, 1);
+ w = inst.Offset >> 4;
+ b = (w & 0x00FF0000) >> 16;
+ streamer.EmitIntValue(b, 1);
+ b = (w & 0x0000FF00) >> 8;
+ streamer.EmitIntValue(b, 1);
+ b = w & 0x000000FF;
+ streamer.EmitIntValue(b, 1);
+ break;
+ }
+ case Win64EH::UOP_SetFP:
+ b = 0xE1;
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_AddFP:
+ b = 0xE2;
+ streamer.EmitIntValue(b, 1);
+ b = (inst.Offset >> 3);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_Nop:
+ b = 0xE3;
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFPLRX:
+ b = 0x80;
+ b |= ((inst.Offset - 1) >> 3) & 0x3F;
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFPLR:
+ b = 0x40;
+ b |= (inst.Offset >> 3) & 0x3F;
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveReg:
+ assert(inst.Register >= 19 && "Saved reg must be >= 19");
+ reg = inst.Register - 19;
+ b = 0xD0 | ((reg & 0xC) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | (inst.Offset >> 3);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveRegX:
+ assert(inst.Register >= 19 && "Saved reg must be >= 19");
+ reg = inst.Register - 19;
+ b = 0xD4 | ((reg & 0x8) >> 3);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x7) << 5) | ((inst.Offset >> 3) - 1);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveRegP:
+ assert(inst.Register >= 19 && "Saved registers must be >= 19");
+ reg = inst.Register - 19;
+ b = 0xC8 | ((reg & 0xC) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | (inst.Offset >> 3);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveRegPX:
+ assert(inst.Register >= 19 && "Saved registers must be >= 19");
+ reg = inst.Register - 19;
+ b = 0xCC | ((reg & 0xC) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | ((inst.Offset >> 3) - 1);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFReg:
+ assert(inst.Register >= 8 && "Saved dreg must be >= 8");
+ reg = inst.Register - 8;
+ b = 0xDC | ((reg & 0x4) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | (inst.Offset >> 3);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFRegX:
+ assert(inst.Register >= 8 && "Saved dreg must be >= 8");
+ reg = inst.Register - 8;
+ b = 0xDE;
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x7) << 5) | ((inst.Offset >> 3) - 1);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFRegP:
+ assert(inst.Register >= 8 && "Saved dregs must be >= 8");
+ reg = inst.Register - 8;
+ b = 0xD8 | ((reg & 0x4) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | (inst.Offset >> 3);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFRegPX:
+ assert(inst.Register >= 8 && "Saved dregs must be >= 8");
+ reg = inst.Register - 8;
+ b = 0xDA | ((reg & 0x4) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | ((inst.Offset >> 3) - 1);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_End:
+ b = 0xE4;
+ streamer.EmitIntValue(b, 1);
+ break;
+ }
+}
+
+// Populate the .xdata section. The format of .xdata on ARM64 is documented at
+// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
+ // If this UNWIND_INFO already has a symbol, it's already been emitted.
+ if (info->Symbol)
+ return;
+
+ MCContext &context = streamer.getContext();
+ MCSymbol *Label = context.createTempSymbol();
+
+ streamer.EmitValueToAlignment(4);
+ streamer.EmitLabel(Label);
+ info->Symbol = Label;
+
+ uint32_t FuncLength = 0x0;
+ if (info->FuncletOrFuncEnd)
+ FuncLength = (uint32_t)GetAbsDifference(streamer, info->FuncletOrFuncEnd,
+ info->Begin);
+ FuncLength /= 4;
+ uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions);
+ uint32_t TotalCodeBytes = PrologCodeBytes;
+
+ // Process epilogs.
+ MapVector<MCSymbol *, uint32_t> EpilogInfo;
+ for (auto &I : info->EpilogMap) {
+ MCSymbol *EpilogStart = I.first;
+ auto &EpilogInstrs = I.second;
+ uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs);
+ EpilogInfo[EpilogStart] = TotalCodeBytes;
+ TotalCodeBytes += CodeBytes;
+ }
+
+ // Code Words, Epilog count, E, X, Vers, Function Length
+ uint32_t row1 = 0x0;
+ uint32_t CodeWords = TotalCodeBytes / 4;
+ uint32_t CodeWordsMod = TotalCodeBytes % 4;
+ if (CodeWordsMod)
+ CodeWords++;
+ uint32_t EpilogCount = info->EpilogMap.size();
+ bool ExtensionWord = EpilogCount > 31 || TotalCodeBytes > 124;
+ if (!ExtensionWord) {
+ row1 |= (EpilogCount & 0x1F) << 22;
+ row1 |= (CodeWords & 0x1F) << 27;
+ }
+ // E is always 0 right now, TODO: packed epilog setup
+ if (info->HandlesExceptions) // X
+ row1 |= 1 << 20;
+ row1 |= FuncLength & 0x3FFFF;
+ streamer.EmitIntValue(row1, 4);
+
+ // Extended Code Words, Extended Epilog Count
+ if (ExtensionWord) {
+ // FIXME: We should be able to split unwind info into multiple sections.
+ // FIXME: We should share epilog codes across epilogs, where possible,
+ // which would make this issue show up less frequently.
+ if (CodeWords > 0xFF || EpilogCount > 0xFFFF)
+ report_fatal_error("SEH unwind data splitting not yet implemented");
+ uint32_t row2 = 0x0;
+ row2 |= (CodeWords & 0xFF) << 16;
+ row2 |= (EpilogCount & 0xFFFF);
+ streamer.EmitIntValue(row2, 4);
+ }
+
+ // Epilog Start Index, Epilog Start Offset
+ for (auto &I : EpilogInfo) {
+ MCSymbol *EpilogStart = I.first;
+ uint32_t EpilogIndex = I.second;
+ uint32_t EpilogOffset =
+ (uint32_t)GetAbsDifference(streamer, EpilogStart, info->Begin);
+ if (EpilogOffset)
+ EpilogOffset /= 4;
+ uint32_t row3 = EpilogOffset;
+ row3 |= (EpilogIndex & 0x3FF) << 22;
+ streamer.EmitIntValue(row3, 4);
+ }
+
+ // Emit prolog unwind instructions (in reverse order).
+ uint8_t numInst = info->Instructions.size();
+ for (uint8_t c = 0; c < numInst; ++c) {
+ WinEH::Instruction inst = info->Instructions.back();
+ info->Instructions.pop_back();
+ ARM64EmitUnwindCode(streamer, info->Begin, inst);
+ }
+
+ // Emit epilog unwind instructions
+ for (auto &I : info->EpilogMap) {
+ auto &EpilogInstrs = I.second;
+ for (uint32_t i = 0; i < EpilogInstrs.size(); i++) {
+ WinEH::Instruction inst = EpilogInstrs[i];
+ ARM64EmitUnwindCode(streamer, info->Begin, inst);
+ }
+ }
+
+ int32_t BytesMod = CodeWords * 4 - TotalCodeBytes;
+ assert(BytesMod >= 0);
+ for (int i = 0; i < BytesMod; i++)
+ streamer.EmitIntValue(0xE3, 1);
+
+ if (info->HandlesExceptions)
+ streamer.EmitValue(
+ MCSymbolRefExpr::create(info->ExceptionHandler,
+ MCSymbolRefExpr::VK_COFF_IMGREL32, context),
+ 4);
+}
+
+static void ARM64EmitRuntimeFunction(MCStreamer &streamer,
+ const WinEH::FrameInfo *info) {
+ MCContext &context = streamer.getContext();
+
+ streamer.EmitValueToAlignment(4);
+ EmitSymbolRefWithOfs(streamer, info->Function, info->Begin);
+ streamer.EmitValue(MCSymbolRefExpr::create(info->Symbol,
+ MCSymbolRefExpr::VK_COFF_IMGREL32,
+ context),
+ 4);
+}
+
+void llvm::Win64EH::ARM64UnwindEmitter::Emit(MCStreamer &Streamer) const {
+ // Emit the unwind info structs first.
+ for (const auto &CFI : Streamer.getWinFrameInfos()) {
+ MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection);
+ Streamer.SwitchSection(XData);
+ ARM64EmitUnwindInfo(Streamer, CFI.get());
+ }
+
+ // Now emit RUNTIME_FUNCTION entries.
+ for (const auto &CFI : Streamer.getWinFrameInfos()) {
+ MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection);
+ Streamer.SwitchSection(PData);
+ ARM64EmitRuntimeFunction(Streamer, CFI.get());
+ }
+}
+
+void llvm::Win64EH::ARM64UnwindEmitter::EmitUnwindInfo(
+ MCStreamer &Streamer, WinEH::FrameInfo *info) const {
+ // Switch sections (the static function above is meant to be called from
+  // here and from Emit()).
+ MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection);
+ Streamer.SwitchSection(XData);
+ ARM64EmitUnwindInfo(Streamer, info);
+}
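ARM64EmitUnwindInfo above packs the first .xdata word as FunctionLength in 4-byte units (18 bits), a 2-bit version, the X and E flags, and then EpilogCount and CodeWords in the high bits whenever no extension word is required; the field layout follows the Microsoft ARM64 exception-handling document linked above. A small arithmetic sketch of that packing under the same assumptions:

    #include <cassert>
    #include <cstdint>

    // First .xdata word for the non-extended case (EpilogCount <= 31,
    // CodeWords <= 31): |CodeWords:5|EpilogCount:5|E|X|Vers:2|FuncLength:18|
    static uint32_t makeRow1(uint32_t FuncLengthBytes, uint32_t EpilogCount,
                             uint32_t CodeWords, bool HandlesExceptions) {
      uint32_t Row1 = 0;
      Row1 |= (EpilogCount & 0x1F) << 22;
      Row1 |= (CodeWords & 0x1F) << 27;
      if (HandlesExceptions)            // X bit
        Row1 |= 1u << 20;
      Row1 |= (FuncLengthBytes / 4) & 0x3FFFF;
      return Row1;
    }

    int main() {
      // 256-byte function, one epilog, one word of unwind codes, no handler.
      assert(makeRow1(256, 1, 1, false) == 0x08400040u);
    }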
diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp
index 2664528909af..2fa65658ccfa 100644
--- a/contrib/llvm/lib/MC/MachObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp
@@ -597,8 +597,8 @@ void MachObjectWriter::computeSymbolTable(
}
// External and undefined symbols are required to be in lexicographic order.
- llvm::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
- llvm::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+ llvm::sort(ExternalSymbolData);
+ llvm::sort(UndefinedSymbolData);
// Set the symbol indices.
Index = 0;
@@ -846,18 +846,27 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
// Write out the deployment target information, if it's available.
if (VersionInfo.Major != 0) {
- assert(VersionInfo.Update < 256 && "unencodable update target version");
- assert(VersionInfo.Minor < 256 && "unencodable minor target version");
- assert(VersionInfo.Major < 65536 && "unencodable major target version");
- uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) |
- (VersionInfo.Major << 16);
+ auto EncodeVersion = [](VersionTuple V) -> uint32_t {
+ assert(!V.empty() && "empty version");
+ unsigned Update = V.getSubminor() ? *V.getSubminor() : 0;
+ unsigned Minor = V.getMinor() ? *V.getMinor() : 0;
+ assert(Update < 256 && "unencodable update target version");
+ assert(Minor < 256 && "unencodable minor target version");
+ assert(V.getMajor() < 65536 && "unencodable major target version");
+ return Update | (Minor << 8) | (V.getMajor() << 16);
+ };
+ uint32_t EncodedVersion = EncodeVersion(
+ VersionTuple(VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
+ uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
+ ? EncodeVersion(VersionInfo.SDKVersion)
+ : 0;
if (VersionInfo.EmitBuildVersion) {
// FIXME: Currently empty tools. Add clang version in the future.
W.write<uint32_t>(MachO::LC_BUILD_VERSION);
W.write<uint32_t>(sizeof(MachO::build_version_command));
W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
W.write<uint32_t>(EncodedVersion);
- W.write<uint32_t>(0); // SDK version.
+ W.write<uint32_t>(SDKVersion);
W.write<uint32_t>(0); // Empty tools list.
} else {
MachO::LoadCommandType LCType
@@ -865,7 +874,7 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
W.write<uint32_t>(LCType);
W.write<uint32_t>(sizeof(MachO::version_min_command));
W.write<uint32_t>(EncodedVersion);
- W.write<uint32_t>(0); // reserved.
+ W.write<uint32_t>(SDKVersion);
}
}
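The EncodeVersion lambda introduced in this hunk packs a major.minor.update triple into the single 32-bit field shared by LC_BUILD_VERSION and LC_VERSION_MIN_* load commands: update in bits 0-7, minor in bits 8-15, major in bits 16-31. A minimal standalone sketch of that packing (not the MachObjectWriter code itself):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Pack major.minor.update into the 32-bit Mach-O version encoding.
    static uint32_t encodeVersion(unsigned Major, unsigned Minor, unsigned Update) {
      assert(Update < 256 && Minor < 256 && Major < 65536);
      return Update | (Minor << 8) | (Major << 16);
    }

    int main() {
      // Version 10.14.1 encodes as 0x000A0E01.
      printf("0x%08X\n", encodeVersion(10, 14, 1));
      return 0;
    }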
diff --git a/contrib/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm/lib/MC/WasmObjectWriter.cpp
index 5a979d36e81b..0cca3757be90 100644
--- a/contrib/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WasmObjectWriter.cpp
@@ -56,9 +56,10 @@ struct SectionBookkeeping {
uint32_t Index;
};
-// The signature of a wasm function, in a struct capable of being used as a
-// DenseMap key.
-struct WasmFunctionType {
+// The signature of a wasm function or event, in a struct capable of being used
+// as a DenseMap key.
+// TODO: Consider using wasm::WasmSignature directly instead.
+struct WasmSignature {
// Support empty and tombstone instances, needed by DenseMap.
enum { Plain, Empty, Tombstone } State;
@@ -68,36 +69,35 @@ struct WasmFunctionType {
// The parameter types of the function.
SmallVector<wasm::ValType, 4> Params;
- WasmFunctionType() : State(Plain) {}
+ WasmSignature() : State(Plain) {}
- bool operator==(const WasmFunctionType &Other) const {
+ bool operator==(const WasmSignature &Other) const {
return State == Other.State && Returns == Other.Returns &&
Params == Other.Params;
}
};
-// Traits for using WasmFunctionType in a DenseMap.
-struct WasmFunctionTypeDenseMapInfo {
- static WasmFunctionType getEmptyKey() {
- WasmFunctionType FuncTy;
- FuncTy.State = WasmFunctionType::Empty;
- return FuncTy;
+// Traits for using WasmSignature in a DenseMap.
+struct WasmSignatureDenseMapInfo {
+ static WasmSignature getEmptyKey() {
+ WasmSignature Sig;
+ Sig.State = WasmSignature::Empty;
+ return Sig;
}
- static WasmFunctionType getTombstoneKey() {
- WasmFunctionType FuncTy;
- FuncTy.State = WasmFunctionType::Tombstone;
- return FuncTy;
+ static WasmSignature getTombstoneKey() {
+ WasmSignature Sig;
+ Sig.State = WasmSignature::Tombstone;
+ return Sig;
}
- static unsigned getHashValue(const WasmFunctionType &FuncTy) {
- uintptr_t Value = FuncTy.State;
- for (wasm::ValType Ret : FuncTy.Returns)
- Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Ret));
- for (wasm::ValType Param : FuncTy.Params)
- Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Param));
+ static unsigned getHashValue(const WasmSignature &Sig) {
+ uintptr_t Value = Sig.State;
+ for (wasm::ValType Ret : Sig.Returns)
+ Value += DenseMapInfo<uint32_t>::getHashValue(uint32_t(Ret));
+ for (wasm::ValType Param : Sig.Params)
+ Value += DenseMapInfo<uint32_t>::getHashValue(uint32_t(Param));
return Value;
}
- static bool isEqual(const WasmFunctionType &LHS,
- const WasmFunctionType &RHS) {
+ static bool isEqual(const WasmSignature &LHS, const WasmSignature &RHS) {
return LHS == RHS;
}
};
@@ -117,7 +117,7 @@ struct WasmDataSegment {
// A wasm function to be written into the function section.
struct WasmFunction {
- int32_t Type;
+ uint32_t SigIndex;
const MCSymbolWasm *Sym;
};
@@ -137,11 +137,11 @@ struct WasmComdatEntry {
// Information about a single relocation.
struct WasmRelocationEntry {
- uint64_t Offset; // Where is the relocation.
- const MCSymbolWasm *Symbol; // The symbol to relocate with.
- int64_t Addend; // A value to add to the symbol.
- unsigned Type; // The type of the relocation.
- const MCSectionWasm *FixupSection;// The section the relocation is targeting.
+ uint64_t Offset; // Where is the relocation.
+ const MCSymbolWasm *Symbol; // The symbol to relocate with.
+ int64_t Addend; // A value to add to the symbol.
+ unsigned Type; // The type of the relocation.
+ const MCSectionWasm *FixupSection; // The section the relocation is targeting.
WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol,
int64_t Addend, unsigned Type,
@@ -163,8 +163,8 @@ struct WasmRelocationEntry {
}
void print(raw_ostream &Out) const {
- Out << wasm::relocTypetoString(Type)
- << " Off=" << Offset << ", Sym=" << *Symbol << ", Addend=" << Addend
+ Out << wasm::relocTypetoString(Type) << " Off=" << Offset
+ << ", Sym=" << *Symbol << ", Addend=" << Addend
<< ", FixupSection=" << FixupSection->getSectionName();
}
@@ -215,7 +215,8 @@ class WasmObjectWriter : public MCObjectWriter {
// Maps function symbols to the table element index space. Used
// for TABLE_INDEX relocation types (i.e. address taken functions).
DenseMap<const MCSymbolWasm *, uint32_t> TableIndices;
- // Maps function/global symbols to the function/global/section index space.
+ // Maps function/global symbols to the function/global/event/section index
+ // space.
DenseMap<const MCSymbolWasm *, uint32_t> WasmIndices;
// Maps data symbols to the Wasm segment and offset/size with the segment.
DenseMap<const MCSymbolWasm *, wasm::WasmDataReference> DataLocations;
@@ -230,13 +231,13 @@ class WasmObjectWriter : public MCObjectWriter {
// Map from section to defining function symbol.
DenseMap<const MCSection *, const MCSymbol *> SectionFunctions;
- DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo>
- FunctionTypeIndices;
- SmallVector<WasmFunctionType, 4> FunctionTypes;
+ DenseMap<WasmSignature, uint32_t, WasmSignatureDenseMapInfo> SignatureIndices;
+ SmallVector<WasmSignature, 4> Signatures;
SmallVector<WasmGlobal, 4> Globals;
SmallVector<WasmDataSegment, 4> DataSegments;
unsigned NumFunctionImports = 0;
unsigned NumGlobalImports = 0;
+ unsigned NumEventImports = 0;
uint32_t SectionCount = 0;
// TargetObjectWriter wrappers.
@@ -265,8 +266,8 @@ private:
TableIndices.clear();
DataLocations.clear();
CustomSectionsRelocations.clear();
- FunctionTypeIndices.clear();
- FunctionTypes.clear();
+ SignatureIndices.clear();
+ Signatures.clear();
Globals.clear();
DataSegments.clear();
SectionFunctions.clear();
@@ -291,11 +292,9 @@ private:
W.OS << Str;
}
- void writeValueType(wasm::ValType Ty) {
- W.OS << static_cast<char>(Ty);
- }
+ void writeValueType(wasm::ValType Ty) { W.OS << static_cast<char>(Ty); }
- void writeTypeSection(ArrayRef<WasmFunctionType> FunctionTypes);
+ void writeTypeSection(ArrayRef<WasmSignature> Signatures);
void writeImportSection(ArrayRef<wasm::WasmImport> Imports, uint32_t DataSize,
uint32_t NumElements);
void writeFunctionSection(ArrayRef<WasmFunction> Functions);
@@ -305,8 +304,9 @@ private:
void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
ArrayRef<WasmFunction> Functions);
void writeDataSection();
+ void writeEventSection(ArrayRef<wasm::WasmEventType> Events);
void writeRelocSection(uint32_t SectionIndex, StringRef Name,
- ArrayRef<WasmRelocationEntry> Relocations);
+ std::vector<WasmRelocationEntry> &Relocations);
void writeLinkingMetaDataSection(
ArrayRef<wasm::WasmSymbolInfo> SymbolInfos,
ArrayRef<std::pair<uint16_t, uint32_t>> InitFuncs,
@@ -323,7 +323,9 @@ private:
uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry);
uint32_t getFunctionType(const MCSymbolWasm &Symbol);
- uint32_t registerFunctionType(const MCSymbolWasm &Symbol);
+ uint32_t getEventType(const MCSymbolWasm &Symbol);
+ void registerFunctionType(const MCSymbolWasm &Symbol);
+ void registerEventType(const MCSymbolWasm &Symbol);
};
} // end anonymous namespace
@@ -529,8 +531,8 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
// to allow patching.
-static void
-WritePatchableLEB(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
+static void WritePatchableLEB(raw_pwrite_stream &Stream, uint32_t X,
+ uint64_t Offset) {
uint8_t Buffer[5];
unsigned SizeLen = encodeULEB128(X, Buffer, 5);
assert(SizeLen == 5);
@@ -539,8 +541,8 @@ WritePatchableLEB(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
// Write X as an signed LEB value at offset Offset in Stream, padded
// to allow patching.
-static void
-WritePatchableSLEB(raw_pwrite_stream &Stream, int32_t X, uint64_t Offset) {
+static void WritePatchableSLEB(raw_pwrite_stream &Stream, int32_t X,
+ uint64_t Offset) {
uint8_t Buffer[5];
unsigned SizeLen = encodeSLEB128(X, Buffer, 5);
assert(SizeLen == 5);
@@ -554,7 +556,7 @@ static void WriteI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
}
-static const MCSymbolWasm* ResolveSymbol(const MCSymbolWasm& Symbol) {
+static const MCSymbolWasm *ResolveSymbol(const MCSymbolWasm &Symbol) {
if (Symbol.isVariable()) {
const MCExpr *Expr = Symbol.getVariableValue();
auto *Inner = cast<MCSymbolRefExpr>(Expr);
@@ -582,7 +584,8 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) {
return getRelocationIndexValue(RelEntry);
case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
- // Provisional value is function/global Wasm index
+ case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
+ // Provisional value is function/global/event Wasm index
if (!WasmIndices.count(RelEntry.Symbol))
report_fatal_error("symbol not found in wasm index space: " +
RelEntry.Symbol->getName());
@@ -626,10 +629,9 @@ static void addData(SmallVectorImpl<char> &DataBytes,
report_fatal_error("only byte values supported for alignment");
// If nops are requested, use zeros, as this is the data section.
uint8_t Value = Align->hasEmitNops() ? 0 : Align->getValue();
- uint64_t Size = std::min<uint64_t>(alignTo(DataBytes.size(),
- Align->getAlignment()),
- DataBytes.size() +
- Align->getMaxBytesToEmit());
+ uint64_t Size =
+ std::min<uint64_t>(alignTo(DataBytes.size(), Align->getAlignment()),
+ DataBytes.size() + Align->getMaxBytesToEmit());
DataBytes.resize(Size, Value);
} else if (auto *Fill = dyn_cast<MCFillFragment>(&Frag)) {
int64_t NumValues;
@@ -637,10 +639,12 @@ static void addData(SmallVectorImpl<char> &DataBytes,
llvm_unreachable("The fill should be an assembler constant");
DataBytes.insert(DataBytes.end(), Fill->getValueSize() * NumValues,
Fill->getValue());
+ } else if (auto *LEB = dyn_cast<MCLEBFragment>(&Frag)) {
+ const SmallVectorImpl<char> &Contents = LEB->getContents();
+ DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end());
} else {
const auto &DataFrag = cast<MCDataFragment>(Frag);
const SmallVectorImpl<char> &Contents = DataFrag.getContents();
-
DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end());
}
}
@@ -678,6 +682,7 @@ void WasmObjectWriter::applyRelocations(
case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+ case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
WritePatchableLEB(Stream, Value, Offset);
break;
case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
@@ -696,23 +701,22 @@ void WasmObjectWriter::applyRelocations(
}
}
-void WasmObjectWriter::writeTypeSection(
- ArrayRef<WasmFunctionType> FunctionTypes) {
- if (FunctionTypes.empty())
+void WasmObjectWriter::writeTypeSection(ArrayRef<WasmSignature> Signatures) {
+ if (Signatures.empty())
return;
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_TYPE);
- encodeULEB128(FunctionTypes.size(), W.OS);
+ encodeULEB128(Signatures.size(), W.OS);
- for (const WasmFunctionType &FuncTy : FunctionTypes) {
+ for (const WasmSignature &Sig : Signatures) {
W.OS << char(wasm::WASM_TYPE_FUNC);
- encodeULEB128(FuncTy.Params.size(), W.OS);
- for (wasm::ValType Ty : FuncTy.Params)
+ encodeULEB128(Sig.Params.size(), W.OS);
+ for (wasm::ValType Ty : Sig.Params)
writeValueType(Ty);
- encodeULEB128(FuncTy.Returns.size(), W.OS);
- for (wasm::ValType Ty : FuncTy.Returns)
+ encodeULEB128(Sig.Returns.size(), W.OS);
+ for (wasm::ValType Ty : Sig.Returns)
writeValueType(Ty);
}
@@ -745,14 +749,18 @@ void WasmObjectWriter::writeImportSection(ArrayRef<wasm::WasmImport> Imports,
W.OS << char(Import.Global.Mutable ? 1 : 0);
break;
case wasm::WASM_EXTERNAL_MEMORY:
- encodeULEB128(0, W.OS); // flags
+ encodeULEB128(0, W.OS); // flags
encodeULEB128(NumPages, W.OS); // initial
break;
case wasm::WASM_EXTERNAL_TABLE:
W.OS << char(Import.Table.ElemType);
- encodeULEB128(0, W.OS); // flags
+ encodeULEB128(0, W.OS); // flags
encodeULEB128(NumElements, W.OS); // initial
break;
+ case wasm::WASM_EXTERNAL_EVENT:
+ encodeULEB128(Import.Event.Attribute, W.OS);
+ encodeULEB128(Import.Event.SigIndex, W.OS);
+ break;
default:
llvm_unreachable("unsupported import kind");
}
@@ -770,7 +778,7 @@ void WasmObjectWriter::writeFunctionSection(ArrayRef<WasmFunction> Functions) {
encodeULEB128(Functions.size(), W.OS);
for (const WasmFunction &Func : Functions)
- encodeULEB128(Func.Type, W.OS);
+ encodeULEB128(Func.SigIndex, W.OS);
endSection(Section);
}
@@ -795,6 +803,22 @@ void WasmObjectWriter::writeGlobalSection() {
endSection(Section);
}
+void WasmObjectWriter::writeEventSection(ArrayRef<wasm::WasmEventType> Events) {
+ if (Events.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_EVENT);
+
+ encodeULEB128(Events.size(), W.OS);
+ for (const wasm::WasmEventType &Event : Events) {
+ encodeULEB128(Event.Attribute, W.OS);
+ encodeULEB128(Event.SigIndex, W.OS);
+ }
+
+ endSection(Section);
+}
+
void WasmObjectWriter::writeExportSection(ArrayRef<wasm::WasmExport> Exports) {
if (Exports.empty())
return;
@@ -892,21 +916,33 @@ void WasmObjectWriter::writeDataSection() {
void WasmObjectWriter::writeRelocSection(
uint32_t SectionIndex, StringRef Name,
- ArrayRef<WasmRelocationEntry> Relocations) {
+ std::vector<WasmRelocationEntry> &Relocs) {
// See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
// for descriptions of the reloc sections.
- if (Relocations.empty())
+ if (Relocs.empty())
return;
+ // First, ensure the relocations are sorted in offset order. In general they
+ // should already be sorted since `recordRelocation` is called in offset
+ // order, but for the code section we combine many MC sections into a single
+ // wasm section, and this order is determined by the order of Asm.Symbols(),
+ // not by the order of the sections.
+ std::stable_sort(
+ Relocs.begin(), Relocs.end(),
+ [](const WasmRelocationEntry &A, const WasmRelocationEntry &B) {
+ return (A.Offset + A.FixupSection->getSectionOffset()) <
+ (B.Offset + B.FixupSection->getSectionOffset());
+ });
+
SectionBookkeeping Section;
startCustomSection(Section, std::string("reloc.") + Name.str());
encodeULEB128(SectionIndex, W.OS);
- encodeULEB128(Relocations.size(), W.OS);
- for (const WasmRelocationEntry& RelEntry : Relocations) {
- uint64_t Offset = RelEntry.Offset +
- RelEntry.FixupSection->getSectionOffset();
+ encodeULEB128(Relocs.size(), W.OS);
+ for (const WasmRelocationEntry &RelEntry : Relocs) {
+ uint64_t Offset =
+ RelEntry.Offset + RelEntry.FixupSection->getSectionOffset();
uint32_t Index = getRelocationIndexValue(RelEntry);
W.OS << char(RelEntry.Type);
@@ -944,6 +980,7 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
switch (Sym.Kind) {
case wasm::WASM_SYMBOL_TYPE_FUNCTION:
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
+ case wasm::WASM_SYMBOL_TYPE_EVENT:
encodeULEB128(Sym.ElementIndex, W.OS);
if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0)
writeString(Sym.Name);
@@ -984,7 +1021,7 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
startSection(SubSection, wasm::WASM_INIT_FUNCS);
encodeULEB128(InitFuncs.size(), W.OS);
for (auto &StartFunc : InitFuncs) {
- encodeULEB128(StartFunc.first, W.OS); // priority
+ encodeULEB128(StartFunc.first, W.OS); // priority
encodeULEB128(StartFunc.second, W.OS); // function index
}
endSection(SubSection);
@@ -1029,30 +1066,57 @@ void WasmObjectWriter::writeCustomSections(const MCAssembler &Asm,
}
}
-uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm& Symbol) {
+uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm &Symbol) {
assert(Symbol.isFunction());
assert(TypeIndices.count(&Symbol));
return TypeIndices[&Symbol];
}
-uint32_t WasmObjectWriter::registerFunctionType(const MCSymbolWasm& Symbol) {
+uint32_t WasmObjectWriter::getEventType(const MCSymbolWasm &Symbol) {
+ assert(Symbol.isEvent());
+ assert(TypeIndices.count(&Symbol));
+ return TypeIndices[&Symbol];
+}
+
+void WasmObjectWriter::registerFunctionType(const MCSymbolWasm &Symbol) {
assert(Symbol.isFunction());
- WasmFunctionType F;
- const MCSymbolWasm* ResolvedSym = ResolveSymbol(Symbol);
- F.Returns = ResolvedSym->getReturns();
- F.Params = ResolvedSym->getParams();
+ WasmSignature S;
+ const MCSymbolWasm *ResolvedSym = ResolveSymbol(Symbol);
+ if (auto *Sig = ResolvedSym->getSignature()) {
+ S.Returns = Sig->Returns;
+ S.Params = Sig->Params;
+ }
- auto Pair =
- FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size()));
+ auto Pair = SignatureIndices.insert(std::make_pair(S, Signatures.size()));
if (Pair.second)
- FunctionTypes.push_back(F);
+ Signatures.push_back(S);
TypeIndices[&Symbol] = Pair.first->second;
LLVM_DEBUG(dbgs() << "registerFunctionType: " << Symbol
<< " new:" << Pair.second << "\n");
LLVM_DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n");
- return Pair.first->second;
+}
+
+void WasmObjectWriter::registerEventType(const MCSymbolWasm &Symbol) {
+ assert(Symbol.isEvent());
+
+ // TODO: Currently we don't generate imported exceptions, but if we do, we
+ // should have a way of inferring types of imported exceptions.
+ WasmSignature S;
+ if (auto *Sig = Symbol.getSignature()) {
+ S.Returns = Sig->Returns;
+ S.Params = Sig->Params;
+ }
+
+ auto Pair = SignatureIndices.insert(std::make_pair(S, Signatures.size()));
+ if (Pair.second)
+ Signatures.push_back(S);
+ TypeIndices[&Symbol] = Pair.first->second;
+
+ LLVM_DEBUG(dbgs() << "registerEventType: " << Symbol << " new:" << Pair.second
+ << "\n");
+ LLVM_DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n");
}
static bool isInSymtab(const MCSymbolWasm &Sym) {
@@ -1086,6 +1150,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
SmallVector<uint32_t, 4> TableElems;
SmallVector<wasm::WasmImport, 4> Imports;
SmallVector<wasm::WasmExport, 4> Exports;
+ SmallVector<wasm::WasmEventType, 1> Events;
SmallVector<wasm::WasmSymbolInfo, 4> SymbolInfos;
SmallVector<std::pair<uint16_t, uint32_t>, 2> InitFuncs;
std::map<StringRef, std::vector<WasmComdatEntry>> Comdats;
@@ -1111,10 +1176,10 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
TableImport.Module = TableSym->getModuleName();
TableImport.Field = TableSym->getName();
TableImport.Kind = wasm::WASM_EXTERNAL_TABLE;
- TableImport.Table.ElemType = wasm::WASM_TYPE_ANYFUNC;
+ TableImport.Table.ElemType = wasm::WASM_TYPE_FUNCREF;
Imports.push_back(TableImport);
- // Populate FunctionTypeIndices, and Imports and WasmIndices for undefined
+ // Populate SignatureIndices, and Imports and WasmIndices for undefined
// symbols. This must be done before populating WasmIndices for defined
// symbols.
for (const MCSymbol &S : Asm.symbols()) {
@@ -1125,6 +1190,9 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
if (WS.isFunction())
registerFunctionType(WS);
+ if (WS.isEvent())
+ registerEventType(WS);
+
if (WS.isTemporary())
continue;
@@ -1149,6 +1217,18 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
Import.Global = WS.getGlobalType();
Imports.push_back(Import);
WasmIndices[&WS] = NumGlobalImports++;
+ } else if (WS.isEvent()) {
+ if (WS.isWeak())
+ report_fatal_error("undefined event symbol cannot be weak");
+
+ wasm::WasmImport Import;
+ Import.Module = WS.getModuleName();
+ Import.Field = WS.getName();
+ Import.Kind = wasm::WASM_EXTERNAL_EVENT;
+ Import.Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION;
+ Import.Event.SigIndex = getEventType(WS);
+ Imports.push_back(Import);
+ WasmIndices[&WS] = NumEventImports++;
}
}
}
@@ -1176,7 +1256,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
Segment.Offset = DataSize;
Segment.Section = &Section;
addData(Segment.Data, Section);
- Segment.Alignment = Section.getAlignment();
+ Segment.Alignment = Log2_32(Section.getAlignment());
Segment.Flags = 0;
DataSize += Segment.Data.size();
Section.setSegmentIndex(SegmentIndex);
@@ -1195,7 +1275,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
if (Name.startswith(".custom_section."))
Name = Name.substr(strlen(".custom_section."));
- MCSymbol* Begin = Sec.getBeginSymbol();
+ MCSymbol *Begin = Sec.getBeginSymbol();
if (Begin) {
WasmIndices[cast<MCSymbolWasm>(Begin)] = CustomSections.size();
if (SectionName != Begin->getName())
@@ -1240,7 +1320,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// A definition. Write out the function body.
Index = NumFunctionImports + Functions.size();
WasmFunction Func;
- Func.Type = getFunctionType(WS);
+ Func.SigIndex = getFunctionType(WS);
Func.Sym = &WS;
WasmIndices[&WS] = Index;
Functions.push_back(Func);
@@ -1256,6 +1336,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
}
LLVM_DEBUG(dbgs() << " -> function index: " << Index << "\n");
+
} else if (WS.isData()) {
if (WS.isTemporary() && !WS.getSize())
continue;
@@ -1285,6 +1366,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
static_cast<uint32_t>(Size)};
DataLocations[&WS] = Ref;
LLVM_DEBUG(dbgs() << " -> segment index: " << Ref.Segment << "\n");
+
} else if (WS.isGlobal()) {
// A "true" Wasm global (currently just __stack_pointer)
if (WS.isDefined())
@@ -1293,6 +1375,24 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// An import; the index was assigned above
LLVM_DEBUG(dbgs() << " -> global index: "
<< WasmIndices.find(&WS)->second << "\n");
+
+ } else if (WS.isEvent()) {
+ // C++ exception symbol (__cpp_exception)
+ unsigned Index;
+ if (WS.isDefined()) {
+ Index = NumEventImports + Events.size();
+ wasm::WasmEventType Event;
+ Event.SigIndex = getEventType(WS);
+ Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION;
+ WasmIndices[&WS] = Index;
+ Events.push_back(Event);
+ } else {
+ // An import; the index was assigned above.
+ Index = WasmIndices.find(&WS)->second;
+ }
+ LLVM_DEBUG(dbgs() << " -> event index: " << WasmIndices.find(&WS)->second
+ << "\n");
+
} else {
assert(WS.isSection());
}
@@ -1326,7 +1426,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
DataLocations[&WS] = Ref;
LLVM_DEBUG(dbgs() << " -> index:" << Ref.Segment << "\n");
} else {
- report_fatal_error("don't yet support global aliases");
+ report_fatal_error("don't yet support global/event aliases");
}
}
@@ -1424,7 +1524,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
unsigned PrefixLength = strlen(".init_array");
if (WS.getSectionName().size() > PrefixLength) {
if (WS.getSectionName()[PrefixLength] != '.')
- report_fatal_error(".init_array section priority should start with '.'");
+ report_fatal_error(
+ ".init_array section priority should start with '.'");
if (WS.getSectionName()
.substr(PrefixLength + 1)
.getAsInteger(10, Priority))
@@ -1432,14 +1533,16 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
}
const auto &DataFrag = cast<MCDataFragment>(Frag);
const SmallVectorImpl<char> &Contents = DataFrag.getContents();
- for (const uint8_t *p = (const uint8_t *)Contents.data(),
- *end = (const uint8_t *)Contents.data() + Contents.size();
+ for (const uint8_t *
+ p = (const uint8_t *)Contents.data(),
+ *end = (const uint8_t *)Contents.data() + Contents.size();
p != end; ++p) {
if (*p != 0)
report_fatal_error("non-symbolic data in .init_array section");
}
for (const MCFixup &Fixup : DataFrag.getFixups()) {
- assert(Fixup.getKind() == MCFixup::getKindForSize(is64Bit() ? 8 : 4, false));
+ assert(Fixup.getKind() ==
+ MCFixup::getKindForSize(is64Bit() ? 8 : 4, false));
const MCExpr *Expr = Fixup.getValue();
auto *Sym = dyn_cast<MCSymbolRefExpr>(Expr);
if (!Sym)
@@ -1456,12 +1559,13 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// Write out the Wasm header.
writeHeader(Asm);
- writeTypeSection(FunctionTypes);
+ writeTypeSection(Signatures);
writeImportSection(Imports, DataSize, TableElems.size());
writeFunctionSection(Functions);
// Skip the "table" section; we import the table instead.
// Skip the "memory" section; we import the memory instead.
writeGlobalSection();
+ writeEventSection(Events);
writeExportSection(Exports);
writeElemSection(TableElems);
writeCodeSection(Asm, Layout, Functions);
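Several hunks in this file revolve around WritePatchableLEB/WritePatchableSLEB, which emit relocation targets as fixed-width five-byte LEB values so they can be patched in place once final indices are known. The sketch below reproduces that padded ULEB128 encoding as standalone C++ under that assumption; it is not the llvm/Support/LEB128.h implementation, just an illustration of the byte layout.

    #include <cstdint>
    #include <cstdio>

    // Emit X as ULEB128, padded with continuation bytes up to exactly PadTo bytes.
    static unsigned encodeULEB128Padded(uint32_t X, uint8_t *Buf, unsigned PadTo) {
      unsigned N = 0;
      do {
        uint8_t Byte = X & 0x7f;
        X >>= 7;
        if (X != 0 || N + 1 < PadTo)
          Byte |= 0x80; // more bytes follow
        Buf[N++] = Byte;
      } while (X != 0 || N < PadTo);
      return N;
    }

    int main() {
      uint8_t Buf[5];
      unsigned Len = encodeULEB128Padded(624485, Buf, 5);
      for (unsigned I = 0; I < Len; ++I)
        printf("%02X ", Buf[I]); // prints: E5 8E A6 80 00
      printf("\n");
      return 0;
    }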
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 9ffecd99df68..b774852eabe6 100644
--- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/JamCRC.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -58,8 +59,6 @@ namespace {
using name = SmallString<COFF::NameSize>;
enum AuxiliaryType {
- ATFunctionDefinition,
- ATbfAndefSymbol,
ATWeakExternal,
ATFile,
ATSectionDefinition
@@ -147,6 +146,10 @@ public:
bool UseBigObj;
+ bool EmitAddrsigSection = false;
+ MCSectionCOFF *AddrsigSection;
+ std::vector<const MCSymbol *> AddrsigSyms;
+
WinCOFFObjectWriter(std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS);
@@ -206,6 +209,11 @@ public:
void assignSectionNumbers();
void assignFileOffsets(MCAssembler &Asm, const MCAsmLayout &Layout);
+ void emitAddrsigSection() override { EmitAddrsigSection = true; }
+ void addAddrsigSymbol(const MCSymbol *Sym) override {
+ AddrsigSyms.push_back(Sym);
+ }
+
uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
};
@@ -515,24 +523,6 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols(
const COFFSymbol::AuxiliarySymbols &S) {
for (const AuxSymbol &i : S) {
switch (i.AuxType) {
- case ATFunctionDefinition:
- W.write<uint32_t>(i.Aux.FunctionDefinition.TagIndex);
- W.write<uint32_t>(i.Aux.FunctionDefinition.TotalSize);
- W.write<uint32_t>(i.Aux.FunctionDefinition.PointerToLinenumber);
- W.write<uint32_t>(i.Aux.FunctionDefinition.PointerToNextFunction);
- W.OS.write_zeros(sizeof(i.Aux.FunctionDefinition.unused));
- if (UseBigObj)
- W.OS.write_zeros(COFF::Symbol32Size - COFF::Symbol16Size);
- break;
- case ATbfAndefSymbol:
- W.OS.write_zeros(sizeof(i.Aux.bfAndefSymbol.unused1));
- W.write<uint16_t>(i.Aux.bfAndefSymbol.Linenumber);
- W.OS.write_zeros(sizeof(i.Aux.bfAndefSymbol.unused2));
- W.write<uint32_t>(i.Aux.bfAndefSymbol.PointerToNextFunction);
- W.OS.write_zeros(sizeof(i.Aux.bfAndefSymbol.unused3));
- if (UseBigObj)
- W.OS.write_zeros(COFF::Symbol32Size - COFF::Symbol16Size);
- break;
case ATWeakExternal:
W.write<uint32_t>(i.Aux.WeakExternal.TagIndex);
W.write<uint32_t>(i.Aux.WeakExternal.Characteristics);
@@ -568,10 +558,9 @@ void WinCOFFObjectWriter::writeSectionHeaders() {
std::vector<COFFSection *> Arr;
for (auto &Section : Sections)
Arr.push_back(Section.get());
- llvm::sort(Arr.begin(), Arr.end(),
- [](const COFFSection *A, const COFFSection *B) {
- return A->Number < B->Number;
- });
+ llvm::sort(Arr, [](const COFFSection *A, const COFFSection *B) {
+ return A->Number < B->Number;
+ });
for (auto &Section : Arr) {
if (Section->Number == -1)
@@ -630,14 +619,9 @@ void WinCOFFObjectWriter::writeSection(MCAssembler &Asm,
// Write the section contents.
if (Sec.Header.PointerToRawData != 0) {
- assert(W.OS.tell() <= Sec.Header.PointerToRawData &&
+ assert(W.OS.tell() == Sec.Header.PointerToRawData &&
"Section::PointerToRawData is insane!");
- unsigned PaddingSize = Sec.Header.PointerToRawData - W.OS.tell();
- assert(PaddingSize < 4 &&
- "Should only need at most three bytes of padding!");
- W.OS.write_zeros(PaddingSize);
-
uint32_t CRC = writeSectionContents(Asm, Layout, MCSec);
// Update the section definition auxiliary symbol to record the CRC.
@@ -677,6 +661,13 @@ void WinCOFFObjectWriter::writeSection(MCAssembler &Asm,
void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
+ if (EmitAddrsigSection) {
+ AddrsigSection = Asm.getContext().getCOFFSection(
+ ".llvm_addrsig", COFF::IMAGE_SCN_LNK_REMOVE,
+ SectionKind::getMetadata());
+ Asm.registerSection(*AddrsigSection);
+ }
+
// "Define" each section & symbol. This creates section & symbol
// entries in the staging area.
for (const auto &Section : Asm)
@@ -915,10 +906,7 @@ void WinCOFFObjectWriter::assignFileOffsets(MCAssembler &Asm,
Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(&Section);
if (IsPhysicalSection(Sec)) {
- // Align the section data to a four byte boundary.
- Offset = alignTo(Offset, 4);
Sec->Header.PointerToRawData = Offset;
-
Offset += Sec->Header.SizeOfRawData;
}
@@ -1020,22 +1008,47 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
continue;
const MCSectionCOFF &MCSec = *Section->MCSection;
+ const MCSymbol *AssocMCSym = MCSec.getCOMDATSymbol();
+ assert(AssocMCSym);
+
+ // It's an error to try to associate with an undefined symbol or a symbol
+ // without a section.
+ if (!AssocMCSym->isInSection()) {
+ Asm.getContext().reportError(
+ SMLoc(), Twine("cannot make section ") + MCSec.getSectionName() +
+ Twine(" associative with sectionless symbol ") +
+ AssocMCSym->getName());
+ continue;
+ }
- const MCSymbol *COMDAT = MCSec.getCOMDATSymbol();
- assert(COMDAT);
- COFFSymbol *COMDATSymbol = GetOrCreateCOFFSymbol(COMDAT);
- assert(COMDATSymbol);
- COFFSection *Assoc = COMDATSymbol->Section;
- if (!Assoc)
- report_fatal_error(
- Twine("Missing associated COMDAT section for section ") +
- MCSec.getSectionName());
+ const auto *AssocMCSec = cast<MCSectionCOFF>(&AssocMCSym->getSection());
+ assert(SectionMap.count(AssocMCSec));
+ COFFSection *AssocSec = SectionMap[AssocMCSec];
// Skip this section if the associated section is unused.
- if (Assoc->Number == -1)
+ if (AssocSec->Number == -1)
continue;
- Section->Symbol->Aux[0].Aux.SectionDefinition.Number = Assoc->Number;
+ Section->Symbol->Aux[0].Aux.SectionDefinition.Number = AssocSec->Number;
+ }
+
+ // Create the contents of the .llvm_addrsig section.
+ if (EmitAddrsigSection) {
+ auto Frag = new MCDataFragment(AddrsigSection);
+ Frag->setLayoutOrder(0);
+ raw_svector_ostream OS(Frag->getContents());
+ for (const MCSymbol *S : AddrsigSyms) {
+ if (!S->isTemporary()) {
+ encodeULEB128(S->getIndex(), OS);
+ continue;
+ }
+
+ MCSection *TargetSection = &S->getSection();
+ assert(SectionMap.find(TargetSection) != SectionMap.end() &&
+ "Section must already have been defined in "
+ "executePostLayoutBinding!");
+ encodeULEB128(SectionMap[TargetSection]->Symbol->getIndex(), OS);
+ }
}
assignFileOffsets(Asm, Layout);
diff --git a/contrib/llvm/lib/MCA/Context.cpp b/contrib/llvm/lib/MCA/Context.cpp
new file mode 100644
index 000000000000..c1b197dfe2e6
--- /dev/null
+++ b/contrib/llvm/lib/MCA/Context.cpp
@@ -0,0 +1,65 @@
+//===---------------------------- Context.cpp -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a class for holding ownership of various simulated
+/// hardware units. A Context also provides a utility routine for constructing
+/// a default out-of-order pipeline with fetch, dispatch, execute, and retire
+/// stages.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Context.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Stages/DispatchStage.h"
+#include "llvm/MCA/Stages/EntryStage.h"
+#include "llvm/MCA/Stages/ExecuteStage.h"
+#include "llvm/MCA/Stages/RetireStage.h"
+
+namespace llvm {
+namespace mca {
+
+std::unique_ptr<Pipeline>
+Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
+ SourceMgr &SrcMgr) {
+ const MCSchedModel &SM = STI.getSchedModel();
+
+ // Create the hardware units defining the backend.
+ auto RCU = llvm::make_unique<RetireControlUnit>(SM);
+ auto PRF = llvm::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
+ auto LSU = llvm::make_unique<LSUnit>(SM, Opts.LoadQueueSize,
+ Opts.StoreQueueSize, Opts.AssumeNoAlias);
+ auto HWS = llvm::make_unique<Scheduler>(SM, *LSU);
+
+ // Create the pipeline stages.
+ auto Fetch = llvm::make_unique<EntryStage>(SrcMgr);
+ auto Dispatch = llvm::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth,
+ *RCU, *PRF);
+ auto Execute = llvm::make_unique<ExecuteStage>(*HWS);
+ auto Retire = llvm::make_unique<RetireStage>(*RCU, *PRF);
+
+ // Pass the ownership of all the hardware units to this Context.
+ addHardwareUnit(std::move(RCU));
+ addHardwareUnit(std::move(PRF));
+ addHardwareUnit(std::move(LSU));
+ addHardwareUnit(std::move(HWS));
+
+ // Build the pipeline.
+ auto StagePipeline = llvm::make_unique<Pipeline>();
+ StagePipeline->appendStage(std::move(Fetch));
+ StagePipeline->appendStage(std::move(Dispatch));
+ StagePipeline->appendStage(std::move(Execute));
+ StagePipeline->appendStage(std::move(Retire));
+ return StagePipeline;
+}
+
+} // namespace mca
+} // namespace llvm
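createDefaultPipeline above follows a consistent ownership pattern: hardware units are allocated first, the pipeline stages capture plain references to them, and the Context then takes ownership of the unique_ptrs so the units outlive the stages. A hedged, generic sketch of that pattern (placeholder class names, not the llvm::mca API):

    #include <memory>
    #include <vector>

    struct Unit { virtual ~Unit() = default; };
    struct RetireUnit : Unit {};
    struct Stage {
      explicit Stage(Unit &U) : Owned(U) {}
      Unit &Owned; // non-owning reference into the context
    };

    struct Context {
      std::vector<std::unique_ptr<Unit>> Units;
      std::vector<std::unique_ptr<Stage>> Stages;
      void build() {
        auto RCU = std::make_unique<RetireUnit>();
        Stages.push_back(std::make_unique<Stage>(*RCU)); // stage built against the unit...
        Units.push_back(std::move(RCU));                 // ...then ownership moves to the context
      }
    };

    int main() { Context C; C.build(); return 0; }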
diff --git a/contrib/llvm/tools/llvm-mca/HWEventListener.cpp b/contrib/llvm/lib/MCA/HWEventListener.cpp
index f27a04a9a980..4a0e5b1754dd 100644
--- a/contrib/llvm/tools/llvm-mca/HWEventListener.cpp
+++ b/contrib/llvm/lib/MCA/HWEventListener.cpp
@@ -12,10 +12,12 @@
///
//===----------------------------------------------------------------------===//
-#include "HWEventListener.h"
+#include "llvm/MCA/HWEventListener.h"
+namespace llvm {
namespace mca {
// Anchor the vtable here.
void HWEventListener::anchor() {}
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/HardwareUnit.cpp b/contrib/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp
index 103cde9afcc8..edd32b9c0c1a 100644
--- a/contrib/llvm/tools/llvm-mca/HardwareUnit.cpp
+++ b/contrib/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp
@@ -13,11 +13,13 @@
///
//===----------------------------------------------------------------------===//
-#include "HardwareUnit.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+namespace llvm {
namespace mca {
// Pin the vtable with this method.
HardwareUnit::~HardwareUnit() = default;
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/contrib/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
new file mode 100644
index 000000000000..8895eb392b60
--- /dev/null
+++ b/contrib/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -0,0 +1,190 @@
+//===----------------------- LSUnit.cpp --------------------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A Load-Store Unit for the llvm-mca tool.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+LSUnit::LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
+ bool AssumeNoAlias)
+ : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ if (!LQ_Size && EPI.LoadQueueID) {
+ const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID);
+ LQ_Size = LdQDesc.BufferSize;
+ }
+
+ if (!SQ_Size && EPI.StoreQueueID) {
+ const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID);
+ SQ_Size = StQDesc.BufferSize;
+ }
+ }
+}
+
+#ifndef NDEBUG
+void LSUnit::dump() const {
+ dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n';
+ dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n';
+ dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n';
+ dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n';
+}
+#endif
+
+void LSUnit::assignLQSlot(unsigned Index) {
+ assert(!isLQFull());
+ assert(LoadQueue.count(Index) == 0);
+
+ LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << Index
+ << ",slot=" << LoadQueue.size() << ">\n");
+ LoadQueue.insert(Index);
+}
+
+void LSUnit::assignSQSlot(unsigned Index) {
+ assert(!isSQFull());
+ assert(StoreQueue.count(Index) == 0);
+
+ LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << Index
+ << ",slot=" << StoreQueue.size() << ">\n");
+ StoreQueue.insert(Index);
+}
+
+void LSUnit::dispatch(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ unsigned IsMemBarrier = Desc.HasSideEffects;
+ assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
+
+ const unsigned Index = IR.getSourceIndex();
+ if (Desc.MayLoad) {
+ if (IsMemBarrier)
+ LoadBarriers.insert(Index);
+ assignLQSlot(Index);
+ }
+
+ if (Desc.MayStore) {
+ if (IsMemBarrier)
+ StoreBarriers.insert(Index);
+ assignSQSlot(Index);
+ }
+}
+
+LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ if (Desc.MayLoad && isLQFull())
+ return LSUnit::LSU_LQUEUE_FULL;
+ if (Desc.MayStore && isSQFull())
+ return LSUnit::LSU_SQUEUE_FULL;
+ return LSUnit::LSU_AVAILABLE;
+}
+
+bool LSUnit::isReady(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ const unsigned Index = IR.getSourceIndex();
+ bool IsALoad = Desc.MayLoad;
+ bool IsAStore = Desc.MayStore;
+ assert((IsALoad || IsAStore) && "Not a memory operation!");
+ assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!");
+ assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!");
+
+ if (IsALoad && !LoadBarriers.empty()) {
+ unsigned LoadBarrierIndex = *LoadBarriers.begin();
+ // A younger load cannot pass an older load barrier.
+ if (Index > LoadBarrierIndex)
+ return false;
+ // A load barrier cannot pass an older load.
+ if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
+ return false;
+ }
+
+ if (IsAStore && !StoreBarriers.empty()) {
+ unsigned StoreBarrierIndex = *StoreBarriers.begin();
+ // A younger store cannot pass an older store barrier.
+ if (Index > StoreBarrierIndex)
+ return false;
+ // A store barrier cannot pass an older store.
+ if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
+ return false;
+ }
+
+ // A load may not pass a previous store unless the 'NoAlias' flag is set.
+ // A load may pass a previous load.
+ if (NoAlias && IsALoad)
+ return true;
+
+ if (StoreQueue.size()) {
+ // A load may not pass a previous store.
+ // A store may not pass a previous store.
+ if (Index > *StoreQueue.begin())
+ return false;
+ }
+
+ // Okay, we are older than the oldest store in the queue.
+ // If there are no pending loads, then we can say for sure that this
+ // instruction is ready.
+ if (isLQEmpty())
+ return true;
+
+ // Check if there are no older loads.
+ if (Index <= *LoadQueue.begin())
+ return true;
+
+ // There is at least one younger load.
+ //
+ // A store may not pass a previous load.
+ // A load may pass a previous load.
+ return !IsAStore;
+}
+
+void LSUnit::onInstructionExecuted(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ const unsigned Index = IR.getSourceIndex();
+ bool IsALoad = Desc.MayLoad;
+ bool IsAStore = Desc.MayStore;
+
+ if (IsALoad) {
+ if (LoadQueue.erase(Index)) {
+ LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the load queue.\n");
+ }
+ if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) {
+ LLVM_DEBUG(
+ dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the set of load barriers.\n");
+ LoadBarriers.erase(Index);
+ }
+ }
+
+ if (IsAStore) {
+ if (StoreQueue.erase(Index)) {
+ LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the store queue.\n");
+ }
+
+ if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) {
+ LLVM_DEBUG(
+ dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the set of store barriers.\n");
+ StoreBarriers.erase(Index);
+ }
+ }
+}
+
+} // namespace mca
+} // namespace llvm
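The ordering rules in LSUnit::isReady lean on the fact that source indices are program order and the queues are ordered sets, so *Queue.begin() is always the oldest in-flight entry. The following simplified sketch applies just the load-barrier rule (ignoring stores and the NoAlias case); the function and variable names are illustrative, not the LSUnit API.

    #include <cstdio>
    #include <set>

    static bool loadIsReady(unsigned Index, const std::set<unsigned> &LoadQueue,
                            const std::set<unsigned> &LoadBarriers) {
      if (!LoadBarriers.empty()) {
        unsigned Barrier = *LoadBarriers.begin();
        if (Index > Barrier)
          return false; // a younger load cannot pass an older load barrier
        if (Index == Barrier && Index != *LoadQueue.begin())
          return false; // a load barrier cannot pass an older load
      }
      return true;
    }

    int main() {
      std::set<unsigned> LoadQueue = {3, 5, 7};
      std::set<unsigned> LoadBarriers = {5};
      printf("%d %d %d\n",
             loadIsReady(3, LoadQueue, LoadBarriers),  // 1: older than the barrier
             loadIsReady(5, LoadQueue, LoadBarriers),  // 0: is the barrier, but load #3 is older
             loadIsReady(7, LoadQueue, LoadBarriers)); // 0: younger than the barrier
      return 0;
    }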
diff --git a/contrib/llvm/tools/llvm-mca/RegisterFile.cpp b/contrib/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
index 44de105b8996..22977e5ded65 100644
--- a/contrib/llvm/tools/llvm-mca/RegisterFile.cpp
+++ b/contrib/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -14,20 +14,20 @@
///
//===----------------------------------------------------------------------===//
-#include "RegisterFile.h"
-#include "Instruction.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/Instruction.h"
#include "llvm/Support/Debug.h"
-using namespace llvm;
-
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
-RegisterFile::RegisterFile(const llvm::MCSchedModel &SM,
- const llvm::MCRegisterInfo &mri, unsigned NumRegs)
- : MRI(mri), RegisterMappings(mri.getNumRegs(),
- {WriteRef(), {IndexPlusCostPairTy(0, 1), 0}}) {
+RegisterFile::RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
+ unsigned NumRegs)
+ : MRI(mri),
+ RegisterMappings(mri.getNumRegs(), {WriteRef(), RegisterRenamingInfo()}),
+ ZeroRegisters(mri.getNumRegs(), false) {
initialize(SM, NumRegs);
}
@@ -36,7 +36,7 @@ void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) {
// declared by the target. The number of physical registers in the default
// register file is set equal to `NumRegs`. A value of zero for `NumRegs`
// means: this register file has an unbounded number of physical registers.
- addRegisterFile({} /* all registers */, NumRegs);
+ RegisterFiles.emplace_back(NumRegs);
if (!SM.hasExtraProcessorInfo())
return;
@@ -44,30 +44,36 @@ void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) {
// object. The size of every register file, as well as the mapping between
// register files and register classes is specified via tablegen.
const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo();
- for (unsigned I = 0, E = Info.NumRegisterFiles; I < E; ++I) {
+
+ // Skip invalid register file at index 0.
+ for (unsigned I = 1, E = Info.NumRegisterFiles; I < E; ++I) {
const MCRegisterFileDesc &RF = Info.RegisterFiles[I];
- // Skip invalid register files with zero physical registers.
- unsigned Length = RF.NumRegisterCostEntries;
- if (!RF.NumPhysRegs)
- continue;
+ assert(RF.NumPhysRegs && "Invalid PRF with zero physical registers!");
+
// The cost of a register definition is equivalent to the number of
// physical registers that are allocated at register renaming stage.
+ unsigned Length = RF.NumRegisterCostEntries;
const MCRegisterCostEntry *FirstElt =
&Info.RegisterCostTable[RF.RegisterCostEntryIdx];
- addRegisterFile(ArrayRef<MCRegisterCostEntry>(FirstElt, Length),
- RF.NumPhysRegs);
+ addRegisterFile(RF, ArrayRef<MCRegisterCostEntry>(FirstElt, Length));
}
}
-void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries,
- unsigned NumPhysRegs) {
+void RegisterFile::cycleStart() {
+ for (RegisterMappingTracker &RMT : RegisterFiles)
+ RMT.NumMoveEliminated = 0;
+}
+
+void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF,
+ ArrayRef<MCRegisterCostEntry> Entries) {
// A default register file is always allocated at index #0. That register file
// is mainly used to count the total number of mappings created by all
// register files at runtime. Users can limit the number of available physical
// registers in register file #0 through the command line flag
// `-register-file-size`.
unsigned RegisterFileIndex = RegisterFiles.size();
- RegisterFiles.emplace_back(NumPhysRegs);
+ RegisterFiles.emplace_back(RF.NumPhysRegs, RF.MaxMovesEliminatedPerCycle,
+ RF.AllowZeroMoveEliminationOnly);
// Special case where there is no register class identifier in the set.
// An empty set of register classes means: this register file contains all
@@ -93,6 +99,7 @@ void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries,
}
IPC = std::make_pair(RegisterFileIndex, RCE.Cost);
Entry.RenameAs = Reg;
+ Entry.AllowMoveElimination = RCE.AllowMoveElimination;
// Assume the same cost for each sub-register.
for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) {
@@ -139,8 +146,7 @@ void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry,
}
void RegisterFile::addRegisterWrite(WriteRef Write,
- MutableArrayRef<unsigned> UsedPhysRegs,
- bool ShouldAllocatePhysRegs) {
+ MutableArrayRef<unsigned> UsedPhysRegs) {
WriteState &WS = *Write.getWriteState();
unsigned RegID = WS.getRegisterID();
assert(RegID && "Adding an invalid register definition?");
@@ -163,10 +169,15 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// a false dependency on RenameAs. The only exception is for when the write
// implicitly clears the upper portion of the underlying register.
// If a write clears its super-registers, then it is renamed as `RenameAs`.
+ bool IsWriteZero = WS.isWriteZero();
+ bool IsEliminated = WS.isEliminated();
+ bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated;
const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ WS.setPRF(RRI.IndexPlusCost.first);
+
if (RRI.RenameAs && RRI.RenameAs != RegID) {
RegID = RRI.RenameAs;
- const WriteRef &OtherWrite = RegisterMappings[RegID].first;
+ WriteRef &OtherWrite = RegisterMappings[RegID].first;
if (!WS.clearsSuperRegisters()) {
// The processor keeps the definition of `RegID` together with register
@@ -174,34 +185,69 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// register is allocated.
ShouldAllocatePhysRegs = false;
- if (OtherWrite.getSourceIndex() != Write.getSourceIndex()) {
+ WriteState *OtherWS = OtherWrite.getWriteState();
+ if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
// This partial write has a false dependency on RenameAs.
- WS.setDependentWrite(OtherWrite.getWriteState());
+ assert(!IsEliminated && "Unexpected partial update!");
+ OtherWS->addUser(&WS);
}
}
}
- // Update the mapping for register RegID including its sub-registers.
- RegisterMappings[RegID].first = Write;
- for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I)
- RegisterMappings[*I].first = Write;
+ // Update zero registers.
+ unsigned ZeroRegisterID =
+ WS.clearsSuperRegisters() ? RegID : WS.getRegisterID();
+ if (IsWriteZero) {
+ ZeroRegisters.setBit(ZeroRegisterID);
+ for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
+ ZeroRegisters.setBit(*I);
+ } else {
+ ZeroRegisters.clearBit(ZeroRegisterID);
+ for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
+ ZeroRegisters.clearBit(*I);
+ }
+
+ // If this move has been eliminated, then the call to tryEliminateMove
+ // should have already updated all the register mappings.
+ if (!IsEliminated) {
+ // Update the mapping for register RegID including its sub-registers.
+ RegisterMappings[RegID].first = Write;
+ RegisterMappings[RegID].second.AliasRegID = 0U;
+ for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ RegisterMappings[*I].first = Write;
+ RegisterMappings[*I].second.AliasRegID = 0U;
+ }
- // No physical registers are allocated for instructions that are optimized in
- // hardware. For example, zero-latency data-dependency breaking instructions
- // don't consume physical registers.
- if (ShouldAllocatePhysRegs)
- allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
+ // No physical registers are allocated for instructions that are optimized
+ // in hardware. For example, zero-latency data-dependency breaking
+ // instructions don't consume physical registers.
+ if (ShouldAllocatePhysRegs)
+ allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
+ }
if (!WS.clearsSuperRegisters())
return;
- for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I)
- RegisterMappings[*I].first = Write;
+ for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ if (!IsEliminated) {
+ RegisterMappings[*I].first = Write;
+ RegisterMappings[*I].second.AliasRegID = 0U;
+ }
+
+ if (IsWriteZero)
+ ZeroRegisters.setBit(*I);
+ else
+ ZeroRegisters.clearBit(*I);
+ }
}
-void RegisterFile::removeRegisterWrite(const WriteState &WS,
- MutableArrayRef<unsigned> FreedPhysRegs,
- bool ShouldFreePhysRegs) {
+void RegisterFile::removeRegisterWrite(
+ const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) {
+ // Early exit if this write was eliminated. A write eliminated at register
+ // renaming stage generates an alias, and it is not added to the PRF.
+ if (WS.isEliminated())
+ return;
+
unsigned RegID = WS.getRegisterID();
assert(RegID != 0 && "Invalidating an already invalid register?");
@@ -209,6 +255,7 @@ void RegisterFile::removeRegisterWrite(const WriteState &WS,
"Invalidating a write of unknown cycles!");
assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!");
+ bool ShouldFreePhysRegs = !WS.isWriteZero();
unsigned RenameAs = RegisterMappings[RegID].second.RenameAs;
if (RenameAs && RenameAs != RegID) {
RegID = RenameAs;
@@ -242,11 +289,88 @@ void RegisterFile::removeRegisterWrite(const WriteState &WS,
}
}
-void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes,
- unsigned RegID) const {
+bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
+ const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()];
+ const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()];
+
+ // From and To must be owned by the same PRF.
+ const RegisterRenamingInfo &RRIFrom = RMFrom.second;
+ const RegisterRenamingInfo &RRITo = RMTo.second;
+ unsigned RegisterFileIndex = RRIFrom.IndexPlusCost.first;
+ if (RegisterFileIndex != RRITo.IndexPlusCost.first)
+ return false;
+
+ // We only allow move elimination for writes that update a full physical
+ // register. On X86, move elimination is possible with 32-bit general purpose
+ // registers because writes to those registers are not partial writes. If a
+ // register move is a partial write, then we conservatively assume that move
+ // elimination fails, since it would either trigger a partial update, or the
+ // issue of a merge opcode.
+ //
+ // Note that this constraint may be lifted in the future. For example, we could
+ // make this model more flexible, and let users customize the set of registers
+ // (i.e. register classes) that allow move elimination.
+ //
+ // For now, we assume that there is a strong correlation between registers
+ // that allow move elimination, and how those same registers are renamed in
+ // hardware.
+ if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID()) {
+ // Early exit if the PRF doesn't support move elimination for this register.
+ if (!RegisterMappings[RRITo.RenameAs].second.AllowMoveElimination)
+ return false;
+ if (!WS.clearsSuperRegisters())
+ return false;
+ }
+
+ RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
+ if (RMT.MaxMoveEliminatedPerCycle &&
+ RMT.NumMoveEliminated == RMT.MaxMoveEliminatedPerCycle)
+ return false;
+
+ bool IsZeroMove = ZeroRegisters[RS.getRegisterID()];
+ if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove)
+ return false;
+
+ MCPhysReg FromReg = RS.getRegisterID();
+ MCPhysReg ToReg = WS.getRegisterID();
+
+ // Construct an alias.
+ MCPhysReg AliasReg = FromReg;
+ if (RRIFrom.RenameAs)
+ AliasReg = RRIFrom.RenameAs;
+
+ const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasReg].second;
+ if (RMAlias.AliasRegID)
+ AliasReg = RMAlias.AliasRegID;
+
+ if (AliasReg != ToReg) {
+ RegisterMappings[ToReg].second.AliasRegID = AliasReg;
+ for (MCSubRegIterator I(ToReg, &MRI); I.isValid(); ++I)
+ RegisterMappings[*I].second.AliasRegID = AliasReg;
+ }
+
+ RMT.NumMoveEliminated++;
+ if (IsZeroMove) {
+ WS.setWriteZero();
+ RS.setReadZero();
+ }
+ WS.setEliminated();
+
+ return true;
+}
+
+void RegisterFile::collectWrites(const ReadState &RS,
+ SmallVectorImpl<WriteRef> &Writes) const {
+ unsigned RegID = RS.getRegisterID();
assert(RegID && RegID < RegisterMappings.size());
LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
<< MRI.getName(RegID) << '\n');
+
+ // Check if this is an alias.
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ if (RRI.AliasRegID)
+ RegID = RRI.AliasRegID;
+
const WriteRef &WR = RegisterMappings[RegID].first;
if (WR.isValid())
Writes.push_back(WR);
@@ -259,23 +383,38 @@ void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes,
}
// Remove duplicate entries and resize the input vector.
- llvm::sort(Writes.begin(), Writes.end(),
- [](const WriteRef &Lhs, const WriteRef &Rhs) {
- return Lhs.getWriteState() < Rhs.getWriteState();
- });
- auto It = std::unique(Writes.begin(), Writes.end());
- Writes.resize(std::distance(Writes.begin(), It));
+ if (Writes.size() > 1) {
+ sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) {
+ return Lhs.getWriteState() < Rhs.getWriteState();
+ });
+ auto It = std::unique(Writes.begin(), Writes.end());
+ Writes.resize(std::distance(Writes.begin(), It));
+ }
LLVM_DEBUG({
for (const WriteRef &WR : Writes) {
const WriteState &WS = *WR.getWriteState();
dbgs() << "[PRF] Found a dependent use of Register "
- << MRI.getName(WS.getRegisterID()) << " (defined by intruction #"
+ << MRI.getName(WS.getRegisterID()) << " (defined by instruction #"
<< WR.getSourceIndex() << ")\n";
}
});
}
+void RegisterFile::addRegisterRead(ReadState &RS,
+ SmallVectorImpl<WriteRef> &Defs) const {
+ unsigned RegID = RS.getRegisterID();
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ RS.setPRF(RRI.IndexPlusCost.first);
+ if (RS.isIndependentFromDef())
+ return;
+
+ if (ZeroRegisters[RS.getRegisterID()])
+ RS.setReadZero();
+ collectWrites(RS, Defs);
+ RS.setDependentWrites(Defs.size());
+}
+
unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles());
@@ -306,8 +445,14 @@ unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
// microarchitectural registers in register file #0 was changed by the
// users via flag -reg-file-size. Alternatively, the scheduling model
// specified a too small number of registers for this register file.
- report_fatal_error(
- "Not enough microarchitectural registers in the register file");
+ LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n");
+
+ // FIXME: Normalize the instruction register count to match the
+ // NumPhysRegs value. This is a highly unusual case, and is not expected
+ // to occur. This normalization is hiding an inconsistency in either the
+ // scheduling model or in the value that the user might have specified
+ // for NumPhysRegs.
+ NumRegs = RMT.NumPhysRegs;
}
if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs))
@@ -321,14 +466,16 @@ unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
void RegisterFile::dump() const {
for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) {
const RegisterMapping &RM = RegisterMappings[I];
- if (!RM.first.getWriteState())
- continue;
const RegisterRenamingInfo &RRI = RM.second;
- dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << RRI.IndexPlusCost.first
- << ", Cost=" << RRI.IndexPlusCost.second
- << ", RenameAs=" << RRI.RenameAs << ", ";
- RM.first.dump();
- dbgs() << '\n';
+ if (ZeroRegisters[I]) {
+ dbgs() << MRI.getName(I) << ", " << I
+ << ", PRF=" << RRI.IndexPlusCost.first
+ << ", Cost=" << RRI.IndexPlusCost.second
+ << ", RenameAs=" << RRI.RenameAs << ", IsZero=" << ZeroRegisters[I]
+ << ",";
+ RM.first.dump();
+ dbgs() << '\n';
+ }
}
for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) {
@@ -341,3 +488,4 @@ void RegisterFile::dump() const {
#endif
} // namespace mca
+} // namespace llvm
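The move-elimination changes in this file hinge on aliasing rather than allocation: an eliminated move consumes no physical register, the destination's mapping records an AliasRegID, and later reads of the destination are redirected through that alias when dependent writes are collected. A hedged sketch of the idea, with plain arrays standing in for the RegisterMappings table (not the mca::RegisterFile API):

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned NumRegs = 8;
      std::vector<unsigned> AliasRegID(NumRegs, 0); // 0 means "no alias"
      std::vector<int> LastWriter(NumRegs, -1);     // instruction index of the last definition

      LastWriter[3] = 7;  // instruction #7 defines r3
      AliasRegID[5] = 3;  // "mov r5, r3" is eliminated: r5 now aliases r3

      unsigned ReadReg = 5;
      if (AliasRegID[ReadReg])
        ReadReg = AliasRegID[ReadReg]; // redirect the read through the alias
      printf("read of r5 depends on instruction #%d via r%u\n",
             LastWriter[ReadReg], ReadReg); // instruction #7 via r3
      return 0;
    }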
diff --git a/contrib/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/contrib/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
new file mode 100644
index 000000000000..2039b58e8ee5
--- /dev/null
+++ b/contrib/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -0,0 +1,331 @@
+//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The classes here represent processor resource units and their management
+/// strategy. These classes are managed by the Scheduler.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/ResourceManager.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+ResourceStrategy::~ResourceStrategy() = default;
+
+// Returns the index of the highest bit set. For resource masks, the position of
+// the highest bit set can be used to construct a resource mask identifier.
+static unsigned getResourceStateIndex(uint64_t Mask) {
+ return std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask);
+}
+
+static uint64_t selectImpl(uint64_t CandidateMask,
+ uint64_t &NextInSequenceMask) {
+ // The upper bit set in CandidateMask identifies our next candidate resource.
+ CandidateMask = 1ULL << (getResourceStateIndex(CandidateMask) - 1);
+ NextInSequenceMask &= (CandidateMask | (CandidateMask - 1));
+ return CandidateMask;
+}
+
+uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) {
+ // This method assumes that ReadyMask cannot be zero.
+ uint64_t CandidateMask = ReadyMask & NextInSequenceMask;
+ if (CandidateMask)
+ return selectImpl(CandidateMask, NextInSequenceMask);
+
+ NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence;
+ RemovedFromNextInSequence = 0;
+ CandidateMask = ReadyMask & NextInSequenceMask;
+ if (CandidateMask)
+ return selectImpl(CandidateMask, NextInSequenceMask);
+
+ NextInSequenceMask = ResourceUnitMask;
+ CandidateMask = ReadyMask & NextInSequenceMask;
+ return selectImpl(CandidateMask, NextInSequenceMask);
+}
+
+void DefaultResourceStrategy::used(uint64_t Mask) {
+ if (Mask > NextInSequenceMask) {
+ RemovedFromNextInSequence |= Mask;
+ return;
+ }
+
+ NextInSequenceMask &= (~Mask);
+ if (NextInSequenceMask)
+ return;
+
+ NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence;
+ RemovedFromNextInSequence = 0;
+}
+
+ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index,
+ uint64_t Mask)
+ : ProcResourceDescIndex(Index), ResourceMask(Mask),
+ BufferSize(Desc.BufferSize), IsAGroup(countPopulation(ResourceMask) > 1) {
+ if (IsAGroup) {
+ ResourceSizeMask =
+ ResourceMask ^ 1ULL << (getResourceStateIndex(ResourceMask) - 1);
+ } else {
+ ResourceSizeMask = (1ULL << Desc.NumUnits) - 1;
+ }
+ ReadyMask = ResourceSizeMask;
+ AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize);
+ Unavailable = false;
+}
+
+bool ResourceState::isReady(unsigned NumUnits) const {
+ return (!isReserved() || isADispatchHazard()) &&
+ countPopulation(ReadyMask) >= NumUnits;
+}
+
+ResourceStateEvent ResourceState::isBufferAvailable() const {
+ if (isADispatchHazard() && isReserved())
+ return RS_RESERVED;
+ if (!isBuffered() || AvailableSlots)
+ return RS_BUFFER_AVAILABLE;
+ return RS_BUFFER_UNAVAILABLE;
+}
+
+#ifndef NDEBUG
+void ResourceState::dump() const {
+ dbgs() << "MASK=" << format_hex(ResourceMask, 16)
+ << ", SZMASK=" << format_hex(ResourceSizeMask, 16)
+ << ", RDYMASK=" << format_hex(ReadyMask, 16)
+ << ", BufferSize=" << BufferSize
+ << ", AvailableSlots=" << AvailableSlots
+ << ", Reserved=" << Unavailable << '\n';
+}
+#endif
+
+static std::unique_ptr<ResourceStrategy>
+getStrategyFor(const ResourceState &RS) {
+ if (RS.isAResourceGroup() || RS.getNumUnits() > 1)
+ return llvm::make_unique<DefaultResourceStrategy>(RS.getReadyMask());
+ return std::unique_ptr<ResourceStrategy>(nullptr);
+}
+
+ResourceManager::ResourceManager(const MCSchedModel &SM)
+ : Resources(SM.getNumProcResourceKinds()),
+ Strategies(SM.getNumProcResourceKinds()),
+ Resource2Groups(SM.getNumProcResourceKinds(), 0),
+ ProcResID2Mask(SM.getNumProcResourceKinds()) {
+ computeProcResourceMasks(SM, ProcResID2Mask);
+
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ uint64_t Mask = ProcResID2Mask[I];
+ unsigned Index = getResourceStateIndex(Mask);
+ Resources[Index] =
+ llvm::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask);
+ Strategies[Index] = getStrategyFor(*Resources[Index]);
+ }
+
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ uint64_t Mask = ProcResID2Mask[I];
+ unsigned Index = getResourceStateIndex(Mask);
+ const ResourceState &RS = *Resources[Index];
+ if (!RS.isAResourceGroup())
+ continue;
+
+ uint64_t GroupMaskIdx = 1ULL << (Index - 1);
+ Mask -= GroupMaskIdx;
+ while (Mask) {
+ // Extract lowest set isolated bit.
+ uint64_t Unit = Mask & (-Mask);
+ unsigned IndexUnit = getResourceStateIndex(Unit);
+ Resource2Groups[IndexUnit] |= GroupMaskIdx;
+ Mask ^= Unit;
+ }
+ }
+}
+
+void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
+ uint64_t ResourceMask) {
+ unsigned Index = getResourceStateIndex(ResourceMask);
+ assert(Index < Resources.size() && "Invalid processor resource index!");
+ assert(S && "Unexpected null strategy in input!");
+ Strategies[Index] = std::move(S);
+}
+
+unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const {
+ return Resources[getResourceStateIndex(Mask)]->getProcResourceID();
+}
+
+unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const {
+ return Resources[getResourceStateIndex(ResourceID)]->getNumUnits();
+}
+
+// Returns the actual resource consumed by this Use.
+// First is the primary resource ID.
+// Second is the specific sub-resource ID.
+ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) {
+ unsigned Index = getResourceStateIndex(ResourceID);
+ assert(Index < Resources.size() && "Invalid resource use!");
+ ResourceState &RS = *Resources[Index];
+ assert(RS.isReady() && "No available units to select!");
+
+ // Special case where RS is not a group, and it only declares a single
+ // resource unit.
+ if (!RS.isAResourceGroup() && RS.getNumUnits() == 1)
+ return std::make_pair(ResourceID, RS.getReadyMask());
+
+ uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask());
+ if (RS.isAResourceGroup())
+ return selectPipe(SubResourceID);
+ return std::make_pair(ResourceID, SubResourceID);
+}
+
+void ResourceManager::use(const ResourceRef &RR) {
+ // Mark the sub-resource referenced by RR as used.
+ unsigned RSID = getResourceStateIndex(RR.first);
+ ResourceState &RS = *Resources[RSID];
+ RS.markSubResourceAsUsed(RR.second);
+ // Remember to update the resource strategy for non-group resources with
+ // multiple units.
+ if (RS.getNumUnits() > 1)
+ Strategies[RSID]->used(RR.second);
+
+ // If there are still available units in RR.first,
+ // then we are done.
+ if (RS.isReady())
+ return;
+
+ // Notify groups that RR.first is no longer available.
+ uint64_t Users = Resource2Groups[RSID];
+ while (Users) {
+ // Extract lowest set isolated bit.
+ unsigned GroupIndex = getResourceStateIndex(Users & (-Users));
+ ResourceState &CurrentUser = *Resources[GroupIndex];
+ CurrentUser.markSubResourceAsUsed(RR.first);
+ Strategies[GroupIndex]->used(RR.first);
+ // Reset lowest set bit.
+ Users &= Users - 1;
+ }
+}
+
+void ResourceManager::release(const ResourceRef &RR) {
+ ResourceState &RS = *Resources[getResourceStateIndex(RR.first)];
+ bool WasFullyUsed = !RS.isReady();
+ RS.releaseSubResource(RR.second);
+ if (!WasFullyUsed)
+ return;
+
+ for (std::unique_ptr<ResourceState> &Res : Resources) {
+ ResourceState &Current = *Res;
+ if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first)
+ continue;
+
+ if (Current.containsResource(RR.first))
+ Current.releaseSubResource(RR.first);
+ }
+}
+
+ResourceStateEvent
+ResourceManager::canBeDispatched(ArrayRef<uint64_t> Buffers) const {
+ ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE;
+ for (uint64_t Buffer : Buffers) {
+ ResourceState &RS = *Resources[getResourceStateIndex(Buffer)];
+ Result = RS.isBufferAvailable();
+ if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE)
+ break;
+ }
+ return Result;
+}
+
+void ResourceManager::reserveBuffers(ArrayRef<uint64_t> Buffers) {
+ for (const uint64_t Buffer : Buffers) {
+ ResourceState &RS = *Resources[getResourceStateIndex(Buffer)];
+ assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE);
+ RS.reserveBuffer();
+
+ if (RS.isADispatchHazard()) {
+ assert(!RS.isReserved());
+ RS.setReserved();
+ }
+ }
+}
+
+void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) {
+ for (const uint64_t R : Buffers)
+ Resources[getResourceStateIndex(R)]->releaseBuffer();
+}
+
+bool ResourceManager::canBeIssued(const InstrDesc &Desc) const {
+ return all_of(
+ Desc.Resources, [&](const std::pair<uint64_t, const ResourceUsage> &E) {
+ unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits;
+ unsigned Index = getResourceStateIndex(E.first);
+ return Resources[Index]->isReady(NumUnits);
+ });
+}
+
+void ResourceManager::issueInstruction(
+ const InstrDesc &Desc,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes) {
+ for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) {
+ const CycleSegment &CS = R.second.CS;
+ if (!CS.size()) {
+ releaseResource(R.first);
+ continue;
+ }
+
+ assert(CS.begin() == 0 && "Invalid {Start, End} cycles!");
+ if (!R.second.isReserved()) {
+ ResourceRef Pipe = selectPipe(R.first);
+ use(Pipe);
+ BusyResources[Pipe] += CS.size();
+ Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>(
+ Pipe, ResourceCycles(CS.size())));
+ } else {
+ assert((countPopulation(R.first) > 1) && "Expected a group!");
+ // Mark this group as reserved.
+ assert(R.second.isReserved());
+ reserveResource(R.first);
+ BusyResources[ResourceRef(R.first, R.first)] += CS.size();
+ }
+ }
+}
+
+void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) {
+ for (std::pair<ResourceRef, unsigned> &BR : BusyResources) {
+ if (BR.second)
+ BR.second--;
+ if (!BR.second) {
+ // Release this resource.
+ const ResourceRef &RR = BR.first;
+
+ if (countPopulation(RR.first) == 1)
+ release(RR);
+
+ releaseResource(RR.first);
+ ResourcesFreed.push_back(RR);
+ }
+ }
+
+ for (const ResourceRef &RF : ResourcesFreed)
+ BusyResources.erase(RF);
+}
+
+void ResourceManager::reserveResource(uint64_t ResourceID) {
+ ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
+ assert(!Resource.isReserved());
+ Resource.setReserved();
+}
+
+void ResourceManager::releaseResource(uint64_t ResourceID) {
+ ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
+ Resource.clearReserved();
+}
+
+} // namespace mca
+} // namespace llvm
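DefaultResourceStrategy::select above walks a bit-mask of ready units in round-robin order: it picks the highest set bit inside the current "next in sequence" window, then shrinks the window so lower units get their turn before the sequence wraps. A simplified, self-contained sketch of that selection loop follows; it is not part of the patch, the helper names are invented, and the window update is slightly simplified with respect to selectImpl.

// Illustrative sketch only (not part of the patch above).
#include <cstdint>
#include <cstdio>

// Index of the highest set bit, 1-based (mirrors getResourceStateIndex above).
static unsigned highestBitIndex(uint64_t Mask) {
  unsigned Idx = 0;
  while (Mask) { Mask >>= 1; ++Idx; }
  return Idx;
}

// Pick the highest ready unit still inside the current round, then shrink the
// "next in sequence" window so the remaining units are consumed in order.
static uint64_t selectRoundRobin(uint64_t ReadyMask, uint64_t &NextInSequence,
                                 uint64_t AllUnits) {
  uint64_t Candidates = ReadyMask & NextInSequence;
  if (!Candidates) {                  // every unit had its turn; start a new round
    NextInSequence = AllUnits;
    Candidates = ReadyMask & NextInSequence;
  }
  uint64_t Picked = 1ULL << (highestBitIndex(Candidates) - 1);
  NextInSequence &= Picked - 1;       // only lower units remain in this round
  return Picked;
}

int main() {
  uint64_t AllUnits = 0b111, Next = AllUnits;
  for (int I = 0; I < 4; ++I)
    std::printf("picked unit mask: %llu\n",
                (unsigned long long)selectRoundRobin(AllUnits, Next, AllUnits));
  // Picks 4, 2, 1, then wraps around to 4 again.
}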
diff --git a/contrib/llvm/tools/llvm-mca/RetireControlUnit.cpp b/contrib/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
index 123058541f28..de9f24552c38 100644
--- a/contrib/llvm/tools/llvm-mca/RetireControlUnit.cpp
+++ b/contrib/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
@@ -12,16 +12,15 @@
///
//===----------------------------------------------------------------------===//
-#include "RetireControlUnit.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
#include "llvm/Support/Debug.h"
-using namespace llvm;
-
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
-RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM)
+RetireControlUnit::RetireControlUnit(const MCSchedModel &SM)
: NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) {
// Check if the scheduling model provides extra information about the machine
@@ -41,10 +40,10 @@ RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM)
// Reserves a number of slots, and returns a new token.
unsigned RetireControlUnit::reserveSlot(const InstRef &IR,
unsigned NumMicroOps) {
- assert(isAvailable(NumMicroOps));
+ assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!");
unsigned NormalizedQuantity =
std::min(NumMicroOps, static_cast<unsigned>(Queue.size()));
- // Zero latency instructions may have zero mOps. Artificially bump this
+ // Zero latency instructions may have zero uOps. Artificially bump this
// value to 1. Although zero latency instructions don't consume scheduler
// resources, they still consume one slot in the retire queue.
NormalizedQuantity = std::max(NormalizedQuantity, 1U);
@@ -61,9 +60,10 @@ const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const {
}
void RetireControlUnit::consumeCurrentToken() {
- const RetireControlUnit::RUToken &Current = peekCurrentToken();
+ RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx];
assert(Current.NumSlots && "Reserved zero slots?");
- assert(Current.IR.isValid() && "Invalid RUToken in the RCU queue.");
+ assert(Current.IR && "Invalid RUToken in the RCU queue.");
+ Current.IR.getInstruction()->retire();
// Update the slot index to be the next item in the circular queue.
CurrentInstructionSlotIdx += Current.NumSlots;
@@ -73,7 +73,7 @@ void RetireControlUnit::consumeCurrentToken() {
void RetireControlUnit::onInstructionExecuted(unsigned TokenID) {
assert(Queue.size() > TokenID);
- assert(Queue[TokenID].Executed == false && Queue[TokenID].IR.isValid());
+ assert(Queue[TokenID].Executed == false && Queue[TokenID].IR);
Queue[TokenID].Executed = true;
}
@@ -85,3 +85,4 @@ void RetireControlUnit::dump() const {
#endif
} // namespace mca
+} // namespace llvm
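RetireControlUnit::reserveSlot above hands out a token into a circular retire queue, bumping zero-uOp instructions to one slot so they still occupy an entry. Below is a minimal sketch of that token/slot bookkeeping; it is not part of the patch, the MiniRCU type is invented for illustration, and the quantity normalization is simplified.

// Illustrative sketch only (not part of the patch above).
#include <cassert>
#include <cstdio>

struct MiniRCU {
  unsigned Size;
  unsigned Next = 0;       // next free slot; doubles as the returned token id
  unsigned Available;

  explicit MiniRCU(unsigned S) : Size(S), Available(S) {}

  unsigned reserveSlot(unsigned NumMicroOps) {
    // Zero-latency instructions may have zero uOps but still take one slot.
    unsigned Quantity = NumMicroOps ? NumMicroOps : 1;
    assert(Quantity <= Available && "Reorder buffer unavailable!");
    unsigned Token = Next;
    Next = (Next + Quantity) % Size;   // the retire queue is circular
    Available -= Quantity;
    return Token;
  }
};

int main() {
  MiniRCU RCU(8);
  unsigned A = RCU.reserveSlot(2);
  unsigned B = RCU.reserveSlot(0);
  std::printf("tokens: %u %u\n", A, B); // prints "tokens: 0 2"
}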
diff --git a/contrib/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/contrib/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
new file mode 100644
index 000000000000..355ef79d06a6
--- /dev/null
+++ b/contrib/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -0,0 +1,247 @@
+//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A scheduler for processor resource units and processor resource groups.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+void Scheduler::initializeStrategy(std::unique_ptr<SchedulerStrategy> S) {
+ // Ensure we have a valid (non-null) strategy object.
+ Strategy = S ? std::move(S) : llvm::make_unique<DefaultSchedulerStrategy>();
+}
+
+// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy.
+SchedulerStrategy::~SchedulerStrategy() = default;
+DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default;
+
+#ifndef NDEBUG
+void Scheduler::dump() const {
+ dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n';
+ dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n';
+ dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n';
+ Resources->dump();
+}
+#endif
+
+Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+
+ switch (Resources->canBeDispatched(Desc.Buffers)) {
+ case ResourceStateEvent::RS_BUFFER_UNAVAILABLE:
+ return Scheduler::SC_BUFFERS_FULL;
+ case ResourceStateEvent::RS_RESERVED:
+ return Scheduler::SC_DISPATCH_GROUP_STALL;
+ case ResourceStateEvent::RS_BUFFER_AVAILABLE:
+ break;
+ }
+
+ // Give lower priority to LSUnit stall events.
+ switch (LSU.isAvailable(IR)) {
+ case LSUnit::LSU_LQUEUE_FULL:
+ return Scheduler::SC_LOAD_QUEUE_FULL;
+ case LSUnit::LSU_SQUEUE_FULL:
+ return Scheduler::SC_STORE_QUEUE_FULL;
+ case LSUnit::LSU_AVAILABLE:
+ return Scheduler::SC_AVAILABLE;
+ }
+
+ llvm_unreachable("Don't know how to process this LSU state result!");
+}
+
+void Scheduler::issueInstructionImpl(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources) {
+ Instruction *IS = IR.getInstruction();
+ const InstrDesc &D = IS->getDesc();
+
+ // Issue the instruction and collect all the consumed resources
+ // into a vector. That vector is then used to notify the listener.
+ Resources->issueInstruction(D, UsedResources);
+
+ // Notify the instruction that it started executing.
+ // This updates the internal state of each write.
+ IS->execute();
+
+ if (IS->isExecuting())
+ IssuedSet.emplace_back(IR);
+ else if (IS->isExecuted())
+ LSU.onInstructionExecuted(IR);
+}
+
+// Release the buffered resources and issue the instruction.
+void Scheduler::issueInstruction(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources,
+ SmallVectorImpl<InstRef> &ReadyInstructions) {
+ const Instruction &Inst = *IR.getInstruction();
+ bool HasDependentUsers = Inst.hasDependentUsers();
+
+ Resources->releaseBuffers(Inst.getDesc().Buffers);
+ issueInstructionImpl(IR, UsedResources);
+ // Instructions that have been issued during this cycle might have unblocked
+ // other dependent instructions. Dependent instructions may be issued during
+ // this same cycle if operands have ReadAdvance entries. Promote those
+ // instructions to the ReadySet and notify the caller that those are ready.
+ if (HasDependentUsers)
+ promoteToReadySet(ReadyInstructions);
+}
+
+void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
+ // Scan the set of waiting instructions and promote them to the
+ // ready queue if operands are all ready.
+ unsigned RemovedElements = 0;
+ for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) {
+ InstRef &IR = *I;
+ if (!IR)
+ break;
+
+ // Check if this instruction is now ready. If so, force
+ // a transition in state using method 'update()'.
+ Instruction &IS = *IR.getInstruction();
+ if (!IS.isReady())
+ IS.update();
+
+ // Check if there are still unsolved data dependencies.
+ if (!isReady(IR)) {
+ ++I;
+ continue;
+ }
+
+ Ready.emplace_back(IR);
+ ReadySet.emplace_back(IR);
+
+ IR.invalidate();
+ ++RemovedElements;
+ std::iter_swap(I, E - RemovedElements);
+ }
+
+ WaitSet.resize(WaitSet.size() - RemovedElements);
+}
+
+InstRef Scheduler::select() {
+ unsigned QueueIndex = ReadySet.size();
+ for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) {
+ const InstRef &IR = ReadySet[I];
+ if (QueueIndex == ReadySet.size() ||
+ Strategy->compare(IR, ReadySet[QueueIndex])) {
+ const InstrDesc &D = IR.getInstruction()->getDesc();
+ if (Resources->canBeIssued(D))
+ QueueIndex = I;
+ }
+ }
+
+ if (QueueIndex == ReadySet.size())
+ return InstRef();
+
+ // We found an instruction to issue.
+ InstRef IR = ReadySet[QueueIndex];
+ std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]);
+ ReadySet.pop_back();
+ return IR;
+}
+
+void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) {
+ unsigned RemovedElements = 0;
+ for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) {
+ InstRef &IR = *I;
+ if (!IR)
+ break;
+ Instruction &IS = *IR.getInstruction();
+ if (!IS.isExecuted()) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
+ << " is still executing.\n");
+ ++I;
+ continue;
+ }
+
+ // Instruction IR has completed execution.
+ LSU.onInstructionExecuted(IR);
+ Executed.emplace_back(IR);
+ ++RemovedElements;
+ IR.invalidate();
+ std::iter_swap(I, E - RemovedElements);
+ }
+
+ IssuedSet.resize(IssuedSet.size() - RemovedElements);
+}
+
+void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
+ SmallVectorImpl<InstRef> &Executed,
+ SmallVectorImpl<InstRef> &Ready) {
+ // Release consumed resources.
+ Resources->cycleEvent(Freed);
+
+ // Propagate the cycle event to the 'Issued' and 'Wait' sets.
+ for (InstRef &IR : IssuedSet)
+ IR.getInstruction()->cycleEvent();
+
+ updateIssuedSet(Executed);
+
+ for (InstRef &IR : WaitSet)
+ IR.getInstruction()->cycleEvent();
+
+ promoteToReadySet(Ready);
+}
+
+bool Scheduler::mustIssueImmediately(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ if (Desc.isZeroLatency())
+ return true;
+ // Instructions that use an in-order dispatch/issue processor resource must be
+ // issued immediately to the pipeline(s). Any other in-order buffered
+ // resources (i.e. BufferSize=1) are consumed.
+ return Desc.MustIssueImmediately;
+}
+
+void Scheduler::dispatch(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ Resources->reserveBuffers(Desc.Buffers);
+
+ // If necessary, reserve queue entries in the load-store unit (LSU).
+ bool IsMemOp = Desc.MayLoad || Desc.MayStore;
+ if (IsMemOp)
+ LSU.dispatch(IR);
+
+ if (!isReady(IR)) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
+ WaitSet.push_back(IR);
+ return;
+ }
+
+ // Don't add a zero-latency instruction to the Ready queue.
+ // A zero-latency instruction doesn't consume any scheduler resources. That is
+ // because it doesn't need to be executed, and it is often removed at register
+ // renaming stage. For example, register-register moves are often optimized at
+ // register renaming stage by simply updating register aliases. On some
+ // targets, zero-idiom instructions (for example: a xor that clears the value
+ // of a register) are treated specially, and are often eliminated at register
+ // renaming stage.
+ if (!mustIssueImmediately(IR)) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n");
+ ReadySet.push_back(IR);
+ }
+}
+
+bool Scheduler::isReady(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ bool IsMemOp = Desc.MayLoad || Desc.MayStore;
+ return IR.getInstruction()->isReady() && (!IsMemOp || LSU.isReady(IR));
+}
+
+} // namespace mca
+} // namespace llvm
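Scheduler::select above scans the ReadySet, keeps the best candidate according to the scheduler strategy among the instructions whose pipelines are free, then swaps the winner with the last element and pops it. A self-contained sketch of that scan follows; it is not part of the patch, and the "older is better" comparison merely stands in for DefaultSchedulerStrategy.

// Illustrative sketch only (not part of the patch above).
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Pair: {source index (age), resources available?}
  std::vector<std::pair<unsigned, bool>> ReadySet = {
      {3, true}, {1, false}, {2, true}};

  unsigned Best = ReadySet.size();
  for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) {
    if (!ReadySet[I].second)              // skip it if its pipelines are busy
      continue;
    if (Best == ReadySet.size() || ReadySet[I].first < ReadySet[Best].first)
      Best = I;                           // prefer the older instruction
  }

  if (Best != ReadySet.size()) {
    auto Picked = ReadySet[Best];
    std::swap(ReadySet[Best], ReadySet.back());
    ReadySet.pop_back();                  // remove the issued entry in O(1)
    std::printf("issued instruction #%u\n", Picked.first); // issues #2
  }
}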
diff --git a/contrib/llvm/tools/llvm-mca/InstrBuilder.cpp b/contrib/llvm/lib/MCA/InstrBuilder.cpp
index 053b7b4e8175..d2d65e55537c 100644
--- a/contrib/llvm/tools/llvm-mca/InstrBuilder.cpp
+++ b/contrib/llvm/lib/MCA/InstrBuilder.cpp
@@ -12,7 +12,7 @@
///
//===----------------------------------------------------------------------===//
-#include "InstrBuilder.h"
+#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
@@ -22,9 +22,19 @@
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
-using namespace llvm;
+InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
+ const llvm::MCInstrInfo &mcii,
+ const llvm::MCRegisterInfo &mri,
+ const llvm::MCInstrAnalysis *mcia)
+ : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
+ FirstReturnInst(true) {
+ const MCSchedModel &SM = STI.getSchedModel();
+ ProcResourceMasks.resize(SM.getNumProcResourceKinds());
+ computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
+}
static void initializeUsedResources(InstrDesc &ID,
const MCSchedClassDesc &SCDesc,
@@ -48,12 +58,23 @@ static void initializeUsedResources(InstrDesc &ID,
// part of a "Super" resource. The key value is the "Super" resource mask ID.
DenseMap<uint64_t, unsigned> SuperResources;
+ unsigned NumProcResources = SM.getNumProcResourceKinds();
+ APInt Buffers(NumProcResources, 0);
+
+ bool AllInOrderResources = true;
+ bool AnyDispatchHazards = false;
for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
- if (PR.BufferSize != -1)
- ID.Buffers.push_back(Mask);
+ if (PR.BufferSize < 0) {
+ AllInOrderResources = false;
+ } else {
+ Buffers.setBit(PRE->ProcResourceIdx);
+ AnyDispatchHazards |= (PR.BufferSize == 0);
+ AllInOrderResources &= (PR.BufferSize <= 1);
+ }
+
CycleSegment RCy(0, PRE->Cycles, false);
Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
if (PR.SuperIdx) {
@@ -62,18 +83,19 @@ static void initializeUsedResources(InstrDesc &ID,
}
}
+ ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;
+
// Sort elements by mask popcount, so that we prioritize resource units over
// resource groups, and smaller groups over larger groups.
- llvm::sort(Worklist.begin(), Worklist.end(),
- [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
- unsigned popcntA = countPopulation(A.first);
- unsigned popcntB = countPopulation(B.first);
- if (popcntA < popcntB)
- return true;
- if (popcntA > popcntB)
- return false;
- return A.first < B.first;
- });
+ sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
+ unsigned popcntA = countPopulation(A.first);
+ unsigned popcntB = countPopulation(B.first);
+ if (popcntA < popcntB)
+ return true;
+ if (popcntA > popcntB)
+ return false;
+ return A.first < B.first;
+ });
uint64_t UsedResourceUnits = 0;
@@ -99,7 +121,7 @@ static void initializeUsedResources(InstrDesc &ID,
for (unsigned J = I + 1; J < E; ++J) {
ResourcePlusCycles &B = Worklist[J];
if ((NormalizedMask & B.first) == NormalizedMask) {
- B.second.CS.Subtract(A.second.size() - SuperResources[A.first]);
+ B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
if (countPopulation(B.first) > 1)
B.second.NumUnits++;
}
@@ -132,11 +154,36 @@ static void initializeUsedResources(InstrDesc &ID,
}
}
+ // Identify extra buffers that are consumed through super resources.
+ for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
+ for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
+ const MCProcResourceDesc &PR = *SM.getProcResource(I);
+ if (PR.BufferSize == -1)
+ continue;
+
+ uint64_t Mask = ProcResourceMasks[I];
+ if (Mask != SR.first && ((Mask & SR.first) == SR.first))
+ Buffers.setBit(I);
+ }
+ }
+
+ // Now set the buffers.
+ if (unsigned NumBuffers = Buffers.countPopulation()) {
+ ID.Buffers.resize(NumBuffers);
+ for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) {
+ if (Buffers[I]) {
+ --NumBuffers;
+ ID.Buffers[NumBuffers] = ProcResourceMasks[I];
+ }
+ }
+ }
+
LLVM_DEBUG({
for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
- dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n';
+ dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", "
+ << "cy=" << R.second.size() << '\n';
for (const uint64_t R : ID.Buffers)
- dbgs() << "\t\tBuffer Mask=" << R << '\n';
+ dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
});
}
@@ -155,33 +202,92 @@ static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}
+static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
+ // Count register definitions, and skip non register operands in the process.
+ unsigned I, E;
+ unsigned NumExplicitDefs = MCDesc.getNumDefs();
+ for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
+ const MCOperand &Op = MCI.getOperand(I);
+ if (Op.isReg())
+ --NumExplicitDefs;
+ }
+
+ if (NumExplicitDefs) {
+ return make_error<InstructionError<MCInst>>(
+ "Expected more register operand definitions.", MCI);
+ }
+
+ if (MCDesc.hasOptionalDef()) {
+ // Always assume that the optional definition is the last operand.
+ const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
+ if (I == MCI.getNumOperands() || !Op.isReg()) {
+ std::string Message =
+ "expected a register operand for an optional definition. Instruction "
+ "has not been correctly analyzed.";
+ return make_error<InstructionError<MCInst>>(Message, MCI);
+ }
+ }
+
+ return ErrorSuccess();
+}
+
void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
unsigned SchedClassID) {
const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
const MCSchedModel &SM = STI.getSchedModel();
const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
- // These are for now the (strong) assumptions made by this algorithm:
- // * The number of explicit and implicit register definitions in a MCInst
- // matches the number of explicit and implicit definitions according to
- // the opcode descriptor (MCInstrDesc).
- // * Register definitions take precedence over register uses in the operands
- // list.
- // * If an opcode specifies an optional definition, then the optional
- // definition is always the last operand in the sequence, and it can be
- // set to zero (i.e. "no register").
+ // Assumptions made by this algorithm:
+ // 1. The number of explicit and implicit register definitions in a MCInst
+ // matches the number of explicit and implicit definitions according to
+ // the opcode descriptor (MCInstrDesc).
+ // 2. Uses start at index #(MCDesc.getNumDefs()).
+ // 3. There can only be a single optional register definition, and it is
+ // always the last operand of the sequence (excluding extra operands
+ // contributed by variadic opcodes).
//
// These assumptions work quite well for most out-of-order in-tree targets
// like x86. This is mainly because the vast majority of instructions is
// expanded to MCInst using a straightforward lowering logic that preserves
// the ordering of the operands.
+ //
+ // About assumption 1.
+ // The algorithm allows non-register operands between register operand
+ // definitions. This helps to handle some special ARM instructions with
+ // implicit operand increment (-mtriple=armv7):
+ //
+ // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed
+ // @ <MCOperand Reg:59>
+ // @ <MCOperand Imm:0> (!!)
+ // @ <MCOperand Reg:67>
+ // @ <MCOperand Imm:0>
+ // @ <MCOperand Imm:14>
+ // @ <MCOperand Reg:0>>
+ //
+ // MCDesc reports:
+ // 6 explicit operands.
+ // 1 optional definition
+ // 2 explicit definitions (!!)
+ //
+ // The presence of an 'Imm' operand between the two register definitions
+ // breaks the assumption that "register definitions are always at the
+ // beginning of the operand sequence".
+ //
+ // To work around this issue, this algorithm ignores (i.e. skips) any
+ // non-register operands between register definitions. The optional
+ // definition is still at index #(NumOperands-1).
+ //
+ // According to assumption 2, register reads start at #(NumExplicitDefs-1).
+ // That means, register R1 from the example is both read and written.
unsigned NumExplicitDefs = MCDesc.getNumDefs();
unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
if (MCDesc.hasOptionalDef())
TotalDefs++;
- ID.Writes.resize(TotalDefs);
+
+ unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+ ID.Writes.resize(TotalDefs + NumVariadicOps);
// Iterate over the operands list, and skip non-register operands.
// The first NumExplictDefs register operands are expected to be register
// definitions.
@@ -208,18 +314,15 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
}
Write.IsOptionalDef = false;
LLVM_DEBUG({
- dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
+ dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
<< ", Latency=" << Write.Latency
<< ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
});
CurrentDef++;
}
- if (CurrentDef != NumExplicitDefs)
- llvm::report_fatal_error(
- "error: Expected more register operand definitions. ");
-
- CurrentDef = 0;
+ assert(CurrentDef == NumExplicitDefs &&
+ "Expected more register operand definitions.");
for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
unsigned Index = NumExplicitDefs + CurrentDef;
WriteDescriptor &Write = ID.Writes[Index];
@@ -241,7 +344,7 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
Write.IsOptionalDef = false;
assert(Write.RegisterID != 0 && "Expected a valid phys register!");
LLVM_DEBUG({
- dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
+ dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
<< ", PhysReg=" << MRI.getName(Write.RegisterID)
<< ", Latency=" << Write.Latency
<< ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
@@ -249,73 +352,149 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
}
if (MCDesc.hasOptionalDef()) {
- // Always assume that the optional definition is the last operand of the
- // MCInst sequence.
- const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1);
- if (i == MCI.getNumOperands() || !Op.isReg())
- llvm::report_fatal_error(
- "error: expected a register operand for an optional "
- "definition. Instruction has not be correctly analyzed.\n",
- false);
-
- WriteDescriptor &Write = ID.Writes[TotalDefs - 1];
- Write.OpIndex = MCI.getNumOperands() - 1;
+ WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
+ Write.OpIndex = MCDesc.getNumOperands() - 1;
// Assign a default latency for this write.
Write.Latency = ID.MaxLatency;
Write.SClassOrWriteResourceID = 0;
Write.IsOptionalDef = true;
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
}
+
+ if (!NumVariadicOps)
+ return;
+
+ // FIXME: if an instruction opcode is flagged 'mayStore', and it has no
+ // "unmodeledSideEffects', then this logic optimistically assumes that any
+ // extra register operands in the variadic sequence is not a register
+ // definition.
+ //
+ // Otherwise, we conservatively assume that any register operand from the
+ // variadic sequence is both a register read and a register write.
+ bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
+ !MCDesc.hasUnmodeledSideEffects();
+ CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
+ for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+ I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
+
+ WriteDescriptor &Write = ID.Writes[CurrentDef];
+ Write.OpIndex = OpIndex;
+ // Assign a default latency for this write.
+ Write.Latency = ID.MaxLatency;
+ Write.SClassOrWriteResourceID = 0;
+ Write.IsOptionalDef = false;
+ ++CurrentDef;
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
+ }
+
+ ID.Writes.resize(CurrentDef);
}
void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
unsigned SchedClassID) {
const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
- unsigned NumExplicitDefs = MCDesc.getNumDefs();
+ unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
+ unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
+ // Remove the optional definition.
+ if (MCDesc.hasOptionalDef())
+ --NumExplicitUses;
+ unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+ unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
+ ID.Reads.resize(TotalUses);
+ unsigned CurrentUse = 0;
+ for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
+ ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
- // Skip explicit definitions.
- unsigned i = 0;
- for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) {
- const MCOperand &Op = MCI.getOperand(i);
- if (Op.isReg())
- NumExplicitDefs--;
+ ReadDescriptor &Read = ID.Reads[CurrentUse];
+ Read.OpIndex = OpIndex;
+ Read.UseIndex = I;
+ Read.SchedClassID = SchedClassID;
+ ++CurrentUse;
+ LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
+ << ", UseIndex=" << Read.UseIndex << '\n');
}
- if (NumExplicitDefs)
- llvm::report_fatal_error(
- "error: Expected more register operand definitions. ", false);
-
- unsigned NumExplicitUses = MCI.getNumOperands() - i;
- unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
- if (MCDesc.hasOptionalDef()) {
- assert(NumExplicitUses);
- NumExplicitUses--;
+ // For the purpose of ReadAdvance, implicit uses come directly after explicit
+ // uses. The "UseIndex" must be updated according to that implicit layout.
+ for (unsigned I = 0; I < NumImplicitUses; ++I) {
+ ReadDescriptor &Read = ID.Reads[CurrentUse + I];
+ Read.OpIndex = ~I;
+ Read.UseIndex = NumExplicitUses + I;
+ Read.RegisterID = MCDesc.getImplicitUses()[I];
+ Read.SchedClassID = SchedClassID;
+ LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
+ << ", UseIndex=" << Read.UseIndex << ", RegisterID="
+ << MRI.getName(Read.RegisterID) << '\n');
}
- unsigned TotalUses = NumExplicitUses + NumImplicitUses;
- if (!TotalUses)
- return;
- ID.Reads.resize(TotalUses);
- for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) {
+ CurrentUse += NumImplicitUses;
+
+ // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
+ // "unmodeledSideEffects", then this logic optimistically assumes that any
+ // extra register operands in the variadic sequence are not register
+ // definitions.
+
+ bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
+ !MCDesc.hasUnmodeledSideEffects();
+ for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+ I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
+
ReadDescriptor &Read = ID.Reads[CurrentUse];
- Read.OpIndex = i + CurrentUse;
- Read.UseIndex = CurrentUse;
+ Read.OpIndex = OpIndex;
+ Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
Read.SchedClassID = SchedClassID;
- LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
+ ++CurrentUse;
+ LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
<< ", UseIndex=" << Read.UseIndex << '\n');
}
- for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) {
- ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse];
- Read.OpIndex = ~CurrentUse;
- Read.UseIndex = NumExplicitUses + CurrentUse;
- Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse];
- Read.SchedClassID = SchedClassID;
- LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex << ", RegisterID="
- << MRI.getName(Read.RegisterID) << '\n');
+ ID.Reads.resize(CurrentUse);
+}
+
+Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
+ const MCInst &MCI) const {
+ if (ID.NumMicroOps != 0)
+ return ErrorSuccess();
+
+ bool UsesMemory = ID.MayLoad || ID.MayStore;
+ bool UsesBuffers = !ID.Buffers.empty();
+ bool UsesResources = !ID.Resources.empty();
+ if (!UsesMemory && !UsesBuffers && !UsesResources)
+ return ErrorSuccess();
+
+ StringRef Message;
+ if (UsesMemory) {
+ Message = "found an inconsistent instruction that decodes "
+ "into zero opcodes and that consumes load/store "
+ "unit resources.";
+ } else {
+ Message = "found an inconsistent instruction that decodes "
+ "to zero opcodes and that consumes scheduler "
+ "resources.";
}
+
+ return make_error<InstructionError<MCInst>>(Message, MCI);
}
-const InstrDesc &InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
+Expected<const InstrDesc &>
+InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
assert(STI.getSchedModel().hasInstrSchedModel() &&
"Itineraries are not yet supported!");
@@ -326,64 +505,76 @@ const InstrDesc &InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
// Then obtain the scheduling class information from the instruction.
unsigned SchedClassID = MCDesc.getSchedClass();
- unsigned CPUID = SM.getProcessorID();
+ bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
// Try to solve variant scheduling classes.
- if (SchedClassID) {
+ if (IsVariant) {
+ unsigned CPUID = SM.getProcessorID();
while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);
- if (!SchedClassID)
- llvm::report_fatal_error("unable to resolve this variant class.");
+ if (!SchedClassID) {
+ return make_error<InstructionError<MCInst>>(
+ "unable to resolve scheduling class for write variant.", MCI);
+ }
}
- // Check if this instruction is supported. Otherwise, report a fatal error.
+ // Check if this instruction is supported. Otherwise, report an error.
const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
- std::string ToString;
- llvm::raw_string_ostream OS(ToString);
- WithColor::error() << "found an unsupported instruction in the input"
- << " assembly sequence.\n";
- MCIP.printInst(&MCI, OS, "", STI);
- OS.flush();
-
- WithColor::note() << "instruction: " << ToString << '\n';
- llvm::report_fatal_error(
- "Don't know how to analyze unsupported instructions.");
+ return make_error<InstructionError<MCInst>>(
+ "found an unsupported instruction in the input assembly sequence.",
+ MCI);
}
+ LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
+
// Create a new empty descriptor.
std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
ID->NumMicroOps = SCDesc.NumMicroOps;
- if (MCDesc.isCall()) {
+ if (MCDesc.isCall() && FirstCallInst) {
// We don't correctly model calls.
WithColor::warning() << "found a call in the input assembly sequence.\n";
WithColor::note() << "call instructions are not correctly modeled. "
<< "Assume a latency of 100cy.\n";
+ FirstCallInst = false;
}
- if (MCDesc.isReturn()) {
+ if (MCDesc.isReturn() && FirstReturnInst) {
WithColor::warning() << "found a return instruction in the input"
<< " assembly sequence.\n";
WithColor::note() << "program counter updates are ignored.\n";
+ FirstReturnInst = false;
}
ID->MayLoad = MCDesc.mayLoad();
ID->MayStore = MCDesc.mayStore();
ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
+ ID->BeginGroup = SCDesc.BeginGroup;
+ ID->EndGroup = SCDesc.EndGroup;
initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
computeMaxLatency(*ID, MCDesc, SCDesc, STI);
+
+ if (Error Err = verifyOperands(MCDesc, MCI))
+ return std::move(Err);
+
populateWrites(*ID, MCI, SchedClassID);
populateReads(*ID, MCI, SchedClassID);
LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
+ // Sanity check on the instruction descriptor.
+ if (Error Err = verifyInstrDesc(*ID, MCI))
+ return std::move(Err);
+
// Now add the new descriptor.
SchedClassID = MCDesc.getSchedClass();
- if (!SM.getSchedClassDesc(SchedClassID)->isVariant()) {
+ bool IsVariadic = MCDesc.isVariadic();
+ if (!IsVariadic && !IsVariant) {
Descriptors[MCI.getOpcode()] = std::move(ID);
return *Descriptors[MCI.getOpcode()];
}
@@ -392,7 +583,8 @@ const InstrDesc &InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
return *VariantDescriptors[&MCI];
}
-const InstrDesc &InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
+Expected<const InstrDesc &>
+InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
return *Descriptors[MCI.getOpcode()];
@@ -402,11 +594,28 @@ const InstrDesc &InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
return createInstrDescImpl(MCI);
}
-std::unique_ptr<Instruction>
+Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI) {
- const InstrDesc &D = getOrCreateInstrDesc(MCI);
+ Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
+ if (!DescOrErr)
+ return DescOrErr.takeError();
+ const InstrDesc &D = *DescOrErr;
std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);
+ // Check if this is a dependency breaking instruction.
+ APInt Mask;
+
+ bool IsZeroIdiom = false;
+ bool IsDepBreaking = false;
+ if (MCIA) {
+ unsigned ProcID = STI.getSchedModel().getProcessorID();
+ IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
+ IsDepBreaking =
+ IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
+ if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
+ NewIS->setOptimizableMove();
+ }
+
// Initialize Reads first.
for (const ReadDescriptor &RD : D.Reads) {
int RegID = -1;
@@ -428,12 +637,33 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
// Okay, this is a register operand. Create a ReadState for it.
assert(RegID > 0 && "Invalid register ID found!");
- NewIS->getUses().emplace_back(llvm::make_unique<ReadState>(RD, RegID));
+ NewIS->getUses().emplace_back(RD, RegID);
+ ReadState &RS = NewIS->getUses().back();
+
+ if (IsDepBreaking) {
+ // A mask of all zeroes means: explicit input operands are not
+ // independent.
+ if (Mask.isNullValue()) {
+ if (!RD.isImplicitRead())
+ RS.setIndependentFromDef();
+ } else {
+ // Check if this register operand is independent according to `Mask`.
+ // Note that Mask may not have enough bits to describe all explicit and
+ // implicit input operands. If this register operand doesn't have a
+ // corresponding bit in Mask, then conservatively assume that it is
+ // dependent.
+ if (Mask.getBitWidth() > RD.UseIndex) {
+ // Okay. This mask describes register use `RD.UseIndex`.
+ if (Mask[RD.UseIndex])
+ RS.setIndependentFromDef();
+ }
+ }
+ }
}
// Early exit if there are no writes.
if (D.Writes.empty())
- return NewIS;
+ return std::move(NewIS);
// Track register writes that implicitly clear the upper portion of the
// underlying super-registers using an APInt.
@@ -441,11 +671,8 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
// Now query the MCInstrAnalysis object to obtain information about which
// register writes implicitly clear the upper portion of a super-register.
- MCIA.clearsSuperRegisters(MRI, MCI, WriteMask);
-
- // Check if this is a dependency breaking instruction.
- if (MCIA.isDependencyBreaking(STI, MCI))
- NewIS->setDependencyBreaking();
+ if (MCIA)
+ MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);
// Initialize writes.
unsigned WriteIndex = 0;
@@ -459,11 +686,13 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
}
assert(RegID && "Expected a valid register ID!");
- NewIS->getDefs().emplace_back(llvm::make_unique<WriteState>(
- WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex]));
+ NewIS->getDefs().emplace_back(WD, RegID,
+ /* ClearsSuperRegs */ WriteMask[WriteIndex],
+ /* WritesZero */ IsZeroIdiom);
++WriteIndex;
}
- return NewIS;
+ return std::move(NewIS);
}
} // namespace mca
+} // namespace llvm
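The sort call in initializeUsedResources above orders the resource worklist by mask popcount, so single resource units are processed before groups and smaller groups before larger ones, with the raw mask value as a tie breaker. A self-contained sketch of that comparator follows; it is not part of the patch and uses std::bitset in place of countPopulation.

// Illustrative sketch only (not part of the patch above).
#include <algorithm>
#include <bitset>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint64_t> Masks = {0b0110, 0b0001, 0b0111, 0b0100};
  std::sort(Masks.begin(), Masks.end(), [](uint64_t A, uint64_t B) {
    unsigned PopA = std::bitset<64>(A).count();
    unsigned PopB = std::bitset<64>(B).count();
    if (PopA != PopB)
      return PopA < PopB;   // units first, then small groups, then big groups
    return A < B;           // deterministic tie break
  });
  for (uint64_t M : Masks)
    std::printf("%#llx ", (unsigned long long)M); // prints 0x1 0x4 0x6 0x7
  std::printf("\n");
}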
diff --git a/contrib/llvm/tools/llvm-mca/Instruction.cpp b/contrib/llvm/lib/MCA/Instruction.cpp
index 0c8476705572..057e95ca9990 100644
--- a/contrib/llvm/tools/llvm-mca/Instruction.cpp
+++ b/contrib/llvm/lib/MCA/Instruction.cpp
@@ -12,14 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#include "Instruction.h"
+#include "llvm/MCA/Instruction.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
-
void ReadState::writeStartEvent(unsigned Cycles) {
assert(DependentWrites);
assert(CyclesLeft == UNKNOWN_CYCLES);
@@ -50,6 +49,10 @@ void WriteState::onInstructionIssued() {
unsigned ReadCycles = std::max(0, CyclesLeft - User.second);
RS->writeStartEvent(ReadCycles);
}
+
+ // Notify any writes that are in a false dependency with this write.
+ if (PartialWrite)
+ PartialWrite->writeStartEvent(CyclesLeft);
}
void WriteState::addUser(ReadState *User, int ReadAdvance) {
@@ -62,8 +65,22 @@ void WriteState::addUser(ReadState *User, int ReadAdvance) {
return;
}
- std::pair<ReadState *, int> NewPair(User, ReadAdvance);
- Users.insert(NewPair);
+ if (llvm::find_if(Users, [&User](const std::pair<ReadState *, int> &Use) {
+ return Use.first == User;
+ }) == Users.end()) {
+ Users.emplace_back(User, ReadAdvance);
+ }
+}
+
+void WriteState::addUser(WriteState *User) {
+ if (CyclesLeft != UNKNOWN_CYCLES) {
+ User->writeStartEvent(std::max(0, CyclesLeft));
+ return;
+ }
+
+ assert(!PartialWrite && "PartialWrite already set!");
+ PartialWrite = User;
+ User->setDependentWrite(this);
}
void WriteState::cycleEvent() {
@@ -72,6 +89,9 @@ void WriteState::cycleEvent() {
// specify a negative ReadAdvance.
if (CyclesLeft != UNKNOWN_CYCLES)
CyclesLeft--;
+
+ if (DependentWriteCyclesLeft)
+ DependentWriteCyclesLeft--;
}
void ReadState::cycleEvent() {
@@ -93,7 +113,7 @@ void ReadState::cycleEvent() {
#ifndef NDEBUG
void WriteState::dump() const {
- dbgs() << "{ OpIdx=" << WD.OpIndex << ", Lat=" << getLatency() << ", RegID "
+ dbgs() << "{ OpIdx=" << WD->OpIndex << ", Lat=" << getLatency() << ", RegID "
<< getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }";
}
@@ -120,34 +140,38 @@ void Instruction::execute() {
Stage = IS_EXECUTING;
// Set the cycles left before the write-back stage.
- CyclesLeft = Desc.MaxLatency;
+ CyclesLeft = getLatency();
- for (UniqueDef &Def : Defs)
- Def->onInstructionIssued();
+ for (WriteState &WS : getDefs())
+ WS.onInstructionIssued();
// Transition to the "executed" stage if this is a zero-latency instruction.
if (!CyclesLeft)
Stage = IS_EXECUTED;
}
+void Instruction::forceExecuted() {
+ assert(Stage == IS_READY && "Invalid internal state!");
+ CyclesLeft = 0;
+ Stage = IS_EXECUTED;
+}
+
void Instruction::update() {
assert(isDispatched() && "Unexpected instruction stage found!");
- if (!llvm::all_of(Uses, [](const UniqueUse &Use) { return Use->isReady(); }))
+ if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); }))
return;
// A partial register write cannot complete before a dependent write.
- auto IsDefReady = [&](const UniqueDef &Def) {
- if (const WriteState *Write = Def->getDependentWrite()) {
- int WriteLatency = Write->getCyclesLeft();
- if (WriteLatency == UNKNOWN_CYCLES)
- return false;
- return static_cast<unsigned>(WriteLatency) < Desc.MaxLatency;
+ auto IsDefReady = [&](const WriteState &Def) {
+ if (!Def.getDependentWrite()) {
+ unsigned CyclesLeft = Def.getDependentWriteCyclesLeft();
+ return !CyclesLeft || CyclesLeft < getLatency();
}
- return true;
+ return false;
};
- if (llvm::all_of(Defs, IsDefReady))
+ if (all_of(getDefs(), IsDefReady))
Stage = IS_READY;
}
@@ -156,8 +180,11 @@ void Instruction::cycleEvent() {
return;
if (isDispatched()) {
- for (UniqueUse &Use : Uses)
- Use->cycleEvent();
+ for (ReadState &Use : getUses())
+ Use.cycleEvent();
+
+ for (WriteState &Def : getDefs())
+ Def.cycleEvent();
update();
return;
@@ -165,8 +192,8 @@ void Instruction::cycleEvent() {
assert(isExecuting() && "Instruction not in-flight?");
assert(CyclesLeft && "Instruction already executed?");
- for (UniqueDef &Def : Defs)
- Def->cycleEvent();
+ for (WriteState &Def : getDefs())
+ Def.cycleEvent();
CyclesLeft--;
if (!CyclesLeft)
Stage = IS_EXECUTED;
@@ -175,3 +202,4 @@ void Instruction::cycleEvent() {
const unsigned WriteRef::INVALID_IID = std::numeric_limits<unsigned>::max();
} // namespace mca
+} // namespace llvm
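WriteState::onInstructionIssued above computes the cycles a dependent read still has to wait as max(0, CyclesLeft - ReadAdvance), so a sufficiently large ReadAdvance hides the producer's latency entirely. A small worked sketch of that clamp follows; it is not part of the patch.

// Illustrative sketch only (not part of the patch above).
#include <algorithm>
#include <cstdio>

int main() {
  int WriteLatency = 5;                 // cycles until the producer writes back
  for (int ReadAdvance : {0, 2, 7}) {
    int ReadCycles = std::max(0, WriteLatency - ReadAdvance);
    std::printf("ReadAdvance=%d -> consumer waits %d cycle(s)\n", ReadAdvance,
                ReadCycles);
  }
  // A ReadAdvance larger than the remaining latency clamps the wait to zero.
}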
diff --git a/contrib/llvm/lib/MCA/Pipeline.cpp b/contrib/llvm/lib/MCA/Pipeline.cpp
new file mode 100644
index 000000000000..4c0e37c9ba7e
--- /dev/null
+++ b/contrib/llvm/lib/MCA/Pipeline.cpp
@@ -0,0 +1,97 @@
+//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements an ordered container of stages that simulate the
+/// pipeline of a hardware backend.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+void Pipeline::addEventListener(HWEventListener *Listener) {
+ if (Listener)
+ Listeners.insert(Listener);
+ for (auto &S : Stages)
+ S->addListener(Listener);
+}
+
+bool Pipeline::hasWorkToProcess() {
+ return any_of(Stages, [](const std::unique_ptr<Stage> &S) {
+ return S->hasWorkToComplete();
+ });
+}
+
+Expected<unsigned> Pipeline::run() {
+ assert(!Stages.empty() && "Unexpected empty pipeline found!");
+
+ do {
+ notifyCycleBegin();
+ if (Error Err = runCycle())
+ return std::move(Err);
+ notifyCycleEnd();
+ ++Cycles;
+ } while (hasWorkToProcess());
+
+ return Cycles;
+}
+
+Error Pipeline::runCycle() {
+ Error Err = ErrorSuccess();
+ // Update stages before we start processing new instructions.
+ for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) {
+ const std::unique_ptr<Stage> &S = *I;
+ Err = S->cycleStart();
+ }
+
+ // Now fetch and execute new instructions.
+ InstRef IR;
+ Stage &FirstStage = *Stages[0];
+ while (!Err && FirstStage.isAvailable(IR))
+ Err = FirstStage.execute(IR);
+
+ // Update stages in preparation for a new cycle.
+ for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) {
+ const std::unique_ptr<Stage> &S = *I;
+ Err = S->cycleEnd();
+ }
+
+ return Err;
+}
+
+void Pipeline::appendStage(std::unique_ptr<Stage> S) {
+ assert(S && "Invalid null stage in input!");
+ if (!Stages.empty()) {
+ Stage *Last = Stages.back().get();
+ Last->setNextInSequence(S.get());
+ }
+
+ Stages.push_back(std::move(S));
+}
+
+void Pipeline::notifyCycleBegin() {
+ LLVM_DEBUG(dbgs() << "\n[E] Cycle begin: " << Cycles << '\n');
+ for (HWEventListener *Listener : Listeners)
+ Listener->onCycleBegin();
+}
+
+void Pipeline::notifyCycleEnd() {
+ LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n");
+ for (HWEventListener *Listener : Listeners)
+ Listener->onCycleEnd();
+}
+} // namespace mca
+} // namespace llvm
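Pipeline::runCycle above drives one simulated cycle: every stage gets cycleStart in reverse order (so later stages drain before earlier ones feed them), new instructions are then pushed into the first stage, and every stage finally gets cycleEnd. A minimal sketch of that ordering follows; it is not part of the patch and the stage names are made up.

// Illustrative sketch only (not part of the patch above).
#include <cstdio>
#include <memory>
#include <vector>

struct MiniStage {
  const char *Name;
  void cycleStart() { std::printf("start %s\n", Name); }
  void cycleEnd() { std::printf("end %s\n", Name); }
};

int main() {
  std::vector<std::unique_ptr<MiniStage>> Stages;
  Stages.push_back(std::make_unique<MiniStage>(MiniStage{"fetch"}));
  Stages.push_back(std::make_unique<MiniStage>(MiniStage{"dispatch"}));
  Stages.push_back(std::make_unique<MiniStage>(MiniStage{"execute"}));

  // One simulated cycle.
  for (auto I = Stages.rbegin(), E = Stages.rend(); I != E; ++I)
    (*I)->cycleStart();                       // later stages drain first
  std::printf("feed instructions into %s\n", Stages.front()->Name);
  for (auto I = Stages.rbegin(), E = Stages.rend(); I != E; ++I)
    (*I)->cycleEnd();
}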
diff --git a/contrib/llvm/tools/llvm-mca/DispatchStage.cpp b/contrib/llvm/lib/MCA/Stages/DispatchStage.cpp
index 1f508886c298..7fb4eb6a1c0e 100644
--- a/contrib/llvm/tools/llvm-mca/DispatchStage.cpp
+++ b/contrib/llvm/lib/MCA/Stages/DispatchStage.cpp
@@ -16,28 +16,28 @@
///
//===----------------------------------------------------------------------===//
-#include "DispatchStage.h"
-#include "HWEventListener.h"
-#include "Scheduler.h"
+#include "llvm/MCA/Stages/DispatchStage.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
#include "llvm/Support/Debug.h"
-using namespace llvm;
-
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
void DispatchStage::notifyInstructionDispatched(const InstRef &IR,
- ArrayRef<unsigned> UsedRegs) {
+ ArrayRef<unsigned> UsedRegs,
+ unsigned UOps) const {
LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n');
- notifyEvent<HWInstructionEvent>(HWInstructionDispatchedEvent(IR, UsedRegs));
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionDispatchedEvent(IR, UsedRegs, UOps));
}
-bool DispatchStage::checkPRF(const InstRef &IR) {
+bool DispatchStage::checkPRF(const InstRef &IR) const {
SmallVector<unsigned, 4> RegDefs;
- for (const std::unique_ptr<WriteState> &RegDef :
- IR.getInstruction()->getDefs())
- RegDefs.emplace_back(RegDef->getRegisterID());
+ for (const WriteState &RegDef : IR.getInstruction()->getDefs())
+ RegDefs.emplace_back(RegDef.getRegisterID());
const unsigned RegisterMask = PRF.isAvailable(RegDefs);
// A mask with all zeroes means: register files are available.
@@ -50,7 +50,7 @@ bool DispatchStage::checkPRF(const InstRef &IR) {
return true;
}
-bool DispatchStage::checkRCU(const InstRef &IR) {
+bool DispatchStage::checkRCU(const InstRef &IR) const {
const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps;
if (RCU.isAvailable(NumMicroOps))
return true;
@@ -59,20 +59,17 @@ bool DispatchStage::checkRCU(const InstRef &IR) {
return false;
}
-bool DispatchStage::checkScheduler(const InstRef &IR) {
- HWStallEvent::GenericEventType Event;
- const bool Ready = SC.canBeDispatched(IR, Event);
- if (!Ready)
- notifyEvent<HWStallEvent>(HWStallEvent(Event, IR));
- return Ready;
+bool DispatchStage::canDispatch(const InstRef &IR) const {
+ return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR);
}
void DispatchStage::updateRAWDependencies(ReadState &RS,
const MCSubtargetInfo &STI) {
SmallVector<WriteRef, 4> DependentWrites;
- collectWrites(DependentWrites, RS.getRegisterID());
- RS.setDependentWrites(DependentWrites.size());
+ // Collect all the dependent writes, and update RS internal state.
+ PRF.addRegisterRead(RS, DependentWrites);
+
// We know that this read depends on all the writes in DependentWrites.
// For each write, check if we have ReadAdvance information, and use it
// to figure out in how many cycles this read becomes available.
@@ -87,7 +84,7 @@ void DispatchStage::updateRAWDependencies(ReadState &RS,
}
}
-void DispatchStage::dispatch(InstRef IR) {
+Error DispatchStage::dispatch(InstRef IR) {
assert(!CarryOver && "Cannot dispatch another instruction!");
Instruction &IS = *IR.getInstruction();
const InstrDesc &Desc = IS.getDesc();
@@ -96,52 +93,94 @@ void DispatchStage::dispatch(InstRef IR) {
assert(AvailableEntries == DispatchWidth);
AvailableEntries = 0;
CarryOver = NumMicroOps - DispatchWidth;
+ CarriedOver = IR;
} else {
assert(AvailableEntries >= NumMicroOps);
AvailableEntries -= NumMicroOps;
}
+ // Check if this instruction ends the dispatch group.
+ if (Desc.EndGroup)
+ AvailableEntries = 0;
+
+ // Check if this is an optimizable reg-reg move.
+ bool IsEliminated = false;
+ if (IS.isOptimizableMove()) {
+ assert(IS.getDefs().size() == 1 && "Expected a single output!");
+ assert(IS.getUses().size() == 1 && "Expected a single input!");
+ IsEliminated = PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]);
+ }
+
// A dependency-breaking instruction doesn't have to wait on the register
// input operands, and it is often optimized at register renaming stage.
// Update RAW dependencies if this instruction is not a dependency-breaking
// instruction. A dependency-breaking instruction is a zero-latency
// instruction that doesn't consume hardware resources.
// An example of dependency-breaking instruction on X86 is a zero-idiom XOR.
- bool IsDependencyBreaking = IS.isDependencyBreaking();
- for (std::unique_ptr<ReadState> &RS : IS.getUses())
- if (RS->isImplicitRead() || !IsDependencyBreaking)
- updateRAWDependencies(*RS, STI);
-
- // By default, a dependency-breaking zero-latency instruction is expected to
- // be optimized at register renaming stage. That means, no physical register
- // is allocated to the instruction.
- bool ShouldAllocateRegisters =
- !(Desc.isZeroLatency() && IsDependencyBreaking);
- SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles());
- for (std::unique_ptr<WriteState> &WS : IS.getDefs()) {
- PRF.addRegisterWrite(WriteRef(IR.first, WS.get()), RegisterFiles,
- ShouldAllocateRegisters);
+ //
+ // We also don't update data dependencies for instructions that have been
+ // eliminated at register renaming stage.
+ if (!IsEliminated) {
+ for (ReadState &RS : IS.getUses())
+ updateRAWDependencies(RS, STI);
}
+ // By default, a dependency-breaking zero-idiom is expected to be optimized
+ // at register renaming stage. That means, no physical register is allocated
+ // to the instruction.
+ SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles());
+ for (WriteState &WS : IS.getDefs())
+ PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles);
+
// Reserve slots in the RCU, and notify the instruction that it has been
// dispatched to the schedulers for execution.
IS.dispatch(RCU.reserveSlot(IR, NumMicroOps));
- // Notify listeners of the "instruction dispatched" event.
- notifyInstructionDispatched(IR, RegisterFiles);
+ // Notify listeners of the "instruction dispatched" event,
+ // and move IR to the next stage.
+ notifyInstructionDispatched(IR, RegisterFiles,
+ std::min(DispatchWidth, NumMicroOps));
+ return moveToTheNextStage(IR);
}
-void DispatchStage::cycleStart() {
+Error DispatchStage::cycleStart() {
+ PRF.cycleStart();
+
+ if (!CarryOver) {
+ AvailableEntries = DispatchWidth;
+ return ErrorSuccess();
+ }
+
AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
- CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U;
+ unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries;
+ CarryOver -= DispatchedOpcodes;
+ assert(CarriedOver && "Invalid dispatched instruction");
+
+ SmallVector<unsigned, 8> RegisterFiles(PRF.getNumRegisterFiles(), 0U);
+ notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes);
+ if (!CarryOver)
+ CarriedOver = InstRef();
+ return ErrorSuccess();
}
-bool DispatchStage::execute(InstRef &IR) {
+bool DispatchStage::isAvailable(const InstRef &IR) const {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
- if (!isAvailable(Desc.NumMicroOps) || !canDispatch(IR))
+ unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth);
+ if (Required > AvailableEntries)
return false;
- dispatch(IR);
- return true;
+
+ if (Desc.BeginGroup && AvailableEntries != DispatchWidth)
+ return false;
+
+ // The dispatch logic doesn't internally buffer instructions. It only accepts
+ // instructions that can be successfully moved to the next stage during this
+ // same cycle.
+ return canDispatch(IR);
+}
+
+Error DispatchStage::execute(InstRef &IR) {
+ assert(canDispatch(IR) && "Cannot dispatch another instruction!");
+ return dispatch(IR);
}
#ifndef NDEBUG
@@ -151,3 +190,4 @@ void DispatchStage::dump() const {
}
#endif
} // namespace mca
+} // namespace llvm
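
A minimal, self-contained sketch of the carry-over arithmetic that the reworked DispatchStage::dispatch/cycleStart pair performs above, assuming a hypothetical DispatchWidth of 4 and a 10-micro-op instruction; only the variable names are taken from the patch, the rest is illustrative.

#include <cstdio>

int main() {
  const unsigned DispatchWidth = 4; // assumed width
  unsigned NumMicroOps = 10;        // assumed instruction size
  // dispatch(): the instruction consumes the whole group and carries over.
  unsigned CarryOver = NumMicroOps - DispatchWidth; // 6 micro-ops left
  unsigned Cycle = 0;
  while (CarryOver) {
    // cycleStart(): drain up to DispatchWidth micro-ops per cycle.
    unsigned AvailableEntries =
        CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
    unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries;
    CarryOver -= DispatchedOpcodes;
    std::printf("cycle %u: drained %u uOps, %u left, %u entries free\n",
                ++Cycle, DispatchedOpcodes, CarryOver, AvailableEntries);
  }
  return 0;
}

With these assumed numbers the carried-over instruction drains 4 micro-ops in the first extra cycle and 2 in the second, at which point CarriedOver is reset to an empty InstRef.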
diff --git a/contrib/llvm/lib/MCA/Stages/EntryStage.cpp b/contrib/llvm/lib/MCA/Stages/EntryStage.cpp
new file mode 100644
index 000000000000..3325bb36f5af
--- /dev/null
+++ b/contrib/llvm/lib/MCA/Stages/EntryStage.cpp
@@ -0,0 +1,76 @@
+//===---------------------- EntryStage.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the Fetch stage of an instruction pipeline. Its sole
+/// purpose in life is to produce instructions for the rest of the pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/EntryStage.h"
+#include "llvm/MCA/Instruction.h"
+
+namespace llvm {
+namespace mca {
+
+bool EntryStage::hasWorkToComplete() const { return CurrentInstruction; }
+
+bool EntryStage::isAvailable(const InstRef & /* unused */) const {
+ if (CurrentInstruction)
+ return checkNextStage(CurrentInstruction);
+ return false;
+}
+
+void EntryStage::getNextInstruction() {
+ assert(!CurrentInstruction && "There is already an instruction to process!");
+ if (!SM.hasNext())
+ return;
+ SourceRef SR = SM.peekNext();
+ std::unique_ptr<Instruction> Inst = llvm::make_unique<Instruction>(SR.second);
+ CurrentInstruction = InstRef(SR.first, Inst.get());
+ Instructions.emplace_back(std::move(Inst));
+ SM.updateNext();
+}
+
+llvm::Error EntryStage::execute(InstRef & /* unused */) {
+ assert(CurrentInstruction && "There is no instruction to process!");
+ if (llvm::Error Val = moveToTheNextStage(CurrentInstruction))
+ return Val;
+
+ // Move the program counter.
+ CurrentInstruction.invalidate();
+ getNextInstruction();
+ return llvm::ErrorSuccess();
+}
+
+llvm::Error EntryStage::cycleStart() {
+ if (!CurrentInstruction)
+ getNextInstruction();
+ return llvm::ErrorSuccess();
+}
+
+llvm::Error EntryStage::cycleEnd() {
+ // Find the first instruction which hasn't been retired.
+ auto Range = make_range(&Instructions[NumRetired], Instructions.end());
+ auto It = find_if(Range, [](const std::unique_ptr<Instruction> &I) {
+ return !I->isRetired();
+ });
+
+ NumRetired = std::distance(Instructions.begin(), It);
+ // Erase instructions up to the first that hasn't been retired.
+ if ((NumRetired * 2) >= Instructions.size()) {
+ Instructions.erase(Instructions.begin(), It);
+ NumRetired = 0;
+ }
+
+ return llvm::ErrorSuccess();
+}
+
+} // namespace mca
+} // namespace llvm
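
A small sketch of the compaction policy used by EntryStage::cycleEnd above: retired entries are erased from the front of the owning buffer only once they make up at least half of it, which keeps the cost of erasure amortized. The Instr struct is a stand-in for the real Instruction class; everything here is illustrative.

#include <cstddef>
#include <vector>

struct Instr {
  bool Retired = false;
};

// Erase retired entries from the front only once they make up at least half
// of the buffer, mirroring EntryStage::cycleEnd.
void compact(std::vector<Instr> &Instructions, std::size_t &NumRetired) {
  auto It = Instructions.begin() + NumRetired;
  while (It != Instructions.end() && It->Retired)
    ++It;
  NumRetired = static_cast<std::size_t>(It - Instructions.begin());
  if (NumRetired * 2 >= Instructions.size()) {
    Instructions.erase(Instructions.begin(), It);
    NumRetired = 0;
  }
}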
diff --git a/contrib/llvm/lib/MCA/Stages/ExecuteStage.cpp b/contrib/llvm/lib/MCA/Stages/ExecuteStage.cpp
new file mode 100644
index 000000000000..e78327763fa1
--- /dev/null
+++ b/contrib/llvm/lib/MCA/Stages/ExecuteStage.cpp
@@ -0,0 +1,225 @@
+//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the execution stage of an instruction pipeline.
+///
+/// The ExecuteStage is responsible for managing the hardware scheduler
+/// and issuing notifications that an instruction has been executed.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/ExecuteStage.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) {
+ switch (Status) {
+ case Scheduler::SC_LOAD_QUEUE_FULL:
+ return HWStallEvent::LoadQueueFull;
+ case Scheduler::SC_STORE_QUEUE_FULL:
+ return HWStallEvent::StoreQueueFull;
+ case Scheduler::SC_BUFFERS_FULL:
+ return HWStallEvent::SchedulerQueueFull;
+ case Scheduler::SC_DISPATCH_GROUP_STALL:
+ return HWStallEvent::DispatchGroupStall;
+ case Scheduler::SC_AVAILABLE:
+ return HWStallEvent::Invalid;
+ }
+
+ llvm_unreachable("Don't know how to process this StallKind!");
+}
+
+bool ExecuteStage::isAvailable(const InstRef &IR) const {
+ if (Scheduler::Status S = HWS.isAvailable(IR)) {
+ HWStallEvent::GenericEventType ET = toHWStallEventType(S);
+ notifyEvent<HWStallEvent>(HWStallEvent(ET, IR));
+ return false;
+ }
+
+ return true;
+}
+
+Error ExecuteStage::issueInstruction(InstRef &IR) {
+ SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> Used;
+ SmallVector<InstRef, 4> Ready;
+ HWS.issueInstruction(IR, Used, Ready);
+
+ notifyReservedOrReleasedBuffers(IR, /* Reserved */ false);
+
+ notifyInstructionIssued(IR, Used);
+ if (IR.getInstruction()->isExecuted()) {
+ notifyInstructionExecuted(IR);
+ // FIXME: add a buffer of executed instructions.
+ if (Error S = moveToTheNextStage(IR))
+ return S;
+ }
+
+ for (const InstRef &I : Ready)
+ notifyInstructionReady(I);
+ return ErrorSuccess();
+}
+
+Error ExecuteStage::issueReadyInstructions() {
+ InstRef IR = HWS.select();
+ while (IR) {
+ if (Error Err = issueInstruction(IR))
+ return Err;
+
+ // Select the next instruction to issue.
+ IR = HWS.select();
+ }
+
+ return ErrorSuccess();
+}
+
+Error ExecuteStage::cycleStart() {
+ SmallVector<ResourceRef, 8> Freed;
+ SmallVector<InstRef, 4> Executed;
+ SmallVector<InstRef, 4> Ready;
+
+ HWS.cycleEvent(Freed, Executed, Ready);
+
+ for (const ResourceRef &RR : Freed)
+ notifyResourceAvailable(RR);
+
+ for (InstRef &IR : Executed) {
+ notifyInstructionExecuted(IR);
+ // FIXME: add a buffer of executed instructions.
+ if (Error S = moveToTheNextStage(IR))
+ return S;
+ }
+
+ for (const InstRef &IR : Ready)
+ notifyInstructionReady(IR);
+
+ return issueReadyInstructions();
+}
+
+#ifndef NDEBUG
+static void verifyInstructionEliminated(const InstRef &IR) {
+ const Instruction &Inst = *IR.getInstruction();
+ assert(Inst.isEliminated() && "Instruction was not eliminated!");
+ assert(Inst.isReady() && "Instruction in an inconsistent state!");
+
+ // Ensure that instructions eliminated at register renaming stage are in a
+ // consistent state.
+ const InstrDesc &Desc = Inst.getDesc();
+ assert(!Desc.MayLoad && !Desc.MayStore && "Cannot eliminate a memory op!");
+}
+#endif
+
+Error ExecuteStage::handleInstructionEliminated(InstRef &IR) {
+#ifndef NDEBUG
+ verifyInstructionEliminated(IR);
+#endif
+ notifyInstructionReady(IR);
+ notifyInstructionIssued(IR, {});
+ IR.getInstruction()->forceExecuted();
+ notifyInstructionExecuted(IR);
+ return moveToTheNextStage(IR);
+}
+
+// Schedule the instruction for execution on the hardware.
+Error ExecuteStage::execute(InstRef &IR) {
+ assert(isAvailable(IR) && "Scheduler is not available!");
+
+#ifndef NDEBUG
+ // Ensure that the HWS has not stored this instruction in its queues.
+ HWS.sanityCheck(IR);
+#endif
+
+ if (IR.getInstruction()->isEliminated())
+ return handleInstructionEliminated(IR);
+
+ // Reserve a slot in each buffered resource. Also, mark units with
+ // BufferSize=0 as reserved. Resources with a buffer size of zero will only
+ // be released after MCIS is issued, and all the ResourceCycles for those
+ // units have been consumed.
+ HWS.dispatch(IR);
+ notifyReservedOrReleasedBuffers(IR, /* Reserved */ true);
+ if (!HWS.isReady(IR))
+ return ErrorSuccess();
+
+ // If we did not return early, then the scheduler is ready for execution.
+ notifyInstructionReady(IR);
+
+ // If we cannot issue immediately, the HWS will add IR to its ready queue for
+ // execution later, so we must return early here.
+ if (!HWS.mustIssueImmediately(IR))
+ return ErrorSuccess();
+
+ // Issue IR to the underlying pipelines.
+ return issueInstruction(IR);
+}
+
+void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) const {
+ LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n');
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionEvent(HWInstructionEvent::Executed, IR));
+}
+
+void ExecuteStage::notifyInstructionReady(const InstRef &IR) const {
+ LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n');
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionEvent(HWInstructionEvent::Ready, IR));
+}
+
+void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) const {
+ LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.'
+ << RR.second << "]\n");
+ for (HWEventListener *Listener : getListeners())
+ Listener->onResourceAvailable(RR);
+}
+
+void ExecuteStage::notifyInstructionIssued(
+ const InstRef &IR,
+ MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const {
+ LLVM_DEBUG({
+ dbgs() << "[E] Instruction Issued: #" << IR << '\n';
+ for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) {
+ dbgs() << "[E] Resource Used: [" << Resource.first.first << '.'
+ << Resource.first.second << "], ";
+ dbgs() << "cycles: " << Resource.second << '\n';
+ }
+ });
+
+ // Replace resource masks with valid resource processor IDs.
+ for (std::pair<ResourceRef, ResourceCycles> &Use : Used)
+ Use.first.first = HWS.getResourceID(Use.first.first);
+
+ notifyEvent<HWInstructionEvent>(HWInstructionIssuedEvent(IR, Used));
+}
+
+void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR,
+ bool Reserved) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ if (Desc.Buffers.empty())
+ return;
+
+ SmallVector<unsigned, 4> BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end());
+ std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(),
+ [&](uint64_t Op) { return HWS.getResourceID(Op); });
+ if (Reserved) {
+ for (HWEventListener *Listener : getListeners())
+ Listener->onReservedBuffers(IR, BufferIDs);
+ return;
+ }
+
+ for (HWEventListener *Listener : getListeners())
+ Listener->onReleasedBuffers(IR, BufferIDs);
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/InstructionTables.cpp b/contrib/llvm/lib/MCA/Stages/InstructionTables.cpp
index 9b9dbc37fbdb..f918c183aa5a 100644
--- a/contrib/llvm/tools/llvm-mca/InstructionTables.cpp
+++ b/contrib/llvm/lib/MCA/Stages/InstructionTables.cpp
@@ -15,14 +15,12 @@
///
//===----------------------------------------------------------------------===//
-#include "InstructionTables.h"
+#include "llvm/MCA/Stages/InstructionTables.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
-
-bool InstructionTables::execute(InstRef &IR) {
- ArrayRef<uint64_t> Masks = IB.getProcResourceMasks();
+Error InstructionTables::execute(InstRef &IR) {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
UsedResources.clear();
@@ -31,17 +29,17 @@ bool InstructionTables::execute(InstRef &IR) {
// Skip zero-cycle resources (i.e., unused resources).
if (!Resource.second.size())
continue;
- double Cycles = static_cast<double>(Resource.second.size());
+ unsigned Cycles = Resource.second.size();
unsigned Index = std::distance(
Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first));
const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index);
unsigned NumUnits = ProcResource.NumUnits;
if (!ProcResource.SubUnitsIdxBegin) {
// The number of cycles consumed by each unit.
- Cycles /= NumUnits;
for (unsigned I = 0, E = NumUnits; I < E; ++I) {
ResourceRef ResourceUnit = std::make_pair(Index, 1U << I);
- UsedResources.emplace_back(std::make_pair(ResourceUnit, Cycles));
+ UsedResources.emplace_back(
+ std::make_pair(ResourceUnit, ResourceCycles(Cycles, NumUnits)));
}
continue;
}
@@ -53,10 +51,10 @@ bool InstructionTables::execute(InstRef &IR) {
unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1];
const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx);
// Compute the number of cycles consumed by each resource unit.
- double RUCycles = Cycles / (NumUnits * SubUnit.NumUnits);
for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) {
ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2);
- UsedResources.emplace_back(std::make_pair(ResourceUnit, RUCycles));
+ UsedResources.emplace_back(std::make_pair(
+ ResourceUnit, ResourceCycles(Cycles, NumUnits * SubUnit.NumUnits)));
}
}
}
@@ -64,7 +62,8 @@ bool InstructionTables::execute(InstRef &IR) {
// Send a fake instruction issued event to all the views.
HWInstructionIssuedEvent Event(IR, UsedResources);
notifyEvent<HWInstructionIssuedEvent>(Event);
- return true;
+ return ErrorSuccess();
}
} // namespace mca
+} // namespace llvm
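
The InstructionTables change above replaces the pre-divided double "Cycles / NumUnits" with a ResourceCycles(Cycles, NumUnits) value; judging from the constructor arguments, the per-unit usage is now carried as a cycles/units pair rather than a rounded floating-point number. A stand-in Ratio type sketches the idea; the real ResourceCycles class lives in the MCA headers and is not reproduced here.

#include <cstdio>

// Hypothetical stand-in for the ResourceCycles pair used above.
struct Ratio {
  unsigned Num, Den; // cycles consumed over the number of units sharing them
  double toDouble() const { return static_cast<double>(Num) / Den; }
};

int main() {
  unsigned Cycles = 1, NumUnits = 3;
  double Approx = static_cast<double>(Cycles) / NumUnits; // old representation
  Ratio Exact{Cycles, NumUnits};                          // new representation
  std::printf("approx=%.17g exact=%u/%u (%.17g)\n", Approx, Exact.Num,
              Exact.Den, Exact.toDouble());
  return 0;
}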
diff --git a/contrib/llvm/tools/llvm-mca/RetireStage.cpp b/contrib/llvm/lib/MCA/Stages/RetireStage.cpp
index 55c3b887e478..d6bcc518662f 100644
--- a/contrib/llvm/tools/llvm-mca/RetireStage.cpp
+++ b/contrib/llvm/lib/MCA/Stages/RetireStage.cpp
@@ -14,19 +14,18 @@
///
//===----------------------------------------------------------------------===//
-#include "RetireStage.h"
-#include "HWEventListener.h"
+#include "llvm/MCA/Stages/RetireStage.h"
+#include "llvm/MCA/HWEventListener.h"
#include "llvm/Support/Debug.h"
-using namespace llvm;
-
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
-void RetireStage::cycleStart() {
+llvm::Error RetireStage::cycleStart() {
if (RCU.isEmpty())
- return;
+ return llvm::ErrorSuccess();
const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle();
unsigned NumRetired = 0;
@@ -40,18 +39,24 @@ void RetireStage::cycleStart() {
notifyInstructionRetired(Current.IR);
NumRetired++;
}
+
+ return llvm::ErrorSuccess();
+}
+
+llvm::Error RetireStage::execute(InstRef &IR) {
+ RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID());
+ return llvm::ErrorSuccess();
}
-void RetireStage::notifyInstructionRetired(const InstRef &IR) {
- LLVM_DEBUG(dbgs() << "[E] Instruction Retired: #" << IR << '\n');
- SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
+void RetireStage::notifyInstructionRetired(const InstRef &IR) const {
+ LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n');
+ llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
const Instruction &Inst = *IR.getInstruction();
- const InstrDesc &Desc = Inst.getDesc();
- bool ShouldFreeRegs = !(Desc.isZeroLatency() && Inst.isDependencyBreaking());
- for (const std::unique_ptr<WriteState> &WS : Inst.getDefs())
- PRF.removeRegisterWrite(*WS.get(), FreedRegs, ShouldFreeRegs);
+ for (const WriteState &WS : Inst.getDefs())
+ PRF.removeRegisterWrite(WS, FreedRegs);
notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
}
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/Stage.cpp b/contrib/llvm/lib/MCA/Stages/Stage.cpp
index 7ead940e63c1..38191645e736 100644
--- a/contrib/llvm/tools/llvm-mca/Stage.cpp
+++ b/contrib/llvm/lib/MCA/Stages/Stage.cpp
@@ -13,15 +13,17 @@
///
//===----------------------------------------------------------------------===//
-#include "Stage.h"
+#include "llvm/MCA/Stages/Stage.h"
+namespace llvm {
namespace mca {
// Pin the vtable here in the implementation file.
-Stage::Stage() {}
+Stage::~Stage() = default;
void Stage::addListener(HWEventListener *Listener) {
Listeners.insert(Listener);
}
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/Support.cpp b/contrib/llvm/lib/MCA/Support.cpp
index 8f6b8a91f38f..335953e10481 100644
--- a/contrib/llvm/tools/llvm-mca/Support.cpp
+++ b/contrib/llvm/lib/MCA/Support.cpp
@@ -13,20 +13,24 @@
///
//===----------------------------------------------------------------------===//
-#include "Support.h"
+#include "llvm/MCA/Support.h"
#include "llvm/MC/MCSchedule.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
+#define DEBUG_TYPE "llvm-mca"
void computeProcResourceMasks(const MCSchedModel &SM,
- SmallVectorImpl<uint64_t> &Masks) {
+ MutableArrayRef<uint64_t> Masks) {
unsigned ProcResourceID = 0;
+ assert(Masks.size() == SM.getNumProcResourceKinds() &&
+ "Invalid number of elements");
+ // Resource at index 0 is the 'InvalidUnit'. Set an invalid mask for it.
+ Masks[0] = 0;
+
// Create a unique bitmask for every processor resource unit.
- // Skip resource at index 0, since it always references 'InvalidUnit'.
- Masks.resize(SM.getNumProcResourceKinds());
for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
const MCProcResourceDesc &Desc = *SM.getProcResource(I);
if (Desc.SubUnitsIdxBegin)
@@ -47,6 +51,16 @@ void computeProcResourceMasks(const MCSchedModel &SM,
}
ProcResourceID++;
}
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "\nProcessor resource masks:"
+ << "\n");
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ LLVM_DEBUG(dbgs() << '[' << I << "] " << Desc.Name << " - " << Masks[I]
+ << '\n');
+ }
+#endif
}
double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
@@ -77,3 +91,4 @@ double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
}
} // namespace mca
+} // namespace llvm
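
A caller-side sketch for the new computeProcResourceMasks signature above: since the function now takes a MutableArrayRef instead of resizing a SmallVectorImpl itself, the output buffer must be pre-sized to SM.getNumProcResourceKinds(), which the added assert enforces. This assumes an LLVM tree at the corresponding revision.

#include <cstdint>
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MCA/Support.h"

// Pre-size the buffer to the number of processor resource kinds, then let
// computeProcResourceMasks fill in one bitmask per resource.
llvm::SmallVector<uint64_t, 32> getMasks(const llvm::MCSchedModel &SM) {
  llvm::SmallVector<uint64_t, 32> Masks(SM.getNumProcResourceKinds());
  llvm::mca::computeProcResourceMasks(SM, Masks);
  return Masks;
}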
diff --git a/contrib/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm/lib/Object/ArchiveWriter.cpp
index ea17b2220a0b..da93602cbb28 100644
--- a/contrib/llvm/lib/Object/ArchiveWriter.cpp
+++ b/contrib/llvm/lib/Object/ArchiveWriter.cpp
@@ -27,6 +27,8 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
+#include <map>
+
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#else
@@ -119,6 +121,11 @@ static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) {
OS.indent(Size - SizeSoFar);
}
+static bool isDarwin(object::Archive::Kind Kind) {
+ return Kind == object::Archive::K_DARWIN ||
+ Kind == object::Archive::K_DARWIN64;
+}
+
static bool isBSDLike(object::Archive::Kind Kind) {
switch (Kind) {
case object::Archive::K_GNU:
@@ -126,8 +133,8 @@ static bool isBSDLike(object::Archive::Kind Kind) {
return false;
case object::Archive::K_BSD:
case object::Archive::K_DARWIN:
- return true;
case object::Archive::K_DARWIN64:
+ return true;
case object::Archive::K_COFF:
break;
}
@@ -243,20 +250,33 @@ static void addToStringTable(raw_ostream &Out, StringRef ArcName,
static void printMemberHeader(raw_ostream &Out, uint64_t Pos,
raw_ostream &StringTable,
+ StringMap<uint64_t> &MemberNames,
object::Archive::Kind Kind, bool Thin,
StringRef ArcName, const NewArchiveMember &M,
+ sys::TimePoint<std::chrono::seconds> ModTime,
unsigned Size) {
+
if (isBSDLike(Kind))
- return printBSDMemberHeader(Out, Pos, M.MemberName, M.ModTime, M.UID, M.GID,
+ return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID,
M.Perms, Size);
if (!useStringTable(Thin, M.MemberName))
- return printGNUSmallMemberHeader(Out, M.MemberName, M.ModTime, M.UID, M.GID,
+ return printGNUSmallMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID,
M.Perms, Size);
Out << '/';
- uint64_t NamePos = StringTable.tell();
- addToStringTable(StringTable, ArcName, M, Thin);
+ uint64_t NamePos;
+ if (Thin) {
+ NamePos = StringTable.tell();
+ addToStringTable(StringTable, ArcName, M, Thin);
+ } else {
+ auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)});
+ if (Insertion.second) {
+ Insertion.first->second = StringTable.tell();
+ addToStringTable(StringTable, ArcName, M, Thin);
+ }
+ NamePos = Insertion.first->second;
+ }
printWithSpacePadding(Out, NamePos, 15);
- printRestOfMemberHeader(Out, M.ModTime, M.UID, M.GID, M.Perms, Size);
+ printRestOfMemberHeader(Out, ModTime, M.UID, M.GID, M.Perms, Size);
}
namespace {
@@ -310,7 +330,9 @@ static void printNBits(raw_ostream &Out, object::Archive::Kind Kind,
static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
bool Deterministic, ArrayRef<MemberData> Members,
StringRef StringTable) {
- if (StringTable.empty())
+ // We don't write a symbol table on an archive with no members -- except on
+ // Darwin, where the linker will abort unless the archive has a symbol table.
+ if (StringTable.empty() && !isDarwin(Kind))
return;
unsigned NumSyms = 0;
@@ -318,15 +340,15 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
NumSyms += M.Symbols.size();
unsigned Size = 0;
- Size += is64BitKind(Kind) ? 8 : 4; // Number of entries
+ unsigned OffsetSize = is64BitKind(Kind) ? sizeof(uint64_t) : sizeof(uint32_t);
+
+ Size += OffsetSize; // Number of entries
if (isBSDLike(Kind))
- Size += NumSyms * 8; // Table
- else if (is64BitKind(Kind))
- Size += NumSyms * 8; // Table
+ Size += NumSyms * OffsetSize * 2; // Table
else
- Size += NumSyms * 4; // Table
+ Size += NumSyms * OffsetSize; // Table
if (isBSDLike(Kind))
- Size += 4; // byte count
+ Size += OffsetSize; // byte count
Size += StringTable.size();
// ld64 expects the members to be 8-byte aligned for 64-bit content and at
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
@@ -336,25 +358,26 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
unsigned Pad = OffsetToAlignment(Size, Alignment);
Size += Pad;
- if (isBSDLike(Kind))
- printBSDMemberHeader(Out, Out.tell(), "__.SYMDEF", now(Deterministic), 0, 0,
- 0, Size);
- else if (is64BitKind(Kind))
- printGNUSmallMemberHeader(Out, "/SYM64", now(Deterministic), 0, 0, 0, Size);
- else
- printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, Size);
+ if (isBSDLike(Kind)) {
+ const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF";
+ printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0,
+ Size);
+ } else {
+ const char *Name = is64BitKind(Kind) ? "/SYM64" : "";
+ printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size);
+ }
uint64_t Pos = Out.tell() + Size;
if (isBSDLike(Kind))
- print<uint32_t>(Out, Kind, NumSyms * 8);
+ printNBits(Out, Kind, NumSyms * 2 * OffsetSize);
else
printNBits(Out, Kind, NumSyms);
for (const MemberData &M : Members) {
for (unsigned StringOffset : M.Symbols) {
if (isBSDLike(Kind))
- print<uint32_t>(Out, Kind, StringOffset);
+ printNBits(Out, Kind, StringOffset);
printNBits(Out, Kind, Pos); // member offset
}
Pos += M.Header.size() + M.Data.size() + M.Padding.size();
@@ -362,7 +385,7 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
if (isBSDLike(Kind))
// byte count of the string table
- print<uint32_t>(Out, Kind, StringTable.size());
+ printNBits(Out, Kind, StringTable.size());
Out << StringTable;
while (Pad--)
@@ -372,20 +395,32 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
static Expected<std::vector<unsigned>>
getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
std::vector<unsigned> Ret;
- LLVMContext Context;
- Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
- object::SymbolicFile::createSymbolicFile(Buf, llvm::file_magic::unknown,
- &Context);
- if (!ObjOrErr) {
- // FIXME: check only for "not an object file" errors.
- consumeError(ObjOrErr.takeError());
- return Ret;
+  // When the LLVMContext is populated, SymbolicFile will hold a reference to
+  // it, so SymbolicFile must be destroyed first.
+ LLVMContext Context;
+ std::unique_ptr<object::SymbolicFile> Obj;
+ if (identify_magic(Buf.getBuffer()) == file_magic::bitcode) {
+ auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
+ Buf, file_magic::bitcode, &Context);
+ if (!ObjOrErr) {
+ // FIXME: check only for "not an object file" errors.
+ consumeError(ObjOrErr.takeError());
+ return Ret;
+ }
+ Obj = std::move(*ObjOrErr);
+ } else {
+ auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
+ if (!ObjOrErr) {
+ // FIXME: check only for "not an object file" errors.
+ consumeError(ObjOrErr.takeError());
+ return Ret;
+ }
+ Obj = std::move(*ObjOrErr);
}
HasObject = true;
- object::SymbolicFile &Obj = *ObjOrErr.get();
- for (const object::BasicSymbolRef &S : Obj.symbols()) {
+ for (const object::BasicSymbolRef &S : Obj->symbols()) {
if (!isArchiveSymbol(S))
continue;
Ret.push_back(SymNames.tell());
@@ -399,7 +434,7 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
object::Archive::Kind Kind, bool Thin, StringRef ArcName,
- ArrayRef<NewArchiveMember> NewMembers) {
+ bool Deterministic, ArrayRef<NewArchiveMember> NewMembers) {
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
// This ignores the symbol table, but we only need the value mod 8 and the
@@ -408,6 +443,62 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
std::vector<MemberData> Ret;
bool HasObject = false;
+
+ // Deduplicate long member names in the string table and reuse earlier name
+ // offsets. This especially saves space for COFF Import libraries where all
+ // members have the same name.
+ StringMap<uint64_t> MemberNames;
+
+ // UniqueTimestamps is a special case to improve debugging on Darwin:
+ //
+ // The Darwin linker does not link debug info into the final
+  // binary. Instead, it emits entries of type N_OSO in the output
+ // binary's symbol table, containing references to the linked-in
+ // object files. Using that reference, the debugger can read the
+ // debug data directly from the object files. Alternatively, an
+ // invocation of 'dsymutil' will link the debug data from the object
+ // files into a dSYM bundle, which can be loaded by the debugger,
+ // instead of the object files.
+ //
+ // For an object file, the N_OSO entries contain the absolute path
+  // to the file, and the file's timestamp. For an object
+ // included in an archive, the path is formatted like
+ // "/absolute/path/to/archive.a(member.o)", and the timestamp is the
+ // archive member's timestamp, rather than the archive's timestamp.
+ //
+ // However, this doesn't always uniquely identify an object within
+ // an archive -- an archive file can have multiple entries with the
+ // same filename. (This will happen commonly if the original object
+ // files started in different directories.) The only way they get
+ // distinguished, then, is via the timestamp. But this process is
+ // unable to find the correct object file in the archive when there
+ // are two files of the same name and timestamp.
+ //
+ // Additionally, timestamp==0 is treated specially, and causes the
+  // timestamp to be ignored as a match criterion.
+ //
+ // That will "usually" work out okay when creating an archive not in
+ // deterministic timestamp mode, because the objects will probably
+ // have been created at different timestamps.
+ //
+ // To ameliorate this problem, in deterministic archive mode (which
+ // is the default), on Darwin we will emit a unique non-zero
+ // timestamp for each entry with a duplicated name. This is still
+ // deterministic: the only thing affecting that timestamp is the
+ // order of the files in the resultant archive.
+ //
+ // See also the functions that handle the lookup:
+ // in lldb: ObjectContainerBSDArchive::Archive::FindObject()
+ // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers().
+ bool UniqueTimestamps = Deterministic && isDarwin(Kind);
+ std::map<StringRef, unsigned> FilenameCount;
+ if (UniqueTimestamps) {
+ for (const NewArchiveMember &M : NewMembers)
+ FilenameCount[M.MemberName]++;
+ for (auto &Entry : FilenameCount)
+ Entry.second = Entry.second > 1 ? 1 : 0;
+ }
+
for (const NewArchiveMember &M : NewMembers) {
std::string Header;
raw_string_ostream Out(Header);
@@ -419,14 +510,19 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly. This matches the behaviour with cctools and ensures that ld64
// is happy with archives that we generate.
- unsigned MemberPadding = Kind == object::Archive::K_DARWIN
- ? OffsetToAlignment(Data.size(), 8)
- : 0;
+ unsigned MemberPadding =
+ isDarwin(Kind) ? OffsetToAlignment(Data.size(), 8) : 0;
unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2);
StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding);
- printMemberHeader(Out, Pos, StringTable, Kind, Thin, ArcName, M,
- Buf.getBufferSize() + MemberPadding);
+ sys::TimePoint<std::chrono::seconds> ModTime;
+ if (UniqueTimestamps)
+ // Increment timestamp for each file of a given name.
+ ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++);
+ else
+ ModTime = M.ModTime;
+ printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, ArcName,
+ M, ModTime, Buf.getBufferSize() + MemberPadding);
Out.flush();
Expected<std::vector<unsigned>> Symbols =
@@ -457,8 +553,8 @@ Error llvm::writeArchive(StringRef ArcName,
SmallString<0> StringTableBuf;
raw_svector_ostream StringTable(StringTableBuf);
- Expected<std::vector<MemberData>> DataOrErr =
- computeMemberData(StringTable, SymNames, Kind, Thin, ArcName, NewMembers);
+ Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
+ StringTable, SymNames, Kind, Thin, ArcName, Deterministic, NewMembers);
if (Error E = DataOrErr.takeError())
return E;
std::vector<MemberData> &Data = *DataOrErr;
@@ -470,7 +566,7 @@ Error llvm::writeArchive(StringRef ArcName,
if (WriteSymtab) {
uint64_t MaxOffset = 0;
uint64_t LastOffset = MaxOffset;
- for (const auto& M : Data) {
+ for (const auto &M : Data) {
// Record the start of the member's offset
LastOffset = MaxOffset;
// Account for the size of each part associated with the member.
@@ -494,8 +590,12 @@ Error llvm::writeArchive(StringRef ArcName,
// If LastOffset isn't going to fit in a 32-bit variable we need to switch
// to 64-bit. Note that the file can be larger than 4GB as long as the last
// member starts before the 4GB offset.
- if (LastOffset >= (1ULL << Sym64Threshold))
- Kind = object::Archive::K_GNU64;
+ if (LastOffset >= (1ULL << Sym64Threshold)) {
+ if (Kind == object::Archive::K_DARWIN)
+ Kind = object::Archive::K_DARWIN64;
+ else
+ Kind = object::Archive::K_GNU64;
+ }
}
Expected<sys::fs::TempFile> Temp =
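
A small sketch of the unique-timestamp scheme described in the ArchiveWriter comment above: member names that appear more than once are seeded with 1 in FilenameCount and then post-incremented per member, so duplicates receive timestamps 1, 2, 3, ... while unique names keep timestamp 0. The member list and std::string keys here are illustrative only.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Members = {"a.o", "b.o", "a.o", "a.o"};
  std::map<std::string, unsigned> FilenameCount;
  for (const auto &M : Members)
    FilenameCount[M]++;
  // Names seen once start at 0 (timestamp ignored); duplicates start at 1.
  for (auto &Entry : FilenameCount)
    Entry.second = Entry.second > 1 ? 1 : 0;
  for (const auto &M : Members)
    std::printf("%s -> timestamp %u\n", M.c_str(), FilenameCount[M]++);
  return 0;
}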
diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp
index d7c25921ec36..fe41987f5c27 100644
--- a/contrib/llvm/lib/Object/Binary.cpp
+++ b/contrib/llvm/lib/Object/Binary.cpp
@@ -88,7 +88,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
Expected<OwningBinary<Binary>> object::createBinary(StringRef Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
- MemoryBuffer::getFileOrSTDIN(Path);
+ MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1,
+ /*RequiresNullTerminator=*/false);
if (std::error_code EC = FileOrErr.getError())
return errorCodeToError(EC);
std::unique_ptr<MemoryBuffer> &Buffer = FileOrErr.get();
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
index 85b1913cb23b..fc1deeba339a 100644
--- a/contrib/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -616,6 +616,8 @@ std::error_code COFFObjectFile::initBaseRelocPtr() {
IntPtr);
BaseRelocEnd = reinterpret_cast<coff_base_reloc_block_header *>(
IntPtr + DataEntry->Size);
+ // FIXME: Verify the section containing BaseRelocHeader has at least
+ // DataEntry->Size bytes after DataEntry->RelativeVirtualAddress.
return std::error_code();
}
@@ -637,10 +639,10 @@ std::error_code COFFObjectFile::initDebugDirectoryPtr() {
if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr))
return EC;
DebugDirectoryBegin = reinterpret_cast<const debug_directory *>(IntPtr);
- if (std::error_code EC = getRvaPtr(
- DataEntry->RelativeVirtualAddress + DataEntry->Size, IntPtr))
- return EC;
- DebugDirectoryEnd = reinterpret_cast<const debug_directory *>(IntPtr);
+ DebugDirectoryEnd = reinterpret_cast<const debug_directory *>(
+ IntPtr + DataEntry->Size);
+ // FIXME: Verify the section containing DebugDirectoryBegin has at least
+ // DataEntry->Size bytes after DataEntry->RelativeVirtualAddress.
return std::error_code();
}
@@ -936,6 +938,18 @@ iterator_range<base_reloc_iterator> COFFObjectFile::base_relocs() const {
return make_range(base_reloc_begin(), base_reloc_end());
}
+std::error_code
+COFFObjectFile::getCOFFHeader(const coff_file_header *&Res) const {
+ Res = COFFHeader;
+ return std::error_code();
+}
+
+std::error_code
+COFFObjectFile::getCOFFBigObjHeader(const coff_bigobj_file_header *&Res) const {
+ Res = COFFBigObjHeader;
+ return std::error_code();
+}
+
std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const {
Res = PE32Header;
return std::error_code();
@@ -1051,6 +1065,16 @@ COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const {
return makeArrayRef(Aux, Symbol.getNumberOfAuxSymbols() * SymbolSize);
}
+uint32_t COFFObjectFile::getSymbolIndex(COFFSymbolRef Symbol) const {
+ uintptr_t Offset =
+ reinterpret_cast<uintptr_t>(Symbol.getRawPtr()) - getSymbolTable();
+ assert(Offset % getSymbolTableEntrySize() == 0 &&
+ "Symbol did not point to the beginning of a symbol");
+ size_t Index = Offset / getSymbolTableEntrySize();
+ assert(Index < getNumberOfSymbols());
+ return Index;
+}
+
std::error_code COFFObjectFile::getSectionName(const coff_section *Sec,
StringRef &Res) const {
StringRef Name;
@@ -1176,16 +1200,12 @@ COFFObjectFile::getRelocations(const coff_section *Sec) const {
#define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(reloc_type) \
case COFF::reloc_type: \
- Res = #reloc_type; \
- break;
+ return #reloc_type;
-void COFFObjectFile::getRelocationTypeName(
- DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
- const coff_relocation *Reloc = toRel(Rel);
- StringRef Res;
+StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const {
switch (getMachine()) {
case COFF::IMAGE_FILE_MACHINE_AMD64:
- switch (Reloc->Type) {
+ switch (Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ABSOLUTE);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR64);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32);
@@ -1204,11 +1224,11 @@ void COFFObjectFile::getRelocationTypeName(
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_PAIR);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SSPAN32);
default:
- Res = "Unknown";
+ return "Unknown";
}
break;
case COFF::IMAGE_FILE_MACHINE_ARMNT:
- switch (Reloc->Type) {
+ switch (Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ABSOLUTE);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32NB);
@@ -1225,11 +1245,11 @@ void COFFObjectFile::getRelocationTypeName(
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24T);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX23T);
default:
- Res = "Unknown";
+ return "Unknown";
}
break;
case COFF::IMAGE_FILE_MACHINE_ARM64:
- switch (Reloc->Type) {
+ switch (Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ABSOLUTE);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32NB);
@@ -1248,11 +1268,11 @@ void COFFObjectFile::getRelocationTypeName(
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH19);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH14);
default:
- Res = "Unknown";
+ return "Unknown";
}
break;
case COFF::IMAGE_FILE_MACHINE_I386:
- switch (Reloc->Type) {
+ switch (Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR16);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL16);
@@ -1265,21 +1285,33 @@ void COFFObjectFile::getRelocationTypeName(
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL7);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL32);
default:
- Res = "Unknown";
+ return "Unknown";
}
break;
default:
- Res = "Unknown";
+ return "Unknown";
}
- Result.append(Res.begin(), Res.end());
}
#undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME
+void COFFObjectFile::getRelocationTypeName(
+ DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
+ const coff_relocation *Reloc = toRel(Rel);
+ StringRef Res = getRelocationTypeName(Reloc->Type);
+ Result.append(Res.begin(), Res.end());
+}
+
bool COFFObjectFile::isRelocatableObject() const {
return !DataDirectory;
}
+StringRef COFFObjectFile::mapDebugSectionName(StringRef Name) const {
+ return StringSwitch<StringRef>(Name)
+ .Case("eh_fram", "eh_frame")
+ .Default(Name);
+}
+
bool ImportDirectoryEntryRef::
operator==(const ImportDirectoryEntryRef &Other) const {
return ImportTable == Other.ImportTable && Index == Other.Index;
diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp
index 2eefb7ef13a3..cf8313f88f93 100644
--- a/contrib/llvm/lib/Object/ELF.cpp
+++ b/contrib/llvm/lib/Object/ELF.cpp
@@ -139,6 +139,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
break;
}
break;
+ case ELF::EM_MSP430:
+ switch (Type) {
+#include "llvm/BinaryFormat/ELFRelocs/MSP430.def"
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -147,7 +154,7 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
#undef ELF_RELOC
-uint32_t llvm::object::getELFRelrRelocationType(uint32_t Machine) {
+uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) {
switch (Machine) {
case ELF::EM_X86_64:
return ELF::R_X86_64_RELATIVE;
@@ -293,7 +300,7 @@ ELFFile<ELFT>::decode_relrs(Elf_Relr_Range relrs) const {
Elf_Rela Rela;
Rela.r_info = 0;
Rela.r_addend = 0;
- Rela.setType(getRelrRelocationType(), false);
+ Rela.setType(getRelativeRelocationType(), false);
std::vector<Elf_Rela> Relocs;
// Word type: uint32_t for Elf32, and uint64_t for Elf64.
@@ -393,20 +400,17 @@ ELFFile<ELFT>::android_relas(const Elf_Shdr *Sec) const {
if (GroupedByAddend && GroupHasAddend)
Addend += ReadSLEB();
+ if (!GroupHasAddend)
+ Addend = 0;
+
for (uint64_t I = 0; I != NumRelocsInGroup; ++I) {
Elf_Rela R;
Offset += GroupedByOffsetDelta ? GroupOffsetDelta : ReadSLEB();
R.r_offset = Offset;
R.r_info = GroupedByInfo ? GroupRInfo : ReadSLEB();
-
- if (GroupHasAddend) {
- if (!GroupedByAddend)
- Addend += ReadSLEB();
- R.r_addend = Addend;
- } else {
- R.r_addend = 0;
- }
-
+ if (GroupHasAddend && !GroupedByAddend)
+ Addend += ReadSLEB();
+ R.r_addend = Addend;
Relocs.push_back(R);
if (ErrStr)
diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp
index e806c8f28b15..9fb3a55ac7b1 100644
--- a/contrib/llvm/lib/Object/ELFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/ELFObjectFile.cpp
@@ -14,6 +14,7 @@
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFTypes.h"
@@ -23,6 +24,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
@@ -327,3 +329,68 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
TheTriple.setArchName(Triple);
}
+
+std::vector<std::pair<DataRefImpl, uint64_t>>
+ELFObjectFileBase::getPltAddresses() const {
+ std::string Err;
+ const auto Triple = makeTriple();
+ const auto *T = TargetRegistry::lookupTarget(Triple.str(), Err);
+ if (!T)
+ return {};
+ uint64_t JumpSlotReloc = 0;
+ switch (Triple.getArch()) {
+ case Triple::x86:
+ JumpSlotReloc = ELF::R_386_JUMP_SLOT;
+ break;
+ case Triple::x86_64:
+ JumpSlotReloc = ELF::R_X86_64_JUMP_SLOT;
+ break;
+ case Triple::aarch64:
+ JumpSlotReloc = ELF::R_AARCH64_JUMP_SLOT;
+ break;
+ default:
+ return {};
+ }
+ std::unique_ptr<const MCInstrInfo> MII(T->createMCInstrInfo());
+ std::unique_ptr<const MCInstrAnalysis> MIA(
+ T->createMCInstrAnalysis(MII.get()));
+ if (!MIA)
+ return {};
+ Optional<SectionRef> Plt = None, RelaPlt = None, GotPlt = None;
+ for (const SectionRef &Section : sections()) {
+ StringRef Name;
+ if (Section.getName(Name))
+ continue;
+ if (Name == ".plt")
+ Plt = Section;
+ else if (Name == ".rela.plt" || Name == ".rel.plt")
+ RelaPlt = Section;
+ else if (Name == ".got.plt")
+ GotPlt = Section;
+ }
+ if (!Plt || !RelaPlt || !GotPlt)
+ return {};
+ StringRef PltContents;
+ if (Plt->getContents(PltContents))
+ return {};
+ ArrayRef<uint8_t> PltBytes((const uint8_t *)PltContents.data(),
+ Plt->getSize());
+ auto PltEntries = MIA->findPltEntries(Plt->getAddress(), PltBytes,
+ GotPlt->getAddress(), Triple);
+ // Build a map from GOT entry virtual address to PLT entry virtual address.
+ DenseMap<uint64_t, uint64_t> GotToPlt;
+ for (const auto &Entry : PltEntries)
+ GotToPlt.insert(std::make_pair(Entry.second, Entry.first));
+ // Find the relocations in the dynamic relocation table that point to
+ // locations in the GOT for which we know the corresponding PLT entry.
+ std::vector<std::pair<DataRefImpl, uint64_t>> Result;
+ for (const auto &Relocation : RelaPlt->relocations()) {
+ if (Relocation.getType() != JumpSlotReloc)
+ continue;
+ auto PltEntryIter = GotToPlt.find(Relocation.getOffset());
+ if (PltEntryIter != GotToPlt.end())
+ Result.push_back(std::make_pair(
+ Relocation.getSymbol()->getRawDataRefImpl(), PltEntryIter->second));
+ }
+ return Result;
+}
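
A hypothetical caller sketch for the new ELFObjectFileBase::getPltAddresses() above: it yields pairs whose first element is the DataRefImpl of the relocated dynamic symbol and whose second element is the virtual address of the matching PLT stub. This assumes an LLVM tree at the corresponding revision with the relevant targets registered; the printPltEntries helper is invented for illustration.

#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

// Print the dynamic symbol behind each PLT stub together with the stub's
// virtual address.
void printPltEntries(const ELFObjectFileBase &Obj) {
  for (const auto &Entry : Obj.getPltAddresses()) {
    SymbolRef Sym(Entry.first, &Obj);
    if (Expected<StringRef> Name = Sym.getName())
      outs() << *Name << " @ " << format_hex(Entry.second, 10) << '\n';
    else
      consumeError(Name.takeError());
  }
}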
diff --git a/contrib/llvm/lib/Object/Error.cpp b/contrib/llvm/lib/Object/Error.cpp
index 7d43a84f3e0e..6fa23e06c409 100644
--- a/contrib/llvm/lib/Object/Error.cpp
+++ b/contrib/llvm/lib/Object/Error.cpp
@@ -57,6 +57,7 @@ std::string _object_error_category::message(int EV) const {
"defined.");
}
+void BinaryError::anchor() {}
char BinaryError::ID = 0;
char GenericBinaryError::ID = 0;
diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp
index e422903f2805..ce4d1cf92e20 100644
--- a/contrib/llvm/lib/Object/MachOObjectFile.cpp
+++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp
@@ -1592,8 +1592,8 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
"command extends past the end of the symbol table");
return;
}
- if (Dysymtab.nextdefsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) {
- Err = malformedError("nextdefsym in LC_DYSYMTAB load command "
+ if (Dysymtab.nextdefsym != 0 && Dysymtab.iextdefsym > Symtab.nsyms) {
+ Err = malformedError("iextdefsym in LC_DYSYMTAB load command "
"extends past the end of the symbol table");
return;
}
@@ -1606,7 +1606,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
return;
}
if (Dysymtab.nundefsym != 0 && Dysymtab.iundefsym > Symtab.nsyms) {
- Err = malformedError("nundefsym in LC_DYSYMTAB load command "
+ Err = malformedError("iundefsym in LC_DYSYMTAB load command "
"extends past the end of the symbol table");
return;
}
@@ -2438,7 +2438,7 @@ basic_symbol_iterator MachOObjectFile::symbol_end() const {
return basic_symbol_iterator(SymbolRef(DRI, this));
}
-basic_symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const {
+symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const {
MachO::symtab_command Symtab = getSymtabLoadCommand();
if (!SymtabLoadCmd || Index >= Symtab.nsyms)
report_fatal_error("Requested symbol index is out of range.");
diff --git a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp
index b353ef3c835b..33ce7d8109fb 100644
--- a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -100,6 +100,7 @@ initializeRecordStreamer(const Module &M,
MCObjectFileInfo MOFI;
MCContext MCCtx(MAI.get(), MRI.get(), &MOFI);
MOFI.InitMCObjectFileInfo(TT, /*PIC*/ false, MCCtx);
+ MOFI.setSDKVersion(M.getSDKVersion());
RecordStreamer Streamer(MCCtx, M);
T->createNullTargetStreamer(Streamer);
diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp
index 5fd823e0117e..f5de2e1d5ce2 100644
--- a/contrib/llvm/lib/Object/Object.cpp
+++ b/contrib/llvm/lib/Object/Object.cpp
@@ -105,7 +105,7 @@ void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
if (!SecOrErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SecOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(SecOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -187,7 +187,7 @@ const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) {
if (!Ret) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(Ret.takeError(), OS, "");
+ logAllUnhandledErrors(Ret.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -199,7 +199,7 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
if (!Ret) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(Ret.takeError(), OS, "");
+ logAllUnhandledErrors(Ret.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -229,7 +229,7 @@ const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) {
SmallVector<char, 0> ret;
(*unwrap(RI))->getTypeName(ret);
char *str = static_cast<char*>(safe_malloc(ret.size()));
- std::copy(ret.begin(), ret.end(), str);
+ llvm::copy(ret, str);
return str;
}
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp
index db0ff220c4d8..cf63b89adc12 100644
--- a/contrib/llvm/lib/Object/ObjectFile.cpp
+++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -77,6 +77,14 @@ bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const {
bool ObjectFile::isSectionStripped(DataRefImpl Sec) const { return false; }
+bool ObjectFile::isBerkeleyText(DataRefImpl Sec) const {
+ return isSectionText(Sec);
+}
+
+bool ObjectFile::isBerkeleyData(DataRefImpl Sec) const {
+ return isSectionData(Sec);
+}
+
section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const {
return section_iterator(SectionRef(Sec, this));
}
diff --git a/contrib/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm/lib/Object/WasmObjectFile.cpp
index 4d4c887b2d97..d84cb48c9fbd 100644
--- a/contrib/llvm/lib/Object/WasmObjectFile.cpp
+++ b/contrib/llvm/lib/Object/WasmObjectFile.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/ScopedPrinter.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -37,8 +38,8 @@ using namespace object;
void WasmSymbol::print(raw_ostream &Out) const {
Out << "Name=" << Info.Name
- << ", Kind=" << toString(wasm::WasmSymbolType(Info.Kind))
- << ", Flags=" << Info.Flags;
+ << ", Kind=" << toString(wasm::WasmSymbolType(Info.Kind))
+ << ", Flags=" << Info.Flags;
if (!isTypeData()) {
Out << ", ElemIndex=" << Info.ElementIndex;
} else if (isDefined()) {
@@ -62,9 +63,9 @@ ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) {
return std::move(ObjectFile);
}
-#define VARINT7_MAX ((1<<7)-1)
-#define VARINT7_MIN (-(1<<7))
-#define VARUINT7_MAX (1<<7)
+#define VARINT7_MAX ((1 << 7) - 1)
+#define VARINT7_MIN (-(1 << 7))
+#define VARUINT7_MAX (1 << 7)
#define VARUINT1_MAX (1)
static uint8_t readUint8(WasmObjectFile::ReadContext &Ctx) {
@@ -82,6 +83,8 @@ static uint32_t readUint32(WasmObjectFile::ReadContext &Ctx) {
}
static int32_t readFloat32(WasmObjectFile::ReadContext &Ctx) {
+ if (Ctx.Ptr + 4 > Ctx.End)
+ report_fatal_error("EOF while reading float64");
int32_t Result = 0;
memcpy(&Result, Ctx.Ptr, sizeof(Result));
Ctx.Ptr += sizeof(Result);
@@ -89,6 +92,8 @@ static int32_t readFloat32(WasmObjectFile::ReadContext &Ctx) {
}
static int64_t readFloat64(WasmObjectFile::ReadContext &Ctx) {
+ if (Ctx.Ptr + 8 > Ctx.End)
+ report_fatal_error("EOF while reading float64");
int64_t Result = 0;
memcpy(&Result, Ctx.Ptr, sizeof(Result));
Ctx.Ptr += sizeof(Result);
@@ -97,7 +102,7 @@ static int64_t readFloat64(WasmObjectFile::ReadContext &Ctx) {
static uint64_t readULEB128(WasmObjectFile::ReadContext &Ctx) {
unsigned Count;
- const char* Error = nullptr;
+ const char *Error = nullptr;
uint64_t Result = decodeULEB128(Ctx.Ptr, &Count, Ctx.End, &Error);
if (Error)
report_fatal_error(Error);
@@ -117,7 +122,7 @@ static StringRef readString(WasmObjectFile::ReadContext &Ctx) {
static int64_t readLEB128(WasmObjectFile::ReadContext &Ctx) {
unsigned Count;
- const char* Error = nullptr;
+ const char *Error = nullptr;
uint64_t Result = decodeSLEB128(Ctx.Ptr, &Count, Ctx.End, &Error);
if (Error)
report_fatal_error(Error);
@@ -171,7 +176,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr,
case wasm::WASM_OPCODE_F64_CONST:
Expr.Value.Float64 = readFloat64(Ctx);
break;
- case wasm::WASM_OPCODE_GET_GLOBAL:
+ case wasm::WASM_OPCODE_GLOBAL_GET:
Expr.Value.Global = readULEB128(Ctx);
break;
default:
@@ -189,7 +194,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr,
static wasm::WasmLimits readLimits(WasmObjectFile::ReadContext &Ctx) {
wasm::WasmLimits Result;
- Result.Flags = readVaruint1(Ctx);
+ Result.Flags = readVaruint32(Ctx);
Result.Initial = readVaruint32(Ctx);
if (Result.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX)
Result.Maximum = readVaruint32(Ctx);
@@ -203,8 +208,8 @@ static wasm::WasmTable readTable(WasmObjectFile::ReadContext &Ctx) {
return Table;
}
-static Error readSection(WasmSection &Section,
- WasmObjectFile::ReadContext &Ctx) {
+static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx,
+ WasmSectionOrderChecker &Checker) {
Section.Offset = Ctx.Ptr - Ctx.Start;
Section.Type = readUint8(Ctx);
LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n");
@@ -216,10 +221,24 @@ static Error readSection(WasmSection &Section,
return make_error<StringError>("Section too large",
object_error::parse_failed);
if (Section.Type == wasm::WASM_SEC_CUSTOM) {
- const uint8_t *NameStart = Ctx.Ptr;
- Section.Name = readString(Ctx);
- Size -= Ctx.Ptr - NameStart;
+ WasmObjectFile::ReadContext SectionCtx;
+ SectionCtx.Start = Ctx.Ptr;
+ SectionCtx.Ptr = Ctx.Ptr;
+ SectionCtx.End = Ctx.Ptr + Size;
+
+ Section.Name = readString(SectionCtx);
+
+ uint32_t SectionNameSize = SectionCtx.Ptr - SectionCtx.Start;
+ Ctx.Ptr += SectionNameSize;
+ Size -= SectionNameSize;
+ }
+
+ if (!Checker.isValidSectionOrder(Section.Type, Section.Name)) {
+ return make_error<StringError>("Out of order section type: " +
+ llvm::to_string(Section.Type),
+ object_error::parse_failed);
}
+
Section.Content = ArrayRef<uint8_t>(Ctx.Ptr, Size);
Ctx.Ptr += Size;
return Error::success();
@@ -230,8 +249,8 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
ErrorAsOutParameter ErrAsOutParam(&Err);
Header.Magic = getData().substr(0, 4);
if (Header.Magic != StringRef("\0asm", 4)) {
- Err = make_error<StringError>("Bad magic number",
- object_error::parse_failed);
+ Err =
+ make_error<StringError>("Bad magic number", object_error::parse_failed);
return;
}
@@ -254,8 +273,9 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
}
WasmSection Sec;
+ WasmSectionOrderChecker Checker;
while (Ctx.Ptr < Ctx.End) {
- if ((Err = readSection(Sec, Ctx)))
+ if ((Err = readSection(Sec, Ctx, Checker)))
return;
if ((Err = parseSection(Sec)))
return;
@@ -284,6 +304,8 @@ Error WasmObjectFile::parseSection(WasmSection &Sec) {
return parseMemorySection(Ctx);
case wasm::WASM_SEC_GLOBAL:
return parseGlobalSection(Ctx);
+ case wasm::WASM_SEC_EVENT:
+ return parseEventSection(Ctx);
case wasm::WASM_SEC_EXPORT:
return parseExportSection(Ctx);
case wasm::WASM_SEC_START:
@@ -300,6 +322,22 @@ Error WasmObjectFile::parseSection(WasmSection &Sec) {
}
}
+Error WasmObjectFile::parseDylinkSection(ReadContext &Ctx) {
+ // See https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md
+ DylinkInfo.MemorySize = readVaruint32(Ctx);
+ DylinkInfo.MemoryAlignment = readVaruint32(Ctx);
+ DylinkInfo.TableSize = readVaruint32(Ctx);
+ DylinkInfo.TableAlignment = readVaruint32(Ctx);
+ uint32_t Count = readVaruint32(Ctx);
+ while (Count--) {
+ DylinkInfo.Needed.push_back(readString(Ctx));
+ }
+ if (Ctx.Ptr != Ctx.End)
+ return make_error<GenericBinaryError>("dylink section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
llvm::DenseSet<uint64_t> Seen;
if (Functions.size() != FunctionTypes.size()) {
@@ -336,8 +374,8 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
break;
}
if (Ctx.Ptr != SubSectionEnd)
- return make_error<GenericBinaryError>("Name sub-section ended prematurely",
- object_error::parse_failed);
+ return make_error<GenericBinaryError>(
+ "Name sub-section ended prematurely", object_error::parse_failed);
}
if (Ctx.Ptr != Ctx.End)
@@ -350,7 +388,8 @@ Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
HasLinkingSection = true;
if (Functions.size() != FunctionTypes.size()) {
return make_error<GenericBinaryError>(
- "Linking data must come after code section", object_error::parse_failed);
+ "Linking data must come after code section",
+ object_error::parse_failed);
}
LinkingData.Version = readVaruint32(Ctx);
@@ -427,19 +466,24 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
std::vector<wasm::WasmImport *> ImportedGlobals;
std::vector<wasm::WasmImport *> ImportedFunctions;
+ std::vector<wasm::WasmImport *> ImportedEvents;
ImportedGlobals.reserve(Imports.size());
ImportedFunctions.reserve(Imports.size());
+ ImportedEvents.reserve(Imports.size());
for (auto &I : Imports) {
if (I.Kind == wasm::WASM_EXTERNAL_FUNCTION)
ImportedFunctions.emplace_back(&I);
else if (I.Kind == wasm::WASM_EXTERNAL_GLOBAL)
ImportedGlobals.emplace_back(&I);
+ else if (I.Kind == wasm::WASM_EXTERNAL_EVENT)
+ ImportedEvents.emplace_back(&I);
}
while (Count--) {
wasm::WasmSymbolInfo Info;
- const wasm::WasmSignature *FunctionType = nullptr;
+ const wasm::WasmSignature *Signature = nullptr;
const wasm::WasmGlobalType *GlobalType = nullptr;
+ const wasm::WasmEventType *EventType = nullptr;
Info.Kind = readUint8(Ctx);
Info.Flags = readVaruint32(Ctx);
@@ -455,13 +499,13 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
if (IsDefined) {
Info.Name = readString(Ctx);
unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions;
- FunctionType = &Signatures[FunctionTypes[FuncIndex]];
+ Signature = &Signatures[FunctionTypes[FuncIndex]];
wasm::WasmFunction &Function = Functions[FuncIndex];
if (Function.SymbolName.empty())
Function.SymbolName = Info.Name;
} else {
wasm::WasmImport &Import = *ImportedFunctions[Info.ElementIndex];
- FunctionType = &Signatures[Import.SigIndex];
+ Signature = &Signatures[Import.SigIndex];
Info.Name = Import.Field;
Info.Module = Import.Module;
}
@@ -473,9 +517,8 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
IsDefined != isDefinedGlobalIndex(Info.ElementIndex))
return make_error<GenericBinaryError>("invalid global symbol index",
object_error::parse_failed);
- if (!IsDefined &&
- (Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) ==
- wasm::WASM_SYMBOL_BINDING_WEAK)
+ if (!IsDefined && (Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) ==
+ wasm::WASM_SYMBOL_BINDING_WEAK)
return make_error<GenericBinaryError>("undefined weak global symbol",
object_error::parse_failed);
if (IsDefined) {
@@ -521,6 +564,34 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
break;
}
+ case wasm::WASM_SYMBOL_TYPE_EVENT: {
+ Info.ElementIndex = readVaruint32(Ctx);
+ if (!isValidEventIndex(Info.ElementIndex) ||
+ IsDefined != isDefinedEventIndex(Info.ElementIndex))
+ return make_error<GenericBinaryError>("invalid event symbol index",
+ object_error::parse_failed);
+ if (!IsDefined && (Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) ==
+ wasm::WASM_SYMBOL_BINDING_WEAK)
+ return make_error<GenericBinaryError>("undefined weak global symbol",
+ object_error::parse_failed);
+ if (IsDefined) {
+ Info.Name = readString(Ctx);
+ unsigned EventIndex = Info.ElementIndex - NumImportedEvents;
+ wasm::WasmEvent &Event = Events[EventIndex];
+ Signature = &Signatures[Event.Type.SigIndex];
+ EventType = &Event.Type;
+ if (Event.SymbolName.empty())
+ Event.SymbolName = Info.Name;
+
+ } else {
+ wasm::WasmImport &Import = *ImportedEvents[Info.ElementIndex];
+ EventType = &Import.Event;
+ Signature = &Signatures[EventType->SigIndex];
+ Info.Name = Import.Field;
+ }
+ break;
+ }
+
default:
return make_error<GenericBinaryError>("Invalid symbol type",
object_error::parse_failed);
@@ -533,8 +604,8 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
Twine(Info.Name),
object_error::parse_failed);
LinkingData.SymbolTable.emplace_back(Info);
- Symbols.emplace_back(LinkingData.SymbolTable.back(), FunctionType,
- GlobalType);
+ Symbols.emplace_back(LinkingData.SymbolTable.back(), GlobalType, EventType,
+ Signature);
LLVM_DEBUG(dbgs() << "Adding symbol: " << Symbols.back() << "\n");
}
@@ -547,7 +618,8 @@ Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) {
for (unsigned ComdatIndex = 0; ComdatIndex < ComdatCount; ++ComdatIndex) {
StringRef Name = readString(Ctx);
if (Name.empty() || !ComdatSet.insert(Name).second)
- return make_error<GenericBinaryError>("Bad/duplicate COMDAT name " + Twine(Name),
+ return make_error<GenericBinaryError>("Bad/duplicate COMDAT name " +
+ Twine(Name),
object_error::parse_failed);
LinkingData.Comdats.emplace_back(Name);
uint32_t Flags = readVaruint32(Ctx);
@@ -565,8 +637,8 @@ Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) {
object_error::parse_failed);
case wasm::WASM_COMDAT_DATA:
if (Index >= DataSegments.size())
- return make_error<GenericBinaryError>("COMDAT data index out of range",
- object_error::parse_failed);
+ return make_error<GenericBinaryError>(
+ "COMDAT data index out of range", object_error::parse_failed);
if (DataSegments[Index].Data.Comdat != UINT32_MAX)
return make_error<GenericBinaryError>("Data segment in two COMDATs",
object_error::parse_failed);
@@ -574,8 +646,8 @@ Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) {
break;
case wasm::WASM_COMDAT_FUNCTION:
if (!isDefinedFunctionIndex(Index))
- return make_error<GenericBinaryError>("COMDAT function index out of range",
- object_error::parse_failed);
+ return make_error<GenericBinaryError>(
+ "COMDAT function index out of range", object_error::parse_failed);
if (getDefinedFunction(Index).Comdat != UINT32_MAX)
return make_error<GenericBinaryError>("Function in two COMDATs",
object_error::parse_failed);
@@ -592,13 +664,18 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
if (SectionIndex >= Sections.size())
return make_error<GenericBinaryError>("Invalid section index",
object_error::parse_failed);
- WasmSection& Section = Sections[SectionIndex];
+ WasmSection &Section = Sections[SectionIndex];
uint32_t RelocCount = readVaruint32(Ctx);
uint32_t EndOffset = Section.Content.size();
+ uint32_t PreviousOffset = 0;
while (RelocCount--) {
wasm::WasmRelocation Reloc = {};
Reloc.Type = readVaruint32(Ctx);
Reloc.Offset = readVaruint32(Ctx);
+ if (Reloc.Offset < PreviousOffset)
+ return make_error<GenericBinaryError>("Relocations not in offset order",
+ object_error::parse_failed);
+ PreviousOffset = Reloc.Offset;
Reloc.Index = readVaruint32(Ctx);
switch (Reloc.Type) {
case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
@@ -618,6 +695,11 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
return make_error<GenericBinaryError>("Bad relocation global index",
object_error::parse_failed);
break;
+ case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
+ if (!isValidEventSymbol(Reloc.Index))
+ return make_error<GenericBinaryError>("Bad relocation event index",
+ object_error::parse_failed);
+ break;
case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
@@ -666,7 +748,10 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
}
Error WasmObjectFile::parseCustomSection(WasmSection &Sec, ReadContext &Ctx) {
- if (Sec.Name == "name") {
+ if (Sec.Name == "dylink") {
+ if (Error Err = parseDylinkSection(Ctx))
+ return Err;
+ } else if (Sec.Name == "name") {
if (Error Err = parseNameSection(Ctx))
return Err;
} else if (Sec.Name == "linking") {
@@ -684,17 +769,16 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
Signatures.reserve(Count);
while (Count--) {
wasm::WasmSignature Sig;
- Sig.ReturnType = wasm::WASM_TYPE_NORESULT;
uint8_t Form = readUint8(Ctx);
if (Form != wasm::WASM_TYPE_FUNC) {
return make_error<GenericBinaryError>("Invalid signature type",
object_error::parse_failed);
}
uint32_t ParamCount = readVaruint32(Ctx);
- Sig.ParamTypes.reserve(ParamCount);
+ Sig.Params.reserve(ParamCount);
while (ParamCount--) {
uint32_t ParamType = readUint8(Ctx);
- Sig.ParamTypes.push_back(ParamType);
+ Sig.Params.push_back(wasm::ValType(ParamType));
}
uint32_t ReturnCount = readVaruint32(Ctx);
if (ReturnCount) {
@@ -702,9 +786,9 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
return make_error<GenericBinaryError>(
"Multiple return types not supported", object_error::parse_failed);
}
- Sig.ReturnType = readUint8(Ctx);
+ Sig.Returns.push_back(wasm::ValType(readUint8(Ctx)));
}
- Signatures.push_back(Sig);
+ Signatures.push_back(std::move(Sig));
}
if (Ctx.Ptr != Ctx.End)
return make_error<GenericBinaryError>("Type section ended prematurely",
@@ -735,13 +819,18 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
break;
case wasm::WASM_EXTERNAL_TABLE:
Im.Table = readTable(Ctx);
- if (Im.Table.ElemType != wasm::WASM_TYPE_ANYFUNC)
+ if (Im.Table.ElemType != wasm::WASM_TYPE_FUNCREF)
return make_error<GenericBinaryError>("Invalid table element type",
object_error::parse_failed);
break;
+ case wasm::WASM_EXTERNAL_EVENT:
+ NumImportedEvents++;
+ Im.Event.Attribute = readVarint32(Ctx);
+ Im.Event.SigIndex = readVarint32(Ctx);
+ break;
default:
- return make_error<GenericBinaryError>(
- "Unexpected import kind", object_error::parse_failed);
+ return make_error<GenericBinaryError>("Unexpected import kind",
+ object_error::parse_failed);
}
Imports.push_back(Im);
}
@@ -773,7 +862,7 @@ Error WasmObjectFile::parseTableSection(ReadContext &Ctx) {
Tables.reserve(Count);
while (Count--) {
Tables.push_back(readTable(Ctx));
- if (Tables.back().ElemType != wasm::WASM_TYPE_ANYFUNC) {
+ if (Tables.back().ElemType != wasm::WASM_TYPE_FUNCREF) {
return make_error<GenericBinaryError>("Invalid table element type",
object_error::parse_failed);
}
@@ -815,6 +904,24 @@ Error WasmObjectFile::parseGlobalSection(ReadContext &Ctx) {
return Error::success();
}
+Error WasmObjectFile::parseEventSection(ReadContext &Ctx) {
+ EventSection = Sections.size();
+ uint32_t Count = readVarint32(Ctx);
+ Events.reserve(Count);
+ while (Count--) {
+ wasm::WasmEvent Event;
+ Event.Index = NumImportedEvents + Events.size();
+ Event.Type.Attribute = readVaruint32(Ctx);
+ Event.Type.SigIndex = readVarint32(Ctx);
+ Events.push_back(Event);
+ }
+
+ if (Ctx.Ptr != Ctx.End)
+ return make_error<GenericBinaryError>("Event section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
Exports.reserve(Count);
@@ -834,12 +941,17 @@ Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
return make_error<GenericBinaryError>("Invalid global export",
object_error::parse_failed);
break;
+ case wasm::WASM_EXTERNAL_EVENT:
+ if (!isValidEventIndex(Ex.Index))
+ return make_error<GenericBinaryError>("Invalid event export",
+ object_error::parse_failed);
+ break;
case wasm::WASM_EXTERNAL_MEMORY:
case wasm::WASM_EXTERNAL_TABLE:
break;
default:
- return make_error<GenericBinaryError>(
- "Unexpected export kind", object_error::parse_failed);
+ return make_error<GenericBinaryError>("Unexpected export kind",
+ object_error::parse_failed);
}
Exports.push_back(Ex);
}
@@ -865,6 +977,14 @@ bool WasmObjectFile::isDefinedGlobalIndex(uint32_t Index) const {
return Index >= NumImportedGlobals && isValidGlobalIndex(Index);
}
+bool WasmObjectFile::isValidEventIndex(uint32_t Index) const {
+ return Index < NumImportedEvents + Events.size();
+}
+
+bool WasmObjectFile::isDefinedEventIndex(uint32_t Index) const {
+ return Index >= NumImportedEvents && isValidEventIndex(Index);
+}
+
bool WasmObjectFile::isValidFunctionSymbol(uint32_t Index) const {
return Index < Symbols.size() && Symbols[Index].isTypeFunction();
}
@@ -873,6 +993,10 @@ bool WasmObjectFile::isValidGlobalSymbol(uint32_t Index) const {
return Index < Symbols.size() && Symbols[Index].isTypeGlobal();
}
+bool WasmObjectFile::isValidEventSymbol(uint32_t Index) const {
+ return Index < Symbols.size() && Symbols[Index].isTypeEvent();
+}
+
bool WasmObjectFile::isValidDataSymbol(uint32_t Index) const {
return Index < Symbols.size() && Symbols[Index].isTypeData();
}
@@ -891,6 +1015,11 @@ wasm::WasmGlobal &WasmObjectFile::getDefinedGlobal(uint32_t Index) {
return Globals[Index - NumImportedGlobals];
}
+wasm::WasmEvent &WasmObjectFile::getDefinedEvent(uint32_t Index) {
+ assert(isDefinedEventIndex(Index));
+ return Events[Index - NumImportedEvents];
+}
+
Error WasmObjectFile::parseStartSection(ReadContext &Ctx) {
StartFunction = readVaruint32(Ctx);
if (!isValidFunctionIndex(StartFunction))
@@ -1050,10 +1179,11 @@ Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
return getSymbolValue(Symb);
}
-uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol& Sym) const {
+uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const {
switch (Sym.Info.Kind) {
case wasm::WASM_SYMBOL_TYPE_FUNCTION:
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
+ case wasm::WASM_SYMBOL_TYPE_EVENT:
return Sym.Info.ElementIndex;
case wasm::WASM_SYMBOL_TYPE_DATA: {
// The value of a data symbol is the segment offset, plus the symbol
@@ -1096,6 +1226,8 @@ WasmObjectFile::getSymbolType(DataRefImpl Symb) const {
return SymbolRef::ST_Data;
case wasm::WASM_SYMBOL_TYPE_SECTION:
return SymbolRef::ST_Debug;
+ case wasm::WASM_SYMBOL_TYPE_EVENT:
+ return SymbolRef::ST_Other;
}
llvm_unreachable("Unknown WasmSymbol::SymbolType");
@@ -1104,7 +1236,7 @@ WasmObjectFile::getSymbolType(DataRefImpl Symb) const {
Expected<section_iterator>
WasmObjectFile::getSymbolSection(DataRefImpl Symb) const {
- const WasmSymbol& Sym = getWasmSymbol(Symb);
+ const WasmSymbol &Sym = getWasmSymbol(Symb);
if (Sym.isUndefined())
return section_end();
@@ -1119,10 +1251,12 @@ WasmObjectFile::getSymbolSection(DataRefImpl Symb) const {
case wasm::WASM_SYMBOL_TYPE_DATA:
Ref.d.a = DataSection;
break;
- case wasm::WASM_SYMBOL_TYPE_SECTION: {
+ case wasm::WASM_SYMBOL_TYPE_SECTION:
Ref.d.a = Sym.Info.ElementIndex;
break;
- }
+ case wasm::WASM_SYMBOL_TYPE_EVENT:
+ Ref.d.a = EventSection;
+ break;
default:
llvm_unreachable("Unknown WasmSymbol::SymbolType");
}
@@ -1145,6 +1279,7 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec,
ECase(TABLE);
ECase(MEMORY);
ECase(GLOBAL);
+ ECase(EVENT);
ECase(EXPORT);
ECase(START);
ECase(ELEM);
@@ -1218,9 +1353,7 @@ relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Ref) const {
return relocation_iterator(RelocationRef(RelocRef, this));
}
-void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
- Rel.d.b++;
-}
+void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const { Rel.d.b++; }
uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Ref) const {
const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
@@ -1244,12 +1377,12 @@ uint64_t WasmObjectFile::getRelocationType(DataRefImpl Ref) const {
void WasmObjectFile::getRelocationTypeName(
DataRefImpl Ref, SmallVectorImpl<char> &Result) const {
- const wasm::WasmRelocation& Rel = getWasmRelocation(Ref);
+ const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
StringRef Res = "Unknown";
-#define WASM_RELOC(name, value) \
- case wasm::name: \
- Res = #name; \
+#define WASM_RELOC(name, value) \
+ case wasm::name: \
+ Res = #name; \
break;
switch (Rel.Type) {
@@ -1283,9 +1416,9 @@ SubtargetFeatures WasmObjectFile::getFeatures() const {
return SubtargetFeatures();
}
-bool WasmObjectFile::isRelocatableObject() const {
- return HasLinkingSection;
-}
+bool WasmObjectFile::isRelocatableObject() const { return HasLinkingSection; }
+
+bool WasmObjectFile::isSharedObject() const { return HasDylinkSection; }
const WasmSection &WasmObjectFile::getWasmSection(DataRefImpl Ref) const {
assert(Ref.d.a < Sections.size());
@@ -1305,7 +1438,62 @@ WasmObjectFile::getWasmRelocation(const RelocationRef &Ref) const {
const wasm::WasmRelocation &
WasmObjectFile::getWasmRelocation(DataRefImpl Ref) const {
assert(Ref.d.a < Sections.size());
- const WasmSection& Sec = Sections[Ref.d.a];
+ const WasmSection &Sec = Sections[Ref.d.a];
assert(Ref.d.b < Sec.Relocations.size());
return Sec.Relocations[Ref.d.b];
}
+
+int WasmSectionOrderChecker::getSectionOrder(unsigned ID,
+ StringRef CustomSectionName) {
+ switch (ID) {
+ case wasm::WASM_SEC_CUSTOM:
+ return StringSwitch<unsigned>(CustomSectionName)
+ .Case("dylink", WASM_SEC_ORDER_DYLINK)
+ .Case("linking", WASM_SEC_ORDER_LINKING)
+ .StartsWith("reloc.", WASM_SEC_ORDER_RELOC)
+ .Case("name", WASM_SEC_ORDER_NAME)
+ .Case("producers", WASM_SEC_ORDER_PRODUCERS)
+ .Default(-1);
+ case wasm::WASM_SEC_TYPE:
+ return WASM_SEC_ORDER_TYPE;
+ case wasm::WASM_SEC_IMPORT:
+ return WASM_SEC_ORDER_IMPORT;
+ case wasm::WASM_SEC_FUNCTION:
+ return WASM_SEC_ORDER_FUNCTION;
+ case wasm::WASM_SEC_TABLE:
+ return WASM_SEC_ORDER_TABLE;
+ case wasm::WASM_SEC_MEMORY:
+ return WASM_SEC_ORDER_MEMORY;
+ case wasm::WASM_SEC_GLOBAL:
+ return WASM_SEC_ORDER_GLOBAL;
+ case wasm::WASM_SEC_EXPORT:
+ return WASM_SEC_ORDER_EXPORT;
+ case wasm::WASM_SEC_START:
+ return WASM_SEC_ORDER_START;
+ case wasm::WASM_SEC_ELEM:
+ return WASM_SEC_ORDER_ELEM;
+ case wasm::WASM_SEC_CODE:
+ return WASM_SEC_ORDER_CODE;
+ case wasm::WASM_SEC_DATA:
+ return WASM_SEC_ORDER_DATA;
+ case wasm::WASM_SEC_DATACOUNT:
+ return WASM_SEC_ORDER_DATACOUNT;
+ case wasm::WASM_SEC_EVENT:
+ return WASM_SEC_ORDER_EVENT;
+ default:
+ llvm_unreachable("invalid section");
+ }
+}
+
+bool WasmSectionOrderChecker::isValidSectionOrder(unsigned ID,
+ StringRef CustomSectionName) {
+ int Order = getSectionOrder(ID, CustomSectionName);
+ if (Order == -1) // Skip unknown sections
+ return true;
+ // There can be multiple "reloc." sections. Otherwise there shouldn't be any
+ // duplicate section orders.
+ bool IsValid = (LastOrder == Order && Order == WASM_SEC_ORDER_RELOC) ||
+ LastOrder < Order;
+ LastOrder = Order;
+ return IsValid;
+}
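
Note: the WasmSectionOrderChecker added above maps each section, including the known custom sections ("dylink", "linking", "reloc.*", "name", "producers"), to an ordinal and rejects any section whose ordinal does not increase, with two exceptions visible in isValidSectionOrder: unknown custom sections are skipped, and consecutive "reloc.*" sections may share the same slot. A minimal standalone sketch of that rule (the ordinal table below is hypothetical, standing in for the WASM_SEC_ORDER_* values):

#include <string>

// Sketch only: orderOf() stands in for the real mapping from wasm section IDs
// and custom-section names to WASM_SEC_ORDER_* ordinals.
struct SectionOrderSketch {
  int LastOrder = -1;

  static int orderOf(const std::string &Name) {
    if (Name == "dylink")
      return 0;
    if (Name == "type")
      return 1;
    if (Name == "import")
      return 2;
    if (Name.rfind("reloc.", 0) == 0)
      return 10; // every reloc.* section shares one ordinal
    if (Name == "name")
      return 11;
    return -1; // unknown sections are not order-checked
  }

  bool isValidOrder(const std::string &Name) {
    int Order = orderOf(Name);
    if (Order == -1) // skip unknown sections
      return true;
    // Repeated ordinals are tolerated only for reloc.* sections.
    bool IsValid = (LastOrder == Order && Order == 10) || LastOrder < Order;
    LastOrder = Order;
    return IsValid;
  }
};

With this ordering, checking "type" and then "import" succeeds, while feeding "import" before "type" fails; that is the behaviour the new readSection() check relies on.
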
diff --git a/contrib/llvm/lib/Object/WindowsResource.cpp b/contrib/llvm/lib/Object/WindowsResource.cpp
index 1b7282f13db0..65413dd8bea1 100644
--- a/contrib/llvm/lib/Object/WindowsResource.cpp
+++ b/contrib/llvm/lib/Object/WindowsResource.cpp
@@ -259,7 +259,7 @@ WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef,
std::vector<UTF16> EndianCorrectedName;
if (sys::IsBigEndianHost) {
EndianCorrectedName.resize(NameRef.size() + 1);
- std::copy(NameRef.begin(), NameRef.end(), EndianCorrectedName.begin() + 1);
+ llvm::copy(NameRef, EndianCorrectedName.begin() + 1);
EndianCorrectedName[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED;
CorrectedName = makeArrayRef(EndianCorrectedName);
} else
@@ -501,8 +501,7 @@ void WindowsResourceCOFFWriter::writeFirstSection() {
void WindowsResourceCOFFWriter::writeSecondSection() {
// Now write the .rsrc$02 section.
for (auto const &RawDataEntry : Data) {
- std::copy(RawDataEntry.begin(), RawDataEntry.end(),
- BufferStart + CurrentOffset);
+ llvm::copy(RawDataEntry, BufferStart + CurrentOffset);
CurrentOffset += alignTo(RawDataEntry.size(), sizeof(uint64_t));
}
@@ -672,7 +671,7 @@ void WindowsResourceCOFFWriter::writeDirectoryStringTable() {
support::endian::write16le(BufferStart + CurrentOffset, Length);
CurrentOffset += sizeof(uint16_t);
auto *Start = reinterpret_cast<UTF16 *>(BufferStart + CurrentOffset);
- std::copy(String.begin(), String.end(), Start);
+ llvm::copy(String, Start);
CurrentOffset += Length * sizeof(UTF16);
TotalStringTableSize += Length * sizeof(UTF16) + sizeof(uint16_t);
}
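
Note: the WindowsResource hunks above only swap iterator-pair std::copy calls for the range-based llvm::copy helper from llvm/ADT/STLExtras.h; behaviour is unchanged. Roughly, the helper is a thin wrapper along these lines (simplified sketch, not the exact LLVM definition, which goes through ADL begin/end):

#include <algorithm>
#include <iterator>

// copy_range is a stand-in name; llvm::copy plays the same role.
template <typename R, typename OutputIt>
OutputIt copy_range(R &&Range, OutputIt Out) {
  return std::copy(std::begin(Range), std::end(Range), Out);
}

So llvm::copy(NameRef, EndianCorrectedName.begin() + 1) writes the same elements as the replaced std::copy(NameRef.begin(), NameRef.end(), ...) call, just without spelling out the iterator pair.
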
diff --git a/contrib/llvm/lib/ObjectYAML/COFFYAML.cpp b/contrib/llvm/lib/ObjectYAML/COFFYAML.cpp
index 9351ef96beb2..fdd94f4054e1 100644
--- a/contrib/llvm/lib/ObjectYAML/COFFYAML.cpp
+++ b/contrib/llvm/lib/ObjectYAML/COFFYAML.cpp
@@ -407,7 +407,8 @@ struct NDLLCharacteristics {
void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
COFFYAML::Relocation &Rel) {
IO.mapRequired("VirtualAddress", Rel.VirtualAddress);
- IO.mapRequired("SymbolName", Rel.SymbolName);
+ IO.mapOptional("SymbolName", Rel.SymbolName, StringRef());
+ IO.mapOptional("SymbolTableIndex", Rel.SymbolTableIndex);
COFF::header &H = *static_cast<COFF::header *>(IO.getContext());
if (H.Machine == COFF::IMAGE_FILE_MACHINE_I386) {
diff --git a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index 6debd8ab0c6e..4deeae878013 100644
--- a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -511,7 +511,7 @@ std::shared_ptr<DebugSubsection> YAMLFrameDataSubsection::toCodeViewSubsection(
const codeview::StringsAndChecksums &SC) const {
assert(SC.hasStrings());
- auto Result = std::make_shared<DebugFrameDataSubsection>();
+ auto Result = std::make_shared<DebugFrameDataSubsection>(true);
for (const auto &YF : Frames) {
codeview::FrameData F;
F.CodeSize = YF.CodeSize;
diff --git a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 745f79cd77f3..713e9a710e94 100644
--- a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -108,7 +108,7 @@ void ScalarBitSetTraits<ExportFlags>::bitset(IO &io, ExportFlags &Flags) {
}
void ScalarBitSetTraits<PublicSymFlags>::bitset(IO &io, PublicSymFlags &Flags) {
- auto FlagNames = getProcSymFlagNames();
+ auto FlagNames = getPublicSymFlagNames();
for (const auto &E : FlagNames) {
io.bitSetCase(Flags, E.Name.str().c_str(),
static_cast<PublicSymFlags>(E.Value));
diff --git a/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp b/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp
index f916b5d5f392..215d6bdd091e 100644
--- a/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -337,10 +337,18 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCase(EF_HEXAGON_MACH_V3);
BCase(EF_HEXAGON_MACH_V4);
BCase(EF_HEXAGON_MACH_V5);
+ BCase(EF_HEXAGON_MACH_V55);
+ BCase(EF_HEXAGON_MACH_V60);
+ BCase(EF_HEXAGON_MACH_V62);
+ BCase(EF_HEXAGON_MACH_V65);
BCase(EF_HEXAGON_ISA_V2);
BCase(EF_HEXAGON_ISA_V3);
BCase(EF_HEXAGON_ISA_V4);
BCase(EF_HEXAGON_ISA_V5);
+ BCase(EF_HEXAGON_ISA_V55);
+ BCase(EF_HEXAGON_ISA_V60);
+ BCase(EF_HEXAGON_ISA_V62);
+ BCase(EF_HEXAGON_ISA_V65);
break;
case ELF::EM_AVR:
BCase(EF_AVR_ARCH_AVR1);
@@ -402,7 +410,9 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX902, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX904, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
+ BCase(EF_AMDGPU_SRAM_ECC);
break;
case ELF::EM_X86_64:
break;
@@ -743,6 +753,7 @@ void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
IO.mapRequired("Class", FileHdr.Class);
IO.mapRequired("Data", FileHdr.Data);
IO.mapOptional("OSABI", FileHdr.OSABI, ELFYAML::ELF_ELFOSABI(0));
+ IO.mapOptional("ABIVersion", FileHdr.ABIVersion, Hex8(0));
IO.mapRequired("Type", FileHdr.Type);
IO.mapRequired("Machine", FileHdr.Machine);
IO.mapOptional("Flags", FileHdr.Flags, ELFYAML::ELF_EF(0));
@@ -816,6 +827,7 @@ static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
IO.mapOptional("Address", Section.Address, Hex64(0));
IO.mapOptional("Link", Section.Link, StringRef());
IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0));
+ IO.mapOptional("EntSize", Section.EntSize);
IO.mapOptional("Info", Section.Info, StringRef());
}
diff --git a/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp b/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp
index 3c20bb74d501..47bf853e0d3e 100644
--- a/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -48,6 +48,16 @@ static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) {
IO.mapOptional("Relocations", Section.Relocations);
}
+static void sectionMapping(IO &IO, WasmYAML::DylinkSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Name", Section.Name);
+ IO.mapRequired("MemorySize", Section.MemorySize);
+ IO.mapRequired("MemoryAlignment", Section.MemoryAlignment);
+ IO.mapRequired("TableSize", Section.TableSize);
+ IO.mapRequired("TableAlignment", Section.TableAlignment);
+ IO.mapRequired("Needed", Section.Needed);
+}
+
static void sectionMapping(IO &IO, WasmYAML::NameSection &Section) {
commonSectionMapping(IO, Section);
IO.mapRequired("Name", Section.Name);
@@ -100,6 +110,11 @@ static void sectionMapping(IO &IO, WasmYAML::GlobalSection &Section) {
IO.mapOptional("Globals", Section.Globals);
}
+static void sectionMapping(IO &IO, WasmYAML::EventSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Events", Section.Events);
+}
+
static void sectionMapping(IO &IO, WasmYAML::ExportSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Exports", Section.Exports);
@@ -142,7 +157,11 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
} else {
IO.mapRequired("Name", SectionName);
}
- if (SectionName == "linking") {
+ if (SectionName == "dylink") {
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::DylinkSection());
+ sectionMapping(IO, *cast<WasmYAML::DylinkSection>(Section.get()));
+ } else if (SectionName == "linking") {
if (!IO.outputting())
Section.reset(new WasmYAML::LinkingSection());
sectionMapping(IO, *cast<WasmYAML::LinkingSection>(Section.get()));
@@ -187,6 +206,11 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
Section.reset(new WasmYAML::GlobalSection());
sectionMapping(IO, *cast<WasmYAML::GlobalSection>(Section.get()));
break;
+ case wasm::WASM_SEC_EVENT:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::EventSection());
+ sectionMapping(IO, *cast<WasmYAML::EventSection>(Section.get()));
+ break;
case wasm::WASM_SEC_EXPORT:
if (!IO.outputting())
Section.reset(new WasmYAML::ExportSection());
@@ -227,6 +251,7 @@ void ScalarEnumerationTraits<WasmYAML::SectionType>::enumeration(
ECase(TABLE);
ECase(MEMORY);
ECase(GLOBAL);
+ ECase(EVENT);
ECase(EXPORT);
ECase(START);
ECase(ELEM);
@@ -307,9 +332,12 @@ void MappingTraits<WasmYAML::Import>::mapping(IO &IO,
} else if (Import.Kind == wasm::WASM_EXTERNAL_GLOBAL) {
IO.mapRequired("GlobalType", Import.GlobalImport.Type);
IO.mapRequired("GlobalMutable", Import.GlobalImport.Mutable);
+ } else if (Import.Kind == wasm::WASM_EXTERNAL_EVENT) {
+ IO.mapRequired("EventAttribute", Import.EventImport.Attribute);
+ IO.mapRequired("EventSigIndex", Import.EventImport.SigIndex);
} else if (Import.Kind == wasm::WASM_EXTERNAL_TABLE) {
IO.mapRequired("Table", Import.TableImport);
- } else if (Import.Kind == wasm::WASM_EXTERNAL_MEMORY ) {
+ } else if (Import.Kind == wasm::WASM_EXTERNAL_MEMORY) {
IO.mapRequired("Memory", Import.Memory);
} else {
llvm_unreachable("unhandled import type");
@@ -349,7 +377,7 @@ void MappingTraits<wasm::WasmInitExpr>::mapping(IO &IO,
case wasm::WASM_OPCODE_F64_CONST:
IO.mapRequired("Value", Expr.Value.Float64);
break;
- case wasm::WASM_OPCODE_GET_GLOBAL:
+ case wasm::WASM_OPCODE_GLOBAL_GET:
IO.mapRequired("Index", Expr.Value.Global);
break;
}
@@ -383,8 +411,8 @@ void MappingTraits<WasmYAML::ComdatEntry>::mapping(
IO.mapRequired("Index", ComdatEntry.Index);
}
-void MappingTraits<WasmYAML::Comdat>::mapping(
- IO &IO, WasmYAML::Comdat &Comdat) {
+void MappingTraits<WasmYAML::Comdat>::mapping(IO &IO,
+ WasmYAML::Comdat &Comdat) {
IO.mapRequired("Name", Comdat.Name);
IO.mapRequired("Entries", Comdat.Entries);
}
@@ -399,6 +427,8 @@ void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO,
IO.mapRequired("Function", Info.ElementIndex);
} else if (Info.Kind == wasm::WASM_SYMBOL_TYPE_GLOBAL) {
IO.mapRequired("Global", Info.ElementIndex);
+ } else if (Info.Kind == wasm::WASM_SYMBOL_TYPE_EVENT) {
+ IO.mapRequired("Event", Info.ElementIndex);
} else if (Info.Kind == wasm::WASM_SYMBOL_TYPE_DATA) {
if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0) {
IO.mapRequired("Segment", Info.DataRef.Segment);
@@ -412,24 +442,31 @@ void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO,
}
}
+void MappingTraits<WasmYAML::Event>::mapping(IO &IO, WasmYAML::Event &Event) {
+ IO.mapRequired("Index", Event.Index);
+ IO.mapRequired("Attribute", Event.Attribute);
+ IO.mapRequired("SigIndex", Event.SigIndex);
+}
+
void ScalarBitSetTraits<WasmYAML::LimitFlags>::bitset(
IO &IO, WasmYAML::LimitFlags &Value) {
#define BCase(X) IO.bitSetCase(Value, #X, wasm::WASM_LIMITS_FLAG_##X)
BCase(HAS_MAX);
+ BCase(IS_SHARED);
#undef BCase
}
void ScalarBitSetTraits<WasmYAML::SegmentFlags>::bitset(
- IO &IO, WasmYAML::SegmentFlags &Value) {
-}
+ IO &IO, WasmYAML::SegmentFlags &Value) {}
void ScalarBitSetTraits<WasmYAML::SymbolFlags>::bitset(
IO &IO, WasmYAML::SymbolFlags &Value) {
-#define BCaseMask(M, X) IO.maskedBitSetCase(Value, #X, wasm::WASM_SYMBOL_##X, wasm::WASM_SYMBOL_##M)
- //BCaseMask(BINDING_MASK, BINDING_GLOBAL);
+#define BCaseMask(M, X) \
+ IO.maskedBitSetCase(Value, #X, wasm::WASM_SYMBOL_##X, wasm::WASM_SYMBOL_##M)
+ // BCaseMask(BINDING_MASK, BINDING_GLOBAL);
BCaseMask(BINDING_MASK, BINDING_WEAK);
BCaseMask(BINDING_MASK, BINDING_LOCAL);
- //BCaseMask(VISIBILITY_MASK, VISIBILITY_DEFAULT);
+ // BCaseMask(VISIBILITY_MASK, VISIBILITY_DEFAULT);
BCaseMask(VISIBILITY_MASK, VISIBILITY_HIDDEN);
BCaseMask(UNDEFINED, UNDEFINED);
#undef BCaseMask
@@ -442,6 +479,7 @@ void ScalarEnumerationTraits<WasmYAML::SymbolKind>::enumeration(
ECase(DATA);
ECase(GLOBAL);
ECase(SECTION);
+ ECase(EVENT);
#undef ECase
}
@@ -452,7 +490,8 @@ void ScalarEnumerationTraits<WasmYAML::ValueType>::enumeration(
ECase(I64);
ECase(F32);
ECase(F64);
- ECase(ANYFUNC);
+ ECase(V128);
+ ECase(FUNCREF);
ECase(FUNC);
ECase(NORESULT);
#undef ECase
@@ -465,6 +504,7 @@ void ScalarEnumerationTraits<WasmYAML::ExportKind>::enumeration(
ECase(TABLE);
ECase(MEMORY);
ECase(GLOBAL);
+ ECase(EVENT);
#undef ECase
}
@@ -476,14 +516,14 @@ void ScalarEnumerationTraits<WasmYAML::Opcode>::enumeration(
ECase(I64_CONST);
ECase(F64_CONST);
ECase(F32_CONST);
- ECase(GET_GLOBAL);
+ ECase(GLOBAL_GET);
#undef ECase
}
void ScalarEnumerationTraits<WasmYAML::TableType>::enumeration(
IO &IO, WasmYAML::TableType &Type) {
#define ECase(X) IO.enumCase(Type, #X, wasm::WASM_TYPE_##X);
- ECase(ANYFUNC);
+ ECase(FUNCREF);
#undef ECase
}
diff --git a/contrib/llvm/lib/OptRemarks/OptRemarksParser.cpp b/contrib/llvm/lib/OptRemarks/OptRemarksParser.cpp
new file mode 100644
index 000000000000..0478d2bfbfa6
--- /dev/null
+++ b/contrib/llvm/lib/OptRemarks/OptRemarksParser.cpp
@@ -0,0 +1,368 @@
+//===- OptRemarksParser.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utility methods used by clients that want to use the
+// parser for optimization remarks in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/OptRemarks.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm;
+
+namespace {
+struct RemarkParser {
+ /// Source manager for better error messages.
+ SourceMgr SM;
+ /// Stream for yaml parsing.
+ yaml::Stream Stream;
+ /// Storage for the error stream.
+ std::string ErrorString;
+ /// The error stream.
+ raw_string_ostream ErrorStream;
+ /// Iterator in the YAML stream.
+ yaml::document_iterator DI;
+ /// The parsed remark (if any).
+ Optional<LLVMOptRemarkEntry> LastRemark;
+ /// Temporary parsing buffer for the arguments.
+ SmallVector<LLVMOptRemarkArg, 8> TmpArgs;
+ /// The state used by the parser to parse a remark entry. Invalidated with
+ /// every call to `parseYAMLElement`.
+ struct ParseState {
+ /// Temporary parsing buffer for the arguments.
+ SmallVectorImpl<LLVMOptRemarkArg> *Args;
+ StringRef Type;
+ StringRef Pass;
+ StringRef Name;
+ StringRef Function;
+ /// Optional.
+ Optional<StringRef> File;
+ Optional<unsigned> Line;
+ Optional<unsigned> Column;
+ Optional<unsigned> Hotness;
+
+ ParseState(SmallVectorImpl<LLVMOptRemarkArg> &Args) : Args(&Args) {}
+ /// Use Args only as a **temporary** buffer.
+ ~ParseState() { Args->clear(); }
+ };
+
+ ParseState State;
+
+ /// Set to `true` if we had any errors during parsing.
+ bool HadAnyErrors = false;
+
+ RemarkParser(StringRef Buf)
+ : SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString),
+ DI(Stream.begin()), LastRemark(), TmpArgs(), State(TmpArgs) {
+ SM.setDiagHandler(RemarkParser::HandleDiagnostic, this);
+ }
+
+ /// Parse a YAML element.
+ Error parseYAMLElement(yaml::Document &Remark);
+
+private:
+ /// Parse one key to a string.
+  /// Returns an error if the key is not a string.
+ Error parseKey(StringRef &Result, yaml::KeyValueNode &Node);
+ /// Parse one value to a string.
+ Error parseValue(StringRef &Result, yaml::KeyValueNode &Node);
+ /// Parse one value to an unsigned.
+ Error parseValue(Optional<unsigned> &Result, yaml::KeyValueNode &Node);
+ /// Parse a debug location.
+ Error parseDebugLoc(Optional<StringRef> &File, Optional<unsigned> &Line,
+ Optional<unsigned> &Column, yaml::KeyValueNode &Node);
+ /// Parse an argument.
+ Error parseArg(SmallVectorImpl<LLVMOptRemarkArg> &TmpArgs, yaml::Node &Node);
+
+ /// Handle a diagnostic from the YAML stream. Records the error in the
+ /// RemarkParser class.
+ static void HandleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
+ assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
+ auto *Parser = static_cast<RemarkParser *>(Ctx);
+ Diag.print(/*ProgName=*/nullptr, Parser->ErrorStream, /*ShowColors*/ false,
+ /*ShowKindLabels*/ true);
+ }
+};
+
+class ParseError : public ErrorInfo<ParseError> {
+public:
+ static char ID;
+
+ ParseError(StringRef Message, yaml::Node &Node)
+ : Message(Message), Node(Node) {}
+
+ void log(raw_ostream &OS) const override { OS << Message; }
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+
+ StringRef getMessage() const { return Message; }
+ yaml::Node &getNode() const { return Node; }
+
+private:
+ StringRef Message; // No need to hold a full copy of the buffer.
+ yaml::Node &Node;
+};
+
+char ParseError::ID = 0;
+
+static LLVMOptRemarkStringRef toOptRemarkStr(StringRef Str) {
+ return {Str.data(), static_cast<uint32_t>(Str.size())};
+}
+
+Error RemarkParser::parseKey(StringRef &Result, yaml::KeyValueNode &Node) {
+ auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey());
+ if (!Key)
+ return make_error<ParseError>("key is not a string.", Node);
+
+ Result = Key->getRawValue();
+ return Error::success();
+}
+
+Error RemarkParser::parseValue(StringRef &Result, yaml::KeyValueNode &Node) {
+ auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+ if (!Value)
+ return make_error<ParseError>("expected a value of scalar type.", Node);
+ Result = Value->getRawValue();
+
+ if (Result.front() == '\'')
+ Result = Result.drop_front();
+
+ if (Result.back() == '\'')
+ Result = Result.drop_back();
+
+ return Error::success();
+}
+
+Error RemarkParser::parseValue(Optional<unsigned> &Result,
+ yaml::KeyValueNode &Node) {
+ SmallVector<char, 4> Tmp;
+ auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+ if (!Value)
+ return make_error<ParseError>("expected a value of scalar type.", Node);
+ unsigned UnsignedValue = 0;
+ if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
+ return make_error<ParseError>("expected a value of integer type.", *Value);
+ Result = UnsignedValue;
+ return Error::success();
+}
+
+Error RemarkParser::parseDebugLoc(Optional<StringRef> &File,
+ Optional<unsigned> &Line,
+ Optional<unsigned> &Column,
+ yaml::KeyValueNode &Node) {
+ auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
+ if (!DebugLoc)
+ return make_error<ParseError>("expected a value of mapping type.", Node);
+
+ for (yaml::KeyValueNode &DLNode : *DebugLoc) {
+ StringRef KeyName;
+ if (Error E = parseKey(KeyName, DLNode))
+ return E;
+ if (KeyName == "File") {
+ File = StringRef(); // Set the optional to contain a default constructed
+ // value, to be passed to the parsing function.
+ if (Error E = parseValue(*File, DLNode))
+ return E;
+ } else if (KeyName == "Column") {
+ if (Error E = parseValue(Column, DLNode))
+ return E;
+ } else if (KeyName == "Line") {
+ if (Error E = parseValue(Line, DLNode))
+ return E;
+ } else {
+ return make_error<ParseError>("unknown entry in DebugLoc map.", DLNode);
+ }
+ }
+
+ // If any of the debug loc fields is missing, return an error.
+ if (!File || !Line || !Column)
+ return make_error<ParseError>("DebugLoc node incomplete.", Node);
+
+ return Error::success();
+}
+
+Error RemarkParser::parseArg(SmallVectorImpl<LLVMOptRemarkArg> &Args,
+ yaml::Node &Node) {
+ auto *ArgMap = dyn_cast<yaml::MappingNode>(&Node);
+ if (!ArgMap)
+ return make_error<ParseError>("expected a value of mapping type.", Node);
+
+ StringRef ValueStr;
+ StringRef KeyStr;
+ Optional<StringRef> File;
+ Optional<unsigned> Line;
+ Optional<unsigned> Column;
+
+ for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
+ StringRef KeyName;
+ if (Error E = parseKey(KeyName, ArgEntry))
+ return E;
+
+ // Try to parse debug locs.
+ if (KeyName == "DebugLoc") {
+ // Can't have multiple DebugLoc entries per argument.
+ if (File || Line || Column)
+ return make_error<ParseError>(
+ "only one DebugLoc entry is allowed per argument.", ArgEntry);
+
+ if (Error E = parseDebugLoc(File, Line, Column, ArgEntry))
+ return E;
+ continue;
+ }
+
+ // If we already have a string, error out.
+ if (!ValueStr.empty())
+ return make_error<ParseError>(
+ "only one string entry is allowed per argument.", ArgEntry);
+
+ // Try to parse a string.
+ if (Error E = parseValue(ValueStr, ArgEntry))
+ return E;
+
+ // Keep the key from the string.
+ KeyStr = KeyName;
+ }
+
+ if (KeyStr.empty())
+ return make_error<ParseError>("argument key is missing.", *ArgMap);
+ if (ValueStr.empty())
+ return make_error<ParseError>("argument value is missing.", *ArgMap);
+
+ Args.push_back(LLVMOptRemarkArg{
+ toOptRemarkStr(KeyStr), toOptRemarkStr(ValueStr),
+ LLVMOptRemarkDebugLoc{toOptRemarkStr(File.getValueOr(StringRef())),
+ Line.getValueOr(0), Column.getValueOr(0)}});
+
+ return Error::success();
+}
+
+Error RemarkParser::parseYAMLElement(yaml::Document &Remark) {
+ // Parsing a new remark, clear the previous one.
+ LastRemark = None;
+ State = ParseState(TmpArgs);
+
+ auto *Root = dyn_cast<yaml::MappingNode>(Remark.getRoot());
+ if (!Root)
+ return make_error<ParseError>("document root is not of mapping type.",
+ *Remark.getRoot());
+
+ State.Type = Root->getRawTag();
+
+ for (yaml::KeyValueNode &RemarkField : *Root) {
+ StringRef KeyName;
+ if (Error E = parseKey(KeyName, RemarkField))
+ return E;
+
+ if (KeyName == "Pass") {
+ if (Error E = parseValue(State.Pass, RemarkField))
+ return E;
+ } else if (KeyName == "Name") {
+ if (Error E = parseValue(State.Name, RemarkField))
+ return E;
+ } else if (KeyName == "Function") {
+ if (Error E = parseValue(State.Function, RemarkField))
+ return E;
+ } else if (KeyName == "Hotness") {
+ if (Error E = parseValue(State.Hotness, RemarkField))
+ return E;
+ } else if (KeyName == "DebugLoc") {
+ if (Error E =
+ parseDebugLoc(State.File, State.Line, State.Column, RemarkField))
+ return E;
+ } else if (KeyName == "Args") {
+ auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
+ if (!Args)
+ return make_error<ParseError>("wrong value type for key.", RemarkField);
+
+ for (yaml::Node &Arg : *Args)
+ if (Error E = parseArg(*State.Args, Arg))
+ return E;
+ } else {
+ return make_error<ParseError>("unknown key.", RemarkField);
+ }
+ }
+
+ // If the YAML parsing failed, don't even continue parsing. We might
+ // encounter malformed YAML.
+ if (Stream.failed())
+ return make_error<ParseError>("YAML parsing failed.", *Remark.getRoot());
+
+ // Check if any of the mandatory fields are missing.
+ if (State.Type.empty() || State.Pass.empty() || State.Name.empty() ||
+ State.Function.empty())
+ return make_error<ParseError>("Type, Pass, Name or Function missing.",
+ *Remark.getRoot());
+
+ LastRemark = LLVMOptRemarkEntry{
+ toOptRemarkStr(State.Type),
+ toOptRemarkStr(State.Pass),
+ toOptRemarkStr(State.Name),
+ toOptRemarkStr(State.Function),
+ LLVMOptRemarkDebugLoc{toOptRemarkStr(State.File.getValueOr(StringRef())),
+ State.Line.getValueOr(0),
+ State.Column.getValueOr(0)},
+ State.Hotness.getValueOr(0),
+ static_cast<uint32_t>(State.Args->size()),
+ State.Args->data()};
+
+ return Error::success();
+}
+} // namespace
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(RemarkParser, LLVMOptRemarkParserRef)
+
+extern "C" LLVMOptRemarkParserRef LLVMOptRemarkParserCreate(const void *Buf,
+ uint64_t Size) {
+ return wrap(
+ new RemarkParser(StringRef(static_cast<const char *>(Buf), Size)));
+}
+
+extern "C" LLVMOptRemarkEntry *
+LLVMOptRemarkParserGetNext(LLVMOptRemarkParserRef Parser) {
+ RemarkParser &TheParser = *unwrap(Parser);
+ // Check for EOF.
+ if (TheParser.HadAnyErrors || TheParser.DI == TheParser.Stream.end())
+ return nullptr;
+
+ // Try to parse an entry.
+ if (Error E = TheParser.parseYAMLElement(*TheParser.DI)) {
+ handleAllErrors(std::move(E), [&](const ParseError &PE) {
+ TheParser.Stream.printError(&PE.getNode(),
+ Twine(PE.getMessage()) + Twine('\n'));
+ TheParser.HadAnyErrors = true;
+ });
+ return nullptr;
+ }
+
+ // Move on.
+ ++TheParser.DI;
+
+ // Return the just-parsed remark.
+ if (Optional<LLVMOptRemarkEntry> &Entry = TheParser.LastRemark)
+ return &*Entry;
+ return nullptr;
+}
+
+extern "C" LLVMBool LLVMOptRemarkParserHasError(LLVMOptRemarkParserRef Parser) {
+ return unwrap(Parser)->HadAnyErrors;
+}
+
+extern "C" const char *
+LLVMOptRemarkParserGetErrorMessage(LLVMOptRemarkParserRef Parser) {
+ return unwrap(Parser)->ErrorStream.str().c_str();
+}
+
+extern "C" void LLVMOptRemarkParserDispose(LLVMOptRemarkParserRef Parser) {
+ delete unwrap(Parser);
+}
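
Note: the new file implements the parser behind the C API declared in llvm-c/OptRemarks.h, which is also added in this import. A hedged usage sketch using only the five entry points defined above; the buffer contents and error-handling policy are illustrative:

#include "llvm-c/OptRemarks.h"
#include <stdint.h>
#include <stdio.h>

// Counts the remarks in a YAML buffer such as one produced by clang's
// -fsave-optimization-record. LLVMOptRemarkParserGetNext returns NULL both at
// end-of-stream and on a parse error, so the error flag is checked afterwards.
static unsigned countRemarks(const char *Buf, uint64_t Size) {
  LLVMOptRemarkParserRef Parser = LLVMOptRemarkParserCreate(Buf, Size);
  unsigned N = 0;
  while (LLVMOptRemarkParserGetNext(Parser))
    ++N;
  if (LLVMOptRemarkParserHasError(Parser))
    fprintf(stderr, "remark parse error: %s\n",
            LLVMOptRemarkParserGetErrorMessage(Parser));
  LLVMOptRemarkParserDispose(Parser);
  return N;
}
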
diff --git a/contrib/llvm/lib/Option/OptTable.cpp b/contrib/llvm/lib/Option/OptTable.cpp
index 022b9d5d933e..312ff7808759 100644
--- a/contrib/llvm/lib/Option/OptTable.cpp
+++ b/contrib/llvm/lib/Option/OptTable.cpp
@@ -521,19 +521,17 @@ static const char *getOptionHelpGroup(const OptTable &Opts, OptSpecifier Id) {
return getOptionHelpGroup(Opts, GroupID);
}
-void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title,
+void OptTable::PrintHelp(raw_ostream &OS, const char *Usage, const char *Title,
bool ShowHidden, bool ShowAllAliases) const {
- PrintHelp(OS, Name, Title, /*Include*/ 0, /*Exclude*/
+ PrintHelp(OS, Usage, Title, /*Include*/ 0, /*Exclude*/
(ShowHidden ? 0 : HelpHidden), ShowAllAliases);
}
-void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title,
+void OptTable::PrintHelp(raw_ostream &OS, const char *Usage, const char *Title,
unsigned FlagsToInclude, unsigned FlagsToExclude,
bool ShowAllAliases) const {
- OS << "OVERVIEW: " << Title << "\n";
- OS << '\n';
- OS << "USAGE: " << Name << " [options] <inputs>\n";
- OS << '\n';
+ OS << "OVERVIEW: " << Title << "\n\n";
+ OS << "USAGE: " << Usage << "\n\n";
// Render help text into a map of group-name to a list of (option, help)
// pairs.
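
Note: with this change PrintHelp no longer appends " [options] <inputs>" itself; callers pass a complete usage string. A hypothetical caller updated for the new signature (the tool name and the option table are placeholders, not part of the patch):

#include "llvm/Option/OptTable.h"
#include "llvm/Support/raw_ostream.h"

void printToolHelp(const llvm::opt::OptTable &Opts) {
  // Before: Opts.PrintHelp(llvm::outs(), "mytool", "My Tool Overview");
  // printed "USAGE: mytool [options] <inputs>". Now the caller spells out the
  // whole usage line.
  Opts.PrintHelp(llvm::outs(), "mytool [options] <inputs>", "My Tool Overview",
                 /*ShowHidden=*/false, /*ShowAllAliases=*/false);
}
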
diff --git a/contrib/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm/lib/Passes/PassBuilder.cpp
index eb04dcc8b6ef..5ec94ea6f40a 100644
--- a/contrib/llvm/lib/Passes/PassBuilder.cpp
+++ b/contrib/llvm/lib/Passes/PassBuilder.cpp
@@ -48,6 +48,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
@@ -58,10 +59,10 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Regex.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
-#include "llvm/Transforms/Instrumentation/CGProfile.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
@@ -75,6 +76,7 @@
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/Transforms/IPO/GlobalSplit.h"
+#include "llvm/Transforms/IPO/HotColdSplitting.h"
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/Internalize.h"
@@ -86,9 +88,14 @@
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
+#include "llvm/Transforms/Instrumentation/CGProfile.h"
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
@@ -127,6 +134,7 @@
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
+#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
@@ -136,14 +144,17 @@
#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Scalar/SROA.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
+#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
@@ -152,6 +163,7 @@
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
@@ -193,6 +205,12 @@ static cl::opt<bool> EnableSyntheticCounts(
static Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
+static cl::opt<bool>
+ EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
+ cl::desc("Enable control height reduction optimization (CHR)"));
+
+extern cl::opt<bool> EnableHotColdSplit;
+
static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
switch (Level) {
case PassBuilder::O0:
@@ -486,6 +504,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
+ if (EnableCHR && Level == O3 && PGOOpt &&
+ (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty()))
+ FPM.addPass(ControlHeightReductionPass());
+
return FPM;
}
@@ -493,7 +515,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
PassBuilder::OptimizationLevel Level,
bool RunProfileGen,
std::string ProfileGenFile,
- std::string ProfileUseFile) {
+ std::string ProfileUseFile,
+ std::string ProfileRemappingFile) {
  // Generally running simplification passes and the inliner with a high
// threshold results in smaller executables, but there may be cases where
// the size grows, so let's be conservative here and skip this simplification
@@ -547,7 +570,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
}
if (!ProfileUseFile.empty())
- MPM.addPass(PGOInstrumentationUse(ProfileUseFile));
+ MPM.addPass(PGOInstrumentationUse(ProfileUseFile, ProfileRemappingFile));
}
static InlineParams
@@ -593,6 +616,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Annotate sample profile right after early FPM to ensure freshness of
// the debug info.
MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile,
+ PGOOpt->ProfileRemappingFile,
Phase == ThinLTOPhase::PreLink));
// Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
// for the profile annotation to be accurate in the ThinLTO backend.
@@ -642,7 +666,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
(!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty())) {
addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen,
- PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile);
+ PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile,
+ PGOOpt->ProfileRemappingFile);
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
@@ -693,6 +718,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
+ // We only want to do hot cold splitting once for ThinLTO, during the
+ // post-link ThinLTO.
+ if (EnableHotColdSplit && Phase != ThinLTOPhase::PreLink)
+ MPM.addPass(HotColdSplittingPass());
+
for (auto &C : CGSCCOptimizerLateEPCallbacks)
C(MainCGPipeline, Level);
@@ -809,7 +839,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(
createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
}
- OptimizePM.addPass(LoopUnrollPass(Level));
+ OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level)));
+ OptimizePM.addPass(WarnMissedTransformationsPass());
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
@@ -841,6 +872,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
  // inserting redundancies into the program. This even includes SimplifyCFG.
OptimizePM.addPass(SpeculateAroundPHIsPass());
+ for (auto &C : OptimizerLastEPCallbacks)
+ C(OptimizePM, Level);
+
// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
@@ -980,6 +1014,13 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
assert(Level != O0 && "Must request optimizations for the default pipeline!");
ModulePassManager MPM(DebugLogging);
+ if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) {
+ // Load sample profile before running the LTO optimization pipeline.
+ MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile,
+ PGOOpt->ProfileRemappingFile,
+ false /* ThinLTOPhase::PreLink */));
+ }
+
// Remove unused virtual tables to improve the quality of code generated by
// whole-program devirtualization and bitset lowering.
MPM.addPass(GlobalDCEPass());
@@ -1202,6 +1243,91 @@ static Optional<int> parseDevirtPassName(StringRef Name) {
return Count;
}
+static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
+ if (!Name.consume_front(PassName))
+ return false;
+ // normal pass name w/o parameters == default parameters
+ if (Name.empty())
+ return true;
+ return Name.startswith("<") && Name.endswith(">");
+}
+
+namespace {
+
+/// This performs customized parsing of a pass name with parameters.
+///
+/// We do not need parametrization of passes in a textual pipeline very often,
+/// yet on rare occasions the ability to specify parameters right there can be
+/// useful.
+///
+/// \p Name - a parameterized specification of a pass from a textual pipeline
+/// is a string of the form:
+/// PassName '<' parameter-list '>'
+///
+/// The parameter list is parsed by the callable argument \p Parser. It takes
+/// a string-ref of parameters and returns either a StringError or a parameter
+/// list in the form of a custom parameters type, all wrapped into the
+/// Expected<> template class.
+///
+template <typename ParametersParseCallableT>
+auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
+ StringRef PassName) -> decltype(Parser(StringRef{})) {
+ using ParametersT = typename decltype(Parser(StringRef{}))::value_type;
+
+ StringRef Params = Name;
+ if (!Params.consume_front(PassName)) {
+ assert(false &&
+ "unable to strip pass name from parametrized pass specification");
+ }
+ if (Params.empty())
+ return ParametersT{};
+ if (!Params.consume_front("<") || !Params.consume_back(">")) {
+ assert(false && "invalid format for parametrized pass name");
+ }
+
+ Expected<ParametersT> Result = Parser(Params);
+ assert((Result || Result.template errorIsA<StringError>()) &&
+ "Pass parameter parser can only return StringErrors.");
+ return std::move(Result);
+}
+
+/// Parser of parameters for LoopUnroll pass.
+Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
+ LoopUnrollOptions UnrollOpts;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+ int OptLevel = StringSwitch<int>(ParamName)
+ .Case("O0", 0)
+ .Case("O1", 1)
+ .Case("O2", 2)
+ .Case("O3", 3)
+ .Default(-1);
+ if (OptLevel >= 0) {
+ UnrollOpts.setOptLevel(OptLevel);
+ continue;
+ }
+
+ bool Enable = !ParamName.consume_front("no-");
+ if (ParamName == "partial") {
+ UnrollOpts.setPartial(Enable);
+ } else if (ParamName == "peeling") {
+ UnrollOpts.setPeeling(Enable);
+ } else if (ParamName == "runtime") {
+ UnrollOpts.setRuntime(Enable);
+ } else if (ParamName == "upperbound") {
+ UnrollOpts.setUpperBound(Enable);
+ } else {
+ return make_error<StringError>(
+ formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return UnrollOpts;
+}
+
+} // namespace
+
/// Tests whether a pass name starts with a valid prefix for a default pipeline
/// alias.
static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) {
@@ -1297,6 +1423,9 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (checkParametrizedPassName(Name, NAME)) \
+ return true;
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
@@ -1383,9 +1512,9 @@ PassBuilder::parsePipelineText(StringRef Text) {
return {std::move(ResultPipeline)};
}
-bool PassBuilder::parseModulePass(ModulePassManager &MPM,
- const PipelineElement &E, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseModulePass(ModulePassManager &MPM,
+ const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -1393,50 +1522,56 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
if (!InnerPipeline.empty()) {
if (Name == "module") {
ModulePassManager NestedMPM(DebugLogging);
- if (!parseModulePassPipeline(NestedMPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
MPM.addPass(std::move(NestedMPM));
- return true;
+ return Error::success();
}
if (Name == "cgscc") {
CGSCCPassManager CGPM(DebugLogging);
- if (!parseCGSCCPassPipeline(CGPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseCGSCCPassPipeline(CGPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return Err;
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
- return true;
+ return Error::success();
}
if (Name == "function") {
FunctionPassManager FPM(DebugLogging);
- if (!parseFunctionPassPipeline(FPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
- return true;
+ return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
ModulePassManager NestedMPM(DebugLogging);
- if (!parseModulePassPipeline(NestedMPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM)));
- return true;
+ return Error::success();
}
for (auto &C : ModulePipelineParsingCallbacks)
if (C(Name, MPM, InnerPipeline))
- return true;
+ return Error::success();
// Normal passes can't have pipelines.
- return false;
+ return make_error<StringError>(
+ formatv("invalid use of '{0}' pass as module pipeline", Name).str(),
+ inconvertibleErrorCode());
}
// Manually handle aliases for pre-configured pipeline fragments.
if (startsWithDefaultPipelineAliasPrefix(Name)) {
SmallVector<StringRef, 3> Matches;
if (!DefaultAliasRegex.match(Name, &Matches))
- return false;
+ return make_error<StringError>(
+ formatv("unknown default pipeline alias '{0}'", Name).str(),
+ inconvertibleErrorCode());
+
assert(Matches.size() == 3 && "Must capture two matched strings!");
OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
@@ -1448,7 +1583,7 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
.Case("Oz", Oz);
if (L == O0)
// At O0 we do nothing at all!
- return true;
+ return Error::success();
if (Matches[1] == "default") {
MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
@@ -1462,38 +1597,40 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
assert(Matches[1] == "lto" && "Not one of the matched options!");
MPM.addPass(buildLTODefaultPipeline(L, DebugLogging, nullptr));
}
- return true;
+ return Error::success();
}
// Finally expand the basic registered passes from the .inc file.
#define MODULE_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(CREATE_PASS); \
- return true; \
+ return Error::success(); \
}
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
MPM.addPass( \
RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, Module>()); \
- return true; \
+ return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
MPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
- return true; \
+ return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : ModulePipelineParsingCallbacks)
if (C(Name, MPM, InnerPipeline))
- return true;
- return false;
+ return Error::success();
+ return make_error<StringError>(
+ formatv("unknown module pass '{0}'", Name).str(),
+ inconvertibleErrorCode());
}
-bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
- const PipelineElement &E, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
+ const PipelineElement &E, bool VerifyEachPass,
+ bool DebugLogging) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -1501,53 +1638,55 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
if (!InnerPipeline.empty()) {
if (Name == "cgscc") {
CGSCCPassManager NestedCGPM(DebugLogging);
- if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
// Add the nested pass manager with the appropriate adaptor.
CGPM.addPass(std::move(NestedCGPM));
- return true;
+ return Error::success();
}
if (Name == "function") {
FunctionPassManager FPM(DebugLogging);
- if (!parseFunctionPassPipeline(FPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
// Add the nested pass manager with the appropriate adaptor.
CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
- return true;
+ return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
CGSCCPassManager NestedCGPM(DebugLogging);
- if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM)));
- return true;
+ return Error::success();
}
if (auto MaxRepetitions = parseDevirtPassName(Name)) {
CGSCCPassManager NestedCGPM(DebugLogging);
- if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
CGPM.addPass(
createDevirtSCCRepeatedPass(std::move(NestedCGPM), *MaxRepetitions));
- return true;
+ return Error::success();
}
for (auto &C : CGSCCPipelineParsingCallbacks)
if (C(Name, CGPM, InnerPipeline))
- return true;
+ return Error::success();
// Normal passes can't have pipelines.
- return false;
+ return make_error<StringError>(
+ formatv("invalid use of '{0}' pass as cgscc pipeline", Name).str(),
+ inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(CREATE_PASS); \
- return true; \
+ return Error::success(); \
}
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
@@ -1555,24 +1694,26 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
std::remove_reference<decltype(CREATE_PASS)>::type, \
LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, \
CGSCCUpdateResult &>()); \
- return true; \
+ return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
CGPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
- return true; \
+ return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : CGSCCPipelineParsingCallbacks)
if (C(Name, CGPM, InnerPipeline))
- return true;
- return false;
+ return Error::success();
+ return make_error<StringError>(
+ formatv("unknown cgscc pass '{0}'", Name).str(),
+ inconvertibleErrorCode());
}
-bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
- const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging) {
+Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
+ const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -1580,68 +1721,80 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
if (!InnerPipeline.empty()) {
if (Name == "function") {
FunctionPassManager NestedFPM(DebugLogging);
- if (!parseFunctionPassPipeline(NestedFPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
// Add the nested pass manager with the appropriate adaptor.
FPM.addPass(std::move(NestedFPM));
- return true;
+ return Error::success();
}
if (Name == "loop") {
LoopPassManager LPM(DebugLogging);
- if (!parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return Err;
// Add the nested pass manager with the appropriate adaptor.
FPM.addPass(
createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging));
- return true;
+ return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
FunctionPassManager NestedFPM(DebugLogging);
- if (!parseFunctionPassPipeline(NestedFPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM)));
- return true;
+ return Error::success();
}
for (auto &C : FunctionPipelineParsingCallbacks)
if (C(Name, FPM, InnerPipeline))
- return true;
+ return Error::success();
// Normal passes can't have pipelines.
- return false;
+ return make_error<StringError>(
+ formatv("invalid use of '{0}' pass as function pipeline", Name).str(),
+ inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
FPM.addPass(CREATE_PASS); \
- return true; \
+ return Error::success(); \
+ }
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ FPM.addPass(CREATE_PASS(Params.get())); \
+ return Error::success(); \
}
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
FPM.addPass( \
RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, Function>()); \
- return true; \
+ return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
FPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
- return true; \
+ return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : FunctionPipelineParsingCallbacks)
if (C(Name, FPM, InnerPipeline))
- return true;
- return false;
+ return Error::success();
+ return make_error<StringError>(
+ formatv("unknown function pass '{0}'", Name).str(),
+ inconvertibleErrorCode());
}
-bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging) {
+Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging) {
StringRef Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -1649,35 +1802,37 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
if (!InnerPipeline.empty()) {
if (Name == "loop") {
LoopPassManager NestedLPM(DebugLogging);
- if (!parseLoopPassPipeline(NestedLPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
// Add the nested pass manager with the appropriate adaptor.
LPM.addPass(std::move(NestedLPM));
- return true;
+ return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
LoopPassManager NestedLPM(DebugLogging);
- if (!parseLoopPassPipeline(NestedLPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM)));
- return true;
+ return Error::success();
}
for (auto &C : LoopPipelineParsingCallbacks)
if (C(Name, LPM, InnerPipeline))
- return true;
+ return Error::success();
// Normal passes can't have pipelines.
- return false;
+ return make_error<StringError>(
+ formatv("invalid use of '{0}' pass as loop pipeline", Name).str(),
+ inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
LPM.addPass(CREATE_PASS); \
- return true; \
+ return Error::success(); \
}
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
@@ -1685,19 +1840,20 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
std::remove_reference<decltype(CREATE_PASS)>::type, Loop, \
LoopAnalysisManager, LoopStandardAnalysisResults &, \
LPMUpdater &>()); \
- return true; \
+ return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
LPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
- return true; \
+ return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : LoopPipelineParsingCallbacks)
if (C(Name, LPM, InnerPipeline))
- return true;
- return false;
+ return Error::success();
+ return make_error<StringError>(formatv("unknown loop pass '{0}'", Name).str(),
+ inconvertibleErrorCode());
}
bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) {
@@ -1721,41 +1877,42 @@ bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) {
return false;
}
-bool PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass,
+ bool DebugLogging) {
for (const auto &Element : Pipeline) {
- if (!parseLoopPass(LPM, Element, VerifyEachPass, DebugLogging))
- return false;
+ if (auto Err = parseLoopPass(LPM, Element, VerifyEachPass, DebugLogging))
+ return Err;
// FIXME: No verifier support for Loop passes!
}
- return true;
+ return Error::success();
}
-bool PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass,
+ bool DebugLogging) {
for (const auto &Element : Pipeline) {
- if (!parseFunctionPass(FPM, Element, VerifyEachPass, DebugLogging))
- return false;
+ if (auto Err =
+ parseFunctionPass(FPM, Element, VerifyEachPass, DebugLogging))
+ return Err;
if (VerifyEachPass)
FPM.addPass(VerifierPass());
}
- return true;
+ return Error::success();
}
-bool PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass,
+ bool DebugLogging) {
for (const auto &Element : Pipeline) {
- if (!parseCGSCCPass(CGPM, Element, VerifyEachPass, DebugLogging))
- return false;
+ if (auto Err = parseCGSCCPass(CGPM, Element, VerifyEachPass, DebugLogging))
+ return Err;
// FIXME: No verifier support for CGSCC passes!
}
- return true;
+ return Error::success();
}
void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
@@ -1771,28 +1928,30 @@ void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
LAM.registerPass([&] { return FunctionAnalysisManagerLoopProxy(FAM); });
}
-bool PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass,
+ bool DebugLogging) {
for (const auto &Element : Pipeline) {
- if (!parseModulePass(MPM, Element, VerifyEachPass, DebugLogging))
- return false;
+ if (auto Err = parseModulePass(MPM, Element, VerifyEachPass, DebugLogging))
+ return Err;
if (VerifyEachPass)
MPM.addPass(VerifierPass());
}
- return true;
+ return Error::success();
}
// Primary pass pipeline description parsing routine for a \c ModulePassManager
// FIXME: Should this routine accept a TargetMachine or require the caller to
// pre-populate the analysis managers with target-specific stuff?
-bool PassBuilder::parsePassPipeline(ModulePassManager &MPM,
- StringRef PipelineText, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parsePassPipeline(ModulePassManager &MPM,
+ StringRef PipelineText,
+ bool VerifyEachPass, bool DebugLogging) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
- return false;
+ return make_error<StringError>(
+ formatv("invalid pipeline '{0}'", PipelineText).str(),
+ inconvertibleErrorCode());
// If the first name isn't at the module layer, wrap the pipeline up
// automatically.
@@ -1809,73 +1968,106 @@ bool PassBuilder::parsePassPipeline(ModulePassManager &MPM,
} else {
for (auto &C : TopLevelPipelineParsingCallbacks)
if (C(MPM, *Pipeline, VerifyEachPass, DebugLogging))
- return true;
-
- // Unknown pass name!
- return false;
+ return Error::success();
+
+ // Unknown pass or pipeline name!
+ auto &InnerPipeline = Pipeline->front().InnerPipeline;
+ return make_error<StringError>(
+ formatv("unknown {0} name '{1}'",
+ (InnerPipeline.empty() ? "pass" : "pipeline"), FirstName)
+ .str(),
+ inconvertibleErrorCode());
}
}
- return parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging);
+ if (auto Err =
+ parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging))
+ return Err;
+ return Error::success();
}
// Primary pass pipeline description parsing routine for a \c CGSCCPassManager
-bool PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM,
- StringRef PipelineText, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM,
+ StringRef PipelineText,
+ bool VerifyEachPass, bool DebugLogging) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
- return false;
+ return make_error<StringError>(
+ formatv("invalid pipeline '{0}'", PipelineText).str(),
+ inconvertibleErrorCode());
StringRef FirstName = Pipeline->front().Name;
if (!isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks))
- return false;
-
- return parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging);
+ return make_error<StringError>(
+ formatv("unknown cgscc pass '{0}' in pipeline '{1}'", FirstName,
+ PipelineText)
+ .str(),
+ inconvertibleErrorCode());
+
+ if (auto Err =
+ parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging))
+ return Err;
+ return Error::success();
}
// Primary pass pipeline description parsing routine for a \c
// FunctionPassManager
-bool PassBuilder::parsePassPipeline(FunctionPassManager &FPM,
- StringRef PipelineText, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM,
+ StringRef PipelineText,
+ bool VerifyEachPass, bool DebugLogging) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
- return false;
+ return make_error<StringError>(
+ formatv("invalid pipeline '{0}'", PipelineText).str(),
+ inconvertibleErrorCode());
StringRef FirstName = Pipeline->front().Name;
if (!isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks))
- return false;
-
- return parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass,
- DebugLogging);
+ return make_error<StringError>(
+ formatv("unknown function pass '{0}' in pipeline '{1}'", FirstName,
+ PipelineText)
+ .str(),
+ inconvertibleErrorCode());
+
+ if (auto Err = parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass,
+ DebugLogging))
+ return Err;
+ return Error::success();
}
// Primary pass pipeline description parsing routine for a \c LoopPassManager
-bool PassBuilder::parsePassPipeline(LoopPassManager &CGPM,
- StringRef PipelineText, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parsePassPipeline(LoopPassManager &CGPM,
+ StringRef PipelineText,
+ bool VerifyEachPass, bool DebugLogging) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
- return false;
+ return make_error<StringError>(
+ formatv("invalid pipeline '{0}'", PipelineText).str(),
+ inconvertibleErrorCode());
+
+ if (auto Err =
+ parseLoopPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging))
+ return Err;
- return parseLoopPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging);
+ return Error::success();
}
-bool PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
+Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
// If the pipeline just consists of the word 'default' just replace the AA
// manager with our default one.
if (PipelineText == "default") {
AA = buildDefaultAAPipeline();
- return true;
+ return Error::success();
}
while (!PipelineText.empty()) {
StringRef Name;
std::tie(Name, PipelineText) = PipelineText.split(',');
if (!parseAAPassName(AA, Name))
- return false;
+ return make_error<StringError>(
+ formatv("unknown alias analysis name '{0}'", Name).str(),
+ inconvertibleErrorCode());
}
- return true;
+ return Error::success();
}
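
With the parsing entry points now returning llvm::Error instead of bool, a caller gets a descriptive message rather than a bare failure flag. A plausible call site, shown only as a sketch (PB, MPM and PipelineText are assumed to exist in the surrounding code):

if (auto Err = PB.parsePassPipeline(MPM, PipelineText,
                                    /*VerifyEachPass=*/false,
                                    /*DebugLogging=*/false)) {
  errs() << "could not parse pipeline '" << PipelineText
         << "': " << toString(std::move(Err)) << "\n";
  return 1;
}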
diff --git a/contrib/llvm/lib/Passes/PassRegistry.def b/contrib/llvm/lib/Passes/PassRegistry.def
index 6ae93a476968..771d2f5b212a 100644
--- a/contrib/llvm/lib/Passes/PassRegistry.def
+++ b/contrib/llvm/lib/Passes/PassRegistry.def
@@ -24,8 +24,10 @@ MODULE_ANALYSIS("lcg", LazyCallGraphAnalysis())
MODULE_ANALYSIS("module-summary", ModuleSummaryIndexAnalysis())
MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis())
MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis())
+MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis())
MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
MODULE_ANALYSIS("verify", VerifierAnalysis())
+MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#ifndef MODULE_ALIAS_ANALYSIS
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@@ -40,6 +42,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
#endif
MODULE_PASS("always-inline", AlwaysInlinerPass())
MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
+MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
MODULE_PASS("cg-profile", CGProfilePass())
MODULE_PASS("constmerge", ConstantMergePass())
MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
@@ -50,6 +53,7 @@ MODULE_PASS("function-import", FunctionImportPass())
MODULE_PASS("globaldce", GlobalDCEPass())
MODULE_PASS("globalopt", GlobalOptPass())
MODULE_PASS("globalsplit", GlobalSplitPass())
+MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
MODULE_PASS("instrprof", InstrProfiling())
@@ -69,6 +73,7 @@ MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs()))
MODULE_PASS("print", PrintModulePass(dbgs()))
MODULE_PASS("print-lcg", LazyCallGraphPrinterPass(dbgs()))
MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs()))
+MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs()))
MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC())
MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
@@ -84,6 +89,7 @@ MODULE_PASS("verify", VerifierPass())
#endif
CGSCC_ANALYSIS("no-op-cgscc", NoOpCGSCCAnalysis())
CGSCC_ANALYSIS("fam-proxy", FunctionAnalysisManagerCGSCCProxy())
+CGSCC_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#undef CGSCC_ANALYSIS
#ifndef CGSCC_PASS
@@ -117,10 +123,12 @@ FUNCTION_ANALYSIS("regions", RegionInfoAnalysis())
FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis())
FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis())
+FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
FUNCTION_ANALYSIS("targetir",
TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
FUNCTION_ANALYSIS("verify", VerifierAnalysis())
+FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#ifndef FUNCTION_ALIAS_ANALYSIS
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@@ -148,6 +156,7 @@ FUNCTION_PASS("bounds-checking", BoundsCheckingPass())
FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
FUNCTION_PASS("consthoist", ConstantHoistingPass())
+FUNCTION_PASS("chr", ControlHeightReductionPass())
FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
FUNCTION_PASS("dce", DCEPass())
FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
@@ -157,6 +166,7 @@ FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false))
FUNCTION_PASS("early-cse-memssa", EarlyCSEPass(/*UseMemorySSA=*/true))
FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/false))
+FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/true))
FUNCTION_PASS("gvn-hoist", GVNHoistPass())
FUNCTION_PASS("instcombine", InstCombinePass())
@@ -170,6 +180,7 @@ FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("gvn", GVN())
+FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
FUNCTION_PASS("loop-sink", LoopSinkPass())
FUNCTION_PASS("lowerinvoke", LowerInvokePass())
@@ -190,6 +201,7 @@ FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
+FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs()))
@@ -199,7 +211,9 @@ FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs()))
FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs()))
FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs()))
+FUNCTION_PASS("print<stack-safety-local>", StackSafetyPrinterPass(dbgs()))
FUNCTION_PASS("reassociate", ReassociatePass())
+FUNCTION_PASS("scalarizer", ScalarizerPass())
FUNCTION_PASS("sccp", SCCPPass())
FUNCTION_PASS("simplify-cfg", SimplifyCFGPass())
FUNCTION_PASS("sink", SinkingPass())
@@ -209,7 +223,6 @@ FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass())
FUNCTION_PASS("sroa", SROA())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
-FUNCTION_PASS("unroll", LoopUnrollPass())
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
FUNCTION_PASS("verify<loops>", LoopVerifierPass())
@@ -217,14 +230,26 @@ FUNCTION_PASS("verify<memoryssa>", MemorySSAVerifierPass())
FUNCTION_PASS("verify<regions>", RegionInfoVerifierPass())
FUNCTION_PASS("view-cfg", CFGViewerPass())
FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
+FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
+FUNCTION_PASS("msan", MemorySanitizerPass())
+FUNCTION_PASS("tsan", ThreadSanitizerPass())
#undef FUNCTION_PASS
+#ifndef FUNCTION_PASS_WITH_PARAMS
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
+#endif
+FUNCTION_PASS_WITH_PARAMS("unroll",
+ [](LoopUnrollOptions Opts) { return LoopUnrollPass(Opts); },
+ parseLoopUnrollOptions)
+#undef FUNCTION_PASS_WITH_PARAMS
+
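For reference, a sketch of what the FUNCTION_PASS_WITH_PARAMS expansion in PassBuilder::parseFunctionPass reduces to for this entry once the lambda is inlined; a pipeline string such as unroll<partial;no-runtime> therefore builds a LoopUnrollPass with partial unrolling on and runtime unrolling off:

if (checkParametrizedPassName(Name, "unroll")) {
  auto Params = parsePassParameters(parseLoopUnrollOptions, Name, "unroll");
  if (!Params)
    return Params.takeError();
  FPM.addPass(LoopUnrollPass(Params.get()));
  return Error::success();
}
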
#ifndef LOOP_ANALYSIS
#define LOOP_ANALYSIS(NAME, CREATE_PASS)
#endif
LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
LOOP_ANALYSIS("access-info", LoopAccessAnalysis())
LOOP_ANALYSIS("ivusers", IVUsersAnalysis())
+LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#undef LOOP_ANALYSIS
#ifndef LOOP_PASS
diff --git a/contrib/llvm/lib/Passes/StandardInstrumentations.cpp b/contrib/llvm/lib/Passes/StandardInstrumentations.cpp
new file mode 100644
index 000000000000..a1dfc39d472c
--- /dev/null
+++ b/contrib/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -0,0 +1,243 @@
+//===- Standard pass instrumentations handling ----------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines IR-printing pass instrumentation callbacks as well as the
+/// StandardInstrumentations class that manages standard pass instrumentations.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Passes/StandardInstrumentations.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extract the Module out of the \p IR unit. Also fills a textual description
+/// of \p IR for use in the header when printing.
+Optional<std::pair<const Module *, std::string>> unwrapModule(Any IR) {
+ if (any_isa<const Module *>(IR))
+ return std::make_pair(any_cast<const Module *>(IR), std::string());
+
+ if (any_isa<const Function *>(IR)) {
+ const Function *F = any_cast<const Function *>(IR);
+ if (!llvm::isFunctionInPrintList(F->getName()))
+ return None;
+ const Module *M = F->getParent();
+ return std::make_pair(M, formatv(" (function: {0})", F->getName()).str());
+ }
+
+ if (any_isa<const LazyCallGraph::SCC *>(IR)) {
+ const LazyCallGraph::SCC *C = any_cast<const LazyCallGraph::SCC *>(IR);
+ for (const LazyCallGraph::Node &N : *C) {
+ const Function &F = N.getFunction();
+ if (!F.isDeclaration() && isFunctionInPrintList(F.getName())) {
+ const Module *M = F.getParent();
+ return std::make_pair(M, formatv(" (scc: {0})", C->getName()).str());
+ }
+ }
+ return None;
+ }
+
+ if (any_isa<const Loop *>(IR)) {
+ const Loop *L = any_cast<const Loop *>(IR);
+ const Function *F = L->getHeader()->getParent();
+ if (!isFunctionInPrintList(F->getName()))
+ return None;
+ const Module *M = F->getParent();
+ std::string LoopName;
+ raw_string_ostream ss(LoopName);
+ L->getHeader()->printAsOperand(ss, false);
+ return std::make_pair(M, formatv(" (loop: {0})", ss.str()).str());
+ }
+
+ llvm_unreachable("Unknown IR unit");
+}
+
+void printIR(const Module *M, StringRef Banner, StringRef Extra = StringRef()) {
+ dbgs() << Banner << Extra << "\n";
+ M->print(dbgs(), nullptr, false);
+}
+void printIR(const Function *F, StringRef Banner,
+ StringRef Extra = StringRef()) {
+ if (!llvm::isFunctionInPrintList(F->getName()))
+ return;
+ dbgs() << Banner << Extra << "\n" << static_cast<const Value &>(*F);
+}
+void printIR(const LazyCallGraph::SCC *C, StringRef Banner,
+ StringRef Extra = StringRef()) {
+ bool BannerPrinted = false;
+ for (const LazyCallGraph::Node &N : *C) {
+ const Function &F = N.getFunction();
+ if (!F.isDeclaration() && llvm::isFunctionInPrintList(F.getName())) {
+ if (!BannerPrinted) {
+ dbgs() << Banner << Extra << "\n";
+ BannerPrinted = true;
+ }
+ F.print(dbgs());
+ }
+ }
+}
+void printIR(const Loop *L, StringRef Banner) {
+ const Function *F = L->getHeader()->getParent();
+ if (!llvm::isFunctionInPrintList(F->getName()))
+ return;
+ llvm::printLoop(const_cast<Loop &>(*L), dbgs(), Banner);
+}
+
+/// Generic IR-printing helper that unpacks a pointer to an IR unit wrapped in
+/// llvm::Any and does the actual printing job.
+void unwrapAndPrint(Any IR, StringRef Banner, bool ForceModule = false) {
+ if (ForceModule) {
+ if (auto UnwrappedModule = unwrapModule(IR))
+ printIR(UnwrappedModule->first, Banner, UnwrappedModule->second);
+ return;
+ }
+
+ if (any_isa<const Module *>(IR)) {
+ const Module *M = any_cast<const Module *>(IR);
+ assert(M && "module should be valid for printing");
+ printIR(M, Banner);
+ return;
+ }
+
+ if (any_isa<const Function *>(IR)) {
+ const Function *F = any_cast<const Function *>(IR);
+ assert(F && "function should be valid for printing");
+ printIR(F, Banner);
+ return;
+ }
+
+ if (any_isa<const LazyCallGraph::SCC *>(IR)) {
+ const LazyCallGraph::SCC *C = any_cast<const LazyCallGraph::SCC *>(IR);
+ assert(C && "scc should be valid for printing");
+ std::string Extra = formatv(" (scc: {0})", C->getName());
+ printIR(C, Banner, Extra);
+ return;
+ }
+
+ if (any_isa<const Loop *>(IR)) {
+ const Loop *L = any_cast<const Loop *>(IR);
+ assert(L && "Loop should be valid for printing");
+ printIR(L, Banner);
+ return;
+ }
+ llvm_unreachable("Unknown wrapped IR type");
+}
+
+} // namespace
+
+PrintIRInstrumentation::~PrintIRInstrumentation() {
+ assert(ModuleDescStack.empty() && "ModuleDescStack is not empty at exit");
+}
+
+void PrintIRInstrumentation::pushModuleDesc(StringRef PassID, Any IR) {
+ assert(StoreModuleDesc);
+ const Module *M = nullptr;
+ std::string Extra;
+ if (auto UnwrappedModule = unwrapModule(IR))
+ std::tie(M, Extra) = UnwrappedModule.getValue();
+ ModuleDescStack.emplace_back(M, Extra, PassID);
+}
+
+PrintIRInstrumentation::PrintModuleDesc
+PrintIRInstrumentation::popModuleDesc(StringRef PassID) {
+ assert(!ModuleDescStack.empty() && "empty ModuleDescStack");
+ PrintModuleDesc ModuleDesc = ModuleDescStack.pop_back_val();
+ assert(std::get<2>(ModuleDesc).equals(PassID) && "malformed ModuleDescStack");
+ return ModuleDesc;
+}
+
+bool PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) {
+ if (PassID.startswith("PassManager<") || PassID.contains("PassAdaptor<"))
+ return true;
+
+ // Save the Module for AfterPassInvalidated operations.
+ // Note: here we rely on the fact that we do not change modules while
+ // traversing the pipeline, so the latest captured module is good
+ // for all print operations that have not happened yet.
+ if (StoreModuleDesc && llvm::shouldPrintAfterPass(PassID))
+ pushModuleDesc(PassID, IR);
+
+ if (!llvm::shouldPrintBeforePass(PassID))
+ return true;
+
+ SmallString<20> Banner = formatv("*** IR Dump Before {0} ***", PassID);
+ unwrapAndPrint(IR, Banner, llvm::forcePrintModuleIR());
+ return true;
+}
+
+void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) {
+ if (PassID.startswith("PassManager<") || PassID.contains("PassAdaptor<"))
+ return;
+
+ if (!llvm::shouldPrintAfterPass(PassID))
+ return;
+
+ if (StoreModuleDesc)
+ popModuleDesc(PassID);
+
+ SmallString<20> Banner = formatv("*** IR Dump After {0} ***", PassID);
+ unwrapAndPrint(IR, Banner, llvm::forcePrintModuleIR());
+}
+
+void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
+ if (!StoreModuleDesc || !llvm::shouldPrintAfterPass(PassID))
+ return;
+
+ if (PassID.startswith("PassManager<") || PassID.contains("PassAdaptor<"))
+ return;
+
+ const Module *M;
+ std::string Extra;
+ StringRef StoredPassID;
+ std::tie(M, Extra, StoredPassID) = popModuleDesc(PassID);
+ // Additional filtering (e.g. -filter-print-func) can lead to module
+ // printing being skipped.
+ if (!M)
+ return;
+
+ SmallString<20> Banner =
+ formatv("*** IR Dump After {0} *** invalidated: ", PassID);
+ printIR(M, Banner, Extra);
+}
+
+void PrintIRInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ // The BeforePass callback is not just for printing; it also saves a Module
+ // for later use in AfterPassInvalidated.
+ StoreModuleDesc = llvm::forcePrintModuleIR() && llvm::shouldPrintAfterPass();
+ if (llvm::shouldPrintBeforePass() || StoreModuleDesc)
+ PIC.registerBeforePassCallback(
+ [this](StringRef P, Any IR) { return this->printBeforePass(P, IR); });
+
+ if (llvm::shouldPrintAfterPass()) {
+ PIC.registerAfterPassCallback(
+ [this](StringRef P, Any IR) { this->printAfterPass(P, IR); });
+ PIC.registerAfterPassInvalidatedCallback(
+ [this](StringRef P) { this->printAfterPassInvalidated(P); });
+ }
+}
+
+void StandardInstrumentations::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ PrintIR.registerCallbacks(PIC);
+ TimePasses.registerCallbacks(PIC);
+}
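
Typical wiring for this class, shown only as a sketch (variable names are assumptions): the PassInstrumentationCallbacks object is the same PIC that the pass-instrumentation analysis entries in PassRegistry.def hand to each analysis manager.

PassInstrumentationCallbacks PIC;
StandardInstrumentations SI;
SI.registerCallbacks(PIC);   // installs the print-IR and time-passes hooks
// A pointer to PIC is then passed to the PassBuilder so that every analysis
// manager can expose it through PassInstrumentationAnalysis.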
diff --git a/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index b3c2b182e76c..b2dde3406a63 100644
--- a/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -83,7 +83,7 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
return Counter::getZero();
// Group the terms by counter ID.
- llvm::sort(Terms.begin(), Terms.end(), [](const Term &LHS, const Term &RHS) {
+ llvm::sort(Terms, [](const Term &LHS, const Term &RHS) {
return LHS.CounterID < RHS.CounterID;
});
@@ -207,12 +207,6 @@ Error CoverageMapping::loadFunctionRecord(
else
OrigFuncName = getFuncNameWithoutPrefix(OrigFuncName, Record.Filenames[0]);
- // Don't load records for (filenames, function) pairs we've already seen.
- auto FilenamesHash = hash_combine_range(Record.Filenames.begin(),
- Record.Filenames.end());
- if (!RecordProvenance[FilenamesHash].insert(hash_value(OrigFuncName)).second)
- return Error::success();
-
CounterMappingContext Ctx(Record.Expressions);
std::vector<uint64_t> Counts;
@@ -230,6 +224,15 @@ Error CoverageMapping::loadFunctionRecord(
assert(!Record.MappingRegions.empty() && "Function has no regions");
+ // This coverage record is a zero region for a function that's unused in
+ // some TU, but used in a different TU. Ignore it. The coverage maps from
+ // the other TU will either be loaded (providing full region counts) or they
+ // won't (in which case, by ignoring this record, we avoid reporting the
+ // function as uncovered when it has non-zero counts in the profile).
+ if (Record.MappingRegions.size() == 1 &&
+ Record.MappingRegions[0].Count.isZero() && Counts[0] > 0)
+ return Error::success();
+
FunctionRecord Function(OrigFuncName, Record.Filenames);
for (const auto &Region : Record.MappingRegions) {
Expected<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
@@ -239,11 +242,12 @@ Error CoverageMapping::loadFunctionRecord(
}
Function.pushRegion(Region, *ExecutionCount);
}
- if (Function.CountedRegions.size() != Record.MappingRegions.size()) {
- FuncCounterMismatches.emplace_back(Record.FunctionName,
- Function.CountedRegions.size());
+
+ // Don't create records for (filenames, function) pairs we've already seen.
+ auto FilenamesHash = hash_combine_range(Record.Filenames.begin(),
+ Record.Filenames.end());
+ if (!RecordProvenance[FilenamesHash].insert(hash_value(OrigFuncName)).second)
return Error::success();
- }
Functions.push_back(std::move(Function));
return Error::success();
@@ -459,8 +463,7 @@ class SegmentBuilder {
/// Sort a nested sequence of regions from a single file.
static void sortNestedRegions(MutableArrayRef<CountedRegion> Regions) {
- llvm::sort(Regions.begin(), Regions.end(), [](const CountedRegion &LHS,
- const CountedRegion &RHS) {
+ llvm::sort(Regions, [](const CountedRegion &LHS, const CountedRegion &RHS) {
if (LHS.startLoc() != RHS.startLoc())
return LHS.startLoc() < RHS.startLoc();
if (LHS.endLoc() != RHS.endLoc())
@@ -557,7 +560,7 @@ std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const {
for (const auto &Function : getCoveredFunctions())
Filenames.insert(Filenames.end(), Function.Filenames.begin(),
Function.Filenames.end());
- llvm::sort(Filenames.begin(), Filenames.end());
+ llvm::sort(Filenames);
auto Last = std::unique(Filenames.begin(), Filenames.end());
Filenames.erase(Last, Filenames.end());
return Filenames;
diff --git a/contrib/llvm/lib/ProfileData/GCOV.cpp b/contrib/llvm/lib/ProfileData/GCOV.cpp
index c9155439ec46..b687346a2c05 100644
--- a/contrib/llvm/lib/ProfileData/GCOV.cpp
+++ b/contrib/llvm/lib/ProfileData/GCOV.cpp
@@ -111,9 +111,7 @@ void GCOVFile::print(raw_ostream &OS) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dump - Dump GCOVFile content to dbgs() for debugging purposes.
-LLVM_DUMP_METHOD void GCOVFile::dump() const {
- print(dbgs());
-}
+LLVM_DUMP_METHOD void GCOVFile::dump() const { print(dbgs()); }
#endif
/// collectLineCounts - Collect line counts. This must be used after
@@ -359,9 +357,7 @@ void GCOVFunction::print(raw_ostream &OS) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dump - Dump GCOVFunction content to dbgs() for debugging purposes.
-LLVM_DUMP_METHOD void GCOVFunction::dump() const {
- print(dbgs());
-}
+LLVM_DUMP_METHOD void GCOVFunction::dump() const { print(dbgs()); }
#endif
/// collectLineCounts - Collect line counts. This must be used after
@@ -437,12 +433,135 @@ void GCOVBlock::print(raw_ostream &OS) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dump - Dump GCOVBlock content to dbgs() for debugging purposes.
-LLVM_DUMP_METHOD void GCOVBlock::dump() const {
- print(dbgs());
-}
+LLVM_DUMP_METHOD void GCOVBlock::dump() const { print(dbgs()); }
#endif
//===----------------------------------------------------------------------===//
+// Cycles detection
+//
+// The algorithm in GCC is based on the algorithm by Hawick & James:
+// "Enumerating Circuits and Loops in Graphs with Self-Arcs and Multiple-Arcs"
+// http://complexity.massey.ac.nz/cstn/013/cstn-013.pdf.
+
+/// Get the count for the detected cycle.
+uint64_t GCOVBlock::getCycleCount(const Edges &Path) {
+ uint64_t CycleCount = std::numeric_limits<uint64_t>::max();
+ for (auto E : Path) {
+ CycleCount = std::min(E->CyclesCount, CycleCount);
+ }
+ for (auto E : Path) {
+ E->CyclesCount -= CycleCount;
+ }
+ return CycleCount;
+}
+
+/// Unblock a vertex previously marked as blocked.
+void GCOVBlock::unblock(const GCOVBlock *U, BlockVector &Blocked,
+ BlockVectorLists &BlockLists) {
+ auto it = find(Blocked, U);
+ if (it == Blocked.end()) {
+ return;
+ }
+
+ const size_t index = it - Blocked.begin();
+ Blocked.erase(it);
+
+ const BlockVector ToUnblock(BlockLists[index]);
+ BlockLists.erase(BlockLists.begin() + index);
+ for (auto GB : ToUnblock) {
+ GCOVBlock::unblock(GB, Blocked, BlockLists);
+ }
+}
+
+bool GCOVBlock::lookForCircuit(const GCOVBlock *V, const GCOVBlock *Start,
+ Edges &Path, BlockVector &Blocked,
+ BlockVectorLists &BlockLists,
+ const BlockVector &Blocks, uint64_t &Count) {
+ Blocked.push_back(V);
+ BlockLists.emplace_back(BlockVector());
+ bool FoundCircuit = false;
+
+ for (auto E : V->dsts()) {
+ const GCOVBlock *W = &E->Dst;
+ if (W < Start || find(Blocks, W) == Blocks.end()) {
+ continue;
+ }
+
+ Path.push_back(E);
+
+ if (W == Start) {
+ // We've found a cycle.
+ Count += GCOVBlock::getCycleCount(Path);
+ FoundCircuit = true;
+ } else if (find(Blocked, W) == Blocked.end() && // W is not blocked.
+ GCOVBlock::lookForCircuit(W, Start, Path, Blocked, BlockLists,
+ Blocks, Count)) {
+ FoundCircuit = true;
+ }
+
+ Path.pop_back();
+ }
+
+ if (FoundCircuit) {
+ GCOVBlock::unblock(V, Blocked, BlockLists);
+ } else {
+ for (auto E : V->dsts()) {
+ const GCOVBlock *W = &E->Dst;
+ if (W < Start || find(Blocks, W) == Blocks.end()) {
+ continue;
+ }
+ const size_t index = find(Blocked, W) - Blocked.begin();
+ BlockVector &List = BlockLists[index];
+ if (find(List, V) == List.end()) {
+ List.push_back(V);
+ }
+ }
+ }
+
+ return FoundCircuit;
+}
+
+/// Accumulate the cycle counts for the list of blocks which lie on the same line.
+void GCOVBlock::getCyclesCount(const BlockVector &Blocks, uint64_t &Count) {
+ for (auto Block : Blocks) {
+ Edges Path;
+ BlockVector Blocked;
+ BlockVectorLists BlockLists;
+
+ GCOVBlock::lookForCircuit(Block, Block, Path, Blocked, BlockLists, Blocks,
+ Count);
+ }
+}
+
+/// Get the count for the list of blocks which lie on the same line.
+uint64_t GCOVBlock::getLineCount(const BlockVector &Blocks) {
+ uint64_t Count = 0;
+
+ for (auto Block : Blocks) {
+ if (Block->getNumSrcEdges() == 0) {
+ // The block has no predecessors and a non-null counter (this can be
+ // the case for the entry block of a function).
+ Count += Block->getCount();
+ } else {
+ // Add counts from predecessors that are not on the same line.
+ for (auto E : Block->srcs()) {
+ const GCOVBlock *W = &E->Src;
+ if (find(Blocks, W) == Blocks.end()) {
+ Count += E->Count;
+ }
+ }
+ }
+ for (auto E : Block->dsts()) {
+ E->CyclesCount = E->Count;
+ }
+ }
+
+ GCOVBlock::getCyclesCount(Blocks, Count);
+
+ return Count;
+}
+
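A worked example under assumed counts: for a line consisting of a single block whose entry edge from outside the line has count 2 and whose self-edge has count 5, getLineCount first adds 2 for the external predecessor, then getCyclesCount finds the one circuit (the self-edge) with a minimum remaining count of 5 and adds it, giving a line count of 7. The subtraction in getCycleCount is what prevents circuits that share edges from being counted more than once.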
+//===----------------------------------------------------------------------===//
// FileInfo implementation.
// Safe integer division, returns 0 if numerator is 0.
@@ -578,8 +697,8 @@ FileInfo::openCoveragePath(StringRef CoveragePath) {
return llvm::make_unique<raw_null_ostream>();
std::error_code EC;
- auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath, EC,
- sys::fs::F_Text);
+ auto OS =
+ llvm::make_unique<raw_fd_ostream>(CoveragePath, EC, sys::fs::F_Text);
if (EC) {
errs() << EC.message() << "\n";
return llvm::make_unique<raw_null_ostream>();
@@ -593,7 +712,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename,
SmallVector<StringRef, 4> Filenames;
for (const auto &LI : LineInfo)
Filenames.push_back(LI.first());
- llvm::sort(Filenames.begin(), Filenames.end());
+ llvm::sort(Filenames);
for (StringRef Filename : Filenames) {
auto AllLines = LineConsumer(Filename);
@@ -628,17 +747,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename,
// Add up the block counts to form line counts.
DenseMap<const GCOVFunction *, bool> LineExecs;
- uint64_t LineCount = 0;
for (const GCOVBlock *Block : Blocks) {
- if (Options.AllBlocks) {
- // Only take the highest block count for that line.
- uint64_t BlockCount = Block->getCount();
- LineCount = LineCount > BlockCount ? LineCount : BlockCount;
- } else {
- // Sum up all of the block counts.
- LineCount += Block->getCount();
- }
-
if (Options.FuncCoverage) {
// This is a slightly convoluted way to most accurately gather line
// statistics for functions. Basically what is happening is that we
@@ -674,6 +783,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename,
}
}
+ const uint64_t LineCount = GCOVBlock::getLineCount(Blocks);
if (LineCount == 0)
CovOS << " #####:";
else {
diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp
index 544a77ec20a5..aaa8000ff2f9 100644
--- a/contrib/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp
@@ -252,11 +252,12 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
// data, its original linkage must be non-internal.
std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
if (!InLTO) {
- StringRef FileName = (StaticFuncFullModulePrefix
- ? F.getParent()->getName()
- : sys::path::filename(F.getParent()->getName()));
- if (StaticFuncFullModulePrefix && StaticFuncStripDirNamePrefix != 0)
- FileName = stripDirPrefix(FileName, StaticFuncStripDirNamePrefix);
+ StringRef FileName(F.getParent()->getSourceFileName());
+ uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1;
+ if (StripLevel < StaticFuncStripDirNamePrefix)
+ StripLevel = StaticFuncStripDirNamePrefix;
+ if (StripLevel)
+ FileName = stripDirPrefix(FileName, StripLevel);
return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version);
}
diff --git a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
index 3b704158a5c5..eaf0eb04bfbf 100644
--- a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -14,6 +14,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
@@ -23,6 +24,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SymbolRemappingReader.h"
#include "llvm/Support/SwapByteOrder.h"
#include <algorithm>
#include <cctype>
@@ -88,16 +90,29 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
}
Expected<std::unique_ptr<IndexedInstrProfReader>>
-IndexedInstrProfReader::create(const Twine &Path) {
+IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
// Set up the buffer to read.
auto BufferOrError = setupMemoryBuffer(Path);
if (Error E = BufferOrError.takeError())
return std::move(E);
- return IndexedInstrProfReader::create(std::move(BufferOrError.get()));
+
+ // Set up the remapping buffer if requested.
+ std::unique_ptr<MemoryBuffer> RemappingBuffer;
+ std::string RemappingPathStr = RemappingPath.str();
+ if (!RemappingPathStr.empty()) {
+ auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr);
+ if (Error E = RemappingBufferOrError.takeError())
+ return std::move(E);
+ RemappingBuffer = std::move(RemappingBufferOrError.get());
+ }
+
+ return IndexedInstrProfReader::create(std::move(BufferOrError.get()),
+ std::move(RemappingBuffer));
}
Expected<std::unique_ptr<IndexedInstrProfReader>>
-IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
+IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
+ std::unique_ptr<MemoryBuffer> RemappingBuffer) {
// Sanity check the buffer.
if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<unsigned>::max())
return make_error<InstrProfError>(instrprof_error::too_large);
@@ -105,7 +120,8 @@ IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
// Create the reader.
if (!IndexedInstrProfReader::hasFormat(*Buffer))
return make_error<InstrProfError>(instrprof_error::bad_magic);
- auto Result = llvm::make_unique<IndexedInstrProfReader>(std::move(Buffer));
+ auto Result = llvm::make_unique<IndexedInstrProfReader>(
+ std::move(Buffer), std::move(RemappingBuffer));
// Initialize the reader and return the result.
if (Error E = initializeReader(*Result))
@@ -587,6 +603,124 @@ InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
RecordIterator = HashTable->data_begin();
}
+namespace {
+/// A remapper that does not apply any remappings.
+class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
+ InstrProfReaderIndexBase &Underlying;
+
+public:
+ InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
+ : Underlying(Underlying) {}
+
+ Error getRecords(StringRef FuncName,
+ ArrayRef<NamedInstrProfRecord> &Data) override {
+ return Underlying.getRecords(FuncName, Data);
+ }
+};
+}
+
+/// A remapper that applies remappings based on a symbol remapping file.
+template <typename HashTableImpl>
+class llvm::InstrProfReaderItaniumRemapper
+ : public InstrProfReaderRemapper {
+public:
+ InstrProfReaderItaniumRemapper(
+ std::unique_ptr<MemoryBuffer> RemapBuffer,
+ InstrProfReaderIndex<HashTableImpl> &Underlying)
+ : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
+ }
+
+ /// Extract the original function name from a PGO function name.
+ static StringRef extractName(StringRef Name) {
+ // We can have multiple :-separated pieces; there can be pieces both
+ // before and after the mangled name. Find the first part that starts
+ // with '_Z'; we'll assume that's the mangled name we want.
+ std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
+ while (true) {
+ Parts = Parts.second.split(':');
+ if (Parts.first.startswith("_Z"))
+ return Parts.first;
+ if (Parts.second.empty())
+ return Name;
+ }
+ }
+
+ /// Given a mangled name extracted from a PGO function name, and a new
+ /// form for that mangled name, reconstitute the name.
+ static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
+ StringRef Replacement,
+ SmallVectorImpl<char> &Out) {
+ Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size());
+ Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin());
+ Out.insert(Out.end(), Replacement.begin(), Replacement.end());
+ Out.insert(Out.end(), ExtractedName.end(), OrigName.end());
+ }
+
+ Error populateRemappings() override {
+ if (Error E = Remappings.read(*RemapBuffer))
+ return E;
+ for (StringRef Name : Underlying.HashTable->keys()) {
+ StringRef RealName = extractName(Name);
+ if (auto Key = Remappings.insert(RealName)) {
+ // FIXME: We could theoretically map the same equivalence class to
+ // multiple names in the profile data. If that happens, we should
+ // return NamedInstrProfRecords from all of them.
+ MappedNames.insert({Key, RealName});
+ }
+ }
+ return Error::success();
+ }
+
+ Error getRecords(StringRef FuncName,
+ ArrayRef<NamedInstrProfRecord> &Data) override {
+ StringRef RealName = extractName(FuncName);
+ if (auto Key = Remappings.lookup(RealName)) {
+ StringRef Remapped = MappedNames.lookup(Key);
+ if (!Remapped.empty()) {
+ if (RealName.begin() == FuncName.begin() &&
+ RealName.end() == FuncName.end())
+ FuncName = Remapped;
+ else {
+ // Try rebuilding the name from the given remapping.
+ SmallString<256> Reconstituted;
+ reconstituteName(FuncName, RealName, Remapped, Reconstituted);
+ Error E = Underlying.getRecords(Reconstituted, Data);
+ if (!E)
+ return E;
+
+ // If we failed because the name doesn't exist, fall back to asking
+ // about the original name.
+ if (Error Unhandled = handleErrors(
+ std::move(E), [](std::unique_ptr<InstrProfError> Err) {
+ return Err->get() == instrprof_error::unknown_function
+ ? Error::success()
+ : Error(std::move(Err));
+ }))
+ return Unhandled;
+ }
+ }
+ }
+ return Underlying.getRecords(FuncName, Data);
+ }
+
+private:
+ /// The memory buffer containing the remapping configuration. Remappings
+ /// holds pointers into this buffer.
+ std::unique_ptr<MemoryBuffer> RemapBuffer;
+
+ /// The mangling remapper.
+ SymbolRemappingReader Remappings;
+
+ /// Mapping from mangled name keys to the name used for the key in the
+ /// profile data.
+ /// FIXME: Can we store a location within the on-disk hash table instead of
+ /// redoing lookup?
+ DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
+
+ /// The real profile data reader.
+ InstrProfReaderIndex<HashTableImpl> &Underlying;
+};
+
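As a concrete illustration of the name handling (the strings are made up): a profile key such as "main.cpp:_Z3foov" is split on ':' until a piece starting with "_Z" is found, so extractName returns "_Z3foov". If the remapping file maps that mangled name to, say, "_Z3barv", reconstituteName splices the replacement back between the untouched prefix and suffix, yielding "main.cpp:_Z3barv", which is then looked up in the underlying index.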
bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
using namespace support;
@@ -683,10 +817,22 @@ Error IndexedInstrProfReader::readHeader() {
uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
// The rest of the file is an on disk hash table.
- InstrProfReaderIndexBase *IndexPtr = nullptr;
- IndexPtr = new InstrProfReaderIndex<OnDiskHashTableImplV3>(
- Start + HashOffset, Cur, Start, HashType, FormatVersion);
- Index.reset(IndexPtr);
+ auto IndexPtr =
+ llvm::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
+ Start + HashOffset, Cur, Start, HashType, FormatVersion);
+
+ // Load the remapping table now if requested.
+ if (RemappingBuffer) {
+ Remapper = llvm::make_unique<
+ InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
+ std::move(RemappingBuffer), *IndexPtr);
+ if (Error E = Remapper->populateRemappings())
+ return E;
+ } else {
+ Remapper = llvm::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
+ }
+ Index = std::move(IndexPtr);
+
return success();
}
@@ -707,7 +853,7 @@ Expected<InstrProfRecord>
IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
uint64_t FuncHash) {
ArrayRef<NamedInstrProfRecord> Data;
- Error Err = Index->getRecords(FuncName, Data);
+ Error Err = Remapper->getRecords(FuncName, Data);
if (Err)
return std::move(Err);
// Found it. Look for counters with the right hash.
diff --git a/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index 62f00d693c68..3a8462fd9b0d 100644
--- a/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -58,7 +58,7 @@ void SampleProfileSummaryBuilder::addRecord(
void ProfileSummaryBuilder::computeDetailedSummary() {
if (DetailedSummaryCutoffs.empty())
return;
- llvm::sort(DetailedSummaryCutoffs.begin(), DetailedSummaryCutoffs.end());
+ llvm::sort(DetailedSummaryCutoffs);
auto Iter = CountFrequencies.begin();
const auto End = CountFrequencies.end();
diff --git a/contrib/llvm/lib/ProfileData/SampleProf.cpp b/contrib/llvm/lib/ProfileData/SampleProf.cpp
index 30438ba7962a..1a124415f179 100644
--- a/contrib/llvm/lib/ProfileData/SampleProf.cpp
+++ b/contrib/llvm/lib/ProfileData/SampleProf.cpp
@@ -26,6 +26,14 @@
using namespace llvm;
using namespace sampleprof;
+namespace llvm {
+namespace sampleprof {
+SampleProfileFormat FunctionSamples::Format;
+DenseMap<uint64_t, StringRef> FunctionSamples::GUIDToFuncNameMap;
+Module *FunctionSamples::CurrentModule;
+} // namespace sampleprof
+} // namespace llvm
+
namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
@@ -59,6 +67,8 @@ class SampleProfErrorCategoryType : public std::error_category {
return "Unimplemented feature";
case sampleprof_error::counter_overflow:
return "Counter overflow";
+ case sampleprof_error::ostream_seek_unsupported:
+ return "Ostream does not support seek";
}
llvm_unreachable("A value of sampleprof_error has no message.");
}
diff --git a/contrib/llvm/lib/ProfileData/SampleProfReader.cpp b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp
index 79335e67cd98..a68d1e9d3ab0 100644
--- a/contrib/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MD5.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -320,6 +321,21 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
}
template <typename T>
+ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
+ std::error_code EC;
+
+ if (Data + sizeof(T) > End) {
+ EC = sampleprof_error::truncated;
+ reportError(0, EC.message());
+ return EC;
+ }
+
+ using namespace support;
+ T Val = endian::readNext<T, little, unaligned>(Data);
+ return Val;
+}
+
+template <typename T>
inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
std::error_code EC;
auto Idx = readNumber<uint32_t>();
@@ -423,29 +439,51 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderBinary::read() {
- while (!at_eof()) {
- auto NumHeadSamples = readNumber<uint64_t>();
- if (std::error_code EC = NumHeadSamples.getError())
- return EC;
+std::error_code SampleProfileReaderBinary::readFuncProfile() {
+ auto NumHeadSamples = readNumber<uint64_t>();
+ if (std::error_code EC = NumHeadSamples.getError())
+ return EC;
- auto FName(readStringFromTable());
- if (std::error_code EC = FName.getError())
- return EC;
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
- Profiles[*FName] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[*FName];
- FProfile.setName(*FName);
+ Profiles[*FName] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[*FName];
+ FProfile.setName(*FName);
- FProfile.addHeadSamples(*NumHeadSamples);
+ FProfile.addHeadSamples(*NumHeadSamples);
+
+ if (std::error_code EC = readProfile(FProfile))
+ return EC;
+ return sampleprof_error::success;
+}
- if (std::error_code EC = readProfile(FProfile))
+std::error_code SampleProfileReaderBinary::read() {
+ while (!at_eof()) {
+ if (std::error_code EC = readFuncProfile())
return EC;
}
return sampleprof_error::success;
}
+std::error_code SampleProfileReaderCompactBinary::read() {
+ for (auto Name : FuncsToUse) {
+ auto GUID = std::to_string(MD5Hash(Name));
+ auto iter = FuncOffsetTable.find(StringRef(GUID));
+ if (iter == FuncOffsetTable.end())
+ continue;
+ const uint8_t *SavedData = Data;
+ Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
+ iter->second;
+ if (std::error_code EC = readFuncProfile())
+ return EC;
+ Data = SavedData;
+ }
+ return sampleprof_error::success;
+}
+
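The compact-binary reader above keys its function offset table by the decimal MD5 GUID of each function name and deserializes only the profiles that the current module references, saving and restoring its Data cursor around each jump. A minimal standalone sketch of that save-seek-parse loop over an in-memory buffer (hypothetical ParseOne callback and a std::hash stand-in for MD5Hash, not the LLVM API):

#include <cstdint>
#include <functional>
#include <map>
#include <string>
#include <vector>

// Table maps a key to a byte offset inside Buf; ParseOne decodes one record
// starting at the supplied cursor position.
void readSelected(const std::vector<uint8_t> &Buf,
                  const std::map<std::string, uint64_t> &Table,
                  const std::vector<std::string> &Wanted,
                  const std::function<void(const uint8_t *)> &ParseOne) {
  for (const std::string &Name : Wanted) {
    std::string Key = std::to_string(std::hash<std::string>{}(Name)); // stand-in for MD5Hash
    auto It = Table.find(Key);
    if (It == Table.end())
      continue;                                       // not present in the profile
    const uint8_t *Cursor = Buf.data() + It->second;  // jump straight to the record
    ParseOne(Cursor);                                 // decode just this function's profile
    // Cursor is local here; the LLVM reader instead saves and restores its
    // member Data pointer around the jump.
  }
}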
std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
if (Magic == SPMagic())
return sampleprof_error::success;
@@ -514,6 +552,53 @@ std::error_code SampleProfileReaderBinary::readHeader() {
return sampleprof_error::success;
}
+std::error_code SampleProfileReaderCompactBinary::readHeader() {
+ SampleProfileReaderBinary::readHeader();
+ if (std::error_code EC = readFuncOffsetTable())
+ return EC;
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
+ auto TableOffset = readUnencodedNumber<uint64_t>();
+ if (std::error_code EC = TableOffset.getError())
+ return EC;
+
+ const uint8_t *SavedData = Data;
+ const uint8_t *TableStart =
+ reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
+ *TableOffset;
+ Data = TableStart;
+
+ auto Size = readNumber<uint64_t>();
+ if (std::error_code EC = Size.getError())
+ return EC;
+
+ FuncOffsetTable.reserve(*Size);
+ for (uint32_t I = 0; I < *Size; ++I) {
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+
+ auto Offset = readNumber<uint64_t>();
+ if (std::error_code EC = Offset.getError())
+ return EC;
+
+ FuncOffsetTable[*FName] = *Offset;
+ }
+ End = TableStart;
+ Data = SavedData;
+ return sampleprof_error::success;
+}
+
+void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) {
+ FuncsToUse.clear();
+ for (auto &F : M) {
+ StringRef Fname = F.getName().split('.').first;
+ FuncsToUse.insert(Fname);
+ }
+}
+
std::error_code SampleProfileReaderBinary::readSummaryEntry(
std::vector<ProfileSummaryEntry> &Entries) {
auto Cutoff = readNumber<uint64_t>();
@@ -827,6 +912,40 @@ bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
return Magic == "adcg*704";
}
+std::error_code SampleProfileReaderItaniumRemapper::read() {
+ // If the underlying data is in compact format, we can't remap it because
+ // we don't know what the original function names were.
+ if (getFormat() == SPF_Compact_Binary) {
+ Ctx.diagnose(DiagnosticInfoSampleProfile(
+ Buffer->getBufferIdentifier(),
+ "Profile data remapping cannot be applied to profile data "
+ "in compact format (original mangled names are not available).",
+ DS_Warning));
+ return sampleprof_error::success;
+ }
+
+ if (Error E = Remappings.read(*Buffer)) {
+ handleAllErrors(
+ std::move(E), [&](const SymbolRemappingParseError &ParseError) {
+ reportError(ParseError.getLineNum(), ParseError.getMessage());
+ });
+ return sampleprof_error::malformed;
+ }
+
+ for (auto &Sample : getProfiles())
+ if (auto Key = Remappings.insert(Sample.first()))
+ SampleMap.insert({Key, &Sample.second});
+
+ return sampleprof_error::success;
+}
+
+FunctionSamples *
+SampleProfileReaderItaniumRemapper::getSamplesFor(StringRef Fname) {
+ if (auto Key = Remappings.lookup(Fname))
+ return SampleMap.lookup(Key);
+ return SampleProfileReader::getSamplesFor(Fname);
+}
+
/// Prepare a memory buffer for the contents of \p Filename.
///
/// \returns an error code indicating the status of the buffer.
@@ -859,6 +978,27 @@ SampleProfileReader::create(const Twine &Filename, LLVMContext &C) {
return create(BufferOrError.get(), C);
}
+/// Create a sample profile remapper from the given input, to remap the
+/// function names in the given profile data.
+///
+/// \param Filename The file to open.
+///
+/// \param C The LLVM context to use to emit diagnostics.
+///
+/// \param Underlying The underlying profile data reader to remap.
+///
+/// \returns an error code indicating the status of the created reader.
+ErrorOr<std::unique_ptr<SampleProfileReader>>
+SampleProfileReaderItaniumRemapper::create(
+ const Twine &Filename, LLVMContext &C,
+ std::unique_ptr<SampleProfileReader> Underlying) {
+ auto BufferOrError = setupMemoryBuffer(Filename);
+ if (std::error_code EC = BufferOrError.getError())
+ return EC;
+ return llvm::make_unique<SampleProfileReaderItaniumRemapper>(
+ std::move(BufferOrError.get()), C, std::move(Underlying));
+}
+
/// Create a sample profile reader based on the format of the input data.
///
/// \param B The memory buffer to create the reader from (assumes ownership).
@@ -880,6 +1020,7 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C) {
else
return sampleprof_error::unrecognized_format;
+ FunctionSamples::Format = Reader->getFormat();
if (std::error_code EC = Reader->readHeader())
return EC;
diff --git a/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp
index b4de30118b8b..b1c669ec31c4 100644
--- a/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -22,6 +22,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
@@ -64,6 +66,15 @@ SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterCompactBinary::write(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ if (std::error_code EC = SampleProfileWriter::write(ProfileMap))
+ return EC;
+ if (std::error_code EC = writeFuncOffsetTable())
+ return EC;
+ return sampleprof_error::success;
+}
+
/// Write samples to a text file.
///
/// Note: it may be tempting to implement this in terms of
@@ -168,6 +179,30 @@ std::error_code SampleProfileWriterRawBinary::writeNameTable() {
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
+ auto &OS = *OutputStream;
+
+ // Fill the slot remembered by TableOffset with the offset of FuncOffsetTable.
+ auto &OFS = static_cast<raw_fd_ostream &>(OS);
+ uint64_t FuncOffsetTableStart = OS.tell();
+ if (OFS.seek(TableOffset) == (uint64_t)-1)
+ return sampleprof_error::ostream_seek_unsupported;
+ support::endian::Writer Writer(*OutputStream, support::little);
+ Writer.write(FuncOffsetTableStart);
+ if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1)
+ return sampleprof_error::ostream_seek_unsupported;
+
+ // Write out the table size.
+ encodeULEB128(FuncOffsetTable.size(), OS);
+
+ // Write out FuncOffsetTable.
+ for (auto entry : FuncOffsetTable) {
+ writeNameIdx(entry.first);
+ encodeULEB128(entry.second, OS);
+ }
+ return sampleprof_error::success;
+}
+
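The writer above reserves a fixed-width slot in the header, then seeks back and patches it with the real start of the function offset table once that position is known; this only works on seekable streams, which is why the sampleprof_error::ostream_seek_unsupported code was added. A standalone sketch of the same reserve-and-patch idiom using std::fstream (not the raw_fd_ostream code in the diff):

#include <cstddef>
#include <cstdint>
#include <fstream>

// Write a placeholder, emit the payload, then patch the placeholder with the
// payload's start offset. Assumes the stream is seekable.
void writeWithPatchedOffset(std::fstream &OS, const char *Payload, std::size_t N) {
  std::streampos Slot = OS.tellp();
  uint64_t Zero = 0;
  OS.write(reinterpret_cast<const char *>(&Zero), sizeof(Zero));     // reserve the slot

  std::streampos Start = OS.tellp();
  OS.write(Payload, static_cast<std::streamsize>(N));                // emit the payload

  uint64_t Offset = static_cast<uint64_t>(static_cast<std::streamoff>(Start));
  OS.seekp(Slot);                                                    // go back
  OS.write(reinterpret_cast<const char *>(&Offset), sizeof(Offset)); // patch the slot
  OS.seekp(0, std::ios::end);                                        // resume at the end
}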
std::error_code SampleProfileWriterCompactBinary::writeNameTable() {
auto &OS = *OutputStream;
std::set<StringRef> V;
@@ -215,6 +250,19 @@ std::error_code SampleProfileWriterBinary::writeHeader(
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterCompactBinary::writeHeader(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ support::endian::Writer Writer(*OutputStream, support::little);
+ if (auto EC = SampleProfileWriterBinary::writeHeader(ProfileMap))
+ return EC;
+
+ // Reserve a slot for the offset of function offset table. The slot will
+ // be populated with the offset of FuncOffsetTable later.
+ TableOffset = OutputStream->tell();
+ Writer.write(static_cast<uint64_t>(-2));
+ return sampleprof_error::success;
+}
+
std::error_code SampleProfileWriterBinary::writeSummary() {
auto &OS = *OutputStream;
encodeULEB128(Summary->getTotalCount(), OS);
@@ -283,6 +331,15 @@ std::error_code SampleProfileWriterBinary::write(const FunctionSamples &S) {
return writeBody(S);
}
+std::error_code
+SampleProfileWriterCompactBinary::write(const FunctionSamples &S) {
+ uint64_t Offset = OutputStream->tell();
+ StringRef Name = S.getName();
+ FuncOffsetTable[Name] = Offset;
+ encodeULEB128(S.getHeadSamples(), *OutputStream);
+ return writeBody(S);
+}
+
/// Create a sample profile file writer based on the specified format.
///
/// \param Filename The file to create.
diff --git a/contrib/llvm/lib/Support/AArch64TargetParser.cpp b/contrib/llvm/lib/Support/AArch64TargetParser.cpp
new file mode 100644
index 000000000000..e897137df680
--- /dev/null
+++ b/contrib/llvm/lib/Support/AArch64TargetParser.cpp
@@ -0,0 +1,206 @@
+//===-- AArch64TargetParser - Parser for AArch64 features -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a target parser to recognise AArch64 hardware features
+// such as FPU/CPU/ARCH and extension names.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/AArch64TargetParser.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include <cctype>
+
+using namespace llvm;
+
+static unsigned checkArchVersion(llvm::StringRef Arch) {
+ if (Arch.size() >= 2 && Arch[0] == 'v' && std::isdigit(Arch[1]))
+ return (Arch[1] - 48);
+ return 0;
+}
+
+unsigned AArch64::getDefaultFPU(StringRef CPU, AArch64::ArchKind AK) {
+ if (CPU == "generic")
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].DefaultFPU;
+
+ return StringSwitch<unsigned>(CPU)
+#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, ARM::DEFAULT_FPU)
+#include "../../include/llvm/Support/AArch64TargetParser.def"
+ .Default(ARM::FK_INVALID);
+}
+
+unsigned AArch64::getDefaultExtensions(StringRef CPU, AArch64::ArchKind AK) {
+ if (CPU == "generic")
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].ArchBaseExtensions;
+
+ return StringSwitch<unsigned>(CPU)
+#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, AArch64ARCHNames[static_cast<unsigned>(ArchKind::ID)] \
+ .ArchBaseExtensions | \
+ DEFAULT_EXT)
+#include "../../include/llvm/Support/AArch64TargetParser.def"
+ .Default(AArch64::AEK_INVALID);
+}
+
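The StringSwitch chains above are populated by including a .def file that expands an AARCH64_CPU_NAME entry per CPU, the usual LLVM X-macro pattern. A minimal self-contained illustration of the technique (toy table and names, not the real .def contents):

#include <string>

// Toy X-macro table: each entry carries a name and a default value.
#define CPU_TABLE \
  CPU_ENTRY("alpha", 1) \
  CPU_ENTRY("beta", 2)

int defaultFor(const std::string &CPU) {
  // Expand the table once into a chain of comparisons.
#define CPU_ENTRY(NAME, VALUE) if (CPU == NAME) return VALUE;
  CPU_TABLE
#undef CPU_ENTRY
  return 0; // unknown CPU
}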
+AArch64::ArchKind AArch64::getCPUArchKind(StringRef CPU) {
+ if (CPU == "generic")
+ return ArchKind::ARMV8A;
+
+ return StringSwitch<AArch64::ArchKind>(CPU)
+#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, ArchKind::ID)
+#include "../../include/llvm/Support/AArch64TargetParser.def"
+ .Default(ArchKind::INVALID);
+}
+
+bool AArch64::getExtensionFeatures(unsigned Extensions,
+ std::vector<StringRef> &Features) {
+ if (Extensions == AArch64::AEK_INVALID)
+ return false;
+
+ if (Extensions & AEK_FP)
+ Features.push_back("+fp-armv8");
+ if (Extensions & AEK_SIMD)
+ Features.push_back("+neon");
+ if (Extensions & AEK_CRC)
+ Features.push_back("+crc");
+ if (Extensions & AEK_CRYPTO)
+ Features.push_back("+crypto");
+ if (Extensions & AEK_DOTPROD)
+ Features.push_back("+dotprod");
+ if (Extensions & AEK_FP16FML)
+ Features.push_back("+fp16fml");
+ if (Extensions & AEK_FP16)
+ Features.push_back("+fullfp16");
+ if (Extensions & AEK_PROFILE)
+ Features.push_back("+spe");
+ if (Extensions & AEK_RAS)
+ Features.push_back("+ras");
+ if (Extensions & AEK_LSE)
+ Features.push_back("+lse");
+ if (Extensions & AEK_RDM)
+ Features.push_back("+rdm");
+ if (Extensions & AEK_SVE)
+ Features.push_back("+sve");
+ if (Extensions & AEK_RCPC)
+ Features.push_back("+rcpc");
+
+ return true;
+}
+
+bool AArch64::getArchFeatures(AArch64::ArchKind AK,
+ std::vector<StringRef> &Features) {
+ if (AK == ArchKind::ARMV8_1A)
+ Features.push_back("+v8.1a");
+ if (AK == ArchKind::ARMV8_2A)
+ Features.push_back("+v8.2a");
+ if (AK == ArchKind::ARMV8_3A)
+ Features.push_back("+v8.3a");
+ if (AK == ArchKind::ARMV8_4A)
+ Features.push_back("+v8.4a");
+ if (AK == ArchKind::ARMV8_5A)
+ Features.push_back("+v8.5a");
+
+ return AK != ArchKind::INVALID;
+}
+
+StringRef AArch64::getArchName(AArch64::ArchKind AK) {
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].getName();
+}
+
+StringRef AArch64::getCPUAttr(AArch64::ArchKind AK) {
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].getCPUAttr();
+}
+
+StringRef AArch64::getSubArch(AArch64::ArchKind AK) {
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].getSubArch();
+}
+
+unsigned AArch64::getArchAttr(AArch64::ArchKind AK) {
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].ArchAttr;
+}
+
+StringRef AArch64::getArchExtName(unsigned ArchExtKind) {
+ for (const auto &AE : AArch64ARCHExtNames)
+ if (ArchExtKind == AE.ID)
+ return AE.getName();
+ return StringRef();
+}
+
+StringRef AArch64::getArchExtFeature(StringRef ArchExt) {
+ if (ArchExt.startswith("no")) {
+ StringRef ArchExtBase(ArchExt.substr(2));
+ for (const auto &AE : AArch64ARCHExtNames) {
+ if (AE.NegFeature && ArchExtBase == AE.getName())
+ return StringRef(AE.NegFeature);
+ }
+ }
+
+ for (const auto &AE : AArch64ARCHExtNames)
+ if (AE.Feature && ArchExt == AE.getName())
+ return StringRef(AE.Feature);
+ return StringRef();
+}
+
+StringRef AArch64::getDefaultCPU(StringRef Arch) {
+ ArchKind AK = parseArch(Arch);
+ if (AK == ArchKind::INVALID)
+ return StringRef();
+
+ // Look for multiple AKs to find the default for pair AK+Name.
+ for (const auto &CPU : AArch64CPUNames)
+ if (CPU.ArchID == AK && CPU.Default)
+ return CPU.getName();
+
+ // If we can't find a default then target the architecture instead
+ return "generic";
+}
+
+void AArch64::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
+ for (const auto &Arch : AArch64CPUNames) {
+ if (Arch.ArchID != ArchKind::INVALID)
+ Values.push_back(Arch.getName());
+ }
+}
+
+bool AArch64::isX18ReservedByDefault(const Triple &TT) {
+ return TT.isAndroid() || TT.isOSDarwin() || TT.isOSFuchsia() ||
+ TT.isOSWindows();
+}
+
+// Allows partial match, ex. "v8a" matches "armv8a".
+AArch64::ArchKind AArch64::parseArch(StringRef Arch) {
+ Arch = ARM::getCanonicalArchName(Arch);
+ if (checkArchVersion(Arch) < 8)
+ return ArchKind::INVALID;
+
+ StringRef Syn = ARM::getArchSynonym(Arch);
+ for (const auto A : AArch64ARCHNames) {
+ if (A.getName().endswith(Syn))
+ return A.ID;
+ }
+ return ArchKind::INVALID;
+}
+
+AArch64::ArchExtKind AArch64::parseArchExt(StringRef ArchExt) {
+ for (const auto A : AArch64ARCHExtNames) {
+ if (ArchExt == A.getName())
+ return static_cast<ArchExtKind>(A.ID);
+ }
+ return AArch64::AEK_INVALID;
+}
+
+AArch64::ArchKind AArch64::parseCPUArch(StringRef CPU) {
+ for (const auto C : AArch64CPUNames) {
+ if (CPU == C.getName())
+ return C.ArchID;
+ }
+ return ArchKind::INVALID;
+}
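Taken together, the new file exposes a small lookup API: parse a CPU or arch name, then ask for its default FPU, extensions, and feature strings. A brief usage sketch, assuming the declarations in llvm/Support/AArch64TargetParser.h match the definitions above:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/AArch64TargetParser.h"
#include <vector>

// Resolve a CPU name to its architecture, then collect the subtarget feature
// strings implied by that CPU's default extensions.
std::vector<llvm::StringRef> featuresForCPU(llvm::StringRef CPU) {
  using namespace llvm;
  AArch64::ArchKind AK = AArch64::parseCPUArch(CPU);
  if (AK == AArch64::ArchKind::INVALID)
    return {};
  unsigned Ext = AArch64::getDefaultExtensions(CPU, AK);
  std::vector<StringRef> Features;
  AArch64::getExtensionFeatures(Ext, Features);
  return Features;
}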
diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp
index 1fae0e9b8d6d..a5f4f98c489a 100644
--- a/contrib/llvm/lib/Support/APInt.cpp
+++ b/contrib/llvm/lib/Support/APInt.cpp
@@ -16,8 +16,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/bit.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -78,7 +80,7 @@ void APInt::initSlowCase(uint64_t val, bool isSigned) {
U.pVal[0] = val;
if (isSigned && int64_t(val) < 0)
for (unsigned i = 1; i < getNumWords(); ++i)
- U.pVal[i] = WORD_MAX;
+ U.pVal[i] = WORDTYPE_MAX;
clearUnusedBits();
}
@@ -304,13 +306,13 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
unsigned hiWord = whichWord(hiBit);
// Create an initial mask for the low word with zeros below loBit.
- uint64_t loMask = WORD_MAX << whichBit(loBit);
+ uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
// If hiBit is not aligned, we need a high mask.
unsigned hiShiftAmt = whichBit(hiBit);
if (hiShiftAmt != 0) {
// Create a high mask with zeros above hiBit.
- uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
+ uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
// If loWord and hiWord are equal, then we combine the masks. Otherwise,
// set the bits in hiWord.
if (hiWord == loWord)
@@ -323,7 +325,7 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
// Fill any words between loWord and hiWord with all ones.
for (unsigned word = loWord + 1; word < hiWord; ++word)
- U.pVal[word] = WORD_MAX;
+ U.pVal[word] = WORDTYPE_MAX;
}
/// Toggle every bit to its opposite value.
@@ -354,7 +356,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
// Single word result can be done as a direct bitmask.
if (isSingleWord()) {
- uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
U.VAL &= ~(mask << bitPosition);
U.VAL |= (subBits.U.VAL << bitPosition);
return;
@@ -366,7 +368,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
// Insertion within a single word can be done as a direct bitmask.
if (loWord == hi1Word) {
- uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
U.pVal[loWord] &= ~(mask << loBit);
U.pVal[loWord] |= (subBits.U.VAL << loBit);
return;
@@ -382,7 +384,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
// Mask+insert remaining bits.
unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
if (remainingBits != 0) {
- uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits);
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
U.pVal[hi1Word] &= ~mask;
U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
}
@@ -558,7 +560,7 @@ unsigned APInt::countLeadingOnesSlowCase() const {
unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
if (Count == highWordBits) {
for (i--; i >= 0; --i) {
- if (U.pVal[i] == WORD_MAX)
+ if (U.pVal[i] == WORDTYPE_MAX)
Count += APINT_BITS_PER_WORD;
else {
Count += llvm::countLeadingOnes(U.pVal[i]);
@@ -582,7 +584,7 @@ unsigned APInt::countTrailingZerosSlowCase() const {
unsigned APInt::countTrailingOnesSlowCase() const {
unsigned Count = 0;
unsigned i = 0;
- for (; i < getNumWords() && U.pVal[i] == WORD_MAX; ++i)
+ for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
Count += APINT_BITS_PER_WORD;
if (i < getNumWords())
Count += llvm::countTrailingOnes(U.pVal[i]);
@@ -711,24 +713,20 @@ APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
}
APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
- union {
- double D;
- uint64_t I;
- } T;
- T.D = Double;
+ uint64_t I = bit_cast<uint64_t>(Double);
// Get the sign bit from the highest order bit
- bool isNeg = T.I >> 63;
+ bool isNeg = I >> 63;
// Get the 11-bit exponent and adjust for the 1023 bit bias
- int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
+ int64_t exp = ((I >> 52) & 0x7ff) - 1023;
// If the exponent is negative, the value is < 0 so just return 0.
if (exp < 0)
return APInt(width, 0u);
// Extract the mantissa by clearing the top 12 bits (sign + exponent).
- uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
+ uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
// If the exponent doesn't shift all bits out of the mantissa
if (exp < 52)
@@ -805,12 +803,8 @@ double APInt::roundToDouble(bool isSigned) const {
// The leading bit of mantissa is implicit, so get rid of it.
uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
- union {
- double D;
- uint64_t I;
- } T;
- T.I = sign | (exp << 52) | mantissa;
- return T.D;
+ uint64_t I = sign | (exp << 52) | mantissa;
+ return bit_cast<double>(I);
}
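The two hunks above replace union-based type punning with llvm::bit_cast, which copies the object representation instead of reading an inactive union member. Outside LLVM the same effect is available through std::memcpy (or std::bit_cast in C++20); a minimal sketch:

#include <cstdint>
#include <cstring>

// Reinterpret the bits of a double as a 64-bit integer without undefined
// behaviour: memcpy the object representation rather than punning a union.
uint64_t doubleBits(double D) {
  uint64_t I;
  static_assert(sizeof(I) == sizeof(D), "size mismatch");
  std::memcpy(&I, &D, sizeof(I));
  return I;
}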
// Truncate to new width.
@@ -1253,20 +1247,18 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
// The DEBUG macros here tend to be spam in the debug output if you're not
// debugging this code. Disable them unless KNUTH_DEBUG is defined.
-#pragma push_macro("LLVM_DEBUG")
-#ifndef KNUTH_DEBUG
-#undef LLVM_DEBUG
-#define LLVM_DEBUG(X) \
- do { \
- } while (false)
+#ifdef KNUTH_DEBUG
+#define DEBUG_KNUTH(X) LLVM_DEBUG(X)
+#else
+#define DEBUG_KNUTH(X) do {} while(false)
#endif
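Instead of pushing and popping LLVM_DEBUG, the code now defines a dedicated DEBUG_KNUTH macro that forwards to LLVM_DEBUG only when KNUTH_DEBUG is defined and otherwise expands to nothing. The same pattern in isolation, with a hypothetical VERBOSE_TRACE switch:

#include <cstdio>

// Compile-time switch: define VERBOSE_TRACE to keep the statements, leave it
// undefined to have them compiled out entirely.
#ifdef VERBOSE_TRACE
#define TRACE(X) do { X; } while (false)
#else
#define TRACE(X) do { } while (false)
#endif

int sum(int A, int B) {
  TRACE(std::printf("sum(%d, %d)\n", A, B)); // vanishes unless VERBOSE_TRACE is set
  return A + B;
}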
- LLVM_DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
- LLVM_DEBUG(dbgs() << "KnuthDiv: original:");
- LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
- LLVM_DEBUG(dbgs() << " by");
- LLVM_DEBUG(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
- LLVM_DEBUG(dbgs() << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << " by");
+ DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
+ DEBUG_KNUTH(dbgs() << '\n');
// D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
// u and v by d. Note that we have taken Knuth's advice here to use a power
// of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
@@ -1292,16 +1284,16 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
}
u[m+n] = u_carry;
- LLVM_DEBUG(dbgs() << "KnuthDiv: normal:");
- LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
- LLVM_DEBUG(dbgs() << " by");
- LLVM_DEBUG(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
- LLVM_DEBUG(dbgs() << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << " by");
+ DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
+ DEBUG_KNUTH(dbgs() << '\n');
// D2. [Initialize j.] Set j to m. This is the loop counter over the places.
int j = m;
do {
- LLVM_DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
// D3. [Calculate q'.].
// Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
// Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
@@ -1311,7 +1303,7 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
// value qp is one too large, and it eliminates all cases where qp is two
// too large.
uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
- LLVM_DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
uint64_t qp = dividend / v[n-1];
uint64_t rp = dividend % v[n-1];
if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
@@ -1320,7 +1312,7 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
qp--;
}
- LLVM_DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
// D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
// (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
@@ -1336,15 +1328,15 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
u[j+i] = Lo_32(subres);
borrow = Hi_32(p) - Hi_32(subres);
- LLVM_DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
<< ", borrow = " << borrow << '\n');
}
bool isNeg = u[j+n] < borrow;
u[j+n] -= Lo_32(borrow);
- LLVM_DEBUG(dbgs() << "KnuthDiv: after subtraction:");
- LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
- LLVM_DEBUG(dbgs() << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << '\n');
// D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
// negative, go to step D6; otherwise go on to step D7.
@@ -1365,16 +1357,16 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
}
u[j+n] += carry;
}
- LLVM_DEBUG(dbgs() << "KnuthDiv: after correction:");
- LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
- LLVM_DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
// D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
} while (--j >= 0);
- LLVM_DEBUG(dbgs() << "KnuthDiv: quotient:");
- LLVM_DEBUG(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
- LLVM_DEBUG(dbgs() << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
+ DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
+ DEBUG_KNUTH(dbgs() << '\n');
// D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
// remainder may be obtained by dividing u[...] by d. If r is non-null we
@@ -1385,23 +1377,21 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
// shift right here.
if (shift) {
uint32_t carry = 0;
- LLVM_DEBUG(dbgs() << "KnuthDiv: remainder:");
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
for (int i = n-1; i >= 0; i--) {
r[i] = (u[i] >> shift) | carry;
carry = u[i] << (32 - shift);
- LLVM_DEBUG(dbgs() << " " << r[i]);
+ DEBUG_KNUTH(dbgs() << " " << r[i]);
}
} else {
for (int i = n-1; i >= 0; i--) {
r[i] = u[i];
- LLVM_DEBUG(dbgs() << " " << r[i]);
+ DEBUG_KNUTH(dbgs() << " " << r[i]);
}
}
- LLVM_DEBUG(dbgs() << '\n');
+ DEBUG_KNUTH(dbgs() << '\n');
}
- LLVM_DEBUG(dbgs() << '\n');
-
-#pragma pop_macro("LLVM_DEBUG")
+ DEBUG_KNUTH(dbgs() << '\n');
}
void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
@@ -1957,7 +1947,43 @@ APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
return *this << ShAmt;
}
+APInt APInt::sadd_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = sadd_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return isNegative() ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getSignedMaxValue(BitWidth);
+}
+
+APInt APInt::uadd_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = uadd_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return APInt::getMaxValue(BitWidth);
+}
+
+APInt APInt::ssub_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = ssub_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+ return isNegative() ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getSignedMaxValue(BitWidth);
+}
+
+APInt APInt::usub_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = usub_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return APInt(BitWidth, 0);
+}
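Each of the new saturating helpers performs the ordinary overflowing operation and then clamps to the type's extreme value if the overflow flag fired. For fixed-width machine integers the same idea looks like this (a sketch using the __builtin_add_overflow intrinsic available in GCC and Clang, not the APInt API):

#include <cstdint>
#include <limits>

// Signed saturating add for int32_t: on overflow, clamp toward the sign of the
// operands (signed add can only overflow when both operands share a sign).
int32_t sadd_sat32(int32_t A, int32_t B) {
  int32_t R;
  if (!__builtin_add_overflow(A, B, &R))
    return R;
  return A < 0 ? std::numeric_limits<int32_t>::min()
               : std::numeric_limits<int32_t>::max();
}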
void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
@@ -2707,3 +2733,193 @@ APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
}
llvm_unreachable("Unknown APInt::Rounding enum");
}
+
+Optional<APInt>
+llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
+ unsigned RangeWidth) {
+ unsigned CoeffWidth = A.getBitWidth();
+ assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
+ assert(RangeWidth <= CoeffWidth &&
+ "Value range width should be less than coefficient width");
+ assert(RangeWidth > 1 && "Value range bit width should be > 1");
+
+ LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
+ << "x + " << C << ", rw:" << RangeWidth << '\n');
+
+ // Identify 0 as a (non)solution immediately.
+ if (C.sextOrTrunc(RangeWidth).isNullValue()) {
+ LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
+ return APInt(CoeffWidth, 0);
+ }
+
+ // The result of APInt arithmetic has the same bit width as the operands,
+ // so it can actually lose high bits. A product of two n-bit integers needs
+ // 2n-1 bits to represent the full value.
+ // The operation done below (on quadratic coefficients) that can produce
+ // the largest value is the evaluation of the equation during bisection,
+ // which needs 3 times the bitwidth of the coefficient, so the total number
+ // of required bits is 3n.
+ //
+ // The purpose of this extension is to simulate the set Z of all integers,
+ // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
+ // and negative numbers (not so much in a modulo arithmetic). The method
+ // used to solve the equation is based on the standard formula for real
+ // numbers, and uses the concepts of "positive" and "negative" with their
+ // usual meanings.
+ CoeffWidth *= 3;
+ A = A.sext(CoeffWidth);
+ B = B.sext(CoeffWidth);
+ C = C.sext(CoeffWidth);
+
+ // Make A > 0 for simplicity. Negate cannot overflow at this point because
+ // the bit width has increased.
+ if (A.isNegative()) {
+ A.negate();
+ B.negate();
+ C.negate();
+ }
+
+ // Solving an equation q(x) = 0 with coefficients in modular arithmetic
+ // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
+ // and R = 2^BitWidth.
+ // Since we're trying not only to find exact solutions, but also values
+ // that "wrap around", such a set will always have a solution, i.e. an x
+ // that satisfies at least one of the equations, or such that |q(x)|
+ // exceeds kR, while |q(x-1)| for the same k does not.
+ //
+ // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
+ // positive solution n (in the above sense), and also such that the n
+ // will be the least among all solutions corresponding to k = 0, 1, ...
+ // (more precisely, the least element in the set
+ // { n(k) | k is such that a solution n(k) exists }).
+ //
+ // Consider the parabola (over real numbers) that corresponds to the
+ // quadratic equation. Since A > 0, the arms of the parabola will point
+ // up. Picking different values of k will shift it up and down by R.
+ //
+ // We want to shift the parabola in such a way as to reduce the problem
+ // of solving q(x) = kR to solving shifted_q(x) = 0.
+ // (The interesting solutions are the ceilings of the real number
+ // solutions.)
+ APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
+ APInt TwoA = 2 * A;
+ APInt SqrB = B * B;
+ bool PickLow;
+
+ auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
+ assert(A.isStrictlyPositive());
+ APInt T = V.abs().urem(A);
+ if (T.isNullValue())
+ return V;
+ return V.isNegative() ? V+T : V+(A-T);
+ };
+
+ // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
+ // iff B is positive.
+ if (B.isNonNegative()) {
+ // If B >= 0, the vertex is at a negative location (or at 0), so in
+ // order to have a non-negative solution we need to pick k that makes
+ // C-kR negative. To satisfy all the requirements for the solution
+ // that we are looking for, it needs to be closest to 0 of all k.
+ C = C.srem(R);
+ if (C.isStrictlyPositive())
+ C -= R;
+ // Pick the greater solution.
+ PickLow = false;
+ } else {
+ // If B < 0, the vertex is at a positive location. For any solution
+ // to exist, the discriminant must be non-negative. This means that
+ // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
+ // lower bound on values of k: kR >= C - B^2/4A.
+ APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
+ // Round LowkR up (towards +inf) to the nearest kR.
+ LowkR = RoundUp(LowkR, R);
+
+ // If there exists k meeting the condition above, and such that
+ // C-kR > 0, there will be two positive real number solutions of
+ // q(x) = kR. Out of all such values of k, pick the one that makes
+ // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
+ // In other words, find maximum k such that LowkR <= kR < C.
+ if (C.sgt(LowkR)) {
+ // If LowkR < C, then such a k is guaranteed to exist because
+ // LowkR itself is a multiple of R.
+ C -= -RoundUp(-C, R); // C = C - RoundDown(C, R)
+ // Pick the smaller solution.
+ PickLow = true;
+ } else {
+ // If C-kR < 0 for all potential k's, it means that one solution
+ // will be negative, while the other will be positive. The positive
+ // solution will shift towards 0 if the parabola is moved up.
+ // Pick the kR closest to the lower bound (i.e. make C-kR closest
+ // to 0, or in other words, out of all parabolas that have solutions,
+ // pick the one that is the farthest "up").
+ // Since LowkR is itself a multiple of R, simply take C-LowkR.
+ C -= LowkR;
+ // Pick the greater solution.
+ PickLow = false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
+ << B << "x + " << C << ", rw:" << RangeWidth << '\n');
+
+ APInt D = SqrB - 4*A*C;
+ assert(D.isNonNegative() && "Negative discriminant");
+ APInt SQ = D.sqrt();
+
+ APInt Q = SQ * SQ;
+ bool InexactSQ = Q != D;
+ // The calculated SQ may actually be greater than the exact (non-integer)
+ // value. If that's the case, decrement SQ to get a value that is lower.
+ if (Q.sgt(D))
+ SQ -= 1;
+
+ APInt X;
+ APInt Rem;
+
+ // SQ is rounded down (i.e. SQ * SQ <= D), so the roots may be inexact.
+ // When using the quadratic formula directly, the calculated low root
+ // may be greater than the exact one, since we would be subtracting SQ.
+ // To make sure that the calculated root is not greater than the exact
+ // one, subtract SQ+1 when calculating the low root (for inexact value
+ // of SQ).
+ if (PickLow)
+ APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
+ else
+ APInt::sdivrem(-B + SQ, TwoA, X, Rem);
+
+ // The updated coefficients should be such that the (exact) solution is
+ // positive. Since APInt division rounds towards 0, the calculated one
+ // can be 0, but cannot be negative.
+ assert(X.isNonNegative() && "Solution should be non-negative");
+
+ if (!InexactSQ && Rem.isNullValue()) {
+ LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
+ return X;
+ }
+
+ assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
+ // The exact value of the square root of D should be between SQ and SQ+1.
+ // This implies that the solution should be between that corresponding to
+ // SQ (i.e. X) and that corresponding to SQ+1.
+ //
+ // The calculated X cannot be greater than the exact (real) solution.
+ // Actually it must be strictly less than the exact solution, while
+ // X+1 will be greater than or equal to it.
+
+ APInt VX = (A*X + B)*X + C;
+ APInt VY = VX + TwoA*X + A + B;
+ bool SignChange = VX.isNegative() != VY.isNegative() ||
+ VX.isNullValue() != VY.isNullValue();
+ // If the sign did not change between X and X+1, X is not a valid solution.
+ // This could happen when the actual (exact) roots don't have an integer
+ // between them, so they would both be contained between X and X+1.
+ if (!SignChange) {
+ LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
+ return None;
+ }
+
+ X += 1;
+ LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
+ return X;
+}
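Restating the algebra from the long comment above: the routine looks for the least non-negative x at which the quadratic, evaluated in RangeWidth-bit modular arithmetic, reaches a multiple of the range, i.e. it solves one of the shifted equations

  A x^2 + B x + C = k R, \qquad R = 2^{\text{RangeWidth}}, \quad k = 0, 1, 2, \dots

Once C has been adjusted by the chosen multiple of R, the candidate root comes from the standard formula

  x = \frac{-B \pm \sqrt{B^2 - 4AC}}{2A},

with the sign selected by PickLow and the integer square root SQ rounded so that the computed root never exceeds the exact one; the final sign-change check between q(X) and q(X+1) confirms that a wrap actually occurs in that interval.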
diff --git a/contrib/llvm/lib/Support/ARMTargetParser.cpp b/contrib/llvm/lib/Support/ARMTargetParser.cpp
new file mode 100644
index 000000000000..07294b0c09a3
--- /dev/null
+++ b/contrib/llvm/lib/Support/ARMTargetParser.cpp
@@ -0,0 +1,577 @@
+//===-- ARMTargetParser - Parser for ARM target features --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a target parser to recognise ARM hardware features
+// such as FPU/CPU/ARCH/extensions and specific support such as HWDIV.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ARMTargetParser.h"
+#include "llvm/ADT/StringSwitch.h"
+#include <cctype>
+
+using namespace llvm;
+
+static StringRef getHWDivSynonym(StringRef HWDiv) {
+ return StringSwitch<StringRef>(HWDiv)
+ .Case("thumb,arm", "arm,thumb")
+ .Default(HWDiv);
+}
+
+// Allows partial match, ex. "v7a" matches "armv7a".
+ARM::ArchKind ARM::parseArch(StringRef Arch) {
+ Arch = getCanonicalArchName(Arch);
+ StringRef Syn = getArchSynonym(Arch);
+ for (const auto A : ARCHNames) {
+ if (A.getName().endswith(Syn))
+ return A.ID;
+ }
+ return ArchKind::INVALID;
+}
+
+// Version number (ex. v7 = 7).
+unsigned ARM::parseArchVersion(StringRef Arch) {
+ Arch = getCanonicalArchName(Arch);
+ switch (parseArch(Arch)) {
+ case ArchKind::ARMV2:
+ case ArchKind::ARMV2A:
+ return 2;
+ case ArchKind::ARMV3:
+ case ArchKind::ARMV3M:
+ return 3;
+ case ArchKind::ARMV4:
+ case ArchKind::ARMV4T:
+ return 4;
+ case ArchKind::ARMV5T:
+ case ArchKind::ARMV5TE:
+ case ArchKind::IWMMXT:
+ case ArchKind::IWMMXT2:
+ case ArchKind::XSCALE:
+ case ArchKind::ARMV5TEJ:
+ return 5;
+ case ArchKind::ARMV6:
+ case ArchKind::ARMV6K:
+ case ArchKind::ARMV6T2:
+ case ArchKind::ARMV6KZ:
+ case ArchKind::ARMV6M:
+ return 6;
+ case ArchKind::ARMV7A:
+ case ArchKind::ARMV7VE:
+ case ArchKind::ARMV7R:
+ case ArchKind::ARMV7M:
+ case ArchKind::ARMV7S:
+ case ArchKind::ARMV7EM:
+ case ArchKind::ARMV7K:
+ return 7;
+ case ArchKind::ARMV8A:
+ case ArchKind::ARMV8_1A:
+ case ArchKind::ARMV8_2A:
+ case ArchKind::ARMV8_3A:
+ case ArchKind::ARMV8_4A:
+ case ArchKind::ARMV8_5A:
+ case ArchKind::ARMV8R:
+ case ArchKind::ARMV8MBaseline:
+ case ArchKind::ARMV8MMainline:
+ return 8;
+ case ArchKind::INVALID:
+ return 0;
+ }
+ llvm_unreachable("Unhandled architecture");
+}
+
+// Profile A/R/M
+ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
+ Arch = getCanonicalArchName(Arch);
+ switch (parseArch(Arch)) {
+ case ArchKind::ARMV6M:
+ case ArchKind::ARMV7M:
+ case ArchKind::ARMV7EM:
+ case ArchKind::ARMV8MMainline:
+ case ArchKind::ARMV8MBaseline:
+ return ProfileKind::M;
+ case ArchKind::ARMV7R:
+ case ArchKind::ARMV8R:
+ return ProfileKind::R;
+ case ArchKind::ARMV7A:
+ case ArchKind::ARMV7VE:
+ case ArchKind::ARMV7K:
+ case ArchKind::ARMV8A:
+ case ArchKind::ARMV8_1A:
+ case ArchKind::ARMV8_2A:
+ case ArchKind::ARMV8_3A:
+ case ArchKind::ARMV8_4A:
+ case ArchKind::ARMV8_5A:
+ return ProfileKind::A;
+ case ArchKind::ARMV2:
+ case ArchKind::ARMV2A:
+ case ArchKind::ARMV3:
+ case ArchKind::ARMV3M:
+ case ArchKind::ARMV4:
+ case ArchKind::ARMV4T:
+ case ArchKind::ARMV5T:
+ case ArchKind::ARMV5TE:
+ case ArchKind::ARMV5TEJ:
+ case ArchKind::ARMV6:
+ case ArchKind::ARMV6K:
+ case ArchKind::ARMV6T2:
+ case ArchKind::ARMV6KZ:
+ case ArchKind::ARMV7S:
+ case ArchKind::IWMMXT:
+ case ArchKind::IWMMXT2:
+ case ArchKind::XSCALE:
+ case ArchKind::INVALID:
+ return ProfileKind::INVALID;
+ }
+ llvm_unreachable("Unhandled architecture");
+}
+
+StringRef ARM::getArchSynonym(StringRef Arch) {
+ return StringSwitch<StringRef>(Arch)
+ .Case("v5", "v5t")
+ .Case("v5e", "v5te")
+ .Case("v6j", "v6")
+ .Case("v6hl", "v6k")
+ .Cases("v6m", "v6sm", "v6s-m", "v6-m")
+ .Cases("v6z", "v6zk", "v6kz")
+ .Cases("v7", "v7a", "v7hl", "v7l", "v7-a")
+ .Case("v7r", "v7-r")
+ .Case("v7m", "v7-m")
+ .Case("v7em", "v7e-m")
+ .Cases("v8", "v8a", "v8l", "aarch64", "arm64", "v8-a")
+ .Case("v8.1a", "v8.1-a")
+ .Case("v8.2a", "v8.2-a")
+ .Case("v8.3a", "v8.3-a")
+ .Case("v8.4a", "v8.4-a")
+ .Case("v8.5a", "v8.5-a")
+ .Case("v8r", "v8-r")
+ .Case("v8m.base", "v8-m.base")
+ .Case("v8m.main", "v8-m.main")
+ .Default(Arch);
+}
+
+bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
+
+ if (FPUKind >= FK_LAST || FPUKind == FK_INVALID)
+ return false;
+
+ // fp-only-sp and d16 subtarget features are independent of each other, so we
+ // must enable/disable both.
+ switch (FPUNames[FPUKind].Restriction) {
+ case FPURestriction::SP_D16:
+ Features.push_back("+fp-only-sp");
+ Features.push_back("+d16");
+ break;
+ case FPURestriction::D16:
+ Features.push_back("-fp-only-sp");
+ Features.push_back("+d16");
+ break;
+ case FPURestriction::None:
+ Features.push_back("-fp-only-sp");
+ Features.push_back("-d16");
+ break;
+ }
+
+ // FPU version subtarget features are inclusive of lower-numbered ones, so
+ // enable the one corresponding to this version and disable all that are
+ // higher. We also have to make sure to disable fp16 when vfp4 is disabled,
+ // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16.
+ switch (FPUNames[FPUKind].FPUVer) {
+ case FPUVersion::VFPV5:
+ Features.push_back("+fp-armv8");
+ break;
+ case FPUVersion::VFPV4:
+ Features.push_back("+vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ case FPUVersion::VFPV3_FP16:
+ Features.push_back("+vfp3");
+ Features.push_back("+fp16");
+ Features.push_back("-vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ case FPUVersion::VFPV3:
+ Features.push_back("+vfp3");
+ Features.push_back("-fp16");
+ Features.push_back("-vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ case FPUVersion::VFPV2:
+ Features.push_back("+vfp2");
+ Features.push_back("-vfp3");
+ Features.push_back("-fp16");
+ Features.push_back("-vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ case FPUVersion::NONE:
+ Features.push_back("-vfp2");
+ Features.push_back("-vfp3");
+ Features.push_back("-fp16");
+ Features.push_back("-vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ }
+
+ // crypto includes neon, so we handle this similarly to FPU version.
+ switch (FPUNames[FPUKind].NeonSupport) {
+ case NeonSupportLevel::Crypto:
+ Features.push_back("+neon");
+ Features.push_back("+crypto");
+ break;
+ case NeonSupportLevel::Neon:
+ Features.push_back("+neon");
+ Features.push_back("-crypto");
+ break;
+ case NeonSupportLevel::None:
+ Features.push_back("-neon");
+ Features.push_back("-crypto");
+ break;
+ }
+
+ return true;
+}
+
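getFPUFeatures encodes the rule that FPU versions are cumulative: it enables the feature that matches the requested FPU and explicitly disables every higher one so stale "+" flags cannot leak through. A small usage sketch, assuming the declarations in llvm/Support/ARMTargetParser.h match the definitions above:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ARMTargetParser.h"
#include <vector>

// Translate an FPU name such as "vfpv3-d16" into the +/- subtarget features
// the backend expects; returns an empty list for unknown FPUs.
std::vector<llvm::StringRef> fpuFeatures(llvm::StringRef Name) {
  unsigned Kind = llvm::ARM::parseFPU(Name);
  std::vector<llvm::StringRef> Features;
  if (!llvm::ARM::getFPUFeatures(Kind, Features))
    Features.clear();
  return Features;
}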
+// Little/Big endian
+ARM::EndianKind ARM::parseArchEndian(StringRef Arch) {
+ if (Arch.startswith("armeb") || Arch.startswith("thumbeb") ||
+ Arch.startswith("aarch64_be"))
+ return EndianKind::BIG;
+
+ if (Arch.startswith("arm") || Arch.startswith("thumb")) {
+ if (Arch.endswith("eb"))
+ return EndianKind::BIG;
+ else
+ return EndianKind::LITTLE;
+ }
+
+ if (Arch.startswith("aarch64"))
+ return EndianKind::LITTLE;
+
+ return EndianKind::INVALID;
+}
+
+// ARM, Thumb, AArch64
+ARM::ISAKind ARM::parseArchISA(StringRef Arch) {
+ return StringSwitch<ISAKind>(Arch)
+ .StartsWith("aarch64", ISAKind::AARCH64)
+ .StartsWith("arm64", ISAKind::AARCH64)
+ .StartsWith("thumb", ISAKind::THUMB)
+ .StartsWith("arm", ISAKind::ARM)
+ .Default(ISAKind::INVALID);
+}
+
+unsigned ARM::parseFPU(StringRef FPU) {
+ StringRef Syn = getFPUSynonym(FPU);
+ for (const auto F : FPUNames) {
+ if (Syn == F.getName())
+ return F.ID;
+ }
+ return FK_INVALID;
+}
+
+ARM::NeonSupportLevel ARM::getFPUNeonSupportLevel(unsigned FPUKind) {
+ if (FPUKind >= FK_LAST)
+ return NeonSupportLevel::None;
+ return FPUNames[FPUKind].NeonSupport;
+}
+
+// MArch is expected to be of the form (arm|thumb)?(eb)?(v.+)?(eb)?, but
+// (iwmmxt|xscale)(eb)? is also permitted. If the former, return
+// "v.+", if the latter, return unmodified string, minus 'eb'.
+// If invalid, return empty string.
+StringRef ARM::getCanonicalArchName(StringRef Arch) {
+ size_t offset = StringRef::npos;
+ StringRef A = Arch;
+ StringRef Error = "";
+
+ // Begins with "arm" / "thumb", move past it.
+ if (A.startswith("arm64"))
+ offset = 5;
+ else if (A.startswith("arm"))
+ offset = 3;
+ else if (A.startswith("thumb"))
+ offset = 5;
+ else if (A.startswith("aarch64")) {
+ offset = 7;
+ // AArch64 uses "_be", not "eb" suffix.
+ if (A.find("eb") != StringRef::npos)
+ return Error;
+ if (A.substr(offset, 3) == "_be")
+ offset += 3;
+ }
+
+ // Ex. "armebv7", move past the "eb".
+ if (offset != StringRef::npos && A.substr(offset, 2) == "eb")
+ offset += 2;
+ // Or, if it ends with eb ("armv7eb"), chop it off.
+ else if (A.endswith("eb"))
+ A = A.substr(0, A.size() - 2);
+ // Trim the head
+ if (offset != StringRef::npos)
+ A = A.substr(offset);
+
+ // Empty string means offset reached the end, which means it's valid.
+ if (A.empty())
+ return Arch;
+
+ // Only match non-marketing names
+ if (offset != StringRef::npos) {
+ // Must start with 'vN'.
+ if (A.size() >= 2 && (A[0] != 'v' || !std::isdigit(A[1])))
+ return Error;
+ // Can't have an extra 'eb'.
+ if (A.find("eb") != StringRef::npos)
+ return Error;
+ }
+
+ // Arch will either be a 'v' name (v7a) or a marketing name (xscale).
+ return A;
+}
+
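Reading the definition above, getCanonicalArchName strips the ISA prefix and any "eb" endian marker and keeps either the "vN" architecture form or an unprefixed marketing name; the expected behaviour for a few inputs is sketched below (my reading of the code, not output from a test run):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ARMTargetParser.h"

//   "armv7a"   -> "v7a"      (prefix stripped, 'v' name kept)
//   "armebv7a" -> "v7a"      (big-endian "eb" infix stripped too)
//   "aarch64"  -> "aarch64"  (nothing after the prefix, input returned as-is)
//   "xscale"   -> "xscale"   (marketing name passed through unchanged)
//   "armfoo"   -> ""         (prefixed but not a 'vN' form, rejected)
llvm::StringRef canonical(llvm::StringRef Arch) {
  return llvm::ARM::getCanonicalArchName(Arch);
}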
+StringRef ARM::getFPUSynonym(StringRef FPU) {
+ return StringSwitch<StringRef>(FPU)
+ .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported
+ .Case("vfp2", "vfpv2")
+ .Case("vfp3", "vfpv3")
+ .Case("vfp4", "vfpv4")
+ .Case("vfp3-d16", "vfpv3-d16")
+ .Case("vfp4-d16", "vfpv4-d16")
+ .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16")
+ .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16")
+ .Case("fp5-sp-d16", "fpv5-sp-d16")
+ .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16")
+ // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3.
+ .Case("neon-vfpv3", "neon")
+ .Default(FPU);
+}
+
+StringRef ARM::getFPUName(unsigned FPUKind) {
+ if (FPUKind >= FK_LAST)
+ return StringRef();
+ return FPUNames[FPUKind].getName();
+}
+
+ARM::FPUVersion ARM::getFPUVersion(unsigned FPUKind) {
+ if (FPUKind >= FK_LAST)
+ return FPUVersion::NONE;
+ return FPUNames[FPUKind].FPUVer;
+}
+
+ARM::FPURestriction ARM::getFPURestriction(unsigned FPUKind) {
+ if (FPUKind >= FK_LAST)
+ return FPURestriction::None;
+ return FPUNames[FPUKind].Restriction;
+}
+
+unsigned ARM::getDefaultFPU(StringRef CPU, ARM::ArchKind AK) {
+ if (CPU == "generic")
+ return ARM::ARCHNames[static_cast<unsigned>(AK)].DefaultFPU;
+
+ return StringSwitch<unsigned>(CPU)
+#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, DEFAULT_FPU)
+#include "llvm/Support/ARMTargetParser.def"
+ .Default(ARM::FK_INVALID);
+}
+
+unsigned ARM::getDefaultExtensions(StringRef CPU, ARM::ArchKind AK) {
+ if (CPU == "generic")
+ return ARM::ARCHNames[static_cast<unsigned>(AK)].ArchBaseExtensions;
+
+ return StringSwitch<unsigned>(CPU)
+#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, \
+ ARCHNames[static_cast<unsigned>(ArchKind::ID)].ArchBaseExtensions | \
+ DEFAULT_EXT)
+#include "llvm/Support/ARMTargetParser.def"
+ .Default(ARM::AEK_INVALID);
+}
+
+bool ARM::getHWDivFeatures(unsigned HWDivKind,
+ std::vector<StringRef> &Features) {
+
+ if (HWDivKind == AEK_INVALID)
+ return false;
+
+ if (HWDivKind & AEK_HWDIVARM)
+ Features.push_back("+hwdiv-arm");
+ else
+ Features.push_back("-hwdiv-arm");
+
+ if (HWDivKind & AEK_HWDIVTHUMB)
+ Features.push_back("+hwdiv");
+ else
+ Features.push_back("-hwdiv");
+
+ return true;
+}
+
+bool ARM::getExtensionFeatures(unsigned Extensions,
+ std::vector<StringRef> &Features) {
+
+ if (Extensions == AEK_INVALID)
+ return false;
+
+ if (Extensions & AEK_CRC)
+ Features.push_back("+crc");
+ else
+ Features.push_back("-crc");
+
+ if (Extensions & AEK_DSP)
+ Features.push_back("+dsp");
+ else
+ Features.push_back("-dsp");
+
+ if (Extensions & AEK_FP16FML)
+ Features.push_back("+fp16fml");
+ else
+ Features.push_back("-fp16fml");
+
+ if (Extensions & AEK_RAS)
+ Features.push_back("+ras");
+ else
+ Features.push_back("-ras");
+
+ if (Extensions & AEK_DOTPROD)
+ Features.push_back("+dotprod");
+ else
+ Features.push_back("-dotprod");
+
+ return getHWDivFeatures(Extensions, Features);
+}
+
+StringRef ARM::getArchName(ARM::ArchKind AK) {
+ return ARCHNames[static_cast<unsigned>(AK)].getName();
+}
+
+StringRef ARM::getCPUAttr(ARM::ArchKind AK) {
+ return ARCHNames[static_cast<unsigned>(AK)].getCPUAttr();
+}
+
+StringRef ARM::getSubArch(ARM::ArchKind AK) {
+ return ARCHNames[static_cast<unsigned>(AK)].getSubArch();
+}
+
+unsigned ARM::getArchAttr(ARM::ArchKind AK) {
+ return ARCHNames[static_cast<unsigned>(AK)].ArchAttr;
+}
+
+StringRef ARM::getArchExtName(unsigned ArchExtKind) {
+ for (const auto AE : ARCHExtNames) {
+ if (ArchExtKind == AE.ID)
+ return AE.getName();
+ }
+ return StringRef();
+}
+
+StringRef ARM::getArchExtFeature(StringRef ArchExt) {
+ if (ArchExt.startswith("no")) {
+ StringRef ArchExtBase(ArchExt.substr(2));
+ for (const auto AE : ARCHExtNames) {
+ if (AE.NegFeature && ArchExtBase == AE.getName())
+ return StringRef(AE.NegFeature);
+ }
+ }
+ for (const auto AE : ARCHExtNames) {
+ if (AE.Feature && ArchExt == AE.getName())
+ return StringRef(AE.Feature);
+ }
+
+ return StringRef();
+}
+
+StringRef ARM::getHWDivName(unsigned HWDivKind) {
+ for (const auto D : HWDivNames) {
+ if (HWDivKind == D.ID)
+ return D.getName();
+ }
+ return StringRef();
+}
+
+StringRef ARM::getDefaultCPU(StringRef Arch) {
+ ArchKind AK = parseArch(Arch);
+ if (AK == ArchKind::INVALID)
+ return StringRef();
+
+ // Look for multiple AKs to find the default for pair AK+Name.
+ for (const auto CPU : CPUNames) {
+ if (CPU.ArchID == AK && CPU.Default)
+ return CPU.getName();
+ }
+
+ // If we can't find a default then target the architecture instead
+ return "generic";
+}
+
+unsigned ARM::parseHWDiv(StringRef HWDiv) {
+ StringRef Syn = getHWDivSynonym(HWDiv);
+ for (const auto D : HWDivNames) {
+ if (Syn == D.getName())
+ return D.ID;
+ }
+ return AEK_INVALID;
+}
+
+unsigned ARM::parseArchExt(StringRef ArchExt) {
+ for (const auto A : ARCHExtNames) {
+ if (ArchExt == A.getName())
+ return A.ID;
+ }
+ return AEK_INVALID;
+}
+
+ARM::ArchKind ARM::parseCPUArch(StringRef CPU) {
+ for (const auto C : CPUNames) {
+ if (CPU == C.getName())
+ return C.ArchID;
+ }
+ return ArchKind::INVALID;
+}
+
+void ARM::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
+ for (const CpuNames<ArchKind> &Arch : CPUNames) {
+ if (Arch.ArchID != ArchKind::INVALID)
+ Values.push_back(Arch.getName());
+ }
+}
+
+StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
+ StringRef ArchName =
+ CPU.empty() ? TT.getArchName() : getArchName(parseCPUArch(CPU));
+
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getEnvironment() == Triple::EABI ||
+ TT.getOS() == Triple::UnknownOS ||
+ parseArchProfile(ArchName) == ProfileKind::M)
+ return "aapcs";
+ if (TT.isWatchABI())
+ return "aapcs16";
+ return "apcs-gnu";
+ } else if (TT.isOSWindows())
+ // FIXME: this is invalid for WindowsCE.
+ return "aapcs";
+
+ // Select the default based on the platform.
+ switch (TT.getEnvironment()) {
+ case Triple::Android:
+ case Triple::GNUEABI:
+ case Triple::GNUEABIHF:
+ case Triple::MuslEABI:
+ case Triple::MuslEABIHF:
+ return "aapcs-linux";
+ case Triple::EABIHF:
+ case Triple::EABI:
+ return "aapcs";
+ default:
+ if (TT.isOSNetBSD())
+ return "apcs-gnu";
+ if (TT.isOSOpenBSD())
+ return "aapcs-linux";
+ return "aapcs";
+ }
+}
diff --git a/contrib/llvm/lib/Support/BinaryStreamError.cpp b/contrib/llvm/lib/Support/BinaryStreamError.cpp
index 60f5e21f041a..cdc811d78d63 100644
--- a/contrib/llvm/lib/Support/BinaryStreamError.cpp
+++ b/contrib/llvm/lib/Support/BinaryStreamError.cpp
@@ -47,7 +47,7 @@ BinaryStreamError::BinaryStreamError(stream_error_code C, StringRef Context)
}
}
-void BinaryStreamError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
+void BinaryStreamError::log(raw_ostream &OS) const { OS << ErrMsg; }
StringRef BinaryStreamError::getErrorMessage() const { return ErrMsg; }
diff --git a/contrib/llvm/lib/Support/BuryPointer.cpp b/contrib/llvm/lib/Support/BuryPointer.cpp
new file mode 100644
index 000000000000..6c988b4a0ab2
--- /dev/null
+++ b/contrib/llvm/lib/Support/BuryPointer.cpp
@@ -0,0 +1,31 @@
+//===- BuryPointer.cpp - Memory Manipulation/Leak ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BuryPointer.h"
+#include "llvm/Support/Compiler.h"
+#include <atomic>
+
+namespace llvm {
+
+void BuryPointer(const void *Ptr) {
+ // This function may be called only a small, fixed number of times per
+ // invocation; otherwise we really do have a leak that we want to report.
+ // If this function is called more than kGraveYardMaxSize times, the pointers
+ // will not be properly buried and a leak detector will report a leak, which
+ // is what we want in that case.
+ static const size_t kGraveYardMaxSize = 16;
+ LLVM_ATTRIBUTE_UNUSED static const void *GraveYard[kGraveYardMaxSize];
+ static std::atomic<unsigned> GraveYardSize;
+ unsigned Idx = GraveYardSize++;
+ if (Idx >= kGraveYardMaxSize)
+ return;
+ GraveYard[Idx] = Ptr;
+}
+
+}
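BuryPointer hides a bounded number of intentionally leaked pointers from leak checkers by parking them in a static array; once the graveyard is full, further pointers stay visible so genuine leaks are still reported. A typical use, assuming a caller that deliberately skips teardown at process exit:

#include "llvm/Support/BuryPointer.h"

struct BigCache { char Data[1 << 20]; };

void shutdownFast(BigCache *Cache) {
  // Deliberately skip freeing the cache at exit, but keep a reachable
  // reference so a leak detector does not flag it.
  llvm::BuryPointer(Cache);
}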
diff --git a/contrib/llvm/lib/Support/COM.cpp b/contrib/llvm/lib/Support/COM.cpp
index 2e3ff66843d3..97cd085853b0 100644
--- a/contrib/llvm/lib/Support/COM.cpp
+++ b/contrib/llvm/lib/Support/COM.cpp
@@ -18,6 +18,6 @@
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/COM.inc"
-#elif _WIN32
+#elif defined(_WIN32)
#include "Windows/COM.inc"
#endif
diff --git a/contrib/llvm/lib/Support/CachePruning.cpp b/contrib/llvm/lib/Support/CachePruning.cpp
index 7326c4fc91fb..a0aa6024b3ed 100644
--- a/contrib/llvm/lib/Support/CachePruning.cpp
+++ b/contrib/llvm/lib/Support/CachePruning.cpp
@@ -27,6 +27,28 @@
using namespace llvm;
+namespace {
+struct FileInfo {
+ sys::TimePoint<> Time;
+ uint64_t Size;
+ std::string Path;
+
+ /// Used to determine which files to prune first. Also used to determine
+ /// set membership, so must take into account all fields.
+ bool operator<(const FileInfo &Other) const {
+ if (Time < Other.Time)
+ return true;
+ else if (Other.Time < Time)
+ return false;
+ if (Other.Size < Size)
+ return true;
+ else if (Size < Other.Size)
+ return false;
+ return Path < Other.Path;
+ }
+};
+} // anonymous namespace
+
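The comparator above orders cache entries by access time first, then size, then path, so pruning from the front of the set removes the least recently used (and, among equally old entries, the largest) files while still giving std::set the strict weak ordering it needs. The same three-level comparison can be written compactly with std::tie; an equivalent sketch, not a suggested change to the code above:

#include <chrono>
#include <cstdint>
#include <string>
#include <tuple>

struct Entry {
  std::chrono::system_clock::time_point Time;
  uint64_t Size;
  std::string Path;

  // Oldest first; among equally old entries, larger files first (note the
  // swapped Size operands), with the path as the final tie-breaker.
  bool operator<(const Entry &Other) const {
    return std::tie(Time, Other.Size, Path) <
           std::tie(Other.Time, Size, Other.Path);
  }
};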
/// Write a new timestamp file with the given path. This is used for the pruning
/// interval option.
static void writeTimestampFile(StringRef TimestampFile) {
@@ -185,8 +207,9 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
writeTimestampFile(TimestampFile);
}
- // Keep track of space. Needs to be kept ordered by size for determinism.
- std::set<std::pair<uint64_t, std::string>> FileSizes;
+ // Keep track of files to delete to get below the size limit.
+ // Order by time of last use so that recently used files are preserved.
+ std::set<FileInfo> FileInfos;
uint64_t TotalSize = 0;
// Walk the entire directory cache, looking for unused files.
@@ -224,22 +247,22 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
// Leave it here for now, but add it to the list of size-based pruning.
TotalSize += StatusOrErr->getSize();
- FileSizes.insert({StatusOrErr->getSize(), std::string(File->path())});
+ FileInfos.insert({FileAccessTime, StatusOrErr->getSize(), File->path()});
}
- auto FileAndSize = FileSizes.rbegin();
- size_t NumFiles = FileSizes.size();
+ auto FileInfo = FileInfos.begin();
+ size_t NumFiles = FileInfos.size();
auto RemoveCacheFile = [&]() {
// Remove the file.
- sys::fs::remove(FileAndSize->second);
+ sys::fs::remove(FileInfo->Path);
// Update size
- TotalSize -= FileAndSize->first;
+ TotalSize -= FileInfo->Size;
NumFiles--;
- LLVM_DEBUG(dbgs() << " - Remove " << FileAndSize->second << " (size "
- << FileAndSize->first << "), new occupancy is "
- << TotalSize << "%\n");
- ++FileAndSize;
+ LLVM_DEBUG(dbgs() << " - Remove " << FileInfo->Path << " (size "
+ << FileInfo->Size << "), new occupancy is " << TotalSize
+ << "%\n");
+ ++FileInfo;
};
// Prune for number of files.
@@ -270,7 +293,7 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
<< Policy.MaxSizeBytes << " bytes\n");
// Remove the oldest accessed files first, till we get below the threshold.
- while (TotalSize > TotalSizeTarget && FileAndSize != FileSizes.rend())
+ while (TotalSize > TotalSizeTarget && FileInfo != FileInfos.end())
RemoveCacheFile();
}
return true;
diff --git a/contrib/llvm/lib/Support/CodeGenCoverage.cpp b/contrib/llvm/lib/Support/CodeGenCoverage.cpp
index f0a53db4e32a..811020e3254a 100644
--- a/contrib/llvm/lib/Support/CodeGenCoverage.cpp
+++ b/contrib/llvm/lib/Support/CodeGenCoverage.cpp
@@ -22,7 +22,7 @@
#if LLVM_ON_UNIX
#include <unistd.h>
-#elif _WIN32
+#elif defined(_WIN32)
#include <windows.h>
#endif
@@ -93,7 +93,7 @@ bool CodeGenCoverage::emit(StringRef CoveragePrefix,
std::string Pid =
#if LLVM_ON_UNIX
llvm::to_string(::getpid());
-#elif _WIN32
+#elif defined(_WIN32)
llvm::to_string(::GetCurrentProcessId());
#else
"";
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index a1e659a01c8e..f7290b54dcf3 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -426,12 +426,17 @@ Option *CommandLineParser::LookupOption(SubCommand &Sub, StringRef &Arg,
return I != Sub.OptionsMap.end() ? I->second : nullptr;
}
- // If the argument before the = is a valid option name, we match. If not,
- // return Arg unmolested.
+ // If the argument before the = is a valid option name and the option allows
+ // non-prefix form (i.e. is not AlwaysPrefix), we match. If not, signal match
+ // failure by returning nullptr.
auto I = Sub.OptionsMap.find(Arg.substr(0, EqualPos));
if (I == Sub.OptionsMap.end())
return nullptr;
+ auto O = I->second;
+ if (O->getFormattingFlag() == cl::AlwaysPrefix)
+ return nullptr;
+
Value = Arg.substr(EqualPos + 1);
Arg = Arg.substr(0, EqualPos);
return I->second;
@@ -539,7 +544,9 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
switch (Handler->getValueExpectedFlag()) {
case ValueRequired:
if (!Value.data()) { // No value specified?
- if (i + 1 >= argc)
+ // If no other argument or the option only supports prefix form, we
+ // cannot look at the next argument.
+ if (i + 1 >= argc || Handler->getFormattingFlag() == cl::AlwaysPrefix)
return Handler->error("requires a value!");
// Steal the next argument, like for '-o filename'
assert(argv && "null check");
@@ -597,7 +604,8 @@ static inline bool isGrouping(const Option *O) {
return O->getFormattingFlag() == cl::Grouping;
}
static inline bool isPrefixedOrGrouping(const Option *O) {
- return isGrouping(O) || O->getFormattingFlag() == cl::Prefix;
+ return isGrouping(O) || O->getFormattingFlag() == cl::Prefix ||
+ O->getFormattingFlag() == cl::AlwaysPrefix;
}
// getOptionPred - Check to see if there are any options that satisfy the
@@ -647,7 +655,8 @@ HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
// If the option is a prefixed option, then the value is simply the
// rest of the name... so fall through to later processing, by
// setting up the argument name flags and value fields.
- if (PGOpt->getFormattingFlag() == cl::Prefix) {
+ if (PGOpt->getFormattingFlag() == cl::Prefix ||
+ PGOpt->getFormattingFlag() == cl::AlwaysPrefix) {
Value = Arg.substr(Length);
Arg = Arg.substr(0, Length);
assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt);
@@ -693,6 +702,10 @@ static bool isWhitespace(char C) {
return C == ' ' || C == '\t' || C == '\r' || C == '\n';
}
+static bool isWhitespaceOrNull(char C) {
+ return isWhitespace(C) || C == '\0';
+}
+
static bool isQuote(char C) { return C == '\"' || C == '\''; }
void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
@@ -808,7 +821,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
// INIT state indicates that the current input index is at the start of
// the string or between tokens.
if (State == INIT) {
- if (isWhitespace(C)) {
+ if (isWhitespaceOrNull(C)) {
// Mark the end of lines in response files
if (MarkEOLs && C == '\n')
NewArgv.push_back(nullptr);
@@ -832,7 +845,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
// quotes.
if (State == UNQUOTED) {
// Whitespace means the end of the token.
- if (isWhitespace(C)) {
+ if (isWhitespaceOrNull(C)) {
NewArgv.push_back(Saver.save(StringRef(Token)).data());
Token.clear();
State = INIT;
@@ -1057,8 +1070,27 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
}
bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
- StringRef Overview, raw_ostream *Errs) {
- return GlobalParser->ParseCommandLineOptions(argc, argv, Overview,
+ StringRef Overview, raw_ostream *Errs,
+ const char *EnvVar) {
+ SmallVector<const char *, 20> NewArgv;
+ BumpPtrAllocator A;
+ StringSaver Saver(A);
+ NewArgv.push_back(argv[0]);
+
+ // Parse options from environment variable.
+ if (EnvVar) {
+ if (llvm::Optional<std::string> EnvValue =
+ sys::Process::GetEnv(StringRef(EnvVar)))
+ TokenizeGNUCommandLine(*EnvValue, Saver, NewArgv);
+ }
+
+ // Append options from command line.
+ for (int I = 1; I < argc; ++I)
+ NewArgv.push_back(argv[I]);
+ int NewArgc = static_cast<int>(NewArgv.size());
+
+ // Parse all options.
+ return GlobalParser->ParseCommandLineOptions(NewArgc, &NewArgv[0], Overview,
Errs);
}
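The extra EnvVar parameter lets a tool pick up default options from an environment variable, tokenized GNU-style and parsed ahead of the real argv. A minimal sketch of a caller opting in (the variable name MYTOOL_OPTIONS and the -verbose flag are made up for illustration):

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Hypothetical flag, only so the parser has something to populate.
    static cl::opt<bool> Verbose("verbose", cl::desc("Enable verbose output"));

    int main(int argc, char **argv) {
      // Tokens found in $MYTOOL_OPTIONS (a made-up variable name) are parsed
      // before the arguments supplied on the actual command line.
      cl::ParseCommandLineOptions(argc, argv, "example tool\n", &errs(),
                                  "MYTOOL_OPTIONS");
      return Verbose ? 0 : 1;
    }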
diff --git a/contrib/llvm/lib/Support/Compression.cpp b/contrib/llvm/lib/Support/Compression.cpp
index c279d10f6c61..95261d4aad23 100644
--- a/contrib/llvm/lib/Support/Compression.cpp
+++ b/contrib/llvm/lib/Support/Compression.cpp
@@ -29,16 +29,6 @@ static Error createError(StringRef Err) {
return make_error<StringError>(Err, inconvertibleErrorCode());
}
-static int encodeZlibCompressionLevel(zlib::CompressionLevel Level) {
- switch (Level) {
- case zlib::NoCompression: return 0;
- case zlib::BestSpeedCompression: return 1;
- case zlib::DefaultCompression: return Z_DEFAULT_COMPRESSION;
- case zlib::BestSizeCompression: return 9;
- }
- llvm_unreachable("Invalid zlib::CompressionLevel!");
-}
-
static StringRef convertZlibCodeToString(int Code) {
switch (Code) {
case Z_MEM_ERROR:
@@ -58,18 +48,16 @@ static StringRef convertZlibCodeToString(int Code) {
bool zlib::isAvailable() { return true; }
Error zlib::compress(StringRef InputBuffer,
- SmallVectorImpl<char> &CompressedBuffer,
- CompressionLevel Level) {
+ SmallVectorImpl<char> &CompressedBuffer, int Level) {
unsigned long CompressedSize = ::compressBound(InputBuffer.size());
- CompressedBuffer.resize(CompressedSize);
- int CLevel = encodeZlibCompressionLevel(Level);
- int Res = ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize,
- (const Bytef *)InputBuffer.data(), InputBuffer.size(),
- CLevel);
+ CompressedBuffer.reserve(CompressedSize);
+ int Res =
+ ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize,
+ (const Bytef *)InputBuffer.data(), InputBuffer.size(), Level);
// Tell MemorySanitizer that zlib output buffer is fully initialized.
// This avoids a false report when running LLVM with uninstrumented ZLib.
__msan_unpoison(CompressedBuffer.data(), CompressedSize);
- CompressedBuffer.resize(CompressedSize);
+ CompressedBuffer.set_size(CompressedSize);
return Res ? createError(convertZlibCodeToString(Res)) : Error::success();
}
@@ -101,8 +89,7 @@ uint32_t zlib::crc32(StringRef Buffer) {
#else
bool zlib::isAvailable() { return false; }
Error zlib::compress(StringRef InputBuffer,
- SmallVectorImpl<char> &CompressedBuffer,
- CompressionLevel Level) {
+ SmallVectorImpl<char> &CompressedBuffer, int Level) {
llvm_unreachable("zlib::compress is unavailable");
}
Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
@@ -118,4 +105,3 @@ uint32_t zlib::crc32(StringRef Buffer) {
llvm_unreachable("zlib::crc32 is unavailable");
}
#endif
-
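With the CompressionLevel enum removed, callers now pass a raw zlib level (0-9, or Z_DEFAULT_COMPRESSION) directly. A hedged sketch of the updated call site, choosing level 6 arbitrarily:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Compression.h"
    #include "llvm/Support/Error.h"

    using namespace llvm;

    static Error compressChunk(StringRef Input, SmallVectorImpl<char> &Out) {
      if (!zlib::isAvailable())
        return createStringError(inconvertibleErrorCode(),
                                 "LLVM was built without zlib support");
      // Level is now a plain int (0-9); 6 is an arbitrary middle-ground pick.
      return zlib::compress(Input, Out, /*Level=*/6);
    }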
diff --git a/contrib/llvm/lib/Support/DebugCounter.cpp b/contrib/llvm/lib/Support/DebugCounter.cpp
index 9c12de0776ad..6598103658da 100644
--- a/contrib/llvm/lib/Support/DebugCounter.cpp
+++ b/contrib/llvm/lib/Support/DebugCounter.cpp
@@ -49,8 +49,18 @@ static DebugCounterList DebugCounterOption(
cl::desc("Comma separated list of debug counter skip and count"),
cl::CommaSeparated, cl::ZeroOrMore, cl::location(DebugCounter::instance()));
+static cl::opt<bool> PrintDebugCounter(
+ "print-debug-counter", cl::Hidden, cl::init(false), cl::Optional,
+ cl::desc("Print out debug counter info after all counters accumulated"));
+
static ManagedStatic<DebugCounter> DC;
+// Print information when destroyed, iff command line option is specified.
+DebugCounter::~DebugCounter() {
+ if (isCountingEnabled() && PrintDebugCounter)
+ print(dbgs());
+}
+
DebugCounter &DebugCounter::instance() { return *DC; }
// This is called by the command line parser when it sees a value for the
@@ -83,8 +93,10 @@ void DebugCounter::push_back(const std::string &Val) {
return;
}
enableAllCounters();
- Counters[CounterID].Skip = CounterVal;
- Counters[CounterID].IsSet = true;
+
+ CounterInfo &Counter = Counters[CounterID];
+ Counter.Skip = CounterVal;
+ Counter.IsSet = true;
} else if (CounterPair.first.endswith("-count")) {
auto CounterName = CounterPair.first.drop_back(6);
unsigned CounterID = getCounterId(CounterName);
@@ -94,8 +106,10 @@ void DebugCounter::push_back(const std::string &Val) {
return;
}
enableAllCounters();
- Counters[CounterID].StopAfter = CounterVal;
- Counters[CounterID].IsSet = true;
+
+ CounterInfo &Counter = Counters[CounterID];
+ Counter.StopAfter = CounterVal;
+ Counter.IsSet = true;
} else {
errs() << "DebugCounter Error: " << CounterPair.first
<< " does not end with -skip or -count\n";
@@ -103,11 +117,18 @@ void DebugCounter::push_back(const std::string &Val) {
}
void DebugCounter::print(raw_ostream &OS) const {
+ SmallVector<StringRef, 16> CounterNames(RegisteredCounters.begin(),
+ RegisteredCounters.end());
+ sort(CounterNames.begin(), CounterNames.end());
+
+ auto &Us = instance();
OS << "Counters and values:\n";
- for (const auto &KV : Counters)
- OS << left_justify(RegisteredCounters[KV.first], 32) << ": {"
- << KV.second.Count << "," << KV.second.Skip << ","
- << KV.second.StopAfter << "}\n";
+ for (auto &CounterName : CounterNames) {
+ unsigned CounterID = getCounterId(CounterName);
+ OS << left_justify(RegisteredCounters[CounterID], 32) << ": {"
+ << Us.Counters[CounterID].Count << "," << Us.Counters[CounterID].Skip
+ << "," << Us.Counters[CounterID].StopAfter << "}\n";
+ }
}
LLVM_DUMP_METHOD void DebugCounter::dump() const {
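The new -print-debug-counter flag dumps the counter table when the DebugCounter singleton is torn down; counters are still declared and queried through the existing DEBUG_COUNTER machinery. A short sketch of a counter a pass might register (the counter name and description are invented):

    #include "llvm/Support/DebugCounter.h"

    using namespace llvm;

    // Registers a counter that can be driven with
    //   -debug-counter=my-transform-skip=N,my-transform-count=M
    // and summarized at shutdown with -print-debug-counter.
    DEBUG_COUNTER(MyTransformCounter, "my-transform",
                  "Controls which candidate transforms are applied");

    static bool maybeTransform() {
      if (!DebugCounter::shouldExecute(MyTransformCounter))
        return false; // Skipped by the counter; handy for bisecting failures.
      // ... apply the transform ...
      return true;
    }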
diff --git a/contrib/llvm/lib/Support/Error.cpp b/contrib/llvm/lib/Support/Error.cpp
index 83345bf6edb9..30bfc3e6d2fb 100644
--- a/contrib/llvm/lib/Support/Error.cpp
+++ b/contrib/llvm/lib/Support/Error.cpp
@@ -19,6 +19,7 @@ namespace {
enum class ErrorErrorCode : int {
MultipleErrors = 1,
+ FileError,
InconvertibleError
};
@@ -37,6 +38,8 @@ namespace {
return "Inconvertible error value. An error has occurred that could "
"not be converted to a known std::error_code. Please file a "
"bug.";
+ case ErrorErrorCode::FileError:
+ return "A file error occurred.";
}
llvm_unreachable("Unhandled error code");
}
@@ -51,8 +54,10 @@ namespace llvm {
void ErrorInfoBase::anchor() {}
char ErrorInfoBase::ID = 0;
char ErrorList::ID = 0;
+void ECError::anchor() {}
char ECError::ID = 0;
char StringError::ID = 0;
+char FileError::ID = 0;
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner) {
if (!E)
@@ -75,6 +80,11 @@ std::error_code inconvertibleErrorCode() {
*ErrorErrorCat);
}
+std::error_code FileError::convertToErrorCode() const {
+ return std::error_code(static_cast<int>(ErrorErrorCode::FileError),
+ *ErrorErrorCat);
+}
+
Error errorCodeToError(std::error_code EC) {
if (!EC)
return Error::success();
@@ -103,10 +113,21 @@ void Error::fatalUncheckedError() const {
}
#endif
-StringError::StringError(const Twine &S, std::error_code EC)
+StringError::StringError(std::error_code EC, const Twine &S)
: Msg(S.str()), EC(EC) {}
-void StringError::log(raw_ostream &OS) const { OS << Msg; }
+StringError::StringError(const Twine &S, std::error_code EC)
+ : Msg(S.str()), EC(EC), PrintMsgOnly(true) {}
+
+void StringError::log(raw_ostream &OS) const {
+ if (PrintMsgOnly) {
+ OS << Msg;
+ } else {
+ OS << EC.message();
+ if (!Msg.empty())
+ OS << (" " + Msg);
+ }
+}
std::error_code StringError::convertToErrorCode() const {
return EC;
@@ -121,11 +142,31 @@ void report_fatal_error(Error Err, bool GenCrashDiag) {
std::string ErrMsg;
{
raw_string_ostream ErrStream(ErrMsg);
- logAllUnhandledErrors(std::move(Err), ErrStream, "");
+ logAllUnhandledErrors(std::move(Err), ErrStream);
}
report_fatal_error(ErrMsg);
}
+} // end namespace llvm
+
+LLVMErrorTypeId LLVMGetErrorTypeId(LLVMErrorRef Err) {
+ return reinterpret_cast<ErrorInfoBase *>(Err)->dynamicClassID();
+}
+
+void LLVMConsumeError(LLVMErrorRef Err) { consumeError(unwrap(Err)); }
+
+char *LLVMGetErrorMessage(LLVMErrorRef Err) {
+ std::string Tmp = toString(unwrap(Err));
+ char *ErrMsg = new char[Tmp.size() + 1];
+ memcpy(ErrMsg, Tmp.data(), Tmp.size());
+ ErrMsg[Tmp.size()] = '\0';
+ return ErrMsg;
+}
+
+void LLVMDisposeErrorMessage(char *ErrMsg) { delete[] ErrMsg; }
+
+LLVMErrorTypeId LLVMGetStringErrorTypeId() {
+ return reinterpret_cast<void *>(&StringError::ID);
}
#ifndef _MSC_VER
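After this change the two StringError constructors log differently: the (error_code, message) form prints the error code's own message followed by the supplied text, while the older (message, error_code) form still prints only the text. A small sketch contrasting the two (the file name is arbitrary):

    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    #include <system_error>

    using namespace llvm;

    static void logBoth() {
      // New (EC, Msg) order: prints the error_code message, then the text.
      Error E1 = make_error<StringError>(
          std::make_error_code(std::errc::no_such_file_or_directory),
          "while loading foo.bc");
      // Legacy (Msg, EC) order: prints only the text, as before.
      Error E2 = make_error<StringError>(
          "while loading foo.bc",
          std::make_error_code(std::errc::no_such_file_or_directory));
      logAllUnhandledErrors(std::move(E1), errs());
      logAllUnhandledErrors(std::move(E2), errs());
    }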
diff --git a/contrib/llvm/lib/Support/FileCheck.cpp b/contrib/llvm/lib/Support/FileCheck.cpp
new file mode 100644
index 000000000000..37986c96c081
--- /dev/null
+++ b/contrib/llvm/lib/Support/FileCheck.cpp
@@ -0,0 +1,1446 @@
+//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// FileCheck does a line-by-line check of a file that validates whether it
+// contains the expected content. This is useful for regression tests etc.
+//
+// This file implements most of the API used by the FileCheck utility as well
+// as various unittests.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileCheck.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <cstdint>
+#include <list>
+#include <map>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+/// Parses the given string into the Pattern.
+///
+/// \p Prefix provides which prefix is being matched, \p SM provides the
+/// SourceMgr used for error reports, and \p LineNumber is the line number in
+/// the input file from which the pattern string was read. Returns true in
+/// case of an error, false otherwise.
+bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
+ SourceMgr &SM, unsigned LineNumber,
+ const FileCheckRequest &Req) {
+ bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
+
+ this->LineNumber = LineNumber;
+ PatternLoc = SMLoc::getFromPointer(PatternStr.data());
+
+ if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
+ // Ignore trailing whitespace.
+ while (!PatternStr.empty() &&
+ (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
+ PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
+
+ // Check that there is something on the line.
+ if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
+ SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
+ "found empty check string with prefix '" + Prefix + ":'");
+ return true;
+ }
+
+ if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
+ SM.PrintMessage(
+ PatternLoc, SourceMgr::DK_Error,
+ "found non-empty check string for empty check with prefix '" + Prefix +
+ ":'");
+ return true;
+ }
+
+ if (CheckTy == Check::CheckEmpty) {
+ RegExStr = "(\n$)";
+ return false;
+ }
+
+ // Check to see if this is a fixed string, or if it has regex pieces.
+ if (!MatchFullLinesHere &&
+ (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
+ PatternStr.find("[[") == StringRef::npos))) {
+ FixedStr = PatternStr;
+ return false;
+ }
+
+ if (MatchFullLinesHere) {
+ RegExStr += '^';
+ if (!Req.NoCanonicalizeWhiteSpace)
+ RegExStr += " *";
+ }
+
+ // Paren value #0 is for the fully matched string. Any new parenthesized
+ // values add from there.
+ unsigned CurParen = 1;
+
+ // Otherwise, there is at least one regex piece. Build up the regex pattern
+ // by escaping scary characters in fixed strings, building up one big regex.
+ while (!PatternStr.empty()) {
+ // RegEx matches.
+ if (PatternStr.startswith("{{")) {
+ // This is the start of a regex match. Scan for the }}.
+ size_t End = PatternStr.find("}}");
+ if (End == StringRef::npos) {
+ SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
+ SourceMgr::DK_Error,
+ "found start of regex string with no end '}}'");
+ return true;
+ }
+
+ // Enclose {{}} patterns in parens just like [[]] even though we're not
+ // capturing the result for any purpose. This is required in case the
+ // expression contains an alternation like: CHECK: abc{{x|z}}def. We
+ // want this to turn into: "abc(x|z)def" not "abcx|zdef".
+ RegExStr += '(';
+ ++CurParen;
+
+ if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
+ return true;
+ RegExStr += ')';
+
+ PatternStr = PatternStr.substr(End + 2);
+ continue;
+ }
+
+ // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
+ // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
+ // second form is [[foo]] which is a reference to foo. The variable name
+ // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
+ // it. This is to catch some common errors.
+ if (PatternStr.startswith("[[")) {
+ // Find the closing bracket pair ending the match. End is going to be an
+ // offset relative to the beginning of the match string.
+ size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
+
+ if (End == StringRef::npos) {
+ SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
+ SourceMgr::DK_Error,
+ "invalid named regex reference, no ]] found");
+ return true;
+ }
+
+ StringRef MatchStr = PatternStr.substr(2, End);
+ PatternStr = PatternStr.substr(End + 4);
+
+ // Get the regex name (e.g. "foo").
+ size_t NameEnd = MatchStr.find(':');
+ StringRef Name = MatchStr.substr(0, NameEnd);
+
+ if (Name.empty()) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
+ "invalid name in named regex: empty name");
+ return true;
+ }
+
+ // Verify that the name/expression is well formed. FileCheck currently
+ // supports @LINE, @LINE+number, @LINE-number expressions. The check here
+ // is relaxed; a stricter check is performed in \c EvaluateExpression.
+ bool IsExpression = false;
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ if (i == 0) {
+ if (Name[i] == '$') // Global vars start with '$'
+ continue;
+ if (Name[i] == '@') {
+ if (NameEnd != StringRef::npos) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+ SourceMgr::DK_Error,
+ "invalid name in named regex definition");
+ return true;
+ }
+ IsExpression = true;
+ continue;
+ }
+ }
+ if (Name[i] != '_' && !isalnum(Name[i]) &&
+ (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
+ SourceMgr::DK_Error, "invalid name in named regex");
+ return true;
+ }
+ }
+
+ // Name can't start with a digit.
+ if (isdigit(static_cast<unsigned char>(Name[0]))) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
+ "invalid name in named regex");
+ return true;
+ }
+
+ // Handle [[foo]].
+ if (NameEnd == StringRef::npos) {
+ // Handle variables that were defined earlier on the same line by
+ // emitting a backreference.
+ if (VariableDefs.find(Name) != VariableDefs.end()) {
+ unsigned VarParenNum = VariableDefs[Name];
+ if (VarParenNum < 1 || VarParenNum > 9) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+ SourceMgr::DK_Error,
+ "Can't back-reference more than 9 variables");
+ return true;
+ }
+ AddBackrefToRegEx(VarParenNum);
+ } else {
+ VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
+ }
+ continue;
+ }
+
+ // Handle [[foo:.*]].
+ VariableDefs[Name] = CurParen;
+ RegExStr += '(';
+ ++CurParen;
+
+ if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
+ return true;
+
+ RegExStr += ')';
+ }
+
+ // Handle fixed string matches.
+ // Find the end, which is the start of the next regex.
+ size_t FixedMatchEnd = PatternStr.find("{{");
+ FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
+ RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
+ PatternStr = PatternStr.substr(FixedMatchEnd);
+ }
+
+ if (MatchFullLinesHere) {
+ if (!Req.NoCanonicalizeWhiteSpace)
+ RegExStr += " *";
+ RegExStr += '$';
+ }
+
+ return false;
+}
+
+bool FileCheckPattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
+ Regex R(RS);
+ std::string Error;
+ if (!R.isValid(Error)) {
+ SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
+ "invalid regex: " + Error);
+ return true;
+ }
+
+ RegExStr += RS.str();
+ CurParen += R.getNumMatches();
+ return false;
+}
+
+void FileCheckPattern::AddBackrefToRegEx(unsigned BackrefNum) {
+ assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
+ std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
+ RegExStr += Backref;
+}
+
+/// Evaluates expression and stores the result to \p Value.
+///
+/// Returns true on success and false when the expression has invalid syntax.
+bool FileCheckPattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
+ // The only supported expression is @LINE([\+-]\d+)?
+ if (!Expr.startswith("@LINE"))
+ return false;
+ Expr = Expr.substr(StringRef("@LINE").size());
+ int Offset = 0;
+ if (!Expr.empty()) {
+ if (Expr[0] == '+')
+ Expr = Expr.substr(1);
+ else if (Expr[0] != '-')
+ return false;
+ if (Expr.getAsInteger(10, Offset))
+ return false;
+ }
+ Value = llvm::itostr(LineNumber + Offset);
+ return true;
+}
+
+/// Matches the pattern string against the input buffer \p Buffer
+///
+/// This returns the position that is matched or npos if there is no match. If
+/// there is a match, the size of the matched string is returned in \p
+/// MatchLen.
+///
+/// The \p VariableTable StringMap provides the current values of filecheck
+/// variables and is updated if this match defines new values.
+size_t FileCheckPattern::Match(StringRef Buffer, size_t &MatchLen,
+ StringMap<StringRef> &VariableTable) const {
+ // If this is the EOF pattern, match it immediately.
+ if (CheckTy == Check::CheckEOF) {
+ MatchLen = 0;
+ return Buffer.size();
+ }
+
+ // If this is a fixed string pattern, just match it now.
+ if (!FixedStr.empty()) {
+ MatchLen = FixedStr.size();
+ return Buffer.find(FixedStr);
+ }
+
+ // Regex match.
+
+ // If there are variable uses, we need to create a temporary string with the
+ // actual value.
+ StringRef RegExToMatch = RegExStr;
+ std::string TmpStr;
+ if (!VariableUses.empty()) {
+ TmpStr = RegExStr;
+
+ unsigned InsertOffset = 0;
+ for (const auto &VariableUse : VariableUses) {
+ std::string Value;
+
+ if (VariableUse.first[0] == '@') {
+ if (!EvaluateExpression(VariableUse.first, Value))
+ return StringRef::npos;
+ } else {
+ StringMap<StringRef>::iterator it =
+ VariableTable.find(VariableUse.first);
+ // If the variable is undefined, return an error.
+ if (it == VariableTable.end())
+ return StringRef::npos;
+
+ // Look up the value and escape it so that we can put it into the regex.
+ Value += Regex::escape(it->second);
+ }
+
+ // Plop it into the regex at the adjusted offset.
+ TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
+ Value.begin(), Value.end());
+ InsertOffset += Value.size();
+ }
+
+ // Match the newly constructed regex.
+ RegExToMatch = TmpStr;
+ }
+
+ SmallVector<StringRef, 4> MatchInfo;
+ if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
+ return StringRef::npos;
+
+ // Successful regex match.
+ assert(!MatchInfo.empty() && "Didn't get any match");
+ StringRef FullMatch = MatchInfo[0];
+
+ // If this defines any variables, remember their values.
+ for (const auto &VariableDef : VariableDefs) {
+ assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
+ VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
+ }
+
+ // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
+ // the required preceding newline, which is consumed by the pattern in the
+ // case of CHECK-EMPTY but not CHECK-NEXT.
+ size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
+ MatchLen = FullMatch.size() - MatchStartSkip;
+ return FullMatch.data() - Buffer.data() + MatchStartSkip;
+}
+
+
+/// Computes an arbitrary estimate for the quality of matching this pattern at
+/// the start of \p Buffer; a distance of zero should correspond to a perfect
+/// match.
+unsigned
+FileCheckPattern::ComputeMatchDistance(StringRef Buffer,
+ const StringMap<StringRef> &VariableTable) const {
+ // Just compute the edit distance to the example string. For regular expressions, we
+ // just compare against the regex itself and hope for the best.
+ //
+ // FIXME: One easy improvement here is to have the regex lib generate a single
+ // example regular expression which matches, and use that as the example
+ // string.
+ StringRef ExampleString(FixedStr);
+ if (ExampleString.empty())
+ ExampleString = RegExStr;
+
+ // Only compare up to the first line in the buffer, or the string size.
+ StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
+ BufferPrefix = BufferPrefix.split('\n').first;
+ return BufferPrefix.edit_distance(ExampleString);
+}
+
+void FileCheckPattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
+ const StringMap<StringRef> &VariableTable,
+ SMRange MatchRange) const {
+ // If this was a regular expression using variables, print the current
+ // variable values.
+ if (!VariableUses.empty()) {
+ for (const auto &VariableUse : VariableUses) {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ StringRef Var = VariableUse.first;
+ if (Var[0] == '@') {
+ std::string Value;
+ if (EvaluateExpression(Var, Value)) {
+ OS << "with expression \"";
+ OS.write_escaped(Var) << "\" equal to \"";
+ OS.write_escaped(Value) << "\"";
+ } else {
+ OS << "uses incorrect expression \"";
+ OS.write_escaped(Var) << "\"";
+ }
+ } else {
+ StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
+
+ // Check for undefined variable references.
+ if (it == VariableTable.end()) {
+ OS << "uses undefined variable \"";
+ OS.write_escaped(Var) << "\"";
+ } else {
+ OS << "with variable \"";
+ OS.write_escaped(Var) << "\" equal to \"";
+ OS.write_escaped(it->second) << "\"";
+ }
+ }
+
+ if (MatchRange.isValid())
+ SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
+ {MatchRange});
+ else
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
+ SourceMgr::DK_Note, OS.str());
+ }
+ }
+}
+
+static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy,
+ const SourceMgr &SM, SMLoc Loc,
+ Check::FileCheckType CheckTy,
+ StringRef Buffer, size_t Pos, size_t Len,
+ std::vector<FileCheckDiag> *Diags,
+ bool AdjustPrevDiag = false) {
+ SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos);
+ SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len);
+ SMRange Range(Start, End);
+ if (Diags) {
+ if (AdjustPrevDiag)
+ Diags->rbegin()->MatchTy = MatchTy;
+ else
+ Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range);
+ }
+ return Range;
+}
+
+void FileCheckPattern::PrintFuzzyMatch(
+ const SourceMgr &SM, StringRef Buffer,
+ const StringMap<StringRef> &VariableTable,
+ std::vector<FileCheckDiag> *Diags) const {
+ // Attempt to find the closest/best fuzzy match. Usually an error happens
+ // because some string in the output didn't exactly match. In these cases, we
+ // would like to show the user a best guess at what "should have" matched, to
+ // save them having to actually check the input manually.
+ size_t NumLinesForward = 0;
+ size_t Best = StringRef::npos;
+ double BestQuality = 0;
+
+ // Use an arbitrary 4k limit on how far we will search.
+ for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
+ if (Buffer[i] == '\n')
+ ++NumLinesForward;
+
+ // Patterns have leading whitespace stripped, so skip whitespace when
+ // looking for something which looks like a pattern.
+ if (Buffer[i] == ' ' || Buffer[i] == '\t')
+ continue;
+
+ // Compute the "quality" of this match as an arbitrary combination of the
+ // match distance and the number of lines skipped to get to this match.
+ unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
+ double Quality = Distance + (NumLinesForward / 100.);
+
+ if (Quality < BestQuality || Best == StringRef::npos) {
+ Best = i;
+ BestQuality = Quality;
+ }
+ }
+
+ // Print the "possible intended match here" line if we found something
+ // reasonable and not equal to what we showed in the "scanning from here"
+ // line.
+ if (Best && Best != StringRef::npos && BestQuality < 50) {
+ SMRange MatchRange =
+ ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(),
+ getCheckTy(), Buffer, Best, 0, Diags);
+ SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note,
+ "possible intended match here");
+
+ // FIXME: If we wanted to be really friendly we would show why the match
+ // failed, as it can be hard to spot simple one character differences.
+ }
+}
+
+/// Finds the closing sequence of a regex variable usage or definition.
+///
+/// \p Str has to point to the beginning of the definition (right after the
+/// opening sequence). Returns the offset of the closing sequence within Str,
+/// or npos if it was not found.
+size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
+ // Offset keeps track of the current offset within the input Str
+ size_t Offset = 0;
+ // [...] Nesting depth
+ size_t BracketDepth = 0;
+
+ while (!Str.empty()) {
+ if (Str.startswith("]]") && BracketDepth == 0)
+ return Offset;
+ if (Str[0] == '\\') {
+ // Backslash escapes the next char within regexes, so skip them both.
+ Str = Str.substr(2);
+ Offset += 2;
+ } else {
+ switch (Str[0]) {
+ default:
+ break;
+ case '[':
+ BracketDepth++;
+ break;
+ case ']':
+ if (BracketDepth == 0) {
+ SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
+ SourceMgr::DK_Error,
+ "missing closing \"]\" for regex variable");
+ exit(1);
+ }
+ BracketDepth--;
+ break;
+ }
+ Str = Str.substr(1);
+ Offset++;
+ }
+ }
+
+ return StringRef::npos;
+}
+
+/// Canonicalize whitespaces in the file. Line endings are replaced with
+/// UNIX-style '\n'.
+StringRef
+llvm::FileCheck::CanonicalizeFile(MemoryBuffer &MB,
+ SmallVectorImpl<char> &OutputBuffer) {
+ OutputBuffer.reserve(MB.getBufferSize());
+
+ for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
+ Ptr != End; ++Ptr) {
+ // Eliminate trailing dosish \r.
+ if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
+ continue;
+ }
+
+ // If current char is not a horizontal whitespace or if horizontal
+ // whitespace canonicalization is disabled, dump it to output as is.
+ if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
+ OutputBuffer.push_back(*Ptr);
+ continue;
+ }
+
+ // Otherwise, add one space and advance over neighboring space.
+ OutputBuffer.push_back(' ');
+ while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
+ ++Ptr;
+ }
+
+ // Add a null byte and then return all but that byte.
+ OutputBuffer.push_back('\0');
+ return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
+}
+
+FileCheckDiag::FileCheckDiag(const SourceMgr &SM,
+ const Check::FileCheckType &CheckTy,
+ SMLoc CheckLoc, MatchType MatchTy,
+ SMRange InputRange)
+ : CheckTy(CheckTy), MatchTy(MatchTy) {
+ auto Start = SM.getLineAndColumn(InputRange.Start);
+ auto End = SM.getLineAndColumn(InputRange.End);
+ InputStartLine = Start.first;
+ InputStartCol = Start.second;
+ InputEndLine = End.first;
+ InputEndCol = End.second;
+ Start = SM.getLineAndColumn(CheckLoc);
+ CheckLine = Start.first;
+ CheckCol = Start.second;
+}
+
+static bool IsPartOfWord(char c) {
+ return (isalnum(c) || c == '-' || c == '_');
+}
+
+Check::FileCheckType &Check::FileCheckType::setCount(int C) {
+ assert(C > 0 && "zero and negative counts are not supported");
+ assert((C == 1 || Kind == CheckPlain) &&
+ "count supported only for plain CHECK directives");
+ Count = C;
+ return *this;
+}
+
+// Get a description of the type.
+std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
+ switch (Kind) {
+ case Check::CheckNone:
+ return "invalid";
+ case Check::CheckPlain:
+ if (Count > 1)
+ return Prefix.str() + "-COUNT";
+ return Prefix;
+ case Check::CheckNext:
+ return Prefix.str() + "-NEXT";
+ case Check::CheckSame:
+ return Prefix.str() + "-SAME";
+ case Check::CheckNot:
+ return Prefix.str() + "-NOT";
+ case Check::CheckDAG:
+ return Prefix.str() + "-DAG";
+ case Check::CheckLabel:
+ return Prefix.str() + "-LABEL";
+ case Check::CheckEmpty:
+ return Prefix.str() + "-EMPTY";
+ case Check::CheckEOF:
+ return "implicit EOF";
+ case Check::CheckBadNot:
+ return "bad NOT";
+ case Check::CheckBadCount:
+ return "bad COUNT";
+ }
+ llvm_unreachable("unknown FileCheckType");
+}
+
+static std::pair<Check::FileCheckType, StringRef>
+FindCheckType(StringRef Buffer, StringRef Prefix) {
+ if (Buffer.size() <= Prefix.size())
+ return {Check::CheckNone, StringRef()};
+
+ char NextChar = Buffer[Prefix.size()];
+
+ StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
+ // Verify that the : is present after the prefix.
+ if (NextChar == ':')
+ return {Check::CheckPlain, Rest};
+
+ if (NextChar != '-')
+ return {Check::CheckNone, StringRef()};
+
+ if (Rest.consume_front("COUNT-")) {
+ int64_t Count;
+ if (Rest.consumeInteger(10, Count))
+ // Error happened in parsing integer.
+ return {Check::CheckBadCount, Rest};
+ if (Count <= 0 || Count > INT32_MAX)
+ return {Check::CheckBadCount, Rest};
+ if (!Rest.consume_front(":"))
+ return {Check::CheckBadCount, Rest};
+ return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest};
+ }
+
+ if (Rest.consume_front("NEXT:"))
+ return {Check::CheckNext, Rest};
+
+ if (Rest.consume_front("SAME:"))
+ return {Check::CheckSame, Rest};
+
+ if (Rest.consume_front("NOT:"))
+ return {Check::CheckNot, Rest};
+
+ if (Rest.consume_front("DAG:"))
+ return {Check::CheckDAG, Rest};
+
+ if (Rest.consume_front("LABEL:"))
+ return {Check::CheckLabel, Rest};
+
+ if (Rest.consume_front("EMPTY:"))
+ return {Check::CheckEmpty, Rest};
+
+ // You can't combine -NOT with another suffix.
+ if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
+ Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
+ Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
+ Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
+ return {Check::CheckBadNot, Rest};
+
+ return {Check::CheckNone, Rest};
+}
+
+// From the given position, find the next character after the word.
+static size_t SkipWord(StringRef Str, size_t Loc) {
+ while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
+ ++Loc;
+ return Loc;
+}
+
+/// Search the buffer for the first prefix in the prefix regular expression.
+///
+/// This searches the buffer using the provided regular expression, however it
+/// enforces constraints beyond that:
+/// 1) The found prefix must not be a suffix of something that looks like
+/// a valid prefix.
+/// 2) The found prefix must be followed by a valid check type suffix using \c
+/// FindCheckType above.
+///
+/// Returns a pair of StringRefs into the Buffer:
+/// - the first match of the regular expression that satisfies both
+/// constraints above, or an empty StringRef to indicate failure;
+/// - the buffer rewound to the location right after the parsed suffix, from
+/// which parsing should continue.
+///
+/// If this routine returns a valid prefix, it will also shrink \p Buffer to
+/// start at the beginning of the returned prefix, increment \p LineNumber for
+/// each new line consumed from \p Buffer, and set \p CheckTy to the type of
+/// check found by examining the suffix.
+///
+/// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
+/// is unspecified.
+static std::pair<StringRef, StringRef>
+FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
+ unsigned &LineNumber, Check::FileCheckType &CheckTy) {
+ SmallVector<StringRef, 2> Matches;
+
+ while (!Buffer.empty()) {
+ // Find the first (longest) match using the RE.
+ if (!PrefixRE.match(Buffer, &Matches))
+ // No match at all, bail.
+ return {StringRef(), StringRef()};
+
+ StringRef Prefix = Matches[0];
+ Matches.clear();
+
+ assert(Prefix.data() >= Buffer.data() &&
+ Prefix.data() < Buffer.data() + Buffer.size() &&
+ "Prefix doesn't start inside of buffer!");
+ size_t Loc = Prefix.data() - Buffer.data();
+ StringRef Skipped = Buffer.substr(0, Loc);
+ Buffer = Buffer.drop_front(Loc);
+ LineNumber += Skipped.count('\n');
+
+ // Check that the matched prefix isn't a suffix of some other check-like
+ // word.
+ // FIXME: This is a very ad-hoc check. It would be better handled in some
+ // other way. Among other things it seems hard to distinguish between
+ // intentional and unintentional uses of this feature.
+ if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
+ // Now extract the type.
+ StringRef AfterSuffix;
+ std::tie(CheckTy, AfterSuffix) = FindCheckType(Buffer, Prefix);
+
+ // If we've found a valid check type for this prefix, we're done.
+ if (CheckTy != Check::CheckNone)
+ return {Prefix, AfterSuffix};
+ }
+
+ // If we didn't successfully find a prefix, we need to skip this invalid
+ // prefix and continue scanning. We directly skip the prefix that was
+ // matched and any additional parts of that check-like word.
+ Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
+ }
+
+ // We ran out of buffer while skipping partial matches so give up.
+ return {StringRef(), StringRef()};
+}
+
+/// Read the check file, which specifies the sequence of expected strings.
+///
+/// The strings are added to the CheckStrings vector. Returns true in case of
+/// an error, false otherwise.
+bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
+ Regex &PrefixRE,
+ std::vector<FileCheckString> &CheckStrings) {
+ std::vector<FileCheckPattern> ImplicitNegativeChecks;
+ for (const auto &PatternString : Req.ImplicitCheckNot) {
+ // Create a buffer with fake command line content in order to display the
+ // command line option responsible for the specific implicit CHECK-NOT.
+ std::string Prefix = "-implicit-check-not='";
+ std::string Suffix = "'";
+ std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
+ Prefix + PatternString + Suffix, "command line");
+
+ StringRef PatternInBuffer =
+ CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
+ SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
+
+ ImplicitNegativeChecks.push_back(FileCheckPattern(Check::CheckNot));
+ ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
+ "IMPLICIT-CHECK", SM, 0, Req);
+ }
+
+ std::vector<FileCheckPattern> DagNotMatches = ImplicitNegativeChecks;
+
+ // LineNumber keeps track of the line on which CheckPrefix instances are
+ // found.
+ unsigned LineNumber = 1;
+
+ while (1) {
+ Check::FileCheckType CheckTy;
+
+ // See if a prefix occurs in the memory buffer.
+ StringRef UsedPrefix;
+ StringRef AfterSuffix;
+ std::tie(UsedPrefix, AfterSuffix) =
+ FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, CheckTy);
+ if (UsedPrefix.empty())
+ break;
+ assert(UsedPrefix.data() == Buffer.data() &&
+ "Failed to move Buffer's start forward, or pointed prefix outside "
+ "of the buffer!");
+ assert(AfterSuffix.data() >= Buffer.data() &&
+ AfterSuffix.data() < Buffer.data() + Buffer.size() &&
+ "Parsing after suffix doesn't start inside of buffer!");
+
+ // Location to use for error messages.
+ const char *UsedPrefixStart = UsedPrefix.data();
+
+ // Skip the buffer to the end of parsed suffix (or just prefix, if no good
+ // suffix was processed).
+ Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size())
+ : AfterSuffix;
+
+ // Complain about useful-looking but unsupported suffixes.
+ if (CheckTy == Check::CheckBadNot) {
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
+ "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
+ return true;
+ }
+
+ // Complain about invalid count specification.
+ if (CheckTy == Check::CheckBadCount) {
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
+ "invalid count in -COUNT specification on prefix '" +
+ UsedPrefix + "'");
+ return true;
+ }
+
+ // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
+ // leading whitespace.
+ if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
+ Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
+
+ // Scan ahead to the end of line.
+ size_t EOL = Buffer.find_first_of("\n\r");
+
+ // Remember the location of the start of the pattern, for diagnostics.
+ SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
+
+ // Parse the pattern.
+ FileCheckPattern P(CheckTy);
+ if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber, Req))
+ return true;
+
+ // Verify that CHECK-LABEL lines do not define or use variables
+ if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
+ SM.PrintMessage(
+ SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
+ "found '" + UsedPrefix + "-LABEL:'"
+ " with variable definition or use");
+ return true;
+ }
+
+ Buffer = Buffer.substr(EOL);
+
+ // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
+ if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
+ CheckTy == Check::CheckEmpty) &&
+ CheckStrings.empty()) {
+ StringRef Type = CheckTy == Check::CheckNext
+ ? "NEXT"
+ : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
+ SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
+ SourceMgr::DK_Error,
+ "found '" + UsedPrefix + "-" + Type +
+ "' without previous '" + UsedPrefix + ": line");
+ return true;
+ }
+
+ // Handle CHECK-DAG/-NOT.
+ if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
+ DagNotMatches.push_back(P);
+ continue;
+ }
+
+ // Okay, add the string we captured to the output vector and move on.
+ CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
+ std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
+ DagNotMatches = ImplicitNegativeChecks;
+ }
+
+ // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
+ // prefix as a filler for the error message.
+ if (!DagNotMatches.empty()) {
+ CheckStrings.emplace_back(FileCheckPattern(Check::CheckEOF), *Req.CheckPrefixes.begin(),
+ SMLoc::getFromPointer(Buffer.data()));
+ std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
+ }
+
+ if (CheckStrings.empty()) {
+ errs() << "error: no check strings found with prefix"
+ << (Req.CheckPrefixes.size() > 1 ? "es " : " ");
+ auto I = Req.CheckPrefixes.begin();
+ auto E = Req.CheckPrefixes.end();
+ if (I != E) {
+ errs() << "\'" << *I << ":'";
+ ++I;
+ }
+ for (; I != E; ++I)
+ errs() << ", \'" << *I << ":'";
+
+ errs() << '\n';
+ return true;
+ }
+
+ return false;
+}
+
+static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
+ StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat,
+ int MatchedCount, StringRef Buffer,
+ StringMap<StringRef> &VariableTable, size_t MatchPos,
+ size_t MatchLen, const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) {
+ if (ExpectedMatch) {
+ if (!Req.Verbose)
+ return;
+ if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
+ return;
+ }
+ SMRange MatchRange = ProcessMatchResult(
+ ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected
+ : FileCheckDiag::MatchFoundButExcluded,
+ SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags);
+ std::string Message = formatv("{0}: {1} string found in input",
+ Pat.getCheckTy().getDescription(Prefix),
+ (ExpectedMatch ? "expected" : "excluded"))
+ .str();
+ if (Pat.getCount() > 1)
+ Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
+
+ SM.PrintMessage(
+ Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
+ SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
+ {MatchRange});
+ Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange);
+}
+
+static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
+ const FileCheckString &CheckStr, int MatchedCount,
+ StringRef Buffer, StringMap<StringRef> &VariableTable,
+ size_t MatchPos, size_t MatchLen, FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) {
+ PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
+ MatchedCount, Buffer, VariableTable, MatchPos, MatchLen, Req,
+ Diags);
+}
+
+static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
+ StringRef Prefix, SMLoc Loc,
+ const FileCheckPattern &Pat, int MatchedCount,
+ StringRef Buffer, StringMap<StringRef> &VariableTable,
+ bool VerboseVerbose,
+ std::vector<FileCheckDiag> *Diags) {
+ if (!ExpectedMatch && !VerboseVerbose)
+ return;
+
+ // Otherwise, we have an error, emit an error message.
+ std::string Message = formatv("{0}: {1} string not found in input",
+ Pat.getCheckTy().getDescription(Prefix),
+ (ExpectedMatch ? "expected" : "excluded"))
+ .str();
+ if (Pat.getCount() > 1)
+ Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
+
+ SM.PrintMessage(
+ Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message);
+
+ // Print the "scanning from here" line. If the current position is at the
+ // end of a line, advance to the start of the next line.
+ Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
+ SMRange SearchRange = ProcessMatchResult(
+ ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
+ : FileCheckDiag::MatchNoneAndExcluded,
+ SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
+ SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");
+
+ // Allow the pattern to print additional information if desired.
+ Pat.PrintVariableUses(SM, Buffer, VariableTable);
+
+ if (ExpectedMatch)
+ Pat.PrintFuzzyMatch(SM, Buffer, VariableTable, Diags);
+}
+
+static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
+ const FileCheckString &CheckStr, int MatchedCount,
+ StringRef Buffer, StringMap<StringRef> &VariableTable,
+ bool VerboseVerbose,
+ std::vector<FileCheckDiag> *Diags) {
+ PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
+ MatchedCount, Buffer, VariableTable, VerboseVerbose, Diags);
+}
+
+/// Count the number of newlines in the specified range.
+static unsigned CountNumNewlinesBetween(StringRef Range,
+ const char *&FirstNewLine) {
+ unsigned NumNewLines = 0;
+ while (1) {
+ // Scan for newline.
+ Range = Range.substr(Range.find_first_of("\n\r"));
+ if (Range.empty())
+ return NumNewLines;
+
+ ++NumNewLines;
+
+ // Handle \n\r and \r\n as a single newline.
+ if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
+ (Range[0] != Range[1]))
+ Range = Range.substr(1);
+ Range = Range.substr(1);
+
+ if (NumNewLines == 1)
+ FirstNewLine = Range.begin();
+ }
+}
+
+/// Match check string and its "not strings" and/or "dag strings".
+size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
+ bool IsLabelScanMode, size_t &MatchLen,
+ StringMap<StringRef> &VariableTable,
+ FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const {
+ size_t LastPos = 0;
+ std::vector<const FileCheckPattern *> NotStrings;
+
+ // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
+ // bounds; we have not processed variable definitions within the bounded block
+ // yet so cannot handle any final CHECK-DAG yet; this is handled when going
+ // over the block again (including the last CHECK-LABEL) in normal mode.
+ if (!IsLabelScanMode) {
+ // Match "dag strings" (with mixed "not strings" if any).
+ LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable, Req, Diags);
+ if (LastPos == StringRef::npos)
+ return StringRef::npos;
+ }
+
+ // Match itself from the last position after matching CHECK-DAG.
+ size_t LastMatchEnd = LastPos;
+ size_t FirstMatchPos = 0;
+ // Go match the pattern Count times. The majority of patterns only match
+ // once (count 1), though.
+ assert(Pat.getCount() != 0 && "pattern count cannot be zero");
+ for (int i = 1; i <= Pat.getCount(); i++) {
+ StringRef MatchBuffer = Buffer.substr(LastMatchEnd);
+ size_t CurrentMatchLen;
+ // get a match at current start point
+ size_t MatchPos = Pat.Match(MatchBuffer, CurrentMatchLen, VariableTable);
+ if (i == 1)
+ FirstMatchPos = LastPos + MatchPos;
+
+ // report
+ if (MatchPos == StringRef::npos) {
+ PrintNoMatch(true, SM, *this, i, MatchBuffer, VariableTable,
+ Req.VerboseVerbose, Diags);
+ return StringRef::npos;
+ }
+ PrintMatch(true, SM, *this, i, MatchBuffer, VariableTable, MatchPos,
+ CurrentMatchLen, Req, Diags);
+
+ // move start point after the match
+ LastMatchEnd += MatchPos + CurrentMatchLen;
+ }
+ // Full match len counts from first match pos.
+ MatchLen = LastMatchEnd - FirstMatchPos;
+
+ // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
+ // or CHECK-NOT
+ if (!IsLabelScanMode) {
+ size_t MatchPos = FirstMatchPos - LastPos;
+ StringRef MatchBuffer = Buffer.substr(LastPos);
+ StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
+
+ // If this check is a "CHECK-NEXT", verify that the previous match was on
+ // the previous line (i.e. that there is one newline between them).
+ if (CheckNext(SM, SkippedRegion)) {
+ ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
+ Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
+ Diags, Req.Verbose);
+ return StringRef::npos;
+ }
+
+ // If this check is a "CHECK-SAME", verify that the previous match was on
+ // the same line (i.e. that there is no newline between them).
+ if (CheckSame(SM, SkippedRegion)) {
+ ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
+ Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
+ Diags, Req.Verbose);
+ return StringRef::npos;
+ }
+
+ // If this match had "not strings", verify that they don't exist in the
+ // skipped region.
+ if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req, Diags))
+ return StringRef::npos;
+ }
+
+ return FirstMatchPos;
+}
+
+/// Verify there is a single line in the given buffer.
+bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
+ if (Pat.getCheckTy() != Check::CheckNext &&
+ Pat.getCheckTy() != Check::CheckEmpty)
+ return false;
+
+ Twine CheckName =
+ Prefix +
+ Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
+
+ // Count the number of newlines between the previous match and this one.
+ assert(Buffer.data() !=
+ SM.getMemoryBuffer(SM.FindBufferContainingLoc(
+ SMLoc::getFromPointer(Buffer.data())))
+ ->getBufferStart() &&
+ "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
+
+ const char *FirstNewLine = nullptr;
+ unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
+
+ if (NumNewLines == 0) {
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ CheckName + ": is on the same line as previous match");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
+ "'next' match was here");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
+ "previous match ended here");
+ return true;
+ }
+
+ if (NumNewLines != 1) {
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ CheckName +
+ ": is not on the line after the previous match");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
+ "'next' match was here");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
+ "previous match ended here");
+ SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
+ "non-matching line after previous match is here");
+ return true;
+ }
+
+ return false;
+}
+
+/// Verify there is no newline in the given buffer.
+bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
+ if (Pat.getCheckTy() != Check::CheckSame)
+ return false;
+
+ // Count the number of newlines between the previous match and this one.
+ assert(Buffer.data() !=
+ SM.getMemoryBuffer(SM.FindBufferContainingLoc(
+ SMLoc::getFromPointer(Buffer.data())))
+ ->getBufferStart() &&
+ "CHECK-SAME can't be the first check in a file");
+
+ const char *FirstNewLine = nullptr;
+ unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
+
+ if (NumNewLines != 0) {
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ Prefix +
+ "-SAME: is not on the same line as the previous match");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
+ "'next' match was here");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
+ "previous match ended here");
+ return true;
+ }
+
+ return false;
+}
+
+/// Verify there's no "not strings" in the given buffer.
+bool FileCheckString::CheckNot(
+ const SourceMgr &SM, StringRef Buffer,
+ const std::vector<const FileCheckPattern *> &NotStrings,
+ StringMap<StringRef> &VariableTable, const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const {
+ for (const FileCheckPattern *Pat : NotStrings) {
+ assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
+
+ size_t MatchLen = 0;
+ size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
+
+ if (Pos == StringRef::npos) {
+ PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer,
+ VariableTable, Req.VerboseVerbose, Diags);
+ continue;
+ }
+
+ PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, VariableTable,
+ Pos, MatchLen, Req, Diags);
+
+ return true;
+ }
+
+ return false;
+}
+
+/// Match "dag strings" and their mixed "not strings".
+size_t
+FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
+ std::vector<const FileCheckPattern *> &NotStrings,
+ StringMap<StringRef> &VariableTable,
+ const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const {
+ if (DagNotStrings.empty())
+ return 0;
+
+ // The start of the search range.
+ size_t StartPos = 0;
+
+ struct MatchRange {
+ size_t Pos;
+ size_t End;
+ };
+ // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match
+ // ranges are erased from this list once they are no longer in the search
+ // range.
+ std::list<MatchRange> MatchRanges;
+
+ // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
+ // group, so we don't use a range-based for loop here.
+ for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
+ PatItr != PatEnd; ++PatItr) {
+ const FileCheckPattern &Pat = *PatItr;
+ assert((Pat.getCheckTy() == Check::CheckDAG ||
+ Pat.getCheckTy() == Check::CheckNot) &&
+ "Invalid CHECK-DAG or CHECK-NOT!");
+
+ if (Pat.getCheckTy() == Check::CheckNot) {
+ NotStrings.push_back(&Pat);
+ continue;
+ }
+
+ assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
+
+ // CHECK-DAG always matches from the start.
+ size_t MatchLen = 0, MatchPos = StartPos;
+
+ // Search for a match that doesn't overlap a previous match in this
+ // CHECK-DAG group.
+ for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
+ StringRef MatchBuffer = Buffer.substr(MatchPos);
+ size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
+ // With a group of CHECK-DAGs, a single mismatch means the match on
+ // that group of CHECK-DAGs fails immediately.
+ if (MatchPosBuf == StringRef::npos) {
+ PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer,
+ VariableTable, Req.VerboseVerbose, Diags);
+ return StringRef::npos;
+ }
+ // Re-calc it as the offset relative to the start of the original string.
+ MatchPos += MatchPosBuf;
+ if (Req.VerboseVerbose)
+ PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer,
+ VariableTable, MatchPos, MatchLen, Req, Diags);
+ MatchRange M{MatchPos, MatchPos + MatchLen};
+ if (Req.AllowDeprecatedDagOverlap) {
+ // We don't need to track all matches in this mode, so we just maintain
+ // one match range that encompasses the current CHECK-DAG group's
+ // matches.
+ if (MatchRanges.empty())
+ MatchRanges.insert(MatchRanges.end(), M);
+ else {
+ auto Block = MatchRanges.begin();
+ Block->Pos = std::min(Block->Pos, M.Pos);
+ Block->End = std::max(Block->End, M.End);
+ }
+ break;
+ }
+ // Iterate previous matches until overlapping match or insertion point.
+ bool Overlap = false;
+ for (; MI != ME; ++MI) {
+ if (M.Pos < MI->End) {
+ // !Overlap => New match has no overlap and is before this old match.
+ // Overlap => New match overlaps this old match.
+ Overlap = MI->Pos < M.End;
+ break;
+ }
+ }
+ if (!Overlap) {
+ // Insert non-overlapping match into list.
+ MatchRanges.insert(MI, M);
+ break;
+ }
+ if (Req.VerboseVerbose) {
+ SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
+ SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
+ SMRange OldRange(OldStart, OldEnd);
+ SM.PrintMessage(OldStart, SourceMgr::DK_Note,
+ "match discarded, overlaps earlier DAG match here",
+ {OldRange});
+ if (Diags)
+ Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
+ }
+ MatchPos = MI->End;
+ }
+ if (!Req.VerboseVerbose)
+ PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, VariableTable,
+ MatchPos, MatchLen, Req, Diags);
+
+ // Handle the end of a CHECK-DAG group.
+ if (std::next(PatItr) == PatEnd ||
+ std::next(PatItr)->getCheckTy() == Check::CheckNot) {
+ if (!NotStrings.empty()) {
+ // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
+ // CHECK-DAG, verify that none of the 'not' strings occur in that
+ // region.
+ StringRef SkippedRegion =
+ Buffer.slice(StartPos, MatchRanges.begin()->Pos);
+ if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req, Diags))
+ return StringRef::npos;
+ // Clear "not strings".
+ NotStrings.clear();
+ }
+ // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
+ // end of this CHECK-DAG group's match range.
+ StartPos = MatchRanges.rbegin()->End;
+ // Don't waste time checking for (impossible) overlaps before that.
+ MatchRanges.clear();
+ }
+ }
+
+ return StartPos;
+}
+
+// A check prefix must contain only alphanumeric characters, hyphens, and underscores.
+static bool ValidateCheckPrefix(StringRef CheckPrefix) {
+ Regex Validator("^[a-zA-Z0-9_-]*$");
+ return Validator.match(CheckPrefix);
+}
+
+bool llvm::FileCheck::ValidateCheckPrefixes() {
+ StringSet<> PrefixSet;
+
+ for (StringRef Prefix : Req.CheckPrefixes) {
+ // Reject empty prefixes.
+ if (Prefix == "")
+ return false;
+
+ if (!PrefixSet.insert(Prefix).second)
+ return false;
+
+ if (!ValidateCheckPrefix(Prefix))
+ return false;
+ }
+
+ return true;
+}
+
+// Combines the check prefixes into a single regex so that we can efficiently
+// scan for any prefix in the set.
+//
+// The semantics are that the longest match wins, which matches the behavior
+// of our regex library.
+Regex llvm::FileCheck::buildCheckPrefixRegex() {
+ // I don't think there's a way to specify an initial value for cl::list,
+ // so if nothing was specified, add the default
+ if (Req.CheckPrefixes.empty())
+ Req.CheckPrefixes.push_back("CHECK");
+
+ // We already validated the contents of CheckPrefixes so just concatenate
+ // them as alternatives.
+ SmallString<32> PrefixRegexStr;
+ for (StringRef Prefix : Req.CheckPrefixes) {
+ if (Prefix != Req.CheckPrefixes.front())
+ PrefixRegexStr.push_back('|');
+
+ PrefixRegexStr.append(Prefix);
+ }
+
+ return Regex(PrefixRegexStr);
+}
+
+// Remove local variables from \p VariableTable. Global variables
+// (which start with '$') are preserved.
+static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
+ SmallVector<StringRef, 16> LocalVars;
+ for (const auto &Var : VariableTable)
+ if (Var.first()[0] != '$')
+ LocalVars.push_back(Var.first());
+
+ for (const auto &Var : LocalVars)
+ VariableTable.erase(Var);
+}
+
+/// Check the input to FileCheck provided in the \p Buffer against the \p
+/// CheckStrings read from the check file.
+///
+/// Returns false if the input fails to satisfy the checks.
+bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
+ ArrayRef<FileCheckString> CheckStrings,
+ std::vector<FileCheckDiag> *Diags) {
+ bool ChecksFailed = false;
+
+ /// VariableTable - This holds all the current FileCheck variables.
+ StringMap<StringRef> VariableTable;
+
+ for (const auto& Def : Req.GlobalDefines)
+ VariableTable.insert(StringRef(Def).split('='));
+
+ unsigned i = 0, j = 0, e = CheckStrings.size();
+ while (true) {
+ StringRef CheckRegion;
+ if (j == e) {
+ CheckRegion = Buffer;
+ } else {
+ const FileCheckString &CheckLabelStr = CheckStrings[j];
+ if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
+ ++j;
+ continue;
+ }
+
+ // Scan to the next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG.
+ size_t MatchLabelLen = 0;
+ size_t MatchLabelPos = CheckLabelStr.Check(
+ SM, Buffer, true, MatchLabelLen, VariableTable, Req, Diags);
+ if (MatchLabelPos == StringRef::npos)
+ // Immediately bail if CHECK-LABEL fails; there is nothing else we can do.
+ return false;
+
+ CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
+ Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
+ ++j;
+ }
+
+ if (Req.EnableVarScope)
+ ClearLocalVars(VariableTable);
+
+ for (; i != j; ++i) {
+ const FileCheckString &CheckStr = CheckStrings[i];
+
+ // Check each string within the scanned region, including a second check
+ // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
+ size_t MatchLen = 0;
+ size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
+ VariableTable, Req, Diags);
+
+ if (MatchPos == StringRef::npos) {
+ ChecksFailed = true;
+ i = j;
+ break;
+ }
+
+ CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
+ }
+
+ if (j == e)
+ break;
+ }
+
+ // Success if no checks failed.
+ return !ChecksFailed;
+}
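
FileCheck's CHECK-DAG handling above keeps a sorted list of non-overlapping match ranges and, when a new match collides with an earlier one, restarts the pattern search just past the conflicting range. Below is a minimal, standalone C++ sketch of that bookkeeping; it is not the FileCheck code itself and the names are illustrative.

#include <cstddef>
#include <list>

struct Range { size_t Pos, End; };

// Try to place a new match M into a sorted list of non-overlapping ranges.
// On success the range is inserted and true is returned; on overlap,
// NextStart is advanced past the conflicting range so the caller can
// re-run its pattern search from there, as CheckDag does.
static bool insertNonOverlapping(std::list<Range> &Ranges, Range M,
                                 size_t &NextStart) {
  auto It = Ranges.begin();
  for (; It != Ranges.end(); ++It) {
    if (M.Pos < It->End) {
      if (It->Pos < M.End) { // Overlaps an earlier match.
        NextStart = It->End;
        return false;
      }
      break; // No overlap; M belongs before *It.
    }
  }
  Ranges.insert(It, M);
  return true;
}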
diff --git a/contrib/llvm/lib/Support/FileOutputBuffer.cpp b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
index 1214b5a0ba1f..b8223126227d 100644
--- a/contrib/llvm/lib/Support/FileOutputBuffer.cpp
+++ b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
@@ -61,6 +61,12 @@ public:
consumeError(Temp.discard());
}
+ void discard() override {
+ // Delete the temp file if it was still open, but keep the mapping
+ // active.
+ consumeError(Temp.discard());
+ }
+
private:
std::unique_ptr<fs::mapped_file_region> Buffer;
fs::TempFile Temp;
diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp
index cf9847faccd1..ee69a64ac97b 100644
--- a/contrib/llvm/lib/Support/FoldingSet.cpp
+++ b/contrib/llvm/lib/Support/FoldingSet.cpp
@@ -275,7 +275,7 @@ void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) {
// Clear out new buckets.
Buckets = AllocateBuckets(NewBucketCount);
- // Set NumBuckets only if allocation of new buckets was succesful
+ // Set NumBuckets only if allocation of new buckets was successful.
NumBuckets = NewBucketCount;
NumNodes = 0;
diff --git a/contrib/llvm/lib/Support/FormatVariadic.cpp b/contrib/llvm/lib/Support/FormatVariadic.cpp
index 6dd133e6c50a..1f3505d5f74f 100644
--- a/contrib/llvm/lib/Support/FormatVariadic.cpp
+++ b/contrib/llvm/lib/Support/FormatVariadic.cpp
@@ -152,3 +152,5 @@ formatv_object_base::parseFormatString(StringRef Fmt) {
}
return Replacements;
}
+
+void detail::format_adapter::anchor() { }
diff --git a/contrib/llvm/lib/Support/Hashing.cpp b/contrib/llvm/lib/Support/Hashing.cpp
index c69efb7c3cc9..7de25cec7371 100644
--- a/contrib/llvm/lib/Support/Hashing.cpp
+++ b/contrib/llvm/lib/Support/Hashing.cpp
@@ -20,10 +20,10 @@ using namespace llvm;
// Provide a definition and static initializer for the fixed seed. This
// initializer should always be zero to ensure its value can never appear to be
// non-zero, even during dynamic initialization.
-size_t llvm::hashing::detail::fixed_seed_override = 0;
+uint64_t llvm::hashing::detail::fixed_seed_override = 0;
// Implement the function for forced setting of the fixed seed.
// FIXME: Use atomic operations here so that there is no data race.
-void llvm::set_fixed_execution_hash_seed(size_t fixed_value) {
+void llvm::set_fixed_execution_hash_seed(uint64_t fixed_value) {
hashing::detail::fixed_seed_override = fixed_value;
}
diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp
index 2c718dd3f5a8..d5a688c7fb9b 100644
--- a/contrib/llvm/lib/Support/Host.cpp
+++ b/contrib/llvm/lib/Support/Host.cpp
@@ -196,6 +196,32 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Default("generic");
}
+ if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
+ if (Lines[I].startswith("CPU part")) {
+ return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
+ .Case("0x516", "thunderx2t99")
+ .Case("0x0516", "thunderx2t99")
+ .Case("0xaf", "thunderx2t99")
+ .Case("0x0af", "thunderx2t99")
+ .Case("0xa1", "thunderxt88")
+ .Case("0x0a1", "thunderxt88")
+ .Default("generic");
+ }
+ }
+ }
+
+ if (Implementer == "0x48") // HiSilicon Technologies, Inc.
+ // Look for the CPU part line.
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("CPU part"))
+ // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
+ // values correspond to the "Part number" in the CP15/c0 register. The
+ // contents are specified in the various processor manuals.
+ return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
+ .Case("0xd01", "tsv110")
+ .Default("generic");
+
if (Implementer == "0x51") // Qualcomm Technologies, Inc.
// Look for the CPU part line.
for (unsigned I = 0, E = Lines.size(); I != E; ++I)
@@ -496,8 +522,8 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
static void
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
unsigned Brand_id, unsigned Features,
- unsigned Features2, unsigned *Type,
- unsigned *Subtype) {
+ unsigned Features2, unsigned Features3,
+ unsigned *Type, unsigned *Subtype) {
if (Brand_id != 0)
return;
switch (Family) {
@@ -664,12 +690,24 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
default: // Unknown family 6 CPU, try to guess.
+ if (Features & (1 << X86::FEATURE_AVX512VBMI2)) {
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
+ break;
+ }
+
if (Features & (1 << X86::FEATURE_AVX512VBMI)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_CANNONLAKE;
break;
}
+ if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) {
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_CASCADELAKE;
+ break;
+ }
+
if (Features & (1 << X86::FEATURE_AVX512VL)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
@@ -681,8 +719,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
}
- if (Features2 & (1 << (X86::FEATURE_CLFLUSHOPT - 32))) {
- if (Features2 & (1 << (X86::FEATURE_SHA - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_CLFLUSHOPT - 64))) {
+ if (Features3 & (1 << (X86::FEATURE_SHA - 64))) {
*Type = X86::INTEL_GOLDMONT;
} else {
*Type = X86::INTEL_COREI7;
@@ -690,7 +728,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
break;
}
- if (Features2 & (1 << (X86::FEATURE_ADX - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_ADX - 64))) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_BROADWELL;
break;
@@ -706,7 +744,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
}
if (Features & (1 << X86::FEATURE_SSE4_2)) {
- if (Features2 & (1 << (X86::FEATURE_MOVBE - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) {
*Type = X86::INTEL_SILVERMONT;
} else {
*Type = X86::INTEL_COREI7;
@@ -720,7 +758,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
}
if (Features & (1 << X86::FEATURE_SSSE3)) {
- if (Features2 & (1 << (X86::FEATURE_MOVBE - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) {
*Type = X86::INTEL_BONNELL; // "bonnell"
} else {
*Type = X86::INTEL_CORE2; // "core2"
@@ -728,7 +766,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
break;
}
- if (Features2 & (1 << (X86::FEATURE_EM64T - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) {
*Type = X86::INTEL_CORE2; // "core2"
*Subtype = X86::INTEL_CORE2_65;
break;
@@ -754,7 +792,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
break;
case 15: {
- if (Features2 & (1 << (X86::FEATURE_EM64T - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) {
*Type = X86::INTEL_NOCONA;
break;
}
@@ -862,40 +900,52 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
- unsigned *FeaturesOut,
- unsigned *Features2Out) {
+ unsigned *FeaturesOut, unsigned *Features2Out,
+ unsigned *Features3Out) {
unsigned Features = 0;
unsigned Features2 = 0;
+ unsigned Features3 = 0;
unsigned EAX, EBX;
+ auto setFeature = [&](unsigned F) {
+ if (F < 32)
+ Features |= 1U << (F & 0x1f);
+ else if (F < 64)
+ Features2 |= 1U << ((F - 32) & 0x1f);
+ else if (F < 96)
+ Features3 |= 1U << ((F - 64) & 0x1f);
+ else
+ llvm_unreachable("Unexpected FeatureBit");
+ };
+
if ((EDX >> 15) & 1)
- Features |= 1 << X86::FEATURE_CMOV;
+ setFeature(X86::FEATURE_CMOV);
if ((EDX >> 23) & 1)
- Features |= 1 << X86::FEATURE_MMX;
+ setFeature(X86::FEATURE_MMX);
if ((EDX >> 25) & 1)
- Features |= 1 << X86::FEATURE_SSE;
+ setFeature(X86::FEATURE_SSE);
if ((EDX >> 26) & 1)
- Features |= 1 << X86::FEATURE_SSE2;
+ setFeature(X86::FEATURE_SSE2);
if ((ECX >> 0) & 1)
- Features |= 1 << X86::FEATURE_SSE3;
+ setFeature(X86::FEATURE_SSE3);
if ((ECX >> 1) & 1)
- Features |= 1 << X86::FEATURE_PCLMUL;
+ setFeature(X86::FEATURE_PCLMUL);
if ((ECX >> 9) & 1)
- Features |= 1 << X86::FEATURE_SSSE3;
+ setFeature(X86::FEATURE_SSSE3);
if ((ECX >> 12) & 1)
- Features |= 1 << X86::FEATURE_FMA;
+ setFeature(X86::FEATURE_FMA);
if ((ECX >> 19) & 1)
- Features |= 1 << X86::FEATURE_SSE4_1;
+ setFeature(X86::FEATURE_SSE4_1);
if ((ECX >> 20) & 1)
- Features |= 1 << X86::FEATURE_SSE4_2;
+ setFeature(X86::FEATURE_SSE4_2);
if ((ECX >> 23) & 1)
- Features |= 1 << X86::FEATURE_POPCNT;
+ setFeature(X86::FEATURE_POPCNT);
if ((ECX >> 25) & 1)
- Features |= 1 << X86::FEATURE_AES;
+ setFeature(X86::FEATURE_AES);
if ((ECX >> 22) & 1)
- Features2 |= 1 << (X86::FEATURE_MOVBE - 32);
+ setFeature(X86::FEATURE_MOVBE);
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
@@ -906,49 +956,59 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
if (HasAVX)
- Features |= 1 << X86::FEATURE_AVX;
+ setFeature(X86::FEATURE_AVX);
bool HasLeaf7 =
MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf7 && ((EBX >> 3) & 1))
- Features |= 1 << X86::FEATURE_BMI;
+ setFeature(X86::FEATURE_BMI);
if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
- Features |= 1 << X86::FEATURE_AVX2;
+ setFeature(X86::FEATURE_AVX2);
if (HasLeaf7 && ((EBX >> 9) & 1))
- Features |= 1 << X86::FEATURE_BMI2;
+ setFeature(X86::FEATURE_BMI2);
if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512F;
+ setFeature(X86::FEATURE_AVX512F);
if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512DQ;
+ setFeature(X86::FEATURE_AVX512DQ);
if (HasLeaf7 && ((EBX >> 19) & 1))
- Features2 |= 1 << (X86::FEATURE_ADX - 32);
+ setFeature(X86::FEATURE_ADX);
if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512IFMA;
+ setFeature(X86::FEATURE_AVX512IFMA);
if (HasLeaf7 && ((EBX >> 23) & 1))
- Features2 |= 1 << (X86::FEATURE_CLFLUSHOPT - 32);
+ setFeature(X86::FEATURE_CLFLUSHOPT);
if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512PF;
+ setFeature(X86::FEATURE_AVX512PF);
if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512ER;
+ setFeature(X86::FEATURE_AVX512ER);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512CD;
+ setFeature(X86::FEATURE_AVX512CD);
if (HasLeaf7 && ((EBX >> 29) & 1))
- Features2 |= 1 << (X86::FEATURE_SHA - 32);
+ setFeature(X86::FEATURE_SHA);
if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512BW;
+ setFeature(X86::FEATURE_AVX512BW);
if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512VL;
+ setFeature(X86::FEATURE_AVX512VL);
if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512VBMI;
+ setFeature(X86::FEATURE_AVX512VBMI);
+ if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
+ setFeature(X86::FEATURE_AVX512VBMI2);
+ if (HasLeaf7 && ((ECX >> 8) & 1))
+ setFeature(X86::FEATURE_GFNI);
+ if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
+ setFeature(X86::FEATURE_VPCLMULQDQ);
+ if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
+ setFeature(X86::FEATURE_AVX512VNNI);
+ if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
+ setFeature(X86::FEATURE_AVX512BITALG);
if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512VPOPCNTDQ;
+ setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX5124VNNIW;
+ setFeature(X86::FEATURE_AVX5124VNNIW);
if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX5124FMAPS;
+ setFeature(X86::FEATURE_AVX5124FMAPS);
unsigned MaxExtLevel;
getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
@@ -956,17 +1016,18 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
!getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
if (HasExtLeaf1 && ((ECX >> 6) & 1))
- Features |= 1 << X86::FEATURE_SSE4_A;
+ setFeature(X86::FEATURE_SSE4_A);
if (HasExtLeaf1 && ((ECX >> 11) & 1))
- Features |= 1 << X86::FEATURE_XOP;
+ setFeature(X86::FEATURE_XOP);
if (HasExtLeaf1 && ((ECX >> 16) & 1))
- Features |= 1 << X86::FEATURE_FMA4;
+ setFeature(X86::FEATURE_FMA4);
if (HasExtLeaf1 && ((EDX >> 29) & 1))
- Features2 |= 1 << (X86::FEATURE_EM64T - 32);
+ setFeature(X86::FEATURE_EM64T);
*FeaturesOut = Features;
*Features2Out = Features2;
+ *Features3Out = Features3;
}
StringRef sys::getHostCPUName() {
@@ -987,16 +1048,16 @@ StringRef sys::getHostCPUName() {
unsigned Brand_id = EBX & 0xff;
unsigned Family = 0, Model = 0;
- unsigned Features = 0, Features2 = 0;
+ unsigned Features = 0, Features2 = 0, Features3 = 0;
detectX86FamilyModel(EAX, &Family, &Model);
- getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2);
+ getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2, &Features3);
unsigned Type = 0;
unsigned Subtype = 0;
if (Vendor == SIG_INTEL) {
getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
- Features2, &Type, &Subtype);
+ Features2, Features3, &Type, &Subtype);
} else if (Vendor == SIG_AMD) {
getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype);
}
@@ -1022,8 +1083,10 @@ StringRef sys::getHostCPUName() {
mach_msg_type_number_t infoCount;
infoCount = HOST_BASIC_INFO_COUNT;
- host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
+ mach_port_t hostPort = mach_host_self();
+ host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
&infoCount);
+ mach_port_deallocate(mach_task_self(), hostPort);
if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
return "generic";
@@ -1215,6 +1278,8 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
+ Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1);
+
// Miscellaneous memory related features, detected by
// using the 0x80000008 leaf of the CPUID instruction
bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
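
The setFeature lambda introduced above spreads X86 feature indices across three 32-bit words (Features, Features2, Features3), which is why the Intel subtype checks now subtract 32 or 64 before testing a bit. A standalone sketch of the same packing, with hypothetical helper names:

#include <cassert>
#include <cstdint>

// Word 0 holds feature indices 0-31, word 1 indices 32-63, word 2 indices
// 64-95, matching the Features/Features2/Features3 split above.
static void setFeatureBit(unsigned Index, uint32_t Words[3]) {
  assert(Index < 96 && "Unexpected feature index");
  Words[Index / 32] |= 1u << (Index % 32);
}

static bool testFeatureBit(unsigned Index, const uint32_t Words[3]) {
  assert(Index < 96 && "Unexpected feature index");
  return (Words[Index / 32] >> (Index % 32)) & 1u;
}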
diff --git a/contrib/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp b/contrib/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp
new file mode 100644
index 000000000000..e55dcd761809
--- /dev/null
+++ b/contrib/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp
@@ -0,0 +1,322 @@
+//===----------------- ItaniumManglingCanonicalizer.cpp -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ItaniumManglingCanonicalizer.h"
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Demangle/ItaniumDemangle.h"
+#include "llvm/Support/Allocator.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+using llvm::itanium_demangle::ForwardTemplateReference;
+using llvm::itanium_demangle::Node;
+using llvm::itanium_demangle::NodeKind;
+
+namespace {
+struct FoldingSetNodeIDBuilder {
+ llvm::FoldingSetNodeID &ID;
+ void operator()(const Node *P) { ID.AddPointer(P); }
+ void operator()(StringView Str) {
+ ID.AddString(llvm::StringRef(Str.begin(), Str.size()));
+ }
+ template<typename T>
+ typename std::enable_if<std::is_integral<T>::value ||
+ std::is_enum<T>::value>::type
+ operator()(T V) {
+ ID.AddInteger((unsigned long long)V);
+ }
+ void operator()(itanium_demangle::NodeOrString NS) {
+ if (NS.isNode()) {
+ ID.AddInteger(0);
+ (*this)(NS.asNode());
+ } else if (NS.isString()) {
+ ID.AddInteger(1);
+ (*this)(NS.asString());
+ } else {
+ ID.AddInteger(2);
+ }
+ }
+ void operator()(itanium_demangle::NodeArray A) {
+ ID.AddInteger(A.size());
+ for (const Node *N : A)
+ (*this)(N);
+ }
+};
+
+template<typename ...T>
+void profileCtor(llvm::FoldingSetNodeID &ID, Node::Kind K, T ...V) {
+ FoldingSetNodeIDBuilder Builder = {ID};
+ Builder(K);
+ int VisitInOrder[] = {
+ (Builder(V), 0) ...,
+ 0 // Avoid empty array if there are no arguments.
+ };
+ (void)VisitInOrder;
+}
+
+// FIXME: Convert this to a generic lambda when possible.
+template<typename NodeT> struct ProfileSpecificNode {
+ FoldingSetNodeID &ID;
+ template<typename ...T> void operator()(T ...V) {
+ profileCtor(ID, NodeKind<NodeT>::Kind, V...);
+ }
+};
+
+struct ProfileNode {
+ FoldingSetNodeID &ID;
+ template<typename NodeT> void operator()(const NodeT *N) {
+ N->match(ProfileSpecificNode<NodeT>{ID});
+ }
+};
+
+template<> void ProfileNode::operator()(const ForwardTemplateReference *N) {
+ llvm_unreachable("should never canonicalize a ForwardTemplateReference");
+}
+
+void profileNode(llvm::FoldingSetNodeID &ID, const Node *N) {
+ N->visit(ProfileNode{ID});
+}
+
+class FoldingNodeAllocator {
+ class alignas(alignof(Node *)) NodeHeader : public llvm::FoldingSetNode {
+ public:
+ // 'Node' in this context names the injected-class-name of the base class.
+ itanium_demangle::Node *getNode() {
+ return reinterpret_cast<itanium_demangle::Node *>(this + 1);
+ }
+ void Profile(llvm::FoldingSetNodeID &ID) { profileNode(ID, getNode()); }
+ };
+
+ BumpPtrAllocator RawAlloc;
+ llvm::FoldingSet<NodeHeader> Nodes;
+
+public:
+ void reset() {}
+
+ template <typename T, typename... Args>
+ std::pair<Node *, bool> getOrCreateNode(bool CreateNewNodes, Args &&... As) {
+ // FIXME: Don't canonicalize forward template references for now, because
+ // they contain state (the resolved template node) that's not known at their
+ // point of creation.
+ if (std::is_same<T, ForwardTemplateReference>::value) {
+ // Note that we don't use if-constexpr here and so we must still write
+ // this code in a generic form.
+ return {new (RawAlloc.Allocate(sizeof(T), alignof(T)))
+ T(std::forward<Args>(As)...),
+ true};
+ }
+
+ llvm::FoldingSetNodeID ID;
+ profileCtor(ID, NodeKind<T>::Kind, As...);
+
+ void *InsertPos;
+ if (NodeHeader *Existing = Nodes.FindNodeOrInsertPos(ID, InsertPos))
+ return {static_cast<T*>(Existing->getNode()), false};
+
+ if (!CreateNewNodes)
+ return {nullptr, true};
+
+ static_assert(alignof(T) <= alignof(NodeHeader),
+ "underaligned node header for specific node kind");
+ void *Storage =
+ RawAlloc.Allocate(sizeof(NodeHeader) + sizeof(T), alignof(NodeHeader));
+ NodeHeader *New = new (Storage) NodeHeader;
+ T *Result = new (New->getNode()) T(std::forward<Args>(As)...);
+ Nodes.InsertNode(New, InsertPos);
+ return {Result, true};
+ }
+
+ template<typename T, typename... Args>
+ Node *makeNode(Args &&...As) {
+ return getOrCreateNode<T>(true, std::forward<Args>(As)...).first;
+ }
+
+ void *allocateNodeArray(size_t sz) {
+ return RawAlloc.Allocate(sizeof(Node *) * sz, alignof(Node *));
+ }
+};
+
+class CanonicalizerAllocator : public FoldingNodeAllocator {
+ Node *MostRecentlyCreated = nullptr;
+ Node *TrackedNode = nullptr;
+ bool TrackedNodeIsUsed = false;
+ bool CreateNewNodes = true;
+ llvm::SmallDenseMap<Node*, Node*, 32> Remappings;
+
+ template<typename T, typename ...Args> Node *makeNodeSimple(Args &&...As) {
+ std::pair<Node *, bool> Result =
+ getOrCreateNode<T>(CreateNewNodes, std::forward<Args>(As)...);
+ if (Result.second) {
+ // Node is new. Make a note of that.
+ MostRecentlyCreated = Result.first;
+ } else if (Result.first) {
+ // Node is pre-existing; check if it's in our remapping table.
+ if (auto *N = Remappings.lookup(Result.first)) {
+ Result.first = N;
+ assert(Remappings.find(Result.first) == Remappings.end() &&
+ "should never need multiple remap steps");
+ }
+ if (Result.first == TrackedNode)
+ TrackedNodeIsUsed = true;
+ }
+ return Result.first;
+ }
+
+ /// Helper to allow makeNode to be partially-specialized on T.
+ template<typename T> struct MakeNodeImpl {
+ CanonicalizerAllocator &Self;
+ template<typename ...Args> Node *make(Args &&...As) {
+ return Self.makeNodeSimple<T>(std::forward<Args>(As)...);
+ }
+ };
+
+public:
+ template<typename T, typename ...Args> Node *makeNode(Args &&...As) {
+ return MakeNodeImpl<T>{*this}.make(std::forward<Args>(As)...);
+ }
+
+ void reset() { MostRecentlyCreated = nullptr; }
+
+ void setCreateNewNodes(bool CNN) { CreateNewNodes = CNN; }
+
+ void addRemapping(Node *A, Node *B) {
+ // Note, we don't need to check whether B is also remapped, because if it
+ // was we would have already remapped it when building it.
+ Remappings.insert(std::make_pair(A, B));
+ }
+
+ bool isMostRecentlyCreated(Node *N) const { return MostRecentlyCreated == N; }
+
+ void trackUsesOf(Node *N) {
+ TrackedNode = N;
+ TrackedNodeIsUsed = false;
+ }
+ bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; }
+};
+
+/// Convert St3foo to NSt3fooE so that equivalences naming one also affect the
+/// other.
+template<>
+struct CanonicalizerAllocator::MakeNodeImpl<
+ itanium_demangle::StdQualifiedName> {
+ CanonicalizerAllocator &Self;
+ Node *make(Node *Child) {
+ Node *StdNamespace = Self.makeNode<itanium_demangle::NameType>("std");
+ if (!StdNamespace)
+ return nullptr;
+ return Self.makeNode<itanium_demangle::NestedName>(StdNamespace, Child);
+ }
+};
+
+// FIXME: Also expand built-in substitutions?
+
+using CanonicalizingDemangler =
+ itanium_demangle::ManglingParser<CanonicalizerAllocator>;
+}
+
+struct ItaniumManglingCanonicalizer::Impl {
+ CanonicalizingDemangler Demangler = {nullptr, nullptr};
+};
+
+ItaniumManglingCanonicalizer::ItaniumManglingCanonicalizer() : P(new Impl) {}
+ItaniumManglingCanonicalizer::~ItaniumManglingCanonicalizer() { delete P; }
+
+ItaniumManglingCanonicalizer::EquivalenceError
+ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First,
+ StringRef Second) {
+ auto &Alloc = P->Demangler.ASTAllocator;
+ Alloc.setCreateNewNodes(true);
+
+ auto Parse = [&](StringRef Str) {
+ P->Demangler.reset(Str.begin(), Str.end());
+ Node *N = nullptr;
+ switch (Kind) {
+ // A <name>, with minor extensions to allow arbitrary namespace and
+ // template names that can't easily be written as <name>s.
+ case FragmentKind::Name:
+ // Very special case: allow "St" as a shorthand for "3std". It's not
+ // valid as a <name> mangling, but is nonetheless the most natural
+ // way to name the 'std' namespace.
+ if (Str.size() == 2 && P->Demangler.consumeIf("St"))
+ N = P->Demangler.make<itanium_demangle::NameType>("std");
+ // We permit substitutions to name templates without their template
+ // arguments. This mostly just falls out, as almost all template names
+ // are valid as <name>s, but we also want to parse <substitution>s as
+ // <name>s, even though they're not.
+ else if (Str.startswith("S"))
+ // Parse the substitution and optional following template arguments.
+ N = P->Demangler.parseType();
+ else
+ N = P->Demangler.parseName();
+ break;
+
+ // A <type>.
+ case FragmentKind::Type:
+ N = P->Demangler.parseType();
+ break;
+
+ // An <encoding>.
+ case FragmentKind::Encoding:
+ N = P->Demangler.parseEncoding();
+ break;
+ }
+
+ // If we have trailing junk, the mangling is invalid.
+ if (P->Demangler.numLeft() != 0)
+ N = nullptr;
+
+ // If any node was created after N, then we cannot safely remap it because
+ // it might already be in use by another node.
+ return std::make_pair(N, Alloc.isMostRecentlyCreated(N));
+ };
+
+ Node *FirstNode, *SecondNode;
+ bool FirstIsNew, SecondIsNew;
+
+ std::tie(FirstNode, FirstIsNew) = Parse(First);
+ if (!FirstNode)
+ return EquivalenceError::InvalidFirstMangling;
+
+ Alloc.trackUsesOf(FirstNode);
+ std::tie(SecondNode, SecondIsNew) = Parse(Second);
+ if (!SecondNode)
+ return EquivalenceError::InvalidSecondMangling;
+
+ // If they're already equivalent, there's nothing to do.
+ if (FirstNode == SecondNode)
+ return EquivalenceError::Success;
+
+ if (FirstIsNew && !Alloc.trackedNodeIsUsed())
+ Alloc.addRemapping(FirstNode, SecondNode);
+ else if (SecondIsNew)
+ Alloc.addRemapping(SecondNode, FirstNode);
+ else
+ return EquivalenceError::ManglingAlreadyUsed;
+
+ return EquivalenceError::Success;
+}
+
+ItaniumManglingCanonicalizer::Key
+ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) {
+ P->Demangler.ASTAllocator.setCreateNewNodes(true);
+ P->Demangler.reset(Mangling.begin(), Mangling.end());
+ return reinterpret_cast<Key>(P->Demangler.parse());
+}
+
+ItaniumManglingCanonicalizer::Key
+ItaniumManglingCanonicalizer::lookup(StringRef Mangling) {
+ P->Demangler.ASTAllocator.setCreateNewNodes(false);
+ P->Demangler.reset(Mangling.begin(), Mangling.end());
+ return reinterpret_cast<Key>(P->Demangler.parse());
+}
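
A hedged usage sketch of the API this new file implements, based only on the signatures visible here (addEquivalence, canonicalize). It assumes Key values can be compared with ==, which the reinterpret_cast above suggests but the header does not appear in this diff; the manglings are illustrative.

#include "llvm/Support/ItaniumManglingCanonicalizer.h"

// Record that the namespaces mangled as "3foo" and "3bar" are equivalent,
// then ask whether two manglings canonicalize to the same key.
bool manglingsMatch() {
  llvm::ItaniumManglingCanonicalizer C;
  using FK = llvm::ItaniumManglingCanonicalizer::FragmentKind;
  using EE = llvm::ItaniumManglingCanonicalizer::EquivalenceError;
  if (C.addEquivalence(FK::Name, "3foo", "3bar") != EE::Success)
    return false;
  return C.canonicalize("_Z1fN3foo1XE") == C.canonicalize("_Z1fN3bar1XE");
}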
diff --git a/contrib/llvm/lib/Support/JSON.cpp b/contrib/llvm/lib/Support/JSON.cpp
index a5dae7a7c2e0..d468013fb94a 100644
--- a/contrib/llvm/lib/Support/JSON.cpp
+++ b/contrib/llvm/lib/Support/JSON.cpp
@@ -517,7 +517,7 @@ static std::vector<const Object::value_type *> sortedElements(const Object &O) {
std::vector<const Object::value_type *> Elements;
for (const auto &E : O)
Elements.push_back(&E);
- llvm::sort(Elements.begin(), Elements.end(),
+ llvm::sort(Elements,
[](const Object::value_type *L, const Object::value_type *R) {
return L->first < R->first;
});
diff --git a/contrib/llvm/lib/Support/Locale.cpp b/contrib/llvm/lib/Support/Locale.cpp
index e57d377c9ab5..1b3300b90f2a 100644
--- a/contrib/llvm/lib/Support/Locale.cpp
+++ b/contrib/llvm/lib/Support/Locale.cpp
@@ -7,24 +7,11 @@ namespace sys {
namespace locale {
int columnWidth(StringRef Text) {
-#if _WIN32
- return Text.size();
-#else
return llvm::sys::unicode::columnWidthUTF8(Text);
-#endif
}
bool isPrint(int UCS) {
-#if _WIN32
- // Restrict characters that we'll try to print to the lower part of ASCII
- // except for the control characters (0x20 - 0x7E). In general one can not
- // reliably output code points U+0080 and higher using narrow character C/C++
- // output functions in Windows, because the meaning of the upper 128 codes is
- // determined by the active code page in the console.
- return ' ' <= UCS && UCS <= '~';
-#else
return llvm::sys::unicode::isPrintable(UCS);
-#endif
}
} // namespace locale
diff --git a/contrib/llvm/lib/Support/LockFileManager.cpp b/contrib/llvm/lib/Support/LockFileManager.cpp
index 77baf7ac4bdd..c166230ba3a3 100644
--- a/contrib/llvm/lib/Support/LockFileManager.cpp
+++ b/contrib/llvm/lib/Support/LockFileManager.cpp
@@ -24,7 +24,7 @@
#include <sys/types.h>
#include <system_error>
#include <tuple>
-#if _WIN32
+#ifdef _WIN32
#include <windows.h>
#endif
#if LLVM_ON_UNIX
@@ -295,7 +295,7 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
if (getState() != LFS_Shared)
return Res_Success;
-#if _WIN32
+#ifdef _WIN32
unsigned long Interval = 1;
#else
struct timespec Interval;
@@ -310,7 +310,7 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
// finish up and remove the lock file.
// FIXME: Should we hook in to system APIs to get a notification when the
// lock file is deleted?
-#if _WIN32
+#ifdef _WIN32
Sleep(Interval);
#else
nanosleep(&Interval, nullptr);
@@ -329,7 +329,7 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
return Res_OwnerDied;
// Exponentially increase the time we wait for the lock to be removed.
-#if _WIN32
+#ifdef _WIN32
Interval *= 2;
#else
Interval.tv_sec *= 2;
@@ -340,7 +340,7 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
}
#endif
} while (
-#if _WIN32
+#ifdef _WIN32
Interval < MaxSeconds * 1000
#else
Interval.tv_sec < (time_t)MaxSeconds
diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp
index 768a819c8d05..5ce2f50ebdaa 100644
--- a/contrib/llvm/lib/Support/Path.cpp
+++ b/contrib/llvm/lib/Support/Path.cpp
@@ -190,48 +190,57 @@ createUniqueEntity(const Twine &Model, int &ResultFD,
ResultPath.push_back(0);
ResultPath.pop_back();
-retry_random_path:
- // Replace '%' with random chars.
- for (unsigned i = 0, e = ModelStorage.size(); i != e; ++i) {
- if (ModelStorage[i] == '%')
- ResultPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15];
- }
-
- // Try to open + create the file.
- switch (Type) {
- case FS_File: {
- if (std::error_code EC =
- sys::fs::openFileForReadWrite(Twine(ResultPath.begin()), ResultFD,
- sys::fs::CD_CreateNew, Flags, Mode)) {
- if (EC == errc::file_exists)
- goto retry_random_path;
- return EC;
+ // Limit the number of attempts we make, so that we don't loop forever. E.g.
+ // "permission denied" could be for a specific file (so we retry with a
+ // different name) or for the whole directory (retry would always fail).
+ // Checking which is racy, so we try a number of times, then give up.
+ std::error_code EC;
+ for (int Retries = 128; Retries > 0; --Retries) {
+ // Replace '%' with random chars.
+ for (unsigned i = 0, e = ModelStorage.size(); i != e; ++i) {
+ if (ModelStorage[i] == '%')
+ ResultPath[i] =
+ "0123456789abcdef"[sys::Process::GetRandomNumber() & 15];
}
- return std::error_code();
- }
+ // Try to open + create the file.
+ switch (Type) {
+ case FS_File: {
+ EC = sys::fs::openFileForReadWrite(Twine(ResultPath.begin()), ResultFD,
+ sys::fs::CD_CreateNew, Flags, Mode);
+ if (EC) {
+ // errc::permission_denied happens on Windows when we try to open a file
+ // that has been marked for deletion.
+ if (EC == errc::file_exists || EC == errc::permission_denied)
+ continue;
+ return EC;
+ }
- case FS_Name: {
- std::error_code EC =
- sys::fs::access(ResultPath.begin(), sys::fs::AccessMode::Exist);
- if (EC == errc::no_such_file_or_directory)
return std::error_code();
- if (EC)
- return EC;
- goto retry_random_path;
- }
+ }
- case FS_Dir: {
- if (std::error_code EC =
- sys::fs::create_directory(ResultPath.begin(), false)) {
- if (EC == errc::file_exists)
- goto retry_random_path;
- return EC;
+ case FS_Name: {
+ EC = sys::fs::access(ResultPath.begin(), sys::fs::AccessMode::Exist);
+ if (EC == errc::no_such_file_or_directory)
+ return std::error_code();
+ if (EC)
+ return EC;
+ continue;
}
- return std::error_code();
- }
+
+ case FS_Dir: {
+ EC = sys::fs::create_directory(ResultPath.begin(), false);
+ if (EC) {
+ if (EC == errc::file_exists)
+ continue;
+ return EC;
+ }
+ return std::error_code();
+ }
+ }
+ llvm_unreachable("Invalid Type");
}
- llvm_unreachable("Invalid Type");
+ return EC;
}
namespace llvm {
@@ -524,7 +533,7 @@ void replace_path_prefix(SmallVectorImpl<char> &Path,
// If prefixes have the same size we can simply copy the new one over.
if (OldPrefix.size() == NewPrefix.size()) {
- std::copy(NewPrefix.begin(), NewPrefix.end(), Path.begin());
+ llvm::copy(NewPrefix, Path.begin());
return;
}
@@ -840,9 +849,8 @@ getPotentiallyUniqueTempFileName(const Twine &Prefix, StringRef Suffix,
return createTemporaryFile(Prefix, Suffix, Dummy, ResultPath, FS_Name);
}
-static std::error_code make_absolute(const Twine &current_directory,
- SmallVectorImpl<char> &path,
- bool use_current_directory) {
+void make_absolute(const Twine &current_directory,
+ SmallVectorImpl<char> &path) {
StringRef p(path.data(), path.size());
bool rootDirectory = path::has_root_directory(p);
@@ -851,14 +859,11 @@ static std::error_code make_absolute(const Twine &current_directory,
// Already absolute.
if (rootName && rootDirectory)
- return std::error_code();
+ return;
// All of the following conditions will need the current directory.
SmallString<128> current_dir;
- if (use_current_directory)
- current_directory.toVector(current_dir);
- else if (std::error_code ec = current_path(current_dir))
- return ec;
+ current_directory.toVector(current_dir);
// Relative path. Prepend the current directory.
if (!rootName && !rootDirectory) {
@@ -866,7 +871,7 @@ static std::error_code make_absolute(const Twine &current_directory,
path::append(current_dir, p);
// Set path to the result.
path.swap(current_dir);
- return std::error_code();
+ return;
}
if (!rootName && rootDirectory) {
@@ -875,7 +880,7 @@ static std::error_code make_absolute(const Twine &current_directory,
path::append(curDirRootName, p);
// Set path to the result.
path.swap(curDirRootName);
- return std::error_code();
+ return;
}
if (rootName && !rootDirectory) {
@@ -887,20 +892,23 @@ static std::error_code make_absolute(const Twine &current_directory,
SmallString<128> res;
path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
path.swap(res);
- return std::error_code();
+ return;
}
llvm_unreachable("All rootName and rootDirectory combinations should have "
"occurred above!");
}
-std::error_code make_absolute(const Twine &current_directory,
- SmallVectorImpl<char> &path) {
- return make_absolute(current_directory, path, true);
-}
-
std::error_code make_absolute(SmallVectorImpl<char> &path) {
- return make_absolute(Twine(), path, false);
+ if (path::is_absolute(path))
+ return {};
+
+ SmallString<128> current_dir;
+ if (std::error_code ec = current_path(current_dir))
+ return ec;
+
+ make_absolute(current_dir, path);
+ return {};
}
std::error_code create_directories(const Twine &Path, bool IgnoreExisting,
@@ -1076,12 +1084,13 @@ std::error_code is_other(const Twine &Path, bool &Result) {
return std::error_code();
}
-void directory_entry::replace_filename(const Twine &filename,
- basic_file_status st) {
- SmallString<128> path = path::parent_path(Path);
- path::append(path, filename);
- Path = path.str();
- Status = st;
+void directory_entry::replace_filename(const Twine &Filename, file_type Type,
+ basic_file_status Status) {
+ SmallString<128> PathStr = path::parent_path(Path);
+ path::append(PathStr, Filename);
+ this->Path = PathStr.str();
+ this->Type = Type;
+ this->Status = Status;
}
ErrorOr<perms> getPermissions(const Twine &Path) {
@@ -1230,17 +1239,5 @@ Expected<TempFile> TempFile::create(const Twine &Model, unsigned Mode) {
}
}
-namespace path {
-
-bool user_cache_directory(SmallVectorImpl<char> &Result, const Twine &Path1,
- const Twine &Path2, const Twine &Path3) {
- if (getUserCacheDir(Result)) {
- append(Result, Path1, Path2, Path3);
- return true;
- }
- return false;
-}
-
-} // end namespace path
} // end namespace sys
} // end namespace llvm
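
The createUniqueEntity change above replaces an unbounded "goto retry" with a capped retry loop, so a persistent error such as an unwritable directory cannot spin forever. A standalone sketch of the same pattern, using a hypothetical Create callback rather than the LLVM file-system API:

#include <random>
#include <string>
#include <system_error>

// Replace each '%' in Model with a random hex digit and try Create() on the
// result. Give up after a fixed number of attempts so a persistent error
// cannot cause an infinite loop; only "file exists" is treated as retryable.
template <typename CreateFn>
std::error_code createUniqueName(const std::string &Model, CreateFn Create) {
  static const char Hex[] = "0123456789abcdef";
  std::mt19937 Rng(std::random_device{}());
  for (int Retries = 128; Retries > 0; --Retries) {
    std::string Candidate = Model;
    for (char &C : Candidate)
      if (C == '%')
        C = Hex[Rng() & 15];
    std::error_code EC = Create(Candidate);
    if (EC != std::errc::file_exists) // Success or a non-retryable error.
      return EC;
  }
  return std::make_error_code(std::errc::file_exists);
}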
diff --git a/contrib/llvm/lib/Support/Process.cpp b/contrib/llvm/lib/Support/Process.cpp
index 3f5a9d722ca0..f32355aefbb7 100644
--- a/contrib/llvm/lib/Support/Process.cpp
+++ b/contrib/llvm/lib/Support/Process.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Config/config.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
@@ -82,12 +83,11 @@ static const char colorcodes[2][2][8][10] = {
{ ALLCOLORS("4",""), ALLCOLORS("4","1;") }
};
-// This is set to true when Process::PreventCoreFiles() is called.
-static bool coreFilesPrevented = false;
+// A CMake option controls whether we emit core dumps by default. An application
+// may disable core dumps by calling Process::PreventCoreFiles().
+static bool coreFilesPrevented = !LLVM_ENABLE_CRASH_DUMPS;
-bool Process::AreCoreFilesPrevented() {
- return coreFilesPrevented;
-}
+bool Process::AreCoreFilesPrevented() { return coreFilesPrevented; }
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
diff --git a/contrib/llvm/lib/Support/RandomNumberGenerator.cpp b/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
index f1f22af82a81..df0d87fab021 100644
--- a/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
+++ b/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
@@ -49,7 +49,7 @@ RandomNumberGenerator::RandomNumberGenerator(StringRef Salt) {
Data[0] = Seed;
Data[1] = Seed >> 32;
- std::copy(Salt.begin(), Salt.end(), Data.begin() + 2);
+ llvm::copy(Salt, Data.begin() + 2);
std::seed_seq SeedSeq(Data.begin(), Data.end());
Generator.seed(SeedSeq);
diff --git a/contrib/llvm/lib/Support/Signals.cpp b/contrib/llvm/lib/Support/Signals.cpp
index 6534ff69b84c..333f492d4589 100644
--- a/contrib/llvm/lib/Support/Signals.cpp
+++ b/contrib/llvm/lib/Support/Signals.cpp
@@ -20,6 +20,8 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
@@ -155,7 +157,7 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
}
Optional<StringRef> Redirects[] = {StringRef(InputFile),
- StringRef(OutputFile), llvm::None};
+ StringRef(OutputFile), StringRef("")};
StringRef Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining",
#ifdef _WIN32
// Pass --relative-address on Windows so that we don't
@@ -180,8 +182,14 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
auto CurLine = Lines.begin();
int frame_no = 0;
for (int i = 0; i < Depth; i++) {
+ auto PrintLineHeader = [&]() {
+ OS << right_justify(formatv("#{0}", frame_no++).str(),
+ std::log10(Depth) + 2)
+ << ' ' << format_ptr(StackTrace[i]) << ' ';
+ };
if (!Modules[i]) {
- OS << '#' << frame_no++ << ' ' << format_ptr(StackTrace[i]) << '\n';
+ PrintLineHeader();
+ OS << '\n';
continue;
}
// Read pairs of lines (function name and file/line info) until we
@@ -192,7 +200,7 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
StringRef FunctionName = *CurLine++;
if (FunctionName.empty())
break;
- OS << '#' << frame_no++ << ' ' << format_ptr(StackTrace[i]) << ' ';
+ PrintLineHeader();
if (!FunctionName.startswith("??"))
OS << FunctionName << ' ';
if (CurLine == Lines.end())
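
The PrintLineHeader lambda right-justifies the frame number so the addresses line up regardless of stack depth; the field width is std::log10(Depth) + 2 columns ('#' plus the digits). A plain-C++ sketch of that computation, not the formatv-based code above:

#include <cmath>
#include <cstdio>
#include <string>

// Right-justify "#N" in a field wide enough for the deepest frame, mirroring
// the std::log10(Depth) + 2 width used above.
static void printFrameHeader(int FrameNo, int Depth, const void *Addr) {
  int Width = static_cast<int>(std::log10(Depth)) + 2;
  std::string Header = "#" + std::to_string(FrameNo);
  std::printf("%*s %p ", Width, Header.c_str(), Addr);
}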
diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp
index d8fde7fa8990..a55ad881d012 100644
--- a/contrib/llvm/lib/Support/SourceMgr.cpp
+++ b/contrib/llvm/lib/Support/SourceMgr.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -269,7 +270,7 @@ SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN,
: SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind),
Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()),
FixIts(Hints.begin(), Hints.end()) {
- llvm::sort(FixIts.begin(), FixIts.end());
+ llvm::sort(FixIts);
}
static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
@@ -345,12 +346,18 @@ static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
static void printSourceLine(raw_ostream &S, StringRef LineContents) {
// Print out the source line one character at a time, so we can expand tabs.
for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
- if (LineContents[i] != '\t') {
- S << LineContents[i];
- ++OutCol;
- continue;
+ size_t NextTab = LineContents.find('\t', i);
+ // If there are no tabs left, print the rest; we are done.
+ if (NextTab == StringRef::npos) {
+ S << LineContents.drop_front(i);
+ break;
}
+ // Otherwise, print from i to NextTab.
+ S << LineContents.slice(i, NextTab);
+ OutCol += NextTab - i;
+ i = NextTab;
+
// If we have a tab, emit at least one space, then round up to 8 columns.
do {
S << ' ';
@@ -364,65 +371,48 @@ static bool isNonASCII(char c) {
return c & 0x80;
}
-void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
- bool ShowKindLabel) const {
- // Display colors only if OS supports colors.
- ShowColors &= S.has_colors();
-
- if (ShowColors)
- S.changeColor(raw_ostream::SAVEDCOLOR, true);
+void SMDiagnostic::print(const char *ProgName, raw_ostream &OS,
+ bool ShowColors, bool ShowKindLabel) const {
+ {
+ WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors);
- if (ProgName && ProgName[0])
- S << ProgName << ": ";
+ if (ProgName && ProgName[0])
+ S << ProgName << ": ";
- if (!Filename.empty()) {
- if (Filename == "-")
- S << "<stdin>";
- else
- S << Filename;
+ if (!Filename.empty()) {
+ if (Filename == "-")
+ S << "<stdin>";
+ else
+ S << Filename;
- if (LineNo != -1) {
- S << ':' << LineNo;
- if (ColumnNo != -1)
- S << ':' << (ColumnNo+1);
+ if (LineNo != -1) {
+ S << ':' << LineNo;
+ if (ColumnNo != -1)
+ S << ':' << (ColumnNo + 1);
+ }
+ S << ": ";
}
- S << ": ";
}
if (ShowKindLabel) {
switch (Kind) {
case SourceMgr::DK_Error:
- if (ShowColors)
- S.changeColor(raw_ostream::RED, true);
- S << "error: ";
+ WithColor::error(OS, "", !ShowColors);
break;
case SourceMgr::DK_Warning:
- if (ShowColors)
- S.changeColor(raw_ostream::MAGENTA, true);
- S << "warning: ";
+ WithColor::warning(OS, "", !ShowColors);
break;
case SourceMgr::DK_Note:
- if (ShowColors)
- S.changeColor(raw_ostream::BLACK, true);
- S << "note: ";
+ WithColor::note(OS, "", !ShowColors);
break;
case SourceMgr::DK_Remark:
- if (ShowColors)
- S.changeColor(raw_ostream::BLUE, true);
- S << "remark: ";
+ WithColor::remark(OS, "", !ShowColors);
break;
}
-
- if (ShowColors) {
- S.resetColor();
- S.changeColor(raw_ostream::SAVEDCOLOR, true);
- }
}
- S << Message << '\n';
-
- if (ShowColors)
- S.resetColor();
+ WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors)
+ << Message << '\n';
if (LineNo == -1 || ColumnNo == -1)
return;
@@ -433,7 +423,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
// expanding them later, and bail out rather than show incorrect ranges and
// misaligned fixits for any other odd characters.
if (find_if(LineContents, isNonASCII) != LineContents.end()) {
- printSourceLine(S, LineContents);
+ printSourceLine(OS, LineContents);
return;
}
size_t NumColumns = LineContents.size();
@@ -467,29 +457,27 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
// least.
CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
- printSourceLine(S, LineContents);
+ printSourceLine(OS, LineContents);
- if (ShowColors)
- S.changeColor(raw_ostream::GREEN, true);
+ {
+ WithColor S(OS, raw_ostream::GREEN, true, false, !ShowColors);
- // Print out the caret line, matching tabs in the source line.
- for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
- if (i >= LineContents.size() || LineContents[i] != '\t') {
- S << CaretLine[i];
- ++OutCol;
- continue;
- }
+ // Print out the caret line, matching tabs in the source line.
+ for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
+ if (i >= LineContents.size() || LineContents[i] != '\t') {
+ S << CaretLine[i];
+ ++OutCol;
+ continue;
+ }
- // Okay, we have a tab. Insert the appropriate number of characters.
- do {
- S << CaretLine[i];
- ++OutCol;
- } while ((OutCol % TabStop) != 0);
+ // Okay, we have a tab. Insert the appropriate number of characters.
+ do {
+ S << CaretLine[i];
+ ++OutCol;
+ } while ((OutCol % TabStop) != 0);
+ }
+ S << '\n';
}
- S << '\n';
-
- if (ShowColors)
- S.resetColor();
// Print out the replacement line, matching tabs in the source line.
if (FixItInsertionLine.empty())
@@ -497,14 +485,14 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
if (i >= LineContents.size() || LineContents[i] != '\t') {
- S << FixItInsertionLine[i];
+ OS << FixItInsertionLine[i];
++OutCol;
continue;
}
// Okay, we have a tab. Insert the appropriate number of characters.
do {
- S << FixItInsertionLine[i];
+ OS << FixItInsertionLine[i];
// FIXME: This is trying not to break up replacements, but then to re-sync
// with the tabs between replacements. This will fail, though, if two
// fix-it replacements are exactly adjacent, or if a fix-it contains a
@@ -515,5 +503,5 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
++OutCol;
} while (((OutCol % TabStop) != 0) && i != e);
}
- S << '\n';
+ OS << '\n';
}
diff --git a/contrib/llvm/lib/Support/StringSaver.cpp b/contrib/llvm/lib/Support/StringSaver.cpp
index 1ded2bdb09de..bf0ac8de9821 100644
--- a/contrib/llvm/lib/Support/StringSaver.cpp
+++ b/contrib/llvm/lib/Support/StringSaver.cpp
@@ -13,7 +13,8 @@ using namespace llvm;
StringRef StringSaver::save(StringRef S) {
char *P = Alloc.Allocate<char>(S.size() + 1);
- memcpy(P, S.data(), S.size());
+ if (!S.empty())
+ memcpy(P, S.data(), S.size());
P[S.size()] = '\0';
return StringRef(P, S.size());
}
diff --git a/contrib/llvm/lib/Support/SymbolRemappingReader.cpp b/contrib/llvm/lib/Support/SymbolRemappingReader.cpp
new file mode 100644
index 000000000000..264c890ce8f1
--- /dev/null
+++ b/contrib/llvm/lib/Support/SymbolRemappingReader.cpp
@@ -0,0 +1,81 @@
+//===- SymbolRemappingReader.cpp - Read symbol remapping file -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions needed for reading and applying symbol
+// remapping files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SymbolRemappingReader.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/LineIterator.h"
+
+using namespace llvm;
+
+char SymbolRemappingParseError::ID;
+
+/// Load a set of name remappings from a text file.
+///
+/// See the documentation at the top of the file for an explanation of
+/// the expected format.
+Error SymbolRemappingReader::read(MemoryBuffer &B) {
+ line_iterator LineIt(B, /*SkipBlanks=*/true, '#');
+
+ auto ReportError = [&](Twine Msg) {
+ return llvm::make_error<SymbolRemappingParseError>(
+ B.getBufferIdentifier(), LineIt.line_number(), Msg);
+ };
+
+ for (; !LineIt.is_at_eof(); ++LineIt) {
+ StringRef Line = *LineIt;
+ Line = Line.ltrim(' ');
+ // line_iterator only detects comments starting in column 1.
+ if (Line.startswith("#") || Line.empty())
+ continue;
+
+ SmallVector<StringRef, 4> Parts;
+ Line.split(Parts, ' ', /*MaxSplits*/-1, /*KeepEmpty*/false);
+
+ if (Parts.size() != 3)
+ return ReportError("Expected 'kind mangled_name mangled_name', "
+ "found '" + Line + "'");
+
+ using FK = ItaniumManglingCanonicalizer::FragmentKind;
+ Optional<FK> FragmentKind = StringSwitch<Optional<FK>>(Parts[0])
+ .Case("name", FK::Name)
+ .Case("type", FK::Type)
+ .Case("encoding", FK::Encoding)
+ .Default(None);
+ if (!FragmentKind)
+ return ReportError("Invalid kind, expected 'name', 'type', or 'encoding',"
+ " found '" + Parts[0] + "'");
+
+ using EE = ItaniumManglingCanonicalizer::EquivalenceError;
+ switch (Canonicalizer.addEquivalence(*FragmentKind, Parts[1], Parts[2])) {
+ case EE::Success:
+ break;
+
+ case EE::ManglingAlreadyUsed:
+ return ReportError("Manglings '" + Parts[1] + "' and '" + Parts[2] + "' "
+ "have both been used in prior remappings. Move this "
+ "remapping earlier in the file.");
+
+ case EE::InvalidFirstMangling:
+ return ReportError("Could not demangle '" + Parts[1] + "' "
+ "as a <" + Parts[0] + ">; invalid mangling?");
+
+ case EE::InvalidSecondMangling:
+ return ReportError("Could not demangle '" + Parts[2] + "' "
+ "as a <" + Parts[0] + ">; invalid mangling?");
+ }
+ }
+
+ return Error::success();
+}
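
A hedged sketch of feeding this reader an in-memory remapping file. The line format ("kind mangled-name mangled-name", with kind one of name, type, or encoding, and '#' starting a comment) comes from the parsing code above; it assumes SymbolRemappingReader is default-constructible, which this file does not show, and the manglings are illustrative.

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SymbolRemappingReader.h"
#include <memory>

// Treat the namespaces mangled as "3foo" and "3bar" as equivalent names.
llvm::Error loadExampleRemappings(llvm::SymbolRemappingReader &Reader) {
  static const char Mapping[] =
      "# comment lines start with '#'\n"
      "name 3foo 3bar\n";
  std::unique_ptr<llvm::MemoryBuffer> Buf =
      llvm::MemoryBuffer::getMemBuffer(Mapping, "example.remap");
  return Reader.read(*Buf);
}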
diff --git a/contrib/llvm/lib/Support/TargetParser.cpp b/contrib/llvm/lib/Support/TargetParser.cpp
index 2c167a4d086c..bdc0dc52c5e2 100644
--- a/contrib/llvm/lib/Support/TargetParser.cpp
+++ b/contrib/llvm/lib/Support/TargetParser.cpp
@@ -14,926 +14,186 @@
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/TargetParser.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
-#include <cctype>
using namespace llvm;
-using namespace ARM;
-using namespace AArch64;
+using namespace AMDGPU;
namespace {
-// List of canonical FPU names (use getFPUSynonym) and which architectural
-// features they correspond to (use getFPUFeatures).
-// FIXME: TableGen this.
-// The entries must appear in the order listed in ARM::FPUKind for correct indexing
-static const struct {
- const char *NameCStr;
- size_t NameLength;
- ARM::FPUKind ID;
- ARM::FPUVersion FPUVersion;
- ARM::NeonSupportLevel NeonSupport;
- ARM::FPURestriction Restriction;
-
- StringRef getName() const { return StringRef(NameCStr, NameLength); }
-} FPUNames[] = {
-#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) \
- { NAME, sizeof(NAME) - 1, KIND, VERSION, NEON_SUPPORT, RESTRICTION },
-#include "llvm/Support/ARMTargetParser.def"
-};
-
-// List of canonical arch names (use getArchSynonym).
-// This table also provides the build attribute fields for CPU arch
-// and Arch ID, according to the Addenda to the ARM ABI, chapters
-// 2.4 and 2.3.5.2 respectively.
-// FIXME: SubArch values were simplified to fit into the expectations
-// of the triples and are not conforming with their official names.
-// Check to see if the expectation should be changed.
-// FIXME: TableGen this.
-template <typename T> struct ArchNames {
- const char *NameCStr;
- size_t NameLength;
- const char *CPUAttrCStr;
- size_t CPUAttrLength;
- const char *SubArchCStr;
- size_t SubArchLength;
- unsigned DefaultFPU;
- unsigned ArchBaseExtensions;
- T ID;
- ARMBuildAttrs::CPUArch ArchAttr; // Arch ID in build attributes.
-
- StringRef getName() const { return StringRef(NameCStr, NameLength); }
-
- // CPU class in build attributes.
- StringRef getCPUAttr() const { return StringRef(CPUAttrCStr, CPUAttrLength); }
-
- // Sub-Arch name.
- StringRef getSubArch() const { return StringRef(SubArchCStr, SubArchLength); }
+struct GPUInfo {
+ StringLiteral Name;
+ StringLiteral CanonicalName;
+ AMDGPU::GPUKind Kind;
+ unsigned Features;
};
-ArchNames<ARM::ArchKind> ARCHNames[] = {
-#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \
- {NAME, sizeof(NAME) - 1, CPU_ATTR, sizeof(CPU_ATTR) - 1, SUB_ARCH, \
- sizeof(SUB_ARCH) - 1, ARCH_FPU, ARCH_BASE_EXT, ARM::ArchKind::ID, ARCH_ATTR},
-#include "llvm/Support/ARMTargetParser.def"
-};
-
-ArchNames<AArch64::ArchKind> AArch64ARCHNames[] = {
- #define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \
- {NAME, sizeof(NAME) - 1, CPU_ATTR, sizeof(CPU_ATTR) - 1, SUB_ARCH, \
- sizeof(SUB_ARCH) - 1, ARCH_FPU, ARCH_BASE_EXT, AArch64::ArchKind::ID, ARCH_ATTR},
- #include "llvm/Support/AArch64TargetParser.def"
- };
-
-// List of Arch Extension names.
-// FIXME: TableGen this.
-static const struct {
- const char *NameCStr;
- size_t NameLength;
- unsigned ID;
- const char *Feature;
- const char *NegFeature;
-
- StringRef getName() const { return StringRef(NameCStr, NameLength); }
-} ARCHExtNames[] = {
-#define ARM_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
- { NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE },
-#include "llvm/Support/ARMTargetParser.def"
-},AArch64ARCHExtNames[] = {
-#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
- { NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE },
-#include "llvm/Support/AArch64TargetParser.def"
+constexpr GPUInfo R600GPUs[26] = {
+ // Name Canonical Kind Features
+ // Name
+ {{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
+ {{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE },
+ {{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE },
+ {{"r630"}, {"r630"}, GK_R630, FEATURE_NONE },
+ {{"rs780"}, {"rs880"}, GK_RS880, FEATURE_NONE },
+ {{"rs880"}, {"rs880"}, GK_RS880, FEATURE_NONE },
+ {{"rv610"}, {"rs880"}, GK_RS880, FEATURE_NONE },
+ {{"rv620"}, {"rs880"}, GK_RS880, FEATURE_NONE },
+ {{"rv670"}, {"rv670"}, GK_RV670, FEATURE_NONE },
+ {{"rv710"}, {"rv710"}, GK_RV710, FEATURE_NONE },
+ {{"rv730"}, {"rv730"}, GK_RV730, FEATURE_NONE },
+ {{"rv740"}, {"rv770"}, GK_RV770, FEATURE_NONE },
+ {{"rv770"}, {"rv770"}, GK_RV770, FEATURE_NONE },
+ {{"cedar"}, {"cedar"}, GK_CEDAR, FEATURE_NONE },
+ {{"palm"}, {"cedar"}, GK_CEDAR, FEATURE_NONE },
+ {{"cypress"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA },
+ {{"hemlock"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA },
+ {{"juniper"}, {"juniper"}, GK_JUNIPER, FEATURE_NONE },
+ {{"redwood"}, {"redwood"}, GK_REDWOOD, FEATURE_NONE },
+ {{"sumo"}, {"sumo"}, GK_SUMO, FEATURE_NONE },
+ {{"sumo2"}, {"sumo"}, GK_SUMO, FEATURE_NONE },
+ {{"barts"}, {"barts"}, GK_BARTS, FEATURE_NONE },
+ {{"caicos"}, {"caicos"}, GK_CAICOS, FEATURE_NONE },
+ {{"aruba"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA },
+ {{"cayman"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA },
+ {{"turks"}, {"turks"}, GK_TURKS, FEATURE_NONE }
};
-// List of HWDiv names (use getHWDivSynonym) and which architectural
-// features they correspond to (use getHWDivFeatures).
-// FIXME: TableGen this.
-static const struct {
- const char *NameCStr;
- size_t NameLength;
- unsigned ID;
-
- StringRef getName() const { return StringRef(NameCStr, NameLength); }
-} HWDivNames[] = {
-#define ARM_HW_DIV_NAME(NAME, ID) { NAME, sizeof(NAME) - 1, ID },
-#include "llvm/Support/ARMTargetParser.def"
+// This table should be sorted by the value of GPUKind
+// Don't bother listing the implicitly true features
+constexpr GPUInfo AMDGCNGPUs[33] = {
+  // Name, Canonical Name, Kind, Features
+ {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
+ {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
+ {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"hainan"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"oland"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"pitcairn"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"verde"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"gfx700"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
+ {{"kaveri"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
+ {{"gfx701"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32},
+ {{"hawaii"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32},
+ {{"gfx702"}, {"gfx702"}, GK_GFX702, FEATURE_FAST_FMA_F32},
+ {{"gfx703"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
+ {{"kabini"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
+ {{"mullins"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
+ {{"gfx704"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
+ {{"bonaire"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
+ {{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
+ {{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
+ {{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
+ {{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32},
+ {{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32},
+ {{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
};
-// List of CPU names and their arches.
-// The same CPU can have multiple arches and can be default on multiple arches.
-// When finding the Arch for a CPU, first-found prevails. Sort them accordingly.
-// When this becomes table-generated, we'd probably need two tables.
-// FIXME: TableGen this.
-template <typename T> struct CpuNames {
- const char *NameCStr;
- size_t NameLength;
- T ArchID;
- bool Default; // is $Name the default CPU for $ArchID ?
- unsigned DefaultExtensions;
-
- StringRef getName() const { return StringRef(NameCStr, NameLength); }
-};
-CpuNames<ARM::ArchKind> CPUNames[] = {
-#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- { NAME, sizeof(NAME) - 1, ARM::ArchKind::ID, IS_DEFAULT, DEFAULT_EXT },
-#include "llvm/Support/ARMTargetParser.def"
-};
-
-CpuNames<AArch64::ArchKind> AArch64CPUNames[] = {
- #define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- { NAME, sizeof(NAME) - 1, AArch64::ArchKind::ID, IS_DEFAULT, DEFAULT_EXT },
- #include "llvm/Support/AArch64TargetParser.def"
- };
-
-} // namespace
-
-// ======================================================= //
-// Information by ID
-// ======================================================= //
-
-StringRef ARM::getFPUName(unsigned FPUKind) {
- if (FPUKind >= ARM::FK_LAST)
- return StringRef();
- return FPUNames[FPUKind].getName();
-}
-
-FPUVersion ARM::getFPUVersion(unsigned FPUKind) {
- if (FPUKind >= ARM::FK_LAST)
- return FPUVersion::NONE;
- return FPUNames[FPUKind].FPUVersion;
-}
-
-ARM::NeonSupportLevel ARM::getFPUNeonSupportLevel(unsigned FPUKind) {
- if (FPUKind >= ARM::FK_LAST)
- return ARM::NeonSupportLevel::None;
- return FPUNames[FPUKind].NeonSupport;
-}
-
-ARM::FPURestriction ARM::getFPURestriction(unsigned FPUKind) {
- if (FPUKind >= ARM::FK_LAST)
- return ARM::FPURestriction::None;
- return FPUNames[FPUKind].Restriction;
-}
-
-unsigned llvm::ARM::getDefaultFPU(StringRef CPU, ArchKind AK) {
- if (CPU == "generic")
- return ARCHNames[static_cast<unsigned>(AK)].DefaultFPU;
-
- return StringSwitch<unsigned>(CPU)
-#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- .Case(NAME, DEFAULT_FPU)
-#include "llvm/Support/ARMTargetParser.def"
- .Default(ARM::FK_INVALID);
-}
-
-unsigned llvm::ARM::getDefaultExtensions(StringRef CPU, ArchKind AK) {
- if (CPU == "generic")
- return ARCHNames[static_cast<unsigned>(AK)].ArchBaseExtensions;
-
- return StringSwitch<unsigned>(CPU)
-#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- .Case(NAME, ARCHNames[static_cast<unsigned>(ARM::ArchKind::ID)]\
- .ArchBaseExtensions | DEFAULT_EXT)
-#include "llvm/Support/ARMTargetParser.def"
- .Default(ARM::AEK_INVALID);
-}
-
-bool llvm::ARM::getHWDivFeatures(unsigned HWDivKind,
- std::vector<StringRef> &Features) {
-
- if (HWDivKind == ARM::AEK_INVALID)
- return false;
-
- if (HWDivKind & ARM::AEK_HWDIVARM)
- Features.push_back("+hwdiv-arm");
- else
- Features.push_back("-hwdiv-arm");
-
- if (HWDivKind & ARM::AEK_HWDIVTHUMB)
- Features.push_back("+hwdiv");
- else
- Features.push_back("-hwdiv");
-
- return true;
-}
-
-bool llvm::ARM::getExtensionFeatures(unsigned Extensions,
- std::vector<StringRef> &Features) {
-
- if (Extensions == ARM::AEK_INVALID)
- return false;
-
- if (Extensions & ARM::AEK_CRC)
- Features.push_back("+crc");
- else
- Features.push_back("-crc");
-
- if (Extensions & ARM::AEK_DSP)
- Features.push_back("+dsp");
- else
- Features.push_back("-dsp");
-
- if (Extensions & ARM::AEK_RAS)
- Features.push_back("+ras");
- else
- Features.push_back("-ras");
-
- if (Extensions & ARM::AEK_DOTPROD)
- Features.push_back("+dotprod");
- else
- Features.push_back("-dotprod");
-
- return getHWDivFeatures(Extensions, Features);
-}
-
-bool llvm::ARM::getFPUFeatures(unsigned FPUKind,
- std::vector<StringRef> &Features) {
-
- if (FPUKind >= ARM::FK_LAST || FPUKind == ARM::FK_INVALID)
- return false;
-
- // fp-only-sp and d16 subtarget features are independent of each other, so we
- // must enable/disable both.
- switch (FPUNames[FPUKind].Restriction) {
- case ARM::FPURestriction::SP_D16:
- Features.push_back("+fp-only-sp");
- Features.push_back("+d16");
- break;
- case ARM::FPURestriction::D16:
- Features.push_back("-fp-only-sp");
- Features.push_back("+d16");
- break;
- case ARM::FPURestriction::None:
- Features.push_back("-fp-only-sp");
- Features.push_back("-d16");
- break;
- }
-
- // FPU version subtarget features are inclusive of lower-numbered ones, so
- // enable the one corresponding to this version and disable all that are
- // higher. We also have to make sure to disable fp16 when vfp4 is disabled,
- // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16.
- switch (FPUNames[FPUKind].FPUVersion) {
- case ARM::FPUVersion::VFPV5:
- Features.push_back("+fp-armv8");
- break;
- case ARM::FPUVersion::VFPV4:
- Features.push_back("+vfp4");
- Features.push_back("-fp-armv8");
- break;
- case ARM::FPUVersion::VFPV3_FP16:
- Features.push_back("+vfp3");
- Features.push_back("+fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
- case ARM::FPUVersion::VFPV3:
- Features.push_back("+vfp3");
- Features.push_back("-fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
- case ARM::FPUVersion::VFPV2:
- Features.push_back("+vfp2");
- Features.push_back("-vfp3");
- Features.push_back("-fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
- case ARM::FPUVersion::NONE:
- Features.push_back("-vfp2");
- Features.push_back("-vfp3");
- Features.push_back("-fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
- }
-
- // crypto includes neon, so we handle this similarly to FPU version.
- switch (FPUNames[FPUKind].NeonSupport) {
- case ARM::NeonSupportLevel::Crypto:
- Features.push_back("+neon");
- Features.push_back("+crypto");
- break;
- case ARM::NeonSupportLevel::Neon:
- Features.push_back("+neon");
- Features.push_back("-crypto");
- break;
- case ARM::NeonSupportLevel::None:
- Features.push_back("-neon");
- Features.push_back("-crypto");
- break;
- }
+const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
+ GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
- return true;
-}
-
-StringRef llvm::ARM::getArchName(ArchKind AK) {
- return ARCHNames[static_cast<unsigned>(AK)].getName();
-}
+ auto I = std::lower_bound(Table.begin(), Table.end(), Search,
+ [](const GPUInfo &A, const GPUInfo &B) {
+ return A.Kind < B.Kind;
+ });
-StringRef llvm::ARM::getCPUAttr(ArchKind AK) {
- return ARCHNames[static_cast<unsigned>(AK)].getCPUAttr();
+ if (I == Table.end())
+ return nullptr;
+ return I;
}
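The lookup above relies on the tables being sorted by GPUKind, as noted for AMDGCNGPUs; a minimal sketch, assuming the table contents shown:

    // Sketch: lower_bound lands on the first row whose Kind is not less than
    // the requested kind, i.e. the canonical row for that kind.
    const GPUInfo *Entry = getArchEntry(AMDGPU::GK_GFX801, AMDGCNGPUs);
    // Entry->CanonicalName is "gfx801"; Entry->Features has both
    // FEATURE_FAST_FMA_F32 and FEATURE_FAST_DENORMAL_F32 set.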
-StringRef llvm::ARM::getSubArch(ArchKind AK) {
- return ARCHNames[static_cast<unsigned>(AK)].getSubArch();
-}
-
-unsigned llvm::ARM::getArchAttr(ArchKind AK) {
- return ARCHNames[static_cast<unsigned>(AK)].ArchAttr;
-}
-
-StringRef llvm::ARM::getArchExtName(unsigned ArchExtKind) {
- for (const auto AE : ARCHExtNames) {
- if (ArchExtKind == AE.ID)
- return AE.getName();
- }
- return StringRef();
-}
-
-StringRef llvm::ARM::getArchExtFeature(StringRef ArchExt) {
- if (ArchExt.startswith("no")) {
- StringRef ArchExtBase(ArchExt.substr(2));
- for (const auto AE : ARCHExtNames) {
- if (AE.NegFeature && ArchExtBase == AE.getName())
- return StringRef(AE.NegFeature);
- }
- }
- for (const auto AE : ARCHExtNames) {
- if (AE.Feature && ArchExt == AE.getName())
- return StringRef(AE.Feature);
- }
-
- return StringRef();
-}
-
-StringRef llvm::ARM::getHWDivName(unsigned HWDivKind) {
- for (const auto D : HWDivNames) {
- if (HWDivKind == D.ID)
- return D.getName();
- }
- return StringRef();
-}
-
-StringRef llvm::ARM::getDefaultCPU(StringRef Arch) {
- ArchKind AK = parseArch(Arch);
- if (AK == ARM::ArchKind::INVALID)
- return StringRef();
-
- // Look for multiple AKs to find the default for pair AK+Name.
- for (const auto CPU : CPUNames) {
- if (CPU.ArchID == AK && CPU.Default)
- return CPU.getName();
- }
-
- // If we can't find a default then target the architecture instead
- return "generic";
-}
-
-StringRef llvm::AArch64::getFPUName(unsigned FPUKind) {
- return ARM::getFPUName(FPUKind);
-}
-
-ARM::FPUVersion AArch64::getFPUVersion(unsigned FPUKind) {
- return ARM::getFPUVersion(FPUKind);
-}
-
-ARM::NeonSupportLevel AArch64::getFPUNeonSupportLevel(unsigned FPUKind) {
- return ARM::getFPUNeonSupportLevel( FPUKind);
-}
-
-ARM::FPURestriction AArch64::getFPURestriction(unsigned FPUKind) {
- return ARM::getFPURestriction(FPUKind);
-}
-
-unsigned llvm::AArch64::getDefaultFPU(StringRef CPU, ArchKind AK) {
- if (CPU == "generic")
- return AArch64ARCHNames[static_cast<unsigned>(AK)].DefaultFPU;
-
- return StringSwitch<unsigned>(CPU)
-#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- .Case(NAME, DEFAULT_FPU)
-#include "llvm/Support/AArch64TargetParser.def"
- .Default(ARM::FK_INVALID);
-}
-
-unsigned llvm::AArch64::getDefaultExtensions(StringRef CPU, ArchKind AK) {
- if (CPU == "generic")
- return AArch64ARCHNames[static_cast<unsigned>(AK)].ArchBaseExtensions;
-
- return StringSwitch<unsigned>(CPU)
-#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- .Case(NAME, \
- AArch64ARCHNames[static_cast<unsigned>(AArch64::ArchKind::ID)] \
- .ArchBaseExtensions | \
- DEFAULT_EXT)
-#include "llvm/Support/AArch64TargetParser.def"
- .Default(AArch64::AEK_INVALID);
-}
-
-AArch64::ArchKind llvm::AArch64::getCPUArchKind(StringRef CPU) {
- if (CPU == "generic")
- return AArch64::ArchKind::ARMV8A;
-
- return StringSwitch<AArch64::ArchKind>(CPU)
-#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- .Case(NAME, AArch64::ArchKind:: ID)
-#include "llvm/Support/AArch64TargetParser.def"
- .Default(AArch64::ArchKind::INVALID);
-}
-
-bool llvm::AArch64::getExtensionFeatures(unsigned Extensions,
- std::vector<StringRef> &Features) {
-
- if (Extensions == AArch64::AEK_INVALID)
- return false;
-
- if (Extensions & AArch64::AEK_FP)
- Features.push_back("+fp-armv8");
- if (Extensions & AArch64::AEK_SIMD)
- Features.push_back("+neon");
- if (Extensions & AArch64::AEK_CRC)
- Features.push_back("+crc");
- if (Extensions & AArch64::AEK_CRYPTO)
- Features.push_back("+crypto");
- if (Extensions & AArch64::AEK_DOTPROD)
- Features.push_back("+dotprod");
- if (Extensions & AArch64::AEK_FP16)
- Features.push_back("+fullfp16");
- if (Extensions & AArch64::AEK_PROFILE)
- Features.push_back("+spe");
- if (Extensions & AArch64::AEK_RAS)
- Features.push_back("+ras");
- if (Extensions & AArch64::AEK_LSE)
- Features.push_back("+lse");
- if (Extensions & AArch64::AEK_RDM)
- Features.push_back("+rdm");
- if (Extensions & AArch64::AEK_SVE)
- Features.push_back("+sve");
- if (Extensions & AArch64::AEK_RCPC)
- Features.push_back("+rcpc");
-
- return true;
-}
-
-bool llvm::AArch64::getFPUFeatures(unsigned FPUKind,
- std::vector<StringRef> &Features) {
- return ARM::getFPUFeatures(FPUKind, Features);
-}
-
-bool llvm::AArch64::getArchFeatures(AArch64::ArchKind AK,
- std::vector<StringRef> &Features) {
- if (AK == AArch64::ArchKind::ARMV8_1A)
- Features.push_back("+v8.1a");
- if (AK == AArch64::ArchKind::ARMV8_2A)
- Features.push_back("+v8.2a");
- if (AK == AArch64::ArchKind::ARMV8_3A)
- Features.push_back("+v8.3a");
- if (AK == AArch64::ArchKind::ARMV8_4A)
- Features.push_back("+v8.4a");
-
- return AK != AArch64::ArchKind::INVALID;
-}
-
-StringRef llvm::AArch64::getArchName(ArchKind AK) {
- return AArch64ARCHNames[static_cast<unsigned>(AK)].getName();
-}
-
-StringRef llvm::AArch64::getCPUAttr(ArchKind AK) {
- return AArch64ARCHNames[static_cast<unsigned>(AK)].getCPUAttr();
-}
-
-StringRef llvm::AArch64::getSubArch(ArchKind AK) {
- return AArch64ARCHNames[static_cast<unsigned>(AK)].getSubArch();
-}
-
-unsigned llvm::AArch64::getArchAttr(ArchKind AK) {
- return AArch64ARCHNames[static_cast<unsigned>(AK)].ArchAttr;
-}
-
-StringRef llvm::AArch64::getArchExtName(unsigned ArchExtKind) {
- for (const auto &AE : AArch64ARCHExtNames)
- if (ArchExtKind == AE.ID)
- return AE.getName();
- return StringRef();
-}
-
-StringRef llvm::AArch64::getArchExtFeature(StringRef ArchExt) {
- if (ArchExt.startswith("no")) {
- StringRef ArchExtBase(ArchExt.substr(2));
- for (const auto &AE : AArch64ARCHExtNames) {
- if (AE.NegFeature && ArchExtBase == AE.getName())
- return StringRef(AE.NegFeature);
- }
- }
-
- for (const auto &AE : AArch64ARCHExtNames)
- if (AE.Feature && ArchExt == AE.getName())
- return StringRef(AE.Feature);
- return StringRef();
-}
-
-StringRef llvm::AArch64::getDefaultCPU(StringRef Arch) {
- AArch64::ArchKind AK = parseArch(Arch);
- if (AK == ArchKind::INVALID)
- return StringRef();
-
- // Look for multiple AKs to find the default for pair AK+Name.
- for (const auto &CPU : AArch64CPUNames)
- if (CPU.ArchID == AK && CPU.Default)
- return CPU.getName();
-
- // If we can't find a default then target the architecture instead
- return "generic";
-}
-
-unsigned llvm::AArch64::checkArchVersion(StringRef Arch) {
- if (Arch.size() >= 2 && Arch[0] == 'v' && std::isdigit(Arch[1]))
- return (Arch[1] - 48);
- return 0;
-}
-
-// ======================================================= //
-// Parsers
-// ======================================================= //
-
-static StringRef getHWDivSynonym(StringRef HWDiv) {
- return StringSwitch<StringRef>(HWDiv)
- .Case("thumb,arm", "arm,thumb")
- .Default(HWDiv);
-}
-
-static StringRef getFPUSynonym(StringRef FPU) {
- return StringSwitch<StringRef>(FPU)
- .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported
- .Case("vfp2", "vfpv2")
- .Case("vfp3", "vfpv3")
- .Case("vfp4", "vfpv4")
- .Case("vfp3-d16", "vfpv3-d16")
- .Case("vfp4-d16", "vfpv4-d16")
- .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16")
- .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16")
- .Case("fp5-sp-d16", "fpv5-sp-d16")
- .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16")
- // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3.
- .Case("neon-vfpv3", "neon")
- .Default(FPU);
-}
-
-static StringRef getArchSynonym(StringRef Arch) {
- return StringSwitch<StringRef>(Arch)
- .Case("v5", "v5t")
- .Case("v5e", "v5te")
- .Case("v6j", "v6")
- .Case("v6hl", "v6k")
- .Cases("v6m", "v6sm", "v6s-m", "v6-m")
- .Cases("v6z", "v6zk", "v6kz")
- .Cases("v7", "v7a", "v7hl", "v7l", "v7-a")
- .Case("v7r", "v7-r")
- .Case("v7m", "v7-m")
- .Case("v7em", "v7e-m")
- .Cases("v8", "v8a", "v8l", "aarch64", "arm64", "v8-a")
- .Case("v8.1a", "v8.1-a")
- .Case("v8.2a", "v8.2-a")
- .Case("v8.3a", "v8.3-a")
- .Case("v8.4a", "v8.4-a")
- .Case("v8r", "v8-r")
- .Case("v8m.base", "v8-m.base")
- .Case("v8m.main", "v8-m.main")
- .Default(Arch);
-}
-
-// MArch is expected to be of the form (arm|thumb)?(eb)?(v.+)?(eb)?, but
-// (iwmmxt|xscale)(eb)? is also permitted. If the former, return
-// "v.+", if the latter, return unmodified string, minus 'eb'.
-// If invalid, return empty string.
-StringRef llvm::ARM::getCanonicalArchName(StringRef Arch) {
- size_t offset = StringRef::npos;
- StringRef A = Arch;
- StringRef Error = "";
-
- // Begins with "arm" / "thumb", move past it.
- if (A.startswith("arm64"))
- offset = 5;
- else if (A.startswith("arm"))
- offset = 3;
- else if (A.startswith("thumb"))
- offset = 5;
- else if (A.startswith("aarch64")) {
- offset = 7;
- // AArch64 uses "_be", not "eb" suffix.
- if (A.find("eb") != StringRef::npos)
- return Error;
- if (A.substr(offset, 3) == "_be")
- offset += 3;
- }
-
- // Ex. "armebv7", move past the "eb".
- if (offset != StringRef::npos && A.substr(offset, 2) == "eb")
- offset += 2;
- // Or, if it ends with eb ("armv7eb"), chop it off.
- else if (A.endswith("eb"))
- A = A.substr(0, A.size() - 2);
- // Trim the head
- if (offset != StringRef::npos)
- A = A.substr(offset);
-
- // Empty string means offset reached the end, which means it's valid.
- if (A.empty())
- return Arch;
-
- // Only match non-marketing names
- if (offset != StringRef::npos) {
- // Must start with 'vN'.
- if (A.size() >= 2 && (A[0] != 'v' || !std::isdigit(A[1])))
- return Error;
- // Can't have an extra 'eb'.
- if (A.find("eb") != StringRef::npos)
- return Error;
- }
-
- // Arch will either be a 'v' name (v7a) or a marketing name (xscale).
- return A;
-}
-
-unsigned llvm::ARM::parseHWDiv(StringRef HWDiv) {
- StringRef Syn = getHWDivSynonym(HWDiv);
- for (const auto D : HWDivNames) {
- if (Syn == D.getName())
- return D.ID;
- }
- return ARM::AEK_INVALID;
-}
-
-unsigned llvm::ARM::parseFPU(StringRef FPU) {
- StringRef Syn = getFPUSynonym(FPU);
- for (const auto F : FPUNames) {
- if (Syn == F.getName())
- return F.ID;
- }
- return ARM::FK_INVALID;
-}
-
-// Allows partial match, ex. "v7a" matches "armv7a".
-ARM::ArchKind ARM::parseArch(StringRef Arch) {
- Arch = getCanonicalArchName(Arch);
- StringRef Syn = getArchSynonym(Arch);
- for (const auto A : ARCHNames) {
- if (A.getName().endswith(Syn))
- return A.ID;
- }
- return ARM::ArchKind::INVALID;
-}
+} // namespace
-unsigned llvm::ARM::parseArchExt(StringRef ArchExt) {
- for (const auto A : ARCHExtNames) {
- if (ArchExt == A.getName())
- return A.ID;
- }
- return ARM::AEK_INVALID;
+StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
+ if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
+ return Entry->CanonicalName;
+ return "";
}
-ARM::ArchKind llvm::ARM::parseCPUArch(StringRef CPU) {
- for (const auto C : CPUNames) {
- if (CPU == C.getName())
- return C.ArchID;
- }
- return ARM::ArchKind::INVALID;
+StringRef llvm::AMDGPU::getArchNameR600(GPUKind AK) {
+ if (const auto *Entry = getArchEntry(AK, R600GPUs))
+ return Entry->CanonicalName;
+ return "";
}
-void llvm::ARM::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
- for (const CpuNames<ARM::ArchKind> &Arch : CPUNames) {
- if (Arch.ArchID != ARM::ArchKind::INVALID)
- Values.push_back(Arch.getName());
+AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) {
+ for (const auto C : AMDGCNGPUs) {
+ if (CPU == C.Name)
+ return C.Kind;
}
-}
-void llvm::AArch64::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
- for (const CpuNames<AArch64::ArchKind> &Arch : AArch64CPUNames) {
- if (Arch.ArchID != AArch64::ArchKind::INVALID)
- Values.push_back(Arch.getName());
- }
-}
-
-// ARM, Thumb, AArch64
-ARM::ISAKind ARM::parseArchISA(StringRef Arch) {
- return StringSwitch<ARM::ISAKind>(Arch)
- .StartsWith("aarch64", ARM::ISAKind::AARCH64)
- .StartsWith("arm64", ARM::ISAKind::AARCH64)
- .StartsWith("thumb", ARM::ISAKind::THUMB)
- .StartsWith("arm", ARM::ISAKind::ARM)
- .Default(ARM::ISAKind::INVALID);
+ return AMDGPU::GPUKind::GK_NONE;
}
-// Little/Big endian
-ARM::EndianKind ARM::parseArchEndian(StringRef Arch) {
- if (Arch.startswith("armeb") || Arch.startswith("thumbeb") ||
- Arch.startswith("aarch64_be"))
- return ARM::EndianKind::BIG;
-
- if (Arch.startswith("arm") || Arch.startswith("thumb")) {
- if (Arch.endswith("eb"))
- return ARM::EndianKind::BIG;
- else
- return ARM::EndianKind::LITTLE;
+AMDGPU::GPUKind llvm::AMDGPU::parseArchR600(StringRef CPU) {
+ for (const auto C : R600GPUs) {
+ if (CPU == C.Name)
+ return C.Kind;
}
- if (Arch.startswith("aarch64"))
- return ARM::EndianKind::LITTLE;
-
- return ARM::EndianKind::INVALID;
+ return AMDGPU::GPUKind::GK_NONE;
}
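A brief usage sketch, not part of the change, of the parse/print pair defined above, resolving a marketing name to its canonical spelling:

    // "carrizo" and "gfx801" share a GPUKind; printing goes through the
    // canonical-name column of the table.
    AMDGPU::GPUKind Kind = AMDGPU::parseArchAMDGCN("carrizo"); // GK_GFX801
    StringRef Canonical = AMDGPU::getArchNameAMDGCN(Kind);     // "gfx801"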
-// Profile A/R/M
-ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
- Arch = getCanonicalArchName(Arch);
- switch (parseArch(Arch)) {
- case ARM::ArchKind::ARMV6M:
- case ARM::ArchKind::ARMV7M:
- case ARM::ArchKind::ARMV7EM:
- case ARM::ArchKind::ARMV8MMainline:
- case ARM::ArchKind::ARMV8MBaseline:
- return ARM::ProfileKind::M;
- case ARM::ArchKind::ARMV7R:
- case ARM::ArchKind::ARMV8R:
- return ARM::ProfileKind::R;
- case ARM::ArchKind::ARMV7A:
- case ARM::ArchKind::ARMV7VE:
- case ARM::ArchKind::ARMV7K:
- case ARM::ArchKind::ARMV8A:
- case ARM::ArchKind::ARMV8_1A:
- case ARM::ArchKind::ARMV8_2A:
- case ARM::ArchKind::ARMV8_3A:
- case ARM::ArchKind::ARMV8_4A:
- return ARM::ProfileKind::A;
- case ARM::ArchKind::ARMV2:
- case ARM::ArchKind::ARMV2A:
- case ARM::ArchKind::ARMV3:
- case ARM::ArchKind::ARMV3M:
- case ARM::ArchKind::ARMV4:
- case ARM::ArchKind::ARMV4T:
- case ARM::ArchKind::ARMV5T:
- case ARM::ArchKind::ARMV5TE:
- case ARM::ArchKind::ARMV5TEJ:
- case ARM::ArchKind::ARMV6:
- case ARM::ArchKind::ARMV6K:
- case ARM::ArchKind::ARMV6T2:
- case ARM::ArchKind::ARMV6KZ:
- case ARM::ArchKind::ARMV7S:
- case ARM::ArchKind::IWMMXT:
- case ARM::ArchKind::IWMMXT2:
- case ARM::ArchKind::XSCALE:
- case ARM::ArchKind::INVALID:
- return ARM::ProfileKind::INVALID;
- }
- llvm_unreachable("Unhandled architecture");
+unsigned AMDGPU::getArchAttrAMDGCN(GPUKind AK) {
+ if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
+ return Entry->Features;
+ return FEATURE_NONE;
}
-// Version number (ex. v7 = 7).
-unsigned llvm::ARM::parseArchVersion(StringRef Arch) {
- Arch = getCanonicalArchName(Arch);
- switch (parseArch(Arch)) {
- case ARM::ArchKind::ARMV2:
- case ARM::ArchKind::ARMV2A:
- return 2;
- case ARM::ArchKind::ARMV3:
- case ARM::ArchKind::ARMV3M:
- return 3;
- case ARM::ArchKind::ARMV4:
- case ARM::ArchKind::ARMV4T:
- return 4;
- case ARM::ArchKind::ARMV5T:
- case ARM::ArchKind::ARMV5TE:
- case ARM::ArchKind::IWMMXT:
- case ARM::ArchKind::IWMMXT2:
- case ARM::ArchKind::XSCALE:
- case ARM::ArchKind::ARMV5TEJ:
- return 5;
- case ARM::ArchKind::ARMV6:
- case ARM::ArchKind::ARMV6K:
- case ARM::ArchKind::ARMV6T2:
- case ARM::ArchKind::ARMV6KZ:
- case ARM::ArchKind::ARMV6M:
- return 6;
- case ARM::ArchKind::ARMV7A:
- case ARM::ArchKind::ARMV7VE:
- case ARM::ArchKind::ARMV7R:
- case ARM::ArchKind::ARMV7M:
- case ARM::ArchKind::ARMV7S:
- case ARM::ArchKind::ARMV7EM:
- case ARM::ArchKind::ARMV7K:
- return 7;
- case ARM::ArchKind::ARMV8A:
- case ARM::ArchKind::ARMV8_1A:
- case ARM::ArchKind::ARMV8_2A:
- case ARM::ArchKind::ARMV8_3A:
- case ARM::ArchKind::ARMV8_4A:
- case ARM::ArchKind::ARMV8R:
- case ARM::ArchKind::ARMV8MBaseline:
- case ARM::ArchKind::ARMV8MMainline:
- return 8;
- case ARM::ArchKind::INVALID:
- return 0;
- }
- llvm_unreachable("Unhandled architecture");
+unsigned AMDGPU::getArchAttrR600(GPUKind AK) {
+ if (const auto *Entry = getArchEntry(AK, R600GPUs))
+ return Entry->Features;
+ return FEATURE_NONE;
}
-StringRef llvm::ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
- StringRef ArchName =
- CPU.empty() ? TT.getArchName() : ARM::getArchName(ARM::parseCPUArch(CPU));
-
- if (TT.isOSBinFormatMachO()) {
- if (TT.getEnvironment() == Triple::EABI ||
- TT.getOS() == Triple::UnknownOS ||
- llvm::ARM::parseArchProfile(ArchName) == ARM::ProfileKind::M)
- return "aapcs";
- if (TT.isWatchABI())
- return "aapcs16";
- return "apcs-gnu";
- } else if (TT.isOSWindows())
- // FIXME: this is invalid for WindowsCE.
- return "aapcs";
-
- // Select the default based on the platform.
- switch (TT.getEnvironment()) {
- case Triple::Android:
- case Triple::GNUEABI:
- case Triple::GNUEABIHF:
- case Triple::MuslEABI:
- case Triple::MuslEABIHF:
- return "aapcs-linux";
- case Triple::EABIHF:
- case Triple::EABI:
- return "aapcs";
- default:
- if (TT.isOSNetBSD())
- return "apcs-gnu";
- if (TT.isOSOpenBSD())
- return "aapcs-linux";
- return "aapcs";
- }
+void AMDGPU::fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values) {
+ // XXX: Should this only report unique canonical names?
+ for (const auto C : AMDGCNGPUs)
+ Values.push_back(C.Name);
}
-StringRef llvm::AArch64::getCanonicalArchName(StringRef Arch) {
- return ARM::getCanonicalArchName(Arch);
+void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
+ for (const auto C : R600GPUs)
+ Values.push_back(C.Name);
}
-unsigned llvm::AArch64::parseFPU(StringRef FPU) {
- return ARM::parseFPU(FPU);
-}
+AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
+ if (GPU == "generic")
+ return {7, 0, 0};
-// Allows partial match, ex. "v8a" matches "armv8a".
-AArch64::ArchKind AArch64::parseArch(StringRef Arch) {
- Arch = getCanonicalArchName(Arch);
- if (checkArchVersion(Arch) < 8)
- return ArchKind::INVALID;
-
- StringRef Syn = getArchSynonym(Arch);
- for (const auto A : AArch64ARCHNames) {
- if (A.getName().endswith(Syn))
- return A.ID;
- }
- return ArchKind::INVALID;
-}
-
-AArch64::ArchExtKind llvm::AArch64::parseArchExt(StringRef ArchExt) {
- for (const auto A : AArch64ARCHExtNames) {
- if (ArchExt == A.getName())
- return static_cast<ArchExtKind>(A.ID);
- }
- return AArch64::AEK_INVALID;
-}
+ AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
+ if (AK == AMDGPU::GPUKind::GK_NONE)
+ return {0, 0, 0};
-AArch64::ArchKind llvm::AArch64::parseCPUArch(StringRef CPU) {
- for (const auto C : AArch64CPUNames) {
- if (CPU == C.getName())
- return C.ArchID;
+ switch (AK) {
+ case GK_GFX600: return {6, 0, 0};
+ case GK_GFX601: return {6, 0, 1};
+ case GK_GFX700: return {7, 0, 0};
+ case GK_GFX701: return {7, 0, 1};
+ case GK_GFX702: return {7, 0, 2};
+ case GK_GFX703: return {7, 0, 3};
+ case GK_GFX704: return {7, 0, 4};
+ case GK_GFX801: return {8, 0, 1};
+ case GK_GFX802: return {8, 0, 2};
+ case GK_GFX803: return {8, 0, 3};
+ case GK_GFX810: return {8, 1, 0};
+ case GK_GFX900: return {9, 0, 0};
+ case GK_GFX902: return {9, 0, 2};
+ case GK_GFX904: return {9, 0, 4};
+ case GK_GFX906: return {9, 0, 6};
+ case GK_GFX909: return {9, 0, 9};
+ default: return {0, 0, 0};
}
- return ArchKind::INVALID;
-}
-
-// ARM, Thumb, AArch64
-ARM::ISAKind AArch64::parseArchISA(StringRef Arch) {
- return ARM::parseArchISA(Arch);
-}
-
-// Little/Big endian
-ARM::EndianKind AArch64::parseArchEndian(StringRef Arch) {
- return ARM::parseArchEndian(Arch);
-}
-
-// Profile A/R/M
-ARM::ProfileKind AArch64::parseArchProfile(StringRef Arch) {
- return ARM::parseArchProfile(Arch);
-}
-
-// Version number (ex. v8 = 8).
-unsigned llvm::AArch64::parseArchVersion(StringRef Arch) {
- return ARM::parseArchVersion(Arch);
-}
-
-bool llvm::AArch64::isX18ReservedByDefault(const Triple &TT) {
- return TT.isOSDarwin() || TT.isOSFuchsia() || TT.isOSWindows();
}
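For illustration only, assuming the IsaVersion struct with Major/Minor/Stepping fields declared in the matching header:

    // "fiji" parses as gfx803, so the ISA version is 8.0.3.
    AMDGPU::IsaVersion V = AMDGPU::getIsaVersion("fiji");
    // V.Major == 8, V.Minor == 0, V.Stepping == 3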
diff --git a/contrib/llvm/lib/Support/TargetRegistry.cpp b/contrib/llvm/lib/Support/TargetRegistry.cpp
index c5eba5714766..bb63891cd713 100644
--- a/contrib/llvm/lib/Support/TargetRegistry.cpp
+++ b/contrib/llvm/lib/Support/TargetRegistry.cpp
@@ -72,7 +72,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &TT,
auto I = find_if(targets(), ArchMatch);
if (I == targets().end()) {
- Error = "No available targets are compatible with this triple.";
+ Error = "No available targets are compatible with triple \"" + TT + "\"";
return nullptr;
}
diff --git a/contrib/llvm/lib/Support/Timer.cpp b/contrib/llvm/lib/Support/Timer.cpp
index 61d3b6c6e319..82f5810dd107 100644
--- a/contrib/llvm/lib/Support/Timer.cpp
+++ b/contrib/llvm/lib/Support/Timer.cpp
@@ -295,7 +295,7 @@ void TimerGroup::addTimer(Timer &T) {
void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
// Sort the timers in descending order by amount of time taken.
- llvm::sort(TimersToPrint.begin(), TimersToPrint.end());
+ llvm::sort(TimersToPrint);
TimeRecord Total;
for (const PrintRecord &Record : TimersToPrint)
@@ -343,8 +343,7 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
}
void TimerGroup::prepareToPrintList() {
- // See if any of our timers were started, if so add them to TimersToPrint and
- // reset them.
+ // See if any of our timers were started, if so add them to TimersToPrint.
for (Timer *T = FirstTimer; T; T = T->Next) {
if (!T->hasTriggered()) continue;
bool WasRunning = T->isRunning();
@@ -368,6 +367,12 @@ void TimerGroup::print(raw_ostream &OS) {
PrintQueuedTimers(OS);
}
+void TimerGroup::clear() {
+ sys::SmartScopedLock<true> L(*TimerLock);
+ for (Timer *T = FirstTimer; T; T = T->Next)
+ T->clear();
+}
+
void TimerGroup::printAll(raw_ostream &OS) {
sys::SmartScopedLock<true> L(*TimerLock);
@@ -375,6 +380,12 @@ void TimerGroup::printAll(raw_ostream &OS) {
TG->print(OS);
}
+void TimerGroup::clearAll() {
+ sys::SmartScopedLock<true> L(*TimerLock);
+ for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next)
+ TG->clear();
+}
+
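A hedged usage sketch of the new clearAll() entry point: a long-lived tool that prints timing reports repeatedly could reset the accumulated timers between reports.

    // Requires llvm/Support/Timer.h.
    llvm::TimerGroup::printAll(llvm::errs()); // report what has run so far
    llvm::TimerGroup::clearAll();             // start the next run from zero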
void TimerGroup::printJSONValue(raw_ostream &OS, const PrintRecord &R,
const char *suffix, double Value) {
assert(yaml::needsQuotes(Name) == yaml::QuotingType::None &&
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
index b14d6492b1ed..26d9327f6208 100644
--- a/contrib/llvm/lib/Support/Triple.cpp
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -35,7 +35,6 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case mips64: return "mips64";
case mips64el: return "mips64el";
case msp430: return "msp430";
- case nios2: return "nios2";
case ppc64: return "powerpc64";
case ppc64le: return "powerpc64le";
case ppc: return "powerpc";
@@ -102,8 +101,6 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
case mips64:
case mips64el: return "mips";
- case nios2: return "nios2";
-
case hexagon: return "hexagon";
case amdgcn: return "amdgcn";
@@ -209,6 +206,9 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case Mesa3D: return "mesa3d";
case Contiki: return "contiki";
case AMDPAL: return "amdpal";
+ case HermitCore: return "hermit";
+ case Hurd: return "hurd";
+ case WASI: return "wasi";
}
llvm_unreachable("Invalid OSType");
@@ -271,7 +271,6 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("mips64", mips64)
.Case("mips64el", mips64el)
.Case("msp430", msp430)
- .Case("nios2", nios2)
.Case("ppc64", ppc64)
.Case("ppc32", ppc)
.Case("ppc", ppc)
@@ -398,11 +397,14 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("thumbeb", Triple::thumbeb)
.Case("avr", Triple::avr)
.Case("msp430", Triple::msp430)
- .Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
- .Cases("mipsel", "mipsallegrexel", Triple::mipsel)
- .Cases("mips64", "mips64eb", Triple::mips64)
- .Case("mips64el", Triple::mips64el)
- .Case("nios2", Triple::nios2)
+ .Cases("mips", "mipseb", "mipsallegrex", "mipsisa32r6",
+ "mipsr6", Triple::mips)
+ .Cases("mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el",
+ Triple::mipsel)
+ .Cases("mips64", "mips64eb", "mipsn32", "mipsisa64r6",
+ "mips64r6", "mipsn32r6", Triple::mips64)
+ .Cases("mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el",
+ "mipsn32r6el", Triple::mips64el)
.Case("r600", Triple::r600)
.Case("amdgcn", Triple::amdgcn)
.Case("riscv32", Triple::riscv32)
@@ -502,6 +504,9 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("mesa3d", Triple::Mesa3D)
.StartsWith("contiki", Triple::Contiki)
.StartsWith("amdpal", Triple::AMDPAL)
+ .StartsWith("hermit", Triple::HermitCore)
+ .StartsWith("hurd", Triple::Hurd)
+ .StartsWith("wasi", Triple::WASI)
.Default(Triple::UnknownOS);
}
@@ -538,6 +543,10 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
}
static Triple::SubArchType parseSubArch(StringRef SubArchName) {
+ if (SubArchName.startswith("mips") &&
+ (SubArchName.endswith("r6el") || SubArchName.endswith("r6")))
+ return Triple::MipsSubArch_r6;
+
StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName);
// For now, this is the small part. Early return.
@@ -594,6 +603,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
return Triple::ARMSubArch_v8_3a;
case ARM::ArchKind::ARMV8_4A:
return Triple::ARMSubArch_v8_4a;
+ case ARM::ArchKind::ARMV8_5A:
+ return Triple::ARMSubArch_v8_5a;
case ARM::ArchKind::ARMV8R:
return Triple::ARMSubArch_v8r;
case ARM::ArchKind::ARMV8MBaseline:
@@ -651,7 +662,6 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::mips64el:
case Triple::mipsel:
case Triple::msp430:
- case Triple::nios2:
case Triple::nvptx:
case Triple::nvptx64:
case Triple::ppc64le:
@@ -709,6 +719,15 @@ Triple::Triple(const Twine &Str)
ObjectFormat = parseFormat(Components[3]);
}
}
+ } else {
+ Environment =
+ StringSwitch<Triple::EnvironmentType>(Components[0])
+ .StartsWith("mipsn32", Triple::GNUABIN32)
+ .StartsWith("mips64", Triple::GNUABI64)
+ .StartsWith("mipsisa64", Triple::GNUABI64)
+ .StartsWith("mipsisa32", Triple::GNU)
+ .Cases("mips", "mipsel", "mipsr6", "mipsr6el", Triple::GNU)
+ .Default(UnknownEnvironment);
}
}
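A sketch of how the MIPS changes above affect triple construction; values are illustrative:

    // r6 spellings fold into the existing mips arches with a new sub-arch;
    // a bare mips arch string now infers a default GNU ABI environment.
    llvm::Triple T1("mipsisa64r6el-linux-gnu");
    // T1.getArch() == Triple::mips64el,
    // T1.getSubArch() == Triple::MipsSubArch_r6
    llvm::Triple T2("mips64");
    // T2.getEnvironment() == Triple::GNUABI64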
if (ObjectFormat == UnknownObjectFormat)
@@ -887,6 +906,12 @@ std::string Triple::normalize(StringRef Str) {
}
}
+ // Replace empty components with "unknown" value.
+ for (unsigned i = 0, e = Components.size(); i < e; ++i) {
+ if (Components[i].empty())
+ Components[i] = "unknown";
+ }
+
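A small sketch of the new behaviour, assuming an otherwise well-formed triple string:

    // Empty components are now spelled out rather than left blank.
    std::string N = llvm::Triple::normalize("x86_64--linux-gnu");
    // N == "x86_64-unknown-linux-gnu"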
// Special case logic goes here. At this point Arch, Vendor and OS have the
// correct values for the computed components.
std::string NormalizedEnvironment;
@@ -1194,7 +1219,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::le32:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
- case llvm::Triple::nios2:
case llvm::Triple::nvptx:
case llvm::Triple::ppc:
case llvm::Triple::r600:
@@ -1279,7 +1303,6 @@ Triple Triple::get32BitArchVariant() const {
case Triple::le32:
case Triple::mips:
case Triple::mipsel:
- case Triple::nios2:
case Triple::nvptx:
case Triple::ppc:
case Triple::r600:
@@ -1328,7 +1351,6 @@ Triple Triple::get64BitArchVariant() const {
case Triple::kalimba:
case Triple::lanai:
case Triple::msp430:
- case Triple::nios2:
case Triple::r600:
case Triple::tce:
case Triple::tcele:
@@ -1400,7 +1422,6 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::le32:
case Triple::le64:
case Triple::msp430:
- case Triple::nios2:
case Triple::nvptx64:
case Triple::nvptx:
case Triple::r600:
@@ -1487,7 +1508,6 @@ bool Triple::isLittleEndian() const {
case Triple::mips64el:
case Triple::mipsel:
case Triple::msp430:
- case Triple::nios2:
case Triple::nvptx64:
case Triple::nvptx:
case Triple::ppc64le:
diff --git a/contrib/llvm/lib/Support/Unix/Path.inc b/contrib/llvm/lib/Support/Unix/Path.inc
index b4279d4fcc0c..d7cc0d627d09 100644
--- a/contrib/llvm/lib/Support/Unix/Path.inc
+++ b/contrib/llvm/lib/Support/Unix/Path.inc
@@ -38,6 +38,8 @@
#ifdef __APPLE__
#include <mach-o/dyld.h>
#include <sys/attr.h>
+#elif defined(__DragonFly__)
+#include <sys/mount.h>
#endif
// Both stdio.h and cstdio are included via different paths and
@@ -49,11 +51,12 @@
// For GNU Hurd
#if defined(__GNU__) && !defined(PATH_MAX)
# define PATH_MAX 4096
+# define MAXPATHLEN 4096
#endif
#include <sys/types.h>
#if !defined(__APPLE__) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && \
- !defined(__linux__)
+ !defined(__linux__) && !defined(__FreeBSD_kernel__)
#include <sys/statvfs.h>
#define STATVFS statvfs
#define FSTATVFS fstatvfs
@@ -82,7 +85,7 @@
#define STATVFS_F_FRSIZE(vfs) static_cast<uint64_t>(vfs.f_bsize)
#endif
-#if defined(__NetBSD__)
+#if defined(__NetBSD__) || defined(__DragonFly__) || defined(__GNU__)
#define STATVFS_F_FLAG(vfs) (vfs).f_flag
#else
#define STATVFS_F_FLAG(vfs) (vfs).f_flags
@@ -98,7 +101,7 @@ const file_t kInvalidFile = -1;
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__minix) || defined(__FreeBSD_kernel__) || defined(__linux__) || \
- defined(__CYGWIN__) || defined(__DragonFly__) || defined(_AIX)
+ defined(__CYGWIN__) || defined(__DragonFly__) || defined(_AIX) || defined(__GNU__)
static int
test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
{
@@ -178,14 +181,34 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
char exe_path[MAXPATHLEN];
StringRef aPath("/proc/self/exe");
if (sys::fs::exists(aPath)) {
- // /proc is not always mounted under Linux (chroot for example).
- ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path));
- if (len >= 0)
- return std::string(exe_path, len);
+ // /proc is not always mounted under Linux (chroot for example).
+ ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path));
+ if (len < 0)
+ return "";
+
+ // Null terminate the string for realpath. readlink never null
+ // terminates its output.
+ len = std::min(len, ssize_t(sizeof(exe_path) - 1));
+ exe_path[len] = '\0';
+
+ // On Linux, /proc/self/exe always looks through symlinks. However, on
+ // GNU/Hurd, /proc/self/exe is a symlink to the path that was used to start
+ // the program, and not the eventual binary file. Therefore, call realpath
+ // so this behaves the same on all platforms.
+#if _POSIX_VERSION >= 200112 || defined(__GLIBC__)
+ char *real_path = realpath(exe_path, NULL);
+ std::string ret = std::string(real_path);
+ free(real_path);
+ return ret;
+#else
+ char real_path[MAXPATHLEN];
+ realpath(exe_path, real_path);
+ return std::string(real_path);
+#endif
} else {
- // Fall back to the classical detection.
- if (getprogpath(exe_path, argv0))
- return exe_path;
+ // Fall back to the classical detection.
+ if (getprogpath(exe_path, argv0))
+ return exe_path;
}
#elif defined(HAVE_DLFCN_H) && defined(HAVE_DLADDR)
// Use dladdr to get executable path if available.
@@ -206,11 +229,11 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
}
TimePoint<> basic_file_status::getLastAccessedTime() const {
- return toTimePoint(fs_st_atime);
+ return toTimePoint(fs_st_atime, fs_st_atime_nsec);
}
TimePoint<> basic_file_status::getLastModificationTime() const {
- return toTimePoint(fs_st_mtime);
+ return toTimePoint(fs_st_mtime, fs_st_mtime_nsec);
}
UniqueID file_status::getUniqueID() const {
@@ -347,7 +370,7 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
}
static bool is_local_impl(struct STATVFS &Vfs) {
-#if defined(__linux__)
+#if defined(__linux__) || defined(__GNU__)
#ifndef NFS_SUPER_MAGIC
#define NFS_SUPER_MAGIC 0x6969
#endif
@@ -357,7 +380,11 @@ static bool is_local_impl(struct STATVFS &Vfs) {
#ifndef CIFS_MAGIC_NUMBER
#define CIFS_MAGIC_NUMBER 0xFF534D42
#endif
+#ifdef __GNU__
+ switch ((uint32_t)Vfs.__f_type) {
+#else
switch ((uint32_t)Vfs.f_type) {
+#endif
case NFS_SUPER_MAGIC:
case SMB_SUPER_MAGIC:
case CIFS_MAGIC_NUMBER:
@@ -523,37 +550,62 @@ static void expandTildeExpr(SmallVectorImpl<char> &Path) {
llvm::sys::path::append(Path, Storage);
}
+
+void expand_tilde(const Twine &path, SmallVectorImpl<char> &dest) {
+ dest.clear();
+ if (path.isTriviallyEmpty())
+ return;
+
+ path.toVector(dest);
+ expandTildeExpr(dest);
+
+ return;
+}
+
+static file_type typeForMode(mode_t Mode) {
+ if (S_ISDIR(Mode))
+ return file_type::directory_file;
+ else if (S_ISREG(Mode))
+ return file_type::regular_file;
+ else if (S_ISBLK(Mode))
+ return file_type::block_file;
+ else if (S_ISCHR(Mode))
+ return file_type::character_file;
+ else if (S_ISFIFO(Mode))
+ return file_type::fifo_file;
+ else if (S_ISSOCK(Mode))
+ return file_type::socket_file;
+ else if (S_ISLNK(Mode))
+ return file_type::symlink_file;
+ return file_type::type_unknown;
+}
+
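typeForMode lets the stat()-based and readdir()-based paths share one mode-to-type mapping; a sketch, valid only inside this translation unit where the helper is visible (the path is illustrative):

    // <sys/stat.h> is already pulled in by this file.
    struct stat St;
    if (::stat("/tmp", &St) == 0) {
      file_type T = typeForMode(St.st_mode);
      // For a directory, T == file_type::directory_file.
    }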
static std::error_code fillStatus(int StatRet, const struct stat &Status,
file_status &Result) {
if (StatRet != 0) {
- std::error_code ec(errno, std::generic_category());
- if (ec == errc::no_such_file_or_directory)
+ std::error_code EC(errno, std::generic_category());
+ if (EC == errc::no_such_file_or_directory)
Result = file_status(file_type::file_not_found);
else
Result = file_status(file_type::status_error);
- return ec;
+ return EC;
}
- file_type Type = file_type::type_unknown;
-
- if (S_ISDIR(Status.st_mode))
- Type = file_type::directory_file;
- else if (S_ISREG(Status.st_mode))
- Type = file_type::regular_file;
- else if (S_ISBLK(Status.st_mode))
- Type = file_type::block_file;
- else if (S_ISCHR(Status.st_mode))
- Type = file_type::character_file;
- else if (S_ISFIFO(Status.st_mode))
- Type = file_type::fifo_file;
- else if (S_ISSOCK(Status.st_mode))
- Type = file_type::socket_file;
- else if (S_ISLNK(Status.st_mode))
- Type = file_type::symlink_file;
+ uint32_t atime_nsec, mtime_nsec;
+#if defined(HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC)
+ atime_nsec = Status.st_atimespec.tv_nsec;
+ mtime_nsec = Status.st_mtimespec.tv_nsec;
+#elif defined(HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC)
+ atime_nsec = Status.st_atim.tv_nsec;
+ mtime_nsec = Status.st_mtim.tv_nsec;
+#else
+ atime_nsec = mtime_nsec = 0;
+#endif
perms Perms = static_cast<perms>(Status.st_mode) & all_perms;
- Result = file_status(Type, Perms, Status.st_dev, Status.st_nlink,
- Status.st_ino, Status.st_atime, Status.st_mtime,
+ Result = file_status(typeForMode(Status.st_mode), Perms, Status.st_dev,
+ Status.st_nlink, Status.st_ino,
+ Status.st_atime, atime_nsec, Status.st_mtime, mtime_nsec,
Status.st_uid, Status.st_gid, Status.st_size);
return std::error_code();
@@ -583,17 +635,22 @@ std::error_code setPermissions(const Twine &Path, perms Permissions) {
return std::error_code();
}
-std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time) {
+std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
+ TimePoint<> ModificationTime) {
#if defined(HAVE_FUTIMENS)
timespec Times[2];
- Times[0] = Times[1] = sys::toTimeSpec(Time);
+ Times[0] = sys::toTimeSpec(AccessTime);
+ Times[1] = sys::toTimeSpec(ModificationTime);
if (::futimens(FD, Times))
return std::error_code(errno, std::generic_category());
return std::error_code();
#elif defined(HAVE_FUTIMES)
timeval Times[2];
- Times[0] = Times[1] = sys::toTimeVal(
- std::chrono::time_point_cast<std::chrono::microseconds>(Time));
+ Times[0] = sys::toTimeVal(
+ std::chrono::time_point_cast<std::chrono::microseconds>(AccessTime));
+ Times[1] =
+ sys::toTimeVal(std::chrono::time_point_cast<std::chrono::microseconds>(
+ ModificationTime));
if (::futimes(FD, Times))
return std::error_code(errno, std::generic_category());
return std::error_code();
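The widened API takes the two timestamps separately; a minimal caller sketch, where FD is assumed to be an open descriptor:

    // Requires llvm/Support/FileSystem.h and llvm/Support/Chrono.h.
    llvm::sys::TimePoint<> Now = std::chrono::system_clock::now();
    std::error_code EC = llvm::sys::fs::setLastAccessAndModificationTime(
        FD, /*AccessTime=*/Now, /*ModificationTime=*/Now);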
@@ -691,19 +748,30 @@ std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
return std::error_code();
}
-std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
+static file_type direntType(dirent* Entry) {
+ // Most platforms provide the file type in the dirent: Linux/BSD/Mac.
+ // The DTTOIF macro lets us reuse our status -> type conversion.
+#if defined(_DIRENT_HAVE_D_TYPE) && defined(DTTOIF)
+ return typeForMode(DTTOIF(Entry->d_type));
+#else
+ // Other platforms such as Solaris require a stat() to get the type.
+ return file_type::type_unknown;
+#endif
+}
+
+std::error_code detail::directory_iterator_increment(detail::DirIterState &It) {
errno = 0;
- dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
- if (cur_dir == nullptr && errno != 0) {
+ dirent *CurDir = ::readdir(reinterpret_cast<DIR *>(It.IterationHandle));
+ if (CurDir == nullptr && errno != 0) {
return std::error_code(errno, std::generic_category());
- } else if (cur_dir != nullptr) {
- StringRef name(cur_dir->d_name);
- if ((name.size() == 1 && name[0] == '.') ||
- (name.size() == 2 && name[0] == '.' && name[1] == '.'))
- return directory_iterator_increment(it);
- it.CurrentEntry.replace_filename(name);
+ } else if (CurDir != nullptr) {
+ StringRef Name(CurDir->d_name);
+ if ((Name.size() == 1 && Name[0] == '.') ||
+ (Name.size() == 2 && Name[0] == '.' && Name[1] == '.'))
+ return directory_iterator_increment(It);
+ It.CurrentEntry.replace_filename(Name, direntType(CurDir));
} else
- return directory_iterator_destruct(it);
+ return directory_iterator_destruct(It);
return std::error_code();
}
@@ -952,29 +1020,6 @@ static bool getDarwinConfDir(bool TempDir, SmallVectorImpl<char> &Result) {
return false;
}
-static bool getUserCacheDir(SmallVectorImpl<char> &Result) {
- // First try using XDG_CACHE_HOME env variable,
- // as specified in XDG Base Directory Specification at
- // http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
- if (const char *XdgCacheDir = std::getenv("XDG_CACHE_HOME")) {
- Result.clear();
- Result.append(XdgCacheDir, XdgCacheDir + strlen(XdgCacheDir));
- return true;
- }
-
- // Try Darwin configuration query
- if (getDarwinConfDir(false, Result))
- return true;
-
- // Use "$HOME/.cache" if $HOME is available
- if (home_directory(Result)) {
- append(Result, ".cache");
- return true;
- }
-
- return false;
-}
-
static const char *getEnvTempDir() {
// Check whether the temporary directory is specified by an environment
// variable.
diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc
index de26695d64ea..ad88d5e96906 100644
--- a/contrib/llvm/lib/Support/Unix/Signals.inc
+++ b/contrib/llvm/lib/Support/Unix/Signals.inc
@@ -47,6 +47,7 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <string>
+#include <sysexits.h>
#ifdef HAVE_BACKTRACE
# include BACKTRACE_HEADER // For backtrace().
#endif
@@ -334,6 +335,10 @@ static RETSIGTYPE SignalHandler(int Sig) {
if (auto OldInterruptFunction = InterruptFunction.exchange(nullptr))
return OldInterruptFunction();
+ // Send a special return code that drivers can check for, from sysexits.h.
+ if (Sig == SIGPIPE)
+ exit(EX_IOERR);
+
raise(Sig); // Execute the default handler.
return;
}
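A hypothetical driver-side check built on the new convention; ChildPID and the waitpid() plumbing are assumptions, not part of this patch:

    // Needs <sys/wait.h> and <sysexits.h>.
    int Status;
    if (waitpid(ChildPID, &Status, 0) > 0 && WIFEXITED(Status) &&
        WEXITSTATUS(Status) == EX_IOERR) {
      // The child exited because its output pipe was closed; treat this as
      // a quiet failure rather than a crash.
    }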
diff --git a/contrib/llvm/lib/Support/VirtualFileSystem.cpp b/contrib/llvm/lib/Support/VirtualFileSystem.cpp
new file mode 100644
index 000000000000..f2a8a1bb27af
--- /dev/null
+++ b/contrib/llvm/lib/Support/VirtualFileSystem.cpp
@@ -0,0 +1,2070 @@
+//===- VirtualFileSystem.cpp - Virtual File System Layer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtualFileSystem interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Chrono.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <atomic>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::vfs;
+
+using llvm::sys::fs::file_status;
+using llvm::sys::fs::file_type;
+using llvm::sys::fs::perms;
+using llvm::sys::fs::UniqueID;
+
+Status::Status(const file_status &Status)
+ : UID(Status.getUniqueID()), MTime(Status.getLastModificationTime()),
+ User(Status.getUser()), Group(Status.getGroup()), Size(Status.getSize()),
+ Type(Status.type()), Perms(Status.permissions()) {}
+
+Status::Status(StringRef Name, UniqueID UID, sys::TimePoint<> MTime,
+ uint32_t User, uint32_t Group, uint64_t Size, file_type Type,
+ perms Perms)
+ : Name(Name), UID(UID), MTime(MTime), User(User), Group(Group), Size(Size),
+ Type(Type), Perms(Perms) {}
+
+Status Status::copyWithNewName(const Status &In, StringRef NewName) {
+ return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
+ In.getUser(), In.getGroup(), In.getSize(), In.getType(),
+ In.getPermissions());
+}
+
+Status Status::copyWithNewName(const file_status &In, StringRef NewName) {
+ return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
+ In.getUser(), In.getGroup(), In.getSize(), In.type(),
+ In.permissions());
+}
+
+bool Status::equivalent(const Status &Other) const {
+ assert(isStatusKnown() && Other.isStatusKnown());
+ return getUniqueID() == Other.getUniqueID();
+}
+
+bool Status::isDirectory() const { return Type == file_type::directory_file; }
+
+bool Status::isRegularFile() const { return Type == file_type::regular_file; }
+
+bool Status::isOther() const {
+ return exists() && !isRegularFile() && !isDirectory() && !isSymlink();
+}
+
+bool Status::isSymlink() const { return Type == file_type::symlink_file; }
+
+bool Status::isStatusKnown() const { return Type != file_type::status_error; }
+
+bool Status::exists() const {
+ return isStatusKnown() && Type != file_type::file_not_found;
+}
+
+File::~File() = default;
+
+FileSystem::~FileSystem() = default;
+
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+FileSystem::getBufferForFile(const llvm::Twine &Name, int64_t FileSize,
+ bool RequiresNullTerminator, bool IsVolatile) {
+ auto F = openFileForRead(Name);
+ if (!F)
+ return F.getError();
+
+ return (*F)->getBuffer(Name, FileSize, RequiresNullTerminator, IsVolatile);
+}
+
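A minimal sketch of reading a file through the vfs interface; the path is illustrative:

    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS =
        llvm::vfs::getRealFileSystem();
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buf =
        FS->getBufferForFile("/etc/hosts");
    if (Buf)
      llvm::errs() << (*Buf)->getBufferSize() << " bytes\n";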
+std::error_code FileSystem::makeAbsolute(SmallVectorImpl<char> &Path) const {
+ if (llvm::sys::path::is_absolute(Path))
+ return {};
+
+ auto WorkingDir = getCurrentWorkingDirectory();
+ if (!WorkingDir)
+ return WorkingDir.getError();
+
+ llvm::sys::fs::make_absolute(WorkingDir.get(), Path);
+ return {};
+}
+
+std::error_code FileSystem::getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const {
+ return errc::operation_not_permitted;
+}
+
+std::error_code FileSystem::isLocal(const Twine &Path, bool &Result) {
+ return errc::operation_not_permitted;
+}
+
+bool FileSystem::exists(const Twine &Path) {
+ auto Status = status(Path);
+ return Status && Status->exists();
+}
+
+#ifndef NDEBUG
+static bool isTraversalComponent(StringRef Component) {
+ return Component.equals("..") || Component.equals(".");
+}
+
+static bool pathHasTraversal(StringRef Path) {
+ using namespace llvm::sys;
+
+ for (StringRef Comp : llvm::make_range(path::begin(Path), path::end(Path)))
+ if (isTraversalComponent(Comp))
+ return true;
+ return false;
+}
+#endif
+
+//===-----------------------------------------------------------------------===/
+// RealFileSystem implementation
+//===-----------------------------------------------------------------------===/
+
+namespace {
+
+/// Wrapper around a raw file descriptor.
+class RealFile : public File {
+ friend class RealFileSystem;
+
+ int FD;
+ Status S;
+ std::string RealName;
+
+ RealFile(int FD, StringRef NewName, StringRef NewRealPathName)
+ : FD(FD), S(NewName, {}, {}, {}, {}, {},
+ llvm::sys::fs::file_type::status_error, {}),
+ RealName(NewRealPathName.str()) {
+ assert(FD >= 0 && "Invalid or inactive file descriptor");
+ }
+
+public:
+ ~RealFile() override;
+
+ ErrorOr<Status> status() override;
+ ErrorOr<std::string> getName() override;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> getBuffer(const Twine &Name,
+ int64_t FileSize,
+ bool RequiresNullTerminator,
+ bool IsVolatile) override;
+ std::error_code close() override;
+};
+
+} // namespace
+
+RealFile::~RealFile() { close(); }
+
+ErrorOr<Status> RealFile::status() {
+ assert(FD != -1 && "cannot stat closed file");
+ if (!S.isStatusKnown()) {
+ file_status RealStatus;
+ if (std::error_code EC = sys::fs::status(FD, RealStatus))
+ return EC;
+ S = Status::copyWithNewName(RealStatus, S.getName());
+ }
+ return S;
+}
+
+ErrorOr<std::string> RealFile::getName() {
+ return RealName.empty() ? S.getName().str() : RealName;
+}
+
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+RealFile::getBuffer(const Twine &Name, int64_t FileSize,
+ bool RequiresNullTerminator, bool IsVolatile) {
+ assert(FD != -1 && "cannot get buffer for closed file");
+ return MemoryBuffer::getOpenFile(FD, Name, FileSize, RequiresNullTerminator,
+ IsVolatile);
+}
+
+std::error_code RealFile::close() {
+ std::error_code EC = sys::Process::SafelyCloseFileDescriptor(FD);
+ FD = -1;
+ return EC;
+}
+
+namespace {
+
+/// The file system according to your operating system.
+class RealFileSystem : public FileSystem {
+public:
+ ErrorOr<Status> status(const Twine &Path) override;
+ ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;
+ directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
+
+ llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
+ std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
+ std::error_code isLocal(const Twine &Path, bool &Result) override;
+ std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const override;
+
+private:
+ mutable std::mutex CWDMutex;
+ mutable std::string CWDCache;
+};
+
+} // namespace
+
+ErrorOr<Status> RealFileSystem::status(const Twine &Path) {
+ sys::fs::file_status RealStatus;
+ if (std::error_code EC = sys::fs::status(Path, RealStatus))
+ return EC;
+ return Status::copyWithNewName(RealStatus, Path.str());
+}
+
+ErrorOr<std::unique_ptr<File>>
+RealFileSystem::openFileForRead(const Twine &Name) {
+ int FD;
+ SmallString<256> RealName;
+ if (std::error_code EC =
+ sys::fs::openFileForRead(Name, FD, sys::fs::OF_None, &RealName))
+ return EC;
+ return std::unique_ptr<File>(new RealFile(FD, Name.str(), RealName.str()));
+}
+
+llvm::ErrorOr<std::string> RealFileSystem::getCurrentWorkingDirectory() const {
+ std::lock_guard<std::mutex> Lock(CWDMutex);
+ if (!CWDCache.empty())
+ return CWDCache;
+ SmallString<256> Dir;
+ if (std::error_code EC = llvm::sys::fs::current_path(Dir))
+ return EC;
+ CWDCache = Dir.str();
+ return CWDCache;
+}
+
+std::error_code RealFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
+ // FIXME: chdir is thread hostile; on the other hand, creating the same
+ // behavior as chdir is complex: chdir resolves the path once, thus
+ // guaranteeing that all subsequent relative path operations work
+ // on the same path the original chdir resulted in. This makes a
+ // difference for example on network filesystems, where symlinks might be
+ // switched during runtime of the tool. Fixing this depends on having a
+ // file system abstraction that allows openat() style interactions.
+ if (auto EC = llvm::sys::fs::set_current_path(Path))
+ return EC;
+
+ // Invalidate cache.
+ std::lock_guard<std::mutex> Lock(CWDMutex);
+ CWDCache.clear();
+ return std::error_code();
+}
+
+std::error_code RealFileSystem::isLocal(const Twine &Path, bool &Result) {
+ return llvm::sys::fs::is_local(Path, Result);
+}
+
+std::error_code
+RealFileSystem::getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const {
+ return llvm::sys::fs::real_path(Path, Output);
+}
+
+IntrusiveRefCntPtr<FileSystem> vfs::getRealFileSystem() {
+ static IntrusiveRefCntPtr<FileSystem> FS = new RealFileSystem();
+ return FS;
+}
+
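Editor's note, not part of the upstream diff: a minimal sketch of how the real file system above is typically consumed through the generic vfs::FileSystem interface. The header path is assumed to be llvm/Support/VirtualFileSystem.h as introduced by this import; the path passed in is illustrative only.

    // Illustrative only; uses the FileSystem/File APIs defined above.
    #include "llvm/Support/VirtualFileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void statAndRead(StringRef Path) {
      IntrusiveRefCntPtr<vfs::FileSystem> FS = vfs::getRealFileSystem();

      // status() mirrors sys::fs::status() but goes through the VFS layer.
      ErrorOr<vfs::Status> S = FS->status(Path);
      if (!S) {
        errs() << Path << ": " << S.getError().message() << "\n";
        return;
      }

      // openFileForRead() yields a File; getBuffer() maps its contents.
      auto F = FS->openFileForRead(Path);
      if (!F)
        return;
      if (auto Buf = (*F)->getBuffer(Path))
        outs() << Path << ": " << (*Buf)->getBufferSize() << " bytes\n";
    }
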
+namespace {
+
+class RealFSDirIter : public llvm::vfs::detail::DirIterImpl {
+ llvm::sys::fs::directory_iterator Iter;
+
+public:
+ RealFSDirIter(const Twine &Path, std::error_code &EC) : Iter(Path, EC) {
+ if (Iter != llvm::sys::fs::directory_iterator())
+ CurrentEntry = directory_entry(Iter->path(), Iter->type());
+ }
+
+ std::error_code increment() override {
+ std::error_code EC;
+ Iter.increment(EC);
+ CurrentEntry = (Iter == llvm::sys::fs::directory_iterator())
+ ? directory_entry()
+ : directory_entry(Iter->path(), Iter->type());
+ return EC;
+ }
+};
+
+} // namespace
+
+directory_iterator RealFileSystem::dir_begin(const Twine &Dir,
+ std::error_code &EC) {
+ return directory_iterator(std::make_shared<RealFSDirIter>(Dir, EC));
+}
+
+//===-----------------------------------------------------------------------===/
+// OverlayFileSystem implementation
+//===-----------------------------------------------------------------------===/
+
+OverlayFileSystem::OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> BaseFS) {
+ FSList.push_back(std::move(BaseFS));
+}
+
+void OverlayFileSystem::pushOverlay(IntrusiveRefCntPtr<FileSystem> FS) {
+ FSList.push_back(FS);
+ // Synchronize added file systems by duplicating the working directory from
+ // the first one in the list.
+ FS->setCurrentWorkingDirectory(getCurrentWorkingDirectory().get());
+}
+
+ErrorOr<Status> OverlayFileSystem::status(const Twine &Path) {
+ // FIXME: handle symlinks that cross file systems
+ for (iterator I = overlays_begin(), E = overlays_end(); I != E; ++I) {
+ ErrorOr<Status> Status = (*I)->status(Path);
+ if (Status || Status.getError() != llvm::errc::no_such_file_or_directory)
+ return Status;
+ }
+ return make_error_code(llvm::errc::no_such_file_or_directory);
+}
+
+ErrorOr<std::unique_ptr<File>>
+OverlayFileSystem::openFileForRead(const llvm::Twine &Path) {
+ // FIXME: handle symlinks that cross file systems
+ for (iterator I = overlays_begin(), E = overlays_end(); I != E; ++I) {
+ auto Result = (*I)->openFileForRead(Path);
+ if (Result || Result.getError() != llvm::errc::no_such_file_or_directory)
+ return Result;
+ }
+ return make_error_code(llvm::errc::no_such_file_or_directory);
+}
+
+llvm::ErrorOr<std::string>
+OverlayFileSystem::getCurrentWorkingDirectory() const {
+ // All file systems are synchronized, just take the first working directory.
+ return FSList.front()->getCurrentWorkingDirectory();
+}
+
+std::error_code
+OverlayFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
+ for (auto &FS : FSList)
+ if (std::error_code EC = FS->setCurrentWorkingDirectory(Path))
+ return EC;
+ return {};
+}
+
+std::error_code OverlayFileSystem::isLocal(const Twine &Path, bool &Result) {
+ for (auto &FS : FSList)
+ if (FS->exists(Path))
+ return FS->isLocal(Path, Result);
+ return errc::no_such_file_or_directory;
+}
+
+std::error_code
+OverlayFileSystem::getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const {
+ for (auto &FS : FSList)
+ if (FS->exists(Path))
+ return FS->getRealPath(Path, Output);
+ return errc::no_such_file_or_directory;
+}
+
+llvm::vfs::detail::DirIterImpl::~DirIterImpl() = default;
+
+namespace {
+
+class OverlayFSDirIterImpl : public llvm::vfs::detail::DirIterImpl {
+ OverlayFileSystem &Overlays;
+ std::string Path;
+ OverlayFileSystem::iterator CurrentFS;
+ directory_iterator CurrentDirIter;
+ llvm::StringSet<> SeenNames;
+
+ std::error_code incrementFS() {
+ assert(CurrentFS != Overlays.overlays_end() && "incrementing past end");
+ ++CurrentFS;
+ for (auto E = Overlays.overlays_end(); CurrentFS != E; ++CurrentFS) {
+ std::error_code EC;
+ CurrentDirIter = (*CurrentFS)->dir_begin(Path, EC);
+ if (EC && EC != errc::no_such_file_or_directory)
+ return EC;
+ if (CurrentDirIter != directory_iterator())
+ break; // found
+ }
+ return {};
+ }
+
+ std::error_code incrementDirIter(bool IsFirstTime) {
+ assert((IsFirstTime || CurrentDirIter != directory_iterator()) &&
+ "incrementing past end");
+ std::error_code EC;
+ if (!IsFirstTime)
+ CurrentDirIter.increment(EC);
+ if (!EC && CurrentDirIter == directory_iterator())
+ EC = incrementFS();
+ return EC;
+ }
+
+ std::error_code incrementImpl(bool IsFirstTime) {
+ while (true) {
+ std::error_code EC = incrementDirIter(IsFirstTime);
+ if (EC || CurrentDirIter == directory_iterator()) {
+ CurrentEntry = directory_entry();
+ return EC;
+ }
+ CurrentEntry = *CurrentDirIter;
+ StringRef Name = llvm::sys::path::filename(CurrentEntry.path());
+ if (SeenNames.insert(Name).second)
+ return EC; // name not seen before
+ }
+ llvm_unreachable("returned above");
+ }
+
+public:
+ OverlayFSDirIterImpl(const Twine &Path, OverlayFileSystem &FS,
+ std::error_code &EC)
+ : Overlays(FS), Path(Path.str()), CurrentFS(Overlays.overlays_begin()) {
+ CurrentDirIter = (*CurrentFS)->dir_begin(Path, EC);
+ EC = incrementImpl(true);
+ }
+
+ std::error_code increment() override { return incrementImpl(false); }
+};
+
+} // namespace
+
+directory_iterator OverlayFileSystem::dir_begin(const Twine &Dir,
+ std::error_code &EC) {
+ return directory_iterator(
+ std::make_shared<OverlayFSDirIterImpl>(Dir, *this, EC));
+}
+
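Editor's note, not part of the diff: the overlay semantics implemented above (later-pushed file systems are consulted first, with fallback only on errc::no_such_file_or_directory, and merged directory listings that suppress duplicate names) can be exercised with a short sketch; the helper name is made up.

    #include "llvm/Support/VirtualFileSystem.h"

    using namespace llvm;

    static IntrusiveRefCntPtr<vfs::OverlayFileSystem>
    makeOverlay(IntrusiveRefCntPtr<vfs::FileSystem> Top) {
      // Base layer: the real file system. Layers pushed later shadow it.
      IntrusiveRefCntPtr<vfs::OverlayFileSystem> Overlay(
          new vfs::OverlayFileSystem(vfs::getRealFileSystem()));
      Overlay->pushOverlay(std::move(Top));
      // Lookups now try `Top` first and fall back to the real FS, matching
      // status()/openFileForRead() above; dir_begin() merges listings.
      return Overlay;
    }
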
+void ProxyFileSystem::anchor() {}
+
+namespace llvm {
+namespace vfs {
+
+namespace detail {
+
+enum InMemoryNodeKind { IME_File, IME_Directory, IME_HardLink };
+
+/// The in-memory file system is a tree of Nodes. Every node can either be a
+/// file, a hardlink, or a directory.

+class InMemoryNode {
+ InMemoryNodeKind Kind;
+ std::string FileName;
+
+public:
+ InMemoryNode(llvm::StringRef FileName, InMemoryNodeKind Kind)
+ : Kind(Kind), FileName(llvm::sys::path::filename(FileName)) {}
+ virtual ~InMemoryNode() = default;
+
+ /// Get the filename of this node (the name without the directory part).
+ StringRef getFileName() const { return FileName; }
+ InMemoryNodeKind getKind() const { return Kind; }
+ virtual std::string toString(unsigned Indent) const = 0;
+};
+
+class InMemoryFile : public InMemoryNode {
+ Status Stat;
+ std::unique_ptr<llvm::MemoryBuffer> Buffer;
+
+public:
+ InMemoryFile(Status Stat, std::unique_ptr<llvm::MemoryBuffer> Buffer)
+ : InMemoryNode(Stat.getName(), IME_File), Stat(std::move(Stat)),
+ Buffer(std::move(Buffer)) {}
+
+ /// Return the \p Status for this node. \p RequestedName should be the name
+ /// through which the caller referred to this node. It will override
+ /// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
+ Status getStatus(StringRef RequestedName) const {
+ return Status::copyWithNewName(Stat, RequestedName);
+ }
+ llvm::MemoryBuffer *getBuffer() const { return Buffer.get(); }
+
+ std::string toString(unsigned Indent) const override {
+ return (std::string(Indent, ' ') + Stat.getName() + "\n").str();
+ }
+
+ static bool classof(const InMemoryNode *N) {
+ return N->getKind() == IME_File;
+ }
+};
+
+namespace {
+
+class InMemoryHardLink : public InMemoryNode {
+ const InMemoryFile &ResolvedFile;
+
+public:
+ InMemoryHardLink(StringRef Path, const InMemoryFile &ResolvedFile)
+ : InMemoryNode(Path, IME_HardLink), ResolvedFile(ResolvedFile) {}
+ const InMemoryFile &getResolvedFile() const { return ResolvedFile; }
+
+ std::string toString(unsigned Indent) const override {
+ return std::string(Indent, ' ') + "HardLink to -> " +
+ ResolvedFile.toString(0);
+ }
+
+ static bool classof(const InMemoryNode *N) {
+ return N->getKind() == IME_HardLink;
+ }
+};
+
+/// Adapt an InMemoryFile for VFS' File interface. The goal is to make
+/// \p InMemoryFileAdaptor mimic as much as possible the behavior of
+/// \p RealFile.
+class InMemoryFileAdaptor : public File {
+ const InMemoryFile &Node;
+ /// The name to use when returning a Status for this file.
+ std::string RequestedName;
+
+public:
+ explicit InMemoryFileAdaptor(const InMemoryFile &Node,
+ std::string RequestedName)
+ : Node(Node), RequestedName(std::move(RequestedName)) {}
+
+ llvm::ErrorOr<Status> status() override {
+ return Node.getStatus(RequestedName);
+ }
+
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
+ getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
+ bool IsVolatile) override {
+ llvm::MemoryBuffer *Buf = Node.getBuffer();
+ return llvm::MemoryBuffer::getMemBuffer(
+ Buf->getBuffer(), Buf->getBufferIdentifier(), RequiresNullTerminator);
+ }
+
+ std::error_code close() override { return {}; }
+};
+} // namespace
+
+class InMemoryDirectory : public InMemoryNode {
+ Status Stat;
+ llvm::StringMap<std::unique_ptr<InMemoryNode>> Entries;
+
+public:
+ InMemoryDirectory(Status Stat)
+ : InMemoryNode(Stat.getName(), IME_Directory), Stat(std::move(Stat)) {}
+
+ /// Return the \p Status for this node. \p RequestedName should be the name
+ /// through which the caller referred to this node. It will override
+ /// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
+ Status getStatus(StringRef RequestedName) const {
+ return Status::copyWithNewName(Stat, RequestedName);
+ }
+ InMemoryNode *getChild(StringRef Name) {
+ auto I = Entries.find(Name);
+ if (I != Entries.end())
+ return I->second.get();
+ return nullptr;
+ }
+
+ InMemoryNode *addChild(StringRef Name, std::unique_ptr<InMemoryNode> Child) {
+ return Entries.insert(make_pair(Name, std::move(Child)))
+ .first->second.get();
+ }
+
+ using const_iterator = decltype(Entries)::const_iterator;
+
+ const_iterator begin() const { return Entries.begin(); }
+ const_iterator end() const { return Entries.end(); }
+
+ std::string toString(unsigned Indent) const override {
+ std::string Result =
+ (std::string(Indent, ' ') + Stat.getName() + "\n").str();
+ for (const auto &Entry : Entries)
+ Result += Entry.second->toString(Indent + 2);
+ return Result;
+ }
+
+ static bool classof(const InMemoryNode *N) {
+ return N->getKind() == IME_Directory;
+ }
+};
+
+namespace {
+Status getNodeStatus(const InMemoryNode *Node, StringRef RequestedName) {
+ if (auto Dir = dyn_cast<detail::InMemoryDirectory>(Node))
+ return Dir->getStatus(RequestedName);
+ if (auto File = dyn_cast<detail::InMemoryFile>(Node))
+ return File->getStatus(RequestedName);
+ if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node))
+ return Link->getResolvedFile().getStatus(RequestedName);
+ llvm_unreachable("Unknown node type");
+}
+} // namespace
+} // namespace detail
+
+InMemoryFileSystem::InMemoryFileSystem(bool UseNormalizedPaths)
+ : Root(new detail::InMemoryDirectory(
+ Status("", getNextVirtualUniqueID(), llvm::sys::TimePoint<>(), 0, 0,
+ 0, llvm::sys::fs::file_type::directory_file,
+ llvm::sys::fs::perms::all_all))),
+ UseNormalizedPaths(UseNormalizedPaths) {}
+
+InMemoryFileSystem::~InMemoryFileSystem() = default;
+
+std::string InMemoryFileSystem::toString() const {
+ return Root->toString(/*Indent=*/0);
+}
+
+bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
+ std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ Optional<uint32_t> User,
+ Optional<uint32_t> Group,
+ Optional<llvm::sys::fs::file_type> Type,
+ Optional<llvm::sys::fs::perms> Perms,
+ const detail::InMemoryFile *HardLinkTarget) {
+ SmallString<128> Path;
+ P.toVector(Path);
+
+ // Fix up relative paths. This just prepends the current working directory.
+ std::error_code EC = makeAbsolute(Path);
+ assert(!EC);
+ (void)EC;
+
+ if (useNormalizedPaths())
+ llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+
+ if (Path.empty())
+ return false;
+
+ detail::InMemoryDirectory *Dir = Root.get();
+ auto I = llvm::sys::path::begin(Path), E = sys::path::end(Path);
+ const auto ResolvedUser = User.getValueOr(0);
+ const auto ResolvedGroup = Group.getValueOr(0);
+ const auto ResolvedType = Type.getValueOr(sys::fs::file_type::regular_file);
+ const auto ResolvedPerms = Perms.getValueOr(sys::fs::all_all);
+ assert(!(HardLinkTarget && Buffer) && "HardLink cannot have a buffer");
+ // Any intermediate directories we create should be accessible by
+ // the owner, even if Perms says otherwise for the final path.
+ const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all;
+ while (true) {
+ StringRef Name = *I;
+ detail::InMemoryNode *Node = Dir->getChild(Name);
+ ++I;
+ if (!Node) {
+ if (I == E) {
+ // End of the path.
+ std::unique_ptr<detail::InMemoryNode> Child;
+ if (HardLinkTarget)
+ Child.reset(new detail::InMemoryHardLink(P.str(), *HardLinkTarget));
+ else {
+ // Create a new file or directory.
+ Status Stat(P.str(), getNextVirtualUniqueID(),
+ llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
+ ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
+ ResolvedPerms);
+ if (ResolvedType == sys::fs::file_type::directory_file) {
+ Child.reset(new detail::InMemoryDirectory(std::move(Stat)));
+ } else {
+ Child.reset(
+ new detail::InMemoryFile(std::move(Stat), std::move(Buffer)));
+ }
+ }
+ Dir->addChild(Name, std::move(Child));
+ return true;
+ }
+
+ // Create a new directory. Use the path up to here.
+ Status Stat(
+ StringRef(Path.str().begin(), Name.end() - Path.str().begin()),
+ getNextVirtualUniqueID(), llvm::sys::toTimePoint(ModificationTime),
+ ResolvedUser, ResolvedGroup, 0, sys::fs::file_type::directory_file,
+ NewDirectoryPerms);
+ Dir = cast<detail::InMemoryDirectory>(Dir->addChild(
+ Name, llvm::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
+ continue;
+ }
+
+ if (auto *NewDir = dyn_cast<detail::InMemoryDirectory>(Node)) {
+ Dir = NewDir;
+ } else {
+ assert((isa<detail::InMemoryFile>(Node) ||
+ isa<detail::InMemoryHardLink>(Node)) &&
+ "Must be either file, hardlink or directory!");
+
+ // Trying to insert a directory in place of a file.
+ if (I != E)
+ return false;
+
+ // Return false only if the new file is different from the existing one.
+ if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node)) {
+ return Link->getResolvedFile().getBuffer()->getBuffer() ==
+ Buffer->getBuffer();
+ }
+ return cast<detail::InMemoryFile>(Node)->getBuffer()->getBuffer() ==
+ Buffer->getBuffer();
+ }
+ }
+}
+
+bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
+ std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ Optional<uint32_t> User,
+ Optional<uint32_t> Group,
+ Optional<llvm::sys::fs::file_type> Type,
+ Optional<llvm::sys::fs::perms> Perms) {
+ return addFile(P, ModificationTime, std::move(Buffer), User, Group, Type,
+ Perms, /*HardLinkTarget=*/nullptr);
+}
+
+bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime,
+ llvm::MemoryBuffer *Buffer,
+ Optional<uint32_t> User,
+ Optional<uint32_t> Group,
+ Optional<llvm::sys::fs::file_type> Type,
+ Optional<llvm::sys::fs::perms> Perms) {
+ return addFile(P, ModificationTime,
+ llvm::MemoryBuffer::getMemBuffer(
+ Buffer->getBuffer(), Buffer->getBufferIdentifier()),
+ std::move(User), std::move(Group), std::move(Type),
+ std::move(Perms));
+}
+
+static ErrorOr<const detail::InMemoryNode *>
+lookupInMemoryNode(const InMemoryFileSystem &FS, detail::InMemoryDirectory *Dir,
+ const Twine &P) {
+ SmallString<128> Path;
+ P.toVector(Path);
+
+ // Fix up relative paths. This just prepends the current working directory.
+ std::error_code EC = FS.makeAbsolute(Path);
+ assert(!EC);
+ (void)EC;
+
+ if (FS.useNormalizedPaths())
+ llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+
+ if (Path.empty())
+ return Dir;
+
+ auto I = llvm::sys::path::begin(Path), E = llvm::sys::path::end(Path);
+ while (true) {
+ detail::InMemoryNode *Node = Dir->getChild(*I);
+ ++I;
+ if (!Node)
+ return errc::no_such_file_or_directory;
+
+ // Return the file if it's at the end of the path.
+ if (auto File = dyn_cast<detail::InMemoryFile>(Node)) {
+ if (I == E)
+ return File;
+ return errc::no_such_file_or_directory;
+ }
+
+    // If Node is a hard link, return the resolved file.
+ if (auto File = dyn_cast<detail::InMemoryHardLink>(Node)) {
+ if (I == E)
+ return &File->getResolvedFile();
+ return errc::no_such_file_or_directory;
+ }
+ // Traverse directories.
+ Dir = cast<detail::InMemoryDirectory>(Node);
+ if (I == E)
+ return Dir;
+ }
+}
+
+bool InMemoryFileSystem::addHardLink(const Twine &FromPath,
+ const Twine &ToPath) {
+ auto FromNode = lookupInMemoryNode(*this, Root.get(), FromPath);
+ auto ToNode = lookupInMemoryNode(*this, Root.get(), ToPath);
+ // FromPath must not have been added before. ToPath must have been added
+ // before. Resolved ToPath must be a File.
+ if (!ToNode || FromNode || !isa<detail::InMemoryFile>(*ToNode))
+ return false;
+ return this->addFile(FromPath, 0, nullptr, None, None, None, None,
+ cast<detail::InMemoryFile>(*ToNode));
+}
+
+llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {
+ auto Node = lookupInMemoryNode(*this, Root.get(), Path);
+ if (Node)
+ return detail::getNodeStatus(*Node, Path.str());
+ return Node.getError();
+}
+
+llvm::ErrorOr<std::unique_ptr<File>>
+InMemoryFileSystem::openFileForRead(const Twine &Path) {
+ auto Node = lookupInMemoryNode(*this, Root.get(), Path);
+ if (!Node)
+ return Node.getError();
+
+  // When we have a file, provide a heap-allocated wrapper for the memory buffer
+ // to match the ownership semantics for File.
+ if (auto *F = dyn_cast<detail::InMemoryFile>(*Node))
+ return std::unique_ptr<File>(
+ new detail::InMemoryFileAdaptor(*F, Path.str()));
+
+ // FIXME: errc::not_a_file?
+ return make_error_code(llvm::errc::invalid_argument);
+}
+
+namespace {
+
+/// Adaptor from InMemoryDir::iterator to directory_iterator.
+class InMemoryDirIterator : public llvm::vfs::detail::DirIterImpl {
+ detail::InMemoryDirectory::const_iterator I;
+ detail::InMemoryDirectory::const_iterator E;
+ std::string RequestedDirName;
+
+ void setCurrentEntry() {
+ if (I != E) {
+ SmallString<256> Path(RequestedDirName);
+ llvm::sys::path::append(Path, I->second->getFileName());
+ sys::fs::file_type Type;
+ switch (I->second->getKind()) {
+ case detail::IME_File:
+ case detail::IME_HardLink:
+ Type = sys::fs::file_type::regular_file;
+ break;
+ case detail::IME_Directory:
+ Type = sys::fs::file_type::directory_file;
+ break;
+ }
+ CurrentEntry = directory_entry(Path.str(), Type);
+ } else {
+ // When we're at the end, make CurrentEntry invalid and DirIterImpl will
+ // do the rest.
+ CurrentEntry = directory_entry();
+ }
+ }
+
+public:
+ InMemoryDirIterator() = default;
+
+ explicit InMemoryDirIterator(const detail::InMemoryDirectory &Dir,
+ std::string RequestedDirName)
+ : I(Dir.begin()), E(Dir.end()),
+ RequestedDirName(std::move(RequestedDirName)) {
+ setCurrentEntry();
+ }
+
+ std::error_code increment() override {
+ ++I;
+ setCurrentEntry();
+ return {};
+ }
+};
+
+} // namespace
+
+directory_iterator InMemoryFileSystem::dir_begin(const Twine &Dir,
+ std::error_code &EC) {
+ auto Node = lookupInMemoryNode(*this, Root.get(), Dir);
+ if (!Node) {
+ EC = Node.getError();
+ return directory_iterator(std::make_shared<InMemoryDirIterator>());
+ }
+
+ if (auto *DirNode = dyn_cast<detail::InMemoryDirectory>(*Node))
+ return directory_iterator(
+ std::make_shared<InMemoryDirIterator>(*DirNode, Dir.str()));
+
+ EC = make_error_code(llvm::errc::not_a_directory);
+ return directory_iterator(std::make_shared<InMemoryDirIterator>());
+}
+
+std::error_code InMemoryFileSystem::setCurrentWorkingDirectory(const Twine &P) {
+ SmallString<128> Path;
+ P.toVector(Path);
+
+ // Fix up relative paths. This just prepends the current working directory.
+ std::error_code EC = makeAbsolute(Path);
+ assert(!EC);
+ (void)EC;
+
+ if (useNormalizedPaths())
+ llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+
+ if (!Path.empty())
+ WorkingDirectory = Path.str();
+ return {};
+}
+
+std::error_code
+InMemoryFileSystem::getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const {
+ auto CWD = getCurrentWorkingDirectory();
+ if (!CWD || CWD->empty())
+ return errc::operation_not_permitted;
+ Path.toVector(Output);
+ if (auto EC = makeAbsolute(Output))
+ return EC;
+ llvm::sys::path::remove_dots(Output, /*remove_dot_dot=*/true);
+ return {};
+}
+
+std::error_code InMemoryFileSystem::isLocal(const Twine &Path, bool &Result) {
+ Result = false;
+ return {};
+}
+
+} // namespace vfs
+} // namespace llvm
+
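Editor's note, not part of the diff: the in-memory tree above is normally populated with addFile() and read back through the generic FileSystem interface; a small sketch follows. Paths and contents are made up.

    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/VirtualFileSystem.h"

    using namespace llvm;

    static void inMemoryDemo() {
      IntrusiveRefCntPtr<vfs::InMemoryFileSystem> FS(
          new vfs::InMemoryFileSystem());

      // Intermediate directories ("/src") are created implicitly by addFile().
      FS->addFile("/src/a.h", /*ModificationTime=*/0,
                  MemoryBuffer::getMemBuffer("#define A 1\n"));

      // addHardLink() only succeeds if the target was added before as a file.
      bool Linked = FS->addHardLink("/src/b.h", "/src/a.h");
      (void)Linked;

      // Reads resolve the hard link to the underlying file's buffer.
      if (auto Buf = FS->getBufferForFile("/src/b.h"))
        (void)(*Buf)->getBuffer(); // "#define A 1\n"
    }
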
+//===-----------------------------------------------------------------------===/
+// RedirectingFileSystem implementation
+//===-----------------------------------------------------------------------===/
+
+// FIXME: reuse implementation common with OverlayFSDirIterImpl as these
+// iterators are conceptually similar.
+class llvm::vfs::VFSFromYamlDirIterImpl
+ : public llvm::vfs::detail::DirIterImpl {
+ std::string Dir;
+ RedirectingFileSystem::RedirectingDirectoryEntry::iterator Current, End;
+
+  // To handle 'fallthrough' mode we need to iterate first through the
+  // RedirectingDirectoryEntry and then through ExternalFS. These operations are
+  // done sequentially; we just need to keep track of what kind of iteration
+  // we are currently performing.
+
+ /// Flag telling if we should iterate through ExternalFS or stop at the last
+ /// RedirectingDirectoryEntry::iterator.
+ bool IterateExternalFS;
+ /// Flag telling if we have switched to iterating through ExternalFS.
+ bool IsExternalFSCurrent = false;
+ FileSystem &ExternalFS;
+ directory_iterator ExternalDirIter;
+ llvm::StringSet<> SeenNames;
+
+ /// To combine multiple iterations, different methods are responsible for
+ /// different iteration steps.
+ /// @{
+
+ /// Responsible for dispatching between RedirectingDirectoryEntry iteration
+ /// and ExternalFS iteration.
+ std::error_code incrementImpl(bool IsFirstTime);
+ /// Responsible for RedirectingDirectoryEntry iteration.
+ std::error_code incrementContent(bool IsFirstTime);
+ /// Responsible for ExternalFS iteration.
+ std::error_code incrementExternal();
+ /// @}
+
+public:
+ VFSFromYamlDirIterImpl(
+ const Twine &Path,
+ RedirectingFileSystem::RedirectingDirectoryEntry::iterator Begin,
+ RedirectingFileSystem::RedirectingDirectoryEntry::iterator End,
+ bool IterateExternalFS, FileSystem &ExternalFS, std::error_code &EC);
+
+ std::error_code increment() override;
+};
+
+llvm::ErrorOr<std::string>
+RedirectingFileSystem::getCurrentWorkingDirectory() const {
+ return ExternalFS->getCurrentWorkingDirectory();
+}
+
+std::error_code
+RedirectingFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
+ return ExternalFS->setCurrentWorkingDirectory(Path);
+}
+
+std::error_code RedirectingFileSystem::isLocal(const Twine &Path,
+ bool &Result) {
+ return ExternalFS->isLocal(Path, Result);
+}
+
+directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
+ std::error_code &EC) {
+ ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Dir);
+ if (!E) {
+ EC = E.getError();
+ if (IsFallthrough && EC == errc::no_such_file_or_directory)
+ return ExternalFS->dir_begin(Dir, EC);
+ return {};
+ }
+ ErrorOr<Status> S = status(Dir, *E);
+ if (!S) {
+ EC = S.getError();
+ return {};
+ }
+ if (!S->isDirectory()) {
+ EC = std::error_code(static_cast<int>(errc::not_a_directory),
+ std::system_category());
+ return {};
+ }
+
+ auto *D = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(*E);
+ return directory_iterator(std::make_shared<VFSFromYamlDirIterImpl>(
+ Dir, D->contents_begin(), D->contents_end(),
+ /*IterateExternalFS=*/IsFallthrough, *ExternalFS, EC));
+}
+
+void RedirectingFileSystem::setExternalContentsPrefixDir(StringRef PrefixDir) {
+ ExternalContentsPrefixDir = PrefixDir.str();
+}
+
+StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const {
+ return ExternalContentsPrefixDir;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RedirectingFileSystem::dump() const {
+ for (const auto &Root : Roots)
+ dumpEntry(Root.get());
+}
+
+LLVM_DUMP_METHOD void
+RedirectingFileSystem::dumpEntry(RedirectingFileSystem::Entry *E,
+ int NumSpaces) const {
+ StringRef Name = E->getName();
+ for (int i = 0, e = NumSpaces; i < e; ++i)
+ dbgs() << " ";
+ dbgs() << "'" << Name.str().c_str() << "'"
+ << "\n";
+
+ if (E->getKind() == RedirectingFileSystem::EK_Directory) {
+ auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(E);
+ assert(DE && "Should be a directory");
+
+ for (std::unique_ptr<Entry> &SubEntry :
+ llvm::make_range(DE->contents_begin(), DE->contents_end()))
+ dumpEntry(SubEntry.get(), NumSpaces + 2);
+ }
+}
+#endif
+
+/// A helper class to hold the common YAML parsing state.
+class llvm::vfs::RedirectingFileSystemParser {
+ yaml::Stream &Stream;
+
+ void error(yaml::Node *N, const Twine &Msg) { Stream.printError(N, Msg); }
+
+ // false on error
+ bool parseScalarString(yaml::Node *N, StringRef &Result,
+ SmallVectorImpl<char> &Storage) {
+ const auto *S = dyn_cast<yaml::ScalarNode>(N);
+
+ if (!S) {
+ error(N, "expected string");
+ return false;
+ }
+ Result = S->getValue(Storage);
+ return true;
+ }
+
+ // false on error
+ bool parseScalarBool(yaml::Node *N, bool &Result) {
+ SmallString<5> Storage;
+ StringRef Value;
+ if (!parseScalarString(N, Value, Storage))
+ return false;
+
+ if (Value.equals_lower("true") || Value.equals_lower("on") ||
+ Value.equals_lower("yes") || Value == "1") {
+ Result = true;
+ return true;
+ } else if (Value.equals_lower("false") || Value.equals_lower("off") ||
+ Value.equals_lower("no") || Value == "0") {
+ Result = false;
+ return true;
+ }
+
+ error(N, "expected boolean value");
+ return false;
+ }
+
+ struct KeyStatus {
+ bool Required;
+ bool Seen = false;
+
+ KeyStatus(bool Required = false) : Required(Required) {}
+ };
+
+ using KeyStatusPair = std::pair<StringRef, KeyStatus>;
+
+ // false on error
+ bool checkDuplicateOrUnknownKey(yaml::Node *KeyNode, StringRef Key,
+ DenseMap<StringRef, KeyStatus> &Keys) {
+ if (!Keys.count(Key)) {
+ error(KeyNode, "unknown key");
+ return false;
+ }
+ KeyStatus &S = Keys[Key];
+ if (S.Seen) {
+ error(KeyNode, Twine("duplicate key '") + Key + "'");
+ return false;
+ }
+ S.Seen = true;
+ return true;
+ }
+
+ // false on error
+ bool checkMissingKeys(yaml::Node *Obj, DenseMap<StringRef, KeyStatus> &Keys) {
+ for (const auto &I : Keys) {
+ if (I.second.Required && !I.second.Seen) {
+ error(Obj, Twine("missing key '") + I.first + "'");
+ return false;
+ }
+ }
+ return true;
+ }
+
+ RedirectingFileSystem::Entry *
+ lookupOrCreateEntry(RedirectingFileSystem *FS, StringRef Name,
+ RedirectingFileSystem::Entry *ParentEntry = nullptr) {
+    if (!ParentEntry) { // Look for an existing root
+ for (const auto &Root : FS->Roots) {
+ if (Name.equals(Root->getName())) {
+ ParentEntry = Root.get();
+ return ParentEntry;
+ }
+ }
+ } else { // Advance to the next component
+ auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ ParentEntry);
+ for (std::unique_ptr<RedirectingFileSystem::Entry> &Content :
+ llvm::make_range(DE->contents_begin(), DE->contents_end())) {
+ auto *DirContent =
+ dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ Content.get());
+ if (DirContent && Name.equals(Content->getName()))
+ return DirContent;
+ }
+ }
+
+ // ... or create a new one
+ std::unique_ptr<RedirectingFileSystem::Entry> E =
+ llvm::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ Name, Status("", getNextVirtualUniqueID(),
+ std::chrono::system_clock::now(), 0, 0, 0,
+ file_type::directory_file, sys::fs::all_all));
+
+ if (!ParentEntry) { // Add a new root to the overlay
+ FS->Roots.push_back(std::move(E));
+ ParentEntry = FS->Roots.back().get();
+ return ParentEntry;
+ }
+
+ auto *DE =
+ dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(ParentEntry);
+ DE->addContent(std::move(E));
+ return DE->getLastContent();
+ }
+
+ void uniqueOverlayTree(RedirectingFileSystem *FS,
+ RedirectingFileSystem::Entry *SrcE,
+ RedirectingFileSystem::Entry *NewParentE = nullptr) {
+ StringRef Name = SrcE->getName();
+ switch (SrcE->getKind()) {
+ case RedirectingFileSystem::EK_Directory: {
+ auto *DE =
+ dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(SrcE);
+ assert(DE && "Must be a directory");
+      // Empty directories could be present in the YAML as a way to
+      // describe a file for the current directory after some of its subdirs
+      // have been parsed. This only leads to redundant walks; ignore it.
+ if (!Name.empty())
+ NewParentE = lookupOrCreateEntry(FS, Name, NewParentE);
+ for (std::unique_ptr<RedirectingFileSystem::Entry> &SubEntry :
+ llvm::make_range(DE->contents_begin(), DE->contents_end()))
+ uniqueOverlayTree(FS, SubEntry.get(), NewParentE);
+ break;
+ }
+ case RedirectingFileSystem::EK_File: {
+ auto *FE = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(SrcE);
+ assert(FE && "Must be a file");
+ assert(NewParentE && "Parent entry must exist");
+ auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ NewParentE);
+ DE->addContent(
+ llvm::make_unique<RedirectingFileSystem::RedirectingFileEntry>(
+ Name, FE->getExternalContentsPath(), FE->getUseName()));
+ break;
+ }
+ }
+ }
+
+ std::unique_ptr<RedirectingFileSystem::Entry>
+ parseEntry(yaml::Node *N, RedirectingFileSystem *FS, bool IsRootEntry) {
+ auto *M = dyn_cast<yaml::MappingNode>(N);
+ if (!M) {
+ error(N, "expected mapping node for file or directory entry");
+ return nullptr;
+ }
+
+ KeyStatusPair Fields[] = {
+ KeyStatusPair("name", true),
+ KeyStatusPair("type", true),
+ KeyStatusPair("contents", false),
+ KeyStatusPair("external-contents", false),
+ KeyStatusPair("use-external-name", false),
+ };
+
+ DenseMap<StringRef, KeyStatus> Keys(std::begin(Fields), std::end(Fields));
+
+ bool HasContents = false; // external or otherwise
+ std::vector<std::unique_ptr<RedirectingFileSystem::Entry>>
+ EntryArrayContents;
+ std::string ExternalContentsPath;
+ std::string Name;
+ yaml::Node *NameValueNode;
+ auto UseExternalName =
+ RedirectingFileSystem::RedirectingFileEntry::NK_NotSet;
+ RedirectingFileSystem::EntryKind Kind;
+
+ for (auto &I : *M) {
+ StringRef Key;
+ // Reuse the buffer for key and value, since we don't look at key after
+ // parsing value.
+ SmallString<256> Buffer;
+ if (!parseScalarString(I.getKey(), Key, Buffer))
+ return nullptr;
+
+ if (!checkDuplicateOrUnknownKey(I.getKey(), Key, Keys))
+ return nullptr;
+
+ StringRef Value;
+ if (Key == "name") {
+ if (!parseScalarString(I.getValue(), Value, Buffer))
+ return nullptr;
+
+ NameValueNode = I.getValue();
+ if (FS->UseCanonicalizedPaths) {
+ SmallString<256> Path(Value);
+ // Guarantee that old YAML files containing paths with ".." and "."
+        // are properly canonicalized before being read into the VFS.
+ Path = sys::path::remove_leading_dotslash(Path);
+ sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+ Name = Path.str();
+ } else {
+ Name = Value;
+ }
+ } else if (Key == "type") {
+ if (!parseScalarString(I.getValue(), Value, Buffer))
+ return nullptr;
+ if (Value == "file")
+ Kind = RedirectingFileSystem::EK_File;
+ else if (Value == "directory")
+ Kind = RedirectingFileSystem::EK_Directory;
+ else {
+ error(I.getValue(), "unknown value for 'type'");
+ return nullptr;
+ }
+ } else if (Key == "contents") {
+ if (HasContents) {
+ error(I.getKey(),
+ "entry already has 'contents' or 'external-contents'");
+ return nullptr;
+ }
+ HasContents = true;
+ auto *Contents = dyn_cast<yaml::SequenceNode>(I.getValue());
+ if (!Contents) {
+ // FIXME: this is only for directories, what about files?
+ error(I.getValue(), "expected array");
+ return nullptr;
+ }
+
+ for (auto &I : *Contents) {
+ if (std::unique_ptr<RedirectingFileSystem::Entry> E =
+ parseEntry(&I, FS, /*IsRootEntry*/ false))
+ EntryArrayContents.push_back(std::move(E));
+ else
+ return nullptr;
+ }
+ } else if (Key == "external-contents") {
+ if (HasContents) {
+ error(I.getKey(),
+ "entry already has 'contents' or 'external-contents'");
+ return nullptr;
+ }
+ HasContents = true;
+ if (!parseScalarString(I.getValue(), Value, Buffer))
+ return nullptr;
+
+ SmallString<256> FullPath;
+ if (FS->IsRelativeOverlay) {
+ FullPath = FS->getExternalContentsPrefixDir();
+ assert(!FullPath.empty() &&
+ "External contents prefix directory must exist");
+ llvm::sys::path::append(FullPath, Value);
+ } else {
+ FullPath = Value;
+ }
+
+ if (FS->UseCanonicalizedPaths) {
+ // Guarantee that old YAML files containing paths with ".." and "."
+          // are properly canonicalized before being read into the VFS.
+ FullPath = sys::path::remove_leading_dotslash(FullPath);
+ sys::path::remove_dots(FullPath, /*remove_dot_dot=*/true);
+ }
+ ExternalContentsPath = FullPath.str();
+ } else if (Key == "use-external-name") {
+ bool Val;
+ if (!parseScalarBool(I.getValue(), Val))
+ return nullptr;
+ UseExternalName =
+ Val ? RedirectingFileSystem::RedirectingFileEntry::NK_External
+ : RedirectingFileSystem::RedirectingFileEntry::NK_Virtual;
+ } else {
+ llvm_unreachable("key missing from Keys");
+ }
+ }
+
+ if (Stream.failed())
+ return nullptr;
+
+ // check for missing keys
+ if (!HasContents) {
+ error(N, "missing key 'contents' or 'external-contents'");
+ return nullptr;
+ }
+ if (!checkMissingKeys(N, Keys))
+ return nullptr;
+
+ // check invalid configuration
+ if (Kind == RedirectingFileSystem::EK_Directory &&
+ UseExternalName !=
+ RedirectingFileSystem::RedirectingFileEntry::NK_NotSet) {
+ error(N, "'use-external-name' is not supported for directories");
+ return nullptr;
+ }
+
+ if (IsRootEntry && !sys::path::is_absolute(Name)) {
+ assert(NameValueNode && "Name presence should be checked earlier");
+ error(NameValueNode,
+ "entry with relative path at the root level is not discoverable");
+ return nullptr;
+ }
+
+ // Remove trailing slash(es), being careful not to remove the root path
+ StringRef Trimmed(Name);
+ size_t RootPathLen = sys::path::root_path(Trimmed).size();
+ while (Trimmed.size() > RootPathLen &&
+ sys::path::is_separator(Trimmed.back()))
+ Trimmed = Trimmed.slice(0, Trimmed.size() - 1);
+ // Get the last component
+ StringRef LastComponent = sys::path::filename(Trimmed);
+
+ std::unique_ptr<RedirectingFileSystem::Entry> Result;
+ switch (Kind) {
+ case RedirectingFileSystem::EK_File:
+ Result = llvm::make_unique<RedirectingFileSystem::RedirectingFileEntry>(
+ LastComponent, std::move(ExternalContentsPath), UseExternalName);
+ break;
+ case RedirectingFileSystem::EK_Directory:
+ Result =
+ llvm::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ LastComponent, std::move(EntryArrayContents),
+ Status("", getNextVirtualUniqueID(),
+ std::chrono::system_clock::now(), 0, 0, 0,
+ file_type::directory_file, sys::fs::all_all));
+ break;
+ }
+
+ StringRef Parent = sys::path::parent_path(Trimmed);
+ if (Parent.empty())
+ return Result;
+
+    // If 'name' contains multiple components, create implicit directory entries
+ for (sys::path::reverse_iterator I = sys::path::rbegin(Parent),
+ E = sys::path::rend(Parent);
+ I != E; ++I) {
+ std::vector<std::unique_ptr<RedirectingFileSystem::Entry>> Entries;
+ Entries.push_back(std::move(Result));
+ Result =
+ llvm::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ *I, std::move(Entries),
+ Status("", getNextVirtualUniqueID(),
+ std::chrono::system_clock::now(), 0, 0, 0,
+ file_type::directory_file, sys::fs::all_all));
+ }
+ return Result;
+ }
+
+public:
+ RedirectingFileSystemParser(yaml::Stream &S) : Stream(S) {}
+
+ // false on error
+ bool parse(yaml::Node *Root, RedirectingFileSystem *FS) {
+ auto *Top = dyn_cast<yaml::MappingNode>(Root);
+ if (!Top) {
+ error(Root, "expected mapping node");
+ return false;
+ }
+
+ KeyStatusPair Fields[] = {
+ KeyStatusPair("version", true),
+ KeyStatusPair("case-sensitive", false),
+ KeyStatusPair("use-external-names", false),
+ KeyStatusPair("overlay-relative", false),
+ KeyStatusPair("fallthrough", false),
+ KeyStatusPair("roots", true),
+ };
+
+ DenseMap<StringRef, KeyStatus> Keys(std::begin(Fields), std::end(Fields));
+ std::vector<std::unique_ptr<RedirectingFileSystem::Entry>> RootEntries;
+
+ // Parse configuration and 'roots'
+ for (auto &I : *Top) {
+ SmallString<10> KeyBuffer;
+ StringRef Key;
+ if (!parseScalarString(I.getKey(), Key, KeyBuffer))
+ return false;
+
+ if (!checkDuplicateOrUnknownKey(I.getKey(), Key, Keys))
+ return false;
+
+ if (Key == "roots") {
+ auto *Roots = dyn_cast<yaml::SequenceNode>(I.getValue());
+ if (!Roots) {
+ error(I.getValue(), "expected array");
+ return false;
+ }
+
+ for (auto &I : *Roots) {
+ if (std::unique_ptr<RedirectingFileSystem::Entry> E =
+ parseEntry(&I, FS, /*IsRootEntry*/ true))
+ RootEntries.push_back(std::move(E));
+ else
+ return false;
+ }
+ } else if (Key == "version") {
+ StringRef VersionString;
+ SmallString<4> Storage;
+ if (!parseScalarString(I.getValue(), VersionString, Storage))
+ return false;
+ int Version;
+ if (VersionString.getAsInteger<int>(10, Version)) {
+ error(I.getValue(), "expected integer");
+ return false;
+ }
+ if (Version < 0) {
+ error(I.getValue(), "invalid version number");
+ return false;
+ }
+ if (Version != 0) {
+ error(I.getValue(), "version mismatch, expected 0");
+ return false;
+ }
+ } else if (Key == "case-sensitive") {
+ if (!parseScalarBool(I.getValue(), FS->CaseSensitive))
+ return false;
+ } else if (Key == "overlay-relative") {
+ if (!parseScalarBool(I.getValue(), FS->IsRelativeOverlay))
+ return false;
+ } else if (Key == "use-external-names") {
+ if (!parseScalarBool(I.getValue(), FS->UseExternalNames))
+ return false;
+ } else if (Key == "fallthrough") {
+ if (!parseScalarBool(I.getValue(), FS->IsFallthrough))
+ return false;
+ } else {
+ llvm_unreachable("key missing from Keys");
+ }
+ }
+
+ if (Stream.failed())
+ return false;
+
+ if (!checkMissingKeys(Top, Keys))
+ return false;
+
+    // Now that we successfully parsed the YAML file, canonicalize the internal
+ // representation to a proper directory tree so that we can search faster
+ // inside the VFS.
+ for (auto &E : RootEntries)
+ uniqueOverlayTree(FS, E.get());
+
+ return true;
+ }
+};
+
+RedirectingFileSystem *
+RedirectingFileSystem::create(std::unique_ptr<MemoryBuffer> Buffer,
+ SourceMgr::DiagHandlerTy DiagHandler,
+ StringRef YAMLFilePath, void *DiagContext,
+ IntrusiveRefCntPtr<FileSystem> ExternalFS) {
+ SourceMgr SM;
+ yaml::Stream Stream(Buffer->getMemBufferRef(), SM);
+
+ SM.setDiagHandler(DiagHandler, DiagContext);
+ yaml::document_iterator DI = Stream.begin();
+ yaml::Node *Root = DI->getRoot();
+ if (DI == Stream.end() || !Root) {
+ SM.PrintMessage(SMLoc(), SourceMgr::DK_Error, "expected root node");
+ return nullptr;
+ }
+
+ RedirectingFileSystemParser P(Stream);
+
+ std::unique_ptr<RedirectingFileSystem> FS(
+ new RedirectingFileSystem(std::move(ExternalFS)));
+
+ if (!YAMLFilePath.empty()) {
+ // Use the YAML path from -ivfsoverlay to compute the dir to be prefixed
+ // to each 'external-contents' path.
+ //
+ // Example:
+ // -ivfsoverlay dummy.cache/vfs/vfs.yaml
+ // yields:
+ // FS->ExternalContentsPrefixDir => /<absolute_path_to>/dummy.cache/vfs
+ //
+ SmallString<256> OverlayAbsDir = sys::path::parent_path(YAMLFilePath);
+ std::error_code EC = llvm::sys::fs::make_absolute(OverlayAbsDir);
+ assert(!EC && "Overlay dir final path must be absolute");
+ (void)EC;
+ FS->setExternalContentsPrefixDir(OverlayAbsDir);
+ }
+
+ if (!P.parse(Root, FS.get()))
+ return nullptr;
+
+ return FS.release();
+}
+
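Editor's note, not part of the diff: create() consumes a YAML description with the 'version'/'roots' keys checked by the parser above. A sketch of building one from an inline buffer follows; all file names are hypothetical, and the overlay is combined with the real file system as the external layer.

    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/VirtualFileSystem.h"

    using namespace llvm;

    static IntrusiveRefCntPtr<vfs::FileSystem> makeRedirectingFS() {
      // Maps the virtual path /virtual/cfg.h onto a real on-disk file.
      static const char *Overlay =
          "{ 'version': 0,\n"
          "  'fallthrough': 'true',\n"
          "  'roots': [\n"
          "    { 'name': '/virtual', 'type': 'directory',\n"
          "      'contents': [\n"
          "        { 'name': 'cfg.h', 'type': 'file',\n"
          "          'external-contents': '/real/build/cfg.h' }\n"
          "      ]\n"
          "    }\n"
          "  ]\n"
          "}\n";

      return vfs::getVFSFromYAML(MemoryBuffer::getMemBuffer(Overlay),
                                 /*DiagHandler=*/nullptr,
                                 /*YAMLFilePath=*/"",
                                 /*DiagContext=*/nullptr,
                                 vfs::getRealFileSystem());
    }
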
+ErrorOr<RedirectingFileSystem::Entry *>
+RedirectingFileSystem::lookupPath(const Twine &Path_) const {
+ SmallString<256> Path;
+ Path_.toVector(Path);
+
+ // Handle relative paths
+ if (std::error_code EC = makeAbsolute(Path))
+ return EC;
+
+  // Canonicalize path by removing ".", "..", "./", etc. components. This is
+  // a VFS request; do not bother about symlinks in the path components,
+  // but canonicalize in order to perform the correct entry search.
+ if (UseCanonicalizedPaths) {
+ Path = sys::path::remove_leading_dotslash(Path);
+ sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+ }
+
+ if (Path.empty())
+ return make_error_code(llvm::errc::invalid_argument);
+
+ sys::path::const_iterator Start = sys::path::begin(Path);
+ sys::path::const_iterator End = sys::path::end(Path);
+ for (const auto &Root : Roots) {
+ ErrorOr<RedirectingFileSystem::Entry *> Result =
+ lookupPath(Start, End, Root.get());
+ if (Result || Result.getError() != llvm::errc::no_such_file_or_directory)
+ return Result;
+ }
+ return make_error_code(llvm::errc::no_such_file_or_directory);
+}
+
+ErrorOr<RedirectingFileSystem::Entry *>
+RedirectingFileSystem::lookupPath(sys::path::const_iterator Start,
+ sys::path::const_iterator End,
+ RedirectingFileSystem::Entry *From) const {
+#ifndef _WIN32
+ assert(!isTraversalComponent(*Start) &&
+ !isTraversalComponent(From->getName()) &&
+ "Paths should not contain traversal components");
+#else
+  // FIXME: this is here to support Windows; remove it once canonicalized
+  // paths become globally default.
+ if (Start->equals("."))
+ ++Start;
+#endif
+
+ StringRef FromName = From->getName();
+
+ // Forward the search to the next component in case this is an empty one.
+ if (!FromName.empty()) {
+ if (CaseSensitive ? !Start->equals(FromName)
+ : !Start->equals_lower(FromName))
+ // failure to match
+ return make_error_code(llvm::errc::no_such_file_or_directory);
+
+ ++Start;
+
+ if (Start == End) {
+ // Match!
+ return From;
+ }
+ }
+
+ auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(From);
+ if (!DE)
+ return make_error_code(llvm::errc::not_a_directory);
+
+ for (const std::unique_ptr<RedirectingFileSystem::Entry> &DirEntry :
+ llvm::make_range(DE->contents_begin(), DE->contents_end())) {
+ ErrorOr<RedirectingFileSystem::Entry *> Result =
+ lookupPath(Start, End, DirEntry.get());
+ if (Result || Result.getError() != llvm::errc::no_such_file_or_directory)
+ return Result;
+ }
+ return make_error_code(llvm::errc::no_such_file_or_directory);
+}
+
+static Status getRedirectedFileStatus(const Twine &Path, bool UseExternalNames,
+ Status ExternalStatus) {
+ Status S = ExternalStatus;
+ if (!UseExternalNames)
+ S = Status::copyWithNewName(S, Path.str());
+ S.IsVFSMapped = true;
+ return S;
+}
+
+ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path,
+ RedirectingFileSystem::Entry *E) {
+ assert(E != nullptr);
+ if (auto *F = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(E)) {
+ ErrorOr<Status> S = ExternalFS->status(F->getExternalContentsPath());
+ assert(!S || S->getName() == F->getExternalContentsPath());
+ if (S)
+ return getRedirectedFileStatus(Path, F->useExternalName(UseExternalNames),
+ *S);
+ return S;
+ } else { // directory
+ auto *DE = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(E);
+ return Status::copyWithNewName(DE->getStatus(), Path.str());
+ }
+}
+
+ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path) {
+ ErrorOr<RedirectingFileSystem::Entry *> Result = lookupPath(Path);
+ if (!Result) {
+ if (IsFallthrough &&
+ Result.getError() == llvm::errc::no_such_file_or_directory) {
+ return ExternalFS->status(Path);
+ }
+ return Result.getError();
+ }
+ return status(Path, *Result);
+}
+
+namespace {
+
+/// Provide a file wrapper with an overridden status.
+class FileWithFixedStatus : public File {
+ std::unique_ptr<File> InnerFile;
+ Status S;
+
+public:
+ FileWithFixedStatus(std::unique_ptr<File> InnerFile, Status S)
+ : InnerFile(std::move(InnerFile)), S(std::move(S)) {}
+
+ ErrorOr<Status> status() override { return S; }
+  ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
+  getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
+ bool IsVolatile) override {
+ return InnerFile->getBuffer(Name, FileSize, RequiresNullTerminator,
+ IsVolatile);
+ }
+
+ std::error_code close() override { return InnerFile->close(); }
+};
+
+} // namespace
+
+ErrorOr<std::unique_ptr<File>>
+RedirectingFileSystem::openFileForRead(const Twine &Path) {
+ ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Path);
+ if (!E) {
+ if (IsFallthrough &&
+ E.getError() == llvm::errc::no_such_file_or_directory) {
+ return ExternalFS->openFileForRead(Path);
+ }
+ return E.getError();
+ }
+
+ auto *F = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(*E);
+ if (!F) // FIXME: errc::not_a_file?
+ return make_error_code(llvm::errc::invalid_argument);
+
+ auto Result = ExternalFS->openFileForRead(F->getExternalContentsPath());
+ if (!Result)
+ return Result;
+
+ auto ExternalStatus = (*Result)->status();
+ if (!ExternalStatus)
+ return ExternalStatus.getError();
+
+ // FIXME: Update the status with the name and VFSMapped.
+ Status S = getRedirectedFileStatus(Path, F->useExternalName(UseExternalNames),
+ *ExternalStatus);
+ return std::unique_ptr<File>(
+ llvm::make_unique<FileWithFixedStatus>(std::move(*Result), S));
+}
+
+std::error_code
+RedirectingFileSystem::getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const {
+ ErrorOr<RedirectingFileSystem::Entry *> Result = lookupPath(Path);
+ if (!Result) {
+ if (IsFallthrough &&
+ Result.getError() == llvm::errc::no_such_file_or_directory) {
+ return ExternalFS->getRealPath(Path, Output);
+ }
+ return Result.getError();
+ }
+
+ if (auto *F =
+ dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(*Result)) {
+ return ExternalFS->getRealPath(F->getExternalContentsPath(), Output);
+ }
+ // Even if there is a directory entry, fall back to ExternalFS if allowed,
+ // because directories don't have a single external contents path.
+ return IsFallthrough ? ExternalFS->getRealPath(Path, Output)
+ : llvm::errc::invalid_argument;
+}
+
+IntrusiveRefCntPtr<FileSystem>
+vfs::getVFSFromYAML(std::unique_ptr<MemoryBuffer> Buffer,
+ SourceMgr::DiagHandlerTy DiagHandler,
+ StringRef YAMLFilePath, void *DiagContext,
+ IntrusiveRefCntPtr<FileSystem> ExternalFS) {
+ return RedirectingFileSystem::create(std::move(Buffer), DiagHandler,
+ YAMLFilePath, DiagContext,
+ std::move(ExternalFS));
+}
+
+static void getVFSEntries(RedirectingFileSystem::Entry *SrcE,
+ SmallVectorImpl<StringRef> &Path,
+ SmallVectorImpl<YAMLVFSEntry> &Entries) {
+ auto Kind = SrcE->getKind();
+ if (Kind == RedirectingFileSystem::EK_Directory) {
+ auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(SrcE);
+ assert(DE && "Must be a directory");
+ for (std::unique_ptr<RedirectingFileSystem::Entry> &SubEntry :
+ llvm::make_range(DE->contents_begin(), DE->contents_end())) {
+ Path.push_back(SubEntry->getName());
+ getVFSEntries(SubEntry.get(), Path, Entries);
+ Path.pop_back();
+ }
+ return;
+ }
+
+ assert(Kind == RedirectingFileSystem::EK_File && "Must be a EK_File");
+ auto *FE = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(SrcE);
+ assert(FE && "Must be a file");
+ SmallString<128> VPath;
+ for (auto &Comp : Path)
+ llvm::sys::path::append(VPath, Comp);
+ Entries.push_back(YAMLVFSEntry(VPath.c_str(), FE->getExternalContentsPath()));
+}
+
+void vfs::collectVFSFromYAML(std::unique_ptr<MemoryBuffer> Buffer,
+ SourceMgr::DiagHandlerTy DiagHandler,
+ StringRef YAMLFilePath,
+ SmallVectorImpl<YAMLVFSEntry> &CollectedEntries,
+ void *DiagContext,
+ IntrusiveRefCntPtr<FileSystem> ExternalFS) {
+ RedirectingFileSystem *VFS = RedirectingFileSystem::create(
+ std::move(Buffer), DiagHandler, YAMLFilePath, DiagContext,
+ std::move(ExternalFS));
+ ErrorOr<RedirectingFileSystem::Entry *> RootE = VFS->lookupPath("/");
+ if (!RootE)
+ return;
+ SmallVector<StringRef, 8> Components;
+ Components.push_back("/");
+ getVFSEntries(*RootE, Components, CollectedEntries);
+}
+
+UniqueID vfs::getNextVirtualUniqueID() {
+ static std::atomic<unsigned> UID;
+ unsigned ID = ++UID;
+ // The following assumes that uint64_t max will never collide with a real
+ // dev_t value from the OS.
+ return UniqueID(std::numeric_limits<uint64_t>::max(), ID);
+}
+
+void YAMLVFSWriter::addFileMapping(StringRef VirtualPath, StringRef RealPath) {
+ assert(sys::path::is_absolute(VirtualPath) && "virtual path not absolute");
+ assert(sys::path::is_absolute(RealPath) && "real path not absolute");
+ assert(!pathHasTraversal(VirtualPath) && "path traversal is not supported");
+ Mappings.emplace_back(VirtualPath, RealPath);
+}
+
+namespace {
+
+class JSONWriter {
+ llvm::raw_ostream &OS;
+ SmallVector<StringRef, 16> DirStack;
+
+ unsigned getDirIndent() { return 4 * DirStack.size(); }
+ unsigned getFileIndent() { return 4 * (DirStack.size() + 1); }
+ bool containedIn(StringRef Parent, StringRef Path);
+ StringRef containedPart(StringRef Parent, StringRef Path);
+ void startDirectory(StringRef Path);
+ void endDirectory();
+ void writeEntry(StringRef VPath, StringRef RPath);
+
+public:
+ JSONWriter(llvm::raw_ostream &OS) : OS(OS) {}
+
+ void write(ArrayRef<YAMLVFSEntry> Entries, Optional<bool> UseExternalNames,
+ Optional<bool> IsCaseSensitive, Optional<bool> IsOverlayRelative,
+ StringRef OverlayDir);
+};
+
+} // namespace
+
+bool JSONWriter::containedIn(StringRef Parent, StringRef Path) {
+ using namespace llvm::sys;
+
+ // Compare each path component.
+ auto IParent = path::begin(Parent), EParent = path::end(Parent);
+ for (auto IChild = path::begin(Path), EChild = path::end(Path);
+ IParent != EParent && IChild != EChild; ++IParent, ++IChild) {
+ if (*IParent != *IChild)
+ return false;
+ }
+ // Have we exhausted the parent path?
+ return IParent == EParent;
+}
+
+StringRef JSONWriter::containedPart(StringRef Parent, StringRef Path) {
+ assert(!Parent.empty());
+ assert(containedIn(Parent, Path));
+ return Path.slice(Parent.size() + 1, StringRef::npos);
+}
+
+void JSONWriter::startDirectory(StringRef Path) {
+ StringRef Name =
+ DirStack.empty() ? Path : containedPart(DirStack.back(), Path);
+ DirStack.push_back(Path);
+ unsigned Indent = getDirIndent();
+ OS.indent(Indent) << "{\n";
+ OS.indent(Indent + 2) << "'type': 'directory',\n";
+ OS.indent(Indent + 2) << "'name': \"" << llvm::yaml::escape(Name) << "\",\n";
+ OS.indent(Indent + 2) << "'contents': [\n";
+}
+
+void JSONWriter::endDirectory() {
+ unsigned Indent = getDirIndent();
+ OS.indent(Indent + 2) << "]\n";
+ OS.indent(Indent) << "}";
+
+ DirStack.pop_back();
+}
+
+void JSONWriter::writeEntry(StringRef VPath, StringRef RPath) {
+ unsigned Indent = getFileIndent();
+ OS.indent(Indent) << "{\n";
+ OS.indent(Indent + 2) << "'type': 'file',\n";
+ OS.indent(Indent + 2) << "'name': \"" << llvm::yaml::escape(VPath) << "\",\n";
+ OS.indent(Indent + 2) << "'external-contents': \""
+ << llvm::yaml::escape(RPath) << "\"\n";
+ OS.indent(Indent) << "}";
+}
+
+void JSONWriter::write(ArrayRef<YAMLVFSEntry> Entries,
+ Optional<bool> UseExternalNames,
+ Optional<bool> IsCaseSensitive,
+ Optional<bool> IsOverlayRelative,
+ StringRef OverlayDir) {
+ using namespace llvm::sys;
+
+ OS << "{\n"
+ " 'version': 0,\n";
+ if (IsCaseSensitive.hasValue())
+ OS << " 'case-sensitive': '"
+ << (IsCaseSensitive.getValue() ? "true" : "false") << "',\n";
+ if (UseExternalNames.hasValue())
+ OS << " 'use-external-names': '"
+ << (UseExternalNames.getValue() ? "true" : "false") << "',\n";
+ bool UseOverlayRelative = false;
+ if (IsOverlayRelative.hasValue()) {
+ UseOverlayRelative = IsOverlayRelative.getValue();
+ OS << " 'overlay-relative': '" << (UseOverlayRelative ? "true" : "false")
+ << "',\n";
+ }
+ OS << " 'roots': [\n";
+
+ if (!Entries.empty()) {
+ const YAMLVFSEntry &Entry = Entries.front();
+ startDirectory(path::parent_path(Entry.VPath));
+
+ StringRef RPath = Entry.RPath;
+ if (UseOverlayRelative) {
+ unsigned OverlayDirLen = OverlayDir.size();
+ assert(RPath.substr(0, OverlayDirLen) == OverlayDir &&
+ "Overlay dir must be contained in RPath");
+ RPath = RPath.slice(OverlayDirLen, RPath.size());
+ }
+
+ writeEntry(path::filename(Entry.VPath), RPath);
+
+ for (const auto &Entry : Entries.slice(1)) {
+ StringRef Dir = path::parent_path(Entry.VPath);
+ if (Dir == DirStack.back())
+ OS << ",\n";
+ else {
+ while (!DirStack.empty() && !containedIn(DirStack.back(), Dir)) {
+ OS << "\n";
+ endDirectory();
+ }
+ OS << ",\n";
+ startDirectory(Dir);
+ }
+ StringRef RPath = Entry.RPath;
+ if (UseOverlayRelative) {
+ unsigned OverlayDirLen = OverlayDir.size();
+ assert(RPath.substr(0, OverlayDirLen) == OverlayDir &&
+ "Overlay dir must be contained in RPath");
+ RPath = RPath.slice(OverlayDirLen, RPath.size());
+ }
+ writeEntry(path::filename(Entry.VPath), RPath);
+ }
+
+ while (!DirStack.empty()) {
+ OS << "\n";
+ endDirectory();
+ }
+ OS << "\n";
+ }
+
+ OS << " ]\n"
+ << "}\n";
+}
+
+void YAMLVFSWriter::write(llvm::raw_ostream &OS) {
+ llvm::sort(Mappings, [](const YAMLVFSEntry &LHS, const YAMLVFSEntry &RHS) {
+ return LHS.VPath < RHS.VPath;
+ });
+
+ JSONWriter(OS).write(Mappings, UseExternalNames, IsCaseSensitive,
+ IsOverlayRelative, OverlayDir);
+}
+
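Editor's note, not part of the diff: YAMLVFSWriter is the producer side of the format consumed by RedirectingFileSystem. A sketch of emitting a one-entry overlay; the paths are made up and must be absolute, as the asserts in addFileMapping() require.

    #include "llvm/Support/VirtualFileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void writeOverlay(raw_ostream &OS) {
      vfs::YAMLVFSWriter Writer;
      // Virtual path -> real path; entries are sorted by virtual path in write().
      Writer.addFileMapping("/virtual/include/cfg.h", "/real/build/cfg.h");
      Writer.write(OS); // Emits the YAML handled by RedirectingFileSystem above.
    }
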
+VFSFromYamlDirIterImpl::VFSFromYamlDirIterImpl(
+ const Twine &_Path,
+ RedirectingFileSystem::RedirectingDirectoryEntry::iterator Begin,
+ RedirectingFileSystem::RedirectingDirectoryEntry::iterator End,
+ bool IterateExternalFS, FileSystem &ExternalFS, std::error_code &EC)
+ : Dir(_Path.str()), Current(Begin), End(End),
+ IterateExternalFS(IterateExternalFS), ExternalFS(ExternalFS) {
+ EC = incrementImpl(/*IsFirstTime=*/true);
+}
+
+std::error_code VFSFromYamlDirIterImpl::increment() {
+ return incrementImpl(/*IsFirstTime=*/false);
+}
+
+std::error_code VFSFromYamlDirIterImpl::incrementExternal() {
+ assert(!(IsExternalFSCurrent && ExternalDirIter == directory_iterator()) &&
+ "incrementing past end");
+ std::error_code EC;
+ if (IsExternalFSCurrent) {
+ ExternalDirIter.increment(EC);
+ } else if (IterateExternalFS) {
+ ExternalDirIter = ExternalFS.dir_begin(Dir, EC);
+ IsExternalFSCurrent = true;
+ if (EC && EC != errc::no_such_file_or_directory)
+ return EC;
+ EC = {};
+ }
+ if (EC || ExternalDirIter == directory_iterator()) {
+ CurrentEntry = directory_entry();
+ } else {
+ CurrentEntry = *ExternalDirIter;
+ }
+ return EC;
+}
+
+std::error_code VFSFromYamlDirIterImpl::incrementContent(bool IsFirstTime) {
+ assert((IsFirstTime || Current != End) && "cannot iterate past end");
+ if (!IsFirstTime)
+ ++Current;
+ while (Current != End) {
+ SmallString<128> PathStr(Dir);
+ llvm::sys::path::append(PathStr, (*Current)->getName());
+ sys::fs::file_type Type;
+ switch ((*Current)->getKind()) {
+ case RedirectingFileSystem::EK_Directory:
+ Type = sys::fs::file_type::directory_file;
+ break;
+ case RedirectingFileSystem::EK_File:
+ Type = sys::fs::file_type::regular_file;
+ break;
+ }
+ CurrentEntry = directory_entry(PathStr.str(), Type);
+ return {};
+ }
+ return incrementExternal();
+}
+
+std::error_code VFSFromYamlDirIterImpl::incrementImpl(bool IsFirstTime) {
+ while (true) {
+ std::error_code EC = IsExternalFSCurrent ? incrementExternal()
+ : incrementContent(IsFirstTime);
+ if (EC || CurrentEntry.path().empty())
+ return EC;
+ StringRef Name = llvm::sys::path::filename(CurrentEntry.path());
+ if (SeenNames.insert(Name).second)
+ return EC; // name not seen before
+ }
+ llvm_unreachable("returned above");
+}
+
+vfs::recursive_directory_iterator::recursive_directory_iterator(
+ FileSystem &FS_, const Twine &Path, std::error_code &EC)
+ : FS(&FS_) {
+ directory_iterator I = FS->dir_begin(Path, EC);
+ if (I != directory_iterator()) {
+ State = std::make_shared<detail::RecDirIterState>();
+ State->Stack.push(I);
+ }
+}
+
+vfs::recursive_directory_iterator &
+recursive_directory_iterator::increment(std::error_code &EC) {
+ assert(FS && State && !State->Stack.empty() && "incrementing past end");
+ assert(!State->Stack.top()->path().empty() && "non-canonical end iterator");
+ vfs::directory_iterator End;
+
+ if (State->HasNoPushRequest)
+ State->HasNoPushRequest = false;
+ else {
+ if (State->Stack.top()->type() == sys::fs::file_type::directory_file) {
+ vfs::directory_iterator I = FS->dir_begin(State->Stack.top()->path(), EC);
+ if (I != End) {
+ State->Stack.push(I);
+ return *this;
+ }
+ }
+ }
+
+ while (!State->Stack.empty() && State->Stack.top().increment(EC) == End)
+ State->Stack.pop();
+
+ if (State->Stack.empty())
+ State.reset(); // end iterator
+
+ return *this;
+}
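Editor's note, not part of the diff: the recursive iterator above works against any of the file systems defined in this file; a short walking sketch, with the root directory name purely illustrative.

    #include "llvm/Support/VirtualFileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void walk(vfs::FileSystem &FS, const Twine &Root) {
      std::error_code EC;
      // The end iterator is default-constructed; increment() reports errors in EC.
      for (vfs::recursive_directory_iterator I(FS, Root, EC), E;
           I != E && !EC; I.increment(EC))
        outs() << (*I).path() << "\n";
    }
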
diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc
index f425d607af47..d34aa763124c 100644
--- a/contrib/llvm/lib/Support/Windows/Path.inc
+++ b/contrib/llvm/lib/Support/Windows/Path.inc
@@ -416,7 +416,7 @@ static std::error_code rename_internal(HANDLE FromHandle, const Twine &To,
*reinterpret_cast<FILE_RENAME_INFO *>(RenameInfoBuf.data());
RenameInfo.ReplaceIfExists = ReplaceIfExists;
RenameInfo.RootDirectory = 0;
- RenameInfo.FileNameLength = ToWide.size();
+ RenameInfo.FileNameLength = ToWide.size() * sizeof(wchar_t);
std::copy(ToWide.begin(), ToWide.end(), &RenameInfo.FileName[0]);
SetLastError(ERROR_SUCCESS);
@@ -766,10 +766,12 @@ std::error_code setPermissions(const Twine &Path, perms Permissions) {
return std::error_code();
}
-std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time) {
- FILETIME FT = toFILETIME(Time);
+std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
+ TimePoint<> ModificationTime) {
+ FILETIME AccessFT = toFILETIME(AccessTime);
+ FILETIME ModifyFT = toFILETIME(ModificationTime);
HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
- if (!SetFileTime(FileHandle, NULL, &FT, &FT))
+ if (!SetFileTime(FileHandle, NULL, &AccessFT, &ModifyFT))
return mapWindowsError(::GetLastError());
return std::error_code();
}
@@ -852,16 +854,37 @@ mapped_file_region::mapped_file_region(int fd, mapmode mode, size_t length,
Mapping = 0;
}
+static bool hasFlushBufferKernelBug() {
+ static bool Ret{GetWindowsOSVersion() < llvm::VersionTuple(10, 0, 0, 17763)};
+ return Ret;
+}
+
+static bool isEXE(StringRef Magic) {
+ static const char PEMagic[] = {'P', 'E', '\0', '\0'};
+ if (Magic.startswith(StringRef("MZ")) && Magic.size() >= 0x3c + 4) {
+ uint32_t off = read32le(Magic.data() + 0x3c);
+ // PE/COFF file, either EXE or DLL.
+ if (Magic.substr(off).startswith(StringRef(PEMagic, sizeof(PEMagic))))
+ return true;
+ }
+ return false;
+}
+
mapped_file_region::~mapped_file_region() {
if (Mapping) {
+
+ bool Exe = isEXE(StringRef((char *)Mapping, Size));
+
::UnmapViewOfFile(Mapping);
- if (Mode == mapmode::readwrite) {
+ if (Mode == mapmode::readwrite && Exe && hasFlushBufferKernelBug()) {
// There is a Windows kernel bug, the exact trigger conditions of which
// are not well understood. When triggered, dirty pages are not properly
// flushed and subsequent process's attempts to read a file can return
// invalid data. Calling FlushFileBuffers on the write handle is
// sufficient to ensure that this bug is not triggered.
+ // The bug only occurs when writing an executable and executing it right
+ // after, under high I/O pressure.
::FlushFileBuffers(FileHandle);
}
@@ -900,28 +923,28 @@ static basic_file_status status_from_find_data(WIN32_FIND_DATAW *FindData) {
FindData->nFileSizeHigh, FindData->nFileSizeLow);
}
-std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
- StringRef path,
- bool follow_symlinks) {
- SmallVector<wchar_t, 128> path_utf16;
+std::error_code detail::directory_iterator_construct(detail::DirIterState &IT,
+ StringRef Path,
+ bool FollowSymlinks) {
+ SmallVector<wchar_t, 128> PathUTF16;
- if (std::error_code ec = widenPath(path, path_utf16))
- return ec;
+ if (std::error_code EC = widenPath(Path, PathUTF16))
+ return EC;
// Convert path to the format that Windows is happy with.
- if (path_utf16.size() > 0 &&
- !is_separator(path_utf16[path.size() - 1]) &&
- path_utf16[path.size() - 1] != L':') {
- path_utf16.push_back(L'\\');
- path_utf16.push_back(L'*');
+ if (PathUTF16.size() > 0 &&
+ !is_separator(PathUTF16[Path.size() - 1]) &&
+ PathUTF16[Path.size() - 1] != L':') {
+ PathUTF16.push_back(L'\\');
+ PathUTF16.push_back(L'*');
} else {
- path_utf16.push_back(L'*');
+ PathUTF16.push_back(L'*');
}
// Get the first directory entry.
WIN32_FIND_DATAW FirstFind;
ScopedFindHandle FindHandle(::FindFirstFileExW(
- c_str(path_utf16), FindExInfoBasic, &FirstFind, FindExSearchNameMatch,
+ c_str(PathUTF16), FindExInfoBasic, &FirstFind, FindExSearchNameMatch,
NULL, FIND_FIRST_EX_LARGE_FETCH));
if (!FindHandle)
return mapWindowsError(::GetLastError());
@@ -934,43 +957,45 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
DWORD LastError = ::GetLastError();
// Check for end.
if (LastError == ERROR_NO_MORE_FILES)
- return detail::directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(IT);
return mapWindowsError(LastError);
} else
FilenameLen = ::wcslen(FirstFind.cFileName);
// Construct the current directory entry.
- SmallString<128> directory_entry_name_utf8;
- if (std::error_code ec =
+ SmallString<128> DirectoryEntryNameUTF8;
+ if (std::error_code EC =
UTF16ToUTF8(FirstFind.cFileName, ::wcslen(FirstFind.cFileName),
- directory_entry_name_utf8))
- return ec;
+ DirectoryEntryNameUTF8))
+ return EC;
- it.IterationHandle = intptr_t(FindHandle.take());
- SmallString<128> directory_entry_path(path);
- path::append(directory_entry_path, directory_entry_name_utf8);
- it.CurrentEntry = directory_entry(directory_entry_path, follow_symlinks,
- status_from_find_data(&FirstFind));
+ IT.IterationHandle = intptr_t(FindHandle.take());
+ SmallString<128> DirectoryEntryPath(Path);
+ path::append(DirectoryEntryPath, DirectoryEntryNameUTF8);
+ IT.CurrentEntry =
+ directory_entry(DirectoryEntryPath, FollowSymlinks,
+ file_type_from_attrs(FirstFind.dwFileAttributes),
+ status_from_find_data(&FirstFind));
return std::error_code();
}
-std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
- if (it.IterationHandle != 0)
+std::error_code detail::directory_iterator_destruct(detail::DirIterState &IT) {
+ if (IT.IterationHandle != 0)
// Closes the handle if it's valid.
- ScopedFindHandle close(HANDLE(it.IterationHandle));
- it.IterationHandle = 0;
- it.CurrentEntry = directory_entry();
+ ScopedFindHandle close(HANDLE(IT.IterationHandle));
+ IT.IterationHandle = 0;
+ IT.CurrentEntry = directory_entry();
return std::error_code();
}
-std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
+std::error_code detail::directory_iterator_increment(detail::DirIterState &IT) {
WIN32_FIND_DATAW FindData;
- if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
+ if (!::FindNextFileW(HANDLE(IT.IterationHandle), &FindData)) {
DWORD LastError = ::GetLastError();
// Check for end.
if (LastError == ERROR_NO_MORE_FILES)
- return detail::directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(IT);
return mapWindowsError(LastError);
}
@@ -978,16 +1003,18 @@ std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
if ((FilenameLen == 1 && FindData.cFileName[0] == L'.') ||
(FilenameLen == 2 && FindData.cFileName[0] == L'.' &&
FindData.cFileName[1] == L'.'))
- return directory_iterator_increment(it);
+ return directory_iterator_increment(IT);
- SmallString<128> directory_entry_path_utf8;
- if (std::error_code ec =
+ SmallString<128> DirectoryEntryPathUTF8;
+ if (std::error_code EC =
UTF16ToUTF8(FindData.cFileName, ::wcslen(FindData.cFileName),
- directory_entry_path_utf8))
- return ec;
+ DirectoryEntryPathUTF8))
+ return EC;
- it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8),
- status_from_find_data(&FindData));
+ IT.CurrentEntry.replace_filename(
+ Twine(DirectoryEntryPathUTF8),
+ file_type_from_attrs(FindData.dwFileAttributes),
+ status_from_find_data(&FindData));
return std::error_code();
}
@@ -1226,6 +1253,17 @@ static void expandTildeExpr(SmallVectorImpl<char> &Path) {
Path.insert(Path.begin() + 1, HomeDir.begin() + 1, HomeDir.end());
}
+void expand_tilde(const Twine &path, SmallVectorImpl<char> &dest) {
+ dest.clear();
+ if (path.isTriviallyEmpty())
+ return;
+
+ path.toVector(dest);
+ expandTildeExpr(dest);
+
+ return;
+}
+
std::error_code real_path(const Twine &path, SmallVectorImpl<char> &dest,
bool expand_tilde) {
dest.clear();
@@ -1264,10 +1302,6 @@ static bool getKnownFolderPath(KNOWNFOLDERID folderId,
return ok;
}
-bool getUserCacheDir(SmallVectorImpl<char> &Result) {
- return getKnownFolderPath(FOLDERID_LocalAppData, Result);
-}
-
bool home_directory(SmallVectorImpl<char> &result) {
return getKnownFolderPath(FOLDERID_Profile, result);
}
diff --git a/contrib/llvm/lib/Support/Windows/Process.inc b/contrib/llvm/lib/Support/Windows/Process.inc
index 30126568769c..2b2d79231434 100644
--- a/contrib/llvm/lib/Support/Windows/Process.inc
+++ b/contrib/llvm/lib/Support/Windows/Process.inc
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/StringSaver.h"
#include "llvm/Support/WindowsError.h"
#include <malloc.h>
@@ -140,73 +142,59 @@ Optional<std::string> Process::GetEnv(StringRef Name) {
return std::string(Res.data());
}
-static const char *AllocateString(const SmallVectorImpl<char> &S,
- BumpPtrAllocator &Alloc) {
- char *Buf = reinterpret_cast<char *>(Alloc.Allocate(S.size() + 1, 1));
- ::memcpy(Buf, S.data(), S.size());
- Buf[S.size()] = '\0';
- return Buf;
-}
-
-/// Convert Arg from UTF-16 to UTF-8 and push it onto Args.
-static std::error_code ConvertAndPushArg(const wchar_t *Arg,
- SmallVectorImpl<const char *> &Args,
- BumpPtrAllocator &Alloc) {
- SmallVector<char, MAX_PATH> ArgString;
- if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), ArgString))
- return ec;
- Args.push_back(AllocateString(ArgString, Alloc));
- return std::error_code();
-}
-
-/// Perform wildcard expansion of Arg, or just push it into Args if it
-/// doesn't have wildcards or doesn't match any files.
-static std::error_code WildcardExpand(const wchar_t *Arg,
+/// Perform wildcard expansion of Arg, or just push it into Args if it doesn't
+/// have wildcards or doesn't match any files.
+static std::error_code WildcardExpand(StringRef Arg,
SmallVectorImpl<const char *> &Args,
- BumpPtrAllocator &Alloc) {
- if (!wcspbrk(Arg, L"*?")) {
- // Arg does not contain any wildcard characters. This is the common case.
- return ConvertAndPushArg(Arg, Args, Alloc);
- }
+ StringSaver &Saver) {
+ std::error_code EC;
- if (wcscmp(Arg, L"/?") == 0 || wcscmp(Arg, L"-?") == 0) {
- // Don't wildcard expand /?. Always treat it as an option.
- return ConvertAndPushArg(Arg, Args, Alloc);
+ // Don't expand Arg if it does not contain any wildcard characters. This is
+ // the common case. Also don't wildcard expand /?. Always treat it as an
+ // option.
+ if (Arg.find_first_of("*?") == StringRef::npos || Arg == "/?" ||
+ Arg == "-?") {
+ Args.push_back(Arg.data());
+ return EC;
}
- // Extract any directory part of the argument.
- SmallVector<char, MAX_PATH> Dir;
- if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), Dir))
- return ec;
- sys::path::remove_filename(Dir);
- const int DirSize = Dir.size();
+ // Convert back to UTF-16 so we can call FindFirstFileW.
+ SmallVector<wchar_t, MAX_PATH> ArgW;
+ EC = windows::UTF8ToUTF16(Arg, ArgW);
+ if (EC)
+ return EC;
// Search for matching files.
// FIXME: This assumes the wildcard is only in the file name and not in the
// directory portion of the file path. For example, it doesn't handle
// "*\foo.c" nor "s?c\bar.cpp".
WIN32_FIND_DATAW FileData;
- HANDLE FindHandle = FindFirstFileW(Arg, &FileData);
+ HANDLE FindHandle = FindFirstFileW(ArgW.data(), &FileData);
if (FindHandle == INVALID_HANDLE_VALUE) {
- return ConvertAndPushArg(Arg, Args, Alloc);
+ Args.push_back(Arg.data());
+ return EC;
}
- std::error_code ec;
+ // Extract any directory part of the argument.
+ SmallString<MAX_PATH> Dir = Arg;
+ sys::path::remove_filename(Dir);
+ const int DirSize = Dir.size();
+
do {
- SmallVector<char, MAX_PATH> FileName;
- ec = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName),
+ SmallString<MAX_PATH> FileName;
+ EC = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName),
FileName);
- if (ec)
+ if (EC)
break;
// Append FileName to Dir, and remove it afterwards.
- llvm::sys::path::append(Dir, StringRef(FileName.data(), FileName.size()));
- Args.push_back(AllocateString(Dir, Alloc));
+ llvm::sys::path::append(Dir, FileName);
+ Args.push_back(Saver.save(StringRef(Dir)).data());
Dir.resize(DirSize);
} while (FindNextFileW(FindHandle, &FileData));
FindClose(FindHandle);
- return ec;
+ return EC;
}
static std::error_code GetExecutableName(SmallVectorImpl<char> &Filename) {
@@ -243,18 +231,20 @@ static std::error_code GetExecutableName(SmallVectorImpl<char> &Filename) {
std::error_code
windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
BumpPtrAllocator &Alloc) {
- int ArgCount;
- std::unique_ptr<wchar_t *[], decltype(&LocalFree)> UnicodeCommandLine{
- CommandLineToArgvW(GetCommandLineW(), &ArgCount), &LocalFree};
- if (!UnicodeCommandLine)
- return mapWindowsError(::GetLastError());
-
+ const wchar_t *CmdW = GetCommandLineW();
+ assert(CmdW);
std::error_code EC;
+ SmallString<MAX_PATH> Cmd;
+ EC = windows::UTF16ToUTF8(CmdW, wcslen(CmdW), Cmd);
+ if (EC)
+ return EC;
- Args.reserve(ArgCount);
+ SmallVector<const char *, 20> TmpArgs;
+ StringSaver Saver(Alloc);
+ cl::TokenizeWindowsCommandLine(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false);
- for (int I = 0; I < ArgCount; ++I) {
- EC = WildcardExpand(UnicodeCommandLine[I], Args, Alloc);
+ for (const char *Arg : TmpArgs) {
+ EC = WildcardExpand(Arg, Args, Saver);
if (EC)
return EC;
}
@@ -266,7 +256,7 @@ windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
if (EC)
return EC;
sys::path::append(Arg0, Filename);
- Args[0] = AllocateString(Arg0, Alloc);
+ Args[0] = Saver.save(Arg0).data();
return std::error_code();
}
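
The rewrite drops CommandLineToArgvW in favour of converting the whole command line to UTF-8 once and tokenizing it with cl::TokenizeWindowsCommandLine, keeping every argument alive in a StringSaver. A standalone sketch of that tokenization; the command-line string and function name are made up for illustration:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/StringSaver.h"

static void tokenizeExample() {
  llvm::BumpPtrAllocator Alloc;
  llvm::StringSaver Saver(Alloc);
  llvm::SmallVector<const char *, 8> Argv;
  // Splits on whitespace and strips Windows-style quoting; each token is
  // copied into Alloc so the pointers stay valid after this call.
  llvm::cl::TokenizeWindowsCommandLine("clang -D\"NAME=value\" a.c", Saver,
                                       Argv, /*MarkEOLs=*/false);
  // Argv now holds {"clang", "-DNAME=value", "a.c"}.
}
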
@@ -328,6 +318,15 @@ bool Process::StandardErrHasColors() {
static bool UseANSI = false;
void Process::UseANSIEscapeCodes(bool enable) {
+#if defined(ENABLE_VIRTUAL_TERMINAL_PROCESSING)
+ if (enable) {
+ HANDLE Console = GetStdHandle(STD_OUTPUT_HANDLE);
+ DWORD Mode;
+ GetConsoleMode(Console, &Mode);
+ Mode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING;
+ SetConsoleMode(Console, Mode);
+ }
+#endif
UseANSI = enable;
}
@@ -461,3 +460,27 @@ unsigned Process::GetRandomNumber() {
ReportLastErrorFatal("Could not generate a random number");
return Ret;
}
+
+typedef NTSTATUS(WINAPI* RtlGetVersionPtr)(PRTL_OSVERSIONINFOW);
+#define STATUS_SUCCESS ((NTSTATUS)0x00000000L)
+
+llvm::VersionTuple llvm::GetWindowsOSVersion() {
+ HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
+ if (hMod) {
+ auto getVer = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion");
+ if (getVer) {
+ RTL_OSVERSIONINFOEXW info{};
+ info.dwOSVersionInfoSize = sizeof(info);
+ if (getVer((PRTL_OSVERSIONINFOW)&info) == STATUS_SUCCESS) {
+ return llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0,
+ info.dwBuildNumber);
+ }
+ }
+ }
+ return llvm::VersionTuple(0, 0, 0, 0);
+}
+
+bool llvm::RunningWindows8OrGreater() {
+ // Windows 8 is version 6.2, service pack 0.
+ return GetWindowsOSVersion() >= llvm::VersionTuple(6, 2, 0, 0);
+}
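
GetWindowsOSVersion() is declared in the private WindowsSupport.h header (see below), so it is only reachable from other Windows/*.inc files; the typical consumer caches the comparison once, as hasFlushBufferKernelBug() does in Path.inc. A minimal sketch under that assumption; the helper name and the 17763 (Windows 10 1809) threshold merely mirror the workaround above:

#include "llvm/Support/VersionTuple.h"
// Assumes this Windows-only translation unit already includes the private
// "WindowsSupport.h" header that declares GetWindowsOSVersion().

static bool isAtLeastWin10_1809() {
  static bool Ret =
      llvm::GetWindowsOSVersion() >= llvm::VersionTuple(10, 0, 0, 17763);
  return Ret;
}
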
diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc
index cb68c5b10e52..c037956603f2 100644
--- a/contrib/llvm/lib/Support/Windows/Program.inc
+++ b/contrib/llvm/lib/Support/Windows/Program.inc
@@ -105,6 +105,25 @@ ErrorOr<std::string> sys::findProgramByName(StringRef Name,
return std::string(U8Result.begin(), U8Result.end());
}
+bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix) {
+ if (!ErrMsg)
+ return true;
+ char *buffer = NULL;
+ DWORD LastError = GetLastError();
+ DWORD R = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_MAX_WIDTH_MASK,
+ NULL, LastError, 0, (LPSTR)&buffer, 1, NULL);
+ if (R)
+ *ErrMsg = prefix + ": " + buffer;
+ else
+ *ErrMsg = prefix + ": Unknown error";
+ *ErrMsg += " (0x" + llvm::utohexstr(LastError) + ")";
+
+ LocalFree(buffer);
+ return R != 0;
+}
+
static HANDLE RedirectIO(Optional<StringRef> Path, int fd,
std::string *ErrMsg) {
HANDLE h;
@@ -317,7 +336,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
static bool argNeedsQuotes(StringRef Arg) {
if (Arg.empty())
return true;
- return StringRef::npos != Arg.find_first_of("\t \"&\'()*<>\\`^|");
+ return StringRef::npos != Arg.find_first_of("\t \"&\'()*<>\\`^|\n");
}
static std::string quoteSingleArg(StringRef Arg) {
diff --git a/contrib/llvm/lib/Support/Windows/Threading.inc b/contrib/llvm/lib/Support/Windows/Threading.inc
index decb48887af2..0bd92f66c6b8 100644
--- a/contrib/llvm/lib/Support/Windows/Threading.inc
+++ b/contrib/llvm/lib/Support/Windows/Threading.inc
@@ -14,7 +14,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
-#include "Windows/WindowsSupport.h"
+#include "WindowsSupport.h"
#include <process.h>
// Windows will at times define MemoryFence.
diff --git a/contrib/llvm/lib/Support/Windows/WindowsSupport.h b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
index c2fd6bb982d4..979cc5d01390 100644
--- a/contrib/llvm/lib/Support/Windows/WindowsSupport.h
+++ b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
@@ -41,6 +41,7 @@
#include "llvm/Config/config.h" // Get build system configuration settings
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/VersionTuple.h"
#include <cassert>
#include <string>
#include <system_error>
@@ -49,54 +50,29 @@
// Must be included after windows.h
#include <wincrypt.h>
+namespace llvm {
+
/// Determines if the program is running on Windows 8 or newer. This
/// reimplements one of the helpers in the Windows 8.1 SDK, which are intended
/// to supersede raw calls to GetVersionEx. Old SDKs, Cygwin, and MinGW don't

/// yet have VersionHelpers.h, so we have our own helper.
-inline bool RunningWindows8OrGreater() {
- // Windows 8 is version 6.2, service pack 0.
- OSVERSIONINFOEXW osvi = {};
- osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
- osvi.dwMajorVersion = 6;
- osvi.dwMinorVersion = 2;
- osvi.wServicePackMajor = 0;
-
- DWORDLONG Mask = 0;
- Mask = VerSetConditionMask(Mask, VER_MAJORVERSION, VER_GREATER_EQUAL);
- Mask = VerSetConditionMask(Mask, VER_MINORVERSION, VER_GREATER_EQUAL);
- Mask = VerSetConditionMask(Mask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL);
-
- return VerifyVersionInfoW(&osvi, VER_MAJORVERSION | VER_MINORVERSION |
- VER_SERVICEPACKMAJOR,
- Mask) != FALSE;
-}
+bool RunningWindows8OrGreater();
-inline bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix) {
- if (!ErrMsg)
- return true;
- char *buffer = NULL;
- DWORD LastError = GetLastError();
- DWORD R = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
- FORMAT_MESSAGE_FROM_SYSTEM |
- FORMAT_MESSAGE_MAX_WIDTH_MASK,
- NULL, LastError, 0, (LPSTR)&buffer, 1, NULL);
- if (R)
- *ErrMsg = prefix + ": " + buffer;
- else
- *ErrMsg = prefix + ": Unknown error";
- *ErrMsg += " (0x" + llvm::utohexstr(LastError) + ")";
-
- LocalFree(buffer);
- return R != 0;
-}
+/// Returns the Windows version as Major.Minor.0.BuildNumber. Uses
+/// RtlGetVersion or GetVersionEx under the hood depending on what is available.
+/// GetVersionEx is deprecated, but this API exposes the build number which can
+/// be useful for working around certain kernel bugs.
+llvm::VersionTuple GetWindowsOSVersion();
+
+bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix);
template <typename HandleTraits>
class ScopedHandle {
typedef typename HandleTraits::handle_type handle_type;
handle_type Handle;
- ScopedHandle(const ScopedHandle &other); // = delete;
- void operator=(const ScopedHandle &other); // = delete;
+ ScopedHandle(const ScopedHandle &other) = delete;
+ void operator=(const ScopedHandle &other) = delete;
public:
ScopedHandle()
: Handle(HandleTraits::GetInvalid()) {}
@@ -201,7 +177,6 @@ typedef ScopedHandle<RegTraits> ScopedRegHandle;
typedef ScopedHandle<FindHandleTraits> ScopedFindHandle;
typedef ScopedHandle<JobHandleTraits> ScopedJobHandle;
-namespace llvm {
template <class T>
class SmallVectorImpl;
diff --git a/contrib/llvm/lib/Support/WithColor.cpp b/contrib/llvm/lib/Support/WithColor.cpp
index d2e13f0e86de..cf4c10956f21 100644
--- a/contrib/llvm/lib/Support/WithColor.cpp
+++ b/contrib/llvm/lib/Support/WithColor.cpp
@@ -19,15 +19,10 @@ static cl::opt<cl::boolOrDefault>
cl::desc("Use colors in output (default=autodetect)"),
cl::init(cl::BOU_UNSET));
-bool WithColor::colorsEnabled(raw_ostream &OS) {
- if (UseColor == cl::BOU_UNSET)
- return OS.has_colors();
- return UseColor == cl::BOU_TRUE;
-}
-
-WithColor::WithColor(raw_ostream &OS, HighlightColor Color) : OS(OS) {
+WithColor::WithColor(raw_ostream &OS, HighlightColor Color, bool DisableColors)
+ : OS(OS), DisableColors(DisableColors) {
// Detect color from terminal type unless the user passed the --color option.
- if (colorsEnabled(OS)) {
+ if (colorsEnabled()) {
switch (Color) {
case HighlightColor::Address:
OS.changeColor(raw_ostream::YELLOW);
@@ -56,6 +51,9 @@ WithColor::WithColor(raw_ostream &OS, HighlightColor Color) : OS(OS) {
case HighlightColor::Note:
OS.changeColor(raw_ostream::BLACK, true);
break;
+ case HighlightColor::Remark:
+ OS.changeColor(raw_ostream::BLUE, true);
+ break;
}
}
}
@@ -66,25 +64,58 @@ raw_ostream &WithColor::warning() { return warning(errs()); }
raw_ostream &WithColor::note() { return note(errs()); }
-raw_ostream &WithColor::error(raw_ostream &OS, StringRef Prefix) {
+raw_ostream &WithColor::remark() { return remark(errs()); }
+
+raw_ostream &WithColor::error(raw_ostream &OS, StringRef Prefix,
+ bool DisableColors) {
if (!Prefix.empty())
OS << Prefix << ": ";
- return WithColor(OS, HighlightColor::Error).get() << "error: ";
+ return WithColor(OS, HighlightColor::Error, DisableColors).get()
+ << "error: ";
}
-raw_ostream &WithColor::warning(raw_ostream &OS, StringRef Prefix) {
+raw_ostream &WithColor::warning(raw_ostream &OS, StringRef Prefix,
+ bool DisableColors) {
if (!Prefix.empty())
OS << Prefix << ": ";
- return WithColor(OS, HighlightColor::Warning).get() << "warning: ";
+ return WithColor(OS, HighlightColor::Warning, DisableColors).get()
+ << "warning: ";
}
-raw_ostream &WithColor::note(raw_ostream &OS, StringRef Prefix) {
+raw_ostream &WithColor::note(raw_ostream &OS, StringRef Prefix,
+ bool DisableColors) {
if (!Prefix.empty())
OS << Prefix << ": ";
- return WithColor(OS, HighlightColor::Note).get() << "note: ";
+ return WithColor(OS, HighlightColor::Note, DisableColors).get() << "note: ";
}
-WithColor::~WithColor() {
- if (colorsEnabled(OS))
+raw_ostream &WithColor::remark(raw_ostream &OS, StringRef Prefix,
+ bool DisableColors) {
+ if (!Prefix.empty())
+ OS << Prefix << ": ";
+ return WithColor(OS, HighlightColor::Remark, DisableColors).get()
+ << "remark: ";
+}
+
+bool WithColor::colorsEnabled() {
+ if (DisableColors)
+ return false;
+ if (UseColor == cl::BOU_UNSET)
+ return OS.has_colors();
+ return UseColor == cl::BOU_TRUE;
+}
+
+WithColor &WithColor::changeColor(raw_ostream::Colors Color, bool Bold,
+ bool BG) {
+ if (colorsEnabled())
+ OS.changeColor(Color, Bold, BG);
+ return *this;
+}
+
+WithColor &WithColor::resetColor() {
+ if (colorsEnabled())
OS.resetColor();
+ return *this;
}
+
+WithColor::~WithColor() { resetColor(); }
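
The WithColor additions (remark(), the DisableColors flag, and the chainable changeColor()/resetColor()) are used along these lines; the "mytool" prefix and message text are illustrative only, and the two-argument constructor assumes the header's default for DisableColors:

#include "llvm/Support/WithColor.h"
using namespace llvm;

static void diagExamples() {
  // Prefixed one-liners; color is skipped when DisableColors is true, when
  // --color says otherwise, or when the stream has no color support.
  WithColor::error(errs(), "mytool") << "something went wrong\n";
  WithColor::remark(errs(), "mytool", /*DisableColors=*/true)
      << "loop vectorized\n";

  // Scoped highlighting: the destructor calls resetColor().
  WithColor(errs(), HighlightColor::Note).get() << "note text";
  WithColor Colored(errs(), HighlightColor::Warning);
  Colored.changeColor(raw_ostream::GREEN, /*Bold=*/true, /*BG=*/false);
  errs() << " ok";
  Colored.resetColor();
  errs() << "\n";
}
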
diff --git a/contrib/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm/lib/Support/YAMLTraits.cpp
index d6345efd00cd..b9bbee7883c6 100644
--- a/contrib/llvm/lib/Support/YAMLTraits.cpp
+++ b/contrib/llvm/lib/Support/YAMLTraits.cpp
@@ -98,7 +98,7 @@ bool Input::setCurrentDocument() {
++DocIterator;
return setCurrentDocument();
}
- TopNode = this->createHNodes(N);
+ TopNode = createHNodes(N);
CurrentNode = TopNode.get();
return true;
}
@@ -341,9 +341,23 @@ void Input::scalarString(StringRef &S, QuotingType) {
void Input::blockScalarString(StringRef &S) { scalarString(S, QuotingType::None); }
+void Input::scalarTag(std::string &Tag) {
+ Tag = CurrentNode->_node->getVerbatimTag();
+}
+
void Input::setError(HNode *hnode, const Twine &message) {
assert(hnode && "HNode must not be NULL");
- this->setError(hnode->_node, message);
+ setError(hnode->_node, message);
+}
+
+NodeKind Input::getNodeKind() {
+ if (isa<ScalarHNode>(CurrentNode))
+ return NodeKind::Scalar;
+ else if (isa<MapHNode>(CurrentNode))
+ return NodeKind::Map;
+ else if (isa<SequenceHNode>(CurrentNode))
+ return NodeKind::Sequence;
+ llvm_unreachable("Unsupported node kind");
}
void Input::setError(Node *node, const Twine &message) {
@@ -366,7 +380,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
} else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) {
auto SQHNode = llvm::make_unique<SequenceHNode>(N);
for (Node &SN : *SQ) {
- auto Entry = this->createHNodes(&SN);
+ auto Entry = createHNodes(&SN);
if (EC)
break;
SQHNode->Entries.push_back(std::move(Entry));
@@ -391,7 +405,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
// Copy string to permanent storage
KeyStr = StringStorage.str().copy(StringAllocator);
}
- auto ValueHNode = this->createHNodes(Value);
+ auto ValueHNode = createHNodes(Value);
if (EC)
break;
mapHNode->Mapping[KeyStr] = std::move(ValueHNode);
@@ -406,7 +420,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
}
void Input::setError(const Twine &Message) {
- this->setError(CurrentNode, Message);
+ setError(CurrentNode, Message);
}
bool Input::canElideEmptySequence() {
@@ -436,15 +450,17 @@ bool Output::mapTag(StringRef Tag, bool Use) {
// If this tag is being written inside a sequence we should write the start
// of the sequence before writing the tag, otherwise the tag won't be
// attached to the element in the sequence, but rather the sequence itself.
- bool SequenceElement =
- StateStack.size() > 1 && (StateStack[StateStack.size() - 2] == inSeq ||
- StateStack[StateStack.size() - 2] == inFlowSeq);
+ bool SequenceElement = false;
+ if (StateStack.size() > 1) {
+ auto &E = StateStack[StateStack.size() - 2];
+ SequenceElement = inSeqAnyElement(E) || inFlowSeqAnyElement(E);
+ }
if (SequenceElement && StateStack.back() == inMapFirstKey) {
- this->newLineCheck();
+ newLineCheck();
} else {
- this->output(" ");
+ output(" ");
}
- this->output(Tag);
+ output(Tag);
if (SequenceElement) {
// If we're writing the tag during the first element of a map, the tag
// takes the place of the first element in the sequence.
@@ -461,6 +477,9 @@ bool Output::mapTag(StringRef Tag, bool Use) {
}
void Output::endMapping() {
+ // If we did not map anything, we should explicitly emit an empty map
+ if (StateStack.back() == inMapFirstKey)
+ output("{}");
StateStack.pop_back();
}
@@ -476,8 +495,8 @@ bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
if (State == inFlowMapFirstKey || State == inFlowMapOtherKey) {
flowKey(Key);
} else {
- this->newLineCheck();
- this->paddedKey(Key);
+ newLineCheck();
+ paddedKey(Key);
}
return true;
}
@@ -496,23 +515,23 @@ void Output::postflightKey(void *) {
void Output::beginFlowMapping() {
StateStack.push_back(inFlowMapFirstKey);
- this->newLineCheck();
+ newLineCheck();
ColumnAtMapFlowStart = Column;
output("{ ");
}
void Output::endFlowMapping() {
StateStack.pop_back();
- this->outputUpToEndOfLine(" }");
+ outputUpToEndOfLine(" }");
}
void Output::beginDocuments() {
- this->outputUpToEndOfLine("---");
+ outputUpToEndOfLine("---");
}
bool Output::preflightDocument(unsigned index) {
if (index > 0)
- this->outputUpToEndOfLine("\n---");
+ outputUpToEndOfLine("\n---");
return true;
}
@@ -524,12 +543,15 @@ void Output::endDocuments() {
}
unsigned Output::beginSequence() {
- StateStack.push_back(inSeq);
+ StateStack.push_back(inSeqFirstElement);
NeedsNewLine = true;
return 0;
}
void Output::endSequence() {
+ // If we did not emit anything, we should explicitly emit an empty sequence
+ if (StateStack.back() == inSeqFirstElement)
+ output("[]");
StateStack.pop_back();
}
@@ -538,11 +560,18 @@ bool Output::preflightElement(unsigned, void *&) {
}
void Output::postflightElement(void *) {
+ if (StateStack.back() == inSeqFirstElement) {
+ StateStack.pop_back();
+ StateStack.push_back(inSeqOtherElement);
+ } else if (StateStack.back() == inFlowSeqFirstElement) {
+ StateStack.pop_back();
+ StateStack.push_back(inFlowSeqOtherElement);
+ }
}
unsigned Output::beginFlowSequence() {
- StateStack.push_back(inFlowSeq);
- this->newLineCheck();
+ StateStack.push_back(inFlowSeqFirstElement);
+ newLineCheck();
ColumnAtFlowStart = Column;
output("[ ");
NeedFlowSequenceComma = false;
@@ -551,7 +580,7 @@ unsigned Output::beginFlowSequence() {
void Output::endFlowSequence() {
StateStack.pop_back();
- this->outputUpToEndOfLine(" ]");
+ outputUpToEndOfLine(" ]");
}
bool Output::preflightFlowElement(unsigned, void *&) {
@@ -577,8 +606,8 @@ void Output::beginEnumScalar() {
bool Output::matchEnumScalar(const char *Str, bool Match) {
if (Match && !EnumerationMatchFound) {
- this->newLineCheck();
- this->outputUpToEndOfLine(Str);
+ newLineCheck();
+ outputUpToEndOfLine(Str);
EnumerationMatchFound = true;
}
return false;
@@ -597,7 +626,7 @@ void Output::endEnumScalar() {
}
bool Output::beginBitSetScalar(bool &DoClear) {
- this->newLineCheck();
+ newLineCheck();
output("[ ");
NeedBitValueComma = false;
DoClear = false;
@@ -608,27 +637,27 @@ bool Output::bitSetMatch(const char *Str, bool Matches) {
if (Matches) {
if (NeedBitValueComma)
output(", ");
- this->output(Str);
+ output(Str);
NeedBitValueComma = true;
}
return false;
}
void Output::endBitSetScalar() {
- this->outputUpToEndOfLine(" ]");
+ outputUpToEndOfLine(" ]");
}
void Output::scalarString(StringRef &S, QuotingType MustQuote) {
- this->newLineCheck();
+ newLineCheck();
if (S.empty()) {
// Print '' for the empty string because leaving the field empty is not
// allowed.
- this->outputUpToEndOfLine("''");
+ outputUpToEndOfLine("''");
return;
}
if (MustQuote == QuotingType::None) {
// Only quote if we must.
- this->outputUpToEndOfLine(S);
+ outputUpToEndOfLine(S);
return;
}
@@ -645,7 +674,7 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
// escapes. This is handled in yaml::escape.
if (MustQuote == QuotingType::Double) {
output(yaml::escape(Base, /* EscapePrintable= */ false));
- this->outputUpToEndOfLine(Quote);
+ outputUpToEndOfLine(Quote);
return;
}
@@ -659,7 +688,7 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
++j;
}
output(StringRef(&Base[i], j - i));
- this->outputUpToEndOfLine(Quote); // Ending quote.
+ outputUpToEndOfLine(Quote); // Ending quote.
}
void Output::blockScalarString(StringRef &S) {
@@ -680,6 +709,14 @@ void Output::blockScalarString(StringRef &S) {
}
}
+void Output::scalarTag(std::string &Tag) {
+ if (Tag.empty())
+ return;
+ newLineCheck();
+ output(Tag);
+ output(" ");
+}
+
void Output::setError(const Twine &message) {
}
@@ -693,7 +730,7 @@ bool Output::canElideEmptySequence() {
return true;
if (StateStack.back() != inMapFirstKey)
return true;
- return (StateStack[StateStack.size()-2] != inSeq);
+ return !inSeqAnyElement(StateStack[StateStack.size() - 2]);
}
void Output::output(StringRef s) {
@@ -702,10 +739,9 @@ void Output::output(StringRef s) {
}
void Output::outputUpToEndOfLine(StringRef s) {
- this->output(s);
- if (StateStack.empty() || (StateStack.back() != inFlowSeq &&
- StateStack.back() != inFlowMapFirstKey &&
- StateStack.back() != inFlowMapOtherKey))
+ output(s);
+ if (StateStack.empty() || (!inFlowSeqAnyElement(StateStack.back()) &&
+ !inFlowMapAnyKey(StateStack.back())))
NeedsNewLine = true;
}
@@ -723,18 +759,22 @@ void Output::newLineCheck() {
return;
NeedsNewLine = false;
- this->outputNewLine();
+ outputNewLine();
+
+ if (StateStack.size() == 0)
+ return;
- assert(StateStack.size() > 0);
unsigned Indent = StateStack.size() - 1;
bool OutputDash = false;
- if (StateStack.back() == inSeq) {
+ if (StateStack.back() == inSeqFirstElement ||
+ StateStack.back() == inSeqOtherElement) {
OutputDash = true;
- } else if ((StateStack.size() > 1) && ((StateStack.back() == inMapFirstKey) ||
- (StateStack.back() == inFlowSeq) ||
- (StateStack.back() == inFlowMapFirstKey)) &&
- (StateStack[StateStack.size() - 2] == inSeq)) {
+ } else if ((StateStack.size() > 1) &&
+ ((StateStack.back() == inMapFirstKey) ||
+ inFlowSeqAnyElement(StateStack.back()) ||
+ (StateStack.back() == inFlowMapFirstKey)) &&
+ inSeqAnyElement(StateStack[StateStack.size() - 2])) {
--Indent;
OutputDash = true;
}
@@ -772,6 +812,24 @@ void Output::flowKey(StringRef Key) {
output(": ");
}
+NodeKind Output::getNodeKind() { report_fatal_error("invalid call"); }
+
+bool Output::inSeqAnyElement(InState State) {
+ return State == inSeqFirstElement || State == inSeqOtherElement;
+}
+
+bool Output::inFlowSeqAnyElement(InState State) {
+ return State == inFlowSeqFirstElement || State == inFlowSeqOtherElement;
+}
+
+bool Output::inMapAnyKey(InState State) {
+ return State == inMapFirstKey || State == inMapOtherKey;
+}
+
+bool Output::inFlowMapAnyKey(InState State) {
+ return State == inFlowMapFirstKey || State == inFlowMapOtherKey;
+}
+
//===----------------------------------------------------------------------===//
// traits for built-in types
//===----------------------------------------------------------------------===//
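
With the first-element/other-element state split above, yaml::Output now writes an explicit "[]" or "{}" for collections that produced no entries, instead of leaving the key with no value (which round-trips as null). A sketch of the effect; Item and Config are made-up types used only for illustration:

#include "llvm/Support/YAMLTraits.h"
#include <vector>

struct Item { int X; };
struct Config { std::vector<Item> Items; };

LLVM_YAML_IS_SEQUENCE_VECTOR(Item)

namespace llvm {
namespace yaml {
template <> struct MappingTraits<Item> {
  static void mapping(IO &IO, Item &I) { IO.mapRequired("x", I.X); }
};
template <> struct MappingTraits<Config> {
  static void mapping(IO &IO, Config &C) { IO.mapRequired("items", C.Items); }
};
} // end namespace yaml
} // end namespace llvm

// yaml::Output Out(llvm::outs()); Config C; Out << C;
// An empty C.Items is now emitted as "items: []" rather than a bare
// "items:" line.
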
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp
index 038ad00bd608..21dde7ff914a 100644
--- a/contrib/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm/lib/Support/raw_ostream.cpp
@@ -60,6 +60,7 @@
#endif
#ifdef _WIN32
+#include "llvm/Support/ConvertUTF.h"
#include "Windows/WindowsSupport.h"
#endif
@@ -567,6 +568,12 @@ raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
if (FD <= STDERR_FILENO)
ShouldClose = false;
+#ifdef _WIN32
+ // Check if this is a console device. This is not equivalent to isatty.
+ IsWindowsConsole =
+ ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR;
+#endif
+
// Get the starting position.
off_t loc = ::lseek(FD, 0, SEEK_CUR);
#ifdef _WIN32
@@ -609,25 +616,77 @@ raw_fd_ostream::~raw_fd_ostream() {
/*GenCrashDiag=*/false);
}
+#if defined(_WIN32)
+// The most reliable way to print unicode in a Windows console is with
+// WriteConsoleW. To use that, first transcode from UTF-8 to UTF-16. This
+// assumes that LLVM programs always print valid UTF-8 to the console. The data
+// might not be UTF-8 for two major reasons:
+// 1. The program is printing binary (-filetype=obj -o -), in which case it
+// would have been gibberish anyway.
+// 2. The program is printing text in a semi-ascii compatible codepage like
+// shift-jis or cp1252.
+//
+// Most LLVM programs don't produce non-ascii text unless they are quoting
+// user source input. A well-behaved LLVM program should either validate that
+// the input is UTF-8 or transcode from the local codepage to UTF-8 before
+// quoting it. If they don't, this may mess up the encoding, but this is still
+// probably the best compromise we can make.
+static bool write_console_impl(int FD, StringRef Data) {
+ SmallVector<wchar_t, 256> WideText;
+
+ // Fall back to ::write if it wasn't valid UTF-8.
+ if (auto EC = sys::windows::UTF8ToUTF16(Data, WideText))
+ return false;
+
+ // On Windows 7 and earlier, WriteConsoleW has a low maximum amount of data
+ // that can be written to the console at a time.
+ size_t MaxWriteSize = WideText.size();
+ if (!RunningWindows8OrGreater())
+ MaxWriteSize = 32767;
+
+ size_t WCharsWritten = 0;
+ do {
+ size_t WCharsToWrite =
+ std::min(MaxWriteSize, WideText.size() - WCharsWritten);
+ DWORD ActuallyWritten;
+ bool Success =
+ ::WriteConsoleW((HANDLE)::_get_osfhandle(FD), &WideText[WCharsWritten],
+ WCharsToWrite, &ActuallyWritten,
+ /*Reserved=*/nullptr);
+
+ // The most likely reason for WriteConsoleW to fail is that FD no longer
+ // points to a console. Fall back to ::write. If this isn't the first loop
+ // iteration, something is truly wrong.
+ if (!Success)
+ return false;
+
+ WCharsWritten += ActuallyWritten;
+ } while (WCharsWritten != WideText.size());
+ return true;
+}
+#endif
+
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
pos += Size;
- // The maximum write size is limited to SSIZE_MAX because a write
- // greater than SSIZE_MAX is implementation-defined in POSIX.
- // Since SSIZE_MAX is not portable, we use SIZE_MAX >> 1 instead.
- size_t MaxWriteSize = SIZE_MAX >> 1;
+#if defined(_WIN32)
+ // If this is a Windows console device, try re-encoding from UTF-8 to UTF-16
+ // and using WriteConsoleW. If that fails, fall back to plain write().
+ if (IsWindowsConsole)
+ if (write_console_impl(FD, StringRef(Ptr, Size)))
+ return;
+#endif
+
+ // The maximum write size is limited to INT32_MAX. A write
+ // greater than SSIZE_MAX is implementation-defined in POSIX,
+ // and Windows _write requires 32 bit input.
+ size_t MaxWriteSize = INT32_MAX;
#if defined(__linux__)
// It is observed that Linux returns EINVAL for a very large write (>2G).
// Make it a reasonably small value.
MaxWriteSize = 1024 * 1024 * 1024;
-#elif defined(_WIN32)
- // Writing a large size of output to Windows console returns ENOMEM. It seems
- // that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and
- // the latter has a size limit (66000 bytes or less, depending on heap usage).
- if (::_isatty(FD) && !RunningWindows8OrGreater())
- MaxWriteSize = 32767;
#endif
do {
@@ -696,8 +755,17 @@ void raw_fd_ostream::pwrite_impl(const char *Ptr, size_t Size,
}
size_t raw_fd_ostream::preferred_buffer_size() const {
-#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
- // Windows and Minix have no st_blksize.
+#if defined(_WIN32)
+ // Disable buffering for console devices. Console output is re-encoded from
+ // UTF-8 to UTF-16 on Windows, and buffering it would require us to split the
+ // buffer on a valid UTF-8 codepoint boundary. Terminal buffering is disabled
+ // below on most other OSs, so do the same thing on Windows and avoid that
+ // complexity.
+ if (IsWindowsConsole)
+ return 0;
+ return raw_ostream::preferred_buffer_size();
+#elif !defined(__minix)
+ // Minix has no st_blksize.
assert(FD >= 0 && "File not yet open!");
struct stat statbuf;
if (fstat(FD, &statbuf) != 0)
@@ -846,3 +914,5 @@ void raw_null_ostream::pwrite_impl(const char *Ptr, size_t Size,
uint64_t Offset) {}
void raw_pwrite_stream::anchor() {}
+
+void buffer_ostream::anchor() {}
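
From a tool's point of view the console handling above is transparent: raw_fd_ostream detects FILE_TYPE_CHAR descriptors, disables buffering for them, and re-encodes UTF-8 to UTF-16 for WriteConsoleW, falling back to write() for pipes and files. Illustration only; the message text is arbitrary:

#include "llvm/Support/raw_ostream.h"

// On a Windows console this UTF-8 string is transcoded and written with
// WriteConsoleW; when stdout is redirected it still goes through write().
static void printUnicodeNote() {
  llvm::outs() << "note: файл → 日本語\n";
}
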
diff --git a/contrib/llvm/lib/TableGen/Main.cpp b/contrib/llvm/lib/TableGen/Main.cpp
index 3a0701626089..02698416609f 100644
--- a/contrib/llvm/lib/TableGen/Main.cpp
+++ b/contrib/llvm/lib/TableGen/Main.cpp
@@ -46,6 +46,10 @@ static cl::list<std::string>
IncludeDirs("I", cl::desc("Directory of include files"),
cl::value_desc("directory"), cl::Prefix);
+static cl::list<std::string>
+MacroNames("D", cl::desc("Name of the macro to be defined"),
+ cl::value_desc("macro name"), cl::Prefix);
+
static int reportError(const char *ProgName, Twine Msg) {
errs() << ProgName << ": " << Msg;
errs().flush();
@@ -91,28 +95,44 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
// it later.
SrcMgr.setIncludeDirs(IncludeDirs);
- TGParser Parser(SrcMgr, Records);
+ TGParser Parser(SrcMgr, MacroNames, Records);
if (Parser.ParseFile())
return 1;
- std::error_code EC;
- ToolOutputFile Out(OutputFilename, EC, sys::fs::F_Text);
- if (EC)
- return reportError(argv0, "error opening " + OutputFilename + ":" +
- EC.message() + "\n");
+ // Write output to memory.
+ std::string OutString;
+ raw_string_ostream Out(OutString);
+ if (MainFn(Out, Records))
+ return 1;
+
+ // Always write the depfile, even if the main output hasn't changed.
+ // If it's missing, Ninja considers the output dirty. If this step came after
+ // the early exit below and someone deleted the .inc.d file but not the .inc
+ // file, tablegen would never write the depfile.
if (!DependFilename.empty()) {
if (int Ret = createDependencyFile(Parser, argv0))
return Ret;
}
- if (MainFn(Out.os(), Records))
- return 1;
+ // Only updates the real output file if there are any differences.
+ // This prevents recompilation of all the files depending on it if there
+ // aren't any.
+ if (auto ExistingOrErr = MemoryBuffer::getFile(OutputFilename))
+ if (std::move(ExistingOrErr.get())->getBuffer() == Out.str())
+ return 0;
+
+ std::error_code EC;
+ ToolOutputFile OutFile(OutputFilename, EC, sys::fs::F_Text);
+ if (EC)
+ return reportError(argv0, "error opening " + OutputFilename + ":" +
+ EC.message() + "\n");
+ OutFile.os() << Out.str();
if (ErrorsPrinted > 0)
return reportError(argv0, Twine(ErrorsPrinted) + " errors.\n");
// Declare success.
- Out.keep();
+ OutFile.keep();
return 0;
}
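
The new TableGenMain flow renders the whole output into a string and only rewrites the .inc file when its contents actually changed, so dependents are not recompiled needlessly, while the depfile is still always refreshed. The same idiom in isolation; writeIfChanged and its parameters are illustrative names, not part of this change:

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"

// Write NewText to Path only when it differs from the file's current contents.
static std::error_code writeIfChanged(llvm::StringRef Path,
                                      llvm::StringRef NewText) {
  if (auto Existing = llvm::MemoryBuffer::getFile(Path))
    if ((*Existing)->getBuffer() == NewText)
      return std::error_code(); // up to date; leave the timestamp alone
  std::error_code EC;
  llvm::ToolOutputFile Out(Path, EC, llvm::sys::fs::F_Text);
  if (EC)
    return EC;
  Out.os() << NewText;
  Out.keep(); // prevent deletion on destruction
  return EC;
}
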
diff --git a/contrib/llvm/lib/TableGen/Record.cpp b/contrib/llvm/lib/TableGen/Record.cpp
index 43d178caef30..cf1685a2e8c2 100644
--- a/contrib/llvm/lib/TableGen/Record.cpp
+++ b/contrib/llvm/lib/TableGen/Record.cpp
@@ -158,10 +158,9 @@ RecordRecTy *RecordRecTy::get(ArrayRef<Record *> UnsortedClasses) {
SmallVector<Record *, 4> Classes(UnsortedClasses.begin(),
UnsortedClasses.end());
- llvm::sort(Classes.begin(), Classes.end(),
- [](Record *LHS, Record *RHS) {
- return LHS->getNameInitAsString() < RHS->getNameInitAsString();
- });
+ llvm::sort(Classes, [](Record *LHS, Record *RHS) {
+ return LHS->getNameInitAsString() < RHS->getNameInitAsString();
+ });
FoldingSetNodeID ID;
ProfileRecordRecTy(ID, Classes);
@@ -487,7 +486,7 @@ Init *IntInit::convertInitializerTo(RecTy *Ty) const {
SmallVector<Init *, 16> NewBits(BRT->getNumBits());
for (unsigned i = 0; i != BRT->getNumBits(); ++i)
- NewBits[i] = BitInit::get(Value & (1LL << i));
+ NewBits[i] = BitInit::get(Value & ((i < 64) ? (1LL << i) : 0));
return BitsInit::get(NewBits);
}
@@ -710,6 +709,8 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
return StringInit::get(LHSi->getAsString());
} else if (isa<RecordRecTy>(getType())) {
if (StringInit *Name = dyn_cast<StringInit>(LHS)) {
+ if (!CurRec && !IsFinal)
+ break;
assert(CurRec && "NULL pointer");
Record *D;
diff --git a/contrib/llvm/lib/TableGen/TGLexer.cpp b/contrib/llvm/lib/TableGen/TGLexer.cpp
index 652be6e8dbbf..16aeee561075 100644
--- a/contrib/llvm/lib/TableGen/TGLexer.cpp
+++ b/contrib/llvm/lib/TableGen/TGLexer.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
+#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstdint>
@@ -28,11 +29,35 @@
using namespace llvm;
-TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
+namespace {
+// A list of supported preprocessing directives with their
+// internal token kinds and names.
+struct {
+ tgtok::TokKind Kind;
+ const char *Word;
+} PreprocessorDirs[] = {
+ { tgtok::Ifdef, "ifdef" },
+ { tgtok::Else, "else" },
+ { tgtok::Endif, "endif" },
+ { tgtok::Define, "define" }
+};
+} // end anonymous namespace
+
+TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
CurBuffer = SrcMgr.getMainFileID();
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
CurPtr = CurBuf.begin();
TokStart = nullptr;
+
+ // Pretend that we enter the "top-level" include file.
+ PrepIncludeStack.push_back(
+ make_unique<std::vector<PreprocessorControlDesc>>());
+
+ // Put all macros defined in the command line into the DefinedMacros set.
+ std::for_each(Macros.begin(), Macros.end(),
+ [this](const std::string &MacroName) {
+ DefinedMacros.insert(MacroName);
+ });
}
SMLoc TGLexer::getLoc() const {
@@ -41,11 +66,42 @@ SMLoc TGLexer::getLoc() const {
/// ReturnError - Set the error to the specified string at the specified
/// location. This is defined to always return tgtok::Error.
-tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
+tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) {
PrintError(Loc, Msg);
return tgtok::Error;
}
+tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
+ return ReturnError(SMLoc::getFromPointer(Loc), Msg);
+}
+
+bool TGLexer::processEOF() {
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc != SMLoc()) {
+ // If prepExitInclude() detects a problem with the preprocessing
+ // control stack, it will return false. Pretend that we reached
+ // the final EOF and stop lexing more tokens by returning false
+ // to LexToken().
+ if (!prepExitInclude(false))
+ return false;
+
+ CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
+ CurPtr = ParentIncludeLoc.getPointer();
+ // Make sure TokStart points into the parent file's buffer.
+ // LexToken() assigns to it before calling getNextChar(),
+ // so it is pointing into the included file now.
+ TokStart = CurPtr;
+ return true;
+ }
+
+ // Pretend that we exit the "top-level" include file.
+ // Note that in case of an error (e.g. control stack imbalance)
+ // the routine will issue a fatal error.
+ prepExitInclude(true);
+ return false;
+}
+
int TGLexer::getNextChar() {
char CurChar = *CurPtr++;
switch (CurChar) {
@@ -57,16 +113,6 @@ int TGLexer::getNextChar() {
if (CurPtr-1 != CurBuf.end())
return 0; // Just whitespace.
- // If this is the end of an included file, pop the parent file off the
- // include stack.
- SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
- if (ParentIncludeLoc != SMLoc()) {
- CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
- CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
- CurPtr = ParentIncludeLoc.getPointer();
- return getNextChar();
- }
-
// Otherwise, return end of file.
--CurPtr; // Another call to lex will return EOF again.
return EOF;
@@ -83,11 +129,11 @@ int TGLexer::getNextChar() {
}
}
-int TGLexer::peekNextChar(int Index) {
+int TGLexer::peekNextChar(int Index) const {
return *(CurPtr + Index);
}
-tgtok::TokKind TGLexer::LexToken() {
+tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
TokStart = CurPtr;
// This always consumes at least one character.
int CurChar = getNextChar();
@@ -100,7 +146,18 @@ tgtok::TokKind TGLexer::LexToken() {
// Unknown character, emit an error.
return ReturnError(TokStart, "Unexpected character");
- case EOF: return tgtok::Eof;
+ case EOF:
+ // Lex next token, if we just left an include file.
+ // Note that leaving an include file means that the next
+ // symbol is located at the end of 'include "..."'
+ // construct, so LexToken() is called with default
+ // false parameter.
+ if (processEOF())
+ return LexToken();
+
+ // Return EOF denoting the end of lexing.
+ return tgtok::Eof;
+
case ':': return tgtok::colon;
case ';': return tgtok::semi;
case '.': return tgtok::period;
@@ -114,15 +171,27 @@ tgtok::TokKind TGLexer::LexToken() {
case ')': return tgtok::r_paren;
case '=': return tgtok::equal;
case '?': return tgtok::question;
- case '#': return tgtok::paste;
+ case '#':
+ if (FileOrLineStart) {
+ tgtok::TokKind Kind = prepIsDirective();
+ if (Kind != tgtok::Error)
+ return lexPreprocessor(Kind);
+ }
+
+ return tgtok::paste;
+
+ case '\r':
+ PrintFatalError("getNextChar() must never return '\r'");
+ return tgtok::Error;
case 0:
case ' ':
case '\t':
- case '\n':
- case '\r':
// Ignore whitespace.
- return LexToken();
+ return LexToken(FileOrLineStart);
+ case '\n':
+ // Ignore whitespace, and identify the new line.
+ return LexToken(true);
case '/':
// If this is the start of a // comment, skip until the end of the line or
// the end of the buffer.
@@ -133,7 +202,7 @@ tgtok::TokKind TGLexer::LexToken() {
return tgtok::Error;
} else // Otherwise, this is an error.
return ReturnError(TokStart, "Unexpected character");
- return LexToken();
+ return LexToken(FileOrLineStart);
case '-': case '+':
case '0': case '1': case '2': case '3': case '4': case '5': case '6':
case '7': case '8': case '9': {
@@ -249,10 +318,10 @@ tgtok::TokKind TGLexer::LexVarName() {
}
tgtok::TokKind TGLexer::LexIdentifier() {
- // The first letter is [a-zA-Z_#].
+ // The first letter is [a-zA-Z_].
const char *IdentStart = TokStart;
- // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
+ // Match the rest of the identifier regex: [0-9a-zA-Z_]*
while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
++CurPtr;
@@ -322,6 +391,9 @@ bool TGLexer::LexInclude() {
// Save the line number and lex buffer of the includer.
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
CurPtr = CurBuf.begin();
+
+ PrepIncludeStack.push_back(
+ make_unique<std::vector<PreprocessorControlDesc>>());
return false;
}
@@ -496,3 +568,444 @@ tgtok::TokKind TGLexer::LexExclaim() {
return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
}
+
+bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
+ // Report an error, if preprocessor control stack for the current
+ // file is not empty.
+ if (!PrepIncludeStack.back()->empty()) {
+ prepReportPreprocessorStackError();
+
+ return false;
+ }
+
+ // Pop the preprocessing controls from the include stack.
+ if (PrepIncludeStack.empty()) {
+ PrintFatalError("Preprocessor include stack is empty");
+ }
+
+ PrepIncludeStack.pop_back();
+
+ if (IncludeStackMustBeEmpty) {
+ if (!PrepIncludeStack.empty())
+ PrintFatalError("Preprocessor include stack is not empty");
+ } else {
+ if (PrepIncludeStack.empty())
+ PrintFatalError("Preprocessor include stack is empty");
+ }
+
+ return true;
+}
+
+tgtok::TokKind TGLexer::prepIsDirective() const {
+ for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) {
+ int NextChar = *CurPtr;
+ bool Match = true;
+ unsigned I = 0;
+ for (; I < strlen(PreprocessorDirs[ID].Word); ++I) {
+ if (NextChar != PreprocessorDirs[ID].Word[I]) {
+ Match = false;
+ break;
+ }
+
+ NextChar = peekNextChar(I + 1);
+ }
+
+ // Check for whitespace after the directive. If there is no whitespace,
+ // then we do not recognize it as a preprocessing directive.
+ if (Match) {
+ tgtok::TokKind Kind = PreprocessorDirs[ID].Kind;
+
+ // New line and EOF may follow only #else/#endif. It will be reported
+ // as an error for #ifdef/#define after the call to prepLexMacroName().
+ if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF ||
+ NextChar == '\n' ||
+ // It looks like TableGen does not support '\r' as the actual
+ // carriage return, e.g. getNextChar() treats a single '\r'
+ // as '\n'. So we do the same here.
+ NextChar == '\r')
+ return Kind;
+
+ // Allow comments after some directives, e.g.:
+ // #else// OR #else/**/
+ // #endif// OR #endif/**/
+ //
+ // Note that we do allow comments after #ifdef/#define here, e.g.
+ // #ifdef/**/ AND #ifdef//
+ // #define/**/ AND #define//
+ //
+ // These cases will be reported as incorrect after calling
+ // prepLexMacroName(). We could have supported C-style comments
+ // after #ifdef/#define, but this would complicate the code
+ // for little benefit.
+ if (NextChar == '/') {
+ NextChar = peekNextChar(I + 1);
+
+ if (NextChar == '*' || NextChar == '/')
+ return Kind;
+
+ // Pretend that we do not recognize the directive.
+ }
+ }
+ }
+
+ return tgtok::Error;
+}
+
+bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
+ TokStart = CurPtr;
+
+ for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID)
+ if (PreprocessorDirs[ID].Kind == Kind) {
+ // Advance CurPtr to the end of the preprocessing word.
+ CurPtr += strlen(PreprocessorDirs[ID].Word);
+ return true;
+ }
+
+ PrintFatalError("Unsupported preprocessing token in "
+ "prepEatPreprocessorDirective()");
+ return false;
+}
+
+tgtok::TokKind TGLexer::lexPreprocessor(
+ tgtok::TokKind Kind, bool ReturnNextLiveToken) {
+
+ // We must be looking at a preprocessing directive. Eat it!
+ if (!prepEatPreprocessorDirective(Kind))
+ PrintFatalError("lexPreprocessor() called for unknown "
+ "preprocessor directive");
+
+ if (Kind == tgtok::Ifdef) {
+ StringRef MacroName = prepLexMacroName();
+ if (MacroName.empty())
+ return ReturnError(TokStart, "Expected macro name after #ifdef");
+
+ bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
+
+ // Regardless of whether we are processing tokens or not,
+ // we put the #ifdef control on stack.
+ PrepIncludeStack.back()->push_back(
+ {Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
+
+ if (!prepSkipDirectiveEnd())
+ return ReturnError(CurPtr,
+ "Only comments are supported after #ifdef NAME");
+
+ // If we were not processing tokens before this #ifdef,
+ // then just return back to the lines skipping code.
+ if (!ReturnNextLiveToken)
+ return Kind;
+
+ // If we were processing tokens before this #ifdef,
+ // and the macro is defined, then just return the next token.
+ if (MacroIsDefined)
+ return LexToken();
+
+ // We were processing tokens before this #ifdef, and the macro
+ // is not defined, so we have to start skipping the lines.
+ // If the skipping is successful, it will return the token following
+ // either #else or #endif corresponding to this #ifdef.
+ if (prepSkipRegion(ReturnNextLiveToken))
+ return LexToken();
+
+ return tgtok::Error;
+ } else if (Kind == tgtok::Else) {
+ // Check if this #else is correct before calling prepSkipDirectiveEnd(),
+ // which will move CurPtr away from the beginning of #else.
+ if (PrepIncludeStack.back()->empty())
+ return ReturnError(TokStart, "#else without #ifdef");
+
+ PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back();
+
+ if (IfdefEntry.Kind != tgtok::Ifdef) {
+ PrintError(TokStart, "double #else");
+ return ReturnError(IfdefEntry.SrcPos, "Previous #else is here");
+ }
+
+ // Replace the corresponding #ifdef's control with its negation
+ // on the control stack.
+ PrepIncludeStack.back()->pop_back();
+ PrepIncludeStack.back()->push_back(
+ {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)});
+
+ if (!prepSkipDirectiveEnd())
+ return ReturnError(CurPtr, "Only comments are supported after #else");
+
+ // If we were processing tokens before this #else,
+ // we have to start skipping lines until the matching #endif.
+ if (ReturnNextLiveToken) {
+ if (prepSkipRegion(ReturnNextLiveToken))
+ return LexToken();
+
+ return tgtok::Error;
+ }
+
+ // Return to the lines skipping code.
+ return Kind;
+ } else if (Kind == tgtok::Endif) {
+ // Check if this #endif is correct before calling prepSkipDirectiveEnd(),
+ // which will move CurPtr away from the beginning of #endif.
+ if (PrepIncludeStack.back()->empty())
+ return ReturnError(TokStart, "#endif without #ifdef");
+
+ auto &IfdefOrElseEntry = PrepIncludeStack.back()->back();
+
+ if (IfdefOrElseEntry.Kind != tgtok::Ifdef &&
+ IfdefOrElseEntry.Kind != tgtok::Else) {
+ PrintFatalError("Invalid preprocessor control on the stack");
+ return tgtok::Error;
+ }
+
+ if (!prepSkipDirectiveEnd())
+ return ReturnError(CurPtr, "Only comments are supported after #endif");
+
+ PrepIncludeStack.back()->pop_back();
+
+ // If we were processing tokens before this #endif, then
+ // we should continue it.
+ if (ReturnNextLiveToken) {
+ return LexToken();
+ }
+
+ // Return to the lines skipping code.
+ return Kind;
+ } else if (Kind == tgtok::Define) {
+ StringRef MacroName = prepLexMacroName();
+ if (MacroName.empty())
+ return ReturnError(TokStart, "Expected macro name after #define");
+
+ if (!DefinedMacros.insert(MacroName).second)
+ PrintWarning(getLoc(),
+ "Duplicate definition of macro: " + Twine(MacroName));
+
+ if (!prepSkipDirectiveEnd())
+ return ReturnError(CurPtr,
+ "Only comments are supported after #define NAME");
+
+ if (!ReturnNextLiveToken) {
+ PrintFatalError("#define must be ignored during the lines skipping");
+ return tgtok::Error;
+ }
+
+ return LexToken();
+ }
+
+ PrintFatalError("Preprocessing directive is not supported");
+ return tgtok::Error;
+}
+
+bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
+ if (!MustNeverBeFalse)
+ PrintFatalError("Invalid recursion.");
+
+ do {
+ // Skip all symbols to the line end.
+ prepSkipToLineEnd();
+
+ // Find the first non-whitespace symbol in the next line(s).
+ if (!prepSkipLineBegin())
+ return false;
+
+ // If the first non-blank/comment symbol on the line is '#',
+ // it may be a start of preprocessing directive.
+ //
+ // If it is not '#' just go to the next line.
+ if (*CurPtr == '#')
+ ++CurPtr;
+ else
+ continue;
+
+ tgtok::TokKind Kind = prepIsDirective();
+
+ // If we did not find a preprocessing directive or it is #define,
+ // then just skip to the next line. We do not have to do anything
+ // for #define in the line-skipping mode.
+ if (Kind == tgtok::Error || Kind == tgtok::Define)
+ continue;
+
+ tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);
+
+ // If lexPreprocessor() encountered an error during lexing this
+ // preprocessor idiom, then return false to the calling lexPreprocessor().
+ // This will force tgtok::Error to be returned to the tokens processing.
+ if (ProcessedKind == tgtok::Error)
+ return false;
+
+ if (Kind != ProcessedKind)
+ PrintFatalError("prepIsDirective() and lexPreprocessor() "
+ "returned different token kinds");
+
+ // If this preprocessing directive enables tokens processing,
+ // then return to the lexPreprocessor() and get to the next token.
+ // We can move from line-skipping mode to processing tokens only
+ // due to #else or #endif.
+ if (prepIsProcessingEnabled()) {
+ if (Kind != tgtok::Else && Kind != tgtok::Endif) {
+ PrintFatalError("Tokens processing was enabled by an unexpected "
+ "preprocessing directive");
+ return false;
+ }
+
+ return true;
+ }
+ } while (CurPtr != CurBuf.end());
+
+ // We have reached the end of the file, but never left the lines-skipping
+ // mode. This means there is no matching #endif.
+ prepReportPreprocessorStackError();
+ return false;
+}
+
+StringRef TGLexer::prepLexMacroName() {
+ // Skip whitespaces between the preprocessing directive and the macro name.
+ while (*CurPtr == ' ' || *CurPtr == '\t')
+ ++CurPtr;
+
+ TokStart = CurPtr;
+ // Macro names start with [a-zA-Z_].
+ if (*CurPtr != '_' && !isalpha(*CurPtr))
+ return "";
+
+ // Match the rest of the identifier regex: [0-9a-zA-Z_]*
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
+ ++CurPtr;
+
+ return StringRef(TokStart, CurPtr - TokStart);
+}
+
+bool TGLexer::prepSkipLineBegin() {
+ while (CurPtr != CurBuf.end()) {
+ switch (*CurPtr) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ break;
+
+ case '/': {
+ int NextChar = peekNextChar(1);
+ if (NextChar == '*') {
+ // Skip C-style comment.
+ // Note that we do not care about skipping the C++-style comments.
+ // If the line contains "//", it may not contain any processable
+ // preprocessing directive. Just return CurPtr pointing to
+ // the first '/' in this case. We also do not care about
+ // incorrect symbols after the first '/' - we are in lines-skipping
+ // mode, so incorrect code is allowed to some extent.
+
+ // Set TokStart to the beginning of the comment to enable proper
+ // diagnostic printing in case of error in SkipCComment().
+ TokStart = CurPtr;
+
+ // CurPtr must point to '*' before call to SkipCComment().
+ ++CurPtr;
+ if (SkipCComment())
+ return false;
+ } else {
+ // CurPtr points to the non-whitespace '/'.
+ return true;
+ }
+
+ // We must not increment CurPtr after the comment was lexed.
+ continue;
+ }
+
+ default:
+ return true;
+ }
+
+ ++CurPtr;
+ }
+
+ // We have reached the end of the file. Return to the lines skipping
+ // code, and allow it to handle the EOF as needed.
+ return true;
+}
+
+bool TGLexer::prepSkipDirectiveEnd() {
+ while (CurPtr != CurBuf.end()) {
+ switch (*CurPtr) {
+ case ' ':
+ case '\t':
+ break;
+
+ case '\n':
+ case '\r':
+ return true;
+
+ case '/': {
+ int NextChar = peekNextChar(1);
+ if (NextChar == '/') {
+ // Skip C++-style comment.
+ // We may just return true now, but let's skip to the line/buffer end
+ // to simplify the method specification.
+ ++CurPtr;
+ SkipBCPLComment();
+ } else if (NextChar == '*') {
+ // When we are skipping C-style comment at the end of a preprocessing
+ // directive, we can skip several lines. If any meaningful TD token
+ // follows the end of the C-style comment on the same line, it will
+        // be considered an invalid usage of a TD token.
+ // For example, we want to forbid usages like this one:
+ // #define MACRO class Class {}
+ // But with C-style comments we also disallow the following:
+ // #define MACRO /* This macro is used
+ // to ... */ class Class {}
+ // One can argue that this should be allowed, but it does not seem
+        // to be worth the complication. Moreover, this matches
+ // the C preprocessor behavior.
+
+ // Set TokStart to the beginning of the comment to enable proper
+        // diagnostic printing in case of an error in SkipCComment().
+ TokStart = CurPtr;
+ ++CurPtr;
+ if (SkipCComment())
+ return false;
+ } else {
+ TokStart = CurPtr;
+ PrintError(CurPtr, "Unexpected character");
+ return false;
+ }
+
+ // We must not increment CurPtr after the comment was lexed.
+ continue;
+ }
+
+ default:
+      // Do not allow any non-whitespace characters after the directive.
+ TokStart = CurPtr;
+ return false;
+ }
+
+ ++CurPtr;
+ }
+
+ return true;
+}
+
+void TGLexer::prepSkipToLineEnd() {
+ while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end())
+ ++CurPtr;
+}
+
+bool TGLexer::prepIsProcessingEnabled() {
+ for (auto I = PrepIncludeStack.back()->rbegin(),
+ E = PrepIncludeStack.back()->rend();
+ I != E; ++I) {
+ if (!I->IsDefined)
+ return false;
+ }
+
+ return true;
+}
+
+void TGLexer::prepReportPreprocessorStackError() {
+ if (PrepIncludeStack.back()->empty())
+ PrintFatalError("prepReportPreprocessorStackError() called with "
+ "empty control stack");
+
+ auto &PrepControl = PrepIncludeStack.back()->back();
+ PrintError(CurBuf.end(), "Reached EOF without matching #endif");
+ PrintError(PrepControl.SrcPos, "The latest preprocessor control is here");
+
+ TokStart = CurPtr;
+}
diff --git a/contrib/llvm/lib/TableGen/TGLexer.h b/contrib/llvm/lib/TableGen/TGLexer.h
index 2c80743e3a68..e9980b36b97b 100644
--- a/contrib/llvm/lib/TableGen/TGLexer.h
+++ b/contrib/llvm/lib/TableGen/TGLexer.h
@@ -14,11 +14,14 @@
#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
#define LLVM_LIB_TABLEGEN_TGLEXER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <map>
+#include <memory>
#include <string>
namespace llvm {
@@ -59,7 +62,11 @@ namespace tgtok {
BinaryIntVal,
// String valued tokens.
- Id, StrVal, VarName, CodeFragment
+ Id, StrVal, VarName, CodeFragment,
+
+ // Preprocessing tokens for internal usage by the lexer.
+ // They are never returned as a result of Lex().
+ Ifdef, Else, Endif, Define
};
}
@@ -87,10 +94,10 @@ private:
DependenciesMapTy Dependencies;
public:
- TGLexer(SourceMgr &SrcMgr);
+ TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
tgtok::TokKind Lex() {
- return CurCode = LexToken();
+ return CurCode = LexToken(CurPtr == CurBuf.begin());
}
const DependenciesMapTy &getDependencies() const {
@@ -119,12 +126,13 @@ public:
private:
/// LexToken - Read the next token and return its code.
- tgtok::TokKind LexToken();
+ tgtok::TokKind LexToken(bool FileOrLineStart = false);
+ tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
int getNextChar();
- int peekNextChar(int Index);
+ int peekNextChar(int Index) const;
void SkipBCPLComment();
bool SkipCComment();
tgtok::TokKind LexIdentifier();
@@ -134,6 +142,231 @@ private:
tgtok::TokKind LexNumber();
tgtok::TokKind LexBracket();
tgtok::TokKind LexExclaim();
+
+ // Process EOF encountered in LexToken().
+ // If EOF is met in an include file, then the method will update
+ // CurPtr, CurBuf and preprocessing include stack, and return true.
+ // If EOF is met in the top-level file, then the method will
+ // update and check the preprocessing include stack, and return false.
+ bool processEOF();
+
+ // *** Structures and methods for preprocessing support ***
+
+ // A set of macro names that are defined either via command line or
+ // by using:
+ // #define NAME
+ StringSet<> DefinedMacros;
+
+ // Each of #ifdef and #else directives has a descriptor associated
+ // with it.
+ //
+ // An ordered list of preprocessing controls defined by #ifdef/#else
+ // directives that are in effect currently is called preprocessing
+ // control stack. It is represented as a vector of PreprocessorControlDesc's.
+ //
+ // The control stack is updated according to the following rules:
+ //
+ // For each #ifdef we add an element to the control stack.
+ // For each #else we replace the top element with a descriptor
+ // with an inverted IsDefined value.
+ // For each #endif we pop the top element from the control stack.
+ //
+ // When CurPtr reaches the current buffer's end, the control stack
+ // must be empty, i.e. #ifdef and the corresponding #endif
+ // must be located in the same file.
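+  //
+  // For example (an illustrative sketch of the rules above):
+  //   #ifdef NAME   // push {Ifdef, IsDefined = whether NAME is defined}
+  //   ...
+  //   #else         // replace the top with {Else, IsDefined inverted}
+  //   ...
+  //   #endif        // pop the top element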
+ struct PreprocessorControlDesc {
+ // Either tgtok::Ifdef or tgtok::Else.
+ tgtok::TokKind Kind;
+
+ // True, if the condition for this directive is true, false - otherwise.
+ // Examples:
+ // #ifdef NAME : true, if NAME is defined, false - otherwise.
+ // ...
+ // #else : false, if NAME is defined, true - otherwise.
+ bool IsDefined;
+
+ // Pointer into CurBuf to the beginning of the preprocessing directive
+ // word, e.g.:
+ // #ifdef NAME
+ // ^ - SrcPos
+ SMLoc SrcPos;
+ };
+
+ // We want to disallow code like this:
+ // file1.td:
+ // #define NAME
+ // #ifdef NAME
+ // include "file2.td"
+ // EOF
+ // file2.td:
+ // #endif
+ // EOF
+ //
+ // To do this, we clear the preprocessing control stack on entry
+  // to each of the included files. PrepIncludeStack is used to store
+ // preprocessing control stacks for the current file and all its
+ // parent files. The back() element is the preprocessing control
+ // stack for the current file.
+ std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
+ PrepIncludeStack;
+
+ // Validate that the current preprocessing control stack is empty,
+ // since we are about to exit a file, and pop the include stack.
+ //
+ // If IncludeStackMustBeEmpty is true, the include stack must be empty
+ // after the popping, otherwise, the include stack must not be empty
+ // after the popping. Basically, the include stack must be empty
+ // only if we exit the "top-level" file (i.e. finish lexing).
+ //
+ // The method returns false, if the current preprocessing control stack
+ // is not empty (e.g. there is an unterminated #ifdef/#else),
+ // true - otherwise.
+ bool prepExitInclude(bool IncludeStackMustBeEmpty);
+
+ // Look ahead for a preprocessing directive starting from CurPtr. The caller
+  // must only call this method if *(CurPtr - 1) is '#'. If the method matches
+ // a preprocessing directive word followed by a whitespace, then it returns
+ // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
+ //
+ // CurPtr is not adjusted by this method.
+ tgtok::TokKind prepIsDirective() const;
+
+ // Given a preprocessing token kind, adjusts CurPtr to the end
+ // of the preprocessing directive word. Returns true, unless
+ // an unsupported token kind is passed in.
+ //
+ // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
+ // to avoid adjusting CurPtr before we are sure that '#' is followed
+ // by a preprocessing directive. If it is not, then we fall back to
+ // tgtok::paste interpretation of '#'.
+ bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
+
+ // The main "exit" point from the token parsing to preprocessor.
+ //
+ // The method is called for CurPtr, when prepIsDirective() returns
+ // true. The first parameter matches the result of prepIsDirective(),
+ // denoting the actual preprocessor directive to be processed.
+ //
+ // If the preprocessing directive disables the tokens processing, e.g.:
+ // #ifdef NAME // NAME is undefined
+ // then lexPreprocessor() enters the lines-skipping mode.
+ // In this mode, it does not parse any tokens, because the code under
+ // the #ifdef may not even be a correct tablegen code. The preprocessor
+ // looks for lines containing other preprocessing directives, which
+ // may be prepended with whitespaces and C-style comments. If the line
+  // may be preceded by whitespace and C-style comments. If the line
+ // Otherwise, the preprocessing directive is processed by recursively
+ // calling lexPreprocessor(). The processing of the encountered
+ // preprocessing directives includes updating preprocessing control stack
+ // and adding new macros into DefinedMacros set.
+ //
+ // The second parameter controls whether lexPreprocessor() is called from
+ // LexToken() (true) or recursively from lexPreprocessor() (false).
+ //
+ // If ReturnNextLiveToken is true, the method returns the next
+ // LEX token following the current directive or following the end
+ // of the disabled preprocessing region corresponding to this directive.
+ // If ReturnNextLiveToken is false, the method returns the first parameter,
+ // unless there were errors encountered in the disabled preprocessing
+ // region - in this case, it returns tgtok::Error.
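+  //
+  // For example, assuming NAME is not defined:
+  //   #ifdef NAME
+  //   arbitrary, possibly invalid, code
+  //   #endif
+  //   class Foo;
+  // a call lexPreprocessor(tgtok::Ifdef) skips the disabled region and
+  // returns the token for 'class' (Foo is just an illustrative name).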
+ tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
+ bool ReturnNextLiveToken = true);
+
+ // Worker method for lexPreprocessor() to skip lines after some
+ // preprocessing directive up to the buffer end or to the directive
+ // that re-enables token processing. The method returns true
+ // upon processing the next directive that re-enables tokens
+ // processing. False is returned if an error was encountered.
+ //
+ // Note that prepSkipRegion() calls lexPreprocessor() to process
+ // encountered preprocessing directives. In this case, the second
+ // parameter to lexPreprocessor() is set to false. Being passed
+  // parameter to lexPreprocessor() is set to false. When it is passed
+  // a false ReturnNextLiveToken, lexPreprocessor() must never call
+  // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
+ bool prepSkipRegion(bool MustNeverBeFalse);
+
+ // Lex name of the macro after either #ifdef or #define. We could have used
+ // LexIdentifier(), but it has special handling of "include" word, which
+ // could result in awkward diagnostic errors. Consider:
+ // ----
+ // #ifdef include
+ // class ...
+ // ----
+ // LexIdentifier() will engage LexInclude(), which will complain about
+ // missing file with name "class". Instead, prepLexMacroName() will treat
+ // "include" as a normal macro name.
+ //
+ // On entry, CurPtr points to the end of a preprocessing directive word.
+ // The method allows for whitespaces between the preprocessing directive
+ // and the macro name. The allowed whitespaces are ' ' and '\t'.
+ //
+ // If the first non-whitespace symbol after the preprocessing directive
+ // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
+ // the method updates TokStart to the position of the first non-whitespace
+ // symbol, sets CurPtr to the position of the macro name's last symbol,
+ // and returns a string reference to the macro name. Otherwise,
+ // TokStart is set to the first non-whitespace symbol after the preprocessing
+ // directive, and the method returns an empty string reference.
+ //
+ // In all cases, TokStart may be used to point to the word following
+ // the preprocessing directive.
+ StringRef prepLexMacroName();
+
+ // Skip any whitespaces starting from CurPtr. The method is used
+ // only in the lines-skipping mode to find the first non-whitespace
+ // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
+ // and '\r'. The method skips C-style comments as well, because
+ // it is used to find the beginning of the preprocessing directive.
+  // If we do not handle C-style comments, the following code would
+ // result in incorrect detection of a preprocessing directive:
+ // /*
+ // #ifdef NAME
+ // */
+ // As long as we skip C-style comments, the following code is correctly
+ // recognized as a preprocessing directive:
+ // /* first line comment
+ // second line comment */ #ifdef NAME
+ //
+  // The method returns true upon reaching the first non-whitespace symbol
+  // or EOF; CurPtr is set to point to this symbol. The method returns false
+  // if an error occurred while skipping a C-style comment.
+ bool prepSkipLineBegin();
+
+ // Skip any whitespaces or comments after a preprocessing directive.
+ // The method returns true upon reaching either end of the line
+ // or end of the file. If there is a multiline C-style comment
+ // after the preprocessing directive, the method skips
+ // the comment, so the final CurPtr may point to one of the next lines.
+  // The method returns false if an error occurred while skipping a
+  // C- or C++-style comment, or if a non-whitespace symbol appears
+  // after the preprocessing directive.
+ //
+  // The method may be called both during lines-skipping and tokens
+ // processing. It actually verifies that only whitespaces or/and
+ // comments follow a preprocessing directive.
+ //
+  // After the execution of this method, CurPtr points either to a new line
+  // symbol, the buffer end, or a non-whitespace symbol following the
+  // preprocessing directive.
+ bool prepSkipDirectiveEnd();
+
+ // Skip all symbols to the end of the line/file.
+ // The method adjusts CurPtr, so that it points to either new line
+ // symbol in the current line or the buffer end.
+ void prepSkipToLineEnd();
+
+ // Return true, if the current preprocessor control stack is such that
+ // we should allow lexer to process the next token, false - otherwise.
+ //
+ // In particular, the method returns true, if all the #ifdef/#else
+ // controls on the stack have their IsDefined member set to true.
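+  //
+  // For example, with a stack of {Ifdef, IsDefined=true}, {Ifdef, IsDefined=false}
+  // (an illustrative sketch), the method returns false and the lexer stays
+  // in the lines-skipping mode.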
+ bool prepIsProcessingEnabled();
+
+ // Report an error, if we reach EOF with non-empty preprocessing control
+ // stack. This means there is no matching #endif for the previous
+ // #ifdef/#else.
+ void prepReportPreprocessorStackError();
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/TableGen/TGParser.h b/contrib/llvm/lib/TableGen/TGParser.h
index 0a28b3a03aa1..e3849043513b 100644
--- a/contrib/llvm/lib/TableGen/TGParser.h
+++ b/contrib/llvm/lib/TableGen/TGParser.h
@@ -115,8 +115,9 @@ class TGParser {
};
public:
- TGParser(SourceMgr &SrcMgr, RecordKeeper &records)
- : Lex(SrcMgr), CurMultiClass(nullptr), Records(records) {}
+ TGParser(SourceMgr &SrcMgr, ArrayRef<std::string> Macros,
+ RecordKeeper &records)
+ : Lex(SrcMgr, Macros), CurMultiClass(nullptr), Records(records) {}
/// ParseFile - Main entrypoint for parsing a tblgen file. These parser
/// routines return true on error, or false on success.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm/lib/Target/AArch64/AArch64.h
index edda13ce97ef..c36d9354f3ba 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.h
@@ -32,12 +32,14 @@ class MachineFunctionPass;
FunctionPass *createAArch64DeadRegisterDefinitions();
FunctionPass *createAArch64RedundantCopyEliminationPass();
FunctionPass *createAArch64CondBrTuning();
+FunctionPass *createAArch64CompressJumpTablesPass();
FunctionPass *createAArch64ConditionalCompares();
FunctionPass *createAArch64AdvSIMDScalar();
FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createAArch64StorePairSuppressPass();
FunctionPass *createAArch64ExpandPseudoPass();
+FunctionPass *createAArch64SpeculationHardeningPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
FunctionPass *createAArch64SIMDInstrOptPass();
ModulePass *createAArch64PromoteConstantPass();
@@ -46,6 +48,7 @@ FunctionPass *createAArch64A57FPLoadBalancing();
FunctionPass *createAArch64A53Fix835769();
FunctionPass *createFalkorHWPFFixPass();
FunctionPass *createFalkorMarkStridedAccessesPass();
+FunctionPass *createAArch64BranchTargetsPass();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
@@ -53,18 +56,23 @@ FunctionPass *createAArch64CollectLOHPass();
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
AArch64Subtarget &, AArch64RegisterBankInfo &);
+FunctionPass *createAArch64PreLegalizeCombiner();
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
+void initializeAArch64BranchTargetsPass(PassRegistry&);
void initializeAArch64CollectLOHPass(PassRegistry&);
void initializeAArch64CondBrTuningPass(PassRegistry &);
+void initializeAArch64CompressJumpTablesPass(PassRegistry&);
void initializeAArch64ConditionalComparesPass(PassRegistry&);
void initializeAArch64ConditionOptimizerPass(PassRegistry&);
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
void initializeAArch64ExpandPseudoPass(PassRegistry&);
+void initializeAArch64SpeculationHardeningPass(PassRegistry&);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
+void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
void initializeAArch64StorePairSuppressPass(PassRegistry&);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
index a69d38144c78..8f79140cba64 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -65,25 +65,56 @@ def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
"Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
+def FeaturePAN : SubtargetFeature<
+ "pan", "HasPAN", "true",
+ "Enables ARM v8.1 Privileged Access-Never extension">;
+
+def FeatureLOR : SubtargetFeature<
+ "lor", "HasLOR", "true",
+ "Enables ARM v8.1 Limited Ordering Regions extension">;
+
+def FeatureVH : SubtargetFeature<
+ "vh", "HasVH", "true",
+ "Enables ARM v8.1 Virtual Host extension">;
+
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable ARMv8 PMUv3 Performance Monitors extension">;
def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
"Full FP16", [FeatureFPARMv8]>;
+def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
+ "Enable FP16 FML instructions", [FeatureFullFP16]>;
+
def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true",
"Enable Statistical Profiling extension">;
+def FeaturePAN_RWV : SubtargetFeature<
+ "pan-rwv", "HasPAN_RWV", "true",
+ "Enable v8.2 PAN s1e1R and s1e1W Variants",
+ [FeaturePAN]>;
+
+// UAO PState
+def FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true",
+ "Enable v8.2 UAO PState">;
+
+def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
+ "true", "Enable v8.2 data Cache Clean to Point of Persistence" >;
+
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
"Enable Scalable Vector Extension (SVE) instructions">;
-/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
+def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
+ "Has zero-cycle zeroing instructions for generic registers">;
+
+def FeatureZCZeroingFP : SubtargetFeature<"zcz-fp", "HasZeroCycleZeroingFP", "true",
+ "Has zero-cycle zeroing instructions for FP registers">;
-/// Cyclone has instructions which zero registers for "free".
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
- "Has zero-cycle zeroing instructions">;
+ "Has zero-cycle zeroing instructions",
+ [FeatureZCZeroingGP, FeatureZCZeroingFP]>;
/// ... but the floating-point version doesn't quite work in rare cases on older
/// CPUs.
@@ -96,13 +127,14 @@ def FeatureStrictAlign : SubtargetFeature<"strict-align",
"Disallow all unaligned memory "
"access">;
-def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true",
- "Reserve X18, making it unavailable "
- "as a GPR">;
+foreach i = {1-7,18,20} in
+ def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true",
+ "Reserve X"#i#", making it unavailable "
+ "as a GPR">;
-def FeatureReserveX20 : SubtargetFeature<"reserve-x20", "ReserveX20", "true",
- "Reserve X20, making it unavailable "
- "as a GPR">;
+foreach i = {8-15,18} in
+ def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i,
+ "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">;
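+
+// For example, the two loops above expand to subtarget features such as
+// "reserve-x18" and "call-saved-x8".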
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
@@ -117,11 +149,11 @@ def FeaturePredictableSelectIsExpensive : SubtargetFeature<
def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
"CustomAsCheapAsMove", "true",
- "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">;
+ "Use custom handling of cheap instructions">;
def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
"ExynosAsCheapAsMove", "true",
- "Use Exynos specific code in TargetInstrInfo::isAsCheapAsAMove()",
+ "Use Exynos specific handling of cheap instructions",
[FeatureCustomCheapAsMoveHandling]>;
def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
@@ -156,10 +188,18 @@ def FeatureFuseAES : SubtargetFeature<
"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
+def FeatureFuseArithmeticLogic : SubtargetFeature<
+ "fuse-arith-logic", "HasFuseArithmeticLogic", "true",
+ "CPU fuses arithmetic and logic operations">;
+
def FeatureFuseCCSelect : SubtargetFeature<
"fuse-csel", "HasFuseCCSelect", "true",
"CPU fuses conditional select operations">;
+def FeatureFuseCryptoEOR : SubtargetFeature<
+ "fuse-crypto-eor", "HasFuseCryptoEOR", "true",
+ "CPU fuses AES/PMULL and EOR operations">;
+
def FeatureFuseLiterals : SubtargetFeature<
"fuse-literals", "HasFuseLiterals", "true",
"CPU fuses literal generation operations">;
@@ -168,6 +208,10 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
+def FeatureForce32BitJumpTables
+ : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",
+ "Force jump table entries to be 32-bits wide except at MinSize">;
+
def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true",
"Enable support for RCPC extension">;
@@ -179,6 +223,66 @@ def FeatureDotProd : SubtargetFeature<
"dotprod", "HasDotProd", "true",
"Enable dot product support">;
+def FeaturePA : SubtargetFeature<
+ "pa", "HasPA", "true",
+  "Enable v8.3-A Pointer Authentication enhancement">;
+
+def FeatureJS : SubtargetFeature<
+ "jsconv", "HasJS", "true",
+  "Enable v8.3-A JavaScript FP conversion enhancement",
+ [FeatureFPARMv8]>;
+
+def FeatureCCIDX : SubtargetFeature<
+ "ccidx", "HasCCIDX", "true",
+ "Enable v8.3-A Extend of the CCSIDR number of sets">;
+
+def FeatureComplxNum : SubtargetFeature<
+ "complxnum", "HasComplxNum", "true",
+ "Enable v8.3-A Floating-point complex number support",
+ [FeatureNEON]>;
+
+def FeatureNV : SubtargetFeature<
+ "nv", "HasNV", "true",
+  "Enable v8.4-A Nested Virtualization Enhancement">;
+
+def FeatureRASv8_4 : SubtargetFeature<
+ "rasv8_4", "HasRASv8_4", "true",
+ "Enable v8.4-A Reliability, Availability and Serviceability extension",
+ [FeatureRAS]>;
+
+def FeatureMPAM : SubtargetFeature<
+ "mpam", "HasMPAM", "true",
+ "Enable v8.4-A Memory system Partitioning and Monitoring extension">;
+
+def FeatureDIT : SubtargetFeature<
+ "dit", "HasDIT", "true",
+ "Enable v8.4-A Data Independent Timing instructions">;
+
+def FeatureTRACEV8_4 : SubtargetFeature<
+ "tracev8.4", "HasTRACEV8_4", "true",
+ "Enable v8.4-A Trace extension">;
+
+def FeatureAM : SubtargetFeature<
+ "am", "HasAM", "true",
+ "Enable v8.4-A Activity Monitors extension">;
+
+def FeatureSEL2 : SubtargetFeature<
+ "sel2", "HasSEL2", "true",
+ "Enable v8.4-A Secure Exception Level 2 extension">;
+
+def FeatureTLB_RMI : SubtargetFeature<
+ "tlb-rmi", "HasTLB_RMI", "true",
+ "Enable v8.4-A TLB Range and Maintenance Instructions">;
+
+def FeatureFMI : SubtargetFeature<
+ "fmi", "HasFMI", "true",
+ "Enable v8.4-A Flag Manipulation Instructions">;
+
+// 8.4 RCPC enhancements: LDAPR & STLR instructions with Immediate Offset
+def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true",
+ "Enable v8.4-A RCPC instructions with Immediate Offsets",
+ [FeatureRCPC]>;
+
def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
"NegativeImmediates", "false",
"Convert immediates and instructions "
@@ -196,21 +300,65 @@ def FeatureAggressiveFMA :
"true",
"Enable Aggressive FMA for floating-point.">;
+def FeatureAltFPCmp : SubtargetFeature<"altnzcv", "HasAlternativeNZCV", "true",
+ "Enable alternative NZCV format for floating point comparisons">;
+
+def FeatureFRInt3264 : SubtargetFeature<"fptoint", "HasFRInt3264", "true",
+ "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to "
+ "an integer (in FP format) forcing it to fit into a 32- or 64-bit int" >;
+
+def FeatureSpecRestrict : SubtargetFeature<"specrestrict", "HasSpecRestrict",
+ "true", "Enable architectural speculation restriction" >;
+
+def FeatureSB : SubtargetFeature<"sb", "HasSB",
+ "true", "Enable v8.5 Speculation Barrier" >;
+
+def FeatureSSBS : SubtargetFeature<"ssbs", "HasSSBS",
+ "true", "Enable Speculative Store Bypass Safe bit" >;
+
+def FeaturePredRes : SubtargetFeature<"predres", "HasPredRes", "true",
+ "Enable v8.5a execution and data prediction invalidation instructions" >;
+
+def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "HasCCDP",
+ "true", "Enable v8.5 Cache Clean to Point of Deep Persistence" >;
+
+def FeatureBranchTargetId : SubtargetFeature<"bti", "HasBTI",
+ "true", "Enable Branch Target Identification" >;
+
+def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen",
+ "true", "Enable Random Number generation instructions" >;
+
+def FeatureMTE : SubtargetFeature<"mte", "HasMTE",
+ "true", "Enable Memory Tagging Extension" >;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
- "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM]>;
+ "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM,
+ FeaturePAN, FeatureLOR, FeatureVH]>;
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
- "Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>;
+ "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO,
+ FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>;
def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
- "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC]>;
+ "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePA,
+ FeatureJS, FeatureCCIDX, FeatureComplxNum]>;
def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
- "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd]>;
+ "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd,
+ FeatureNV, FeatureRASv8_4, FeatureMPAM, FeatureDIT,
+ FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI,
+ FeatureFMI, FeatureRCPC_IMMO]>;
+
+def HasV8_5aOps : SubtargetFeature<
+ "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
+ [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict,
+ FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist,
+ FeatureBranchTargetId]
+>;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -226,6 +374,8 @@ include "AArch64CallingConvention.td"
include "AArch64Schedule.td"
include "AArch64InstrInfo.td"
+include "AArch64SchedPredicates.td"
+include "AArch64SchedPredExynos.td"
def AArch64InstrInfo : InstrInfo;
@@ -245,6 +395,7 @@ include "AArch64SchedFalkor.td"
include "AArch64SchedKryo.td"
include "AArch64SchedExynosM1.td"
include "AArch64SchedExynosM3.td"
+include "AArch64SchedExynosM4.td"
include "AArch64SchedThunderX.td"
include "AArch64SchedThunderX2T99.td"
@@ -343,6 +494,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeatureDisableLatencySchedHeuristic,
FeatureFPARMv8,
FeatureFuseAES,
+ FeatureFuseCryptoEOR,
FeatureNEON,
FeaturePerfMon,
FeatureZCRegMove,
@@ -356,14 +508,13 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
FeatureCRC,
FeatureCrypto,
FeatureExynosCheapAsMoveHandling,
- FeatureFPARMv8,
+ FeatureForce32BitJumpTables,
FeatureFuseAES,
- FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeatureSlowMisaligned128Store,
FeatureUseRSqrt,
- FeatureZCZeroing]>;
+ FeatureZCZeroingFP]>;
def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M2 processors",
@@ -371,29 +522,47 @@ def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
FeatureCRC,
FeatureCrypto,
FeatureExynosCheapAsMoveHandling,
- FeatureFPARMv8,
+ FeatureForce32BitJumpTables,
FeatureFuseAES,
- FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeatureSlowMisaligned128Store,
- FeatureZCZeroing]>;
+ FeatureZCZeroingFP]>;
def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
[FeatureCRC,
FeatureCrypto,
FeatureExynosCheapAsMoveHandling,
- FeatureFPARMv8,
+ FeatureForce32BitJumpTables,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseCCSelect,
FeatureFuseLiterals,
FeatureLSLFast,
- FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
+ FeatureZCZeroingFP]>;
+
+def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
+ "Samsung Exynos-M4 processors",
+ [HasV8_2aOps,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureCrypto,
+ FeatureDotProd,
+ FeatureExynosCheapAsMoveHandling,
+ FeatureForce32BitJumpTables,
+ FeatureFP16FML,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseArithmeticLogic,
+ FeatureFuseCCSelect,
+ FeatureFuseLiterals,
+ FeatureLSLFast,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
FeatureZCZeroing]>;
def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
@@ -438,7 +607,7 @@ def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
FeatureLSLFast,
- HasV8_3aOps]>;
+ HasV8_4aOps]>;
def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
"ThunderX2T99",
@@ -497,6 +666,21 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
FeaturePredictableSelectIsExpensive,
FeatureNEON]>;
+def ProcTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
+ "HiSilicon TS-V110 processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureSPE,
+ FeatureFullFP16,
+ FeatureFP16FML,
+ FeatureDotProd]>;
+
def : ProcessorModel<"generic", NoSchedModel, [
FeatureFPARMv8,
FeatureFuseAES,
@@ -518,7 +702,7 @@ def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
-def : ProcessorModel<"exynos-m4", ExynosM3Model, [ProcExynosM3]>;
+def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
@@ -529,6 +713,8 @@ def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
+// FIXME: HiSilicon TSV110 is currently modeled as a Cortex-A57.
+def : ProcessorModel<"tsv110", CortexA57Model, [ProcTSV110]>;
//===----------------------------------------------------------------------===//
// Assembly parser
@@ -577,3 +763,9 @@ def AArch64 : Target {
let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter];
let AllowRegisterRenaming = 1;
}
+
+//===----------------------------------------------------------------------===//
+// Pfm Counters
+//===----------------------------------------------------------------------===//
+
+include "AArch64PfmCounters.td"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index a95476b91187..452fbd3488b0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -377,11 +377,10 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
// Now we have a set of sets, order them by start address so
// we can iterate over them sequentially.
- llvm::sort(V.begin(), V.end(),
- [](const std::vector<Chain*> &A,
- const std::vector<Chain*> &B) {
- return A.front()->startsBefore(B.front());
- });
+ llvm::sort(V,
+ [](const std::vector<Chain *> &A, const std::vector<Chain *> &B) {
+ return A.front()->startsBefore(B.front());
+ });
// As we only have two colors, we can track the global (BB-level) balance of
// odds versus evens. We aim to keep this near zero to keep both execution
@@ -453,16 +452,16 @@ bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
// change them to!
// Final tie-break with instruction order so pass output is stable (i.e. not
// dependent on malloc'd pointer values).
- llvm::sort(GV.begin(), GV.end(), [](const Chain *G1, const Chain *G2) {
- if (G1->size() != G2->size())
- return G1->size() > G2->size();
- if (G1->requiresFixup() != G2->requiresFixup())
- return G1->requiresFixup() > G2->requiresFixup();
- // Make sure startsBefore() produces a stable final order.
- assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
- "Starts before not total order!");
- return G1->startsBefore(G2);
- });
+ llvm::sort(GV, [](const Chain *G1, const Chain *G2) {
+ if (G1->size() != G2->size())
+ return G1->size() > G2->size();
+ if (G1->requiresFixup() != G2->requiresFixup())
+ return G1->requiresFixup() > G2->requiresFixup();
+ // Make sure startsBefore() produces a stable final order.
+ assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
+ "Starts before not total order!");
+ return G1->startsBefore(G2);
+ });
Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
while (Chain *G = getAndEraseNext(PreferredColor, GV)) {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 52819dedc23d..0442076992e2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -21,16 +21,20 @@
#include "InstPrinter/AArch64InstPrinter.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -77,6 +81,12 @@ public:
return MCInstLowering.lowerOperand(MO, MCOp);
}
+ void EmitJumpTableInfo() override;
+ void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB, unsigned JTI);
+
+ void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI);
+
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -100,12 +110,33 @@ public:
AU.setPreservesAll();
}
- bool runOnMachineFunction(MachineFunction &F) override {
- AArch64FI = F.getInfo<AArch64FunctionInfo>();
- STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget());
- bool Result = AsmPrinter::runOnMachineFunction(F);
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ AArch64FI = MF.getInfo<AArch64FunctionInfo>();
+ STI = static_cast<const AArch64Subtarget*>(&MF.getSubtarget());
+
+ SetupMachineFunction(MF);
+
+ if (STI->isTargetCOFF()) {
+ bool Internal = MF.getFunction().hasInternalLinkage();
+ COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ int Type =
+ COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
+
+ OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer->EmitCOFFSymbolStorageClass(Scl);
+ OutStreamer->EmitCOFFSymbolType(Type);
+ OutStreamer->EndCOFFSymbolDef();
+ }
+
+ // Emit the rest of the function body.
+ EmitFunctionBody();
+
+ // Emit the XRay table for this function.
emitXRayTable();
- return Result;
+
+ // We didn't modify anything.
+ return false;
}
private:
@@ -208,7 +239,7 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
}
}
@@ -433,6 +464,104 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
printOperand(MI, NOps - 2, OS);
}
+void AArch64AsmPrinter::EmitJumpTableInfo() {
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (!MJTI) return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+ MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
+ OutStreamer->SwitchSection(ReadOnlySec);
+
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ unsigned Size = AFI->getJumpTableEntrySize(JTI);
+ EmitAlignment(Log2_32(Size));
+ OutStreamer->EmitLabel(GetJTISymbol(JTI));
+
+ for (auto *JTBB : JTBBs)
+ emitJumpTableEntry(MJTI, JTBB, JTI);
+ }
+}
+
+void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned JTI) {
+ const MCExpr *Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ unsigned Size = AFI->getJumpTableEntrySize(JTI);
+
+ if (Size == 4) {
+ // .word LBB - LJTI
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, JTI, OutContext);
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ } else {
+ // .byte (LBB - LBB) >> 2 (or .hword)
+ const MCSymbol *BaseSym = AFI->getJumpTableEntryPCRelSymbol(JTI);
+ const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ Value = MCBinaryExpr::createLShr(
+ Value, MCConstantExpr::create(2, OutContext), OutContext);
+ }
+
+ OutStreamer->EmitValue(Value, Size);
+}
+
+/// Small jump tables contain an unsigned byte or half, representing the offset
+/// from the lowest-addressed possible destination to the desired basic
+/// block. Since all instructions are 4-byte aligned, this is further compressed
+/// by counting in instructions rather than bytes (i.e. divided by 4). So, to
+/// materialize the correct destination we need:
+///
+/// adr xDest, .LBB0_0
+/// ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh).
+/// add xDest, xDest, xScratch, lsl #2
+void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
+ const llvm::MachineInstr &MI) {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned ScratchRegW =
+ STI->getRegisterInfo()->getSubReg(ScratchReg, AArch64::sub_32);
+ unsigned TableReg = MI.getOperand(2).getReg();
+ unsigned EntryReg = MI.getOperand(3).getReg();
+ int JTIdx = MI.getOperand(4).getIndex();
+ bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8;
+
+ // This has to be first because the compression pass based its reachability
+ // calculations on the start of the JumpTableDest instruction.
+ auto Label =
+ MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx);
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR)
+ .addReg(DestReg)
+ .addExpr(MCSymbolRefExpr::create(
+ Label, MF->getContext())));
+
+ // Load the number of instruction-steps to offset from the label.
+ unsigned LdrOpcode = IsByteEntry ? AArch64::LDRBBroX : AArch64::LDRHHroX;
+ EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode)
+ .addReg(ScratchRegW)
+ .addReg(TableReg)
+ .addReg(EntryReg)
+ .addImm(0)
+ .addImm(IsByteEntry ? 0 : 1));
+
+ // Multiply the steps by 4 and add to the already materialized base label
+ // address.
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+ .addReg(DestReg)
+ .addReg(DestReg)
+ .addReg(ScratchReg)
+ .addImm(2));
+}
+
void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
@@ -503,7 +632,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
unsigned DestReg = MI.getOperand(0).getReg();
- if (STI->hasZeroCycleZeroing() && !STI->hasZeroCycleZeroingFPWorkaround()) {
+ if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
// Convert H/S/D register to corresponding Q register
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
@@ -559,6 +688,8 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer->EmitLabel(LOHLabel);
}
+ AArch64TargetStreamer *TS =
+ static_cast<AArch64TargetStreamer *>(OutStreamer->getTargetStreamer());
// Do any manual lowerings.
switch (MI->getOpcode()) {
default:
@@ -585,12 +716,27 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer->EmitRawText(StringRef(OS.str()));
}
return;
+
+ case AArch64::EMITBKEY: {
+ ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
+ if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
+ ExceptionHandlingType != ExceptionHandling::ARM)
+ return;
+
+ if (needsCFIMoves() == CFI_M_None)
+ return;
+
+ OutStreamer->EmitCFIBKeyFrame();
+ return;
+ }
}
// Tail calls use pseudo instructions so they have the proper code-gen
// attributes (isCall, isReturn, etc.). We lower them to the real
// instruction here.
- case AArch64::TCRETURNri: {
+ case AArch64::TCRETURNri:
+ case AArch64::TCRETURNriBTI:
+ case AArch64::TCRETURNriALL: {
MCInst TmpInst;
TmpInst.setOpcode(AArch64::BR);
TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
@@ -660,6 +806,32 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ case AArch64::JumpTableDest32: {
+ // We want:
+ // ldrsw xScratch, [xTable, xEntry, lsl #2]
+ // add xDest, xTable, xScratch
+ unsigned DestReg = MI->getOperand(0).getReg(),
+ ScratchReg = MI->getOperand(1).getReg(),
+ TableReg = MI->getOperand(2).getReg(),
+ EntryReg = MI->getOperand(3).getReg();
+ EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
+ .addReg(ScratchReg)
+ .addReg(TableReg)
+ .addReg(EntryReg)
+ .addImm(0)
+ .addImm(1));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+ .addReg(DestReg)
+ .addReg(TableReg)
+ .addReg(ScratchReg)
+ .addImm(0));
+ return;
+ }
+ case AArch64::JumpTableDest16:
+ case AArch64::JumpTableDest8:
+ LowerJumpTableDestSmall(*OutStreamer, *MI);
+ return;
+
case AArch64::FMOVH0:
case AArch64::FMOVS0:
case AArch64::FMOVD0:
@@ -683,6 +855,100 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::PATCHABLE_TAIL_CALL:
LowerPATCHABLE_TAIL_CALL(*MI);
return;
+
+ case AArch64::SEH_StackAlloc:
+ TS->EmitARM64WinCFIAllocStack(MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_SaveFPLR:
+ TS->EmitARM64WinCFISaveFPLR(MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_SaveFPLR_X:
+ assert(MI->getOperand(0).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveFPLRX(-MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_SaveReg:
+ TS->EmitARM64WinCFISaveReg(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveReg_X:
+ assert(MI->getOperand(1).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveRegX(MI->getOperand(0).getImm(),
+ -MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveRegP:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+ "Non-consecutive registers not allowed for save_regp");
+ TS->EmitARM64WinCFISaveRegP(MI->getOperand(0).getImm(),
+ MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SaveRegP_X:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+ "Non-consecutive registers not allowed for save_regp_x");
+ assert(MI->getOperand(2).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveRegPX(MI->getOperand(0).getImm(),
+ -MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SaveFReg:
+ TS->EmitARM64WinCFISaveFReg(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveFReg_X:
+ assert(MI->getOperand(1).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveFRegX(MI->getOperand(0).getImm(),
+ -MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveFRegP:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+ "Non-consecutive registers not allowed for save_regp");
+ TS->EmitARM64WinCFISaveFRegP(MI->getOperand(0).getImm(),
+ MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SaveFRegP_X:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+ "Non-consecutive registers not allowed for save_regp_x");
+ assert(MI->getOperand(2).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveFRegPX(MI->getOperand(0).getImm(),
+ -MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SetFP:
+ TS->EmitARM64WinCFISetFP();
+ return;
+
+ case AArch64::SEH_AddFP:
+ TS->EmitARM64WinCFIAddFP(MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_Nop:
+ TS->EmitARM64WinCFINop();
+ return;
+
+ case AArch64::SEH_PrologEnd:
+ TS->EmitARM64WinCFIPrologEnd();
+ return;
+
+ case AArch64::SEH_EpilogStart:
+ TS->EmitARM64WinCFIEpilogStart();
+ return;
+
+ case AArch64::SEH_EpilogEnd:
+ TS->EmitARM64WinCFIEpilogEnd();
+ return;
}
// Finally, do the automated lowerings for everything else.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp b/contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
new file mode 100644
index 000000000000..da70a624c5be
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -0,0 +1,130 @@
+//===-- AArch64BranchTargets.cpp -- Harden code using v8.5-A BTI extension -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts BTI instructions at the start of every function and basic
+// block which could be indirectly called. The hardware will (when enabled)
+// trap when an indirect branch or call instruction targets an instruction
+// which is not a valid BTI instruction. This is intended to guard against
+// control-flow hijacking attacks. Note that this does not do anything for RET
+// instructions, as they can be more precisely protected by return address
+// signing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-branch-targets"
+#define AARCH64_BRANCH_TARGETS_NAME "AArch64 Branch Targets"
+
+namespace {
+class AArch64BranchTargets : public MachineFunctionPass {
+public:
+ static char ID;
+ AArch64BranchTargets() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return AARCH64_BRANCH_TARGETS_NAME; }
+
+private:
+ void addBTI(MachineBasicBlock &MBB, bool CouldCall, bool CouldJump);
+};
+} // end anonymous namespace
+
+char AArch64BranchTargets::ID = 0;
+
+INITIALIZE_PASS(AArch64BranchTargets, "aarch64-branch-targets",
+ AARCH64_BRANCH_TARGETS_NAME, false, false)
+
+void AArch64BranchTargets::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+FunctionPass *llvm::createAArch64BranchTargetsPass() {
+ return new AArch64BranchTargets();
+}
+
+bool AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("branch-target-enforcement"))
+ return false;
+
+ LLVM_DEBUG(
+ dbgs() << "********** AArch64 Branch Targets **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ // LLVM does not consider basic blocks which are the targets of jump tables
+ // to be address-taken (the address can't escape anywhere else), but they are
+ // used for indirect branches, so need BTI instructions.
+ SmallPtrSet<MachineBasicBlock *, 8> JumpTableTargets;
+ if (auto *JTI = MF.getJumpTableInfo())
+ for (auto &JTE : JTI->getJumpTables())
+ for (auto *MBB : JTE.MBBs)
+ JumpTableTargets.insert(MBB);
+
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ bool CouldCall = false, CouldJump = false;
+ // If the function is address-taken or externally-visible, it could be
+    // indirectly called. PLT entries and tail-calls use BR, but when they
+    // are in guarded pages they should all use x16 or x17 to hold the called
+ // address, so we don't need to set CouldJump here. BR instructions in
+ // non-guarded pages (which might be non-BTI-aware code) are allowed to
+ // branch to a "BTI c" using any register.
+ if (&MBB == &*MF.begin() && (F.hasAddressTaken() || !F.hasLocalLinkage()))
+ CouldCall = true;
+
+ // If the block itself is address-taken, it could be indirectly branched
+ // to, but not called.
+ if (MBB.hasAddressTaken() || JumpTableTargets.count(&MBB))
+ CouldJump = true;
+
+ if (CouldCall || CouldJump) {
+ addBTI(MBB, CouldCall, CouldJump);
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall,
+ bool CouldJump) {
+ LLVM_DEBUG(dbgs() << "Adding BTI " << (CouldJump ? "j" : "")
+ << (CouldCall ? "c" : "") << " to " << MBB.getName()
+ << "\n");
+
+ const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
+
+ unsigned HintNum = 32;
+ if (CouldCall)
+ HintNum |= 2;
+ if (CouldJump)
+ HintNum |= 4;
+ assert(HintNum != 32 && "No target kinds!");
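+
+  // The resulting HINT immediate selects the BTI variant:
+  //   34 = BTI c, 36 = BTI j, 38 = BTI jc.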
+
+ auto MBBI = MBB.begin();
+
+ // PACI[AB]SP are implicitly BTI JC, so no BTI instruction needed there.
+ if (MBBI != MBB.end() && (MBBI->getOpcode() == AArch64::PACIASP ||
+ MBBI->getOpcode() == AArch64::PACIBSP))
+ return;
+
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AArch64::HINT))
+ .addImm(HintNum);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index 26d532555e78..5980e5684e89 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -227,32 +227,45 @@ void AArch64CallLowering::splitToValueTypes(
}
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
- MachineFunction &MF = MIRBuilder.getMF();
- const Function &F = MF.getFunction();
-
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
- assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
- bool Success = true;
- if (VReg) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
+ "Return value without a vreg");
- // We zero-extend i1s to i8.
- if (MRI.getType(VReg).getSizeInBits() == 1)
- VReg = MIRBuilder.buildZExt(LLT::scalar(8), VReg)->getOperand(0).getReg();
+ bool Success = true;
+ if (!VRegs.empty()) {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
auto &DL = F.getParent()->getDataLayout();
+ LLVMContext &Ctx = Val->getType()->getContext();
- ArgInfo OrigArg{VReg, Val->getType()};
- setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
SmallVector<ArgInfo, 8> SplitArgs;
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
- [&](unsigned Reg, uint64_t Offset) {
- MIRBuilder.buildExtract(Reg, VReg, Offset);
- });
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ // We zero-extend i1s to i8.
+ unsigned CurVReg = VRegs[i];
+ if (MRI.getType(VRegs[i]).getSizeInBits() == 1) {
+ CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg)
+ ->getOperand(0)
+ .getReg();
+ }
+
+ ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+ splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, F.getCallingConv(),
+ [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, CurVReg, Offset);
+ });
+ }
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
@@ -324,6 +337,10 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
}
+ auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ if (Subtarget.hasCustomCallingConv())
+ Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
+
// Move back to the end of the basic block.
MIRBuilder.setMBB(MBB);
@@ -364,8 +381,14 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIB.add(Callee);
// Tell the call which registers are clobbered.
- auto TRI = MF.getSubtarget().getRegisterInfo();
- MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
+ auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
+ if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(MF, &Mask);
+ MIB.addRegMask(Mask);
+
+ if (TRI->isAnyArgRegReserved(MF))
+ TRI->emitReservedArgRegCallError(MF);
// Do the actual argument marshalling.
SmallVector<unsigned, 8> PhysRegs;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
index 68c127fc42e5..1c2bd6a4de5d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -34,8 +34,8 @@ class AArch64CallLowering: public CallLowering {
public:
AArch64CallLowering(const AArch64TargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 30492003df14..5db941e9dac7 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -123,7 +123,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
// Vararg functions on windows pass floats in integer registers
def CC_AArch64_Win64_VarArg : CallingConv<[
- CCIfType<[f16, f32], CCPromoteToType<f64>>,
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
CCIfType<[f64], CCBitConvertToType<i64>>,
CCDelegateTo<CC_AArch64_AAPCS>
]>;
@@ -288,6 +288,20 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
+// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
+// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
+// and not (LR,FP) pairs.
+def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22,
+ X23, X24, X25, X26, X27, X28,
+ D8, D9, D10, D11,
+ D12, D13, D14, D15)>;
+
+// AArch64 PCS for vector functions (VPCS)
+// must (additionally) preserve full Q8-Q23 registers
+def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+ X23, X24, X25, X26, X27, X28,
+ (sequence "Q%u", 8, 23))>;
+
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
// this can be partially modelled by treating X0 as a callee-saved register;
@@ -362,5 +376,7 @@ def CSR_AArch64_AAPCS_SwiftError_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;
def CSR_AArch64_RT_MostRegs_SCS
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
+def CSR_AArch64_AAVPCS_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
def CSR_AArch64_AAPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
new file mode 100644
index 000000000000..0924a27e2586
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -0,0 +1,162 @@
+//==-- AArch64CompressJumpTables.cpp - Compress jump tables for AArch64 --====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This pass looks at the basic blocks each jump-table refers to and works out
+// whether they can be emitted in a compressed form (with 8 or 16-bit
+// entries). If so, it changes the opcode and flags them in the associated
+// AArch64FunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-jump-tables"
+
+STATISTIC(NumJT8, "Number of jump-tables with 1-byte entries");
+STATISTIC(NumJT16, "Number of jump-tables with 2-byte entries");
+STATISTIC(NumJT32, "Number of jump-tables with 4-byte entries");
+
+namespace {
+class AArch64CompressJumpTables : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ MachineFunction *MF;
+ SmallVector<int, 8> BlockInfo;
+
+ int computeBlockSize(MachineBasicBlock &MBB);
+ void scanFunction();
+
+ bool compressJumpTable(MachineInstr &MI, int Offset);
+
+public:
+ static char ID;
+ AArch64CompressJumpTables() : MachineFunctionPass(ID) {
+ initializeAArch64CompressJumpTablesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+ StringRef getPassName() const override {
+ return "AArch64 Compress Jump Tables";
+ }
+};
+char AArch64CompressJumpTables::ID = 0;
+}
+
+INITIALIZE_PASS(AArch64CompressJumpTables, DEBUG_TYPE,
+ "AArch64 compress jump tables pass", false, false)
+
+int AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
+ int Size = 0;
+ for (const MachineInstr &MI : MBB)
+ Size += TII->getInstSizeInBytes(MI);
+ return Size;
+}
+
+void AArch64CompressJumpTables::scanFunction() {
+ BlockInfo.clear();
+ BlockInfo.resize(MF->getNumBlockIDs());
+
+ int Offset = 0;
+ for (MachineBasicBlock &MBB : *MF) {
+ BlockInfo[MBB.getNumber()] = Offset;
+ Offset += computeBlockSize(MBB);
+ }
+}
+
+bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
+ int Offset) {
+ if (MI.getOpcode() != AArch64::JumpTableDest32)
+ return false;
+
+ int JTIdx = MI.getOperand(4).getIndex();
+ auto &JTInfo = *MF->getJumpTableInfo();
+ const MachineJumpTableEntry &JT = JTInfo.getJumpTables()[JTIdx];
+
+ // The jump-table might have been optimized away.
+ if (JT.MBBs.empty())
+ return false;
+
+ int MaxOffset = std::numeric_limits<int>::min(),
+ MinOffset = std::numeric_limits<int>::max();
+ MachineBasicBlock *MinBlock = nullptr;
+ for (auto Block : JT.MBBs) {
+ int BlockOffset = BlockInfo[Block->getNumber()];
+ assert(BlockOffset % 4 == 0 && "misaligned basic block");
+
+ MaxOffset = std::max(MaxOffset, BlockOffset);
+ if (BlockOffset <= MinOffset) {
+ MinOffset = BlockOffset;
+ MinBlock = Block;
+ }
+ }
+
+ // The ADR instruction needed to calculate the address of the first reachable
+ // basic block can address +/-1MB.
+ if (!isInt<21>(MinOffset - Offset)) {
+ ++NumJT32;
+ return false;
+ }
+
+ int Span = MaxOffset - MinOffset;
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ if (isUInt<8>(Span / 4)) {
+ AFI->setJumpTableEntryInfo(JTIdx, 1, MinBlock->getSymbol());
+ MI.setDesc(TII->get(AArch64::JumpTableDest8));
+ ++NumJT8;
+ return true;
+ } else if (isUInt<16>(Span / 4)) {
+ AFI->setJumpTableEntryInfo(JTIdx, 2, MinBlock->getSymbol());
+ MI.setDesc(TII->get(AArch64::JumpTableDest16));
+ ++NumJT16;
+ return true;
+ }
+
+ ++NumJT32;
+ return false;
+}
+
+bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
+ bool Changed = false;
+ MF = &MFIn;
+
+ const auto &ST = MF->getSubtarget<AArch64Subtarget>();
+ TII = ST.getInstrInfo();
+
+ if (ST.force32BitJumpTables() && !MF->getFunction().optForMinSize())
+ return false;
+
+ scanFunction();
+
+ for (MachineBasicBlock &MBB : *MF) {
+ int Offset = BlockInfo[MBB.getNumber()];
+ for (MachineInstr &MI : MBB) {
+ Changed |= compressJumpTable(MI, Offset);
+ Offset += TII->getInstSizeInBytes(MI);
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createAArch64CompressJumpTablesPass() {
+ return new AArch64CompressJumpTables();
+}
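
A minimal sketch of the entry-size decision made in compressJumpTable above, assuming (as the pass does) that block offsets are 4-byte aligned and measured from the start of the function; TableOffset stands for the offset of the jump-table dispatch itself:

#include <cstdint>
#include <limits>

// Entries are stored as (BlockOffset - MinOffset) / 4, so the span between the
// nearest and the furthest target decides the width; the ADR that materializes
// the base address must reach MinOffset within +/-1 MiB.
static unsigned jumpTableEntryBytes(int MinOffset, int MaxOffset,
                                    int TableOffset) {
  long long AdrDelta = (long long)MinOffset - TableOffset;
  if (AdrDelta < -(1 << 20) || AdrDelta >= (1 << 20))
    return 4; // out of ADR range, keep 32-bit entries
  unsigned Span = unsigned(MaxOffset - MinOffset) / 4;
  if (Span <= std::numeric_limits<uint8_t>::max())
    return 1;
  if (Span <= std::numeric_limits<uint16_t>::max())
    return 2;
  return 4;
}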
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 9226a9dd879b..f7190d58fbf9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -835,36 +835,55 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
}
case AArch64::LOADgot: {
- // Expand into ADRP + LDR.
+ MachineFunction *MF = MBB.getParent();
unsigned DstReg = MI.getOperand(0).getReg();
const MachineOperand &MO1 = MI.getOperand(1);
unsigned Flags = MO1.getTargetFlags();
- MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
- MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
- .add(MI.getOperand(0))
- .addReg(DstReg);
-
- if (MO1.isGlobal()) {
- MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
- MIB2.addGlobalAddress(MO1.getGlobal(), 0,
- Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- } else if (MO1.isSymbol()) {
- MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
- MIB2.addExternalSymbol(MO1.getSymbolName(),
- Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+ if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
+ // Tiny code model: expand to a single literal LDR.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(AArch64::LDRXl), DstReg);
+
+ if (MO1.isGlobal()) {
+ MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
+ } else if (MO1.isSymbol()) {
+ MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
+ } else {
+ assert(MO1.isCPI() &&
+ "Only expect globals, externalsymbols, or constant pools");
+ MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
+ }
} else {
- assert(MO1.isCPI() &&
- "Only expect globals, externalsymbols, or constant pools");
- MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
- Flags | AArch64II::MO_PAGE);
- MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
- Flags | AArch64II::MO_PAGEOFF |
- AArch64II::MO_NC);
+ // Small code model: expand into ADRP + LDR.
+ MachineInstrBuilder MIB1 =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
+ MachineInstrBuilder MIB2 =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
+ .add(MI.getOperand(0))
+ .addReg(DstReg);
+
+ if (MO1.isGlobal()) {
+ MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
+ MIB2.addGlobalAddress(MO1.getGlobal(), 0,
+ Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ } else if (MO1.isSymbol()) {
+ MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
+ MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
+ AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ } else {
+ assert(MO1.isCPI() &&
+ "Only expect globals, externalsymbols, or constant pools");
+ MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
+ Flags | AArch64II::MO_PAGE);
+ MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
+ Flags | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ }
+
+ transferImpOps(MI, MIB1, MIB2);
}
-
- transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 572d1c22feea..47550cabb9f0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2016,8 +2016,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
if (RetVT == MVT::i64 && VT <= MVT::i32) {
if (WantZExt) {
// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
- std::prev(FuncInfo.InsertPt)->eraseFromParent();
- ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
+ MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
+ ResultReg = std::prev(I)->getOperand(0).getReg();
+ removeDeadCode(I, std::next(I));
} else
ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
/*IsKill=*/true,
@@ -2038,7 +2039,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
break;
}
}
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
MI = nullptr;
if (Reg)
MI = MRI.getUniqueVRegDef(Reg);
@@ -2256,6 +2258,13 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
/// Try to emit a combined compare-and-branch instruction.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ if (FuncInfo.MF->getFunction().hasFnAttribute(
+ Attribute::SpeculativeLoadHardening))
+ return false;
+
assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
const CmpInst *CI = cast<CmpInst>(BI->getCondition());
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
@@ -2918,6 +2927,9 @@ bool AArch64FastISel::fastLowerArguments() {
if (CC != CallingConv::C && CC != CallingConv::Swift)
return false;
+ if (Subtarget->hasCustomCallingConv())
+ return false;
+
// Only handle simple cases of up to 8 GPR and FPR each.
unsigned GPRCnt = 0;
unsigned FPRCnt = 0;
@@ -3208,6 +3220,10 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!processCallArgs(CLI, OutVTs, NumBytes))
return false;
+ const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ if (RegInfo->isAnyArgRegReserved(*MF))
+ RegInfo->emitReservedArgRegCallError(*MF);
+
// Issue the call.
MachineInstrBuilder MIB;
if (Subtarget->useSmallAddressing()) {
@@ -3443,6 +3459,21 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, SrcReg);
return true;
}
+ case Intrinsic::sponentry: {
+ MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
+
+ // SP = FP + Fixed Object + 16
+ int FI = MFI.CreateFixedObject(4, 0, false);
+ unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::ADDXri), ResultReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0);
+
+ updateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::memcpy:
case Intrinsic::memmove: {
const auto *MTI = cast<MemTransferInst>(II);
@@ -3738,6 +3769,9 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
}
+ if (!ResultReg1)
+ return false;
+
ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
/*IsKill=*/true, getInvertedCondCode(CC));
@@ -4483,7 +4517,8 @@ bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
"Expected copy instruction");
Reg = MI->getOperand(1).getReg();
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
}
updateValueMap(I, Reg);
return true;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 6dc5d19862a9..538a8d7e8fbc 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -98,6 +98,7 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -114,11 +115,13 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -201,6 +204,11 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ // Win64 EH requires a frame pointer if funclets are present, as the locals
+ // are accessed off the frame pointer in both the parent function and the
+ // funclets.
+ if (MF.hasEHFunclets())
+ return true;
// Retain behavior of always omitting the FP for leaf functions when possible.
if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
return true;
@@ -279,6 +287,31 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
return MBB.erase(I);
}
+static bool ShouldSignReturnAddress(MachineFunction &MF) {
+ // The function should be signed in the following situations:
+ // - sign-return-address=all
+ // - sign-return-address=non-leaf and the function spills the LR
+
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("sign-return-address"))
+ return false;
+
+ StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
+ if (Scope.equals("none"))
+ return false;
+
+ if (Scope.equals("all"))
+ return true;
+
+ assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf");
+
+ for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo())
+ if (Info.getReg() == AArch64::LR)
+ return true;
+
+ return false;
+}
+
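
As a sketch, the policy ShouldSignReturnAddress encodes boils down to the following, assuming the "sign-return-address" attribute string ("none", "non-leaf" or "all") has already been read off the function:

#include <string>

// Illustrative only: sign when the attribute requests "all", or when it
// requests "non-leaf" and the function spills LR (so the return address is
// actually stored to the stack).
static bool signReturnAddress(const std::string &Scope, bool SpillsLR) {
  if (Scope.empty() || Scope == "none")
    return false;
  if (Scope == "all")
    return true;
  return SpillsLR; // Scope == "non-leaf"
}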
void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
@@ -330,7 +363,7 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
LiveRegs.addLiveIns(*MBB);
// Mark callee saved registers as used so we will not choose them.
- const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
+ const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
@@ -408,54 +441,217 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
return true;
}
+// Given a load or a store instruction, generate an appropriate unwinding SEH
+// code on Windows.
+static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo &TII,
+ MachineInstr::MIFlag Flag) {
+ unsigned Opc = MBBI->getOpcode();
+ MachineBasicBlock *MBB = MBBI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ DebugLoc DL = MBBI->getDebugLoc();
+ unsigned ImmIdx = MBBI->getNumOperands() - 1;
+ int Imm = MBBI->getOperand(ImmIdx).getImm();
+ MachineInstrBuilder MIB;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("No SEH Opcode for this instruction");
+ case AArch64::LDPDpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STPDpre: {
+ unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
+ .addImm(Reg0)
+ .addImm(Reg1)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDPXpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STPXpre: {
+ unsigned Reg0 = MBBI->getOperand(1).getReg();
+ unsigned Reg1 = MBBI->getOperand(2).getReg();
+ if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ else
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
+ .addImm(RegInfo->getSEHRegNum(Reg0))
+ .addImm(RegInfo->getSEHRegNum(Reg1))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDRDpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STRDpre: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
+ .addImm(Reg)
+ .addImm(Imm)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDRXpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STRXpre: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
+ .addImm(Reg)
+ .addImm(Imm)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STPDi:
+ case AArch64::LDPDi: {
+ unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
+ .addImm(Reg0)
+ .addImm(Reg1)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STPXi:
+ case AArch64::LDPXi: {
+ unsigned Reg0 = MBBI->getOperand(0).getReg();
+ unsigned Reg1 = MBBI->getOperand(1).getReg();
+ if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ else
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
+ .addImm(RegInfo->getSEHRegNum(Reg0))
+ .addImm(RegInfo->getSEHRegNum(Reg1))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STRXui:
+ case AArch64::LDRXui: {
+ int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
+ .addImm(Reg)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STRDui:
+ case AArch64::LDRDui: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
+ .addImm(Reg)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ }
+ auto I = MBB->insertAfter(MBBI, MIB);
+ return I;
+}
+
+// Fix up the SEH opcode associated with the save/restore instruction.
+static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
+ unsigned LocalStackSize) {
+ MachineOperand *ImmOpnd = nullptr;
+ unsigned ImmIdx = MBBI->getNumOperands() - 1;
+ switch (MBBI->getOpcode()) {
+ default:
+ llvm_unreachable("Fix the offset in the SEH instruction");
+ case AArch64::SEH_SaveFPLR:
+ case AArch64::SEH_SaveRegP:
+ case AArch64::SEH_SaveReg:
+ case AArch64::SEH_SaveFRegP:
+ case AArch64::SEH_SaveFReg:
+ ImmOpnd = &MBBI->getOperand(ImmIdx);
+ break;
+ }
+ if (ImmOpnd)
+ ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
+}
+
// Convert callee-save register save/restore instruction to do stack pointer
// decrement/increment to allocate/deallocate the callee-save stack area by
// converting store/load to use pre/post increment version.
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
+ const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
+ bool NeedsWinCFI, bool InProlog = true) {
// Ignore instructions that do not operate on SP, i.e. shadow call stack
- // instructions.
+ // instructions and associated CFI instruction.
while (MBBI->getOpcode() == AArch64::STRXpost ||
- MBBI->getOpcode() == AArch64::LDRXpre) {
- assert(MBBI->getOperand(0).getReg() != AArch64::SP);
+ MBBI->getOpcode() == AArch64::LDRXpre ||
+ MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
+ if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
+ assert(MBBI->getOperand(0).getReg() != AArch64::SP);
++MBBI;
}
-
unsigned NewOpc;
- bool NewIsUnscaled = false;
+ int Scale = 1;
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
case AArch64::STPXi:
NewOpc = AArch64::STPXpre;
+ Scale = 8;
break;
case AArch64::STPDi:
NewOpc = AArch64::STPDpre;
+ Scale = 8;
+ break;
+ case AArch64::STPQi:
+ NewOpc = AArch64::STPQpre;
+ Scale = 16;
break;
case AArch64::STRXui:
NewOpc = AArch64::STRXpre;
- NewIsUnscaled = true;
break;
case AArch64::STRDui:
NewOpc = AArch64::STRDpre;
- NewIsUnscaled = true;
+ break;
+ case AArch64::STRQui:
+ NewOpc = AArch64::STRQpre;
break;
case AArch64::LDPXi:
NewOpc = AArch64::LDPXpost;
+ Scale = 8;
break;
case AArch64::LDPDi:
NewOpc = AArch64::LDPDpost;
+ Scale = 8;
+ break;
+ case AArch64::LDPQi:
+ NewOpc = AArch64::LDPQpost;
+ Scale = 16;
break;
case AArch64::LDRXui:
NewOpc = AArch64::LDRXpost;
- NewIsUnscaled = true;
break;
case AArch64::LDRDui:
NewOpc = AArch64::LDRDpost;
- NewIsUnscaled = true;
+ break;
+ case AArch64::LDRQui:
+ NewOpc = AArch64::LDRQpost;
break;
}
+ // Get rid of the SEH code associated with the old instruction.
+ if (NeedsWinCFI) {
+ auto SEH = std::next(MBBI);
+ if (AArch64InstrInfo::isSEHInstruction(*SEH))
+ SEH->eraseFromParent();
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
MIB.addReg(AArch64::SP, RegState::Define);
@@ -471,15 +667,16 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
"instruction!");
assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
- // Last operand is immediate offset that needs fixing.
- assert(CSStackSizeInc % 8 == 0);
- int64_t CSStackSizeIncImm = CSStackSizeInc;
- if (!NewIsUnscaled)
- CSStackSizeIncImm /= 8;
- MIB.addImm(CSStackSizeIncImm);
+ assert(CSStackSizeInc % Scale == 0);
+ MIB.addImm(CSStackSizeInc / Scale);
MIB.setMIFlags(MBBI->getFlags());
- MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());
+ MIB.setMemRefs(MBBI->memoperands());
+
+ // Generate a new SEH code that corresponds to the new instruction.
+ if (NeedsWinCFI)
+ InsertSEH(*MIB, *TII,
+ InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
return std::prev(MBB.erase(MBBI));
}
@@ -487,22 +684,43 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
- unsigned LocalStackSize) {
+ unsigned LocalStackSize,
+ bool NeedsWinCFI) {
+ if (AArch64InstrInfo::isSEHInstruction(MI))
+ return;
+
unsigned Opc = MI.getOpcode();
// Ignore instructions that do not operate on SP, i.e. shadow call stack
- // instructions.
- if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) {
- assert(MI.getOperand(0).getReg() != AArch64::SP);
+ // instructions and associated CFI instruction.
+ if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
+ Opc == AArch64::CFI_INSTRUCTION) {
+ if (Opc != AArch64::CFI_INSTRUCTION)
+ assert(MI.getOperand(0).getReg() != AArch64::SP);
return;
}
- (void)Opc;
- assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
- Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
- Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
- Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
- "Unexpected callee-save save/restore opcode!");
+ unsigned Scale;
+ switch (Opc) {
+ case AArch64::STPXi:
+ case AArch64::STRXui:
+ case AArch64::STPDi:
+ case AArch64::STRDui:
+ case AArch64::LDPXi:
+ case AArch64::LDRXui:
+ case AArch64::LDPDi:
+ case AArch64::LDRDui:
+ Scale = 8;
+ break;
+ case AArch64::STPQi:
+ case AArch64::STRQui:
+ case AArch64::LDPQi:
+ case AArch64::LDRQui:
+ Scale = 16;
+ break;
+ default:
+ llvm_unreachable("Unexpected callee-save save/restore opcode!");
+ }
unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
@@ -510,8 +728,16 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
// Last operand is immediate offset that needs fixing.
MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
// All generated opcodes have scaled offsets.
- assert(LocalStackSize % 8 == 0);
- OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
+ assert(LocalStackSize % Scale == 0);
+ OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
+
+ if (NeedsWinCFI) {
+ auto MBBI = std::next(MachineBasicBlock::iterator(MI));
+ assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
+ assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
+ "Expecting a SEH instruction");
+ fixupSEHOpcode(MBBI, LocalStackSize);
+ }
}
static void adaptForLdStOpt(MachineBasicBlock &MBB,
@@ -546,6 +772,23 @@ static void adaptForLdStOpt(MachineBasicBlock &MBB,
//
}
+static bool ShouldSignWithAKey(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("sign-return-address-key"))
+ return true;
+
+ const StringRef Key =
+ F.getFnAttribute("sign-return-address-key").getValueAsString();
+ assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
+ return Key.equals_lower("a_key");
+}
+
+static bool needsWinCFI(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ F.needsUnwindTableEntry();
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -556,8 +799,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
+ bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) &&
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool HasFP = hasFP(MF);
+ bool NeedsWinCFI = needsWinCFI(MF);
+ MF.setHasWinCFI(NeedsWinCFI);
+ bool IsFunclet = MBB.isEHFuncletEntry();
// At this point, we're going to decide whether or not the function uses a
// redzone. In most cases, the function doesn't have a redzone so let's
@@ -568,18 +815,41 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc DL;
+ if (ShouldSignReturnAddress(MF)) {
+ if (ShouldSignWithAKey(MF))
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
+ .setMIFlag(MachineInstr::FrameSetup);
+ else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
- int NumBytes = (int)MFI.getStackSize();
+ // getStackSize() includes all the locals in its size calculation. We don't
+ // include these locals when computing the stack size of a funclet, as they
+ // are allocated in the parent's stack frame and accessed via the frame
+ // pointer from the funclet. We only save the callee saved registers in the
+ // funclet, which are really the callee saved registers of the parent
+ // function, including the funclet.
+ int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
+ : (int)MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
-
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
-
if (!NumBytes)
return;
// REDZONE: If the stack size is less than 128 bytes, we don't need
@@ -589,36 +859,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++NumRedZoneFunctions;
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
-
- // Label used to tie together the PROLOG_LABEL and the MachineMoves.
- MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
- // Encode the stack size of the leaf function.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
+ if (!NeedsWinCFI) {
+ // Label used to tie together the PROLOG_LABEL and the MachineMoves.
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
+ // Encode the stack size of the leaf function.
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
}
+
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+
return;
}
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
- unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ // Var args are accounted for in the containing function, so don't
+ // include them for funclets.
+ unsigned FixedObject = (IsWin64 && !IsFunclet) ?
+ alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// All of the remaining stack allocations are for locals.
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
-
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
if (CombineSPBump) {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
NumBytes = 0;
} else if (PrologueSaveSize != 0) {
- MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
- -PrologueSaveSize);
+ MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -629,9 +907,21 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock::iterator End = MBB.end();
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
if (CombineSPBump)
- fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
+ fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
+ NeedsWinCFI);
++MBBI;
}
+
+ // The code below is not applicable to funclets. We have emitted all the SEH
+ // opcodes that we needed to emit. The FP and BP belong to the containing
+ // function.
+ if (IsFunclet) {
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ return;
+ }
+
if (HasFP) {
// Only set up FP if we actually need to. Frame pointer is fp =
// sp - fixedobject - 16.
@@ -644,24 +934,58 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
uint32_t NumWords = NumBytes >> 4;
-
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
- .addImm(NumWords)
- .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
+ // exceed this amount. We need to move at most 2^24 - 1 into x15.
+ // This is at most two instructions, MOVZ followed by MOVK.
+ // TODO: Fix to use multiple stack alloc unwind codes for stacks
+ // exceeding 256MB in size.
+ if (NumBytes >= (1 << 28))
+ report_fatal_error("Stack size cannot exceed 256MB for stack "
+ "unwinding purposes");
+
+ uint32_t LowNumWords = NumWords & 0xFFFF;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
+ .addImm(LowNumWords)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ if ((NumWords & 0xFFFF0000) != 0) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
+ .addReg(AArch64::X15)
+ .addImm((NumWords & 0xFFFF0000) >> 16) // High half
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
switch (MF.getTarget().getCodeModel()) {
+ case CodeModel::Tiny:
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
.addExternalSymbol("__chkstk")
.addReg(AArch64::X15, RegState::Implicit)
+ .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
break;
case CodeModel::Large:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
@@ -669,11 +993,20 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addExternalSymbol("__chkstk")
.addExternalSymbol("__chkstk")
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
.addReg(AArch64::X16, RegState::Kill)
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
+ .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
break;
}
@@ -682,6 +1015,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(AArch64::X15, RegState::Kill)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
NumBytes = 0;
}
@@ -701,7 +1038,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
if (NeedsRealignment) {
const unsigned Alignment = MFI.getMaxAlignment();
@@ -724,6 +1061,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(scratchSPReg, RegState::Kill)
.addImm(andMaskEncoded);
AFI->setStackRealigned(true);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes & andMaskEncoded)
+ .setMIFlag(MachineInstr::FrameSetup);
}
}
@@ -737,8 +1078,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (RegInfo->hasBasePointer(MF)) {
TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
false);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
}
+ // The very last FrameSetup instruction indicates the end of prologue. Emit a
+ // SEH opcode indicating the prologue end.
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+
if (needsFrameMoves) {
const DataLayout &TD = MF.getDataLayout();
const int StackGrowth = -TD.getPointerSize(0);
@@ -832,6 +1182,46 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
+static void InsertReturnAddressAuth(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (!ShouldSignReturnAddress(MF))
+ return;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc DL;
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ // The AUTIASP instruction assembles to a hint instruction before v8.3a so
+ // this instruction can safely be used for any v8a architecture.
+ // From v8.3a onwards there are optimised authenticate LR and return
+ // instructions, namely RETA{A,B}, that can be used instead.
+ if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() &&
+ MBBI->getOpcode() == AArch64::RET_ReallyLR) {
+ BuildMI(MBB, MBBI, DL,
+ TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB))
+ .copyImplicitOps(*MBBI);
+ MBB.erase(MBBI);
+ } else {
+ BuildMI(
+ MBB, MBBI, DL,
+ TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+static bool isFuncletReturnInstr(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case AArch64::CATCHRET:
+ case AArch64::CLEANUPRET:
+ return true;
+ }
+}
+
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -840,14 +1230,21 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool IsTailCallReturn = false;
+ bool NeedsWinCFI = needsWinCFI(MF);
+ bool IsFunclet = false;
+
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
unsigned RetOpcode = MBBI->getOpcode();
IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
- RetOpcode == AArch64::TCRETURNri;
+ RetOpcode == AArch64::TCRETURNri ||
+ RetOpcode == AArch64::TCRETURNriBTI;
+ IsFunclet = isFuncletReturnInstr(*MBBI);
}
- int NumBytes = MFI.getStackSize();
- const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
+ : MFI.getStackSize();
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
@@ -899,25 +1296,38 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
// it as the 2nd argument of AArch64ISD::TC_RETURN.
+ auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
+
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
- unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ // Var args are accounted for in the containing function, so don't
+ // include them for funclets.
+ unsigned FixedObject =
+ (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
uint64_t AfterCSRPopSize = ArgumentPopSize;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
+ // We cannot rely on the local stack size set in emitPrologue if the function
+ // has funclets, as funclets have different local stack size requirements, and
+ // the current value set in emitPrologue may be that of the containing
+ // function.
+ if (MF.hasEHFunclets())
+ AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
// Assume we can't combine the last pop with the sp restore.
if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
+ while (AArch64InstrInfo::isSEHInstruction(*Pop))
+ Pop = std::prev(Pop);
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
// If the offset is 0, convert it to a post-index ldp.
- if (OffsetOp.getImm() == 0) {
- convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII,
- PrologueSaveSize);
- } else {
+ if (OffsetOp.getImm() == 0)
+ convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, false);
+ else {
// If not, make sure to emit an add after the last ldp.
// We're doing this by transferring the size to be restored from the
// adjustment *before* the CSR pops to the adjustment *after* the CSR
@@ -937,14 +1347,23 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
++LastPopI;
break;
} else if (CombineSPBump)
- fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
+ fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
+ NeedsWinCFI);
}
+ if (NeedsWinCFI)
+ BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
+ .setMIFlag(MachineInstr::FrameDestroy);
+
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
- NumBytes + AfterCSRPopSize, TII,
- MachineInstr::FrameDestroy);
+ NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL,
+ TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
return;
}
@@ -972,9 +1391,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackRestoreBytes, TII, MachineInstr::FrameDestroy);
- if (Done)
+ StackRestoreBytes, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI);
+ if (Done) {
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL,
+ TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
return;
+ }
NumBytes = 0;
}
@@ -983,13 +1408,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
- if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
+ if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned()))
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
-AFI->getCalleeSavedStackSize() + 16, TII,
- MachineInstr::FrameDestroy);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
- MachineInstr::FrameDestroy);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
@@ -1010,8 +1435,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
- AfterCSRPopSize, TII, MachineInstr::FrameDestroy);
+ AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI);
}
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1084,6 +1513,14 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
UseFP = true;
+ } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
+ // Funclets access the locals contained in the parent's stack frame
+ // via the frame pointer, so we have to use the FP in the parent
+ // function.
+ assert(
+ Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
+ "Funclets should only be present on Win64");
+ UseFP = true;
} else {
// We have the choice between FP and (SP or BP).
if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
@@ -1136,6 +1573,23 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
Attrs.hasAttrSomewhere(Attribute::SwiftError));
}
+static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
+ bool NeedsWinCFI) {
+ // If we are generating register pairs for a Windows function that requires
+ // EH support, then pair consecutive registers only. There are no unwind
+ // opcodes for saves/restores of non-consecutive register pairs.
+ // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
+ // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+
+ // TODO: LR can be paired with any register. We don't support this yet in
+ // the MCLayer. We need to add support for the save_lrpair unwind code.
+ if (!NeedsWinCFI)
+ return false;
+ if (Reg2 == Reg1 + 1)
+ return false;
+ return true;
+}
+
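
For illustration, the pairing restriction above reduces to a one-line predicate; this sketch takes the two candidate registers as raw encoded numbers rather than MCRegister values:

// With Windows CFI, STP/LDP callee-save pairs must use consecutive registers
// (x19/x20, d8/d9, ...) because the save_regp/save_fregp unwind codes encode
// only the first register of the pair.
static bool canPairForWinCFI(unsigned Reg1, unsigned Reg2, bool NeedsWinCFI) {
  return !NeedsWinCFI || Reg2 == Reg1 + 1;
}

For example, x19/x20 can still be paired under Windows CFI, while x19/x21 is kept as two single saves.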
namespace {
struct RegPairInfo {
@@ -1143,7 +1597,7 @@ struct RegPairInfo {
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
- bool IsGPR;
+ enum RegType { GPR, FPR64, FPR128 } Type;
RegPairInfo() = default;
@@ -1160,6 +1614,7 @@ static void computeCalleeSaveRegisterPairs(
if (CSI.empty())
return;
+ bool NeedsWinCFI = needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
MachineFrameInfo &MFI = MF.getFrameInfo();
CallingConv::ID CC = MF.getFunction().getCallingConv();
@@ -1172,28 +1627,50 @@ static void computeCalleeSaveRegisterPairs(
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int Offset = AFI->getCalleeSavedStackSize();
-
+ // On Linux, we will have at most one non-paired register. On Windows with
+ // CFI, we can have multiple unpaired registers in order to utilize the
+ // available unwind codes. This flag ensures that the alignment fixup is done
+ // only once, as intended.
+ bool FixupDone = false;
for (unsigned i = 0; i < Count; ++i) {
RegPairInfo RPI;
RPI.Reg1 = CSI[i].getReg();
- assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
- AArch64::FPR64RegClass.contains(RPI.Reg1));
- RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
+ if (AArch64::GPR64RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::GPR;
+ else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::FPR64;
+ else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::FPR128;
+ else
+ llvm_unreachable("Unsupported register class.");
// Add the next reg to the pair if it is in the same register class.
if (i + 1 < Count) {
unsigned NextReg = CSI[i + 1].getReg();
- if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
- (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
- RPI.Reg2 = NextReg;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ if (AArch64::GPR64RegClass.contains(NextReg) &&
+ !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
+ RPI.Reg2 = NextReg;
+ break;
+ case RegPairInfo::FPR64:
+ if (AArch64::FPR64RegClass.contains(NextReg) &&
+ !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
+ RPI.Reg2 = NextReg;
+ break;
+ case RegPairInfo::FPR128:
+ if (AArch64::FPR128RegClass.contains(NextReg))
+ RPI.Reg2 = NextReg;
+ break;
+ }
}
// If either of the registers to be saved is the lr register, it means that
// we also need to save lr in the shadow call stack.
if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
- if (!MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
+ if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
report_fatal_error("Must reserve x18 to use shadow call stack");
NeedShadowCallStackProlog = true;
}
@@ -1219,17 +1696,22 @@ static void computeCalleeSaveRegisterPairs(
RPI.FrameIdx = CSI[i].getFrameIdx();
- if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
- // Round up size of non-pair to pair size if we need to pad the
- // callee-save area to ensure 16-byte alignment.
- Offset -= 16;
+ int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
+ Offset -= RPI.isPaired() ? 2 * Scale : Scale;
+
+ // Round up size of non-pair to pair size if we need to pad the
+ // callee-save area to ensure 16-byte alignment.
+ if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
+ RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
+ FixupDone = true;
+ Offset -= 8;
+ assert(Offset % 16 == 0);
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
MFI.setObjectAlignment(RPI.FrameIdx, 16);
- AFI->setCalleeSaveStackHasFreeSpace(true);
- } else
- Offset -= RPI.isPaired() ? 16 : 8;
- assert(Offset % 8 == 0);
- RPI.Offset = Offset / 8;
+ }
+
+ assert(Offset % Scale == 0);
+ RPI.Offset = Offset / Scale;
assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
"Offset out of bounds for LDP/STP immediate");
@@ -1245,6 +1727,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ bool NeedsWinCFI = needsWinCFI(MF);
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
@@ -1262,6 +1745,27 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
.addImm(8)
.setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
+ // Emit a CFI instruction that causes 8 to be subtracted from the value of
+ // x18 when unwinding past this frame.
+ static const char CFIInst[] = {
+ dwarf::DW_CFA_val_expression,
+ 18, // register
+ 2, // length
+ static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+ static_cast<char>(-8) & 0x7f, // addend (sleb128)
+ };
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, CFIInst));
+ BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
// This instruction also makes x18 live-in to the entry block.
MBB.addLiveIn(AArch64::X18);
}
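
The escape above spells out a DWARF expression byte by byte; as a small sketch of the encoding it relies on, a value in [-64, 63] fits a single SLEB128 byte, which is why the -8 addend can be emitted as static_cast<char>(-8) & 0x7f:

#include <cassert>
#include <cstdint>

// Single-byte SLEB128: the low seven bits of the two's-complement value with
// the continuation bit clear. For -8 this yields 0x78.
static uint8_t sleb128SingleByte(int V) {
  assert(V >= -64 && V <= 63 && "needs more than one SLEB128 byte");
  return uint8_t(V) & 0x7f;
}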
@@ -1283,16 +1787,41 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
// Note: Similar rationale and sequence for restores in epilog.
- if (RPI.IsGPR)
- StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
- else
- StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
+ unsigned Size, Align;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR64:
+ StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR128:
+ StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
+ Size = 16;
+ Align = 16;
+ break;
+ }
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
+ assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
+ "Windows unwdinding requires a consecutive (FP,LR) pair");
+ // Windows unwind codes require consecutive registers if registers are
+ // paired. Make the switch here, so that the code below will save (x,x+1)
+ // and not (x+1,x).
+ unsigned FrameIdxReg1 = RPI.FrameIdx;
+ unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
+ if (NeedsWinCFI && RPI.isPaired()) {
+ std::swap(Reg1, Reg2);
+ std::swap(FrameIdxReg1, FrameIdxReg2);
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (!MRI.isReserved(Reg1))
MBB.addLiveIn(Reg1);
@@ -1301,16 +1830,20 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
- MachineMemOperand::MOStore, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
+ MachineMemOperand::MOStore, Size, Align));
}
MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
- .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
+ .addImm(RPI.Offset) // [sp, #offset*scale],
+ // where the scale factor is implicit
.setMIFlag(MachineInstr::FrameSetup);
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
- MachineMemOperand::MOStore, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
+ MachineMemOperand::MOStore, Size, Align));
+ if (NeedsWinCFI)
+ InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+
}
return true;
}
@@ -1323,6 +1856,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
+ bool NeedsWinCFI = needsWinCFI(MF);
if (MI != MBB.end())
DL = MI->getDebugLoc();
@@ -1344,32 +1878,57 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
// ldp x22, x21, [sp, #0] // addImm(+0)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
- if (RPI.IsGPR)
- LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
- else
- LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
+ unsigned Size, Align;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR64:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR128:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
+ Size = 16;
+ Align = 16;
+ break;
+ }
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
+ // Windows unwind codes require consecutive registers if registers are
+ // paired. Make the switch here, so that the code below will restore (x,x+1)
+ // and not (x+1,x).
+ unsigned FrameIdxReg1 = RPI.FrameIdx;
+ unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
+ if (NeedsWinCFI && RPI.isPaired()) {
+ std::swap(Reg1, Reg2);
+ std::swap(FrameIdxReg1, FrameIdxReg2);
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
if (RPI.isPaired()) {
MIB.addReg(Reg2, getDefRegState(true));
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
- MachineMemOperand::MOLoad, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
+ MachineMemOperand::MOLoad, Size, Align));
}
MIB.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
- .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
+ .addImm(RPI.Offset) // [sp, #offset*scale]
+ // where the scale factor is implicit
.setMIFlag(MachineInstr::FrameDestroy);
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
- MachineMemOperand::MOLoad, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
+ MachineMemOperand::MOLoad, Size, Align));
+ if (NeedsWinCFI)
+ InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
};
-
if (ReverseCSRRestoreSeq)
for (const RegPairInfo &RPI : reverse(RegPairs))
EmitMI(RPI);
@@ -1406,30 +1965,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
MachineFrameInfo &MFI = MF.getFrameInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
? RegInfo->getBaseRegister()
: (unsigned)AArch64::NoRegister;
- unsigned SpillEstimate = SavedRegs.count();
- for (unsigned i = 0; CSRegs[i]; ++i) {
- unsigned Reg = CSRegs[i];
- unsigned PairedReg = CSRegs[i ^ 1];
- if (Reg == BasePointerReg)
- SpillEstimate++;
- if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
- SpillEstimate++;
- }
- SpillEstimate += 2; // Conservatively include FP+LR in the estimate
- unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
-
- // The frame record needs to be created by saving the appropriate registers
- if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
- SavedRegs.set(AArch64::FP);
- SavedRegs.set(AArch64::LR);
- }
-
unsigned ExtraCSSpill = 0;
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
@@ -1453,7 +1994,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
- if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
+ if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
+ !SavedRegs.test(PairedReg)) {
SavedRegs.set(PairedReg);
if (AArch64::GPR64RegClass.contains(PairedReg) &&
!RegInfo->isReservedReg(MF, PairedReg))
@@ -1461,6 +2003,24 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ // Calculates the callee saved stack size.
+ unsigned CSStackSize = 0;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned Reg : SavedRegs.set_bits())
+ CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
+
+ // Save number of saved regs, so we can easily update CSStackSize later.
+ unsigned NumSavedRegs = SavedRegs.count();
+
+ // The frame record needs to be created by saving the appropriate registers
+ unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
+ if (hasFP(MF) ||
+ windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
+ SavedRegs.set(AArch64::FP);
+ SavedRegs.set(AArch64::LR);
+ }
+
LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
for (unsigned Reg
: SavedRegs.set_bits()) dbgs()
@@ -1468,15 +2028,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned NumRegsSpilled = SavedRegs.count();
- bool CanEliminateFrame = NumRegsSpilled == 0;
+ bool CanEliminateFrame = SavedRegs.count() == 0;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
- unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
- LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
- bool BigStack = (CFSize > EstimatedStackSizeLimit);
+ bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
@@ -1497,7 +2054,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (produceCompactUnwindFrame(MF))
SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = UnspilledCSGPRPaired;
- NumRegsSpilled = SavedRegs.count();
}
// If we didn't find an extra callee-saved register to spill, create
@@ -1514,9 +2070,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ // Adding the size of additional 64bit GPR saves.
+ CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
+ unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
+ LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
+ << EstimatedStackSize + AlignedCSStackSize
+ << " bytes.\n");
+
// Round up to register pair alignment to avoid additional SP adjustment
// instructions.
- AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
+ AFI->setCalleeSavedStackSize(AlignedCSStackSize);
+ AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
}
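A worked example may help here (the numbers are hypothetical, not taken from the patch): five 8-byte GPR saves give CSStackSize = 40, which rounds up to 48, so setCalleeSaveStackHasFreeSpace(true) records the 8 spare bytes that the stack-slot scavenging hook below keys off.

  #include <cassert>

  static unsigned alignTo16(unsigned Size) { return (Size + 15) & ~15u; }

  int main() {
    unsigned CSStackSize = 5 * 8;                         // e.g. x19..x23
    unsigned AlignedCSStackSize = alignTo16(CSStackSize); // 48
    bool HasFreeSpace = AlignedCSStackSize != CSStackSize;
    assert(AlignedCSStackSize == 48 && HasFreeSpace);
    return 0;
  }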
bool AArch64FrameLowering::enableStackSlotScavenging(
@@ -1524,3 +2088,69 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return AFI->hasCalleeSaveStackFreeSpace();
}
+
+void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+ // If this function isn't doing Win64-style C++ EH, we don't need to do
+ // anything.
+ if (!MF.hasEHFunclets())
+ return;
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
+
+ MachineBasicBlock &MBB = MF.front();
+ auto MBBI = MBB.begin();
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+ ++MBBI;
+
+ if (MBBI->isTerminator())
+ return;
+
+ // Create an UnwindHelp object.
+ int UnwindHelpFI =
+ MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
+ EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
+ // We need to store -2 into the UnwindHelp object at the start of the
+ // function.
+ DebugLoc DL;
+ RS->enterBasicBlock(MBB);
+ unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0);
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
+ .addReg(DstReg, getKillRegState(true))
+ .addFrameIndex(UnwindHelpFI)
+ .addImm(0);
+}
+
+/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
+/// the update. This is easily retrieved as it is exactly the offset that is set
+/// in processFunctionBeforeFrameFinalized.
+int AArch64FrameLowering::getFrameIndexReferencePreferSP(
+ const MachineFunction &MF, int FI, unsigned &FrameReg,
+ bool IgnoreSPUpdates) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
+ << MFI.getObjectOffset(FI) << "\n");
+ FrameReg = AArch64::SP;
+ return MFI.getObjectOffset(FI);
+}
+
+/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
+/// the parent's frame pointer
+unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
+ const MachineFunction &MF) const {
+ return 0;
+}
+
+/// Funclets only need to account for space for the callee saved registers,
+/// as the locals are accounted for in the parent's stack frame.
+unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
+ const MachineFunction &MF) const {
+ // This is the size of the pushed CSRs.
+ unsigned CSSize =
+ MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
+ // This is the amount of stack a funclet needs to allocate.
+ return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
+ getStackAlignment());
+}
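A quick back-of-the-envelope instance of the formula above (numbers invented for illustration): 48 bytes of pushed CSRs plus a 24-byte maximum outgoing call frame, rounded to the 16-byte stack alignment, gives an 80-byte funclet allocation.

  #include <cassert>

  static unsigned alignTo(unsigned Value, unsigned Align) {
    return (Value + Align - 1) / Align * Align;
  }

  int main() {
    unsigned CSSize = 48, MaxCallFrameSize = 24, StackAlign = 16;
    assert(alignTo(CSSize + MaxCallFrameSize, StackAlign) == 80);
    return 0;
  }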
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 104e52b5f1f3..0d0385acf46e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -69,6 +69,17 @@ public:
bool enableStackSlotScavenging(const MachineFunction &MF) const override;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const override;
+
+ unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override;
+
+ unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
+
+ int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
+ unsigned &FrameReg,
+ bool IgnoreSPUpdates) const override;
+
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
unsigned StackBumpBytes) const;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c1a9ee333b62..fc9855f6a0da 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1208,9 +1208,8 @@ void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
CurDAG->RemoveDeadNode(N);
}
@@ -1261,9 +1260,8 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
}
@@ -1441,9 +1439,8 @@ void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
}
@@ -1476,9 +1473,8 @@ void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
}
@@ -2091,8 +2087,7 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
(void)BitWidth;
assert(BitWidth == 32 || BitWidth == 64);
- KnownBits Known;
- CurDAG->computeKnownBits(Op, Known);
+ KnownBits Known = CurDAG->computeKnownBits(Op);
// Non-zero in the sense that they're not provably zero, which is the key
// point if we want to use this value
@@ -2171,8 +2166,7 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
// Compute the Known Zero for the AND as this allows us to catch more general
// cases than just looking for AND with imm.
- KnownBits Known;
- CurDAG->computeKnownBits(And, Known);
+ KnownBits Known = CurDAG->computeKnownBits(And);
// Non-zero in the sense that they're not provably zero, which is the key
// point if we want to use this value.
@@ -2313,8 +2307,7 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
// This allows to catch more general case than just looking for
// AND with imm. Indeed, simplify-demanded-bits may have removed
// the AND instruction because it proves it was useless.
- KnownBits Known;
- CurDAG->computeKnownBits(OrOpd1Val, Known);
+ KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
// Check if there is enough room for the second operand to appear
// in the first one
@@ -2690,7 +2683,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
unsigned Reg = PMapper->Encoding;
uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
unsigned State;
- if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
+ if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO ||
+ Reg == AArch64PState::SSBS) {
assert(Immed < 2 && "Bad imm");
State = AArch64::MSRpstateImm1;
} else {
@@ -2751,9 +2744,8 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
Opcode, SDLoc(N),
CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
@@ -2923,9 +2915,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
MVT::Other, MemAddr, Chain);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
- cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp =
+ cast<MemIntrinsicSDNode>(Node)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
ReplaceNode(Node, Ld);
return;
}
@@ -2944,9 +2936,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp =
+ cast<MemIntrinsicSDNode>(Node)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(Node, St);
return;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cfc7aa96d31f..e01ca14d7f63 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -187,7 +187,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
@@ -333,36 +333,38 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FREM, MVT::v4f16, Promote);
- setOperationAction(ISD::FREM, MVT::v8f16, Promote);
+ setOperationAction(ISD::FREM, MVT::v4f16, Expand);
+ setOperationAction(ISD::FREM, MVT::v8f16, Expand);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
- setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
- setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
- setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
- setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
- setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
if (!Subtarget->hasFullFP16()) {
setOperationAction(ISD::SELECT, MVT::f16, Promote);
@@ -385,8 +387,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
// promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
@@ -450,8 +452,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FMINNUM, Ty, Legal);
setOperationAction(ISD::FMAXNUM, Ty, Legal);
- setOperationAction(ISD::FMINNAN, Ty, Legal);
- setOperationAction(ISD::FMAXNAN, Ty, Legal);
+ setOperationAction(ISD::FMINIMUM, Ty, Legal);
+ setOperationAction(ISD::FMAXIMUM, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
@@ -463,8 +465,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -714,8 +716,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
- setOperationAction(ISD::MULHS, VT, Custom);
- setOperationAction(ISD::MULHU, VT, Custom);
+ setOperationAction(ISD::MULHS, VT, Legal);
+ setOperationAction(ISD::MULHU, VT, Legal);
} else {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
@@ -792,9 +794,9 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
for (MVT InnerVT : MVT::all_valuetypes())
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
- // CNT supports only B element sizes.
+ // CNT supports only B element sizes; use UADDLP afterwards to widen.
if (VT != MVT::v8i8 && VT != MVT::v16i8)
- setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
@@ -816,8 +818,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
if (VT.isFloatingPoint() &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
- for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
- ISD::FMINNUM, ISD::FMAXNUM})
+ for (unsigned Opcode :
+ {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
setOperationAction(Opcode, VT, Legal);
if (Subtarget->isLittleEndian()) {
@@ -993,8 +995,8 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
break;
case AArch64ISD::CSEL: {
KnownBits Known2;
- DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
- DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
@@ -1086,6 +1088,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::FIRST_NUMBER: break;
case AArch64ISD::CALL: return "AArch64ISD::CALL";
case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
+ case AArch64ISD::ADR: return "AArch64ISD::ADR";
case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
@@ -1272,6 +1275,20 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
return EndBB;
}
+MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
+ MachineInstr &MI, MachineBasicBlock *BB) const {
+ assert(!isAsynchronousEHPersonality(classifyEHPersonality(
+ BB->getParent()->getFunction().getPersonalityFn())) &&
+ "SEH does not use catchret!");
+ return BB;
+}
+
+MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad(
+ MachineInstr &MI, MachineBasicBlock *BB) const {
+ MI.eraseFromParent();
+ return BB;
+}
+
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
@@ -1287,6 +1304,11 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
+
+ case AArch64::CATCHRET:
+ return EmitLoweredCatchRet(MI, BB);
+ case AArch64::CATCHPAD:
+ return EmitLoweredCatchPad(MI, BB);
}
}
@@ -1459,6 +1481,21 @@ static bool isLegalArithImmed(uint64_t C) {
return IsLegal;
}
+// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
+// the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
+// can be set differently by this operation. It comes down to whether
+// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
+// everything is fine. If not then the optimization is wrong. Thus general
+// comparisons are only valid if op2 != 0.
+//
+// So, finally, the only LLVM-native comparisons that don't mention C and V
+// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
+// the absence of information about op2.
+static bool isCMN(SDValue Op, ISD::CondCode CC) {
+ return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE);
+}
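To make the flag subtlety concrete, here is a tiny standalone sketch (not LLVM code) showing that for op2 == 0 the carry flag produced by SUBS differs from the one produced by ADDS, which is why conditions other than EQ/NE cannot use the CMN form:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint32_t Op1 = 5, Op2 = 0;
    // AArch64 SUBS sets C when no borrow occurs, i.e. Op1 >= Op2 (unsigned).
    bool CarryFromSubs = Op1 >= Op2;                                 // 1
    // AArch64 ADDS sets C on unsigned carry-out of the addition.
    bool CarryFromAdds = (uint64_t)Op1 + (uint64_t)Op2 > UINT32_MAX; // 0
    printf("SUBS C=%d, ADDS C=%d\n", CarryFromSubs, CarryFromAdds);
    return 0;
  }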
+
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
@@ -1481,20 +1518,15 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
- if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on
- // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
- // can be set differently by this operation. It comes down to whether
- // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
- // everything is fine. If not then the optimization is wrong. Thus general
- // comparisons are only valid if op2 != 0.
-
- // So, finally, the only LLVM-native comparisons that don't mention C and V
- // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
- // the absence of information about op2.
+ if (isCMN(RHS, CC)) {
+ // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
+ } else if (isCMN(LHS, CC)) {
+ // As we are looking for EQ/NE compares, the operands can be commuted; can
+ // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
+ Opcode = AArch64ISD::ADDS;
+ LHS = LHS.getOperand(1);
} else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
!isUnsignedIntSetCC(CC)) {
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
@@ -1796,6 +1828,42 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
/// @}
+/// Returns how profitable it is to fold a comparison's operand's shift and/or
+/// extension operations.
+static unsigned getCmpOperandFoldingProfit(SDValue Op) {
+ auto isSupportedExtend = [&](SDValue V) {
+ if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
+ return true;
+
+ if (V.getOpcode() == ISD::AND)
+ if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ uint64_t Mask = MaskCst->getZExtValue();
+ return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
+ }
+
+ return false;
+ };
+
+ if (!Op.hasOneUse())
+ return 0;
+
+ if (isSupportedExtend(Op))
+ return 1;
+
+ unsigned Opc = Op.getOpcode();
+ if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
+ if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ uint64_t Shift = ShiftCst->getZExtValue();
+ if (isSupportedExtend(Op.getOperand(0)))
+ return (Shift <= 4) ? 2 : 1;
+ EVT VT = Op.getValueType();
+ if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
+ return 1;
+ }
+
+ return 0;
+}
+
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG,
const SDLoc &dl) {
@@ -1853,6 +1921,27 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
}
+
+ // Comparisons are canonicalized so that the RHS operand is simpler than the
+ // LHS one, the extreme case being when RHS is an immediate. However, AArch64
+ // can fold some shift+extend operations on the RHS operand, so swap the
+ // operands if that can be done.
+ //
+ // For example:
+ // lsl w13, w11, #1
+ // cmp w13, w12
+ // can be turned into:
+ // cmp w12, w11, lsl #1
+ if (!isa<ConstantSDNode>(RHS) ||
+ !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
+ SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
+
+ if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
+ std::swap(LHS, RHS);
+ CC = ISD::getSetCCSwappedOperands(CC);
+ }
+ }
+
SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
@@ -2619,66 +2708,6 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
-// Lower vector multiply high (ISD::MULHS and ISD::MULHU).
-static SDValue LowerMULH(SDValue Op, SelectionDAG &DAG) {
- // Multiplications are only custom-lowered for 128-bit vectors so that
- // {S,U}MULL{2} can be detected. Otherwise v2i64 multiplications are not
- // legal.
- EVT VT = Op.getValueType();
- assert(VT.is128BitVector() && VT.isInteger() &&
- "unexpected type for custom-lowering ISD::MULH{U,S}");
-
- SDValue V0 = Op.getOperand(0);
- SDValue V1 = Op.getOperand(1);
-
- SDLoc DL(Op);
-
- EVT ExtractVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
-
- // We turn (V0 mulhs/mulhu V1) to:
- //
- // (uzp2 (smull (extract_subvector (ExtractVT V128:V0, (i64 0)),
- // (extract_subvector (ExtractVT V128:V1, (i64 0))))),
- // (smull (extract_subvector (ExtractVT V128:V0, (i64 VMull2Idx)),
- // (extract_subvector (ExtractVT V128:V2, (i64 VMull2Idx))))))
- //
- // Where ExtractVT is a subvector with half number of elements, and
- // VMullIdx2 is the index of the middle element (the high part).
- //
- // The vector hight part extract and multiply will be matched against
- // {S,U}MULL{v16i8_v8i16,v8i16_v4i32,v4i32_v2i64} which in turn will
- // issue a {s}mull2 instruction.
- //
- // This basically multiply the lower subvector with '{s,u}mull', the high
- // subvector with '{s,u}mull2', and shuffle both results high part in
- // resulting vector.
- unsigned Mull2VectorIdx = VT.getVectorNumElements () / 2;
- SDValue VMullIdx = DAG.getConstant(0, DL, MVT::i64);
- SDValue VMull2Idx = DAG.getConstant(Mull2VectorIdx, DL, MVT::i64);
-
- SDValue VMullV0 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMullIdx);
- SDValue VMullV1 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMullIdx);
-
- SDValue VMull2V0 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMull2Idx);
- SDValue VMull2V1 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMull2Idx);
-
- unsigned MullOpc = Op.getOpcode() == ISD::MULHS ? AArch64ISD::SMULL
- : AArch64ISD::UMULL;
-
- EVT MullVT = ExtractVT.widenIntegerVectorElementType(*DAG.getContext());
- SDValue Mull = DAG.getNode(MullOpc, DL, MullVT, VMullV0, VMullV1);
- SDValue Mull2 = DAG.getNode(MullOpc, DL, MullVT, VMull2V0, VMull2V1);
-
- Mull = DAG.getNode(ISD::BITCAST, DL, VT, Mull);
- Mull2 = DAG.getNode(ISD::BITCAST, DL, VT, Mull2);
-
- return DAG.getNode(AArch64ISD::UZP2, DL, VT, Mull, Mull2);
-}
-
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -2689,9 +2718,19 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
}
- case Intrinsic::aarch64_neon_abs:
- return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
- Op.getOperand(1));
+ case Intrinsic::aarch64_neon_abs: {
+ EVT Ty = Op.getValueType();
+ if (Ty == MVT::i64) {
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
+ Op.getOperand(1));
+ Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
+ } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
+ return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
+ } else {
+ report_fatal_error("Unexpected type for AArch64 NEON intrinic");
+ }
+ }
case Intrinsic::aarch64_neon_smax:
return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
@@ -2794,6 +2833,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerSELECT_CC(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
+ case ISD::BR_JT:
+ return LowerBR_JT(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(Op, DAG);
case ISD::BlockAddress:
@@ -2830,8 +2871,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);
+ case ISD::SPONENTRY:
+ return LowerSPONENTRY(Op, DAG);
case ISD::RETURNADDR:
return LowerRETURNADDR(Op, DAG);
+ case ISD::ADDROFRETURNADDR:
+ return LowerADDROFRETURNADDR(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
@@ -2875,9 +2920,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
- case ISD::MULHS:
- case ISD::MULHU:
- return LowerMULH(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::STORE:
@@ -2927,6 +2969,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
case CallingConv::Win64:
return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
+ case CallingConv::AArch64_VectorCall:
+ return CC_AArch64_AAPCS;
}
}
@@ -3113,6 +3157,17 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// We currently pass all varargs at 8-byte alignment.
StackOffset = ((StackOffset + 7) & ~7);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
+
+ if (MFI.hasMustTailInVarArgFunc()) {
+ SmallVector<MVT, 2> RegParmTypes;
+ RegParmTypes.push_back(MVT::i64);
+ RegParmTypes.push_back(MVT::f128);
+ // Compute the set of forwarded registers. The rest are scratch.
+ SmallVectorImpl<ForwardedRegister> &Forwards =
+ FuncInfo->getForwardedMustTailRegParms();
+ CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
+ CC_AArch64_AAPCS);
+ }
}
unsigned StackArgSize = CCInfo.getNextStackOffset();
@@ -3135,6 +3190,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// much is there while considering tail calls (because we can reuse it).
FuncInfo->setBytesInStackArgArea(StackArgSize);
+ if (Subtarget->hasCustomCallingConv())
+ Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
+
return Chain;
}
@@ -3365,6 +3423,10 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
+ if (Subtarget->hasCustomCallingConv()) {
+ TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
+ TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
+ }
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
@@ -3566,6 +3628,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVector<SDValue, 8> MemOpChains;
auto PtrVT = getPointerTy(DAG.getDataLayout());
+ if (IsVarArg && CLI.CS && CLI.CS.isMustTailCall()) {
+ const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
+ for (const auto &F : Forwards) {
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
+ RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
+ }
+ }
+
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
++i, ++realArgIdx) {
@@ -3758,6 +3828,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
} else
Mask = TRI->getCallPreservedMask(MF, CallConv);
+ if (Subtarget->hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(MF, &Mask);
+
+ if (TRI->isAnyArgRegReserved(MF))
+ TRI->emitReservedArgRegCallError(MF);
+
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3944,13 +4020,21 @@ SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
}
+// (adr sym)
+template <class NodeTy>
+SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
+ unsigned Flags) const {
+ LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
+ return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
+}
+
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
- const AArch64II::TOF TargetFlags =
- (GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
- : AArch64II::MO_NO_FLAG);
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
@@ -3958,20 +4042,23 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
"unexpected offset in global node");
- // This also catches the large code model case for Darwin.
+ // This also catches the large code model case for Darwin, and tiny code
+ // model with got relocations.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
- return getGOT(GN, DAG, TargetFlags);
+ return getGOT(GN, DAG, OpFlags);
}
SDValue Result;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
- Result = getAddrLarge(GN, DAG, TargetFlags);
+ Result = getAddrLarge(GN, DAG, OpFlags);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ Result = getAddrTiny(GN, DAG, OpFlags);
} else {
- Result = getAddr(GN, DAG, TargetFlags);
+ Result = getAddr(GN, DAG, OpFlags);
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(GN);
- if (GV->hasDLLImportStorageClass())
+ if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
@@ -4036,8 +4123,10 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const uint32_t *Mask =
- Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *Mask = TRI->getTLSCallPreservedMask();
+ if (Subtarget->hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
@@ -4087,13 +4176,15 @@ SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
- assert(Subtarget->useSmallAddressing() &&
- "ELF TLS only supported in small memory model");
+ if (getTargetMachine().getCodeModel() == CodeModel::Large)
+ report_fatal_error("ELF TLS only supported in small memory model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
// maximum TLS size is 4GiB.
// FIXME: add -mtls-size command line option and make it control the 16MiB
// vs. 4GiB code sequence generation.
+ // FIXME: add tiny codemodel support. We currently generate the same code as
+ // small, which may be larger than needed.
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
@@ -4264,6 +4355,13 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ bool ProduceNonFlagSettingCondBr =
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+
// Handle f128 first, since lowering it will result in comparing the return
// value of a libcall against zero, which is just what the rest of LowerBR_CC
// is expecting to deal with.
@@ -4306,7 +4404,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// If the RHS of the comparison is zero, we can potentially fold this
// to a specialized branch.
const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
- if (RHSC && RHSC->getZExtValue() == 0) {
+ if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
if (CC == ISD::SETEQ) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
@@ -4349,7 +4447,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
- LHS.getOpcode() != ISD::AND) {
+ LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
@@ -4478,18 +4576,42 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
- if (VT == MVT::i32)
- Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
- Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
+ if (VT == MVT::i32 || VT == MVT::i64) {
+ if (VT == MVT::i32)
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
- SDValue UaddLV = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
- DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+ SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
+ SDValue UaddLV = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+ DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
- if (VT == MVT::i64)
- UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
- return UaddLV;
+ if (VT == MVT::i64)
+ UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
+ return UaddLV;
+ }
+
+ assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
+ VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
+ "Unexpected type for custom ctpop lowering");
+
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ Val = DAG.getBitcast(VT8Bit, Val);
+ Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
+
+ // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
+ unsigned EltSize = 8;
+ unsigned NumElts = VT.is64BitVector() ? 8 : 16;
+ while (EltSize != VT.getScalarSizeInBits()) {
+ EltSize *= 2;
+ NumElts /= 2;
+ MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
+ Val = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
+ DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
+ }
+
+ return Val;
}
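The widening loop above mirrors the classic byte-popcount-then-pairwise-add scheme; a scalar model of the same idea (purely illustrative, independent of the DAG code) looks like this:

  #include <cassert>
  #include <cstdint>

  // Per-byte bit count, the role CNT plays in the vector lowering.
  static uint32_t popcount8(uint32_t B) {
    B = (B & 0x55) + ((B >> 1) & 0x55);
    B = (B & 0x33) + ((B >> 2) & 0x33);
    return (B & 0x0f) + ((B >> 4) & 0x0f);
  }

  // Popcount a 32-bit value by counting bits per byte and then adding
  // adjacent lanes pairwise, the role UADDLP plays in the vector lowering.
  static uint32_t ctpop32ViaBytes(uint32_t V) {
    uint32_t Cnt8 = 0;
    for (int I = 0; I < 4; ++I)
      Cnt8 |= popcount8((V >> (8 * I)) & 0xff) << (8 * I);
    uint32_t Cnt16 = (Cnt8 & 0x00ff00ffu) + ((Cnt8 >> 8) & 0x00ff00ffu);
    return (Cnt16 & 0x0000ffffu) + (Cnt16 >> 16);
  }

  int main() {
    assert(ctpop32ViaBytes(0xf0f0f0f0u) == 16);
    assert(ctpop32ViaBytes(0x80000001u) == 2);
    return 0;
  }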
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -4811,10 +4933,28 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(JT, DAG);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ return getAddrTiny(JT, DAG);
}
return getAddr(JT, DAG);
}
+SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Jump table entries are emitted as PC-relative offsets. No additional tweaking
+ // is necessary here. Just get the address of the jump table.
+ SDLoc DL(Op);
+ SDValue JT = Op.getOperand(1);
+ SDValue Entry = Op.getOperand(2);
+ int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
+
+ SDNode *Dest =
+ DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
+ Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
+ return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
+ SDValue(Dest, 0));
+}
+
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
@@ -4825,6 +4965,8 @@ SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
return getGOT(CP, DAG);
}
return getAddrLarge(CP, DAG);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ return getAddrTiny(CP, DAG);
} else {
return getAddr(CP, DAG);
}
@@ -4836,9 +4978,10 @@ SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(BA, DAG);
- } else {
- return getAddr(BA, DAG);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ return getAddrTiny(BA, DAG);
}
+ return getAddr(BA, DAG);
}
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
@@ -5044,21 +5187,59 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
+SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+ EVT VT = getPointerTy(DAG.getDataLayout());
+ SDLoc DL(Op);
+ int FI = MFI.CreateFixedObject(4, 0, false);
+ return DAG.getFrameIndex(FI, VT);
+}
+
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", AArch64::SP)
+ .Case("x1", AArch64::X1)
+ .Case("w1", AArch64::W1)
+ .Case("x2", AArch64::X2)
+ .Case("w2", AArch64::W2)
+ .Case("x3", AArch64::X3)
+ .Case("w3", AArch64::W3)
+ .Case("x4", AArch64::X4)
+ .Case("w4", AArch64::W4)
+ .Case("x5", AArch64::X5)
+ .Case("w5", AArch64::W5)
+ .Case("x6", AArch64::X6)
+ .Case("w6", AArch64::W6)
+ .Case("x7", AArch64::X7)
+ .Case("w7", AArch64::W7)
.Case("x18", AArch64::X18)
.Case("w18", AArch64::W18)
.Case("x20", AArch64::X20)
.Case("w20", AArch64::W20)
.Default(0);
- if (((Reg == AArch64::X18 || Reg == AArch64::W18) &&
- !Subtarget->isX18Reserved()) ||
+ if (((Reg == AArch64::X1 || Reg == AArch64::W1) &&
+ !Subtarget->isXRegisterReserved(1)) ||
+ ((Reg == AArch64::X2 || Reg == AArch64::W2) &&
+ !Subtarget->isXRegisterReserved(2)) ||
+ ((Reg == AArch64::X3 || Reg == AArch64::W3) &&
+ !Subtarget->isXRegisterReserved(3)) ||
+ ((Reg == AArch64::X4 || Reg == AArch64::W4) &&
+ !Subtarget->isXRegisterReserved(4)) ||
+ ((Reg == AArch64::X5 || Reg == AArch64::W5) &&
+ !Subtarget->isXRegisterReserved(5)) ||
+ ((Reg == AArch64::X6 || Reg == AArch64::W6) &&
+ !Subtarget->isXRegisterReserved(6)) ||
+ ((Reg == AArch64::X7 || Reg == AArch64::W7) &&
+ !Subtarget->isXRegisterReserved(7)) ||
+ ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
+ !Subtarget->isXRegisterReserved(18)) ||
((Reg == AArch64::X20 || Reg == AArch64::W20) &&
- !Subtarget->isX20Reserved()))
+ !Subtarget->isXRegisterReserved(20)))
Reg = 0;
if (Reg)
return Reg;
@@ -5066,6 +5247,20 @@ unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ StringRef(RegName) + "\"."));
}
+SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ SDValue FrameAddr =
+ DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
+ SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
+
+ return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
+}
+
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -5208,40 +5403,29 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
// FIXME: We should be able to handle f128 as well with a clever lowering.
if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
(VT == MVT::f16 && Subtarget->hasFullFP16()))) {
- LLVM_DEBUG(
- dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
+ LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() << " imm value: 0\n");
return true;
}
- StringRef FPType;
bool IsLegal = false;
SmallString<128> ImmStrVal;
Imm.toString(ImmStrVal);
- if (VT == MVT::f64) {
- FPType = "f64";
+ if (VT == MVT::f64)
IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
- } else if (VT == MVT::f32) {
- FPType = "f32";
+ else if (VT == MVT::f32)
IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
- } else if (VT == MVT::f16 && Subtarget->hasFullFP16()) {
- FPType = "f16";
+ else if (VT == MVT::f16 && Subtarget->hasFullFP16())
IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
- }
if (IsLegal) {
- LLVM_DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal
- << "\n");
+ LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString()
+ << " imm value: " << ImmStrVal << "\n");
return true;
}
- if (!FPType.empty())
- LLVM_DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal
- << "\n");
- else
- LLVM_DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal
- << ": unsupported fp type\n");
-
+ LLVM_DEBUG(dbgs() << "Illegal " << VT.getEVTString()
+ << " imm value: " << ImmStrVal << "\n");
return false;
}
@@ -5453,6 +5637,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
case 'w':
+ if (!Subtarget->hasFPARMv8())
+ break;
if (VT.getSizeInBits() == 16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
if (VT.getSizeInBits() == 32)
@@ -5465,6 +5651,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
// The instructions that this constraint is designed for can
// only take 128-bit registers so just use that regclass.
case 'x':
+ if (!Subtarget->hasFPARMv8())
+ break;
if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &AArch64::FPR128_loRegClass);
break;
@@ -5500,6 +5688,11 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
}
}
+ if (Res.second && !Subtarget->hasFPARMv8() &&
+ !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
+ !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
+ return std::make_pair(0U, nullptr);
+
return Res;
}
@@ -6921,10 +7114,19 @@ static SDValue NormalizeBuildVector(SDValue Op,
SmallVector<SDValue, 16> Ops;
for (SDValue Lane : Op->ops()) {
+ // For integer vectors, type legalization would have promoted the
+ // operands already. Otherwise, if Op is a floating-point splat
+ // (with operands cast to integers), then the only possibilities
+ // are constants and UNDEFs.
if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
APInt LowBits(EltTy.getSizeInBits(),
CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
+ } else if (Lane.getNode()->isUndef()) {
+ Lane = DAG.getUNDEF(MVT::i32);
+ } else {
+ assert(Lane.getValueType() == MVT::i32 &&
+ "Unexpected BUILD_VECTOR operand type");
}
Ops.push_back(Lane);
}
@@ -7050,7 +7252,10 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getUNDEF(VT);
}
- if (isOnlyLowElement) {
+ // Convert BUILD_VECTOR where all elements but the lowest are undef into
+ // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
+ // as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
+ if (isOnlyLowElement && !(NumElts == 1 && isa<ConstantSDNode>(Value))) {
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
"SCALAR_TO_VECTOR node\n");
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
@@ -7632,7 +7837,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
if (ShouldInvert)
- return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
+ Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
return Cmp;
}
@@ -7718,8 +7923,10 @@ SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
- const uint32_t *Mask =
- Subtarget->getRegisterInfo()->getWindowsStackProbePreservedMask();
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
+ if (Subtarget->hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
@@ -7827,7 +8034,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
- for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
@@ -7891,6 +8098,10 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
+ // TODO: This may be worth removing. Check regression tests for diffs.
+ if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
+ return false;
+
// If we're reducing the load width in order to avoid having to use an extra
// instruction to do extension then it's probably a good idea.
if (ExtTy != ISD::NON_EXTLOAD)
@@ -8348,27 +8559,30 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- // Don't use AdvSIMD to implement 16-byte memset. It would have taken one
- // instruction to materialize the v2i64 zero and one store (with restrictive
- // addressing mode). Just do two i64 store of zero-registers.
- bool Fast;
const Function &F = MF.getFunction();
- if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
- !F.hasFnAttribute(Attribute::NoImplicitFloat) &&
- (memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
- return MVT::f128;
+ bool CanImplicitFloat = !F.hasFnAttribute(Attribute::NoImplicitFloat);
+ bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
+ bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
+ // Only use AdvSIMD to implement memset of 32-byte and above. It would have
+ // taken one instruction to materialize the v2i64 zero and one store (with
+ // restrictive addressing mode). Just do i64 stores.
+ bool IsSmallMemset = IsMemset && Size < 32;
+ auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
+ if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
+ return true;
+ bool Fast;
+ return allowsMisalignedMemoryAccesses(VT, 0, 1, &Fast) && Fast;
+ };
- if (Size >= 8 &&
- (memOpAlign(SrcAlign, DstAlign, 8) ||
- (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+ if (CanUseNEON && IsMemset && !IsSmallMemset &&
+ AlignmentIsAcceptable(MVT::v2i64, 16))
+ return MVT::v2i64;
+ if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
+ return MVT::f128;
+ if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
return MVT::i64;
-
- if (Size >= 4 &&
- (memOpAlign(SrcAlign, DstAlign, 4) ||
- (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+ if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
return MVT::i32;
-
return MVT::Other;
}
@@ -8496,7 +8710,9 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
}
bool
-AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const {
+AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const {
+ N = N->getOperand(0).getNode();
EVT VT = N->getValueType(0);
// If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
// it with shift to let it be lowered to UBFX.
@@ -9717,10 +9933,10 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
- return DAG.getNode(ISD::FMAXNAN, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmin:
- return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmaxnm:
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
@@ -9849,6 +10065,7 @@ static SDValue performExtendCombine(SDNode *N,
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
SDValue SplatVal, unsigned NumVecElts) {
+ assert(!St.isTruncatingStore() && "cannot split truncating vector store");
unsigned OrigAlignment = St.getAlignment();
unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
@@ -9923,6 +10140,11 @@ static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
if (!StVal.hasOneUse())
return SDValue();
+ // If the store is truncating then it's going down to i16 or smaller, which
+ // means it can be implemented in a single store anyway.
+ if (St.isTruncatingStore())
+ return SDValue();
+
// If the immediate offset of the address operand is too large for the stp
// instruction, then bail out.
if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
@@ -9973,6 +10195,11 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
if (NumVecElts != 4 && NumVecElts != 2)
return SDValue();
+ // If the store is truncating then it's going down to i16 or smaller, which
+ // means it can be implemented in a single store anyway.
+ if (St.isTruncatingStore())
+ return SDValue();
+
// Check that this is a splat.
// Make sure that each of the relevant vector element locations are inserted
// to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
@@ -10129,15 +10356,6 @@ static SDValue performPostLD1Combine(SDNode *N,
|| UI.getUse().getResNo() != Addr.getResNo())
continue;
- // Check that the add is independent of the load. Otherwise, folding it
- // would create a cycle.
- if (User->isPredecessorOf(LD) || LD->isPredecessorOf(User))
- continue;
- // Also check that add is not used in the vector operand. This would also
- // create a cycle.
- if (User->isPredecessorOf(Vector.getNode()))
- continue;
-
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
@@ -10148,11 +10366,16 @@ static SDValue performPostLD1Combine(SDNode *N,
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
- // Finally, check that the vector doesn't depend on the load.
- // Again, this would create a cycle.
- // The load depending on the vector is fine, as that's the case for the
- // LD1*post we'll eventually generate anyway.
- if (LoadSDN->isPredecessorOf(Vector.getNode()))
+ // To avoid cycle construction make sure that neither the load nor the add
+ // are predecessors to each other or the Vector.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(N);
+ Worklist.push_back(User);
+ Worklist.push_back(LD);
+ Worklist.push_back(Vector.getNode());
+ if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
SmallVector<SDValue, 8> Ops;
@@ -10238,7 +10461,13 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
// Check that the add is independent of the load/store. Otherwise, folding
// it would create a cycle.
- if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Addr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(User);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
// Find the new opcode for the updating load/store.
@@ -10608,6 +10837,13 @@ SDValue performCONDCombine(SDNode *N,
static SDValue performBRCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ return SDValue();
+
if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
N = NV.getNode();
SDValue Chain = N->getOperand(0);
@@ -10865,9 +11101,9 @@ static SDValue performNVCASTCombine(SDNode *N) {
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget,
const TargetMachine &TM) {
- auto *GN = dyn_cast<GlobalAddressSDNode>(N);
- if (!GN || Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
- AArch64II::MO_NO_FLAG)
+ auto *GN = cast<GlobalAddressSDNode>(N);
+ if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
+ AArch64II::MO_NO_FLAG)
return SDValue();
uint64_t MinOffset = -1ull;
@@ -10999,6 +11235,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
default:
break;
}
+ break;
case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}
@@ -11196,12 +11433,10 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
N->getOperand(0), // Chain in
};
- MachineFunction &MF = DAG.getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
unsigned Opcode;
- switch (MemOp[0]->getOrdering()) {
+ switch (MemOp->getOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CASPX;
break;
@@ -11221,7 +11456,7 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
MachineSDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
- CmpSwap->setMemRefs(MemOp, MemOp + 1);
+ DAG.setNodeMemRefs(CmpSwap, {MemOp});
unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
if (DAG.getDataLayout().isBigEndian())
@@ -11242,10 +11477,8 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
AArch64::CMP_SWAP_128, SDLoc(N),
DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other), Ops);
- MachineFunction &MF = DAG.getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
Results.push_back(SDValue(CmpSwap, 0));
Results.push_back(SDValue(CmpSwap, 1));
@@ -11310,12 +11543,11 @@ unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
}
TargetLoweringBase::LegalizeTypeAction
-AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
- MVT SVT = VT.getSimpleVT();
+AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
// During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
// v4i16, v2i32 instead of to promote.
- if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
- || SVT == MVT::v1f32)
+ if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
+ VT == MVT::v1f32)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
@@ -11349,16 +11581,20 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
}
-bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
+TargetLowering::AtomicExpansionKind
+AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// If subtarget has LSE, leave cmpxchg intact for codegen.
- if (Subtarget->hasLSE()) return false;
+ if (Subtarget->hasLSE())
+ return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
- return getTargetMachine().getOptLevel() != 0;
+ if (getTargetMachine().getOptLevel() == 0)
+ return AtomicExpansionKind::None;
+ return AtomicExpansionKind::LLSC;
}
Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
@@ -11468,6 +11704,39 @@ Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
return TargetLowering::getIRStackGuard(IRB);
}
+void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
+ // MSVC CRT provides functionalities for stack protection.
+ if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
+ // MSVC CRT has a global variable holding security cookie.
+ M.getOrInsertGlobal("__security_cookie",
+ Type::getInt8PtrTy(M.getContext()));
+
+ // MSVC CRT has a function to validate security cookie.
+ auto *SecurityCheckCookie = cast<Function>(
+ M.getOrInsertFunction("__security_check_cookie",
+ Type::getVoidTy(M.getContext()),
+ Type::getInt8PtrTy(M.getContext())));
+ SecurityCheckCookie->setCallingConv(CallingConv::Win64);
+ SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
+ return;
+ }
+ TargetLowering::insertSSPDeclarations(M);
+}
+
+Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
+ // MSVC CRT has a global variable holding security cookie.
+ if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+ return M.getGlobalVariable("__security_cookie");
+ return TargetLowering::getSDagStackGuard(M);
+}
+
+Value *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
+ // MSVC CRT has a function to validate security cookie.
+ if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+ return M.getFunction("__security_check_cookie");
+ return TargetLowering::getSSPStackGuardCheck(M);
+}
+
Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
@@ -11572,3 +11841,8 @@ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
MF.getFrameInfo().computeMaxCallFrameSize(MF);
TargetLoweringBase::finalizeLowering(MF);
}
+
+// Unlike X86, we let frame lowering assign offsets to all catch objects.
+bool AArch64TargetLowering::needsFixedCatchObjects() const {
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index d783c8a6048c..ffc4cc3ef534 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -35,6 +35,7 @@ enum NodeType : unsigned {
// offset of a variable into X0, using the TLSDesc model.
TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
+ ADR, // ADR
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
// Offset Table, TLS record).
@@ -301,6 +302,12 @@ public:
MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
@@ -363,7 +370,8 @@ public:
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
/// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
- bool isDesirableToCommuteWithShift(const SDNode *N) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
@@ -388,16 +396,21 @@ public:
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
- bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const override;
+ getPreferredVectorAction(MVT VT) const override;
/// If the target has a standard location for the stack protector cookie,
/// returns the address of that location. Otherwise, returns nullptr.
Value *getIRStackGuard(IRBuilder<> &IRB) const override;
+ void insertSSPDeclarations(Module &M) const override;
+ Value *getSDagStackGuard(const Module &M) const override;
+ Value *getSSPStackGuardCheck(const Module &M) const override;
+
/// If the target has a standard location for the unsafe stack pointer,
/// returns the address of that location. Otherwise, returns nullptr.
Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
@@ -514,6 +527,8 @@ public:
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
CallingConv::ID CallConv,
bool isVarArg) const override;
+ /// Used for exception handling on Win64.
+ bool needsFixedCatchObjects() const override;
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -586,6 +601,8 @@ private:
SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
template <class NodeTy>
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
+ template <class NodeTy>
+ SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -602,6 +619,7 @@ private:
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
@@ -611,6 +629,7 @@ private:
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 7caf32dbde2a..9061ed4f9f54 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -263,6 +263,14 @@ class SImmOperand<int width> : AsmOperandClass {
let PredicateMethod = "isSImm<" # width # ">";
}
+
+class AsmImmRange<int Low, int High> : AsmOperandClass {
+ let Name = "Imm" # Low # "_" # High;
+ let DiagnosticType = "InvalidImm" # Low # "_" # High;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImmInRange<" # Low # "," # High # ">";
+}
+
// Authenticated loads for v8.3 can have scaled 10-bit immediate offsets.
def SImm10s8Operand : SImmScaledMemoryIndexed<10, 8>;
def simm10Scaled : Operand<i64> {
@@ -271,6 +279,12 @@ def simm10Scaled : Operand<i64> {
let PrintMethod = "printImmScale<8>";
}
+def simm9s16 : Operand<i64> {
+ let ParserMatchClass = SImmScaledMemoryIndexed<9, 16>;
+ let DecoderMethod = "DecodeSImm<9>";
+ let PrintMethod = "printImmScale<16>";
+}
+
// uimm6 predicate - True if the immediate is in the range [0, 63].
def UImm6Operand : AsmOperandClass {
let Name = "UImm6";
@@ -281,6 +295,10 @@ def uimm6 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
let ParserMatchClass = UImm6Operand;
}
+def uimm16 : Operand<i16>, ImmLeaf<i16, [{return Imm >= 0 && Imm < 65536;}]>{
+ let ParserMatchClass = AsmImmRange<0, 65535>;
+}
+
def SImm9Operand : SImmOperand<9>;
def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> {
let ParserMatchClass = SImm9Operand;
@@ -366,6 +384,7 @@ def UImm6s1Operand : UImmScaledMemoryIndexed<6, 1>;
def UImm6s2Operand : UImmScaledMemoryIndexed<6, 2>;
def UImm6s4Operand : UImmScaledMemoryIndexed<6, 4>;
def UImm6s8Operand : UImmScaledMemoryIndexed<6, 8>;
+def UImm6s16Operand : UImmScaledMemoryIndexed<6, 16>;
def uimm6s1 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
let ParserMatchClass = UImm6s1Operand;
@@ -385,6 +404,11 @@ def uimm6s8 : Operand<i64>, ImmLeaf<i64,
let PrintMethod = "printImmScale<8>";
let ParserMatchClass = UImm6s8Operand;
}
+def uimm6s16 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >= 0 && Imm < (64*16) && ((Imm % 16) == 0); }]> {
+ let PrintMethod = "printImmScale<16>";
+ let ParserMatchClass = UImm6s16Operand;
+}
// simm6sN predicate - True if the immediate is a multiple of N in the range
// [-32 * N, 31 * N].
@@ -435,13 +459,6 @@ def simm4s16 : Operand<i64>, ImmLeaf<i64,
let DecoderMethod = "DecodeSImm<4>";
}
-class AsmImmRange<int Low, int High> : AsmOperandClass {
- let Name = "Imm" # Low # "_" # High;
- let DiagnosticType = "InvalidImm" # Low # "_" # High;
- let RenderMethod = "addImmOperands";
- let PredicateMethod = "isImmInRange<" # Low # "," # High # ">";
-}
-
def Imm1_8Operand : AsmImmRange<1, 8>;
def Imm1_16Operand : AsmImmRange<1, 16>;
def Imm1_32Operand : AsmImmRange<1, 32>;
@@ -696,11 +713,10 @@ def logical_imm64_not : Operand<i64> {
}
// imm0_65535 predicate - True if the immediate is in the range [0,65535].
-def Imm0_65535Operand : AsmImmRange<0, 65535>;
def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 65536;
}]> {
- let ParserMatchClass = Imm0_65535Operand;
+ let ParserMatchClass = AsmImmRange<0, 65535>;
let PrintMethod = "printImmHex";
}
@@ -1149,6 +1165,21 @@ def psbhint_op : Operand<i32> {
}];
}
+def BTIHintOperand : AsmOperandClass {
+ let Name = "BTIHint";
+ let ParserMethod = "tryParseBTIHint";
+}
+def btihint_op : Operand<i32> {
+ let ParserMatchClass = BTIHintOperand;
+ let PrintMethod = "printBTIHintOp";
+ let MCOperandPredicate = [{
+ // "bti" is an alias to "hint" only for certain values of CRm:Op2 fields.
+ if (!MCOp.isImm())
+ return false;
+ return AArch64BTIHint::lookupBTIByEncoding((MCOp.getImm() ^ 32) >> 1) != nullptr;
+ }];
+}
+
class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
"mrs", "\t$Rt, $systemreg"> {
bits<16> systemreg;
@@ -1173,16 +1204,23 @@ def pstatefield4_op : Operand<i32> {
let PrintMethod = "printSystemPStateField";
}
+// Instructions to modify PSTATE, no input reg
let Defs = [NZCV] in
+class PstateWriteSimple<dag iops, string asm, string operands>
+ : SimpleSystemI<0, iops, asm, operands> {
+
+ let Inst{20-19} = 0b00;
+ let Inst{15-12} = 0b0100;
+}
+
class MSRpstateImm0_15
- : SimpleSystemI<0, (ins pstatefield4_op:$pstatefield, imm0_15:$imm),
- "msr", "\t$pstatefield, $imm">,
+ : PstateWriteSimple<(ins pstatefield4_op:$pstatefield, imm0_15:$imm), "msr",
+ "\t$pstatefield, $imm">,
Sched<[WriteSys]> {
+
bits<6> pstatefield;
bits<4> imm;
- let Inst{20-19} = 0b00;
let Inst{18-16} = pstatefield{5-3};
- let Inst{15-12} = 0b0100;
let Inst{11-8} = imm;
let Inst{7-5} = pstatefield{2-0};
@@ -1201,16 +1239,15 @@ def pstatefield1_op : Operand<i32> {
let PrintMethod = "printSystemPStateField";
}
-let Defs = [NZCV] in
class MSRpstateImm0_1
- : SimpleSystemI<0, (ins pstatefield1_op:$pstatefield, imm0_1:$imm),
- "msr", "\t$pstatefield, $imm">,
+ : PstateWriteSimple<(ins pstatefield1_op:$pstatefield, imm0_1:$imm), "msr",
+ "\t$pstatefield, $imm">,
Sched<[WriteSys]> {
+
bits<6> pstatefield;
bit imm;
- let Inst{20-19} = 0b00;
let Inst{18-16} = pstatefield{5-3};
- let Inst{15-9} = 0b0100000;
+ let Inst{11-9} = 0b000;
let Inst{8} = imm;
let Inst{7-5} = pstatefield{2-0};
@@ -1719,10 +1756,12 @@ multiclass AddSubCarry<bit isSub, string asm, string asm_setflags,
}
class BaseTwoOperand<bits<4> opc, RegisterClass regtype, string asm,
- SDPatternOperator OpNode>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ SDPatternOperator OpNode,
+ RegisterClass in1regtype = regtype,
+ RegisterClass in2regtype = regtype>
+ : I<(outs regtype:$Rd), (ins in1regtype:$Rn, in2regtype:$Rm),
asm, "\t$Rd, $Rn, $Rm", "",
- [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> {
+ [(set regtype:$Rd, (OpNode in1regtype:$Rn, in2regtype:$Rm))]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -1902,7 +1941,7 @@ class ADRI<bit page, string asm, Operand adr, list<dag> pattern>
//---
def movimm32_imm : Operand<i32> {
- let ParserMatchClass = Imm0_65535Operand;
+ let ParserMatchClass = AsmImmRange<0, 65535>;
let EncoderMethod = "getMoveWideImmOpValue";
let PrintMethod = "printImm";
}
@@ -1977,23 +2016,29 @@ multiclass InsertImmediate<bits<2> opc, string asm> {
//---
class BaseAddSubImm<bit isSub, bit setFlags, RegisterClass dstRegtype,
- RegisterClass srcRegtype, addsub_shifted_imm immtype,
- string asm, SDPatternOperator OpNode>
- : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm),
- asm, "\t$Rd, $Rn, $imm", "",
- [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>,
- Sched<[WriteI, ReadI]> {
+ string asm_inst, string asm_ops,
+ dag inputs, dag pattern>
+ : I<(outs dstRegtype:$Rd), inputs, asm_inst, asm_ops, "", [pattern]>,
+ Sched<[WriteI, ReadI]> {
bits<5> Rd;
bits<5> Rn;
- bits<14> imm;
let Inst{30} = isSub;
let Inst{29} = setFlags;
let Inst{28-24} = 0b10001;
- let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12
- let Inst{21-10} = imm{11-0};
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
- let DecoderMethod = "DecodeBaseAddSubImm";
+}
+
+class AddSubImmShift<bit isSub, bit setFlags, RegisterClass dstRegtype,
+ RegisterClass srcRegtype, addsub_shifted_imm immtype,
+ string asm_inst, SDPatternOperator OpNode>
+ : BaseAddSubImm<isSub, setFlags, dstRegtype, asm_inst, "\t$Rd, $Rn, $imm",
+ (ins srcRegtype:$Rn, immtype:$imm),
+ (set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))> {
+ bits<14> imm;
+ let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12
+ let Inst{21-10} = imm{11-0};
+ let DecoderMethod = "DecodeAddSubImmShift";
}
class BaseAddSubRegPseudo<RegisterClass regtype,
@@ -2097,12 +2142,12 @@ multiclass AddSub<bit isSub, string mnemonic, string alias,
// We used to match the register variant before the immediate when the
// register argument could be implicitly zero-extended.
let AddedComplexity = 6 in
- def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
+ def Wri : AddSubImmShift<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
mnemonic, OpNode> {
let Inst{31} = 0;
}
let AddedComplexity = 6 in
- def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
+ def Xri : AddSubImmShift<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
mnemonic, OpNode> {
let Inst{31} = 1;
}
@@ -2173,11 +2218,11 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
string alias, string cmpAlias> {
let isCompare = 1, Defs = [NZCV] in {
// Add/Subtract immediate
- def Wri : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32,
+ def Wri : AddSubImmShift<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32,
mnemonic, OpNode> {
let Inst{31} = 0;
}
- def Xri : BaseAddSubImm<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64,
+ def Xri : AddSubImmShift<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64,
mnemonic, OpNode> {
let Inst{31} = 1;
}
@@ -2271,6 +2316,27 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
GPR64, GPR64sponly, GPR64, 24>; // UXTX #0
}
+class AddSubG<bit isSub, string asm_inst, SDPatternOperator OpNode>
+ : BaseAddSubImm<
+ isSub, 0, GPR64sp, asm_inst, "\t$Rd, $Rn, $imm6, $imm4",
+ (ins GPR64sp:$Rn, uimm6s16:$imm6, imm0_15:$imm4),
+ (set GPR64sp:$Rd, (OpNode GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4))> {
+ bits<6> imm6;
+ bits<4> imm4;
+ let Inst{31} = 1;
+ let Inst{23-22} = 0b10;
+ let Inst{21-16} = imm6;
+ let Inst{15-14} = 0b00;
+ let Inst{13-10} = imm4;
+ let Unpredictable{15-14} = 0b11;
+}
+
+class SUBP<bit setsFlags, string asm_instr, SDPatternOperator OpNode>
+ : BaseTwoOperand<0b0000, GPR64, asm_instr, null_frag, GPR64sp, GPR64sp> {
+ let Inst{31} = 1;
+ let Inst{29} = setsFlags;
+}
+
//---
// Extract
//---
@@ -2853,10 +2919,10 @@ def am_ldrlit : Operand<iPTR> {
let OperandType = "OPERAND_PCREL";
}
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-class LoadLiteral<bits<2> opc, bit V, RegisterOperand regtype, string asm>
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0, AddedComplexity = 20 in
+class LoadLiteral<bits<2> opc, bit V, RegisterOperand regtype, string asm, list<dag> pat>
: I<(outs regtype:$Rt), (ins am_ldrlit:$label),
- asm, "\t$Rt, $label", "", []>,
+ asm, "\t$Rt, $label", "", pat>,
Sched<[WriteLD]> {
bits<5> Rt;
bits<19> label;
@@ -3932,6 +3998,78 @@ class StoreExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
let Constraints = "@earlyclobber $Ws";
}
+// Armv8.5-A Memory Tagging Extension
+class BaseMemTag<bits<2> opc1, bits<2> opc2, string asm_insn,
+ string asm_opnds, string cstr, dag oops, dag iops>
+ : I<oops, iops, asm_insn, asm_opnds, cstr, []>,
+ Sched<[]> {
+ bits<5> Rn;
+
+ let Inst{31-24} = 0b11011001;
+ let Inst{23-22} = opc1;
+ let Inst{21} = 1;
+ // Inst{20-12} defined by subclass
+ let Inst{11-10} = opc2;
+ let Inst{9-5} = Rn;
+ // Inst{4-0} defined by subclass
+}
+
+class MemTagVector<bit Load, string asm_insn, string asm_opnds,
+ dag oops, dag iops>
+ : BaseMemTag<{0b1, Load}, 0b00, asm_insn, asm_opnds,
+ "$Rn = $wback,@earlyclobber $wback", oops, iops> {
+ bits<5> Rt;
+
+ let Inst{20-12} = 0b000000000;
+ let Inst{4-0} = Rt;
+
+ let mayLoad = Load;
+}
+
+class MemTagLoad<string asm_insn, string asm_opnds>
+ : BaseMemTag<0b01, 0b00, asm_insn, asm_opnds, "", (outs GPR64:$Rt),
+ (ins GPR64sp:$Rn, simm9s16:$offset)> {
+ bits<5> Rt;
+ bits<9> offset;
+
+ let Inst{20-12} = offset;
+ let Inst{4-0} = Rt;
+
+ let mayLoad = 1;
+}
+
+class BaseMemTagStore<bits<2> opc1, bits<2> opc2, string asm_insn,
+ string asm_opnds, string cstr, dag oops, dag iops>
+ : BaseMemTag<opc1, opc2, asm_insn, asm_opnds, cstr, oops, iops> {
+ bits<5> Rt;
+ bits<9> offset;
+
+ let Inst{20-12} = offset;
+ let Inst{4-0} = 0b11111;
+ let Unpredictable{4-0} = 0b11111;
+
+ let mayStore = 1;
+}
+
+multiclass MemTagStore<bits<2> opc1, string insn> {
+ def Offset :
+ BaseMemTagStore<opc1, 0b10, insn, "\t[$Rn, $offset]", "",
+ (outs), (ins GPR64sp:$Rn, simm9s16:$offset)>;
+ def PreIndex :
+ BaseMemTagStore<opc1, 0b11, insn, "\t[$Rn, $offset]!",
+ "$Rn = $wback,@earlyclobber $wback",
+ (outs GPR64sp:$wback),
+ (ins GPR64sp:$Rn, simm9s16:$offset)>;
+ def PostIndex :
+ BaseMemTagStore<opc1, 0b01, insn, "\t[$Rn], $offset",
+ "$Rn = $wback,@earlyclobber $wback",
+ (outs GPR64sp:$wback),
+ (ins GPR64sp:$Rn, simm9s16:$offset)>;
+
+ def : InstAlias<insn # "\t[$Rn]",
+ (!cast<Instruction>(NAME # "Offset") GPR64sp:$Rn, 0)>;
+}
+
//---
// Exception generation
//---
@@ -3948,6 +4086,19 @@ class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
let Inst{1-0} = ll;
}
+//---
+// UDF : Permanently UNDEFINED instructions. Format: Opc = 0x0000, 16 bit imm.
+//---
+let hasSideEffects = 1, isTrap = 1, mayLoad = 0, mayStore = 0 in {
+class UDFType<bits<16> opc, string asm>
+ : I<(outs), (ins uimm16:$imm),
+ asm, "\t$imm", "", []>,
+ Sched<[]> {
+ bits<16> imm;
+ let Inst{31-16} = opc;
+ let Inst{15-0} = imm;
+}
+}
let Predicates = [HasFPARMv8] in {
//---
@@ -4395,7 +4546,7 @@ multiclass FPConversion<string asm> {
//---
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
+class BaseSingleOperandFPData<bits<6> opcode, RegisterClass regtype,
ValueType vt, string asm, SDPatternOperator node>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "",
[(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>,
@@ -4403,8 +4554,8 @@ class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
bits<5> Rd;
bits<5> Rn;
let Inst{31-24} = 0b00011110;
- let Inst{21-19} = 0b100;
- let Inst{18-15} = opcode;
+ let Inst{21} = 0b1;
+ let Inst{20-15} = opcode;
let Inst{14-10} = 0b10000;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
@@ -4412,20 +4563,37 @@ class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
multiclass SingleOperandFPData<bits<4> opcode, string asm,
SDPatternOperator node = null_frag> {
- def Hr : BaseSingleOperandFPData<opcode, FPR16, f16, asm, node> {
+
+ def Hr : BaseSingleOperandFPData<{0b00,opcode}, FPR16, f16, asm, node> {
let Inst{23-22} = 0b11; // 16-bit size flag
let Predicates = [HasFullFP16];
}
- def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> {
+ def Sr : BaseSingleOperandFPData<{0b00,opcode}, FPR32, f32, asm, node> {
let Inst{23-22} = 0b00; // 32-bit size flag
}
- def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> {
+ def Dr : BaseSingleOperandFPData<{0b00,opcode}, FPR64, f64, asm, node> {
let Inst{23-22} = 0b01; // 64-bit size flag
}
}
+multiclass SingleOperandFPNo16<bits<6> opcode, string asm,
+ SDPatternOperator node = null_frag>{
+
+ def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> {
+ let Inst{23-22} = 0b00; // 32-bit registers
+ }
+
+ def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> {
+ let Inst{23-22} = 0b01; // 64-bit registers
+ }
+}
+
+// FRInt[32|64][Z|N] instructions
+multiclass FRIntNNT<bits<2> opcode, string asm, SDPatternOperator node = null_frag> :
+ SingleOperandFPNo16<{0b0100,opcode}, asm, node>;
+
//---
// Two operand floating point data processing
//---
@@ -4790,25 +4958,6 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
-class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1,
- string kind2, RegisterOperand RegType,
- ValueType AccumType, ValueType InputType,
- SDPatternOperator OpNode> :
- BaseSIMDThreeSameVectorTied<Q, U, 0b100, 0b10010, RegType, asm, kind1,
- [(set (AccumType RegType:$dst),
- (OpNode (AccumType RegType:$Rd),
- (InputType RegType:$Rn),
- (InputType RegType:$Rm)))]> {
- let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
-}
-
-multiclass SIMDThreeSameVectorDot<bit U, string asm, SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64,
- v2i32, v8i8, OpNode>;
- def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128,
- v4i32, v16i8, OpNode>;
-}
-
// All operand sizes distinguished in the encoding.
multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -5049,6 +5198,51 @@ multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
V128:$LHS, V128:$MHS, V128:$RHS)>;
}
+// ARMv8.2-A Dot Product Instructions (Vector): These instructions extract
+// bytes from S-sized elements.
+class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1,
+ string kind2, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDThreeSameVectorTied<Q, U, 0b100, 0b10010, RegType, asm, kind1,
+ [(set (AccumType RegType:$dst),
+ (OpNode (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType RegType:$Rm)))]> {
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+}
+
+multiclass SIMDThreeSameVectorDot<bit U, string asm, SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64,
+ v2i32, v8i8, OpNode>;
+ def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128,
+ v4i32, v16i8, OpNode>;
+}
+
+// ARMv8.2-A Fused Multiply Add-Long Instructions (Vector): These instructions
+// select inputs from 4H vectors and accumulate outputs to a 2S vector (or from
+// 8H to 4S, when Q=1).
+class BaseSIMDThreeSameVectorFML<bit Q, bit U, bit b13, bits<3> size, string asm, string kind1,
+ string kind2, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDThreeSameVectorTied<Q, U, size, 0b11101, RegType, asm, kind1,
+ [(set (AccumType RegType:$dst),
+ (OpNode (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType RegType:$Rm)))]> {
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+ let Inst{13} = b13;
+}
+
+multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
+ SDPatternOperator OpNode> {
+ def v4f16 : BaseSIMDThreeSameVectorFML<0, U, b13, size, asm, ".2s", ".2h", V64,
+ v2f32, v4f16, OpNode>;
+ def v8f16 : BaseSIMDThreeSameVectorFML<1, U, b13, size, asm, ".4s", ".4h", V128,
+ v4f32, v8f16, OpNode>;
+}
+
//----------------------------------------------------------------------------
// AdvSIMD two register vector instructions.
@@ -5293,7 +5487,7 @@ multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
[(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>;
}
-// Supports only S and D element sizes, uses high bit of the size field
+// Supports H, S and D element sizes, uses high bit of the size field
// as an extra opcode bit.
multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -5316,6 +5510,25 @@ multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
}
+// Supports only S and D element sizes
+multiclass SIMDTwoVectorSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+
+ def v2f32 : BaseSIMDTwoSameVector<0, U, 00, opc, 0b00, V64,
+ asm, ".2s", ".2s",
+ [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
+ def v4f32 : BaseSIMDTwoSameVector<1, U, 00, opc, 0b00, V128,
+ asm, ".4s", ".4s",
+ [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
+ def v2f64 : BaseSIMDTwoSameVector<1, U, 01, opc, 0b00, V128,
+ asm, ".2d", ".2d",
+ [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+}
+
+multiclass FRIntNNTVector<bit U, bit op, string asm,
+ SDPatternOperator OpNode = null_frag> :
+ SIMDTwoVectorSD<U, {0b1111,op}, asm, OpNode>;
+
// Supports only S element size.
multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -7236,7 +7449,7 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
let Inst{4-0} = Rd;
}
-// ARMv8.2 Index Dot product instructions
+// ARMv8.2-A Dot Product Instructions (Indexed)
class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
string lhs_kind, string rhs_kind,
RegisterOperand RegType,
@@ -7257,10 +7470,38 @@ class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
multiclass SIMDThreeSameVectorDotIndex<bit U, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b", V64,
- v2i32, v8i8, OpNode>;
- def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b", V128,
- v4i32, v16i8, OpNode>;
+ def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b",
+ V64, v2i32, v8i8, OpNode>;
+ def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b",
+ V128, v4i32, v16i8, OpNode>;
+}
+
+// ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed)
+class BaseSIMDThreeSameVectorFMLIndex<bit Q, bit U, bits<4> opc, string asm,
+ string dst_kind, string lhs_kind,
+ string rhs_kind, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDIndexedTied<Q, U, 0, 0b10, opc, RegType, RegType, V128,
+ VectorIndexH, asm, "", dst_kind, lhs_kind, rhs_kind,
+ [(set (AccumType RegType:$dst),
+ (AccumType (OpNode (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType (AArch64duplane16 (v8f16 V128:$Rm),
+ VectorIndexH:$idx)))))]> {
+ // idx = H:L:M
+ bits<3> idx;
+ let Inst{11} = idx{2}; // H
+ let Inst{21} = idx{1}; // L
+ let Inst{20} = idx{0}; // M
+}
+
+multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4f16 : BaseSIMDThreeSameVectorFMLIndex<0, U, opc, asm, ".2s", ".2h", ".h",
+ V64, v2f32, v4f16, OpNode>;
+ def v8f16 : BaseSIMDThreeSameVectorFMLIndex<1, U, opc, asm, ".4s", ".4h", ".h",
+ V128, v4f32, v8f16, OpNode>;
}
multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
@@ -9748,9 +9989,10 @@ class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
let Inst{4-0} = Rd;
}
+// 8.3 CompNum - Floating-point complex number support
multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
string asm, SDPatternOperator OpNode>{
- let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype,
asm, ".4h",
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
@@ -9766,7 +10008,7 @@ multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
(rottype i32:$rot)))]>;
}
- let Predicates = [HasV8_3a, HasNEON] in {
+ let Predicates = [HasComplxNum, HasNEON] in {
def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype,
asm, ".2s",
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
@@ -9822,7 +10064,7 @@ class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
Operand rottype, string asm,
SDPatternOperator OpNode> {
- let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64,
rottype, asm, ".4h",
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
@@ -9838,7 +10080,7 @@ multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
(rottype i32:$rot)))]>;
}
- let Predicates = [HasV8_3a, HasNEON] in {
+ let Predicates = [HasComplxNum, HasNEON] in {
def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64,
rottype, asm, ".2s",
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
@@ -9904,7 +10146,7 @@ class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
// classes.
multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
string asm, SDPatternOperator OpNode> {
- let Predicates = [HasV8_3a,HasNEON,HasFullFP16] in {
+ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64,
V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h",
".4h", ".h", []> {
@@ -9920,9 +10162,9 @@ multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
let Inst{11} = idx{1};
let Inst{21} = idx{0};
}
- } // Predicates = [HasV8_3a,HasNEON,HasFullFP16]
+ } // Predicates = [HasComplxNum, HasNEON, HasFullFP16]
- let Predicates = [HasV8_3a,HasNEON] in {
+ let Predicates = [HasComplxNum, HasNEON] in {
def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b10, opc1, opc2,
V128, V128, V128, VectorIndexD, rottype, asm, ".4s",
".4s", ".4s", ".s", []> {
@@ -9930,7 +10172,7 @@ multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
let Inst{11} = idx{0};
let Inst{21} = 0;
}
- } // Predicates = [HasV8_3a,HasNEON]
+ } // Predicates = [HasComplxNum, HasNEON]
}
//----------------------------------------------------------------------------
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 032d53d19620..ada067888572 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -66,7 +66,8 @@ static cl::opt<unsigned>
cl::desc("Restrict range of Bcc instructions (DEBUG)"));
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
- : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
+ : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
+ AArch64::CATCHRET),
RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
@@ -108,6 +109,14 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// This gets lowered to an instruction sequence which takes 16 bytes
NumBytes = 16;
break;
+ case AArch64::JumpTableDest32:
+ case AArch64::JumpTableDest16:
+ case AArch64::JumpTableDest8:
+ NumBytes = 12;
+ break;
+ case AArch64::SPACE:
+ NumBytes = MI.getOperand(1).getImm();
+ break;
}
return NumBytes;
@@ -675,14 +684,36 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
if (!Subtarget.hasCustomCheapAsMoveHandling())
return MI.isAsCheapAsAMove();
+ const unsigned Opcode = MI.getOpcode();
+
+ // Firstly, check cases gated by features.
+
+ if (Subtarget.hasZeroCycleZeroingFP()) {
+ if (Opcode == AArch64::FMOVH0 ||
+ Opcode == AArch64::FMOVS0 ||
+ Opcode == AArch64::FMOVD0)
+ return true;
+ }
+
+ if (Subtarget.hasZeroCycleZeroingGP()) {
+ if (Opcode == TargetOpcode::COPY &&
+ (MI.getOperand(1).getReg() == AArch64::WZR ||
+ MI.getOperand(1).getReg() == AArch64::XZR))
+ return true;
+ }
+
+ // Secondly, check cases specific to sub-targets.
+
if (Subtarget.hasExynosCheapAsMoveHandling()) {
- if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
+ if (isExynosCheapAsMove(MI))
return true;
- else
- return MI.isAsCheapAsAMove();
+
+ return MI.isAsCheapAsAMove();
}
- switch (MI.getOpcode()) {
+ // Finally, check generic cases.
+
+ switch (Opcode) {
default:
return false;
@@ -723,217 +754,12 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
return canBeExpandedToORR(MI, 32);
case AArch64::MOVi64imm:
return canBeExpandedToORR(MI, 64);
-
- // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
- // feature.
- case AArch64::FMOVH0:
- case AArch64::FMOVS0:
- case AArch64::FMOVD0:
- return Subtarget.hasZeroCycleZeroing();
- case TargetOpcode::COPY:
- return (Subtarget.hasZeroCycleZeroing() &&
- (MI.getOperand(1).getReg() == AArch64::WZR ||
- MI.getOperand(1).getReg() == AArch64::XZR));
}
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
-bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
- unsigned Reg, Imm, Shift;
-
- switch (MI.getOpcode()) {
- default:
- return false;
-
- // MOV Rd, SP
- case AArch64::ADDWri:
- case AArch64::ADDXri:
- if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(2).getImm();
- return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
-
- // Literal
- case AArch64::ADR:
- case AArch64::ADRP:
- return true;
-
- // MOVI Vd, #0
- case AArch64::MOVID:
- case AArch64::MOVIv8b_ns:
- case AArch64::MOVIv2d_ns:
- case AArch64::MOVIv16b_ns:
- Imm = MI.getOperand(1).getImm();
- return (Imm == 0);
-
- // MOVI Vd, #0
- case AArch64::MOVIv2i32:
- case AArch64::MOVIv4i16:
- case AArch64::MOVIv4i32:
- case AArch64::MOVIv8i16:
- Imm = MI.getOperand(1).getImm();
- Shift = MI.getOperand(2).getImm();
- return (Imm == 0 && Shift == 0);
-
- // MOV Rd, Imm
- case AArch64::MOVNWi:
- case AArch64::MOVNXi:
-
- // MOV Rd, Imm
- case AArch64::MOVZWi:
- case AArch64::MOVZXi:
- return true;
-
- // MOV Rd, Imm
- case AArch64::ORRWri:
- case AArch64::ORRXri:
- if (!MI.getOperand(1).isReg())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(2).getImm();
- return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
-
- // MOV Rd, Rm
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- if (!MI.getOperand(1).isReg())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getShiftValue(Imm);
- return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
- }
-}
-
-bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
- unsigned Imm, Shift;
- AArch64_AM::ShiftExtendType Ext;
-
- switch (MI.getOpcode()) {
- default:
- return false;
-
- // WriteI
- case AArch64::ADDSWri:
- case AArch64::ADDSXri:
- case AArch64::ADDWri:
- case AArch64::ADDXri:
- case AArch64::SUBSWri:
- case AArch64::SUBSXri:
- case AArch64::SUBWri:
- case AArch64::SUBXri:
- return true;
-
- // WriteISReg
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- case AArch64::EONWrs:
- case AArch64::EONXrs:
- case AArch64::EORWrs:
- case AArch64::EORXrs:
- case AArch64::ORNWrs:
- case AArch64::ORNXrs:
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getShiftValue(Imm);
- Ext = AArch64_AM::getShiftType(Imm);
- return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
-
- // WriteIEReg
- case AArch64::ADDSWrx:
- case AArch64::ADDSXrx:
- case AArch64::ADDSXrx64:
- case AArch64::ADDWrx:
- case AArch64::ADDXrx:
- case AArch64::ADDXrx64:
- case AArch64::SUBSWrx:
- case AArch64::SUBSXrx:
- case AArch64::SUBSXrx64:
- case AArch64::SUBWrx:
- case AArch64::SUBXrx:
- case AArch64::SUBXrx64:
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getArithShiftValue(Imm);
- Ext = AArch64_AM::getArithExtendType(Imm);
- return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
-
- case AArch64::PRFMroW:
- case AArch64::PRFMroX:
-
- // WriteLDIdx
- case AArch64::LDRBBroW:
- case AArch64::LDRBBroX:
- case AArch64::LDRHHroW:
- case AArch64::LDRHHroX:
- case AArch64::LDRSBWroW:
- case AArch64::LDRSBWroX:
- case AArch64::LDRSBXroW:
- case AArch64::LDRSBXroX:
- case AArch64::LDRSHWroW:
- case AArch64::LDRSHWroX:
- case AArch64::LDRSHXroW:
- case AArch64::LDRSHXroX:
- case AArch64::LDRSWroW:
- case AArch64::LDRSWroX:
- case AArch64::LDRWroW:
- case AArch64::LDRWroX:
- case AArch64::LDRXroW:
- case AArch64::LDRXroX:
-
- case AArch64::LDRBroW:
- case AArch64::LDRBroX:
- case AArch64::LDRDroW:
- case AArch64::LDRDroX:
- case AArch64::LDRHroW:
- case AArch64::LDRHroX:
- case AArch64::LDRSroW:
- case AArch64::LDRSroX:
-
- // WriteSTIdx
- case AArch64::STRBBroW:
- case AArch64::STRBBroX:
- case AArch64::STRHHroW:
- case AArch64::STRHHroX:
- case AArch64::STRWroW:
- case AArch64::STRWroX:
- case AArch64::STRXroW:
- case AArch64::STRXroX:
-
- case AArch64::STRBroW:
- case AArch64::STRBroX:
- case AArch64::STRDroW:
- case AArch64::STRDroX:
- case AArch64::STRHroW:
- case AArch64::STRHroX:
- case AArch64::STRSroW:
- case AArch64::STRSroX:
- Imm = MI.getOperand(3).getImm();
- Ext = AArch64_AM::getMemExtendType(Imm);
- return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
- }
-}
-
-bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
+bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
@@ -1055,6 +881,32 @@ bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
}
}
+bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64::SEH_StackAlloc:
+ case AArch64::SEH_SaveFPLR:
+ case AArch64::SEH_SaveFPLR_X:
+ case AArch64::SEH_SaveReg:
+ case AArch64::SEH_SaveReg_X:
+ case AArch64::SEH_SaveRegP:
+ case AArch64::SEH_SaveRegP_X:
+ case AArch64::SEH_SaveFReg:
+ case AArch64::SEH_SaveFReg_X:
+ case AArch64::SEH_SaveFRegP:
+ case AArch64::SEH_SaveFRegP_X:
+ case AArch64::SEH_SetFP:
+ case AArch64::SEH_AddFP:
+ case AArch64::SEH_Nop:
+ case AArch64::SEH_PrologEnd:
+ case AArch64::SEH_EpilogStart:
+ case AArch64::SEH_EpilogEnd:
+ return true;
+ }
+}
+
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
@@ -1078,7 +930,7 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned BaseRegA = 0, BaseRegB = 0;
+ MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned WidthA = 0, WidthB = 0;
@@ -1089,14 +941,14 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
- // Retrieve the base register, offset from the base register and width. Width
+ // Retrieve the base, offset from the base and width. Width
// is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
- // base registers are identical, and the offset of a lower memory access +
+ // bases are identical, and the offset of a lower memory access +
// the width doesn't overlap the offset of a higher memory access,
// then the memory accesses are different.
- if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
- getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
- if (BaseRegA == BaseRegB) {
+ if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+ getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+ if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
@@ -1107,6 +959,26 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
return false;
}
+bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
+ return true;
+ switch (MI.getOpcode()) {
+ case AArch64::HINT:
+ // CSDB hints are scheduling barriers.
+ if (MI.getOperand(0).getImm() == 0x14)
+ return true;
+ break;
+ case AArch64::DSB:
+ case AArch64::ISB:
+ // DSB and ISB also are scheduling barriers.
+ return true;
+ default:;
+ }
+ return isSEHInstruction(MI);
+}
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
@@ -1593,11 +1465,36 @@ bool AArch64InstrInfo::substituteCmpToZero(
}
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
- if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
+ if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
+ MI.getOpcode() != AArch64::CATCHRET)
return false;
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
+
+ if (MI.getOpcode() == AArch64::CATCHRET) {
+ // Skip to the first instruction before the epilog.
+ const TargetInstrInfo *TII =
+ MBB.getParent()->getSubtarget().getInstrInfo();
+ MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
+ auto MBBI = MachineBasicBlock::iterator(MI);
+ MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
+ while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
+ FirstEpilogSEH != MBB.begin())
+ FirstEpilogSEH = std::prev(FirstEpilogSEH);
+ if (FirstEpilogSEH != MBB.begin())
+ FirstEpilogSEH = std::next(FirstEpilogSEH);
+ BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
+ .addReg(AArch64::X0, RegState::Define)
+ .addMBB(TargetMBB);
+ BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
+ .addReg(AArch64::X0, RegState::Define)
+ .addReg(AArch64::X0)
+ .addMBB(TargetMBB)
+ .addImm(0);
+ return true;
+ }
+
unsigned Reg = MI.getOperand(0).getReg();
const GlobalValue *GV =
cast<GlobalValue>((*MI.memoperands_begin())->getValue());
@@ -1607,7 +1504,7 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if ((OpFlags & AArch64II::MO_GOT) != 0) {
BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
- .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
+ .addGlobalAddress(GV, 0, OpFlags);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
@@ -1632,6 +1529,9 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(Reg, RegState::Kill)
.addImm(0)
.addMemOperand(*MI.memoperands_begin());
+ } else if (TM.getCodeModel() == CodeModel::Tiny) {
+ BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
+ .addGlobalAddress(GV, 0, OpFlags);
} else {
BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
.addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
@@ -1647,71 +1547,6 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
-/// Return true if this is this instruction has a non-zero immediate
-bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- case AArch64::EONWrs:
- case AArch64::EONXrs:
- case AArch64::EORWrs:
- case AArch64::EORXrs:
- case AArch64::ORNWrs:
- case AArch64::ORNXrs:
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- if (MI.getOperand(3).isImm()) {
- unsigned val = MI.getOperand(3).getImm();
- return (val != 0);
- }
- break;
- }
- return false;
-}
-
-/// Return true if this is this instruction has a non-zero immediate
-bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::ADDSWrx:
- case AArch64::ADDSXrx:
- case AArch64::ADDSXrx64:
- case AArch64::ADDWrx:
- case AArch64::ADDXrx:
- case AArch64::ADDXrx64:
- case AArch64::SUBSWrx:
- case AArch64::SUBSXrx:
- case AArch64::SUBSXrx64:
- case AArch64::SUBWrx:
- case AArch64::SUBXrx:
- case AArch64::SUBXrx64:
- if (MI.getOperand(3).isImm()) {
- unsigned val = MI.getOperand(3).getImm();
- return (val != 0);
- }
- break;
- }
-
- return false;
-}
-
// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
@@ -1834,67 +1669,6 @@ unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return 0;
}
-/// Return true if this load/store scales or extends its register offset.
-/// This refers to scaling a dynamic index as opposed to scaled immediates.
-/// MI should be a memory op that allows scaled addressing.
-bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::LDRBBroW:
- case AArch64::LDRBroW:
- case AArch64::LDRDroW:
- case AArch64::LDRHHroW:
- case AArch64::LDRHroW:
- case AArch64::LDRQroW:
- case AArch64::LDRSBWroW:
- case AArch64::LDRSBXroW:
- case AArch64::LDRSHWroW:
- case AArch64::LDRSHXroW:
- case AArch64::LDRSWroW:
- case AArch64::LDRSroW:
- case AArch64::LDRWroW:
- case AArch64::LDRXroW:
- case AArch64::STRBBroW:
- case AArch64::STRBroW:
- case AArch64::STRDroW:
- case AArch64::STRHHroW:
- case AArch64::STRHroW:
- case AArch64::STRQroW:
- case AArch64::STRSroW:
- case AArch64::STRWroW:
- case AArch64::STRXroW:
- case AArch64::LDRBBroX:
- case AArch64::LDRBroX:
- case AArch64::LDRDroX:
- case AArch64::LDRHHroX:
- case AArch64::LDRHroX:
- case AArch64::LDRQroX:
- case AArch64::LDRSBWroX:
- case AArch64::LDRSBXroX:
- case AArch64::LDRSHWroX:
- case AArch64::LDRSHXroX:
- case AArch64::LDRSWroX:
- case AArch64::LDRSroX:
- case AArch64::LDRWroX:
- case AArch64::LDRXroX:
- case AArch64::STRBBroX:
- case AArch64::STRBroX:
- case AArch64::STRDroX:
- case AArch64::STRHHroX:
- case AArch64::STRHroX:
- case AArch64::STRQroX:
- case AArch64::STRSroX:
- case AArch64::STRWroX:
- case AArch64::STRXroX:
-
- unsigned Val = MI.getOperand(3).getImm();
- AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
- return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
- }
- return false;
-}
-
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
@@ -2068,17 +1842,21 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
if (MI.hasOrderedMemoryRef())
return false;
- // Make sure this is a reg+imm (as opposed to an address reloc).
- assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
+ // Make sure this is a reg/fi+imm (as opposed to an address reloc).
+ assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
+ "Expected a reg or frame index operand.");
if (!MI.getOperand(2).isImm())
return false;
// Can't merge/pair if the instruction modifies the base register.
// e.g., ldr x0, [x0]
- unsigned BaseReg = MI.getOperand(1).getReg();
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- if (MI.modifiesRegister(BaseReg, TRI))
- return false;
+ // This case will never occur with an FI base.
+ if (MI.getOperand(1).isReg()) {
+ unsigned BaseReg = MI.getOperand(1).getReg();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (MI.modifiesRegister(BaseReg, TRI))
+ return false;
+ }
// Check if this load/store has a hint to avoid pair formation.
// MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
@@ -2101,25 +1879,28 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
return true;
}
-bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
+ MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
unsigned Width;
- return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
+ return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
}
-bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo *TRI) const {
+bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
+ MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ unsigned &Width, const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
if (LdSt.getNumExplicitOperands() == 3) {
// Non-paired instruction (e.g., ldr x1, [x0, #8]).
- if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
+ if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
+ !LdSt.getOperand(2).isImm())
return false;
} else if (LdSt.getNumExplicitOperands() == 4) {
// Paired instruction (e.g., ldp x1, x2, [x0, #8]).
- if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
+ if (!LdSt.getOperand(1).isReg() ||
+ (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
!LdSt.getOperand(3).isImm())
return false;
} else
@@ -2138,13 +1919,18 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
// multiplied by the scaling factor. Unscaled instructions have scaling factor
// set to 1.
if (LdSt.getNumExplicitOperands() == 3) {
- BaseReg = LdSt.getOperand(1).getReg();
+ BaseOp = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm() * Scale;
} else {
assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
- BaseReg = LdSt.getOperand(2).getReg();
+ BaseOp = &LdSt.getOperand(2);
Offset = LdSt.getOperand(3).getImm() * Scale;
}
+
+ assert((BaseOp->isReg() || BaseOp->isFI()) &&
+ "getMemOperandWithOffset only supports base "
+ "operands of type register or frame index.");
+
return true;
}
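As the comment above notes, the Offset reported through BaseOp/Offset is always a byte offset: the immediate operand is multiplied by a per-opcode scaling factor, and unscaled (LDUR/STUR-style) opcodes use a factor of 1. A quick worked example, with the usual AArch64 access sizes assumed here rather than quoted from the patch:

#include <cassert>
#include <cstdint>

int main() {
  // ldr  x1, [x0, #3]  (LDRXui, immediate scaled by the 8-byte access size)
  // ldur x1, [x0, #3]  (LDURXi, unscaled, factor 1)
  const int64_t Imm = 3;
  const int64_t LdrXuiScale = 8, LdurXiScale = 1;  // assumed scale factors
  assert(Imm * LdrXuiScale == 24);  // byte offset reported for the LDRXui
  assert(Imm * LdurXiScale == 3);   // byte offset reported for the LDURXi
  return 0;
}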
@@ -2299,31 +2085,33 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
return true;
}
-// Scale the unscaled offsets. Returns false if the unscaled offset can't be
-// scaled.
-static bool scaleOffset(unsigned Opc, int64_t &Offset) {
- unsigned OffsetStride = 1;
+static unsigned getOffsetStride(unsigned Opc) {
switch (Opc) {
default:
- return false;
+ return 0;
case AArch64::LDURQi:
case AArch64::STURQi:
- OffsetStride = 16;
- break;
+ return 16;
case AArch64::LDURXi:
case AArch64::LDURDi:
case AArch64::STURXi:
case AArch64::STURDi:
- OffsetStride = 8;
- break;
+ return 8;
case AArch64::LDURWi:
case AArch64::LDURSi:
case AArch64::LDURSWi:
case AArch64::STURWi:
case AArch64::STURSi:
- OffsetStride = 4;
- break;
+ return 4;
}
+}
+
+// Scale the unscaled offsets. Returns false if the unscaled offset can't be
+// scaled.
+static bool scaleOffset(unsigned Opc, int64_t &Offset) {
+ unsigned OffsetStride = getOffsetStride(Opc);
+ if (OffsetStride == 0)
+ return false;
// If the byte-offset isn't a multiple of the stride, we can't scale this
// offset.
if (Offset % OffsetStride != 0)
@@ -2335,6 +2123,19 @@ static bool scaleOffset(unsigned Opc, int64_t &Offset) {
return true;
}
+// Unscale the scaled offsets. Returns false if the scaled offset can't be
+// unscaled.
+static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
+ unsigned OffsetStride = getOffsetStride(Opc);
+ if (OffsetStride == 0)
+ return false;
+
+ // Convert the "element" offset used by scaled pair load/store instructions
+ // into the byte-offset used by unscaled.
+ Offset *= OffsetStride;
+ return true;
+}
+
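For readers following the refactor, here is a minimal standalone sketch (not part of the diff) of the byte-offset/element-offset conversion that getOffsetStride, scaleOffset and unscaleOffset implement. The FakeOpc enum is an illustrative stand-in for the AArch64 opcodes listed above; the strides match the values in the switch.

#include <cassert>
#include <cstdint>

enum FakeOpc { LDURXi, LDURWi, LDURQi };  // stand-ins for AArch64 opcodes

static unsigned getStride(FakeOpc Opc) {
  switch (Opc) {
  case LDURQi: return 16;
  case LDURXi: return 8;
  case LDURWi: return 4;
  }
  return 0;
}

static bool scaleOff(FakeOpc Opc, int64_t &Offset) {
  unsigned Stride = getStride(Opc);
  if (Stride == 0 || Offset % Stride != 0)
    return false;
  Offset /= Stride;  // byte offset -> "element" offset
  return true;
}

static bool unscaleOff(FakeOpc Opc, int64_t &Offset) {
  unsigned Stride = getStride(Opc);
  if (Stride == 0)
    return false;
  Offset *= Stride;  // "element" offset -> byte offset
  return true;
}

int main() {
  int64_t Off = 32;                           // byte offset of an LDURXi
  assert(scaleOff(LDURXi, Off) && Off == 4);  // 32 bytes = element 4
  assert(unscaleOff(LDURXi, Off) && Off == 32);
  Off = 12;                                   // not a multiple of 8
  assert(!scaleOff(LDURXi, Off));
  return 0;
}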
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
if (FirstOpc == SecondOpc)
return true;
@@ -2353,15 +2154,46 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
return false;
}
+static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
+ int64_t Offset1, unsigned Opcode1, int FI2,
+ int64_t Offset2, unsigned Opcode2) {
+ // Accesses through fixed stack object frame indices may access a different
+ // fixed stack slot. Check that the object offsets + offsets match.
+ if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
+ int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
+ int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
+ assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
+ // Get the byte-offset from the object offset.
+ if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
+ return false;
+ ObjectOffset1 += Offset1;
+ ObjectOffset2 += Offset2;
+ // Get the "element" index in the object.
+ if (!scaleOffset(Opcode1, ObjectOffset1) ||
+ !scaleOffset(Opcode2, ObjectOffset2))
+ return false;
+ return ObjectOffset1 + 1 == ObjectOffset2;
+ }
+
+ return FI1 == FI2;
+}
+
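The fixed-object path above folds the frame object offsets into the instruction offsets and then checks that the resulting element indices are adjacent. A small standalone sketch of that arithmetic, with the stride hard-coded to the 8-byte LDURXi/STURXi case and purely illustrative offsets:

#include <cassert>
#include <cstdint>

// Incoming Offset1/Offset2 are "element" offsets; object offsets are bytes.
static bool clusterFixedObjects(int64_t ObjectOffset1, int64_t Offset1,
                                int64_t ObjectOffset2, int64_t Offset2) {
  const int64_t Stride = 8;
  Offset1 *= Stride;                        // unscaleOffset: elements -> bytes
  Offset2 *= Stride;
  ObjectOffset1 += Offset1;                 // byte position of each access
  ObjectOffset2 += Offset2;
  if (ObjectOffset1 % Stride || ObjectOffset2 % Stride)
    return false;                           // scaleOffset would fail
  ObjectOffset1 /= Stride;                  // scaleOffset: bytes -> elements
  ObjectOffset2 /= Stride;
  return ObjectOffset1 + 1 == ObjectOffset2;
}

int main() {
  // Two adjacent 8-byte fixed slots, both accessed at offset 0: pairable.
  assert(clusterFixedObjects(/*ObjOff1=*/0, /*Off1=*/0, /*ObjOff2=*/8, 0));
  // Slots 16 bytes apart: elements 0 and 2 are not adjacent, so no ldp/stp.
  assert(!clusterFixedObjects(0, 0, 16, 0));
  return 0;
}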
/// Detect opportunities for ldp/stp formation.
///
-/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
-bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
- unsigned BaseReg1,
- MachineInstr &SecondLdSt,
- unsigned BaseReg2,
+/// Only called for LdSt for which getMemOperandWithOffset returns true.
+bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
+ MachineOperand &BaseOp2,
unsigned NumLoads) const {
- if (BaseReg1 != BaseReg2)
+ MachineInstr &FirstLdSt = *BaseOp1.getParent();
+ MachineInstr &SecondLdSt = *BaseOp2.getParent();
+ if (BaseOp1.getType() != BaseOp2.getType())
+ return false;
+
+ assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+ "Only base registers and frame indices are supported.");
+
+ // Check for both base regs and base FI.
+ if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
return false;
// Only cluster up to a single pair.
@@ -2397,7 +2229,20 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return false;
// The caller should already have ordered First/SecondLdSt by offset.
- assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
+ // Note: except for non-equal frame index bases
+ if (BaseOp1.isFI()) {
+ assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
+ "Caller should have ordered offsets.");
+
+ const MachineFrameInfo &MFI =
+ FirstLdSt.getParent()->getParent()->getFrameInfo();
+ return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
+ BaseOp2.getIndex(), Offset2, SecondOpc);
+ }
+
+ assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
+ "Caller should have ordered offsets.");
+
return Offset1 + 1 == Offset2;
}
@@ -2478,7 +2323,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
}
- } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
+ } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
@@ -2515,7 +2360,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
- } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
+ } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
@@ -2730,13 +2575,33 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
llvm_unreachable("unimplemented reg-to-reg copy");
}
+static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const MCInstrDesc &MCID,
+ unsigned SrcReg, bool IsKill,
+ unsigned SubIdx0, unsigned SubIdx1, int FI,
+ MachineMemOperand *MMO) {
+ unsigned SrcReg0 = SrcReg;
+ unsigned SrcReg1 = SrcReg;
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
+ SubIdx0 = 0;
+ SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
+ SubIdx1 = 0;
+ }
+ BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
+ .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
+ .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
void AArch64InstrInfo::storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
bool isKill, int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -2772,8 +2637,14 @@ void AArch64InstrInfo::storeRegToStackSlot(
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
else
assert(SrcReg != AArch64::SP);
- } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
+ } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
Opc = AArch64::STRDui;
+ } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
+ storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::STPWi), SrcReg, isKill,
+ AArch64::sube32, AArch64::subo32, FI, MMO);
+ return;
+ }
break;
case 16:
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
@@ -2783,14 +2654,9 @@ void AArch64InstrInfo::storeRegToStackSlot(
Opc = AArch64::ST1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
- .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
- getKillRegState(isKill))
- .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
- getKillRegState(isKill))
- .addFrameIndex(FI)
- .addImm(0)
- .addMemOperand(MMO);
+ storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::STPXi), SrcReg, isKill,
+ AArch64::sube64, AArch64::subo64, FI, MMO);
return;
}
break;
@@ -2829,7 +2695,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI);
@@ -2838,13 +2704,35 @@ void AArch64InstrInfo::storeRegToStackSlot(
MI.addMemOperand(MMO);
}
+static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const MCInstrDesc &MCID,
+ unsigned DestReg, unsigned SubIdx0,
+ unsigned SubIdx1, int FI,
+ MachineMemOperand *MMO) {
+ unsigned DestReg0 = DestReg;
+ unsigned DestReg1 = DestReg;
+ bool IsUndef = true;
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
+ DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
+ SubIdx0 = 0;
+ DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
+ SubIdx1 = 0;
+ IsUndef = false;
+ }
+ BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
+ .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
+ .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
void AArch64InstrInfo::loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -2880,8 +2768,14 @@ void AArch64InstrInfo::loadRegFromStackSlot(
MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
else
assert(DestReg != AArch64::SP);
- } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
+ } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDRDui;
+ } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
+ loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::LDPWi), DestReg, AArch64::sube32,
+ AArch64::subo32, FI, MMO);
+ return;
+ }
break;
case 16:
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
@@ -2891,14 +2785,9 @@ void AArch64InstrInfo::loadRegFromStackSlot(
Opc = AArch64::LD1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
- .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
- getDefRegState(true))
- .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
- getDefRegState(true))
- .addFrameIndex(FI)
- .addImm(0)
- .addMemOperand(MMO);
+ loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::LDPXi), DestReg, AArch64::sube64,
+ AArch64::subo64, FI, MMO);
return;
}
break;
@@ -2937,7 +2826,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(DestReg, getDefRegState(true))
.addFrameIndex(FI);
if (Offset)
@@ -2949,7 +2838,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg, int Offset,
const TargetInstrInfo *TII,
- MachineInstr::MIFlag Flag, bool SetNZCV) {
+ MachineInstr::MIFlag Flag, bool SetNZCV,
+ bool NeedsWinCFI) {
if (DestReg == SrcReg && Offset == 0)
return;
@@ -2994,6 +2884,11 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
.setMIFlag(Flag);
+ if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(ThisVal)
+ .setMIFlag(Flag);
+
SrcReg = DestReg;
Offset -= ThisVal;
if (Offset == 0)
@@ -3004,6 +2899,21 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.addImm(Offset)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
.setMIFlag(Flag);
+
+ if (NeedsWinCFI) {
+ if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
+ (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
+ if (Offset == 0)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
+ setMIFlag(Flag);
+ else
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
+ addImm(Offset).setMIFlag(Flag);
+ } else if (DestReg == AArch64::SP) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
+ addImm(Offset).setMIFlag(Flag);
+ }
+ }
}
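As a usage sketch only (a fragment assuming the surrounding AArch64 frame-lowering context, with MBB, MBBI, DL, TII and NumBytes in scope; it is not code from this patch), a Windows prologue that allocates stack through emitFrameOffset would pass the new flag so the matching SEH opcode is emitted next to the SUB:

// Hypothetical caller in AArch64 frame lowering.  With NeedsWinCFI set and
// both registers being SP, the "sub sp, sp, #NumBytes" emitted below is
// followed by a SEH_StackAlloc pseudo; an FP setup would instead get
// SEH_SetFP / SEH_AddFP per the branches above.
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                MachineInstr::FrameSetup, /*SetNZCV=*/false,
                /*NeedsWinCFI=*/true);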
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
@@ -4839,7 +4749,10 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
using namespace AArch64II;
static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
+ {MO_COFFSTUB, "aarch64-coffstub"},
+ {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"},
+ {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"},
+ {MO_DLLIMPORT, "aarch64-dllimport"}};
return makeArrayRef(TargetFlags);
}
@@ -4941,11 +4854,13 @@ enum MachineOutlinerClass {
enum MachineOutlinerMBBFlags {
LRUnavailableSomewhere = 0x2,
- HasCalls = 0x4
+ HasCalls = 0x4,
+ UnsafeRegsDead = 0x8
};
unsigned
AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
+ assert(C.LRUWasSet && "LRU wasn't set?");
MachineFunction *MF = C.getMF();
const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
MF->getSubtarget().getRegisterInfo());
@@ -4968,17 +4883,22 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
outliner::OutlinedFunction
AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
- unsigned SequenceSize = std::accumulate(
- RepeatedSequenceLocs[0].front(),
- std::next(RepeatedSequenceLocs[0].back()),
- 0, [this](unsigned Sum, const MachineInstr &MI) {
- return Sum + getInstSizeInBytes(MI);
- });
-
- // Compute liveness information for each candidate.
+ outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
+ unsigned SequenceSize =
+ std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
+ [this](unsigned Sum, const MachineInstr &MI) {
+ return Sum + getInstSizeInBytes(MI);
+ });
+
+ // Properties about candidate MBBs that hold for all of them.
+ unsigned FlagsSetInAll = 0xF;
+
+ // Compute liveness information for each candidate, and set FlagsSetInAll.
const TargetRegisterInfo &TRI = getRegisterInfo();
std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
- [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });
+ [&FlagsSetInAll](outliner::Candidate &C) {
+ FlagsSetInAll &= C.Flags;
+ });
// According to the AArch64 Procedure Call Standard, the following are
// undefined on entry/exit from a function call:
@@ -4991,23 +4911,31 @@ AArch64InstrInfo::getOutliningCandidateInfo(
// of these registers is live into/across it. Thus, we need to delete
// those
// candidates.
- auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
+ auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
+ // If the unsafe registers in this block are all dead, then we don't need
+ // to compute liveness here.
+ if (C.Flags & UnsafeRegsDead)
+ return false;
+ C.initLRU(TRI);
LiveRegUnits LRU = C.LRU;
return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
!LRU.available(AArch64::NZCV));
};
- // Erase every candidate that violates the restrictions above. (It could be
- // true that we have viable candidates, so it's not worth bailing out in
- // the case that, say, 1 out of 20 candidates violate the restrictions.)
- RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- CantGuaranteeValueAcrossCall),
- RepeatedSequenceLocs.end());
+ // Are there any candidates where those registers are live?
+ if (!(FlagsSetInAll & UnsafeRegsDead)) {
+ // Erase every candidate that violates the restrictions above. (It could be
+ // true that we have viable candidates, so it's not worth bailing out in
+ // the case that, say, 1 out of 20 candidates violate the restrictions.)
+ RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
+ RepeatedSequenceLocs.end(),
+ CantGuaranteeValueAcrossCall),
+ RepeatedSequenceLocs.end());
- // If the sequence is empty, we're done.
- if (RepeatedSequenceLocs.empty())
- return outliner::OutlinedFunction();
+ // If the sequence doesn't have enough candidates left, then we're done.
+ if (RepeatedSequenceLocs.size() < 2)
+ return outliner::OutlinedFunction();
+ }
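A small standalone sketch (illustrative candidate flags, not values taken from the patch) of how FlagsSetInAll keeps only the properties that hold for every candidate's block, using the MachineOutlinerMBBFlags values defined above:

#include <cassert>

enum {
  LRUnavailableSomewhere = 0x2,
  HasCalls = 0x4,
  UnsafeRegsDead = 0x8,
};

int main() {
  // Flags of three hypothetical candidates' blocks.
  unsigned CandFlags[] = {HasCalls | UnsafeRegsDead, UnsafeRegsDead,
                          LRUnavailableSomewhere | UnsafeRegsDead};
  unsigned FlagsSetInAll = 0xF;
  for (unsigned F : CandFlags)
    FlagsSetInAll &= F;

  // Every block has W16/W17/NZCV dead, so the per-candidate liveness
  // check can be skipped ...
  assert(FlagsSetInAll & UnsafeRegsDead);
  // ... but not every block is known to contain a call.
  assert(!(FlagsSetInAll & HasCalls));
  return 0;
}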
// At this point, we have only "safe" candidates to outline. Figure out
// frame + call instruction information.
@@ -5024,6 +4952,64 @@ AArch64InstrInfo::getOutliningCandidateInfo(
unsigned FrameID = MachineOutlinerDefault;
unsigned NumBytesToCreateFrame = 4;
+ bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
+ return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
+ });
+
+ // Returns true if an instruction is safe to fix up, false otherwise.
+ auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
+ if (MI.isCall())
+ return true;
+
+ if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
+ !MI.readsRegister(AArch64::SP, &TRI))
+ return true;
+
+ // Any modification of SP will break our code to save/restore LR.
+ // FIXME: We could handle some instructions which add a constant
+ // offset to SP, with a bit more work.
+ if (MI.modifiesRegister(AArch64::SP, &TRI))
+ return false;
+
+ // At this point, we have a stack instruction that we might need to
+ // fix up. We'll handle it if it's a load or store.
+ if (MI.mayLoadOrStore()) {
+ MachineOperand *Base; // Filled with the base operand of MI.
+ int64_t Offset; // Filled with the offset of MI.
+
+ // Does it allow us to offset the base operand and is the base the
+ // register SP?
+ if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
+ Base->getReg() != AArch64::SP)
+ return false;
+
+ // Find the minimum/maximum offset for this instruction and check
+ // if fixing it up would be in range.
+ int64_t MinOffset,
+ MaxOffset; // Unscaled offsets for the instruction.
+ unsigned Scale; // The scale to multiply the offsets by.
+ unsigned DummyWidth;
+ getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
+
+ Offset += 16; // Update the offset to what it would be if we outlined.
+ if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
+ return false;
+
+ // It's in range, so we can outline it.
+ return true;
+ }
+
+ // FIXME: Add handling for instructions like "add x0, sp, #8".
+
+ // We can't fix it up, so don't outline it.
+ return false;
+ };
+
+ // True if it's possible to fix up each stack instruction in this sequence.
+ // Important for frames/call variants that modify the stack.
+ bool AllStackInstrsSafe = std::all_of(
+ FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
+
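The fix-up check above can be illustrated with concrete numbers: the outlined frame spills LR below SP, so every SP-relative offset in the outlined body grows by 16 bytes and must still fit the instruction's encodable range. A standalone sketch, assuming an STRXui-style encoding with scale 8 and immediate range [0, 4095] (stated as an assumption, not quoted from the patch):

#include <cassert>
#include <cstdint>

// Sketch of the range check in IsSafeToFixup for one SP-relative store.
static bool safeToFixup(int64_t ByteOffset) {
  const int64_t Scale = 8, MinOffset = 0, MaxOffset = 4095;
  ByteOffset += 16;  // what the offset becomes once LR is saved below SP
  return ByteOffset >= MinOffset * Scale && ByteOffset <= MaxOffset * Scale;
}

int main() {
  assert(safeToFixup(8));          // str x0, [sp, #8]  -> [sp, #24]: in range
  assert(!safeToFixup(4095 * 8));  // already at the top of the range
  return 0;
}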
// If the last instruction in any candidate is a terminator, then we should
// tail call all of the candidates.
if (RepeatedSequenceLocs[0].back()->isTerminator()) {
@@ -5032,65 +5018,102 @@ AArch64InstrInfo::getOutliningCandidateInfo(
SetCandidateCallInfo(MachineOutlinerTailCall, 4);
}
- else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
+ else if (LastInstrOpcode == AArch64::BL ||
+ (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
// FIXME: Do we need to check if the code after this uses the value of LR?
FrameID = MachineOutlinerThunk;
NumBytesToCreateFrame = 0;
SetCandidateCallInfo(MachineOutlinerThunk, 4);
}
- // Make sure that LR isn't live on entry to this candidate. The only
- // instructions that use LR that could possibly appear in a repeated sequence
- // are calls. Therefore, we only have to check and see if LR is dead on entry
- // to (or exit from) some candidate.
- else if (std::all_of(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- [](outliner::Candidate &C) {
- return C.LRU.available(AArch64::LR);
- })) {
- FrameID = MachineOutlinerNoLRSave;
- NumBytesToCreateFrame = 4;
- SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
- }
-
- // LR is live, so we need to save it. Decide whether it should be saved to
- // the stack, or if it can be saved to a register.
else {
- if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
- [this](outliner::Candidate &C) {
- return findRegisterToSaveLRTo(C);
- })) {
- // Every candidate has an available callee-saved register for the save.
- // We can save LR to a register.
- FrameID = MachineOutlinerRegSave;
- NumBytesToCreateFrame = 4;
- SetCandidateCallInfo(MachineOutlinerRegSave, 12);
+ // We need to decide how to emit calls + frames. We can always emit the same
+ // frame if we don't need to save to the stack. If we have to save to the
+ // stack, then we need a different frame.
+ unsigned NumBytesNoStackCalls = 0;
+ std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
+
+ for (outliner::Candidate &C : RepeatedSequenceLocs) {
+ C.initLRU(TRI);
+
+ // Is LR available? If so, we don't need a save.
+ if (C.LRU.available(AArch64::LR)) {
+ NumBytesNoStackCalls += 4;
+ C.setCallInfo(MachineOutlinerNoLRSave, 4);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // Is an unused register available? If so, we won't modify the stack, so
+ // we can outline with the same frame type as those that don't save LR.
+ else if (findRegisterToSaveLRTo(C)) {
+ NumBytesNoStackCalls += 12;
+ C.setCallInfo(MachineOutlinerRegSave, 12);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // Is SP used in the sequence at all? If not, we don't have to modify
+ // the stack, so we are guaranteed to get the same frame.
+ else if (C.UsedInSequence.available(AArch64::SP)) {
+ NumBytesNoStackCalls += 12;
+ C.setCallInfo(MachineOutlinerDefault, 12);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // If we outline this, we need to modify the stack. Pretend we don't
+ // outline this by saving all of its bytes.
+ else {
+ NumBytesNoStackCalls += SequenceSize;
+ }
}
- else {
- // At least one candidate does not have an available callee-saved
- // register. We must save LR to the stack.
- FrameID = MachineOutlinerDefault;
- NumBytesToCreateFrame = 4;
+ // If there are no places where we have to save LR, then note that we
+ // don't have to update the stack. Otherwise, give every candidate the
+ // default call type, as long as it's safe to do so.
+ if (!AllStackInstrsSafe ||
+ NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
+ RepeatedSequenceLocs = CandidatesWithoutStackFixups;
+ FrameID = MachineOutlinerNoLRSave;
+ } else {
SetCandidateCallInfo(MachineOutlinerDefault, 12);
}
+
+ // If we dropped all of the candidates, bail out here.
+ if (RepeatedSequenceLocs.size() < 2) {
+ RepeatedSequenceLocs.clear();
+ return outliner::OutlinedFunction();
+ }
}
- // Check if the range contains a call. These require a save + restore of the
- // link register.
- if (std::any_of(RepeatedSequenceLocs[0].front(),
- RepeatedSequenceLocs[0].back(),
- [](const MachineInstr &MI) { return MI.isCall(); }))
- NumBytesToCreateFrame += 8; // Save + restore the link register.
+ // Does every candidate's MBB contain a call? If so, then we might have a call
+ // in the range.
+ if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
+ // Check if the range contains a call. These require a save + restore of the
+ // link register.
+ bool ModStackToSaveLR = false;
+ if (std::any_of(FirstCand.front(), FirstCand.back(),
+ [](const MachineInstr &MI) { return MI.isCall(); }))
+ ModStackToSaveLR = true;
+
+ // Handle the last instruction separately. If this is a tail call, then the
+ // last instruction is a call. We don't want to save + restore in this case.
+ // However, it could be possible that the last instruction is a call without
+ // it being valid to tail call this sequence. We should consider this as
+ // well.
+ else if (FrameID != MachineOutlinerThunk &&
+ FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
+ ModStackToSaveLR = true;
+
+ if (ModStackToSaveLR) {
+ // We can't fix up the stack. Bail out.
+ if (!AllStackInstrsSafe) {
+ RepeatedSequenceLocs.clear();
+ return outliner::OutlinedFunction();
+ }
- // Handle the last instruction separately. If this is a tail call, then the
- // last instruction is a call. We don't want to save + restore in this case.
- // However, it could be possible that the last instruction is a call without
- // it being valid to tail call this sequence. We should consider this as well.
- else if (FrameID != MachineOutlinerThunk &&
- FrameID != MachineOutlinerTailCall &&
- RepeatedSequenceLocs[0].back()->isCall())
- NumBytesToCreateFrame += 8;
+ // Save + restore LR.
+ NumBytesToCreateFrame += 8;
+ }
+ }
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
NumBytesToCreateFrame, FrameID);
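The cost comparison in the middle of this function can be made concrete with a small standalone sketch (illustrative sizes, not measurements from the patch): each candidate contributes either its cheap call overhead or, if only a stack save would work, its whole sequence size, and the total is weighed against saving LR on the stack at every call site.

#include <cassert>
#include <vector>

int main() {
  const unsigned SequenceSize = 20;  // bytes in the repeated sequence
  // Per-candidate overhead: 4 (LR free), 12 (register or SP-free save),
  // or the whole sequence size when only a stack save would do.
  std::vector<unsigned> PerCandidate = {4, 12, SequenceSize};

  unsigned NumBytesNoStackCalls = 0;
  for (unsigned Bytes : PerCandidate)
    NumBytesNoStackCalls += Bytes;   // 4 + 12 + 20 = 36

  // Three candidates at 12 bytes each if every call saved LR on the stack.
  unsigned AllStackCalls = 3 * 12;   // 36

  // 36 <= 36: cheaper (or equal) to drop the stack-fixup candidate and keep
  // the frame variant that never touches the stack.
  assert(NumBytesNoStackCalls <= AllStackCalls);
  return 0;
}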
@@ -5122,30 +5145,70 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
return true;
}
-unsigned
-AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
- unsigned Flags = 0x0;
- // Check if there's a call inside this MachineBasicBlock. If there is, then
- // set a flag.
- if (std::any_of(MBB.begin(), MBB.end(),
- [](MachineInstr &MI) { return MI.isCall(); }))
- Flags |= MachineOutlinerMBBFlags::HasCalls;
-
+bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
// Check if LR is available through all of the MBB. If it's not, then set
// a flag.
assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
"Suitable Machine Function for outlining must track liveness");
LiveRegUnits LRU(getRegisterInfo());
- LRU.addLiveOuts(MBB);
- std::for_each(MBB.rbegin(),
- MBB.rend(),
+ std::for_each(MBB.rbegin(), MBB.rend(),
[&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
- if (!LRU.available(AArch64::LR))
- Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+ // Check if each of the unsafe registers are available...
+ bool W16AvailableInBlock = LRU.available(AArch64::W16);
+ bool W17AvailableInBlock = LRU.available(AArch64::W17);
+ bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
+
+ // If all of these are dead (and not live out), we know we don't have to check
+ // them later.
+ if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
+ Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
+
+ // Now, add the live outs to the set.
+ LRU.addLiveOuts(MBB);
- return Flags;
+ // If any of these registers is available in the MBB, but also a live out of
+ // the block, then we know outlining is unsafe.
+ if (W16AvailableInBlock && !LRU.available(AArch64::W16))
+ return false;
+ if (W17AvailableInBlock && !LRU.available(AArch64::W17))
+ return false;
+ if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
+ return false;
+
+ // Check if there's a call inside this MachineBasicBlock. If there is, then
+ // set a flag.
+ if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
+ Flags |= MachineOutlinerMBBFlags::HasCalls;
+
+ MachineFunction *MF = MBB.getParent();
+
+ // In the event that we outline, we may have to save LR. If there is an
+ // available register in the MBB, then we'll always save LR there. Check if
+ // this is true.
+ bool CanSaveLR = false;
+ const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+
+ // Check if there is an available register across the sequence that we can
+ // use.
+ for (unsigned Reg : AArch64::GPR64RegClass) {
+ if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
+ Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
+ CanSaveLR = true;
+ break;
+ }
+ }
+
+ // Check if we have a register we can save LR to, and if LR was used
+ // somewhere. If both of those things are true, then we need to evaluate the
+ // safety of outlining stack instructions later.
+ if (!CanSaveLR && !LRU.available(AArch64::LR))
+ Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+
+ return true;
}
outliner::InstrType
@@ -5268,108 +5331,19 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
return outliner::InstrType::Illegal;
- // Does this use the stack?
- if (MI.modifiesRegister(AArch64::SP, &RI) ||
- MI.readsRegister(AArch64::SP, &RI)) {
- // True if there is no chance that any outlined candidate from this range
- // could require stack fixups. That is, both
- // * LR is available in the range (No save/restore around call)
- // * The range doesn't include calls (No save/restore in outlined frame)
- // are true.
- // FIXME: This is very restrictive; the flags check the whole block,
- // not just the bit we will try to outline.
- bool MightNeedStackFixUp =
- (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
- MachineOutlinerMBBFlags::HasCalls));
-
- // If this instruction is in a range where it *never* needs to be fixed
- // up, then we can *always* outline it. This is true even if it's not
- // possible to fix that instruction up.
- //
- // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
- // use SP. Suppose that I1 sits within a range that definitely doesn't
- // need stack fixups, while I2 sits in a range that does.
- //
- // First, I1 can be outlined as long as we *never* fix up the stack in
- // any sequence containing it. I1 is already a safe instruction in the
- // original program, so as long as we don't modify it we're good to go.
- // So this leaves us with showing that outlining I2 won't break our
- // program.
- //
- // Suppose I1 and I2 belong to equivalent candidate sequences. When we
- // look at I2, we need to see if it can be fixed up. Suppose I2, (and
- // thus I1) cannot be fixed up. Then I2 will be assigned an unique
- // integer label; thus, I2 cannot belong to any candidate sequence (a
- // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
- // as well, so we're good. Thus, I1 is always safe to outline.
- //
- // This gives us two things: first off, it buys us some more instructions
- // for our search space by deeming stack instructions illegal only when
- // they can't be fixed up AND we might have to fix them up. Second off,
- // This allows us to catch tricky instructions like, say,
- // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
- // be paired with later SUBXris, which might *not* end up being outlined.
- // If we mess with the stack to save something, then an ADDXri messes with
- // it *after*, then we aren't going to restore the right something from
- // the stack if we don't outline the corresponding SUBXri first. ADDXris and
- // SUBXris are extremely common in prologue/epilogue code, so supporting
- // them in the outliner can be a pretty big win!
- if (!MightNeedStackFixUp)
- return outliner::InstrType::Legal;
-
- // Any modification of SP will break our code to save/restore LR.
- // FIXME: We could handle some instructions which add a constant offset to
- // SP, with a bit more work.
- if (MI.modifiesRegister(AArch64::SP, &RI))
- return outliner::InstrType::Illegal;
-
- // At this point, we have a stack instruction that we might need to fix
- // up. We'll handle it if it's a load or store.
- if (MI.mayLoadOrStore()) {
- unsigned Base; // Filled with the base register of MI.
- int64_t Offset; // Filled with the offset of MI.
- unsigned DummyWidth;
-
- // Does it allow us to offset the base register and is the base SP?
- if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
- Base != AArch64::SP)
- return outliner::InstrType::Illegal;
-
- // Find the minimum/maximum offset for this instruction and check if
- // fixing it up would be in range.
- int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
- unsigned Scale; // The scale to multiply the offsets by.
- getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
-
- // TODO: We should really test what happens if an instruction overflows.
- // This is tricky to test with IR tests, but when the outliner is moved
- // to a MIR test, it really ought to be checked.
- Offset += 16; // Update the offset to what it would be if we outlined.
- if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
- return outliner::InstrType::Illegal;
-
- // It's in range, so we can outline it.
- return outliner::InstrType::Legal;
- }
-
- // FIXME: Add handling for instructions like "add x0, sp, #8".
-
- // We can't fix it up, so don't outline it.
- return outliner::InstrType::Illegal;
- }
-
return outliner::InstrType::Legal;
}
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
for (MachineInstr &MI : MBB) {
- unsigned Base, Width;
+ MachineOperand *Base;
+ unsigned Width;
int64_t Offset;
// Is this a load or store with an immediate offset with SP as the base?
if (!MI.mayLoadOrStore() ||
- !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
- Base != AArch64::SP)
+ !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
+ (Base->isReg() && Base->getReg() != AArch64::SP))
continue;
// It is, so we have to fix it up.
@@ -5401,7 +5375,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
TailOpcode = AArch64::TCRETURNdi;
} else {
assert(Call->getOpcode() == AArch64::BLR);
- TailOpcode = AArch64::TCRETURNri;
+ TailOpcode = AArch64::TCRETURNriALL;
}
MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
.add(Call->getOperand(0))
@@ -5562,3 +5536,6 @@ bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
MachineFunction &MF) const {
return MF.getFunction().optForMinSize();
}
+
+#define GET_INSTRINFO_HELPERS
+#include "AArch64GenInstrInfo.inc"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 11882e238b70..9954669d5675 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -62,14 +62,6 @@ public:
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- /// Returns true if there is a shiftable register and the shift value
- /// is non-zero.
- static bool hasShiftedReg(const MachineInstr &MI);
-
- /// Returns true if there is an extendable register and the extending
- /// value is non-zero.
- static bool hasExtendedReg(const MachineInstr &MI);
-
/// Does this instruction set its full destination register to zero?
static bool isGPRZero(const MachineInstr &MI);
@@ -79,11 +71,6 @@ public:
/// Does this instruction rename an FPR without modifying bits?
static bool isFPRCopy(const MachineInstr &MI);
- /// Return true if this load/store scales or extends its register offset.
- /// This refers to scaling a dynamic index as opposed to scaled immediates.
- /// MI should be a memory op that allows scaled addressing.
- static bool isScaledAddr(const MachineInstr &MI);
-
/// Return true if pairing the given load or store is hinted to be
/// unprofitable.
static bool isLdStPairSuppressed(const MachineInstr &MI);
@@ -110,13 +97,13 @@ public:
/// Hint that pairing the given load or store is unprofitable.
static void suppressLdStPair(MachineInstr &MI);
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &MI, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
- bool getMemOpBaseRegImmOfsWidth(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo *TRI) const;
+ bool getMemOperandWithOffsetWidth(MachineInstr &MI, MachineOperand *&BaseOp,
+ int64_t &Offset, unsigned &Width,
+ const TargetRegisterInfo *TRI) const;
/// Return the immediate offset of the base register in a load/store \p LdSt.
MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;
@@ -128,8 +115,7 @@ public:
bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
int64_t &MinOffset, int64_t &MaxOffset) const;
- bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
- MachineInstr &SecondLdSt, unsigned BaseReg2,
+ bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2,
unsigned NumLoads) const override;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -189,6 +175,10 @@ public:
unsigned FalseReg) const override;
void getNoop(MCInst &NopInst) const override;
+ bool isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
@@ -242,7 +232,8 @@ public:
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
outliner::InstrType
getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override;
- unsigned getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const override;
+ bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const override;
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const override;
MachineBasicBlock::iterator
@@ -250,15 +241,15 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
- /// Returns true if the instruction sets to an immediate value that can be
- /// executed more efficiently.
- bool isExynosResetFast(const MachineInstr &MI) const;
- /// Returns true if the instruction has a shift left that can be executed
- /// more efficiently.
- bool isExynosShiftLeftFast(const MachineInstr &MI) const;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
- bool isFalkorShiftExtFast(const MachineInstr &MI) const;
+ static bool isFalkorShiftExtFast(const MachineInstr &MI);
+ /// Return true if the instruction is an SEH instruction used for unwinding
+ /// on Windows.
+ static bool isSEHInstruction(const MachineInstr &MI);
+
+#define GET_INSTRINFO_HELPER_DECLS
+#include "AArch64GenInstrInfo.inc"
private:
/// Sets the offsets on outlined instructions in \p MBB which use SP
@@ -286,7 +277,7 @@ void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
int Offset, const TargetInstrInfo *TII,
MachineInstr::MIFlag = MachineInstr::NoFlags,
- bool SetNZCV = false);
+ bool SetNZCV = false, bool NeedsWinCFI = false);
/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index d6b8bb5d89c7..c24b8b36441b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -22,6 +22,56 @@ def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
+ AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
+def HasVH : Predicate<"Subtarget->hasVH()">,
+ AssemblerPredicate<"FeatureVH", "vh">;
+
+def HasLOR : Predicate<"Subtarget->hasLOR()">,
+ AssemblerPredicate<"FeatureLOR", "lor">;
+
+def HasPA : Predicate<"Subtarget->hasPA()">,
+ AssemblerPredicate<"FeaturePA", "pa">;
+
+def HasJS : Predicate<"Subtarget->hasJS()">,
+ AssemblerPredicate<"FeatureJS", "jsconv">;
+
+def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
+ AssemblerPredicate<"FeatureCCIDX", "ccidx">;
+
+def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
+ AssemblerPredicate<"FeatureComplxNum", "complxnum">;
+
+def HasNV : Predicate<"Subtarget->hasNV()">,
+ AssemblerPredicate<"FeatureNV", "nv">;
+
+def HasRASv8_4 : Predicate<"Subtarget->hasRASv8_4()">,
+ AssemblerPredicate<"FeatureRASv8_4", "rasv8_4">;
+
+def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
+ AssemblerPredicate<"FeatureMPAM", "mpam">;
+
+def HasDIT : Predicate<"Subtarget->hasDIT()">,
+ AssemblerPredicate<"FeatureDIT", "dit">;
+
+def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
+ AssemblerPredicate<"FeatureTRACEV8_4", "tracev8.4">;
+
+def HasAM : Predicate<"Subtarget->hasAM()">,
+ AssemblerPredicate<"FeatureAM", "am">;
+
+def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
+ AssemblerPredicate<"FeatureSEL2", "sel2">;
+
+def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
+ AssemblerPredicate<"FeatureTLB_RMI", "tlb-rmi">;
+
+def HasFMI : Predicate<"Subtarget->hasFMI()">,
+ AssemblerPredicate<"FeatureFMI", "fmi">;
+
+def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
+ AssemblerPredicate<"FeatureRCPC_IMMO", "rcpc-immo">;
+
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
@@ -49,6 +99,8 @@ def HasRDM : Predicate<"Subtarget->hasRDM()">,
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
+def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
+ AssemblerPredicate<"FeatureFP16FML", "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
AssemblerPredicate<"FeatureSPE", "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
@@ -58,7 +110,20 @@ def HasSVE : Predicate<"Subtarget->hasSVE()">,
AssemblerPredicate<"FeatureSVE", "sve">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicate<"FeatureRCPC", "rcpc">;
-
+def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
+ AssemblerPredicate<"FeatureAltFPCmp", "altnzcv">;
+def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
+ AssemblerPredicate<"FeatureFRInt3264", "frint3264">;
+def HasSB : Predicate<"Subtarget->hasSB()">,
+ AssemblerPredicate<"FeatureSB", "sb">;
+def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
+ AssemblerPredicate<"FeaturePredRes", "predres">;
+def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
+ AssemblerPredicate<"FeatureCacheDeepPersist", "ccdp">;
+def HasBTI : Predicate<"Subtarget->hasBTI()">,
+ AssemblerPredicate<"FeatureBranchTargetId", "bti">;
+def HasMTE : Predicate<"Subtarget->hasMTE()">,
+ AssemblerPredicate<"FeatureMTE", "mte">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def UseAlternateSExtLoadCVTF32
@@ -174,6 +239,7 @@ def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
+def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
@@ -342,6 +408,9 @@ let RecomputePerFunction = 1 in {
def NotForCodeSize : Predicate<"!MF->getFunction().optForSize()">;
// Avoid generating STRQro if it is slow, unless we're optimizing for code size.
def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">;
+
+ def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
+ def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
}
include "AArch64InstrFormats.td"
@@ -427,6 +496,38 @@ def : Pat<(AArch64LOADgot texternalsym:$addr),
def : Pat<(AArch64LOADgot tconstpool:$addr),
(LOADgot tconstpool:$addr)>;
+// 32-bit jump table destination is actually only 2 instructions since we can
+// use the table itself as a PC-relative base. But optimization occurs after
+// branch relaxation so be pessimistic.
+let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch" in {
+def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+}
+
+// Space-consuming pseudo to aid testing of placement and reachability
+// algorithms. Immediate operand is the number of bytes this "instruction"
+// occupies; register operands can be used to enforce dependency and constrain
+// the scheduler.
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
+def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
+ [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
+ Sched<[]>;
+
+let hasSideEffects = 1, isCodeGenOnly = 1 in {
+ def SpeculationSafeValueX
+ : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
+ def SpeculationSafeValueW
+ : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
+}
+
+
//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//
@@ -440,6 +541,8 @@ def : InstAlias<"sev", (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
+def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>;
+def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
@@ -463,11 +566,11 @@ def ISB : CRmSystemI<barrier_op, 0b110, "isb",
def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
let CRm = 0b0010;
let Inst{12} = 0;
- let Predicates = [HasV8_4a];
+ let Predicates = [HasTRACEV8_4];
}
}
-// ARMv8.2 Dot Product
+// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, "sdot", int_aarch64_neon_sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, "udot", int_aarch64_neon_udot>;
@@ -475,6 +578,18 @@ defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>;
}
+// ARMv8.2-A FP16 Fused Multiply-Add Long
+let Predicates = [HasNEON, HasFP16FML] in {
+defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
+defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
+defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
+defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
+defm FMLALlane : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
+defm FMLSLlane : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
+defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
+defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
+}
+
// Armv8.2-A Crypto extensions
let Predicates = [HasSHA3] in {
def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">;
@@ -543,7 +658,7 @@ let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
}
// These pointer authentication instructions require armv8.3a
-let Predicates = [HasV8_3a] in {
+let Predicates = [HasPA] in {
multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> {
def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>;
def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>;
@@ -583,17 +698,17 @@ let Predicates = [HasV8_3a] in {
defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>;
defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>;
- // v8.3a floating point conversion for javascript
- let Predicates = [HasV8_3a, HasFPARMv8] in
- def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
- "fjcvtzs", []> {
- let Inst{31} = 0;
- }
+}
-} // HasV8_3a
+// v8.3a floating point conversion for javascript
+let Predicates = [HasJS, HasFPARMv8] in
+def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
+ "fjcvtzs", []> {
+ let Inst{31} = 0;
+} // HasJS, HasFPARMv8
// v8.4 Flag manipulation instructions
-let Predicates = [HasV8_4a] in {
+let Predicates = [HasFMI] in {
def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
let Inst{20-5} = 0b0000001000000000;
}
@@ -601,10 +716,39 @@ def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
"{\t$Rn, $imm, $mask}">;
-} // HasV8_4a
+} // HasFMI
+
+// v8.5 flag manipulation instructions
+let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
+
+def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
+ let Inst{18-16} = 0b000;
+ let Inst{11-8} = 0b0000;
+ let Unpredictable{11-8} = 0b1111;
+ let Inst{7-5} = 0b001;
+}
+
+def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
+ let Inst{18-16} = 0b000;
+ let Inst{11-8} = 0b0000;
+ let Unpredictable{11-8} = 0b1111;
+ let Inst{7-5} = 0b010;
+}
+} // HasAltNZCV
+
+
+// Armv8.5-A speculation barrier
+def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
+ let Inst{20-5} = 0b0001100110000111;
+ let Unpredictable{11-8} = 0b1111;
+ let Predicates = [HasSB];
+ let hasSideEffects = 1;
+}
def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
+def : InstAlias<"ssbb", (DSB 0)>;
+def : InstAlias<"pssbb", (DSB 4)>;
def MRS : MRSI;
def MSR : MSRI;
@@ -1076,6 +1220,50 @@ defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
defm : STOPregister<"stumin","LDUMIN">;// STUMINx
+// v8.5 Memory Tagging Extension
+let Predicates = [HasMTE] in {
+
+def IRG : BaseTwoOperand<0b0100, GPR64sp, "irg", null_frag, GPR64sp, GPR64>,
+ Sched<[]>{
+ let Inst{31} = 1;
+}
+def GMI : BaseTwoOperand<0b0101, GPR64, "gmi", null_frag, GPR64sp>, Sched<[]>{
+ let Inst{31} = 1;
+ let isNotDuplicable = 1;
+}
+def ADDG : AddSubG<0, "addg", null_frag>;
+def SUBG : AddSubG<1, "subg", null_frag>;
+
+def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
+
+def SUBP : SUBP<0, "subp", null_frag>, Sched<[]>;
+def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
+ let Defs = [NZCV];
+}
+
+def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
+
+def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
+def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
+
+def LDGV : MemTagVector<1, "ldgv", "\t$Rt, [$Rn]!",
+ (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn)> {
+ let DecoderMethod = "DecodeLoadAllocTagArrayInstruction";
+}
+def STGV : MemTagVector<0, "stgv", "\t$Rt, [$Rn]!",
+ (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn)>;
+
+defm STG : MemTagStore<0b00, "stg">;
+defm STZG : MemTagStore<0b01, "stzg">;
+defm ST2G : MemTagStore<0b10, "st2g">;
+defm STZ2G : MemTagStore<0b11, "stz2g">;
+
+defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
+def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
+def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
+
+} // Predicates = [HasMTE]
+
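For orientation, a rough user-level model of the tagging arithmetic these MTE instructions operate on. This is an illustrative C++ sketch, not taken from the patch; the 4-bit tag in address bits [59:56] and the 16-byte granule are architectural facts, while the helper names are invented.

#include <cassert>
#include <cstdint>

// MTE keeps a 4-bit allocation tag per 16-byte granule of memory; the
// matching logical tag travels in bits [59:56] of the pointer.
static uint64_t withTag(uint64_t addr, unsigned tag) {
  return (addr & ~(0xfULL << 56)) | (uint64_t(tag & 0xf) << 56);
}

static unsigned tagOf(uint64_t addr) { return unsigned((addr >> 56) & 0xf); }

int main() {
  uint64_t raw = 0x10000040ULL;
  uint64_t p = withTag(raw, 0x7);   // roughly the shape of an IRG/ADDG result
  assert(tagOf(p) == 0x7);
  assert(withTag(p, 0) == raw);     // clearing the tag restores the raw address
  return 0;
}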
//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//
@@ -1383,7 +1571,8 @@ def : InstAlias<"cneg $dst, $src, $cc",
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
-def ADR : ADRI<0, "adr", adrlabel, []>;
+def ADR : ADRI<0, "adr", adrlabel,
+ [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0
def ADRP : ADRI<1, "adrp", adrplabel,
@@ -1391,6 +1580,10 @@ def ADRP : ADRI<1, "adrp", adrplabel,
} // isReMaterializable = 1
// page address of a constant pool entry, block address
+def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
+def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
+def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
+def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
@@ -1434,6 +1627,10 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
let AsmString = ".tlsdesccall $sym";
}
+// Pseudo instruction to tell the streamer to emit a 'B' character into the
+// augmentation string.
+def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
+
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects be dropped?
let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
@@ -1493,6 +1690,8 @@ def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>;
+def UDF : UDFType<0, "udf">;
+
//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//
@@ -1883,14 +2082,37 @@ def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
//---
// (literal)
-def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr">;
-def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr">;
-def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr">;
-def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr">;
-def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr">;
+
+def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
+ if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
+ const DataLayout &DL = MF->getDataLayout();
+ unsigned Align = G->getGlobal()->getPointerAlignment(DL);
+ return Align >= 4 && G->getOffset() % 4 == 0;
+ }
+ if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
+ return C->getAlignment() >= 4 && C->getOffset() % 4 == 0;
+ return false;
+}]>;
+
+def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
+ [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
+def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
+ [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
+def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
+ [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
+def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
+ [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
+def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
+ [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
// load sign-extended word
-def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw">;
+def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
+ [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;
+
+let AddedComplexity = 20 in {
+def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
+ (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
+}
// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
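As a rough illustration of what the alignedglobal predicate above guards: LDR (literal) encodes a signed 19-bit word offset, so the target address must stay 4-byte aligned and within about a mebibyte of the instruction. The following plain C++ sketch is not part of the patch; the function names are invented.

#include <cassert>
#include <cstdint>

// Mirrors the PatLeaf: only globals/constant-pool entries whose alignment and
// offset keep the address 4-byte aligned are candidates for LDR (literal).
static bool isLdrLiteralCandidate(uint64_t pointerAlignment, int64_t offset) {
  return pointerAlignment >= 4 && offset % 4 == 0;
}

// The encoded PC-relative offset is a signed 19-bit word count, so the
// reachable window is roughly +/-1 MiB around the instruction.
static bool fitsLiteralRange(int64_t byteOffsetFromPC) {
  return byteOffsetFromPC % 4 == 0 &&
         byteOffsetFromPC >= -(1 << 20) && byteOffsetFromPC < (1 << 20);
}

int main() {
  assert(isLdrLiteralCandidate(8, 16) && !isLdrLiteralCandidate(2, 0));
  assert(fitsLiteralRange(1048572) && !fitsLiteralRange(1 << 20));
  return 0;
}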
@@ -2467,8 +2689,9 @@ defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
[(truncstorei8 GPR32z:$Rt,
(am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
-// Armv8.4 LDAPR & STLR with Immediate Offset instruction
-let Predicates = [HasV8_4a] in {
+// Armv8.4 Weaker Release Consistency enhancements
+// LDAPR & STLR with Immediate Offset instructions
+let Predicates = [HasRCPC_IMMO] in {
defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>;
defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>;
defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>;
@@ -2753,7 +2976,7 @@ def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
-let Predicates = [HasV8_1a] in {
+let Predicates = [HasLOR] in {
// v8.1a "Limited Order Region" extension load-acquire instructions
def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
@@ -2886,6 +3109,13 @@ let SchedRW = [WriteFDiv] in {
defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
}
+let Predicates = [HasFRInt3264] in {
+ defm FRINT32Z : FRIntNNT<0b00, "frint32z">;
+ defm FRINT64Z : FRIntNNT<0b10, "frint64z">;
+ defm FRINT32X : FRIntNNT<0b01, "frint32x">;
+ defm FRINT64X : FRIntNNT<0b11, "frint64x">;
+} // HasFRInt3264
+
//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//
@@ -2895,18 +3125,18 @@ let SchedRW = [WriteFDiv] in {
defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
-defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>;
+defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
-defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>;
+defm FMIN : TwoOperandFPData<0b0101, "fmin", fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
}
defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
-def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
@@ -2983,6 +3213,42 @@ def F128CSEL : Pseudo<(outs FPR128:$Rd),
let hasNoSchedulingInfo = 1;
}
+//===----------------------------------------------------------------------===//
+// Instructions used for emitting unwind opcodes on ARM64 Windows.
+//===----------------------------------------------------------------------===//
+let isPseudo = 1 in {
+ def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
+ def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
+ def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
+}
+
+// Pseudo instructions for Windows EH
+//===----------------------------------------------------------------------===//
+let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
+ isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
+ def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
+ let usesCustomInserter = 1 in
+ def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
+ Sched<[]>;
+}
+
+let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in
+def CATCHPAD : Pseudo<(outs), (ins), [(catchpad)]>, Sched<[]>;
//===----------------------------------------------------------------------===//
// Floating point immediate move.
@@ -3104,6 +3370,14 @@ defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
+
+let Predicates = [HasFRInt3264] in {
+ defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z">;
+ defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z">;
+ defm FRINT32X : FRIntNNTVector<1, 0, "frint32x">;
+ defm FRINT64X : FRIntNNTVector<1, 1, "frint64x">;
+} // HasFRInt3264
+
defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
@@ -3224,11 +3498,11 @@ defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
-defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>;
+defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
-defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>;
+defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>;
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
@@ -3895,25 +4169,6 @@ defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
-// Patterns for smull2/umull2.
-multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
- Instruction INST8B, Instruction INST4H, Instruction INST2S> {
- def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm))),
- (INST8B V128:$Rn, V128:$Rm)>;
- def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm))),
- (INST4H V128:$Rn, V128:$Rm)>;
- def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm))),
- (INST2S V128:$Rn, V128:$Rm)>;
-}
-
-defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
- SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
-defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
- UMULLv8i16_v4i32, UMULLv4i32_v2i64>;
-
// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
@@ -4004,44 +4259,43 @@ def : Pat<(concat_vectors (v2i32 V64:$Rd),
defm EXT : SIMDBitwiseExtract<"ext">;
-def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v4f16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v8f16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-
-// We use EXT to handle extract_subvector to copy the upper 64-bits of a
-// 128-bit vector.
-def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 4))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-
+def AdjustExtImm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
+ def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+ def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+ // We use EXT to handle extract_subvector to copy the upper 64-bits of a
+ // 128-bit vector.
+ def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+ // A 64-bit EXT of two halves of the same 128-bit register can be done as a
+ // single 128-bit EXT.
+ def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
+ (extract_subvector V128:$Rn, (i64 N)),
+ (i32 imm:$imm))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
+ // A 64-bit EXT of the high half of a 128-bit register can be done using a
+ // 128-bit EXT of the whole register with an adjustment to the immediate. The
+ // top half of the other operand will be unset, but that doesn't matter as it
+ // will not be used.
+ def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
+ V64:$Rm,
+ (i32 imm:$imm))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
+ (AdjustExtImm imm:$imm)), dsub)>;
+}
+
+defm : ExtPat<v8i8, v16i8, 8>;
+defm : ExtPat<v4i16, v8i16, 4>;
+defm : ExtPat<v4f16, v8f16, 4>;
+defm : ExtPat<v2i32, v4i32, 2>;
+defm : ExtPat<v2f32, v4f32, 2>;
+defm : ExtPat<v1i64, v2i64, 1>;
+defm : ExtPat<v1f64, v2f64, 1>;
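To see why the AdjustExtImm trick above is sound, here is a small byte-level model. It is an illustrative C++ sketch, not from the LLVM tree, and it assumes the usual reading of EXT as extracting consecutive bytes from the concatenation {Vm:Vn}, with Vn supplying the low-numbered bytes.

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

using V64  = std::array<uint8_t, 8>;
using V128 = std::array<uint8_t, 16>;

// Byte-level model of EXT: take N consecutive bytes starting at 'imm' from
// the concatenation {Vm:Vn}, with Vn supplying the low-numbered bytes.
template <std::size_t N>
std::array<uint8_t, N> ext(const std::array<uint8_t, N> &Vn,
                           const std::array<uint8_t, N> &Vm, unsigned imm) {
  std::array<uint8_t, N> R{};
  for (std::size_t i = 0; i < N; ++i)
    R[i] = (i + imm < N) ? Vn[i + imm] : Vm[i + imm - N];
  return R;
}

int main() {
  V128 Rn{}, RmWide{};
  V64 Rm{};
  for (unsigned i = 0; i < 16; ++i) Rn[i] = uint8_t(i);
  for (unsigned i = 0; i < 8; ++i) { Rm[i] = uint8_t(0x80 + i); RmWide[i] = Rm[i]; }

  V64 Hi; // extract_subvector(Rn, 8): the high half of Rn
  for (unsigned i = 0; i < 8; ++i) Hi[i] = Rn[i + 8];

  for (unsigned imm = 0; imm < 8; ++imm) {
    V64 A  = ext(Hi, Rm, imm);          // 64-bit EXT on the high half
    V128 B = ext(Rn, RmWide, imm + 8);  // 128-bit EXT with AdjustExtImm (+8)
    for (unsigned i = 0; i < 8; ++i)
      assert(A[i] == B[i]);             // the dsub (low 64-bit) halves agree
  }
  return 0;
}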
//----------------------------------------------------------------------------
// AdvSIMD zip vector
@@ -4137,6 +4391,12 @@ def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
+// DUP from a 64-bit register to a 64-bit register is just a copy
+def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
+ (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
+def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
+ (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;
+
def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
(v2f32 (DUPv2i32lane
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
@@ -4739,16 +4999,6 @@ def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
(MOVID imm0_255:$shift)>;
-def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>;
-
-def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>;
-
// EDIT byte mask: 2d
// The movi_edit node has the immediate value already encoded, so we use
@@ -4769,6 +5019,18 @@ def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
+// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
+// extract is free and this gives better MachineCSE results.
+def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+
+def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
@@ -5770,6 +6032,41 @@ def : Pat<(i32 (trunc GPR64sp:$src)),
// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
+// Multiply high patterns which multiply the lower subvector using smull/umull
+// and the upper subvector with smull2/umull2. Then shuffle the high
+// part of both results together.
+def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v16i8
+ (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
+def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v8i16
+ (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
+def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v4i32
+ (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
+
+def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v16i8
+ (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
+def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v8i16
+ (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
+def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v4i32
+ (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
+
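A scalar sketch of what these mulhs/mulhu patterns compute. This is illustrative C++, not part of the patch, and it assumes the usual little-endian lane numbering when pairing the UZP2 step with the widened products.

#include <cassert>
#include <cstdint>

// Scalar model of v8i16 mulhs: each result lane is the top 16 bits of the
// 32-bit signed product of the corresponding input lanes.
static int16_t mulhs16(int16_t a, int16_t b) {
  return (int16_t)((int32_t(a) * int32_t(b)) >> 16);
}

int main() {
  int16_t a[8] = {3, -7, 1000, -32768, 32767, 0, 255, -1};
  int16_t b[8] = {9, 100, -1234, -32768, 32767, 5, -255, -1};

  // SMULL/SMULL2 produce the eight full 32-bit products (low half of the
  // vectors, then high half); UZP2 keeps the odd-numbered 16-bit halves,
  // i.e. the upper half of every product, which is exactly mulhs.
  int32_t prod[8];
  int16_t halves[16];
  for (int i = 0; i < 8; ++i) {
    prod[i] = int32_t(a[i]) * int32_t(b[i]);
    halves[2 * i]     = (int16_t)(prod[i] & 0xffff);
    halves[2 * i + 1] = (int16_t)(prod[i] >> 16);
  }
  for (int i = 0; i < 8; ++i)
    assert(halves[2 * i + 1] == mulhs16(a[i], b[i]));
  return 0;
}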
// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
@@ -6481,10 +6778,24 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
Sched<[WriteBrReg]>;
def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
Sched<[WriteBrReg]>;
+ // Indirect tail-call with any register allowed, used by MachineOutliner when
+ // this is proven safe.
+ // FIXME: If we have to add any more hacks like this, we should instead relax
+ // some verifier checks for outlined functions.
+ def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
+ Sched<[WriteBrReg]>;
+ // Indirect tail-call limited to only use registers (x16 and x17) which are
+ // allowed to tail-call a "BTI c" instruction.
+ def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
+ Sched<[WriteBrReg]>;
}
def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
- (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>;
+ (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
+ Requires<[NotUseBTI]>;
+def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
+ (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
+ Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
(TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index b2b500320b5c..5eb589bf66d5 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -65,6 +65,16 @@ private:
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
+ // Helper to generate an equivalent of scalar_to_vector into a new register,
+ // returned via 'Dst'.
+ bool emitScalarToVector(unsigned &Dst, const LLT DstTy,
+ const TargetRegisterClass *DstRC, unsigned Scalar,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineRegisterInfo &MRI) const;
+ bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
@@ -667,7 +677,7 @@ void AArch64InstructionSelector::materializeLargeCMVal(
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineIRBuilder MIB(I);
- auto MovZ = MIB.buildInstr(AArch64::MOVZXi, &AArch64::GPR64RegClass);
+ auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
MovZ->addOperand(MF, I.getOperand(1));
MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
AArch64II::MO_NC);
@@ -779,16 +789,36 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
const unsigned CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- if (selectCompareBranch(I, MF, MRI))
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ bool ProduceNonFlagSettingCondBr =
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+ if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
return true;
- auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
- .addUse(CondReg)
- .addImm(/*bit offset=*/0)
- .addMBB(DestMBB);
+ if (ProduceNonFlagSettingCondBr) {
+ auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
+ .addUse(CondReg)
+ .addImm(/*bit offset=*/0)
+ .addMBB(DestMBB);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ } else {
+ auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
+ .addDef(AArch64::WZR)
+ .addUse(CondReg)
+ .addImm(1);
+ constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
+ auto Bcc =
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
+ .addImm(AArch64CC::EQ)
+ .addMBB(DestMBB);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
+ }
}
case TargetOpcode::G_BRINDIRECT: {
@@ -983,6 +1013,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
materializeLargeCMVal(I, GV, OpFlags);
I.eraseFromParent();
return true;
+ } else if (TM.getCodeModel() == CodeModel::Tiny) {
+ I.setDesc(TII.get(AArch64::ADR));
+ I.getOperand(1).setTargetFlags(OpFlags);
} else {
I.setDesc(TII.get(AArch64::MOVaddr));
I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
@@ -1010,12 +1043,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
}
unsigned MemSizeInBits = MemOp.getSize() * 8;
- // FIXME: PR36018: Volatile loads in some cases are incorrectly selected by
- // folding with an extend. Until we have a G_SEXTLOAD solution bail out if
- // we hit one.
- if (Opcode == TargetOpcode::G_LOAD && MemOp.isVolatile())
- return false;
-
const unsigned PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
@@ -1525,11 +1552,178 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
}
+ case TargetOpcode::G_BUILD_VECTOR:
+ return selectBuildVector(I, MRI);
+ case TargetOpcode::G_MERGE_VALUES:
+ return selectMergeValues(I, MRI);
}
return false;
}
+bool AArch64InstructionSelector::emitScalarToVector(
+ unsigned &Dst, const LLT DstTy, const TargetRegisterClass *DstRC,
+ unsigned Scalar, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, MachineRegisterInfo &MRI) const {
+ Dst = MRI.createVirtualRegister(DstRC);
+
+ unsigned UndefVec = MRI.createVirtualRegister(DstRC);
+ MachineInstr &UndefMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII.get(TargetOpcode::IMPLICIT_DEF))
+ .addDef(UndefVec);
+
+ auto BuildFn = [&](unsigned SubregIndex) {
+ MachineInstr &InsMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII.get(TargetOpcode::INSERT_SUBREG))
+ .addDef(Dst)
+ .addUse(UndefVec)
+ .addUse(Scalar)
+ .addImm(SubregIndex);
+ constrainSelectedInstRegOperands(UndefMI, TII, TRI, RBI);
+ return constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
+ };
+
+ switch (DstTy.getElementType().getSizeInBits()) {
+ case 32:
+ return BuildFn(AArch64::ssub);
+ case 64:
+ return BuildFn(AArch64::dsub);
+ default:
+ return false;
+ }
+}
+
+bool AArch64InstructionSelector::selectMergeValues(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
+ assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
+
+ // At the moment we only support merging two s32s into an s64.
+ if (I.getNumOperands() != 3)
+ return false;
+ if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
+ return false;
+ const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
+ if (RB.getID() != AArch64::GPRRegBankID)
+ return false;
+
+ auto *DstRC = &AArch64::GPR64RegClass;
+ unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
+ MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::SUBREG_TO_REG))
+ .addDef(SubToRegDef)
+ .addImm(0)
+ .addUse(I.getOperand(1).getReg())
+ .addImm(AArch64::sub_32);
+ unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
+ // Need to anyext the second scalar before we can use bfm
+ MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::SUBREG_TO_REG))
+ .addDef(SubToRegDef2)
+ .addImm(0)
+ .addUse(I.getOperand(2).getReg())
+ .addImm(AArch64::sub_32);
+ MachineInstr &BFM =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(SubToRegDef)
+ .addUse(SubToRegDef2)
+ .addImm(32)
+ .addImm(31);
+ constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
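Roughly, the SUBREG_TO_REG/BFM sequence emitted here computes the following. The C++ below is an illustrative sketch rather than LLVM code; it relies on BFM Xd, Xn, #32, #31 decoding as a 32-bit wide bitfield insert at bit 32.

#include <cassert>
#include <cstdint>

// The first s32 operand ends up in bits [31:0] and the second in bits
// [63:32] of the merged s64.
static uint64_t mergeS32Pair(uint32_t lo, uint32_t hi) {
  uint64_t d = lo;                              // SUBREG_TO_REG of the first value
  uint64_t n = hi;                              // SUBREG_TO_REG of the second value
  d = (d & 0x00000000ffffffffULL) | (n << 32);  // the BFM (bitfield insert) step
  return d;
}

int main() {
  assert(mergeS32Pair(0x11223344u, 0xdeadbeefu) == 0xdeadbeef11223344ULL);
  return 0;
}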
+
+bool AArch64InstructionSelector::selectBuildVector(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ // Until we port more of the optimized selections, for now just use a vector
+ // insert sequence.
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
+ unsigned EltSize = EltTy.getSizeInBits();
+ if (EltSize < 32 || EltSize > 64)
+ return false; // Don't support all element types yet.
+ const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
+ unsigned Opc;
+ unsigned SubregIdx;
+ if (RB.getID() == AArch64::GPRRegBankID) {
+ if (EltSize == 32) {
+ Opc = AArch64::INSvi32gpr;
+ SubregIdx = AArch64::ssub;
+ } else {
+ Opc = AArch64::INSvi64gpr;
+ SubregIdx = AArch64::dsub;
+ }
+ } else {
+ if (EltSize == 32) {
+ Opc = AArch64::INSvi32lane;
+ SubregIdx = AArch64::ssub;
+ } else {
+ Opc = AArch64::INSvi64lane;
+ SubregIdx = AArch64::dsub;
+ }
+ }
+
+ if (EltSize * DstTy.getNumElements() != 128)
+ return false; // Don't handle unpacked vectors yet.
+
+ unsigned DstVec = 0;
+ const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(
+ DstTy, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
+ emitScalarToVector(DstVec, DstTy, DstRC, I.getOperand(1).getReg(),
+ *I.getParent(), I.getIterator(), MRI);
+ for (unsigned i = 2, e = DstTy.getSizeInBits() / EltSize + 1; i < e; ++i) {
+ unsigned InsDef;
+ // For the last insert re-use the dst reg of the G_BUILD_VECTOR.
+ if (i + 1 < e)
+ InsDef = MRI.createVirtualRegister(DstRC);
+ else
+ InsDef = I.getOperand(0).getReg();
+ unsigned LaneIdx = i - 1;
+ if (RB.getID() == AArch64::FPRRegBankID) {
+ unsigned ImpDef = MRI.createVirtualRegister(DstRC);
+ MachineInstr &ImpDefMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::IMPLICIT_DEF))
+ .addDef(ImpDef);
+ unsigned InsSubDef = MRI.createVirtualRegister(DstRC);
+ MachineInstr &InsSubMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::INSERT_SUBREG))
+ .addDef(InsSubDef)
+ .addUse(ImpDef)
+ .addUse(I.getOperand(i).getReg())
+ .addImm(SubregIdx);
+ MachineInstr &InsEltMI =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
+ .addDef(InsDef)
+ .addUse(DstVec)
+ .addImm(LaneIdx)
+ .addUse(InsSubDef)
+ .addImm(0);
+ constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(InsSubMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(InsEltMI, TII, TRI, RBI);
+ DstVec = InsDef;
+ } else {
+ MachineInstr &InsMI =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
+ .addDef(InsDef)
+ .addUse(DstVec)
+ .addImm(LaneIdx)
+ .addUse(I.getOperand(i).getReg());
+ constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
+ DstVec = InsDef;
+ }
+ }
+ I.eraseFromParent();
+ return true;
+}
+
/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 327c758a7f8e..6f7fb7a8bc21 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -48,9 +48,21 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
const LLT v2s64 = LLT::vector(2, 64);
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
- .legalFor({p0, s1, s8, s16, s32, s64})
- .clampScalar(0, s1, s64)
- .widenScalarToNextPow2(0, 8);
+ .legalFor({p0, s1, s8, s16, s32, s64, v2s64})
+ .clampScalar(0, s1, s64)
+ .widenScalarToNextPow2(0, 8)
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].isVector() &&
+ (Query.Types[0].getElementType() != s64 ||
+ Query.Types[0].getNumElements() != 2);
+ },
+ [=](const LegalityQuery &Query) {
+ LLT EltTy = Query.Types[0].getElementType();
+ if (EltTy == s64)
+ return std::make_pair(0, LLT::vector(2, 64));
+ return std::make_pair(0, EltTy);
+ });
getActionDefinitionsBuilder(G_PHI)
.legalFor({p0, s16, s32, s64})
@@ -97,6 +109,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
+ getActionDefinitionsBuilder(G_FCEIL)
+ // If we don't have full FP16 support, then widen s16 to s32 if we
+ // encounter it.
+ .widenScalarIf(
+ [=, &ST](const LegalityQuery &Query) {
+ return Query.Types[0] == s16 && !ST.hasFullFP16();
+ },
+ [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
+ .legalFor({s16, s32, s64, v2s32, v4s32, v2s64});
+
getActionDefinitionsBuilder(G_INSERT)
.unsupportedIf([=](const LegalityQuery &Query) {
return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
@@ -167,9 +189,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.unsupportedIfMemSizeNotPow2()
// Lower any any-extending loads left into G_ANYEXT and G_LOAD
.lowerIf([=](const LegalityQuery &Query) {
- return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].Size * 8;
+ return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
- .clampNumElements(0, v2s32, v2s32);
+ .clampNumElements(0, v2s32, v2s32)
+ .clampMaxNumElements(0, s64, 1);
getActionDefinitionsBuilder(G_STORE)
.legalForTypesWithMemSize({{s8, p0, 8},
@@ -185,9 +208,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.unsupportedIfMemSizeNotPow2()
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isScalar() &&
- Query.Types[0].getSizeInBits() != Query.MMODescrs[0].Size * 8;
+ Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
- .clampNumElements(0, v2s32, v2s32);
+ .clampNumElements(0, v2s32, v2s32)
+ .clampMaxNumElements(0, s64, 1);
// Constants
getActionDefinitionsBuilder(G_CONSTANT)
@@ -385,13 +409,37 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
});
}
+ getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
+ .unsupportedIf([=](const LegalityQuery &Query) {
+ const LLT &EltTy = Query.Types[1].getElementType();
+ return Query.Types[0] != EltTy;
+ })
+ .minScalar(2, s64)
+ .legalIf([=](const LegalityQuery &Query) {
+ const LLT &VecTy = Query.Types[1];
+ return VecTy == v4s32 || VecTy == v2s64;
+ });
+
+ getActionDefinitionsBuilder(G_BUILD_VECTOR)
+ .legalFor({{v4s32, s32}, {v2s64, s64}})
+ .clampNumElements(0, v4s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+
+ // Deal with larger scalar types, which will be implicitly truncated.
+ .legalIf([=](const LegalityQuery &Query) {
+ return Query.Types[0].getScalarSizeInBits() <
+ Query.Types[1].getSizeInBits();
+ })
+ .minScalarSameAs(1, 0);
+
computeTables();
verify(*ST.getInstrInfo());
}
bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
switch (MI.getOpcode()) {
default:
// No idea what to do.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
index a745b0edbc6d..77e8bdc7623c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
namespace llvm {
@@ -28,7 +29,8 @@ public:
AArch64LegalizerInfo(const AArch64Subtarget &ST);
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const override;
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const override;
private:
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 4a19ecd69103..aa732a99469c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -702,7 +702,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
.addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
.add(BaseRegOp)
.addImm(OffsetImm)
- .setMemRefs(I->mergeMemRefsWith(*MergeMI))
+ .cloneMergedMemRefs({&*I, &*MergeMI})
.setMIFlags(I->mergeFlagsWith(*MergeMI));
(void)MIB;
@@ -819,7 +819,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
.add(RegOp1)
.add(BaseRegOp)
.addImm(OffsetImm)
- .setMemRefs(I->mergeMemRefsWith(*Paired))
+ .cloneMergedMemRefs({&*I, &*Paired})
.setMIFlags(I->mergeFlagsWith(*Paired));
(void)MIB;
@@ -1338,7 +1338,7 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.add(getLdStRegOp(*I))
.add(getLdStBaseOp(*I))
.addImm(Value)
- .setMemRefs(I->memoperands_begin(), I->memoperands_end())
+ .setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
} else {
// Paired instruction.
@@ -1349,7 +1349,7 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.add(getLdStRegOp(*I, 1))
.add(getLdStBaseOp(*I))
.addImm(Value / Scale)
- .setMemRefs(I->memoperands_begin(), I->memoperands_end())
+ .setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
}
(void)MIB;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
index 6c0263585933..d71359223b1b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -44,16 +45,31 @@ AArch64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
assert(TheTriple.isOSWindows() &&
"Windows is the only supported COFF target");
- bool IsIndirect = (TargetFlags & AArch64II::MO_DLLIMPORT);
+ bool IsIndirect = (TargetFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB));
if (!IsIndirect)
return Printer.getSymbol(GV);
SmallString<128> Name;
- Name = "__imp_";
+ if (TargetFlags & AArch64II::MO_DLLIMPORT)
+ Name = "__imp_";
+ else if (TargetFlags & AArch64II::MO_COFFSTUB)
+ Name = ".refptr.";
Printer.TM.getNameWithPrefix(Name, GV,
Printer.getObjFileLowering().getMangler());
- return Ctx.getOrCreateSymbol(Name);
+ MCSymbol *MCSym = Ctx.getOrCreateSymbol(Name);
+
+ if (TargetFlags & AArch64II::MO_COFFSTUB) {
+ MachineModuleInfoCOFF &MMICOFF =
+ Printer.MMI->getObjFileInfo<MachineModuleInfoCOFF>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMICOFF.getGVStubEntry(MCSym);
+
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(Printer.getSymbol(GV), true);
+ }
+
+ return MCSym;
}
MCSymbol *
@@ -173,20 +189,51 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO,
MCSymbol *Sym) const {
- AArch64MCExpr::VariantKind RefKind = AArch64MCExpr::VK_NONE;
+ uint32_t RefFlags = 0;
+
if (MO.getTargetFlags() & AArch64II::MO_TLS) {
if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGEOFF)
- RefKind = AArch64MCExpr::VK_SECREL_LO12;
+ RefFlags |= AArch64MCExpr::VK_SECREL_LO12;
else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
AArch64II::MO_HI12)
- RefKind = AArch64MCExpr::VK_SECREL_HI12;
+ RefFlags |= AArch64MCExpr::VK_SECREL_HI12;
+
+ } else if (MO.getTargetFlags() & AArch64II::MO_S) {
+ RefFlags |= AArch64MCExpr::VK_SABS;
+ } else {
+ RefFlags |= AArch64MCExpr::VK_ABS;
+ }
+
+ if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3)
+ RefFlags |= AArch64MCExpr::VK_G3;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G2)
+ RefFlags |= AArch64MCExpr::VK_G2;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G1)
+ RefFlags |= AArch64MCExpr::VK_G1;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
+ RefFlags |= AArch64MCExpr::VK_G0;
+
+ // FIXME: Currently we only set VK_NC for MO_G3/MO_G2/MO_G1/MO_G0. This is
+ // because setting VK_NC for others would mean setting their respective
+ // RefFlags correctly. We should do this in a separate patch.
+ if (MO.getTargetFlags() & AArch64II::MO_NC) {
+ auto MOFrag = (MO.getTargetFlags() & AArch64II::MO_FRAGMENT);
+ if (MOFrag == AArch64II::MO_G3 || MOFrag == AArch64II::MO_G2 ||
+ MOFrag == AArch64II::MO_G1 || MOFrag == AArch64II::MO_G0)
+ RefFlags |= AArch64MCExpr::VK_NC;
}
+
const MCExpr *Expr =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+
+ auto RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags);
+ assert(RefKind != AArch64MCExpr::VK_INVALID &&
+ "Invalid relocation requested");
Expr = AArch64MCExpr::create(Expr, RefKind, Ctx);
+
return MCOperand::createExpr(Expr);
}
@@ -253,4 +300,17 @@ void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
if (lowerOperand(MO, MCOp))
OutMI.addOperand(MCOp);
}
+
+ switch (OutMI.getOpcode()) {
+ case AArch64::CATCHRET:
+ OutMI = MCInst();
+ OutMI.setOpcode(AArch64::RET);
+ OutMI.addOperand(MCOperand::createReg(AArch64::LR));
+ break;
+ case AArch64::CLEANUPRET:
+ OutMI = MCInst();
+ OutMI.setOpcode(AArch64::RET);
+ OutMI.addOperand(MCOperand::createReg(AArch64::LR));
+ break;
+ }
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index e42214d15699..5183e7d3c0d0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include <cassert>
@@ -97,6 +98,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// attribute, in which case it is set to false at construction.
Optional<bool> HasRedZone;
+ /// ForwardedMustTailRegParms - A list of virtual and physical registers
+ /// that must be forwarded to every musttail call.
+ SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
public:
AArch64FunctionInfo() = default;
@@ -162,6 +166,19 @@ public:
unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
+ unsigned getJumpTableEntrySize(int Idx) const {
+ auto It = JumpTableEntryInfo.find(Idx);
+ if (It != JumpTableEntryInfo.end())
+ return It->second.first;
+ return 4;
+ }
+ MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const {
+ return JumpTableEntryInfo.find(Idx)->second.second;
+ }
+ void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) {
+ JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym);
+ }
+
using SetOfInstructions = SmallPtrSet<const MachineInstr *, 16>;
const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
@@ -196,10 +213,16 @@ public:
LOHRelated.insert(Args.begin(), Args.end());
}
+ SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
+ return ForwardedMustTailRegParms;
+ }
+
private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
+
+ DenseMap<int, std::pair<unsigned, MCSymbol *>> JumpTableEntryInfo;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index bc0168e783be..bc596dd38b6e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -20,161 +20,175 @@ using namespace llvm;
namespace {
-// Fuse CMN, CMP, TST followed by Bcc.
+/// CMN, CMP, TST followed by Bcc
static bool isArithmeticBccPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() == AArch64::Bcc) {
- // Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
- return true;
+ if (SecondMI.getOpcode() != AArch64::Bcc)
+ return false;
- switch (FirstMI->getOpcode()) {
- case AArch64::ADDSWri:
- case AArch64::ADDSWrr:
- case AArch64::ADDSXri:
- case AArch64::ADDSXrr:
- case AArch64::ANDSWri:
- case AArch64::ANDSWrr:
- case AArch64::ANDSXri:
- case AArch64::ANDSXrr:
- case AArch64::SUBSWri:
- case AArch64::SUBSWrr:
- case AArch64::SUBSXri:
- case AArch64::SUBSXrr:
- case AArch64::BICSWrr:
- case AArch64::BICSXrr:
- return true;
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
- }
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDSWri:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
+ case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
+ return true;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
}
+
return false;
}
-// Fuse ALU operations followed by CBZ/CBNZ.
+/// ALU operations followed by CBZ/CBNZ.
static bool isArithmeticCbzPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- unsigned SecondOpcode = SecondMI.getOpcode();
+ if (SecondMI.getOpcode() != AArch64::CBZW &&
+ SecondMI.getOpcode() != AArch64::CBZX &&
+ SecondMI.getOpcode() != AArch64::CBNZW &&
+ SecondMI.getOpcode() != AArch64::CBNZX)
+ return false;
- if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
- SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
- // Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
- return true;
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
- switch (FirstMI->getOpcode()) {
- case AArch64::ADDWri:
- case AArch64::ADDWrr:
- case AArch64::ADDXri:
- case AArch64::ADDXrr:
- case AArch64::ANDWri:
- case AArch64::ANDWrr:
- case AArch64::ANDXri:
- case AArch64::ANDXrr:
- case AArch64::EORWri:
- case AArch64::EORWrr:
- case AArch64::EORXri:
- case AArch64::EORXrr:
- case AArch64::ORRWri:
- case AArch64::ORRWrr:
- case AArch64::ORRXri:
- case AArch64::ORRXrr:
- case AArch64::SUBWri:
- case AArch64::SUBWrr:
- case AArch64::SUBXri:
- case AArch64::SUBXrr:
- return true;
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
- }
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDWri:
+ case AArch64::ADDWrr:
+ case AArch64::ADDXri:
+ case AArch64::ADDXrr:
+ case AArch64::ANDWri:
+ case AArch64::ANDWrr:
+ case AArch64::ANDXri:
+ case AArch64::ANDXrr:
+ case AArch64::EORWri:
+ case AArch64::EORWrr:
+ case AArch64::EORXri:
+ case AArch64::EORXrr:
+ case AArch64::ORRWri:
+ case AArch64::ORRWrr:
+ case AArch64::ORRXri:
+ case AArch64::ORRXrr:
+ case AArch64::SUBWri:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXri:
+ case AArch64::SUBXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
}
+
return false;
}
-// Fuse AES crypto encoding or decoding.
+/// AES crypto encoding or decoding.
static bool isAESPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
+ switch (SecondMI.getOpcode()) {
// AES encode.
- if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::AESErr) &&
- (SecondOpcode == AArch64::AESMCrr ||
- SecondOpcode == AArch64::AESMCrrTied))
- return true;
+ case AArch64::AESMCrr:
+ case AArch64::AESMCrrTied:
+ return FirstMI == nullptr || FirstMI->getOpcode() == AArch64::AESErr;
// AES decode.
- else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::AESDrr) &&
- (SecondOpcode == AArch64::AESIMCrr ||
- SecondOpcode == AArch64::AESIMCrrTied))
+ case AArch64::AESIMCrr:
+ case AArch64::AESIMCrrTied:
+ return FirstMI == nullptr || FirstMI->getOpcode() == AArch64::AESDrr;
+ }
+
+ return false;
+}
+
+/// AESE/AESD/PMULL + EOR.
+static bool isCryptoEORPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != AArch64::EORv16i8)
+ return false;
+
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ switch (FirstMI->getOpcode()) {
+ case AArch64::AESErr:
+ case AArch64::AESDrr:
+ case AArch64::PMULLv16i8:
+ case AArch64::PMULLv8i8:
+ case AArch64::PMULLv1i64:
+ case AArch64::PMULLv2i64:
return true;
+ }
return false;
}
-// Fuse literal generation.
+/// Literal generation.
static bool isLiteralsPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
// PC relative address.
- if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::ADRP) &&
- SecondOpcode == AArch64::ADDXri)
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::ADRP) &&
+ SecondMI.getOpcode() == AArch64::ADDXri)
return true;
+
// 32 bit immediate.
- else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::MOVZWi) &&
- (SecondOpcode == AArch64::MOVKWi &&
- SecondMI.getOperand(3).getImm() == 16))
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZWi) &&
+ (SecondMI.getOpcode() == AArch64::MOVKWi &&
+ SecondMI.getOperand(3).getImm() == 16))
return true;
+
// Lower half of 64 bit immediate.
- else if((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::MOVZXi) &&
- (SecondOpcode == AArch64::MOVKXi &&
- SecondMI.getOperand(3).getImm() == 16))
+ if((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZXi) &&
+ (SecondMI.getOpcode() == AArch64::MOVKXi &&
+ SecondMI.getOperand(3).getImm() == 16))
return true;
+
// Upper half of 64 bit immediate.
- else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- (FirstOpcode == AArch64::MOVKXi &&
- FirstMI->getOperand(3).getImm() == 32)) &&
- (SecondOpcode == AArch64::MOVKXi &&
- SecondMI.getOperand(3).getImm() == 48))
+ if ((FirstMI == nullptr ||
+ (FirstMI->getOpcode() == AArch64::MOVKXi &&
+ FirstMI->getOperand(3).getImm() == 32)) &&
+ (SecondMI.getOpcode() == AArch64::MOVKXi &&
+ SecondMI.getOperand(3).getImm() == 48))
return true;
return false;
}
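For context, the MOVZ/MOVK chains these fusion checks target assemble a wide immediate one 16-bit chunk at a time, and the fused pairs are the adjacent links of that chain (MOVZ with MOVK lsl #16, and MOVK lsl #32 with MOVK lsl #48). A minimal sketch of that arithmetic in plain C++, not part of the patch:

#include <cassert>
#include <cstdint>

// Each instruction contributes one 16-bit chunk at a fixed shift: MOVZ zeroes
// the register and writes its chunk, MOVK keeps the other bits.
static uint64_t materialize(uint16_t c0, uint16_t c1, uint16_t c2, uint16_t c3) {
  uint64_t x = c0;                                      // MOVZ Xd, #c0
  x = (x & ~(0xffffULL << 16)) | (uint64_t(c1) << 16);  // MOVK Xd, #c1, lsl #16
  x = (x & ~(0xffffULL << 32)) | (uint64_t(c2) << 32);  // MOVK Xd, #c2, lsl #32
  x = (x & ~(0xffffULL << 48)) | (uint64_t(c3) << 48);  // MOVK Xd, #c3, lsl #48
  return x;
}

int main() {
  assert(materialize(0xcdef, 0x90ab, 0x5678, 0x1234) == 0x1234567890abcdefULL);
  return 0;
}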
-// Fuse address generation and loads or stores.
+/// Fuse address generation and loads or stores.
static bool isAddressLdStPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- switch (SecondOpcode) {
+ switch (SecondMI.getOpcode()) {
case AArch64::STRBBui:
case AArch64::STRBui:
case AArch64::STRDui:
@@ -199,63 +213,164 @@ static bool isAddressLdStPair(const MachineInstr *FirstMI,
case AArch64::LDRSHXui:
case AArch64::LDRSWui:
// Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
+ if (FirstMI == nullptr)
return true;
- switch (FirstMI->getOpcode()) {
+ switch (FirstMI->getOpcode()) {
case AArch64::ADR:
- return (SecondMI.getOperand(2).getImm() == 0);
+ return SecondMI.getOperand(2).getImm() == 0;
case AArch64::ADRP:
return true;
}
}
+
return false;
}
-// Fuse compare and conditional select.
+/// Compare and conditional select.
static bool isCCSelectPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- unsigned SecondOpcode = SecondMI.getOpcode();
-
// 32 bits
- if (SecondOpcode == AArch64::CSELWr) {
+ if (SecondMI.getOpcode() == AArch64::CSELWr) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
+ if (FirstMI == nullptr)
return true;
if (FirstMI->definesRegister(AArch64::WZR))
switch (FirstMI->getOpcode()) {
case AArch64::SUBSWrs:
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
case AArch64::SUBSWrx:
- return (!AArch64InstrInfo::hasExtendedReg(*FirstMI));
+ return !AArch64InstrInfo::hasExtendedReg(*FirstMI);
case AArch64::SUBSWrr:
case AArch64::SUBSWri:
return true;
}
}
+
// 64 bits
- else if (SecondOpcode == AArch64::CSELXr) {
+ if (SecondMI.getOpcode() == AArch64::CSELXr) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
+ if (FirstMI == nullptr)
return true;
if (FirstMI->definesRegister(AArch64::XZR))
switch (FirstMI->getOpcode()) {
case AArch64::SUBSXrs:
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
case AArch64::SUBSXrx:
case AArch64::SUBSXrx64:
- return (!AArch64InstrInfo::hasExtendedReg(*FirstMI));
+ return !AArch64InstrInfo::hasExtendedReg(*FirstMI);
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
return true;
}
}
+
+ return false;
+}
+
+/// Arithmetic and logic.
+static bool isArithmeticLogicPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ if (AArch64InstrInfo::hasShiftedReg(SecondMI))
+ return false;
+
+ switch (SecondMI.getOpcode()) {
+ // Arithmetic
+ case AArch64::ADDWrr:
+ case AArch64::ADDXrr:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXrr:
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ // Logic
+ case AArch64::ANDWrr:
+ case AArch64::ANDXrr:
+ case AArch64::BICWrr:
+ case AArch64::BICXrr:
+ case AArch64::EONWrr:
+ case AArch64::EONXrr:
+ case AArch64::EORWrr:
+ case AArch64::EORXrr:
+ case AArch64::ORNWrr:
+ case AArch64::ORNXrr:
+ case AArch64::ORRWrr:
+ case AArch64::ORRXrr:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ case AArch64::EONWrs:
+ case AArch64::EONXrs:
+ case AArch64::EORWrs:
+ case AArch64::EORXrs:
+ case AArch64::ORNWrs:
+ case AArch64::ORNXrs:
+ case AArch64::ORRWrs:
+ case AArch64::ORRXrs:
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ // Arithmetic
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDWrr:
+ case AArch64::ADDXrr:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXrr:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXrr:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
+ }
+ break;
+
+ // Arithmetic, setting flags.
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXrr:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ // Arithmetic, not setting flags.
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDWrr:
+ case AArch64::ADDXrr:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
+ }
+ break;
+ }
+
return false;
}
-/// Check if the instr pair, FirstMI and SecondMI, should be fused
+/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, then check if
/// SecondMI may be part of a fused pair at all.
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
@@ -264,18 +379,24 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const MachineInstr &SecondMI) {
const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI);
+ // All checking functions assume that the 1st instr is a wildcard if it is
+ // unspecified.
if (ST.hasArithmeticBccFusion() && isArithmeticBccPair(FirstMI, SecondMI))
return true;
if (ST.hasArithmeticCbzFusion() && isArithmeticCbzPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
return true;
+ if (ST.hasFuseCryptoEOR() && isCryptoEORPair(FirstMI, SecondMI))
+ return true;
if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseAddress() && isAddressLdStPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseCCSelect() && isCCSelectPair(FirstMI, SecondMI))
return true;
+ if (ST.hasFuseArithmeticLogic() && isArithmeticLogicPair(FirstMI, SecondMI))
+ return true;
return false;
}
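Each of the is*Pair helpers dispatched from shouldScheduleAdjacent above, including the new isCryptoEORPair and isArithmeticLogicPair checks, follows the same shape: if the second instruction is a fusion candidate and the first is unspecified (null), report true so the scheduler can ask whether the second instruction could end a fused pair at all; otherwise test the first instruction's opcode. A minimal standalone sketch of that shape, in plain C++ with hypothetical opcode names rather than LLVM's MachineInstr API:

#include <optional>

enum Opcode { SUBS, CSEL, OTHER };   // hypothetical opcodes, for illustration only

// Returns true when the (First, Second) pair may fuse.  Passing no first
// opcode treats it as a wildcard, mirroring the FirstMI == nullptr case in
// the helpers above.
bool mayFusePair(std::optional<Opcode> First, Opcode Second) {
  if (Second != CSEL)      // the second instruction must be a candidate
    return false;
  if (!First)              // unspecified first instruction: assume a wildcard
    return true;
  return *First == SUBS;   // otherwise require a matching producer
}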
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td b/contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td
new file mode 100644
index 000000000000..16ba3e4282a0
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td
@@ -0,0 +1,19 @@
+//===-- AArch64PfmCounters.td - AArch64 Hardware Counters --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the available hardware counters for AArch64.
+//
+//===----------------------------------------------------------------------===//
+
+def CpuCyclesPfmCounter : PfmCounter<"CPU_CYCLES">;
+
+def DefaultPfmCounters : ProcPfmCounters {
+ let CycleCounter = CpuCyclesPfmCounter;
+}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..3da9306e6460
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -0,0 +1,108 @@
+//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+namespace {
+class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
+public:
+ AArch64PreLegalizerCombinerInfo()
+ : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr) {}
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ CombinerHelper Helper(Observer, B);
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD:
+ return Helper.tryCombineExtendingLoads(MI);
+ }
+
+ return false;
+}
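// The combine hook above routes G_LOAD, G_SEXTLOAD and G_ZEXTLOAD through
// CombinerHelper::tryCombineExtendingLoads.  The toy model below (plain C++,
// not LLVM's MIR API; the helper's actual matching is more general) only
// illustrates the effect of that rewrite: a separate extend of a loaded value
// collapses into a single extending load.
enum class Op { Load, SExt, SExtLoad };   // toy opcodes, illustration only
struct Inst { Op Opcode; Inst *Src; };    // toy single-operand instruction

// If I is a sign-extend fed directly by a plain load, rewrite I in place as a
// sign-extending load of the same address and report that a change was made.
bool tryCombineExtendingLoad(Inst &I) {
  if (I.Opcode != Op::SExt || !I.Src || I.Src->Opcode != Op::Load)
    return false;
  I.Opcode = Op::SExtLoad;
  I.Src = I.Src->Src;   // read straight from the load's address operand
  return true;
}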
+
+// Pass boilerplate
+// ================
+
+class AArch64PreLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AArch64PreLegalizerCombiner();
+
+ StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+}
+
+void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner() : MachineFunctionPass(ID) {
+ initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ AArch64PreLegalizerCombinerInfo PCInfo;
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+}
+
+char AArch64PreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
+ "Combine AArch64 machine instrs before legalization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
+ "Combine AArch64 machine instrs before legalization", false,
+ false)
+
+
+namespace llvm {
+FunctionPass *createAArch64PreLegalizeCombiner() {
+ return new AArch64PreLegalizerCombiner();
+}
+} // end namespace llvm
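The file above only defines the pass and its factory function; hooking it into the GlobalISel pipeline happens on the target-machine side, which is not part of this hunk. A sketch of the usual wiring, with the pass-config type below as a stand-in (the hook name and structure are assumptions, not taken from this diff):

namespace llvm { class FunctionPass; FunctionPass *createAArch64PreLegalizeCombiner(); }

struct SketchPassConfig {                // stand-in for the target's TargetPassConfig
  void addPass(llvm::FunctionPass *P);   // declaration only, for illustration
  void addPreLegalizeMachineIR() {       // runs before the GlobalISel legalizer
    addPass(llvm::createAArch64PreLegalizeCombiner());
  }
};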
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index c497669f937f..68c48a5ec216 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -389,6 +389,7 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FCEIL:
return true;
}
return false;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index a7c2c1b8125b..96ae45ae3d0d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
@@ -42,12 +43,16 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows())
+ return CSR_Win_AArch64_AAPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::GHC)
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_AArch64_NoRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
+ if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
+ return CSR_AArch64_AAVPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
@@ -72,6 +77,23 @@ const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
return nullptr;
}
+void AArch64RegisterInfo::UpdateCustomCalleeSavedRegs(
+ MachineFunction &MF) const {
+ const MCPhysReg *CSRs = getCalleeSavedRegs(&MF);
+ SmallVector<MCPhysReg, 32> UpdatedCSRs;
+ for (const MCPhysReg *I = CSRs; *I; ++I)
+ UpdatedCSRs.push_back(*I);
+
+ for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
+ if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
+ UpdatedCSRs.push_back(AArch64::GPR64commonRegClass.getRegister(i));
+ }
+ }
+ // Register lists are zero-terminated.
+ UpdatedCSRs.push_back(0);
+ MF.getRegInfo().setCalleeSavedRegs(UpdatedCSRs);
+}
+
const TargetRegisterClass *
AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
unsigned Idx) const {
@@ -97,6 +119,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::CXX_FAST_TLS)
return SCS ? CSR_AArch64_CXX_TLS_Darwin_SCS_RegMask
: CSR_AArch64_CXX_TLS_Darwin_RegMask;
+ if (CC == CallingConv::AArch64_VectorCall)
+ return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
@@ -117,6 +141,30 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
return CSR_AArch64_TLS_ELF_RegMask;
}
+void AArch64RegisterInfo::UpdateCustomCallPreservedMask(MachineFunction &MF,
+ const uint32_t **Mask) const {
+ uint32_t *UpdatedMask = MF.allocateRegMask();
+ unsigned RegMaskSize = MachineOperand::getRegMaskSize(getNumRegs());
+ memcpy(UpdatedMask, *Mask, sizeof(UpdatedMask[0]) * RegMaskSize);
+
+ for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
+ if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
+ for (MCSubRegIterator SubReg(AArch64::GPR64commonRegClass.getRegister(i),
+ this, true);
+ SubReg.isValid(); ++SubReg) {
+ // See TargetRegisterInfo::getCallPreservedMask for how to interpret the
+ // register mask.
+ UpdatedMask[*SubReg / 32] |= 1u << (*SubReg % 32);
+ }
+ }
+ }
+ *Mask = UpdatedMask;
+}
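// The update above relies on the regmask layout documented in
// TargetRegisterInfo::getCallPreservedMask: register number N is preserved
// across the call iff bit (N % 32) of 32-bit word (N / 32) is set.  A minimal
// standalone sketch of that layout (plain C++ helpers, illustrative only):
#include <cstdint>
#include <vector>

inline void markPreserved(std::vector<uint32_t> &Mask, unsigned Reg) {
  Mask[Reg / 32] |= 1u << (Reg % 32);          // same update as UpdatedMask above
}

inline bool isPreserved(const std::vector<uint32_t> &Mask, unsigned Reg) {
  return (Mask[Reg / 32] >> (Reg % 32)) & 1u;  // test a single register's bit
}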
+
+const uint32_t *AArch64RegisterInfo::getNoPreservedMask() const {
+ return CSR_AArch64_NoRegs_RegMask;
+}
+
const uint32_t *
AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -147,46 +195,46 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasFP(MF) || TT.isOSDarwin())
markSuperRegs(Reserved, AArch64::W29);
- if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
- markSuperRegs(Reserved, AArch64::W18); // Platform register
-
- if (MF.getSubtarget<AArch64Subtarget>().isX20Reserved())
- markSuperRegs(Reserved, AArch64::W20); // Platform register
+ for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
+ if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i))
+ markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i));
+ }
if (hasBasePointer(MF))
markSuperRegs(Reserved, AArch64::W19);
+ // SLH uses register W16/X16 as the taint register.
+ if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ markSuperRegs(Reserved, AArch64::W16);
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
unsigned Reg) const {
- const AArch64FrameLowering *TFI = getFrameLowering(MF);
+ return getReservedRegs(MF)[Reg];
+}
- switch (Reg) {
- default:
- break;
- case AArch64::SP:
- case AArch64::XZR:
- case AArch64::WSP:
- case AArch64::WZR:
- return true;
- case AArch64::X18:
- case AArch64::W18:
- return MF.getSubtarget<AArch64Subtarget>().isX18Reserved();
- case AArch64::X19:
- case AArch64::W19:
- return hasBasePointer(MF);
- case AArch64::X20:
- case AArch64::W20:
- return MF.getSubtarget<AArch64Subtarget>().isX20Reserved();
- case AArch64::FP:
- case AArch64::W29:
- return TFI->hasFP(MF) || TT.isOSDarwin();
- }
+bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const {
+ // FIXME: Get the list of argument registers from TableGen.
+ static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7 };
+ return std::any_of(std::begin(GPRArgRegs), std::end(GPRArgRegs),
+ [this, &MF](MCPhysReg r){return isReservedReg(MF, r);});
+}
- return false;
+void AArch64RegisterInfo::emitReservedArgRegCallError(
+ const MachineFunction &MF) const {
+ const Function &F = MF.getFunction();
+ F.getContext().diagnose(DiagnosticInfoUnsupported{F, "AArch64 doesn't support"
+ " function calls if any of the argument registers is reserved."});
+}
+
+bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ return !isReservedReg(MF, PhysReg);
}
bool AArch64RegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
@@ -211,14 +259,15 @@ unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }
bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
- // In the presence of variable sized objects, if the fixed stack size is
- // large enough that referencing from the FP won't result in things being
- // in range relatively often, we can use a base pointer to allow access
+ // In the presence of variable sized objects or funclets, if the fixed stack
+ // size is large enough that referencing from the FP won't result in things
+ // being in range relatively often, we can use a base pointer to allow access
// from the other direction like the SP normally works.
+ //
// Furthermore, if both variable sized objects are present, and the
// stack needs to be dynamically re-aligned, the base pointer is the only
// reliable way to reference the locals.
- if (MFI.hasVarSizedObjects()) {
+ if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
if (needsStackRealignment(MF))
return true;
// Conservatively estimate whether the negative offset from the frame
@@ -449,10 +498,7 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::GPR64commonRegClassID:
return 32 - 1 // XZR/SP
- (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
- - MF.getSubtarget<AArch64Subtarget>()
- .isX18Reserved() // X18 reserved as platform register
- - MF.getSubtarget<AArch64Subtarget>()
- .isX20Reserved() // X20 reserved as platform register
+ - MF.getSubtarget<AArch64Subtarget>().getNumXRegisterReserved()
- hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 57000d37090d..c4153228a7c0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -30,7 +30,18 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
public:
AArch64RegisterInfo(const Triple &TT);
+ // FIXME: This should be tablegen'd like getDwarfRegNum is
+ int getSEHRegNum(unsigned i) const {
+ return getEncodingValue(i);
+ }
+
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+ bool isAnyArgRegReserved(const MachineFunction &MF) const;
+ void emitReservedArgRegCallError(const MachineFunction &MF) const;
+
+ void UpdateCustomCalleeSavedRegs(MachineFunction &MF) const;
+ void UpdateCustomCallPreservedMask(MachineFunction &MF,
+ const uint32_t **Mask) const;
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
@@ -54,6 +65,9 @@ public:
// normal calls, so they need a different mask to represent this.
const uint32_t *getTLSCallPreservedMask() const;
+ // Funclets on ARM64 Windows don't preserve any registers.
+ const uint32_t *getNoPreservedMask() const override;
+
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i64 first argument if the calling convention
/// is one that can (partially) model this attribute with a preserved mask
@@ -69,6 +83,8 @@ public:
const uint32_t *getWindowsStackProbePreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const override;
bool isConstantPhysReg(unsigned PhysReg) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
@@ -76,8 +92,6 @@ public:
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index bbf401b474ca..d3710cea0687 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -200,6 +200,12 @@ def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X2
X22, X23, X24, X25, X26,
X27, X28, FP, LR)>;
+// Restricted set of tail call registers, for use when branch target
+// enforcement is enabled. These are the only registers which can be used to
+// indirectly branch (not call) to the "BTI c" instruction at the start of a
+// BTI-protected function.
+def rtcGPR64 : RegisterClass<"AArch64", [i64], 64, (add X16, X17)>;
+
// GPR register classes for post increment amount of vector load/store that
// has alternate printing when Rm=31 and prints a constant immediate value
// equal to the total number of bytes transferred.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td
index ecc68aed1550..f757d53b6c1c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td
@@ -62,13 +62,6 @@ def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
M1UnitNAL1]>; // All simple vector
//===----------------------------------------------------------------------===//
-// Predicates.
-
-def M1BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
- MI->getOperand(0).getReg() != AArch64::LR}]>;
-def M1ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
-
-//===----------------------------------------------------------------------===//
// Coarse scheduling model.
def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
@@ -85,14 +78,15 @@ def M1WriteAC : SchedWriteRes<[M1UnitALU,
def M1WriteAD : SchedWriteRes<[M1UnitALU,
M1UnitC]> { let Latency = 2;
let NumMicroOps = 2; }
-def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteA1]>,
- SchedVar<NoSchedPred, [M1WriteAA]>]>;
+def M1WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M1WriteA1]>,
+ SchedVar<ExynosLogicPred, [M1WriteA1]>,
+ SchedVar<NoSchedPred, [M1WriteAA]>]>;
def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
-def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkFastPred, [M1WriteAB]>,
- SchedVar<NoSchedPred, [M1WriteAC]>]>;
+def M1WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M1WriteAC]>,
+ SchedVar<NoSchedPred, [M1WriteAB]>]>;
def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
@@ -110,40 +104,27 @@ def M1WriteLD : SchedWriteRes<[M1UnitL,
let ResourceCycles = [2, 1]; }
def M1WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
-def M1WriteLX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
- SchedVar<NoSchedPred, [M1WriteLC]>]>;
-def M1WriteLY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
- SchedVar<NoSchedPred, [M1WriteLD]>]>;
+def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteLC]>,
+ SchedVar<NoSchedPred, [M1WriteL5]>]>;
def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; }
def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
def M1WriteSA : SchedWriteRes<[M1UnitS,
M1UnitFST,
- M1UnitS,
- M1UnitFST]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M1WriteSB : SchedWriteRes<[M1UnitS,
- M1UnitFST,
M1UnitA]> { let Latency = 3;
let NumMicroOps = 2; }
-def M1WriteSC : SchedWriteRes<[M1UnitS,
+def M1WriteSB : SchedWriteRes<[M1UnitS,
M1UnitFST,
M1UnitS,
M1UnitFST,
M1UnitA]> { let Latency = 3;
let NumMicroOps = 3; }
-def M1WriteSD : SchedWriteRes<[M1UnitS,
- M1UnitFST,
- M1UnitA]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M1WriteSE : SchedWriteRes<[M1UnitS,
+def M1WriteSC : SchedWriteRes<[M1UnitS,
M1UnitA]> { let Latency = 2;
let NumMicroOps = 2; }
-def M1WriteSX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
- SchedVar<NoSchedPred, [M1WriteSE]>]>;
-def M1WriteSY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
- SchedVar<NoSchedPred, [M1WriteSB]>]>;
+def M1WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteSC]>,
+ SchedVar<NoSchedPred, [M1WriteS1]>]>;
def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
@@ -414,9 +395,9 @@ def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
M1UnitFST,
- M1UnitFST]> { let Latency = 14;
- let NumMicroOps = 4;
- let ResourceCycles = [1, 7, 1, 7, 1]; }
+ M1UnitFST]> { let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 7, 1, 7, 1]; }
def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
@@ -427,9 +408,17 @@ def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
M1UnitFST,
- M1UnitFST]> { let Latency = 17;
- let NumMicroOps = 7;
- let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
+ M1UnitFST]> { let Latency = 17;
+ let NumMicroOps = 7;
+ let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
+
+// Special cases.
+def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
+def M1WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M1WriteNALU1]>,
+ SchedVar<NoSchedPred, [M1WriteA1]>]>;
+
+// Fast forwarding.
+def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
// Branch instructions
def : InstRW<[M1WriteB1], (instrs Bcc)>;
@@ -439,8 +428,11 @@ def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
-def : InstRW<[M1WriteA1], (instrs COPY)>;
-def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
+def : InstRW<[M1WriteAX], (instregex ".+rx(64)?$")>;
+def : InstRW<[M1WriteAX], (instregex ".+rs$")>;
+
+// Move instructions.
+def : InstRW<[M1WriteCOPY], (instrs COPY)>;
// Divide and multiply instructions.
@@ -450,10 +442,20 @@ def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
def : InstRW<[M1WriteLB,
WriteLDHi,
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
-def : InstRW<[M1WriteLX,
- ReadAdrBase], (instregex "^PRFMro[WX]")>;
+def : InstRW<[M1WriteLC,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
+def : InstRW<[M1WriteL5,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
+def : InstRW<[M1WriteLC,
+ ReadAdrBase], (instrs PRFMroW)>;
+def : InstRW<[M1WriteL5,
+ ReadAdrBase], (instrs PRFMroX)>;
// Store instructions.
+def : InstRW<[M1WriteSC,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
// FP data instructions.
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
@@ -487,8 +489,10 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[M1WriteLY,
- ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
+def : InstRW<[M1WriteLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
def : InstRW<[M1WriteLD,
ReadAdrBase], (instregex "^LDRQro[WX]")>;
def : InstRW<[WriteVLD,
@@ -507,14 +511,16 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
-def : InstRW<[M1WriteSY,
- ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
-def : InstRW<[M1WriteSB,
+def : InstRW<[M1WriteSA,
+ ReadAdrBase], (instregex "^STR[BDHS]roW")>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BDHS]roX")>;
+def : InstRW<[M1WriteSA,
ReadAdrBase], (instregex "^STRQro[WX]")>;
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STP[DS](post|pre)")>;
-def : InstRW<[M1WriteSC,
+def : InstRW<[M1WriteSB,
WriteAdr], (instregex "^STPQ(post|pre)")>;
// ASIMD instructions.
@@ -608,21 +614,21 @@ def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>;
def : InstRW<[M1WriteVLDE,
WriteAdr], (instregex "LD1i(64)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(1d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[M1WriteVLDA,
@@ -830,8 +836,6 @@ def : InstRW<[M1WriteVSTI,
WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
// Cryptography instructions.
-def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
-def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index 5e5369a5a7fe..15935088a17e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -26,9 +26,6 @@ def ExynosM3Model : SchedMachineModel {
let CompleteModel = 1; // Use the default model otherwise.
list<Predicate> UnsupportedFeatures = [HasSVE];
-
- // FIXME: Remove when all errors have been fixed.
- let FullInstRWOverlapCheck = 0;
}
//===----------------------------------------------------------------------===//
@@ -107,23 +104,12 @@ def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0,
M3UnitNSHF2]>;
//===----------------------------------------------------------------------===//
-// Predicates.
-
-def M3BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
- MI->getOperand(0).isReg() &&
- MI->getOperand(0).getReg() != AArch64::LR}]>;
-def M3ResetFastPred : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>;
-def M3RotateRightFastPred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri ||
- MI->getOpcode() == AArch64::EXTRXrri) &&
- MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>;
-def M3ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
-
-//===----------------------------------------------------------------------===//
// Coarse scheduling model.
def M3WriteZ0 : SchedWriteRes<[]> { let Latency = 0;
let NumMicroOps = 1; }
+def M3WriteZ1 : SchedWriteRes<[]> { let Latency = 1;
+ let NumMicroOps = 0; }
def M3WriteA1 : SchedWriteRes<[M3UnitALU]> { let Latency = 1; }
def M3WriteAA : SchedWriteRes<[M3UnitALU]> { let Latency = 2;
@@ -140,15 +126,23 @@ def M3WriteAD : SchedWriteRes<[M3UnitALU,
let NumMicroOps = 2; }
def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; }
def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; }
-def M3WriteAX : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
- SchedVar<M3ShiftLeftFastPred, [M3WriteA1]>,
- SchedVar<NoSchedPred, [M3WriteAA]>]>;
-def M3WriteAY : SchedWriteVariant<[SchedVar<M3RotateRightFastPred, [M3WriteA1]>,
- SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+ SchedVar<ExynosArithPred, [M3WriteA1]>,
+ SchedVar<ExynosLogicPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAV : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M3WriteA1]>,
+ SchedVar<ExynosLogicPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; }
-def M3WriteBX : SchedWriteVariant<[SchedVar<M3BranchLinkFastPred, [M3WriteAB]>,
- SchedVar<NoSchedPred, [M3WriteAC]>]>;
+def M3WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M3WriteAC]>,
+ SchedVar<NoSchedPred, [M3WriteAB]>]>;
def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; }
def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; }
@@ -165,44 +159,38 @@ def M3WriteLC : SchedWriteRes<[M3UnitA,
def M3WriteLD : SchedWriteRes<[M3UnitA,
M3UnitL]> { let Latency = 4;
let NumMicroOps = 2; }
+def M3WriteLE : SchedWriteRes<[M3UnitA,
+ M3UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
def M3WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
-
-def M3WriteLX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteL5]>,
- SchedVar<NoSchedPred, [M3WriteLB]>]>;
+def M3WriteLX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteL5]>,
+ SchedVar<NoSchedPred, [M3WriteL4]>]>;
def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; }
def M3WriteSA : SchedWriteRes<[M3UnitA,
M3UnitS,
- M3UnitFST]> { let Latency = 2;
+ M3UnitFST]> { let Latency = 3;
let NumMicroOps = 2; }
def M3WriteSB : SchedWriteRes<[M3UnitA,
- M3UnitS]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M3WriteSC : SchedWriteRes<[M3UnitA,
M3UnitS]> { let Latency = 2;
let NumMicroOps = 2; }
-def M3WriteSX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
- SchedVar<NoSchedPred, [M3WriteSB]>]>;
-def M3WriteSY : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
- SchedVar<NoSchedPred, [M3WriteSC]>]>;
-
-def M3ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
- SchedVar<NoSchedPred, [ReadDefault]>]>;
+def M3ReadAdrBase : SchedReadVariant<[SchedVar<ExynosScaledIdxPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
// Branch instructions.
def : SchedAlias<WriteBr, M3WriteZ0>;
-def : WriteRes<WriteBrReg, [M3UnitC]> { let Latency = 1; }
+def : SchedAlias<WriteBrReg, M3WriteC1>;
// Arithmetic and logical integer instructions.
-def : WriteRes<WriteI, [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteISReg, [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIEReg, [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIS, [M3UnitALU]> { let Latency = 1; }
+def : SchedAlias<WriteI, M3WriteA1>;
+def : SchedAlias<WriteISReg, M3WriteA1>;
+def : SchedAlias<WriteIEReg, M3WriteA1>;
+def : SchedAlias<WriteIS, M3WriteA1>;
// Move instructions.
-def : WriteRes<WriteImm, [M3UnitALU]> { let Latency = 1; }
+def : SchedAlias<WriteImm, M3WriteA1>;
// Divide and multiply instructions.
def : WriteRes<WriteID32, [M3UnitC,
@@ -216,26 +204,23 @@ def : WriteRes<WriteIM64, [M3UnitC]> { let Latency = 4;
let ResourceCycles = [2]; }
// Miscellaneous instructions.
-def : WriteRes<WriteExtr, [M3UnitALU,
- M3UnitALU]> { let Latency = 1;
- let NumMicroOps = 2; }
+def : SchedAlias<WriteExtr, M3WriteAY>;
// Addressing modes.
-def : WriteRes<WriteAdr, []> { let Latency = 1;
- let NumMicroOps = 0; }
+def : SchedAlias<WriteAdr, M3WriteZ1>;
def : SchedAlias<ReadAdrBase, M3ReadAdrBase>;
// Load instructions.
def : SchedAlias<WriteLD, M3WriteL4>;
def : WriteRes<WriteLDHi, []> { let Latency = 4;
let NumMicroOps = 0; }
-def : SchedAlias<WriteLDIdx, M3WriteLX>;
+def : SchedAlias<WriteLDIdx, M3WriteLB>;
// Store instructions.
def : SchedAlias<WriteST, M3WriteS1>;
def : SchedAlias<WriteSTP, M3WriteS1>;
def : SchedAlias<WriteSTX, M3WriteS1>;
-def : SchedAlias<WriteSTIdx, M3WriteSX>;
+def : SchedAlias<WriteSTIdx, M3WriteSB>;
// FP data instructions.
def : WriteRes<WriteF, [M3UnitFADD]> { let Latency = 2; }
@@ -245,7 +230,6 @@ def : WriteRes<WriteFDiv, [M3UnitFDIV]> { let Latency = 12;
def : WriteRes<WriteFMul, [M3UnitFMAC]> { let Latency = 4; }
// FP miscellaneous instructions.
-// TODO: Conversion between register files is much different.
def : WriteRes<WriteFCvt, [M3UnitFCVT]> { let Latency = 3; }
def : WriteRes<WriteFImm, [M3UnitNALU]> { let Latency = 1; }
def : WriteRes<WriteFCopy, [M3UnitNALU]> { let Latency = 1; }
@@ -481,11 +465,15 @@ def M3WriteVSTI : SchedWriteRes<[M3UnitNALU,
// Special cases.
def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; }
+def M3WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M3WriteNALU1]>,
+ SchedVar<NoSchedPred, [M3WriteZ0]>]>;
+def M3WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
+
+// Fast forwarding.
def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>;
def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4,
M3WriteFMAC5]>;
-def M3WriteMOVI : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
- SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>;
// Branch instructions
@@ -496,29 +484,40 @@ def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>;
def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
-def : InstRW<[M3WriteA1], (instrs COPY)>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
+def : InstRW<[M3WriteAU], (instrs ORRWrs, ORRXrs)>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
+def : InstRW<[M3WriteAV], (instrs ADDWri, ADDXri)>;
+def : InstRW<[M3WriteAW], (instrs ORRWri, ORRXri)>;
// Move instructions.
-def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
-def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
+def : InstRW<[M3WriteCOPY], (instrs COPY)>;
+def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
+def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
// Divide and multiply instructions.
// Miscellaneous instructions.
-def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>;
// Load instructions.
def : InstRW<[M3WriteLD,
WriteLDHi,
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
+def : InstRW<[M3WriteLB,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
+def : InstRW<[M3WriteLX,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
+def : InstRW<[M3WriteLB,
+ ReadAdrBase], (instrs PRFMroW)>;
def : InstRW<[M3WriteLX,
- ReadAdrBase], (instregex "^PRFMro[WX]")>;
+ ReadAdrBase], (instrs PRFMroX)>;
// Store instructions.
+def : InstRW<[M3WriteSB,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
// FP data instructions.
def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>;
@@ -555,9 +554,11 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[M3WriteLX,
- ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
-def : InstRW<[M3WriteLB,
+def : InstRW<[M3WriteLE,
+ ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
+def : InstRW<[M3WriteLE,
ReadAdrBase], (instregex "^LDRQro[WX]")>;
def : InstRW<[WriteVLD,
M3WriteLH], (instregex "^LDN?P[DS]i")>;
@@ -575,8 +576,10 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
-def : InstRW<[M3WriteSY,
- ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
+def : InstRW<[M3WriteSA,
+ ReadAdrBase], (instregex "^STR[BDHS]roW")>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BDHS]roX")>;
def : InstRW<[M3WriteSA,
ReadAdrBase], (instregex "^STRQro[WX]")>;
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
@@ -588,7 +591,7 @@ def : InstRW<[M3WriteSA,
// ASIMD instructions.
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ABAL?v")>;
def : InstRW<[M3WriteNMSC1], (instregex "^[SU]ABDL?v")>;
-def : InstRW<[M3WriteNMSC1], (instregex "^(SQ)?(ABS|NEG)v")>;
+def : InstRW<[M3WriteNMSC1], (instregex "^((SQ)?ABS|SQNEG)v")>;
def : InstRW<[M3WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Pv")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]H(ADD|SUB)v")>;
@@ -597,7 +600,6 @@ def : InstRW<[M3WriteNMSC3], (instregex "^R?(ADD|SUB)HN2?v")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]Q(ADD|SUB)v")>;
def : InstRW<[M3WriteNMSC3], (instregex "^(SU|US)QADDv")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]RHADDv")>;
-def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Vv")>;
def : InstRW<[M3WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
def : InstRW<[M3WriteNALU1], (instregex "^CMTSTv")>;
def : InstRW<[M3WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
@@ -668,108 +670,108 @@ def : InstRW<[M3WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>;
// ASIMD load instructions.
def : InstRW<[M3WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDD], (instregex "LD1i(8|16|32)$")>;
def : InstRW<[M3WriteVLDD,
- WriteAdr], (instregex "LD1i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD1i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDE], (instregex "LD1i(64)$")>;
def : InstRW<[M3WriteVLDE,
- WriteAdr], (instregex "LD1i(64)_POST")>;
+ M3WriteA1], (instregex "LD1i(64)_POST")>;
def : InstRW<[M3WriteL5], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[M3WriteVLDF,
- WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST")>;
+ M3WriteA1], (instregex "LD2Twov(8b|4h|2s)_POST")>;
def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDF,
- WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDG], (instregex "LD2i(8|16|32)$")>;
def : InstRW<[M3WriteVLDG,
- WriteAdr], (instregex "LD2i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD2i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDH], (instregex "LD2i(64)$")>;
def : InstRW<[M3WriteVLDH,
- WriteAdr], (instregex "LD2i(64)_POST")>;
+ M3WriteA1], (instregex "LD2i(64)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
def : InstRW<[M3WriteVLDI,
- WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST")>;
+ M3WriteA1], (instregex "LD3Threev(8b|4h|2s)_POST")>;
def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDI,
- WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDJ], (instregex "LD3i(8|16|32)$")>;
def : InstRW<[M3WriteVLDJ,
- WriteAdr], (instregex "LD3i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD3i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDL], (instregex "LD3i(64)$")>;
def : InstRW<[M3WriteVLDL,
- WriteAdr], (instregex "LD3i(64)_POST")>;
+ M3WriteA1], (instregex "LD3i(64)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[M3WriteVLDN,
- WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST")>;
+ M3WriteA1], (instregex "LD4Fourv(8b|4h|2s)_POST")>;
def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDN,
- WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDK], (instregex "LD4i(8|16|32)$")>;
def : InstRW<[M3WriteVLDK,
- WriteAdr], (instregex "LD4i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD4i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDM], (instregex "LD4i(64)$")>;
def : InstRW<[M3WriteVLDM,
- WriteAdr], (instregex "LD4i(64)_POST")>;
+ M3WriteA1], (instregex "LD4i(64)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
// ASIMD store instructions.
def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
new file mode 100644
index 000000000000..4d892465b3f2
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -0,0 +1,1004 @@
+//=- AArch64SchedExynosM4.td - Samsung Exynos M4 Sched Defs --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the Samsung Exynos M4 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// The Exynos-M4 is an advanced superscalar microprocessor with a 6-wide
+// in-order stage for decode and dispatch and a wider issue stage.
+// The execution units and loads and stores are out-of-order.
+
+def ExynosM4Model : SchedMachineModel {
+ let IssueWidth = 6; // Up to 6 uops per cycle.
+ let MicroOpBufferSize = 228; // ROB size.
+ let LoopMicroOpBufferSize = 48; // Based on the instruction queue size.
+ let LoadLatency = 4; // Optimistic load cases.
+ let MispredictPenalty = 16; // Minimum branch misprediction penalty.
+ let CompleteModel = 1; // Use the default model otherwise.
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+}
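// The per-processor values above surface to C++ consumers through
// llvm::MCSchedModel.  A small sketch of reading them back (assuming the
// generic field names IssueWidth, MicroOpBufferSize, LoadLatency and
// MispredictPenalty from llvm/MC/MCSchedule.h; the function itself is
// illustrative only, not part of this change):
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/raw_ostream.h"

static void printSchedModelSummary(const llvm::MCSchedModel &SM) {
  llvm::outs() << "issue width:        " << SM.IssueWidth << '\n'
               << "uop buffer (ROB):   " << SM.MicroOpBufferSize << '\n'
               << "load latency:       " << SM.LoadLatency << '\n'
               << "mispredict penalty: " << SM.MispredictPenalty << '\n';
}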
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on the Exynos-M4.
+
+let SchedModel = ExynosM4Model in {
+
+def M4UnitA : ProcResource<2>; // Simple integer
+def M4UnitC : ProcResource<2>; // Simple and complex integer
+let Super = M4UnitC, BufferSize = 1 in
+def M4UnitD : ProcResource<1>; // Integer division (inside C0, serialized)
+let Super = M4UnitC in
+def M4UnitE : ProcResource<1>; // CRC (inside C0)
+def M4UnitB : ProcResource<2>; // Branch
+def M4UnitL0 : ProcResource<1>; // Load
+def M4UnitS0 : ProcResource<1>; // Store
+def M4PipeLS : ProcResource<1>; // Load/Store
+let Super = M4PipeLS in {
+ def M4UnitL1 : ProcResource<1>;
+ def M4UnitS1 : ProcResource<1>;
+}
+def M4PipeF0 : ProcResource<1>; // FP #0
+let Super = M4PipeF0 in {
+ def M4UnitFMAC0 : ProcResource<1>; // FP multiplication
+ def M4UnitFADD0 : ProcResource<1>; // Simple FP
+ def M4UnitFCVT0 : ProcResource<1>; // FP conversion
+ def M4UnitNALU0 : ProcResource<1>; // Simple vector
+ def M4UnitNHAD : ProcResource<1>; // Horizontal vector
+ def M4UnitNMSC : ProcResource<1>; // FP and vector miscellanea
+ def M4UnitNMUL0 : ProcResource<1>; // Vector multiplication
+ def M4UnitNSHT0 : ProcResource<1>; // Vector shifting
+ def M4UnitNSHF0 : ProcResource<1>; // Vector shuffling
+ def M4UnitNCRY0 : ProcResource<1>; // Cryptographic
+}
+def M4PipeF1 : ProcResource<1>; // FP #1
+let Super = M4PipeF1 in {
+ def M4UnitFMAC1 : ProcResource<1>; // FP multiplication
+ def M4UnitFADD1 : ProcResource<1>; // Simple FP
+ def M4UnitFDIV0 : ProcResource<2>; // FP division (serialized)
+ def M4UnitFSQR0 : ProcResource<2>; // FP square root (serialized)
+ def M4UnitFST0 : ProcResource<1>; // FP store
+ def M4UnitNALU1 : ProcResource<1>; // Simple vector
+ def M4UnitNSHT1 : ProcResource<1>; // Vector shifting
+ def M4UnitNSHF1 : ProcResource<1>; // Vector shuffling
+}
+def M4PipeF2 : ProcResource<1>; // FP #2
+let Super = M4PipeF2 in {
+ def M4UnitFMAC2 : ProcResource<1>; // FP multiplication
+ def M4UnitFADD2 : ProcResource<1>; // Simple FP
+ def M4UnitFCVT1 : ProcResource<1>; // FP conversion
+ def M4UnitFDIV1 : ProcResource<2>; // FP division (serialized)
+ def M4UnitFSQR1 : ProcResource<2>; // FP square root (serialized)
+ def M4UnitFST1 : ProcResource<1>; // FP store
+ def M4UnitNALU2 : ProcResource<1>; // Simple vector
+ def M4UnitNMUL1 : ProcResource<1>; // Vector multiplication
+ def M4UnitNSHT2 : ProcResource<1>; // Vector shifting
+ def M4UnitNCRY1 : ProcResource<1>; // Cryptographic
+}
+
+def M4UnitALU : ProcResGroup<[M4UnitA,
+ M4UnitC]>;
+def M4UnitL : ProcResGroup<[M4UnitL0,
+ M4UnitL1]>;
+def M4UnitS : ProcResGroup<[M4UnitS0,
+ M4UnitS1]>;
+def M4UnitFMAC : ProcResGroup<[M4UnitFMAC0,
+ M4UnitFMAC1,
+ M4UnitFMAC2]>;
+def M4UnitFMACH : ProcResGroup<[M4UnitFMAC0,
+ M4UnitFMAC1]>;
+def M4UnitFADD : ProcResGroup<[M4UnitFADD0,
+ M4UnitFADD1,
+ M4UnitFADD2]>;
+def M4UnitFADDH : ProcResGroup<[M4UnitFADD0,
+ M4UnitFADD1]>;
+def M4UnitFCVT : ProcResGroup<[M4UnitFCVT0,
+ M4UnitFCVT1]>;
+def M4UnitFCVTH : ProcResGroup<[M4UnitFCVT0]>;
+def M4UnitFDIV : ProcResGroup<[M4UnitFDIV0,
+ M4UnitFDIV1]>;
+def M4UnitFDIVH : ProcResGroup<[M4UnitFDIV0]>;
+def M4UnitFSQR : ProcResGroup<[M4UnitFSQR0,
+ M4UnitFSQR1]>;
+def M4UnitFSQRH : ProcResGroup<[M4UnitFSQR0]>;
+def M4UnitFST : ProcResGroup<[M4UnitFST0,
+ M4UnitFST1]>;
+def M4UnitNALU : ProcResGroup<[M4UnitNALU0,
+ M4UnitNALU1,
+ M4UnitNALU2]>;
+def M4UnitNALUH : ProcResGroup<[M4UnitNALU0,
+ M4UnitNALU1]>;
+def M4UnitNMUL : ProcResGroup<[M4UnitNMUL0,
+ M4UnitNMUL1]>;
+def M4UnitNSHT : ProcResGroup<[M4UnitNSHT0,
+ M4UnitNSHT1,
+ M4UnitNSHT2]>;
+def M4UnitNSHF : ProcResGroup<[M4UnitNSHF0,
+ M4UnitNSHF1]>;
+def M4UnitNSHFH : ProcResGroup<[M4UnitNSHF0]>;
+def M4UnitNCRY : ProcResGroup<[M4UnitNCRY0,
+ M4UnitNCRY1]>;
+
+//===----------------------------------------------------------------------===//
+// Resources details.
+
+def M4WriteZ0 : SchedWriteRes<[]> { let Latency = 0; }
+def M4WriteZ1 : SchedWriteRes<[]> { let Latency = 1;
+ let NumMicroOps = 0; }
+def M4WriteZ4 : SchedWriteRes<[]> { let Latency = 4;
+ let NumMicroOps = 0; }
+
+def M4WriteA1 : SchedWriteRes<[M4UnitALU]> { let Latency = 1; }
+def M4WriteA2 : SchedWriteRes<[M4UnitALU]> { let Latency = 2; }
+def M4WriteAA : SchedWriteRes<[M4UnitALU]> { let Latency = 2;
+ let ResourceCycles = [2]; }
+def M4WriteAB : SchedWriteRes<[M4UnitALU,
+ M4UnitC]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteAC : SchedWriteRes<[M4UnitALU,
+ M4UnitALU,
+ M4UnitC]> { let Latency = 3;
+ let NumMicroOps = 3; }
+def M4WriteAD : SchedWriteRes<[M4UnitALU,
+ M4UnitC]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteAF : SchedWriteRes<[M4UnitALU]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M4WriteZ0]>,
+ SchedVar<ExynosArithPred, [M4WriteA1]>,
+ SchedVar<ExynosLogicExPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAA]>]>;
+def M4WriteAV : SchedWriteVariant<[SchedVar<ExynosResetPred, [M4WriteZ0]>,
+ SchedVar<NoSchedPred, [M4WriteAA]>]>;
+def M4WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M4WriteA1]>,
+ SchedVar<ExynosLogicExPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAA]>]>;
+def M4WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAF]>]>;
+
+def M4WriteB1 : SchedWriteRes<[M4UnitB]> { let Latency = 1; }
+def M4WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M4WriteAC]>,
+ SchedVar<NoSchedPred, [M4WriteAB]>]>;
+
+def M4WriteC1 : SchedWriteRes<[M4UnitC]> { let Latency = 1; }
+def M4WriteC3 : SchedWriteRes<[M4UnitC]> { let Latency = 3; }
+def M4WriteCA : SchedWriteRes<[M4UnitC]> { let Latency = 4;
+ let ResourceCycles = [2]; }
+
+def M4WriteD12 : SchedWriteRes<[M4UnitD]> { let Latency = 12; }
+def M4WriteD21 : SchedWriteRes<[M4UnitD]> { let Latency = 21; }
+
+def M4WriteE2 : SchedWriteRes<[M4UnitE]> { let Latency = 2; }
+
+def M4WriteL4 : SchedWriteRes<[M4UnitL]> { let Latency = 4; }
+def M4WriteL5 : SchedWriteRes<[M4UnitL]> { let Latency = 5; }
+def M4WriteLA : SchedWriteRes<[M4UnitL,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 1; }
+def M4WriteLB : SchedWriteRes<[M4UnitA,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteLC : SchedWriteRes<[M4UnitA,
+ M4UnitL,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteLD : SchedWriteRes<[M4UnitA,
+ M4UnitL]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def M4WriteLE : SchedWriteRes<[M4UnitA,
+ M4UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M4WriteLH : SchedWriteRes<[]> { let Latency = 5;
+ let NumMicroOps = 0; }
+def M4WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M4WriteL5]>,
+ SchedVar<NoSchedPred, [M4WriteL4]>]>;
+
+def M4WriteS1 : SchedWriteRes<[M4UnitS]> { let Latency = 1; }
+def M4WriteSA : SchedWriteRes<[M4UnitS0]> { let Latency = 3; }
+def M4WriteSB : SchedWriteRes<[M4UnitA,
+ M4UnitS]> { let Latency = 2;
+ let NumMicroOps = 1; }
+def M4WriteSX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M4WriteSB]>,
+ SchedVar<NoSchedPred, [M4WriteS1]>]>;
+
+def M4ReadAdrBase : SchedReadVariant<[SchedVar<
+ MCSchedPredicate<
+ CheckAny<
+ [ScaledIdxFn,
+ ExynosScaledIdxFn]>>, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
+
+def M4WriteNEONA : SchedWriteRes<[M4UnitNSHF,
+ M4UnitFADD]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def M4WriteNEONB : SchedWriteRes<[M4UnitNALU,
+ M4UnitS0]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEOND : SchedWriteRes<[M4UnitNSHF,
+ M4UnitFST]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M4WriteNEONH : SchedWriteRes<[M4UnitNALU,
+ M4UnitS0]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEONI : SchedWriteRes<[M4UnitNSHF,
+ M4UnitS0]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteNEONJ : SchedWriteRes<[M4UnitNMSC,
+ M4UnitS0]> { let Latency = 4; }
+def M4WriteNEONK : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNMSC,
+ M4UnitS0]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
+def M4WriteNEONM : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
+def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC,
+ M4UnitNMSC]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEONO : SchedWriteRes<[M4UnitNMSC,
+ M4UnitNMSC,
+ M4UnitNMSC]> { let Latency = 8;
+ let NumMicroOps = 3; }
+def M4WriteNEONP : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNMSC]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def M4WriteNEONQ : SchedWriteRes<[M4UnitNMSC,
+ M4UnitC]> { let Latency = 3;
+ let NumMicroOps = 1; }
+def M4WriteNEONR : SchedWriteRes<[M4UnitFCVT0,
+ M4UnitS0]> { let Latency = 4;
+ let NumMicroOps = 1; }
+def M4WriteNEONV : SchedWriteRes<[M4UnitFDIV,
+ M4UnitFDIV]> { let Latency = 7;
+ let ResourceCycles = [6, 6]; }
+def M4WriteNEONVH : SchedWriteRes<[M4UnitFDIVH,
+ M4UnitFDIVH]> { let Latency = 7;
+ let ResourceCycles = [6, 6]; }
+def M4WriteNEONW : SchedWriteRes<[M4UnitFDIV,
+ M4UnitFDIV]> { let Latency = 12;
+ let ResourceCycles = [9, 9]; }
+def M4WriteNEONX : SchedWriteRes<[M4UnitFSQR,
+ M4UnitFSQR]> { let Latency = 8;
+ let ResourceCycles = [7, 7]; }
+def M4WriteNEONXH : SchedWriteRes<[M4UnitFSQRH,
+ M4UnitFSQRH]> { let Latency = 7;
+ let ResourceCycles = [6, 6]; }
+def M4WriteNEONY : SchedWriteRes<[M4UnitFSQR,
+ M4UnitFSQR]> { let Latency = 12;
+ let ResourceCycles = [9, 9]; }
+def M4WriteNEONZ : SchedWriteVariant<[SchedVar<ExynosQFormPred, [M4WriteNEONO]>,
+ SchedVar<NoSchedPred, [M4WriteNEONN]>]>;
+
+def M4WriteFADD2 : SchedWriteRes<[M4UnitFADD]> { let Latency = 2; }
+def M4WriteFADD2H : SchedWriteRes<[M4UnitFADDH]> { let Latency = 2; }
+
+def M4WriteFCVT2 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 2; }
+def M4WriteFCVT2A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 2; }
+def M4WriteFCVT2H : SchedWriteRes<[M4UnitFCVTH]> { let Latency = 2; }
+def M4WriteFCVT3 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 3; }
+def M4WriteFCVT3A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 3; }
+def M4WriteFCVT3H : SchedWriteRes<[M4UnitFCVTH]> { let Latency = 3; }
+def M4WriteFCVT4 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 4; }
+def M4WriteFCVT4A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 4; }
+def M4WriteFCVT6A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 6; }
+
+def M4WriteFDIV7 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 7;
+ let ResourceCycles = [6]; }
+def M4WriteFDIV7H : SchedWriteRes<[M4UnitFDIVH]> { let Latency = 7;
+ let ResourceCycles = [6]; }
+def M4WriteFDIV12 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 12;
+ let ResourceCycles = [9]; }
+
+def M4WriteFMAC2H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 2; }
+def M4WriteFMAC3H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 3; }
+def M4WriteFMAC3 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 3; }
+def M4WriteFMAC4 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 4; }
+def M4WriteFMAC4H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 4; }
+def M4WriteFMAC5 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 5; }
+
+def M4WriteFSQR7H : SchedWriteRes<[M4UnitFSQRH]> { let Latency = 7;
+ let ResourceCycles = [6]; }
+def M4WriteFSQR8 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 8;
+ let ResourceCycles = [7]; }
+def M4WriteFSQR12 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 12;
+ let ResourceCycles = [9]; }
+
+def M4WriteNALU1 : SchedWriteRes<[M4UnitNALU]> { let Latency = 1; }
+def M4WriteNALU1H : SchedWriteRes<[M4UnitNALUH]> { let Latency = 1; }
+
+def M4WriteNCRY1 : SchedWriteRes<[M4UnitNCRY]> { let Latency = 1; }
+def M4WriteNCRY1A : SchedWriteRes<[M4UnitNCRY0]> { let Latency = 1; }
+def M4WriteNCRY3A : SchedWriteRes<[M4UnitNCRY0]> { let Latency = 3; }
+def M4WriteNCRY5A : SchedWriteRes<[M4UnitNCRY]> { let Latency = 5; }
+
+def M4WriteNHAD1 : SchedWriteRes<[M4UnitNHAD]> { let Latency = 1; }
+def M4WriteNHAD3 : SchedWriteRes<[M4UnitNHAD]> { let Latency = 3; }
+
+def M4WriteNMSC1 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 1; }
+def M4WriteNMSC2 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 2; }
+def M4WriteNMSC3 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 3; }
+
+def M4WriteNMUL3 : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
+
+def M4WriteNSHF1 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 1; }
+def M4WriteNSHF1H : SchedWriteRes<[M4UnitNSHFH]> { let Latency = 1; }
+def M4WriteNSHF3 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3; }
+def M4WriteNSHFA : SchedWriteRes<[M4UnitNSHF]> { let Latency = 1;
+ let ResourceCycles = [2]; }
+def M4WriteNSHFB : SchedWriteRes<[M4UnitNSHF]> { let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2]; }
+def M4WriteNSHFC : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [4]; }
+def M4WriteNSHFD : SchedWriteRes<[M4UnitNSHF]> { let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4]; }
+
+def M4WriteNSHT1 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 1; }
+def M4WriteNSHT2 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 2; }
+def M4WriteNSHT3 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 3; }
+def M4WriteNSHT4A : SchedWriteRes<[M4UnitNSHT1]> { let Latency = 4; }
+
+def M4WriteVLDA : SchedWriteRes<[M4UnitL,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteVLDB : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 6;
+ let NumMicroOps = 3; }
+def M4WriteVLDC : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 6;
+ let NumMicroOps = 4; }
+def M4WriteVLDD : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF]> { let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 1]; }
+def M4WriteVLDF : SchedWriteRes<[M4UnitL,
+ M4UnitL]> { let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [3, 3]; }
+def M4WriteVLDG : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1, 1]; }
+def M4WriteVLDI : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3, 3, 3]; }
+def M4WriteVLDJ : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3, 1, 1, 1]; }
+def M4WriteVLDK : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [3, 1, 1, 1, 1]; }
+def M4WriteVLDL : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitL,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [3, 1, 1, 6, 1]; }
+def M4WriteVLDM : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 6;
+ let ResourceCycles = [3, 1, 1, 3, 1, 1]; }
+def M4WriteVLDN : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3, 3, 3, 3]; }
+
+def M4WriteVST1 : SchedWriteRes<[M4UnitS,
+ M4UnitFST]> { let Latency = 1;
+ let NumMicroOps = 1; }
+def M4WriteVSTA : WriteSequence<[WriteVST], 2>;
+def M4WriteVSTB : WriteSequence<[WriteVST], 3>;
+def M4WriteVSTC : WriteSequence<[WriteVST], 4>;
+def M4WriteVSTD : SchedWriteRes<[M4UnitS,
+ M4UnitFST]> { let Latency = 2; }
+def M4WriteVSTE : SchedWriteRes<[M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteVSTF : SchedWriteRes<[M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 2, 1, 2, 1]; }
+def M4WriteVSTG : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 5;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1, 1, 1, 2, 1, 2, 1, 2, 1]; }
+def M4WriteVSTI : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; }
+def M4WriteVSTJ : SchedWriteRes<[M4UnitA,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M4WriteVSTK : SchedWriteRes<[M4UnitA,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def M4WriteVSTL : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 2, 1, 2, 1]; }
+
+// Special cases.
+def M4WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M4WriteNALU1]>,
+ SchedVar<NoSchedPred, [M4WriteZ0]>]>;
+def M4WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M4WriteZ0]>,
+ SchedVar<NoSchedPred, [M4WriteNALU1]>]>;
+def M4WriteMULL : SchedWriteVariant<[SchedVar<ExynosLongVectorUpperPred, [M4WriteNEONM]>,
+ SchedVar<NoSchedPred, [M4WriteNMUL3]>]>;
+
+// Fast forwarding.
+def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>;
+def M4ReadFMACM1 : SchedReadAdvance<+1, [M4WriteFMAC4,
+ M4WriteFMAC4H,
+ M4WriteFMAC5]>;
+def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>;
+def M4ReadMULLP2 : SchedReadAdvance<-2, [M4WriteNEONM]>;
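+// Note (descriptive, not part of the model itself): a positive SchedReadAdvance
+// lets the consumer read its operand that many cycles early (forwarding),
+// whereas the negative advance in M4ReadMULLP2 adds two cycles of latency when
+// reading a result produced by M4WriteNEONM (the long multiplies above).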
+
+//===----------------------------------------------------------------------===//
+// Coarse scheduling model.
+
+// Branch instructions.
+def : SchedAlias<WriteBr, M4WriteZ0>;
+def : SchedAlias<WriteBrReg, M4WriteC1>;
+
+// Arithmetic and logical integer instructions.
+def : SchedAlias<WriteI, M4WriteA1>;
+def : SchedAlias<WriteIEReg, M4WriteAA>; // FIXME: M4WriteAX crashes TableGen.
+def : SchedAlias<WriteISReg, M4WriteAA>; // FIXME: M4WriteAX crashes TableGen.
+def : SchedAlias<WriteIS, M4WriteA1>;
+
+// Move instructions.
+def : SchedAlias<WriteImm, M4WriteA1>;
+
+// Divide and multiply instructions.
+def : SchedAlias<WriteID32, M4WriteD12>;
+def : SchedAlias<WriteID64, M4WriteD21>;
+def : SchedAlias<WriteIM32, M4WriteC3>;
+def : SchedAlias<WriteIM64, M4WriteCA>;
+
+// Miscellaneous instructions.
+def : SchedAlias<WriteExtr, M4WriteAY>;
+
+// Addressing modes.
+def : SchedAlias<WriteAdr, M4WriteZ1>;
+def : SchedAlias<ReadAdrBase, M4ReadAdrBase>;
+
+// Load instructions.
+def : SchedAlias<WriteLD, M4WriteL4>;
+def : SchedAlias<WriteLDHi, M4WriteZ4>;
+def : SchedAlias<WriteLDIdx, M4WriteLX>;
+
+// Store instructions.
+def : SchedAlias<WriteST, M4WriteS1>;
+def : SchedAlias<WriteSTP, M4WriteS1>;
+def : SchedAlias<WriteSTX, M4WriteS1>;
+def : SchedAlias<WriteSTIdx, M4WriteSX>;
+
+// FP data instructions.
+def : SchedAlias<WriteF, M4WriteFADD2>;
+def : SchedAlias<WriteFCmp, M4WriteNMSC2>;
+def : SchedAlias<WriteFDiv, M4WriteFDIV12>;
+def : SchedAlias<WriteFMul, M4WriteFMAC3>;
+
+// FP miscellaneous instructions.
+def : SchedAlias<WriteFCvt, M4WriteFCVT2>;
+def : SchedAlias<WriteFImm, M4WriteNALU1>;
+def : SchedAlias<WriteFCopy, M4WriteCOPY>;
+
+// FP load instructions.
+def : SchedAlias<WriteVLD, M4WriteL5>;
+
+// FP store instructions.
+def : SchedAlias<WriteVST, M4WriteVST1>;
+
+// ASIMD FP instructions.
+def : SchedAlias<WriteV, M4WriteNALU1>;
+
+// Other miscellaneous instructions.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Generic fast forwarding.
+
+// TODO: Add FP register forwarding rules.
+
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+// TODO: The forwarding for 32 bits actually saves 2 cycles.
+def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+//===----------------------------------------------------------------------===//
+// Finer scheduling model.
+
+// Branch instructions.
+def : InstRW<[M4WriteB1], (instrs Bcc)>;
+def : InstRW<[M4WriteAF], (instrs BL)>;
+def : InstRW<[M4WriteBX], (instrs BLR)>;
+def : InstRW<[M4WriteC1], (instregex "^CBN?Z[WX]")>;
+def : InstRW<[M4WriteAD], (instregex "^TBN?Z[WX]")>;
+
+// Arithmetic and logical integer instructions.
+def : InstRW<[M4WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
+def : InstRW<[M4WriteAU], (instrs ORRWrs, ORRXrs)>;
+def : InstRW<[M4WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
+def : InstRW<[M4WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
+def : InstRW<[M4WriteAV], (instrs ADDWri, ADDXri, ORRWri, ORRXri)>;
+
+// Move instructions.
+def : InstRW<[M4WriteCOPY], (instrs COPY)>;
+def : InstRW<[M4WriteZ0], (instrs ADR, ADRP)>;
+def : InstRW<[M4WriteZ0], (instregex "^MOV[NZ][WX]i")>;
+
+// Divide and multiply instructions.
+
+// Miscellaneous instructions.
+
+// Load instructions.
+def : InstRW<[M4WriteLD,
+ WriteLDHi,
+ WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
+def : InstRW<[M4WriteL5,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
+def : InstRW<[WriteLDIdx,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
+def : InstRW<[M4WriteL5,
+ ReadAdrBase], (instrs PRFMroW)>;
+def : InstRW<[WriteLDIdx,
+ ReadAdrBase], (instrs PRFMroX)>;
+
+// Store instructions.
+def : InstRW<[M4WriteSB,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
+
+// FP data instructions.
+def : InstRW<[M4WriteNSHF1H], (instrs FABSHr)>;
+def : InstRW<[M4WriteNSHF1], (instregex "^FABS[SD]r")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^F(ADD|SUB)Hrr")>;
+def : InstRW<[M4WriteFADD2], (instregex "^F(ADD|SUB)[SD]rr")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^FADDPv.i16")>;
+def : InstRW<[M4WriteFADD2], (instregex "^FADDPv.i(32|64)")>;
+def : InstRW<[M4WriteNEONQ], (instregex "^FCCMPE?[HSD]rr")>;
+def : InstRW<[M4WriteNMSC2], (instregex "^FCMPE?[HSD]r[ir]")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(16|32|64|v1)")>;
+def : InstRW<[M4WriteFDIV7H], (instrs FDIVHrr)>;
+def : InstRW<[M4WriteFDIV7], (instrs FDIVSrr)>;
+def : InstRW<[M4WriteFDIV12], (instrs FDIVDrr)>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?[HSD]rr")>;
+def : InstRW<[M4WriteFMAC3H], (instregex "^FN?MULHrr")>;
+def : InstRW<[M4WriteFMAC3], (instregex "^FN?MUL[SD]rr")>;
+def : InstRW<[M4WriteFMAC3H], (instrs FMULX16)>;
+def : InstRW<[M4WriteFMAC3], (instregex "^FMULX(32|64)")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^FN?M(ADD|SUB)Hrrr")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
+def : InstRW<[M4WriteNALU1H], (instrs FNEGHr)>;
+def : InstRW<[M4WriteNALU1], (instregex "^FNEG[SD]r")>;
+def : InstRW<[M4WriteFCVT3A], (instregex "^FRINT.+r")>;
+def : InstRW<[M4WriteNEONH], (instregex "^FCSEL[HSD]rrr")>;
+def : InstRW<[M4WriteFSQR7H], (instrs FSQRTHr)>;
+def : InstRW<[M4WriteFSQR8], (instrs FSQRTSr)>;
+def : InstRW<[M4WriteFSQR12], (instrs FSQRTDr)>;
+
+// FP miscellaneous instructions.
+def : InstRW<[M4WriteFCVT2H], (instregex "^FCVTH[SD]r")>;
+def : InstRW<[M4WriteFCVT2H], (instregex "^FCVT[SD]Hr")>;
+def : InstRW<[M4WriteFCVT2], (instregex "^FCVT[SD][SD]r")>;
+def : InstRW<[M4WriteFCVT6A], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>;
+def : InstRW<[M4WriteNEONR], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
+def : InstRW<[M4WriteNALU1], (instregex "^FMOV[HSD][ir]")>;
+def : InstRW<[M4WriteSA], (instregex "^FMOV[WX][HSD]r")>;
+def : InstRW<[M4WriteNEONJ], (instregex "^FMOV[HSD][WX]r")>;
+def : InstRW<[M4WriteNEONI], (instregex "^FMOVXDHighr")>;
+def : InstRW<[M4WriteNEONK], (instregex "^FMOVDXHighr")>;
+def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>;
+def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S16")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S(32|64)")>;
+
+// FP load instructions.
+def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>;
+def : InstRW<[WriteVLD], (instregex "^LDUR[BHSDQ]i")>;
+def : InstRW<[WriteVLD,
+ WriteAdr], (instregex "^LDR[BHSDQ](post|pre)")>;
+def : InstRW<[WriteVLD], (instregex "^LDR[BHSDQ]ui")>;
+def : InstRW<[M4WriteLE,
+ ReadAdrBase], (instregex "^LDR[BHSDQ]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BHSD]roX")>;
+def : InstRW<[M4WriteLE,
+ ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[WriteVLD,
+ M4WriteLH], (instregex "^LDN?P[SD]i")>;
+def : InstRW<[M4WriteLA,
+ M4WriteLH], (instregex "^LDN?PQi")>;
+def : InstRW<[M4WriteL5,
+ M4WriteLH,
+ WriteAdr], (instregex "^LDP[SD]post")>;
+def : InstRW<[M4WriteLB,
+ M4WriteLH,
+ WriteAdr], (instrs LDPQpost)>;
+def : InstRW<[M4WriteLB,
+ M4WriteLH,
+ WriteAdr], (instregex "^LDP[SD]pre")>;
+def : InstRW<[M4WriteLC,
+ M4WriteLH,
+ WriteAdr], (instrs LDPQpre)>;
+
+// FP store instructions.
+def : InstRW<[WriteVST], (instregex "^STUR[BHSDQ]i")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STR[BHSDQ](post|pre)")>;
+def : InstRW<[WriteVST], (instregex "^STR[BHSDQ]ui")>;
+def : InstRW<[M4WriteVSTJ,
+ ReadAdrBase], (instregex "^STR[BHSD]roW")>;
+def : InstRW<[M4WriteVSTK,
+ ReadAdrBase], (instrs STRQroW)>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BHSD]roX")>;
+def : InstRW<[M4WriteVSTK,
+ ReadAdrBase], (instrs STRQroX)>;
+def : InstRW<[WriteVST], (instregex "^STN?P[SD]i")>;
+def : InstRW<[M4WriteVSTA], (instregex "^STN?PQi")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STP[SD](post|pre)")>;
+def : InstRW<[M4WriteVSTJ,
+ WriteAdr], (instregex "^STPQ(post|pre)")>;
+
+// ASIMD instructions.
+def : InstRW<[M4WriteNHAD1], (instregex "^[SU]ABDL?v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ABAL?v")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^ABSv")>;
+def : InstRW<[M4WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]?ADDL?Pv")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]H(ADD|SUB)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU](ADD|SUB)[LW]v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^R?(ADD|SUB)HN2?v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]Q(ADD|SUB)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^(SU|US)QADDv")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]RHADDv")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^SQ(ABS|NEG)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]?ADDL?Vv")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
+def : InstRW<[M4WriteNALU1], (instregex "^CMTSTv")>;
+def : InstRW<[M4WriteNALU1], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>;
+def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>;
+def : InstRW<[M4WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULM1], (instregex "^ML[AS]v")>;
+def : InstRW<[M4WriteNMUL3], (instregex "^SQRDML[AS]H")>;
+def : InstRW<[M4WriteMULL,
+ M4ReadMULLP2], (instregex "^(S|U|SQD)ML[AS]Lv")>;
+def : InstRW<[M4WriteMULL,
+ M4ReadMULLP2], (instregex "^(S|U|SQD)MULLv")>;
+def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>;
+def : InstRW<[M4WriteNSHT1], (instregex "^SHL[dv]")>;
+def : InstRW<[M4WriteNSHT1], (instregex "^S[LR]I[dv]")>;
+def : InstRW<[M4WriteNSHT1], (instregex "^[SU]SH[LR][dv]")>;
+def : InstRW<[M4WriteNSHT2], (instregex "^[SU]?SHLLv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]?Q?R?SHRU?N[bhsv]")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]RSH[LR][dv]")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]QR?SHLU?[bhsdv]")>;
+
+// ASIMD FP instructions.
+def : InstRW<[M4WriteNSHF1H], (instregex "^FABSv.f16")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^FABSv.f(32|64)")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^F(ABD|ADD|SUB)v.f16")>;
+def : InstRW<[M4WriteFADD2], (instregex "^F(ABD|ADD|SUB)v.f(32|64)")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^FADDPv.f16")>;
+def : InstRW<[M4WriteFADD2], (instregex "^FADDPv.f(32|64)")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
+def : InstRW<[M4WriteFCVT2], (instregex "^FCVT(L|N|XN)v")>;
+def : InstRW<[M4WriteFCVT2A], (instregex "^FCVT[AMNPZ][SU]v")>;
+def : InstRW<[M4WriteFCVT2H], (instregex "^[SU]CVTFv.[fi]16")>;
+def : InstRW<[M4WriteFCVT2], (instregex "^[SU]CVTFv.[fi](32|64)")>;
+def : InstRW<[M4WriteFDIV7H], (instrs FDIVv4f16)>;
+def : InstRW<[M4WriteNEONVH], (instrs FDIVv8f16)>;
+def : InstRW<[M4WriteFDIV7], (instrs FDIVv2f32)>;
+def : InstRW<[M4WriteNEONV], (instrs FDIVv4f32)>;
+def : InstRW<[M4WriteNEONW], (instrs FDIVv2f64)>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>;
+def : InstRW<[M4WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
+def : InstRW<[M4WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>;
+def : InstRW<[M4WriteFMAC2H], (instregex "^FMULX?v.[fi]16")>;
+def : InstRW<[M4WriteFMAC3], (instregex "^FMULX?v.[fi](32|64)")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^FML[AS]v.[fi]16")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^FML[AS]v.[fi](32|64)")>;
+def : InstRW<[M4WriteNALU1H], (instregex "^FNEGv.f16")>;
+def : InstRW<[M4WriteNALU1], (instregex "^FNEGv.f(32|64)")>;
+def : InstRW<[M4WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>;
+def : InstRW<[M4WriteFSQR7H], (instrs FSQRTv4f16)>;
+def : InstRW<[M4WriteNEONXH], (instrs FSQRTv8f16)>;
+def : InstRW<[M4WriteFSQR8], (instrs FSQRTv2f32)>;
+def : InstRW<[M4WriteNEONX], (instrs FSQRTv4f32)>;
+def : InstRW<[M4WriteNEONY], (instrs FSQRTv2f64)>;
+
+// ASIMD miscellaneous instructions.
+def : InstRW<[M4WriteNALU1], (instregex "^RBITv")>;
+def : InstRW<[M4WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>;
+def : InstRW<[M4WriteNALU1], (instregex "^CL[STZ]v")>;
+def : InstRW<[M4WriteNEONB], (instregex "^DUPv.+gpr")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^CPY")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^DUPv.+lane")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^EXTv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^XTNv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]?QXTU?Nv")>;
+def : InstRW<[M4WriteNEONB], (instregex "^INSv.+gpr")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^INSv.+lane")>;
+def : InstRW<[M4WriteMOVI], (instregex "^(MOV|MVN)I")>;
+def : InstRW<[M4WriteNALU1H], (instregex "^FMOVv.f16")>;
+def : InstRW<[M4WriteNALU1], (instregex "^FMOVv.f(32|64)")>;
+def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>;
+def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>;
+def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f16")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>;
+def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>;
+def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>;
+def : InstRW<[M4WriteNSHFC], (instregex "^TB[LX]v(8|16)i8Three")>;
+def : InstRW<[M4WriteNSHFD], (instregex "^TB[LX]v(8|16)i8Four")>;
+def : InstRW<[M4WriteNEONP], (instregex "^[SU]MOVv")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>;
+
+// ASIMD load instructions.
+def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDD], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVLDD,
+ M4WriteA1], (instregex "LD1i(8|16|32|64)_POST$")>;
+
+def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVLDF,
+ M4WriteA1], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDF,
+ M4WriteA1], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDG], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVLDG,
+ M4WriteA1], (instregex "LD2i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVLDI,
+ M4WriteA1], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDI,
+ M4WriteA1], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDJ], (instregex "LD3i(8|16|32)$")>;
+def : InstRW<[M4WriteVLDJ,
+ M4WriteA1], (instregex "LD3i(8|16|32)_POST$")>;
+def : InstRW<[M4WriteVLDL], (instregex "LD3i64$")>;
+def : InstRW<[M4WriteVLDL,
+ M4WriteA1], (instregex "LD3i64_POST$")>;
+
+def : InstRW<[M4WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVLDN,
+ M4WriteA1], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDN,
+ M4WriteA1], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDK], (instregex "LD4i(8|16|32)$")>;
+def : InstRW<[M4WriteVLDK,
+ M4WriteA1], (instregex "LD4i(8|16|32)_POST$")>;
+def : InstRW<[M4WriteVLDM], (instregex "LD4i64$")>;
+def : InstRW<[M4WriteVLDM,
+ M4WriteA1], (instregex "LD4i64_POST$")>;
+
+def : InstRW<[M4WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store instructions.
+def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVST,
+ M4WriteA1], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVST,
+ M4WriteA1], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVSTA,
+ M4WriteA1], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTA,
+ M4WriteA1], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVSTB,
+ M4WriteA1], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTB,
+ M4WriteA1], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVSTC,
+ M4WriteA1], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTC,
+ M4WriteA1], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[WriteVST], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[WriteVST,
+ M4WriteA1], (instregex "ST1i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVSTD,
+ M4WriteA1], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTE,
+ M4WriteA1], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTD], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVSTD,
+ M4WriteA1], (instregex "ST2i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVSTF,
+ M4WriteA1], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTG,
+ M4WriteA1], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTE], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVSTE,
+ M4WriteA1], (instregex "ST3i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVSTL], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVSTL,
+ M4WriteA1], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVSTI], (instregex "ST4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTI,
+ M4WriteA1], (instregex "ST4Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTE], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVSTE,
+ M4WriteA1], (instregex "ST4i(8|16|32|64)_POST$")>;
+
+// Cryptography instructions.
+def : InstRW<[M4WriteNCRY1], (instregex "^AES[DE]")>;
+def : InstRW<[M4WriteNCRY1,
+ M4ReadAESM1], (instregex "^AESI?MC")>;
+def : InstRW<[M4WriteNCRY1A], (instregex "^PMULv")>;
+def : InstRW<[M4WriteNCRY1A], (instregex "^PMULLv(1|8)i")>;
+def : InstRW<[M4WriteNCRY3A], (instregex "^PMULLv(2|16)i")>;
+def : InstRW<[M4WriteNCRY1A], (instregex "^SHA1([CHMP]|SU[01])")>;
+def : InstRW<[M4WriteNCRY1A], (instrs SHA256SU0rr)>;
+def : InstRW<[M4WriteNCRY5A], (instrs SHA256SU1rrr)>;
+def : InstRW<[M4WriteNCRY5A], (instrs SHA256H2rrr)>;
+
+// CRC instructions.
+def : InstRW<[M4WriteE2], (instregex "^CRC32C?[BHWX]rr$")>;
+
+} // SchedModel = ExynosM4Model
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
new file mode 100644
index 000000000000..48c54230e9d8
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
@@ -0,0 +1,157 @@
+//===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines scheduling predicate definitions that are used by the
+// AArch64 Exynos processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Auxiliary predicates.
+
+// Check the shift in arithmetic and logic instructions.
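+// For example, an operand shifted by zero, or shifted left (LSL) by 1, 2 or 3
+// bits (as in "add x0, x1, x2, lsl #2"), passes this check; larger amounts and
+// non-LSL shifts by a non-zero amount do not.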
+def ExynosCheckShift : CheckAny<[CheckShiftBy0,
+ CheckAll<
+ [CheckShiftLSL,
+ CheckAny<
+ [CheckShiftBy1,
+ CheckShiftBy2,
+ CheckShiftBy3]>]>]>;
+
+// Exynos predicates.
+
+// Identify BLR specifying the LR register as the indirect target register.
+def ExynosBranchLinkLRPred : MCSchedPredicate<
+ CheckAll<[CheckOpcode<[BLR]>,
+ CheckRegOperand<0, LR>]>>;
+
+// Identify arithmetic instructions without or with limited extension or shift.
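+// For instance, "add x0, x1, w2, uxtw #2" is considered fast, while
+// "add x0, x1, w2, sxtw #2" is not (only UXTW/UXTX extensions may carry a
+// non-zero amount, and only up to 3); shifted forms defer to ExynosCheckShift.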
+def ExynosArithFn : TIIPredicate<
+ "isExynosArithFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithExtOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<[CheckExtBy0,
+ CheckAll<
+ [CheckAny<
+ [CheckExtUXTW,
+ CheckExtUXTX]>,
+ CheckAny<
+ [CheckExtBy1,
+ CheckExtBy2,
+ CheckExtBy3]>]>]>>>,
+ MCOpcodeSwitchCase<
+ IsArithShiftOp.ValidOpcodes,
+ MCReturnStatement<ExynosCheckShift>>,
+ MCOpcodeSwitchCase<
+ IsArithUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosArithPred : MCSchedPredicate<ExynosArithFn>;
+
+// Identify logic instructions with limited shift.
+def ExynosLogicFn : TIIPredicate<
+ "isExynosLogicFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLogicShiftOp.ValidOpcodes,
+ MCReturnStatement<ExynosCheckShift>>,
+ MCOpcodeSwitchCase<
+ IsLogicUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosLogicPred : MCSchedPredicate<ExynosLogicFn>;
+
+// Identify more logic instructions with limited shift.
+def ExynosLogicExFn : TIIPredicate<
+ "isExynosLogicExFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLogicShiftOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<
+ [ExynosCheckShift,
+ CheckAll<
+ [CheckShiftLSL,
+ CheckShiftBy8]>]>>>,
+ MCOpcodeSwitchCase<
+ IsLogicUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosLogicExPred : MCSchedPredicate<ExynosLogicExFn>;
+
+// Identify a load or store using the register offset addressing mode
+// with a scaled non-extended register.
+def ExynosScaledIdxFn : TIIPredicate<"isExynosScaledAddr",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLoadStoreRegOffsetOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<
+ [CheckMemExtSXTW,
+ CheckMemExtUXTW,
+ CheckMemScaled]>>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
+
+// Identify FP instructions.
+def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
+
+// Identify whether an instruction whose result is a long vector
+// operates on the upper half of the input registers.
+def ExynosLongVectorUpperFn : TIIPredicate<
+ "isExynosLongVectorUpper",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLongVectorUpperOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>;
+
+// Identify 128-bit NEON instructions.
+def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
+
+// Identify instructions that reset a register efficiently.
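+// For example, ADR(P), MOVN/MOVZ, "mov Rd, #imm" encoded as ORR Rd, ZR, #imm,
+// plain register copy idioms and FP zeroing idioms all qualify.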
+def ExynosResetFn : TIIPredicate<
+ "isExynosResetFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ [ADR, ADRP,
+ MOVNWi, MOVNXi,
+ MOVZWi, MOVZXi],
+ MCReturnStatement<TruePred>>,
+ MCOpcodeSwitchCase<
+ [ORRWri, ORRXri],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<1>,
+ CheckAny<
+ [CheckRegOperand<1, WZR>,
+ CheckRegOperand<1, XZR>]>]>>>],
+ MCReturnStatement<
+ CheckAny<
+ [IsCopyIdiomFn,
+ IsZeroFPIdiomFn]>>>>;
+def ExynosResetPred : MCSchedPredicate<ExynosResetFn>;
+
+// Identify EXTR as the alias for ROR (immediate).
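+// That is, EXTR Rd, Rn, Rn, #imm with both source registers identical, which
+// is how the assembler encodes "ror Rd, Rn, #imm".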
+def ExynosRotateRightImmPred : MCSchedPredicate<
+ CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>,
+ CheckSameRegOperand<1, 2>]>>;
+
+// Identify cheap arithmetic and logic immediate instructions.
+def ExynosCheapFn : TIIPredicate<
+ "isExynosCheapAsMove",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithLogicImmOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<
+ CheckAny<
+ [ExynosArithFn, ExynosResetFn, ExynosLogicFn]>>>>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
new file mode 100644
index 000000000000..dbaf11fc95dd
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -0,0 +1,423 @@
+//===- AArch64SchedPredicates.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines scheduling predicate definitions that are used by the
+// AArch64 subtargets.
+//
+//===----------------------------------------------------------------------===//
+
+// Function mappers.
+
+// Check the extension type in arithmetic instructions.
+let FunctionMapper = "AArch64_AM::getArithExtendType" in {
+ def CheckExtUXTB : CheckImmOperand_s<3, "AArch64_AM::UXTB">;
+ def CheckExtUXTH : CheckImmOperand_s<3, "AArch64_AM::UXTH">;
+ def CheckExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
+ def CheckExtUXTX : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
+ def CheckExtSXTB : CheckImmOperand_s<3, "AArch64_AM::SXTB">;
+ def CheckExtSXTH : CheckImmOperand_s<3, "AArch64_AM::SXTH">;
+ def CheckExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
+ def CheckExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
+}
+
+// Check for shifting in extended arithmetic instructions.
+foreach I = {0-3} in {
+ let FunctionMapper = "AArch64_AM::getArithShiftValue" in
+ def CheckExtBy#I : CheckImmOperand<3, I>;
+}
+
+// Check the extension type in the register offset addressing mode.
+let FunctionMapper = "AArch64_AM::getMemExtendType" in {
+ def CheckMemExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
+ def CheckMemExtLSL : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
+ def CheckMemExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
+ def CheckMemExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
+}
+
+// Check for scaling in the register offset addressing mode.
+let FunctionMapper = "AArch64_AM::getMemDoShift" in
+def CheckMemScaled : CheckImmOperandSimple<3>;
+
+// Check the shifting type in arithmetic and logic instructions.
+let FunctionMapper = "AArch64_AM::getShiftType" in {
+ def CheckShiftLSL : CheckImmOperand_s<3, "AArch64_AM::LSL">;
+ def CheckShiftLSR : CheckImmOperand_s<3, "AArch64_AM::LSR">;
+ def CheckShiftASR : CheckImmOperand_s<3, "AArch64_AM::ASR">;
+ def CheckShiftROR : CheckImmOperand_s<3, "AArch64_AM::ROR">;
+ def CheckShiftMSL : CheckImmOperand_s<3, "AArch64_AM::MSL">;
+}
+
+// Check for shifting in arithmetic and logic instructions.
+foreach I = {0-3, 8} in {
+ let FunctionMapper = "AArch64_AM::getShiftValue" in
+ def CheckShiftBy#I : CheckImmOperand<3, I>;
+}
+
+// Generic predicates.
+
+// Identify whether an instruction is the 64-bit NEON form based on its result.
+def CheckDForm : CheckAll<[CheckIsRegOperand<0>,
+ CheckAny<[CheckRegOperand<0, D0>,
+ CheckRegOperand<0, D1>,
+ CheckRegOperand<0, D2>,
+ CheckRegOperand<0, D3>,
+ CheckRegOperand<0, D4>,
+ CheckRegOperand<0, D5>,
+ CheckRegOperand<0, D6>,
+ CheckRegOperand<0, D7>,
+ CheckRegOperand<0, D8>,
+ CheckRegOperand<0, D9>,
+ CheckRegOperand<0, D10>,
+ CheckRegOperand<0, D11>,
+ CheckRegOperand<0, D12>,
+ CheckRegOperand<0, D13>,
+ CheckRegOperand<0, D14>,
+ CheckRegOperand<0, D15>,
+ CheckRegOperand<0, D16>,
+ CheckRegOperand<0, D17>,
+ CheckRegOperand<0, D18>,
+ CheckRegOperand<0, D19>,
+ CheckRegOperand<0, D20>,
+ CheckRegOperand<0, D21>,
+ CheckRegOperand<0, D22>,
+ CheckRegOperand<0, D23>,
+ CheckRegOperand<0, D24>,
+ CheckRegOperand<0, D25>,
+ CheckRegOperand<0, D26>,
+ CheckRegOperand<0, D27>,
+ CheckRegOperand<0, D28>,
+ CheckRegOperand<0, D29>,
+ CheckRegOperand<0, D30>,
+ CheckRegOperand<0, D31>]>]>;
+
+// Identify whether an instruction is the 128-bit NEON form based on its result.
+def CheckQForm : CheckAll<[CheckIsRegOperand<0>,
+ CheckAny<[CheckRegOperand<0, Q0>,
+ CheckRegOperand<0, Q1>,
+ CheckRegOperand<0, Q2>,
+ CheckRegOperand<0, Q3>,
+ CheckRegOperand<0, Q4>,
+ CheckRegOperand<0, Q5>,
+ CheckRegOperand<0, Q6>,
+ CheckRegOperand<0, Q7>,
+ CheckRegOperand<0, Q8>,
+ CheckRegOperand<0, Q9>,
+ CheckRegOperand<0, Q10>,
+ CheckRegOperand<0, Q11>,
+ CheckRegOperand<0, Q12>,
+ CheckRegOperand<0, Q13>,
+ CheckRegOperand<0, Q14>,
+ CheckRegOperand<0, Q15>,
+ CheckRegOperand<0, Q16>,
+ CheckRegOperand<0, Q17>,
+ CheckRegOperand<0, Q18>,
+ CheckRegOperand<0, Q19>,
+ CheckRegOperand<0, Q20>,
+ CheckRegOperand<0, Q21>,
+ CheckRegOperand<0, Q22>,
+ CheckRegOperand<0, Q23>,
+ CheckRegOperand<0, Q24>,
+ CheckRegOperand<0, Q25>,
+ CheckRegOperand<0, Q26>,
+ CheckRegOperand<0, Q27>,
+ CheckRegOperand<0, Q28>,
+ CheckRegOperand<0, Q29>,
+ CheckRegOperand<0, Q30>,
+ CheckRegOperand<0, Q31>]>]>;
+
+// Identify arithmetic instructions with extend.
+def IsArithExtOp : CheckOpcode<[ADDWrx, ADDXrx, ADDSWrx, ADDSXrx,
+ SUBWrx, SUBXrx, SUBSWrx, SUBSXrx,
+ ADDXrx64, ADDSXrx64,
+ SUBXrx64, SUBSXrx64]>;
+
+// Identify arithmetic immediate instructions.
+def IsArithImmOp : CheckOpcode<[ADDWri, ADDXri, ADDSWri, ADDSXri,
+ SUBWri, SUBXri, SUBSWri, SUBSXri]>;
+
+// Identify arithmetic instructions with shift.
+def IsArithShiftOp : CheckOpcode<[ADDWrs, ADDXrs, ADDSWrs, ADDSXrs,
+ SUBWrs, SUBXrs, SUBSWrs, SUBSXrs]>;
+
+// Identify arithmetic instructions without shift.
+def IsArithUnshiftOp : CheckOpcode<[ADDWrr, ADDXrr, ADDSWrr, ADDSXrr,
+ SUBWrr, SUBXrr, SUBSWrr, SUBSXrr]>;
+
+// Identify logic immediate instructions.
+def IsLogicImmOp : CheckOpcode<[ANDWri, ANDXri,
+ EORWri, EORXri,
+ ORRWri, ORRXri]>;
+
+// Identify logic instructions with shift.
+def IsLogicShiftOp : CheckOpcode<[ANDWrs, ANDXrs, ANDSWrs, ANDSXrs,
+ BICWrs, BICXrs, BICSWrs, BICSXrs,
+ EONWrs, EONXrs,
+ EORWrs, EORXrs,
+ ORNWrs, ORNXrs,
+ ORRWrs, ORRXrs]>;
+
+// Identify logic instructions without shift.
+def IsLogicUnshiftOp : CheckOpcode<[ANDWrr, ANDXrr, ANDSWrr, ANDSXrr,
+ BICWrr, BICXrr, BICSWrr, BICSXrr,
+ EONWrr, EONXrr,
+ EORWrr, EORXrr,
+ ORNWrr, ORNXrr,
+ ORRWrr, ORRXrr]>;
+
+// Identify arithmetic and logic immediate instructions.
+def IsArithLogicImmOp : CheckOpcode<!listconcat(IsArithImmOp.ValidOpcodes,
+ IsLogicImmOp.ValidOpcodes)>;
+
+// Identify arithmetic and logic instructions with shift.
+def IsArithLogicShiftOp : CheckOpcode<!listconcat(IsArithShiftOp.ValidOpcodes,
+ IsLogicShiftOp.ValidOpcodes)>;
+
+// Identify arithmetic and logic instructions without shift.
+def IsArithLogicUnshiftOp : CheckOpcode<!listconcat(IsArithUnshiftOp.ValidOpcodes,
+ IsLogicUnshiftOp.ValidOpcodes)>;
+
+// Identify whether an instruction is an ASIMD
+// load using the post index addressing mode.
+def IsLoadASIMDPostOp : CheckOpcode<[LD1Onev8b_POST, LD1Onev4h_POST, LD1Onev2s_POST, LD1Onev1d_POST,
+ LD1Onev16b_POST, LD1Onev8h_POST, LD1Onev4s_POST, LD1Onev2d_POST,
+ LD1Twov8b_POST, LD1Twov4h_POST, LD1Twov2s_POST, LD1Twov1d_POST,
+ LD1Twov16b_POST, LD1Twov8h_POST, LD1Twov4s_POST, LD1Twov2d_POST,
+ LD1Threev8b_POST, LD1Threev4h_POST, LD1Threev2s_POST, LD1Threev1d_POST,
+ LD1Threev16b_POST, LD1Threev8h_POST, LD1Threev4s_POST, LD1Threev2d_POST,
+ LD1Fourv8b_POST, LD1Fourv4h_POST, LD1Fourv2s_POST, LD1Fourv1d_POST,
+ LD1Fourv16b_POST, LD1Fourv8h_POST, LD1Fourv4s_POST, LD1Fourv2d_POST,
+ LD1i8_POST, LD1i16_POST, LD1i32_POST, LD1i64_POST,
+ LD1Rv8b_POST, LD1Rv4h_POST, LD1Rv2s_POST, LD1Rv1d_POST,
+ LD1Rv16b_POST, LD1Rv8h_POST, LD1Rv4s_POST, LD1Rv2d_POST,
+ LD2Twov8b_POST, LD2Twov4h_POST, LD2Twov2s_POST,
+ LD2Twov16b_POST, LD2Twov8h_POST, LD2Twov4s_POST, LD2Twov2d_POST,
+ LD2i8_POST, LD2i16_POST, LD2i32_POST, LD2i64_POST,
+ LD2Rv8b_POST, LD2Rv4h_POST, LD2Rv2s_POST, LD2Rv1d_POST,
+ LD2Rv16b_POST, LD2Rv8h_POST, LD2Rv4s_POST, LD2Rv2d_POST,
+ LD3Threev8b_POST, LD3Threev4h_POST, LD3Threev2s_POST,
+ LD3Threev16b_POST, LD3Threev8h_POST, LD3Threev4s_POST, LD3Threev2d_POST,
+ LD3i8_POST, LD3i16_POST, LD3i32_POST, LD3i64_POST,
+ LD3Rv8b_POST, LD3Rv4h_POST, LD3Rv2s_POST, LD3Rv1d_POST,
+ LD3Rv16b_POST, LD3Rv8h_POST, LD3Rv4s_POST, LD3Rv2d_POST,
+ LD4Fourv8b_POST, LD4Fourv4h_POST, LD4Fourv2s_POST,
+ LD4Fourv16b_POST, LD4Fourv8h_POST, LD4Fourv4s_POST, LD4Fourv2d_POST,
+ LD4i8_POST, LD4i16_POST, LD4i32_POST, LD4i64_POST,
+ LD4Rv8b_POST, LD4Rv4h_POST, LD4Rv2s_POST, LD4Rv1d_POST,
+ LD4Rv16b_POST, LD4Rv8h_POST, LD4Rv4s_POST, LD4Rv2d_POST]>;
+
+// Identify whether an instruction is an ASIMD
+// store using the post index addressing mode.
+def IsStoreASIMDPostOp : CheckOpcode<[ST1Onev8b_POST, ST1Onev4h_POST, ST1Onev2s_POST, ST1Onev1d_POST,
+ ST1Onev16b_POST, ST1Onev8h_POST, ST1Onev4s_POST, ST1Onev2d_POST,
+ ST1Twov8b_POST, ST1Twov4h_POST, ST1Twov2s_POST, ST1Twov1d_POST,
+ ST1Twov16b_POST, ST1Twov8h_POST, ST1Twov4s_POST, ST1Twov2d_POST,
+ ST1Threev8b_POST, ST1Threev4h_POST, ST1Threev2s_POST, ST1Threev1d_POST,
+ ST1Threev16b_POST, ST1Threev8h_POST, ST1Threev4s_POST, ST1Threev2d_POST,
+ ST1Fourv8b_POST, ST1Fourv4h_POST, ST1Fourv2s_POST, ST1Fourv1d_POST,
+ ST1Fourv16b_POST, ST1Fourv8h_POST, ST1Fourv4s_POST, ST1Fourv2d_POST,
+ ST1i8_POST, ST1i16_POST, ST1i32_POST, ST1i64_POST,
+ ST2Twov8b_POST, ST2Twov4h_POST, ST2Twov2s_POST,
+ ST2Twov16b_POST, ST2Twov8h_POST, ST2Twov4s_POST, ST2Twov2d_POST,
+ ST2i8_POST, ST2i16_POST, ST2i32_POST, ST2i64_POST,
+ ST3Threev8b_POST, ST3Threev4h_POST, ST3Threev2s_POST,
+ ST3Threev16b_POST, ST3Threev8h_POST, ST3Threev4s_POST, ST3Threev2d_POST,
+ ST3i8_POST, ST3i16_POST, ST3i32_POST, ST3i64_POST,
+ ST4Fourv8b_POST, ST4Fourv4h_POST, ST4Fourv2s_POST,
+ ST4Fourv16b_POST, ST4Fourv8h_POST, ST4Fourv4s_POST, ST4Fourv2d_POST,
+ ST4i8_POST, ST4i16_POST, ST4i32_POST, ST4i64_POST]>;
+
+// Identify whether an instruction is an ASIMD load
+// or store using the post index addressing mode.
+def IsLoadStoreASIMDPostOp : CheckOpcode<!listconcat(IsLoadASIMDPostOp.ValidOpcodes,
+ IsStoreASIMDPostOp.ValidOpcodes)>;
+
+// Identify whether an instruction is a load
+// using the register offset addressing mode.
+def IsLoadRegOffsetOp : CheckOpcode<[PRFMroW, PRFMroX,
+ LDRBBroW, LDRBBroX,
+ LDRSBWroW, LDRSBWroX, LDRSBXroW, LDRSBXroX,
+ LDRHHroW, LDRHHroX,
+ LDRSHWroW, LDRSHWroX, LDRSHXroW, LDRSHXroX,
+ LDRWroW, LDRWroX,
+ LDRSWroW, LDRSWroX,
+ LDRXroW, LDRXroX,
+ LDRBroW, LDRBroX,
+ LDRHroW, LDRHroX,
+ LDRSroW, LDRSroX,
+ LDRDroW, LDRDroX]>;
+
+// Identify whether an instruction is a store
+// using the register offset addressing mode.
+def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX,
+ STRHHroW, STRHHroX,
+ STRWroW, STRWroX,
+ STRXroW, STRXroX,
+ STRBroW, STRBroX,
+ STRHroW, STRHroX,
+ STRSroW, STRSroX,
+ STRDroW, STRDroX]>;
+
+// Identify whether an instruction is a load or
+// store using the register offset addressing mode.
+def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes,
+ IsStoreRegOffsetOp.ValidOpcodes)>;
+
+// Identify whether an instruction whose result is a long vector
+// operates on the upper half of the input registers.
+def IsLongVectorUpperOp : CheckOpcode<[FCVTLv8i16, FCVTLv4i32,
+ FCVTNv8i16, FCVTNv4i32,
+ FCVTXNv4f32,
+ PMULLv16i8, PMULLv2i64,
+ RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32,
+ RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift,
+ RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32,
+ SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64,
+ SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64,
+ SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64,
+ SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64,
+ SHLLv16i8, SHLLv8i16, SHLLv4i32,
+ SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift,
+ SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64,
+ SMLALv8i16_indexed, SMLALv4i32_indexed,
+ SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64,
+ SMLSLv8i16_indexed, SMLSLv4i32_indexed,
+ SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64,
+ SMULLv8i16_indexed, SMULLv4i32_indexed,
+ SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64,
+ SQDMLALv8i16_indexed, SQDMLALv4i32_indexed,
+ SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64,
+ SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed,
+ SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64,
+ SQDMULLv8i16_indexed, SQDMULLv4i32_indexed,
+ SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift,
+ SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift,
+ SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift,
+ SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift,
+ SQXTNv16i8, SQXTNv8i16, SQXTNv4i32,
+ SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32,
+ SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift,
+ SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64,
+ SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64,
+ UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64,
+ UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64,
+ UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64,
+ UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64,
+ UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64,
+ UMLALv8i16_indexed, UMLALv4i32_indexed,
+ UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64,
+ UMLSLv8i16_indexed, UMLSLv4i32_indexed,
+ UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64,
+ UMULLv8i16_indexed, UMULLv4i32_indexed,
+ UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift,
+ UQXTNv16i8, UQXTNv8i16, UQXTNv4i32,
+ USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift,
+ USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64,
+ USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64,
+ XTNv16i8, XTNv8i16, XTNv4i32]>;
+
+// Target predicates.
+
+// Identify an instruction that effectively transfers a register to another.
+def IsCopyIdiomFn : TIIPredicate<"isCopyIdiom",
+ MCOpcodeSwitchStatement<
+ [// MOV {Rd, SP}, {SP, Rn} =>
+ // ADD {Rd, SP}, {SP, Rn}, #0
+ MCOpcodeSwitchCase<
+ [ADDWri, ADDXri],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<0>,
+ CheckIsRegOperand<1>,
+ CheckAny<
+ [CheckRegOperand<0, WSP>,
+ CheckRegOperand<0, SP>,
+ CheckRegOperand<1, WSP>,
+ CheckRegOperand<1, SP>]>,
+ CheckZeroOperand<2>]>>>,
+ // MOV Rd, Rm =>
+ // ORR Rd, ZR, Rm, LSL #0
+ MCOpcodeSwitchCase<
+ [ORRWrs, ORRXrs],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<1>,
+ CheckIsRegOperand<2>,
+ CheckAny<
+ [CheckRegOperand<1, WZR>,
+ CheckRegOperand<1, XZR>]>,
+ CheckShiftBy0]>>>],
+ MCReturnStatement<FalsePred>>>;
+def IsCopyIdiomPred : MCSchedPredicate<IsCopyIdiomFn>;
+
+// Identify arithmetic instructions with an extended register.
+def RegExtendedFn : TIIPredicate<"hasExtendedReg",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithExtOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckNot<CheckZeroOperand<3>>>>],
+ MCReturnStatement<FalsePred>>>;
+def RegExtendedPred : MCSchedPredicate<RegExtendedFn>;
+
+// Identify arithmetic and logic instructions with a shifted register.
+def RegShiftedFn : TIIPredicate<"hasShiftedReg",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithLogicShiftOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckNot<CheckZeroOperand<3>>>>],
+ MCReturnStatement<FalsePred>>>;
+def RegShiftedPred : MCSchedPredicate<RegShiftedFn>;
+
+// Identify a load or store using the register offset addressing mode
+// with an extended or scaled register.
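+// In practice this means any extend other than a plain LSL, or an LSL whose
+// offset register is actually scaled (shifted) by the access size.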
+def ScaledIdxFn : TIIPredicate<"isScaledAddr",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLoadStoreRegOffsetOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<[CheckNot<CheckMemExtLSL>,
+ CheckMemScaled]>>>],
+ MCReturnStatement<FalsePred>>>;
+def ScaledIdxPred : MCSchedPredicate<ScaledIdxFn>;
+
+// Identify an instruction that effectively resets an FP register to zero.
+def IsZeroFPIdiomFn : TIIPredicate<"isZeroFPIdiom",
+ MCOpcodeSwitchStatement<
+ [// MOVI Vd, #0
+ MCOpcodeSwitchCase<
+ [MOVIv8b_ns, MOVIv16b_ns,
+ MOVID, MOVIv2d_ns],
+ MCReturnStatement<CheckZeroOperand<1>>>,
+ // MOVI Vd, #0, LSL #0
+ MCOpcodeSwitchCase<
+ [MOVIv4i16, MOVIv8i16,
+ MOVIv2i32, MOVIv4i32],
+ MCReturnStatement<
+ CheckAll<
+ [CheckZeroOperand<1>,
+ CheckZeroOperand<2>]>>>],
+ MCReturnStatement<FalsePred>>>;
+def IsZeroFPIdiomPred : MCSchedPredicate<IsZeroFPIdiomFn>;
+
+// Identify an instruction that effectively resets a GP register to zero.
+def IsZeroIdiomFn : TIIPredicate<"isZeroIdiom",
+ MCOpcodeSwitchStatement<
+ [// ORR Rd, ZR, #0
+ MCOpcodeSwitchCase<
+ [ORRWri, ORRXri],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<1>,
+ CheckAny<
+ [CheckRegOperand<1, WZR>,
+ CheckRegOperand<1, XZR>]>,
+ CheckZeroOperand<2>]>>>],
+ MCReturnStatement<FalsePred>>>;
+def IsZeroIdiomPred : MCSchedPredicate<IsZeroIdiomFn>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td b/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td
index ce81f48acf71..f55ba4d42fce 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td
@@ -50,17 +50,6 @@ def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
-// Predicate for determining when a shiftable register is shifted.
-def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(*MI)}]>;
-
-// Predicate for determining when a extendedable register is extended.
-def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(*MI)}]>;
-
-// ScaledIdxPred is true if a WriteLDIdx operand will be
-// scaled. Subtargets can use this to dynamically select resources and
-// latency for WriteLDIdx and ReadAdrBase.
-def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(*MI)}]>;
-
// Serialized two-level address load.
// EXAMPLE: LOADGot
def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
new file mode 100644
index 000000000000..e9699b0367d3
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -0,0 +1,641 @@
+//===- AArch64SpeculationHardening.cpp - Harden Against Misspeculation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass to insert code to mitigate against side channel
+// vulnerabilities that may happen under control flow miss-speculation.
+//
+// The pass implements tracking of control flow miss-speculation into a "taint"
+// register. That taint register can then be used to mask off registers with
+// sensitive data when executing under miss-speculation, a.k.a. "transient
+// execution".
+// This pass is aimed at mitigating against SpectreV1-style vulnerabilities.
+//
+// It also implements speculative load hardening, i.e. using the taint register
+// to automatically mask off loaded data.
+//
+// As a possible follow-on improvement, an intrinsics-based approach, as
+// explained at https://lwn.net/Articles/759423/, could also be implemented on
+// top of the current design.
+//
+// For AArch64, the following implementation choices are made to implement the
+// tracking of control flow miss-speculation into a taint register:
+// Some of these are different from the implementation choices made in
+// the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
+// the instruction set characteristics result in different trade-offs.
+// - The speculation hardening is done after register allocation. With a
+// relative abundance of registers, one register is reserved (X16) to be
+// the taint register. X16 is expected to not clash with other register
+// reservation mechanisms with very high probability because:
+// . The AArch64 ABI doesn't guarantee X16 to be retained across any call.
+// . The only way for a programmer to request that X16 be used is through
+// inline assembly. In the rare case a function explicitly demands to
+// use X16/W16, this pass falls back to hardening against speculation
+// by inserting a DSB SYS/ISB barrier pair which will prevent control
+// flow speculation.
+// - It is easy to insert mask operations at this late stage as we have
+// mask operations available that don't set flags.
+// - The taint variable contains all-ones when no miss-speculation is detected,
+// and contains all-zeros when miss-speculation is detected. Therefore, when
+// masking, an AND instruction (which only changes the register to be masked,
+// no other side effects) can easily be inserted anywhere that's needed.
+// - The tracking of miss-speculation is done by using a data-flow conditional
+// select instruction (CSEL) to evaluate the flags that were also used to
+// make conditional branch direction decisions. Speculation of the CSEL
+// instruction can be limited with a CSDB instruction - so the combination of
+// CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
+// aren't speculated. When conditional branch direction gets miss-speculated,
+// the semantics of the inserted CSEL instruction are such that the taint
+// register will contain all zero bits.
+// One key requirement for this to work is that the conditional branch is
+// followed by an execution of the CSEL instruction, where the CSEL
+// instruction needs to use the same flags status as the conditional branch.
+// This means that the conditional branches must not be implemented as one
+// of the AArch64 conditional branches that do not use the flags as input
+// (CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
+// selectors to not produce these instructions when speculation hardening
+// is enabled. This pass will assert if it does encounter such an instruction.
+// - On function call boundaries, the miss-speculation state is transferred from
+// the taint register X16 to be encoded in the SP register as value 0.
+//
+// For the aspect of automatically hardening loads, using the taint register,
+// (a.k.a. speculative load hardening, see
+// https://llvm.org/docs/SpeculativeLoadHardening.html), the following
+// implementation choices are made for AArch64:
+// - Many of the optimizations described at
+// https://llvm.org/docs/SpeculativeLoadHardening.html to harden fewer
+// loads haven't been implemented yet - but for some of them there are
+// FIXMEs in the code.
+// - Loads that load into general purpose (X or W) registers get hardened by
+// masking the loaded data. For loads that load into other registers, the
+// address loaded from gets hardened. It is expected that hardening the
+// loaded data may be more efficient; but masking data in registers other
+// than X or W is not easy and may result in being slower than just
+// hardening the X address register loaded from.
+// - On AArch64, CSDB instructions are inserted between the masking of the
+// register and its first use, to ensure there's no non-control-flow
+// speculation that might undermine the hardening mechanism.
+//
+// Future extensions/improvements could be:
+// - Implement this functionality using full speculation barriers, akin to the
+// x86-slh-lfence option. This may be more useful for the intrinsics-based
+// approach than for the SLH approach to masking.
+// Note that this pass already inserts the full speculation barriers if the
+// function for some niche reason makes use of X16/W16.
+// - No indirect branch misprediction gets protected/instrumented yet; this
+// could be done for some indirect branches, such as switch jump tables.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-speculation-hardening"
+
+#define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"
+
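+// Command-line option controlling whether loads are automatically hardened
+// (step 1 in runOnMachineFunction below); defaults to enabled.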
+static cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
+ cl::desc("Sanitize loads from memory."),
+ cl::init(true));
+
+namespace {
+
+class AArch64SpeculationHardening : public MachineFunctionPass {
+public:
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ static char ID;
+
+ AArch64SpeculationHardening() : MachineFunctionPass(ID) {
+ initializeAArch64SpeculationHardeningPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override {
+ return AARCH64_SPECULATION_HARDENING_NAME;
+ }
+
+private:
+ unsigned MisspeculatingTaintReg;
+ unsigned MisspeculatingTaintReg32Bit;
+ bool UseControlFlowSpeculationBarrier;
+ BitVector RegsNeedingCSDBBeforeUse;
+ BitVector RegsAlreadyMasked;
+
+ bool functionUsesHardeningRegister(MachineFunction &MF) const;
+ bool instrumentControlFlow(MachineBasicBlock &MBB);
+ bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ AArch64CC::CondCode &CondCode) const;
+ void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
+ AArch64CC::CondCode &CondCode, DebugLoc DL) const;
+ void insertSPToRegTaintPropagation(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI) const;
+ void insertRegToSPTaintPropagation(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned TmpReg) const;
+
+ bool slhLoads(MachineBasicBlock &MBB);
+ bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr &MI, unsigned Reg);
+ bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB);
+ bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL);
+};
+
+} // end anonymous namespace
+
+char AArch64SpeculationHardening::ID = 0;
+
+INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
+ AARCH64_SPECULATION_HARDENING_NAME, false, false)
+
+bool AArch64SpeculationHardening::endsWithCondControlFlow(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ AArch64CC::CondCode &CondCode) const {
+ SmallVector<MachineOperand, 1> analyzeBranchCondCode;
+ if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false))
+ return false;
+
+ // Ignore if the BB ends in an unconditional branch/fall-through.
+ if (analyzeBranchCondCode.empty())
+ return false;
+
+ // If the BB ends with a single conditional branch, FBB will be set to
+ // nullptr (see API docs for TII->analyzeBranch). For the rest of the
+ // analysis we always want the FBB block to be set.
+ assert(TBB != nullptr);
+ if (FBB == nullptr)
+ FBB = MBB.getFallThrough();
+
+ // If both the true and the false condition jump to the same basic block,
+ // there is no need for any protection - whether the branch is speculated
+ // correctly or not, we end up executing the architecturally correct code.
+ if (TBB == FBB)
+ return false;
+
+ assert(MBB.succ_size() == 2);
+ // translate analyzeBranchCondCode to CondCode.
+ assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
+ CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm());
+ return true;
+}
+
+void AArch64SpeculationHardening::insertTrackingCode(
+ MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
+ DebugLoc DL) const {
+ if (UseControlFlowSpeculationBarrier) {
+ // insert full control flow speculation barrier (DSB SYS + ISB)
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB))
+ .addImm(0xf);
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB))
+ .addImm(0xf);
+ } else {
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
+ .addDef(MisspeculatingTaintReg)
+ .addUse(MisspeculatingTaintReg)
+ .addUse(AArch64::XZR)
+ .addImm(CondCode);
+ SplitEdgeBB.addLiveIn(AArch64::NZCV);
+ }
+}
+
+bool AArch64SpeculationHardening::instrumentControlFlow(
+ MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
+
+ bool Modified = false;
+ MachineBasicBlock *TBB = nullptr;
+ MachineBasicBlock *FBB = nullptr;
+ AArch64CC::CondCode CondCode;
+
+ if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) {
+ LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n");
+ } else {
+ // Now insert:
+ // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and
+ // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False
+ // edge.
+ AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode);
+
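+ // The tracking CSEL must only execute on the edge it corresponds to, so
+ // insert it into a block split off that edge rather than directly into TBB
+ // or FBB, which may have other predecessors.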
+ MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this);
+ MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this);
+
+ assert(SplitEdgeTBB != nullptr);
+ assert(SplitEdgeFBB != nullptr);
+
+ DebugLoc DL;
+ if (MBB.instr_end() != MBB.instr_begin())
+ DL = (--MBB.instr_end())->getDebugLoc();
+
+ insertTrackingCode(*SplitEdgeTBB, CondCode, DL);
+ insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL);
+
+ LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n");
+ LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n");
+ Modified = true;
+ }
+
+ // Perform correct code generation around function calls and before returns.
+ {
+ SmallVector<MachineInstr *, 4> ReturnInstructions;
+ SmallVector<MachineInstr *, 4> CallInstructions;
+
+ for (MachineInstr &MI : MBB) {
+ if (MI.isReturn())
+ ReturnInstructions.push_back(&MI);
+ else if (MI.isCall())
+ CallInstructions.push_back(&MI);
+ }
+
+ Modified |=
+ (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0);
+
+ for (MachineInstr *Return : ReturnInstructions)
+ insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17);
+ for (MachineInstr *Call : CallInstructions) {
+ // Just after the call:
+ MachineBasicBlock::iterator i = Call;
+ i++;
+ insertSPToRegTaintPropagation(Call->getParent(), i);
+ // Just before the call:
+ insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17);
+ }
+ }
+
+ return Modified;
+}
+
+void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
+ MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const {
+ // If full control flow speculation barriers are used, emit a control flow
+ // barrier to block potential miss-speculation in flight coming into this
+ // function.
+ if (UseControlFlowSpeculationBarrier) {
+ // insert full control flow speculation barrier (DSB SYS + ISB)
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf);
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf);
+ return;
+ }
+
+ // CMP SP, #0 === SUBS xzr, SP, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
+ .addDef(AArch64::XZR)
+ .addUse(AArch64::SP)
+ .addImm(0)
+ .addImm(0); // no shift
+ // CSETM x16, NE === CSINV x16, xzr, xzr, EQ
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
+ .addDef(MisspeculatingTaintReg)
+ .addUse(AArch64::XZR)
+ .addUse(AArch64::XZR)
+ .addImm(AArch64CC::EQ);
+}
+
+void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
+ MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
+ unsigned TmpReg) const {
+ // If full control flow speculation barriers are used, there will not be
+ // miss-speculation when returning from this function, and therefore, also
+ // no need to encode potential miss-speculation into the stack pointer.
+ if (UseControlFlowSpeculationBarrier)
+ return;
+
+ // mov Xtmp, SP === ADD Xtmp, SP, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+ .addDef(TmpReg)
+ .addUse(AArch64::SP)
+ .addImm(0)
+ .addImm(0); // no shift
+ // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, LSL #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
+ .addDef(TmpReg, RegState::Renamable)
+ .addUse(TmpReg, RegState::Kill | RegState::Renamable)
+ .addUse(MisspeculatingTaintReg, RegState::Kill)
+ .addImm(0);
+ // mov SP, Xtmp === ADD SP, Xtmp, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+ .addDef(AArch64::SP)
+ .addUse(TmpReg, RegState::Kill)
+ .addImm(0)
+ .addImm(0); // no shift
+}
+
+bool AArch64SpeculationHardening::functionUsesHardeningRegister(
+ MachineFunction &MF) const {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ // treat function calls specially, as the hardening register does not
+ // need to remain live across function calls.
+ if (MI.isCall())
+ continue;
+ if (MI.readsRegister(MisspeculatingTaintReg, TRI) ||
+ MI.modifiesRegister(MisspeculatingTaintReg, TRI))
+ return true;
+ }
+ }
+ return false;
+}
+
+// Make GPR register Reg speculation-safe by putting it through the
+// SpeculationSafeValue pseudo instruction, if we can't prove that
+// the value in the register has already been hardened.
+bool AArch64SpeculationHardening::makeGPRSpeculationSafe(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI,
+ unsigned Reg) {
+ assert(AArch64::GPR32allRegClass.contains(Reg) ||
+ AArch64::GPR64allRegClass.contains(Reg));
+
+ // Loads cannot directly load a value into the SP (nor WSP).
+ // Therefore, if Reg is SP or WSP, it is because the instruction loads from
+ // the stack through the stack pointer.
+ //
+ // Since the stack pointer is never dynamically controllable, don't harden it.
+ if (Reg == AArch64::SP || Reg == AArch64::WSP)
+ return false;
+
+ // Do not harden the register again if already hardened before.
+ if (RegsAlreadyMasked[Reg])
+ return false;
+
+ const bool Is64Bit = AArch64::GPR64allRegClass.contains(Reg);
+ LLVM_DEBUG(dbgs() << "About to harden register: " << Reg << "\n");
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Is64Bit ? AArch64::SpeculationSafeValueX
+ : AArch64::SpeculationSafeValueW))
+ .addDef(Reg)
+ .addUse(Reg);
+ RegsAlreadyMasked.set(Reg);
+ return true;
+}
+
+bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ LLVM_DEBUG(dbgs() << "slhLoads running on MBB: " << MBB);
+
+ RegsAlreadyMasked.reset();
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMBBI;
+ for (; MBBI != E; MBBI = NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ NextMBBI = std::next(MBBI);
+ // Only harden loaded values or addresses used in loads.
+ if (!MI.mayLoad())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "About to harden: " << MI);
+
+ // For general purpose register loads, harden the registers loaded into.
+ // For other loads, harden the address loaded from.
+ // Masking the loaded value is expected to result in less performance
+ // overhead, as the load can still execute speculatively in comparison to
+ // when the address loaded from gets masked. However, masking is only
+ // easy to do efficiently on GPR registers, so for loads into non-GPR
+ // registers (e.g. floating point loads), mask the address loaded from.
+ bool AllDefsAreGPR = llvm::all_of(MI.defs(), [&](MachineOperand &Op) {
+ return Op.isReg() && (AArch64::GPR32allRegClass.contains(Op.getReg()) ||
+ AArch64::GPR64allRegClass.contains(Op.getReg()));
+ });
+ // FIXME: it might be a worthwhile optimization to not mask loaded
+ // values if all the registers involved in address calculation are already
+ // hardened, leading to this load not being able to execute on a
+ // miss-speculated path.
+ bool HardenLoadedData = AllDefsAreGPR;
+ bool HardenAddressLoadedFrom = !HardenLoadedData;
+
+ // First remove registers from RegsAlreadyMasked if their value is
+ // updated by this instruction - it makes them contain a new value that is
+ // not guaranteed to already have been masked.
+ for (MachineOperand Op : MI.defs())
+ for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
+ RegsAlreadyMasked.reset(*AI);
+
+ // FIXME: loads from the stack with an immediate offset from the stack
+ // pointer probably shouldn't be hardened, which could result in a
+ // significant optimization. See section "Don't check loads from
+ // compile-time constant stack offsets", in
+ // https://llvm.org/docs/SpeculativeLoadHardening.html
+
+ if (HardenLoadedData)
+ for (auto Def : MI.defs()) {
+ if (Def.isDead())
+ // Do not mask a register that is not used further.
+ continue;
+ // FIXME: For pre/post-increment addressing modes, the base register
+ // used in address calculation is also defined by this instruction.
+ // It might be a worthwhile optimization to not harden that
+ // base register increment/decrement when the increment/decrement is
+ // an immediate.
+ Modified |= makeGPRSpeculationSafe(MBB, NextMBBI, MI, Def.getReg());
+ }
+
+ if (HardenAddressLoadedFrom)
+ for (auto Use : MI.uses()) {
+ if (!Use.isReg())
+ continue;
+ unsigned Reg = Use.getReg();
+ // Some loads of floating point data have implicit defs/uses on a
+ // super register of that floating point data. Some examples:
+ // $s0 = LDRSui $sp, 22, implicit-def $q0
+ // $q0 = LD1i64 $q0, 1, renamable $x0
+ // We need to filter out these uses for non-GPR registers, which occur
+ // because the load partially fills a non-GPR register with the loaded
+ // data. Just skipping all non-GPR registers is safe (for now) as all
+ // AArch64 load instructions only use GPR registers to perform the
+ // address calculation. FIXME: However that might change once we can
+ // produce SVE gather instructions.
+ if (!(AArch64::GPR32allRegClass.contains(Reg) ||
+ AArch64::GPR64allRegClass.contains(Reg)))
+ continue;
+ Modified |= makeGPRSpeculationSafe(MBB, MBBI, MI, Reg);
+ }
+ }
+ return Modified;
+}
+
+/// \brief If MBBI references a pseudo instruction that should be expanded
+/// here, do the expansion and return true. Otherwise return false.
+bool AArch64SpeculationHardening::expandSpeculationSafeValue(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ bool Is64Bit = true;
+
+ switch (Opcode) {
+ default:
+ break;
+ case AArch64::SpeculationSafeValueW:
+ Is64Bit = false;
+ LLVM_FALLTHROUGH;
+ case AArch64::SpeculationSafeValueX:
+ // Just remove the SpeculationSafe pseudos if control flow
+ // miss-speculation isn't happening, because we're already inserting
+ // barriers to guarantee that.
+ if (!UseControlFlowSpeculationBarrier) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ // Mark this register and all its aliasing registers as needing to be
+ // value speculation hardened before its next use, by using a CSDB
+ // barrier instruction.
+ for (MachineOperand Op : MI.defs())
+ for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
+ RegsNeedingCSDBBeforeUse.set(*AI);
+
+ // Mask off with taint state.
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ Is64Bit ? TII->get(AArch64::ANDXrs) : TII->get(AArch64::ANDWrs))
+ .addDef(DstReg)
+ .addUse(SrcReg, RegState::Kill)
+ .addUse(Is64Bit ? MisspeculatingTaintReg
+ : MisspeculatingTaintReg32Bit)
+ .addImm(0);
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
+bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) {
+ assert(!UseControlFlowSpeculationBarrier && "No need to insert CSDBs when "
+ "control flow miss-speculation "
+ "is already blocked");
+ // insert data value speculation barrier (CSDB)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT)).addImm(0x14);
+ RegsNeedingCSDBBeforeUse.reset();
+ return true;
+}
+
+bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
+ MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ RegsNeedingCSDBBeforeUse.reset();
+
+ // The following loop iterates over all instructions in the basic block,
+ // and performs 2 operations:
+ // 1. Insert a CSDB at this location if needed.
+ // 2. Expand the SpeculationSafeValuePseudo if the current instruction is
+ // one.
+ //
+ // The insertion of the CSDB is done as late as possible (i.e. just before
+ // the use of a masked register), in the hope that this will reduce the
+ // total number of CSDBs in a block when there are multiple masked registers
+ // in the block.
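+ // For example, if both x1 and x2 were masked earlier in the block, a single
+ // CSDB emitted before the first instruction that uses either of them covers
+ // both registers.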
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ DebugLoc DL;
+ while (MBBI != E) {
+ MachineInstr &MI = *MBBI;
+ DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+
+ // First check if a CSDB needs to be inserted due to earlier registers
+ // that were masked and that are used by the next instruction.
+ // Also emit the barrier on any potential control flow changes.
+ bool NeedToEmitBarrier = false;
+ if (RegsNeedingCSDBBeforeUse.any() && (MI.isCall() || MI.isTerminator()))
+ NeedToEmitBarrier = true;
+ if (!NeedToEmitBarrier)
+ for (MachineOperand Op : MI.uses())
+ if (Op.isReg() && RegsNeedingCSDBBeforeUse[Op.getReg()]) {
+ NeedToEmitBarrier = true;
+ break;
+ }
+
+ if (NeedToEmitBarrier)
+ Modified |= insertCSDB(MBB, MBBI, DL);
+
+ Modified |= expandSpeculationSafeValue(MBB, MBBI);
+
+ MBBI = NMBBI;
+ }
+
+ if (RegsNeedingCSDBBeforeUse.any())
+ Modified |= insertCSDB(MBB, MBBI, DL);
+
+ return Modified;
+}
+
+bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ return false;
+
+ MisspeculatingTaintReg = AArch64::X16;
+ MisspeculatingTaintReg32Bit = AArch64::W16;
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ RegsNeedingCSDBBeforeUse.resize(TRI->getNumRegs());
+ RegsAlreadyMasked.resize(TRI->getNumRegs());
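+ // If the function makes explicit use of X16/W16, fall back to full
+ // speculation barriers instead of tracking the taint in X16 (see the file
+ // header comment for details).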
+ UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF);
+
+ bool Modified = false;
+
+ // Step 1: Enable automatic insertion of SpeculationSafeValue.
+ if (HardenLoads) {
+ LLVM_DEBUG(
+ dbgs() << "***** AArch64SpeculationHardening - automatic insertion of "
+ "SpeculationSafeValue intrinsics *****\n");
+ for (auto &MBB : MF)
+ Modified |= slhLoads(MBB);
+ }
+
+ // Step 2.a: Add instrumentation code to function entries and exits.
+ LLVM_DEBUG(
+ dbgs()
+ << "***** AArch64SpeculationHardening - track control flow *****\n");
+
+ SmallVector<MachineBasicBlock *, 2> EntryBlocks;
+ EntryBlocks.push_back(&MF.front());
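+ // Landing pads are additional entry points (entered from the unwinder), so
+ // the taint also needs to be re-derived from SP at the start of each one.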
+ for (const LandingPadInfo &LPI : MF.getLandingPads())
+ EntryBlocks.push_back(LPI.LandingPadBlock);
+ for (auto Entry : EntryBlocks)
+ insertSPToRegTaintPropagation(
+ Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
+
+ // Step 2.b: Add instrumentation code to every basic block.
+ for (auto &MBB : MF)
+ Modified |= instrumentControlFlow(MBB);
+
+ LLVM_DEBUG(dbgs() << "***** AArch64SpeculationHardening - Lowering "
+ "SpeculationSafeValue Pseudos *****\n");
+ // Step 3: Lower SpeculationSafeValue pseudo instructions.
+ for (auto &MBB : MF)
+ Modified |= lowerSpeculationSafeValuePseudos(MBB);
+
+ return Modified;
+}
+
+/// \brief Returns an instance of the pseudo instruction expansion pass.
+FunctionPass *llvm::createAArch64SpeculationHardeningPass() {
+ return new AArch64SpeculationHardening();
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index fc7b5984fe3e..d5643d384283 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -148,9 +148,11 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
for (auto &MI : MBB) {
if (!isNarrowFPStore(MI))
continue;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) {
+ if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) &&
+ BaseOp->isReg()) {
+ unsigned BaseReg = BaseOp->getReg();
if (PrevBaseReg == BaseReg) {
// If this block can take STPs, skip ahead to the next block.
if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent()))
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 04bb90d30d6d..dd30d25b2b50 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -14,13 +14,13 @@
#include "AArch64Subtarget.h"
#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64PBQPRegAlloc.h"
-#include "AArch64TargetMachine.h"
-
#include "AArch64CallLowering.h"
+#include "AArch64InstrInfo.h"
#include "AArch64LegalizerInfo.h"
+#include "AArch64PBQPRegAlloc.h"
#include "AArch64RegisterBankInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
@@ -67,16 +67,30 @@ void AArch64Subtarget::initializeProperties() {
// this in the future so we can specify it together with the subtarget
// features.
switch (ARMProcFamily) {
+ case Others:
+ break;
+ case CortexA35:
+ break;
+ case CortexA53:
+ PrefFunctionAlignment = 3;
+ break;
+ case CortexA55:
+ break;
+ case CortexA57:
+ MaxInterleaveFactor = 4;
+ PrefFunctionAlignment = 4;
+ break;
+ case CortexA72:
+ case CortexA73:
+ case CortexA75:
+ PrefFunctionAlignment = 4;
+ break;
case Cyclone:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;
MaxPrefetchIterationsAhead = 3;
break;
- case CortexA57:
- MaxInterleaveFactor = 4;
- PrefFunctionAlignment = 4;
- break;
case ExynosM1:
MaxInterleaveFactor = 4;
MaxJumpTableSize = 8;
@@ -98,11 +112,6 @@ void AArch64Subtarget::initializeProperties() {
MinPrefetchStride = 2048;
MaxPrefetchIterationsAhead = 8;
break;
- case Saphira:
- MaxInterleaveFactor = 4;
- // FIXME: remove this to enable 64-bit SLP if performance looks good.
- MinVectorRegisterBitWidth = 128;
- break;
case Kryo:
MaxInterleaveFactor = 4;
VectorInsertExtractBaseCost = 2;
@@ -113,6 +122,11 @@ void AArch64Subtarget::initializeProperties() {
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
+ case Saphira:
+ MaxInterleaveFactor = 4;
+ // FIXME: remove this to enable 64-bit SLP if performance looks good.
+ MinVectorRegisterBitWidth = 128;
+ break;
case ThunderX2T99:
CacheLineSize = 64;
PrefFunctionAlignment = 3;
@@ -134,17 +148,11 @@ void AArch64Subtarget::initializeProperties() {
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
- case CortexA35: break;
- case CortexA53:
- PrefFunctionAlignment = 3;
- break;
- case CortexA55: break;
- case CortexA72:
- case CortexA73:
- case CortexA75:
+ case TSV110:
+ CacheLineSize = 64;
PrefFunctionAlignment = 4;
+ PrefLoopAlignment = 2;
break;
- case Others: break;
}
}
@@ -152,10 +160,15 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS),
- ReserveX18(AArch64::isX18ReservedByDefault(TT)), IsLittle(LittleEndian),
+ ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
+ CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
+ IsLittle(LittleEndian),
TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
TLInfo(TM, *this) {
+ if (AArch64::isX18ReservedByDefault(TT))
+ ReserveXRegister.set(18);
+
CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
Legalizer.reset(new AArch64LegalizerInfo(*this));
@@ -196,18 +209,22 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
return AArch64II::MO_GOT;
- unsigned Flags = GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
- : AArch64II::MO_NO_FLAG;
-
- if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
- return AArch64II::MO_GOT | Flags;
+ if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
+ if (GV->hasDLLImportStorageClass())
+ return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
+ if (getTargetTriple().isOSWindows())
+ return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
+ return AArch64II::MO_GOT;
+ }
// The small code model's direct accesses use ADRP, which cannot
// necessarily produce the value 0 (if the code is above 4GB).
- if (useSmallAddressing() && GV->hasExternalWeakLinkage())
- return AArch64II::MO_GOT | Flags;
+ // Same for the tiny code model, where we have a pc relative LDR.
+ if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
+ GV->hasExternalWeakLinkage())
+ return AArch64II::MO_GOT;
- return Flags;
+ return AArch64II::MO_NO_FLAG;
}
unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
@@ -265,7 +282,7 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
// We usually compute max call frame size after ISel. Do the computation now
// if the .mir file didn't specify it. Note that this will probably give you
// bogus values after PEI has eliminated the callframe setup/destroy pseudo
- // instructions, specify explicitely if you need it to be correct.
+ // instructions, specify explicitly if you need it to be correct.
MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.isMaxCallFrameSizeComputed())
MFI.computeMaxCallFrameSize(MF);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5af4c0dd9c19..82f7bb755951 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -56,7 +56,8 @@ public:
ThunderX,
ThunderXT81,
ThunderXT83,
- ThunderXT88
+ ThunderXT88,
+ TSV110
};
protected:
@@ -67,6 +68,7 @@ protected:
bool HasV8_2aOps = false;
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
+ bool HasV8_5aOps = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
@@ -78,8 +80,36 @@ protected:
bool HasRDM = false;
bool HasPerfMon = false;
bool HasFullFP16 = false;
+ bool HasFP16FML = false;
bool HasSPE = false;
+ // ARMv8.1 extensions
+ bool HasVH = false;
+ bool HasPAN = false;
+ bool HasLOR = false;
+
+ // ARMv8.2 extensions
+ bool HasPsUAO = false;
+ bool HasPAN_RWV = false;
+ bool HasCCPP = false;
+
+ // ARMv8.3 extensions
+ bool HasPA = false;
+ bool HasJS = false;
+ bool HasCCIDX = false;
+ bool HasComplxNum = false;
+
+ // ARMv8.4 extensions
+ bool HasNV = false;
+ bool HasRASv8_4 = false;
+ bool HasMPAM = false;
+ bool HasDIT = false;
+ bool HasTRACEV8_4 = false;
+ bool HasAM = false;
+ bool HasSEL2 = false;
+ bool HasTLB_RMI = false;
+ bool HasFMI = false;
+ bool HasRCPC_IMMO = false;
// ARMv8.4 Crypto extensions
bool HasSM4 = true;
bool HasSHA3 = true;
@@ -92,11 +122,25 @@ protected:
bool HasRCPC = false;
bool HasAggressiveFMA = false;
+ // Armv8.5-A Extensions
+ bool HasAlternativeNZCV = false;
+ bool HasFRInt3264 = false;
+ bool HasSpecRestrict = false;
+ bool HasSSBS = false;
+ bool HasSB = false;
+ bool HasPredRes = false;
+ bool HasCCDP = false;
+ bool HasBTI = false;
+ bool HasRandGen = false;
+ bool HasMTE = false;
+
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing = false;
+ bool HasZeroCycleZeroingGP = false;
+ bool HasZeroCycleZeroingFP = false;
bool HasZeroCycleZeroingFPWorkaround = false;
// StrictAlign - Disallow unaligned memory accesses.
@@ -122,10 +166,13 @@ protected:
bool HasArithmeticCbzFusion = false;
bool HasFuseAddress = false;
bool HasFuseAES = false;
+ bool HasFuseArithmeticLogic = false;
bool HasFuseCCSelect = false;
+ bool HasFuseCryptoEOR = false;
bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
+ bool Force32BitJumpTables = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
@@ -137,11 +184,11 @@ protected:
unsigned MaxJumpTableSize = 0;
unsigned WideningBaseCost = 0;
- // ReserveX18 - X18 is not available as a general purpose register.
- bool ReserveX18;
+ // ReserveXRegister[i] - X#i is not available as a general purpose register.
+ BitVector ReserveXRegister;
- // ReserveX20 - X20 is not available as a general purpose register.
- bool ReserveX20 = false;
+ // CustomCallSavedXRegs[i] - X#i is call saved under a custom calling convention.
+ BitVector CustomCallSavedXRegs;
bool IsLittle;
@@ -211,10 +258,13 @@ public:
bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
+ bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
- bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+ bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; }
+
+ bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; }
bool hasZeroCycleZeroingFPWorkaround() const {
return HasZeroCycleZeroingFPWorkaround;
@@ -228,8 +278,12 @@ public:
return MinVectorRegisterBitWidth;
}
- bool isX18Reserved() const { return ReserveX18; }
- bool isX20Reserved() const { return ReserveX20; }
+ bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
+ unsigned getNumXRegisterReserved() const { return ReserveXRegister.count(); }
+ bool isXRegCustomCalleeSaved(size_t i) const {
+ return CustomCallSavedXRegs[i];
+ }
+ bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
@@ -258,16 +312,20 @@ public:
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
bool hasFuseAddress() const { return HasFuseAddress; }
bool hasFuseAES() const { return HasFuseAES; }
+ bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; }
bool hasFuseCCSelect() const { return HasFuseCCSelect; }
+ bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
bool hasFusion() const {
return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
- hasFuseAES() || hasFuseCCSelect() || hasFuseLiterals();
+ hasFuseAES() || hasFuseArithmeticLogic() ||
+ hasFuseCCSelect() || hasFuseLiterals();
}
bool useRSqrt() const { return UseRSqrt; }
+ bool force32BitJumpTables() const { return Force32BitJumpTables; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
@@ -291,11 +349,22 @@ public:
bool hasPerfMon() const { return HasPerfMon; }
bool hasFullFP16() const { return HasFullFP16; }
+ bool hasFP16FML() const { return HasFP16FML; }
bool hasSPE() const { return HasSPE; }
bool hasLSLFast() const { return HasLSLFast; }
bool hasSVE() const { return HasSVE; }
bool hasRCPC() const { return HasRCPC; }
bool hasAggressiveFMA() const { return HasAggressiveFMA; }
+ bool hasAlternativeNZCV() const { return HasAlternativeNZCV; }
+ bool hasFRInt3264() const { return HasFRInt3264; }
+ bool hasSpecRestrict() const { return HasSpecRestrict; }
+ bool hasSSBS() const { return HasSSBS; }
+ bool hasSB() const { return HasSB; }
+ bool hasPredRes() const { return HasPredRes; }
+ bool hasCCDP() const { return HasCCDP; }
+ bool hasBTI() const { return HasBTI; }
+ bool hasRandGen() const { return HasRandGen; }
+ bool hasMTE() const { return HasMTE; }
bool isLittleEndian() const { return IsLittle; }
@@ -312,6 +381,30 @@ public:
bool useAA() const override { return UseAA; }
+ bool hasVH() const { return HasVH; }
+ bool hasPAN() const { return HasPAN; }
+ bool hasLOR() const { return HasLOR; }
+
+ bool hasPsUAO() const { return HasPsUAO; }
+ bool hasPAN_RWV() const { return HasPAN_RWV; }
+ bool hasCCPP() const { return HasCCPP; }
+
+ bool hasPA() const { return HasPA; }
+ bool hasJS() const { return HasJS; }
+ bool hasCCIDX() const { return HasCCIDX; }
+ bool hasComplxNum() const { return HasComplxNum; }
+
+ bool hasNV() const { return HasNV; }
+ bool hasRASv8_4() const { return HasRASv8_4; }
+ bool hasMPAM() const { return HasMPAM; }
+ bool hasDIT() const { return HasDIT; }
+ bool hasTRACEV8_4() const { return HasTRACEV8_4; }
+ bool hasAM() const { return HasAM; }
+ bool hasSEL2() const { return HasSEL2; }
+ bool hasTLB_RMI() const { return HasTLB_RMI; }
+ bool hasFMI() const { return HasFMI; }
+ bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
+
bool useSmallAddressing() const {
switch (TLInfo.getTargetMachine().getCodeModel()) {
case CodeModel::Kernel:
@@ -346,6 +439,8 @@ public:
bool isCallingConvWin64(CallingConv::ID CC) const {
switch (CC) {
case CallingConv::C:
+ case CallingConv::Fast:
+ case CallingConv::Swift:
return isTargetWindows();
case CallingConv::Win64:
return true;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index dbc4deaf3f9f..a804fb11175b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -15,6 +15,25 @@
include "llvm/TableGen/SearchableTable.td"
//===----------------------------------------------------------------------===//
+// Features that, for the compiler, only enable system operands and PStates
+//===----------------------------------------------------------------------===//
+
+def HasCCPP : Predicate<"Subtarget->hasCCPP()">,
+ AssemblerPredicate<"FeatureCCPP", "ccpp">;
+
+def HasPAN : Predicate<"Subtarget->hasPAN()">,
+ AssemblerPredicate<"FeaturePAN",
+ "ARM v8.1 Privileged Access-Never extension">;
+
+def HasPsUAO : Predicate<"Subtarget->hasPsUAO()">,
+ AssemblerPredicate<"FeaturePsUAO",
+ "ARM v8.2 UAO PState extension (psuao)">;
+
+def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
+ AssemblerPredicate<"FeaturePAN_RWV",
+ "ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;
+
+//===----------------------------------------------------------------------===//
// AT (address translate) instruction options.
//===----------------------------------------------------------------------===//
@@ -45,7 +64,7 @@ def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>;
def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;
-let Requires = [{ {AArch64::HasV8_2aOps} }] in {
+let Requires = [{ {AArch64::FeaturePAN_RWV} }] in {
def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
}
@@ -102,9 +121,33 @@ def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>;
def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>;
-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeatureCCPP} }] in
def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>;
+let Requires = [{ {AArch64::FeatureCacheDeepPersist} }] in
+def : DC<"CVADP", 0b011, 0b0111, 0b1101, 0b001>;
+
+let Requires = [{ {AArch64::FeatureMTE} }] in {
+def : DC<"IGVAC", 0b000, 0b0111, 0b0110, 0b011>;
+def : DC<"IGSW", 0b000, 0b0111, 0b0110, 0b100>;
+def : DC<"CGSW", 0b000, 0b0111, 0b1010, 0b100>;
+def : DC<"CIGSW", 0b000, 0b0111, 0b1110, 0b100>;
+def : DC<"CGVAC", 0b011, 0b0111, 0b1010, 0b011>;
+def : DC<"CGVAP", 0b011, 0b0111, 0b1100, 0b011>;
+def : DC<"CGVADP", 0b011, 0b0111, 0b1101, 0b011>;
+def : DC<"CIGVAC", 0b011, 0b0111, 0b1110, 0b011>;
+def : DC<"GVA", 0b011, 0b0111, 0b0100, 0b011>;
+def : DC<"IGDVAC", 0b000, 0b0111, 0b0110, 0b101>;
+def : DC<"IGDSW", 0b000, 0b0111, 0b0110, 0b110>;
+def : DC<"CGDSW", 0b000, 0b0111, 0b1010, 0b110>;
+def : DC<"CIGDSW", 0b000, 0b0111, 0b1110, 0b110>;
+def : DC<"CGDVAC", 0b011, 0b0111, 0b1010, 0b101>;
+def : DC<"CGDVAP", 0b011, 0b0111, 0b1100, 0b101>;
+def : DC<"CGDVADP", 0b011, 0b0111, 0b1101, 0b101>;
+def : DC<"CIGDVAC", 0b011, 0b0111, 0b1110, 0b101>;
+def : DC<"GZVA", 0b011, 0b0111, 0b0100, 0b100>;
+}
+
//===----------------------------------------------------------------------===//
// IC (instruction cache maintenance) instruction options.
//===----------------------------------------------------------------------===//
@@ -154,7 +197,7 @@ class TSB<string name, bits<4> encoding> : SearchableTable{
bits<4> Encoding;
let Encoding = encoding;
- code Requires = [{ {AArch64::HasV8_4aOps} }];
+ code Requires = [{ {AArch64::FeatureTRACEV8_4} }];
}
def : TSB<"csync", 0>;
@@ -290,14 +333,21 @@ def : PState<"SPSel", 0b00101>;
def : PState<"DAIFSet", 0b11110>;
def : PState<"DAIFClr", 0b11111>;
// v8.1a "Privileged Access Never" extension-specific PStates
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeaturePAN} }] in
def : PState<"PAN", 0b00100>;
+
// v8.2a "User Access Override" extension-specific PStates
-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeaturePsUAO} }] in
def : PState<"UAO", 0b00011>;
// v8.4a timining insensitivity of data processing instructions
-let Requires = [{ {AArch64::HasV8_4aOps} }] in
+let Requires = [{ {AArch64::FeatureDIT} }] in
def : PState<"DIT", 0b11010>;
+// v8.5a Spectre Mitigation
+let Requires = [{ {AArch64::FeatureSSBS} }] in
+def : PState<"SSBS", 0b11001>;
+// v8.5a Memory Tagging Extension
+let Requires = [{ {AArch64::FeatureMTE} }] in
+def : PState<"TCO", 0b11100>;
//===----------------------------------------------------------------------===//
// PSB instruction options.
@@ -315,6 +365,23 @@ class PSB<string name, bits<5> encoding> : SearchableTable {
def : PSB<"csync", 0x11>;
//===----------------------------------------------------------------------===//
+// BTI instruction options.
+//===----------------------------------------------------------------------===//
+
+class BTI<string name, bits<2> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<2> Encoding;
+ let Encoding = encoding;
+}
+
+def : BTI<"c", 0b01>;
+def : BTI<"j", 0b10>;
+def : BTI<"jc", 0b11>;
+
+//===----------------------------------------------------------------------===//
// TLBI (translation lookaside buffer invalidate) instruction options.
//===----------------------------------------------------------------------===//
@@ -366,8 +433,9 @@ def : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
+// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI)
+let Requires = [{ {AArch64::FeatureTLB_RMI} }] in {
// Armv8.4-A Outer Sharable TLB Maintenance instructions:
-let Requires = [{ {AArch64::HasV8_4aOps} }] in {
// op1 CRn CRm op2
def : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
def : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
@@ -418,6 +486,23 @@ def : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
def : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
def : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
def : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
+} //FeatureTLB_RMI
+
+// Armv8.5-A Prediction Restriction by Context instruction options:
+class PRCTX<string name, bits<4> crm> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<11> Encoding;
+ let Encoding{10-4} = 0b0110111;
+ let Encoding{3-0} = crm;
+ bit NeedsReg = 1;
+ code Requires = [{ {} }];
+}
+
+let Requires = [{ {AArch64::FeaturePredRes} }] in {
+def : PRCTX<"RCTX", 0b0011>;
}
//===----------------------------------------------------------------------===//
@@ -476,8 +561,10 @@ def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>;
def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>;
def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>;
def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>;
+
+// v8.3 CCIDX - extending the CCSIDR number of sets
def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> {
- let Requires = [{ {AArch64::HasV8_3aOps} }];
+ let Requires = [{ {AArch64::FeatureCCIDX} }];
}
def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>;
def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>;
@@ -487,6 +574,9 @@ def : ROSysReg<"AIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b111>;
def : ROSysReg<"DCZID_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b111>;
def : ROSysReg<"ID_PFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b000>;
def : ROSysReg<"ID_PFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b001>;
+def : ROSysReg<"ID_PFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b100> {
+ let Requires = [{ {AArch64::FeatureSpecRestrict} }];
+}
def : ROSysReg<"ID_DFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b010>;
def : ROSysReg<"ID_AFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b011>;
def : ROSysReg<"ID_MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b100>;
@@ -512,9 +602,7 @@ def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>;
def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>;
def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>;
def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>;
-def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010> {
- let Requires = [{ {AArch64::HasV8_2aOps} }];
-}
+def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010>;
def : ROSysReg<"MVFR0_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b000>;
def : ROSysReg<"MVFR1_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b001>;
def : ROSysReg<"MVFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b010>;
@@ -584,7 +672,7 @@ def : ROSysReg<"ID_AA64ZFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b100>;
// v8.1a "Limited Ordering Regions" extension-specific system register
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeatureLOR} }] in
def : ROSysReg<"LORID_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b111>;
// v8.2a "RAS extension" registers
@@ -594,6 +682,22 @@ def : ROSysReg<"ERRIDR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b000>;
def : ROSysReg<"ERXFR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b000>;
}
+// v8.5a "random number" registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureRandGen} }] in {
+def : ROSysReg<"RNDR", 0b11, 0b011, 0b0010, 0b0100, 0b000>;
+def : ROSysReg<"RNDRRS", 0b11, 0b011, 0b0010, 0b0100, 0b001>;
+}
+
+// v8.5a Software Context Number registers
+let Requires = [{ {AArch64::FeatureSpecRestrict} }] in {
+def : RWSysReg<"SCXTNUM_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b111>;
+}
+
//===----------------------
// Write-only regs
//===----------------------
@@ -1102,21 +1206,21 @@ def : RWSysReg<"ICH_LR14_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b110>;
def : RWSysReg<"ICH_LR15_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b111>;
// v8.1a "Privileged Access Never" extension-specific system registers
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeaturePAN} }] in
def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>;
// v8.1a "Limited Ordering Regions" extension-specific system registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+let Requires = [{ {AArch64::FeatureLOR} }] in {
def : RWSysReg<"LORSA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b000>;
def : RWSysReg<"LOREA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b001>;
def : RWSysReg<"LORN_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b010>;
def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>;
}
-// v8.1a "Virtualization hos extensions" system registers
+// v8.1a "Virtualization Host extensions" system registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+let Requires = [{ {AArch64::FeatureVH} }] in {
def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>;
def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>;
@@ -1147,7 +1251,7 @@ def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>;
}
// v8.2a registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeaturePsUAO} }] in
def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>;
// v8.2a "Statistical Profiling extension" registers
@@ -1184,7 +1288,7 @@ def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>;
// v8.3a "Pointer authentication extension" registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_3aOps} }] in {
+let Requires = [{ {AArch64::FeaturePA} }] in {
def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
@@ -1197,8 +1301,8 @@ def : RWSysReg<"APGAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b000>;
def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>;
}
-let Requires = [{ {AArch64::HasV8_4aOps} }] in {
-
+// v8.4 "Secure Exception Level 2 extension"
+let Requires = [{ {AArch64::FeatureSEL2} }] in {
// v8.4a "Virtualization secure second stage translation" registers
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>;
@@ -1216,18 +1320,22 @@ def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>;
// v8.4a "Virtualization debug state" registers
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
+} // FeatureSEL2
// v8.4a RAS registers
-// Op0 Op1 CRn CRm Op2
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureRASv8_4} }] in {
def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>;
def : RWSysReg<"ERXTS_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b111>;
def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>;
def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>;
def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>;
+} // FeatureRASv8_4
// v8.4a MPAM registers
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureMPAM} }] in {
def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>;
def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>;
def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>;
@@ -1244,9 +1352,11 @@ def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>;
def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>;
def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>;
+} //FeatureMPAM
-// v8.4a Activitiy monitor registers
+// v8.4a Activity Monitor registers
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureAM} }] in {
def : RWSysReg<"AMCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b000>;
def : ROSysReg<"AMCFGR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b001>;
def : ROSysReg<"AMCGCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b010>;
@@ -1295,6 +1405,7 @@ def : RWSysReg<"AMEVTYPER112_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b100>;
def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>;
def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b110>;
def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
+} //FeatureAM
// v8.4a Trace Extension registers
//
@@ -1303,19 +1414,24 @@ def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
// but they are already defined above.
//
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureTRACEV8_4} }] in {
def : RWSysReg<"TRFCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b001>;
def : RWSysReg<"TRFCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b001>;
def : RWSysReg<"TRFCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b001>;
+} //FeatureTRACEV8_4
// v8.4a Timining insensitivity of data processing instructions
+// DIT: Data Independent Timing instructions
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureDIT} }] in {
def : RWSysReg<"DIT", 0b11, 0b011, 0b0100, 0b0010, 0b101>;
+} //FeatureDIT
// v8.4a Enhanced Support for Nested Virtualization
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureNV} }] in {
def : RWSysReg<"VNCR_EL2", 0b11, 0b100, 0b0010, 0b0010, 0b000>;
-
-} // HasV8_4aOps
+} //FeatureNV
// SVE control registers
// Op0 Op1 CRn CRm Op2
@@ -1326,6 +1442,24 @@ def : RWSysReg<"ZCR_EL3", 0b11, 0b110, 0b0001, 0b0010, 0b000>;
def : RWSysReg<"ZCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b000>;
}
+// V8.5a Spectre mitigation SSBS register
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureSSBS} }] in
+def : RWSysReg<"SSBS", 0b11, 0b011, 0b0100, 0b0010, 0b110>;
+
+// v8.5a Memory Tagging Extension
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureMTE} }] in {
+def : RWSysReg<"TCO", 0b11, 0b011, 0b0100, 0b0010, 0b111>;
+def : RWSysReg<"GCR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b110>;
+def : RWSysReg<"RGSR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b101>;
+def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0110, 0b0101, 0b000>;
+def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0110, 0b0101, 0b000>;
+def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0110, 0b0110, 0b000>;
+def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0110, 0b0110, 0b000>;
+def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0110, 0b0110, 0b001>;
+} // HasMTE
+
// Cyclone specific system registers
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::ProcCyclone} }] in
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 120d71381c67..4e016525f7e4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
@@ -123,6 +124,10 @@ static cl::opt<bool>
BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
cl::desc("Relax out of range conditional branches"));
+static cl::opt<bool> EnableCompressJumpTables(
+ "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Use smallest entry possible for jump tables"));
+
// FIXME: Unify control over GlobalMerge.
static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
@@ -141,6 +146,11 @@ static cl::opt<int> EnableGlobalISelAtO(
static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,
+ cl::desc("Enable the AArch64 branch target pass"),
+ cl::init(true));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -151,19 +161,23 @@ extern "C" void LLVMInitializeAArch64Target() {
initializeAArch64A53Fix835769Pass(*PR);
initializeAArch64A57FPLoadBalancingPass(*PR);
initializeAArch64AdvSIMDScalarPass(*PR);
+ initializeAArch64BranchTargetsPass(*PR);
initializeAArch64CollectLOHPass(*PR);
+ initializeAArch64CompressJumpTablesPass(*PR);
initializeAArch64ConditionalComparesPass(*PR);
initializeAArch64ConditionOptimizerPass(*PR);
initializeAArch64DeadRegisterDefinitionsPass(*PR);
initializeAArch64ExpandPseudoPass(*PR);
initializeAArch64LoadStoreOptPass(*PR);
initializeAArch64SIMDInstrOptPass(*PR);
+ initializeAArch64PreLegalizerCombinerPass(*PR);
initializeAArch64PromoteConstantPass(*PR);
initializeAArch64RedundantCopyEliminationPass(*PR);
initializeAArch64StorePairSuppressPass(*PR);
initializeFalkorHWPFFixPass(*PR);
initializeFalkorMarkStridedAccessesLegacyPass(*PR);
initializeLDTLSCleanupPass(*PR);
+ initializeAArch64SpeculationHardeningPass(*PR);
}
//===----------------------------------------------------------------------===//
@@ -206,18 +220,20 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(const Triple &TT,
- Optional<CodeModel::Model> CM,
- bool JIT) {
+static CodeModel::Model
+getEffectiveAArch64CodeModel(const Triple &TT, Optional<CodeModel::Model> CM,
+ bool JIT) {
if (CM) {
- if (*CM != CodeModel::Small && *CM != CodeModel::Large) {
+ if (*CM != CodeModel::Small && *CM != CodeModel::Tiny &&
+ *CM != CodeModel::Large) {
if (!TT.isOSFuchsia())
report_fatal_error(
- "Only small and large code models are allowed on AArch64");
- else if (CM != CodeModel::Kernel)
- report_fatal_error(
- "Only small, kernel, and large code models are allowed on AArch64");
- }
+ "Only small, tiny and large code models are allowed on AArch64");
+ else if (*CM != CodeModel::Kernel)
+ report_fatal_error("Only small, tiny, kernel, and large code models "
+ "are allowed on AArch64");
+ } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF())
+ report_fatal_error("tiny code model is only supported on ELF");
return *CM;
}
// The default MCJIT memory managers make no guarantees about where they can
@@ -240,7 +256,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T,
computeDataLayout(TT, Options.MCOptions, LittleEndian),
TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(TT, CM, JIT), OL),
+ getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
initAsmInfo();
@@ -249,9 +265,21 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
this->Options.NoTrapAfterNoreturn = true;
}
+ if (getMCAsmInfo()->usesWindowsCFI()) {
+ // Unwinding can get confused if the last instruction in an
+ // exception-handling region (function, funclet, try block, etc.)
+ // is a call.
+ //
+ // FIXME: We could elide the trap if the next instruction would be in
+ // the same region anyway.
+ this->Options.TrapUnreachable = true;
+ }
+
// Enable GlobalISel at or below EnableGlobalISelAt0.
- if (getOptLevel() <= EnableGlobalISelAtO)
+ if (getOptLevel() <= EnableGlobalISelAtO) {
setGlobalISel(true);
+ setGlobalISelAbort(GlobalISelAbortMode::Disable);
+ }
// AArch64 supports the MachineOutliner.
setMachineOutliner(true);
@@ -346,6 +374,7 @@ public:
bool addPreISel() override;
bool addInstSelector() override;
bool addIRTranslator() override;
+ void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
void addPreGlobalInstructionSelect() override;
@@ -393,8 +422,10 @@ void AArch64PassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
// Match interleaved memory accesses to ldN/stN intrinsics.
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ addPass(createInterleavedLoadCombinePass());
addPass(createInterleavedAccessPass());
+ }
if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
@@ -447,6 +478,10 @@ bool AArch64PassConfig::addIRTranslator() {
return false;
}
+void AArch64PassConfig::addPreLegalizeMachineIR() {
+ addPass(createAArch64PreLegalizeCombiner());
+}
+
bool AArch64PassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
@@ -516,12 +551,28 @@ void AArch64PassConfig::addPreSched2() {
if (TM->getOptLevel() != CodeGenOpt::None) {
if (EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
+ }
+
+ // The AArch64SpeculationHardeningPass destroys dominator tree and natural
+ // loop info, which is needed for the FalkorHWPFFixPass and also later on.
+ // Therefore, run the AArch64SpeculationHardeningPass before the
+ // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop
+ // info.
+ addPass(createAArch64SpeculationHardeningPass());
+
+ if (TM->getOptLevel() != CodeGenOpt::None) {
if (EnableFalkorHWPFFix)
addPass(createFalkorHWPFFixPass());
}
}
void AArch64PassConfig::addPreEmitPass() {
+ // Machine Block Placement might have created new opportunities when run
+ // at O3, where the Tail Duplication Threshold is set to 4 instructions.
+ // Run the load/store optimizer once more.
+ if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
+ addPass(createAArch64LoadStoreOptimizationPass());
+
if (EnableA53Fix835769)
addPass(createAArch64A53Fix835769());
// Relax conditional branch instructions if they're otherwise out of
@@ -529,6 +580,12 @@ void AArch64PassConfig::addPreEmitPass() {
if (BranchRelaxation)
addPass(&BranchRelaxationPassID);
+ if (EnableBranchTargets)
+ addPass(createAArch64BranchTargetsPass());
+
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
+ addPass(createAArch64CompressJumpTablesPass());
+
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 4bc2c060a068..8ae72a7ddb57 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -22,6 +22,9 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
+ // The AArch64 ELF ABI does not define a static relocation type for TLS
+ // offsets within a module. Do not generate AT_location for TLS variables.
+ SupportDebugThreadLocalLocation = false;
}
AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile()
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 96e751e86971..a256cb7c9215 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -659,11 +659,14 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
- if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+ if (!UseMaskForCond && !UseMaskForGaps &&
+ Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned NumElts = VecTy->getVectorNumElements();
auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
@@ -676,7 +679,8 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
@@ -945,9 +949,20 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- if (Kind == TTI::SK_Transpose || Kind == TTI::SK_Select ||
- Kind == TTI::SK_PermuteSingleSrc) {
+ if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
+ Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
static const CostTblEntry ShuffleTbl[] = {
+ // Broadcast shuffle kinds can be performed with 'dup'.
+ { TTI::SK_Broadcast, MVT::v8i8, 1 },
+ { TTI::SK_Broadcast, MVT::v16i8, 1 },
+ { TTI::SK_Broadcast, MVT::v4i16, 1 },
+ { TTI::SK_Broadcast, MVT::v8i16, 1 },
+ { TTI::SK_Broadcast, MVT::v2i32, 1 },
+ { TTI::SK_Broadcast, MVT::v4i32, 1 },
+ { TTI::SK_Broadcast, MVT::v2i64, 1 },
+ { TTI::SK_Broadcast, MVT::v2f32, 1 },
+ { TTI::SK_Broadcast, MVT::v4f32, 1 },
+ { TTI::SK_Broadcast, MVT::v2f64, 1 },
// Transpose shuffle kinds can be performed with 'trn1/trn2' and
// 'zip1/zip2' instructions.
{ TTI::SK_Transpose, MVT::v8i8, 1 },
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index c056a7d2428b..08c1a8924220 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -146,7 +146,9 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
bool
shouldConsiderAddressTypePromotion(const Instruction &I,
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 30a9a08f2346..6cc9b67e4d27 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -39,6 +39,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -164,6 +165,7 @@ private:
OperandVector &Operands);
bool parseDirectiveArch(SMLoc L);
+ bool parseDirectiveArchExtension(SMLoc L);
bool parseDirectiveCPU(SMLoc L);
bool parseDirectiveInst(SMLoc L);
@@ -174,6 +176,8 @@ private:
bool parseDirectiveReq(StringRef Name, SMLoc L);
bool parseDirectiveUnreq(SMLoc L);
+ bool parseDirectiveCFINegateRAState();
+ bool parseDirectiveCFIBKeyFrame();
bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
SmallVectorImpl<SMLoc> &Loc);
@@ -200,6 +204,7 @@ private:
template <bool IsSVEPrefetch = false>
OperandMatchResultTy tryParsePrefetch(OperandVector &Operands);
OperandMatchResultTy tryParsePSBHint(OperandVector &Operands);
+ OperandMatchResultTy tryParseBTIHint(OperandVector &Operands);
OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands);
OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands);
template<bool AddFPZeroAsLiteral>
@@ -282,6 +287,7 @@ private:
k_FPImm,
k_Barrier,
k_PSBHint,
+ k_BTIHint,
} Kind;
SMLoc StartLoc, EndLoc;
@@ -385,6 +391,12 @@ private:
unsigned Val;
};
+ struct BTIHintOp {
+ const char *Data;
+ unsigned Length;
+ unsigned Val;
+ };
+
struct ExtendOp {
unsigned Val;
};
@@ -403,6 +415,7 @@ private:
struct SysCRImmOp SysCRImm;
struct PrefetchOp Prefetch;
struct PSBHintOp PSBHint;
+ struct BTIHintOp BTIHint;
struct ShiftExtendOp ShiftExtend;
};
@@ -457,6 +470,9 @@ public:
case k_PSBHint:
PSBHint = o.PSBHint;
break;
+ case k_BTIHint:
+ BTIHint = o.BTIHint;
+ break;
case k_ShiftExtend:
ShiftExtend = o.ShiftExtend;
break;
@@ -568,6 +584,16 @@ public:
return StringRef(PSBHint.Data, PSBHint.Length);
}
+ unsigned getBTIHint() const {
+ assert(Kind == k_BTIHint && "Invalid access!");
+ return BTIHint.Val;
+ }
+
+ StringRef getBTIHintName() const {
+ assert(Kind == k_BTIHint && "Invalid access!");
+ return StringRef(BTIHint.Data, BTIHint.Length);
+ }
+
StringRef getPrefetchName() const {
assert(Kind == k_Prefetch && "Invalid access!");
return StringRef(Prefetch.Data, Prefetch.Length);
@@ -658,7 +684,7 @@ public:
return DiagnosticPredicateTy::NearMatch;
}
- bool isSymbolicUImm12Offset(const MCExpr *Expr, unsigned Scale) const {
+ bool isSymbolicUImm12Offset(const MCExpr *Expr) const {
AArch64MCExpr::VariantKind ELFRefKind;
MCSymbolRefExpr::VariantKind DarwinRefKind;
int64_t Addend;
@@ -683,7 +709,7 @@ public:
// Note that we don't range-check the addend. It's adjusted modulo page
// size when converted, so there is no "out of range" condition when using
// @pageoff.
- return Addend >= 0 && (Addend % Scale) == 0;
+ return true;
} else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF ||
DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) {
// @gotpageoff/@tlvppageoff can only be used directly, not with an addend.
@@ -699,7 +725,7 @@ public:
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE)
- return isSymbolicUImm12Offset(getImm(), Scale);
+ return isSymbolicUImm12Offset(getImm());
int64_t Val = MCE->getValue();
return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000;
@@ -901,7 +927,7 @@ public:
for (unsigned i = 0; i != AllowedModifiers.size(); ++i) {
if (ELFRefKind == AllowedModifiers[i])
- return Addend == 0;
+ return true;
}
return false;
@@ -996,7 +1022,8 @@ public:
if (!isSysReg()) return false;
return (SysReg.PStateField == AArch64PState::PAN ||
SysReg.PStateField == AArch64PState::DIT ||
- SysReg.PStateField == AArch64PState::UAO);
+ SysReg.PStateField == AArch64PState::UAO ||
+ SysReg.PStateField == AArch64PState::SSBS);
}
bool isSystemPStateFieldWithImm0_15() const {
@@ -1185,6 +1212,7 @@ public:
bool isSysCR() const { return Kind == k_SysCR; }
bool isPrefetch() const { return Kind == k_Prefetch; }
bool isPSBHint() const { return Kind == k_PSBHint; }
+ bool isBTIHint() const { return Kind == k_BTIHint; }
bool isShiftExtend() const { return Kind == k_ShiftExtend; }
bool isShifter() const {
if (!isShiftExtend())
@@ -1702,6 +1730,11 @@ public:
Inst.addOperand(MCOperand::createImm(getPSBHint()));
}
+ void addBTIHintOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getBTIHint()));
+ }
+
void addShifterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
unsigned Imm =
@@ -1950,6 +1983,19 @@ public:
return Op;
}
+ static std::unique_ptr<AArch64Operand> CreateBTIHint(unsigned Val,
+ StringRef Str,
+ SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_BTIHint, Ctx);
+ Op->BTIHint.Val = Val << 1 | 32;
+ Op->BTIHint.Data = Str.data();
+ Op->BTIHint.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static std::unique_ptr<AArch64Operand>
CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
@@ -2030,6 +2076,9 @@ void AArch64Operand::print(raw_ostream &OS) const {
if (!getShiftExtendAmount() && !hasShiftExtendAmount())
break;
LLVM_FALLTHROUGH;
+ case k_BTIHint:
+ OS << getBTIHintName();
+ break;
case k_ShiftExtend:
OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
<< getShiftExtendAmount();
@@ -2395,6 +2444,29 @@ AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
return MatchOperand_Success;
}
+/// tryParseBTIHint - Try to parse a BTI operand, mapped to a HINT instruction.
+OperandMatchResultTy
+AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ auto BTI = AArch64BTIHint::lookupBTIByName(Tok.getString());
+ if (!BTI) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(AArch64Operand::CreateBTIHint(
+ BTI->Encoding, Tok.getString(), S, getContext()));
+ return MatchOperand_Success;
+}
+
/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
/// instruction.
OperandMatchResultTy
@@ -2453,17 +2525,34 @@ AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Expr;
- const AsmToken &Tok = getParser().getTok();
- if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) {
- if (getParser().parseExpression(Expr))
- return MatchOperand_ParseFail;
+ // Leave anything with a bracket to the default for SVE
+ if (getParser().getTok().is(AsmToken::LBrac))
+ return MatchOperand_NoMatch;
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext()));
+ if (getParser().getTok().is(AsmToken::Hash))
+ getParser().Lex(); // Eat hash token.
- return MatchOperand_Success;
+ if (parseSymbolicImmVal(Expr))
+ return MatchOperand_ParseFail;
+
+ AArch64MCExpr::VariantKind ELFRefKind;
+ MCSymbolRefExpr::VariantKind DarwinRefKind;
+ int64_t Addend;
+ if (classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
+ if (DarwinRefKind == MCSymbolRefExpr::VK_None &&
+ ELFRefKind == AArch64MCExpr::VK_INVALID) {
+ // No modifier was specified at all; this is the syntax for an ELF basic
+ // ADR relocation (unfortunately).
+ Expr = AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS, getContext());
+ } else {
+ Error(S, "unexpected adr label");
+ return MatchOperand_ParseFail;
+ }
}
- return MatchOperand_NoMatch;
+
+ SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext()));
+ return MatchOperand_Success;
}
/// tryParseFPImm - A floating point immediate expression operand.
@@ -2723,6 +2812,34 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
return MatchOperand_Success;
}
+static const struct Extension {
+ const char *Name;
+ const FeatureBitset Features;
+} ExtensionMap[] = {
+ {"crc", {AArch64::FeatureCRC}},
+ {"sm4", {AArch64::FeatureSM4}},
+ {"sha3", {AArch64::FeatureSHA3}},
+ {"sha2", {AArch64::FeatureSHA2}},
+ {"aes", {AArch64::FeatureAES}},
+ {"crypto", {AArch64::FeatureCrypto}},
+ {"fp", {AArch64::FeatureFPARMv8}},
+ {"simd", {AArch64::FeatureNEON}},
+ {"ras", {AArch64::FeatureRAS}},
+ {"lse", {AArch64::FeatureLSE}},
+ {"predres", {AArch64::FeaturePredRes}},
+ {"ccdp", {AArch64::FeatureCacheDeepPersist}},
+ {"mte", {AArch64::FeatureMTE}},
+ {"tlb-rmi", {AArch64::FeatureTLB_RMI}},
+ {"pan-rwv", {AArch64::FeaturePAN_RWV}},
+ {"ccpp", {AArch64::FeatureCCPP}},
+ {"sve", {AArch64::FeatureSVE}},
+ // FIXME: Unsupported extensions
+ {"pan", {}},
+ {"lor", {}},
+ {"rdma", {}},
+ {"profile", {}},
+};
+
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
if (FBS[AArch64::HasV8_1aOps])
Str += "ARMv8.1a";
@@ -2732,8 +2849,18 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv8.3a";
else if (FBS[AArch64::HasV8_4aOps])
Str += "ARMv8.4a";
- else
- Str += "(unknown)";
+ else if (FBS[AArch64::HasV8_5aOps])
+ Str += "ARMv8.5a";
+ else {
+ auto ext = std::find_if(std::begin(ExtensionMap),
+ std::end(ExtensionMap),
+ [&](const Extension& e)
+ // Use & in case multiple features are enabled
+ { return (FBS & e.Features) != FeatureBitset(); }
+ );
+
+ Str += ext != std::end(ExtensionMap) ? ext->Name : "(unknown)";
+ }
}
void AArch64AsmParser::createSysAlias(uint16_t Encoding, OperandVector &Operands,
@@ -2812,6 +2939,23 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
return TokError(Str.c_str());
}
createSysAlias(TLBI->Encoding, Operands, S);
+ } else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp") {
+ const AArch64PRCTX::PRCTX *PRCTX = AArch64PRCTX::lookupPRCTXByName(Op);
+ if (!PRCTX)
+ return TokError("invalid operand for prediction restriction instruction");
+ else if (!PRCTX->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str(
+ Mnemonic.upper() + std::string(PRCTX->Name) + " requires ");
+ setRequiredFeatureString(PRCTX->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
+ }
+ uint16_t PRCTX_Op2 =
+ Mnemonic == "cfp" ? 4 :
+ Mnemonic == "dvp" ? 5 :
+ Mnemonic == "cpp" ? 7 :
+ 0;
+ assert(PRCTX_Op2 && "Invalid mnemonic for prediction restriction instruction");
+ createSysAlias(PRCTX->Encoding << 3 | PRCTX_Op2 , Operands, S);
}
Parser.Lex(); // Eat operand.
@@ -3630,8 +3774,10 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
- // IC, DC, AT, and TLBI instructions are aliases for the SYS instruction.
- if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi")
+ // IC, DC, AT, TLBI and Prediction invalidation instructions are aliases for
+ // the SYS instruction.
+ if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi" ||
+ Head == "cfp" || Head == "dvp" || Head == "cpp")
return parseSysAlias(Head, NameLoc, Operands);
Operands.push_back(
@@ -3685,13 +3831,9 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- // Read the first operand.
- if (parseOperand(Operands, false, false)) {
- return true;
- }
- unsigned N = 2;
- while (parseOptionalToken(AsmToken::Comma)) {
+ unsigned N = 1;
+ do {
// Parse and remember the operand.
if (parseOperand(Operands, (N == 4 && condCodeFourthOperand) ||
(N == 3 && condCodeThirdOperand) ||
@@ -3719,7 +3861,7 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
AArch64Operand::CreateToken("!", false, ELoc, getContext()));
++N;
- }
+ } while (parseOptionalToken(AsmToken::Comma));
}
if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
@@ -3956,6 +4098,15 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
"unpredictable STXP instruction, status is also a source");
break;
}
+ case AArch64::LDGV: {
+ unsigned Rt = Inst.getOperand(0).getReg();
+ unsigned Rn = Inst.getOperand(1).getReg();
+ if (RI->isSubRegisterEq(Rt, Rn)) {
+ return Error(Loc[0],
+ "unpredictable LDGV instruction, writeback register is also "
+ "the target register");
+ }
+ }
}
@@ -4090,6 +4241,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "index must be an integer in range [-128, 127].");
case Match_InvalidMemoryIndexedSImm9:
return Error(Loc, "index must be an integer in range [-256, 255].");
+ case Match_InvalidMemoryIndexed16SImm9:
+ return Error(Loc, "index must be a multiple of 16 in range [-4096, 4080].");
case Match_InvalidMemoryIndexed8SImm10:
return Error(Loc, "index must be a multiple of 8 in range [-4096, 4088].");
case Match_InvalidMemoryIndexed4SImm7:
@@ -4106,6 +4259,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "index must be a multiple of 2 in range [0, 62].");
case Match_InvalidMemoryIndexed8UImm6:
return Error(Loc, "index must be a multiple of 8 in range [0, 504].");
+ case Match_InvalidMemoryIndexed16UImm6:
+ return Error(Loc, "index must be a multiple of 16 in range [0, 1008].");
case Match_InvalidMemoryIndexed4UImm6:
return Error(Loc, "index must be a multiple of 4 in range [0, 252].");
case Match_InvalidMemoryIndexed2UImm6:
@@ -4742,10 +4897,12 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidMemoryIndexed2UImm6:
case Match_InvalidMemoryIndexed4UImm6:
case Match_InvalidMemoryIndexed8UImm6:
+ case Match_InvalidMemoryIndexed16UImm6:
case Match_InvalidMemoryIndexedSImm6:
case Match_InvalidMemoryIndexedSImm5:
case Match_InvalidMemoryIndexedSImm8:
case Match_InvalidMemoryIndexedSImm9:
+ case Match_InvalidMemoryIndexed16SImm9:
case Match_InvalidMemoryIndexed8SImm10:
case Match_InvalidImm0_1:
case Match_InvalidImm0_7:
@@ -4874,6 +5031,12 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveUnreq(Loc);
else if (IDVal == ".inst")
parseDirectiveInst(Loc);
+ else if (IDVal == ".cfi_negate_ra_state")
+ parseDirectiveCFINegateRAState();
+ else if (IDVal == ".cfi_b_key_frame")
+ parseDirectiveCFIBKeyFrame();
+ else if (IDVal == ".arch_extension")
+ parseDirectiveArchExtension(Loc);
else if (IsMachO) {
if (IDVal == MCLOHDirectiveName())
parseDirectiveLOH(IDVal, Loc);
@@ -4884,28 +5047,6 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
}
-static const struct {
- const char *Name;
- const FeatureBitset Features;
-} ExtensionMap[] = {
- { "crc", {AArch64::FeatureCRC} },
- { "sm4", {AArch64::FeatureSM4} },
- { "sha3", {AArch64::FeatureSHA3} },
- { "sha2", {AArch64::FeatureSHA2} },
- { "aes", {AArch64::FeatureAES} },
- { "crypto", {AArch64::FeatureCrypto} },
- { "fp", {AArch64::FeatureFPARMv8} },
- { "simd", {AArch64::FeatureNEON} },
- { "ras", {AArch64::FeatureRAS} },
- { "lse", {AArch64::FeatureLSE} },
-
- // FIXME: Unsupported extensions
- { "pan", {} },
- { "lor", {} },
- { "rdma", {} },
- { "profile", {} },
-};
-
static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
SmallVector<StringRef, 4> &RequestedExtensions) {
const bool NoCrypto =
@@ -4927,6 +5068,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
RequestedExtensions.push_back("aes");
break;
case AArch64::ArchKind::ARMV8_4A:
+ case AArch64::ArchKind::ARMV8_5A:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
RequestedExtensions.push_back("sha2");
@@ -4945,6 +5087,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
RequestedExtensions.push_back("noaes");
break;
case AArch64::ArchKind::ARMV8_4A:
+ case AArch64::ArchKind::ARMV8_5A:
RequestedExtensions.push_back("nosm4");
RequestedExtensions.push_back("nosha3");
RequestedExtensions.push_back("nosha2");
@@ -5014,6 +5157,50 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
return false;
}
+/// parseDirectiveArchExtension
+/// ::= .arch_extension [no]feature
+bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(getLexer().getLoc(), "expected architecture extension name");
+
+ const AsmToken &Tok = Parser.getTok();
+ StringRef Name = Tok.getString();
+ SMLoc ExtLoc = Tok.getLoc();
+ Lex();
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.arch_extension' directive"))
+ return true;
+
+ bool EnableFeature = true;
+ if (Name.startswith_lower("no")) {
+ EnableFeature = false;
+ Name = Name.substr(2);
+ }
+
+ MCSubtargetInfo &STI = copySTI();
+ FeatureBitset Features = STI.getFeatureBits();
+ for (const auto &Extension : ExtensionMap) {
+ if (Extension.Name != Name)
+ continue;
+
+ if (Extension.Features.none())
+ return Error(ExtLoc, "unsupported architectural extension: " + Name);
+
+ FeatureBitset ToggleFeatures = EnableFeature
+ ? (~Features & Extension.Features)
+ : (Features & Extension.Features);
+ uint64_t Features =
+ ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
+ setAvailableFeatures(Features);
+ return false;
+ }
+
+ return Error(ExtLoc, "unknown architectural extension: " + Name);
+}
+
static SMLoc incrementLoc(SMLoc L, int Offset) {
return SMLoc::getFromPointer(L.getPointer() + Offset);
}
@@ -5267,6 +5454,23 @@ bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
return false;
}
+bool AArch64AsmParser::parseDirectiveCFINegateRAState() {
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
+ getStreamer().EmitCFINegateRAState();
+ return false;
+}
+
+/// parseDirectiveCFIBKeyFrame
+/// ::= .cfi_b_key_frame
+bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() {
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.cfi_b_key_frame'"))
+ return true;
+ getStreamer().EmitCFIBKeyFrame();
+ return false;
+}
+
bool
AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
@@ -5288,28 +5492,20 @@ AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
return true;
}
- const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr);
- if (!BE)
- return false;
-
- SE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
- if (!SE)
- return false;
- DarwinRefKind = SE->getKind();
-
- if (BE->getOpcode() != MCBinaryExpr::Add &&
- BE->getOpcode() != MCBinaryExpr::Sub)
+ // Check that it looks like a symbol + an addend
+ MCValue Res;
+ bool Relocatable = Expr->evaluateAsRelocatable(Res, nullptr, nullptr);
+ if (!Relocatable || Res.getSymB())
return false;
- // See if the addend is a constant, otherwise there's more going
- // on here than we can deal with.
- auto AddendExpr = dyn_cast<MCConstantExpr>(BE->getRHS());
- if (!AddendExpr)
+ // Treat expressions with an ELFRefKind (like ":abs_g1:3", or
+ // ":abs_g1:x" where x is constant) as symbolic even if there is no symbol.
+ if (!Res.getSymA() && ELFRefKind == AArch64MCExpr::VK_INVALID)
return false;
- Addend = AddendExpr->getValue();
- if (BE->getOpcode() == MCBinaryExpr::Sub)
- Addend = -Addend;
+ if (Res.getSymA())
+ DarwinRefKind = Res.getSymA()->getKind();
+ Addend = Res.getConstant();
// It's some symbol reference + a constant addend, but really
// shouldn't use both Darwin and ELF syntax.
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index cef0ff346448..4102f1eb5cc1 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -12,7 +12,6 @@
#include "AArch64Disassembler.h"
#include "AArch64ExternalSymbolizer.h"
-#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
@@ -20,6 +19,8 @@
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -159,8 +160,8 @@ static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn,
const void *Decoder);
static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBaseAddSubImm(MCInst &Inst, uint32_t insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
@@ -219,6 +220,11 @@ static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeLoadAllocTagArrayInstruction(MCInst &Inst,
+ uint32_t insn,
+ uint64_t address,
+ const void* Decoder);
+
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
case MCDisassembler::Success:
@@ -1402,6 +1408,8 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
case AArch64::STPSpost:
case AArch64::LDPSpre:
case AArch64::STPSpre:
+ case AArch64::STGPpre:
+ case AArch64::STGPpost:
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
break;
}
@@ -1415,6 +1423,8 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
case AArch64::LDPXpre:
case AArch64::STPXpre:
case AArch64::LDPSWpre:
+ case AArch64::STGPpre:
+ case AArch64::STGPpost:
NeedsDisjointWritebackTransfer = true;
LLVM_FALLTHROUGH;
case AArch64::LDNPXi:
@@ -1422,6 +1432,7 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
case AArch64::LDPXi:
case AArch64::STPXi:
case AArch64::LDPSWi:
+ case AArch64::STGPi:
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder);
break;
@@ -1652,8 +1663,8 @@ static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
return Success;
}
-static DecodeStatus DecodeBaseAddSubImm(MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder) {
+static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
+ uint64_t Addr, const void *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
unsigned Imm = fieldFromInstruction(insn, 10, 14);
@@ -1711,11 +1722,17 @@ static DecodeStatus DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn,
uint64_t op1 = fieldFromInstruction(insn, 16, 3);
uint64_t op2 = fieldFromInstruction(insn, 5, 3);
uint64_t crm = fieldFromInstruction(insn, 8, 4);
-
uint64_t pstate_field = (op1 << 3) | op2;
+ switch (pstate_field) {
+ case 0x01: // XAFlag
+ case 0x02: // AXFlag
+ return Fail;
+ }
+
if ((pstate_field == AArch64PState::PAN ||
- pstate_field == AArch64PState::UAO) && crm > 1)
+ pstate_field == AArch64PState::UAO ||
+ pstate_field == AArch64PState::SSBS) && crm > 1)
return Fail;
Inst.addOperand(MCOperand::createImm(pstate_field));
@@ -1835,3 +1852,25 @@ static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
Inst.addOperand(MCOperand::createImm(Imm + 1));
return Success;
}
+
+static DecodeStatus DecodeLoadAllocTagArrayInstruction(MCInst &Inst,
+ uint32_t insn,
+ uint64_t address,
+ const void* Decoder) {
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ unsigned Rt = fieldFromInstruction(insn, 0, 5);
+
+ // Outputs
+ DecodeGPR64spRegisterClass(Inst, Rn, address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt, address, Decoder);
+
+ // Input (Rn again)
+ Inst.addOperand(Inst.getOperand(0));
+
+ //Do this post decode since the raw number for xzr and sp is the same
+ if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) {
+ return SoftFail;
+ } else {
+ return Success;
+ }
+}
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 6e64fc9347b9..342655a29b1d 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
#include "AArch64ExternalSymbolizer.h"
-#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -60,6 +60,8 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
bool IsBranch, uint64_t Offset, uint64_t InstSize) {
+ if (!SymbolLookUp)
+ return false;
// FIXME: This method shares a lot of code with
// MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
// refactor the MCExternalSymbolizer interface to allow more of this
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 26e41215afc6..dcf2dd251149 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -775,8 +775,33 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
if (CnVal == 7) {
switch (CmVal) {
default: return false;
+ // Maybe IC, maybe Prediction Restriction
+ case 1:
+ switch (Op1Val) {
+ default: return false;
+ case 0: goto Search_IC;
+ case 3: goto Search_PRCTX;
+ }
+ // Prediction Restriction aliases
+ case 3: {
+ Search_PRCTX:
+ const AArch64PRCTX::PRCTX *PRCTX = AArch64PRCTX::lookupPRCTXByEncoding(Encoding >> 3);
+ if (!PRCTX || !PRCTX->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = PRCTX->NeedsReg;
+ switch (Op2Val) {
+ default: return false;
+ case 4: Ins = "cfp\t"; break;
+ case 5: Ins = "dvp\t"; break;
+ case 7: Ins = "cpp\t"; break;
+ }
+ Name = std::string(PRCTX->Name);
+ }
+ break;
// IC aliases
- case 1: case 5: {
+ case 5: {
+ Search_IC:
const AArch64IC::IC *IC = AArch64IC::lookupICByEncoding(Encoding);
if (!IC || !IC->haveFeatures(STI.getFeatureBits()))
return false;
@@ -787,7 +812,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
}
break;
// DC aliases
- case 4: case 6: case 10: case 11: case 12: case 14:
+ case 4: case 6: case 10: case 11: case 12: case 13: case 14:
{
const AArch64DC::DC *DC = AArch64DC::lookupDCByEncoding(Encoding);
if (!DC || !DC->haveFeatures(STI.getFeatureBits()))
@@ -1097,6 +1122,17 @@ void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum,
O << '#' << formatImm(psbhintop);
}
+void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned btihintop = (MI->getOperand(OpNum).getImm() ^ 32) >> 1;
+ auto BTI = AArch64BTIHint::lookupBTIByEncoding(btihintop);
+ if (BTI)
+ O << BTI->Name;
+ else
+ O << '#' << formatImm(btihintop);
+}
+
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 8dc9264f94a1..4e9982f5b7be 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -131,6 +131,9 @@ protected:
void printPSBHintOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBTIHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
void printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 62644ab2f457..688ca755d0b5 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/bit.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -342,27 +343,23 @@ static inline bool isValidDecodeLogicalImmediate(uint64_t val,
//
static inline float getFPImmFloat(unsigned Imm) {
// We expect an 8-bit binary encoding of a floating-point number here.
- union {
- uint32_t I;
- float F;
- } FPUnion;
uint8_t Sign = (Imm >> 7) & 0x1;
uint8_t Exp = (Imm >> 4) & 0x7;
uint8_t Mantissa = Imm & 0xf;
- // 8-bit FP iEEEE Float Encoding
+ // 8-bit FP IEEE Float Encoding
// abcd efgh aBbbbbbc defgh000 00000000 00000000
//
// where B = NOT(b);
- FPUnion.I = 0;
- FPUnion.I |= Sign << 31;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
- FPUnion.I |= (Exp & 0x3) << 23;
- FPUnion.I |= Mantissa << 19;
- return FPUnion.F;
+ uint32_t I = 0;
+ I |= Sign << 31;
+ I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+ I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+ I |= (Exp & 0x3) << 23;
+ I |= Mantissa << 19;
+ return bit_cast<float>(I);
}
/// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
@@ -757,12 +754,8 @@ static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) {
/// Returns true if Imm is the concatenation of a repeating pattern of type T.
template <typename T>
static inline bool isSVEMaskOfIdenticalElements(int64_t Imm) {
- union {
- int64_t Whole;
- T Parts[sizeof(int64_t)/sizeof(T)];
- } Vec { Imm };
-
- return all_of(Vec.Parts, [Vec](T Elem) { return Elem == Vec.Parts[0]; });
+ auto Parts = bit_cast<std::array<T, sizeof(int64_t) / sizeof(T)>>(Imm);
+ return all_of(Parts, [&](T Elem) { return Elem == Parts[0]; });
}
/// Returns true if Imm is valid for CPY/DUP.
@@ -790,29 +783,20 @@ static inline bool isSVEAddSubImm(int64_t Imm) {
/// Return true if Imm is valid for DUPM and has no single CPY/DUP equivalent.
static inline bool isSVEMoveMaskPreferredLogicalImmediate(int64_t Imm) {
- union {
- int64_t D;
- int32_t S[2];
- int16_t H[4];
- int8_t B[8];
- } Vec = { Imm };
-
- if (isSVECpyImm<int64_t>(Vec.D))
+ if (isSVECpyImm<int64_t>(Imm))
return false;
- if (isSVEMaskOfIdenticalElements<int32_t>(Imm) &&
- isSVECpyImm<int32_t>(Vec.S[0]))
- return false;
+ auto S = bit_cast<std::array<int32_t, 2>>(Imm);
+ auto H = bit_cast<std::array<int16_t, 4>>(Imm);
+ auto B = bit_cast<std::array<int8_t, 8>>(Imm);
- if (isSVEMaskOfIdenticalElements<int16_t>(Imm) &&
- isSVECpyImm<int16_t>(Vec.H[0]))
+ if (isSVEMaskOfIdenticalElements<int32_t>(Imm) && isSVECpyImm<int32_t>(S[0]))
return false;
-
- if (isSVEMaskOfIdenticalElements<int8_t>(Imm) &&
- isSVECpyImm<int8_t>(Vec.B[0]))
+ if (isSVEMaskOfIdenticalElements<int16_t>(Imm) && isSVECpyImm<int16_t>(H[0]))
return false;
-
- return isLogicalImmediate(Vec.D, 64);
+ if (isSVEMaskOfIdenticalElements<int8_t>(Imm) && isSVECpyImm<int8_t>(B[0]))
+ return false;
+ return isLogicalImmediate(Imm, 64);
}
inline static bool isAnyMOVZMovAlias(uint64_t Value, int RegWidth) {
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 856946555198..ed89d991d9fb 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -19,6 +19,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
@@ -109,11 +110,11 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case FK_Data_1:
return 1;
- case AArch64::fixup_aarch64_movw:
case FK_Data_2:
case FK_SecRel_2:
return 2;
+ case AArch64::fixup_aarch64_movw:
case AArch64::fixup_aarch64_pcrel_branch14:
case AArch64::fixup_aarch64_add_imm12:
case AArch64::fixup_aarch64_ldst_imm12_scale1:
@@ -144,9 +145,9 @@ static unsigned AdrImmBits(unsigned Value) {
return (hi19 << 5) | (lo2 << 29);
}
-static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- MCContext &Ctx, const Triple &TheTriple,
- bool IsResolved) {
+static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
+ uint64_t Value, MCContext &Ctx,
+ const Triple &TheTriple, bool IsResolved) {
unsigned Kind = Fixup.getKind();
int64_t SignedValue = static_cast<int64_t>(Value);
switch (Kind) {
@@ -214,10 +215,79 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
if (Value & 0xf)
Ctx.reportError(Fixup.getLoc(), "fixup must be 16-byte aligned");
return Value >> 4;
- case AArch64::fixup_aarch64_movw:
- Ctx.reportError(Fixup.getLoc(),
- "no resolvable MOVZ/MOVK fixups supported yet");
+ case AArch64::fixup_aarch64_movw: {
+ AArch64MCExpr::VariantKind RefKind =
+ static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ if (AArch64MCExpr::getSymbolLoc(RefKind) != AArch64MCExpr::VK_ABS &&
+ AArch64MCExpr::getSymbolLoc(RefKind) != AArch64MCExpr::VK_SABS) {
+ // VK_GOTTPREL, VK_TPREL, VK_DTPREL are movw fixups, but they can't
+ // ever be resolved in the assembler.
+ Ctx.reportError(Fixup.getLoc(),
+ "relocation for a thread-local variable points to an "
+ "absolute symbol");
+ return Value;
+ }
+
+ if (!IsResolved) {
+ // FIXME: Figure out when this can actually happen, and verify our
+ // behavior.
+ Ctx.reportError(Fixup.getLoc(), "unresolved movw fixup not yet "
+ "implemented");
+ return Value;
+ }
+
+ if (AArch64MCExpr::getSymbolLoc(RefKind) == AArch64MCExpr::VK_SABS) {
+ switch (AArch64MCExpr::getAddressFrag(RefKind)) {
+ case AArch64MCExpr::VK_G0:
+ break;
+ case AArch64MCExpr::VK_G1:
+ SignedValue = SignedValue >> 16;
+ break;
+ case AArch64MCExpr::VK_G2:
+ SignedValue = SignedValue >> 32;
+ break;
+ case AArch64MCExpr::VK_G3:
+ SignedValue = SignedValue >> 48;
+ break;
+ default:
+ llvm_unreachable("Variant kind doesn't correspond to fixup");
+ }
+
+ } else {
+ switch (AArch64MCExpr::getAddressFrag(RefKind)) {
+ case AArch64MCExpr::VK_G0:
+ break;
+ case AArch64MCExpr::VK_G1:
+ Value = Value >> 16;
+ break;
+ case AArch64MCExpr::VK_G2:
+ Value = Value >> 32;
+ break;
+ case AArch64MCExpr::VK_G3:
+ Value = Value >> 48;
+ break;
+ default:
+ llvm_unreachable("Variant kind doesn't correspond to fixup");
+ }
+ }
+
+ if (RefKind & AArch64MCExpr::VK_NC) {
+ Value &= 0xFFFF;
+ }
+ else if (RefKind & AArch64MCExpr::VK_SABS) {
+ if (SignedValue > 0xFFFF || SignedValue < -0xFFFF)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+
+ // Invert the negative immediate because it will feed into a MOVN.
+ if (SignedValue < 0)
+ SignedValue = ~SignedValue;
+ Value = static_cast<uint64_t>(SignedValue);
+ }
+ else if (Value > 0xFFFF) {
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ }
return Value;
+ }
case AArch64::fixup_aarch64_pcrel_branch14:
// Signed 16-bit immediate
if (SignedValue > 32767 || SignedValue < -32768)
@@ -294,8 +364,9 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
return; // Doesn't change encoding.
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
MCContext &Ctx = Asm.getContext();
+ int64_t SignedValue = static_cast<int64_t>(Value);
// Apply any target-specific value adjustments.
- Value = adjustFixupValue(Fixup, Value, Ctx, TheTriple, IsResolved);
+ Value = adjustFixupValue(Fixup, Target, Value, Ctx, TheTriple, IsResolved);
// Shift the value into position.
Value <<= Info.TargetOffset;
@@ -322,6 +393,19 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
+
+ // FIXME: getFixupKindInfo() and getFixupKindNumBytes() could be fixed to
+ // handle this more cleanly. This may affect the output of -show-mc-encoding.
+ AArch64MCExpr::VariantKind RefKind =
+ static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ if (RefKind & AArch64MCExpr::VK_SABS) {
+ // If the immediate is negative, generate MOVN else MOVZ.
+ // (Bit 30 = 0) ==> MOVN, (Bit 30 = 1) ==> MOVZ.
+ if (SignedValue < 0)
+ Data[Offset + 3] &= ~(1 << 6);
+ else
+ Data[Offset + 3] |= (1 << 6);
+ }
}
bool AArch64AsmBackend::mayNeedRelaxation(const MCInst &Inst,
@@ -376,6 +460,14 @@ bool AArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
// to the linker -- a relocation!
if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
return true;
+
+ AArch64MCExpr::VariantKind RefKind =
+ static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ AArch64MCExpr::VariantKind SymLoc = AArch64MCExpr::getSymbolLoc(RefKind);
+ // LDR GOT relocations need a relocation
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_ldr_pcrel_imm19 &&
+ SymLoc == AArch64MCExpr::VK_GOT)
+ return true;
return false;
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index a11e396217af..2ccd7cef8bef 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -138,7 +138,9 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
} else
return ELF::R_AARCH64_PREL64;
case AArch64::fixup_aarch64_pcrel_adr_imm21:
- assert(SymLoc == AArch64MCExpr::VK_NONE && "unexpected ADR relocation");
+ if (SymLoc != AArch64MCExpr::VK_ABS)
+ Ctx.reportError(Fixup.getLoc(),
+ "invalid symbol kind for ADR relocation");
return R_CLS(ADR_PREL_LO21);
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
if (SymLoc == AArch64MCExpr::VK_ABS && !IsNC)
@@ -169,6 +171,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
case AArch64::fixup_aarch64_ldr_pcrel_imm19:
if (SymLoc == AArch64MCExpr::VK_GOTTPREL)
return R_CLS(TLSIE_LD_GOTTPREL_PREL19);
+ if (SymLoc == AArch64MCExpr::VK_GOT)
+ return R_CLS(GOT_LD_PREL19);
return R_CLS(LD_PREL_LO19);
case AArch64::fixup_aarch64_pcrel_branch14:
return R_CLS(TSTBR14);
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index c0ef8b670286..9a7e34b0aeb1 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -60,16 +60,6 @@ void AArch64TargetAsmStreamer::emitInst(uint32_t Inst) {
OS << "\t.inst\t0x" << Twine::utohexstr(Inst) << "\n";
}
-class AArch64TargetELFStreamer : public AArch64TargetStreamer {
-private:
- AArch64ELFStreamer &getStreamer();
-
- void emitInst(uint32_t Inst) override;
-
-public:
- AArch64TargetELFStreamer(MCStreamer &S) : AArch64TargetStreamer(S) {}
-};
-
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
/// the appropriate points in the object files. These symbols are defined in the
/// AArch64 ELF ABI:
@@ -85,8 +75,6 @@ public:
/// by MachO. Beware!
class AArch64ELFStreamer : public MCELFStreamer {
public:
- friend class AArch64TargetELFStreamer;
-
AArch64ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> Emitter)
@@ -154,6 +142,11 @@ public:
MCELFStreamer::EmitValueImpl(Value, Size, Loc);
}
+ void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
+ SMLoc Loc) override {
+ EmitDataMappingSymbol();
+ MCObjectStreamer::emitFill(NumBytes, FillValue, Loc);
+ }
private:
enum ElfMappingSymbol {
EMS_None,
@@ -192,6 +185,8 @@ private:
} // end anonymous namespace
+namespace llvm {
+
AArch64ELFStreamer &AArch64TargetELFStreamer::getStreamer() {
return static_cast<AArch64ELFStreamer &>(Streamer);
}
@@ -200,8 +195,6 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
getStreamer().emitInst(Inst);
}
-namespace llvm {
-
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
MCInstPrinter *InstPrint,
@@ -221,14 +214,4 @@ MCELFStreamer *createAArch64ELFStreamer(MCContext &Context,
return S;
}
-MCTargetStreamer *
-createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
- const Triple &TT = STI.getTargetTriple();
- if (TT.isOSBinFormatELF())
- return new AArch64TargetELFStreamer(S);
- if (TT.isOSBinFormatCOFF())
- return new AArch64TargetWinCOFFStreamer(S);
- return nullptr;
-}
-
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index ebb49121c1bf..58e4a9c9a9e9 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -115,6 +115,7 @@ AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() {
CommentString = ";";
ExceptionsType = ExceptionHandling::WinEH;
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
}
AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() {
@@ -131,4 +132,7 @@ AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() {
CommentString = "//";
ExceptionsType = ExceptionHandling::DwarfCFI;
+ // The default is dwarf, but WinEH can be enabled optionally, which requires
+ // WinEHEncodingType to be set.
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index cd937935ddbf..729486b1020c 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "AArch64MCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/ELF.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -62,8 +62,10 @@ StringRef AArch64MCExpr::getVariantKindName() const {
case VK_TLSDESC_LO12: return ":tlsdesc_lo12:";
case VK_ABS_PAGE: return "";
case VK_ABS_PAGE_NC: return ":pg_hi21_nc:";
+ case VK_GOT: return ":got:";
case VK_GOT_PAGE: return ":got:";
case VK_GOT_LO12: return ":got_lo12:";
+ case VK_GOTTPREL: return ":gottprel:";
case VK_GOTTPREL_PAGE: return ":gottprel:";
case VK_GOTTPREL_LO12_NC: return ":gottprel_lo12:";
case VK_GOTTPREL_G1: return ":gottprel_g1:";
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 4ceda7e122f4..0f8198ba4e9b 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -16,6 +16,7 @@
#include "AArch64MCAsmInfo.h"
#include "AArch64WinCOFFStreamer.h"
#include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInstrAnalysis.h"
@@ -24,12 +25,14 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define GET_INSTRINFO_MC_HELPERS
#include "AArch64GenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
@@ -153,6 +156,31 @@ public:
}
return false;
}
+
+ std::vector<std::pair<uint64_t, uint64_t>>
+ findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotPltSectionVA,
+ const Triple &TargetTriple) const override {
+ // Do a lightweight parsing of PLT entries.
+ std::vector<std::pair<uint64_t, uint64_t>> Result;
+ for (uint64_t Byte = 0, End = PltContents.size(); Byte + 7 < End;
+ Byte += 4) {
+ uint32_t Insn = support::endian::read32le(PltContents.data() + Byte);
+ // Check for adrp.
+ if ((Insn & 0x9f000000) != 0x90000000)
+ continue;
+ uint64_t Imm = (((PltSectionVA + Byte) >> 12) << 12) +
+ (((Insn >> 29) & 3) << 12) + (((Insn >> 5) & 0x3ffff) << 14);
+ uint32_t Insn2 = support::endian::read32le(PltContents.data() + Byte + 4);
+ // Check for: ldr Xt, [Xn, #pimm].
+ if (Insn2 >> 22 == 0x3e5) {
+ Imm += ((Insn2 >> 10) & 0xfff) << 3;
+ Result.push_back(std::make_pair(PltSectionVA + Byte, Imm));
+ Byte += 4;
+ }
+ }
+ return Result;
+ }
};
} // end anonymous namespace
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 63f50778ccdb..0f22f69bd5b0 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -84,6 +84,7 @@ void initLLVMToCVRegMapping(MCRegisterInfo *MRI);
// Defines symbolic names for the AArch64 instructions.
//
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index dee964df2635..a6b8d963bef9 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -13,6 +13,7 @@
#include "AArch64TargetStreamer.h"
#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
@@ -52,3 +53,17 @@ void AArch64TargetStreamer::emitInst(uint32_t Inst) {
getStreamer().EmitBytes(StringRef(Buffer, 4));
}
+
+namespace llvm {
+
+MCTargetStreamer *
+createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return new AArch64TargetELFStreamer(S);
+ if (TT.isOSBinFormatCOFF())
+ return new AArch64TargetWinCOFFStreamer(S);
+ return nullptr;
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 51432830f795..73fb9baea3e3 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -12,6 +12,10 @@
#include "llvm/MC/MCStreamer.h"
+namespace {
+class AArch64ELFStreamer;
+}
+
namespace llvm {
class AArch64TargetStreamer : public MCTargetStreamer {
@@ -33,10 +37,75 @@ public:
/// Callback used to implement the .inst directive.
virtual void emitInst(uint32_t Inst);
+ virtual void EmitARM64WinCFIAllocStack(unsigned Size) {}
+ virtual void EmitARM64WinCFISaveFPLR(int Offset) {}
+ virtual void EmitARM64WinCFISaveFPLRX(int Offset) {}
+ virtual void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFRegPX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISetFP() {}
+ virtual void EmitARM64WinCFIAddFP(unsigned Size) {}
+ virtual void EmitARM64WinCFINop() {}
+ virtual void EmitARM64WinCFIPrologEnd() {}
+ virtual void EmitARM64WinCFIEpilogStart() {}
+ virtual void EmitARM64WinCFIEpilogEnd() {}
+
private:
std::unique_ptr<AssemblerConstantPools> ConstantPools;
};
+class AArch64TargetELFStreamer : public AArch64TargetStreamer {
+private:
+ AArch64ELFStreamer &getStreamer();
+
+ void emitInst(uint32_t Inst) override;
+
+public:
+ AArch64TargetELFStreamer(MCStreamer &S) : AArch64TargetStreamer(S) {}
+};
+
+class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer {
+private:
+ // True if we are processing SEH directives in an epilogue.
+ bool InEpilogCFI = false;
+
+ // Symbol of the current epilog for which we are processing SEH directives.
+ MCSymbol *CurrentEpilog = nullptr;
+public:
+ AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S)
+ : AArch64TargetStreamer(S) {}
+
+ // The unwind codes on ARM64 Windows are documented at
+ // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+ void EmitARM64WinCFIAllocStack(unsigned Size) override;
+ void EmitARM64WinCFISaveFPLR(int Offset) override;
+ void EmitARM64WinCFISaveFPLRX(int Offset) override;
+ void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFRegPX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISetFP() override;
+ void EmitARM64WinCFIAddFP(unsigned Size) override;
+ void EmitARM64WinCFINop() override;
+ void EmitARM64WinCFIPrologEnd() override;
+ void EmitARM64WinCFIEpilogStart() override;
+ void EmitARM64WinCFIEpilogEnd() override;
+private:
+ void EmitARM64WinUnwindCode(unsigned UnwindCode, int Reg, int Offset);
+};
+
+MCTargetStreamer *
+createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index 9871dc553bed..b828ab832e9d 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -11,31 +11,184 @@
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCWin64EH.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
using namespace llvm;
namespace {
class AArch64WinCOFFStreamer : public MCWinCOFFStreamer {
-public:
- friend class AArch64TargetWinCOFFStreamer;
+ Win64EH::ARM64UnwindEmitter EHStreamer;
+public:
AArch64WinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> AB,
std::unique_ptr<MCCodeEmitter> CE,
std::unique_ptr<MCObjectWriter> OW)
: MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {}
+ void EmitWinEHHandlerData(SMLoc Loc) override;
+ void EmitWindowsUnwindTables() override;
void FinishImpl() override;
};
+void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
+ MCStreamer::EmitWinEHHandlerData(Loc);
+
+ // We have to emit the unwind info now, because this directive
+ // actually switches to the .xdata section!
+ EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo());
+}
+
+void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() {
+ if (!getNumWinFrameInfos())
+ return;
+ EHStreamer.Emit(*this);
+}
+
void AArch64WinCOFFStreamer::FinishImpl() {
EmitFrames(nullptr);
+ EmitWindowsUnwindTables();
MCWinCOFFStreamer::FinishImpl();
}
} // end anonymous namespace
namespace llvm {
+
+// Helper function to common out unwind code setup for those codes that can
+// belong to both prolog and epilog.
+// There are three types of Windows ARM64 SEH codes. They can
+// 1) take no operands: SEH_Nop, SEH_PrologEnd, SEH_EpilogStart, SEH_EpilogEnd
+// 2) take an offset: SEH_StackAlloc, SEH_SaveFPLR, SEH_SaveFPLR_X
+// 3) take a register and an offset/size: all others
+void AArch64TargetWinCOFFStreamer::EmitARM64WinUnwindCode(unsigned UnwindCode,
+ int Reg,
+ int Offset) {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+ MCSymbol *Label = S.EmitCFILabel();
+ auto Inst = WinEH::Instruction(UnwindCode, Label, Reg, Offset);
+ if (InEpilogCFI)
+ CurFrame->EpilogMap[CurrentEpilog].push_back(Inst);
+ else
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIAllocStack(unsigned Size) {
+ unsigned Op = Win64EH::UOP_AllocSmall;
+ if (Size >= 16384)
+ Op = Win64EH::UOP_AllocLarge;
+ else if (Size >= 512)
+ Op = Win64EH::UOP_AllocMedium;
+ EmitARM64WinUnwindCode(Op, -1, Size);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFPLR(int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFPLR, -1, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFPLRX(int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFPLRX, -1, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveReg(unsigned Reg,
+ int Offset) {
+ assert(Offset >= 0 && Offset <= 504 &&
+ "Offset for save reg should be >= 0 && <= 504");
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveReg, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegP(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegP, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegPX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegPX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFReg(unsigned Reg,
+ int Offset) {
+ assert(Offset >= 0 && Offset <= 504 &&
+ "Offset for save reg should be >= 0 && <= 504");
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFReg, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFRegX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFRegX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFRegP(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFRegP, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFRegPX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFRegPX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISetFP() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SetFP, -1, 0);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIAddFP(unsigned Offset) {
+ assert(Offset <= 2040 && "UOP_AddFP must have offset <= 2040");
+ EmitARM64WinUnwindCode(Win64EH::UOP_AddFP, -1, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFINop() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_Nop, -1, 0);
+}
+
+// The functions below handle opcodes that can end up in either a prolog or
+// an epilog, but not both.
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIPrologEnd() {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ MCSymbol *Label = S.EmitCFILabel();
+ CurFrame->PrologEnd = Label;
+ WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0);
+ auto it = CurFrame->Instructions.begin();
+ CurFrame->Instructions.insert(it, Inst);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogStart() {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ InEpilogCFI = true;
+ CurrentEpilog = S.EmitCFILabel();
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogEnd() {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ InEpilogCFI = false;
+ MCSymbol *Label = S.EmitCFILabel();
+ WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0);
+ CurFrame->EpilogMap[CurrentEpilog].push_back(Inst);
+ CurrentEpilog = nullptr;
+}
+
MCWinCOFFStreamer *createAArch64WinCOFFStreamer(
MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
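Editorial note: EmitARM64WinCFIAllocStack above classifies a stack allocation into one of three SEH opcodes purely by size, with cut-offs at 512 and 16384 bytes. A throwaway sketch of that threshold logic (the enum is a stand-in, not the Win64EH::UOP_* opcodes):

#include <cassert>
#include <iostream>

enum class AllocClass { Small, Medium, Large };

// Mirrors the size thresholds used in EmitARM64WinCFIAllocStack above.
AllocClass classifyAlloc(unsigned Size) {
  if (Size >= 16384)
    return AllocClass::Large;
  if (Size >= 512)
    return AllocClass::Medium;
  return AllocClass::Small;
}

int main() {
  assert(classifyAlloc(64) == AllocClass::Small);
  assert(classifyAlloc(4096) == AllocClass::Medium);
  assert(classifyAlloc(1 << 20) == AllocClass::Large);
  std::cout << "ok\n";
}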
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
index c05422163584..ed265a876ab3 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
@@ -17,20 +17,6 @@
#include "AArch64TargetStreamer.h"
#include "llvm/MC/MCWinCOFFStreamer.h"
-namespace {
-class AArch64WinCOFFStreamer;
-
-class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer {
-private:
- AArch64WinCOFFStreamer &getStreamer();
-
-public:
- AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S)
- : AArch64TargetStreamer(S) {}
-};
-
-} // end anonymous namespace
-
namespace llvm {
MCWinCOFFStreamer *createAArch64WinCOFFStreamer(
diff --git a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7a8dd8bc5aee..23a65b345bad 100644
--- a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1535,7 +1535,7 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
}
-
+
//===----------------------------------------------------------------------===//
// SVE Floating Point Unary Operations - Unpredicated Group
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 23cc21ce2e7c..c88155db7037 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -62,6 +62,13 @@ namespace llvm {
}
namespace llvm {
+ namespace AArch64PRCTX {
+#define GET_PRCTX_IMPL
+#include "AArch64GenSystemOperands.inc"
+ }
+}
+
+namespace llvm {
namespace AArch64PRFM {
#define GET_PRFM_IMPL
#include "AArch64GenSystemOperands.inc"
@@ -104,6 +111,13 @@ namespace llvm {
}
namespace llvm {
+ namespace AArch64BTIHint {
+#define GET_BTI_IMPL
+#include "AArch64GenSystemOperands.inc"
+ }
+}
+
+namespace llvm {
namespace AArch64SysReg {
#define GET_SYSREG_IMPL
#include "AArch64GenSystemOperands.inc"
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 2874c4ab42ea..44c6a6b44895 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -388,6 +388,14 @@ namespace AArch64PSBHint {
#include "AArch64GenSystemOperands.inc"
}
+namespace AArch64BTIHint {
+ struct BTI : SysAlias {
+ using SysAlias::SysAlias;
+ };
+ #define GET_BTI_DECL
+ #include "AArch64GenSystemOperands.inc"
+}
+
namespace AArch64SE {
enum ShiftExtSpecifiers {
Invalid = -1,
@@ -499,6 +507,14 @@ namespace AArch64TLBI {
#include "AArch64GenSystemOperands.inc"
}
+namespace AArch64PRCTX {
+ struct PRCTX : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
+ };
+ #define GET_PRCTX_DECL
+ #include "AArch64GenSystemOperands.inc"
+}
+
namespace AArch64II {
/// Target Operand Flag enum.
enum TOF {
@@ -507,7 +523,7 @@ namespace AArch64II {
MO_NO_FLAG,
- MO_FRAGMENT = 0xf,
+ MO_FRAGMENT = 0x7,
/// MO_PAGE - A symbol operand with this flag represents the pc-relative
/// offset of the 4K page containing the symbol. This is used with the
@@ -540,6 +556,11 @@ namespace AArch64II {
/// by-12-bits instruction.
MO_HI12 = 7,
+ /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
+  /// reference is actually to the ".refptr.FOO" symbol. This is used for
+ /// stub symbols on windows.
+ MO_COFFSTUB = 0x8,
+
/// MO_GOT - This flag indicates that a symbol operand represents the
/// address of the GOT entry for the symbol, rather than the address of
/// the symbol itself.
@@ -560,6 +581,10 @@ namespace AArch64II {
/// to the symbol is for an import stub. This is used for DLL import
/// storage class indication on Windows.
MO_DLLIMPORT = 0x80,
+
+ /// MO_S - Indicates that the bits of the symbol operand represented by
+ /// MO_G0 etc are signed.
+ MO_S = 0x100,
};
} // end namespace AArch64II
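Editorial note: the MO_FRAGMENT mask shrinks from 0xf to 0x7 in the same hunk that introduces MO_COFFSTUB = 0x8, so the stub bit can coexist with the fragment kind in one flag word. A self-contained sketch of that bit layout, using only the values visible in the hunk (every other MO_* value is omitted):

#include <cassert>
#include <cstdint>

enum : uint16_t {
  MO_FRAGMENT  = 0x7,
  MO_HI12      = 7,
  MO_COFFSTUB  = 0x8,
  MO_DLLIMPORT = 0x80,
  MO_S         = 0x100,
};

int main() {
  uint16_t Flags = MO_HI12 | MO_COFFSTUB;   // fragment and stub bit together
  assert((Flags & MO_FRAGMENT) == MO_HI12); // fragment still extractable
  assert(Flags & MO_COFFSTUB);              // stub bit not clobbered
  // Under the old 0xf mask, bit 0x8 would have been swallowed by the fragment:
  assert(((MO_HI12 | MO_COFFSTUB) & 0xf) != MO_HI12);
  return 0;
}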
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
index 2b49c2ea88e1..bb7801c172f6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -37,10 +37,13 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
// SI Passes
+FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesPass();
+FunctionPass *createSIFixupVectorISelPass();
+FunctionPass *createSIAddIMGInitPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass();
FunctionPass *createSIWholeQuadModePass();
@@ -57,6 +60,7 @@ FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPURewriteOutArgumentsPass();
+FunctionPass *createSIModeRegisterPass();
void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
@@ -69,10 +73,18 @@ Pass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
+FunctionPass *createAMDGPUAtomicOptimizerPass();
+void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
+extern char &AMDGPUAtomicOptimizerID;
+
ModulePass *createAMDGPULowerIntrinsicsPass();
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
extern char &AMDGPULowerIntrinsicsID;
+ModulePass *createAMDGPUFixFunctionBitcastsPass();
+void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
+extern char &AMDGPUFixFunctionBitcastsID;
+
FunctionPass *createAMDGPULowerKernelArgumentsPass();
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
extern char &AMDGPULowerKernelArgumentsID;
@@ -84,6 +96,9 @@ extern char &AMDGPULowerKernelAttributesID;
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;
+void initializeGCNDPPCombinePass(PassRegistry &);
+extern char &GCNDPPCombineID;
+
void initializeR600ClauseMergePassPass(PassRegistry &);
extern char &R600ClauseMergePassID;
@@ -114,6 +129,9 @@ extern char &SIFixSGPRCopiesID;
void initializeSIFixVGPRCopiesPass(PassRegistry &);
extern char &SIFixVGPRCopiesID;
+void initializeSIFixupVectorISelPass(PassRegistry &);
+extern char &SIFixupVectorISelID;
+
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
@@ -141,6 +159,9 @@ extern char &AMDGPUSimplifyLibCallsID;
void initializeAMDGPUUseNativeCallsPass(PassRegistry &);
extern char &AMDGPUUseNativeCallsID;
+void initializeSIAddIMGInitPass(PassRegistry &);
+extern char &SIAddIMGInitID;
+
void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
extern char &AMDGPUPerfHintAnalysisID;
@@ -179,6 +200,9 @@ extern char &SIMemoryLegalizerID;
void initializeSIDebuggerInsertNopsPass(PassRegistry&);
extern char &SIDebuggerInsertNopsID;
+void initializeSIModeRegisterPass(PassRegistry&);
+extern char &SIModeRegisterID;
+
void initializeSIInsertWaitcntsPass(PassRegistry&);
extern char &SIInsertWaitcntsID;
@@ -190,6 +214,8 @@ extern char &AMDGPUUnifyDivergentExitNodesID;
ImmutablePass *createAMDGPUAAWrapperPass();
void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
+ImmutablePass *createAMDGPUExternalAAWrapperPass();
+void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
@@ -221,19 +247,18 @@ enum TargetIndex {
/// however on the GPU, each address space points to
/// a separate piece of memory that is unique from other
/// memory locations.
-struct AMDGPUAS {
- // The following address space values depend on the triple environment.
- unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
- unsigned FLAT_ADDRESS; ///< Address space for flat memory.
- unsigned REGION_ADDRESS; ///< Address space for region memory.
-
+namespace AMDGPUAS {
enum : unsigned {
// The maximum value for flat, generic, local, private, constant and region.
MAX_AMDGPU_ADDRESS = 6,
+ FLAT_ADDRESS = 0, ///< Address space for flat memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
+ REGION_ADDRESS = 2, ///< Address space for region memory.
+
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
LOCAL_ADDRESS = 3, ///< Address space for local memory.
+ PRIVATE_ADDRESS = 5, ///< Address space for private memory.
CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
@@ -268,14 +293,6 @@ struct AMDGPUAS {
// Some places use this if the address space can't be determined.
UNKNOWN_ADDRESS_SPACE = ~0u,
};
-};
-
-namespace llvm {
-namespace AMDGPU {
-AMDGPUAS getAMDGPUAS(const Module &M);
-AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
-AMDGPUAS getAMDGPUAS(Triple T);
-} // namespace AMDGPU
-} // namespace llvm
+}
#endif
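Editorial note: the AMDGPUAS change above replaces the per-triple struct with hard-coded address space numbers, which is why the getAMDGPUAS() helpers disappear. A toy sketch of what downstream queries look like once the numbering is fixed (the namespace and helper here are stand-ins, not the LLVM ones):

#include <cassert>

namespace FakeAMDGPUAS {
enum : unsigned {
  FLAT_ADDRESS = 0,
  GLOBAL_ADDRESS = 1,
  REGION_ADDRESS = 2,
  LOCAL_ADDRESS = 3,
  CONSTANT_ADDRESS = 4,
  PRIVATE_ADDRESS = 5,
  CONSTANT_ADDRESS_32BIT = 6,
  MAX_AMDGPU_ADDRESS = 6,
};
}

// With constant numbering, a query like this no longer needs an AMDGPUAS
// struct threaded through every pass.
bool isConstantAddressSpace(unsigned AS) {
  return AS == FakeAMDGPUAS::CONSTANT_ADDRESS ||
         AS == FakeAMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

int main() {
  assert(isConstantAddressSpace(4));
  assert(!isConstantAddressSpace(FakeAMDGPUAS::LOCAL_ADDRESS));
  return 0;
}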
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
index 445b69b35eb1..6a4cfe08e491 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -11,6 +11,10 @@ include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
include "AMDGPUFeatures.td"
+class BoolToList<bit Value> {
+ list<int> ret = !if(Value, [1]<int>, []<int>);
+}
+
//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//
@@ -140,6 +144,12 @@ def FeatureCIInsts : SubtargetFeature<"ci-insts",
"Additional instructions for CI+"
>;
+def FeatureVIInsts : SubtargetFeature<"vi-insts",
+ "VIInsts",
+ "true",
+ "Additional instructions for VI+"
+>;
+
def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
"GFX9Insts",
"true",
@@ -236,6 +246,12 @@ def FeatureDPP : SubtargetFeature<"dpp",
"Support DPP (Data Parallel Primitives) extension"
>;
+def FeatureR128A16 : SubtargetFeature<"r128-a16",
+ "HasR128A16",
+ "true",
+ "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
+>;
+
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
"HasIntClamp",
"true",
@@ -251,16 +267,19 @@ def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",
def FeatureDLInsts : SubtargetFeature<"dl-insts",
"HasDLInsts",
"true",
- "Has deep learning instructions"
+ "Has v_fmac_f32 and v_xnor_b32 instructions"
+>;
+
+def FeatureDotInsts : SubtargetFeature<"dot-insts",
+ "HasDotInsts",
+ "true",
+ "Has v_dot* instructions"
>;
-def FeatureD16PreservesUnusedBits : SubtargetFeature<
- "d16-preserves-unused-bits",
- "D16PreservesUnusedBits",
+def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
+ "EnableSRAMECC",
"true",
- "If present, then instructions defined by HasD16LoadStore predicate preserve "
- "unused bits. Otherwise instructions defined by HasD16LoadStore predicate "
- "zero unused bits."
+ "Enable SRAM ECC"
>;
//===------------------------------------------------------------===//
@@ -315,12 +334,6 @@ def FeatureEnableHugePrivateBuffer : SubtargetFeature<
"Enable private/scratch buffer sizes greater than 128 GB"
>;
-def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
- "EnableVGPRSpilling",
- "true",
- "Enable spilling of VGPRs to scratch memory"
->;
-
def FeatureDumpCode : SubtargetFeature <"DumpCode",
"DumpCode",
"true",
@@ -364,6 +377,16 @@ def FeatureEnableDS128 : SubtargetFeature<"enable-ds128",
"Use ds_{read|write}_b128"
>;
+// Sparse texture support requires that all result registers are zeroed when
+// PRTStrictNull is set to true. This feature is turned on for all architectures
+// but is enabled as a feature in case there are situations where PRTStrictNull
+// is disabled by the driver.
+def FeatureEnablePRTStrictNull : SubtargetFeature<"enable-prt-strict-null",
+ "EnablePRTStrictNull",
+ "true",
+ "Enable zeroing of result registers for sparse texture fetches"
+>;
+
// Unless +-flat-for-global is specified, turn on FlatForGlobal for
// all OS-es on VI and newer hardware to avoid assertion failures due
// to missing ADDR64 variants of MUBUF instructions.
@@ -390,6 +413,12 @@ def FeatureCodeObjectV3 : SubtargetFeature <
"Generate code object version 3"
>;
+def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
+ "HasTrigReducedRange",
+ "true",
+ "Requires use of fract on arguments to trig instructions"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -409,36 +438,36 @@ class GCNSubtargetFeatureGeneration <string Value,
def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureGCN,
- FeatureLDSBankCount32, FeatureMovrel]
+ FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange]
>;
def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
- FeatureCIInsts, FeatureMovrel]
+ FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange]
>;
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
- FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
+ FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
- FeatureIntClamp
+ FeatureIntClamp, FeatureTrigReducedRange
]
>;
def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
[FeatureFP64, FeatureLocalMemorySize65536,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
- FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
+ FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
- FeatureAddNoCarryInsts, FeatureScalarAtomics
+ FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16
]
>;
@@ -456,34 +485,41 @@ def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0,
[FeatureSouthernIslands,
FeatureFastFMAF32,
HalfRate64Ops,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1,
[FeatureSouthernIslands,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
[FeatureSeaIslands,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
[FeatureSeaIslands,
HalfRate64Ops,
FeatureLDSBankCount32,
- FeatureFastFMAF32]>;
+ FeatureFastFMAF32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
[FeatureSeaIslands,
FeatureLDSBankCount16,
- FeatureFastFMAF32]>;
+ FeatureFastFMAF32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3,
[FeatureSeaIslands,
- FeatureLDSBankCount16]>;
+ FeatureLDSBankCount16,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_4 : SubtargetFeatureISAVersion <7,0,4,
[FeatureSeaIslands,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
[FeatureVolcanicIslands,
@@ -491,49 +527,63 @@ def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
HalfRate64Ops,
FeatureLDSBankCount32,
FeatureXNACK,
- FeatureUnpackedD16VMem]>;
+ FeatureUnpackedD16VMem,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
FeatureSGPRInitBug,
- FeatureUnpackedD16VMem]>;
+ FeatureUnpackedD16VMem,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
- FeatureUnpackedD16VMem]>;
+ FeatureUnpackedD16VMem,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
[FeatureVolcanicIslands,
FeatureLDSBankCount16,
- FeatureXNACK]>;
+ FeatureXNACK,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
- FeatureD16PreservesUnusedBits]>;
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureXNACK,
- FeatureD16PreservesUnusedBits]>;
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4,
[FeatureGFX9,
FeatureLDSBankCount32,
FeatureFmaMixInsts,
- FeatureD16PreservesUnusedBits]>;
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
[FeatureGFX9,
HalfRate64Ops,
FeatureFmaMixInsts,
FeatureLDSBankCount32,
- FeatureDLInsts]>;
+ FeatureDLInsts,
+ FeatureDotInsts,
+ FeatureSRAMECC,
+ FeatureCodeObjectV3]>;
+
+def FeatureISAVersion9_0_9 : SubtargetFeatureISAVersion <9,0,9,
+ [FeatureGFX9,
+ FeatureMadMixInsts,
+ FeatureLDSBankCount32,
+ FeatureXNACK,
+ FeatureCodeObjectV3]>;
//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
@@ -665,8 +715,9 @@ def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
AssemblerPredicate<"!FeatureUnpackedD16VMem">;
-def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">,
- AssemblerPredicate<"FeatureD16PreservesUnusedBits">;
+def D16PreservesUnusedBits :
+ Predicate<"Subtarget->hasD16LoadStore() && !Subtarget->isSRAMECCEnabled()">,
+ AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;
def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
@@ -674,10 +725,10 @@ def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
AssemblerPredicate<"FeatureGFX9Insts">;
-def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarryInsts()">,
+def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
AssemblerPredicate<"FeatureAddNoCarryInsts">;
-def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarryInsts()">,
+def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">,
AssemblerPredicate<"!FeatureAddNoCarryInsts">;
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
@@ -697,6 +748,9 @@ def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,
def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureDPP">;
+def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
+ AssemblerPredicate<"FeatureR128A16">;
+
def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
AssemblerPredicate<"FeatureIntClamp">;
@@ -719,6 +773,9 @@ def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
AssemblerPredicate<"FeatureDLInsts">;
+def HasDotInsts : Predicate<"Subtarget->hasDotInsts()">,
+ AssemblerPredicate<"FeatureDotInsts">;
+
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;
@@ -727,7 +784,6 @@ def EnableLateCFGStructurize : Predicate<
include "SISchedule.td"
include "GCNProcessors.td"
include "AMDGPUInstrInfo.td"
-include "AMDGPUIntrinsics.td"
include "SIIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPURegisterBanks.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 974fbcb87191..73709ba13643 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -34,34 +34,28 @@ using namespace llvm;
// Register this pass...
char AMDGPUAAWrapperPass::ID = 0;
+char AMDGPUExternalAAWrapper::ID = 0;
INITIALIZE_PASS(AMDGPUAAWrapperPass, "amdgpu-aa",
"AMDGPU Address space based Alias Analysis", false, true)
+INITIALIZE_PASS(AMDGPUExternalAAWrapper, "amdgpu-aa-wrapper",
+ "AMDGPU Address space based Alias Analysis Wrapper", false, true)
+
ImmutablePass *llvm::createAMDGPUAAWrapperPass() {
return new AMDGPUAAWrapperPass();
}
+ImmutablePass *llvm::createAMDGPUExternalAAWrapperPass() {
+ return new AMDGPUExternalAAWrapper();
+}
+
void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
-// Must match the table in getAliasResult.
-AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_)
- : Arch(Arch_), AS(AS_) {
- // These arrarys are indexed by address space value
- // enum elements 0 ... to 6
- static const AliasResult ASAliasRulesPrivIsZero[7][7] = {
- /* Private Global Constant Group Flat Region Constant 32-bit */
- /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
- /* Global */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
- /* Constant */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
- /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias , NoAlias},
- /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
- /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
- /* Constant 32-bit */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias}
- };
- static const AliasResult ASAliasRulesGenIsZero[7][7] = {
+// These arrays are indexed by address space value enum elements 0 ... to 6
+static const AliasResult ASAliasRules[7][7] = {
/* Flat Global Region Group Constant Private Constant 32-bit */
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
@@ -70,37 +64,15 @@ AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Ar
/* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias, NoAlias , MayAlias},
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
/* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias}
- };
+};
+
+static AliasResult getAliasResult(unsigned AS1, unsigned AS2) {
static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");
- if (AS.FLAT_ADDRESS == 0) {
- assert(AS.GLOBAL_ADDRESS == 1 &&
- AS.REGION_ADDRESS == 2 &&
- AS.LOCAL_ADDRESS == 3 &&
- AS.CONSTANT_ADDRESS == 4 &&
- AS.PRIVATE_ADDRESS == 5 &&
- AS.CONSTANT_ADDRESS_32BIT == 6);
- ASAliasRules = &ASAliasRulesGenIsZero;
- } else {
- assert(AS.PRIVATE_ADDRESS == 0 &&
- AS.GLOBAL_ADDRESS == 1 &&
- AS.CONSTANT_ADDRESS == 2 &&
- AS.LOCAL_ADDRESS == 3 &&
- AS.FLAT_ADDRESS == 4 &&
- AS.REGION_ADDRESS == 5 &&
- AS.CONSTANT_ADDRESS_32BIT == 6);
- ASAliasRules = &ASAliasRulesPrivIsZero;
- }
-}
-AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
- unsigned AS2) const {
- if (AS1 > AS.MAX_AMDGPU_ADDRESS || AS2 > AS.MAX_AMDGPU_ADDRESS) {
- if (Arch == Triple::amdgcn)
- report_fatal_error("Pointer address space out of range");
- return AS1 == AS2 ? MayAlias : NoAlias;
- }
+ if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
+ return MayAlias;
- return (*ASAliasRules)[AS1][AS2];
+ return ASAliasRules[AS1][AS2];
}
AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
@@ -108,8 +80,9 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
- AliasResult Result = ASAliasRules.getAliasResult(asA, asB);
- if (Result == NoAlias) return Result;
+ AliasResult Result = getAliasResult(asA, asB);
+ if (Result == NoAlias)
+ return Result;
// Forward the query to the next alias analysis.
return AAResultBase::alias(LocA, LocB);
@@ -118,9 +91,9 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal) {
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
-
- if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS ||
- Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS_32BIT) {
+ unsigned AS = Base->getType()->getPointerAddressSpace();
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
return true;
}
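Editorial note: after this change the alias rules are a single static table keyed by the fixed address space numbers, and out-of-range spaces fall back to MayAlias instead of reporting a fatal error. A toy sketch of that lookup shape; the 3x3 table here is illustrative only, whereas the real pass uses the 7x7 ASAliasRules table shown in the hunk:

#include <cassert>

enum Result { NoAlias, MayAlias };

static const Result Rules[3][3] = {
    /*            Flat      Global    Local  */
    /* Flat   */ {MayAlias, MayAlias, MayAlias},
    /* Global */ {MayAlias, MayAlias, NoAlias},
    /* Local  */ {MayAlias, NoAlias,  MayAlias},
};

Result getAliasResult(unsigned AS1, unsigned AS2) {
  if (AS1 > 2 || AS2 > 2)
    return MayAlias; // unknown address spaces: stay conservative
  return Rules[AS1][AS2];
}

int main() {
  assert(getAliasResult(1, 2) == NoAlias);  // global vs. local never alias
  assert(getAliasResult(0, 7) == MayAlias); // out of range -> conservative
  return 0;
}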
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index 09ad51d5e42f..d76c9fc48199 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -33,14 +33,12 @@ class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
friend AAResultBase<AMDGPUAAResult>;
const DataLayout &DL;
- AMDGPUAS AS;
public:
explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
- DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS, T.getArch()) {}
+ DL(DL) {}
AMDGPUAAResult(AMDGPUAAResult &&Arg)
- : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
- ASAliasRules(Arg.ASAliasRules){}
+ : AAResultBase(std::move(Arg)), DL(Arg.DL) {}
/// Handle invalidation events from the new pass manager.
///
@@ -53,18 +51,6 @@ public:
private:
bool Aliases(const MDNode *A, const MDNode *B) const;
bool PathAliases(const MDNode *A, const MDNode *B) const;
-
- class ASAliasRulesTy {
- public:
- ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_);
-
- AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
-
- private:
- Triple::ArchType Arch;
- AMDGPUAS AS;
- const AliasResult (*ASAliasRules)[7][7];
- } ASAliasRules;
};
/// Analysis pass providing a never-invalidated alias analysis result.
@@ -110,6 +96,19 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
+// Wrapper around ExternalAAWrapperPass so that the default constructor gets the
+// callback.
+class AMDGPUExternalAAWrapper : public ExternalAAWrapperPass {
+public:
+ static char ID;
+
+ AMDGPUExternalAAWrapper() : ExternalAAWrapperPass(
+ [](Pass &P, Function &, AAResults &AAR) {
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ }) {}
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
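Editorial note: AMDGPUExternalAAWrapper above exists only so that default-constructing the pass installs a callback that merges the target's AA result into the shared results. A loose standalone sketch of that "wrapper with a baked-in callback" idea; ExternalHook, Results, and addResult are hypothetical names, not LLVM API:

#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Results {
  std::vector<std::string> Providers;
  void addResult(const std::string &Name) { Providers.push_back(Name); }
};

struct ExternalHook {
  using Callback = std::function<void(Results &)>;
  explicit ExternalHook(Callback CB) : CB(std::move(CB)) {}
  void run(Results &R) { if (CB) CB(R); }
  Callback CB;
};

// The subclass exists only so that default construction installs the hook.
struct TargetHook : ExternalHook {
  TargetHook() : ExternalHook([](Results &R) { R.addResult("amdgpu-aa"); }) {}
};

int main() {
  Results R;
  TargetHook().run(R);
  std::cout << R.Providers.front() << "\n"; // prints "amdgpu-aa"
}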
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index d4bbb2c1eb8d..fc65430b745f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -86,8 +86,6 @@ void AMDGPUAlwaysInline::recursivelyVisitUsers(
}
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
- AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);
-
std::vector<GlobalAlias*> AliasesToRemove;
SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
@@ -122,7 +120,7 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
for (GlobalVariable &GV : M.globals()) {
// TODO: Region address
unsigned AS = GV.getType()->getAddressSpace();
- if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
+ if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
continue;
recursivelyVisitUsers(GV, FuncsToAlwaysInline);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 1a70833a4472..896ac9c87779 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -46,7 +46,6 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
const TargetMachine *TM = nullptr;
- AMDGPUAS AS;
bool addFeatureAttributes(Function &F);
@@ -67,11 +66,10 @@ public:
CallGraphSCCPass::getAnalysisUsage(AU);
}
- static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
+ static bool visitConstantExpr(const ConstantExpr *CE);
static bool visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
- AMDGPUAS AS);
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};
} // end anonymous namespace
@@ -85,20 +83,18 @@ INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
// The queue ptr is only needed when casting to flat, not from it.
-static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
- return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
+static bool castRequiresQueuePtr(unsigned SrcAS) {
+ return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}
-static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
- const AMDGPUAS &AS) {
- return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
+static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
+ return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}
-bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
- AMDGPUAS AS) {
+bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
- return castRequiresQueuePtr(SrcAS, AS);
+ return castRequiresQueuePtr(SrcAS);
}
return false;
@@ -106,8 +102,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
- AMDGPUAS AS) {
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
if (!ConstantExprVisited.insert(EntryC).second)
return false;
@@ -120,7 +115,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
// Check this constant expression.
if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (visitConstantExpr(CE, AS))
+ if (visitConstantExpr(CE))
return true;
}
@@ -262,7 +257,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
continue;
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (castRequiresQueuePtr(ASC, AS)) {
+ if (castRequiresQueuePtr(ASC)) {
NeedQueuePtr = true;
continue;
}
@@ -273,7 +268,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
if (!OpC)
continue;
- if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
+ if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
NeedQueuePtr = true;
break;
}
@@ -318,7 +313,6 @@ bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
if (!TPC)
report_fatal_error("TargetMachine is required");
- AS = AMDGPU::getAMDGPUAS(CG.getModule());
TM = &TPC->getTM<TargetMachine>();
return false;
}
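Editorial note: dropping the AMDGPUAS argument leaves visitConstantExprsRecursively as a plain visited-set walk over constant operands, looking for an addrspacecast that would require the queue pointer. A toy version of that idiom (recursive here for brevity, whereas the pass uses a worklist), with hypothetical names:

#include <cassert>
#include <unordered_set>
#include <vector>

struct Expr {
  bool NeedsQueuePtr = false;           // stands in for the addrspacecast test
  std::vector<const Expr *> Operands;
};

bool anyNeedsQueuePtr(const Expr *E, std::unordered_set<const Expr *> &Seen) {
  if (!Seen.insert(E).second)           // already visited: nothing new to learn
    return false;
  if (E->NeedsQueuePtr)
    return true;
  for (const Expr *Op : E->Operands)
    if (anyNeedsQueuePtr(Op, Seen))
      return true;
  return false;
}

int main() {
  Expr Leaf; Leaf.NeedsQueuePtr = true;
  Expr Mid;  Mid.Operands = {&Leaf};
  Expr Root; Root.Operands = {&Mid, &Mid}; // shared operand, visited once
  std::unordered_set<const Expr *> Seen;
  assert(anyNeedsQueuePtr(&Root, Seen));
  return 0;
}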
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index ed5370826647..f88e3b0dac86 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -16,7 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/IRBuilder.h"
@@ -32,12 +32,11 @@ namespace {
class AMDGPUAnnotateUniformValues : public FunctionPass,
public InstVisitor<AMDGPUAnnotateUniformValues> {
- DivergenceAnalysis *DA;
+ LegacyDivergenceAnalysis *DA;
MemoryDependenceResults *MDR;
LoopInfo *LI;
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
bool isKernelFunc;
- AMDGPUAS AMDGPUASI;
public:
static char ID;
@@ -49,7 +48,7 @@ public:
return "AMDGPU Annotate Uniform Values";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
AU.addRequired<MemoryDependenceWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
@@ -64,7 +63,7 @@ public:
INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
@@ -118,14 +117,8 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
}
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
- if (I.isUnconditional())
- return;
-
- Value *Cond = I.getCondition();
- if (!DA->isUniform(Cond))
- return;
-
- setUniformMetadata(I.getParent()->getTerminator());
+ if (DA->isUniform(&I))
+ setUniformMetadata(I.getParent()->getTerminator());
}
void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
@@ -133,7 +126,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
if (!DA->isUniform(Ptr))
return;
auto isGlobalLoad = [&](LoadInst &Load)->bool {
- return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+ return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
};
// We're tracking up to the Function boundaries
// We cannot go beyond because of FunctionPass restrictions
@@ -168,7 +161,6 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
}
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(M);
return false;
}
@@ -176,7 +168,7 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- DA = &getAnalysis<DivergenceAnalysis>();
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index e62e5d52ad74..2ded7cdb6489 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -40,11 +40,13 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::HSAMD;
// TODO: This should get the default rounding mode from the kernel. We just set
// the default here, but this could change if the OpenCL rounding mode pragmas
@@ -98,8 +100,11 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {
- AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
- }
+ if (IsaInfo::hasCodeObjectV3(getSTI()))
+ HSAMetadataStream.reset(new MetadataStreamerV3());
+ else
+ HSAMetadataStream.reset(new MetadataStreamerV2());
+}
StringRef AMDGPUAsmPrinter::getPassName() const {
return "AMDGPU Assembly Printer";
@@ -116,62 +121,70 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
}
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
- TM.getTargetTriple().getOS() == Triple::AMDHSA)
- return;
+ if (IsaInfo::hasCodeObjectV3(getSTI())) {
+ std::string ExpectedTarget;
+ raw_string_ostream ExpectedTargetOS(ExpectedTarget);
+ IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS);
+
+ getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
+ }
if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
- HSAMetadataStream.begin(M);
+ HSAMetadataStream->begin(M);
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
readPALMetadata(M);
+ if (IsaInfo::hasCodeObjectV3(getSTI()))
+ return;
+
// HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
- IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
+ IsaVersion Version = getIsaVersion(getSTI()->getCPU());
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
- ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
+ Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
- // TODO: Add metadata to code object v3.
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
- TM.getTargetTriple().getOS() == Triple::AMDHSA)
- return;
-
// Following code requires TargetStreamer to be present.
if (!getTargetStreamer())
return;
- // Emit ISA Version (NT_AMD_AMDGPU_ISA).
- std::string ISAVersionString;
- raw_string_ostream ISAVersionStream(ISAVersionString);
- IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
- getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
+ if (!IsaInfo::hasCodeObjectV3(getSTI())) {
+ // Emit ISA Version (NT_AMD_AMDGPU_ISA).
+ std::string ISAVersionString;
+ raw_string_ostream ISAVersionStream(ISAVersionString);
+ IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
+ getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
+ }
// Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
- HSAMetadataStream.end();
- getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata());
+ HSAMetadataStream->end();
+ bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
+ (void)Success;
+ assert(Success && "Malformed HSA Metadata");
}
- // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
- if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
- // Copy the PAL metadata from the map where we collected it into a vector,
- // then write it as a .note.
- PALMD::Metadata PALMetadataVector;
- for (auto i : PALMetadataMap) {
- PALMetadataVector.push_back(i.first);
- PALMetadataVector.push_back(i.second);
+ if (!IsaInfo::hasCodeObjectV3(getSTI())) {
+ // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
+ if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
+ // Copy the PAL metadata from the map where we collected it into a vector,
+ // then write it as a .note.
+ PALMD::Metadata PALMetadataVector;
+ for (auto i : PALMetadataMap) {
+ PALMetadataVector.push_back(i.first);
+ PALMetadataVector.push_back(i.second);
+ }
+ getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
}
- getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
}
}
@@ -193,13 +206,10 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
if (!MFI.isEntryFunction())
return;
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
- TM.getTargetTriple().getOS() == Triple::AMDHSA)
- return;
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
const Function &F = MF->getFunction();
- if (STM.isAmdCodeObjectV2(F) &&
+ if (!STM.hasCodeObjectV3() && STM.isAmdHsaOrMesa(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
amd_kernel_code_t KernelCode;
@@ -207,10 +217,8 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
}
- if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
- return;
-
- HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
+ if (STM.isAmdHsaOS())
+ HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
}
void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
@@ -241,7 +249,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
*getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
- IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
+ IsaInfo::getNumExtraSGPRs(getSTI(),
CurrentProgramInfo.VCCUsed,
CurrentProgramInfo.FlatUsed),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
@@ -259,7 +267,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
- if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) {
+ if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
SmallString<128> SymbolName;
getNameWithPrefix(SymbolName, &MF->getFunction()),
getTargetStreamer()->EmitAMDGPUSymbolType(
@@ -562,7 +570,7 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
const GCNSubtarget &ST) const {
- return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
+ return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
UsesVCC, UsesFlatScratch);
}
@@ -759,7 +767,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
// 48 SGPRs - vcc, - flat_scr, -xnack
int MaxSGPRGuess =
- 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
+ 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
@@ -824,7 +832,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
// unified.
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
- STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
+ getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
@@ -906,9 +914,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
- STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
+ &STM, ProgInfo.NumSGPRsForWavesPerEU);
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
- STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
+ &STM, ProgInfo.NumVGPRsForWavesPerEU);
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
@@ -1003,7 +1011,6 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) {
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &CurrentProgramInfo) {
- const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
@@ -1024,10 +1031,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->EmitIntValue(RsrcReg, 4);
OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
- if (STM.isVGPRSpillingEnabled(MF.getFunction())) {
- OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
- OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
- }
+ OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
+ OutStreamer->EmitIntValue(
+ S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
}
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
@@ -1138,7 +1144,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
Out.compute_pgm_resource_registers =
CurrentProgramInfo.ComputePGMRSrc1 |
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 22982d912c70..167ac4b21e1e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -56,7 +56,7 @@ private:
SIProgramInfo CurrentProgramInfo;
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
- AMDGPU::HSAMD::MetadataStreamer HSAMetadataStream;
+ std::unique_ptr<AMDGPU::HSAMD::MetadataStreamer> HSAMetadataStream;
std::map<uint32_t, uint32_t> PALMetadataMap;
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
@@ -143,7 +143,6 @@ public:
protected:
mutable std::vector<std::string> DisasmLines, HexLines;
mutable size_t DisasmLineMaxLen;
- AMDGPUAS AMDGPUASI;
};
} // end namespace llvm
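Editorial note: the asm printer now owns its metadata streamer through a unique_ptr to a base class and picks the V2 or V3 implementation once, at construction, based on the code object version. A rough standalone sketch of that constructor-time selection; the class names are stand-ins, not the llvm::AMDGPU::HSAMD types:

#include <iostream>
#include <memory>

struct MetadataStreamer {
  virtual ~MetadataStreamer() = default;
  virtual const char *version() const = 0;
};
struct MetadataStreamerV2 : MetadataStreamer {
  const char *version() const override { return "v2"; }
};
struct MetadataStreamerV3 : MetadataStreamer {
  const char *version() const override { return "v3"; }
};

std::unique_ptr<MetadataStreamer> makeStreamer(bool HasCodeObjectV3) {
  if (HasCodeObjectV3)
    return std::make_unique<MetadataStreamerV3>();
  return std::make_unique<MetadataStreamerV2>();
}

int main() {
  std::cout << makeStreamer(true)->version() << "\n";  // v3
  std::cout << makeStreamer(false)->version() << "\n"; // v2
}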
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
new file mode 100644
index 000000000000..644e4fd558ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -0,0 +1,458 @@
+//===-- AMDGPUAtomicOptimizer.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass optimizes atomic operations by using a single lane of a wavefront
+/// to perform the atomic operation, thus reducing contention on that memory
+/// location.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "amdgpu-atomic-optimizer"
+
+using namespace llvm;
+
+namespace {
+
+enum DPP_CTRL {
+ DPP_ROW_SR1 = 0x111,
+ DPP_ROW_SR2 = 0x112,
+ DPP_ROW_SR4 = 0x114,
+ DPP_ROW_SR8 = 0x118,
+ DPP_WF_SR1 = 0x138,
+ DPP_ROW_BCAST15 = 0x142,
+ DPP_ROW_BCAST31 = 0x143
+};
+
+struct ReplacementInfo {
+ Instruction *I;
+ Instruction::BinaryOps Op;
+ unsigned ValIdx;
+ bool ValDivergent;
+};
+
+class AMDGPUAtomicOptimizer : public FunctionPass,
+ public InstVisitor<AMDGPUAtomicOptimizer> {
+private:
+ SmallVector<ReplacementInfo, 8> ToReplace;
+ const LegacyDivergenceAnalysis *DA;
+ const DataLayout *DL;
+ DominatorTree *DT;
+ bool HasDPP;
+ bool IsPixelShader;
+
+ void optimizeAtomic(Instruction &I, Instruction::BinaryOps Op,
+ unsigned ValIdx, bool ValDivergent) const;
+
+ void setConvergent(CallInst *const CI) const;
+
+public:
+ static char ID;
+
+ AMDGPUAtomicOptimizer() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<TargetPassConfig>();
+ }
+
+ void visitAtomicRMWInst(AtomicRMWInst &I);
+ void visitIntrinsicInst(IntrinsicInst &I);
+};
+
+} // namespace
+
+char AMDGPUAtomicOptimizer::ID = 0;
+
+char &llvm::AMDGPUAtomicOptimizerID = AMDGPUAtomicOptimizer::ID;
+
+bool AMDGPUAtomicOptimizer::runOnFunction(Function &F) {
+ if (skipFunction(F)) {
+ return false;
+ }
+
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ DL = &F.getParent()->getDataLayout();
+ DominatorTreeWrapperPass *const DTW =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTW ? &DTW->getDomTree() : nullptr;
+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ const TargetMachine &TM = TPC.getTM<TargetMachine>();
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ HasDPP = ST.hasDPP();
+ IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS;
+
+ visit(F);
+
+ const bool Changed = !ToReplace.empty();
+
+ for (ReplacementInfo &Info : ToReplace) {
+ optimizeAtomic(*Info.I, Info.Op, Info.ValIdx, Info.ValDivergent);
+ }
+
+ ToReplace.clear();
+
+ return Changed;
+}
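Editorial note: runOnFunction above deliberately splits the work into a visit phase that only appends candidates to ToReplace and a second loop that rewrites them, so the InstVisitor never walks IR that is being modified underneath it. A tiny standalone sketch of that collect-then-rewrite shape, with hypothetical names:

#include <cassert>
#include <vector>

struct Candidate { int Index; };

// Visit phase: record candidates, do not mutate.
std::vector<Candidate> collect(const std::vector<int> &Ops) {
  std::vector<Candidate> ToReplace;
  for (int I = 0, E = (int)Ops.size(); I != E; ++I)
    if (Ops[I] < 0)                  // stands in for "optimizable atomic"
      ToReplace.push_back({I});
  return ToReplace;
}

int main() {
  std::vector<int> Ops = {3, -1, 7, -4};
  auto ToReplace = collect(Ops);
  bool Changed = !ToReplace.empty();
  for (const Candidate &C : ToReplace) // rewrite phase, after traversal
    Ops[C.Index] = 0;
  assert(Changed && Ops[1] == 0 && Ops[3] == 0);
  return 0;
}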
+
+void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
+ // Early exit for unhandled address space atomic instructions.
+ switch (I.getPointerAddressSpace()) {
+ default:
+ return;
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::LOCAL_ADDRESS:
+ break;
+ }
+
+ Instruction::BinaryOps Op;
+
+ switch (I.getOperation()) {
+ default:
+ return;
+ case AtomicRMWInst::Add:
+ Op = Instruction::Add;
+ break;
+ case AtomicRMWInst::Sub:
+ Op = Instruction::Sub;
+ break;
+ }
+
+ const unsigned PtrIdx = 0;
+ const unsigned ValIdx = 1;
+
+ // If the pointer operand is divergent, then each lane is doing an atomic
+ // operation on a different address, and we cannot optimize that.
+ if (DA->isDivergent(I.getOperand(PtrIdx))) {
+ return;
+ }
+
+ const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
+
+ // If the value operand is divergent, each lane is contributing a different
+ // value to the atomic calculation. We can only optimize divergent values if
+ // we have DPP available on our subtarget, and the atomic operation is 32
+ // bits.
+ if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) {
+ return;
+ }
+
+ // If we get here, we can optimize the atomic using a single wavefront-wide
+ // atomic operation to do the calculation for the entire wavefront, so
+ // remember the instruction so we can come back to it.
+ const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent};
+
+ ToReplace.push_back(Info);
+}
+
+void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
+ Instruction::BinaryOps Op;
+
+ switch (I.getIntrinsicID()) {
+ default:
+ return;
+ case Intrinsic::amdgcn_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ Op = Instruction::Add;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ Op = Instruction::Sub;
+ break;
+ }
+
+ const unsigned ValIdx = 0;
+
+ const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
+
+ // If the value operand is divergent, each lane is contributing a different
+ // value to the atomic calculation. We can only optimize divergent values if
+ // we have DPP available on our subtarget, and the atomic operation is 32
+ // bits.
+ if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) {
+ return;
+ }
+
+ // If any of the other arguments to the intrinsic are divergent, we can't
+ // optimize the operation.
+ for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) {
+ if (DA->isDivergent(I.getOperand(Idx))) {
+ return;
+ }
+ }
+
+ // If we get here, we can optimize the atomic using a single wavefront-wide
+ // atomic operation to do the calculation for the entire wavefront, so we
+ // record the instruction to come back to it later.
+ const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent};
+
+ ToReplace.push_back(Info);
+}
+
+void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
+ Instruction::BinaryOps Op,
+ unsigned ValIdx,
+ bool ValDivergent) const {
+ LLVMContext &Context = I.getContext();
+
+ // Start building just before the instruction.
+ IRBuilder<> B(&I);
+
+ // If we are in a pixel shader, we need to record the entry and exit BBs
+ // because of how we have to mask out helper lane invocations.
+ BasicBlock *PixelEntryBB = nullptr;
+ BasicBlock *PixelExitBB = nullptr;
+
+ // If we're optimizing an atomic within a pixel shader, we need to wrap the
+ // entire atomic operation in a helper-lane check. We do not want any helper
+ // lanes that are around only for the purposes of derivatives to take part
+ // in any cross-lane communication, and we use a branch on whether the lane is
+ // live to do this.
+ if (IsPixelShader) {
+ // Record I's original position as the entry block.
+ PixelEntryBB = I.getParent();
+
+ Value *const Cond = B.CreateIntrinsic(Intrinsic::amdgcn_ps_live, {}, {});
+ Instruction *const NonHelperTerminator =
+ SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, DT, nullptr);
+
+ // Record I's new position as the exit block.
+ PixelExitBB = I.getParent();
+
+ I.moveBefore(NonHelperTerminator);
+ B.SetInsertPoint(&I);
+ }
+
+ Type *const Ty = I.getType();
+ const unsigned TyBitWidth = DL->getTypeSizeInBits(Ty);
+ Type *const VecTy = VectorType::get(B.getInt32Ty(), 2);
+
+ // This is the value in the atomic operation we need to combine in order to
+ // reduce the number of atomic operations.
+ Value *const V = I.getOperand(ValIdx);
+
+ // We need to know which lanes in the wavefront are active, and we do this by
+ // reading the exec register, whose set bits mark the active lanes.
+ MDNode *const RegName =
+ llvm::MDNode::get(Context, llvm::MDString::get(Context, "exec"));
+ Value *const Metadata = llvm::MetadataAsValue::get(Context, RegName);
+ CallInst *const Exec =
+ B.CreateIntrinsic(Intrinsic::read_register, {B.getInt64Ty()}, {Metadata});
+ setConvergent(Exec);
+
+ // We also need to know how many active lanes in the wavefront are below us.
+ // If each lane is numbered linearly starting from 0, a lane is below us only
+ // if its index is less than ours. We compute this with the mbcnt intrinsic,
+ // which effectively counts the set exec bits at positions lower than our own.
+ Value *const BitCast = B.CreateBitCast(Exec, VecTy);
+ Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0));
+ Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1));
+ CallInst *const PartialMbcnt = B.CreateIntrinsic(
+ Intrinsic::amdgcn_mbcnt_lo, {}, {ExtractLo, B.getInt32(0)});
+ CallInst *const Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {},
+ {ExtractHi, PartialMbcnt});
+
+ Value *const MbcntCast = B.CreateIntCast(Mbcnt, Ty, false);
+
+ Value *LaneOffset = nullptr;
+ Value *NewV = nullptr;
+
+ // If we have a divergent value in each lane, we need to combine the value
+ // using DPP.
+ if (ValDivergent) {
+ // First we need to set all inactive invocations to 0, so that they can
+ // correctly contribute to the final result.
+ CallInst *const SetInactive = B.CreateIntrinsic(
+ Intrinsic::amdgcn_set_inactive, Ty, {V, B.getIntN(TyBitWidth, 0)});
+ setConvergent(SetInactive);
+ NewV = SetInactive;
+
+ const unsigned Iters = 6;
+ const unsigned DPPCtrl[Iters] = {DPP_ROW_SR1, DPP_ROW_SR2,
+ DPP_ROW_SR4, DPP_ROW_SR8,
+ DPP_ROW_BCAST15, DPP_ROW_BCAST31};
+ const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xa, 0xc};
+
+ // This loop performs an inclusive scan across the wavefront, with all lanes
+ // active (by using the WWM intrinsic).
+ for (unsigned Idx = 0; Idx < Iters; Idx++) {
+ CallInst *const DPP = B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp, Ty,
+ {NewV, B.getInt32(DPPCtrl[Idx]),
+ B.getInt32(RowMask[Idx]),
+ B.getInt32(0xf), B.getFalse()});
+ setConvergent(DPP);
+ Value *const WWM = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, DPP);
+
+ NewV = B.CreateBinOp(Op, NewV, WWM);
+ NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
+ }
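+ // The four row_shr steps above produce an inclusive scan within each 16-lane
+ // row; the two row_bcast steps (with row masks 0xa and 0xc) then fold the
+ // row totals into the higher rows, so after all six steps NewV holds a full
+ // 64-lane inclusive scan. A scalar reference for the end result would be:
+ //   for (int Lane = 0; Lane < 64; ++Lane)
+ //     Scan[Lane] = Value[Lane] + (Lane ? Scan[Lane - 1] : 0);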
+
+ // NewV now holds the inclusive scan of V, but for the lane offset we require
+ // an exclusive scan. We get this by shifting the values of the entire
+ // wavefront right by 1; setting bound_ctrl (the last argument to the
+ // intrinsic below) to true guarantees that 0 is shifted into the 0'th
+ // invocation.
+ CallInst *const DPP =
+ B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp, {Ty},
+ {NewV, B.getInt32(DPP_WF_SR1), B.getInt32(0xf),
+ B.getInt32(0xf), B.getTrue()});
+ setConvergent(DPP);
+ LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, DPP);
+
+ // Read the value from the last lane, which has accumulated the values of
+ // each active lane in the wavefront. This will be the value we provide to
+ // the atomic operation.
+ if (TyBitWidth == 64) {
+ Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty());
+ Value *const ExtractHi =
+ B.CreateTrunc(B.CreateLShr(NewV, B.getInt64(32)), B.getInt32Ty());
+ CallInst *const ReadLaneLo = B.CreateIntrinsic(
+ Intrinsic::amdgcn_readlane, {}, {ExtractLo, B.getInt32(63)});
+ setConvergent(ReadLaneLo);
+ CallInst *const ReadLaneHi = B.CreateIntrinsic(
+ Intrinsic::amdgcn_readlane, {}, {ExtractHi, B.getInt32(63)});
+ setConvergent(ReadLaneHi);
+ Value *const PartialInsert = B.CreateInsertElement(
+ UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0));
+ Value *const Insert =
+ B.CreateInsertElement(PartialInsert, ReadLaneHi, B.getInt32(1));
+ NewV = B.CreateBitCast(Insert, Ty);
+ } else if (TyBitWidth == 32) {
+ CallInst *const ReadLane = B.CreateIntrinsic(Intrinsic::amdgcn_readlane,
+ {}, {NewV, B.getInt32(63)});
+ setConvergent(ReadLane);
+ NewV = ReadLane;
+ } else {
+ llvm_unreachable("Unhandled atomic bit width");
+ }
+ } else {
+ // Get the total number of active lanes we have by using popcount.
+ Instruction *const Ctpop = B.CreateUnaryIntrinsic(Intrinsic::ctpop, Exec);
+ Value *const CtpopCast = B.CreateIntCast(Ctpop, Ty, false);
+
+ // Calculate the new value we will be contributing to the atomic operation
+ // for the entire wavefront.
+ NewV = B.CreateMul(V, CtpopCast);
+ LaneOffset = B.CreateMul(V, MbcntCast);
+ }
+
+ // We only want a single lane to enter our new control flow, and we do this
+ // by checking if there are any active lanes below us. Exactly one lane will
+ // have 0 active lanes below it, so that is the only one to progress.
+ Value *const Cond = B.CreateICmpEQ(MbcntCast, B.getIntN(TyBitWidth, 0));
+
+ // Store I's original basic block before we split the block.
+ BasicBlock *const EntryBB = I.getParent();
+
+ // We need to introduce some new control flow to force a single lane to be
+ // active. We do this by splitting I's basic block at I, and introducing the
+ // new block such that:
+ // entry --> single_lane -\
+ // \------------------> exit
+ Instruction *const SingleLaneTerminator =
+ SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, DT, nullptr);
+
+ // Move the IR builder into single_lane next.
+ B.SetInsertPoint(SingleLaneTerminator);
+
+ // Clone the original atomic operation into single lane, replacing the
+ // original value with our newly created one.
+ Instruction *const NewI = I.clone();
+ B.Insert(NewI);
+ NewI->setOperand(ValIdx, NewV);
+
+ // Move the IR builder into exit next, and start inserting just before the
+ // original instruction.
+ B.SetInsertPoint(&I);
+
+ // Create a PHI node to get our new atomic result into the exit block.
+ PHINode *const PHI = B.CreatePHI(Ty, 2);
+ PHI->addIncoming(UndefValue::get(Ty), EntryBB);
+ PHI->addIncoming(NewI, SingleLaneTerminator->getParent());
+
+ // We need to broadcast the value from the lowest active lane (the first
+ // active lane) to all other lanes in the wavefront. We use an intrinsic for
+ // this, but have to handle 64-bit broadcasts with two calls to it.
+ Value *BroadcastI = nullptr;
+
+ if (TyBitWidth == 64) {
+ Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty());
+ Value *const ExtractHi =
+ B.CreateTrunc(B.CreateLShr(PHI, B.getInt64(32)), B.getInt32Ty());
+ CallInst *const ReadFirstLaneLo =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
+ setConvergent(ReadFirstLaneLo);
+ CallInst *const ReadFirstLaneHi =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
+ setConvergent(ReadFirstLaneHi);
+ Value *const PartialInsert = B.CreateInsertElement(
+ UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
+ Value *const Insert =
+ B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
+ BroadcastI = B.CreateBitCast(Insert, Ty);
+ } else if (TyBitWidth == 32) {
+ CallInst *const ReadFirstLane =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
+ setConvergent(ReadFirstLane);
+ BroadcastI = ReadFirstLane;
+ } else {
+ llvm_unreachable("Unhandled atomic bit width");
+ }
+
+ // Now that we have the result of our single atomic operation, we need to
+ // recover each lane's individual portion of the result. We combine the lane
+ // offset we previously calculated with the atomic result value broadcast
+ // from the first lane to reconstruct what this lane's original atomic would
+ // have returned.
+ Value *const Result = B.CreateBinOp(Op, BroadcastI, LaneOffset);
+
+ if (IsPixelShader) {
+ // We need a final PHI to reconverge above the helper-lane branch.
+ B.SetInsertPoint(PixelExitBB->getFirstNonPHI());
+
+ PHINode *const PHI = B.CreatePHI(Ty, 2);
+ PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB);
+ PHI->addIncoming(Result, I.getParent());
+ I.replaceAllUsesWith(PHI);
+ } else {
+ // Replace the original atomic instruction with the new one.
+ I.replaceAllUsesWith(Result);
+ }
+
+ // And delete the original.
+ I.eraseFromParent();
+}
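A worked example of the full rewrite, with assumed numbers that are not taken from the patch: suppose four lanes are active, the operation is an add, and the memory location currently holds 100.

    // Uniform case: every lane adds V = 5.
    //   NewV = V * popcount(exec) = 20      (the single atomic adds 20)
    //   Old  = 100                          (returned by that one atomic)
    //   a lane with 2 active lanes below it: LaneOffset = 5 * 2 = 10
    //   Result = Old + LaneOffset = 110     (what its own atomic would return)
    //
    // Divergent case: the four lanes contribute 1, 2, 3 and 4.
    //   inclusive scan = 1, 3, 6, 10; NewV = 10 (read back from the last lane)
    //   exclusive scan = 0, 1, 3, 6             (LaneOffset per lane)
    //   per-lane Result = 100, 101, 103, 106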
+
+void AMDGPUAtomicOptimizer::setConvergent(CallInst *const CI) const {
+ CI->addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
+}
+
+INITIALIZE_PASS_BEGIN(AMDGPUAtomicOptimizer, DEBUG_TYPE,
+ "AMDGPU atomic optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AMDGPUAtomicOptimizer, DEBUG_TYPE,
+ "AMDGPU atomic optimizations", false, false)
+
+FunctionPass *llvm::createAMDGPUAtomicOptimizerPass() {
+ return new AMDGPUAtomicOptimizer();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 18c7df0d94f2..daef37f9c21f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -28,11 +28,12 @@
using namespace llvm;
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
- : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
+ : CallLowering(&TLI) {
}
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
// FIXME: Add support for non-void returns.
if (Val)
return false;
@@ -50,7 +51,7 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
- PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
LLT PtrType = getLLTForType(*PtrTy, DL);
unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
unsigned KernArgSegmentPtr =
@@ -72,7 +73,7 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
- PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index f51cb6abbf65..ed859716218e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -23,8 +23,6 @@ namespace llvm {
class AMDGPUTargetLowering;
class AMDGPUCallLowering: public CallLowering {
- AMDGPUAS AMDGPUASI;
-
unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
uint64_t Offset) const;
@@ -35,8 +33,8 @@ class AMDGPUCallLowering: public CallLowering {
public:
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 68bc7fdd9961..367f120b5fa6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -19,7 +19,7 @@ class CCIfExtend<CCAction A>
// Calling convention for SI
def CC_SI : CallingConv<[
- CCIfInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
+ CCIfInReg<CCIfType<[f32, i32, f16, v2i16, v2f16] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -33,7 +33,7 @@ def CC_SI : CallingConv<[
CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
// 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
- CCIfNotInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
+ CCIfNotInReg<CCIfType<[f32, i32, f16, v2i16, v2f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -64,7 +64,7 @@ def RetCC_SI_Shader : CallingConv<[
]>>,
// 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
- CCIfType<[f32, f16] , CCAssignToReg<[
+ CCIfType<[f32, f16, v2f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 5713b7b7f9a8..4dc1e67c573d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -18,7 +18,7 @@
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Passes.h"
@@ -60,10 +60,9 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNSubtarget *ST = nullptr;
AssumptionCache *AC = nullptr;
- DivergenceAnalysis *DA = nullptr;
+ LegacyDivergenceAnalysis *DA = nullptr;
Module *Mod = nullptr;
bool HasUnsafeFPMath = false;
- AMDGPUAS AMDGPUASI;
/// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
/// binary operation \p V.
@@ -177,7 +176,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
AU.setPreservesAll();
}
};
@@ -559,7 +558,7 @@ Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
Value *FQM = Builder.CreateFMul(FA, RCP);
// fq = trunc(fqm);
- CallInst* FQ = Builder.CreateIntrinsic(Intrinsic::trunc, { FQM });
+ CallInst *FQ = Builder.CreateUnaryIntrinsic(Intrinsic::trunc, FQM);
FQ->copyFastMathFlags(Builder.getFastMathFlags());
// float fqneg = -fq;
@@ -567,17 +566,17 @@ Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
// float fr = mad(fqneg, fb, fa);
Value *FR = Builder.CreateIntrinsic(Intrinsic::amdgcn_fmad_ftz,
- { FQNeg, FB, FA }, FQ);
+ {FQNeg->getType()}, {FQNeg, FB, FA}, FQ);
// int iq = (int)fq;
Value *IQ = IsSigned ? Builder.CreateFPToSI(FQ, I32Ty)
: Builder.CreateFPToUI(FQ, I32Ty);
// fr = fabs(fr);
- FR = Builder.CreateIntrinsic(Intrinsic::fabs, { FR }, FQ);
+ FR = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FR, FQ);
// fb = fabs(fb);
- FB = Builder.CreateIntrinsic(Intrinsic::fabs, { FB }, FQ);
+ FB = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FB, FQ);
// int cv = fr >= fb;
Value *CV = Builder.CreateFCmpOGE(FR, FB);
@@ -799,8 +798,8 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
if (!WidenLoads)
return false;
- if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ if ((I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
canWidenScalarExtLoad(I)) {
IRBuilder<> Builder(&I);
Builder.SetCurrentDebugLocation(I.getDebugLoc());
@@ -898,9 +897,8 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
ST = &TM.getSubtarget<GCNSubtarget>(F);
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DA = &getAnalysis<DivergenceAnalysis>();
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
HasUnsafeFPMath = hasUnsafeFPMath(F);
- AMDGPUASI = TM.getAMDGPUAS();
bool MadeChange = false;
@@ -918,7 +916,7 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
"AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
false, false)
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
new file mode 100644
index 000000000000..6e2a981d3396
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
@@ -0,0 +1,63 @@
+//===-- AMDGPUFixFunctionBitcasts.cpp - Fix function bitcasts -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Promote indirect (bitcast) calls to direct calls when they are statically
+/// known to be direct. Required when InstCombine is not run (e.g. at OptNone)
+/// because AMDGPU does not support indirect calls.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-fix-function-bitcasts"
+
+namespace {
+class AMDGPUFixFunctionBitcasts final
+ : public ModulePass,
+ public InstVisitor<AMDGPUFixFunctionBitcasts> {
+
+ bool runOnModule(Module &M) override;
+
+ bool Modified;
+
+public:
+ void visitCallSite(CallSite CS) {
+ if (CS.getCalledFunction())
+ return;
+ auto Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (Callee && isLegalToPromote(CS, Callee)) {
+ promoteCall(CS, Callee);
+ Modified = true;
+ }
+ }
+
+ static char ID;
+ AMDGPUFixFunctionBitcasts() : ModulePass(ID) {}
+};
+} // End anonymous namespace
+
+char AMDGPUFixFunctionBitcasts::ID = 0;
+char &llvm::AMDGPUFixFunctionBitcastsID = AMDGPUFixFunctionBitcasts::ID;
+INITIALIZE_PASS(AMDGPUFixFunctionBitcasts, DEBUG_TYPE,
+ "Fix function bitcasts for AMDGPU", false, false)
+
+ModulePass *llvm::createAMDGPUFixFunctionBitcastsPass() {
+ return new AMDGPUFixFunctionBitcasts();
+}
+
+bool AMDGPUFixFunctionBitcasts::runOnModule(Module &M) {
+ Modified = false;
+ visit(M);
+ return Modified;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index ba735390f679..59bb2a16e0f3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -122,15 +122,14 @@ def : GISelVop2CommutePat <sra, V_ASHRREV_I32_e32, i32>;
}
def : GISelVop3Pat2CommutePat <sra, V_ASHRREV_I32_e64, i32>;
-// FIXME: Select directly to _e32 so we don't need to deal with modifiers.
// FIXME: We can't re-use SelectionDAG patterns here because they match
// against a custom SDNode and we would need to create a generic machine
// instruction that is equivalent to the custom SDNode. This would also require
// us to custom legalize the intrinsic to the new generic machine instruction,
// but I can't get custom legalizing of intrinsic to work and I'm not sure if
// this is even supported yet.
-defm : GISelVop2IntrPat <
- int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e32, v2f16, f32>;
+def : GISelVop3Pat2ModsPat <
+ int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e64, v2f16, f32>;
defm : GISelVop2IntrPat <int_maxnum, V_MAX_F32_e32, f32>;
def : GISelVop3Pat2ModsPat <int_maxnum, V_MAX_F64, f64>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index 3a58c6c6a29f..6eab59ab4e09 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -16,34 +16,38 @@ namespace AMDGPU {
enum PartialMappingIdx {
None = - 1,
- PM_SGPR1 = 0,
- PM_SGPR16 = 4,
- PM_SGPR32 = 5,
- PM_SGPR64 = 6,
- PM_SGPR128 = 7,
- PM_SGPR256 = 8,
- PM_SGPR512 = 9,
- PM_VGPR1 = 10,
- PM_VGPR16 = 14,
- PM_VGPR32 = 15,
- PM_VGPR64 = 16,
- PM_VGPR128 = 17,
- PM_VGPR256 = 18,
- PM_VGPR512 = 19,
- PM_SGPR96 = 20,
- PM_VGPR96 = 21
+ PM_SGPR1 = 2,
+ PM_SGPR16 = 6,
+ PM_SGPR32 = 7,
+ PM_SGPR64 = 8,
+ PM_SGPR128 = 9,
+ PM_SGPR256 = 10,
+ PM_SGPR512 = 11,
+ PM_VGPR1 = 12,
+ PM_VGPR16 = 16,
+ PM_VGPR32 = 17,
+ PM_VGPR64 = 18,
+ PM_VGPR128 = 19,
+ PM_VGPR256 = 20,
+ PM_VGPR512 = 21,
+ PM_SGPR96 = 22,
+ PM_VGPR96 = 23
};
const RegisterBankInfo::PartialMapping PartMappings[] {
// StartIdx, Length, RegBank
{0, 1, SCCRegBank},
+ {0, 1, VCCRegBank},
+
+ {0, 1, SGPRRegBank}, // SGPR begin
{0, 16, SGPRRegBank},
{0, 32, SGPRRegBank},
{0, 64, SGPRRegBank},
{0, 128, SGPRRegBank},
{0, 256, SGPRRegBank},
{0, 512, SGPRRegBank},
- {0, 1, SGPRRegBank},
+
+ {0, 1, VGPRRegBank}, // VGPR begin
{0, 16, VGPRRegBank},
{0, 32, VGPRRegBank},
{0, 64, VGPRRegBank},
@@ -55,33 +59,43 @@ const RegisterBankInfo::PartialMapping PartMappings[] {
};
const RegisterBankInfo::ValueMapping ValMappings[] {
+ // SCC
{&PartMappings[0], 1},
- {nullptr, 0},
- {nullptr, 0},
- {nullptr, 0},
+
+ // VCC
{&PartMappings[1], 1},
+
+ // SGPRs
{&PartMappings[2], 1},
+ {nullptr, 0}, // Illegal power of 2 sizes
+ {nullptr, 0},
+ {nullptr, 0},
{&PartMappings[3], 1},
{&PartMappings[4], 1},
{&PartMappings[5], 1},
{&PartMappings[6], 1},
{&PartMappings[7], 1},
+ {&PartMappings[8], 1},
+
+ // VGPRs
+ {&PartMappings[9], 1},
{nullptr, 0},
{nullptr, 0},
{nullptr, 0},
- {&PartMappings[8], 1},
- {&PartMappings[9], 1},
{&PartMappings[10], 1},
{&PartMappings[11], 1},
{&PartMappings[12], 1},
{&PartMappings[13], 1},
{&PartMappings[14], 1},
- {&PartMappings[15], 1}
+ {&PartMappings[15], 1},
+ {&PartMappings[16], 1},
+ {&PartMappings[17], 1}
};
enum ValueMappingIdx {
- SGPRStartIdx = 0,
- VGPRStartIdx = 10
+ SCCStartIdx = 0,
+ SGPRStartIdx = 2,
+ VGPRStartIdx = 12
};
const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
@@ -89,16 +103,28 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
unsigned Idx;
switch (Size) {
case 1:
- Idx = BankID == AMDGPU::SCCRegBankID ? PM_SGPR1 : PM_VGPR1;
+ if (BankID == AMDGPU::SCCRegBankID)
+ return &ValMappings[0];
+ if (BankID == AMDGPU::VCCRegBankID)
+ return &ValMappings[1];
+
+ // 1-bit values not from a compare etc.
+ Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR1 : PM_VGPR1;
break;
case 96:
+ assert(BankID != AMDGPU::VCCRegBankID);
Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR96 : PM_VGPR96;
break;
default:
+ assert(BankID != AMDGPU::VCCRegBankID);
Idx = BankID == AMDGPU::VGPRRegBankID ? VGPRStartIdx : SGPRStartIdx;
Idx += Log2_32_Ceil(Size);
break;
}
+
+ assert(Log2_32_Ceil(Size) == Log2_32_Ceil(ValMappings[Idx].BreakDown->Length));
+ assert(BankID == ValMappings[Idx].BreakDown->RegBank->getID());
+
return &ValMappings[Idx];
}
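As a quick check of the index arithmetic above (an illustrative trace, not code from the patch): for a 64-bit value on the SGPR bank,

    // Idx = SGPRStartIdx + Log2_32_Ceil(64) = 2 + 6 = 8, i.e. PM_SGPR64,
    // whose PartialMapping is {0, 64, SGPRRegBank} in the tables above.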
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 01ef346f74ee..c38b0e61558b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -16,6 +16,7 @@
#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIMachineFunctionInfo.h"
#include "SIProgramInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -36,11 +37,14 @@ static cl::opt<bool> VerifyHSAMetadata(
namespace AMDGPU {
namespace HSAMD {
-void MetadataStreamer::dump(StringRef HSAMetadataString) const {
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV2
+//===----------------------------------------------------------------------===//
+void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
}
-void MetadataStreamer::verify(StringRef HSAMetadataString) const {
+void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata Parser Test: ";
HSAMD::Metadata FromHSAMetadataString;
@@ -63,7 +67,8 @@ void MetadataStreamer::verify(StringRef HSAMetadataString) const {
}
}
-AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
+AccessQualifier
+MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
if (AccQual.empty())
return AccessQualifier::Unknown;
@@ -74,26 +79,29 @@ AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
.Default(AccessQualifier::Default);
}
-AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
+AddressSpaceQualifier
+MetadataStreamerV2::getAddressSpaceQualifier(
unsigned AddressSpace) const {
- if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
+ switch (AddressSpace) {
+ case AMDGPUAS::PRIVATE_ADDRESS:
return AddressSpaceQualifier::Private;
- if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
+ case AMDGPUAS::GLOBAL_ADDRESS:
return AddressSpaceQualifier::Global;
- if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
+ case AMDGPUAS::CONSTANT_ADDRESS:
return AddressSpaceQualifier::Constant;
- if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
+ case AMDGPUAS::LOCAL_ADDRESS:
return AddressSpaceQualifier::Local;
- if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
+ case AMDGPUAS::FLAT_ADDRESS:
return AddressSpaceQualifier::Generic;
- if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
+ case AMDGPUAS::REGION_ADDRESS:
return AddressSpaceQualifier::Region;
-
- llvm_unreachable("Unknown address space qualifier");
+ default:
+ return AddressSpaceQualifier::Unknown;
+ }
}
-ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
- StringRef BaseTypeName) const {
+ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const {
if (TypeQual.find("pipe") != StringRef::npos)
return ValueKind::Pipe;
@@ -114,13 +122,13 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
.Case("queue_t", ValueKind::Queue)
.Default(isa<PointerType>(Ty) ?
(Ty->getPointerAddressSpace() ==
- AMDGPUASI.LOCAL_ADDRESS ?
+ AMDGPUAS::LOCAL_ADDRESS ?
ValueKind::DynamicSharedPointer :
ValueKind::GlobalBuffer) :
ValueKind::ByValue);
}
-ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
+ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
auto Signed = !TypeName.startswith("u");
@@ -152,7 +160,7 @@ ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
}
}
-std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
+std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
if (!Signed)
@@ -189,8 +197,8 @@ std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
}
}
-std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
- MDNode *Node) const {
+std::vector<uint32_t>
+MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
std::vector<uint32_t> Dims;
if (Node->getNumOperands() != 3)
return Dims;
@@ -200,9 +208,9 @@ std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
return Dims;
}
-Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const {
+Kernel::CodeProps::Metadata
+MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
@@ -229,9 +237,9 @@ Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
return HSACodeProps;
}
-Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const {
+Kernel::DebugProps::Metadata
+MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
@@ -251,14 +259,14 @@ Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
return HSADebugProps;
}
-void MetadataStreamer::emitVersion() {
+void MetadataStreamerV2::emitVersion() {
auto &Version = HSAMetadata.mVersion;
Version.push_back(VersionMajor);
Version.push_back(VersionMinor);
}
-void MetadataStreamer::emitPrintf(const Module &Mod) {
+void MetadataStreamerV2::emitPrintf(const Module &Mod) {
auto &Printf = HSAMetadata.mPrintf;
auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
@@ -270,7 +278,7 @@ void MetadataStreamer::emitPrintf(const Module &Mod) {
Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
}
-void MetadataStreamer::emitKernelLanguage(const Function &Func) {
+void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
auto &Kernel = HSAMetadata.mKernels.back();
// TODO: What about other languages?
@@ -288,7 +296,7 @@ void MetadataStreamer::emitKernelLanguage(const Function &Func) {
mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
}
-void MetadataStreamer::emitKernelAttrs(const Function &Func) {
+void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
if (auto Node = Func.getMetadata("reqd_work_group_size"))
@@ -306,14 +314,14 @@ void MetadataStreamer::emitKernelAttrs(const Function &Func) {
}
}
-void MetadataStreamer::emitKernelArgs(const Function &Func) {
+void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
for (auto &Arg : Func.args())
emitKernelArg(Arg);
emitHiddenKernelArgs(Func);
}
-void MetadataStreamer::emitKernelArg(const Argument &Arg) {
+void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
auto Func = Arg.getParent();
auto ArgNo = Arg.getArgNo();
const MDNode *Node;
@@ -355,7 +363,7 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) {
unsigned PointeeAlign = 0;
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
- if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
+ if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
PointeeAlign = Arg.getParamAlignment();
if (PointeeAlign == 0)
PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
@@ -366,12 +374,12 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) {
PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
}
-void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
- ValueKind ValueKind,
- unsigned PointeeAlign,
- StringRef Name,
- StringRef TypeName, StringRef BaseTypeName,
- StringRef AccQual, StringRef TypeQual) {
+void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
+ ValueKind ValueKind,
+ unsigned PointeeAlign, StringRef Name,
+ StringRef TypeName,
+ StringRef BaseTypeName,
+ StringRef AccQual, StringRef TypeQual) {
HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
@@ -384,7 +392,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
Arg.mPointeeAlign = PointeeAlign;
if (auto PtrTy = dyn_cast<PointerType>(Ty))
- Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
+ Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
Arg.mAccQual = getAccessQualifier(AccQual);
@@ -404,7 +412,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
}
}
-void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
+void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
int HiddenArgNumBytes =
getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
@@ -422,7 +430,7 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
- AMDGPUASI.GLOBAL_ADDRESS);
+ AMDGPUAS::GLOBAL_ADDRESS);
// Emit "printf buffer" argument if printf is used, otherwise emit dummy
// "none" argument.
@@ -446,13 +454,16 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
}
}
-void MetadataStreamer::begin(const Module &Mod) {
- AMDGPUASI = getAMDGPUAS(Mod);
+bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
+ return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
+}
+
+void MetadataStreamerV2::begin(const Module &Mod) {
emitVersion();
emitPrintf(Mod);
}
-void MetadataStreamer::end() {
+void MetadataStreamerV2::end() {
std::string HSAMetadataString;
if (toString(HSAMetadata, HSAMetadataString))
return;
@@ -463,7 +474,8 @@ void MetadataStreamer::end() {
verify(HSAMetadataString);
}
-void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
+void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
return;
@@ -483,6 +495,505 @@ void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo
HSAMetadata.mKernels.back().mDebugProps = DebugProps;
}
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV3
+//===----------------------------------------------------------------------===//
+
+void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
+ errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
+}
+
+void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
+ errs() << "AMDGPU HSA Metadata Parser Test: ";
+
+ std::shared_ptr<msgpack::Node> FromHSAMetadataString =
+ std::make_shared<msgpack::MapNode>();
+
+ yaml::Input YIn(HSAMetadataString);
+ YIn >> FromHSAMetadataString;
+ if (YIn.error()) {
+ errs() << "FAIL\n";
+ return;
+ }
+
+ std::string ToHSAMetadataString;
+ raw_string_ostream StrOS(ToHSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << FromHSAMetadataString;
+
+ errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
+ if (HSAMetadataString != ToHSAMetadataString) {
+ errs() << "Original input: " << HSAMetadataString << '\n'
+ << "Produced output: " << StrOS.str() << '\n';
+ }
+}
+
+Optional<StringRef>
+MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
+ return StringSwitch<Optional<StringRef>>(AccQual)
+ .Case("read_only", StringRef("read_only"))
+ .Case("write_only", StringRef("write_only"))
+ .Case("read_write", StringRef("read_write"))
+ .Default(None);
+}
+
+Optional<StringRef>
+MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
+ switch (AddressSpace) {
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ return StringRef("private");
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ return StringRef("global");
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ return StringRef("constant");
+ case AMDGPUAS::LOCAL_ADDRESS:
+ return StringRef("local");
+ case AMDGPUAS::FLAT_ADDRESS:
+ return StringRef("generic");
+ case AMDGPUAS::REGION_ADDRESS:
+ return StringRef("region");
+ default:
+ return None;
+ }
+}
+
+StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const {
+ if (TypeQual.find("pipe") != StringRef::npos)
+ return "pipe";
+
+ return StringSwitch<StringRef>(BaseTypeName)
+ .Case("image1d_t", "image")
+ .Case("image1d_array_t", "image")
+ .Case("image1d_buffer_t", "image")
+ .Case("image2d_t", "image")
+ .Case("image2d_array_t", "image")
+ .Case("image2d_array_depth_t", "image")
+ .Case("image2d_array_msaa_t", "image")
+ .Case("image2d_array_msaa_depth_t", "image")
+ .Case("image2d_depth_t", "image")
+ .Case("image2d_msaa_t", "image")
+ .Case("image2d_msaa_depth_t", "image")
+ .Case("image3d_t", "image")
+ .Case("sampler_t", "sampler")
+ .Case("queue_t", "queue")
+ .Default(isa<PointerType>(Ty)
+ ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
+ ? "dynamic_shared_pointer"
+ : "global_buffer")
+ : "by_value");
+}
+
+StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ auto Signed = !TypeName.startswith("u");
+ switch (Ty->getIntegerBitWidth()) {
+ case 8:
+ return Signed ? "i8" : "u8";
+ case 16:
+ return Signed ? "i16" : "u16";
+ case 32:
+ return Signed ? "i32" : "u32";
+ case 64:
+ return Signed ? "i64" : "u64";
+ default:
+ return "struct";
+ }
+ }
+ case Type::HalfTyID:
+ return "f16";
+ case Type::FloatTyID:
+ return "f32";
+ case Type::DoubleTyID:
+ return "f64";
+ case Type::PointerTyID:
+ return getValueType(Ty->getPointerElementType(), TypeName);
+ case Type::VectorTyID:
+ return getValueType(Ty->getVectorElementType(), TypeName);
+ default:
+ return "struct";
+ }
+}
+
+std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ if (!Signed)
+ return (Twine('u') + getTypeName(Ty, true)).str();
+
+ auto BitWidth = Ty->getIntegerBitWidth();
+ switch (BitWidth) {
+ case 8:
+ return "char";
+ case 16:
+ return "short";
+ case 32:
+ return "int";
+ case 64:
+ return "long";
+ default:
+ return (Twine('i') + Twine(BitWidth)).str();
+ }
+ }
+ case Type::HalfTyID:
+ return "half";
+ case Type::FloatTyID:
+ return "float";
+ case Type::DoubleTyID:
+ return "double";
+ case Type::VectorTyID: {
+ auto VecTy = cast<VectorType>(Ty);
+ auto ElTy = VecTy->getElementType();
+ auto NumElements = VecTy->getVectorNumElements();
+ return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
+ }
+ default:
+ return "unknown";
+ }
+}
+
+std::shared_ptr<msgpack::ArrayNode>
+MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
+ auto Dims = std::make_shared<msgpack::ArrayNode>();
+ if (Node->getNumOperands() != 3)
+ return Dims;
+
+ for (auto &Op : Node->operands())
+ Dims->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op)->getZExtValue()));
+ return Dims;
+}
+
+void MetadataStreamerV3::emitVersion() {
+ auto Version = std::make_shared<msgpack::ArrayNode>();
+ Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor));
+ Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor));
+ getRootMetadata("amdhsa.version") = std::move(Version);
+}
+
+void MetadataStreamerV3::emitPrintf(const Module &Mod) {
+ auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
+ if (!Node)
+ return;
+
+ auto Printf = std::make_shared<msgpack::ArrayNode>();
+ for (auto Op : Node->operands())
+ if (Op->getNumOperands())
+ Printf->push_back(std::make_shared<msgpack::ScalarNode>(
+ cast<MDString>(Op->getOperand(0))->getString()));
+ getRootMetadata("amdhsa.printf") = std::move(Printf);
+}
+
+void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
+ msgpack::MapNode &Kern) {
+ // TODO: What about other languages?
+ auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
+ if (!Node || !Node->getNumOperands())
+ return;
+ auto Op0 = Node->getOperand(0);
+ if (Op0->getNumOperands() <= 1)
+ return;
+
+ Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C");
+ auto LanguageVersion = std::make_shared<msgpack::ArrayNode>();
+ LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
+ LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
+ Kern[".language_version"] = std::move(LanguageVersion);
+}
+
+void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
+ msgpack::MapNode &Kern) {
+
+ if (auto Node = Func.getMetadata("reqd_work_group_size"))
+ Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("work_group_size_hint"))
+ Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("vec_type_hint")) {
+ Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName(
+ cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
+ mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()));
+ }
+ if (Func.hasFnAttribute("runtime-handle")) {
+ Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>(
+ Func.getFnAttribute("runtime-handle").getValueAsString().str());
+ }
+}
+
+void MetadataStreamerV3::emitKernelArgs(const Function &Func,
+ msgpack::MapNode &Kern) {
+ unsigned Offset = 0;
+ auto Args = std::make_shared<msgpack::ArrayNode>();
+ for (auto &Arg : Func.args())
+ emitKernelArg(Arg, Offset, *Args);
+
+ emitHiddenKernelArgs(Func, Offset, *Args);
+
+ // TODO: What about other languages?
+ if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
+
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+ // "none" argument.
+ if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
+ else
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+
+ // Emit "default queue" and "completion action" arguments if enqueue kernel
+ // is used, otherwise emit dummy "none" arguments.
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+ }
+ }
+
+ Kern[".args"] = std::move(Args);
+}
+
+void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
+ msgpack::ArrayNode &Args) {
+ auto Func = Arg.getParent();
+ auto ArgNo = Arg.getArgNo();
+ const MDNode *Node;
+
+ StringRef Name;
+ Node = Func->getMetadata("kernel_arg_name");
+ if (Node && ArgNo < Node->getNumOperands())
+ Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ else if (Arg.hasName())
+ Name = Arg.getName();
+
+ StringRef TypeName;
+ Node = Func->getMetadata("kernel_arg_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef BaseTypeName;
+ Node = Func->getMetadata("kernel_arg_base_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef AccQual;
+ if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
+ Arg.hasNoAliasAttr()) {
+ AccQual = "read_only";
+ } else {
+ Node = Func->getMetadata("kernel_arg_access_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ }
+
+ StringRef TypeQual;
+ Node = Func->getMetadata("kernel_arg_type_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ Type *Ty = Arg.getType();
+ const DataLayout &DL = Func->getParent()->getDataLayout();
+
+ unsigned PointeeAlign = 0;
+ if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+ if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ PointeeAlign = Arg.getParamAlignment();
+ if (PointeeAlign == 0)
+ PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
+ }
+ }
+
+ emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
+ getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
+ Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
+ TypeQual);
+}
+
+void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
+ StringRef ValueKind, unsigned &Offset,
+ msgpack::ArrayNode &Args,
+ unsigned PointeeAlign, StringRef Name,
+ StringRef TypeName,
+ StringRef BaseTypeName,
+ StringRef AccQual, StringRef TypeQual) {
+ auto ArgPtr = std::make_shared<msgpack::MapNode>();
+ auto &Arg = *ArgPtr;
+
+ if (!Name.empty())
+ Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name);
+ if (!TypeName.empty())
+ Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName);
+ auto Size = DL.getTypeAllocSize(Ty);
+ auto Align = DL.getABITypeAlignment(Ty);
+ Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size);
+ Offset = alignTo(Offset, Align);
+ Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset);
+ Offset += Size;
+ Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind);
+ Arg[".value_type"] =
+ std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName));
+ if (PointeeAlign)
+ Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign);
+
+ if (auto PtrTy = dyn_cast<PointerType>(Ty))
+ if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
+ Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier);
+
+ if (auto AQ = getAccessQualifier(AccQual))
+ Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ);
+
+ // TODO: Emit Arg[".actual_access"].
+
+ SmallVector<StringRef, 1> SplitTypeQuals;
+ TypeQual.split(SplitTypeQuals, " ", -1, false);
+ for (StringRef Key : SplitTypeQuals) {
+ if (Key == "const")
+ Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "restrict")
+ Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "volatile")
+ Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "pipe")
+ Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true);
+ }
+
+ Args.push_back(std::move(ArgPtr));
+}
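For orientation, a hedged example of how the Offset bookkeeping above packs the .args list; the argument types are assumed for illustration and use the usual 64-bit ABI sizes and alignments:

    // int           : size 4, align 4 -> .offset 0   (Offset becomes 4)
    // long          : size 8, align 8 -> .offset 8   (4 aligned up to 8, Offset 16)
    // global char * : size 8, align 8 -> .offset 16  (Offset becomes 24)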
+
+void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
+ unsigned &Offset,
+ msgpack::ArrayNode &Args) {
+ int HiddenArgNumBytes =
+ getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
+
+ if (!HiddenArgNumBytes)
+ return;
+
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ if (HiddenArgNumBytes >= 8)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
+ if (HiddenArgNumBytes >= 16)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
+ if (HiddenArgNumBytes >= 24)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
+
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+ // "none" argument.
+ if (HiddenArgNumBytes >= 32) {
+ if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
+ else
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ }
+
+ // Emit "default queue" and "completion action" arguments if enqueue kernel is
+ // used, otherwise emit dummy "none" arguments.
+ if (HiddenArgNumBytes >= 48) {
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ }
+ }
+}
+
+std::shared_ptr<msgpack::MapNode>
+MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ const Function &F = MF.getFunction();
+
+ auto HSAKernelProps = std::make_shared<msgpack::MapNode>();
+ auto &Kern = *HSAKernelProps;
+
+ unsigned MaxKernArgAlign;
+ Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>(
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign));
+ Kern[".group_segment_fixed_size"] =
+ std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize);
+ Kern[".private_segment_fixed_size"] =
+ std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize);
+ Kern[".kernarg_segment_align"] =
+ std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign));
+ Kern[".wavefront_size"] =
+ std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize());
+ Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR);
+ Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR);
+ Kern[".max_flat_workgroup_size"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize());
+ Kern[".sgpr_spill_count"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs());
+ Kern[".vgpr_spill_count"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs());
+
+ return HSAKernelProps;
+}
+
+bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
+ return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
+}
+
+void MetadataStreamerV3::begin(const Module &Mod) {
+ emitVersion();
+ emitPrintf(Mod);
+ getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
+}
+
+void MetadataStreamerV3::end() {
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << HSAMetadataRoot;
+
+ if (DumpHSAMetadata)
+ dump(StrOS.str());
+ if (VerifyHSAMetadata)
+ verify(StrOS.str());
+}
+
+void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) {
+ auto &Func = MF.getFunction();
+ auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
+
+ assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+ Func.getCallingConv() == CallingConv::SPIR_KERNEL);
+
+ auto &KernelsNode = getRootMetadata("amdhsa.kernels");
+ auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get());
+
+ {
+ auto &Kern = *KernelProps;
+ Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName());
+ Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>(
+ (Twine(Func.getName()) + Twine(".kd")).str());
+ emitKernelLanguage(Func, Kern);
+ emitKernelAttrs(Func, Kern);
+ emitKernelArgs(Func, Kern);
+ }
+
+ Kernels->push_back(std::move(KernelProps));
+}
+
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 3424c956d781..afc09baf952d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -19,10 +19,12 @@
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/Support/AMDGPUMetadata.h"
namespace llvm {
+class AMDGPUTargetStreamer;
class Argument;
class DataLayout;
class Function;
@@ -34,10 +36,94 @@ class Type;
namespace AMDGPU {
namespace HSAMD {
-class MetadataStreamer final {
+class MetadataStreamer {
+public:
+ virtual ~MetadataStreamer(){};
+
+ virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0;
+
+ virtual void begin(const Module &Mod) = 0;
+
+ virtual void end() = 0;
+
+ virtual void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) = 0;
+};
+
+class MetadataStreamerV3 final : public MetadataStreamer {
+private:
+ std::shared_ptr<msgpack::Node> HSAMetadataRoot =
+ std::make_shared<msgpack::MapNode>();
+
+ void dump(StringRef HSAMetadataString) const;
+
+ void verify(StringRef HSAMetadataString) const;
+
+ Optional<StringRef> getAccessQualifier(StringRef AccQual) const;
+
+ Optional<StringRef> getAddressSpaceQualifier(unsigned AddressSpace) const;
+
+ StringRef getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const;
+
+ StringRef getValueType(Type *Ty, StringRef TypeName) const;
+
+ std::string getTypeName(Type *Ty, bool Signed) const;
+
+ std::shared_ptr<msgpack::ArrayNode>
+ getWorkGroupDimensions(MDNode *Node) const;
+
+ std::shared_ptr<msgpack::MapNode>
+ getHSAKernelProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const;
+
+ void emitVersion();
+
+ void emitPrintf(const Module &Mod);
+
+ void emitKernelLanguage(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelAttrs(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelArgs(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelArg(const Argument &Arg, unsigned &Offset,
+ msgpack::ArrayNode &Args);
+
+ void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind,
+ unsigned &Offset, msgpack::ArrayNode &Args,
+ unsigned PointeeAlign = 0, StringRef Name = "",
+ StringRef TypeName = "", StringRef BaseTypeName = "",
+ StringRef AccQual = "", StringRef TypeQual = "");
+
+ void emitHiddenKernelArgs(const Function &Func, unsigned &Offset,
+ msgpack::ArrayNode &Args);
+
+ std::shared_ptr<msgpack::Node> &getRootMetadata(StringRef Key) {
+ return (*cast<msgpack::MapNode>(HSAMetadataRoot.get()))[Key];
+ }
+
+ std::shared_ptr<msgpack::Node> &getHSAMetadataRoot() {
+ return HSAMetadataRoot;
+ }
+
+public:
+ MetadataStreamerV3() = default;
+ ~MetadataStreamerV3() = default;
+
+ bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
+
+ void begin(const Module &Mod) override;
+
+ void end() override;
+
+ void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) override;
+};
+
+class MetadataStreamerV2 final : public MetadataStreamer {
private:
Metadata HSAMetadata;
- AMDGPUAS AMDGPUASI;
void dump(StringRef HSAMetadataString) const;
@@ -45,7 +131,7 @@ private:
AccessQualifier getAccessQualifier(StringRef AccQual) const;
- AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const;
+ AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const;
ValueKind getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const;
@@ -83,19 +169,22 @@ private:
void emitHiddenKernelArgs(const Function &Func);
-public:
- MetadataStreamer() = default;
- ~MetadataStreamer() = default;
-
const Metadata &getHSAMetadata() const {
return HSAMetadata;
}
- void begin(const Module &Mod);
+public:
+ MetadataStreamerV2() = default;
+ ~MetadataStreamerV2() = default;
+
+ bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
+
+ void begin(const Module &Mod) override;
- void end();
+ void end() override;
- void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo);
+ void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) override;
};
} // end namespace HSAMD
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 7cb0e12a6809..a0a045e72a58 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -29,7 +29,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -72,14 +72,12 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const GCNSubtarget *Subtarget;
- AMDGPUAS AMDGPUASI;
bool EnableLateStructurizeCFG;
public:
explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
: SelectionDAGISel(*TM, OptLevel) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}
~AMDGPUDAGToDAGISel() override = default;
@@ -87,7 +85,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AMDGPUArgumentUsageInfo>();
AU.addRequired<AMDGPUPerfHintAnalysis>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
SelectionDAGISel::getAnalysisUsage(AU);
}
@@ -103,9 +101,12 @@ private:
std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
-
+ bool isVGPRImm(const SDNode *N) const;
+ bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
+ MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
+
SDNode *glueCopyToM0(SDNode *N) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
@@ -140,13 +141,6 @@ private:
SDValue &Offset, SDValue &SLC) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
- bool SelectMUBUFConstant(SDValue Constant,
- SDValue &SOffset,
- SDValue &ImmOffset) const;
- bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
- SDValue &ImmOffset) const;
- bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
- SDValue &ImmOffset, SDValue &VOffset) const;
bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
@@ -224,7 +218,6 @@ protected:
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
const R600Subtarget *Subtarget;
- AMDGPUAS AMDGPUASI;
bool isConstantLoad(const MemSDNode *N, int cbID) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
@@ -232,9 +225,7 @@ class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
SDValue& Offset);
public:
explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
- AMDGPUDAGToDAGISel(TM, OptLevel) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
- }
+ AMDGPUDAGToDAGISel(TM, OptLevel) {}
void Select(SDNode *N) override;
@@ -251,12 +242,12 @@ protected:
} // end anonymous namespace
-INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
+INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
-INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
/// This pass converts a legalized DAG into a AMDGPU-specific
@@ -350,7 +341,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
}
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
- if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
+ if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
!Subtarget->ldsRequiresM0Init())
return N;
@@ -372,6 +363,22 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}
+MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
+ EVT VT) const {
+ SDNode *Lo = CurDAG->getMachineNode(
+ AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
+ SDNode *Hi =
+ CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
+ const SDValue Ops[] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+ SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
+
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
+}
+
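The helper above simply splits a 64-bit immediate into two 32-bit halves before materializing each with S_MOV_B32 and recombining them via REG_SEQUENCE. A minimal standalone sketch of that split (plain C++, no SelectionDAG types):

#include <cassert>
#include <cstdint>

// Sketch only: the lo/hi decomposition buildSMovImm64 feeds into the two
// S_MOV_B32 nodes before recombining them with a REG_SEQUENCE.
int main() {
  uint64_t Imm = 0x123456789ABCDEF0ull;
  uint32_t Lo = static_cast<uint32_t>(Imm & 0xFFFFFFFF);
  uint32_t Hi = static_cast<uint32_t>(Imm >> 32);
  assert(Lo == 0x9ABCDEF0u && Hi == 0x12345678u);
  return 0;
}
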
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
switch (NumVectorElts) {
case 1:
@@ -557,19 +564,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
SDLoc DL(N);
- SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
- MVT::i32));
- SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
- const SDValue Ops[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
- SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
- };
-
- ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
- N->getValueType(0), Ops));
+ ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
return;
}
case ISD::LOAD:
@@ -641,6 +636,20 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case AMDGPUISD::ATOMIC_CMP_SWAP:
SelectATOMIC_CMP_SWAP(N);
return;
+ case AMDGPUISD::CVT_PKRTZ_F16_F32:
+ case AMDGPUISD::CVT_PKNORM_I16_F32:
+ case AMDGPUISD::CVT_PKNORM_U16_F32:
+ case AMDGPUISD::CVT_PK_U16_U32:
+ case AMDGPUISD::CVT_PK_I16_I32: {
+ // Hack around using a legal type if f16 is illegal.
+ if (N->getValueType(0) == MVT::i32) {
+ MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
+ N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
+ { N->getOperand(0), N->getOperand(1) });
+ SelectCode(N);
+ return;
+ }
+ }
}
SelectCode(N);
@@ -969,8 +978,6 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
// default case
- // FIXME: This is broken on SI where we still need to check if the base
- // pointer is positive here.
Base = Addr;
Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
@@ -1000,55 +1007,72 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ ConstantSDNode *C1 = nullptr;
+ SDValue N0 = Addr;
if (CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+ C1 = cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isUInt<32>(C1->getZExtValue()))
+ N0 = Addr.getOperand(0);
+ else
+ C1 = nullptr;
+ }
+
+ if (N0.getOpcode() == ISD::ADD) {
+ // (add N2, N3) -> addr64, or
+ // (add (add N2, N3), C1) -> addr64
+ SDValue N2 = N0.getOperand(0);
+ SDValue N3 = N0.getOperand(1);
+ Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
- if (N0.getOpcode() == ISD::ADD) {
- // (add (add N2, N3), C1) -> addr64
- SDValue N2 = N0.getOperand(0);
- SDValue N3 = N0.getOperand(1);
- Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+ if (N2->isDivergent()) {
+ if (N3->isDivergent()) {
+ // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
+ // addr64, and construct the resource from a 0 address.
+ Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
+ VAddr = N0;
+ } else {
+ // N2 is divergent, N3 is not.
+ Ptr = N3;
+ VAddr = N2;
+ }
+ } else {
+ // N2 is not divergent.
Ptr = N2;
VAddr = N3;
- } else {
- // (add N0, C1) -> offset
- VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Ptr = N0;
- }
-
- if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
- return true;
}
-
- if (isUInt<32>(C1->getZExtValue())) {
- // Illegal offset, store it in soffset.
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
- 0);
- return true;
- }
- }
-
- if (Addr.getOpcode() == ISD::ADD) {
- // (add N0, N1) -> addr64
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ } else if (N0->isDivergent()) {
+ // N0 is divergent. Use it as the addr64, and construct the resource from a
+ // 0 address.
+ Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
+ VAddr = N0;
Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+ } else {
+ // N0 -> offset, or
+ // (N0 + C1) -> offset
+ VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = N0;
- VAddr = N1;
+ }
+
+ if (!C1) {
+ // No offset.
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
- // default case -> offset
- VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Ptr = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
+ // Legal offset for instruction.
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ return true;
+ }
+ // Illegal offset, store it in soffset.
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ SOffset =
+ SDValue(CurDAG->getMachineNode(
+ AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
+ 0);
return true;
}
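SelectMUBUF now classifies the constant offset C1 three ways: absent, small enough for the instruction's immediate field, or spilled into SOffset with an S_MOV_B32. A minimal sketch of that decision, assuming the MUBUF immediate offset field holds an unsigned 12-bit value (0..4095), which matches the MaxImm of 4095 used by the SelectMUBUFConstant code removed further down; the helper name here is made up for illustration:

#include <cassert>
#include <cstdint>

// Sketch only: offset classification assuming a 12-bit unsigned immediate
// field; offsets that do not fit go into SOffset via an S_MOV_B32.
enum class OffsetKind { None, Immediate, SOffset };

OffsetKind classifyMUBUFOffset(const uint64_t *C1) {
  if (!C1)
    return OffsetKind::None;
  return *C1 < 4096 ? OffsetKind::Immediate : OffsetKind::SOffset;
}

int main() {
  uint64_t Small = 4095, Large = 4096;
  assert(classifyMUBUFOffset(nullptr) == OffsetKind::None);
  assert(classifyMUBUFOffset(&Small) == OffsetKind::Immediate);
  assert(classifyMUBUFOffset(&Large) == OffsetKind::SOffset);
  return 0;
}
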
@@ -1252,101 +1276,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
- SDValue &SOffset,
- SDValue &ImmOffset) const {
- SDLoc DL(Constant);
- const uint32_t Align = 4;
- const uint32_t MaxImm = alignDown(4095, Align);
- uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
- uint32_t Overflow = 0;
-
- if (Imm > MaxImm) {
- if (Imm <= MaxImm + 64) {
- // Use an SOffset inline constant for 4..64
- Overflow = Imm - MaxImm;
- Imm = MaxImm;
- } else {
- // Try to keep the same value in SOffset for adjacent loads, so that
- // the corresponding register contents can be re-used.
- //
- // Load values with all low-bits (except for alignment bits) set into
- // SOffset, so that a larger range of values can be covered using
- // s_movk_i32.
- //
- // Atomic operations fail to work correctly when individual address
- // components are unaligned, even if their sum is aligned.
- uint32_t High = (Imm + Align) & ~4095;
- uint32_t Low = (Imm + Align) & 4095;
- Imm = Low;
- Overflow = High - Align;
- }
- }
-
- // There is a hardware bug in SI and CI which prevents address clamping in
- // MUBUF instructions from working correctly with SOffsets. The immediate
- // offset is unaffected.
- if (Overflow > 0 &&
- Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
- return false;
-
- ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
-
- if (Overflow <= 64)
- SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
- else
- SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
- 0);
-
- return true;
-}
-
-bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
- SDValue &SOffset,
- SDValue &ImmOffset) const {
- SDLoc DL(Offset);
-
- if (!isa<ConstantSDNode>(Offset))
- return false;
-
- return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
-}
-
-bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
- SDValue &SOffset,
- SDValue &ImmOffset,
- SDValue &VOffset) const {
- SDLoc DL(Offset);
-
- // Don't generate an unnecessary voffset for constant offsets.
- if (isa<ConstantSDNode>(Offset)) {
- SDValue Tmp1, Tmp2;
-
- // When necessary, use a voffset in <= CI anyway to work around a hardware
- // bug.
- if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
- SelectMUBUFConstant(Offset, Tmp1, Tmp2))
- return false;
- }
-
- if (CurDAG->isBaseWithConstantOffset(Offset)) {
- SDValue N0 = Offset.getOperand(0);
- SDValue N1 = Offset.getOperand(1);
- if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
- SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
- VOffset = N0;
- return true;
- }
- }
-
- SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
- ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- VOffset = Offset;
-
- return true;
-}
-
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
SDValue &VAddr,
@@ -1525,9 +1454,13 @@ bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
// (add n0, c0)
- Base = N0;
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
- return true;
+ // Don't peel off the offset (c0) if doing so could possibly lead
+ // the base (n0) to be negative.
+ if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
+ Base = N0;
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
+ return true;
+ }
}
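A small arithmetic illustration of the hazard the new guard avoids: peeling a positive constant off a base whose sign is unknown can leave a negative base even though the combined index was valid. This sketches only the arithmetic, not the selector:

#include <cassert>

// Sketch only: with n0 possibly negative, (n0 + c0) may be a valid index
// while the peeled base n0 alone is not.
int main() {
  int N0 = -2, C0 = 5;
  int Index = N0 + C0; // 3: the combined index is non-negative
  assert(Index >= 0);
  assert(N0 < 0);      // but the peeled base would be negative
  return 0;
}
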
if (isa<ConstantSDNode>(Index))
@@ -1768,7 +1701,7 @@ void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
MemSDNode *Mem = cast<MemSDNode>(N);
unsigned AS = Mem->getAddressSpace();
- if (AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::FLAT_ADDRESS) {
SelectCode(N);
return;
}
@@ -1816,9 +1749,8 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
return;
}
- MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
- *MMOs = Mem->getMemOperand();
- CmpSwap->setMemRefs(MMOs, MMOs + 1);
+ MachineMemOperand *MMO = Mem->getMemOperand();
+ CurDAG->setNodeMemRefs(CmpSwap, {MMO});
unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
SDValue Extract
@@ -2117,6 +2049,80 @@ bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
return isExtractHiElt(In, Src);
}
+bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ return false;
+ }
+ const SIRegisterInfo *SIRI =
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ const SIInstrInfo * SII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+
+ unsigned Limit = 0;
+ bool AllUsesAcceptSReg = true;
+ for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
+ Limit < 10 && U != E; ++U, ++Limit) {
+ const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
+
+ // If the register class is unknown, it could be one that needs to be an
+ // SGPR, e.g. an inline asm constraint.
+ if (!RC || SIRI->isSGPRClass(RC))
+ return false;
+
+ if (RC != &AMDGPU::VS_32RegClass) {
+ AllUsesAcceptSReg = false;
+ SDNode * User = *U;
+ if (User->isMachineOpcode()) {
+ unsigned Opc = User->getMachineOpcode();
+ MCInstrDesc Desc = SII->get(Opc);
+ if (Desc.isCommutable()) {
+ unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
+ unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
+ unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
+ const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
+ if (CommutedRC == &AMDGPU::VS_32RegClass)
+ AllUsesAcceptSReg = true;
+ }
+ }
+ }
+ // If "AllUsesAcceptSReg == false" so far we haven't suceeded
+ // commuting current user. This means have at least one use
+ // that strictly require VGPR. Thus, we will not attempt to commute
+ // other user instructions.
+ if (!AllUsesAcceptSReg)
+ break;
+ }
+ }
+ return !AllUsesAcceptSReg && (Limit < 10);
+}
+
+bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
+  auto Ld = cast<LoadSDNode>(N);
+
+  return Ld->getAlignment() >= 4 &&
+         (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+            Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+           !N->isDivergent()) ||
+          (Subtarget->getScalarizeGlobalBehavior() &&
+           Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+           !Ld->isVolatile() && !N->isDivergent() &&
+           static_cast<const SITargetLowering *>(getTargetLowering())
+               ->isMemOpHasNoClobberedMemOperand(N)));
+}
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
@@ -2152,10 +2158,10 @@ bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
if (CbId == -1)
- return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
- return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
}
bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 21e44e9589d3..6951c915b177 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -128,10 +128,8 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
}
unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
- KnownBits Known;
EVT VT = Op.getValueType();
- DAG.computeKnownBits(Op, Known);
-
+ KnownBits Known = DAG.computeKnownBits(Op);
return VT.getSizeInBits() - Known.countMinLeadingZeros();
}
@@ -146,7 +144,6 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::LOAD, MVT::f32, Promote);
@@ -318,6 +315,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG, MVT::f32, Custom);
setOperationAction(ISD::FLOG10, MVT::f32, Custom);
+ setOperationAction(ISD::FEXP, MVT::f32, Custom);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
@@ -450,6 +448,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
@@ -470,6 +469,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
// This causes using an unrolled select operation rather than expansion with
@@ -550,6 +550,8 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case ISD::FMAD:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
case ISD::FSIN:
case ISD::FTRUNC:
case ISD::FRINT:
@@ -562,6 +564,7 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case AMDGPUISD::FMUL_LEGACY:
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY:
+ case AMDGPUISD::FMED3:
return true;
default:
return false;
@@ -650,8 +653,11 @@ bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
}
bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
- ISD::LoadExtType,
+ ISD::LoadExtType ExtTy,
EVT NewVT) const {
+ // TODO: This may be worth removing. Check regression tests for diffs.
+ if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
+ return false;
unsigned NewSize = NewVT.getStoreSizeInBits();
@@ -662,6 +668,18 @@ bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
EVT OldVT = N->getValueType(0);
unsigned OldSize = OldVT.getStoreSizeInBits();
+ MemSDNode *MN = cast<MemSDNode>(N);
+ unsigned AS = MN->getAddressSpace();
+ // Do not shrink an aligned scalar load to sub-dword.
+ // Scalar engine cannot do sub-dword loads.
+ if (OldSize >= 32 && NewSize < 32 && MN->getAlignment() >= 4 &&
+ (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ (isa<LoadSDNode>(N) &&
+ AS == AMDGPUAS::GLOBAL_ADDRESS && MN->isInvariant())) &&
+ AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand()))
+ return false;
+
// Don't produce extloads from sub 32-bit types. SI doesn't have scalar
// extloads, so doing one requires using a buffer_load. In cases where we
// still couldn't use a scalar load, using the wider load shouldn't really
@@ -722,7 +740,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {
{
const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
if (L->getMemOperand()->getAddrSpace()
- == AMDGPUASI.CONSTANT_ADDRESS_32BIT)
+ == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return true;
return false;
}
@@ -1140,6 +1158,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
return LowerFLOG(Op, DAG, 1 / AMDGPU_LOG2E_F);
case ISD::FLOG10:
return LowerFLOG(Op, DAG, AMDGPU_LN2_F / AMDGPU_LN10_F);
+ case ISD::FEXP:
+ return lowerFEXP(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
@@ -1188,8 +1208,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
- if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
- G->getAddressSpace() == AMDGPUASI.REGION_ADDRESS) {
+ if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
if (!MFI->isEntryFunction()) {
const Function &Fn = DAG.getMachineFunction().getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
@@ -2213,6 +2233,34 @@ SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand);
}
+// Return M_LOG2E of the appropriate type.
+static SDValue getLog2EVal(SelectionDAG &DAG, const SDLoc &SL, EVT VT) {
+ switch (VT.getScalarType().getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ return DAG.getConstantFP(1.44269504088896340735992468100189214f, SL, VT);
+ case MVT::f16:
+ return DAG.getConstantFP(
+ APFloat(APFloat::IEEEhalf(), "1.44269504088896340735992468100189214"),
+ SL, VT);
+ case MVT::f64:
+ return DAG.getConstantFP(
+ APFloat(APFloat::IEEEdouble(), "0x1.71547652b82fep+0"), SL, VT);
+ default:
+ llvm_unreachable("unsupported fp type");
+ }
+}
+
+// exp2(M_LOG2E_F * f);
+SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ const SDValue K = getLog2EVal(DAG, SL, VT);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags());
+ return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags());
+}
+
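lowerFEXP rewrites exp(x) as exp2(x * log2(e)). A minimal numeric sanity check of that identity using the standard C++ math library, independent of any AMDGPU lowering:

#include <cassert>
#include <cmath>

// Sketch only: exp(x) == exp2(x * log2(e)) up to rounding, the identity the
// FEXP -> FMUL + FEXP2 lowering relies on.
int main() {
  const double Log2E = 1.4426950408889634; // log2(e), cf. getLog2EVal above
  for (double X : {-2.0, -0.5, 0.0, 1.0, 3.25}) {
    double Direct = std::exp(X);
    double ViaExp2 = std::exp2(X * Log2E);
    assert(std::fabs(Direct - ViaExp2) <= 1e-12 * std::fabs(Direct) + 1e-15);
  }
  return 0;
}
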
static bool isCtlzOpc(unsigned Opc) {
return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
}
@@ -2669,21 +2717,33 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) {
AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24;
}
-static bool simplifyI24(SDNode *Node24, unsigned OpIdx,
- TargetLowering::DAGCombinerInfo &DCI) {
-
+static SDValue simplifyI24(SDNode *Node24,
+ TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
- SDValue Op = Node24->getOperand(OpIdx);
+ SDValue LHS = Node24->getOperand(0);
+ SDValue RHS = Node24->getOperand(1);
+
+ APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
+
+ // First try to simplify using GetDemandedBits which allows the operands to
+ // have other uses, but will only perform simplifications that involve
+ // bypassing some nodes for this user.
+ SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded);
+ SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded);
+ if (DemandedLHS || DemandedRHS)
+ return DAG.getNode(Node24->getOpcode(), SDLoc(Node24), Node24->getVTList(),
+ DemandedLHS ? DemandedLHS : LHS,
+ DemandedRHS ? DemandedRHS : RHS);
+
+ // Now try SimplifyDemandedBits which can simplify the nodes used by our
+ // operands if this node is the only user.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = Op.getValueType();
+ if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI))
+ return SDValue(Node24, 0);
+ if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI))
+ return SDValue(Node24, 0);
- APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
- if (TLI.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI, TLO))
- return true;
-
- return false;
+ return SDValue();
}
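The rewritten simplifyI24 demands only the low 24 bits of each operand. A minimal sketch of that mask, assuming the LLVM ADT headers are available on the include path:

#include <cassert>
#include "llvm/ADT/APInt.h"

// Sketch only: the demanded-bits mask built above for 32-bit operands keeps
// bits [23:0], i.e. 0x00FFFFFF; anything above bit 23 may be simplified away.
int main() {
  llvm::APInt Demanded = llvm::APInt::getLowBitsSet(32, 24);
  assert(Demanded.getZExtValue() == 0x00FFFFFFu);
  return 0;
}
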
template <typename IntTy>
@@ -2920,8 +2980,7 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
// shl (ext x) => zext (shl x), if shift does not overflow int
if (VT != MVT::i64)
break;
- KnownBits Known;
- DAG.computeKnownBits(X, Known);
+ KnownBits Known = DAG.computeKnownBits(X);
unsigned LZ = Known.countMinLeadingZeros();
if (LZ < RHSVal)
break;
@@ -3080,8 +3139,7 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
Src.getOpcode() == ISD::SRA ||
Src.getOpcode() == ISD::SHL)) {
SDValue Amt = Src.getOperand(1);
- KnownBits Known;
- DAG.computeKnownBits(Amt, Known);
+ KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
(Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size))) {
@@ -3233,8 +3291,8 @@ SDValue AMDGPUTargetLowering::performMulLoHi24Combine(
SelectionDAG &DAG = DCI.DAG;
// Simplify demanded bits before splitting into multiple users.
- if (simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI))
- return SDValue();
+ if (SDValue V = simplifyI24(N, DCI))
+ return V;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3449,9 +3507,27 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
}
-static bool isConstantFPZero(SDValue N) {
- if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
- return C->isZero() && !C->isNegative();
+static bool isInv2Pi(const APFloat &APF) {
+ static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
+ static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
+ static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882));
+
+ return APF.bitwiseIsEqual(KF16) ||
+ APF.bitwiseIsEqual(KF32) ||
+ APF.bitwiseIsEqual(KF64);
+}
+
+// +0.0 and 1.0 / (2.0 * pi) have inline immediates, but their negated forms
+// do not, so negating them carries an additional cost.
+bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const {
+ if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N)) {
+ if (C->isZero() && !C->isNegative())
+ return true;
+
+ if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))
+ return true;
+ }
+
return false;
}
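The three bit patterns tested by isInv2Pi are the half, single, and double encodings of 1/(2*pi). A minimal check of the single-precision pattern, assuming a math.h that provides M_PI:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Sketch only: 0x3e22f983 reinterpreted as an IEEE-754 float is the nearest
// single-precision value to 1/(2*pi), the constant guarded by
// hasInv2PiInlineImm().
int main() {
  const uint32_t Bits = 0x3e22f983u;
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  assert(std::fabs(F - 1.0 / (2.0 * M_PI)) < 1e-8);
  return 0;
}
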
@@ -3461,6 +3537,10 @@ static unsigned inverseMinMax(unsigned Opc) {
return ISD::FMINNUM;
case ISD::FMINNUM:
return ISD::FMAXNUM;
+ case ISD::FMAXNUM_IEEE:
+ return ISD::FMINNUM_IEEE;
+ case ISD::FMINNUM_IEEE:
+ return ISD::FMAXNUM_IEEE;
case AMDGPUISD::FMAX_LEGACY:
return AMDGPUISD::FMIN_LEGACY;
case AMDGPUISD::FMIN_LEGACY:
@@ -3566,6 +3646,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
}
case ISD::FMAXNUM:
case ISD::FMINNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMIN_LEGACY: {
// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
@@ -3577,9 +3659,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDValue RHS = N0.getOperand(1);
// 0 doesn't have a negated inline immediate.
- // TODO: Shouldn't fold 1/2pi either, and should be generalized to other
- // operations.
- if (isConstantFPZero(RHS))
+ // TODO: This constant check should be generalized to other operations.
+ if (isConstantCostlierToNegate(RHS))
return SDValue();
SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
@@ -3591,6 +3672,16 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
}
+ case AMDGPUISD::FMED3: {
+ SDValue Ops[3];
+ for (unsigned I = 0; I < 3; ++I)
+ Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags());
+
+ SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
+ if (!N0.hasOneUse())
+ DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
+ return Res;
+ }
case ISD::FP_EXTEND:
case ISD::FTRUNC:
case ISD::FRINT:
@@ -3737,9 +3828,10 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
if (Src.getValueType() == MVT::i64) {
SDLoc SL(N);
uint64_t CVal = C->getZExtValue();
- return DAG.getNode(ISD::BUILD_VECTOR, SL, DestVT,
- DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
- DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+ DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
+ DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
}
}
@@ -3786,9 +3878,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::MUL_U24:
case AMDGPUISD::MULHI_I24:
case AMDGPUISD::MULHI_U24: {
- // If the first call to simplify is successfull, then N may end up being
- // deleted, so we shouldn't call simplifyI24 again.
- simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI);
+ if (SDValue V = simplifyI24(N, DCI))
+ return V;
return SDValue();
}
case AMDGPUISD::MUL_LOHI_I24:
@@ -3943,13 +4034,12 @@ SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
- SDValue StackPtr,
SDValue ArgVal,
int64_t Offset) const {
MachineFunction &MF = DAG.getMachineFunction();
MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
- SDValue Ptr = DAG.getObjectPtrOffset(SL, StackPtr, Offset);
+ SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, 4,
MachineMemOperand::MODereferenceable);
return Store;
@@ -4111,6 +4201,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUFFER_LOAD)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)
+ NODE_NAME_CASE(SBUFFER_LOAD)
NODE_NAME_CASE(BUFFER_STORE)
NODE_NAME_CASE(BUFFER_STORE_FORMAT)
NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16)
@@ -4210,33 +4301,42 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
}
case AMDGPUISD::MUL_U24:
case AMDGPUISD::MUL_I24: {
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(Op.getOperand(0), LHSKnown, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(1), RHSKnown, Depth + 1);
-
+ KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
unsigned TrailZ = LHSKnown.countMinTrailingZeros() +
RHSKnown.countMinTrailingZeros();
Known.Zero.setLowBits(std::min(TrailZ, 32u));
- unsigned LHSValBits = 32 - std::max(LHSKnown.countMinSignBits(), 8u);
- unsigned RHSValBits = 32 - std::max(RHSKnown.countMinSignBits(), 8u);
- unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
- if (MaxValBits >= 32)
- break;
+ // Truncate to 24 bits.
+ LHSKnown = LHSKnown.trunc(24);
+ RHSKnown = RHSKnown.trunc(24);
+
bool Negative = false;
if (Opc == AMDGPUISD::MUL_I24) {
- bool LHSNegative = !!(LHSKnown.One & (1 << 23));
- bool LHSPositive = !!(LHSKnown.Zero & (1 << 23));
- bool RHSNegative = !!(RHSKnown.One & (1 << 23));
- bool RHSPositive = !!(RHSKnown.Zero & (1 << 23));
+ unsigned LHSValBits = 24 - LHSKnown.countMinSignBits();
+ unsigned RHSValBits = 24 - RHSKnown.countMinSignBits();
+ unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
+ if (MaxValBits >= 32)
+ break;
+ bool LHSNegative = LHSKnown.isNegative();
+ bool LHSPositive = LHSKnown.isNonNegative();
+ bool RHSNegative = RHSKnown.isNegative();
+ bool RHSPositive = RHSKnown.isNonNegative();
if ((!LHSNegative && !LHSPositive) || (!RHSNegative && !RHSPositive))
break;
Negative = (LHSNegative && RHSPositive) || (LHSPositive && RHSNegative);
- }
- if (Negative)
- Known.One.setHighBits(32 - MaxValBits);
- else
+ if (Negative)
+ Known.One.setHighBits(32 - MaxValBits);
+ else
+ Known.Zero.setHighBits(32 - MaxValBits);
+ } else {
+ unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros();
+ unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros();
+ unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
+ if (MaxValBits >= 32)
+ break;
Known.Zero.setHighBits(32 - MaxValBits);
+ }
break;
}
case AMDGPUISD::PERM: {
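The MUL_I24/MUL_U24 known-bits hunk above bounds the product by the number of significant value bits in each 24-bit operand: if they need A and B bits, the product needs at most A + B, so the remaining high bits of the 32-bit result are known. A minimal arithmetic sketch of the unsigned case:

#include <cassert>
#include <cstdint>

// Sketch only: an operand below 2^A times an operand below 2^B stays below
// 2^(A+B), which is why the high 32 - (A + B) result bits are known zero.
int main() {
  const uint32_t LHS = 0x3FF; // needs 10 value bits
  const uint32_t RHS = 0xFFF; // needs 12 value bits
  const uint64_t Prod = static_cast<uint64_t>(LHS) * RHS;
  assert(Prod < (1ull << 22)); // 10 + 12 = 22 bits suffice
  return 0;
}
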
@@ -4244,9 +4344,8 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
if (!CMask)
return;
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(Op.getOperand(0), LHSKnown, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(1), RHSKnown, Depth + 1);
+ KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
unsigned Sel = CMask->getZExtValue();
for (unsigned I = 0; I < 32; I += 8) {
@@ -4320,3 +4419,107 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
}
+
+bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case AMDGPUISD::FMIN_LEGACY:
+ case AMDGPUISD::FMAX_LEGACY: {
+ if (SNaN)
+ return true;
+
+ // TODO: It may suffice to check that just one of the operands is never NaN
+ // for each of these, but it is not clear which one.
+ return false;
+ }
+ case AMDGPUISD::FMUL_LEGACY:
+ case AMDGPUISD::CVT_PKRTZ_F16_F32: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case AMDGPUISD::FMED3:
+ case AMDGPUISD::FMIN3:
+ case AMDGPUISD::FMAX3:
+ case AMDGPUISD::FMAD_FTZ: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ }
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3:
+ return true;
+
+ case AMDGPUISD::RCP:
+ case AMDGPUISD::RSQ:
+ case AMDGPUISD::RCP_LEGACY:
+ case AMDGPUISD::RSQ_LEGACY:
+ case AMDGPUISD::RSQ_CLAMP: {
+ if (SNaN)
+ return true;
+
+ // TODO: Need an is-known-positive check.
+ return false;
+ }
+ case AMDGPUISD::LDEXP:
+ case AMDGPUISD::FRACT: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case AMDGPUISD::DIV_SCALE:
+ case AMDGPUISD::DIV_FMAS:
+ case AMDGPUISD::DIV_FIXUP:
+ case AMDGPUISD::TRIG_PREOP:
+ // TODO: Refine on operands.
+ return SNaN;
+ case AMDGPUISD::SIN_HW:
+ case AMDGPUISD::COS_HW: {
+ // TODO: Need a check for infinity.
+ return SNaN;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID
+ = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ // TODO: Handle more intrinsics
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_cubeid:
+ return true;
+
+ case Intrinsic::amdgcn_frexp_mant: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case Intrinsic::amdgcn_cvt_pkrtz: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ }
+ case Intrinsic::amdgcn_fdot2:
+ // TODO: Refine on operand
+ return SNaN;
+ default:
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+}
+
+TargetLowering::AtomicExpansionKind
+AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+ if (RMW->getOperation() == AtomicRMWInst::Nand)
+ return AtomicExpansionKind::CmpXChg;
+ return AtomicExpansionKind::None;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index a4c3b413e103..0d22cb2e3e20 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -41,8 +41,6 @@ public:
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
protected:
- AMDGPUAS AMDGPUASI;
-
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
/// Split a vector store into multiple scalar stores.
@@ -58,8 +56,9 @@ protected:
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFLOG(SDValue Op, SelectionDAG &Dag,
+ SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG,
double Log2BaseInverted) const;
+ SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;
@@ -95,6 +94,8 @@ protected:
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
SDValue RHS, DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ bool isConstantCostlierToNegate(SDValue N) const;
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -246,6 +247,11 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN = false,
+ unsigned Depth = 0) const override;
+
/// Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
///
@@ -279,7 +285,6 @@ public:
SDValue storeStackInputValue(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
- SDValue StackPtr,
SDValue ArgVal,
int64_t Offset) const;
@@ -299,13 +304,11 @@ public:
uint32_t getImplicitParameterOffset(const MachineFunction &MF,
const ImplicitParameter Param) const;
- AMDGPUAS getAMDGPUAS() const {
- return AMDGPUASI;
- }
-
MVT getFenceOperandTy(const DataLayout &DL) const override {
return MVT::i32;
}
+
+ AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
};
namespace AMDGPUISD {
@@ -357,6 +360,7 @@ enum NodeType : unsigned {
SIN_HW,
FMAX_LEGACY,
FMIN_LEGACY,
+
FMAX3,
SMAX3,
UMAX3,
@@ -479,6 +483,7 @@ enum NodeType : unsigned {
BUFFER_LOAD,
BUFFER_LOAD_FORMAT,
BUFFER_LOAD_FORMAT_D16,
+ SBUFFER_LOAD,
BUFFER_STORE,
BUFFER_STORE_FORMAT,
BUFFER_STORE_FORMAT_D16,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index 35dd9eb0a478..945c9acd379a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -44,7 +44,7 @@ ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200),
cl::desc("Cost of alloca argument"));
// If the amount of scratch memory to eliminate exceeds our ability to allocate
-// it into registers we gain nothing by agressively inlining functions for that
+// it into registers we gain nothing by aggressively inlining functions for that
// heuristic.
static cl::opt<unsigned>
ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
@@ -118,8 +118,6 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
if (!Callee)
return (unsigned)Thres;
- const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*Caller->getParent());
-
// If we have a pointer to private array passed into a function
// it will not be optimized out, leaving scratch usage.
// Increase the inline threshold to allow inlining in this case.
@@ -128,7 +126,7 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
for (Value *PtrArg : CS.args()) {
Type *Ty = PtrArg->getType();
if (!Ty->isPointerTy() ||
- Ty->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
+ Ty->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
continue;
PtrArg = GetUnderlyingObject(PtrArg, DL);
if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
@@ -174,18 +172,23 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
Function *Caller = CS.getCaller();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
- if (!Callee || Callee->isDeclaration() || CS.isNoInline() ||
- !TTI.areInlineCompatible(Caller, Callee))
- return llvm::InlineCost::getNever();
+ if (!Callee || Callee->isDeclaration())
+ return llvm::InlineCost::getNever("undefined callee");
+
+ if (CS.isNoInline())
+ return llvm::InlineCost::getNever("noinline");
+
+ if (!TTI.areInlineCompatible(Caller, Callee))
+ return llvm::InlineCost::getNever("incompatible");
if (CS.hasFnAttr(Attribute::AlwaysInline)) {
if (isInlineViable(*Callee))
- return llvm::InlineCost::getAlways();
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getAlways("alwaysinline viable");
+ return llvm::InlineCost::getNever("alwaysinline unviable");
}
if (isWrapperOnlyCall(CS))
- return llvm::InlineCost::getAlways();
+ return llvm::InlineCost::getAlways("wrapper-only call");
InlineParams LocalParams = Params;
LocalParams.DefaultThreshold = (int)getInlineThreshold(CS);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 7442a59e594f..82644be26563 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -62,18 +62,10 @@ def AMDGPULoopOp : SDTypeProfile<0, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
>;
-def AMDGPUBreakOp : SDTypeProfile<1, 1,
- [SDTCisVT<0, i64>, SDTCisVT<1, i64>]
->;
-
def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
>;
-def AMDGPUElseBreakOp : SDTypeProfile<1, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>]
->;
-
def AMDGPUAddeSubeOp : SDTypeProfile<2, 3,
[SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>]
>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 219d430fbb39..8eb49d49b2e0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -55,7 +55,6 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
- ,AMDGPUASI(STI.getAMDGPUAS())
{
}
@@ -506,8 +505,8 @@ bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
if (!I.hasOneMemOperand())
return false;
- if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
- (*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT)
+ if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+ (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return false;
if (!isInstrUniform(I))
@@ -631,6 +630,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
return selectImpl(I, CoverageInfo);
case TargetOpcode::G_ADD:
return selectG_ADD(I);
+ case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_BITCAST:
return selectCOPY(I);
case TargetOpcode::G_CONSTANT:
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 68b40b20aca2..449431adc561 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -105,9 +105,6 @@ private:
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
-
-protected:
- AMDGPUAS AMDGPUASI;
};
} // End llvm namespace.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index c9c932ef2f5f..eb8f2002ff2d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -135,6 +135,12 @@ def brtarget : Operand<OtherVT>;
// Misc. PatFrags
//===----------------------------------------------------------------------===//
+class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
+ (ops node:$src0),
+ (op $src0),
+ [{ return N->hasOneUse(); }]
+>;
+
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
(ops node:$src0, node:$src1),
(op $src0, $src1),
@@ -152,13 +158,21 @@ def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
+
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
+
+def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
+def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;
+
+
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]
+def not_oneuse : HasOneUseUnaryOp<not>;
+
def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;
@@ -167,6 +181,9 @@ def shl_oneuse : HasOneUseBinOp<shl>;
def select_oneuse : HasOneUseTernaryOp<select>;
+def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
+def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
+
def srl_16 : PatFrag<
(ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;
@@ -328,37 +345,37 @@ class StoreHi16<SDPatternOperator op> : PatFrag <
>;
class PrivateAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
}]>;
class ConstantAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
class LocalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
class GlobalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class GlobalLoadAddress : CodePatPred<[{
auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS;
+ return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
class FlatLoadAddress : CodePatPred<[{
const auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS;
+ return AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
class FlatStoreAddress : CodePatPred<[{
const auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.GLOBAL_ADDRESS;
+ return AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
@@ -480,7 +497,7 @@ def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
class local_binary_atomic_op<SDNode atomic_op> :
PatFrag<(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
@@ -497,14 +514,14 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
(ops node:$ptr, node:$cmp, node:$swap),
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
AtomicSDNode *AN = cast<AtomicSDNode>(N);
- return AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+ return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
@@ -513,17 +530,17 @@ multiclass global_binary_atomic_op<SDNode atomic_op> {
def "" : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
def _noret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def _ret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}
defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
@@ -550,12 +567,12 @@ def atomic_cmp_swap_global : PatFrag<
def atomic_cmp_swap_global_noret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def atomic_cmp_swap_global_ret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
@@ -787,18 +804,30 @@ class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
(BIT_ALIGN $src0, $src0, $src1)
>;
-// This matches 16 permutations of
-// max(min(x, y), min(max(x, y), z))
-class IntMed3Pat<Instruction med3Inst,
+multiclass IntMed3Pat<Instruction med3Inst,
+ SDPatternOperator min,
SDPatternOperator max,
- SDPatternOperator max_oneuse,
SDPatternOperator min_oneuse,
- ValueType vt = i32> : AMDGPUPat<
+ SDPatternOperator max_oneuse,
+ ValueType vt = i32> {
+
+ // This matches 16 permutations of
+ // min(max(a, b), max(min(a, b), c))
+ def : AMDGPUPat <
+ (min (max_oneuse vt:$src0, vt:$src1),
+ (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
+ (med3Inst vt:$src0, vt:$src1, vt:$src2)
+>;
+
+ // This matches 16 permutations of
+ // max(min(x, y), min(max(x, y), z))
+ def : AMDGPUPat <
(max (min_oneuse vt:$src0, vt:$src1),
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst $src0, $src1, $src2)
>;
-
+}
+
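Both patterns produced by IntMed3Pat are median selections: min(max(a, b), max(min(a, b), c)) and its dual pick the middle of the three inputs whenever the one-use constraints hold. A minimal sketch of the identity in plain C++:

#include <algorithm>
#include <cassert>

// Sketch only: the min/max composition the med3 patterns match computes the
// median of its three operands.
int med3(int A, int B, int C) {
  return std::min(std::max(A, B), std::max(std::min(A, B), C));
}

int main() {
  assert(med3(1, 5, 3) == 3);
  assert(med3(7, 2, 9) == 7);
  assert(med3(4, 4, 1) == 4);
  return 0;
}
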
// Special conversion patterns
def cvt_rpi_i32_f32 : PatFrag <
@@ -813,6 +842,7 @@ def cvt_flr_i32_f32 : PatFrag <
[{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
+let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
(add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
!if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
@@ -824,6 +854,7 @@ class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
!if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
(Inst $src0, $src1, $src2))
>;
+} // AddedComplexity.
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
(fdiv FP_ONE, vt:$src),
@@ -834,3 +865,25 @@ class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
(AMDGPUrcp (fsqrt vt:$src)),
(RsqInst $src)
>;
+
+// Instructions which select to the same v_min_f*
+def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
+ [(fminnum_ieee node:$src0, node:$src1),
+ (fminnum node:$src0, node:$src1)]
+>;
+
+// Instructions which select to the same v_max_f*
+def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
+ [(fmaxnum_ieee node:$src0, node:$src1),
+ (fmaxnum node:$src0, node:$src1)]
+>;
+
+def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
+ [(fminnum_ieee_oneuse node:$src0, node:$src1),
+ (fminnum_oneuse node:$src0, node:$src1)]
+>;
+
+def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
+ [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
+ (fmaxnum_oneuse node:$src0, node:$src1)]
+>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
index 896e2055cf62..02108ca3ddd7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
@@ -40,7 +40,7 @@ StringRef AMDGPUIntrinsicInfo::getName(unsigned IntrID,
if (IntrID < Intrinsic::num_intrinsics)
return StringRef();
- assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
+ assert(IntrID < SIIntrinsic::num_AMDGPU_intrinsics &&
"Invalid intrinsic ID");
return IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics];
@@ -91,7 +91,7 @@ Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
= cast<Function>(M->getOrInsertFunction(getName(IntrID, Tys), FTy));
AttributeList AS =
- getAttributes(M->getContext(), static_cast<AMDGPUIntrinsic::ID>(IntrID));
+ getAttributes(M->getContext(), static_cast<SIIntrinsic::ID>(IntrID));
F->setAttributes(AS);
return F;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
index ef42f9a319af..a1a094dded23 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
@@ -20,7 +20,7 @@
namespace llvm {
class TargetMachine;
-namespace AMDGPUIntrinsic {
+namespace SIIntrinsic {
enum ID {
last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
#define GET_INTRINSIC_ENUM_VALUES
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 87b072c9ea20..ef85c1040545 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -32,20 +32,52 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
};
- auto AMDGPUAS = ST.getAMDGPUAS();
-
const LLT S1 = LLT::scalar(1);
- const LLT V2S16 = LLT::vector(2, 16);
-
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
const LLT S512 = LLT::scalar(512);
+ const LLT V2S16 = LLT::vector(2, 16);
+ const LLT V4S16 = LLT::vector(4, 16);
+ const LLT V8S16 = LLT::vector(8, 16);
+
+ const LLT V2S32 = LLT::vector(2, 32);
+ const LLT V3S32 = LLT::vector(3, 32);
+ const LLT V4S32 = LLT::vector(4, 32);
+ const LLT V5S32 = LLT::vector(5, 32);
+ const LLT V6S32 = LLT::vector(6, 32);
+ const LLT V7S32 = LLT::vector(7, 32);
+ const LLT V8S32 = LLT::vector(8, 32);
+ const LLT V9S32 = LLT::vector(9, 32);
+ const LLT V10S32 = LLT::vector(10, 32);
+ const LLT V11S32 = LLT::vector(11, 32);
+ const LLT V12S32 = LLT::vector(12, 32);
+ const LLT V13S32 = LLT::vector(13, 32);
+ const LLT V14S32 = LLT::vector(14, 32);
+ const LLT V15S32 = LLT::vector(15, 32);
+ const LLT V16S32 = LLT::vector(16, 32);
+
+ const LLT V2S64 = LLT::vector(2, 64);
+ const LLT V3S64 = LLT::vector(3, 64);
+ const LLT V4S64 = LLT::vector(4, 64);
+ const LLT V5S64 = LLT::vector(5, 64);
+ const LLT V6S64 = LLT::vector(6, 64);
+ const LLT V7S64 = LLT::vector(7, 64);
+ const LLT V8S64 = LLT::vector(8, 64);
+
+ std::initializer_list<LLT> AllS32Vectors =
+ {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
+ V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
+ std::initializer_list<LLT> AllS64Vectors =
+ {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};
+
const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
- const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS.FLAT_ADDRESS);
- const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS.PRIVATE_ADDRESS);
+ const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
+ const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
+
+ const LLT CodePtr = FlatPtr;
const LLT AddrSpaces[] = {
GlobalPtr,
@@ -55,13 +87,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
PrivatePtr
};
+ setAction({G_BRCOND, S1}, Legal);
+
setAction({G_ADD, S32}, Legal);
setAction({G_ASHR, S32}, Legal);
setAction({G_SUB, S32}, Legal);
setAction({G_MUL, S32}, Legal);
- setAction({G_AND, S32}, Legal);
- setAction({G_OR, S32}, Legal);
- setAction({G_XOR, S32}, Legal);
+
+ // FIXME: 64-bit ones only legal for scalar
+ getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
+ .legalFor({S32, S1, S64, V2S32});
+
+ getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
+ G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
+ .legalFor({{S32, S1}});
setAction({G_BITCAST, V2S16}, Legal);
setAction({G_BITCAST, 1, S32}, Legal);
@@ -90,35 +129,80 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
// between these two scenarios.
setAction({G_CONSTANT, S1}, Legal);
- setAction({G_FADD, S32}, Legal);
+ setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
+
+ getActionDefinitionsBuilder(
+ { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA})
+ .legalFor({S32, S64});
+
+ getActionDefinitionsBuilder(G_FPTRUNC)
+ .legalFor({{S32, S64}});
+
+ // Use actual fsub instruction
+ setAction({G_FSUB, S32}, Legal);
+
+ // Must use fadd + fneg
+ setAction({G_FSUB, S64}, Lower);
setAction({G_FCMP, S1}, Legal);
setAction({G_FCMP, 1, S32}, Legal);
setAction({G_FCMP, 1, S64}, Legal);
- setAction({G_FMUL, S32}, Legal);
-
setAction({G_ZEXT, S64}, Legal);
setAction({G_ZEXT, 1, S32}, Legal);
+ setAction({G_SEXT, S64}, Legal);
+ setAction({G_SEXT, 1, S32}, Legal);
+
+ setAction({G_ANYEXT, S64}, Legal);
+ setAction({G_ANYEXT, 1, S32}, Legal);
+
setAction({G_FPTOSI, S32}, Legal);
setAction({G_FPTOSI, 1, S32}, Legal);
setAction({G_SITOFP, S32}, Legal);
setAction({G_SITOFP, 1, S32}, Legal);
+ setAction({G_UITOFP, S32}, Legal);
+ setAction({G_UITOFP, 1, S32}, Legal);
+
setAction({G_FPTOUI, S32}, Legal);
setAction({G_FPTOUI, 1, S32}, Legal);
+ setAction({G_FPOW, S32}, Legal);
+ setAction({G_FEXP2, S32}, Legal);
+ setAction({G_FLOG2, S32}, Legal);
+
+ getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND})
+ .legalFor({S32, S64});
+
for (LLT PtrTy : AddrSpaces) {
LLT IdxTy = LLT::scalar(PtrTy.getSizeInBits());
setAction({G_GEP, PtrTy}, Legal);
setAction({G_GEP, 1, IdxTy}, Legal);
}
+ setAction({G_BLOCK_ADDR, CodePtr}, Legal);
+
setAction({G_ICMP, S1}, Legal);
setAction({G_ICMP, 1, S32}, Legal);
+ setAction({G_CTLZ, S32}, Legal);
+ setAction({G_CTLZ_ZERO_UNDEF, S32}, Legal);
+ setAction({G_CTTZ, S32}, Legal);
+ setAction({G_CTTZ_ZERO_UNDEF, S32}, Legal);
+ setAction({G_BSWAP, S32}, Legal);
+ setAction({G_CTPOP, S32}, Legal);
+
+ getActionDefinitionsBuilder(G_INTTOPTR)
+ .legalIf([](const LegalityQuery &Query) {
+ return true;
+ });
+
+ getActionDefinitionsBuilder(G_PTRTOINT)
+ .legalIf([](const LegalityQuery &Query) {
+ return true;
+ });
getActionDefinitionsBuilder({G_LOAD, G_STORE})
.legalIf([=, &ST](const LegalityQuery &Query) {
@@ -145,6 +229,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
});
+ auto &Atomics = getActionDefinitionsBuilder(
+ {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
+ G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
+ G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
+ G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
+ .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
+ {S64, GlobalPtr}, {S64, LocalPtr}});
+ if (ST.hasFlatAddressSpace()) {
+ Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
+ }
setAction({G_SELECT, S32}, Legal);
setAction({G_SELECT, 1, S1}, Legal);
@@ -180,6 +274,23 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
(Ty1.getSizeInBits() % 32 == 0);
});
+ getActionDefinitionsBuilder(G_BUILD_VECTOR)
+ .legalForCartesianProduct(AllS32Vectors, {S32})
+ .legalForCartesianProduct(AllS64Vectors, {S64})
+ .clampNumElements(0, V16S32, V16S32)
+ .clampNumElements(0, V2S64, V8S64)
+ .minScalarSameAs(1, 0);
+
+ // TODO: Support any combination of v2s32
+ getActionDefinitionsBuilder(G_CONCAT_VECTORS)
+ .legalFor({{V4S32, V2S32},
+ {V8S32, V2S32},
+ {V8S32, V4S32},
+ {V4S64, V2S64},
+ {V4S16, V2S16},
+ {V8S16, V2S16},
+ {V8S16, V4S16}});
+
// Merge/Unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
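
As an aside on the mechanism the legalizer hunk above relies on: GlobalISel legality is a rule table that pairs each generic opcode with the register types it is legal for, or with a predicate and fallback action when it is not. A minimal, library-free C++ sketch of that shape follows; the names and actions are invented for illustration and are not LLVM's real LegalizerInfo API.

#include <functional>
#include <map>
#include <utility>
#include <vector>

enum class Action { Legal, Lower, NarrowScalar, Unsupported };

struct TypeDesc { unsigned Elts; unsigned ScalarBits; };   // {1,32} ~ s32, {2,32} ~ v2s32

struct LegalityTable {
  using Pred = std::function<bool(const TypeDesc &)>;
  std::map<unsigned, std::vector<std::pair<Pred, Action>>> Rules;

  // legalFor: enumerate the exact types an opcode is legal for.
  void legalFor(unsigned Opc, std::vector<TypeDesc> Types) {
    for (TypeDesc T : Types)
      Rules[Opc].push_back({[T](const TypeDesc &Q) {
                              return Q.Elts == T.Elts && Q.ScalarBits == T.ScalarBits;
                            },
                            Action::Legal});
  }

  // legalIf: accept anything matching an arbitrary predicate.
  void legalIf(unsigned Opc, Pred P) { Rules[Opc].push_back({std::move(P), Action::Legal}); }

  Action getAction(unsigned Opc, const TypeDesc &T) const {
    auto It = Rules.find(Opc);
    if (It == Rules.end())
      return Action::Unsupported;
    for (const auto &R : It->second)
      if (R.first(T))
        return R.second;
    return Action::Unsupported;
  }
};

int main() {
  enum { G_AND = 1, G_INTTOPTR = 2 };
  LegalityTable T;
  T.legalFor(G_AND, {{1, 32}, {1, 64}, {2, 32}});               // like .legalFor({S32, S64, V2S32})
  T.legalIf(G_INTTOPTR, [](const TypeDesc &) { return true; }); // like .legalIf(... return true ...)
  return T.getAction(G_AND, {1, 32}) == Action::Legal ? 0 : 1;
}
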
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 7a7ed7a4f065..14e880042691 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1333,8 +1333,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// for OpenCL 2.0 we have only generic implementation of sincos
// function.
AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
- const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M);
- nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS.FLAT_ADDRESS);
+ nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
if (!Fsincos) return false;
@@ -1347,7 +1346,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// The allocaInst allocates the memory in private address space. This need
// to be bitcasted to point to the address space of cos pointer type.
// In OpenCL 2.0 this is generic, while in 1.2 that is private.
- if (PTy->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
+ if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
P = B.CreateAddrSpaceCast(Alloc, PTy);
CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index f37795e961e8..4fc3fe0f105b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -995,8 +995,10 @@ Function *AMDGPULibFunc::getOrInsertFunction(Module *M,
} else {
AttributeList Attr;
LLVMContext &Ctx = M->getContext();
- Attr.addAttribute(Ctx, AttributeList::FunctionIndex, Attribute::ReadOnly);
- Attr.addAttribute(Ctx, AttributeList::FunctionIndex, Attribute::NoUnwind);
+ Attr = Attr.addAttribute(Ctx, AttributeList::FunctionIndex,
+ Attribute::ReadOnly);
+ Attr = Attr.addAttribute(Ctx, AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
C = M->getOrInsertFunction(FuncName, FuncTy, Attr);
}
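
The AMDGPULibFunc fix above corrects a classic value-semantics pitfall: AttributeList::addAttribute returns a new list rather than mutating in place, so a discarded return value silently drops the attribute. Below is a self-contained sketch of the same failure mode using a hypothetical AttrSet class, not the LLVM type.

#include <set>
#include <string>

// A tiny value-semantics attribute set: addAttribute returns a *new* set and
// leaves the original untouched. Class and names are invented for the example.
class AttrSet {
  std::set<std::string> Attrs;
public:
  AttrSet addAttribute(const std::string &A) const {
    AttrSet Copy = *this;
    Copy.Attrs.insert(A);
    return Copy;            // the caller must keep this result
  }
  bool contains(const std::string &A) const { return Attrs.count(A) != 0; }
};

int main() {
  AttrSet A;
  A.addAttribute("readonly");          // bug: result discarded, A is unchanged
  A = A.addAttribute("nounwind");      // fix: reassign the returned value
  return A.contains("nounwind") && !A.contains("readonly") ? 0 : 1;
}
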
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index c147830e12ed..743dc7a0d00b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -16,7 +16,6 @@
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -84,8 +83,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
return false;
CallInst *KernArgSegment =
- Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, nullptr,
- F.getName() + ".kernarg.segment");
+ Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
+ nullptr, F.getName() + ".kernarg.segment");
KernArgSegment->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
KernArgSegment->addAttribute(AttributeList::ReturnIndex,
@@ -123,14 +122,17 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
VectorType *VT = dyn_cast<VectorType>(ArgTy);
bool IsV3 = VT && VT->getNumElements() == 3;
+ bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();
+
VectorType *V4Ty = nullptr;
int64_t AlignDownOffset = alignDown(EltOffset, 4);
int64_t OffsetDiff = EltOffset - AlignDownOffset;
- unsigned AdjustedAlign = MinAlign(KernArgBaseAlign, AlignDownOffset);
+ unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset,
+ KernArgBaseAlign);
Value *ArgPtr;
- if (Size < 32 && !ArgTy->isAggregateType()) { // FIXME: Handle aggregate types
+ if (DoShiftOpt) { // FIXME: Handle aggregate types
// Since we don't have sub-dword scalar loads, avoid doing an extload by
// loading earlier than the argument address, and extracting the relevant
// bits.
@@ -148,7 +150,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
} else {
ArgPtr = Builder.CreateConstInBoundsGEP1_64(
KernArgSegment,
- AlignDownOffset,
+ EltOffset,
Arg.getName() + ".kernarg.offset");
ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->getPointerTo(AS),
ArgPtr->getName() + ".cast");
@@ -199,7 +201,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
// TODO: Convert noalias arg to !noalias
- if (Size < 32 && !ArgTy->isAggregateType()) {
+ if (DoShiftOpt) {
Value *ExtractBits = OffsetDiff == 0 ?
Load : Builder.CreateLShr(Load, OffsetDiff * 8);
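
The kernarg hunk above leans on a standard sub-dword trick: rather than issuing a narrow extending load for a 1- or 2-byte argument, load the 4-byte-aligned dword that contains it and shift/mask the wanted bits out. The standalone sketch below shows only the arithmetic; the offsets, sizes, and little-endian assumption are for illustration and are not taken from the pass.

#include <cassert>
#include <cstdint>
#include <cstring>

static uint64_t alignDown(uint64_t V, uint64_t A) { return V - (V % A); }

// Read the whole aligned dword containing the argument, then shift the wanted
// byte range down and mask it to the argument's width.
uint32_t loadSubDwordArg(const uint8_t *KernArgBase, uint64_t EltOffset,
                         unsigned SizeInBytes) {
  assert(SizeInBytes > 0 && SizeInBytes < 4 && "only for sub-dword arguments");
  uint64_t AlignedOffset = alignDown(EltOffset, 4); // dword holding the arg
  uint64_t OffsetDiff = EltOffset - AlignedOffset;  // byte position inside it

  uint32_t Dword;                                   // one aligned 32-bit load
  std::memcpy(&Dword, KernArgBase + AlignedOffset, sizeof(Dword));

  uint32_t Shifted = Dword >> (OffsetDiff * 8);     // the right-shift step
  uint32_t Mask = (1u << (SizeInBytes * 8)) - 1;    // truncate to arg width
  return Shifted & Mask;
}

int main() {
  // A 2-byte argument stored at offset 4; the check assumes a little-endian host.
  uint8_t Buf[8] = {0, 0, 0, 0, 0x34, 0x12, 0, 0};
  return loadSubDwordArg(Buf, 4, 2) == 0x1234 ? 0 : 1;
}
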
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 1876dc3f7122..f6bdbf5e9be2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -301,6 +301,26 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
+#ifdef EXPENSIVE_CHECKS
+ // Sanity-check getInstSizeInBytes on explicitly specified CPUs (it cannot
+ // work correctly for the generic CPU).
+ //
+ // The isPseudo check really shouldn't be here, but unfortunately there are
+ // some negative lit tests that depend on being able to continue through
+ // here even when pseudo instructions haven't been lowered.
+ if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU())) {
+ SmallVector<MCFixup, 4> Fixups;
+ SmallVector<char, 16> CodeBytes;
+ raw_svector_ostream CodeStream(CodeBytes);
+
+ std::unique_ptr<MCCodeEmitter> InstEmitter(createSIMCCodeEmitter(
+ *STI.getInstrInfo(), *OutContext.getRegisterInfo(), OutContext));
+ InstEmitter->encodeInstruction(TmpInst, CodeStream, Fixups, STI);
+
+ assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI));
+ }
+#endif
+
if (STI.dumpCode()) {
// Disassemble instruction/operands to text.
DisasmLines.resize(DisasmLines.size() + 1);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
index 995d9ae3907f..5e0b7d429022 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -42,9 +42,12 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
if (!FirstMI)
return true;
+ const MachineBasicBlock &MBB = *FirstMI->getParent();
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
const MachineOperand *Src2 = TII.getNamedOperand(SecondMI,
AMDGPU::OpName::src2);
- return FirstMI->definesRegister(Src2->getReg());
+ return FirstMI->definesRegister(Src2->getReg(), TRI);
}
default:
return false;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
index b50a2eb8e9e7..2feff14d34a1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -23,7 +23,8 @@ namespace ElfNote {
const char SectionName[] = ".note";
-const char NoteName[] = "AMD";
+const char NoteNameV2[] = "AMD";
+const char NoteNameV3[] = "AMDGPU";
// TODO: Remove this file once we drop code object v2.
enum NoteType{
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 3cfdccc9fe51..e53a8fe7c074 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -99,8 +99,6 @@ private:
const DataLayout *DL;
- AMDGPUAS AS;
-
const TargetLowering *TLI;
void visit(const Function &F);
@@ -267,7 +265,6 @@ void AMDGPUPerfHint::runOnFunction(Function &F) {
const Module &M = *F.getParent();
DL = &M.getDataLayout();
- AS = AMDGPU::getAMDGPUAS(M);
visit(F);
auto Loc = FIM.find(&F);
@@ -306,14 +303,14 @@ bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
if (auto PT = dyn_cast<PointerType>(V->getType())) {
unsigned As = PT->getAddressSpace();
// Flat likely points to global too.
- return As == AS.GLOBAL_ADDRESS || As == AS.FLAT_ADDRESS;
+ return As == AMDGPUAS::GLOBAL_ADDRESS || As == AMDGPUAS::FLAT_ADDRESS;
}
return false;
}
bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
if (auto PT = dyn_cast<PointerType>(V->getType()))
- return PT->getAddressSpace() == AS.LOCAL_ADDRESS;
+ return PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
return false;
}
@@ -346,7 +343,8 @@ AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
if (auto PT = dyn_cast<PointerType>(V->getType())) {
unsigned As = PT->getAddressSpace();
- return As == AS.CONSTANT_ADDRESS || As == AS.CONSTANT_ADDRESS_32BIT;
+ return As == AMDGPUAS::CONSTANT_ADDRESS ||
+ As == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}
return false;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index d341fec6296f..5d087c099184 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -70,13 +70,17 @@ static cl::opt<bool> DisablePromoteAllocaToVector(
cl::desc("Disable promote alloca to vector"),
cl::init(false));
+static cl::opt<bool> DisablePromoteAllocaToLDS(
+ "disable-promote-alloca-to-lds",
+ cl::desc("Disable promote alloca to LDS"),
+ cl::init(false));
+
// FIXME: This can create globals so should be a module pass.
class AMDGPUPromoteAlloca : public FunctionPass {
private:
const TargetMachine *TM;
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
- AMDGPUAS AS;
// FIXME: This should be per-kernel.
uint32_t LocalMemLimit = 0;
@@ -156,8 +160,6 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
if (!ST.isPromoteAllocaEnabled())
return false;
- AS = AMDGPU::getAMDGPUAS(*F.getParent());
-
bool SufficientLDS = hasSufficientLocalMem(F);
bool Changed = false;
BasicBlock &EntryBB = *F.begin();
@@ -238,7 +240,7 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
Type *I32Ty = Type::getInt32Ty(Mod->getContext());
Value *CastDispatchPtr = Builder.CreateBitCast(
- DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));
+ DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
// We could do a single 64-bit load here, but it's likely that the basic
// 32-bit and extract sequence is already present, and it is probably easier
@@ -326,6 +328,10 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
// Currently only handle the case where the Pointer Operand is a GEP.
// Also we could not vectorize volatile or atomic loads.
LoadInst *LI = cast<LoadInst>(Inst);
+ if (isa<AllocaInst>(User) &&
+ LI->getPointerOperandType() == User->getType() &&
+ isa<VectorType>(LI->getType()))
+ return true;
return isa<GetElementPtrInst>(LI->getPointerOperand()) && LI->isSimple();
}
case Instruction::BitCast:
@@ -335,6 +341,10 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
// since it should be canonical form, the User should be a GEP.
// Also we could not vectorize volatile or atomic stores.
StoreInst *SI = cast<StoreInst>(Inst);
+ if (isa<AllocaInst>(User) &&
+ SI->getPointerOperandType() == User->getType() &&
+ isa<VectorType>(SI->getValueOperand()->getType()))
+ return true;
return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && SI->isSimple();
}
default:
@@ -342,14 +352,15 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
}
}
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
+static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
if (DisablePromoteAllocaToVector) {
LLVM_DEBUG(dbgs() << " Promotion alloca to vector is disabled\n");
return false;
}
- ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
+ Type *AT = Alloca->getAllocatedType();
+ SequentialType *AllocaTy = dyn_cast<SequentialType>(AT);
LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n");
@@ -396,7 +407,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
}
}
- VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
+ VectorType *VectorTy = dyn_cast<VectorType>(AllocaTy);
+ if (!VectorTy)
+ VectorTy = arrayTypeToVecType(cast<ArrayType>(AllocaTy));
LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
<< *VectorTy << '\n');
@@ -406,7 +419,10 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
- Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
+ if (Inst->getType() == AT)
+ break;
+
+ Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
@@ -418,9 +434,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
break;
}
case Instruction::Store: {
- Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
-
StoreInst *SI = cast<StoreInst>(Inst);
+ if (SI->getValueOperand()->getType() == AT)
+ break;
+
+ Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = SI->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
@@ -610,7 +628,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
// we cannot use local memory in the pass.
for (Type *ParamTy : FTy->params()) {
PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
- if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
+ if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
LocalMemLimit = 0;
LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
"local memory disabled.\n");
@@ -627,7 +645,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
// Check how much local memory is being used by global objects
CurrentLocalMemUsage = 0;
for (GlobalVariable &GV : Mod->globals()) {
- if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
+ if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
continue;
for (const User *U : GV.users()) {
@@ -706,9 +724,12 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
- if (tryPromoteAllocaToVector(&I, AS))
+ if (tryPromoteAllocaToVector(&I))
return true; // Promoted to vector.
+ if (DisablePromoteAllocaToLDS)
+ return false;
+
const Function &ContainingFunction = *I.getParent()->getParent();
CallingConv::ID CC = ContainingFunction.getCallingConv();
@@ -775,7 +796,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
Twine(F->getName()) + Twine('.') + I.getName(),
nullptr,
GlobalVariable::NotThreadLocal,
- AS.LOCAL_ADDRESS);
+ AMDGPUAS::LOCAL_ADDRESS);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
GV->setAlignment(I.getAlignment());
@@ -808,7 +829,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
Value *Src0 = CI->getOperand(0);
Type *EltTy = Src0->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
if (isa<ConstantPointerNull>(CI->getOperand(0)))
CI->setOperand(0, ConstantPointerNull::get(NewTy));
@@ -825,7 +846,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
continue;
Type *EltTy = V->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
// FIXME: It doesn't really make sense to try to do this for all
// instructions.
@@ -894,7 +915,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
Type *SrcTy = Src->getType()->getPointerElementType();
Function *ObjectSize = Intrinsic::getDeclaration(Mod,
Intrinsic::objectsize,
- { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
+ { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
);
CallInst *NewCall = Builder.CreateCall(
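
The promote-alloca changes above extend a pass whose core idea is to turn a small private array, whose accesses are all simple indexed loads and stores, into a single vector value: indexed stores become insert-lane operations and indexed loads become extract-lane operations. The following is only a toy, source-level illustration of that rewrite, not the pass's actual IR transformation.

// Before: per-element accesses into a small private array.
float beforeRewrite(int i, float x) {
  float a[4] = {0.0f, 0.0f, 0.0f, 0.0f};
  a[i & 3] = x;            // indexed store
  return a[i & 3];         // indexed load
}

// After (conceptually): the array lives in one vector-like value, so indexed
// stores become insert-lane and indexed loads become extract-lane, which a
// SIMD backend can keep entirely in registers.
struct V4 { float Lane[4]; };

static V4 insertLane(V4 V, int L, float X) { V.Lane[L] = X; return V; }
static float extractLane(const V4 &V, int L) { return V.Lane[L]; }

float afterRewrite(int i, float x) {
  V4 Vec = {{0.0f, 0.0f, 0.0f, 0.0f}};
  Vec = insertLane(Vec, i & 3, x);     // store -> insertelement
  return extractLane(Vec, i & 3);      // load  -> extractelement
}

int main() { return beforeRewrite(1, 2.0f) == afterRewrite(1, 2.0f) ? 0 : 1; }
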
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 012e4fe200aa..7a760dcf7a90 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -35,7 +35,7 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
: AMDGPUGenRegisterBankInfo(),
TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
- // HACK: Until this is fully tablegen'd
+ // HACK: Until this is fully tablegen'd.
static bool AlreadyInit = false;
if (AlreadyInit)
return;
@@ -74,13 +74,16 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
const RegisterBank &Src,
unsigned Size) const {
if (Dst.getID() == AMDGPU::SGPRRegBankID &&
- Src.getID() == AMDGPU::VGPRRegBankID)
+ Src.getID() == AMDGPU::VGPRRegBankID) {
return std::numeric_limits<unsigned>::max();
+ }
// SGPRRegBank with size 1 is actually vcc or another 64-bit sgpr written by
// the valu.
if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID &&
- Src.getID() == AMDGPU::SGPRRegBankID)
+ (Src.getID() == AMDGPU::SGPRRegBankID ||
+ Src.getID() == AMDGPU::VGPRRegBankID ||
+ Src.getID() == AMDGPU::VCCRegBankID))
return std::numeric_limits<unsigned>::max();
return RegisterBankInfo::copyCost(Dst, Src, Size);
@@ -145,7 +148,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&SSMapping);
const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr, // Predicate operand.
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
@@ -153,7 +156,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&SVMapping);
const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr, // Predicate operand.
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
@@ -161,7 +164,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&VSMapping);
const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr, // Predicate operand.
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
@@ -170,6 +173,67 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
return AltMappings;
}
+ case TargetOpcode::G_SELECT: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+ 4); // Num Operands
+ AltMappings.push_back(&SSMapping);
+
+ const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
+ 4); // Num Operands
+ AltMappings.push_back(&VVMapping);
+
+ return AltMappings;
+ }
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_SSUBE: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
+ getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
+ 5); // Num Operands
+ AltMappings.push_back(&SSMapping);
+
+ const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
+ 5); // Num Operands
+ AltMappings.push_back(&VVMapping);
+ return AltMappings;
+ }
+ case AMDGPU::G_BRCOND: {
+ assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
+
+ const InstructionMapping &SMapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
+ 2); // Num Operands
+ AltMappings.push_back(&SMapping);
+
+ const InstructionMapping &VMapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }),
+ 2); // Num Operands
+ AltMappings.push_back(&VMapping);
+ return AltMappings;
+ }
default:
break;
}
@@ -193,10 +257,16 @@ bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
unsigned Reg = MI.getOperand(i).getReg();
- const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
- if (Bank && Bank->getID() != AMDGPU::SGPRRegBankID)
- return false;
+ if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
+ if (Bank->getID() == AMDGPU::VGPRRegBankID)
+ return false;
+
+ assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
+ Bank->getID() == AMDGPU::SCCRegBankID);
+ }
}
return true;
}
@@ -209,7 +279,8 @@ AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
- OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
+ OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
MI.getNumOperands());
@@ -230,12 +301,32 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
unsigned Reg1 = MI.getOperand(OpdIdx).getReg();
unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);
- unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI);
+
+ unsigned DefaultBankID = Size1 == 1 ?
+ AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
+ unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);
+
OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);
for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
unsigned Size = getSizeInBits(MI.getOperand(OpdIdx).getReg(), MRI, *TRI);
- OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
+ OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
+ }
+
+ return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
+ MI.getNumOperands());
+}
+
+const RegisterBankInfo::InstructionMapping &
+AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
+
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
+ OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
@@ -304,21 +395,49 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
return getInvalidInstructionMapping();
+
+ case AMDGPU::G_AND:
+ case AMDGPU::G_OR:
+ case AMDGPU::G_XOR: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if (Size == 1) {
+ OpdsMapping[0] = OpdsMapping[1] =
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
+
+ LLVM_FALLTHROUGH;
+ }
+
case AMDGPU::G_ADD:
case AMDGPU::G_SUB:
case AMDGPU::G_MUL:
- case AMDGPU::G_AND:
- case AMDGPU::G_OR:
- case AMDGPU::G_XOR:
case AMDGPU::G_SHL:
+ case AMDGPU::G_UADDO:
+ case AMDGPU::G_SADDO:
+ case AMDGPU::G_USUBO:
+ case AMDGPU::G_SSUBO:
+ case AMDGPU::G_UADDE:
+ case AMDGPU::G_SADDE:
+ case AMDGPU::G_USUBE:
+ case AMDGPU::G_SSUBE:
if (isSALUMapping(MI))
return getDefaultMappingSOP(MI);
- // Fall-through
+ LLVM_FALLTHROUGH;
case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
case AMDGPU::G_FPTOSI:
case AMDGPU::G_FPTOUI:
case AMDGPU::G_FMUL:
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_SITOFP:
+ case AMDGPU::G_UITOFP:
+ case AMDGPU::G_FPTRUNC:
+ case AMDGPU::G_FEXP2:
+ case AMDGPU::G_FLOG2:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_INTRINSIC_ROUND:
return getDefaultMappingVOP(MI);
case AMDGPU::G_IMPLICIT_DEF: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -326,11 +445,25 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_FCONSTANT:
- case AMDGPU::G_CONSTANT: {
+ case AMDGPU::G_CONSTANT:
+ case AMDGPU::G_FRAME_INDEX:
+ case AMDGPU::G_BLOCK_ADDR: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
+ case AMDGPU::G_INSERT: {
+ unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
+ AMDGPU::VGPRRegBankID;
+ unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
+ OpdsMapping[3] = nullptr;
+ break;
+ }
case AMDGPU::G_EXTRACT: {
unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
@@ -352,7 +485,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
break;
}
- case AMDGPU::G_BITCAST: {
+ case AMDGPU::G_BITCAST:
+ case AMDGPU::G_INTTOPTR:
+ case AMDGPU::G_PTRTOINT:
+ case AMDGPU::G_CTLZ:
+ case AMDGPU::G_CTLZ_ZERO_UNDEF:
+ case AMDGPU::G_CTTZ:
+ case AMDGPU::G_CTTZ_ZERO_UNDEF:
+ case AMDGPU::G_CTPOP:
+ case AMDGPU::G_BSWAP:
+ case AMDGPU::G_FABS:
+ case AMDGPU::G_FNEG: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
@@ -368,7 +511,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
break;
}
- case AMDGPU::G_ZEXT: {
+ case AMDGPU::G_ZEXT:
+ case AMDGPU::G_SEXT:
+ case AMDGPU::G_ANYEXT: {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Src = MI.getOperand(1).getReg();
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
@@ -391,7 +536,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FCMP: {
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
- OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 1);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
OpdsMapping[1] = nullptr; // Predicate Operand.
OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
@@ -431,7 +576,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
unsigned Op0Bank = Op2Bank == AMDGPU::SGPRRegBankID &&
Op3Bank == AMDGPU::SGPRRegBankID ?
- AMDGPU::SCCRegBankID : AMDGPU::VGPRRegBankID;
+ AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
OpdsMapping[1] = nullptr; // Predicate Operand.
OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
@@ -479,6 +624,18 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
+ case AMDGPU::G_UNMERGE_VALUES: {
+ unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
+ AMDGPU::VGPRRegBankID;
+
+ // Op1 and Dst should use the same register bank.
+ // FIXME: Shouldn't this be the default? Why do we need to handle this?
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
+ OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
+ }
+ break;
+ }
case AMDGPU::G_INTRINSIC: {
switch (MI.getOperand(1).getIntrinsicID()) {
default:
@@ -492,6 +649,12 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
+ case Intrinsic::amdgcn_wqm_vote: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = OpdsMapping[2]
+ = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
}
break;
}
@@ -528,8 +691,50 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
+ case AMDGPU::G_SELECT: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned Op1Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
+ bool SGPRSrcs = Op1Bank == AMDGPU::SCCRegBankID &&
+ Op2Bank == AMDGPU::SGPRRegBankID &&
+ Op3Bank == AMDGPU::SGPRRegBankID;
+ unsigned Bank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
+ Op1Bank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+ OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
+ OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
+ OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
+ break;
+ }
+
case AMDGPU::G_LOAD:
return getInstrMappingForLoad(MI);
+
+ case AMDGPU::G_ATOMICRMW_XCHG:
+ case AMDGPU::G_ATOMICRMW_ADD:
+ case AMDGPU::G_ATOMICRMW_SUB:
+ case AMDGPU::G_ATOMICRMW_AND:
+ case AMDGPU::G_ATOMICRMW_OR:
+ case AMDGPU::G_ATOMICRMW_XOR:
+ case AMDGPU::G_ATOMICRMW_MAX:
+ case AMDGPU::G_ATOMICRMW_MIN:
+ case AMDGPU::G_ATOMICRMW_UMAX:
+ case AMDGPU::G_ATOMICRMW_UMIN:
+ case AMDGPU::G_ATOMIC_CMPXCHG: {
+ return getDefaultMappingAllVGPR(MI);
+ }
+ case AMDGPU::G_BRCOND: {
+ unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
+ if (Bank != AMDGPU::SCCRegBankID)
+ Bank = AMDGPU::VCCRegBankID;
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
+ break;
+ }
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
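
Most of the register-bank hunks above encode one rule of thumb: an operation can stay on the scalar unit only if every register operand is already uniform; any vector-bank (divergent) input forces the whole mapping onto VGPRs, with 1-bit conditions going to a condition bank instead. A deliberately simplified sketch of that decision follows, with invented names and none of the SCC/VCC special cases.

#include <vector>

enum class Bank { SGPR, VGPR, VCC };

// An operation keeps the scalar-ALU mapping only when every register operand
// is uniform (SGPR); any divergent (VGPR) input forces the vector mapping.
Bank pickResultBank(const std::vector<Bank> &OperandBanks) {
  for (Bank B : OperandBanks)
    if (B == Bank::VGPR)
      return Bank::VGPR;
  return Bank::SGPR;
}

int main() {
  return pickResultBank({Bank::SGPR, Bank::VGPR}) == Bank::VGPR ? 0 : 1;
}
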
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index d48a66589873..d29f4bc79a51 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -49,6 +49,8 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
bool isSALUMapping(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
+ const InstructionMapping &getDefaultMappingAllVGPR(
+ const MachineInstr &MI) const;
public:
AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index 7f7f75f65647..570379a820e1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -15,4 +15,7 @@ def VGPRRegBank : RegisterBank<"VGPR",
[VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
>;
-def SCCRegBank : RegisterBank <"SCC", [SCC_CLASS ]>;
+def SCCRegBank : RegisterBank <"SCC", [SCC_CLASS]>;
+
+// It is helpful to distinguish conditions from ordinary SGPRs.
+def VCCRegBank : RegisterBank <"VCC", [SReg_64]>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index 07de5fc549e2..922d974f2ebd 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -27,8 +27,6 @@ class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
AMDGPURegisterInfo();
- bool enableMultipleCopyHints() const override { return true; }
-
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
static unsigned getSubRegFromChannel(unsigned Channel);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index a861762a8c9e..efe501cb73c2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -163,7 +163,7 @@ bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const {
// some casts between structs and non-structs, but we can't bitcast
// directly between them. directly bitcast between them. Blender uses
// some casts that look like { <3 x float> }* to <4 x float>*
- if ((SrcEltTy->isStructTy() && (SrcEltTy->getNumContainedTypes() != 1)))
+ if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1)))
return false;
// Clang emits OpenCL 3-vector type accesses with a bitcast to the
@@ -401,8 +401,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (Val->getType() != EltTy) {
Type *EffectiveEltTy = EltTy;
if (StructType *CT = dyn_cast<StructType>(EltTy)) {
- assert(CT->getNumContainedTypes() == 1);
- EffectiveEltTy = CT->getContainedType(0);
+ assert(CT->getNumElements() == 1);
+ EffectiveEltTy = CT->getElementType(0);
}
if (DL->getTypeSizeInBits(EffectiveEltTy) !=
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 98b49070fa99..ed0cc70c3d9a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -74,6 +74,9 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
// We want to be able to turn these off, but making this a subtarget feature
// for SI has the unhelpful behavior that it unsets everything else if you
// disable it.
+ //
+ // Similarly we want enable-prt-strict-null to be on by default and not to
+ // unset everything else if it is disabled
SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");
@@ -89,6 +92,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += "-fp32-denormals,";
}
+ FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
+
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
@@ -124,10 +129,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
return *this;
}
-AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
- const FeatureBitset &FeatureBits) :
+AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
TargetTriple(TT),
- SubtargetFeatureBits(FeatureBits),
Has16BitInsts(false),
HasMadMixInsts(false),
FP32Denormals(false),
@@ -136,19 +139,22 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
HasVOP3PInsts(false),
HasMulI24(true),
HasMulU24(true),
+ HasInv2PiInlineImm(false),
HasFminFmaxLegacy(true),
EnablePromoteAlloca(false),
+ HasTrigReducedRange(false),
LocalMemorySize(0),
WavefrontSize(0)
{ }
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const GCNTargetMachine &TM) :
+ const GCNTargetMachine &TM) :
AMDGPUGenSubtargetInfo(TT, GPU, FS),
- AMDGPUSubtarget(TT, getFeatureBits()),
+ AMDGPUSubtarget(TT),
TargetTriple(TT),
Gen(SOUTHERN_ISLANDS),
IsaVersion(ISAVersion0_0_0),
+ InstrItins(getInstrItineraryForCPU(GPU)),
LDSBankCount(0),
MaxPrivateElementSize(0),
@@ -170,16 +176,17 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
DebuggerEmitPrologue(false),
EnableHugePrivateBuffer(false),
- EnableVGPRSpilling(false),
EnableLoadStoreOpt(false),
EnableUnsafeDSOffsetFolding(false),
EnableSIScheduler(false),
EnableDS128(false),
+ EnablePRTStrictNull(false),
DumpCode(false),
FP64(false),
GCN3Encoding(false),
CIInsts(false),
+ VIInsts(false),
GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
@@ -189,15 +196,16 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasVGPRIndexMode(false),
HasScalarStores(false),
HasScalarAtomics(false),
- HasInv2PiInlineImm(false),
HasSDWAOmod(false),
HasSDWAScalar(false),
HasSDWASdst(false),
HasSDWAMac(false),
HasSDWAOutModsVOPC(false),
HasDPP(false),
+ HasR128A16(false),
HasDLInsts(false),
- D16PreservesUnusedBits(false),
+ HasDotInsts(false),
+ EnableSRAMECC(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
FlatGlobalInsts(false),
@@ -211,7 +219,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
TLInfo(TM, *this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
- AS = AMDGPU::getAMDGPUAS(TT);
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
@@ -447,7 +454,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM) :
R600GenSubtargetInfo(TT, GPU, FS),
- AMDGPUSubtarget(TT, getFeatureBits()),
+ AMDGPUSubtarget(TT),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
FMA(false),
@@ -460,8 +467,7 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
TexVTXClauseSize(0),
Gen(R600),
TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
- InstrItins(getInstrItineraryForCPU(GPU)),
- AS (AMDGPU::getAMDGPUAS(TT)) { }
+ InstrItins(getInstrItineraryForCPU(GPU)) { }
void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const {
@@ -480,10 +486,6 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Policy.ShouldTrackLaneMasks = true;
}
-bool GCNSubtarget::isVGPRSpillingEnabled(const Function& F) const {
- return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
-}
-
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 623109733651..5584759e5580 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -63,7 +63,6 @@ private:
Triple TargetTriple;
protected:
- const FeatureBitset &SubtargetFeatureBits;
bool Has16BitInsts;
bool HasMadMixInsts;
bool FP32Denormals;
@@ -72,13 +71,15 @@ protected:
bool HasVOP3PInsts;
bool HasMulI24;
bool HasMulU24;
+ bool HasInv2PiInlineImm;
bool HasFminFmaxLegacy;
bool EnablePromoteAlloca;
+ bool HasTrigReducedRange;
int LocalMemorySize;
unsigned WavefrontSize;
public:
- AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+ AMDGPUSubtarget(const Triple &TT);
static const AMDGPUSubtarget &get(const MachineFunction &MF);
static const AMDGPUSubtarget &get(const TargetMachine &TM,
@@ -134,7 +135,7 @@ public:
return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
}
- bool isAmdCodeObjectV2(const Function &F) const {
+ bool isAmdHsaOrMesa(const Function &F) const {
return isAmdHsaOS() || isMesaKernel(F);
}
@@ -170,10 +171,18 @@ public:
return HasMulU24;
}
+ bool hasInv2PiInlineImm() const {
+ return HasInv2PiInlineImm;
+ }
+
bool hasFminFmaxLegacy() const {
return HasFminFmaxLegacy;
}
+ bool hasTrigReducedRange() const {
+ return HasTrigReducedRange;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
@@ -193,38 +202,26 @@ public:
/// Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const Function &F) const {
- return isAmdCodeObjectV2(F) ? 0 : 36;
+ return isAmdHsaOrMesa(F) ? 0 : 36;
}
/// \returns Maximum number of work groups per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
- FlatWorkGroupSize);
- }
+ virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum flat work group size supported by the subtarget.
- unsigned getMinFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
- }
+ virtual unsigned getMinFlatWorkGroupSize() const = 0;
/// \returns Maximum flat work group size supported by the subtarget.
- unsigned getMaxFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
- }
+ virtual unsigned getMaxFlatWorkGroupSize() const = 0;
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
- FlatWorkGroupSize);
- }
+ virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
- unsigned getMinWavesPerEU() const {
- return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
- }
+ virtual unsigned getMinWavesPerEU() const = 0;
unsigned getMaxWavesPerEU() const { return 10; }
@@ -266,6 +263,7 @@ public:
ISAVersion9_0_2,
ISAVersion9_0_4,
ISAVersion9_0_6,
+ ISAVersion9_0_9,
};
enum TrapHandlerAbi {
@@ -300,6 +298,7 @@ protected:
Triple TargetTriple;
unsigned Gen;
unsigned IsaVersion;
+ InstrItineraryData InstrItins;
int LDSBankCount;
unsigned MaxPrivateElementSize;
@@ -323,11 +322,11 @@ protected:
// Used as options.
bool EnableHugePrivateBuffer;
- bool EnableVGPRSpilling;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableSIScheduler;
bool EnableDS128;
+ bool EnablePRTStrictNull;
bool DumpCode;
// Subtarget statically properties set by tablegen
@@ -337,6 +336,7 @@ protected:
bool IsGCN;
bool GCN3Encoding;
bool CIInsts;
+ bool VIInsts;
bool GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
@@ -346,15 +346,16 @@ protected:
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasScalarAtomics;
- bool HasInv2PiInlineImm;
bool HasSDWAOmod;
bool HasSDWAScalar;
bool HasSDWASdst;
bool HasSDWAMac;
bool HasSDWAOutModsVOPC;
bool HasDPP;
+ bool HasR128A16;
bool HasDLInsts;
- bool D16PreservesUnusedBits;
+ bool HasDotInsts;
+ bool EnableSRAMECC;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;
@@ -372,7 +373,6 @@ protected:
bool FeatureDisable;
SelectionDAGTargetInfo TSInfo;
- AMDGPUAS AS;
private:
SIInstrInfo InstrInfo;
SITargetLowering TLInfo;
@@ -423,6 +423,10 @@ public:
return &TSInfo;
}
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
Generation getGeneration() const {
@@ -441,10 +445,6 @@ public:
return MaxPrivateElementSize;
}
- AMDGPUAS getAMDGPUAS() const {
- return AS;
- }
-
bool hasIntClamp() const {
return HasIntClamp;
}
@@ -517,6 +517,10 @@ public:
return FMA;
}
+ bool hasSwap() const {
+ return GFX9Insts;
+ }
+
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
@@ -574,12 +578,19 @@ public:
return getGeneration() < AMDGPUSubtarget::GFX9;
}
+ /// \returns If target requires PRT Struct NULL support (zero result registers
+ /// for sparse texture support).
+ bool usePRTStrictNull() const {
+ return EnablePRTStrictNull;
+ }
+
bool hasAutoWaitcntBeforeBarrier() const {
return AutoWaitcntBeforeBarrier;
}
bool hasCodeObjectV3() const {
- return CodeObjectV3;
+ // FIXME: Need to add code object v3 support for mesa and pal.
+ return isAmdHsaOS() ? CodeObjectV3 : false;
}
bool hasUnalignedBufferAccess() const {
@@ -677,8 +688,12 @@ public:
return HasDLInsts;
}
- bool d16PreservesUnusedBits() const {
- return D16PreservesUnusedBits;
+ bool hasDotInsts() const {
+ return HasDotInsts;
+ }
+
+ bool isSRAMECCEnabled() const {
+ return EnableSRAMECC;
}
// Scratch is allocated in 256 dword per wave blocks for the entire
@@ -707,20 +722,19 @@ public:
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getEUsPerCU(this);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
- FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
}
/// \returns Maximum number of waves per execution unit supported by the
@@ -732,8 +746,7 @@ public:
/// \returns Number of waves per work group supported by the subtarget and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getWavesPerWorkGroup(
- MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
}
// static wrappers
@@ -747,8 +760,6 @@ public:
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
- bool isVGPRSpillingEnabled(const Function &F) const;
-
unsigned getMaxNumUserSGPRs() const {
return 16;
}
@@ -781,14 +792,15 @@ public:
return HasScalarAtomics;
}
- bool hasInv2PiInlineImm() const {
- return HasInv2PiInlineImm;
- }
bool hasDPP() const {
return HasDPP;
}
+ bool hasR128A16() const {
+ return HasR128A16;
+ }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
@@ -817,6 +829,11 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ // \returns true if the subtarget supports DWORDX3 load/store instructions.
+ bool hasDwordx3LoadStores() const {
+ return CIInsts;
+ }
+
bool hasSMovFedHazard() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
@@ -851,39 +868,34 @@ public:
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getSGPRAllocGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
}
/// \returns SGPR encoding granularity supported by the subtarget.
unsigned getSGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getSGPREncodingGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
}
/// \returns Addressable number of SGPRs supported by the subtarget.
unsigned getAddressableNumSGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumSGPRs(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
}
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
}
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
- return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU, Addressable);
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
}
/// \returns Reserved number of SGPRs for given function \p MF.
@@ -901,39 +913,34 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getVGPRAllocGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
}
/// \returns VGPR encoding granularity supported by the subtarget.
unsigned getVGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getVGPREncodingGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
}
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumVGPRs(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -949,6 +956,34 @@ public:
void getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
};
class R600Subtarget final : public R600GenSubtargetInfo,
@@ -968,7 +1003,6 @@ private:
R600TargetLowering TLInfo;
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
- AMDGPUAS AS;
public:
R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
@@ -1053,8 +1087,6 @@ public:
short getTexVTXClauseSize() const { return TexVTXClauseSize; }
- AMDGPUAS getAMDGPUAS() const { return AS; }
-
bool enableMachineScheduler() const override {
return true;
}
@@ -1062,6 +1094,34 @@ public:
bool enableSubRegLiveness() const override {
return true;
}
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
};
} // end namespace llvm
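The overrides added in this hunk forward work-group and wave-occupancy queries to the AMDGPU::IsaInfo helpers, passing the subtarget object itself instead of a FeatureBitset. A minimal sketch of how a caller might combine them, using only the member functions added above; the helper name and the concrete subtarget class names are assumptions not shown in this hunk:

#include <algorithm>

// Sketch: clamp a requested waves-per-EU count to the range the subtarget
// supports for a given flat work group size. SubtargetT stands for whichever
// subtarget class provides the overrides added above.
template <typename SubtargetT>
unsigned clampWavesPerEU(const SubtargetT &ST, unsigned Requested,
                         unsigned FlatWorkGroupSize) {
  unsigned Lo = ST.getMinWavesPerEU();
  unsigned Hi = ST.getMaxWavesPerEU(FlatWorkGroupSize);
  return std::min(std::max(Requested, Lo), Hi);
}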
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2205819c444f..e8cefdbf74b9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -45,6 +45,7 @@
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Vectorize.h"
#include <memory>
@@ -105,6 +106,11 @@ static cl::opt<bool> EnableSDWAPeephole(
cl::desc("Enable SDWA peepholer"),
cl::init(true));
+static cl::opt<bool> EnableDPPCombine(
+ "amdgpu-dpp-combine",
+ cl::desc("Enable DPP combiner"),
+ cl::init(false));
+
// Enable address space based alias analysis
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
cl::desc("Enable AMDGPU Alias Analysis"),
@@ -137,6 +143,20 @@ static cl::opt<bool> EnableLowerKernelArguments(
cl::init(true),
cl::Hidden);
+// Enable atomic optimization
+static cl::opt<bool> EnableAtomicOptimizations(
+ "amdgpu-atomic-optimizations",
+ cl::desc("Enable atomic optimizations"),
+ cl::init(false),
+ cl::Hidden);
+
+// Enable Mode register optimization
+static cl::opt<bool> EnableSIModeRegisterPass(
+ "amdgpu-mode-register",
+ cl::desc("Enable mode register pass"),
+ cl::init(true),
+ cl::Hidden);
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -150,18 +170,22 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeR600VectorRegMergerPass(*PR);
initializeGlobalISel(*PR);
initializeAMDGPUDAGToDAGISelPass(*PR);
+ initializeGCNDPPCombinePass(*PR);
initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
+ initializeSIFixupVectorISelPass(*PR);
initializeSIFoldOperandsPass(*PR);
initializeSIPeepholeSDWAPass(*PR);
initializeSIShrinkInstructionsPass(*PR);
initializeSIOptimizeExecMaskingPreRAPass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
+ initializeAMDGPUFixFunctionBitcastsPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
initializeAMDGPUArgumentUsageInfoPass(*PR);
+ initializeAMDGPUAtomicOptimizerPass(*PR);
initializeAMDGPULowerKernelArgumentsPass(*PR);
initializeAMDGPULowerKernelAttributesPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
@@ -172,6 +196,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
+ initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
initializeSIInsertSkipsPass(*PR);
@@ -182,6 +207,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSIFormMemoryClausesPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
initializeAMDGPUAAWrapperPassPass(*PR);
+ initializeAMDGPUExternalAAWrapperPass(*PR);
initializeAMDGPUUseNativeCallsPass(*PR);
initializeAMDGPUSimplifyLibCallsPass(*PR);
initializeAMDGPUInlinerPass(*PR);
@@ -292,12 +318,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return Reloc::PIC_;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
TargetOptions Options,
@@ -306,9 +326,8 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OptLevel)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
FS, Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), OptLevel),
+ getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
TLOF(createTLOF(getTargetTriple())) {
- AS = AMDGPU::getAMDGPUAS(TT);
initAsmInfo();
}
@@ -331,13 +350,6 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
FSAttr.getValueAsString();
}
-static ImmutablePass *createAMDGPUExternalAAWrapperPass() {
- return createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
- if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
- AAR.addAAResult(WrapperPass->getResult());
- });
-}
-
/// Predicate for Internalize pass.
static bool mustPreserveGV(const GlobalValue &GV) {
if (const Function *F = dyn_cast<Function>(&GV))
@@ -360,17 +372,6 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
Builder.Inliner = createAMDGPUFunctionInliningPass();
}
- if (Internalize) {
- // If we're generating code, we always have the whole program available. The
- // relocations expected for externally visible functions aren't supported,
- // so make sure every non-entry function is hidden.
- Builder.addExtension(
- PassManagerBuilder::EP_EnabledOnOptLevel0,
- [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
- PM.add(createInternalizePass(mustPreserveGV));
- });
- }
-
Builder.addExtension(
PassManagerBuilder::EP_ModuleOptimizerEarly,
[Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
@@ -613,20 +614,23 @@ void AMDGPUPassConfig::addIRPasses() {
disablePass(&FuncletLayoutID);
disablePass(&PatchableFunctionID);
+ addPass(createAtomicExpandPass());
+
+ // This must occur before inlining, as the inliner will not look through
+ // bitcast calls.
+ addPass(createAMDGPUFixFunctionBitcastsPass());
+
addPass(createAMDGPULowerIntrinsicsPass());
- if (TM.getTargetTriple().getArch() == Triple::r600 ||
- !EnableAMDGPUFunctionCalls) {
- // Function calls are not supported, so make sure we inline everything.
- addPass(createAMDGPUAlwaysInlinePass());
- addPass(createAlwaysInlinerLegacyPass());
- // We need to add the barrier noop pass, otherwise adding the function
- // inlining pass will cause all of the PassConfigs passes to be run
- // one function at a time, which means if we have a nodule with two
- // functions, then we will generate code for the first function
- // without ever running any passes on the second.
- addPass(createBarrierNoopPass());
- }
+ // Function calls are not supported, so make sure we inline everything.
+ addPass(createAMDGPUAlwaysInlinePass());
+ addPass(createAlwaysInlinerLegacyPass());
+ // We need to add the barrier noop pass, otherwise adding the function
+ // inlining pass will cause all of the PassConfigs passes to be run
+ // one function at a time, which means if we have a module with two
+ // functions, then we will generate code for the first function
+ // without ever running any passes on the second.
+ addPass(createBarrierNoopPass());
if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
// TODO: May want to move later or split into an early and late one.
@@ -690,6 +694,7 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
}
bool AMDGPUPassConfig::addPreISel() {
+ addPass(createLowerSwitchPass());
addPass(createFlattenCFGPass());
return false;
}
@@ -759,6 +764,10 @@ ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
+ if (EnableAtomicOptimizations) {
+ addPass(createAMDGPUAtomicOptimizerPass());
+ }
+
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
addPass(createAMDGPUAnnotateKernelFeaturesPass());
@@ -789,6 +798,8 @@ void GCNPassConfig::addMachineSSAOptimization() {
//
// XXX - Can we get away without running DeadMachineInstructionElim again?
addPass(&SIFoldOperandsID);
+ if (EnableDPPCombine)
+ addPass(&GCNDPPCombineID);
addPass(&DeadMachineInstructionElimID);
addPass(&SILoadStoreOptimizerID);
if (EnableSDWAPeephole) {
@@ -811,8 +822,10 @@ bool GCNPassConfig::addILPOpts() {
bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();
- addPass(createSILowerI1CopiesPass());
addPass(&SIFixSGPRCopiesID);
+ addPass(createSILowerI1CopiesPass());
+ addPass(createSIFixupVectorISelPass());
+ addPass(createSIAddIMGInitPass());
return false;
}
@@ -878,7 +891,8 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
void GCNPassConfig::addPostRegAlloc() {
addPass(&SIFixVGPRCopiesID);
- addPass(&SIOptimizeExecMaskingID);
+ if (getOptLevel() > CodeGenOpt::None)
+ addPass(&SIOptimizeExecMaskingID);
TargetPassConfig::addPostRegAlloc();
}
@@ -889,6 +903,7 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createSIMemoryLegalizerPass());
addPass(createSIInsertWaitcntsPass());
addPass(createSIShrinkInstructionsPass());
+ addPass(createSIModeRegisterPass());
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able to handle all hazards correctly. This is because if there
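Several of the hunks above follow one pattern: a hidden, off-by-default cl::opt flag, a pass initialization in LLVMInitializeAMDGPUTarget, and a conditional addPass in the pass config (EnableAtomicOptimizations gating createAMDGPUAtomicOptimizerPass, EnableDPPCombine gating GCNDPPCombineID). A sketch of that pattern with deliberately hypothetical flag and pass names:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hidden, off-by-default flag, mirroring EnableAtomicOptimizations above.
// "amdgpu-my-opt" and createMyOptPass() are placeholders, not real names.
static cl::opt<bool> EnableMyOpt(
    "amdgpu-my-opt",
    cl::desc("Enable the hypothetical MyOpt pass"),
    cl::init(false),
    cl::Hidden);

// In the corresponding addPreISel()/addMachineSSAOptimization() override:
//   if (EnableMyOpt)
//     addPass(createMyOptPass());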
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 0fe14493fabd..62fbe71d1902 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -34,7 +34,6 @@ namespace llvm {
class AMDGPUTargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- AMDGPUAS AS;
StringRef getGPUName(const Function &F) const;
StringRef getFeatureString(const Function &F) const;
@@ -55,16 +54,13 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
- AMDGPUAS getAMDGPUAS() const {
- return AS;
- }
void adjustPassManager(PassManagerBuilder &) override;
+
/// Get the integer value of a null pointer in the given address space.
uint64_t getNullPointerValue(unsigned AddrSpace) const {
- if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS)
- return -1;
- return 0;
+ return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0;
}
};
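The rewritten getNullPointerValue expresses the AMDGPU convention that a null pointer is the all-ones value in the LOCAL (LDS) and REGION (GDS) address spaces and zero everywhere else; returning -1 from a uint64_t function yields that all-ones value. A standalone restatement, with the numeric address-space values treated as illustrative placeholders for the AMDGPUAS enumerators:

#include <cstdint>

// Placeholder numbers; the real code uses AMDGPUAS::LOCAL_ADDRESS and
// AMDGPUAS::REGION_ADDRESS.
enum : unsigned { LOCAL_ADDRESS = 3, REGION_ADDRESS = 2 };

// Mirrors the logic above: LDS/GDS null is all-ones, every other address
// space uses 0.
uint64_t nullPointerValue(unsigned AddrSpace) {
  return (AddrSpace == LOCAL_ADDRESS || AddrSpace == REGION_ADDRESS)
             ? ~uint64_t(0) : 0;
}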
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index e2f718bd3c34..c4e1efde130b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -29,3 +29,13 @@ MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(
return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
}
+
+MCSection *AMDGPUTargetObjectFile::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const {
+ // Set metadata access for the explicit section
+ StringRef SectionName = GO->getSection();
+ if (SectionName.startswith(".AMDGPU.comment."))
+ SK = SectionKind::getMetadata();
+
+ return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM);
+}
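getExplicitSectionGlobal above reclassifies globals whose explicit section name starts with ".AMDGPU.comment." as metadata before deferring to the ELF base implementation, so those sections are not treated as ordinary data. The prefix check reduced to a standalone sketch (SectionKind is elided; only the name test is shown):

#include <string>

// True when a section name names an AMDGPU comment section, mirroring the
// startswith(".AMDGPU.comment.") test above.
bool isAMDGPUCommentSection(const std::string &Name) {
  const std::string Prefix = ".AMDGPU.comment.";
  return Name.compare(0, Prefix.size(), Prefix) == 0;
}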
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
index dd9dc1a88fc2..a4ae1a2c18c2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
@@ -26,6 +26,8 @@ class AMDGPUTargetObjectFile : public TargetLoweringObjectFileELF {
public:
MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index a68b8d03f06e..11e4ba4b5010 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -102,7 +102,6 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned ThresholdPrivate = UnrollThresholdPrivate;
unsigned ThresholdLocal = UnrollThresholdLocal;
unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
- const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple);
for (const BasicBlock *BB : L->getBlocks()) {
const DataLayout &DL = BB->getModule()->getDataLayout();
unsigned LocalGEPsSeen = 0;
@@ -140,9 +139,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned AS = GEP->getAddressSpace();
unsigned Threshold = 0;
- if (AS == ASST.PRIVATE_ADDRESS)
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS)
Threshold = ThresholdPrivate;
- else if (AS == ASST.LOCAL_ADDRESS)
+ else if (AS == AMDGPUAS::LOCAL_ADDRESS)
Threshold = ThresholdLocal;
else
continue;
@@ -150,7 +149,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (UP.Threshold >= Threshold)
continue;
- if (AS == ASST.PRIVATE_ADDRESS) {
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
const Value *Ptr = GEP->getPointerOperand();
const AllocaInst *Alloca =
dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
@@ -160,7 +159,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
if (AllocaSize > MaxAlloca)
continue;
- } else if (AS == ASST.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
LocalGEPsSeen++;
// Inhibit unroll for local memory if we have seen addressing not to
// a variable, most likely we will be unable to combine it.
@@ -253,19 +252,18 @@ unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
}
unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
- AMDGPUAS AS = ST->getAMDGPUAS();
- if (AddrSpace == AS.GLOBAL_ADDRESS ||
- AddrSpace == AS.CONSTANT_ADDRESS ||
- AddrSpace == AS.CONSTANT_ADDRESS_32BIT) {
+ if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
return 512;
}
- if (AddrSpace == AS.FLAT_ADDRESS ||
- AddrSpace == AS.LOCAL_ADDRESS ||
- AddrSpace == AS.REGION_ADDRESS)
+ if (AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
+ AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)
return 128;
- if (AddrSpace == AS.PRIVATE_ADDRESS)
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
return 8 * ST->getMaxPrivateElementSize();
llvm_unreachable("unhandled address space");
@@ -277,7 +275,7 @@ bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
// We allow vectorization of flat stores, even though we may need to decompose
// them later if they may access private memory. We don't have enough context
// here, and legalization can handle it.
- if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) {
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
ChainSizeInBytes <= ST->getMaxPrivateElementSize();
}
@@ -545,14 +543,15 @@ bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
if (const Argument *A = dyn_cast<Argument>(V))
return !isArgPassedInSGPR(A);
- // Loads from the private address space are divergent, because threads
- // can execute the load instruction with the same inputs and get different
- // results.
+ // Loads from the private and flat address spaces are divergent, because
+ // threads can execute the load instruction with the same inputs and get
+ // different results.
//
// All other loads are not divergent, because if threads issue loads with the
// same arguments, they will always get the same result.
if (const LoadInst *Load = dyn_cast<LoadInst>(V))
- return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS;
+ return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
+ Load->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
// Atomics are divergent because they are executed sequentially: when an
// atomic operation refers to the same address in each thread, then each
@@ -642,20 +641,19 @@ unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
}
unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
- AMDGPUAS AS = ST->getAMDGPUAS();
- if (AddrSpace == AS.GLOBAL_ADDRESS ||
- AddrSpace == AS.CONSTANT_ADDRESS)
+ if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
return 128;
- if (AddrSpace == AS.LOCAL_ADDRESS ||
- AddrSpace == AS.REGION_ADDRESS)
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)
return 64;
- if (AddrSpace == AS.PRIVATE_ADDRESS)
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
return 32;
- if ((AddrSpace == AS.PARAM_D_ADDRESS ||
- AddrSpace == AS.PARAM_I_ADDRESS ||
- (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
- AddrSpace <= AS.CONSTANT_BUFFER_15)))
+ if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
+ AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
+ (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
+ AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
return 128;
llvm_unreachable("unhandled address space");
}
@@ -666,9 +664,7 @@ bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
// We allow vectorization of flat stores, even though we may need to decompose
// them later if they may access private memory. We don't have enough context
// here, and legalization can handle it.
- if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS)
- return false;
- return true;
+ return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
}
bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
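With the per-triple AMDGPUAS struct removed, the TTI code above compares address spaces against the AMDGPUAS enumerators directly. A standalone sketch of the GCN getLoadStoreVecRegBitWidth mapping shown in this diff, with the numeric address-space values and the private element size treated as illustrative assumptions:

// Placeholder address-space numbers standing in for the AMDGPUAS enumerators.
enum : unsigned {
  FLAT = 0, GLOBAL = 1, REGION = 2, LOCAL = 3,
  CONSTANT = 4, PRIVATE = 5, CONSTANT_32BIT = 6
};

// Mirrors GCNTTIImpl::getLoadStoreVecRegBitWidth above.
unsigned loadStoreVecRegBitWidth(unsigned AS, unsigned MaxPrivateElementSize) {
  if (AS == GLOBAL || AS == CONSTANT || AS == CONSTANT_32BIT)
    return 512;
  if (AS == FLAT || AS == LOCAL || AS == REGION)
    return 128;
  if (AS == PRIVATE)
    return 8 * MaxPrivateElementSize; // bytes -> bits
  return 0; // the real code calls llvm_unreachable here
}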
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 8e63d789e17d..397c5c6fa6fb 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -179,7 +179,7 @@ public:
if (IsGraphicsShader)
return -1;
return ST->hasFlatAddressSpace() ?
- ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
+ AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
}
unsigned getVectorSplitCost() { return 0; }
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 0d3a1673696a..ced3f6f567e2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -25,7 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -70,7 +70,7 @@ char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
@@ -78,10 +78,10 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
// TODO: Preserve dominator tree.
AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
// No divergent values are changed, only blocks and branch edges.
- AU.addPreserved<DivergenceAnalysis>();
+ AU.addPreserved<LegacyDivergenceAnalysis>();
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
@@ -95,7 +95,7 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
/// \returns true if \p BB is reachable through only uniform branches.
/// XXX - Is there a more efficient way to find this?
-static bool isUniformlyReached(const DivergenceAnalysis &DA,
+static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA,
BasicBlock &BB) {
SmallVector<BasicBlock *, 8> Stack;
SmallPtrSet<BasicBlock *, 8> Visited;
@@ -163,7 +163,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
if (PDT.getRoots().size() <= 1)
return false;
- DivergenceAnalysis &DA = getAnalysis<DivergenceAnalysis>();
+ LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
// Loop over all of the blocks in a function, tracking all of the blocks that
// return.
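The changes to this file are a pure rename of the analysis dependency from DivergenceAnalysis to LegacyDivergenceAnalysis; the way the pass consumes the result is unchanged. A sketch of that consumption, assuming the legacy analysis still exposes the isUniform() query of the old pass (an assumption, not shown in this diff):

#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: a branch can be treated as uniform when it is unconditional or
// its condition is uniform according to the (legacy) divergence analysis.
static bool branchIsUniform(LegacyDivergenceAnalysis &DA,
                            const BranchInst &BI) {
  return !BI.isConditional() || DA.isUniform(BI.getCondition());
}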
diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 31e2885c833d..3f9af27a2e5e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -49,6 +49,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -156,13 +157,12 @@ public:
ImmTyDMask,
ImmTyUNorm,
ImmTyDA,
- ImmTyR128,
+ ImmTyR128A16,
ImmTyLWE,
ImmTyExpTgt,
ImmTyExpCompr,
ImmTyExpVM,
- ImmTyDFMT,
- ImmTyNFMT,
+ ImmTyFORMAT,
ImmTyHwreg,
ImmTyOff,
ImmTySendMsg,
@@ -291,7 +291,7 @@ public:
bool isDMask() const { return isImmTy(ImmTyDMask); }
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
bool isDA() const { return isImmTy(ImmTyDA); }
- bool isR128() const { return isImmTy(ImmTyR128); }
+ bool isR128A16() const { return isImmTy(ImmTyR128A16); }
bool isLWE() const { return isImmTy(ImmTyLWE); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -312,8 +312,7 @@ public:
bool isSLC() const { return isImmTy(ImmTySLC); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isD16() const { return isImmTy(ImmTyD16); }
- bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
- bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
+ bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
@@ -666,8 +665,7 @@ public:
case ImmTySLC: OS << "SLC"; break;
case ImmTyTFE: OS << "TFE"; break;
case ImmTyD16: OS << "D16"; break;
- case ImmTyDFMT: OS << "DFMT"; break;
- case ImmTyNFMT: OS << "NFMT"; break;
+ case ImmTyFORMAT: OS << "FORMAT"; break;
case ImmTyClampSI: OS << "ClampSI"; break;
case ImmTyOModSI: OS << "OModSI"; break;
case ImmTyDppCtrl: OS << "DppCtrl"; break;
@@ -681,7 +679,7 @@ public:
case ImmTyDMask: OS << "DMask"; break;
case ImmTyUNorm: OS << "UNorm"; break;
case ImmTyDA: OS << "DA"; break;
- case ImmTyR128: OS << "R128"; break;
+ case ImmTyR128A16: OS << "R128A16"; break;
case ImmTyLWE: OS << "LWE"; break;
case ImmTyOff: OS << "Off"; break;
case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -920,8 +918,7 @@ public:
// Currently there is no suitable machinery in the core llvm-mc for this.
// MCSymbol::isRedefinable is intended for another purpose, and
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
MCSymbol *Sym =
@@ -1061,6 +1058,7 @@ public:
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
+ OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
@@ -1092,7 +1090,6 @@ private:
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);
- bool validateMIMGR128(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
@@ -1829,7 +1826,7 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
unsigned RegWidth) {
// Symbols are only defined for GCN targets
- if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
+ if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
return true;
auto SymbolName = getGprCountSymbolName(RegKind);
@@ -2447,22 +2444,6 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
-bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {
-
- const unsigned Opc = Inst.getOpcode();
- const MCInstrDesc &Desc = MII.get(Opc);
-
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
- return true;
-
- int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
- assert(Idx != -1);
-
- bool R128 = (Inst.getOperand(Idx).getImm() != 0);
-
- return !R128 || hasMIMG_R128();
-}
-
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -2497,11 +2478,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"integer clamping is not supported on this GPU");
return false;
}
- if (!validateMIMGR128(Inst)) {
- Error(IDLoc,
- "r128 modifier is not supported on this GPU");
- return false;
- }
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
if (!validateMIMGD16(Inst)) {
Error(IDLoc,
@@ -2661,18 +2637,18 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
unsigned &SGPRBlocks) {
// TODO(scott.linder): These calculations are duplicated from
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
- IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(getSTI().getCPU());
unsigned NumVGPRs = NextFreeVGPR;
unsigned NumSGPRs = NextFreeSGPR;
- unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
+ unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
NumSGPRs > MaxAddressableNumSGPRs)
return OutOfRangeError(SGPRRange);
NumSGPRs +=
- IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
+ IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
NumSGPRs > MaxAddressableNumSGPRs)
@@ -2681,8 +2657,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
if (Features.test(FeatureSGPRInitBug))
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
- VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
- SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
+ VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
+ SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
return false;
}
@@ -2702,8 +2678,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
StringSet<> Seen;
- IsaInfo::IsaVersion IVersion =
- IsaInfo::getIsaVersion(getSTI().getFeatureBits());
+ IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
SMRange VGPRRange;
uint64_t NextFreeVGPR = 0;
@@ -2962,8 +2937,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
ISA.Stepping,
"AMD", "AMDGPU");
@@ -3025,7 +2999,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
@@ -3091,9 +3065,18 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
}
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
+ const char *AssemblerDirectiveBegin;
+ const char *AssemblerDirectiveEnd;
+ std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
+ AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
+ ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
+ HSAMD::V3::AssemblerDirectiveEnd)
+ : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
+ HSAMD::AssemblerDirectiveEnd);
+
if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
return Error(getParser().getTok().getLoc(),
- (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
+ (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
"not available on non-amdhsa OSes")).str());
}
@@ -3111,7 +3094,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
if (getLexer().is(AsmToken::Identifier)) {
StringRef ID = getLexer().getTok().getIdentifier();
- if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
+ if (ID == AssemblerDirectiveEnd) {
Lex();
FoundEnd = true;
break;
@@ -3133,8 +3116,13 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
YamlStream.flush();
- if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
- return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ if (IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
+ return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ } else {
+ if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
+ return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ }
return false;
}
@@ -3171,6 +3159,10 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();
+
+ // TODO: Restructure/combine with PAL metadata directive.
+ if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
+ return ParseDirectiveHSAMetadata();
} else {
if (IDVal == ".hsa_code_object_version")
return ParseDirectiveHSACodeObjectVersion();
@@ -3186,10 +3178,10 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amd_amdgpu_isa")
return ParseDirectiveISAVersion();
- }
- if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
- return ParseDirectiveHSAMetadata();
+ if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
+ return ParseDirectiveHSAMetadata();
+ }
if (IDVal == PALMD::AssemblerDirective)
return ParseDirectivePALMetadata();
@@ -3465,6 +3457,10 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
case AsmToken::Identifier: {
StringRef Tok = Parser.getTok().getString();
if (Tok == Name) {
+ if (Tok == "r128" && isGFX9())
+ Error(S, "r128 modifier is not supported on this GPU");
+ if (Tok == "a16" && !isGFX9())
+ Error(S, "a16 modifier is not supported on this GPU");
Bit = 1;
Parser.Lex();
} else if (Tok.startswith("no") && Tok.endswith(Name)) {
@@ -3522,6 +3518,53 @@ AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
return MatchOperand_Success;
}
+// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
+// values to live in a joint format operand in the MCInst encoding.
+OperandMatchResultTy
+AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t Dfmt = 0, Nfmt = 0;
+ // dfmt and nfmt can appear in either order, and each is optional.
+ bool GotDfmt = false, GotNfmt = false;
+ while (!GotDfmt || !GotNfmt) {
+ if (!GotDfmt) {
+ auto Res = parseIntWithPrefix("dfmt", Dfmt);
+ if (Res != MatchOperand_NoMatch) {
+ if (Res != MatchOperand_Success)
+ return Res;
+ if (Dfmt >= 16) {
+ Error(Parser.getTok().getLoc(), "out of range dfmt");
+ return MatchOperand_ParseFail;
+ }
+ GotDfmt = true;
+ Parser.Lex();
+ continue;
+ }
+ }
+ if (!GotNfmt) {
+ auto Res = parseIntWithPrefix("nfmt", Nfmt);
+ if (Res != MatchOperand_NoMatch) {
+ if (Res != MatchOperand_Success)
+ return Res;
+ if (Nfmt >= 8) {
+ Error(Parser.getTok().getLoc(), "out of range nfmt");
+ return MatchOperand_ParseFail;
+ }
+ GotNfmt = true;
+ Parser.Lex();
+ continue;
+ }
+ }
+ break;
+ }
+ if (!GotDfmt && !GotNfmt)
+ return MatchOperand_NoMatch;
+ auto Format = Dfmt | Nfmt << 4;
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
+ return MatchOperand_Success;
+}
+
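parseDfmtNfmt above folds the two tbuffer format fields into one immediate: dfmt occupies bits 3:0 and nfmt bits 6:4, matching the format{3-0}/format{6-4} slices introduced in BUFInstructions.td further down. The packing and unpacking as a standalone sketch:

#include <cassert>

// Pack dfmt (4 bits) and nfmt (3 bits) into the joint FORMAT immediate,
// as parseDfmtNfmt does above.
unsigned packFormat(unsigned Dfmt, unsigned Nfmt) {
  assert(Dfmt < 16 && Nfmt < 8 && "out of range dfmt/nfmt");
  return Dfmt | (Nfmt << 4);
}

// Recover the fields, matching MTBUF_Real's format{3-0} and format{6-4}.
unsigned unpackDfmt(unsigned Format) { return Format & 0xF; }
unsigned unpackNfmt(unsigned Format) { return (Format >> 4) & 0x7; }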
//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//
@@ -3652,12 +3695,12 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
static bool
encodeCnt(
- const AMDGPU::IsaInfo::IsaVersion ISA,
+ const AMDGPU::IsaVersion ISA,
int64_t &IntVal,
int64_t CntVal,
bool Saturate,
- unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
- unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
+ unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
+ unsigned (*decode)(const IsaVersion &Version, unsigned))
{
bool Failed = false;
@@ -3688,8 +3731,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getParser().parseAbsoluteExpression(CntVal))
return true;
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
bool Failed = true;
bool Sat = CntName.endswith("_sat");
@@ -3724,8 +3766,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
int64_t Waitcnt = getWaitcntBitMask(ISA);
SMLoc S = Parser.getTok().getLoc();
@@ -4617,8 +4658,7 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOffset);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
@@ -4661,7 +4701,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
@@ -4761,8 +4801,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
- {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr},
- {"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr},
+ {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
@@ -4772,7 +4811,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
{"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
{"da", AMDGPUOperand::ImmTyDA, true, nullptr},
- {"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
+ {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
+ {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
@@ -4844,6 +4884,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
Op.Type == AMDGPUOperand::ImmTyNegHi) {
res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
Op.ConvertResult);
+ } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
+ res = parseDfmtNfmt(Operands);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
}
@@ -5251,12 +5293,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
- // All DPP instructions with at least one source operand have a fake "old"
- // source at the beginning that's tied to the dst operand. Handle it here.
- if (Desc.getNumOperands() >= 2)
- Inst.addOperand(Inst.getOperand(0));
-
for (unsigned E = Operands.size(); I != E; ++I) {
+ auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
+ MCOI::TIED_TO);
+ if (TiedTo != -1) {
+ assert((unsigned)TiedTo < Inst.getNumOperands());
+ // handle tied old or src2 for MAC instructions
+ Inst.addOperand(Inst.getOperand(TiedTo));
+ }
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
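The cvtDPP change above stops unconditionally duplicating operand 0 as the fake "old" source and instead asks the instruction description whether the next operand slot is tied to an earlier one, which also covers the tied src2 of MAC instructions. A sketch of that lookup (MCInstrDesc::getOperandConstraint returns -1 when no TIED_TO constraint exists):

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;

// Sketch: before appending the next parsed operand, copy the operand it is
// tied to, if any, exactly as the rewritten cvtDPP loop does above.
static void addTiedOperandIfNeeded(MCInst &Inst, const MCInstrDesc &Desc) {
  int TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO);
  if (TiedTo != -1)
    Inst.addOperand(Inst.getOperand(TiedTo));
}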
diff --git a/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td b/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td
index b87c47a6b9ee..51c2abeac2ff 100644
--- a/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -17,14 +17,12 @@ def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [],
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
-def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
-def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
class MubufLoad <SDPatternOperator op> : PatFrag <
(ops node:$ptr), (op node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS;
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
def mubuf_load : MubufLoad <load>;
@@ -100,15 +98,11 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> has_vaddr = 1;
bits<1> has_glc = 1;
bits<1> glc_value = 0; // the value for glc if no such operand
- bits<4> dfmt_value = 1; // the value for dfmt if no such operand
- bits<3> nfmt_value = 0; // the value for nfmt if no such operand
bits<1> has_srsrc = 1;
bits<1> has_soffset = 1;
bits<1> has_offset = 1;
bits<1> has_slc = 1;
bits<1> has_tfe = 1;
- bits<1> has_dfmt = 1;
- bits<1> has_nfmt = 1;
}
class MTBUF_Real <MTBUF_Pseudo ps> :
@@ -126,14 +120,16 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
bits<12> offset;
bits<1> glc;
- bits<4> dfmt;
- bits<3> nfmt;
+ bits<7> format;
bits<8> vaddr;
bits<8> vdata;
bits<7> srsrc;
bits<1> slc;
bits<1> tfe;
bits<8> soffset;
+
+ bits<4> dfmt = format{3-0};
+ bits<3> nfmt = format{6-4};
}
class getMTBUFInsDA<list<RegisterClass> vdataList,
@@ -142,16 +138,16 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, SLC:$slc, TFE:$tfe),
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, SLC:$slc, TFE:$tfe)
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe)
);
dag InsData = !if(!empty(vaddrList),
(ins vdataClass:$vdata, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
SLC:$slc, TFE:$tfe),
(ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
SLC:$slc, TFE:$tfe)
);
dag ret = !if(!empty(vdataList), InsNoData, InsData);
@@ -169,15 +165,15 @@ class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[]> {
class getMTBUFAsmOps<int addrKind> {
string Pfx =
- !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $dfmt, $nfmt, $soffset",
+ !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $format, $soffset",
!if(!eq(addrKind, BUFAddrKind.OffEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset offen",
+ "$vaddr, $srsrc, $format, $soffset offen",
!if(!eq(addrKind, BUFAddrKind.IdxEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen",
+ "$vaddr, $srsrc, $format, $soffset idxen",
!if(!eq(addrKind, BUFAddrKind.BothEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen offen",
+ "$vaddr, $srsrc, $format, $soffset idxen offen",
!if(!eq(addrKind, BUFAddrKind.Addr64),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset addr64",
+ "$vaddr, $srsrc, $format, $soffset addr64",
"")))));
string ret = Pfx # "$offset";
}
@@ -217,14 +213,14 @@ multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set load_vt:$vdata,
- (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$dfmt,
- i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
+ i1:$glc, i1:$slc, i1:$tfe)))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set load_vt:$vdata,
(ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
- i8:$dfmt, i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ i8:$format, i1:$glc, i1:$slc, i1:$tfe)))]>,
MTBUFAddr64Table<1, NAME>;
def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -263,13 +259,13 @@ multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i16:$offset, i8:$format, i1:$glc,
i1:$slc, i1:$tfe))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i16:$offset, i8:$format, i1:$glc,
i1:$slc, i1:$tfe))]>,
MTBUFAddr64Table<1, NAME>;
@@ -290,6 +286,12 @@ multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
// MUBUF classes
//===----------------------------------------------------------------------===//
+class MUBUFGetBaseOpcode<string Op> {
+ string ret = !subst("DWORDX2", "DWORD",
+ !subst("DWORDX3", "DWORD",
+ !subst("DWORDX4", "DWORD", Op)));
+}
+
class MUBUF_Pseudo <string opName, dag outs, dag ins,
string asmOps, list<dag> pattern=[]> :
InstSI<outs, ins, "", pattern>,
@@ -303,6 +305,9 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
string Mnemonic = opName;
string AsmOperands = asmOps;
+ Instruction Opcode = !cast<Instruction>(NAME);
+ Instruction BaseOpcode = !cast<Instruction>(MUBUFGetBaseOpcode<NAME>.ret);
+
let VM_CNT = 1;
let EXP_CNT = 1;
let MUBUF = 1;
@@ -325,6 +330,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> has_offset = 1;
bits<1> has_slc = 1;
bits<1> has_tfe = 1;
+ bits<4> dwords = 0;
}
class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
@@ -398,6 +404,16 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
);
}
+class getMUBUFDwords<RegisterClass regClass> {
+ string regClassAsInt = !cast<string>(regClass);
+ int ret =
+ !if(!eq(regClassAsInt, !cast<string>(VGPR_32)), 1,
+ !if(!eq(regClassAsInt, !cast<string>(VReg_64)), 2,
+ !if(!eq(regClassAsInt, !cast<string>(VReg_96)), 3,
+ !if(!eq(regClassAsInt, !cast<string>(VReg_128)), 4,
+ 0))));
+}
+
class getMUBUFIns<int addrKind, list<RegisterClass> vdataList=[], bit isLds = 0> {
dag ret =
!if(!eq(addrKind, BUFAddrKind.Offset), getMUBUFInsDA<vdataList, [], isLds>.ret,
@@ -458,6 +474,7 @@ class MUBUF_Load_Pseudo <string opName,
let Uses = !if(isLds, [EXEC, M0], [EXEC]);
let has_tfe = !if(isLds, 0, 1);
let lds = isLds;
+ let dwords = getMUBUFDwords<vdataClass>.ret;
}
// FIXME: tfe can't be an operand because it requires a separate
@@ -521,6 +538,7 @@ class MUBUF_Store_Pseudo <string opName,
let mayLoad = 0;
let mayStore = 1;
let maybeAtomic = 1;
+ let dwords = getMUBUFDwords<vdataClass>.ret;
}
multiclass MUBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
@@ -660,11 +678,10 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
let AsmMatchConverter = "cvtMubufAtomicReturn";
}
-multiclass MUBUF_Pseudo_Atomics <string opName,
- RegisterClass vdataClass,
- ValueType vdataType,
- SDPatternOperator atomic> {
-
+multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
+ RegisterClass vdataClass,
+ ValueType vdataType,
+ SDPatternOperator atomic> {
def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
MUBUFAddr64Table <0, NAME>;
def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
@@ -672,7 +689,12 @@ multiclass MUBUF_Pseudo_Atomics <string opName,
def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+}
+multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
+ RegisterClass vdataClass,
+ ValueType vdataType,
+ SDPatternOperator atomic> {
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set vdataType:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
@@ -690,6 +712,13 @@ multiclass MUBUF_Pseudo_Atomics <string opName,
def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
}
+multiclass MUBUF_Pseudo_Atomics <string opName,
+ RegisterClass vdataClass,
+ ValueType vdataType,
+ SDPatternOperator atomic> :
+ MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType, atomic>,
+ MUBUF_Pseudo_Atomics_RTN<opName, vdataClass, vdataType, atomic>;
+
//===----------------------------------------------------------------------===//
// MUBUF Instructions
@@ -1030,6 +1059,14 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
// MUBUF Patterns
//===----------------------------------------------------------------------===//
+def extract_glc : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
+}]>;
+
+def extract_slc : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
+}]>;
+
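extract_glc and extract_slc above unpack the combined cachepolicy operand that the buffer intrinsic patterns now carry: bit 0 selects glc and bit 1 selects slc. The same extraction in plain C++ for reference:

// cachepolicy bit layout taken from the SDNodeXForms above:
//   bit 0 = glc, bit 1 = slc.
unsigned extractGLC(unsigned CachePolicy) { return CachePolicy & 1; }
unsigned extractSLC(unsigned CachePolicy) { return (CachePolicy >> 1) & 1; }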
//===----------------------------------------------------------------------===//
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
@@ -1037,119 +1074,129 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (vt (name v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc)),
+ (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc)),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0)),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc)),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm)),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc)),
+ (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, i32, "BUFFER_LOAD_FORMAT_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2i32, "BUFFER_LOAD_FORMAT_XY">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4i32, "BUFFER_LOAD_FORMAT_XYZW">;
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i16, "BUFFER_LOAD_FORMAT_D16_XY">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, i32, "BUFFER_LOAD_DWORD">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i32, "BUFFER_LOAD_DWORDX2">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4i32, "BUFFER_LOAD_DWORDX4">;
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc),
+ (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
+ (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
+ (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc),
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, i32, "BUFFER_STORE_FORMAT_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2i32, "BUFFER_STORE_FORMAT_XY">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4i32, "BUFFER_STORE_FORMAT_XYZW">;
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2f16, "BUFFER_STORE_FORMAT_D16_XY">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i16, "BUFFER_STORE_FORMAT_D16_XY">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4f16, "BUFFER_STORE_FORMAT_D16_XYZW">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i16, "BUFFER_STORE_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, i32, "BUFFER_STORE_DWORD">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i32, "BUFFER_STORE_DWORDX2">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4i32, "BUFFER_STORE_DWORDX4">;
//===----------------------------------------------------------------------===//
// buffer_atomic patterns
@@ -1158,36 +1205,36 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
+ (as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
+ (as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
+ (as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
$vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc))
+ $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy))
>;
}
@@ -1205,49 +1252,49 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_xor, "BUFFER_ATOMIC_XOR">;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
@@ -1397,54 +1444,6 @@ defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D
defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, i16, az_extloadi8_private>;
defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, i16, sextloadi8_private>;
}
-
-// BUFFER_LOAD_DWORD*, addr64=0
-multiclass MUBUF_Load_Dword <ValueType vt,
- MUBUF_Pseudo offset,
- MUBUF_Pseudo offen,
- MUBUF_Pseudo idxen,
- MUBUF_Pseudo bothen> {
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
- imm:$offset, 0, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 0, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-}
-
-defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
- BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
-defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
- BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
-defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
- BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
-
multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_st> {
// Store follows atomic op convention so address is first
@@ -1524,32 +1523,36 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
@@ -1576,39 +1579,36 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
- imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$offset, imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
@@ -1781,8 +1781,8 @@ class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{15} = ps.addr64;
let Inst{18-16} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -1811,6 +1811,7 @@ defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
//===----------------------------------------------------------------------===//
// CI
+// MTBUF - GFX6, GFX7.
//===----------------------------------------------------------------------===//
class MUBUF_Real_ci <bits<7> op, MUBUF_Pseudo ps> :
@@ -2013,8 +2014,8 @@ class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> :
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{18-15} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -2043,8 +2044,8 @@ class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> :
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{18-15} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -2089,3 +2090,22 @@ let SubtargetPredicate = HasPackedD16VMem in {
defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_vi <0x0e>;
defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_vi <0x0f>;
} // End HasPackedD16VMem.
+
+def MUBUFInfoTable : GenericTable {
+ let FilterClass = "MUBUF_Pseudo";
+ let CppTypeName = "MUBUFInfo";
+ let Fields = ["Opcode", "BaseOpcode", "dwords", "has_vaddr", "has_srsrc", "has_soffset"];
+
+ let PrimaryKey = ["Opcode"];
+ let PrimaryKeyName = "getMUBUFOpcodeHelper";
+}
+
+def getMUBUFInfoFromOpcode : SearchIndex {
+ let Table = MUBUFInfoTable;
+ let Key = ["Opcode"];
+}
+
+def getMUBUFInfoFromBaseOpcodeAndDwords : SearchIndex {
+ let Table = MUBUFInfoTable;
+ let Key = ["BaseOpcode", "dwords"];
+}
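+
+// Note: the SearchableTables backend is expected to emit a MUBUFInfo array for
+// this table plus lookup helpers named after the defs above (e.g.
+// getMUBUFInfoFromOpcode and getMUBUFInfoFromBaseOpcodeAndDwords), which
+// callers can use to switch between dword-width variants of a MUBUF opcode.
+// The exact C++ wrappers are assumed here and are not part of this diff.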
diff --git a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
index cdc6ab9412e6..31d2ebef481d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -728,7 +728,9 @@ class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, PatFrag frag> : GCNPat<
(i1 0))
>;
-let OtherPredicates = [LDSRequiresM0Init] in {
+// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
+// related to bounds checking.
+let OtherPredicates = [LDSRequiresM0Init, isCIVI] in {
def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, load_local_m0>;
def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, store_local_m0>;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td b/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 3ef473b7fd96..44040d352e6a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -121,6 +121,11 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}
+class GlobalSaddrTable <bit is_saddr, string Name = ""> {
+ bit IsSaddr = is_saddr;
+ string SaddrOp = Name;
+}
+
// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).
@@ -171,15 +176,19 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
let is_flat_global = 1 in {
- def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>;
- def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1, 1>;
+ def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1, 1>,
+ GlobalSaddrTable<1, opName>;
}
}
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
let is_flat_global = 1 in {
- def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>;
- def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>;
+ def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>,
+ GlobalSaddrTable<1, opName>;
}
}
@@ -262,6 +271,7 @@ multiclass FLAT_Atomic_Pseudo<
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
" $vaddr, $vdata$offset$slc">,
+ GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let PseudoInstr = NAME;
}
@@ -272,10 +282,11 @@ multiclass FLAT_Atomic_Pseudo<
" $vdst, $vaddr, $vdata$offset glc$slc",
[(set vt:$vdst,
(atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
+ GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1>;
}
-multiclass FLAT_Global_Atomic_Pseudo<
+multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
string opName,
RegisterClass vdst_rc,
ValueType vt,
@@ -287,35 +298,48 @@ multiclass FLAT_Global_Atomic_Pseudo<
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
" $vaddr, $vdata, off$offset$slc">,
+ GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let has_saddr = 1;
let PseudoInstr = NAME;
}
- def _RTN : FLAT_AtomicRet_Pseudo <opName,
- (outs vdst_rc:$vdst),
- (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
- " $vdst, $vaddr, $vdata, off$offset glc$slc",
- [(set vt:$vdst,
- (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
- AtomicNoRet <opName, 1> {
- let has_saddr = 1;
- }
-
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
" $vaddr, $vdata, $saddr$offset$slc">,
+ GlobalSaddrTable<1, opName>,
AtomicNoRet <opName#"_saddr", 0> {
let has_saddr = 1;
let enabled_saddr = 1;
let PseudoInstr = NAME#"_SADDR";
}
+}
+
+multiclass FLAT_Global_Atomic_Pseudo_RTN<
+ string opName,
+ RegisterClass vdst_rc,
+ ValueType vt,
+ SDPatternOperator atomic = null_frag,
+ ValueType data_vt = vt,
+ RegisterClass data_rc = vdst_rc> {
+
+ def _RTN : FLAT_AtomicRet_Pseudo <opName,
+ (outs vdst_rc:$vdst),
+ (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
+ " $vdst, $vaddr, $vdata, off$offset glc$slc",
+ [(set vt:$vdst,
+ (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
+ GlobalSaddrTable<0, opName#"_rtn">,
+ AtomicNoRet <opName, 1> {
+ let has_saddr = 1;
+ }
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
" $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
+ GlobalSaddrTable<1, opName#"_rtn">,
AtomicNoRet <opName#"_saddr", 1> {
let has_saddr = 1;
let enabled_saddr = 1;
@@ -323,10 +347,20 @@ multiclass FLAT_Global_Atomic_Pseudo<
}
}
+multiclass FLAT_Global_Atomic_Pseudo<
+ string opName,
+ RegisterClass vdst_rc,
+ ValueType vt,
+ SDPatternOperator atomic = null_frag,
+ ValueType data_vt = vt,
+ RegisterClass data_rc = vdst_rc> :
+ FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>,
+ FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>;
+
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
>;
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
new file mode 100644
index 000000000000..56071d0d2374
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -0,0 +1,446 @@
+//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass combines a V_MOV_B32_dpp instruction with its VALU uses as a DPP
+// src0 operand. If any of the use instructions cannot be combined with the
+// mov, the whole sequence is reverted.
+//
+// $old = ...
+// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
+// dpp_controls..., $bound_ctrl
+// $res = VALU $dpp_value, ...
+//
+// to
+//
+// $res = VALU_DPP $folded_old, $vgpr_to_be_read_from_other_lane, ...,
+// dpp_controls..., $folded_bound_ctrl
+//
+// Combining rules:
+//
+// $bound_ctrl is DPP_BOUND_ZERO, $old is any
+// $bound_ctrl is DPP_BOUND_OFF,  $old is 0
+// -> $folded_old = undef, $folded_bound_ctrl = DPP_BOUND_ZERO
+//
+// $bound_ctrl is DPP_BOUND_OFF, $old is undef
+// -> $folded_old = undef, $folded_bound_ctrl = DPP_BOUND_OFF
+//
+// $bound_ctrl is DPP_BOUND_OFF, $old is foldable
+// -> $folded_old = folded value, $folded_bound_ctrl = DPP_BOUND_OFF
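+//
+// For illustration (schematic only; V_ADD_F32 stands in for any eligible VALU
+// instruction): with bound_ctrl = DPP_BOUND_OFF and $old initialized to 0,
+//
+//   $old = V_MOV_B32_e32 0
+//   $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
+//                dpp_controls..., DPP_BOUND_OFF
+//   $res = V_ADD_F32_e32 $dpp_value, $other
+//
+// matches the first rule group (old folds to undef, bound_ctrl to
+// DPP_BOUND_ZERO) and becomes
+//
+//   $res = V_ADD_F32_dpp undef, $vgpr_to_be_read_from_other_lane, $other,
+//          dpp_controls..., DPP_BOUND_ZERO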
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "gcn-dpp-combine"
+
+STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
+
+namespace {
+
+class GCNDPPCombine : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const SIInstrInfo *TII;
+
+ using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
+
+ MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
+
+ RegSubRegPair foldOldOpnd(MachineInstr &OrigMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand &OldOpndValue) const;
+
+ MachineInstr *createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand *OldOpnd,
+ bool BoundCtrlZero) const;
+
+ MachineInstr *createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ bool BoundCtrlZero) const;
+
+ bool hasNoImmOrEqual(MachineInstr &MI,
+ unsigned OpndName,
+ int64_t Value,
+ int64_t Mask = -1) const;
+
+ bool combineDPPMov(MachineInstr &MI) const;
+
+public:
+ static char ID;
+
+ GCNDPPCombine() : MachineFunctionPass(ID) {
+ initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "GCN DPP Combine"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
+
+char GCNDPPCombine::ID = 0;
+
+char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
+
+FunctionPass *llvm::createGCNDPPCombinePass() {
+ return new GCNDPPCombine();
+}
+
+static int getDPPOp(unsigned Op) {
+ auto DPP32 = AMDGPU::getDPPOp32(Op);
+ if (DPP32 != -1)
+ return DPP32;
+
+ auto E32 = AMDGPU::getVOPe32(Op);
+ return E32 != -1 ? AMDGPU::getDPPOp32(E32) : -1;
+}
+
+// tracks the register operand definition and returns:
+// 1. immediate operand used to initialize the register if found
+// 2. nullptr if the register operand is undef
+// 3. the operand itself otherwise
+MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
+ auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
+ if (!Def)
+ return nullptr;
+
+ switch(Def->getOpcode()) {
+ default: break;
+ case AMDGPU::IMPLICIT_DEF:
+ return nullptr;
+ case AMDGPU::COPY:
+ case AMDGPU::V_MOV_B32_e32: {
+ auto &Op1 = Def->getOperand(1);
+ if (Op1.isImm())
+ return &Op1;
+ break;
+ }
+ }
+ return &OldOpnd;
+}
+
+MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ bool BoundCtrlZero) const {
+ assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
+ assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() ==
+ TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg());
+
+ auto OrigOp = OrigMI.getOpcode();
+ auto DPPOp = getDPPOp(OrigOp);
+ if (DPPOp == -1) {
+ LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
+ return nullptr;
+ }
+
+ auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
+ OrigMI.getDebugLoc(), TII->get(DPPOp));
+ bool Fail = false;
+ do {
+ auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
+ assert(Dst);
+ DPPInst.add(*Dst);
+ int NumOperands = 1;
+
+ const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
+ if (OldIdx != -1) {
+ assert(OldIdx == NumOperands);
+ assert(isOfRegClass(OldOpndVGPR, AMDGPU::VGPR_32RegClass, *MRI));
+ DPPInst.addReg(OldOpndVGPR.Reg, 0, OldOpndVGPR.SubReg);
+ ++NumOperands;
+ }
+
+ if (auto *Mod0 = TII->getNamedOperand(OrigMI,
+ AMDGPU::OpName::src0_modifiers)) {
+ assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
+ AMDGPU::OpName::src0_modifiers));
+ assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+ DPPInst.addImm(Mod0->getImm());
+ ++NumOperands;
+ }
+ auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
+ assert(Src0);
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
+ LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src0);
+ ++NumOperands;
+
+ if (auto *Mod1 = TII->getNamedOperand(OrigMI,
+ AMDGPU::OpName::src1_modifiers)) {
+ assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
+ AMDGPU::OpName::src1_modifiers));
+ assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+ DPPInst.addImm(Mod1->getImm());
+ ++NumOperands;
+ }
+ if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
+ LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src1);
+ ++NumOperands;
+ }
+
+ if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
+ LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src2);
+ }
+
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
+ DPPInst.addImm(BoundCtrlZero ? 1 : 0);
+ } while (false);
+
+ if (Fail) {
+ DPPInst.getInstr()->eraseFromParent();
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr());
+ return DPPInst.getInstr();
+}
+
+GCNDPPCombine::RegSubRegPair
+GCNDPPCombine::foldOldOpnd(MachineInstr &OrigMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand &OldOpndValue) const {
+ assert(OldOpndValue.isImm());
+ switch (OrigMI.getOpcode()) {
+ default: break;
+ case AMDGPU::V_MAX_U32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<uint32_t>::max())
+ return OldOpndVGPR;
+ break;
+ case AMDGPU::V_MAX_I32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::max())
+ return OldOpndVGPR;
+ break;
+ case AMDGPU::V_MIN_I32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::min())
+ return OldOpndVGPR;
+ break;
+
+ case AMDGPU::V_MUL_I32_I24_e32:
+ case AMDGPU::V_MUL_U32_U24_e32:
+ if (OldOpndValue.getImm() == 1) {
+ auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
+ assert(Src1 && Src1->isReg());
+ return getRegSubRegPair(*Src1);
+ }
+ break;
+ }
+ return RegSubRegPair();
+}
+
+// Cases to combine:
+// $bound_ctrl is DPP_BOUND_ZERO, $old is any
+// $bound_ctrl is DPP_BOUND_OFF, $old is 0
+// -> $old = undef, $bound_ctrl = DPP_BOUND_ZERO
+
+// $bound_ctrl is DPP_BOUND_OFF, $old is undef
+// -> $old = undef, $bound_ctrl = DPP_BOUND_OFF
+
+// $bound_ctrl is DPP_BOUND_OFF, $old is foldable
+// -> $old = folded value, $bound_ctrl = DPP_BOUND_OFF
+
+MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand *OldOpndValue,
+ bool BoundCtrlZero) const {
+ assert(OldOpndVGPR.Reg);
+ if (!BoundCtrlZero && OldOpndValue) {
+ assert(OldOpndValue->isImm());
+ OldOpndVGPR = foldOldOpnd(OrigMI, OldOpndVGPR, *OldOpndValue);
+ if (!OldOpndVGPR.Reg) {
+ LLVM_DEBUG(dbgs() << " failed: old immediate cannot be folded\n");
+ return nullptr;
+ }
+ }
+ return createDPPInst(OrigMI, MovMI, OldOpndVGPR, BoundCtrlZero);
+}
+
+// Returns true if MI doesn't have an OpndName immediate operand, or if that
+// operand's value, masked by Mask, equals Value.
+bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
+ int64_t Value, int64_t Mask) const {
+ auto *Imm = TII->getNamedOperand(MI, OpndName);
+ if (!Imm)
+ return true;
+
+ assert(Imm->isImm());
+ return (Imm->getImm() & Mask) == Value;
+}
+
+bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
+ assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
+ auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
+ assert(BCZOpnd && BCZOpnd->isImm());
+ bool BoundCtrlZero = 0 != BCZOpnd->getImm();
+
+ LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
+
+ auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
+ assert(OldOpnd && OldOpnd->isReg());
+ auto OldOpndVGPR = getRegSubRegPair(*OldOpnd);
+ auto *OldOpndValue = getOldOpndValue(*OldOpnd);
+ assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
+ if (OldOpndValue) {
+ if (BoundCtrlZero) {
+ OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef, ignore old opnd
+ OldOpndValue = nullptr;
+ } else {
+ if (!OldOpndValue->isImm()) {
+ LLVM_DEBUG(dbgs() << " failed: old operand isn't an imm or undef\n");
+ return false;
+ }
+ if (OldOpndValue->getImm() == 0) {
+ OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef
+ OldOpndValue = nullptr;
+ BoundCtrlZero = true;
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " old=";
+ if (!OldOpndValue)
+ dbgs() << "undef";
+ else
+ dbgs() << OldOpndValue->getImm();
+ dbgs() << ", bound_ctrl=" << BoundCtrlZero << '\n');
+
+ std::vector<MachineInstr*> OrigMIs, DPPMIs;
+ if (!OldOpndVGPR.Reg) { // OldOpndVGPR = undef
+ OldOpndVGPR = RegSubRegPair(
+ MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
+ auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
+ TII->get(AMDGPU::IMPLICIT_DEF), OldOpndVGPR.Reg);
+ DPPMIs.push_back(UndefInst.getInstr());
+ }
+
+ OrigMIs.push_back(&MovMI);
+ bool Rollback = true;
+ for (auto &Use : MRI->use_nodbg_operands(
+ TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg())) {
+ Rollback = true;
+
+ auto &OrigMI = *Use.getParent();
+ auto OrigOp = OrigMI.getOpcode();
+ if (TII->isVOP3(OrigOp)) {
+ if (!TII->hasVALU32BitEncoding(OrigOp)) {
+ LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n");
+ break;
+ }
+ // check if other than abs|neg modifiers are set (opsel for example)
+ const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
+ if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
+ LLVM_DEBUG(dbgs() << " failed: VOP3 has non-default modifiers\n");
+ break;
+ }
+ } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
+ LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n");
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
+ if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
+ if (auto *DPPInst = createDPPInst(OrigMI, MovMI, OldOpndVGPR,
+ OldOpndValue, BoundCtrlZero)) {
+ DPPMIs.push_back(DPPInst);
+ Rollback = false;
+ }
+ } else if (OrigMI.isCommutable() &&
+ &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ auto *BB = OrigMI.getParent();
+ auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
+ BB->insert(OrigMI, NewMI);
+ if (TII->commuteInstruction(*NewMI)) {
+ LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
+ if (auto *DPPInst = createDPPInst(*NewMI, MovMI, OldOpndVGPR,
+ OldOpndValue, BoundCtrlZero)) {
+ DPPMIs.push_back(DPPInst);
+ Rollback = false;
+ }
+ } else
+ LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
+ NewMI->eraseFromParent();
+ } else
+ LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
+ if (Rollback)
+ break;
+ OrigMIs.push_back(&OrigMI);
+ }
+
+ for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
+ MI->eraseFromParent();
+
+ return !Rollback;
+}
+
+bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
+ auto &ST = MF.getSubtarget<GCNSubtarget>();
+ if (!ST.hasDPP() || skipFunction(MF.getFunction()))
+ return false;
+
+ MRI = &MF.getRegInfo();
+ TII = ST.getInstrInfo();
+
+ assert(MRI->isSSA() && "Must be run on SSA");
+
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
+ auto &MI = *I++;
+ if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
+ Changed = true;
+ ++NumDPPMovsCombined;
+ }
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index f236f10ba75a..c6396de89c4f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -215,6 +215,14 @@ void GCNHazardRecognizer::AdvanceCycle() {
if (!CurrCycleInstr)
return;
+ // Do not track non-instructions which do not affect the wait states.
+  // If included, these instructions can overflow the tracked-instruction
+  // buffer and cause detectable hazards to be missed.
+ if (CurrCycleInstr->getOpcode() == AMDGPU::IMPLICIT_DEF)
+ return;
+ else if (CurrCycleInstr->isDebugInstr())
+ return;
+
unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
// Keep track of emitted instructions
@@ -253,8 +261,7 @@ int GCNHazardRecognizer::getWaitStatesSince(
return WaitStates;
unsigned Opcode = MI->getOpcode();
- if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF ||
- Opcode == AMDGPU::INLINEASM)
+ if (Opcode == AMDGPU::INLINEASM)
continue;
}
++WaitStates;
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
index 651091d44136..d62dc8d86781 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
@@ -335,7 +335,7 @@ GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
assert(C);
AvailQueue.remove(*C);
auto SU = C->SU;
- LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
advanceToCycle(SU->getHeight());
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 15366d66bd85..8e4cc391dc21 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -434,8 +434,7 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
// Sort recorded regions by pressure - highest at the front
void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
const auto &ST = MF.getSubtarget<GCNSubtarget>();
- llvm::sort(Regions.begin(), Regions.end(),
- [&ST, TargetOcc](const Region *R1, const Region *R2) {
+ llvm::sort(Regions, [&ST, TargetOcc](const Region *R1, const Region *R2) {
return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
});
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index 192d534bb9cf..ec6bcae33555 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -258,7 +258,7 @@ GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
assert(C);
RQ.remove(*C);
auto SU = C->SU;
- LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
releaseSuccessors(SU, StepNo);
Schedule.push_back(SU);
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td b/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td
index d76acfa24f90..b8142a4e4ff8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -156,3 +156,8 @@ def : ProcessorModel<"gfx904", SIQuarterSpeedModel,
def : ProcessorModel<"gfx906", SIQuarterSpeedModel,
[FeatureISAVersion9_0_6]
>;
+
+def : ProcessorModel<"gfx909", SIQuarterSpeedModel,
+ [FeatureISAVersion9_0_9]
+>;
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index db908368a179..fab0f87dfcbe 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -207,9 +207,12 @@ void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "da");
}
-void AMDGPUInstPrinter::printR128(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "r128");
+ if (STI.hasFeature(AMDGPU::FeatureR128A16))
+ printNamedBit(MI, OpNo, O, "a16");
+ else
+ printNamedBit(MI, OpNo, O, "r128");
}
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
@@ -236,21 +239,12 @@ void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
O << " vm";
}
-void AMDGPUInstPrinter::printDFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm()) {
- O << " dfmt:";
- printU8ImmDecOperand(MI, OpNo, O);
- }
-}
-
-void AMDGPUInstPrinter::printNFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm()) {
- O << " nfmt:";
- printU8ImmDecOperand(MI, OpNo, O);
+void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (unsigned Val = MI->getOperand(OpNo).getImm()) {
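+    // The combined operand packs dfmt in bits [3:0] and nfmt above them; e.g.
+    // an immediate of 0x74 prints " dfmt:4, nfmt:7".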
+ O << " dfmt:" << (Val & 15);
+ O << ", nfmt:" << (Val >> 4);
}
}
@@ -1161,8 +1155,7 @@ void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
unsigned SImm16 = MI->getOperand(OpNo).getImm();
unsigned Vmcnt, Expcnt, Lgkmcnt;
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 11a496a38b2c..0ba74ca0f3e1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -80,7 +80,7 @@ private:
raw_ostream &O);
void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
@@ -90,10 +90,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printExpVM(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printDFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
- void printNFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFORMAT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 07bef9103c0d..c85a1ea5b054 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -46,11 +46,9 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
if (const auto *SymA = Target.getSymA()) {
// SCRATCH_RSRC_DWORD[01] is a special global variable that represents
// the scratch buffer.
- if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD0")
+ if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD0" ||
+ SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD1")
return ELF::R_AMDGPU_ABS32_LO;
-
- if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD1")
- return ELF::R_AMDGPU_ABS32_HI;
}
switch (Target.getAccessVariant()) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 6a41e3f650bc..c17fe126546c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -17,7 +17,9 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
@@ -27,6 +29,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetParser.h"
namespace llvm {
#include "AMDGPUPTNote.h"
@@ -34,90 +37,13 @@ namespace llvm {
using namespace llvm;
using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::HSAMD;
//===----------------------------------------------------------------------===//
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//
-static const struct {
- const char *Name;
- unsigned Mach;
-} MachTable[] = {
- // Radeon HD 2000/3000 Series (R600).
- { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
- { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
- { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
- { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
- // Radeon HD 4000 Series (R700).
- { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
- { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
- { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
- // Radeon HD 5000 Series (Evergreen).
- { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
- { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
- { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
- { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
- { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
- // Radeon HD 6000 Series (Northern Islands).
- { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
- { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
- { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
- { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
- // AMDGCN GFX6.
- { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
- { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
- { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- // AMDGCN GFX7.
- { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
- { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
- { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
- { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
- { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
- { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
- { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
- // AMDGCN GFX8.
- { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
- { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
- { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
- { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
- // AMDGCN GFX9.
- { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
- { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
- { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
- { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
- // Not specified processor.
- { nullptr, ELF::EF_AMDGPU_MACH_NONE }
-};
-
-unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
- auto Entry = MachTable;
- for (; Entry->Name && GPU != Entry->Name; ++Entry)
- ;
- return Entry->Mach;
-}
-
-const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
- auto Entry = MachTable;
- for (; Entry->Name && Mach != Entry->Mach; ++Entry)
- ;
- return Entry->Name;
-}
-
-bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
+bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
HSAMD::Metadata HSAMetadata;
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
return false;
@@ -125,6 +51,104 @@ bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
return EmitHSAMetadata(HSAMetadata);
}
+bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
+ std::shared_ptr<msgpack::Node> HSAMetadataRoot;
+ yaml::Input YIn(HSAMetadataString);
+ YIn >> HSAMetadataRoot;
+ if (YIn.error())
+ return false;
+ return EmitHSAMetadata(HSAMetadataRoot, false);
+}
+
+StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
+ AMDGPU::GPUKind AK;
+
+ switch (ElfMach) {
+ case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
+ case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
+ case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
+ case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
+ case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
+ case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
+ case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
+ case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
+ case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
+ case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
+ case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
+ }
+
+ StringRef GPUName = getArchNameAMDGCN(AK);
+ if (GPUName != "")
+ return GPUName;
+ return getArchNameR600(AK);
+}
+
+unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
+ AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
+ if (AK == AMDGPU::GPUKind::GK_NONE)
+ AK = parseArchR600(GPU);
+
+ switch (AK) {
+ case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
+ case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
+ case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
+ case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
+ case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
+ case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
+ case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
+ case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
+ case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
+ case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
+ case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
+ case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
+ case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
+ case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
+ case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
+ case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
+ case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
+ case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
+ case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
+ case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
+ case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
+ case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
+ case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
+ case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
+ case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
+ case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
+ case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
+ case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
+ case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
+ case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
+ case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
+ case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
+ case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
+ }
+
+ llvm_unreachable("unknown GPU");
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUTargetAsmStreamer
//===----------------------------------------------------------------------===//
@@ -183,9 +207,26 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
if (HSAMD::toString(HSAMetadata, HSAMetadataString))
return false;
- OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
+ OS << '\t' << AssemblerDirectiveBegin << '\n';
OS << HSAMetadataString << '\n';
- OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
+ OS << '\t' << AssemblerDirectiveEnd << '\n';
+ return true;
+}
+
+bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
+ std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+ V3::MetadataVerifier Verifier(Strict);
+ if (!Verifier.verify(*HSAMetadataRoot))
+ return false;
+
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << HSAMetadataRoot;
+
+ OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
+ OS << StrOS.str() << '\n';
+ OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
return true;
}
@@ -203,70 +244,59 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
- amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
-
- IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
+ IsaVersion IVersion = getIsaVersion(STI.getCPU());
OS << "\t.amdhsa_kernel " << KernelName << '\n';
-#define PRINT_IF_NOT_DEFAULT(STREAM, DIRECTIVE, KERNEL_DESC, \
- DEFAULT_KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
- if (AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) != \
- AMDHSA_BITS_GET(DEFAULT_KERNEL_DESC.MEMBER_NAME, FIELD_NAME)) \
- STREAM << "\t\t" << DIRECTIVE << " " \
- << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
-
- if (KD.group_segment_fixed_size != DefaultKD.group_segment_fixed_size)
- OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
- << '\n';
- if (KD.private_segment_fixed_size != DefaultKD.private_segment_fixed_size)
- OS << "\t\t.amdhsa_private_segment_fixed_size "
- << KD.private_segment_fixed_size << '\n';
-
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_queue_ptr", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_id", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_private_segment_size", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, DefaultKD,
+#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
+ STREAM << "\t\t" << DIRECTIVE << " " \
+ << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
+
+ OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
+ << '\n';
+ OS << "\t\t.amdhsa_private_segment_fixed_size "
+ << KD.private_segment_fixed_size << '\n';
+
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ PRINT_FIELD(
+ OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_info", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_vgpr_workitem_id", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
+ PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
// These directives are required.
OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
@@ -279,54 +309,52 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_32", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_16_64", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_32", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_16_64", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_dx10_clamp", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_ieee_mode", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
+ PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
+ PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
+ PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
+ PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
+ PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
+ PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
if (IVersion.Major >= 9)
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_fp16_overflow", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, DefaultKD,
+ PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+ PRINT_FIELD(
+ OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_denorm_src", KD, DefaultKD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_div_zero", KD, DefaultKD,
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
+ PRINT_FIELD(
+ OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_overflow", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_underflow", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_inexact", KD, DefaultKD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_int_div_zero", KD, DefaultKD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
-#undef PRINT_IF_NOT_DEFAULT
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
+ PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
+#undef PRINT_FIELD
OS << "\t.end_amdhsa_kernel\n";
}
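
The PRINT_FIELD macro above leans on AMDHSA_BITS_GET to pull individual bit-fields out of the packed kernel-descriptor words. As a rough illustration of the mask-and-shift extraction it performs (the mask and shift constants below are made up; the real ones come from the amdhsa:: enumerators), a minimal standalone sketch:

    #include <cstdint>
    #include <iostream>

    // Hypothetical stand-ins for one packed field; in the real headers each
    // amdhsa:: field carries a matching mask and shift value.
    constexpr uint32_t EXAMPLE_FIELD       = 0x00000780; // bits 7..10
    constexpr uint32_t EXAMPLE_FIELD_SHIFT = 7;

    constexpr uint32_t bitsGet(uint32_t Src, uint32_t Mask, uint32_t Shift) {
      return (Src & Mask) >> Shift; // roughly what AMDHSA_BITS_GET expands to
    }

    int main() {
      uint32_t compute_pgm_rsrc2 = 0x00000400; // example packed descriptor word
      std::cout << "\t\t.amdhsa_example_field "
                << bitsGet(compute_pgm_rsrc2, EXAMPLE_FIELD, EXAMPLE_FIELD_SHIFT)
                << '\n';
      return 0;
    }
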
@@ -342,12 +370,16 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
unsigned EFlags = MCA.getELFHeaderEFlags();
EFlags &= ~ELF::EF_AMDGPU_MACH;
- EFlags |= getMACH(STI.getCPU());
+ EFlags |= getElfMach(STI.getCPU());
EFlags &= ~ELF::EF_AMDGPU_XNACK;
if (AMDGPU::hasXNACK(STI))
EFlags |= ELF::EF_AMDGPU_XNACK;
+ EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC;
+ if (AMDGPU::hasSRAMECC(STI))
+ EFlags |= ELF::EF_AMDGPU_SRAM_ECC;
+
MCA.setELFHeaderEFlags(EFlags);
}
@@ -355,13 +387,13 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
-void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
- const MCExpr *DescSZ, unsigned NoteType,
+void AMDGPUTargetELFStreamer::EmitNote(
+ StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
function_ref<void(MCELFStreamer &)> EmitDesc) {
auto &S = getStreamer();
auto &Context = S.getContext();
- auto NameSZ = sizeof(ElfNote::NoteName);
+ auto NameSZ = Name.size() + 1;
S.PushSection();
S.SwitchSection(Context.getELFSection(
@@ -369,7 +401,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
S.EmitIntValue(NameSZ, 4); // namesz
S.EmitValue(DescSZ, 4); // descz
S.EmitIntValue(NoteType, 4); // type
- S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name
+ S.EmitBytes(Name); // name
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
EmitDesc(S); // desc
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
@@ -381,14 +413,11 @@ void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
uint32_t Major, uint32_t Minor) {
- EmitAMDGPUNote(
- MCConstantExpr::create(8, getContext()),
- ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
- [&](MCELFStreamer &OS){
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
+ ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ });
}
void
@@ -404,21 +433,18 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
VendorNameSize + ArchNameSize;
- EmitAMDGPUNote(
- MCConstantExpr::create(DescSZ, getContext()),
- ElfNote::NT_AMDGPU_HSA_ISA,
- [&](MCELFStreamer &OS) {
- OS.EmitIntValue(VendorNameSize, 2);
- OS.EmitIntValue(ArchNameSize, 2);
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
- OS.EmitIntValue(Stepping, 4);
- OS.EmitBytes(VendorName);
- OS.EmitIntValue(0, 1); // NULL terminate VendorName
- OS.EmitBytes(ArchName);
- OS.EmitIntValue(0, 1); // NULL terminte ArchName
- }
- );
+ EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
+ ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
+ OS.EmitIntValue(VendorNameSize, 2);
+ OS.EmitIntValue(ArchNameSize, 2);
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ OS.EmitIntValue(Stepping, 4);
+ OS.EmitBytes(VendorName);
+ OS.EmitIntValue(0, 1); // NULL terminate VendorName
+ OS.EmitBytes(ArchName);
+                   OS.EmitIntValue(0, 1); // NULL terminate ArchName
+ });
}
void
@@ -447,15 +473,41 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
- EmitAMDGPUNote(
- DescSZ,
- ELF::NT_AMD_AMDGPU_ISA,
- [&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(IsaVersionString);
- OS.EmitLabel(DescEnd);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(IsaVersionString);
+ OS.EmitLabel(DescEnd);
+ });
+ return true;
+}
+
+bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
+ std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+ V3::MetadataVerifier Verifier(Strict);
+ if (!Verifier.verify(*HSAMetadataRoot))
+ return false;
+
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ msgpack::Writer MPWriter(StrOS);
+ HSAMetadataRoot->write(MPWriter);
+
+ // Create two labels to mark the beginning and end of the desc field
+ // and a MCExpr to calculate the size of the desc field.
+ auto &Context = getContext();
+ auto *DescBegin = Context.createTempSymbol();
+ auto *DescEnd = Context.createTempSymbol();
+ auto *DescSZ = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(DescEnd, Context),
+ MCSymbolRefExpr::create(DescBegin, Context), Context);
+
+ EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(StrOS.str());
+ OS.EmitLabel(DescEnd);
+ });
return true;
}
@@ -474,28 +526,24 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
- EmitAMDGPUNote(
- DescSZ,
- ELF::NT_AMD_AMDGPU_HSA_METADATA,
- [&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(HSAMetadataString);
- OS.EmitLabel(DescEnd);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(HSAMetadataString);
+ OS.EmitLabel(DescEnd);
+ });
return true;
}
bool AMDGPUTargetELFStreamer::EmitPALMetadata(
const PALMD::Metadata &PALMetadata) {
- EmitAMDGPUNote(
- MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
- ELF::NT_AMD_AMDGPU_PAL_METADATA,
- [&](MCELFStreamer &OS){
- for (auto I : PALMetadata)
- OS.EmitIntValue(I, sizeof(uint32_t));
- }
- );
+ EmitNote(ElfNote::NoteNameV2,
+ MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t),
+ getContext()),
+ ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) {
+ for (auto I : PALMetadata)
+ OS.EmitIntValue(I, sizeof(uint32_t));
+ });
return true;
}
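
The EmitNote refactor above parameterizes the note name (NoteNameV2 vs. NoteNameV3) but keeps the record layout the same: namesz, descsz, type, the NUL-terminated name, then the desc payload, each padded to 4-byte alignment. A minimal sketch of that layout using only the standard library (the EmitDesc callback is replaced by a plain byte vector; this is an illustration of the note format, not the streamer API itself):

    #include <cstdint>
    #include <string>
    #include <vector>

    std::vector<uint8_t> buildElfNote(const std::string &Name, uint32_t Type,
                                      const std::vector<uint8_t> &Desc) {
      std::vector<uint8_t> Out;
      auto emitU32 = [&Out](uint32_t V) {          // little-endian word
        for (int I = 0; I < 4; ++I)
          Out.push_back(static_cast<uint8_t>(V >> (8 * I)));
      };
      auto pad4 = [&Out]() {                       // pad to a 4-byte boundary
        while (Out.size() % 4)
          Out.push_back(0);
      };
      emitU32(static_cast<uint32_t>(Name.size() + 1)); // namesz counts the NUL
      emitU32(static_cast<uint32_t>(Desc.size()));     // descsz
      emitU32(Type);                                   // type
      Out.insert(Out.end(), Name.begin(), Name.end()); // name
      Out.push_back(0);                                // NUL terminator
      pad4();
      Out.insert(Out.end(), Desc.begin(), Desc.end()); // desc
      pad4();
      return Out;
    }
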
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 472da1b73593..9a807c804f9f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -11,6 +11,7 @@
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
#include "AMDKernelCodeT.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDGPUMetadata.h"
@@ -31,13 +32,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
protected:
MCContext &getContext() const { return Streamer.getContext(); }
- /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name.
- unsigned getMACH(StringRef GPU) const;
-
public:
- /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value.
- static const char *getMachName(unsigned Mach);
-
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
@@ -58,7 +53,20 @@ public:
virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
/// \returns True on success, false on failure.
- virtual bool EmitHSAMetadata(StringRef HSAMetadataString);
+ virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString);
+
+ /// \returns True on success, false on failure.
+ virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString);
+
+ /// Emit HSA Metadata
+ ///
+ /// When \p Strict is true, known metadata elements must already be
+ /// well-typed. When \p Strict is false, known types are inferred and
+ /// the \p HSAMetadata structure is updated with the correct types.
+ ///
+ /// \returns True on success, false on failure.
+ virtual bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) = 0;
/// \returns True on success, false on failure.
virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
@@ -71,6 +79,9 @@ public:
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
bool ReserveXNACK) = 0;
+
+ static StringRef getArchNameFromElfMach(unsigned ElfMach);
+ static unsigned getElfMach(StringRef GPU);
};
class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
@@ -95,6 +106,10 @@ public:
bool EmitISAVersion(StringRef IsaVersionString) override;
/// \returns True on success, false on failure.
+ bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) override;
+
+ /// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
/// \returns True on success, false on failure.
@@ -110,8 +125,8 @@ public:
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
MCStreamer &Streamer;
- void EmitAMDGPUNote(const MCExpr *DescSize, unsigned NoteType,
- function_ref<void(MCELFStreamer &)> EmitDesc);
+ void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType,
+ function_ref<void(MCELFStreamer &)> EmitDesc);
public:
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
@@ -135,6 +150,10 @@ public:
bool EmitISAVersion(StringRef IsaVersionString) override;
/// \returns True on success, false on failure.
+ bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) override;
+
+ /// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
/// \returns True on success, false on failure.
diff --git a/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 44c2d366e461..1c68dbd78e75 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -29,6 +29,7 @@ class MIMGBaseOpcode {
bit Atomic = 0;
bit AtomicX2 = 0; // (f)cmpswap
bit Sampler = 0;
+ bit Gather4 = 0;
bits<8> NumExtraArgs = 0;
bit Gradients = 0;
bit Coordinates = 1;
@@ -43,7 +44,7 @@ def MIMGBaseOpcode : GenericEnum {
def MIMGBaseOpcodesTable : GenericTable {
let FilterClass = "MIMGBaseOpcode";
let CppTypeName = "MIMGBaseOpcodeInfo";
- let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
+ let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler", "Gather4",
"NumExtraArgs", "Gradients", "Coordinates", "LodOrClampOrMip",
"HasD16"];
GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
@@ -141,7 +142,7 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm,
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -179,6 +180,8 @@ multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16, bit mip = 0,
defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 0>;
let VDataDwords = 4 in
defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 0>;
+ let VDataDwords = 8 in
+ defm _V8 : MIMG_NoSampler_Src_Helper <op, asm, VReg_256, 0>;
}
}
@@ -199,7 +202,7 @@ class MIMG_Store_Helper <bits<7> op, string asm,
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -252,7 +255,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
}
@@ -316,7 +319,7 @@ class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -411,6 +414,8 @@ multiclass MIMG_Sampler <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
defm _V3 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_96>;
let VDataDwords = 4 in
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128>;
+ let VDataDwords = 8 in
+ defm _V8 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_256>;
}
}
@@ -421,6 +426,7 @@ multiclass MIMG_Gather <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
string asm = "image_gather4"#sample.LowerCaseMod> {
def "" : MIMG_Sampler_BaseOpcode<sample> {
let HasD16 = 1;
+ let Gather4 = 1;
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,
@@ -429,6 +435,8 @@ multiclass MIMG_Gather <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */
let VDataDwords = 4 in
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128, 1>;
+ let VDataDwords = 8 in
+ defm _V8 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_256>;
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 1683fe6c9a57..679cf18d2c20 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -226,11 +226,11 @@ private:
// occur in the same basic block as its definition, because
// it is illegal for the scheduler to schedule them in
// different blocks.
- if (UseI->readsRegister(MOI->getReg()))
+ if (UseI->readsRegister(MOI->getReg(), &TRI))
LastUseCount = AluInstCount;
// Exit early if the current use kills the register
- if (UseI != Def && UseI->killsRegister(MOI->getReg()))
+ if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI))
break;
}
if (LastUseCount)
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index e00dffc4be99..e2a0f05d2b34 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -589,7 +589,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
case Intrinsic::r600_implicitarg_ptr: {
- MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
+ MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
return DAG.getConstant(ByteOffset, DL, PtrVT);
}
@@ -741,12 +741,12 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
- if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
+ if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
const DataLayout &DL = DAG.getDataLayout();
const GlobalValue *GV = GSD->getGlobal();
- MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
@@ -903,7 +903,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.PARAM_I_ADDRESS);
+ AMDGPUAS::PARAM_I_ADDRESS);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
@@ -1141,7 +1141,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
//TODO: Who creates the i8 stores?
assert(Store->isTruncatingStore()
|| Store->getValue().getValueType() == MVT::i8);
- assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
+ assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
SDValue Mask;
if (Store->getMemoryVT() == MVT::i8) {
@@ -1175,7 +1175,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
// Load dword
// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(UndefValue::get(
- Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
+ Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
Chain = Dst.getValue(1);
@@ -1241,9 +1241,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
// Neither LOCAL nor PRIVATE can do vectors at the moment
- if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
- if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
StoreNode->isTruncatingStore()) {
// Add an extra level of chain to isolate this vector
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
@@ -1267,7 +1267,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
DAG.getConstant(2, DL, PtrVT));
- if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
// It is beneficial to create MSKOR here instead of combiner to avoid
// artificial dependencies introduced by RMW
if (StoreNode->isTruncatingStore()) {
@@ -1320,7 +1320,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
- if (AS != AMDGPUASI.PRIVATE_ADDRESS)
+ if (AS != AMDGPUAS::PRIVATE_ADDRESS)
return SDValue();
if (MemVT.bitsLT(MVT::i32))
@@ -1403,7 +1403,7 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
// Load dword
// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(UndefValue::get(
- Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
+ Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
// Get offset within the register.
@@ -1441,7 +1441,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = LoadNode->getMemoryVT();
ISD::LoadExtType ExtType = LoadNode->getExtensionType();
- if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
return lowerPrivateExtLoad(Op, DAG);
}
@@ -1451,8 +1451,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = LoadNode->getChain();
SDValue Ptr = LoadNode->getBasePtr();
- if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
- LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
return scalarizeVectorLoad(LoadNode, DAG);
}
@@ -1473,7 +1473,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(4, DL, MVT::i32)),
DAG.getConstant(LoadNode->getAddressSpace() -
- AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
+ AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
);
}
@@ -1509,7 +1509,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(MergedValues, DL);
}
- if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
+ if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
return SDValue();
}
@@ -1606,7 +1606,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.PARAM_I_ADDRESS);
+ AMDGPUAS::PARAM_I_ADDRESS);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
@@ -1656,7 +1656,7 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const {
// Local and Private addresses do not handle vectors. Limit to i32
- if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
return (MemVT.getSizeInBits() <= 32);
}
return true;
@@ -1685,14 +1685,15 @@ bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
static SDValue CompactSwizzlableVector(
SelectionDAG &DAG, SDValue VectorEntry,
DenseMap<unsigned, unsigned> &RemapSwizzle) {
- assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
assert(RemapSwizzle.empty());
- SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
- };
+
+ SDLoc DL(VectorEntry);
+ EVT EltTy = VectorEntry.getValueType().getVectorElementType();
+
+ SDValue NewBldVec[4];
+ for (unsigned i = 0; i < 4; i++)
+ NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
+ DAG.getIntPtrConstant(i, DL));
for (unsigned i = 0; i < 4; i++) {
if (NewBldVec[i].isUndef())
@@ -1727,15 +1728,17 @@ static SDValue CompactSwizzlableVector(
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
DenseMap<unsigned, unsigned> &RemapSwizzle) {
- assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
assert(RemapSwizzle.empty());
- SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
- };
- bool isUnmovable[4] = { false, false, false, false };
+
+ SDLoc DL(VectorEntry);
+ EVT EltTy = VectorEntry.getValueType().getVectorElementType();
+
+ SDValue NewBldVec[4];
+ bool isUnmovable[4] = {false, false, false, false};
+ for (unsigned i = 0; i < 4; i++)
+ NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
+ DAG.getIntPtrConstant(i, DL));
+
for (unsigned i = 0; i < 4; i++) {
RemapSwizzle[i] = i;
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
@@ -1766,7 +1769,6 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
SelectionDAG &DAG,
const SDLoc &DL) const {
- assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
// Old -> New swizzle values
DenseMap<unsigned, unsigned> SwizzleRemap;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 5397e779474c..9cc3e5f3c314 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -229,11 +229,11 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
}
bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
- return MI.findRegisterUseOperandIdx(R600::AR_X) != -1;
+ return MI.findRegisterUseOperandIdx(R600::AR_X, false, &RI) != -1;
}
bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
- return MI.findRegisterDefOperandIdx(R600::AR_X) != -1;
+ return MI.findRegisterDefOperandIdx(R600::AR_X, false, false, &RI) != -1;
}
bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
@@ -1500,19 +1500,19 @@ void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
}
unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const {
+ unsigned Kind) const {
switch (Kind) {
case PseudoSourceValue::Stack:
case PseudoSourceValue::FixedStack:
- return ST.getAMDGPUAS().PRIVATE_ADDRESS;
+ return AMDGPUAS::PRIVATE_ADDRESS;
case PseudoSourceValue::ConstantPool:
case PseudoSourceValue::GOT:
case PseudoSourceValue::JumpTable:
case PseudoSourceValue::GlobalValueCallEntry:
case PseudoSourceValue::ExternalSymbolCallEntry:
case PseudoSourceValue::TargetCustom:
- return ST.getAMDGPUAS().CONSTANT_ADDRESS;
+ return AMDGPUAS::CONSTANT_ADDRESS;
}
+
llvm_unreachable("Invalid pseudo source kind");
- return ST.getAMDGPUAS().PRIVATE_ADDRESS;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index 7a3dece31665..e6e34dc125f4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -324,7 +324,7 @@ public:
}
unsigned getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const override;
+ unsigned Kind) const override;
};
namespace R600 {
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
index 7bf174f4cd86..10e873755222 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -299,7 +299,7 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
- (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }]
+ (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
>;
def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>;
@@ -309,8 +309,8 @@ def vtx_id3_load : LoadParamFrag<load>;
class LoadVtxId1 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
- (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
!isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout())));
}]>;
@@ -322,7 +322,7 @@ def vtx_id1_load : LoadVtxId1 <load>;
class LoadVtxId2 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout()));
}]>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
index a1429a2ac50f..7769a35aadce 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -127,13 +127,13 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
LLVM_DEBUG(if (SU) {
dbgs() << " ** Pick node **\n";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
} else {
dbgs() << "NO NODE \n";
for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
const SUnit &S = DAG->SUnits[i];
if (!S.isScheduled)
- S.dump(DAG);
+ DAG->dumpNode(S);
}
});
@@ -188,11 +188,11 @@ isPhysicalRegCopy(MachineInstr *MI) {
}
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
- LLVM_DEBUG(dbgs() << "Top Releasing "; SU->dump(DAG););
+ LLVM_DEBUG(dbgs() << "Top Releasing "; DAG->dumpNode(*SU));
}
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
- LLVM_DEBUG(dbgs() << "Bottom Releasing "; SU->dump(DAG););
+ LLVM_DEBUG(dbgs() << "Bottom Releasing "; DAG->dumpNode(*SU));
if (isPhysicalRegCopy(SU->getInstr())) {
PhysicalRegCopy.push_back(SU);
return;
@@ -236,6 +236,7 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
       // MI will become a KILL, don't consider it in scheduling
return AluDiscarded;
}
+ break;
default:
break;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp b/contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
new file mode 100644
index 000000000000..69cafef4a351
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
@@ -0,0 +1,181 @@
+//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Any MIMG instructions that use tfe or lwe require an initialization of the
+/// result register that will be written in the case of a memory access failure.
+/// The required code is also added to tie this init code to the result of the
+/// image instruction.
+///
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-img-init"
+
+using namespace llvm;
+
+namespace {
+
+class SIAddIMGInit : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIAddIMGInit() : MachineFunctionPass(ID) {
+ initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
+
+char SIAddIMGInit::ID = 0;
+
+char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;
+
+FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
+
+bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *RI = ST.getRegisterInfo();
+ bool Changed = false;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
+ auto Opcode = MI.getOpcode();
+ if (TII->isMIMG(Opcode) && !MI.mayStore()) {
+ MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
+ MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
+ MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
+
+ // Check for instructions that don't have tfe or lwe fields
+ // There shouldn't be any at this point.
+ assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
+
+ unsigned TFEVal = TFE->getImm();
+ unsigned LWEVal = LWE->getImm();
+ unsigned D16Val = D16 ? D16->getImm() : 0;
+
+ if (TFEVal || LWEVal) {
+          // At least one of TFE or LWE is non-zero
+ // We have to insert a suitable initialization of the result value and
+ // tie this to the dest of the image instruction.
+
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ int DstIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
+
+ // Calculate which dword we have to initialize to 0.
+ MachineOperand *MO_Dmask =
+ TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
+
+ // check that dmask operand is found.
+ assert(MO_Dmask && "Expected dmask operand in instruction");
+
+ unsigned dmask = MO_Dmask->getImm();
+ // Determine the number of active lanes taking into account the
+ // Gather4 special case
+ unsigned ActiveLanes =
+ TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
+
+ // Subreg indices are counted from 1
+ // When D16 then we want next whole VGPR after write data.
+ static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");
+
+ bool Packed = !ST.hasUnpackedD16VMem();
+
+ unsigned InitIdx =
+ D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
+
+ // Abandon attempt if the dst size isn't large enough
+ // - this is in fact an error but this is picked up elsewhere and
+ // reported correctly.
+ uint32_t DstSize =
+ RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
+ if (DstSize < InitIdx)
+ continue;
+
+          // Create a register for the initialization value.
+ unsigned PrevDst =
+ MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
+ unsigned NewDst = 0; // Final initialized value will be in here
+
+ // If PRTStrictNull feature is enabled (the default) then initialize
+ // all the result registers to 0, otherwise just the error indication
+ // register (VGPRn+1)
+ unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
+ unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;
+
+ if (DstSize == 1) {
+ // In this case we can just initialize the result directly
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
+ .addImm(0);
+ NewDst = PrevDst;
+ } else {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
+ for (; SizeLeft; SizeLeft--, CurrIdx++) {
+ NewDst =
+ MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
+ // Initialize dword
+ unsigned SubReg =
+ MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
+ .addImm(0);
+ // Insert into the super-reg
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
+ .addReg(PrevDst)
+ .addReg(SubReg)
+ .addImm(CurrIdx);
+
+ PrevDst = NewDst;
+ }
+ }
+
+ // Add as an implicit operand
+ MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
+
+ // Tie the just added implicit operand to the dst
+ MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
+
+ Changed = true;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
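
The heart of the new pass is working out how many result dwords need the zero-initialization: the dmask popcount (or a fixed four lanes for gather4), halved and rounded up when packed D16 is in use, plus the TFE/LWE error dword. A self-contained restatement of that arithmetic (function and parameter names here are illustrative, not part of the pass):

    // Illustrative restatement of the InitIdx computation above; names are
    // hypothetical and nothing here is the pass itself.
    unsigned numDwordsToInit(unsigned Dmask, bool Gather4, bool D16,
                             bool PackedD16) {
      unsigned ActiveLanes = 0;
      if (Gather4) {
        ActiveLanes = 4;                // gather4 always returns four values
      } else {
        for (unsigned M = Dmask; M; M >>= 1)
          ActiveLanes += M & 1;         // one result lane per set dmask bit
      }
      // Packed D16 fits two half-sized results per dword (rounded up); the
      // trailing "+ 1" accounts for the TFE/LWE error dword after the data.
      return (D16 && PackedD16) ? ((ActiveLanes + 1) >> 1) + 1
                                : ActiveLanes + 1;
    }

    // e.g. dmask = 0b0111, no gather4, no d16  ->  3 data dwords + 1 = 4
    //      gather4 with packed d16             ->  (4+1)/2 + 1      = 3
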
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 74f1bd8fb986..98e9ea662324 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
@@ -52,7 +52,7 @@ using StackEntry = std::pair<BasicBlock *, Value *>;
using StackVector = SmallVector<StackEntry, 16>;
class SIAnnotateControlFlow : public FunctionPass {
- DivergenceAnalysis *DA;
+ LegacyDivergenceAnalysis *DA;
Type *Boolean;
Type *Void;
@@ -66,9 +66,7 @@ class SIAnnotateControlFlow : public FunctionPass {
Function *If;
Function *Else;
- Function *Break;
Function *IfBreak;
- Function *ElseBreak;
Function *Loop;
Function *EndCf;
@@ -95,8 +93,7 @@ class SIAnnotateControlFlow : public FunctionPass {
Value *
handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L,
- BranchInst *Term,
- SmallVectorImpl<WeakTrackingVH> &LoopPhiConditions);
+ BranchInst *Term);
void handleLoop(BranchInst *Term);
@@ -116,7 +113,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
@@ -127,7 +124,7 @@ public:
INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
@@ -149,9 +146,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else);
- Break = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_break);
IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break);
- ElseBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else_break);
Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop);
EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf);
return false;
@@ -160,7 +155,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
/// Is the branch condition uniform or did the StructurizeCFG pass
/// consider it as such?
bool SIAnnotateControlFlow::isUniform(BranchInst *T) {
- return DA->isUniform(T->getCondition()) ||
+ return DA->isUniform(T) ||
T->getMetadata("structurizecfg.uniform") != nullptr;
}
@@ -227,76 +222,7 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
/// Recursively handle the condition leading to a loop
Value *SIAnnotateControlFlow::handleLoopCondition(
- Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term,
- SmallVectorImpl<WeakTrackingVH> &LoopPhiConditions) {
- // Only search through PHI nodes which are inside the loop. If we try this
- // with PHI nodes that are outside of the loop, we end up inserting new PHI
- // nodes outside of the loop which depend on values defined inside the loop.
- // This will break the module with
- // 'Instruction does not dominate all users!' errors.
- PHINode *Phi = nullptr;
- if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {
- BasicBlock *Parent = Phi->getParent();
- PHINode *NewPhi = PHINode::Create(Int64, 0, "loop.phi", &Parent->front());
- Value *Ret = NewPhi;
-
- // Handle all non-constant incoming values first
- for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = Phi->getIncomingValue(i);
- BasicBlock *From = Phi->getIncomingBlock(i);
- if (isa<ConstantInt>(Incoming)) {
- NewPhi->addIncoming(Broken, From);
- continue;
- }
-
- Phi->setIncomingValue(i, BoolFalse);
- Value *PhiArg = handleLoopCondition(Incoming, Broken, L,
- Term, LoopPhiConditions);
- NewPhi->addIncoming(PhiArg, From);
- }
-
- BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
-
- for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = Phi->getIncomingValue(i);
- if (Incoming != BoolTrue)
- continue;
-
- BasicBlock *From = Phi->getIncomingBlock(i);
- if (From == IDom) {
- // We're in the following situation:
- // IDom/From
- // | \
- // | If-block
- // | /
- // Parent
- // where we want to break out of the loop if the If-block is not taken.
- // Due to the depth-first traversal, there should be an end.cf
- // intrinsic in Parent, and we insert an else.break before it.
- //
- // Note that the end.cf need not be the first non-phi instruction
- // of parent, particularly when we're dealing with a multi-level
- // break, but it should occur within a group of intrinsic calls
- // at the beginning of the block.
- CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
- while (OldEnd && OldEnd->getCalledFunction() != EndCf)
- OldEnd = dyn_cast<CallInst>(OldEnd->getNextNode());
- if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
- Value *Args[] = { OldEnd->getArgOperand(0), NewPhi };
- Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
- continue;
- }
- }
-
- TerminatorInst *Insert = From->getTerminator();
- Value *PhiArg = CallInst::Create(Break, Broken, "", Insert);
- NewPhi->setIncomingValue(i, PhiArg);
- }
-
- LoopPhiConditions.push_back(WeakTrackingVH(Phi));
- return Ret;
- }
-
+ Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) {
if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
Instruction *Insert;
@@ -335,21 +261,15 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front());
- SmallVector<WeakTrackingVH, 8> LoopPhiConditions;
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
- Value *Arg = handleLoopCondition(Cond, Broken, L, Term, LoopPhiConditions);
+ Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
for (BasicBlock *Pred : predecessors(Target))
Broken->addIncoming(Pred == BB ? Arg : Int64Zero, Pred);
Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
- for (WeakTrackingVH Val : llvm::reverse(LoopPhiConditions)) {
- if (PHINode *Cond = cast_or_null<PHINode>(Val))
- eraseIfUnused(Cond);
- }
-
push(Term->getSuccessor(0), Arg);
}
@@ -372,7 +292,8 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
Preds.push_back(Pred);
}
- BB = SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false);
+ BB = SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, nullptr,
+ false);
}
Value *Exec = popSaved();
@@ -386,7 +307,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DA = &getAnalysis<DivergenceAnalysis>();
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
index a6d28d6999e5..7f6abc34cff3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -88,7 +88,10 @@ enum : uint64_t {
IsPacked = UINT64_C(1) << 49,
// Is a D16 buffer instruction.
- D16Buf = UINT64_C(1) << 50
+ D16Buf = UINT64_C(1) << 50,
+
+ // Uses floating point double precision rounding mode
+ FPDPRounding = UINT64_C(1) << 51
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 566e0d3febc7..809f5bab4693 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -183,13 +183,15 @@ getCopyRegClasses(const MachineInstr &Copy,
static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const SIRegisterInfo &TRI) {
- return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
+ return SrcRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(DstRC) &&
+ TRI.hasVGPRs(SrcRC);
}
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const SIRegisterInfo &TRI) {
- return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
+ return DstRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(SrcRC) &&
+ TRI.hasVGPRs(DstRC);
}
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
@@ -327,9 +329,7 @@ static bool phiHasBreakDef(const MachineInstr &PHI,
switch (DefInstr->getOpcode()) {
default:
break;
- case AMDGPU::SI_BREAK:
case AMDGPU::SI_IF_BREAK:
- case AMDGPU::SI_ELSE_BREAK:
return true;
case AMDGPU::PHI:
if (phiHasBreakDef(*DefInstr, MRI, Visited))
@@ -599,7 +599,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
unsigned SrcReg = MI.getOperand(1).getReg();
if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
break;
}
@@ -614,7 +614,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MI.setDesc(TII->get(SMovOp));
break;
}
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
} else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
}
@@ -677,7 +677,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
SmallSet<unsigned, 8> Visited;
if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
}
break;
}
@@ -690,7 +690,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
break;
case AMDGPU::INSERT_SUBREG: {
const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
@@ -700,7 +700,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
if (TRI->isSGPRClass(DstRC) &&
(TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
}
break;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
index 5d613d8874fa..7761418c5336 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
@@ -10,7 +10,7 @@
/// \file
/// Computations in WWM can overwrite values in inactive channels for
/// variables that the register allocator thinks are dead. This pass adds fake
-/// uses of those variables to WWM instructions to make sure that they aren't
+/// uses of those variables to their def(s) to make sure that they aren't
/// overwritten.
///
/// As an example, consider this snippet:
@@ -29,25 +29,44 @@
/// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
/// it would clobber even the inactive channels for which the if-condition is
/// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
-/// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the
+/// of %vgpr0 to its def to make sure they aren't allocated to the
/// same register.
///
/// In general, we need to figure out what registers might have their inactive
/// channels which are eventually used accidentally clobbered by a WWM
-/// instruction. We approximate this using two conditions:
+/// instruction. We do that by spotting three separate cases of registers:
///
-/// 1. A definition of the variable reaches the WWM instruction.
-/// 2. The variable would be live at the WWM instruction if all its defs were
-/// partial defs (i.e. considered as a use), ignoring normal uses.
+/// 1. A "then phi": the value resulting from phi elimination of a phi node at
+/// the end of an if..endif. If there is WWM code in the "then", then we
+/// make the def at the end of the "then" branch a partial def by adding an
+/// implicit use of the register.
///
-/// If a register matches both conditions, then we add an implicit use of it to
-/// the WWM instruction. Condition #2 is the heart of the matter: every
-/// definition is really a partial definition, since every VALU instruction is
-/// implicitly predicated. We can usually ignore this, but WWM forces us not
-/// to. Condition #1 prevents false positives if the variable is undefined at
-/// the WWM instruction anyways. This is overly conservative in certain cases,
-/// especially in uniform control flow, but this is a workaround anyways until
-/// LLVM gains the notion of predicated uses and definitions of variables.
+/// 2. A "loop exit register": a value written inside a loop but used outside the
+/// loop, where there is WWM code inside the loop (the case in the example
+/// above). We add an implicit_def of the register in the loop pre-header,
+/// and make the original def a partial def by adding an implicit use of the
+/// register.
+///
+/// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
+/// in a loop header. If there is WWM code inside the loop, then we make all
+/// defs inside the loop partial defs by adding an implicit use of the
+/// register on each one.
+///
+/// Note that we do not need to consider an if..else..endif phi. We only need to
+/// consider non-uniform control flow, and control flow structurization would
+/// have transformed a non-uniform if..else..endif into two if..endifs.
+///
+/// The analysis to detect these cases relies on a property of the MIR
+/// arising from this pass running straight after PHIElimination and before any
+/// coalescing: that any virtual register with more than one definition must be
+/// the new register added to lower a phi node by PHIElimination.
+///
+/// FIXME: We should detect whether a register in one of the above categories is
+/// already live at the WWM code before deciding to add the implicit uses to
+/// synthesize its liveness.
+///
+/// FIXME: I believe this whole scheme may be flawed due to the possibility of
+/// the register allocator doing live interval splitting.
///
//===----------------------------------------------------------------------===//
@@ -59,7 +78,9 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -71,10 +92,18 @@ namespace {
class SIFixWWMLiveness : public MachineFunctionPass {
private:
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *LoopInfo;
LiveIntervals *LIS = nullptr;
+ const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
+ std::vector<MachineInstr *> WWMs;
+ std::vector<MachineOperand *> ThenDefs;
+ std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs;
+ std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs;
+
public:
static char ID;
@@ -84,13 +113,11 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- bool runOnWWMInstruction(MachineInstr &MI);
-
- void addDefs(const MachineInstr &MI, SparseBitVector<> &set);
-
StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addRequiredID(MachineLoopInfoID);
// Should preserve the same set that TwoAddressInstructions does.
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
@@ -100,11 +127,21 @@ public:
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+private:
+ void processDef(MachineOperand &DefOpnd);
+ bool processThenDef(MachineOperand *DefOpnd);
+ bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop);
+ bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop);
};
} // End anonymous namespace.
-INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
+ "SI fix WWM liveness", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
"SI fix WWM liveness", false, false)
char SIFixWWMLiveness::ID = 0;
@@ -115,89 +152,267 @@ FunctionPass *llvm::createSIFixWWMLivenessPass() {
return new SIFixWWMLiveness();
}
-void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs)
-{
- for (const MachineOperand &Op : MI.defs()) {
- if (Op.isReg()) {
- unsigned Reg = Op.getReg();
- if (TRI->isVGPR(*MRI, Reg))
- Regs.set(Reg);
- }
- }
-}
+bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n");
+ bool Modified = false;
+
+ // This doesn't actually need LiveIntervals, but we can preserve them.
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
-bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) {
- MachineBasicBlock *MBB = WWM.getParent();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- // Compute the registers that are live out of MI by figuring out which defs
- // are reachable from MI.
- SparseBitVector<> LiveOut;
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+ MRI = &MF.getRegInfo();
- for (auto II = MachineBasicBlock::iterator(WWM), IE =
- MBB->end(); II != IE; ++II) {
- addDefs(*II, LiveOut);
- }
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ LoopInfo = &getAnalysis<MachineLoopInfo>();
- for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB),
- E = df_end(MBB);
- I != E; ++I) {
- for (const MachineInstr &MI : **I) {
- addDefs(MI, LiveOut);
+ // Scan the function to find the WWM sections and the candidate registers for
+ // having liveness modified.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() == AMDGPU::EXIT_WWM)
+ WWMs.push_back(&MI);
+ else {
+ for (MachineOperand &DefOpnd : MI.defs()) {
+ if (DefOpnd.isReg()) {
+ unsigned Reg = DefOpnd.getReg();
+ if (TRI->isVGPR(*MRI, Reg))
+ processDef(DefOpnd);
+ }
+ }
+ }
}
}
+ if (!WWMs.empty()) {
+ // Synthesize liveness over WWM sections as required.
+ for (auto ThenDef : ThenDefs)
+ Modified |= processThenDef(ThenDef);
+ for (auto LoopExitDef : LoopExitDefs)
+ Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second);
+ for (auto LoopPhiDef : LoopPhiDefs)
+ Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second);
+ }
- // Compute the registers that reach MI.
- SparseBitVector<> Reachable;
+ WWMs.clear();
+ ThenDefs.clear();
+ LoopExitDefs.clear();
+ LoopPhiDefs.clear();
- for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE =
- MBB->rend(); II != IE; ++II) {
- addDefs(*II, Reachable);
- }
+ return Modified;
+}
- for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB),
- E = idf_end(MBB);
- I != E; ++I) {
- for (const MachineInstr &MI : **I) {
- addDefs(MI, Reachable);
+// During the function scan, process an operand that defines a VGPR.
+// This categorizes the register and puts it in the appropriate list for later
+// use when processing a WWM section.
+void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) {
+ unsigned Reg = DefOpnd.getReg();
+ // Get all the defining instructions. For convenience, make Defs[0] the def
+ // we are on now.
+ SmallVector<const MachineInstr *, 4> Defs;
+ Defs.push_back(DefOpnd.getParent());
+ for (auto &MI : MRI->def_instructions(Reg)) {
+ if (&MI != DefOpnd.getParent())
+ Defs.push_back(&MI);
+ }
+ // Check whether this def dominates all the others. If not, ignore this def.
+ // Either it is going to be processed when the scan encounters its other def
+ // that dominates all defs, or there is no def that dominates all others.
+ // The latter case is an eliminated phi from an if..else..endif or similar,
+ // which must be for uniform control flow and so can be ignored.
+ // Because this pass runs shortly after PHIElimination, we assume that any
+ // multi-def register is a lowered phi, and thus has each def in a separate
+ // basic block.
+ for (unsigned I = 1; I != Defs.size(); ++I) {
+ if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent()))
+ return;
+ }
+ // Check for the case of an if..endif lowered phi: It has two defs, one
+ // dominates the other, and there is a single use in a successor of the
+ // dominant def.
+ // Later we will spot any WWM code inside the "then" clause and turn the
+ // second def into a partial def so its liveness goes through the WWM code
+ // in the "then" clause.
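+ // An illustrative shape for this case (block names and register numbers are
+ // invented for the example):
+ //   bb.if:    %1 = ...       ; dominating def, two successors
+ //   bb.then:  %1 = ...       ; second def, only reached down the "then" arm
+ //   bb.endif: ... = use %1   ; the single use, in a successor of bb.if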
+ if (Defs.size() == 2) {
+ auto DomDefBlock = Defs[0]->getParent();
+ if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) {
+ auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
+ for (auto Succ : DomDefBlock->successors()) {
+ if (Succ == UseBlock) {
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n");
+ ThenDefs.push_back(&DefOpnd);
+ return;
+ }
+ }
}
}
-
- // find the intersection, and add implicit uses.
- LiveOut &= Reachable;
-
- bool Modified = false;
- for (unsigned Reg : LiveOut) {
- WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
- if (LIS) {
- // FIXME: is there a better way to update the live interval?
- LIS->removeInterval(Reg);
- LIS->createAndComputeVirtRegInterval(Reg);
+ // Check for the case of a non-lowered-phi register (single def) that exits
+ // a loop, that is, it has a use that is outside a loop that the def is
+ // inside. We find the outermost loop that the def is inside but a use is
+ // outside. Later we will spot any WWM code inside that loop and then make
+ // the def a partial def so its liveness goes round the loop and through the
+ // WWM code.
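+ // Illustrative shape (invented for the example):
+ //   bb.loop: %2 = ...        ; the single def, inside the loop
+ //   bb.exit: ... = use %2    ; a use outside the loop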
+ if (Defs.size() == 1) {
+ auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent());
+ if (!Loop)
+ return;
+ bool IsLoopExit = false;
+ for (auto &Use : MRI->use_instructions(Reg)) {
+ auto UseBlock = Use.getParent();
+ if (Loop->contains(UseBlock))
+ continue;
+ IsLoopExit = true;
+ while (auto Parent = Loop->getParentLoop()) {
+ if (Parent->contains(UseBlock))
+ break;
+ Loop = Parent;
+ }
}
- Modified = true;
+ if (!IsLoopExit)
+ return;
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
+ << " is a loop exit reg with loop header at "
+ << "bb." << Loop->getHeader()->getNumber() << "\n");
+ LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>(
+ &DefOpnd, Loop));
+ return;
}
-
- return Modified;
+ // Check for the case of a lowered single-preheader-loop phi, that is, a
+ // multi-def register where the dominating def is in the loop pre-header and
+ // all other defs are in backedges. Later we will spot any WWM code inside
+ // that loop and then make the backedge defs partial defs so the liveness
+ // goes through the WWM code.
+ // Note that we are ignoring multi-preheader loops on the basis that the
+ // structurizer does not allow that for non-uniform loops.
+ // There must be a single use in the loop header.
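+ // Illustrative shape (invented for the example):
+ //   bb.preheader: %3 = ...       ; dominating def, outside the loop
+ //   bb.header:    ... = use %3   ; the single use, in the loop header
+ //   bb.latch:     %3 = ...       ; backedge def(s), inside the loop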
+ if (!MRI->hasOneUse(Reg))
+ return;
+ auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
+ auto Loop = LoopInfo->getLoopFor(UseBlock);
+ if (!Loop || Loop->getHeader() != UseBlock
+ || Loop->contains(Defs[0]->getParent())) {
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
+ << " is multi-def but single use not in loop header\n");
+ return;
+ }
+ for (unsigned I = 1; I != Defs.size(); ++I) {
+ if (!Loop->contains(Defs[I]->getParent()))
+ return;
+ }
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
+ << " is a loop phi reg with loop header at "
+ << "bb." << Loop->getHeader()->getNumber() << "\n");
+ LoopPhiDefs.push_back(
+ std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop));
}
-bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
- bool Modified = false;
-
- // This doesn't actually need LiveIntervals, but we can preserve them.
- LIS = getAnalysisIfAvailable<LiveIntervals>();
-
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
-
- TRI = &TII->getRegisterInfo();
- MRI = &MF.getRegInfo();
+// Process a then phi def: It has two defs, one dominates the other, and there
+// is a single use in a successor of the dominant def. Here we spot any WWM
+// code inside the "then" clause and turn the second def into a partial def so
+// its liveness goes through the WWM code in the "then" clause.
+bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) {
+ LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent());
+ if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Ignore if dominating def is undef.
+ LLVM_DEBUG(dbgs() << " ignoring as dominating def is undef\n");
+ return false;
+ }
+ unsigned Reg = DefOpnd->getReg();
+ // Get the use block, which is the endif block.
+ auto UseBlock = MRI->use_instr_begin(Reg)->getParent();
+ // Check whether there is WWM code inside the then branch. The WWM code must
+ // be dominated by the if but not dominated by the endif.
+ bool ContainsWWM = false;
+ for (auto WWM : WWMs) {
+ if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent())
+ && !DomTree->dominates(UseBlock, WWM->getParent())) {
+ LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
+ ContainsWWM = true;
+ break;
+ }
+ }
+ if (!ContainsWWM)
+ return false;
+ // Get the other def.
+ MachineInstr *OtherDef = nullptr;
+ for (auto &MI : MRI->def_instructions(Reg)) {
+ if (&MI != DefOpnd->getParent())
+ OtherDef = &MI;
+ }
+ // Make it a partial def.
+ OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
+ LLVM_DEBUG(dbgs() << *OtherDef);
+ return true;
+}
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
- Modified |= runOnWWMInstruction(MI);
- }
+// Process a loop exit def, that is, a register with a single def inside a loop
+// that has a use outside the loop. Here we spot any WWM code inside that loop
+// and then make the def a partial def so its liveness goes round the loop and
+// through the WWM code.
+bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd,
+ MachineLoop *Loop) {
+ LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent());
+ // Check whether there is WWM code inside the loop.
+ bool ContainsWWM = false;
+ for (auto WWM : WWMs) {
+ if (Loop->contains(WWM->getParent())) {
+ LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
+ ContainsWWM = true;
+ break;
}
}
+ if (!ContainsWWM)
+ return false;
+ unsigned Reg = DefOpnd->getReg();
+ // Add a new implicit_def in loop preheader(s).
+ for (auto Pred : Loop->getHeader()->predecessors()) {
+ if (!Loop->contains(Pred)) {
+ auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
+ LLVM_DEBUG(dbgs() << *ImplicitDef);
+ (void)ImplicitDef;
+ }
+ }
+ // Make the original def partial.
+ DefOpnd->getParent()->addOperand(MachineOperand::CreateReg(
+ Reg, false, /*isImp=*/true));
+ LLVM_DEBUG(dbgs() << *DefOpnd->getParent());
+ return true;
+}
- return Modified;
+// Process a loop phi def, that is, a multi-def register where the dominating
+// def is in the loop pre-header and all other defs are in backedges. Here we
+// spot any WWM code inside that loop and then make the backedge defs partial
+// defs so the liveness goes through the WWM code.
+bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd,
+ MachineLoop *Loop) {
+ LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent());
+ // Check whether there is WWM code inside the loop.
+ bool ContainsWWM = false;
+ for (auto WWM : WWMs) {
+ if (Loop->contains(WWM->getParent())) {
+ LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
+ ContainsWWM = true;
+ break;
+ }
+ }
+ if (!ContainsWWM)
+ return false;
+ unsigned Reg = DefOpnd->getReg();
+ // Remove kill mark from uses.
+ for (auto &Use : MRI->use_operands(Reg))
+ Use.setIsKill(false);
+ // Make all defs except the dominating one partial defs.
+ SmallVector<MachineInstr *, 4> Defs;
+ for (auto &Def : MRI->def_instructions(Reg))
+ Defs.push_back(&Def);
+ for (auto Def : Defs) {
+ if (DefOpnd->getParent() == Def)
+ continue;
+ Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
+ LLVM_DEBUG(dbgs() << *Def);
+ }
+ return true;
}
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
new file mode 100644
index 000000000000..ee39eb04d831
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
@@ -0,0 +1,231 @@
+//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+/// The SIFixupVectorISel pass cleans up post-ISel vector issues.
+/// Currently this will convert GLOBAL_{LOAD|STORE}_*
+/// and GLOBAL_Atomic_* instructions into their _SADDR variants,
+/// feeding the sreg into the saddr field of the new instruction.
+/// We currently handle a REG_SEQUENCE feeding the vaddr
+/// and decompose it into a base and index.
+///
+/// Transform:
+/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
+/// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
+/// %24:vgpr_32, %19:sreg_64_xexec
+/// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
+/// %11:vreg_64 = COPY %16:vreg_64
+/// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
+/// Into:
+/// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
+/// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
+/// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec,16...
+///
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#define DEBUG_TYPE "si-fixup-vector-isel"
+
+using namespace llvm;
+
+static cl::opt<bool> EnableGlobalSGPRAddr(
+ "amdgpu-enable-global-sgpr-addr",
+ cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
+ cl::init(false));
+
+STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
+STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");
+
+namespace {
+
+class SIFixupVectorISel : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixupVectorISel() : MachineFunctionPass(ID) {
+ initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
+ "SI Fixup Vector ISel", false, false)
+
+char SIFixupVectorISel::ID = 0;
+
+char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;
+
+FunctionPass *llvm::createSIFixupVectorISelPass() {
+ return new SIFixupVectorISel();
+}
+
+static bool findSRegBaseAndIndex(MachineOperand *Op,
+ unsigned &BaseReg,
+ unsigned &IndexReg,
+ MachineRegisterInfo &MRI,
+ const SIRegisterInfo *TRI) {
+ SmallVector<MachineOperand *, 8> Worklist;
+ Worklist.push_back(Op);
+ while (!Worklist.empty()) {
+ MachineOperand *WOp = Worklist.pop_back_val();
+ if (!WOp->isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(WOp->getReg()))
+ continue;
+ MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
+ switch (DefInst->getOpcode()) {
+ default:
+ continue;
+ case AMDGPU::COPY:
+ Worklist.push_back(&DefInst->getOperand(1));
+ break;
+ case AMDGPU::REG_SEQUENCE:
+ if (DefInst->getNumOperands() != 5)
+ continue;
+ Worklist.push_back(&DefInst->getOperand(1));
+ Worklist.push_back(&DefInst->getOperand(3));
+ break;
+ case AMDGPU::V_ADD_I32_e64:
+ // The V_ADD_* and its analogous V_ADDC_* are generated by a previous pass
+ // which lowered an ADD_64_PSEUDO, generating subregs to break up the
+ // 64 bit args.
+ if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
+ continue;
+ BaseReg = DefInst->getOperand(2).getReg();
+ if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
+ continue;
+ IndexReg = DefInst->getOperand(3).getReg();
+ // Chase the IndexReg.
+ MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
+ if (!MI || !MI->isCopy())
+ continue;
+ // Make sure the reg class is 64 bit for Index.
+ // If the Index register is a subreg, we want it to reference
+ // a 64 bit register which we will use as the Index reg.
+ const TargetRegisterClass *IdxRC, *BaseRC;
+ IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
+ if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
+ continue;
+ IndexReg = MI->getOperand(1).getReg();
+ // Chase the BaseReg.
+ MI = MRI.getUniqueVRegDef(BaseReg);
+ if (!MI || !MI->isCopy())
+ continue;
+ // Make sure the register class is 64 bit for Base.
+ BaseReg = MI->getOperand(1).getReg();
+ BaseRC = MRI.getRegClass(BaseReg);
+ if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
+ continue;
+ // Make sure Base is SReg and Index is VReg.
+ if (!TRI->isSGPRReg(MRI, BaseReg))
+ return false;
+ if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
+ return false;
+ // Clear any kill flags on the Index and Base regs; they are used later.
+ MRI.clearKillFlags(IndexReg);
+ MRI.clearKillFlags(BaseReg);
+ return true;
+ }
+ }
+ return false;
+}
+
+// Identify global LOAD/STORE/ATOMIC instructions and try to convert to _SADDR.
+static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ MachineRegisterInfo &MRI,
+ const GCNSubtarget &ST,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI) {
+ if (!EnableGlobalSGPRAddr)
+ return false;
+ bool FuncModified = false;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+ int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
+ if (NewOpcd < 0)
+ continue;
+ // Update our statistics on opportunities seen.
+ ++NumSGPRGlobalOccurs;
+ LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
+ // Need a Base and Index or we can't transform to _SADDR.
+ unsigned BaseReg = 0;
+ unsigned IndexReg = 0;
+ MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
+ if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
+ continue;
+ ++NumSGPRGlobalSaddrs;
+ FuncModified = true;
+ // Create the new _SADDR Memory instruction.
+ bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
+ MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
+ MachineInstr *NewGlob = nullptr;
+ NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
+ if (HasVdst)
+ NewGlob->addOperand(MF, MI.getOperand(0));
+ NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
+ if (VData)
+ NewGlob->addOperand(MF, *VData);
+ NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
+ NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));
+
+ MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
+ // Atomics don't have a GLC, so omit the field if it is not there.
+ if (Glc)
+ NewGlob->addOperand(MF, *Glc);
+ NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
+ // _D16 instructions have a vdst_in operand; copy it in.
+ MachineOperand *VDstInOp = TII->getNamedOperand(MI,
+ AMDGPU::OpName::vdst_in);
+ if (VDstInOp)
+ NewGlob->addOperand(MF, *VDstInOp);
+ NewGlob->copyImplicitOps(MF, MI);
+ NewGlob->cloneMemRefs(MF, MI);
+ // Remove the old Global Memop instruction.
+ MI.eraseFromParent();
+ LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
+ }
+ return FuncModified;
+}
+
+bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ bool FuncModified = false;
+ for (MachineBasicBlock &MBB : MF) {
+ // Clean up missed SADDR opportunities from ISel.
+ FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
+ }
+ return FuncModified;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 338cabcb906b..f4e866958369 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -35,13 +35,16 @@ struct FoldCandidate {
uint64_t ImmToFold;
int FrameIndexToFold;
};
+ int ShrinkOpcode;
unsigned char UseOpNo;
MachineOperand::MachineOperandType Kind;
bool Commuted;
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
- bool Commuted_ = false) :
- UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
+ bool Commuted_ = false,
+ int ShrinkOp = -1) :
+ UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+ Kind(FoldOp->getType()),
Commuted(Commuted_) {
if (FoldOp->isImm()) {
ImmToFold = FoldOp->getImm();
@@ -68,6 +71,14 @@ struct FoldCandidate {
bool isCommuted() const {
return Commuted;
}
+
+ bool needsShrink() const {
+ return ShrinkOpcode != -1;
+ }
+
+ int getShrinkOpcode() const {
+ return ShrinkOpcode;
+ }
};
class SIFoldOperands : public MachineFunctionPass {
@@ -154,6 +165,7 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
}
static bool updateOperand(FoldCandidate &Fold,
+ const SIInstrInfo &TII,
const TargetRegisterInfo &TRI) {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
@@ -189,10 +201,49 @@ static bool updateOperand(FoldCandidate &Fold,
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
}
}
+
+ if (Fold.needsShrink()) {
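+ // A shrinkable fold (e.g. an immediate folded into V_ADD_I32_e64) is
+ // rewritten to the 32-bit encoding, whose carry-out is the implicit VCC,
+ // so VCC must be dead here and any remaining uses of the old carry output
+ // are fed by a COPY from VCC below.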
+ MachineBasicBlock *MBB = MI->getParent();
+ auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+ if (Liveness != MachineBasicBlock::LQR_Dead)
+ return false;
+
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ int Op32 = Fold.getShrinkOpcode();
+ MachineOperand &Dst0 = MI->getOperand(0);
+ MachineOperand &Dst1 = MI->getOperand(1);
+ assert(Dst0.isDef() && Dst1.isDef());
+
+ bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
+
+ const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+ unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
+ const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
+ unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
+
+ MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+
+ if (HaveNonDbgCarryUse) {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
+ .addReg(AMDGPU::VCC, RegState::Kill);
+ }
+
+ // Keep the old instruction around to avoid breaking iterators, but
+ // replace the outputs with dummy registers.
+ Dst0.setReg(NewReg0);
+ Dst1.setReg(NewReg1);
+
+ if (Fold.isCommuted())
+ TII.commuteInstruction(*Inst32, false);
+ return true;
+ }
+
Old.ChangeToImmediate(Fold.ImmToFold);
return true;
}
+ assert(!Fold.needsShrink() && "not handled");
+
if (Fold.isFI()) {
Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
return true;
@@ -261,6 +312,8 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if (isUseMIInFoldList(FoldList, MI))
return false;
+ unsigned CommuteOpNo = OpNo;
+
// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
@@ -269,11 +322,12 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if (CanCommute) {
if (CommuteIdx0 == OpNo)
- OpNo = CommuteIdx1;
+ CommuteOpNo = CommuteIdx1;
else if (CommuteIdx1 == OpNo)
- OpNo = CommuteIdx0;
+ CommuteOpNo = CommuteIdx0;
}
+
// One of the operands might be an Imm operand, and OpNo may refer to it after
// the call of commuteInstruction() below. Such situations are avoided
// here explicitly as OpNo must be a register operand to be a candidate
@@ -286,12 +340,34 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
!TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
return false;
- if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
+ if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
+ if ((Opc == AMDGPU::V_ADD_I32_e64 ||
+ Opc == AMDGPU::V_SUB_I32_e64 ||
+ Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
+ OpToFold->isImm()) {
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ // Verify the other operand is a VGPR, otherwise we would violate the
+ // constant bus restriction.
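+ // (A VALU instruction can read at most one SGPR or literal constant, and
+ // the folded immediate already occupies that slot.)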
+ unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
+ MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+ if (!OtherOp.isReg() ||
+ !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
+ return false;
+
+ assert(MI->getOperand(1).isDef());
+
+ int Op32 = AMDGPU::getVOPe32(Opc);
+ FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
+ Op32));
+ return true;
+ }
+
TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
return false;
}
- FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
+ FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
return true;
}
@@ -362,8 +438,6 @@ void SIFoldOperands::foldOperand(
bool FoldingImm = OpToFold.isImm();
- // In order to fold immediates into copies, we need to change the
- // copy to a MOV.
if (FoldingImm && UseMI->isCopy()) {
unsigned DestReg = UseMI->getOperand(0).getReg();
const TargetRegisterClass *DestRC
@@ -371,6 +445,31 @@ void SIFoldOperands::foldOperand(
MRI->getRegClass(DestReg) :
TRI->getPhysRegClass(DestReg);
+ unsigned SrcReg = UseMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
+ if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) {
+ MachineRegisterInfo::use_iterator NextUse;
+ SmallVector<FoldCandidate, 4> CopyUses;
+ for (MachineRegisterInfo::use_iterator
+ Use = MRI->use_begin(DestReg), E = MRI->use_end();
+ Use != E; Use = NextUse) {
+ NextUse = std::next(Use);
+ FoldCandidate FC = FoldCandidate(Use->getParent(),
+ Use.getOperandNo(), &UseMI->getOperand(1));
+ CopyUses.push_back(FC);
+ }
+ for (auto & F : CopyUses) {
+ foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo,
+ FoldList, CopiesToReplace);
+ }
+ }
+ }
+
+ // In order to fold immediates into copies, we need to change the
+ // copy to a MOV.
+
unsigned MovOp = TII->getMovOpcode(DestRC);
if (MovOp == AMDGPU::COPY)
return;
@@ -378,6 +477,20 @@ void SIFoldOperands::foldOperand(
UseMI->setDesc(TII->get(MovOp));
CopiesToReplace.push_back(UseMI);
} else {
+ if (UseMI->isCopy() && OpToFold.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
+ TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) &&
+ TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
+ TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
+ !UseMI->getOperand(1).getSubReg()) {
+ UseMI->getOperand(1).setReg(OpToFold.getReg());
+ UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
+ UseMI->getOperand(1).setIsKill(false);
+ CopiesToReplace.push_back(UseMI);
+ OpToFold.setIsKill(false);
+ return;
+ }
+
const MCInstrDesc &UseDesc = UseMI->getDesc();
// Don't fold into target independent nodes. Target independent opcodes
@@ -550,6 +663,19 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
if (!Src0->isImm() && !Src1->isImm())
return false;
+ if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
+ if (Src0->isImm() && Src0->getImm() == 0) {
+ // v_lshl_or_b32 0, X, Y -> copy Y
+ // v_lshl_or_b32 0, X, K -> v_mov_b32 K
+ bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
+ MI->RemoveOperand(Src1Idx);
+ MI->RemoveOperand(Src0Idx);
+
+ MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
+ return true;
+ }
+ }
+
// and k0, k1 -> v_mov_b32 (k0 & k1)
// or k0, k1 -> v_mov_b32 (k0 | k1)
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)
@@ -728,13 +854,17 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
}
} else {
// Folding register.
+ SmallVector <MachineRegisterInfo::use_iterator, 4> UsesToProcess;
for (MachineRegisterInfo::use_iterator
Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
Use != E; ++Use) {
- MachineInstr *UseMI = Use->getParent();
+ UsesToProcess.push_back(Use);
+ }
+ for (auto U : UsesToProcess) {
+ MachineInstr *UseMI = U->getParent();
- foldOperand(OpToFold, UseMI, Use.getOperandNo(),
- FoldList, CopiesToReplace);
+ foldOperand(OpToFold, UseMI, U.getOperandNo(),
+ FoldList, CopiesToReplace);
}
}
@@ -744,7 +874,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
Copy->addImplicitDefUseOperands(*MF);
for (FoldCandidate &Fold : FoldList) {
- if (updateOperand(Fold, *TRI)) {
+ if (updateOperand(Fold, *TII, *TRI)) {
// Clear kill flags.
if (Fold.isReg()) {
assert(Fold.OpToFold && Fold.OpToFold->isReg());
@@ -981,9 +1111,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// omod is ignored by hardware if IEEE bit is enabled. omod also does not
// correctly handle signed zeros.
//
- // TODO: Check nsz on instructions when fast math flags are preserved to MI
- // level.
- bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
+ bool IsIEEEMode = ST->enableIEEEBit(MF);
+ bool HasNSZ = MFI->hasNoSignedZerosFPMath();
for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineBasicBlock::iterator I, Next;
@@ -994,7 +1123,10 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
tryFoldInst(TII, &MI);
if (!TII->isFoldableCopy(MI)) {
- if (IsIEEEMode || !tryFoldOMod(MI))
+ // TODO: Omod might be OK if there is NSZ only on the source
+ // instruction, and not the omod multiply.
+ if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
+ !tryFoldOMod(MI))
tryFoldClamp(MI);
continue;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index cd14239de822..aa976d5141f8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -168,16 +168,15 @@ void SIFormMemoryClauses::forAllLanes(unsigned Reg, LaneBitmask LaneMask,
CoveringSubregs.push_back(Idx);
}
- llvm::sort(CoveringSubregs.begin(), CoveringSubregs.end(),
- [this](unsigned A, unsigned B) {
- LaneBitmask MaskA = TRI->getSubRegIndexLaneMask(A);
- LaneBitmask MaskB = TRI->getSubRegIndexLaneMask(B);
- unsigned NA = MaskA.getNumLanes();
- unsigned NB = MaskB.getNumLanes();
- if (NA != NB)
- return NA > NB;
- return MaskA.getHighestLane() > MaskB.getHighestLane();
- });
+ llvm::sort(CoveringSubregs, [this](unsigned A, unsigned B) {
+ LaneBitmask MaskA = TRI->getSubRegIndexLaneMask(A);
+ LaneBitmask MaskB = TRI->getSubRegIndexLaneMask(B);
+ unsigned NA = MaskA.getNumLanes();
+ unsigned NB = MaskB.getNumLanes();
+ if (NA != NB)
+ return NA > NB;
+ return MaskA.getHighestLane() > MaskB.getHighestLane();
+ });
for (unsigned Idx : CoveringSubregs) {
LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(Idx);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index ac0ef90f25a4..e4633c88e18f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -289,7 +289,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
- if (ST.isAmdCodeObjectV2(F)) {
+ if (ST.isAmdHsaOrMesa(F)) {
PreloadedPrivateBufferReg = MFI->getPreloadedReg(
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
}
@@ -308,7 +308,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
}
if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
- assert(ST.isAmdCodeObjectV2(F) || ST.isMesaGfxShader(F));
+ assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
MRI.addLiveIn(PreloadedPrivateBufferReg);
MBB.addLiveIn(PreloadedPrivateBufferReg);
}
@@ -333,7 +333,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
bool CopyBuffer = ResourceRegUsed &&
PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
- ST.isAmdCodeObjectV2(F) &&
+ ST.isAmdHsaOrMesa(F) &&
ScratchRsrcReg != PreloadedPrivateBufferReg;
// This needs to be careful of the copying order to avoid overwriting one of
@@ -433,7 +433,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
}
if (ST.isMesaGfxShader(Fn)
|| (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
- assert(!ST.isAmdCodeObjectV2(Fn));
+ assert(!ST.isAmdHsaOrMesa(Fn));
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 25007861fd15..0ba921647097 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#ifdef _MSC_VER
+#if defined(_MSC_VER) || defined(__MINGW32__)
// Provide M_PI.
#define _USE_MATH_DEFINES
#endif
@@ -156,12 +156,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::LOAD, MVT::v32i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v32i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
@@ -207,11 +209,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
@@ -232,6 +237,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ADDCARRY, MVT::i32, Legal);
setOperationAction(ISD::SUBCARRY, MVT::i32, Legal);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+
#if 0
setOperationAction(ISD::ADDCARRY, MVT::i64, Legal);
setOperationAction(ISD::SUBCARRY, MVT::i64, Legal);
@@ -240,7 +249,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
- MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16 }) {
+ MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v32i32 }) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -339,6 +348,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::FLOG, MVT::f16, Custom);
+ setOperationAction(ISD::FEXP, MVT::f16, Custom);
setOperationAction(ISD::FLOG10, MVT::f16, Custom);
}
@@ -375,8 +385,20 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasBFE())
setHasExtractBitsInsn(true);
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Custom);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Custom);
+
+
+ // These are really only legal for ieee_mode functions. We should be avoiding
+ // them for functions that don't have ieee_mode enabled, so just say they are
+ // legal.
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
@@ -465,8 +487,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// F16 - VOP2 Actions.
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
+
setOperationAction(ISD::FDIV, MVT::f16, Custom);
// F16 - VOP3 Actions.
@@ -549,6 +570,17 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// This isn't really legal, but this avoids the legalizer unrolling it (and
// allows matching fneg (fabs x) patterns)
setOperationAction(ISD::FABS, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f16, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f16, Legal);
+
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::v4f16, Custom);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::v4f16, Custom);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f16, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f16, Expand);
}
if (Subtarget->hasVOP3PInsts()) {
@@ -566,8 +598,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FADD, MVT::v2f16, Legal);
setOperationAction(ISD::FMUL, MVT::v2f16, Legal);
setOperationAction(ISD::FMA, MVT::v2f16, Legal);
- setOperationAction(ISD::FMINNUM, MVT::v2f16, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::v2f16, Legal);
+
setOperationAction(ISD::FCANONICALIZE, MVT::v2f16, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
@@ -587,9 +621,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FADD, MVT::v4f16, Custom);
setOperationAction(ISD::FMUL, MVT::v4f16, Custom);
+
+ setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom);
+
setOperationAction(ISD::FMINNUM, MVT::v4f16, Custom);
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v4f16, Custom);
+ setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
setOperationAction(ISD::SELECT, MVT::v4i16, Custom);
setOperationAction(ISD::SELECT, MVT::v4f16, Custom);
}
@@ -623,6 +663,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
setTargetDAGCombine(ISD::FMAXNUM);
+ setTargetDAGCombine(ISD::FMINNUM_IEEE);
+ setTargetDAGCombine(ISD::FMAXNUM_IEEE);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SMIN);
setTargetDAGCombine(ISD::SMAX);
@@ -638,7 +680,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
// All memory operations. Some folding on the pointer operand is done to help
// matching the constant offsets in the addressing modes.
@@ -707,9 +749,7 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
if (Size == 64)
return MVT::i32;
- if (Size == 16 &&
- Subtarget->has16BitInsts() &&
- isPowerOf2_32(VT.getVectorNumElements()))
+ if (Size == 16 && Subtarget->has16BitInsts())
return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
}
@@ -730,9 +770,8 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
if (Size == 64)
return 2 * NumElts;
- // FIXME: Fails to break down as we want with v3.
- if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts))
- return VT.getVectorNumElements() / 2;
+ if (Size == 16 && Subtarget->has16BitInsts())
+ return (VT.getVectorNumElements() + 1) / 2;
}
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
@@ -763,10 +802,10 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
// FIXME: We should fix the ABI to be the same on targets without 16-bit
// support, but unless we can properly handle 3-vectors, it will still be
// inconsistent.
- if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts)) {
+ if (Size == 16 && Subtarget->has16BitInsts()) {
RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
IntermediateVT = RegisterVT;
- NumIntermediates = NumElts / 2;
+ NumIntermediates = (NumElts + 1) / 2;
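+ // For example (illustrative): a v3f16 argument is now broken into
+ // (3 + 1) / 2 = 2 v2f16 intermediates; previously the power-of-two check
+ // sent it to the default breakdown.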
return NumIntermediates;
}
}
@@ -775,6 +814,47 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}
+static MVT memVTFromAggregate(Type *Ty) {
+ // Only limited forms of aggregate type currently expected.
+ assert(Ty->isStructTy() && "Expected struct type");
+
+
+ Type *ElementType = nullptr;
+ unsigned NumElts;
+ if (Ty->getContainedType(0)->isVectorTy()) {
+ VectorType *VecComponent = cast<VectorType>(Ty->getContainedType(0));
+ ElementType = VecComponent->getElementType();
+ NumElts = VecComponent->getNumElements();
+ } else {
+ ElementType = Ty->getContainedType(0);
+ NumElts = 1;
+ }
+
+ assert((Ty->getContainedType(1) && Ty->getContainedType(1)->isIntegerTy(32)) && "Expected int32 type");
+
+ // Calculate the size of the memVT type from the aggregate
+ unsigned Pow2Elts = 0;
+ unsigned ElementSize;
+ switch (ElementType->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ case Type::IntegerTyID:
+ ElementSize = cast<IntegerType>(ElementType)->getBitWidth();
+ break;
+ case Type::HalfTyID:
+ ElementSize = 16;
+ break;
+ case Type::FloatTyID:
+ ElementSize = 32;
+ break;
+ }
+ unsigned AdditionalElts = ElementSize == 16 ? 2 : 1;
+ Pow2Elts = 1 << Log2_32_Ceil(NumElts + AdditionalElts);
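+ // Worked example (illustrative): a {<3 x half>, i32} return type gives
+ // NumElts = 3, ElementSize = 16, AdditionalElts = 2, so
+ // Pow2Elts = 1 << Log2_32_Ceil(5) = 8 and the memVT is v8f16.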
+
+ return MVT::getVectorVT(MVT::getVT(ElementType, false),
+ Pow2Elts);
+}
+
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
@@ -802,7 +882,12 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MODereferenceable;
if (Attr.hasFnAttribute(Attribute::ReadOnly)) {
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(CI.getType());
+ Info.memVT = MVT::getVT(CI.getType(), true);
+ if (Info.memVT == MVT::Other) {
+ // Some intrinsics return an aggregate type; special-case these to work out
+ // the correct memVT.
+ Info.memVT = memVTFromAggregate(CI.getType());
+ }
Info.flags |= MachineMemOperand::MOLoad;
} else if (Attr.hasFnAttribute(Attribute::WriteOnly)) {
Info.opc = ISD::INTRINSIC_VOID;
@@ -941,11 +1026,11 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.BaseGV)
return false;
- if (AS == AMDGPUASI.GLOBAL_ADDRESS)
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS)
return isLegalGlobalAddressingMode(AM);
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
@@ -983,10 +1068,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return false;
- } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
return isLegalMUBUFAddressingMode(AM);
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS ||
- AS == AMDGPUASI.REGION_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
+ AS == AMDGPUAS::REGION_ADDRESS) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
@@ -1001,8 +1086,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
return false;
- } else if (AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) {
+ } else if (AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::UNKNOWN_ADDRESS_SPACE) {
// For an unknown address space, this usually means that this is for some
// reason being used for pure arithmetic, and not based on some addressing
// computation. We don't have instructions that compute pointers with any
@@ -1016,12 +1101,12 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const {
- if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) {
return (MemVT.getSizeInBits() <= 4 * 32);
- } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize();
return (MemVT.getSizeInBits() <= MaxPrivateBits);
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
return (MemVT.getSizeInBits() <= 2 * 32);
}
return true;
@@ -1043,8 +1128,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return false;
}
- if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS ||
- AddrSpace == AMDGPUASI.REGION_ADDRESS) {
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
// aligned, 8 byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.
@@ -1059,17 +1144,21 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// will access scratch. If we had access to the IR function, then we
// could determine if any private memory was used in the function.
if (!Subtarget->hasUnalignedScratchAccess() &&
- (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS ||
- AddrSpace == AMDGPUASI.FLAT_ADDRESS)) {
- return false;
+ (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
+ AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
+ bool AlignedBy4 = Align >= 4;
+ if (IsFast)
+ *IsFast = AlignedBy4;
+
+ return AlignedBy4;
}
if (Subtarget->hasUnalignedBufferAccess()) {
// If we have a uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
- *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS ||
- AddrSpace == AMDGPUASI.CONSTANT_ADDRESS_32BIT) ?
+ *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
(Align % 4 == 0) : true;
}
@@ -1109,17 +1198,15 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
return MVT::Other;
}
-static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
- return AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
+static bool isFlatGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
}
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
- return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) &&
- isFlatGlobalAddrSpace(DestAS, AMDGPUASI);
+ return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
}
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
@@ -1133,7 +1220,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
// Flat -> private/local is a simple truncate.
// Flat -> global is no-op
- if (SrcAS == AMDGPUASI.FLAT_ADDRESS)
+ if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
return true;
return isNoopAddrSpaceCast(SrcAS, DestAS);
@@ -1146,7 +1233,7 @@ bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
}
TargetLoweringBase::LegalizeTypeAction
-SITargetLowering::getPreferredVectorAction(EVT VT) const {
+SITargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
return TypeSplitVector;
@@ -1200,7 +1287,7 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
= Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
+ MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
@@ -1240,7 +1327,7 @@ SDValue SITargetLowering::lowerKernargMemParameter(
uint64_t Offset, unsigned Align, bool Signed,
const ISD::InputArg *Arg) const {
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
// Try to avoid using an extload by loading earlier than the argument address,
@@ -1349,7 +1436,8 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
const ISD::InputArg *Arg = &Ins[I];
- assert(!Arg->VT.isVector() && "vector type argument should have been split");
+ assert((!Arg->VT.isVector() || Arg->VT.getScalarSizeInBits() == 16) &&
+ "vector type argument should have been split");
// First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS &&
@@ -1642,7 +1730,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- if (ST.isAmdCodeObjectV2(MF.getFunction())) {
+ if (ST.isAmdHsaOrMesa(MF.getFunction())) {
if (RequiresStackAccess) {
// If we have stack objects, we unquestionably need the private buffer
// resource. For the Code Object V2 ABI, this will be the first 4 user
@@ -1951,29 +2039,6 @@ SDValue SITargetLowering::LowerFormalArguments(
llvm_unreachable("Unknown loc info!");
}
- if (IsShader && Arg.VT.isVector()) {
- // Build a vector from the registers
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- SmallVector<SDValue, 4> Regs;
- Regs.push_back(Val);
- for (unsigned j = 1; j != NumElements; ++j) {
- Reg = ArgLocs[ArgIdx++].getLocReg();
- Reg = MF.addLiveIn(Reg, RC);
-
- SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- Regs.push_back(Copy);
- }
-
- // Fill up the missing vector elements
- NumElements = Arg.VT.getVectorNumElements() - NumElements;
- Regs.append(NumElements, DAG.getUNDEF(VT));
-
- InVals.push_back(DAG.getBuildVector(Arg.VT, DL, Regs));
- continue;
- }
-
InVals.push_back(Val);
}
@@ -2037,48 +2102,19 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsShader = AMDGPU::isShader(CallConv);
- Info->setIfReturnsVoid(Outs.size() == 0);
+ Info->setIfReturnsVoid(Outs.empty());
bool IsWaveEnd = Info->returnsVoid() && IsShader;
- SmallVector<ISD::OutputArg, 48> Splits;
- SmallVector<SDValue, 48> SplitVals;
-
- // Split vectors into their elements.
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- const ISD::OutputArg &Out = Outs[i];
-
- if (IsShader && Out.VT.isVector()) {
- MVT VT = Out.VT.getVectorElementType();
- ISD::OutputArg NewOut = Out;
- NewOut.Flags.setSplit();
- NewOut.VT = VT;
-
- // We want the original number of vector elements here, e.g.
- // three or five, not four or eight.
- unsigned NumElements = Out.ArgVT.getVectorNumElements();
-
- for (unsigned j = 0; j != NumElements; ++j) {
- SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, OutVals[i],
- DAG.getConstant(j, DL, MVT::i32));
- SplitVals.push_back(Elem);
- Splits.push_back(NewOut);
- NewOut.PartOffset += NewOut.VT.getStoreSize();
- }
- } else {
- SplitVals.push_back(OutVals[i]);
- Splits.push_back(Out);
- }
- }
-
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 48> RVLocs;
+ SmallVector<ISD::OutputArg, 48> Splits;
// CCState - Info about the registers and stack slots.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze outgoing return values.
- CCInfo.AnalyzeReturn(Splits, CCAssignFnForReturn(CallConv, isVarArg));
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 48> RetOps;
@@ -2103,14 +2139,12 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
// Copy the result values into the output registers.
- for (unsigned i = 0, realRVLocIdx = 0;
- i != RVLocs.size();
- ++i, ++realRVLocIdx) {
- CCValAssign &VA = RVLocs[i];
+ for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
+ ++I, ++RealRVLocIdx) {
+ CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
// TODO: Partially return in registers if return values don't fit.
-
- SDValue Arg = SplitVals[realRVLocIdx];
+ SDValue Arg = OutVals[RealRVLocIdx];
// Copied from other backends.
switch (VA.getLocInfo()) {
@@ -2225,11 +2259,11 @@ SDValue SITargetLowering::LowerCallResult(
// from the explicit user arguments present in the IR.
void SITargetLowering::passSpecialInputs(
CallLoweringInfo &CLI,
+ CCState &CCInfo,
const SIMachineFunctionInfo &Info,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains,
- SDValue Chain,
- SDValue StackPtr) const {
+ SDValue Chain) const {
// If we don't have a call site, this was a call inserted by
// legalization. These can never use special inputs.
if (!CLI.CS)
@@ -2297,9 +2331,9 @@ void SITargetLowering::passSpecialInputs(
if (OutgoingArg->isRegister()) {
RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
} else {
- SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, StackPtr,
- InputReg,
- OutgoingArg->getStackOffset());
+ unsigned SpecialArgOffset = CCInfo.AllocateStack(ArgVT.getStoreSize(), 4);
+ SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
+ SpecialArgOffset);
MemOpChains.push_back(ArgStore);
}
}
@@ -2424,6 +2458,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
"unsupported call to variadic function ");
}
+ if (!CLI.CS.getInstruction())
+ report_fatal_error("unsupported libcall legalization");
+
if (!CLI.CS.getCalledFunction()) {
return lowerUnhandledCall(CLI, InVals,
"unsupported indirect call to function ");
@@ -2442,8 +2479,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// The first 4 bytes are reserved for the callee's emergency stack slot.
- const unsigned CalleeUsableStackOffset = 4;
-
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
@@ -2463,25 +2498,16 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
++NumTailCalls;
}
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // FIXME: Remove this hack for function pointer types after removing
- // support of old address space mapping. In the new address space
- // mapping the pointer in default address space is 64 bit, therefore
- // does not need this hack.
- if (Callee.getValueType() == MVT::i32) {
- const GlobalValue *GV = GA->getGlobal();
- Callee = DAG.getGlobalAddress(GV, DL, MVT::i64, GA->getOffset(), false,
- GA->getTargetFlags());
- }
- }
- assert(Callee.getValueType() == MVT::i64);
-
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
+
+ // The first 4 bytes are reserved for the callee's emergency stack slot.
+ CCInfo.AllocateStack(4, 4);
+
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
// Get a count of how many bytes are to be pushed on the stack.
@@ -2529,10 +2555,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
}
- // Stack pointer relative accesses are done by changing the offset SGPR. This
- // is just the VGPR offset component.
- SDValue StackPtr = DAG.getConstant(CalleeUsableStackOffset, DL, MVT::i32);
-
SmallVector<SDValue, 8> MemOpChains;
MVT PtrVT = MVT::i32;
@@ -2576,18 +2598,22 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset;
- SDValue PtrOff = DAG.getObjectPtrOffset(DL, StackPtr, Offset);
+ SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT);
+ unsigned Align = 0;
if (IsTailCall) {
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
unsigned OpSize = Flags.isByVal() ?
Flags.getByValSize() : VA.getValVT().getStoreSize();
+ // FIXME: We can have better than the minimum byval required alignment.
+ Align = Flags.isByVal() ? Flags.getByValAlign() :
+ MinAlign(Subtarget->getStackAlignment(), Offset);
+
Offset = Offset + FPDiff;
int FI = MFI.CreateFixedObject(OpSize, Offset, true);
- DstAddr = DAG.getObjectPtrOffset(DL, DAG.getFrameIndex(FI, PtrVT),
- StackPtr);
+ DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
// Make sure any stack arguments overlapping with where we're storing
@@ -2601,6 +2627,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
} else {
DstAddr = PtrOff;
DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
+ Align = MinAlign(Subtarget->getStackAlignment(), LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
@@ -2611,18 +2638,18 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
/*isVol = */ false, /*AlwaysInline = */ true,
/*isTailCall = */ false, DstInfo,
MachinePointerInfo(UndefValue::get(Type::getInt8PtrTy(
- *DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS))));
+ *DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS))));
MemOpChains.push_back(Cpy);
} else {
- SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
+ SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Align);
MemOpChains.push_back(Store);
}
}
}
// Copy special input registers after user input arguments.
- passSpecialInputs(CLI, *Info, RegsToPass, MemOpChains, Chain, StackPtr);
+ passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
@@ -3460,7 +3487,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
MIB.add(MI.getOperand(I));
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return BB;
}
@@ -3628,7 +3655,11 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerDEBUGTRAP(Op, DAG);
case ISD::FABS:
case ISD::FNEG:
+ case ISD::FCANONICALIZE:
return splitUnaryVectorOp(Op, DAG);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ return lowerFMINNUM_FMAXNUM(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
@@ -3639,10 +3670,10 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
case ISD::FADD:
case ISD::FMUL:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
return splitBinaryVectorOp(Op, DAG);
}
return SDValue();
@@ -3678,18 +3709,9 @@ static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT,
SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
MemSDNode *M,
SelectionDAG &DAG,
+ ArrayRef<SDValue> Ops,
bool IsIntrinsic) const {
SDLoc DL(M);
- SmallVector<SDValue, 10> Ops;
- Ops.reserve(M->getNumOperands());
-
- Ops.push_back(M->getOperand(0));
- if (IsIntrinsic)
- Ops.push_back(DAG.getConstant(Opcode, DL, MVT::i32));
-
- // Skip 1, as it is the intrinsic ID.
- for (unsigned I = 2, E = M->getNumOperands(); I != E; ++I)
- Ops.push_back(M->getOperand(I));
bool Unpacked = Subtarget->hasUnpackedD16VMem();
EVT LoadVT = M->getValueType(0);
@@ -3717,6 +3739,69 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
return DAG.getMergeValues({ Adjusted, Load.getValue(1) }, DL);
}
+static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
+ SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
+ if (!CD)
+ return DAG.getUNDEF(VT);
+
+ int CondCode = CD->getSExtValue();
+ if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
+ CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
+ return DAG.getUNDEF(VT);
+
+ ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
+
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+
+ SDLoc DL(N);
+
+ EVT CmpVT = LHS.getValueType();
+ if (CmpVT == MVT::i16 && !TLI.isTypeLegal(MVT::i16)) {
+ unsigned PromoteOp = ICmpInst::isSigned(IcInput) ?
+ ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ LHS = DAG.getNode(PromoteOp, DL, MVT::i32, LHS);
+ RHS = DAG.getNode(PromoteOp, DL, MVT::i32, RHS);
+ }
+
+ ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
+
+ return DAG.getNode(AMDGPUISD::SETCC, DL, VT, LHS, RHS,
+ DAG.getCondCode(CCOpcode));
+}
+
+static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
+ SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
+ if (!CD)
+ return DAG.getUNDEF(VT);
+
+ int CondCode = CD->getSExtValue();
+ if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
+ CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE) {
+ return DAG.getUNDEF(VT);
+ }
+
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ EVT CmpVT = Src0.getValueType();
+ SDLoc SL(N);
+
+ if (CmpVT == MVT::f16 && !TLI.isTypeLegal(CmpVT)) {
+ Src0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
+ Src1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
+ }
+
+ FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
+ ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
+ return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src0,
+ Src1, DAG.getCondCode(CCOpcode));
+}
+
void SITargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -3761,8 +3846,13 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
else
Opcode = AMDGPUISD::CVT_PK_U16_U32;
- SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
- Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
+ EVT VT = N->getValueType(0);
+ if (isTypeLegal(VT))
+ Results.push_back(DAG.getNode(Opcode, SL, VT, Src0, Src1));
+ else {
+ SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
+ }
return;
}
}
@@ -3895,15 +3985,15 @@ void SITargetLowering::createDebuggerPrologueStackObjects(
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
- return (GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
AMDGPU::shouldEmitConstantsToTextSection(TT);
}
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
- return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
@@ -4038,6 +4128,23 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
}
+SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+
+ // FIXME: Assert during selection that this is only selected for
+ // ieee_mode. Currently a combine can produce the ieee version for non-ieee
+ // mode functions, but this happens to be OK since it's only done in cases
+ // where it is known that no sNaN can occur.
+ if (IsIEEEMode)
+ return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
+
+ if (VT == MVT::v4f16)
+ return splitBinaryVectorOp(Op, DAG);
+ return Op;
+}
+
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
@@ -4091,10 +4198,10 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
// FIXME: Use inline constants (src_{shared, private}_base) instead.
if (Subtarget->hasApertureRegs()) {
- unsigned Offset = AS == AMDGPUASI.LOCAL_ADDRESS ?
+ unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
- unsigned WidthM1 = AS == AMDGPUASI.LOCAL_ADDRESS ?
+ unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
unsigned Encoding =
@@ -4119,7 +4226,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
// Offset into amd_queue_t for group_segment_aperture_base_hi /
// private_segment_aperture_base_hi.
- uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44;
+ uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
SDValue Ptr = DAG.getObjectPtrOffset(DL, QueuePtr, StructOffset);
@@ -4127,7 +4234,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
// TODO: We should use the value from the IR intrinsic call, but it might not
// be available and how do we get it?
Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()),
- AMDGPUASI.CONSTANT_ADDRESS));
+ AMDGPUAS::CONSTANT_ADDRESS));
MachinePointerInfo PtrInfo(V, StructOffset);
return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
@@ -4148,11 +4255,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
// flat -> local/private
- if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
+ if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
unsigned DestAS = ASC->getDestAddressSpace();
- if (DestAS == AMDGPUASI.LOCAL_ADDRESS ||
- DestAS == AMDGPUASI.PRIVATE_ADDRESS) {
+ if (DestAS == AMDGPUAS::LOCAL_ADDRESS ||
+ DestAS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned NullVal = TM.getNullPointerValue(DestAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
@@ -4164,11 +4271,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
}
// local/private -> flat
- if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
+ if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
unsigned SrcAS = ASC->getSrcAddressSpace();
- if (SrcAS == AMDGPUASI.LOCAL_ADDRESS ||
- SrcAS == AMDGPUASI.PRIVATE_ADDRESS) {
+ if (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
+ SrcAS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned NullVal = TM.getNullPointerValue(SrcAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
@@ -4335,30 +4442,39 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
}
assert(VT == MVT::v2f16 || VT == MVT::v2i16);
+ assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
- Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
- Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
+ // Avoid adding defined bits with the zero_extend.
+ if (Hi.isUndef()) {
+ Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
+ SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
+ return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo);
+ }
- Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi);
SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
DAG.getConstant(16, SL, MVT::i32));
+ if (Lo.isUndef())
+ return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi);
- SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
+ Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
+ SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
return DAG.getNode(ISD::BITCAST, SL, VT, Or);
}
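
As a plain-C++ illustration of the BUILD_VECTOR lowering just above (not part of the patch, and not LLVM API), the v2i16/v2f16 case ultimately packs the two 16-bit lanes into one 32-bit word by zero-extending the high half, shifting it left by 16, and or-ing in the low half; the undef shortcuts simply skip whichever half contributes no defined bits. A minimal standalone sketch:

    // Standalone sketch: pack two 16-bit lanes into a 32-bit word, mirroring the
    // zero_extend + shl + or sequence used by the lowering above.
    #include <cstdint>
    #include <cstdio>

    static uint32_t packV2I16(uint16_t Lo, uint16_t Hi) {
      return static_cast<uint32_t>(Lo) | (static_cast<uint32_t>(Hi) << 16);
    }

    int main() {
      // Lo lands in bits [15:0], Hi in bits [31:16].
      std::printf("0x%08x\n", (unsigned)packV2I16(0x1234, 0xabcd)); // 0xabcd1234
    }
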
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// We can fold offsets for anything that doesn't require a GOT relocation.
- return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
- GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
!shouldEmitGOTReloc(GA->getGlobal());
}
@@ -4409,18 +4525,15 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GSD->getGlobal();
-
- if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
- GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT &&
- GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS &&
- // FIXME: It isn't correct to rely on the type of the pointer. This should
- // be removed when address space 0 is 64-bit.
- !GV->getType()->getElementType()->isFunctionTy())
+ if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
+ GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
SDLoc DL(GSD);
EVT PtrVT = Op.getValueType();
+ // FIXME: Should not make address space based decisions here.
if (shouldEmitFixup(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
else if (shouldEmitPCReloc(GV))
@@ -4431,11 +4544,11 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SIInstrInfo::MO_GOTPCREL32);
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
const DataLayout &DataLayout = DAG.getDataLayout();
unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
- // FIXME: Use a PseudoSourceValue once those can be assigned an address space.
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getGOT(DAG.getMachineFunction());
return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Align,
MachineMemOperand::MODereferenceable |
@@ -4547,11 +4660,115 @@ static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
return Value == 0;
}
+// Reconstruct the required return value for an image load intrinsic.
+// This is more complicated due to the optional use of TexFailCtrl, which means
+// the required return type is an aggregate.
+static SDValue constructRetValue(SelectionDAG &DAG,
+ MachineSDNode *Result,
+ ArrayRef<EVT> ResultTypes,
+ bool IsTexFail, bool Unpacked, bool IsD16,
+ int DMaskPop, int NumVDataDwords,
+ const SDLoc &DL, LLVMContext &Context) {
+ // Determine the required return type. This is the same regardless of the IsTexFail flag.
+ EVT ReqRetVT = ResultTypes[0];
+ EVT ReqRetEltVT = ReqRetVT.isVector() ? ReqRetVT.getVectorElementType() : ReqRetVT;
+ int ReqRetNumElts = ReqRetVT.isVector() ? ReqRetVT.getVectorNumElements() : 1;
+ EVT AdjEltVT = Unpacked && IsD16 ? MVT::i32 : ReqRetEltVT;
+ EVT AdjVT = Unpacked ? ReqRetNumElts > 1 ? EVT::getVectorVT(Context, AdjEltVT, ReqRetNumElts)
+ : AdjEltVT
+ : ReqRetVT;
+
+ // Extract data part of the result
+ // Bitcast the result to the same type as the required return type
+ int NumElts;
+ if (IsD16 && !Unpacked)
+ NumElts = NumVDataDwords << 1;
+ else
+ NumElts = NumVDataDwords;
+
+ EVT CastVT = NumElts > 1 ? EVT::getVectorVT(Context, AdjEltVT, NumElts)
+ : AdjEltVT;
+
+ // Special case for v8f16. Rather than add support for this, use v4i32 to
+ // extract the data elements
+ bool V8F16Special = false;
+ if (CastVT == MVT::v8f16) {
+ CastVT = MVT::v4i32;
+ DMaskPop >>= 1;
+ ReqRetNumElts >>= 1;
+ V8F16Special = true;
+ AdjVT = MVT::v2i32;
+ }
+
+ SDValue N = SDValue(Result, 0);
+ SDValue CastRes = DAG.getNode(ISD::BITCAST, DL, CastVT, N);
+
+ // Iterate over the result
+ SmallVector<SDValue, 4> BVElts;
+
+ if (CastVT.isVector()) {
+ DAG.ExtractVectorElements(CastRes, BVElts, 0, DMaskPop);
+ } else {
+ BVElts.push_back(CastRes);
+ }
+ int ExtraElts = ReqRetNumElts - DMaskPop;
+ while (ExtraElts--)
+ BVElts.push_back(DAG.getUNDEF(AdjEltVT));
+
+ SDValue PreTFCRes;
+ if (ReqRetNumElts > 1) {
+ SDValue NewVec = DAG.getBuildVector(AdjVT, DL, BVElts);
+ if (IsD16 && Unpacked)
+ PreTFCRes = adjustLoadValueTypeImpl(NewVec, ReqRetVT, DL, DAG, Unpacked);
+ else
+ PreTFCRes = NewVec;
+ } else {
+ PreTFCRes = BVElts[0];
+ }
+
+ if (V8F16Special)
+ PreTFCRes = DAG.getNode(ISD::BITCAST, DL, MVT::v4f16, PreTFCRes);
+
+ if (!IsTexFail) {
+ if (Result->getNumValues() > 1)
+ return DAG.getMergeValues({PreTFCRes, SDValue(Result, 1)}, DL);
+ else
+ return PreTFCRes;
+ }
+
+ // Extract the TexFail result and insert into aggregate return
+ SmallVector<SDValue, 1> TFCElt;
+ DAG.ExtractVectorElements(N, TFCElt, DMaskPop, 1);
+ SDValue TFCRes = DAG.getNode(ISD::BITCAST, DL, ResultTypes[1], TFCElt[0]);
+ return DAG.getMergeValues({PreTFCRes, TFCRes, SDValue(Result, 1)}, DL);
+}
+
+static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
+ SDValue *LWE, bool &IsTexFail) {
+ auto TexFailCtrlConst = dyn_cast<ConstantSDNode>(TexFailCtrl.getNode());
+ if (!TexFailCtrlConst)
+ return false;
+
+ uint64_t Value = TexFailCtrlConst->getZExtValue();
+ if (Value) {
+ IsTexFail = true;
+ }
+
+ SDLoc DL(TexFailCtrlConst);
+ *TFE = DAG.getTargetConstant((Value & 0x1) ? 1 : 0, DL, MVT::i32);
+ Value &= ~(uint64_t)0x1;
+ *LWE = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
+ Value &= ~(uint64_t)0x2;
+
+ return Value == 0;
+}
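
To make the texfailctrl handling above concrete: the immediate is treated as a bitfield in which bit 0 requests TFE and bit 1 requests LWE, and any other set bit marks the value as unsupported. A standalone sketch of that decoding (plain C++, not the SelectionDAG version in the patch):

    // Standalone sketch of the texfailctrl decoding: bit 0 -> TFE, bit 1 -> LWE,
    // and any leftover bits make the control value unsupported.
    #include <cstdint>

    static bool decodeTexFailCtrl(uint64_t Value, bool &TFE, bool &LWE) {
      TFE = (Value & 0x1) != 0;
      Value &= ~UINT64_C(0x1);
      LWE = (Value & 0x2) != 0;
      Value &= ~UINT64_C(0x2);
      return Value == 0; // false means some unsupported bit was set
    }

    int main() {
      bool TFE = false, LWE = false;
      bool Ok = decodeTexFailCtrl(/*texfailctrl=*/3, TFE, LWE); // request TFE and LWE
      return (Ok && TFE && LWE) ? 0 : 1;
    }
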
+
SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
+ const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
@@ -4559,12 +4776,17 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
unsigned IntrOpcode = Intr->BaseOpcode;
- SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
+ SmallVector<EVT, 3> ResultTypes(Op->value_begin(), Op->value_end());
+ SmallVector<EVT, 3> OrigResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;
+ bool IsA16 = false;
SDValue VData;
int NumVDataDwords;
+ bool AdjustRetType = false;
+
unsigned AddrIdx; // Index of first address argument
unsigned DMask;
+ unsigned DMaskLanes = 0;
if (BaseOpcode->Atomic) {
VData = Op.getOperand(2);
@@ -4587,7 +4809,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
AddrIdx = 3;
}
} else {
- unsigned DMaskIdx;
+ unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa<MemSDNode>(Op) ? 2 : 1;
+ auto DMaskConst = dyn_cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
+ if (!DMaskConst)
+ return Op;
+ DMask = DMaskConst->getZExtValue();
+ DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
if (BaseOpcode->Store) {
VData = Op.getOperand(2);
@@ -4603,58 +4830,91 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32;
- DMaskIdx = 3;
} else {
- MVT LoadVT = Op.getSimpleValueType();
+ // Work out the number of dwords based on the dmask popcount and underlying type
+ // and whether packing is supported.
+ MVT LoadVT = ResultTypes[0].getSimpleVT();
if (LoadVT.getScalarType() == MVT::f16) {
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
!BaseOpcode->HasD16)
return Op; // D16 is unsupported for this instruction
IsD16 = true;
- if (LoadVT.isVector() && Subtarget->hasUnpackedD16VMem())
- ResultTypes[0] = (LoadVT == MVT::v2f16) ? MVT::v2i32 : MVT::v4i32;
}
- NumVDataDwords = (ResultTypes[0].getSizeInBits() + 31) / 32;
- DMaskIdx = isa<MemSDNode>(Op) ? 2 : 1;
- }
+ // Confirm that the return type is large enough for the dmask specified
+ if ((LoadVT.isVector() && LoadVT.getVectorNumElements() < DMaskLanes) ||
+ (!LoadVT.isVector() && DMaskLanes > 1))
+ return Op;
- auto DMaskConst = dyn_cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
- if (!DMaskConst)
- return Op;
+ if (IsD16 && !Subtarget->hasUnpackedD16VMem())
+ NumVDataDwords = (DMaskLanes + 1) / 2;
+ else
+ NumVDataDwords = DMaskLanes;
- AddrIdx = DMaskIdx + 1;
- DMask = DMaskConst->getZExtValue();
- if (!DMask && !BaseOpcode->Store) {
- // Eliminate no-op loads. Stores with dmask == 0 are *not* no-op: they
- // store the channels' default values.
- SDValue Undef = DAG.getUNDEF(Op.getValueType());
- if (isa<MemSDNode>(Op))
- return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);
- return Undef;
+ AdjustRetType = true;
}
+
+ AddrIdx = DMaskIdx + 1;
}
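
A small standalone sketch of the dword-count arithmetic used above (this mirrors the logic in the patch, it is not the LLVM helper itself): packed D16 results hold two 16-bit lanes per dword, everything else needs one dword per enabled dmask lane.

    // Mirror of the vdata dword-count computation above.
    static int numVDataDwords(int DMaskLanes, bool IsD16, bool HasUnpackedD16VMem) {
      if (IsD16 && !HasUnpackedD16VMem)
        return (DMaskLanes + 1) / 2;
      return DMaskLanes;
    }

    int main() {
      // A 3-lane dmask with packed D16 needs 2 dwords; with 32-bit data it needs 3.
      return (numVDataDwords(3, true, false) == 2 &&
              numVDataDwords(3, false, false) == 3) ? 0 : 1;
    }
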
- unsigned NumVAddrs = BaseOpcode->NumExtraArgs +
- (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
- (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
- (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
+ unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
+ unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
+ unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
+ NumCoords + NumLCM;
+ unsigned NumMIVAddrs = NumVAddrs;
+
SmallVector<SDValue, 4> VAddrs;
- for (unsigned i = 0; i < NumVAddrs; ++i)
- VAddrs.push_back(Op.getOperand(AddrIdx + i));
// Optimize _L to _LZ when _L is zero
if (LZMappingInfo) {
if (auto ConstantLod =
- dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
+ dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
- VAddrs.pop_back(); // remove 'lod'
+ NumMIVAddrs--; // remove 'lod'
}
}
}
+ // Check for 16-bit addresses and pack them if found.
+ unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
+ MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
+ const MVT VAddrScalarVT = VAddrVT.getScalarType();
+ if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16)) &&
+ ST->hasFeature(AMDGPU::FeatureR128A16)) {
+ IsA16 = true;
+ const MVT VectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
+ for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
+ SDValue AddrLo, AddrHi;
+ // Push back extra arguments.
+ if (i < DimIdx) {
+ AddrLo = Op.getOperand(i);
+ } else {
+ AddrLo = Op.getOperand(i);
+ // Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
+ // in 1D, derivatives dx/dh and dx/dv are packed with undef.
+ if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
+ ((NumGradients / 2) % 2 == 1 &&
+ (i == DimIdx + (NumGradients / 2) - 1 ||
+ i == DimIdx + NumGradients - 1))) {
+ AddrHi = DAG.getUNDEF(MVT::f16);
+ } else {
+ AddrHi = Op.getOperand(i + 1);
+ i++;
+ }
+ AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VectorVT,
+ {AddrLo, AddrHi});
+ AddrLo = DAG.getBitcast(MVT::i32, AddrLo);
+ }
+ VAddrs.push_back(AddrLo);
+ }
+ } else {
+ for (unsigned i = 0; i < NumMIVAddrs; ++i)
+ VAddrs.push_back(Op.getOperand(AddrIdx + i));
+ }
+
SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
@@ -4674,11 +4934,53 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
CtrlIdx = AddrIdx + NumVAddrs + 3;
}
+ SDValue TFE;
+ SDValue LWE;
SDValue TexFail = Op.getOperand(CtrlIdx);
- auto TexFailConst = dyn_cast<ConstantSDNode>(TexFail.getNode());
- if (!TexFailConst || TexFailConst->getZExtValue() != 0)
+ bool IsTexFail = false;
+ if (!parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail))
return Op;
+ if (IsTexFail) {
+ if (!DMaskLanes) {
+ // Expecting to get an error flag since TFC is on and dmask is 0.
+ // Force dmask to be at least 1, otherwise the instruction will fail.
+ DMask = 0x1;
+ DMaskLanes = 1;
+ NumVDataDwords = 1;
+ }
+ NumVDataDwords += 1;
+ AdjustRetType = true;
+ }
+
+ // Has something earlier tagged that the return type needs adjusting
+ // This happens if the instruction is a load or has set TexFailCtrl flags
+ if (AdjustRetType) {
+ // NumVDataDwords reflects the true number of dwords required in the return type
+ if (DMaskLanes == 0 && !BaseOpcode->Store) {
+ // This is a no-op load. This can be eliminated
+ SDValue Undef = DAG.getUNDEF(Op.getValueType());
+ if (isa<MemSDNode>(Op))
+ return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);
+ return Undef;
+ }
+
+ // Have to use a power of 2 number of dwords
+ NumVDataDwords = 1 << Log2_32_Ceil(NumVDataDwords);
+
+ EVT NewVT = NumVDataDwords > 1 ?
+ EVT::getVectorVT(*DAG.getContext(), MVT::f32, NumVDataDwords)
+ : MVT::f32;
+
+ ResultTypes[0] = NewVT;
+ if (ResultTypes.size() == 3) {
+ // The original result was an aggregate type used for TexFailCtrl results.
+ // The actual instruction returns as a vector type which has now been
+ // created. Remove the aggregate result.
+ ResultTypes.erase(&ResultTypes[1]);
+ }
+ }
+
SDValue GLC;
SDValue SLC;
if (BaseOpcode->Atomic) {
@@ -4701,9 +5003,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Ops.push_back(Unorm);
Ops.push_back(GLC);
Ops.push_back(SLC);
- Ops.push_back(False); // r128
- Ops.push_back(False); // tfe
- Ops.push_back(False); // lwe
+ Ops.push_back(IsA16 && // a16 or r128
+ ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
+ Ops.push_back(TFE); // tfe
+ Ops.push_back(LWE); // lwe
Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
Ops.push_back(IsD16 ? True : False);
@@ -4723,25 +5026,90 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
if (auto MemOp = dyn_cast<MemSDNode>(Op)) {
- MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
- *MemRefs = MemOp->getMemOperand();
- NewNode->setMemRefs(MemRefs, MemRefs + 1);
+ MachineMemOperand *MemRef = MemOp->getMemOperand();
+ DAG.setNodeMemRefs(NewNode, {MemRef});
}
if (BaseOpcode->AtomicX2) {
SmallVector<SDValue, 1> Elt;
DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
- } else if (IsD16 && !BaseOpcode->Store) {
- MVT LoadVT = Op.getSimpleValueType();
- SDValue Adjusted = adjustLoadValueTypeImpl(
- SDValue(NewNode, 0), LoadVT, DL, DAG, Subtarget->hasUnpackedD16VMem());
- return DAG.getMergeValues({Adjusted, SDValue(NewNode, 1)}, DL);
+ } else if (!BaseOpcode->Store) {
+ return constructRetValue(DAG, NewNode,
+ OrigResultTypes, IsTexFail,
+ Subtarget->hasUnpackedD16VMem(), IsD16,
+ DMaskLanes, NumVDataDwords, DL,
+ *DAG.getContext());
}
return SDValue(NewNode, 0);
}
+SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
+ SDValue Offset, SDValue GLC,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ VT.getStoreSize(), VT.getStoreSize());
+
+ if (!Offset->isDivergent()) {
+ SDValue Ops[] = {
+ Rsrc,
+ Offset, // Offset
+ GLC // glc
+ };
+ return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_LOAD, DL,
+ DAG.getVTList(VT), Ops, VT, MMO);
+ }
+
+ // We have a divergent offset. Emit a MUBUF buffer load instead. We can
+ // assume that the buffer is unswizzled.
+ SmallVector<SDValue, 4> Loads;
+ unsigned NumLoads = 1;
+ MVT LoadVT = VT.getSimpleVT();
+ unsigned NumElts = LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
+ assert((LoadVT.getScalarType() == MVT::i32 ||
+ LoadVT.getScalarType() == MVT::f32) &&
+ isPowerOf2_32(NumElts));
+
+ if (NumElts == 8 || NumElts == 16) {
+ NumLoads = NumElts == 16 ? 4 : 2;
+ LoadVT = MVT::v4i32;
+ }
+
+ SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
+ unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
+ SDValue Ops[] = {
+ DAG.getEntryNode(), // Chain
+ Rsrc, // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ {}, // voffset
+ {}, // soffset
+ {}, // offset
+ DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+
+ // Use the alignment to ensure that the required offsets will fit into the
+ // immediate offsets.
+ setBufferOffsets(Offset, DAG, &Ops[3], NumLoads > 1 ? 16 * NumLoads : 4);
+
+ uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
+ for (unsigned i = 0; i < NumLoads; ++i) {
+ Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32);
+ Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
+ Ops, LoadVT, MMO));
+ }
+
+ if (VT == MVT::v8i32 || VT == MVT::v16i32)
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Loads);
+
+ return Loads[0];
+}
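
For the divergent-offset path above, the wide s_buffer_load result is assembled from several v4i32 MUBUF loads placed 16 bytes apart. A standalone sketch of just that stride bookkeeping (hypothetical helper, not the DAG code in the patch):

    // Standalone sketch: a divergent load of 8 or 16 dwords is split into 2 or 4
    // sub-loads of 4 dwords each, 16 bytes apart, starting at the base offset.
    #include <cstdint>
    #include <vector>

    static std::vector<uint32_t> subLoadOffsets(unsigned NumElts, uint32_t BaseOffset) {
      unsigned NumLoads = (NumElts == 16) ? 4 : (NumElts == 8) ? 2 : 1;
      std::vector<uint32_t> Offsets;
      for (unsigned I = 0; I < NumLoads; ++I)
        Offsets.push_back(BaseOffset + 16 * I); // each piece covers 4 dwords
      return Offsets;
    }

    int main() {
      auto Offs = subLoadOffsets(16, 256); // 4 sub-loads at 256, 272, 288, 304
      return Offs.size() == 4 ? 0 : 1;
    }
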
+
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -4755,14 +5123,14 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntrinsicID) {
case Intrinsic::amdgcn_implicit_buffer_ptr: {
- if (getSubtarget()->isAmdCodeObjectV2(MF.getFunction()))
+ if (getSubtarget()->isAmdHsaOrMesa(MF.getFunction()))
return emitNonHSAIntrinsicError(DAG, DL, VT);
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
}
case Intrinsic::amdgcn_dispatch_ptr:
case Intrinsic::amdgcn_queue_ptr: {
- if (!Subtarget->isAmdCodeObjectV2(MF.getFunction())) {
+ if (!Subtarget->isAmdHsaOrMesa(MF.getFunction())) {
DiagnosticInfoUnsupported BadIntrin(
MF.getFunction(), "unsupported hsa intrinsic without hsa target",
DL.getDebugLoc());
@@ -4880,12 +5248,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::r600_read_tgid_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
- case Intrinsic::amdgcn_workitem_id_x: {
+ case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::r600_read_tidig_x:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDX);
- }
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::r600_read_tidig_y:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
@@ -4896,19 +5263,16 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDZ);
- case AMDGPUIntrinsic::SI_load_const: {
- SDValue Ops[] = {
- Op.getOperand(1),
- Op.getOperand(2)
- };
-
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
- Op->getVTList(), Ops, VT, MMO);
+ case SIIntrinsic::SI_load_const: {
+ SDValue Load =
+ lowerSBuffer(MVT::i32, DL, Op.getOperand(1), Op.getOperand(2),
+ DAG.getTargetConstant(0, DL, MVT::i1), DAG);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Load);
+ }
+ case Intrinsic::amdgcn_s_buffer_load: {
+ unsigned Cache = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2),
+ DAG.getTargetConstant(Cache & 1, DL, MVT::i1), DAG);
}
case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
@@ -4991,34 +5355,15 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Denominator, Numerator);
}
case Intrinsic::amdgcn_icmp: {
- const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- if (!CD)
- return DAG.getUNDEF(VT);
-
- int CondCode = CD->getSExtValue();
- if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
- CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
- return DAG.getUNDEF(VT);
-
- ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
- ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
- Op.getOperand(2), DAG.getCondCode(CCOpcode));
+ // There is a Pat that handles this variant, so return it as-is.
+ if (Op.getOperand(1).getValueType() == MVT::i1 &&
+ Op.getConstantOperandVal(2) == 0 &&
+ Op.getConstantOperandVal(3) == ICmpInst::Predicate::ICMP_NE)
+ return Op;
+ return lowerICMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_fcmp: {
- const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- if (!CD)
- return DAG.getUNDEF(VT);
-
- int CondCode = CD->getSExtValue();
- if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
- CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE)
- return DAG.getUNDEF(VT);
-
- FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
- ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
- Op.getOperand(2), DAG.getCondCode(CCOpcode));
+ return lowerFCMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_fmed3:
return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
@@ -5058,6 +5403,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else
Opcode = AMDGPUISD::CVT_PK_U16_U32;
+ if (isTypeLegal(VT))
+ return DAG.getNode(Opcode, DL, VT, Op.getOperand(1), Op.getOperand(2));
+
SDValue Node = DAG.getNode(Opcode, DL, MVT::i32,
Op.getOperand(1), Op.getOperand(2));
return DAG.getNode(ISD::BITCAST, DL, VT, Node);
@@ -5127,36 +5475,104 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format: {
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
Op.getOperand(3), // vindex
- Op.getOperand(4), // offset
- Op.getOperand(5), // glc
- Op.getOperand(6) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
+ setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
+
+ EVT VT = Op.getValueType();
+ EVT IntVT = VT.changeTypeToInteger();
+ auto *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_buffer_load_format: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(4), // soffset
+ Offsets.second, // offset
+ Op.getOperand(5), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+
+ unsigned Opc = (IntrID == Intrinsic::amdgcn_raw_buffer_load) ?
+ AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
+
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
auto *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
- bool IsD16 = LoadVT.getScalarType() == MVT::f16;
- if (IsD16)
- return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, M, DAG);
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_load_format: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+
+ unsigned Opc = (IntrID == Intrinsic::amdgcn_struct_buffer_load) ?
+ AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
+
+ EVT VT = Op.getValueType();
+ EVT IntVT = VT.changeTypeToInteger();
+ auto *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
M->getMemOperand());
}
case Intrinsic::amdgcn_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
- bool IsD16 = LoadVT.getScalarType() == MVT::f16;
- if (IsD16) {
- return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16, M, DAG);
- }
+ unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
@@ -5164,12 +5580,62 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // voffset
Op.getOperand(5), // soffset
Op.getOperand(6), // offset
- Op.getOperand(7), // dfmt
- Op.getOperand(8), // nfmt
- Op.getOperand(9), // glc
- Op.getOperand(10) // slc
+ DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ };
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, LoadVT,
+ M->getMemOperand());
+ }
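
The legacy tbuffer operands are folded here into the two combined immediates used by the new node layout: dfmt and nfmt become a single format field, and glc/slc become a single cachepolicy field. A standalone sketch of that packing, assuming only the bit positions visible in the code above:

    // Standalone sketch of the operand packing above (bit layout taken from the
    // expressions in this patch): format = dfmt | (nfmt << 4),
    // cachepolicy = glc | (slc << 1).
    #include <cstdint>

    static uint32_t packFormat(uint32_t Dfmt, uint32_t Nfmt) {
      return Dfmt | (Nfmt << 4);
    }

    static uint32_t packCachePolicy(uint32_t Glc, uint32_t Slc) {
      return Glc | (Slc << 1);
    }

    int main() {
      // e.g. dfmt=14, nfmt=4 -> format 0x4e; glc=1, slc=0 -> cachepolicy 0x1.
      return (packFormat(14, 4) == 0x4e && packCachePolicy(1, 0) == 1) ? 0 : 1;
    }
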
+ case Intrinsic::amdgcn_raw_tbuffer_load: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+ auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
+
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(4), // soffset
+ Offsets.second, // offset
+ Op.getOperand(5), // format
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, LoadVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_tbuffer_load: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // format
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
};
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, LoadVT,
M->getMemOperand());
@@ -5184,14 +5650,22 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_buffer_atomic_and:
case Intrinsic::amdgcn_buffer_atomic_or:
case Intrinsic::amdgcn_buffer_atomic_xor: {
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
- Op.getOperand(5), // offset
- Op.getOperand(6) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
+ setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
@@ -5235,16 +5709,193 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
M->getMemOperand());
}
+ case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ case Intrinsic::amdgcn_raw_buffer_atomic_xor: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // vdata
+ Op.getOperand(3), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+ EVT VT = Op.getValueType();
+
+ auto *M = cast<MemSDNode>(Op);
+ unsigned Opcode = 0;
+
+ switch (IntrID) {
+ case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
+ break;
+ default:
+ llvm_unreachable("unhandled atomic opcode");
+ }
+
+ return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ case Intrinsic::amdgcn_struct_buffer_atomic_xor: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+ EVT VT = Op.getValueType();
+
+ auto *M = cast<MemSDNode>(Op);
+ unsigned Opcode = 0;
+
+ switch (IntrID) {
+ case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
+ break;
+ default:
+ llvm_unreachable("unhandled atomic opcode");
+ }
+ return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
+ M->getMemOperand());
+ }
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(5)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // src
Op.getOperand(3), // cmp
Op.getOperand(4), // rsrc
Op.getOperand(5), // vindex
- Op.getOperand(6), // offset
- Op.getOperand(7) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ };
+ setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
+ EVT VT = Op.getValueType();
+ auto *M = cast<MemSDNode>(Op);
+
+ return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
+ Op->getVTList(), Ops, VT, M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // src
+ Op.getOperand(3), // cmp
+ Op.getOperand(4), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+ EVT VT = Op.getValueType();
+ auto *M = cast<MemSDNode>(Op);
+
+ return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
+ Op->getVTList(), Ops, VT, M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(6), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // src
+ Op.getOperand(3), // cmp
+ Op.getOperand(4), // rsrc
+ Op.getOperand(5), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(7), // soffset
+ Offsets.second, // offset
+ Op.getOperand(8), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
@@ -5360,19 +6011,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain,
Op.getOperand(2), Op.getOperand(3));
}
- case AMDGPUIntrinsic::AMDGPU_kill: {
- SDValue Src = Op.getOperand(2);
- if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Src)) {
- if (!K->isNegative())
- return Chain;
-
- SDValue NegOne = DAG.getTargetConstant(FloatToBits(-1.0f), DL, MVT::i32);
- return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, NegOne);
- }
-
- SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
- return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
- }
case Intrinsic::amdgcn_s_barrier: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -5383,69 +6021,79 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
return SDValue();
};
- case AMDGPUIntrinsic::SI_tbuffer_store: {
-
- // Extract vindex and voffset from vaddr as appropriate
- const ConstantSDNode *OffEn = cast<ConstantSDNode>(Op.getOperand(10));
- const ConstantSDNode *IdxEn = cast<ConstantSDNode>(Op.getOperand(11));
- SDValue VAddr = Op.getOperand(5);
-
- SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
-
- assert(!(OffEn->isOne() && IdxEn->isOne()) &&
- "Legacy intrinsic doesn't support both offset and index - use new version");
-
- SDValue VIndex = IdxEn->isOne() ? VAddr : Zero;
- SDValue VOffset = OffEn->isOne() ? VAddr : Zero;
-
- // Deal with the vec-3 case
- const ConstantSDNode *NumChannels = cast<ConstantSDNode>(Op.getOperand(4));
- auto Opcode = NumChannels->getZExtValue() == 3 ?
- AMDGPUISD::TBUFFER_STORE_FORMAT_X3 : AMDGPUISD::TBUFFER_STORE_FORMAT;
-
+ case Intrinsic::amdgcn_tbuffer_store: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+ unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(11))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
- Chain,
- Op.getOperand(3), // vdata
- Op.getOperand(2), // rsrc
- VIndex,
- VOffset,
- Op.getOperand(6), // soffset
- Op.getOperand(7), // inst_offset
- Op.getOperand(8), // dfmt
- Op.getOperand(9), // nfmt
- Op.getOperand(12), // glc
- Op.getOperand(13), // slc
+ Chain,
+ VData, // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Op.getOperand(5), // voffset
+ Op.getOperand(6), // soffset
+ Op.getOperand(7), // offset
+ DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
-
- assert((cast<ConstantSDNode>(Op.getOperand(14)))->getZExtValue() == 0 &&
- "Value of tfe other than zero is unsupported");
-
- EVT VT = Op.getOperand(3).getValueType();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(Opcode, DL,
- Op->getVTList(), Ops, VT, MMO);
+ unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+ AMDGPUISD::TBUFFER_STORE_FORMAT;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
}
- case Intrinsic::amdgcn_tbuffer_store: {
+ case Intrinsic::amdgcn_struct_tbuffer_store: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Chain,
VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
- Op.getOperand(5), // voffset
+ Offsets.first, // voffset
Op.getOperand(6), // soffset
- Op.getOperand(7), // offset
- Op.getOperand(8), // dfmt
- Op.getOperand(9), // nfmt
- Op.getOperand(10), // glc
- Op.getOperand(11) // slc
+ Offsets.second, // offset
+ Op.getOperand(7), // format
+ Op.getOperand(8), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+ unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+ AMDGPUISD::TBUFFER_STORE_FORMAT;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
+ }
+
+ case Intrinsic::amdgcn_raw_tbuffer_store: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Chain,
+ VData, // vdata
+ Op.getOperand(3), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // format
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -5460,15 +6108,23 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Chain,
- VData, // vdata
+ VData,
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
- Op.getOperand(5), // offset
- Op.getOperand(6), // glc
- Op.getOperand(7) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
+ setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
@@ -5476,6 +6132,59 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
+
+ case Intrinsic::amdgcn_raw_buffer_store:
+ case Intrinsic::amdgcn_raw_buffer_store_format: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Chain,
+ VData,
+ Op.getOperand(3), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+ unsigned Opc = IntrinsicID == Intrinsic::amdgcn_raw_buffer_store ?
+ AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
+ Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
+ }
+
+ case Intrinsic::amdgcn_struct_buffer_store:
+ case Intrinsic::amdgcn_struct_buffer_store_format: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ SDValue Ops[] = {
+ Chain,
+ VData,
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+ unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
+ AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
+ Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
+ }
+
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -5486,6 +6195,94 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
}
+// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
+// offset (the offset that is included in bounds checking and swizzling, to be
+// split between the instruction's voffset and immoffset fields) and soffset
+// (the offset that is excluded from bounds checking and swizzling, to go in
+// the instruction's soffset field). This function takes the first kind of
+// offset and figures out how to split it between voffset and immoffset.
+std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
+ SDValue Offset, SelectionDAG &DAG) const {
+ SDLoc DL(Offset);
+ const unsigned MaxImm = 4095;
+ SDValue N0 = Offset;
+ ConstantSDNode *C1 = nullptr;
+
+ if ((C1 = dyn_cast<ConstantSDNode>(N0)))
+ N0 = SDValue();
+ else if (DAG.isBaseWithConstantOffset(N0)) {
+ C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ N0 = N0.getOperand(0);
+ }
+
+ if (C1) {
+ unsigned ImmOffset = C1->getZExtValue();
+ // If the immediate value is too big for the immoffset field, put only its
+ // low 12 bits into the immoffset field, so that the value that is copied/added
+ // for the voffset field is a multiple of 4096, and it stands more chance
+ // of being CSEd with the copy/add for another similar load/store.
+ // However, do not do that rounding down to a multiple of 4096 if that is a
+ // negative number, as it appears to be illegal to have a negative offset
+ // in the vgpr, even if adding the immediate offset makes it positive.
+ unsigned Overflow = ImmOffset & ~MaxImm;
+ ImmOffset -= Overflow;
+ if ((int32_t)Overflow < 0) {
+ Overflow += ImmOffset;
+ ImmOffset = 0;
+ }
+ C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
+ if (Overflow) {
+ auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
+ if (!N0)
+ N0 = OverflowVal;
+ else {
+ SDValue Ops[] = { N0, OverflowVal };
+ N0 = DAG.getNode(ISD::ADD, DL, MVT::i32, Ops);
+ }
+ }
+ }
+ if (!N0)
+ N0 = DAG.getConstant(0, DL, MVT::i32);
+ if (!C1)
+ C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
+ return {N0, SDValue(C1, 0)};
+}
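The arithmetic above is easier to follow with concrete numbers. The helper below is an editorial sketch, not part of the patch; splitImmOffsetSketch is a made-up name and only the constant-offset path of splitBufferOffsets is modelled.

#include <cstdint>
#include <utility>

// Returns {amount copied/added into voffset, value left for the immoffset field}.
static std::pair<uint32_t, uint32_t> splitImmOffsetSketch(uint32_t Combined) {
  const uint32_t MaxImm = 4095;
  uint32_t ImmOffset = Combined & MaxImm;  // low 12 bits stay in immoffset
  uint32_t Overflow = Combined & ~MaxImm;  // a multiple of 4096 goes to voffset
  if ((int32_t)Overflow < 0) {
    // Never leave a negative value for the VGPR; push everything into voffset.
    Overflow += ImmOffset;
    ImmOffset = 0;
  }
  return {Overflow, ImmOffset};
}
// Examples: splitImmOffsetSketch(60) == {0, 60}, splitImmOffsetSketch(5000) == {4096, 904}.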
+
+// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
+// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
+// pointed to by Offsets.
+void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
+ SelectionDAG &DAG, SDValue *Offsets,
+ unsigned Align) const {
+ SDLoc DL(CombinedOffset);
+ if (auto C = dyn_cast<ConstantSDNode>(CombinedOffset)) {
+ uint32_t Imm = C->getZExtValue();
+ uint32_t SOffset, ImmOffset;
+ if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
+ Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
+ Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
+ Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+ return;
+ }
+ }
+ if (DAG.isBaseWithConstantOffset(CombinedOffset)) {
+ SDValue N0 = CombinedOffset.getOperand(0);
+ SDValue N1 = CombinedOffset.getOperand(1);
+ uint32_t SOffset, ImmOffset;
+ int Offset = cast<ConstantSDNode>(N1)->getSExtValue();
+ if (Offset >= 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
+ Subtarget, Align)) {
+ Offsets[0] = N0;
+ Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
+ Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+ return;
+ }
+ }
+ Offsets[0] = CombinedOffset;
+ Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
+ Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
+}
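As an editorial illustration of the resulting {voffset, soffset, instoffset} triple (not part of the patch, and assuming AMDGPU::splitMUBUFOffset accepts a small, suitably aligned immediate unchanged):

// CombinedOffset = constant 60     ->  Offsets = { 0,              0, 60 }
// CombinedOffset = add %base, 60   ->  Offsets = { %base,          0, 60 }
// CombinedOffset = anything else   ->  Offsets = { CombinedOffset, 0,  0 }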
+
static SDValue getLoadExtOrTrunc(SelectionDAG &DAG,
ISD::LoadExtType ExtType, SDValue Op,
const SDLoc &SL, EVT VT) {
@@ -5513,8 +6310,8 @@ SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const
// FIXME: Constant loads should all be marked invariant.
unsigned AS = Ld->getAddressSpace();
- if (AS != AMDGPUASI.CONSTANT_ADDRESS &&
- AS != AMDGPUASI.CONSTANT_ADDRESS_32BIT &&
+ if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
+ AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
(AS != AMDGPUAS::GLOBAL_ADDRESS || !Ld->isInvariant()))
return SDValue();
@@ -5625,15 +6422,15 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUASI.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
+ AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
- if (!Op->isDivergent() && Alignment >= 4)
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+ if (!Op->isDivergent() && Alignment >= 4 && NumElements < 32)
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
@@ -5641,28 +6438,28 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
//
}
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
- AS == AMDGPUASI.GLOBAL_ADDRESS) {
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
- Alignment >= 4)
+ Alignment >= 4 && NumElements < 32)
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
//
}
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
- AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v4 loads are supported for private and global memory.
return SDValue();
}
- if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
// Depending on the setting of the private_element_size field in the
// resource descriptor, we can only make private accesses up to a certain
// size.
@@ -5681,7 +6478,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
// Use ds_read_b128 if possible.
if (Subtarget->useDS128() && Load->getAlignment() >= 16 &&
MemVT.getStoreSize() == 16)
@@ -5689,6 +6486,17 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (NumElements > 2)
return SplitVectorLoad(Op, DAG);
+
+ // SI has a hardware bug in the LDS / GDS bounds checking: if the base
+ // address is negative, then the instruction is incorrectly treated as
+ // out-of-bounds even if base + offset is in bounds. Split vectorized
+ // loads here to avoid emitting ds_read2_b32. We may re-combine the
+ // load later in the SILoadStoreOptimizer.
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ NumElements == 2 && MemVT.getStoreSize() == 8 &&
+ Load->getAlignment() < 8) {
+ return SplitVectorLoad(Op, DAG);
+ }
}
return SDValue();
}
@@ -6058,17 +6866,17 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUASI.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
+ AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = VT.getVectorNumElements();
- if (AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorStore(Op, DAG);
return SDValue();
- } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
switch (Subtarget->getMaxPrivateElementSize()) {
case 4:
return scalarizeVectorStore(Store, DAG);
@@ -6083,7 +6891,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
// Use ds_write_b128 if possible.
if (Subtarget->useDS128() && Store->getAlignment() >= 16 &&
VT.getStoreSize() == 16)
@@ -6091,6 +6899,18 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (NumElements > 2)
return SplitVectorStore(Op, DAG);
+
+ // SI has a hardware bug in the LDS / GDS bounds checking: if the base
+ // address is negative, then the instruction is incorrectly treated as
+ // out-of-bounds even if base + offset is in bounds. Split vectorized
+ // stores here to avoid emitting ds_write2_b32. We may re-combine the
+ // store later in the SILoadStoreOptimizer.
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ NumElements == 2 && VT.getStoreSize() == 8 &&
+ Store->getAlignment() < 8) {
+ return SplitVectorStore(Op, DAG);
+ }
+
return SDValue();
} else {
llvm_unreachable("unhandled address space");
@@ -6101,17 +6921,24 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
+ SDValue TrigVal;
+
// TODO: Should this propagate fast-math-flags?
- SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
- DAG.getNode(ISD::FMUL, DL, VT, Arg,
- DAG.getConstantFP(0.5/M_PI, DL,
- VT)));
+
+ SDValue OneOver2Pi = DAG.getConstantFP(0.5 / M_PI, DL, VT);
+
+ if (Subtarget->hasTrigReducedRange()) {
+ SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
+ TrigVal = DAG.getNode(AMDGPUISD::FRACT, DL, VT, MulVal);
+ } else {
+ TrigVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
+ }
switch (Op.getOpcode()) {
case ISD::FCOS:
- return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);
+ return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, TrigVal);
case ISD::FSIN:
- return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);
+ return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, TrigVal);
default:
llvm_unreachable("Wrong trig opcode");
}
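Put another way (an editorial gloss, not part of the patch): both paths pre-scale the argument by 1/(2*pi), and subtargets whose SIN/COS hardware only accepts a reduced input range additionally wrap the product in FRACT:

// sin(x) -> SIN_HW(fract(x * 1/(2*pi)))   on hasTrigReducedRange() subtargets
// sin(x) -> SIN_HW(x * 1/(2*pi))          otherwise, and likewise for cos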
@@ -6123,7 +6950,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co
unsigned AS = AtomicNode->getAddressSpace();
// No custom lowering required for local address space
- if (!isFlatGlobalAddrSpace(AS, AMDGPUASI))
+ if (!isFlatGlobalAddrSpace(AS))
return Op;
// Non-local address space requires custom lowering for atomic compare
@@ -6475,6 +7302,29 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
}
}
+ if (RHS.getOpcode() == ISD::SETCC && LHS.getOpcode() == AMDGPUISD::FP_CLASS)
+ std::swap(LHS, RHS);
+
+ if (LHS.getOpcode() == ISD::SETCC && RHS.getOpcode() == AMDGPUISD::FP_CLASS &&
+ RHS.hasOneUse()) {
+ ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+ // and (fcmp seto), (fp_class x, mask) -> fp_class x, mask & ~(p_nan | n_nan)
+ // and (fcmp setuo), (fp_class x, mask) -> fp_class x, mask & (p_nan | n_nan)
+ const ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
+ if ((LCC == ISD::SETO || LCC == ISD::SETUO) && Mask &&
+ (RHS.getOperand(0) == LHS.getOperand(0) &&
+ LHS.getOperand(0) == LHS.getOperand(1))) {
+ const unsigned OrdMask = SIInstrFlags::S_NAN | SIInstrFlags::Q_NAN;
+ unsigned NewMask = LCC == ISD::SETO ?
+ Mask->getZExtValue() & ~OrdMask :
+ Mask->getZExtValue() & OrdMask;
+
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1, RHS.getOperand(0),
+ DAG.getConstant(NewMask, DL, MVT::i32));
+ }
+ }
+
if (VT == MVT::i32 &&
(RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) {
// and x, (sext cc from i1) => select cc, x, 0
@@ -6798,158 +7648,294 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
return AMDGPUTargetLowering::performRcpCombine(N, DCI);
}
-static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
- if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions())
+bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
+ unsigned MaxDepth) const {
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::FCANONICALIZE)
return true;
- return DAG.isKnownNeverNaN(Op);
-}
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ auto F = CFP->getValueAPF();
+ if (F.isNaN() && F.isSignaling())
+ return false;
+ return !F.isDenormal() || denormalsEnabledForType(Op.getValueType());
+ }
-static bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
- const GCNSubtarget *ST, unsigned MaxDepth=5) {
// If source is a result of another standard FP operation it is already in
// canonical form.
+ if (MaxDepth == 0)
+ return false;
- switch (Op.getOpcode()) {
- default:
- break;
-
+ switch (Opcode) {
// These will flush denorms if required.
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
- case ISD::FSQRT:
case ISD::FCEIL:
case ISD::FFLOOR:
case ISD::FMA:
case ISD::FMAD:
-
- case ISD::FCANONICALIZE:
- return true;
-
+ case ISD::FSQRT:
+ case ISD::FDIV:
+ case ISD::FREM:
case ISD::FP_ROUND:
- return Op.getValueType().getScalarType() != MVT::f16 ||
- ST->hasFP16Denormals();
-
case ISD::FP_EXTEND:
- return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
- ST->hasFP16Denormals();
+ case AMDGPUISD::FMUL_LEGACY:
+ case AMDGPUISD::FMAD_FTZ:
+ case AMDGPUISD::RCP:
+ case AMDGPUISD::RSQ:
+ case AMDGPUISD::RSQ_CLAMP:
+ case AMDGPUISD::RCP_LEGACY:
+ case AMDGPUISD::RSQ_LEGACY:
+ case AMDGPUISD::RCP_IFLAG:
+ case AMDGPUISD::TRIG_PREOP:
+ case AMDGPUISD::DIV_SCALE:
+ case AMDGPUISD::DIV_FMAS:
+ case AMDGPUISD::DIV_FIXUP:
+ case AMDGPUISD::FRACT:
+ case AMDGPUISD::LDEXP:
+ case AMDGPUISD::CVT_PKRTZ_F16_F32:
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3:
+ return true;
// It can/will be lowered or combined as a bit operation.
// Need to check their input recursively to handle.
case ISD::FNEG:
case ISD::FABS:
- return (MaxDepth > 0) &&
- isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1);
+ case ISD::FCOPYSIGN:
+ return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
case ISD::FSIN:
case ISD::FCOS:
case ISD::FSINCOS:
return Op.getValueType().getScalarType() != MVT::f16;
- // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms.
- // For such targets need to check their input recursively.
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
-
- if (ST->supportsMinMaxDenormModes() &&
- DAG.isKnownNeverNaN(Op.getOperand(0)) &&
- DAG.isKnownNeverNaN(Op.getOperand(1)))
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case AMDGPUISD::CLAMP:
+ case AMDGPUISD::FMED3:
+ case AMDGPUISD::FMAX3:
+ case AMDGPUISD::FMIN3: {
+ // FIXME: Shouldn't treat the generic operations differently based on these.
+ // However, we aren't really required to flush the result from
+ // minnum/maxnum.
+
+ // snans will be quieted, so we only need to worry about denormals.
+ if (Subtarget->supportsMinMaxDenormModes() ||
+ denormalsEnabledForType(Op.getValueType()))
return true;
- return (MaxDepth > 0) &&
- isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1) &&
- isCanonicalized(DAG, Op.getOperand(1), ST, MaxDepth - 1);
+ // Flushing may be required.
+ // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms. For such
+ // targets need to check their input recursively.
+
+ // FIXME: Does this apply with clamp? It's implemented with max.
+ for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
+ if (!isCanonicalized(DAG, Op.getOperand(I), MaxDepth - 1))
+ return false;
+ }
+
+ return true;
+ }
+ case ISD::SELECT: {
+ return isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1) &&
+ isCanonicalized(DAG, Op.getOperand(2), MaxDepth - 1);
+ }
+ case ISD::BUILD_VECTOR: {
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ SDValue SrcOp = Op.getOperand(i);
+ if (!isCanonicalized(DAG, SrcOp, MaxDepth - 1))
+ return false;
+ }
- case ISD::ConstantFP: {
- auto F = cast<ConstantFPSDNode>(Op)->getValueAPF();
- return !F.isDenormal() && !(F.isNaN() && F.isSignaling());
+ return true;
}
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::EXTRACT_SUBVECTOR: {
+ return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
}
- return false;
+ case ISD::INSERT_VECTOR_ELT: {
+ return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1) &&
+ isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1);
+ }
+ case ISD::UNDEF:
+ // Could be anything.
+ return false;
+
+ case ISD::BITCAST: {
+ // Hack around the mess we make when legalizing extract_vector_elt
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::i16 &&
+ Src.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncSrc = Src.getOperand(0);
+ if (TruncSrc.getValueType() == MVT::i32 &&
+ TruncSrc.getOpcode() == ISD::BITCAST &&
+ TruncSrc.getOperand(0).getValueType() == MVT::v2f16) {
+ return isCanonicalized(DAG, TruncSrc.getOperand(0), MaxDepth - 1);
+ }
+ }
+
+ return false;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID
+ = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ // TODO: Handle more intrinsics
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_cvt_pkrtz:
+ case Intrinsic::amdgcn_cubeid:
+ case Intrinsic::amdgcn_frexp_mant:
+ case Intrinsic::amdgcn_fdot2:
+ return true;
+ default:
+ break;
+ }
+
+ LLVM_FALLTHROUGH;
+ }
+ default:
+ return denormalsEnabledForType(Op.getValueType()) &&
+ DAG.isKnownNeverSNaN(Op);
+ }
+
+ llvm_unreachable("invalid operation");
}
// Constant fold canonicalize.
+SDValue SITargetLowering::getCanonicalConstantFP(
+ SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const {
+ // Flush denormals to 0 if not enabled.
+ if (C.isDenormal() && !denormalsEnabledForType(VT))
+ return DAG.getConstantFP(0.0, SL, VT);
+
+ if (C.isNaN()) {
+ APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
+ if (C.isSignaling()) {
+ // Quiet a signaling NaN.
+ // FIXME: Is this supposed to preserve payload bits?
+ return DAG.getConstantFP(CanonicalQNaN, SL, VT);
+ }
+
+ // Make sure it is the canonical NaN bitpattern.
+ //
+ // TODO: Can we use -1 as the canonical NaN value since it's an inline
+ // immediate?
+ if (C.bitcastToAPInt() != CanonicalQNaN.bitcastToAPInt())
+ return DAG.getConstantFP(CanonicalQNaN, SL, VT);
+ }
+
+ // Already canonical.
+ return DAG.getConstantFP(C, SL, VT);
+}
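A rough stand-alone model of the folding above, as an editorial sketch for f32 only: it assumes denormals are disabled for the type, ignores sign and NaN payload details, and canonicalizeF32BitsSketch is a made-up name, not part of the patch.

#include <cstdint>

static uint32_t canonicalizeF32BitsSketch(uint32_t Bits) {
  const uint32_t CanonicalQNaN = 0x7fc00000u;
  uint32_t Exp  = (Bits >> 23) & 0xffu;
  uint32_t Mant = Bits & 0x7fffffu;
  if (Exp == 0 && Mant != 0)      // denormal: flushed to +0.0 when denorms are off
    return 0;
  if (Exp == 0xffu && Mant != 0)  // any NaN: quieted/normalized to the canonical qNaN
    return CanonicalQNaN;
  return Bits;                    // zeros, normals and infinities are already canonical
}
// e.g. 0x7f800001 (sNaN) -> 0x7fc00000, 0x00000001 (denormal) -> 0x00000000.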
+
+static bool vectorEltWillFoldAway(SDValue Op) {
+ return Op.isUndef() || isa<ConstantFPSDNode>(Op);
+}
+
SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
// fcanonicalize undef -> qnan
if (N0.isUndef()) {
- EVT VT = N->getValueType(0);
APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
return DAG.getConstantFP(QNaN, SDLoc(N), VT);
}
- ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0);
- if (!CFP) {
- SDValue N0 = N->getOperand(0);
- EVT VT = N0.getValueType().getScalarType();
- auto ST = getSubtarget();
-
- if (((VT == MVT::f32 && ST->hasFP32Denormals()) ||
- (VT == MVT::f64 && ST->hasFP64Denormals()) ||
- (VT == MVT::f16 && ST->hasFP16Denormals())) &&
- DAG.isKnownNeverNaN(N0))
- return N0;
+ if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0)) {
+ EVT VT = N->getValueType(0);
+ return getCanonicalConstantFP(DAG, SDLoc(N), VT, CFP->getValueAPF());
+ }
- bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+ // fcanonicalize (build_vector x, k) -> build_vector (fcanonicalize x),
+ // (fcanonicalize k)
+ //
+ // fcanonicalize (build_vector x, undef) -> build_vector (fcanonicalize x), 0
- if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) &&
- isCanonicalized(DAG, N0, ST))
- return N0;
+ // TODO: This could be better with wider vectors that will be split to v2f16,
+ // and to consider uses since there aren't that many packed operations.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && VT == MVT::v2f16 &&
+ isTypeLegal(MVT::v2f16)) {
+ SDLoc SL(N);
+ SDValue NewElts[2];
+ SDValue Lo = N0.getOperand(0);
+ SDValue Hi = N0.getOperand(1);
+ EVT EltVT = Lo.getValueType();
+
+ if (vectorEltWillFoldAway(Lo) || vectorEltWillFoldAway(Hi)) {
+ for (unsigned I = 0; I != 2; ++I) {
+ SDValue Op = N0.getOperand(I);
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ NewElts[I] = getCanonicalConstantFP(DAG, SL, EltVT,
+ CFP->getValueAPF());
+ } else if (Op.isUndef()) {
+ // Handled below based on what the other operand is.
+ NewElts[I] = Op;
+ } else {
+ NewElts[I] = DAG.getNode(ISD::FCANONICALIZE, SL, EltVT, Op);
+ }
+ }
- return SDValue();
- }
+ // If one half is undef, and one is constant, prefer a splat vector rather
+ // than the normal qNaN. If it's a register, prefer 0.0 since that's
+ // cheaper to use and may be free with a packed operation.
+ if (NewElts[0].isUndef()) {
+ if (isa<ConstantFPSDNode>(NewElts[1]))
+ NewElts[0] = isa<ConstantFPSDNode>(NewElts[1]) ?
+ NewElts[1]: DAG.getConstantFP(0.0f, SL, EltVT);
+ }
- const APFloat &C = CFP->getValueAPF();
+ if (NewElts[1].isUndef()) {
+ NewElts[1] = isa<ConstantFPSDNode>(NewElts[0]) ?
+ NewElts[0] : DAG.getConstantFP(0.0f, SL, EltVT);
+ }
- // Flush denormals to 0 if not enabled.
- if (C.isDenormal()) {
- EVT VT = N->getValueType(0);
- EVT SVT = VT.getScalarType();
- if (SVT == MVT::f32 && !Subtarget->hasFP32Denormals())
- return DAG.getConstantFP(0.0, SDLoc(N), VT);
+ return DAG.getBuildVector(VT, SL, NewElts);
+ }
+ }
- if (SVT == MVT::f64 && !Subtarget->hasFP64Denormals())
- return DAG.getConstantFP(0.0, SDLoc(N), VT);
+ unsigned SrcOpc = N0.getOpcode();
- if (SVT == MVT::f16 && !Subtarget->hasFP16Denormals())
- return DAG.getConstantFP(0.0, SDLoc(N), VT);
- }
+ // If it's free to do so, push canonicalizes further up the source, which may
+ // find a canonical source.
+ //
+ // TODO: More opcodes. Note this is unsafe for the _ieee minnum/maxnum for
+ // sNaNs.
+ if (SrcOpc == ISD::FMINNUM || SrcOpc == ISD::FMAXNUM) {
+ auto *CRHS = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ if (CRHS && N0.hasOneUse()) {
+ SDLoc SL(N);
+ SDValue Canon0 = DAG.getNode(ISD::FCANONICALIZE, SL, VT,
+ N0.getOperand(0));
+ SDValue Canon1 = getCanonicalConstantFP(DAG, SL, VT, CRHS->getValueAPF());
+ DCI.AddToWorklist(Canon0.getNode());
- if (C.isNaN()) {
- EVT VT = N->getValueType(0);
- APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
- if (C.isSignaling()) {
- // Quiet a signaling NaN.
- return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
+ return DAG.getNode(N0.getOpcode(), SL, VT, Canon0, Canon1);
}
-
- // Make sure it is the canonical NaN bitpattern.
- //
- // TODO: Can we use -1 as the canonical NaN value since it's an inline
- // immediate?
- if (C.bitcastToAPInt() != CanonicalQNaN.bitcastToAPInt())
- return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
}
- return N0;
+ return isCanonicalized(DAG, N0) ? N0 : SDValue();
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
switch (Opc) {
case ISD::FMAXNUM:
+ case ISD::FMAXNUM_IEEE:
return AMDGPUISD::FMAX3;
case ISD::SMAX:
return AMDGPUISD::SMAX3;
case ISD::UMAX:
return AMDGPUISD::UMAX3;
case ISD::FMINNUM:
+ case ISD::FMINNUM_IEEE:
return AMDGPUISD::FMIN3;
case ISD::SMIN:
return AMDGPUISD::SMIN3;
@@ -7044,11 +8030,18 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
// then give the other result, which is different from med3 with a NaN
// input.
SDValue Var = Op0.getOperand(0);
- if (!isKnownNeverSNan(DAG, Var))
+ if (!DAG.isKnownNeverSNaN(Var))
return SDValue();
- return DAG.getNode(AMDGPUISD::FMED3, SL, K0->getValueType(0),
- Var, SDValue(K0, 0), SDValue(K1, 0));
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+
+ if ((!K0->hasOneUse() ||
+ TII->isInlineConstant(K0->getValueAPF().bitcastToAPInt())) &&
+ (!K1->hasOneUse() ||
+ TII->isInlineConstant(K1->getValueAPF().bitcastToAPInt()))) {
+ return DAG.getNode(AMDGPUISD::FMED3, SL, K0->getValueType(0),
+ Var, SDValue(K0, 0), SDValue(K1, 0));
+ }
}
return SDValue();
@@ -7109,6 +8102,7 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
// fminnum(fmaxnum(x, K0), K1), K0 < K1 && !is_snan(x) -> fmed3(x, K0, K1)
if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) ||
+ (Opc == ISD::FMINNUM_IEEE && Op0.getOpcode() == ISD::FMAXNUM_IEEE) ||
(Opc == AMDGPUISD::FMIN_LEGACY &&
Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
(VT == MVT::f32 || VT == MVT::f64 ||
@@ -7216,9 +8210,11 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
switch(Opc) {
default:
- return SDValue();
+ break;
// TODO: Support other binary operations.
case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
case ISD::ADD:
case ISD::UMIN:
case ISD::UMAX:
@@ -7226,25 +8222,54 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
case ISD::SMAX:
case ISD::FMAXNUM:
case ISD::FMINNUM:
- return DAG.getNode(Opc, SL, EltVT,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Vec.getOperand(0), Idx),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Vec.getOperand(1), Idx));
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE: {
+ SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec.getOperand(0), Idx);
+ SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec.getOperand(1), Idx);
+
+ DCI.AddToWorklist(Elt0.getNode());
+ DCI.AddToWorklist(Elt1.getNode());
+ return DAG.getNode(Opc, SL, EltVT, Elt0, Elt1, Vec->getFlags());
+ }
}
}
- if (!DCI.isBeforeLegalize())
- return SDValue();
-
unsigned VecSize = VecVT.getSizeInBits();
unsigned EltSize = EltVT.getSizeInBits();
+ // EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
+ // This eliminates a non-constant index and the subsequent movrel or scratch access.
+ // Sub-dword vectors of size 2 dwords or less have a better implementation.
+ // Vectors bigger than 8 dwords would yield too many v_cndmask_b32
+ // instructions.
+ if (VecSize <= 256 && (VecSize > 64 || EltSize >= 32) &&
+ !isa<ConstantSDNode>(N->getOperand(1))) {
+ SDLoc SL(N);
+ SDValue Idx = N->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
+ SDValue V;
+ for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
+ SDValue IC = DAG.getConstant(I, SL, IdxVT);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
+ if (I == 0)
+ V = Elt;
+ else
+ V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
+ }
+ return V;
+ }
+
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
// Try to turn sub-dword accesses of vectors into accesses of the same 32-bit
// elements. This exposes more load reduction opportunities by replacing
// multiple small extract_vector_elements with a single 32-bit extract.
auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (EltSize <= 16 &&
+ if (isa<MemSDNode>(Vec) &&
+ EltSize <= 16 &&
EltVT.isByteSized() &&
VecSize > 32 &&
VecSize % 32 == 0 &&
@@ -7274,46 +8299,40 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
return SDValue();
}
-static bool convertBuildVectorCastElt(SelectionDAG &DAG,
- SDValue &Lo, SDValue &Hi) {
- if (Hi.getOpcode() == ISD::BITCAST &&
- Hi.getOperand(0).getValueType() == MVT::f16 &&
- (isa<ConstantSDNode>(Lo) || Lo.isUndef())) {
- Lo = DAG.getNode(ISD::BITCAST, SDLoc(Lo), MVT::f16, Lo);
- Hi = Hi.getOperand(0);
- return true;
- }
-
- return false;
-}
-
-SDValue SITargetLowering::performBuildVectorCombine(
- SDNode *N, DAGCombinerInfo &DCI) const {
- SDLoc SL(N);
+SDValue
+SITargetLowering::performInsertVectorEltCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(2);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned VecSize = VecVT.getSizeInBits();
+ unsigned EltSize = EltVT.getSizeInBits();
- if (!isTypeLegal(MVT::v2i16))
+ // INSERT_VECTOR_ELT (<n x e>, var-idx)
+ // => BUILD_VECTOR n x select (e, const-idx)
+ // This eliminates a non-constant index and the subsequent movrel or scratch access.
+ // Sub-dword vectors of size 2 dwords or less have a better implementation.
+ // Vectors bigger than 8 dwords would yield too many v_cndmask_b32
+ // instructions.
+ if (isa<ConstantSDNode>(Idx) ||
+ VecSize > 256 || (VecSize <= 64 && EltSize < 32))
return SDValue();
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
-
- if (VT == MVT::v2i16) {
- SDValue Lo = N->getOperand(0);
- SDValue Hi = N->getOperand(1);
- // v2i16 build_vector (const|undef), (bitcast f16:$x)
- // -> bitcast (v2f16 build_vector const|undef, $x
- if (convertBuildVectorCastElt(DAG, Lo, Hi)) {
- SDValue NewVec = DAG.getBuildVector(MVT::v2f16, SL, { Lo, Hi });
- return DAG.getNode(ISD::BITCAST, SL, VT, NewVec);
- }
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ SDValue Ins = N->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
- if (convertBuildVectorCastElt(DAG, Hi, Lo)) {
- SDValue NewVec = DAG.getBuildVector(MVT::v2f16, SL, { Hi, Lo });
- return DAG.getNode(ISD::BITCAST, SL, VT, NewVec);
- }
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
+ SDValue IC = DAG.getConstant(I, SL, IdxVT);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
+ SDValue V = DAG.getSelectCC(SL, Idx, IC, Ins, Elt, ISD::SETEQ);
+ Ops.push_back(V);
}
- return SDValue();
+ return DAG.getBuildVector(VecVT, SL, Ops);
}
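For intuition, the variable-index expansion used here (and by the matching extract_vector_elt combine above) replaces the dynamic index with one compare-and-select per element. The sketch below is editorial, not part of the patch; insertEltSketch is an illustrative name and a 4-element vector is assumed.

template <typename T>
static void insertEltSketch(T (&Out)[4], const T (&V)[4], T Ins, unsigned Idx) {
  for (unsigned I = 0; I != 4; ++I)
    Out[I] = (Idx == I) ? Ins : V[I]; // each element becomes one cndmask-style select
}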
unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
@@ -7568,7 +8587,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
EVT VT = N->getValueType(0);
SDLoc SL(N);
- if (!Subtarget->hasDLInsts() || VT != MVT::f32)
+ if (!Subtarget->hasDotInsts() || VT != MVT::f32)
return SDValue();
// FMA((F32)S0.x, (F32)S1. x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
@@ -7705,16 +8724,26 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
VT != MVT::f16))
return SDValue();
- // Match isinf pattern
+ // Match isinf/isfinite pattern
// (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
- if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) {
+ // (fcmp one (fabs x), inf) -> (fp_class x,
+ // (p_normal | n_normal | p_subnormal | n_subnormal | p_zero | n_zero))
+ if ((CC == ISD::SETOEQ || CC == ISD::SETONE) && LHS.getOpcode() == ISD::FABS) {
const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
if (!CRHS)
return SDValue();
const APFloat &APF = CRHS->getValueAPF();
if (APF.isInfinity() && !APF.isNegative()) {
- unsigned Mask = SIInstrFlags::P_INFINITY | SIInstrFlags::N_INFINITY;
+ const unsigned IsInfMask = SIInstrFlags::P_INFINITY |
+ SIInstrFlags::N_INFINITY;
+ const unsigned IsFiniteMask = SIInstrFlags::N_ZERO |
+ SIInstrFlags::P_ZERO |
+ SIInstrFlags::N_NORMAL |
+ SIInstrFlags::P_NORMAL |
+ SIInstrFlags::N_SUBNORMAL |
+ SIInstrFlags::P_SUBNORMAL;
+ unsigned Mask = CC == ISD::SETOEQ ? IsInfMask : IsFiniteMask;
return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(Mask, SL, MVT::i32));
}
@@ -7759,8 +8788,7 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.ShrinkDemandedConstant(Src, Demanded, TLO) ||
- TLI.SimplifyDemandedBits(Src, Demanded, Known, TLO)) {
+ if (TLI.SimplifyDemandedBits(Src, Demanded, Known, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
}
@@ -7792,6 +8820,9 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ return SDValue();
+
switch (N->getOpcode()) {
default:
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
@@ -7810,17 +8841,15 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performSetCCCombine(N, DCI);
case ISD::FMAXNUM:
case ISD::FMINNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE:
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN:
case AMDGPUISD::FMIN_LEGACY:
- case AMDGPUISD::FMAX_LEGACY: {
- if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
- getTargetMachine().getOptLevel() > CodeGenOpt::None)
- return performMinMaxCombine(N, DCI);
- break;
- }
+ case AMDGPUISD::FMAX_LEGACY:
+ return performMinMaxCombine(N, DCI);
case ISD::FMA:
return performFMACombine(N, DCI);
case ISD::LOAD: {
@@ -7912,8 +8941,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DCI);
- case ISD::BUILD_VECTOR:
- return performBuildVectorCombine(N, DCI);
+ case ISD::INSERT_VECTOR_ELT:
+ return performInsertVectorEltCombine(N, DCI);
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
@@ -7926,6 +8955,7 @@ static unsigned SubIdx2Lane(unsigned Idx) {
case AMDGPU::sub1: return 1;
case AMDGPU::sub2: return 2;
case AMDGPU::sub3: return 3;
+ case AMDGPU::sub4: return 4; // Possible with TFE/LWE
}
}
@@ -7939,11 +8969,16 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
if (D16Idx >= 0 && Node->getConstantOperandVal(D16Idx))
return Node; // not implemented for D16
- SDNode *Users[4] = { nullptr };
+ SDNode *Users[5] = { nullptr };
unsigned Lane = 0;
unsigned DmaskIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) - 1;
unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
unsigned NewDmask = 0;
+ unsigned TFEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::tfe) - 1;
+ unsigned LWEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::lwe) - 1;
+ bool UsesTFC = (Node->getConstantOperandVal(TFEIdx) ||
+ Node->getConstantOperandVal(LWEIdx)) ? 1 : 0;
+ unsigned TFCLane = 0;
bool HasChain = Node->getNumValues() > 1;
if (OldDmask == 0) {
@@ -7951,6 +8986,12 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
return Node;
}
+ unsigned OldBitsSet = countPopulation(OldDmask);
+ // Work out which is the TFE/LWE lane if that is enabled.
+ if (UsesTFC) {
+ TFCLane = OldBitsSet;
+ }
+
// Try to figure out the used register components
for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
I != E; ++I) {
@@ -7970,28 +9011,49 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// set, etc.
Lane = SubIdx2Lane(I->getConstantOperandVal(1));
- // Set which texture component corresponds to the lane.
- unsigned Comp;
- for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
- Comp = countTrailingZeros(Dmask);
- Dmask &= ~(1 << Comp);
- }
+ // Check if the use is for the TFE/LWE generated result at VGPRn+1.
+ if (UsesTFC && Lane == TFCLane) {
+ Users[Lane] = *I;
+ } else {
+ // Set which texture component corresponds to the lane.
+ unsigned Comp;
+ for (unsigned i = 0, Dmask = OldDmask; (i <= Lane) && (Dmask != 0); i++) {
+ Comp = countTrailingZeros(Dmask);
+ Dmask &= ~(1 << Comp);
+ }
- // Abort if we have more than one user per component
- if (Users[Lane])
- return Node;
+ // Abort if we have more than one user per component.
+ if (Users[Lane])
+ return Node;
- Users[Lane] = *I;
- NewDmask |= 1 << Comp;
+ Users[Lane] = *I;
+ NewDmask |= 1 << Comp;
+ }
}
+ // Don't allow 0 dmask, as hardware assumes one channel enabled.
+ bool NoChannels = !NewDmask;
+ if (NoChannels) {
+ // If the original dmask has one channel - then nothing to do
+ if (OldBitsSet == 1)
+ return Node;
+ // Use an arbitrary dmask - required for the instruction to work
+ NewDmask = 1;
+ }
// Abort if there's no change
if (NewDmask == OldDmask)
return Node;
unsigned BitsSet = countPopulation(NewDmask);
- int NewOpcode = AMDGPU::getMaskedMIMGOp(Node->getMachineOpcode(), BitsSet);
+ // Check for TFE or LWE - increase the number of channels by one to account
+ // for the extra return value
+ // This will need adjustment for D16 if this is also included in
+ // adjustWriteMask (this function) but at present D16 is excluded.
+ unsigned NewChannels = BitsSet + UsesTFC;
+
+ int NewOpcode =
+ AMDGPU::getMaskedMIMGOp(Node->getMachineOpcode(), NewChannels);
assert(NewOpcode != -1 &&
NewOpcode != static_cast<int>(Node->getMachineOpcode()) &&
"failed to find equivalent MIMG op");
@@ -8004,8 +9066,9 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
MVT SVT = Node->getValueType(0).getVectorElementType().getSimpleVT();
- MVT ResultVT = BitsSet == 1 ?
- SVT : MVT::getVectorVT(SVT, BitsSet == 3 ? 4 : BitsSet);
+ MVT ResultVT = NewChannels == 1 ?
+ SVT : MVT::getVectorVT(SVT, NewChannels == 3 ? 4 :
+ NewChannels == 5 ? 8 : NewChannels);
SDVTList NewVTList = HasChain ?
DAG.getVTList(ResultVT, MVT::Other) : DAG.getVTList(ResultVT);
@@ -8015,11 +9078,11 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
if (HasChain) {
// Update chain.
- NewNode->setMemRefs(Node->memoperands_begin(), Node->memoperands_end());
+ DAG.setNodeMemRefs(NewNode, Node->memoperands());
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), SDValue(NewNode, 1));
}
- if (BitsSet == 1) {
+ if (NewChannels == 1) {
assert(Node->hasNUsesOfValue(1, 0));
SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY,
SDLoc(Node), Users[Lane]->getValueType(0),
@@ -8029,19 +9092,24 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
}
// Update the users of the node with the new indices
- for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
+ for (unsigned i = 0, Idx = AMDGPU::sub0; i < 5; ++i) {
SDNode *User = Users[i];
- if (!User)
- continue;
-
- SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
- DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
+ if (!User) {
+ // Handle the special case of NoChannels. We set NewDmask to 1 above, but
+ // Users[0] is still nullptr because channel 0 doesn't really have a use.
+ if (i || !NoChannels)
+ continue;
+ } else {
+ SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
+ DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
+ }
switch (Idx) {
default: break;
case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
+ case AMDGPU::sub3: Idx = AMDGPU::sub4; break;
}
}
@@ -8457,49 +9525,56 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits);
}
+LLVM_ATTRIBUTE_UNUSED
+static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
+ assert(N->getOpcode() == ISD::CopyFromReg);
+ do {
+ // Follow the chain until we find an INLINEASM node.
+ N = N->getOperand(0).getNode();
+ if (N->getOpcode() == ISD::INLINEASM)
+ return true;
+ } while (N->getOpcode() == ISD::CopyFromReg);
+ return false;
+}
+
bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
- FunctionLoweringInfo * FLI, DivergenceAnalysis * DA) const
+ FunctionLoweringInfo * FLI, LegacyDivergenceAnalysis * KDA) const
{
switch (N->getOpcode()) {
- case ISD::Register:
case ISD::CopyFromReg:
{
- const RegisterSDNode *R = nullptr;
- if (N->getOpcode() == ISD::Register) {
- R = dyn_cast<RegisterSDNode>(N);
- }
- else {
- R = dyn_cast<RegisterSDNode>(N->getOperand(1));
- }
- if (R)
- {
- const MachineFunction * MF = FLI->MF;
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
- unsigned Reg = R->getReg();
- if (TRI.isPhysicalRegister(Reg))
- return TRI.isVGPR(MRI, Reg);
-
- if (MRI.isLiveIn(Reg)) {
- // workitem.id.x workitem.id.y workitem.id.z
- // Any VGPR formal argument is also considered divergent
- if (TRI.isVGPR(MRI, Reg))
- return true;
- // Formal arguments of non-entry functions
- // are conservatively considered divergent
- else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
- return true;
- }
- return !DA || DA->isDivergent(FLI->getValueFromVirtualReg(Reg));
+ const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
+ const MachineFunction * MF = FLI->MF;
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
+ unsigned Reg = R->getReg();
+ if (TRI.isPhysicalRegister(Reg))
+ return !TRI.isSGPRReg(MRI, Reg);
+
+ if (MRI.isLiveIn(Reg)) {
+ // workitem.id.x workitem.id.y workitem.id.z
+ // Any VGPR formal argument is also considered divergent
+ if (!TRI.isSGPRReg(MRI, Reg))
+ return true;
+ // Formal arguments of non-entry functions
+ // are conservatively considered divergent
+ else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
+ return true;
+ return false;
}
+ const Value *V = FLI->getValueFromVirtualReg(Reg);
+ if (V)
+ return KDA->isDivergent(V);
+ assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
+ return !TRI.isSGPRReg(MRI, Reg);
}
break;
case ISD::LOAD: {
- const LoadSDNode *L = dyn_cast<LoadSDNode>(N);
- if (L->getMemOperand()->getAddrSpace() ==
- Subtarget->getAMDGPUAS().PRIVATE_ADDRESS)
- return true;
+ const LoadSDNode *L = cast<LoadSDNode>(N);
+ unsigned AS = L->getAddressSpace();
+ // A flat load may access private memory.
+ return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
} break;
case ISD::CALLSEQ_END:
return true;
@@ -8522,3 +9597,30 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
}
return false;
}
+
+bool SITargetLowering::denormalsEnabledForType(EVT VT) const {
+ switch (VT.getScalarType().getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ return Subtarget->hasFP32Denormals();
+ case MVT::f64:
+ return Subtarget->hasFP64Denormals();
+ case MVT::f16:
+ return Subtarget->hasFP16Denormals();
+ default:
+ return false;
+ }
+}
+
+bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ if (Op.getOpcode() == AMDGPUISD::CLAMP) {
+ if (Subtarget->enableDX10Clamp())
+ return true; // Clamped to 0.
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+
+ return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG,
+ SNaN, Depth);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 5b3d49b3d8e3..bcef519ee663 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -60,11 +60,22 @@ private:
MVT VT, unsigned Offset) const;
SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const;
+ SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
+ SDValue GLC, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ // The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
+ // (the offset that is included in bounds checking and swizzling, to be split
+ // between the instruction's voffset and immoffset fields) and soffset (the
+ // offset that is excluded from bounds checking and swizzling, to go in the
+ // instruction's soffset field). This function takes the first kind of
+ // offset and figures out how to split it between voffset and immoffset.
+ std::pair<SDValue, SDValue> splitBufferOffsets(SDValue Offset,
+ SelectionDAG &DAG) const;
+
SDValue widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -81,7 +92,7 @@ private:
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
- SelectionDAG &DAG,
+ SelectionDAG &DAG, ArrayRef<SDValue> Ops,
bool IsIntrinsic = false) const;
SDValue handleD16VData(SDValue VData, SelectionDAG &DAG) const;
@@ -99,6 +110,7 @@ private:
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const;
@@ -130,6 +142,8 @@ private:
SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT,
+ const APFloat &C) const;
SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
@@ -140,7 +154,7 @@ private:
SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue performBuildVectorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
unsigned getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0, const SDNode *N1) const;
@@ -156,7 +170,6 @@ private:
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
- bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
unsigned isCFIntrinsic(const SDNode *Intr) const;
@@ -175,6 +188,12 @@ private:
/// global value \p GV, false otherwise.
bool shouldEmitPCReloc(const GlobalValue *GV) const;
+ // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
+ // three offsets (voffset, soffset and instoffset) into the SDValue[3] array
+ // pointed to by Offsets.
+ void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
+ SDValue *Offsets, unsigned Align = 4) const;
+
public:
SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);
@@ -192,6 +211,7 @@ public:
SmallVectorImpl<Value*> &/*Ops*/,
Type *&/*AccessTy*/) const override;
+ bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I = nullptr) const override;
@@ -215,7 +235,7 @@ public:
bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const override;
+ getPreferredVectorAction(MVT VT) const override;
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
@@ -248,11 +268,11 @@ public:
void passSpecialInputs(
CallLoweringInfo &CLI,
+ CCState &CCInfo,
const SIMachineFunctionInfo &Info,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains,
- SDValue Chain,
- SDValue StackPtr) const;
+ SDValue Chain) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -322,7 +342,16 @@ public:
unsigned Depth = 0) const override;
bool isSDNodeSourceOfDivergence(const SDNode *N,
- FunctionLoweringInfo *FLI, DivergenceAnalysis *DA) const override;
+ FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override;
+
+ bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
+ unsigned MaxDepth = 5) const;
+ bool denormalsEnabledForType(EVT VT) const;
+
+ bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN = false,
+ unsigned Depth = 0) const override;
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index dc9397cf7b85..ba21a5ce1293 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -66,6 +66,8 @@ private:
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
+ bool optimizeVccBranch(MachineInstr &MI) const;
+
public:
static char ID;
@@ -320,6 +322,96 @@ bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
return true;
}
+bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const {
+ // Match:
+ // sreg = -1
+ // vcc = S_AND_B64 exec, sreg
+ // S_CBRANCH_VCC[N]Z
+ // =>
+ // S_CBRANCH_EXEC[N]Z
+ bool Changed = false;
+ MachineBasicBlock &MBB = *MI.getParent();
+ const unsigned CondReg = AMDGPU::VCC;
+ const unsigned ExecReg = AMDGPU::EXEC;
+ const unsigned And = AMDGPU::S_AND_B64;
+
+ MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
+ E = MBB.rend();
+ bool ReadsCond = false;
+ unsigned Threshold = 5;
+ for (++A ; A != E ; ++A) {
+ if (!--Threshold)
+ return false;
+ if (A->modifiesRegister(ExecReg, TRI))
+ return false;
+ if (A->modifiesRegister(CondReg, TRI)) {
+ if (!A->definesRegister(CondReg, TRI) || A->getOpcode() != And)
+ return false;
+ break;
+ }
+ ReadsCond |= A->readsRegister(CondReg, TRI);
+ }
+ if (A == E)
+ return false;
+
+ MachineOperand &Op1 = A->getOperand(1);
+ MachineOperand &Op2 = A->getOperand(2);
+ if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) {
+ TII->commuteInstruction(*A);
+ Changed = true;
+ }
+ if (Op1.getReg() != ExecReg)
+ return Changed;
+ if (Op2.isImm() && Op2.getImm() != -1)
+ return Changed;
+
+ unsigned SReg = AMDGPU::NoRegister;
+ if (Op2.isReg()) {
+ SReg = Op2.getReg();
+ auto M = std::next(A);
+ bool ReadsSreg = false;
+ for ( ; M != E ; ++M) {
+ if (M->definesRegister(SReg, TRI))
+ break;
+ if (M->modifiesRegister(SReg, TRI))
+ return Changed;
+ ReadsSreg |= M->readsRegister(SReg, TRI);
+ }
+ if (M == E ||
+ !M->isMoveImmediate() ||
+ !M->getOperand(1).isImm() ||
+ M->getOperand(1).getImm() != -1)
+ return Changed;
+ // First, if sreg is only used in the AND instruction, fold the immediate
+ // into that AND.
+ if (!ReadsSreg && Op2.isKill()) {
+ A->getOperand(2).ChangeToImmediate(-1);
+ M->eraseFromParent();
+ }
+ }
+
+ if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
+ MI.killsRegister(CondReg, TRI))
+ A->eraseFromParent();
+
+ bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
+ if (SReg == ExecReg) {
+ if (IsVCCZ) {
+ MI.eraseFromParent();
+ return true;
+ }
+ MI.setDesc(TII->get(AMDGPU::S_BRANCH));
+ } else {
+ MI.setDesc(TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ
+ : AMDGPU::S_CBRANCH_EXECNZ));
+ }
+
+ MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
+ MI.addImplicitDefUseOperands(*MBB.getParent());
+
+ return true;
+}
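Condensed, the final rewrite decision above behaves like the editorial sketch below (not part of the patch). SRegIsExec stands for the case where the matched AND was vcc = exec & exec, and a return value of 0 means the S_CBRANCH_VCCZ is simply erased.

static unsigned rewrittenBranchOpcSketch(bool IsVCCZ, bool SRegIsExec) {
  if (SRegIsExec)                           // the AND was vcc = exec & exec
    return IsVCCZ ? 0 : AMDGPU::S_BRANCH;   // erase vs. make unconditional
  return IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ   // the AND was vcc = exec & -1
                : AMDGPU::S_CBRANCH_EXECNZ;
}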
+
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
@@ -384,7 +476,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
kill(MI);
if (ExecBranchStack.empty()) {
- if (skipIfDead(MI, *NextBB)) {
+ if (NextBB != BE && skipIfDead(MI, *NextBB)) {
HaveSkipBlock = true;
NextBB = std::next(BI);
BE = MF.end();
@@ -417,6 +509,11 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
}
break;
+ case AMDGPU::S_CBRANCH_VCCZ:
+ case AMDGPU::S_CBRANCH_VCCNZ:
+ MadeChange |= optimizeVccBranch(MI);
+ break;
+
default:
break;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index d456e3d9b94d..afc0b4467610 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -13,6 +13,14 @@
/// Memory reads and writes are issued asynchronously, so we need to insert
/// S_WAITCNT instructions when we want to access any of their results or
/// overwrite any register that's used asynchronously.
+///
+/// TODO: This pass currently keeps one timeline per hardware counter. A more
+/// finely-grained approach that keeps one timeline per event type could
+/// sometimes get away with generating weaker s_waitcnt instructions. For
+/// example, when both SMEM and LDS are in flight and we need to wait for
+/// the i-th-last LDS instruction, then an lgkmcnt(i) is actually sufficient,
+/// but the pass will currently generate a conservative lgkmcnt(0) because
+/// multiple event types are in flight.
//
//===----------------------------------------------------------------------===//
@@ -33,7 +41,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -69,6 +76,25 @@ static cl::opt<unsigned> ForceEmitZeroFlag(
namespace {
+template <typename EnumT>
+class enum_iterator
+ : public iterator_facade_base<enum_iterator<EnumT>,
+ std::forward_iterator_tag, const EnumT> {
+ EnumT Value;
+public:
+ enum_iterator() = default;
+ enum_iterator(EnumT Value) : Value(Value) {}
+
+ enum_iterator &operator++() {
+ Value = static_cast<EnumT>(Value + 1);
+ return *this;
+ }
+
+ bool operator==(const enum_iterator &RHS) const { return Value == RHS.Value; }
+
+ EnumT operator*() const { return Value; }
+};
+
// Class of object that encapsulates latest instruction counter score
// associated with the operand. Used for determining whether
// s_waitcnt instruction needs to be emitted.
@@ -77,12 +103,17 @@ namespace {
enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, NUM_INST_CNTS };
+iterator_range<enum_iterator<InstCounterType>> inst_counter_types() {
+ return make_range(enum_iterator<InstCounterType>(VM_CNT),
+ enum_iterator<InstCounterType>(NUM_INST_CNTS));
+}
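A trivial usage sketch of the range helper above (editorial, not part of the patch; the pass uses the same pattern below when clearing per-counter state):

#include <cassert>

unsigned NumCounters = 0;
for (InstCounterType T : inst_counter_types()) {
  (void)T;          // T takes VM_CNT, LGKM_CNT, EXP_CNT in order
  ++NumCounters;
}
assert(NumCounters == NUM_INST_CNTS);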
+
using RegInterval = std::pair<signed, signed>;
struct {
- int32_t VmcntMax;
- int32_t ExpcntMax;
- int32_t LgkmcntMax;
+ uint32_t VmcntMax;
+ uint32_t ExpcntMax;
+ uint32_t LgkmcntMax;
int32_t NumVGPRsMax;
int32_t NumSGPRsMax;
} HardwareLimits;
@@ -108,6 +139,14 @@ enum WaitEventType {
NUM_WAIT_EVENTS,
};
+static const uint32_t WaitEventMaskForInst[NUM_INST_CNTS] = {
+ (1 << VMEM_ACCESS),
+ (1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) |
+ (1 << SQ_MESSAGE),
+ (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) |
+ (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS),
+};
+
// The mapping is:
// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
// SQ_MAX_PGM_VGPRS .. NUM_ALL_VGPRS-1 extra VGPR-like slots
@@ -122,30 +161,38 @@ enum RegisterMapping {
NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts.
};
-#define ForAllWaitEventType(w) \
- for (enum WaitEventType w = (enum WaitEventType)0; \
- (w) < (enum WaitEventType)NUM_WAIT_EVENTS; \
- (w) = (enum WaitEventType)((w) + 1))
+void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
+ switch (T) {
+ case VM_CNT:
+ Wait.VmCnt = std::min(Wait.VmCnt, Count);
+ break;
+ case EXP_CNT:
+ Wait.ExpCnt = std::min(Wait.ExpCnt, Count);
+ break;
+ case LGKM_CNT:
+ Wait.LgkmCnt = std::min(Wait.LgkmCnt, Count);
+ break;
+ default:
+ llvm_unreachable("bad InstCounterType");
+ }
+}
-// This is a per-basic-block object that maintains current score brackets
-// of each wait counter, and a per-register scoreboard for each wait counter.
+// This object maintains the current score brackets of each wait counter, and
+// a per-register scoreboard for each wait counter.
+//
// We also maintain the latest score for every event type that can change the
// waitcnt in order to know if there are multiple types of events within
// the brackets. When multiple types of event happen in the bracket,
// wait count may get decreased out of order, therefore we need to put in
// "s_waitcnt 0" before use.
-class BlockWaitcntBrackets {
+class WaitcntBrackets {
public:
- BlockWaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
+ WaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {
+ for (auto T : inst_counter_types())
memset(VgprScores[T], 0, sizeof(VgprScores[T]));
- }
}
- ~BlockWaitcntBrackets() = default;
-
- static int32_t getWaitCountMax(InstCounterType T) {
+ static uint32_t getWaitCountMax(InstCounterType T) {
switch (T) {
case VM_CNT:
return HardwareLimits.VmcntMax;
@@ -159,33 +206,14 @@ public:
return 0;
}
- void setScoreLB(InstCounterType T, int32_t Val) {
- assert(T < NUM_INST_CNTS);
- if (T >= NUM_INST_CNTS)
- return;
- ScoreLBs[T] = Val;
- }
-
- void setScoreUB(InstCounterType T, int32_t Val) {
- assert(T < NUM_INST_CNTS);
- if (T >= NUM_INST_CNTS)
- return;
- ScoreUBs[T] = Val;
- if (T == EXP_CNT) {
- int32_t UB = (int)(ScoreUBs[T] - getWaitCountMax(EXP_CNT));
- if (ScoreLBs[T] < UB)
- ScoreLBs[T] = UB;
- }
- }
-
- int32_t getScoreLB(InstCounterType T) {
+ uint32_t getScoreLB(InstCounterType T) const {
assert(T < NUM_INST_CNTS);
if (T >= NUM_INST_CNTS)
return 0;
return ScoreLBs[T];
}
- int32_t getScoreUB(InstCounterType T) {
+ uint32_t getScoreUB(InstCounterType T) const {
assert(T < NUM_INST_CNTS);
if (T >= NUM_INST_CNTS)
return 0;
@@ -194,89 +222,56 @@ public:
// Mapping from event to counter.
InstCounterType eventCounter(WaitEventType E) {
- switch (E) {
- case VMEM_ACCESS:
+ if (E == VMEM_ACCESS)
return VM_CNT;
- case LDS_ACCESS:
- case GDS_ACCESS:
- case SQ_MESSAGE:
- case SMEM_ACCESS:
+ if (WaitEventMaskForInst[LGKM_CNT] & (1 << E))
return LGKM_CNT;
- case EXP_GPR_LOCK:
- case GDS_GPR_LOCK:
- case VMW_GPR_LOCK:
- case EXP_POS_ACCESS:
- case EXP_PARAM_ACCESS:
- return EXP_CNT;
- default:
- llvm_unreachable("unhandled event type");
- }
- return NUM_INST_CNTS;
+ assert(WaitEventMaskForInst[EXP_CNT] & (1 << E));
+ return EXP_CNT;
}
- void setRegScore(int GprNo, InstCounterType T, int32_t Val) {
- if (GprNo < NUM_ALL_VGPRS) {
- if (GprNo > VgprUB) {
- VgprUB = GprNo;
- }
- VgprScores[T][GprNo] = Val;
- } else {
- assert(T == LGKM_CNT);
- if (GprNo - NUM_ALL_VGPRS > SgprUB) {
- SgprUB = GprNo - NUM_ALL_VGPRS;
- }
- SgprScores[GprNo - NUM_ALL_VGPRS] = Val;
- }
- }
-
- int32_t getRegScore(int GprNo, InstCounterType T) {
+ uint32_t getRegScore(int GprNo, InstCounterType T) {
if (GprNo < NUM_ALL_VGPRS) {
return VgprScores[T][GprNo];
}
+ assert(T == LGKM_CNT);
return SgprScores[GprNo - NUM_ALL_VGPRS];
}
void clear() {
memset(ScoreLBs, 0, sizeof(ScoreLBs));
memset(ScoreUBs, 0, sizeof(ScoreUBs));
- memset(EventUBs, 0, sizeof(EventUBs));
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
+ PendingEvents = 0;
+ memset(MixedPendingEvents, 0, sizeof(MixedPendingEvents));
+ for (auto T : inst_counter_types())
memset(VgprScores[T], 0, sizeof(VgprScores[T]));
- }
memset(SgprScores, 0, sizeof(SgprScores));
}
+ bool merge(const WaitcntBrackets &Other);
+
RegInterval getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII,
const MachineRegisterInfo *MRI,
const SIRegisterInfo *TRI, unsigned OpNo,
bool Def) const;
- void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
- const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
- unsigned OpNo, int32_t Val);
-
- void setWaitAtBeginning() { WaitAtBeginning = true; }
- void clearWaitAtBeginning() { WaitAtBeginning = false; }
- bool getWaitAtBeginning() const { return WaitAtBeginning; }
- void setEventUB(enum WaitEventType W, int32_t Val) { EventUBs[W] = Val; }
int32_t getMaxVGPR() const { return VgprUB; }
int32_t getMaxSGPR() const { return SgprUB; }
- int32_t getEventUB(enum WaitEventType W) const {
- assert(W < NUM_WAIT_EVENTS);
- return EventUBs[W];
- }
-
- bool counterOutOfOrder(InstCounterType T);
- unsigned int updateByWait(InstCounterType T, int ScoreToWait);
+ bool counterOutOfOrder(InstCounterType T) const;
+ bool simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
+ bool simplifyWaitcnt(InstCounterType T, unsigned &Count) const;
+ void determineWait(InstCounterType T, uint32_t ScoreToWait,
+ AMDGPU::Waitcnt &Wait) const;
+ void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
+ void applyWaitcnt(InstCounterType T, unsigned Count);
void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
const MachineRegisterInfo *MRI, WaitEventType E,
MachineInstr &MI);
- bool hasPendingSMEM() const {
- return (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]);
+ bool hasPending() const { return PendingEvents != 0; }
+ bool hasPendingEvent(WaitEventType E) const {
+ return PendingEvents & (1 << E);
}
bool hasPendingFlat() const {
@@ -291,75 +286,71 @@ public:
LastFlat[LGKM_CNT] = ScoreUBs[LGKM_CNT];
}
- int pendingFlat(InstCounterType Ct) const { return LastFlat[Ct]; }
-
- void setLastFlat(InstCounterType Ct, int Val) { LastFlat[Ct] = Val; }
-
- bool getRevisitLoop() const { return RevisitLoop; }
- void setRevisitLoop(bool RevisitLoopIn) { RevisitLoop = RevisitLoopIn; }
+ void print(raw_ostream &);
+ void dump() { print(dbgs()); }
- void setPostOrder(int32_t PostOrderIn) { PostOrder = PostOrderIn; }
- int32_t getPostOrder() const { return PostOrder; }
+private:
+ struct MergeInfo {
+ uint32_t OldLB;
+ uint32_t OtherLB;
+ uint32_t MyShift;
+ uint32_t OtherShift;
+ };
+ static bool mergeScore(const MergeInfo &M, uint32_t &Score,
+ uint32_t OtherScore);
+
+ void setScoreLB(InstCounterType T, uint32_t Val) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return;
+ ScoreLBs[T] = Val;
+ }
- void setWaitcnt(MachineInstr *WaitcntIn) { Waitcnt = WaitcntIn; }
- void clearWaitcnt() { Waitcnt = nullptr; }
- MachineInstr *getWaitcnt() const { return Waitcnt; }
+ void setScoreUB(InstCounterType T, uint32_t Val) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return;
+ ScoreUBs[T] = Val;
+ if (T == EXP_CNT) {
+ uint32_t UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT);
+ if (ScoreLBs[T] < UB && UB < ScoreUBs[T])
+ ScoreLBs[T] = UB;
+ }
+ }
- bool mixedExpTypes() const { return MixedExpTypes; }
- void setMixedExpTypes(bool MixedExpTypesIn) {
- MixedExpTypes = MixedExpTypesIn;
+ void setRegScore(int GprNo, InstCounterType T, uint32_t Val) {
+ if (GprNo < NUM_ALL_VGPRS) {
+ if (GprNo > VgprUB) {
+ VgprUB = GprNo;
+ }
+ VgprScores[T][GprNo] = Val;
+ } else {
+ assert(T == LGKM_CNT);
+ if (GprNo - NUM_ALL_VGPRS > SgprUB) {
+ SgprUB = GprNo - NUM_ALL_VGPRS;
+ }
+ SgprScores[GprNo - NUM_ALL_VGPRS] = Val;
+ }
}
- void print(raw_ostream &);
- void dump() { print(dbgs()); }
+ void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
+ unsigned OpNo, uint32_t Val);
-private:
const GCNSubtarget *ST = nullptr;
- bool WaitAtBeginning = false;
- bool RevisitLoop = false;
- bool MixedExpTypes = false;
- int32_t PostOrder = 0;
- MachineInstr *Waitcnt = nullptr;
- int32_t ScoreLBs[NUM_INST_CNTS] = {0};
- int32_t ScoreUBs[NUM_INST_CNTS] = {0};
- int32_t EventUBs[NUM_WAIT_EVENTS] = {0};
+ uint32_t ScoreLBs[NUM_INST_CNTS] = {0};
+ uint32_t ScoreUBs[NUM_INST_CNTS] = {0};
+ uint32_t PendingEvents = 0;
+ bool MixedPendingEvents[NUM_INST_CNTS] = {false};
// Remember the last flat memory operation.
- int32_t LastFlat[NUM_INST_CNTS] = {0};
+ uint32_t LastFlat[NUM_INST_CNTS] = {0};
// wait_cnt scores for every vgpr.
// Keep track of the VgprUB and SgprUB to make merge at join efficient.
int32_t VgprUB = 0;
int32_t SgprUB = 0;
- int32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
+ uint32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
// Wait cnt scores for every sgpr, only lgkmcnt is relevant.
- int32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
-};
-
-// This is a per-loop-region object that records waitcnt status at the end of
-// loop footer from the previous iteration. We also maintain an iteration
-// count to track the number of times the loop has been visited. When it
-// doesn't converge naturally, we force convergence by inserting s_waitcnt 0
-// at the end of the loop footer.
-class LoopWaitcntData {
-public:
- LoopWaitcntData() = default;
- ~LoopWaitcntData() = default;
-
- void incIterCnt() { IterCnt++; }
- void resetIterCnt() { IterCnt = 0; }
- unsigned getIterCnt() { return IterCnt; }
-
- void setWaitcnt(MachineInstr *WaitcntIn) { LfWaitcnt = WaitcntIn; }
- MachineInstr *getWaitcnt() const { return LfWaitcnt; }
-
- void print() { LLVM_DEBUG(dbgs() << " iteration " << IterCnt << '\n';); }
-
-private:
- // s_waitcnt added at the end of loop footer to stablize wait scores
- // at the end of the loop footer.
- MachineInstr *LfWaitcnt = nullptr;
- // Number of iterations the loop has been visited, not including the initial
- // walk over.
- int32_t IterCnt = 0;
+ uint32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
};
class SIInsertWaitcnts : public MachineFunctionPass {
@@ -368,22 +359,21 @@ private:
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
- const MachineLoopInfo *MLI = nullptr;
- AMDGPU::IsaInfo::IsaVersion IV;
- AMDGPUAS AMDGPUASI;
+ AMDGPU::IsaVersion IV;
- DenseSet<MachineBasicBlock *> BlockVisitedSet;
DenseSet<MachineInstr *> TrackedWaitcntSet;
DenseSet<MachineInstr *> VCCZBugHandledSet;
- DenseMap<MachineBasicBlock *, std::unique_ptr<BlockWaitcntBrackets>>
- BlockWaitcntBracketsMap;
-
- std::vector<MachineBasicBlock *> BlockWaitcntProcessedSet;
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ std::unique_ptr<WaitcntBrackets> Incoming;
+ bool Dirty = true;
- DenseMap<MachineLoop *, std::unique_ptr<LoopWaitcntData>> LoopWaitcntDataMap;
+ explicit BlockInfo(MachineBasicBlock *MBB) : MBB(MBB) {}
+ };
- std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;
+ std::vector<BlockInfo> BlockInfos; // by reverse post-order traversal index
+ DenseMap<MachineBasicBlock *, unsigned> RpotIdxMap;
// ForceEmitZeroWaitcnts: force all waitcnts insts to be s_waitcnt 0
// because of amdgpu-waitcnt-forcezero flag
@@ -407,20 +397,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- void addKillWaitBracket(BlockWaitcntBrackets *Bracket) {
- // The waitcnt information is copied because it changes as the block is
- // traversed.
- KillWaitBrackets.push_back(
- llvm::make_unique<BlockWaitcntBrackets>(*Bracket));
- }
-
bool isForceEmitWaitcnt() const {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1))
+ for (auto T : inst_counter_types())
if (ForceEmitWaitcnt[T])
return true;
return false;
@@ -454,27 +435,22 @@ public:
}
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
- void generateWaitcntInstBefore(MachineInstr &MI,
- BlockWaitcntBrackets *ScoreBrackets);
+ bool generateWaitcntInstBefore(MachineInstr &MI,
+ WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr);
void updateEventWaitcntAfter(MachineInstr &Inst,
- BlockWaitcntBrackets *ScoreBrackets);
- void mergeInputScoreBrackets(MachineBasicBlock &Block);
- bool isLoopBottom(const MachineLoop *Loop, const MachineBasicBlock *Block);
- unsigned countNumBottomBlocks(const MachineLoop *Loop);
- void insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block);
- void insertWaitcntBeforeCF(MachineBasicBlock &Block, MachineInstr *Inst);
- bool isWaitcntStronger(unsigned LHS, unsigned RHS);
- unsigned combineWaitcnt(unsigned LHS, unsigned RHS);
+ WaitcntBrackets *ScoreBrackets);
+ bool insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets);
};
} // end anonymous namespace
-RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI,
- const SIInstrInfo *TII,
- const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI,
- unsigned OpNo,
- bool Def) const {
+RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
+ const SIInstrInfo *TII,
+ const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI,
+ unsigned OpNo, bool Def) const {
const MachineOperand &Op = MI->getOperand(OpNo);
if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()) ||
(Def && !Op.isDef()))
@@ -512,11 +488,11 @@ RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI,
return Result;
}
-void BlockWaitcntBrackets::setExpScore(const MachineInstr *MI,
- const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo *MRI,
- unsigned OpNo, int32_t Val) {
+void WaitcntBrackets::setExpScore(const MachineInstr *MI,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI, unsigned OpNo,
+ uint32_t Val) {
RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false);
LLVM_DEBUG({
const MachineOperand &Opnd = MI->getOperand(OpNo);
@@ -527,26 +503,26 @@ void BlockWaitcntBrackets::setExpScore(const MachineInstr *MI,
}
}
-void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo *MRI,
- WaitEventType E, MachineInstr &Inst) {
+void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI,
+ WaitEventType E, MachineInstr &Inst) {
const MachineRegisterInfo &MRIA = *MRI;
InstCounterType T = eventCounter(E);
- int32_t CurrScore = getScoreUB(T) + 1;
- // EventUB and ScoreUB need to be update regardless if this event changes
- // the score of a register or not.
+ uint32_t CurrScore = getScoreUB(T) + 1;
+ if (CurrScore == 0)
+ report_fatal_error("InsertWaitcnt score wraparound");
+  // PendingEvents and ScoreUB need to be updated regardless of whether this
+  // event changes the score of a register or not.
  // Examples include vm_cnt when buffer-store or lgkm_cnt when send-message.
- EventUBs[E] = CurrScore;
+ if (!hasPendingEvent(E)) {
+ if (PendingEvents & WaitEventMaskForInst[T])
+ MixedPendingEvents[T] = true;
+ PendingEvents |= 1 << E;
+ }
setScoreUB(T, CurrScore);
if (T == EXP_CNT) {
- // Check for mixed export types. If they are mixed, then a waitcnt exp(0)
- // is required.
- if (!MixedExpTypes) {
- MixedExpTypes = counterOutOfOrder(EXP_CNT);
- }
-
// Put score on the source vgprs. If this is a store, just use those
// specific register(s).
if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
@@ -671,12 +647,11 @@ void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
}
}
-void BlockWaitcntBrackets::print(raw_ostream &OS) {
+void WaitcntBrackets::print(raw_ostream &OS) {
OS << '\n';
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int LB = getScoreLB(T);
- int UB = getScoreUB(T);
+ for (auto T : inst_counter_types()) {
+ uint32_t LB = getScoreLB(T);
+ uint32_t UB = getScoreUB(T);
switch (T) {
case VM_CNT:
@@ -696,10 +671,10 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
if (LB < UB) {
// Print vgpr scores.
for (int J = 0; J <= getMaxVGPR(); J++) {
- int RegScore = getRegScore(J, T);
+ uint32_t RegScore = getRegScore(J, T);
if (RegScore <= LB)
continue;
- int RelScore = RegScore - LB - 1;
+ uint32_t RelScore = RegScore - LB - 1;
if (J < SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS) {
OS << RelScore << ":v" << J << " ";
} else {
@@ -709,10 +684,10 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
// Also need to print sgpr scores for lgkm_cnt.
if (T == LGKM_CNT) {
for (int J = 0; J <= getMaxSGPR(); J++) {
- int RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+ uint32_t RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
if (RegScore <= LB)
continue;
- int RelScore = RegScore - LB - 1;
+ uint32_t RelScore = RegScore - LB - 1;
OS << RelScore << ":s" << J << " ";
}
}
@@ -722,23 +697,31 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
OS << '\n';
}
-unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T,
- int ScoreToWait) {
- unsigned int NeedWait = 0;
- if (ScoreToWait == -1) {
- // The score to wait is unknown. This implies that it was not encountered
- // during the path of the CFG walk done during the current traversal but
- // may be seen on a different path. Emit an s_wait counter with a
- // conservative value of 0 for the counter.
- NeedWait = CNT_MASK(T);
- setScoreLB(T, getScoreUB(T));
- return NeedWait;
- }
+/// Simplify the waitcnt, in the sense of removing redundant counts, and return
+/// whether a waitcnt instruction is needed at all.
+bool WaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const {
+ return simplifyWaitcnt(VM_CNT, Wait.VmCnt) |
+ simplifyWaitcnt(EXP_CNT, Wait.ExpCnt) |
+ simplifyWaitcnt(LGKM_CNT, Wait.LgkmCnt);
+}
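+
+// For example, with a bracket of LB=3 and UB=7 (four outstanding events), a
+// requested count of 2 still needs a wait (UB - Count = 5 > LB), whereas a
+// count of 4 or more is already satisfied and is relaxed to ~0u ("no wait").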
+bool WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
+ unsigned &Count) const {
+ const uint32_t LB = getScoreLB(T);
+ const uint32_t UB = getScoreUB(T);
+ if (Count < UB && UB - Count > LB)
+ return true;
+
+ Count = ~0u;
+ return false;
+}
+
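+// For example, if a source register carries score 5 while the upper bound is
+// 9, the instruction must wait until at most UB - ScoreToWait = 4 operations
+// of this type are still outstanding, which (assuming in-order completion)
+// guarantees that the operation that produced score 5 has finished.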
+void WaitcntBrackets::determineWait(InstCounterType T, uint32_t ScoreToWait,
+ AMDGPU::Waitcnt &Wait) const {
// If the score of src_operand falls within the bracket, we need an
// s_waitcnt instruction.
- const int32_t LB = getScoreLB(T);
- const int32_t UB = getScoreUB(T);
+ const uint32_t LB = getScoreLB(T);
+ const uint32_t UB = getScoreUB(T);
if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
if ((T == VM_CNT || T == LGKM_CNT) &&
hasPendingFlat() &&
@@ -746,90 +729,46 @@ unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T,
// If there is a pending FLAT operation, and this is a VMem or LGKM
// waitcnt and the target can report early completion, then we need
// to force a waitcnt 0.
- NeedWait = CNT_MASK(T);
- setScoreLB(T, getScoreUB(T));
+ addWait(Wait, T, 0);
} else if (counterOutOfOrder(T)) {
// Counter can get decremented out-of-order when there
      // are multiple types of event in the bracket. Also emit an s_wait counter
// with a conservative value of 0 for the counter.
- NeedWait = CNT_MASK(T);
- setScoreLB(T, getScoreUB(T));
+ addWait(Wait, T, 0);
} else {
- NeedWait = CNT_MASK(T);
- setScoreLB(T, ScoreToWait);
+ addWait(Wait, T, UB - ScoreToWait);
}
}
+}
- return NeedWait;
+void WaitcntBrackets::applyWaitcnt(const AMDGPU::Waitcnt &Wait) {
+ applyWaitcnt(VM_CNT, Wait.VmCnt);
+ applyWaitcnt(EXP_CNT, Wait.ExpCnt);
+ applyWaitcnt(LGKM_CNT, Wait.LgkmCnt);
}
-// Where there are multiple types of event in the bracket of a counter,
-// the decrement may go out of order.
-bool BlockWaitcntBrackets::counterOutOfOrder(InstCounterType T) {
- switch (T) {
- case VM_CNT:
- return false;
- case LGKM_CNT: {
- if (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]) {
- // Scalar memory read always can go out of order.
- return true;
- }
- int NumEventTypes = 0;
- if (EventUBs[LDS_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[LDS_ACCESS] <= ScoreUBs[LGKM_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[GDS_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[GDS_ACCESS] <= ScoreUBs[LGKM_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[SQ_MESSAGE] > ScoreLBs[LGKM_CNT] &&
- EventUBs[SQ_MESSAGE] <= ScoreUBs[LGKM_CNT]) {
- NumEventTypes++;
- }
- if (NumEventTypes <= 1) {
- return false;
- }
- break;
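+// For example, applying a wait with count 3 to a bracket of LB=2 and UB=9
+// raises the lower bound to 6 (provided the counter completes in order),
+// while a count of 0 collapses the bracket entirely and clears the
+// counter's pending events.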
+void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
+ const uint32_t UB = getScoreUB(T);
+ if (Count >= UB)
+ return;
+ if (Count != 0) {
+ if (counterOutOfOrder(T))
+ return;
+ setScoreLB(T, std::max(getScoreLB(T), UB - Count));
+ } else {
+ setScoreLB(T, UB);
+ MixedPendingEvents[T] = false;
+ PendingEvents &= ~WaitEventMaskForInst[T];
}
- case EXP_CNT: {
- // If there has been a mixture of export types, then a waitcnt exp(0) is
- // required.
- if (MixedExpTypes)
- return true;
- int NumEventTypes = 0;
- if (EventUBs[EXP_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
- EventUBs[EXP_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[GDS_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
- EventUBs[GDS_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[VMW_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
- EventUBs[VMW_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[EXP_PARAM_ACCESS] > ScoreLBs[EXP_CNT] &&
- EventUBs[EXP_PARAM_ACCESS] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
-
- if (EventUBs[EXP_POS_ACCESS] > ScoreLBs[EXP_CNT] &&
- EventUBs[EXP_POS_ACCESS] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
+}
- if (NumEventTypes <= 1) {
- return false;
- }
- break;
- }
- default:
- break;
- }
- return true;
+// Where there are multiple types of event in the bracket of a counter,
+// the decrement may go out of order.
+bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const {
+ // Scalar memory read always can go out of order.
+ if (T == LGKM_CNT && hasPendingEvent(SMEM_ACCESS))
+ return true;
+ return MixedPendingEvents[T];
}
INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
@@ -851,29 +790,6 @@ static bool readsVCCZ(const MachineInstr &MI) {
!MI.getOperand(1).isUndef();
}
-/// Given wait count encodings checks if LHS is stronger than RHS.
-bool SIInsertWaitcnts::isWaitcntStronger(unsigned LHS, unsigned RHS) {
- if (AMDGPU::decodeVmcnt(IV, LHS) > AMDGPU::decodeVmcnt(IV, RHS))
- return false;
- if (AMDGPU::decodeLgkmcnt(IV, LHS) > AMDGPU::decodeLgkmcnt(IV, RHS))
- return false;
- if (AMDGPU::decodeExpcnt(IV, LHS) > AMDGPU::decodeExpcnt(IV, RHS))
- return false;
- return true;
-}
-
-/// Given wait count encodings create a new encoding which is stronger
-/// or equal to both.
-unsigned SIInsertWaitcnts::combineWaitcnt(unsigned LHS, unsigned RHS) {
- unsigned VmCnt = std::min(AMDGPU::decodeVmcnt(IV, LHS),
- AMDGPU::decodeVmcnt(IV, RHS));
- unsigned LgkmCnt = std::min(AMDGPU::decodeLgkmcnt(IV, LHS),
- AMDGPU::decodeLgkmcnt(IV, RHS));
- unsigned ExpCnt = std::min(AMDGPU::decodeExpcnt(IV, LHS),
- AMDGPU::decodeExpcnt(IV, RHS));
- return AMDGPU::encodeWaitcnt(IV, VmCnt, ExpCnt, LgkmCnt);
-}
-
/// Generate an s_waitcnt instruction to be placed before cur_Inst.
/// Instructions of a given type are returned in order,
/// but instructions of different types can complete out of order.
@@ -884,51 +800,23 @@ unsigned SIInsertWaitcnts::combineWaitcnt(unsigned LHS, unsigned RHS) {
/// and if so what the value of each counter is.
/// The "score bracket" is bound by the lower bound and upper bound
/// scores (*_score_LB and *_score_ub respectively).
-void SIInsertWaitcnts::generateWaitcntInstBefore(
- MachineInstr &MI, BlockWaitcntBrackets *ScoreBrackets) {
- // To emit, or not to emit - that's the question!
- // Start with an assumption that there is no need to emit.
- unsigned int EmitWaitcnt = 0;
-
- // No need to wait before phi. If a phi-move exists, then the wait should
- // has been inserted before the move. If a phi-move does not exist, then
- // wait should be inserted before the real use. The same is true for
- // sc-merge. It is not a coincident that all these cases correspond to the
- // instructions that are skipped in the assembling loop.
- bool NeedLineMapping = false; // TODO: Check on this.
-
- // ForceEmitZeroWaitcnt: force a single s_waitcnt 0 due to hw bug
- bool ForceEmitZeroWaitcnt = false;
-
+bool SIInsertWaitcnts::generateWaitcntInstBefore(
+ MachineInstr &MI, WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr) {
setForceEmitWaitcnt();
bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
- if (MI.isDebugInstr() &&
- // TODO: any other opcode?
- !NeedLineMapping) {
- return;
- }
+ if (MI.isDebugInstr())
+ return false;
- // See if an s_waitcnt is forced at block entry, or is needed at
- // program end.
- if (ScoreBrackets->getWaitAtBeginning()) {
- // Note that we have already cleared the state, so we don't need to update
- // it.
- ScoreBrackets->clearWaitAtBeginning();
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- EmitWaitcnt |= CNT_MASK(T);
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- }
- }
+ AMDGPU::Waitcnt Wait;
// See if this instruction has a forced S_WAITCNT VM.
// TODO: Handle other cases of NeedsWaitcntVmBefore()
- else if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
- MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
- MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL) {
- EmitWaitcnt |=
- ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
+ if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
+ MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
+ MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL) {
+ Wait.VmCnt = 0;
}
// All waits must be resolved at call return.
@@ -936,23 +824,14 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- EmitWaitcnt |= CNT_MASK(T);
- }
- }
+ Wait = AMDGPU::Waitcnt::allZero();
}
// Resolve vm waits before gs-done.
else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
MI.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_) ==
AMDGPU::SendMsg::ID_GS_DONE)) {
- if (ScoreBrackets->getScoreUB(VM_CNT) > ScoreBrackets->getScoreLB(VM_CNT)) {
- ScoreBrackets->setScoreLB(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
- EmitWaitcnt |= CNT_MASK(VM_CNT);
- }
+ Wait.VmCnt = 0;
}
#if 0 // TODO: the following blocks of logic when we have fence.
else if (MI.getOpcode() == SC_FENCE) {
@@ -1016,14 +895,12 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
// Export and GDS are tracked individually, either may trigger a waitcnt
// for EXEC.
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(EXP_GPR_LOCK));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(EXP_PARAM_ACCESS));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(EXP_POS_ACCESS));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(GDS_GPR_LOCK));
+ if (ScoreBrackets.hasPendingEvent(EXP_GPR_LOCK) ||
+ ScoreBrackets.hasPendingEvent(EXP_PARAM_ACCESS) ||
+ ScoreBrackets.hasPendingEvent(EXP_POS_ACCESS) ||
+ ScoreBrackets.hasPendingEvent(GDS_GPR_LOCK)) {
+ Wait.ExpCnt = 0;
+ }
}
#if 0 // TODO: the following code to handle CALL.
@@ -1051,27 +928,27 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// instruction.
for (const MachineMemOperand *Memop : MI.memoperands()) {
unsigned AS = Memop->getAddrSpace();
- if (AS != AMDGPUASI.LOCAL_ADDRESS)
+ if (AS != AMDGPUAS::LOCAL_ADDRESS)
continue;
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
// VM_CNT is only relevant to vgpr or LDS.
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
}
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &Op = MI.getOperand(I);
const MachineRegisterInfo &MRIA = *MRI;
RegInterval Interval =
- ScoreBrackets->getRegInterval(&MI, TII, MRI, TRI, I, false);
+ ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, false);
for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
if (TRI->isVGPR(MRIA, Op.getReg())) {
// VM_CNT is only relevant to vgpr or LDS.
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
}
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT));
+ ScoreBrackets.determineWait(
+ LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
}
}
// End of for loop that looks at all source operands to decide vm_wait_cnt
@@ -1086,29 +963,29 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// FIXME: Should not be relying on memoperands.
for (const MachineMemOperand *Memop : MI.memoperands()) {
unsigned AS = Memop->getAddrSpace();
- if (AS != AMDGPUASI.LOCAL_ADDRESS)
+ if (AS != AMDGPUAS::LOCAL_ADDRESS)
continue;
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ ScoreBrackets.determineWait(
+ EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
}
}
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
MachineOperand &Def = MI.getOperand(I);
const MachineRegisterInfo &MRIA = *MRI;
RegInterval Interval =
- ScoreBrackets->getRegInterval(&MI, TII, MRI, TRI, I, true);
+ ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, true);
for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
if (TRI->isVGPR(MRIA, Def.getReg())) {
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ ScoreBrackets.determineWait(
+ EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
}
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT));
+ ScoreBrackets.determineWait(
+ LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
}
} // End of for loop that looks at all dest operands.
}
@@ -1119,182 +996,79 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// requiring a WAITCNT beforehand.
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
!ST->hasAutoWaitcntBeforeBarrier()) {
- EmitWaitcnt |=
- ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- LGKM_CNT, ScoreBrackets->getScoreUB(LGKM_CNT));
+ Wait = AMDGPU::Waitcnt::allZero();
}
// TODO: Remove this work-around, enable the assert for Bug 457939
// after fixing the scheduler. Also, the Shader Compiler code is
// independent of target.
if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
- if (ScoreBrackets->getScoreLB(LGKM_CNT) <
- ScoreBrackets->getScoreUB(LGKM_CNT) &&
- ScoreBrackets->hasPendingSMEM()) {
- // Wait on everything, not just LGKM. vccz reads usually come from
- // terminators, and we always wait on everything at the end of the
- // block, so if we only wait on LGKM here, we might end up with
- // another s_waitcnt inserted right after this if there are non-LGKM
- // instructions still outstanding.
- // FIXME: this is too conservative / the comment is wrong.
- // We don't wait on everything at the end of the block and we combine
- // waitcnts so we should never have back-to-back waitcnts.
- ForceEmitZeroWaitcnt = true;
- EmitWaitcnt = true;
+ if (ScoreBrackets.getScoreLB(LGKM_CNT) <
+ ScoreBrackets.getScoreUB(LGKM_CNT) &&
+ ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
+ Wait.LgkmCnt = 0;
}
}
- // Does this operand processing indicate s_wait counter update?
- if (EmitWaitcnt || IsForceEmitWaitcnt) {
- int CntVal[NUM_INST_CNTS];
-
- bool UseDefaultWaitcntStrategy = true;
- if (ForceEmitZeroWaitcnt || ForceEmitZeroWaitcnts) {
- // Force all waitcnts to 0.
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- }
- CntVal[VM_CNT] = 0;
- CntVal[EXP_CNT] = 0;
- CntVal[LGKM_CNT] = 0;
- UseDefaultWaitcntStrategy = false;
- }
-
- if (UseDefaultWaitcntStrategy) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- if (EmitWaitcnt & CNT_MASK(T)) {
- int Delta =
- ScoreBrackets->getScoreUB(T) - ScoreBrackets->getScoreLB(T);
- int MaxDelta = ScoreBrackets->getWaitCountMax(T);
- if (Delta >= MaxDelta) {
- Delta = -1;
- if (T != EXP_CNT) {
- ScoreBrackets->setScoreLB(
- T, ScoreBrackets->getScoreUB(T) - MaxDelta);
- }
- EmitWaitcnt &= ~CNT_MASK(T);
- }
- CntVal[T] = Delta;
- } else {
- // If we are not waiting for a particular counter then encode
- // it as -1 which means "don't care."
- CntVal[T] = -1;
- }
+ // Early-out if no wait is indicated.
+ if (!ScoreBrackets.simplifyWaitcnt(Wait) && !IsForceEmitWaitcnt) {
+ bool Modified = false;
+ if (OldWaitcntInstr) {
+ if (TrackedWaitcntSet.count(OldWaitcntInstr)) {
+ TrackedWaitcntSet.erase(OldWaitcntInstr);
+ OldWaitcntInstr->eraseFromParent();
+ Modified = true;
+ } else {
+ int64_t Imm = OldWaitcntInstr->getOperand(0).getImm();
+ ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
}
+ Modified = true;
}
+ return Modified;
+ }
- // If we are not waiting on any counter we can skip the wait altogether.
- if (EmitWaitcnt != 0 || IsForceEmitWaitcnt) {
- MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
- int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
- if (!OldWaitcnt ||
- (AMDGPU::decodeVmcnt(IV, Imm) !=
- (CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
- (AMDGPU::decodeExpcnt(IV, Imm) !=
- (CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
- (AMDGPU::decodeLgkmcnt(IV, Imm) !=
- (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
- MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
- if (ContainingLoop) {
- MachineBasicBlock *TBB = ContainingLoop->getHeader();
- BlockWaitcntBrackets *ScoreBracket =
- BlockWaitcntBracketsMap[TBB].get();
- if (!ScoreBracket) {
- assert(!BlockVisitedSet.count(TBB));
- BlockWaitcntBracketsMap[TBB] =
- llvm::make_unique<BlockWaitcntBrackets>(ST);
- ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
- }
- ScoreBracket->setRevisitLoop(true);
- LLVM_DEBUG(dbgs()
- << "set-revisit2: Block"
- << ContainingLoop->getHeader()->getNumber() << '\n';);
- }
- }
+ if (ForceEmitZeroWaitcnts)
+ Wait = AMDGPU::Waitcnt::allZero();
- // Update an existing waitcount, or make a new one.
- unsigned Enc = AMDGPU::encodeWaitcnt(IV,
- ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT],
- ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT],
- ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]);
- // We don't remove waitcnts that existed prior to the waitcnt
- // pass. Check if the waitcnt to-be-inserted can be avoided
- // or if the prev waitcnt can be updated.
- bool insertSWaitInst = true;
- for (MachineBasicBlock::iterator I = MI.getIterator(),
- B = MI.getParent()->begin();
- insertSWaitInst && I != B; --I) {
- if (I == MI.getIterator())
- continue;
+ if (ForceEmitWaitcnt[VM_CNT])
+ Wait.VmCnt = 0;
+ if (ForceEmitWaitcnt[EXP_CNT])
+ Wait.ExpCnt = 0;
+ if (ForceEmitWaitcnt[LGKM_CNT])
+ Wait.LgkmCnt = 0;
- switch (I->getOpcode()) {
- case AMDGPU::S_WAITCNT:
- if (isWaitcntStronger(I->getOperand(0).getImm(), Enc))
- insertSWaitInst = false;
- else if (!OldWaitcnt) {
- OldWaitcnt = &*I;
- Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc);
- }
- break;
- // TODO: skip over instructions which never require wait.
- }
- break;
- }
- if (insertSWaitInst) {
- if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) {
- if (ForceEmitZeroWaitcnts)
- LLVM_DEBUG(
- dbgs()
- << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
- if (IsForceEmitWaitcnt)
- LLVM_DEBUG(dbgs()
- << "Force emit a s_waitcnt due to debug counter\n");
-
- OldWaitcnt->getOperand(0).setImm(Enc);
- if (!OldWaitcnt->getParent())
- MI.getParent()->insert(MI, OldWaitcnt);
-
- LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
- << "Old Instr: " << MI << '\n'
- << "New Instr: " << *OldWaitcnt << '\n');
- } else {
- auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
- MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(Enc);
- TrackedWaitcntSet.insert(SWaitInst);
-
- LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
- << "Old Instr: " << MI << '\n'
- << "New Instr: " << *SWaitInst << '\n');
- }
- }
+ ScoreBrackets.applyWaitcnt(Wait);
- if (CntVal[EXP_CNT] == 0) {
- ScoreBrackets->setMixedExpTypes(false);
- }
- }
+ AMDGPU::Waitcnt OldWait;
+ if (OldWaitcntInstr) {
+ OldWait =
+ AMDGPU::decodeWaitcnt(IV, OldWaitcntInstr->getOperand(0).getImm());
}
-}
+ if (OldWait.dominates(Wait))
+ return false;
-void SIInsertWaitcnts::insertWaitcntBeforeCF(MachineBasicBlock &MBB,
- MachineInstr *Waitcnt) {
- if (MBB.empty()) {
- MBB.push_back(Waitcnt);
- return;
- }
+ if (OldWaitcntInstr && !TrackedWaitcntSet.count(OldWaitcntInstr))
+ Wait = Wait.combined(OldWait);
- MachineBasicBlock::iterator It = MBB.end();
- MachineInstr *MI = &*(--It);
- if (MI->isBranch()) {
- MBB.insert(It, Waitcnt);
+ unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+ if (OldWaitcntInstr) {
+ OldWaitcntInstr->getOperand(0).setImm(Enc);
+
+ LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
+ << "Old Instr: " << MI << '\n'
+ << "New Instr: " << *OldWaitcntInstr << '\n');
} else {
- MBB.push_back(Waitcnt);
+ auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
+ MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(Enc);
+ TrackedWaitcntSet.insert(SWaitInst);
+
+ LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
+ << "Old Instr: " << MI << '\n'
+ << "New Instr: " << *SWaitInst << '\n');
}
+
+ return true;
}
// This is a flat memory operation. Check to see if it has memory
@@ -1305,15 +1079,15 @@ bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const {
for (const MachineMemOperand *Memop : MI.memoperands()) {
unsigned AS = Memop->getAddrSpace();
- if (AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS)
return true;
}
return false;
}
-void SIInsertWaitcnts::updateEventWaitcntAfter(
- MachineInstr &Inst, BlockWaitcntBrackets *ScoreBrackets) {
+void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
+ WaitcntBrackets *ScoreBrackets) {
// Now look at the instruction opcode. If it is a memory access
// instruction, update the upper-bound of the appropriate counter's
// bracket and the destination operand scores.
@@ -1379,342 +1153,124 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(
}
}
-// Merge the score brackets of the Block's predecessors;
-// this merged score bracket is used when adding waitcnts to the Block
-void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
- BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
- int32_t MaxPending[NUM_INST_CNTS] = {0};
- int32_t MaxFlat[NUM_INST_CNTS] = {0};
- bool MixedExpTypes = false;
-
- // For single basic block loops, we need to retain the Block's
- // score bracket to have accurate Pred info. So, make a copy of Block's
- // score bracket, clear() it (which retains several important bits of info),
- // populate, and then replace en masse. For non-single basic block loops,
- // just clear Block's current score bracket and repopulate in-place.
- bool IsSelfPred;
- std::unique_ptr<BlockWaitcntBrackets> S;
-
- IsSelfPred = (std::find(Block.pred_begin(), Block.pred_end(), &Block))
- != Block.pred_end();
- if (IsSelfPred) {
- S = llvm::make_unique<BlockWaitcntBrackets>(*ScoreBrackets);
- ScoreBrackets = S.get();
- }
-
- ScoreBrackets->clear();
-
- // See if there are any uninitialized predecessors. If so, emit an
- // s_waitcnt 0 at the beginning of the block.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
- bool Visited = BlockVisitedSet.count(Pred);
- if (!Visited || PredScoreBrackets->getWaitAtBeginning()) {
- continue;
- }
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int span =
- PredScoreBrackets->getScoreUB(T) - PredScoreBrackets->getScoreLB(T);
- MaxPending[T] = std::max(MaxPending[T], span);
- span =
- PredScoreBrackets->pendingFlat(T) - PredScoreBrackets->getScoreLB(T);
- MaxFlat[T] = std::max(MaxFlat[T], span);
- }
-
- MixedExpTypes |= PredScoreBrackets->mixedExpTypes();
- }
-
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- // Also handle kills for exit block.
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int Span = KillWaitBrackets[I]->getScoreUB(T) -
- KillWaitBrackets[I]->getScoreLB(T);
- MaxPending[T] = std::max(MaxPending[T], Span);
- Span = KillWaitBrackets[I]->pendingFlat(T) -
- KillWaitBrackets[I]->getScoreLB(T);
- MaxFlat[T] = std::max(MaxFlat[T], Span);
- }
-
- MixedExpTypes |= KillWaitBrackets[I]->mixedExpTypes();
- }
- }
-
- // Special handling for GDS_GPR_LOCK and EXP_GPR_LOCK.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
- bool Visited = BlockVisitedSet.count(Pred);
- if (!Visited || PredScoreBrackets->getWaitAtBeginning()) {
- continue;
- }
-
- int GDSSpan = PredScoreBrackets->getEventUB(GDS_GPR_LOCK) -
- PredScoreBrackets->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
- int EXPSpan = PredScoreBrackets->getEventUB(EXP_GPR_LOCK) -
- PredScoreBrackets->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
- }
-
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- int GDSSpan = KillWaitBrackets[I]->getEventUB(GDS_GPR_LOCK) -
- KillWaitBrackets[I]->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
- int EXPSpan = KillWaitBrackets[I]->getEventUB(EXP_GPR_LOCK) -
- KillWaitBrackets[I]->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
- }
- }
-
-#if 0
- // LC does not (unlike) add a waitcnt at beginning. Leaving it as marker.
- // TODO: how does LC distinguish between function entry and main entry?
- // If this is the entry to a function, force a wait.
- MachineBasicBlock &Entry = Block.getParent()->front();
- if (Entry.getNumber() == Block.getNumber()) {
- ScoreBrackets->setWaitAtBeginning();
- return;
- }
-#endif
-
- // Now set the current Block's brackets to the largest ending bracket.
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- ScoreBrackets->setScoreUB(T, MaxPending[T]);
- ScoreBrackets->setScoreLB(T, 0);
- ScoreBrackets->setLastFlat(T, MaxFlat[T]);
- }
-
- ScoreBrackets->setMixedExpTypes(MixedExpTypes);
-
- // Set the register scoreboard.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- if (!BlockVisitedSet.count(Pred)) {
- continue;
- }
-
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
-
- // Now merge the gpr_reg_score information
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int PredLB = PredScoreBrackets->getScoreLB(T);
- int PredUB = PredScoreBrackets->getScoreUB(T);
- if (PredLB < PredUB) {
- int PredScale = MaxPending[T] - PredUB;
- // Merge vgpr scores.
- for (int J = 0; J <= PredScoreBrackets->getMaxVGPR(); J++) {
- int PredRegScore = PredScoreBrackets->getRegScore(J, T);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
- }
- // Also need to merge sgpr scores for lgkm_cnt.
- if (T == LGKM_CNT) {
- for (int J = 0; J <= PredScoreBrackets->getMaxSGPR(); J++) {
- int PredRegScore =
- PredScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J + NUM_ALL_VGPRS, LGKM_CNT,
- std::max(
- ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
- NewRegScore));
- }
- }
- }
- }
-
- // Also merge the WaitEvent information.
- ForAllWaitEventType(W) {
- enum InstCounterType T = PredScoreBrackets->eventCounter(W);
- int PredEventUB = PredScoreBrackets->getEventUB(W);
- if (PredEventUB > PredScoreBrackets->getScoreLB(T)) {
- int NewEventUB =
- MaxPending[T] + PredEventUB - PredScoreBrackets->getScoreUB(T);
- if (NewEventUB > 0) {
- ScoreBrackets->setEventUB(
- W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
- }
- }
- }
- }
-
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- // Set the register scoreboard.
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- // Now merge the gpr_reg_score information.
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int PredLB = KillWaitBrackets[I]->getScoreLB(T);
- int PredUB = KillWaitBrackets[I]->getScoreUB(T);
- if (PredLB < PredUB) {
- int PredScale = MaxPending[T] - PredUB;
- // Merge vgpr scores.
- for (int J = 0; J <= KillWaitBrackets[I]->getMaxVGPR(); J++) {
- int PredRegScore = KillWaitBrackets[I]->getRegScore(J, T);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
- }
- // Also need to merge sgpr scores for lgkm_cnt.
- if (T == LGKM_CNT) {
- for (int J = 0; J <= KillWaitBrackets[I]->getMaxSGPR(); J++) {
- int PredRegScore =
- KillWaitBrackets[I]->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J + NUM_ALL_VGPRS, LGKM_CNT,
- std::max(
- ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
- NewRegScore));
- }
- }
- }
- }
-
- // Also merge the WaitEvent information.
- ForAllWaitEventType(W) {
- enum InstCounterType T = KillWaitBrackets[I]->eventCounter(W);
- int PredEventUB = KillWaitBrackets[I]->getEventUB(W);
- if (PredEventUB > KillWaitBrackets[I]->getScoreLB(T)) {
- int NewEventUB =
- MaxPending[T] + PredEventUB - KillWaitBrackets[I]->getScoreUB(T);
- if (NewEventUB > 0) {
- ScoreBrackets->setEventUB(
- W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
- }
- }
- }
- }
- }
-
- // Special case handling of GDS_GPR_LOCK and EXP_GPR_LOCK. Merge this for the
- // sequencing predecessors, because changes to EXEC require waitcnts due to
- // the delayed nature of these operations.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- if (!BlockVisitedSet.count(Pred)) {
- continue;
- }
+bool WaitcntBrackets::mergeScore(const MergeInfo &M, uint32_t &Score,
+ uint32_t OtherScore) {
+ uint32_t MyShifted = Score <= M.OldLB ? 0 : Score + M.MyShift;
+ uint32_t OtherShifted =
+ OtherScore <= M.OtherLB ? 0 : OtherScore + M.OtherShift;
+ Score = std::max(MyShifted, OtherShifted);
+ return OtherShifted > MyShifted;
+}
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
-
- int pred_gds_ub = PredScoreBrackets->getEventUB(GDS_GPR_LOCK);
- if (pred_gds_ub > PredScoreBrackets->getScoreLB(EXP_CNT)) {
- int new_gds_ub = MaxPending[EXP_CNT] + pred_gds_ub -
- PredScoreBrackets->getScoreUB(EXP_CNT);
- if (new_gds_ub > 0) {
- ScoreBrackets->setEventUB(
- GDS_GPR_LOCK,
- std::max(ScoreBrackets->getEventUB(GDS_GPR_LOCK), new_gds_ub));
- }
- }
- int pred_exp_ub = PredScoreBrackets->getEventUB(EXP_GPR_LOCK);
- if (pred_exp_ub > PredScoreBrackets->getScoreLB(EXP_CNT)) {
- int new_exp_ub = MaxPending[EXP_CNT] + pred_exp_ub -
- PredScoreBrackets->getScoreUB(EXP_CNT);
- if (new_exp_ub > 0) {
- ScoreBrackets->setEventUB(
- EXP_GPR_LOCK,
- std::max(ScoreBrackets->getEventUB(EXP_GPR_LOCK), new_exp_ub));
+/// Merge the pending events and associated score brackets of \p Other into
+/// this object's bracket status.
+///
+/// Returns whether the merge resulted in a change that requires tighter waits
+/// (i.e. the merged brackets strictly dominate the original brackets).
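+///
+/// For example, merging an incoming bracket with LB=1 and UB=5 (four
+/// outstanding events) into a bracket with LB=4 and UB=6 (two outstanding)
+/// yields MyShift=2 and OtherShift=3, so both upper bounds line up at 8 and
+/// the result is LB=4, UB=8: the merged pending span is the maximum of the
+/// two inputs.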
+bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
+ bool StrictDom = false;
+
+ for (auto T : inst_counter_types()) {
+ // Merge event flags for this counter
+ const bool OldOutOfOrder = counterOutOfOrder(T);
+ const uint32_t OldEvents = PendingEvents & WaitEventMaskForInst[T];
+ const uint32_t OtherEvents = Other.PendingEvents & WaitEventMaskForInst[T];
+ if (OtherEvents & ~OldEvents)
+ StrictDom = true;
+ if (Other.MixedPendingEvents[T] ||
+ (OldEvents && OtherEvents && OldEvents != OtherEvents))
+ MixedPendingEvents[T] = true;
+ PendingEvents |= OtherEvents;
+
+ // Merge scores for this counter
+ const uint32_t MyPending = ScoreUBs[T] - ScoreLBs[T];
+ const uint32_t OtherPending = Other.ScoreUBs[T] - Other.ScoreLBs[T];
+ MergeInfo M;
+ M.OldLB = ScoreLBs[T];
+ M.OtherLB = Other.ScoreLBs[T];
+ M.MyShift = OtherPending > MyPending ? OtherPending - MyPending : 0;
+ M.OtherShift = ScoreUBs[T] - Other.ScoreUBs[T] + M.MyShift;
+
+ const uint32_t NewUB = ScoreUBs[T] + M.MyShift;
+ if (NewUB < ScoreUBs[T])
+ report_fatal_error("waitcnt score overflow");
+ ScoreUBs[T] = NewUB;
+ ScoreLBs[T] = std::min(M.OldLB + M.MyShift, M.OtherLB + M.OtherShift);
+
+ StrictDom |= mergeScore(M, LastFlat[T], Other.LastFlat[T]);
+
+ bool RegStrictDom = false;
+ for (int J = 0, E = std::max(getMaxVGPR(), Other.getMaxVGPR()) + 1; J != E;
+ J++) {
+ RegStrictDom |= mergeScore(M, VgprScores[T][J], Other.VgprScores[T][J]);
+ }
+
+ if (T == LGKM_CNT) {
+ for (int J = 0, E = std::max(getMaxSGPR(), Other.getMaxSGPR()) + 1;
+ J != E; J++) {
+ RegStrictDom |= mergeScore(M, SgprScores[J], Other.SgprScores[J]);
}
}
- }
- // if a single block loop, update the score brackets. Not needed for other
- // blocks, as we did this in-place
- if (IsSelfPred) {
- BlockWaitcntBracketsMap[&Block] = llvm::make_unique<BlockWaitcntBrackets>(*ScoreBrackets);
+ if (RegStrictDom && !OldOutOfOrder)
+ StrictDom = true;
}
-}
-/// Return true if the given basic block is a "bottom" block of a loop.
-/// This works even if the loop is discontiguous. This also handles
-/// multiple back-edges for the same "header" block of a loop.
-bool SIInsertWaitcnts::isLoopBottom(const MachineLoop *Loop,
- const MachineBasicBlock *Block) {
- for (MachineBasicBlock *MBB : Loop->blocks()) {
- if (MBB == Block && MBB->isSuccessor(Loop->getHeader())) {
- return true;
- }
- }
- return false;
-}
+ VgprUB = std::max(getMaxVGPR(), Other.getMaxVGPR());
+ SgprUB = std::max(getMaxSGPR(), Other.getMaxSGPR());
-/// Count the number of "bottom" basic blocks of a loop.
-unsigned SIInsertWaitcnts::countNumBottomBlocks(const MachineLoop *Loop) {
- unsigned Count = 0;
- for (MachineBasicBlock *MBB : Loop->blocks()) {
- if (MBB->isSuccessor(Loop->getHeader())) {
- Count++;
- }
- }
- return Count;
+ return StrictDom;
}
// Generate s_waitcnt instructions where needed.
-void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
- MachineBasicBlock &Block) {
- // Initialize the state information.
- mergeInputScoreBrackets(Block);
-
- BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
+bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
+ MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets) {
+ bool Modified = false;
LLVM_DEBUG({
dbgs() << "*** Block" << Block.getNumber() << " ***";
- ScoreBrackets->dump();
+ ScoreBrackets.dump();
});
// Walk over the instructions.
+ MachineInstr *OldWaitcntInstr = nullptr;
+
for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end();
Iter != E;) {
MachineInstr &Inst = *Iter;
+
// Remove any previously existing waitcnts.
if (Inst.getOpcode() == AMDGPU::S_WAITCNT) {
- // Leave pre-existing waitcnts, but note their existence via setWaitcnt.
- // Remove the waitcnt-pass-generated waitcnts; the pass will add them back
- // as needed.
- if (!TrackedWaitcntSet.count(&Inst))
- ++Iter;
- else {
- ++Iter;
- Inst.removeFromParent();
+ if (OldWaitcntInstr) {
+ if (TrackedWaitcntSet.count(OldWaitcntInstr)) {
+ TrackedWaitcntSet.erase(OldWaitcntInstr);
+ OldWaitcntInstr->eraseFromParent();
+ OldWaitcntInstr = nullptr;
+ } else if (!TrackedWaitcntSet.count(&Inst)) {
+ // Two successive s_waitcnt's, both of which are pre-existing and
+ // are therefore preserved.
+ int64_t Imm = OldWaitcntInstr->getOperand(0).getImm();
+ ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
+ } else {
+ ++Iter;
+ Inst.eraseFromParent();
+ Modified = true;
+ continue;
+ }
}
- ScoreBrackets->setWaitcnt(&Inst);
- continue;
- }
- // Kill instructions generate a conditional branch to the endmain block.
- // Merge the current waitcnt state into the endmain block information.
- // TODO: Are there other flavors of KILL instruction?
- if (Inst.getOpcode() == AMDGPU::KILL) {
- addKillWaitBracket(ScoreBrackets);
+ OldWaitcntInstr = &Inst;
+ ++Iter;
+ continue;
}
bool VCCZBugWorkAround = false;
if (readsVCCZ(Inst) &&
(!VCCZBugHandledSet.count(&Inst))) {
- if (ScoreBrackets->getScoreLB(LGKM_CNT) <
- ScoreBrackets->getScoreUB(LGKM_CNT) &&
- ScoreBrackets->hasPendingSMEM()) {
+ if (ScoreBrackets.getScoreLB(LGKM_CNT) <
+ ScoreBrackets.getScoreUB(LGKM_CNT) &&
+ ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
if (ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
VCCZBugWorkAround = true;
}
@@ -1722,9 +1278,10 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
// Generate an s_waitcnt instruction to be placed before
// cur_Inst, if needed.
- generateWaitcntInstBefore(Inst, ScoreBrackets);
+ Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr);
+ OldWaitcntInstr = nullptr;
- updateEventWaitcntAfter(Inst, ScoreBrackets);
+ updateEventWaitcntAfter(Inst, &ScoreBrackets);
#if 0 // TODO: implement resource type check controlled by options with ub = LB.
// If this instruction generates a S_SETVSKIP because it is an
@@ -1737,11 +1294,9 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
}
#endif
- ScoreBrackets->clearWaitcnt();
-
LLVM_DEBUG({
Inst.print(dbgs());
- ScoreBrackets->dump();
+ ScoreBrackets.dump();
});
// Check to see if this is a GWS instruction. If so, and if this is CI or
@@ -1753,10 +1308,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
// TODO: && context->target_info->GwsRequiresMemViolTest() ) {
- ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
- ScoreBrackets->updateByWait(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
- ScoreBrackets->updateByWait(LGKM_CNT,
- ScoreBrackets->getScoreUB(LGKM_CNT));
+ ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZero());
}
// TODO: Remove this work-around after fixing the scheduler and enable the
@@ -1769,71 +1321,13 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
AMDGPU::VCC)
.addReg(AMDGPU::VCC);
VCCZBugHandledSet.insert(&Inst);
+ Modified = true;
}
++Iter;
}
- // Check if we need to force convergence at loop footer.
- MachineLoop *ContainingLoop = MLI->getLoopFor(&Block);
- if (ContainingLoop && isLoopBottom(ContainingLoop, &Block)) {
- LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
- WaitcntData->print();
- LLVM_DEBUG(dbgs() << '\n';);
-
- // The iterative waitcnt insertion algorithm aims for optimal waitcnt
- // placement, but doesn't guarantee convergence for a loop. Each
- // loop should take at most (n+1) iterations for it to converge naturally,
- // where n is the number of bottom blocks. If this threshold is reached and
- // the result hasn't converged, then we force convergence by inserting
- // a s_waitcnt at the end of loop footer.
- if (WaitcntData->getIterCnt() > (countNumBottomBlocks(ContainingLoop) + 1)) {
- // To ensure convergence, need to make wait events at loop footer be no
- // more than those from the previous iteration.
- // As a simplification, instead of tracking individual scores and
- // generating the precise wait count, just wait on 0.
- bool HasPending = false;
- MachineInstr *SWaitInst = WaitcntData->getWaitcnt();
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- HasPending = true;
- break;
- }
- }
-
- if (HasPending) {
- if (!SWaitInst) {
- SWaitInst = BuildMI(Block, Block.getFirstNonPHI(),
- DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(0);
- TrackedWaitcntSet.insert(SWaitInst);
-#if 0 // TODO: Format the debug output
- OutputTransformBanner("insertWaitcntInBlock",0,"Create:",context);
- OutputTransformAdd(SWaitInst, context);
-#endif
- }
-#if 0 // TODO: ??
- _DEV( REPORTED_STATS->force_waitcnt_converge = 1; )
-#endif
- }
-
- if (SWaitInst) {
- LLVM_DEBUG({
- SWaitInst->print(dbgs());
- dbgs() << "\nAdjusted score board:";
- ScoreBrackets->dump();
- });
-
- // Add this waitcnt to the block. It is either newly created or
- // created in previous iterations and added back since block traversal
- // always removes waitcnts.
- insertWaitcntBeforeCF(Block, SWaitInst);
- WaitcntData->setWaitcnt(SWaitInst);
- }
- }
- }
+ return Modified;
}
bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
@@ -1841,14 +1335,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
- MLI = &getAnalysis<MachineLoopInfo>();
- IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
+ IV = AMDGPU::getIsaVersion(ST->getCPU());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- AMDGPUASI = ST->getAMDGPUAS();
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1))
+ for (auto T : inst_counter_types())
ForceEmitWaitcnt[T] = false;
HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
@@ -1868,93 +1359,70 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
RegisterEncoding.SGPR0 + HardwareLimits.NumSGPRsMax - 1;
TrackedWaitcntSet.clear();
- BlockVisitedSet.clear();
VCCZBugHandledSet.clear();
- LoopWaitcntDataMap.clear();
- BlockWaitcntProcessedSet.clear();
+ RpotIdxMap.clear();
+ BlockInfos.clear();
+
+  // Keep iterating over the blocks in reverse post-order, inserting and
+  // updating s_waitcnt where needed, until a fixed point is reached.
+ for (MachineBasicBlock *MBB :
+ ReversePostOrderTraversal<MachineFunction *>(&MF)) {
+ RpotIdxMap[MBB] = BlockInfos.size();
+ BlockInfos.emplace_back(MBB);
+ }
- // Walk over the blocks in reverse post-dominator order, inserting
- // s_waitcnt where needed.
- ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ std::unique_ptr<WaitcntBrackets> Brackets;
bool Modified = false;
- for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
- I = RPOT.begin(),
- E = RPOT.end(), J = RPOT.begin();
- I != E;) {
- MachineBasicBlock &MBB = **I;
-
- BlockVisitedSet.insert(&MBB);
-
- BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
- if (!ScoreBrackets) {
- BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>(ST);
- ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
- }
- ScoreBrackets->setPostOrder(MBB.getNumber());
- MachineLoop *ContainingLoop = MLI->getLoopFor(&MBB);
- if (ContainingLoop && LoopWaitcntDataMap[ContainingLoop] == nullptr)
- LoopWaitcntDataMap[ContainingLoop] = llvm::make_unique<LoopWaitcntData>();
-
- // If we are walking into the block from before the loop, then guarantee
- // at least 1 re-walk over the loop to propagate the information, even if
- // no S_WAITCNT instructions were generated.
- if (ContainingLoop && ContainingLoop->getHeader() == &MBB) {
- unsigned Count = countNumBottomBlocks(ContainingLoop);
-
- // If the loop has multiple back-edges, and so more than one "bottom"
- // basic block, we have to guarantee a re-walk over every blocks.
- if ((std::count(BlockWaitcntProcessedSet.begin(),
- BlockWaitcntProcessedSet.end(), &MBB) < (int)Count)) {
- BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true);
- LLVM_DEBUG(dbgs() << "set-revisit1: Block"
- << ContainingLoop->getHeader()->getNumber() << '\n';);
+ bool Repeat;
+ do {
+ Repeat = false;
+
+ for (BlockInfo &BI : BlockInfos) {
+ if (!BI.Dirty)
+ continue;
+
+ unsigned Idx = std::distance(&*BlockInfos.begin(), &BI);
+
+ if (BI.Incoming) {
+ if (!Brackets)
+ Brackets = llvm::make_unique<WaitcntBrackets>(*BI.Incoming);
+ else
+ *Brackets = *BI.Incoming;
+ } else {
+ if (!Brackets)
+ Brackets = llvm::make_unique<WaitcntBrackets>(ST);
+ else
+ Brackets->clear();
}
- }
- // Walk over the instructions.
- insertWaitcntInBlock(MF, MBB);
-
- // Record that waitcnts have been processed at least once for this block.
- BlockWaitcntProcessedSet.push_back(&MBB);
-
- // See if we want to revisit the loop. If a loop has multiple back-edges,
- // we shouldn't revisit the same "bottom" basic block.
- if (ContainingLoop && isLoopBottom(ContainingLoop, &MBB) &&
- std::count(BlockWaitcntProcessedSet.begin(),
- BlockWaitcntProcessedSet.end(), &MBB) == 1) {
- MachineBasicBlock *EntryBB = ContainingLoop->getHeader();
- BlockWaitcntBrackets *EntrySB = BlockWaitcntBracketsMap[EntryBB].get();
- if (EntrySB && EntrySB->getRevisitLoop()) {
- EntrySB->setRevisitLoop(false);
- J = I;
- int32_t PostOrder = EntrySB->getPostOrder();
- // TODO: Avoid this loop. Find another way to set I.
- for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
- X = RPOT.begin(),
- Y = RPOT.end();
- X != Y; ++X) {
- MachineBasicBlock &MBBX = **X;
- if (MBBX.getNumber() == PostOrder) {
- I = X;
- break;
+ Modified |= insertWaitcntInBlock(MF, *BI.MBB, *Brackets);
+ BI.Dirty = false;
+
+ if (Brackets->hasPending()) {
+ BlockInfo *MoveBracketsToSucc = nullptr;
+ for (MachineBasicBlock *Succ : BI.MBB->successors()) {
+ unsigned SuccIdx = RpotIdxMap[Succ];
+ BlockInfo &SuccBI = BlockInfos[SuccIdx];
+ if (!SuccBI.Incoming) {
+ SuccBI.Dirty = true;
+ if (SuccIdx <= Idx)
+ Repeat = true;
+ if (!MoveBracketsToSucc) {
+ MoveBracketsToSucc = &SuccBI;
+ } else {
+ SuccBI.Incoming = llvm::make_unique<WaitcntBrackets>(*Brackets);
+ }
+ } else if (SuccBI.Incoming->merge(*Brackets)) {
+ SuccBI.Dirty = true;
+ if (SuccIdx <= Idx)
+ Repeat = true;
}
}
- LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
- WaitcntData->incIterCnt();
- LLVM_DEBUG(dbgs() << "revisit: Block" << EntryBB->getNumber() << '\n';);
- continue;
- } else {
- LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
- // Loop converged, reset iteration count. If this loop gets revisited,
- // it must be from an outer loop, the counter will restart, this will
- // ensure we don't force convergence on such revisits.
- WaitcntData->resetIterCnt();
+ if (MoveBracketsToSucc)
+ MoveBracketsToSucc->Incoming = std::move(Brackets);
}
}
-
- J = I;
- ++I;
- }
+ } while (Repeat);
SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
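
The rewritten runOnMachineFunction above drops the loop-revisit bookkeeping in favour of a plain worklist over blocks in reverse post order: a block is reprocessed whenever a predecessor changes its incoming wait-count state, and the sweep repeats until nothing is dirty. Below is a minimal, self-contained C++ sketch of that fixed-point pattern; the State type, its merge rule, and the block graph are illustrative stand-ins, not the pass's real WaitcntBrackets or BlockInfo structures.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Toy forward data-flow solver over blocks already numbered in reverse
    // post order. "State" stands in for the pass's wait-count brackets.
    struct State {
      int Pending = 0;
      // Returns true if merging In changed this state (drives re-iteration).
      bool merge(const State &In) {
        int Old = Pending;
        Pending = std::max(Pending, In.Pending);
        return Pending != Old;
      }
    };

    int main() {
      // Successor lists indexed by RPO position; block 2 has a back edge to 1.
      std::vector<std::vector<unsigned>> Succs = {{1}, {2}, {1, 3}, {}};
      std::vector<State> Incoming(Succs.size());
      std::vector<bool> Dirty(Succs.size(), true);
      Incoming[0].Pending = 3; // seed some pending work at the entry block

      bool Repeat;
      do {
        Repeat = false;
        for (unsigned Idx = 0; Idx < Succs.size(); ++Idx) {
          if (!Dirty[Idx])
            continue;
          Dirty[Idx] = false;
          State Out = Incoming[Idx]; // "process the block"
          for (unsigned Succ : Succs[Idx]) {
            if (Incoming[Succ].merge(Out)) {
              Dirty[Succ] = true;
              if (Succ <= Idx) // a back edge forces another sweep
                Repeat = true;
            }
          }
        }
      } while (Repeat);

      for (unsigned Idx = 0; Idx < Succs.size(); ++Idx)
        std::printf("block %u: pending=%d\n", Idx, Incoming[Idx].Pending);
      return 0;
    }
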
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index b73d30940fc3..65ffc27b8b60 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -121,6 +121,10 @@ class InstSI <dag outs, dag ins, string asm = "",
// This bit indicates that this is a D16 buffer instruction.
field bit D16Buf = 0;
+ // This bit indicates that this uses the floating point double precision
+ // rounding mode flags
+ field bit FPDPRounding = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -178,6 +182,8 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{50} = D16Buf;
+ let TSFlags{51} = FPDPRounding;
+
let SchedRW = [Write32Bit];
field bits<1> DisableSIDecoder = 0;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f3745382a6f4..2370d5fa7b27 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -264,9 +265,10 @@ static bool isStride64(unsigned Opc) {
}
}
-bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
+ MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
unsigned Opc = LdSt.getOpcode();
if (isDS(LdSt)) {
@@ -274,11 +276,10 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
getNamedOperand(LdSt, AMDGPU::OpName::offset);
if (OffsetImm) {
// Normal, single offset LDS instruction.
- const MachineOperand *AddrReg =
- getNamedOperand(LdSt, AMDGPU::OpName::addr);
-
- BaseReg = AddrReg->getReg();
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
Offset = OffsetImm->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
@@ -309,10 +310,10 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
if (isStride64(Opc))
EltSize *= 64;
- const MachineOperand *AddrReg =
- getNamedOperand(LdSt, AMDGPU::OpName::addr);
- BaseReg = AddrReg->getReg();
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
Offset = EltSize * Offset0;
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
@@ -324,19 +325,20 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
if (SOffset && SOffset->isReg())
return false;
- const MachineOperand *AddrReg =
- getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (!AddrReg)
return false;
const MachineOperand *OffsetImm =
getNamedOperand(LdSt, AMDGPU::OpName::offset);
- BaseReg = AddrReg->getReg();
+ BaseOp = AddrReg;
Offset = OffsetImm->getImm();
if (SOffset) // soffset can be an inline immediate.
Offset += SOffset->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
@@ -346,36 +348,46 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
if (!OffsetImm)
return false;
- const MachineOperand *SBaseReg =
- getNamedOperand(LdSt, AMDGPU::OpName::sbase);
- BaseReg = SBaseReg->getReg();
+ MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
+ BaseOp = SBaseReg;
Offset = OffsetImm->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
if (isFLAT(LdSt)) {
- const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (VAddr) {
// Can't analyze 2 offsets.
if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
return false;
- BaseReg = VAddr->getReg();
+ BaseOp = VAddr;
} else {
// scratch instructions have either vaddr or saddr.
- BaseReg = getNamedOperand(LdSt, AMDGPU::OpName::saddr)->getReg();
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
}
Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
return false;
}
-static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, unsigned BaseReg1,
- const MachineInstr &MI2, unsigned BaseReg2) {
- if (BaseReg1 == BaseReg2)
+static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
+ const MachineOperand &BaseOp1,
+ const MachineInstr &MI2,
+ const MachineOperand &BaseOp2) {
+ // Support only base operands with base registers.
+ // Note: this could be extended to support FI operands.
+ if (!BaseOp1.isReg() || !BaseOp2.isReg())
+ return false;
+
+ if (BaseOp1.isIdenticalTo(BaseOp2))
return true;
if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
@@ -401,12 +413,13 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, unsigned BaseReg1,
return Base1 == Base2;
}
-bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
- unsigned BaseReg1,
- MachineInstr &SecondLdSt,
- unsigned BaseReg2,
+bool SIInstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
+ MachineOperand &BaseOp2,
unsigned NumLoads) const {
- if (!memOpsHaveSameBasePtr(FirstLdSt, BaseReg1, SecondLdSt, BaseReg2))
+ MachineInstr &FirstLdSt = *BaseOp1.getParent();
+ MachineInstr &SecondLdSt = *BaseOp2.getParent();
+
+ if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOp1, SecondLdSt, BaseOp2))
return false;
const MachineOperand *FirstDst = nullptr;
@@ -863,7 +876,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const DebugLoc &DL = MBB.findDebugLoc(MI);
unsigned Size = FrameInfo.getObjectSize(FrameIndex);
unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
@@ -907,16 +920,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
return;
}
- if (!ST.isVGPRSpillingEnabled(MF->getFunction())) {
- LLVMContext &Ctx = MF->getFunction().getContext();
- Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
- " spill register");
- BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
- .addReg(SrcReg);
-
- return;
- }
-
assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
@@ -972,9 +975,9 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const DebugLoc &DL = MBB.findDebugLoc(MI);
unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
unsigned Size = FrameInfo.getObjectSize(FrameIndex);
unsigned SpillSize = TRI->getSpillSize(*RC);
@@ -986,6 +989,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
PtrInfo, MachineMemOperand::MOLoad, Size, Align);
if (RI.isSGPRClass(RC)) {
+ MFI->setHasSpilledSGPRs();
+
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
@@ -1009,15 +1014,6 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}
- if (!ST.isVGPRSpillingEnabled(MF->getFunction())) {
- LLVMContext &Ctx = MF->getFunction().getContext();
- Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
- " restore register");
- BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
-
- return;
- }
-
assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
@@ -1036,7 +1032,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const DebugLoc &DL = MBB.findDebugLoc(MI);
unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
unsigned WavefrontSize = ST.getWavefrontSize();
@@ -1044,7 +1040,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
if (!MFI->hasCalculatedTID()) {
MachineBasicBlock &Entry = MBB.getParent()->front();
MachineBasicBlock::iterator Insert = Entry.front();
- DebugLoc DL = Insert->getDebugLoc();
+ const DebugLoc &DL = Insert->getDebugLoc();
TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
*MF);
@@ -1421,10 +1417,15 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
unsigned &SrcOpIdx1) const {
- if (!MI.isCommutable())
+ return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1);
+}
+
+bool SIInstrInfo::findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0,
+ unsigned &SrcOpIdx1) const {
+ if (!Desc.isCommutable())
return false;
- unsigned Opc = MI.getOpcode();
+ unsigned Opc = Desc.getOpcode();
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if (Src0Idx == -1)
return false;
@@ -1549,8 +1550,9 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// buzz;
RS->enterBasicBlockEnd(MBB);
- unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass,
- MachineBasicBlock::iterator(GetPC), 0);
+ unsigned Scav = RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_64RegClass,
+ MachineBasicBlock::iterator(GetPC), false, 0);
MRI.replaceRegWith(PCReg, Scav);
MRI.clearVirtRegs();
RS->setRegUsed(Scav);
@@ -1644,7 +1646,34 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
MachineBasicBlock::iterator I = MBB.getFirstTerminator();
- if (I == MBB.end())
+ auto E = MBB.end();
+ if (I == E)
+ return false;
+
+ // Skip over the instructions that are artificial terminators for special
+ // exec management.
+ while (I != E && !I->isBranch() && !I->isReturn() &&
+ I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
+ switch (I->getOpcode()) {
+ case AMDGPU::SI_MASK_BRANCH:
+ case AMDGPU::S_MOV_B64_term:
+ case AMDGPU::S_XOR_B64_term:
+ case AMDGPU::S_ANDN2_B64_term:
+ break;
+ case AMDGPU::SI_IF:
+ case AMDGPU::SI_ELSE:
+ case AMDGPU::SI_KILL_I1_TERMINATOR:
+ case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
+ // FIXME: It's messy that these need to be considered here at all.
+ return true;
+ default:
+ llvm_unreachable("unexpected non-branch terminator inst");
+ }
+
+ ++I;
+ }
+
+ if (I == E)
return false;
if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
@@ -1933,20 +1962,20 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
}
unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const {
+ unsigned Kind) const {
switch(Kind) {
case PseudoSourceValue::Stack:
case PseudoSourceValue::FixedStack:
- return ST.getAMDGPUAS().PRIVATE_ADDRESS;
+ return AMDGPUAS::PRIVATE_ADDRESS;
case PseudoSourceValue::ConstantPool:
case PseudoSourceValue::GOT:
case PseudoSourceValue::JumpTable:
case PseudoSourceValue::GlobalValueCallEntry:
case PseudoSourceValue::ExternalSymbolCallEntry:
case PseudoSourceValue::TargetCustom:
- return ST.getAMDGPUAS().CONSTANT_ADDRESS;
+ return AMDGPUAS::CONSTANT_ADDRESS;
}
- return ST.getAMDGPUAS().FLAT_ADDRESS;
+ return AMDGPUAS::FLAT_ADDRESS;
}
static void removeModOperands(MachineInstr &MI) {
@@ -2066,12 +2095,40 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (Src2->isReg() && Src2->getReg() == Reg) {
// Not allowed to use constant bus for another operand.
// We can however allow an inline immediate as src0.
- if (!Src0->isImm() &&
- (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
- return false;
+ bool Src0Inlined = false;
+ if (Src0->isReg()) {
+ // Try to inline constant if possible.
+ // If the Def moves immediate and the use is single
+ // We are saving VGPR here.
+ MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg());
+ if (Def && Def->isMoveImmediate() &&
+ isInlineConstant(Def->getOperand(1)) &&
+ MRI->hasOneUse(Src0->getReg())) {
+ Src0->ChangeToImmediate(Def->getOperand(1).getImm());
+ Src0Inlined = true;
+ } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
+ (RI.isVirtualRegister(Src0->getReg()) &&
+ RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
+ return false;
+ // VGPR is okay as Src0 - fallthrough
+ }
- if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
- return false;
+ if (Src1->isReg() && !Src0Inlined ) {
+ // We have one slot for inlinable constant so far - try to fill it
+ MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
+ if (Def && Def->isMoveImmediate() &&
+ isInlineConstant(Def->getOperand(1)) &&
+ MRI->hasOneUse(Src1->getReg()) &&
+ commuteInstruction(UseMI)) {
+ Src0->ChangeToImmediate(Def->getOperand(1).getImm());
+ } else if ((RI.isPhysicalRegister(Src1->getReg()) &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
+ (RI.isVirtualRegister(Src1->getReg()) &&
+ RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+ return false;
+ // VGPR is okay as Src1 - fallthrough
+ }
const int64_t Imm = ImmOp->getImm();
@@ -2117,11 +2174,13 @@ static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
MachineInstr &MIb) const {
- unsigned BaseReg0, BaseReg1;
+ MachineOperand *BaseOp0, *BaseOp1;
int64_t Offset0, Offset1;
- if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
- getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
+ if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) &&
+ getMemOperandWithOffset(MIb, BaseOp1, Offset1, &RI)) {
+ if (!BaseOp0->isIdenticalTo(*BaseOp1))
+ return false;
if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
// FIXME: Handle ds_read2 / ds_write2.
@@ -2129,8 +2188,7 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
}
unsigned Width0 = (*MIa.memoperands_begin())->getSize();
unsigned Width1 = (*MIb.memoperands_begin())->getSize();
- if (BaseReg0 == BaseReg1 &&
- offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
+ if (offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
return true;
}
}
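
After the change, checkInstOffsetsDoNotOverlap first bails out when the two base operands are not identical and otherwise relies on the usual interval-disjointness test. The hunk only shows the signature of offsetsDoNotOverlap, so the body below is a plausible reconstruction of that test, not the file's actual implementation.

    #include <cassert>

    // Two accesses [OffsetA, OffsetA + WidthA) and [OffsetB, OffsetB + WidthB)
    // off the same base cannot overlap when the lower one ends before the
    // higher one begins.
    static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
                                    int WidthB, int OffsetB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      return LowOffset + LowWidth <= HighOffset;
    }

    int main() {
      assert(offsetsDoNotOverlap(4, 0, 4, 4));  // adjacent dwords
      assert(!offsetsDoNotOverlap(8, 0, 4, 4)); // second dword inside the first
      return 0;
    }
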
@@ -2398,8 +2456,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
int32_t Trunc = static_cast<int32_t>(Imm);
- return Trunc == Imm &&
- AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
+ return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
}
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -2523,6 +2580,115 @@ bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
hasModifiersSet(MI, AMDGPU::OpName::omod);
}
+bool SIInstrInfo::canShrink(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+ // Can't shrink instruction with three operands.
+ // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
+ // a special case for it. It can only be shrunk if the third operand
+ // is vcc. We should handle this the same way we handle vopc, by adding
+ // a register allocation hint pre-regalloc and then doing the shrinking
+ // post-regalloc.
+ if (Src2) {
+ switch (MI.getOpcode()) {
+ default: return false;
+
+ case AMDGPU::V_ADDC_U32_e64:
+ case AMDGPU::V_SUBB_U32_e64:
+ case AMDGPU::V_SUBBREV_U32_e64: {
+ const MachineOperand *Src1
+ = getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()))
+ return false;
+ // Additional verification is needed for sdst/src2.
+ return true;
+ }
+ case AMDGPU::V_MAC_F32_e64:
+ case AMDGPU::V_MAC_F16_e64:
+ case AMDGPU::V_FMAC_F32_e64:
+ if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
+ hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
+ return false;
+ break;
+
+ case AMDGPU::V_CNDMASK_B32_e64:
+ break;
+ }
+ }
+
+ const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
+ hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
+ return false;
+
+ // We don't need to check src0, all input types are legal, so just make sure
+ // src0 isn't using any modifiers.
+ if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
+ return false;
+
+ // Can it be shrunk to a valid 32 bit opcode?
+ if (!hasVALU32BitEncoding(MI.getOpcode()))
+ return false;
+
+ // Check output modifiers
+ return !hasModifiersSet(MI, AMDGPU::OpName::omod) &&
+ !hasModifiersSet(MI, AMDGPU::OpName::clamp);
+}
+
+// Set VCC operand with all flags from \p Orig, except for setting it as
+// implicit.
+static void copyFlagsToImplicitVCC(MachineInstr &MI,
+ const MachineOperand &Orig) {
+
+ for (MachineOperand &Use : MI.implicit_operands()) {
+ if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
+ Use.setIsUndef(Orig.isUndef());
+ Use.setIsKill(Orig.isKill());
+ return;
+ }
+ }
+}
+
+MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
+ unsigned Op32) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineInstrBuilder Inst32 =
+ BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));
+
+ // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
+ // For VOPC instructions, this is replaced by an implicit def of vcc.
+ int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
+ if (Op32DstIdx != -1) {
+ // dst
+ Inst32.add(MI.getOperand(0));
+ } else {
+ assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+ "Unexpected case");
+ }
+
+ Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0));
+
+ const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1)
+ Inst32.add(*Src1);
+
+ const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+
+ if (Src2) {
+ int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
+ if (Op32Src2Idx != -1) {
+ Inst32.add(*Src2);
+ } else {
+ // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
+ // replaced with an implicit read of vcc. This was already added
+ // during the initial BuildMI, so find it to preserve the flags.
+ copyFlagsToImplicitVCC(*Inst32, *Src2);
+ }
+ }
+
+ return Inst32;
+}
+
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
const MachineOperand &MO,
const MCOperandInfo &OpInfo) const {
@@ -2806,6 +2972,42 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ // Verify MIMG
+ if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
+ // Ensure that the return type used is large enough for all the options
+ // being used; TFE/LWE require an extra result register.
+ const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask);
+ if (DMask) {
+ uint64_t DMaskImm = DMask->getImm();
+ uint32_t RegCount =
+ isGather4(MI.getOpcode()) ? 4 : countPopulation(DMaskImm);
+ const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe);
+ const MachineOperand *LWE = getNamedOperand(MI, AMDGPU::OpName::lwe);
+ const MachineOperand *D16 = getNamedOperand(MI, AMDGPU::OpName::d16);
+
+ // Adjust for packed 16 bit values
+ if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
+ RegCount >>= 1;
+
+ // Adjust if using LWE or TFE
+ if ((LWE && LWE->getImm()) || (TFE && TFE->getImm()))
+ RegCount += 1;
+
+ const uint32_t DstIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
+ const MachineOperand &Dst = MI.getOperand(DstIdx);
+ if (Dst.isReg()) {
+ const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx);
+ uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
+ if (RegCount > DstSize) {
+ ErrInfo = "MIMG instruction returns too many registers for dst "
+ "register class";
+ return false;
+ }
+ }
+ }
+ }
+
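
The new MIMG check works out how many 32-bit result registers an image load should produce: one per dmask bit (always four for gather4), halved when D16 data is packed, plus one extra when TFE or LWE requests the status dword. A small standalone sketch of that arithmetic follows; the function name and flattened parameters are illustrative only.

    #include <bitset>
    #include <cstdint>
    #include <cstdio>

    // Expected number of 32-bit result registers for an image load.
    static unsigned expectedMIMGRegCount(uint64_t DMask, bool IsGather4,
                                         bool D16Packed, bool TFEorLWE) {
      unsigned Count =
          IsGather4 ? 4u : (unsigned)std::bitset<64>(DMask).count();
      if (D16Packed)
        Count >>= 1; // packed 16-bit data: two components per register
      if (TFEorLWE)
        Count += 1;  // one extra register for the TFE/LWE status word
      return Count;
    }

    int main() {
      std::printf("%u\n", expectedMIMGRegCount(0xF, false, false, true)); // 5
      std::printf("%u\n", expectedMIMGRegCount(0xF, false, true, false)); // 2
      return 0;
    }
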
// Verify VOP*. Ignore multiple sgpr operands on writelane.
if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32
&& (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI))) {
@@ -3001,6 +3203,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
+ case AMDGPU::S_XNOR_B32:
+ return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
@@ -3438,8 +3642,13 @@ void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
// pointer value is uniform.
MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
- unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
- SBase->setReg(SGPR);
+ unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
+ SBase->setReg(SGPR);
+ }
+ MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff);
+ if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) {
+ unsigned SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);
+ SOff->setReg(SGPR);
}
}
@@ -3475,7 +3684,191 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
FoldImmediate(*Copy, *Def, OpReg, &MRI);
}
-void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
+// Emit the actual waterfall loop, executing the wrapped instruction for each
+// unique value of \p Rsrc across all lanes. In the best case we execute 1
+// iteration, in the worst case we execute 64 (once per lane).
+static void
+emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
+ MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
+ const DebugLoc &DL, MachineOperand &Rsrc) {
+ MachineBasicBlock::iterator I = LoopBB.begin();
+
+ unsigned VRsrc = Rsrc.getReg();
+ unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
+
+ unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned CondReg0 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned CondReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned AndCond = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+ // Beginning of the loop, read the next Rsrc variant.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub0);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub1)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub1);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub2)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub2);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub3)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub3);
+
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc)
+ .addReg(SRsrcSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(SRsrcSub1)
+ .addImm(AMDGPU::sub1)
+ .addReg(SRsrcSub2)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcSub3)
+ .addImm(AMDGPU::sub3);
+
+ // Update Rsrc operand to use the SGPR Rsrc.
+ Rsrc.setReg(SRsrc);
+ Rsrc.setIsKill(true);
+
+ // Identify all lanes with identical Rsrc operands in their VGPRs.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg0)
+ .addReg(SRsrc, 0, AMDGPU::sub0_sub1)
+ .addReg(VRsrc, 0, AMDGPU::sub0_sub1);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg1)
+ .addReg(SRsrc, 0, AMDGPU::sub2_sub3)
+ .addReg(VRsrc, 0, AMDGPU::sub2_sub3);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_B64), AndCond)
+ .addReg(CondReg0)
+ .addReg(CondReg1);
+
+ MRI.setSimpleHint(SaveExec, AndCond);
+
+ // Update EXEC to matching lanes, saving original to SaveExec.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_SAVEEXEC_B64), SaveExec)
+ .addReg(AndCond, RegState::Kill);
+
+ // The original instruction is here; we insert the terminators after it.
+ I = LoopBB.end();
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(SaveExec);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
+}
+
+// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register
+// with SGPRs by iterating over all unique values across all lanes.
+static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
+ MachineOperand &Rsrc, MachineDominatorTree *MDT) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineBasicBlock::iterator I(&MI);
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+ // Save the EXEC mask
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B64), SaveExec)
+ .addReg(AMDGPU::EXEC);
+
+ // Killed uses in the instruction we are waterfalling around will be
+ // incorrect due to the added control-flow.
+ for (auto &MO : MI.uses()) {
+ if (MO.isReg() && MO.isUse()) {
+ MRI.clearKillFlags(MO.getReg());
+ }
+ }
+
+ // To insert the loop we need to split the block. Move everything after this
+ // point to a new block, and insert a new empty block between the two.
+ MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+
+ MF.insert(MBBI, LoopBB);
+ MF.insert(MBBI, RemainderBB);
+
+ LoopBB->addSuccessor(LoopBB);
+ LoopBB->addSuccessor(RemainderBB);
+
+ // Move MI to the LoopBB, and the remainder of the block to RemainderBB.
+ MachineBasicBlock::iterator J = I++;
+ RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+ LoopBB->splice(LoopBB->begin(), &MBB, J);
+
+ MBB.addSuccessor(LoopBB);
+
+ // Update dominators. We know that MBB immediately dominates LoopBB, that
+ // LoopBB immediately dominates RemainderBB, and that RemainderBB immediately
+ // dominates all of the successors transferred to it from MBB that MBB used
+ // to dominate.
+ if (MDT) {
+ MDT->addNewBlock(LoopBB, &MBB);
+ MDT->addNewBlock(RemainderBB, LoopBB);
+ for (auto &Succ : RemainderBB->successors()) {
+ if (MDT->dominates(&MBB, Succ)) {
+ MDT->changeImmediateDominator(Succ, RemainderBB);
+ }
+ }
+ }
+
+ emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, Rsrc);
+
+ // Restore the EXEC mask
+ MachineBasicBlock::iterator First = RemainderBB->begin();
+ BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(SaveExec);
+}
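
The two helpers above implement the classic waterfall pattern: while the resource descriptor lives in VGPRs it may differ per lane, so the loop repeatedly reads the value of the first active lane, masks execution down to the lanes that share it, runs the wrapped instruction, and clears those lanes from EXEC until none remain. The scalar C++ model below only mirrors that control flow with a plain array standing in for the 64 lanes; nothing in it is the pass's real API.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Each "lane" holds a resource value; the wrapped instruction must run
    // once per unique value with only the matching lanes enabled in EXEC.
    static void waterfall(const std::vector<uint32_t> &LaneRsrc) {
      uint64_t Exec = (LaneRsrc.size() >= 64)
                          ? ~0ull
                          : ((1ull << LaneRsrc.size()) - 1);
      while (Exec) {
        // v_readfirstlane: take the value of the lowest active lane.
        unsigned First = 0;
        while (!((Exec >> First) & 1))
          ++First;
        uint32_t Current = LaneRsrc[First];

        // v_cmp + s_and_saveexec: mask execution to the lanes that agree.
        uint64_t Match = 0;
        for (unsigned L = 0; L < LaneRsrc.size(); ++L)
          if (((Exec >> L) & 1) && LaneRsrc[L] == Current)
            Match |= 1ull << L;

        std::printf("iteration on rsrc=%u, lanes=0x%llx\n", Current,
                    (unsigned long long)Match); // "run the memory instruction"

        // s_xor_b64_term: turn the finished lanes off, loop while any remain.
        Exec &= ~Match;
      }
    }

    int main() {
      waterfall({7, 7, 3, 7, 3, 9}); // three unique values -> three iterations
      return 0;
    }
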
+
+// Extract pointer from Rsrc and return a zero-value Rsrc replacement.
+static std::tuple<unsigned, unsigned>
+extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Extract the ptr from the resource descriptor.
+ unsigned RsrcPtr =
+ TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
+ AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
+
+ // Create an empty resource descriptor
+ unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
+
+ // Zero64 = 0
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B64), Zero64)
+ .addImm(0);
+
+ // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
+ .addImm(RsrcDataFormat & 0xFFFFFFFF);
+
+ // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
+ .addImm(RsrcDataFormat >> 32);
+
+ // NewSRsrc = {Zero64, SRsrcFormat}
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::REG_SEQUENCE), NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
+
+ return std::make_tuple(RsrcPtr, NewSRsrc);
+}
+
+void SIInstrInfo::legalizeOperands(MachineInstr &MI,
+ MachineDominatorTree *MDT) const {
MachineFunction &MF = *MI.getParent()->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -3617,75 +4010,56 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
return;
}
- // Legalize MUBUF* instructions by converting to addr64 form.
- // FIXME: If we start using the non-addr64 instructions for compute, we
- // may need to legalize them as above. This especially applies to the
- // buffer_load_format_* variants and variants with idxen (or bothen).
- int SRsrcIdx =
+ // Legalize MUBUF* instructions.
+ int RsrcIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
- if (SRsrcIdx != -1) {
+ if (RsrcIdx != -1) {
// We have an MUBUF instruction
- MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx);
- unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass;
- if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
- RI.getRegClass(SRsrcRC))) {
+ MachineOperand *Rsrc = &MI.getOperand(RsrcIdx);
+ unsigned RsrcRC = get(MI.getOpcode()).OpInfo[RsrcIdx].RegClass;
+ if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()),
+ RI.getRegClass(RsrcRC))) {
// The operands are legal.
// FIXME: We may need to legalize operands besides srsrc.
return;
}
- MachineBasicBlock &MBB = *MI.getParent();
-
- // Extract the ptr from the resource descriptor.
- unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
+ // Legalize a VGPR Rsrc.
+ //
+ // If the instruction is _ADDR64, we can avoid a waterfall by extracting
+ // the base pointer from the VGPR Rsrc, adding it to the VAddr, then using
+ // a zero-value SRsrc.
+ //
+ // If the instruction is _OFFSET (both idxen and offen disabled), and we
+ // support ADDR64 instructions, we can convert to ADDR64 and do the same as
+ // above.
+ //
+ // Otherwise we are on non-ADDR64 hardware, and/or we have
+ // idxen/offen/bothen and we fall back to a waterfall loop.
- // Create an empty resource descriptor
- unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
- uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
-
- // Zero64 = 0
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64)
- .addImm(0);
-
- // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
- .addImm(RsrcDataFormat & 0xFFFFFFFF);
-
- // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
- .addImm(RsrcDataFormat >> 32);
-
- // NewSRsrc = {Zero64, SRsrcFormat}
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
- .addReg(Zero64)
- .addImm(AMDGPU::sub0_sub1)
- .addReg(SRsrcFormatLo)
- .addImm(AMDGPU::sub2)
- .addReg(SRsrcFormatHi)
- .addImm(AMDGPU::sub3);
+ MachineBasicBlock &MBB = *MI.getParent();
MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
- unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- if (VAddr) {
+ if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) {
// This is already an ADDR64 instruction so we need to add the pointer
// extracted from the resource descriptor to the current value of VAddr.
unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
+ unsigned RsrcPtr, NewSRsrc;
+ std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
+
+ // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
DebugLoc DL = MI.getDebugLoc();
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
- .addReg(SRsrcPtr, 0, AMDGPU::sub0)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
+ .addReg(RsrcPtr, 0, AMDGPU::sub0)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
- // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
+ // NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
- .addReg(SRsrcPtr, 0, AMDGPU::sub1)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
+ .addReg(RsrcPtr, 0, AMDGPU::sub1)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
// NewVaddr = {NewVaddrHi, NewVaddrLo}
BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
@@ -3693,13 +4067,20 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
.addImm(AMDGPU::sub0)
.addReg(NewVAddrHi)
.addImm(AMDGPU::sub1);
- } else {
+
+ VAddr->setReg(NewVAddr);
+ Rsrc->setReg(NewSRsrc);
+ } else if (!VAddr && ST.hasAddr64()) {
// This instruction is the _OFFSET variant, so we need to convert it to
// ADDR64.
assert(MBB.getParent()->getSubtarget<GCNSubtarget>().getGeneration()
< AMDGPUSubtarget::VOLCANIC_ISLANDS &&
"FIXME: Need to emit flat atomics here");
+ unsigned RsrcPtr, NewSRsrc;
+ std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
+
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
@@ -3715,10 +4096,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
MachineInstrBuilder MIB =
BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
.add(*VData)
- .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
- // This will be replaced later
- // with the new value of vaddr.
- .add(*SRsrc)
+ .addReg(NewVAddr)
+ .addReg(NewSRsrc)
.add(*SOffset)
.add(*Offset);
@@ -3735,21 +4114,19 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
MIB.addImm(TFE->getImm());
}
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
Addr64 = MIB;
} else {
// Atomics with return.
Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
.add(*VData)
.add(*VDataIn)
- .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
- // This will be replaced later
- // with the new value of vaddr.
- .add(*SRsrc)
+ .addReg(NewVAddr)
+ .addReg(NewSRsrc)
.add(*SOffset)
.add(*Offset)
.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .cloneMemRefs(MI);
}
MI.removeFromParent();
@@ -3757,23 +4134,20 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
// NewVaddr = {NewVaddrHi, NewVaddrLo}
BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
NewVAddr)
- .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+ .addReg(RsrcPtr, 0, AMDGPU::sub0)
.addImm(AMDGPU::sub0)
- .addReg(SRsrcPtr, 0, AMDGPU::sub1)
+ .addReg(RsrcPtr, 0, AMDGPU::sub1)
.addImm(AMDGPU::sub1);
-
- VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr);
- SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc);
+ } else {
+ // This is another variant; legalize Rsrc with waterfall loop from VGPRs
+ // to SGPRs.
+ loadSRsrcFromVGPR(*this, MI, *Rsrc, MDT);
}
-
- // Update the instruction to use NewVaddr
- VAddr->setReg(NewVAddr);
- // Update the instruction to use NewSRsrc
- SRsrc->setReg(NewSRsrc);
}
}
-void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
+void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
+ MachineDominatorTree *MDT) const {
SetVectorType Worklist;
Worklist.insert(&TopInst);
@@ -3791,34 +4165,62 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
break;
case AMDGPU::S_ADD_U64_PSEUDO:
case AMDGPU::S_SUB_U64_PSEUDO:
- splitScalar64BitAddSub(Worklist, Inst);
+ splitScalar64BitAddSub(Worklist, Inst, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_ADD_I32:
case AMDGPU::S_SUB_I32:
// FIXME: The u32 versions currently selected use the carry.
- if (moveScalarAddSub(Worklist, Inst))
+ if (moveScalarAddSub(Worklist, Inst, MDT))
continue;
// Default handling
break;
case AMDGPU::S_AND_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_OR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_XOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_NAND_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_NOR_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_XNOR_B64:
+ if (ST.hasDLInsts())
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
+ else
+ splitScalar64BitXnor(Worklist, Inst, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_ANDN2_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_ORN2_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_NOT_B64:
- splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
Inst.eraseFromParent();
continue;
@@ -3899,90 +4301,31 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
Inst.eraseFromParent();
continue;
- case AMDGPU::S_XNOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32);
+ case AMDGPU::S_NAND_B32:
+ splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
Inst.eraseFromParent();
continue;
- case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: {
- unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff);
- auto Add = MRI.getUniqueVRegDef(VAddr->getReg());
- unsigned Offset = 0;
-
- // FIXME: This isn't safe because the addressing mode doesn't work
- // correctly if vaddr is negative.
- //
- // FIXME: Should probably be done somewhere else, maybe SIFoldOperands.
- //
- // See if we can extract an immediate offset by recognizing one of these:
- // V_ADD_I32_e32 dst, imm, src1
- // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
- // V_ADD will be removed by "Remove dead machine instructions".
- if (Add &&
- (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 ||
- Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) {
- static const unsigned SrcNames[2] = {
- AMDGPU::OpName::src0,
- AMDGPU::OpName::src1,
- };
-
- // Find a literal offset in one of source operands.
- for (int i = 0; i < 2; i++) {
- const MachineOperand *Src =
- getNamedOperand(*Add, SrcNames[i]);
-
- if (Src->isReg()) {
- auto Mov = MRI.getUniqueVRegDef(Src->getReg());
- if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
- Src = &Mov->getOperand(1);
- }
-
- if (Src) {
- if (Src->isImm())
- Offset = Src->getImm();
- else if (Src->isCImm())
- Offset = Src->getCImm()->getZExtValue();
- }
-
- if (Offset && isLegalMUBUFImmOffset(Offset)) {
- VAddr = getNamedOperand(*Add, SrcNames[!i]);
- break;
- }
-
- Offset = 0;
- }
- }
+ case AMDGPU::S_NOR_B32:
+ splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst.eraseFromParent();
+ continue;
- MachineInstr *NewInstr =
- BuildMI(*MBB, Inst, Inst.getDebugLoc(),
- get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
- .add(*VAddr) // vaddr
- .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
- .addImm(0) // soffset
- .addImm(Offset) // offset
- .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
- .addImm(0) // slc
- .addImm(0) // tfe
- .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end())
- .getInstr();
-
- MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
- VDst);
- addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
+ case AMDGPU::S_ANDN2_B32:
+ splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
Inst.eraseFromParent();
+ continue;
- // Legalize all operands other than the offset. Notably, convert the srsrc
- // into SGPRs using v_readfirstlane if needed.
- legalizeOperands(*NewInstr);
+ case AMDGPU::S_ORN2_B32:
+ splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst.eraseFromParent();
continue;
}
- }
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
// We cannot move this instruction to the VALU, so we should try to
// legalize its operands instead.
- legalizeOperands(Inst);
+ legalizeOperands(Inst, MDT);
continue;
}
@@ -4071,7 +4414,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
// Legalize the operands
- legalizeOperands(Inst);
+ legalizeOperands(Inst, MDT);
if (HasDst)
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
@@ -4079,8 +4422,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
// Add/sub require special handling to deal with carry outs.
-bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist,
- MachineInstr &Inst) const {
+bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
if (ST.hasAddNoCarry()) {
// Assume there is no user of scc since we don't select this in that case.
// Since scc isn't used, it doesn't really matter if the i32 or u32 variant
@@ -4104,7 +4447,7 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist,
Inst.setDesc(get(NewOpc));
Inst.addImplicitDefUseOperands(*MBB.getParent());
MRI.replaceRegWith(OldDstReg, ResultReg);
- legalizeOperands(Inst);
+ legalizeOperands(Inst, MDT);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
return true;
@@ -4151,23 +4494,116 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
MachineOperand &Src0 = Inst.getOperand(1);
MachineOperand &Src1 = Inst.getOperand(2);
- legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
- legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
-
- unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
if (ST.hasDLInsts()) {
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
+ legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
+
BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest)
.add(Src0)
.add(Src1);
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
+ addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
} else {
- unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor)
- .add(Src0)
+ // Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can
+ // invert either source and then perform the XOR. If either source is a
+ // scalar register, then we can leave the inversion on the scalar unit to
+ // achieve a better distribution of scalar and vector instructions.
+ bool Src0IsSGPR = Src0.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
+ bool Src1IsSGPR = Src1.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
+ MachineInstr *Not = nullptr;
+ MachineInstr *Xor = nullptr;
+ unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ // Build a pair of scalar instructions and add them to the work list.
+ // The next iteration over the work list will lower these to the vector
+ // unit as necessary.
+ if (Src0IsSGPR) {
+ Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
+ .add(Src0);
+ Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
+ .addReg(Temp)
.add(Src1);
+ } else if (Src1IsSGPR) {
+ Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
+ .add(Src1);
+ Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
+ .add(Src0)
+ .addReg(Temp);
+ } else {
+ Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp)
+ .add(Src0)
+ .add(Src1);
+ Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
+ .addReg(Temp);
+ Worklist.insert(Not);
+ }
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
- BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), NewDest)
- .addReg(Xor);
+ Worklist.insert(Xor);
+
+ addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
}
+}
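
The non-DL path of lowerScalarXnor leans on the identity !(x ^ y) == (!x ^ y) == (x ^ !y), which lets the NOT stay on whichever operand is already scalar. A quick standalone check of that identity on 32-bit values, purely for illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEFu, y = 0x01234567u;
      uint32_t xnor = ~(x ^ y);
      assert(xnor == (~x ^ y)); // invert the first source instead
      assert(xnor == (x ^ ~y)); // or invert the second source
      return 0;
    }
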
+
+void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm)
+ .add(Src0)
+ .add(Src1);
+
+ MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
+ .addReg(Interm);
+
+ Worklist.insert(&Op);
+ Worklist.insert(&Not);
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
+ addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
+}
+
+void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm)
+ .add(Src1);
+
+ MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest)
+ .add(Src0)
+ .addReg(Interm);
+
+ Worklist.insert(&Not);
+ Worklist.insert(&Op);
MRI.replaceRegWith(Dest.getReg(), NewDest);
addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
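
splitScalarNotBinop and splitScalarBinOpN2 rewrite the composite scalar opcodes in terms of primitives the later VALU lowering already handles: S_NAND/S_NOR become the base op followed by a NOT of the intermediate, while S_ANDN2/S_ORN2 become a NOT of the second source followed by the base op. The sketch below restates those two lowerings on plain integers, mirroring the Interm/NewDest split; it is illustrative only.

    #include <cassert>
    #include <cstdint>

    // s_nand_b32 / s_nor_b32: base op into a temporary, then invert it.
    static uint32_t lowerNotBinop(uint32_t A, uint32_t B, bool IsAnd) {
      uint32_t Interm = IsAnd ? (A & B) : (A | B);
      return ~Interm;
    }

    // s_andn2_b32 / s_orn2_b32: invert the second source, then apply the op.
    static uint32_t lowerBinOpN2(uint32_t A, uint32_t B, bool IsAnd) {
      uint32_t Interm = ~B;
      return IsAnd ? (A & Interm) : (A | Interm);
    }

    int main() {
      uint32_t A = 0xF0F0F0F0u, B = 0x0FF00FF0u;
      assert(lowerNotBinop(A, B, true)  == uint32_t(~(A & B))); // NAND
      assert(lowerNotBinop(A, B, false) == uint32_t(~(A | B))); // NOR
      assert(lowerBinOpN2(A, B, true)   == (A & ~B));           // ANDN2
      assert(lowerBinOpN2(A, B, false)  == (A | ~B));           // ORN2
      return 0;
    }
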
@@ -4200,13 +4636,13 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
- BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
+ MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
- BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
+ MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
@@ -4217,6 +4653,9 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+ Worklist.insert(&LoHalf);
+ Worklist.insert(&HiHalf);
+
// We don't need to legalizeOperands here because for a single operand, src0
// will support any kind of input.
@@ -4224,8 +4663,9 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitAddSub(
- SetVectorType &Worklist, MachineInstr &Inst) const {
+void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
MachineBasicBlock &MBB = *Inst.getParent();
@@ -4285,16 +4725,16 @@ void SIInstrInfo::splitScalar64BitAddSub(
// Try to legalize the operands in case we need to swap the order to keep it
// valid.
- legalizeOperands(*LoHalf);
- legalizeOperands(*HiHalf);
+ legalizeOperands(*LoHalf, MDT);
+ legalizeOperands(*HiHalf, MDT);
// Move all users of this moved value.
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitBinaryOp(
- SetVectorType &Worklist, MachineInstr &Inst,
- unsigned Opcode) const {
+void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
+ MachineInstr &Inst, unsigned Opcode,
+ MachineDominatorTree *MDT) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -4321,6 +4761,10 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
AMDGPU::sub0, Src0SubRC);
MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
AMDGPU::sub0, Src1SubRC);
+ MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub1, Src0SubRC);
+ MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
+ AMDGPU::sub1, Src1SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
@@ -4331,11 +4775,6 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
.add(SrcReg0Sub0)
.add(SrcReg1Sub0);
- MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub1, Src0SubRC);
- MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
- AMDGPU::sub1, Src1SubRC);
-
unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
.add(SrcReg0Sub1)
@@ -4350,22 +4789,62 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
- // Try to legalize the operands in case we need to swap the order to keep it
- // valid.
- legalizeOperands(LoHalf);
- legalizeOperands(HiHalf);
+ Worklist.insert(&LoHalf);
+ Worklist.insert(&HiHalf);
// Move all users of this moved value.
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
+void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ MachineBasicBlock::iterator MII = Inst;
+
+ const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
+
+ unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ MachineOperand* Op0;
+ MachineOperand* Op1;
+
+ if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) {
+ Op0 = &Src0;
+ Op1 = &Src1;
+ } else {
+ Op0 = &Src1;
+ Op1 = &Src0;
+ }
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
+ .add(*Op0);
+
+ unsigned NewDest = MRI.createVirtualRegister(DestRC);
+
+ MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest)
+ .addReg(Interm)
+ .add(*Op1);
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
+
+ Worklist.insert(&Xor);
+}
+
void SIInstrInfo::splitScalar64BitBCNT(
SetVectorType &Worklist, MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineBasicBlock::iterator MII = Inst;
- DebugLoc DL = Inst.getDebugLoc();
+ const DebugLoc &DL = Inst.getDebugLoc();
MachineOperand &Dest = Inst.getOperand(0);
MachineOperand &Src = Inst.getOperand(1);
@@ -4401,7 +4880,7 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineBasicBlock::iterator MII = Inst;
- DebugLoc DL = Inst.getDebugLoc();
+ const DebugLoc &DL = Inst.getDebugLoc();
MachineOperand &Dest = Inst.getOperand(0);
uint32_t Imm = Inst.getOperand(2).getImm();
@@ -4546,10 +5025,10 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(
make_range(MachineBasicBlock::iterator(SCCDefInst),
SCCDefInst.getParent()->end())) {
// Exit if we find another SCC def.
- if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
+ if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
return;
- if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
+ if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
Worklist.insert(&MI);
}
}
@@ -4716,7 +5195,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
return AMDGPU::NoRegister;
assert(!MI.memoperands_empty() &&
- (*MI.memoperands_begin())->getAddrSpace() == ST.getAMDGPUAS().PRIVATE_ADDRESS);
+ (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
FrameIndex = Addr->getIndex();
return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
@@ -4777,12 +5256,6 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// If we have a definitive size, we can use it. Otherwise we need to inspect
// the operands to know the size.
- //
- // FIXME: Instructions that have a base 32-bit encoding report their size as
- // 4, even though they are really 8 bytes if they have a literal operand.
- if (DescSize != 0 && DescSize != 4)
- return DescSize;
-
if (isFixedSize(MI))
return DescSize;
@@ -4791,23 +5264,27 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (isVALU(MI) || isSALU(MI)) {
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if (Src0Idx == -1)
- return 4; // No operands.
+ return DescSize; // No operands.
if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
- return 8;
+ return DescSize + 4;
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
if (Src1Idx == -1)
- return 4;
+ return DescSize;
if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
- return 8;
+ return DescSize + 4;
- return 4;
- }
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ if (Src2Idx == -1)
+ return DescSize;
+
+ if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
+ return DescSize + 4;
- if (DescSize == 4)
- return 4;
+ return DescSize;
+ }
switch (Opc) {
case TargetOpcode::IMPLICIT_DEF:
@@ -4823,7 +5300,7 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
default:
- llvm_unreachable("unable to find instruction size");
+ return DescSize;
}
}
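For reference, the reworked getInstSizeInBytes logic above no longer hard-codes 4 or 8 bytes: it starts from the descriptor size and adds 4 bytes as soon as one of src0/src1/src2 is a literal constant. A minimal standalone C++ sketch of that accounting, using hypothetical Operand/IsLiteral stand-ins rather than the real MachineOperand/isLiteralConstantLike() API:

#include <vector>

// Hypothetical stand-in for a MachineOperand that may be a literal constant.
struct Operand { bool IsLiteral; };

// Encoded size: the descriptor size plus one extra dword (4 bytes) for the
// first literal constant found among the source operands.
unsigned instSizeInBytes(unsigned DescSize, const std::vector<Operand> &Srcs) {
  for (const Operand &Op : Srcs)
    if (Op.IsLiteral)
      return DescSize + 4; // a literal adds one extra dword to the encoding
  return DescSize;         // no literal: the descriptor size is already exact
}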
@@ -4835,7 +5312,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
return true;
for (const MachineMemOperand *MMO : MI.memoperands()) {
- if (MMO->getAddrSpace() == ST.getAMDGPUAS().FLAT_ADDRESS)
+ if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
return true;
}
return false;
@@ -5069,3 +5546,84 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
return MCOp;
}
+
+static
+TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) {
+ assert(RegOpnd.isReg());
+ return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() :
+ getRegSubRegPair(RegOpnd);
+}
+
+TargetInstrInfo::RegSubRegPair
+llvm::getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg) {
+ assert(MI.isRegSequence());
+ for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I)
+ if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
+ auto &RegOp = MI.getOperand(1 + 2 * I);
+ return getRegOrUndef(RegOp);
+ }
+ return TargetInstrInfo::RegSubRegPair();
+}
+
+// Try to find the definition of reg:subreg in subreg-manipulation pseudos
+// Following a subreg of reg:subreg isn't supported
+static bool followSubRegDef(MachineInstr &MI,
+ TargetInstrInfo::RegSubRegPair &RSR) {
+ if (!RSR.SubReg)
+ return false;
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::REG_SEQUENCE:
+ RSR = getRegSequenceSubReg(MI, RSR.SubReg);
+ return true;
+  // EXTRACT_SUBREG isn't supported as this would follow a subreg of a subreg
+ case AMDGPU::INSERT_SUBREG:
+ if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
+ // inserted the subreg we're looking for
+ RSR = getRegOrUndef(MI.getOperand(2));
+ else { // the subreg in the rest of the reg
+ auto R1 = getRegOrUndef(MI.getOperand(1));
+ if (R1.SubReg) // subreg of subreg isn't supported
+ return false;
+ RSR.Reg = R1.Reg;
+ }
+ return true;
+ }
+ return false;
+}
+
+MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
+ MachineRegisterInfo &MRI) {
+ assert(MRI.isSSA());
+ if (!TargetRegisterInfo::isVirtualRegister(P.Reg))
+ return nullptr;
+
+ auto RSR = P;
+ auto *DefInst = MRI.getVRegDef(RSR.Reg);
+ while (auto *MI = DefInst) {
+ DefInst = nullptr;
+ switch (MI->getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::V_MOV_B32_e32: {
+ auto &Op1 = MI->getOperand(1);
+ if (Op1.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(Op1.getReg())) {
+ if (Op1.isUndef())
+ return nullptr;
+ RSR = getRegSubRegPair(Op1);
+ DefInst = MRI.getVRegDef(RSR.Reg);
+ }
+ break;
+ }
+ default:
+ if (followSubRegDef(*MI, RSR)) {
+ if (!RSR.Reg)
+ return nullptr;
+ DefInst = MRI.getVRegDef(RSR.Reg);
+ }
+ }
+ if (!DefInst)
+ return MI;
+ }
+ return nullptr;
+}
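As a side note on the getRegSequenceSubReg() helper added above: REG_SEQUENCE operands come in (source register, sub-register index) pairs after the single def, and the loop returns the source paired with the requested index. A standalone sketch with simplified, hypothetical types (the real code returns a TargetInstrInfo::RegSubRegPair and respects undef operands):

#include <cstddef>
#include <utility>
#include <vector>

using RegSubRegPair = std::pair<unsigned, unsigned>; // {Reg, SubReg}

// Ops models MI.getOperand(1..N): alternating source register / subreg index.
RegSubRegPair findSubRegSource(const std::vector<unsigned> &Ops,
                               unsigned WantedSubReg) {
  for (std::size_t I = 0, E = Ops.size() / 2; I < E; ++I)
    if (Ops[2 * I + 1] == WantedSubReg)  // the subreg-index slot of this pair
      return {Ops[2 * I], WantedSubReg}; // its source register
  return {0, 0}; // not found: treated as an undef pair
}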
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index d681b926504e..5b1a05f3785e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -37,6 +37,7 @@
namespace llvm {
class APInt;
+class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class GCNSubtarget;
@@ -79,8 +80,8 @@ public:
private:
void swapOperands(MachineInstr &Inst) const;
- bool moveScalarAddSub(SetVectorType &Worklist,
- MachineInstr &Inst) const;
+ bool moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
void lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const;
@@ -88,14 +89,26 @@ private:
void lowerScalarXnor(SetVectorType &Worklist,
MachineInstr &Inst) const;
+ void splitScalarNotBinop(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const;
+
+ void splitScalarBinOpN2(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const;
+
void splitScalar64BitUnaryOp(SetVectorType &Worklist,
MachineInstr &Inst, unsigned Opcode) const;
- void splitScalar64BitAddSub(SetVectorType &Worklist,
- MachineInstr &Inst) const;
+ void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
+
+ void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
+ unsigned Opcode,
+ MachineDominatorTree *MDT = nullptr) const;
- void splitScalar64BitBinaryOp(SetVectorType &Worklist,
- MachineInstr &Inst, unsigned Opcode) const;
+ void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
void splitScalar64BitBCNT(SetVectorType &Worklist,
MachineInstr &Inst) const;
@@ -160,12 +173,11 @@ public:
int64_t &Offset1,
int64_t &Offset2) const override;
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const final;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const final;
- bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
- MachineInstr &SecondLdSt, unsigned BaseReg2,
+ bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2,
unsigned NumLoads) const override;
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
@@ -225,6 +237,9 @@ public:
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
+ bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0,
+ unsigned & SrcOpIdx1) const;
+
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;
@@ -276,7 +291,7 @@ public:
unsigned TrueReg, unsigned FalseReg) const;
unsigned getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const override;
+ unsigned Kind) const override;
bool
areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
@@ -589,6 +604,14 @@ public:
return MI.getDesc().TSFlags & ClampFlags;
}
+ static bool usesFPDPRounding(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
+ }
+
+ bool usesFPDPRounding(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();
@@ -689,6 +712,12 @@ public:
unsigned OpName) const;
bool hasAnyModifiersSet(const MachineInstr &MI) const;
+ bool canShrink(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const;
+
+ MachineInstr *buildShrunkInst(MachineInstr &MI,
+ unsigned NewOpcode) const;
+
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
@@ -719,6 +748,16 @@ public:
/// This form should usually be preferred since it handles operands
/// with unknown register classes.
unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) {
+ if (unsigned SubReg = MO.getSubReg()) {
+ assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
+ MI.getParent()->getParent()->getRegInfo().
+ getRegClass(MO.getReg()), SubReg)) >= 32 &&
+ "Sub-dword subregs are not supported");
+ return RI.getSubRegIndexLaneMask(SubReg).getNumLanes() * 4;
+ }
+ }
return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
}
@@ -777,14 +816,16 @@ public:
MachineOperand &Op, MachineRegisterInfo &MRI,
const DebugLoc &DL) const;
- /// Legalize all operands in this instruction. This function may
- /// create new instruction and insert them before \p MI.
- void legalizeOperands(MachineInstr &MI) const;
+ /// Legalize all operands in this instruction. This function may create new
+ /// instructions and control-flow around \p MI. If present, \p MDT is
+ /// updated.
+ void legalizeOperands(MachineInstr &MI,
+ MachineDominatorTree *MDT = nullptr) const;
/// Replace this instruction's opcode with the equivalent VALU
/// opcode. This function will also move the users of \p MI to the
- /// VALU if necessary.
- void moveToVALU(MachineInstr &MI) const;
+ /// VALU if necessary. If present, \p MDT is updated.
+ void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
int Count) const;
@@ -885,9 +926,36 @@ public:
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
-
};
+/// \brief Returns true if the reg:subreg pair P belongs to the register class TRC
+inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
+ const TargetRegisterClass &TRC,
+ MachineRegisterInfo &MRI) {
+ auto *RC = MRI.getRegClass(P.Reg);
+ if (!P.SubReg)
+ return RC == &TRC;
+ auto *TRI = MRI.getTargetRegisterInfo();
+ return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
+}
+
+/// \brief Create RegSubRegPair from a register MachineOperand
+inline
+TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
+ assert(O.isReg());
+ return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
+}
+
+/// \brief Return the SubReg component from REG_SEQUENCE
+TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
+ unsigned SubReg);
+
+/// \brief Return the defining instruction for a given reg:subreg pair
+/// skipping copy-like instructions and subreg-manipulation pseudos.
+/// Following another subreg of a reg:subreg isn't supported.
+MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
+ MachineRegisterInfo &MRI);
+
namespace AMDGPU {
LLVM_READONLY
@@ -900,6 +968,9 @@ namespace AMDGPU {
int getSDWAOp(uint16_t Opcode);
LLVM_READONLY
+ int getDPPOp32(uint16_t Opcode);
+
+ LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);
LLVM_READONLY
@@ -911,6 +982,12 @@ namespace AMDGPU {
LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);
+ /// Check if \p Opcode is an Addr64 opcode.
+ ///
+ /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
+ LLVM_READONLY
+ int getIfAddr64Inst(uint16_t Opcode);
+
LLVM_READONLY
int getMUBUFNoLdsInst(uint16_t Opcode);
@@ -923,6 +1000,9 @@ namespace AMDGPU {
LLVM_READONLY
int getSOPKOp(uint16_t Opcode);
+ LLVM_READONLY
+ int getGlobalSaddrOp(uint16_t Opcode);
+
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 8fa37aa83dae..13afa4d4974b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -40,9 +40,9 @@ def SIEncodingFamily {
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
-def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
- SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
- [SDNPMayLoad, SDNPMemOperand]
+def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
+ SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>]>,
+ [SDNPMayLoad, SDNPMemOperand]
>;
def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
@@ -69,36 +69,34 @@ def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
-def SDTbuffer_load : SDTypeProfile<1, 9,
+def SDTtbuffer_load : SDTypeProfile<1, 8,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
SDTCisVT<2, i32>, // vindex(VGPR)
SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // dfmt(imm)
- SDTCisVT<7, i32>, // nfmt(imm)
- SDTCisVT<8, i32>, // glc(imm)
- SDTCisVT<9, i32> // slc(imm)
+ SDTCisVT<6, i32>, // format(imm)
+ SDTCisVT<7, i32>, // cachecontrol(imm)
+ SDTCisVT<8, i1> // idxen(imm)
]>;
-def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTbuffer_load,
+def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
- SDTbuffer_load,
+ SDTtbuffer_load,
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
-def SDTtbuffer_store : SDTypeProfile<0, 10,
+def SDTtbuffer_store : SDTypeProfile<0, 9,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
SDTCisVT<2, i32>, // vindex(VGPR)
SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // dfmt(imm)
- SDTCisVT<7, i32>, // nfmt(imm)
- SDTCisVT<8, i32>, // glc(imm)
- SDTCisVT<9, i32> // slc(imm)
+ SDTCisVT<6, i32>, // format(imm)
+ SDTCisVT<7, i32>, // cachecontrol(imm)
+ SDTCisVT<8, i1> // idxen(imm)
]>;
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
@@ -110,13 +108,15 @@ def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
SDTtbuffer_store,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
-def SDTBufferLoad : SDTypeProfile<1, 5,
+def SDTBufferLoad : SDTypeProfile<1, 7,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
- SDTCisVT<2, i32>, // vindex
- SDTCisVT<3, i32>, // offset
- SDTCisVT<4, i1>, // glc
- SDTCisVT<5, i1>]>; // slc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // offset(imm)
+ SDTCisVT<6, i32>, // cachepolicy(imm)
+ SDTCisVT<7, i1>]>; // idxen(imm)
def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
@@ -126,13 +126,15 @@ def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
SDTBufferLoad,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
-def SDTBufferStore : SDTypeProfile<0, 6,
+def SDTBufferStore : SDTypeProfile<0, 8,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
- SDTCisVT<2, i32>, // vindex
- SDTCisVT<3, i32>, // offset
- SDTCisVT<4, i1>, // glc
- SDTCisVT<5, i1>]>; // slc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // offset(imm)
+ SDTCisVT<6, i32>, // cachepolicy(imm)
+ SDTCisVT<7, i1>]>; // idxen(imm)
def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
@@ -144,13 +146,16 @@ def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
class SDBufferAtomic<string opcode> : SDNode <opcode,
- SDTypeProfile<1, 5,
+ SDTypeProfile<1, 8,
[SDTCisVT<0, i32>, // dst
SDTCisVT<1, i32>, // vdata
SDTCisVT<2, v4i32>, // rsrc
- SDTCisVT<3, i32>, // vindex
- SDTCisVT<4, i32>, // offset
- SDTCisVT<5, i1>]>, // slc
+ SDTCisVT<3, i32>, // vindex(VGPR)
+ SDTCisVT<4, i32>, // voffset(VGPR)
+ SDTCisVT<5, i32>, // soffset(SGPR)
+ SDTCisVT<6, i32>, // offset(imm)
+ SDTCisVT<7, i32>, // cachepolicy(imm)
+ SDTCisVT<8, i1>]>, // idxen(imm)
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
@@ -166,14 +171,17 @@ def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
- SDTypeProfile<1, 6,
+ SDTypeProfile<1, 9,
[SDTCisVT<0, i32>, // dst
SDTCisVT<1, i32>, // src
SDTCisVT<2, i32>, // cmp
SDTCisVT<3, v4i32>, // rsrc
- SDTCisVT<4, i32>, // vindex
- SDTCisVT<5, i32>, // offset
- SDTCisVT<6, i1>]>, // slc
+ SDTCisVT<4, i32>, // vindex(VGPR)
+ SDTCisVT<5, i32>, // voffset(VGPR)
+ SDTCisVT<6, i32>, // soffset(SGPR)
+ SDTCisVT<7, i32>, // offset(imm)
+ SDTCisVT<8, i32>, // cachepolicy(imm)
+ SDTCisVT<9, i1>]>, // idxen(imm)
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
@@ -487,24 +495,7 @@ class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
}]>;
class VGPRImm <dag frag> : PatLeaf<frag, [{
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- return false;
- }
- const SIRegisterInfo *SIRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
- unsigned Limit = 0;
- for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
- Limit < 10 && U != E; ++U, ++Limit) {
- const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
-
- // If the register class is unknown, it could be an unknown
- // register class that needs to be an SGPR, e.g. an inline asm
- // constraint
- if (!RC || SIRI->isSGPRClass(RC))
- return false;
- }
-
- return Limit < 10;
+ return isVGPRImm(N);
}]>;
def NegateImm : SDNodeXForm<imm, [{
@@ -746,14 +737,13 @@ def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
-def R128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;
+def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
-def DFMT : NamedOperandU8<"DFMT", NamedMatchClass<"DFMT">>;
-def NFMT : NamedOperandU8<"NFMT", NamedMatchClass<"NFMT">>;
+def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;
def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
@@ -1632,7 +1622,7 @@ class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
0, // 64-bit dst - No DPP or SDWA for 64-bit operands
!if(!eq(Src0VT.Size, 64),
0, // 64-bit src0
- !if(!eq(Src0VT.Size, 64),
+ !if(!eq(Src1VT.Size, 64),
0, // 64-bit src2
1
)
@@ -1641,6 +1631,12 @@ class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
);
}
+class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
+ ValueType Src1VT = i32> {
+ bit ret = !if(!eq(NumSrcArgs, 0), 0,
+ getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
+}
+
class BitOr<bit a, bit b> {
bit ret = !if(a, 1, !if(b, 1, 0));
}
@@ -1649,6 +1645,11 @@ class BitAnd<bit a, bit b> {
bit ret = !if(a, !if(b, 1, 0), 0);
}
+def PatGenMode {
+ int NoPattern = 0;
+ int Pattern = 1;
+}
+
class VOPProfile <list<ValueType> _ArgVT> {
field list<ValueType> ArgVT = _ArgVT;
@@ -1715,7 +1716,10 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSDWAOMod = isFloatType<DstVT>.ret;
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
- field bit HasSDWA9 = HasExt;
+ field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+ field bit HasExtSDWA = HasExt;
+ field bit HasExtSDWA9 = HasExt;
+ field int NeedPatGen = PatGenMode.NoPattern;
field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
@@ -1743,8 +1747,10 @@ class VOPProfile <list<ValueType> _ArgVT> {
getOpSelMod<Src0VT>.ret,
getOpSelMod<Src1VT>.ret,
getOpSelMod<Src2VT>.ret>.ret;
- field dag InsDPP = getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
- HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
+ field dag InsDPP = !if(HasExtDPP,
+ getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
+ HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
+ (ins));
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
@@ -1758,14 +1764,21 @@ class VOPProfile <list<ValueType> _ArgVT> {
HasSrc0FloatMods,
HasSrc1FloatMods,
HasSrc2FloatMods>.ret;
- field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+ field string AsmDPP = !if(HasExtDPP,
+ getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
}
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
+}
+
+class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> {
+ let NeedPatGen = mode;
}
def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
@@ -1788,6 +1801,8 @@ def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
+def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
+def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
@@ -1925,6 +1940,15 @@ def getBasicFromSDWAOp : InstrMapping {
let ValueCols = [["Default"]];
}
+// Maps ordinary instructions to their DPP counterparts
+def getDPPOp32 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["AsmVariantName"];
+ let KeyCol = ["Default"];
+ let ValueCols = [["DPP"]];
+}
+
// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
let FilterClass = "Commutable_REV";
@@ -1977,6 +2001,14 @@ def getAddr64Inst : InstrMapping {
let ValueCols = [["1"]];
}
+def getIfAddr64Inst : InstrMapping {
+ let FilterClass = "MUBUFAddr64Table";
+ let RowFields = ["OpName"];
+ let ColFields = ["IsAddr64"];
+ let KeyCol = ["1"];
+ let ValueCols = [["1"]];
+}
+
def getMUBUFNoLdsInst : InstrMapping {
let FilterClass = "MUBUFLdsTable";
let RowFields = ["OpName"];
@@ -2003,6 +2035,15 @@ def getAtomicNoRetOp : InstrMapping {
let ValueCols = [["0"]];
}
+// Maps a GLOBAL to its SADDR form.
+def getGlobalSaddrOp : InstrMapping {
+ let FilterClass = "GlobalSaddrTable";
+ let RowFields = ["SaddrOp"];
+ let ColFields = ["IsSaddr"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
include "SIInstructions.td"
include "DSInstructions.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index 5c10646161b3..b6b00c2e4257 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -15,8 +15,8 @@ class GCNPat<dag pattern, dag result> : Pat<pattern, result>, GCNPredicateContro
let SubtargetPredicate = isGCN;
}
-include "VOPInstructions.td"
include "SOPInstructions.td"
+include "VOPInstructions.td"
include "SMInstructions.td"
include "FLATInstructions.td"
include "BUFInstructions.td"
@@ -164,29 +164,26 @@ def S_SUB_U64_CO_PSEUDO : SPseudoInstSI <
} // End usesCustomInserter = 1, Defs = [SCC]
-let usesCustomInserter = 1, SALU = 1 in {
-def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
+let usesCustomInserter = 1 in {
+def GET_GROUPSTATICSIZE : SPseudoInstSI <(outs SReg_32:$sdst), (ins),
[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
} // End let usesCustomInserter = 1, SALU = 1
-def S_MOV_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+def S_MOV_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
(ins SSrc_b64:$src0)> {
- let SALU = 1;
let isAsCheapAsAMove = 1;
let isTerminator = 1;
}
-def S_XOR_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+def S_XOR_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
(ins SSrc_b64:$src0, SSrc_b64:$src1)> {
- let SALU = 1;
let isAsCheapAsAMove = 1;
let isTerminator = 1;
let Defs = [SCC];
}
-def S_ANDN2_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+def S_ANDN2_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
(ins SSrc_b64:$src0, SSrc_b64:$src1)> {
- let SALU = 1;
let isAsCheapAsAMove = 1;
let isTerminator = 1;
}
@@ -250,7 +247,7 @@ def SI_LOOP : CFPseudoInstSI <
(outs), (ins SReg_64:$saved, brtarget:$target),
[(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
let Size = 8;
- let isBranch = 0;
+ let isBranch = 1;
let hasSideEffects = 1;
}
@@ -267,14 +264,6 @@ def SI_END_CF : CFPseudoInstSI <
let mayStore = 1;
}
-def SI_BREAK : CFPseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$src),
- [(set i64:$dst, (int_amdgcn_break i64:$src))], 1> {
- let Size = 4;
- let isAsCheapAsAMove = 1;
- let isReMaterializable = 1;
-}
-
def SI_IF_BREAK : CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src),
[(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> {
@@ -283,14 +272,6 @@ def SI_IF_BREAK : CFPseudoInstSI <
let isReMaterializable = 1;
}
-def SI_ELSE_BREAK : CFPseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1),
- [(set i64:$dst, (int_amdgcn_else_break i64:$src0, i64:$src1))]> {
- let Size = 4;
- let isAsCheapAsAMove = 1;
- let isReMaterializable = 1;
-}
-
let Uses = [EXEC] in {
multiclass PseudoInstKill <dag ins> {
@@ -326,6 +307,7 @@ def SI_ILLEGAL_COPY : SPseudoInstSI <
def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
let isTerminator = 1;
let usesCustomInserter = 1;
+ let isBranch = 1;
}
def SI_PS_LIVE : PseudoInstSI <
@@ -598,7 +580,13 @@ def : Pat <
(int_amdgcn_kill (i1 (setcc f32:$src, InlineFPImm<f32>:$imm, cond:$cond))),
(SI_KILL_F32_COND_IMM_PSEUDO $src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond))
>;
-// TODO: we could add more variants for other types of conditionals
+
+ // TODO: we could add more variants for other types of conditionals
+
+def : Pat <
+ (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
+ (COPY $src) // Return the SGPRs representing i1 src
+>;
//===----------------------------------------------------------------------===//
// VOP1 Patterns
@@ -730,12 +718,14 @@ defm : SelectPat <i32, V_CNDMASK_B32_e64>;
defm : SelectPat <f16, V_CNDMASK_B32_e64>;
defm : SelectPat <f32, V_CNDMASK_B32_e64>;
+let AddedComplexity = 1 in {
def : GCNPat <
- (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
+ (i32 (add (i32 (getDivergentFrag<ctpop>.ret i32:$popcnt)), i32:$val)),
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
+}
def : GCNPat <
- (i16 (add (i16 (trunc (ctpop i32:$popcnt))), i16:$val)),
+ (i16 (add (i16 (trunc (getDivergentFrag<ctpop>.ret i32:$popcnt))), i16:$val)),
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
@@ -867,6 +857,8 @@ def : BitConvert <f64, v2f32, VReg_64>;
def : BitConvert <v2f32, f64, VReg_64>;
def : BitConvert <f64, v2i32, VReg_64>;
def : BitConvert <v2i32, f64, VReg_64>;
+def : BitConvert <v4i16, v4f16, VReg_64>;
+def : BitConvert <v4f16, v4i16, VReg_64>;
// FIXME: Make SGPR
def : BitConvert <v2i32, v4f16, VReg_64>;
@@ -1324,6 +1316,38 @@ def : GCNPat <
>;
def : GCNPat <
+ (i1 (add i1:$src0, i1:$src1)),
+ (S_XOR_B64 $src0, $src1)
+>;
+
+def : GCNPat <
+ (i1 (sub i1:$src0, i1:$src1)),
+ (S_XOR_B64 $src0, $src1)
+>;
+
+let AddedComplexity = 1 in {
+def : GCNPat <
+ (i1 (add i1:$src0, (i1 -1))),
+ (S_NOT_B64 $src0)
+>;
+
+def : GCNPat <
+ (i1 (sub i1:$src0, (i1 -1))),
+ (S_NOT_B64 $src0)
+>;
+}
+
+def : GCNPat <
+ (f16 (sint_to_fp i1:$src)),
+ (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src))
+>;
+
+def : GCNPat <
+ (f16 (uint_to_fp i1:$src)),
+ (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src))
+>;
+
+def : GCNPat <
(f32 (sint_to_fp i1:$src)),
(V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
>;
@@ -1464,13 +1488,32 @@ class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPa
def : ExpPattern<AMDGPUexport, i32, EXP>;
def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
-// COPY_TO_REGCLASS is workaround tablegen bug from multiple outputs
+// COPY is a workaround for a tablegen bug from multiple outputs
// from S_LSHL_B32's multiple outputs from implicit scc def.
def : GCNPat <
(v2i16 (build_vector (i16 0), i16:$src1)),
- (v2i16 (COPY_TO_REGCLASS (S_LSHL_B32 i16:$src1, (i16 16)), SReg_32_XM0))
+ (v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16))))
+>;
+
+def : GCNPat <
+ (v2i16 (build_vector i16:$src0, (i16 undef))),
+ (v2i16 (COPY $src0))
+>;
+
+def : GCNPat <
+ (v2f16 (build_vector f16:$src0, (f16 undef))),
+ (v2f16 (COPY $src0))
+>;
+
+def : GCNPat <
+ (v2i16 (build_vector (i16 undef), i16:$src1)),
+ (v2i16 (COPY (S_LSHL_B32 $src1, (i32 16))))
>;
+def : GCNPat <
+ (v2f16 (build_vector (f16 undef), f16:$src1)),
+ (v2f16 (COPY (S_LSHL_B32 $src1, (i32 16))))
+>;
let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat <
@@ -1501,15 +1544,15 @@ def : GCNPat <
} // End SubtargetPredicate = HasVOP3PInsts
-// def : GCNPat <
-// (v2f16 (scalar_to_vector f16:$src0)),
-// (COPY $src0)
-// >;
+def : GCNPat <
+ (v2f16 (scalar_to_vector f16:$src0)),
+ (COPY $src0)
+>;
-// def : GCNPat <
-// (v2i16 (scalar_to_vector i16:$src0)),
-// (COPY $src0)
-// >;
+def : GCNPat <
+ (v2i16 (scalar_to_vector i16:$src0)),
+ (COPY $src0)
+>;
def : GCNPat <
(v4i16 (scalar_to_vector i16:$src0)),
@@ -1587,18 +1630,19 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;
-def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
-def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;
+defm : IntMed3Pat<V_MED3_I32, smin, smax, smin_oneuse, smax_oneuse>;
+defm : IntMed3Pat<V_MED3_U32, umin, umax, umin_oneuse, umax_oneuse>;
}
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class FPMed3Pat<ValueType vt,
+ //SDPatternOperator max, SDPatternOperator min,
Instruction med3Inst> : GCNPat<
- (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (fmaxnum_like (fminnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
- (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (fminnum_like_oneuse (fmaxnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
(vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
@@ -1606,28 +1650,41 @@ class FPMed3Pat<ValueType vt,
class FP16Med3Pat<ValueType vt,
Instruction med3Inst> : GCNPat<
- (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
- (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
- (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
- (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (fmaxnum_like (fminnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (fminnum_like_oneuse (fmaxnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
(vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
>;
-class Int16Med3Pat<Instruction med3Inst,
+multiclass Int16Med3Pat<Instruction med3Inst,
+ SDPatternOperator min,
SDPatternOperator max,
SDPatternOperator max_oneuse,
SDPatternOperator min_oneuse,
- ValueType vt = i32> : GCNPat<
+ ValueType vt = i16> {
+ // This matches 16 permutations of
+ // max(min(x, y), min(max(x, y), z))
+ def : GCNPat <
(max (min_oneuse vt:$src0, vt:$src1),
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
>;
+ // This matches 16 permutations of
+ // min(max(a, b), max(min(a, b), c))
+ def : GCNPat <
+ (min (max_oneuse vt:$src0, vt:$src1),
+ (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
+ (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
+>;
+}
+
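As a sanity check on the two shapes matched by the Int16Med3Pat multiclass above: both max(min(x, y), min(max(x, y), z)) and min(max(x, y), max(min(x, y), z)) compute the median of the three inputs, which is what the V_MED3_* instructions implement. A small standalone check in plain C++ (not TableGen), exhaustively testing all orderings of one triple:

#include <algorithm>
#include <cassert>

static int med3_form1(int x, int y, int z) { // max(min(x,y), min(max(x,y), z))
  return std::max(std::min(x, y), std::min(std::max(x, y), z));
}

static int med3_form2(int x, int y, int z) { // min(max(x,y), max(min(x,y), z))
  return std::min(std::max(x, y), std::max(std::min(x, y), z));
}

int main() {
  int v[3] = {-2, 5, 7}; // start sorted so next_permutation covers all orders
  do {
    int s[3] = {v[0], v[1], v[2]};
    std::sort(s, s + 3); // s[1] is the true median
    assert(med3_form1(v[0], v[1], v[2]) == s[1]);
    assert(med3_form2(v[0], v[1], v[2]) == s[1]);
  } while (std::next_permutation(v, v + 3));
}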
def : FPMed3Pat<f32, V_MED3_F32>;
let OtherPredicates = [isGFX9] in {
def : FP16Med3Pat<f16, V_MED3_F16>;
-def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
-def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
+defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
+defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
} // End Predicates = [isGFX9]
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td b/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
index 7b7cf1635050..e51ff4b4bc50 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
@@ -16,36 +16,4 @@
let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
- // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
- def int_SI_tbuffer_store : Intrinsic <
- [],
- [llvm_anyint_ty, // rsrc(SGPR)
- llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
- llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
- llvm_i32_ty, // vaddr(VGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // inst_offset(imm)
- llvm_i32_ty, // dfmt(imm)
- llvm_i32_ty, // nfmt(imm)
- llvm_i32_ty, // offen(imm)
- llvm_i32_ty, // idxen(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty], // tfe(imm)
- []>;
-
- // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
- def int_SI_buffer_load_dword : Intrinsic <
- [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
- [llvm_anyint_ty, // rsrc(SGPR)
- llvm_anyint_ty, // vaddr(VGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // inst_offset(imm)
- llvm_i32_ty, // offen(imm)
- llvm_i32_ty, // idxen(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty], // tfe(imm)
- [IntrReadMem, IntrArgMemOnly]>;
-
} // End TargetPrefix = "SI", isTarget = 1
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 4b537540046f..be291b127301 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -20,6 +20,26 @@
// ==>
// s_buffer_load_dwordx2 s[4:5], s[0:3], 4
//
+// This pass also tries to promote a constant offset into the immediate by
+// adjusting the base. It tries to reuse a base from nearby instructions so
+// that the access needs only a 13-bit constant offset, which is then
+// promoted into the immediate.
+// E.g.
+// s_movk_i32 s0, 0x1800
+// v_add_co_u32_e32 v0, vcc, s0, v2
+// v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+//
+// s_movk_i32 s0, 0x1000
+// v_add_co_u32_e32 v5, vcc, s0, v2
+// v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+// global_load_dwordx2 v[5:6], v[5:6], off
+// global_load_dwordx2 v[0:1], v[0:1], off
+// =>
+// s_movk_i32 s0, 0x1000
+// v_add_co_u32_e32 v5, vcc, s0, v2
+// v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+// global_load_dwordx2 v[5:6], v[5:6], off
+// global_load_dwordx2 v[0:1], v[5:6], off offset:2048
//
// Future improvements:
//
@@ -43,9 +63,9 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
@@ -74,23 +94,38 @@ using namespace llvm;
#define DEBUG_TYPE "si-load-store-opt"
namespace {
+enum InstClassEnum {
+ UNKNOWN,
+ DS_READ,
+ DS_WRITE,
+ S_BUFFER_LOAD_IMM,
+ BUFFER_LOAD_OFFEN = AMDGPU::BUFFER_LOAD_DWORD_OFFEN,
+ BUFFER_LOAD_OFFSET = AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+ BUFFER_STORE_OFFEN = AMDGPU::BUFFER_STORE_DWORD_OFFEN,
+ BUFFER_STORE_OFFSET = AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+ BUFFER_LOAD_OFFEN_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact,
+ BUFFER_LOAD_OFFSET_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact,
+ BUFFER_STORE_OFFEN_exact = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact,
+ BUFFER_STORE_OFFSET_exact = AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact,
+};
-class SILoadStoreOptimizer : public MachineFunctionPass {
- enum InstClassEnum {
- DS_READ_WRITE,
- S_BUFFER_LOAD_IMM,
- BUFFER_LOAD_OFFEN,
- BUFFER_LOAD_OFFSET,
- BUFFER_STORE_OFFEN,
- BUFFER_STORE_OFFSET,
- };
+enum RegisterEnum {
+ SBASE = 0x1,
+ SRSRC = 0x2,
+ SOFFSET = 0x4,
+ VADDR = 0x8,
+ ADDR = 0x10,
+};
+class SILoadStoreOptimizer : public MachineFunctionPass {
struct CombineInfo {
MachineBasicBlock::iterator I;
MachineBasicBlock::iterator Paired;
unsigned EltSize;
unsigned Offset0;
unsigned Offset1;
+ unsigned Width0;
+ unsigned Width1;
unsigned BaseOff;
InstClassEnum InstClass;
bool GLC0;
@@ -98,9 +133,23 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
bool SLC0;
bool SLC1;
bool UseST64;
- bool IsX2;
- SmallVector<MachineInstr*, 8> InstsToMove;
- };
+ SmallVector<MachineInstr *, 8> InstsToMove;
+ };
+
+ struct BaseRegisters {
+ unsigned LoReg = 0;
+ unsigned HiReg = 0;
+
+ unsigned LoSubReg = 0;
+ unsigned HiSubReg = 0;
+ };
+
+ struct MemAddress {
+ BaseRegisters Base;
+ int64_t Offset = 0;
+ };
+
+ using MemInfoMap = DenseMap<MachineInstr *, MemAddress>;
private:
const GCNSubtarget *STM = nullptr;
@@ -108,9 +157,16 @@ private:
const SIRegisterInfo *TRI = nullptr;
MachineRegisterInfo *MRI = nullptr;
AliasAnalysis *AA = nullptr;
- unsigned CreatedX2;
+ bool OptimizeAgain;
static bool offsetsCanBeCombined(CombineInfo &CI);
+ static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI);
+ static unsigned getNewOpcode(const CombineInfo &CI);
+ static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI);
+ const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI);
+ unsigned getOpcodeWidth(const MachineInstr &MI);
+ InstClassEnum getInstClass(unsigned Opc);
+ unsigned getRegs(unsigned Opc);
bool findMatchingInst(CombineInfo &CI);
@@ -123,10 +179,21 @@ private:
MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI);
MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI);
MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI);
- unsigned promoteBufferStoreOpcode(const MachineInstr &I, bool &IsX2,
- bool &IsOffen) const;
MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI);
+ void updateBaseAndOffset(MachineInstr &I, unsigned NewBase,
+ int32_t NewOffset);
+ unsigned computeBase(MachineInstr &MI, const MemAddress &Addr);
+ MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI);
+ Optional<int32_t> extractConstOffset(const MachineOperand &Op);
+ void processBaseWithConstOffset(const MachineOperand &Base, MemAddress &Addr);
+  /// Promotes a constant offset into the immediate by adjusting the base. It
+  /// tries to reuse a base from nearby instructions so that the access needs
+  /// only a 13-bit constant offset, which is then promoted into the immediate.
+ bool promoteConstantOffsetToImm(MachineInstr &CI,
+ MemInfoMap &Visited,
+ SmallPtrSet<MachineInstr *, 4> &Promoted);
+
public:
static char ID;
@@ -153,8 +220,8 @@ public:
INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
"SI Load Store Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE,
- "SI Load Store Optimizer", false, false)
+INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE, "SI Load Store Optimizer",
+ false, false)
char SILoadStoreOptimizer::ID = 0;
@@ -165,7 +232,7 @@ FunctionPass *llvm::createSILoadStoreOptimizerPass() {
}
static void moveInstsAfter(MachineBasicBlock::iterator I,
- ArrayRef<MachineInstr*> InstsToMove) {
+ ArrayRef<MachineInstr *> InstsToMove) {
MachineBasicBlock *MBB = I->getParent();
++I;
for (MachineInstr *MI : InstsToMove) {
@@ -191,21 +258,19 @@ static void addDefsUsesToList(const MachineInstr &MI,
static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
MachineBasicBlock::iterator B,
const SIInstrInfo *TII,
- AliasAnalysis * AA) {
+ AliasAnalysis *AA) {
// RAW or WAR - cannot reorder
// WAW - cannot reorder
// RAR - safe to reorder
return !(A->mayStore() || B->mayStore()) ||
- TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
+ TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
}
// Add MI and its defs to the lists if MI reads one of the defs that are
// already in the list. Returns true in that case.
-static bool
-addToListsIfDependent(MachineInstr &MI,
- DenseSet<unsigned> &RegDefs,
- DenseSet<unsigned> &PhysRegUses,
- SmallVectorImpl<MachineInstr*> &Insts) {
+static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
+ DenseSet<unsigned> &PhysRegUses,
+ SmallVectorImpl<MachineInstr *> &Insts) {
for (MachineOperand &Use : MI.operands()) {
// If one of the defs is read, then there is a use of Def between I and the
// instruction that I will potentially be merged with. We will need to move
@@ -228,18 +293,16 @@ addToListsIfDependent(MachineInstr &MI,
return false;
}
-static bool
-canMoveInstsAcrossMemOp(MachineInstr &MemOp,
- ArrayRef<MachineInstr*> InstsToMove,
- const SIInstrInfo *TII,
- AliasAnalysis *AA) {
+static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
+ ArrayRef<MachineInstr *> InstsToMove,
+ const SIInstrInfo *TII, AliasAnalysis *AA) {
assert(MemOp.mayLoadOrStore());
for (MachineInstr *InstToMove : InstsToMove) {
if (!InstToMove->mayLoadOrStore())
continue;
if (!memAccessesCanBeReordered(MemOp, *InstToMove, TII, AA))
- return false;
+ return false;
}
return true;
}
@@ -260,10 +323,9 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
CI.BaseOff = 0;
// Handle SMEM and VMEM instructions.
- if (CI.InstClass != DS_READ_WRITE) {
- unsigned Diff = CI.IsX2 ? 2 : 1;
- return (EltOffset0 + Diff == EltOffset1 ||
- EltOffset1 + Diff == EltOffset0) &&
+ if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
+ return (EltOffset0 + CI.Width0 == EltOffset1 ||
+ EltOffset1 + CI.Width1 == EltOffset0) &&
CI.GLC0 == CI.GLC1 &&
(CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
}
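The SMEM/VMEM branch above now derives mergeability from the per-instruction widths: two accesses can be paired when one ends exactly where the other begins, in dword units. A minimal standalone C++ sketch of that test, mirroring the Offset/Width fields of CombineInfo:

// Offsets and widths are in dword "elements", as in CombineInfo.
static bool offsetsAreAdjacent(unsigned Off0, unsigned Width0,
                               unsigned Off1, unsigned Width1) {
  // e.g. a dword at element 4 followed by a dwordx2 at element 5 merges,
  // but a gap or an overlap does not.
  return Off0 + Width0 == Off1 || Off1 + Width1 == Off0;
}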
@@ -305,42 +367,176 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
return false;
}
+bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
+ const CombineInfo &CI) {
+ const unsigned Width = (CI.Width0 + CI.Width1);
+ switch (CI.InstClass) {
+ default:
+ return (Width <= 4) && (STM.hasDwordx3LoadStores() || (Width != 3));
+ case S_BUFFER_LOAD_IMM:
+ switch (Width) {
+ default:
+ return false;
+ case 2:
+ case 4:
+ return true;
+ }
+ }
+}
+
+unsigned SILoadStoreOptimizer::getOpcodeWidth(const MachineInstr &MI) {
+ const unsigned Opc = MI.getOpcode();
+
+ if (TII->isMUBUF(MI)) {
+ return AMDGPU::getMUBUFDwords(Opc);
+ }
+
+ switch (Opc) {
+ default:
+ return 0;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ return 1;
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ return 2;
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return 4;
+ }
+}
+
+InstClassEnum SILoadStoreOptimizer::getInstClass(unsigned Opc) {
+ if (TII->isMUBUF(Opc)) {
+ const int baseOpcode = AMDGPU::getMUBUFBaseOpcode(Opc);
+
+ // If we couldn't identify the opcode, bail out.
+ if (baseOpcode == -1) {
+ return UNKNOWN;
+ }
+
+ switch (baseOpcode) {
+ default:
+ return UNKNOWN;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
+ return BUFFER_LOAD_OFFEN;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
+ return BUFFER_LOAD_OFFSET;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
+ return BUFFER_STORE_OFFEN;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
+ return BUFFER_STORE_OFFSET;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact:
+ return BUFFER_LOAD_OFFEN_exact;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact:
+ return BUFFER_LOAD_OFFSET_exact;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact:
+ return BUFFER_STORE_OFFEN_exact;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact:
+ return BUFFER_STORE_OFFSET_exact;
+ }
+ }
+
+ switch (Opc) {
+ default:
+ return UNKNOWN;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return S_BUFFER_LOAD_IMM;
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_B64:
+ case AMDGPU::DS_READ_B32_gfx9:
+ case AMDGPU::DS_READ_B64_gfx9:
+ return DS_READ;
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B64:
+ case AMDGPU::DS_WRITE_B32_gfx9:
+ case AMDGPU::DS_WRITE_B64_gfx9:
+ return DS_WRITE;
+ }
+}
+
+unsigned SILoadStoreOptimizer::getRegs(unsigned Opc) {
+ if (TII->isMUBUF(Opc)) {
+ unsigned result = 0;
+
+ if (AMDGPU::getMUBUFHasVAddr(Opc)) {
+ result |= VADDR;
+ }
+
+ if (AMDGPU::getMUBUFHasSrsrc(Opc)) {
+ result |= SRSRC;
+ }
+
+ if (AMDGPU::getMUBUFHasSoffset(Opc)) {
+ result |= SOFFSET;
+ }
+
+ return result;
+ }
+
+ switch (Opc) {
+ default:
+ return 0;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return SBASE;
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_B64:
+ case AMDGPU::DS_READ_B32_gfx9:
+ case AMDGPU::DS_READ_B64_gfx9:
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B64:
+ case AMDGPU::DS_WRITE_B32_gfx9:
+ case AMDGPU::DS_WRITE_B64_gfx9:
+ return ADDR;
+ }
+}
+
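The new getRegs() above classifies the address-forming operands of an opcode as a bitmask, and findMatchingInst() below builds its AddrOpName[] list from those bits instead of a per-class switch. A standalone sketch of that expansion in plain C++ (the operand-name strings are only illustrative):

#include <cstdio>

enum RegBits : unsigned {
  SBASE = 0x1, SRSRC = 0x2, SOFFSET = 0x4, VADDR = 0x8, ADDR = 0x10,
};

// Expand a bitmask into the list of address operands to compare, the same
// way findMatchingInst() fills AddrOpName[] from getRegs(Opc).
static unsigned collectAddrOps(unsigned Regs, const char *Names[5]) {
  unsigned N = 0;
  if (Regs & ADDR)    Names[N++] = "addr";
  if (Regs & SBASE)   Names[N++] = "sbase";
  if (Regs & SRSRC)   Names[N++] = "srsrc";
  if (Regs & SOFFSET) Names[N++] = "soffset";
  if (Regs & VADDR)   Names[N++] = "vaddr";
  return N;
}

int main() {
  const char *Ops[5];
  // e.g. a BUFFER_*_OFFEN-style opcode uses srsrc, soffset and vaddr.
  unsigned N = collectAddrOps(SRSRC | SOFFSET | VADDR, Ops);
  for (unsigned I = 0; I < N; ++I)
    std::printf("%s\n", Ops[I]);
}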
bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
MachineBasicBlock::iterator E = MBB->end();
MachineBasicBlock::iterator MBBI = CI.I;
- unsigned AddrOpName[3] = {0};
- int AddrIdx[3];
- const MachineOperand *AddrReg[3];
+ const unsigned Opc = CI.I->getOpcode();
+ const InstClassEnum InstClass = getInstClass(Opc);
+
+ if (InstClass == UNKNOWN) {
+ return false;
+ }
+
+ const unsigned Regs = getRegs(Opc);
+
+ unsigned AddrOpName[5] = {0};
+ int AddrIdx[5];
+ const MachineOperand *AddrReg[5];
unsigned NumAddresses = 0;
- switch (CI.InstClass) {
- case DS_READ_WRITE:
+ if (Regs & ADDR) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::addr;
- break;
- case S_BUFFER_LOAD_IMM:
+ }
+
+ if (Regs & SBASE) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase;
- break;
- case BUFFER_LOAD_OFFEN:
- case BUFFER_STORE_OFFEN:
- AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
- AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
- AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
- break;
- case BUFFER_LOAD_OFFSET:
- case BUFFER_STORE_OFFSET:
+ }
+
+ if (Regs & SRSRC) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
+ }
+
+ if (Regs & SOFFSET) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
- break;
+ }
+
+ if (Regs & VADDR) {
+ AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
}
for (unsigned i = 0; i < NumAddresses; i++) {
AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]);
AddrReg[i] = &CI.I->getOperand(AddrIdx[i]);
- // We only ever merge operations with the same base address register, so don't
- // bother scanning forward if there are no other uses.
+ // We only ever merge operations with the same base address register, so
+ // don't bother scanning forward if there are no other uses.
if (AddrReg[i]->isReg() &&
(TargetRegisterInfo::isPhysicalRegister(AddrReg[i]->getReg()) ||
MRI->hasOneNonDBGUse(AddrReg[i]->getReg())))
@@ -353,8 +549,11 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
DenseSet<unsigned> PhysRegUsesToMove;
addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);
- for ( ; MBBI != E; ++MBBI) {
- if (MBBI->getOpcode() != CI.I->getOpcode()) {
+ for (; MBBI != E; ++MBBI) {
+ const bool IsDS = (InstClass == DS_READ) || (InstClass == DS_WRITE);
+
+ if ((getInstClass(MBBI->getOpcode()) != InstClass) ||
+ (IsDS && (MBBI->getOpcode() != Opc))) {
// This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions is met:
// 1. It is safe to move I down past MBBI.
@@ -368,8 +567,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
}
if (MBBI->mayLoadOrStore() &&
- (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
- !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
+ (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
+ !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
// We fail condition #1, but we may still be able to satisfy condition
// #2. Add this instruction to the move list and then we will check
// if condition #2 holds once we have selected the matching instruction.
@@ -413,8 +612,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
continue;
}
- // Check same base pointer. Be careful of subregisters, which can occur with
- // vectors of pointers.
+ // Check same base pointer. Be careful of subregisters, which can occur
+ // with vectors of pointers.
if (AddrReg[i]->getReg() != AddrRegNext.getReg() ||
AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) {
Match = false;
@@ -423,13 +622,15 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
}
if (Match) {
- int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
- AMDGPU::OpName::offset);
+ int OffsetIdx =
+ AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::offset);
CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm();
+ CI.Width0 = getOpcodeWidth(*CI.I);
CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm();
+ CI.Width1 = getOpcodeWidth(*MBBI);
CI.Paired = MBBI;
- if (CI.InstClass == DS_READ_WRITE) {
+ if ((CI.InstClass == DS_READ) || (CI.InstClass == DS_WRITE)) {
CI.Offset0 &= 0xffff;
CI.Offset1 &= 0xffff;
} else {
@@ -445,7 +646,7 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
// We also need to go through the list of instructions that we plan to
// move and make sure they are all safe to move down past the merged
// instruction.
- if (offsetsCanBeCombined(CI))
+ if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
return true;
}
@@ -472,12 +673,12 @@ unsigned SILoadStoreOptimizer::read2ST64Opcode(unsigned EltSize) const {
if (STM->ldsRequiresM0Init())
return (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
- return (EltSize == 4) ?
- AMDGPU::DS_READ2ST64_B32_gfx9 : AMDGPU::DS_READ2ST64_B64_gfx9;
+ return (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32_gfx9
+ : AMDGPU::DS_READ2ST64_B64_gfx9;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be careful, since the addresses could be subregisters themselves in weird
@@ -489,8 +690,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
unsigned NewOffset0 = CI.Offset0;
unsigned NewOffset1 = CI.Offset1;
- unsigned Opc = CI.UseST64 ?
- read2ST64Opcode(CI.EltSize) : read2Opcode(CI.EltSize);
+ unsigned Opc =
+ CI.UseST64 ? read2ST64Opcode(CI.EltSize) : read2Opcode(CI.EltSize);
unsigned SubRegIdx0 = (CI.EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
unsigned SubRegIdx1 = (CI.EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
@@ -502,39 +703,40 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
}
assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
- (NewOffset0 != NewOffset1) &&
- "Computed offset doesn't fit");
+ (NewOffset0 != NewOffset1) && "Computed offset doesn't fit");
const MCInstrDesc &Read2Desc = TII->get(Opc);
- const TargetRegisterClass *SuperRC
- = (CI.EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
+ const TargetRegisterClass *SuperRC =
+ (CI.EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
unsigned DestReg = MRI->createVirtualRegister(SuperRC);
DebugLoc DL = CI.I->getDebugLoc();
unsigned BaseReg = AddrReg->getReg();
+ unsigned BaseSubReg = AddrReg->getSubReg();
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
- .addImm(CI.BaseOff);
+ .addImm(CI.BaseOff);
BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BaseRegFlags = RegState::Kill;
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
- .addReg(ImmReg)
- .addReg(AddrReg->getReg());
+ .addReg(ImmReg)
+ .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ BaseSubReg = 0;
}
MachineInstrBuilder Read2 =
- BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
- .addReg(BaseReg, BaseRegFlags) // addr
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
+ BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
+ .addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
(void)Read2;
@@ -561,32 +763,36 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
unsigned SILoadStoreOptimizer::write2Opcode(unsigned EltSize) const {
if (STM->ldsRequiresM0Init())
return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
- return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32_gfx9 : AMDGPU::DS_WRITE2_B64_gfx9;
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32_gfx9
+ : AMDGPU::DS_WRITE2_B64_gfx9;
}
unsigned SILoadStoreOptimizer::write2ST64Opcode(unsigned EltSize) const {
if (STM->ldsRequiresM0Init())
- return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32
+ : AMDGPU::DS_WRITE2ST64_B64;
- return (EltSize == 4) ?
- AMDGPU::DS_WRITE2ST64_B32_gfx9 : AMDGPU::DS_WRITE2ST64_B64_gfx9;
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32_gfx9
+ : AMDGPU::DS_WRITE2ST64_B64_gfx9;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be sure to use .addOperand(), and not .addReg() with these. We want to be
// sure we preserve the subregister index and any register flags set on them.
- const MachineOperand *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
- const MachineOperand *Data0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0);
- const MachineOperand *Data1
- = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0);
+ const MachineOperand *AddrReg =
+ TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
+ const MachineOperand *Data0 =
+ TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0);
+ const MachineOperand *Data1 =
+ TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0);
unsigned NewOffset0 = CI.Offset0;
unsigned NewOffset1 = CI.Offset1;
- unsigned Opc = CI.UseST64 ?
- write2ST64Opcode(CI.EltSize) : write2Opcode(CI.EltSize);
+ unsigned Opc =
+ CI.UseST64 ? write2ST64Opcode(CI.EltSize) : write2Opcode(CI.EltSize);
if (NewOffset0 > NewOffset1) {
// Canonicalize the merged instruction so the smaller offset comes first.
@@ -595,36 +801,37 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
}
assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
- (NewOffset0 != NewOffset1) &&
- "Computed offset doesn't fit");
+ (NewOffset0 != NewOffset1) && "Computed offset doesn't fit");
const MCInstrDesc &Write2Desc = TII->get(Opc);
DebugLoc DL = CI.I->getDebugLoc();
unsigned BaseReg = AddrReg->getReg();
+ unsigned BaseSubReg = AddrReg->getSubReg();
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
- .addImm(CI.BaseOff);
+ .addImm(CI.BaseOff);
BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BaseRegFlags = RegState::Kill;
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
- .addReg(ImmReg)
- .addReg(AddrReg->getReg());
+ .addReg(ImmReg)
+ .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ BaseSubReg = 0;
}
MachineInstrBuilder Write2 =
- BuildMI(*MBB, CI.Paired, DL, Write2Desc)
- .addReg(BaseReg, BaseRegFlags) // addr
- .add(*Data0) // data0
- .add(*Data1) // data1
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
+ BuildMI(*MBB, CI.Paired, DL, Write2Desc)
+ .addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
+ .add(*Data0) // data0
+ .add(*Data1) // data1
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
moveInstsAfter(Write2, CI.InstsToMove);
@@ -636,15 +843,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
return Next;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- unsigned Opcode = CI.IsX2 ? AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM :
- AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
+ const unsigned Opcode = getNewOpcode(CI);
+
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
- const TargetRegisterClass *SuperRC =
- CI.IsX2 ? &AMDGPU::SReg_128RegClass : &AMDGPU::SReg_64_XEXECRegClass;
unsigned DestReg = MRI->createVirtualRegister(SuperRC);
unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);
@@ -652,14 +858,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
.addImm(CI.GLC0) // glc
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
-
- unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
- unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
- // Handle descending offsets
- if (CI.Offset0 > CI.Offset1)
- std::swap(SubRegIdx0, SubRegIdx1);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the old destination registers.
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
@@ -681,29 +884,25 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
return Next;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- unsigned Opcode;
- if (CI.InstClass == BUFFER_LOAD_OFFEN) {
- Opcode = CI.IsX2 ? AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN :
- AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
- } else {
- Opcode = CI.IsX2 ? AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET :
- AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
- }
+ const unsigned Opcode = getNewOpcode(CI);
- const TargetRegisterClass *SuperRC =
- CI.IsX2 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass;
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
+
+ // Create the merged destination super-register.
unsigned DestReg = MRI->createVirtualRegister(SuperRC);
unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);
auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg);
- if (CI.InstClass == BUFFER_LOAD_OFFEN)
- MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
+ const unsigned Regs = getRegs(Opcode);
+
+ if (Regs & VADDR)
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -711,14 +910,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
.addImm(CI.GLC0) // glc
.addImm(CI.SLC0) // slc
.addImm(0) // tfe
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
-
- unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
- unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
- // Handle descending offsets
- if (CI.Offset0 > CI.Offset1)
- std::swap(SubRegIdx0, SubRegIdx1);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the old destination registers.
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
@@ -740,57 +936,137 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
return Next;
}
-unsigned SILoadStoreOptimizer::promoteBufferStoreOpcode(
- const MachineInstr &I, bool &IsX2, bool &IsOffen) const {
- IsX2 = false;
- IsOffen = false;
-
- switch (I.getOpcode()) {
- case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
- case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact:
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN_exact;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
- IsX2 = true;
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN_exact:
- IsX2 = true;
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN_exact;
- case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
- case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact:
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET_exact;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
- IsX2 = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET_exact:
- IsX2 = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET_exact;
- }
- return 0;
-}
-
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
- CombineInfo &CI) {
+unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
+ const unsigned Width = CI.Width0 + CI.Width1;
+
+ switch (CI.InstClass) {
+ default:
+ return AMDGPU::getMUBUFOpcode(CI.InstClass, Width);
+ case UNKNOWN:
+ llvm_unreachable("Unknown instruction class");
+ case S_BUFFER_LOAD_IMM:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
+ case 4:
+ return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
+ }
+ }
+}
+
+std::pair<unsigned, unsigned>
+SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI) {
+ if (CI.Offset0 > CI.Offset1) {
+ switch (CI.Width0) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub1, AMDGPU::sub0);
+ case 2:
+ return std::make_pair(AMDGPU::sub2, AMDGPU::sub0_sub1);
+ case 3:
+ return std::make_pair(AMDGPU::sub3, AMDGPU::sub0_sub1_sub2);
+ }
+ case 2:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub1_sub2, AMDGPU::sub0);
+ case 2:
+ return std::make_pair(AMDGPU::sub2_sub3, AMDGPU::sub0_sub1);
+ }
+ case 3:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0);
+ }
+ }
+ } else {
+ switch (CI.Width0) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub0, AMDGPU::sub1);
+ case 2:
+ return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2);
+ case 3:
+ return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2_sub3);
+ }
+ case 2:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2);
+ case 2:
+ return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2_sub3);
+ }
+ case 3:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub0_sub1_sub2, AMDGPU::sub3);
+ }
+ }
+ }
+}
+
+const TargetRegisterClass *
+SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI) {
+ if (CI.InstClass == S_BUFFER_LOAD_IMM) {
+ switch (CI.Width0 + CI.Width1) {
+ default:
+ return nullptr;
+ case 2:
+ return &AMDGPU::SReg_64_XEXECRegClass;
+ case 4:
+ return &AMDGPU::SReg_128RegClass;
+ case 8:
+ return &AMDGPU::SReg_256RegClass;
+ case 16:
+ return &AMDGPU::SReg_512RegClass;
+ }
+ } else {
+ switch (CI.Width0 + CI.Width1) {
+ default:
+ return nullptr;
+ case 2:
+ return &AMDGPU::VReg_64RegClass;
+ case 3:
+ return &AMDGPU::VReg_96RegClass;
+ case 4:
+ return &AMDGPU::VReg_128RegClass;
+ }
+ }
+}
+
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- bool Unused1, Unused2;
- unsigned Opcode = promoteBufferStoreOpcode(*CI.I, Unused1, Unused2);
- unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
- unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;
+ const unsigned Opcode = getNewOpcode(CI);
- // Handle descending offsets
- if (CI.Offset0 > CI.Offset1)
- std::swap(SubRegIdx0, SubRegIdx1);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the new source register.
- const TargetRegisterClass *SuperRC =
- CI.IsX2 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass;
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
unsigned SrcReg = MRI->createVirtualRegister(SuperRC);
const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
@@ -803,18 +1079,20 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
.addImm(SubRegIdx1);
auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode))
- .addReg(SrcReg, RegState::Kill);
+ .addReg(SrcReg, RegState::Kill);
- if (CI.InstClass == BUFFER_STORE_OFFEN)
- MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
+ const unsigned Regs = getRegs(Opcode);
+
+ if (Regs & VADDR)
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(std::min(CI.Offset0, CI.Offset1)) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.SLC0) // slc
- .addImm(0) // tfe
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
+ .addImm(CI.GLC0) // glc
+ .addImm(CI.SLC0) // slc
+ .addImm(0) // tfe
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
moveInstsAfter(MIB, CI.InstsToMove);
@@ -824,105 +1102,399 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
return Next;
}
+MachineOperand
+SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) {
+ APInt V(32, Val, true);
+ if (TII->isInlineConstant(V))
+ return MachineOperand::CreateImm(Val);
+
+ unsigned Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ MachineInstr *Mov =
+ BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+ TII->get(AMDGPU::S_MOV_B32), Reg)
+ .addImm(Val);
+ (void)Mov;
+ LLVM_DEBUG(dbgs() << " "; Mov->dump());
+ return MachineOperand::CreateReg(Reg, false);
+}
+
+// Compute base address using Addr and return the final register.
+unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
+ const MemAddress &Addr) {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
+ DebugLoc DL = MI.getDebugLoc();
+
+ assert((TRI->getRegSizeInBits(Addr.Base.LoReg, *MRI) == 32 ||
+ Addr.Base.LoSubReg) &&
+ "Expected 32-bit Base-Register-Low!!");
+
+ assert((TRI->getRegSizeInBits(Addr.Base.HiReg, *MRI) == 32 ||
+ Addr.Base.HiSubReg) &&
+ "Expected 32-bit Base-Register-Hi!!");
+
+ LLVM_DEBUG(dbgs() << " Re-Computed Anchor-Base:\n");
+ MachineOperand OffsetLo = createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
+ MachineOperand OffsetHi =
+ createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
+ unsigned CarryReg = MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned DeadCarryReg =
+ MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+ unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MachineInstr *LoHalf =
+ BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
+ .addReg(CarryReg, RegState::Define)
+ .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
+ .add(OffsetLo);
+ (void)LoHalf;
+ LLVM_DEBUG(dbgs() << " "; LoHalf->dump(););
+
+ MachineInstr *HiHalf =
+ BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADDC_U32_e64), DestSub1)
+ .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
+ .addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
+ .add(OffsetHi)
+ .addReg(CarryReg, RegState::Kill);
+ (void)HiHalf;
+ LLVM_DEBUG(dbgs() << " "; HiHalf->dump(););
+
+ unsigned FullDestReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ MachineInstr *FullBase =
+ BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+ (void)FullBase;
+ LLVM_DEBUG(dbgs() << " "; FullBase->dump(); dbgs() << "\n";);
+
+ return FullDestReg;
+}
+
+// Update base and offset with the NewBase and NewOffset in MI.
+void SILoadStoreOptimizer::updateBaseAndOffset(MachineInstr &MI,
+ unsigned NewBase,
+ int32_t NewOffset) {
+ TII->getNamedOperand(MI, AMDGPU::OpName::vaddr)->setReg(NewBase);
+ TII->getNamedOperand(MI, AMDGPU::OpName::offset)->setImm(NewOffset);
+}
+
+Optional<int32_t>
+SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) {
+ if (Op.isImm())
+ return Op.getImm();
+
+ if (!Op.isReg())
+ return None;
+
+ MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
+ if (!Def || Def->getOpcode() != AMDGPU::S_MOV_B32 ||
+ !Def->getOperand(1).isImm())
+ return None;
+
+ return Def->getOperand(1).getImm();
+}
+
+// Analyzes Base and extracts:
+// - 32-bit base registers, subregisters
+// - 64-bit constant offset
+// Expecting base computation as:
+// %OFFSET0:sgpr_32 = S_MOV_B32 8000
+// %LO:vgpr_32, %c:sreg_64_xexec =
+// V_ADD_I32_e64 %BASE_LO:vgpr_32, %103:sgpr_32,
+// %HI:vgpr_32, = V_ADDC_U32_e64 %BASE_HI:vgpr_32, 0, killed %c:sreg_64_xexec
+// %Base:vreg_64 =
+// REG_SEQUENCE %LO:vgpr_32, %subreg.sub0, %HI:vgpr_32, %subreg.sub1
+void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base,
+ MemAddress &Addr) {
+ if (!Base.isReg())
+ return;
+
+ MachineInstr *Def = MRI->getUniqueVRegDef(Base.getReg());
+ if (!Def || Def->getOpcode() != AMDGPU::REG_SEQUENCE
+ || Def->getNumOperands() != 5)
+ return;
+
+ MachineOperand BaseLo = Def->getOperand(1);
+ MachineOperand BaseHi = Def->getOperand(3);
+ if (!BaseLo.isReg() || !BaseHi.isReg())
+ return;
+
+ MachineInstr *BaseLoDef = MRI->getUniqueVRegDef(BaseLo.getReg());
+ MachineInstr *BaseHiDef = MRI->getUniqueVRegDef(BaseHi.getReg());
+
+ if (!BaseLoDef || BaseLoDef->getOpcode() != AMDGPU::V_ADD_I32_e64 ||
+ !BaseHiDef || BaseHiDef->getOpcode() != AMDGPU::V_ADDC_U32_e64)
+ return;
+
+ const auto *Src0 = TII->getNamedOperand(*BaseLoDef, AMDGPU::OpName::src0);
+ const auto *Src1 = TII->getNamedOperand(*BaseLoDef, AMDGPU::OpName::src1);
+
+ auto Offset0P = extractConstOffset(*Src0);
+ if (Offset0P)
+ BaseLo = *Src1;
+ else {
+ if (!(Offset0P = extractConstOffset(*Src1)))
+ return;
+ BaseLo = *Src0;
+ }
+
+ Src0 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src0);
+ Src1 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src1);
+
+ if (Src0->isImm())
+ std::swap(Src0, Src1);
+
+ if (!Src1->isImm())
+ return;
+
+ uint64_t Offset1 = Src1->getImm();
+ BaseHi = *Src0;
+
+ Addr.Base.LoReg = BaseLo.getReg();
+ Addr.Base.HiReg = BaseHi.getReg();
+ Addr.Base.LoSubReg = BaseLo.getSubReg();
+ Addr.Base.HiSubReg = BaseHi.getSubReg();
+ Addr.Offset = (*Offset0P & 0x00000000ffffffff) | (Offset1 << 32);
+}
+
+bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
+ MachineInstr &MI,
+ MemInfoMap &Visited,
+ SmallPtrSet<MachineInstr *, 4> &AnchorList) {
+
+ // TODO: Support flat and scratch.
+ if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0 ||
+ TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL)
+ return false;
+
+ // TODO: Support Store.
+ if (!MI.mayLoad())
+ return false;
+
+ if (AnchorList.count(&MI))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\nTryToPromoteConstantOffsetToImmFor "; MI.dump());
+
+ if (TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm()) {
+ LLVM_DEBUG(dbgs() << " Const-offset is already promoted.\n";);
+ return false;
+ }
+
+ // Step 1: Find the base registers and a 64-bit constant offset.
+ MachineOperand &Base = *TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
+ MemAddress MAddr;
+ if (Visited.find(&MI) == Visited.end()) {
+ processBaseWithConstOffset(Base, MAddr);
+ Visited[&MI] = MAddr;
+ } else
+ MAddr = Visited[&MI];
+
+ if (MAddr.Offset == 0) {
+ LLVM_DEBUG(dbgs() << " Failed to extract constant-offset or there are no"
+ " constant offsets that can be promoted.\n";);
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << " BASE: {" << MAddr.Base.HiReg << ", "
+ << MAddr.Base.LoReg << "} Offset: " << MAddr.Offset << "\n\n";);
+
+ // Step 2: Traverse through MI's basic block and find an anchor (that has the
+ // same base registers) with the highest 13-bit distance from MI's offset.
+ // E.g. (64-bit loads)
+ // bb:
+ // addr1 = &a + 4096; load1 = load(addr1, 0)
+ // addr2 = &a + 6144; load2 = load(addr2, 0)
+ // addr3 = &a + 8192; load3 = load(addr3, 0)
+ // addr4 = &a + 10240; load4 = load(addr4, 0)
+ // addr5 = &a + 12288; load5 = load(addr5, 0)
+ //
+ // Starting from the first load, the optimization will try to find a new base
+ // from which (&a + 4096) has a 13-bit distance. Both &a + 6144 and &a + 8192
+ // have a 13-bit distance from &a + 4096. The heuristic considers &a + 8192
+ // as the new base (anchor) because of the maximum distance, which can
+ // presumably accommodate more intermediate bases.
+ //
+ // Step 3: Move (&a + 8192) above load1. Compute and promote offsets from
+ // (&a + 8192) for load1, load2, load4.
+ // addr = &a + 8192
+ // load1 = load(addr, -4096)
+ // load2 = load(addr, -2048)
+ // load3 = load(addr, 0)
+ // load4 = load(addr, 2048)
+ // addr5 = &a + 12288; load5 = load(addr5, 0)
+ //
+ MachineInstr *AnchorInst = nullptr;
+ MemAddress AnchorAddr;
+ uint32_t MaxDist = std::numeric_limits<uint32_t>::min();
+ SmallVector<std::pair<MachineInstr *, int64_t>, 4> InstsWCommonBase;
+
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::iterator E = MBB->end();
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
+ ++MBBI;
+ const SITargetLowering *TLI =
+ static_cast<const SITargetLowering *>(STM->getTargetLowering());
+
+ for ( ; MBBI != E; ++MBBI) {
+ MachineInstr &MINext = *MBBI;
+ // TODO: Support finding an anchor (with the same base) from store addresses or
+ // any other load addresses where the opcodes are different.
+ if (MINext.getOpcode() != MI.getOpcode() ||
+ TII->getNamedOperand(MINext, AMDGPU::OpName::offset)->getImm())
+ continue;
+
+ const MachineOperand &BaseNext =
+ *TII->getNamedOperand(MINext, AMDGPU::OpName::vaddr);
+ MemAddress MAddrNext;
+ if (Visited.find(&MINext) == Visited.end()) {
+ processBaseWithConstOffset(BaseNext, MAddrNext);
+ Visited[&MINext] = MAddrNext;
+ } else
+ MAddrNext = Visited[&MINext];
+
+ if (MAddrNext.Base.LoReg != MAddr.Base.LoReg ||
+ MAddrNext.Base.HiReg != MAddr.Base.HiReg ||
+ MAddrNext.Base.LoSubReg != MAddr.Base.LoSubReg ||
+ MAddrNext.Base.HiSubReg != MAddr.Base.HiSubReg)
+ continue;
+
+ InstsWCommonBase.push_back(std::make_pair(&MINext, MAddrNext.Offset));
+
+ int64_t Dist = MAddr.Offset - MAddrNext.Offset;
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = Dist;
+ if (TLI->isLegalGlobalAddressingMode(AM) &&
+ (uint32_t)std::abs(Dist) > MaxDist) {
+ MaxDist = std::abs(Dist);
+
+ AnchorAddr = MAddrNext;
+ AnchorInst = &MINext;
+ }
+ }
+
+ if (AnchorInst) {
+ LLVM_DEBUG(dbgs() << " Anchor-Inst(with max-distance from Offset): ";
+ AnchorInst->dump());
+ LLVM_DEBUG(dbgs() << " Anchor-Offset from BASE: "
+ << AnchorAddr.Offset << "\n\n");
+
+ // Instead of moving up, just re-compute anchor-instruction's base address.
+ unsigned Base = computeBase(MI, AnchorAddr);
+
+ updateBaseAndOffset(MI, Base, MAddr.Offset - AnchorAddr.Offset);
+ LLVM_DEBUG(dbgs() << " After promotion: "; MI.dump(););
+
+ for (auto P : InstsWCommonBase) {
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = P.second - AnchorAddr.Offset;
+
+ if (TLI->isLegalGlobalAddressingMode(AM)) {
+ LLVM_DEBUG(dbgs() << " Promote Offset(" << P.second;
+ dbgs() << ")"; P.first->dump());
+ updateBaseAndOffset(*P.first, Base, P.second - AnchorAddr.Offset);
+ LLVM_DEBUG(dbgs() << " After promotion: "; P.first->dump());
+ }
+ }
+ AnchorList.insert(AnchorInst);
+ return true;
+ }
+
+ return false;
+}
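
The following standalone C++ sketch is an editorial illustration, not part of the upstream patch. It reduces the isLegalGlobalAddressingMode check to a signed 13-bit range test (-4096..4095), which is the only property the Step 2/Step 3 example above depends on, and reproduces why &a + 8192 is chosen as the anchor while load5 keeps its own base.

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Reduced model of the legality test: a displacement is usable as an
// immediate offset if it fits in a signed 13-bit field.
static bool fitsSigned13(int64_t D) { return D >= -4096 && D <= 4095; }

int main() {
  // Offsets from &a used by load1..load5 in the comment above.
  const std::vector<int64_t> Loads = {4096, 6144, 8192, 10240, 12288};

  // Pick the candidate with the largest |distance| from load1 that still
  // fits in the immediate field, mirroring the MaxDist heuristic.
  int64_t Anchor = Loads[0];
  int64_t MaxDist = 0;
  for (int64_t Cand : Loads) {
    int64_t Dist = Loads[0] - Cand;
    if (fitsSigned13(Dist) && std::abs(Dist) > MaxDist) {
      MaxDist = std::abs(Dist);
      Anchor = Cand;
    }
  }
  std::printf("anchor = &a + %lld\n", (long long)Anchor); // prints 8192

  // Every load whose distance from the anchor fits gets its offset promoted;
  // load5 (&a + 12288) is 4096 away, which exceeds 4095, so it is left alone.
  for (int64_t Off : Loads)
    std::printf("&a + %-5lld -> %s\n", (long long)Off,
                fitsSigned13(Off - Anchor) ? "promoted" : "kept as-is");
  return 0;
}
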
+
// Scan through looking for adjacent LDS operations with constant offsets from
// the same base register. We rely on the scheduler to do the hard work of
// clustering nearby loads, and assume these are all adjacent.
bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
bool Modified = false;
+ // Maps each visited instruction to its computed base registers and
+ // constant offset.
+ MemInfoMap Visited;
+ // Contains the anchor instructions: other instructions' constant offsets are
+ // promoted relative to them, and the anchors themselves are skipped.
+ SmallPtrSet<MachineInstr *, 4> AnchorList;
+
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
MachineInstr &MI = *I;
+ if (promoteConstantOffsetToImm(MI, Visited, AnchorList))
+ Modified = true;
+
// Don't combine if volatile.
if (MI.hasOrderedMemoryRef()) {
++I;
continue;
}
+ const unsigned Opc = MI.getOpcode();
+
CombineInfo CI;
CI.I = I;
- unsigned Opc = MI.getOpcode();
- if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64 ||
- Opc == AMDGPU::DS_READ_B32_gfx9 || Opc == AMDGPU::DS_READ_B64_gfx9) {
+ CI.InstClass = getInstClass(Opc);
- CI.InstClass = DS_READ_WRITE;
+ switch (CI.InstClass) {
+ default:
+ break;
+ case DS_READ:
CI.EltSize =
- (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 : 4;
-
+ (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8
+ : 4;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeRead2Pair(CI);
} else {
++I;
}
-
continue;
- } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64 ||
- Opc == AMDGPU::DS_WRITE_B32_gfx9 ||
- Opc == AMDGPU::DS_WRITE_B64_gfx9) {
- CI.InstClass = DS_READ_WRITE;
- CI.EltSize
- = (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8 : 4;
-
+ case DS_WRITE:
+ CI.EltSize =
+ (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8
+ : 4;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeWrite2Pair(CI);
} else {
++I;
}
-
continue;
- }
- if (Opc == AMDGPU::S_BUFFER_LOAD_DWORD_IMM ||
- Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM) {
- // EltSize is in units of the offset encoding.
- CI.InstClass = S_BUFFER_LOAD_IMM;
+ case S_BUFFER_LOAD_IMM:
CI.EltSize = AMDGPU::getSMRDEncodedOffset(*STM, 4);
- CI.IsX2 = Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeSBufferLoadImmPair(CI);
- if (!CI.IsX2)
- CreatedX2++;
+ OptimizeAgain |= (CI.Width0 + CI.Width1) < 16;
} else {
++I;
}
continue;
- }
- if (Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFSET ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET) {
- if (Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN)
- CI.InstClass = BUFFER_LOAD_OFFEN;
- else
- CI.InstClass = BUFFER_LOAD_OFFSET;
-
+ case BUFFER_LOAD_OFFEN:
+ case BUFFER_LOAD_OFFSET:
+ case BUFFER_LOAD_OFFEN_exact:
+ case BUFFER_LOAD_OFFSET_exact:
CI.EltSize = 4;
- CI.IsX2 = Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeBufferLoadPair(CI);
- if (!CI.IsX2)
- CreatedX2++;
+ OptimizeAgain |= (CI.Width0 + CI.Width1) < 4;
} else {
++I;
}
continue;
- }
-
- bool StoreIsX2, IsOffen;
- if (promoteBufferStoreOpcode(*I, StoreIsX2, IsOffen)) {
- CI.InstClass = IsOffen ? BUFFER_STORE_OFFEN : BUFFER_STORE_OFFSET;
+ case BUFFER_STORE_OFFEN:
+ case BUFFER_STORE_OFFSET:
+ case BUFFER_STORE_OFFEN_exact:
+ case BUFFER_STORE_OFFSET_exact:
CI.EltSize = 4;
- CI.IsX2 = StoreIsX2;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeBufferStorePair(CI);
- if (!CI.IsX2)
- CreatedX2++;
+ OptimizeAgain |= (CI.Width0 + CI.Width1) < 4;
} else {
++I;
}
@@ -956,12 +1528,10 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
bool Modified = false;
for (MachineBasicBlock &MBB : MF) {
- CreatedX2 = 0;
- Modified |= optimizeBlock(MBB);
-
- // Run again to convert x2 to x4.
- if (CreatedX2 >= 1)
+ do {
+ OptimizeAgain = false;
Modified |= optimizeBlock(MBB);
+ } while (OptimizeAgain);
}
return Modified;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index ad30317c344c..1aa1feebbdae 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -85,9 +85,7 @@ private:
void emitIf(MachineInstr &MI);
void emitElse(MachineInstr &MI);
- void emitBreak(MachineInstr &MI);
void emitIfBreak(MachineInstr &MI);
- void emitElseBreak(MachineInstr &MI);
void emitLoop(MachineInstr &MI);
void emitEndCf(MachineInstr &MI);
@@ -329,20 +327,6 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
}
-void SILowerControlFlow::emitBreak(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- const DebugLoc &DL = MI.getDebugLoc();
- unsigned Dst = MI.getOperand(0).getReg();
-
- MachineInstr *Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
- .addReg(AMDGPU::EXEC)
- .add(MI.getOperand(1));
-
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *Or);
- MI.eraseFromParent();
-}
-
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
@@ -384,11 +368,6 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
MI.eraseFromParent();
}
-void SILowerControlFlow::emitElseBreak(MachineInstr &MI) {
- // Lowered in the same way as emitIfBreak above.
- emitIfBreak(MI);
-}
-
void SILowerControlFlow::emitLoop(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
@@ -515,18 +494,10 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
emitElse(MI);
break;
- case AMDGPU::SI_BREAK:
- emitBreak(MI);
- break;
-
case AMDGPU::SI_IF_BREAK:
emitIfBreak(MI);
break;
- case AMDGPU::SI_ELSE_BREAK:
- emitElseBreak(MI);
- break;
-
case AMDGPU::SI_LOOP:
emitLoop(MI);
break;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index ecc6cff407e1..eb038bb5d5fc 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -5,37 +5,61 @@
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-/// i1 values are usually inserted by the CFG Structurize pass and they are
-/// unique in that they can be copied from VALU to SALU registers.
-/// This is not possible for any other value type. Since there are no
-/// MOV instructions for i1, we to use V_CMP_* and V_CNDMASK to move the i1.
-///
//===----------------------------------------------------------------------===//
//
+// This pass lowers all occurrences of i1 values (with a vreg_1 register class)
+// to lane masks (64-bit scalar registers). The pass assumes machine SSA form
+// and a wave-level control flow graph.
+//
+// Before this pass, values that are semantically i1 and are defined and used
+// within the same basic block are already represented as lane masks in scalar
+// registers. However, values that cross basic blocks are always transferred
+// between basic blocks in vreg_1 virtual registers and are lowered by this
+// pass.
+//
+// The only instructions that use or define vreg_1 virtual registers are COPY,
+// PHI, and IMPLICIT_DEF.
+//
+//===----------------------------------------------------------------------===//
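
The following standalone sketch is an editorial illustration, not part of the upstream patch. It shows the representation this pass targets, assuming a 64-lane wave as implied by the SReg_64 lane masks used below: an i1 value becomes one bit per lane in a 64-bit scalar, and the V_CNDMASK_B32 emitted by lowerCopiesFromI1 expands such a mask back into 0/-1 per lane.

#include <cstdint>
#include <cstdio>

// A vreg_1 value is conceptually one boolean per lane of the wave. After this
// pass it is held as a 64-bit scalar where bit N is the value for lane N.
static uint64_t toLaneMask(const bool (&PerLane)[64]) {
  uint64_t Mask = 0;
  for (unsigned Lane = 0; Lane < 64; ++Lane)
    if (PerLane[Lane])
      Mask |= uint64_t(1) << Lane;
  return Mask;
}

int main() {
  bool PerLane[64] = {};
  PerLane[0] = PerLane[3] = PerLane[63] = true;
  // Lanes 0, 3 and 63 set -> 0x8000000000000009.
  std::printf("mask = 0x%016llx\n",
              (unsigned long long)toLaneMask(PerLane));
  return 0;
}
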
-#define DEBUG_TYPE "si-i1-copies"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPULaneDominator.h"
-#include "llvm/CodeGen/LiveIntervals.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
+#define DEBUG_TYPE "si-i1-copies"
+
using namespace llvm;
+static unsigned createLaneMaskReg(MachineFunction &MF);
+static unsigned insertUndefLaneMask(MachineBasicBlock &MBB);
+
namespace {
class SILowerI1Copies : public MachineFunctionPass {
public:
static char ID;
+private:
+ MachineFunction *MF = nullptr;
+ MachineDominatorTree *DT = nullptr;
+ MachinePostDominatorTree *PDT = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const GCNSubtarget *ST = nullptr;
+ const SIInstrInfo *TII = nullptr;
+
+ DenseSet<unsigned> ConstrainRegs;
+
public:
SILowerI1Copies() : MachineFunctionPass(ID) {
initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
@@ -47,14 +71,337 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+private:
+ void lowerCopiesFromI1();
+ void lowerPhis();
+ void lowerCopiesToI1();
+ bool isConstantLaneMask(unsigned Reg, bool &Val) const;
+ void buildMergeLaneMasks(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ unsigned DstReg, unsigned PrevReg, unsigned CurReg);
+ MachineBasicBlock::iterator
+ getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+
+ bool isLaneMaskReg(unsigned Reg) const {
+ return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
+ TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
+ ST->getWavefrontSize();
+ }
+};
+
+/// Helper class that analyzes the relationship between incoming values of a
+/// phi in the control flow graph to determine where an incoming value can
+/// simply be taken as a scalar lane mask as-is, and where it needs to be
+/// merged with another, previously defined lane mask.
+///
+/// The approach is as follows:
+/// - Determine all basic blocks which, starting from the incoming blocks,
+/// a wave may reach before entering the def block (the block containing the
+/// phi).
+/// - If an incoming block has no predecessors in this set, we can take the
+/// incoming value as a scalar lane mask as-is.
+/// -- A special case of this is when the def block has a self-loop.
+/// - Otherwise, the incoming value needs to be merged with a previously
+/// defined lane mask.
+/// - If there is a path into the set of reachable blocks that does _not_ go
+/// through an incoming block where we can take the scalar lane mask as-is,
+/// we need to invent an available value for the SSAUpdater. Choices are
+/// 0 and undef, with differing consequences for how to merge values etc.
+///
+/// TODO: We could use region analysis to quickly skip over SESE regions during
+/// the traversal.
+///
+class PhiIncomingAnalysis {
+ MachinePostDominatorTree &PDT;
+
+ // For each reachable basic block, whether it is a source in the induced
+ // subgraph of the CFG.
+ DenseMap<MachineBasicBlock *, bool> ReachableMap;
+ SmallVector<MachineBasicBlock *, 4> ReachableOrdered;
+ SmallVector<MachineBasicBlock *, 4> Stack;
+ SmallVector<MachineBasicBlock *, 4> Predecessors;
+
+public:
+ PhiIncomingAnalysis(MachinePostDominatorTree &PDT) : PDT(PDT) {}
+
+ /// Returns whether \p MBB is a source in the induced subgraph of reachable
+ /// blocks.
+ bool isSource(MachineBasicBlock &MBB) const {
+ return ReachableMap.find(&MBB)->second;
+ }
+
+ ArrayRef<MachineBasicBlock *> predecessors() const { return Predecessors; }
+
+ void analyze(MachineBasicBlock &DefBlock,
+ ArrayRef<MachineBasicBlock *> IncomingBlocks) {
+ assert(Stack.empty());
+ ReachableMap.clear();
+ ReachableOrdered.clear();
+ Predecessors.clear();
+
+ // Insert the def block first, so that it acts as an end point for the
+ // traversal.
+ ReachableMap.try_emplace(&DefBlock, false);
+ ReachableOrdered.push_back(&DefBlock);
+
+ for (MachineBasicBlock *MBB : IncomingBlocks) {
+ if (MBB == &DefBlock) {
+ ReachableMap[&DefBlock] = true; // self-loop on DefBlock
+ continue;
+ }
+
+ ReachableMap.try_emplace(MBB, false);
+ ReachableOrdered.push_back(MBB);
+
+ // If this block has a divergent terminator and the def block is its
+ // post-dominator, the wave may first visit the other successors.
+ bool Divergent = false;
+ for (MachineInstr &MI : MBB->terminators()) {
+ if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
+ MI.getOpcode() == AMDGPU::SI_IF ||
+ MI.getOpcode() == AMDGPU::SI_ELSE ||
+ MI.getOpcode() == AMDGPU::SI_LOOP) {
+ Divergent = true;
+ break;
+ }
+ }
+
+ if (Divergent && PDT.dominates(&DefBlock, MBB)) {
+ for (MachineBasicBlock *Succ : MBB->successors())
+ Stack.push_back(Succ);
+ }
+ }
+
+ while (!Stack.empty()) {
+ MachineBasicBlock *MBB = Stack.pop_back_val();
+ if (!ReachableMap.try_emplace(MBB, false).second)
+ continue;
+ ReachableOrdered.push_back(MBB);
+
+ for (MachineBasicBlock *Succ : MBB->successors())
+ Stack.push_back(Succ);
+ }
+
+ for (MachineBasicBlock *MBB : ReachableOrdered) {
+ bool HaveReachablePred = false;
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (ReachableMap.count(Pred)) {
+ HaveReachablePred = true;
+ } else {
+ Stack.push_back(Pred);
+ }
+ }
+ if (!HaveReachablePred)
+ ReachableMap[MBB] = true;
+ if (HaveReachablePred) {
+ for (MachineBasicBlock *UnreachablePred : Stack) {
+ if (llvm::find(Predecessors, UnreachablePred) == Predecessors.end())
+ Predecessors.push_back(UnreachablePred);
+ }
+ }
+ Stack.clear();
+ }
+ }
+};
+
+/// Helper class that detects loops which require us to lower an i1 COPY into
+/// bitwise manipulation.
+///
+/// Unfortunately, we cannot use LoopInfo because LoopInfo does not distinguish
+/// between loops with the same header. Consider this example:
+///
+/// A-+-+
+/// | | |
+/// B-+ |
+/// | |
+/// C---+
+///
+/// A is the header of a loop containing A, B, and C as far as LoopInfo is
+/// concerned. However, an i1 COPY in B that is used in C must be lowered to
+/// bitwise operations to combine results from different loop iterations when
+/// B has a divergent branch (since by default we will compile this code such
+/// that threads in a wave are merged at the entry of C).
+///
+/// The following rule is implemented to determine whether bitwise operations
+/// are required: use the bitwise lowering for a def in block B if a backward
+/// edge to B is reachable without going through the nearest common
+/// post-dominator of B and all uses of the def.
+///
+/// TODO: This rule is conservative because it does not check whether the
+/// relevant branches are actually divergent.
+///
+/// The class is designed to cache the CFG traversal so that it can be re-used
+/// for multiple defs within the same basic block.
+///
+/// TODO: We could use region analysis to quickly skip over SESE regions during
+/// the traversal.
+///
+class LoopFinder {
+ MachineDominatorTree &DT;
+ MachinePostDominatorTree &PDT;
+
+ // All visited / reachable blocks, tagged by level (level 0 is the def block,
+ // level 1 are all blocks reachable including but not going through the def
+ // block's IPDOM, etc.).
+ DenseMap<MachineBasicBlock *, unsigned> Visited;
+
+ // Nearest common dominator of all visited blocks by level (level 0 is the
+ // def block). Used for seeding the SSAUpdater.
+ SmallVector<MachineBasicBlock *, 4> CommonDominators;
+
+ // Post-dominator of all visited blocks.
+ MachineBasicBlock *VisitedPostDom = nullptr;
+
+ // Level at which a loop was found: 0 is not possible; 1 = a backward edge is
+ // reachable without going through the IPDOM of the def block (if the IPDOM
+ // itself has an edge to the def block, the loop level is 2), etc.
+ unsigned FoundLoopLevel = ~0u;
+
+ MachineBasicBlock *DefBlock = nullptr;
+ SmallVector<MachineBasicBlock *, 4> Stack;
+ SmallVector<MachineBasicBlock *, 4> NextLevel;
+
+public:
+ LoopFinder(MachineDominatorTree &DT, MachinePostDominatorTree &PDT)
+ : DT(DT), PDT(PDT) {}
+
+ void initialize(MachineBasicBlock &MBB) {
+ Visited.clear();
+ CommonDominators.clear();
+ Stack.clear();
+ NextLevel.clear();
+ VisitedPostDom = nullptr;
+ FoundLoopLevel = ~0u;
+
+ DefBlock = &MBB;
+ }
+
+ /// Check whether a backward edge can be reached without going through the
+ /// given \p PostDom of the def block.
+ ///
+ /// Return the level of \p PostDom if a loop was found, or 0 otherwise.
+ unsigned findLoop(MachineBasicBlock *PostDom) {
+ MachineDomTreeNode *PDNode = PDT.getNode(DefBlock);
+
+ if (!VisitedPostDom)
+ advanceLevel();
+
+ unsigned Level = 0;
+ while (PDNode->getBlock() != PostDom) {
+ if (PDNode->getBlock() == VisitedPostDom)
+ advanceLevel();
+ PDNode = PDNode->getIDom();
+ Level++;
+ if (FoundLoopLevel == Level)
+ return Level;
+ }
+
+ return 0;
+ }
+
+ /// Add undef values dominating the loop and the optionally given additional
+ /// blocks, so that the SSA updater doesn't have to search all the way to the
+ /// function entry.
+ void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
+ ArrayRef<MachineBasicBlock *> Blocks = {}) {
+ assert(LoopLevel < CommonDominators.size());
+
+ MachineBasicBlock *Dom = CommonDominators[LoopLevel];
+ for (MachineBasicBlock *MBB : Blocks)
+ Dom = DT.findNearestCommonDominator(Dom, MBB);
+
+ if (!inLoopLevel(*Dom, LoopLevel, Blocks)) {
+ SSAUpdater.AddAvailableValue(Dom, insertUndefLaneMask(*Dom));
+ } else {
+ // The dominator is part of the loop or the given blocks, so add the
+ // undef value to unreachable predecessors instead.
+ for (MachineBasicBlock *Pred : Dom->predecessors()) {
+ if (!inLoopLevel(*Pred, LoopLevel, Blocks))
+ SSAUpdater.AddAvailableValue(Pred, insertUndefLaneMask(*Pred));
+ }
+ }
+ }
+
+private:
+ bool inLoopLevel(MachineBasicBlock &MBB, unsigned LoopLevel,
+ ArrayRef<MachineBasicBlock *> Blocks) const {
+ auto DomIt = Visited.find(&MBB);
+ if (DomIt != Visited.end() && DomIt->second <= LoopLevel)
+ return true;
+
+ if (llvm::find(Blocks, &MBB) != Blocks.end())
+ return true;
+
+ return false;
+ }
+
+ void advanceLevel() {
+ MachineBasicBlock *VisitedDom;
+
+ if (!VisitedPostDom) {
+ VisitedPostDom = DefBlock;
+ VisitedDom = DefBlock;
+ Stack.push_back(DefBlock);
+ } else {
+ VisitedPostDom = PDT.getNode(VisitedPostDom)->getIDom()->getBlock();
+ VisitedDom = CommonDominators.back();
+
+ for (unsigned i = 0; i < NextLevel.size();) {
+ if (PDT.dominates(VisitedPostDom, NextLevel[i])) {
+ Stack.push_back(NextLevel[i]);
+
+ NextLevel[i] = NextLevel.back();
+ NextLevel.pop_back();
+ } else {
+ i++;
+ }
+ }
+ }
+
+ unsigned Level = CommonDominators.size();
+ while (!Stack.empty()) {
+ MachineBasicBlock *MBB = Stack.pop_back_val();
+ if (!PDT.dominates(VisitedPostDom, MBB))
+ NextLevel.push_back(MBB);
+
+ Visited[MBB] = Level;
+ VisitedDom = DT.findNearestCommonDominator(VisitedDom, MBB);
+
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ == DefBlock) {
+ if (MBB == VisitedPostDom)
+ FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
+ else
+ FoundLoopLevel = std::min(FoundLoopLevel, Level);
+ continue;
+ }
+
+ if (Visited.try_emplace(Succ, ~0u).second) {
+ if (MBB == VisitedPostDom)
+ NextLevel.push_back(Succ);
+ else
+ Stack.push_back(Succ);
+ }
+ }
+ }
+
+ CommonDominators.push_back(VisitedDom);
+ }
};
} // End anonymous namespace.
-INITIALIZE_PASS(SILowerI1Copies, DEBUG_TYPE,
- "SI Lower i1 Copies", false, false)
+INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE, "SI Lower i1 Copies", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE, "SI Lower i1 Copies", false,
+ false)
char SILowerI1Copies::ID = 0;
@@ -64,104 +411,415 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
return new SILowerI1Copies();
}
-bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
+static unsigned createLaneMaskReg(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
+ return MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+}
+
+static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
+ MachineFunction &MF = *MBB.getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
- const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ unsigned UndefReg = createLaneMaskReg(MF);
+ BuildMI(MBB, MBB.getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
+ UndefReg);
+ return UndefReg;
+}
- std::vector<unsigned> I1Defs;
+/// Lower all instructions that def or use vreg_1 registers.
+///
+/// In a first pass, we lower COPYs from vreg_1 to vector registers, as can
+/// occur around inline assembly. We do this first, before vreg_1 registers
+/// are changed to scalar mask registers.
+///
+/// Then we lower all defs of vreg_1 registers. Phi nodes are lowered before
+/// all others, because phi lowering looks through copies and can therefore
+/// often make copy lowering unnecessary.
+bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
+ MF = &TheMF;
+ MRI = &MF->getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ PDT = &getAnalysis<MachinePostDominatorTree>();
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
+ ST = &MF->getSubtarget<GCNSubtarget>();
+ TII = ST->getInstrInfo();
- MachineBasicBlock &MBB = *BI;
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
- MachineInstr &MI = *I;
+ lowerCopiesFromI1();
+ lowerPhis();
+ lowerCopiesToI1();
- if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) {
- unsigned Reg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- if (RC == &AMDGPU::VReg_1RegClass)
- MRI.setRegClass(Reg, &AMDGPU::SReg_64RegClass);
- continue;
- }
+ for (unsigned Reg : ConstrainRegs)
+ MRI->constrainRegClass(Reg, &AMDGPU::SReg_64_XEXECRegClass);
+ ConstrainRegs.clear();
+
+ return true;
+}
+void SILowerI1Copies::lowerCopiesFromI1() {
+ SmallVector<MachineInstr *, 4> DeadCopies;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
if (MI.getOpcode() != AMDGPU::COPY)
continue;
- const MachineOperand &Dst = MI.getOperand(0);
- const MachineOperand &Src = MI.getOperand(1);
-
- if (!TargetRegisterInfo::isVirtualRegister(Src.getReg()) ||
- !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ MRI->getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass)
continue;
- const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg());
- const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg());
+ if (isLaneMaskReg(DstReg) ||
+ (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ MRI->getRegClass(DstReg) == &AMDGPU::VReg_1RegClass))
+ continue;
+ // Copy into a 32-bit vector register.
+ LLVM_DEBUG(dbgs() << "Lower copy from i1: " << MI);
DebugLoc DL = MI.getDebugLoc();
- MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
- if (DstRC == &AMDGPU::VReg_1RegClass &&
- TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
- I1Defs.push_back(Dst.getReg());
-
- if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) {
- if (DefInst->getOperand(1).isImm()) {
- I1Defs.push_back(Dst.getReg());
-
- int64_t Val = DefInst->getOperand(1).getImm();
- assert(Val == 0 || Val == -1);
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32))
- .add(Dst)
- .addImm(Val);
- MI.eraseFromParent();
- continue;
+
+ assert(TII->getRegisterInfo().getRegSizeInBits(DstReg, *MRI) == 32);
+ assert(!MI.getOperand(0).getSubReg());
+
+ ConstrainRegs.insert(SrcReg);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
+ .addImm(-1)
+ .addReg(SrcReg);
+ DeadCopies.push_back(&MI);
+ }
+
+ for (MachineInstr *MI : DeadCopies)
+ MI->eraseFromParent();
+ DeadCopies.clear();
+ }
+}
+
+void SILowerI1Copies::lowerPhis() {
+ MachineSSAUpdater SSAUpdater(*MF);
+ LoopFinder LF(*DT, *PDT);
+ PhiIncomingAnalysis PIA(*PDT);
+ SmallVector<MachineInstr *, 4> DeadPhis;
+ SmallVector<MachineBasicBlock *, 4> IncomingBlocks;
+ SmallVector<unsigned, 4> IncomingRegs;
+ SmallVector<unsigned, 4> IncomingUpdated;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ LF.initialize(MBB);
+
+ for (MachineInstr &MI : MBB.phis()) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Lower PHI: " << MI);
+
+ MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+
+ // Collect incoming values.
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ assert(i + 1 < MI.getNumOperands());
+ unsigned IncomingReg = MI.getOperand(i).getReg();
+ MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB();
+ MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
+
+ if (IncomingDef->getOpcode() == AMDGPU::COPY) {
+ IncomingReg = IncomingDef->getOperand(1).getReg();
+ assert(isLaneMaskReg(IncomingReg));
+ assert(!IncomingDef->getOperand(1).getSubReg());
+ } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
+ continue;
+ } else {
+ assert(IncomingDef->isPHI());
+ }
+
+ IncomingBlocks.push_back(IncomingMBB);
+ IncomingRegs.push_back(IncomingReg);
+ }
+
+ // Phis in a loop that are observed outside the loop receive a simple but
+ // conservatively correct treatment.
+ MachineBasicBlock *PostDomBound = &MBB;
+ for (MachineInstr &Use : MRI->use_instructions(DstReg)) {
+ PostDomBound =
+ PDT->findNearestCommonDominator(PostDomBound, Use.getParent());
+ }
+
+ unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
+
+ SSAUpdater.Initialize(DstReg);
+
+ if (FoundLoopLevel) {
+ LF.addLoopEntries(FoundLoopLevel, SSAUpdater, IncomingBlocks);
+
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ IncomingUpdated.push_back(createLaneMaskReg(*MF));
+ SSAUpdater.AddAvailableValue(IncomingBlocks[i],
+ IncomingUpdated.back());
+ }
+
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ buildMergeLaneMasks(
+ IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
+ SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
+ }
+ } else {
+ // The phi is not observed from outside a loop. Use a more accurate
+ // lowering.
+ PIA.analyze(MBB, IncomingBlocks);
+
+ for (MachineBasicBlock *MBB : PIA.predecessors())
+ SSAUpdater.AddAvailableValue(MBB, insertUndefLaneMask(*MBB));
+
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ if (PIA.isSource(IMBB)) {
+ IncomingUpdated.push_back(0);
+ SSAUpdater.AddAvailableValue(&IMBB, IncomingRegs[i]);
+ } else {
+ IncomingUpdated.push_back(createLaneMaskReg(*MF));
+ SSAUpdater.AddAvailableValue(&IMBB, IncomingUpdated.back());
}
}
- unsigned int TmpSrc = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::COPY), TmpSrc)
- .add(Src);
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
- .add(Dst)
- .addImm(0)
- .addImm(-1)
- .addReg(TmpSrc);
- MI.eraseFromParent();
- } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
- SrcRC == &AMDGPU::VReg_1RegClass) {
- if (DefInst->getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
- DefInst->getOperand(1).isImm() && DefInst->getOperand(2).isImm() &&
- DefInst->getOperand(1).getImm() == 0 &&
- DefInst->getOperand(2).getImm() != 0 &&
- DefInst->getOperand(3).isReg() &&
- TargetRegisterInfo::isVirtualRegister(
- DefInst->getOperand(3).getReg()) &&
- TRI->getCommonSubClass(
- MRI.getRegClass(DefInst->getOperand(3).getReg()),
- &AMDGPU::SGPR_64RegClass) &&
- AMDGPU::laneDominates(DefInst->getParent(), &MBB)) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64))
- .add(Dst)
- .addReg(AMDGPU::EXEC)
- .add(DefInst->getOperand(3));
- } else {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64))
- .add(Dst)
- .add(Src)
- .addImm(0);
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ if (!IncomingUpdated[i])
+ continue;
+
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ buildMergeLaneMasks(
+ IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
+ SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
}
- MI.eraseFromParent();
+ }
+
+ unsigned NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
+ if (NewReg != DstReg) {
+ MRI->replaceRegWith(NewReg, DstReg);
+
+ // Ensure that DstReg has a single def and mark the old PHI node for
+ // deletion.
+ MI.getOperand(0).setReg(NewReg);
+ DeadPhis.push_back(&MI);
+ }
+
+ IncomingBlocks.clear();
+ IncomingRegs.clear();
+ IncomingUpdated.clear();
+ }
+
+ for (MachineInstr *MI : DeadPhis)
+ MI->eraseFromParent();
+ DeadPhis.clear();
+ }
+}
+
+void SILowerI1Copies::lowerCopiesToI1() {
+ MachineSSAUpdater SSAUpdater(*MF);
+ LoopFinder LF(*DT, *PDT);
+ SmallVector<MachineInstr *, 4> DeadCopies;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ LF.initialize(MBB);
+
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
+ MI.getOpcode() != AMDGPU::COPY)
+ continue;
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg) ||
+ MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+ continue;
+
+ if (MRI->use_empty(DstReg)) {
+ DeadCopies.push_back(&MI);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Lower Other: " << MI);
+
+ MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+ if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
+ continue;
+
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ assert(!MI.getOperand(1).getSubReg());
+
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !isLaneMaskReg(SrcReg)) {
+ assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
+ unsigned TmpReg = createLaneMaskReg(*MF);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ MI.getOperand(1).setReg(TmpReg);
+ SrcReg = TmpReg;
+ }
+
+ // Defs in a loop that are observed outside the loop must be transformed
+ // into appropriate bit manipulation.
+ MachineBasicBlock *PostDomBound = &MBB;
+ for (MachineInstr &Use : MRI->use_instructions(DstReg)) {
+ PostDomBound =
+ PDT->findNearestCommonDominator(PostDomBound, Use.getParent());
+ }
+
+ unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
+ if (FoundLoopLevel) {
+ SSAUpdater.Initialize(DstReg);
+ SSAUpdater.AddAvailableValue(&MBB, DstReg);
+ LF.addLoopEntries(FoundLoopLevel, SSAUpdater);
+
+ buildMergeLaneMasks(MBB, MI, DL, DstReg,
+ SSAUpdater.GetValueInMiddleOfBlock(&MBB), SrcReg);
+ DeadCopies.push_back(&MI);
}
}
+
+ for (MachineInstr *MI : DeadCopies)
+ MI->eraseFromParent();
+ DeadCopies.clear();
}
+}
- for (unsigned Reg : I1Defs)
- MRI.setRegClass(Reg, &AMDGPU::VGPR_32RegClass);
+bool SILowerI1Copies::isConstantLaneMask(unsigned Reg, bool &Val) const {
+ const MachineInstr *MI;
+ for (;;) {
+ MI = MRI->getUniqueVRegDef(Reg);
+ if (MI->getOpcode() != AMDGPU::COPY)
+ break;
+
+ Reg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (!isLaneMaskReg(Reg))
+ return false;
+ }
+
+ if (MI->getOpcode() != AMDGPU::S_MOV_B64)
+ return false;
+
+ if (!MI->getOperand(1).isImm())
+ return false;
+
+ int64_t Imm = MI->getOperand(1).getImm();
+ if (Imm == 0) {
+ Val = false;
+ return true;
+ }
+ if (Imm == -1) {
+ Val = true;
+ return true;
+ }
return false;
}
+
+static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
+ Def = false;
+ Use = false;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
+ if (MO.isUse())
+ Use = true;
+ else
+ Def = true;
+ }
+ }
+}
+
+/// Return a point at the end of the given \p MBB to insert SALU instructions
+/// for lane mask calculation. Take terminators and SCC into account.
+MachineBasicBlock::iterator
+SILowerI1Copies::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
+ auto InsertionPt = MBB.getFirstTerminator();
+ bool TerminatorsUseSCC = false;
+ for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {
+ bool DefsSCC;
+ instrDefsUsesSCC(*I, DefsSCC, TerminatorsUseSCC);
+ if (TerminatorsUseSCC || DefsSCC)
+ break;
+ }
+
+ if (!TerminatorsUseSCC)
+ return InsertionPt;
+
+ while (InsertionPt != MBB.begin()) {
+ InsertionPt--;
+
+ bool DefSCC, UseSCC;
+ instrDefsUsesSCC(*InsertionPt, DefSCC, UseSCC);
+ if (DefSCC)
+ return InsertionPt;
+ }
+
+ // We should have at least seen an IMPLICIT_DEF or COPY
+ llvm_unreachable("SCC used by terminator but no def in block");
+}
+
+void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DstReg,
+ unsigned PrevReg, unsigned CurReg) {
+ bool PrevVal;
+ bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
+ bool CurVal;
+ bool CurConstant = isConstantLaneMask(CurReg, CurVal);
+
+ if (PrevConstant && CurConstant) {
+ if (PrevVal == CurVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
+ } else if (CurVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(AMDGPU::EXEC);
+ } else {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), DstReg)
+ .addReg(AMDGPU::EXEC)
+ .addImm(-1);
+ }
+ return;
+ }
+
+ unsigned PrevMaskedReg = 0;
+ unsigned CurMaskedReg = 0;
+ if (!PrevConstant) {
+ if (CurConstant && CurVal) {
+ PrevMaskedReg = PrevReg;
+ } else {
+ PrevMaskedReg = createLaneMaskReg(*MF);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ANDN2_B64), PrevMaskedReg)
+ .addReg(PrevReg)
+ .addReg(AMDGPU::EXEC);
+ }
+ }
+ if (!CurConstant) {
+ // TODO: check whether CurReg is already masked by EXEC
+ if (PrevConstant && PrevVal) {
+ CurMaskedReg = CurReg;
+ } else {
+ CurMaskedReg = createLaneMaskReg(*MF);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), CurMaskedReg)
+ .addReg(CurReg)
+ .addReg(AMDGPU::EXEC);
+ }
+ }
+
+ if (PrevConstant && !PrevVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
+ .addReg(CurMaskedReg);
+ } else if (CurConstant && !CurVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
+ .addReg(PrevMaskedReg);
+ } else if (PrevConstant && PrevVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ORN2_B64), DstReg)
+ .addReg(CurMaskedReg)
+ .addReg(AMDGPU::EXEC);
+ } else {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_OR_B64), DstReg)
+ .addReg(PrevMaskedReg)
+ .addReg(CurMaskedReg ? CurMaskedReg : (unsigned)AMDGPU::EXEC);
+ }
+}
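
The following standalone sketch is an editorial illustration, not part of the upstream patch. buildMergeLaneMasks realizes DstReg = (PrevReg & ~EXEC) | (CurReg & EXEC), either with the full S_ANDN2/S_AND/S_OR sequence or with a constant-folded variant when an input is known to be all zeros or all ones; the sketch models lane masks as plain 64-bit integers and checks that each folded variant agrees with that identity.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Reference semantics of the merge: lanes outside EXEC keep the previous
// value, lanes inside EXEC take the current value.
static uint64_t mergeRef(uint64_t Prev, uint64_t Cur, uint64_t Exec) {
  return (Prev & ~Exec) | (Cur & Exec);
}

// Model of the folded sequences emitted when an input is a known constant
// (0 = all lanes false, ~0 = all lanes true).
static uint64_t mergeFolded(uint64_t Prev, bool PrevConst, uint64_t Cur,
                            bool CurConst, uint64_t Exec) {
  if (PrevConst && CurConst)
    return Prev == Cur ? Cur : (Cur ? Exec : ~Exec); // COPY or S_XOR with -1
  if (PrevConst && !Prev)
    return Cur & Exec;                               // S_AND cur, exec
  if (CurConst && !Cur)
    return Prev & ~Exec;                             // S_ANDN2 prev, exec
  if (PrevConst && Prev)
    return Cur | ~Exec;                              // S_ORN2 cur, exec
  if (CurConst && Cur)
    return Prev | Exec;                              // S_OR prev, exec
  return (Prev & ~Exec) | (Cur & Exec);              // S_ANDN2 + S_AND + S_OR
}

int main() {
  const uint64_t Samples[] = {0, ~0ull, 0x00000000ffffffffull,
                              0x5555555555555555ull, 0x8000000000000001ull};
  for (uint64_t Exec : Samples)
    for (uint64_t Prev : Samples)
      for (uint64_t Cur : Samples)
        for (int PC = 0; PC < 2; ++PC)
          for (int CC = 0; CC < 2; ++CC) {
            // A "constant" operand only makes sense for 0 or ~0.
            if (PC && Prev != 0 && Prev != ~0ull)
              continue;
            if (CC && Cur != 0 && Cur != ~0ull)
              continue;
            assert(mergeFolded(Prev, PC, Cur, CC, Exec) ==
                   mergeRef(Prev, Cur, Exec));
          }
  std::puts("all merge variants agree with the reference identity");
  return 0;
}
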
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 0d5ff75e37ed..181cc41bd5ff 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -117,7 +117,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- bool MaySpill = ST.isVGPRSpillingEnabled(F);
bool HasStackObjects = FrameInfo.hasStackObjects();
if (isEntryFunction()) {
@@ -126,21 +125,18 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (WorkItemIDZ)
WorkItemIDY = true;
- if (HasStackObjects || MaySpill) {
- PrivateSegmentWaveByteOffset = true;
+ PrivateSegmentWaveByteOffset = true;
// HS and GS always have the scratch wave offset in SGPR5 on GFX9.
if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
(CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
- ArgInfo.PrivateSegmentWaveByteOffset
- = ArgDescriptor::createRegister(AMDGPU::SGPR5);
- }
+ ArgInfo.PrivateSegmentWaveByteOffset =
+ ArgDescriptor::createRegister(AMDGPU::SGPR5);
}
- bool IsCOV2 = ST.isAmdCodeObjectV2(F);
- if (IsCOV2) {
- if (HasStackObjects || MaySpill)
- PrivateSegmentBuffer = true;
+ bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
+ if (isAmdHsaOrMesa) {
+ PrivateSegmentBuffer = true;
if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
DispatchPtr = true;
@@ -151,14 +147,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F.hasFnAttribute("amdgpu-dispatch-id"))
DispatchID = true;
} else if (ST.isMesaGfxShader(F)) {
- if (HasStackObjects || MaySpill)
- ImplicitBufferPtr = true;
+ ImplicitBufferPtr = true;
}
if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
KernargSegmentPtr = true;
- if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
+ if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
// TODO: This could be refined a lot. The attribute is a poor way of
// detecting calls that may require it before argument lowering.
if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 18754442898f..fb7e670068fe 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -471,7 +471,7 @@ void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(DAG);
+ DAG->dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -611,13 +611,11 @@ void SIScheduleBlock::printDebug(bool full) {
dbgs() << "\nInstructions:\n";
if (!Scheduled) {
- for (SUnit* SU : SUnits) {
- SU->dump(DAG);
- }
+ for (const SUnit* SU : SUnits)
+ DAG->dumpNode(*SU);
} else {
- for (SUnit* SU : SUnits) {
- SU->dump(DAG);
- }
+ for (const SUnit* SU : SUnits)
+ DAG->dumpNode(*SU);
}
dbgs() << "///////////////////////\n";
@@ -1933,7 +1931,7 @@ void SIScheduleDAGMI::schedule()
LLVM_DEBUG(dbgs() << "Preparing Scheduling\n");
buildDAGWithRegPressure();
- LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
+ LLVM_DEBUG(dump());
topologicalSort();
findRootsAndBiasEdges(TopRoots, BotRoots);
@@ -1957,12 +1955,12 @@ void SIScheduleDAGMI::schedule()
for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
SUnit *SU = &SUnits[i];
- unsigned BaseLatReg;
+ MachineOperand *BaseLatOp;
int64_t OffLatReg;
if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
IsLowLatencySU[i] = 1;
- if (SITII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseLatReg, OffLatReg,
- TRI))
+ if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
+ TRI))
LowLatencyOffset[i] = OffLatReg;
} else if (SITII->isHighLatencyInstruction(*SU->getInstr()))
IsHighLatencySU[i] = 1;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 938cdaf1ef8f..b4a4e9e33133 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -202,8 +202,6 @@ public:
class SIMemOpAccess final {
private:
-
- AMDGPUAS SIAddrSpaceInfo;
AMDGPUMachineModuleInfo *MMI = nullptr;
/// Reports unsupported message \p Msg for \p MI to LLVM context.
@@ -255,7 +253,7 @@ protected:
/// Instruction info.
const SIInstrInfo *TII = nullptr;
- IsaInfo::IsaVersion IV;
+ IsaVersion IV;
SICacheControl(const GCNSubtarget &ST);
@@ -453,22 +451,21 @@ SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
}
SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
- if (AS == SIAddrSpaceInfo.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
return SIAtomicAddrSpace::FLAT;
- if (AS == SIAddrSpaceInfo.GLOBAL_ADDRESS)
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS)
return SIAtomicAddrSpace::GLOBAL;
- if (AS == SIAddrSpaceInfo.LOCAL_ADDRESS)
+ if (AS == AMDGPUAS::LOCAL_ADDRESS)
return SIAtomicAddrSpace::LDS;
- if (AS == SIAddrSpaceInfo.PRIVATE_ADDRESS)
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS)
return SIAtomicAddrSpace::SCRATCH;
- if (AS == SIAddrSpaceInfo.REGION_ADDRESS)
+ if (AS == AMDGPUAS::REGION_ADDRESS)
return SIAtomicAddrSpace::GDS;
return SIAtomicAddrSpace::OTHER;
}
SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
- SIAddrSpaceInfo = getAMDGPUAS(MF.getTarget());
MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
}
@@ -608,7 +605,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
TII = ST.getInstrInfo();
- IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
+ IV = getIsaVersion(ST.getCPU());
}
/* static */
@@ -815,6 +812,12 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
+ const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
+
+ const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
+ ? AMDGPU::BUFFER_WBINVL1
+ : AMDGPU::BUFFER_WBINVL1_VOL;
+
if (Pos == Position::AFTER)
++MI;
@@ -822,7 +825,7 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1_VOL));
+ BuildMI(MBB, MI, DL, TII->get(Flush));
Changed = true;
break;
case SIAtomicScope::WORKGROUP:
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
new file mode 100644
index 000000000000..883fd308f2f4
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -0,0 +1,406 @@
+//===-- SIModeRegister.cpp - Mode Register --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This pass inserts changes to the Mode register settings as required.
+/// Note that currently it only deals with the Double Precision Floating Point
+/// rounding mode setting, but is intended to be generic enough to be easily
+/// expanded.
+///
+//===----------------------------------------------------------------------===//
+//
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <queue>
+
+#define DEBUG_TYPE "si-mode-register"
+
+STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
+
+using namespace llvm;
+
+struct Status {
+ // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
+ // known value
+ unsigned Mask;
+ unsigned Mode;
+
+ Status() : Mask(0), Mode(0){};
+
+ Status(unsigned Mask, unsigned Mode) : Mask(Mask), Mode(Mode) {
+ Mode &= Mask;
+ };
+
+ // merge two status values such that only values that don't conflict are
+ // preserved
+ Status merge(const Status &S) const {
+ return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
+ }
+
+ // merge an unknown value by using the unknown value's mask to remove bits
+ // from the result
+ Status mergeUnknown(unsigned newMask) {
+ return Status(Mask & ~newMask, Mode & ~newMask);
+ }
+
+ // intersect two Status values to produce a mode and mask that is a subset
+ // of both values
+ Status intersect(const Status &S) const {
+ unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
+ unsigned NewMode = (Mode & NewMask);
+ return Status(NewMask, NewMode);
+ }
+
+ // produce the delta required to change the Mode to the required Mode
+ Status delta(const Status &S) const {
+ return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
+ }
+
+ bool operator==(const Status &S) const {
+ return (Mask == S.Mask) && (Mode == S.Mode);
+ }
+
+ bool operator!=(const Status &S) const { return !(*this == S); }
+
+ bool isCompatible(Status &S) {
+ return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
+ }
+
+ bool isCombinable(Status &S) {
+ return !(Mask & S.Mask) || isCompatible(S);
+ }
+};
+
+class BlockData {
+public:
+ // The Status that represents the mode register settings required by the
+ // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
+ Status Require;
+
+ // The Status that represents the net changes to the Mode register made by
+ // this block. Calculated in Phase 1.
+ Status Change;
+
+ // The Status that represents the mode register settings on exit from this
+ // block. Calculated in Phase 2.
+ Status Exit;
+
+ // The Status that represents the intersection of exit Mode register settings
+ // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
+ Status Pred;
+
+ // In Phase 1 we record the first instruction that has a mode requirement,
+ // which is used in Phase 3 if we need to insert a mode change.
+ MachineInstr *FirstInsertionPoint;
+
+ BlockData() : FirstInsertionPoint(nullptr) {};
+};
+
+namespace {
+
+class SIModeRegister : public MachineFunctionPass {
+public:
+ static char ID;
+
+ std::vector<std::unique_ptr<BlockData>> BlockInfo;
+ std::queue<MachineBasicBlock *> Phase2List;
+
+ // The default mode register setting currently only caters for the floating
+ // point double precision rounding mode.
+ // We currently assume the default rounding mode is Round to Nearest.
+ // NOTE: this should come from a per-function rounding mode setting once such
+ // a setting exists.
+ unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
+ Status DefaultStatus =
+ Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
+
+public:
+ SIModeRegister() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
+
+ void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
+
+ void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
+
+ Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
+
+ void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
+ const SIInstrInfo *TII, Status InstrMode);
+};
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
+ "Insert required mode register values", false, false)
+
+char SIModeRegister::ID = 0;
+
+char &llvm::SIModeRegisterID = SIModeRegister::ID;
+
+FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
+
+// Determine the Mode register setting required for this instruction.
+// Instructions which don't use the Mode register return a null Status.
+// Note this currently only deals with instructions that use the floating point
+// double precision setting.
+Status SIModeRegister::getInstructionMode(MachineInstr &MI,
+ const SIInstrInfo *TII) {
+ if (TII->usesFPDPRounding(MI)) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_INTERP_P1LL_F16:
+ case AMDGPU::V_INTERP_P1LV_F16:
+ case AMDGPU::V_INTERP_P2_F16:
+ // f16 interpolation instructions need double precision round to zero
+ return Status(FP_ROUND_MODE_DP(3),
+ FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
+ default:
+ return DefaultStatus;
+ }
+ }
+ return Status();
+}
+
+// Insert a setreg instruction to update the Mode register.
+// It is possible (though unlikely) for an instruction to require a change to
+// the value of disjoint parts of the Mode register when we don't know the
+// value of the intervening bits. In that case we need to use more than one
+// setreg instruction.
+void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
+ const SIInstrInfo *TII, Status InstrMode) {
+ while (InstrMode.Mask) {
+ unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
+ unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
+ unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
+ BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32))
+ .addImm(Value)
+ .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
+ (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
+ (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
+ ++NumSetregInserted;
+ InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
+ }
+}
+
+// In Phase 1 we iterate through the instructions of the block and for each
+// instruction we get its mode usage. If the instruction uses the Mode register
+// we:
+// - update the Change status, which tracks the changes to the Mode register
+// made by this block
+// - if this instruction's requirements are compatible with the current setting
+// of the Mode register we merge the modes
+// - if it isn't compatible and an InsertionPoint isn't set, then we set the
+// InsertionPoint to the current instruction, and we remember the current
+// mode
+// - if it isn't compatible and InsertionPoint is set we insert a setreg before
+// that instruction (unless this instruction forms part of the block's
+// entry requirements in which case the insertion is deferred until Phase 3
+// when predecessor exit values are known), and move the insertion point to
+// this instruction
+// - if this is a setreg instruction we treat it as an incompatible instruction.
+// This is sub-optimal but avoids some nasty corner cases, and is expected to
+// occur very rarely.
+// - on exit we have set the Require, Change, and initial Exit modes.
+void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
+ const SIInstrInfo *TII) {
+ auto NewInfo = llvm::make_unique<BlockData>();
+ MachineInstr *InsertionPoint = nullptr;
+ // RequirePending is used to indicate whether we are collecting the initial
+ // requirements for the block, and need to defer the first InsertionPoint to
+ // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
+ // we discover an explicit setreg that means this block doesn't have any
+ // initial requirements.
+ bool RequirePending = true;
+ Status IPChange;
+ for (MachineInstr &MI : MBB) {
+ Status InstrMode = getInstructionMode(MI, TII);
+ if ((MI.getOpcode() == AMDGPU::S_SETREG_B32) ||
+ (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) {
+ // We preserve any explicit mode register setreg instruction we encounter,
+ // as we assume it has been inserted by a higher authority (this is
+ // likely to be a very rare occurrence).
+ unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
+ if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
+ AMDGPU::Hwreg::ID_MODE)
+ continue;
+
+ unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
+ AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
+ 1;
+ unsigned Offset =
+ (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
+ unsigned Mask = ((1 << Width) - 1) << Offset;
+
+ // If an InsertionPoint is set we will insert a setreg there.
+ if (InsertionPoint) {
+ insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
+ InsertionPoint = nullptr;
+ }
+ // If this is an immediate then we know the value being set, but if it is
+ // not an immediate then we treat the modified bits of the mode register
+ // as unknown.
+ if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32) {
+ unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
+ unsigned Mode = (Val << Offset) & Mask;
+ Status Setreg = Status(Mask, Mode);
+ // If we haven't already set the initial requirements for the block we
+ // don't need to as the requirements start from this explicit setreg.
+ RequirePending = false;
+ NewInfo->Change = NewInfo->Change.merge(Setreg);
+ } else {
+ NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
+ }
+ } else if (!NewInfo->Change.isCompatible(InstrMode)) {
+ // This instruction uses the Mode register and its requirements aren't
+ // compatible with the current mode.
+ if (InsertionPoint) {
+ // If the required mode change cannot be included in the current
+ // InsertionPoint changes, we need a setreg and start a new
+ // InsertionPoint.
+ if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
+ if (RequirePending) {
+ // This is the first insertionPoint in the block so we will defer
+ // the insertion of the setreg to Phase 3 where we know whether or
+ // not it is actually needed.
+ NewInfo->FirstInsertionPoint = InsertionPoint;
+ NewInfo->Require = NewInfo->Change;
+ RequirePending = false;
+ } else {
+ insertSetreg(MBB, InsertionPoint, TII,
+ IPChange.delta(NewInfo->Change));
+ IPChange = NewInfo->Change;
+ }
+ // Set the new InsertionPoint
+ InsertionPoint = &MI;
+ }
+ NewInfo->Change = NewInfo->Change.merge(InstrMode);
+ } else {
+ // No InsertionPoint is currently set - this is either the first in
+ // the block or we have previously seen an explicit setreg.
+ InsertionPoint = &MI;
+ IPChange = NewInfo->Change;
+ NewInfo->Change = NewInfo->Change.merge(InstrMode);
+ }
+ }
+ }
+ if (RequirePending) {
+ // If we haven't yet set the initial requirements for the block we set them
+ // now.
+ NewInfo->FirstInsertionPoint = InsertionPoint;
+ NewInfo->Require = NewInfo->Change;
+ } else if (InsertionPoint) {
+ // We need to insert a setreg at the InsertionPoint
+ insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
+ }
+ NewInfo->Exit = NewInfo->Change;
+ BlockInfo[MBB.getNumber()] = std::move(NewInfo);
+}
+
+// In Phase 2 we revisit each block and calculate the common Mode register
+// value provided by all predecessor blocks. If the Exit value for the block
+// is changed, then we add the successor blocks to the worklist so that the
+// exit value is propagated.
+void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
+ const SIInstrInfo *TII) {
+// BlockData *BI = BlockInfo[MBB.getNumber()];
+ unsigned ThisBlock = MBB.getNumber();
+ if (MBB.pred_empty()) {
+ // There are no predecessors, so use the default starting status.
+ BlockInfo[ThisBlock]->Pred = DefaultStatus;
+ } else {
+ // Build a status that is common to all the predecessors by intersecting
+ // all the predecessor exit status values.
+ MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
+ MachineBasicBlock &PB = *(*P);
+ BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit;
+
+ for (P = std::next(P); P != E; P = std::next(P)) {
+ MachineBasicBlock *Pred = *P;
+ BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->getNumber()]->Exit);
+ }
+ }
+ Status TmpStatus = BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
+ if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
+ BlockInfo[ThisBlock]->Exit = TmpStatus;
+ // Add the successors to the work list so we can propagate the changed exit
+ // status.
+ for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
+ E = MBB.succ_end();
+ S != E; S = std::next(S)) {
+ MachineBasicBlock &B = *(*S);
+ Phase2List.push(&B);
+ }
+ }
+}
+
+// In Phase 3 we revisit each block and if it has an insertion point defined we
+// check whether the predecessor mode meets the block's entry requirements. If
+// not we insert an appropriate setreg instruction to modify the Mode register.
+void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
+ const SIInstrInfo *TII) {
+// BlockData *BI = BlockInfo[MBB.getNumber()];
+ unsigned ThisBlock = MBB.getNumber();
+ if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
+ Status Delta = BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
+ if (BlockInfo[ThisBlock]->FirstInsertionPoint)
+ insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
+ else
+ insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
+ }
+}
+
+bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
+ BlockInfo.resize(MF.getNumBlockIDs());
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ // Processing is performed in a number of phases
+
+ // Phase 1 - determine the initial mode required by each block, and add setreg
+ // instructions for intra block requirements.
+ for (MachineBasicBlock &BB : MF)
+ processBlockPhase1(BB, TII);
+
+ // Phase 2 - determine the exit mode from each block. We add all blocks to the
+ // list here, but will also add any that need to be revisited during Phase 2
+ // processing.
+ for (MachineBasicBlock &BB : MF)
+ Phase2List.push(&BB);
+ while (!Phase2List.empty()) {
+ processBlockPhase2(*Phase2List.front(), TII);
+ Phase2List.pop();
+ }
+
+ // Phase 3 - add an initial setreg to each block where the required entry mode
+ // is not satisfied by the exit mode of all its predecessors.
+ for (MachineBasicBlock &BB : MF)
+ processBlockPhase3(BB, TII);
+
+ BlockInfo.clear();
+
+ return NumSetregInserted > 0;
+}
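
The three-phase dataflow above is driven entirely by the small bit arithmetic in Status: merge overlays newly known bits, intersect keeps only bits that are known and equal on every incoming path, and delta yields the bits a setreg actually has to program. The standalone model below restates those formulas on plain unsigned values so they can be checked on concrete numbers; it is an illustrative sketch with made-up field values (0xF, 0xC and so on), not code from the patch:

    #include <cstdio>

    struct Bits {
      unsigned Mask = 0; // which mode bits have a known value
      unsigned Mode = 0; // the known values of those bits
    };

    // Overlay B onto A: bits known by B take B's value, the rest keep A's.
    static Bits merge(Bits A, Bits B) {
      return {A.Mask | B.Mask, (A.Mode & ~B.Mask) | (B.Mode & B.Mask)};
    }

    // Keep only the bits that are known in both A and B and agree.
    static Bits intersect(Bits A, Bits B) {
      unsigned M = (A.Mask & B.Mask) & ~(A.Mode ^ B.Mode);
      return {M, A.Mode & M};
    }

    // Bits that Req needs and that are unknown or wrong in Cur.
    static Bits delta(Bits Cur, Bits Req) {
      return {(Req.Mask & (Cur.Mode ^ Req.Mode)) | (~Cur.Mask & Req.Mask), Req.Mode};
    }

    int main() {
      Bits Cur{0xF, 0x0};  // four mode bits known, all currently zero
      Bits Req{0xC, 0xC};  // an instruction needs the top two of them set
      Bits D = delta(Cur, Req);    // {0xC, 0xC}: only those two bits need a setreg
      Bits After = merge(Cur, D);  // {0xF, 0xC}: state once the setreg has run
      Bits Other{0xF, 0x4};        // exit state of some other predecessor
      Bits In = intersect(After, Other); // {0x7, 0x4}: bit 3 disagrees, so unknown
      std::printf("delta %x/%x after %x/%x in %x/%x\n",
                  D.Mask, D.Mode, After.Mask, After.Mode, In.Mask, In.Mode);
      return 0;
    }
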
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 7b678d12ba81..c671fed34bdf 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -103,6 +103,122 @@ static MachineInstr* getOrExecSource(const MachineInstr &MI,
return SaveExecInst;
}
+// Optimize sequence
+// %sel = V_CNDMASK_B32_e64 0, 1, %cc
+// %cmp = V_CMP_NE_U32 1, %1
+// $vcc = S_AND_B64 $exec, %cmp
+// S_CBRANCH_VCC[N]Z
+// =>
+// $vcc = S_ANDN2_B64 $exec, %cc
+// S_CBRANCH_VCC[N]Z
+//
+// This is the negation pattern inserted by DAGCombiner::visitBRCOND() in
+// rebuildSetCC(). We start from the S_CBRANCH to avoid an exhaustive search,
+// but only the first three instructions are really needed. The S_AND_B64 with
+// exec is a required part of the pattern since V_CNDMASK_B32 writes zeroes
+// for inactive lanes.
+//
+// Returns %cc register on success.
+static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
+ const GCNSubtarget &ST,
+ MachineRegisterInfo &MRI,
+ LiveIntervals *LIS) {
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const unsigned AndOpc = AMDGPU::S_AND_B64;
+ const unsigned Andn2Opc = AMDGPU::S_ANDN2_B64;
+ const unsigned CondReg = AMDGPU::VCC;
+ const unsigned ExecReg = AMDGPU::EXEC;
+
+ auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ return Opc == AMDGPU::S_CBRANCH_VCCZ ||
+ Opc == AMDGPU::S_CBRANCH_VCCNZ; });
+ if (I == MBB.terminators().end())
+ return AMDGPU::NoRegister;
+
+ auto *And = TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister,
+ *I, MRI, LIS);
+ if (!And || And->getOpcode() != AndOpc ||
+ !And->getOperand(1).isReg() || !And->getOperand(2).isReg())
+ return AMDGPU::NoRegister;
+
+ MachineOperand *AndCC = &And->getOperand(1);
+ unsigned CmpReg = AndCC->getReg();
+ unsigned CmpSubReg = AndCC->getSubReg();
+ if (CmpReg == ExecReg) {
+ AndCC = &And->getOperand(2);
+ CmpReg = AndCC->getReg();
+ CmpSubReg = AndCC->getSubReg();
+ } else if (And->getOperand(2).getReg() != ExecReg) {
+ return AMDGPU::NoRegister;
+ }
+
+ auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, MRI, LIS);
+ if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
+ Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
+ Cmp->getParent() != And->getParent())
+ return AMDGPU::NoRegister;
+
+ MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
+ MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
+ if (Op1->isImm() && Op2->isReg())
+ std::swap(Op1, Op2);
+ if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
+ return AMDGPU::NoRegister;
+
+ unsigned SelReg = Op1->getReg();
+ auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, MRI, LIS);
+ if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
+ return AMDGPU::NoRegister;
+
+ Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
+ Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
+ MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
+ if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() ||
+ Op1->getImm() != 0 || Op2->getImm() != 1)
+ return AMDGPU::NoRegister;
+
+ LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t'
+ << *Cmp << '\t' << *And);
+
+ unsigned CCReg = CC->getReg();
+ LIS->RemoveMachineInstrFromMaps(*And);
+ MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
+ TII->get(Andn2Opc), And->getOperand(0).getReg())
+ .addReg(ExecReg)
+ .addReg(CCReg, CC->getSubReg());
+ And->eraseFromParent();
+ LIS->InsertMachineInstrInMaps(*Andn2);
+
+ LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
+
+ // Try to remove the compare. The Cmp value must not be used between the
+ // cmp and the s_and_b64 if it is VCC, and must simply be unused if it is
+ // any other register.
+ if ((TargetRegisterInfo::isVirtualRegister(CmpReg) &&
+ MRI.use_nodbg_empty(CmpReg)) ||
+ (CmpReg == CondReg &&
+ std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
+ [&](const MachineInstr &MI) {
+ return MI.readsRegister(CondReg, TRI); }))) {
+ LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
+
+ LIS->RemoveMachineInstrFromMaps(*Cmp);
+ Cmp->eraseFromParent();
+
+ // Try to remove v_cndmask_b32.
+ if (TargetRegisterInfo::isVirtualRegister(SelReg) &&
+ MRI.use_nodbg_empty(SelReg)) {
+ LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
+
+ LIS->RemoveMachineInstrFromMaps(*Sel);
+ Sel->eraseFromParent();
+ }
+ }
+
+ return CCReg;
+}
+
bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -117,9 +233,24 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
+ if (unsigned Reg = optimizeVcndVcmpPair(MBB, ST, MRI, LIS)) {
+ RecalcRegs.insert(Reg);
+ RecalcRegs.insert(AMDGPU::VCC_LO);
+ RecalcRegs.insert(AMDGPU::VCC_HI);
+ RecalcRegs.insert(AMDGPU::SCC);
+ Changed = true;
+ }
+
// Try to remove unneeded instructions before s_endpgm.
if (MBB.succ_empty()) {
- if (MBB.empty() || MBB.back().getOpcode() != AMDGPU::S_ENDPGM)
+ if (MBB.empty())
+ continue;
+
+ // Skip this if the endpgm has any implicit uses, otherwise we would need
+ // to be careful to update / remove them.
+ MachineInstr &Term = MBB.back();
+ if (Term.getOpcode() != AMDGPU::S_ENDPGM ||
+ Term.getNumOperands() != 0)
continue;
SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});
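
Why the S_ANDN2 rewrite above is sound: for active lanes the V_CNDMASK materializes %cc as 0 or 1, the V_CMP_NE_U32 against 1 inverts it, and the final S_AND with exec discards inactive lanes, so the resulting vcc is exec & ~cc. A small self-checking sketch of that identity on plain 64-bit lane masks (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // One bit per lane. V_CNDMASK_B32 0, 1, cc writes 1 only in active lanes
    // where cc is set, so as a lane mask the select is simply cc & exec.
    static uint64_t beforeFold(uint64_t CC, uint64_t Exec) {
      uint64_t Sel = CC & Exec; // %sel = V_CNDMASK_B32_e64 0, 1, %cc
      uint64_t Cmp = ~Sel;      // %cmp = V_CMP_NE_U32 1, %sel (inactive-lane bits
                                // don't matter; the S_AND below masks them anyway)
      return Exec & Cmp;        // $vcc = S_AND_B64 $exec, %cmp
    }

    static uint64_t afterFold(uint64_t CC, uint64_t Exec) {
      return Exec & ~CC;        // $vcc = S_ANDN2_B64 $exec, %cc
    }

    int main() {
      const uint64_t CCs[] = {0x0, 0xFF, 0xA5A5A5A5A5A5A5A5ull};
      const uint64_t Execs[] = {~0ull, 0x0F, 0xF0F0F0F0F0F0F0F0ull};
      for (uint64_t CC : CCs)
        for (uint64_t Exec : Execs)
          assert(beforeFold(CC, Exec) == afterFold(CC, Exec));
      return 0;
    }
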
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 0e000b72962e..2d43d5d05ef6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -90,7 +90,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void matchSDWAOperands(MachineBasicBlock &MBB);
std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
- bool isConvertibleToSDWA(const MachineInstr &MI, const GCNSubtarget &ST) const;
+ bool isConvertibleToSDWA(MachineInstr &MI, const GCNSubtarget &ST) const;
+ void pseudoOpConvertToVOP2(MachineInstr &MI,
+ const GCNSubtarget &ST) const;
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
@@ -854,7 +856,82 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
}
}
-bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
+// Convert the V_ADDC_U32_e64 into V_ADDC_U32_e32, and
+// V_ADD_I32_e64 into V_ADD_I32_e32. This allows isConvertibleToSDWA
+// to perform its transformation on V_ADD_I32_e32 into V_ADD_I32_sdwa.
+//
+// We are transforming from a VOP3 into a VOP2 form of the instruction.
+// %19:vgpr_32 = V_AND_B32_e32 255,
+// killed %16:vgpr_32, implicit $exec
+// %47:vgpr_32, %49:sreg_64_xexec = V_ADD_I32_e64
+// %26.sub0:vreg_64, %19:vgpr_32, implicit $exec
+// %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64
+// %26.sub1:vreg_64, %54:vgpr_32, killed %49:sreg_64_xexec, implicit $exec
+//
+// becomes
+// %47:vgpr_32 = V_ADD_I32_sdwa
+// 0, %26.sub0:vreg_64, 0, killed %16:vgpr_32, 0, 6, 0, 6, 0,
+// implicit-def $vcc, implicit $exec
+// %48:vgpr_32 = V_ADDC_U32_e32
+// 0, %26.sub1:vreg_64, implicit-def $vcc, implicit $vcc, implicit $exec
+void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
+ const GCNSubtarget &ST) const {
+ int Opc = MI.getOpcode();
+ assert((Opc == AMDGPU::V_ADD_I32_e64 || Opc == AMDGPU::V_SUB_I32_e64) &&
+ "Currently only handles V_ADD_I32_e64 or V_SUB_I32_e64");
+
+ // Can the candidate MI be shrunk?
+ if (!TII->canShrink(MI, *MRI))
+ return;
+ Opc = AMDGPU::getVOPe32(Opc);
+ // Find the instruction that consumes the carry-out (the related ADDC/SUBB).
+ const MachineOperand *Sdst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ if (!Sdst)
+ return;
+ MachineOperand *NextOp = findSingleRegUse(Sdst, MRI);
+ if (!NextOp)
+ return;
+ MachineInstr &MISucc = *NextOp->getParent();
+ // Can the successor be shrunk?
+ if (!TII->canShrink(MISucc, *MRI))
+ return;
+ int SuccOpc = AMDGPU::getVOPe32(MISucc.getOpcode());
+ // Make sure the carry in/out are subsequently unused.
+ MachineOperand *CarryIn = TII->getNamedOperand(MISucc, AMDGPU::OpName::src2);
+ if (!CarryIn)
+ return;
+ MachineOperand *CarryOut = TII->getNamedOperand(MISucc, AMDGPU::OpName::sdst);
+ if (!CarryOut)
+ return;
+ if (!MRI->hasOneUse(CarryIn->getReg()) || !MRI->use_empty(CarryOut->getReg()))
+ return;
+ // Make sure VCC or its subregs are dead before MI.
+ MachineBasicBlock &MBB = *MI.getParent();
+ auto Liveness = MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
+ if (Liveness != MachineBasicBlock::LQR_Dead)
+ return;
+ // Check if VCC is referenced in range of (MI,MISucc].
+ for (auto I = std::next(MI.getIterator()), E = MISucc.getIterator();
+ I != E; ++I) {
+ if (I->modifiesRegister(AMDGPU::VCC, TRI))
+ return;
+ }
+ // Make the two new e32 instruction variants.
+ // Replace MI with V_{SUB|ADD}_I32_e32
+ auto NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc));
+ NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst));
+ NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+ NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1));
+ MI.eraseFromParent();
+ // Replace MISucc with V_{SUBB|ADDC}_U32_e32
+ auto NewInst = BuildMI(MBB, MISucc, MISucc.getDebugLoc(), TII->get(SuccOpc));
+ NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::vdst));
+ NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src0));
+ NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src1));
+ MISucc.eraseFromParent();
+}
+
+bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
const GCNSubtarget &ST) const {
// Check if this is already an SDWA instruction
unsigned Opc = MI.getOpcode();
@@ -1127,6 +1204,22 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
bool Changed = false;
do {
+ // Preprocess the ADD/SUB pairs so they could be SDWA'ed.
+ // Look for a possible ADD or SUB that resulted from a previously lowered
+ // V_{ADD|SUB}_U64_PSEUDO. The function pseudoOpConvertToVOP2
+ // lowers the pair of instructions into e32 form.
+ matchSDWAOperands(MBB);
+ for (const auto &OperandPair : SDWAOperands) {
+ const auto &Operand = OperandPair.second;
+ MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
+ if (PotentialMI &&
+ (PotentialMI->getOpcode() == AMDGPU::V_ADD_I32_e64 ||
+ PotentialMI->getOpcode() == AMDGPU::V_SUB_I32_e64))
+ pseudoOpConvertToVOP2(*PotentialMI, ST);
+ }
+ SDWAOperands.clear();
+
+ // Generate potential match list.
matchSDWAOperands(MBB);
for (const auto &OperandPair : SDWAOperands) {
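
The e64 pair targeted above is the usual split of a 64-bit add into a 32-bit add that produces a carry and an add-with-carry that consumes it, which is why the rewrite requires the carry produced by the first add to have a single consumer, the second carry-out to be unused, and VCC to be free across the pair. A plain-integer sketch of that structure (hypothetical helper, not patch code):

    #include <cassert>
    #include <cstdint>

    // Model of the V_ADD_I32 / V_ADDC_U32 pair: the low add produces a carry
    // (the sdst, or implicit VCC once shrunk to e32) and the high add consumes
    // it. If anything else read or clobbered that carry in between, rewriting
    // the pair to the e32 forms would not be safe.
    static uint64_t add64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
      uint32_t Lo = ALo + BLo;
      uint32_t Carry = Lo < ALo ? 1u : 0u; // carry-out of the low add
      uint32_t Hi = AHi + BHi + Carry;     // add-with-carry for the high half
      return (uint64_t(Hi) << 32) | Lo;
    }

    int main() {
      assert(add64(0xFFFFFFFFu, 0, 1, 0) == 0x100000000ull);
      return 0;
    }
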
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 624607f6ea54..97cfde2b2354 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -18,9 +18,12 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -495,15 +498,16 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
return false;
const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
- MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .add(*Reg)
- .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
- .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MachineInstrBuilder NewMI =
+ BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+ .add(*Reg)
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .cloneMemRefs(*MI);
const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata_in);
@@ -900,7 +904,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
.addImm(0) // glc
.addMemOperand(MMO);
- if (NumSubRegs > 1)
+ if (NumSubRegs > 1 && i == 0)
MIB.addReg(SuperReg, RegState::ImplicitDefine);
continue;
@@ -914,7 +918,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
.addReg(Spill.VGPR)
.addImm(Spill.Lane);
- if (NumSubRegs > 1)
+ if (NumSubRegs > 1 && i == 0)
MIB.addReg(SuperReg, RegState::ImplicitDefine);
} else {
if (OnlyToVGPR)
@@ -1598,3 +1602,57 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
llvm_unreachable("not implemented");
}
}
+
+// Find reaching register definition
+MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
+ MachineInstr &Use,
+ MachineRegisterInfo &MRI,
+ LiveIntervals *LIS) const {
+ auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
+ SlotIndex UseIdx = LIS->getInstructionIndex(Use);
+ SlotIndex DefIdx;
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!LIS->hasInterval(Reg))
+ return nullptr;
+ LiveInterval &LI = LIS->getInterval(Reg);
+ LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ VNInfo *V = nullptr;
+ if (LI.hasSubRanges()) {
+ for (auto &S : LI.subranges()) {
+ if ((S.LaneMask & SubLanes) == SubLanes) {
+ V = S.getVNInfoAt(UseIdx);
+ break;
+ }
+ }
+ } else {
+ V = LI.getVNInfoAt(UseIdx);
+ }
+ if (!V)
+ return nullptr;
+ DefIdx = V->def;
+ } else {
+ // Find last def.
+ for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
+ LiveRange &LR = LIS->getRegUnit(*Units);
+ if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
+ if (!DefIdx.isValid() ||
+ MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
+ LIS->getInstructionFromIndex(V->def)))
+ DefIdx = V->def;
+ } else {
+ return nullptr;
+ }
+ }
+ }
+
+ MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
+
+ if (!Def || !MDT.dominates(Def, &Use))
+ return nullptr;
+
+ assert(Def->modifiesRegister(Reg, this));
+
+ return Def;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 5a51b67ca719..b82fefde47e1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -228,6 +228,12 @@ public:
getConstrainedRegClassForOperand(const MachineOperand &MO,
const MachineRegisterInfo &MRI) const override;
+ // Find reaching register definition
+ MachineInstr *findReachingDef(unsigned Reg, unsigned SubReg,
+ MachineInstr &Use,
+ MachineRegisterInfo &MRI,
+ LiveIntervals *LIS) const;
+
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index f87a0763b353..c625ecc9b750 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -435,7 +435,7 @@ def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
let AllocationPriority = 7;
}
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> {
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> {
let CopyCost = 1;
let AllocationPriority = 8;
}
@@ -444,13 +444,13 @@ def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add
let isAllocatable = 0;
}
-def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1, v4i16, v4f16], 32,
+def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
let AllocationPriority = 8;
}
-def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1, v4i16, v4f16], 32,
+def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SReg_64_XEXEC, EXEC)> {
let CopyCost = 1;
let AllocationPriority = 8;
@@ -459,15 +459,15 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1, v4i16, v4f16], 32,
// Requires 2 s_mov_b64 to copy
let CopyCost = 2 in {
-def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)> {
+def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add SGPR_128Regs)> {
let AllocationPriority = 10;
}
-def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
+def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add TTMP_128Regs)> {
let isAllocatable = 0;
}
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64, v2f64], 32,
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
(add SGPR_128, TTMP_128)> {
let AllocationPriority = 10;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 4189bcce52ea..6ad7dd0e3a7c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -64,59 +64,6 @@ FunctionPass *llvm::createSIShrinkInstructionsPass() {
return new SIShrinkInstructions();
}
-static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
- const SIRegisterInfo &TRI,
- const MachineRegisterInfo &MRI) {
-
- const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
- // Can't shrink instruction with three operands.
- // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
- // a special case for it. It can only be shrunk if the third operand
- // is vcc. We should handle this the same way we handle vopc, by addding
- // a register allocation hint pre-regalloc and then do the shrinking
- // post-regalloc.
- if (Src2) {
- switch (MI.getOpcode()) {
- default: return false;
-
- case AMDGPU::V_ADDC_U32_e64:
- case AMDGPU::V_SUBB_U32_e64:
- case AMDGPU::V_SUBBREV_U32_e64: {
- const MachineOperand *Src1
- = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (!Src1->isReg() || !TRI.isVGPR(MRI, Src1->getReg()))
- return false;
- // Additional verification is needed for sdst/src2.
- return true;
- }
- case AMDGPU::V_MAC_F32_e64:
- case AMDGPU::V_MAC_F16_e64:
- case AMDGPU::V_FMAC_F32_e64:
- if (!Src2->isReg() || !TRI.isVGPR(MRI, Src2->getReg()) ||
- TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
- return false;
- break;
-
- case AMDGPU::V_CNDMASK_B32_e64:
- break;
- }
- }
-
- const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (Src1 && (!Src1->isReg() || !TRI.isVGPR(MRI, Src1->getReg()) ||
- TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
- return false;
-
- // We don't need to check src0, all input types are legal, so just make sure
- // src0 isn't using any modifiers.
- if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
- return false;
-
- // Check output modifiers
- return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
- !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp);
-}
-
/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instructions.
@@ -173,19 +120,6 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
return false;
}
-// Copy MachineOperand with all flags except setting it as implicit.
-static void copyFlagsToImplicitVCC(MachineInstr &MI,
- const MachineOperand &Orig) {
-
- for (MachineOperand &Use : MI.implicit_operands()) {
- if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
- Use.setIsUndef(Orig.isUndef());
- Use.setIsKill(Orig.isKill());
- return;
- }
- }
-}
-
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
return isInt<16>(Src.getImm()) &&
!TII->isInlineConstant(*Src.getParent(),
@@ -278,6 +212,245 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
}
}
+/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
+/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
+/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
+/// XNOR (as a ^ b == ~(a ^ ~b)).
+/// \returns true if the caller should continue the machine function iterator
+static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
+ MachineRegisterInfo &MRI,
+ const SIInstrInfo *TII,
+ MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ const MachineOperand *Dest = &MI.getOperand(0);
+ MachineOperand *Src0 = &MI.getOperand(1);
+ MachineOperand *Src1 = &MI.getOperand(2);
+ MachineOperand *SrcReg = Src0;
+ MachineOperand *SrcImm = Src1;
+
+ if (SrcImm->isImm() &&
+ !AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm())) {
+ uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
+ uint32_t NewImm = 0;
+
+ if (Opc == AMDGPU::S_AND_B32) {
+ if (isPowerOf2_32(~Imm)) {
+ NewImm = countTrailingOnes(Imm);
+ Opc = AMDGPU::S_BITSET0_B32;
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_ANDN2_B32;
+ }
+ } else if (Opc == AMDGPU::S_OR_B32) {
+ if (isPowerOf2_32(Imm)) {
+ NewImm = countTrailingZeros(Imm);
+ Opc = AMDGPU::S_BITSET1_B32;
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_ORN2_B32;
+ }
+ } else if (Opc == AMDGPU::S_XOR_B32) {
+ if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_XNOR_B32;
+ }
+ } else {
+ llvm_unreachable("unexpected opcode");
+ }
+
+ if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
+ SrcImm == Src0) {
+ if (!TII->commuteInstruction(MI, false, 1, 2))
+ NewImm = 0;
+ }
+
+ if (NewImm != 0) {
+ if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
+ SrcReg->isReg()) {
+ MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
+ MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
+ return true;
+ }
+
+ if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
+ MI.setDesc(TII->get(Opc));
+ if (Opc == AMDGPU::S_BITSET0_B32 ||
+ Opc == AMDGPU::S_BITSET1_B32) {
+ Src0->ChangeToImmediate(NewImm);
+ MI.RemoveOperand(2);
+ } else {
+ SrcImm->setImm(NewImm);
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+// This is the same as MachineInstr::readsRegister/modifiesRegister except
+// it takes subregs into account.
+static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ for (const MachineOperand &MO : R) {
+ if (!MO.isReg())
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (TRI.regsOverlap(Reg, MO.getReg()))
+ return true;
+ } else if (MO.getReg() == Reg &&
+ TargetRegisterInfo::isVirtualRegister(Reg)) {
+ LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
+ TRI.getSubRegIndexLaneMask(MO.getSubReg());
+ if (Overlap.any())
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool instReadsReg(const MachineInstr *MI,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ return instAccessReg(MI->uses(), Reg, SubReg, TRI);
+}
+
+static bool instModifiesReg(const MachineInstr *MI,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ return instAccessReg(MI->defs(), Reg, SubReg, TRI);
+}
+
+static TargetInstrInfo::RegSubRegPair
+getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
+ const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
+ if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
+ } else {
+ LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
+ Sub = TRI.getSubRegFromChannel(I + countTrailingZeros(LM.getAsInteger()));
+ }
+ }
+ return TargetInstrInfo::RegSubRegPair(Reg, Sub);
+}
+
+// Match:
+// mov t, x
+// mov x, y
+// mov y, t
+//
+// =>
+//
+// mov t, x (t is potentially dead and move eliminated)
+// v_swap_b32 x, y
+//
+// Returns a pointer to the next valid instruction if it was able to create a
+// v_swap_b32.
+//
+// This should not be done too early, so as not to prevent possible folding
+// that may remove the matched moves; it should preferably be done before RA,
+// to release the saved registers, but could also run after RA, which can
+// insert copies too.
+//
+// This is really just a generic peephole rather than a canonical shrinking,
+// although its requirements match the pass placement and it reduces code size
+// too.
+static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
+ const SIInstrInfo *TII) {
+ assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MovT.getOpcode() == AMDGPU::COPY);
+
+ unsigned T = MovT.getOperand(0).getReg();
+ unsigned Tsub = MovT.getOperand(0).getSubReg();
+ MachineOperand &Xop = MovT.getOperand(1);
+
+ if (!Xop.isReg())
+ return nullptr;
+ unsigned X = Xop.getReg();
+ unsigned Xsub = Xop.getSubReg();
+
+ unsigned Size = TII->getOpSize(MovT, 0) / 4;
+
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ if (!TRI.isVGPR(MRI, X))
+ return nullptr;
+
+ for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
+ if (YTop.getSubReg() != Tsub)
+ continue;
+
+ MachineInstr &MovY = *YTop.getParent();
+ if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+ MovY.getOpcode() != AMDGPU::COPY) ||
+ MovY.getOperand(1).getSubReg() != Tsub)
+ continue;
+
+ unsigned Y = MovY.getOperand(0).getReg();
+ unsigned Ysub = MovY.getOperand(0).getSubReg();
+
+ if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
+ continue;
+
+ MachineInstr *MovX = nullptr;
+ auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
+ for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
+ if (instReadsReg(&*I, X, Xsub, TRI) ||
+ instModifiesReg(&*I, Y, Ysub, TRI) ||
+ instModifiesReg(&*I, T, Tsub, TRI) ||
+ (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
+ MovX = nullptr;
+ break;
+ }
+ if (!instReadsReg(&*I, Y, Ysub, TRI)) {
+ if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
+ MovX = nullptr;
+ break;
+ }
+ continue;
+ }
+ if (MovX ||
+ (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+ I->getOpcode() != AMDGPU::COPY) ||
+ I->getOperand(0).getReg() != X ||
+ I->getOperand(0).getSubReg() != Xsub) {
+ MovX = nullptr;
+ break;
+ }
+ MovX = &*I;
+ }
+
+ if (!MovX || I == E)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
+
+ for (unsigned I = 0; I < Size; ++I) {
+ TargetInstrInfo::RegSubRegPair X1, Y1;
+ X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
+ Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
+ BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
+ TII->get(AMDGPU::V_SWAP_B32))
+ .addDef(X1.Reg, 0, X1.SubReg)
+ .addDef(Y1.Reg, 0, Y1.SubReg)
+ .addReg(Y1.Reg, 0, Y1.SubReg)
+ .addReg(X1.Reg, 0, X1.SubReg).getInstr();
+ }
+ MovX->eraseFromParent();
+ MovY.eraseFromParent();
+ MachineInstr *Next = &*std::next(MovT.getIterator());
+ if (MRI.use_nodbg_empty(T))
+ MovT.eraseFromParent();
+ else
+ Xop.setIsKill(false);
+
+ return Next;
+ }
+
+ return nullptr;
+}
+
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -285,7 +458,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
std::vector<unsigned> I1Defs;
@@ -319,6 +491,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
}
+ if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MI.getOpcode() == AMDGPU::COPY)) {
+ if (auto *NextMI = matchSwap(MI, MRI, TII)) {
+ Next = NextMI->getIterator();
+ continue;
+ }
+ }
+
// Combine adjacent s_nops to use the immediate operand encoding how long
// to wait.
//
@@ -408,14 +588,22 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ // Shrink scalar logic operations.
+ if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
+ MI.getOpcode() == AMDGPU::S_OR_B32 ||
+ MI.getOpcode() == AMDGPU::S_XOR_B32) {
+ if (shrinkScalarLogicOp(ST, MRI, TII, MI))
+ continue;
+ }
+
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
- if (!canShrink(MI, TII, TRI, MRI)) {
+ if (!TII->canShrink(MI, MRI)) {
// Try commuting the instruction and see if that enables us to shrink
// it.
if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
- !canShrink(MI, TII, TRI, MRI))
+ !TII->canShrink(MI, MRI))
continue;
}
@@ -488,40 +676,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// We can shrink this instruction
LLVM_DEBUG(dbgs() << "Shrinking " << MI);
- MachineInstrBuilder Inst32 =
- BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
-
- // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
- // For VOPC instructions, this is replaced by an implicit def of vcc.
- int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
- if (Op32DstIdx != -1) {
- // dst
- Inst32.add(MI.getOperand(0));
- } else {
- assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
- "Unexpected case");
- }
-
-
- Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
-
- const MachineOperand *Src1 =
- TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (Src1)
- Inst32.add(*Src1);
-
- if (Src2) {
- int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
- if (Op32Src2Idx != -1) {
- Inst32.add(*Src2);
- } else {
- // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
- // replaced with an implicit read of vcc. This was already added
- // during the initial BuildMI, so find it to preserve the flags.
- copyFlagsToImplicitVCC(*Inst32, *Src2);
- }
- }
-
+ MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
++NumInstructionsShrunk;
// Copy extra operands not present in the instruction definition.
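
The scalar logic shrinking added above rests on three small bit identities: an AND whose inverted immediate is a power of two clears exactly one bit (S_BITSET0), an OR with a power-of-two immediate sets one bit (S_BITSET1), and a ^ b == ~(a ^ ~b) lets an XOR with a non-inlinable immediate become an XNOR with its complement. A tiny self-checking sketch of those identities with made-up immediates (not patch code):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x12345678u;

      // s_and_b32 x, 0xFFFFFFBF  ->  s_bitset0 x, 6   (~imm is the power of two 0x40)
      assert((X & 0xFFFFFFBFu) == (X & ~(1u << 6)));

      // s_or_b32 x, 0x00010000   ->  s_bitset1 x, 16  (imm itself is a power of two)
      assert((X | 0x00010000u) == (X | (1u << 16)));

      // s_xor_b32 x, imm         ->  s_xnor_b32 x, ~imm   since a ^ b == ~(a ^ ~b)
      uint32_t Imm = 0xFFFF0001u;
      assert((X ^ Imm) == ~(X ^ ~Imm));

      return 0;
    }
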
diff --git a/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td
index 7485326017b2..8a063e1a4867 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -375,83 +375,6 @@ defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
}
//===----------------------------------------------------------------------===//
-// Scalar Memory Patterns
-//===----------------------------------------------------------------------===//
-
-
-def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
- auto Ld = cast<LoadSDNode>(N);
- return Ld->getAlignment() >= 4 &&
- ((((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) || (Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT)) && !N->isDivergent()) ||
- (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
- !Ld->isVolatile() && !N->isDivergent() &&
- static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
-}]>;
-
-def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
-def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
-def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
-def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
-def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
-
-multiclass SMRD_Pattern <string Instr, ValueType vt> {
-
- // 1. IMM offset
- def : GCNPat <
- (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
- >;
-
- // 2. SGPR offset
- def : GCNPat <
- (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
- >;
-}
-
-let OtherPredicates = [isSICI] in {
-def : GCNPat <
- (i64 (readcyclecounter)),
- (S_MEMTIME)
->;
-}
-
-// Global and constant loads can be selected to either MUBUF or SMRD
-// instructions, but SMRD instructions are faster so we want the instruction
-// selector to prefer those.
-let AddedComplexity = 100 in {
-
-defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
-
-// 1. Offset as an immediate
-def SM_LOAD_PATTERN : GCNPat < // name this pattern to reuse AddedComplexity on CI
- (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset, 0)
->;
-
-// 2. Offset loaded in an 32bit SGPR
-def : GCNPat <
- (SIload_constant v4i32:$sbase, i32:$offset),
- (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset, 0)
->;
-
-} // End let AddedComplexity = 100
-
-let OtherPredicates = [isVI] in {
-
-def : GCNPat <
- (i64 (readcyclecounter)),
- (S_MEMREALTIME)
->;
-
-} // let OtherPredicates = [isVI]
-
-
-//===----------------------------------------------------------------------===//
// Targets
//===----------------------------------------------------------------------===//
@@ -757,25 +680,97 @@ class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
-let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in {
+//===----------------------------------------------------------------------===//
+// Scalar Memory Patterns
+//===----------------------------------------------------------------------===//
+
+def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]>;
+
+def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
+def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
+def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
+def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
+def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
+
+multiclass SMRD_Pattern <string Instr, ValueType vt> {
+
+ // 1. IMM offset
+ def : GCNPat <
+ (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
+ >;
+
+ // 2. 32-bit IMM offset on CI
+ def : GCNPat <
+ (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
+ (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
+ let OtherPredicates = [isCIOnly];
+ }
-class SMRD_Pattern_ci <string Instr, ValueType vt> : GCNPat <
- (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
- (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
- let OtherPredicates = [isCIOnly];
+ // 3. SGPR offset
+ def : GCNPat <
+ (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
+ >;
}
-def : SMRD_Pattern_ci <"S_LOAD_DWORD", i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX2", v2i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX4", v4i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX8", v8i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>;
+multiclass SMLoad_Pattern <string Instr, ValueType vt> {
+ // 1. Offset as an immediate
+ def : GCNPat <
+ (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc)))
+ >;
+ // 2. 32-bit IMM offset on CI
+ def : GCNPat <
+ (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)),
+ (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc))> {
+ let OtherPredicates = [isCIOnly];
+ }
+
+ // 3. Offset loaded in a 32-bit SGPR
+ def : GCNPat <
+ (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc)))
+ >;
+}
+
+// Global and constant loads can be selected to either MUBUF or SMRD
+// instructions, but SMRD instructions are faster so we want the instruction
+// selector to prefer those.
+let AddedComplexity = 100 in {
+
+defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
+
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>;
+
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;
+} // End let AddedComplexity = 100
+
+let OtherPredicates = [isSICI] in {
def : GCNPat <
- (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)),
- (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset, 0)> {
- let OtherPredicates = [isCI]; // should this be isCIOnly?
+ (i64 (readcyclecounter)),
+ (S_MEMTIME)
+>;
}
-} // End let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity
+let OtherPredicates = [isVI] in {
+def : GCNPat <
+ (i64 (readcyclecounter)),
+ (S_MEMREALTIME)
+>;
+
+} // let OtherPredicates = [isVI]
diff --git a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 6f5db9644c86..ca5e981ac5c2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -336,42 +336,54 @@ class SOP2_64_32_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
"$sdst, $src0, $src1", pattern
>;
+class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0),
+ (Op $src0),
+ [{ return !N->isDivergent(); }]
+>;
+
+class UniformBinFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0, node:$src1),
+ (Op $src0, $src1),
+ [{ return !N->isDivergent(); }]
+>;
+
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
def S_ADD_U32 : SOP2_32 <"s_add_u32">;
def S_ADD_I32 : SOP2_32 <"s_add_i32",
- [(set i32:$sdst, (add SSrc_b32:$src0, SSrc_b32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<add> SSrc_b32:$src0, SSrc_b32:$src1))]
>;
} // End isCommutable = 1
def S_SUB_U32 : SOP2_32 <"s_sub_u32">;
def S_SUB_I32 : SOP2_32 <"s_sub_i32",
- [(set i32:$sdst, (sub SSrc_b32:$src0, SSrc_b32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<sub> SSrc_b32:$src0, SSrc_b32:$src1))]
>;
let Uses = [SCC] in { // Carry in comes from SCC
let isCommutable = 1 in {
def S_ADDC_U32 : SOP2_32 <"s_addc_u32",
- [(set i32:$sdst, (adde (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
+ [(set i32:$sdst, (UniformBinFrag<adde> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
} // End isCommutable = 1
def S_SUBB_U32 : SOP2_32 <"s_subb_u32",
- [(set i32:$sdst, (sube (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
+ [(set i32:$sdst, (UniformBinFrag<sube> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
} // End Uses = [SCC]
let isCommutable = 1 in {
def S_MIN_I32 : SOP2_32 <"s_min_i32",
- [(set i32:$sdst, (smin i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<smin> i32:$src0, i32:$src1))]
>;
def S_MIN_U32 : SOP2_32 <"s_min_u32",
- [(set i32:$sdst, (umin i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<umin> i32:$src0, i32:$src1))]
>;
def S_MAX_I32 : SOP2_32 <"s_max_i32",
- [(set i32:$sdst, (smax i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<smax> i32:$src0, i32:$src1))]
>;
def S_MAX_U32 : SOP2_32 <"s_max_u32",
- [(set i32:$sdst, (umax i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<umax> i32:$src0, i32:$src1))]
>;
} // End isCommutable = 1
} // End Defs = [SCC]
@@ -385,27 +397,27 @@ let Uses = [SCC] in {
let Defs = [SCC] in {
let isCommutable = 1 in {
def S_AND_B32 : SOP2_32 <"s_and_b32",
- [(set i32:$sdst, (and i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<and> i32:$src0, i32:$src1))]
>;
def S_AND_B64 : SOP2_64 <"s_and_b64",
- [(set i64:$sdst, (and i64:$src0, i64:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<and> i64:$src0, i64:$src1))]
>;
def S_OR_B32 : SOP2_32 <"s_or_b32",
- [(set i32:$sdst, (or i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<or> i32:$src0, i32:$src1))]
>;
def S_OR_B64 : SOP2_64 <"s_or_b64",
- [(set i64:$sdst, (or i64:$src0, i64:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<or> i64:$src0, i64:$src1))]
>;
def S_XOR_B32 : SOP2_32 <"s_xor_b32",
- [(set i32:$sdst, (xor i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<xor> i32:$src0, i32:$src1))]
>;
def S_XOR_B64 : SOP2_64 <"s_xor_b64",
- [(set i64:$sdst, (xor i64:$src0, i64:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<xor> i64:$src0, i64:$src1))]
>;
def S_XNOR_B32 : SOP2_32 <"s_xnor_b32",
@@ -415,45 +427,71 @@ def S_XNOR_B32 : SOP2_32 <"s_xnor_b32",
def S_XNOR_B64 : SOP2_64 <"s_xnor_b64",
[(set i64:$sdst, (not (xor_oneuse i64:$src0, i64:$src1)))]
>;
+
+def S_NAND_B32 : SOP2_32 <"s_nand_b32",
+ [(set i32:$sdst, (not (and_oneuse i32:$src0, i32:$src1)))]
+>;
+
+def S_NAND_B64 : SOP2_64 <"s_nand_b64",
+ [(set i64:$sdst, (not (and_oneuse i64:$src0, i64:$src1)))]
+>;
+
+def S_NOR_B32 : SOP2_32 <"s_nor_b32",
+ [(set i32:$sdst, (not (or_oneuse i32:$src0, i32:$src1)))]
+>;
+
+def S_NOR_B64 : SOP2_64 <"s_nor_b64",
+ [(set i64:$sdst, (not (or_oneuse i64:$src0, i64:$src1)))]
+>;
} // End isCommutable = 1
-def S_ANDN2_B32 : SOP2_32 <"s_andn2_b32">;
-def S_ANDN2_B64 : SOP2_64 <"s_andn2_b64">;
-def S_ORN2_B32 : SOP2_32 <"s_orn2_b32">;
-def S_ORN2_B64 : SOP2_64 <"s_orn2_b64">;
-def S_NAND_B32 : SOP2_32 <"s_nand_b32">;
-def S_NAND_B64 : SOP2_64 <"s_nand_b64">;
-def S_NOR_B32 : SOP2_32 <"s_nor_b32">;
-def S_NOR_B64 : SOP2_64 <"s_nor_b64">;
+def S_ANDN2_B32 : SOP2_32 <"s_andn2_b32",
+ [(set i32:$sdst, (UniformBinFrag<and> i32:$src0, (UniformUnaryFrag<not> i32:$src1)))]
+>;
+
+def S_ANDN2_B64 : SOP2_64 <"s_andn2_b64",
+ [(set i64:$sdst, (UniformBinFrag<and> i64:$src0, (UniformUnaryFrag<not> i64:$src1)))]
+>;
+
+def S_ORN2_B32 : SOP2_32 <"s_orn2_b32",
+ [(set i32:$sdst, (UniformBinFrag<or> i32:$src0, (UniformUnaryFrag<not> i32:$src1)))]
+>;
+
+def S_ORN2_B64 : SOP2_64 <"s_orn2_b64",
+ [(set i64:$sdst, (UniformBinFrag<or> i64:$src0, (UniformUnaryFrag<not> i64:$src1)))]
+>;
} // End Defs = [SCC]
// Use added complexity so these patterns are preferred to the VALU patterns.
let AddedComplexity = 1 in {
let Defs = [SCC] in {
+// TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3
def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
- [(set i32:$sdst, (shl i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<shl> i32:$src0, i32:$src1))]
>;
def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
- [(set i64:$sdst, (shl i64:$src0, i32:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<shl> i64:$src0, i32:$src1))]
>;
def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
- [(set i32:$sdst, (srl i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))]
>;
def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
- [(set i64:$sdst, (srl i64:$src0, i32:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<srl> i64:$src0, i32:$src1))]
>;
def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
- [(set i32:$sdst, (sra i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<sra> i32:$src0, i32:$src1))]
>;
def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
- [(set i64:$sdst, (sra i64:$src0, i32:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<sra> i64:$src0, i32:$src1))]
>;
} // End Defs = [SCC]
def S_BFM_B32 : SOP2_32 <"s_bfm_b32",
- [(set i32:$sdst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
+ [(set i32:$sdst, (UniformBinFrag<AMDGPUbfm> i32:$src0, i32:$src1))]>;
def S_BFM_B64 : SOP2_64_32_32 <"s_bfm_b64">;
+
+// TODO: S_MUL_I32 requires V_MUL_LO_I32 from VOP3 change
def S_MUL_I32 : SOP2_32 <"s_mul_i32",
[(set i32:$sdst, (mul i32:$src0, i32:$src1))]> {
let isCommutable = 1;
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 4eba19382315..54c866bdc63c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -128,6 +128,49 @@ int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
return NewInfo ? NewInfo->Opcode : -1;
}
+struct MUBUFInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t dwords;
+ bool has_vaddr;
+ bool has_srsrc;
+ bool has_soffset;
+};
+
+#define GET_MUBUFInfoTable_DECL
+#define GET_MUBUFInfoTable_IMPL
+#include "AMDGPUGenSearchableTables.inc"
+
+int getMUBUFBaseOpcode(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
+ return Info ? Info->BaseOpcode : -1;
+}
+
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords) {
+ const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndDwords(BaseOpc, Dwords);
+ return Info ? Info->Opcode : -1;
+}
+
+int getMUBUFDwords(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->dwords : 0;
+}
+
+bool getMUBUFHasVAddr(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_vaddr : false;
+}
+
+bool getMUBUFHasSrsrc(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_srsrc : false;
+}
+
+bool getMUBUFHasSoffset(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_soffset : false;
+}
+
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
@@ -137,122 +180,75 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
namespace IsaInfo {
-IsaVersion getIsaVersion(const FeatureBitset &Features) {
- // GCN GFX6 (Southern Islands (SI)).
- if (Features.test(FeatureISAVersion6_0_0))
- return {6, 0, 0};
- if (Features.test(FeatureISAVersion6_0_1))
- return {6, 0, 1};
-
- // GCN GFX7 (Sea Islands (CI)).
- if (Features.test(FeatureISAVersion7_0_0))
- return {7, 0, 0};
- if (Features.test(FeatureISAVersion7_0_1))
- return {7, 0, 1};
- if (Features.test(FeatureISAVersion7_0_2))
- return {7, 0, 2};
- if (Features.test(FeatureISAVersion7_0_3))
- return {7, 0, 3};
- if (Features.test(FeatureISAVersion7_0_4))
- return {7, 0, 4};
- if (Features.test(FeatureSeaIslands))
- return {7, 0, 0};
-
- // GCN GFX8 (Volcanic Islands (VI)).
- if (Features.test(FeatureISAVersion8_0_1))
- return {8, 0, 1};
- if (Features.test(FeatureISAVersion8_0_2))
- return {8, 0, 2};
- if (Features.test(FeatureISAVersion8_0_3))
- return {8, 0, 3};
- if (Features.test(FeatureISAVersion8_1_0))
- return {8, 1, 0};
- if (Features.test(FeatureVolcanicIslands))
- return {8, 0, 0};
-
- // GCN GFX9.
- if (Features.test(FeatureISAVersion9_0_0))
- return {9, 0, 0};
- if (Features.test(FeatureISAVersion9_0_2))
- return {9, 0, 2};
- if (Features.test(FeatureISAVersion9_0_4))
- return {9, 0, 4};
- if (Features.test(FeatureISAVersion9_0_6))
- return {9, 0, 6};
- if (Features.test(FeatureGFX9))
- return {9, 0, 0};
-
- if (Features.test(FeatureSouthernIslands))
- return {0, 0, 0};
- return {7, 0, 0};
-}
-
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
auto TargetTriple = STI->getTargetTriple();
- auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
+ auto Version = getIsaVersion(STI->getCPU());
Stream << TargetTriple.getArchName() << '-'
<< TargetTriple.getVendorName() << '-'
<< TargetTriple.getOSName() << '-'
<< TargetTriple.getEnvironmentName() << '-'
<< "gfx"
- << ISAVersion.Major
- << ISAVersion.Minor
- << ISAVersion.Stepping;
+ << Version.Major
+ << Version.Minor
+ << Version.Stepping;
if (hasXNACK(*STI))
Stream << "+xnack";
+ if (hasSRAMECC(*STI))
+ Stream << "+sram-ecc";
Stream.flush();
}
bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
- return STI->getFeatureBits().test(FeatureCodeObjectV3);
+ return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
+ STI->getFeatureBits().test(FeatureCodeObjectV3);
}
-unsigned getWavefrontSize(const FeatureBitset &Features) {
- if (Features.test(FeatureWavefrontSize16))
+unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureWavefrontSize16))
return 16;
- if (Features.test(FeatureWavefrontSize32))
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32))
return 32;
return 64;
}
-unsigned getLocalMemorySize(const FeatureBitset &Features) {
- if (Features.test(FeatureLocalMemorySize32768))
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
return 32768;
- if (Features.test(FeatureLocalMemorySize65536))
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
return 65536;
return 0;
}
-unsigned getEUsPerCU(const FeatureBitset &Features) {
+unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
return 4;
}
-unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- if (!Features.test(FeatureGCN))
+ if (!STI->getFeatureBits().test(FeatureGCN))
return 8;
- unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
if (N == 1)
return 40;
N = 40 / N;
return std::min(N, 16u);
}
-unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
- return getMaxWavesPerEU() * getEUsPerCU(Features);
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
+ return getMaxWavesPerEU() * getEUsPerCU(STI);
}
-unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
}
-unsigned getMinWavesPerEU(const FeatureBitset &Features) {
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
return 1;
}
@@ -261,89 +257,89 @@ unsigned getMaxWavesPerEU() {
return 10;
}
-unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
- getEUsPerCU(Features)) / getEUsPerCU(Features);
+ return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
+ getEUsPerCU(STI)) / getEUsPerCU(STI);
}
-unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
return 1;
}
-unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
return 2048;
}
-unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
- getWavefrontSize(Features);
+ return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
+ getWavefrontSize(STI);
}
-unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
- IsaVersion Version = getIsaVersion(Features);
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 16;
return 8;
}
-unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
return 8;
}
-unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
- IsaVersion Version = getIsaVersion(Features);
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 800;
return 512;
}
-unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
- if (Features.test(FeatureSGPRInitBug))
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureSGPRInitBug))
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
- IsaVersion Version = getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 102;
return 104;
}
-unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
- unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
- if (Features.test(FeatureTrapHandler))
+ unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
- return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
+ MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}
-unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool Addressable) {
assert(WavesPerEU != 0);
- IsaVersion Version = getIsaVersion(Features);
- unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
if (Version.Major >= 8 && !Addressable)
AddressableNumSGPRs = 112;
- unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
- if (Features.test(FeatureTrapHandler))
+ unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
+ MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed) {
unsigned ExtraSGPRs = 0;
if (VCCUsed)
ExtraSGPRs = 2;
- IsaVersion Version = getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major < 8) {
if (FlatScrUsed)
ExtraSGPRs = 4;
@@ -358,74 +354,74 @@ unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
return ExtraSGPRs;
}
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed) {
- return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
- Features[AMDGPU::FeatureXNACK]);
+ return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
+ STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}
-unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
- NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
+ NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
// SGPRBlocks is actual number of SGPR blocks minus 1.
- return NumSGPRs / getSGPREncodingGranule(Features) - 1;
+ return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
-unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
return 4;
}
-unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
- return getVGPRAllocGranule(Features);
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
+ return getVGPRAllocGranule(STI);
}
-unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
return 256;
}
-unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
- return getTotalNumVGPRs(Features);
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
+ return getTotalNumVGPRs(STI);
}
-unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
unsigned MinNumVGPRs =
- alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
- getVGPRAllocGranule(Features)) + 1;
- return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
+ alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
+ getVGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}
-unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
- getVGPRAllocGranule(Features));
- unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
+ unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
+ getVGPRAllocGranule(STI));
+ unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
-unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
- NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
+ NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
// VGPRBlocks is actual number of VGPR blocks minus 1.
- return NumVGPRs / getVGPREncodingGranule(Features) - 1;
+ return NumVGPRs / getVGPREncodingGranule(STI) - 1;
}
} // end namespace IsaInfo
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
- const FeatureBitset &Features) {
- IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
+ const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
memset(&Header, 0, sizeof(Header));
Header.amd_kernel_code_version_major = 1;
Header.amd_kernel_code_version_minor = 2;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
- Header.amd_machine_version_major = ISA.Major;
- Header.amd_machine_version_minor = ISA.Minor;
- Header.amd_machine_version_stepping = ISA.Stepping;
+ Header.amd_machine_version_major = Version.Major;
+ Header.amd_machine_version_minor = Version.Minor;
+ Header.amd_machine_version_stepping = Version.Stepping;
Header.kernel_code_entry_byte_offset = sizeof(Header);
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
Header.wavefront_size = 6;
@@ -513,7 +509,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Ints;
}
-unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getVmcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
if (Version.Major < 9)
return VmcntLo;
@@ -522,15 +518,15 @@ unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
return VmcntLo | VmcntHi;
}
-unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getExpcntBitMask(const IsaVersion &Version) {
return (1 << getExpcntBitWidth()) - 1;
}
-unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getLgkmcntBitMask(const IsaVersion &Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
-unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getWaitcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
@@ -542,7 +538,7 @@ unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
return Waitcnt | VmcntHi;
}
-unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
unsigned VmcntLo =
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
if (Version.Major < 9)
@@ -554,22 +550,30 @@ unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return VmcntLo | VmcntHi;
}
-unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
Vmcnt = decodeVmcnt(Version, Waitcnt);
Expcnt = decodeExpcnt(Version, Waitcnt);
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
-unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
+ Waitcnt Decoded;
+ Decoded.VmCnt = decodeVmcnt(Version, Encoded);
+ Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
+ Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
+ return Decoded;
+}
+
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt) {
Waitcnt =
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
@@ -580,17 +584,17 @@ unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
-unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt) {
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt) {
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
+unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
@@ -599,6 +603,10 @@ unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
return Waitcnt;
}
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
+ return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
+}
+
unsigned getInitialPSInputAddr(const Function &F) {
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
@@ -643,6 +651,10 @@ bool hasXNACK(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}
+bool hasSRAMECC(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
+}
+
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}
@@ -798,6 +810,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VS_64RegClassID:
case AMDGPU::SReg_64RegClassID:
case AMDGPU::VReg_64RegClassID:
+ case AMDGPU::SReg_64_XEXECRegClassID:
return 64;
case AMDGPU::VReg_96RegClassID:
return 96;
@@ -935,27 +948,50 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
-} // end namespace AMDGPU
-
-} // end namespace llvm
-
-namespace llvm {
-namespace AMDGPU {
-
-AMDGPUAS getAMDGPUAS(Triple T) {
- AMDGPUAS AS;
- AS.FLAT_ADDRESS = 0;
- AS.PRIVATE_ADDRESS = 5;
- AS.REGION_ADDRESS = 2;
- return AS;
-}
+// Given Imm, split it into the values to put into the SOffset and ImmOffset
+// fields in an MUBUF instruction. Return false if it is not possible (due to a
+// hardware bug needing a workaround).
+//
+// The required alignment ensures that individual address components remain
+// aligned if they are aligned to begin with. It also ensures that additional
+// offsets within the given alignment can be added to the resulting ImmOffset.
+bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+ const GCNSubtarget *Subtarget, uint32_t Align) {
+ const uint32_t MaxImm = alignDown(4095, Align);
+ uint32_t Overflow = 0;
+
+ if (Imm > MaxImm) {
+ if (Imm <= MaxImm + 64) {
+ // Use an SOffset inline constant for 4..64
+ Overflow = Imm - MaxImm;
+ Imm = MaxImm;
+ } else {
+ // Try to keep the same value in SOffset for adjacent loads, so that
+ // the corresponding register contents can be re-used.
+ //
+ // Load values with all low-bits (except for alignment bits) set into
+ // SOffset, so that a larger range of values can be covered using
+ // s_movk_i32.
+ //
+ // Atomic operations fail to work correctly when individual address
+ // components are unaligned, even if their sum is aligned.
+ uint32_t High = (Imm + Align) & ~4095;
+ uint32_t Low = (Imm + Align) & 4095;
+ Imm = Low;
+ Overflow = High - Align;
+ }
+ }
-AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
- return getAMDGPUAS(M.getTargetTriple());
-}
+ // There is a hardware bug in SI and CI which prevents address clamping in
+ // MUBUF instructions from working correctly with SOffsets. The immediate
+ // offset is unaffected.
+ if (Overflow > 0 &&
+ Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
-AMDGPUAS getAMDGPUAS(const Module &M) {
- return getAMDGPUAS(Triple(M.getTargetTriple()));
+ ImmOffset = Imm;
+ SOffset = Overflow;
+ return true;
}
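For readers skimming the hunk above, the split is easier to follow with concrete numbers. The sketch below is illustrative only: it restates the same arithmetic as a standalone function for the default Align of 4 and omits the SI/CI SOffset workaround; the example values are made up for the walkthrough and the helper name is hypothetical.

// Illustrative only: standalone sketch of the MUBUF offset split for Align = 4.
#include <cstdint>

static void splitExample(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset) {
  const uint32_t Align = 4;
  const uint32_t MaxImm = 4095 & ~(Align - 1); // alignDown(4095, 4) == 4092
  uint32_t Overflow = 0;
  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      Overflow = Imm - MaxImm;                 // small excess goes to SOffset
      Imm = MaxImm;
    } else {
      uint32_t High = (Imm + Align) & ~4095u;  // keep SOffset reusable across loads
      uint32_t Low = (Imm + Align) & 4095u;
      Imm = Low;
      Overflow = High - Align;
    }
  }
  ImmOffset = Imm;
  SOffset = Overflow;
}

// e.g. Imm = 5000 splits into SOffset = 4092 and ImmOffset = 908 (4092 + 908 == 5000);
// Imm = 4100 splits into SOffset = 8 and ImmOffset = 4092; Imm = 1000 stays entirely
// in ImmOffset with SOffset = 0.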
namespace {
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 5b7af8268cda..20123ed4ac81 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -19,6 +19,7 @@
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>
@@ -26,8 +27,10 @@
namespace llvm {
class Argument;
+class AMDGPUSubtarget;
class FeatureBitset;
class Function;
+class GCNSubtarget;
class GlobalValue;
class MCContext;
class MCRegisterClass;
@@ -54,16 +57,6 @@ enum {
TRAP_NUM_SGPRS = 16
};
-/// Instruction set architecture version.
-struct IsaVersion {
- unsigned Major;
- unsigned Minor;
- unsigned Stepping;
-};
-
-/// \returns Isa version for given subtarget \p Features.
-IsaVersion getIsaVersion(const FeatureBitset &Features);
-
/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
@@ -71,114 +64,114 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);
-/// \returns Wavefront size for given subtarget \p Features.
-unsigned getWavefrontSize(const FeatureBitset &Features);
+/// \returns Wavefront size for given subtarget \p STI.
+unsigned getWavefrontSize(const MCSubtargetInfo *STI);
-/// \returns Local memory size in bytes for given subtarget \p Features.
-unsigned getLocalMemorySize(const FeatureBitset &Features);
+/// \returns Local memory size in bytes for given subtarget \p STI.
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
/// \returns Number of execution units per compute unit for given subtarget \p
-/// Features.
-unsigned getEUsPerCU(const FeatureBitset &Features);
+/// STI.
+unsigned getEUsPerCU(const MCSubtargetInfo *STI);
/// \returns Maximum number of work groups per compute unit for given subtarget
-/// \p Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+/// \p STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// Features without any kind of limitation.
-unsigned getMaxWavesPerCU(const FeatureBitset &Features);
+/// STI without any kind of limitation.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
/// \returns Minimum number of waves per execution unit for given subtarget \p
-/// Features.
-unsigned getMinWavesPerEU(const FeatureBitset &Features);
+/// STI.
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// Features without any kind of limitation.
+/// STI without any kind of limitation.
unsigned getMaxWavesPerEU();
/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
-/// \returns Minimum flat work group size for given subtarget \p Features.
-unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
+/// \returns Minimum flat work group size for given subtarget \p STI.
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
-/// \returns Maximum flat work group size for given subtarget \p Features.
-unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
+/// \returns Maximum flat work group size for given subtarget \p STI.
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
-/// \returns Number of waves per work group for given subtarget \p Features and
+/// \returns Number of waves per work group for given subtarget \p STI and
/// limited by given \p FlatWorkGroupSize.
-unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
-/// \returns SGPR allocation granularity for given subtarget \p Features.
-unsigned getSGPRAllocGranule(const FeatureBitset &Features);
+/// \returns SGPR allocation granularity for given subtarget \p STI.
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
-/// \returns SGPR encoding granularity for given subtarget \p Features.
-unsigned getSGPREncodingGranule(const FeatureBitset &Features);
+/// \returns SGPR encoding granularity for given subtarget \p STI.
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
-/// \returns Total number of SGPRs for given subtarget \p Features.
-unsigned getTotalNumSGPRs(const FeatureBitset &Features);
+/// \returns Total number of SGPRs for given subtarget \p STI.
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
-/// \returns Addressable number of SGPRs for given subtarget \p Features.
-unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
+/// \returns Addressable number of SGPRs for given subtarget \p STI.
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
/// \returns Minimum number of SGPRs that meets the given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Maximum number of SGPRs that meets the given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool Addressable);
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
-/// Features when the given special registers are used.
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+/// STI when the given special registers are used.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed);
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
-/// Features when the given special registers are used. XNACK is inferred from
-/// \p Features.
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+/// STI when the given special registers are used. XNACK is inferred from
+/// \p STI.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed);
-/// \returns Number of SGPR blocks needed for given subtarget \p Features when
+/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
-unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
-/// \returns VGPR allocation granularity for given subtarget \p Features.
-unsigned getVGPRAllocGranule(const FeatureBitset &Features);
+/// \returns VGPR allocation granularity for given subtarget \p STI.
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
-/// \returns VGPR encoding granularity for given subtarget \p Features.
-unsigned getVGPREncodingGranule(const FeatureBitset &Features);
+/// \returns VGPR encoding granularity for given subtarget \p STI.
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
-/// \returns Total number of VGPRs for given subtarget \p Features.
-unsigned getTotalNumVGPRs(const FeatureBitset &Features);
+/// \returns Total number of VGPRs for given subtarget \p STI.
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
-/// \returns Addressable number of VGPRs for given subtarget \p Features.
-unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
+/// \returns Addressable number of VGPRs for given subtarget \p STI.
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
/// \returns Minimum number of VGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Maximum number of VGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
-/// \returns Number of VGPR blocks needed for given subtarget \p Features when
+/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
-unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
} // end namespace IsaInfo
@@ -191,6 +184,7 @@ struct MIMGBaseOpcodeInfo {
bool Atomic;
bool AtomicX2;
bool Sampler;
+ bool Gather4;
uint8_t NumExtraArgs;
bool Gradients;
@@ -228,10 +222,28 @@ LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
LLVM_READONLY
+int getMUBUFBaseOpcode(unsigned Opc);
+
+LLVM_READONLY
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords);
+
+LLVM_READONLY
+int getMUBUFDwords(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasVAddr(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasSrsrc(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasSoffset(unsigned Opc);
+
+LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
- const FeatureBitset &Features);
+ const MCSubtargetInfo *STI);
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
@@ -265,26 +277,52 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
std::pair<int, int> Default,
bool OnlyFirstRequired = false);
+/// Represents the counter values to wait for in an s_waitcnt instruction.
+///
+/// Large values (including the maximum possible integer) can be used to
+/// represent "don't care" waits.
+struct Waitcnt {
+ unsigned VmCnt = ~0u;
+ unsigned ExpCnt = ~0u;
+ unsigned LgkmCnt = ~0u;
+
+ Waitcnt() {}
+ Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
+ : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}
+
+ static Waitcnt allZero() { return Waitcnt(0, 0, 0); }
+
+ bool dominates(const Waitcnt &Other) const {
+ return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
+ LgkmCnt <= Other.LgkmCnt;
+ }
+
+ Waitcnt combined(const Waitcnt &Other) const {
+ return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
+ std::min(LgkmCnt, Other.LgkmCnt));
+ }
+};
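A brief usage sketch of the new struct, assuming the surrounding llvm::AMDGPU namespace; the counter values are arbitrary examples, not taken from the patch.

// Illustrative only: composing two pending waits with the helpers above.
llvm::AMDGPU::Waitcnt A(/*VmCnt=*/1, /*ExpCnt=*/~0u, /*LgkmCnt=*/0); // ~0u = don't care
llvm::AMDGPU::Waitcnt B(/*VmCnt=*/3, /*ExpCnt=*/2,   /*LgkmCnt=*/0);
llvm::AMDGPU::Waitcnt C = A.combined(B); // per-counter minimum: {1, 2, 0}
bool Stronger = C.dominates(B);          // true: every counter in C is <= B's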
+
/// \returns Vmcnt bit mask for given isa \p Version.
-unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getVmcntBitMask(const IsaVersion &Version);
/// \returns Expcnt bit mask for given isa \p Version.
-unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getExpcntBitMask(const IsaVersion &Version);
/// \returns Lgkmcnt bit mask for given isa \p Version.
-unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getLgkmcntBitMask(const IsaVersion &Version);
/// \returns Waitcnt bit mask for given isa \p Version.
-unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getWaitcntBitMask(const IsaVersion &Version);
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
@@ -295,19 +333,21 @@ unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
-void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
+
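As a worked example of the bit layout documented above (pre-gfx9, so no Waitcnt[15:14] high bits), an encoding of 0x0234 unpacks as shown below; this is a hand decode for illustration, not a call into the API, and the constant is an arbitrary example.

// Illustrative only: manual decode following the documented pre-gfx9 layout.
unsigned Encoded = 0x0234;
unsigned Vmcnt   = Encoded & 0xF;        // bits 3:0  -> 4
unsigned Expcnt  = (Encoded >> 4) & 0x7; // bits 6:4  -> 3
unsigned Lgkmcnt = (Encoded >> 8) & 0xF; // bits 11:8 -> 2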
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt);
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt);
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt);
/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
@@ -322,9 +362,11 @@ unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
-unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
+unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
+
unsigned getInitialPSInputAddr(const Function &F);
LLVM_READNONE
@@ -349,6 +391,7 @@ inline bool isKernel(CallingConv::ID CC) {
}
bool hasXNACK(const MCSubtargetInfo &STI);
+bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
@@ -447,6 +490,9 @@ int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+ const GCNSubtarget *Subtarget, uint32_t Align = 4);
+
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp
deleted file mode 100644
index 1924f71f11c8..000000000000
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-//===-- AMDGPULaneDominator.cpp - Determine Lane Dominators ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// MBB A lane-dominates MBB B if
-// 1. A dominates B in the usual sense, i.e. every path from the entry to B
-// goes through A, and
-// 2. whenever B executes, every active lane during that execution of B was
-// also active during the most recent execution of A.
-//
-// The simplest example where A dominates B but does not lane-dominate it is
-// where A is a loop:
-//
-// |
-// +--+
-// A |
-// +--+
-// |
-// B
-//
-// Unfortunately, the second condition is not fully captured by the control
-// flow graph when it is unstructured (as may happen when branch conditions are
-// uniform).
-//
-// The following replacement of the second condition is a conservative
-// approximation. It is an equivalent condition when the CFG is fully
-// structured:
-//
-// 2'. every cycle in the CFG that contains A also contains B.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPULaneDominator.h"
-
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-
-namespace llvm {
-
-namespace AMDGPU {
-
-// Given machine basic blocks A and B where A dominates B, check whether
-// A lane-dominates B.
-//
-// The check is conservative, i.e. there can be false-negatives.
-bool laneDominates(MachineBasicBlock *A, MachineBasicBlock *B) {
- // Check whether A is reachable from itself without going through B.
- DenseSet<MachineBasicBlock *> Reachable;
- SmallVector<MachineBasicBlock *, 8> Stack;
-
- Stack.push_back(A);
- do {
- MachineBasicBlock *MBB = Stack.back();
- Stack.pop_back();
-
- for (MachineBasicBlock *Succ : MBB->successors()) {
- if (Succ == A)
- return false;
- if (Succ != B && Reachable.insert(Succ).second)
- Stack.push_back(Succ);
- }
- } while (!Stack.empty());
-
- return true;
-}
-
-} // namespace AMDGPU
-
-} // namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h
deleted file mode 100644
index 4f33a89a364b..000000000000
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//===- AMDGPULaneDominator.h ------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEDOMINATOR_H
-#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEDOMINATOR_H
-
-namespace llvm {
-
-class MachineBasicBlock;
-
-namespace AMDGPU {
-
-bool laneDominates(MachineBasicBlock *MBBA, MachineBasicBlock *MBBB);
-
-} // end namespace AMDGPU
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEDOMINATOR_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 9f0a4d29b5e4..82ffdef8e674 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -46,6 +46,7 @@
int64_t Value = 0; \
if (!expectAbsExpression(MCParser, Value, Err)) \
return false; \
+ C.compute_pgm_resource_registers &= ~(SetMacro(0xFFFFFFFFFFFFFFFFULL) << Shift); \
C.compute_pgm_resource_registers |= SetMacro(Value) << Shift; \
return true; \
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 4c7a92219755..68446ab79720 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -84,6 +84,10 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP1";
}
+class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_DPP_Pseudo <OpName, P, pattern> {
+}
+
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret =
!if(P.HasModifiers,
@@ -103,6 +107,8 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _e32 : VOP1_Pseudo <opName, P>;
def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP1_DPP_Pseudo <opName, P>;
}
// Special profile for instructions which have clamp
@@ -173,7 +179,9 @@ defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
+let FPDPRounding = 1 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
+} // End FPDPRounding = 1
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
@@ -226,7 +234,9 @@ defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>;
let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
+let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
+} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]
defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
@@ -242,7 +252,9 @@ def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
let Src0RC64 = VRegSrc_32;
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
}
// Special case because there are no true output operands. Hack vdst
@@ -271,7 +283,10 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
+
let HasDst = 0;
let EmitDst = 1; // force vdst emission
}
@@ -328,8 +343,10 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
let SubtargetPredicate = Has16BitInsts in {
+let FPDPRounding = 1 in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
+} // End FPDPRounding = 1
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
let SchedRW = [WriteQuarterRate32] in {
@@ -347,7 +364,9 @@ defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
+let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
+} // End FPDPRounding = 1
}
@@ -495,13 +514,8 @@ defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>;
// VI
//===----------------------------------------------------------------------===//
-class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
- VOP_DPP <ps.OpName, P> {
- let Defs = ps.Defs;
- let Uses = ps.Uses;
- let SchedRW = ps.SchedRW;
- let hasSideEffects = ps.hasSideEffects;
-
+class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPPe <P> {
bits<8> vdst;
let Inst{8-0} = 0xfa; // dpp
let Inst{16-9} = op;
@@ -539,9 +553,10 @@ multiclass VOP1_Real_vi <bits<10> op> {
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}
defm V_NOP : VOP1_Real_vi <0x0>;
@@ -712,9 +727,11 @@ multiclass VOP1_Real_gfx9 <bits<10> op> {
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
+
}
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 5ec1a15c5cd2..e3fd7b5f9fad 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -105,6 +105,11 @@ class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP2";
}
+class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_DPP_Pseudo <OpName, P, pattern> {
+}
+
+
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set P.DstVT:$vdst,
@@ -116,22 +121,49 @@ class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}
-multiclass VOP2Inst <string opName,
- VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName,
- bit GFX9Renamed = 0> {
-
+multiclass VOP2Inst_e32<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> {
let renamedInGFX9 = GFX9Renamed in {
-
- def _e32 : VOP2_Pseudo <opName, P>,
+ def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+ } // End renamedInGFX9 = GFX9Renamed
+}
+multiclass VOP2Inst_e64<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> {
+ let renamedInGFX9 = GFX9Renamed in {
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+ } // End renamedInGFX9 = GFX9Renamed
+}
- def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
+multiclass VOP2Inst_sdwa<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> {
+ let renamedInGFX9 = GFX9Renamed in {
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
+ } // End renamedInGFX9 = GFX9Renamed
+}
+multiclass VOP2Inst<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> :
+ VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
+ VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
+ VOP2Inst_sdwa<opName, P, node, revOp, GFX9Renamed> {
+ let renamedInGFX9 = GFX9Renamed in {
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
}
@@ -144,12 +176,14 @@ multiclass VOP2bInst <string opName,
let renamedInGFX9 = GFX9Renamed in {
let SchedRW = [Write32Bit, WriteSALU] in {
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
- def _e32 : VOP2_Pseudo <opName, P>,
+ def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -172,6 +206,9 @@ multiclass VOP2eInst <string opName,
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
+
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -211,9 +248,9 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
- let InsDPP = (ins DstRCDPP:$old,
- Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+ VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
@@ -230,21 +267,15 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret;
let HasSrc2 = 0;
let HasSrc2Mods = 0;
- let HasExt = 1;
- let HasSDWA9 = 0;
-}
-def VOP_MAC_F16 : VOP_MAC <f16> {
- // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
- // 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, f16>.ret;
+ let HasExt = 1;
+ let HasExtDPP = 1;
+ let HasExtSDWA = 1;
+ let HasExtSDWA9 = 0;
}
-def VOP_MAC_F32 : VOP_MAC <f32> {
- // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
- // 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, f32>.ret;
-}
+def VOP_MAC_F16 : VOP_MAC <f16>;
+def VOP_MAC_F32 : VOP_MAC <f32>;
// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
@@ -290,7 +321,9 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let HasExt = 1;
- let HasSDWA9 = 1;
+ let HasExtDPP = 1;
+ let HasExtSDWA = 1;
+ let HasExtSDWA9 = 1;
}
// Read in from vcc or arbitrary SGPR
@@ -321,7 +354,9 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let HasExt = 1;
- let HasSDWA9 = 1;
+ let HasExtDPP = 1;
+ let HasExtSDWA = 1;
+ let HasExtSDWA9 = 1;
}
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
@@ -331,8 +366,11 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
+
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
}
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
@@ -342,20 +380,23 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
- let HasExt = 0;
- let HasSDWA9 = 0;
let HasSrc2 = 0;
let HasSrc2Mods = 0;
+
+ let HasExt = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
}
//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isGCN in {
+let SubtargetPredicate = isGCN, Predicates = [isGCN] in {
defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
-def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, [], "">;
+def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
@@ -363,29 +404,29 @@ defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
-defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
-defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
-defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
-defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
-defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum>;
-defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum>;
-defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_I32_I32_I32>;
-defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_I32_I32_I32>;
-defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_I32_I32_I32>;
-defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_I32_I32_I32>;
+defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_i24>;
+defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
+defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_u24>;
+defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
+defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
+defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
+defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
+defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
+defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
+defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">;
-defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_I32_I32_I32>;
-defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_I32_I32_I32>;
-defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_I32_I32_I32>;
+defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
+defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
+defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
}
-def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, [], "">;
+def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
@@ -411,11 +452,11 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub
// These are special and do not read the exec mask.
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
- [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))], "">;
+ [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
- [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))], "">;
+ [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1
@@ -425,13 +466,13 @@ defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
-defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_i16_f32>;
-defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_u16_f32>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpkrtz_f16_f32>;
-defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_u16_u32>;
-defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_i16_i32>;
+defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
+defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
+defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
+defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;
-} // End SubtargetPredicate = isGCN
+} // End SubtargetPredicate = isGCN, Predicates = [isGCN]
def : GCNPat<
(AMDGPUadde i32:$src0, i32:$src1, i1:$src2),
@@ -444,40 +485,99 @@ def : GCNPat<
>;
// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
+let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
-defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
-defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
-defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, shl>;
} // End isCommutable = 1
-} // End let SubtargetPredicate = SICI
+} // End let SubtargetPredicate = SICI, Predicates = [isSICI]
+
+class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
+ GCNPat<
+ (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
+ !if(!cast<Commutable_REV>(Inst).IsOrig,
+ (Inst $src0, $src1),
+ (Inst $src1, $src0)
+ )
+ >;
+
+let AddedComplexity = 1 in {
+ def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
+ def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
+ def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;
+}
+
+let SubtargetPredicate = HasAddNoCarryInsts in {
+ def : DivergentBinOp<add, V_ADD_U32_e32>;
+ def : DivergentBinOp<sub, V_SUB_U32_e32>;
+ def : DivergentBinOp<sub, V_SUBREV_U32_e32>;
+}
+
+
+def : DivergentBinOp<add, V_ADD_I32_e32>;
+
+def : DivergentBinOp<add, V_ADD_I32_e64>;
+def : DivergentBinOp<sub, V_SUB_I32_e32>;
+
+def : DivergentBinOp<sub, V_SUBREV_I32_e32>;
+
+def : DivergentBinOp<srl, V_LSHRREV_B32_e32>;
+def : DivergentBinOp<sra, V_ASHRREV_I32_e32>;
+def : DivergentBinOp<shl, V_LSHLREV_B32_e32>;
+def : DivergentBinOp<adde, V_ADDC_U32_e32>;
+def : DivergentBinOp<sube, V_SUBB_U32_e32>;
+
+class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
+ GCNPat<
+ (getDivergentFrag<Op>.ret i64:$src0, i64:$src1),
+ (REG_SEQUENCE VReg_64,
+ (Inst
+ (i32 (EXTRACT_SUBREG $src0, sub0)),
+ (i32 (EXTRACT_SUBREG $src1, sub0))
+ ), sub0,
+ (Inst
+ (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG $src1, sub1))
+ ), sub1
+ )
+ >;
+
+def : divergent_i64_BinOp <and, V_AND_B32_e32>;
+def : divergent_i64_BinOp <or, V_OR_B32_e32>;
+def : divergent_i64_BinOp <xor, V_XOR_B32_e32>;
let SubtargetPredicate = Has16BitInsts in {
+let FPDPRounding = 1 in {
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
+defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
+} // End FPDPRounding = 1
+
defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>;
defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>;
-defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
let isCommutable = 1 in {
+let FPDPRounding = 1 in {
defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>;
defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
+} // End FPDPRounding = 1
defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
-defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum>;
-defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum>;
+defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
+defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>;
defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>;
defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>;
@@ -698,13 +798,8 @@ defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>;
// VI
//===----------------------------------------------------------------------===//
-class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfile P = ps.Pfl> :
- VOP_DPP <OpName, P> {
- let Defs = ps.Defs;
- let Uses = ps.Uses;
- let SchedRW = ps.SchedRW;
- let hasSideEffects = ps.hasSideEffects;
-
+class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPPe <P> {
bits<8> vdst;
bits<8> src1;
let Inst{8-0} = 0xfa; //dpp
@@ -716,12 +811,6 @@ class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfil
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
-multiclass VOP32_Real_vi <bits<10> op> {
- def _vi :
- VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3e_vi<op, !cast<VOP2_Pseudo>(NAME).Pfl>;
-}
-
multiclass VOP2_Real_MADK_vi <bits<6> op> {
def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
@@ -791,8 +880,13 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
let AsmString = AsmName # ps.AsmOperands;
}
- def _dpp :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(OpName#"_e32"), AsmName>;
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
+ VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
+ let AsmString = AsmName # ps.AsmOperands;
+ }
}
}
@@ -819,10 +913,14 @@ multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
let AsmString = AsmName # ps.AsmOperands;
}
- def _dpp_gfx9 :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(OpName#"_e32"), AsmName> {
- let DecoderNamespace = "SDWA9";
- }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
+ VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
+ let AsmString = AsmName # ps.AsmOperands;
+ let DecoderNamespace = "SDWA9";
+ }
}
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
@@ -840,19 +938,23 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
}
- def _dpp_gfx9 :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
- let DecoderNamespace = "SDWA9";
- }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
+ let DecoderNamespace = "SDWA9";
+ }
}
} // AssemblerPredicates = [isGFX9]
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
+
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}
defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>;
@@ -899,9 +1001,6 @@ defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;
-defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
-defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
-
defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;
defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>;
defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>;
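The divergent_i64_BinOp patterns in the hunks above split a divergent 64-bit bitwise operation into two 32-bit VALU ops on the sub0/sub1 halves and rebuild the result with REG_SEQUENCE. A small C++ sketch of the equivalent arithmetic (illustrative only):

#include <cassert>
#include <cstdint>

// 64-bit AND lowered as two 32-bit ops on the low (sub0) and high (sub1)
// halves, then recombined -- mirroring divergent_i64_BinOp<and, V_AND_B32_e32>.
static uint64_t and64_via_two_32bit_ops(uint64_t a, uint64_t b) {
  uint32_t lo = static_cast<uint32_t>(a) & static_cast<uint32_t>(b);               // V_AND_B32 on sub0
  uint32_t hi = static_cast<uint32_t>(a >> 32) & static_cast<uint32_t>(b >> 32);   // V_AND_B32 on sub1
  return (static_cast<uint64_t>(hi) << 32) | lo;                                    // REG_SEQUENCE
}

int main() {
  assert(and64_via_two_32bit_ops(0xffff0000ffff0000ULL, 0x0ff00ff00ff00ff0ULL)
         == (0xffff0000ffff0000ULL & 0x0ff00ff00ff00ff0ULL));
}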
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 26bc5260e17f..4b8c1f208a0e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -17,16 +17,16 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT src0),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node (P.Src0VT src0),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT src0)))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0)))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -35,18 +35,18 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
(P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -55,18 +55,18 @@ class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
(P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -75,18 +75,18 @@ class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
(P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -94,9 +94,9 @@ class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
}
class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
- list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
- list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
- list<dag> ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0))];
+ list<dag> ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
+ list<dag> ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1))];
+ list<dag> ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
ret1));
@@ -185,6 +185,7 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf
getAsm64<HasDst, NumSrcArgs, HasIntClamp,
HasModifiers, HasOMod, DstVT>.ret,
P.Asm64));
+ let NeedPatGen = P.NeedPatGen;
}
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
@@ -219,7 +220,8 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
// VOP3 INTERP
//===----------------------------------------------------------------------===//
-class VOP3Interp<string OpName, VOPProfile P> : VOP3_Pseudo<OpName, P> {
+class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
+ VOP3_Pseudo<OpName, P, pattern> {
let AsmMatchConverter = "cvtVOP3Interp";
}
@@ -291,11 +293,13 @@ def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fma>;
def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
let SchedRW = [WriteDoubleAdd] in {
+let FPDPRounding = 1 in {
def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, fma>;
def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, fadd, 1>;
def V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul, 1>;
-def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum, 1>;
-def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum, 1>;
+} // End FPDPRounding = 1
+def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like, 1>;
+def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like, 1>;
} // End SchedRW = [WriteDoubleAdd]
let SchedRW = [WriteQuarterRate32] in {
@@ -323,6 +327,7 @@ def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
getVOP3VCC<VOP_F64_F64_F64_F64_VCC, AMDGPUdiv_fmas>.ret> {
let SchedRW = [WriteDouble];
+ let FPDPRounding = 1;
}
} // End Uses = [VCC, EXEC]
@@ -353,10 +358,10 @@ def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CL
def V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
def V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUdiv_fixup>;
-let SchedRW = [WriteDoubleAdd] in {
+let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
-} // End SchedRW = [WriteDoubleAdd]
+} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
let SchedRW = [WriteFloatFMA, WriteSALU];
@@ -367,6 +372,7 @@ def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32,
def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
let SchedRW = [WriteDouble, WriteSALU];
let AsmMatchConverter = "";
+ let FPDPRounding = 1;
}
def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
@@ -381,12 +387,12 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I3
let SchedRW = [Write64Bit] in {
// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>>;
-def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>>;
-def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>>;
+let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
+def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, shl>;
+def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, srl>;
+def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, sra>;
def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
-} // End SubtargetPredicate = isSICI
+} // End SubtargetPredicate = isSICI, Predicates = [isSICI]
let SubtargetPredicate = isVI in {
def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
@@ -395,6 +401,22 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
} // End SubtargetPredicate = isVI
} // End SchedRW = [Write64Bit]
+let Predicates = [isVI] in {
+def : GCNPat <
+ (getDivergentFrag<shl>.ret i64:$x, i32:$y),
+ (V_LSHLREV_B64 $y, $x)
+>;
+def : AMDGPUPat <
+ (getDivergentFrag<srl>.ret i64:$x, i32:$y),
+ (V_LSHRREV_B64 $y, $x)
+>;
+def : AMDGPUPat <
+ (getDivergentFrag<sra>.ret i64:$x, i32:$y),
+ (V_ASHRREV_I64 $y, $x)
+>;
+}
+
+
let SubtargetPredicate = isCIVI in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
@@ -414,33 +436,51 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup> {
let Predicates = [Has16BitInsts, isVIOnly];
+ let FPDPRounding = 1;
}
def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup> {
let renamedInGFX9 = 1;
let Predicates = [Has16BitInsts, isGFX9];
+ let FPDPRounding = 1;
+}
+
+def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma> {
+ let Predicates = [Has16BitInsts, isVIOnly];
+ let FPDPRounding = 1;
+}
+def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, fma> {
+ let renamedInGFX9 = 1;
+ let Predicates = [Has16BitInsts, isGFX9];
+ let FPDPRounding = 1;
}
let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {
let renamedInGFX9 = 1 in {
-def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
-def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
+let FPDPRounding = 1 in {
+def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
+let Uses = [M0, EXEC] in {
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>;
-}
+} // End Uses = [M0, EXEC]
+} // End FPDPRounding = 1
+} // End renamedInGFX9 = 1
let SubtargetPredicate = isGFX9 in {
-def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
+def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>> {
+ let FPDPRounding = 1;
+}
def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
-def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
} // End SubtargetPredicate = isGFX9
+let Uses = [M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>>;
def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>;
+} // End Uses = [M0, EXEC], FPDPRounding = 1
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
@@ -468,6 +508,37 @@ defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
} // End Predicates = [Has16BitInsts]
+class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
+ (ops node:$x, node:$y, node:$z),
+ // When the inner operation is used multiple times, selecting 3-op
+ // instructions may still be beneficial -- if the other users can be
+ // combined similarly. Let's be conservative for now.
+ (op2 (HasOneUseBinOp<op1> node:$x, node:$y), node:$z),
+ [{
+ // Only use VALU ops when the result is divergent.
+ if (!N->isDivergent())
+ return false;
+
+ // Check constant bus limitations.
+ //
+ // Note: Use !isDivergent as a conservative proxy for whether the value
+ // is in an SGPR (uniform values can end up in VGPRs as well).
+ unsigned ConstantBusUses = 0;
+ for (unsigned i = 0; i < 3; ++i) {
+ if (!Operands[i]->isDivergent() &&
+ !isInlineImmediate(Operands[i].getNode())) {
+ ConstantBusUses++;
+ if (ConstantBusUses >= 2)
+ return false;
+ }
+ }
+
+ return true;
+ }]
+> {
+ let PredicateCodeUsesOperands = 1;
+}
+
let SubtargetPredicate = isGFX9 in {
def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -502,6 +573,22 @@ def V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B3
def V_ADD_I32_gfx9 : VOP3Inst <"v_add_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32>>;
def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32>>;
+
+
+class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
+ // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
+ (ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
+ (inst i32:$src0, i32:$src1, i32:$src2)
+>;
+
+def : ThreeOp_i32_Pats<shl, add, V_LSHL_ADD_U32>;
+def : ThreeOp_i32_Pats<add, shl, V_ADD_LSHL_U32>;
+def : ThreeOp_i32_Pats<add, add, V_ADD3_U32>;
+def : ThreeOp_i32_Pats<shl, or, V_LSHL_OR_B32>;
+def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32>;
+def : ThreeOp_i32_Pats<or, or, V_OR3_B32>;
+def : ThreeOp_i32_Pats<xor, add, V_XAD_U32>;
+
} // End SubtargetPredicate = isGFX9
//===----------------------------------------------------------------------===//
@@ -651,23 +738,23 @@ defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>;
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
multiclass VOP3_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3e_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3be_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3be_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3be_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3Interp_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3Interp_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
@@ -775,12 +862,15 @@ defm V_FMA_F16 : VOP3_F16_Real_vi <0x1ee>;
defm V_DIV_FIXUP_F16 : VOP3_F16_Real_vi <0x1ef>;
defm V_INTERP_P2_F16 : VOP3Interp_F16_Real_vi <0x276>;
+let FPDPRounding = 1 in {
defm V_MAD_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ea, "V_MAD_F16", "v_mad_legacy_f16">;
-defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">;
-defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">;
defm V_FMA_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ee, "V_FMA_F16", "v_fma_legacy_f16">;
defm V_DIV_FIXUP_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ef, "V_DIV_FIXUP_F16", "v_div_fixup_legacy_f16">;
defm V_INTERP_P2_LEGACY_F16 : VOP3Interp_F16_Real_gfx9 <0x276, "V_INTERP_P2_F16", "v_interp_p2_legacy_f16">;
+} // End FPDPRounding = 1
+
+defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">;
+defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">;
defm V_MAD_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">;
defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
@@ -813,6 +903,9 @@ defm V_MUL_LO_I32 : VOP3_Real_vi <0x285>;
defm V_MUL_HI_U32 : VOP3_Real_vi <0x286>;
defm V_MUL_HI_I32 : VOP3_Real_vi <0x287>;
+defm V_READLANE_B32 : VOP3_Real_vi <0x289>;
+defm V_WRITELANE_B32 : VOP3_Real_vi <0x28a>;
+
defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>;
defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>;
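The ThreeOp_i32_Pats definitions above fold a two-instruction expression into one GFX9 three-source op when the ThreeOpFrag predicate (divergent result, at most one constant-bus operand) holds. A C++ sketch of the reference semantics (illustrative only; masking the shift amount to 5 bits is an assumption, matching the 32-bit VALU shifts):

#include <cassert>
#include <cstdint>

// Reference semantics for some of the fused GFX9 3-source ops matched above.
static uint32_t lshl_add_u32(uint32_t a, uint32_t b, uint32_t c) { return (a << (b & 31)) + c; } // V_LSHL_ADD_U32
static uint32_t add3_u32(uint32_t a, uint32_t b, uint32_t c)     { return a + b + c; }           // V_ADD3_U32
static uint32_t and_or_b32(uint32_t a, uint32_t b, uint32_t c)   { return (a & b) | c; }         // V_AND_OR_B32
static uint32_t xad_u32(uint32_t a, uint32_t b, uint32_t c)      { return (a ^ b) + c; }         // V_XAD_U32

int main() {
  assert(lshl_add_u32(3, 4, 5) == (3u << 4) + 5u);
  assert(add3_u32(1, 2, 3) == 6u);
  assert(and_or_b32(0xf0, 0x3c, 0x01) == ((0xf0u & 0x3cu) | 0x01u));
  assert(xad_u32(0xff, 0x0f, 1) == ((0xffu ^ 0x0fu) + 1u));
}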
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index b51828b54679..91b45583c848 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -42,14 +42,16 @@ class VOP3_VOP3PInst<string OpName, VOPProfile P, bit UseTiedOutput = 0,
}
let isCommutable = 1 in {
-def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
def V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
def V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
+let FPDPRounding = 1 in {
+def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fadd>;
def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmul>;
-def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum>;
-def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum>;
+} // End FPDPRounding = 1
+def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
+def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;
def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, add>;
def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
@@ -137,12 +139,14 @@ let SubtargetPredicate = HasMadMixInsts in {
let isCommutable = 1 in {
def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
+let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
let ClampLo = 0, ClampHi = 1 in {
def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
}
+} // End FPDPRounding = 1
}
defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
@@ -154,18 +158,99 @@ let SubtargetPredicate = HasFmaMixInsts in {
let isCommutable = 1 in {
def V_FMA_MIX_F32 : VOP3_VOP3PInst<"v_fma_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
+let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
def V_FMA_MIXLO_F16 : VOP3_VOP3PInst<"v_fma_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
let ClampLo = 0, ClampHi = 1 in {
def V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
}
+} // End FPDPRounding = 1
}
defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}
-let SubtargetPredicate = HasDLInsts in {
+// Defines patterns that extract signed 4bit from each Idx[0].
+foreach Idx = [[0,28],[4,24],[8,20],[12,16],[16,12],[20,8],[24,4]] in
+ def ExtractSigned4bit_#Idx[0] : PatFrag<(ops node:$src),
+ (sra (shl node:$src, (i32 Idx[1])), (i32 28))>;
+
+// Defines code pattern that extracts U(unsigned/signed) 4/8bit from FromBitIndex.
+class Extract<int FromBitIndex, int BitMask, bit U>: PatFrag<
+ (ops node:$src),
+ !if (!or (!and (!eq (BitMask, 255), !eq (FromBitIndex, 24)), !eq (FromBitIndex, 28)), // last element
+ !if (U, (srl node:$src, (i32 FromBitIndex)), (sra node:$src, (i32 FromBitIndex))),
+ !if (!eq (FromBitIndex, 0), // first element
+ !if (U, (and node:$src, (i32 BitMask)),
+ !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
+ (sext_inreg node:$src, i8))),
+ !if (U, (and (srl node:$src, (i32 FromBitIndex)), (i32 BitMask)),
+ !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
+ (sext_inreg (srl node:$src, (i32 FromBitIndex)), i8)))))>;
+
+
+foreach Type = ["I", "U"] in
+ foreach Index = 0-3 in {
+ // Defines patterns that extract each Index'ed 8bit from an unsigned
+ // 32bit scalar value;
+ def #Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !if (!eq (Type, "U"), 1, 0)>;
+
+ // Defines multiplication patterns where the multiplication is happening on each
+ // Index'ed 8bit of a 32bit scalar value.
+
+ def Mul#Type#_Elt#Index : PatFrag<
+ (ops node:$src0, node:$src1),
+ (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), AMDGPUmul_i24_oneuse, AMDGPUmul_u24_oneuse))
+ (!cast<Extract>(#Type#Index#"_8bit") node:$src0),
+ (!cast<Extract>(#Type#Index#"_8bit") node:$src1))>;
+ }
+
+// Different variants of dot8 patterns cause a huge increase in the compile time.
+// Define non-associative/commutative add/mul to prevent permutation in the dot8
+// pattern.
+def NonACAdd : SDNode<"ISD::ADD" , SDTIntBinOp>;
+def NonACAdd_oneuse : HasOneUseBinOp<NonACAdd>;
+
+def NonACAMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24" , SDTIntBinOp>;
+def NonACAMDGPUmul_u24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_u24>;
+
+def NonACAMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24" , SDTIntBinOp>;
+def NonACAMDGPUmul_i24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_i24>;
+
+foreach Type = ["I", "U"] in
+ foreach Index = 0-7 in {
+ // Defines patterns that extract each Index'ed 4bit from an unsigned
+ // 32bit scalar value;
+ def #Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !if (!eq (Type, "U"), 1, 0)>;
+
+ // Defines multiplication patterns where the multiplication is happening on each
+ // Index'ed 8bit of a 32bit scalar value.
+ def Mul#Type#Index#"_4bit" : PatFrag<
+ (ops node:$src0, node:$src1),
+ (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), NonACAMDGPUmul_i24_oneuse, NonACAMDGPUmul_u24_oneuse))
+ (!cast<Extract>(#Type#Index#"_4bit") node:$src0),
+ (!cast<Extract>(#Type#Index#"_4bit") node:$src1))>;
+ }
+
+class UDot2Pat<Instruction Inst> : GCNPat <
+ (add (add_oneuse (AMDGPUmul_u24_oneuse (srl i32:$src0, (i32 16)),
+ (srl i32:$src1, (i32 16))), i32:$src2),
+ (AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
+ (and i32:$src1, (i32 65535)))
+ ),
+ (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
+>;
+
+class SDot2Pat<Instruction Inst> : GCNPat <
+ (add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
+ (sra i32:$src1, (i32 16))), i32:$src2),
+ (AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
+ (sext_inreg i32:$src1, i16))),
+ (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
+>;
+
+let SubtargetPredicate = HasDotInsts in {
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
@@ -192,7 +277,32 @@ defm : DotPats<int_amdgcn_udot4, V_DOT4_U32_U8>;
defm : DotPats<int_amdgcn_sdot8, V_DOT8_I32_I4>;
defm : DotPats<int_amdgcn_udot8, V_DOT8_U32_U4>;
-} // End SubtargetPredicate = HasDLInsts
+def : UDot2Pat<V_DOT2_U32_U16>;
+def : SDot2Pat<V_DOT2_I32_I16>;
+
+foreach Type = ["U", "I"] in
+ def : GCNPat <
+ !cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
+ (add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
+ (!cast<VOP3PInst>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
+foreach Type = ["U", "I"] in
+ def : GCNPat <
+ !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
+ [1, 2, 3, 4, 5, 6, 7], lhs, y,
+ (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
+ (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
+// Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
+// in the compile time. Directly handle the pattern generated by the FE here.
+foreach Type = ["U", "I"] in
+ def : GCNPat <
+ !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
+ [7, 1, 2, 3, 4, 5, 6], lhs, y,
+ (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
+ (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
+} // End SubtargetPredicate = HasDotInsts
multiclass VOP3P_Real_vi<bits<10> op> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -242,7 +352,7 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
}
-let SubtargetPredicate = HasDLInsts in {
+let SubtargetPredicate = HasDotInsts in {
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
@@ -252,4 +362,4 @@ defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>;
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>;
-} // End SubtargetPredicate = HasDLInsts
+} // End SubtargetPredicate = HasDotInsts
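The UDot2Pat/SDot2Pat classes and the dot4/dot8 foreach patterns above match sums of lane-wise products (plus an accumulator) onto the v_dot* instructions. A C++ sketch of the reference semantics for the unsigned dot2 and dot4 forms (illustrative only):

#include <cassert>
#include <cstdint>

// v_dot2_u32_u16: multiply the two unsigned 16-bit lanes of each source and
// add the accumulator; v_dot4_u32_u8 does the same over four 8-bit lanes.
static uint32_t dot2_u32_u16(uint32_t a, uint32_t b, uint32_t acc) {
  return (a & 0xffff) * (b & 0xffff) + (a >> 16) * (b >> 16) + acc;
}

static uint32_t dot4_u32_u8(uint32_t a, uint32_t b, uint32_t acc) {
  uint32_t sum = acc;
  for (int i = 0; i < 4; ++i)
    sum += ((a >> (8 * i)) & 0xff) * ((b >> (8 * i)) & 0xff);
  return sum;
}

int main() {
  assert(dot2_u32_u16(0x00020003u, 0x00040005u, 7u) == 2u * 4u + 3u * 5u + 7u);
  assert(dot4_u32_u8(0x01020304u, 0x01010101u, 0u) == 1u + 2u + 3u + 4u);
}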
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index cc6b8116afee..091cac8cd35c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -635,6 +635,17 @@ def : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
def : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
def : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
+def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;
+def : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;
+def : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
+def : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_e64, i16>;
+def : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_e64, i16>;
+def : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_e64, i16>;
+def : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
+def : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
+def : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
+def : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
+
class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : GCNPat <
(i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
@@ -656,6 +667,14 @@ def : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;
def : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;
def : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;
+def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
+def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
+def : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
+def : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_e64, f16>;
+def : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_e64, f16>;
+def : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_e64, f16>;
+
+
def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
@@ -670,6 +689,13 @@ def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
+def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_e64, f16>;
+def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_e64, f16>;
+def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
+def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
+def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
+def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
+
//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//
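The new f16 FCMP_Pattern entries above include the unordered predicates, which select onto the "N"-prefixed compares (for example COND_UEQ onto V_CMP_NLG_F16_e64). A C++ sketch checking that "equal or unordered" and "not less-or-greater" agree, including for NaN (illustrative only; float stands in for f16):

#include <cassert>
#include <cmath>

// COND_UEQ (equal or unordered) is equivalent to NLG: !(a < b) && !(a > b).
static bool ueq(float a, float b) { return a == b || std::isnan(a) || std::isnan(b); }
static bool nlg(float a, float b) { return !(a < b) && !(a > b); }

int main() {
  const float nan = std::nanf("");
  assert(ueq(1.0f, 1.0f) == nlg(1.0f, 1.0f));
  assert(ueq(1.0f, 2.0f) == nlg(1.0f, 2.0f));
  assert(ueq(nan, 1.0f)  == nlg(nan, 1.0f));
}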
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td
index f0f7f259f71d..7de7d90d27b3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -420,10 +420,10 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
let SDWA = 1;
let Uses = [EXEC];
- let SubtargetPredicate = !if(P.HasExt, HasSDWA, DisableInst);
- let AssemblerPredicate = !if(P.HasExt, HasSDWA, DisableInst);
- let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
- AMDGPUAsmVariants.Disable);
+ let SubtargetPredicate = !if(P.HasExtSDWA, HasSDWA, DisableInst);
+ let AssemblerPredicate = !if(P.HasExtSDWA, HasSDWA, DisableInst);
+ let AsmVariantName = !if(P.HasExtSDWA, AMDGPUAsmVariants.SDWA,
+ AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA";
VOPProfile Pfl = P;
@@ -471,10 +471,10 @@ class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
- let SubtargetPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst);
- let AssemblerPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst);
- let AsmVariantName = !if(ps.Pfl.HasSDWA9, AMDGPUAsmVariants.SDWA9,
- AMDGPUAsmVariants.Disable);
+ let SubtargetPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA9, DisableInst);
+ let AssemblerPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA9, DisableInst);
+ let AsmVariantName = !if(ps.Pfl.HasExtSDWA9, AMDGPUAsmVariants.SDWA9,
+ AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA9";
// Copy relevant pseudo op flags
@@ -505,9 +505,14 @@ class VOP_DPPe<VOPProfile P> : Enc64 {
let Inst{63-60} = row_mask;
}
-class VOP_DPP <string OpName, VOPProfile P> :
- InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, []>,
- VOP_DPPe<P> {
+class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, pattern>,
+ VOP <OpName>,
+ SIMCInstr <OpName#"_dpp", SIEncodingFamily.NONE>,
+ MnemonicAlias <OpName#"_dpp", OpName> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
let mayLoad = 0;
let mayStore = 0;
@@ -517,15 +522,99 @@ class VOP_DPP <string OpName, VOPProfile P> :
let VALU = 1;
let DPP = 1;
let Size = 8;
+ let Uses = [EXEC];
+ let isConvergent = 1;
+
+ string Mnemonic = OpName;
+ string AsmOperands = P.AsmDPP;
let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
let SubtargetPredicate = HasDPP;
- let AssemblerPredicate = !if(P.HasExt, HasDPP, DisableInst);
- let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
- AMDGPUAsmVariants.Disable);
+ let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst);
+ let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
+ AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, "$old = $vdst", "");
let DisableEncoding = !if(P.NumSrcArgs, "$old", "");
let DecoderNamespace = "DPP";
+
+ VOPProfile Pfl = P;
+}
+
+class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, EncodingFamily> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ // Copy relevant pseudo op flags
+ let isConvergent = ps.isConvergent;
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AssemblerPredicate = ps.AssemblerPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let AsmVariantName = ps.AsmVariantName;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let DecoderNamespace = ps.DecoderNamespace;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+}
+
+class getNumNodeArgs<SDPatternOperator Op> {
+ SDNode N = !cast<SDNode>(Op);
+ SDTypeProfile TP = N.TypeProfile;
+ int ret = TP.NumOperands;
+}
+
+
+class getDivergentFrag<SDPatternOperator Op> {
+
+ int NumSrcArgs = getNumNodeArgs<Op>.ret;
+ PatFrag ret = PatFrag <
+ !if(!eq(NumSrcArgs, 1),
+ (ops node:$src0),
+ !if(!eq(NumSrcArgs, 2),
+ (ops node:$src0, node:$src1),
+ (ops node:$src0, node:$src1, node:$src2))),
+ !if(!eq(NumSrcArgs, 1),
+ (Op $src0),
+ !if(!eq(NumSrcArgs, 2),
+ (Op $src0, $src1),
+ (Op $src0, $src1, $src2))),
+ [{ return N->isDivergent(); }]
+ >;
+}
+
+class VOPPatGen<SDPatternOperator Op, VOPProfile P> {
+
+ PatFrag Operator = getDivergentFrag < Op >.ret;
+
+ dag Ins = !foreach(tmp, P.Ins32, !subst(ins, Operator,
+ !subst(P.Src0RC32, P.Src0VT,
+ !subst(P.Src1RC32, P.Src1VT, tmp))));
+
+
+ dag Outs = !foreach(tmp, P.Outs32, !subst(outs, set,
+ !subst(P.DstRC, P.DstVT, tmp)));
+
+ list<dag> ret = [!con(Outs, (set Ins))];
+}
+
+class VOPPatOrNull<SDPatternOperator Op, VOPProfile P> {
+ list<dag> ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen<Op, P>.ret, []);
+}
+
+class DivergentFragOrOp<SDPatternOperator Op, VOPProfile P> {
+ SDPatternOperator ret = !if(!eq(P.NeedPatGen,PatGenMode.Pattern),
+ !if(!isa<SDNode>(Op), getDivergentFrag<Op>.ret, Op), Op);
}
include "VOPCInstructions.td"
diff --git a/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp
index 1acae3a88870..6f5bbd3b4ef3 100644
--- a/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp
@@ -26,12 +26,6 @@ static Reloc::Model getRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
/// ARCTargetMachine ctor - Create an ILP32 architecture model
ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -43,7 +37,7 @@ ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT,
"e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-"
"f32:32:32-i64:32-f64:32-a:0:32-n32",
TT, CPU, FS, Options, getRelocModel(RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
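The ARCTargetMachine change above drops the local helper in favour of the shared getEffectiveCodeModel overload that takes an explicit default. The behaviour is the usual optional-with-fallback idiom, roughly as below (illustrative only; these are not LLVM's actual types):

#include <optional>

enum class CodeModel { Small, Medium, Large };

// Equivalent of passing CodeModel::Small as the default: use the requested
// model if one was given, otherwise fall back to Small.
static CodeModel effectiveCodeModel(std::optional<CodeModel> CM) {
  return CM.value_or(CodeModel::Small);
}

int main() { return effectiveCodeModel(std::nullopt) == CodeModel::Small ? 0 : 1; }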
diff --git a/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp b/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
index 0c627d04698b..9c820c2fc595 100644
--- a/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
@@ -20,7 +20,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -29,6 +28,12 @@ using namespace llvm;
#include "ARCGenAsmWriter.inc"
+template <class T>
+static const char *BadConditionCode(T cc) {
+ LLVM_DEBUG(dbgs() << "Unknown condition code passed: " << cc << "\n");
+ return "{unknown-cc}";
+}
+
static const char *ARCBRCondCodeToString(ARCCC::BRCondCode BRCC) {
switch (BRCC) {
case ARCCC::BREQ:
@@ -44,7 +49,7 @@ static const char *ARCBRCondCodeToString(ARCCC::BRCondCode BRCC) {
case ARCCC::BRHS:
return "hs";
}
- llvm_unreachable("Unhandled ARCCC::BRCondCode");
+ return BadConditionCode(BRCC);
}
static const char *ARCCondCodeToString(ARCCC::CondCode CC) {
@@ -86,7 +91,7 @@ static const char *ARCCondCodeToString(ARCCC::CondCode CC) {
case ARCCC::Z:
return "z";
}
- llvm_unreachable("Unhandled ARCCC::CondCode");
+ return BadConditionCode(CC);
}
void ARCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
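The ARCInstPrinter change above replaces llvm_unreachable with a logging fallback that returns "{unknown-cc}", so an unexpected condition code degrades gracefully instead of aborting. A standalone C++ sketch of the same pattern (illustrative only, outside LLVM):

#include <cstdio>

enum class CondCode { EQ, NE, Unknown = 99 };

// Known codes map to their mnemonics; anything else is logged and printed
// as a placeholder rather than treated as unreachable.
static const char *condCodeToString(CondCode cc) {
  switch (cc) {
  case CondCode::EQ: return "eq";
  case CondCode::NE: return "ne";
  default:
    std::fprintf(stderr, "Unknown condition code passed: %d\n", static_cast<int>(cc));
    return "{unknown-cc}";
  }
}

int main() { std::puts(condCodeToString(CondCode::Unknown)); }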
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index 2e62a0790418..3db60f1c16d6 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -61,6 +61,11 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
"floating point",
[FeatureFPARMv8]>;
+def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
+ "Enable full half-precision "
+ "floating point fml instructions",
+ [FeatureFullFP16]>;
+
def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
"Floating point unit supports "
"single precision only">;
@@ -194,6 +199,10 @@ def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
"SlowLoadDSubregister", "true",
"Loading into D subregs is slow">;
+def FeatureUseWideStrideVFP : SubtargetFeature<"wide-stride-vfp",
+ "UseWideStrideVFP", "true",
+ "Use a wide stride when allocating VFP registers">;
+
// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
"DontWidenVMOVS", "true",
@@ -256,6 +265,9 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
+def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopAlignment","2",
+ "Prefer 32-bit alignment for loops">;
+
/// Some instructions update CPSR partially, which can add false dependency for
/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
/// mapped to a separate physical register. Avoid partial CPSR update for these
@@ -351,6 +363,11 @@ def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
+// Armv8.5-A extensions
+
+def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
+ "Enable v8.5a Speculation Barrier" >;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
@@ -440,6 +457,10 @@ def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
"Support ARM v8.4a instructions",
[HasV8_3aOps, FeatureDotProd]>;
+def HasV8_5aOps : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true",
+ "Support ARM v8.5a instructions",
+ [HasV8_4aOps, FeatureSB]>;
+
//===----------------------------------------------------------------------===//
// ARM Processor subtarget features.
//
@@ -482,8 +503,25 @@ def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors", []>;
-def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
- "Samsung Exynos-Mx processors", []>;
+def ProcExynos : SubtargetFeature<"exynos", "ARMProcFamily", "Exynos",
+ "Samsung Exynos processors",
+ [FeatureZCZeroing,
+ FeatureUseWideStrideVFP,
+ FeatureUseAA,
+ FeatureSplatVFPToNeon,
+ FeatureSlowVGETLNi32,
+ FeatureSlowVDUP32,
+ FeatureSlowFPBrcc,
+ FeatureProfUnpredicate,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureHasRetAddrStack,
+ FeatureFuseLiterals,
+ FeatureFuseAES,
+ FeatureExpandMLx,
+ FeatureCrypto,
+ FeatureCRC]>;
def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
"Cortex-R4 ARM processors", []>;
@@ -659,6 +697,20 @@ def ARMv84a : Architecture<"armv8.4-a", "ARMv84a", [HasV8_4aOps,
FeatureRAS,
FeatureDotProd]>;
+def ARMv85a : Architecture<"armv8.5-a", "ARMv85a", [HasV8_5aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
+
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
FeatureDB,
@@ -865,6 +917,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
FeatureVFP4,
+ FeatureUseWideStrideVFP,
FeatureMP,
FeatureHWDivThumb,
FeatureHWDivARM,
@@ -926,6 +979,7 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m,
ProcM3,
+ FeaturePrefLoopAlign32,
FeatureHasNoBranchPredictor]>;
def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m,
@@ -936,6 +990,8 @@ def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em,
FeatureVFP4,
FeatureVFPOnlySP,
FeatureD16,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-m7", [ARMv7em,
@@ -950,6 +1006,8 @@ def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline,
FeatureFPARMv8,
FeatureD16,
FeatureVFPOnlySP,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
@@ -985,7 +1043,7 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57,
FeatureAvoidPartialCPSR,
FeatureCheapPredicableCPSR]>;
-def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
+def : ProcessorModel<"cortex-a72", CortexA57Model, [ARMv8a, ProcA72,
FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
@@ -1017,29 +1075,12 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureZCZeroing,
FeatureNoPostRASched]>;
-def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
-
-def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
-
-def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
-
-def : ProcNoItin<"exynos-m4", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
+def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynos]>;
+def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynos]>;
+def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynos]>;
+def : ProcNoItin<"exynos-m4", [ARMv82a, ProcExynos,
+ FeatureFullFP16,
+ FeatureDotProd]>;
def : ProcNoItin<"kryo", [ARMv8a, ProcKryo,
FeatureHWDivThumb,
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index b227eaed8d61..b7cd3a0c2dae 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -367,6 +367,18 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
unsigned RC;
+ bool FirstHalf;
+ const ARMBaseTargetMachine &ATM =
+ static_cast<const ARMBaseTargetMachine &>(TM);
+
+ // 'Q' should correspond to the low order register and 'R' to the high
+ // order register. Whether this corresponds to the upper or lower half
+ // depends on the endianness mode.
+ if (ExtraCode[0] == 'Q')
+ FirstHalf = ATM.isLittleEndian();
+ else
+ // ExtraCode[0] == 'R'.
+ FirstHalf = !ATM.isLittleEndian();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
if (InlineAsm::hasRegClassConstraint(Flags, RC) &&
ARM::GPRPairRegClass.hasSubClassEq(TRI->getRegClass(RC))) {
@@ -376,14 +388,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (!MO.isReg())
return true;
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ?
+ unsigned Reg = TRI->getSubReg(MO.getReg(), FirstHalf ?
ARM::gsub_0 : ARM::gsub_1);
O << ARMInstPrinter::getRegisterName(Reg);
return false;
}
if (NumVals != 2)
return true;
- unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1;
+ unsigned RegOp = FirstHalf ? OpNum : OpNum + 1;
if (RegOp >= MI->getNumOperands())
return true;
const MachineOperand &MO = MI->getOperand(RegOp);
@@ -815,15 +827,31 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
- bool IsIndirect = (TargetFlags & ARMII::MO_DLLIMPORT);
+ bool IsIndirect =
+ (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB));
if (!IsIndirect)
return getSymbol(GV);
SmallString<128> Name;
- Name = "__imp_";
+ if (TargetFlags & ARMII::MO_DLLIMPORT)
+ Name = "__imp_";
+ else if (TargetFlags & ARMII::MO_COFFSTUB)
+ Name = ".refptr.";
getNameWithPrefix(Name, GV);
- return OutContext.getOrCreateSymbol(Name);
+ MCSymbol *MCSym = OutContext.getOrCreateSymbol(Name);
+
+ if (TargetFlags & ARMII::MO_COFFSTUB) {
+ MachineModuleInfoCOFF &MMICOFF =
+ MMI->getObjFileInfo<MachineModuleInfoCOFF>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMICOFF.getGVStubEntry(MCSym);
+
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV), true);
+ }
+
+ return MCSym;
} else if (Subtarget->isTargetELF()) {
return getSymbol(GV);
}
@@ -1043,10 +1071,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
const MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo *TargetRegInfo =
+ MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MachineRegInfo = MF.getRegInfo();
const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned FramePtr = TargetRegInfo->getFrameRegister(MF);
unsigned Opc = MI->getOpcode();
unsigned SrcReg, DstReg;
@@ -1103,7 +1133,9 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
if (MO.isUndef()) {
assert(RegList.empty() &&
"Pad registers must come before restored ones");
- Pad += 4;
+ unsigned Width =
+ TargetRegInfo->getRegSizeInBits(MO.getReg(), MachineRegInfo) / 8;
+ Pad += Width;
continue;
}
RegList.push_back(MO.getReg());
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index b1c2031c7d7b..bbebed59c851 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -708,8 +708,12 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return MCID.getSize();
// If this machine instr is an inline asm, measure it.
- if (MI.getOpcode() == ARM::INLINEASM)
- return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ if (MI.getOpcode() == ARM::INLINEASM) {
+ unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
+ Size = alignTo(Size, 4);
+ return Size;
+ }
unsigned Opc = MI.getOpcode();
switch (Opc) {
default:
@@ -935,9 +939,9 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Mov->addRegisterKilled(SrcReg, TRI);
}
-bool ARMBaseInstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
// VMOVRRD is also a copy instruction but it requires
// special way of handling. It is more complex copy version
// and since that we are not considering it. For recognition
@@ -971,8 +975,6 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -984,7 +986,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
switch (TRI->getSpillSize(*RC)) {
case 2:
if (ARM::HPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::VSTRH))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
@@ -995,14 +997,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::STRi12))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
@@ -1013,7 +1015,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
@@ -1021,7 +1023,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.add(predOps(ARMCC::AL));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
if (Subtarget.hasV5TEOps()) {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
@@ -1029,7 +1031,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else {
// Fallback to STM instruction, which has existed since the dawn of
// time.
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
.addFrameIndex(FI)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
@@ -1043,14 +1045,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
- BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addMemOperand(MMO)
@@ -1063,14 +1065,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
+ get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
@@ -1086,14 +1089,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
- BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
+ get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
@@ -1107,7 +1111,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
@@ -1172,8 +1176,14 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
- const MachineMemOperand *Dummy;
- return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return true;
+ }
+ return false;
}
void ARMBaseInstrInfo::
@@ -1386,8 +1396,14 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
- const MachineMemOperand *Dummy;
- return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return true;
+ }
+ return false;
}
/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
@@ -1432,9 +1448,8 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
SmallVector<unsigned, 6> ScratchRegs;
for(unsigned I = 5; I < MI->getNumOperands(); ++I)
ScratchRegs.push_back(MI->getOperand(I).getReg());
- llvm::sort(ScratchRegs.begin(), ScratchRegs.end(),
- [&TRI](const unsigned &Reg1,
- const unsigned &Reg2) -> bool {
+ llvm::sort(ScratchRegs,
+ [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
return TRI.getEncodingValue(Reg1) <
TRI.getEncodingValue(Reg2);
});
@@ -1590,11 +1605,10 @@ void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
unsigned CPI = Orig.getOperand(1).getIndex();
unsigned PCLabelId = duplicateCPV(MF, CPI);
- MachineInstrBuilder MIB =
- BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
- .addConstantPoolIndex(CPI)
- .addImm(PCLabelId);
- MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end());
+ BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
+ .addConstantPoolIndex(CPI)
+ .addImm(PCLabelId)
+ .cloneMemRefs(Orig);
break;
}
}
@@ -2185,6 +2199,7 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::tSUBSi8, ARM::tSUBi8},
{ARM::tSUBSrr, ARM::tSUBrr},
{ARM::tSBCS, ARM::tSBC},
+ {ARM::tRSBS, ARM::tRSB},
{ARM::t2ADDSri, ARM::t2ADDri},
{ARM::t2ADDSrr, ARM::t2ADDrr},
@@ -2949,6 +2964,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
+ MI->clearRegisterDeads(ARM::CPSR);
+
return true;
}
@@ -4534,9 +4551,9 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
MIB.addReg(Reg, RegState::Kill)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
- .add(predOps(ARMCC::AL));
+ .addImm(0)
+ .cloneMemRefs(*MI)
+ .add(predOps(ARMCC::AL));
}
bool
@@ -5061,3 +5078,32 @@ bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
}
llvm_unreachable("Target dependent opcode missing");
}
+
+std::pair<unsigned, unsigned>
+ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ const unsigned Mask = ARMII::MO_OPTION_MASK;
+ return std::make_pair(TF & Mask, TF & ~Mask);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace ARMII;
+
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
+ return makeArrayRef(TargetFlags);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+ using namespace ARMII;
+
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_COFFSTUB, "arm-coffstub"},
+ {MO_GOT, "arm-got"},
+ {MO_SBREL, "arm-sbrel"},
+ {MO_DLLIMPORT, "arm-dllimport"},
+ {MO_SECREL, "arm-secrel"},
+ {MO_NONLAZY, "arm-nonlazy"}};
+ return makeArrayRef(TargetFlags);
+}
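
The two serializable-flag accessors added above are what allow MIR to print and parse ARM operand target flags by name. As a rough illustration of the split performed by decomposeMachineOperandsTargetFlags (assuming, consistently with the two tables above, that MO_LO16 sits inside MO_OPTION_MASK while MO_NONLAZY is one of the bit flags outside it):

    // Illustration only; assumes ARMBaseInfo.h is included for the ARMII enum.
    unsigned TF = ARMII::MO_LO16 | ARMII::MO_NONLAZY;
    unsigned Direct  = TF & ARMII::MO_OPTION_MASK;   // ARMII::MO_LO16, named "arm-lo16"
    unsigned Bitmask = TF & ~ARMII::MO_OPTION_MASK;  // ARMII::MO_NONLAZY, named "arm-nonlazy"
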
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index b54be15097b1..de1f307083ba 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -101,6 +101,12 @@ protected:
unsigned OpIdx1,
unsigned OpIdx2) const override;
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another, return true along with
+ /// @Source machine operand and @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
@@ -201,9 +207,6 @@ public:
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
-
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -331,6 +334,13 @@ public:
/// Get the number of addresses by LDM or VLDM or zero for unknown.
unsigned getNumLDMAddresses(const MachineInstr &MI) const;
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableBitmaskMachineOperandTargetFlags() const override;
+
private:
unsigned getInstBundleLength(const MachineInstr &MI) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 5342e6e2cd13..02b3daf3c6fd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -209,6 +209,11 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+bool ARMBaseRegisterInfo::
+isAsmClobberable(const MachineFunction &MF, unsigned PhysReg) const {
+ return !getReservedRegs(MF).test(PhysReg);
+}
+
const TargetRegisterClass *
ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &) const {
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index f755f66a0f3a..45d29ebc0bd3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -131,6 +131,8 @@ public:
CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
@@ -154,7 +156,6 @@ public:
void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
bool hasBasePointer(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
index 47f998b696f5..8e80c32bcf89 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -237,7 +237,7 @@ void ARMCallLowering::splitToValueTypes(
/// Lower the return value for the already existing \p Ret. This assumes that
/// \p MIRBuilder's insertion point is correct.
bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg,
+ const Value *Val, ArrayRef<unsigned> VRegs,
MachineInstrBuilder &Ret) const {
if (!Val)
// Nothing to do here.
@@ -251,16 +251,24 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
if (!isSupportedType(DL, TLI, Val->getType()))
return false;
- SmallVector<ArgInfo, 4> SplitVTs;
- SmallVector<unsigned, 4> Regs;
- ArgInfo RetInfo(VReg, Val->getType());
- setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
- splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) {
- Regs.push_back(Reg);
- });
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
- if (Regs.size() > 1)
- MIRBuilder.buildUnmerge(Regs, VReg);
+ SmallVector<ArgInfo, 4> SplitVTs;
+ LLVMContext &Ctx = Val->getType()->getContext();
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ ArgInfo CurArgInfo(VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx));
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+
+ SmallVector<unsigned, 4> Regs;
+ splitToValueTypes(
+ CurArgInfo, SplitVTs, MF,
+ [&](unsigned Reg, uint64_t Offset) { Regs.push_back(Reg); });
+ if (Regs.size() > 1)
+ MIRBuilder.buildUnmerge(Regs, VRegs[i]);
+ }
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
@@ -270,14 +278,15 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
}
bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
- assert(!Val == !VReg && "Return value without a vreg");
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
+ assert(!Val == VRegs.empty() && "Return value without a vreg");
auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>();
unsigned Opcode = ST.getReturnOpcode();
auto Ret = MIRBuilder.buildInstrNoInsert(Opcode).add(predOps(ARMCC::AL));
- if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret))
+ if (!lowerReturnVal(MIRBuilder, Val, VRegs, Ret))
return false;
MIRBuilder.insertInstr(Ret);
@@ -420,7 +429,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
auto &TLI = *getTLI<ARMTargetLowering>();
auto Subtarget = TLI.getSubtarget();
- if (Subtarget->isThumb())
+ if (Subtarget->isThumb1Only())
return false;
// Quick exit if there aren't any args
@@ -491,6 +500,22 @@ struct CallReturnHandler : public IncomingValueHandler {
MachineInstrBuilder MIB;
};
+// FIXME: This should move to the ARMSubtarget when it supports all the opcodes.
+unsigned getCallOpcode(const ARMSubtarget &STI, bool isDirect) {
+ if (isDirect)
+ return STI.isThumb() ? ARM::tBL : ARM::BL;
+
+ if (STI.isThumb())
+ return ARM::tBLXr;
+
+ if (STI.hasV5TOps())
+ return ARM::BLX;
+
+ if (STI.hasV4TOps())
+ return ARM::BX_CALL;
+
+ return ARM::BMOVPCRX_CALL;
+}
} // end anonymous namespace
bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
@@ -508,27 +533,34 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (STI.genLongCalls())
return false;
+ if (STI.isThumb1Only())
+ return false;
+
auto CallSeqStart = MIRBuilder.buildInstr(ARM::ADJCALLSTACKDOWN);
// Create the call instruction so we can add the implicit uses of arg
// registers, but don't insert it yet.
bool isDirect = !Callee.isReg();
- auto CallOpcode =
- isDirect ? ARM::BL
- : STI.hasV5TOps()
- ? ARM::BLX
- : STI.hasV4TOps() ? ARM::BX_CALL : ARM::BMOVPCRX_CALL;
- auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode)
- .add(Callee)
- .addRegMask(TRI->getCallPreservedMask(MF, CallConv));
- if (Callee.isReg()) {
+ auto CallOpcode = getCallOpcode(STI, isDirect);
+ auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode);
+
+ bool isThumb = STI.isThumb();
+ if (isThumb)
+ MIB.add(predOps(ARMCC::AL));
+
+ MIB.add(Callee);
+ if (!isDirect) {
auto CalleeReg = Callee.getReg();
- if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg))
- MIB->getOperand(0).setReg(constrainOperandRegClass(
+ if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) {
+ unsigned CalleeIdx = isThumb ? 2 : 0;
+ MIB->getOperand(CalleeIdx).setReg(constrainOperandRegClass(
MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(),
- *MIB.getInstr(), MIB->getDesc(), Callee, 0));
+ *MIB.getInstr(), MIB->getDesc(), Callee, CalleeIdx));
+ }
}
+ MIB.addRegMask(TRI->getCallPreservedMask(MF, CallConv));
+
SmallVector<ArgInfo, 8> ArgInfos;
for (auto Arg : OrigArgs) {
if (!isSupportedType(DL, TLI, Arg.Ty))
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
index 86854c53f179..45a988a2f00e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -33,8 +33,8 @@ class ARMCallLowering : public CallLowering {
public:
ARMCallLowering(const ARMTargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
@@ -45,7 +45,8 @@ public:
private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
- unsigned VReg, MachineInstrBuilder &Ret) const;
+ ArrayRef<unsigned> VRegs,
+ MachineInstrBuilder &Ret) const;
using SplitArgTy = std::function<void(unsigned Reg, uint64_t Offset)>;
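
The switch from a single VReg to ArrayRef<unsigned> VRegs in the declarations above mirrors the .cpp change earlier in this diff: a return value is first split with ComputeValueVTs, and each resulting value type presumably arrives in its own virtual register from the GlobalISel IR translator. A sketch of that invariant (identifier names here are illustrative, not part of the patch):

    // For a function returning e.g. {i32, i32}, ComputeValueVTs yields two
    // EVTs, so lowerReturn is handed two virtual registers, one per element.
    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, RetTy, SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "expected one virtual register per split value type");
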
diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
index 24071277427a..b631c2bc687b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -54,47 +54,108 @@ EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false),
cl::desc("Use DSP instructions for scalar operations\
with immediate operands"));
-namespace {
+// The goal of this pass is to enable more efficient code generation for
+// operations on narrow types (i.e. types with < 32-bits) and this is a
+// motivating IR code example:
+//
+// define hidden i32 @cmp(i8 zeroext) {
+// %2 = add i8 %0, -49
+// %3 = icmp ult i8 %2, 3
+// ..
+// }
+//
+// The issue here is that i8 is type-legalized to i32 because i8 is not a
+// legal type. Thus, arithmetic is done in integer-precision, but then the
+// byte value is masked out as follows:
+//
+// t19: i32 = add t4, Constant:i32<-49>
+// t24: i32 = and t19, Constant:i32<255>
+//
+// Consequently, we generate code like this:
+//
+// subs r0, #49
+// uxtb r1, r0
+// cmp r1, #3
+//
+// This shows that masking out the byte value results in generation of
+// the UXTB instruction. This is not optimal as r0 already contains the byte
+// value we need, and so instead we can just generate:
+//
+// sub.w r1, r0, #49
+// cmp r1, #3
+//
+// We achieve this by type promoting the IR to i32 like so for this example:
+//
+// define i32 @cmp(i8 zeroext %c) {
+// %0 = zext i8 %c to i32
+// %c.off = add i32 %0, -49
+// %1 = icmp ult i32 %c.off, 3
+// ..
+// }
+//
+// For this to be valid and legal, we need to prove that the i32 add is
+// producing the same value as the i8 addition, and that e.g. no overflow
+// happens.
+//
+// A brief sketch of the algorithm and some terminology.
+// We pattern match interesting IR patterns:
+// - which have "sources": instructions producing narrow values (i8, i16), and
+// - which have "sinks": instructions consuming these narrow values.
+//
+// We collect all instructions connecting sources and sinks in a worklist, so
+// that we can mutate these instructions and perform type promotion when it is
+// legal to do so.
+namespace {
class IRPromoter {
SmallPtrSet<Value*, 8> NewInsts;
- SmallVector<Instruction*, 4> InstsToRemove;
+ SmallPtrSet<Instruction*, 4> InstsToRemove;
+ DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap;
+ SmallPtrSet<Value*, 8> Promoted;
Module *M = nullptr;
LLVMContext &Ctx;
+ IntegerType *ExtTy = nullptr;
+ IntegerType *OrigTy = nullptr;
+ SmallPtrSetImpl<Value*> *Visited;
+ SmallPtrSetImpl<Value*> *Sources;
+ SmallPtrSetImpl<Instruction*> *Sinks;
+ SmallPtrSetImpl<Instruction*> *SafeToPromote;
+
+ void ReplaceAllUsersOfWith(Value *From, Value *To);
+ void PrepareConstants(void);
+ void ExtendSources(void);
+ void ConvertTruncs(void);
+ void PromoteTree(void);
+ void TruncateSinks(void);
+ void Cleanup(void);
public:
- IRPromoter(Module *M) : M(M), Ctx(M->getContext()) { }
+ IRPromoter(Module *M) : M(M), Ctx(M->getContext()),
+ ExtTy(Type::getInt32Ty(Ctx)) { }
- void Cleanup() {
- for (auto *I : InstsToRemove) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n");
- I->dropAllReferences();
- I->eraseFromParent();
- }
- InstsToRemove.clear();
- NewInsts.clear();
- }
void Mutate(Type *OrigTy,
SmallPtrSetImpl<Value*> &Visited,
- SmallPtrSetImpl<Value*> &Leaves,
- SmallPtrSetImpl<Instruction*> &Roots);
+ SmallPtrSetImpl<Value*> &Sources,
+ SmallPtrSetImpl<Instruction*> &Sinks,
+ SmallPtrSetImpl<Instruction*> &SafeToPromote);
};
class ARMCodeGenPrepare : public FunctionPass {
const ARMSubtarget *ST = nullptr;
IRPromoter *Promoter = nullptr;
std::set<Value*> AllVisited;
- Type *OrigTy = nullptr;
- unsigned TypeSize = 0;
+ SmallPtrSet<Instruction*, 8> SafeToPromote;
- bool isNarrowInstSupported(Instruction *I);
+ bool isSafeOverflow(Instruction *I);
bool isSupportedValue(Value *V);
bool isLegalToPromote(Value *V);
bool TryToPromote(Value *V);
public:
static char ID;
+ static unsigned TypeSize;
+ Type *OrigTy = nullptr;
ARMCodeGenPrepare() : FunctionPass(ID) {}
@@ -111,8 +172,7 @@ public:
}
-/// Can the given value generate sign bits.
-static bool isSigned(Value *V) {
+static bool generateSignBits(Value *V) {
if (!isa<Instruction>(V))
return false;
@@ -121,120 +181,226 @@ static bool isSigned(Value *V) {
Opc == Instruction::SRem;
}
+static bool EqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
+}
+
+static bool LessOrEqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() <= ARMCodeGenPrepare::TypeSize;
+}
+
+static bool GreaterThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize;
+}
+
+static bool LessThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() < ARMCodeGenPrepare::TypeSize;
+}
+
/// Some instructions can use 8- and 16-bit operands, and we don't need to
/// promote anything larger. We disallow booleans to make life easier when
/// dealing with icmps but allow any other integer that is <= 16 bits. Void
/// types are accepted so we can handle switches.
static bool isSupportedType(Value *V) {
- if (V->getType()->isVoidTy())
+ Type *Ty = V->getType();
+
+ // Allow voids and pointers, these won't be promoted.
+ if (Ty->isVoidTy() || Ty->isPointerTy())
return true;
- const IntegerType *IntTy = dyn_cast<IntegerType>(V->getType());
- if (!IntTy)
- return false;
+ if (auto *Ld = dyn_cast<LoadInst>(V))
+ Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
- // Don't try to promote boolean values.
- if (IntTy->getBitWidth() == 1)
+ if (!isa<IntegerType>(Ty) ||
+ cast<IntegerType>(V->getType())->getBitWidth() == 1)
return false;
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- return isSupportedType(ZExt->getOperand(0));
+ return LessOrEqualTypeSize(V);
+}
- return IntTy->getBitWidth() <= 16;
+/// Return true if the given value is a source in the use-def chain, producing
+/// a narrow 'TypeSize' value. These values will be zext to start the promotion
+/// of the tree to i32. We guarantee that these won't populate the upper bits
+/// of the register. ZExt on the loads will be free, and the same for call
+/// return values because we only accept ones that guarantee a zeroext ret val.
+/// Many arguments will have the zeroext attribute too, so those would be free
+/// too.
+static bool isSource(Value *V) {
+ if (!isa<IntegerType>(V->getType()))
+ return false;
+
+ // TODO Allow zext to be sources.
+ if (isa<Argument>(V))
+ return true;
+ else if (isa<LoadInst>(V))
+ return true;
+ else if (isa<BitCastInst>(V))
+ return true;
+ else if (auto *Call = dyn_cast<CallInst>(V))
+ return Call->hasRetAttr(Attribute::AttrKind::ZExt);
+ else if (auto *Trunc = dyn_cast<TruncInst>(V))
+ return EqualTypeSize(Trunc);
+ return false;
}
/// Return true if V will require any promoted values to be truncated for the
-/// use to be valid.
+/// IR to remain valid. We can't mutate the value type of these
+/// instructions.
static bool isSink(Value *V) {
- auto UsesNarrowValue = [](Value *V) {
- return V->getType()->getScalarSizeInBits() <= 32;
- };
-
+ // TODO The truncate also isn't actually necessary because we have already
+ // proved that the data value is kept within the range of the original data
+ // type.
+
+ // Sinks are:
+ // - points where the value in the register is being observed, such as an
+ // icmp, switch or store.
+ // - points where value types have to match, such as calls and returns.
+ // - zexts are included to ease the transformation and are generally removed
+ // later on.
if (auto *Store = dyn_cast<StoreInst>(V))
- return UsesNarrowValue(Store->getValueOperand());
+ return LessOrEqualTypeSize(Store->getValueOperand());
if (auto *Return = dyn_cast<ReturnInst>(V))
- return UsesNarrowValue(Return->getReturnValue());
+ return LessOrEqualTypeSize(Return->getReturnValue());
+ if (auto *ZExt = dyn_cast<ZExtInst>(V))
+ return GreaterThanTypeSize(ZExt);
+ if (auto *Switch = dyn_cast<SwitchInst>(V))
+ return LessThanTypeSize(Switch->getCondition());
+ if (auto *ICmp = dyn_cast<ICmpInst>(V))
+ return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0));
return isa<CallInst>(V);
}
-/// Return true if the given value is a leaf that will need to be zext'd.
-static bool isSource(Value *V) {
- if (isa<Argument>(V) && isSupportedType(V))
- return true;
- else if (isa<TruncInst>(V))
- return true;
- else if (auto *ZExt = dyn_cast<ZExtInst>(V))
- // ZExt can be a leaf if its the only user of a load.
- return isa<LoadInst>(ZExt->getOperand(0)) &&
- ZExt->getOperand(0)->hasOneUse();
- else if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Load = dyn_cast<LoadInst>(V)) {
- if (!isa<IntegerType>(Load->getType()))
- return false;
- // A load is a leaf, unless its already just being zext'd.
- if (Load->hasOneUse() && isa<ZExtInst>(*Load->use_begin()))
- return false;
-
- return true;
- }
- return false;
-}
-
/// Return whether the instruction can be promoted within any modifications to
-/// it's operands or result.
-static bool isSafeOverflow(Instruction *I) {
+/// its operands or result.
+bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
+ // FIXME Do we need NSW too?
if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
return true;
+ // We can support a potentially overflowing instruction (I) if:
+ // - It is only used by an unsigned icmp.
+ // - The icmp uses a constant.
+ // - The overflowing value (I) is decreasing, i.e. would underflow - wrapping
+ // around zero to become a larger number than before.
+ // - The underflowing instruction (I) also uses a constant.
+ //
+ // We can then use the two constants to calculate whether the result would
+ // wrap with respect to itself in the original bitwidth. If it doesn't wrap,
+ // but just underflows the range, the icmp would give the same result whether the
+ // result has been truncated or not. We calculate this by:
+ // - Zero extending both constants, if needed, to 32-bits.
+ // - Take the absolute value of I's constant, adding this to the icmp const.
+ // - Check that this value is not out of range for the small type. If it is, it
+ // means that it has underflowed enough to wrap around the icmp constant.
+ //
+ // For example:
+ //
+ // %sub = sub i8 %a, 2
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -2 == FE == 254
+ // But if this is evaluated as an i32
+ // %sub = -2 == FF FF FF FE == 4294967294
+ // So the unsigned compares (i8 and i32) would not yield the same result.
+ //
+ // Another way to look at it is:
+ // %a - 2 <= 254
+ // %a + 2 <= 254 + 2
+ // %a <= 256
+ // And we can't represent 256 in the i8 format, so we don't support it.
+ //
+ // Whereas:
+ //
+ // %sub = sub i8 %a, 1
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -1 == FF == 255
+ // As i32:
+ // %sub = -1 == FF FF FF FF == 4294967295
+ //
+ // In this case, the unsigned compare results would be the same and this
+ // would also be true for ult, uge and ugt:
+ // - (255 < 254) == (0xFFFFFFFF < 254) == false
+ // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
+ // - (255 > 254) == (0xFFFFFFFF > 254) == true
+ // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
+ //
+ // To demonstrate why we can't handle increasing values:
+ //
+ // %add = add i8 %a, 2
+ // %cmp = icmp ult i8 %add, 127
+ //
+ // If %a = 254, %add = 256 == (i8 0)
+ // As i32:
+ // %add = 256
+ //
+ // (0 < 127) != (256 < 127)
+
unsigned Opc = I->getOpcode();
- if (Opc == Instruction::Add || Opc == Instruction::Sub) {
- // We don't care if the add or sub could wrap if the value is decreasing
- // and is only being used by an unsigned compare.
- if (!I->hasOneUse() ||
- !isa<ICmpInst>(*I->user_begin()) ||
- !isa<ConstantInt>(I->getOperand(1)))
- return false;
+ if (Opc != Instruction::Add && Opc != Instruction::Sub)
+ return false;
- auto *CI = cast<ICmpInst>(*I->user_begin());
- if (CI->isSigned())
- return false;
+ if (!I->hasOneUse() ||
+ !isa<ICmpInst>(*I->user_begin()) ||
+ !isa<ConstantInt>(I->getOperand(1)))
+ return false;
- bool NegImm = cast<ConstantInt>(I->getOperand(1))->isNegative();
- bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
- ((Opc == Instruction::Add) && NegImm);
- if (!IsDecreasing)
- return false;
+ ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1));
+ bool NegImm = OverflowConst->isNegative();
+ bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
+ ((Opc == Instruction::Add) && NegImm);
+ if (!IsDecreasing)
+ return false;
- LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
- return true;
- }
+ // Don't support an icmp that deals with sign bits.
+ auto *CI = cast<ICmpInst>(*I->user_begin());
+ if (CI->isSigned() || CI->isEquality())
+ return false;
- // Otherwise, if an instruction is using a negative immediate we will need
- // to fix it up during the promotion.
- for (auto &Op : I->operands()) {
- if (auto *Const = dyn_cast<ConstantInt>(Op))
- if (Const->isNegative())
- return false;
- }
- return false;
+ ConstantInt *ICmpConst = nullptr;
+ if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
+ ICmpConst = Const;
+ else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
+ ICmpConst = Const;
+ else
+ return false;
+
+ // Now check that the result can't wrap on itself.
+ APInt Total = ICmpConst->getValue().getBitWidth() < 32 ?
+ ICmpConst->getValue().zext(32) : ICmpConst->getValue();
+
+ Total += OverflowConst->getValue().getBitWidth() < 32 ?
+ OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();
+
+ APInt Max = APInt::getAllOnesValue(ARMCodeGenPrepare::TypeSize);
+
+ if (Total.getBitWidth() > Max.getBitWidth()) {
+ if (Total.ugt(Max.zext(Total.getBitWidth())))
+ return false;
+ } else if (Max.getBitWidth() > Total.getBitWidth()) {
+ if (Total.zext(Max.getBitWidth()).ugt(Max))
+ return false;
+ } else if (Total.ugt(Max))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
+ return true;
}
static bool shouldPromote(Value *V) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
+ if (!isa<IntegerType>(V->getType()) || isSink(V))
return false;
- if (!isa<IntegerType>(V->getType()))
- return false;
+ if (isSource(V))
+ return true;
- if (isa<StoreInst>(I) || isa<TerminatorInst>(I) || isa<TruncInst>(I) ||
- isa<ICmpInst>(I))
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
return false;
- if (auto *ZExt = dyn_cast<ZExtInst>(I))
- return !ZExt->getDestTy()->isIntegerTy(32);
+ if (isa<ICmpInst>(I))
+ return false;
return true;
}
@@ -245,24 +411,16 @@ static bool isPromotedResultSafe(Value *V) {
if (!isa<Instruction>(V))
return true;
- if (isSigned(V))
+ if (generateSignBits(V))
return false;
- // If I is only being used by something that will require its value to be
- // truncated, then we don't care about the promoted result.
- auto *I = cast<Instruction>(V);
- if (I->hasOneUse() && isSink(*I->use_begin()))
- return true;
-
- if (isa<OverflowingBinaryOperator>(I))
- return isSafeOverflow(I);
- return true;
+ return !isa<OverflowingBinaryOperator>(V);
}
/// Return the intrinsic for the instruction that can perform the same
/// operation but on a narrow type. This is using the parallel dsp intrinsics
/// on scalar values.
-static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
+static Intrinsic::ID getNarrowIntrinsic(Instruction *I) {
// Whether we use the signed or unsigned versions of these intrinsics
// doesn't matter because we're not using the GE bits that they set in
// the APSR.
@@ -270,124 +428,163 @@ static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
default:
break;
case Instruction::Add:
- return TypeSize == 16 ? Intrinsic::arm_uadd16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_uadd16 :
Intrinsic::arm_uadd8;
case Instruction::Sub:
- return TypeSize == 16 ? Intrinsic::arm_usub16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_usub16 :
Intrinsic::arm_usub8;
}
llvm_unreachable("unhandled opcode for narrow intrinsic");
}
-void IRPromoter::Mutate(Type *OrigTy,
- SmallPtrSetImpl<Value*> &Visited,
- SmallPtrSetImpl<Value*> &Leaves,
- SmallPtrSetImpl<Instruction*> &Roots) {
+void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
+ SmallVector<Instruction*, 4> Users;
+ Instruction *InstTo = dyn_cast<Instruction>(To);
+ bool ReplacedAll = true;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Replacing " << *From << " with " << *To
+ << "\n");
+
+ for (Use &U : From->uses()) {
+ auto *User = cast<Instruction>(U.getUser());
+ if (InstTo && User->isIdenticalTo(InstTo)) {
+ ReplacedAll = false;
+ continue;
+ }
+ Users.push_back(User);
+ }
+
+ for (auto *U : Users)
+ U->replaceUsesOfWith(From, To);
+
+ if (ReplacedAll)
+ if (auto *I = dyn_cast<Instruction>(From))
+ InstsToRemove.insert(I);
+}
+
+void IRPromoter::PrepareConstants() {
IRBuilder<> Builder{Ctx};
- Type *ExtTy = Type::getInt32Ty(M->getContext());
- unsigned TypeSize = OrigTy->getPrimitiveSizeInBits();
- SmallPtrSet<Value*, 8> Promoted;
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " << TypeSize
- << " to 32-bits\n");
-
- auto ReplaceAllUsersOfWith = [&](Value *From, Value *To) {
- SmallVector<Instruction*, 4> Users;
- Instruction *InstTo = dyn_cast<Instruction>(To);
- for (Use &U : From->uses()) {
- auto *User = cast<Instruction>(U.getUser());
- if (InstTo && User->isIdenticalTo(InstTo))
+ // First step is to prepare the instructions for mutation. Most constants
+ // just need to be zero extended into their new type, but complications arise
+ // because:
+ // - For nuw binary operators, negative immediates would need sign extending;
+ // however, instead we'll change them to positive and zext them. We can do
+ // this because:
+ // > The operators that can wrap are: add, sub, mul and shl.
+ // > shl interprets its second operand as unsigned and if the first operand
+ // is an immediate, it will need zext to be nuw.
+ // > I'm assuming mul has to interpret immediates as unsigned for nuw.
+ // > Which leaves the nuw add and sub to be handled; as with shl, if an
+ // immediate is used as operand 0, it will need zext to be nuw.
+ // - We also allow add and sub to safely overflow in certain circumstances
+ // and only when the value (operand 0) is being decreased.
+ //
+ // For adds and subs, that are either nuw or safely wrap and use a negative
+ // immediate as operand 1, we create an equivalent instruction using a
+ // positive immediate. That positive immediate can then be zext along with
+ // all the other immediates later.
+ for (auto *V : *Visited) {
+ if (!isa<Instruction>(V))
+ continue;
+
+ auto *I = cast<Instruction>(V);
+ if (SafeToPromote->count(I)) {
+
+ if (!isa<OverflowingBinaryOperator>(I))
continue;
- Users.push_back(User);
- }
- for (auto &U : Users)
- U->replaceUsesOfWith(From, To);
- };
+ if (auto *Const = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!Const->isNegative())
+ break;
- auto FixConst = [&](ConstantInt *Const, Instruction *I) {
- Constant *NewConst = nullptr;
- if (isSafeOverflow(I)) {
- NewConst = (Const->isNegative()) ?
- ConstantExpr::getSExt(Const, ExtTy) :
- ConstantExpr::getZExt(Const, ExtTy);
- } else {
- uint64_t NewVal = *Const->getValue().getRawData();
- if (Const->getType() == Type::getInt16Ty(Ctx))
- NewVal &= 0xFFFF;
- else
- NewVal &= 0xFF;
- NewConst = ConstantInt::get(ExtTy, NewVal);
+ unsigned Opc = I->getOpcode();
+ if (Opc != Instruction::Add && Opc != Instruction::Sub)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
+ auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
+ Builder.SetInsertPoint(I);
+ Value *NewVal = Opc == Instruction::Sub ?
+ Builder.CreateAdd(I->getOperand(0), NewConst) :
+ Builder.CreateSub(I->getOperand(0), NewConst);
+ LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
+
+ if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
+ NewInst->copyIRFlags(I);
+ NewInsts.insert(NewInst);
+ }
+ InstsToRemove.insert(I);
+ I->replaceAllUsesWith(NewVal);
+ }
}
- I->replaceUsesOfWith(Const, NewConst);
- };
+ }
+ for (auto *I : NewInsts)
+ Visited->insert(I);
+}
- auto InsertDSPIntrinsic = [&](Instruction *I) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
- << *I << "\n");
- Function *DSPInst =
- Intrinsic::getDeclaration(M, getNarrowIntrinsic(I, TypeSize));
- Builder.SetInsertPoint(I);
- Builder.SetCurrentDebugLocation(I->getDebugLoc());
- Value *Args[] = { I->getOperand(0), I->getOperand(1) };
- CallInst *Call = Builder.CreateCall(DSPInst, Args);
- ReplaceAllUsersOfWith(I, Call);
- InstsToRemove.push_back(I);
- NewInsts.insert(Call);
- };
+void IRPromoter::ExtendSources() {
+ IRBuilder<> Builder{Ctx};
auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
+ assert(V->getType() != ExtTy && "zext already extends to i32");
LLVM_DEBUG(dbgs() << "ARM CGP: Inserting ZExt for " << *V << "\n");
Builder.SetInsertPoint(InsertPt);
if (auto *I = dyn_cast<Instruction>(V))
Builder.SetCurrentDebugLocation(I->getDebugLoc());
- auto *ZExt = cast<Instruction>(Builder.CreateZExt(V, ExtTy));
- if (isa<Argument>(V))
- ZExt->moveBefore(InsertPt);
- else
- ZExt->moveAfter(InsertPt);
+
+ Value *ZExt = Builder.CreateZExt(V, ExtTy);
+ if (auto *I = dyn_cast<Instruction>(ZExt)) {
+ if (isa<Argument>(V))
+ I->moveBefore(InsertPt);
+ else
+ I->moveAfter(InsertPt);
+ NewInsts.insert(I);
+ }
+
ReplaceAllUsersOfWith(V, ZExt);
- NewInsts.insert(ZExt);
};
- // First, insert extending instructions between the leaves and their users.
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting leaves:\n");
- for (auto V : Leaves) {
+ // Now, insert extending instructions between the sources and their users.
+ LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n");
+ for (auto V : *Sources) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n");
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- ZExt->mutateType(ExtTy);
- else if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I);
else if (auto *Arg = dyn_cast<Argument>(V)) {
BasicBlock &BB = Arg->getParent()->front();
InsertZExt(Arg, &*BB.getFirstInsertionPt());
} else {
- llvm_unreachable("unhandled leaf that needs extending");
+ llvm_unreachable("unhandled source that needs extending");
}
Promoted.insert(V);
}
+}
+void IRPromoter::PromoteTree() {
LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n");
- // Then mutate the types of the instructions within the tree. Here we handle
- // constant operands.
- for (auto *V : Visited) {
- if (Leaves.count(V))
- continue;
- if (!isa<Instruction>(V))
+ IRBuilder<> Builder{Ctx};
+
+ // Mutate the types of the instructions within the tree. Here we handle
+ // constant operands.
+ for (auto *V : *Visited) {
+ if (Sources->count(V))
continue;
auto *I = cast<Instruction>(V);
- if (Roots.count(I))
+ if (Sinks->count(I))
continue;
- for (auto &U : I->operands()) {
- if ((U->getType() == ExtTy) || !isSupportedType(&*U))
+ for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
+ Value *Op = I->getOperand(i);
+ if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType()))
continue;
- if (auto *Const = dyn_cast<ConstantInt>(&*U))
- FixConst(Const, I);
- else if (isa<UndefValue>(&*U))
- U->mutateType(ExtTy);
+ if (auto *Const = dyn_cast<ConstantInt>(Op)) {
+ Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy);
+ I->setOperand(i, NewConst);
+ } else if (isa<UndefValue>(Op))
+ I->setOperand(i, UndefValue::get(ExtTy));
}
if (shouldPromote(I)) {
@@ -396,91 +593,215 @@ void IRPromoter::Mutate(Type *OrigTy,
}
}
- // Now we need to remove any zexts that have become unnecessary, as well
- // as insert any intrinsics.
- for (auto *V : Visited) {
- if (Leaves.count(V))
+ // Finally, any instructions that should be promoted but haven't yet been,
+ // need to be handled using intrinsics.
+ for (auto *V : *Visited) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
continue;
- if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- if (ZExt->getDestTy() != ExtTy) {
- ZExt->mutateType(ExtTy);
- Promoted.insert(ZExt);
- }
- else if (ZExt->getSrcTy() == ExtTy) {
- ReplaceAllUsersOfWith(V, ZExt->getOperand(0));
- InstsToRemove.push_back(ZExt);
- }
+
+ if (Sources->count(I) || Sinks->count(I))
continue;
- }
- if (!shouldPromote(V) || isPromotedResultSafe(V))
+ if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I))
continue;
+
+ assert(EnableDSP && "DSP intrinsic insertion not enabled!");
// Replace unsafe instructions with appropriate intrinsic calls.
- InsertDSPIntrinsic(cast<Instruction>(V));
+ LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
+ << *I << "\n");
+ Function *DSPInst =
+ Intrinsic::getDeclaration(M, getNarrowIntrinsic(I));
+ Builder.SetInsertPoint(I);
+ Builder.SetCurrentDebugLocation(I->getDebugLoc());
+ Value *Args[] = { I->getOperand(0), I->getOperand(1) };
+ CallInst *Call = Builder.CreateCall(DSPInst, Args);
+ NewInsts.insert(Call);
+ ReplaceAllUsersOfWith(I, Call);
}
+}
+
+void IRPromoter::TruncateSinks() {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n");
+
+ IRBuilder<> Builder{Ctx};
+
+ auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* {
+ if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
+ return nullptr;
+
+ if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources->count(V))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for "
+ << *V << "\n");
+ Builder.SetInsertPoint(cast<Instruction>(V));
+ auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
+ if (Trunc)
+ NewInsts.insert(Trunc);
+ return Trunc;
+ };
- LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the roots:\n");
// Fix up any stores or returns that use the results of the promoted
// chain.
- for (auto I : Roots) {
- LLVM_DEBUG(dbgs() << " - " << *I << "\n");
- Type *TruncTy = OrigTy;
- if (auto *Store = dyn_cast<StoreInst>(I)) {
- auto *PtrTy = cast<PointerType>(Store->getPointerOperandType());
- TruncTy = PtrTy->getElementType();
- } else if (isa<ReturnInst>(I)) {
- Function *F = I->getParent()->getParent();
- TruncTy = F->getFunctionType()->getReturnType();
+ for (auto I : *Sinks) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: For Sink: " << *I << "\n");
+
+ // Handle calls separately as we need to iterate over arg operands.
+ if (auto *Call = dyn_cast<CallInst>(I)) {
+ for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+ Value *Arg = Call->getArgOperand(i);
+ Type *Ty = TruncTysMap[Call][i];
+ if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
+ Trunc->moveBefore(Call);
+ Call->setArgOperand(i, Trunc);
+ }
+ }
+ continue;
}
+ // Special case switches because we need to truncate the condition.
+ if (auto *Switch = dyn_cast<SwitchInst>(I)) {
+ Type *Ty = TruncTysMap[Switch][0];
+ if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
+ Trunc->moveBefore(Switch);
+ Switch->setCondition(Trunc);
+ }
+ continue;
+ }
+
+ // Now handle the others.
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- Value *V = I->getOperand(i);
- if (Promoted.count(V) || NewInsts.count(V)) {
- if (auto *Op = dyn_cast<Instruction>(V)) {
-
- if (auto *Call = dyn_cast<CallInst>(I))
- TruncTy = Call->getFunctionType()->getParamType(i);
-
- if (TruncTy == ExtTy)
- continue;
-
- LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy
- << " Trunc for " << *Op << "\n");
- Builder.SetInsertPoint(Op);
- auto *Trunc = cast<Instruction>(Builder.CreateTrunc(Op, TruncTy));
- Trunc->moveBefore(I);
- I->setOperand(i, Trunc);
- NewInsts.insert(Trunc);
- }
+ Type *Ty = TruncTysMap[I][i];
+ if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
+ Trunc->moveBefore(I);
+ I->setOperand(i, Trunc);
}
}
}
- LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete.\n");
}
-bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
- if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
- return false;
+void IRPromoter::Cleanup() {
+ // Some zexts will now have become redundant, along with their trunc
+ // operands, so remove them
+ for (auto V : *Visited) {
+ if (!isa<CastInst>(V))
+ continue;
- if (ST->isThumb() && !ST->hasThumb2())
- return false;
+ auto ZExt = cast<CastInst>(V);
+ if (ZExt->getDestTy() != ExtTy)
+ continue;
- if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
- return false;
+ Value *Src = ZExt->getOperand(0);
+ if (ZExt->getSrcTy() == ZExt->getDestTy()) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast: " << *ZExt
+ << "\n");
+ ReplaceAllUsersOfWith(ZExt, Src);
+ continue;
+ }
- // TODO
- // Would it be profitable? For Thumb code, these parallel DSP instructions
- // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
- // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
- // halved. They also do not take immediates as operands.
- for (auto &Op : I->operands()) {
- if (isa<Constant>(Op)) {
- if (!EnableDSPWithImms)
- return false;
+ // For any truncs that we insert to handle zexts, we can replace the
+ // result of the zext with the input to the trunc.
+ if (NewInsts.count(Src) && isa<ZExtInst>(V) && isa<TruncInst>(Src)) {
+ auto *Trunc = cast<TruncInst>(Src);
+ assert(Trunc->getOperand(0)->getType() == ExtTy &&
+ "expected inserted trunc to be operating on i32");
+ ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
}
}
- return true;
+
+ for (auto *I : InstsToRemove) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n");
+ I->dropAllReferences();
+ I->eraseFromParent();
+ }
+
+ InstsToRemove.clear();
+ NewInsts.clear();
+ TruncTysMap.clear();
+ Promoted.clear();
+}
+
+void IRPromoter::ConvertTruncs() {
+ IRBuilder<> Builder{Ctx};
+
+ for (auto *V : *Visited) {
+ if (!isa<TruncInst>(V) || Sources->count(V))
+ continue;
+
+ auto *Trunc = cast<TruncInst>(V);
+ assert(LessThanTypeSize(Trunc) && "expected narrow trunc");
+
+ Builder.SetInsertPoint(Trunc);
+ unsigned NumBits =
+ cast<IntegerType>(Trunc->getType())->getScalarSizeInBits();
+ ConstantInt *Mask = ConstantInt::get(Ctx, APInt::getMaxValue(NumBits));
+ Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
+
+ if (auto *I = dyn_cast<Instruction>(Masked))
+ NewInsts.insert(I);
+
+ ReplaceAllUsersOfWith(Trunc, Masked);
+ }
+}
+
+void IRPromoter::Mutate(Type *OrigTy,
+ SmallPtrSetImpl<Value*> &Visited,
+ SmallPtrSetImpl<Value*> &Sources,
+ SmallPtrSetImpl<Instruction*> &Sinks,
+ SmallPtrSetImpl<Instruction*> &SafeToPromote) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains from "
+ << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
+
+ assert(isa<IntegerType>(OrigTy) && "expected integer type");
+ this->OrigTy = cast<IntegerType>(OrigTy);
+ assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() &&
+ "original type not smaller than extended type");
+
+ this->Visited = &Visited;
+ this->Sources = &Sources;
+ this->Sinks = &Sinks;
+ this->SafeToPromote = &SafeToPromote;
+
+ // Cache original types of the values that will likely need truncating
+ for (auto *I : Sinks) {
+ if (auto *Call = dyn_cast<CallInst>(I)) {
+ for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+ Value *Arg = Call->getArgOperand(i);
+ TruncTysMap[Call].push_back(Arg->getType());
+ }
+ } else if (auto *Switch = dyn_cast<SwitchInst>(I))
+ TruncTysMap[I].push_back(Switch->getCondition()->getType());
+ else {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ TruncTysMap[I].push_back(I->getOperand(i)->getType());
+ }
+ }
+
+ // Convert adds and subs using negative immediates to equivalent instructions
+ // that use positive constants.
+ PrepareConstants();
+
+ // Insert zext instructions between sources and their users.
+ ExtendSources();
+
+ // Convert any truncs, that aren't sources, into AND masks.
+ ConvertTruncs();
+
+ // Promote visited instructions, mutating their types in place. Also insert
+ // DSP intrinsics, if enabled, for adds and subs which would be unsafe to
+ // promote.
+ PromoteTree();
+
+ // Insert trunc instructions for use by calls, stores, etc.
+ TruncateSinks();
+
+ // Finally, remove unnecessary zexts and truncs, delete old instructions and
+ // clear the data structures.
+ Cleanup();
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n");
}
/// We accept most instructions, as well as Arguments and ConstantInsts. We
@@ -488,102 +809,133 @@ bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
/// return value is zeroext. We don't allow opcodes that can introduce sign
/// bits.
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Is " << *V << " supported?\n");
-
- // Non-instruction values that we can handle.
- if (isa<ConstantInt>(V) || isa<Argument>(V))
- return true;
+ if (auto *I = dyn_cast<ICmpInst>(V)) {
+ // Now that we allow types smaller than TypeSize, only allow icmps of
+ // TypeSize because smaller ones will require a trunc to be legalised.
+ // TODO: Allow icmp of smaller types, and calculate at the end
+ // whether the transform would be beneficial.
+ if (isa<PointerType>(I->getOperand(0)->getType()))
+ return true;
+ return EqualTypeSize(I->getOperand(0));
+ }
// Memory instructions
- if (isa<StoreInst>(V) || isa<LoadInst>(V) || isa<GetElementPtrInst>(V))
+ if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
return true;
// Branches and targets.
- if (auto *ICmp = dyn_cast<ICmpInst>(V))
- return ICmp->isEquality() || !ICmp->isSigned();
-
if (isa<BranchInst>(V) || isa<SwitchInst>(V) || isa<BasicBlock>(V))
return true;
- if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V))
- return true;
+ // Non-instruction values that we can handle.
+ if ((isa<Constant>(V) && !isa<ConstantExpr>(V)) || isa<Argument>(V))
+ return isSupportedType(V);
+
+ if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V) ||
+ isa<LoadInst>(V))
+ return isSupportedType(V);
+
+ if (isa<SExtInst>(V))
+ return false;
+
+ if (auto *Cast = dyn_cast<CastInst>(V))
+ return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0));
// Special cases for calls as we need to check for zeroext
// TODO We should accept calls even if they don't have zeroext, as they can
- // still be roots.
+ // still be sinks.
if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Cast = dyn_cast<CastInst>(V)) {
- if (isa<ZExtInst>(Cast))
- return Cast->getDestTy()->getScalarSizeInBits() <= 32;
- else if (auto *Trunc = dyn_cast<TruncInst>(V))
- return Trunc->getDestTy()->getScalarSizeInBits() <= TypeSize;
- else {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, unsupported cast.\n");
- return false;
- }
- } else if (!isa<BinaryOperator>(V)) {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, not a binary operator.\n");
+ return isSupportedType(Call) &&
+ Call->hasRetAttr(Attribute::AttrKind::ZExt);
+
+ if (!isa<BinaryOperator>(V))
+ return false;
+
+ if (!isSupportedType(V))
return false;
- }
- bool res = !isSigned(V);
- if (!res)
- LLVM_DEBUG(dbgs() << "ARM CGP: No, it's a signed instruction.\n");
- return res;
+ if (generateSignBits(V)) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
+ return false;
+ }
+ return true;
}
/// Check that the type of V would be promoted and that the original type is
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
- if (!isSupportedType(V))
- return false;
- unsigned VSize = 0;
- if (auto *Ld = dyn_cast<LoadInst>(V)) {
- auto *PtrTy = cast<PointerType>(Ld->getPointerOperandType());
- VSize = PtrTy->getElementType()->getPrimitiveSizeInBits();
- } else if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- VSize = ZExt->getOperand(0)->getType()->getPrimitiveSizeInBits();
- } else {
- VSize = V->getType()->getPrimitiveSizeInBits();
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return true;
+
+ if (SafeToPromote.count(I))
+ return true;
+
+ if (isPromotedResultSafe(V) || isSafeOverflow(I)) {
+ SafeToPromote.insert(I);
+ return true;
}
- if (VSize > TypeSize)
+ if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
return false;
- if (isPromotedResultSafe(V))
- return true;
+ // If promotion is not safe, can we use a DSP instruction to natively
+ // handle the narrow type?
+ if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
+ return false;
- if (auto *I = dyn_cast<Instruction>(V))
- return isNarrowInstSupported(I);
+ if (ST->isThumb() && !ST->hasThumb2())
+ return false;
- return false;
+ // TODO
+ // Would it be profitable? For Thumb code, these parallel DSP instructions
+ // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
+ // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
+ // halved. They also do not take immediates as operands.
+ for (auto &Op : I->operands()) {
+ if (isa<Constant>(Op)) {
+ if (!EnableDSPWithImms)
+ return false;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "ARM CGP: Will use an intrinsic for: " << *I << "\n");
+ return true;
}
bool ARMCodeGenPrepare::TryToPromote(Value *V) {
OrigTy = V->getType();
TypeSize = OrigTy->getPrimitiveSizeInBits();
+ if (TypeSize > 16 || TypeSize < 8)
+ return false;
+
+ SafeToPromote.clear();
if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
return false;
- LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = "
+ << TypeSize << "\n");
SetVector<Value*> WorkList;
- SmallPtrSet<Value*, 8> Leaves;
- SmallPtrSet<Instruction*, 4> Roots;
- WorkList.insert(V);
+ SmallPtrSet<Value*, 8> Sources;
+ SmallPtrSet<Instruction*, 4> Sinks;
SmallPtrSet<Value*, 16> CurrentVisited;
- CurrentVisited.clear();
+ WorkList.insert(V);
- // Return true if the given value can, or has been, visited. Add V to the
- // worklist if needed.
+ // Return true if V was added to the worklist as a supported instruction,
+ // if it was already visited, or if we don't need to explore it (e.g.
+ // pointer values and GEPs), and false otherwise.
auto AddLegalInst = [&](Value *V) {
if (CurrentVisited.count(V))
return true;
+ // Ignore GEPs because they don't need promoting and the constant indices
+ // will prevent the transformation.
+ if (isa<GetElementPtrInst>(V))
+ return true;
+
if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
return false;
@@ -600,6 +952,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
if (CurrentVisited.count(V))
continue;
+ // Ignore non-instructions, other than arguments.
if (!isa<Instruction>(V) && !isSource(V))
continue;
@@ -607,24 +960,26 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
// the tree has already been explored.
// TODO: This could limit the transform, ie if we try to promote something
// from an i8 and fail first, before trying an i16.
- if (AllVisited.count(V)) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Already visited this: " << *V << "\n");
+ if (AllVisited.count(V))
return false;
- }
CurrentVisited.insert(V);
AllVisited.insert(V);
// Calls can be both sources and sinks.
if (isSink(V))
- Roots.insert(cast<Instruction>(V));
+ Sinks.insert(cast<Instruction>(V));
+
if (isSource(V))
- Leaves.insert(V);
- else if (auto *I = dyn_cast<Instruction>(V)) {
- // Visit operands of any instruction visited.
- for (auto &U : I->operands()) {
- if (!AddLegalInst(U))
- return false;
+ Sources.insert(V);
+
+ if (!isSink(V) && !isSource(V)) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // Visit operands of any instruction visited.
+ for (auto &U : I->operands()) {
+ if (!AddLegalInst(U))
+ return false;
+ }
}
}
@@ -638,43 +993,23 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
}
}
- unsigned NumToPromote = 0;
- unsigned Cost = 0;
+ LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
+ for (auto *I : CurrentVisited)
+ I->dump();
+ );
+ unsigned ToPromote = 0;
for (auto *V : CurrentVisited) {
- // Truncs will cause a uxt and no zeroext arguments will often require
- // a uxt somewhere.
- if (isa<TruncInst>(V))
- ++Cost;
- else if (auto *Arg = dyn_cast<Argument>(V)) {
- if (!Arg->hasZExtAttr())
- ++Cost;
- }
-
- // Mem ops can automatically be extended/truncated and non-instructions
- // don't need anything done.
- if (Leaves.count(V) || isa<StoreInst>(V) || !isa<Instruction>(V))
+ if (Sources.count(V))
continue;
-
- // Will need to truncate calls args and returns.
- if (Roots.count(cast<Instruction>(V))) {
- ++Cost;
+ if (Sinks.count(cast<Instruction>(V)))
continue;
- }
-
- if (shouldPromote(V))
- ++NumToPromote;
+ ++ToPromote;
}
- LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
- for (auto *I : CurrentVisited)
- I->dump();
- );
- LLVM_DEBUG(dbgs() << "ARM CGP: Cost of promoting " << NumToPromote
- << " instructions = " << Cost << "\n");
- if (Cost > NumToPromote || (NumToPromote == 0))
+ if (ToPromote < 2)
return false;
- Promoter->Mutate(OrigTy, CurrentVisited, Leaves, Roots);
+ Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote);
return true;
}
@@ -711,19 +1046,15 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) {
continue;
LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
+
for (auto &Op : CI.operands()) {
- if (auto *I = dyn_cast<Instruction>(Op)) {
- if (isa<ZExtInst>(I))
- MadeChange |= TryToPromote(I->getOperand(0));
- else
- MadeChange |= TryToPromote(I);
- }
+ if (auto *I = dyn_cast<Instruction>(Op))
+ MadeChange |= TryToPromote(I);
}
}
}
- Promoter->Cleanup();
LLVM_DEBUG(if (verifyFunction(F, &dbgs())) {
- dbgs();
+ dbgs() << F;
report_fatal_error("Broken function after type promotion");
});
}
@@ -744,6 +1075,7 @@ INITIALIZE_PASS_END(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations",
false, false)
char ARMCodeGenPrepare::ID = 0;
+unsigned ARMCodeGenPrepare::TypeSize = 0;
FunctionPass *llvm::createARMCodeGenPreparePass() {
return new ARMCodeGenPrepare();
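
A minimal sketch of the kind of source this pass targets (illustrative only; the
function name and values below are assumptions, not taken from this commit).
Narrow i8/i16 use-def chains like the one here are what IRPromoter::Mutate widens
to i32, extending at the sources (arguments, loads) and truncating only at the
sinks (here, the return):

    // Hypothetical input: an i8 chain that ARMCodeGenPrepare could promote to
    // i32, aiming to remove the intermediate zero-extensions and truncations.
    extern "C" unsigned char sum3(unsigned char a, unsigned char b,
                                  unsigned char c) {
      unsigned char t = static_cast<unsigned char>(a + b); // intermediate trunc
      return static_cast<unsigned char>(t + c);            // trunc at the sink
    }
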
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 2c4738d3cb74..5e97c4cb35e3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1420,6 +1420,22 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
MI = LastIT;
}
+ // Avoid splitting a MOVW+MOVT pair with a relocation on Windows.
+ // On Windows, this instruction pair is covered by a single
+ // IMAGE_REL_ARM_MOV32T relocation which covers both instructions. If a
+ // constant island is injected in between them, the relocation will clobber
+ // the instruction and fail to update the MOVT instruction.
+ // (These instructions are bundled up until right before the ConstantIslands
+ // pass.)
+ if (STI->isTargetWindows() && isThumb && MI->getOpcode() == ARM::t2MOVTi16 &&
+ (MI->getOperand(2).getTargetFlags() & ARMII::MO_OPTION_MASK) ==
+ ARMII::MO_HI16) {
+ --MI;
+ assert(MI->getOpcode() == ARM::t2MOVi16 &&
+ (MI->getOperand(1).getTargetFlags() & ARMII::MO_OPTION_MASK) ==
+ ARMII::MO_LO16);
+ }
+
// We really must not split an IT block.
LLVM_DEBUG(unsigned PredReg; assert(
!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL));
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 5dac6ec0b799..eecd0a10dc7d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -570,7 +570,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
}
@@ -645,7 +645,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
}
@@ -735,7 +735,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
}
@@ -848,8 +848,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
- LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.cloneMemRefs(MI);
+ HI16.cloneMemRefs(MI);
LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
if (isCC)
@@ -899,8 +899,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
}
}
- LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.cloneMemRefs(MI);
+ HI16.cloneMemRefs(MI);
LO16.addImm(Pred).addReg(PredReg);
HI16.addImm(Pred).addReg(PredReg);
@@ -1030,10 +1030,10 @@ static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
if (IsThumb) {
unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
- MIB.addReg(RegLo, Flags | getKillRegState(Reg.isDead()));
- MIB.addReg(RegHi, Flags | getKillRegState(Reg.isDead()));
+ MIB.addReg(RegLo, Flags);
+ MIB.addReg(RegHi, Flags);
} else
- MIB.addReg(Reg.getReg(), Flags | getKillRegState(Reg.isDead()));
+ MIB.addReg(Reg.getReg(), Flags);
}
/// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
@@ -1103,7 +1103,8 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
// bne .Lloadcmp
unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
- addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
+ unsigned Flags = getKillRegState(New.isDead());
+ addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI);
MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
@@ -1425,7 +1426,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.addExternalSymbol("__aeabi_read_tp", 0);
}
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
@@ -1440,7 +1441,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
.add(MI.getOperand(1))
.add(predOps(ARMCC::AL));
- MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB1.cloneMemRefs(MI);
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
@@ -1544,7 +1545,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
if (isARM) {
MIB3.add(predOps(ARMCC::AL));
if (Opcode == ARM::MOV_ga_pcrel_ldr)
- MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB3.cloneMemRefs(MI);
}
TransferImpOps(MI, MIB1, MIB3);
MI.eraseFromParent();
@@ -1596,7 +1597,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return true;
}
@@ -1629,7 +1630,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return true;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index a66cd7053c0a..a50abfdbee44 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -2951,7 +2951,8 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned ResultReg = MI->getOperand(0).getReg();
if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
return true;
}
@@ -2970,12 +2971,16 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
unsigned ConstAlign =
MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
+ MachineMemOperand *CPMMO =
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+ MachineMemOperand::MOLoad, 4, 4);
unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
- .addConstantPoolIndex(Idx);
+ .addConstantPoolIndex(Idx)
+ .addMemOperand(CPMMO);
if (Opc == ARM::LDRcp)
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));
@@ -2988,6 +2993,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(TempReg)
.addImm(ARMPCLabelIndex);
+
if (!Subtarget->isThumb())
MIB.add(predOps(ARMCC::AL));
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 56ad7a0f0446..a9d87ced31f3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -79,12 +79,11 @@ ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
: TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
STI(sti) {}
-bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
+bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
// iOS always has a FP for backtracking, force other targets to keep their FP
// when doing FastISel. The emitted code is currently superior, and in cases
// like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
- return TargetFrameLowering::noFramePointerElim(MF) ||
- MF.getSubtarget<ARMSubtarget>().useFastISel();
+ return MF.getSubtarget<ARMSubtarget>().useFastISel();
}
/// Returns true if the target can safely skip saving callee-saved registers
@@ -526,6 +525,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup);
switch (TM.getCodeModel()) {
+ case CodeModel::Tiny:
+ llvm_unreachable("Tiny code model not available on ARM.");
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
@@ -909,6 +910,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
assert(RegInfo->hasBasePointer(MF) &&
"VLAs and dynamic stack alignment, but missing base pointer!");
FrameReg = RegInfo->getBaseRegister();
+ Offset -= SPAdj;
}
return Offset;
}
@@ -1006,8 +1008,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
if (Regs.empty())
continue;
- llvm::sort(Regs.begin(), Regs.end(), [&](const RegAndKill &LHS,
- const RegAndKill &RHS) {
+ llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
});
@@ -1103,7 +1104,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Regs.empty())
continue;
- llvm::sort(Regs.begin(), Regs.end(), [&](unsigned LHS, unsigned RHS) {
+ llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
});
@@ -1921,9 +1922,13 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
<< "\n");
}
+ // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
+ // restore LR in that case.
+ bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
+
// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
// Spill LR as well so we can fold BX_RET to the registers restore (LDM).
- if (!LRSpilled && CS1Spilled) {
+ if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
SavedRegs.set(ARM::LR);
NumGPRSpills++;
SmallVectorImpl<unsigned>::iterator LRPos;
@@ -1949,7 +1954,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Windows on ARM, accept R11 (frame pointer)
if (!AFI->isThumbFunction() ||
(STI.isTargetWindows() && Reg == ARM::R11) ||
- isARMLowRegister(Reg) || Reg == ARM::LR) {
+ isARMLowRegister(Reg) ||
+ (Reg == ARM::LR && !ExpensiveLRRestore)) {
SavedRegs.set(Reg);
LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
<< " to make up alignment\n");
@@ -2151,9 +2157,15 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Do not generate a prologue for leaf functions with a stack of size zero.
// For non-leaf functions we have to allow for the possibility that the
- // call is to a non-split function, as in PR37807.
- if (StackSize == 0 && !MFI.hasTailCall())
+ // call is to a non-split function, as in PR37807. This function could also
+ // take the address of a non-split function. When the linker tries to adjust
+ // its non-existent prologue, it would fail with an error. Mark the object
+ // file so that such failures are not errors. See this Go language bug-report
+ // https://go-review.googlesource.com/c/go/+/148819/
+ if (StackSize == 0 && !MFI.hasTailCall()) {
+ MF.getMMI().setHasNosplitStack(true);
return;
+ }
// Use R4 and R5 as scratch registers.
// We save R4 and R5 before use and restore them before leaving the function.
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
index e994cab28fe7..2f7e23840e75 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -42,7 +42,7 @@ public:
std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override;
- bool noFramePointerElim(const MachineFunction &MF) const override;
+ bool keepFramePointer(const MachineFunction &MF) const override;
bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9592dd53c347..8e0e82388251 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1345,9 +1345,8 @@ static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
}
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
@@ -1764,12 +1763,14 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
default: llvm_unreachable("unhandled vld type");
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4f16:
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
case MVT::v1i64: OpcodeIndex = 3; break;
// Quad-register operations:
case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8f16:
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
@@ -1854,9 +1855,8 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
}
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
if (NumVecs == 1) {
ReplaceNode(N, VLd);
@@ -1893,8 +1893,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
SDValue Chain = N->getOperand(0);
EVT VT = N->getOperand(Vec0Idx).getValueType();
@@ -1983,7 +1982,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
- cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
ReplaceNode(N, VSt);
return;
@@ -2007,7 +2006,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
MemAddr.getValueType(),
MVT::Other, OpsA);
- cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
Chain = SDValue(VStA, 1);
// Store the odd D registers.
@@ -2026,7 +2025,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Chain);
SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
Ops);
- cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
ReplaceNode(N, VStB);
}
@@ -2045,8 +2044,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
SDValue Chain = N->getOperand(0);
unsigned Lane =
@@ -2135,7 +2133,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
QOpcodes[OpcodeIndex]);
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
if (!IsLoad) {
ReplaceNode(N, VLdLn);
return;
@@ -2264,9 +2262,8 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
}
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
// Extract the subregisters.
if (NumVecs == 1) {
@@ -2309,6 +2306,11 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
Srl_imm)) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ // Mask off the unnecessary bits of the AND immediate; normally
+ // DAGCombine will do this, but that might not happen if
+ // targetShrinkDemandedConstant chooses a different immediate.
+ And_imm &= -1U >> Srl_imm;
+
// Note: The width operand is encoded as width-1.
unsigned Width = countTrailingOnes(And_imm) - 1;
unsigned LSB = Srl_imm;
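
A small worked example of the And_imm trimming added above (the constants are
illustrative assumptions, not values from the commit). For a pattern such as
(x >> 24) & 0x3FF only eight bits can survive the shift, so the mask must be
trimmed before the bitfield width is derived from its trailing ones (this
standalone check needs C++20 for <bit>):

    #include <bit>
    #include <cstdint>

    int main() {
      uint32_t And_imm = 0x3FF;                       // mask left by an earlier combine
      unsigned Srl_imm = 24;                          // shift amount
      And_imm &= ~0u >> Srl_imm;                      // 0x3FF -> 0xFF: drop unreachable bits
      unsigned Width = std::countr_one(And_imm) - 1;  // 7, the width-1 encoding for 8 bits
      return Width == 7 ? 0 : 1;
    }
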
@@ -2476,9 +2478,8 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
Opcode, SDLoc(N),
CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
@@ -2627,12 +2628,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// queries work properly. This e.g. gives the register allocation the
// required information for rematerialization.
MachineFunction& MF = CurDAG->getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = MF.getMachineMemOperand(
- MachinePointerInfo::getConstantPool(MF),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *MemOp =
+ MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
+ MachineMemOperand::MOLoad, 4, 4);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
ReplaceNode(N, ResNode);
return;
@@ -3030,11 +3030,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VZIPd8; break;
+ case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VZIPd16; break;
case MVT::v2f32:
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VZIPq8; break;
+ case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VZIPq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VZIPq32; break;
@@ -3051,11 +3053,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VUZPd8; break;
+ case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VUZPd16; break;
case MVT::v2f32:
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VUZPq8; break;
+ case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VUZPq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VUZPq32; break;
@@ -3072,10 +3076,12 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VTRNd8; break;
+ case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VTRNd16; break;
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VTRNq8; break;
+ case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VTRNq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
@@ -3410,9 +3416,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getRegister(0, MVT::i32), Chain};
SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
// Remap uses.
SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
@@ -3478,9 +3483,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
return;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index ede276dd91bb..21de0f6a7630 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -651,9 +651,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// it have a FP_TO_[SU]INT instruction with a narrower destination than
// source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
@@ -665,8 +669,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
- setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
- setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
+ setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
@@ -846,8 +850,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
+ }
// @llvm.readcyclecounter requires the Performance Monitors extension.
// Default to the 0 expansion on unsupported platforms.
@@ -950,6 +956,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// Use the default implementation.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -977,7 +984,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
- if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
+ if (!Subtarget->hasAcquireRelease() ||
+ getTargetMachine().getOptLevel() == 0) {
// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
InsertFencesForAtomic = true;
}
@@ -1136,14 +1144,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasNEON()) {
// vmin and vmax aren't available in a scalar form, so we use
// a NEON instruction with an undef lane instead.
- setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
+
+ if (Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
+
+ setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
+ }
}
// We have target-specific dag combine patterns for the following nodes:
@@ -1181,6 +1201,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Prefer likely predicted branches to selects on out-of-order cores.
PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
+ setPrefLoopAlignment(Subtarget->getPrefLoopAlignment());
+
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -1261,6 +1283,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::SUBS: return "ARMISD::SUBS";
case ARMISD::SSAT: return "ARMISD::SSAT";
case ARMISD::USAT: return "ARMISD::USAT";
@@ -3052,41 +3075,8 @@ static bool allUsersAreInFunction(const Value *V, const Function *F) {
return true;
}
-/// Return true if all users of V are within some (any) function, looking through
-/// ConstantExprs. In other words, are there any global constant users?
-static bool allUsersAreInFunctions(const Value *V) {
- SmallVector<const User*,4> Worklist;
- for (auto *U : V->users())
- Worklist.push_back(U);
- while (!Worklist.empty()) {
- auto *U = Worklist.pop_back_val();
- if (isa<ConstantExpr>(U)) {
- for (auto *UU : U->users())
- Worklist.push_back(UU);
- continue;
- }
-
- if (!isa<Instruction>(U))
- return false;
- }
- return true;
-}
-
-// Return true if T is an integer, float or an array/vector of either.
-static bool isSimpleType(Type *T) {
- if (T->isIntegerTy() || T->isFloatingPointTy())
- return true;
- Type *SubT = nullptr;
- if (T->isArrayTy())
- SubT = T->getArrayElementType();
- else if (T->isVectorTy())
- SubT = T->getVectorElementType();
- else
- return false;
- return SubT->isIntegerTy() || SubT->isFloatingPointTy();
-}
-
-static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
+static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
+ const GlobalValue *GV, SelectionDAG &DAG,
EVT PtrVT, const SDLoc &dl) {
// If we're creating a pool entry for a constant global with unnamed address,
// and the global is small enough, we can emit it inline into the constant pool
@@ -3113,11 +3103,11 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
!GVar->hasLocalLinkage())
return SDValue();
- // Ensure that we don't try and inline any type that contains pointers. If
- // we inline a value that contains relocations, we move the relocations from
- // .data to .text which is not ideal.
+ // If we inline a value that contains relocations, we move the relocations
+ // from .data to .text. This is not allowed in position-independent code.
auto *Init = GVar->getInitializer();
- if (!isSimpleType(Init->getType()))
+ if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
+ Init->needsRelocation())
return SDValue();
// The constant islands pass can only really deal with alignment requests
@@ -3128,7 +3118,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
// that are strings for simplicity.
auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
- unsigned Align = GVar->getAlignment();
+ unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
unsigned RequiredPadding = 4 - (Size % 4);
bool PaddingPossible =
RequiredPadding == 4 || (CDAInit && CDAInit->isString());
@@ -3149,12 +3139,14 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
ConstpoolPromotionMaxTotal)
return SDValue();
- // This is only valid if all users are in a single function OR it has users
- // in multiple functions but it no larger than a pointer. We also check if
- // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
- // address taken.
- if (!allUsersAreInFunction(GVar, &F) &&
- !(Size <= 4 && allUsersAreInFunctions(GVar)))
+ // This is only valid if all users are in a single function; we can't clone
+ // the constant in general. The LLVM IR unnamed_addr allows merging
+ // constants, but not cloning them.
+ //
+ // We could potentially allow cloning if we could prove all uses of the
+ // constant in the current function don't care about the address, like
+ // printf format strings. But that isn't implemented for now.
+ if (!allUsersAreInFunction(GVar, &F))
return SDValue();
// We're going to inline this global. Pad it out if needed.
@@ -3182,9 +3174,11 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->getBaseObject();
- return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
- isa<Function>(GV);
+ if (!(GV = GA->getBaseObject()))
+ return false;
+ if (const auto *V = dyn_cast<GlobalVariable>(GV))
+ return V->isConstant();
+ return isa<Function>(GV);
}
SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -3210,7 +3204,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
// promoteToConstantPool only if not generating XO text section
if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
- if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
+ if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
return V;
if (isPositionIndependent()) {
@@ -3299,9 +3293,13 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
"ROPI/RWPI not currently supported for Windows");
+ const TargetMachine &TM = getTargetMachine();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- const ARMII::TOF TargetFlags =
- (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
+ ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
+ if (GV->hasDLLImportStorageClass())
+ TargetFlags = ARMII::MO_DLLIMPORT;
+ else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ TargetFlags = ARMII::MO_COFFSTUB;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
SDLoc DL(Op);
@@ -3313,7 +3311,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
TargetFlags));
- if (GV->hasDLLImportStorageClass())
+ if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
@@ -3412,7 +3410,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
Op.getOperand(1), Op.getOperand(2));
}
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
- ? ISD::FMINNAN : ISD::FMAXNAN;
+ ? ISD::FMINIMUM : ISD::FMAXIMUM;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
@@ -4832,12 +4830,24 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
return DAG.UnrollVectorOp(Op.getNode());
}
- assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
- "Invalid type for custom lowering!");
- if (VT != MVT::v4i16)
+ const bool HasFullFP16 =
+ static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+
+ EVT NewTy;
+ const EVT OpTy = Op.getOperand(0).getValueType();
+ if (OpTy == MVT::v4f32)
+ NewTy = MVT::v4i32;
+ else if (OpTy == MVT::v4f16 && HasFullFP16)
+ NewTy = MVT::v4i16;
+ else if (OpTy == MVT::v8f16 && HasFullFP16)
+ NewTy = MVT::v8i16;
+ else
+ llvm_unreachable("Invalid type for custom lowering!");
+
+ if (VT != MVT::v4i16 && VT != MVT::v8i16)
return DAG.UnrollVectorOp(Op.getNode());
- Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
@@ -4870,9 +4880,21 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return DAG.UnrollVectorOp(Op.getNode());
}
- assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
+ assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
+ Op.getOperand(0).getValueType() == MVT::v8i16) &&
"Invalid type for custom lowering!");
- if (VT != MVT::v4f32)
+
+ const bool HasFullFP16 =
+ static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+
+ EVT DestVecType;
+ if (VT == MVT::v4f32)
+ DestVecType = MVT::v4i32;
+ else if (VT == MVT::v4f16 && HasFullFP16)
+ DestVecType = MVT::v4i16;
+ else if (VT == MVT::v8f16 && HasFullFP16)
+ DestVecType = MVT::v8i16;
+ else
return DAG.UnrollVectorOp(Op.getNode());
unsigned CastOpc;
@@ -4889,7 +4911,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
break;
}
- Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
return DAG.getNode(Opc, dl, VT, Op);
}
@@ -5392,10 +5414,6 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
// Compute with: cttz(x) = ctpop(lsb - 1)
- // Since we can only compute the number of bits in a byte with vcnt.8, we
- // have to gather the result with pairwise addition (vpaddl) for i16, i32,
- // and i64.
-
// Compute LSB - 1.
SDValue Bits;
if (ElemTy == MVT::i64) {
@@ -5408,32 +5426,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
DAG.getTargetConstant(1, dl, ElemTy));
Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
}
-
- // Count #bits with vcnt.8.
- EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
- SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
- SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
-
- // Gather the #bits with vpaddl (pairwise add.)
- EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
- SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
- DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
- Cnt8);
- if (ElemTy == MVT::i16)
- return Cnt16;
-
- EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
- SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
- DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
- Cnt16);
- if (ElemTy == MVT::i32)
- return Cnt32;
-
- assert(ElemTy == MVT::i64);
- SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
- Cnt32);
- return Cnt64;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
}
if (!ST->hasV6T2Ops())
@@ -5443,112 +5436,37 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}
-/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
-/// for each 16-bit element from operand, repeated. The basic idea is to
-/// leverage vcnt to get the 8-bit counts, gather and add the results.
-///
-/// Trace for v4i16:
-/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
-/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
-/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
-/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
-/// [b0 b1 b2 b3 b4 b5 b6 b7]
-/// +[b1 b0 b3 b2 b5 b4 b7 b6]
-/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
-/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
-static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
-
- EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
- SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
- SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
- SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
- SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
- return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
-}
-
-/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
-/// bit-count for each 16-bit element from the operand. We need slightly
-/// different sequencing for v4i16 and v8i16 to stay within NEON's available
-/// 64/128-bit registers.
-///
-/// Trace for v4i16:
-/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
-/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
-/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
-/// v4i16:Extracted = [k0 k1 k2 k3 ]
-static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
+static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
- if (VT.is64BitVector()) {
- SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
- DAG.getIntPtrConstant(0, DL));
- } else {
- SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
- BitCounts, DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
- }
-}
-
-/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
-/// bit-count for each 32-bit element from the operand. The idea here is
-/// to split the vector into 16-bit elements, leverage the 16-bit count
-/// routine, and then combine the results.
-///
-/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
-/// input = [v0 v1 ] (vi: 32-bit elements)
-/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
-/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
-/// vrev: N0 = [k1 k0 k3 k2 ]
-/// [k0 k1 k2 k3 ]
-/// N1 =+[k1 k0 k3 k2 ]
-/// [k0 k2 k1 k3 ]
-/// N2 =+[k1 k3 k0 k2 ]
-/// [k0 k2 k1 k3 ]
-/// Extended =+[k1 k3 k0 k2 ]
-/// [k0 k2 ]
-/// Extracted=+[k1 k3 ]
-///
-static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
+ assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
+ assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
+ VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
+ "Unexpected type for custom ctpop lowering");
- EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
+ Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
- SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
- SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
- SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
- SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
- SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
+ // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
+ unsigned EltSize = 8;
+ unsigned NumElts = VT.is64BitVector() ? 8 : 16;
+ while (EltSize != VT.getScalarSizeInBits()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Ops.push_back(Res);
- if (VT.is64BitVector()) {
- SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
- DAG.getIntPtrConstant(0, DL));
- } else {
- SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
- DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
+ EltSize *= 2;
+ NumElts /= 2;
+ MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
}
-}
-static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
- EVT VT = N->getValueType(0);
-
- assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
- assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
- VT == MVT::v4i16 || VT == MVT::v8i16) &&
- "Unexpected type for custom ctpop lowering");
-
- if (VT.getVectorElementType() == MVT::i32)
- return lowerCTPOP32BitElements(N, DAG);
- else
- return lowerCTPOP16BitElements(N, DAG);
+ return Res;
}
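
The rewritten LowerCTPOP above counts bits per byte (vcnt.8) and then widens the
result with pairwise additions (vpaddl.u8/.u16/.u32) until the lane width matches
the element type. A scalar model of that widening, offered only as a sketch of
the intent rather than code from the commit:

    #include <bit>
    #include <cstdint>

    // Treat a 64-bit value as eight byte lanes and widen the per-byte popcounts
    // by repeated pairwise addition, mirroring the vpaddl steps.
    uint64_t popcount64_by_bytes(uint64_t v) {
      unsigned lanes[8];
      for (int i = 0; i < 8; ++i)                               // vcnt.8
        lanes[i] = std::popcount(static_cast<uint8_t>(v >> (8 * i)));
      for (int width = 8; width < 64; width *= 2)               // vpaddl steps
        for (int i = 0; i < 64 / (2 * width); ++i)
          lanes[i] = lanes[2 * i] + lanes[2 * i + 1];
      return lanes[0];
    }
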
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
@@ -7878,6 +7796,50 @@ SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
return LowerCallTo(CLI).first;
}
+// This is a code size optimisation: return the original SDIV node to
+// DAGCombiner when we don't want to expand SDIV into a sequence of
+// instructions, and an empty node otherwise, which will cause the
+// SDIV to be expanded in DAGCombine.
+SDValue
+ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // TODO: Support SREM
+ if (N->getOpcode() != ISD::SDIV)
+ return SDValue();
+
+ const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
+ const auto &MF = DAG.getMachineFunction();
+ const bool MinSize = MF.getFunction().optForMinSize();
+ const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
+ : ST.hasDivideInARMMode();
+
+ // Don't touch vector types; rewriting this may lead to scalarizing
+ // the int divs.
+ if (N->getOperand(0).getValueType().isVector())
+ return SDValue();
+
+ // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
+ // hwdiv support for this to be really profitable.
+ if (!(MinSize && HasDivide))
+ return SDValue();
+
+ // ARM mode is a bit simpler than Thumb: we can handle large power
+ // of 2 immediates with 1 mov instruction; no further checks required,
+ // just return the sdiv node.
+ if (!ST.isThumb())
+ return SDValue(N, 0);
+
+ // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
+ // and thus lose the code size benefit of a MOVS that requires only 2 bytes.
+ // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
+ // but as it's doing exactly this, it's not worth the trouble to get TTI.
+ if (Divisor.sgt(128))
+ return SDValue();
+
+ return SDValue(N, 0);
+}
+
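// Illustrative sketch: when this hook returns an empty SDValue, DAGCombine
// falls back to the generic expansion of a signed divide by 2^K, which is
// roughly the branch-free sequence below; returning SDValue(N, 0) keeps the
// SDIV so it can be selected as a single hardware divide, which is the
// smaller option being preferred for minsize above.
#include <cstdint>

static int32_t sdivByPow2(int32_t X, unsigned K) {   // assumes 1 <= K <= 31
  int32_t Sign = X >> 31;                            // all ones if X < 0
  uint32_t Bias = (uint32_t)Sign >> (32 - K);        // 2^K - 1 if X < 0, else 0
  return (X + (int32_t)Bias) >> K;                   // rounds towards zero
}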
SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
assert(Op.getValueType() == MVT::i32 &&
@@ -7990,10 +7952,8 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
ARM::CMP_SWAP_64, SDLoc(N),
DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
- MachineFunction &MF = DAG.getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
bool isBigEndian = DAG.getDataLayout().isBigEndian();
@@ -9169,6 +9129,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
// IP.
switch (TM.getCodeModel()) {
+ case CodeModel::Tiny:
+ llvm_unreachable("Tiny code model not available on ARM.");
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
@@ -9244,6 +9206,42 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
return ContBB;
}
+// The CPSR operand of SelectItr might be missing a kill marker
+// because there were multiple uses of CPSR, and ISel didn't know
+// which to mark. Figure out whether SelectItr should have had a
+// kill marker, and set it if it should. Returns the correct kill
+// marker value.
+static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
+ MachineBasicBlock* BB,
+ const TargetRegisterInfo* TRI) {
+ // Scan forward through BB for a use/def of CPSR.
+ MachineBasicBlock::iterator miI(std::next(SelectItr));
+ for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
+ const MachineInstr& mi = *miI;
+ if (mi.readsRegister(ARM::CPSR))
+ return false;
+ if (mi.definesRegister(ARM::CPSR))
+ break; // Should have kill-flag - update below.
+ }
+
+ // If we hit the end of the block, check whether CPSR is live into a
+ // successor.
+ if (miI == BB->end()) {
+ for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
+ sEnd = BB->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock* succ = *sItr;
+ if (succ->isLiveIn(ARM::CPSR))
+ return false;
+ }
+ }
+
+ // We found a def, or hit the end of the basic block and CPSR wasn't live
+ // out. SelectMI should have a kill flag on CPSR.
+ SelectItr->addRegisterKilled(ARM::CPSR, TRI);
+ return true;
+}
+
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -9343,6 +9341,14 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
+ // Check whether CPSR is live past the tMOVCCr_pseudo.
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ if (!MI.killsRegister(ARM::CPSR) &&
+ !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
+ copy0MBB->addLiveIn(ARM::CPSR);
+ sinkMBB->addLiveIn(ARM::CPSR);
+ }
+
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
@@ -10407,6 +10413,37 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
+bool
+ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const {
+ if (Level == BeforeLegalizeTypes)
+ return true;
+
+ if (Subtarget->isThumb() && Subtarget->isThumb1Only())
+ return true;
+
+ if (N->getOpcode() != ISD::SHL)
+ return true;
+
+ // Turn off commute-with-shift transform after legalization, so it doesn't
+ // conflict with PerformSHLSimplify. (We could try to detect when
+ // PerformSHLSimplify would trigger more precisely, but it isn't
+ // really necessary.)
+ return false;
+}
+
+bool
+ARMTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const {
+ if (!Subtarget->isThumb1Only())
+ return true;
+
+ if (Level == BeforeLegalizeTypes)
+ return true;
+
+ return false;
+}
+
static SDValue PerformSHLSimplify(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
@@ -10506,9 +10543,7 @@ static SDValue PerformSHLSimplify(SDNode *N,
LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
SHL.dump(); N->dump());
LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
-
- DAG.ReplaceAllUsesWith(SDValue(N, 0), Res);
- return SDValue(N, 0);
+ return Res;
}
@@ -10712,6 +10747,12 @@ static SDValue CombineANDShift(SDNode *N,
if (!C2 || C2 >= 32)
return SDValue();
+ // Clear irrelevant bits in the mask.
+ if (LeftShift)
+ C1 &= (-1U << C2);
+ else
+ C1 &= (-1U >> C2);
+
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
@@ -10719,9 +10760,7 @@ static SDValue CombineANDShift(SDNode *N,
// "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
// transform to a pair of shifts, to save materializing c1.
- // First pattern: right shift, and c1+1 is a power of two.
- // FIXME: Also check reversed pattern (left shift, and ~c1+1 is a power
- // of two).
+ // First pattern: right shift, then mask off leading bits.
// FIXME: Use demanded bits?
if (!LeftShift && isMask_32(C1)) {
uint32_t C3 = countLeadingZeros(C1);
@@ -10733,13 +10772,23 @@ static SDValue CombineANDShift(SDNode *N,
}
}
- // Second pattern: left shift, and (c1>>c2)+1 is a power of two.
- // FIXME: Also check reversed pattern (right shift, and ~(c1<<c2)+1
- // is a power of two).
+ // First pattern, reversed: left shift, then mask off trailing bits.
+ if (LeftShift && isMask_32(~C1)) {
+ uint32_t C3 = countTrailingZeros(C1);
+ if (C2 < C3) {
+ SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
+ DAG.getConstant(C3 - C2, DL, MVT::i32));
+ return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
+ DAG.getConstant(C3, DL, MVT::i32));
+ }
+ }
+
+ // Second pattern: left shift, then mask off leading bits.
// FIXME: Use demanded bits?
if (LeftShift && isShiftedMask_32(C1)) {
+ uint32_t Trailing = countTrailingZeros(C1);
uint32_t C3 = countLeadingZeros(C1);
- if (C2 + C3 < 32 && C1 == ((-1U << (C2 + C3)) >> C3)) {
+ if (Trailing == C2 && C2 + C3 < 32) {
SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C2 + C3, DL, MVT::i32));
return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
@@ -10747,6 +10796,19 @@ static SDValue CombineANDShift(SDNode *N,
}
}
+ // Second pattern, reversed: right shift, then mask off trailing bits.
+ // FIXME: Handle other patterns of known/demanded bits.
+ if (!LeftShift && isShiftedMask_32(C1)) {
+ uint32_t Leading = countLeadingZeros(C1);
+ uint32_t C3 = countTrailingZeros(C1);
+ if (Leading == C2 && C2 + C3 < 32) {
+ SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
+ DAG.getConstant(C2 + C3, DL, MVT::i32));
+ return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
+ DAG.getConstant(C3, DL, MVT::i32));
+ }
+ }
+
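// Illustrative sketch: the rewrites above trade an AND with a hard-to-
// materialise constant for two shifts. For the first pattern (right shift,
// C1 a mask of the low bits) take C2 = 3 and C1 = 0x1FFF, so
// C3 = countLeadingZeros(C1) = 19 and C2 < C3:
#include <cassert>
#include <cstdint>

static void shiftPairExample(uint32_t X) {
  uint32_t WithMask   = (X >> 3) & 0x1FFFu;      // needs 0x1FFF in a register
  uint32_t WithShifts = (X << (19 - 3)) >> 19;   // shl then lsr, no constant
  assert(WithMask == WithShifts);
  (void)WithMask;
  (void)WithShifts;
}
// On Thumb1 a 32-bit constant like 0x1FFF is comparatively expensive to
// materialise, which is the motivation for preferring the shift pair there.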
// FIXME: Transform "(and (shl x, c2) c1)" ->
// "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
// c1.
@@ -11541,8 +11603,15 @@ static SDValue CombineBaseUpdate(SDNode *N,
continue;
// Check that the add is independent of the load/store. Otherwise, folding
- // it would create a cycle.
- if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ // it would create a cycle. We can avoid searching through Addr as it's a
+ // predecessor to both.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Addr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(User);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
// Find the new opcode for the updating load/store.
@@ -12507,8 +12576,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
// Lastly, can we determine that the bits defined by OrCI
// are zero in Y?
- KnownBits Known;
- DAG.computeKnownBits(Y, Known);
+ KnownBits Known = DAG.computeKnownBits(Y);
if ((OrCI & Known.Zero) != OrCI)
return SDValue();
@@ -12679,30 +12747,38 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
}
- } else if (CC == ARMCC::NE && LHS != RHS &&
+ } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
(!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
// This seems pointless but will allow us to combine it further below.
- // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
+ // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
+ SDValue Sub =
+ DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+ SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+ Sub.getValue(1), SDValue());
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
- N->getOperand(3), Cmp);
+ N->getOperand(3), CPSRGlue.getValue(1));
+ FalseVal = Sub;
}
} else if (isNullConstant(TrueVal)) {
- if (CC == ARMCC::EQ && LHS != RHS &&
+ if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
(!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
// This seems pointless but will allow us to combine it further below
// Note that we change == for != as this is the dual for the case above.
- // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
+ // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
+ SDValue Sub =
+ DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+ SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+ Sub.getValue(1), SDValue());
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
DAG.getConstant(ARMCC::NE, dl, MVT::i32),
- N->getOperand(3), Cmp);
+ N->getOperand(3), CPSRGlue.getValue(1));
+ FalseVal = Sub;
}
}
// On Thumb1, the DAG above may be further combined if z is a power of 2
// (z == 2 ^ K).
- // CMOV (SUB x, y), z, !=, (CMPZ x, y) ->
+ // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
// merge t3, t4
// where t1 = (SUBCARRY (SUB x, y), z, 0)
// t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
@@ -12710,8 +12786,8 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ]
const APInt *TrueConst;
if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
- (FalseVal.getOpcode() == ISD::SUB) && (FalseVal.getOperand(0) == LHS) &&
- (FalseVal.getOperand(1) == RHS) &&
+ (FalseVal.getOpcode() == ARMISD::SUBS) &&
+ (FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) &&
(TrueConst = isPowerOf2Constant(TrueVal))) {
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
unsigned ShiftAmount = TrueConst->logBase2();
@@ -12730,8 +12806,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
}
if (Res.getNode()) {
- KnownBits Known;
- DAG.computeKnownBits(SDValue(N,0), Known);
+ KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
// Capture demanded bits information that would be otherwise lost.
if (Known.Zero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
@@ -13522,12 +13597,11 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
- DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
if (Known.isUnknown())
return;
- KnownBits KnownRHS;
- DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1);
+ KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
Known.Zero &= KnownRHS.Zero;
Known.One &= KnownRHS.One;
return;
@@ -13549,7 +13623,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case ARMISD::BFI: {
// Conservatively, we can recurse down the first operand
// and just mask out all affected bits.
- DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
+ Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// The operand to BFI is already a mask suitable for removing the bits it
// sets.
@@ -13559,9 +13633,120 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.One &= Mask;
return;
}
+ case ARMISD::VGETLANEs:
+ case ARMISD::VGETLANEu: {
+ const SDValue &SrcSV = Op.getOperand(0);
+ EVT VecVT = SrcSV.getValueType();
+ assert(VecVT.isVector() && "VGETLANE expected a vector type");
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+ ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
+ assert(Pos->getAPIntValue().ult(NumSrcElts) &&
+ "VGETLANE index out of bounds");
+ unsigned Idx = Pos->getZExtValue();
+ APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
+ Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
+
+ EVT VT = Op.getValueType();
+ const unsigned DstSz = VT.getScalarSizeInBits();
+ const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
+ assert(SrcSz == Known.getBitWidth());
+ assert(DstSz > SrcSz);
+ if (Op.getOpcode() == ARMISD::VGETLANEs)
+ Known = Known.sext(DstSz);
+ else {
+ Known = Known.zext(DstSz);
+ Known.Zero.setBitsFrom(SrcSz);
+ }
+ assert(DstSz == Known.getBitWidth());
+ break;
+ }
}
}
+bool
+ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
+ const APInt &DemandedAPInt,
+ TargetLoweringOpt &TLO) const {
+ // Delay optimization, so we don't have to deal with illegal types, or block
+ // optimizations.
+ if (!TLO.LegalOps)
+ return false;
+
+ // Only optimize AND for now.
+ if (Op.getOpcode() != ISD::AND)
+ return false;
+
+ EVT VT = Op.getValueType();
+
+ // Ignore vectors.
+ if (VT.isVector())
+ return false;
+
+ assert(VT == MVT::i32 && "Unexpected integer type");
+
+ // Make sure the RHS really is a constant.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C)
+ return false;
+
+ unsigned Mask = C->getZExtValue();
+
+ unsigned Demanded = DemandedAPInt.getZExtValue();
+ unsigned ShrunkMask = Mask & Demanded;
+ unsigned ExpandedMask = Mask | ~Demanded;
+
+ // If the mask is all zeros, let the target-independent code replace the
+ // result with zero.
+ if (ShrunkMask == 0)
+ return false;
+
+ // If the mask is all ones, erase the AND. (Currently, the target-independent
+ // code won't do this, so we have to do it explicitly to avoid an infinite
+ // loop in obscure cases.)
+ if (ExpandedMask == ~0U)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+ return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+ };
+ auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+ if (NewMask == Mask)
+ return true;
+ SDLoc DL(Op);
+ SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+ SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+ return TLO.CombineTo(Op, NewOp);
+ };
+
+ // Prefer uxtb mask.
+ if (IsLegalMask(0xFF))
+ return UseMask(0xFF);
+
+ // Prefer uxth mask.
+ if (IsLegalMask(0xFFFF))
+ return UseMask(0xFFFF);
+
+ // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
+ // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+ if (ShrunkMask < 256)
+ return UseMask(ShrunkMask);
+
+ // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
+ // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+ if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
+ return UseMask(ExpandedMask);
+
+ // Potential improvements:
+ //
+ // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
+ // We could try to prefer Thumb1 immediates which can be lowered to a
+ // two-instruction sequence.
+ // We could try to recognize more legal ARM/Thumb2 immediates here.
+
+ return false;
+}
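// Illustrative sketch: a worked example of the shrinking above. Assume
// Op = (and X, 0x10FF) and only the low nine bits of the result are demanded:
//   Mask         = 0x000010FF
//   Demanded     = 0x000001FF
//   ShrunkMask   = Mask & Demanded  = 0x000000FF
//   ExpandedMask = Mask | ~Demanded = 0xFFFFFEFF
// IsLegalMask(0xFF) holds: 0xFF covers ShrunkMask and sets no bit that
// ExpandedMask clears, so the AND is rewritten to (and X, 0xFF), which can be
// selected as a single uxtb instead of materialising 0x10FF.
static_assert((0x10FFu & 0x1FFu) == 0xFFu, "ShrunkMask in the example above");
static_assert((0x10FFu | ~0x1FFu) == 0xFFFFFEFFu,
              "ExpandedMask in the example above");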
+
+
//===----------------------------------------------------------------------===//
// ARM Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -14412,16 +14597,18 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
: AtomicExpansionKind::None;
}
-bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
- AtomicCmpXchgInst *AI) const {
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
- bool hasAtomicCmpXchg =
+ bool HasAtomicCmpXchg =
!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
- return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg;
+ if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
+ return AtomicExpansionKind::LLSC;
+ return AtomicExpansionKind::None;
}
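// Illustrative sketch: AtomicExpansionKind::LLSC asks AtomicExpandPass to
// lower the cmpxchg to a load-exclusive/store-exclusive loop. A rough C-level
// model of that shape, assuming Clang's ACLE-style exclusive builtins
// (__builtin_arm_ldrex/strex/clrex) are available on the target:
static bool cmpxchgViaLLSC(int *Ptr, int Expected, int Desired) {
  for (;;) {
    int Old = __builtin_arm_ldrex(Ptr);           // LDREX: open the monitor
    if (Old != Expected) {
      __builtin_arm_clrex();                      // CLREX: give the monitor up
      return false;
    }
    if (__builtin_arm_strex(Desired, Ptr) == 0)   // STREX: 0 means it stuck
      return true;
  }
}
// At -O0 spills between the LDREX and the STREX can clear the monitor and
// make the loop spin forever, which is why None is returned there and the
// late-expanded CMP_SWAP pseudo-instructions are used instead.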
bool ARMTargetLowering::shouldInsertFencesForAtomic(
@@ -14548,6 +14735,11 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Addr});
}
+
+bool ARMTargetLowering::alignLoopsWithOptSize() const {
+ return Subtarget->isMClass();
+}
+
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
unsigned
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 50b4c2977fb5..7a9fc739fc13 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -85,6 +85,7 @@ class VectorType;
FMSTAT, // ARM fmstat instruction.
CMOV, // ARM conditional move instructions.
+ SUBS, // Flag-setting subtraction.
SSAT, // Signed saturation
USAT, // Unsigned saturation
@@ -389,6 +390,9 @@ class VectorType;
const SelectionDAG &DAG,
unsigned Depth) const override;
+ bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ TargetLoweringOpt &TLO) const override;
+
bool ExpandInlineAsm(CallInst *CI) const override;
@@ -535,7 +539,8 @@ class VectorType;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
- bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
bool useLoadStackGuardNode() const override;
@@ -572,6 +577,8 @@ class VectorType;
bool isLegalInterleavedAccessType(VectorType *VecTy,
const DataLayout &DL) const;
+ bool alignLoopsWithOptSize() const override;
+
/// Returns the number of interleaved accesses that will be generated when
/// lowering accesses of the given type.
unsigned getNumInterleavedAccesses(VectorType *VecTy,
@@ -583,6 +590,11 @@ class VectorType;
unsigned getABIAlignmentForCallingConv(Type *ArgTy,
DataLayout DL) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
+ bool shouldFoldShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const override;
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -685,6 +697,9 @@ class VectorType;
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
+
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
@@ -764,6 +779,8 @@ class VectorType;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ bool shouldConsiderGEPOffsetSplit() const override { return true; }
+
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue CCR, SDValue Cmp,
SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 1d3b1414f090..0df48ba61299 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -2580,6 +2580,37 @@ class N3VLaneCP8<bit op23, bits<2> op21_20, bit op6, bit op4,
let Inst{3-0} = Vm{3-0};
}
+// In Armv8.2-A, some NEON instructions are added that encode Vn and Vm
+// differently:
+// if Q == ‘1’ then UInt(N:Vn) else UInt(Vn:N);
+// if Q == ‘1’ then UInt(M:Vm) else UInt(Vm:M);
+// Class N3VCP8 above describes the Q=1 case, and this class the Q=0 case.
+class N3VCP8Q0<bits<2> op24_23, bits<2> op21_20, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N3RegCplxFrm, itin, opc, dt, asm, cstr, pattern> {
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let DecoderNamespace = "VFPV8";
+ // These have the same encodings in ARM and Thumb2
+ let PostEncoderMethod = "";
+
+ let Inst{31-25} = 0b1111110;
+ let Inst{24-23} = op24_23;
+ let Inst{22} = Vd{4};
+ let Inst{21-20} = op21_20;
+ let Inst{19-16} = Vn{4-1};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{11-8} = 0b1000;
+ let Inst{7} = Vn{0};
+ let Inst{6} = op6;
+ let Inst{5} = Vm{0};
+ let Inst{4} = op4;
+ let Inst{3-0} = Vm{4-1};
+}
+
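// Illustrative sketch: how the Q==0 class above splits a 5-bit register
// number across the encoding, with the field positions taken from the
// 'let Inst{...}' assignments:
#include <cstdint>

static uint32_t encodeVnFieldsQ0(unsigned Reg) {   // Reg in [0, 31]
  uint32_t Bits19_16 = (Reg >> 1) & 0xFu;          // Vn{4-1}
  uint32_t Bit7      = Reg & 0x1u;                 // Vn{0}, the N bit
  return (Bits19_16 << 16) | (Bit7 << 7);
}
// For Q==1 (class N3VCP8) the split is the other way round: the low four bits
// land in the 4-bit field and the top bit in the single-bit position, which
// is the UInt(N:Vn) versus UInt(Vn:N) distinction quoted in the comment.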
// Operand types for complex instructions
class ComplexRotationOperand<int Angle, int Remainder, string Type, string Diag>
: AsmOperandClass {
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 397c9dadb4ac..bcc31f5fa4cc 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -132,34 +132,6 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
+ .cloneMemRefs(*MI)
.add(predOps(ARMCC::AL));
}
-
-std::pair<unsigned, unsigned>
-ARMInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
- const unsigned Mask = ARMII::MO_OPTION_MASK;
- return std::make_pair(TF & Mask, TF & ~Mask);
-}
-
-ArrayRef<std::pair<unsigned, const char *>>
-ARMInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
- using namespace ARMII;
-
- static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
- return makeArrayRef(TargetFlags);
-}
-
-ArrayRef<std::pair<unsigned, const char *>>
-ARMInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
- using namespace ARMII;
-
- static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_GOT, "arm-got"},
- {MO_SBREL, "arm-sbrel"},
- {MO_DLLIMPORT, "arm-dllimport"},
- {MO_SECREL, "arm-secrel"},
- {MO_NONLAZY, "arm-nonlazy"}};
- return makeArrayRef(TargetFlags);
-}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
index c54c987134df..c87fb97448c9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -38,13 +38,6 @@ public:
///
const ARMRegisterInfo &getRegisterInfo() const override { return RI; }
- std::pair<unsigned, unsigned>
- decomposeMachineOperandsTargetFlags(unsigned TF) const override;
- ArrayRef<std::pair<unsigned, const char *>>
- getSerializableDirectMachineOperandTargetFlags() const override;
- ArrayRef<std::pair<unsigned, const char *>>
- getSerializableBitmaskMachineOperandTargetFlags() const override;
-
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
};
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index d4c342cee5c0..13abdc9687ec 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -144,6 +144,7 @@ def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
+def ARMsubs : SDNode<"ARMISD::SUBS", SDTIntBinOp, [SDNPOutGlue]>;
def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
@@ -221,6 +222,7 @@ def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
def HasV5T : Predicate<"Subtarget->hasV5TOps()">,
AssemblerPredicate<"HasV5TOps", "armv5t">;
+def NoV5T : Predicate<"!Subtarget->hasV5TOps()">;
def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
AssemblerPredicate<"HasV5TEOps", "armv5te">;
def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
@@ -255,6 +257,8 @@ def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
+ AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -285,6 +289,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float conversions">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16","full half-float">;
+def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
+ AssemblerPredicate<"FeatureFP16FML","full half-float fml">;
def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
@@ -351,23 +357,24 @@ def UseNegativeImmediates :
let RecomputePerFunction = 1 in {
def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
- def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
- def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+ def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
+ def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+
+ def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
+ " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
+ "MF->getFunction().optForMinSize())">;
}
-def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
def UseMulOps : Predicate<"Subtarget->useMulOps()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
-// But only select them if more precision in FP computation is allowed.
+// But only select them if more precision in FP computation is allowed, and when
+// they are not slower than a mul + add sequence.
// Do not use them for Darwin platforms.
def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
" FPOpFusion::Fast && "
" Subtarget->hasVFP4()) && "
- "!Subtarget->isTargetDarwin()">;
-def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion =="
- " FPOpFusion::Fast &&"
- " Subtarget->hasVFP4()) || "
- "Subtarget->isTargetDarwin()">;
+ "!Subtarget->isTargetDarwin() &&"
+ "Subtarget->useFPVMLx()">;
def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
@@ -387,6 +394,10 @@ let RecomputePerFunction = 1 in {
def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
+// Armv8.5-A extensions
+def HasSB : Predicate<"Subtarget->hasSB()">,
+ AssemblerPredicate<"FeatureSB", "sb">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -415,24 +426,22 @@ def imm16_31 : ImmLeaf<i32, [{
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
- if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17)
- return true;
-
- if (N->getOpcode() != ISD::SRA)
- return false;
- if (N->getOperand(0).getOpcode() != ISD::SHL)
- return false;
-
- auto *ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!ShiftVal || ShiftVal->getZExtValue() != 16)
- return false;
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
- ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1));
- if (!ShiftVal || ShiftVal->getZExtValue() != 16)
- return false;
+def sext_bottom_16 : PatFrag<(ops node:$a),
+ (sext_inreg node:$a, i16)>;
+def sext_top_16 : PatFrag<(ops node:$a),
+ (i32 (sra node:$a, (i32 16)))>;
- return true;
-}]>;
+def bb_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sext_bottom_16 node:$a), (sext_bottom_16 node:$b))>;
+def bt_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sext_bottom_16 node:$a), (sra node:$b, (i32 16)))>;
+def tb_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sra node:$a, (i32 16)), (sext_bottom_16 node:$b))>;
+def tt_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sra node:$a, (i32 16)), (sra node:$b, (i32 16)))>;
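// Illustrative sketch: scalar models of the PatFrags above, assuming the
// usual smulbb/smulbt/smultb/smultt semantics of signed 16 x 16 -> 32
// multiplies of the bottom/top halves of each operand:
#include <cstdint>

static int32_t bb_mul(int32_t A, int32_t B) { return (int16_t)A * (int16_t)B; }
static int32_t bt_mul(int32_t A, int32_t B) { return (int16_t)A * (B >> 16); }
static int32_t tb_mul(int32_t A, int32_t B) { return (A >> 16) * (int16_t)B; }
static int32_t tt_mul(int32_t A, int32_t B) { return (A >> 16) * (B >> 16); }
// sext_bottom_16 corresponds to the (int16_t) casts and sext_top_16 to the
// arithmetic >> 16, mirroring the sext_inreg and sra nodes in the fragments.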
/// Split a 32-bit immediate into two 16 bit parts.
def hi16 : SDNodeXForm<imm, [{
@@ -713,7 +722,20 @@ def arm_i32imm : PatLeaf<(imm), [{
if (Subtarget->useMovt(*MF))
return true;
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
-}]>;
+}]> {
+ // Ideally this would be an IntImmLeaf, but then we wouldn't have access to
+ // the MachineFunction.
+ let GISelPredicateCode = [{
+ const auto &MF = *MI.getParent()->getParent();
+ if (STI.useMovt(MF))
+ return true;
+
+ const auto &MO = MI.getOperand(1);
+ if (!MO.isCImm())
+ return false;
+ return ARM_AM::isSOImmTwoPartVal(MO.getCImm()->getZExtValue());
+ }];
+}
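// Illustrative sketch: a rough model of the immediate test referenced above.
// An ARM "modified immediate" is an 8-bit value rotated right by an even
// amount; arm_i32imm also accepts constants that split into two such chunks
// (e.g. built with mov+orr). 0x00FF00FF is a typical two-part value: it is
// not a single modified immediate, but 0xFF and 0x00FF0000 both are.
#include <cstdint>

static bool isModifiedImm(uint32_t V) {   // sketch of the single-chunk test
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    uint32_t Rotl = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
    if (Rotl <= 0xFFu)
      return true;
  }
  return false;
}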
/// imm0_1 predicate - Immediate in the range [0,1].
def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; }
@@ -2191,6 +2213,9 @@ def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
let Inst = 0xe7ffdefe;
}
+def : Pat<(debugtrap), (BKPT 0)>, Requires<[IsARM, HasV5T]>;
+def : Pat<(debugtrap), (UDF 254)>, Requires<[IsARM, NoV5T]>;
+
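// Illustrative sketch: llvm.debugtrap is what Clang emits for
// __builtin_debugtrap() (frontend behaviour assumed here), so with the
// patterns above a helper like this selects "bkpt #0" on Armv5T+ and
// "udf #254" on pre-v5T cores:
inline void debugBreak() { __builtin_debugtrap(); }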
// Address computation and loads and stores in PIC mode.
let isNotDuplicable = 1 in {
def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
@@ -3321,7 +3346,7 @@ multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
let hasSideEffects = 0 in {
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, variadicOpsAreDefs = 1 in
defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
IIC_iLoad_mu>, ComplexDeprecationPredicate<"ARMLoad">;
@@ -3519,10 +3544,14 @@ def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot),
def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
def : ARMV6Pat<(int_arm_sxtb16 GPR:$Src),
(SXTB16 GPR:$Src, 0)>;
+def : ARMV6Pat<(int_arm_sxtb16 (rotr GPR:$Src, rot_imm:$rot)),
+ (SXTB16 GPR:$Src, rot_imm:$rot)>;
def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, GPR:$RHS),
(SXTAB16 GPR:$LHS, GPR:$RHS, 0)>;
+def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)),
+ (SXTAB16 GPR:$LHS, GPR:$RHS, rot_imm:$rot)>;
// Zero extenders
@@ -3544,6 +3573,8 @@ def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(UXTB16 GPR:$Src, 1)>;
def : ARMV6Pat<(int_arm_uxtb16 GPR:$Src),
(UXTB16 GPR:$Src, 0)>;
+def : ARMV6Pat<(int_arm_uxtb16 (rotr GPR:$Src, rot_imm:$rot)),
+ (UXTB16 GPR:$Src, rot_imm:$rot)>;
def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -3560,6 +3591,8 @@ def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, GPR:$RHS),
(UXTAB16 GPR:$LHS, GPR:$RHS, 0)>;
+def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)),
+ (UXTAB16 GPR:$LHS, GPR:$RHS, rot_imm:$rot)>;
def SBFX : I<(outs GPRnopc:$Rd),
@@ -3620,6 +3653,14 @@ let isAdd = 1 in
defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMaddc, 1>;
defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>;
+def : ARMPat<(ARMsubs GPR:$Rn, mod_imm:$imm), (SUBSri $Rn, mod_imm:$imm)>;
+def : ARMPat<(ARMsubs GPR:$Rn, GPR:$Rm), (SUBSrr $Rn, $Rm)>;
+def : ARMPat<(ARMsubs GPR:$Rn, so_reg_imm:$shift),
+ (SUBSrsi $Rn, so_reg_imm:$shift)>;
+def : ARMPat<(ARMsubs GPR:$Rn, so_reg_reg:$shift),
+ (SUBSrsr $Rn, so_reg_reg:$shift)>;
+
+
let isAdd = 1 in
defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>;
@@ -4211,29 +4252,25 @@ def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
multiclass AI_smul<string opc> {
def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
- (sext_inreg GPR:$Rm, i16)))]>,
+ [(set GPR:$Rd, (bb_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
- (sra GPR:$Rm, (i32 16))))]>,
+ [(set GPR:$Rd, (bt_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
- (sext_inreg GPR:$Rm, i16)))]>,
+ [(set GPR:$Rd, (tb_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
- (sra GPR:$Rm, (i32 16))))]>,
+ [(set GPR:$Rd, (tt_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
@@ -4257,35 +4294,31 @@ multiclass AI_smla<string opc> {
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd, (add GPR:$Ra,
- (mul (sext_inreg GPRnopc:$Rn, i16),
- (sext_inreg GPRnopc:$Rm, i16))))]>,
+ (bb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16),
- (sra GPRnopc:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (bt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
- (sext_inreg GPRnopc:$Rm, i16))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (tb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
- (sra GPRnopc:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (tt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
@@ -4863,6 +4896,14 @@ def TSB : AInoP<(outs), (ins tsb_opt:$opt), MiscFrm, NoItinerary,
}
+// Armv8.5-A speculation barrier
+def SB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "sb", "", []>,
+ Requires<[IsARM, HasSB]>, Sched<[]> {
+ let Inst{31-0} = 0xf57ff070;
+ let Unpredictable = 0x000fff0f;
+ let hasSideEffects = 1;
+}
+
let usesCustomInserter = 1, Defs = [CPSR] in {
// Pseudo instruction that combines movs + predicated rsbmi
@@ -4870,7 +4911,7 @@ let usesCustomInserter = 1, Defs = [CPSR] in {
def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
}
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, Defs = [CPSR] in {
def COPY_STRUCT_BYVAL_I32 : PseudoInst<
(outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
NoItinerary,
@@ -5778,26 +5819,21 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
// smul* and smla*
def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
- (SMULBB GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
- (SMULTB GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul sext_16_node:$a, sext_16_node:$b)),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
- (SMLATB GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sext_bottom_16 GPR:$b)),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sext_top_16 GPR:$b)),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sext_top_16 GPR:$a), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_bottom_16 GPR:$b))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_top_16 GPR:$b))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul (sext_top_16 GPR:$a), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b),
(SMULBB GPR:$a, GPR:$b)>;
@@ -5902,6 +5938,8 @@ include "ARMInstrNEON.td"
// Memory barriers
def : InstAlias<"dmb", (DMB 0xf), 0>, Requires<[IsARM, HasDB]>;
def : InstAlias<"dsb", (DSB 0xf), 0>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"ssbb", (DSB 0x0), 1>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"pssbb", (DSB 0x4), 1>, Requires<[IsARM, HasDB]>;
def : InstAlias<"isb", (ISB 0xf), 0>, Requires<[IsARM, HasDB]>;
// Armv8-R 'Data Full Barrier'
def : InstAlias<"dfb", (DSB 0xc), 1>, Requires<[IsARM, HasDFB]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 4525eec8da03..96986e74415b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -4305,17 +4305,29 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
(v2f32 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-
+def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
+ (v8f16 (NEONvduplane (v8f16 QPR:$src2), imm:$lane)))),
+ (v8f16 (VMULslhq(v8f16 QPR:$src1),
+ (v4f16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
(VMULslfd DPR:$Rn,
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
(i32 0))>;
+def : Pat<(v4f16 (fmul DPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
+ (VMULslhd DPR:$Rn,
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
+ (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
(VMULslfq QPR:$Rn,
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
(i32 0))>;
-
+def : Pat<(v8f16 (fmul QPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
+ (VMULslhq QPR:$Rn,
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
+ (i32 0))>;
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
@@ -4390,16 +4402,16 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
v4f16, fmul_su, fadd_mlx>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
v8f16, fmul_su, fadd_mlx>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4620,16 +4632,16 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
v4f16, fmul, fsub>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
v8f16, fmul, fsub>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4734,6 +4746,12 @@ def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
+def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
+ (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON,HasFullFP16]>;
+def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
+ (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;
@@ -5066,7 +5084,7 @@ def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
"f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
Requires<[HasNEON, HasFullFP16]>;
def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
- "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>,
+ "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
Requires<[HasNEON, HasFullFP16]>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
@@ -5091,6 +5109,54 @@ def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
(VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
}
+// +fp16fml Floating Point Multiplication Variants
+let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {
+
+class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
+ RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
+ : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
+ asm, "f16", "$Vd, $Vn, $Vm", "", []>;
+
+class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
+ RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
+ : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
+ asm, "f16", "$Vd, $Vn, $Vm", "", []>;
+
+class VFMQ0<string opc, bits<2> S>
+ : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
+ (ins SPR:$Vn, SPR:$Vm, VectorIndex32:$idx),
+ IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
+ bit idx;
+ let Inst{3} = idx;
+ let Inst{19-16} = Vn{4-1};
+ let Inst{7} = Vn{0};
+ let Inst{5} = Vm{0};
+ let Inst{2-0} = Vm{3-1};
+}
+
+class VFMQ1<string opc, bits<2> S>
+ : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm, VectorIndex16:$idx),
+ IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
+ bits<2> idx;
+ let Inst{5} = idx{1};
+ let Inst{3} = idx{0};
+}
+
+let hasNoSchedulingInfo = 1 in {
+// op1 op2 op3
+def VFMALD : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
+def VFMSLD : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
+def VFMALQ : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
+def VFMSLQ : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
+def VFMALDI : VFMQ0<"vfmal", 0b00>;
+def VFMSLDI : VFMQ0<"vfmsl", 0b01>;
+def VFMALQI : VFMQ1<"vfmal", 0b00>;
+def VFMSLQI : VFMQ1<"vfmsl", 0b01>;
+}
+} // HasNEON, HasFP16FML
+
+
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
(VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
@@ -5455,17 +5521,17 @@ defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
"vmax", "u", umax, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmax", "f32",
- v2f32, v2f32, fmaxnan, 1>;
+ v2f32, v2f32, fmaximum, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmax", "f32",
- v4f32, v4f32, fmaxnan, 1>;
+ v4f32, v4f32, fmaximum, 1>;
def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmax", "f16",
- v4f16, v4f16, fmaxnan, 1>,
+ v4f16, v4f16, fmaximum, 1>,
Requires<[HasNEON, HasFullFP16]>;
def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmax", "f16",
- v8f16, v8f16, fmaxnan, 1>,
+ v8f16, v8f16, fmaximum, 1>,
Requires<[HasNEON, HasFullFP16]>;
// VMAXNM
@@ -5497,17 +5563,17 @@ defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
"vmin", "u", umin, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmin", "f32",
- v2f32, v2f32, fminnan, 1>;
+ v2f32, v2f32, fminimum, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmin", "f32",
- v4f32, v4f32, fminnan, 1>;
+ v4f32, v4f32, fminimum, 1>;
def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmin", "f16",
- v4f16, v4f16, fminnan, 1>,
+ v4f16, v4f16, fminimum, 1>,
Requires<[HasNEON, HasFullFP16]>;
def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmin", "f16",
- v8f16, v8f16, fminnan, 1>,
+ v8f16, v8f16, fminimum, 1>,
Requires<[HasNEON, HasFullFP16]>;
// VMINNM
@@ -6318,6 +6384,9 @@ def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
let Inst{19} = lane{0};
}
+def : Pat<(v4f16 (NEONvduplane (v4f16 DPR:$Vm), imm:$lane)),
+ (VDUPLN32d DPR:$Vm, imm:$lane)>;
+
def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
(VDUPLN32d DPR:$Vm, imm:$lane)>;
@@ -6332,6 +6401,10 @@ def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
(v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v8f16 (NEONvduplane (v8f16 QPR:$src), imm:$lane)),
+ (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
(v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
@@ -6341,12 +6414,18 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f16 (NEONvdup HPR:$src)),
+ (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
+ HPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
(v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
(v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$src, ssub_0), (i32 0)))>;
+def : Pat<(v8f16 (NEONvdup HPR:$src)),
+ (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
+ HPR:$src, ssub_0), (i32 0)))>;
// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
@@ -6558,6 +6637,8 @@ def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+def : Pat<(v8f16 (NEONvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
+def : Pat<(v4f16 (NEONvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
// VREV32 : Vector Reverse elements within 32-bit words
@@ -6647,13 +6728,14 @@ def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{10-9} = index{1-0};
let Inst{8} = 0b0;
}
+def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
+ (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
+
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{10} = index{0};
let Inst{9-8} = 0b00;
}
-def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
- (v2f32 DPR:$Vm),
- (i32 imm:$index))),
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
@@ -6663,6 +6745,9 @@ def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
+def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
+ (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
@@ -6671,9 +6756,7 @@ def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
let Inst{11} = index{0};
let Inst{10-8} = 0b000;
}
-def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
- (v4f32 QPR:$Vm),
- (i32 imm:$index))),
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
(VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
// VTRN : Vector Transpose
@@ -7001,19 +7084,19 @@ def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
-def : N3VSPatFP16<fmaxnan, VMAXhd>, Requires<[HasFullFP16]>;
-def : N3VSPatFP16<fminnan, VMINhd>, Requires<[HasFullFP16]>;
-def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
-def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
+def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
+def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
+def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
+def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index 88aab47a79bf..b20b34eaa6a9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -781,7 +781,7 @@ defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
// These require base address to be written back or one of the loaded regs.
let hasSideEffects = 0 in {
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, variadicOpsAreDefs = 1 in
def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> {
bits<3> Rn;
@@ -826,7 +826,8 @@ def : InstAlias<"ldm${p} $Rn!, $regs",
(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs), 0>,
Requires<[IsThumb, IsThumb1Only]>;
-let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1,
+ variadicOpsAreDefs = 1 in
def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
IIC_iPop,
"pop${p}\t$regs", []>,
@@ -1343,8 +1344,20 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
tGPR:$Rm))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
+
+ def tRSBS : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rd, CPSR, (ARMsubc 0, tGPR:$Rn))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
}
+
+def : T1Pat<(ARMsubs tGPR:$Rn, tGPR:$Rm), (tSUBSrr $Rn, $Rm)>;
+def : T1Pat<(ARMsubs tGPR:$Rn, imm0_7:$imm3), (tSUBSi3 $Rn, imm0_7:$imm3)>;
+def : T1Pat<(ARMsubs tGPR:$Rn, imm0_255:$imm8), (tSUBSi8 $Rn, imm0_255:$imm8)>;
+
+
// Sign-extend byte
def tSXTB : // A8.6.222
T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1380,6 +1393,9 @@ def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8",
let Inst{7-0} = imm8;
}
+def : Pat<(debugtrap), (tBKPT 0)>, Requires<[IsThumb, HasV5T]>;
+def : Pat<(debugtrap), (tUDF 254)>, Requires<[IsThumb, NoV5T]>;
+
def t__brkdiv0 : TI<(outs), (ins), IIC_Br, "__brkdiv0",
[(int_arm_undefined 249)]>, Encoding16,
Requires<[IsThumb, IsWindows]> {
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index f67075fbf9fd..7a6673b49d57 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -1775,7 +1775,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
let hasSideEffects = 0 in {
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, variadicOpsAreDefs = 1 in
defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
multiclass thumb2_st_mult<string asm, InstrItinClass itin,
@@ -1997,6 +1997,10 @@ def : Thumb2DSPPat<(int_arm_sxtb16 rGPR:$Rn),
(t2SXTB16 rGPR:$Rn, 0)>;
def : Thumb2DSPPat<(int_arm_sxtab16 rGPR:$Rn, rGPR:$Rm),
(t2SXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>;
+def : Thumb2DSPPat<(int_arm_sxtb16 (rotr rGPR:$Rn, rot_imm:$rot)),
+ (t2SXTB16 rGPR:$Rn, rot_imm:$rot)>;
+def : Thumb2DSPPat<(int_arm_sxtab16 rGPR:$Rn, (rotr rGPR:$Rm, rot_imm:$rot)),
+ (t2SXTAB16 rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
// A simple right-shift can also be used in most cases (the exception is the
@@ -2032,6 +2036,8 @@ def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF),
def : Thumb2DSPPat<(int_arm_uxtb16 rGPR:$Rm),
(t2UXTB16 rGPR:$Rm, 0)>;
+def : Thumb2DSPPat<(int_arm_uxtb16 (rotr rGPR:$Rn, rot_imm:$rot)),
+ (t2UXTB16 rGPR:$Rn, rot_imm:$rot)>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
// The transformation should probably be done as a combiner action
@@ -2062,6 +2068,8 @@ def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
(t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, rGPR:$Rm),
(t2UXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>;
+def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, (rotr rGPR:$Rm, rot_imm:$rot)),
+ (t2UXTAB16 rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
}
@@ -2086,6 +2094,12 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>;
defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMaddc, 1>;
defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMsubc>;
+def : T2Pat<(ARMsubs GPRnopc:$Rn, t2_so_imm:$imm),
+ (t2SUBSri $Rn, t2_so_imm:$imm)>;
+def : T2Pat<(ARMsubs GPRnopc:$Rn, rGPR:$Rm), (t2SUBSrr $Rn, $Rm)>;
+def : T2Pat<(ARMsubs GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
+ (t2SUBSrs $Rn, t2_so_reg:$ShiftedRm)>;
+
let hasPostISelHook = 1 in {
defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>;
@@ -2718,28 +2732,25 @@ class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc,
}
def t2SMULBB : T2ThreeRegSMUL<0b001, 0b00, "smulbb",
- [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16)))]>;
+ [(set rGPR:$Rd, (bb_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULBT : T2ThreeRegSMUL<0b001, 0b01, "smulbt",
- [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16))))]>;
+ [(set rGPR:$Rd, (bt_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb",
- [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16)))]>;
+ [(set rGPR:$Rd, (tb_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt",
- [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16))))]>;
+ [(set rGPR:$Rd, (tt_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb",
[(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt",
[(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>;
-def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
- (t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
-def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))),
+def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_bottom_16 rGPR:$Rm)),
+ (t2SMULBB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_top_16 rGPR:$Rm)),
(t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
-def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm),
+def : Thumb2DSPPat<(mul (sext_top_16 rGPR:$Rn), sext_16_node:$Rm),
(t2SMULTB rGPR:$Rn, rGPR:$Rm)>;
+
def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm),
(t2SMULBB rGPR:$Rn, rGPR:$Rm)>;
def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm),
@@ -2767,18 +2778,13 @@ class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc,
}
def t2SMLABB : T2FourRegSMLA<0b001, 0b00, "smlabb",
- [(set rGPR:$Rd, (add rGPR:$Ra,
- (mul (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (bb_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLABT : T2FourRegSMLA<0b001, 0b01, "smlabt",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16)))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (bt_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (tb_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16)))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (tt_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb",
[(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
@@ -2786,11 +2792,14 @@ def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
(t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
-def : Thumb2DSPMulPat<(add rGPR:$Ra,
- (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))),
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn,
+ (sext_bottom_16 rGPR:$Rm))),
+ (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn,
+ (sext_top_16 rGPR:$Rm))),
(t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
-def : Thumb2DSPMulPat<(add rGPR:$Ra,
- (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul (sext_top_16 rGPR:$Rn),
+ sext_16_node:$Rm)),
(t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
def : Thumb2DSPPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc),
@@ -3223,6 +3232,14 @@ def t2TSB : T2I<(outs), (ins tsb_opt:$opt), NoItinerary,
}
}
+// Armv8.5-A speculation barrier
+def t2SB : Thumb2XI<(outs), (ins), AddrModeNone, 4, NoItinerary, "sb", "", []>,
+ Requires<[IsThumb2, HasSB]>, Sched<[]> {
+ let Inst{31-0} = 0xf3bf8f70;
+ let Unpredictable = 0x000f2f0f;
+ let hasSideEffects = 1;
+}
+
class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
@@ -4429,13 +4446,13 @@ def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
(t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
-let AddedComplexity = 8 in {
- def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
- def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
- def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
- def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
- def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
- def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
+let AddedComplexity = 8, Predicates = [IsThumb, HasAcquireRelease, HasV7Clrex] in {
+ def : Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
+ def : Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
+ def : Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
+ def : Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
+ def : Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
+ def : Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
}
@@ -4538,6 +4555,12 @@ def : t2InstAlias<"tst${p} $Rn, $Rm",
def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>;
def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p), 0>, Requires<[HasDB]>;
def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p), 0>, Requires<[HasDB]>;
+
+// Non-predicable aliases of a predicable DSB: the predicate is (14, 0) where
+// 14 = AL (always execute) and 0 = "instruction doesn't read the CPSR".
+def : InstAlias<"ssbb", (t2DSB 0x0, 14, 0), 1>, Requires<[HasDB, IsThumb2]>;
+def : InstAlias<"pssbb", (t2DSB 0x4, 14, 0), 1>, Requires<[HasDB, IsThumb2]>;
+
// Armv8-R 'Data Full Barrier'
def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index 2f14b78c91fd..b58730c452f7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -725,9 +725,11 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
}
def : FullFP16Pat<(f64 (fpextend HPR:$Sm)),
- (VCVTBHD (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+ (VCVTBHD (COPY_TO_REGCLASS HPR:$Sm, SPR))>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
- (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+ (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
@@ -746,9 +748,11 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
}
def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
- (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>;
+ (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
- (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
+ (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
@@ -1810,7 +1814,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
@@ -1819,7 +1823,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1832,17 +1836,17 @@ def VMLAH : AHbI<0b11100, 0b00, 0, 0,
[(set HPR:$Sd, (fadd_mlx (fmul_su HPR:$Sn, HPR:$Sm),
HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
def : Pat<(fadd_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
(VMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP, UseFPVMLx]>;
def VMLSD : ADbI<0b11100, 0b00, 1, 0,
@@ -1851,7 +1855,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
@@ -1860,7 +1864,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1873,17 +1877,17 @@ def VMLSH : AHbI<0b11100, 0b00, 1, 0,
[(set HPR:$Sd, (fadd_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
(VMLSH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1891,7 +1895,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
@@ -1900,7 +1904,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1913,29 +1917,29 @@ def VNMLAH : AHbI<0b11100, 0b01, 1, 0,
[(set HPR:$Sd, (fsub_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
// (-(a * b) - dst) -> -(dst + (a * b))
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg (fmul_su HPR:$a, HPR:$b)), HPR:$dstin),
(VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
// (-dst - (a * b)) -> -(dst + (a * b))
def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg HPR:$dstin), (fmul_su HPR:$a, HPR:$b)),
(VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1943,7 +1947,7 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
@@ -1951,7 +1955,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1963,17 +1967,17 @@ def VNMLSH : AHbI<0b11100, 0b01, 0, 0,
IIC_fpMAC16, "vnmls", ".f16\t$Sd, $Sn, $Sm",
[(set HPR:$Sd, (fsub_mlx (fmul_su HPR:$Sn, HPR:$Sm), HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fmul_su HPR:$a, HPR:$b), HPR:$dstin),
(VNMLSH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
//===----------------------------------------------------------------------===//
// Fused FP Multiply-Accumulate Operations.
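A hedged sketch (assumed, not from the diff) of the unfused shape the VMLA/VMLS patterns above match, now that DontUseFusedMAC no longer appears in their predicates; the fused variants are covered by the FMA section that follows.

  // Hypothetical example; whether VMLA.F32 or a fused VFMA.F32 is chosen
  // depends on -ffp-contract and the UseFPVMLx / NEON subtarget features.
  float mla_f32(float acc, float a, float b) {
    return acc + a * b;   // fadd(fmul(a, b), acc): a VMLAS pattern candidate
  }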
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 6692a4d41420..293e734c97cd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -76,6 +76,42 @@ private:
const ARMRegisterBankInfo &RBI;
const ARMSubtarget &STI;
+ // Store the opcodes that we might need, so we don't have to check what kind
+ // of subtarget (ARM vs Thumb) we have all the time.
+ struct OpcodeCache {
+ unsigned ZEXT16;
+ unsigned SEXT16;
+
+ unsigned ZEXT8;
+ unsigned SEXT8;
+
+ // Used for implementing ZEXT/SEXT from i1
+ unsigned AND;
+ unsigned RSB;
+
+ unsigned STORE32;
+ unsigned LOAD32;
+
+ unsigned STORE16;
+ unsigned LOAD16;
+
+ unsigned STORE8;
+ unsigned LOAD8;
+
+ OpcodeCache(const ARMSubtarget &STI);
+ } const Opcodes;
+
+ // Select the opcode for simple extensions (that translate to a single SXT/UXT
+ // instruction). Extension operations more complicated than that should not
+ // invoke this. Returns the original opcode if it doesn't know how to select a
+ // better one.
+ unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) const;
+
+ // Select the opcode for simple loads and stores. Returns the original opcode
+ // if it doesn't know how to select a better one.
+ unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
+ unsigned Size) const;
+
#define GET_GLOBALISEL_PREDICATES_DECL
#include "ARMGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL
@@ -107,7 +143,7 @@ ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM,
const ARMSubtarget &STI,
const ARMRegisterBankInfo &RBI)
: InstructionSelector(), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI),
+ TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI), Opcodes(STI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "ARMGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
@@ -225,41 +261,63 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB,
return true;
}
-/// Select the opcode for simple extensions (that translate to a single SXT/UXT
-/// instruction). Extension operations more complicated than that should not
-/// invoke this. Returns the original opcode if it doesn't know how to select a
-/// better one.
-static unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) {
+ARMInstructionSelector::OpcodeCache::OpcodeCache(const ARMSubtarget &STI) {
+ bool isThumb = STI.isThumb();
+
+ using namespace TargetOpcode;
+
+#define STORE_OPCODE(VAR, OPC) VAR = isThumb ? ARM::t2##OPC : ARM::OPC
+ STORE_OPCODE(SEXT16, SXTH);
+ STORE_OPCODE(ZEXT16, UXTH);
+
+ STORE_OPCODE(SEXT8, SXTB);
+ STORE_OPCODE(ZEXT8, UXTB);
+
+ STORE_OPCODE(AND, ANDri);
+ STORE_OPCODE(RSB, RSBri);
+
+ STORE_OPCODE(STORE32, STRi12);
+ STORE_OPCODE(LOAD32, LDRi12);
+
+ // LDRH/STRH are special...
+ STORE16 = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ LOAD16 = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+
+ STORE_OPCODE(STORE8, STRBi12);
+ STORE_OPCODE(LOAD8, LDRBi12);
+#undef STORE_OPCODE
+}
+
+unsigned ARMInstructionSelector::selectSimpleExtOpc(unsigned Opc,
+ unsigned Size) const {
using namespace TargetOpcode;
if (Size != 8 && Size != 16)
return Opc;
if (Opc == G_SEXT)
- return Size == 8 ? ARM::SXTB : ARM::SXTH;
+ return Size == 8 ? Opcodes.SEXT8 : Opcodes.SEXT16;
if (Opc == G_ZEXT)
- return Size == 8 ? ARM::UXTB : ARM::UXTH;
+ return Size == 8 ? Opcodes.ZEXT8 : Opcodes.ZEXT16;
return Opc;
}
-/// Select the opcode for simple loads and stores. For types smaller than 32
-/// bits, the value will be zero extended. Returns the original opcode if it
-/// doesn't know how to select a better one.
-static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
- unsigned Size) {
+unsigned ARMInstructionSelector::selectLoadStoreOpCode(unsigned Opc,
+ unsigned RegBank,
+ unsigned Size) const {
bool isStore = Opc == TargetOpcode::G_STORE;
if (RegBank == ARM::GPRRegBankID) {
switch (Size) {
case 1:
case 8:
- return isStore ? ARM::STRBi12 : ARM::LDRBi12;
+ return isStore ? Opcodes.STORE8 : Opcodes.LOAD8;
case 16:
- return isStore ? ARM::STRH : ARM::LDRH;
+ return isStore ? Opcodes.STORE16 : Opcodes.LOAD16;
case 32:
- return isStore ? ARM::STRi12 : ARM::LDRi12;
+ return isStore ? Opcodes.STORE32 : Opcodes.LOAD32;
default:
return Opc;
}
@@ -702,7 +760,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
switch (SrcSize) {
case 1: {
// ZExt boils down to & 0x1; for SExt we also subtract that from 0
- I.setDesc(TII.get(ARM::ANDri));
+ I.setDesc(TII.get(Opcodes.AND));
MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());
if (isSExt) {
@@ -714,7 +772,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
auto InsertBefore = std::next(I.getIterator());
auto SubI =
- BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri))
+ BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(Opcodes.RSB))
.addDef(SExtResult)
.addUse(AndResult)
.addImm(0)
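The comment above ("ZExt boils down to & 0x1; for SExt we also subtract that from 0") is the usual bit trick; a small stand-alone sketch of it (hypothetical, written with plain integers rather than MIR):

  #include <cstdint>
  // zext i1: keep only bit 0 (the ANDri with #1).
  uint32_t zext_i1(uint32_t x) { return x & 1u; }
  // sext i1: 0 - (x & 1) yields 0 or 0xFFFFFFFF (ANDri followed by RSBri #0).
  uint32_t sext_i1(uint32_t x) { return 0u - (x & 1u); }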
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 891418306903..4a0c24d58474 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -75,13 +75,48 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
- getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+ if (ST.isThumb1Only()) {
+ // Thumb1 is not supported yet.
+ computeTables();
+ verify(*ST.getInstrInfo());
+ return;
+ }
+
+ getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
+ .legalForCartesianProduct({s32}, {s1, s8, s16});
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32})
.minScalar(0, s32);
+ getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
+ getActionDefinitionsBuilder(G_PTRTOINT).legalFor({{s32, p0}});
+
+ getActionDefinitionsBuilder(G_CONSTANT)
+ .legalFor({s32, p0})
+ .clampScalar(0, s32, s32);
+
+ // We're keeping these builders around because we'll want to add support for
+ // floating point to them.
+ auto &LoadStoreBuilder =
+ getActionDefinitionsBuilder({G_LOAD, G_STORE})
+ .legalForTypesWithMemSize({
+ {s1, p0, 8},
+ {s8, p0, 8},
+ {s16, p0, 16},
+ {s32, p0, 32},
+ {p0, p0, 32}});
+
+ if (ST.isThumb()) {
+ // FIXME: merge with the code for non-Thumb.
+ computeTables();
+ verify(*ST.getInstrInfo());
+ return;
+ }
+
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
+ getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+
if (ST.hasDivideInARMMode())
getActionDefinitionsBuilder({G_SDIV, G_UDIV})
.legalFor({s32})
@@ -101,14 +136,24 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, s32}, Libcall);
}
- getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
- .legalForCartesianProduct({s32}, {s1, s8, s16});
-
- getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
- getActionDefinitionsBuilder(G_PTRTOINT).legalFor({{s32, p0}});
-
getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL}).legalFor({s32});
+ if (ST.hasV5TOps()) {
+ getActionDefinitionsBuilder(G_CTLZ)
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+ getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+ .lowerFor({s32})
+ .clampScalar(0, s32, s32);
+ } else {
+ getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+ .libcallFor({s32})
+ .clampScalar(0, s32, s32);
+ getActionDefinitionsBuilder(G_CTLZ)
+ .lowerFor({s32})
+ .clampScalar(0, s32, s32);
+ }
+
getActionDefinitionsBuilder(G_GEP).legalFor({{p0, s32}});
getActionDefinitionsBuilder(G_SELECT).legalForCartesianProduct({s32, p0},
@@ -116,20 +161,12 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});
- getActionDefinitionsBuilder(G_CONSTANT)
- .legalFor({s32, p0})
- .clampScalar(0, s32, s32);
-
getActionDefinitionsBuilder(G_ICMP)
.legalForCartesianProduct({s1}, {s32, p0})
.minScalar(1, s32);
// We're keeping these builders around because we'll want to add support for
// floating point to them.
- auto &LoadStoreBuilder =
- getActionDefinitionsBuilder({G_LOAD, G_STORE})
- .legalForCartesianProduct({s1, s8, s16, s32, p0}, {p0});
-
auto &PhiBuilder =
getActionDefinitionsBuilder(G_PHI).legalFor({s32, p0}).minScalar(0, s32);
@@ -302,7 +339,8 @@ ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate,
bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
using namespace TargetOpcode;
MIRBuilder.setInstr(MI);
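The G_CTLZ rules above rely on the usual relationship between the two count-leading-zeros opcodes: with ARMv5T+ CLZ the plain form is legal and the zero-undef form reuses it, otherwise the zero-undef form becomes a libcall and the plain form is lowered with an explicit zero check. A minimal sketch (hypothetical, using the Clang/GCC builtin as the zero-undef primitive):

  #include <cstdint>
  uint32_t ctlz32(uint32_t x) {
    // __builtin_clz is undefined at 0, so guard it, mirroring how G_CTLZ can
    // be expressed in terms of G_CTLZ_ZERO_UNDEF.
    return x == 0 ? 32u : static_cast<uint32_t>(__builtin_clz(x));
  }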
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
index 78ab9412c04b..527bf87f1093 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/IR/Instructions.h"
@@ -29,7 +30,8 @@ public:
ARMLegalizerInfo(const ARMSubtarget &ST);
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const override;
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const override;
private:
void setFCmpLibcallsGNU();
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index db5f28480e90..6da7430a8e51 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1027,6 +1027,18 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
CanMergeToLSMulti = CanMergeToLSDouble = false;
+  // vldm / vstm limits are 32 for S variants, 16 for D variants.
+ unsigned Limit;
+ switch (Opcode) {
+ default:
+ Limit = UINT_MAX;
+ break;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ Limit = 16;
+ break;
+ }
+
// Merge following instructions where possible.
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
int NewOffset = MemOps[I].Offset;
@@ -1036,6 +1048,8 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
unsigned Reg = MO.getReg();
if (Reg == ARM::SP || Reg == ARM::PC)
break;
+ if (Count == Limit)
+ break;
// See if the current load/store may be part of a multi load/store.
unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
@@ -1303,7 +1317,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
MBB.erase(MBBI);
return true;
@@ -1527,7 +1541,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
// Transfer implicit operands.
for (const MachineOperand &MO : MI.implicit_operands())
MIB.add(MO);
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.setMemRefs(MI.memoperands());
MBB.erase(MBBI);
return true;
@@ -1834,7 +1848,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
return M0->InsertPos < M1->InsertPos;
};
- llvm::sort(Candidates.begin(), Candidates.end(), LessThan);
+ llvm::sort(Candidates, LessThan);
// Go through list of candidates and merge.
bool Changed = false;
@@ -2172,13 +2186,12 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
bool RetVal = false;
// Sort by offset (in reverse order).
- llvm::sort(Ops.begin(), Ops.end(),
- [](const MachineInstr *LHS, const MachineInstr *RHS) {
- int LOffset = getMemoryOpOffset(*LHS);
- int ROffset = getMemoryOpOffset(*RHS);
- assert(LHS == RHS || LOffset != ROffset);
- return LOffset > ROffset;
- });
+ llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
+ int LOffset = getMemoryOpOffset(*LHS);
+ int ROffset = getMemoryOpOffset(*RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ });
// The loads / stores of the same base are in order. Scan them from first to
// last and check for the following:
@@ -2290,7 +2303,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
- MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
+ MIB.cloneMergedMemRefs({Op0, Op1});
LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumLDRDFormed;
} else {
@@ -2304,7 +2317,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
- MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
+ MIB.cloneMergedMemRefs({Op0, Op1});
LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumSTRDFormed;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp
index d11fe9d5c502..df1da9d8e474 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp
@@ -23,20 +23,13 @@ namespace llvm {
static bool isAESPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- switch(SecondOpcode) {
+ switch(SecondMI.getOpcode()) {
// AES encode.
case ARM::AESMC :
- return FirstOpcode == ARM::AESE ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ return FirstMI == nullptr || FirstMI->getOpcode() == ARM::AESE;
// AES decode.
case ARM::AESIMC:
- return FirstOpcode == ARM::AESD ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ return FirstMI == nullptr || FirstMI->getOpcode() == ARM::AESD;
}
return false;
@@ -46,15 +39,8 @@ static bool isAESPair(const MachineInstr *FirstMI,
static bool isLiteralsPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- // 32 bit immediate.
- if ((FirstOpcode == ARM::INSTRUCTION_LIST_END ||
- FirstOpcode == ARM::MOVi16) &&
- SecondOpcode == ARM::MOVTi16)
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == ARM::MOVi16) &&
+ SecondMI.getOpcode() == ARM::MOVTi16)
return true;
return false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h
index 1e4fc6687eae..b3abd7b593a1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_ARM_ARMMACROFUSION_H
+#define LLVM_LIB_TARGET_ARM_ARMMACROFUSION_H
+
#include "llvm/CodeGen/MachineScheduler.h"
namespace llvm {
@@ -22,3 +25,5 @@ namespace llvm {
std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation();
} // llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index 9d5478b76c18..fc3258914f92 100644
--- a/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -42,6 +42,10 @@ using namespace PatternMatch;
STATISTIC(NumSMLAD , "Number of smlad instructions generated");
+static cl::opt<bool>
+DisableParallelDSP("disable-arm-parallel-dsp", cl::Hidden, cl::init(false),
+ cl::desc("Disable the ARM Parallel DSP pass"));
+
namespace {
struct OpChain;
struct BinOpChain;
@@ -67,7 +71,7 @@ namespace {
virtual ~OpChain() = default;
void SetMemoryLocations() {
- const auto Size = MemoryLocation::UnknownSize;
+ const auto Size = LocationSize::unknown();
for (auto *V : AllValues) {
if (auto *I = dyn_cast<Instruction>(V)) {
if (I->mayWriteToMemory())
@@ -88,12 +92,15 @@ namespace {
struct BinOpChain : public OpChain {
ValueList LHS; // List of all (narrow) left hand operands.
ValueList RHS; // List of all (narrow) right hand operands.
+ bool Exchange = false;
BinOpChain(Instruction *I, ValueList &lhs, ValueList &rhs) :
OpChain(I, lhs), LHS(lhs), RHS(rhs) {
for (auto *V : RHS)
AllValues.push_back(V);
}
+
+ bool AreSymmetrical(BinOpChain *Other);
};
struct Reduction {
@@ -101,9 +108,9 @@ namespace {
// pattern matching.
Instruction *AccIntAdd; // The accumulating integer add statement,
// i.e, the reduction statement.
-
OpChainList MACCandidates; // The MAC candidates associated with
// this reduction statement.
+ PMACPairList PMACPairs;
Reduction (PHINode *P, Instruction *Acc) : Phi(P), AccIntAdd(Acc) { };
};
@@ -116,12 +123,16 @@ namespace {
Loop *L;
const DataLayout *DL;
Module *M;
+ std::map<LoadInst*, LoadInst*> LoadPairs;
+ std::map<LoadInst*, SmallVector<LoadInst*, 4>> SequentialLoads;
- bool InsertParallelMACs(Reduction &Reduction, PMACPairList &PMACPairs);
+ bool RecordSequentialLoads(BasicBlock *Header);
+ bool InsertParallelMACs(Reduction &Reduction);
bool AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1, MemInstList &VecMem);
- PMACPairList CreateParallelMACPairs(OpChainList &Candidates);
+ void CreateParallelMACPairs(Reduction &R);
Instruction *CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
- Instruction *Acc, Instruction *InsertAfter);
+ Instruction *Acc, bool Exchange,
+ Instruction *InsertAfter);
/// Try to match and generate: SMLAD, SMLADX - Signed Multiply Accumulate
/// Dual performs two signed 16x16-bit multiplications. It adds the
@@ -149,6 +160,8 @@ namespace {
}
bool runOnLoop(Loop *TheLoop, LPPassManager &) override {
+ if (DisableParallelDSP)
+ return false;
L = TheLoop;
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -192,7 +205,14 @@ namespace {
LoopAccessInfo LAI(L, SE, TLI, AA, DT, LI);
bool Changes = false;
- LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n\n");
+ LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n");
+ LLVM_DEBUG(dbgs() << " - " << F.getName() << "\n\n");
+
+ if (!RecordSequentialLoads(Header)) {
+ LLVM_DEBUG(dbgs() << " - No sequential loads found.\n");
+ return false;
+ }
+
Changes = MatchSMLAD(F);
return Changes;
}
@@ -245,57 +265,14 @@ static bool IsNarrowSequence(Value *V, ValueList &VL) {
return false;
}
-// Element-by-element comparison of Value lists returning true if they are
-// instructions with the same opcode or constants with the same value.
-static bool AreSymmetrical(const ValueList &VL0,
- const ValueList &VL1) {
- if (VL0.size() != VL1.size()) {
- LLVM_DEBUG(dbgs() << "Muls are mismatching operand list lengths: "
- << VL0.size() << " != " << VL1.size() << "\n");
- return false;
- }
-
- const unsigned Pairs = VL0.size();
- LLVM_DEBUG(dbgs() << "Number of operand pairs: " << Pairs << "\n");
-
- for (unsigned i = 0; i < Pairs; ++i) {
- const Value *V0 = VL0[i];
- const Value *V1 = VL1[i];
- const auto *Inst0 = dyn_cast<Instruction>(V0);
- const auto *Inst1 = dyn_cast<Instruction>(V1);
-
- LLVM_DEBUG(dbgs() << "Pair " << i << ":\n";
- dbgs() << "mul1: "; V0->dump();
- dbgs() << "mul2: "; V1->dump());
-
- if (!Inst0 || !Inst1)
- return false;
-
- if (Inst0->isSameOperationAs(Inst1)) {
- LLVM_DEBUG(dbgs() << "OK: same operation found!\n");
- continue;
- }
-
- const APInt *C0, *C1;
- if (!(match(V0, m_APInt(C0)) && match(V1, m_APInt(C1)) && C0 == C1))
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "OK: found symmetrical operand lists.\n");
- return true;
-}
-
template<typename MemInst>
static bool AreSequentialAccesses(MemInst *MemOp0, MemInst *MemOp1,
- MemInstList &VecMem, const DataLayout &DL,
- ScalarEvolution &SE) {
+ const DataLayout &DL, ScalarEvolution &SE) {
if (!MemOp0->isSimple() || !MemOp1->isSimple()) {
LLVM_DEBUG(dbgs() << "No, not touching volatile access\n");
return false;
}
if (isConsecutiveAccess(MemOp0, MemOp1, DL, SE)) {
- VecMem.push_back(MemOp0);
- VecMem.push_back(MemOp1);
LLVM_DEBUG(dbgs() << "OK: accesses are consecutive.\n");
return true;
}
@@ -318,82 +295,156 @@ bool ARMParallelDSP::AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1,
return false;
}
- return AreSequentialAccesses<LoadInst>(Ld0, Ld1, VecMem, *DL, *SE);
+ if (!LoadPairs.count(Ld0) || LoadPairs[Ld0] != Ld1)
+ return false;
+
+ VecMem.clear();
+ VecMem.push_back(Ld0);
+ VecMem.push_back(Ld1);
+ return true;
}
-PMACPairList
-ARMParallelDSP::CreateParallelMACPairs(OpChainList &Candidates) {
+/// Iterate through the block and record base, offset pairs of loads as well as
+/// maximal sequences of sequential loads.
+bool ARMParallelDSP::RecordSequentialLoads(BasicBlock *Header) {
+ SmallVector<LoadInst*, 8> Loads;
+ for (auto &I : *Header) {
+ auto *Ld = dyn_cast<LoadInst>(&I);
+ if (!Ld)
+ continue;
+ Loads.push_back(Ld);
+ }
+
+ std::map<LoadInst*, LoadInst*> BaseLoads;
+
+ for (auto *Ld0 : Loads) {
+ for (auto *Ld1 : Loads) {
+ if (Ld0 == Ld1)
+ continue;
+
+ if (AreSequentialAccesses<LoadInst>(Ld0, Ld1, *DL, *SE)) {
+ LoadPairs[Ld0] = Ld1;
+ if (BaseLoads.count(Ld0)) {
+ LoadInst *Base = BaseLoads[Ld0];
+ BaseLoads[Ld1] = Base;
+ SequentialLoads[Base].push_back(Ld1);
+ } else {
+ BaseLoads[Ld1] = Ld0;
+ SequentialLoads[Ld0].push_back(Ld1);
+ }
+ }
+ }
+ }
+ return LoadPairs.size() > 1;
+}
+
+void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
+ OpChainList &Candidates = R.MACCandidates;
+ PMACPairList &PMACPairs = R.PMACPairs;
const unsigned Elems = Candidates.size();
- PMACPairList PMACPairs;
if (Elems < 2)
- return PMACPairs;
+ return;
- // TODO: for now we simply try to match consecutive pairs i and i+1.
- // We can compare all elements, but then we need to compare and evaluate
- // different solutions.
- for(unsigned i=0; i<Elems-1; i+=2) {
- BinOpChain *PMul0 = static_cast<BinOpChain*>(Candidates[i].get());
- BinOpChain *PMul1 = static_cast<BinOpChain*>(Candidates[i+1].get());
- const Instruction *Mul0 = PMul0->Root;
- const Instruction *Mul1 = PMul1->Root;
+ auto CanPair = [&](BinOpChain *PMul0, BinOpChain *PMul1) {
+ if (!PMul0->AreSymmetrical(PMul1))
+ return false;
+
+ // The first elements of each vector should be loads with sexts. If we
+    // find that they are two pairs of consecutive loads, then these can be
+ // transformed into two wider loads and the users can be replaced with
+ // DSP intrinsics.
+ for (unsigned x = 0; x < PMul0->LHS.size(); x += 2) {
+ auto *Ld0 = dyn_cast<LoadInst>(PMul0->LHS[x]);
+ auto *Ld1 = dyn_cast<LoadInst>(PMul1->LHS[x]);
+ auto *Ld2 = dyn_cast<LoadInst>(PMul0->RHS[x]);
+ auto *Ld3 = dyn_cast<LoadInst>(PMul1->RHS[x]);
+
+ if (!Ld0 || !Ld1 || !Ld2 || !Ld3)
+ return false;
- if (Mul0 == Mul1)
+ LLVM_DEBUG(dbgs() << "Looking at operands " << x << ":\n"
+ << "\t Ld0: " << *Ld0 << "\n"
+ << "\t Ld1: " << *Ld1 << "\n"
+ << "and operands " << x + 2 << ":\n"
+ << "\t Ld2: " << *Ld2 << "\n"
+ << "\t Ld3: " << *Ld3 << "\n");
+
+ if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
+ if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+ return true;
+ } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n");
+ PMul1->Exchange = true;
+ PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+ return true;
+ }
+ } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
+ AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ LLVM_DEBUG(dbgs() << " exchanging Ld0 and Ld1\n");
+ LLVM_DEBUG(dbgs() << " and swapping muls\n");
+ PMul0->Exchange = true;
+ // Only the second operand can be exchanged, so swap the muls.
+ PMACPairs.push_back(std::make_pair(PMul1, PMul0));
+ return true;
+ }
+ }
+ return false;
+ };
+
+ SmallPtrSet<const Instruction*, 4> Paired;
+ for (unsigned i = 0; i < Elems; ++i) {
+ BinOpChain *PMul0 = static_cast<BinOpChain*>(Candidates[i].get());
+ if (Paired.count(PMul0->Root))
continue;
- LLVM_DEBUG(dbgs() << "\nCheck parallel muls:\n";
- dbgs() << "- "; Mul0->dump();
- dbgs() << "- "; Mul1->dump());
+ for (unsigned j = 0; j < Elems; ++j) {
+ if (i == j)
+ continue;
- const ValueList &Mul0_LHS = PMul0->LHS;
- const ValueList &Mul0_RHS = PMul0->RHS;
- const ValueList &Mul1_LHS = PMul1->LHS;
- const ValueList &Mul1_RHS = PMul1->RHS;
+ BinOpChain *PMul1 = static_cast<BinOpChain*>(Candidates[j].get());
+ if (Paired.count(PMul1->Root))
+ continue;
- if (!AreSymmetrical(Mul0_LHS, Mul1_LHS) ||
- !AreSymmetrical(Mul0_RHS, Mul1_RHS))
- continue;
+ const Instruction *Mul0 = PMul0->Root;
+ const Instruction *Mul1 = PMul1->Root;
+ if (Mul0 == Mul1)
+ continue;
- LLVM_DEBUG(dbgs() << "OK: mul operands list match:\n");
- // The first elements of each vector should be loads with sexts. If we find
- // that its two pairs of consecutive loads, then these can be transformed
- // into two wider loads and the users can be replaced with DSP
- // intrinsics.
- for (unsigned x = 0; x < Mul0_LHS.size(); x += 2) {
- auto *Ld0 = dyn_cast<LoadInst>(Mul0_LHS[x]);
- auto *Ld1 = dyn_cast<LoadInst>(Mul1_LHS[x]);
- auto *Ld2 = dyn_cast<LoadInst>(Mul0_RHS[x]);
- auto *Ld3 = dyn_cast<LoadInst>(Mul1_RHS[x]);
-
- LLVM_DEBUG(dbgs() << "Looking at operands " << x << ":\n";
- dbgs() << "\t mul1: "; Mul0_LHS[x]->dump();
- dbgs() << "\t mul2: "; Mul1_LHS[x]->dump();
- dbgs() << "and operands " << x + 2 << ":\n";
- dbgs() << "\t mul1: "; Mul0_RHS[x]->dump();
- dbgs() << "\t mul2: "; Mul1_RHS[x]->dump());
-
- if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd) &&
- AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
- LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
- PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+ assert(PMul0 != PMul1 && "expected different chains");
+
+ LLVM_DEBUG(dbgs() << "\nCheck parallel muls:\n";
+ dbgs() << "- "; Mul0->dump();
+ dbgs() << "- "; Mul1->dump());
+
+ LLVM_DEBUG(dbgs() << "OK: mul operands list match:\n");
+ if (CanPair(PMul0, PMul1)) {
+ Paired.insert(Mul0);
+ Paired.insert(Mul1);
+ break;
}
}
}
- return PMACPairs;
}
-bool ARMParallelDSP::InsertParallelMACs(Reduction &Reduction,
- PMACPairList &PMACPairs) {
+bool ARMParallelDSP::InsertParallelMACs(Reduction &Reduction) {
Instruction *Acc = Reduction.Phi;
Instruction *InsertAfter = Reduction.AccIntAdd;
- for (auto &Pair : PMACPairs) {
+ for (auto &Pair : Reduction.PMACPairs) {
+ BinOpChain *PMul0 = Pair.first;
+ BinOpChain *PMul1 = Pair.second;
LLVM_DEBUG(dbgs() << "Found parallel MACs!!\n";
- dbgs() << "- "; Pair.first->Root->dump();
- dbgs() << "- "; Pair.second->Root->dump());
- auto *VecLd0 = cast<LoadInst>(Pair.first->VecLd[0]);
- auto *VecLd1 = cast<LoadInst>(Pair.second->VecLd[0]);
- Acc = CreateSMLADCall(VecLd0, VecLd1, Acc, InsertAfter);
+ dbgs() << "- "; PMul0->Root->dump();
+ dbgs() << "- "; PMul1->Root->dump());
+
+ auto *VecLd0 = cast<LoadInst>(PMul0->VecLd[0]);
+ auto *VecLd1 = cast<LoadInst>(PMul1->VecLd[0]);
+ Acc = CreateSMLADCall(VecLd0, VecLd1, Acc, PMul1->Exchange, InsertAfter);
InsertAfter = Acc;
}
@@ -420,7 +471,7 @@ static void MatchReductions(Function &F, Loop *TheLoop, BasicBlock *Header,
for (PHINode &Phi : Header->phis()) {
const auto *Ty = Phi.getType();
- if (!Ty->isIntegerTy(32))
+ if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64))
continue;
const bool IsReduction =
@@ -447,10 +498,11 @@ static void MatchReductions(Function &F, Loop *TheLoop, BasicBlock *Header,
}
static void AddMACCandidate(OpChainList &Candidates,
- const Instruction *Acc,
- Value *MulOp0, Value *MulOp1, int MulOpNum) {
- Instruction *Mul = dyn_cast<Instruction>(Acc->getOperand(MulOpNum));
+ Instruction *Mul,
+ Value *MulOp0, Value *MulOp1) {
LLVM_DEBUG(dbgs() << "OK, found acc mul:\t"; Mul->dump());
+ assert(Mul->getOpcode() == Instruction::Mul &&
+ "expected mul instruction");
ValueList LHS;
ValueList RHS;
if (IsNarrowSequence<16>(MulOp0, LHS) &&
@@ -462,31 +514,38 @@ static void AddMACCandidate(OpChainList &Candidates,
static void MatchParallelMACSequences(Reduction &R,
OpChainList &Candidates) {
- const Instruction *Acc = R.AccIntAdd;
- Value *A, *MulOp0, *MulOp1;
- LLVM_DEBUG(dbgs() << "\n- Analysing:\t"; Acc->dump());
-
- // Pattern 1: the accumulator is the RHS of the mul.
- while(match(Acc, m_Add(m_Mul(m_Value(MulOp0), m_Value(MulOp1)),
- m_Value(A)))){
- AddMACCandidate(Candidates, Acc, MulOp0, MulOp1, 0);
- Acc = dyn_cast<Instruction>(A);
- }
- // Pattern 2: the accumulator is the LHS of the mul.
- while(match(Acc, m_Add(m_Value(A),
- m_Mul(m_Value(MulOp0), m_Value(MulOp1))))) {
- AddMACCandidate(Candidates, Acc, MulOp0, MulOp1, 1);
- Acc = dyn_cast<Instruction>(A);
- }
+ Instruction *Acc = R.AccIntAdd;
+ LLVM_DEBUG(dbgs() << "\n- Analysing:\t" << *Acc);
- // The last mul in the chain has a slightly different pattern:
- // the mul is the first operand
- if (match(Acc, m_Add(m_Mul(m_Value(MulOp0), m_Value(MulOp1)), m_Value(A))))
- AddMACCandidate(Candidates, Acc, MulOp0, MulOp1, 0);
+ // Returns false to signal the search should be stopped.
+ std::function<bool(Value*)> Match =
+ [&Candidates, &Match](Value *V) -> bool {
- // Because we start at the bottom of the chain, and we work our way up,
- // the muls are added in reverse program order to the list.
- std::reverse(Candidates.begin(), Candidates.end());
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ if (Match(I->getOperand(0)) || (Match(I->getOperand(1))))
+ return true;
+ break;
+ case Instruction::Mul: {
+ Value *MulOp0 = I->getOperand(0);
+ Value *MulOp1 = I->getOperand(1);
+ if (isa<SExtInst>(MulOp0) && isa<SExtInst>(MulOp1))
+ AddMACCandidate(Candidates, I, MulOp0, MulOp1);
+ return false;
+ }
+ case Instruction::SExt:
+ return Match(I->getOperand(0));
+ }
+ return false;
+ };
+
+ while (Match (Acc));
+ LLVM_DEBUG(dbgs() << "Finished matching MAC sequences, found "
+ << Candidates.size() << " candidates.\n");
}
// Collects all instructions that are not part of the MAC chains, which is the
@@ -621,45 +680,100 @@ bool ARMParallelDSP::MatchSMLAD(Function &F) {
for (auto &R : Reductions) {
if (AreAliased(AA, Reads, Writes, R.MACCandidates))
return false;
- PMACPairList PMACPairs = CreateParallelMACPairs(R.MACCandidates);
- Changed |= InsertParallelMACs(R, PMACPairs);
+ CreateParallelMACPairs(R);
+ Changed |= InsertParallelMACs(R);
}
LLVM_DEBUG(if (Changed) dbgs() << "Header block:\n"; Header->dump(););
return Changed;
}
-static void CreateLoadIns(IRBuilder<NoFolder> &IRB, Instruction *Acc,
- LoadInst **VecLd) {
- const Type *AccTy = Acc->getType();
- const unsigned AddrSpace = (*VecLd)->getPointerAddressSpace();
+static LoadInst *CreateLoadIns(IRBuilder<NoFolder> &IRB, LoadInst &BaseLoad,
+ const Type *LoadTy) {
+ const unsigned AddrSpace = BaseLoad.getPointerAddressSpace();
- Value *VecPtr = IRB.CreateBitCast((*VecLd)->getPointerOperand(),
- AccTy->getPointerTo(AddrSpace));
- *VecLd = IRB.CreateAlignedLoad(VecPtr, (*VecLd)->getAlignment());
+ Value *VecPtr = IRB.CreateBitCast(BaseLoad.getPointerOperand(),
+ LoadTy->getPointerTo(AddrSpace));
+ return IRB.CreateAlignedLoad(VecPtr, BaseLoad.getAlignment());
}
Instruction *ARMParallelDSP::CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
- Instruction *Acc,
+ Instruction *Acc, bool Exchange,
Instruction *InsertAfter) {
- LLVM_DEBUG(dbgs() << "Create SMLAD intrinsic using:\n";
- dbgs() << "- "; VecLd0->dump();
- dbgs() << "- "; VecLd1->dump();
- dbgs() << "- "; Acc->dump());
+ LLVM_DEBUG(dbgs() << "Create SMLAD intrinsic using:\n"
+ << "- " << *VecLd0 << "\n"
+ << "- " << *VecLd1 << "\n"
+ << "- " << *Acc << "\n"
+ << "Exchange: " << Exchange << "\n");
IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
++BasicBlock::iterator(InsertAfter));
// Replace the reduction chain with an intrinsic call
- CreateLoadIns(Builder, Acc, &VecLd0);
- CreateLoadIns(Builder, Acc, &VecLd1);
- Value* Args[] = { VecLd0, VecLd1, Acc };
- Function *SMLAD = Intrinsic::getDeclaration(M, Intrinsic::arm_smlad);
+ const Type *Ty = IntegerType::get(M->getContext(), 32);
+ LoadInst *NewLd0 = CreateLoadIns(Builder, VecLd0[0], Ty);
+ LoadInst *NewLd1 = CreateLoadIns(Builder, VecLd1[0], Ty);
+ Value* Args[] = { NewLd0, NewLd1, Acc };
+ Function *SMLAD = nullptr;
+ if (Exchange)
+ SMLAD = Acc->getType()->isIntegerTy(32) ?
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smladx) :
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlaldx);
+ else
+ SMLAD = Acc->getType()->isIntegerTy(32) ?
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlad) :
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlald);
CallInst *Call = Builder.CreateCall(SMLAD, Args);
NumSMLAD++;
return Call;
}
+// Compare the value lists in Other to this chain.
+bool BinOpChain::AreSymmetrical(BinOpChain *Other) {
+ // Element-by-element comparison of Value lists returning true if they are
+ // instructions with the same opcode or constants with the same value.
+ auto CompareValueList = [](const ValueList &VL0,
+ const ValueList &VL1) {
+ if (VL0.size() != VL1.size()) {
+ LLVM_DEBUG(dbgs() << "Muls are mismatching operand list lengths: "
+ << VL0.size() << " != " << VL1.size() << "\n");
+ return false;
+ }
+
+ const unsigned Pairs = VL0.size();
+ LLVM_DEBUG(dbgs() << "Number of operand pairs: " << Pairs << "\n");
+
+ for (unsigned i = 0; i < Pairs; ++i) {
+ const Value *V0 = VL0[i];
+ const Value *V1 = VL1[i];
+ const auto *Inst0 = dyn_cast<Instruction>(V0);
+ const auto *Inst1 = dyn_cast<Instruction>(V1);
+
+ LLVM_DEBUG(dbgs() << "Pair " << i << ":\n";
+ dbgs() << "mul1: "; V0->dump();
+ dbgs() << "mul2: "; V1->dump());
+
+ if (!Inst0 || !Inst1)
+ return false;
+
+ if (Inst0->isSameOperationAs(Inst1)) {
+ LLVM_DEBUG(dbgs() << "OK: same operation found!\n");
+ continue;
+ }
+
+ const APInt *C0, *C1;
+ if (!(match(V0, m_APInt(C0)) && match(V1, m_APInt(C1)) && C0 == C1))
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "OK: found symmetrical operand lists.\n");
+ return true;
+ };
+
+ return CompareValueList(LHS, Other->LHS) &&
+ CompareValueList(RHS, Other->RHS);
+}
+
Pass *llvm::createARMParallelDSPPass() {
return new ARMParallelDSP();
}
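To illustrate the kind of reduction this pass now pairs up (a hypothetical example, not taken from the change): sign-extending i16 loads feeding multiplies that accumulate into one 32-bit sum (or, with the new smlald support, a 64-bit sum), where consecutive loads can be widened and each pair of products replaced by a single intrinsic.

  // Hypothetical reduction; on a Thumb-2 DSP target the pass aims to turn each
  // pair of products into one llvm.arm.smlad call (or smladx when the operands
  // of one multiply arrive swapped).
  int dot_i16(const short *a, const short *b, int n) {
    int acc = 0;
    for (int i = 0; i < n; i += 2)
      acc += a[i] * b[i] + a[i + 1] * b[i + 1];
    return acc;
  }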
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 0e16d6bcfe2b..4f28f2dafc70 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -234,6 +234,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_GEP:
case G_INTTOPTR:
case G_PTRTOINT:
+ case G_CTLZ:
// FIXME: We're abusing the fact that everything lives in a GPR for now; in
// the real world we would use different mappings.
OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index f42cbbda1b71..b1d0761e3231 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -188,8 +188,10 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
assert(hasV6T2Ops() || !hasThumb2());
// Execute only support requires movt support
- if (genExecuteOnly())
- assert(hasV8MBaselineOps() && !NoMovt && "Cannot generate execute-only code for this target");
+ if (genExecuteOnly()) {
+ NoMovt = false;
+ assert(hasV8MBaselineOps() && "Cannot generate execute-only code for this target");
+ }
// Keep a pointer to static instruction cost data for the specified CPU.
SchedModel = getSchedModelForCPU(CPUString);
@@ -287,7 +289,13 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexR7:
case CortexM3:
case CortexR52:
- case ExynosM1:
+ break;
+ case Exynos:
+ LdStMultipleTiming = SingleIssuePlusExtras;
+ MaxInterleaveFactor = 4;
+ if (!isThumb())
+ PrefLoopAlignment = 3;
+ break;
case Kryo:
break;
case Krait:
@@ -370,7 +378,8 @@ bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
// For general targets, the prologue can grow when VFPs are allocated with
// stride 4 (more vpush instructions). But WatchOS uses a compact unwind
// format which it's more important to get right.
- return isTargetWatchABI() || (isSwift() && !MF.getFunction().optForMinSize());
+ return isTargetWatchABI() ||
+ (useWideStrideVFP() && !MF.getFunction().optForMinSize());
}
bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 74aee9a8ed38..11841b4467a2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -68,7 +68,7 @@ protected:
CortexR5,
CortexR52,
CortexR7,
- ExynosM1,
+ Exynos,
Krait,
Kryo,
Swift
@@ -106,6 +106,7 @@ protected:
ARMv82a,
ARMv83a,
ARMv84a,
+ ARMv85a,
ARMv8a,
ARMv8mBaseline,
ARMv8mMainline,
@@ -153,6 +154,7 @@ protected:
bool HasV8_2aOps = false;
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
+ bool HasV8_5aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
@@ -227,6 +229,9 @@ protected:
/// HasFullFP16 - True if subtarget supports half-precision FP operations
bool HasFullFP16 = false;
+ /// HasFP16FML - True if subtarget supports half-precision FP fml operations
+ bool HasFP16FML = false;
+
/// HasD16 - True if subtarget is limited to 16 double precision
/// FP registers for VFPv3.
bool HasD16 = false;
@@ -353,6 +358,9 @@ protected:
/// If true, loading into a D subregister will be penalized.
bool SlowLoadDSubregister = false;
+ /// If true, use a wider stride when allocating VFP registers.
+ bool UseWideStrideVFP = false;
+
/// If true, the AGU and NEON/FPU units are multiplexed.
bool HasMuxedUnits = false;
@@ -408,6 +416,9 @@ protected:
/// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
bool UseSjLjEH = false;
+ /// Has speculation barrier
+ bool HasSB = false;
+
/// Implicitly convert an instruction to a different one if its immediates
/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
bool NegativeImmediates = true;
@@ -432,6 +443,9 @@ protected:
/// operand cycle returned by the itinerary data for pre-ISel operands.
int PreISelOperandLatencyAdjustment = 2;
+ /// What alignment is preferred for loop bodies, in log2(bytes).
+ unsigned PrefLoopAlignment = 0;
+
/// IsLittle - The target is Little Endian
bool IsLittle;
@@ -529,6 +543,7 @@ public:
bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
+ bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
@@ -596,6 +611,7 @@ public:
bool hasVMLxHazards() const { return HasVMLxHazards; }
bool hasSlowOddRegister() const { return SlowOddRegister; }
bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
+ bool useWideStrideVFP() const { return UseWideStrideVFP; }
bool hasMuxedUnits() const { return HasMuxedUnits; }
bool dontWidenVMOVS() const { return DontWidenVMOVS; }
bool useSplatVFPToNeon() const { return SplatVFPToNeon; }
@@ -612,12 +628,14 @@ public:
bool hasDSP() const { return HasDSP; }
bool useNaClTrap() const { return UseNaClTrap; }
bool useSjLjEH() const { return UseSjLjEH; }
+ bool hasSB() const { return HasSB; }
bool genLongCalls() const { return GenLongCalls; }
bool genExecuteOnly() const { return GenExecuteOnly; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
bool hasFullFP16() const { return HasFullFP16; }
+ bool hasFP16FML() const { return HasFP16FML; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
@@ -796,6 +814,10 @@ public:
bool allowPositionIndependentMovt() const {
return isROPI() || !isTargetELF();
}
+
+ unsigned getPrefLoopAlignment() const {
+ return PrefLoopAlignment;
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 519f789fc215..ec02c840d5e1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -194,12 +194,6 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
/// Create an ARM architecture model.
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
@@ -210,7 +204,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool isLittle)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TargetABI(computeTargetABI(TT, CPU, Options)),
TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) {
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index d0620761ea9c..9c13359cba71 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -32,7 +32,8 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
const ARMBaseTargetMachine &ARM_TM = static_cast<const ARMBaseTargetMachine &>(TM);
bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS;
- // genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
+ bool genExecuteOnly =
+ ARM_TM.getMCSubtargetInfo()->hasFeature(ARM::FeatureExecuteOnly);
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(isAAPCS_ABI);
@@ -40,6 +41,17 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
if (isAAPCS_ABI) {
LSDASection = nullptr;
}
+
+ // Make code section unreadable when in execute-only mode
+ if (genExecuteOnly) {
+ unsigned Type = ELF::SHT_PROGBITS;
+ unsigned Flags =
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE;
+ // Since we cannot modify flags for an existing section, we create a new
+ // section with the right flags, and use 0 as the unique ID for
+ // execute-only text
+ TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U);
+ }
}
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 94f9cefe429c..f72bb8632eb7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -77,8 +77,8 @@ int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 1;
return ST->hasV6T2Ops() ? 2 : 3;
}
- // Thumb1.
- if (SImmVal >= 0 && SImmVal < 256)
+ // Thumb1, any i8 imm costs 1.
+ if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
return 1;
if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
return 2;
@@ -400,10 +400,29 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- // We only handle costs of reverse and select shuffles for now.
- if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Select)
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ if (Kind == TTI::SK_Broadcast) {
+ static const CostTblEntry NEONDupTbl[] = {
+ // VDUP handles these cases.
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
+ LT.second))
+ return LT.first * Entry->Cost;
+
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ }
if (Kind == TTI::SK_Reverse) {
static const CostTblEntry NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a
@@ -412,6 +431,8 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
@@ -542,14 +563,17 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
// vldN/vstN doesn't support vector types of i64/f64 element.
bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
- if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
+ if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
+ !UseMaskForCond && !UseMaskForGaps) {
unsigned NumElts = VecTy->getVectorNumElements();
auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
@@ -562,7 +586,8 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index e0cd2d8e26a6..2dd143d48a15 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -57,7 +57,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const FeatureBitset InlineFeatureWhitelist = {
ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
- ARM::FeatureFullFP16, ARM::FeatureHWDivThumb,
+ ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
@@ -169,7 +169,9 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index a5fbbbf26be9..3832b0112b87 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARMFeatures.h"
+#include "InstPrinter/ARMInstPrinter.h"
#include "Utils/ARMBaseInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -631,6 +632,8 @@ public:
void ReportNearMisses(SmallVectorImpl<NearMissInfo> &NearMisses, SMLoc IDLoc,
OperandVector &Operands);
+ void doBeforeLabelEmit(MCSymbol *Symbol) override;
+
void onLabelParsed(MCSymbol *Symbol) override;
};
@@ -3203,17 +3206,26 @@ public:
} // end anonymous namespace.
void ARMOperand::print(raw_ostream &OS) const {
+ auto RegName = [](unsigned Reg) {
+ if (Reg)
+ return ARMInstPrinter::getRegisterName(Reg);
+ else
+ return "noreg";
+ };
+
switch (Kind) {
case k_CondCode:
OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
break;
case k_CCOut:
- OS << "<ccout " << getReg() << ">";
+ OS << "<ccout " << RegName(getReg()) << ">";
break;
case k_ITCondMask: {
static const char *const MaskStr[] = {
- "()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)",
- "(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)"
+ "(invalid)", "(teee)", "(tee)", "(teet)",
+ "(te)", "(tete)", "(tet)", "(tett)",
+ "(t)", "(ttee)", "(tte)", "(ttet)",
+ "(tt)", "(ttte)", "(ttt)", "(tttt)"
};
assert((ITMask.Mask & 0xf) == ITMask.Mask);
OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
@@ -3247,13 +3259,25 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<ARM_TSB::" << TraceSyncBOptToString(getTraceSyncBarrierOpt()) << ">";
break;
case k_Memory:
- OS << "<memory "
- << " base:" << Memory.BaseRegNum;
+ OS << "<memory";
+ if (Memory.BaseRegNum)
+ OS << " base:" << RegName(Memory.BaseRegNum);
+ if (Memory.OffsetImm)
+ OS << " offset-imm:" << *Memory.OffsetImm;
+ if (Memory.OffsetRegNum)
+ OS << " offset-reg:" << (Memory.isNegative ? "-" : "")
+ << RegName(Memory.OffsetRegNum);
+ if (Memory.ShiftType != ARM_AM::no_shift) {
+ OS << " shift-type:" << ARM_AM::getShiftOpcStr(Memory.ShiftType);
+ OS << " shift-imm:" << Memory.ShiftImm;
+ }
+ if (Memory.Alignment)
+ OS << " alignment:" << Memory.Alignment;
OS << ">";
break;
case k_PostIndexRegister:
OS << "post-idx register " << (PostIdxReg.isAdd ? "" : "-")
- << PostIdxReg.RegNum;
+ << RegName(PostIdxReg.RegNum);
if (PostIdxReg.ShiftTy != ARM_AM::no_shift)
OS << ARM_AM::getShiftOpcStr(PostIdxReg.ShiftTy) << " "
<< PostIdxReg.ShiftImm;
@@ -3269,23 +3293,21 @@ void ARMOperand::print(raw_ostream &OS) const {
break;
}
case k_Register:
- OS << "<register " << getReg() << ">";
+ OS << "<register " << RegName(getReg()) << ">";
break;
case k_ShifterImmediate:
OS << "<shift " << (ShifterImm.isASR ? "asr" : "lsl")
<< " #" << ShifterImm.Imm << ">";
break;
case k_ShiftedRegister:
- OS << "<so_reg_reg "
- << RegShiftedReg.SrcReg << " "
- << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy)
- << " " << RegShiftedReg.ShiftReg << ">";
+ OS << "<so_reg_reg " << RegName(RegShiftedReg.SrcReg) << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy) << " "
+ << RegName(RegShiftedReg.ShiftReg) << ">";
break;
case k_ShiftedImmediate:
- OS << "<so_reg_imm "
- << RegShiftedImm.SrcReg << " "
- << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy)
- << " #" << RegShiftedImm.ShiftImm << ">";
+ OS << "<so_reg_imm " << RegName(RegShiftedImm.SrcReg) << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy) << " #"
+ << RegShiftedImm.ShiftImm << ">";
break;
case k_RotateImmediate:
OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
@@ -3309,7 +3331,7 @@ void ARMOperand::print(raw_ostream &OS) const {
const SmallVectorImpl<unsigned> &RegList = getRegList();
for (SmallVectorImpl<unsigned>::const_iterator
I = RegList.begin(), E = RegList.end(); I != E; ) {
- OS << *I;
+ OS << RegName(*I);
if (++I < E) OS << ", ";
}
@@ -3318,15 +3340,15 @@ void ARMOperand::print(raw_ostream &OS) const {
}
case k_VectorList:
OS << "<vector_list " << VectorList.Count << " * "
- << VectorList.RegNum << ">";
+ << RegName(VectorList.RegNum) << ">";
break;
case k_VectorListAllLanes:
OS << "<vector_list(all lanes) " << VectorList.Count << " * "
- << VectorList.RegNum << ">";
+ << RegName(VectorList.RegNum) << ">";
break;
case k_VectorListIndexed:
OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
- << VectorList.Count << " * " << VectorList.RegNum << ">";
+ << VectorList.Count << " * " << RegName(VectorList.RegNum) << ">";
break;
case k_Token:
OS << "'" << getToken() << "'";
@@ -5626,7 +5648,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
Mnemonic == "bxns" || Mnemonic == "blxns" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
- Mnemonic == "vcmla" || Mnemonic == "vcadd")
+ Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
+ Mnemonic == "vfmal" || Mnemonic == "vfmsl")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -5716,7 +5739,10 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
(FullInst.startswith("vmull") && FullInst.endswith(".p64")) ||
Mnemonic == "vmovx" || Mnemonic == "vins" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
- Mnemonic == "vcmla" || Mnemonic == "vcadd") {
+ Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
+ Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
+ Mnemonic == "sb" || Mnemonic == "ssbb" ||
+ Mnemonic == "pssbb") {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
@@ -6819,6 +6845,26 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"code specified");
break;
}
+ case ARM::DSB:
+ case ARM::t2DSB: {
+
+ if (Inst.getNumOperands() < 2)
+ break;
+
+ unsigned Option = Inst.getOperand(0).getImm();
+ unsigned Pred = Inst.getOperand(1).getImm();
+
+ // SSBB and PSSBB (DSB #0|#4) are not predicable (pred must be AL).
+ if (Option == 0 && Pred != ARMCC::AL)
+ return Error(Operands[1]->getStartLoc(),
+ "instruction 'ssbb' is not predicable, but condition code "
+ "specified");
+ if (Option == 4 && Pred != ARMCC::AL)
+ return Error(Operands[1]->getStartLoc(),
+ "instruction 'pssbb' is not predicable, but condition code "
+ "specified");
+ break;
+ }
case ARM::VMOVRRS: {
// Source registers must be sequential.
const unsigned Sm = MRI->getEncodingValue(Inst.getOperand(2).getReg());
@@ -6837,6 +6883,15 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"destination operands must be sequential");
break;
}
+ case ARM::VLDMDIA:
+ case ARM::VSTMDIA: {
+ ARMOperand &Op = static_cast<ARMOperand&>(*Operands[3]);
+ auto &RegList = Op.getRegList();
+ if (RegList.size() < 1 || RegList.size() > 16)
+ return Error(Operands[3]->getStartLoc(),
+ "list of registers must be at least 1 and at most 16");
+ break;
+ }
}
return false;
@@ -9122,33 +9177,9 @@ bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const {
// Any arithmetic instruction which writes to the PC also terminates the IT
// block.
- for (unsigned OpIdx = 0; OpIdx < MCID.getNumDefs(); ++OpIdx) {
- MCOperand &Op = Inst.getOperand(OpIdx);
- if (Op.isReg() && Op.getReg() == ARM::PC)
- return true;
- }
-
- if (MCID.hasImplicitDefOfPhysReg(ARM::PC, MRI))
+ if (MCID.hasDefOfPhysReg(Inst, ARM::PC, *MRI))
return true;
- // Instructions with variable operand lists, which write to the variable
- // operands. We only care about Thumb instructions here, as ARM instructions
- // obviously can't be in an IT block.
- switch (Inst.getOpcode()) {
- case ARM::tLDMIA:
- case ARM::t2LDMIA:
- case ARM::t2LDMIA_UPD:
- case ARM::t2LDMDB:
- case ARM::t2LDMDB_UPD:
- if (listContainsReg(Inst, 3, ARM::PC))
- return true;
- break;
- case ARM::tPOP:
- if (listContainsReg(Inst, 2, ARM::PC))
- return true;
- break;
- }
-
return false;
}
@@ -9255,6 +9286,10 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (MatchResult) {
case Match_Success:
+ LLVM_DEBUG(dbgs() << "Parsed as: ";
+ Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
+ dbgs() << "\n");
+
// Context sensitive operand constraints aren't handled by the matcher,
// so check them here.
if (validateInstruction(Inst, Operands)) {
@@ -9272,7 +9307,9 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// individual transformations can chain off each other. E.g.,
// tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
while (processInstruction(Inst, Operands, Out))
- ;
+ LLVM_DEBUG(dbgs() << "Changed to: ";
+ Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
+ dbgs() << "\n");
// Only after the instruction is fully processed, we can validate it
if (wasInITBlock && hasV8Ops() && isThumb() &&
@@ -9441,10 +9478,13 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
return false;
}
-void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
+void ARMAsmParser::doBeforeLabelEmit(MCSymbol *Symbol) {
// We need to flush the current implicit IT block on a label, because it is
// not legal to branch into an IT block.
flushPendingInstructions(getStreamer());
+}
+
+void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
if (NextSymbolIsThumb) {
getParser().getStreamer().EmitThumbFunc(Symbol);
NextSymbolIsThumb = false;
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index bfc32073ba18..2f84719c4c4f 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -273,6 +273,21 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
case ARM::t2TSB:
O << "\ttsb\tcsync";
return;
+ case ARM::t2DSB:
+ switch (MI->getOperand(0).getImm()) {
+ default:
+ if (!printAliasInstr(MI, STI, O))
+ printInstruction(MI, STI, O);
+ break;
+ case 0:
+ O << "\tssbb";
+ break;
+ case 4:
+ O << "\tpssbb";
+ break;
+ }
+ printAnnotation(O, Annot);
+ return;
}
if (!printAliasInstr(MI, STI, O))
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index f472b2154314..e1ea5964cf67 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/bit.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -627,27 +628,22 @@ namespace ARM_AM {
//
inline float getFPImmFloat(unsigned Imm) {
// We expect an 8-bit binary encoding of a floating-point number here.
- union {
- uint32_t I;
- float F;
- } FPUnion;
uint8_t Sign = (Imm >> 7) & 0x1;
uint8_t Exp = (Imm >> 4) & 0x7;
uint8_t Mantissa = Imm & 0xf;
- // 8-bit FP iEEEE Float Encoding
+ // 8-bit FP IEEE Float Encoding
// abcd efgh aBbbbbbc defgh000 00000000 00000000
//
// where B = NOT(b);
-
- FPUnion.I = 0;
- FPUnion.I |= Sign << 31;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
- FPUnion.I |= (Exp & 0x3) << 23;
- FPUnion.I |= Mantissa << 19;
- return FPUnion.F;
+ uint32_t I = 0;
+ I |= Sign << 31;
+ I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+ I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+ I |= (Exp & 0x3) << 23;
+ I |= Mantissa << 19;
+ return bit_cast<float>(I);
}
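As a sanity check on the decode above, here is a standalone re-derivation (a sketch, not part of the patch; std::memcpy stands in for llvm::bit_cast so it compiles outside the LLVM tree):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  // Rebuilds the 32-bit IEEE pattern from the 8-bit VFP immediate encoding,
  // exactly as getFPImmFloat does, then type-puns it to a float.
  static float decodeFPImm8(unsigned Imm) {
    uint32_t Sign = (Imm >> 7) & 0x1;
    uint32_t Exp = (Imm >> 4) & 0x7;
    uint32_t Mantissa = Imm & 0xf;
    uint32_t I = 0;
    I |= Sign << 31;
    I |= ((Exp & 0x4) != 0 ? 0u : 1u) << 30;
    I |= ((Exp & 0x4) != 0 ? 0x1fu : 0u) << 25;
    I |= (Exp & 0x3) << 23;
    I |= Mantissa << 19;
    float F;
    std::memcpy(&F, &I, sizeof(F));
    return F;
  }

  int main() {
    assert(decodeFPImm8(0x70) == 1.0f);  // sign 0, exp 0b111, mantissa 0
    assert(decodeFPImm8(0xf8) == -1.5f); // sign 1, exp 0b111, mantissa 0b1000
  }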
/// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index f524a0081301..c2a07d4ddcef 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -373,6 +373,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
// interfere with checking valid expressions.
if (const MCSymbolRefExpr *A = Target.getSymA()) {
if (A->hasSubsectionsViaSymbols() && Asm.isThumbFunc(&A->getSymbol()) &&
+ A->getSymbol().isExternal() &&
(Kind == FK_Data_4 || Kind == ARM::fixup_arm_movw_lo16 ||
Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 ||
Kind == ARM::fixup_t2_movt_hi16))
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index beeb5dec4baf..33c32d5464af 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -248,6 +248,11 @@ namespace ARMII {
/// just that part of the flag set.
MO_OPTION_MASK = 0x3,
+ /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the ".refptr.FOO" symbol. This is used for
+ /// stub symbols on windows.
+ MO_COFFSTUB = 0x4,
+
/// MO_GOT - On a symbol operand, this represents a GOT relative relocation.
MO_GOT = 0x8,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 7d04c73fb3f2..b8ba7584911b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
@@ -40,6 +41,8 @@ namespace {
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
+
+ void addTargetSectionFlags(MCContext &Ctx, MCSectionELF &Sec) override;
};
} // end anonymous namespace
@@ -236,6 +239,21 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
}
}
+void ARMELFObjectWriter::addTargetSectionFlags(MCContext &Ctx,
+ MCSectionELF &Sec) {
+ // The mix of execute-only and non-execute-only at link time is
+ // non-execute-only. To avoid the empty implicitly created .text
+ // section from making the whole .text section non-execute-only, we
+ // mark it execute-only if it is empty and there is at least one
+ // execute-only section in the object.
+ MCSectionELF *TextSection =
+ static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection());
+ if (Sec.getKind().isExecuteOnly() && !TextSection->hasInstructions() &&
+ !TextSection->hasData()) {
+ TextSection->setFlags(TextSection->getFlags() | ELF::SHF_ARM_PURECODE);
+ }
+}
+
std::unique_ptr<MCObjectTargetWriter>
llvm::createARMELFObjectWriter(uint8_t OSABI) {
return llvm::make_unique<ARMELFObjectWriter>(OSABI);
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 3373d691db50..d3744fffac32 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -465,6 +465,11 @@ public:
void emitPad(int64_t Offset);
void emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector);
void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
+ void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
+ SMLoc Loc) override {
+ EmitDataMappingSymbol();
+ MCObjectStreamer::emitFill(NumBytes, FillValue, Loc);
+ }
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo);
@@ -861,6 +866,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::ArchKind::ARMV8_2A:
case ARM::ArchKind::ARMV8_3A:
case ARM::ArchKind::ARMV8_4A:
+ case ARM::ArchKind::ARMV8_5A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -1071,7 +1077,7 @@ void ARMTargetELFStreamer::finishAttributeSection() {
if (Contents.empty())
return;
- llvm::sort(Contents.begin(), Contents.end(), AttributeItem::LessTag);
+ llvm::sort(Contents, AttributeItem::LessTag);
ARMELFStreamer &Streamer = getStreamer();
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 0cef683778e5..3ee63ac374b3 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -31,6 +31,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) {
SupportsDebugInformation = true;
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
+
// Exceptions handling
ExceptionsType = (TheTriple.isOSDarwin() && !TheTriple.isWatchABI())
? ExceptionHandling::SjLj
@@ -56,6 +59,9 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) {
SupportsDebugInformation = true;
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
+
// Exceptions handling
switch (TheTriple.getOS()) {
case Triple::NetBSD:
@@ -90,6 +96,9 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
PrivateGlobalPrefix = "$M";
PrivateLabelPrefix = "$M";
CommentString = ";";
+
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
}
void ARMCOFFMCAsmInfoGNU::anchor() { }
@@ -110,5 +119,7 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
UseIntegratedAssembler = true;
DwarfRegNumForCFI = false;
-}
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 4b4956e914f2..0ced8195790d 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -22,6 +22,8 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ScopedPrinter.h"
+
using namespace llvm;
namespace {
@@ -144,6 +146,15 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
MCValue Target,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+
+ if (FixupOffset & 0xff000000) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "can not encode offset '0x" +
+ to_hexString(FixupOffset) +
+ "' in resulting scattered relocation.");
+ return;
+ }
+
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Type = MachO::ARM_RELOC_HALF;
@@ -250,6 +261,15 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
unsigned Log2Size,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+
+ if (FixupOffset & 0xff000000) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "can not encode offset '0x" +
+ to_hexString(FixupOffset) +
+ "' in resulting scattered relocation.");
+ return;
+ }
+
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
// See <reloc.h>.
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 63aa9735e8a4..91836cff95c8 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 8ae713b7b489..30cbde1ca71f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -75,8 +75,8 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(MCContext &Ctx,
case ARM::fixup_t2_condbranch:
return COFF::IMAGE_REL_ARM_BRANCH20T;
case ARM::fixup_t2_uncondbranch:
- return COFF::IMAGE_REL_ARM_BRANCH24T;
case ARM::fixup_arm_thumb_bl:
+ return COFF::IMAGE_REL_ARM_BRANCH24T;
case ARM::fixup_arm_thumb_blx:
return COFF::IMAGE_REL_ARM_BLX23T;
case ARM::fixup_t2_movw_lo16:
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 1a91a7030657..d567d3339049 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -146,9 +146,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
- if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
- RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
- RC == &ARM::GPRnopcRegClass) {
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::t2STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
@@ -190,9 +188,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
- if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
- RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
- RC == &ARM::GPRnopcRegClass) {
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
.addFrameIndex(FI)
.addImm(0)
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index abf54ba7e87c..65889fc4e28b 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -122,6 +122,7 @@ namespace {
{ ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
{ ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
{ ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2TEQrr, ARM::tEOR, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
{ ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
{ ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
{ ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
@@ -485,7 +486,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
.addReg(Rt, IsStore ? 0 : RegState::Define);
// Transfer memoperands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
@@ -605,7 +606,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
@@ -717,6 +718,16 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
return true;
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
+ case ARM::t2TEQrr: {
+ unsigned PredReg = 0;
+ // Can only convert to eors if we're not in an IT block.
+ if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
+ break;
+ // TODO if Operand 0 is not killed but Operand 1 is, then we could write
+ // to Op1 instead.
+ if (MI->getOperand(0).isKill())
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ }
}
return false;
}
@@ -903,9 +914,24 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
- MIB.add(MI->getOperand(0));
- if (NewMCID.hasOptionalDef())
- MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+
+ // TEQ is special in that it doesn't define a register but we're converting
+ // it into an EOR which does. So add the first operand as a def and then
+ // again as a use.
+ if (MCID.getOpcode() == ARM::t2TEQrr) {
+ MIB.add(MI->getOperand(0));
+ MIB->getOperand(0).setIsKill(false);
+ MIB->getOperand(0).setIsDef(true);
+ MIB->getOperand(0).setIsDead(true);
+
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+ MIB.add(MI->getOperand(0));
+ } else {
+ MIB.add(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+ }
// Transfer the rest of operands.
unsigned NumOps = MCID.getNumOperands();
diff --git a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index fec7080081d0..536a54759c77 100644
--- a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -568,8 +568,8 @@ bool AVRExpandPseudo::expand<AVR::LDSWRdK>(Block &MBB, BlockIt MBBI) {
llvm_unreachable("Unknown operand type!");
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -617,8 +617,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -648,8 +648,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPi>(Block &MBB, BlockIt MBBI) {
.addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
.addReg(SrcReg, RegState::Kill);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -679,8 +679,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPd>(Block &MBB, BlockIt MBBI) {
.addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
.addReg(SrcReg, RegState::Kill);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -734,8 +734,8 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -782,8 +782,8 @@ bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1003,8 +1003,8 @@ bool AVRExpandPseudo::expand<AVR::STSWKRr>(Block &MBB, BlockIt MBBI) {
MIBLO.addReg(SrcLoReg, getKillRegState(SrcIsKill));
MIBHI.addReg(SrcHiReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1031,8 +1031,8 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
.addImm(1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1065,8 +1065,8 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPiRr>(Block &MBB, BlockIt MBBI) {
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.addImm(Imm);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1099,8 +1099,8 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPdRr>(Block &MBB, BlockIt MBBI) {
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.addImm(Imm);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1133,8 +1133,8 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
.addImm(Imm + 1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1163,8 +1163,8 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addImm(Imm + 1);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1194,8 +1194,8 @@ bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
.addImm(Imm)
.addReg(SrcLoReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1251,24 +1251,26 @@ bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
- OpLo = AVR::LSLRd;
- OpHi = AVR::ROLRd;
+ OpLo = AVR::ADDRdRr; // ADD Rd, Rd <==> LSL Rd
+ OpHi = AVR::ADCRdRr; // ADC Rd, Rd <==> ROL Rd
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// Low part
buildMI(MBB, MBBI, OpLo)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg)
.addReg(DstLoReg, getKillRegState(DstIsKill));
auto MIBHI = buildMI(MBB, MBBI, OpHi)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg)
.addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
- MIBHI->getOperand(2).setIsDead();
+ MIBHI->getOperand(3).setIsDead();
// SREG is always implicitly killed
- MIBHI->getOperand(3).setIsKill();
+ MIBHI->getOperand(4).setIsKill();
MI.eraseFromParent();
return true;
@@ -1387,8 +1389,9 @@ template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
.addReg(SrcReg, getKillRegState(SrcIsKill));
}
- buildMI(MBB, MBBI, AVR::LSLRd)
+ buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rr
.addReg(DstHiReg, RegState::Define)
+ .addReg(DstHiReg)
.addReg(DstHiReg, RegState::Kill);
auto SBC = buildMI(MBB, MBBI, AVR::SBCRdRr)
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index b0b23effc6c6..85abf42eaa67 100644
--- a/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -350,9 +350,7 @@ template <> bool AVRDAGToDAGISel::select<ISD::STORE>(SDNode *N) {
SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, MVT::Other, Ops);
// Transfer memory operands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = ST->getMemOperand();
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {ST->getMemOperand()});
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
CurDAG->RemoveDeadNode(N);
@@ -407,9 +405,7 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
}
// Transfer memory operands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {LD->getMemOperand()});
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
index 1b412a9c6813..57fc978b54bb 100644
--- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -1430,6 +1430,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
MachineBasicBlock *BB) const {
unsigned Opc;
const TargetRegisterClass *RC;
+ bool HasRepeatedOperand = false;
MachineFunction *F = BB->getParent();
MachineRegisterInfo &RI = F->getRegInfo();
const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
@@ -1440,8 +1441,9 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
default:
llvm_unreachable("Invalid shift opcode!");
case AVR::Lsl8:
- Opc = AVR::LSLRd;
+ Opc = AVR::ADDRdRr; // LSL is an alias of ADD Rd, Rd
RC = &AVR::GPR8RegClass;
+ HasRepeatedOperand = true;
break;
case AVR::Lsl16:
Opc = AVR::LSLWRd;
@@ -1464,8 +1466,9 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
RC = &AVR::DREGSRegClass;
break;
case AVR::Rol8:
- Opc = AVR::ROLRd;
+ Opc = AVR::ADCRdRr; // ROL is an alias of ADC Rd, Rd
RC = &AVR::GPR8RegClass;
+ HasRepeatedOperand = true;
break;
case AVR::Rol16:
Opc = AVR::ROLWRd;
@@ -1535,7 +1538,11 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
.addMBB(BB)
.addReg(ShiftAmtReg2)
.addMBB(LoopBB);
- BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
+
+ auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
+ if (HasRepeatedOperand)
+ ShiftMI.addReg(ShiftReg);
+
BuildMI(LoopBB, dl, TII.get(AVR::SUBIRdK), ShiftAmtReg2)
.addReg(ShiftAmtReg)
.addImm(1);
diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
index a2129cc0e2e9..5720af7d8df6 100644
--- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -730,15 +730,15 @@ Defs = [SREG] in
// TST Rd
// Test for zero or minus.
// This operation is identical to a `Rd AND Rd`.
-//def : InstAlias<"tst\t$rd", (ANDRdRr GPR8:$rd, GPR8:$rd), 1>;
+def : InstAlias<"tst\t$rd", (ANDRdRr GPR8:$rd, GPR8:$rd)>;
-let Defs = [SREG] in
-def TSTRd : FTST<0b0010,
- 0b00,
- (outs),
- (ins GPR8:$rd),
- "tst\t$rd",
- [(AVRtst i8:$rd)]>;
+// SBR Rd, K
+//
+// Mnemonic alias to 'ORI Rd, K'. Same bit pattern, same operands,
+// same everything.
+def : InstAlias<"sbr\t$rd, $k",
+ (ORIRdK LD8:$rd, imm_ldi8:$k),
+ /* Disable display, so we don't override ORI */ 0>;
//===----------------------------------------------------------------------===//
// Jump instructions
@@ -1222,7 +1222,7 @@ isReMaterializable = 1 in
// ldd Rd, P+q
// ldd Rd+1, P+q+1
let Constraints = "@earlyclobber $dst" in
- def LDDWRdPtrQ : Pseudo<(outs DREGS:$dst),
+ def LDDWRdPtrQ : Pseudo<(outs DREGS_WITHOUT_Z_WORKAROUND:$dst),
(ins memri:$memri),
"lddw\t$dst, $memri",
[(set i16:$dst, (load addr:$memri))]>,
@@ -1632,12 +1632,7 @@ def LATZRd : FZRd<0b111,
let Constraints = "$src = $rd",
Defs = [SREG] in
{
- def LSLRd : FRdRr<0b0000,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "lsl\t$rd",
- [(set i8:$rd, (AVRlsl i8:$src)), (implicit SREG)]>;
+ // 8-bit LSL is an alias of ADD Rd, Rd
def LSLWRd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
@@ -1671,12 +1666,7 @@ Defs = [SREG] in
// Bit rotate operations.
let Uses = [SREG] in
{
- def ROLRd : FRdRr<0b0001,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "rol\t$rd",
- [(set i8:$rd, (AVRrol i8:$src)), (implicit SREG)]>;
+ // 8-bit ROL is an alias of ADC Rd, Rd
def ROLWRd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
@@ -1743,15 +1733,6 @@ def BLD : FRdB<0b00,
let Constraints = "$src = $rd",
Defs = [SREG] in
{
- // SBR Rd, K
- // Alias for ORI Rd, K
- def SBRRdK : FRdK<0b0110,
- (outs LD8:$rd),
- (ins LD8:$src, imm_ldi8:$k),
- "sbr\t$rd, $k",
- [(set i8:$rd, (or i8:$src, imm:$k)),
- (implicit SREG)]>;
-
// CBR Rd, K
// Alias for `ANDI Rd, COM(K)` where COM(K) is the complement of K.
// FIXME: This uses the 'complement' encoder. We need it to also use the
@@ -1769,6 +1750,14 @@ Defs = [SREG] in
// Clears all bits in a register.
def CLR : InstAlias<"clr\t$rd", (EORRdRr GPR8:$rd, GPR8:$rd)>;
+// LSL Rd
+// Alias for ADD Rd, Rd
+// --------------
+// Logical shift left one bit.
+def LSL : InstAlias<"lsl\t$rd", (ADDRdRr GPR8:$rd, GPR8:$rd)>;
+
+def ROL : InstAlias<"rol\t$rd", (ADCRdRr GPR8:$rd, GPR8:$rd)>;
+
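The identity these aliases rely on is plain arithmetic: a one-bit logical shift left of an 8-bit register equals adding the register to itself, and a rotate left through carry equals an add-with-carry of the register to itself, with identical carry-out in both cases. A brute-force check (a sketch, not part of the patch):

  #include <cassert>

  int main() {
    for (unsigned V = 0; V < 256; ++V) {
      // LSL Rd <==> ADD Rd, Rd: same result byte, same carry-out.
      assert(((V << 1) & 0xff) == ((V + V) & 0xff));
      assert(((V >> 7) & 1) == ((V + V) >> 8));

      for (unsigned C = 0; C < 2; ++C) {
        // ROL Rd <==> ADC Rd, Rd: carry-in enters bit 0, bit 7 leaves as carry.
        unsigned Rol = ((V << 1) | C) & 0xff;
        unsigned Adc = (V + V + C) & 0xff;
        assert(Rol == Adc);
        assert(((V >> 7) & 1) == ((V + V + C) >> 8));
      }
    }
  }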
// SER Rd
// Alias for LDI Rd, 0xff
// ---------
@@ -2107,3 +2096,13 @@ def : Pat<(i8 (trunc (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr
def : Pat<(shl i16:$src1, (i8 1)),
(LSLWRd i16:$src1)>;
+// Lowering of 'tst' node to 'TST' instruction.
+// TST is an alias of AND Rd, Rd.
+def : Pat<(AVRtst i8:$rd),
+ (ANDRdRr GPR8:$rd, GPR8:$rd)>;
+
+// Lowering of 'lsl' node to 'LSL' instruction.
+// LSL is an alias of 'ADD Rd, Rd'
+def : Pat<(AVRlsl i8:$rd),
+ (ADDRdRr GPR8:$rd, GPR8:$rd)>;
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index d171a620760e..808a85e459c1 100644
--- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -152,6 +152,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (MI.getOpcode() == AVR::FRMIDX) {
MI.setDesc(TII.get(AVR::MOVWRdRr));
MI.getOperand(FIOperandNum).ChangeToRegister(AVR::R29R28, false);
+ MI.RemoveOperand(2);
assert(Offset > 0 && "Invalid offset");
diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
index 8162f12052be..d55252bcac46 100644
--- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
+++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -157,6 +157,26 @@ def DREGS : RegisterClass<"AVR", [i16], 8,
R9R8, R7R6, R5R4, R3R2, R1R0
)>;
+// The 16-bit DREGS register class, excluding the Z pointer register.
+//
+// This is used by instructions which cause high pointer register
+// contention, which leads to an assertion in the register allocator.
+//
+// There is no technical reason why instructions that use this class
+// cannot use Z; it's simply a workaround for a regalloc bug.
+//
+// More information can be found in PR39553.
+def DREGS_WITHOUT_Z_WORKAROUND : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R27R26,
+ // Callee saved registers.
+ R29R28, R17R16, R15R14, R13R12, R11R10,
+ R9R8, R7R6, R5R4, R3R2, R1R0
+ )>;
+
// 16-bit register class for immediate instructions.
def DLDREGS : RegisterClass<"AVR", [i16], 8,
(
diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index 74300d9a451c..9828cdab68c3 100644
--- a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -40,12 +40,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return RM.hasValue() ? *RM : Reloc::Static;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -53,8 +47,8 @@ AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, AVRDataLayout, TT, getCPU(CPU), FS, Options,
- getEffectiveRelocModel(RM), getEffectiveCodeModel(CM),
- OL),
+ getEffectiveRelocModel(RM),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
SubTarget(TT, getCPU(CPU), FS, *this) {
this->TLOF = make_unique<AVRTargetObjectFile>();
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index d57cc098497f..f2bb59265271 100644
--- a/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -34,8 +34,9 @@
#define DEBUG_TYPE "avr-asm-parser"
-namespace llvm {
+using namespace llvm;
+namespace {
/// Parses AVR assembly from a stream.
class AVRAsmParser : public MCTargetAsmParser {
const MCSubtargetInfo &STI;
@@ -245,6 +246,8 @@ public:
}
};
+} // end anonymous namespace.
+
// Auto-generated Match Functions
/// Maps from the set of all register names to a register number.
@@ -510,6 +513,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands) {
case AsmToken::Real:
if (!tryParseExpression(Operands))
return false;
+ break;
default:
break;
}
@@ -708,5 +712,3 @@ unsigned AVRAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
}
return Match_InvalidOperand;
}
-
-} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 496f2befde58..8890fb8adf4d 100644
--- a/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -357,8 +357,8 @@ BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
case AsmToken::Plus: {
if (getLexer().peekTok().is(AsmToken::Integer))
return MatchOperand_NoMatch;
+ LLVM_FALLTHROUGH;
}
- // Fall through.
case AsmToken::Equal:
case AsmToken::Greater:
diff --git a/contrib/llvm/lib/Target/BPF/BPF.h b/contrib/llvm/lib/Target/BPF/BPF.h
index 76d3e1ca5f6f..9749e369c2c1 100644
--- a/contrib/llvm/lib/Target/BPF/BPF.h
+++ b/contrib/llvm/lib/Target/BPF/BPF.h
@@ -19,9 +19,11 @@ class BPFTargetMachine;
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMIPeepholePass();
FunctionPass *createBPFMIPreEmitPeepholePass();
+FunctionPass *createBPFMIPreEmitCheckingPass();
void initializeBPFMIPeepholePass(PassRegistry&);
void initializeBPFMIPreEmitPeepholePass(PassRegistry&);
+void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
}
#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index 705211b486bf..ada5eb923f40 100644
--- a/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -16,6 +16,7 @@
#include "BPFInstrInfo.h"
#include "BPFMCInstLower.h"
#include "BPFTargetMachine.h"
+#include "BTFDebug.h"
#include "InstPrinter/BPFInstPrinter.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -40,6 +41,7 @@ public:
: AsmPrinter(TM, std::move(Streamer)) {}
StringRef getPassName() const override { return "BPF Assembly Printer"; }
+ bool doInitialization(Module &M) override;
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
@@ -52,6 +54,18 @@ public:
};
} // namespace
+bool BPFAsmPrinter::doInitialization(Module &M) {
+ AsmPrinter::doInitialization(M);
+
+ if (MAI->doesSupportDebugInformation()) {
+ Handlers.push_back(HandlerInfo(new BTFDebug(this), "emit",
+ "Debug Info Emission", "BTF",
+ "BTF Emission"));
+ }
+
+ return false;
+}
+
void BPFAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNum);
diff --git a/contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp b/contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp
new file mode 100644
index 000000000000..0a311378e777
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -0,0 +1,96 @@
+//===-------------- BPFMIChecking.cpp - MI Checking Legality -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs checking to signal errors for certain illegal usages at
+// MachineInstruction layer. Specifically, the result of XADD{32,64} insns must
+// not be used. The check runs in the PreEmit stage, right before the
+// machine code is emitted, at which point the register liveness information
+// is still available.
+//
+//===----------------------------------------------------------------------===//
+
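For context, the kind of source pattern this pass flags looks like the following (an illustrative snippet, not part of the patch; it assumes clang targeting bpf, where __sync_fetch_and_add is selected to XADD):

  long Counter;

  // Fine: the atomic add's return value is discarded, so the XADD defs are dead.
  void ok(void) { (void)__sync_fetch_and_add(&Counter, 1); }

  // Rejected by this pass: the XADD return value is consumed, which BPF's
  // XADD cannot provide, so a fatal error is reported for this line.
  long bad(void) { return __sync_fetch_and_add(&Counter, 1); }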
+#include "BPF.h"
+#include "BPFInstrInfo.h"
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-mi-checking"
+
+namespace {
+
+struct BPFMIPreEmitChecking : public MachineFunctionPass {
+
+ static char ID;
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+
+ BPFMIPreEmitChecking() : MachineFunctionPass(ID) {
+ initializeBPFMIPreEmitCheckingPass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize class variables.
+ void initialize(MachineFunction &MFParm);
+
+ void checkingIllegalXADD(void);
+
+public:
+
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!skipFunction(MF.getFunction())) {
+ initialize(MF);
+ checkingIllegalXADD();
+ }
+ return false;
+ }
+};
+
+// Initialize class variables.
+void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "*** BPF PreEmit checking pass ***\n\n");
+}
+
+void BPFMIPreEmitChecking::checkingIllegalXADD(void) {
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() != BPF::XADD32 && MI.getOpcode() != BPF::XADD64)
+ continue;
+
+ LLVM_DEBUG(MI.dump());
+ if (!MI.allDefsAreDead()) {
+ DebugLoc Empty;
+ const DebugLoc &DL = MI.getDebugLoc();
+ if (DL != Empty)
+ report_fatal_error("line " + std::to_string(DL.getLine()) +
+ ": Invalid usage of the XADD return value", false);
+ else
+ report_fatal_error("Invalid usage of the XADD return value", false);
+ }
+ }
+ }
+
+ return;
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(BPFMIPreEmitChecking, "bpf-mi-pemit-checking",
+ "BPF PreEmit Checking", false, false)
+
+char BPFMIPreEmitChecking::ID = 0;
+FunctionPass* llvm::createBPFMIPreEmitCheckingPass()
+{
+ return new BPFMIPreEmitChecking();
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
index bb0d6bcf5450..4202850e9eb9 100644
--- a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
+++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
@@ -29,8 +29,6 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 84d89bff74fe..350465b118ed 100644
--- a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -51,12 +51,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -64,13 +58,14 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
- getEffectiveRelocModel(RM), getEffectiveCodeModel(CM),
- OL),
+ getEffectiveRelocModel(RM),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
- BPFMCAsmInfo *MAI = static_cast<BPFMCAsmInfo *>(const_cast<MCAsmInfo *>(AsmInfo));
+ BPFMCAsmInfo *MAI =
+ static_cast<BPFMCAsmInfo *>(const_cast<MCAsmInfo *>(AsmInfo.get()));
MAI->setDwarfUsesRelocationsAcrossSections(!Subtarget.getUseDwarfRIS());
}
namespace {
@@ -115,6 +110,7 @@ void BPFPassConfig::addMachineSSAOptimization() {
void BPFPassConfig::addPreEmitPass() {
const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl();
+ addPass(createBPFMIPreEmitCheckingPass());
if (getOptLevel() != CodeGenOpt::None)
if (Subtarget->getHasAlu32() && !DisableMIPeephole)
addPass(createBPFMIPreEmitPeepholePass());
diff --git a/contrib/llvm/lib/Target/BPF/BTF.def b/contrib/llvm/lib/Target/BPF/BTF.def
new file mode 100644
index 000000000000..54c5bc3cf092
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTF.def
@@ -0,0 +1,33 @@
+//===- BTF.def - BTF definitions --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Macros for BTF.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(HANDLE_BTF_KIND)
+#error "Missing macro definition of HANDLE_BTF_*"
+#endif
+
+HANDLE_BTF_KIND(0, UNKN)
+HANDLE_BTF_KIND(1, INT)
+HANDLE_BTF_KIND(2, PTR)
+HANDLE_BTF_KIND(3, ARRAY)
+HANDLE_BTF_KIND(4, STRUCT)
+HANDLE_BTF_KIND(5, UNION)
+HANDLE_BTF_KIND(6, ENUM)
+HANDLE_BTF_KIND(7, FWD)
+HANDLE_BTF_KIND(8, TYPEDEF)
+HANDLE_BTF_KIND(9, VOLATILE)
+HANDLE_BTF_KIND(10, CONST)
+HANDLE_BTF_KIND(11, RESTRICT)
+HANDLE_BTF_KIND(12, FUNC)
+HANDLE_BTF_KIND(13, FUNC_PROTO)
+
+#undef HANDLE_BTF_KIND
diff --git a/contrib/llvm/lib/Target/BPF/BTF.h b/contrib/llvm/lib/Target/BPF/BTF.h
new file mode 100644
index 000000000000..1e1680faf1b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTF.h
@@ -0,0 +1,209 @@
+//===-- BTF.h --------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the layout of .BTF and .BTF.ext ELF sections.
+///
+/// The binary layout for .BTF section:
+/// struct Header
+/// Type and Str subsections
+/// The Type subsection is a collection of types with type id starting with 1.
+/// The Str subsection is simply a collection of strings.
+///
+/// The binary layout for .BTF.ext section:
+/// struct ExtHeader
+/// FuncInfo and LineInfo subsections
+/// The FuncInfo subsection is defined as below:
+/// BPFFuncInfo Size
+/// struct SecFuncInfo for ELF section #1
+/// A number of struct BPFFuncInfo for ELF section #1
+/// struct SecFuncInfo for ELF section #2
+/// A number of struct BPFFuncInfo for ELF section #2
+/// ...
+/// The LineInfo subsection is defined as below:
+/// BPFLineInfo Size
+/// struct SecLineInfo for ELF section #1
+/// A number of struct BPFLineInfo for ELF section #1
+/// struct SecLineInfo for ELF section #2
+/// A number of struct BPFLineInfo for ELF section #2
+/// ...
+///
+/// The section formats are also defined at
+/// https://github.com/torvalds/linux/blob/master/include/uapi/linux/btf.h
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BTF_H
+#define LLVM_LIB_TARGET_BPF_BTF_H
+
+namespace llvm {
+namespace BTF {
+
+enum : uint32_t { MAGIC = 0xeB9F, VERSION = 1 };
+
+/// Sizes in bytes of various things in the BTF format.
+enum {
+ HeaderSize = 24,
+ ExtHeaderSize = 24,
+ CommonTypeSize = 12,
+ BTFArraySize = 12,
+ BTFEnumSize = 8,
+ BTFMemberSize = 12,
+ BTFParamSize = 8,
+ SecFuncInfoSize = 8,
+ SecLineInfoSize = 8,
+ BPFFuncInfoSize = 8,
+ BPFLineInfoSize = 16
+};
+
+/// The .BTF section header definition.
+struct Header {
+ uint16_t Magic; ///< Magic value
+ uint8_t Version; ///< Version number
+ uint8_t Flags; ///< Extra flags
+ uint32_t HdrLen; ///< Length of this header
+
+ /// All offsets are in bytes relative to the end of this header.
+ uint32_t TypeOff; ///< Offset of type section
+ uint32_t TypeLen; ///< Length of type section
+ uint32_t StrOff; ///< Offset of string section
+ uint32_t StrLen; ///< Length of string section
+};
+
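+// Illustrative sanity check (a sketch, not part of the original interface):
+// the on-disk header contains no padding, so the in-memory struct size is
+// expected to match HeaderSize above.
+static_assert(sizeof(Header) == HeaderSize, "unexpected padding in Header");
+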
+enum : uint32_t {
+ MAX_VLEN = 0xffff ///< Max # of struct/union/enum members or func args
+};
+
+enum TypeKinds : uint8_t {
+#define HANDLE_BTF_KIND(ID, NAME) BTF_KIND_##NAME = ID,
+#include "BTF.def"
+};
+
+/// The BTF common type definition. Different kinds may have
+/// additional information after this structure data.
+struct CommonType {
+ /// Type name offset in the string table.
+ uint32_t NameOff;
+
+ /// "Info" bits arrangement:
+ /// Bits 0-15: vlen (e.g. # of struct's members)
+ /// Bits 16-23: unused
+ /// Bits 24-27: kind (e.g. int, ptr, array...etc)
+ /// Bits 28-30: unused
+ /// Bit 31: kind_flag, currently used by
+ /// struct, union and fwd
+ uint32_t Info;
+
+ /// "Size" is used by INT, ENUM, STRUCT and UNION.
+ /// "Size" tells the size of the type it is describing.
+ ///
+ /// "Type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+ /// FUNC and FUNC_PROTO.
+ /// "Type" is a type_id referring to another type.
+ union {
+ uint32_t Size;
+ uint32_t Type;
+ };
+};
+
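+// Illustrative helpers (a sketch; these names are not part of the interface)
+// showing how the "Info" bits described above are typically unpacked:
+inline uint32_t commonTypeVlen(uint32_t Info) { return Info & 0xffff; }
+inline uint32_t commonTypeKind(uint32_t Info) { return (Info >> 24) & 0xf; }
+inline bool commonTypeKindFlag(uint32_t Info) { return (Info >> 31) != 0; }
+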
+// For some specific BTF_KIND, "struct CommonType" is immediately
+// followed by extra data.
+
+// BTF_KIND_INT is followed by a u32 and the following
+// is the 32 bits arrangement:
+// BTF_INT_ENCODING(VAL) : (((VAL) & 0x0f000000) >> 24)
+// BTF_INT_OFFSET(VAL) : (((VAL & 0x00ff0000)) >> 16)
+// BTF_INT_BITS(VAL) : ((VAL) & 0x000000ff)
+
+/// Attributes stored in the INT_ENCODING.
+enum : uint8_t { INT_SIGNED = (1 << 0), INT_CHAR = (1 << 1), INT_BOOL = (1 << 2) };
+
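+// The same arrangement expressed as C++ (an illustrative sketch; the kernel
+// uapi header defines these as BTF_INT_* macros):
+inline uint32_t intEncoding(uint32_t Val) { return (Val & 0x0f000000) >> 24; }
+inline uint32_t intOffset(uint32_t Val) { return (Val & 0x00ff0000) >> 16; }
+inline uint32_t intBits(uint32_t Val) { return Val & 0x000000ff; }
+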
+/// BTF_KIND_ENUM is followed by multiple "struct BTFEnum".
+/// The exact number of btf_enum is stored in the vlen (of the
+/// info in "struct CommonType").
+struct BTFEnum {
+ uint32_t NameOff; ///< Enum name offset in the string table
+ int32_t Val; ///< Enum member value
+};
+
+/// BTF_KIND_ARRAY is followed by one "struct BTFArray".
+struct BTFArray {
+ uint32_t ElemType; ///< Element type
+ uint32_t IndexType; ///< Index type
+ uint32_t Nelems; ///< Number of elements for this array
+};
+
+/// BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+/// by multiple "struct BTFMember". The exact number
+/// of BTFMember is stored in the vlen (of the info in
+/// "struct CommonType").
+///
+/// If the struct/union contains any bitfield member,
+/// the Offset below represents BitOffset (bits 0 - 23)
+/// and BitFieldSize (bits 24 - 31), with BitFieldSize = 0
+/// for non-bitfield members. Otherwise, the Offset
+/// represents the BitOffset.
+struct BTFMember {
+ uint32_t NameOff; ///< Member name offset in the string table
+ uint32_t Type; ///< Member type
+ uint32_t Offset; ///< BitOffset or BitFieldSize+BitOffset
+};
+
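+// Illustrative decoding sketch for the bitfield case described above (the
+// helper names are not part of this interface):
+inline uint32_t memberBitFieldSize(uint32_t Offset) { return Offset >> 24; }
+inline uint32_t memberBitOffset(uint32_t Offset) { return Offset & 0xffffff; }
+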
+/// BTF_KIND_FUNC_PROTO are followed by multiple "struct BTFParam".
+/// The exact number of BTFParam is stored in the vlen (of the info
+/// in "struct CommonType").
+struct BTFParam {
+ uint32_t NameOff;
+ uint32_t Type;
+};
+
+/// The .BTF.ext section header definition.
+struct ExtHeader {
+ uint16_t Magic;
+ uint8_t Version;
+ uint8_t Flags;
+ uint32_t HdrLen;
+
+ uint32_t FuncInfoOff; ///< Offset of func info section
+ uint32_t FuncInfoLen; ///< Length of func info section
+ uint32_t LineInfoOff; ///< Offset of line info section
+ uint32_t LineInfoLen; ///< Length of line info section
+};
+
+/// Specifying one function info.
+struct BPFFuncInfo {
+ uint32_t InsnOffset; ///< Byte offset in the section
+ uint32_t TypeId; ///< Type id referring to .BTF type section
+};
+
+/// Specifying the function infos in one section.
+struct SecFuncInfo {
+ uint32_t SecNameOff; ///< Section name index in the .BTF string table
+ uint32_t NumFuncInfo; ///< Number of func infos in this section
+};
+
+/// Specifying one line info.
+struct BPFLineInfo {
+ uint32_t InsnOffset; ///< Byte offset in this section
+ uint32_t FileNameOff; ///< File name index in the .BTF string table
+ uint32_t LineOff; ///< Line index in the .BTF string table
+ uint32_t LineCol; ///< Line num: line_col >> 10,
+ /// col num: line_col & 0x3ff
+};
+
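+// Illustrative helpers (a sketch, not part of this interface) for the LineCol
+// packing described above:
+inline uint32_t lineInfoLine(uint32_t LineCol) { return LineCol >> 10; }
+inline uint32_t lineInfoCol(uint32_t LineCol) { return LineCol & 0x3ff; }
+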
+/// Specifying the line infos in one section.
+struct SecLineInfo {
+ uint32_t SecNameOff; ///< Section name index in the .BTF string table
+ uint32_t NumLineInfo; ///< Number of line infos in this section
+};
+
+} // End namespace BTF.
+} // End namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/BTFDebug.cpp b/contrib/llvm/lib/Target/BPF/BTFDebug.cpp
new file mode 100644
index 000000000000..96efea4ba8ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -0,0 +1,759 @@
+//===- BTFDebug.cpp - BTF Generator ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing BTF debug info.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BTFDebug.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include <fstream>
+#include <sstream>
+
+using namespace llvm;
+
+static const char *BTFKindStr[] = {
+#define HANDLE_BTF_KIND(ID, NAME) "BTF_KIND_" #NAME,
+#include "BTF.def"
+};
+
+/// Emit a BTF common type.
+void BTFTypeBase::emitType(MCStreamer &OS) {
+ OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) +
+ ")");
+ OS.EmitIntValue(BTFType.NameOff, 4);
+ OS.AddComment("0x" + Twine::utohexstr(BTFType.Info));
+ OS.EmitIntValue(BTFType.Info, 4);
+ OS.EmitIntValue(BTFType.Size, 4);
+}
+
+BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag)
+ : DTy(DTy) {
+ switch (Tag) {
+ case dwarf::DW_TAG_pointer_type:
+ Kind = BTF::BTF_KIND_PTR;
+ break;
+ case dwarf::DW_TAG_const_type:
+ Kind = BTF::BTF_KIND_CONST;
+ break;
+ case dwarf::DW_TAG_volatile_type:
+ Kind = BTF::BTF_KIND_VOLATILE;
+ break;
+ case dwarf::DW_TAG_typedef:
+ Kind = BTF::BTF_KIND_TYPEDEF;
+ break;
+ case dwarf::DW_TAG_restrict_type:
+ Kind = BTF::BTF_KIND_RESTRICT;
+ break;
+ default:
+ llvm_unreachable("Unknown DIDerivedType Tag");
+ }
+ BTFType.Info = Kind << 24;
+}
+
+void BTFTypeDerived::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(DTy->getName());
+
+ // The base type for PTR/CONST/VOLATILE could be void.
+ const DIType *ResolvedType = DTy->getBaseType().resolve();
+ if (!ResolvedType) {
+ assert((Kind == BTF::BTF_KIND_PTR || Kind == BTF::BTF_KIND_CONST ||
+ Kind == BTF::BTF_KIND_VOLATILE) &&
+ "Invalid null basetype");
+ BTFType.Type = 0;
+ } else {
+ BTFType.Type = BDebug.getTypeId(ResolvedType);
+ }
+}
+
+void BTFTypeDerived::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
+
+/// Represent a struct/union forward declaration.
+BTFTypeFwd::BTFTypeFwd(StringRef Name, bool IsUnion) : Name(Name) {
+ Kind = BTF::BTF_KIND_FWD;
+ BTFType.Info = IsUnion << 31 | Kind << 24;
+ BTFType.Type = 0;
+}
+
+void BTFTypeFwd::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
+void BTFTypeFwd::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
+
+BTFTypeInt::BTFTypeInt(uint32_t Encoding, uint32_t SizeInBits,
+ uint32_t OffsetInBits, StringRef TypeName)
+ : Name(TypeName) {
+ // Translate IR int encoding to BTF int encoding.
+ uint8_t BTFEncoding;
+ switch (Encoding) {
+ case dwarf::DW_ATE_boolean:
+ BTFEncoding = BTF::INT_BOOL;
+ break;
+ case dwarf::DW_ATE_signed:
+ case dwarf::DW_ATE_signed_char:
+ BTFEncoding = BTF::INT_SIGNED;
+ break;
+ case dwarf::DW_ATE_unsigned:
+ case dwarf::DW_ATE_unsigned_char:
+ BTFEncoding = 0;
+ break;
+ default:
+ llvm_unreachable("Unknown BTFTypeInt Encoding");
+ }
+
+ Kind = BTF::BTF_KIND_INT;
+ BTFType.Info = Kind << 24;
+ BTFType.Size = roundupToBytes(SizeInBits);
+ IntVal = (BTFEncoding << 24) | OffsetInBits << 16 | SizeInBits;
+}
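+
+// Worked example (illustrative, not taken from this change): a plain signed
+// 32-bit "int" at bit offset 0 is encoded as
+//   IntVal = (INT_SIGNED << 24) | (0 << 16) | 32 = 0x01000020.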
+
+void BTFTypeInt::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
+void BTFTypeInt::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ OS.AddComment("0x" + Twine::utohexstr(IntVal));
+ OS.EmitIntValue(IntVal, 4);
+}
+
+BTFTypeEnum::BTFTypeEnum(const DICompositeType *ETy, uint32_t VLen) : ETy(ETy) {
+ Kind = BTF::BTF_KIND_ENUM;
+ BTFType.Info = Kind << 24 | VLen;
+ BTFType.Size = roundupToBytes(ETy->getSizeInBits());
+}
+
+void BTFTypeEnum::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(ETy->getName());
+
+ DINodeArray Elements = ETy->getElements();
+ for (const auto Element : Elements) {
+ const auto *Enum = cast<DIEnumerator>(Element);
+
+ struct BTF::BTFEnum BTFEnum;
+ BTFEnum.NameOff = BDebug.addString(Enum->getName());
+ // The BTF enum value is 32-bit; enforce it.
+ BTFEnum.Val = static_cast<uint32_t>(Enum->getValue());
+ EnumValues.push_back(BTFEnum);
+ }
+}
+
+void BTFTypeEnum::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ for (const auto &Enum : EnumValues) {
+ OS.EmitIntValue(Enum.NameOff, 4);
+ OS.EmitIntValue(Enum.Val, 4);
+ }
+}
+
+BTFTypeArray::BTFTypeArray(const DICompositeType *ATy) : ATy(ATy) {
+ Kind = BTF::BTF_KIND_ARRAY;
+ BTFType.Info = Kind << 24;
+}
+
+/// Represent a BTF array. BTF does not record array dimensions,
+/// so conceptually a BTF array is a one-dimensional array.
+void BTFTypeArray::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(ATy->getName());
+ BTFType.Size = 0;
+
+ auto *BaseType = ATy->getBaseType().resolve();
+ ArrayInfo.ElemType = BDebug.getTypeId(BaseType);
+
+ // The IR does not really have a type for the index.
+ // A special type for array index should have been
+ // created during initial type traversal. Just
+ // retrieve that type id.
+ ArrayInfo.IndexType = BDebug.getArrayIndexTypeId();
+
+ // Get the number of array elements.
+ // If the array size is 0, set the number of elements as 0.
+ // Otherwise, recursively traverse the base types to
+ // find the element size. The number of elements is
+ // the total array size in bits divided by
+ // element size in bits.
+ uint64_t ArraySizeInBits = ATy->getSizeInBits();
+ if (!ArraySizeInBits) {
+ ArrayInfo.Nelems = 0;
+ } else {
+ uint32_t BaseTypeSize = BaseType->getSizeInBits();
+ while (!BaseTypeSize) {
+ const auto *DDTy = cast<DIDerivedType>(BaseType);
+ BaseType = DDTy->getBaseType().resolve();
+ assert(BaseType);
+ BaseTypeSize = BaseType->getSizeInBits();
+ }
+ ArrayInfo.Nelems = ATy->getSizeInBits() / BaseTypeSize;
+ }
+}
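+
+// Worked example (illustrative): for "int a[2][3]" the composite array type
+// reports 6 * 32 = 192 size bits over a 32-bit base type, so Nelems becomes
+// 192 / 32 = 6, i.e. both dimensions are flattened into one BTF array.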
+
+void BTFTypeArray::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ OS.EmitIntValue(ArrayInfo.ElemType, 4);
+ OS.EmitIntValue(ArrayInfo.IndexType, 4);
+ OS.EmitIntValue(ArrayInfo.Nelems, 4);
+}
+
+/// Represent either a struct or a union.
+BTFTypeStruct::BTFTypeStruct(const DICompositeType *STy, bool IsStruct,
+ bool HasBitField, uint32_t Vlen)
+ : STy(STy), HasBitField(HasBitField) {
+ Kind = IsStruct ? BTF::BTF_KIND_STRUCT : BTF::BTF_KIND_UNION;
+ BTFType.Size = roundupToBytes(STy->getSizeInBits());
+ BTFType.Info = (HasBitField << 31) | (Kind << 24) | Vlen;
+}
+
+void BTFTypeStruct::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(STy->getName());
+
+ // Add struct/union members.
+ const DINodeArray Elements = STy->getElements();
+ for (const auto *Element : Elements) {
+ struct BTF::BTFMember BTFMember;
+ const auto *DDTy = cast<DIDerivedType>(Element);
+
+ BTFMember.NameOff = BDebug.addString(DDTy->getName());
+ if (HasBitField) {
+ uint8_t BitFieldSize = DDTy->isBitField() ? DDTy->getSizeInBits() : 0;
+ BTFMember.Offset = BitFieldSize << 24 | DDTy->getOffsetInBits();
+ } else {
+ BTFMember.Offset = DDTy->getOffsetInBits();
+ }
+ BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType().resolve());
+ Members.push_back(BTFMember);
+ }
+}
+
+void BTFTypeStruct::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ for (const auto &Member : Members) {
+ OS.EmitIntValue(Member.NameOff, 4);
+ OS.EmitIntValue(Member.Type, 4);
+ OS.AddComment("0x" + Twine::utohexstr(Member.Offset));
+ OS.EmitIntValue(Member.Offset, 4);
+ }
+}
+
+/// The Func kind represents both a subprogram and the pointee of a function
+/// pointer. If FuncName is empty, it represents the pointee of a function
+/// pointer; otherwise, it represents a subprogram. The func arg names are
+/// empty in the function-pointer pointee case, and are valid names for a
+/// subprogram.
+BTFTypeFuncProto::BTFTypeFuncProto(
+ const DISubroutineType *STy, uint32_t VLen,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames)
+ : STy(STy), FuncArgNames(FuncArgNames) {
+ Kind = BTF::BTF_KIND_FUNC_PROTO;
+ BTFType.Info = (Kind << 24) | VLen;
+}
+
+void BTFTypeFuncProto::completeType(BTFDebug &BDebug) {
+ DITypeRefArray Elements = STy->getTypeArray();
+ auto RetType = Elements[0].resolve();
+ BTFType.Type = RetType ? BDebug.getTypeId(RetType) : 0;
+ BTFType.NameOff = 0;
+
+ // A null parameter, typically the last one representing the vararg,
+ // is encoded with NameOff/Type set to 0.
+ for (unsigned I = 1, N = Elements.size(); I < N; ++I) {
+ struct BTF::BTFParam Param;
+ auto Element = Elements[I].resolve();
+ if (Element) {
+ Param.NameOff = BDebug.addString(FuncArgNames[I]);
+ Param.Type = BDebug.getTypeId(Element);
+ } else {
+ Param.NameOff = 0;
+ Param.Type = 0;
+ }
+ Parameters.push_back(Param);
+ }
+}
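+
+// For example (illustrative): "int f(char c, ...)" has a type array of
+// { int, char, null }; the trailing null element is emitted as a BTFParam
+// with NameOff = Type = 0, which is how a vararg prototype is marked.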
+
+void BTFTypeFuncProto::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ for (const auto &Param : Parameters) {
+ OS.EmitIntValue(Param.NameOff, 4);
+ OS.EmitIntValue(Param.Type, 4);
+ }
+}
+
+BTFTypeFunc::BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId)
+ : Name(FuncName) {
+ Kind = BTF::BTF_KIND_FUNC;
+ BTFType.Info = Kind << 24;
+ BTFType.Type = ProtoTypeId;
+}
+
+void BTFTypeFunc::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
+void BTFTypeFunc::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
+
+uint32_t BTFStringTable::addString(StringRef S) {
+ // Check whether the string already exists.
+ for (auto &OffsetM : OffsetToIdMap) {
+ if (Table[OffsetM.second] == S)
+ return OffsetM.first;
+ }
+ // Not found; add it to the string table.
+ uint32_t Offset = Size;
+ OffsetToIdMap[Offset] = Table.size();
+ Table.push_back(S);
+ Size += S.size() + 1;
+ return Offset;
+}
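+
+// Usage sketch (illustrative): the first string added is always the empty
+// string (see the BTFDebug constructor below), so offset 0 names "":
+//   BTFStringTable T;
+//   T.addString("");     // returns offset 0
+//   T.addString("foo");  // returns offset 1 (past "" plus its NUL)
+//   T.addString("foo");  // returns offset 1 again; duplicates are reused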
+
+BTFDebug::BTFDebug(AsmPrinter *AP)
+ : DebugHandlerBase(AP), OS(*Asm->OutStreamer), SkipInstruction(false),
+ LineInfoGenerated(false), SecNameOff(0), ArrayIndexTypeId(0) {
+ addString("\0");
+}
+
+void BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry,
+ const DIType *Ty) {
+ TypeEntry->setId(TypeEntries.size() + 1);
+ DIToIdMap[Ty] = TypeEntry->getId();
+ TypeEntries.push_back(std::move(TypeEntry));
+}
+
+uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry) {
+ TypeEntry->setId(TypeEntries.size() + 1);
+ uint32_t Id = TypeEntry->getId();
+ TypeEntries.push_back(std::move(TypeEntry));
+ return Id;
+}
+
+void BTFDebug::visitBasicType(const DIBasicType *BTy) {
+ // Only int types are supported in BTF.
+ uint32_t Encoding = BTy->getEncoding();
+ if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed &&
+ Encoding != dwarf::DW_ATE_signed_char &&
+ Encoding != dwarf::DW_ATE_unsigned &&
+ Encoding != dwarf::DW_ATE_unsigned_char)
+ return;
+
+ // Create a BTF type instance for this DIBasicType and put it into
+ // DIToIdMap for cross-type reference check.
+ auto TypeEntry = llvm::make_unique<BTFTypeInt>(
+ Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
+ addType(std::move(TypeEntry), BTy);
+}
+
+/// Handle subprogram or subroutine types.
+void BTFDebug::visitSubroutineType(
+ const DISubroutineType *STy, bool ForSubprog,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames,
+ uint32_t &TypeId) {
+ DITypeRefArray Elements = STy->getTypeArray();
+ uint32_t VLen = Elements.size() - 1;
+ if (VLen > BTF::MAX_VLEN)
+ return;
+
+ // Subprogram has a valid non-zero-length name, and the pointee of
+ // a function pointer has an empty name. The subprogram type will
+ // not be added to DIToIdMap as it should not be referenced by
+ // any other types.
+ auto TypeEntry = llvm::make_unique<BTFTypeFuncProto>(STy, VLen, FuncArgNames);
+ if (ForSubprog)
+ TypeId = addType(std::move(TypeEntry)); // For subprogram
+ else
+ addType(std::move(TypeEntry), STy); // For func ptr
+
+ // Visit return type and func arg types.
+ for (const auto Element : Elements) {
+ visitTypeEntry(Element.resolve());
+ }
+}
+
+/// Handle structure/union types.
+void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct) {
+ const DINodeArray Elements = CTy->getElements();
+ uint32_t VLen = Elements.size();
+ if (VLen > BTF::MAX_VLEN)
+ return;
+
+ // Check whether we have any bitfield members or not
+ bool HasBitField = false;
+ for (const auto *Element : Elements) {
+ auto E = cast<DIDerivedType>(Element);
+ if (E->isBitField()) {
+ HasBitField = true;
+ break;
+ }
+ }
+
+ auto TypeEntry =
+ llvm::make_unique<BTFTypeStruct>(CTy, IsStruct, HasBitField, VLen);
+ addType(std::move(TypeEntry), CTy);
+
+ // Visit all struct members.
+ for (const auto *Element : Elements)
+ visitTypeEntry(cast<DIDerivedType>(Element));
+}
+
+void BTFDebug::visitArrayType(const DICompositeType *CTy) {
+ auto TypeEntry = llvm::make_unique<BTFTypeArray>(CTy);
+ addType(std::move(TypeEntry), CTy);
+
+ // The IR does not have a type for array index while BTF wants one.
+ // So create an array index type if there is none.
+ if (!ArrayIndexTypeId) {
+ auto TypeEntry = llvm::make_unique<BTFTypeInt>(dwarf::DW_ATE_unsigned, 32,
+ 0, "__ARRAY_SIZE_TYPE__");
+ ArrayIndexTypeId = addType(std::move(TypeEntry));
+ }
+
+ // Visit array element type.
+ visitTypeEntry(CTy->getBaseType().resolve());
+}
+
+void BTFDebug::visitEnumType(const DICompositeType *CTy) {
+ DINodeArray Elements = CTy->getElements();
+ uint32_t VLen = Elements.size();
+ if (VLen > BTF::MAX_VLEN)
+ return;
+
+ auto TypeEntry = llvm::make_unique<BTFTypeEnum>(CTy, VLen);
+ addType(std::move(TypeEntry), CTy);
+ // No need to visit base type as BTF does not encode it.
+}
+
+/// Handle structure/union forward declarations.
+void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion) {
+ auto TypeEntry = llvm::make_unique<BTFTypeFwd>(CTy->getName(), IsUnion);
+ addType(std::move(TypeEntry), CTy);
+}
+
+/// Handle structure, union, array and enumeration types.
+void BTFDebug::visitCompositeType(const DICompositeType *CTy) {
+ auto Tag = CTy->getTag();
+ if (Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
+ // Handle forward declaration differently as it does not have members.
+ if (CTy->isForwardDecl())
+ visitFwdDeclType(CTy, Tag == dwarf::DW_TAG_union_type);
+ else
+ visitStructType(CTy, Tag == dwarf::DW_TAG_structure_type);
+ } else if (Tag == dwarf::DW_TAG_array_type)
+ visitArrayType(CTy);
+ else if (Tag == dwarf::DW_TAG_enumeration_type)
+ visitEnumType(CTy);
+}
+
+/// Handle pointer, typedef, const, volatile, restrict and member types.
+void BTFDebug::visitDerivedType(const DIDerivedType *DTy) {
+ unsigned Tag = DTy->getTag();
+
+ if (Tag == dwarf::DW_TAG_pointer_type || Tag == dwarf::DW_TAG_typedef ||
+ Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_restrict_type) {
+ auto TypeEntry = llvm::make_unique<BTFTypeDerived>(DTy, Tag);
+ addType(std::move(TypeEntry), DTy);
+ } else if (Tag != dwarf::DW_TAG_member) {
+ return;
+ }
+
+ // Visit base type of pointer, typedef, const, volatile, restrict or
+ // struct/union member.
+ visitTypeEntry(DTy->getBaseType().resolve());
+}
+
+void BTFDebug::visitTypeEntry(const DIType *Ty) {
+ if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end())
+ return;
+
+ uint32_t TypeId;
+ if (const auto *BTy = dyn_cast<DIBasicType>(Ty))
+ visitBasicType(BTy);
+ else if (const auto *STy = dyn_cast<DISubroutineType>(Ty))
+ visitSubroutineType(STy, false, std::unordered_map<uint32_t, StringRef>(),
+ TypeId);
+ else if (const auto *CTy = dyn_cast<DICompositeType>(Ty))
+ visitCompositeType(CTy);
+ else if (const auto *DTy = dyn_cast<DIDerivedType>(Ty))
+ visitDerivedType(DTy);
+ else
+ llvm_unreachable("Unknown DIType");
+}
+
+/// Read file contents from the actual file or from the embedded source.
+std::string BTFDebug::populateFileContent(const DISubprogram *SP) {
+ auto File = SP->getFile();
+ std::string FileName;
+
+ if (File->getDirectory().size())
+ FileName = File->getDirectory().str() + "/" + File->getFilename().str();
+ else
+ FileName = File->getFilename();
+
+ // No need to populate the contents if they have already been populated.
+ if (FileContent.find(FileName) != FileContent.end())
+ return FileName;
+
+ std::vector<std::string> Content;
+ std::string Line;
+ Content.push_back(Line); // Line 0 for empty string
+
+ auto Source = File->getSource();
+ if (Source) {
+ std::istringstream InputString(Source.getValue());
+ while (std::getline(InputString, Line))
+ Content.push_back(Line);
+ } else {
+ std::ifstream InputFile(FileName);
+ while (std::getline(InputFile, Line))
+ Content.push_back(Line);
+ }
+
+ FileContent[FileName] = Content;
+ return FileName;
+}
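+
+// Note (illustrative): Content[0] holds an empty placeholder so the 1-based
+// line numbers coming from the DebugLoc can index the vector directly,
+// e.g. FileContent[FileName][Line] in constructLineInfo() below.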
+
+void BTFDebug::constructLineInfo(const DISubprogram *SP, MCSymbol *Label,
+ uint32_t Line, uint32_t Column) {
+ std::string FileName = populateFileContent(SP);
+ BTFLineInfo LineInfo;
+
+ LineInfo.Label = Label;
+ LineInfo.FileNameOff = addString(FileName);
+ // If file content is not available, let LineOff = 0.
+ if (Line < FileContent[FileName].size())
+ LineInfo.LineOff = addString(FileContent[FileName][Line]);
+ else
+ LineInfo.LineOff = 0;
+ LineInfo.LineNum = Line;
+ LineInfo.ColumnNum = Column;
+ LineInfoTable[SecNameOff].push_back(LineInfo);
+}
+
+void BTFDebug::emitCommonHeader() {
+ OS.AddComment("0x" + Twine::utohexstr(BTF::MAGIC));
+ OS.EmitIntValue(BTF::MAGIC, 2);
+ OS.EmitIntValue(BTF::VERSION, 1);
+ OS.EmitIntValue(0, 1);
+}
+
+void BTFDebug::emitBTFSection() {
+ MCContext &Ctx = OS.getContext();
+ OS.SwitchSection(Ctx.getELFSection(".BTF", ELF::SHT_PROGBITS, 0));
+
+ // Emit header.
+ emitCommonHeader();
+ OS.EmitIntValue(BTF::HeaderSize, 4);
+
+ uint32_t TypeLen = 0, StrLen;
+ for (const auto &TypeEntry : TypeEntries)
+ TypeLen += TypeEntry->getSize();
+ StrLen = StringTable.getSize();
+
+ OS.EmitIntValue(0, 4);
+ OS.EmitIntValue(TypeLen, 4);
+ OS.EmitIntValue(TypeLen, 4);
+ OS.EmitIntValue(StrLen, 4);
+
+ // Emit type table.
+ for (const auto &TypeEntry : TypeEntries)
+ TypeEntry->emitType(OS);
+
+ // Emit string table.
+ uint32_t StringOffset = 0;
+ for (const auto &S : StringTable.getTable()) {
+ OS.AddComment("string offset=" + std::to_string(StringOffset));
+ OS.EmitBytes(S);
+ OS.EmitBytes(StringRef("\0", 1));
+ StringOffset += S.size() + 1;
+ }
+}
+
+void BTFDebug::emitBTFExtSection() {
+ MCContext &Ctx = OS.getContext();
+ OS.SwitchSection(Ctx.getELFSection(".BTF.ext", ELF::SHT_PROGBITS, 0));
+
+ // Emit header.
+ emitCommonHeader();
+ OS.EmitIntValue(BTF::ExtHeaderSize, 4);
+
+ // Account for FuncInfo/LineInfo record size as well.
+ uint32_t FuncLen = 4, LineLen = 4;
+ for (const auto &FuncSec : FuncInfoTable) {
+ FuncLen += BTF::SecFuncInfoSize;
+ FuncLen += FuncSec.second.size() * BTF::BPFFuncInfoSize;
+ }
+ for (const auto &LineSec : LineInfoTable) {
+ LineLen += BTF::SecLineInfoSize;
+ LineLen += LineSec.second.size() * BTF::BPFLineInfoSize;
+ }
+
+ OS.EmitIntValue(0, 4);
+ OS.EmitIntValue(FuncLen, 4);
+ OS.EmitIntValue(FuncLen, 4);
+ OS.EmitIntValue(LineLen, 4);
+
+ // Emit func_info table.
+ OS.AddComment("FuncInfo");
+ OS.EmitIntValue(BTF::BPFFuncInfoSize, 4);
+ for (const auto &FuncSec : FuncInfoTable) {
+ OS.AddComment("FuncInfo section string offset=" +
+ std::to_string(FuncSec.first));
+ OS.EmitIntValue(FuncSec.first, 4);
+ OS.EmitIntValue(FuncSec.second.size(), 4);
+ for (const auto &FuncInfo : FuncSec.second) {
+ Asm->EmitLabelReference(FuncInfo.Label, 4);
+ OS.EmitIntValue(FuncInfo.TypeId, 4);
+ }
+ }
+
+ // Emit line_info table.
+ OS.AddComment("LineInfo");
+ OS.EmitIntValue(BTF::BPFLineInfoSize, 4);
+ for (const auto &LineSec : LineInfoTable) {
+ OS.AddComment("LineInfo section string offset=" +
+ std::to_string(LineSec.first));
+ OS.EmitIntValue(LineSec.first, 4);
+ OS.EmitIntValue(LineSec.second.size(), 4);
+ for (const auto &LineInfo : LineSec.second) {
+ Asm->EmitLabelReference(LineInfo.Label, 4);
+ OS.EmitIntValue(LineInfo.FileNameOff, 4);
+ OS.EmitIntValue(LineInfo.LineOff, 4);
+ OS.AddComment("Line " + std::to_string(LineInfo.LineNum) + " Col " +
+ std::to_string(LineInfo.ColumnNum));
+ OS.EmitIntValue(LineInfo.LineNum << 10 | LineInfo.ColumnNum, 4);
+ }
+ }
+}
+
+void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
+ auto *SP = MF->getFunction().getSubprogram();
+ auto *Unit = SP->getUnit();
+
+ if (Unit->getEmissionKind() == DICompileUnit::NoDebug) {
+ SkipInstruction = true;
+ return;
+ }
+ SkipInstruction = false;
+
+ // Collect all types locally referenced in this function.
+ // Use RetainedNodes so we can collect all argument names
+ // even if the argument is not used.
+ std::unordered_map<uint32_t, StringRef> FuncArgNames;
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
+ visitTypeEntry(DV->getType().resolve());
+
+ // Collect function arguments for subprogram func type.
+ uint32_t Arg = DV->getArg();
+ if (Arg)
+ FuncArgNames[Arg] = DV->getName();
+ }
+ }
+
+ // Construct subprogram func proto type.
+ uint32_t ProtoTypeId;
+ visitSubroutineType(SP->getType(), true, FuncArgNames, ProtoTypeId);
+
+ // Construct subprogram func type.
+ auto FuncTypeEntry =
+ llvm::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId);
+ uint32_t FuncTypeId = addType(std::move(FuncTypeEntry));
+
+ // Construct funcinfo and the first lineinfo for the function.
+ MCSymbol *FuncLabel = Asm->getFunctionBegin();
+ BTFFuncInfo FuncInfo;
+ FuncInfo.Label = FuncLabel;
+ FuncInfo.TypeId = FuncTypeId;
+ if (FuncLabel->isInSection()) {
+ MCSection &Section = FuncLabel->getSection();
+ const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
+ assert(SectionELF && "Null section for Function Label");
+ SecNameOff = addString(SectionELF->getSectionName());
+ } else {
+ SecNameOff = addString(".text");
+ }
+ FuncInfoTable[SecNameOff].push_back(FuncInfo);
+}
+
+void BTFDebug::endFunctionImpl(const MachineFunction *MF) {
+ SkipInstruction = false;
+ LineInfoGenerated = false;
+ SecNameOff = 0;
+}
+
+void BTFDebug::beginInstruction(const MachineInstr *MI) {
+ DebugHandlerBase::beginInstruction(MI);
+
+ if (SkipInstruction || MI->isMetaInstruction() ||
+ MI->getFlag(MachineInstr::FrameSetup))
+ return;
+
+ if (MI->isInlineAsm()) {
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ ;
+
+ // Skip this inline asm instruction if the asmstr is empty.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ if (AsmStr[0] == 0)
+ return;
+ }
+
+ // Skip this instruction if no DebugLoc or the DebugLoc
+ // is the same as the previous instruction.
+ const DebugLoc &DL = MI->getDebugLoc();
+ if (!DL || PrevInstLoc == DL) {
+ // This instruction will be skipped. If no LineInfo has been
+ // generated yet, construct one based on the function signature.
+ if (LineInfoGenerated == false) {
+ auto *S = MI->getMF()->getFunction().getSubprogram();
+ MCSymbol *FuncLabel = Asm->getFunctionBegin();
+ constructLineInfo(S, FuncLabel, S->getLine(), 0);
+ LineInfoGenerated = true;
+ }
+
+ return;
+ }
+
+ // Create a temporary label to remember the insn for lineinfo.
+ MCSymbol *LineSym = OS.getContext().createTempSymbol();
+ OS.EmitLabel(LineSym);
+
+ // Construct the lineinfo.
+ auto SP = DL.get()->getScope()->getSubprogram();
+ constructLineInfo(SP, LineSym, DL.getLine(), DL.getCol());
+
+ LineInfoGenerated = true;
+ PrevInstLoc = DL;
+}
+
+void BTFDebug::endModule() {
+ // Collect all types referenced by globals.
+ const Module *M = MMI->getModule();
+ for (const DICompileUnit *CUNode : M->debug_compile_units()) {
+ for (const auto *GVE : CUNode->getGlobalVariables()) {
+ DIGlobalVariable *GV = GVE->getVariable();
+ visitTypeEntry(GV->getType().resolve());
+ }
+ }
+
+ // Complete BTF type cross references.
+ for (const auto &TypeEntry : TypeEntries)
+ TypeEntry->completeType(*this);
+
+ // Emit BTF sections.
+ emitBTFSection();
+ emitBTFExtSection();
+}
diff --git a/contrib/llvm/lib/Target/BPF/BTFDebug.h b/contrib/llvm/lib/Target/BPF/BTFDebug.h
new file mode 100644
index 000000000000..afd4ed87f63d
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTFDebug.h
@@ -0,0 +1,285 @@
+//===- BTFDebug.h -----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains support for writing BTF debug info.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BTFDEBUG_H
+#define LLVM_LIB_TARGET_BPF_BTFDEBUG_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
+#include <unordered_map>
+#include "BTF.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class BTFDebug;
+class DIType;
+class MCStreamer;
+class MCSymbol;
+class MachineFunction;
+
+/// The base class for BTF type generation.
+class BTFTypeBase {
+protected:
+ uint8_t Kind;
+ uint32_t Id;
+ struct BTF::CommonType BTFType;
+
+public:
+ virtual ~BTFTypeBase() = default;
+ void setId(uint32_t Id) { this->Id = Id; }
+ uint32_t getId() { return Id; }
+ uint32_t roundupToBytes(uint32_t NumBits) { return (NumBits + 7) >> 3; }
+ /// Get the size of this BTF type entry.
+ virtual uint32_t getSize() { return BTF::CommonTypeSize; }
+ /// Complete BTF type generation after all related DebugInfo types
+ /// have been visited so that their BTF type ids are available
+ /// for cross reference.
+ virtual void completeType(BTFDebug &BDebug) {}
+ /// Emit types for this BTF type entry.
+ virtual void emitType(MCStreamer &OS);
+};
+
+/// Handle several derived types, including pointer, const,
+/// volatile, typedef and restrict.
+class BTFTypeDerived : public BTFTypeBase {
+ const DIDerivedType *DTy;
+
+public:
+ BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag);
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle struct or union forward declaration.
+class BTFTypeFwd : public BTFTypeBase {
+ StringRef Name;
+
+public:
+ BTFTypeFwd(StringRef Name, bool IsUnion);
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle int type.
+class BTFTypeInt : public BTFTypeBase {
+ StringRef Name;
+ uint32_t IntVal; ///< Encoding, offset, bits
+
+public:
+ BTFTypeInt(uint32_t Encoding, uint32_t SizeInBits, uint32_t OffsetInBits,
+ StringRef TypeName);
+ uint32_t getSize() { return BTFTypeBase::getSize() + sizeof(uint32_t); }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle enumerate type.
+class BTFTypeEnum : public BTFTypeBase {
+ const DICompositeType *ETy;
+ std::vector<struct BTF::BTFEnum> EnumValues;
+
+public:
+ BTFTypeEnum(const DICompositeType *ETy, uint32_t NumValues);
+ uint32_t getSize() {
+ return BTFTypeBase::getSize() + EnumValues.size() * BTF::BTFEnumSize;
+ }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle array type.
+class BTFTypeArray : public BTFTypeBase {
+ const DICompositeType *ATy;
+ struct BTF::BTFArray ArrayInfo;
+
+public:
+ BTFTypeArray(const DICompositeType *ATy);
+ uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle struct/union type.
+class BTFTypeStruct : public BTFTypeBase {
+ const DICompositeType *STy;
+ bool HasBitField;
+ std::vector<struct BTF::BTFMember> Members;
+
+public:
+ BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField,
+ uint32_t NumMembers);
+ uint32_t getSize() {
+ return BTFTypeBase::getSize() + Members.size() * BTF::BTFMemberSize;
+ }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle function pointer.
+class BTFTypeFuncProto : public BTFTypeBase {
+ const DISubroutineType *STy;
+ std::unordered_map<uint32_t, StringRef> FuncArgNames;
+ std::vector<struct BTF::BTFParam> Parameters;
+
+public:
+ BTFTypeFuncProto(const DISubroutineType *STy, uint32_t NumParams,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames);
+ uint32_t getSize() {
+ return BTFTypeBase::getSize() + Parameters.size() * BTF::BTFParamSize;
+ }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle a subprogram.
+class BTFTypeFunc : public BTFTypeBase {
+ StringRef Name;
+
+public:
+ BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId);
+ uint32_t getSize() { return BTFTypeBase::getSize(); }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// String table.
+class BTFStringTable {
+ /// String table size in bytes.
+ uint32_t Size;
+ /// A mapping from string table offset to the index
+ /// of the Table. It is used to avoid putting
+ /// duplicated strings in the table.
+ std::unordered_map<uint32_t, uint32_t> OffsetToIdMap;
+ /// A vector of strings to represent the string table.
+ std::vector<std::string> Table;
+
+public:
+ BTFStringTable() : Size(0) {}
+ uint32_t getSize() { return Size; }
+ std::vector<std::string> &getTable() { return Table; }
+ /// Add a string to the string table and return its offset
+ /// in the table.
+ uint32_t addString(StringRef S);
+};
+
+/// Represent one func and its type id.
+struct BTFFuncInfo {
+ const MCSymbol *Label; ///< Func MCSymbol
+ uint32_t TypeId; ///< Type id referring to .BTF type section
+};
+
+/// Represent one line info.
+struct BTFLineInfo {
+ MCSymbol *Label; ///< MCSymbol identifying insn for the lineinfo
+ uint32_t FileNameOff; ///< file name offset in the .BTF string table
+ uint32_t LineOff; ///< line offset in the .BTF string table
+ uint32_t LineNum; ///< the line number
+ uint32_t ColumnNum; ///< the column number
+};
+
+/// Collect and emit BTF information.
+class BTFDebug : public DebugHandlerBase {
+ MCStreamer &OS;
+ bool SkipInstruction;
+ bool LineInfoGenerated;
+ uint32_t SecNameOff;
+ uint32_t ArrayIndexTypeId;
+ BTFStringTable StringTable;
+ std::vector<std::unique_ptr<BTFTypeBase>> TypeEntries;
+ std::unordered_map<const DIType *, uint32_t> DIToIdMap;
+ std::unordered_map<uint32_t, std::vector<BTFFuncInfo>> FuncInfoTable;
+ std::unordered_map<uint32_t, std::vector<BTFLineInfo>> LineInfoTable;
+ StringMap<std::vector<std::string>> FileContent;
+
+ /// Add types to TypeEntries.
+ /// @{
+ /// Add types to TypeEntries and DIToIdMap.
+ void addType(std::unique_ptr<BTFTypeBase> TypeEntry, const DIType *Ty);
+ /// Add types to TypeEntries only and return type id.
+ uint32_t addType(std::unique_ptr<BTFTypeBase> TypeEntry);
+ /// @}
+
+ /// IR type visiting functions.
+ /// @{
+ void visitTypeEntry(const DIType *Ty);
+ void visitBasicType(const DIBasicType *BTy);
+ void visitSubroutineType(
+ const DISubroutineType *STy, bool ForSubprog,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames,
+ uint32_t &TypeId);
+ void visitFwdDeclType(const DICompositeType *CTy, bool IsUnion);
+ void visitCompositeType(const DICompositeType *CTy);
+ void visitStructType(const DICompositeType *STy, bool IsStruct);
+ void visitArrayType(const DICompositeType *ATy);
+ void visitEnumType(const DICompositeType *ETy);
+ void visitDerivedType(const DIDerivedType *DTy);
+ /// @}
+
+ /// Get the file content for the subprogram. Certain lines of the file
+ /// may later be put into the string table and referenced by line info.
+ std::string populateFileContent(const DISubprogram *SP);
+
+ /// Construct a line info.
+ void constructLineInfo(const DISubprogram *SP, MCSymbol *Label, uint32_t Line,
+ uint32_t Column);
+
+ /// Emit common header of .BTF and .BTF.ext sections.
+ void emitCommonHeader();
+
+ /// Emit the .BTF section.
+ void emitBTFSection();
+
+ /// Emit the .BTF.ext section.
+ void emitBTFExtSection();
+
+protected:
+ /// Gather pre-function debug information.
+ void beginFunctionImpl(const MachineFunction *MF) override;
+
+ /// Post process after all instructions in this function are processed.
+ void endFunctionImpl(const MachineFunction *MF) override;
+
+public:
+ BTFDebug(AsmPrinter *AP);
+
+ /// Get the special array index type id.
+ uint32_t getArrayIndexTypeId() {
+ assert(ArrayIndexTypeId);
+ return ArrayIndexTypeId;
+ }
+
+ /// Add string to the string table.
+ size_t addString(StringRef S) { return StringTable.addString(S); }
+
+ /// Get the type id for a particular DIType.
+ uint32_t getTypeId(const DIType *Ty) {
+ assert(Ty && "Invalid null Type");
+ assert(DIToIdMap.find(Ty) != DIToIdMap.end() &&
+ "DIType not added in the DIToIdMap");
+ return DIToIdMap[Ty];
+ }
+
+ void setSymbolSize(const MCSymbol *Symbol, uint64_t Size) override {}
+
+ /// Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI) override;
+
+ /// Complete all the types and emit the BTF sections.
+ void endModule() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index e7790ddb3d7e..9f80b762fe36 100644
--- a/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "BPF.h"
-#include "BPFSubtarget.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCAsmInfo.h"
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 134e890dfe49..32e79d0f527e 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -12,6 +12,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdint>
@@ -50,6 +51,23 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case FK_Data_8:
return ELF::R_BPF_64_64;
case FK_Data_4:
+ // .BTF.ext generates FK_Data_4 relocations for
+ // insn offset by creating temporary labels.
+ // The insn offset is within the code section and
+ // has already been fulfilled by applyFixup(). No
+ // further relocation is needed.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ if (A->getSymbol().isTemporary()) {
+ MCSection &Section = A->getSymbol().getSection();
+ const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
+ assert(SectionELF && "Null section for reloc symbol");
+
+ // The reloc symbol should be in text section.
+ unsigned Flags = SectionELF->getFlags();
+ if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_EXECINSTR))
+ return ELF::R_BPF_NONE;
+ }
+ }
return ELF::R_BPF_64_32;
}
}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 171f7f607ff4..af3ad5315253 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -30,8 +30,8 @@ public:
WeakRefDirective = "\t.weak\t";
UsesELFSectionDirectiveForBSS = true;
- HasSingleParameterDotFile = false;
- HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = true;
+ HasDotTypeDotSizeDirective = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 92bda224f3dc..2eb1f0fc8bd9 100644
--- a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -313,8 +313,6 @@ public:
bool iss30_2Imm() const { return true; }
bool iss29_3Imm() const { return true; }
bool iss27_2Imm() const { return CheckImmRange(27, 2, true, true, false); }
- bool iss10_0Imm() const { return CheckImmRange(10, 0, true, false, false); }
- bool iss10_6Imm() const { return CheckImmRange(10, 6, true, false, false); }
bool iss9_0Imm() const { return CheckImmRange(9, 0, true, false, false); }
bool iss8_0Imm() const { return CheckImmRange(8, 0, true, false, false); }
bool iss8_0Imm64() const { return CheckImmRange(8, 0, true, true, false); }
@@ -581,6 +579,7 @@ bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc,
case Match_MnemonicFail:
return Error(IDLoc, "unrecognized instruction");
case Match_InvalidOperand:
+ case Match_InvalidTiedOperand:
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0U) {
if (ErrorInfo >= InstOperands.size())
diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 1a619ebda84e..428b42eba30d 100644
--- a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -9,7 +9,6 @@
#define DEBUG_TYPE "hexagon-disassembler"
-#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
@@ -118,6 +117,10 @@ DecodeGeneralDoubleLow8RegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
@@ -146,62 +149,7 @@ static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder);
static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
-
-static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<4>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<14>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<8>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<7>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<12>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<3>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<13>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<6>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<9>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<5>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<6>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
+#include "HexagonDepDecoders.h"
#include "HexagonGenDisassemblerTables.inc"
static MCDisassembler *createHexagonDisassembler(const Target &T,
@@ -664,6 +612,18 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
return (DecodeRegisterClass(Inst, RegNo >> 1, HvxWRDecoderTable));
}
+LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily.
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg HvxVQRDecoderTable[] = {
+ Hexagon::VQ0, Hexagon::VQ1, Hexagon::VQ2, Hexagon::VQ3,
+ Hexagon::VQ4, Hexagon::VQ5, Hexagon::VQ6, Hexagon::VQ7};
+
+ return DecodeRegisterClass(Inst, RegNo >> 2, HvxVQRDecoderTable);
+}
+
static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
index 6ec52d18cdc4..c18492da803b 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
@@ -15,33 +15,6 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
-#define Hexagon_POINTER_SIZE 4
-
-#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
-#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
-#define Hexagon_WordSize Hexagon_PointerSize
-#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
-
-// allocframe saves LR and FP on stack before allocating
-// a new stack frame. This takes 8 bytes.
-#define HEXAGON_LRFP_SIZE 8
-
-// Normal instruction size (in bytes).
-#define HEXAGON_INSTR_SIZE 4
-
-// Maximum number of words and instructions in a packet.
-#define HEXAGON_PACKET_SIZE 4
-#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE)
-// Minimum number of instructions in an end-loop packet.
-#define HEXAGON_PACKET_INNER_SIZE 2
-#define HEXAGON_PACKET_OUTER_SIZE 3
-// Maximum number of instructions in a packet before shuffling,
-// including a compound one or a duplex or an extender.
-#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)
-
-// Name of the global offset table as defined by the Hexagon ABI
-#define HEXAGON_GOT_SYM_NAME "_GLOBAL_OFFSET_TABLE_"
-
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
index 69e263a425f8..868353e18832 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -25,6 +25,9 @@ include "llvm/Target/Target.td"
include "HexagonDepArch.td"
// Hexagon ISA Extensions
+def ExtensionZReg: SubtargetFeature<"zreg", "UseZRegOps", "true",
+ "Hexagon ZReg extension instructions">;
+
def ExtensionHVX: SubtargetFeature<"hvx", "HexagonHVXVersion",
"Hexagon::ArchEnum::V60", "Hexagon HVX instructions">;
def ExtensionHVXV60: SubtargetFeature<"hvxv60", "HexagonHVXVersion",
@@ -32,10 +35,14 @@ def ExtensionHVXV60: SubtargetFeature<"hvxv60", "HexagonHVXVersion",
[ExtensionHVX]>;
def ExtensionHVXV62: SubtargetFeature<"hvxv62", "HexagonHVXVersion",
"Hexagon::ArchEnum::V62", "Hexagon HVX instructions",
- [ExtensionHVX,ExtensionHVXV60]>;
+ [ExtensionHVX, ExtensionHVXV60]>;
def ExtensionHVXV65: SubtargetFeature<"hvxv65", "HexagonHVXVersion",
"Hexagon::ArchEnum::V65", "Hexagon HVX instructions",
- [ExtensionHVX,ExtensionHVXV60, ExtensionHVXV62]>;
+ [ExtensionHVX, ExtensionHVXV60, ExtensionHVXV62]>;
+def ExtensionHVXV66: SubtargetFeature<"hvxv66", "HexagonHVXVersion",
+ "Hexagon::ArchEnum::V66", "Hexagon HVX instructions",
+ [ExtensionHVX, ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65,
+ ExtensionZReg]>;
def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps",
"true", "Hexagon HVX 64B instructions", [ExtensionHVX]>;
@@ -60,6 +67,9 @@ def FeatureDuplex: SubtargetFeature<"duplex", "EnableDuplex", "true",
"Enable generation of duplex instruction">;
def FeatureReservedR19: SubtargetFeature<"reserved-r19", "ReservedR19",
"true", "Reserve register R19">;
+def FeatureNoreturnStackElim: SubtargetFeature<"noreturn-stack-elim",
+ "NoreturnStackElim", "true",
+ "Eliminate stack allocation in a noreturn function when possible">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
@@ -78,6 +88,10 @@ def UseHVXV62 : Predicate<"HST->useHVXOps()">,
AssemblerPredicate<"ExtensionHVXV62">;
def UseHVXV65 : Predicate<"HST->useHVXOps()">,
AssemblerPredicate<"ExtensionHVXV65">;
+def UseHVXV66 : Predicate<"HST->useHVXOps()">,
+ AssemblerPredicate<"ExtensionHVXV66">;
+def UseZReg : Predicate<"HST->useZRegOps()">,
+ AssemblerPredicate<"ExtensionZReg">;
def Hvx64: HwMode<"+hvx-length64b">;
def Hvx128: HwMode<"+hvx-length128b">;
@@ -309,8 +323,6 @@ include "HexagonPatternsHVX.td"
include "HexagonPatternsV65.td"
include "HexagonDepMappings.td"
include "HexagonIntrinsics.td"
-include "HexagonMapAsm2IntrinV62.gen.td"
-include "HexagonMapAsm2IntrinV65.gen.td"
def HexagonInstrInfo : InstrInfo;
@@ -323,31 +335,31 @@ class Proc<string Name, SchedMachineModel Model,
: ProcessorModel<Name, Model, Features>;
def : Proc<"generic", HexagonModelV60,
- [ArchV4, ArchV5, ArchV55, ArchV60,
- FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
- FeaturePackets, FeatureSmallData]>;
-def : Proc<"hexagonv4", HexagonModelV4,
- [ArchV4,
+ [ArchV5, ArchV55, ArchV60,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
-def : Proc<"hexagonv5", HexagonModelV4,
- [ArchV4, ArchV5,
+def : Proc<"hexagonv5", HexagonModelV5,
+ [ArchV5,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv55", HexagonModelV55,
- [ArchV4, ArchV5, ArchV55,
+ [ArchV5, ArchV55,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv60", HexagonModelV60,
- [ArchV4, ArchV5, ArchV55, ArchV60,
+ [ArchV5, ArchV55, ArchV60,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv62", HexagonModelV62,
- [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62,
+ [ArchV5, ArchV55, ArchV60, ArchV62,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv65", HexagonModelV65,
- [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
+ [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
+ FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ,
+ FeatureNVS, FeaturePackets, FeatureSmallData]>;
+def : Proc<"hexagonv66", HexagonModelV66,
+ [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66,
FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ,
FeatureNVS, FeaturePackets, FeatureSmallData]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 0ac83ea7c5fc..f44fb16e2d8e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -755,7 +755,6 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCInstrInfo &MCII = *Subtarget->getInstrInfo();
if (MI->isBundle()) {
- assert(Subtarget->usePackets() && "Support for packets is disabled");
const MachineBasicBlock* MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator MII = MI->getIterator();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index ba255d30fede..1bdebe557a8c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1985,6 +1985,10 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
case Hexagon::S2_storeri_io:
V = int32_t(U);
break;
+ default:
+ // Opc is already checked above to be one of the three store instructions.
+ // This silences a -Wuninitialized false positive on GCC 5.4.
+ llvm_unreachable("Unexpected store opcode");
}
if (!isInt<8>(V))
return false;
@@ -2223,6 +2227,10 @@ bool BitSimplification::genBitSplit(MachineInstr *MI,
for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
// The number of leading zeros here should be the number of trailing
// non-zeros in RC.
+ unsigned SRC = MRI.getRegClass(S)->getID();
+ if (SRC != Hexagon::IntRegsRegClassID &&
+ SRC != Hexagon::DoubleRegsRegClassID)
+ continue;
if (!BT.has(S))
continue;
const BitTracker::RegisterCell &SC = BT.lookup(S);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index 94aacbed6af6..92b6da871a4c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -93,11 +93,12 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
unsigned ID = RC.getID();
uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
- auto &HRI = static_cast<const HexagonRegisterInfo&>(TRI);
+ const auto &HRI = static_cast<const HexagonRegisterInfo&>(TRI);
bool IsSubLo = (Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo));
switch (ID) {
case Hexagon::DoubleRegsRegClassID:
case Hexagon::HvxWRRegClassID:
+ case Hexagon::HvxVQRRegClassID:
return IsSubLo ? BT::BitMask(0, RW-1)
: BT::BitMask(RW, 2*RW-1);
default:
@@ -114,9 +115,13 @@ uint16_t HexagonEvaluator::getPhysRegBitWidth(unsigned Reg) const {
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
using namespace Hexagon;
- for (auto &RC : {HvxVRRegClass, HvxWRRegClass, HvxQRRegClass})
- if (RC.contains(Reg))
- return TRI.getRegSizeInBits(RC);
+ const auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ if (HST.useHVXOps()) {
+ for (auto &RC : {HvxVRRegClass, HvxWRRegClass, HvxQRRegClass,
+ HvxVQRRegClass})
+ if (RC.contains(Reg))
+ return TRI.getRegSizeInBits(RC);
+ }
// Default treatment for other physical registers.
if (const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg))
return TRI.getRegSizeInBits(*RC);
@@ -142,6 +147,8 @@ const TargetRegisterClass &HexagonEvaluator::composeWithSubRegIndex(
return Hexagon::IntRegsRegClass;
case Hexagon::HvxWRRegClassID:
return Hexagon::HvxVRRegClass;
+ case Hexagon::HvxVQRRegClassID:
+ return Hexagon::HvxWRRegClass;
default:
break;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index cbce61bc63c9..ba9f638796eb 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -376,7 +376,7 @@ namespace {
using IndexList = SetVector<unsigned>;
using ExtenderInit = std::pair<ExtValue, ExtExpr>;
using AssignmentMap = std::map<ExtenderInit, IndexList>;
- using LocDefMap = std::map<Loc, IndexList>;
+ using LocDefList = std::vector<std::pair<Loc, IndexList>>;
const HexagonInstrInfo *HII = nullptr;
const HexagonRegisterInfo *HRI = nullptr;
@@ -399,7 +399,7 @@ namespace {
void assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
AssignmentMap &IMap);
void calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
- LocDefMap &Defs);
+ LocDefList &Defs);
Register insertInitializer(Loc DefL, const ExtenderInit &ExtI);
bool replaceInstrExact(const ExtDesc &ED, Register ExtR);
bool replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
@@ -730,21 +730,13 @@ bool HCE::ExtRoot::operator< (const HCE::ExtRoot &ER) const {
}
case MachineOperand::MO_ExternalSymbol:
return StringRef(V.SymbolName) < StringRef(ER.V.SymbolName);
- case MachineOperand::MO_GlobalAddress: {
- // Global values may not have names, so compare their positions
- // in the parent module.
- const Module &M = *V.GV->getParent();
- auto FindPos = [&M] (const GlobalValue &V) {
- unsigned P = 0;
- for (const GlobalValue &T : M.global_values()) {
- if (&T == &V)
- return P;
- P++;
- }
- llvm_unreachable("Global value not found in module");
- };
- return FindPos(*V.GV) < FindPos(*ER.V.GV);
- }
+ case MachineOperand::MO_GlobalAddress:
+ // Do not use GUIDs, since they depend on the source path. Moving the
+ // source file to a different directory could cause different GUID
+ // values for a pair of given symbols. These symbols could then compare
+ // "less" in one directory, but "greater" in another.
+ assert(!V.GV->getName().empty() && !ER.V.GV->getName().empty());
+ return V.GV->getName() < ER.V.GV->getName();
case MachineOperand::MO_BlockAddress: {
const BasicBlock *ThisB = V.BA->getBasicBlock();
const BasicBlock *OtherB = ER.V.BA->getBasicBlock();
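A note on the GUID remark in the hunk above: for internal-linkage symbols LLVM derives the GUID by hashing a file-qualified identifier, so the same pair of local globals can hash (and therefore order) differently depending on where the module was built, while their plain names stay stable. A minimal sketch of that path dependence, assuming only the GlobalValue::getGlobalIdentifier/getGUID static helpers present at this revision; the symbol and file names are made up:

    #include "llvm/IR/GlobalValue.h"
    using namespace llvm;
    // Identifiers for local symbols embed the source file name, so the two
    // strings below differ, and their MD5-based GUIDs differ with them.
    void guidIsPathDependent() {
      std::string IdA = GlobalValue::getGlobalIdentifier(
          "foo", GlobalValue::InternalLinkage, "src/a.c");
      std::string IdB = GlobalValue::getGlobalIdentifier(
          "foo", GlobalValue::InternalLinkage, "build2/a.c");
      GlobalValue::GUID GA = GlobalValue::getGUID(IdA); // depends on the directory
      GlobalValue::GUID GB = GlobalValue::getGUID(IdB); // differs from GA
      (void)GA; (void)GB; // getName() is "foo" for both, hence the name-based compare
    }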
@@ -796,6 +788,7 @@ HCE::ExtValue::operator MachineOperand() const {
return MachineOperand::CreateCPI(V.ImmVal, Offset, TF);
case MachineOperand::MO_JumpTableIndex:
assert(Offset == 0);
+ return MachineOperand::CreateJTI(V.ImmVal, TF);
default:
llvm_unreachable("Unhandled kind");
}
@@ -1215,12 +1208,19 @@ void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) {
case Hexagon::S4_subaddi: // (__: ## - Rs<<0)
ED.Expr.Rs = MI.getOperand(OpNum+1);
ED.Expr.Neg = true;
+ break;
default: // (__: ## + __<<_)
break;
}
}
ED.UseMI = &MI;
+
+ // Ignore unnamed globals.
+ ExtRoot ER(ED.getOp());
+ if (ER.Kind == MachineOperand::MO_GlobalAddress)
+ if (ER.V.GV->getName().empty())
+ return;
Extenders.push_back(ED);
}
@@ -1243,9 +1243,13 @@ void HCE::collectInstr(MachineInstr &MI) {
void HCE::collect(MachineFunction &MF) {
Extenders.clear();
- for (MachineBasicBlock &MBB : MF)
+ for (MachineBasicBlock &MBB : MF) {
+ // Skip unreachable blocks.
+ if (MBB.getNumber() == -1)
+ continue;
for (MachineInstr &MI : MBB)
collectInstr(MI);
+ }
}
void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
@@ -1470,7 +1474,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
}
void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
- LocDefMap &Defs) {
+ LocDefList &Defs) {
if (Refs.empty())
return;
@@ -1517,7 +1521,7 @@ void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
It = DomB->getFirstTerminator();
}
Loc DefLoc(DomB, It);
- Defs.emplace(DefLoc, Refs);
+ Defs.emplace_back(DefLoc, Refs);
}
HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) {
@@ -1629,7 +1633,7 @@ bool HCE::replaceInstrExact(const ExtDesc &ED, Register ExtR) {
else
MIB.add(MachineOperand(ExtR));
}
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1680,7 +1684,7 @@ bool HCE::replaceInstrExact(const ExtDesc &ED, Register ExtR) {
// Add the stored value for stores.
if (MI.mayStore())
MIB.add(getStoredValueOp(MI));
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1715,6 +1719,15 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
// Clamp Diff to the 16 bit range.
int32_t D = isInt<16>(Diff) ? Diff : (Diff > 0 ? 32767 : -32768);
+ if (Diff > 32767) {
+ // Split Diff into two values: one that is close to min/max int16,
+ // and the other being the rest, and such that both have the same
+ // "alignment" as Diff.
+ uint32_t UD = Diff;
+ OffsetRange R = getOffsetRange(MI.getOperand(0));
+ uint32_t A = std::min<uint32_t>(R.Align, 1u << countTrailingZeros(UD));
+ D &= ~(A-1);
+ }
BuildMI(MBB, At, dl, HII->get(IdxOpc))
.add(MI.getOperand(0))
.add(MachineOperand(ExtR))
@@ -1797,7 +1810,7 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
// Add the stored value for stores.
if (MI.mayStore())
MIB.add(getStoredValueOp(MI));
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
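To make the alignment fixup above concrete, a worked instance with hypothetical numbers, assuming the offset range of operand 0 reports Align == 4: with Diff = 40000 (a multiple of 64, so countTrailingZeros gives 6), A = min(4, 64) = 4; D starts at the clamp value 32767 and D &= ~(A-1) rounds it down to 32764. Both that emitted offset and the leftover part of Diff, 40000 - 32764 = 7236, remain multiples of 4, which is the "same alignment as Diff" property the comment asks for.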
@@ -1878,7 +1891,7 @@ bool HCE::replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI) {
}
bool HCE::replaceExtenders(const AssignmentMap &IMap) {
- LocDefMap Defs;
+ LocDefList Defs;
bool Changed = false;
for (const std::pair<ExtenderInit,IndexList> &P : IMap) {
@@ -1931,6 +1944,11 @@ const MachineOperand &HCE::getStoredValueOp(const MachineInstr &MI) const {
bool HCE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ if (MF.getFunction().hasPersonalityFn()) {
+ LLVM_DEBUG(dbgs() << getPassName() << ": skipping " << MF.getName()
+ << " due to exception handling\n");
+ return false;
+ }
LLVM_DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr));
HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
@@ -1940,10 +1958,24 @@ bool HCE::runOnMachineFunction(MachineFunction &MF) {
AssignmentMap IMap;
collect(MF);
- llvm::sort(Extenders.begin(), Extenders.end(),
- [](const ExtDesc &A, const ExtDesc &B) {
- return ExtValue(A) < ExtValue(B);
- });
+ llvm::sort(Extenders, [this](const ExtDesc &A, const ExtDesc &B) {
+ ExtValue VA(A), VB(B);
+ if (VA != VB)
+ return VA < VB;
+ const MachineInstr *MA = A.UseMI;
+ const MachineInstr *MB = B.UseMI;
+ if (MA == MB) {
+ // If it's the same instruction, compare operand numbers.
+ return A.OpNum < B.OpNum;
+ }
+
+ const MachineBasicBlock *BA = MA->getParent();
+ const MachineBasicBlock *BB = MB->getParent();
+ assert(BA->getNumber() != -1 && BB->getNumber() != -1);
+ if (BA != BB)
+ return BA->getNumber() < BB->getNumber();
+ return MDT->dominates(MA, MB);
+ });
bool Changed = false;
LLVM_DEBUG(dbgs() << "Collected " << Extenders.size() << " extenders\n");
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 8f22a71dc1f3..fa192391313e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -2463,6 +2463,7 @@ APInt HexagonConstEvaluator::getCmpImm(unsigned Opc, unsigned OpX,
case Hexagon::A4_cmpheqi: // s8
case Hexagon::C4_cmpneqi: // s8
Signed = true;
+ break;
case Hexagon::A4_cmpbeqi: // u8
break;
case Hexagon::C2_cmpgtui: // u9
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index fccde96d8a32..28965b69e284 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -555,8 +555,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1,
if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex))
continue;
- // Check that the two instructions are combinable. V4 allows more
- // instructions to be merged into a combine.
+ // Check that the two instructions are combinable.
// The order matters because in an A2_tfrsi we can encode an int8 as
// the hi reg operand but only a uint6 as the low reg operand.
if ((IsI2LowReg && !areCombinableOperations(TRI, I1, *I2, AllowC64)) ||
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h
index dc75f8f63400..dff2b2f471d0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h
@@ -1,4 +1,4 @@
-//===- HexagonDepArch.h ---------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,12 +10,11 @@
//===----------------------------------------------------------------------===//
-
#ifndef HEXAGON_DEP_ARCH_H
#define HEXAGON_DEP_ARCH_H
namespace llvm {
namespace Hexagon {
-enum class ArchEnum { V4,V5,V55,V60,V62,V65 };
+enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66 };
} // namespace Hexagon
} // namespace llvm;
#endif // HEXAGON_DEP_ARCH_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td
index 3594379aa841..f1aadae555c8 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td
@@ -1,4 +1,4 @@
-//===- HexagonDepArch.td --------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,8 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
+def ArchV66: SubtargetFeature<"v66", "HexagonArchVersion", "Hexagon::ArchEnum::V66", "Enable Hexagon V66 architecture">;
+def HasV66 : Predicate<"HST->hasV66Ops()">, AssemblerPredicate<"ArchV66">;
def ArchV65: SubtargetFeature<"v65", "HexagonArchVersion", "Hexagon::ArchEnum::V65", "Enable Hexagon V65 architecture">;
def HasV65 : Predicate<"HST->hasV65Ops()">, AssemblerPredicate<"ArchV65">;
def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "Hexagon::ArchEnum::V62", "Enable Hexagon V62 architecture">;
@@ -18,7 +19,5 @@ def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "Hexagon::ArchEnum::V
def HasV60 : Predicate<"HST->hasV60Ops()">, AssemblerPredicate<"ArchV60">;
def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "Hexagon::ArchEnum::V55", "Enable Hexagon V55 architecture">;
def HasV55 : Predicate<"HST->hasV55Ops()">, AssemblerPredicate<"ArchV55">;
-def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "Hexagon::ArchEnum::V4", "Enable Hexagon V4 architecture">;
-def HasV4 : Predicate<"HST->hasV4Ops()">, AssemblerPredicate<"ArchV4">;
def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "Hexagon::ArchEnum::V5", "Enable Hexagon V5 architecture">;
def HasV5 : Predicate<"HST->hasV5Ops()">, AssemblerPredicate<"ArchV5">;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h
new file mode 100644
index 000000000000..9f78412f45d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h
@@ -0,0 +1,79 @@
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, please consult code owner before editing.
+//===----------------------------------------------------------------------===//
+
+// clang-format off
+
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<4>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<14>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<8>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<7>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<12>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<3>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<13>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<9>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<5>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
+// clang-format on
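These stubs differ only in the width passed to signedDecoder<>; the template parameter is the number of significant bits in the already-scaled field, so for most of them an sM_N immediate (an M-bit signed value shifted left by N) pairs with signedDecoder<M+N>, e.g. s4_3 with signedDecoder<7> (the wide s29_3/s30_2/s31_1 forms are the exception). A minimal sketch of just the sign-extension step, assuming nothing beyond llvm::SignExtend64 from Support/MathExtras.h; the in-tree signedDecoder<> lives in the Hexagon disassembler and performs the equivalent extension plus operand construction:

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>
    // s4_3: a 4-bit signed field scaled by 8 occupies 7 significant bits,
    // so bit 6 acts as the sign bit after the shift.
    int64_t decodeS4_3(uint64_t tmp) {        // e.g. tmp = 0x58 (0b1011000)
      return llvm::SignExtend64<7>(tmp);      // 0x58 -> -40, i.e. (-5) << 3
    }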
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
index b27cdae81a28..9e3dea9f3e9b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -1,4 +1,4 @@
-//===- HexagonDepIICHVX.td ------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,1849 +9,2549 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
-def tc_0317c6ca : InstrItinClass;
-def tc_1b93bdc6 : InstrItinClass;
-def tc_2171ebae : InstrItinClass;
-def tc_28978789 : InstrItinClass;
-def tc_29841470 : InstrItinClass;
-def tc_316c637c : InstrItinClass;
-def tc_354299ad : InstrItinClass;
-def tc_35e92f8e : InstrItinClass;
-def tc_38208312 : InstrItinClass;
-def tc_4105d6b5 : InstrItinClass;
-def tc_41f4b64e : InstrItinClass;
-def tc_41f99e1c : InstrItinClass;
-def tc_45453b98 : InstrItinClass;
-def tc_4e2a5159 : InstrItinClass;
-def tc_4f190ba3 : InstrItinClass;
-def tc_4fd8566e : InstrItinClass;
-def tc_51cd3aab : InstrItinClass;
-def tc_5a9fc4ec : InstrItinClass;
-def tc_5c03dc63 : InstrItinClass;
-def tc_5c120602 : InstrItinClass;
-def tc_5cbf490b : InstrItinClass;
-def tc_63e3d94c : InstrItinClass;
-def tc_644584f8 : InstrItinClass;
-def tc_66bb62ea : InstrItinClass;
-def tc_69b6dd20 : InstrItinClass;
-def tc_6b78cf13 : InstrItinClass;
-def tc_6fd9ad30 : InstrItinClass;
-def tc_71337255 : InstrItinClass;
-def tc_72ad7b54 : InstrItinClass;
-def tc_7474003e : InstrItinClass;
-def tc_77a4c701 : InstrItinClass;
-def tc_7c3f55c4 : InstrItinClass;
-def tc_7e9f581b : InstrItinClass;
-def tc_7fa82b08 : InstrItinClass;
-def tc_7fa8b40f : InstrItinClass;
-def tc_85d237e3 : InstrItinClass;
-def tc_8a6eb39a : InstrItinClass;
-def tc_8b6a873f : InstrItinClass;
-def tc_908a4c8c : InstrItinClass;
-def tc_9311da3f : InstrItinClass;
-def tc_94f43c04 : InstrItinClass;
-def tc_9777e6bf : InstrItinClass;
-def tc_97c165b9 : InstrItinClass;
-def tc_98733e9d : InstrItinClass;
-def tc_99093773 : InstrItinClass;
-def tc_9b9642a1 : InstrItinClass;
-def tc_9c267309 : InstrItinClass;
-def tc_a3127e12 : InstrItinClass;
-def tc_a4c9df3b : InstrItinClass;
-def tc_a807365d : InstrItinClass;
-def tc_aedb9f9e : InstrItinClass;
-def tc_b06ab583 : InstrItinClass;
-def tc_b712833a : InstrItinClass;
-def tc_b77635b4 : InstrItinClass;
-def tc_bbaf280e : InstrItinClass;
-def tc_bf142ae2 : InstrItinClass;
-def tc_bfe309d5 : InstrItinClass;
-def tc_c00bf9c9 : InstrItinClass;
-def tc_c4b515c5 : InstrItinClass;
-def tc_cbf6d1dc : InstrItinClass;
-def tc_cedf314b : InstrItinClass;
-def tc_d2cb81ea : InstrItinClass;
-def tc_d5090f3e : InstrItinClass;
-def tc_d642eff3 : InstrItinClass;
-def tc_d725e5b0 : InstrItinClass;
-def tc_d7bea0ec : InstrItinClass;
-def tc_d98f4d63 : InstrItinClass;
-def tc_da979fb3 : InstrItinClass;
-def tc_db5b9e2f : InstrItinClass;
-def tc_df54ad52 : InstrItinClass;
-def tc_e172d86a : InstrItinClass;
-def tc_e231aa4f : InstrItinClass;
-def tc_e3748cdf : InstrItinClass;
-def tc_e5053c8f : InstrItinClass;
-def tc_e6299d16 : InstrItinClass;
-def tc_eb669007 : InstrItinClass;
-def tc_ec58f88a : InstrItinClass;
-def tc_eda67dcd : InstrItinClass;
-def tc_ee927c0e : InstrItinClass;
-def tc_f3fc3f83 : InstrItinClass;
-def tc_fa99dc24 : InstrItinClass;
+def tc_04da405a : InstrItinClass;
+def tc_05058f6f : InstrItinClass;
+def tc_05ac6f98 : InstrItinClass;
+def tc_05ca8cfd : InstrItinClass;
+def tc_08a4f1b6 : InstrItinClass;
+def tc_0b04c6c7 : InstrItinClass;
+def tc_0ec46cf9 : InstrItinClass;
+def tc_131f1c81 : InstrItinClass;
+def tc_1381a97c : InstrItinClass;
+def tc_15fdf750 : InstrItinClass;
+def tc_16ff9ef8 : InstrItinClass;
+def tc_191381c1 : InstrItinClass;
+def tc_1ad8a370 : InstrItinClass;
+def tc_1ba8a0cd : InstrItinClass;
+def tc_20a4bbec : InstrItinClass;
+def tc_257f6f7c : InstrItinClass;
+def tc_26a377fe : InstrItinClass;
+def tc_2c745bb8 : InstrItinClass;
+def tc_2d4051cd : InstrItinClass;
+def tc_2e8f5f6e : InstrItinClass;
+def tc_309dbb4f : InstrItinClass;
+def tc_3904b926 : InstrItinClass;
+def tc_3aacf4a8 : InstrItinClass;
+def tc_3ad719fb : InstrItinClass;
+def tc_3c56e5ce : InstrItinClass;
+def tc_3ce09744 : InstrItinClass;
+def tc_3e2aaafc : InstrItinClass;
+def tc_447d9895 : InstrItinClass;
+def tc_453fe68d : InstrItinClass;
+def tc_46d6c3e0 : InstrItinClass;
+def tc_51d0ecc3 : InstrItinClass;
+def tc_52447ecc : InstrItinClass;
+def tc_540c3da3 : InstrItinClass;
+def tc_54a0dc47 : InstrItinClass;
+def tc_561aaa58 : InstrItinClass;
+def tc_56c4f9fe : InstrItinClass;
+def tc_56e64202 : InstrItinClass;
+def tc_58d21193 : InstrItinClass;
+def tc_5bf8afbb : InstrItinClass;
+def tc_61bf7c03 : InstrItinClass;
+def tc_649072c2 : InstrItinClass;
+def tc_660769f1 : InstrItinClass;
+def tc_663c80a7 : InstrItinClass;
+def tc_6942b6e0 : InstrItinClass;
+def tc_6e7fa133 : InstrItinClass;
+def tc_71646d06 : InstrItinClass;
+def tc_7177e272 : InstrItinClass;
+def tc_718b5c53 : InstrItinClass;
+def tc_7273323b : InstrItinClass;
+def tc_7417e785 : InstrItinClass;
+def tc_767c4e9d : InstrItinClass;
+def tc_7e6a3e89 : InstrItinClass;
+def tc_8772086c : InstrItinClass;
+def tc_87adc037 : InstrItinClass;
+def tc_8e420e4d : InstrItinClass;
+def tc_90bcc1db : InstrItinClass;
+def tc_933f2b39 : InstrItinClass;
+def tc_946013d8 : InstrItinClass;
+def tc_9d1dc972 : InstrItinClass;
+def tc_9f363d21 : InstrItinClass;
+def tc_a02a10a8 : InstrItinClass;
+def tc_a0dbea28 : InstrItinClass;
+def tc_a7e6707d : InstrItinClass;
+def tc_ab23f776 : InstrItinClass;
+def tc_abe8c3b2 : InstrItinClass;
+def tc_ac4046bc : InstrItinClass;
+def tc_af25efd9 : InstrItinClass;
+def tc_b091f1c6 : InstrItinClass;
+def tc_b28e51aa : InstrItinClass;
+def tc_b4416217 : InstrItinClass;
+def tc_b9db8205 : InstrItinClass;
+def tc_c0749f3c : InstrItinClass;
+def tc_c127de3a : InstrItinClass;
+def tc_c4edf264 : InstrItinClass;
+def tc_c5dba46e : InstrItinClass;
+def tc_c7039829 : InstrItinClass;
+def tc_cd94bfe0 : InstrItinClass;
+def tc_d8287c14 : InstrItinClass;
+def tc_db5555f3 : InstrItinClass;
+def tc_dd5b0695 : InstrItinClass;
+def tc_df80eeb0 : InstrItinClass;
+def tc_e2d2e9e5 : InstrItinClass;
+def tc_e35c1e93 : InstrItinClass;
+def tc_e3f68a46 : InstrItinClass;
+def tc_e675c45a : InstrItinClass;
+def tc_e699ae41 : InstrItinClass;
+def tc_e8797b98 : InstrItinClass;
+def tc_e99d4c2e : InstrItinClass;
+def tc_f1de44ef : InstrItinClass;
+def tc_f21e8abb : InstrItinClass;
+def tc_fd7610da : InstrItinClass;
class DepHVXItinV55 {
list<InstrItinData> DepHVXItinV55_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
[HVX_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_8772086c, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL]>], [3],
[HVX_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
+class DepHVXItinV60 {
+ list<InstrItinData> DepHVXItinV60_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLSHF]>], [9, 5],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL]>], [],
[]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
-class DepHVXItinV60 {
- list<InstrItinData> DepHVXItinV60_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
+ InstrStage<1, [CVI_ALL]>], [3, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_8772086c, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ InstrStage<1, [CVI_ALL]>], [3],
[HVX_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+class DepHVXItinV62 {
+ list<InstrItinData> DepHVXItinV62_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VS*/
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [2],
- [Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3],
- [HVX_FWD]>,
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [],
- []>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-class DepHVXItinV62 {
- list<InstrItinData> DepHVXItinV62_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
+ InstrStage<1, [CVI_ALL]>], [3, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ InstrStage<1, [CVI_ALL]>], [3],
[HVX_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+class DepHVXItinV65 {
+ list<InstrItinData> DepHVXItinV65_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VA*/
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [2],
- [Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3],
- [HVX_FWD]>,
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [],
- []>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-class DepHVXItinV65 {
- list<InstrItinData> DepHVXItinV65_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ALL]>], [3],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
+class DepHVXItinV66 {
+ list<InstrItinData> DepHVXItinV66_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
- [HVX_FWD]>,
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VA*/
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [2],
- [Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3],
- [HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [],
- []>,
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3],
+ [HVX_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
[InstrStage<1, [SLOT2], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
];
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
index 931504b56ccb..9da25952fb1c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -1,4 +1,4 @@
-//===- HexagonDepIICScalar.td ---------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,3087 +9,3789 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
-def tc_00afc57e : InstrItinClass;
-def tc_00e7c26e : InstrItinClass;
-def tc_03220ffa : InstrItinClass;
-def tc_038a1342 : InstrItinClass;
-def tc_04c9decc : InstrItinClass;
-def tc_05b6c987 : InstrItinClass;
-def tc_0cd51c76 : InstrItinClass;
-def tc_0dc560de : InstrItinClass;
-def tc_0fc1ae07 : InstrItinClass;
-def tc_10b97e27 : InstrItinClass;
-def tc_1372bca1 : InstrItinClass;
-def tc_14cd4cfa : InstrItinClass;
-def tc_15411484 : InstrItinClass;
-def tc_16d0d8d5 : InstrItinClass;
-def tc_181af5d0 : InstrItinClass;
-def tc_1853ea6d : InstrItinClass;
-def tc_1b82a277 : InstrItinClass;
-def tc_1b9c9ee5 : InstrItinClass;
-def tc_1d5a38a8 : InstrItinClass;
-def tc_1e856f58 : InstrItinClass;
-def tc_234a11a5 : InstrItinClass;
-def tc_238d91d2 : InstrItinClass;
-def tc_29175780 : InstrItinClass;
-def tc_2a160009 : InstrItinClass;
-def tc_2b2f4060 : InstrItinClass;
-def tc_2b6f77c6 : InstrItinClass;
-def tc_2f185f5c : InstrItinClass;
-def tc_2fc0c436 : InstrItinClass;
-def tc_351fed2d : InstrItinClass;
-def tc_3669266a : InstrItinClass;
-def tc_367f7f3d : InstrItinClass;
-def tc_36c68ad1 : InstrItinClass;
-def tc_395dc00f : InstrItinClass;
-def tc_3bc2c5d3 : InstrItinClass;
-def tc_3cb8ea06 : InstrItinClass;
-def tc_3d04548d : InstrItinClass;
-def tc_3da80ba5 : InstrItinClass;
-def tc_3e07fb90 : InstrItinClass;
-def tc_41d5298e : InstrItinClass;
-def tc_4403ca65 : InstrItinClass;
-def tc_44126683 : InstrItinClass;
-def tc_452f85af : InstrItinClass;
-def tc_481e5e5c : InstrItinClass;
-def tc_49eb22c8 : InstrItinClass;
-def tc_4ca572d4 : InstrItinClass;
-def tc_4d9914c9 : InstrItinClass;
-def tc_4d99bca9 : InstrItinClass;
-def tc_4f7cd700 : InstrItinClass;
-def tc_513bef45 : InstrItinClass;
-def tc_51b866be : InstrItinClass;
-def tc_523fcf30 : InstrItinClass;
-def tc_5274e61a : InstrItinClass;
-def tc_52d7bbea : InstrItinClass;
-def tc_53bc8a6a : InstrItinClass;
-def tc_53bdb2f6 : InstrItinClass;
-def tc_540fdfbc : InstrItinClass;
-def tc_55050d58 : InstrItinClass;
-def tc_57288781 : InstrItinClass;
-def tc_594ab548 : InstrItinClass;
-def tc_59a01ead : InstrItinClass;
-def tc_5acef64a : InstrItinClass;
-def tc_5ba5997d : InstrItinClass;
-def tc_5eb851fc : InstrItinClass;
-def tc_5f6847a1 : InstrItinClass;
-def tc_60571023 : InstrItinClass;
-def tc_609d2efe : InstrItinClass;
-def tc_63fe3df7 : InstrItinClass;
-def tc_66888ded : InstrItinClass;
-def tc_6792d5ff : InstrItinClass;
-def tc_681a2300 : InstrItinClass;
-def tc_68cb12ce : InstrItinClass;
-def tc_6aa5711a : InstrItinClass;
-def tc_6ac37025 : InstrItinClass;
-def tc_6ebb4a12 : InstrItinClass;
-def tc_6efc556e : InstrItinClass;
-def tc_6fa4db47 : InstrItinClass;
-def tc_73043bf4 : InstrItinClass;
-def tc_746baa8e : InstrItinClass;
-def tc_74e47fd9 : InstrItinClass;
-def tc_7934b9df : InstrItinClass;
-def tc_7a830544 : InstrItinClass;
-def tc_7f881c76 : InstrItinClass;
-def tc_84df2cd3 : InstrItinClass;
-def tc_855b0b61 : InstrItinClass;
-def tc_87735c3b : InstrItinClass;
-def tc_897d1a9d : InstrItinClass;
-def tc_8b15472a : InstrItinClass;
-def tc_8fd5f294 : InstrItinClass;
-def tc_8fe6b782 : InstrItinClass;
-def tc_90f3e30c : InstrItinClass;
-def tc_976ddc4f : InstrItinClass;
-def tc_97743097 : InstrItinClass;
-def tc_994333cd : InstrItinClass;
-def tc_999d32db : InstrItinClass;
-def tc_99be14ca : InstrItinClass;
-def tc_9c00ce8d : InstrItinClass;
-def tc_9c98e8af : InstrItinClass;
-def tc_9d5941c7 : InstrItinClass;
-def tc_9ef61e5c : InstrItinClass;
-def tc_9faf76ae : InstrItinClass;
-def tc_9fdb5406 : InstrItinClass;
-def tc_a21dc435 : InstrItinClass;
-def tc_a27582fa : InstrItinClass;
-def tc_a46f0df5 : InstrItinClass;
-def tc_a788683e : InstrItinClass;
-def tc_a8acdac0 : InstrItinClass;
-def tc_a904d137 : InstrItinClass;
-def tc_adb14c66 : InstrItinClass;
-def tc_b13761ae : InstrItinClass;
-def tc_b166348b : InstrItinClass;
-def tc_b44c6e2a : InstrItinClass;
-def tc_b77c481f : InstrItinClass;
-def tc_b7dd427e : InstrItinClass;
-def tc_b9488031 : InstrItinClass;
-def tc_b9c0b731 : InstrItinClass;
-def tc_b9c4623f : InstrItinClass;
-def tc_bad2bcaf : InstrItinClass;
-def tc_bcc96cee : InstrItinClass;
-def tc_bde7aaf4 : InstrItinClass;
-def tc_be706f30 : InstrItinClass;
-def tc_c2f7d806 : InstrItinClass;
-def tc_c5e2426d : InstrItinClass;
-def tc_c6aa82f7 : InstrItinClass;
-def tc_c6ce9b3f : InstrItinClass;
-def tc_c6ebf8dd : InstrItinClass;
-def tc_c74f796f : InstrItinClass;
-def tc_c82dc1ff : InstrItinClass;
-def tc_caaebcba : InstrItinClass;
-def tc_cd7374a0 : InstrItinClass;
-def tc_cde8b071 : InstrItinClass;
-def tc_cf47a43f : InstrItinClass;
-def tc_cf59f215 : InstrItinClass;
-def tc_d088982c : InstrItinClass;
-def tc_d1090e34 : InstrItinClass;
-def tc_d24b2d85 : InstrItinClass;
-def tc_d580173f : InstrItinClass;
-def tc_d6bf0472 : InstrItinClass;
-def tc_d9709180 : InstrItinClass;
-def tc_d9f95eef : InstrItinClass;
-def tc_daa058fa : InstrItinClass;
-def tc_dbdffe3d : InstrItinClass;
-def tc_e0739b8c : InstrItinClass;
-def tc_e1e99bfa : InstrItinClass;
-def tc_e216a5db : InstrItinClass;
-def tc_e421e012 : InstrItinClass;
-def tc_e7624c08 : InstrItinClass;
-def tc_e7d02c66 : InstrItinClass;
-def tc_e913dc32 : InstrItinClass;
-def tc_e9c822f7 : InstrItinClass;
-def tc_e9fae2d6 : InstrItinClass;
-def tc_ef52ed71 : InstrItinClass;
-def tc_ef84f62f : InstrItinClass;
-def tc_f2704b9a : InstrItinClass;
-def tc_f3eaa14b : InstrItinClass;
-def tc_f47d212f : InstrItinClass;
-def tc_f49e76f4 : InstrItinClass;
-def tc_f7dd9c9f : InstrItinClass;
-def tc_f86c328a : InstrItinClass;
-def tc_f8eeed7a : InstrItinClass;
-def tc_fcab4871 : InstrItinClass;
-def tc_ff9ee76e : InstrItinClass;
-
-class DepScalarItinV4 {
- list<InstrItinData> DepScalarItinV4_list = [
- InstrItinData <tc_00afc57e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_00e7c26e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_03220ffa, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_038a1342, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_04c9decc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_05b6c987, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0cd51c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0dc560de, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0fc1ae07, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_10b97e27, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1372bca1, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_14cd4cfa, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_15411484, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_16d0d8d5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_181af5d0, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1853ea6d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1b82a277, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1b9c9ee5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1d5a38a8, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1e856f58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_234a11a5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_238d91d2, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_29175780, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_2a160009, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_2b2f4060, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_2b6f77c6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2f185f5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2fc0c436, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_351fed2d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3669266a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_367f7f3d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_36c68ad1, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_395dc00f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3bc2c5d3, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3cb8ea06, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3d04548d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3da80ba5, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3e07fb90, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_41d5298e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4403ca65, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_44126683, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_452f85af, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_481e5e5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_49eb22c8, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4ca572d4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_4d9914c9, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_4d99bca9, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4f7cd700, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_513bef45, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_51b866be, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_523fcf30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_5274e61a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_52d7bbea, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bc8a6a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bdb2f6, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_540fdfbc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_55050d58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_57288781, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_594ab548, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_59a01ead, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_5acef64a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_5ba5997d, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_5eb851fc, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_5f6847a1, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_60571023, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_609d2efe, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_63fe3df7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_66888ded, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_6792d5ff, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_681a2300, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_68cb12ce, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6aa5711a, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ac37025, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ebb4a12, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6efc556e, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6fa4db47, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_73043bf4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_746baa8e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_74e47fd9, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_7934b9df, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_7a830544, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_7f881c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_84df2cd3, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_855b0b61, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_87735c3b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_897d1a9d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8b15472a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_8fd5f294, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8fe6b782, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_90f3e30c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_976ddc4f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_97743097, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_994333cd, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_999d32db, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_99be14ca, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c00ce8d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c98e8af, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9d5941c7, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_9ef61e5c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9faf76ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_9fdb5406, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a21dc435, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_a27582fa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a46f0df5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a788683e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a8acdac0, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_a904d137, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_adb14c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b13761ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_b166348b, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b44c6e2a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b77c481f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b7dd427e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b9488031, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c0b731, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c4623f, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_bad2bcaf, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_bcc96cee, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_bde7aaf4, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_be706f30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c2f7d806, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c5e2426d, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_c6aa82f7, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ce9b3f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ebf8dd, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c74f796f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c82dc1ff, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_caaebcba, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cd7374a0, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cde8b071, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cf47a43f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cf59f215, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_d088982c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d1090e34, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d24b2d85, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_d580173f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d6bf0472, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_d9709180, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d9f95eef, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_daa058fa, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_dbdffe3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e0739b8c, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_e1e99bfa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e216a5db, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e421e012, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7624c08, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7d02c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e913dc32, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e9c822f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_e9fae2d6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_ef52ed71, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_ef84f62f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f2704b9a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f3eaa14b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f47d212f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f49e76f4, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f7dd9c9f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_f86c328a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f8eeed7a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_fcab4871, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_ff9ee76e, [InstrStage<1, [SLOT0]>]> ];
-}
+def tc_002cb246 : InstrItinClass;
+def tc_0371abea : InstrItinClass;
+def tc_05c070ec : InstrItinClass;
+def tc_05d3a09b : InstrItinClass;
+def tc_0663f615 : InstrItinClass;
+def tc_096199d3 : InstrItinClass;
+def tc_0a705168 : InstrItinClass;
+def tc_0ae0825c : InstrItinClass;
+def tc_0b2be201 : InstrItinClass;
+def tc_0d8f5752 : InstrItinClass;
+def tc_13bfbcf9 : InstrItinClass;
+def tc_14b272fa : InstrItinClass;
+def tc_14b5c689 : InstrItinClass;
+def tc_15aa71c5 : InstrItinClass;
+def tc_174516e8 : InstrItinClass;
+def tc_17e0d2cd : InstrItinClass;
+def tc_1a2fd869 : InstrItinClass;
+def tc_1ad90acd : InstrItinClass;
+def tc_1ae57e39 : InstrItinClass;
+def tc_1b6f7cec : InstrItinClass;
+def tc_1c4528a2 : InstrItinClass;
+def tc_1c80410a : InstrItinClass;
+def tc_1d81e60e : InstrItinClass;
+def tc_1fc97744 : InstrItinClass;
+def tc_20cdee80 : InstrItinClass;
+def tc_2332b92e : InstrItinClass;
+def tc_24b66c99 : InstrItinClass;
+def tc_25a78932 : InstrItinClass;
+def tc_2b8da4c2 : InstrItinClass;
+def tc_2eabeebe : InstrItinClass;
+def tc_2f7c551d : InstrItinClass;
+def tc_2ff964b4 : InstrItinClass;
+def tc_30b9bb4a : InstrItinClass;
+def tc_32779c6f : InstrItinClass;
+def tc_36153880 : InstrItinClass;
+def tc_362c6592 : InstrItinClass;
+def tc_3962fa26 : InstrItinClass;
+def tc_39dfefe8 : InstrItinClass;
+def tc_3a867367 : InstrItinClass;
+def tc_3b470976 : InstrItinClass;
+def tc_3b5b7ef9 : InstrItinClass;
+def tc_3bd75825 : InstrItinClass;
+def tc_3c76b0ff : InstrItinClass;
+def tc_3d495a39 : InstrItinClass;
+def tc_40116ca8 : InstrItinClass;
+def tc_434c8e1e : InstrItinClass;
+def tc_4414d8b1 : InstrItinClass;
+def tc_44d3da28 : InstrItinClass;
+def tc_4560740b : InstrItinClass;
+def tc_4837eefb : InstrItinClass;
+def tc_49a8207d : InstrItinClass;
+def tc_4ae7b58b : InstrItinClass;
+def tc_4b68bce4 : InstrItinClass;
+def tc_4c5ba658 : InstrItinClass;
+def tc_4d5fa3a1 : InstrItinClass;
+def tc_53559e35 : InstrItinClass;
+def tc_56336eb0 : InstrItinClass;
+def tc_56f114f4 : InstrItinClass;
+def tc_57890846 : InstrItinClass;
+def tc_5a2711e5 : InstrItinClass;
+def tc_5abb5e3f : InstrItinClass;
+def tc_5aee39f7 : InstrItinClass;
+def tc_5b54b33f : InstrItinClass;
+def tc_5b7c0967 : InstrItinClass;
+def tc_5bf126a6 : InstrItinClass;
+def tc_5d7f5414 : InstrItinClass;
+def tc_5ef37dc4 : InstrItinClass;
+def tc_6132ba3d : InstrItinClass;
+def tc_61830035 : InstrItinClass;
+def tc_640086b5 : InstrItinClass;
+def tc_643b4717 : InstrItinClass;
+def tc_67435e81 : InstrItinClass;
+def tc_675e4897 : InstrItinClass;
+def tc_679309b8 : InstrItinClass;
+def tc_6b25e783 : InstrItinClass;
+def tc_703e822c : InstrItinClass;
+def tc_7186d325 : InstrItinClass;
+def tc_7646c131 : InstrItinClass;
+def tc_76851da1 : InstrItinClass;
+def tc_779080bf : InstrItinClass;
+def tc_784490da : InstrItinClass;
+def tc_785f65a7 : InstrItinClass;
+def tc_7a91e76a : InstrItinClass;
+def tc_838b34ea : InstrItinClass;
+def tc_85c9c08f : InstrItinClass;
+def tc_85d5d03f : InstrItinClass;
+def tc_862b3e70 : InstrItinClass;
+def tc_88b4f13d : InstrItinClass;
+def tc_89e94ad3 : InstrItinClass;
+def tc_8b121f4a : InstrItinClass;
+def tc_8b3e402a : InstrItinClass;
+def tc_8c945be0 : InstrItinClass;
+def tc_8c99de45 : InstrItinClass;
+def tc_8d9d0154 : InstrItinClass;
+def tc_8fb7ab1b : InstrItinClass;
+def tc_9461ff31 : InstrItinClass;
+def tc_946df596 : InstrItinClass;
+def tc_9ad9998f : InstrItinClass;
+def tc_9bfd761f : InstrItinClass;
+def tc_9c3ecd83 : InstrItinClass;
+def tc_9ca930f7 : InstrItinClass;
+def tc_9da59d12 : InstrItinClass;
+def tc_9debc299 : InstrItinClass;
+def tc_9e313203 : InstrItinClass;
+def tc_9fc3dae0 : InstrItinClass;
+def tc_a1123dda : InstrItinClass;
+def tc_a1c00888 : InstrItinClass;
+def tc_a58fd5cc : InstrItinClass;
+def tc_a5d4aeec : InstrItinClass;
+def tc_a6b1eca9 : InstrItinClass;
+def tc_a813cf9a : InstrItinClass;
+def tc_a9d88b22 : InstrItinClass;
+def tc_ae53734a : InstrItinClass;
+def tc_b31c2e97 : InstrItinClass;
+def tc_b343892a : InstrItinClass;
+def tc_b43e7930 : InstrItinClass;
+def tc_b4407292 : InstrItinClass;
+def tc_b44ecf75 : InstrItinClass;
+def tc_b4b5c03a : InstrItinClass;
+def tc_b51dc29a : InstrItinClass;
+def tc_b83e6d73 : InstrItinClass;
+def tc_b857bf4e : InstrItinClass;
+def tc_b8bffe55 : InstrItinClass;
+def tc_b90a29b1 : InstrItinClass;
+def tc_b9272d6c : InstrItinClass;
+def tc_b9e09e03 : InstrItinClass;
+def tc_bab0eed9 : InstrItinClass;
+def tc_bafaade3 : InstrItinClass;
+def tc_bcf98408 : InstrItinClass;
+def tc_bd8382d1 : InstrItinClass;
+def tc_bdceeac1 : InstrItinClass;
+def tc_be9602ff : InstrItinClass;
+def tc_bf061958 : InstrItinClass;
+def tc_bfec0f01 : InstrItinClass;
+def tc_c4db48cb : InstrItinClass;
+def tc_c4f596e3 : InstrItinClass;
+def tc_c79a189f : InstrItinClass;
+def tc_c8ce0b5c : InstrItinClass;
+def tc_cd374165 : InstrItinClass;
+def tc_cf8126ae : InstrItinClass;
+def tc_cfd8378a : InstrItinClass;
+def tc_d08ee0f4 : InstrItinClass;
+def tc_d1aa9eaa : InstrItinClass;
+def tc_d2e63d61 : InstrItinClass;
+def tc_d5b7b0c1 : InstrItinClass;
+def tc_d5c0729a : InstrItinClass;
+def tc_d63f638c : InstrItinClass;
+def tc_d65dbf51 : InstrItinClass;
+def tc_d773585a : InstrItinClass;
+def tc_d9d43ecb : InstrItinClass;
+def tc_da4a37ed : InstrItinClass;
+def tc_da97ee82 : InstrItinClass;
+def tc_db2bce9c : InstrItinClass;
+def tc_de4df740 : InstrItinClass;
+def tc_de554571 : InstrItinClass;
+def tc_df3319ed : InstrItinClass;
+def tc_e06f432a : InstrItinClass;
+def tc_e4a7f9f0 : InstrItinClass;
+def tc_e4b3cb20 : InstrItinClass;
+def tc_e78647bd : InstrItinClass;
+def tc_e86aa961 : InstrItinClass;
+def tc_e93a3d71 : InstrItinClass;
+def tc_e95795ec : InstrItinClass;
+def tc_e9f3243f : InstrItinClass;
+def tc_f429765c : InstrItinClass;
+def tc_f675fee8 : InstrItinClass;
+def tc_f8e23f0b : InstrItinClass;
+def tc_f9058dd7 : InstrItinClass;
+def tc_fc3999b4 : InstrItinClass;
+def tc_fcc3ddf9 : InstrItinClass;
+def tc_fe211424 : InstrItinClass;
class DepScalarItinV5 {
list<InstrItinData> DepScalarItinV5_list = [
- InstrItinData <tc_00afc57e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_00e7c26e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_03220ffa, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_038a1342, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_04c9decc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_05b6c987, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0cd51c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0dc560de, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0fc1ae07, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_10b97e27, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1372bca1, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_14cd4cfa, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_15411484, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_16d0d8d5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_181af5d0, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1853ea6d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1b82a277, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1b9c9ee5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1d5a38a8, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1e856f58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_234a11a5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_238d91d2, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_29175780, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_2a160009, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_2b2f4060, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_2b6f77c6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2f185f5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2fc0c436, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_351fed2d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3669266a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_367f7f3d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_36c68ad1, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_395dc00f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3bc2c5d3, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3cb8ea06, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3d04548d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3da80ba5, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3e07fb90, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_41d5298e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4403ca65, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_44126683, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_452f85af, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_481e5e5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_49eb22c8, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4ca572d4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_4d9914c9, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_4d99bca9, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4f7cd700, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_513bef45, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_51b866be, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_523fcf30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_5274e61a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_52d7bbea, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bc8a6a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bdb2f6, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_540fdfbc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_55050d58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_57288781, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_594ab548, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_59a01ead, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_5acef64a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_5ba5997d, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_5eb851fc, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_5f6847a1, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_60571023, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_609d2efe, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_63fe3df7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_66888ded, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_6792d5ff, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_681a2300, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_68cb12ce, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6aa5711a, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ac37025, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ebb4a12, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6efc556e, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6fa4db47, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_73043bf4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_746baa8e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_74e47fd9, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_7934b9df, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_7a830544, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_7f881c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_84df2cd3, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_855b0b61, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_87735c3b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_897d1a9d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8b15472a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_8fd5f294, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8fe6b782, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_90f3e30c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_976ddc4f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_97743097, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_994333cd, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_999d32db, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_99be14ca, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c00ce8d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c98e8af, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9d5941c7, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_9ef61e5c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9faf76ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_9fdb5406, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a21dc435, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_a27582fa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a46f0df5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a788683e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a8acdac0, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_a904d137, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_adb14c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b13761ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_b166348b, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b44c6e2a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b77c481f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b7dd427e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b9488031, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c0b731, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c4623f, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_bad2bcaf, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_bcc96cee, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_bde7aaf4, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_be706f30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c2f7d806, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c5e2426d, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_c6aa82f7, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ce9b3f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ebf8dd, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c74f796f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c82dc1ff, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_caaebcba, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cd7374a0, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cde8b071, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cf47a43f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cf59f215, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_d088982c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d1090e34, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d24b2d85, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_d580173f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d6bf0472, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_d9709180, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d9f95eef, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_daa058fa, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_dbdffe3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e0739b8c, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_e1e99bfa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e216a5db, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e421e012, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7624c08, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7d02c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e913dc32, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e9c822f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_e9fae2d6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_ef52ed71, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_ef84f62f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f2704b9a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f3eaa14b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f47d212f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f49e76f4, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f7dd9c9f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_f86c328a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f8eeed7a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_fcab4871, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_ff9ee76e, [InstrStage<1, [SLOT0]>]> ];
+ InstrItinData <tc_002cb246, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0371abea, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_05c070ec, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_05d3a09b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0663f615, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_096199d3, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_0a705168, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_0ae0825c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0b2be201, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_0d8f5752, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_13bfbcf9, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_14b272fa, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_14b5c689, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_15aa71c5, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_174516e8, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_17e0d2cd, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_1a2fd869, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1ad90acd, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_1ae57e39, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1b6f7cec, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1c4528a2, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_1c80410a, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1d81e60e, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1fc97744, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_20cdee80, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_2332b92e, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_24b66c99, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_25a78932, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_2b8da4c2, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_2eabeebe, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_2f7c551d, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_2ff964b4, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_30b9bb4a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_32779c6f, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_36153880, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_362c6592, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3962fa26, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_39dfefe8, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3a867367, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_3b470976, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_3b5b7ef9, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3bd75825, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_3c76b0ff, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3d495a39, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_40116ca8, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_434c8e1e, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_4414d8b1, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_44d3da28, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_4560740b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_4837eefb, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_49a8207d, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_4ae7b58b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_4b68bce4, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_4c5ba658, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_4d5fa3a1, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_53559e35, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_56336eb0, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_56f114f4, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_57890846, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_5a2711e5, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_5abb5e3f, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_5aee39f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_5b54b33f, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_5b7c0967, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_5bf126a6, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_5d7f5414, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_5ef37dc4, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_6132ba3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_61830035, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_640086b5, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_643b4717, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_67435e81, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_675e4897, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_679309b8, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_6b25e783, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_703e822c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_7186d325, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_7646c131, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_76851da1, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_779080bf, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_784490da, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_785f65a7, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_7a91e76a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_838b34ea, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_85c9c08f, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_85d5d03f, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_862b3e70, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_88b4f13d, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_89e94ad3, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_8b121f4a, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_8b3e402a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_8c945be0, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_8c99de45, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_8d9d0154, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_8fb7ab1b, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9461ff31, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_946df596, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9ad9998f, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_9bfd761f, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9c3ecd83, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9ca930f7, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9da59d12, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9debc299, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9e313203, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9fc3dae0, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a1123dda, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a1c00888, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a58fd5cc, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a5d4aeec, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a6b1eca9, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a813cf9a, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_a9d88b22, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_ae53734a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b31c2e97, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b343892a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b43e7930, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b4407292, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b44ecf75, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b4b5c03a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b51dc29a, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_b83e6d73, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_b857bf4e, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b8bffe55, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b90a29b1, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b9272d6c, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_b9e09e03, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_bab0eed9, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_bafaade3, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_bcf98408, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_bd8382d1, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_bdceeac1, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_be9602ff, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_bf061958, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_bfec0f01, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_c4db48cb, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_c4f596e3, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_c79a189f, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_c8ce0b5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_cd374165, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_cf8126ae, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_cfd8378a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d08ee0f4, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d1aa9eaa, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d2e63d61, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d5b7b0c1, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_d5c0729a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d63f638c, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d65dbf51, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d773585a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d9d43ecb, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_da4a37ed, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_da97ee82, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_db2bce9c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_de4df740, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_de554571, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_df3319ed, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_e06f432a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_e4a7f9f0, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_e4b3cb20, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_e78647bd, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_e86aa961, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_e93a3d71, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_e95795ec, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_e9f3243f, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_f429765c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f675fee8, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f8e23f0b, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_f9058dd7, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_fc3999b4, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_fcc3ddf9, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_fe211424, [InstrStage<1, [SLOT0]>]> ];
}
class DepScalarItinV55 {
list<InstrItinData> DepScalarItinV55_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_05c070ec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0663f615, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b5c689, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_1ad90acd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ InstrItinData <tc_1ae57e39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
-
- InstrItinData <tc_1b9c9ee5, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1b6f7cec, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c4528a2, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_1d81e60e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_20cdee80, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_2332b92e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2ff964b4, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_32779c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_367f7f3d, /*tc_st*/
+ InstrItinData <tc_36153880, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
- []>,
-
- InstrItinData <tc_395dc00f, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 3, 1],
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
- InstrItinData <tc_3d04548d, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 2],
+ InstrItinData <tc_3a867367, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_3b470976, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_3d495a39, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_481e5e5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_434c8e1e, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
+ InstrItinData <tc_4414d8b1, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4560740b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1],
+ InstrItinData <tc_4837eefb, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4ae7b58b, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4d5fa3a1, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_52d7bbea, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_53559e35, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_56336eb0, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_56f114f4, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b54b33f, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b7c0967, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5d7f5414, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 3, 2],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ InstrItinData <tc_640086b5, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_643b4717, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_675e4897, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_679309b8, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_76851da1, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_85c9c08f, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_85d5d03f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_88b4f13d, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b121f4a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_8d9d0154, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
+ InstrItinData <tc_9461ff31, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_897d1a9d, /*tc_1*/
+ InstrItinData <tc_946df596, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_9bfd761f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9c3ecd83, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_9da59d12, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e313203, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c00ce8d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a58fd5cc, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9faf76ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_2early*/
+ InstrItinData <tc_ae53734a, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ InstrItinData <tc_b31c2e97, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b343892a, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b51dc29a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_b8bffe55, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
+ InstrItinData <tc_b9272d6c, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b9e09e03, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bde7aaf4, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c2f7d806, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c5e2426d, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_bd8382d1, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_caaebcba, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_bfec0f01, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cd374165, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d088982c, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_cfd8378a, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
+ InstrItinData <tc_d08ee0f4, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d1aa9eaa, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d580173f, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5b7b0c1, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d65dbf51, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d9d43ecb, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_2early*/
+ InstrItinData <tc_db2bce9c, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ InstrItinData <tc_de4df740, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_de554571, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_3stall*/
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e06f432a, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e913dc32, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e4a7f9f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9fae2d6, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_e78647bd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e9f3243f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ InstrItinData <tc_f429765c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
[Hex_FWD, Hex_FWD]>
];
}
class DepScalarItinV60 {
list<InstrItinData> DepScalarItinV60_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_05c070ec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0663f615, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_1ad90acd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ InstrItinData <tc_1ae57e39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
-
- InstrItinData <tc_1b9c9ee5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1b6f7cec, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c4528a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_1d81e60e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_20cdee80, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_2332b92e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_32779c6f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_367f7f3d, /*tc_st*/
+ InstrItinData <tc_36153880, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [],
[]>,
- InstrItinData <tc_395dc00f, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_3d04548d, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_3d495a39, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
+ InstrItinData <tc_40116ca8, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_434c8e1e, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4414d8b1, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ InstrItinData <tc_4560740b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_481e5e5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4ae7b58b, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_53559e35, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_56336eb0, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_56f114f4, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_52d7bbea, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
+ InstrItinData <tc_5b7c0967, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5d7f5414, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ InstrItinData <tc_640086b5, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_643b4717, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_679309b8, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_76851da1, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_85c9c08f, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_85d5d03f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b121f4a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
+ InstrItinData <tc_9461ff31, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_897d1a9d, /*tc_1*/
+ InstrItinData <tc_946df596, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9c3ecd83, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e313203, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c00ce8d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9faf76ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_2early*/
+ InstrItinData <tc_ae53734a, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b31c2e97, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ InstrItinData <tc_b51dc29a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
- []>,
+ InstrItinData <tc_b9272d6c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_b9e09e03, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_cd374165, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c2f7d806, /*tc_2*/
+ InstrItinData <tc_cf8126ae, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c5e2426d, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_cfd8378a, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_d08ee0f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d1aa9eaa, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
+ InstrItinData <tc_d5b7b0c1, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_caaebcba, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d65dbf51, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d9d43ecb, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_db2bce9c, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d088982c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_de4df740, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de554571, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e4a7f9f0, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d580173f, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ InstrItinData <tc_e78647bd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9f3243f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f429765c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV62 {
+ list<InstrItinData> DepScalarItinV62_list = [
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_1*/
+ InstrItinData <tc_05c070ec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0663f615, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e913dc32, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_1ad90acd, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9fae2d6, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_1ae57e39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1b6f7cec, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_1c4528a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1d81e60e, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20cdee80, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_2332b92e, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_32779c6f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_newvjump*/
+ InstrItinData <tc_36153880, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
- [Hex_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
-class DepScalarItinV62 {
- list<InstrItinData> DepScalarItinV62_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3bd75825, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_4x*/
+ InstrItinData <tc_3d495a39, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_434c8e1e, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4414d8b1, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4560740b, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_3stall*/
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4ae7b58b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_53559e35, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56336eb0, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56f114f4, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_5b7c0967, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ InstrItinData <tc_5d7f5414, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_1b9c9ee5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_640086b5, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_643b4717, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_679309b8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76851da1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
+ InstrItinData <tc_779080bf, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_85c9c08f, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_2early*/
+ InstrItinData <tc_85d5d03f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_862b3e70, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_367f7f3d, /*tc_st*/
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b121f4a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8c99de45, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9461ff31, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946df596, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
[]>,
- InstrItinData <tc_395dc00f, /*tc_newvjump*/
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c3ecd83, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2],
+ InstrItinData <tc_9e313203, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3d04548d, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae53734a, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_481e5e5c, /*tc_2early*/
+ InstrItinData <tc_b31c2e97, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b51dc29a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_52d7bbea, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_b9272d6c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_b9e09e03, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cd374165, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cfd8378a, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_d08ee0f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ InstrItinData <tc_d1aa9eaa, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5b7b0c1, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d65dbf51, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d9d43ecb, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_db2bce9c, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
+ InstrItinData <tc_de4df740, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de554571, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e4a7f9f0, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_e78647bd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_2*/
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9f3243f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f429765c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_897d1a9d, /*tc_2*/
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV65 {
+ list<InstrItinData> DepScalarItinV65_list = [
+ InstrItinData <tc_002cb246, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05c070ec, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0663f615, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2, 2],
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c00ce8d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_1ad90acd, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1ae57e39, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1b6f7cec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_9faf76ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_1c4528a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_1d81e60e, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_2early*/
+ InstrItinData <tc_20cdee80, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_2332b92e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_32779c6f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_36153880, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_3d495a39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c2f7d806, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c5e2426d, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_434c8e1e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_4414d8b1, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3x*/
+ InstrItinData <tc_4560740b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4ae7b58b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_caaebcba, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ InstrItinData <tc_53559e35, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_56336eb0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_56f114f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d088982c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_5b7c0967, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d580173f, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5d7f5414, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_640086b5, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_1*/
+ InstrItinData <tc_643b4717, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_679309b8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_76851da1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e913dc32, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9fae2d6, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_85c9c08f, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_85d5d03f, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_8b121f4a, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
+ InstrItinData <tc_9461ff31, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ InstrItinData <tc_946df596, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_9c3ecd83, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
- [Hex_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
-class DepScalarItinV65 {
- list<InstrItinData> DepScalarItinV65_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e313203, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_ae53734a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b31c2e97, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_1*/
+ InstrItinData <tc_b51dc29a, /*tc_1*/
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_1b9c9ee5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b9272d6c, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b9e09e03, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_cd374165, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_1*/
+ InstrItinData <tc_cfd8378a, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_367f7f3d, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_d08ee0f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
- []>,
+ InstrItinData <tc_d1aa9eaa, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_395dc00f, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_d5b7b0c1, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_3d04548d, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
+ InstrItinData <tc_d63f638c, /*tc_ld*/
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ InstrItinData <tc_d65dbf51, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d9d43ecb, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_481e5e5c, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_db2bce9c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de4df740, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
+ InstrItinData <tc_de554571, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e4a7f9f0, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e78647bd, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ InstrItinData <tc_e9f3243f, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_52d7bbea, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_f429765c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV66 {
+ list<InstrItinData> DepScalarItinV66_list = [
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05c070ec, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0663f615, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_latepredldaia*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_1ad90acd, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1ae57e39, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_1b6f7cec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_1c4528a2, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c80410a, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_1d81e60e, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20cdee80, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
+ InstrItinData <tc_2332b92e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
+ InstrItinData <tc_2eabeebe, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_1*/
- [InstrStage<1, [SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_32779c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_latepredstaia*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ InstrItinData <tc_36153880, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_1*/
+ InstrItinData <tc_3d495a39, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_897d1a9d, /*tc_2*/
+ InstrItinData <tc_434c8e1e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4414d8b1, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
+ InstrItinData <tc_4560740b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_1*/
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4ae7b58b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_53559e35, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56336eb0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56f114f4, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2, 2],
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b7c0967, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_3stall*/
+ InstrItinData <tc_5d7f5414, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6132ba3d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c00ce8d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_640086b5, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_643b4717, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_679309b8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9faf76ae, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76851da1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_85c9c08f, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_85d5d03f, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8b121f4a, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8b3e402a, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
+ InstrItinData <tc_9461ff31, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_946df596, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9c3ecd83, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9e313203, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_a1c00888, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c2f7d806, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c5e2426d, /*tc_3stall*/
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ae53734a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_b31c2e97, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
+ InstrItinData <tc_b51dc29a, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_caaebcba, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_b9272d6c, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
+ InstrItinData <tc_b9e09e03, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d088982c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_latepredstaia*/
- [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d580173f, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2, 2],
+ InstrItinData <tc_cd374165, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ InstrItinData <tc_cfd8378a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d08ee0f4, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ InstrItinData <tc_d1aa9eaa, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d5b7b0c1, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e913dc32, /*tc_3x*/
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d65dbf51, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d773585a, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4],
- [Hex_FWD]>,
-
- InstrItinData <tc_e9fae2d6, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_d9d43ecb, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_1*/
+ InstrItinData <tc_db2bce9c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de4df740, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de554571, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
+ InstrItinData <tc_e4a7f9f0, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_e78647bd, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9f3243f, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f429765c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
[Hex_FWD, Hex_FWD]>
];
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h
index 7e06ccede6e7..81e3971e21d2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h
@@ -1,4 +1,4 @@
-//===- HexagonDepITypes.h -------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,6 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
namespace llvm {
namespace HexagonII {
enum Type {
@@ -45,6 +44,7 @@ enum Type {
TypeCVI_VX = 29,
TypeCVI_VX_DV = 30,
TypeCVI_VX_LATE = 31,
+ TypeCVI_ZW = 32,
TypeDUPLEX = 33,
TypeENDLOOP = 34,
TypeEXTENDER = 35,
@@ -59,7 +59,7 @@ enum Type {
TypeS_2op = 44,
TypeS_3op = 45,
TypeV2LDST = 48,
- TypeV4LDST = 49
+ TypeV4LDST = 49,
};
}
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td
index 0a385bf938fe..f694062a5232 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td
@@ -1,4 +1,4 @@
-//===- HexagonDepITypes.td ------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,8 +9,7 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
-class IType<bits<6> t> { bits<6> Value = t; }
+class IType<bits<7> t> { bits<7> Value = t; }
def TypeALU32_2op : IType<0>;
def TypeALU32_3op : IType<1>;
def TypeALU32_ADDI : IType<2>;
@@ -43,6 +42,7 @@ def TypeCVI_VS_VX : IType<28>;
def TypeCVI_VX : IType<29>;
def TypeCVI_VX_DV : IType<30>;
def TypeCVI_VX_LATE : IType<31>;
+def TypeCVI_ZW : IType<32>;
def TypeDUPLEX : IType<33>;
def TypeENDLOOP : IType<34>;
def TypeEXTENDER : IType<35>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
index 9f98da3a1dee..ffe212ef9d97 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -1,4 +1,4 @@
-//===- HexagonDepInstrFormats.td ------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,6 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
class Enc_890909 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
@@ -61,14 +60,6 @@ class Enc_27b757 : OpcodeHexagon {
bits <5> Vs32;
let Inst{4-0} = Vs32{4-0};
}
-class Enc_8d04c3 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_1de724 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -87,12 +78,6 @@ class Enc_0e41fa : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
-class Enc_2a736a : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_3d6d37 : OpcodeHexagon {
bits <2> Qs4;
let Inst{6-5} = Qs4{1-0};
@@ -121,14 +106,6 @@ class Enc_802dc0 : OpcodeHexagon {
bits <2> Qv4;
let Inst{23-22} = Qv4{1-0};
}
-class Enc_6a4549 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_6b197f : OpcodeHexagon {
bits <4> Ii;
let Inst{8-5} = Ii{3-0};
@@ -137,22 +114,6 @@ class Enc_6b197f : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_1f3376 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
-}
-class Enc_1f5d8f : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_51436c : OpcodeHexagon {
bits <16> Ii;
let Inst{23-22} = Ii{15-14};
@@ -249,6 +210,14 @@ class Enc_d7dc10 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
+class Enc_6baed4 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
class Enc_736575 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -291,14 +260,6 @@ class Enc_509701 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_c84567 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_830e5d : OpcodeHexagon {
bits <8> Ii;
let Inst{12-5} = Ii{7-0};
@@ -310,12 +271,6 @@ class Enc_830e5d : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_ae0040 : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <6> Sd64;
- let Inst{5-0} = Sd64{5-0};
-}
class Enc_79b8c8 : OpcodeHexagon {
bits <6> Ii;
let Inst{6-3} = Ii{5-2};
@@ -336,16 +291,6 @@ class Enc_58a8bf : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_e8ddd5 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_041d7b : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -369,14 +314,6 @@ class Enc_f44229 : OpcodeHexagon {
bits <3> Nt8;
let Inst{10-8} = Nt8{2-0};
}
-class Enc_fc563d : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_aad80c : OpcodeHexagon {
bits <5> Vuu32;
let Inst{12-8} = Vuu32{4-0};
@@ -434,6 +371,14 @@ class Enc_ee5ed0 : OpcodeHexagon {
bits <2> n1;
let Inst{9-8} = n1{1-0};
}
+class Enc_bddee3 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vyyyy32;
+ let Inst{4-0} = Vyyyy32{4-0};
+ bits <3> Rx8;
+ let Inst{18-16} = Rx8{2-0};
+}
class Enc_935d9b : OpcodeHexagon {
bits <5> Ii;
let Inst{6-3} = Ii{4-1};
@@ -573,6 +518,14 @@ class Enc_27fd0e : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
+class Enc_d7bc34 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vyyyy32;
+ let Inst{4-0} = Vyyyy32{4-0};
+}
class Enc_93af4c : OpcodeHexagon {
bits <7> Ii;
let Inst{10-4} = Ii{6-0};
@@ -620,12 +573,6 @@ class Enc_14640c : OpcodeHexagon {
let Inst{24-22} = n1{3-1};
let Inst{13-13} = n1{0-0};
}
-class Enc_2516bf : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_31db33 : OpcodeHexagon {
bits <2> Qt4;
let Inst{6-5} = Qt4{1-0};
@@ -656,24 +603,6 @@ class Enc_784502 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_9a9d62 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_3a81ac : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_6413b6 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -703,13 +632,13 @@ class Enc_84bff1 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_74aef2 : OpcodeHexagon {
+class Enc_f4413a : OpcodeHexagon {
bits <4> Ii;
let Inst{8-5} = Ii{3-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
@@ -753,16 +682,6 @@ class Enc_e39bb2 : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_7db2f8 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{13-9} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{8-4} = Vv32{4-0};
- bits <4> Vdd16;
- let Inst{3-0} = Vdd16{3-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_1b64fb : OpcodeHexagon {
bits <16> Ii;
let Inst{26-25} = Ii{15-14};
@@ -772,6 +691,16 @@ class Enc_1b64fb : OpcodeHexagon {
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
}
+class Enc_c1d806 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <2> Qe4;
+ let Inst{6-5} = Qe4{1-0};
+}
class Enc_c6220b : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -841,10 +770,6 @@ class Enc_fcf7a7 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
-class Enc_2c3281 : OpcodeHexagon {
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_55355c : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -877,6 +802,16 @@ class Enc_6185fe : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
+class Enc_74aef2 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
class Enc_cd4705 : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
@@ -920,10 +855,6 @@ class Enc_fef969 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_b2ffce : OpcodeHexagon {
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_63eaeb : OpcodeHexagon {
bits <2> Ii;
let Inst{1-0} = Ii{1-0};
@@ -948,12 +879,6 @@ class Enc_372c9d : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_9e9047 : OpcodeHexagon {
- bits <2> Pt4;
- let Inst{9-8} = Pt4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
-}
class Enc_4dff07 : OpcodeHexagon {
bits <2> Qv4;
let Inst{12-11} = Qv4{1-0};
@@ -1000,16 +925,6 @@ class Enc_b388cf : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_880793 : OpcodeHexagon {
- bits <3> Qt8;
- let Inst{2-0} = Qt8{2-0};
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_ad1c74 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -1086,14 +1001,6 @@ class Enc_88d4d9 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
}
-class Enc_c0cdde : OpcodeHexagon {
- bits <9> Ii;
- let Inst{13-5} = Ii{8-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
class Enc_226535 : OpcodeHexagon {
bits <8> Ii;
let Inst{12-7} = Ii{7-2};
@@ -1102,14 +1009,6 @@ class Enc_226535 : OpcodeHexagon {
bits <5> Rt32;
let Inst{4-0} = Rt32{4-0};
}
-class Enc_96f0fd : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
- bits <3> Qdd8;
- let Inst{2-0} = Qdd8{2-0};
-}
class Enc_31aa6a : OpcodeHexagon {
bits <5> Ii;
let Inst{6-3} = Ii{4-1};
@@ -1120,12 +1019,6 @@ class Enc_31aa6a : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_932b58 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
-}
class Enc_397f23 : OpcodeHexagon {
bits <8> Ii;
let Inst{13-13} = Ii{7-7};
@@ -1192,14 +1085,6 @@ class Enc_01d3d0 : OpcodeHexagon {
bits <5> Vdd32;
let Inst{4-0} = Vdd32{4-0};
}
-class Enc_3126d7 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_b0e9d8 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -1209,6 +1094,14 @@ class Enc_b0e9d8 : OpcodeHexagon {
bits <5> Rx32;
let Inst{4-0} = Rx32{4-0};
}
+class Enc_1bd127 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vdddd32;
+ let Inst{4-0} = Vdddd32{4-0};
+}
class Enc_3694bd : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -1276,12 +1169,6 @@ class Enc_88c16c : OpcodeHexagon {
bits <5> Rxx32;
let Inst{4-0} = Rxx32{4-0};
}
-class Enc_e7408c : OpcodeHexagon {
- bits <6> Sss64;
- let Inst{21-16} = Sss64{5-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
class Enc_770858 : OpcodeHexagon {
bits <2> Ps4;
let Inst{6-5} = Ps4{1-0};
@@ -1323,15 +1210,14 @@ class Enc_412ff0 : OpcodeHexagon {
bits <5> Rxx32;
let Inst{12-8} = Rxx32{4-0};
}
-class Enc_8e9fbd : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
+class Enc_ef601b : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
}
class Enc_c9a18e : OpcodeHexagon {
bits <11> Ii;
@@ -1356,19 +1242,6 @@ class Enc_e6abcf : OpcodeHexagon {
bits <5> Rtt32;
let Inst{12-8} = Rtt32{4-0};
}
-class Enc_6339d5 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <2> Pv4;
- let Inst{6-5} = Pv4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Ru32;
- let Inst{12-8} = Ru32{4-0};
- bits <5> Rt32;
- let Inst{4-0} = Rt32{4-0};
-}
class Enc_d6990d : OpcodeHexagon {
bits <5> Vuu32;
let Inst{12-8} = Vuu32{4-0};
@@ -1377,16 +1250,6 @@ class Enc_d6990d : OpcodeHexagon {
bits <5> Vxx32;
let Inst{4-0} = Vxx32{4-0};
}
-class Enc_6c4697 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_6c9440 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -1445,15 +1308,13 @@ class Enc_9d1247 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_f4413a : OpcodeHexagon {
- bits <4> Ii;
- let Inst{8-5} = Ii{3-0};
- bits <2> Pt4;
- let Inst{10-9} = Pt4{1-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_7b7ba8 : OpcodeHexagon {
+ bits <2> Qu4;
+ let Inst{9-8} = Qu4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
class Enc_f7430e : OpcodeHexagon {
bits <4> Ii;
@@ -1531,12 +1392,6 @@ class Enc_a803e0 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
}
-class Enc_fde0e3 : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_45364e : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
@@ -1557,12 +1412,6 @@ class Enc_b909d2 : OpcodeHexagon {
let Inst{13-13} = n1{1-1};
let Inst{8-8} = n1{0-0};
}
-class Enc_790d6e : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_e6c957 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -1570,15 +1419,6 @@ class Enc_e6c957 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_fa3ba4 : OpcodeHexagon {
- bits <14> Ii;
- let Inst{26-25} = Ii{13-12};
- let Inst{13-5} = Ii{11-3};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
class Enc_0d8870 : OpcodeHexagon {
bits <12> Ii;
let Inst{26-25} = Ii{11-10};
@@ -1623,14 +1463,6 @@ class Enc_0ed752 : OpcodeHexagon {
bits <5> Cdd32;
let Inst{4-0} = Cdd32{4-0};
}
-class Enc_908985 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_143445 : OpcodeHexagon {
bits <13> Ii;
let Inst{26-25} = Ii{12-11};
@@ -1658,16 +1490,6 @@ class Enc_3e3989 : OpcodeHexagon {
let Inst{25-22} = n1{4-1};
let Inst{8-8} = n1{0-0};
}
-class Enc_12dd8f : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
-}
class Enc_152467 : OpcodeHexagon {
bits <5> Ii;
let Inst{8-5} = Ii{4-1};
@@ -1676,22 +1498,23 @@ class Enc_152467 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_6b1bc4 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <3> Qt8;
- let Inst{10-8} = Qt8{2-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_daea09 : OpcodeHexagon {
- bits <17> Ii;
- let Inst{23-22} = Ii{16-15};
- let Inst{20-16} = Ii{14-10};
- let Inst{13-13} = Ii{9-9};
- let Inst{7-1} = Ii{8-2};
+class Enc_9ac432 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
bits <2> Pu4;
- let Inst{9-8} = Pu4{1-0};
+ let Inst{7-6} = Pu4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_a90628 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
}
class Enc_f37377 : OpcodeHexagon {
bits <8> Ii;
@@ -1712,12 +1535,6 @@ class Enc_a198f6 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_a265b7 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_4e4a80 : OpcodeHexagon {
bits <2> Qs4;
let Inst{6-5} = Qs4{1-0};
@@ -1728,16 +1545,6 @@ class Enc_4e4a80 : OpcodeHexagon {
bits <5> Vvv32;
let Inst{4-0} = Vvv32{4-0};
}
-class Enc_8d5d98 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
-}
class Enc_3dac0b : OpcodeHexagon {
bits <2> Qt4;
let Inst{6-5} = Qt4{1-0};
@@ -1780,16 +1587,6 @@ class Enc_2df31d : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_b0e553 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_25bef0 : OpcodeHexagon {
bits <16> Ii;
let Inst{26-25} = Ii{15-14};
@@ -1905,10 +1702,14 @@ class Enc_bd1cbc : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_d0fe02 : OpcodeHexagon {
- bits <5> Rxx32;
- let Inst{20-16} = Rxx32{4-0};
- bits <0> sgp10;
+class Enc_c85e2a : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> II;
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
class Enc_a30110 : OpcodeHexagon {
bits <5> Vu32;
@@ -1920,24 +1721,12 @@ class Enc_a30110 : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
-class Enc_f3f408 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{13-13} = Ii{3-3};
- let Inst{10-8} = Ii{2-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
-}
-class Enc_ce4c54 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
+class Enc_33f8ba : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-8} = Ii{7-3};
+ let Inst{4-2} = Ii{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
class Enc_690862 : OpcodeHexagon {
bits <13> Ii;
@@ -1949,20 +1738,6 @@ class Enc_690862 : OpcodeHexagon {
bits <3> Nt8;
let Inst{10-8} = Nt8{2-0};
}
-class Enc_e570b0 : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_3c46e8 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_2a3787 : OpcodeHexagon {
bits <13> Ii;
let Inst{26-25} = Ii{12-11};
@@ -2010,22 +1785,6 @@ class Enc_729ff7 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_5883d0 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_ff0e49 : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <6> Sdd64;
- let Inst{5-0} = Sdd64{5-0};
-}
class Enc_217147 : OpcodeHexagon {
bits <2> Qv4;
let Inst{23-22} = Qv4{1-0};
@@ -2060,14 +1819,6 @@ class Enc_541f26 : OpcodeHexagon {
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
}
-class Enc_9aae4a : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
- bits <3> Qd8;
- let Inst{2-0} = Qd8{2-0};
-}
class Enc_724154 : OpcodeHexagon {
bits <6> II;
let Inst{5-0} = II{5-0};
@@ -2114,16 +1865,6 @@ class Enc_b84c4c : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_9ac432 : OpcodeHexagon {
- bits <2> Ps4;
- let Inst{17-16} = Ps4{1-0};
- bits <2> Pt4;
- let Inst{9-8} = Pt4{1-0};
- bits <2> Pu4;
- let Inst{7-6} = Pu4{1-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
class Enc_8203bb : OpcodeHexagon {
bits <6> Ii;
let Inst{12-7} = Ii{5-0};
@@ -2228,12 +1969,6 @@ class Enc_96ce4f : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_2bbae6 : OpcodeHexagon {
- bits <6> Ss64;
- let Inst{21-16} = Ss64{5-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
class Enc_143a3c : OpcodeHexagon {
bits <6> Ii;
let Inst{13-8} = Ii{5-0};
@@ -2281,13 +2016,14 @@ class Enc_de0214 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_a90628 : OpcodeHexagon {
- bits <2> Qv4;
- let Inst{23-22} = Qv4{1-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
+class Enc_daea09 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{23-22} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
}
class Enc_fda92c : OpcodeHexagon {
bits <17> Ii;
@@ -2365,26 +2101,6 @@ class Enc_b43b67 : OpcodeHexagon {
bits <2> Qx4;
let Inst{6-5} = Qx4{1-0};
}
-class Enc_1cd70f : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
-class Enc_3a527f : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_4aca3a : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -2403,12 +2119,6 @@ class Enc_b38ffc : OpcodeHexagon {
bits <4> Rt16;
let Inst{3-0} = Rt16{3-0};
}
-class Enc_5c3a80 : OpcodeHexagon {
- bits <3> Qt8;
- let Inst{10-8} = Qt8{2-0};
- bits <3> Qd8;
- let Inst{5-3} = Qd8{2-0};
-}
class Enc_cda00a : OpcodeHexagon {
bits <12> Ii;
let Inst{19-16} = Ii{11-8};
@@ -2426,24 +2136,6 @@ class Enc_2fbf3c : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_a4ae28 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Qd8;
- let Inst{5-3} = Qd8{2-0};
-}
-class Enc_dd5f9f : OpcodeHexagon {
- bits <3> Qtt8;
- let Inst{2-0} = Qtt8{2-0};
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_70b24b : OpcodeHexagon {
bits <6> Ii;
let Inst{8-5} = Ii{5-2};
@@ -2490,16 +2182,6 @@ class Enc_08d755 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
-class Enc_a7ca29 : OpcodeHexagon {
- bits <3> Qt8;
- let Inst{2-0} = Qt8{2-0};
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_1178da : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
@@ -2518,14 +2200,6 @@ class Enc_8dbe85 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_17a474 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_5a18b3 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -2586,14 +2260,6 @@ class Enc_12b6e9 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_9a895f : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_6f70ca : OpcodeHexagon {
bits <8> Ii;
let Inst{8-4} = Ii{7-3};
@@ -2605,12 +2271,7 @@ class Enc_7222b7 : OpcodeHexagon {
let Inst{1-0} = Qd4{1-0};
}
class Enc_e3b0c4 : OpcodeHexagon {
-}
-class Enc_d7e8ba : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
+
}
class Enc_a255dc : OpcodeHexagon {
bits <3> Ii;
@@ -2628,16 +2289,6 @@ class Enc_cb785b : OpcodeHexagon {
bits <5> Vdd32;
let Inst{4-0} = Vdd32{4-0};
}
-class Enc_5b76ab : OpcodeHexagon {
- bits <10> Ii;
- let Inst{21-21} = Ii{9-9};
- let Inst{13-8} = Ii{8-3};
- let Inst{2-0} = Ii{2-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_cb4b4e : OpcodeHexagon {
bits <2> Pu4;
let Inst{6-5} = Pu4{1-0};
@@ -2648,23 +2299,13 @@ class Enc_cb4b4e : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_fbacc2 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
-}
-class Enc_2ad23d : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
+class Enc_1f5d8f : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
class Enc_9cdba7 : OpcodeHexagon {
bits <8> Ii;
@@ -2683,10 +2324,6 @@ class Enc_5cd7e9 : OpcodeHexagon {
bits <5> Ryy32;
let Inst{4-0} = Ryy32{4-0};
}
-class Enc_e7c9de : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
-}
class Enc_454a26 : OpcodeHexagon {
bits <2> Pt4;
let Inst{9-8} = Pt4{1-0};
@@ -2786,14 +2423,6 @@ class Enc_d2c7f1 : OpcodeHexagon {
bits <2> Pe4;
let Inst{6-5} = Pe4{1-0};
}
-class Enc_dcfcbb : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_3680c2 : OpcodeHexagon {
bits <7> Ii;
let Inst{11-5} = Ii{6-0};
@@ -2822,31 +2451,13 @@ class Enc_e957fb : OpcodeHexagon {
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
}
-class Enc_2146c1 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <3> Qss8;
- let Inst{2-0} = Qss8{2-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
-class Enc_a662ae : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_8f7cc3 : OpcodeHexagon {
- bits <3> Qtt8;
- let Inst{10-8} = Qtt8{2-0};
- bits <3> Qdd8;
- let Inst{5-3} = Qdd8{2-0};
+class Enc_c0cdde : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
class Enc_c9e3bc : OpcodeHexagon {
bits <4> Ii;
@@ -2886,33 +2497,18 @@ class Enc_6f83e7 : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
-class Enc_46f33d : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
-}
-class Enc_c1652e : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
+class Enc_6339d5 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <3> Qd8;
- let Inst{5-3} = Qd8{2-0};
-}
-class Enc_b5b643 : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
-}
-class Enc_85daf5 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
+ let Inst{4-0} = Rt32{4-0};
}
class Enc_d483b9 : OpcodeHexagon {
bits <1> Ii;
@@ -2952,13 +2548,14 @@ class Enc_6c9ee0 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_72a92d : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
+class Enc_fa3ba4 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{26-25} = Ii{13-12};
+ let Inst{13-5} = Ii{11-3};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
class Enc_44661f : OpcodeHexagon {
bits <1> Mu2;
@@ -3006,14 +2603,6 @@ class Enc_da664b : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_7b7ba8 : OpcodeHexagon {
- bits <2> Qu4;
- let Inst{9-8} = Qu4{1-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
-}
class Enc_47ee5e : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -3116,14 +2705,6 @@ class Enc_8e583a : OpcodeHexagon {
let Inst{25-23} = n1{3-1};
let Inst{13-13} = n1{0-0};
}
-class Enc_334c2b : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_b886fd : OpcodeHexagon {
bits <5> Ii;
let Inst{6-3} = Ii{4-1};
@@ -3177,36 +2758,12 @@ class Enc_8dbdfe : OpcodeHexagon {
bits <3> Nt8;
let Inst{10-8} = Nt8{2-0};
}
-class Enc_7dc746 : OpcodeHexagon {
- bits <3> Quu8;
- let Inst{10-8} = Quu8{2-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <3> Qdd8;
- let Inst{5-3} = Qdd8{2-0};
-}
class Enc_90cd8b : OpcodeHexagon {
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_b8513b : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_b3bac4 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_bd0b33 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -3216,16 +2773,6 @@ class Enc_bd0b33 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
-class Enc_843e80 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <3> Qxx8;
- let Inst{2-0} = Qxx8{2-0};
-}
class Enc_8b8927 : OpcodeHexagon {
bits <5> Rt32;
let Inst{20-16} = Rt32{4-0};
@@ -3359,6 +2906,16 @@ class Enc_e07374 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
+class Enc_e0820b : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
class Enc_323f2d : OpcodeHexagon {
bits <6> II;
let Inst{11-8} = II{5-2};
@@ -3381,16 +2938,6 @@ class Enc_1a9974 : OpcodeHexagon {
bits <5> Rtt32;
let Inst{4-0} = Rtt32{4-0};
}
-class Enc_9ce456 : OpcodeHexagon {
- bits <10> Ii;
- let Inst{21-21} = Ii{9-9};
- let Inst{13-8} = Ii{8-3};
- let Inst{2-0} = Ii{2-0};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_5de85f : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -3416,14 +2963,6 @@ class Enc_0b51ce : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_b5e54d : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
class Enc_b4e6cf : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -3479,16 +3018,6 @@ class Enc_645d54 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_b5d5a7 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
-}
class Enc_667b39 : OpcodeHexagon {
bits <5> Css32;
let Inst{20-16} = Css32{4-0};
@@ -3511,6 +3040,14 @@ class Enc_163a3c : OpcodeHexagon {
bits <5> Rt32;
let Inst{4-0} = Rt32{4-0};
}
+class Enc_a75aa6 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+}
class Enc_b087ac : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
@@ -3519,6 +3056,14 @@ class Enc_b087ac : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
+class Enc_691712 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
class Enc_b1e1fb : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -3546,16 +3091,6 @@ class Enc_b8c967 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_f106e0 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{8-4} = Vv32{4-0};
- bits <5> Vt32;
- let Inst{13-9} = Vt32{4-0};
- bits <4> Vdd16;
- let Inst{3-0} = Vdd16{3-0};
-}
class Enc_fb6577 : OpcodeHexagon {
bits <2> Pu4;
let Inst{9-8} = Pu4{1-0};
@@ -3564,20 +3099,6 @@ class Enc_fb6577 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_37c406 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <4> Vdd16;
- let Inst{7-4} = Vdd16{3-0};
-}
-class Enc_403871 : OpcodeHexagon {
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_2bae10 : OpcodeHexagon {
bits <4> Ii;
let Inst{10-8} = Ii{3-1};
@@ -3586,22 +3107,6 @@ class Enc_2bae10 : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_f3adb6 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_aac08c : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
-}
class Enc_c4dc92 : OpcodeHexagon {
bits <2> Qv4;
let Inst{23-22} = Qv4{1-0};
@@ -3743,12 +3248,14 @@ class Enc_134437 : OpcodeHexagon {
bits <2> Qd4;
let Inst{1-0} = Qd4{1-0};
}
-class Enc_33f8ba : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-8} = Ii{7-3};
- let Inst{4-2} = Ii{2-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_f3f408 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
class Enc_97d666 : OpcodeHexagon {
bits <4> Rs16;
@@ -3766,16 +3273,6 @@ class Enc_f82eaf : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_57e245 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
-}
class Enc_69d63b : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -3842,24 +3339,6 @@ class Enc_7eaeb6 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_274a4c : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
-}
-class Enc_aceeef : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_f55a0c : OpcodeHexagon {
bits <6> Ii;
let Inst{11-8} = Ii{5-2};
@@ -3898,16 +3377,6 @@ class Enc_7b523d : OpcodeHexagon {
bits <5> Vxx32;
let Inst{4-0} = Vxx32{4-0};
}
-class Enc_c39a8b : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
-}
class Enc_47ef61 : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
@@ -4006,6 +3475,14 @@ class Enc_a6ce9c : OpcodeHexagon {
bits <4> Rs16;
let Inst{7-4} = Rs16{3-0};
}
+class Enc_3b7631 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vdddd32;
+ let Inst{4-0} = Vdddd32{4-0};
+ bits <3> Rx8;
+ let Inst{18-16} = Rx8{2-0};
+}
class Enc_eca7c8 : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -4017,16 +3494,6 @@ class Enc_eca7c8 : OpcodeHexagon {
bits <5> Rt32;
let Inst{4-0} = Rt32{4-0};
}
-class Enc_598f6c : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
-}
-class Enc_41dcc3 : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_4b39e4 : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index b6824fa33106..3ef1c49eb7ee 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -1,4 +1,4 @@
-//===- HexagonDepInstrInfo.td ---------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,12 +9,11 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
def A2_abs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = abs($Rs32)",
-tc_c2f7d806, TypeS_2op>, Enc_5e2823 {
+tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -25,7 +24,7 @@ def A2_absp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = abs($Rss32)",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000100;
let prefersSlot3 = 1;
@@ -34,7 +33,7 @@ def A2_abssat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = abs($Rs32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_5e2823 {
+tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -46,7 +45,7 @@ def A2_add : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011000;
@@ -62,7 +61,7 @@ def A2_addh_h16_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -74,7 +73,7 @@ def A2_addh_h16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -86,7 +85,7 @@ def A2_addh_h16_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -98,7 +97,7 @@ def A2_addh_h16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -110,7 +109,7 @@ def A2_addh_h16_sat_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -123,7 +122,7 @@ def A2_addh_h16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -136,7 +135,7 @@ def A2_addh_h16_sat_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -149,7 +148,7 @@ def A2_addh_h16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -162,7 +161,7 @@ def A2_addh_l16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -174,7 +173,7 @@ def A2_addh_l16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -186,7 +185,7 @@ def A2_addh_l16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -199,7 +198,7 @@ def A2_addh_l16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -212,7 +211,7 @@ def A2_addi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = add($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_ADDI>, Enc_cb9321, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_ADDI>, Enc_cb9321, PredNewRel, ImmRegRel {
let Inst{31-28} = 0b1011;
let hasNewValue = 1;
let opNewValue = 0;
@@ -231,7 +230,7 @@ def A2_addp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -242,7 +241,7 @@ def A2_addpsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -254,7 +253,7 @@ def A2_addsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be {
+tc_61830035, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110010;
@@ -269,14 +268,14 @@ def A2_addsp : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rs32,$Rtt32)",
-tc_897d1a9d, TypeALU64> {
+tc_679309b8, TypeALU64> {
let isPseudo = 1;
}
def A2_addsph : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):raw:hi",
-tc_897d1a9d, TypeALU64>, Enc_a56825 {
+tc_679309b8, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -286,7 +285,7 @@ def A2_addspl : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):raw:lo",
-tc_897d1a9d, TypeALU64>, Enc_a56825 {
+tc_679309b8, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -296,7 +295,7 @@ def A2_and : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = and($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001000;
@@ -312,7 +311,7 @@ def A2_andir : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = and($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_140c83, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_140c83, ImmRegRel {
let Inst{31-22} = 0b0111011000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -328,7 +327,7 @@ def A2_andp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = and($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -338,7 +337,7 @@ def A2_aslh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = aslh($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000000;
let hasNewValue = 1;
@@ -350,7 +349,7 @@ def A2_asrh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = asrh($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000001;
let hasNewValue = 1;
@@ -362,7 +361,7 @@ def A2_combine_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.h,$Rs32.h)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011100;
@@ -374,7 +373,7 @@ def A2_combine_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.h,$Rs32.l)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011101;
@@ -386,7 +385,7 @@ def A2_combine_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.l,$Rs32.h)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011110;
@@ -398,7 +397,7 @@ def A2_combine_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.l,$Rs32.l)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011111;
@@ -410,7 +409,7 @@ def A2_combineii : HInst<
(outs DoubleRegs:$Rdd32),
(ins s32_0Imm:$Ii, s8_0Imm:$II),
"$Rdd32 = combine(#$Ii,#$II)",
-tc_b9488031, TypeALU32_2op>, Enc_18c338 {
+tc_5a2711e5, TypeALU32_2op>, Enc_18c338 {
let Inst{31-23} = 0b011111000;
let isReMaterializable = 1;
let isAsCheapAsAMove = 1;
@@ -425,7 +424,7 @@ def A2_combinew : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = combine($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_be32a5, PredNewRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_be32a5, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110101000;
@@ -437,7 +436,7 @@ def A2_max : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = max($Rs32,$Rt32)",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101110;
@@ -449,7 +448,7 @@ def A2_maxp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = max($Rss32,$Rtt32)",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -459,7 +458,7 @@ def A2_maxu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = maxu($Rs32,$Rt32)",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101110;
@@ -471,7 +470,7 @@ def A2_maxup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = maxu($Rss32,$Rtt32)",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -481,7 +480,7 @@ def A2_min : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = min($Rt32,$Rs32)",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101101;
@@ -493,7 +492,7 @@ def A2_minp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = min($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -503,7 +502,7 @@ def A2_minu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = minu($Rt32,$Rs32)",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101101;
@@ -515,7 +514,7 @@ def A2_minup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = minu($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -525,7 +524,7 @@ def A2_neg : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = neg($Rs32)",
-tc_68cb12ce, TypeALU32_2op> {
+tc_57890846, TypeALU32_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -535,7 +534,7 @@ def A2_negp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = neg($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000100;
}
@@ -543,7 +542,7 @@ def A2_negsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = neg($Rs32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_5e2823 {
+tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -555,7 +554,7 @@ def A2_nop : HInst<
(outs),
(ins),
"nop",
-tc_6efc556e, TypeALU32_2op>, Enc_e3b0c4 {
+tc_2eabeebe, TypeALU32_2op>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b0111111100000000;
}
@@ -563,7 +562,7 @@ def A2_not : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = not($Rs32)",
-tc_68cb12ce, TypeALU32_2op> {
+tc_57890846, TypeALU32_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -573,7 +572,7 @@ def A2_notp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = not($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000100;
}
@@ -581,7 +580,7 @@ def A2_or : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = or($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001001;
@@ -597,7 +596,7 @@ def A2_orir : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = or($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_140c83, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_140c83, ImmRegRel {
let Inst{31-22} = 0b0111011010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -613,7 +612,7 @@ def A2_orp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = or($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -623,7 +622,7 @@ def A2_paddf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = add($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011000;
@@ -639,7 +638,7 @@ def A2_paddfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = add($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011000;
@@ -656,7 +655,7 @@ def A2_paddif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if (!$Pu4) $Rd32 = add($Rs32,#$Ii)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011101001;
let isPredicated = 1;
@@ -676,7 +675,7 @@ def A2_paddifnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if (!$Pu4.new) $Rd32 = add($Rs32,#$Ii)",
-tc_2b2f4060, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-23} = 0b011101001;
let isPredicated = 1;
@@ -697,7 +696,7 @@ def A2_paddit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if ($Pu4) $Rd32 = add($Rs32,#$Ii)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011101000;
let isPredicated = 1;
@@ -716,7 +715,7 @@ def A2_padditnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if ($Pu4.new) $Rd32 = add($Rs32,#$Ii)",
-tc_2b2f4060, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-23} = 0b011101000;
let isPredicated = 1;
@@ -736,7 +735,7 @@ def A2_paddt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = add($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011000;
@@ -751,7 +750,7 @@ def A2_paddtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = add($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011000;
@@ -767,7 +766,7 @@ def A2_pandf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = and($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001000;
@@ -781,7 +780,7 @@ def A2_pandfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = and($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001000;
@@ -796,7 +795,7 @@ def A2_pandt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = and($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001000;
@@ -809,7 +808,7 @@ def A2_pandtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = and($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001000;
@@ -823,7 +822,7 @@ def A2_porf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = or($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001001;
@@ -837,7 +836,7 @@ def A2_porfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = or($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001001;
@@ -852,7 +851,7 @@ def A2_port : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = or($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001001;
@@ -865,7 +864,7 @@ def A2_portnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = or($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001001;
@@ -879,7 +878,7 @@ def A2_psubf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sub($Rt32,$Rs32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011001;
@@ -893,7 +892,7 @@ def A2_psubfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011001;
@@ -908,7 +907,7 @@ def A2_psubt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sub($Rt32,$Rs32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011001;
@@ -921,7 +920,7 @@ def A2_psubtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011001;
@@ -935,7 +934,7 @@ def A2_pxorf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = xor($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001011;
@@ -949,7 +948,7 @@ def A2_pxorfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001011;
@@ -964,7 +963,7 @@ def A2_pxort : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = xor($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001011;
@@ -977,7 +976,7 @@ def A2_pxortnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001011;
@@ -991,7 +990,7 @@ def A2_roundsat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = round($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_cf8126ae, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000110;
let hasNewValue = 1;
@@ -1003,7 +1002,7 @@ def A2_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = sat($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000110;
let hasNewValue = 1;
@@ -1014,7 +1013,7 @@ def A2_satb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satb($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1025,7 +1024,7 @@ def A2_sath : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sath($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1036,7 +1035,7 @@ def A2_satub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satub($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1047,7 +1046,7 @@ def A2_satuh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satuh($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1058,7 +1057,7 @@ def A2_sub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011001;
@@ -1073,7 +1072,7 @@ def A2_subh_h16_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1085,7 +1084,7 @@ def A2_subh_h16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1097,7 +1096,7 @@ def A2_subh_h16_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1109,7 +1108,7 @@ def A2_subh_h16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1121,7 +1120,7 @@ def A2_subh_h16_sat_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1134,7 +1133,7 @@ def A2_subh_h16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1147,7 +1146,7 @@ def A2_subh_h16_sat_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1160,7 +1159,7 @@ def A2_subh_h16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1173,7 +1172,7 @@ def A2_subh_l16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1185,7 +1184,7 @@ def A2_subh_l16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1197,7 +1196,7 @@ def A2_subh_l16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1210,7 +1209,7 @@ def A2_subh_l16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1223,7 +1222,7 @@ def A2_subp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = sub($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -1232,7 +1231,7 @@ def A2_subri : HInst<
(outs IntRegs:$Rd32),
(ins s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = sub(#$Ii,$Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_140c83, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_140c83, PredNewRel, ImmRegRel {
let Inst{31-22} = 0b0111011001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -1248,7 +1247,7 @@ def A2_subsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 {
+tc_61830035, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110110;
@@ -1262,7 +1261,7 @@ def A2_svaddh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vaddh($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110000;
@@ -1275,7 +1274,7 @@ def A2_svaddhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vaddh($Rs32,$Rt32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be {
+tc_61830035, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110001;
@@ -1290,7 +1289,7 @@ def A2_svadduhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vadduh($Rs32,$Rt32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be {
+tc_61830035, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110011;
@@ -1305,12 +1304,13 @@ def A2_svavgh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vavgh($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be {
+tc_1c80410a, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111000;
let hasNewValue = 1;
let opNewValue = 0;
+let prefersSlot3 = 1;
let InputType = "reg";
let isCommutable = 1;
}
@@ -1318,12 +1318,13 @@ def A2_svavghs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vavgh($Rs32,$Rt32):rnd",
-tc_8fe6b782, TypeALU32_3op>, Enc_5ab2be {
+tc_d08ee0f4, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111001;
let hasNewValue = 1;
let opNewValue = 0;
+let prefersSlot3 = 1;
let InputType = "reg";
let isCommutable = 1;
}
@@ -1331,19 +1332,20 @@ def A2_svnavgh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vnavgh($Rt32,$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_1c80410a, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111011;
let hasNewValue = 1;
let opNewValue = 0;
+let prefersSlot3 = 1;
let InputType = "reg";
}
def A2_svsubh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubh($Rt32,$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110100;
@@ -1355,7 +1357,7 @@ def A2_svsubhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubh($Rt32,$Rs32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 {
+tc_61830035, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110101;
@@ -1369,7 +1371,7 @@ def A2_svsubuhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubuh($Rt32,$Rs32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 {
+tc_61830035, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110111;
@@ -1383,7 +1385,7 @@ def A2_swiz : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = swiz($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -1393,7 +1395,7 @@ def A2_sxtb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sxtb($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000101;
let hasNewValue = 1;
@@ -1405,7 +1407,7 @@ def A2_sxth : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sxth($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000111;
let hasNewValue = 1;
@@ -1417,7 +1419,7 @@ def A2_sxtw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = sxtw($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100010;
}
@@ -1425,7 +1427,7 @@ def A2_tfr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = $Rs32",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000011;
let hasNewValue = 1;
@@ -1438,7 +1440,7 @@ def A2_tfrcrr : HInst<
(outs IntRegs:$Rd32),
(ins CtrRegs:$Cs32),
"$Rd32 = $Cs32",
-tc_29175780, TypeCR>, Enc_0cb018 {
+tc_b9272d6c, TypeCR>, Enc_0cb018 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101010000;
let hasNewValue = 1;
@@ -1448,7 +1450,7 @@ def A2_tfrf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = $Rs32",
-tc_d6bf0472, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
@@ -1463,7 +1465,7 @@ def A2_tfrfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = $Rs32",
-tc_2b2f4060, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
@@ -1479,7 +1481,7 @@ def A2_tfrih : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
"$Rx32.h = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_51436c {
+tc_5a2711e5, TypeALU32_2op>, Enc_51436c {
let Inst{21-21} = 0b1;
let Inst{31-24} = 0b01110010;
let hasNewValue = 1;
@@ -1490,7 +1492,7 @@ def A2_tfril : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
"$Rx32.l = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_51436c {
+tc_5a2711e5, TypeALU32_2op>, Enc_51436c {
let Inst{21-21} = 0b1;
let Inst{31-24} = 0b01110001;
let hasNewValue = 1;
@@ -1501,7 +1503,7 @@ def A2_tfrp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = $Rss32",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let BaseOpcode = "A2_tfrp";
let isPredicable = 1;
let isPseudo = 1;
@@ -1510,7 +1512,7 @@ def A2_tfrpf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if (!$Pu4) $Rdd32 = $Rss32",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let BaseOpcode = "A2_tfrp";
@@ -1520,7 +1522,7 @@ def A2_tfrpfnew : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if (!$Pu4.new) $Rdd32 = $Rss32",
-tc_5f6847a1, TypeALU32_2op>, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let isPredicatedNew = 1;
@@ -1531,7 +1533,7 @@ def A2_tfrpi : HInst<
(outs DoubleRegs:$Rdd32),
(ins s8_0Imm:$Ii),
"$Rdd32 = #$Ii",
-tc_b9488031, TypeALU64> {
+tc_5a2711e5, TypeALU64> {
let isReMaterializable = 1;
let isAsCheapAsAMove = 1;
let isMoveImm = 1;
@@ -1541,7 +1543,7 @@ def A2_tfrpt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if ($Pu4) $Rdd32 = $Rss32",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let BaseOpcode = "A2_tfrp";
let isPseudo = 1;
@@ -1550,7 +1552,7 @@ def A2_tfrptnew : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if ($Pu4.new) $Rdd32 = $Rss32",
-tc_5f6847a1, TypeALU32_2op>, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedNew = 1;
let BaseOpcode = "A2_tfrp";
@@ -1560,7 +1562,7 @@ def A2_tfrrcr : HInst<
(outs CtrRegs:$Cd32),
(ins IntRegs:$Rs32),
"$Cd32 = $Rs32",
-tc_a21dc435, TypeCR>, Enc_bd811a {
+tc_434c8e1e, TypeCR>, Enc_bd811a {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100010001;
let hasNewValue = 1;
@@ -1570,7 +1572,7 @@ def A2_tfrsi : HInst<
(outs IntRegs:$Rd32),
(ins s32_0Imm:$Ii),
"$Rd32 = #$Ii",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e87ce, PredNewRel, ImmRegRel {
+tc_57890846, TypeALU32_2op>, Enc_5e87ce, PredNewRel, ImmRegRel {
let Inst{21-21} = 0b0;
let Inst{31-24} = 0b01111000;
let hasNewValue = 1;
@@ -1592,7 +1594,7 @@ def A2_tfrt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = $Rs32",
-tc_d6bf0472, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -1606,7 +1608,7 @@ def A2_tfrtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = $Rs32",
-tc_2b2f4060, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -1621,7 +1623,7 @@ def A2_vabsh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsh($Rss32)",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1630,7 +1632,7 @@ def A2_vabshsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsh($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1640,7 +1642,7 @@ def A2_vabsw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsw($Rss32)",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1649,7 +1651,7 @@ def A2_vabswsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsw($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1659,7 +1661,7 @@ def A2_vaddb_map : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddb($Rss32,$Rtt32)",
-tc_540fdfbc, TypeMAPPING> {
+tc_946df596, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -1667,7 +1669,7 @@ def A2_vaddh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1676,7 +1678,7 @@ def A2_vaddhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1687,7 +1689,7 @@ def A2_vaddub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddub($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1696,7 +1698,7 @@ def A2_vaddubs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddub($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1707,7 +1709,7 @@ def A2_vadduhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vadduh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1718,7 +1720,7 @@ def A2_vaddw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddw($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1727,7 +1729,7 @@ def A2_vaddws : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddw($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1738,16 +1740,17 @@ def A2_vavgh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavghcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32):crnd",
-tc_2b6f77c6, TypeALU64>, Enc_a56825 {
+tc_002cb246, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
@@ -1757,79 +1760,87 @@ def A2_vavghr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavgub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgub($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavgubr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgub($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavguh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavguhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguh($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavguw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguw($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vavguwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguw($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vavgw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vavgwcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32):crnd",
-tc_2b6f77c6, TypeALU64>, Enc_a56825 {
+tc_002cb246, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -1839,16 +1850,17 @@ def A2_vavgwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vcmpbeq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b110000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1857,7 +1869,7 @@ def A2_vcmpbgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b111000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1866,7 +1878,7 @@ def A2_vcmpheq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1875,7 +1887,7 @@ def A2_vcmphgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1884,7 +1896,7 @@ def A2_vcmphgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1893,7 +1905,7 @@ def A2_vcmpweq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1902,7 +1914,7 @@ def A2_vcmpwgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1911,7 +1923,7 @@ def A2_vcmpwgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1920,7 +1932,7 @@ def A2_vconj : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vconj($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000100;
let prefersSlot3 = 1;
@@ -1930,7 +1942,7 @@ def A2_vmaxb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxb($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1940,7 +1952,7 @@ def A2_vmaxh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1950,7 +1962,7 @@ def A2_vmaxub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxub($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1960,7 +1972,7 @@ def A2_vmaxuh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxuh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1970,7 +1982,7 @@ def A2_vmaxuw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxuw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -1980,7 +1992,7 @@ def A2_vmaxw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1990,7 +2002,7 @@ def A2_vminb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminb($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -2000,7 +2012,7 @@ def A2_vminh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2010,7 +2022,7 @@ def A2_vminub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminub($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2020,7 +2032,7 @@ def A2_vminuh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminuh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2030,7 +2042,7 @@ def A2_vminuw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminuw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2040,7 +2052,7 @@ def A2_vminw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2050,16 +2062,17 @@ def A2_vnavgh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_6132ba3d, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
}
def A2_vnavghcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32):crnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2070,7 +2083,7 @@ def A2_vnavghr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32):rnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2081,16 +2094,17 @@ def A2_vnavgw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_6132ba3d, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
}
def A2_vnavgwcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32):crnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2101,7 +2115,7 @@ def A2_vnavgwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32):rnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2112,7 +2126,7 @@ def A2_vraddub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vraddub($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -2122,7 +2136,7 @@ def A2_vraddub_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vraddub($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -2133,7 +2147,7 @@ def A2_vrsadub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrsadub($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -2143,7 +2157,7 @@ def A2_vrsadub_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrsadub($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -2154,7 +2168,7 @@ def A2_vsubb_map : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vsubb($Rss32,$Rtt32)",
-tc_540fdfbc, TypeMAPPING> {
+tc_946df596, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -2162,7 +2176,7 @@ def A2_vsubh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubh($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2171,7 +2185,7 @@ def A2_vsubhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubh($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2182,7 +2196,7 @@ def A2_vsubub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubub($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2191,7 +2205,7 @@ def A2_vsububs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubub($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2202,7 +2216,7 @@ def A2_vsubuhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubuh($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2213,7 +2227,7 @@ def A2_vsubw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubw($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2222,7 +2236,7 @@ def A2_vsubws : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubw($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2233,7 +2247,7 @@ def A2_xor : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = xor($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001011;
@@ -2248,7 +2262,7 @@ def A2_xorp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = xor($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2258,7 +2272,7 @@ def A2_zxtb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = zxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let hasNewValue = 1;
let opNewValue = 0;
let BaseOpcode = "A2_zxtb";
@@ -2270,7 +2284,7 @@ def A2_zxth : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = zxth($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000110;
let hasNewValue = 1;
@@ -2282,7 +2296,7 @@ def A4_addp_c : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
"$Rdd32 = add($Rss32,$Rtt32,$Px4):carry",
-tc_523fcf30, TypeS_3op>, Enc_2b3f60 {
+tc_9c3ecd83, TypeS_3op>, Enc_2b3f60 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010110;
@@ -2293,7 +2307,7 @@ def A4_andn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = and($Rt32,~$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001100;
@@ -2305,7 +2319,7 @@ def A4_andnp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = and($Rtt32,~$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2314,7 +2328,7 @@ def A4_bitsplit : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = bitsplit($Rs32,$Rt32)",
-tc_1b9c9ee5, TypeALU64>, Enc_be32a5 {
+tc_4414d8b1, TypeALU64>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010100001;
@@ -2324,7 +2338,7 @@ def A4_bitspliti : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rdd32 = bitsplit($Rs32,#$Ii)",
-tc_1b9c9ee5, TypeS_2op>, Enc_311abd {
+tc_4414d8b1, TypeS_2op>, Enc_311abd {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001000110;
@@ -2334,14 +2348,14 @@ def A4_boundscheck : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rs32,$Rtt32)",
-tc_1e856f58, TypeALU64> {
+tc_85d5d03f, TypeALU64> {
let isPseudo = 1;
}
def A4_boundscheck_hi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rss32,$Rtt32):raw:hi",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -2350,7 +2364,7 @@ def A4_boundscheck_lo : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rss32,$Rtt32):raw:lo",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -2359,7 +2373,7 @@ def A4_cmpbeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.eq($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b110000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2372,7 +2386,7 @@ def A4_cmpbeqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Pd4 = cmpb.eq($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101000;
@@ -2385,7 +2399,7 @@ def A4_cmpbgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.gt($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2397,7 +2411,7 @@ def A4_cmpbgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s8_0Imm:$Ii),
"$Pd4 = cmpb.gt($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101001;
@@ -2409,7 +2423,7 @@ def A4_cmpbgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.gtu($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b111000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2421,7 +2435,7 @@ def A4_cmpbgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmpb.gtu($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_02553a, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_02553a, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011101010;
@@ -2438,7 +2452,7 @@ def A4_cmpheq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.eq($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2451,7 +2465,7 @@ def A4_cmpheqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmph.eq($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101000;
@@ -2469,7 +2483,7 @@ def A4_cmphgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.gt($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2481,7 +2495,7 @@ def A4_cmphgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmph.gt($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101001;
@@ -2498,7 +2512,7 @@ def A4_cmphgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.gtu($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2510,7 +2524,7 @@ def A4_cmphgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmph.gtu($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_02553a, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_02553a, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011101010;
@@ -2527,7 +2541,7 @@ def A4_combineii : HInst<
(outs DoubleRegs:$Rdd32),
(ins s8_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = combine(#$Ii,#$II)",
-tc_b9488031, TypeALU32_2op>, Enc_f0cca7 {
+tc_5a2711e5, TypeALU32_2op>, Enc_f0cca7 {
let Inst{31-21} = 0b01111100100;
let isExtendable = 1;
let opExtendable = 2;
@@ -2539,7 +2553,7 @@ def A4_combineir : HInst<
(outs DoubleRegs:$Rdd32),
(ins s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rdd32 = combine(#$Ii,$Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_9cdba7 {
+tc_5a2711e5, TypeALU32_2op>, Enc_9cdba7 {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011001;
let isExtendable = 1;
@@ -2552,7 +2566,7 @@ def A4_combineri : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rdd32 = combine($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_9cdba7 {
+tc_5a2711e5, TypeALU32_2op>, Enc_9cdba7 {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011000;
let isExtendable = 1;
@@ -2565,7 +2579,7 @@ def A4_cround_ri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = cround($Rs32,#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -2577,7 +2591,7 @@ def A4_cround_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cround($Rs32,$Rt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_5ab2be {
+tc_002cb246, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -2589,14 +2603,14 @@ def A4_ext : HInst<
(outs),
(ins u26_6Imm:$Ii),
"immext(#$Ii)",
-tc_452f85af, TypeEXTENDER>, Enc_2b518f {
+tc_862b3e70, TypeEXTENDER>, Enc_2b518f {
let Inst{31-28} = 0b0000;
}
def A4_modwrapu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = modwrap($Rs32,$Rt32)",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2608,7 +2622,7 @@ def A4_orn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = or($Rt32,~$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001101;
@@ -2620,7 +2634,7 @@ def A4_ornp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = or($Rtt32,~$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2629,7 +2643,7 @@ def A4_paslhf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = aslh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000000;
@@ -2643,7 +2657,7 @@ def A4_paslhfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = aslh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000000;
@@ -2658,7 +2672,7 @@ def A4_paslht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = aslh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000000;
@@ -2671,7 +2685,7 @@ def A4_paslhtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = aslh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000000;
@@ -2685,7 +2699,7 @@ def A4_pasrhf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = asrh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000001;
@@ -2699,7 +2713,7 @@ def A4_pasrhfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = asrh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000001;
@@ -2714,7 +2728,7 @@ def A4_pasrht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = asrh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000001;
@@ -2727,7 +2741,7 @@ def A4_pasrhtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = asrh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000001;
@@ -2741,7 +2755,7 @@ def A4_psxtbf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000101;
@@ -2755,7 +2769,7 @@ def A4_psxtbfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000101;
@@ -2770,7 +2784,7 @@ def A4_psxtbt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000101;
@@ -2783,7 +2797,7 @@ def A4_psxtbtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000101;
@@ -2797,7 +2811,7 @@ def A4_psxthf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000111;
@@ -2811,7 +2825,7 @@ def A4_psxthfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000111;
@@ -2826,7 +2840,7 @@ def A4_psxtht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000111;
@@ -2839,7 +2853,7 @@ def A4_psxthtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000111;
@@ -2853,7 +2867,7 @@ def A4_pzxtbf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = zxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000100;
@@ -2867,7 +2881,7 @@ def A4_pzxtbfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = zxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000100;
@@ -2882,7 +2896,7 @@ def A4_pzxtbt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = zxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000100;
@@ -2895,7 +2909,7 @@ def A4_pzxtbtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = zxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000100;
@@ -2909,7 +2923,7 @@ def A4_pzxthf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = zxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000110;
@@ -2923,7 +2937,7 @@ def A4_pzxthfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = zxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000110;
@@ -2938,7 +2952,7 @@ def A4_pzxtht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = zxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000110;
@@ -2951,7 +2965,7 @@ def A4_pzxthtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = zxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000110;
@@ -2965,7 +2979,7 @@ def A4_rcmpeq : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmp.eq($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011010;
@@ -2979,7 +2993,7 @@ def A4_rcmpeqi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = cmp.eq($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011010;
let hasNewValue = 1;
@@ -2996,7 +3010,7 @@ def A4_rcmpneq : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = !cmp.eq($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011011;
@@ -3010,7 +3024,7 @@ def A4_rcmpneqi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = !cmp.eq($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011011;
let hasNewValue = 1;
@@ -3027,7 +3041,7 @@ def A4_round_ri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = round($Rs32,#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -3039,7 +3053,7 @@ def A4_round_ri_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = round($Rs32,#$Ii):sat",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -3052,7 +3066,7 @@ def A4_round_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = round($Rs32,$Rt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_5ab2be {
+tc_002cb246, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -3064,7 +3078,7 @@ def A4_round_rr_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = round($Rs32,$Rt32):sat",
-tc_2b6f77c6, TypeS_3op>, Enc_5ab2be {
+tc_002cb246, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -3077,7 +3091,7 @@ def A4_subp_c : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
"$Rdd32 = sub($Rss32,$Rtt32,$Px4):carry",
-tc_523fcf30, TypeS_3op>, Enc_2b3f60 {
+tc_9c3ecd83, TypeS_3op>, Enc_2b3f60 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010111;
@@ -3088,7 +3102,7 @@ def A4_tfrcpp : HInst<
(outs DoubleRegs:$Rdd32),
(ins CtrRegs64:$Css32),
"$Rdd32 = $Css32",
-tc_29175780, TypeCR>, Enc_667b39 {
+tc_b9272d6c, TypeCR>, Enc_667b39 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101000000;
}
@@ -3096,7 +3110,7 @@ def A4_tfrpcp : HInst<
(outs CtrRegs64:$Cdd32),
(ins DoubleRegs:$Rss32),
"$Cdd32 = $Rss32",
-tc_a21dc435, TypeCR>, Enc_0ed752 {
+tc_434c8e1e, TypeCR>, Enc_0ed752 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100011001;
}
@@ -3104,7 +3118,7 @@ def A4_tlbmatch : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Pd4 = tlbmatch($Rss32,$Rt32)",
-tc_04c9decc, TypeALU64>, Enc_03833b {
+tc_4837eefb, TypeALU64>, Enc_03833b {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3114,7 +3128,7 @@ def A4_vcmpbeq_any : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = any8(vcmpb.eq($Rss32,$Rtt32))",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3123,7 +3137,7 @@ def A4_vcmpbeqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u8_0Imm:$Ii),
"$Pd4 = vcmpb.eq($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3132,7 +3146,7 @@ def A4_vcmpbgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3141,7 +3155,7 @@ def A4_vcmpbgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpb.gt($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3150,7 +3164,7 @@ def A4_vcmpbgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmpb.gtu($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_3680c2 {
+tc_643b4717, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3159,7 +3173,7 @@ def A4_vcmpheqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmph.eq($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3168,7 +3182,7 @@ def A4_vcmphgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmph.gt($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3177,7 +3191,7 @@ def A4_vcmphgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmph.gtu($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_3680c2 {
+tc_643b4717, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3186,7 +3200,7 @@ def A4_vcmpweqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpw.eq($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3195,7 +3209,7 @@ def A4_vcmpwgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpw.gt($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3204,7 +3218,7 @@ def A4_vcmpwgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmpw.gtu($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_3680c2 {
+tc_643b4717, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b100;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3213,7 +3227,7 @@ def A4_vrmaxh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3224,7 +3238,7 @@ def A4_vrmaxuh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxuh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3235,7 +3249,7 @@ def A4_vrmaxuw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxuw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3246,7 +3260,7 @@ def A4_vrmaxw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3257,7 +3271,7 @@ def A4_vrminh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3268,7 +3282,7 @@ def A4_vrminuh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminuh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3279,7 +3293,7 @@ def A4_vrminuw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminuw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3290,7 +3304,7 @@ def A4_vrminw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3301,7 +3315,7 @@ def A5_ACS : HInst<
(outs DoubleRegs:$Rxx32, PredRegs:$Pe4),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32,$Pe4 = vacsh($Rss32,$Rtt32)",
-tc_caaebcba, TypeM>, Enc_831a7d, Requires<[HasV55]> {
+tc_d1aa9eaa, TypeM>, Enc_831a7d, Requires<[HasV55]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -3314,7 +3328,7 @@ def A5_vaddhubs : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vaddhub($Rss32,$Rtt32):sat",
-tc_2b6f77c6, TypeS_3op>, Enc_d2216a, Requires<[HasV5]> {
+tc_002cb246, TypeS_3op>, Enc_d2216a {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -3327,7 +3341,7 @@ def A6_vcmpbeq_notany : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = !any8(vcmpb.eq($Rss32,$Rtt32))",
-tc_55050d58, TypeALU64>, Enc_fcf7a7, Requires<[HasV65]> {
+tc_1fc97744, TypeALU64>, Enc_fcf7a7, Requires<[HasV65]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3336,7 +3350,7 @@ def A6_vminub_RdP : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Pe4),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32,$Pe4 = vminub($Rtt32,$Rss32)",
-tc_ef84f62f, TypeM>, Enc_d2c7f1, Requires<[HasV62]> {
+tc_f9058dd7, TypeM>, Enc_d2c7f1, Requires<[HasV62]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -3347,7 +3361,7 @@ def C2_all8 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = all8($Ps4)",
-tc_f2704b9a, TypeCR>, Enc_65d691 {
+tc_de554571, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011101000;
}
@@ -3355,7 +3369,7 @@ def C2_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = and($Pt4,$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011000000;
@@ -3364,7 +3378,7 @@ def C2_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = and($Pt4,!$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011011000;
@@ -3373,7 +3387,7 @@ def C2_any8 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = any8($Ps4)",
-tc_f2704b9a, TypeCR>, Enc_65d691 {
+tc_de554571, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011100000;
}
@@ -3381,7 +3395,7 @@ def C2_bitsclr : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = bitsclr($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111100;
@@ -3390,7 +3404,7 @@ def C2_bitsclri : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u6_0Imm:$Ii),
"$Pd4 = bitsclr($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_5d6c34 {
+tc_643b4717, TypeS_2op>, Enc_5d6c34 {
let Inst{7-2} = 0b000000;
let Inst{31-21} = 0b10000101100;
}
@@ -3398,7 +3412,7 @@ def C2_bitsset : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = bitsset($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111010;
@@ -3407,7 +3421,7 @@ def C2_ccombinewf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rdd32 = combine($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111101000;
@@ -3419,7 +3433,7 @@ def C2_ccombinewnewf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111101000;
@@ -3432,7 +3446,7 @@ def C2_ccombinewnewt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111101000;
@@ -3444,7 +3458,7 @@ def C2_ccombinewt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rdd32 = combine($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111101000;
@@ -3455,7 +3469,7 @@ def C2_cmoveif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if (!$Pu4) $Rd32 = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111101;
@@ -3477,7 +3491,7 @@ def C2_cmoveit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if ($Pu4) $Rd32 = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111100;
@@ -3498,7 +3512,7 @@ def C2_cmovenewif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if (!$Pu4.new) $Rd32 = #$Ii",
-tc_5f6847a1, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111101;
@@ -3521,7 +3535,7 @@ def C2_cmovenewit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if ($Pu4.new) $Rd32 = #$Ii",
-tc_5f6847a1, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111100;
@@ -3543,7 +3557,7 @@ def C2_cmpeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.eq($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010000;
@@ -3556,7 +3570,7 @@ def C2_cmpeqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmp.eq($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-22} = 0b0111010100;
let CextOpcode = "C2_cmpeq";
@@ -3572,7 +3586,7 @@ def C2_cmpeqp : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3583,7 +3597,7 @@ def C2_cmpgei : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s8_0Imm:$Ii),
"$Pd4 = cmp.ge($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op> {
+tc_56f114f4, TypeALU32_2op> {
let isCompare = 1;
let isPseudo = 1;
}
@@ -3591,7 +3605,7 @@ def C2_cmpgeui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Pd4 = cmp.geu($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op> {
+tc_56f114f4, TypeALU32_2op> {
let isCompare = 1;
let isPseudo = 1;
}
@@ -3599,7 +3613,7 @@ def C2_cmpgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.gt($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010010;
@@ -3611,7 +3625,7 @@ def C2_cmpgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmp.gt($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-22} = 0b0111010101;
let CextOpcode = "C2_cmpgt";
@@ -3627,7 +3641,7 @@ def C2_cmpgtp : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3637,7 +3651,7 @@ def C2_cmpgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.gtu($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010011;
@@ -3649,7 +3663,7 @@ def C2_cmpgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmp.gtu($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-21} = 0b01110101100;
let CextOpcode = "C2_cmpgtu";
@@ -3665,7 +3679,7 @@ def C2_cmpgtup : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3675,7 +3689,7 @@ def C2_cmplt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.lt($Rs32,$Rt32)",
-tc_6ebb4a12, TypeALU32_3op> {
+tc_56f114f4, TypeALU32_3op> {
let isCompare = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -3684,7 +3698,7 @@ def C2_cmpltu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.ltu($Rs32,$Rt32)",
-tc_6ebb4a12, TypeALU32_3op> {
+tc_56f114f4, TypeALU32_3op> {
let isCompare = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -3693,7 +3707,7 @@ def C2_mask : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4),
"$Rdd32 = mask($Pt4)",
-tc_cde8b071, TypeS_2op>, Enc_78e566 {
+tc_0ae0825c, TypeS_2op>, Enc_78e566 {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0000;
let Inst{31-16} = 0b1000011000000000;
@@ -3702,7 +3716,7 @@ def C2_mux : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mux($Pu4,$Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54 {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110100000;
@@ -3714,7 +3728,7 @@ def C2_muxii : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii, s8_0Imm:$II),
"$Rd32 = mux($Pu4,#$Ii,#$II)",
-tc_d6bf0472, TypeALU32_2op>, Enc_830e5d {
+tc_4c5ba658, TypeALU32_2op>, Enc_830e5d {
let Inst{31-25} = 0b0111101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -3728,7 +3742,7 @@ def C2_muxir : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = mux($Pu4,$Rs32,#$Ii)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011100110;
let hasNewValue = 1;
@@ -3744,7 +3758,7 @@ def C2_muxri : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = mux($Pu4,#$Ii,$Rs32)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011100111;
let hasNewValue = 1;
@@ -3760,7 +3774,7 @@ def C2_not : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = not($Ps4)",
-tc_f2704b9a, TypeCR>, Enc_65d691 {
+tc_de554571, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011110000;
}
@@ -3768,7 +3782,7 @@ def C2_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = or($Pt4,$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011001000;
@@ -3777,7 +3791,7 @@ def C2_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = or($Pt4,!$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011111000;
@@ -3786,7 +3800,7 @@ def C2_pxfer_map : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = $Ps4",
-tc_53bc8a6a, TypeMAPPING> {
+tc_640086b5, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -3794,7 +3808,7 @@ def C2_tfrpr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Ps4),
"$Rd32 = $Ps4",
-tc_cde8b071, TypeS_2op>, Enc_f5e933 {
+tc_0ae0825c, TypeS_2op>, Enc_f5e933 {
let Inst{13-5} = 0b000000000;
let Inst{31-18} = 0b10001001010000;
let hasNewValue = 1;
@@ -3804,7 +3818,7 @@ def C2_tfrrp : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32),
"$Pd4 = $Rs32",
-tc_351fed2d, TypeS_2op>, Enc_48b75f {
+tc_cfd8378a, TypeS_2op>, Enc_48b75f {
let Inst{13-2} = 0b000000000000;
let Inst{31-21} = 0b10000101010;
}
@@ -3812,7 +3826,7 @@ def C2_vitpack : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Rd32 = vitpack($Ps4,$Pt4)",
-tc_1b9c9ee5, TypeS_2op>, Enc_527412 {
+tc_4414d8b1, TypeS_2op>, Enc_527412 {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b10001001000000;
@@ -3824,7 +3838,7 @@ def C2_vmux : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmux($Pu4,$Rss32,$Rtt32)",
-tc_f8eeed7a, TypeALU64>, Enc_329361 {
+tc_b4b5c03a, TypeALU64>, Enc_329361 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010001000;
@@ -3833,7 +3847,7 @@ def C2_xor : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = xor($Ps4,$Pt4)",
-tc_53bc8a6a, TypeCR>, Enc_284ebb {
+tc_640086b5, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011010000;
@@ -3842,7 +3856,7 @@ def C4_addipc : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = add(pc,#$Ii)",
-tc_b9c4623f, TypeCR>, Enc_607661 {
+tc_a813cf9a, TypeCR>, Enc_607661 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0110101001001001;
@@ -3858,7 +3872,7 @@ def C4_and_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,and($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011000100;
@@ -3867,7 +3881,7 @@ def C4_and_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,and($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011100100;
@@ -3876,7 +3890,7 @@ def C4_and_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,or($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011001100;
@@ -3885,7 +3899,7 @@ def C4_and_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,or($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011101100;
@@ -3894,7 +3908,7 @@ def C4_cmplte : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.gt($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010010;
@@ -3906,7 +3920,7 @@ def C4_cmpltei : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = !cmp.gt($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-22} = 0b0111010101;
let CextOpcode = "C4_cmplte";
@@ -3922,7 +3936,7 @@ def C4_cmplteu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.gtu($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010011;
@@ -3934,7 +3948,7 @@ def C4_cmplteui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = !cmp.gtu($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-21} = 0b01110101100;
let CextOpcode = "C4_cmplteu";
@@ -3950,7 +3964,7 @@ def C4_cmpneq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.eq($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010000;
@@ -3963,7 +3977,7 @@ def C4_cmpneqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = !cmp.eq($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-22} = 0b0111010100;
let CextOpcode = "C4_cmpneq";
@@ -3979,7 +3993,7 @@ def C4_fastcorner9 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = fastcorner9($Ps4,$Pt4)",
-tc_53bc8a6a, TypeCR>, Enc_284ebb {
+tc_640086b5, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b100100;
let Inst{13-10} = 0b1000;
let Inst{31-18} = 0b01101011000000;
@@ -3988,7 +4002,7 @@ def C4_fastcorner9_not : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = !fastcorner9($Ps4,$Pt4)",
-tc_53bc8a6a, TypeCR>, Enc_284ebb {
+tc_640086b5, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b100100;
let Inst{13-10} = 0b1000;
let Inst{31-18} = 0b01101011000100;
@@ -3997,7 +4011,7 @@ def C4_nbitsclr : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !bitsclr($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111101;
@@ -4006,7 +4020,7 @@ def C4_nbitsclri : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u6_0Imm:$Ii),
"$Pd4 = !bitsclr($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_5d6c34 {
+tc_643b4717, TypeS_2op>, Enc_5d6c34 {
let Inst{7-2} = 0b000000;
let Inst{31-21} = 0b10000101101;
}
@@ -4014,7 +4028,7 @@ def C4_nbitsset : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !bitsset($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111011;
@@ -4023,7 +4037,7 @@ def C4_or_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,and($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011010100;
@@ -4032,7 +4046,7 @@ def C4_or_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,and($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011110100;
@@ -4041,7 +4055,7 @@ def C4_or_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,or($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011011100;
@@ -4050,7 +4064,7 @@ def C4_or_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,or($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011111100;
@@ -4059,7 +4073,7 @@ def F2_conv_d2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_d2df($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4069,7 +4083,7 @@ def F2_conv_d2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_d2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -4081,7 +4095,7 @@ def F2_conv_df2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4091,7 +4105,7 @@ def F2_conv_df2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4101,7 +4115,7 @@ def F2_conv_df2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -4113,7 +4127,7 @@ def F2_conv_df2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4123,7 +4137,7 @@ def F2_conv_df2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4133,7 +4147,7 @@ def F2_conv_df2uw : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -4145,7 +4159,7 @@ def F2_conv_df2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000101;
let hasNewValue = 1;
@@ -4157,7 +4171,7 @@ def F2_conv_df2w : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -4169,7 +4183,7 @@ def F2_conv_df2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -4181,7 +4195,7 @@ def F2_conv_sf2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4191,7 +4205,7 @@ def F2_conv_sf2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4201,7 +4215,7 @@ def F2_conv_sf2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4211,7 +4225,7 @@ def F2_conv_sf2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4221,7 +4235,7 @@ def F2_conv_sf2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4231,7 +4245,7 @@ def F2_conv_sf2uw : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@@ -4243,7 +4257,7 @@ def F2_conv_sf2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@@ -4255,7 +4269,7 @@ def F2_conv_sf2w : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@@ -4267,7 +4281,7 @@ def F2_conv_sf2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@@ -4279,7 +4293,7 @@ def F2_conv_ud2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_ud2df($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4289,7 +4303,7 @@ def F2_conv_ud2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_ud2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000001;
let hasNewValue = 1;
@@ -4301,7 +4315,7 @@ def F2_conv_uw2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_uw2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4311,7 +4325,7 @@ def F2_conv_uw2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_uw2sf($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011001;
let hasNewValue = 1;
@@ -4323,7 +4337,7 @@ def F2_conv_w2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_w2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4333,7 +4347,7 @@ def F2_conv_w2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_w2sf($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011010;
let hasNewValue = 1;
@@ -4341,11 +4355,22 @@ let opNewValue = 0;
let isFP = 1;
let Uses = [USR];
}
+def F2_dfadd : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = dfadd($Rss32,$Rtt32)",
+tc_2f7c551d, TypeM>, Enc_a56825, Requires<[HasV66]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let isFP = 1;
+let Uses = [USR];
+}
def F2_dfclass : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Pd4 = dfclass($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_1f19b5, Requires<[HasV5]> {
+tc_643b4717, TypeALU64>, Enc_1f19b5 {
let Inst{4-2} = 0b100;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b11011100100;
@@ -4356,7 +4381,7 @@ def F2_dfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4368,7 +4393,7 @@ def F2_dfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.ge($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4380,7 +4405,7 @@ def F2_dfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4392,7 +4417,7 @@ def F2_dfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.uo($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4404,7 +4429,7 @@ def F2_dfimm_n : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):neg",
-tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_e6c957 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100101;
let prefersSlot3 = 1;
@@ -4413,16 +4438,27 @@ def F2_dfimm_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):pos",
-tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_e6c957 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100100;
let prefersSlot3 = 1;
}
+def F2_dfsub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = dfsub($Rss32,$Rtt32)",
+tc_2f7c551d, TypeM>, Enc_a56825, Requires<[HasV66]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let isFP = 1;
+let Uses = [USR];
+}
def F2_sfadd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfadd($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@@ -4436,7 +4472,7 @@ def F2_sfclass : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = sfclass($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_83ee64, Requires<[HasV5]> {
+tc_643b4717, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101111;
@@ -4447,7 +4483,7 @@ def F2_sfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.eq($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4459,7 +4495,7 @@ def F2_sfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.ge($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4471,7 +4507,7 @@ def F2_sfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.gt($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4483,7 +4519,7 @@ def F2_sfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.uo($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4495,7 +4531,7 @@ def F2_sffixupd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupd($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@@ -4507,7 +4543,7 @@ def F2_sffixupn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupn($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@@ -4519,7 +4555,7 @@ def F2_sffixupr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sffixupr($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011101;
let hasNewValue = 1;
@@ -4530,7 +4566,7 @@ def F2_sffma : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32)",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4544,7 +4580,7 @@ def F2_sffma_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32):lib",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4558,7 +4594,7 @@ def F2_sffma_sc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4),
"$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale",
-tc_038a1342, TypeM>, Enc_437f33, Requires<[HasV5]> {
+tc_4560740b, TypeM>, Enc_437f33 {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -4572,7 +4608,7 @@ def F2_sffms : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32)",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4586,7 +4622,7 @@ def F2_sffms_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32):lib",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4600,7 +4636,7 @@ def F2_sfimm_n : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):neg",
-tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_6c9440 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011001;
let hasNewValue = 1;
@@ -4611,7 +4647,7 @@ def F2_sfimm_p : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):pos",
-tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_6c9440 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011000;
let hasNewValue = 1;
@@ -4622,7 +4658,7 @@ def F2_sfinvsqrta : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32),
"$Rd32,$Pe4 = sfinvsqrta($Rs32)",
-tc_4d99bca9, TypeS_2op>, Enc_890909, Requires<[HasV5]> {
+tc_b8bffe55, TypeS_2op>, Enc_890909 {
let Inst{13-7} = 0b0000000;
let Inst{31-21} = 0b10001011111;
let hasNewValue = 1;
@@ -4634,7 +4670,7 @@ def F2_sfmax : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmax($Rs32,$Rt32)",
-tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_88b4f13d, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@@ -4648,7 +4684,7 @@ def F2_sfmin : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmin($Rs32,$Rt32)",
-tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_88b4f13d, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@@ -4662,7 +4698,7 @@ def F2_sfmpy : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmpy($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011010;
@@ -4676,7 +4712,7 @@ def F2_sfrecipa : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)",
-tc_9c00ce8d, TypeM>, Enc_a94f3b, Requires<[HasV5]> {
+tc_2ff964b4, TypeM>, Enc_a94f3b {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011111;
@@ -4689,7 +4725,7 @@ def F2_sfsub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfsub($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@@ -4702,7 +4738,7 @@ def G4_tfrgcpp : HInst<
(outs DoubleRegs:$Rdd32),
(ins GuestRegs64:$Gss32),
"$Rdd32 = $Gss32",
-tc_6fa4db47, TypeCR>, Enc_0aa344 {
+tc_0d8f5752, TypeCR>, Enc_0aa344 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101000001;
}
@@ -4710,7 +4746,7 @@ def G4_tfrgcrr : HInst<
(outs IntRegs:$Rd32),
(ins GuestRegs:$Gs32),
"$Rd32 = $Gs32",
-tc_6fa4db47, TypeCR>, Enc_44271f {
+tc_0d8f5752, TypeCR>, Enc_44271f {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101010001;
let hasNewValue = 1;
@@ -4720,7 +4756,7 @@ def G4_tfrgpcp : HInst<
(outs GuestRegs64:$Gdd32),
(ins DoubleRegs:$Rss32),
"$Gdd32 = $Rss32",
-tc_994333cd, TypeCR>, Enc_ed5027 {
+tc_bcf98408, TypeCR>, Enc_ed5027 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100011000;
let hasNewValue = 1;
@@ -4730,7 +4766,7 @@ def G4_tfrgrcr : HInst<
(outs GuestRegs:$Gd32),
(ins IntRegs:$Rs32),
"$Gd32 = $Rs32",
-tc_994333cd, TypeCR>, Enc_621fba {
+tc_bcf98408, TypeCR>, Enc_621fba {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100010000;
let hasNewValue = 1;
@@ -4740,7 +4776,7 @@ def J2_call : HInst<
(outs),
(ins a30_2Imm:$Ii),
"call $Ii",
-tc_a27582fa, TypeJ>, Enc_81ac1d, PredRel {
+tc_4ae7b58b, TypeJ>, Enc_81ac1d, PredRel {
let Inst{0-0} = 0b0;
let Inst{31-25} = 0b0101101;
let isCall = 1;
@@ -4762,7 +4798,7 @@ def J2_callf : HInst<
(outs),
(ins PredRegs:$Pu4, a30_2Imm:$Ii),
"if (!$Pu4) call $Ii",
-tc_2f185f5c, TypeJ>, Enc_daea09, PredRel {
+tc_1d81e60e, TypeJ>, Enc_daea09, PredRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b1;
@@ -4789,7 +4825,7 @@ def J2_callr : HInst<
(outs),
(ins IntRegs:$Rs32),
"callr $Rs32",
-tc_15411484, TypeJ>, Enc_ecbcc8 {
+tc_3bd75825, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010000101;
let isCall = 1;
@@ -4803,7 +4839,7 @@ def J2_callrf : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) callr $Rs32",
-tc_10b97e27, TypeJ>, Enc_88d4d9 {
+tc_1ad90acd, TypeJ>, Enc_88d4d9 {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010001001;
@@ -4821,7 +4857,7 @@ def J2_callrt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) callr $Rs32",
-tc_10b97e27, TypeJ>, Enc_88d4d9 {
+tc_1ad90acd, TypeJ>, Enc_88d4d9 {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010001000;
@@ -4838,7 +4874,7 @@ def J2_callt : HInst<
(outs),
(ins PredRegs:$Pu4, a30_2Imm:$Ii),
"if ($Pu4) call $Ii",
-tc_2f185f5c, TypeJ>, Enc_daea09, PredRel {
+tc_1d81e60e, TypeJ>, Enc_daea09, PredRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b0;
@@ -4864,7 +4900,7 @@ def J2_endloop0 : HInst<
(outs),
(ins),
"endloop0",
-tc_52d7bbea, TypeJ> {
+tc_1b6f7cec, TypeJ> {
let Uses = [LC0, SA0];
let Defs = [LC0, P3, PC, USR];
let isBranch = 1;
@@ -4875,7 +4911,7 @@ def J2_endloop01 : HInst<
(outs),
(ins),
"endloop01",
-tc_52d7bbea, TypeJ> {
+tc_1b6f7cec, TypeJ> {
let Uses = [LC0, LC1, SA0, SA1];
let Defs = [LC0, LC1, P3, PC, USR];
let isPseudo = 1;
@@ -4884,7 +4920,7 @@ def J2_endloop1 : HInst<
(outs),
(ins),
"endloop1",
-tc_52d7bbea, TypeJ> {
+tc_1b6f7cec, TypeJ> {
let Uses = [LC1, SA1];
let Defs = [LC1, PC];
let isBranch = 1;
@@ -4895,7 +4931,7 @@ def J2_jump : HInst<
(outs),
(ins b30_2Imm:$Ii),
"jump $Ii",
-tc_3669266a, TypeJ>, Enc_81ac1d, PredNewRel {
+tc_ae53734a, TypeJ>, Enc_81ac1d, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{31-25} = 0b0101100;
let isTerminator = 1;
@@ -4917,7 +4953,7 @@ def J2_jumpf : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4) jump:nt $Ii",
-tc_e9fae2d6, TypeJ>, Enc_daea09, PredNewRel {
+tc_db2bce9c, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b1;
@@ -4943,7 +4979,7 @@ def J2_jumpf_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, b15_2Imm:$Ii),
"if (!$Pu4) jump $Ii",
-tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60]> {
+tc_db2bce9c, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -4951,7 +4987,7 @@ def J2_jumpfnew : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4.new) jump:nt $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b010;
let Inst{21-21} = 0b1;
@@ -4978,7 +5014,7 @@ def J2_jumpfnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4.new) jump:t $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b110;
let Inst{21-21} = 0b1;
@@ -5005,7 +5041,7 @@ def J2_jumpfpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4) jump:t $Ii",
-tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
+tc_cd374165, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b100;
let Inst{21-21} = 0b1;
@@ -5031,7 +5067,7 @@ def J2_jumpr : HInst<
(outs),
(ins IntRegs:$Rs32),
"jumpr $Rs32",
-tc_9faf76ae, TypeJ>, Enc_ecbcc8, PredNewRel {
+tc_d5b7b0c1, TypeJ>, Enc_ecbcc8, PredNewRel {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010010100;
let isTerminator = 1;
@@ -5048,7 +5084,7 @@ def J2_jumprf : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr:nt $Rs32",
-tc_e0739b8c, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_85c9c08f, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010011011;
@@ -5067,7 +5103,7 @@ def J2_jumprf_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr $Rs32",
-tc_e0739b8c, TypeMAPPING>, Requires<[HasV60]> {
+tc_85c9c08f, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5075,7 +5111,7 @@ def J2_jumprfnew : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) jumpr:nt $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b01010011011;
@@ -5095,7 +5131,7 @@ def J2_jumprfnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) jumpr:t $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b01010011011;
@@ -5115,7 +5151,7 @@ def J2_jumprfpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr:t $Rs32",
-tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
+tc_e78647bd, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b01010011011;
@@ -5134,7 +5170,7 @@ def J2_jumprgtez : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32>=#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000101;
@@ -5152,7 +5188,7 @@ def J2_jumprgtezpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32>=#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000101;
@@ -5170,7 +5206,7 @@ def J2_jumprltez : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32<=#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000111;
@@ -5188,7 +5224,7 @@ def J2_jumprltezpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32<=#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000111;
@@ -5206,7 +5242,7 @@ def J2_jumprnz : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32==#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000110;
@@ -5224,7 +5260,7 @@ def J2_jumprnzpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32==#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000110;
@@ -5242,7 +5278,7 @@ def J2_jumprt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr:nt $Rs32",
-tc_e0739b8c, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_85c9c08f, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010011010;
@@ -5260,7 +5296,7 @@ def J2_jumprt_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr $Rs32",
-tc_e0739b8c, TypeMAPPING>, Requires<[HasV60]> {
+tc_85c9c08f, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5268,7 +5304,7 @@ def J2_jumprtnew : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) jumpr:nt $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b01010011010;
@@ -5287,7 +5323,7 @@ def J2_jumprtnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) jumpr:t $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b01010011010;
@@ -5306,7 +5342,7 @@ def J2_jumprtpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr:t $Rs32",
-tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
+tc_e78647bd, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b01010011010;
@@ -5324,7 +5360,7 @@ def J2_jumprz : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32!=#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000100;
@@ -5342,7 +5378,7 @@ def J2_jumprzpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32!=#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000100;
@@ -5360,7 +5396,7 @@ def J2_jumpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4) jump:nt $Ii",
-tc_e9fae2d6, TypeJ>, Enc_daea09, PredNewRel {
+tc_db2bce9c, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b0;
@@ -5385,7 +5421,7 @@ def J2_jumpt_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, b15_2Imm:$Ii),
"if ($Pu4) jump $Ii",
-tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60]> {
+tc_db2bce9c, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5393,7 +5429,7 @@ def J2_jumptnew : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4.new) jump:nt $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b010;
let Inst{21-21} = 0b0;
@@ -5419,7 +5455,7 @@ def J2_jumptnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4.new) jump:t $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b110;
let Inst{21-21} = 0b0;
@@ -5445,7 +5481,7 @@ def J2_jumptpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4) jump:t $Ii",
-tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
+tc_cd374165, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b100;
let Inst{21-21} = 0b0;
@@ -5470,7 +5506,7 @@ def J2_loop0i : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"loop0($Ii,#$II)",
-tc_cf59f215, TypeCR>, Enc_4dc228 {
+tc_a9d88b22, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001000;
@@ -5487,7 +5523,7 @@ def J2_loop0r : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"loop0($Ii,$Rs32)",
-tc_7934b9df, TypeCR>, Enc_864a5a {
+tc_df3319ed, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5505,7 +5541,7 @@ def J2_loop1i : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"loop1($Ii,#$II)",
-tc_cf59f215, TypeCR>, Enc_4dc228 {
+tc_a9d88b22, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001001;
@@ -5522,7 +5558,7 @@ def J2_loop1r : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"loop1($Ii,$Rs32)",
-tc_7934b9df, TypeCR>, Enc_864a5a {
+tc_df3319ed, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5540,7 +5576,7 @@ def J2_pause : HInst<
(outs),
(ins u8_0Imm:$Ii),
"pause(#$Ii)",
-tc_681a2300, TypeJ>, Enc_a51a9a {
+tc_8d9d0154, TypeJ>, Enc_a51a9a {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5551,7 +5587,7 @@ def J2_ploop1si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp1loop0($Ii,#$II)",
-tc_c5e2426d, TypeCR>, Enc_4dc228 {
+tc_1c4528a2, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001101;
@@ -5569,7 +5605,7 @@ def J2_ploop1sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp1loop0($Ii,$Rs32)",
-tc_4f7cd700, TypeCR>, Enc_864a5a {
+tc_32779c6f, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5588,7 +5624,7 @@ def J2_ploop2si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp2loop0($Ii,#$II)",
-tc_c5e2426d, TypeCR>, Enc_4dc228 {
+tc_1c4528a2, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001110;
@@ -5606,7 +5642,7 @@ def J2_ploop2sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp2loop0($Ii,$Rs32)",
-tc_4f7cd700, TypeCR>, Enc_864a5a {
+tc_32779c6f, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5625,7 +5661,7 @@ def J2_ploop3si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp3loop0($Ii,#$II)",
-tc_c5e2426d, TypeCR>, Enc_4dc228 {
+tc_1c4528a2, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001111;
@@ -5643,7 +5679,7 @@ def J2_ploop3sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp3loop0($Ii,$Rs32)",
-tc_4f7cd700, TypeCR>, Enc_864a5a {
+tc_32779c6f, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5662,18 +5698,19 @@ def J2_trap0 : HInst<
(outs),
(ins u8_0Imm:$Ii),
"trap0(#$Ii)",
-tc_14cd4cfa, TypeJ>, Enc_a51a9a {
+tc_fc3999b4, TypeJ>, Enc_a51a9a {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0101010000000000;
let isSolo = 1;
+let hasSideEffects = 1;
}
def J2_trap1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u8_0Imm:$Ii),
"trap1($Rx32,#$Ii)",
-tc_59a01ead, TypeJ>, Enc_33f8ba {
+tc_b9e09e03, TypeJ>, Enc_33f8ba {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5683,13 +5720,15 @@ let opNewValue = 0;
let isSolo = 1;
let Uses = [GOSP];
let Defs = [GOSP, PC];
+let hasSideEffects = 1;
let Constraints = "$Rx32 = $Rx32in";
}
def J2_trap1_noregmap : HInst<
(outs),
(ins u8_0Imm:$Ii),
"trap1(#$Ii)",
-tc_59a01ead, TypeMAPPING> {
+tc_b9e09e03, TypeMAPPING> {
+let hasSideEffects = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5697,7 +5736,7 @@ def J4_cmpeq_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -5723,7 +5762,7 @@ def J4_cmpeq_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -5749,7 +5788,7 @@ def J4_cmpeq_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010001;
@@ -5775,7 +5814,7 @@ def J4_cmpeq_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010001;
@@ -5801,7 +5840,7 @@ def J4_cmpeq_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010001;
@@ -5827,7 +5866,7 @@ def J4_cmpeq_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010001;
@@ -5853,7 +5892,7 @@ def J4_cmpeq_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -5878,7 +5917,7 @@ def J4_cmpeq_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -5903,7 +5942,7 @@ def J4_cmpeq_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010000;
@@ -5928,7 +5967,7 @@ def J4_cmpeq_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010000;
@@ -5953,7 +5992,7 @@ def J4_cmpeq_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010000;
@@ -5978,7 +6017,7 @@ def J4_cmpeq_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010000;
@@ -6003,7 +6042,7 @@ def J4_cmpeqi_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6029,7 +6068,7 @@ def J4_cmpeqi_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6055,7 +6094,7 @@ def J4_cmpeqi_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000001;
@@ -6081,7 +6120,7 @@ def J4_cmpeqi_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000001;
@@ -6107,7 +6146,7 @@ def J4_cmpeqi_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001001;
@@ -6133,7 +6172,7 @@ def J4_cmpeqi_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001001;
@@ -6159,7 +6198,7 @@ def J4_cmpeqi_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6184,7 +6223,7 @@ def J4_cmpeqi_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6209,7 +6248,7 @@ def J4_cmpeqi_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000000;
@@ -6234,7 +6273,7 @@ def J4_cmpeqi_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000000;
@@ -6259,7 +6298,7 @@ def J4_cmpeqi_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001000;
@@ -6284,7 +6323,7 @@ def J4_cmpeqi_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001000;
@@ -6309,7 +6348,7 @@ def J4_cmpeqn1_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_e90a15, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_e90a15, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -6335,7 +6374,7 @@ def J4_cmpeqn1_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_5a18b3, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_5a18b3, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -6361,7 +6400,7 @@ def J4_cmpeqn1_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_1de724, PredRel {
+tc_3d495a39, TypeCJ>, Enc_1de724, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001000111;
@@ -6387,7 +6426,7 @@ def J4_cmpeqn1_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14640c, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14640c, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001000111;
@@ -6413,7 +6452,7 @@ def J4_cmpeqn1_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_668704, PredRel {
+tc_3d495a39, TypeCJ>, Enc_668704, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001001111;
@@ -6439,7 +6478,7 @@ def J4_cmpeqn1_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_800e04, PredRel {
+tc_3d495a39, TypeCJ>, Enc_800e04, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001001111;
@@ -6465,7 +6504,7 @@ def J4_cmpeqn1_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_4aca3a, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_4aca3a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -6490,7 +6529,7 @@ def J4_cmpeqn1_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_f7ea77, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_f7ea77, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -6515,7 +6554,7 @@ def J4_cmpeqn1_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_405228, PredRel {
+tc_3d495a39, TypeCJ>, Enc_405228, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001000110;
@@ -6540,7 +6579,7 @@ def J4_cmpeqn1_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_3a2484, PredRel {
+tc_3d495a39, TypeCJ>, Enc_3a2484, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001000110;
@@ -6565,7 +6604,7 @@ def J4_cmpeqn1_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_736575, PredRel {
+tc_3d495a39, TypeCJ>, Enc_736575, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001001110;
@@ -6590,7 +6629,7 @@ def J4_cmpeqn1_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_8e583a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_8e583a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001001110;
@@ -6615,7 +6654,7 @@ def J4_cmpgt_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6641,7 +6680,7 @@ def J4_cmpgt_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6667,7 +6706,7 @@ def J4_cmpgt_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010011;
@@ -6693,7 +6732,7 @@ def J4_cmpgt_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010011;
@@ -6719,7 +6758,7 @@ def J4_cmpgt_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010011;
@@ -6745,7 +6784,7 @@ def J4_cmpgt_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010011;
@@ -6771,7 +6810,7 @@ def J4_cmpgt_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6796,7 +6835,7 @@ def J4_cmpgt_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6821,7 +6860,7 @@ def J4_cmpgt_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010010;
@@ -6846,7 +6885,7 @@ def J4_cmpgt_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010010;
@@ -6871,7 +6910,7 @@ def J4_cmpgt_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010010;
@@ -6896,7 +6935,7 @@ def J4_cmpgt_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010010;
@@ -6921,7 +6960,7 @@ def J4_cmpgti_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6947,7 +6986,7 @@ def J4_cmpgti_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6973,7 +7012,7 @@ def J4_cmpgti_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000011;
@@ -6999,7 +7038,7 @@ def J4_cmpgti_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000011;
@@ -7025,7 +7064,7 @@ def J4_cmpgti_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001011;
@@ -7051,7 +7090,7 @@ def J4_cmpgti_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001011;
@@ -7077,7 +7116,7 @@ def J4_cmpgti_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7102,7 +7141,7 @@ def J4_cmpgti_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7127,7 +7166,7 @@ def J4_cmpgti_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000010;
@@ -7152,7 +7191,7 @@ def J4_cmpgti_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000010;
@@ -7177,7 +7216,7 @@ def J4_cmpgti_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001010;
@@ -7202,7 +7241,7 @@ def J4_cmpgti_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001010;
@@ -7227,7 +7266,7 @@ def J4_cmpgtn1_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_3694bd, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_3694bd, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -7253,7 +7292,7 @@ def J4_cmpgtn1_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_a6853f, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_a6853f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -7279,7 +7318,7 @@ def J4_cmpgtn1_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_a42857, PredRel {
+tc_3d495a39, TypeCJ>, Enc_a42857, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001000111;
@@ -7305,7 +7344,7 @@ def J4_cmpgtn1_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_f6fe0b, PredRel {
+tc_3d495a39, TypeCJ>, Enc_f6fe0b, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001000111;
@@ -7331,7 +7370,7 @@ def J4_cmpgtn1_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_3e3989, PredRel {
+tc_3d495a39, TypeCJ>, Enc_3e3989, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001001111;
@@ -7357,7 +7396,7 @@ def J4_cmpgtn1_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_b909d2, PredRel {
+tc_3d495a39, TypeCJ>, Enc_b909d2, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001001111;
@@ -7383,7 +7422,7 @@ def J4_cmpgtn1_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_f82302, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_f82302, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -7408,7 +7447,7 @@ def J4_cmpgtn1_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_6413b6, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_6413b6, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -7433,7 +7472,7 @@ def J4_cmpgtn1_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_b78edd, PredRel {
+tc_3d495a39, TypeCJ>, Enc_b78edd, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001000110;
@@ -7458,7 +7497,7 @@ def J4_cmpgtn1_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_041d7b, PredRel {
+tc_3d495a39, TypeCJ>, Enc_041d7b, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001000110;
@@ -7483,7 +7522,7 @@ def J4_cmpgtn1_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_b1e1fb, PredRel {
+tc_3d495a39, TypeCJ>, Enc_b1e1fb, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001001110;
@@ -7508,7 +7547,7 @@ def J4_cmpgtn1_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_178717, PredRel {
+tc_3d495a39, TypeCJ>, Enc_178717, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001001110;
@@ -7533,7 +7572,7 @@ def J4_cmpgtu_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7559,7 +7598,7 @@ def J4_cmpgtu_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7585,7 +7624,7 @@ def J4_cmpgtu_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010101;
@@ -7611,7 +7650,7 @@ def J4_cmpgtu_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010101;
@@ -7637,7 +7676,7 @@ def J4_cmpgtu_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010101;
@@ -7663,7 +7702,7 @@ def J4_cmpgtu_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010101;
@@ -7689,7 +7728,7 @@ def J4_cmpgtu_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7714,7 +7753,7 @@ def J4_cmpgtu_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7739,7 +7778,7 @@ def J4_cmpgtu_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010100;
@@ -7764,7 +7803,7 @@ def J4_cmpgtu_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010100;
@@ -7789,7 +7828,7 @@ def J4_cmpgtu_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010100;
@@ -7814,7 +7853,7 @@ def J4_cmpgtu_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010100;
@@ -7839,7 +7878,7 @@ def J4_cmpgtui_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7865,7 +7904,7 @@ def J4_cmpgtui_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7891,7 +7930,7 @@ def J4_cmpgtui_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000101;
@@ -7917,7 +7956,7 @@ def J4_cmpgtui_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000101;
@@ -7943,7 +7982,7 @@ def J4_cmpgtui_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001101;
@@ -7969,7 +8008,7 @@ def J4_cmpgtui_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001101;
@@ -7995,7 +8034,7 @@ def J4_cmpgtui_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8020,7 +8059,7 @@ def J4_cmpgtui_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8045,7 +8084,7 @@ def J4_cmpgtui_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000100;
@@ -8070,7 +8109,7 @@ def J4_cmpgtui_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000100;
@@ -8095,7 +8134,7 @@ def J4_cmpgtui_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001100;
@@ -8120,7 +8159,7 @@ def J4_cmpgtui_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001100;
@@ -8145,7 +8184,7 @@ def J4_cmplt_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8171,7 +8210,7 @@ def J4_cmplt_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8197,7 +8236,7 @@ def J4_cmplt_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8222,7 +8261,7 @@ def J4_cmplt_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8247,7 +8286,7 @@ def J4_cmpltu_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8273,7 +8312,7 @@ def J4_cmpltu_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8299,7 +8338,7 @@ def J4_cmpltu_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8324,7 +8363,7 @@ def J4_cmpltu_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8349,7 +8388,7 @@ def J4_hintjumpr : HInst<
(outs),
(ins IntRegs:$Rs32),
"hintjr($Rs32)",
-tc_9faf76ae, TypeJ>, Enc_ecbcc8 {
+tc_d5b7b0c1, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010010101;
let isTerminator = 1;
@@ -8361,7 +8400,7 @@ def J4_jumpseti : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u6_0Imm:$II, b30_2Imm:$Ii),
"$Rd16 = #$II ; jump $Ii",
-tc_49eb22c8, TypeCJ>, Enc_9e4c3f {
+tc_0663f615, TypeCJ>, Enc_9e4c3f {
let Inst{0-0} = 0b0;
let Inst{31-22} = 0b0001011000;
let hasNewValue = 1;
@@ -8381,7 +8420,7 @@ def J4_jumpsetr : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"$Rd16 = $Rs16 ; jump $Ii",
-tc_49eb22c8, TypeCJ>, Enc_66bce1 {
+tc_0663f615, TypeCJ>, Enc_66bce1 {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001011100;
@@ -8402,7 +8441,7 @@ def J4_tstbit0_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!tstbit($Ns8.new,#0)) jump:nt $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -8427,7 +8466,7 @@ def J4_tstbit0_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!tstbit($Ns8.new,#0)) jump:t $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -8452,7 +8491,7 @@ def J4_tstbit0_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (!p0.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001000111;
@@ -8477,7 +8516,7 @@ def J4_tstbit0_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (!p0.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001000111;
@@ -8502,7 +8541,7 @@ def J4_tstbit0_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (!p1.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001001111;
@@ -8527,7 +8566,7 @@ def J4_tstbit0_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (!p1.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001001111;
@@ -8552,7 +8591,7 @@ def J4_tstbit0_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (tstbit($Ns8.new,#0)) jump:nt $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -8576,7 +8615,7 @@ def J4_tstbit0_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (tstbit($Ns8.new,#0)) jump:t $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -8600,7 +8639,7 @@ def J4_tstbit0_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (p0.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001000110;
@@ -8624,7 +8663,7 @@ def J4_tstbit0_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (p0.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001000110;
@@ -8648,7 +8687,7 @@ def J4_tstbit0_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (p1.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001001110;
@@ -8672,7 +8711,7 @@ def J4_tstbit0_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (p1.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001001110;
@@ -8696,7 +8735,7 @@ def L2_deallocframe : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = deallocframe($Rs32):raw",
-tc_d1090e34, TypeLD>, Enc_3a3d62 {
+tc_15aa71c5, TypeLD>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010000000;
let accessSize = DoubleWordAccess;
@@ -8708,7 +8747,7 @@ def L2_loadalignb_io : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Ryy32 = memb_fifo($Rs32+#$Ii)",
-tc_ef52ed71, TypeLD>, Enc_a27588 {
+tc_5ef37dc4, TypeLD>, Enc_a27588 {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -8725,9 +8764,10 @@ def L2_loadalignb_pbr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++$Mu2:brev)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110100;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayLoad = 1;
let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
@@ -8736,7 +8776,7 @@ def L2_loadalignb_pci : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++#$Ii:circ($Mu2))",
-tc_03220ffa, TypeLD>, Enc_74aef2 {
+tc_785f65a7, TypeLD>, Enc_74aef2 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000100;
let addrMode = PostInc;
@@ -8749,7 +8789,7 @@ def L2_loadalignb_pcr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++I:circ($Mu2))",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000100;
let addrMode = PostInc;
@@ -8762,7 +8802,7 @@ def L2_loadalignb_pi : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Ryy32 = memb_fifo($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_6b197f {
+tc_3c76b0ff, TypeLD>, Enc_6b197f {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010100;
let addrMode = PostInc;
@@ -8774,7 +8814,7 @@ def L2_loadalignb_pr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++$Mu2)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100100;
let addrMode = PostInc;
@@ -8786,7 +8826,7 @@ def L2_loadalignb_zomap : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
"$Ryy32 = memb_fifo($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let Constraints = "$Ryy32 = $Ryy32in";
@@ -8795,7 +8835,7 @@ def L2_loadalignh_io : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s31_1Imm:$Ii),
"$Ryy32 = memh_fifo($Rs32+#$Ii)",
-tc_ef52ed71, TypeLD>, Enc_5cd7e9 {
+tc_5ef37dc4, TypeLD>, Enc_5cd7e9 {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -8812,9 +8852,10 @@ def L2_loadalignh_pbr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++$Mu2:brev)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110010;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
@@ -8823,7 +8864,7 @@ def L2_loadalignh_pci : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++#$Ii:circ($Mu2))",
-tc_03220ffa, TypeLD>, Enc_9e2e1c {
+tc_785f65a7, TypeLD>, Enc_9e2e1c {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000010;
let addrMode = PostInc;
@@ -8836,7 +8877,7 @@ def L2_loadalignh_pcr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++I:circ($Mu2))",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000010;
let addrMode = PostInc;
@@ -8849,7 +8890,7 @@ def L2_loadalignh_pi : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Ryy32 = memh_fifo($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_bd1cbc {
+tc_3c76b0ff, TypeLD>, Enc_bd1cbc {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010010;
let addrMode = PostInc;
@@ -8861,7 +8902,7 @@ def L2_loadalignh_pr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++$Mu2)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100010;
let addrMode = PostInc;
@@ -8873,7 +8914,7 @@ def L2_loadalignh_zomap : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
"$Ryy32 = memh_fifo($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let Constraints = "$Ryy32 = $Ryy32in";
@@ -8882,7 +8923,7 @@ def L2_loadbsw2_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = membh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214 {
+tc_17e0d2cd, TypeLD>, Enc_de0214 {
let Inst{24-21} = 0b0001;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -8900,11 +8941,12 @@ def L2_loadbsw2_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110001;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -8913,7 +8955,7 @@ def L2_loadbsw2_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000001;
let hasNewValue = 1;
@@ -8928,7 +8970,7 @@ def L2_loadbsw2_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000001;
let hasNewValue = 1;
@@ -8943,7 +8985,7 @@ def L2_loadbsw2_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = membh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467 {
+tc_44d3da28, TypeLD>, Enc_152467 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010001;
let hasNewValue = 1;
@@ -8957,7 +8999,7 @@ def L2_loadbsw2_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100001;
let hasNewValue = 1;
@@ -8971,7 +9013,7 @@ def L2_loadbsw2_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = membh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -8981,7 +9023,7 @@ def L2_loadbsw4_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rdd32 = membh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_2d7491 {
+tc_17e0d2cd, TypeLD>, Enc_2d7491 {
let Inst{24-21} = 0b0111;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -8997,9 +9039,10 @@ def L2_loadbsw4_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110111;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9008,7 +9051,7 @@ def L2_loadbsw4_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_70b24b {
+tc_e93a3d71, TypeLD>, Enc_70b24b {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000111;
let addrMode = PostInc;
@@ -9021,7 +9064,7 @@ def L2_loadbsw4_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000111;
let addrMode = PostInc;
@@ -9034,7 +9077,7 @@ def L2_loadbsw4_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rdd32 = membh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_71f1b4 {
+tc_44d3da28, TypeLD>, Enc_71f1b4 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010111;
let addrMode = PostInc;
@@ -9046,7 +9089,7 @@ def L2_loadbsw4_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100111;
let addrMode = PostInc;
@@ -9058,7 +9101,7 @@ def L2_loadbsw4_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = membh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9066,7 +9109,7 @@ def L2_loadbzw2_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memubh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214 {
+tc_17e0d2cd, TypeLD>, Enc_de0214 {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9084,11 +9127,12 @@ def L2_loadbzw2_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110011;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9097,7 +9141,7 @@ def L2_loadbzw2_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000011;
let hasNewValue = 1;
@@ -9112,7 +9156,7 @@ def L2_loadbzw2_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000011;
let hasNewValue = 1;
@@ -9127,7 +9171,7 @@ def L2_loadbzw2_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memubh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467 {
+tc_44d3da28, TypeLD>, Enc_152467 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010011;
let hasNewValue = 1;
@@ -9141,7 +9185,7 @@ def L2_loadbzw2_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100011;
let hasNewValue = 1;
@@ -9155,7 +9199,7 @@ def L2_loadbzw2_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memubh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9165,7 +9209,7 @@ def L2_loadbzw4_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rdd32 = memubh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_2d7491 {
+tc_17e0d2cd, TypeLD>, Enc_2d7491 {
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -9181,9 +9225,10 @@ def L2_loadbzw4_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110101;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9192,7 +9237,7 @@ def L2_loadbzw4_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_70b24b {
+tc_e93a3d71, TypeLD>, Enc_70b24b {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000101;
let addrMode = PostInc;
@@ -9205,7 +9250,7 @@ def L2_loadbzw4_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000101;
let addrMode = PostInc;
@@ -9218,7 +9263,7 @@ def L2_loadbzw4_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rdd32 = memubh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_71f1b4 {
+tc_44d3da28, TypeLD>, Enc_71f1b4 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010101;
let addrMode = PostInc;
@@ -9230,7 +9275,7 @@ def L2_loadbzw4_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100101;
let addrMode = PostInc;
@@ -9242,7 +9287,7 @@ def L2_loadbzw4_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memubh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9250,7 +9295,7 @@ def L2_loadrb_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = memb($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9271,11 +9316,12 @@ def L2_loadrb_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9284,7 +9330,7 @@ def L2_loadrb_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e0a47a {
+tc_e93a3d71, TypeLD>, Enc_e0a47a {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001000;
let hasNewValue = 1;
@@ -9299,7 +9345,7 @@ def L2_loadrb_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001000;
let hasNewValue = 1;
@@ -9314,7 +9360,7 @@ def L2_loadrb_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Rd32 = memb($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011000;
let hasNewValue = 1;
@@ -9331,7 +9377,7 @@ def L2_loadrb_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101000;
let hasNewValue = 1;
@@ -9345,7 +9391,7 @@ def L2_loadrb_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memb($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9355,7 +9401,7 @@ def L2_loadrbgp : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memb(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9374,7 +9420,7 @@ def L2_loadrd_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s29_3Imm:$Ii),
"$Rdd32 = memd($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_fa3ba4, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_fa3ba4, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -9393,9 +9439,10 @@ def L2_loadrd_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111110;
+let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9404,7 +9451,7 @@ def L2_loadrd_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_b05839 {
+tc_e93a3d71, TypeLD>, Enc_b05839 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001110;
let addrMode = PostInc;
@@ -9417,7 +9464,7 @@ def L2_loadrd_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001110;
let addrMode = PostInc;
@@ -9430,7 +9477,7 @@ def L2_loadrd_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii),
"$Rdd32 = memd($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_5bdd42, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_5bdd42, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011110;
let addrMode = PostInc;
@@ -9445,7 +9492,7 @@ def L2_loadrd_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101110;
let addrMode = PostInc;
@@ -9457,7 +9504,7 @@ def L2_loadrd_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memd($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9465,7 +9512,7 @@ def L2_loadrdgp : HInst<
(outs DoubleRegs:$Rdd32),
(ins u29_3Imm:$Ii),
"$Rdd32 = memd(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_509701, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_509701, AddrModeRel {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b01001;
let accessSize = DoubleWordAccess;
@@ -9482,7 +9529,7 @@ def L2_loadrh_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9503,11 +9550,12 @@ def L2_loadrh_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111010;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9516,7 +9564,7 @@ def L2_loadrh_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001010;
let hasNewValue = 1;
@@ -9531,7 +9579,7 @@ def L2_loadrh_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001010;
let hasNewValue = 1;
@@ -9546,7 +9594,7 @@ def L2_loadrh_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011010;
let hasNewValue = 1;
@@ -9563,7 +9611,7 @@ def L2_loadrh_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101010;
let hasNewValue = 1;
@@ -9577,7 +9625,7 @@ def L2_loadrh_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9587,7 +9635,7 @@ def L2_loadrhgp : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memh(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9606,7 +9654,7 @@ def L2_loadri_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rd32 = memw($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_2a3787, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_2a3787, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9627,11 +9675,12 @@ def L2_loadri_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9640,7 +9689,7 @@ def L2_loadri_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_27fd0e {
+tc_e93a3d71, TypeLD>, Enc_27fd0e {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001100;
let hasNewValue = 1;
@@ -9655,7 +9704,7 @@ def L2_loadri_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001100;
let hasNewValue = 1;
@@ -9670,7 +9719,7 @@ def L2_loadri_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rd32 = memw($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_3d920a, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_3d920a, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011100;
let hasNewValue = 1;
@@ -9687,7 +9736,7 @@ def L2_loadri_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101100;
let hasNewValue = 1;
@@ -9701,7 +9750,7 @@ def L2_loadri_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memw($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9711,7 +9760,7 @@ def L2_loadrigp : HInst<
(outs IntRegs:$Rd32),
(ins u30_2Imm:$Ii),
"$Rd32 = memw(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9730,7 +9779,7 @@ def L2_loadrub_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = memub($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9751,11 +9800,12 @@ def L2_loadrub_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111001;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9764,7 +9814,7 @@ def L2_loadrub_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e0a47a {
+tc_e93a3d71, TypeLD>, Enc_e0a47a {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001001;
let hasNewValue = 1;
@@ -9779,7 +9829,7 @@ def L2_loadrub_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001001;
let hasNewValue = 1;
@@ -9794,7 +9844,7 @@ def L2_loadrub_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Rd32 = memub($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011001;
let hasNewValue = 1;
@@ -9811,7 +9861,7 @@ def L2_loadrub_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101001;
let hasNewValue = 1;
@@ -9825,7 +9875,7 @@ def L2_loadrub_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memub($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9835,7 +9885,7 @@ def L2_loadrubgp : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memub(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9854,7 +9904,7 @@ def L2_loadruh_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memuh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9875,11 +9925,12 @@ def L2_loadruh_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111011;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9888,7 +9939,7 @@ def L2_loadruh_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001011;
let hasNewValue = 1;
@@ -9903,7 +9954,7 @@ def L2_loadruh_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001011;
let hasNewValue = 1;
@@ -9918,7 +9969,7 @@ def L2_loadruh_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memuh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011011;
let hasNewValue = 1;
@@ -9935,7 +9986,7 @@ def L2_loadruh_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101011;
let hasNewValue = 1;
@@ -9949,7 +10000,7 @@ def L2_loadruh_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memuh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9959,7 +10010,7 @@ def L2_loadruhgp : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memuh(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9978,7 +10029,7 @@ def L2_loadw_locked : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memw_locked($Rs32)",
-tc_6aa5711a, TypeLD>, Enc_5e2823 {
+tc_b43e7930, TypeLD>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010010000;
let hasNewValue = 1;
@@ -9991,7 +10042,7 @@ def L2_ploadrbf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101000;
let isPredicated = 1;
@@ -10013,7 +10064,7 @@ def L2_ploadrbf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10030,7 +10081,7 @@ def L2_ploadrbf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memb($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10040,7 +10091,7 @@ def L2_ploadrbfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111000;
let isPredicated = 1;
@@ -10063,7 +10114,7 @@ def L2_ploadrbfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10081,7 +10132,7 @@ def L2_ploadrbfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memb($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10091,7 +10142,7 @@ def L2_ploadrbt_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001000;
let isPredicated = 1;
@@ -10112,7 +10163,7 @@ def L2_ploadrbt_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10128,7 +10179,7 @@ def L2_ploadrbt_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memb($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10138,7 +10189,7 @@ def L2_ploadrbtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011000;
let isPredicated = 1;
@@ -10160,7 +10211,7 @@ def L2_ploadrbtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10177,7 +10228,7 @@ def L2_ploadrbtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memb($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10187,7 +10238,7 @@ def L2_ploadrdf_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101110;
let isPredicated = 1;
@@ -10207,7 +10258,7 @@ def L2_ploadrdf_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_9d1247, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10222,7 +10273,7 @@ def L2_ploadrdf_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rdd32 = memd($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10230,7 +10281,7 @@ def L2_ploadrdfnew_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111110;
let isPredicated = 1;
@@ -10251,7 +10302,7 @@ def L2_ploadrdfnew_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_9d1247, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10267,7 +10318,7 @@ def L2_ploadrdfnew_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rdd32 = memd($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10275,7 +10326,7 @@ def L2_ploadrdt_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if ($Pt4) $Rdd32 = memd($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001110;
let isPredicated = 1;
@@ -10294,7 +10345,7 @@ def L2_ploadrdt_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if ($Pt4) $Rdd32 = memd($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_9d1247, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10308,7 +10359,7 @@ def L2_ploadrdt_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rdd32 = memd($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10316,7 +10367,7 @@ def L2_ploadrdtnew_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011110;
let isPredicated = 1;
@@ -10336,7 +10387,7 @@ def L2_ploadrdtnew_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_9d1247, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10351,7 +10402,7 @@ def L2_ploadrdtnew_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rdd32 = memd($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10359,7 +10410,7 @@ def L2_ploadrhf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101010;
let isPredicated = 1;
@@ -10381,7 +10432,7 @@ def L2_ploadrhf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10398,7 +10449,7 @@ def L2_ploadrhf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10408,7 +10459,7 @@ def L2_ploadrhfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111010;
let isPredicated = 1;
@@ -10431,7 +10482,7 @@ def L2_ploadrhfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10449,7 +10500,7 @@ def L2_ploadrhfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10459,7 +10510,7 @@ def L2_ploadrht_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4) $Rd32 = memh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001010;
let isPredicated = 1;
@@ -10480,7 +10531,7 @@ def L2_ploadrht_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4) $Rd32 = memh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10496,7 +10547,7 @@ def L2_ploadrht_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10506,7 +10557,7 @@ def L2_ploadrhtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011010;
let isPredicated = 1;
@@ -10528,7 +10579,7 @@ def L2_ploadrhtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10545,7 +10596,7 @@ def L2_ploadrhtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10555,7 +10606,7 @@ def L2_ploadrif_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if (!$Pt4) $Rd32 = memw($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101100;
let isPredicated = 1;
@@ -10577,7 +10628,7 @@ def L2_ploadrif_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if (!$Pt4) $Rd32 = memw($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_b97f71, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10594,7 +10645,7 @@ def L2_ploadrif_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memw($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10604,7 +10655,7 @@ def L2_ploadrifnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111100;
let isPredicated = 1;
@@ -10627,7 +10678,7 @@ def L2_ploadrifnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_b97f71, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10645,7 +10696,7 @@ def L2_ploadrifnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memw($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10655,7 +10706,7 @@ def L2_ploadrit_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if ($Pt4) $Rd32 = memw($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001100;
let isPredicated = 1;
@@ -10676,7 +10727,7 @@ def L2_ploadrit_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if ($Pt4) $Rd32 = memw($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_b97f71, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10692,7 +10743,7 @@ def L2_ploadrit_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memw($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10702,7 +10753,7 @@ def L2_ploadritnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011100;
let isPredicated = 1;
@@ -10724,7 +10775,7 @@ def L2_ploadritnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_b97f71, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10741,7 +10792,7 @@ def L2_ploadritnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memw($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10751,7 +10802,7 @@ def L2_ploadrubf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101001;
let isPredicated = 1;
@@ -10773,7 +10824,7 @@ def L2_ploadrubf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10790,7 +10841,7 @@ def L2_ploadrubf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memub($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10800,7 +10851,7 @@ def L2_ploadrubfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111001;
let isPredicated = 1;
@@ -10823,7 +10874,7 @@ def L2_ploadrubfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10841,7 +10892,7 @@ def L2_ploadrubfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memub($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10851,7 +10902,7 @@ def L2_ploadrubt_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001001;
let isPredicated = 1;
@@ -10872,7 +10923,7 @@ def L2_ploadrubt_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10888,7 +10939,7 @@ def L2_ploadrubt_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memub($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10898,7 +10949,7 @@ def L2_ploadrubtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011001;
let isPredicated = 1;
@@ -10920,7 +10971,7 @@ def L2_ploadrubtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10937,7 +10988,7 @@ def L2_ploadrubtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memub($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10947,7 +10998,7 @@ def L2_ploadruhf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101011;
let isPredicated = 1;
@@ -10969,7 +11020,7 @@ def L2_ploadruhf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -10986,7 +11037,7 @@ def L2_ploadruhf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memuh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10996,7 +11047,7 @@ def L2_ploadruhfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111011;
let isPredicated = 1;
@@ -11019,7 +11070,7 @@ def L2_ploadruhfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11037,7 +11088,7 @@ def L2_ploadruhfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memuh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11047,7 +11098,7 @@ def L2_ploadruht_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4) $Rd32 = memuh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001011;
let isPredicated = 1;
@@ -11068,7 +11119,7 @@ def L2_ploadruht_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4) $Rd32 = memuh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11084,7 +11135,7 @@ def L2_ploadruht_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memuh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11094,7 +11145,7 @@ def L2_ploadruhtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011011;
let isPredicated = 1;
@@ -11116,7 +11167,7 @@ def L2_ploadruhtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11133,7 +11184,7 @@ def L2_ploadruhtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memuh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11143,7 +11194,7 @@ def L4_add_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) += $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -11162,7 +11213,7 @@ def L4_add_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) += $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11170,7 +11221,7 @@ def L4_add_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) += $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -11189,7 +11240,7 @@ def L4_add_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) += $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11197,7 +11248,7 @@ def L4_add_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) += $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -11216,7 +11267,7 @@ def L4_add_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) += $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11224,7 +11275,7 @@ def L4_and_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) &= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -11243,7 +11294,7 @@ def L4_and_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) &= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11251,7 +11302,7 @@ def L4_and_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) &= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -11270,7 +11321,7 @@ def L4_and_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) &= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11278,7 +11329,7 @@ def L4_and_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) &= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -11297,7 +11348,7 @@ def L4_and_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) &= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11305,7 +11356,7 @@ def L4_iadd_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) += #$II",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11324,7 +11375,7 @@ def L4_iadd_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) += #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11332,7 +11383,7 @@ def L4_iadd_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) += #$II",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11351,7 +11402,7 @@ def L4_iadd_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) += #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11359,7 +11410,7 @@ def L4_iadd_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) += #$II",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11378,7 +11429,7 @@ def L4_iadd_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) += #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11386,7 +11437,7 @@ def L4_iand_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) = clrbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11405,7 +11456,7 @@ def L4_iand_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) = clrbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11413,7 +11464,7 @@ def L4_iand_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) = clrbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11432,7 +11483,7 @@ def L4_iand_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) = clrbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11440,7 +11491,7 @@ def L4_iand_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) = clrbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11459,7 +11510,7 @@ def L4_iand_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) = clrbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11467,7 +11518,7 @@ def L4_ior_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) = setbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11486,7 +11537,7 @@ def L4_ior_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) = setbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11494,7 +11545,7 @@ def L4_ior_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) = setbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11513,7 +11564,7 @@ def L4_ior_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) = setbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11521,7 +11572,7 @@ def L4_ior_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) = setbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11540,7 +11591,7 @@ def L4_ior_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) = setbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11548,7 +11599,7 @@ def L4_isub_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) -= #$II",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11567,7 +11618,7 @@ def L4_isub_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) -= #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11575,7 +11626,7 @@ def L4_isub_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) -= #$II",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11594,7 +11645,7 @@ def L4_isub_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) -= #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11602,7 +11653,7 @@ def L4_isub_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) -= #$II",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11621,7 +11672,7 @@ def L4_isub_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) -= #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11629,7 +11680,7 @@ def L4_loadalignb_ap : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
"$Ryy32 = memb_fifo($Re32=#$II)",
-tc_5acef64a, TypeLD>, Enc_f394d3 {
+tc_7a91e76a, TypeLD>, Enc_f394d3 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010100;
@@ -11649,7 +11700,7 @@ def L4_loadalignb_ur : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Ryy32 = memb_fifo($Rt32<<#$Ii+#$II)",
-tc_0cd51c76, TypeLD>, Enc_04c959 {
+tc_a5d4aeec, TypeLD>, Enc_04c959 {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100100;
let addrMode = BaseLongOffset;
@@ -11669,7 +11720,7 @@ def L4_loadalignh_ap : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
"$Ryy32 = memh_fifo($Re32=#$II)",
-tc_5acef64a, TypeLD>, Enc_f394d3 {
+tc_7a91e76a, TypeLD>, Enc_f394d3 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010010;
@@ -11689,7 +11740,7 @@ def L4_loadalignh_ur : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Ryy32 = memh_fifo($Rt32<<#$Ii+#$II)",
-tc_0cd51c76, TypeLD>, Enc_04c959 {
+tc_a5d4aeec, TypeLD>, Enc_04c959 {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100010;
let addrMode = BaseLongOffset;
@@ -11709,7 +11760,7 @@ def L4_loadbsw2_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = membh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010001;
@@ -11730,7 +11781,7 @@ def L4_loadbsw2_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = membh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b {
+tc_bab0eed9, TypeLD>, Enc_4f677b {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100001;
let hasNewValue = 1;
@@ -11751,7 +11802,7 @@ def L4_loadbsw4_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = membh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_7fa7f6 {
+tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010111;
@@ -11770,7 +11821,7 @@ def L4_loadbsw4_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = membh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_6185fe {
+tc_bab0eed9, TypeLD>, Enc_6185fe {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100111;
let addrMode = BaseLongOffset;
@@ -11789,7 +11840,7 @@ def L4_loadbzw2_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memubh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010011;
@@ -11810,7 +11861,7 @@ def L4_loadbzw2_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memubh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b {
+tc_bab0eed9, TypeLD>, Enc_4f677b {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100011;
let hasNewValue = 1;
@@ -11831,7 +11882,7 @@ def L4_loadbzw4_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = memubh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_7fa7f6 {
+tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010101;
@@ -11850,7 +11901,7 @@ def L4_loadbzw4_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = memubh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_6185fe {
+tc_bab0eed9, TypeLD>, Enc_6185fe {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100101;
let addrMode = BaseLongOffset;
@@ -11869,7 +11920,7 @@ def L4_loadd_locked : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memd_locked($Rs32)",
-tc_6aa5711a, TypeLD>, Enc_3a3d62 {
+tc_b43e7930, TypeLD>, Enc_3a3d62 {
let Inst{13-5} = 0b010000000;
let Inst{31-21} = 0b10010010000;
let accessSize = DoubleWordAccess;
@@ -11880,7 +11931,7 @@ def L4_loadrb_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memb($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011000;
@@ -11901,7 +11952,7 @@ def L4_loadrb_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010000;
let hasNewValue = 1;
@@ -11918,7 +11969,7 @@ def L4_loadrb_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memb($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101000;
let hasNewValue = 1;
@@ -11940,7 +11991,7 @@ def L4_loadrd_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = memd($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_7fa7f6 {
+tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011110;
@@ -11959,7 +12010,7 @@ def L4_loadrd_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_84bff1, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_84bff1, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010110;
let addrMode = BaseRegOffset;
@@ -11974,7 +12025,7 @@ def L4_loadrd_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = memd($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_6185fe, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_6185fe, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101110;
let addrMode = BaseLongOffset;
@@ -11994,7 +12045,7 @@ def L4_loadrh_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011010;
@@ -12015,7 +12066,7 @@ def L4_loadrh_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010010;
let hasNewValue = 1;
@@ -12032,7 +12083,7 @@ def L4_loadrh_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101010;
let hasNewValue = 1;
@@ -12054,7 +12105,7 @@ def L4_loadri_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memw($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011100;
@@ -12075,7 +12126,7 @@ def L4_loadri_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010100;
let hasNewValue = 1;
@@ -12092,7 +12143,7 @@ def L4_loadri_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memw($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101100;
let hasNewValue = 1;
@@ -12114,7 +12165,7 @@ def L4_loadrub_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memub($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011001;
@@ -12135,7 +12186,7 @@ def L4_loadrub_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010001;
let hasNewValue = 1;
@@ -12152,7 +12203,7 @@ def L4_loadrub_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memub($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101001;
let hasNewValue = 1;
@@ -12174,7 +12225,7 @@ def L4_loadruh_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memuh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011011;
@@ -12195,7 +12246,7 @@ def L4_loadruh_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010011;
let hasNewValue = 1;
@@ -12212,7 +12263,7 @@ def L4_loadruh_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memuh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101011;
let hasNewValue = 1;
@@ -12234,7 +12285,7 @@ def L4_or_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) |= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -12253,7 +12304,7 @@ def L4_or_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) |= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12261,7 +12312,7 @@ def L4_or_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) |= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -12280,7 +12331,7 @@ def L4_or_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) |= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12288,7 +12339,7 @@ def L4_or_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) |= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -12307,7 +12358,7 @@ def L4_or_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) |= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12315,7 +12366,7 @@ def L4_ploadrbf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111000;
@@ -12340,7 +12391,7 @@ def L4_ploadrbf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12357,7 +12408,7 @@ def L4_ploadrbfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111000;
@@ -12383,7 +12434,7 @@ def L4_ploadrbfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12401,7 +12452,7 @@ def L4_ploadrbt_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111000;
@@ -12425,7 +12476,7 @@ def L4_ploadrbt_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000000;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12441,7 +12492,7 @@ def L4_ploadrbtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111000;
@@ -12466,7 +12517,7 @@ def L4_ploadrbtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010000;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12483,7 +12534,7 @@ def L4_ploadrdf_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111110;
@@ -12506,7 +12557,7 @@ def L4_ploadrdf_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110001110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12521,7 +12572,7 @@ def L4_ploadrdfnew_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111110;
@@ -12545,7 +12596,7 @@ def L4_ploadrdfnew_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110011110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12561,7 +12612,7 @@ def L4_ploadrdt_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rdd32 = memd(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111110;
@@ -12583,7 +12634,7 @@ def L4_ploadrdt_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110000110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -12597,7 +12648,7 @@ def L4_ploadrdtnew_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111110;
@@ -12620,7 +12671,7 @@ def L4_ploadrdtnew_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110010110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -12635,7 +12686,7 @@ def L4_ploadrhf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111010;
@@ -12660,7 +12711,7 @@ def L4_ploadrhf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12677,7 +12728,7 @@ def L4_ploadrhfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111010;
@@ -12703,7 +12754,7 @@ def L4_ploadrhfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12721,7 +12772,7 @@ def L4_ploadrht_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111010;
@@ -12745,7 +12796,7 @@ def L4_ploadrht_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000010;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12761,7 +12812,7 @@ def L4_ploadrhtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111010;
@@ -12786,7 +12837,7 @@ def L4_ploadrhtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010010;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12803,7 +12854,7 @@ def L4_ploadrif_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memw(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111100;
@@ -12828,7 +12879,7 @@ def L4_ploadrif_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12845,7 +12896,7 @@ def L4_ploadrifnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111100;
@@ -12871,7 +12922,7 @@ def L4_ploadrifnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12889,7 +12940,7 @@ def L4_ploadrit_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memw(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111100;
@@ -12913,7 +12964,7 @@ def L4_ploadrit_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12929,7 +12980,7 @@ def L4_ploadritnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111100;
@@ -12954,7 +13005,7 @@ def L4_ploadritnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12971,7 +13022,7 @@ def L4_ploadrubf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111001;
@@ -12996,7 +13047,7 @@ def L4_ploadrubf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001001;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13013,7 +13064,7 @@ def L4_ploadrubfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111001;
@@ -13039,7 +13090,7 @@ def L4_ploadrubfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011001;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13057,7 +13108,7 @@ def L4_ploadrubt_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111001;
@@ -13081,7 +13132,7 @@ def L4_ploadrubt_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000001;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13097,7 +13148,7 @@ def L4_ploadrubtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111001;
@@ -13122,7 +13173,7 @@ def L4_ploadrubtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010001;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13139,7 +13190,7 @@ def L4_ploadruhf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111011;
@@ -13164,7 +13215,7 @@ def L4_ploadruhf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13181,7 +13232,7 @@ def L4_ploadruhfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111011;
@@ -13207,7 +13258,7 @@ def L4_ploadruhfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13225,7 +13276,7 @@ def L4_ploadruht_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memuh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111011;
@@ -13249,7 +13300,7 @@ def L4_ploadruht_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000011;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13265,7 +13316,7 @@ def L4_ploadruhtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111011;
@@ -13290,7 +13341,7 @@ def L4_ploadruhtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010011;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13307,7 +13358,7 @@ def L4_return : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = dealloc_return($Rs32):raw",
-tc_3d04548d, TypeLD>, Enc_3a3d62, PredNewRel {
+tc_675e4897, TypeLD>, Enc_3a3d62, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010110000;
let isTerminator = 1;
@@ -13328,7 +13379,7 @@ def L4_return_f : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4) $Rdd32 = dealloc_return($Rs32):raw",
-tc_513bef45, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_2b8da4c2, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1100;
let Inst{31-21} = 0b10010110000;
@@ -13350,7 +13401,7 @@ def L4_return_fnew_pnt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4.new) $Rdd32 = dealloc_return($Rs32):nt:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b10010110000;
@@ -13373,7 +13424,7 @@ def L4_return_fnew_pt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4.new) $Rdd32 = dealloc_return($Rs32):t:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1110;
let Inst{31-21} = 0b10010110000;
@@ -13396,7 +13447,7 @@ def L4_return_map_to_raw_f : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4) dealloc_return",
-tc_513bef45, TypeMAPPING>, Requires<[HasV65]> {
+tc_2b8da4c2, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13404,7 +13455,7 @@ def L4_return_map_to_raw_fnew_pnt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4.new) dealloc_return:nt",
-tc_395dc00f, TypeMAPPING>, Requires<[HasV65]> {
+tc_9da59d12, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13412,7 +13463,7 @@ def L4_return_map_to_raw_fnew_pt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4.new) dealloc_return:t",
-tc_395dc00f, TypeMAPPING>, Requires<[HasV65]> {
+tc_9da59d12, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13420,7 +13471,7 @@ def L4_return_map_to_raw_t : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4) dealloc_return",
-tc_3bc2c5d3, TypeMAPPING>, Requires<[HasV65]> {
+tc_4d5fa3a1, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13428,7 +13479,7 @@ def L4_return_map_to_raw_tnew_pnt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4.new) dealloc_return:nt",
-tc_e7624c08, TypeMAPPING>, Requires<[HasV65]> {
+tc_e06f432a, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13436,7 +13487,7 @@ def L4_return_map_to_raw_tnew_pt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4.new) dealloc_return:t",
-tc_e7624c08, TypeMAPPING>, Requires<[HasV65]> {
+tc_e06f432a, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13444,7 +13495,7 @@ def L4_return_t : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4) $Rdd32 = dealloc_return($Rs32):raw",
-tc_513bef45, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_2b8da4c2, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b10010110000;
@@ -13465,7 +13516,7 @@ def L4_return_tnew_pnt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4.new) $Rdd32 = dealloc_return($Rs32):nt:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b10010110000;
@@ -13487,7 +13538,7 @@ def L4_return_tnew_pt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4.new) $Rdd32 = dealloc_return($Rs32):t:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b10010110000;
@@ -13509,7 +13560,7 @@ def L4_sub_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) -= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -13528,7 +13579,7 @@ def L4_sub_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) -= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13536,7 +13587,7 @@ def L4_sub_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) -= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -13555,7 +13606,7 @@ def L4_sub_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) -= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13563,7 +13614,7 @@ def L4_sub_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) -= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -13582,7 +13633,7 @@ def L4_sub_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) -= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13590,15 +13641,26 @@ def L6_deallocframe_map_to_raw : HInst<
(outs),
(ins),
"deallocframe",
-tc_d1090e34, TypeMAPPING>, Requires<[HasV65]> {
+tc_15aa71c5, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
+def L6_memcpy : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, ModRegs:$Mu2),
+"memcpy($Rs32,$Rt32,$Mu2)",
+tc_a6b1eca9, TypeLD>, Enc_a75aa6, Requires<[HasV66]> {
+let Inst{7-0} = 0b01000000;
+let Inst{31-21} = 0b10010010000;
+let mayLoad = 1;
+let isSolo = 1;
+let mayStore = 1;
+}
def L6_return_map_to_raw : HInst<
(outs),
(ins),
"dealloc_return",
-tc_3d04548d, TypeMAPPING>, Requires<[HasV65]> {
+tc_675e4897, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13606,7 +13668,7 @@ def M2_acci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += add($Rs32,$Rt32)",
-tc_c74f796f, TypeM>, Enc_2ae154, ImmRegRel {
+tc_f675fee8, TypeM>, Enc_2ae154, ImmRegRel {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -13621,7 +13683,7 @@ def M2_accii : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 += add($Rs32,#$Ii)",
-tc_c74f796f, TypeM>, Enc_c90aca, ImmRegRel {
+tc_f675fee8, TypeM>, Enc_c90aca, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100010000;
let hasNewValue = 1;
@@ -13640,7 +13702,7 @@ def M2_cmaci_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpyi($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13651,7 +13713,7 @@ def M2_cmacr_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpyr($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13662,7 +13724,7 @@ def M2_cmacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13674,7 +13736,7 @@ def M2_cmacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -13686,7 +13748,7 @@ def M2_cmacsc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32*):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13698,7 +13760,7 @@ def M2_cmacsc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32*):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -13710,7 +13772,7 @@ def M2_cmpyi_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpyi($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13720,7 +13782,7 @@ def M2_cmpyr_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpyr($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13730,7 +13792,7 @@ def M2_cmpyrs_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -13743,7 +13805,7 @@ def M2_cmpyrs_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -13756,7 +13818,7 @@ def M2_cmpyrsc_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32*):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101011;
@@ -13769,7 +13831,7 @@ def M2_cmpyrsc_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -13782,7 +13844,7 @@ def M2_cmpys_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13793,7 +13855,7 @@ def M2_cmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -13804,7 +13866,7 @@ def M2_cmpysc_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32*):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -13815,7 +13877,7 @@ def M2_cmpysc_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32*):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101110;
@@ -13826,7 +13888,7 @@ def M2_cnacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13838,7 +13900,7 @@ def M2_cnacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -13850,7 +13912,7 @@ def M2_cnacsc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32*):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13862,7 +13924,7 @@ def M2_cnacsc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32*):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -13874,7 +13936,7 @@ def M2_dpmpyss_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13885,7 +13947,7 @@ def M2_dpmpyss_nac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -13896,7 +13958,7 @@ def M2_dpmpyss_rnd_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -13908,7 +13970,7 @@ def M2_dpmpyss_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13918,7 +13980,7 @@ def M2_dpmpyuu_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13929,7 +13991,7 @@ def M2_dpmpyuu_nac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111011;
@@ -13940,7 +14002,7 @@ def M2_dpmpyuu_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -13950,7 +14012,7 @@ def M2_hmmpyh_rs1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.h):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -13963,7 +14025,7 @@ def M2_hmmpyh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.h):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -13976,7 +14038,7 @@ def M2_hmmpyl_rs1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.l):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -13989,7 +14051,7 @@ def M2_hmmpyl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.l):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -14002,7 +14064,7 @@ def M2_maci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyi($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_2ae154, ImmRegRel {
+tc_d773585a, TypeM>, Enc_2ae154, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -14017,7 +14079,7 @@ def M2_macsin : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rx32 -= mpyi($Rs32,#$Ii)",
-tc_16d0d8d5, TypeM>, Enc_c90aca {
+tc_05d3a09b, TypeM>, Enc_c90aca {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100001100;
let hasNewValue = 1;
@@ -14035,7 +14097,7 @@ def M2_macsip : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rx32 += mpyi($Rs32,#$Ii)",
-tc_16d0d8d5, TypeM>, Enc_c90aca, ImmRegRel {
+tc_05d3a09b, TypeM>, Enc_c90aca, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100001000;
let hasNewValue = 1;
@@ -14054,7 +14116,7 @@ def M2_mmachs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -14066,7 +14128,7 @@ def M2_mmachs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -14078,7 +14140,7 @@ def M2_mmachs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -14090,7 +14152,7 @@ def M2_mmachs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -14102,7 +14164,7 @@ def M2_mmacls_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -14114,7 +14176,7 @@ def M2_mmacls_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -14126,7 +14188,7 @@ def M2_mmacls_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -14138,7 +14200,7 @@ def M2_mmacls_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -14150,7 +14212,7 @@ def M2_mmacuhs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -14162,7 +14224,7 @@ def M2_mmacuhs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -14174,7 +14236,7 @@ def M2_mmacuhs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -14186,7 +14248,7 @@ def M2_mmacuhs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -14198,7 +14260,7 @@ def M2_mmaculs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -14210,7 +14272,7 @@ def M2_mmaculs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -14222,7 +14284,7 @@ def M2_mmaculs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -14234,7 +14296,7 @@ def M2_mmaculs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -14246,7 +14308,7 @@ def M2_mmpyh_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -14257,7 +14319,7 @@ def M2_mmpyh_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -14268,7 +14330,7 @@ def M2_mmpyh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -14279,7 +14341,7 @@ def M2_mmpyh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -14290,7 +14352,7 @@ def M2_mmpyl_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -14301,7 +14363,7 @@ def M2_mmpyl_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -14312,7 +14374,7 @@ def M2_mmpyl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -14323,7 +14385,7 @@ def M2_mmpyl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -14334,7 +14396,7 @@ def M2_mmpyuh_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -14345,7 +14407,7 @@ def M2_mmpyuh_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -14356,7 +14418,7 @@ def M2_mmpyuh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -14367,7 +14429,7 @@ def M2_mmpyuh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -14378,7 +14440,7 @@ def M2_mmpyul_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -14389,7 +14451,7 @@ def M2_mmpyul_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -14400,7 +14462,7 @@ def M2_mmpyul_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -14411,18 +14473,31 @@ def M2_mmpyul_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
let prefersSlot3 = 1;
let Defs = [USR_OVF];
}
+def M2_mnaci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyi($Rs32,$Rt32)",
+tc_bdceeac1, TypeM>, Enc_2ae154, Requires<[HasV66]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
def M2_mpy_acc_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14435,7 +14510,7 @@ def M2_mpy_acc_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14448,7 +14523,7 @@ def M2_mpy_acc_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14461,7 +14536,7 @@ def M2_mpy_acc_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14474,7 +14549,7 @@ def M2_mpy_acc_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14487,7 +14562,7 @@ def M2_mpy_acc_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14500,7 +14575,7 @@ def M2_mpy_acc_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14513,7 +14588,7 @@ def M2_mpy_acc_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14526,7 +14601,7 @@ def M2_mpy_acc_sat_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14540,7 +14615,7 @@ def M2_mpy_acc_sat_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14554,7 +14629,7 @@ def M2_mpy_acc_sat_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14568,7 +14643,7 @@ def M2_mpy_acc_sat_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14582,7 +14657,7 @@ def M2_mpy_acc_sat_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14596,7 +14671,7 @@ def M2_mpy_acc_sat_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14610,7 +14685,7 @@ def M2_mpy_acc_sat_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14624,7 +14699,7 @@ def M2_mpy_acc_sat_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14638,7 +14713,7 @@ def M2_mpy_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14650,7 +14725,7 @@ def M2_mpy_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14662,7 +14737,7 @@ def M2_mpy_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14674,7 +14749,7 @@ def M2_mpy_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14686,7 +14761,7 @@ def M2_mpy_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14698,7 +14773,7 @@ def M2_mpy_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14710,7 +14785,7 @@ def M2_mpy_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14722,7 +14797,7 @@ def M2_mpy_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14734,7 +14809,7 @@ def M2_mpy_nac_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14747,7 +14822,7 @@ def M2_mpy_nac_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14760,7 +14835,7 @@ def M2_mpy_nac_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14773,7 +14848,7 @@ def M2_mpy_nac_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14786,7 +14861,7 @@ def M2_mpy_nac_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14799,7 +14874,7 @@ def M2_mpy_nac_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14812,7 +14887,7 @@ def M2_mpy_nac_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14825,7 +14900,7 @@ def M2_mpy_nac_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14838,7 +14913,7 @@ def M2_mpy_nac_sat_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14852,7 +14927,7 @@ def M2_mpy_nac_sat_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14866,7 +14941,7 @@ def M2_mpy_nac_sat_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14880,7 +14955,7 @@ def M2_mpy_nac_sat_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14894,7 +14969,7 @@ def M2_mpy_nac_sat_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14908,7 +14983,7 @@ def M2_mpy_nac_sat_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14922,7 +14997,7 @@ def M2_mpy_nac_sat_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14936,7 +15011,7 @@ def M2_mpy_nac_sat_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14950,7 +15025,7 @@ def M2_mpy_rnd_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -14962,7 +15037,7 @@ def M2_mpy_rnd_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -14974,7 +15049,7 @@ def M2_mpy_rnd_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -14986,7 +15061,7 @@ def M2_mpy_rnd_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -14998,7 +15073,7 @@ def M2_mpy_rnd_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15010,7 +15085,7 @@ def M2_mpy_rnd_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15022,7 +15097,7 @@ def M2_mpy_rnd_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15034,7 +15109,7 @@ def M2_mpy_rnd_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15046,7 +15121,7 @@ def M2_mpy_sat_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15059,7 +15134,7 @@ def M2_mpy_sat_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15072,7 +15147,7 @@ def M2_mpy_sat_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15085,7 +15160,7 @@ def M2_mpy_sat_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15098,7 +15173,7 @@ def M2_mpy_sat_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15111,7 +15186,7 @@ def M2_mpy_sat_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15124,7 +15199,7 @@ def M2_mpy_sat_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15137,7 +15212,7 @@ def M2_mpy_sat_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15150,7 +15225,7 @@ def M2_mpy_sat_rnd_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15163,7 +15238,7 @@ def M2_mpy_sat_rnd_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15176,7 +15251,7 @@ def M2_mpy_sat_rnd_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15189,7 +15264,7 @@ def M2_mpy_sat_rnd_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15202,7 +15277,7 @@ def M2_mpy_sat_rnd_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15215,7 +15290,7 @@ def M2_mpy_sat_rnd_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15228,7 +15303,7 @@ def M2_mpy_sat_rnd_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15241,7 +15316,7 @@ def M2_mpy_sat_rnd_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15254,7 +15329,7 @@ def M2_mpy_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101000;
@@ -15266,7 +15341,7 @@ def M2_mpy_up_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -15278,7 +15353,7 @@ def M2_mpy_up_s1_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -15291,7 +15366,7 @@ def M2_mpyd_acc_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15302,7 +15377,7 @@ def M2_mpyd_acc_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15313,7 +15388,7 @@ def M2_mpyd_acc_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15324,7 +15399,7 @@ def M2_mpyd_acc_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15335,7 +15410,7 @@ def M2_mpyd_acc_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15346,7 +15421,7 @@ def M2_mpyd_acc_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15357,7 +15432,7 @@ def M2_mpyd_acc_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15368,7 +15443,7 @@ def M2_mpyd_acc_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15379,7 +15454,7 @@ def M2_mpyd_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15389,7 +15464,7 @@ def M2_mpyd_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15399,7 +15474,7 @@ def M2_mpyd_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15409,7 +15484,7 @@ def M2_mpyd_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15419,7 +15494,7 @@ def M2_mpyd_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15429,7 +15504,7 @@ def M2_mpyd_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15439,7 +15514,7 @@ def M2_mpyd_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15449,7 +15524,7 @@ def M2_mpyd_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15459,7 +15534,7 @@ def M2_mpyd_nac_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15470,7 +15545,7 @@ def M2_mpyd_nac_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15481,7 +15556,7 @@ def M2_mpyd_nac_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15492,7 +15567,7 @@ def M2_mpyd_nac_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15503,7 +15578,7 @@ def M2_mpyd_nac_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15514,7 +15589,7 @@ def M2_mpyd_nac_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15525,7 +15600,7 @@ def M2_mpyd_nac_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15536,7 +15611,7 @@ def M2_mpyd_nac_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15547,7 +15622,7 @@ def M2_mpyd_rnd_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15557,7 +15632,7 @@ def M2_mpyd_rnd_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15567,7 +15642,7 @@ def M2_mpyd_rnd_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15577,7 +15652,7 @@ def M2_mpyd_rnd_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15587,7 +15662,7 @@ def M2_mpyd_rnd_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15597,7 +15672,7 @@ def M2_mpyd_rnd_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15607,7 +15682,7 @@ def M2_mpyd_rnd_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15617,7 +15692,7 @@ def M2_mpyd_rnd_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15627,7 +15702,7 @@ def M2_mpyi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyi($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be, ImmRegRel {
+tc_bafaade3, TypeM>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101000;
@@ -15641,7 +15716,7 @@ def M2_mpysin : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Rd32 = -mpyi($Rs32,#$Ii)",
-tc_1853ea6d, TypeM>, Enc_b8c967 {
+tc_c8ce0b5c, TypeM>, Enc_b8c967 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100000100;
let hasNewValue = 1;
@@ -15652,7 +15727,7 @@ def M2_mpysip : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rd32 = +mpyi($Rs32,#$Ii)",
-tc_1853ea6d, TypeM>, Enc_b8c967 {
+tc_c8ce0b5c, TypeM>, Enc_b8c967 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100000000;
let hasNewValue = 1;
@@ -15668,7 +15743,7 @@ def M2_mpysmi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, m32_0Imm:$Ii),
"$Rd32 = mpyi($Rs32,#$Ii)",
-tc_1853ea6d, TypeM>, ImmRegRel {
+tc_c8ce0b5c, TypeM>, ImmRegRel {
let hasNewValue = 1;
let opNewValue = 0;
let CextOpcode = "M2_mpyi";
@@ -15684,7 +15759,7 @@ def M2_mpysu_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpysu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101011;
@@ -15696,7 +15771,7 @@ def M2_mpyu_acc_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15709,7 +15784,7 @@ def M2_mpyu_acc_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15722,7 +15797,7 @@ def M2_mpyu_acc_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15735,7 +15810,7 @@ def M2_mpyu_acc_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15748,7 +15823,7 @@ def M2_mpyu_acc_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15761,7 +15836,7 @@ def M2_mpyu_acc_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15774,7 +15849,7 @@ def M2_mpyu_acc_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15787,7 +15862,7 @@ def M2_mpyu_acc_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15800,7 +15875,7 @@ def M2_mpyu_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15812,7 +15887,7 @@ def M2_mpyu_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15824,7 +15899,7 @@ def M2_mpyu_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15836,7 +15911,7 @@ def M2_mpyu_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15848,7 +15923,7 @@ def M2_mpyu_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15860,7 +15935,7 @@ def M2_mpyu_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15872,7 +15947,7 @@ def M2_mpyu_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15884,7 +15959,7 @@ def M2_mpyu_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15896,7 +15971,7 @@ def M2_mpyu_nac_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15909,7 +15984,7 @@ def M2_mpyu_nac_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -15922,7 +15997,7 @@ def M2_mpyu_nac_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15935,7 +16010,7 @@ def M2_mpyu_nac_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -15948,7 +16023,7 @@ def M2_mpyu_nac_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15961,7 +16036,7 @@ def M2_mpyu_nac_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -15974,7 +16049,7 @@ def M2_mpyu_nac_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15987,7 +16062,7 @@ def M2_mpyu_nac_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -16000,7 +16075,7 @@ def M2_mpyu_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101010;
@@ -16012,7 +16087,7 @@ def M2_mpyud_acc_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16023,7 +16098,7 @@ def M2_mpyud_acc_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16034,7 +16109,7 @@ def M2_mpyud_acc_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16045,7 +16120,7 @@ def M2_mpyud_acc_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16056,7 +16131,7 @@ def M2_mpyud_acc_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16067,7 +16142,7 @@ def M2_mpyud_acc_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16078,7 +16153,7 @@ def M2_mpyud_acc_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16089,7 +16164,7 @@ def M2_mpyud_acc_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16100,7 +16175,7 @@ def M2_mpyud_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16110,7 +16185,7 @@ def M2_mpyud_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16120,7 +16195,7 @@ def M2_mpyud_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16130,7 +16205,7 @@ def M2_mpyud_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16140,7 +16215,7 @@ def M2_mpyud_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16150,7 +16225,7 @@ def M2_mpyud_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16160,7 +16235,7 @@ def M2_mpyud_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16170,7 +16245,7 @@ def M2_mpyud_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16180,7 +16255,7 @@ def M2_mpyud_nac_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16191,7 +16266,7 @@ def M2_mpyud_nac_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16202,7 +16277,7 @@ def M2_mpyud_nac_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16213,7 +16288,7 @@ def M2_mpyud_nac_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16224,7 +16299,7 @@ def M2_mpyud_nac_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16235,7 +16310,7 @@ def M2_mpyud_nac_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16246,7 +16321,7 @@ def M2_mpyud_nac_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16257,7 +16332,7 @@ def M2_mpyud_nac_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16268,7 +16343,7 @@ def M2_mpyui : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyui($Rs32,$Rt32)",
-tc_8fd5f294, TypeM> {
+tc_bafaade3, TypeM> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -16278,7 +16353,7 @@ def M2_nacci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= add($Rs32,$Rt32)",
-tc_c74f796f, TypeM>, Enc_2ae154 {
+tc_f675fee8, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111100;
@@ -16292,7 +16367,7 @@ def M2_naccii : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 -= add($Rs32,#$Ii)",
-tc_c74f796f, TypeM>, Enc_c90aca {
+tc_f675fee8, TypeM>, Enc_c90aca {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100010100;
let hasNewValue = 1;
@@ -16310,7 +16385,7 @@ def M2_subacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rt32, IntRegs:$Rs32),
"$Rx32 += sub($Rt32,$Rs32)",
-tc_c74f796f, TypeM>, Enc_a568d4 {
+tc_f675fee8, TypeM>, Enc_a568d4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -16324,7 +16399,7 @@ def M2_vabsdiffh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffh($Rtt32,$Rss32)",
-tc_2b6f77c6, TypeM>, Enc_ea23e4 {
+tc_002cb246, TypeM>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -16334,7 +16409,7 @@ def M2_vabsdiffw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffw($Rtt32,$Rss32)",
-tc_2b6f77c6, TypeM>, Enc_ea23e4 {
+tc_002cb246, TypeM>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -16344,7 +16419,7 @@ def M2_vcmac_s0_sat_i : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vcmpyi($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -16356,7 +16431,7 @@ def M2_vcmac_s0_sat_r : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vcmpyr($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -16368,7 +16443,7 @@ def M2_vcmpy_s0_sat_i : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyi($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -16379,7 +16454,7 @@ def M2_vcmpy_s0_sat_r : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyr($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -16390,7 +16465,7 @@ def M2_vcmpy_s1_sat_i : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyi($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -16401,7 +16476,7 @@ def M2_vcmpy_s1_sat_r : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyr($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -16412,7 +16487,7 @@ def M2_vdmacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpy($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16424,7 +16499,7 @@ def M2_vdmacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpy($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -16436,7 +16511,7 @@ def M2_vdmpyrs_s0 : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vdmpy($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001000;
@@ -16449,7 +16524,7 @@ def M2_vdmpyrs_s1 : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vdmpy($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001100;
@@ -16462,7 +16537,7 @@ def M2_vdmpys_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpy($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16473,7 +16548,7 @@ def M2_vdmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpy($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -16484,7 +16559,7 @@ def M2_vmac2 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -16495,7 +16570,7 @@ def M2_vmac2es : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -16506,7 +16581,7 @@ def M2_vmac2es_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16518,7 +16593,7 @@ def M2_vmac2es_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -16530,7 +16605,7 @@ def M2_vmac2s_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -16542,7 +16617,7 @@ def M2_vmac2s_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -16554,7 +16629,7 @@ def M2_vmac2su_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyhsu($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111011;
@@ -16566,7 +16641,7 @@ def M2_vmac2su_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyhsu($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111111;
@@ -16578,7 +16653,7 @@ def M2_vmpy2es_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyeh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16589,7 +16664,7 @@ def M2_vmpy2es_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyeh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -16600,7 +16675,7 @@ def M2_vmpy2s_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyh($Rs32,$Rt32):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -16611,7 +16686,7 @@ def M2_vmpy2s_s0pack : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vmpyh($Rs32,$Rt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -16624,7 +16699,7 @@ def M2_vmpy2s_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyh($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -16635,7 +16710,7 @@ def M2_vmpy2s_s1pack : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vmpyh($Rs32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -16648,7 +16723,7 @@ def M2_vmpy2su_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyhsu($Rs32,$Rt32):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -16659,7 +16734,7 @@ def M2_vmpy2su_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyhsu($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -16670,7 +16745,7 @@ def M2_vraddh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vraddh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001001;
@@ -16682,7 +16757,7 @@ def M2_vradduh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vradduh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001000;
@@ -16694,7 +16769,7 @@ def M2_vrcmaci_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyi($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16705,7 +16780,7 @@ def M2_vrcmaci_s0c : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyi($Rss32,$Rtt32*)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -16716,7 +16791,7 @@ def M2_vrcmacr_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyr($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16727,7 +16802,7 @@ def M2_vrcmacr_s0c : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyr($Rss32,$Rtt32*)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -16738,7 +16813,7 @@ def M2_vrcmpyi_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyi($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16748,7 +16823,7 @@ def M2_vrcmpyi_s0c : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyi($Rss32,$Rtt32*)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -16758,7 +16833,7 @@ def M2_vrcmpyr_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyr($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16768,7 +16843,7 @@ def M2_vrcmpyr_s0c : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyr($Rss32,$Rtt32*)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -16778,7 +16853,7 @@ def M2_vrcmpys_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += vrcmpys($Rss32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM> {
+tc_d773585a, TypeM> {
let isPseudo = 1;
let Constraints = "$Rxx32 = $Rxx32in";
}
@@ -16786,7 +16861,7 @@ def M2_vrcmpys_acc_s1_h : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -16798,7 +16873,7 @@ def M2_vrcmpys_acc_s1_l : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -16810,14 +16885,14 @@ def M2_vrcmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vrcmpys($Rss32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM> {
+tc_bafaade3, TypeM> {
let isPseudo = 1;
}
def M2_vrcmpys_s1_h : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -16828,7 +16903,7 @@ def M2_vrcmpys_s1_l : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -16839,7 +16914,7 @@ def M2_vrcmpys_s1rp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = vrcmpys($Rss32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM> {
+tc_bafaade3, TypeM> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -16848,7 +16923,7 @@ def M2_vrcmpys_s1rp_h : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:hi",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001101;
@@ -16861,7 +16936,7 @@ def M2_vrcmpys_s1rp_l : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:lo",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001101;
@@ -16874,7 +16949,7 @@ def M2_vrmac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16885,7 +16960,7 @@ def M2_vrmpy_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16895,7 +16970,7 @@ def M2_xor_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= xor($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111100;
@@ -16909,7 +16984,7 @@ def M4_and_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= and($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -16923,7 +16998,7 @@ def M4_and_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= and($Rs32,~$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -16937,7 +17012,7 @@ def M4_and_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= or($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -16951,7 +17026,7 @@ def M4_and_xor : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= xor($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -16965,7 +17040,7 @@ def M4_cmpyi_wh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyiwh($Rss32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28 {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -16978,7 +17053,7 @@ def M4_cmpyi_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -16991,7 +17066,7 @@ def M4_cmpyr_wh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyrwh($Rss32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28 {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -17004,7 +17079,7 @@ def M4_cmpyr_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -17017,7 +17092,7 @@ def M4_mac_up_s1_sat : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -17032,7 +17107,7 @@ def M4_mpyri_addi : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii, IntRegs:$Rs32, u6_0Imm:$II),
"$Rd32 = add(#$Ii,mpyi($Rs32,#$II))",
-tc_16d0d8d5, TypeALU64>, Enc_322e1b, ImmRegRel {
+tc_05d3a09b, TypeALU64>, Enc_322e1b, ImmRegRel {
let Inst{31-24} = 0b11011000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17048,7 +17123,7 @@ def M4_mpyri_addr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Ru32, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rd32 = add($Ru32,mpyi($Rs32,#$Ii))",
-tc_16d0d8d5, TypeALU64>, Enc_420cf3, ImmRegRel {
+tc_05d3a09b, TypeALU64>, Enc_420cf3, ImmRegRel {
let Inst{31-23} = 0b110111111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17065,7 +17140,7 @@ def M4_mpyri_addr_u2 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Ru32, u6_2Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = add($Ru32,mpyi(#$Ii,$Rs32))",
-tc_bcc96cee, TypeALU64>, Enc_277737 {
+tc_1a2fd869, TypeALU64>, Enc_277737 {
let Inst{31-23} = 0b110111110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17075,7 +17150,7 @@ def M4_mpyrr_addi : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add(#$Ii,mpyi($Rs32,$Rt32))",
-tc_e913dc32, TypeALU64>, Enc_a7b8e8, ImmRegRel {
+tc_d773585a, TypeALU64>, Enc_a7b8e8, ImmRegRel {
let Inst{31-23} = 0b110101110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17092,7 +17167,7 @@ def M4_mpyrr_addr : HInst<
(outs IntRegs:$Ry32),
(ins IntRegs:$Ru32, IntRegs:$Ry32in, IntRegs:$Rs32),
"$Ry32 = add($Ru32,mpyi($Ry32in,$Rs32))",
-tc_e913dc32, TypeM>, Enc_7f1a05, ImmRegRel {
+tc_d773585a, TypeM>, Enc_7f1a05, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100011000;
@@ -17107,7 +17182,7 @@ def M4_nac_up_s1_sat : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -17122,7 +17197,7 @@ def M4_or_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= and($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -17136,7 +17211,7 @@ def M4_or_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= and($Rs32,~$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -17150,7 +17225,7 @@ def M4_or_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= or($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17164,7 +17239,7 @@ def M4_or_xor : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= xor($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17178,7 +17253,7 @@ def M4_pmpyw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = pmpyw($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -17188,7 +17263,7 @@ def M4_pmpyw_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 ^= pmpyw($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -17199,7 +17274,7 @@ def M4_vpmpyh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vpmpyh($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101110;
@@ -17209,7 +17284,7 @@ def M4_vpmpyh_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 ^= vpmpyh($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111101;
@@ -17220,7 +17295,7 @@ def M4_vrmpyeh_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyweh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -17231,7 +17306,7 @@ def M4_vrmpyeh_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyweh($Rss32,$Rtt32):<<1",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -17242,7 +17317,7 @@ def M4_vrmpyeh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyweh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -17252,7 +17327,7 @@ def M4_vrmpyeh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyweh($Rss32,$Rtt32):<<1",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -17262,7 +17337,7 @@ def M4_vrmpyoh_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpywoh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -17273,7 +17348,7 @@ def M4_vrmpyoh_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpywoh($Rss32,$Rtt32):<<1",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -17284,7 +17359,7 @@ def M4_vrmpyoh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpywoh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -17294,7 +17369,7 @@ def M4_vrmpyoh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpywoh($Rss32,$Rtt32):<<1",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17304,7 +17379,7 @@ def M4_xor_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= and($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17318,7 +17393,7 @@ def M4_xor_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= and($Rs32,~$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -17332,7 +17407,7 @@ def M4_xor_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= or($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17346,7 +17421,7 @@ def M4_xor_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 ^= xor($Rss32,$Rtt32)",
-tc_84df2cd3, TypeS_3op>, Enc_88c16c {
+tc_f429765c, TypeS_3op>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001010100;
@@ -17357,7 +17432,7 @@ def M5_vdmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c, Requires<[HasV5]> {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -17369,7 +17444,7 @@ def M5_vdmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825, Requires<[HasV5]> {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17380,7 +17455,7 @@ def M5_vmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpybsu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -17391,7 +17466,7 @@ def M5_vmacbuu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpybu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -17402,7 +17477,7 @@ def M5_vmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpybsu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -17412,7 +17487,7 @@ def M5_vmpybuu : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpybu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -17422,7 +17497,7 @@ def M5_vrmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpybsu($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -17433,7 +17508,7 @@ def M5_vrmacbuu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpybu($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -17444,7 +17519,7 @@ def M5_vrmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpybsu($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -17454,7 +17529,7 @@ def M5_vrmpybuu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpybu($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -17464,7 +17539,7 @@ def M6_vabsdiffb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffb($Rtt32,$Rss32)",
-tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
+tc_9461ff31, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -17474,7 +17549,7 @@ def M6_vabsdiffub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffub($Rtt32,$Rss32)",
-tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
+tc_9461ff31, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17484,7 +17559,7 @@ def PS_loadrbabs : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memb(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17507,7 +17582,7 @@ def PS_loadrdabs : HInst<
(outs DoubleRegs:$Rdd32),
(ins u29_3Imm:$Ii),
"$Rdd32 = memd(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_509701, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_509701, AddrModeRel {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17528,7 +17603,7 @@ def PS_loadrhabs : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memh(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17551,7 +17626,7 @@ def PS_loadriabs : HInst<
(outs IntRegs:$Rd32),
(ins u30_2Imm:$Ii),
"$Rd32 = memw(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17574,7 +17649,7 @@ def PS_loadrubabs : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memub(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17597,7 +17672,7 @@ def PS_loadruhabs : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memuh(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17620,7 +17695,7 @@ def PS_storerbabs : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Rt32),
"memb(#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
let Inst{24-21} = 0b0000;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17642,7 +17717,7 @@ def PS_storerbnewabs : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Nt8),
"memb(#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_ad1831, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_ad1831, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17668,7 +17743,7 @@ def PS_storerdabs : HInst<
(outs),
(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd(#$Ii) = $Rtt32",
-tc_a788683e, TypeV2LDST>, Enc_5c124a, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_5c124a, AddrModeRel {
let Inst{24-21} = 0b0110;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17689,7 +17764,7 @@ def PS_storerfabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(#$Ii) = $Rt32.h",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17710,7 +17785,7 @@ def PS_storerhabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17732,7 +17807,7 @@ def PS_storerhnewabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Nt8),
"memh(#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17758,7 +17833,7 @@ def PS_storeriabs : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Rt32),
"memw(#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_541f26, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_541f26, AddrModeRel {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17780,7 +17855,7 @@ def PS_storerinewabs : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Nt8),
"memw(#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17806,7 +17881,7 @@ def S2_addasl_rrri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32, u3_0Imm:$Ii),
"$Rd32 = addasl($Rt32,$Rs32,#$Ii)",
-tc_c74f796f, TypeS_3op>, Enc_47ef61 {
+tc_f675fee8, TypeS_3op>, Enc_47ef61 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000100000;
let hasNewValue = 1;
@@ -17817,7 +17892,7 @@ def S2_allocframe : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u11_3Imm:$Ii),
"allocframe($Rx32,#$Ii):raw",
-tc_e216a5db, TypeST>, Enc_22c845 {
+tc_b44ecf75, TypeST>, Enc_22c845 {
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10100000100;
let hasNewValue = 1;
@@ -17833,7 +17908,7 @@ def S2_asl_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asl($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_5eac98 {
+tc_946df596, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000000000;
}
@@ -17841,7 +17916,7 @@ def S2_asl_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += asl($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -17851,7 +17926,7 @@ def S2_asl_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= asl($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -17861,7 +17936,7 @@ def S2_asl_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= asl($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -17871,7 +17946,7 @@ def S2_asl_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= asl($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -17881,7 +17956,7 @@ def S2_asl_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= asl($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -17891,7 +17966,7 @@ def S2_asl_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asl($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -17902,7 +17977,7 @@ def S2_asl_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += asl($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -17915,7 +17990,7 @@ def S2_asl_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= asl($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -17928,7 +18003,7 @@ def S2_asl_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= asl($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -17941,7 +18016,7 @@ def S2_asl_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= asl($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -17954,7 +18029,7 @@ def S2_asl_i_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asl($Rs32,#$Ii):sat",
-tc_b44c6e2a, TypeS_2op>, Enc_a05677 {
+tc_779080bf, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100010;
@@ -17967,7 +18042,7 @@ def S2_asl_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= asl($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -17980,7 +18055,7 @@ def S2_asl_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vaslh($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_12b6e9 {
+tc_946df596, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -17989,7 +18064,7 @@ def S2_asl_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vaslw($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_7e5a82 {
+tc_946df596, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -17998,7 +18073,7 @@ def S2_asl_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = asl($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18007,7 +18082,7 @@ def S2_asl_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += asl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18018,7 +18093,7 @@ def S2_asl_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= asl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18029,7 +18104,7 @@ def S2_asl_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= asl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18040,7 +18115,7 @@ def S2_asl_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= asl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18051,7 +18126,7 @@ def S2_asl_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= asl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18062,7 +18137,7 @@ def S2_asl_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asl($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18073,7 +18148,7 @@ def S2_asl_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += asl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18086,7 +18161,7 @@ def S2_asl_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= asl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18099,7 +18174,7 @@ def S2_asl_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= asl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18112,7 +18187,7 @@ def S2_asl_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= asl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18125,7 +18200,7 @@ def S2_asl_r_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asl($Rs32,$Rt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_5ab2be {
+tc_779080bf, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110000;
@@ -18138,7 +18213,7 @@ def S2_asl_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vaslh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -18147,7 +18222,7 @@ def S2_asl_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vaslw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -18156,7 +18231,7 @@ def S2_asr_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asr($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_5eac98 {
+tc_946df596, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000000000;
}
@@ -18164,7 +18239,7 @@ def S2_asr_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += asr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18174,7 +18249,7 @@ def S2_asr_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= asr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18184,7 +18259,7 @@ def S2_asr_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= asr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18194,7 +18269,7 @@ def S2_asr_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= asr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18204,7 +18279,7 @@ def S2_asr_i_p_rnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asr($Rss32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Enc_5eac98, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18213,14 +18288,14 @@ def S2_asr_i_p_rnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asrrnd($Rss32,#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op> {
let isPseudo = 1;
}
def S2_asr_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asr($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -18231,7 +18306,7 @@ def S2_asr_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += asr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -18244,7 +18319,7 @@ def S2_asr_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= asr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -18257,7 +18332,7 @@ def S2_asr_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= asr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -18270,7 +18345,7 @@ def S2_asr_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= asr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -18283,7 +18358,7 @@ def S2_asr_i_r_rnd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asr($Rs32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100010;
@@ -18295,7 +18370,7 @@ def S2_asr_i_r_rnd_goodsyntax : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asrrnd($Rs32,#$Ii)",
-tc_2b6f77c6, TypeS_2op> {
+tc_002cb246, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -18304,7 +18379,7 @@ def S2_asr_i_svw_trun : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rd32 = vasrw($Rss32,#$Ii)",
-tc_1b9c9ee5, TypeS_2op>, Enc_8dec2e {
+tc_4414d8b1, TypeS_2op>, Enc_8dec2e {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001000110;
@@ -18316,7 +18391,7 @@ def S2_asr_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_12b6e9 {
+tc_946df596, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -18325,7 +18400,7 @@ def S2_asr_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vasrw($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_7e5a82 {
+tc_946df596, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -18334,7 +18409,7 @@ def S2_asr_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = asr($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18343,7 +18418,7 @@ def S2_asr_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += asr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18354,7 +18429,7 @@ def S2_asr_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= asr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18365,7 +18440,7 @@ def S2_asr_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= asr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18376,7 +18451,7 @@ def S2_asr_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= asr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18387,7 +18462,7 @@ def S2_asr_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= asr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18398,7 +18473,7 @@ def S2_asr_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asr($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18409,7 +18484,7 @@ def S2_asr_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += asr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18422,7 +18497,7 @@ def S2_asr_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= asr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18435,7 +18510,7 @@ def S2_asr_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= asr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18448,7 +18523,7 @@ def S2_asr_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= asr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18461,7 +18536,7 @@ def S2_asr_r_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asr($Rs32,$Rt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_5ab2be {
+tc_779080bf, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110000;
@@ -18474,7 +18549,7 @@ def S2_asr_r_svw_trun : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = vasrw($Rss32,$Rt32)",
-tc_1b9c9ee5, TypeS_3op>, Enc_3d5b28 {
+tc_4414d8b1, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -18486,7 +18561,7 @@ def S2_asr_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vasrh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -18495,7 +18570,7 @@ def S2_asr_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vasrw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -18504,7 +18579,7 @@ def S2_brev : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = brev($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18515,7 +18590,7 @@ def S2_brevp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = brev($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_b9c5fb {
+tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18524,7 +18599,7 @@ def S2_cabacdecbin : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = decbin($Rss32,$Rtt32)",
-tc_c6ebf8dd, TypeS_3op>, Enc_a56825 {
+tc_76851da1, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -18536,7 +18611,7 @@ def S2_cl0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = cl0($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18547,7 +18622,7 @@ def S2_cl0p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = cl0($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18558,7 +18633,7 @@ def S2_cl1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = cl1($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18569,7 +18644,7 @@ def S2_cl1p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = cl1($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18580,7 +18655,7 @@ def S2_clb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = clb($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18591,7 +18666,7 @@ def S2_clbnorm : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = normamt($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18602,7 +18677,7 @@ def S2_clbp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = clb($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18613,7 +18688,7 @@ def S2_clrbit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = clrbit($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -18624,7 +18699,7 @@ def S2_clrbit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = clrbit($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -18635,7 +18710,7 @@ def S2_ct0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = ct0($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18646,7 +18721,7 @@ def S2_ct0p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = ct0($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -18657,7 +18732,7 @@ def S2_ct1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = ct1($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18668,7 +18743,7 @@ def S2_ct1p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = ct1($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -18679,7 +18754,7 @@ def S2_deinterleave : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = deinterleave($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_b9c5fb {
+tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18688,7 +18763,7 @@ def S2_extractu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rd32 = extractu($Rs32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b388cf {
+tc_f675fee8, TypeS_2op>, Enc_b388cf {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011010;
let hasNewValue = 1;
@@ -18699,7 +18774,7 @@ def S2_extractu_rp : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rd32 = extractu($Rs32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_e07374 {
+tc_002cb246, TypeS_3op>, Enc_e07374 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001001000;
@@ -18711,7 +18786,7 @@ def S2_extractup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rdd32 = extractu($Rss32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b84c4c {
+tc_f675fee8, TypeS_2op>, Enc_b84c4c {
let Inst{31-24} = 0b10000001;
let prefersSlot3 = 1;
}
@@ -18719,7 +18794,7 @@ def S2_extractup_rp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = extractu($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -18729,7 +18804,7 @@ def S2_insert : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = insert($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op>, Enc_a1e29d {
+tc_bfec0f01, TypeS_2op>, Enc_a1e29d {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011110;
let hasNewValue = 1;
@@ -18741,7 +18816,7 @@ def S2_insert_rp : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rx32 = insert($Rs32,$Rtt32)",
-tc_84df2cd3, TypeS_3op>, Enc_179b35 {
+tc_f429765c, TypeS_3op>, Enc_179b35 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001000000;
@@ -18754,7 +18829,7 @@ def S2_insertp : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rxx32 = insert($Rss32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op>, Enc_143a3c {
+tc_bfec0f01, TypeS_2op>, Enc_143a3c {
let Inst{31-24} = 0b10000011;
let prefersSlot3 = 1;
let Constraints = "$Rxx32 = $Rxx32in";
@@ -18763,7 +18838,7 @@ def S2_insertp_rp : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 = insert($Rss32,$Rtt32)",
-tc_84df2cd3, TypeS_3op>, Enc_88c16c {
+tc_f429765c, TypeS_3op>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001010000;
@@ -18774,7 +18849,7 @@ def S2_interleave : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = interleave($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_b9c5fb {
+tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18783,7 +18858,7 @@ def S2_lfsp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = lfs($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -18793,7 +18868,7 @@ def S2_lsl_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = lsl($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18802,7 +18877,7 @@ def S2_lsl_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += lsl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18813,7 +18888,7 @@ def S2_lsl_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= lsl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18824,7 +18899,7 @@ def S2_lsl_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= lsl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18835,7 +18910,7 @@ def S2_lsl_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= lsl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18846,7 +18921,7 @@ def S2_lsl_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= lsl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18857,7 +18932,7 @@ def S2_lsl_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = lsl($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18868,7 +18943,7 @@ def S2_lsl_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += lsl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18881,7 +18956,7 @@ def S2_lsl_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= lsl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18894,7 +18969,7 @@ def S2_lsl_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= lsl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18907,7 +18982,7 @@ def S2_lsl_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= lsl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18920,7 +18995,7 @@ def S2_lsl_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlslh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -18929,7 +19004,7 @@ def S2_lsl_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlslw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -18938,7 +19013,7 @@ def S2_lsr_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = lsr($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_5eac98 {
+tc_946df596, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000000000;
}
@@ -18946,7 +19021,7 @@ def S2_lsr_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += lsr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18956,7 +19031,7 @@ def S2_lsr_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= lsr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18966,7 +19041,7 @@ def S2_lsr_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= lsr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18976,7 +19051,7 @@ def S2_lsr_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= lsr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18986,7 +19061,7 @@ def S2_lsr_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= lsr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -18996,7 +19071,7 @@ def S2_lsr_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = lsr($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -19007,7 +19082,7 @@ def S2_lsr_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += lsr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -19020,7 +19095,7 @@ def S2_lsr_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= lsr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -19033,7 +19108,7 @@ def S2_lsr_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= lsr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -19046,7 +19121,7 @@ def S2_lsr_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= lsr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -19059,7 +19134,7 @@ def S2_lsr_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= lsr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -19072,7 +19147,7 @@ def S2_lsr_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vlsrh($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_12b6e9 {
+tc_946df596, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b001;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -19081,7 +19156,7 @@ def S2_lsr_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vlsrw($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_7e5a82 {
+tc_946df596, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -19090,7 +19165,7 @@ def S2_lsr_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = lsr($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -19099,7 +19174,7 @@ def S2_lsr_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += lsr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -19110,7 +19185,7 @@ def S2_lsr_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= lsr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -19121,7 +19196,7 @@ def S2_lsr_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= lsr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -19132,7 +19207,7 @@ def S2_lsr_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= lsr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -19143,7 +19218,7 @@ def S2_lsr_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= lsr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -19154,7 +19229,7 @@ def S2_lsr_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = lsr($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -19165,7 +19240,7 @@ def S2_lsr_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += lsr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -19178,7 +19253,7 @@ def S2_lsr_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= lsr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -19191,7 +19266,7 @@ def S2_lsr_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= lsr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -19204,7 +19279,7 @@ def S2_lsr_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= lsr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -19217,7 +19292,7 @@ def S2_lsr_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlsrh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -19226,16 +19301,28 @@ def S2_lsr_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlsrw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
}
+def S2_mask : HInst<
+(outs IntRegs:$Rd32),
+(ins u5_0Imm:$Ii, u5_0Imm:$II),
+"$Rd32 = mask(#$Ii,#$II)",
+tc_9461ff31, TypeS_2op>, Enc_c85e2a, Requires<[HasV66]> {
+let Inst{13-13} = 0b1;
+let Inst{20-16} = 0b00000;
+let Inst{31-23} = 0b100011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
def S2_packhl : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = packhl($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_be32a5 {
+tc_5a2711e5, TypeALU32_3op>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110101100;
@@ -19245,7 +19332,7 @@ def S2_parityp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = parity($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeALU64>, Enc_d2216a {
+tc_002cb246, TypeALU64>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010000000;
@@ -19257,7 +19344,7 @@ def S2_pstorerbf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100000;
let isPredicated = 1;
@@ -19279,7 +19366,7 @@ def S2_pstorerbf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_cc449f, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19297,7 +19384,7 @@ def S2_pstorerbf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19305,7 +19392,7 @@ def S2_pstorerbfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_cc449f, AddrModeRel {
+tc_53559e35, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19324,7 +19411,7 @@ def S2_pstorerbnewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000100101;
@@ -19350,7 +19437,7 @@ def S2_pstorerbnewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b100;
@@ -19372,7 +19459,7 @@ def S2_pstorerbnewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -19381,7 +19468,7 @@ def S2_pstorerbnewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -19404,7 +19491,7 @@ def S2_pstorerbnewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000000101;
@@ -19429,7 +19516,7 @@ def S2_pstorerbnewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b100;
@@ -19450,7 +19537,7 @@ def S2_pstorerbnewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -19459,7 +19546,7 @@ def S2_pstorerbnewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -19481,7 +19568,7 @@ def S2_pstorerbt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000000;
let isPredicated = 1;
@@ -19502,7 +19589,7 @@ def S2_pstorerbt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_cc449f, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19519,7 +19606,7 @@ def S2_pstorerbt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19527,7 +19614,7 @@ def S2_pstorerbtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_cc449f, AddrModeRel {
+tc_53559e35, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19545,7 +19632,7 @@ def S2_pstorerdf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32+#$Ii) = $Rtt32",
-tc_8b15472a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100110;
let isPredicated = 1;
@@ -19566,7 +19653,7 @@ def S2_pstorerdf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rx32++#$Ii) = $Rtt32",
-tc_cd7374a0, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19584,7 +19671,7 @@ def S2_pstorerdf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32) = $Rtt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19592,7 +19679,7 @@ def S2_pstorerdfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
-tc_74e47fd9, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_53559e35, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19611,7 +19698,7 @@ def S2_pstorerdt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32+#$Ii) = $Rtt32",
-tc_8b15472a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000110;
let isPredicated = 1;
@@ -19631,7 +19718,7 @@ def S2_pstorerdt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rx32++#$Ii) = $Rtt32",
-tc_cd7374a0, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19648,7 +19735,7 @@ def S2_pstorerdt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32) = $Rtt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19656,7 +19743,7 @@ def S2_pstorerdtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
-tc_74e47fd9, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_53559e35, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19674,7 +19761,7 @@ def S2_pstorerff_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32.h",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100011;
let isPredicated = 1;
@@ -19695,7 +19782,7 @@ def S2_pstorerff_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32.h",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19713,7 +19800,7 @@ def S2_pstorerff_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32) = $Rt32.h",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19721,7 +19808,7 @@ def S2_pstorerffnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19740,7 +19827,7 @@ def S2_pstorerft_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+#$Ii) = $Rt32.h",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000011;
let isPredicated = 1;
@@ -19760,7 +19847,7 @@ def S2_pstorerft_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rx32++#$Ii) = $Rt32.h",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19777,7 +19864,7 @@ def S2_pstorerft_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32) = $Rt32.h",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19785,7 +19872,7 @@ def S2_pstorerftnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19803,7 +19890,7 @@ def S2_pstorerhf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100010;
let isPredicated = 1;
@@ -19825,7 +19912,7 @@ def S2_pstorerhf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19843,7 +19930,7 @@ def S2_pstorerhf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19851,7 +19938,7 @@ def S2_pstorerhfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19870,7 +19957,7 @@ def S2_pstorerhnewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000100101;
@@ -19896,7 +19983,7 @@ def S2_pstorerhnewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b101;
@@ -19918,7 +20005,7 @@ def S2_pstorerhnewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -19927,7 +20014,7 @@ def S2_pstorerhnewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -19950,7 +20037,7 @@ def S2_pstorerhnewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000000101;
@@ -19975,7 +20062,7 @@ def S2_pstorerhnewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b101;
@@ -19996,7 +20083,7 @@ def S2_pstorerhnewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20005,7 +20092,7 @@ def S2_pstorerhnewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -20027,7 +20114,7 @@ def S2_pstorerht_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000010;
let isPredicated = 1;
@@ -20048,7 +20135,7 @@ def S2_pstorerht_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20065,7 +20152,7 @@ def S2_pstorerht_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20073,7 +20160,7 @@ def S2_pstorerhtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20091,7 +20178,7 @@ def S2_pstorerif_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100100;
let isPredicated = 1;
@@ -20113,7 +20200,7 @@ def S2_pstorerif_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20131,7 +20218,7 @@ def S2_pstorerif_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20139,7 +20226,7 @@ def S2_pstorerifnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_53559e35, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20159,7 +20246,7 @@ def S2_pstorerinewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000100101;
@@ -20185,7 +20272,7 @@ def S2_pstorerinewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_65f095, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b110;
@@ -20207,7 +20294,7 @@ def S2_pstorerinewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20216,7 +20303,7 @@ def S2_pstorerinewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_65f095, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -20239,7 +20326,7 @@ def S2_pstorerinewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000000101;
@@ -20264,7 +20351,7 @@ def S2_pstorerinewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_65f095, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b110;
@@ -20285,7 +20372,7 @@ def S2_pstorerinewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20294,7 +20381,7 @@ def S2_pstorerinewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_65f095, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -20316,7 +20403,7 @@ def S2_pstorerit_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000100;
let isPredicated = 1;
@@ -20337,7 +20424,7 @@ def S2_pstorerit_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20354,7 +20441,7 @@ def S2_pstorerit_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20362,7 +20449,7 @@ def S2_pstoreritnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_53559e35, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20380,7 +20467,7 @@ def S2_setbit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = setbit($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -20391,7 +20478,7 @@ def S2_setbit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = setbit($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -20402,7 +20489,7 @@ def S2_shuffeb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = shuffeb($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20411,7 +20498,7 @@ def S2_shuffeh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = shuffeh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20420,7 +20507,7 @@ def S2_shuffob : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = shuffob($Rtt32,$Rss32)",
-tc_540fdfbc, TypeS_3op>, Enc_ea23e4 {
+tc_946df596, TypeS_3op>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20429,7 +20516,7 @@ def S2_shuffoh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = shuffoh($Rtt32,$Rss32)",
-tc_540fdfbc, TypeS_3op>, Enc_ea23e4 {
+tc_946df596, TypeS_3op>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -20438,7 +20525,7 @@ def S2_storerb_io : HInst<
(outs),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) = $Rt32",
-tc_05b6c987, TypeST>, Enc_448f7f, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_448f7f, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20459,9 +20546,10 @@ def S2_storerb_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++$Mu2:brev) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111000;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayStore = 1;
let BaseOpcode = "S2_storerb_pbr";
@@ -20472,7 +20560,7 @@ def S2_storerb_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_9fdb5406, TypeST>, Enc_b15941, AddrModeRel {
+tc_e86aa961, TypeST>, Enc_b15941, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001000;
@@ -20488,7 +20576,7 @@ def S2_storerb_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++I:circ($Mu2)) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001000;
let addrMode = PostInc;
@@ -20503,7 +20591,7 @@ def S2_storerb_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rx32++#$Ii) = $Rt32",
-tc_f86c328a, TypeST>, Enc_10bc21, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_10bc21, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20521,7 +20609,7 @@ def S2_storerb_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++$Mu2) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101000;
let addrMode = PostInc;
@@ -20534,7 +20622,7 @@ def S2_storerb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) = $Rt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20542,7 +20630,7 @@ def S2_storerbgp : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Rt32),
"memb(gp+#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
let Inst{24-21} = 0b0000;
let Inst{31-27} = 0b01001;
let accessSize = ByteAccess;
@@ -20560,7 +20648,7 @@ def S2_storerbnew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rs32+#$Ii) = $Nt8.new",
-tc_f7dd9c9f, TypeST>, Enc_4df4e9, AddrModeRel {
+tc_be9602ff, TypeST>, Enc_4df4e9, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -20585,10 +20673,11 @@ def S2_storerbnew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++$Mu2:brev) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101111101;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let isNVStore = 1;
let isNewValue = 1;
@@ -20602,7 +20691,7 @@ def S2_storerbnew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_9d5941c7, TypeST>, Enc_96ce4f, AddrModeRel {
+tc_d5c0729a, TypeST>, Enc_96ce4f, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b00;
@@ -20622,7 +20711,7 @@ def S2_storerbnew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101001101;
@@ -20641,7 +20730,7 @@ def S2_storerbnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rx32++#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_c7cd90, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_c7cd90, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b000;
@@ -20662,7 +20751,7 @@ def S2_storerbnew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++$Mu2) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85 {
+tc_c79a189f, TypeST>, Enc_8dbe85 {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101101101;
@@ -20679,7 +20768,7 @@ def S2_storerbnew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memb($Rs32) = $Nt8.new",
-tc_f7dd9c9f, TypeMAPPING> {
+tc_be9602ff, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -20688,7 +20777,7 @@ def S2_storerbnewgp : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Nt8),
"memb(gp+#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_ad1831, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_ad1831, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -20710,7 +20799,7 @@ def S2_storerd_io : HInst<
(outs),
(ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rs32+#$Ii) = $Rtt32",
-tc_05b6c987, TypeST>, Enc_ce6828, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_ce6828, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20730,9 +20819,10 @@ def S2_storerd_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++$Mu2:brev) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_928ca1 {
+tc_da97ee82, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111110;
+let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let mayStore = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -20741,7 +20831,7 @@ def S2_storerd_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++#$Ii:circ($Mu2)) = $Rtt32",
-tc_9fdb5406, TypeST>, Enc_395cc4 {
+tc_e86aa961, TypeST>, Enc_395cc4 {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001110;
@@ -20755,7 +20845,7 @@ def S2_storerd_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++I:circ($Mu2)) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_928ca1 {
+tc_da97ee82, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001110;
let addrMode = PostInc;
@@ -20768,7 +20858,7 @@ def S2_storerd_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rx32++#$Ii) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_85bf58, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_85bf58, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20785,7 +20875,7 @@ def S2_storerd_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++$Mu2) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_928ca1 {
+tc_da97ee82, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101110;
let addrMode = PostInc;
@@ -20797,7 +20887,7 @@ def S2_storerd_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"memd($Rs32) = $Rtt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20805,7 +20895,7 @@ def S2_storerdgp : HInst<
(outs),
(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd(gp+#$Ii) = $Rtt32",
-tc_a788683e, TypeV2LDST>, Enc_5c124a, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_5c124a, AddrModeRel {
let Inst{24-21} = 0b0110;
let Inst{31-27} = 0b01001;
let accessSize = DoubleWordAccess;
@@ -20822,7 +20912,7 @@ def S2_storerf_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) = $Rt32.h",
-tc_05b6c987, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20842,9 +20932,10 @@ def S2_storerf_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2:brev) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111011;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayStore = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -20853,7 +20944,7 @@ def S2_storerf_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32.h",
-tc_9fdb5406, TypeST>, Enc_935d9b {
+tc_e86aa961, TypeST>, Enc_935d9b {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001011;
@@ -20867,7 +20958,7 @@ def S2_storerf_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++I:circ($Mu2)) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001011;
let addrMode = PostInc;
@@ -20880,7 +20971,7 @@ def S2_storerf_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rx32++#$Ii) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20897,7 +20988,7 @@ def S2_storerf_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101011;
let addrMode = PostInc;
@@ -20909,7 +21000,7 @@ def S2_storerf_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) = $Rt32.h",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20917,7 +21008,7 @@ def S2_storerfgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(gp+#$Ii) = $Rt32.h",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b01001;
let accessSize = HalfWordAccess;
@@ -20934,7 +21025,7 @@ def S2_storerh_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) = $Rt32",
-tc_05b6c987, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20955,9 +21046,10 @@ def S2_storerh_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2:brev) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111010;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayStore = 1;
let BaseOpcode = "S2_storerh_pbr";
@@ -20968,7 +21060,7 @@ def S2_storerh_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_9fdb5406, TypeST>, Enc_935d9b, AddrModeRel {
+tc_e86aa961, TypeST>, Enc_935d9b, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001010;
@@ -20984,7 +21076,7 @@ def S2_storerh_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++I:circ($Mu2)) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001010;
let addrMode = PostInc;
@@ -20999,7 +21091,7 @@ def S2_storerh_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rx32++#$Ii) = $Rt32",
-tc_f86c328a, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -21017,7 +21109,7 @@ def S2_storerh_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101010;
let addrMode = PostInc;
@@ -21030,7 +21122,7 @@ def S2_storerh_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) = $Rt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -21038,7 +21130,7 @@ def S2_storerhgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(gp+#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b01001;
let accessSize = HalfWordAccess;
@@ -21056,7 +21148,7 @@ def S2_storerhnew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Nt8),
"memh($Rs32+#$Ii) = $Nt8.new",
-tc_f7dd9c9f, TypeST>, Enc_0d8870, AddrModeRel {
+tc_be9602ff, TypeST>, Enc_0d8870, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -21081,10 +21173,11 @@ def S2_storerhnew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++$Mu2:brev) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101111101;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let isNVStore = 1;
let isNewValue = 1;
@@ -21098,7 +21191,7 @@ def S2_storerhnew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_9d5941c7, TypeST>, Enc_91b9fe, AddrModeRel {
+tc_d5c0729a, TypeST>, Enc_91b9fe, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b01;
@@ -21118,7 +21211,7 @@ def S2_storerhnew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101001101;
@@ -21137,7 +21230,7 @@ def S2_storerhnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"memh($Rx32++#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_e26546, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_e26546, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b001;
@@ -21158,7 +21251,7 @@ def S2_storerhnew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++$Mu2) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85 {
+tc_c79a189f, TypeST>, Enc_8dbe85 {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101101101;
@@ -21175,7 +21268,7 @@ def S2_storerhnew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memh($Rs32) = $Nt8.new",
-tc_f7dd9c9f, TypeMAPPING> {
+tc_be9602ff, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -21184,7 +21277,7 @@ def S2_storerhnewgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Nt8),
"memh(gp+#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -21206,7 +21299,7 @@ def S2_storeri_io : HInst<
(outs),
(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) = $Rt32",
-tc_05b6c987, TypeST>, Enc_143445, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_143445, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -21227,9 +21320,10 @@ def S2_storeri_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++$Mu2:brev) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111100;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayStore = 1;
let BaseOpcode = "S2_storeri_pbr";
@@ -21240,7 +21334,7 @@ def S2_storeri_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_9fdb5406, TypeST>, Enc_79b8c8, AddrModeRel {
+tc_e86aa961, TypeST>, Enc_79b8c8, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001100;
@@ -21256,7 +21350,7 @@ def S2_storeri_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++I:circ($Mu2)) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001100;
let addrMode = PostInc;
@@ -21271,7 +21365,7 @@ def S2_storeri_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rx32++#$Ii) = $Rt32",
-tc_f86c328a, TypeST>, Enc_db40cd, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_db40cd, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -21289,7 +21383,7 @@ def S2_storeri_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++$Mu2) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101100;
let addrMode = PostInc;
@@ -21302,7 +21396,7 @@ def S2_storeri_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) = $Rt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -21310,7 +21404,7 @@ def S2_storerigp : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Rt32),
"memw(gp+#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_541f26, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_541f26, AddrModeRel {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b01001;
let accessSize = WordAccess;
@@ -21328,7 +21422,7 @@ def S2_storerinew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Nt8),
"memw($Rs32+#$Ii) = $Nt8.new",
-tc_f7dd9c9f, TypeST>, Enc_690862, AddrModeRel {
+tc_be9602ff, TypeST>, Enc_690862, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -21353,10 +21447,11 @@ def S2_storerinew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++$Mu2:brev) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101111101;
+let addrMode = PostInc;
let accessSize = WordAccess;
let isNVStore = 1;
let isNewValue = 1;
@@ -21370,7 +21465,7 @@ def S2_storerinew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_9d5941c7, TypeST>, Enc_3f97c8, AddrModeRel {
+tc_d5c0729a, TypeST>, Enc_3f97c8, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b10;
@@ -21390,7 +21485,7 @@ def S2_storerinew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101001101;
@@ -21409,7 +21504,7 @@ def S2_storerinew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"memw($Rx32++#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_223005, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_223005, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b010;
@@ -21429,7 +21524,7 @@ def S2_storerinew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++$Mu2) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85 {
+tc_c79a189f, TypeST>, Enc_8dbe85 {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101101101;
@@ -21446,7 +21541,7 @@ def S2_storerinew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memw($Rs32) = $Nt8.new",
-tc_f7dd9c9f, TypeMAPPING> {
+tc_be9602ff, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -21455,7 +21550,7 @@ def S2_storerinewgp : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Nt8),
"memw(gp+#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -21477,7 +21572,7 @@ def S2_storew_locked : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw_locked($Rs32,$Pd4) = $Rt32",
-tc_1372bca1, TypeST>, Enc_c2b48e {
+tc_5abb5e3f, TypeST>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100000101;
@@ -21490,7 +21585,7 @@ def S2_svsathb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsathb($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -21501,7 +21596,7 @@ def S2_svsathub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsathub($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -21512,7 +21607,7 @@ def S2_tableidxb : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxb($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011100;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21523,7 +21618,7 @@ def S2_tableidxb_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxb($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21534,7 +21629,7 @@ def S2_tableidxd : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxd($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21545,7 +21640,7 @@ def S2_tableidxd_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxd($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21555,7 +21650,7 @@ def S2_tableidxh : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxh($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21566,7 +21661,7 @@ def S2_tableidxh_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxh($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21576,7 +21671,7 @@ def S2_tableidxw : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxw($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21587,7 +21682,7 @@ def S2_tableidxw_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxw($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21597,7 +21692,7 @@ def S2_togglebit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = togglebit($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -21608,7 +21703,7 @@ def S2_togglebit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = togglebit($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -21619,7 +21714,7 @@ def S2_tstbit_i : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = tstbit($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_83ee64 {
+tc_643b4717, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101000;
@@ -21628,7 +21723,7 @@ def S2_tstbit_r : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = tstbit($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111000;
@@ -21637,7 +21732,7 @@ def S2_valignib : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, u3_0Imm:$Ii),
"$Rdd32 = valignb($Rtt32,$Rss32,#$Ii)",
-tc_f8eeed7a, TypeS_3op>, Enc_729ff7 {
+tc_b4b5c03a, TypeS_3op>, Enc_729ff7 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000000000;
}
@@ -21645,7 +21740,7 @@ def S2_valignrb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, PredRegs:$Pu4),
"$Rdd32 = valignb($Rtt32,$Rss32,$Pu4)",
-tc_f8eeed7a, TypeS_3op>, Enc_8c6530 {
+tc_b4b5c03a, TypeS_3op>, Enc_8c6530 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010000;
@@ -21654,7 +21749,7 @@ def S2_vcnegh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vcnegh($Rss32,$Rt32)",
-tc_b44c6e2a, TypeS_3op>, Enc_927852 {
+tc_779080bf, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011110;
@@ -21665,7 +21760,7 @@ def S2_vcrotate : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vcrotate($Rss32,$Rt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_927852 {
+tc_002cb246, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011110;
@@ -21676,7 +21771,7 @@ def S2_vrcnegh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += vrcnegh($Rss32,$Rt32)",
-tc_e913dc32, TypeS_3op>, Enc_1aa186 {
+tc_d773585a, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -21687,7 +21782,7 @@ def S2_vrndpackwh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vrndwh($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21698,7 +21793,7 @@ def S2_vrndpackwhs : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vrndwh($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_90cd8b {
+tc_cf8126ae, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21710,7 +21805,7 @@ def S2_vsathb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsathb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21721,7 +21816,7 @@ def S2_vsathb_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsathb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21730,7 +21825,7 @@ def S2_vsathub : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsathub($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21741,7 +21836,7 @@ def S2_vsathub_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsathub($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21750,7 +21845,7 @@ def S2_vsatwh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsatwh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21761,7 +21856,7 @@ def S2_vsatwh_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsatwh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21770,7 +21865,7 @@ def S2_vsatwuh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsatwuh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21781,7 +21876,7 @@ def S2_vsatwuh_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsatwuh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21790,7 +21885,7 @@ def S2_vsplatrb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsplatb($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -21802,7 +21897,7 @@ def S2_vsplatrh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsplath($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100010;
let isReMaterializable = 1;
@@ -21812,7 +21907,7 @@ def S2_vspliceib : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, u3_0Imm:$Ii),
"$Rdd32 = vspliceb($Rss32,$Rtt32,#$Ii)",
-tc_f8eeed7a, TypeS_3op>, Enc_d50cd3 {
+tc_b4b5c03a, TypeS_3op>, Enc_d50cd3 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000000100;
}
@@ -21820,7 +21915,7 @@ def S2_vsplicerb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Pu4),
"$Rdd32 = vspliceb($Rss32,$Rtt32,$Pu4)",
-tc_f8eeed7a, TypeS_3op>, Enc_dbd70c {
+tc_b4b5c03a, TypeS_3op>, Enc_dbd70c {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010100;
@@ -21829,7 +21924,7 @@ def S2_vsxtbh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsxtbh($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21839,7 +21934,7 @@ def S2_vsxthw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsxthw($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21849,7 +21944,7 @@ def S2_vtrunehb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vtrunehb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21859,7 +21954,7 @@ def S2_vtrunewh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunewh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -21868,7 +21963,7 @@ def S2_vtrunohb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vtrunohb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21878,7 +21973,7 @@ def S2_vtrunowh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunowh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -21887,7 +21982,7 @@ def S2_vzxtbh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vzxtbh($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21897,7 +21992,7 @@ def S2_vzxthw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vzxthw($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21907,7 +22002,7 @@ def S4_addaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Ru32, s32_0Imm:$Ii),
"$Rd32 = add($Rs32,add($Ru32,#$Ii))",
-tc_c74f796f, TypeALU64>, Enc_8b8d61 {
+tc_f675fee8, TypeALU64>, Enc_8b8d61 {
let Inst{31-23} = 0b110110110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21922,7 +22017,7 @@ def S4_addi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = add(#$Ii,asl($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b100;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -21940,7 +22035,7 @@ def S4_addi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = add(#$Ii,lsr($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b100;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -21958,7 +22053,7 @@ def S4_andi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = and(#$Ii,asl($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b000;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -21976,7 +22071,7 @@ def S4_andi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = and(#$Ii,lsr($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b000;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -21994,7 +22089,7 @@ def S4_clbaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s6_0Imm:$Ii),
"$Rd32 = add(clb($Rs32),#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_9fae8a {
+tc_002cb246, TypeS_2op>, Enc_9fae8a {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10001100001;
let hasNewValue = 1;
@@ -22005,7 +22100,7 @@ def S4_clbpaddi : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, s6_0Imm:$Ii),
"$Rd32 = add(clb($Rss32),#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_a1640c {
+tc_002cb246, TypeS_2op>, Enc_a1640c {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -22016,7 +22111,7 @@ def S4_clbpnorm : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = normamt($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -22027,7 +22122,7 @@ def S4_extract : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rd32 = extract($Rs32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b388cf {
+tc_f675fee8, TypeS_2op>, Enc_b388cf {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011011;
let hasNewValue = 1;
@@ -22038,7 +22133,7 @@ def S4_extract_rp : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rd32 = extract($Rs32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_e07374 {
+tc_002cb246, TypeS_3op>, Enc_e07374 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001001000;
@@ -22050,7 +22145,7 @@ def S4_extractp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rdd32 = extract($Rss32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b84c4c {
+tc_f675fee8, TypeS_2op>, Enc_b84c4c {
let Inst{31-24} = 0b10001010;
let prefersSlot3 = 1;
}
@@ -22058,7 +22153,7 @@ def S4_extractp_rp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = extract($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -22068,7 +22163,7 @@ def S4_lsli : HInst<
(outs IntRegs:$Rd32),
(ins s6_0Imm:$Ii, IntRegs:$Rt32),
"$Rd32 = lsl(#$Ii,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_fef969 {
+tc_946df596, TypeS_3op>, Enc_fef969 {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -22079,7 +22174,7 @@ def S4_ntstbit_i : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = !tstbit($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_83ee64 {
+tc_643b4717, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101001;
@@ -22088,7 +22183,7 @@ def S4_ntstbit_r : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !tstbit($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111001;
@@ -22097,7 +22192,7 @@ def S4_or_andi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 |= and($Rs32,#$Ii)",
-tc_84df2cd3, TypeALU64>, Enc_b0e9d8 {
+tc_f429765c, TypeALU64>, Enc_b0e9d8 {
let Inst{31-22} = 0b1101101000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22114,7 +22209,7 @@ def S4_or_andix : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Ru32, IntRegs:$Rx32in, s32_0Imm:$Ii),
"$Rx32 = or($Ru32,and($Rx32in,#$Ii))",
-tc_84df2cd3, TypeALU64>, Enc_b4e6cf {
+tc_f429765c, TypeALU64>, Enc_b4e6cf {
let Inst{31-22} = 0b1101101001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22130,7 +22225,7 @@ def S4_or_ori : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 |= or($Rs32,#$Ii)",
-tc_84df2cd3, TypeALU64>, Enc_b0e9d8 {
+tc_f429765c, TypeALU64>, Enc_b0e9d8 {
let Inst{31-22} = 0b1101101010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22147,7 +22242,7 @@ def S4_ori_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = or(#$Ii,asl($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b010;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -22165,7 +22260,7 @@ def S4_ori_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = or(#$Ii,lsr($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b010;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -22183,7 +22278,7 @@ def S4_parity : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = parity($Rs32,$Rt32)",
-tc_2b6f77c6, TypeALU64>, Enc_5ab2be {
+tc_002cb246, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101111;
@@ -22195,7 +22290,7 @@ def S4_pstorerbf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22220,7 +22315,7 @@ def S4_pstorerbf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22236,7 +22331,7 @@ def S4_pstorerbfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22262,7 +22357,7 @@ def S4_pstorerbfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110000;
let isPredicated = 1;
@@ -22285,7 +22380,7 @@ def S4_pstorerbfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22302,7 +22397,7 @@ def S4_pstorerbfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22310,7 +22405,7 @@ def S4_pstorerbnewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b000;
@@ -22338,7 +22433,7 @@ def S4_pstorerbnewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -22358,7 +22453,7 @@ def S4_pstorerbnewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -22387,7 +22482,7 @@ def S4_pstorerbnewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000110101;
@@ -22414,7 +22509,7 @@ def S4_pstorerbnewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -22435,7 +22530,7 @@ def S4_pstorerbnewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -22444,7 +22539,7 @@ def S4_pstorerbnewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b000;
@@ -22471,7 +22566,7 @@ def S4_pstorerbnewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -22490,7 +22585,7 @@ def S4_pstorerbnewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -22518,7 +22613,7 @@ def S4_pstorerbnewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000010101;
@@ -22544,7 +22639,7 @@ def S4_pstorerbnewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -22564,7 +22659,7 @@ def S4_pstorerbnewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -22573,7 +22668,7 @@ def S4_pstorerbt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22597,7 +22692,7 @@ def S4_pstorerbt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100000;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22612,7 +22707,7 @@ def S4_pstorerbtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22637,7 +22732,7 @@ def S4_pstorerbtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010000;
let isPredicated = 1;
@@ -22659,7 +22754,7 @@ def S4_pstorerbtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110000;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22675,7 +22770,7 @@ def S4_pstorerbtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22683,7 +22778,7 @@ def S4_pstorerdf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd(#$Ii) = $Rtt32",
-tc_238d91d2, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_362c6592, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22707,7 +22802,7 @@ def S4_pstorerdf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_5274e61a, TypeST>, Enc_1a9974, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110101110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22722,7 +22817,7 @@ def S4_pstorerdfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd(#$Ii) = $Rtt32",
-tc_66888ded, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22747,7 +22842,7 @@ def S4_pstorerdfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
-tc_f86c328a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110110;
let isPredicated = 1;
@@ -22769,7 +22864,7 @@ def S4_pstorerdfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_3e07fb90, TypeST>, Enc_1a9974, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110111110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22785,7 +22880,7 @@ def S4_pstorerdfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32) = $Rtt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22793,7 +22888,7 @@ def S4_pstorerdt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd(#$Ii) = $Rtt32",
-tc_238d91d2, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_362c6592, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22816,7 +22911,7 @@ def S4_pstorerdt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_5274e61a, TypeST>, Enc_1a9974, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110100110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22830,7 +22925,7 @@ def S4_pstorerdtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd(#$Ii) = $Rtt32",
-tc_66888ded, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22854,7 +22949,7 @@ def S4_pstorerdtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
-tc_f86c328a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010110;
let isPredicated = 1;
@@ -22875,7 +22970,7 @@ def S4_pstorerdtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_3e07fb90, TypeST>, Enc_1a9974, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110110110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22890,7 +22985,7 @@ def S4_pstorerdtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32) = $Rtt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22898,7 +22993,7 @@ def S4_pstorerff_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh(#$Ii) = $Rt32.h",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22922,7 +23017,7 @@ def S4_pstorerff_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22937,7 +23032,7 @@ def S4_pstorerffnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh(#$Ii) = $Rt32.h",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22962,7 +23057,7 @@ def S4_pstorerffnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110011;
let isPredicated = 1;
@@ -22984,7 +23079,7 @@ def S4_pstorerffnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23000,7 +23095,7 @@ def S4_pstorerffnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32) = $Rt32.h",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23008,7 +23103,7 @@ def S4_pstorerft_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh(#$Ii) = $Rt32.h",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23031,7 +23126,7 @@ def S4_pstorerft_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100011;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23045,7 +23140,7 @@ def S4_pstorerftnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh(#$Ii) = $Rt32.h",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23069,7 +23164,7 @@ def S4_pstorerftnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010011;
let isPredicated = 1;
@@ -23090,7 +23185,7 @@ def S4_pstorerftnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110011;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23105,7 +23200,7 @@ def S4_pstorerftnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32) = $Rt32.h",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23113,7 +23208,7 @@ def S4_pstorerhf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23138,7 +23233,7 @@ def S4_pstorerhf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23154,7 +23249,7 @@ def S4_pstorerhfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23180,7 +23275,7 @@ def S4_pstorerhfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110010;
let isPredicated = 1;
@@ -23203,7 +23298,7 @@ def S4_pstorerhfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23220,7 +23315,7 @@ def S4_pstorerhfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23228,7 +23323,7 @@ def S4_pstorerhnewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b001;
@@ -23256,7 +23351,7 @@ def S4_pstorerhnewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -23276,7 +23371,7 @@ def S4_pstorerhnewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -23305,7 +23400,7 @@ def S4_pstorerhnewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000110101;
@@ -23332,7 +23427,7 @@ def S4_pstorerhnewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -23353,7 +23448,7 @@ def S4_pstorerhnewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23362,7 +23457,7 @@ def S4_pstorerhnewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b001;
@@ -23389,7 +23484,7 @@ def S4_pstorerhnewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -23408,7 +23503,7 @@ def S4_pstorerhnewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -23436,7 +23531,7 @@ def S4_pstorerhnewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000010101;
@@ -23462,7 +23557,7 @@ def S4_pstorerhnewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -23482,7 +23577,7 @@ def S4_pstorerhnewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23491,7 +23586,7 @@ def S4_pstorerht_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23515,7 +23610,7 @@ def S4_pstorerht_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100010;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23530,7 +23625,7 @@ def S4_pstorerhtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23555,7 +23650,7 @@ def S4_pstorerhtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010010;
let isPredicated = 1;
@@ -23577,7 +23672,7 @@ def S4_pstorerhtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110010;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23593,7 +23688,7 @@ def S4_pstorerhtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23601,7 +23696,7 @@ def S4_pstorerif_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23626,7 +23721,7 @@ def S4_pstorerif_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23642,7 +23737,7 @@ def S4_pstorerifnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23668,7 +23763,7 @@ def S4_pstorerifnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110100;
let isPredicated = 1;
@@ -23691,7 +23786,7 @@ def S4_pstorerifnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23708,7 +23803,7 @@ def S4_pstorerifnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23716,7 +23811,7 @@ def S4_pstorerinewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b010;
@@ -23744,7 +23839,7 @@ def S4_pstorerinewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -23764,7 +23859,7 @@ def S4_pstorerinewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -23793,7 +23888,7 @@ def S4_pstorerinewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000110101;
@@ -23820,7 +23915,7 @@ def S4_pstorerinewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -23841,7 +23936,7 @@ def S4_pstorerinewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23850,7 +23945,7 @@ def S4_pstorerinewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b010;
@@ -23877,7 +23972,7 @@ def S4_pstorerinewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -23896,7 +23991,7 @@ def S4_pstorerinewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -23924,7 +24019,7 @@ def S4_pstorerinewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000010101;
@@ -23950,7 +24045,7 @@ def S4_pstorerinewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -23970,7 +24065,7 @@ def S4_pstorerinewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23979,7 +24074,7 @@ def S4_pstorerit_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -24003,7 +24098,7 @@ def S4_pstorerit_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100100;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -24018,7 +24113,7 @@ def S4_pstoreritnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -24043,7 +24138,7 @@ def S4_pstoreritnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010100;
let isPredicated = 1;
@@ -24065,7 +24160,7 @@ def S4_pstoreritnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110100;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -24081,7 +24176,7 @@ def S4_pstoreritnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24089,7 +24184,7 @@ def S4_stored_locked : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"memd_locked($Rs32,$Pd4) = $Rtt32",
-tc_1372bca1, TypeST>, Enc_d7dc10 {
+tc_5abb5e3f, TypeST>, Enc_d7dc10 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100000111;
@@ -24102,7 +24197,7 @@ def S4_storeirb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"memb($Rs32+#$Ii) = #$II",
-tc_05b6c987, TypeST>, Enc_8203bb, PredNewRel {
+tc_b83e6d73, TypeST>, Enc_8203bb, PredNewRel {
let Inst{31-21} = 0b00111100000;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -24121,7 +24216,7 @@ def S4_storeirb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memb($Rs32) = #$II",
-tc_05b6c987, TypeMAPPING> {
+tc_b83e6d73, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24129,7 +24224,7 @@ def S4_storeirbf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memb($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_0b2be201, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111000100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24149,7 +24244,7 @@ def S4_storeirbf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memb($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24157,7 +24252,7 @@ def S4_storeirbfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memb($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111001100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24178,7 +24273,7 @@ def S4_storeirbfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memb($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24186,7 +24281,7 @@ def S4_storeirbt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memb($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_0b2be201, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111000000;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24205,7 +24300,7 @@ def S4_storeirbt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memb($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24213,7 +24308,7 @@ def S4_storeirbtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memb($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111001000;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24233,7 +24328,7 @@ def S4_storeirbtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memb($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24241,7 +24336,7 @@ def S4_storeirh_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"memh($Rs32+#$Ii) = #$II",
-tc_05b6c987, TypeST>, Enc_a803e0, PredNewRel {
+tc_b83e6d73, TypeST>, Enc_a803e0, PredNewRel {
let Inst{31-21} = 0b00111100001;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
@@ -24260,7 +24355,7 @@ def S4_storeirh_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memh($Rs32) = #$II",
-tc_05b6c987, TypeMAPPING> {
+tc_b83e6d73, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24268,7 +24363,7 @@ def S4_storeirhf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memh($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_f20719, PredNewRel {
+tc_0b2be201, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24288,7 +24383,7 @@ def S4_storeirhf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memh($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24296,7 +24391,7 @@ def S4_storeirhfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memh($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_f20719, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111001101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24317,7 +24412,7 @@ def S4_storeirhfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memh($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24325,7 +24420,7 @@ def S4_storeirht_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memh($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_f20719, PredNewRel {
+tc_0b2be201, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111000001;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24344,7 +24439,7 @@ def S4_storeirht_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memh($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24352,7 +24447,7 @@ def S4_storeirhtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memh($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_f20719, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111001001;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24372,7 +24467,7 @@ def S4_storeirhtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memh($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24380,7 +24475,7 @@ def S4_storeiri_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"memw($Rs32+#$Ii) = #$II",
-tc_05b6c987, TypeST>, Enc_f37377, PredNewRel {
+tc_b83e6d73, TypeST>, Enc_f37377, PredNewRel {
let Inst{31-21} = 0b00111100010;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -24399,7 +24494,7 @@ def S4_storeiri_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memw($Rs32) = #$II",
-tc_05b6c987, TypeMAPPING> {
+tc_b83e6d73, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24407,7 +24502,7 @@ def S4_storeirif_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memw($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_0b2be201, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111000110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24427,7 +24522,7 @@ def S4_storeirif_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memw($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24435,7 +24530,7 @@ def S4_storeirifnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memw($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111001110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24456,7 +24551,7 @@ def S4_storeirifnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memw($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24464,7 +24559,7 @@ def S4_storeirit_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memw($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_0b2be201, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111000010;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24483,7 +24578,7 @@ def S4_storeirit_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memw($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24491,7 +24586,7 @@ def S4_storeiritnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memw($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111001010;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24511,7 +24606,7 @@ def S4_storeiritnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memw($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24519,7 +24614,7 @@ def S4_storerb_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memb($Re32=#$II) = $Rt32",
-tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011000;
@@ -24540,7 +24635,7 @@ def S4_storerb_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011000;
let addrMode = BaseRegOffset;
@@ -24556,7 +24651,7 @@ def S4_storerb_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memb($Ru32<<#$Ii+#$II) = $Rt32",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101000;
let addrMode = BaseLongOffset;
@@ -24578,7 +24673,7 @@ def S4_storerbnew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memb($Re32=#$II) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10101011101;
@@ -24602,7 +24697,7 @@ def S4_storerbnew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel {
+tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0000;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -24621,7 +24716,7 @@ def S4_storerbnew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memb($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel {
+tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101101101;
@@ -24646,7 +24741,7 @@ def S4_storerd_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, DoubleRegs:$Rtt32),
"memd($Re32=#$II) = $Rtt32",
-tc_66888ded, TypeST>, Enc_c7a204 {
+tc_da4a37ed, TypeST>, Enc_c7a204 {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011110;
@@ -24666,7 +24761,7 @@ def S4_storerd_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_d9709180, TypeST>, Enc_55355c, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_55355c, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011110;
let addrMode = BaseRegOffset;
@@ -24681,7 +24776,7 @@ def S4_storerd_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, DoubleRegs:$Rtt32),
"memd($Ru32<<#$Ii+#$II) = $Rtt32",
-tc_0dc560de, TypeST>, Enc_f79415, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_f79415, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101110;
let addrMode = BaseLongOffset;
@@ -24702,7 +24797,7 @@ def S4_storerf_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memh($Re32=#$II) = $Rt32.h",
-tc_66888ded, TypeST>, Enc_8bcba4 {
+tc_da4a37ed, TypeST>, Enc_8bcba4 {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011011;
@@ -24722,7 +24817,7 @@ def S4_storerf_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011011;
let addrMode = BaseRegOffset;
@@ -24737,7 +24832,7 @@ def S4_storerf_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memh($Ru32<<#$Ii+#$II) = $Rt32.h",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101011;
let addrMode = BaseLongOffset;
@@ -24758,7 +24853,7 @@ def S4_storerh_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memh($Re32=#$II) = $Rt32",
-tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011010;
@@ -24779,7 +24874,7 @@ def S4_storerh_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011010;
let addrMode = BaseRegOffset;
@@ -24795,7 +24890,7 @@ def S4_storerh_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memh($Ru32<<#$Ii+#$II) = $Rt32",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101010;
let addrMode = BaseLongOffset;
@@ -24817,7 +24912,7 @@ def S4_storerhnew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memh($Re32=#$II) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b001;
let Inst{31-21} = 0b10101011101;
@@ -24841,7 +24936,7 @@ def S4_storerhnew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel {
+tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0001;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -24860,7 +24955,7 @@ def S4_storerhnew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memh($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel {
+tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101101101;
@@ -24885,7 +24980,7 @@ def S4_storeri_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memw($Re32=#$II) = $Rt32",
-tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011100;
@@ -24906,7 +25001,7 @@ def S4_storeri_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011100;
let addrMode = BaseRegOffset;
@@ -24922,7 +25017,7 @@ def S4_storeri_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memw($Ru32<<#$Ii+#$II) = $Rt32",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101100;
let addrMode = BaseLongOffset;
@@ -24944,7 +25039,7 @@ def S4_storerinew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memw($Re32=#$II) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b010;
let Inst{31-21} = 0b10101011101;
@@ -24968,7 +25063,7 @@ def S4_storerinew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel {
+tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0010;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -24987,7 +25082,7 @@ def S4_storerinew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memw($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel {
+tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101101101;
@@ -25012,7 +25107,7 @@ def S4_subaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Ru32),
"$Rd32 = add($Rs32,sub(#$Ii,$Ru32))",
-tc_c74f796f, TypeALU64>, Enc_8b8d61 {
+tc_f675fee8, TypeALU64>, Enc_8b8d61 {
let Inst{31-23} = 0b110110111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25027,7 +25122,7 @@ def S4_subi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = sub(#$Ii,asl($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b110;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -25045,7 +25140,7 @@ def S4_subi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = sub(#$Ii,lsr($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b110;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -25063,7 +25158,7 @@ def S4_vrcrotate : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rdd32 = vrcrotate($Rss32,$Rt32,#$Ii)",
-tc_b9c0b731, TypeS_3op>, Enc_645d54 {
+tc_13bfbcf9, TypeS_3op>, Enc_645d54 {
let Inst{7-6} = 0b11;
let Inst{31-21} = 0b11000011110;
let prefersSlot3 = 1;
@@ -25072,7 +25167,7 @@ def S4_vrcrotate_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rxx32 += vrcrotate($Rss32,$Rt32,#$Ii)",
-tc_60571023, TypeS_3op>, Enc_b72622 {
+tc_9debc299, TypeS_3op>, Enc_b72622 {
let Inst{7-6} = 0b00;
let Inst{31-21} = 0b11001011101;
let prefersSlot3 = 1;
@@ -25082,7 +25177,7 @@ def S4_vxaddsubh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25093,7 +25188,7 @@ def S4_vxaddsubhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubh($Rss32,$Rtt32):rnd:>>1:sat",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -25104,7 +25199,7 @@ def S4_vxaddsubw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubw($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25115,7 +25210,7 @@ def S4_vxsubaddh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25126,7 +25221,7 @@ def S4_vxsubaddhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddh($Rss32,$Rtt32):rnd:>>1:sat",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -25137,7 +25232,7 @@ def S4_vxsubaddw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddw($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25148,7 +25243,7 @@ def S5_asrhub_rnd_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):raw",
-tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_11a146 {
let Inst{7-5} = 0b100;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@@ -25161,7 +25256,7 @@ def S5_asrhub_rnd_sat_goodsyntax : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -25170,7 +25265,7 @@ def S5_asrhub_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):sat",
-tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_11a146 {
let Inst{7-5} = 0b101;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@@ -25183,7 +25278,7 @@ def S5_popcountp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = popcount($Rss32)",
-tc_00afc57e, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_703e822c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -25194,7 +25289,7 @@ def S5_vasrhrnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):raw",
-tc_2b6f77c6, TypeS_2op>, Enc_12b6e9, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000001;
@@ -25204,14 +25299,14 @@ def S5_vasrhrnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op> {
let isPseudo = 1;
}
def S6_allocframe_to_raw : HInst<
(outs),
(ins u11_3Imm:$Ii),
"allocframe(#$Ii)",
-tc_e216a5db, TypeMAPPING>, Requires<[HasV65]> {
+tc_b44ecf75, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -25219,7 +25314,7 @@ def S6_rol_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = rol($Rss32,#$Ii)",
-tc_55050d58, TypeS_2op>, Enc_5eac98, Requires<[HasV60]> {
+tc_1fc97744, TypeS_2op>, Enc_5eac98, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000000000;
}
@@ -25227,7 +25322,7 @@ def S6_rol_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -25237,7 +25332,7 @@ def S6_rol_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -25247,7 +25342,7 @@ def S6_rol_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -25257,7 +25352,7 @@ def S6_rol_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -25267,7 +25362,7 @@ def S6_rol_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -25277,7 +25372,7 @@ def S6_rol_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = rol($Rs32,#$Ii)",
-tc_55050d58, TypeS_2op>, Enc_a05677, Requires<[HasV60]> {
+tc_1fc97744, TypeS_2op>, Enc_a05677, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -25288,7 +25383,7 @@ def S6_rol_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -25301,7 +25396,7 @@ def S6_rol_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -25314,7 +25409,7 @@ def S6_rol_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -25327,7 +25422,7 @@ def S6_rol_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -25340,7 +25435,7 @@ def S6_rol_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -25353,7 +25448,7 @@ def S6_vsplatrbp : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsplatb($Rs32)",
-tc_be706f30, TypeS_2op>, Enc_3a3d62, Requires<[HasV62]> {
+tc_a1c00888, TypeS_2op>, Enc_3a3d62, Requires<[HasV62]> {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100010;
}
@@ -25361,7 +25456,7 @@ def S6_vtrunehb_ppp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunehb($Rss32,$Rtt32)",
-tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
+tc_1fc97744, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -25370,7 +25465,7 @@ def S6_vtrunohb_ppp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunohb($Rss32,$Rtt32)",
-tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
+tc_1fc97744, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -25379,7 +25474,7 @@ def SA1_addi : HInst<
(outs GeneralSubRegs:$Rx16),
(ins IntRegs:$Rx16in, s32_0Imm:$Ii),
"$Rx16 = add($Rx16in,#$Ii)",
-tc_609d2efe, TypeSUBINSN>, Enc_93af4c {
+tc_0a705168, TypeSUBINSN>, Enc_93af4c {
let Inst{12-11} = 0b00;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25396,7 +25491,7 @@ def SA1_addrx : HInst<
(outs GeneralSubRegs:$Rx16),
(ins IntRegs:$Rx16in, GeneralSubRegs:$Rs16),
"$Rx16 = add($Rx16in,$Rs16)",
-tc_609d2efe, TypeSUBINSN>, Enc_0527db {
+tc_0a705168, TypeSUBINSN>, Enc_0527db {
let Inst{12-8} = 0b11000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25408,7 +25503,7 @@ def SA1_addsp : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u6_2Imm:$Ii),
"$Rd16 = add(r29,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_2df31d {
+tc_9fc3dae0, TypeSUBINSN>, Enc_2df31d {
let Inst{12-10} = 0b011;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25420,7 +25515,7 @@ def SA1_and1 : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = and($Rs16,#1)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25431,7 +25526,7 @@ def SA1_clrf : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (!p0) $Rd16 = #0",
-tc_1b82a277, TypeSUBINSN>, Enc_1f5ba6 {
+tc_a1123dda, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25445,7 +25540,7 @@ def SA1_clrfnew : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (!p0.new) $Rd16 = #0",
-tc_e9c822f7, TypeSUBINSN>, Enc_1f5ba6 {
+tc_8b3e402a, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25460,7 +25555,7 @@ def SA1_clrt : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (p0) $Rd16 = #0",
-tc_1b82a277, TypeSUBINSN>, Enc_1f5ba6 {
+tc_a1123dda, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100110;
let isPredicated = 1;
let hasNewValue = 1;
@@ -25473,7 +25568,7 @@ def SA1_clrtnew : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (p0.new) $Rd16 = #0",
-tc_e9c822f7, TypeSUBINSN>, Enc_1f5ba6 {
+tc_8b3e402a, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -25487,7 +25582,7 @@ def SA1_cmpeqi : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u2_0Imm:$Ii),
"p0 = cmp.eq($Rs16,#$Ii)",
-tc_90f3e30c, TypeSUBINSN>, Enc_63eaeb {
+tc_5b7c0967, TypeSUBINSN>, Enc_63eaeb {
let Inst{3-2} = 0b00;
let Inst{12-8} = 0b11001;
let AsmVariantName = "NonParsable";
@@ -25498,7 +25593,7 @@ def SA1_combine0i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#0,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b00;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25510,7 +25605,7 @@ def SA1_combine1i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#1,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b01;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25522,7 +25617,7 @@ def SA1_combine2i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#2,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b10;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25534,7 +25629,7 @@ def SA1_combine3i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#3,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b11;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25546,7 +25641,7 @@ def SA1_combinerz : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins GeneralSubRegs:$Rs16),
"$Rdd8 = combine($Rs16,#0)",
-tc_a904d137, TypeSUBINSN>, Enc_399e12 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_399e12 {
let Inst{3-3} = 0b1;
let Inst{12-8} = 0b11101;
let hasNewValue = 1;
@@ -25558,7 +25653,7 @@ def SA1_combinezr : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins GeneralSubRegs:$Rs16),
"$Rdd8 = combine(#0,$Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_399e12 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_399e12 {
let Inst{3-3} = 0b0;
let Inst{12-8} = 0b11101;
let hasNewValue = 1;
@@ -25570,7 +25665,7 @@ def SA1_dec : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, n1Const:$n1),
"$Rd16 = add($Rs16,#$n1)",
-tc_609d2efe, TypeSUBINSN>, Enc_ee5ed0 {
+tc_0a705168, TypeSUBINSN>, Enc_ee5ed0 {
let Inst{12-8} = 0b10011;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25581,7 +25676,7 @@ def SA1_inc : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = add($Rs16,#1)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25592,7 +25687,7 @@ def SA1_seti : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u32_0Imm:$Ii),
"$Rd16 = #$Ii",
-tc_a904d137, TypeSUBINSN>, Enc_e39bb2 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_e39bb2 {
let Inst{12-10} = 0b010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25608,7 +25703,7 @@ def SA1_setin1 : HInst<
(outs GeneralSubRegs:$Rd16),
(ins n1Const:$n1),
"$Rd16 = #$n1",
-tc_a904d137, TypeSUBINSN>, Enc_7a0ea6 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_7a0ea6 {
let Inst{12-4} = 0b110100000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25619,7 +25714,7 @@ def SA1_sxtb : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = sxtb($Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25630,7 +25725,7 @@ def SA1_sxth : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = sxth($Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10100;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25641,7 +25736,7 @@ def SA1_tfr : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = $Rs16",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25652,7 +25747,7 @@ def SA1_zxtb : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = and($Rs16,#255)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25663,7 +25758,7 @@ def SA1_zxth : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = zxth($Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25674,7 +25769,7 @@ def SL1_loadri_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"$Rd16 = memw($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_53dca9 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_53dca9 {
let Inst{12-12} = 0b0;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25688,7 +25783,7 @@ def SL1_loadrub_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"$Rd16 = memub($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_c175d0 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_c175d0 {
let Inst{12-12} = 0b1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25702,7 +25797,7 @@ def SL2_deallocframe : HInst<
(outs),
(ins),
"deallocframe",
-tc_36c68ad1, TypeSUBINSN>, Enc_e3b0c4 {
+tc_39dfefe8, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111100000000;
let accessSize = DoubleWordAccess;
let AsmVariantName = "NonParsable";
@@ -25715,7 +25810,7 @@ def SL2_jumpr31 : HInst<
(outs),
(ins),
"jumpr r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000000;
let isTerminator = 1;
let isIndirectBranch = 1;
@@ -25730,7 +25825,7 @@ def SL2_jumpr31_f : HInst<
(outs),
(ins),
"if (!p0) jumpr r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25748,7 +25843,7 @@ def SL2_jumpr31_fnew : HInst<
(outs),
(ins),
"if (!p0.new) jumpr:nt r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25767,7 +25862,7 @@ def SL2_jumpr31_t : HInst<
(outs),
(ins),
"if (p0) jumpr r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000100;
let isPredicated = 1;
let isTerminator = 1;
@@ -25784,7 +25879,7 @@ def SL2_jumpr31_tnew : HInst<
(outs),
(ins),
"if (p0.new) jumpr:nt r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000110;
let isPredicated = 1;
let isTerminator = 1;
@@ -25802,7 +25897,7 @@ def SL2_loadrb_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_0Imm:$Ii),
"$Rd16 = memb($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_2fbf3c {
+tc_17e0d2cd, TypeSUBINSN>, Enc_2fbf3c {
let Inst{12-11} = 0b10;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25816,7 +25911,7 @@ def SL2_loadrd_sp : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u5_3Imm:$Ii),
"$Rdd8 = memd(r29+#$Ii)",
-tc_9c98e8af, TypeSUBINSN>, Enc_86a14b {
+tc_c4db48cb, TypeSUBINSN>, Enc_86a14b {
let Inst{12-8} = 0b11110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25831,7 +25926,7 @@ def SL2_loadrh_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
"$Rd16 = memh($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_2bae10 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_2bae10 {
let Inst{12-11} = 0b00;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25845,7 +25940,7 @@ def SL2_loadri_sp : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u5_2Imm:$Ii),
"$Rd16 = memw(r29+#$Ii)",
-tc_9c98e8af, TypeSUBINSN>, Enc_51635c {
+tc_c4db48cb, TypeSUBINSN>, Enc_51635c {
let Inst{12-9} = 0b1110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25860,7 +25955,7 @@ def SL2_loadruh_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
"$Rd16 = memuh($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_2bae10 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_2bae10 {
let Inst{12-11} = 0b01;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25874,7 +25969,7 @@ def SL2_return : HInst<
(outs),
(ins),
"dealloc_return",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000000;
let isTerminator = 1;
let isIndirectBranch = 1;
@@ -25892,7 +25987,7 @@ def SL2_return_f : HInst<
(outs),
(ins),
"if (!p0) dealloc_return",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25913,7 +26008,7 @@ def SL2_return_fnew : HInst<
(outs),
(ins),
"if (!p0.new) dealloc_return:nt",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25935,7 +26030,7 @@ def SL2_return_t : HInst<
(outs),
(ins),
"if (p0) dealloc_return",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000100;
let isPredicated = 1;
let isTerminator = 1;
@@ -25955,7 +26050,7 @@ def SL2_return_tnew : HInst<
(outs),
(ins),
"if (p0.new) dealloc_return:nt",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000110;
let isPredicated = 1;
let isTerminator = 1;
@@ -25976,7 +26071,7 @@ def SS1_storeb_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii, GeneralSubRegs:$Rt16),
"memb($Rs16+#$Ii) = $Rt16",
-tc_05b6c987, TypeSUBINSN>, Enc_b38ffc {
+tc_30b9bb4a, TypeSUBINSN>, Enc_b38ffc {
let Inst{12-12} = 0b1;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -25988,7 +26083,7 @@ def SS1_storew_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii, GeneralSubRegs:$Rt16),
"memw($Rs16+#$Ii) = $Rt16",
-tc_05b6c987, TypeSUBINSN>, Enc_f55a0c {
+tc_30b9bb4a, TypeSUBINSN>, Enc_f55a0c {
let Inst{12-12} = 0b0;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26000,7 +26095,7 @@ def SS2_allocframe : HInst<
(outs),
(ins u5_3Imm:$Ii),
"allocframe(#$Ii)",
-tc_0fc1ae07, TypeSUBINSN>, Enc_6f70ca {
+tc_49a8207d, TypeSUBINSN>, Enc_6f70ca {
let Inst{3-0} = 0b0000;
let Inst{12-9} = 0b1110;
let addrMode = BaseImmOffset;
@@ -26015,7 +26110,7 @@ def SS2_storebi0 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"memb($Rs16+#$Ii) = #0",
-tc_57288781, TypeSUBINSN>, Enc_84d359 {
+tc_89e94ad3, TypeSUBINSN>, Enc_84d359 {
let Inst{12-8} = 0b10010;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -26027,7 +26122,7 @@ def SS2_storebi1 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"memb($Rs16+#$Ii) = #1",
-tc_57288781, TypeSUBINSN>, Enc_84d359 {
+tc_89e94ad3, TypeSUBINSN>, Enc_84d359 {
let Inst{12-8} = 0b10011;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -26039,7 +26134,7 @@ def SS2_stored_sp : HInst<
(outs),
(ins s6_3Imm:$Ii, GeneralDoubleLow8Regs:$Rtt8),
"memd(r29+#$Ii) = $Rtt8",
-tc_a788683e, TypeSUBINSN>, Enc_b8309d {
+tc_0371abea, TypeSUBINSN>, Enc_b8309d {
let Inst{12-9} = 0b0101;
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
@@ -26052,7 +26147,7 @@ def SS2_storeh_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii, GeneralSubRegs:$Rt16),
"memh($Rs16+#$Ii) = $Rt16",
-tc_05b6c987, TypeSUBINSN>, Enc_625deb {
+tc_30b9bb4a, TypeSUBINSN>, Enc_625deb {
let Inst{12-11} = 0b00;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
@@ -26064,7 +26159,7 @@ def SS2_storew_sp : HInst<
(outs),
(ins u5_2Imm:$Ii, GeneralSubRegs:$Rt16),
"memw(r29+#$Ii) = $Rt16",
-tc_a788683e, TypeSUBINSN>, Enc_87c142 {
+tc_0371abea, TypeSUBINSN>, Enc_87c142 {
let Inst{12-9} = 0b0100;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26077,7 +26172,7 @@ def SS2_storewi0 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"memw($Rs16+#$Ii) = #0",
-tc_57288781, TypeSUBINSN>, Enc_a6ce9c {
+tc_89e94ad3, TypeSUBINSN>, Enc_a6ce9c {
let Inst{12-8} = 0b10000;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26089,7 +26184,7 @@ def SS2_storewi1 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"memw($Rs16+#$Ii) = #1",
-tc_57288781, TypeSUBINSN>, Enc_a6ce9c {
+tc_89e94ad3, TypeSUBINSN>, Enc_a6ce9c {
let Inst{12-8} = 0b10001;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26227,7 +26322,7 @@ def V6_extractw : HInst<
(outs IntRegs:$Rd32),
(ins HvxVR:$Vu32, IntRegs:$Rs32),
"$Rd32 = vextract($Vu32,$Rs32)",
-tc_9777e6bf, TypeLD>, Enc_50e578, Requires<[UseHVXV60]> {
+tc_540c3da3, TypeLD>, Enc_50e578, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10010010000;
@@ -26448,7 +26543,7 @@ def V6_lvsplatb : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32.b = vsplat($Rt32)",
-tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
+tc_c4edf264, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b00011001110;
let hasNewValue = 1;
@@ -26459,7 +26554,7 @@ def V6_lvsplath : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32.h = vsplat($Rt32)",
-tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
+tc_c4edf264, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b00011001110;
let hasNewValue = 1;
@@ -26470,7 +26565,7 @@ def V6_lvsplatw : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32 = vsplat($Rt32)",
-tc_6b78cf13, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[UseHVXV60]> {
+tc_c4edf264, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[UseHVXV60]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -26481,7 +26576,7 @@ def V6_pred_and : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = and($Qs4,$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26494,7 +26589,7 @@ def V6_pred_and_n : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = and($Qs4,!$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000101;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26507,7 +26602,7 @@ def V6_pred_not : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4),
"$Qd4 = not($Qs4)",
-tc_71337255, TypeCVI_VA>, Enc_bfbf03, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_bfbf03, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000010;
let Inst{13-10} = 0b0000;
let Inst{31-16} = 0b0001111000000011;
@@ -26519,7 +26614,7 @@ def V6_pred_or : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = or($Qs4,$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000001;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26532,7 +26627,7 @@ def V6_pred_or_n : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = or($Qs4,!$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000100;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26545,7 +26640,7 @@ def V6_pred_scalar2 : HInst<
(outs HvxQR:$Qd4),
(ins IntRegs:$Rt32),
"$Qd4 = vsetq($Rt32)",
-tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV60]> {
+tc_5bf8afbb, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV60]> {
let Inst{13-2} = 0b000000010001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -26556,7 +26651,7 @@ def V6_pred_scalar2v2 : HInst<
(outs HvxQR:$Qd4),
(ins IntRegs:$Rt32),
"$Qd4 = vsetq2($Rt32)",
-tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV62]> {
+tc_5bf8afbb, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV62]> {
let Inst{13-2} = 0b000000010011;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -26567,7 +26662,7 @@ def V6_pred_xor : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = xor($Qs4,$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000011;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26580,7 +26675,7 @@ def V6_shuffeqh : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4.b = vshuffe($Qs4.h,$Qt4.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
let Inst{7-2} = 0b000110;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26593,7 +26688,7 @@ def V6_shuffeqw : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4.h = vshuffe($Qs4.w,$Qt4.w)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
let Inst{7-2} = 0b000111;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26743,7 +26838,7 @@ def V6_vL32Ub_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmemu($Rt32+#$Ii)",
-tc_35e92f8e, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]> {
+tc_a7e6707d, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -26760,7 +26855,7 @@ def V6_vL32Ub_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmemu($Rx32++#$Ii)",
-tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]> {
+tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -26779,7 +26874,7 @@ def V6_vL32Ub_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32 = vmemu($Rx32++$Mu2)",
-tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_2ebe3b, Requires<[UseHVXV60]> {
+tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_2ebe3b, Requires<[UseHVXV60]> {
let Inst{12-5} = 0b00000111;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -26796,7 +26891,7 @@ def V6_vL32b_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmem($Rt32+#$Ii)",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -26816,7 +26911,7 @@ def V6_vL32b_cur_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.cur = vmem($Rt32+#$Ii)",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -26836,7 +26931,7 @@ def V6_vL32b_cur_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -26856,7 +26951,7 @@ def V6_vL32b_cur_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -26878,7 +26973,7 @@ def V6_vL32b_cur_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000101;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -26899,7 +26994,7 @@ def V6_vL32b_cur_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.cur = vmem($Rx32++#$Ii)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -26920,7 +27015,7 @@ def V6_vL32b_cur_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.cur = vmem($Rx32++$Mu2)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000001;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -26940,7 +27035,7 @@ def V6_vL32b_cur_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -26959,7 +27054,7 @@ def V6_vL32b_cur_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -26980,7 +27075,7 @@ def V6_vL32b_cur_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000100;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27000,7 +27095,7 @@ def V6_vL32b_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27019,7 +27114,7 @@ def V6_vL32b_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27040,7 +27135,7 @@ def V6_vL32b_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000011;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27060,7 +27155,7 @@ def V6_vL32b_nt_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmem($Rt32+#$Ii):nt",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -27081,7 +27176,7 @@ def V6_vL32b_nt_cur_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.cur = vmem($Rt32+#$Ii):nt",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -27102,7 +27197,7 @@ def V6_vL32b_nt_cur_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27123,7 +27218,7 @@ def V6_vL32b_nt_cur_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27146,7 +27241,7 @@ def V6_vL32b_nt_cur_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000101;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27168,7 +27263,7 @@ def V6_vL32b_nt_cur_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.cur = vmem($Rx32++#$Ii):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -27190,7 +27285,7 @@ def V6_vL32b_nt_cur_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.cur = vmem($Rx32++$Mu2):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000001;
let Inst{31-21} = 0b00101011010;
let hasNewValue = 1;
@@ -27211,7 +27306,7 @@ def V6_vL32b_nt_cur_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27231,7 +27326,7 @@ def V6_vL32b_nt_cur_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27253,7 +27348,7 @@ def V6_vL32b_nt_cur_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000100;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27274,7 +27369,7 @@ def V6_vL32b_nt_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27294,7 +27389,7 @@ def V6_vL32b_nt_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27316,7 +27411,7 @@ def V6_vL32b_nt_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000011;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27337,7 +27432,7 @@ def V6_vL32b_nt_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmem($Rx32++#$Ii):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -27359,7 +27454,7 @@ def V6_vL32b_nt_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32 = vmem($Rx32++$Mu2):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011010;
let hasNewValue = 1;
@@ -27380,7 +27475,7 @@ def V6_vL32b_nt_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27399,7 +27494,7 @@ def V6_vL32b_nt_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27420,7 +27515,7 @@ def V6_vL32b_nt_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000010;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27440,7 +27535,7 @@ def V6_vL32b_nt_tmp_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.tmp = vmem($Rt32+#$Ii):nt",
-tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -27460,7 +27555,7 @@ def V6_vL32b_nt_tmp_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27480,7 +27575,7 @@ def V6_vL32b_nt_tmp_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27502,7 +27597,7 @@ def V6_vL32b_nt_tmp_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000111;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27523,7 +27618,7 @@ def V6_vL32b_nt_tmp_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.tmp = vmem($Rx32++#$Ii):nt",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -27544,7 +27639,7 @@ def V6_vL32b_nt_tmp_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.tmp = vmem($Rx32++$Mu2):nt",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000010;
let Inst{31-21} = 0b00101011010;
let hasNewValue = 1;
@@ -27564,7 +27659,7 @@ def V6_vL32b_nt_tmp_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27583,7 +27678,7 @@ def V6_vL32b_nt_tmp_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27604,7 +27699,7 @@ def V6_vL32b_nt_tmp_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000110;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27624,7 +27719,7 @@ def V6_vL32b_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmem($Rx32++#$Ii)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -27645,7 +27740,7 @@ def V6_vL32b_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32 = vmem($Rx32++$Mu2)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -27665,7 +27760,7 @@ def V6_vL32b_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27683,7 +27778,7 @@ def V6_vL32b_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27703,7 +27798,7 @@ def V6_vL32b_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000010;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27722,7 +27817,7 @@ def V6_vL32b_tmp_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.tmp = vmem($Rt32+#$Ii)",
-tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -27741,7 +27836,7 @@ def V6_vL32b_tmp_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27760,7 +27855,7 @@ def V6_vL32b_tmp_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27781,7 +27876,7 @@ def V6_vL32b_tmp_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000111;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27801,7 +27896,7 @@ def V6_vL32b_tmp_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.tmp = vmem($Rx32++#$Ii)",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -27821,7 +27916,7 @@ def V6_vL32b_tmp_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.tmp = vmem($Rx32++$Mu2)",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000010;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -27840,7 +27935,7 @@ def V6_vL32b_tmp_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27858,7 +27953,7 @@ def V6_vL32b_tmp_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27878,7 +27973,7 @@ def V6_vL32b_tmp_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000110;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27897,7 +27992,7 @@ def V6_vS32Ub_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmemu($Rt32+#$Ii) = $Vs32",
-tc_354299ad, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_f21e8abb, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -27912,7 +28007,7 @@ def V6_vS32Ub_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmemu($Rt32+#$Ii) = $Vs32",
-tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_131f1c81, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -27927,7 +28022,7 @@ def V6_vS32Ub_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmemu($Rx32++#$Ii) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -27944,7 +28039,7 @@ def V6_vS32Ub_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Pv4) vmemu($Rx32++$Mu2) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000111;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -27960,7 +28055,7 @@ def V6_vS32Ub_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmemu($Rx32++#$Ii) = $Vs32",
-tc_7fa82b08, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -27976,7 +28071,7 @@ def V6_vS32Ub_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"vmemu($Rx32++$Mu2) = $Vs32",
-tc_7fa82b08, TypeCVI_VM_STU>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
+tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-5} = 0b00000111;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -27991,7 +28086,7 @@ def V6_vS32Ub_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmemu($Rt32+#$Ii) = $Vs32",
-tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_131f1c81, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28005,7 +28100,7 @@ def V6_vS32Ub_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmemu($Rx32++#$Ii) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28021,7 +28116,7 @@ def V6_vS32Ub_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Pv4) vmemu($Rx32++$Mu2) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000110;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28036,7 +28131,7 @@ def V6_vS32b_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rt32+#$Ii) = $Vs32",
-tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28052,7 +28147,7 @@ def V6_vS32b_new_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rt32+#$Ii) = $Os8.new",
-tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
+tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28071,7 +28166,7 @@ def V6_vS32b_new_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rt32+#$Ii) = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01101;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28090,7 +28185,7 @@ def V6_vS32b_new_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++#$Ii) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28111,7 +28206,7 @@ def V6_vS32b_new_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++$Mu2) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001101;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28131,7 +28226,7 @@ def V6_vS32b_new_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rx32++#$Ii) = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -28151,7 +28246,7 @@ def V6_vS32b_new_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"vmem($Rx32++$Mu2) = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-3} = 0b0000000100;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -28170,7 +28265,7 @@ def V6_vS32b_new_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rt32+#$Ii) = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01000;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28188,7 +28283,7 @@ def V6_vS32b_new_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++#$Ii) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28208,7 +28303,7 @@ def V6_vS32b_new_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++$Mu2) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001000;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28227,7 +28322,7 @@ def V6_vS32b_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28243,7 +28338,7 @@ def V6_vS32b_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28261,7 +28356,7 @@ def V6_vS32b_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28278,7 +28373,7 @@ def V6_vS32b_nqpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000100;
let addrMode = BaseImmOffset;
@@ -28290,7 +28385,7 @@ def V6_vS32b_nqpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -28304,7 +28399,7 @@ def V6_vS32b_nqpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011100;
let addrMode = PostInc;
@@ -28317,7 +28412,7 @@ def V6_vS32b_nt_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rt32+#$Ii):nt = $Vs32",
-tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000011;
@@ -28334,7 +28429,7 @@ def V6_vS32b_nt_new_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rt32+#$Ii):nt = $Os8.new",
-tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
+tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000011;
@@ -28354,7 +28449,7 @@ def V6_vS32b_nt_new_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01111;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28374,7 +28469,7 @@ def V6_vS32b_nt_new_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28396,7 +28491,7 @@ def V6_vS32b_nt_new_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001111;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28417,7 +28512,7 @@ def V6_vS32b_nt_new_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rx32++#$Ii):nt = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001011;
@@ -28438,7 +28533,7 @@ def V6_vS32b_nt_new_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"vmem($Rx32++$Mu2):nt = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-3} = 0b0000000100;
let Inst{31-21} = 0b00101011011;
let addrMode = PostInc;
@@ -28458,7 +28553,7 @@ def V6_vS32b_nt_new_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01010;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28477,7 +28572,7 @@ def V6_vS32b_nt_new_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28498,7 +28593,7 @@ def V6_vS32b_nt_new_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001010;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28518,7 +28613,7 @@ def V6_vS32b_nt_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28535,7 +28630,7 @@ def V6_vS32b_nt_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28554,7 +28649,7 @@ def V6_vS32b_nt_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28572,7 +28667,7 @@ def V6_vS32b_nt_nqpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000110;
let addrMode = BaseImmOffset;
@@ -28585,7 +28680,7 @@ def V6_vS32b_nt_nqpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -28600,7 +28695,7 @@ def V6_vS32b_nt_nqpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011110;
let addrMode = PostInc;
@@ -28614,7 +28709,7 @@ def V6_vS32b_nt_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rx32++#$Ii):nt = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001011;
@@ -28632,7 +28727,7 @@ def V6_vS32b_nt_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"vmem($Rx32++$Mu2):nt = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011011;
let addrMode = PostInc;
@@ -28649,7 +28744,7 @@ def V6_vS32b_nt_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28665,7 +28760,7 @@ def V6_vS32b_nt_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28683,7 +28778,7 @@ def V6_vS32b_nt_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28700,7 +28795,7 @@ def V6_vS32b_nt_qpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000110;
let addrMode = BaseImmOffset;
@@ -28713,7 +28808,7 @@ def V6_vS32b_nt_qpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -28728,7 +28823,7 @@ def V6_vS32b_nt_qpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011110;
let addrMode = PostInc;
@@ -28742,7 +28837,7 @@ def V6_vS32b_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rx32++#$Ii) = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -28759,7 +28854,7 @@ def V6_vS32b_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"vmem($Rx32++$Mu2) = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -28774,7 +28869,7 @@ def V6_vS32b_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28789,7 +28884,7 @@ def V6_vS32b_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28806,7 +28901,7 @@ def V6_vS32b_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28822,7 +28917,7 @@ def V6_vS32b_qpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000100;
let addrMode = BaseImmOffset;
@@ -28834,7 +28929,7 @@ def V6_vS32b_qpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -28848,7 +28943,7 @@ def V6_vS32b_qpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011100;
let addrMode = PostInc;
@@ -28861,7 +28956,7 @@ def V6_vS32b_srls_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"vmem($Rt32+#$Ii):scatter_release",
-tc_29841470, TypeCVI_SCATTER_NEW_RST>, Enc_ff3442, Requires<[UseHVXV65]> {
+tc_3ce09744, TypeCVI_SCATTER_NEW_RST>, Enc_ff3442, Requires<[UseHVXV65]> {
let Inst{7-0} = 0b00101000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28875,7 +28970,7 @@ def V6_vS32b_srls_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"vmem($Rx32++#$Ii):scatter_release",
-tc_5c03dc63, TypeCVI_SCATTER_NEW_RST>, Enc_6c9ee0, Requires<[UseHVXV65]> {
+tc_20a4bbec, TypeCVI_SCATTER_NEW_RST>, Enc_6c9ee0, Requires<[UseHVXV65]> {
let Inst{7-0} = 0b00101000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -28890,7 +28985,7 @@ def V6_vS32b_srls_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"vmem($Rx32++$Mu2):scatter_release",
-tc_5c03dc63, TypeCVI_SCATTER_NEW_RST>, Enc_44661f, Requires<[UseHVXV65]> {
+tc_20a4bbec, TypeCVI_SCATTER_NEW_RST>, Enc_44661f, Requires<[UseHVXV65]> {
let Inst{12-0} = 0b0000000101000;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -28904,7 +28999,7 @@ def V6_vabsb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vabs($Vu32.b)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -28927,7 +29022,7 @@ def V6_vabsb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vabs($Vu32.b):sat",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -28950,7 +29045,7 @@ def V6_vabsdiffh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vabsdiff($Vu32.h,$Vv32.h)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -28973,7 +29068,7 @@ def V6_vabsdiffub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vabsdiff($Vu32.ub,$Vv32.ub)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -28996,7 +29091,7 @@ def V6_vabsdiffuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vabsdiff($Vu32.uh,$Vv32.uh)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -29019,7 +29114,7 @@ def V6_vabsdiffw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vabsdiff($Vu32.w,$Vv32.w)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -29042,7 +29137,7 @@ def V6_vabsh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vabs($Vu32.h)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29065,7 +29160,7 @@ def V6_vabsh_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vabs($Vu32.h):sat",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29088,7 +29183,7 @@ def V6_vabsub_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.ub = vabs($Vu32.b)",
-tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -29099,7 +29194,7 @@ def V6_vabsuh_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uh = vabs($Vu32.h)",
-tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -29110,7 +29205,7 @@ def V6_vabsuw_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uw = vabs($Vu32.w)",
-tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -29121,7 +29216,7 @@ def V6_vabsw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.w = vabs($Vu32.w)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29144,7 +29239,7 @@ def V6_vabsw_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.w = vabs($Vu32.w):sat",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29167,7 +29262,7 @@ def V6_vaddb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vadd($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -29190,7 +29285,7 @@ def V6_vaddb_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29213,7 +29308,7 @@ def V6_vaddbnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.b += $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29241,7 +29336,7 @@ def V6_vaddbq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.b += $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29269,7 +29364,7 @@ def V6_vaddbsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vadd($Vu32.b,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -29292,7 +29387,7 @@ def V6_vaddbsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -29315,7 +29410,7 @@ def V6_vaddcarry : HInst<
(outs HvxVR:$Vd32, HvxQR:$Qx4),
(ins HvxVR:$Vu32, HvxVR:$Vv32, HvxQR:$Qx4in),
"$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qx4):carry",
-tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
+tc_7e6a3e89, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100101;
@@ -29324,11 +29419,37 @@ let opNewValue = 0;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_vaddcarryo : HInst<
+(outs HvxVR:$Vd32, HvxQR:$Qe4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.w,$Qe4 = vadd($Vu32.w,$Vv32.w):carry",
+tc_e35c1e93, TypeCOPROC_VX>, Enc_c1d806, Requires<[UseHVXV66]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddcarrysat : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32, HvxQR:$Qs4),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qs4):carry:sat",
+tc_257f6f7c, TypeCVI_VA>, Enc_e0820b, Requires<[UseHVXV66]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vaddclbh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vadd(vclb($Vu32.h),$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -29340,7 +29461,7 @@ def V6_vaddclbw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vadd(vclb($Vu32.w),$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -29352,7 +29473,7 @@ def V6_vaddh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vadd($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -29375,7 +29496,7 @@ def V6_vaddh_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29398,7 +29519,7 @@ def V6_vaddhnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.h += $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29426,7 +29547,7 @@ def V6_vaddhq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.h += $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29454,7 +29575,7 @@ def V6_vaddhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vadd($Vu32.h,$Vv32.h):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29477,7 +29598,7 @@ def V6_vaddhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -29500,7 +29621,7 @@ def V6_vaddhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vadd($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -29512,7 +29633,7 @@ def V6_vaddhw_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vadd($Vu32.h,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -29550,7 +29671,7 @@ def V6_vaddubh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vadd($Vu32.ub,$Vv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -29562,7 +29683,7 @@ def V6_vaddubh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.h += vadd($Vu32.ub,$Vv32.ub)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100010;
@@ -29600,7 +29721,7 @@ def V6_vaddubsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vadd($Vu32.ub,$Vv32.ub):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29623,7 +29744,7 @@ def V6_vaddubsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.ub = vadd($Vuu32.ub,$Vvv32.ub):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29646,7 +29767,7 @@ def V6_vaddububb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vadd($Vu32.ub,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -29658,7 +29779,7 @@ def V6_vadduhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vadd($Vu32.uh,$Vv32.uh):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29681,7 +29802,7 @@ def V6_vadduhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uh = vadd($Vuu32.uh,$Vvv32.uh):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -29704,7 +29825,7 @@ def V6_vadduhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vadd($Vu32.uh,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -29716,7 +29837,7 @@ def V6_vadduhw_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vadd($Vu32.uh,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100010;
@@ -29754,7 +29875,7 @@ def V6_vadduwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vadd($Vu32.uw,$Vv32.uw):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -29777,7 +29898,7 @@ def V6_vadduwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uw = vadd($Vuu32.uw,$Vvv32.uw):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -29800,7 +29921,7 @@ def V6_vaddw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vadd($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29823,7 +29944,7 @@ def V6_vaddw_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29846,7 +29967,7 @@ def V6_vaddwnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.w += $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29874,7 +29995,7 @@ def V6_vaddwq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.w += $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29902,7 +30023,7 @@ def V6_vaddwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vadd($Vu32.w,$Vv32.w):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29925,7 +30046,7 @@ def V6_vaddwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -29948,7 +30069,7 @@ def V6_valignb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = valign($Vu32,$Vv32,$Rt8)",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -29960,7 +30081,7 @@ def V6_valignbi : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vd32 = valign($Vu32,$Vv32,#$Ii)",
-tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110001;
let hasNewValue = 1;
@@ -29971,7 +30092,7 @@ def V6_vand : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vand($Vu32,$Vv32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -29983,7 +30104,7 @@ def V6_vandnqrt : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qu4, IntRegs:$Rt32),
"$Vd32 = vand(!$Qu4,$Rt32)",
-tc_e231aa4f, TypeCVI_VX>, Enc_7b7ba8, Requires<[UseHVXV62]> {
+tc_ac4046bc, TypeCVI_VX>, Enc_7b7ba8, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-10} = 0b0001;
let Inst{31-21} = 0b00011001101;
@@ -29995,7 +30116,7 @@ def V6_vandnqrt_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxQR:$Qu4, IntRegs:$Rt32),
"$Vx32 |= vand(!$Qu4,$Rt32)",
-tc_9311da3f, TypeCVI_VX>, Enc_895bd9, Requires<[UseHVXV62]> {
+tc_2e8f5f6e, TypeCVI_VX>, Enc_895bd9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b00011001011;
@@ -30033,7 +30154,7 @@ def V6_vandqrt : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qu4, IntRegs:$Rt32),
"$Vd32 = vand($Qu4,$Rt32)",
-tc_e231aa4f, TypeCVI_VX_LATE>, Enc_7b7ba8, Requires<[UseHVXV60]> {
+tc_ac4046bc, TypeCVI_VX_LATE>, Enc_7b7ba8, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b00011001101;
@@ -30045,7 +30166,7 @@ def V6_vandqrt_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxQR:$Qu4, IntRegs:$Rt32),
"$Vx32 |= vand($Qu4,$Rt32)",
-tc_9311da3f, TypeCVI_VX_LATE>, Enc_895bd9, Requires<[UseHVXV60]> {
+tc_2e8f5f6e, TypeCVI_VX_LATE>, Enc_895bd9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b00011001011;
@@ -30083,7 +30204,7 @@ def V6_vandvnqv : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4, HvxVR:$Vu32),
"$Vd32 = vand(!$Qv4,$Vu32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000011;
@@ -30096,7 +30217,7 @@ def V6_vandvqv : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4, HvxVR:$Vu32),
"$Vd32 = vand($Qv4,$Vu32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000011;
@@ -30109,7 +30230,7 @@ def V6_vandvrt : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Qd4 = vand($Vu32,$Rt32)",
-tc_e231aa4f, TypeCVI_VX_LATE>, Enc_0f8bab, Requires<[UseHVXV60]> {
+tc_ac4046bc, TypeCVI_VX_LATE>, Enc_0f8bab, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -30121,7 +30242,7 @@ def V6_vandvrt_acc : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Qx4 |= vand($Vu32,$Rt32)",
-tc_9311da3f, TypeCVI_VX_LATE>, Enc_adf111, Requires<[UseHVXV60]> {
+tc_2e8f5f6e, TypeCVI_VX_LATE>, Enc_adf111, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -30155,7 +30276,7 @@ def V6_vaslh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vasl($Vu32.h,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -30167,7 +30288,7 @@ def V6_vaslh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vasl($Vu32.h,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -30205,7 +30326,7 @@ def V6_vaslhv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vasl($Vu32.h,$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30228,7 +30349,7 @@ def V6_vaslw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vasl($Vu32.w,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -30240,7 +30361,7 @@ def V6_vaslw_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vasl($Vu32.w,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -30278,7 +30399,7 @@ def V6_vaslwv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vasl($Vu32.w,$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30297,11 +30418,36 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vasr_into : HInst<
+(outs HvxWR:$Vxx32),
+(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vxx32.w = vasrinto($Vu32.w,$Vv32.w)",
+tc_df80eeb0, TypeCVI_VP_VS>, Enc_3fc427, Requires<[UseHVXV66]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vasr_into_alt : HInst<
+(outs HvxWR:$Vxx32),
+(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vxx32 = vasrinto($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
def V6_vasrh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vasr($Vu32.h,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -30313,7 +30459,7 @@ def V6_vasrh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vasr($Vu32.h,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -30351,7 +30497,7 @@ def V6_vasrhbrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -30363,7 +30509,7 @@ def V6_vasrhbrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30373,7 +30519,7 @@ def V6_vasrhbsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30385,7 +30531,7 @@ def V6_vasrhubrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30397,7 +30543,7 @@ def V6_vasrhubrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30407,7 +30553,7 @@ def V6_vasrhubsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30419,7 +30565,7 @@ def V6_vasrhubsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30429,7 +30575,7 @@ def V6_vasrhv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vasr($Vu32.h,$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30452,7 +30598,7 @@ def V6_vasruhubrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.uh,$Vv32.uh,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30464,7 +30610,7 @@ def V6_vasruhubsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.uh,$Vv32.uh,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011000;
@@ -30476,7 +30622,7 @@ def V6_vasruwuhrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30488,7 +30634,7 @@ def V6_vasruwuhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011000;
@@ -30500,7 +30646,7 @@ def V6_vasrw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vasr($Vu32.w,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -30512,7 +30658,7 @@ def V6_vasrw_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vasr($Vu32.w,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -30550,7 +30696,7 @@ def V6_vasrwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8)",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30562,7 +30708,7 @@ def V6_vasrwh_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8)",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30572,7 +30718,7 @@ def V6_vasrwhrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30584,7 +30730,7 @@ def V6_vasrwhrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30594,7 +30740,7 @@ def V6_vasrwhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30606,7 +30752,7 @@ def V6_vasrwhsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30616,7 +30762,7 @@ def V6_vasrwuhrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30628,7 +30774,7 @@ def V6_vasrwuhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30640,7 +30786,7 @@ def V6_vasrwuhsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwuh($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30650,7 +30796,7 @@ def V6_vasrwv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vasr($Vu32.w,$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30673,7 +30819,7 @@ def V6_vassign : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32 = $Vu32",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-16} = 0b0001111000000011;
@@ -30695,7 +30841,7 @@ def V6_vavgb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vavg($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30718,7 +30864,7 @@ def V6_vavgbrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vavg($Vu32.b,$Vv32.b):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30741,7 +30887,7 @@ def V6_vavgh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vavg($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30764,7 +30910,7 @@ def V6_vavghrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vavg($Vu32.h,$Vv32.h):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30787,7 +30933,7 @@ def V6_vavgub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30810,7 +30956,7 @@ def V6_vavgubrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30833,7 +30979,7 @@ def V6_vavguh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30856,7 +31002,7 @@ def V6_vavguhrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30879,7 +31025,7 @@ def V6_vavguw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vavg($Vu32.uw,$Vv32.uw)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30902,7 +31048,7 @@ def V6_vavguwrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vavg($Vu32.uw,$Vv32.uw):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30925,7 +31071,7 @@ def V6_vavgw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vavg($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30948,7 +31094,7 @@ def V6_vavgwrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vavg($Vu32.w,$Vv32.w):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30971,7 +31117,7 @@ def V6_vccombine : HInst<
(outs HvxWR:$Vdd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32, HvxVR:$Vv32),
"if ($Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
-tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
+tc_af25efd9, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011010011;
@@ -30984,7 +31130,7 @@ def V6_vcl0h : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uh = vcl0($Vu32.uh)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -31007,7 +31153,7 @@ def V6_vcl0w : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uw = vcl0($Vu32.uw)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -31030,7 +31176,7 @@ def V6_vcmov : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32),
"if ($Ps4) $Vd32 = $Vu32",
-tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
+tc_3aacf4a8, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001101000000000;
@@ -31043,7 +31189,7 @@ def V6_vcombine : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32 = vcombine($Vu32,$Vv32)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -31067,7 +31213,7 @@ def V6_vdd0 : HInst<
(outs HvxWR:$Vdd32),
(ins),
"$Vdd32 = #0",
-tc_8a6eb39a, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_718b5c53, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -31078,7 +31224,7 @@ def V6_vdeal : HInst<
(outs HvxVR:$Vy32, HvxVR:$Vx32),
(ins HvxVR:$Vy32in, HvxVR:$Vx32in, IntRegs:$Rt32),
"vdeal($Vy32,$Vx32,$Rt32)",
-tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
+tc_561aaa58, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001111;
@@ -31093,7 +31239,7 @@ def V6_vdealb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vdeal($Vu32.b)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -31105,7 +31251,7 @@ def V6_vdealb4w : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vdeale($Vu32.b,$Vv32.b)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -31139,7 +31285,7 @@ def V6_vdealh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vdeal($Vu32.h)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -31162,7 +31308,7 @@ def V6_vdealvdd : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32 = vdeal($Vu32,$Vv32,$Rt8)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -31174,7 +31320,7 @@ def V6_vdelta : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vdelta($Vu32,$Vv32)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -31186,7 +31332,7 @@ def V6_vdmpybus : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vdmpy($Vu32.ub,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31198,7 +31344,7 @@ def V6_vdmpybus_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vdmpy($Vu32.ub,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -31236,7 +31382,7 @@ def V6_vdmpybus_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vdmpy($Vuu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31248,7 +31394,7 @@ def V6_vdmpybus_dv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vdmpy($Vuu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -31286,7 +31432,7 @@ def V6_vdmpyhb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vu32.h,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31298,7 +31444,7 @@ def V6_vdmpyhb_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vu32.h,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -31336,7 +31482,7 @@ def V6_vdmpyhb_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vdmpy($Vuu32.h,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31348,7 +31494,7 @@ def V6_vdmpyhb_dv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vdmpy($Vuu32.h,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31386,7 +31532,7 @@ def V6_vdmpyhisat : HInst<
(outs HvxVR:$Vd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vuu32.h,$Rt32.h):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31398,7 +31544,7 @@ def V6_vdmpyhisat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vuu32.h,$Rt32.h):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31436,7 +31582,7 @@ def V6_vdmpyhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31448,7 +31594,7 @@ def V6_vdmpyhsat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31486,7 +31632,7 @@ def V6_vdmpyhsuisat : HInst<
(outs HvxVR:$Vd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31498,7 +31644,7 @@ def V6_vdmpyhsuisat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31536,7 +31682,7 @@ def V6_vdmpyhsusat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31548,7 +31694,7 @@ def V6_vdmpyhsusat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31586,7 +31732,7 @@ def V6_vdmpyhvsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -31598,7 +31744,7 @@ def V6_vdmpyhvsat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vdmpy($Vu32.h,$Vv32.h):sat",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -31636,7 +31782,7 @@ def V6_vdsaduh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.uw = vdsad($Vuu32.uh,$Rt32.uh)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31648,7 +31794,7 @@ def V6_vdsaduh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.uw += vdsad($Vuu32.uh,$Rt32.uh)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -31686,7 +31832,7 @@ def V6_veqb : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.eq($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31698,7 +31844,7 @@ def V6_veqb_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31709,7 +31855,7 @@ def V6_veqb_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.eq($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31721,7 +31867,7 @@ def V6_veqb_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31732,7 +31878,7 @@ def V6_veqh : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.eq($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31744,7 +31890,7 @@ def V6_veqh_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31755,7 +31901,7 @@ def V6_veqh_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.eq($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31767,7 +31913,7 @@ def V6_veqh_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31778,7 +31924,7 @@ def V6_veqw : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.eq($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31790,7 +31936,7 @@ def V6_veqw_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31801,7 +31947,7 @@ def V6_veqw_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.eq($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31813,7 +31959,7 @@ def V6_veqw_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31824,7 +31970,7 @@ def V6_vgathermh : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"vtmp.h = vgather($Rt32,$Mu2,$Vv32.h).h",
-tc_66bb62ea, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
+tc_e8797b98, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
let Inst{12-5} = 0b00001000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31840,7 +31986,7 @@ def V6_vgathermhq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"if ($Qs4) vtmp.h = vgather($Rt32,$Mu2,$Vv32.h).h",
-tc_63e3d94c, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
+tc_05ac6f98, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
let Inst{12-7} = 0b001010;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31856,7 +32002,7 @@ def V6_vgathermhw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32),
"vtmp.h = vgather($Rt32,$Mu2,$Vvv32.w).h",
-tc_bfe309d5, TypeCVI_GATHER>, Enc_28dcbb, Requires<[UseHVXV65]> {
+tc_05058f6f, TypeCVI_GATHER>, Enc_28dcbb, Requires<[UseHVXV65]> {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31872,7 +32018,7 @@ def V6_vgathermhwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32),
"if ($Qs4) vtmp.h = vgather($Rt32,$Mu2,$Vvv32.w).h",
-tc_98733e9d, TypeCVI_GATHER>, Enc_4e4a80, Requires<[UseHVXV65]> {
+tc_fd7610da, TypeCVI_GATHER>, Enc_4e4a80, Requires<[UseHVXV65]> {
let Inst{12-7} = 0b001100;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31888,7 +32034,7 @@ def V6_vgathermw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"vtmp.w = vgather($Rt32,$Mu2,$Vv32.w).w",
-tc_66bb62ea, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
+tc_e8797b98, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31904,7 +32050,7 @@ def V6_vgathermwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"if ($Qs4) vtmp.w = vgather($Rt32,$Mu2,$Vv32.w).w",
-tc_63e3d94c, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
+tc_05ac6f98, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
let Inst{12-7} = 0b001000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31920,7 +32066,7 @@ def V6_vgtb : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31932,7 +32078,7 @@ def V6_vgtb_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31943,7 +32089,7 @@ def V6_vgtb_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31955,7 +32101,7 @@ def V6_vgtb_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31966,7 +32112,7 @@ def V6_vgth : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31978,7 +32124,7 @@ def V6_vgth_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31989,7 +32135,7 @@ def V6_vgth_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32001,7 +32147,7 @@ def V6_vgth_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32012,7 +32158,7 @@ def V6_vgtub : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32024,7 +32170,7 @@ def V6_vgtub_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32035,7 +32181,7 @@ def V6_vgtub_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32047,7 +32193,7 @@ def V6_vgtub_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32058,7 +32204,7 @@ def V6_vgtuh : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32070,7 +32216,7 @@ def V6_vgtuh_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32081,7 +32227,7 @@ def V6_vgtuh_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b011001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32093,7 +32239,7 @@ def V6_vgtuh_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32104,7 +32250,7 @@ def V6_vgtuw : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32116,7 +32262,7 @@ def V6_vgtuw_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32127,7 +32273,7 @@ def V6_vgtuw_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b011010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32139,7 +32285,7 @@ def V6_vgtuw_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32150,7 +32296,7 @@ def V6_vgtw : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32162,7 +32308,7 @@ def V6_vgtw_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32173,7 +32319,7 @@ def V6_vgtw_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32185,7 +32331,7 @@ def V6_vgtw_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32196,7 +32342,7 @@ def V6_vhist : HInst<
(outs),
(ins),
"vhist",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV60]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV60]> {
let Inst{13-0} = 0b10000010000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -32205,7 +32351,7 @@ def V6_vhistq : HInst<
(outs),
(ins HvxQR:$Qv4),
"vhist($Qv4)",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV60]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV60]> {
let Inst{13-0} = 0b10000010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -32215,7 +32361,7 @@ def V6_vinsertwr : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, IntRegs:$Rt32),
"$Vx32.w = vinsert($Rt32)",
-tc_e231aa4f, TypeCVI_VX_LATE>, Enc_569cfe, Requires<[UseHVXV60]> {
+tc_ac4046bc, TypeCVI_VX_LATE>, Enc_569cfe, Requires<[UseHVXV60]> {
let Inst{13-5} = 0b100000001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -32227,7 +32373,7 @@ def V6_vlalignb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vlalign($Vu32,$Vv32,$Rt8)",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -32239,7 +32385,7 @@ def V6_vlalignbi : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vd32 = vlalign($Vu32,$Vv32,#$Ii)",
-tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110011;
let hasNewValue = 1;
@@ -32250,7 +32396,7 @@ def V6_vlsrb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.ub = vlsr($Vu32.ub,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV62]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32262,7 +32408,7 @@ def V6_vlsrh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uh = vlsr($Vu32.uh,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32285,7 +32431,7 @@ def V6_vlsrhv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vlsr($Vu32.h,$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -32308,7 +32454,7 @@ def V6_vlsrw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uw = vlsr($Vu32.uw,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32331,7 +32477,7 @@ def V6_vlsrwv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vlsr($Vu32.w,$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -32354,7 +32500,7 @@ def V6_vlut4 : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vd32.h = vlut4($Vu32.uh,$Rtt32.h)",
-tc_fa99dc24, TypeCVI_VX_DV>, Enc_263841, Requires<[UseHVXV65]> {
+tc_f1de44ef, TypeCVI_VX_DV>, Enc_263841, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -32366,7 +32512,7 @@ def V6_vlutvvb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8)",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32378,7 +32524,7 @@ def V6_vlutvvb_nm : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8):nomatch",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -32390,7 +32536,7 @@ def V6_vlutvvb_oracc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,$Rt8)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_245865, Requires<[UseHVXV60]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_245865, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32404,7 +32550,7 @@ def V6_vlutvvb_oracci : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,#$Ii)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_cd4705, Requires<[UseHVXV62]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_cd4705, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
@@ -32417,7 +32563,7 @@ def V6_vlutvvbi : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vd32.b = vlut32($Vu32.b,$Vv32.b,#$Ii)",
-tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV62]> {
+tc_56e64202, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110001;
let hasNewValue = 1;
@@ -32428,7 +32574,7 @@ def V6_vlutvwh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32440,7 +32586,7 @@ def V6_vlutvwh_nm : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8):nomatch",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV62]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -32452,7 +32598,7 @@ def V6_vlutvwh_oracc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,$Rt8)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_7b523d, Requires<[UseHVXV60]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_7b523d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32466,7 +32612,7 @@ def V6_vlutvwh_oracci : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,#$Ii)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_1178da, Requires<[UseHVXV62]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_1178da, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
@@ -32479,7 +32625,7 @@ def V6_vlutvwhi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,#$Ii)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_4b39e4, Requires<[UseHVXV62]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_4b39e4, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110011;
let hasNewValue = 1;
@@ -32490,7 +32636,7 @@ def V6_vmaxb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vmax($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -32513,7 +32659,7 @@ def V6_vmaxh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmax($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32536,7 +32682,7 @@ def V6_vmaxub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vmax($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32559,7 +32705,7 @@ def V6_vmaxuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vmax($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32582,7 +32728,7 @@ def V6_vmaxw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmax($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -32605,7 +32751,7 @@ def V6_vminb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vmin($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -32628,7 +32774,7 @@ def V6_vminh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmin($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32651,7 +32797,7 @@ def V6_vminub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vmin($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32674,7 +32820,7 @@ def V6_vminuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vmin($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32697,7 +32843,7 @@ def V6_vminw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmin($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32720,7 +32866,7 @@ def V6_vmpabus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vmpa($Vuu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -32732,7 +32878,7 @@ def V6_vmpabus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vmpa($Vuu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -32770,7 +32916,7 @@ def V6_vmpabusv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.b)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -32793,7 +32939,7 @@ def V6_vmpabuu : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vmpa($Vuu32.ub,$Rt32.ub)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV65]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -32805,7 +32951,7 @@ def V6_vmpabuu_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vmpa($Vuu32.ub,$Rt32.ub)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV65]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -32843,7 +32989,7 @@ def V6_vmpabuuv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -32866,7 +33012,7 @@ def V6_vmpahb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vmpa($Vuu32.h,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -32878,7 +33024,7 @@ def V6_vmpahb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vmpa($Vuu32.h,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -32916,7 +33062,7 @@ def V6_vmpahhsat : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vx32.h = vmpa($Vx32in.h,$Vu32.h,$Rtt32.h):sat",
-tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
+tc_90bcc1db, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -32929,7 +33075,7 @@ def V6_vmpauhb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vmpa($Vuu32.uh,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV62]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32941,7 +33087,7 @@ def V6_vmpauhb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vmpa($Vuu32.uh,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV62]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -32979,7 +33125,7 @@ def V6_vmpauhuhsat : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vx32.h = vmpa($Vx32in.h,$Vu32.uh,$Rtt32.uh):sat",
-tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
+tc_90bcc1db, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -32992,7 +33138,7 @@ def V6_vmpsuhuhsat : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vx32.h = vmps($Vx32in.h,$Vu32.uh,$Rtt32.uh):sat",
-tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
+tc_90bcc1db, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -33005,7 +33151,7 @@ def V6_vmpybus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.h = vmpy($Vu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -33017,7 +33163,7 @@ def V6_vmpybus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.h += vmpy($Vu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -33055,7 +33201,7 @@ def V6_vmpybusv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vmpy($Vu32.ub,$Vv32.b)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33067,7 +33213,7 @@ def V6_vmpybusv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.h += vmpy($Vu32.ub,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -33105,7 +33251,7 @@ def V6_vmpybv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vmpy($Vu32.b,$Vv32.b)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33117,7 +33263,7 @@ def V6_vmpybv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.h += vmpy($Vu32.b,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -33155,7 +33301,7 @@ def V6_vmpyewuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpye($Vu32.w,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -33167,7 +33313,7 @@ def V6_vmpyewuh_64 : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32 = vmpye($Vu32.w,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV62]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -33190,7 +33336,7 @@ def V6_vmpyh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.w = vmpy($Vu32.h,$Rt32.h)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -33202,7 +33348,7 @@ def V6_vmpyh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.w += vmpy($Vu32.h,$Rt32.h)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV65]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -33240,7 +33386,7 @@ def V6_vmpyhsat_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.w += vmpy($Vu32.h,$Rt32.h):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -33267,7 +33413,7 @@ def V6_vmpyhsrs : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -33290,7 +33436,7 @@ def V6_vmpyhss : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -33313,7 +33459,7 @@ def V6_vmpyhus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vmpy($Vu32.h,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -33325,7 +33471,7 @@ def V6_vmpyhus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vmpy($Vu32.h,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33363,7 +33509,7 @@ def V6_vmpyhv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vmpy($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33375,7 +33521,7 @@ def V6_vmpyhv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vmpy($Vu32.h,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -33413,7 +33559,7 @@ def V6_vmpyhvsrs : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -33436,7 +33582,7 @@ def V6_vmpyieoh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyieo($Vu32.h,$Vv32.h)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -33448,7 +33594,7 @@ def V6_vmpyiewh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyie($Vu32.w,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100010;
@@ -33475,7 +33621,7 @@ def V6_vmpyiewuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyie($Vu32.w,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -33487,7 +33633,7 @@ def V6_vmpyiewuh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyie($Vu32.w,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33525,7 +33671,7 @@ def V6_vmpyih : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmpyi($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -33537,7 +33683,7 @@ def V6_vmpyih_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.h += vmpyi($Vu32.h,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33575,7 +33721,7 @@ def V6_vmpyihb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vmpyi($Vu32.h,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -33587,7 +33733,7 @@ def V6_vmpyihb_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vmpyi($Vu32.h,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -33625,7 +33771,7 @@ def V6_vmpyiowh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyio($Vu32.w,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -33648,7 +33794,7 @@ def V6_vmpyiwb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vmpyi($Vu32.w,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -33660,7 +33806,7 @@ def V6_vmpyiwb_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vmpyi($Vu32.w,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -33698,7 +33844,7 @@ def V6_vmpyiwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vmpyi($Vu32.w,$Rt32.h)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -33710,7 +33856,7 @@ def V6_vmpyiwh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vmpyi($Vu32.w,$Rt32.h)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -33748,7 +33894,7 @@ def V6_vmpyiwub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vmpyi($Vu32.w,$Rt32.ub)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV62]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -33760,7 +33906,7 @@ def V6_vmpyiwub_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vmpyi($Vu32.w,$Rt32.ub)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV62]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -33798,7 +33944,7 @@ def V6_vmpyowh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -33810,7 +33956,7 @@ def V6_vmpyowh_64_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32 += vmpyo($Vu32.w,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33835,7 +33981,7 @@ def V6_vmpyowh_rnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -33858,7 +34004,7 @@ def V6_vmpyowh_rnd_sacc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat:shift",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33884,7 +34030,7 @@ def V6_vmpyowh_sacc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:sat:shift",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33910,7 +34056,7 @@ def V6_vmpyub : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.uh = vmpy($Vu32.ub,$Rt32.ub)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
@@ -33922,7 +34068,7 @@ def V6_vmpyub_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.uh += vmpy($Vu32.ub,$Rt32.ub)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -33960,7 +34106,7 @@ def V6_vmpyubv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.uh = vmpy($Vu32.ub,$Vv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33972,7 +34118,7 @@ def V6_vmpyubv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.uh += vmpy($Vu32.ub,$Vv32.ub)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -34010,7 +34156,7 @@ def V6_vmpyuh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.uw = vmpy($Vu32.uh,$Rt32.uh)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -34022,7 +34168,7 @@ def V6_vmpyuh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.uw += vmpy($Vu32.uh,$Rt32.uh)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -34060,7 +34206,7 @@ def V6_vmpyuhe : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uw = vmpye($Vu32.uh,$Rt32.uh)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV65]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -34072,7 +34218,7 @@ def V6_vmpyuhe_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.uw += vmpye($Vu32.uh,$Rt32.uh)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV65]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -34086,7 +34232,7 @@ def V6_vmpyuhv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.uw = vmpy($Vu32.uh,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -34098,7 +34244,7 @@ def V6_vmpyuhv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.uw += vmpy($Vu32.uh,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -34136,7 +34282,7 @@ def V6_vmux : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qt4, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vmux($Qt4,$Vu32,$Vv32)",
-tc_a3127e12, TypeCVI_VA>, Enc_31db33, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_31db33, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110111;
@@ -34148,7 +34294,7 @@ def V6_vnavgb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vnavg($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -34171,7 +34317,7 @@ def V6_vnavgh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vnavg($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -34194,7 +34340,7 @@ def V6_vnavgub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vnavg($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -34217,7 +34363,7 @@ def V6_vnavgw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vnavg($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -34240,7 +34386,7 @@ def V6_vnccombine : HInst<
(outs HvxWR:$Vdd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32, HvxVR:$Vv32),
"if (!$Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
-tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
+tc_af25efd9, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011010010;
@@ -34254,7 +34400,7 @@ def V6_vncmov : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32),
"if (!$Ps4) $Vd32 = $Vu32",
-tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
+tc_3aacf4a8, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001101000100000;
@@ -34268,7 +34414,7 @@ def V6_vnormamth : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vnormamt($Vu32.h)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000011;
@@ -34291,7 +34437,7 @@ def V6_vnormamtw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.w = vnormamt($Vu32.w)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000011;
@@ -34314,7 +34460,7 @@ def V6_vnot : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32 = vnot($Vu32)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -34326,7 +34472,7 @@ def V6_vor : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vor($Vu32,$Vv32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -34338,7 +34484,7 @@ def V6_vpackeb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vpacke($Vu32.h,$Vv32.h)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34361,7 +34507,7 @@ def V6_vpackeh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vpacke($Vu32.w,$Vv32.w)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34384,7 +34530,7 @@ def V6_vpackhb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vpack($Vu32.h,$Vv32.h):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34407,7 +34553,7 @@ def V6_vpackhub_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vpack($Vu32.h,$Vv32.h):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34430,7 +34576,7 @@ def V6_vpackob : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vpacko($Vu32.h,$Vv32.h)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -34453,7 +34599,7 @@ def V6_vpackoh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vpacko($Vu32.w,$Vv32.w)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -34476,7 +34622,7 @@ def V6_vpackwh_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vpack($Vu32.w,$Vv32.w):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -34499,7 +34645,7 @@ def V6_vpackwuh_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vpack($Vu32.w,$Vv32.w):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34522,7 +34668,7 @@ def V6_vpopcounth : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vpopcount($Vu32.h)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -34545,7 +34691,7 @@ def V6_vprefixqb : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4),
"$Vd32.b = prefixsum($Qv4)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
let Inst{13-5} = 0b100000010;
let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
@@ -34557,7 +34703,7 @@ def V6_vprefixqh : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4),
"$Vd32.h = prefixsum($Qv4)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
let Inst{13-5} = 0b100001010;
let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
@@ -34569,7 +34715,7 @@ def V6_vprefixqw : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4),
"$Vd32.w = prefixsum($Qv4)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
let Inst{13-5} = 0b100010010;
let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
@@ -34581,7 +34727,7 @@ def V6_vrdelta : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vrdelta($Vu32,$Vv32)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -34593,7 +34739,7 @@ def V6_vrmpybub_rtt : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vdd32.w = vrmpy($Vu32.b,$Rtt32.ub)",
-tc_a807365d, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
+tc_cd94bfe0, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
@@ -34605,7 +34751,7 @@ def V6_vrmpybub_rtt_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vxx32.w += vrmpy($Vu32.b,$Rtt32.ub)",
-tc_ee927c0e, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
+tc_15fdf750, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -34643,7 +34789,7 @@ def V6_vrmpybus : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vrmpy($Vu32.ub,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -34655,7 +34801,7 @@ def V6_vrmpybus_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vrmpy($Vu32.ub,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -34693,7 +34839,7 @@ def V6_vrmpybusi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vdd32.w = vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
-tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
+tc_1ad8a370, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -34705,7 +34851,7 @@ def V6_vrmpybusi_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vxx32.w += vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
-tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
+tc_e675c45a, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -34743,7 +34889,7 @@ def V6_vrmpybusv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vrmpy($Vu32.ub,$Vv32.b)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -34755,7 +34901,7 @@ def V6_vrmpybusv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -34793,7 +34939,7 @@ def V6_vrmpybv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vrmpy($Vu32.b,$Vv32.b)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -34805,7 +34951,7 @@ def V6_vrmpybv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vrmpy($Vu32.b,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -34843,7 +34989,7 @@ def V6_vrmpyub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uw = vrmpy($Vu32.ub,$Rt32.ub)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -34855,7 +35001,7 @@ def V6_vrmpyub_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.uw += vrmpy($Vu32.ub,$Rt32.ub)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -34893,7 +35039,7 @@ def V6_vrmpyub_rtt : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vdd32.uw = vrmpy($Vu32.ub,$Rtt32.ub)",
-tc_a807365d, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
+tc_cd94bfe0, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
@@ -34905,7 +35051,7 @@ def V6_vrmpyub_rtt_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vxx32.uw += vrmpy($Vu32.ub,$Rtt32.ub)",
-tc_ee927c0e, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
+tc_15fdf750, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -34943,7 +35089,7 @@ def V6_vrmpyubi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vdd32.uw = vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
+tc_1ad8a370, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -34955,7 +35101,7 @@ def V6_vrmpyubi_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vxx32.uw += vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
+tc_e675c45a, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -34993,7 +35139,7 @@ def V6_vrmpyubv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vrmpy($Vu32.ub,$Vv32.ub)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -35005,7 +35151,7 @@ def V6_vrmpyubv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -35039,11 +35185,276 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vrmpyzbb_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzbb_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzbb_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzbb_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyzbub_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rt8.ub)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzbub_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rt8.ub)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzbub_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rx8.ub++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzbub_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rx8.ub++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyzcb_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vr16mpyz($Vu32.c,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzcb_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vr16mpyz($Vu32.c,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzcb_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vr16mpyz($Vu32.c,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzcb_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vr16mpyz($Vu32.c,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyzcbs_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vr16mpyzs($Vu32.c,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzcbs_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vr16mpyzs($Vu32.c,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzcbs_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vr16mpyzs($Vu32.c,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzcbs_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vr16mpyzs($Vu32.c,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyznb_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vr8mpyz($Vu32.n,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyznb_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vr8mpyz($Vu32.n,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyznb_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vr8mpyz($Vu32.n,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyznb_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vr8mpyz($Vu32.n,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
def V6_vror : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32 = vror($Vu32,$Rt32)",
-tc_bf142ae2, TypeCVI_VP>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_6e7fa133, TypeCVI_VP>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -35051,11 +35462,34 @@ let hasNewValue = 1;
let opNewValue = 0;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vrotr : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.uw = vrotr($Vu32.uw,$Vv32.uw)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV66]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrotr_alt : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32 = vrotr($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vroundhb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vround($Vu32.h,$Vv32.h):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35078,7 +35512,7 @@ def V6_vroundhub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vround($Vu32.h,$Vv32.h):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35101,7 +35535,7 @@ def V6_vrounduhub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vround($Vu32.uh,$Vv32.uh):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -35124,7 +35558,7 @@ def V6_vrounduwuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vround($Vu32.uw,$Vv32.uw):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -35147,7 +35581,7 @@ def V6_vroundwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vround($Vu32.w,$Vv32.w):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35170,7 +35604,7 @@ def V6_vroundwuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vround($Vu32.w,$Vv32.w):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35193,7 +35627,7 @@ def V6_vrsadubi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vdd32.uw = vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
+tc_1ad8a370, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -35205,7 +35639,7 @@ def V6_vrsadubi_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vxx32.uw += vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
+tc_e675c45a, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -35239,11 +35673,23 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vsatdw : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.w = vsatdw($Vu32.w,$Vv32.w)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV66]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsathub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vsat($Vu32.h,$Vv32.h)",
-tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_8772086c, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35266,7 +35712,7 @@ def V6_vsatuwuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vsat($Vu32.uw,$Vv32.uw)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -35289,7 +35735,7 @@ def V6_vsatwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vsat($Vu32.w,$Vv32.w)",
-tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_8772086c, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35312,7 +35758,7 @@ def V6_vsb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.h = vsxt($Vu32.b)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -35335,7 +35781,7 @@ def V6_vscattermh : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.h).h = $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35346,7 +35792,7 @@ def V6_vscattermh_add : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.h).h += $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35377,7 +35823,7 @@ def V6_vscattermhq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.h).h = $Vw32",
-tc_df54ad52, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
+tc_8e420e4d, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b00101111100;
let accessSize = HalfWordAccess;
@@ -35397,7 +35843,7 @@ def V6_vscattermhw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vvv32.w).h = $Vw32",
-tc_ec58f88a, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
+tc_7273323b, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35408,7 +35854,7 @@ def V6_vscattermhw_add : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vvv32.w).h += $Vw32",
-tc_ec58f88a, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
+tc_7273323b, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35420,7 +35866,7 @@ def V6_vscattermhwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vvv32.w).h = $Vw32",
-tc_94f43c04, TypeCVI_SCATTER_DV>, Enc_3d6d37, Requires<[UseHVXV65]> {
+tc_58d21193, TypeCVI_SCATTER_DV>, Enc_3d6d37, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b00101111101;
let accessSize = HalfWordAccess;
@@ -35431,7 +35877,7 @@ def V6_vscattermw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.w).w = $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101111001;
let accessSize = WordAccess;
@@ -35442,7 +35888,7 @@ def V6_vscattermw_add : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.w).w += $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b00101111001;
let accessSize = WordAccess;
@@ -35501,7 +35947,7 @@ def V6_vscattermwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.w).w = $Vw32",
-tc_df54ad52, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
+tc_8e420e4d, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b00101111100;
let accessSize = WordAccess;
@@ -35521,7 +35967,7 @@ def V6_vsh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.w = vsxt($Vu32.h)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -35544,7 +35990,7 @@ def V6_vshufeh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vshuffe($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35567,7 +36013,7 @@ def V6_vshuff : HInst<
(outs HvxVR:$Vy32, HvxVR:$Vx32),
(ins HvxVR:$Vy32in, HvxVR:$Vx32in, IntRegs:$Rt32),
"vshuff($Vy32,$Vx32,$Rt32)",
-tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
+tc_561aaa58, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001111;
@@ -35582,7 +36028,7 @@ def V6_vshuffb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vshuff($Vu32.b)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -35605,7 +36051,7 @@ def V6_vshuffeb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vshuffe($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35628,7 +36074,7 @@ def V6_vshuffh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vshuff($Vu32.h)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -35651,7 +36097,7 @@ def V6_vshuffob : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vshuffo($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35674,7 +36120,7 @@ def V6_vshuffvdd : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32 = vshuff($Vu32,$Vv32,$Rt8)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -35686,7 +36132,7 @@ def V6_vshufoeb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.b = vshuffoe($Vu32.b,$Vv32.b)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35709,7 +36155,7 @@ def V6_vshufoeh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vshuffoe($Vu32.h,$Vv32.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35732,7 +36178,7 @@ def V6_vshufoh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vshuffo($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35755,7 +36201,7 @@ def V6_vsubb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vsub($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -35778,7 +36224,7 @@ def V6_vsubb_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -35801,7 +36247,7 @@ def V6_vsubbnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.b -= $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -35827,7 +36273,7 @@ def V6_vsubbq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.b -= $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -35853,7 +36299,7 @@ def V6_vsubbsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vsub($Vu32.b,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -35876,7 +36322,7 @@ def V6_vsubbsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -35899,7 +36345,7 @@ def V6_vsubcarry : HInst<
(outs HvxVR:$Vd32, HvxQR:$Qx4),
(ins HvxVR:$Vu32, HvxVR:$Vv32, HvxQR:$Qx4in),
"$Vd32.w = vsub($Vu32.w,$Vv32.w,$Qx4):carry",
-tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
+tc_7e6a3e89, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100101;
@@ -35908,11 +36354,25 @@ let opNewValue = 0;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_vsubcarryo : HInst<
+(outs HvxVR:$Vd32, HvxQR:$Qe4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.w,$Qe4 = vsub($Vu32.w,$Vv32.w):carry",
+tc_e35c1e93, TypeCOPROC_VX>, Enc_c1d806, Requires<[UseHVXV66]> {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsubh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vsub($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -35935,7 +36395,7 @@ def V6_vsubh_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -35958,7 +36418,7 @@ def V6_vsubhnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.h -= $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -35984,7 +36444,7 @@ def V6_vsubhq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.h -= $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -36010,7 +36470,7 @@ def V6_vsubhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vsub($Vu32.h,$Vv32.h):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36033,7 +36493,7 @@ def V6_vsubhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36056,7 +36516,7 @@ def V6_vsubhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vsub($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36079,7 +36539,7 @@ def V6_vsububh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vsub($Vu32.ub,$Vv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36102,7 +36562,7 @@ def V6_vsububsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vsub($Vu32.ub,$Vv32.ub):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36125,7 +36585,7 @@ def V6_vsububsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.ub = vsub($Vuu32.ub,$Vvv32.ub):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -36148,7 +36608,7 @@ def V6_vsubububb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vsub($Vu32.ub,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -36160,7 +36620,7 @@ def V6_vsubuhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vsub($Vu32.uh,$Vv32.uh):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36183,7 +36643,7 @@ def V6_vsubuhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uh = vsub($Vuu32.uh,$Vvv32.uh):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -36206,7 +36666,7 @@ def V6_vsubuhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vsub($Vu32.uh,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36229,7 +36689,7 @@ def V6_vsubuwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vsub($Vu32.uw,$Vv32.uw):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -36252,7 +36712,7 @@ def V6_vsubuwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uw = vsub($Vuu32.uw,$Vvv32.uw):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -36275,7 +36735,7 @@ def V6_vsubw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vsub($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -36298,7 +36758,7 @@ def V6_vsubw_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -36321,7 +36781,7 @@ def V6_vsubwnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.w -= $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -36347,7 +36807,7 @@ def V6_vsubwq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.w -= $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -36373,7 +36833,7 @@ def V6_vsubwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vsub($Vu32.w,$Vv32.w):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36396,7 +36856,7 @@ def V6_vsubwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36419,7 +36879,7 @@ def V6_vswap : HInst<
(outs HvxWR:$Vdd32),
(ins HvxQR:$Qt4, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32 = vswap($Qt4,$Vu32,$Vv32)",
-tc_316c637c, TypeCVI_VA_DV>, Enc_3dac0b, Requires<[UseHVXV60]> {
+tc_71646d06, TypeCVI_VA_DV>, Enc_3dac0b, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110101;
@@ -36431,7 +36891,7 @@ def V6_vtmpyb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vtmpy($Vuu32.b,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -36443,7 +36903,7 @@ def V6_vtmpyb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vtmpy($Vuu32.b,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -36481,7 +36941,7 @@ def V6_vtmpybus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vtmpy($Vuu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -36493,7 +36953,7 @@ def V6_vtmpybus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vtmpy($Vuu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -36531,7 +36991,7 @@ def V6_vtmpyhb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vtmpy($Vuu32.h,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -36543,7 +37003,7 @@ def V6_vtmpyhb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vtmpy($Vuu32.h,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -36595,7 +37055,7 @@ def V6_vunpackb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.h = vunpack($Vu32.b)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36618,7 +37078,7 @@ def V6_vunpackh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.w = vunpack($Vu32.h)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36641,7 +37101,7 @@ def V6_vunpackob : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32),
"$Vxx32.h |= vunpacko($Vu32.b)",
-tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
+tc_2c745bb8, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-16} = 0b0001111000000000;
@@ -36667,7 +37127,7 @@ def V6_vunpackoh : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32),
"$Vxx32.w |= vunpacko($Vu32.h)",
-tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
+tc_2c745bb8, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-16} = 0b0001111000000000;
@@ -36694,7 +37154,7 @@ def V6_vunpackub : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uh = vunpack($Vu32.ub)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36717,7 +37177,7 @@ def V6_vunpackuh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uw = vunpack($Vu32.uh)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36740,7 +37200,7 @@ def V6_vwhist128 : HInst<
(outs),
(ins),
"vwhist128",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10010010000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -36749,7 +37209,7 @@ def V6_vwhist128m : HInst<
(outs),
(ins u1_0Imm:$Ii),
"vwhist128(#$Ii)",
-tc_b77635b4, TypeCVI_HIST>, Enc_efaed8, Requires<[UseHVXV62]> {
+tc_b28e51aa, TypeCVI_HIST>, Enc_efaed8, Requires<[UseHVXV62]> {
let Inst{7-0} = 0b10000000;
let Inst{13-9} = 0b10011;
let Inst{31-16} = 0b0001111000000000;
@@ -36759,7 +37219,7 @@ def V6_vwhist128q : HInst<
(outs),
(ins HvxQR:$Qv4),
"vwhist128($Qv4)",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10010010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -36769,7 +37229,7 @@ def V6_vwhist128qm : HInst<
(outs),
(ins HvxQR:$Qv4, u1_0Imm:$Ii),
"vwhist128($Qv4,#$Ii)",
-tc_28978789, TypeCVI_HIST>, Enc_802dc0, Requires<[UseHVXV62]> {
+tc_767c4e9d, TypeCVI_HIST>, Enc_802dc0, Requires<[UseHVXV62]> {
let Inst{7-0} = 0b10000000;
let Inst{13-9} = 0b10011;
let Inst{21-16} = 0b000010;
@@ -36780,7 +37240,7 @@ def V6_vwhist256 : HInst<
(outs),
(ins),
"vwhist256",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001010000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -36789,7 +37249,7 @@ def V6_vwhist256_sat : HInst<
(outs),
(ins),
"vwhist256:sat",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001110000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -36798,7 +37258,7 @@ def V6_vwhist256q : HInst<
(outs),
(ins HvxQR:$Qv4),
"vwhist256($Qv4)",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -36808,7 +37268,7 @@ def V6_vwhist256q_sat : HInst<
(outs),
(ins HvxQR:$Qv4),
"vwhist256($Qv4):sat",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001110000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -36818,7 +37278,7 @@ def V6_vxor : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vxor($Vu32,$Vv32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -36830,7 +37290,7 @@ def V6_vzb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uh = vzxt($Vu32.ub)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -36853,7 +37313,7 @@ def V6_vzh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uw = vzxt($Vu32.uh)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -36872,11 +37332,122 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_zLd_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"z = vmem($Rt32+#$Ii)",
+tc_e699ae41, TypeCVI_ZW>, Enc_ff3442, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101100000;
+let addrMode = BaseImmOffset;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zLd_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"z = vmem($Rx32++#$Ii)",
+tc_a0dbea28, TypeCVI_ZW>, Enc_6c9ee0, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101101000;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zLd_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"z = vmem($Rx32++$Mu2)",
+tc_a0dbea28, TypeCVI_ZW>, Enc_44661f, Requires<[UseHVXV66,UseZReg]> {
+let Inst{12-0} = 0b0000000000001;
+let Inst{31-21} = 0b00101101000;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zLd_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) z = vmem($Rt32+#$Ii)",
+tc_dd5b0695, TypeCVI_ZW>, Enc_ef601b, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b00101100100;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zLd_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) z = vmem($Rx32++#$Ii)",
+tc_3ad719fb, TypeCVI_ZW>, Enc_6baed4, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101101100;
+let isPredicated = 1;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zLd_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) z = vmem($Rx32++$Mu2)",
+tc_3ad719fb, TypeCVI_ZW>, Enc_691712, Requires<[UseHVXV66,UseZReg]> {
+let Inst{10-0} = 0b00000000001;
+let Inst{31-21} = 0b00101101100;
+let isPredicated = 1;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zextract : HInst<
+(outs HvxVR:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = zextract($Rt32)",
+tc_5bf8afbb, TypeCVI_VP>, Enc_a5ed8a, Requires<[UseHVXV66,UseZReg]> {
+let Inst{13-5} = 0b000001001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zld0 : HInst<
+(outs),
+(ins IntRegs:$Rt32),
+"z = vmem($Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zldp0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32),
+"if ($Pv4) z = vmem($Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def Y2_barrier : HInst<
(outs),
(ins),
"barrier",
-tc_367f7f3d, TypeST>, Enc_e3b0c4 {
+tc_8c99de45, TypeST>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b1010100000000000;
let isSoloAX = 1;
@@ -36886,7 +37457,7 @@ def Y2_break : HInst<
(outs),
(ins),
"brkpt",
-tc_4ca572d4, TypeCR>, Enc_e3b0c4 {
+tc_9ad9998f, TypeCR>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b0110110000100000;
let isSolo = 1;
@@ -36895,7 +37466,7 @@ def Y2_dccleana : HInst<
(outs),
(ins IntRegs:$Rs32),
"dccleana($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000000;
let isRestrictSlot1AOK = 1;
@@ -36905,7 +37476,7 @@ def Y2_dccleaninva : HInst<
(outs),
(ins IntRegs:$Rs32),
"dccleaninva($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000010;
let isRestrictSlot1AOK = 1;
@@ -36915,7 +37486,7 @@ def Y2_dcfetch : HInst<
(outs),
(ins IntRegs:$Rs32),
"dcfetch($Rs32)",
-tc_3da80ba5, TypeMAPPING> {
+tc_d63f638c, TypeMAPPING> {
let hasSideEffects = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -36924,7 +37495,7 @@ def Y2_dcfetchbo : HInst<
(outs),
(ins IntRegs:$Rs32, u11_3Imm:$Ii),
"dcfetch($Rs32+#$Ii)",
-tc_4d9914c9, TypeLD>, Enc_2d829e {
+tc_9ca930f7, TypeLD>, Enc_2d829e {
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10010100000;
let addrMode = BaseImmOffset;
@@ -36935,7 +37506,7 @@ def Y2_dcinva : HInst<
(outs),
(ins IntRegs:$Rs32),
"dcinva($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000001;
let isRestrictSlot1AOK = 1;
@@ -36945,7 +37516,7 @@ def Y2_dczeroa : HInst<
(outs),
(ins IntRegs:$Rs32),
"dczeroa($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000110;
let isRestrictSlot1AOK = 1;
@@ -36956,7 +37527,7 @@ def Y2_icinva : HInst<
(outs),
(ins IntRegs:$Rs32),
"icinva($Rs32)",
-tc_999d32db, TypeJ>, Enc_ecbcc8 {
+tc_5d7f5414, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010110110;
let isSolo = 1;
@@ -36965,7 +37536,7 @@ def Y2_isync : HInst<
(outs),
(ins),
"isync",
-tc_b13761ae, TypeJ>, Enc_e3b0c4 {
+tc_8b121f4a, TypeJ>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000010;
let Inst{31-16} = 0b0101011111000000;
let isSolo = 1;
@@ -36974,16 +37545,25 @@ def Y2_syncht : HInst<
(outs),
(ins),
"syncht",
-tc_367f7f3d, TypeST>, Enc_e3b0c4 {
+tc_8c99de45, TypeST>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b1010100001000000;
let isSolo = 1;
}
+def Y2_wait : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"wait($Rs32)",
+tc_174516e8, TypeCR>, Enc_ecbcc8, Requires<[HasV65]> {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01100100010;
+let isSolo = 1;
+}
def Y4_l2fetch : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"l2fetch($Rs32,$Rt32)",
-tc_daa058fa, TypeST>, Enc_ca3887 {
+tc_fe211424, TypeST>, Enc_ca3887 {
let Inst{7-0} = 0b00000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100110000;
@@ -36995,7 +37575,7 @@ def Y4_trace : HInst<
(outs),
(ins IntRegs:$Rs32),
"trace($Rs32)",
-tc_c82dc1ff, TypeCR>, Enc_ecbcc8 {
+tc_6b25e783, TypeCR>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01100010010;
let isSoloAX = 1;
@@ -37004,7 +37584,7 @@ def Y5_l2fetch : HInst<
(outs),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"l2fetch($Rs32,$Rtt32)",
-tc_daa058fa, TypeST>, Enc_e6abcf, Requires<[HasV5]> {
+tc_fe211424, TypeST>, Enc_e6abcf {
let Inst{7-0} = 0b00000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100110100;
@@ -37016,7 +37596,7 @@ def dep_A2_addsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32):sat:deprecated",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101100;
@@ -37029,7 +37609,7 @@ def dep_A2_subsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32):sat:deprecated",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101100;
@@ -37042,7 +37622,7 @@ def dep_S2_packhl : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = packhl($Rs32,$Rt32):deprecated",
-tc_540fdfbc, TypeALU64>, Enc_be32a5 {
+tc_946df596, TypeALU64>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010100000;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
new file mode 100644
index 000000000000..2346fa572626
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -0,0 +1,3337 @@
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, please consult code owner before editing.
+//===----------------------------------------------------------------------===//
+
+
+// V5 Scalar Instructions.
+
+def: Pat<(int_hexagon_S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwh DoubleRegs:$src1),
+ (S2_vsatwh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysu_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpysu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpysc_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpysc_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpysc_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpysc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyi_whc DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyi_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffoh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffoh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmax IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmax IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabswsat DoubleRegs:$src1),
+ (A2_vabswsat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_combineri IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_notp DoubleRegs:$src1),
+ (A2_notp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2s_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2s_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2s_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2s_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_brevp DoubleRegs:$src1),
+ (S2_brevp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_pmpyw_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_pmpyw_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl1 IntRegs:$src1),
+ (S2_cl1 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplte IntRegs:$src1, IntRegs:$src2),
+ (C4_cmplte IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxup DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_maxup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_interleave DoubleRegs:$src1),
+ (S2_interleave DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_abssat IntRegs:$src1),
+ (A2_abssat IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpwgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpwgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtu IntRegs:$src1, IntRegs:$src2),
+ (C2_cmpgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2),
+ (A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2),
+ (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyi IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyi IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2uw_chop DoubleRegs:$src1),
+ (F2_conv_df2uw_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpheq IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpheq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
+ (S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ntstbit_r IntRegs:$src1, IntRegs:$src2),
+ (S4_ntstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_w2sf IntRegs:$src1),
+ (F2_conv_w2sf IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_not PredRegs:$src1),
+ (C2_not PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_tfrpr PredRegs:$src1),
+ (C2_tfrpr PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgt IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpbgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_orp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_orp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgtu IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpbgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2),
+ (A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addsp IntRegs:$src1, DoubleRegs:$src2),
+ (A2_addsp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_pmpyw IntRegs:$src1, IntRegs:$src2),
+ (M4_pmpyw IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathb DoubleRegs:$src1),
+ (S2_vsathb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_absp DoubleRegs:$src1),
+ (A2_absp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_all8 PredRegs:$src1),
+ (C2_all8 PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffma_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyoh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsset IntRegs:$src1, IntRegs:$src2),
+ (C2_bitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2),
+ (M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2),
+ (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_boundscheck IntRegs:$src1, DoubleRegs:$src2),
+ (A4_boundscheck IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_fastcorner9 PredRegs:$src1, PredRegs:$src2),
+ (C4_fastcorner9 PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_s1rp DoubleRegs:$src1, IntRegs:$src2),
+ (M2_vrcmpys_s1rp DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subsat IntRegs:$src1, IntRegs:$src2),
+ (A2_subsat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_ud2df DoubleRegs:$src1),
+ (F2_conv_ud2df DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vzxthw IntRegs:$src1),
+ (S2_vzxthw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfadd IntRegs:$src1, IntRegs:$src2),
+ (F2_sfadd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sub IntRegs:$src1, IntRegs:$src2),
+ (A2_sub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2su_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2su_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4),
+ (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_packhl IntRegs:$src1, IntRegs:$src2),
+ (S2_packhl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_d2df DoubleRegs:$src1),
+ (F2_conv_d2df DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2),
+ (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vconj DoubleRegs:$src1),
+ (A2_vconj DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clbp DoubleRegs:$src1),
+ (S2_clbp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_deinterleave DoubleRegs:$src1),
+ (S2_deinterleave DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_any8 PredRegs:$src1),
+ (C2_any8 PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_togglebit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_togglebit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_uw2sf IntRegs:$src1),
+ (F2_conv_uw2sf IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathb_nopack DoubleRegs:$src1),
+ (S2_vsathb_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clrbit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_clrbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
+ (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpbeq DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpbeq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmphgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmphgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred:$src1),
+ (A2_tfrsi s32_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svnavgh IntRegs:$src1, IntRegs:$src2),
+ (A2_svnavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
+ (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svavgh IntRegs:$src1, IntRegs:$src2),
+ (A2_svavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_negsat IntRegs:$src1),
+ (A2_negsat IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_bitsplit IntRegs:$src1, IntRegs:$src2),
+ (A4_bitsplit IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabshsat DoubleRegs:$src1),
+ (A2_vabshsat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyui IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyui IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyr_s0c DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyr_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupn IntRegs:$src1, IntRegs:$src2),
+ (F2_sffixupn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vadduhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vadduhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubuhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubuhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_xorp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_xorp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_tfrpcp DoubleRegs:$src1),
+ (A4_tfrpcp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_zxtb IntRegs:$src1),
+ (A2_zxtb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_zxth IntRegs:$src1),
+ (A2_zxth IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmacbsu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M5_vmacbsu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sat DoubleRegs:$src1),
+ (A2_sat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addsat IntRegs:$src1, IntRegs:$src2),
+ (A2_addsat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svavghs IntRegs:$src1, IntRegs:$src2),
+ (A2_svavghs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2),
+ (C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vradduh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vradduh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_addp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (A4_addp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_xor PredRegs:$src1, PredRegs:$src2),
+ (C2_xor PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2ud_chop DoubleRegs:$src1),
+ (F2_conv_df2ud_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxaddsubhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxaddsubhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathub DoubleRegs:$src1),
+ (S2_vsathub DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2sf DoubleRegs:$src1),
+ (F2_conv_df2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyh_rs1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyh_rs1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxth IntRegs:$src1),
+ (A2_sxth IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxtb IntRegs:$src1),
+ (A2_sxtb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmaci_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmaci_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxtw IntRegs:$src1),
+ (A2_sxtw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vabsdiffh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vabsdiffh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl1p DoubleRegs:$src1),
+ (S2_cl1p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vabsdiffw DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vabsdiffw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_andnp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_andnp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_parityp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_parityp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpeq IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddb_map DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpheq DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpheq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clbnorm IntRegs:$src1),
+ (S2_clbnorm IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3),
+ (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_tstbit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_tstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2),
+ (S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_w2df IntRegs:$src1),
+ (F2_conv_w2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vcnegh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_vcnegh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_xacc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_xor_xacc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpys_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpys_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpys_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpys_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgubr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgubr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl0p DoubleRegs:$src1),
+ (S2_cl0p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3),
+ (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupd IntRegs:$src1, IntRegs:$src2),
+ (F2_sffixupd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct1 IntRegs:$src1),
+ (S2_ct1 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct0 IntRegs:$src1),
+ (S2_ct0 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupr IntRegs:$src1),
+ (F2_sffixupr IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmphgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmphgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_andn PredRegs:$src1, PredRegs:$src2),
+ (C2_andn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_max IntRegs:$src1, IntRegs:$src2),
+ (A2_max IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_rr IntRegs:$src1, IntRegs:$src2),
+ (A4_round_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2),
+ (A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineir s32_0ImmPred:$src1, IntRegs:$src2),
+ (A4_combineir s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpeq IntRegs:$src1, IntRegs:$src2),
+ (A4_rcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vzxtbh IntRegs:$src1),
+ (S2_vzxtbh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_sat IntRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_r_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combinew IntRegs:$src1, IntRegs:$src2),
+ (A2_combinew IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsset IntRegs:$src1, IntRegs:$src2),
+ (C4_nbitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_modwrapu IntRegs:$src1, IntRegs:$src2),
+ (A4_modwrapu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpneq IntRegs:$src1, IntRegs:$src2),
+ (A4_rcmpneq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred:$src1),
+ (F2_sfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred:$src1),
+ (F2_sfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_d2sf DoubleRegs:$src1),
+ (F2_conv_d2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2),
+ (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpuo IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpuo IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwh_nopack DoubleRegs:$src1),
+ (S2_vsatwh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_minu IntRegs:$src1, IntRegs:$src2),
+ (A2_minu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_minp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_minp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpge IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpge IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmin IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmin IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpgt IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vpmpyh IntRegs:$src1, IntRegs:$src2),
+ (M4_vpmpyh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_roundsat DoubleRegs:$src1),
+ (A2_roundsat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct1p DoubleRegs:$src1),
+ (S2_ct1p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extract_rp IntRegs:$src1, DoubleRegs:$src2),
+ (S4_extract_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2),
+ (C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_tfrcpp CtrRegs64:$src1),
+ (A4_tfrcpp CtrRegs64:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgtu IntRegs:$src1, IntRegs:$src2),
+ (A4_cmphgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubws DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sath IntRegs:$src1),
+ (A2_sath IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_satb IntRegs:$src1),
+ (A2_satb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4),
+ (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxaddsubh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxaddsubh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_asrh IntRegs:$src1),
+ (A2_asrh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extractp_rp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_extractp_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_or PredRegs:$src1, PredRegs:$src2),
+ (C2_or PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmacr_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmacr_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_xor IntRegs:$src1, IntRegs:$src2),
+ (A2_xor IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_add IntRegs:$src1, IntRegs:$src2),
+ (A2_add IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsububs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsububs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vraddub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (A2_vraddub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfinvsqrta IntRegs:$src1),
+ (F2_sfinvsqrta IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct0p DoubleRegs:$src1),
+ (S2_ct0p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svaddh IntRegs:$src1, IntRegs:$src2),
+ (A2_svaddh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vcrotate DoubleRegs:$src1, IntRegs:$src2),
+ (S2_vcrotate DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_aslh IntRegs:$src1),
+ (A2_aslh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyl_rs1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyl_rs1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplatrh IntRegs:$src1),
+ (S2_vsplatrh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplatrb IntRegs:$src1),
+ (S2_vsplatrb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3),
+ (C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_pxfer_map PredRegs:$src1),
+ (C2_pxfer_map PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3),
+ (M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
+ (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrcrr CtrRegs:$src1),
+ (A2_tfrcrr CtrRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
+ (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_orn PredRegs:$src1, PredRegs:$src2),
+ (C2_orn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmpy IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmpy IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_mask PredRegs:$src1),
+ (C2_mask PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_up_s1_sat IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_up_s1_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_vcmpbgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vrmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vrsadub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vrsadub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrrcr IntRegs:$src1),
+ (A2_tfrrcr IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxb DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsxthw IntRegs:$src1),
+ (S2_vsxthw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgt IntRegs:$src1, IntRegs:$src2),
+ (C2_cmpgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2d_chop DoubleRegs:$src1),
+ (F2_conv_df2d_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2w IntRegs:$src1),
+ (F2_conv_sf2w IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2),
+ (F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3),
+ (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A2_addi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_addp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbpnorm DoubleRegs:$src1),
+ (S4_clbpnorm DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_rr_sat IntRegs:$src1, IntRegs:$src2),
+ (A4_round_rr_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2uw IntRegs:$src1),
+ (F2_conv_sf2uw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2ud IntRegs:$src1),
+ (F2_conv_sf2ud IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpwgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpwgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2uw_chop IntRegs:$src1),
+ (F2_conv_sf2uw_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwuh_nopack DoubleRegs:$src1),
+ (S2_vsatwuh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubuhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubuhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmpybsu IntRegs:$src1, IntRegs:$src2),
+ (M5_vmpybsu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_subp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (A4_subp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_vitpack PredRegs:$src1, PredRegs:$src2),
+ (C2_vitpack PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplicerb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (S2_vsplicerb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsclr IntRegs:$src1, IntRegs:$src2),
+ (C4_nbitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpbgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpbgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpys_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpys_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpys_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpys_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpuo DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpuo DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffob DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffob DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_and PredRegs:$src1, PredRegs:$src2),
+ (C2_and PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S5_popcountp DoubleRegs:$src1),
+ (S5_popcountp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
+ (S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl0 IntRegs:$src1),
+ (S2_cl0 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpneq IntRegs:$src1, IntRegs:$src2),
+ (C4_cmpneq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmacs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vdmacs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmacs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vdmacs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clb IntRegs:$src1),
+ (S2_clb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrmac_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrmac_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_sat IntRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_r_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2d IntRegs:$src1),
+ (F2_conv_sf2d IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred:$src1),
+ (F2_dfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgt IntRegs:$src1, IntRegs:$src2),
+ (A4_cmphgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred:$src1),
+ (F2_dfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3),
+ (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3),
+ (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxu IntRegs:$src1, IntRegs:$src2),
+ (A2_maxu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_maxp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred:$src2),
+ (A2_andir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfrecipa IntRegs:$src1, IntRegs:$src2),
+ (F2_sfrecipa IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2),
+ (A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_orn IntRegs:$src1, IntRegs:$src2),
+ (A4_orn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2),
+ (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2),
+ (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_or IntRegs:$src1, IntRegs:$src2),
+ (A2_or IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpeq DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpeq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeq IntRegs:$src1, IntRegs:$src2),
+ (C2_cmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrp DoubleRegs:$src1),
+ (A2_tfrp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathub_nopack DoubleRegs:$src1),
+ (S2_vsathub_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_satuh IntRegs:$src1),
+ (A2_satuh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_satub IntRegs:$src1),
+ (A2_satub IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2),
+ (M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2),
+ (C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2),
+ (A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred:$src2),
+ (A2_tfril IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
+ (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunehb DoubleRegs:$src1),
+ (S2_vtrunehb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabsw DoubleRegs:$src1),
+ (A2_vabsw DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabsh DoubleRegs:$src1),
+ (A2_vabsh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfsub IntRegs:$src1, IntRegs:$src2),
+ (F2_sfsub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3),
+ (C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_swiz IntRegs:$src1),
+ (A2_swiz IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vraddub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vraddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_tlbmatch DoubleRegs:$src1, IntRegs:$src2),
+ (A4_tlbmatch DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2w_chop DoubleRegs:$src1),
+ (F2_conv_df2w_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_and IntRegs:$src1, IntRegs:$src2),
+ (A2_and IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
+ (S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_ud2sf DoubleRegs:$src1),
+ (F2_conv_ud2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfr IntRegs:$src1),
+ (A2_tfr IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subri s32_0ImmPred:$src1, IntRegs:$src2),
+ (A2_subri s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmpybuu IntRegs:$src1, IntRegs:$src2),
+ (M5_vmpybuu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_brev IntRegs:$src1),
+ (S2_brev IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrndpackwhs DoubleRegs:$src1),
+ (S2_vrndpackwhs DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4),
+ (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_uw2df IntRegs:$src1),
+ (F2_conv_uw2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred:$src2),
+ (A2_orir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_andp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_andp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_min IntRegs:$src1, IntRegs:$src2),
+ (A2_min IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2),
+ (M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_svw_trun DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_svw_trun DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2df IntRegs:$src1),
+ (F2_conv_sf2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunohb DoubleRegs:$src1),
+ (S2_vtrunohb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2d_chop IntRegs:$src1),
+ (F2_conv_sf2d_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2w DoubleRegs:$src1),
+ (F2_conv_df2w DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2d DoubleRegs:$src1),
+ (F2_conv_df2d DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svadduhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svadduhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2w_chop IntRegs:$src1),
+ (F2_conv_sf2w_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_svsathub IntRegs:$src1),
+ (S2_svsathub IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_setbit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_setbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4),
+ (F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2ud DoubleRegs:$src1),
+ (F2_conv_df2ud DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2uw DoubleRegs:$src1),
+ (F2_conv_df2uw DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2),
+ (C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplteu IntRegs:$src1, IntRegs:$src2),
+ (C4_cmplteu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubb_map DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_minup DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_minup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_valignrb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (S2_valignrb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmaci_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmaci_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vdmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vdmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svaddhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svaddhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyr_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyr_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminb DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmac_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vcmac_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_lsli s6_0ImmPred:$src1, IntRegs:$src2),
+ (S4_lsli s6_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyeh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyeh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_tfrrp IntRegs:$src1),
+ (C2_tfrrp IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunowh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_vtrunowh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_abs IntRegs:$src1),
+ (A2_abs IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbeq IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpbeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_negp DoubleRegs:$src1),
+ (A2_negp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwuh DoubleRegs:$src1),
+ (S2_vsatwuh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_svsathb IntRegs:$src1),
+ (S2_svsathb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2),
+ (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cround_rr IntRegs:$src1, IntRegs:$src2),
+ (A4_cround_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffeb DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffeb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminuw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminuw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3),
+ (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
+ (S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubh IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2),
+ (S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsxtbh IntRegs:$src1),
+ (S2_vsxtbh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_subp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2es_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vmpy2es_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2es_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vmpy2es_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_parity IntRegs:$src1, IntRegs:$src2),
+ (S4_parity IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2ud_chop IntRegs:$src1),
+ (F2_conv_sf2ud_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_nac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_nac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavghcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_andn IntRegs:$src1, IntRegs:$src2),
+ (A4_andn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrndpackwh DoubleRegs:$src1),
+ (S2_vrndpackwh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmac_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vcmac_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxuw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxuw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsclr IntRegs:$src1, IntRegs:$src2),
+ (C2_bitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
+ (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_ornp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_ornp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2),
+ (C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+
+// V55 Scalar Instructions.
+
+def: Pat<(int_hexagon_A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV55]>;
+
+// V60 Scalar Instructions.
+
+def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+
+// V62 Scalar Instructions.
+
+def: Pat<(int_hexagon_S6_vtrunehb_ppp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S6_vtrunehb_ppp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_V6_ldntnt0 IntRegs:$src1),
+ (V6_ldntnt0 IntRegs:$src1)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_M6_vabsdiffub DoubleRegs:$src1, DoubleRegs:$src2),
+ (M6_vabsdiffub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_S6_vtrunohb_ppp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S6_vtrunohb_ppp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_M6_vabsdiffb DoubleRegs:$src1, DoubleRegs:$src2),
+ (M6_vabsdiffb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_A6_vminub_RdP DoubleRegs:$src1, DoubleRegs:$src2),
+ (A6_vminub_RdP DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_S6_vsplatrbp IntRegs:$src1),
+ (S6_vsplatrbp IntRegs:$src1)>, Requires<[HasV62]>;
+
+// V65 Scalar Instructions.
+
+def: Pat<(int_hexagon_A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2),
+ (A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV65]>;
+
+// V66 Scalar Instructions.
+
+def: Pat<(int_hexagon_F2_dfsub DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfsub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2),
+ (S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2)>, Requires<[HasV66]>;
+
+// V60 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminub HvxVR:$src1, HvxVR:$src2),
+ (V6_vminub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslw_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhvsrs_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsathub HvxVR:$src1, HvxVR:$src2),
+ (V6_vsathub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsathub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsathub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddh_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddh_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgw HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgh HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgub HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubb HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgubrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgubrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgubrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgubrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundhb HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundhb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabus_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmux_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhus HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhus HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhus_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhus HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackeb HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackeb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavghrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavghrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavghrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavghrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtran2x2_map_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdelta HvxVR:$src1, HvxVR:$src2),
+ (V6_vdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdelta_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackob HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackob_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrw_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_or HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_or_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lo HvxWR:$src1),
+ (V6_lo HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lo_128B HvxWR:$src1),
+ (V6_lo HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubb_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubb_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldu0 IntRegs:$src1),
+ (V6_ldu0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldu0_128B IntRegs:$src1),
+ (V6_ldu0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgh HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlalignb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsh HvxVR:$src1),
+ (V6_vsh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsh_128B HvxVR:$src1),
+ (V6_vsh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_and_n HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_and_n_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsb HvxVR:$src1),
+ (V6_vsb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsb_128B HvxVR:$src1),
+ (V6_vsb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffh HvxVR:$src1),
+ (V6_vshuffh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffh_128B HvxVR:$src1),
+ (V6_vshuffh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgub HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnormamth HvxVR:$src1),
+ (V6_vnormamth HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnormamth_128B HvxVR:$src1),
+ (V6_vnormamth HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguh HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddw HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vzh HvxVR:$src1),
+ (V6_vzh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vzh_128B HvxVR:$src1),
+ (V6_vzh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxub HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh HvxVR:$src1, HvxVR:$src2),
+ (V6_veqh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuuv HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabuuv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuuv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabuuv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vminuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vror HvxVR:$src1, IntRegs:$src2),
+ (V6_vror HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vror_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vror HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_sacc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsh_sat HvxVR:$src1),
+ (V6_vabsh_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsh_sat_128B HvxVR:$src1),
+ (V6_vabsh_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_or_n HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_or_n_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealb HvxVR:$src1),
+ (V6_vdealb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealb_128B HvxVR:$src1),
+ (V6_vdealb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybusv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vzb HvxVR:$src1),
+ (V6_vzb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vzb_128B HvxVR:$src1),
+ (V6_vzb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddb HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoeb HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoeb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackhub_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyb HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabusv HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabusv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabusv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabusv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_and HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_and_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackwuh_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vswap_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslw HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackhb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyih_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffvdd_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddb_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddb_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackub HvxVR:$src1),
+ (V6_vunpackub HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackub_128B HvxVR:$src1),
+ (V6_vunpackub HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyieoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyieoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyieoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyieoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_extractw HvxVR:$src1, IntRegs:$src2),
+ (V6_extractw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_extractw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_extractw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgwrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgwrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyub HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackob HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackob HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackob_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackob HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpahb HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpahb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpahb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpahb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandqrt HvxQR:$src1, IntRegs:$src2),
+ (V6_vandqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandqrt_128B HvxQR:$src1, IntRegs:$src2),
+ (V6_vandqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vxor HvxVR:$src1, HvxVR:$src2),
+ (V6_vxor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vxor_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vxor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhsat_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealb4w HvxVR:$src1, HvxVR:$src2),
+ (V6_vdealb4w HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealb4w_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdealb4w HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_sacc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffh HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffob HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffob_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyub_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnormamtw HvxVR:$src1),
+ (V6_vnormamtw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnormamtw_128B HvxVR:$src1),
+ (V6_vnormamtw HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackuh HvxVR:$src1),
+ (V6_vunpackuh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackuh_128B HvxVR:$src1),
+ (V6_vunpackuh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackoh HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackoh HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackoh_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackoh HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyubv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhss HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhss HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhss_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhss HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_hi HvxWR:$src1),
+ (V6_hi HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_hi_128B HvxWR:$src1),
+ (V6_hi HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw HvxVR:$src1, HvxVR:$src2),
+ (V6_veqw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdsaduh HvxWR:$src1, IntRegs:$src2),
+ (V6_vdsaduh HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdsaduh HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubw_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubw_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyih HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyih HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyih_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyih HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybus HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybus_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsw HvxVR:$src1),
+ (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsw_128B HvxVR:$src1),
+ (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrw HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsh HvxVR:$src1),
+ (V6_vabsh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsh_128B HvxVR:$src1),
+ (V6_vabsh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrh HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_valignb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcombine HvxVR:$src1, HvxVR:$src2),
+ (V6_vcombine HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcombine_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vcombine HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandqrt_acc_128B HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vinsertwr HvxVR:$src1, IntRegs:$src2),
+ (V6_vinsertwr HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vinsertwr_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vinsertwr HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubh_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubh_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffb HvxVR:$src1),
+ (V6_vshuffb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffb_128B HvxVR:$src1),
+ (V6_vshuffb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vand HvxVR:$src1, HvxVR:$src2),
+ (V6_vand HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vand_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vand HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_acc_128B HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcl0h HvxVR:$src1),
+ (V6_vcl0h HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcl0h_128B HvxVR:$src1),
+ (V6_vcl0h HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhus_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhsrs_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwh_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ld0 IntRegs:$src1),
+ (V6_ld0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ld0_128B IntRegs:$src1),
+ (V6_ld0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpopcounth HvxVR:$src1),
+ (V6_vpopcounth HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpopcounth_128B HvxVR:$src1),
+ (V6_vpopcounth HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldnt0 IntRegs:$src1),
+ (V6_ldnt0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldnt0_128B IntRegs:$src1),
+ (V6_ldnt0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminh HvxVR:$src1, HvxVR:$src2),
+ (V6_vminh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2 IntRegs:$src1),
+ (V6_pred_scalar2 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2_128B IntRegs:$src1),
+ (V6_pred_scalar2 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealh HvxVR:$src1),
+ (V6_vdealh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealh_128B HvxVR:$src1),
+ (V6_vdealh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackwh_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslh HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vor HvxVR:$src1, HvxVR:$src2),
+ (V6_vor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vor_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiowh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiowh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvrt HvxVR:$src1, IntRegs:$src2),
+ (V6_vandvrt HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvrt_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vandvrt HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcl0w HvxVR:$src1),
+ (V6_vcl0w HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcl0w_128B HvxVR:$src1),
+ (V6_vcl0w HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyihb HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyihb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyihb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyihb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpybus HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpybus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpybus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vd0 ),
+ (V6_vd0 )>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vd0_128B ),
+ (V6_vd0 )>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybus HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybus_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvrt_acc_128B HvxQR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vassign HvxVR:$src1),
+ (V6_vassign HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vassign_128B HvxVR:$src1),
+ (V6_vassign HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackb HvxVR:$src1),
+ (V6_vunpackb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackb_128B HvxVR:$src1),
+ (V6_vunpackb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackh HvxVR:$src1),
+ (V6_vunpackh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackh_128B HvxVR:$src1),
+ (V6_vunpackh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpahb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsatwh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsatwh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyihb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrdelta HvxVR:$src1, HvxVR:$src2),
+ (V6_vrdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrdelta_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundwh HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundwh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddw_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddw_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffub HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffeb HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffeb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth HvxVR:$src1, HvxVR:$src2),
+ (V6_vgth HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgth HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnot HvxVR:$src1),
+ (V6_vnot HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnot_128B HvxVR:$src1),
+ (V6_vnot HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxw HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsw_sat HvxVR:$src1),
+ (V6_vabsw_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsw_sat_128B HvxVR:$src1),
+ (V6_vabsw_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundhub HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundhub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_acc_128B HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabus HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabus_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vassignp HvxWR:$src1),
+ (V6_vassignp HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vassignp_128B HvxWR:$src1),
+ (V6_vassignp HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb HvxVR:$src1, HvxVR:$src2),
+ (V6_veqb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lvsplatw IntRegs:$src1),
+ (V6_lvsplatw IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lvsplatw_128B IntRegs:$src1),
+ (V6_lvsplatw IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_not HvxQR:$src1),
+ (V6_pred_not HvxQR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_not_128B HvxQR:$src1),
+ (V6_pred_not HvxQR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewh_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealvdd_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgw HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminw HvxVR:$src1, HvxVR:$src2),
+ (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_xor HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_xor HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_xor_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_xor HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguhrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguhrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguhrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguhrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrw HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrh HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyuhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyuhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhbrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffw HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+
+// V62 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandnqrt_acc_128B HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddclbh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddclbh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_64_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh_64_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsatuwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatuwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsatuwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatuwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_shuffeqh HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqh HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_shuffeqh_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqh HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_shuffeqw HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqw HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_shuffeqw_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqw HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcnpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubcarry_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhbsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminb HvxVR:$src1, HvxVR:$src2),
+ (V6_vminb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhw_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrb HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddububb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvnqv HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvnqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvnqv_128B HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvnqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lvsplatb IntRegs:$src1),
+ (V6_lvsplatb IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lvsplatb_128B IntRegs:$src1),
+ (V6_lvsplatb IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lvsplath IntRegs:$src1),
+ (V6_lvsplath IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lvsplath_128B IntRegs:$src1),
+ (V6_lvsplath IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_nm_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldnpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhb HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpauhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpauhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtnp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtnp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrounduhub HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrounduhub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhw_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtnpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandnqrt HvxQR:$src1, IntRegs:$src2),
+ (V6_vandnqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandnqrt_128B HvxQR:$src1, IntRegs:$src2),
+ (V6_vandnqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxb HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvqv HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvqv_128B HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddcarry_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldnp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldnp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrounduwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrounduwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_nm_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2v2 IntRegs:$src1),
+ (V6_pred_scalar2v2 IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2v2_128B IntRegs:$src1),
+ (V6_pred_scalar2v2 IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubh_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddclbw HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddclbw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubububb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcnp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcnp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+
+// V65 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_128B HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpahhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguwrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguwrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgb HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgb HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguw HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlut4 HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlut4_128B HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_128B HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpsuhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vprefixqw HvxQR:$src1),
+ (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vprefixqw_128B HvxQR:$src1),
+ (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vprefixqh HvxQR:$src1),
+ (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vprefixqh_128B HvxQR:$src1),
+ (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vprefixqb HvxQR:$src1),
+ (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vprefixqb_128B HvxQR:$src1),
+ (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsb HvxVR:$src1),
+ (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsb_128B HvxVR:$src1),
+ (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgbrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgbrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdd0 ),
+ (V6_vdd0 )>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdd0_128B ),
+ (V6_vdd0 )>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuu HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsb_sat HvxVR:$src1),
+ (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsb_sat_128B HvxVR:$src1),
+ (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+
+// V66 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddcarrysat_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV66, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasr_into_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV66, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsatdw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatdw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsatdw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatdw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrotr HvxVR:$src1, HvxVR:$src2),
+ (V6_vrotr HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrotr_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrotr HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX128B]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td
index 03c504ff0b08..b3132d41b903 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td
@@ -1,4 +1,4 @@
-//===- HexagonDepMappings.td ----------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,6 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
def A2_negAlias : InstAlias<"$Rd32 = neg($Rs32)", (A2_subri IntRegs:$Rd32, 0, IntRegs:$Rs32)>;
def A2_notAlias : InstAlias<"$Rd32 = not($Rs32)", (A2_subri IntRegs:$Rd32, -1, IntRegs:$Rs32)>;
def A2_tfrfAlias : InstAlias<"if (!$Pu4) $Rd32 = $Rs32", (A2_paddif IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>;
@@ -252,6 +251,7 @@ def V6_vaslhv_altAlias : InstAlias<"$Vd32 = vaslh($Vu32,$Vv32)", (V6_vaslhv HvxV
def V6_vaslw_acc_altAlias : InstAlias<"$Vx32 += vaslw($Vu32,$Rt32)", (V6_vaslw_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vaslw_altAlias : InstAlias<"$Vd32 = vaslw($Vu32,$Rt32)", (V6_vaslw HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vaslwv_altAlias : InstAlias<"$Vd32 = vaslw($Vu32,$Vv32)", (V6_vaslwv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
+def V6_vasr_into_altAlias : InstAlias<"$Vxx32 = vasrinto($Vu32,$Vv32)", (V6_vasr_into HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vasrh_acc_altAlias : InstAlias<"$Vx32 += vasrh($Vu32,$Rt32)", (V6_vasrh_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vasrh_altAlias : InstAlias<"$Vd32 = vasrh($Vu32,$Rt32)", (V6_vasrh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vasrhbrndsat_altAlias : InstAlias<"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhbrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>;
@@ -402,6 +402,7 @@ def V6_vrmpyubi_acc_altAlias : InstAlias<"$Vxx32 += vrmpyub($Vuu32,$Rt32,#$Ii)",
def V6_vrmpyubi_altAlias : InstAlias<"$Vdd32 = vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
def V6_vrmpyubv_acc_altAlias : InstAlias<"$Vx32 += vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vrmpyubv_altAlias : InstAlias<"$Vd32 = vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrotr_altAlias : InstAlias<"$Vd32 = vrotr($Vu32,$Vv32)", (V6_vrotr HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vroundhb_altAlias : InstAlias<"$Vd32 = vroundhb($Vu32,$Vv32):sat", (V6_vroundhb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vroundhub_altAlias : InstAlias<"$Vd32 = vroundhub($Vu32,$Vv32):sat", (V6_vroundhub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vrounduhub_altAlias : InstAlias<"$Vd32 = vrounduhub($Vu32,$Vv32):sat", (V6_vrounduhub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
@@ -473,4 +474,6 @@ def V6_vunpackub_altAlias : InstAlias<"$Vdd32 = vunpackub($Vu32)", (V6_vunpackub
def V6_vunpackuh_altAlias : InstAlias<"$Vdd32 = vunpackuh($Vu32)", (V6_vunpackuh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>;
def V6_vzb_altAlias : InstAlias<"$Vdd32 = vzxtb($Vu32)", (V6_vzb HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>;
def V6_vzh_altAlias : InstAlias<"$Vdd32 = vzxth($Vu32)", (V6_vzh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>;
+def V6_zld0Alias : InstAlias<"z = vmem($Rt32)", (V6_zLd_ai IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_zldp0Alias : InstAlias<"if ($Pv4) z = vmem($Rt32)", (V6_zLd_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
def Y2_dcfetchAlias : InstAlias<"dcfetch($Rs32)", (Y2_dcfetchbo IntRegs:$Rs32, 0)>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td
index 9d960953f8f5..ef2d4fa45702 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td
@@ -1,4 +1,4 @@
-//===- HexagonDepOperands.td ----------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,16 +9,12 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
-def s10_6ImmOperand : AsmOperandClass { let Name = "s10_6Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s10_6Imm : Operand<i32> { let ParserMatchClass = s10_6ImmOperand; let DecoderMethod = "s10_6ImmDecoder"; }
-def s10_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<10, 6>(N->getSExtValue());}]>;
def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
@@ -130,6 +126,3 @@ def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtVal
def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
-def s10_0ImmOperand : AsmOperandClass { let Name = "s10_0Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s10_0Imm : Operand<i32> { let ParserMatchClass = s10_0ImmOperand; let DecoderMethod = "s10_0ImmDecoder"; }
-def s10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<10, 0>(N->getSExtValue());}]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
index 656c83f2d0c4..0fd55e8b7997 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
@@ -1,4 +1,4 @@
-//===- HexagonDepTimingClasses.h ------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,7 +10,6 @@
//===----------------------------------------------------------------------===//
-
#ifndef TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H
#define TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H
@@ -20,19 +19,25 @@ namespace llvm {
inline bool is_TC3x(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_16d0d8d5:
- case Hexagon::Sched::tc_1853ea6d:
- case Hexagon::Sched::tc_60571023:
- case Hexagon::Sched::tc_7934b9df:
- case Hexagon::Sched::tc_8fd5f294:
- case Hexagon::Sched::tc_b9c0b731:
- case Hexagon::Sched::tc_bcc96cee:
- case Hexagon::Sched::tc_c6ce9b3f:
- case Hexagon::Sched::tc_c6ebf8dd:
- case Hexagon::Sched::tc_c82dc1ff:
- case Hexagon::Sched::tc_caaebcba:
- case Hexagon::Sched::tc_cf59f215:
- case Hexagon::Sched::tc_e913dc32:
+ case Hexagon::Sched::tc_05d3a09b:
+ case Hexagon::Sched::tc_0d8f5752:
+ case Hexagon::Sched::tc_13bfbcf9:
+ case Hexagon::Sched::tc_174516e8:
+ case Hexagon::Sched::tc_1a2fd869:
+ case Hexagon::Sched::tc_1c4528a2:
+ case Hexagon::Sched::tc_32779c6f:
+ case Hexagon::Sched::tc_5b54b33f:
+ case Hexagon::Sched::tc_6b25e783:
+ case Hexagon::Sched::tc_76851da1:
+ case Hexagon::Sched::tc_9debc299:
+ case Hexagon::Sched::tc_a9d88b22:
+ case Hexagon::Sched::tc_bafaade3:
+ case Hexagon::Sched::tc_bcf98408:
+ case Hexagon::Sched::tc_bdceeac1:
+ case Hexagon::Sched::tc_c8ce0b5c:
+ case Hexagon::Sched::tc_d1aa9eaa:
+ case Hexagon::Sched::tc_d773585a:
+ case Hexagon::Sched::tc_df3319ed:
return true;
default:
return false;
@@ -41,8 +46,8 @@ inline bool is_TC3x(unsigned SchedClass) {
inline bool is_TC2early(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_14cd4cfa:
- case Hexagon::Sched::tc_2a160009:
+ case Hexagon::Sched::tc_b4407292:
+ case Hexagon::Sched::tc_fc3999b4:
return true;
default:
return false;
@@ -51,12 +56,13 @@ inline bool is_TC2early(unsigned SchedClass) {
inline bool is_TC4x(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_038a1342:
- case Hexagon::Sched::tc_4d99bca9:
- case Hexagon::Sched::tc_6792d5ff:
- case Hexagon::Sched::tc_9c00ce8d:
- case Hexagon::Sched::tc_d580173f:
- case Hexagon::Sched::tc_f3eaa14b:
+ case Hexagon::Sched::tc_2f7c551d:
+ case Hexagon::Sched::tc_2ff964b4:
+ case Hexagon::Sched::tc_3a867367:
+ case Hexagon::Sched::tc_3b470976:
+ case Hexagon::Sched::tc_4560740b:
+ case Hexagon::Sched::tc_a58fd5cc:
+ case Hexagon::Sched::tc_b8bffe55:
return true;
default:
return false;
@@ -65,23 +71,27 @@ inline bool is_TC4x(unsigned SchedClass) {
inline bool is_TC2(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_00afc57e:
- case Hexagon::Sched::tc_1b9c9ee5:
- case Hexagon::Sched::tc_234a11a5:
- case Hexagon::Sched::tc_2b6f77c6:
- case Hexagon::Sched::tc_41d5298e:
- case Hexagon::Sched::tc_5ba5997d:
- case Hexagon::Sched::tc_84df2cd3:
- case Hexagon::Sched::tc_87735c3b:
- case Hexagon::Sched::tc_897d1a9d:
- case Hexagon::Sched::tc_976ddc4f:
- case Hexagon::Sched::tc_b44c6e2a:
- case Hexagon::Sched::tc_b9c4623f:
- case Hexagon::Sched::tc_c2f7d806:
- case Hexagon::Sched::tc_c74f796f:
- case Hexagon::Sched::tc_d088982c:
- case Hexagon::Sched::tc_ef84f62f:
- case Hexagon::Sched::tc_f49e76f4:
+ case Hexagon::Sched::tc_002cb246:
+ case Hexagon::Sched::tc_14b5c689:
+ case Hexagon::Sched::tc_1c80410a:
+ case Hexagon::Sched::tc_4414d8b1:
+ case Hexagon::Sched::tc_6132ba3d:
+ case Hexagon::Sched::tc_61830035:
+ case Hexagon::Sched::tc_679309b8:
+ case Hexagon::Sched::tc_703e822c:
+ case Hexagon::Sched::tc_779080bf:
+ case Hexagon::Sched::tc_784490da:
+ case Hexagon::Sched::tc_88b4f13d:
+ case Hexagon::Sched::tc_9461ff31:
+ case Hexagon::Sched::tc_9e313203:
+ case Hexagon::Sched::tc_a813cf9a:
+ case Hexagon::Sched::tc_bfec0f01:
+ case Hexagon::Sched::tc_cf8126ae:
+ case Hexagon::Sched::tc_d08ee0f4:
+ case Hexagon::Sched::tc_e4a7f9f0:
+ case Hexagon::Sched::tc_f429765c:
+ case Hexagon::Sched::tc_f675fee8:
+ case Hexagon::Sched::tc_f9058dd7:
return true;
default:
return false;
@@ -90,45 +100,43 @@ inline bool is_TC2(unsigned SchedClass) {
inline bool is_TC1(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_181af5d0:
- case Hexagon::Sched::tc_1b82a277:
- case Hexagon::Sched::tc_1e856f58:
- case Hexagon::Sched::tc_351fed2d:
- case Hexagon::Sched::tc_3669266a:
- case Hexagon::Sched::tc_3cb8ea06:
- case Hexagon::Sched::tc_452f85af:
- case Hexagon::Sched::tc_481e5e5c:
- case Hexagon::Sched::tc_49eb22c8:
- case Hexagon::Sched::tc_523fcf30:
- case Hexagon::Sched::tc_52d7bbea:
- case Hexagon::Sched::tc_53bc8a6a:
- case Hexagon::Sched::tc_540fdfbc:
- case Hexagon::Sched::tc_55050d58:
- case Hexagon::Sched::tc_609d2efe:
- case Hexagon::Sched::tc_68cb12ce:
- case Hexagon::Sched::tc_6ebb4a12:
- case Hexagon::Sched::tc_6efc556e:
- case Hexagon::Sched::tc_73043bf4:
- case Hexagon::Sched::tc_7a830544:
- case Hexagon::Sched::tc_855b0b61:
- case Hexagon::Sched::tc_8fe6b782:
- case Hexagon::Sched::tc_90f3e30c:
- case Hexagon::Sched::tc_97743097:
- case Hexagon::Sched::tc_99be14ca:
- case Hexagon::Sched::tc_9faf76ae:
- case Hexagon::Sched::tc_a46f0df5:
- case Hexagon::Sched::tc_a904d137:
- case Hexagon::Sched::tc_b9488031:
- case Hexagon::Sched::tc_be706f30:
- case Hexagon::Sched::tc_c6aa82f7:
- case Hexagon::Sched::tc_cde8b071:
- case Hexagon::Sched::tc_d6bf0472:
- case Hexagon::Sched::tc_dbdffe3d:
- case Hexagon::Sched::tc_e0739b8c:
- case Hexagon::Sched::tc_e1e99bfa:
- case Hexagon::Sched::tc_e9fae2d6:
- case Hexagon::Sched::tc_f2704b9a:
- case Hexagon::Sched::tc_f8eeed7a:
+ case Hexagon::Sched::tc_0663f615:
+ case Hexagon::Sched::tc_0a705168:
+ case Hexagon::Sched::tc_0ae0825c:
+ case Hexagon::Sched::tc_1b6f7cec:
+ case Hexagon::Sched::tc_1fc97744:
+ case Hexagon::Sched::tc_20cdee80:
+ case Hexagon::Sched::tc_2332b92e:
+ case Hexagon::Sched::tc_2eabeebe:
+ case Hexagon::Sched::tc_3d495a39:
+ case Hexagon::Sched::tc_4c5ba658:
+ case Hexagon::Sched::tc_56336eb0:
+ case Hexagon::Sched::tc_56f114f4:
+ case Hexagon::Sched::tc_57890846:
+ case Hexagon::Sched::tc_5a2711e5:
+ case Hexagon::Sched::tc_5b7c0967:
+ case Hexagon::Sched::tc_640086b5:
+ case Hexagon::Sched::tc_643b4717:
+ case Hexagon::Sched::tc_85c9c08f:
+ case Hexagon::Sched::tc_85d5d03f:
+ case Hexagon::Sched::tc_862b3e70:
+ case Hexagon::Sched::tc_946df596:
+ case Hexagon::Sched::tc_9c3ecd83:
+ case Hexagon::Sched::tc_9fc3dae0:
+ case Hexagon::Sched::tc_a1123dda:
+ case Hexagon::Sched::tc_a1c00888:
+ case Hexagon::Sched::tc_ae53734a:
+ case Hexagon::Sched::tc_b31c2e97:
+ case Hexagon::Sched::tc_b4b5c03a:
+ case Hexagon::Sched::tc_b51dc29a:
+ case Hexagon::Sched::tc_cd374165:
+ case Hexagon::Sched::tc_cfd8378a:
+ case Hexagon::Sched::tc_d5b7b0c1:
+ case Hexagon::Sched::tc_d9d43ecb:
+ case Hexagon::Sched::tc_db2bce9c:
+ case Hexagon::Sched::tc_de4df740:
+ case Hexagon::Sched::tc_de554571:
+ case Hexagon::Sched::tc_e78647bd:
return true;
default:
return false;
@@ -136,4 +144,4 @@ inline bool is_TC1(unsigned SchedClass) {
}
} // namespace llvm
-#endif
+#endif \ No newline at end of file
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 557e6384be6a..8e2f5093038e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -731,9 +731,7 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
MIB.add(MO);
// Set memory references.
- MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(*MI);
MI->eraseFromParent();
return;
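For reference, the change above replaces the iterator-range form of copying memory operands with a single clone call on the builder. A minimal sketch of the two idioms, reusing the MIB and MI names from the hunk (illustrative only, not a standalone translation unit):

// Before: copy the memoperand range explicitly.
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
MIB.setMemRefs(MMOBegin, MMOEnd);

// After: clone all memory references from the original instruction.
MIB.cloneMemRefs(*MI);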
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 7e774674e0c0..1a762c0c9de7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -891,14 +891,7 @@ void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp,
MB.add(MO);
Ox++;
}
-
- MachineFunction &MF = *B.getParent();
- MachineInstr::mmo_iterator I = MI.memoperands_begin();
- unsigned NR = std::distance(I, MI.memoperands_end());
- MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR);
- for (unsigned i = 0; i < NR; ++i)
- MemRefs[i] = *I++;
- MB.setMemRefs(MemRefs, MemRefs+NR);
+ MB.cloneMemRefs(MI);
MachineInstr *NewI = MB;
NewI->clearKillInfo();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 97b02e2b34cb..f5736546a87c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -550,6 +550,37 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
}
}
+/// Returns true if the target can safely skip saving callee-saved registers
+/// for noreturn nounwind functions.
+bool HexagonFrameLowering::enableCalleeSaveSkip(
+ const MachineFunction &MF) const {
+ const auto &F = MF.getFunction();
+ assert(F.hasFnAttribute(Attribute::NoReturn) &&
+ F.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
+ !F.getFunction().hasFnAttribute(Attribute::UWTable));
+ (void)F;
+
+ // No need to save callee saved registers if the function does not return.
+ return MF.getSubtarget<HexagonSubtarget>().noreturnStackElim();
+}
+
+// Helper function used to determine when to eliminate the stack frame for
+// functions marked as noreturn and when the noreturn-stack-elim option is
+// specified. When both these conditions are true, then an FP may not be needed
+// even if the function makes a call. It is very similar to enableCalleeSaveSkip,
+// but it is used to check whether the allocframe can be eliminated as well.
+static bool enableAllocFrameElim(const MachineFunction &MF) {
+ const auto &F = MF.getFunction();
+ const auto &MFI = MF.getFrameInfo();
+ const auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ assert(!MFI.hasVarSizedObjects() &&
+ !HST.getRegisterInfo()->needsStackRealignment(MF));
+ return F.hasFnAttribute(Attribute::NoReturn) &&
+ F.hasFnAttribute(Attribute::NoUnwind) &&
+ !F.hasFnAttribute(Attribute::UWTable) && HST.noreturnStackElim() &&
+ MFI.getStackSize() == 0;
+}
+
void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
bool PrologueStubs) const {
MachineFunction &MF = *MBB.getParent();
@@ -994,7 +1025,7 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
}
const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
- if (MFI.hasCalls() || HMFI.hasClobberLR())
+ if ((MFI.hasCalls() && !enableAllocFrameElim(MF)) || HMFI.hasClobberLR())
return true;
return false;
@@ -1266,7 +1297,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
// Call spill function.
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
- : MBB.getLastNonDebugInstr()->getDebugLoc();
+ : MBB.findDebugLoc(MBB.end());
MachineInstr *DeallocCall = nullptr;
if (HasTC) {
@@ -1579,10 +1610,10 @@ bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
// S2_storeri_io FI, 0, TmpR
BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(TmpR, RegState::Kill)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(TmpR, RegState::Kill)
+ .cloneMemRefs(*MI);
NewRegs.push_back(TmpR);
B.erase(It);
@@ -1604,9 +1635,9 @@ bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
// TmpR = L2_loadri_io FI, 0
unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR)
- .addFrameIndex(FI)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .cloneMemRefs(*MI);
// DstR = C2_tfrrp TmpR if DstR is a predicate register
// DstR = A2_tfrrcr TmpR if DstR is a modifier register
@@ -1708,7 +1739,7 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
// register that is entirely undefined.
LivePhysRegs LPR(HRI);
LPR.addLiveIns(B);
- SmallVector<std::pair<unsigned, const MachineOperand*>,2> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>,2> Clobbers;
for (auto R = B.begin(); R != It; ++R) {
Clobbers.clear();
LPR.stepForward(*R, Clobbers);
@@ -1731,10 +1762,10 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
: Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(SrcLo, getKillRegState(IsKill))
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(SrcLo, getKillRegState(IsKill))
+ .cloneMemRefs(*MI);
}
// Store high part.
@@ -1742,10 +1773,10 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
StoreOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vS32b_ai
: Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
- .addFrameIndex(FI)
- .addImm(Size)
- .addReg(SrcHi, getKillRegState(IsKill))
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(Size)
+ .addReg(SrcHi, getKillRegState(IsKill))
+ .cloneMemRefs(*MI);
}
B.erase(It);
@@ -1777,17 +1808,17 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
: Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
- .addFrameIndex(FI)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .cloneMemRefs(*MI);
// Load high part.
LoadOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vL32b_ai
: Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
- .addFrameIndex(FI)
- .addImm(Size)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(Size)
+ .cloneMemRefs(*MI);
B.erase(It);
return true;
@@ -1813,10 +1844,10 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
: Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(SrcR, getKillRegState(IsKill))
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(SrcR, getKillRegState(IsKill))
+ .cloneMemRefs(*MI);
B.erase(It);
return true;
@@ -1841,9 +1872,9 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
unsigned LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
: Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
- .addFrameIndex(FI)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .cloneMemRefs(*MI);
B.erase(It);
return true;
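The two helpers added above gate frame elimination on the same attribute combination. A standalone sketch in plain C++ (hypothetical names, not LLVM code) of the predicate they both express: the function never returns, never unwinds, and needs no unwind table, so nothing after the call site can observe the callee-saved state.

// Minimal sketch; FnAttrs and canSkipCalleeSaves are hypothetical names.
struct FnAttrs { bool NoReturn; bool NoUnwind; bool UWTable; };

static bool canSkipCalleeSaves(const FnAttrs &A, bool SubtargetOptIn) {
  // SubtargetOptIn mirrors HexagonSubtarget::noreturnStackElim() above.
  return A.NoReturn && A.NoUnwind && !A.UWTable && SubtargetOptIn;
}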
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 988718860c5b..d65d870750f8 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -41,6 +41,8 @@ public:
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
override {}
+ bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp
deleted file mode 100644
index 63ec9c3d3124..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-//===- HexagonGatherPacketize.cpp -----------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This pass ensures that producer and consumer of VTMP are paired in a bundle.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "gather-packetize"
-
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-
-cl::opt<bool> EnableGatherPacketize(
- "hexagon-enable-gather-packetize", cl::Hidden, cl::init(true),
- cl::desc("Generate gather packets before packetization"));
-
-namespace llvm {
-FunctionPass *createHexagonGatherPacketize();
-void initializeHexagonGatherPacketizePass(PassRegistry &);
-}
-
-namespace {
-class HexagonGatherPacketize : public MachineFunctionPass {
-public:
- static char ID;
- HexagonGatherPacketize() : MachineFunctionPass(ID) {
- PassRegistry &Registry = *PassRegistry::getPassRegistry();
- initializeHexagonGatherPacketizePass(Registry);
- }
-
- StringRef getPassName() const override {
- return "Hexagon Gather Packetize Code";
- }
- bool runOnMachineFunction(MachineFunction &Fn) override;
-};
-
-char HexagonGatherPacketize::ID = 0;
-
-static inline bool isVtmpDef(const MachineInstr &MI) {
- for (const MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isDef() && MO.isImplicit() &&
- (MO.getReg() == Hexagon::VTMP)) {
- return true;
- }
- return false;
-}
-
-static inline bool isVtmpUse(const MachineInstr &MI) {
- return (MI.mayStore() && (MI.getOperand(2)).isReg() &&
- ((MI.getOperand(2)).getReg() == Hexagon::VTMP));
-}
-
-bool HexagonGatherPacketize::runOnMachineFunction(MachineFunction &Fn) {
- if (!EnableGatherPacketize)
- return false;
- auto &ST = Fn.getSubtarget<HexagonSubtarget>();
- bool HasV65 = ST.hasV65Ops();
- bool UseHVX = ST.useHVXOps();
- if (!(HasV65 & UseHVX))
- return false;
-
- for (auto &MBB : Fn) {
- bool VtmpDef = false;
- MachineBasicBlock::iterator MII, MIE, DefMII;
- for (MII = MBB.begin(), MIE = MBB.end(); MII != MIE; ++MII) {
- MachineInstr &MI = *MII;
- if (VtmpDef) {
- if (!isVtmpUse(MI))
- continue;
- MBB.splice(std::next(DefMII), &MBB, MII);
- finalizeBundle(MBB, DefMII.getInstrIterator(),
- std::next(MII).getInstrIterator());
- VtmpDef = false;
- continue;
- }
- if (!(isVtmpDef(MI)))
- continue;
- VtmpDef = true;
- DefMII = MII;
- }
- assert(!VtmpDef && "VTMP producer and consumer not in same block");
- }
- return true;
-}
-}
-
-//===----------------------------------------------------------------------===//
-// Public Constructor Functions
-//===----------------------------------------------------------------------===//
-
-INITIALIZE_PASS(HexagonGatherPacketize, "hexagon-gather-packetize",
- "Hexagon gather packetize Code", false, false)
-
-FunctionPass *llvm::createHexagonGatherPacketize() {
- return new HexagonGatherPacketize();
-}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 2582a021e956..e3492e7374e9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -632,7 +632,7 @@ void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
SortableVectorType VRs;
for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I)
VRs.push_back(I->first);
- llvm::sort(VRs.begin(), VRs.end(), LexCmp);
+ llvm::sort(VRs, LexCmp);
// Transfer the results to the outgoing register ordering.
for (unsigned i = 0, n = VRs.size(); i < n; ++i)
RO.insert(std::make_pair(VRs[i], i));
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 0e33976a58ac..239cf49ca8a2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -1011,10 +1011,9 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
/// the use of the hardware loop instruction.
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
bool IsInnerHWLoop) const {
- const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
- LLVM_DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0]));
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Blocks[i];
+ LLVM_DEBUG(dbgs() << "\nhw_loop head, "
+ << printMBBReference(**L->block_begin()));
+ for (MachineBasicBlock *MBB : L->getBlocks()) {
for (MachineBasicBlock::iterator
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
const MachineInstr *MI = &*MII;
@@ -1368,11 +1367,10 @@ bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
const MachineOperand *MO,
LoopFeederMap &LoopFeederPhi) const {
if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
- const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
- LLVM_DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0]));
+ LLVM_DEBUG(dbgs() << "\nhw_loop head, "
+ << printMBBReference(**L->block_begin()));
// Ignore all BBs that form Loop.
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock *MBB : L->getBlocks()) {
if (A == MBB)
return false;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index efb4c2eb0fc3..470b05bda4c6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -127,8 +127,7 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
}
SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
+ MachineMemOperand *MemOp = LD->getMemOperand();
auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
-> MachineSDNode* {
@@ -159,7 +158,7 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
MVT::i32, MVT::Other, Base,
IncV, Chain);
- L->setMemRefs(MemOp, MemOp+1);
+ CurDAG->setNodeMemRefs(L, {MemOp});
To[1] = SDValue(L, 1); // Next address.
To[2] = SDValue(L, 2); // Chain.
// Handle special case for extension to i64.
@@ -170,7 +169,7 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
Base, Zero, Chain);
- L->setMemRefs(MemOp, MemOp+1);
+ CurDAG->setNodeMemRefs(L, {MemOp});
To[2] = SDValue(L, 1); // Chain.
MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
Base, IncV);
@@ -344,9 +343,8 @@ bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) {
FLI->second, dl, RTys,
{IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
- Res->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
+ CurDAG->setNodeMemRefs(Res, {MemOp});
ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
@@ -525,8 +523,7 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
}
SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = ST->getMemOperand();
+ MachineMemOperand *MemOp = ST->getMemOperand();
// Next address Chain
SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
@@ -537,14 +534,14 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
SDValue Ops[] = { Base, IncV, Value, Chain };
MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
Ops);
- S->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(S, {MemOp});
To[0] = SDValue(S, 0);
To[1] = SDValue(S, 1);
} else {
SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDValue Ops[] = { Base, Zero, Value, Chain };
MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
- S->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(S, {MemOp});
To[1] = SDValue(S, 0);
MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
Base, IncV);
@@ -1550,6 +1547,7 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
return true;
}
}
+ break;
}
default:
break;
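The same memoperand migration recurs throughout this file: the old code allocated a one-element array on the MachineFunction and attached it with setMemRefs, while the new code hands the single MachineMemOperand to the DAG. A sketch using the names from the first hunk (illustrative, not a compilable unit on its own):

// Before:
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
L->setMemRefs(MemOp, MemOp + 1);

// After:
MachineMemOperand *MemOp = LD->getMemOperand();
CurDAG->setNodeMemRefs(L, {MemOp});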
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 8aef9b4560d5..b796e442d4fa 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -120,7 +120,7 @@ struct Coloring {
return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
}
- void dump() const;
+ LLVM_DUMP_METHOD void dump() const;
private:
ArrayRef<Node> Order;
@@ -267,7 +267,7 @@ bool Coloring::color() {
return true;
}
-LLVM_DUMP_METHOD
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Coloring::dump() const {
dbgs() << "{ Order: {";
for (unsigned I = 0; I != Order.size(); ++I) {
@@ -309,6 +309,7 @@ void Coloring::dump() const {
dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
dbgs() << " }\n}\n";
}
+#endif
namespace {
// Base class for reordering networks. They don't strictly need to be
@@ -651,6 +652,7 @@ struct OpRef {
IndexBits = 28,
};
+ LLVM_DUMP_METHOD
void print(raw_ostream &OS, const SelectionDAG &G) const;
private:
@@ -663,7 +665,7 @@ struct NodeTemplate {
MVT Ty = MVT::Other;
std::vector<OpRef> Ops;
- void print(raw_ostream &OS, const SelectionDAG &G) const;
+ LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
};
struct ResultStack {
@@ -699,10 +701,12 @@ struct ResultStack {
BaseType List;
+ LLVM_DUMP_METHOD
void print(raw_ostream &OS, const SelectionDAG &G) const;
};
} // namespace
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
if (isValue()) {
OpV.getNode()->print(OS, &G);
@@ -752,6 +756,7 @@ void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
OS << '\n';
}
}
+#endif
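The guard pattern applied to the dump and print methods above keeps their bodies out of release builds: declarations carry LLVM_DUMP_METHOD, and the definitions sit behind the NDEBUG/LLVM_ENABLE_DUMP check. In outline (same macros as in the hunks above):

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Coloring::dump() const {
  // printing code, compiled only in asserts builds or with LLVM_ENABLE_DUMP
}
#endif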
namespace {
struct ShuffleMask {
@@ -1327,6 +1332,32 @@ OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
return vmuxp(Bytes, L, R, Results);
}
+namespace {
+ struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
+ template <typename T>
+ Deleter(SelectionDAG &D, T &C)
+ : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
+ C.erase(N);
+ }) {}
+ };
+
+ template <typename T>
+ struct NullifyingVector : public T {
+ DenseMap<SDNode*, SDNode**> Refs;
+ NullifyingVector(T &&V) : T(V) {
+ for (unsigned i = 0, e = T::size(); i != e; ++i) {
+ SDNode *&N = T::operator[](i);
+ Refs[N] = &N;
+ }
+ }
+ void erase(SDNode *N) {
+ auto F = Refs.find(N);
+ if (F != Refs.end())
+ *F->second = nullptr;
+ }
+ };
+}
+
bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
MVT ResTy, SDValue Va, SDValue Vb,
SDNode *N) {
@@ -1337,10 +1368,30 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
bool HavePairs = (2*HwLen == VecLen);
MVT SingleTy = getSingleVT(MVT::i8);
+ // The prior attempts to handle this shuffle may have left a bunch of
+ // dead nodes in the DAG (such as constants). These nodes will be added
+ // at the end of DAG's node list, which at that point had already been
+ // sorted topologically. In the main selection loop, the node list is
+ // traversed backwards from the root node, which means that any new
+ // nodes (from the end of the list) will not be visited.
+ // Scalarization will replace the shuffle node with the scalarized
+ // expression, and if that expression reused any of the leftover (dead)
+ // nodes, these nodes would not be selected (since the "local" selection
+ // only visits nodes that are not in AllNodes).
+ // To avoid this issue, remove all dead nodes from the DAG now.
+ DAG.RemoveDeadNodes();
+ DenseSet<SDNode*> AllNodes;
+ for (SDNode &S : DAG.allnodes())
+ AllNodes.insert(&S);
+
+ Deleter DUA(DAG, AllNodes);
+
SmallVector<SDValue,128> Ops;
+ LLVMContext &Ctx = *DAG.getContext();
+ MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
for (int I : Mask) {
if (I < 0) {
- Ops.push_back(ISel.selectUndef(dl, ElemTy));
+ Ops.push_back(ISel.selectUndef(dl, LegalTy));
continue;
}
SDValue Vec;
@@ -1360,7 +1411,7 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
}
}
SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemTy, {Vec, Idx});
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
SDValue L = Lower.LowerOperation(Ex, DAG);
assert(L.getNode());
Ops.push_back(L);
@@ -1384,32 +1435,55 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
assert(!N->use_empty());
ISel.ReplaceNode(N, LV.getNode());
- DAG.RemoveDeadNodes();
- std::deque<SDNode*> SubNodes;
- SubNodes.push_back(LV.getNode());
+ if (AllNodes.count(LV.getNode())) {
+ DAG.RemoveDeadNodes();
+ return true;
+ }
+
+ // The lowered build-vector node will now need to be selected. It needs
+ // to be done here because this node and its subnodes are not included
+ // in the main selection loop.
+ // Implement essentially the same topological ordering algorithm as is
+ // used in SelectionDAGISel.
+
+ SetVector<SDNode*> SubNodes, TmpQ;
+ std::map<SDNode*,unsigned> NumOps;
+
+ SubNodes.insert(LV.getNode());
for (unsigned I = 0; I != SubNodes.size(); ++I) {
- for (SDValue Op : SubNodes[I]->ops())
- SubNodes.push_back(Op.getNode());
+ unsigned OpN = 0;
+ SDNode *S = SubNodes[I];
+ for (SDValue Op : S->ops()) {
+ if (AllNodes.count(Op.getNode()))
+ continue;
+ SubNodes.insert(Op.getNode());
+ ++OpN;
+ }
+ NumOps.insert({S, OpN});
+ if (OpN == 0)
+ TmpQ.insert(S);
}
- while (!SubNodes.empty()) {
- SDNode *S = SubNodes.front();
- SubNodes.pop_front();
- if (S->use_empty())
- continue;
- // This isn't great, but users need to be selected before any nodes that
- // they use. (The reason is to match larger patterns, and avoid nodes that
- // cannot be matched on their own, e.g. ValueType, TokenFactor, etc.).
- bool PendingUser = llvm::any_of(S->uses(), [&SubNodes](const SDNode *U) {
- return llvm::any_of(SubNodes, [U](const SDNode *T) {
- return T == U;
- });
- });
- if (PendingUser)
- SubNodes.push_back(S);
- else
- ISel.Select(S);
+
+ for (unsigned I = 0; I != TmpQ.size(); ++I) {
+ SDNode *S = TmpQ[I];
+ for (SDNode *U : S->uses()) {
+ if (!SubNodes.count(U))
+ continue;
+ auto F = NumOps.find(U);
+ assert(F != NumOps.end());
+ assert(F->second > 0);
+ if (!--F->second)
+ TmpQ.insert(F->first);
+ }
}
+ assert(SubNodes.size() == TmpQ.size());
+ NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
+
+ Deleter DUQ(DAG, Queue);
+ for (SDNode *S : reverse(Queue))
+ if (S != nullptr)
+ ISel.Select(S);
DAG.RemoveDeadNodes();
return true;
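A standalone sketch (plain C++, hypothetical names) of the topological ordering used above: count, for every node, how many of its operands belong to the newly created set, seed a worklist with the nodes that have none, and release a user once its last in-set operand has been queued. The hunk then walks the resulting queue in reverse so that users are selected before their operands.

#include <cstddef>
#include <map>
#include <vector>

using Node = int;                                // stand-in for SDNode*
using Graph = std::map<Node, std::vector<Node>>; // node -> its in-set operands

std::vector<Node> topoOrder(const Graph &G) {
  std::map<Node, unsigned> NumOps;               // pending in-set operands
  std::map<Node, std::vector<Node>> Users;       // reverse edges
  std::vector<Node> Queue;
  for (const auto &P : G) {
    unsigned N = 0;
    for (Node Op : P.second)
      if (G.count(Op)) {
        ++N;
        Users[Op].push_back(P.first);
      }
    NumOps[P.first] = N;
    if (N == 0)
      Queue.push_back(P.first);                  // leaf: ready immediately
  }
  for (std::size_t I = 0; I != Queue.size(); ++I)
    for (Node U : Users[Queue[I]])
      if (--NumOps[U] == 0)
        Queue.push_back(U);
  return Queue;                                  // operands precede users
}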
@@ -2048,10 +2122,6 @@ void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
}
void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
- if (!HST->usePackets()) {
- report_fatal_error("Support for gather requires packets, "
- "which are disabled");
- }
const SDLoc &dl(N);
SDValue Chain = N->getOperand(0);
SDValue Address = N->getOperand(2);
@@ -2083,18 +2153,13 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
SDValue Ops[] = { Address, Predicate, Base, Modifier, Offset, Chain };
SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
ReplaceNode(N, Result);
}
void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
- if (!HST->usePackets()) {
- report_fatal_error("Support for gather requires packets, "
- "which are disabled");
- }
const SDLoc &dl(N);
SDValue Chain = N->getOperand(0);
SDValue Address = N->getOperand(2);
@@ -2125,9 +2190,8 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
SDValue Ops[] = { Address, Base, Modifier, Offset, Chain };
SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
ReplaceNode(N, Result);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 604d84994b6c..1edf3e498dfa 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -151,16 +152,6 @@ static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
#include "HexagonGenCallingConv.inc"
-void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
- if (VT != PromotedLdStVT) {
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType(ISD::LOAD, VT, PromotedLdStVT);
-
- setOperationAction(ISD::STORE, VT, Promote);
- AddPromotedToType(ISD::STORE, VT, PromotedLdStVT);
- }
-}
-
SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
const {
@@ -250,6 +241,18 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return true;
}
+unsigned HexagonTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
+ // Just support r19, the linux kernel uses it.
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("r19", Hexagon::R19)
+ .Default(0);
+ if (Reg)
+ return Reg;
+
+ report_fatal_error("Invalid register name global variable");
+}
+
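StringSwitch resolves a register name to a value in a single expression, falling back to the Default when nothing matches; only r19 is recognized by the patch. A small sketch with one hypothetical extra entry, assuming the llvm/ADT/StringSwitch.h header included above:

#include "llvm/ADT/StringSwitch.h"

unsigned regNumber(llvm::StringRef Name) {
  return llvm::StringSwitch<unsigned>(Name)
      .Case("r19", 19)
      .Case("r20", 20)   // hypothetical extra entry, for illustration only
      .Default(0);       // 0 means "unknown register"
}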
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/Glue are the input chain/glue to use, and that TheCall is the call
@@ -1225,7 +1228,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const HexagonSubtarget &ST)
: TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
Subtarget(ST) {
- bool IsV4 = !Subtarget.hasV5Ops();
auto &HRI = *Subtarget.getRegisterInfo();
setPrefLoopAlignment(4);
@@ -1267,10 +1269,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
- if (Subtarget.hasV5Ops()) {
- addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
- }
+ addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
//
// Handling of scalar operations.
@@ -1284,21 +1284,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// which default to "expand" for at least one type.
// Misc operations.
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
-
- setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
- setOperationAction(ISD::JumpTable, MVT::i32, Custom);
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
- setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
- setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
- setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
- setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom legalize GlobalAddress nodes into CONST32.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
@@ -1348,8 +1348,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTTZ, MVT::i8, Promote);
setOperationAction(ISD::CTTZ, MVT::i16, Promote);
- // In V5, popcount can count # of 1s in i64 but returns i32.
- // On V4 it will be expanded (set later).
+ // Popcount can count # of 1s in i64 but returns i32.
setOperationAction(ISD::CTPOP, MVT::i8, Promote);
setOperationAction(ISD::CTPOP, MVT::i16, Promote);
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
@@ -1360,6 +1359,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
setOperationAction(ISD::BSWAP, MVT::i64, Legal);
+ setOperationAction(ISD::FSHL, MVT::i32, Legal);
+ setOperationAction(ISD::FSHL, MVT::i64, Legal);
+ setOperationAction(ISD::FSHR, MVT::i32, Legal);
+ setOperationAction(ISD::FSHR, MVT::i64, Legal);
+
for (unsigned IntExpOp :
{ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
@@ -1403,12 +1407,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Handling of vector operations.
//
- promoteLdStType(MVT::v4i8, MVT::i32);
- promoteLdStType(MVT::v2i16, MVT::i32);
- promoteLdStType(MVT::v8i8, MVT::i64);
- promoteLdStType(MVT::v4i16, MVT::i64);
- promoteLdStType(MVT::v2i32, MVT::i64);
-
// Set the action for vector operations to "expand", then override it with
// either "custom" or "legal" for specific cases.
static const unsigned VectExpOps[] = {
@@ -1488,9 +1486,13 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
}
// Custom lower unaligned loads.
- for (MVT VecVT : {MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
- MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
- setOperationAction(ISD::LOAD, VecVT, Custom);
+ // Also, for both loads and stores, verify the alignment of the address
+ // in case it is a compile-time constant. This is a usability feature to
+ // provide a meaningful error message to users.
+ for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
+ MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
}
for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) {
@@ -1508,63 +1510,27 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
- // Subtarget-specific operation actions.
- //
- if (Subtarget.hasV60Ops()) {
- setOperationAction(ISD::ROTL, MVT::i32, Custom);
- setOperationAction(ISD::ROTL, MVT::i64, Custom);
- }
- if (Subtarget.hasV5Ops()) {
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FADD, MVT::f64, Expand);
- setOperationAction(ISD::FSUB, MVT::f64, Expand);
- setOperationAction(ISD::FMUL, MVT::f64, Expand);
-
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- } else { // V4
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
- setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i8, Expand);
- setOperationAction(ISD::CTPOP, MVT::i16, Expand);
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
-
- // Expand these operations for both f32 and f64:
- for (unsigned FPExpOpV4 :
- {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
- setOperationAction(FPExpOpV4, MVT::f32, Expand);
- setOperationAction(FPExpOpV4, MVT::f64, Expand);
- }
-
- for (ISD::CondCode FPExpCCV4 :
- {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
- ISD::SETUO, ISD::SETO}) {
- setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
- setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
- }
- }
+ // V5+.
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
// Handling of indexed loads/stores: default is "expand".
//
@@ -1574,6 +1540,19 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setIndexedStoreAction(ISD::POST_INC, VT, Legal);
}
+ // Subtarget-specific operation actions.
+ //
+ if (Subtarget.hasV60Ops()) {
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTL, MVT::i64, Legal);
+ setOperationAction(ISD::ROTR, MVT::i32, Legal);
+ setOperationAction(ISD::ROTR, MVT::i64, Legal);
+ }
+ if (Subtarget.hasV66Ops()) {
+ setOperationAction(ISD::FADD, MVT::f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::f64, Legal);
+ }
+
if (Subtarget.useHVXOps())
initializeHVXLowering();
@@ -1600,42 +1579,18 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
- if (IsV4) {
- // Handle single-precision floating point operations on V4.
- if (FastMath) {
- setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
- setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
- setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
- setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
- setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
- // Double-precision compares.
- setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
- setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
- } else {
- setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
- setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
- setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
- setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
- setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
- // Double-precision compares.
- setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
- setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
- }
- }
-
// This is the only fast library function for sqrtd.
if (FastMath)
setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
// Prefix is: nothing for "slow-math",
- // "fast2_" for V4 fast-math and V5+ fast-math double-precision
+ // "fast2_" for V5+ fast-math double-precision
// (actually, keep fast-math and fast-math2 separate for now)
if (FastMath) {
setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
- // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok).
setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
} else {
setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
@@ -1645,44 +1600,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
}
- if (Subtarget.hasV5Ops()) {
- if (FastMath)
- setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
- else
- setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
- } else {
- // V4
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
- setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
- setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
- setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
- setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
- setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
- setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
- setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
- setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
- setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
- setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
- setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
- setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
- setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
- setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
- setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
- setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
- setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
- setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
- setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
- }
+ if (FastMath)
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
+ else
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
// These cause problems when the shift amount is non-constant.
setLibcallName(RTLIB::SHL_I128, nullptr);
@@ -1738,6 +1659,26 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
+void
+HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl,
+ unsigned NeedAlign) const {
+ auto *CA = dyn_cast<ConstantSDNode>(Ptr);
+ if (!CA)
+ return;
+ unsigned Addr = CA->getZExtValue();
+ unsigned HaveAlign = Addr != 0 ? 1u << countTrailingZeros(Addr) : NeedAlign;
+ if (HaveAlign < NeedAlign) {
+ std::string ErrMsg;
+ raw_string_ostream O(ErrMsg);
+ O << "Misaligned constant address: " << format_hex(Addr, 10)
+ << " has alignment " << HaveAlign
+ << ", but the memory access requires " << NeedAlign;
+ if (DebugLoc DL = dl.getDebugLoc())
+ DL.print(O << ", at ");
+ report_fatal_error(O.str());
+ }
+}
+
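A standalone sketch of the alignment computation in validateConstPtrAlignment above: the largest power-of-two alignment a constant address guarantees is one shifted left by its number of trailing zero bits, and address zero is treated as sufficiently aligned.

#include <cstdint>

// Hypothetical helper mirroring the HaveAlign computation above.
static unsigned alignmentOfAddress(std::uint32_t Addr, unsigned NeedAlign) {
  if (Addr == 0)
    return NeedAlign;            // address 0 never triggers the error
  unsigned TZ = 0;
  while ((Addr & 1u) == 0) {     // portable countTrailingZeros
    ++TZ;
    Addr >>= 1;
  }
  return 1u << TZ;
}
// Example: an i32 store (NeedAlign == 4) to constant address 0x1002 yields
// HaveAlign == 2 and reports the misaligned-address fatal error.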
// Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
// intrinsic.
static bool isBrevLdIntrinsic(const Value *Inst) {
@@ -1834,11 +1775,8 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// The intrinsic function call is of the form { ElTy, i8* }
// @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
// should be derived from ElTy.
- PointerType *PtrTy = I.getCalledFunction()
- ->getReturnType()
- ->getContainedType(0)
- ->getPointerTo();
- Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
+ Info.memVT = MVT::getVT(ElTy);
llvm::Value *BasePtrVal = I.getOperand(0);
Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
// The offset value comes through Modifier register. For now, assume the
@@ -1904,12 +1842,12 @@ bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
}
TargetLoweringBase::LegalizeTypeAction
-HexagonTargetLowering::getPreferredVectorAction(EVT VT) const {
+HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT.getVectorNumElements() == 1)
return TargetLoweringBase::TypeScalarizeVector;
// Always widen vectors of i1.
- MVT ElemTy = VT.getSimpleVT().getVectorElementType();
+ MVT ElemTy = VT.getVectorElementType();
if (ElemTy == MVT::i1)
return TargetLoweringBase::TypeWidenVector;
@@ -2341,8 +2279,9 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
// Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
// position 0.
assert(ty(IdxV) == MVT::i32);
+ unsigned VecRep = 8 / VecWidth;
SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
- DAG.getConstant(8*Scale, dl, MVT::i32));
+ DAG.getConstant(8*VecRep, dl, MVT::i32));
SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
while (Scale > 1) {
@@ -2643,12 +2582,37 @@ HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
}
SDValue
+HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ unsigned ClaimAlign = LN->getAlignment();
+ validateConstPtrAlignment(LN->getBasePtr(), SDLoc(Op), ClaimAlign);
+ // Call LowerUnalignedLoad for all loads, it recognizes loads that
+ // don't need extra aligning.
+ return LowerUnalignedLoad(Op, DAG);
+}
+
+SDValue
+HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+ unsigned ClaimAlign = SN->getAlignment();
+ SDValue Ptr = SN->getBasePtr();
+ const SDLoc &dl(Op);
+ validateConstPtrAlignment(Ptr, dl, ClaimAlign);
+
+ MVT StoreTy = SN->getMemoryVT().getSimpleVT();
+ unsigned NeedAlign = Subtarget.getTypeAlignment(StoreTy);
+ if (ClaimAlign < NeedAlign)
+ return expandUnalignedStore(SN, DAG);
+ return Op;
+}
+
+SDValue
HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
const {
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
- unsigned HaveAlign = LN->getAlignment();
MVT LoadTy = ty(Op);
unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
+ unsigned HaveAlign = LN->getAlignment();
if (HaveAlign >= NeedAlign)
return Op;
@@ -2802,7 +2766,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::BITCAST: return LowerBITCAST(Op, DAG);
- case ISD::LOAD: return LowerUnalignedLoad(Op, DAG);
+ case ISD::LOAD: return LowerLoad(Op, DAG);
+ case ISD::STORE: return LowerStore(Op, DAG);
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerAddSubCarry(Op, DAG);
case ISD::SRA:
@@ -2834,6 +2799,19 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
void
+HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ // We are only custom-lowering stores to verify the alignment of the
+ // address if it is a compile-time constant. Since a store can be modified
+ // during type-legalization (the value being stored may need legalization),
+ // return empty Results here to indicate that we don't really make any
+ // changes in the custom lowering.
+ if (N->getOpcode() != ISD::STORE)
+ return TargetLowering::LowerOperationWrapper(N, Results, DAG);
+}
+
+void
HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -2946,7 +2924,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- return Subtarget.hasV5Ops();
+ return true;
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
@@ -3110,6 +3088,25 @@ HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
return TargetLowering::findRepresentativeClass(TRI, VT);
}
+bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
+ ISD::LoadExtType ExtTy, EVT NewVT) const {
+ // TODO: This may be worth removing. Check regression tests for diffs.
+ if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
+ return false;
+
+ auto *L = cast<LoadSDNode>(Load);
+ std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
+ // Small-data object, do not shrink.
+ if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
+ return false;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
+ auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
+ const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
+ return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
+ }
+ return true;
+}
+
Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
BasicBlock *BB = Builder.GetInsertBlock();
@@ -3154,9 +3151,12 @@ bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
}
-bool HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
- AtomicCmpXchgInst *AI) const {
+TargetLowering::AtomicExpansionKind
+HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *AI) const {
const DataLayout &DL = AI->getModule()->getDataLayout();
unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
- return Size >= 4 && Size <= 8;
+ if (Size >= 4 && Size <= 8)
+ return AtomicExpansionKind::LLSC;
+ return AtomicExpansionKind::None;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 3d94bd1ff6ed..265c37e6ae61 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -101,7 +101,6 @@ namespace HexagonISD {
bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize)
const;
- void promoteLdStType(MVT VT, MVT PromotedLdStVT);
public:
explicit HexagonTargetLowering(const TargetMachine &TM,
@@ -142,10 +141,12 @@ namespace HexagonISD {
unsigned DefinedValues) const override;
bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
- TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
@@ -164,6 +165,8 @@ namespace HexagonISD {
SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const;
@@ -220,6 +223,9 @@ namespace HexagonISD {
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
+
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
unsigned
@@ -298,6 +304,9 @@ namespace HexagonISD {
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
const override;
+ bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
+ EVT NewVT) const override;
+
// Handling of atomic RMW instructions.
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
@@ -305,7 +314,8 @@ namespace HexagonISD {
Value *Addr, AtomicOrdering Ord) const override;
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+ AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
@@ -314,6 +324,9 @@ namespace HexagonISD {
private:
void initializeHVXLowering();
+ void validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl,
+ unsigned NeedAlign) const;
+
std::pair<SDValue,int> getBaseAndOffset(SDValue Addr) const;
bool getBuildVectorConstInts(ArrayRef<SDValue> Values, MVT VecTy,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 2566194ca9c6..a6400b5d8266 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -400,6 +400,76 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
MachinePointerInfo::getConstantPool(MF), Align);
}
+ // A special case is a situation where the vector is built entirely from
+ // elements extracted from another vector. This could be done via a shuffle
+ // more efficiently, but typically, the size of the source vector will not
+ // match the size of the vector being built (which precludes the use of a
+ // shuffle directly).
+ // This only handles a single source vector, and the vector being built
+ // should be of a sub-vector type of the source vector type.
+ auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
+ SmallVectorImpl<int> &SrcIdx) {
+ SDValue Vec;
+ for (SDValue V : Values) {
+ if (isUndef(V)) {
+ SrcIdx.push_back(-1);
+ continue;
+ }
+ if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+ // All extracts should come from the same vector.
+ SDValue T = V.getOperand(0);
+ if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
+ return false;
+ Vec = T;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (C == nullptr)
+ return false;
+ int I = C->getSExtValue();
+ assert(I >= 0 && "Negative element index");
+ SrcIdx.push_back(I);
+ }
+ SrcVec = Vec;
+ return true;
+ };
+
+ SmallVector<int,128> ExtIdx;
+ SDValue ExtVec;
+ if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
+ MVT ExtTy = ty(ExtVec);
+ unsigned ExtLen = ExtTy.getVectorNumElements();
+ if (ExtLen == VecLen || ExtLen == 2*VecLen) {
+ // Construct a new shuffle mask that will produce a vector with the same
+ // number of elements as the input vector, and such that the vector we
+ // want will be the initial subvector of it.
+ SmallVector<int,128> Mask;
+ BitVector Used(ExtLen);
+
+ for (int M : ExtIdx) {
+ Mask.push_back(M);
+ if (M >= 0)
+ Used.set(M);
+ }
+ // Fill the rest of the mask with the unused elements of ExtVec in hopes
+ // that it will result in a permutation of ExtVec's elements. It's still
+ // fine if it doesn't (e.g. if undefs are present, or elements are
+ // repeated), but permutations can always be done efficiently via vdelta
+ // and vrdelta.
+ for (unsigned I = 0; I != ExtLen; ++I) {
+ if (Mask.size() == ExtLen)
+ break;
+ if (!Used.test(I))
+ Mask.push_back(I);
+ }
+
+ SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
+ DAG.getUNDEF(ExtTy), Mask);
+ if (ExtLen == VecLen)
+ return S;
+ return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S);
+ }
+ }
+
// Construct two halves in parallel, then or them together.
assert(4*Words.size() == Subtarget.getVectorLength());
SDValue HalfV0 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
@@ -1356,7 +1426,8 @@ SDValue
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
// Sign- and zero-extends are legal.
assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
- return DAG.getZeroExtendVectorInReg(Op.getOperand(0), SDLoc(Op), ty(Op));
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
+ Op.getOperand(0));
}
SDValue
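A minimal, standalone sketch of the shuffle-mask construction added to buildHvxVectorReg above: given the lane indices extracted from the source vector (with -1 for undef), the remaining mask slots are padded with the source's unused lanes so that the shuffle is, ideally, a permutation. This is an illustration only, not code from the patch; the names are made up for the example.

    #include <cstdio>
    #include <vector>

    // Mirror of the mask-filling idea: keep the requested lanes first, then
    // append every lane of the source that was not used yet.
    static std::vector<int> buildShuffleMask(const std::vector<int> &ExtIdx,
                                             unsigned ExtLen) {
      std::vector<bool> Used(ExtLen, false);
      std::vector<int> Mask;
      for (int M : ExtIdx) {          // lanes requested by the BUILD_VECTOR
        Mask.push_back(M);
        if (M >= 0)
          Used[M] = true;
      }
      for (unsigned I = 0; I != ExtLen && Mask.size() != ExtLen; ++I)
        if (!Used[I])
          Mask.push_back(I);          // pad with unused lanes of the source
      return Mask;
    }

    int main() {
      // Building a 4-element vector from lanes {4, 5, undef, 6} of an
      // 8-lane source vector.
      std::vector<int> Mask = buildShuffleMask({4, 5, -1, 6}, 8);
      for (int M : Mask)
        std::printf("%d ", M);        // prints: 4 5 -1 6 0 1 2 3
      std::printf("\n");
      return 0;
    }

With ExtLen equal to the length of the vector being built, the first VecLen mask entries already select the wanted elements; with ExtLen == 2*VecLen the result is taken from the low subregister of the shuffled value, as the patch does with Hexagon::vsub_lo.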
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
index 1bb3bc1ea31b..2236140d5dd7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -69,101 +69,101 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// Instruction type according to the ISA.
IType Type = type;
- let TSFlags{5-0} = Type.Value;
+ let TSFlags{6-0} = Type.Value;
// Solo instructions, i.e., those that cannot be in a packet with others.
bits<1> isSolo = 0;
- let TSFlags{6} = isSolo;
+ let TSFlags{7} = isSolo;
// Packed only with A or X-type instructions.
bits<1> isSoloAX = 0;
- let TSFlags{7} = isSoloAX;
+ let TSFlags{8} = isSoloAX;
// Restricts slot 1 to ALU-only instructions.
bits<1> isRestrictSlot1AOK = 0;
- let TSFlags{8} = isRestrictSlot1AOK;
+ let TSFlags{9} = isRestrictSlot1AOK;
// Predicated instructions.
bits<1> isPredicated = 0;
- let TSFlags{9} = isPredicated;
+ let TSFlags{10} = isPredicated;
bits<1> isPredicatedFalse = 0;
- let TSFlags{10} = isPredicatedFalse;
+ let TSFlags{11} = isPredicatedFalse;
bits<1> isPredicatedNew = 0;
- let TSFlags{11} = isPredicatedNew;
+ let TSFlags{12} = isPredicatedNew;
bits<1> isPredicateLate = 0;
- let TSFlags{12} = isPredicateLate; // Late predicate producer insn.
+ let TSFlags{13} = isPredicateLate; // Late predicate producer insn.
// New-value insn helper fields.
bits<1> isNewValue = 0;
- let TSFlags{13} = isNewValue; // New-value consumer insn.
+ let TSFlags{14} = isNewValue; // New-value consumer insn.
bits<1> hasNewValue = 0;
- let TSFlags{14} = hasNewValue; // New-value producer insn.
+ let TSFlags{15} = hasNewValue; // New-value producer insn.
bits<3> opNewValue = 0;
- let TSFlags{17-15} = opNewValue; // New-value produced operand.
+ let TSFlags{18-16} = opNewValue; // New-value produced operand.
bits<1> isNVStorable = 0;
- let TSFlags{18} = isNVStorable; // Store that can become new-value store.
+ let TSFlags{19} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{19} = isNVStore; // New-value store insn.
+ let TSFlags{20} = isNVStore; // New-value store insn.
bits<1> isCVLoadable = 0;
- let TSFlags{20} = isCVLoadable; // Load that can become cur-value load.
+ let TSFlags{21} = isCVLoadable; // Load that can become cur-value load.
bits<1> isCVLoad = 0;
- let TSFlags{21} = isCVLoad; // Cur-value load insn.
+ let TSFlags{22} = isCVLoad; // Cur-value load insn.
// Immediate extender helper fields.
bits<1> isExtendable = 0;
- let TSFlags{22} = isExtendable; // Insn may be extended.
+ let TSFlags{23} = isExtendable; // Insn may be extended.
bits<1> isExtended = 0;
- let TSFlags{23} = isExtended; // Insn must be extended.
+ let TSFlags{24} = isExtended; // Insn must be extended.
bits<3> opExtendable = 0;
- let TSFlags{26-24} = opExtendable; // Which operand may be extended.
+ let TSFlags{27-25} = opExtendable; // Which operand may be extended.
bits<1> isExtentSigned = 0;
- let TSFlags{27} = isExtentSigned; // Signed or unsigned range.
+ let TSFlags{28} = isExtentSigned; // Signed or unsigned range.
bits<5> opExtentBits = 0;
- let TSFlags{32-28} = opExtentBits; //Number of bits of range before extending.
+ let TSFlags{33-29} = opExtentBits; //Number of bits of range before extending.
bits<2> opExtentAlign = 0;
- let TSFlags{34-33} = opExtentAlign; // Alignment exponent before extending.
+ let TSFlags{35-34} = opExtentAlign; // Alignment exponent before extending.
bit cofMax1 = 0;
- let TSFlags{35} = cofMax1;
+ let TSFlags{36} = cofMax1;
bit cofRelax1 = 0;
- let TSFlags{36} = cofRelax1;
+ let TSFlags{37} = cofRelax1;
bit cofRelax2 = 0;
- let TSFlags{37} = cofRelax2;
+ let TSFlags{38} = cofRelax2;
bit isRestrictNoSlot1Store = 0;
- let TSFlags{38} = isRestrictNoSlot1Store;
+ let TSFlags{39} = isRestrictNoSlot1Store;
// Addressing mode for load/store instructions.
AddrModeType addrMode = NoAddrMode;
- let TSFlags{43-41} = addrMode.Value;
+ let TSFlags{44-42} = addrMode.Value;
// Memory access size for mem access instructions (load/store)
MemAccessSize accessSize = NoMemAccess;
- let TSFlags{47-44} = accessSize.Value;
+ let TSFlags{48-45} = accessSize.Value;
bits<1> isTaken = 0;
- let TSFlags {48} = isTaken; // Branch prediction.
+ let TSFlags {49} = isTaken; // Branch prediction.
bits<1> isFP = 0;
- let TSFlags {49} = isFP; // Floating-point.
+ let TSFlags {50} = isFP; // Floating-point.
bits<1> isSomeOK = 0;
- let TSFlags {50} = isSomeOK; // Relax some grouping constraints.
+ let TSFlags {51} = isSomeOK; // Relax some grouping constraints.
bits<1> hasNewValue2 = 0;
- let TSFlags{51} = hasNewValue2; // Second New-value producer insn.
+ let TSFlags{52} = hasNewValue2; // Second New-value producer insn.
bits<3> opNewValue2 = 0;
- let TSFlags{54-52} = opNewValue2; // Second New-value produced operand.
+ let TSFlags{55-53} = opNewValue2; // Second New-value produced operand.
bits<1> isAccumulator = 0;
- let TSFlags{55} = isAccumulator;
+ let TSFlags{56} = isAccumulator;
bits<1> prefersSlot3 = 0;
- let TSFlags{56} = prefersSlot3; // Complex XU
+ let TSFlags{57} = prefersSlot3; // Complex XU
bits<1> hasTmpDst = 0;
- let TSFlags{59} = hasTmpDst; // v65 : 'fake" register VTMP is set
+ let TSFlags{60} = hasTmpDst; // v65: "fake" register VTMP is set
bit CVINew = 0;
- let TSFlags{61} = CVINew;
+ let TSFlags{62} = CVINew;
// Fields used for relation models.
bit isNonTemporal = 0;
@@ -194,8 +194,6 @@ class HInst<dag outs, dag ins, string asmstr, InstrItinClass itin, IType type> :
// Instruction Classes Definitions +
//===----------------------------------------------------------------------===//
-// LD Instruction Class in V2/V3/V4.
-// Definition of the instruction class NOT CHANGED.
let mayLoad = 1 in
class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
@@ -205,9 +203,6 @@ class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
-// ST Instruction Class in V2/V3 can take SLOT0 only.
-// ST Instruction Class in V4 can take SLOT0 & SLOT1.
-// Definition of the instruction class CHANGED from V2/V3 to V4.
let mayStore = 1 in
class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
@@ -235,15 +230,6 @@ class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Instruction Classes Definitions -
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// V4 Instruction Format Definitions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrFormatsV4.td"
-
-//===----------------------------------------------------------------------===//
-// V60+ Instruction Format Definitions +
-//===----------------------------------------------------------------------===//
-
+include "HexagonInstrFormatsV5.td"
include "HexagonInstrFormatsV60.td"
include "HexagonInstrFormatsV65.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td
index c5fa25995212..c8de5cbcc1e0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td
@@ -1,4 +1,4 @@
-//==- HexagonInstrFormatsV4.td - Hexagon Instruction Formats --*- tablegen -==//
+//==- HexagonInstrFormatsV5.td - Hexagon Instruction Formats --*- tablegen -==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file describes the Hexagon V4 instruction classes in TableGen format.
+// This file describes the Hexagon V5 instruction classes in TableGen format.
//
//===----------------------------------------------------------------------===//
@@ -49,39 +49,39 @@ class InstDuplex<bits<4> iClass, list<dag> pattern = [],
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
- let TSFlags{5-0} = Type.Value;
+ let TSFlags{6-0} = Type.Value;
// Predicated instructions.
bits<1> isPredicated = 0;
- let TSFlags{6} = isPredicated;
+ let TSFlags{7} = isPredicated;
bits<1> isPredicatedFalse = 0;
- let TSFlags{7} = isPredicatedFalse;
+ let TSFlags{8} = isPredicatedFalse;
bits<1> isPredicatedNew = 0;
- let TSFlags{8} = isPredicatedNew;
+ let TSFlags{9} = isPredicatedNew;
// New-value insn helper fields.
bits<1> isNewValue = 0;
- let TSFlags{9} = isNewValue; // New-value consumer insn.
+ let TSFlags{10} = isNewValue; // New-value consumer insn.
bits<1> hasNewValue = 0;
- let TSFlags{10} = hasNewValue; // New-value producer insn.
+ let TSFlags{11} = hasNewValue; // New-value producer insn.
bits<3> opNewValue = 0;
- let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ let TSFlags{14-12} = opNewValue; // New-value produced operand.
bits<1> isNVStorable = 0;
- let TSFlags{14} = isNVStorable; // Store that can become new-value store.
+ let TSFlags{15} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{15} = isNVStore; // New-value store insn.
+ let TSFlags{16} = isNVStore; // New-value store insn.
// Immediate extender helper fields.
bits<1> isExtendable = 0;
- let TSFlags{16} = isExtendable; // Insn may be extended.
+ let TSFlags{17} = isExtendable; // Insn may be extended.
bits<1> isExtended = 0;
- let TSFlags{17} = isExtended; // Insn must be extended.
+ let TSFlags{18} = isExtended; // Insn must be extended.
bits<3> opExtendable = 0;
- let TSFlags{20-18} = opExtendable; // Which operand may be extended.
+ let TSFlags{21-19} = opExtendable; // Which operand may be extended.
bits<1> isExtentSigned = 0;
- let TSFlags{21} = isExtentSigned; // Signed or unsigned range.
+ let TSFlags{22} = isExtentSigned; // Signed or unsigned range.
bits<5> opExtentBits = 0;
- let TSFlags{26-22} = opExtentBits; //Number of bits of range before extending.
+ let TSFlags{27-23} = opExtentBits; //Number of bits of range before extending.
bits<2> opExtentAlign = 0;
- let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending.
+ let TSFlags{29-28} = opExtentAlign; // Alignment exponent before extending.
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 6019c7c5d024..de0d6c4d9e4e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -335,37 +335,37 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
/// This function checks if the instruction or bundle of instructions
/// has load from stack slot and returns frameindex and machine memory
/// operand of that instruction if true.
-bool HexagonInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool HexagonInstrInfo::hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
if (MI.isBundle()) {
const MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
- if (TargetInstrInfo::hasLoadFromStackSlot(*MII, MMO, FrameIndex))
+ if (TargetInstrInfo::hasLoadFromStackSlot(*MII, Accesses))
return true;
return false;
}
- return TargetInstrInfo::hasLoadFromStackSlot(MI, MMO, FrameIndex);
+ return TargetInstrInfo::hasLoadFromStackSlot(MI, Accesses);
}
/// This function checks if the instruction or bundle of instructions
/// has store to stack slot and returns frameindex and machine memory
/// operand of that instruction if true.
-bool HexagonInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool HexagonInstrInfo::hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
if (MI.isBundle()) {
const MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
- if (TargetInstrInfo::hasStoreToStackSlot(*MII, MMO, FrameIndex))
+ if (TargetInstrInfo::hasStoreToStackSlot(*MII, Accesses))
return true;
return false;
}
- return TargetInstrInfo::hasStoreToStackSlot(MI, MMO, FrameIndex);
+ return TargetInstrInfo::hasStoreToStackSlot(MI, Accesses);
}
/// This function can analyze one/two way branching only and should (mostly) be
@@ -1086,19 +1086,18 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned NewOpc = Aligned ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32Ub_ai;
unsigned Offset = HRI.getSpillSize(Hexagon::HvxVRRegClass);
- MachineInstr *MI1New =
- BuildMI(MBB, MI, DL, get(NewOpc))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(SrcSubLo)
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(SrcSubLo)
+ .cloneMemRefs(MI);
MI1New->getOperand(0).setIsKill(false);
BuildMI(MBB, MI, DL, get(NewOpc))
.add(MI.getOperand(0))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(1).getImm() + Offset)
.addReg(SrcSubHi)
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1111,15 +1110,15 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc),
HRI.getSubReg(DstReg, Hexagon::vsub_lo))
- .add(MI.getOperand(1))
- .addImm(MI.getOperand(2).getImm())
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm())
+ .cloneMemRefs(MI);
MI1New->getOperand(1).setIsKill(false);
BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_hi))
.add(MI.getOperand(1))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(2).getImm() + Offset)
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1294,7 +1293,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine))
.add(Op0)
.addReg(PReg, S)
- .add(Op1)
.addReg(SrcHi)
.addReg(SrcLo);
if (IsDestLive)
@@ -1342,81 +1340,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(Hexagon::J2_jumprfnew));
return true;
- case Hexagon::V6_vgathermh_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermw_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermhw_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermhq_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermwq_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermhwq_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
case Hexagon::PS_loadrub_pci:
return RealCirc(Hexagon::L2_loadrub_pci, /*HasImm*/true, /*MxOp*/4);
case Hexagon::PS_loadrb_pci:
@@ -1466,6 +1389,93 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return false;
}
+MachineBasicBlock::instr_iterator
+HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const {
+ MachineBasicBlock &MBB = *MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned Opc = MI.getOpcode();
+ MachineBasicBlock::iterator First;
+
+ switch (Opc) {
+ case Hexagon::V6_vgathermh_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+ }
+
+ return MI.getIterator();
+}
+
// We indicate that we want to reverse the branch by
// inserting the reversed branching opcode.
bool HexagonInstrInfo::reverseBranchCondition(
@@ -2883,14 +2893,15 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
}
/// Get the base register and byte offset of a load/store instr.
-bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
- unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI)
- const {
+bool HexagonInstrInfo::getMemOperandWithOffset(
+ MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
unsigned AccessSize = 0;
- int OffsetVal = 0;
- BaseReg = getBaseAndOffset(LdSt, OffsetVal, AccessSize);
- Offset = OffsetVal;
- return BaseReg != 0;
+ BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
+ assert((!BaseOp || BaseOp->isReg()) &&
+ "getMemOperandWithOffset only supports base "
+ "operands of type register.");
+ return BaseOp != nullptr;
}
/// Can these instructions execute at the same time in a bundle.
@@ -3097,21 +3108,22 @@ unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const {
// Returns the base register in a memory access (load/store). The offset is
// returned in Offset and the access size is returned in AccessSize.
-// If the base register has a subregister or the offset field does not contain
-// an immediate value, return 0.
-unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
- int &Offset, unsigned &AccessSize) const {
+// If the base operand has a subregister or the offset field does not contain
+// an immediate value, return nullptr.
+MachineOperand *HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
+ int64_t &Offset,
+ unsigned &AccessSize) const {
// Return if it is not a base+offset type instruction or a MemOp.
if (getAddrMode(MI) != HexagonII::BaseImmOffset &&
getAddrMode(MI) != HexagonII::BaseLongOffset &&
!isMemOp(MI) && !isPostIncrement(MI))
- return 0;
+ return nullptr;
AccessSize = getMemAccessSize(MI);
unsigned BasePos = 0, OffsetPos = 0;
if (!getBaseAndOffsetPosition(MI, BasePos, OffsetPos))
- return 0;
+ return nullptr;
// Post increment updates its EA after the mem access,
// so we need to treat its offset as zero.
@@ -3120,14 +3132,14 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
} else {
const MachineOperand &OffsetOp = MI.getOperand(OffsetPos);
if (!OffsetOp.isImm())
- return 0;
+ return nullptr;
Offset = OffsetOp.getImm();
}
const MachineOperand &BaseOp = MI.getOperand(BasePos);
if (BaseOp.getSubReg() != 0)
- return 0;
- return BaseOp.getReg();
+ return nullptr;
+ return &const_cast<MachineOperand&>(BaseOp);
}
/// Return the position of the base and offset operands for this instruction.
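Caller-side sketch of the updated query interface (illustrative fragment, not part of the patch; it assumes an in-tree build of the Hexagon backend): getBaseAndOffset() now hands back the base MachineOperand itself together with a 64-bit offset, and returns nullptr when the instruction has no simple base+offset form.

    #include "HexagonInstrInfo.h"
    using namespace llvm;

    static bool printBasePlusOffset(const HexagonInstrInfo &HII,
                                    const MachineInstr &MI) {
      int64_t Offset = 0;
      unsigned AccessSize = 0;
      MachineOperand *BaseOp = HII.getBaseAndOffset(MI, Offset, AccessSize);
      if (!BaseOp)        // no base+offset addressing, or non-immediate offset
        return false;
      // BaseOp->getReg() is the base register; post-increment forms report
      // Offset == 0 because the effective address is updated after the access.
      return BaseOp->isReg();
    }

getMemOperandWithOffset() follows the same convention and asserts that the returned operand is a register, matching the generic TargetInstrInfo hook it now overrides.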
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 96b4ffaba02f..9b840762e88a 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -69,16 +69,16 @@ public:
/// Check if the instruction or the bundle of instructions has
/// load from stack slots. Return the frameindex and machine memory operand
/// if true.
- bool hasLoadFromStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const override;
+ bool hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const override;
/// Check if the instruction or the bundle of instructions has
/// store to stack slots. Return the frameindex and machine memory operand
/// if true.
- bool hasStoreToStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const override;
+ bool hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const override;
/// Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
@@ -216,9 +216,9 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
/// Get the base register and byte offset of a load/store instr.
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
/// Reverses the branch condition of the specified condition list,
/// returning false on success and true if it cannot be reversed.
@@ -436,8 +436,8 @@ public:
bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
unsigned getAddrMode(const MachineInstr &MI) const;
- unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset,
- unsigned &AccessSize) const;
+ MachineOperand *getBaseAndOffset(const MachineInstr &MI, int64_t &Offset,
+ unsigned &AccessSize) const;
SmallVector<MachineInstr*,2> getBranchingInstrs(MachineBasicBlock& MBB) const;
unsigned getCExtOpNum(const MachineInstr &MI) const;
HexagonII::CompoundGroup
@@ -472,6 +472,8 @@ public:
uint64_t getType(const MachineInstr &MI) const;
unsigned getUnits(const MachineInstr &MI) const;
+ MachineBasicBlock::instr_iterator expandVGatherPseudo(MachineInstr &MI) const;
+
/// getInstrTimingClassLatency - Compute the instruction latency of a given
/// instruction using Timing Class information, if available.
unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
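An illustrative fragment (again not from the patch) showing the new stack-slot queries: instead of a single MMO/FrameIndex out-parameter pair, the hooks now collect every matching MachineMemOperand, which lets a bundled instruction report all of its accesses.

    #include "HexagonInstrInfo.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/PseudoSourceValue.h"
    using namespace llvm;

    static void collectSpillLoads(const HexagonInstrInfo &HII,
                                  const MachineInstr &MI) {
      SmallVector<const MachineMemOperand *, 4> Accesses;
      if (!HII.hasLoadFromStackSlot(MI, Accesses))
        return;
      for (const MachineMemOperand *MMO : Accesses) {
        // Each access refers to a fixed stack slot via its pseudo source value.
        if (const auto *FS = dyn_cast_or_null<FixedStackPseudoSourceValue>(
                MMO->getPseudoValue()))
          (void)FS->getFrameIndex();   // the spill slot's frame index
      }
    }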
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index b25e316709c5..9cab5748bef2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -6,726 +6,78 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-// This is populated based on the following specs:
-// Hexagon V2 Architecture
-// Application-Level Specification
-// 80-V9418-8 Rev. B
-// March 4, 2008
-//===----------------------------------------------------------------------===//
-class T_I_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID imm:$Is),
- (MI imm:$Is)>;
+// These intrinsic patterns are not auto-generated.
class T_R_pat <InstHexagon MI, Intrinsic IntID>
: Pat <(IntID I32:$Rs),
(MI I32:$Rs)>;
-class T_P_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs),
- (MI I64:$Rs)>;
-
-class T_II_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
- : Pat<(IntID Imm1:$Is, Imm2:$It),
- (MI Imm1:$Is, Imm2:$It)>;
-
-class T_RI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID I32:$Rs, ImmPred:$It),
- (MI I32:$Rs, ImmPred:$It)>;
-
-class T_IR_pat <InstHexagon MI, Intrinsic IntID,
- PatFrag ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID ImmPred:$Is, I32:$Rt),
- (MI ImmPred:$Is, I32:$Rt)>;
-
-class T_PI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID I64:$Rs, imm:$It),
- (MI I64:$Rs, imm:$It)>;
-
-class T_RP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID I32:$Rs, I64:$Rt),
- (MI I32:$Rs, I64:$Rt)>;
-
class T_RR_pat <InstHexagon MI, Intrinsic IntID>
: Pat <(IntID I32:$Rs, I32:$Rt),
(MI I32:$Rs, I32:$Rt)>;
-class T_PP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt),
- (MI I64:$Rs, I64:$Rt)>;
-
-class T_QQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt),
- (MI (C2_tfrrp I32:$Rs), (C2_tfrrp I32:$Rt))>;
-
-class T_QII_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
- : Pat <(IntID I32:$Rp, Imm1:$Is, Imm2:$It),
- (MI (C2_tfrrp I32:$Rp), Imm1:$Is, Imm2:$It)>;
-
-class T_QRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I32:$Rs, I32:$Rt),
- (MI (C2_tfrrp I32:$Rp), I32:$Rs, I32:$Rt)>;
-
-class T_QRI_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
- : Pat <(IntID I32:$Rp, I32:$Rs, ImmPred:$Is),
- (MI (C2_tfrrp I32:$Rp), I32:$Rs, ImmPred:$Is)>;
-
-class T_QIR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
- : Pat <(IntID I32:$Rp, ImmPred:$Is, I32:$Rs),
- (MI (C2_tfrrp I32:$Rp), ImmPred:$Is, I32:$Rs)>;
-
-class T_QPP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I64:$Rs, I64:$Rt),
- (MI (C2_tfrrp I32:$Rp), I64:$Rs, I64:$Rt)>;
-
-class T_RRI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt, imm:$Iu),
- (MI I32:$Rs, I32:$Rt, imm:$Iu)>;
-
-class T_RII_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, imm:$It, imm:$Iu),
- (MI I32:$Rs, imm:$It, imm:$Iu)>;
-
-class T_IRI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID imm:$It, I32:$Rs, imm:$Iu),
- (MI imm:$It, I32:$Rs, imm:$Iu)>;
-
-class T_IRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID imm:$Is, I32:$Rs, I32:$Rt),
- (MI imm:$Is, I32:$Rs, I32:$Rt)>;
-
-class T_RIR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, imm:$Is, I32:$Rt),
- (MI I32:$Rs, imm:$Is, I32:$Rt)>;
-
-class T_RRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt, I32:$Ru),
- (MI I32:$Rs, I32:$Rt, I32:$Ru)>;
-
-class T_PPI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, imm:$Iu),
- (MI I64:$Rs, I64:$Rt, imm:$Iu)>;
-
-class T_PII_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, imm:$It, imm:$Iu),
- (MI I64:$Rs, imm:$It, imm:$Iu)>;
-
-class T_PPP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, I64:$Ru),
- (MI I64:$Rs, I64:$Rt, I64:$Ru)>;
-
-class T_PPR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Ru),
- (MI I64:$Rs, I64:$Rt, I32:$Ru)>;
-
-class T_PRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I32:$Rt, I32:$Ru),
- (MI I64:$Rs, I32:$Rt, I32:$Ru)>;
-
-class T_PPQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Rp),
- (MI I64:$Rs, I64:$Rt, (C2_tfrrp I32:$Rp))>;
-
-class T_PR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I32:$Rt),
- (MI I64:$Rs, I32:$Rt)>;
-
-class T_D_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID (F64:$Rs)),
- (MI (F64:$Rs))>;
-
-class T_DI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID F64:$Rs, ImmPred:$It),
- (MI F64:$Rs, ImmPred:$It)>;
-
-class T_F_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs),
- (MI F32:$Rs)>;
-
-class T_FI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID F32:$Rs, ImmPred:$It),
- (MI F32:$Rs, ImmPred:$It)>;
-
-class T_FF_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, F32:$Rt),
- (MI F32:$Rs, F32:$Rt)>;
-
-class T_DD_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F64:$Rs, F64:$Rt),
- (MI F64:$Rs, F64:$Rt)>;
-
-class T_FFF_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, F32:$Rt, F32:$Ru),
- (MI F32:$Rs, F32:$Rt, F32:$Ru)>;
-
-class T_FFFQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, I32:$Rp),
- (MI F32:$Rs, F32:$Rt, F32:$Ru, (C2_tfrrp I32:$Rp))>;
-
-class T_Q_RI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID I32:$Rs, ImmPred:$It),
- (C2_tfrpr (MI I32:$Rs, ImmPred:$It))>;
-
-class T_Q_RR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt),
- (C2_tfrpr (MI I32:$Rs, I32:$Rt))>;
-
-class T_Q_RP_pat <InstHexagon MI, Intrinsic IntID>
+class T_RP_pat <InstHexagon MI, Intrinsic IntID>
: Pat <(IntID I32:$Rs, I64:$Rt),
- (C2_tfrpr (MI I32:$Rs, I64:$Rt))>;
-
-class T_Q_PR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I32:$Rt),
- (C2_tfrpr (MI I64:$Rs, I32:$Rt))>;
-
-class T_Q_PI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID I64:$Rs, imm:$It),
- (C2_tfrpr (MI I64:$Rs, imm:$It))>;
-
-class T_Q_PP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt),
- (C2_tfrpr (MI I64:$Rs, I64:$Rt))>;
-
-class T_Q_Q_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp),
- (C2_tfrpr (MI (C2_tfrrp I32:$Rp)))>;
-
-class T_Q_QQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I32:$Rq),
- (C2_tfrpr (MI (C2_tfrrp I32:$Rp), (C2_tfrrp I32:$Rq)))>;
-
-class T_Q_FF_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, F32:$Rt),
- (C2_tfrpr (MI F32:$Rs, F32:$Rt))>;
-
-class T_Q_DD_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F64:$Rs, F64:$Rt),
- (C2_tfrpr (MI F64:$Rs, F64:$Rt))>;
-
-class T_Q_FI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, imm:$It),
- (C2_tfrpr (MI F32:$Rs, imm:$It))>;
-
-class T_Q_DI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F64:$Rs, imm:$It),
- (C2_tfrpr (MI F64:$Rs, imm:$It))>;
-
-class T_Q_QQQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I32:$Rq, I32:$Rs),
- (C2_tfrpr (MI (C2_tfrrp I32:$Rp), (C2_tfrrp I32:$Rq),
- (C2_tfrrp I32:$Rs)))>;
-
-//===----------------------------------------------------------------------===//
-// MPYS / Multipy signed/unsigned halfwords
-//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
-//===----------------------------------------------------------------------===//
-
-def : T_RR_pat <M2_mpy_ll_s1, int_hexagon_M2_mpy_ll_s1>;
-def : T_RR_pat <M2_mpy_ll_s0, int_hexagon_M2_mpy_ll_s0>;
-def : T_RR_pat <M2_mpy_lh_s1, int_hexagon_M2_mpy_lh_s1>;
-def : T_RR_pat <M2_mpy_lh_s0, int_hexagon_M2_mpy_lh_s0>;
-def : T_RR_pat <M2_mpy_hl_s1, int_hexagon_M2_mpy_hl_s1>;
-def : T_RR_pat <M2_mpy_hl_s0, int_hexagon_M2_mpy_hl_s0>;
-def : T_RR_pat <M2_mpy_hh_s1, int_hexagon_M2_mpy_hh_s1>;
-def : T_RR_pat <M2_mpy_hh_s0, int_hexagon_M2_mpy_hh_s0>;
-
-def : T_RR_pat <M2_mpyu_ll_s1, int_hexagon_M2_mpyu_ll_s1>;
-def : T_RR_pat <M2_mpyu_ll_s0, int_hexagon_M2_mpyu_ll_s0>;
-def : T_RR_pat <M2_mpyu_lh_s1, int_hexagon_M2_mpyu_lh_s1>;
-def : T_RR_pat <M2_mpyu_lh_s0, int_hexagon_M2_mpyu_lh_s0>;
-def : T_RR_pat <M2_mpyu_hl_s1, int_hexagon_M2_mpyu_hl_s1>;
-def : T_RR_pat <M2_mpyu_hl_s0, int_hexagon_M2_mpyu_hl_s0>;
-def : T_RR_pat <M2_mpyu_hh_s1, int_hexagon_M2_mpyu_hh_s1>;
-def : T_RR_pat <M2_mpyu_hh_s0, int_hexagon_M2_mpyu_hh_s0>;
-
-def : T_RR_pat <M2_mpy_sat_ll_s1, int_hexagon_M2_mpy_sat_ll_s1>;
-def : T_RR_pat <M2_mpy_sat_ll_s0, int_hexagon_M2_mpy_sat_ll_s0>;
-def : T_RR_pat <M2_mpy_sat_lh_s1, int_hexagon_M2_mpy_sat_lh_s1>;
-def : T_RR_pat <M2_mpy_sat_lh_s0, int_hexagon_M2_mpy_sat_lh_s0>;
-def : T_RR_pat <M2_mpy_sat_hl_s1, int_hexagon_M2_mpy_sat_hl_s1>;
-def : T_RR_pat <M2_mpy_sat_hl_s0, int_hexagon_M2_mpy_sat_hl_s0>;
-def : T_RR_pat <M2_mpy_sat_hh_s1, int_hexagon_M2_mpy_sat_hh_s1>;
-def : T_RR_pat <M2_mpy_sat_hh_s0, int_hexagon_M2_mpy_sat_hh_s0>;
-
-def : T_RR_pat <M2_mpy_rnd_ll_s1, int_hexagon_M2_mpy_rnd_ll_s1>;
-def : T_RR_pat <M2_mpy_rnd_ll_s0, int_hexagon_M2_mpy_rnd_ll_s0>;
-def : T_RR_pat <M2_mpy_rnd_lh_s1, int_hexagon_M2_mpy_rnd_lh_s1>;
-def : T_RR_pat <M2_mpy_rnd_lh_s0, int_hexagon_M2_mpy_rnd_lh_s0>;
-def : T_RR_pat <M2_mpy_rnd_hl_s1, int_hexagon_M2_mpy_rnd_hl_s1>;
-def : T_RR_pat <M2_mpy_rnd_hl_s0, int_hexagon_M2_mpy_rnd_hl_s0>;
-def : T_RR_pat <M2_mpy_rnd_hh_s1, int_hexagon_M2_mpy_rnd_hh_s1>;
-def : T_RR_pat <M2_mpy_rnd_hh_s0, int_hexagon_M2_mpy_rnd_hh_s0>;
-
-def : T_RR_pat <M2_mpy_sat_rnd_ll_s1, int_hexagon_M2_mpy_sat_rnd_ll_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_ll_s0, int_hexagon_M2_mpy_sat_rnd_ll_s0>;
-def : T_RR_pat <M2_mpy_sat_rnd_lh_s1, int_hexagon_M2_mpy_sat_rnd_lh_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_lh_s0, int_hexagon_M2_mpy_sat_rnd_lh_s0>;
-def : T_RR_pat <M2_mpy_sat_rnd_hl_s1, int_hexagon_M2_mpy_sat_rnd_hl_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_hl_s0, int_hexagon_M2_mpy_sat_rnd_hl_s0>;
-def : T_RR_pat <M2_mpy_sat_rnd_hh_s1, int_hexagon_M2_mpy_sat_rnd_hh_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_hh_s0, int_hexagon_M2_mpy_sat_rnd_hh_s0>;
-
-
-//===----------------------------------------------------------------------===//
-// MPYS / Multipy signed/unsigned halfwords and add/subtract the
-// result from the accumulator.
-//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//===----------------------------------------------------------------------===//
-
-def : T_RRR_pat <M2_mpy_acc_ll_s1, int_hexagon_M2_mpy_acc_ll_s1>;
-def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>;
-def : T_RRR_pat <M2_mpy_acc_lh_s1, int_hexagon_M2_mpy_acc_lh_s1>;
-def : T_RRR_pat <M2_mpy_acc_lh_s0, int_hexagon_M2_mpy_acc_lh_s0>;
-def : T_RRR_pat <M2_mpy_acc_hl_s1, int_hexagon_M2_mpy_acc_hl_s1>;
-def : T_RRR_pat <M2_mpy_acc_hl_s0, int_hexagon_M2_mpy_acc_hl_s0>;
-def : T_RRR_pat <M2_mpy_acc_hh_s1, int_hexagon_M2_mpy_acc_hh_s1>;
-def : T_RRR_pat <M2_mpy_acc_hh_s0, int_hexagon_M2_mpy_acc_hh_s0>;
-
-def : T_RRR_pat <M2_mpyu_acc_ll_s1, int_hexagon_M2_mpyu_acc_ll_s1>;
-def : T_RRR_pat <M2_mpyu_acc_ll_s0, int_hexagon_M2_mpyu_acc_ll_s0>;
-def : T_RRR_pat <M2_mpyu_acc_lh_s1, int_hexagon_M2_mpyu_acc_lh_s1>;
-def : T_RRR_pat <M2_mpyu_acc_lh_s0, int_hexagon_M2_mpyu_acc_lh_s0>;
-def : T_RRR_pat <M2_mpyu_acc_hl_s1, int_hexagon_M2_mpyu_acc_hl_s1>;
-def : T_RRR_pat <M2_mpyu_acc_hl_s0, int_hexagon_M2_mpyu_acc_hl_s0>;
-def : T_RRR_pat <M2_mpyu_acc_hh_s1, int_hexagon_M2_mpyu_acc_hh_s1>;
-def : T_RRR_pat <M2_mpyu_acc_hh_s0, int_hexagon_M2_mpyu_acc_hh_s0>;
-
-def : T_RRR_pat <M2_mpy_nac_ll_s1, int_hexagon_M2_mpy_nac_ll_s1>;
-def : T_RRR_pat <M2_mpy_nac_ll_s0, int_hexagon_M2_mpy_nac_ll_s0>;
-def : T_RRR_pat <M2_mpy_nac_lh_s1, int_hexagon_M2_mpy_nac_lh_s1>;
-def : T_RRR_pat <M2_mpy_nac_lh_s0, int_hexagon_M2_mpy_nac_lh_s0>;
-def : T_RRR_pat <M2_mpy_nac_hl_s1, int_hexagon_M2_mpy_nac_hl_s1>;
-def : T_RRR_pat <M2_mpy_nac_hl_s0, int_hexagon_M2_mpy_nac_hl_s0>;
-def : T_RRR_pat <M2_mpy_nac_hh_s1, int_hexagon_M2_mpy_nac_hh_s1>;
-def : T_RRR_pat <M2_mpy_nac_hh_s0, int_hexagon_M2_mpy_nac_hh_s0>;
-
-def : T_RRR_pat <M2_mpyu_nac_ll_s1, int_hexagon_M2_mpyu_nac_ll_s1>;
-def : T_RRR_pat <M2_mpyu_nac_ll_s0, int_hexagon_M2_mpyu_nac_ll_s0>;
-def : T_RRR_pat <M2_mpyu_nac_lh_s1, int_hexagon_M2_mpyu_nac_lh_s1>;
-def : T_RRR_pat <M2_mpyu_nac_lh_s0, int_hexagon_M2_mpyu_nac_lh_s0>;
-def : T_RRR_pat <M2_mpyu_nac_hl_s1, int_hexagon_M2_mpyu_nac_hl_s1>;
-def : T_RRR_pat <M2_mpyu_nac_hl_s0, int_hexagon_M2_mpyu_nac_hl_s0>;
-def : T_RRR_pat <M2_mpyu_nac_hh_s1, int_hexagon_M2_mpyu_nac_hh_s1>;
-def : T_RRR_pat <M2_mpyu_nac_hh_s0, int_hexagon_M2_mpyu_nac_hh_s0>;
-
-def : T_RRR_pat <M2_mpy_acc_sat_ll_s1, int_hexagon_M2_mpy_acc_sat_ll_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_ll_s0, int_hexagon_M2_mpy_acc_sat_ll_s0>;
-def : T_RRR_pat <M2_mpy_acc_sat_lh_s1, int_hexagon_M2_mpy_acc_sat_lh_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_lh_s0, int_hexagon_M2_mpy_acc_sat_lh_s0>;
-def : T_RRR_pat <M2_mpy_acc_sat_hl_s1, int_hexagon_M2_mpy_acc_sat_hl_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_hl_s0, int_hexagon_M2_mpy_acc_sat_hl_s0>;
-def : T_RRR_pat <M2_mpy_acc_sat_hh_s1, int_hexagon_M2_mpy_acc_sat_hh_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_hh_s0, int_hexagon_M2_mpy_acc_sat_hh_s0>;
-
-def : T_RRR_pat <M2_mpy_nac_sat_ll_s1, int_hexagon_M2_mpy_nac_sat_ll_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_ll_s0, int_hexagon_M2_mpy_nac_sat_ll_s0>;
-def : T_RRR_pat <M2_mpy_nac_sat_lh_s1, int_hexagon_M2_mpy_nac_sat_lh_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_lh_s0, int_hexagon_M2_mpy_nac_sat_lh_s0>;
-def : T_RRR_pat <M2_mpy_nac_sat_hl_s1, int_hexagon_M2_mpy_nac_sat_hl_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_hl_s0, int_hexagon_M2_mpy_nac_sat_hl_s0>;
-def : T_RRR_pat <M2_mpy_nac_sat_hh_s1, int_hexagon_M2_mpy_nac_sat_hh_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_hh_s0, int_hexagon_M2_mpy_nac_sat_hh_s0>;
-
-
-//===----------------------------------------------------------------------===//
-// Multiply signed/unsigned halfwords with and without saturation and rounding
-// into a 64-bits destination register.
-//===----------------------------------------------------------------------===//
-
-def : T_RR_pat <M2_mpyd_hh_s0, int_hexagon_M2_mpyd_hh_s0>;
-def : T_RR_pat <M2_mpyd_hl_s0, int_hexagon_M2_mpyd_hl_s0>;
-def : T_RR_pat <M2_mpyd_lh_s0, int_hexagon_M2_mpyd_lh_s0>;
-def : T_RR_pat <M2_mpyd_ll_s0, int_hexagon_M2_mpyd_ll_s0>;
-def : T_RR_pat <M2_mpyd_hh_s1, int_hexagon_M2_mpyd_hh_s1>;
-def : T_RR_pat <M2_mpyd_hl_s1, int_hexagon_M2_mpyd_hl_s1>;
-def : T_RR_pat <M2_mpyd_lh_s1, int_hexagon_M2_mpyd_lh_s1>;
-def : T_RR_pat <M2_mpyd_ll_s1, int_hexagon_M2_mpyd_ll_s1>;
-
-def : T_RR_pat <M2_mpyd_rnd_hh_s0, int_hexagon_M2_mpyd_rnd_hh_s0>;
-def : T_RR_pat <M2_mpyd_rnd_hl_s0, int_hexagon_M2_mpyd_rnd_hl_s0>;
-def : T_RR_pat <M2_mpyd_rnd_lh_s0, int_hexagon_M2_mpyd_rnd_lh_s0>;
-def : T_RR_pat <M2_mpyd_rnd_ll_s0, int_hexagon_M2_mpyd_rnd_ll_s0>;
-def : T_RR_pat <M2_mpyd_rnd_hh_s1, int_hexagon_M2_mpyd_rnd_hh_s1>;
-def : T_RR_pat <M2_mpyd_rnd_hl_s1, int_hexagon_M2_mpyd_rnd_hl_s1>;
-def : T_RR_pat <M2_mpyd_rnd_lh_s1, int_hexagon_M2_mpyd_rnd_lh_s1>;
-def : T_RR_pat <M2_mpyd_rnd_ll_s1, int_hexagon_M2_mpyd_rnd_ll_s1>;
-
-def : T_RR_pat <M2_mpyud_hh_s0, int_hexagon_M2_mpyud_hh_s0>;
-def : T_RR_pat <M2_mpyud_hl_s0, int_hexagon_M2_mpyud_hl_s0>;
-def : T_RR_pat <M2_mpyud_lh_s0, int_hexagon_M2_mpyud_lh_s0>;
-def : T_RR_pat <M2_mpyud_ll_s0, int_hexagon_M2_mpyud_ll_s0>;
-def : T_RR_pat <M2_mpyud_hh_s1, int_hexagon_M2_mpyud_hh_s1>;
-def : T_RR_pat <M2_mpyud_hl_s1, int_hexagon_M2_mpyud_hl_s1>;
-def : T_RR_pat <M2_mpyud_lh_s1, int_hexagon_M2_mpyud_lh_s1>;
-def : T_RR_pat <M2_mpyud_ll_s1, int_hexagon_M2_mpyud_ll_s1>;
-
-//===----------------------------------------------------------------------===//
-// MPYS / Multipy signed/unsigned halfwords and add/subtract the
-// result from the 64-bit destination register.
-//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//===----------------------------------------------------------------------===//
-
-def : T_PRR_pat <M2_mpyd_acc_hh_s0, int_hexagon_M2_mpyd_acc_hh_s0>;
-def : T_PRR_pat <M2_mpyd_acc_hl_s0, int_hexagon_M2_mpyd_acc_hl_s0>;
-def : T_PRR_pat <M2_mpyd_acc_lh_s0, int_hexagon_M2_mpyd_acc_lh_s0>;
-def : T_PRR_pat <M2_mpyd_acc_ll_s0, int_hexagon_M2_mpyd_acc_ll_s0>;
-
-def : T_PRR_pat <M2_mpyd_acc_hh_s1, int_hexagon_M2_mpyd_acc_hh_s1>;
-def : T_PRR_pat <M2_mpyd_acc_hl_s1, int_hexagon_M2_mpyd_acc_hl_s1>;
-def : T_PRR_pat <M2_mpyd_acc_lh_s1, int_hexagon_M2_mpyd_acc_lh_s1>;
-def : T_PRR_pat <M2_mpyd_acc_ll_s1, int_hexagon_M2_mpyd_acc_ll_s1>;
-
-def : T_PRR_pat <M2_mpyd_nac_hh_s0, int_hexagon_M2_mpyd_nac_hh_s0>;
-def : T_PRR_pat <M2_mpyd_nac_hl_s0, int_hexagon_M2_mpyd_nac_hl_s0>;
-def : T_PRR_pat <M2_mpyd_nac_lh_s0, int_hexagon_M2_mpyd_nac_lh_s0>;
-def : T_PRR_pat <M2_mpyd_nac_ll_s0, int_hexagon_M2_mpyd_nac_ll_s0>;
-
-def : T_PRR_pat <M2_mpyd_nac_hh_s1, int_hexagon_M2_mpyd_nac_hh_s1>;
-def : T_PRR_pat <M2_mpyd_nac_hl_s1, int_hexagon_M2_mpyd_nac_hl_s1>;
-def : T_PRR_pat <M2_mpyd_nac_lh_s1, int_hexagon_M2_mpyd_nac_lh_s1>;
-def : T_PRR_pat <M2_mpyd_nac_ll_s1, int_hexagon_M2_mpyd_nac_ll_s1>;
-
-def : T_PRR_pat <M2_mpyud_acc_hh_s0, int_hexagon_M2_mpyud_acc_hh_s0>;
-def : T_PRR_pat <M2_mpyud_acc_hl_s0, int_hexagon_M2_mpyud_acc_hl_s0>;
-def : T_PRR_pat <M2_mpyud_acc_lh_s0, int_hexagon_M2_mpyud_acc_lh_s0>;
-def : T_PRR_pat <M2_mpyud_acc_ll_s0, int_hexagon_M2_mpyud_acc_ll_s0>;
-
-def : T_PRR_pat <M2_mpyud_acc_hh_s1, int_hexagon_M2_mpyud_acc_hh_s1>;
-def : T_PRR_pat <M2_mpyud_acc_hl_s1, int_hexagon_M2_mpyud_acc_hl_s1>;
-def : T_PRR_pat <M2_mpyud_acc_lh_s1, int_hexagon_M2_mpyud_acc_lh_s1>;
-def : T_PRR_pat <M2_mpyud_acc_ll_s1, int_hexagon_M2_mpyud_acc_ll_s1>;
-
-def : T_PRR_pat <M2_mpyud_nac_hh_s0, int_hexagon_M2_mpyud_nac_hh_s0>;
-def : T_PRR_pat <M2_mpyud_nac_hl_s0, int_hexagon_M2_mpyud_nac_hl_s0>;
-def : T_PRR_pat <M2_mpyud_nac_lh_s0, int_hexagon_M2_mpyud_nac_lh_s0>;
-def : T_PRR_pat <M2_mpyud_nac_ll_s0, int_hexagon_M2_mpyud_nac_ll_s0>;
-
-def : T_PRR_pat <M2_mpyud_nac_hh_s1, int_hexagon_M2_mpyud_nac_hh_s1>;
-def : T_PRR_pat <M2_mpyud_nac_hl_s1, int_hexagon_M2_mpyud_nac_hl_s1>;
-def : T_PRR_pat <M2_mpyud_nac_lh_s1, int_hexagon_M2_mpyud_nac_lh_s1>;
-def : T_PRR_pat <M2_mpyud_nac_ll_s1, int_hexagon_M2_mpyud_nac_ll_s1>;
-
-// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vcmpy_s1_sat_i, int_hexagon_M2_vcmpy_s1_sat_i>;
-def : T_PP_pat <M2_vcmpy_s0_sat_i, int_hexagon_M2_vcmpy_s0_sat_i>;
-
-// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vcmpy_s1_sat_r, int_hexagon_M2_vcmpy_s1_sat_r>;
-def : T_PP_pat <M2_vcmpy_s0_sat_r, int_hexagon_M2_vcmpy_s0_sat_r>;
-
-// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vdmpys_s1, int_hexagon_M2_vdmpys_s1>;
-def : T_PP_pat <M2_vdmpys_s0, int_hexagon_M2_vdmpys_s0>;
-
-// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vmpy2es_s1, int_hexagon_M2_vmpy2es_s1>;
-def : T_PP_pat <M2_vmpy2es_s0, int_hexagon_M2_vmpy2es_s0>;
-
-//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyh_s0, int_hexagon_M2_mmpyh_s0>;
-def : T_PP_pat <M2_mmpyh_s1, int_hexagon_M2_mmpyh_s1>;
-def : T_PP_pat <M2_mmpyh_rs0, int_hexagon_M2_mmpyh_rs0>;
-def : T_PP_pat <M2_mmpyh_rs1, int_hexagon_M2_mmpyh_rs1>;
-
-//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyl_s0, int_hexagon_M2_mmpyl_s0>;
-def : T_PP_pat <M2_mmpyl_s1, int_hexagon_M2_mmpyl_s1>;
-def : T_PP_pat <M2_mmpyl_rs0, int_hexagon_M2_mmpyl_rs0>;
-def : T_PP_pat <M2_mmpyl_rs1, int_hexagon_M2_mmpyl_rs1>;
-
-//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyuh_s0, int_hexagon_M2_mmpyuh_s0>;
-def : T_PP_pat <M2_mmpyuh_s1, int_hexagon_M2_mmpyuh_s1>;
-def : T_PP_pat <M2_mmpyuh_rs0, int_hexagon_M2_mmpyuh_rs0>;
-def : T_PP_pat <M2_mmpyuh_rs1, int_hexagon_M2_mmpyuh_rs1>;
-
-//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyul_s0, int_hexagon_M2_mmpyul_s0>;
-def : T_PP_pat <M2_mmpyul_s1, int_hexagon_M2_mmpyul_s1>;
-def : T_PP_pat <M2_mmpyul_rs0, int_hexagon_M2_mmpyul_rs0>;
-def : T_PP_pat <M2_mmpyul_rs1, int_hexagon_M2_mmpyul_rs1>;
-
-// Vector reduce add unsigned bytes: Rdd32[+]=vrmpybu(Rss32,Rtt32)
-def : T_PP_pat <A2_vraddub, int_hexagon_A2_vraddub>;
-def : T_PPP_pat <A2_vraddub_acc, int_hexagon_A2_vraddub_acc>;
-
-// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt)
-def : T_PP_pat <A2_vrsadub, int_hexagon_A2_vrsadub>;
-def : T_PPP_pat <A2_vrsadub_acc, int_hexagon_A2_vrsadub_acc>;
-
-// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss)
-def : T_PP_pat <M2_vabsdiffh, int_hexagon_M2_vabsdiffh>;
-
-// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss)
-def : T_PP_pat <M2_vabsdiffw, int_hexagon_M2_vabsdiffw>;
-
-// Vector reduce complex multiply real or imaginary:
-// Rdd[+]=vrcmpy[ir](Rss,Rtt[*])
-def : T_PP_pat <M2_vrcmpyi_s0, int_hexagon_M2_vrcmpyi_s0>;
-def : T_PP_pat <M2_vrcmpyi_s0c, int_hexagon_M2_vrcmpyi_s0c>;
-def : T_PPP_pat <M2_vrcmaci_s0, int_hexagon_M2_vrcmaci_s0>;
-def : T_PPP_pat <M2_vrcmaci_s0c, int_hexagon_M2_vrcmaci_s0c>;
-
-def : T_PP_pat <M2_vrcmpyr_s0, int_hexagon_M2_vrcmpyr_s0>;
-def : T_PP_pat <M2_vrcmpyr_s0c, int_hexagon_M2_vrcmpyr_s0c>;
-def : T_PPP_pat <M2_vrcmacr_s0, int_hexagon_M2_vrcmacr_s0>;
-def : T_PPP_pat <M2_vrcmacr_s0c, int_hexagon_M2_vrcmacr_s0c>;
-
-// Vector reduce halfwords
-// Rdd[+]=vrmpyh(Rss,Rtt)
-def : T_PP_pat <M2_vrmpy_s0, int_hexagon_M2_vrmpy_s0>;
-def : T_PPP_pat <M2_vrmac_s0, int_hexagon_M2_vrmac_s0>;
-
-//===----------------------------------------------------------------------===//
-// Vector Multipy with accumulation
-//===----------------------------------------------------------------------===//
-
-// Vector multiply word by signed half with accumulation
-// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PPP_pat <M2_mmacls_s1, int_hexagon_M2_mmacls_s1>;
-def : T_PPP_pat <M2_mmacls_s0, int_hexagon_M2_mmacls_s0>;
-def : T_PPP_pat <M2_mmacls_rs1, int_hexagon_M2_mmacls_rs1>;
-def : T_PPP_pat <M2_mmacls_rs0, int_hexagon_M2_mmacls_rs0>;
-def : T_PPP_pat <M2_mmachs_s1, int_hexagon_M2_mmachs_s1>;
-def : T_PPP_pat <M2_mmachs_s0, int_hexagon_M2_mmachs_s0>;
-def : T_PPP_pat <M2_mmachs_rs1, int_hexagon_M2_mmachs_rs1>;
-def : T_PPP_pat <M2_mmachs_rs0, int_hexagon_M2_mmachs_rs0>;
-
-// Vector multiply word by unsigned half with accumulation
-// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PPP_pat <M2_mmaculs_s1, int_hexagon_M2_mmaculs_s1>;
-def : T_PPP_pat <M2_mmaculs_s0, int_hexagon_M2_mmaculs_s0>;
-def : T_PPP_pat <M2_mmaculs_rs1, int_hexagon_M2_mmaculs_rs1>;
-def : T_PPP_pat <M2_mmaculs_rs0, int_hexagon_M2_mmaculs_rs0>;
-def : T_PPP_pat <M2_mmacuhs_s1, int_hexagon_M2_mmacuhs_s1>;
-def : T_PPP_pat <M2_mmacuhs_s0, int_hexagon_M2_mmacuhs_s0>;
-def : T_PPP_pat <M2_mmacuhs_rs1, int_hexagon_M2_mmacuhs_rs1>;
-def : T_PPP_pat <M2_mmacuhs_rs0, int_hexagon_M2_mmacuhs_rs0>;
-
-// Vector multiply even halfwords with accumulation
-// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
-def : T_PPP_pat <M2_vmac2es, int_hexagon_M2_vmac2es>;
-def : T_PPP_pat <M2_vmac2es_s1, int_hexagon_M2_vmac2es_s1>;
-def : T_PPP_pat <M2_vmac2es_s0, int_hexagon_M2_vmac2es_s0>;
-
-// Vector dual multiply with accumulation
-// Rxx+=vdmpy(Rss,Rtt)[:sat]
-def : T_PPP_pat <M2_vdmacs_s1, int_hexagon_M2_vdmacs_s1>;
-def : T_PPP_pat <M2_vdmacs_s0, int_hexagon_M2_vdmacs_s0>;
-
-// Vector complex multiply real or imaginary with accumulation
-// Rxx+=vcmpy[ir](Rss,Rtt):sat
-def : T_PPP_pat <M2_vcmac_s0_sat_r, int_hexagon_M2_vcmac_s0_sat_r>;
-def : T_PPP_pat <M2_vcmac_s0_sat_i, int_hexagon_M2_vcmac_s0_sat_i>;
-
-//===----------------------------------------------------------------------===//
-// Add/Subtract halfword
-// Rd=add(Rt.L,Rs.[HL])[:sat]
-// Rd=sub(Rt.L,Rs.[HL])[:sat]
-// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16]
-// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16]
-//===----------------------------------------------------------------------===//
-
-//Rd=add(Rt.L,Rs.[LH])
-def : T_RR_pat <A2_addh_l16_ll, int_hexagon_A2_addh_l16_ll>;
-def : T_RR_pat <A2_addh_l16_hl, int_hexagon_A2_addh_l16_hl>;
-
-//Rd=add(Rt.L,Rs.[LH]):sat
-def : T_RR_pat <A2_addh_l16_sat_ll, int_hexagon_A2_addh_l16_sat_ll>;
-def : T_RR_pat <A2_addh_l16_sat_hl, int_hexagon_A2_addh_l16_sat_hl>;
-
-//Rd=sub(Rt.L,Rs.[LH])
-def : T_RR_pat <A2_subh_l16_ll, int_hexagon_A2_subh_l16_ll>;
-def : T_RR_pat <A2_subh_l16_hl, int_hexagon_A2_subh_l16_hl>;
-
-//Rd=sub(Rt.L,Rs.[LH]):sat
-def : T_RR_pat <A2_subh_l16_sat_ll, int_hexagon_A2_subh_l16_sat_ll>;
-def : T_RR_pat <A2_subh_l16_sat_hl, int_hexagon_A2_subh_l16_sat_hl>;
-
-//Rd=add(Rt.[LH],Rs.[LH]):<<16
-def : T_RR_pat <A2_addh_h16_ll, int_hexagon_A2_addh_h16_ll>;
-def : T_RR_pat <A2_addh_h16_lh, int_hexagon_A2_addh_h16_lh>;
-def : T_RR_pat <A2_addh_h16_hl, int_hexagon_A2_addh_h16_hl>;
-def : T_RR_pat <A2_addh_h16_hh, int_hexagon_A2_addh_h16_hh>;
-
-//Rd=sub(Rt.[LH],Rs.[LH]):<<16
-def : T_RR_pat <A2_subh_h16_ll, int_hexagon_A2_subh_h16_ll>;
-def : T_RR_pat <A2_subh_h16_lh, int_hexagon_A2_subh_h16_lh>;
-def : T_RR_pat <A2_subh_h16_hl, int_hexagon_A2_subh_h16_hl>;
-def : T_RR_pat <A2_subh_h16_hh, int_hexagon_A2_subh_h16_hh>;
-
-//Rd=add(Rt.[LH],Rs.[LH]):sat:<<16
-def : T_RR_pat <A2_addh_h16_sat_ll, int_hexagon_A2_addh_h16_sat_ll>;
-def : T_RR_pat <A2_addh_h16_sat_lh, int_hexagon_A2_addh_h16_sat_lh>;
-def : T_RR_pat <A2_addh_h16_sat_hl, int_hexagon_A2_addh_h16_sat_hl>;
-def : T_RR_pat <A2_addh_h16_sat_hh, int_hexagon_A2_addh_h16_sat_hh>;
-
-//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
-def : T_RR_pat <A2_subh_h16_sat_ll, int_hexagon_A2_subh_h16_sat_ll>;
-def : T_RR_pat <A2_subh_h16_sat_lh, int_hexagon_A2_subh_h16_sat_lh>;
-def : T_RR_pat <A2_subh_h16_sat_hl, int_hexagon_A2_subh_h16_sat_hl>;
-def : T_RR_pat <A2_subh_h16_sat_hh, int_hexagon_A2_subh_h16_sat_hh>;
-
-// ALU64 / ALU / min max
-def : T_RR_pat<A2_max, int_hexagon_A2_max>;
-def : T_RR_pat<A2_min, int_hexagon_A2_min>;
-def : T_RR_pat<A2_maxu, int_hexagon_A2_maxu>;
-def : T_RR_pat<A2_minu, int_hexagon_A2_minu>;
-
-// Shift and accumulate
-def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>;
-def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>;
-def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>;
-def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>;
-def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>;
-def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>;
-
-def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>;
-def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>;
-def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>;
-def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>;
-def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>;
-def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>;
-
-def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>;
-def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>;
-def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>;
-def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>;
-def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>;
-def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>;
-
-def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>;
-def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>;
-def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>;
-def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>;
-def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>;
-def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>;
-
-def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>;
-def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>;
-def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>;
-def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>;
-def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>;
-def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>;
-def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>;
-def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>;
-
-def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>;
-def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>;
-def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>;
-def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>;
-def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>;
-def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>;
-def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>;
-def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>;
-
-def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>;
-def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>;
-def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>;
-def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>;
-def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>;
-def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>;
-def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>;
-def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>;
-
-def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>;
-def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>;
-def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>;
-def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>;
-def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>;
-def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>;
-def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>;
-def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>;
-
-def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>;
-def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>;
-def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>;
-def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>;
-def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>;
-def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>;
-
-def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>;
-def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>;
-def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>;
-def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>;
-def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>;
-def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>;
-
-def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>;
-def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>;
-def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>;
-def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>;
-def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>;
-def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>;
-
-def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>;
-def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>;
-def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>;
-def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>;
-def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>;
-def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>;
-
-def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>;
-def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>;
-def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>;
-def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>;
-def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>;
-def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>;
-def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>;
-def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>;
-
-def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>;
-def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>;
-def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>;
-def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>;
-def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>;
-def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>;
-def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>;
-def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>;
-
-def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>;
-def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>;
-def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>;
-def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>;
-def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>;
-def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>;
-def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>;
-def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>;
-
-def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>;
-def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>;
-def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>;
-def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>;
-def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>;
-def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>;
-def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>;
-def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>;
-
-//*******************************************************************
-// ALU32/ALU
-//*******************************************************************
-def : T_RR_pat<A2_add, int_hexagon_A2_add>;
-def : T_RI_pat<A2_addi, int_hexagon_A2_addi>;
-def : T_RR_pat<A2_sub, int_hexagon_A2_sub>;
-def : T_IR_pat<A2_subri, int_hexagon_A2_subri>;
-def : T_RR_pat<A2_and, int_hexagon_A2_and>;
-def : T_RI_pat<A2_andir, int_hexagon_A2_andir>;
-def : T_RR_pat<A2_or, int_hexagon_A2_or>;
-def : T_RI_pat<A2_orir, int_hexagon_A2_orir>;
-def : T_RR_pat<A2_xor, int_hexagon_A2_xor>;
-def : T_RR_pat<A2_combinew, int_hexagon_A2_combinew>;
+ (MI I32:$Rs, I64:$Rt)>;
+
+def: Pat<(int_hexagon_A2_add IntRegs:$Rs, IntRegs:$Rt),
+ (A2_add IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, imm:$s16),
+ (A2_addi IntRegs:$Rs, imm:$s16)>;
+def: Pat<(int_hexagon_A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt),
+ (A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+def: Pat<(int_hexagon_A2_sub IntRegs:$Rs, IntRegs:$Rt),
+ (A2_sub IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_subri imm:$s10, IntRegs:$Rs),
+ (A2_subri imm:$s10, IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt),
+ (A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+def: Pat<(int_hexagon_M2_mpyi IntRegs:$Rs, IntRegs:$Rt),
+ (M2_mpyi IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_M2_mpyui IntRegs:$Rs, IntRegs:$Rt), // Same as M2_mpyi
+ (M2_mpyi IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_M2_mpysmi IntRegs:$Rs, imm:$s9),
+ (M2_mpysmi IntRegs:$Rs, imm:$s9)>;
+def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt),
+ (M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt),
+ (M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, imm:$u5),
+ (S2_asl_i_r IntRegs:$Rs, imm:$u5)>;
+def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, imm:$u5),
+ (S2_lsr_i_r IntRegs:$Rs, imm:$u5)>;
+def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, imm:$u5),
+ (S2_asr_i_r IntRegs:$Rs, imm:$u5)>;
+def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, imm:$u6),
+ (S2_asl_i_p DoubleRegs:$Rs, imm:$u6)>;
+def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, imm:$u6),
+ (S2_lsr_i_p DoubleRegs:$Rs, imm:$u6)>;
+def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, imm:$u6),
+ (S2_asr_i_p DoubleRegs:$Rs, imm:$u6)>;
+
+def: Pat<(int_hexagon_A2_and IntRegs:$Rs, IntRegs:$Rt),
+ (A2_and IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, imm:$s10),
+ (A2_andir IntRegs:$Rs, imm:$s10)>;
+def: Pat<(int_hexagon_A2_or IntRegs:$Rs, IntRegs:$Rt),
+ (A2_or IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, imm:$s10),
+ (A2_orir IntRegs:$Rs, imm:$s10)>;
+def: Pat<(int_hexagon_A2_xor IntRegs:$Rs, IntRegs:$Rt),
+ (A2_xor IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(int_hexagon_A2_sxtb IntRegs:$Rs),
+ (A2_sxtb IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_sxth IntRegs:$Rs),
+ (A2_sxth IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_zxtb IntRegs:$Rs),
+ (A2_zxtb IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_zxth IntRegs:$Rs),
+ (A2_zxth IntRegs:$Rs)>;
// Assembler mapped from Rd32=not(Rs32) to Rd32=sub(#-1,Rs32)
def : Pat <(int_hexagon_A2_not I32:$Rs),
@@ -757,16 +109,6 @@ def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
(S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
-// Transfer immediate
-def : Pat <(int_hexagon_A2_tfril I32:$Rs, u16_0ImmPred:$Is),
- (A2_tfril I32:$Rs, u16_0ImmPred:$Is)>;
-def : Pat <(int_hexagon_A2_tfrih I32:$Rs, u16_0ImmPred:$Is),
- (A2_tfrih I32:$Rs, u16_0ImmPred:$Is)>;
-
-// Transfer Register/immediate.
-def : T_R_pat <A2_tfr, int_hexagon_A2_tfr>;
-def : T_I_pat <A2_tfrsi, int_hexagon_A2_tfrsi>;
-
def ImmExt64: SDNodeXForm<imm, [{
int64_t V = N->getSExtValue();
return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i64);
@@ -783,49 +125,6 @@ def ImmExt64: SDNodeXForm<imm, [{
def : Pat<(int_hexagon_A2_tfrpi imm:$Is),
(A2_tfrpi (ImmExt64 $Is))>;
-// Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32)
-def : Pat<(int_hexagon_A2_tfrp I64:$src),
- (A2_combinew (HiReg I64:$src), (LoReg I64:$src))>;
-
-//*******************************************************************
-// ALU32/PERM
-//*******************************************************************
-// Combine
-def: T_RR_pat<A2_combine_hh, int_hexagon_A2_combine_hh>;
-def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
-def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
-def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
-
-def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32_0ImmPred, s8_0ImmPred>;
-
-// Mux
-def : T_QRR_pat<C2_mux, int_hexagon_C2_mux>;
-def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32_0ImmPred>;
-def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32_0ImmPred>;
-def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32_0ImmPred, s8_0ImmPred>;
-
-// Shift halfword
-def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
-def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>;
-
-// Sign/zero extend
-def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>;
-def : T_R_pat<A2_sxtb, int_hexagon_A2_sxtb>;
-def : T_R_pat<A2_zxth, int_hexagon_A2_zxth>;
-def : T_R_pat<A2_zxtb, int_hexagon_A2_zxtb>;
-
-//*******************************************************************
-// ALU32/PRED
-//*******************************************************************
-// Compare
-def : T_Q_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
-def : T_Q_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
-def : T_Q_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
-
-def : T_Q_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32_0ImmPred>;
-def : T_Q_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32_0ImmPred>;
-def : T_Q_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32_0ImmPred>;
-
def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2),
(C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
@@ -839,420 +138,6 @@ def : Pat <(int_hexagon_C2_cmplt I32:$src1, I32:$src2),
def : Pat <(int_hexagon_C2_cmpltu I32:$src1, I32:$src2),
(C2_tfrpr (C2_cmpgtu I32:$src2, I32:$src1))>;
-//*******************************************************************
-// ALU32/VH
-//*******************************************************************
-// Vector add, subtract, average halfwords
-def: T_RR_pat<A2_svaddh, int_hexagon_A2_svaddh>;
-def: T_RR_pat<A2_svaddhs, int_hexagon_A2_svaddhs>;
-def: T_RR_pat<A2_svadduhs, int_hexagon_A2_svadduhs>;
-
-def: T_RR_pat<A2_svsubh, int_hexagon_A2_svsubh>;
-def: T_RR_pat<A2_svsubhs, int_hexagon_A2_svsubhs>;
-def: T_RR_pat<A2_svsubuhs, int_hexagon_A2_svsubuhs>;
-
-def: T_RR_pat<A2_svavgh, int_hexagon_A2_svavgh>;
-def: T_RR_pat<A2_svavghs, int_hexagon_A2_svavghs>;
-def: T_RR_pat<A2_svnavgh, int_hexagon_A2_svnavgh>;
-
-//*******************************************************************
-// ALU64/ALU
-//*******************************************************************
-def: T_RR_pat<A2_addsat, int_hexagon_A2_addsat>;
-def: T_RR_pat<A2_subsat, int_hexagon_A2_subsat>;
-def: T_PP_pat<A2_addp, int_hexagon_A2_addp>;
-def: T_PP_pat<A2_subp, int_hexagon_A2_subp>;
-
-def: T_PP_pat<A2_andp, int_hexagon_A2_andp>;
-def: T_PP_pat<A2_orp, int_hexagon_A2_orp>;
-def: T_PP_pat<A2_xorp, int_hexagon_A2_xorp>;
-
-def: T_Q_PP_pat<C2_cmpeqp, int_hexagon_C2_cmpeqp>;
-def: T_Q_PP_pat<C2_cmpgtp, int_hexagon_C2_cmpgtp>;
-def: T_Q_PP_pat<C2_cmpgtup, int_hexagon_C2_cmpgtup>;
-
-def: T_PP_pat<S2_parityp, int_hexagon_S2_parityp>;
-def: T_RR_pat<S2_packhl, int_hexagon_S2_packhl>;
-
-//*******************************************************************
-// ALU64/VB
-//*******************************************************************
-// ALU64 - Vector add
-def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddub>;
-def : T_PP_pat <A2_vaddubs, int_hexagon_A2_vaddubs>;
-def : T_PP_pat <A2_vaddh, int_hexagon_A2_vaddh>;
-def : T_PP_pat <A2_vaddhs, int_hexagon_A2_vaddhs>;
-def : T_PP_pat <A2_vadduhs, int_hexagon_A2_vadduhs>;
-def : T_PP_pat <A2_vaddw, int_hexagon_A2_vaddw>;
-def : T_PP_pat <A2_vaddws, int_hexagon_A2_vaddws>;
-
-// ALU64 - Vector average
-def : T_PP_pat <A2_vavgub, int_hexagon_A2_vavgub>;
-def : T_PP_pat <A2_vavgubr, int_hexagon_A2_vavgubr>;
-def : T_PP_pat <A2_vavgh, int_hexagon_A2_vavgh>;
-def : T_PP_pat <A2_vavghr, int_hexagon_A2_vavghr>;
-def : T_PP_pat <A2_vavghcr, int_hexagon_A2_vavghcr>;
-def : T_PP_pat <A2_vavguh, int_hexagon_A2_vavguh>;
-def : T_PP_pat <A2_vavguhr, int_hexagon_A2_vavguhr>;
-
-def : T_PP_pat <A2_vavgw, int_hexagon_A2_vavgw>;
-def : T_PP_pat <A2_vavgwr, int_hexagon_A2_vavgwr>;
-def : T_PP_pat <A2_vavgwcr, int_hexagon_A2_vavgwcr>;
-def : T_PP_pat <A2_vavguw, int_hexagon_A2_vavguw>;
-def : T_PP_pat <A2_vavguwr, int_hexagon_A2_vavguwr>;
-
-// ALU64 - Vector negative average
-def : T_PP_pat <A2_vnavgh, int_hexagon_A2_vnavgh>;
-def : T_PP_pat <A2_vnavghr, int_hexagon_A2_vnavghr>;
-def : T_PP_pat <A2_vnavghcr, int_hexagon_A2_vnavghcr>;
-def : T_PP_pat <A2_vnavgw, int_hexagon_A2_vnavgw>;
-def : T_PP_pat <A2_vnavgwr, int_hexagon_A2_vnavgwr>;
-def : T_PP_pat <A2_vnavgwcr, int_hexagon_A2_vnavgwcr>;
-
-// ALU64 - Vector max
-def : T_PP_pat <A2_vmaxh, int_hexagon_A2_vmaxh>;
-def : T_PP_pat <A2_vmaxw, int_hexagon_A2_vmaxw>;
-def : T_PP_pat <A2_vmaxub, int_hexagon_A2_vmaxub>;
-def : T_PP_pat <A2_vmaxuh, int_hexagon_A2_vmaxuh>;
-def : T_PP_pat <A2_vmaxuw, int_hexagon_A2_vmaxuw>;
-
-// ALU64 - Vector min
-def : T_PP_pat <A2_vminh, int_hexagon_A2_vminh>;
-def : T_PP_pat <A2_vminw, int_hexagon_A2_vminw>;
-def : T_PP_pat <A2_vminub, int_hexagon_A2_vminub>;
-def : T_PP_pat <A2_vminuh, int_hexagon_A2_vminuh>;
-def : T_PP_pat <A2_vminuw, int_hexagon_A2_vminuw>;
-
-// ALU64 - Vector sub
-def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubub>;
-def : T_PP_pat <A2_vsububs, int_hexagon_A2_vsububs>;
-def : T_PP_pat <A2_vsubh, int_hexagon_A2_vsubh>;
-def : T_PP_pat <A2_vsubhs, int_hexagon_A2_vsubhs>;
-def : T_PP_pat <A2_vsubuhs, int_hexagon_A2_vsubuhs>;
-def : T_PP_pat <A2_vsubw, int_hexagon_A2_vsubw>;
-def : T_PP_pat <A2_vsubws, int_hexagon_A2_vsubws>;
-
-// ALU64 - Vector compare bytes
-def : T_Q_PP_pat <A2_vcmpbeq, int_hexagon_A2_vcmpbeq>;
-def : T_Q_PP_pat <A4_vcmpbgt, int_hexagon_A4_vcmpbgt>;
-def : T_Q_PP_pat <A2_vcmpbgtu, int_hexagon_A2_vcmpbgtu>;
-
-// ALU64 - Vector compare halfwords
-def : T_Q_PP_pat <A2_vcmpheq, int_hexagon_A2_vcmpheq>;
-def : T_Q_PP_pat <A2_vcmphgt, int_hexagon_A2_vcmphgt>;
-def : T_Q_PP_pat <A2_vcmphgtu, int_hexagon_A2_vcmphgtu>;
-
-// ALU64 - Vector compare words
-def : T_Q_PP_pat <A2_vcmpweq, int_hexagon_A2_vcmpweq>;
-def : T_Q_PP_pat <A2_vcmpwgt, int_hexagon_A2_vcmpwgt>;
-def : T_Q_PP_pat <A2_vcmpwgtu, int_hexagon_A2_vcmpwgtu>;
-
-// ALU64 / VB / Vector mux.
-def : T_QPP_pat <C2_vmux, int_hexagon_C2_vmux>;
-
-// MPY - Multiply and use full result
-// Rdd = mpy[u](Rs, Rt)
-def : T_RR_pat <M2_dpmpyss_s0, int_hexagon_M2_dpmpyss_s0>;
-def : T_RR_pat <M2_dpmpyuu_s0, int_hexagon_M2_dpmpyuu_s0>;
-
-// Complex multiply real or imaginary
-def : T_RR_pat <M2_cmpyi_s0, int_hexagon_M2_cmpyi_s0>;
-def : T_RR_pat <M2_cmpyr_s0, int_hexagon_M2_cmpyr_s0>;
-
-// Complex multiply
-def : T_RR_pat <M2_cmpys_s0, int_hexagon_M2_cmpys_s0>;
-def : T_RR_pat <M2_cmpysc_s0, int_hexagon_M2_cmpysc_s0>;
-def : T_RR_pat <M2_cmpys_s1, int_hexagon_M2_cmpys_s1>;
-def : T_RR_pat <M2_cmpysc_s1, int_hexagon_M2_cmpysc_s1>;
-
-// Vector multiply halfwords
-// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
-def : T_RR_pat <M2_vmpy2s_s0, int_hexagon_M2_vmpy2s_s0>;
-def : T_RR_pat <M2_vmpy2s_s1, int_hexagon_M2_vmpy2s_s1>;
-
-// Rxx[+-]= mpy[u](Rs,Rt)
-def : T_PRR_pat <M2_dpmpyss_acc_s0, int_hexagon_M2_dpmpyss_acc_s0>;
-def : T_PRR_pat <M2_dpmpyss_nac_s0, int_hexagon_M2_dpmpyss_nac_s0>;
-def : T_PRR_pat <M2_dpmpyuu_acc_s0, int_hexagon_M2_dpmpyuu_acc_s0>;
-def : T_PRR_pat <M2_dpmpyuu_nac_s0, int_hexagon_M2_dpmpyuu_nac_s0>;
-
-// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
-def : T_PRR_pat <M2_cmacs_s0, int_hexagon_M2_cmacs_s0>;
-def : T_PRR_pat <M2_cnacs_s0, int_hexagon_M2_cnacs_s0>;
-def : T_PRR_pat <M2_cmacs_s1, int_hexagon_M2_cmacs_s1>;
-def : T_PRR_pat <M2_cnacs_s1, int_hexagon_M2_cnacs_s1>;
-
-// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
-def : T_PRR_pat <M2_cmacsc_s0, int_hexagon_M2_cmacsc_s0>;
-def : T_PRR_pat <M2_cnacsc_s0, int_hexagon_M2_cnacsc_s0>;
-def : T_PRR_pat <M2_cmacsc_s1, int_hexagon_M2_cmacsc_s1>;
-def : T_PRR_pat <M2_cnacsc_s1, int_hexagon_M2_cnacsc_s1>;
-
-// Rxx+=cmpy[ir](Rs,Rt)
-def : T_PRR_pat <M2_cmaci_s0, int_hexagon_M2_cmaci_s0>;
-def : T_PRR_pat <M2_cmacr_s0, int_hexagon_M2_cmacr_s0>;
-
-// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
-def : T_PRR_pat <M2_vmac2, int_hexagon_M2_vmac2>;
-def : T_PRR_pat <M2_vmac2s_s0, int_hexagon_M2_vmac2s_s0>;
-def : T_PRR_pat <M2_vmac2s_s1, int_hexagon_M2_vmac2s_s1>;
-
-//*******************************************************************
-// CR
-//*******************************************************************
-def: T_Q_Q_pat<C2_not, int_hexagon_C2_not>;
-def: T_Q_Q_pat<C2_all8, int_hexagon_C2_all8>;
-def: T_Q_Q_pat<C2_any8, int_hexagon_C2_any8>;
-def: T_Q_Q_pat<C2_pxfer_map, int_hexagon_C2_pxfer_map>;
-
-def: T_Q_QQ_pat<C2_and, int_hexagon_C2_and>;
-def: T_Q_QQ_pat<C2_andn, int_hexagon_C2_andn>;
-def: T_Q_QQ_pat<C2_or, int_hexagon_C2_or>;
-def: T_Q_QQ_pat<C2_orn, int_hexagon_C2_orn>;
-def: T_Q_QQ_pat<C2_xor, int_hexagon_C2_xor>;
-
-// Multiply 32x32 and use lower result
-def : T_RRI_pat <M2_macsip, int_hexagon_M2_macsip>;
-def : T_RRI_pat <M2_macsin, int_hexagon_M2_macsin>;
-def : T_RRR_pat <M2_maci, int_hexagon_M2_maci>;
-
-// Subtract and accumulate
-def : T_RRR_pat <M2_subacc, int_hexagon_M2_subacc>;
-
-// Add and accumulate
-def : T_RRR_pat <M2_acci, int_hexagon_M2_acci>;
-def : T_RRR_pat <M2_nacci, int_hexagon_M2_nacci>;
-def : T_RRI_pat <M2_accii, int_hexagon_M2_accii>;
-def : T_RRI_pat <M2_naccii, int_hexagon_M2_naccii>;
-
-// XOR and XOR with destination
-def : T_RRR_pat <M2_xor_xacc, int_hexagon_M2_xor_xacc>;
-
-// Vector dual multiply with round and pack
-def : T_PP_pat <M2_vdmpyrs_s0, int_hexagon_M2_vdmpyrs_s0>;
-def : T_PP_pat <M2_vdmpyrs_s1, int_hexagon_M2_vdmpyrs_s1>;
-
-// Vector multiply halfwords with round and pack
-def : T_RR_pat <M2_vmpy2s_s0pack, int_hexagon_M2_vmpy2s_s0pack>;
-def : T_RR_pat <M2_vmpy2s_s1pack, int_hexagon_M2_vmpy2s_s1pack>;
-
-// Multiply and use lower result
-def : T_RR_pat <M2_mpyi, int_hexagon_M2_mpyi>;
-def : T_RI_pat <M2_mpysmi, int_hexagon_M2_mpysmi>;
-
-// Assembler mapped from Rd32=mpyui(Rs32,Rt32) to Rd32=mpyi(Rs32,Rt32)
-def : T_RR_pat <M2_mpyi, int_hexagon_M2_mpyui>;
-
-// Multiply and use upper result
-def : T_RR_pat <M2_mpy_up, int_hexagon_M2_mpy_up>;
-def : T_RR_pat <M2_mpyu_up, int_hexagon_M2_mpyu_up>;
-def : T_RR_pat <M2_hmmpyh_rs1, int_hexagon_M2_hmmpyh_rs1>;
-def : T_RR_pat <M2_hmmpyl_rs1, int_hexagon_M2_hmmpyl_rs1>;
-def : T_RR_pat <M2_dpmpyss_rnd_s0, int_hexagon_M2_dpmpyss_rnd_s0>;
-
-// Complex multiply with round and pack
-// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat
-def : T_RR_pat <M2_cmpyrs_s0, int_hexagon_M2_cmpyrs_s0>;
-def : T_RR_pat <M2_cmpyrs_s1, int_hexagon_M2_cmpyrs_s1>;
-def : T_RR_pat <M2_cmpyrsc_s0, int_hexagon_M2_cmpyrsc_s0>;
-def : T_RR_pat <M2_cmpyrsc_s1, int_hexagon_M2_cmpyrsc_s1>;
-
-//*******************************************************************
-// STYPE/ALU
-//*******************************************************************
-def : T_P_pat <A2_absp, int_hexagon_A2_absp>;
-def : T_P_pat <A2_negp, int_hexagon_A2_negp>;
-def : T_P_pat <A2_notp, int_hexagon_A2_notp>;
-
-//*******************************************************************
-// STYPE/BIT
-//*******************************************************************
-
-// Count leading/trailing
-def: T_R_pat<S2_cl0, int_hexagon_S2_cl0>;
-def: T_P_pat<S2_cl0p, int_hexagon_S2_cl0p>;
-def: T_R_pat<S2_cl1, int_hexagon_S2_cl1>;
-def: T_P_pat<S2_cl1p, int_hexagon_S2_cl1p>;
-def: T_R_pat<S2_clb, int_hexagon_S2_clb>;
-def: T_P_pat<S2_clbp, int_hexagon_S2_clbp>;
-def: T_R_pat<S2_clbnorm, int_hexagon_S2_clbnorm>;
-def: T_R_pat<S2_ct0, int_hexagon_S2_ct0>;
-def: T_R_pat<S2_ct1, int_hexagon_S2_ct1>;
-
-// Compare bit mask
-def: T_RR_pat<C2_bitsclr, int_hexagon_C2_bitsclr>;
-def: T_RI_pat<C2_bitsclri, int_hexagon_C2_bitsclri>;
-def: T_RR_pat<C2_bitsset, int_hexagon_C2_bitsset>;
-
-// Vector shuffle
-def : T_PP_pat <S2_shuffeb, int_hexagon_S2_shuffeb>;
-def : T_PP_pat <S2_shuffob, int_hexagon_S2_shuffob>;
-def : T_PP_pat <S2_shuffeh, int_hexagon_S2_shuffeh>;
-def : T_PP_pat <S2_shuffoh, int_hexagon_S2_shuffoh>;
-
-// Vector truncate
-def : T_PP_pat <S2_vtrunewh, int_hexagon_S2_vtrunewh>;
-def : T_PP_pat <S2_vtrunowh, int_hexagon_S2_vtrunowh>;
-
-// Linear feedback-shift Iteration.
-def : T_PP_pat <S2_lfsp, int_hexagon_S2_lfsp>;
-
-// Vector align
-// Need custom lowering
-def : T_PPQ_pat <S2_valignrb, int_hexagon_S2_valignrb>;
-def : T_PPI_pat <S2_valignib, int_hexagon_S2_valignib>;
-
-// Vector splice
-def : T_PPQ_pat <S2_vsplicerb, int_hexagon_S2_vsplicerb>;
-def : T_PPI_pat <S2_vspliceib, int_hexagon_S2_vspliceib>;
-
-// Shift by immediate and add
-def : T_RRI_pat<S2_addasl_rrri, int_hexagon_S2_addasl_rrri>;
-
-// Extract bitfield
-def : T_PII_pat<S2_extractup, int_hexagon_S2_extractup>;
-def : T_RII_pat<S2_extractu, int_hexagon_S2_extractu>;
-def : T_RP_pat <S2_extractu_rp, int_hexagon_S2_extractu_rp>;
-def : T_PP_pat <S2_extractup_rp, int_hexagon_S2_extractup_rp>;
-
-// Insert bitfield
-def : Pat <(int_hexagon_S2_insert_rp I32:$src1, I32:$src2, I64:$src3),
- (S2_insert_rp I32:$src1, I32:$src2, I64:$src3)>;
-
-def : Pat<(i64 (int_hexagon_S2_insertp_rp I64:$src1, I64:$src2, I64:$src3)),
- (i64 (S2_insertp_rp I64:$src1, I64:$src2, I64:$src3))>;
-
-def : Pat<(int_hexagon_S2_insert I32:$src1, I32:$src2,
- u5_0ImmPred:$src3, u5_0ImmPred:$src4),
- (S2_insert I32:$src1, I32:$src2,
- u5_0ImmPred:$src3, u5_0ImmPred:$src4)>;
-
-def : Pat<(i64 (int_hexagon_S2_insertp I64:$src1, I64:$src2,
- u6_0ImmPred:$src3, u6_0ImmPred:$src4)),
- (i64 (S2_insertp I64:$src1, I64:$src2,
- u6_0ImmPred:$src3, u6_0ImmPred:$src4))>;
-
-// Interleave/deinterleave
-def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>;
-def : T_P_pat <S2_deinterleave, int_hexagon_S2_deinterleave>;
-
-// Set/Clear/Toggle Bit
-def: T_RI_pat<S2_setbit_i, int_hexagon_S2_setbit_i>;
-def: T_RI_pat<S2_clrbit_i, int_hexagon_S2_clrbit_i>;
-def: T_RI_pat<S2_togglebit_i, int_hexagon_S2_togglebit_i>;
-
-def: T_RR_pat<S2_setbit_r, int_hexagon_S2_setbit_r>;
-def: T_RR_pat<S2_clrbit_r, int_hexagon_S2_clrbit_r>;
-def: T_RR_pat<S2_togglebit_r, int_hexagon_S2_togglebit_r>;
-
-// Test Bit
-def: T_Q_RI_pat<S2_tstbit_i, int_hexagon_S2_tstbit_i>;
-def: T_Q_RR_pat<S2_tstbit_r, int_hexagon_S2_tstbit_r>;
-
-//*******************************************************************
-// STYPE/COMPLEX
-//*******************************************************************
-// Vector Complex conjugate
-def : T_P_pat <A2_vconj, int_hexagon_A2_vconj>;
-
-// Vector Complex rotate
-def : T_PR_pat <S2_vcrotate, int_hexagon_S2_vcrotate>;
-
-//*******************************************************************
-// STYPE/PERM
-//*******************************************************************
-
-// Vector saturate without pack
-def : T_P_pat <S2_vsathb_nopack, int_hexagon_S2_vsathb_nopack>;
-def : T_P_pat <S2_vsathub_nopack, int_hexagon_S2_vsathub_nopack>;
-def : T_P_pat <S2_vsatwh_nopack, int_hexagon_S2_vsatwh_nopack>;
-def : T_P_pat <S2_vsatwuh_nopack, int_hexagon_S2_vsatwuh_nopack>;
-
-//*******************************************************************
-// STYPE/PRED
-//*******************************************************************
-
-// Predicate transfer
-def: Pat<(i32 (int_hexagon_C2_tfrpr I32:$Rs)),
- (i32 (C2_tfrpr (C2_tfrrp I32:$Rs)))>;
-def: Pat<(i32 (int_hexagon_C2_tfrrp I32:$Rs)),
- (i32 (C2_tfrpr (C2_tfrrp I32:$Rs)))>;
-
-// Mask generate from predicate
-def: Pat<(i64 (int_hexagon_C2_mask I32:$Rs)),
- (i64 (C2_mask (C2_tfrrp I32:$Rs)))>;
-
-// Viterbi pack even and odd predicate bits
-def: T_QQ_pat<C2_vitpack, int_hexagon_C2_vitpack>;
-
-//*******************************************************************
-// STYPE/SHIFT
-//*******************************************************************
-
-def : T_PI_pat <S2_asr_i_p, int_hexagon_S2_asr_i_p>;
-def : T_PI_pat <S2_lsr_i_p, int_hexagon_S2_lsr_i_p>;
-def : T_PI_pat <S2_asl_i_p, int_hexagon_S2_asl_i_p>;
-
-def : T_PR_pat <S2_asr_r_p, int_hexagon_S2_asr_r_p>;
-def : T_PR_pat <S2_lsr_r_p, int_hexagon_S2_lsr_r_p>;
-def : T_PR_pat <S2_asl_r_p, int_hexagon_S2_asl_r_p>;
-def : T_PR_pat <S2_lsl_r_p, int_hexagon_S2_lsl_r_p>;
-
-def : T_RR_pat <S2_asr_r_r, int_hexagon_S2_asr_r_r>;
-def : T_RR_pat <S2_lsr_r_r, int_hexagon_S2_lsr_r_r>;
-def : T_RR_pat <S2_asl_r_r, int_hexagon_S2_asl_r_r>;
-def : T_RR_pat <S2_lsl_r_r, int_hexagon_S2_lsl_r_r>;
-
-def : T_RR_pat <S2_asr_r_r_sat, int_hexagon_S2_asr_r_r_sat>;
-def : T_RR_pat <S2_asl_r_r_sat, int_hexagon_S2_asl_r_r_sat>;
-
-def : T_R_pat <S2_vsxtbh, int_hexagon_S2_vsxtbh>;
-def : T_R_pat <S2_vzxtbh, int_hexagon_S2_vzxtbh>;
-def : T_R_pat <S2_vsxthw, int_hexagon_S2_vsxthw>;
-def : T_R_pat <S2_vzxthw, int_hexagon_S2_vzxthw>;
-def : T_R_pat <S2_vsplatrh, int_hexagon_S2_vsplatrh>;
-def : T_R_pat <A2_sxtw, int_hexagon_A2_sxtw>;
-
-// Vector saturate and pack
-def : T_R_pat <S2_svsathb, int_hexagon_S2_svsathb>;
-def : T_R_pat <S2_svsathub, int_hexagon_S2_svsathub>;
-def : T_P_pat <S2_vsathub, int_hexagon_S2_vsathub>;
-def : T_P_pat <S2_vsatwh, int_hexagon_S2_vsatwh>;
-def : T_P_pat <S2_vsatwuh, int_hexagon_S2_vsatwuh>;
-def : T_P_pat <S2_vsathb, int_hexagon_S2_vsathb>;
-
-def : T_P_pat <S2_vtrunohb, int_hexagon_S2_vtrunohb>;
-def : T_P_pat <S2_vtrunehb, int_hexagon_S2_vtrunehb>;
-def : T_P_pat <S2_vrndpackwh, int_hexagon_S2_vrndpackwh>;
-def : T_P_pat <S2_vrndpackwhs, int_hexagon_S2_vrndpackwhs>;
-def : T_R_pat <S2_brev, int_hexagon_S2_brev>;
-def : T_R_pat <S2_vsplatrb, int_hexagon_S2_vsplatrb>;
-
-def : T_R_pat <A2_abs, int_hexagon_A2_abs>;
-def : T_R_pat <A2_abssat, int_hexagon_A2_abssat>;
-def : T_R_pat <A2_negsat, int_hexagon_A2_negsat>;
-
-def : T_R_pat <A2_swiz, int_hexagon_A2_swiz>;
-
-def : T_P_pat <A2_sat, int_hexagon_A2_sat>;
-def : T_R_pat <A2_sath, int_hexagon_A2_sath>;
-def : T_R_pat <A2_satuh, int_hexagon_A2_satuh>;
-def : T_R_pat <A2_satub, int_hexagon_A2_satub>;
-def : T_R_pat <A2_satb, int_hexagon_A2_satb>;
-
-// Vector arithmetic shift right by immediate with truncate and pack.
-def : T_PI_pat<S2_asr_i_svw_trun, int_hexagon_S2_asr_i_svw_trun>;
-
-def : T_RI_pat <S2_asr_i_r, int_hexagon_S2_asr_i_r>;
-def : T_RI_pat <S2_lsr_i_r, int_hexagon_S2_lsr_i_r>;
-def : T_RI_pat <S2_asl_i_r, int_hexagon_S2_asl_i_r>;
-def : T_RI_pat <S2_asr_i_r_rnd, int_hexagon_S2_asr_i_r_rnd>;
-def : T_RI_pat <S2_asr_i_r_rnd_goodsyntax,
- int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
-
-// Shift left by immediate with saturation.
-def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>;
-
//===----------------------------------------------------------------------===//
// Template 'def pat' to map tableidx[bhwd] intrinsics to :raw instructions.
//===----------------------------------------------------------------------===//
@@ -1277,11 +162,8 @@ def SDEC3 : SDNodeXForm<imm, [{
// values from the 4th input operand. Please note that subtraction is not
// needed for int_hexagon_S2_tableidxb_goodsyntax.
-def : Pat <(int_hexagon_S2_tableidxb_goodsyntax I32:$src1, I32:$src2,
- u4_0ImmPred:$src3, u5_0ImmPred:$src4),
- (S2_tableidxb I32:$src1, I32:$src2,
- u4_0ImmPred:$src3, u5_0ImmPred:$src4)>;
-
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxb_goodsyntax, S2_tableidxb,
+ IdImm>;
def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh,
SDEC1>;
def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw,
@@ -1289,52 +171,6 @@ def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw,
def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd,
SDEC3>;
-//*******************************************************************
-// STYPE/VH
-//*******************************************************************
-
-// Vector absolute value halfwords with and without saturation
-// Rdd64=vabsh(Rss64)[:sat]
-def : T_P_pat <A2_vabsh, int_hexagon_A2_vabsh>;
-def : T_P_pat <A2_vabshsat, int_hexagon_A2_vabshsat>;
-
-// Vector shift halfwords by immediate
-// Rdd64=[vaslh/vasrh/vlsrh](Rss64,u4)
-def : T_PI_pat <S2_asr_i_vh, int_hexagon_S2_asr_i_vh>;
-def : T_PI_pat <S2_lsr_i_vh, int_hexagon_S2_lsr_i_vh>;
-def : T_PI_pat <S2_asl_i_vh, int_hexagon_S2_asl_i_vh>;
-
-// Vector shift halfwords by register
-// Rdd64=[vaslw/vasrw/vlslw/vlsrw](Rss64,Rt32)
-def : T_PR_pat <S2_asr_r_vh, int_hexagon_S2_asr_r_vh>;
-def : T_PR_pat <S2_lsr_r_vh, int_hexagon_S2_lsr_r_vh>;
-def : T_PR_pat <S2_asl_r_vh, int_hexagon_S2_asl_r_vh>;
-def : T_PR_pat <S2_lsl_r_vh, int_hexagon_S2_lsl_r_vh>;
-
-//*******************************************************************
-// STYPE/VW
-//*******************************************************************
-
-// Vector absolute value words with and without saturation
-def : T_P_pat <A2_vabsw, int_hexagon_A2_vabsw>;
-def : T_P_pat <A2_vabswsat, int_hexagon_A2_vabswsat>;
-
-// Vector shift words by immediate.
-// Rdd64=[vasrw/vlsrw|vaslw](Rss64,u5)
-def : T_PI_pat <S2_asr_i_vw, int_hexagon_S2_asr_i_vw>;
-def : T_PI_pat <S2_lsr_i_vw, int_hexagon_S2_lsr_i_vw>;
-def : T_PI_pat <S2_asl_i_vw, int_hexagon_S2_asl_i_vw>;
-
-// Vector shift words by register.
-// Rdd64=[vasrw/vlsrw|vaslw|vlslw](Rss64,Rt32)
-def : T_PR_pat <S2_asr_r_vw, int_hexagon_S2_asr_r_vw>;
-def : T_PR_pat <S2_lsr_r_vw, int_hexagon_S2_lsr_r_vw>;
-def : T_PR_pat <S2_asl_r_vw, int_hexagon_S2_asl_r_vw>;
-def : T_PR_pat <S2_lsl_r_vw, int_hexagon_S2_lsl_r_vw>;
-
-// Vector shift words with truncate and pack
-def : T_PR_pat <S2_asr_r_svw_trun, int_hexagon_S2_asr_r_svw_trun>;
-
// Load/store locked.
def : T_R_pat<L2_loadw_locked, int_hexagon_L2_loadw_locked>;
def : T_R_pat<L4_loadd_locked, int_hexagon_L4_loadd_locked>;
@@ -1370,10 +206,13 @@ def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> {
def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
- (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>;
+ (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>,
+ Requires<[UseHVX]>;
+
def : Pat<(!cast<Intrinsic>(IntID#"_128B") HvxQR:$src1, IntRegs:$src2,
HvxVR:$src3),
- (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>;
+ (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>,
+ Requires<[UseHVX]>;
}
defm : MaskedStore <V6_vS32b_qpred_ai, int_hexagon_V6_vmaskedstoreq>;
@@ -1398,7 +237,241 @@ def: T_R_pat<Y2_dczeroa, int_hexagon_Y2_dczeroa>;
def: T_RR_pat<Y4_l2fetch, int_hexagon_Y4_l2fetch>;
def: T_RP_pat<Y5_l2fetch, int_hexagon_Y5_l2fetch>;
-include "HexagonIntrinsicsV3.td"
-include "HexagonIntrinsicsV4.td"
-include "HexagonIntrinsicsV5.td"
-include "HexagonIntrinsicsV60.td"
+//
+// Patterns for optimizing code generation for HVX.
+
+def u3_64_ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)(64 - N->getSExtValue());
+ return isUInt<3>(v);
+}]>;
+
+def u3_128_ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)(128 - N->getSExtValue());
+ return isUInt<3>(v);
+}]>;
+
+def SUB_64_VAL : SDNodeXForm<imm, [{
+ int32_t Imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(64 - Imm, SDLoc(N), MVT::i32);
+}]>;
+
+def SUB_128_VAL : SDNodeXForm<imm, [{
+ int32_t Imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(128 - Imm, SDLoc(N), MVT::i32);
+}]>;
+
+let AddedComplexity = 100 in {
+def : Pat <(v16i32 (int_hexagon_V6_lo (v32i32 HvxWR:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 HvxWR:$src1), vsub_lo))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v16i32 (int_hexagon_V6_hi (v32i32 HvxWR:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 HvxWR:$src1), vsub_hi))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i32 (int_hexagon_V6_lo_128B (v64i32 HvxWR:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 HvxWR:$src1), vsub_lo))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i32 (int_hexagon_V6_hi_128B (v64i32 HvxWR:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 HvxWR:$src1), vsub_hi))>,
+ Requires<[UseHVX]>;
+}
+
+def : Pat <(v512i1 (bitconvert (v16i32 HvxVR:$src1))),
+ (v512i1 (V6_vandvrt (v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))),
+ (v512i1 (V6_vandvrt (v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v512i1 (bitconvert (v64i8 HvxVR:$src1))),
+ (v512i1 (V6_vandvrt (v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))),
+ (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))),
+ (v32i16 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v64i8 (bitconvert (v512i1 HvxQR:$src1))),
+ (v64i8 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))),
+ (v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))),
+ (v1024i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (bitconvert (v128i8 HvxVR:$src1))),
+ (v1024i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))),
+ (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))),
+ (v64i16 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v128i8 (bitconvert (v1024i1 HvxQR:$src1))),
+ (v128i8 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+let AddedComplexity = 140 in {
+def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai IntRegs:$addr, 0,
+ (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
+ (v512i1 (V6_vandvrt
+ (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(store (v1024i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai IntRegs:$addr, 0,
+ (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
+ (v1024i1 (V6_vandvrt
+ (v32i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+}
+
+def: Pat<(v64i16 (trunc v64i32:$Vdd)),
+ (v64i16 (V6_vpackwh_sat
+ (v32i32 (V6_hi HvxWR:$Vdd)),
+ (v32i32 (V6_lo HvxWR:$Vdd))))>,
+ Requires<[UseHVX]>;
+
+def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV55]>;
+
+multiclass T_VI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, u3_0ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1, u3_0ImmPred:$src2)>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, u3_0ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1, u3_0ImmPred:$src2)>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VI_inv_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, u3_64_ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1,
+ (SUB_64_VAL u3_64_ImmPred:$src2))>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, u3_128_ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1, (SUB_128_VAL u3_128_ImmPred:$src2))>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VVI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, HvxVR:$src2,
+ u3_0ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ u3_0ImmPred:$src3)>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VVI_inv_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, HvxVR:$src2, u3_64_ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ (SUB_64_VAL u3_64_ImmPred:$src3))>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, HvxVR:$src2,
+ u3_128_ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ (SUB_128_VAL u3_128_ImmPred:$src3))>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, HvxVR:$src2,
+ IntRegs:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVX]>;
+}
+
+defm : T_VI_pat <V6_valignbi, int_hexagon_V6_vror>;
+defm : T_VI_inv_pat <V6_vlalignbi, int_hexagon_V6_vror>;
+
+defm : T_VVI_pat <V6_valignbi, int_hexagon_V6_valignb>;
+defm : T_VVI_inv_pat <V6_vlalignbi, int_hexagon_V6_valignbi>;
+defm : T_VVI_inv_pat <V6_vlalignbi, int_hexagon_V6_valignb>;
+defm : T_VVR_pat <V6_valignb, int_hexagon_V6_valignbi>;
+defm : T_VVI_pat <V6_vlalignbi, int_hexagon_V6_vlalignb>;
+defm : T_VVI_inv_pat <V6_valignbi, int_hexagon_V6_vlalignbi>;
+defm : T_VVI_inv_pat <V6_valignbi, int_hexagon_V6_vlalignb>;
+defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignbi>;
+
+def: Pat<(int_hexagon_V6_vd0),
+ (V6_vd0)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vd0_128B),
+ (V6_vd0)>, Requires<[HasV60, UseHVX128B]>;
+
+def: Pat<(int_hexagon_V6_vdd0),
+ (V6_vdd0)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdd0_128B),
+ (V6_vdd0)>, Requires<[HasV65, UseHVX128B]>;
+
+def: Pat<(int_hexagon_V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5),
+ (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_add_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5),
+ (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+
+include "HexagonDepMapAsm2Intrin.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
deleted file mode 100644
index 6152cb098825..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
+++ /dev/null
@@ -1,27 +0,0 @@
-//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-// Vector reduce complex multiply real or imaginary
-def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
-def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
-def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
-
-// Vector reduce add unsigned halfwords
-def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
-
-def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
-def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
-def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
-def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
-def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
-def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
deleted file mode 100644
index 2affe531515d..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ /dev/null
@@ -1,305 +0,0 @@
-//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This is populated based on the following specs:
-// Hexagon V4 Architecture Extensions
-// Application-Level Specification
-// 80-V9418-12 Rev. A
-// June 15, 2010
-
-// Vector reduce multiply word by signed half (32x16)
-//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
-def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
-def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
-
-//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
-def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
-
-//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
-def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
-def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
-
-//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
-def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
-
-// Vector multiply halfwords, signed by unsigned
-// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
-def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
-def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
-
-// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
-def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
-def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
-
-// Vector polynomial multiply halfwords
-// Rdd=vpmpyh(Rs,Rt)
-def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
-// Rxx[^]=vpmpyh(Rs,Rt)
-def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
-
-// Polynomial multiply words
-// Rdd=pmpyw(Rs,Rt)
-def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
-// Rxx^=pmpyw(Rs,Rt)
-def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
-
-//Rxx^=asr(Rss,Rt)
-def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
-//Rxx^=asl(Rss,Rt)
-def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
-//Rxx^=lsr(Rss,Rt)
-def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
-//Rxx^=lsl(Rss,Rt)
-def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
-
-// Multiply and use upper result
-def : T_RR_pat <M2_mpysu_up, int_hexagon_M2_mpysu_up>;
-def : T_RR_pat <M2_mpy_up_s1, int_hexagon_M2_mpy_up_s1>;
-def : T_RR_pat <M2_hmmpyh_s1, int_hexagon_M2_hmmpyh_s1>;
-def : T_RR_pat <M2_hmmpyl_s1, int_hexagon_M2_hmmpyl_s1>;
-def : T_RR_pat <M2_mpy_up_s1_sat, int_hexagon_M2_mpy_up_s1_sat>;
-
-def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddb_map>;
-def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubb_map>;
-
-// Vector reduce add unsigned halfwords
-def : T_PP_pat <M2_vraddh, int_hexagon_M2_vraddh>;
-
-def: T_P_pat<S2_brevp, int_hexagon_S2_brevp>;
-def: T_P_pat<S2_ct0p, int_hexagon_S2_ct0p>;
-def: T_P_pat<S2_ct1p, int_hexagon_S2_ct1p>;
-
-def: T_Q_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
-def: T_Q_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
-def: T_Q_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
-
-def : T_Q_PI_pat<A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi>;
-def : T_Q_PI_pat<A4_vcmpbgti, int_hexagon_A4_vcmpbgti>;
-def : T_Q_PI_pat<A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui>;
-def : T_Q_PI_pat<A4_vcmpheqi, int_hexagon_A4_vcmpheqi>;
-def : T_Q_PI_pat<A4_vcmphgti, int_hexagon_A4_vcmphgti>;
-def : T_Q_PI_pat<A4_vcmphgtui, int_hexagon_A4_vcmphgtui>;
-def : T_Q_PI_pat<A4_vcmpweqi, int_hexagon_A4_vcmpweqi>;
-def : T_Q_PI_pat<A4_vcmpwgti, int_hexagon_A4_vcmpwgti>;
-def : T_Q_PI_pat<A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui>;
-def : T_Q_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
-
-def : T_Q_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>;
-def : T_Q_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>;
-def : T_Q_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>;
-def : T_Q_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>;
-def : T_Q_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>;
-def : T_Q_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>;
-
-def : T_Q_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>;
-def : T_Q_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>;
-def : T_Q_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
-
-def : T_Q_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>;
-def : T_Q_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>;
-def : T_Q_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
-
-def : T_Q_RP_pat<A4_boundscheck, int_hexagon_A4_boundscheck>;
-def : T_Q_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;
-
-def : T_RRR_pat <M4_mpyrr_addr, int_hexagon_M4_mpyrr_addr>;
-def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;
-def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;
-def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
-def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
-def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
-def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
-
-// Complex multiply 32x16
-def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
-def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
-
-def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
-def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
-
-def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
-def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>;
-
-// Complex add/sub halfwords/words
-def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
-def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
-def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
-def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
-
-def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
-def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
-
-// Extract bitfield
-def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
-def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>;
-def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
-def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
-
-// Vector conditional negate
-// Rdd=vcnegh(Rss,Rt)
-def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
-
-// Shift an immediate left by register amount
-def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
-
-// Vector reduce maximum halfwords
-def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
-def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
-
-// Vector reduce maximum words
-def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
-def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
-
-// Vector reduce minimum halfwords
-def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
-def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
-
-// Vector reduce minimum words
-def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
-def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
-
-// Rotate and reduce bytes
-def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
- u2_0ImmPred:$src3),
- (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>;
-
-// Rotate and reduce bytes with accumulation
-// Rxx+=vrcrotate(Rss,Rt,#u2)
-def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3, u2_0ImmPred:$src4),
- (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3, u2_0ImmPred:$src4)>;
-
-// Vector conditional negate
-def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
-
-// Logical xor with xor accumulation
-def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
-
-// ALU64 - Vector min/max byte
-def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
-def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
-
-// Shift and add/sub/and/or
-def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
-def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>;
-def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
-def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
-def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
-def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>;
-def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
-def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
-
-// Split bitfield
-def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
-def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
-
-def: T_RR_pat<S4_parity, int_hexagon_S4_parity>;
-
-def: T_Q_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
-def: T_Q_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
-
-def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>;
-def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;
-def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;
-
-//*******************************************************************
-// ALU32/ALU
-//*******************************************************************
-
-// ALU32 / ALU / Logical Operations.
-def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
-def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
-
-//*******************************************************************
-// ALU32/PERM
-//*******************************************************************
-
-// Combine Words Into Doublewords.
-def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>;
-def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>;
-
-//*******************************************************************
-// ALU32/PRED
-//*******************************************************************
-
-// Compare
-def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>;
-def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>;
-def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>;
-
-// Compare To General Register.
-def: T_Q_RR_pat<C4_cmpneq, int_hexagon_C4_cmpneq>;
-def: T_Q_RR_pat<C4_cmplte, int_hexagon_C4_cmplte>;
-def: T_Q_RR_pat<C4_cmplteu, int_hexagon_C4_cmplteu>;
-
-def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
-def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
-
-def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
-def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
-
-//*******************************************************************
-// CR
-//*******************************************************************
-
-// CR / Logical Operations On Predicates.
-def: T_Q_QQQ_pat<C4_and_and, int_hexagon_C4_and_and>;
-def: T_Q_QQQ_pat<C4_and_andn, int_hexagon_C4_and_andn>;
-def: T_Q_QQQ_pat<C4_and_or, int_hexagon_C4_and_or>;
-def: T_Q_QQQ_pat<C4_and_orn, int_hexagon_C4_and_orn>;
-def: T_Q_QQQ_pat<C4_or_and, int_hexagon_C4_or_and>;
-def: T_Q_QQQ_pat<C4_or_andn, int_hexagon_C4_or_andn>;
-def: T_Q_QQQ_pat<C4_or_or, int_hexagon_C4_or_or>;
-def: T_Q_QQQ_pat<C4_or_orn, int_hexagon_C4_or_orn>;
-
-//*******************************************************************
-// XTYPE/ALU
-//*******************************************************************
-
-// Add And Accumulate.
-
-def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
-def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
-
-
-// XTYPE / ALU / Logical-logical Words.
-def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>;
-def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>;
-def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>;
-def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>;
-def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>;
-def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>;
-def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>;
-def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>;
-def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>;
-def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
-def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
-
-def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
-def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>;
-def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
-
-// Modulo wrap.
-def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
-
-// Arithmetic/Convergent round
-// Rd=[cround|round](Rs,Rt)[:sat]
-// Rd=[cround|round](Rs,#u5)[:sat]
-def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
-def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
-
-def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
-def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
-
-def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
-def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
-
-def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
index 29f67cffcf89..a852394f2160 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@@ -7,9 +7,314 @@
//
//===----------------------------------------------------------------------===//
+def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
+def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
+def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
+
+// Vector reduce add unsigned halfwords
+def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
+
+def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
+def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
+def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
+def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
+def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
+def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
+
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
+def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
+def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
+
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
+
+//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
+def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
+def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
+// Rxx[^]=vpmpyh(Rs,Rt)
+def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
+// Rxx^=pmpyw(Rs,Rt)
+def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
+
+//Rxx^=asr(Rss,Rt)
+def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
+//Rxx^=asl(Rss,Rt)
+def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
+//Rxx^=lsr(Rss,Rt)
+def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
+//Rxx^=lsl(Rss,Rt)
+def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
+
+// Multiply and use upper result
+def : T_RR_pat <M2_mpysu_up, int_hexagon_M2_mpysu_up>;
+def : T_RR_pat <M2_mpy_up_s1, int_hexagon_M2_mpy_up_s1>;
+def : T_RR_pat <M2_hmmpyh_s1, int_hexagon_M2_hmmpyh_s1>;
+def : T_RR_pat <M2_hmmpyl_s1, int_hexagon_M2_hmmpyl_s1>;
+def : T_RR_pat <M2_mpy_up_s1_sat, int_hexagon_M2_mpy_up_s1_sat>;
+
+def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddb_map>;
+def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubb_map>;
+
+// Vector reduce add halfwords
+def : T_PP_pat <M2_vraddh, int_hexagon_M2_vraddh>;
+
+def: T_P_pat<S2_brevp, int_hexagon_S2_brevp>;
+def: T_P_pat<S2_ct0p, int_hexagon_S2_ct0p>;
+def: T_P_pat<S2_ct1p, int_hexagon_S2_ct1p>;
+
+def: T_Q_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
+def: T_Q_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
+def: T_Q_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
+
+def : T_Q_PI_pat<A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi>;
+def : T_Q_PI_pat<A4_vcmpbgti, int_hexagon_A4_vcmpbgti>;
+def : T_Q_PI_pat<A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui>;
+def : T_Q_PI_pat<A4_vcmpheqi, int_hexagon_A4_vcmpheqi>;
+def : T_Q_PI_pat<A4_vcmphgti, int_hexagon_A4_vcmphgti>;
+def : T_Q_PI_pat<A4_vcmphgtui, int_hexagon_A4_vcmphgtui>;
+def : T_Q_PI_pat<A4_vcmpweqi, int_hexagon_A4_vcmpweqi>;
+def : T_Q_PI_pat<A4_vcmpwgti, int_hexagon_A4_vcmpwgti>;
+def : T_Q_PI_pat<A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui>;
+def : T_Q_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
+
+def : T_Q_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>;
+def : T_Q_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>;
+def : T_Q_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>;
+def : T_Q_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>;
+def : T_Q_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>;
+def : T_Q_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>;
+
+def : T_Q_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>;
+def : T_Q_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>;
+def : T_Q_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
+
+def : T_Q_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>;
+def : T_Q_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>;
+def : T_Q_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
+
+def : T_Q_RP_pat<A4_boundscheck, int_hexagon_A4_boundscheck>;
+def : T_Q_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;
+
+def : T_RRR_pat <M4_mpyrr_addr, int_hexagon_M4_mpyrr_addr>;
+def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;
+def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;
+def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
+def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
+def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
+def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
+
+// Complex multiply 32x16
+def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
+def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
+
+def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
+def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
+
+def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
+def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>;
+
+// Complex add/sub halfwords/words
+def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
+def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
+def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
+def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
+
+def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
+def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
+
+// Extract bitfield
+def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
+def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>;
+def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
+def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
+
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
+
+// Shift an immediate left by register amount
+def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
+
+// Vector reduce maximum halfwords
+def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
+def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
+
+// Vector reduce maximum words
+def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
+def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
+
+// Vector reduce minimum halfwords
+def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
+def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
+
+// Vector reduce minimum words
+def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
+def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
+
+// Rotate and reduce bytes
+def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
+ u2_0ImmPred:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>;
+
+// Rotate and reduce bytes with accumulation
+// Rxx+=vrcrotate(Rss,Rt,#u2)
+def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2_0ImmPred:$src4),
+ (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2_0ImmPred:$src4)>;
+
+// Vector conditional negate
+def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
+
+// Logical xor with xor accumulation
+def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
+
+// ALU64 - Vector min/max byte
+def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
+def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
+
+// Shift and add/sub/and/or
+def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
+def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>;
+def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
+def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
+def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
+def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>;
+def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
+def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
+
+// Split bitfield
+def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
+def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
+
+def: T_RR_pat<S4_parity, int_hexagon_S4_parity>;
+
+def: T_Q_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
+def: T_Q_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
+
+def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>;
+def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;
+def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;
+
+//*******************************************************************
+// ALU32/ALU
+//*******************************************************************
+
+// ALU32 / ALU / Logical Operations.
+def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
+def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
+
+//*******************************************************************
+// ALU32/PERM
+//*******************************************************************
+
+// Combine Words Into Doublewords.
+def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>;
+def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>;
+
+//*******************************************************************
+// ALU32/PRED
+//*******************************************************************
+
+// Compare
+def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>;
+def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>;
+def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>;
+
+// Compare To General Register.
+def: T_Q_RR_pat<C4_cmpneq, int_hexagon_C4_cmpneq>;
+def: T_Q_RR_pat<C4_cmplte, int_hexagon_C4_cmplte>;
+def: T_Q_RR_pat<C4_cmplteu, int_hexagon_C4_cmplteu>;
+
+def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
+def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
+
+def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
+def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
+
+//*******************************************************************
+// CR
+//*******************************************************************
+
+// CR / Logical Operations On Predicates.
+def: T_Q_QQQ_pat<C4_and_and, int_hexagon_C4_and_and>;
+def: T_Q_QQQ_pat<C4_and_andn, int_hexagon_C4_and_andn>;
+def: T_Q_QQQ_pat<C4_and_or, int_hexagon_C4_and_or>;
+def: T_Q_QQQ_pat<C4_and_orn, int_hexagon_C4_and_orn>;
+def: T_Q_QQQ_pat<C4_or_and, int_hexagon_C4_or_and>;
+def: T_Q_QQQ_pat<C4_or_andn, int_hexagon_C4_or_andn>;
+def: T_Q_QQQ_pat<C4_or_or, int_hexagon_C4_or_or>;
+def: T_Q_QQQ_pat<C4_or_orn, int_hexagon_C4_or_orn>;
+
+//*******************************************************************
+// XTYPE/ALU
+//*******************************************************************
+
+// Add And Accumulate.
+
+def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
+def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
+
+
+// XTYPE / ALU / Logical-logical Words.
+def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>;
+def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>;
+def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>;
+def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>;
+def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>;
+def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>;
+def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>;
+def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>;
+def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>;
+def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
+def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
+
+def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
+def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>;
+def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
+
+// Modulo wrap.
+def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
+
+// Arithmetic/Convergent round
+// Rd=[cround|round](Rs,Rt)[:sat]
+// Rd=[cround|round](Rs,#u5)[:sat]
+def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
+def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
+
+def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
+def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
+
+def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
+def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
+
+def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
+
//Rdd[+]=vrmpybsu(Rss,Rtt)
//Rdd[+]=vrmpybuu(Rss,Rtt)
-let Predicates = [HasV5] in {
def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>;
def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>;
@@ -31,7 +336,6 @@ def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>;
// Rd=vaddhub(Rss,Rtt):sat
def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>;
-}
def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>;
def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index f9ed03909233..985f41f3a7d9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1970,12 +1970,13 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// Get the location that may be stored across the loop. Since the access
// is strided positively through memory, we say that the modified location
// starts at the pointer and has infinite size.
- LocationSize AccessSize = MemoryLocation::UnknownSize;
+ LocationSize AccessSize = LocationSize::unknown();
// If the loop iterates a fixed number of times, we can refine the access
// size to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
+ AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
+ StoreSize);
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
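The hunk above moves from the raw UnknownSize sentinel to the LocationSize wrapper: start with LocationSize::unknown() and tighten it to a precise byte count of (BECount+1)*StoreSize once the backedge-taken count is a known constant. A minimal C++ sketch of that idea, using the same LocationSize API as the patch; the function name and parameters are illustrative, not part of the source:

  #include "llvm/Analysis/MemoryLocation.h"
  using namespace llvm;

  // Pessimistic default: the loop may modify an unbounded range of bytes.
  static LocationSize computeAccessSize(bool TripCountKnown, uint64_t BECount,
                                        uint64_t StoreSize) {
    LocationSize AccessSize = LocationSize::unknown();
    // With a constant backedge-taken count, the store covers exactly
    // (BECount + 1) * StoreSize bytes, so the size can be made precise.
    if (TripCountKnown)
      AccessSize = LocationSize::precise((BECount + 1) * StoreSize);
    return AccessSize;
  }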
@@ -2360,7 +2361,7 @@ bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
return DT->dominates(BB, EB);
};
- if (!std::all_of(ExitBlocks.begin(), ExitBlocks.end(), DominatedByBB))
+ if (!all_of(ExitBlocks, DominatedByBB))
return false;
bool MadeChange = false;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 74c550ce8226..908ce24136c7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -105,6 +105,7 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
default:
if (!ResourcesModel->canReserveResources(*SU->getInstr()))
return false;
+ break;
case TargetOpcode::EXTRACT_SUBREG:
case TargetOpcode::INSERT_SUBREG:
case TargetOpcode::SUBREG_TO_REG:
@@ -215,8 +216,7 @@ void VLIWMachineScheduler::schedule() {
++su) if (SUnits[su].getDepth() > maxD) maxD =
SUnits[su].getDepth();
dbgs() << "Max Depth " << maxD << "\n";);
- LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su]
- .dumpAll(this));
+ LLVM_DEBUG(dump());
initQueues(TopRoots, BotRoots);
@@ -489,7 +489,7 @@ void ConvergingVLIWScheduler::traceCandidate(const char *Label,
else
dbgs() << " ";
dbgs() << "cost(" << Cost << ")\t";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
}
// Very detailed queue dump, to be used with higher verbosity levels.
@@ -982,7 +982,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
<< " Scheduling instruction in cycle "
<< (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
<< reportPackets() << ")\n";
- SU->dump(DAG));
+ DAG->dumpNode(*SU));
return SU;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 29c044b3b729..c3a5bd5d57bf 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -502,7 +502,8 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
MIB.add(ImmOp);
OpStart = 4;
Changed = true;
- } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) {
+ } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset &&
+ OldMI->getOperand(2).isImm()) {
short NewOpCode = HII->changeAddrMode_io_abs(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode))
@@ -518,17 +519,19 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
- } else if (ImmOpNum == 2 && OldMI->getOperand(3).getImm() == 0) {
- short NewOpCode = HII->changeAddrMode_rr_io(*OldMI);
- assert(NewOpCode >= 0 && "Invalid New opcode\n");
- MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
- MIB.add(OldMI->getOperand(0));
- MIB.add(OldMI->getOperand(1));
- MIB.add(ImmOp);
- OpStart = 4;
- Changed = true;
- LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
- LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
+ } else if (ImmOpNum == 2) {
+ if (OldMI->getOperand(3).isImm() && OldMI->getOperand(3).getImm() == 0) {
+ short NewOpCode = HII->changeAddrMode_rr_io(*OldMI);
+ assert(NewOpCode >= 0 && "Invalid New opcode\n");
+ MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
+ MIB.add(OldMI->getOperand(0));
+ MIB.add(OldMI->getOperand(1));
+ MIB.add(ImmOp);
+ OpStart = 4;
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
+ LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
+ }
}
if (Changed)
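Both hunks in this file add the same defensive check: make sure an operand really is an immediate (MachineOperand::isImm()) before calling getImm(), which asserts on any other operand kind. A small sketch of the guard; the helper name is hypothetical:

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineOperand.h"
  using namespace llvm;

  // True only when operand OpNo exists, is an immediate, and equals Val.
  // Calling getImm() without the isImm() check would assert on register,
  // global-address, or other non-immediate operands.
  static bool isImmOperandEqual(const MachineInstr &MI, unsigned OpNo,
                                int64_t Val) {
    if (OpNo >= MI.getNumOperands())
      return false;
    const MachineOperand &MO = MI.getOperand(OpNo);
    return MO.isImm() && MO.getImm() == Val;
  }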
@@ -758,11 +761,13 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
// This could happen, for example, when DefR = R4, but the used
// register is D2.
+ // Change UseMI if replacement is possible. If any replacement failed,
+ // or wasn't attempted, make sure to keep the TFR.
+ bool Xformed = false;
if (UseMOnum >= 0 && InstrEvalResult[UseMI])
- // Change UseMI if replacement is possible.
- Changed |= xformUseMI(MI, UseMI, UseN, UseMOnum);
- else
- KeepTfr = true;
+ Xformed = xformUseMI(MI, UseMI, UseN, UseMOnum);
+ Changed |= Xformed;
+ KeepTfr |= !Xformed;
}
if (!KeepTfr)
Deleted.insert(MI);
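The rewritten loop body above records, per use, whether the rewrite of that use actually succeeded, and only deletes the register transfer (TFR) when every use was rewritten; a use that was skipped or whose rewrite failed forces the TFR to stay. A schematic sketch of the accumulation, with hypothetical names (UseInfo, Uses, rewriteUse, deleteTransfer) standing in for the pass's own structures:

  bool Changed = false;
  bool KeepTfr = false;                  // keep the TFR if any use survives
  for (UseInfo &U : Uses) {              // hypothetical list of uses
    bool Xformed = false;
    if (U.CanBeRewritten)
      Xformed = rewriteUse(U);           // hypothetical rewrite hook
    Changed |= Xformed;
    KeepTfr |= !Xformed;                 // one failure is enough to keep it
  }
  if (!KeepTfr)
    deleteTransfer();                    // safe: all uses were rewritten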
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 384fda4ce39a..89177564057e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -177,6 +177,11 @@ def UDEC32: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32);
}]>;
+class Subi<int From>: SDNodeXForm<imm,
+ "int32_t V = " # From # " - N->getSExtValue();" #
+ "return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);"
+>;
+
def Log2_32: SDNodeXForm<imm, [{
uint32_t V = N->getZExtValue();
return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
@@ -218,6 +223,8 @@ def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>;
def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>;
def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>;
def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>;
+def ToAext64: OutPatFrag<(ops node:$Rs),
+ (REG_SEQUENCE DoubleRegs, (i32 (IMPLICIT_DEF)), isub_hi, (i32 $Rs), isub_lo)>;
def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt),
(REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>;
@@ -246,6 +253,9 @@ def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
def Sext64: PatLeaf<(i64 Usxtw:$Rs)>;
+def azext: PatFrags<(ops node:$Rs), [(zext node:$Rs), (anyext node:$Rs)]>;
+def asext: PatFrags<(ops node:$Rs), [(sext node:$Rs), (anyext node:$Rs)]>;
+
def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
(PS_fi (i32 AddrFI:$Rs), imm:$off)>;
@@ -257,6 +267,23 @@ class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>;
class Not2<PatFrag P>
: PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;
+// If there is a constant operand that feeds the and/or instruction,
+// do not generate the compound instructions.
+// It is not always profitable, as sometimes we end up with a transfer.
+// Consider the example below:
+// ra = #65820; rb = lsr(rb, #8); rc ^= and (rb, ra)
+// Instead, this is preferable:
+// ra = and (#65820, lsr(ra, #8)); rb = xor(rb, ra)
+class Su_ni1<PatFrag Op>
+ : PatFrag<Op.Operands, !head(Op.Fragments), [{
+ if (hasOneUse(N)){
+ // Check if Op1 is an immediate operand.
+ SDValue Op1 = N->getOperand(1);
+ return !dyn_cast<ConstantSDNode>(Op1);
+ }
+ return false;}],
+ Op.OperandTransform>;
+
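The Su_ni1 fragment above only matches when the wrapped operation has a single use and its second operand is not a compile-time constant, which keeps immediate-fed and/or expressions (the ra = #65820 case in the comment) out of the compound-instruction patterns. The embedded predicate, written out as a standalone function over real SelectionDAG types (the function itself is illustrative):

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // Mirror of the Su_ni1 predicate: single use, and the RHS operand is not
  // an immediate (ConstantSDNode).
  static bool isSingleUseWithNonImmRHS(const SDNode *N) {
    if (!N->hasOneUse())
      return false;
    SDValue Op1 = N->getOperand(1);
    return !isa<ConstantSDNode>(Op1);
  }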
class Su<PatFrag Op>
: PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
Op.OperandTransform>;
@@ -348,38 +375,34 @@ def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>;
// --(2) Type cast -------------------------------------------------------
//
-let Predicates = [HasV5] in {
- def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
- def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
+def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
+def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
- def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
- def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
- def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
- def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
+def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
+def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
+def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
+def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
- def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
- def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
- def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
- def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
+def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
+def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
+def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
+def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
- def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
- def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
- def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
- def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
+def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
+def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
+def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
+def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
- def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
- def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
- def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
- def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
-}
+def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
+def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
+def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
+def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
-let Predicates = [HasV5] in {
- def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
- def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
- def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
- def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
-}
+def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
+def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
+def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
+def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> {
def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>;
@@ -403,52 +426,48 @@ def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>;
def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>;
def: Pat<(sext_inreg I64:$Rs, i8), (A2_sxtw (A2_sxtb (LoReg $Rs)))>;
-def: Pat<(i64 (sext I1:$Pu)),
- (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
- (C2_muxii PredRegs:$Pu, -1, 0))>;
-
-def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
-def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
-def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
-def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
-def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
-def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
-def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
-def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
-
def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>;
def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>;
def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>;
def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>;
-def: Pat<(i1 (trunc I64:$Rs)), (C2_tfrrp (LoReg $Rs))>;
+def: Pat<(i1 (trunc I32:$Rs)), (S2_tstbit_i I32:$Rs, 0)>;
+def: Pat<(i1 (trunc I64:$Rs)), (S2_tstbit_i (LoReg $Rs), 0)>;
let AddedComplexity = 20 in {
def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>;
def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>;
}
-def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
-def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+// Extensions from i1 or vectors of i1.
+def: Pat<(i32 (azext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
+def: Pat<(i64 (azext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
+def: Pat<(i64 (sext I1:$Pu)), (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
+ (C2_muxii PredRegs:$Pu, -1, 0))>;
+
+def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
+def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
+def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
+def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
+def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
def Vsplatpi: OutPatFrag<(ops node:$V),
(Combinew (A2_tfrsi $V), (A2_tfrsi $V))>;
-def: Pat<(v8i8 (zext V8I1:$Pu)),
- (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
-def: Pat<(v4i16 (zext V4I1:$Pu)),
- (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
-def: Pat<(v2i32 (zext V2I1:$Pu)),
- (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
-def: Pat<(v4i8 (zext V4I1:$Pu)),
- (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
-def: Pat<(v2i16 (zext V2I1:$Pu)),
+def: Pat<(v2i16 (azext V2I1:$Pu)),
(A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>;
+def: Pat<(v2i32 (azext V2I1:$Pu)),
+ (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
+def: Pat<(v4i8 (azext V4I1:$Pu)),
+ (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
+def: Pat<(v4i16 (azext V4I1:$Pu)),
+ (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
+def: Pat<(v8i8 (azext V8I1:$Pu)),
+ (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
-def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
-def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (azext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (azext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
@@ -582,31 +601,29 @@ def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
-let Predicates = [HasV5] in {
- def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
-
- def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
-}
+def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
+
+def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
// Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds.
@@ -729,32 +746,28 @@ class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>;
class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>;
class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>;
-let Predicates = [HasV5] in {
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
- def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
+def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
- def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
-}
+def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
+def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
-let Predicates = [HasV5] in {
- def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
- def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
- def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
- def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
- def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
- def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
-}
+def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
// --(6) Select ----------------------------------------------------------
@@ -784,32 +797,30 @@ def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
-let Predicates = [HasV5] in {
- def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
- (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
- def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
- (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
- def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
- (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
- def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
- (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
- (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
+def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
+ (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
+def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
+ (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
+def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
+ (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
+def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
+ (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
+ (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
- def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
- (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
- def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
- (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
+def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
+ (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
+def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
+ (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
- def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
- (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
- def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
- (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
-}
+def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
+ (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
+def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
+ (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt),
- (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt),
- (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
@@ -872,7 +883,7 @@ let AddedComplexity = 200 in {
defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
}
-let AddedComplexity = 100, Predicates = [HasV5] in {
+let AddedComplexity = 100 in {
defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
@@ -989,15 +1000,95 @@ def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>;
def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>;
def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>;
-let Predicates = [HasV60] in {
+// Funnel shifts.
+def IsMul8_U3: PatLeaf<(i32 imm), [{
+ uint64_t V = N->getZExtValue();
+ return V % 8 == 0 && isUInt<3>(V / 8);
+}]>;
+
+def Divu8: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i32);
+}]>;
+
+// Funnel shift-left.
+def FShl32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (HiReg (S2_asl_i_p (Combinew $Rs, $Rt), $S))>;
+def FShl32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (HiReg (S2_asl_r_p (Combinew $Rs, $Rt), $Ru))>;
+
+def FShl64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (S2_lsr_i_p_or (S2_asl_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
+def FShl64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (S2_lsr_r_p_or (S2_asl_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
+
+// Combined SDNodeXForm: (Divu8 (Subi<64> $S))
+def Divu64_8: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((64 - N->getSExtValue()) / 8,
+ SDLoc(N), MVT::i32);
+}]>;
+
+// Special cases:
+let AddedComplexity = 100 in {
+ def: Pat<(fshl I32:$Rs, I32:$Rt, (i32 16)),
+ (A2_combine_hl I32:$Rs, I32:$Rt)>;
+ def: Pat<(fshl I64:$Rs, I64:$Rt, IsMul8_U3:$S),
+ (S2_valignib I64:$Rs, I64:$Rt, (Divu64_8 $S))>;
+}
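These patterns lower the generic fshl node with Hexagon's 64-bit shifts: the two 32-bit inputs are concatenated with Combinew, shifted as a single 64-bit value, and the high word is extracted; the 64-bit form is assembled from an asl/lsr pair, and shift amounts that are multiples of 8 go to the byte-align instruction S2_valignib, with Divu64_8 turning the bit count S into the byte count (64 - S) / 8. A plain-C++ reference model of the 32-bit case, useful for checking the patterns (illustrative only):

  #include <cstdint>

  // fshl(Hi, Lo, S) for i32: shift the concatenation Hi:Lo left by S
  // (modulo 32, as the fshl node specifies) and keep the upper 32 bits --
  // the same value the FShl32i/FShl32r fragments compute with
  // S2_asl_i_p / S2_asl_r_p followed by HiReg.
  static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned S) {
    S %= 32;
    uint64_t Cat = (uint64_t(Hi) << 32) | Lo;
    return uint32_t((Cat << S) >> 32);
  }
  // fshl32(a, b, 16) == (a << 16) | (b >> 16), the A2_combine_hl special case.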
+
+let Predicates = [HasV60], AddedComplexity = 50 in {
def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>;
def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>;
}
+let AddedComplexity = 30 in {
+ def: Pat<(rotl I32:$Rs, u5_0ImmPred:$S), (FShl32i $Rs, $Rs, imm:$S)>;
+ def: Pat<(rotl I64:$Rs, u6_0ImmPred:$S), (FShl64i $Rs, $Rs, imm:$S)>;
+ def: Pat<(fshl I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShl32i $Rs, $Rt, imm:$S)>;
+ def: Pat<(fshl I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShl64i $Rs, $Rt, imm:$S)>;
+}
+def: Pat<(rotl I32:$Rs, I32:$Rt), (FShl32r $Rs, $Rs, $Rt)>;
+def: Pat<(rotl I64:$Rs, I32:$Rt), (FShl64r $Rs, $Rs, $Rt)>;
+def: Pat<(fshl I32:$Rs, I32:$Rt, I32:$Ru), (FShl32r $Rs, $Rt, $Ru)>;
+def: Pat<(fshl I64:$Rs, I64:$Rt, I32:$Ru), (FShl64r $Rs, $Rt, $Ru)>;
+
+// Funnel shift-right.
+def FShr32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (LoReg (S2_lsr_i_p (Combinew $Rs, $Rt), $S))>;
+def FShr32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (LoReg (S2_lsr_r_p (Combinew $Rs, $Rt), $Ru))>;
+
+def FShr64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (S2_asl_i_p_or (S2_lsr_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
+def FShr64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (S2_asl_r_p_or (S2_lsr_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
+
+// Special cases:
+let AddedComplexity = 100 in {
+ def: Pat<(fshr I32:$Rs, I32:$Rt, (i32 16)),
+ (A2_combine_hl I32:$Rs, I32:$Rt)>;
+ def: Pat<(fshr I64:$Rs, I64:$Rt, IsMul8_U3:$S),
+ (S2_valignib I64:$Rs, I64:$Rt, (Divu8 $S))>;
+}
+
+let Predicates = [HasV60], AddedComplexity = 50 in {
+ def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (S6_rol_i_r I32:$Rs, (Subi<32> $S))>;
+ def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (S6_rol_i_p I64:$Rs, (Subi<64> $S))>;
+}
+let AddedComplexity = 30 in {
+ def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (FShr32i $Rs, $Rs, imm:$S)>;
+ def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (FShr64i $Rs, $Rs, imm:$S)>;
+ def: Pat<(fshr I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShr32i $Rs, $Rt, imm:$S)>;
+ def: Pat<(fshr I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShr64i $Rs, $Rt, imm:$S)>;
+}
+def: Pat<(rotr I32:$Rs, I32:$Rt), (FShr32r $Rs, $Rs, $Rt)>;
+def: Pat<(rotr I64:$Rs, I32:$Rt), (FShr64r $Rs, $Rs, $Rt)>;
+def: Pat<(fshr I32:$Rs, I32:$Rt, I32:$Ru), (FShr32r $Rs, $Rt, $Ru)>;
+def: Pat<(fshr I64:$Rs, I64:$Rt, I32:$Ru), (FShr64r $Rs, $Rt, $Ru)>;
+
def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
(S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)),
- (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5]>;
+ (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>;
// Prefer S2_addasl_rrri over S2_asl_i_r_acc.
let AddedComplexity = 120 in
@@ -1119,14 +1210,6 @@ def: Pat<(or (or (or (shl (Zext64 (and I32:$b, (i32 65535))), (i32 16)),
(Combinew (A2_combine_ll I32:$d, I32:$c),
(A2_combine_ll I32:$b, I32:$a))>;
-def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))),
- (i32 8)),
- (i32 (zextloadi8 (add I32:$b, 2)))),
- (i32 16)),
- (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
- (zextloadi8 I32:$b)),
- (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
-
let AddedComplexity = 200 in {
def: Pat<(or (shl I32:$Rt, (i32 16)), (and I32:$Rs, (i32 65535))),
(A2_combine_ll I32:$Rt, I32:$Rs)>;
@@ -1172,6 +1255,19 @@ def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
(S2_asl_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(HexagonVASR V2I16:$Rs, u4_0ImmPred:$S),
+ (LoReg (S2_asr_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVASL V2I16:$Rs, u4_0ImmPred:$S),
+ (LoReg (S2_asl_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVLSR V2I16:$Rs, u4_0ImmPred:$S),
+ (LoReg (S2_lsr_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVASR V2I16:$Rs, I32:$Rt),
+ (LoReg (S2_asr_i_vh (ToAext64 $Rs), I32:$Rt))>;
+def: Pat<(HexagonVASL V2I16:$Rs, I32:$Rt),
+ (LoReg (S2_asl_i_vh (ToAext64 $Rs), I32:$Rt))>;
+def: Pat<(HexagonVLSR V2I16:$Rs, I32:$Rt),
+ (LoReg (S2_lsr_i_vh (ToAext64 $Rs), I32:$Rt))>;
+
// --(9) Arithmetic/bitwise ----------------------------------------------
//
@@ -1182,17 +1278,15 @@ def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>;
-let Predicates = [HasV5] in {
- def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
- def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
+def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
+def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
- def: Pat<(fabs F64:$Rs),
- (Combinew (S2_clrbit_i (HiReg $Rs), 31),
- (i32 (LoReg $Rs)))>;
- def: Pat<(fneg F64:$Rs),
- (Combinew (S2_togglebit_i (HiReg $Rs), 31),
- (i32 (LoReg $Rs)))>;
-}
+def: Pat<(fabs F64:$Rs),
+ (Combinew (S2_clrbit_i (HiReg $Rs), 31),
+ (i32 (LoReg $Rs)))>;
+def: Pat<(fneg F64:$Rs),
+ (Combinew (S2_togglebit_i (HiReg $Rs), 31),
+ (i32 (LoReg $Rs)))>;
def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>;
def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
@@ -1258,12 +1352,15 @@ def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>;
def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>;
def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>;
-let Predicates = [HasV5] in {
- def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
- def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
- def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
- def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
- def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
+def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
+def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
+def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
+def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
+def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
+
+let Predicates = [HasV66] in {
+ def: OpR_RR_pat<F2_dfadd, pf2<fadd>, f64, F64>;
+ def: OpR_RR_pat<F2_dfsub, pf2<fsub>, f64, F64>;
}
// In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add,
@@ -1272,6 +1369,8 @@ let AddedComplexity = 10 in {
def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>;
def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>;
def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>;
+ let Predicates = [HasV66] in
+ def: AccRRR_pat<M2_mnaci, Sub, Su<Mul>, I32, I32, I32>;
}
def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>;
@@ -1344,16 +1443,16 @@ def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8),
def: Pat<(add Sext64:$Rs, I64:$Rt),
(A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
-def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32, I32>;
-def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32, I32>;
-def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
-def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32, I32>;
-def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32, I32>;
-def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
-def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32, I32>;
-def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32, I32>;
-def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
-def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
+def: AccRRR_pat<M4_and_and, And, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_or, And, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_and, Or, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_or, Or, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_and, Xor, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_or, Xor, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
// For dags like (or (and (not _), _), (shl _, _)) where the "or" with
// one argument matches the patterns below, and with the other argument
@@ -1497,14 +1596,12 @@ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
(M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
-let Predicates = [HasV5] in {
- def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
- (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
- def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
- (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
- def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
- (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-}
+def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
+ (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
+def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
+ (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
+def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
+ (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
@@ -1515,9 +1612,9 @@ def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
// Add/subtract two v4i8: Hexagon does not have an insn for this one, so
// we use the double add v8i8, and use only the low part of the result.
def: Pat<(add V4I8:$Rs, V4I8:$Rt),
- (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (A2_vaddub (ToAext64 $Rs), (ToAext64 $Rt)))>;
def: Pat<(sub V4I8:$Rs, V4I8:$Rt),
- (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (A2_vsubub (ToAext64 $Rs), (ToAext64 $Rt)))>;
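As the comment above says, there is no 32-bit v4i8 add or subtract, so each operand is widened to 64 bits (with ToAext64: the upper half may be undefined, so the zero-extending form is not required), the v8i8 instruction does the work, and the low 32 bits are the result. A plain-C++ reference model of the byte-wise, wrapping add that A2_vaddub performs on the low four lanes (illustrative only):

  #include <cstdint>

  // Byte-wise modular add of two v4i8 values packed in uint32_t -- the value
  // produced by (LoReg (A2_vaddub (ToAext64 $Rs), (ToAext64 $Rt))).
  static uint32_t addv4i8(uint32_t A, uint32_t B) {
    uint32_t R = 0;
    for (int I = 0; I != 4; ++I) {
      uint32_t Ab = (A >> (8 * I)) & 0xff;
      uint32_t Bb = (B >> (8 * I)) & 0xff;
      R |= ((Ab + Bb) & 0xff) << (8 * I);   // each lane wraps modulo 256
    }
    return R;
  }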
// Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two
// half-words, and saturates the result to a 32-bit value, except the
@@ -1531,14 +1628,12 @@ def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
// Multiplies two v4i8 vectors.
def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
- (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>,
- Requires<[HasV5]>;
+ (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>;
// Multiplies two v8i8 vectors.
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
(Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))),
- (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>,
- Requires<[HasV5]>;
+ (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>;
// --(10) Bit ------------------------------------------------------------
@@ -1868,10 +1963,10 @@ let AddedComplexity = 20 in {
}
let AddedComplexity = 30 in {
- defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
- defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
- defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
- defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
+ defm: Loadxim_pat<extloadi1, i64, ToAext64, anyimm0, L2_loadrub_io>;
+ defm: Loadxim_pat<extloadi8, i64, ToAext64, anyimm0, L2_loadrub_io>;
+ defm: Loadxim_pat<extloadi16, i64, ToAext64, anyimm1, L2_loadruh_io>;
+ defm: Loadxim_pat<extloadi32, i64, ToAext64, anyimm2, L2_loadri_io>;
defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
@@ -1906,13 +2001,13 @@ let AddedComplexity = 60 in {
def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>;
def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
- def: Loadxum_pat<extloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
+ def: Loadxum_pat<extloadi8, i64, anyimm0, ToAext64, L4_loadrub_ur>;
def: Loadxum_pat<sextloadi16, i64, anyimm1, ToSext64, L4_loadrh_ur>;
def: Loadxum_pat<zextloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>;
- def: Loadxum_pat<extloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>;
+ def: Loadxum_pat<extloadi16, i64, anyimm1, ToAext64, L4_loadruh_ur>;
def: Loadxum_pat<sextloadi32, i64, anyimm2, ToSext64, L4_loadri_ur>;
def: Loadxum_pat<zextloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
- def: Loadxum_pat<extloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
+ def: Loadxum_pat<extloadi32, i64, anyimm2, ToAext64, L4_loadri_ur>;
}
let AddedComplexity = 40 in {
@@ -1952,25 +2047,25 @@ let AddedComplexity = 20 in {
}
let AddedComplexity = 40 in {
- def: Loadxrm_shl_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
+ def: Loadxrm_shl_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
- def: Loadxrm_shl_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
+ def: Loadxrm_shl_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
- def: Loadxrm_shl_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
+ def: Loadxrm_shl_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
}
let AddedComplexity = 20 in {
- def: Loadxrm_add_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
+ def: Loadxrm_add_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
- def: Loadxrm_add_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
+ def: Loadxrm_add_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
- def: Loadxrm_add_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
+ def: Loadxrm_add_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
}
@@ -2002,13 +2097,13 @@ let AddedComplexity = 60 in {
}
let AddedComplexity = 30 in {
- def: Loadam_pat<extloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>;
+ def: Loadam_pat<extloadi8, i64, anyimm0, ToAext64, PS_loadrubabs>;
def: Loadam_pat<sextloadi8, i64, anyimm0, ToSext64, PS_loadrbabs>;
def: Loadam_pat<zextloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>;
- def: Loadam_pat<extloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>;
+ def: Loadam_pat<extloadi16, i64, anyimm1, ToAext64, PS_loadruhabs>;
def: Loadam_pat<sextloadi16, i64, anyimm1, ToSext64, PS_loadrhabs>;
def: Loadam_pat<zextloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>;
- def: Loadam_pat<extloadi32, i64, anyimm2, ToZext64, PS_loadriabs>;
+ def: Loadam_pat<extloadi32, i64, anyimm2, ToAext64, PS_loadriabs>;
def: Loadam_pat<sextloadi32, i64, anyimm2, ToSext64, PS_loadriabs>;
def: Loadam_pat<zextloadi32, i64, anyimm2, ToZext64, PS_loadriabs>;
@@ -2044,13 +2139,13 @@ let AddedComplexity = 100 in {
}
let AddedComplexity = 70 in {
- def: Loadam_pat<extloadi8, i64, addrgp, ToZext64, L2_loadrubgp>;
+ def: Loadam_pat<extloadi8, i64, addrgp, ToAext64, L2_loadrubgp>;
def: Loadam_pat<sextloadi8, i64, addrgp, ToSext64, L2_loadrbgp>;
def: Loadam_pat<zextloadi8, i64, addrgp, ToZext64, L2_loadrubgp>;
- def: Loadam_pat<extloadi16, i64, addrgp, ToZext64, L2_loadruhgp>;
+ def: Loadam_pat<extloadi16, i64, addrgp, ToAext64, L2_loadruhgp>;
def: Loadam_pat<sextloadi16, i64, addrgp, ToSext64, L2_loadrhgp>;
def: Loadam_pat<zextloadi16, i64, addrgp, ToZext64, L2_loadruhgp>;
- def: Loadam_pat<extloadi32, i64, addrgp, ToZext64, L2_loadrigp>;
+ def: Loadam_pat<extloadi32, i64, addrgp, ToAext64, L2_loadrigp>;
def: Loadam_pat<sextloadi32, i64, addrgp, ToSext64, L2_loadrigp>;
def: Loadam_pat<zextloadi32, i64, addrgp, ToZext64, L2_loadrigp>;
@@ -2306,16 +2401,26 @@ let AddedComplexity = 140 in {
// GP-relative address
let AddedComplexity = 120 in {
- def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
- def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
- def: Storea_pat<store, I32, addrgp, S2_storerigp>;
- def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
- def: Storea_pat<store, F32, addrgp, S2_storerigp>;
- def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
- def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
- def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
- def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
- def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+ def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<store, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, V4I8, addrgp, S2_storerigp>;
+ def: Storea_pat<store, V2I16, addrgp, S2_storerigp>;
+ def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, V8I8, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, V4I16, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, V2I32, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, F32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V4I8, addrgp, S2_storerigp>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V2I16, addrgp, S2_storerigp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V8I8, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V4I16, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V2I32, addrgp, S2_storerdgp>;
def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>;
def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>;
@@ -2325,16 +2430,26 @@ let AddedComplexity = 120 in {
// Absolute address
let AddedComplexity = 110 in {
- def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
- def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
- def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
- def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
- def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
- def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
- def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
- def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
- def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
- def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
+ def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, V4I8, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, V2I16, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, V8I8, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, V4I16, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, V2I32, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
+ def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
+ def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, PS_storeriabs>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, PS_storeriabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, PS_storerdabs>;
def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>;
def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>;
@@ -2344,12 +2459,17 @@ let AddedComplexity = 110 in {
// Reg<<S + Imm
let AddedComplexity = 100 in {
- def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
- def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
- def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
- def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
- def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
- def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
+ def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
+ def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, V4I8, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, V2I16, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, V8I8, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, V4I16, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, V2I32, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;
def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)),
(S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>;
@@ -2357,12 +2477,17 @@ let AddedComplexity = 100 in {
// Reg<<S + Reg
let AddedComplexity = 90 in {
- def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>;
- def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>;
- def: Storexr_shl_pat<store, I32, S4_storeri_rr>;
- def: Storexr_shl_pat<store, I64, S4_storerd_rr>;
- def: Storexr_shl_pat<store, F32, S4_storeri_rr>;
- def: Storexr_shl_pat<store, F64, S4_storerd_rr>;
+ def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Storexr_shl_pat<store, I32, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, V4I8, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, V2I16, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, I64, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, V8I8, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, V4I16, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, V2I32, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, F32, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, F64, S4_storerd_rr>;
def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)),
(S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>;
@@ -2414,20 +2539,30 @@ let AddedComplexity = 70 in {
// Fi+Imm, Fi, store-register
let AddedComplexity = 60 in {
- defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>;
- defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>;
- defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>;
- defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>;
- defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>;
- defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>;
+ defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>;
+ defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, V4I8, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, V2I16, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, V8I8, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, V4I16, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, V2I32, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>;
defm: Storexim_fi_add_pat<store, I1, anyimm, I1toI32, S2_storerb_io>;
- def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>;
- def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>;
- def: Storexi_fi_pat<store, I32, S2_storeri_io>;
- def: Storexi_fi_pat<store, I64, S2_storerd_io>;
- def: Storexi_fi_pat<store, F32, S2_storeri_io>;
- def: Storexi_fi_pat<store, F64, S2_storerd_io>;
+ def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>;
+ def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>;
+ def: Storexi_fi_pat<store, I32, S2_storeri_io>;
+ def: Storexi_fi_pat<store, V4I8, S2_storeri_io>;
+ def: Storexi_fi_pat<store, V2I16, S2_storeri_io>;
+ def: Storexi_fi_pat<store, I64, S2_storerd_io>;
+ def: Storexi_fi_pat<store, V8I8, S2_storerd_io>;
+ def: Storexi_fi_pat<store, V4I16, S2_storerd_io>;
+ def: Storexi_fi_pat<store, V2I32, S2_storerd_io>;
+ def: Storexi_fi_pat<store, F32, S2_storeri_io>;
+ def: Storexi_fi_pat<store, F64, S2_storerd_io>;
def: Storexim_fi_pat<store, I1, I1toI32, S2_storerb_io>;
}
@@ -2452,32 +2587,47 @@ let AddedComplexity = 50 in {
// Reg+Imm, store-register
let AddedComplexity = 40 in {
- defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>;
- defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>;
- defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>;
- defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>;
+ defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>;
+ defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, V4I8, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, V2I16, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, V8I8, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, V4I16, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, V2I32, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>;
defm: Storexim_pat<truncstorei8, I64, anyimm0, LoReg, S2_storerb_io>;
defm: Storexim_pat<truncstorei16, I64, anyimm1, LoReg, S2_storerh_io>;
defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg, S2_storeri_io>;
defm: Storexim_pat<store, I1, anyimm0, I1toI32, S2_storerb_io>;
- defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>;
- defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
- defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, S2_storerd_io>;
}
// Reg+Reg
let AddedComplexity = 30 in {
- def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>;
- def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>;
- def: Storexr_add_pat<store, I32, S4_storeri_rr>;
- def: Storexr_add_pat<store, I64, S4_storerd_rr>;
- def: Storexr_add_pat<store, F32, S4_storeri_rr>;
- def: Storexr_add_pat<store, F64, S4_storerd_rr>;
+ def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Storexr_add_pat<store, I32, S4_storeri_rr>;
+ def: Storexr_add_pat<store, V4I8, S4_storeri_rr>;
+ def: Storexr_add_pat<store, V2I16, S4_storeri_rr>;
+ def: Storexr_add_pat<store, I64, S4_storerd_rr>;
+ def: Storexr_add_pat<store, V8I8, S4_storerd_rr>;
+ def: Storexr_add_pat<store, V4I16, S4_storerd_rr>;
+ def: Storexr_add_pat<store, V2I32, S4_storerd_rr>;
+ def: Storexr_add_pat<store, F32, S4_storeri_rr>;
+ def: Storexr_add_pat<store, F64, S4_storerd_rr>;
def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)),
(S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>;
@@ -2496,22 +2646,32 @@ let AddedComplexity = 20 in {
// Reg, store-register
let AddedComplexity = 10 in {
- def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>;
- def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>;
- def: Storexi_base_pat<store, I32, S2_storeri_io>;
- def: Storexi_base_pat<store, I64, S2_storerd_io>;
- def: Storexi_base_pat<store, F32, S2_storeri_io>;
- def: Storexi_base_pat<store, F64, S2_storerd_io>;
+ def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>;
+ def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>;
+ def: Storexi_base_pat<store, I32, S2_storeri_io>;
+ def: Storexi_base_pat<store, V4I8, S2_storeri_io>;
+ def: Storexi_base_pat<store, V2I16, S2_storeri_io>;
+ def: Storexi_base_pat<store, I64, S2_storerd_io>;
+ def: Storexi_base_pat<store, V8I8, S2_storerd_io>;
+ def: Storexi_base_pat<store, V4I16, S2_storerd_io>;
+ def: Storexi_base_pat<store, V2I32, S2_storerd_io>;
+ def: Storexi_base_pat<store, F32, S2_storeri_io>;
+ def: Storexi_base_pat<store, F64, S2_storerd_io>;
def: Storexim_base_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
def: Storexim_base_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
def: Storexim_base_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
def: Storexim_base_pat<store, I1, I1toI32, S2_storerb_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_32>, V4I8, S2_storeri_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_32>, V2I16, S2_storeri_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, V8I8, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, V4I16, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, V2I32, S2_storerd_io>;
}
@@ -2922,6 +3082,8 @@ def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)),
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
def: Pat<(HexagonBARRIER), (Y2_barrier)>;
+def: Pat<(trap), (J2_trap0 (i32 0))>;
+
// Read cycle counter.
def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td b/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td
index fd7466349ecd..b9748c7e189c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -208,6 +208,7 @@ class Call_nr<bits<5> nbits, bit isPred, bit isFalse, dag iops,
let isPredicable = 0; // !if(isPred, 0, 1);
let isPredicated = 0; // isPred;
let isPredicatedFalse = isFalse;
+ let Itinerary = itin;
}
def PS_call_nr : Call_nr<24, 0, 0, (ins s32_0Imm:$Ii), J2_call.Itinerary>;
@@ -525,11 +526,11 @@ let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS],
addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in {
def NAME#_pci : LDInst<(outs RC:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_4403ca65>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_e93a3d71>;
def NAME#_pcr : LDInst<(outs RC:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_2fc0c436>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_44d3da28>;
}
}
@@ -546,11 +547,11 @@ let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS],
addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in {
def NAME#_pci : STInst<(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_9fdb5406>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_e86aa961>;
def NAME#_pcr : STInst<(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_f86c328a>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_da97ee82>;
}
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 2e11f875c0f9..9b8f4e07376f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -118,18 +118,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool HasEHReturn = MF->getInfo<HexagonMachineFunctionInfo>()->hasEHReturn();
- switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
- case Hexagon::ArchEnum::V4:
- case Hexagon::ArchEnum::V5:
- case Hexagon::ArchEnum::V55:
- case Hexagon::ArchEnum::V60:
- case Hexagon::ArchEnum::V62:
- case Hexagon::ArchEnum::V65:
- return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
- }
-
- llvm_unreachable("Callee saved registers requested for unknown architecture "
- "version");
+ return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
}
@@ -323,6 +312,7 @@ unsigned HexagonRegisterInfo::getHexagonSubRegIndex(
static const unsigned ISub[] = { Hexagon::isub_lo, Hexagon::isub_hi };
static const unsigned VSub[] = { Hexagon::vsub_lo, Hexagon::vsub_hi };
+ static const unsigned WSub[] = { Hexagon::wsub_lo, Hexagon::wsub_hi };
switch (RC.getID()) {
case Hexagon::CtrRegs64RegClassID:
@@ -330,6 +320,8 @@ unsigned HexagonRegisterInfo::getHexagonSubRegIndex(
return ISub[GenIdx];
case Hexagon::HvxWRRegClassID:
return VSub[GenIdx];
+ case Hexagon::HvxVQRRegClassID:
+ return WSub[GenIdx];
}
if (const TargetRegisterClass *SuperRC = *RC.getSuperClasses())
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index 497dc45236b1..3e7b63a462f0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -39,8 +39,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum, RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index 1fe1ef4ac572..da90911e2c05 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -82,13 +82,14 @@ let Namespace = "Hexagon" in {
def isub_hi : SubRegIndex<32, 32>;
def vsub_lo : SubRegIndex<512>;
def vsub_hi : SubRegIndex<512, 512>;
+ def wsub_lo : SubRegIndex<1024>;
+ def wsub_hi : SubRegIndex<1024, 1024>;
def subreg_overflow : SubRegIndex<1, 0>;
// Integer registers.
foreach i = 0-28 in {
def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>;
}
-
def R29 : Ri<29, "r29", ["sp"]>, DwarfRegNum<[29]>;
def R30 : Ri<30, "r30", ["fp"]>, DwarfRegNum<[30]>;
def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>;
@@ -206,6 +207,18 @@ let Namespace = "Hexagon" in {
def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>;
}
+ // Aliases of the V* registers used to hold quad vec values.
+ let SubRegIndices = [wsub_lo, wsub_hi], CoveredBySubRegs = 1 in {
+ def VQ0 : Rd< 0, "v3:0", [W0, W1]>, DwarfRegNum<[252]>;
+ def VQ1 : Rd< 4, "v7:4", [W2, W3]>, DwarfRegNum<[253]>;
+ def VQ2 : Rd< 8, "v11:8", [W4, W5]>, DwarfRegNum<[254]>;
+ def VQ3 : Rd<12, "v15:12", [W6, W7]>, DwarfRegNum<[255]>;
+ def VQ4 : Rd<16, "v19:16", [W8, W9]>, DwarfRegNum<[256]>;
+ def VQ5 : Rd<20, "v23:20", [W10, W11]>, DwarfRegNum<[257]>;
+ def VQ6 : Rd<24, "v27:24", [W12, W13]>, DwarfRegNum<[258]>;
+ def VQ7 : Rd<28, "v31:28", [W14, W15]>, DwarfRegNum<[259]>;
+ }
+
// Vector Predicate registers.
def Q0 : Rq<0, "q0">, DwarfRegNum<[131]>;
def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>;
@@ -295,29 +308,6 @@ def VecQ32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
// HVX register classes
-// Register classes.
-//
-// FIXME: the register order should be defined in terms of the preferred
-// allocation order...
-//
-def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
- (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28),
- R10, R11, R29, R30, R31)>;
-
-// Registers are listed in reverse order for allocation preference reasons.
-def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32,
- (add R23, R22, R21, R20, R19, R18, R17, R16,
- R7, R6, R5, R4, R3, R2, R1, R0)>;
-
-def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
- (add R7, R6, R5, R4, R3, R2, R1, R0)> ;
-
-def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
- (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>;
-
-def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,
- (add D11, D10, D9, D8, D3, D2, D1, D0)>;
-
def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32], 512,
(add (sequence "V%u", 0, 31), VTMP)> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
@@ -336,6 +326,32 @@ def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 512,
[RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>;
}
+def HvxVQR : RegisterClass<"Hexagon", [untyped], 2048,
+ (add (sequence "VQ%u", 0, 7))> {
+ let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [RegInfo<2048,2048,2048>, RegInfo<4096,4096,4096>, RegInfo<2048,2048,2048>]>;
+}
+
+// Core register classes
+
+def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
+ (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28),
+ R10, R11, R29, R30, R31)>;
+
+// Registers are listed in reverse order for allocation preference reasons.
+def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add R23, R22, R21, R20, R19, R18, R17, R16,
+ R7, R6, R5, R4, R3, R2, R1, R0)>;
+
+def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
+ (add R7, R6, R5, R4, R3, R2, R1, R0)> ;
+
+def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
+ (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>;
+
+def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,
+ (add D11, D10, D9, D8, D3, D2, D1, D0)>;
+
let Size = 32 in
def PredRegs : RegisterClass<"Hexagon",
[i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, (add P0, P1, P2, P3)>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
index a1dfb66017a5..1024198e9b3f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -27,6 +27,7 @@ def CVI_SHIFT : FuncUnit;
def CVI_MPY0 : FuncUnit;
def CVI_MPY1 : FuncUnit;
def CVI_LD : FuncUnit;
+def CVI_ZW : FuncUnit; // Z register write port
// Combined functional units.
def CVI_XLSHF : FuncUnit;
@@ -57,10 +58,10 @@ include "HexagonDepIICScalar.td"
include "HexagonDepIICHVX.td"
//===----------------------------------------------------------------------===//
-// V4 Machine Info +
+// V5 Machine Info +
//===----------------------------------------------------------------------===//
-include "HexagonScheduleV4.td"
+include "HexagonScheduleV5.td"
// V55 Machine Info +
include "HexagonScheduleV55.td"
@@ -84,3 +85,9 @@ include "HexagonScheduleV62.td"
//===----------------------------------------------------------------------===//
include "HexagonScheduleV65.td"
+
+//===----------------------------------------------------------------------===//
+// V66 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV66.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV5.td
index 69b704a805b8..9a893f6dde02 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV5.td
@@ -1,4 +1,4 @@
-//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
+//=-HexagonScheduleV5.td - HexagonV5 Scheduling Definitions --*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -10,8 +10,8 @@
def LD_tc_ld_SLOT01 : InstrItinClass;
def ST_tc_st_SLOT01 : InstrItinClass;
-class HexagonV4PseudoItin {
- list<InstrItinData> V4PseudoItin_list = [
+class HexagonV5PseudoItin {
+ list<InstrItinData> V5PseudoItin_list = [
InstrItinData<PSEUDO, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -20,27 +20,27 @@ class HexagonV4PseudoItin {
];
}
-def HexagonV4ItinList : DepScalarItinV4, HexagonV4PseudoItin {
- list<InstrItinData> V4Itin_list = [
+def HexagonV5ItinList : DepScalarItinV5, HexagonV5PseudoItin {
+ list<InstrItinData> V5Itin_list = [
InstrItinData<LD_tc_ld_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData<ST_tc_st_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>
];
list<InstrItinData> ItinList =
- !listconcat(V4Itin_list, DepScalarItinV4_list, V4PseudoItin_list);
+ !listconcat(V5Itin_list, DepScalarItinV5_list, V5PseudoItin_list);
}
-def HexagonItinerariesV4 :
+def HexagonItinerariesV5 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP],
- [Hex_FWD], HexagonV4ItinList.ItinList>;
+ [Hex_FWD], HexagonV5ItinList.ItinList>;
-def HexagonModelV4 : SchedMachineModel {
+def HexagonModelV5 : SchedMachineModel {
// Max issue per cycle == bundle width.
let IssueWidth = 4;
- let Itineraries = HexagonItinerariesV4;
+ let Itineraries = HexagonItinerariesV5;
let LoadLatency = 1;
let CompleteModel = 0;
}
//===----------------------------------------------------------------------===//
-// Hexagon V4 Resource Definitions -
+// Hexagon V5 Resource Definitions -
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
index a2544c92a72c..861a8d2b0339 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
@@ -65,7 +65,7 @@ def HexagonItinerariesV60 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
- CVI_ALL_NOMEM],
+ CVI_ALL_NOMEM, CVI_ZW],
[Hex_FWD, HVX_FWD], HexagonV60ItinList.ItinList>;
def HexagonModelV60 : SchedMachineModel {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td
index a0a8595f185f..1c274191277c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td
@@ -21,7 +21,7 @@ def HexagonItinerariesV62 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
- CVI_ALL_NOMEM],
+ CVI_ALL_NOMEM, CVI_ZW],
[Hex_FWD, HVX_FWD], HexagonV62ItinList.ItinList>;
def HexagonModelV62 : SchedMachineModel {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td
index e3b1313923f5..46a79d521795 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td
@@ -23,7 +23,7 @@ def HexagonItinerariesV65 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
- CVI_ALL_NOMEM],
+ CVI_ALL_NOMEM, CVI_ZW],
[Hex_FWD, HVX_FWD],
HexagonV65ItinList.ItinList>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td
new file mode 100644
index 000000000000..38e3d21d3701
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td
@@ -0,0 +1,41 @@
+//=-HexagonScheduleV66.td - HexagonV66 Scheduling Definitions *- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// ScalarItin and HVXItin contain some old itineraries
+// still used by a handful of instructions. Hopefully, we will be able
+// to get rid of them soon.
+
+def HexagonV66ItinList : DepScalarItinV66, ScalarItin,
+ DepHVXItinV66, HVXItin, PseudoItin {
+ list<InstrItinData> ItinList =
+ !listconcat(DepScalarItinV66_list, ScalarItin_list,
+ DepHVXItinV66_list, HVXItin_list, PseudoItin_list);
+}
+
+def HexagonItinerariesV66 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
+ CVI_ALL_NOMEM, CVI_ZW],
+ [Hex_FWD, HVX_FWD],
+ HexagonV66ItinList.ItinList>;
+
+def HexagonModelV66 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV66;
+ let LoadLatency = 1;
+ let CompleteModel = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V66 Resource Definitions -
+//===----------------------------------------------------------------------===//
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index c41f0d3c085c..55de25120943 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -63,7 +63,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
auto &HST = Fn.getSubtarget<HexagonSubtarget>();
auto &HTM = static_cast<const HexagonTargetMachine&>(Fn.getTarget());
auto &TLOF = *HTM.getObjFileLowering();
- if (HST.useSmallData() && TLOF.isSmallDataEnabled())
+ if (HST.useSmallData() && TLOF.isSmallDataEnabled(HTM))
return false;
const TargetInstrInfo *TII = HST.getInstrInfo();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
index 991af047387e..61c2121163b8 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -578,7 +578,7 @@ bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
};
for (auto &G : SGs) {
assert(G.size() > 1 && "Store group with fewer than 2 elements");
- llvm::sort(G.begin(), G.end(), Less);
+ llvm::sort(G, Less);
Changed |= processStoreGroup(G);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index 0686d6eb6118..9c77135c2f2f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -93,12 +93,12 @@ HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
static std::map<StringRef, Hexagon::ArchEnum> CpuTable{
{"generic", Hexagon::ArchEnum::V60},
- {"hexagonv4", Hexagon::ArchEnum::V4},
{"hexagonv5", Hexagon::ArchEnum::V5},
{"hexagonv55", Hexagon::ArchEnum::V55},
{"hexagonv60", Hexagon::ArchEnum::V60},
{"hexagonv62", Hexagon::ArchEnum::V62},
{"hexagonv65", Hexagon::ArchEnum::V65},
+ {"hexagonv66", Hexagon::ArchEnum::V66},
};
auto FoundIt = CpuTable.find(CPUString);
@@ -276,11 +276,11 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
if (!L0.mayLoad() || L0.mayStore() ||
HII.getAddrMode(L0) != HexagonII::BaseImmOffset)
continue;
- int Offset0;
+ int64_t Offset0;
unsigned Size0;
- unsigned Base0 = HII.getBaseAndOffset(L0, Offset0, Size0);
+ MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
// If the access size is longer than the L1 cache line, skip the check.
- if (Base0 == 0 || Size0 >= 32)
+ if (BaseOp0 == nullptr || !BaseOp0->isReg() || Size0 >= 32)
continue;
// Scan only up to 32 instructions ahead (to avoid n^2 complexity).
for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
@@ -289,10 +289,11 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
if (!L1.mayLoad() || L1.mayStore() ||
HII.getAddrMode(L1) != HexagonII::BaseImmOffset)
continue;
- int Offset1;
+ int64_t Offset1;
unsigned Size1;
- unsigned Base1 = HII.getBaseAndOffset(L1, Offset1, Size1);
- if (Base1 == 0 || Size1 >= 32 || Base0 != Base1)
+ MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
+ if (BaseOp1 == nullptr || !BaseOp1->isReg() || Size1 >= 32 ||
+ BaseOp0->getReg() != BaseOp1->getReg())
continue;
// Check bits 3 and 4 of the offset: if they differ, a bank conflict
// is unlikely.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index dc8d173a5057..3a5acb53682c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -52,14 +52,16 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool UseNewValueJumps = false;
bool UseNewValueStores = false;
bool UseSmallData = false;
+ bool UseZRegOps = false;
bool HasMemNoShuf = false;
bool EnableDuplex = false;
bool ReservedR19 = false;
+ bool NoreturnStackElim = false;
public:
Hexagon::ArchEnum HexagonArchVersion;
- Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::V4;
+ Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::NoArch;
CodeGenOpt::Level OptLevel;
/// True if the target should use Back-Skip-Back scheduling. This is the
/// default for V60.
@@ -150,6 +152,12 @@ public:
bool hasV65OpsOnly() const {
return getHexagonArchVersion() == Hexagon::ArchEnum::V65;
}
+ bool hasV66Ops() const {
+ return getHexagonArchVersion() >= Hexagon::ArchEnum::V66;
+ }
+ bool hasV66OpsOnly() const {
+ return getHexagonArchVersion() == Hexagon::ArchEnum::V66;
+ }
bool useLongCalls() const { return UseLongCalls; }
bool useMemops() const { return UseMemops; }
@@ -157,8 +165,11 @@ public:
bool useNewValueJumps() const { return UseNewValueJumps; }
bool useNewValueStores() const { return UseNewValueStores; }
bool useSmallData() const { return UseSmallData; }
+ bool useZRegOps() const { return UseZRegOps; }
- bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::V4; }
+ bool useHVXOps() const {
+ return HexagonHVXVersion > Hexagon::ArchEnum::NoArch;
+ }
bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; }
bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; }
@@ -166,6 +177,8 @@ public:
bool hasReservedR19() const { return ReservedR19; }
bool usePredicatedCalls() const;
+ bool noreturnStackElim() const { return NoreturnStackElim; }
+
bool useBSBScheduling() const { return UseBSBScheduling; }
bool enableMachineScheduler() const override;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 2c75e9139ad7..ddfda7e27793 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -97,6 +97,10 @@ static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
static cl::opt<bool> EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden,
cl::ZeroOrMore, cl::init(true), cl::desc("Enable vextract optimization"));
+static cl::opt<bool> EnableInitialCFGCleanup("hexagon-initial-cfg-cleanup",
+ cl::Hidden, cl::ZeroOrMore, cl::init(true),
+ cl::desc("Simplify the CFG after atomic expansion pass"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
/// library. In particular, it seems that it is not possible to get
@@ -149,7 +153,6 @@ namespace llvm {
FunctionPass *createHexagonCopyToCombine();
FunctionPass *createHexagonEarlyIfConversion();
FunctionPass *createHexagonFixupHwLoops();
- FunctionPass *createHexagonGatherPacketize();
FunctionPass *createHexagonGenExtract();
FunctionPass *createHexagonGenInsert();
FunctionPass *createHexagonGenMux();
@@ -161,7 +164,7 @@ namespace llvm {
FunctionPass *createHexagonNewValueJump();
FunctionPass *createHexagonOptimizeSZextends();
FunctionPass *createHexagonOptAddrMode();
- FunctionPass *createHexagonPacketizer();
+ FunctionPass *createHexagonPacketizer(bool Minimal);
FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonRDFOpt();
FunctionPass *createHexagonSplitConst32AndConst64();
@@ -177,12 +180,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
extern "C" void LLVMInitializeHexagonTarget() {
// Register the target.
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
@@ -219,7 +216,8 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
"i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-"
"v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), (HexagonNoOpt ? CodeGenOpt::None : OL)),
+ getEffectiveCodeModel(CM, CodeModel::Small),
+ (HexagonNoOpt ? CodeGenOpt::None : OL)),
TLOF(make_unique<HexagonTargetObjectFile>()) {
initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
initAsmInfo();
@@ -311,7 +309,10 @@ void HexagonPassConfig::addIRPasses() {
}
addPass(createAtomicExpandPass());
+
if (!NoOpt) {
+ if (EnableInitialCFGCleanup)
+ addPass(createCFGSimplificationPass(1, true, true, false, true));
if (EnableLoopPrefetch)
addPass(createLoopDataPrefetchPass());
if (EnableCommGEP)
@@ -402,7 +403,6 @@ void HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonBranchRelaxation());
- // Create Packets.
if (!NoOpt) {
if (!DisableHardwareLoops)
addPass(createHexagonFixupHwLoops());
@@ -411,12 +411,8 @@ void HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonGenMux());
}
- // Create packets for 2 instructions that consitute a gather instruction.
- // Do this regardless of the opt level.
- addPass(createHexagonGatherPacketize(), false);
-
- if (!NoOpt)
- addPass(createHexagonPacketizer(), false);
+ // Packetization is mandatory: it handles gather/scatter at all opt levels.
+ addPass(createHexagonPacketizer(NoOpt), false);
if (EnableVectorPrint)
addPass(createHexagonVectorPrint(), false);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index e771f383dffa..2185bf8eebc6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -199,6 +199,10 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
/// section.
bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
const TargetMachine &TM) const {
+ bool HaveSData = isSmallDataEnabled(TM);
+ if (!HaveSData)
+ LLVM_DEBUG(dbgs() << "Small-data allocation is disabled, but symbols "
+ "may have explicit section assignments...\n");
// Only global variables, not functions.
LLVM_DEBUG(dbgs() << "Checking if value is in small-data, -G"
<< SmallDataThreshold << ": \"" << GO->getName() << "\": ");
@@ -218,6 +222,12 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
return IsSmall;
}
+ // If sdata is disabled, stop the checks here.
+ if (!HaveSData) {
+ LLVM_DEBUG(dbgs() << "no, small-data allocation is disabled\n");
+ return false;
+ }
+
if (GVar->isConstant()) {
LLVM_DEBUG(dbgs() << "no, is a constant\n");
return false;
@@ -263,8 +273,9 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
return true;
}
-bool HexagonTargetObjectFile::isSmallDataEnabled() const {
- return SmallDataThreshold > 0;
+bool HexagonTargetObjectFile::isSmallDataEnabled(const TargetMachine &TM)
+ const {
+ return SmallDataThreshold > 0 && !TM.isPositionIndependent();
}
unsigned HexagonTargetObjectFile::getSmallDataSize() const {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
index eff44f097e03..18863630fde2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -29,7 +29,7 @@ namespace llvm {
bool isGlobalInSmallSection(const GlobalObject *GO,
const TargetMachine &TM) const;
- bool isSmallDataEnabled() const;
+ bool isSmallDataEnabled(const TargetMachine &TM) const;
unsigned getSmallDataSize() const;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index a496a17788d5..c942f645aa88 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -54,7 +54,7 @@ bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
return false;
if (ST.isHVXVectorType(VecVT.getSimpleVT()))
return true;
- auto Action = TLI.getPreferredVectorAction(VecVT);
+ auto Action = TLI.getPreferredVectorAction(VecVT.getSimpleVT());
return Action == TargetLoweringBase::TypeWidenVector;
}
@@ -206,9 +206,13 @@ unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace) {
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
+ bool UseMaskForGaps) {
+ if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
+ return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
}
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index a232f99fc407..5c6f85584ec2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -98,6 +98,9 @@ public:
bool prefersVectorizedAddressing() {
return false;
}
+ bool enableInterleavedAccessVectorization() {
+ return true;
+ }
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,
@@ -120,7 +123,8 @@ public:
bool VariableMask, unsigned Alignment);
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I);
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 56ab69db9bd1..722699907ca0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -77,7 +77,7 @@ extern cl::opt<bool> ScheduleInlineAsm;
namespace llvm {
-FunctionPass *createHexagonPacketizer();
+FunctionPass *createHexagonPacketizer(bool Minimal);
void initializeHexagonPacketizerPass(PassRegistry&);
} // end namespace llvm
@@ -88,7 +88,8 @@ namespace {
public:
static char ID;
- HexagonPacketizer() : MachineFunctionPass(ID) {}
+ HexagonPacketizer(bool Min = false)
+ : MachineFunctionPass(ID), Minimal(Min) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -112,6 +113,7 @@ namespace {
private:
const HexagonInstrInfo *HII;
const HexagonRegisterInfo *HRI;
+ const bool Minimal;
};
} // end anonymous namespace
@@ -129,8 +131,9 @@ INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
MachineLoopInfo &MLI, AliasAnalysis *AA,
- const MachineBranchProbabilityInfo *MBPI)
- : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) {
+ const MachineBranchProbabilityInfo *MBPI, bool Minimal)
+ : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI),
+ Minimal(Minimal) {
HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
@@ -200,9 +203,6 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI,
bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
auto &HST = MF.getSubtarget<HexagonSubtarget>();
- if (DisablePacketizer || !HST.usePackets() || skipFunction(MF.getFunction()))
- return false;
-
HII = HST.getInstrInfo();
HRI = HST.getRegisterInfo();
auto &MLI = getAnalysis<MachineLoopInfo>();
@@ -213,7 +213,9 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
HII->genAllInsnTimingClasses(MF);
// Instantiate the packetizer.
- HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI);
+ bool MinOnly = Minimal || DisablePacketizer || !HST.usePackets() ||
+ skipFunction(MF.getFunction());
+ HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI, MinOnly);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
@@ -226,7 +228,7 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
// Here, Insn 1 will result in the dependence graph not emitting an output
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization
- for (auto &MB : MF) {
+ for (MachineBasicBlock &MB : MF) {
auto End = MB.end();
auto MI = MB.begin();
while (MI != End) {
@@ -766,7 +768,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
// Make sure that for non-POST_INC stores:
// 1. The only use of reg is DepReg and no other registers.
- // This handles V4 base+index registers.
+ // This handles base+index registers.
// The following store can not be dot new.
// Eg. r0 = add(r0, #3)
// memw(r1+r0<<#2) = r0
@@ -836,11 +838,7 @@ static bool isImplicitDependency(const MachineInstr &I, bool CheckDef,
return false;
}
-// Check to see if an instruction can be dot new
-// There are three kinds.
-// 1. dot new on predicate - V2/V3/V4
-// 2. dot new on stores NV/ST - V4
-// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
+// Check to see if an instruction can be dot new.
bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC) {
@@ -1073,9 +1071,6 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
if (MI.isInlineAsm() && !ScheduleInlineAsm)
return true;
- // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
- // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
- // They must not be grouped with other instructions in a packet.
if (isSchedBarrier(MI))
return true;
@@ -1110,6 +1105,10 @@ static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
return MJ.isInlineAsm() || MJ.isBranch() || MJ.isBarrier() ||
MJ.isCall() || MJ.isTerminator();
+ // New-value stores cannot coexist with any other stores.
+ if (HII.isNewValueStore(MI) && MJ.mayStore())
+ return true;
+
switch (MI.getOpcode()) {
case Hexagon::S2_storew_locked:
case Hexagon::S4_stored_locked:
@@ -1283,8 +1282,8 @@ bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I,
return false;
}
-bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr &I,
- const MachineInstr &J) {
+bool HexagonPacketizerList::hasDualStoreDependence(const MachineInstr &I,
+ const MachineInstr &J) {
bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
bool StoreI = I.mayStore(), StoreJ = J.mayStore();
if ((SysI && StoreJ) || (SysJ && StoreI))
@@ -1337,10 +1336,10 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
if (Dependence)
return false;
- // V4 allows dual stores. It does not allow second store, if the first
- // store is not in SLOT0. New value store, new value jump, dealloc_return
- // and memop always take SLOT0. Arch spec 3.4.4.2.
- Dependence = hasV4SpecificDependence(I, J);
+ // Dual-store does not allow second store, if the first store is not
+ // in SLOT0. New value store, new value jump, dealloc_return and memop
+ // always take SLOT0. Arch spec 3.4.4.2.
+ Dependence = hasDualStoreDependence(I, J);
if (Dependence)
return false;
@@ -1499,10 +1498,10 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
}
// For Order dependences:
- // 1. On V4 or later, volatile loads/stores can be packetized together,
- // unless other rules prevent is.
+ // 1. Volatile loads/stores can be packetized together, unless other
+ // rules prevent it.
// 2. Store followed by a load is not allowed.
- // 3. Store followed by a store is only valid on V4 or later.
+ // 3. Store followed by a store is valid.
// 4. Load followed by any memory operation is allowed.
if (DepType == SDep::Order) {
if (!PacketizeVolatiles) {
@@ -1549,7 +1548,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
continue;
}
- // For V4, special case ALLOCFRAME. Even though there is dependency
+ // Special case for ALLOCFRAME: even though there is a dependency
// between ALLOCFRAME and subsequent store, allow it to be packetized
// in a same packet. This implies that the store is using the caller's
// SP. Hence, offset needs to be updated accordingly.
@@ -1569,6 +1568,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
if (GlueAllocframeStore)
continue;
}
+ break;
default:
break;
}
@@ -1652,6 +1652,9 @@ bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
return false;
}
+ if (!Coexist)
+ return false;
+
if (ChangedOffset == INT64_MAX && updateOffset(SUI, SUJ)) {
FoundSequentialDependence = false;
Dependence = false;
@@ -1759,8 +1762,8 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
}
void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator MI) {
- // Replace VLIWPacketizerList::endPacket(MBB, MI).
+ MachineBasicBlock::iterator EndMI) {
+ // Replace VLIWPacketizerList::endPacket(MBB, EndMI).
bool memShufDisabled = getmemShufDisabled();
if (memShufDisabled && !foundLSInPacket()) {
@@ -1769,25 +1772,32 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
}
memShufDisabled = getmemShufDisabled();
- if (CurrentPacketMIs.size() > 1) {
- MachineBasicBlock::instr_iterator FirstMI(CurrentPacketMIs.front());
- MachineBasicBlock::instr_iterator LastMI(MI.getInstrIterator());
- finalizeBundle(*MBB, FirstMI, LastMI);
+ OldPacketMIs.clear();
+ for (MachineInstr *MI : CurrentPacketMIs) {
+ MachineBasicBlock::instr_iterator NextMI = std::next(MI->getIterator());
+ for (auto &I : make_range(HII->expandVGatherPseudo(*MI), NextMI))
+ OldPacketMIs.push_back(&I);
+ }
+ CurrentPacketMIs.clear();
+ if (OldPacketMIs.size() > 1) {
+ MachineBasicBlock::instr_iterator FirstMI(OldPacketMIs.front());
+ MachineBasicBlock::instr_iterator LastMI(EndMI.getInstrIterator());
+ finalizeBundle(*MBB, FirstMI, LastMI);
auto BundleMII = std::prev(FirstMI);
if (memShufDisabled)
HII->setBundleNoShuf(BundleMII);
setmemShufDisabled(false);
}
- OldPacketMIs = CurrentPacketMIs;
- CurrentPacketMIs.clear();
ResourceTracker->clearResources();
LLVM_DEBUG(dbgs() << "End packet\n");
}
bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
+ if (Minimal)
+ return false;
return !producesStall(MI);
}
@@ -1860,6 +1870,6 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-FunctionPass *llvm::createHexagonPacketizer() {
- return new HexagonPacketizer();
+FunctionPass *llvm::createHexagonPacketizer(bool Minimal) {
+ return new HexagonPacketizer(Minimal);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index 40dcee3441a2..ca70cf967a46 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -66,11 +66,13 @@ protected:
private:
const HexagonInstrInfo *HII;
const HexagonRegisterInfo *HRI;
+ const bool Minimal;
public:
HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
AliasAnalysis *AA,
- const MachineBranchProbabilityInfo *MBPI);
+ const MachineBranchProbabilityInfo *MBPI,
+ bool Minimal);
// initPacketizerState - initialize some internal flags.
void initPacketizerState() override;
@@ -147,7 +149,7 @@ protected:
bool hasDeadDependence(const MachineInstr &I, const MachineInstr &J);
bool hasControlDependence(const MachineInstr &I, const MachineInstr &J);
bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J);
- bool hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J);
+ bool hasDualStoreDependence(const MachineInstr &I, const MachineInstr &J);
bool producesStall(const MachineInstr &MI);
};
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index cb504b5c3d5d..6543d8313900 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -26,11 +26,9 @@ namespace llvm {
/// instruction info tracks.
namespace HexagonII {
unsigned const TypeCVI_FIRST = TypeCVI_4SLOT_MPY;
- unsigned const TypeCVI_LAST = TypeCVI_VX_LATE;
+ unsigned const TypeCVI_LAST = TypeCVI_ZW;
enum SubTarget {
- HasV4SubT = 0x3f,
- HasV5SubT = 0x3e,
HasV55SubT = 0x3c,
HasV60SubT = 0x38,
};
@@ -57,117 +55,117 @@ namespace HexagonII {
// MCInstrDesc TSFlags
// *** Must match HexagonInstrFormat*.td ***
enum {
- // This 5-bit field describes the insn type.
- TypePos = 0,
- TypeMask = 0x3f,
+ // This 7-bit field describes the insn type.
+ TypePos = 0,
+ TypeMask = 0x7f,
// Solo instructions.
- SoloPos = 6,
+ SoloPos = 7,
SoloMask = 0x1,
// Packed only with A or X-type instructions.
- SoloAXPos = 7,
+ SoloAXPos = 8,
SoloAXMask = 0x1,
// Only A-type instruction in first slot or nothing.
- RestrictSlot1AOKPos = 8,
+ RestrictSlot1AOKPos = 9,
RestrictSlot1AOKMask = 0x1,
// Predicated instructions.
- PredicatedPos = 9,
+ PredicatedPos = 10,
PredicatedMask = 0x1,
- PredicatedFalsePos = 10,
+ PredicatedFalsePos = 11,
PredicatedFalseMask = 0x1,
- PredicatedNewPos = 11,
+ PredicatedNewPos = 12,
PredicatedNewMask = 0x1,
- PredicateLatePos = 12,
+ PredicateLatePos = 13,
PredicateLateMask = 0x1,
// New-Value consumer instructions.
- NewValuePos = 13,
+ NewValuePos = 14,
NewValueMask = 0x1,
// New-Value producer instructions.
- hasNewValuePos = 14,
+ hasNewValuePos = 15,
hasNewValueMask = 0x1,
// Which operand consumes or produces a new value.
- NewValueOpPos = 15,
+ NewValueOpPos = 16,
NewValueOpMask = 0x7,
// Stores that can become new-value stores.
- mayNVStorePos = 18,
+ mayNVStorePos = 19,
mayNVStoreMask = 0x1,
// New-value store instructions.
- NVStorePos = 19,
+ NVStorePos = 20,
NVStoreMask = 0x1,
// Loads that can become current-value loads.
- mayCVLoadPos = 20,
+ mayCVLoadPos = 21,
mayCVLoadMask = 0x1,
// Current-value load instructions.
- CVLoadPos = 21,
+ CVLoadPos = 22,
CVLoadMask = 0x1,
// Extendable insns.
- ExtendablePos = 22,
+ ExtendablePos = 23,
ExtendableMask = 0x1,
// Insns must be extended.
- ExtendedPos = 23,
+ ExtendedPos = 24,
ExtendedMask = 0x1,
// Which operand may be extended.
- ExtendableOpPos = 24,
+ ExtendableOpPos = 25,
ExtendableOpMask = 0x7,
// Signed or unsigned range.
- ExtentSignedPos = 27,
+ ExtentSignedPos = 28,
ExtentSignedMask = 0x1,
// Number of bits of range before extending operand.
- ExtentBitsPos = 28,
+ ExtentBitsPos = 29,
ExtentBitsMask = 0x1f,
// Alignment power-of-two before extending operand.
- ExtentAlignPos = 33,
+ ExtentAlignPos = 34,
ExtentAlignMask = 0x3,
- CofMax1Pos = 35,
+ CofMax1Pos = 36,
CofMax1Mask = 0x1,
- CofRelax1Pos = 36,
+ CofRelax1Pos = 37,
CofRelax1Mask = 0x1,
- CofRelax2Pos = 37,
+ CofRelax2Pos = 38,
CofRelax2Mask = 0x1,
- RestrictNoSlot1StorePos = 38,
+ RestrictNoSlot1StorePos = 39,
RestrictNoSlot1StoreMask = 0x1,
// Addressing mode for load/store instructions.
- AddrModePos = 41,
+ AddrModePos = 42,
AddrModeMask = 0x7,
// Access size for load/store instructions.
- MemAccessSizePos = 44,
+ MemAccessSizePos = 45,
MemAccesSizeMask = 0xf,
// Branch predicted taken.
- TakenPos = 48,
+ TakenPos = 49,
TakenMask = 0x1,
// Floating-point instructions.
- FPPos = 49,
+ FPPos = 50,
FPMask = 0x1,
// New-Value producer-2 instructions.
- hasNewValuePos2 = 51,
+ hasNewValuePos2 = 52,
hasNewValueMask2 = 0x1,
// Which operand consumes or produces a new value.
- NewValueOpPos2 = 52,
+ NewValueOpPos2 = 53,
NewValueOpMask2 = 0x7,
// Accumulator instructions.
- AccumulatorPos = 55,
+ AccumulatorPos = 56,
AccumulatorMask = 0x1,
// Complex XU, prevent xu competition by preferring slot3
- PrefersSlot3Pos = 56,
+ PrefersSlot3Pos = 57,
PrefersSlot3Mask = 0x1,
// v65
- HasTmpDstPos = 59,
+ HasTmpDstPos = 60,
HasTmpDstMask = 0x1,
- CVINewPos = 61,
- CVINewMask = 0x1
+ CVINewPos = 62,
+ CVINewMask = 0x1,
};
// *** The code above must match HexagonInstrFormat*.td *** //
@@ -176,7 +174,7 @@ namespace HexagonII {
enum HexagonMOTargetFlagVal {
// Hexagon-specific MachineOperand target flags.
//
- // When chaning these, make sure to update
+ // When changing these, make sure to update
// getSerializableDirectMachineOperandTargetFlags and
// getSerializableBitmaskMachineOperandTargetFlags if needed.
MO_NO_FLAG,
@@ -189,7 +187,8 @@ namespace HexagonII {
MO_GOT,
// Low or high part of a symbol.
- MO_LO16, MO_HI16,
+ MO_LO16,
+ MO_HI16,
// Offset from the base of the SDA.
MO_GPREL,
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 3b3a15b990f1..687e79a7dbab 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -28,26 +28,8 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "HexagonGenAsmWriter.inc"
-HexagonInstPrinter::HexagonInstPrinter(MCAsmInfo const &MAI,
- MCInstrInfo const &MII,
- MCRegisterInfo const &MRI)
- : MCInstPrinter(MAI, MII, MRI), MII(MII), HasExtender(false) {
-}
-
-StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
- return MII.getName(Opcode);
-}
-
void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
- O << getRegName(RegNo);
-}
-
-StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
- return getRegisterName(RegNo);
-}
-
-void HexagonInstPrinter::setExtender(MCInst const &MCI) {
- HasExtender = HexagonMCInstrInfo::isImmext(MCI);
+ O << getRegisterName(RegNo);
}
void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
@@ -65,7 +47,7 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
printInstruction(MCI.getOperand(0).getInst(), OS);
} else
printInstruction(&MCI, OS);
- setExtender(MCI);
+ HasExtender = HexagonMCInstrInfo::isImmext(MCI);
OS << "\n";
}
@@ -97,72 +79,6 @@ void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo,
}
}
-void HexagonInstPrinter::printExtOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printUnsignedImmOperand(MCInst const *MI,
- unsigned OpNo,
- raw_ostream &O) const {
- O << MI->getOperand(OpNo).getImm();
-}
-
-void HexagonInstPrinter::printNegImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- O << -MI->getOperand(OpNo).getImm();
-}
-
-void HexagonInstPrinter::printNOneImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- O << -1;
-}
-
-void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printJumpTable(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
-
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printConstantPool(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
-
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printBranchOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- // Branches can take an immediate operand. This is used by the branch
- // selection pass to print $+8, an eight byte displacement from the PC.
- llvm_unreachable("Unknown branch operand.");
-}
-
-void HexagonInstPrinter::printCallOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {}
-
-void HexagonInstPrinter::printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {}
-
-void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {}
-
-void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo,
- raw_ostream &O, bool hi) const {
- assert(MI->getOperand(OpNo).isImm() && "Unknown symbol operand");
-
- O << '#' << (hi ? "HI" : "LO") << '(';
- O << '#';
- printOperand(MI, OpNo, O);
- O << ')';
-}
-
void HexagonInstPrinter::printBrtarget(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
MCOperand const &MO = MI->getOperand(OpNo);
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index ac8e391905e0..17af046ce090 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -26,57 +26,25 @@ namespace llvm {
class HexagonInstPrinter : public MCInstPrinter {
public:
explicit HexagonInstPrinter(MCAsmInfo const &MAI, MCInstrInfo const &MII,
- MCRegisterInfo const &MRI);
+ MCRegisterInfo const &MRI)
+ : MCInstPrinter(MAI, MII, MRI), MII(MII) {}
+
void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
- virtual StringRef getOpcodeName(unsigned Opcode) const;
- void printInstruction(MCInst const *MI, raw_ostream &O);
+ void printRegName(raw_ostream &O, unsigned RegNo) const override;
- StringRef getRegName(unsigned RegNo) const;
static char const *getRegisterName(unsigned RegNo);
- void printRegName(raw_ostream &O, unsigned RegNo) const override;
+ void printInstruction(MCInst const *MI, raw_ostream &O);
void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printUnsignedImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printNegImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printNOneImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printBranchOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printPredicateOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printGlobalOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
-
- void printSymbolHi(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
- printSymbol(MI, OpNo, O, true);
- }
- void printSymbolLo(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
- printSymbol(MI, OpNo, O, false);
- }
-
MCAsmInfo const &getMAI() const { return MAI; }
MCInstrInfo const &getMII() const { return MII; }
-protected:
- void printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O,
- bool hi) const;
-
private:
MCInstrInfo const &MII;
-
- bool HasExtender;
- void setExtender(MCInst const &MCI);
+ bool HasExtender = false;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index b208a3668124..f0654d612b4b 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -127,6 +127,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x4;
}
+ break;
case HexagonII::HSIG_L2:
switch (Gb) {
default:
@@ -138,6 +139,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x5;
}
+ break;
case HexagonII::HSIG_S1:
switch (Gb) {
default:
@@ -151,6 +153,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x6;
}
+ break;
case HexagonII::HSIG_S2:
switch (Gb) {
default:
@@ -166,6 +169,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x7;
}
+ break;
case HexagonII::HSIG_A:
switch (Gb) {
default:
@@ -173,11 +177,13 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x3;
}
+ break;
case HexagonII::HSIG_Compound:
switch (Gb) {
case HexagonII::HSIG_Compound:
return 0xFFFFFFFF;
}
+ break;
}
return 0xFFFFFFFF;
}
@@ -634,8 +640,7 @@ bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
return false;
}
- if (STI.getCPU().equals_lower("hexagonv4") ||
- STI.getCPU().equals_lower("hexagonv5") ||
+ if (STI.getCPU().equals_lower("hexagonv5") ||
STI.getCPU().equals_lower("hexagonv55") ||
STI.getCPU().equals_lower("hexagonv60")) {
// If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb);
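
// Illustrative sketch (not part of the patch): the `break` statements added in the
// iClassOfDuplexPair hunks above guard against falling out of an inner switch and
// straight into the next outer case label. A minimal standalone analogue of that pitfall:
#include <cstdio>

static unsigned classify(unsigned Outer, unsigned Inner) {
  switch (Outer) {
  case 0:
    switch (Inner) {
    case 1:
      return 0x4;            // recognized pairing
    default:
      break;                 // inner switch falls out...
    }
    break;                   // ...so this break keeps us from running the next outer case.
  case 1:
    switch (Inner) {
    case 1:
      return 0x5;
    default:
      break;
    }
    break;
  }
  return 0xFFFFFFFF;         // sentinel: no valid pairing
}

int main() {
  // Without the outer `break`, classify(0, 2) would also test the Outer == 1 table
  // and could return 0x5 instead of the sentinel.
  std::printf("%#x\n", classify(0, 2)); // prints 0xffffffff
  return 0;
}
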
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
index 9fbe299d7d52..f0689252b396 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -9,11 +9,11 @@
//===----------------------------------------------------------------------===//
#include "HexagonMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/ELF.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index b211a81524fb..92ce7345f358 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -61,8 +61,6 @@ cl::opt<bool> llvm::HexagonDisableDuplex
cl::desc("Disable looking for duplex instructions for Hexagon"));
namespace { // These flags are to be deprecated
-cl::opt<bool> MV4("mv4", cl::Hidden, cl::desc("Build for Hexagon V4"),
- cl::init(false));
cl::opt<bool> MV5("mv5", cl::Hidden, cl::desc("Build for Hexagon V5"),
cl::init(false));
cl::opt<bool> MV55("mv55", cl::Hidden, cl::desc("Build for Hexagon V55"),
@@ -73,6 +71,8 @@ cl::opt<bool> MV62("mv62", cl::Hidden, cl::desc("Build for Hexagon V62"),
cl::init(false));
cl::opt<bool> MV65("mv65", cl::Hidden, cl::desc("Build for Hexagon V65"),
cl::init(false));
+cl::opt<bool> MV66("mv66", cl::Hidden, cl::desc("Build for Hexagon V66"),
+ cl::init(false));
} // namespace
cl::opt<Hexagon::ArchEnum>
@@ -82,19 +82,20 @@ cl::opt<Hexagon::ArchEnum>
clEnumValN(Hexagon::ArchEnum::V60, "v60", "Build for HVX v60"),
clEnumValN(Hexagon::ArchEnum::V62, "v62", "Build for HVX v62"),
clEnumValN(Hexagon::ArchEnum::V65, "v65", "Build for HVX v65"),
- // Sentinal for no value specified
- clEnumValN(Hexagon::ArchEnum::V5, "", "")),
- // Sentinal for flag not present
- cl::init(Hexagon::ArchEnum::V4), cl::ValueOptional);
+ clEnumValN(Hexagon::ArchEnum::V66, "v66", "Build for HVX v66"),
+ // Sentinel for no value specified.
+ clEnumValN(Hexagon::ArchEnum::Generic, "", "")),
+ // Sentinel for flag not present.
+ cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional);
+
static cl::opt<bool>
- DisableHVX("mno-hvx", cl::Hidden, cl::desc("Disable Hexagon Vector eXtensions"));
+ DisableHVX("mno-hvx", cl::Hidden,
+ cl::desc("Disable Hexagon Vector eXtensions"));
static StringRef DefaultArch = "hexagonv60";
static StringRef HexagonGetArchVariant() {
- if (MV4)
- return "hexagonv4";
if (MV5)
return "hexagonv5";
if (MV55)
@@ -105,6 +106,8 @@ static StringRef HexagonGetArchVariant() {
return "hexagonv62";
if (MV65)
return "hexagonv65";
+ if (MV66)
+ return "hexagonv66";
return "";
}
@@ -123,7 +126,7 @@ StringRef Hexagon_MC::selectHexagonCPU(StringRef CPU) {
return ArchV;
}
-unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV4FU::SLOT3; }
+unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV5FU::SLOT3; }
namespace {
@@ -279,6 +282,7 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
Result.push_back(FS);
switch (EnableHVX) {
+ case Hexagon::ArchEnum::V5:
case Hexagon::ArchEnum::V55:
break;
case Hexagon::ArchEnum::V60:
@@ -290,14 +294,18 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
case Hexagon::ArchEnum::V65:
Result.push_back("+hvxv65");
break;
- case Hexagon::ArchEnum::V5:{
+ case Hexagon::ArchEnum::V66:
+ Result.push_back("+hvxv66");
+ break;
+ case Hexagon::ArchEnum::Generic:{
Result.push_back(StringSwitch<StringRef>(CPU)
.Case("hexagonv60", "+hvxv60")
.Case("hexagonv62", "+hvxv62")
- .Case("hexagonv65", "+hvxv65"));
+ .Case("hexagonv65", "+hvxv65")
+ .Case("hexagonv66", "+hvxv66"));
break;
}
- case Hexagon::ArchEnum::V4:
+ case Hexagon::ArchEnum::NoArch:
     // Sentinel if -mhvx isn't specified
break;
}
@@ -307,15 +315,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
static bool isCPUValid(std::string CPU)
{
- std::vector<std::string> table
- {
- "generic",
- "hexagonv4",
- "hexagonv5",
- "hexagonv55",
- "hexagonv60",
- "hexagonv62",
- "hexagonv65",
+ std::vector<std::string> table {
+ "generic", "hexagonv5", "hexagonv55", "hexagonv60",
+ "hexagonv62", "hexagonv65", "hexagonv66",
};
return std::find(table.begin(), table.end(), CPU) != table.end();
@@ -336,8 +338,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// Make sure that +hvx-length turns hvx on, and that "hvx" alone
// turns on hvxvNN, corresponding to the existing ArchVNN.
FeatureBitset FB = S;
- unsigned CpuArch = ArchV4;
- for (unsigned F : {ArchV65, ArchV62, ArchV60, ArchV55, ArchV5, ArchV4}) {
+ unsigned CpuArch = ArchV5;
+ for (unsigned F : {ArchV66, ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
if (!FB.test(F))
continue;
CpuArch = F;
@@ -351,7 +353,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
break;
}
bool HasHvxVer = false;
- for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65}) {
+ for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65,
+ ExtensionHVXV66}) {
if (!FB.test(F))
continue;
HasHvxVer = true;
@@ -364,6 +367,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// HasHvxVer is false, and UseHvx is true.
switch (CpuArch) {
+ case ArchV66:
+ FB.set(ExtensionHVXV66);
+ LLVM_FALLTHROUGH;
case ArchV65:
FB.set(ExtensionHVXV65);
LLVM_FALLTHROUGH;
@@ -402,12 +408,12 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,
unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
static std::map<StringRef,unsigned> ElfFlags = {
- {"hexagonv4", ELF::EF_HEXAGON_MACH_V4},
{"hexagonv5", ELF::EF_HEXAGON_MACH_V5},
{"hexagonv55", ELF::EF_HEXAGON_MACH_V55},
{"hexagonv60", ELF::EF_HEXAGON_MACH_V60},
{"hexagonv62", ELF::EF_HEXAGON_MACH_V62},
{"hexagonv65", ELF::EF_HEXAGON_MACH_V65},
+ {"hexagonv66", ELF::EF_HEXAGON_MACH_V66},
};
auto F = ElfFlags.find(STI.getCPU());
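
// Illustrative sketch (not part of the patch): GetELFFlags() above resolves the CPU
// name through a static map and falls back when the name is absent. A standalone
// analogue of that lookup-with-fallback shape; the flag values here are hypothetical:
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

static uint32_t getMachFlag(const std::string &CPU) {
  static const std::map<std::string, uint32_t> Flags = {
      {"hexagonv5", 0x1}, {"hexagonv60", 0x60}, {"hexagonv66", 0x66}, // made-up values
  };
  auto F = Flags.find(CPU);
  return F != Flags.end() ? F->second : 0; // 0 stands in for "unknown CPU"
}

int main() {
  std::printf("%#x\n", (unsigned)getMachFlag("hexagonv66")); // 0x66
  std::printf("%#x\n", (unsigned)getMachFlag("hexagonv4"));  // 0x0 -- v4 support was dropped
  return 0;
}
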
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 6cd1b3a4691f..d6ea664222d3 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -18,6 +18,33 @@
#include <cstdint>
#include <string>
+#define Hexagon_POINTER_SIZE 4
+
+#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
+#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
+#define Hexagon_WordSize Hexagon_PointerSize
+#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
+
+// allocframe saves LR and FP on stack before allocating
+// a new stack frame. This takes 8 bytes.
+#define HEXAGON_LRFP_SIZE 8
+
+// Normal instruction size (in bytes).
+#define HEXAGON_INSTR_SIZE 4
+
+// Maximum number of words and instructions in a packet.
+#define HEXAGON_PACKET_SIZE 4
+#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE)
+// Minimum number of instructions in an end-loop packet.
+#define HEXAGON_PACKET_INNER_SIZE 2
+#define HEXAGON_PACKET_OUTER_SIZE 3
+// Maximum number of instructions in a packet before shuffling,
+// including a compound one or a duplex or an extender.
+#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)
+
+// Name of the global offset table as defined by the Hexagon ABI
+#define HEXAGON_GOT_SYM_NAME "_GLOBAL_OFFSET_TABLE_"
+
namespace llvm {
struct InstrItinerary;
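
// Illustrative sketch (not part of the patch): the packet macros added above imply a
// 16-byte maximum packet and a 7-instruction pre-shuffle limit. A tiny standalone
// check of that arithmetic:
#include <cstdio>

#define HEXAGON_INSTR_SIZE 4
#define HEXAGON_PACKET_SIZE 4
#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE)
#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)

int main() {
  std::printf("max packet bytes: %d\n", HEXAGON_MAX_PACKET_SIZE);         // 16
  std::printf("pre-shuffle insns: %d\n", HEXAGON_PRESHUFFLE_PACKET_SIZE); // 7
  return 0;
}
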
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 59f3caa6af94..f4ee2bbfaaaa 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -138,6 +138,8 @@ void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) {
UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2);
(*TUL)[HexagonII::TypeCVI_SCATTER_NEW_ST] =
UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_4SLOT_MPY] = UnitsAndLanes(CVI_XLANE, 4);
+ (*TUL)[HexagonII::TypeCVI_ZW] = UnitsAndLanes(CVI_ZW, 1);
}
HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL,
@@ -300,6 +302,7 @@ bool HexagonShuffler::check() {
// Number of memory operations, loads, solo loads, stores, solo stores, single
// stores.
unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
+ unsigned NonZCVIloads = 0, AllCVIloads = 0, CVIstores = 0;
// Number of duplex insns
unsigned duplex = 0;
unsigned pSlot3Cnt = 0;
@@ -331,6 +334,11 @@ bool HexagonShuffler::check() {
case HexagonII::TypeCVI_VM_TMP_LD:
case HexagonII::TypeCVI_GATHER:
case HexagonII::TypeCVI_GATHER_RST:
+ ++NonZCVIloads;
+ LLVM_FALLTHROUGH;
+ case HexagonII::TypeCVI_ZW:
+ ++AllCVIloads;
+ LLVM_FALLTHROUGH;
case HexagonII::TypeLD:
++loads;
++memory;
@@ -348,6 +356,8 @@ bool HexagonShuffler::check() {
case HexagonII::TypeCVI_SCATTER_RST:
case HexagonII::TypeCVI_SCATTER_NEW_RST:
case HexagonII::TypeCVI_SCATTER_NEW_ST:
+ ++CVIstores;
+ LLVM_FALLTHROUGH;
case HexagonII::TypeST:
++stores;
++memory;
@@ -405,7 +415,11 @@ bool HexagonShuffler::check() {
applySlotRestrictions();
// Check if the packet is legal.
- if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory))) {
+ const unsigned ZCVIloads = AllCVIloads - NonZCVIloads;
+ const bool ValidHVXMem =
+ NonZCVIloads <= 1 && ZCVIloads <= 1 && CVIstores <= 1;
+ if ((load0 > 1 || store0 > 1 || !ValidHVXMem) ||
+ (duplex > 1 || (duplex && memory))) {
reportError(llvm::Twine("invalid instruction packet"));
return false;
}
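
// Illustrative sketch (not part of the patch): the new packet check above allows at
// most one non-Z CVI load, one Z (TypeCVI_ZW) load, and one CVI store per packet.
// A standalone restatement of that predicate:
#include <cassert>

static bool isValidHVXMem(unsigned NonZCVIloads, unsigned AllCVIloads,
                          unsigned CVIstores) {
  const unsigned ZCVIloads = AllCVIloads - NonZCVIloads; // Z loads counted separately
  return NonZCVIloads <= 1 && ZCVIloads <= 1 && CVIstores <= 1;
}

int main() {
  assert(isValidHVXMem(1, 2, 1));  // one regular CVI load, one Z load, one store: OK
  assert(!isValidHVXMem(2, 2, 0)); // two regular CVI loads: rejected
  return 0;
}
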
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 37f90bc46ac7..ef50c5bebbfb 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -75,7 +75,8 @@ private:
CVI_XLANE = 1 << 0,
CVI_SHIFT = 1 << 1,
CVI_MPY0 = 1 << 2,
- CVI_MPY1 = 1 << 3
+ CVI_MPY1 = 1 << 3,
+ CVI_ZW = 1 << 4
};
// Count of adjacent slots that the insn requires to be executed.
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
index da339bfd3ff4..8dcd485d65e9 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -214,7 +214,7 @@ bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) {
return false;
return A.Id < B.Id;
};
- llvm::sort(DRNs.begin(), DRNs.end(), UsesFirst);
+ llvm::sort(DRNs, UsesFirst);
if (trace())
dbgs() << "Removing dead ref nodes:\n";
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
index 3d1ec31dada7..d8ca08e70505 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
@@ -1471,7 +1471,7 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
// and add a def for each S in the closure.
// Sort the refs so that the phis will be created in a deterministic order.
- llvm::sort(MaxRefs.begin(), MaxRefs.end());
+ llvm::sort(MaxRefs);
// Remove duplicates.
auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end());
MaxRefs.erase(NewEnd, MaxRefs.end());
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
index c257d754ddf9..9ff48d25a026 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
@@ -207,7 +207,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
};
std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
- llvm::sort(Tmp.begin(), Tmp.end(), Less);
+ llvm::sort(Tmp, Less);
// The vector is a list of instructions, so that defs coming from
// the same instruction don't need to be artificially ordered.
@@ -813,7 +813,7 @@ void Liveness::computeLiveIns() {
std::vector<RegisterRef> LV;
for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
LV.push_back(RegisterRef(I->PhysReg, I->LaneMask));
- llvm::sort(LV.begin(), LV.end());
+ llvm::sort(LV);
dbgs() << printMBBReference(B) << "\t rec = {";
for (auto I : LV)
dbgs() << ' ' << Print<RegisterRef>(I, DFG);
@@ -824,7 +824,7 @@ void Liveness::computeLiveIns() {
const RegisterAggr &LG = LiveMap[&B];
for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I)
LV.push_back(*I);
- llvm::sort(LV.begin(), LV.end());
+ llvm::sort(LV);
dbgs() << "\tcomp = {";
for (auto I : LV)
dbgs() << ' ' << Print<RegisterRef>(I, DFG);
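
// Illustrative sketch (not part of the patch): the RDF hunks above switch llvm::sort
// from the iterator-pair form llvm::sort(C.begin(), C.end(), ...) to the range form
// llvm::sort(C, ...). A standalone analogue of such a range wrapper, built on std::sort:
#include <algorithm>
#include <cstdio>
#include <iterator>
#include <vector>

template <typename Range, typename Compare>
void range_sort(Range &R, Compare Cmp) {
  std::sort(std::begin(R), std::end(R), Cmp); // same work, shorter call sites
}

int main() {
  std::vector<int> V = {3, 1, 2};
  range_sort(V, [](int A, int B) { return A < B; });
  for (int X : V)
    std::printf("%d ", X); // 1 2 3
  std::printf("\n");
  return 0;
}
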
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index 045a897c4126..0411704be6fb 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -1498,8 +1498,8 @@ void LanaiTargetLowering::computeKnownBitsForTargetNode(
break;
case LanaiISD::SELECT_CC:
KnownBits Known2;
- DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
- DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
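
// Illustrative sketch (not part of the patch): for SELECT_CC the result may be either
// operand, so a bit is only "known" when both operands agree on it; the hunk above
// intersects the two KnownBits sets. A standalone analogue using plain masks:
#include <cstdint>
#include <cstdio>

struct Known {
  uint64_t Zero; // bits known to be 0
  uint64_t One;  // bits known to be 1
};

static Known intersect(Known A, Known B) {
  Known R;
  R.Zero = A.Zero & B.Zero; // known zero only if zero in both operands
  R.One = A.One & B.One;    // known one only if one in both operands
  return R;
}

int main() {
  Known A{0xF0, 0x0F}, B{0xF3, 0x0C};
  Known R = intersect(A, B);
  std::printf("Zero=%#llx One=%#llx\n", (unsigned long long)R.Zero,
              (unsigned long long)R.One); // Zero=0xf0 One=0xc
  return 0;
}
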
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 493d02bef37c..196768fdc56a 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -101,12 +101,12 @@ bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint(
// the width doesn't overlap the offset of a higher memory access,
// then the memory accesses are different.
const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned BaseRegA = 0, BaseRegB = 0;
+ MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned int WidthA = 0, WidthB = 0;
- if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
- getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
- if (BaseRegA == BaseRegB) {
+ if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+ getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+ if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = std::min(OffsetA, OffsetB);
int HighOffset = std::max(OffsetA, OffsetB);
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
@@ -733,8 +733,13 @@ unsigned LanaiInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- const MachineMemOperand *Dummy;
- return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (hasLoadFromStackSlot(MI, Accesses)){
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return 1;
+ }
}
return 0;
}
@@ -750,9 +755,9 @@ unsigned LanaiInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return 0;
}
-bool LanaiInstrInfo::getMemOpBaseRegImmOfsWidth(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo * /*TRI*/) const {
+bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
+ MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ unsigned &Width, const TargetRegisterInfo * /*TRI*/) const {
// Handle only loads/stores with base register followed by immediate offset
// and with add as ALU op.
if (LdSt.getNumOperands() != 4)
@@ -782,14 +787,17 @@ bool LanaiInstrInfo::getMemOpBaseRegImmOfsWidth(
break;
}
- BaseReg = LdSt.getOperand(1).getReg();
+ BaseOp = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
-bool LanaiInstrInfo::getMemOpBaseRegImmOfs(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool LanaiInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
+ MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
switch (LdSt.getOpcode()) {
default:
return false;
@@ -803,6 +811,6 @@ bool LanaiInstrInfo::getMemOpBaseRegImmOfs(
case Lanai::LDBs_RI:
case Lanai::LDBz_RI:
unsigned Width;
- return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
+ return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
}
}
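
// Illustrative sketch (not part of the patch): once both accesses resolve to an
// identical base operand, disjointness reduces to an interval test on (offset, width).
// The closing comparison sits outside the lines shown here, so treat this standalone
// version as an assumption about that test:
#include <algorithm>
#include <cassert>
#include <cstdint>

static bool accessesDisjoint(int64_t OffsetA, unsigned WidthA,
                             int64_t OffsetB, unsigned WidthB) {
  const int64_t LowOffset = std::min(OffsetA, OffsetB);
  const int64_t HighOffset = std::max(OffsetA, OffsetB);
  const unsigned LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  // The lower access must end at or before the higher access begins.
  return LowOffset + LowWidth <= HighOffset;
}

int main() {
  assert(accessesDisjoint(0, 4, 4, 4));  // [0,4) and [4,8) do not overlap
  assert(!accessesDisjoint(0, 8, 4, 4)); // [0,8) overlaps [4,8)
  return 0;
}
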
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index fe22fde2470b..bdcf9a361b5f 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -68,13 +68,13 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
- bool getMemOpBaseRegImmOfsWidth(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo *TRI) const;
+ bool getMemOperandWithOffsetWidth(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset, unsigned &Width,
+ const TargetRegisterInfo *TRI) const;
std::pair<unsigned, unsigned>
decomposeMachineOperandsTargetFlags(unsigned TF) const override;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
index 35e2542dfb13..54500b0e52e3 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
@@ -277,8 +277,7 @@ void LanaiMemAluCombiner::insertMergedInstruction(MachineBasicBlock *BB,
InstrBuilder.addImm(LPAC::makePostOp(AluOpcode));
// Transfer memory operands.
- InstrBuilder->setMemRefs(MemInstr->memoperands_begin(),
- MemInstr->memoperands_end());
+ InstrBuilder.setMemRefs(MemInstr->memoperands());
}
// Function determines if ALU operation (in alu_iter) can be combined with
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
index 2c21a53b13bb..10bd9e2c65d2 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -53,12 +53,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Medium;
-}
-
LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT,
StringRef Cpu, StringRef FeatureString,
const TargetOptions &Options,
@@ -67,7 +61,8 @@ LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OptLevel, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(), TT, Cpu, FeatureString, Options,
getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CodeModel), OptLevel),
+ getEffectiveCodeModel(CodeModel, CodeModel::Medium),
+ OptLevel),
Subtarget(TT, Cpu, FeatureString, *this, Options, getCodeModel(),
OptLevel),
TLOF(new LanaiTargetObjectFile()) {
diff --git a/contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
new file mode 100644
index 000000000000..1ad70ac72c73
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -0,0 +1,580 @@
+//===- MSP430AsmParser.cpp - Parse MSP430 assembly to MCInst instructions -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430RegisterInfo.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define DEBUG_TYPE "msp430-asm-parser"
+
+using namespace llvm;
+
+namespace {
+
+/// Parses MSP430 assembly from a stream.
+class MSP430AsmParser : public MCTargetAsmParser {
+ const MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+ const MCRegisterInfo *MRI;
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ bool ParseDirective(AsmToken DirectiveID) override;
+ bool ParseDirectiveRefSym(AsmToken DirectiveID);
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
+
+ bool parseJccInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands);
+
+ bool ParseOperand(OperandVector &Operands);
+
+ bool ParseLiteralValues(unsigned Size, SMLoc L);
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ /// @name Auto-generated Matcher Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "MSP430GenAsmMatcher.inc"
+
+ /// }
+
+public:
+ MSP430AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI, MII), STI(STI), Parser(Parser) {
+ MCAsmParserExtension::Initialize(Parser);
+ MRI = getContext().getRegisterInfo();
+
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+};
+
+/// A parsed MSP430 assembly operand.
+class MSP430Operand : public MCParsedAsmOperand {
+ typedef MCParsedAsmOperand Base;
+
+ enum KindTy {
+ k_Imm,
+ k_Reg,
+ k_Tok,
+ k_Mem,
+ k_IndReg,
+ k_PostIndReg
+ } Kind;
+
+ struct Memory {
+ unsigned Reg;
+ const MCExpr *Offset;
+ };
+ union {
+ const MCExpr *Imm;
+ unsigned Reg;
+ StringRef Tok;
+ Memory Mem;
+ };
+
+ SMLoc Start, End;
+
+public:
+ MSP430Operand(StringRef Tok, SMLoc const &S)
+ : Base(), Kind(k_Tok), Tok(Tok), Start(S), End(S) {}
+ MSP430Operand(KindTy Kind, unsigned Reg, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(Kind), Reg(Reg), Start(S), End(E) {}
+ MSP430Operand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(k_Imm), Imm(Imm), Start(S), End(E) {}
+ MSP430Operand(unsigned Reg, MCExpr const *Expr, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(k_Mem), Mem({Reg, Expr}), Start(S), End(E) {}
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert((Kind == k_Reg || Kind == k_IndReg || Kind == k_PostIndReg) &&
+ "Unexpected operand kind");
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ }
+
+ void addExprOperand(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediate when possible
+ if (!Expr)
+ Inst.addOperand(MCOperand::createImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(Kind == k_Imm && "Unexpected operand kind");
+ assert(N == 1 && "Invalid number of operands!");
+
+ addExprOperand(Inst, Imm);
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert(Kind == k_Mem && "Unexpected operand kind");
+ assert(N == 2 && "Invalid number of operands");
+
+ Inst.addOperand(MCOperand::createReg(Mem.Reg));
+ addExprOperand(Inst, Mem.Offset);
+ }
+
+ bool isReg() const { return Kind == k_Reg; }
+ bool isImm() const { return Kind == k_Imm; }
+ bool isToken() const { return Kind == k_Tok; }
+ bool isMem() const { return Kind == k_Mem; }
+ bool isIndReg() const { return Kind == k_IndReg; }
+ bool isPostIndReg() const { return Kind == k_PostIndReg; }
+
+ bool isCGImm() const {
+ if (Kind != k_Imm)
+ return false;
+
+ int64_t Val;
+ if (!Imm->evaluateAsAbsolute(Val))
+ return false;
+
+ if (Val == 0 || Val == 1 || Val == 2 || Val == 4 || Val == 8 || Val == -1)
+ return true;
+
+ return false;
+ }
+
+ StringRef getToken() const {
+ assert(Kind == k_Tok && "Invalid access!");
+ return Tok;
+ }
+
+ unsigned getReg() const {
+ assert(Kind == k_Reg && "Invalid access!");
+ return Reg;
+ }
+
+ void setReg(unsigned RegNo) {
+ assert(Kind == k_Reg && "Invalid access!");
+ Reg = RegNo;
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateToken(StringRef Str, SMLoc S) {
+ return make_unique<MSP430Operand>(Str, S);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(k_Reg, RegNum, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(Val, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateMem(unsigned RegNum,
+ const MCExpr *Val,
+ SMLoc S, SMLoc E) {
+ return make_unique<MSP430Operand>(RegNum, Val, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateIndReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(k_IndReg, RegNum, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreatePostIndReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(k_PostIndReg, RegNum, S, E);
+ }
+
+ SMLoc getStartLoc() const { return Start; }
+ SMLoc getEndLoc() const { return End; }
+
+ virtual void print(raw_ostream &O) const {
+ switch (Kind) {
+ case k_Tok:
+ O << "Token " << Tok;
+ break;
+ case k_Reg:
+ O << "Register " << Reg;
+ break;
+ case k_Imm:
+ O << "Immediate " << *Imm;
+ break;
+ case k_Mem:
+ O << "Memory ";
+ O << *Mem.Offset << "(" << Reg << ")";
+ break;
+ case k_IndReg:
+ O << "RegInd " << Reg;
+ break;
+ case k_PostIndReg:
+ O << "PostInc " << Reg;
+ break;
+ }
+ }
+};
+} // end anonymous namespace
+
+bool MSP430AsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+
+ switch (MatchResult) {
+ case Match_Success:
+ Inst.setLoc(Loc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ case Match_MnemonicFail:
+ return Error(Loc, "invalid instruction mnemonic");
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = Loc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(ErrorLoc, "too few operands for instruction");
+
+ ErrorLoc = ((MSP430Operand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = Loc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ default:
+ return true;
+ }
+}
+
+// Auto-generated by TableGen
+static unsigned MatchRegisterName(StringRef Name);
+static unsigned MatchRegisterAltName(StringRef Name);
+
+bool MSP430AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ auto Name = getLexer().getTok().getIdentifier().lower();
+ RegNo = MatchRegisterName(Name);
+ if (RegNo == MSP430::NoRegister) {
+ RegNo = MatchRegisterAltName(Name);
+ if (RegNo == MSP430::NoRegister)
+ return true;
+ }
+
+ AsmToken const &T = getParser().getTok();
+ StartLoc = T.getLoc();
+ EndLoc = T.getEndLoc();
+ getLexer().Lex(); // eat register token
+
+ return false;
+ }
+
+ return Error(StartLoc, "invalid register name");
+}
+
+bool MSP430AsmParser::parseJccInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
+ if (!Name.startswith_lower("j"))
+ return true;
+
+ auto CC = Name.drop_front().lower();
+ unsigned CondCode;
+ if (CC == "ne" || CC == "nz")
+ CondCode = MSP430CC::COND_NE;
+ else if (CC == "eq" || CC == "z")
+ CondCode = MSP430CC::COND_E;
+ else if (CC == "lo" || CC == "nc")
+ CondCode = MSP430CC::COND_LO;
+ else if (CC == "hs" || CC == "c")
+ CondCode = MSP430CC::COND_HS;
+ else if (CC == "n")
+ CondCode = MSP430CC::COND_N;
+ else if (CC == "ge")
+ CondCode = MSP430CC::COND_GE;
+ else if (CC == "l")
+ CondCode = MSP430CC::COND_L;
+ else if (CC == "mp")
+ CondCode = MSP430CC::COND_NONE;
+ else
+ return Error(NameLoc, "unknown instruction");
+
+ if (CondCode == (unsigned)MSP430CC::COND_NONE)
+ Operands.push_back(MSP430Operand::CreateToken("jmp", NameLoc));
+ else {
+ Operands.push_back(MSP430Operand::CreateToken("j", NameLoc));
+ const MCExpr *CCode = MCConstantExpr::create(CondCode, getContext());
+ Operands.push_back(MSP430Operand::CreateImm(CCode, SMLoc(), SMLoc()));
+ }
+
+ // Skip optional '$' sign.
+ if (getLexer().getKind() == AsmToken::Dollar)
+ getLexer().Lex(); // Eat '$'
+
+ const MCExpr *Val;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (getParser().parseExpression(Val))
+ return Error(ExprLoc, "expected expression operand");
+
+ int64_t Res;
+ if (Val->evaluateAsAbsolute(Res))
+ if (Res < -512 || Res > 511)
+ return Error(ExprLoc, "invalid jump offset");
+
+ Operands.push_back(MSP430Operand::CreateImm(Val, ExprLoc,
+ getLexer().getLoc()));
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ getParser().eatToEndOfStatement();
+ return Error(Loc, "unexpected token");
+ }
+
+ getParser().Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool MSP430AsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
+ // Drop .w suffix
+ if (Name.endswith_lower(".w"))
+ Name = Name.drop_back(2);
+
+ if (!parseJccInstruction(Info, Name, NameLoc, Operands))
+ return false;
+
+ // First operand is instruction mnemonic
+ Operands.push_back(MSP430Operand::CreateToken(Name, NameLoc));
+
+ // If there are no more operands, then finish
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse first operand
+ if (ParseOperand(Operands))
+ return true;
+
+ // Parse second operand if any
+ if (getLexer().is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat ','
+ if (ParseOperand(Operands))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ getParser().eatToEndOfStatement();
+ return Error(Loc, "unexpected token");
+ }
+
+ getParser().Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool MSP430AsmParser::ParseDirectiveRefSym(AsmToken DirectiveID) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ getStreamer().EmitSymbolAttribute(Sym, MCSA_Global);
+ return false;
+}
+
+bool MSP430AsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal.lower() == ".long") {
+ ParseLiteralValues(4, DirectiveID.getLoc());
+ } else if (IDVal.lower() == ".word" || IDVal.lower() == ".short") {
+ ParseLiteralValues(2, DirectiveID.getLoc());
+ } else if (IDVal.lower() == ".byte") {
+ ParseLiteralValues(1, DirectiveID.getLoc());
+ } else if (IDVal.lower() == ".refsym") {
+ return ParseDirectiveRefSym(DirectiveID);
+ }
+ return true;
+}
+
+bool MSP430AsmParser::ParseOperand(OperandVector &Operands) {
+ switch (getLexer().getKind()) {
+ default: return true;
+ case AsmToken::Identifier: {
+ // try rN
+ unsigned RegNo;
+ SMLoc StartLoc, EndLoc;
+ if (!ParseRegister(RegNo, StartLoc, EndLoc)) {
+ Operands.push_back(MSP430Operand::CreateReg(RegNo, StartLoc, EndLoc));
+ return false;
+ }
+ LLVM_FALLTHROUGH;
+ }
+ case AsmToken::Integer:
+ case AsmToken::Plus:
+ case AsmToken::Minus: {
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ const MCExpr *Val;
+ // Try constexpr[(rN)]
+ if (!getParser().parseExpression(Val)) {
+ unsigned RegNo = MSP430::PC;
+ SMLoc EndLoc = getParser().getTok().getLoc();
+ // Try (rN)
+ if (getLexer().getKind() == AsmToken::LParen) {
+ getLexer().Lex(); // Eat '('
+ SMLoc RegStartLoc;
+ if (ParseRegister(RegNo, RegStartLoc, EndLoc))
+ return true;
+ if (getLexer().getKind() != AsmToken::RParen)
+ return true;
+ EndLoc = getParser().getTok().getEndLoc();
+ getLexer().Lex(); // Eat ')'
+ }
+ Operands.push_back(MSP430Operand::CreateMem(RegNo, Val, StartLoc,
+ EndLoc));
+ return false;
+ }
+ return true;
+ }
+ case AsmToken::Amp: {
+ // Try &constexpr
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ getLexer().Lex(); // Eat '&'
+ const MCExpr *Val;
+ if (!getParser().parseExpression(Val)) {
+ SMLoc EndLoc = getParser().getTok().getLoc();
+ Operands.push_back(MSP430Operand::CreateMem(MSP430::SR, Val, StartLoc,
+ EndLoc));
+ return false;
+ }
+ return true;
+ }
+ case AsmToken::At: {
+ // Try @rN[+]
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ getLexer().Lex(); // Eat '@'
+ unsigned RegNo;
+ SMLoc RegStartLoc, EndLoc;
+ if (ParseRegister(RegNo, RegStartLoc, EndLoc))
+ return true;
+ if (getLexer().getKind() == AsmToken::Plus) {
+ Operands.push_back(MSP430Operand::CreatePostIndReg(RegNo, StartLoc, EndLoc));
+ getLexer().Lex(); // Eat '+'
+ return false;
+ }
+ if (Operands.size() > 1) // Emulate @rd in destination position as 0(rd)
+ Operands.push_back(MSP430Operand::CreateMem(RegNo,
+ MCConstantExpr::create(0, getContext()), StartLoc, EndLoc));
+ else
+ Operands.push_back(MSP430Operand::CreateIndReg(RegNo, StartLoc, EndLoc));
+ return false;
+ }
+ case AsmToken::Hash:
+ // Try #constexpr
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ getLexer().Lex(); // Eat '#'
+ const MCExpr *Val;
+ if (!getParser().parseExpression(Val)) {
+ SMLoc EndLoc = getParser().getTok().getLoc();
+ Operands.push_back(MSP430Operand::CreateImm(Val, StartLoc, EndLoc));
+ return false;
+ }
+ return true;
+ }
+}
+
+bool MSP430AsmParser::ParseLiteralValues(unsigned Size, SMLoc L) {
+ auto parseOne = [&]() -> bool {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitValue(Value, Size, L);
+ return false;
+ };
+ return (parseMany(parseOne));
+}
+
+extern "C" void LLVMInitializeMSP430AsmParser() {
+ RegisterMCAsmParser<MSP430AsmParser> X(getTheMSP430Target());
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MSP430GenAsmMatcher.inc"
+
+static unsigned convertGR16ToGR8(unsigned Reg) {
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown GR16 register");
+ case MSP430::PC: return MSP430::PCB;
+ case MSP430::SP: return MSP430::SPB;
+ case MSP430::SR: return MSP430::SRB;
+ case MSP430::CG: return MSP430::CGB;
+ case MSP430::FP: return MSP430::FPB;
+ case MSP430::R5: return MSP430::R5B;
+ case MSP430::R6: return MSP430::R6B;
+ case MSP430::R7: return MSP430::R7B;
+ case MSP430::R8: return MSP430::R8B;
+ case MSP430::R9: return MSP430::R9B;
+ case MSP430::R10: return MSP430::R10B;
+ case MSP430::R11: return MSP430::R11B;
+ case MSP430::R12: return MSP430::R12B;
+ case MSP430::R13: return MSP430::R13B;
+ case MSP430::R14: return MSP430::R14B;
+ case MSP430::R15: return MSP430::R15B;
+ }
+}
+
+unsigned MSP430AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned Kind) {
+ MSP430Operand &Op = static_cast<MSP430Operand &>(AsmOp);
+
+ if (!Op.isReg())
+ return Match_InvalidOperand;
+
+ unsigned Reg = Op.getReg();
+ bool isGR16 =
+ MSP430MCRegisterClasses[MSP430::GR16RegClassID].contains(Reg);
+
+ if (isGR16 && (Kind == MCK_GR8)) {
+ Op.setReg(convertGR16ToGR8(Reg));
+ return Match_Success;
+ }
+
+ return Match_InvalidOperand;
+}
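
// Illustrative sketch (not part of the patch): isCGImm() in the new parser above
// accepts exactly the values the MSP430 constant generators can produce, so those
// immediates need no extension word. A standalone restatement:
#include <cassert>
#include <cstdint>

static bool isConstantGeneratorImm(int64_t Val) {
  return Val == 0 || Val == 1 || Val == 2 || Val == 4 || Val == 8 || Val == -1;
}

int main() {
  assert(isConstantGeneratorImm(8));
  assert(isConstantGeneratorImm(-1));
  assert(!isConstantGeneratorImm(3)); // needs a #imm extension word instead
  return 0;
}
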
diff --git a/contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
new file mode 100644
index 000000000000..e5da130f9bbb
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
@@ -0,0 +1,387 @@
+//===-- MSP430Disassembler.cpp - Disassembler for MSP430 ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430Disassembler class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "msp430-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class MSP430Disassembler : public MCDisassembler {
+ DecodeStatus getInstructionI(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
+
+ DecodeStatus getInstructionII(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
+
+ DecodeStatus getInstructionCJ(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
+
+public:
+ MSP430Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+
+ DecodeStatus getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+};
+} // end anonymous namespace
+
+static MCDisassembler *createMSP430Disassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MSP430Disassembler(STI, Ctx);
+}
+
+extern "C" void LLVMInitializeMSP430Disassembler() {
+ TargetRegistry::RegisterMCDisassembler(getTheMSP430Target(),
+ createMSP430Disassembler);
+}
+
+static const unsigned GR8DecoderTable[] = {
+ MSP430::PCB, MSP430::SPB, MSP430::SRB, MSP430::CGB,
+ MSP430::FPB, MSP430::R5B, MSP430::R6B, MSP430::R7B,
+ MSP430::R8B, MSP430::R9B, MSP430::R10B, MSP430::R11B,
+ MSP430::R12B, MSP430::R13B, MSP430::R14B, MSP430::R15B
+};
+
+static DecodeStatus DecodeGR8RegisterClass(MCInst &MI, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = GR8DecoderTable[RegNo];
+ MI.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static const unsigned GR16DecoderTable[] = {
+ MSP430::PC, MSP430::SP, MSP430::SR, MSP430::CG,
+ MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
+ MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15
+};
+
+static DecodeStatus DecodeGR16RegisterClass(MCInst &MI, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = GR16DecoderTable[RegNo];
+ MI.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMemOperand(MCInst &MI, uint64_t Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+#include "MSP430GenDisassemblerTables.inc"
+
+static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address,
+ const void *Decoder) {
+ int64_t Imm;
+ switch (Bits) {
+ default:
+ llvm_unreachable("Invalid immediate value");
+ case 0x22: Imm = 4; break;
+ case 0x32: Imm = 8; break;
+ case 0x03: Imm = 0; break;
+ case 0x13: Imm = 1; break;
+ case 0x23: Imm = 2; break;
+ case 0x33: Imm = -1; break;
+ }
+ MI.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemOperand(MCInst &MI, uint64_t Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Reg = Bits & 15;
+ unsigned Imm = Bits >> 4;
+
+ if (DecodeGR16RegisterClass(MI, Reg, Address, Decoder) !=
+ MCDisassembler::Success)
+ return MCDisassembler::Fail;
+
+ MI.addOperand(MCOperand::createImm((int16_t)Imm));
+ return MCDisassembler::Success;
+}
+
+enum AddrMode {
+ amInvalid = 0,
+ amRegister,
+ amIndexed,
+ amIndirect,
+ amIndirectPost,
+ amSymbolic,
+ amImmediate,
+ amAbsolute,
+ amConstant
+};
+
+static AddrMode DecodeSrcAddrMode(unsigned Rs, unsigned As) {
+ switch (Rs) {
+ case 0:
+ if (As == 1) return amSymbolic;
+ if (As == 2) return amInvalid;
+ if (As == 3) return amImmediate;
+ break;
+ case 2:
+ if (As == 1) return amAbsolute;
+ if (As == 2) return amConstant;
+ if (As == 3) return amConstant;
+ break;
+ case 3:
+ return amConstant;
+ default:
+ break;
+ }
+ switch (As) {
+ case 0: return amRegister;
+ case 1: return amIndexed;
+ case 2: return amIndirect;
+ case 3: return amIndirectPost;
+ default:
+ llvm_unreachable("As out of range");
+ }
+}
+
+static AddrMode DecodeSrcAddrModeI(unsigned Insn) {
+ unsigned Rs = fieldFromInstruction(Insn, 8, 4);
+ unsigned As = fieldFromInstruction(Insn, 4, 2);
+ return DecodeSrcAddrMode(Rs, As);
+}
+
+static AddrMode DecodeSrcAddrModeII(unsigned Insn) {
+ unsigned Rs = fieldFromInstruction(Insn, 0, 4);
+ unsigned As = fieldFromInstruction(Insn, 4, 2);
+ return DecodeSrcAddrMode(Rs, As);
+}
+
+static AddrMode DecodeDstAddrMode(unsigned Insn) {
+ unsigned Rd = fieldFromInstruction(Insn, 0, 4);
+ unsigned Ad = fieldFromInstruction(Insn, 7, 1);
+ switch (Rd) {
+ case 0: return Ad ? amSymbolic : amRegister;
+ case 2: return Ad ? amAbsolute : amRegister;
+ default:
+ break;
+ }
+ return Ad ? amIndexed : amRegister;
+}
+
+static const uint8_t *getDecoderTable(AddrMode SrcAM, unsigned Words) {
+ assert(0 < Words && Words < 4 && "Incorrect number of words");
+ switch (SrcAM) {
+ default:
+ llvm_unreachable("Invalid addressing mode");
+ case amRegister:
+ assert(Words < 3 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableAlpha32 : DecoderTableAlpha16;
+ case amConstant:
+ assert(Words < 3 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableBeta32 : DecoderTableBeta16;
+ case amIndexed:
+ case amSymbolic:
+ case amImmediate:
+ case amAbsolute:
+ assert(Words > 1 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableGamma32 : DecoderTableGamma48;
+ case amIndirect:
+ case amIndirectPost:
+ assert(Words < 3 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableDelta32 : DecoderTableDelta16;
+ }
+}
+
+DecodeStatus MSP430Disassembler::getInstructionI(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ AddrMode SrcAM = DecodeSrcAddrModeI(Insn);
+ AddrMode DstAM = DecodeDstAddrMode(Insn);
+ if (SrcAM == amInvalid || DstAM == amInvalid) {
+    Size = 2; // skip one word and let the disassembler try further
+ return MCDisassembler::Fail;
+ }
+
+ unsigned Words = 1;
+ switch (SrcAM) {
+ case amIndexed:
+ case amSymbolic:
+ case amImmediate:
+ case amAbsolute:
+ if (Bytes.size() < (Words + 1) * 2) {
+ Size = 2;
+ return DecodeStatus::Fail;
+ }
+ Insn |= (uint64_t)support::endian::read16le(Bytes.data() + 2) << 16;
+ ++Words;
+ break;
+ default:
+ break;
+ }
+ switch (DstAM) {
+ case amIndexed:
+ case amSymbolic:
+ case amAbsolute:
+ if (Bytes.size() < (Words + 1) * 2) {
+ Size = 2;
+ return DecodeStatus::Fail;
+ }
+ Insn |= (uint64_t)support::endian::read16le(Bytes.data() + Words * 2)
+ << (Words * 16);
+ ++Words;
+ break;
+ default:
+ break;
+ }
+
+ DecodeStatus Result = decodeInstruction(getDecoderTable(SrcAM, Words), MI,
+ Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = Words * 2;
+ return Result;
+ }
+
+ Size = 2;
+ return DecodeStatus::Fail;
+}
+
+DecodeStatus MSP430Disassembler::getInstructionII(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ AddrMode SrcAM = DecodeSrcAddrModeII(Insn);
+ if (SrcAM == amInvalid) {
+    Size = 2; // skip one word and let the disassembler try further
+ return MCDisassembler::Fail;
+ }
+
+ unsigned Words = 1;
+ switch (SrcAM) {
+ case amIndexed:
+ case amSymbolic:
+ case amImmediate:
+ case amAbsolute:
+ if (Bytes.size() < (Words + 1) * 2) {
+ Size = 2;
+ return DecodeStatus::Fail;
+ }
+ Insn |= (uint64_t)support::endian::read16le(Bytes.data() + 2) << 16;
+ ++Words;
+ break;
+ default:
+ break;
+ }
+
+ const uint8_t *DecoderTable = Words == 2 ? DecoderTable32 : DecoderTable16;
+ DecodeStatus Result = decodeInstruction(DecoderTable, MI, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = Words * 2;
+ return Result;
+ }
+
+ Size = 2;
+ return DecodeStatus::Fail;
+}
+
+static MSP430CC::CondCodes getCondCode(unsigned Cond) {
+ switch (Cond) {
+ case 0: return MSP430CC::COND_NE;
+ case 1: return MSP430CC::COND_E;
+ case 2: return MSP430CC::COND_LO;
+ case 3: return MSP430CC::COND_HS;
+ case 4: return MSP430CC::COND_N;
+ case 5: return MSP430CC::COND_GE;
+ case 6: return MSP430CC::COND_L;
+ case 7: return MSP430CC::COND_NONE;
+ default:
+ llvm_unreachable("Cond out of range");
+ }
+}
+
+DecodeStatus MSP430Disassembler::getInstructionCJ(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ unsigned Cond = fieldFromInstruction(Insn, 10, 3);
+ unsigned Offset = fieldFromInstruction(Insn, 0, 10);
+
+ MI.addOperand(MCOperand::createImm(SignExtend32(Offset, 10)));
+
+ if (Cond == 7)
+ MI.setOpcode(MSP430::JMP);
+ else {
+ MI.setOpcode(MSP430::JCC);
+ MI.addOperand(MCOperand::createImm(getCondCode(Cond)));
+ }
+
+ Size = 2;
+ return DecodeStatus::Success;
+}
+
+DecodeStatus MSP430Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ if (Bytes.size() < 2) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ unsigned Opc = fieldFromInstruction(Insn, 13, 3);
+ switch (Opc) {
+ case 0:
+ return getInstructionII(MI, Size, Bytes, Address, VStream, CStream);
+ case 1:
+ return getInstructionCJ(MI, Size, Bytes, Address, VStream, CStream);
+ default:
+ return getInstructionI(MI, Size, Bytes, Address, VStream, CStream);
+ }
+}
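
// Illustrative sketch (not part of the patch): getInstruction() above keys on bits
// 15..13 of the first word -- 0 selects a format II (single-operand) instruction,
// 1 a conditional jump, anything else a format I (two-operand) instruction.
// A standalone version of that dispatch:
#include <cstdint>
#include <cstdio>

enum class MSP430Format { II, Jump, I };

static MSP430Format classifyWord(uint16_t Word) {
  switch ((Word >> 13) & 0x7) {
  case 0:  return MSP430Format::II;
  case 1:  return MSP430Format::Jump;
  default: return MSP430Format::I;
  }
}

int main() {
  // 0x3C00 has opcode field 1 -> a jump encoding (condition bits 111, i.e. jmp).
  std::printf("%d\n", static_cast<int>(classifyWord(0x3C00))); // 1 (Jump)
  // 0x4303 ("mov #0, r3", the canonical NOP) has opcode field 2 -> format I.
  std::printf("%d\n", static_cast<int>(classifyWord(0x4303))); // 2 (I)
  return 0;
}
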
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index be6d1a84a377..4d62547bc65b 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -16,28 +16,34 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-
// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
#include "MSP430GenAsmWriter.inc"
void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot, const MCSubtargetInfo &STI) {
- printInstruction(MI, O);
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
printAnnotation(O, Annot);
}
void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isImm())
- O << Op.getImm();
- else {
+ if (Op.isImm()) {
+ int64_t Imm = Op.getImm() * 2 + 2;
+ O << "$";
+ if (Imm >= 0)
+ O << '+';
+ O << Imm;
+ } else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
Op.getExpr()->print(O, &MAI);
}
@@ -72,7 +78,7 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
// vs
// mov.w glb(r1), r2
// Otherwise (!) msp430-as will silently miscompile the output :(
- if (!Base.getReg())
+ if (Base.getReg() == MSP430::SR)
O << '&';
if (Disp.isExpr())
@@ -83,10 +89,23 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
}
// Print register base field
- if (Base.getReg())
+ if ((Base.getReg() != MSP430::SR) &&
+ (Base.getReg() != MSP430::PC))
O << '(' << getRegisterName(Base.getReg()) << ')';
}
+void MSP430InstPrinter::printIndRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Base = MI->getOperand(OpNo);
+ O << "@" << getRegisterName(Base.getReg());
+}
+
+void MSP430InstPrinter::printPostIndRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Base = MI->getOperand(OpNo);
+ O << "@" << getRegisterName(Base.getReg()) << "+";
+}
+
void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned CC = MI->getOperand(OpNo).getImm();
@@ -112,5 +131,8 @@ void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
case MSP430CC::COND_L:
O << 'l';
break;
+ case MSP430CC::COND_N:
+ O << 'n';
+ break;
}
}
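
// Illustrative sketch (not part of the patch): printPCRelImmOperand() above turns the
// stored word offset back into the "$+N" byte form, where N = imm * 2 + 2 (the +2
// accounts for the PC already pointing past the jump). A standalone version:
#include <cstddef>
#include <cstdint>
#include <cstdio>

static void printPCRelImm(int64_t Imm, char *Buf, std::size_t Len) {
  int64_t Bytes = Imm * 2 + 2;
  std::snprintf(Buf, Len, "$%+lld", (long long)Bytes);
}

int main() {
  char Buf[16];
  printPCRelImm(4, Buf, sizeof(Buf));
  std::printf("%s\n", Buf);  // $+10
  printPCRelImm(-3, Buf, sizeof(Buf));
  std::printf("%s\n", Buf);  // $-4
  return 0;
}
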
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 72afec18becb..cd02c4fa645a 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -28,13 +28,20 @@ namespace llvm {
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
+private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = nullptr);
void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = nullptr);
+ void printIndRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPostIndRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O);
void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
new file mode 100644
index 000000000000..bd69a9d8d795
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -0,0 +1,178 @@
+//===-- MSP430AsmBackend.cpp - MSP430 Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MSP430FixupKinds.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class MSP430AsmBackend : public MCAsmBackend {
+ uint8_t OSABI;
+
+ uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ MCContext &Ctx) const;
+
+public:
+ MSP430AsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI)
+ : MCAsmBackend(support::little), OSABI(OSABI) {}
+ ~MSP430AsmBackend() override {}
+
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override;
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ return createMSP430ELFObjectWriter(OSABI);
+ }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+
+ bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout,
+ const bool WasForced) const override {
+ return false;
+ }
+
+ unsigned getNumFixupKinds() const override {
+ return MSP430::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo Infos[MSP430::NumTargetFixupKinds] = {
+      // This table must be in the same order as the enum in MSP430FixupKinds.h.
+ //
+ // name offset bits flags
+ {"fixup_32", 0, 32, 0},
+ {"fixup_10_pcrel", 0, 10, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_16", 0, 16, 0},
+ {"fixup_16_pcrel", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_16_byte", 0, 16, 0},
+ {"fixup_16_pcrel_byte", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_2x_pcrel", 0, 10, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_rl_pcrel", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_8", 0, 8, 0},
+ {"fixup_sym_diff", 0, 32, 0},
+ };
+ static_assert((array_lengthof(Infos)) == MSP430::NumTargetFixupKinds,
+ "Not all fixup kinds added to Infos array");
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {
+ return false;
+ }
+
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override {}
+
+ bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+};
+
+uint64_t MSP430AsmBackend::adjustFixupValue(const MCFixup &Fixup,
+ uint64_t Value,
+ MCContext &Ctx) const {
+ unsigned Kind = Fixup.getKind();
+ switch (Kind) {
+ case MSP430::fixup_10_pcrel: {
+ if (Value & 0x1)
+ Ctx.reportError(Fixup.getLoc(), "fixup value must be 2-byte aligned");
+
+ // Offset is signed
+ int16_t Offset = Value;
+ // Jumps are in words
+ Offset >>= 1;
+ // PC points to the next instruction, so decrement by one
+ --Offset;
+
+ if (Offset < -512 || Offset > 511)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+
+ // Mask 10 bits
+ Offset &= 0x3ff;
+
+ return Offset;
+ }
+ default:
+ return Value;
+ }
+}
+
+void MSP430AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const {
+ Value = adjustFixupValue(Fixup, Value, Asm.getContext());
+ MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+ if (!Value)
+ return; // Doesn't change encoding.
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned Offset = Fixup.getOffset();
+ unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
+
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ }
+}
+
+bool MSP430AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+ if ((Count % 2) != 0)
+ return false;
+
+ // The canonical nop on MSP430 is mov #0, r3
+ uint64_t NopCount = Count / 2;
+ while (NopCount--)
+ OS.write("\x03\x43", 2);
+
+ return true;
+}
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createMSP430MCAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options) {
+ return new MSP430AsmBackend(STI, ELF::ELFOSABI_STANDALONE);
+}
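
The arithmetic in adjustFixupValue() above for fixup_10_pcrel is easy to check in isolation. The sketch below is a hypothetical standalone helper (not part of the patch; the function name and the error flag are illustrative only) that mirrors the same steps: the byte offset must be even, is converted to a word count, decremented because the PC already points past the jump, range-checked against the signed 10-bit window, and masked to 10 bits.

    #include <cstdint>

    // Minimal sketch of the fixup_10_pcrel adjustment, mirroring the code above.
    static uint16_t adjustJmpOffset(uint64_t Value, bool &Error) {
      Error = (Value & 0x1) != 0;                      // target must be 2-byte aligned
      int16_t Offset = static_cast<int16_t>(Value);
      Offset >>= 1;                                    // jumps are counted in words
      --Offset;                                        // PC already points to the next insn
      Error = Error || Offset < -512 || Offset > 511;  // signed 10-bit range
      return static_cast<uint16_t>(Offset) & 0x3ff;
    }

    // Example: a branch 20 bytes forward yields 20/2 - 1 = 9 words.
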
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
new file mode 100644
index 000000000000..e47db2400a05
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
@@ -0,0 +1,59 @@
+//===-- MSP430ELFObjectWriter.cpp - MSP430 ELF Writer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MSP430FixupKinds.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class MSP430ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ MSP430ELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(false, OSABI, ELF::EM_MSP430,
+ /*HasRelocationAddend*/ true) {}
+
+ ~MSP430ELFObjectWriter() override {}
+
+protected:
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override {
+ // Translate fixup kind to ELF relocation type.
+ switch ((unsigned)Fixup.getKind()) {
+ case FK_Data_1: return ELF::R_MSP430_8;
+ case FK_Data_2: return ELF::R_MSP430_16_BYTE;
+ case FK_Data_4: return ELF::R_MSP430_32;
+ case MSP430::fixup_32: return ELF::R_MSP430_32;
+ case MSP430::fixup_10_pcrel: return ELF::R_MSP430_10_PCREL;
+ case MSP430::fixup_16: return ELF::R_MSP430_16;
+ case MSP430::fixup_16_pcrel: return ELF::R_MSP430_16_PCREL;
+ case MSP430::fixup_16_byte: return ELF::R_MSP430_16_BYTE;
+ case MSP430::fixup_16_pcrel_byte: return ELF::R_MSP430_16_PCREL_BYTE;
+ case MSP430::fixup_2x_pcrel: return ELF::R_MSP430_2X_PCREL;
+ case MSP430::fixup_rl_pcrel: return ELF::R_MSP430_RL_PCREL;
+ case MSP430::fixup_8: return ELF::R_MSP430_8;
+ case MSP430::fixup_sym_diff: return ELF::R_MSP430_SYM_DIFF;
+ default:
+ llvm_unreachable("Invalid fixup kind");
+ }
+ }
+};
+} // end of anonymous namespace
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createMSP430ELFObjectWriter(uint8_t OSABI) {
+ return llvm::make_unique<MSP430ELFObjectWriter>(OSABI);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
new file mode 100644
index 000000000000..9449cb278024
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
@@ -0,0 +1,81 @@
+//===-- MSP430ELFStreamer.cpp - MSP430 ELF Target Streamer Methods --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target streamer methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+class MSP430TargetELFStreamer : public MCTargetStreamer {
+public:
+ MCELFStreamer &getStreamer();
+ MSP430TargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+};
+
+// This part is for ELF object output.
+MSP430TargetELFStreamer::MSP430TargetELFStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI)
+ : MCTargetStreamer(S) {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ unsigned EFlags = MCA.getELFHeaderEFlags();
+ MCA.setELFHeaderEFlags(EFlags);
+
+ // Emit build attributes section according to
+ // MSP430 EABI (slaa534.pdf, part 13).
+ MCSection *AttributeSection = getStreamer().getContext().getELFSection(
+ ".MSP430.attributes", ELF::SHT_MSP430_ATTRIBUTES, 0);
+ Streamer.SwitchSection(AttributeSection);
+
+ // Format version.
+ Streamer.EmitIntValue(0x41, 1);
+ // Subsection length.
+ Streamer.EmitIntValue(22, 4);
+ // Vendor name string, zero-terminated.
+ Streamer.EmitBytes("mspabi");
+ Streamer.EmitIntValue(0, 1);
+
+ // Attribute vector scope tag. 1 stands for the entire file.
+ Streamer.EmitIntValue(1, 1);
+ // Attribute vector length.
+ Streamer.EmitIntValue(11, 4);
+ // OFBA_MSPABI_Tag_ISA(4) = 1, MSP430
+ Streamer.EmitIntValue(4, 1);
+ Streamer.EmitIntValue(1, 1);
+ // OFBA_MSPABI_Tag_Code_Model(6) = 1, Small
+ Streamer.EmitIntValue(6, 1);
+ Streamer.EmitIntValue(1, 1);
+ // OFBA_MSPABI_Tag_Data_Model(8) = 1, Small
+ Streamer.EmitIntValue(8, 1);
+ Streamer.EmitIntValue(1, 1);
+}
+
+MCELFStreamer &MSP430TargetELFStreamer::getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+}
+
+MCTargetStreamer *
+createMSP430ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return new MSP430TargetELFStreamer(S, STI);
+ return nullptr;
+}
+
+} // namespace llvm
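
For reference, the constructor above emits a fixed 23-byte .MSP430.attributes blob, and the magic lengths 22 and 11 are exactly the sizes of the sub-section and of the attribute vector. The array below is an illustrative byte-for-byte view of that output (it assumes the little-endian integer layout used by EmitIntValue on this target and is not part of the patch):

    // .MSP430.attributes contents produced by MSP430TargetELFStreamer above.
    static const unsigned char MSP430Attrs[] = {
      0x41,                               // format version
      0x16, 0x00, 0x00, 0x00,             // sub-section length = 22
      'm', 's', 'p', 'a', 'b', 'i', 0x00, // vendor string, NUL-terminated
      0x01,                               // scope tag: whole file
      0x0B, 0x00, 0x00, 0x00,             // attribute vector length = 11
      0x04, 0x01,                         // OFBA_MSPABI_Tag_ISA = 1 (MSP430)
      0x06, 0x01,                         // OFBA_MSPABI_Tag_Code_Model = 1 (small)
      0x08, 0x01,                         // OFBA_MSPABI_Tag_Data_Model = 1 (small)
    };
    // Sub-section length: 4 + 7 + 1 + 4 + 6 = 22; attribute vector: 1 + 4 + 6 = 11.
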
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
new file mode 100644
index 000000000000..1eb6a2759423
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
@@ -0,0 +1,53 @@
+//===-- MSP430FixupKinds.h - MSP430 Specific Fixup Entries ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430FIXUPKINDS_H
+#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+#undef MSP430
+
+namespace llvm {
+namespace MSP430 {
+
+// This table must be in the same order as
+// MCFixupKindInfo Infos[MSP430::NumTargetFixupKinds]
+// in MSP430AsmBackend.cpp.
+//
+enum Fixups {
+ // A 32 bit absolute fixup.
+ fixup_32 = FirstTargetFixupKind,
+ // A 10 bit PC relative fixup.
+ fixup_10_pcrel,
+ // A 16 bit absolute fixup.
+ fixup_16,
+ // A 16 bit PC relative fixup.
+ fixup_16_pcrel,
+ // A 16 bit absolute fixup for byte operations.
+ fixup_16_byte,
+ // A 16 bit PC relative fixup for command address.
+ fixup_16_pcrel_byte,
+ // A 10 bit PC relative fixup for complicated polymorphs.
+ fixup_2x_pcrel,
+ // A 16 bit relaxable fixup.
+ fixup_rl_pcrel,
+ // An 8 bit absolute fixup.
+ fixup_8,
+ // A 32 bit symbol difference fixup.
+ fixup_sym_diff,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // end namespace MSP430
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index 82e6731ecd78..36e9a9c31075 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -20,6 +20,7 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) {
CodePointerSize = CalleeSaveStackSlotSize = 2;
CommentString = ";";
+ SeparatorString = "{";
AlignmentIsInBytes = false;
UsesELFSectionDirectiveForBSS = true;
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
new file mode 100644
index 000000000000..06f9f307cb1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
@@ -0,0 +1,211 @@
+//===-- MSP430MCCodeEmitter.cpp - Convert MSP430 code to machine code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "MCTargetDesc/MSP430FixupKinds.h"
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "mccodeemitter"
+
+namespace llvm {
+
+class MSP430MCCodeEmitter : public MCCodeEmitter {
+ MCContext &Ctx;
+ MCInstrInfo const &MCII;
+
+ // Offset keeps track of the current word number being emitted
+ // inside a particular instruction.
+ mutable unsigned Offset;
+
+ /// TableGen'erated function for getting the binary encoding for an
+ /// instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Returns the binary encoding of operands.
+ ///
+ /// If an operand requires relocation, the relocation is recorded
+ /// and zero is returned.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getMemOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getPCRelImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getCGImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getCCOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+public:
+ MSP430MCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
+ : Ctx(ctx), MCII(MCII) {}
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+};
+
+void MSP430MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ // Get byte count of instruction.
+ unsigned Size = Desc.getSize();
+
+ // Initialize fixup offset
+ Offset = 2;
+
+ uint64_t BinaryOpCode = getBinaryCodeForInstr(MI, Fixups, STI);
+ size_t WordCount = Size / 2;
+
+ while (WordCount--) {
+ support::endian::write(OS, (uint16_t)BinaryOpCode, support::little);
+ BinaryOpCode >>= 16;
+ }
+}
+
+unsigned MSP430MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg())
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+
+ if (MO.isImm()) {
+ Offset += 2;
+ return MO.getImm();
+ }
+
+ assert(MO.isExpr() && "Expected expr operand");
+ Fixups.push_back(MCFixup::create(Offset, MO.getExpr(),
+ static_cast<MCFixupKind>(MSP430::fixup_16_byte), MI.getLoc()));
+ Offset += 2;
+ return 0;
+}
+
+unsigned MSP430MCCodeEmitter::getMemOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO1 = MI.getOperand(Op);
+ assert(MO1.isReg() && "Register operand expected");
+ unsigned Reg = Ctx.getRegisterInfo()->getEncodingValue(MO1.getReg());
+
+ const MCOperand &MO2 = MI.getOperand(Op + 1);
+ if (MO2.isImm()) {
+ Offset += 2;
+ return ((unsigned)MO2.getImm() << 4) | Reg;
+ }
+
+ assert(MO2.isExpr() && "Expr operand expected");
+ MSP430::Fixups FixupKind;
+ switch (Reg) {
+ case 0:
+ FixupKind = MSP430::fixup_16_pcrel_byte;
+ break;
+ case 2:
+ FixupKind = MSP430::fixup_16_byte;
+ break;
+ default:
+ FixupKind = MSP430::fixup_16_byte;
+ break;
+ }
+ Fixups.push_back(MCFixup::create(Offset, MO2.getExpr(),
+ static_cast<MCFixupKind>(FixupKind), MI.getLoc()));
+ Offset += 2;
+ return Reg;
+}
+
+unsigned MSP430MCCodeEmitter::getPCRelImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ if (MO.isImm())
+ return MO.getImm();
+
+ assert(MO.isExpr() && "Expr operand expected");
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ static_cast<MCFixupKind>(MSP430::fixup_10_pcrel), MI.getLoc()));
+ return 0;
+}
+
+unsigned MSP430MCCodeEmitter::getCGImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ assert(MO.isImm() && "Immediate operand expected");
+
+ int64_t Imm = MO.getImm();
+ switch (Imm) {
+ default:
+ llvm_unreachable("Invalid immediate value");
+ case 4: return 0x22;
+ case 8: return 0x32;
+ case 0: return 0x03;
+ case 1: return 0x13;
+ case 2: return 0x23;
+ case -1: return 0x33;
+ }
+}
+
+unsigned MSP430MCCodeEmitter::getCCOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ assert(MO.isImm() && "Immediate operand expected");
+ switch (MO.getImm()) {
+ case MSP430CC::COND_NE: return 0;
+ case MSP430CC::COND_E: return 1;
+ case MSP430CC::COND_LO: return 2;
+ case MSP430CC::COND_HS: return 3;
+ case MSP430CC::COND_N: return 4;
+ case MSP430CC::COND_GE: return 5;
+ case MSP430CC::COND_L: return 6;
+ default:
+ llvm_unreachable("Unknown condition code");
+ }
+}
+
+MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new MSP430MCCodeEmitter(Ctx, MCII);
+}
+
+#include "MSP430GenMCCodeEmitter.inc"
+
+} // end of namespace llvm
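
The constants returned by getCGImmOpValue() above are not arbitrary: they pack the MSP430 constant-generator encoding, with the source register (R2/SR or R3/CG) in the low nibble and the As addressing-mode bits in bits 5-4. That is how the immediates 0, 1, 2, 4, 8 and -1 are materialised without spending an extra immediate word. A sketch of the mapping (illustrative table, not part of the patch):

    // Decoding of the getCGImmOpValue() return values: {Rs, As, constant}.
    struct CGEncoding { unsigned Rs; unsigned As; int Imm; };
    static const CGEncoding CGTable[] = {
      {3, 0,  0},  // 0x03: R3 with As=00 reads as 0
      {3, 1,  1},  // 0x13: R3 with As=01 reads as 1
      {3, 2,  2},  // 0x23: R3 with As=10 reads as 2
      {3, 3, -1},  // 0x33: R3 with As=11 reads as -1
      {2, 2,  4},  // 0x22: R2 with As=10 reads as 4
      {2, 3,  8},  // 0x32: R2 with As=11 reads as 8
    };
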
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 8c715500f38b..b21145d3904a 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -58,22 +58,15 @@ static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T,
}
extern "C" void LLVMInitializeMSP430TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<MSP430MCAsmInfo> X(getTheMSP430Target());
+ Target &T = getTheMSP430Target();
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(getTheMSP430Target(),
- createMSP430MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(getTheMSP430Target(),
- createMSP430MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(getTheMSP430Target(),
- createMSP430MCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(getTheMSP430Target(),
- createMSP430MCInstPrinter);
+ RegisterMCAsmInfo<MSP430MCAsmInfo> X(T);
+ TargetRegistry::RegisterMCInstrInfo(T, createMSP430MCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(T, createMSP430MCRegisterInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(T, createMSP430MCSubtargetInfo);
+ TargetRegistry::RegisterMCInstPrinter(T, createMSP430MCInstPrinter);
+ TargetRegistry::RegisterMCCodeEmitter(T, createMSP430MCCodeEmitter);
+ TargetRegistry::RegisterMCAsmBackend(T, createMSP430MCAsmBackend);
+ TargetRegistry::RegisterObjectTargetStreamer(
+ T, createMSP430ObjectTargetStreamer);
}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index b901c5f09794..e484c79c9ee9 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -15,12 +15,39 @@
#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
+#include <memory>
namespace llvm {
class Target;
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCInstrInfo;
+class MCSubtargetInfo;
+class MCRegisterInfo;
+class MCContext;
+class MCTargetOptions;
+class MCObjectTargetWriter;
+class MCStreamer;
+class MCTargetStreamer;
Target &getTheMSP430Target();
+/// Creates a machine code emitter for MSP430.
+MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
+MCAsmBackend *createMSP430MCAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options);
+
+MCTargetStreamer *
+createMSP430ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+
+std::unique_ptr<MCObjectTargetWriter>
+createMSP430ELFObjectWriter(uint8_t OSABI);
+
} // End llvm namespace
// Defines symbolic names for MSP430 registers.
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.h b/contrib/llvm/lib/Target/MSP430/MSP430.h
index 796f25233123..7a5314a10844 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.h
@@ -27,6 +27,8 @@ namespace MSP430CC {
COND_LO = 3, // aka COND_NC
COND_GE = 4,
COND_L = 5,
+ COND_N = 6, // jump if negative
+ COND_NONE, // unconditional
COND_INVALID = -1
};
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.td b/contrib/llvm/lib/Target/MSP430/MSP430.td
index 203864dd4065..8fa99dc13dd5 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.td
@@ -64,11 +64,29 @@ include "MSP430InstrInfo.td"
def MSP430InstrInfo : InstrInfo;
+//===---------------------------------------------------------------------===//
+// Assembly Printers
+//===---------------------------------------------------------------------===//
+
+def MSP430AsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+}
+
+//===---------------------------------------------------------------------===//
+// Assembly Parsers
+//===---------------------------------------------------------------------===//
+
+def MSP430AsmParser : AsmParser {
+ let AllowDuplicateRegisterNames = 1;
+ let ShouldEmitMatchRegisterAltName = 1;
+}
+
//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//
def MSP430 : Target {
let InstructionSet = MSP430InstrInfo;
+ let AssemblyParsers = [MSP430AsmParser];
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 005f5f44a635..7a1998ad355d 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -98,6 +98,7 @@ namespace {
MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel) {}
+ private:
StringRef getPassName() const override {
return "MSP430 DAG->DAG Pattern Instruction Selection";
}
@@ -112,8 +113,9 @@ namespace {
// Include the pieces autogenerated from the target description.
#include "MSP430GenDAGISel.inc"
- private:
+ // Main method to transform nodes into machine nodes.
void Select(SDNode *N) override;
+
bool tryIndexedLoad(SDNode *Op);
bool tryIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2, unsigned Opc8,
unsigned Opc16);
@@ -250,11 +252,9 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
if (MatchAddress(N, AM))
return false;
- EVT VT = N.getValueType();
- if (AM.BaseType == MSP430ISelAddressMode::RegBase) {
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase)
if (!AM.Base.Reg.getNode())
- AM.Base.Reg = CurDAG->getRegister(0, VT);
- }
+ AM.Base.Reg = CurDAG->getRegister(MSP430::SR, MVT::i16);
Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase)
? CurDAG->getTargetFrameIndex(
@@ -336,10 +336,10 @@ bool MSP430DAGToDAGISel::tryIndexedLoad(SDNode *N) {
unsigned Opcode = 0;
switch (VT.SimpleTy) {
case MVT::i8:
- Opcode = MSP430::MOV8rm_POST;
+ Opcode = MSP430::MOV8rp;
break;
case MVT::i16:
- Opcode = MSP430::MOV16rm_POST;
+ Opcode = MSP430::MOV16rp;
break;
default:
return false;
@@ -362,12 +362,11 @@ bool MSP430DAGToDAGISel::tryIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
MVT VT = LD->getMemoryVT().getSimpleVT();
unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
+ MachineMemOperand *MemRef = cast<MemSDNode>(N1)->getMemOperand();
SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() };
SDNode *ResNode =
CurDAG->SelectNodeTo(Op, Opc, VT, MVT::i16, MVT::Other, Ops0);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemRef});
// Transfer chain.
ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2));
// Transfer writeback.
@@ -413,47 +412,47 @@ void MSP430DAGToDAGISel::Select(SDNode *Node) {
break;
case ISD::ADD:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ MSP430::ADD8rp, MSP430::ADD16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ MSP430::ADD8rp, MSP430::ADD16rp))
return;
// Other cases are autogenerated.
break;
case ISD::SUB:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::SUB8rm_POST, MSP430::SUB16rm_POST))
+ MSP430::SUB8rp, MSP430::SUB16rp))
return;
// Other cases are autogenerated.
break;
case ISD::AND:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ MSP430::AND8rp, MSP430::AND16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ MSP430::AND8rp, MSP430::AND16rp))
return;
// Other cases are autogenerated.
break;
case ISD::OR:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ MSP430::BIS8rp, MSP430::BIS16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ MSP430::BIS8rp, MSP430::BIS16rp))
return;
// Other cases are autogenerated.
break;
case ISD::XOR:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ MSP430::XOR8rp, MSP430::XOR16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ MSP430::XOR8rp, MSP430::XOR16rp))
return;
// Other cases are autogenerated.
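
The *_POST to *rp renaming above ties the indexed-load selection to MSP430's auto-increment source mode (@Rn+). A hedged example of the code shape tryIndexedBinOp() targets is shown below; with the pointer in r14 and the accumulator in r15, the loop body is expected to collapse into a single "add @r14+, r15" (the register assignment is only illustrative, not mandated by the patch):

    // Post-increment load folded into the ALU operation (ADD16rp).
    int sum_words(const int *p, int n) {   // int is 16 bits on MSP430
      int s = 0;
      for (int i = 0; i < n; ++i)
        s += *p++;                         // load + post-increment + add
      return s;
    }
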
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index f5b2bda5d1e4..3e706134afc5 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -95,6 +95,8 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::CTTZ, MVT::i8, Expand);
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
@@ -217,8 +219,6 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
// { RTLIB::NEG_F64, "__mspabi_negd", ISD::SETCC_INVALID },
// { RTLIB::NEG_F32, "__mspabi_negf", ISD::SETCC_INVALID },
- // TODO: SLL/SRA/SRL are in libgcc, RLL isn't
-
// Universal Integer Operations - EABI Table 9
{ RTLIB::SDIV_I16, "__mspabi_divi", ISD::SETCC_INVALID },
{ RTLIB::SDIV_I32, "__mspabi_divli", ISD::SETCC_INVALID },
@@ -233,6 +233,13 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
{ RTLIB::UREM_I32, "__mspabi_remul", ISD::SETCC_INVALID },
{ RTLIB::UREM_I64, "__mspabi_remull", ISD::SETCC_INVALID },
+ // Bitwise Operations - EABI Table 10
+ // TODO: __mspabi_[srli/srai/slli] ARE implemented in libgcc
+ { RTLIB::SRL_I32, "__mspabi_srll", ISD::SETCC_INVALID },
+ { RTLIB::SRA_I32, "__mspabi_sral", ISD::SETCC_INVALID },
+ { RTLIB::SHL_I32, "__mspabi_slll", ISD::SETCC_INVALID },
+ // __mspabi_[srlll/srall/sllll/rlli/rlll] are NOT implemented in libgcc
+
};
for (const auto &LC : LibraryCalls) {
@@ -940,30 +947,40 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
// Expand non-constant shifts to loops:
if (!isa<ConstantSDNode>(N->getOperand(1)))
- switch (Opc) {
- default: llvm_unreachable("Invalid shift opcode!");
- case ISD::SHL:
- return DAG.getNode(MSP430ISD::SHL, dl,
- VT, N->getOperand(0), N->getOperand(1));
- case ISD::SRA:
- return DAG.getNode(MSP430ISD::SRA, dl,
- VT, N->getOperand(0), N->getOperand(1));
- case ISD::SRL:
- return DAG.getNode(MSP430ISD::SRL, dl,
- VT, N->getOperand(0), N->getOperand(1));
- }
+ return Op;
uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
// Expand the stuff into sequence of shifts.
- // FIXME: for some shift amounts this might be done better!
- // E.g.: foo >> (8 + N) => sxt(swpb(foo)) >> N
SDValue Victim = N->getOperand(0);
+ if (ShiftAmount >= 8) {
+ assert(VT == MVT::i16 && "Cannot shift i8 by 8 or more");
+ switch(Opc) {
+ default:
+ llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ // foo << (8 + N) => swpb(zext(foo)) << N
+ Victim = DAG.getZeroExtendInReg(Victim, dl, MVT::i8);
+ Victim = DAG.getNode(ISD::BSWAP, dl, VT, Victim);
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ // foo >> (8 + N) => sxt(swpb(foo)) >> N
+ Victim = DAG.getNode(ISD::BSWAP, dl, VT, Victim);
+ Victim = (Opc == ISD::SRA)
+ ? DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Victim,
+ DAG.getValueType(MVT::i8))
+ : DAG.getZeroExtendInReg(Victim, dl, MVT::i8);
+ break;
+ }
+ ShiftAmount -= 8;
+ }
+
if (Opc == ISD::SRL && ShiftAmount) {
// Emit a special goodness here:
// srl A, 1 => clrc; rrc A
- Victim = DAG.getNode(MSP430ISD::RRC, dl, VT, Victim);
+ Victim = DAG.getNode(MSP430ISD::RRCL, dl, VT, Victim);
ShiftAmount -= 1;
}
@@ -1342,15 +1359,14 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
case MSP430ISD::RRA: return "MSP430ISD::RRA";
case MSP430ISD::RLA: return "MSP430ISD::RLA";
case MSP430ISD::RRC: return "MSP430ISD::RRC";
+ case MSP430ISD::RRCL: return "MSP430ISD::RRCL";
case MSP430ISD::CALL: return "MSP430ISD::CALL";
case MSP430ISD::Wrapper: return "MSP430ISD::Wrapper";
case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
case MSP430ISD::CMP: return "MSP430ISD::CMP";
case MSP430ISD::SETCC: return "MSP430ISD::SETCC";
case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
- case MSP430ISD::SHL: return "MSP430ISD::SHL";
- case MSP430ISD::SRA: return "MSP430ISD::SRA";
- case MSP430ISD::SRL: return "MSP430ISD::SRL";
+ case MSP430ISD::DADD: return "MSP430ISD::DADD";
}
return nullptr;
}
@@ -1397,33 +1413,49 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI,
const TargetInstrInfo &TII = *F->getSubtarget().getInstrInfo();
unsigned Opc;
+ bool ClearCarry = false;
const TargetRegisterClass * RC;
switch (MI.getOpcode()) {
default: llvm_unreachable("Invalid shift opcode!");
case MSP430::Shl8:
- Opc = MSP430::SHL8r1;
- RC = &MSP430::GR8RegClass;
- break;
+ Opc = MSP430::ADD8rr;
+ RC = &MSP430::GR8RegClass;
+ break;
case MSP430::Shl16:
- Opc = MSP430::SHL16r1;
- RC = &MSP430::GR16RegClass;
- break;
+ Opc = MSP430::ADD16rr;
+ RC = &MSP430::GR16RegClass;
+ break;
case MSP430::Sra8:
- Opc = MSP430::SAR8r1;
- RC = &MSP430::GR8RegClass;
- break;
+ Opc = MSP430::RRA8r;
+ RC = &MSP430::GR8RegClass;
+ break;
case MSP430::Sra16:
- Opc = MSP430::SAR16r1;
- RC = &MSP430::GR16RegClass;
- break;
+ Opc = MSP430::RRA16r;
+ RC = &MSP430::GR16RegClass;
+ break;
case MSP430::Srl8:
- Opc = MSP430::SAR8r1c;
- RC = &MSP430::GR8RegClass;
- break;
+ ClearCarry = true;
+ Opc = MSP430::RRC8r;
+ RC = &MSP430::GR8RegClass;
+ break;
case MSP430::Srl16:
- Opc = MSP430::SAR16r1c;
- RC = &MSP430::GR16RegClass;
- break;
+ ClearCarry = true;
+ Opc = MSP430::RRC16r;
+ RC = &MSP430::GR16RegClass;
+ break;
+ case MSP430::Rrcl8:
+ case MSP430::Rrcl16: {
+ BuildMI(*BB, MI, dl, TII.get(MSP430::BIC16rc), MSP430::SR)
+ .addReg(MSP430::SR).addImm(1);
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned RrcOpc = MI.getOpcode() == MSP430::Rrcl16
+ ? MSP430::RRC16r : MSP430::RRC8r;
+ BuildMI(*BB, MI, dl, TII.get(RrcOpc), DstReg)
+ .addReg(SrcReg);
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
}
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -1476,8 +1508,16 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI,
BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg)
.addReg(ShiftAmtSrcReg).addMBB(BB)
.addReg(ShiftAmtReg2).addMBB(LoopBB);
- BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
- .addReg(ShiftReg);
+ if (ClearCarry)
+ BuildMI(LoopBB, dl, TII.get(MSP430::BIC16rc), MSP430::SR)
+ .addReg(MSP430::SR).addImm(1);
+ if (Opc == MSP430::ADD8rr || Opc == MSP430::ADD16rr)
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg)
+ .addReg(ShiftReg);
+ else
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg);
BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2)
.addReg(ShiftAmtReg).addImm(1);
BuildMI(LoopBB, dl, TII.get(MSP430::JCC))
@@ -1499,9 +1539,10 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
unsigned Opc = MI.getOpcode();
- if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
- Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
- Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
+ if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
+ Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
+ Opc == MSP430::Srl8 || Opc == MSP430::Srl16 ||
+ Opc == MSP430::Rrcl8 || Opc == MSP430::Rrcl16)
return EmitShiftInstr(MI, BB);
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
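
The rewritten LowerShifts() first peels a whole byte off a constant shift amount: for an i16 value, a shift by 8+N is rewritten as a byte swap (swpb) combined with a zero or sign extension, leaving only N single-bit shifts, while non-constant amounts now fall through to the loop expansion in EmitShiftInstr. The identities used for SHL and SRL can be sketched as plain 16-bit arithmetic (hypothetical helpers, not part of the patch; swpb is modelled as a byte rotate):

    #include <cstdint>

    static uint16_t swpb(uint16_t x) { return (uint16_t)((x << 8) | (x >> 8)); }

    static uint16_t shl_8_plus_n(uint16_t x, unsigned n) { // x << (8 + n)
      uint16_t v = swpb(x & 0x00FF);     // zero-extend the low byte, then swap
      return (uint16_t)(v << n);         // only n plain shifts remain
    }

    static uint16_t srl_8_plus_n(uint16_t x, unsigned n) { // x >> (8 + n), logical
      uint16_t v = swpb(x) & 0x00FF;     // swap, then zero-extend the low byte
      return (uint16_t)(v >> n);         // only n plain shifts remain
    }

The SRA case is the same as SRL, except the low byte is sign-extended instead of zero-extended.
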
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
index 842d03df32fc..731bc1406711 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -36,6 +36,9 @@ namespace llvm {
/// Y = RRC X, rotate right via carry
RRC,
+ /// Rotate right via carry; the carry is cleared beforehand by clrc
+ RRCL,
+
/// CALL - These operations represent an abstract call
/// instruction, which includes a bunch of information.
CALL,
@@ -61,8 +64,9 @@ namespace llvm {
/// is condition code and operand 4 is flag operand.
SELECT_CC,
- /// SHL, SRA, SRL - Non-constant shifts.
- SHL, SRA, SRL
+ /// DADD - Decimal addition with carry
+ /// TODO Nothing generates a node of this type yet.
+ DADD,
};
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
index a9e87dad0cd8..e2e4503db20c 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
@@ -11,201 +11,431 @@
// Describe MSP430 instructions format here
//
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-class Format<bits<2> val> {
- bits<2> Value = val;
-}
-
-def PseudoFrm : Format<0>;
-def SingleOpFrm : Format<1>;
-def DoubleOpFrm : Format<2>;
-def CondJumpFrm : Format<3>;
-
class SourceMode<bits<2> val> {
bits<2> Value = val;
}
-def SrcReg : SourceMode<0>;
-def SrcMem : SourceMode<1>;
-def SrcIndReg : SourceMode<2>;
-def SrcPostInc : SourceMode<3>;
-def SrcImm : SourceMode<3>;
+def SrcReg : SourceMode<0>; // r
+def SrcMem : SourceMode<1>; // m
+def SrcIndReg : SourceMode<2>; // n
+def SrcPostInc : SourceMode<3>; // p
+def SrcImm : SourceMode<3>; // i
+// SrcCGImm : SourceMode< >; // c
class DestMode<bit val> {
bit Value = val;
}
-def DstReg : DestMode<0>;
-def DstMem : DestMode<1>;
-
-class SizeVal<bits<3> val> {
- bits<3> Value = val;
-}
-
-def SizeUnknown : SizeVal<0>; // Unknown / unset size
-def SizeSpecial : SizeVal<1>; // Special instruction, e.g. pseudo
-def Size2Bytes : SizeVal<2>;
-def Size4Bytes : SizeVal<3>;
-def Size6Bytes : SizeVal<4>;
+def DstReg : DestMode<0>; // r
+def DstMem : DestMode<1>; // m
// Generic MSP430 Format
-class MSP430Inst<dag outs, dag ins, SizeVal sz, Format f,
- string asmstr> : Instruction {
- field bits<16> Inst;
+class MSP430Inst<dag outs, dag ins, int size, string asmstr> : Instruction {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
let Namespace = "MSP430";
dag OutOperandList = outs;
dag InOperandList = ins;
- Format Form = f;
- SizeVal Sz = sz;
-
- // Define how we want to layout our TargetSpecific information field... This
- // should be kept up-to-date with the fields in the MSP430InstrInfo.h file.
- let TSFlags{1-0} = Form.Value;
- let TSFlags{4-2} = Sz.Value;
-
- let AsmString = asmstr;
+ let AsmString = asmstr;
+ let Size = size;
}
-// FIXME: Create different classes for different addressing modes.
-
// MSP430 Double Operand (Format I) Instructions
-class IForm<bits<4> opcode, DestMode dest, bit bw, SourceMode src, SizeVal sz,
+class IForm<bits<4> opcode, DestMode ad, bit bw, SourceMode as, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, sz, DoubleOpFrm, asmstr> {
+ : MSP430Inst<outs, ins, size, asmstr> {
let Pattern = pattern;
- DestMode ad = dest;
- SourceMode as = src;
-
- let Inst{12-15} = opcode;
+ bits<4> rs;
+ bits<4> rd;
+
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = rs;
let Inst{7} = ad.Value;
let Inst{6} = bw;
- let Inst{4-5} = as.Value;
+ let Inst{5-4} = as.Value;
+ let Inst{3-0} = rd;
}
// 8 bit IForm instructions
-class IForm8<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+class IForm8<bits<4> opcode, DestMode dest, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm<opcode, dest, 1, src, sz, outs, ins, asmstr, pattern>;
+ : IForm<opcode, dest, 1, src, size, outs, ins, asmstr, pattern>;
class I8rr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstReg, SrcReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+}
class I8ri<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstReg, SrcImm, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ let Inst{31-16} = imm;
+ let rs = 0b0000;
+}
+
+class I8rc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<4> rd;
+
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstReg.Value;
+ let Inst{6} = 1;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = rd;
+}
class I8rm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstReg, SrcMem, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
+
+class I8rn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcIndReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
+
+class I8rp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcPostInc, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
class I8mr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstMem, SrcReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
class I8mi<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstMem, SrcImm, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ bits<20> dst;
+ let rs = 0b0000;
+ let Inst{31-16} = imm;
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I8mc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 4, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<20> dst;
+
+ let Inst{31-16} = dst{19-4};
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstMem.Value;
+ let Inst{6} = 1;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = dst{3-0};
+}
class I8mm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstMem, SrcMem, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ bits<20> dst;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I8mn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcIndReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
+
+class I8mp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcPostInc, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
// 16 bit IForm instructions
-class IForm16<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+class IForm16<bits<4> opcode, DestMode dest, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm<opcode, dest, 0, src, sz, outs, ins, asmstr, pattern>;
+ : IForm<opcode, dest, 0, src, size, outs, ins, asmstr, pattern>;
class I16rr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstReg, SrcReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+}
class I16ri<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstReg, SrcImm, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ let Inst{31-16} = imm;
+ let rs = 0b0000;
+}
+
+class I16rc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<4> rd;
+
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstReg.Value;
+ let Inst{6} = 0;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = rd;
+}
class I16rm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstReg, SrcMem, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
+
+class I16rn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcIndReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
+
+class I16rp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcPostInc, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
class I16mr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstMem, SrcReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
class I16mi<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstMem, SrcImm, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ bits<20> dst;
+ let Inst{31-16} = imm;
+ let rs = 0b0000;
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I16mc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 4, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<20> dst;
+
+ let Inst{31-16} = dst{19-4};
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstMem.Value;
+ let Inst{6} = 0;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = dst{3-0};
+}
class I16mm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstMem, SrcMem, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ bits<20> dst;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I16mn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcIndReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
+
+class I16mp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcPostInc, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
// MSP430 Single Operand (Format II) Instructions
-class IIForm<bits<9> opcode, bit bw, SourceMode src, SizeVal sz,
+class IIForm<bits<3> opcode, bit bw, SourceMode as, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, sz, SingleOpFrm, asmstr> {
+ : MSP430Inst<outs, ins, size, asmstr> {
let Pattern = pattern;
-
- SourceMode as = src;
- let Inst{7-15} = opcode;
- let Inst{6} = bw;
- let Inst{4-5} = as.Value;
+ bits<4> rs;
+
+ let Inst{15-10} = 0b000100;
+ let Inst{9-7} = opcode;
+ let Inst{6} = bw;
+ let Inst{5-4} = as.Value;
+ let Inst{3-0} = rs;
}
// 8 bit IIForm instructions
-class IIForm8<bits<9> opcode, SourceMode src, SizeVal sz,
+class IIForm8<bits<3> opcode, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm<opcode, 1, src, sz, outs, ins, asmstr, pattern>;
+ : IIForm<opcode, 1, src, size, outs, ins, asmstr, pattern>;
+
+class II8r<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcReg, 2, outs, ins, asmstr, pattern>;
-class II8r<bits<9> opcode,
+class II8m<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm8<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IIForm8<opcode, SrcMem, 4, outs, ins, asmstr, pattern> {
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
-class II8m<bits<9> opcode,
+class II8i<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm8<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IIForm8<opcode, SrcImm, 4, outs, ins, asmstr, pattern> {
+ bits<16> imm;
+ let rs = 0b0000;
+ let Inst{31-16} = imm;
+}
-class II8i<bits<9> opcode,
+class II8c<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm8<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let Pattern = pattern;
+
+ bits<6> imm;
+
+ let Inst{15-10} = 0b000100;
+ let Inst{9-7} = opcode;
+ let Inst{6} = 1;
+ let Inst{5-0} = imm;
+}
+
+class II8n<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcIndReg, 2, outs, ins, asmstr, pattern>;
+
+class II8p<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcPostInc, 2, outs, ins, asmstr, pattern>;
// 16 bit IIForm instructions
-class IIForm16<bits<9> opcode, SourceMode src, SizeVal sz,
+class IIForm16<bits<3> opcode, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm<opcode, 0, src, sz, outs, ins, asmstr, pattern>;
+ : IIForm<opcode, 0, src, size, outs, ins, asmstr, pattern>;
-class II16r<bits<9> opcode,
+class II16r<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm16<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IIForm16<opcode, SrcReg, 2, outs, ins, asmstr, pattern>;
-class II16m<bits<9> opcode,
+class II16m<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm16<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IIForm16<opcode, SrcMem, 4, outs, ins, asmstr, pattern> {
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
-class II16i<bits<9> opcode,
+class II16i<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm16<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IIForm16<opcode, SrcImm, 4, outs, ins, asmstr, pattern> {
+ bits<16> imm;
+ let rs = 0b0000;
+ let Inst{31-16} = imm;
+}
+
+class II16c<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let Pattern = pattern;
+
+ bits<6> imm;
+
+ let Inst{15-10} = 0b000100;
+ let Inst{9-7} = opcode;
+ let Inst{6} = 0;
+ let Inst{5-0} = imm;
+}
+
+class II16n<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcIndReg, 2, outs, ins, asmstr, pattern>;
+
+class II16p<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcPostInc, 2, outs, ins, asmstr, pattern>;
// MSP430 Conditional Jumps Instructions
-class CJForm<bits<3> opcode, bits<3> cond,
- dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, Size2Bytes, CondJumpFrm, asmstr> {
+class CJForm<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
let Pattern = pattern;
- let Inst{13-15} = opcode;
- let Inst{10-12} = cond;
+ bits<3> cond;
+ bits<10> dst;
+
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = cond;
+ let Inst{9-0} = dst;
}
// Pseudo instructions
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, SizeSpecial, PseudoFrm, asmstr> {
+ : MSP430Inst<outs, ins, 0, asmstr> {
let Pattern = pattern;
- let Inst{15-0} = 0;
}
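
With the layout now spelled out per class (for Format I: opcode in bits 15-12, source register in 11-8, Ad in bit 7, B/W in bit 6, As in bits 5-4, destination register in 3-0), instruction words can be assembled mechanically. A small sketch of packing a register-to-register word MOV (hypothetical helper; the register choice is only an example):

    #include <cstdint>

    // Pack a Format I (double-operand) instruction word per the IForm layout above.
    static uint16_t packIForm(unsigned Opcode, unsigned Rs, unsigned Ad,
                              unsigned BW, unsigned As, unsigned Rd) {
      return (uint16_t)((Opcode << 12) | (Rs << 8) | (Ad << 7) |
                        (BW << 6) | (As << 4) | Rd);
    }

    // "mov r10, r11": opcode 0b0100, rs = 10, Ad = 0, B/W = 0, As = 00, rd = 11,
    // i.e. packIForm(0x4, 10, 0, 0, 0, 11) == 0x4A0B.
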
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index dd1b30a3e470..c136933a51bc 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -301,35 +301,20 @@ unsigned MSP430InstrInfo::insertBranch(MachineBasicBlock &MBB,
unsigned MSP430InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MCInstrDesc &Desc = MI.getDesc();
- switch (Desc.TSFlags & MSP430II::SizeMask) {
- default:
- switch (Desc.getOpcode()) {
- default: llvm_unreachable("Unknown instruction size!");
- case TargetOpcode::CFI_INSTRUCTION:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::DBG_VALUE:
- return 0;
- case TargetOpcode::INLINEASM: {
- const MachineFunction *MF = MI.getParent()->getParent();
- const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
- return TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
- *MF->getTarget().getMCAsmInfo());
- }
- }
- case MSP430II::SizeSpecial:
- switch (MI.getOpcode()) {
- default: llvm_unreachable("Unknown instruction size!");
- case MSP430::SAR8r1c:
- case MSP430::SAR16r1c:
- return 4;
- }
- case MSP430II::Size2Bytes:
- return 2;
- case MSP430II::Size4Bytes:
- return 4;
- case MSP430II::Size6Bytes:
- return 6;
+ switch (Desc.getOpcode()) {
+ case TargetOpcode::CFI_INSTRUCTION:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case TargetOpcode::INLINEASM: {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+ return TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
+ *MF->getTarget().getMCAsmInfo());
}
+ }
+
+ return Desc.getSize();
}
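
Since every concrete instruction class now sets the TableGen Size field, Desc.getSize() already knows the byte length and the old TSFlags-based table is no longer needed; only the target-independent pseudos still require explicit handling. For example, one would expect sizes along these lines (illustrative, derived from the size arguments in MSP430InstrFormats.td above):

    // Sizes now carried by MCInstrDesc::getSize(), per the format classes:
    //   I16rr / II16r / CJForm : 2 bytes  (single instruction word)
    //   I16ri / I16rm / I16mr  : 4 bytes  (one extra immediate/offset word)
    //   I16mi / I16mm          : 6 bytes  (two extra words)
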
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
index 45357f54c9c6..fee3bea9b8d6 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -24,22 +24,6 @@ namespace llvm {
class MSP430Subtarget;
-/// MSP430II - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace MSP430II {
- enum {
- SizeShift = 2,
- SizeMask = 7 << SizeShift,
-
- SizeUnknown = 0 << SizeShift,
- SizeSpecial = 1 << SizeShift,
- Size2Bytes = 2 << SizeShift,
- Size4Bytes = 3 << SizeShift,
- Size6Bytes = 4 << SizeShift
- };
-}
-
class MSP430InstrInfo : public MSP430GenInstrInfo {
const MSP430RegisterInfo RI;
virtual void anchor();
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
index cec43040f60d..25c81d94f75b 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
@@ -34,8 +34,9 @@ def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>]>;
-def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
- SDTCisI8<2>]>;
+def SDT_MSP430DAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<0>]>;
//===----------------------------------------------------------------------===//
// MSP430 Specific Node Definitions.
@@ -48,6 +49,7 @@ def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
+def MSP430rrcl : SDNode<"MSP430ISD::RRCL", SDTIntUnaryOp, []>;
def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
@@ -63,33 +65,88 @@ def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
[SDNPHasChain, SDNPInGlue]>;
def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
[SDNPInGlue]>;
-def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
-def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
-def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
+def MSP430dadd : SDNode<"MSP430ISD::DADD", SDT_MSP430DAdd, []>;
//===----------------------------------------------------------------------===//
// MSP430 Operand Definitions.
//===----------------------------------------------------------------------===//
+def MemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+}
+
// Address operands
def memsrc : Operand<i16> {
let PrintMethod = "printSrcMemOperand";
let MIOperandInfo = (ops GR16, i16imm);
+ let ParserMatchClass = MemAsmOperand;
+ let EncoderMethod = "getMemOpValue";
+ let DecoderMethod = "DecodeMemOperand";
}
def memdst : Operand<i16> {
let PrintMethod = "printSrcMemOperand";
let MIOperandInfo = (ops GR16, i16imm);
+ let ParserMatchClass = MemAsmOperand;
+ let EncoderMethod = "getMemOpValue";
+ let DecoderMethod = "DecodeMemOperand";
+}
+
+def IndRegAsmOperand : AsmOperandClass {
+ let Name = "IndReg";
+ let RenderMethod = "addRegOperands";
+}
+
+def indreg : Operand<i16> {
+ let PrintMethod = "printIndRegOperand";
+ let MIOperandInfo = (ops GR16);
+ let ParserMatchClass = IndRegAsmOperand;
+ let DecoderMethod = "DecodeGR16RegisterClass";
+}
+
+def PostIndRegAsmOperand : AsmOperandClass {
+ let Name = "PostIndReg";
+ let RenderMethod = "addRegOperands";
+}
+
+def postreg : Operand<i16> {
+ let PrintMethod = "printPostIndRegOperand";
+ let MIOperandInfo = (ops GR16);
+ let ParserMatchClass = PostIndRegAsmOperand;
+ let DecoderMethod = "DecodeGR16RegisterClass";
}
// Short jump targets have OtherVT type and are printed as pcrel imm values.
def jmptarget : Operand<OtherVT> {
let PrintMethod = "printPCRelImmOperand";
+ let EncoderMethod = "getPCRelImmOpValue";
}
// Operand for printing out a condition code.
def cc : Operand<i8> {
let PrintMethod = "printCCOperand";
+ let EncoderMethod = "getCCOpValue";
+}
+
+def CGImmAsmOperand : AsmOperandClass {
+ let Name = "CGImm";
+ let RenderMethod = "addImmOperands";
+}
+
+def cg8imm : Operand<i8>,
+ ImmLeaf<i8, [{return Imm == 0 || Imm == 1 || Imm == 2 ||
+ Imm == 4 || Imm == 8 || Imm == -1;}]> {
+ let ParserMatchClass = CGImmAsmOperand;
+ let EncoderMethod = "getCGImmOpValue";
+ let DecoderMethod = "DecodeCGImm";
+}
+
+def cg16imm : Operand<i16>,
+ ImmLeaf<i16, [{return Imm == 0 || Imm == 1 || Imm == 2 ||
+ Imm == 4 || Imm == 8 || Imm == -1;}]> {
+ let ParserMatchClass = CGImmAsmOperand;
+ let EncoderMethod = "getCGImmOpValue";
+ let DecoderMethod = "DecodeCGImm";
}
//===----------------------------------------------------------------------===//
@@ -102,6 +159,7 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
// Pattern Fragments
def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
+def bic : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, (not node:$rhs))>;
def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
return N->hasOneUse();
}]>;
@@ -113,21 +171,21 @@ def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber SR.
-let Defs = [SP, SR], Uses = [SP] in {
+let isCodeGenOnly = 1, Defs = [SP, SR], Uses = [SP] in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
- "#ADJCALLSTACKDOWN",
+ "#ADJCALLSTACKDOWN $amt1 $amt2",
[(MSP430callseq_start timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
- "#ADJCALLSTACKUP",
+ "#ADJCALLSTACKUP $amt1 $amt2",
[(MSP430callseq_end timm:$amt1, timm:$amt2)]>;
}
-let Defs = [SR], Uses = [SP] in {
+let isCodeGenOnly = 1, Defs = [SR], Uses = [SP] in {
def ADDframe : Pseudo<(outs GR16:$dst), (ins i16imm:$base, i16imm:$offset),
"# ADDframe PSEUDO", []>;
}
-let usesCustomInserter = 1 in {
+let isCodeGenOnly = 1, usesCustomInserter = 1 in {
let Uses = [SR] in {
def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
"# Select8 PSEUDO",
@@ -141,38 +199,44 @@ let usesCustomInserter = 1 in {
let Defs = [SR] in {
def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Shl8 PSEUDO",
- [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
+ [(set GR8:$dst, (shl GR8:$src, GR8:$cnt))]>;
def Shl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
"# Shl16 PSEUDO",
- [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>;
+ [(set GR16:$dst, (shl GR16:$src, GR8:$cnt))]>;
def Sra8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Sra8 PSEUDO",
- [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>;
+ [(set GR8:$dst, (sra GR8:$src, GR8:$cnt))]>;
def Sra16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
"# Sra16 PSEUDO",
- [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>;
+ [(set GR16:$dst, (sra GR16:$src, GR8:$cnt))]>;
def Srl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Srl8 PSEUDO",
- [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>;
+ [(set GR8:$dst, (srl GR8:$src, GR8:$cnt))]>;
def Srl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
"# Srl16 PSEUDO",
- [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>;
-
+ [(set GR16:$dst, (srl GR16:$src, GR8:$cnt))]>;
+ def Rrcl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src), "",
+ [(set GR8:$dst, (MSP430rrcl GR8:$src))]>;
+ def Rrcl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src), "",
+ [(set GR16:$dst, (MSP430rrcl GR16:$src))]>;
}
}
-let hasSideEffects = 0 in
-def NOP : Pseudo<(outs), (ins), "nop", []>;
-
//===----------------------------------------------------------------------===//
// Control Flow Instructions...
//
-// FIXME: Provide proper encoding!
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def RET : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs), (ins), "ret", [(MSP430retflag)]>;
- def RETI : II16r<0x0, (outs), (ins), "reti", [(MSP430retiflag)]>;
+ def RET : IForm16<0b0100, DstReg, SrcPostInc, 2,
+ (outs), (ins), "ret", [(MSP430retflag)]> {
+ let DecoderNamespace = "Delta";
+ let rs = 1;
+ let rd = 0;
+ }
+ def RETI : IIForm16<0b110, SrcReg, 2,
+ (outs), (ins), "reti", [(MSP430retiflag)]> {
+ let rs = 0;
+ }
}
let isBranch = 1, isTerminator = 1 in {
@@ -182,120 +246,143 @@ let isBranch = 1, isTerminator = 1 in {
// Direct branch
let isBarrier = 1 in {
// Short branch
- def JMP : CJForm<0, 0, (outs), (ins jmptarget:$dst),
+ def JMP : CJForm<(outs), (ins jmptarget:$dst),
"jmp\t$dst",
- [(br bb:$dst)]>;
- let isIndirectBranch = 1 in {
+ [(br bb:$dst)]> {
+ let cond = 0b111;
+ }
+ let isIndirectBranch = 1, rd = 0 in {
// Long branches
- def Bi : I16ri<0, (outs), (ins i16imm:$brdst),
- "br\t$brdst",
- [(brind tblockaddress:$brdst)]>;
- def Br : I16rr<0, (outs), (ins GR16:$brdst),
- "br\t$brdst",
- [(brind GR16:$brdst)]>;
- def Bm : I16rm<0, (outs), (ins memsrc:$brdst),
- "br\t$brdst",
- [(brind (load addr:$brdst))]>;
+ def Bi : I16ri<0b0100, (outs), (ins i16imm:$imm),
+ "br\t$imm",
+ [(brind tblockaddress:$imm)]>;
+ def Br : I16rr<0b0100, (outs), (ins GR16:$rs),
+ "br\t$rs",
+ [(brind GR16:$rs)]>;
+ def Bm : I16rm<0b0100, (outs), (ins memsrc:$src),
+ "br\t$src",
+ [(brind (load addr:$src))]>;
}
}
// Conditional branches
let Uses = [SR] in
- def JCC : CJForm<0, 0,
- (outs), (ins jmptarget:$dst, cc:$cc),
- "j$cc\t$dst",
- [(MSP430brcc bb:$dst, imm:$cc)]>;
+ def JCC : CJForm<(outs), (ins jmptarget:$dst, cc:$cond),
+ "j$cond\t$dst",
+ [(MSP430brcc bb:$dst, imm:$cond)]>;
} // isBranch, isTerminator
//===----------------------------------------------------------------------===//
// Call Instructions...
//
-let isCall = 1 in
- // All calls clobber the non-callee saved registers. SPW is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [R11, R12, R13, R14, R15, SR],
- Uses = [SP] in {
- def CALLi : II16i<0x0,
- (outs), (ins i16imm:$dst),
- "call\t$dst", [(MSP430call imm:$dst)]>;
- def CALLr : II16r<0x0,
- (outs), (ins GR16:$dst),
- "call\t$dst", [(MSP430call GR16:$dst)]>;
- def CALLm : II16m<0x0,
- (outs), (ins memsrc:$dst),
- "call\t${dst:mem}", [(MSP430call (load addr:$dst))]>;
- }
-
+// All calls clobber the non-callee saved registers. SPW is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead. Uses for argument
+// registers are added manually.
+let isCall = 1,
+ Defs = [R11, R12, R13, R14, R15, SR],
+ Uses = [SP] in {
+ def CALLi : II16i<0b101,
+ (outs), (ins i16imm:$imm),
+ "call\t$imm", [(MSP430call imm:$imm)]>;
+ def CALLr : II16r<0b101,
+ (outs), (ins GR16:$rs),
+ "call\t$rs", [(MSP430call GR16:$rs)]>;
+ def CALLm : II16m<0b101,
+ (outs), (ins memsrc:$src),
+ "call\t$src", [(MSP430call (load addr:$src))]>;
+ def CALLn : II16n<0b101, (outs), (ins indreg:$rs), "call\t$rs", []>;
+ def CALLp : II16p<0b101, (outs), (ins postreg:$rs), "call\t$rs", []>;
+}
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions...
//
-let Defs = [SP], Uses = [SP], hasSideEffects=0 in {
+let Defs = [SP], Uses = [SP], hasSideEffects = 0 in {
let mayLoad = 1 in
-def POP16r : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$reg), (ins), "pop.w\t$reg", []>;
+def POP16r : IForm16<0b0100, DstReg, SrcPostInc, 2,
+ (outs GR16:$rd), (ins), "pop\t$rd", []> {
+ let DecoderNamespace = "Delta";
+ let rs = 1;
+}
let mayStore = 1 in
-def PUSH16r : II16r<0x0,
- (outs), (ins GR16:$reg), "push.w\t$reg",[]>;
+def PUSH8r : II8r<0b100, (outs), (ins GR8:$rs), "push.b\t$rs", []>;
+def PUSH16r : II16r<0b100, (outs), (ins GR16:$rs), "push\t$rs", []>;
+def PUSH16c : II16c<0b100, (outs), (ins cg16imm:$imm), "push\t$imm", []>;
+def PUSH16i : II16i<0b100, (outs), (ins i16imm:$imm), "push\t$imm", []>;
}
//===----------------------------------------------------------------------===//
// Move Instructions
-// FIXME: Provide proper encoding!
let hasSideEffects = 0 in {
-def MOV8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src),
- "mov.b\t{$src, $dst}",
+def MOV8rr : I8rr<0b0100,
+ (outs GR8:$rd), (ins GR8:$rs),
+ "mov.b\t{$rs, $rd}",
[]>;
-def MOV16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "mov.w\t{$src, $dst}",
+def MOV16rr : I16rr<0b0100,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "mov\t{$rs, $rd}",
[]>;
}
-// FIXME: Provide proper encoding!
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def MOV8ri : I8ri<0x0,
- (outs GR8:$dst), (ins i8imm:$src),
- "mov.b\t{$src, $dst}",
- [(set GR8:$dst, imm:$src)]>;
-def MOV16ri : I16ri<0x0,
- (outs GR16:$dst), (ins i16imm:$src),
- "mov.w\t{$src, $dst}",
- [(set GR16:$dst, imm:$src)]>;
+def MOV8rc : I8rc<0b0100,
+ (outs GR8:$rd), (ins cg8imm:$imm),
+ "mov.b\t$imm, $rd",
+ [(set GR8:$rd, cg8imm:$imm)]>;
+def MOV16rc : I16rc<0b0100,
+ (outs GR16:$rd), (ins cg16imm:$imm),
+ "mov\t$imm, $rd",
+ [(set GR16:$rd, cg16imm:$imm)]>;
+def MOV8ri : I8ri<0b0100,
+ (outs GR8:$rd), (ins i8imm:$imm),
+ "mov.b\t{$imm, $rd}",
+ [(set GR8:$rd, imm:$imm)]>;
+def MOV16ri : I16ri<0b0100,
+ (outs GR16:$rd), (ins i16imm:$imm),
+ "mov\t{$imm, $rd}",
+ [(set GR16:$rd, imm:$imm)]>;
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def MOV8rm : I8rm<0x0,
- (outs GR8:$dst), (ins memsrc:$src),
- "mov.b\t{$src, $dst}",
- [(set GR8:$dst, (load addr:$src))]>;
-def MOV16rm : I16rm<0x0,
- (outs GR16:$dst), (ins memsrc:$src),
- "mov.w\t{$src, $dst}",
- [(set GR16:$dst, (load addr:$src))]>;
+def MOV8rm : I8rm<0b0100,
+ (outs GR8:$rd), (ins memsrc:$src),
+ "mov.b\t{$src, $rd}",
+ [(set GR8:$rd, (load addr:$src))]>;
+def MOV16rm : I16rm<0b0100,
+ (outs GR16:$rd), (ins memsrc:$src),
+ "mov\t{$src, $rd}",
+ [(set GR16:$rd, (load addr:$src))]>;
+def MOV8rn : I8rn<0b0100,
+ (outs GR8:$rd), (ins indreg:$rs),
+ "mov.b\t{$rs, $rd}",
+ [(set GR8:$rd, (load addr:$rs))]>;
+def MOV16rn : I16rn<0b0100,
+ (outs GR16:$rd), (ins indreg:$rs),
+ "mov\t{$rs, $rd}",
+ [(set GR16:$rd, (load addr:$rs))]>;
+}
+
+let isCodeGenOnly = 1 in {
+def MOVZX16rr8 : I8rr<0b0100,
+ (outs GR16:$rd), (ins GR8:$rs),
+ "mov.b\t{$rs, $rd}",
+ [(set GR16:$rd, (zext GR8:$rs))]>;
+def MOVZX16rm8 : I8rm<0b0100,
+ (outs GR16:$rd), (ins memsrc:$src),
+ "mov.b\t{$src, $rd}",
+ [(set GR16:$rd, (zextloadi16i8 addr:$src))]>;
}
-def MOVZX16rr8 : I8rr<0x0,
- (outs GR16:$dst), (ins GR8:$src),
- "mov.b\t{$src, $dst}",
- [(set GR16:$dst, (zext GR8:$src))]>;
-def MOVZX16rm8 : I8rm<0x0,
- (outs GR16:$dst), (ins memsrc:$src),
- "mov.b\t{$src, $dst}",
- [(set GR16:$dst, (zextloadi16i8 addr:$src))]>;
-
-let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$base = $base_wb" in {
-def MOV8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb), (ins GR16:$base),
- "mov.b\t{@$base+, $dst}", []>;
-def MOV16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb), (ins GR16:$base),
- "mov.w\t{@$base+, $dst}", []>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$rs = $wb" in {
+def MOV8rp : I8rp<0b0100,
+ (outs GR8:$rd, GR16:$wb), (ins postreg:$rs),
+ "mov.b\t{$rs, $rd}", []>;
+def MOV16rp : I16rp<0b0100,
+ (outs GR16:$rd, GR16:$wb), (ins postreg:$rs),
+ "mov\t{$rs, $rd}", []>;
}
// Any instruction that defines a 8-bit result leaves the high half of the
@@ -313,821 +400,557 @@ def def8 : PatLeaf<(i8 GR8:$src), [{
def : Pat<(i16 (zext def8:$src)),
(SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
-def MOV8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "mov.b\t{$src, $dst}",
- [(store (i8 imm:$src), addr:$dst)]>;
-def MOV16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "mov.w\t{$src, $dst}",
- [(store (i16 imm:$src), addr:$dst)]>;
-
-def MOV8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "mov.b\t{$src, $dst}",
- [(store GR8:$src, addr:$dst)]>;
-def MOV16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "mov.w\t{$src, $dst}",
- [(store GR16:$src, addr:$dst)]>;
-
-def MOV8mm : I8mm<0x0,
+def MOV8mc : I8mc<0b0100,
+ (outs), (ins memdst:$dst, cg8imm:$imm),
+ "mov.b\t{$imm, $dst}",
+ [(store (i8 cg8imm:$imm), addr:$dst)]>;
+def MOV16mc : I16mc<0b0100,
+ (outs), (ins memdst:$dst, cg16imm:$imm),
+ "mov\t{$imm, $dst}",
+ [(store (i16 cg16imm:$imm), addr:$dst)]>;
+
+def MOV8mi : I8mi<0b0100,
+ (outs), (ins memdst:$dst, i8imm:$imm),
+ "mov.b\t{$imm, $dst}",
+ [(store (i8 imm:$imm), addr:$dst)]>;
+def MOV16mi : I16mi<0b0100,
+ (outs), (ins memdst:$dst, i16imm:$imm),
+ "mov\t{$imm, $dst}",
+ [(store (i16 imm:$imm), addr:$dst)]>;
+
+def MOV8mr : I8mr<0b0100,
+ (outs), (ins memdst:$dst, GR8:$rs),
+ "mov.b\t{$rs, $dst}",
+ [(store GR8:$rs, addr:$dst)]>;
+def MOV16mr : I16mr<0b0100,
+ (outs), (ins memdst:$dst, GR16:$rs),
+ "mov\t{$rs, $dst}",
+ [(store GR16:$rs, addr:$dst)]>;
+
+def MOV8mm : I8mm<0b0100,
(outs), (ins memdst:$dst, memsrc:$src),
"mov.b\t{$src, $dst}",
[(store (i8 (load addr:$src)), addr:$dst)]>;
-def MOV16mm : I16mm<0x0,
+def MOV16mm : I16mm<0b0100,
(outs), (ins memdst:$dst, memsrc:$src),
- "mov.w\t{$src, $dst}",
+ "mov\t{$src, $dst}",
[(store (i16 (load addr:$src)), addr:$dst)]>;
+def MOV8mn : I8mn<0b0100, (outs), (ins memdst:$dst, indreg:$rs),
+ "mov.b\t{$rs, $dst}", []>;
+def MOV16mn : I16mn<0b0100, (outs), (ins memdst:$dst, indreg:$rs),
+ "mov\t{$rs, $dst}", []>;
+
//===----------------------------------------------------------------------===//
// Arithmetic Instructions
-let Constraints = "$src = $dst" in {
-
-let Defs = [SR] in {
-
-let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
-
-def ADD8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src, GR8:$src2)),
+multiclass Arith<bits<4> opcode, string asmstring, SDNode node,
+ bit commutes, list<Register> uses> {
+ let Defs = [SR], Uses = uses in {
+ let Constraints = "$src2 = $rd" in {
+ let isCommutable = commutes in {
+ def 8rr : I8rr<opcode, (outs GR8:$rd), (ins GR8:$src2, GR8:$rs),
+ !strconcat(asmstring, ".b\t$rs, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, GR8:$rs)),
+ (implicit SR)]>;
+ def 16rr : I16rr<opcode, (outs GR16:$rd), (ins GR16:$src2, GR16:$rs),
+ !strconcat(asmstring, "\t$rs, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, GR16:$rs)),
(implicit SR)]>;
-def ADD16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-}
-
-def ADD8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
+ }
+ def 8rm : I8rm<opcode, (outs GR8:$rd), (ins GR8:$src2, memsrc:$src),
+ !strconcat(asmstring, ".b\t$src, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, (load addr:$src))),
+ (implicit SR)]>;
+ def 16rm : I16rm<opcode, (outs GR16:$rd), (ins GR16:$src2, memsrc:$src),
+ !strconcat(asmstring, "\t$src, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, (load addr:$src))),
(implicit SR)]>;
-def ADD16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
-
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "add.b\t{@$base+, $dst}", []>;
-def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "add.w\t{@$base+, $dst}", []>;
-}
-
-
-def ADD8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src, imm:$src2)),
+ def 8rn : I8rn<opcode, (outs GR8:$rd), (ins GR8:$src2, indreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $rd"), []>;
+ def 16rn : I16rn<opcode, (outs GR16:$rd), (ins GR16:$src2, indreg:$rs),
+ !strconcat(asmstring, "\t$rs, $rd"), []>;
+ let mayLoad = 1,
+ hasExtraDefRegAllocReq = 1,
+ Constraints = "$rs = $wb, $src2 = $rd" in {
+ def 8rp : I8rp<opcode, (outs GR8:$rd, GR16:$wb), (ins GR8:$src2, postreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $rd"), []>;
+ def 16rp : I16rp<opcode, (outs GR16:$rd, GR16:$wb), (ins GR16:$src2, postreg:$rs),
+ !strconcat(asmstring, "\t$rs, $rd"), []>;
+ }
+ def 8rc : I8rc<opcode, (outs GR8:$rd), (ins GR8:$src2, cg8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, cg8imm:$imm)),
+ (implicit SR)]>;
+ def 16rc : I16rc<opcode, (outs GR16:$rd), (ins GR16:$src2, cg16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, cg16imm:$imm)),
+ (implicit SR)]>;
+ def 8ri : I8ri<opcode, (outs GR8:$rd), (ins GR8:$src2, i8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, imm:$imm)),
+ (implicit SR)]>;
+ def 16ri : I16ri<opcode, (outs GR16:$rd), (ins GR16:$src2, i16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, imm:$imm)),
+ (implicit SR)]>;
+ }
+ def 8mr : I8mr<opcode, (outs), (ins memdst:$dst, GR8:$rs),
+ !strconcat(asmstring, ".b\t$rs, $dst"),
+ [(store (node (load addr:$dst), GR8:$rs), addr:$dst),
+ (implicit SR)]>;
+ def 16mr : I16mr<opcode, (outs), (ins memdst:$dst, GR16:$rs),
+ !strconcat(asmstring, "\t$rs, $dst"),
+ [(store (node (load addr:$dst), GR16:$rs), addr:$dst),
(implicit SR)]>;
-def ADD16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-let Constraints = "" in {
-def ADD8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "add.b\t{$src, $dst}",
- [(store (add (load addr:$dst), GR8:$src), addr:$dst),
+ def 8mc : I8mc<opcode, (outs), (ins memdst:$dst, cg8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i8 cg8imm:$imm)), addr:$dst),
+ (implicit SR)]>;
+ def 16mc : I16mc<opcode, (outs), (ins memdst:$dst, cg16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i16 cg16imm:$imm)), addr:$dst),
(implicit SR)]>;
-def ADD16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "add.w\t{$src, $dst}",
- [(store (add (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
-
-def ADD8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "add.b\t{$src, $dst}",
- [(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ def 8mi : I8mi<opcode, (outs), (ins memdst:$dst, i8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i8 imm:$imm)), addr:$dst),
+ (implicit SR)]>;
+ def 16mi : I16mi<opcode, (outs), (ins memdst:$dst, i16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i16 imm:$imm)), addr:$dst),
(implicit SR)]>;
-def ADD16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "add.w\t{$src, $dst}",
- [(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
-
-def ADD8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "add.b\t{$src, $dst}",
- [(store (add (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+ def 8mm : I8mm<opcode, (outs), (ins memdst:$dst, memsrc:$src),
+ !strconcat(asmstring, ".b\t$src, $dst"),
+ [(store (node (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SR)]>;
+ def 16mm : I16mm<opcode, (outs), (ins memdst:$dst, memsrc:$src),
+ !strconcat(asmstring, "\t$src, $dst"),
+ [(store (node (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
(implicit SR)]>;
-def ADD16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "add.w\t{$src, $dst}",
- [(store (add (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
+ def 8mn : I8mn<opcode, (outs), (ins memdst:$dst, indreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $dst"), []>;
+ def 16mn : I16mn<opcode, (outs), (ins memdst:$dst, indreg:$rs),
+ !strconcat(asmstring, "\t$rs, $dst"), []>;
+ def 8mp : I8mp<opcode, (outs), (ins memdst:$dst, postreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $dst"), []>;
+ def 16mp : I16mp<opcode, (outs), (ins memdst:$dst, postreg:$rs),
+ !strconcat(asmstring, "\t$rs, $dst"), []>;
+ }
}
-let Uses = [SR] in {
-
-let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
-def ADC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def ADC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-} // isCommutable
-
-def ADC8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src, imm:$src2)),
- (implicit SR)]>;
-def ADC16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-def ADC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
- (implicit SR)]>;
-def ADC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
+defm ADD : Arith<0b0101, "add", add, 1, []>;
+defm ADDC : Arith<0b0110, "addc", adde, 1, [SR]>;
+defm AND : Arith<0b1111, "and", and, 1, []>;
+defm BIS : Arith<0b1101, "bis", or, 1, []>;
+defm BIC : Arith<0b1100, "bic", bic, 0, []>;
+defm XOR : Arith<0b1110, "xor", xor, 1, []>;
+defm SUB : Arith<0b1000, "sub", sub, 0, []>;
+defm SUBC : Arith<0b0111, "subc", sube, 0, [SR]>;
+defm DADD : Arith<0b1010, "dadd", MSP430dadd, 1, [SR]>;
+
+def ADC8r : InstAlias<"adc.b\t$dst", (ADDC8rc GR8:$dst, 0)>;
+def ADC16r : InstAlias<"adc\t$dst", (ADDC16rc GR16:$dst, 0)>;
+def ADC8m : InstAlias<"adc.b\t$dst", (ADDC8mc memdst:$dst, 0)>;
+def ADC16m : InstAlias<"adc\t$dst", (ADDC16mc memdst:$dst, 0)>;
+
+def DADC8r : InstAlias<"dadc.b\t$dst", (DADD8rc GR8:$dst, 0)>;
+def DADC16r : InstAlias<"dadc\t$dst", (DADD16rc GR16:$dst, 0)>;
+def DADC8m : InstAlias<"dadc.b\t$dst", (DADD8mc memdst:$dst, 0)>;
+def DADC16m : InstAlias<"dadc\t$dst", (DADD16mc memdst:$dst, 0)>;
+
+def DEC8r : InstAlias<"dec.b\t$dst", (SUB8rc GR8:$dst, 1)>;
+def DEC16r : InstAlias<"dec\t$dst", (SUB16rc GR16:$dst, 1)>;
+def DEC8m : InstAlias<"dec.b\t$dst", (SUB8mc memdst:$dst, 1)>;
+def DEC16m : InstAlias<"dec\t$dst", (SUB16mc memdst:$dst, 1)>;
+
+def DECD8r : InstAlias<"decd.b\t$dst", (SUB8rc GR8:$dst, 2)>;
+def DECD16r : InstAlias<"decd\t$dst", (SUB16rc GR16:$dst, 2)>;
+def DECD8m : InstAlias<"decd.b\t$dst", (SUB8mc memdst:$dst, 2)>;
+def DECD16m : InstAlias<"decd\t$dst", (SUB16mc memdst:$dst, 2)>;
+
+def INC8r : InstAlias<"inc.b\t$dst", (ADD8rc GR8:$dst, 1)>;
+def INC16r : InstAlias<"inc\t$dst", (ADD16rc GR16:$dst, 1)>;
+def INC8m : InstAlias<"inc.b\t$dst", (ADD8mc memdst:$dst, 1)>;
+def INC16m : InstAlias<"inc\t$dst", (ADD16mc memdst:$dst, 1)>;
+
+def INCD8r : InstAlias<"incd.b\t$dst", (ADD8rc GR8:$dst, 2)>;
+def INCD16r : InstAlias<"incd\t$dst", (ADD16rc GR16:$dst, 2)>;
+def INCD8m : InstAlias<"incd.b\t$dst", (ADD8mc memdst:$dst, 2)>;
+def INCD16m : InstAlias<"incd\t$dst", (ADD16mc memdst:$dst, 2)>;
+
+def SBC8r : InstAlias<"sbc.b\t$dst", (SUBC8rc GR8:$dst, 0)>;
+def SBC16r : InstAlias<"sbc\t$dst", (SUBC16rc GR16:$dst, 0)>;
+def SBC8m : InstAlias<"sbc.b\t$dst", (SUBC8mc memdst:$dst, 0)>;
+def SBC16m : InstAlias<"sbc\t$dst", (SUBC16mc memdst:$dst, 0)>;
+
+def INV8r : InstAlias<"inv.b\t$dst", (XOR8rc GR8:$dst, -1)>;
+def INV16r : InstAlias<"inv\t$dst", (XOR16rc GR16:$dst, -1)>;
+def INV8m : InstAlias<"inv.b\t$dst", (XOR8mc memdst:$dst, -1)>;
+def INV16m : InstAlias<"inv\t$dst", (XOR16mc memdst:$dst, -1)>;
+
+// printAliasInstr() doesn't check $dst operands are actually equal
+// for RLA and RLC aliases below, so disable printing aliases.
+
+def RLA8r : InstAlias<"rla.b\t$dst", (ADD8rr GR8:$dst, GR8:$dst), 0>;
+def RLA16r : InstAlias<"rla\t$dst", (ADD16rr GR16:$dst, GR16:$dst), 0>;
+def RLA8m : InstAlias<"rla.b\t$dst", (ADD8mm memdst:$dst, memdst:$dst), 0>;
+def RLA16m : InstAlias<"rla\t$dst", (ADD16mm memdst:$dst, memdst:$dst), 0>;
+
+def RLC8r : InstAlias<"rlc.b\t$dst", (ADDC8rr GR8:$dst, GR8:$dst), 0>;
+def RLC16r : InstAlias<"rlc\t$dst", (ADDC16rr GR16:$dst, GR16:$dst), 0>;
+def RLC8m : InstAlias<"rlc.b\t$dst", (ADDC8mm memdst:$dst, memdst:$dst), 0>;
+def RLC16m : InstAlias<"rlc\t$dst", (ADDC16mm memdst:$dst, memdst:$dst), 0>;
+
+def DINT : InstAlias<"dint", (BIC16rc SR, 8)>;
+def EINT : InstAlias<"eint", (BIS16rc SR, 8)>;
+
+def NOP : InstAlias<"nop", (MOV16rc CG, 0)>;
+
+def CLR8r : InstAlias<"clr.b\t$dst", (MOV8rc GR8:$dst, 0)>;
+def CLR16r : InstAlias<"clr\t$dst", (MOV16rc GR16:$dst, 0)>;
+def CLR8m : InstAlias<"clr.b\t$dst", (MOV8mc memdst:$dst, 0)>;
+def CLR16m : InstAlias<"clr\t$dst", (MOV16mc memdst:$dst, 0)>;
+
+def CLRC : InstAlias<"clrc", (BIC16rc SR, 1)>;
+def CLRN : InstAlias<"clrn", (BIC16rc SR, 4)>;
+def CLRZ : InstAlias<"clrz", (BIC16rc SR, 2)>;
+def SETC : InstAlias<"setc", (BIS16rc SR, 1)>;
+def SETN : InstAlias<"setn", (BIS16rc SR, 4)>;
+def SETZ : InstAlias<"setz", (BIS16rc SR, 2)>;
+
+def : Pat<(MSP430rla GR8:$dst), (ADD8rr $dst, $dst)>;
+def : Pat<(MSP430rla GR16:$dst), (ADD16rr $dst, $dst)>;
+
+// Format-II (Single Operand) Instruction
+// Register mode
+let Constraints = "$rs = $rd" in {
-let Constraints = "" in {
-def ADC8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "addc.b\t{$src, $dst}",
- [(store (adde (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def ADC16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "addc.w\t{$src, $dst}",
- [(store (adde (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
-
-def ADC8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "addc.b\t{$src, $dst}",
- [(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst),
+let Defs = [SR] in {
+def RRA8r : II8r<0b010,
+ (outs GR8:$rd), (ins GR8:$rs),
+ "rra.b\t$rd",
+ [(set GR8:$rd, (MSP430rra GR8:$rs)),
(implicit SR)]>;
-def ADC16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "addc.w\t{$src, $dst}",
- [(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst),
+def RRA16r : II16r<0b010,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "rra\t$rd",
+ [(set GR16:$rd, (MSP430rra GR16:$rs)),
(implicit SR)]>;
-def ADC8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "addc.b\t{$src, $dst}",
- [(store (adde (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+let Uses = [SR] in {
+def RRC8r : II8r<0b000,
+ (outs GR8:$rd), (ins GR8:$rs),
+ "rrc.b\t$rd",
+ [(set GR8:$rd, (MSP430rrc GR8:$rs)),
(implicit SR)]>;
-def ADC16mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "addc.w\t{$src, $dst}",
- [(store (adde (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
+def RRC16r : II16r<0b000,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "rrc\t$rd",
+ [(set GR16:$rd, (MSP430rrc GR16:$rs)),
(implicit SR)]>;
-}
-
} // Uses = [SR]
-let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
-def AND8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def AND16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, GR16:$src2)),
+def SEXT16r : II16r<0b011,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "sxt\t$rd",
+ [(set GR16:$rd, (sext_inreg GR16:$rs, i8)),
(implicit SR)]>;
-}
-def AND8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, imm:$src2)),
- (implicit SR)]>;
-def AND16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, imm:$src2)),
- (implicit SR)]>;
+} // Defs = [SR]
-def AND8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
- (implicit SR)]>;
-def AND16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
+let isCodeGenOnly = 1 in
+def ZEXT16r : I8rr<0b0100,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "mov.b\t{$rs, $rd}",
+ [(set GR16:$rd, (zext (trunc GR16:$rs)))]>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "and.b\t{@$base+, $dst}", []>;
-def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "and.w\t{@$base+, $dst}", []>;
-}
+def SWPB16r : II16r<0b001,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "swpb\t$rd",
+ [(set GR16:$rd, (bswap GR16:$rs))]>;
-let Constraints = "" in {
-def AND8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "and.b\t{$src, $dst}",
- [(store (and (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def AND16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "and.w\t{$src, $dst}",
- [(store (and (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
+} // Constraints = "$src = $dst"
-def AND8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "and.b\t{$src, $dst}",
- [(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst),
+// Indexed, indirect register and indirect autoincrement modes
+let Defs = [SR] in {
+def RRA8m : II8m<0b010,
+ (outs), (ins memsrc:$src),
+ "rra.b\t$src",
+ [(store (MSP430rra (i8 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def AND16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "and.w\t{$src, $dst}",
- [(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
-
-def AND8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "and.b\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+def RRA16m : II16m<0b010,
+ (outs), (ins memsrc:$src),
+ "rra\t$src",
+ [(store (MSP430rra (i16 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def AND16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "and.w\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-}
-
-let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
-def OR8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>;
-def OR16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>;
-}
-
-def OR8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src, imm:$src2))]>;
-def OR16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src, imm:$src2))]>;
-
-def OR8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>;
-def OR16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>;
-
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "bis.b\t{@$base+, $dst}", []>;
-def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "bis.w\t{@$base+, $dst}", []>;
-}
-
-let Constraints = "" in {
-def OR8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
-def OR16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
-
-def OR8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def OR16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
-
-def OR8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (i8 (load addr:$dst)),
- (i8 (load addr:$src))), addr:$dst)]>;
-def OR16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (i16 (load addr:$dst)),
- (i16 (load addr:$src))), addr:$dst)]>;
-}
-
-// bic does not modify condition codes
-def BIC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "bic.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>;
-def BIC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "bic.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>;
-
-def BIC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "bic.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>;
-def BIC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "bic.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>;
-
-let Constraints = "" in {
-def BIC8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "bic.b\t{$src, $dst}",
- [(store (and (load addr:$dst), (not GR8:$src)), addr:$dst)]>;
-def BIC16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "bic.w\t{$src, $dst}",
- [(store (and (load addr:$dst), (not GR16:$src)), addr:$dst)]>;
-
-def BIC8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bic.b\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (not (i8 (load addr:$src)))), addr:$dst)]>;
-def BIC16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bic.w\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (not (i16 (load addr:$src)))), addr:$dst)]>;
-}
-let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
-def XOR8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def XOR16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-}
+def RRA8n : II8n<0b010, (outs), (ins indreg:$rs), "rra.b\t$rs", []>;
+def RRA16n : II16n<0b010, (outs), (ins indreg:$rs), "rra\t$rs", []>;
+def RRA8p : II8p<0b010, (outs), (ins postreg:$rs), "rra.b\t$rs", []>;
+def RRA16p : II16p<0b010, (outs), (ins postreg:$rs), "rra\t$rs", []>;
-def XOR8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src, imm:$src2)),
+let Uses = [SR] in {
+def RRC8m : II8m<0b000,
+ (outs), (ins memsrc:$src),
+ "rrc.b\t$src",
+ [(store (MSP430rrc (i8 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def XOR16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-def XOR8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
+def RRC16m : II16m<0b000,
+ (outs), (ins memsrc:$src),
+ "rrc\t$src",
+ [(store (MSP430rrc (i16 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def XOR16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "xor.b\t{@$base+, $dst}", []>;
-def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "xor.w\t{@$base+, $dst}", []>;
-}
+def RRC8n : II8n<0b000, (outs), (ins indreg:$rs), "rrc.b\t$rs", []>;
+def RRC16n : II16n<0b000, (outs), (ins indreg:$rs), "rrc\t$rs", []>;
+def RRC8p : II8p<0b000, (outs), (ins postreg:$rs), "rrc.b\t$rs", []>;
+def RRC16p : II16p<0b000, (outs), (ins postreg:$rs), "rrc\t$rs", []>;
-let Constraints = "" in {
-def XOR8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "xor.b\t{$src, $dst}",
- [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def XOR16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "xor.w\t{$src, $dst}",
- [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
-
-def XOR8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "xor.b\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SR)]>;
-def XOR16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "xor.w\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
+} // Uses = [SR]
-def XOR8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "xor.b\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-def XOR16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "xor.w\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+def SEXT16m : II16m<0b011,
+ (outs), (ins memsrc:$src),
+ "sxt\t$src",
+ [(store (sext_inreg (extloadi16i8 addr:$src), i8),
+ addr:$src),
(implicit SR)]>;
-}
+def SEXT16n : II16n<0b011, (outs), (ins indreg:$rs), "sxt\t$rs", []>;
+def SEXT16p : II16p<0b011, (outs), (ins postreg:$rs), "sxt\t$rs", []>;
+} // Defs = [SR]
-def SUB8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def SUB16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
- (implicit SR)]>;
+def SWPB16m : II16m<0b001,
+ (outs), (ins memsrc:$src),
+ "swpb\t$src",
+ [(store (bswap (i16 (load addr:$src))), addr:$src)]>;
+def SWPB16n : II16n<0b001, (outs), (ins indreg:$rs), "swpb\t$rs", []>;
+def SWPB16p : II16p<0b001, (outs), (ins postreg:$rs), "swpb\t$rs", []>;
-def SUB8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src, imm:$src2)),
+// Integer comparisons
+let Defs = [SR] in {
+def CMP8rr : I8rr<0b1001,
+ (outs), (ins GR8:$rd, GR8:$rs),
+ "cmp.b\t$rs, $rd",
+ [(MSP430cmp GR8:$rd, GR8:$rs), (implicit SR)]>;
+def CMP16rr : I16rr<0b1001,
+ (outs), (ins GR16:$rd, GR16:$rs),
+ "cmp\t$rs, $rd",
+ [(MSP430cmp GR16:$rd, GR16:$rs), (implicit SR)]>;
+
+def CMP8rc : I8rc<0b1001,
+ (outs), (ins GR8:$rd, cg8imm:$imm),
+ "cmp.b\t$imm, $rd",
+ [(MSP430cmp GR8:$rd, cg8imm:$imm), (implicit SR)]>;
+def CMP16rc : I16rc<0b1001,
+ (outs), (ins GR16:$rd, cg16imm:$imm),
+ "cmp\t$imm, $rd",
+ [(MSP430cmp GR16:$rd, cg16imm:$imm), (implicit SR)]>;
+
+def CMP8ri : I8ri<0b1001,
+ (outs), (ins GR8:$rd, i8imm:$imm),
+ "cmp.b\t$imm, $rd",
+ [(MSP430cmp GR8:$rd, imm:$imm), (implicit SR)]>;
+def CMP16ri : I16ri<0b1001,
+ (outs), (ins GR16:$rd, i16imm:$imm),
+ "cmp\t$imm, $rd",
+ [(MSP430cmp GR16:$rd, imm:$imm), (implicit SR)]>;
+
+def CMP8mc : I8mc<0b1001,
+ (outs), (ins memsrc:$dst, cg8imm:$imm),
+ "cmp.b\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst), (i8 cg8imm:$imm)),
(implicit SR)]>;
-def SUB16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src, imm:$src2)),
+def CMP16mc : I16mc<0b1001,
+ (outs), (ins memsrc:$dst, cg16imm:$imm),
+ "cmp\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst), (i16 cg16imm:$imm)),
(implicit SR)]>;
-def SUB8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
+def CMP8mi : I8mi<0b1001,
+ (outs), (ins memsrc:$dst, i8imm:$imm),
+ "cmp.b\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst),
+ (i8 imm:$imm)), (implicit SR)]>;
+def CMP16mi : I16mi<0b1001,
+ (outs), (ins memsrc:$dst, i16imm:$imm),
+ "cmp\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst),
+ (i16 imm:$imm)), (implicit SR)]>;
+
+def CMP8rm : I8rm<0b1001,
+ (outs), (ins GR8:$rd, memsrc:$src),
+ "cmp.b\t$src, $rd",
+ [(MSP430cmp GR8:$rd, (load addr:$src)),
(implicit SR)]>;
-def SUB16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
+def CMP16rm : I16rm<0b1001,
+ (outs), (ins GR16:$rd, memsrc:$src),
+ "cmp\t$src, $rd",
+ [(MSP430cmp GR16:$rd, (load addr:$src)),
(implicit SR)]>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "sub.b\t{@$base+, $dst}", []>;
-def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "sub.w\t{@$base+, $dst}", []>;
-}
+def CMP8rn : I8rn<0b1001,
+ (outs), (ins GR8:$rd, indreg:$rs), "cmp.b\t$rs, $rd", []>;
+def CMP16rn : I16rn<0b1001,
+ (outs), (ins GR16:$rd, indreg:$rs), "cmp\t$rs, $rd", []>;
-let Constraints = "" in {
-def SUB8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "sub.b\t{$src, $dst}",
- [(store (sub (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def SUB16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "sub.w\t{$src, $dst}",
- [(store (sub (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
+def CMP8rp : I8rp<0b1001,
+ (outs), (ins GR8:$rd, postreg:$rs), "cmp.b\t$rs, $rd", []>;
+def CMP16rp : I16rp<0b1001,
+ (outs), (ins GR16:$rd, postreg:$rs), "cmp\t$rs, $rd", []>;
-def SUB8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "sub.b\t{$src, $dst}",
- [(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst),
+def CMP8mr : I8mr<0b1001,
+ (outs), (ins memsrc:$dst, GR8:$rs),
+ "cmp.b\t$rs, $dst",
+ [(MSP430cmp (load addr:$dst), GR8:$rs),
(implicit SR)]>;
-def SUB16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "sub.w\t{$src, $dst}",
- [(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst),
+def CMP16mr : I16mr<0b1001,
+ (outs), (ins memsrc:$dst, GR16:$rs),
+ "cmp\t$rs, $dst",
+ [(MSP430cmp (load addr:$dst), GR16:$rs),
(implicit SR)]>;
-
-def SUB8mm : I8mm<0x0,
+def CMP8mm : I8mm<0b1001,
(outs), (ins memdst:$dst, memsrc:$src),
- "sub.b\t{$src, $dst}",
- [(store (sub (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-def SUB16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "sub.w\t{$src, $dst}",
- [(store (sub (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-}
-
-let Uses = [SR] in {
-def SBC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def SBC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-
-def SBC8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src, imm:$src2)),
- (implicit SR)]>;
-def SBC16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-def SBC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
+ "cmp.b\t$src, $dst",
+ [(MSP430cmp (load addr:$dst), (i8 (load addr:$src))),
(implicit SR)]>;
-def SBC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
+def CMP16mm : I16mm<0b1001, (outs), (ins memdst:$dst, memsrc:$src),
+ "cmp\t$src, $dst",
+ [(MSP430cmp (load addr:$dst), (i16 (load addr:$src))),
(implicit SR)]>;
-let Constraints = "" in {
-def SBC8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "subc.b\t{$src, $dst}",
- [(store (sube (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def SBC16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "subc.w\t{$src, $dst}",
- [(store (sube (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
+def CMP8mn : I8mn<0b1001, (outs), (ins memsrc:$dst, indreg:$rs),
+ "cmp.b\t$rs, $dst", []>;
+def CMP16mn : I16mn<0b1001, (outs), (ins memsrc:$dst, indreg:$rs),
+ "cmp\t$rs, $dst", []>;
-def SBC8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "subc.b\t{$src, $dst}",
- [(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SR)]>;
-def SBC16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "subc.w\t{$src, $dst}",
- [(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
+def CMP8mp : I8mp<0b1001, (outs), (ins memsrc:$dst, postreg:$rs),
+ "cmp.b\t$rs, $dst", []>;
+def CMP16mp : I16mp<0b1001, (outs), (ins memsrc:$dst, postreg:$rs),
+ "cmp\t$rs, $dst", []>;
-def SBC8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "subc.b\t{$src, $dst}",
- [(store (sube (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+// BIT TESTS, just sets condition codes
+// Note that the C condition is set differently than when using CMP.
+let isCommutable = 1 in {
+def BIT8rr : I8rr<0b1011,
+ (outs), (ins GR8:$rd, GR8:$rs),
+ "bit.b\t$rs, $rd",
+ [(MSP430cmp (and_su GR8:$rd, GR8:$rs), 0),
(implicit SR)]>;
-def SBC16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "subc.w\t{$src, $dst}",
- [(store (sube (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
+def BIT16rr : I16rr<0b1011,
+ (outs), (ins GR16:$rd, GR16:$rs),
+ "bit\t$rs, $rd",
+ [(MSP430cmp (and_su GR16:$rd, GR16:$rs), 0),
(implicit SR)]>;
}
-
-} // Uses = [SR]
-
-// FIXME: memory variant!
-def SAR8r1 : II8r<0x0,
- (outs GR8:$dst), (ins GR8:$src),
- "rra.b\t$dst",
- [(set GR8:$dst, (MSP430rra GR8:$src)),
+def BIT8rc : I8rc<0b1011,
+ (outs), (ins GR8:$rd, cg8imm:$imm),
+ "bit.b\t$imm, $rd",
+ [(MSP430cmp (and_su GR8:$rd, cg8imm:$imm), 0),
(implicit SR)]>;
-def SAR16r1 : II16r<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "rra.w\t$dst",
- [(set GR16:$dst, (MSP430rra GR16:$src)),
+def BIT16rc : I16rc<0b1011,
+ (outs), (ins GR16:$rd, cg16imm:$imm),
+ "bit\t$imm, $rd",
+ [(MSP430cmp (and_su GR16:$rd, cg16imm:$imm), 0),
(implicit SR)]>;
-def SHL8r1 : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src),
- "rla.b\t$dst",
- [(set GR8:$dst, (MSP430rla GR8:$src)),
+def BIT8ri : I8ri<0b1011,
+ (outs), (ins GR8:$rd, i8imm:$imm),
+ "bit.b\t$imm, $rd",
+ [(MSP430cmp (and_su GR8:$rd, imm:$imm), 0),
(implicit SR)]>;
-def SHL16r1 : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "rla.w\t$dst",
- [(set GR16:$dst, (MSP430rla GR16:$src)),
- (implicit SR)]>;
-
-def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src),
- "clrc\n\t"
- "rrc.b\t$dst",
- [(set GR8:$dst, (MSP430rrc GR8:$src)),
- (implicit SR)]>;
-def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src),
- "clrc\n\t"
- "rrc.w\t$dst",
- [(set GR16:$dst, (MSP430rrc GR16:$src)),
- (implicit SR)]>;
-
-// FIXME: Memory sext's ?
-def SEXT16r : II16r<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "sxt\t$dst",
- [(set GR16:$dst, (sext_inreg GR16:$src, i8)),
+def BIT16ri : I16ri<0b1011,
+ (outs), (ins GR16:$rd, i16imm:$imm),
+ "bit\t$imm, $rd",
+ [(MSP430cmp (and_su GR16:$rd, imm:$imm), 0),
(implicit SR)]>;
-} // Defs = [SR]
-
-def ZEXT16r : I8rr<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "mov.b\t{$src, $dst}",
- [(set GR16:$dst, (zext (trunc GR16:$src)))]>;
-
-// FIXME: Memory bitswaps?
-def SWPB16r : II16r<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "swpb\t$dst",
- [(set GR16:$dst, (bswap GR16:$src))]>;
-
-} // Constraints = "$src = $dst"
-
-// Integer comparisons
-let Defs = [SR] in {
-def CMP8rr : I8rr<0x0,
- (outs), (ins GR8:$src, GR8:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, GR8:$src2), (implicit SR)]>;
-def CMP16rr : I16rr<0x0,
- (outs), (ins GR16:$src, GR16:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, GR16:$src2), (implicit SR)]>;
-
-def CMP8ri : I8ri<0x0,
- (outs), (ins GR8:$src, i8imm:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, imm:$src2), (implicit SR)]>;
-def CMP16ri : I16ri<0x0,
- (outs), (ins GR16:$src, i16imm:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, imm:$src2), (implicit SR)]>;
-
-def CMP8mi : I8mi<0x0,
- (outs), (ins memsrc:$src, i8imm:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp (load addr:$src),
- (i8 imm:$src2)), (implicit SR)]>;
-def CMP16mi : I16mi<0x0,
- (outs), (ins memsrc:$src, i16imm:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp (load addr:$src),
- (i16 imm:$src2)), (implicit SR)]>;
-
-def CMP8rm : I8rm<0x0,
- (outs), (ins GR8:$src, memsrc:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, (load addr:$src2)),
+def BIT8rm : I8rm<0b1011,
+ (outs), (ins GR8:$rd, memdst:$src),
+ "bit.b\t$src, $rd",
+ [(MSP430cmp (and_su GR8:$rd, (load addr:$src)), 0),
(implicit SR)]>;
-def CMP16rm : I16rm<0x0,
- (outs), (ins GR16:$src, memsrc:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, (load addr:$src2)),
+def BIT16rm : I16rm<0b1011,
+ (outs), (ins GR16:$rd, memdst:$src),
+ "bit\t$src, $rd",
+ [(MSP430cmp (and_su GR16:$rd, (load addr:$src)), 0),
(implicit SR)]>;
-def CMP8mr : I8mr<0x0,
- (outs), (ins memsrc:$src, GR8:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp (load addr:$src), GR8:$src2),
- (implicit SR)]>;
-def CMP16mr : I16mr<0x0,
- (outs), (ins memsrc:$src, GR16:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp (load addr:$src), GR16:$src2),
- (implicit SR)]>;
+def BIT8rn : I8rn<0b1011, (outs), (ins GR8:$rd, indreg:$rs),
+ "bit.b\t$rs, $rd", []>;
+def BIT16rn : I16rn<0b1011, (outs), (ins GR16:$rd, indreg:$rs),
+ "bit\t$rs, $rd", []>;
+def BIT8rp : I8rp<0b1011, (outs), (ins GR8:$rd, postreg:$rs),
+ "bit.b\t$rs, $rd", []>;
+def BIT16rp : I16rp<0b1011, (outs), (ins GR16:$rd, postreg:$rs),
+ "bit\t$rs, $rd", []>;
-// BIT TESTS, just sets condition codes
-// Note that the C condition is set differently than when using CMP.
-let isCommutable = 1 in {
-def BIT8rr : I8rr<0x0,
- (outs), (ins GR8:$src, GR8:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
- (implicit SR)]>;
-def BIT16rr : I16rr<0x0,
- (outs), (ins GR16:$src, GR16:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
- (implicit SR)]>;
-}
-def BIT8ri : I8ri<0x0,
- (outs), (ins GR8:$src, i8imm:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
- (implicit SR)]>;
-def BIT16ri : I16ri<0x0,
- (outs), (ins GR16:$src, i16imm:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
+def BIT8mr : I8mr<0b1011,
+ (outs), (ins memsrc:$dst, GR8:$rs),
+ "bit.b\t$rs, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), GR8:$rs), 0),
+ (implicit SR)]>;
+def BIT16mr : I16mr<0b1011,
+ (outs), (ins memsrc:$dst, GR16:$rs),
+ "bit\t$rs, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), GR16:$rs), 0),
(implicit SR)]>;
-def BIT8rm : I8rm<0x0,
- (outs), (ins GR8:$src, memdst:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
+def BIT8mc : I8mc<0b1011,
+ (outs), (ins memsrc:$dst, cg8imm:$imm),
+ "bit.b\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i8 cg8imm:$imm)), 0),
(implicit SR)]>;
-def BIT16rm : I16rm<0x0,
- (outs), (ins GR16:$src, memdst:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
+def BIT16mc : I16mc<0b1011,
+ (outs), (ins memdst:$dst, cg16imm:$imm),
+ "bit\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i16 cg16imm:$imm)), 0),
(implicit SR)]>;
-def BIT8mr : I8mr<0x0,
- (outs), (ins memsrc:$src, GR8:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
- (implicit SR)]>;
-def BIT16mr : I16mr<0x0,
- (outs), (ins memsrc:$src, GR16:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
- (implicit SR)]>;
-
-def BIT8mi : I8mi<0x0,
- (outs), (ins memsrc:$src, i8imm:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
+def BIT8mi : I8mi<0b1011,
+ (outs), (ins memsrc:$dst, i8imm:$imm),
+ "bit.b\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i8 imm:$imm)), 0),
(implicit SR)]>;
-def BIT16mi : I16mi<0x0,
- (outs), (ins memsrc:$src, i16imm:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
+def BIT16mi : I16mi<0b1011,
+ (outs), (ins memsrc:$dst, i16imm:$imm),
+ "bit\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i16 imm:$imm)), 0),
(implicit SR)]>;
-def BIT8mm : I8mm<0x0,
- (outs), (ins memsrc:$src, memsrc:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su (i8 (load addr:$src)),
- (load addr:$src2)),
+def BIT8mm : I8mm<0b1011,
+ (outs), (ins memsrc:$dst, memsrc:$src),
+ "bit.b\t$src, $dst",
+ [(MSP430cmp (and_su (i8 (load addr:$dst)),
+ (load addr:$src)),
0),
(implicit SR)]>;
-def BIT16mm : I16mm<0x0,
- (outs), (ins memsrc:$src, memsrc:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su (i16 (load addr:$src)),
- (load addr:$src2)),
+def BIT16mm : I16mm<0b1011,
+ (outs), (ins memsrc:$dst, memsrc:$src),
+ "bit\t$src, $dst",
+ [(MSP430cmp (and_su (i16 (load addr:$dst)),
+ (load addr:$src)),
0),
(implicit SR)]>;
+def BIT8mn : I8mn<0b1011, (outs), (ins memsrc:$dst, indreg:$rs),
+ "bit.b\t$rs, $dst", []>;
+def BIT16mn : I16mn<0b1011, (outs), (ins memsrc:$dst, indreg:$rs),
+ "bit\t$rs, $dst", []>;
+
+def BIT8mp : I8mp<0b1011, (outs), (ins memsrc:$dst, postreg:$rs),
+ "bit.b\t$rs, $dst", []>;
+def BIT16mp : I16mp<0b1011, (outs), (ins memsrc:$dst, postreg:$rs),
+ "bit\t$rs, $dst", []>;
+
} // Defs = [SR]
+def TST8r : InstAlias<"tst.b\t$dst", (CMP8rc GR8:$dst, 0)>;
+def TST16r : InstAlias<"tst\t$dst", (CMP16rc GR16:$dst, 0)>;
+def TST8m : InstAlias<"tst.b\t$dst", (CMP8mc memdst:$dst, 0)>;
+def TST16m : InstAlias<"tst\t$dst", (CMP16mc memdst:$dst, 0)>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
index e7716382b222..860c0006f782 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -110,6 +110,9 @@ LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
return MCOperand::createExpr(Expr);
}
+#define GET_REGINFO_ENUM
+#include "MSP430GenRegisterInfo.inc"
+
void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
index b5a6ed0f0a56..1e86bdf34a0b 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -11,26 +11,31 @@
// Declarations that describe the MSP430 register file
//===----------------------------------------------------------------------===//
-class MSP430Reg<bits<4> num, string n> : Register<n> {
+class MSP430Reg<bits<4> num, string n, list<string> alt = []> : Register<n> {
field bits<4> Num = num;
let Namespace = "MSP430";
+ let HWEncoding{3-0} = num;
+ let AltNames = alt;
}
-class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs>
+class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs,
+ list<string> alt = []>
: RegisterWithSubRegs<n, subregs> {
field bits<4> Num = num;
let Namespace = "MSP430";
+ let HWEncoding{3-0} = num;
+ let AltNames = alt;
}
//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//
-def PCB : MSP430Reg<0, "r0">;
-def SPB : MSP430Reg<1, "r1">;
-def SRB : MSP430Reg<2, "r2">;
-def CGB : MSP430Reg<3, "r3">;
-def FPB : MSP430Reg<4, "r4">;
+def PCB : MSP430Reg<0, "r0", ["pc"]>;
+def SPB : MSP430Reg<1, "r1", ["sp"]>;
+def SRB : MSP430Reg<2, "r2", ["sr"]>;
+def CGB : MSP430Reg<3, "r3", ["cg"]>;
+def FPB : MSP430Reg<4, "r4", ["fp"]>;
def R5B : MSP430Reg<5, "r5">;
def R6B : MSP430Reg<6, "r6">;
def R7B : MSP430Reg<7, "r7">;
@@ -46,11 +51,11 @@ def R15B : MSP430Reg<15, "r15">;
def subreg_8bit : SubRegIndex<8> { let Namespace = "MSP430"; }
let SubRegIndices = [subreg_8bit] in {
-def PC : MSP430RegWithSubregs<0, "r0", [PCB]>;
-def SP : MSP430RegWithSubregs<1, "r1", [SPB]>;
-def SR : MSP430RegWithSubregs<2, "r2", [SRB]>;
-def CG : MSP430RegWithSubregs<3, "r3", [CGB]>;
-def FP : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def PC : MSP430RegWithSubregs<0, "r0", [PCB], ["pc"]>;
+def SP : MSP430RegWithSubregs<1, "r1", [SPB], ["sp"]>;
+def SR : MSP430RegWithSubregs<2, "r2", [SRB], ["sr"]>;
+def CG : MSP430RegWithSubregs<3, "r3", [CGB], ["cg"]>;
+def FP : MSP430RegWithSubregs<4, "r4", [FPB], ["fp"]>;
def R5 : MSP430RegWithSubregs<5, "r5", [R5B]>;
def R6 : MSP430RegWithSubregs<6, "r6", [R6B]>;
def R7 : MSP430RegWithSubregs<7, "r7", [R7B]>;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index 01f44e266d7b..9f6ebba75ec6 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -32,12 +32,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options) {
return "e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16";
@@ -51,7 +45,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS,
Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
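The hunk above drops the target-local getEffectiveCodeModel() helper in favour of the shared overload that takes an explicit default. The underlying pattern is simply "use the requested value if one was given, otherwise fall back to a per-target default"; a minimal standalone sketch of that pattern (illustrative names, not the LLVM API):

#include <optional>

enum class CodeModel { Small, Medium, Large };

// Honour an explicit code-model request, otherwise fall back to the
// target's default (MSP430 defaults to Small above).
static CodeModel effectiveCodeModel(std::optional<CodeModel> Requested,
                                    CodeModel Default) {
  return Requested ? *Requested : Default;
}
// e.g. effectiveCodeModel(std::nullopt, CodeModel::Small) == CodeModel::Small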
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index ce7db657f5e9..d2fed6861477 100644
--- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -39,6 +39,7 @@
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -64,6 +65,11 @@ class MCInstrInfo;
} // end namespace llvm
+static cl::opt<bool>
+EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
+ cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
+ cl::init(true));
+
namespace {
class MipsAssemblerOptions {
@@ -195,7 +201,6 @@ class MipsAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseImm(OperandVector &Operands);
OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
OperandMatchResultTy parseInvNum(OperandVector &Operands);
- OperandMatchResultTy parseMovePRegPair(OperandVector &Operands);
OperandMatchResultTy parseRegisterList(OperandVector &Operands);
bool searchSymbolAlias(OperandVector &Operands);
@@ -760,7 +765,6 @@ private:
k_RegisterIndex, /// A register index in one or more RegKind.
k_Token, /// A simple token
k_RegList, /// A physical register list
- k_RegPair /// A pair of physical register
} Kind;
public:
@@ -769,16 +773,15 @@ public:
~MipsOperand() override {
switch (Kind) {
- case k_Immediate:
- break;
case k_Memory:
delete Mem.Base;
break;
case k_RegList:
delete RegList.List;
+ break;
+ case k_Immediate:
case k_RegisterIndex:
case k_Token:
- case k_RegPair:
break;
}
}
@@ -1038,6 +1041,17 @@ public:
Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
}
+ void addGPRMM16AsmRegMovePPairFirstOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+ }
+
+ void addGPRMM16AsmRegMovePPairSecondOperands(MCInst &Inst,
+ unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+ }
+
/// Render the operand to an MCInst as a GPR64
/// Asserts if the wrong number of operands are requested, or the operand
/// is not a k_RegisterIndex compatible with RegKind_GPR
@@ -1217,29 +1231,6 @@ public:
Inst.addOperand(MCOperand::createReg(RegNo));
}
- void addRegPairOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && "Invalid number of operands!");
- assert((RegIdx.Kind & RegKind_GPR) && "Invalid access!");
- unsigned RegNo = getRegPair();
- AsmParser.warnIfRegIndexIsAT(RegNo, StartLoc);
- Inst.addOperand(MCOperand::createReg(
- RegIdx.RegInfo->getRegClass(
- AsmParser.getABI().AreGprs64bit()
- ? Mips::GPR64RegClassID
- : Mips::GPR32RegClassID).getRegister(RegNo++)));
- Inst.addOperand(MCOperand::createReg(
- RegIdx.RegInfo->getRegClass(
- AsmParser.getABI().AreGprs64bit()
- ? Mips::GPR64RegClassID
- : Mips::GPR32RegClassID).getRegister(RegNo)));
- }
-
- void addMovePRegPairOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && "Invalid number of operands!");
- for (auto RegNo : getRegList())
- Inst.addOperand(MCOperand::createReg(RegNo));
- }
-
bool isReg() const override {
// As a special case until we sort out the definition of div/divu, accept
// $0/$zero here so that MCK_ZERO works correctly.
@@ -1406,34 +1397,6 @@ public:
bool isRegList() const { return Kind == k_RegList; }
- bool isMovePRegPair() const {
- if (Kind != k_RegList || RegList.List->size() != 2)
- return false;
-
- unsigned R0 = RegList.List->front();
- unsigned R1 = RegList.List->back();
-
- if ((R0 == Mips::A1 && R1 == Mips::A2) ||
- (R0 == Mips::A1 && R1 == Mips::A3) ||
- (R0 == Mips::A2 && R1 == Mips::A3) ||
- (R0 == Mips::A0 && R1 == Mips::S5) ||
- (R0 == Mips::A0 && R1 == Mips::S6) ||
- (R0 == Mips::A0 && R1 == Mips::A1) ||
- (R0 == Mips::A0 && R1 == Mips::A2) ||
- (R0 == Mips::A0 && R1 == Mips::A3) ||
- (R0 == Mips::A1_64 && R1 == Mips::A2_64) ||
- (R0 == Mips::A1_64 && R1 == Mips::A3_64) ||
- (R0 == Mips::A2_64 && R1 == Mips::A3_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::S5_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::S6_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::A1_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::A2_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::A3_64))
- return true;
-
- return false;
- }
-
StringRef getToken() const {
assert(Kind == k_Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -1481,11 +1444,6 @@ public:
return *(RegList.List);
}
- unsigned getRegPair() const {
- assert((Kind == k_RegPair) && "Invalid access!");
- return RegIdx.Index;
- }
-
static std::unique_ptr<MipsOperand> CreateToken(StringRef Str, SMLoc S,
MipsAsmParser &Parser) {
auto Op = llvm::make_unique<MipsOperand>(k_Token, Parser);
@@ -1593,18 +1551,6 @@ public:
return Op;
}
- static std::unique_ptr<MipsOperand> CreateRegPair(const MipsOperand &MOP,
- SMLoc S, SMLoc E,
- MipsAsmParser &Parser) {
- auto Op = llvm::make_unique<MipsOperand>(k_RegPair, Parser);
- Op->RegIdx.Index = MOP.RegIdx.Index;
- Op->RegIdx.RegInfo = MOP.RegIdx.RegInfo;
- Op->RegIdx.Kind = MOP.RegIdx.Kind;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
bool isGPRZeroAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index == 0;
}
@@ -1640,6 +1586,19 @@ public:
(RegIdx.Index >= 16 && RegIdx.Index <= 20));
}
+ bool isMM16AsmRegMovePPairFirst() const {
+ if (!(isRegIdx() && RegIdx.Kind))
+ return false;
+ return RegIdx.Index >= 4 && RegIdx.Index <= 6;
+ }
+
+ bool isMM16AsmRegMovePPairSecond() const {
+ if (!(isRegIdx() && RegIdx.Kind))
+ return false;
+ return (RegIdx.Index == 21 || RegIdx.Index == 22 ||
+ (RegIdx.Index >= 5 && RegIdx.Index <= 7));
+ }
+
bool isFGRAsmReg() const {
// AFGR64 is $0-$15 but we handle this in getAFGR64()
return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31;
@@ -1720,9 +1679,6 @@ public:
OS << Reg << " ";
OS << ">";
break;
- case k_RegPair:
- OS << "RegPair<" << RegIdx.Index << "," << RegIdx.Index + 1 << ">";
- break;
}
}
@@ -1755,14 +1711,23 @@ static const MCInstrDesc &getInstDesc(unsigned Opcode) {
return MipsInsts[Opcode];
}
-static bool hasShortDelaySlot(unsigned Opcode) {
- switch (Opcode) {
+static bool hasShortDelaySlot(MCInst &Inst) {
+ switch (Inst.getOpcode()) {
+ case Mips::BEQ_MM:
+ case Mips::BNE_MM:
+ case Mips::BLTZ_MM:
+ case Mips::BGEZ_MM:
+ case Mips::BLEZ_MM:
+ case Mips::BGTZ_MM:
+ case Mips::JRC16_MM:
case Mips::JALS_MM:
case Mips::JALRS_MM:
case Mips::JALRS16_MM:
case Mips::BGEZALS_MM:
case Mips::BLTZALS_MM:
return true;
+ case Mips::J_MM:
+ return !Inst.getOperand(0).isReg();
default:
return false;
}
@@ -2115,9 +2080,21 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
JalrInst.addOperand(MCOperand::createReg(Mips::RA));
JalrInst.addOperand(MCOperand::createReg(Mips::T9));
- // FIXME: Add an R_(MICRO)MIPS_JALR relocation after the JALR.
- // This relocation is supposed to be an optimization hint for the linker
- // and is not necessary for correctness.
+ if (EmitJalrReloc) {
+ // As an optimization hint for the linker, before the JALR we add:
+ // .reloc tmplabel, R_{MICRO}MIPS_JALR, symbol
+ // tmplabel:
+ MCSymbol *TmpLabel = getContext().createTempSymbol();
+ const MCExpr *TmpExpr = MCSymbolRefExpr::create(TmpLabel, getContext());
+ const MCExpr *RelocJalrExpr =
+ MCSymbolRefExpr::create(JalSym, MCSymbolRefExpr::VK_None,
+ getContext(), IDLoc);
+
+ TOut.getStreamer().EmitRelocDirective(*TmpExpr,
+ inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
+ RelocJalrExpr, IDLoc, *STI);
+ TOut.getStreamer().EmitLabel(TmpLabel);
+ }
Inst = JalrInst;
ExpandedJalSym = true;
@@ -2288,6 +2265,22 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
if (Inst.getOperand(0).getReg() == Mips::RA)
return Error(IDLoc, "invalid operand for instruction");
break;
+ case Mips::MOVEP_MM:
+ case Mips::MOVEP_MMR6: {
+ unsigned R0 = Inst.getOperand(0).getReg();
+ unsigned R1 = Inst.getOperand(1).getReg();
+ bool RegPair = ((R0 == Mips::A1 && R1 == Mips::A2) ||
+ (R0 == Mips::A1 && R1 == Mips::A3) ||
+ (R0 == Mips::A2 && R1 == Mips::A3) ||
+ (R0 == Mips::A0 && R1 == Mips::S5) ||
+ (R0 == Mips::A0 && R1 == Mips::S6) ||
+ (R0 == Mips::A0 && R1 == Mips::A1) ||
+ (R0 == Mips::A0 && R1 == Mips::A2) ||
+ (R0 == Mips::A0 && R1 == Mips::A3));
+ if (!RegPair)
+ return Error(IDLoc, "invalid operand for instruction");
+ break;
+ }
}
}
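Because the dedicated register-pair operand is gone, the destination pair of movep is now validated here after matching. Only a fixed set of register pairs is encodable; a standalone sketch of the same table-driven check, using plain enumerators as stand-ins for the Mips::A0 etc. register numbers:

#include <utility>

enum Reg { A0, A1, A2, A3, S5, S6 };  // illustrative stand-ins

// True when (R0, R1) is one of the destination pairs MOVEP can encode,
// mirroring the pairs checked for MOVEP_MM / MOVEP_MMR6 above.
static bool isValidMovepDstPair(Reg R0, Reg R1) {
  static const std::pair<Reg, Reg> Valid[] = {
      {A1, A2}, {A1, A3}, {A2, A3}, {A0, S5},
      {A0, S6}, {A0, A1}, {A0, A2}, {A0, A3}};
  for (const auto &P : Valid)
    if (P.first == R0 && P.second == R1)
      return true;
  return false;
}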
@@ -2318,7 +2311,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// If this instruction has a delay slot and .set reorder is active,
// emit a NOP after it.
if (FillDelaySlot) {
- TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst.getOpcode()), IDLoc, STI);
+ TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst), IDLoc, STI);
TOut.emitDirectiveSetReorder();
}
@@ -2330,7 +2323,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// If .set reorder has been used, we've already emitted a NOP.
// If .set noreorder has been used, we need to emit a NOP at this point.
if (!AssemblerOptions.back()->isReorder())
- TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst.getOpcode()), IDLoc,
+ TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst), IDLoc,
STI);
// Load the $gp from the stack.
@@ -2617,7 +2610,7 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
// emit a NOP after it.
const MCInstrDesc &MCID = getInstDesc(JalrInst.getOpcode());
if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder())
- TOut.emitEmptyDelaySlot(hasShortDelaySlot(JalrInst.getOpcode()), IDLoc,
+ TOut.emitEmptyDelaySlot(hasShortDelaySlot(JalrInst), IDLoc,
STI);
return false;
@@ -6278,45 +6271,6 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
return MatchOperand_Success;
}
-OperandMatchResultTy
-MipsAsmParser::parseMovePRegPair(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> TmpOperands;
- SmallVector<unsigned, 10> Regs;
-
- if (Parser.getTok().isNot(AsmToken::Dollar))
- return MatchOperand_ParseFail;
-
- SMLoc S = Parser.getTok().getLoc();
-
- if (parseAnyRegister(TmpOperands) != MatchOperand_Success)
- return MatchOperand_ParseFail;
-
- MipsOperand *Reg = &static_cast<MipsOperand &>(*TmpOperands.back());
- unsigned RegNo = isGP64bit() ? Reg->getGPR64Reg() : Reg->getGPR32Reg();
- Regs.push_back(RegNo);
-
- SMLoc E = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(E, "',' expected");
- return MatchOperand_ParseFail;
- }
-
- // Remove comma.
- Parser.Lex();
-
- if (parseAnyRegister(TmpOperands) != MatchOperand_Success)
- return MatchOperand_ParseFail;
-
- Reg = &static_cast<MipsOperand &>(*TmpOperands.back());
- RegNo = isGP64bit() ? Reg->getGPR64Reg() : Reg->getGPR32Reg();
- Regs.push_back(RegNo);
-
- Operands.push_back(MipsOperand::CreateRegList(Regs, S, E, *this));
-
- return MatchOperand_Success;
-}
-
/// Sometimes (i.e. load/stores) the operand may be followed immediately by
/// either this.
/// ::= '(', register, ')'
@@ -6371,6 +6325,9 @@ bool MipsAsmParser::parseBracketSuffix(StringRef Name,
return false;
}
+static std::string MipsMnemonicSpellCheck(StringRef S, uint64_t FBS,
+ unsigned VariantID = 0);
+
bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
MCAsmParser &Parser = getParser();
@@ -6381,7 +6338,9 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Check if we have valid mnemonic
if (!mnemonicIsValid(Name, 0)) {
- return Error(NameLoc, "unknown instruction");
+ uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ std::string Suggestion = MipsMnemonicSpellCheck(Name, FBS);
+ return Error(NameLoc, "unknown instruction" + Suggestion);
}
// First operand in MCInst is instruction mnemonic.
Operands.push_back(MipsOperand::CreateToken(Name, NameLoc, *this));
@@ -8257,6 +8216,7 @@ extern "C" void LLVMInitializeMipsAsmParser() {
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
+#define GET_MNEMONIC_SPELL_CHECKER
#include "MipsGenAsmMatcher.inc"
bool MipsAsmParser::mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) {
diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index b94afb9520e3..27b27ff1e1e2 100644
--- a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -538,6 +538,9 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
namespace llvm {
Target &getTheMipselTarget();
@@ -2450,6 +2453,32 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned RegPair = fieldFromInstruction(Insn, 7, 3);
+ if (DecodeMovePRegPair(Inst, RegPair, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ unsigned RegRs;
+ if (static_cast<const MipsDisassembler*>(Decoder)->hasMips32r6())
+ RegRs = fieldFromInstruction(Insn, 0, 2) |
+ (fieldFromInstruction(Insn, 3, 1) << 2);
+ else
+ RegRs = fieldFromInstruction(Insn, 1, 3);
+ if (DecodeGPRMM16MovePRegisterClass(Inst, RegRs, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ unsigned RegRt = fieldFromInstruction(Insn, 4, 3);
+ if (DecodeGPRMM16MovePRegisterClass(Inst, RegRt, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
uint64_t Address, const void *Decoder) {
switch (RegPair) {
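DecodeMovePOperands pulls the destination-pair selector and the two source register fields out of fixed bit positions of the MOVEP encoding before handing them to the existing register decoders. A standalone sketch of that field extraction (the field() helper mirrors LLVM's fieldFromInstruction(); mapping field values to physical registers is omitted):

#include <cstdint>

// Extract NumBits starting at StartBit.
static unsigned field(uint32_t Insn, unsigned StartBit, unsigned NumBits) {
  return (Insn >> StartBit) & ((1u << NumBits) - 1);
}

struct MovepFields {
  unsigned RegPair;  // selects one of the encodable destination pairs
  unsigned RegRs;    // first source register field
  unsigned RegRt;    // second source register field
};

// Field positions follow the decoder above: pair in bits 9..7, rt in bits
// 6..4, rs in bits 3..1 (or, on microMIPS R6, bits 1..0 with bit 3 as the
// high bit).
static MovepFields decodeMovepFields(uint32_t Insn, bool IsR6) {
  MovepFields F;
  F.RegPair = field(Insn, 7, 3);
  F.RegRs = IsR6 ? (field(Insn, 0, 2) | (field(Insn, 3, 1) << 2))
                 : field(Insn, 1, 3);
  F.RegRt = field(Insn, 4, 3);
  return F;
}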
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index bf1390880281..18d7dd99be34 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -55,6 +55,8 @@ MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef CPU,
return MipsABIInfo::N32();
if (Options.getABIName().startswith("n64"))
return MipsABIInfo::N64();
+ if (TT.getEnvironment() == llvm::Triple::GNUABIN32)
+ return MipsABIInfo::N32();
assert(Options.getABIName().empty() && "Unknown ABI option for MIPS");
if (TT.isMIPS64())
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 3b1b94acb149..7d528fe3eab1 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -13,6 +13,7 @@
//
#include "MCTargetDesc/MipsAsmBackend.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
@@ -339,6 +340,8 @@ Optional<MCFixupKind> MipsAsmBackend::getFixupKind(StringRef Name) const {
(MCFixupKind)Mips::fixup_MICROMIPS_TLS_TPREL_HI16)
.Case("R_MICROMIPS_TLS_TPREL_LO16",
(MCFixupKind)Mips::fixup_MICROMIPS_TLS_TPREL_LO16)
+ .Case("R_MIPS_JALR", (MCFixupKind)Mips::fixup_Mips_JALR)
+ .Case("R_MICROMIPS_JALR", (MCFixupKind)Mips::fixup_MICROMIPS_JALR)
.Default(MCAsmBackend::getFixupKind(Name));
}
@@ -417,7 +420,9 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_TLS_TPREL_HI16", 0, 16, 0 },
{ "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 },
{ "fixup_Mips_SUB", 0, 64, 0 },
- { "fixup_MICROMIPS_SUB", 0, 64, 0 }
+ { "fixup_MICROMIPS_SUB", 0, 64, 0 },
+ { "fixup_Mips_JALR", 0, 32, 0 },
+ { "fixup_MICROMIPS_JALR", 0, 32, 0 }
};
static_assert(array_lengthof(LittleEndianInfos) == Mips::NumTargetFixupKinds,
"Not all MIPS little endian fixup kinds added!");
@@ -495,7 +500,9 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_TLS_TPREL_HI16", 16, 16, 0 },
{ "fixup_MICROMIPS_TLS_TPREL_LO16", 16, 16, 0 },
{ "fixup_Mips_SUB", 0, 64, 0 },
- { "fixup_MICROMIPS_SUB", 0, 64, 0 }
+ { "fixup_MICROMIPS_SUB", 0, 64, 0 },
+ { "fixup_Mips_JALR", 0, 32, 0 },
+ { "fixup_MICROMIPS_JALR", 0, 32, 0 }
};
static_assert(array_lengthof(BigEndianInfos) == Mips::NumTargetFixupKinds,
"Not all MIPS big endian fixup kinds added!");
@@ -553,6 +560,7 @@ bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
case Mips::fixup_Mips_TLSLDM:
case Mips::fixup_Mips_TPREL_HI:
case Mips::fixup_Mips_TPREL_LO:
+ case Mips::fixup_Mips_JALR:
case Mips::fixup_MICROMIPS_CALL16:
case Mips::fixup_MICROMIPS_GOT_DISP:
case Mips::fixup_MICROMIPS_GOT_PAGE:
@@ -565,6 +573,7 @@ bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
case Mips::fixup_MICROMIPS_TLS_LDM:
case Mips::fixup_MICROMIPS_TLS_TPREL_HI16:
case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
+ case Mips::fixup_MICROMIPS_JALR:
return true;
}
}
@@ -581,6 +590,6 @@ MCAsmBackend *llvm::createMipsAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
- return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(),
- Options.ABIName == "n32");
+ MipsABIInfo ABI = MipsABIInfo::computeTargetABI(STI.getTargetTriple(), STI.getCPU(), Options);
+ return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), ABI.IsN32());
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 3dc753772e5f..8ace2895d681 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
@@ -225,7 +226,9 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
case Mips::fixup_Mips_NONE:
return ELF::R_MIPS_NONE;
case FK_Data_1:
- report_fatal_error("MIPS does not support one byte relocations");
+ Ctx.reportError(Fixup.getLoc(),
+ "MIPS does not support one byte relocations");
+ return ELF::R_MIPS_NONE;
case Mips::fixup_Mips_16:
case FK_Data_2:
return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16;
@@ -236,6 +239,10 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsPCRel) {
switch (Kind) {
+ case FK_Data_8:
+ Ctx.reportError(Fixup.getLoc(),
+ "MIPS does not support 64-bit PC-relative relocations");
+ return ELF::R_MIPS_NONE;
case Mips::fixup_Mips_Branch_PCRel:
case Mips::fixup_Mips_PC16:
return ELF::R_MIPS_PC16;
@@ -401,6 +408,10 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_MICROMIPS_HIGHER;
case Mips::fixup_MICROMIPS_HIGHEST:
return ELF::R_MICROMIPS_HIGHEST;
+ case Mips::fixup_Mips_JALR:
+ return ELF::R_MIPS_JALR;
+ case Mips::fixup_MICROMIPS_JALR:
+ return ELF::R_MICROMIPS_JALR;
}
llvm_unreachable("invalid fixup kind!");
@@ -453,7 +464,7 @@ void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
return;
// Sort relocations by the address they are applied to.
- llvm::sort(Relocs.begin(), Relocs.end(),
+ llvm::sort(Relocs,
[](const ELFRelocationEntry &A, const ELFRelocationEntry &B) {
return A.Offset < B.Offset;
});
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index d7f6cf91db73..eedad16dddc3 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -222,6 +222,10 @@ namespace Mips {
fixup_Mips_SUB,
fixup_MICROMIPS_SUB,
+ // resulting in - R_MIPS_JALR/R_MICROMIPS_JALR
+ fixup_Mips_JALR,
+ fixup_MICROMIPS_JALR,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index f498d830c8f0..1506b4a83649 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -21,9 +21,8 @@ void MipsMCAsmInfo::anchor() { }
MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
IsLittleEndian = TheTriple.isLittleEndian();
- if (TheTriple.isMIPS64()) {
+ if (TheTriple.isMIPS64() && TheTriple.getEnvironment() != Triple::GNUABIN32)
CodePointerSize = CalleeSaveStackSlotSize = 8;
- }
// FIXME: This condition isn't quite right but it's the best we can do until
// this object can identify the ABI. It will misbehave when using O32
@@ -50,21 +49,5 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
ExceptionsType = ExceptionHandling::DwarfCFI;
DwarfRegNumForCFI = true;
HasMipsExpressions = true;
-
- // Enable IAS by default for O32.
- if (TheTriple.isMIPS32())
- UseIntegratedAssembler = true;
-
- // Enable IAS by default for Debian mips64/mips64el.
- if (TheTriple.getEnvironment() == Triple::GNUABI64)
- UseIntegratedAssembler = true;
-
- // Enable IAS by default for Android mips64el that uses N64 ABI.
- if (TheTriple.getArch() == Triple::mips64el && TheTriple.isAndroid())
- UseIntegratedAssembler = true;
-
- // Enable IAS by default for FreeBSD / OpenBSD mips64/mips64el.
- if (TheTriple.isOSFreeBSD() ||
- TheTriple.isOSOpenBSD())
- UseIntegratedAssembler = true;
+ UseIntegratedAssembler = true;
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index cd34b0ab70b4..f43a4d980f92 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -213,6 +213,12 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
TmpInst.setOpcode (NewOpcode);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
}
+
+ if (((MI.getOpcode() == Mips::MOVEP_MM) ||
+ (MI.getOpcode() == Mips::MOVEP_MMR6))) {
+ unsigned RegPair = getMovePRegPairOpValue(MI, 0, Fixups, STI);
+ Binary = (Binary & 0xFFFFFC7F) | (RegPair << 7);
+ }
}
const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
@@ -607,6 +613,9 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
case MipsMCExpr::MEK_Special:
llvm_unreachable("Unhandled fixup kind!");
break;
+ case MipsMCExpr::MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+ break;
case MipsMCExpr::MEK_CALL_HI16:
FixupKind = Mips::fixup_Mips_CALL_HI16;
break;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 0bddba781453..99857e083c6c 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -43,6 +43,9 @@ void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
case MEK_Special:
llvm_unreachable("MEK_None and MEK_Special are invalid");
break;
+ case MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+ break;
case MEK_CALL_HI16:
OS << "%call_hi";
break;
@@ -157,6 +160,8 @@ MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
case MEK_None:
case MEK_Special:
llvm_unreachable("MEK_None and MEK_Special are invalid");
+ case MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
case MEK_DTPREL_HI:
case MEK_DTPREL_LO:
case MEK_GOT:
@@ -244,6 +249,9 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
case MEK_Special:
llvm_unreachable("MEK_None and MEK_Special are invalid");
break;
+ case MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+ break;
case MEK_CALL_HI16:
case MEK_CALL_LO16:
case MEK_GOT:
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index 495d525ccff4..bf3274ab5d17 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -22,6 +22,7 @@ public:
MEK_None,
MEK_CALL_HI16,
MEK_CALL_LO16,
+ MEK_DTPREL,
MEK_DTPREL_HI,
MEK_DTPREL_LO,
MEK_GOT,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index ce208b7f98bc..a8cd7b0d9b03 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -47,10 +47,17 @@ using namespace llvm;
/// FIXME: Merge with the copy in MipsSubtarget.cpp
StringRef MIPS_MC::selectMipsCPU(const Triple &TT, StringRef CPU) {
if (CPU.empty() || CPU == "generic") {
- if (TT.isMIPS32())
- CPU = "mips32";
- else
- CPU = "mips64";
+ if (TT.getSubArch() == llvm::Triple::MipsSubArch_r6) {
+ if (TT.isMIPS32())
+ CPU = "mips32r6";
+ else
+ CPU = "mips64r6";
+ } else {
+ if (TT.isMIPS32())
+ CPU = "mips32";
+ else
+ CPU = "mips64";
+ }
}
return CPU;
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 1eb21b6cc826..58f9717e1cc6 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -248,7 +248,11 @@ void MipsTargetStreamer::emitEmptyDelaySlot(bool hasShortDelaySlot, SMLoc IDLoc,
}
void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) {
- emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
+ const FeatureBitset &Features = STI->getFeatureBits();
+ if (Features[Mips::FeatureMicroMips])
+ emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, STI);
+ else
+ emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
}
/// Emit the $gp restore operation for .cprestore.
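emitNop() now emits the 16-bit move16 $zero, $zero when microMIPS is enabled, and keeps sll $zero, $zero, 0 otherwise. The classic 32-bit nop works because SLL sits in the SPECIAL opcode group (opcode 0, funct 0), so with $zero everywhere and a zero shift amount every field of the R-type word is zero. A standalone sketch of that encoding check:

#include <cassert>
#include <cstdint>

// R-type word: opcode | rs | rt | rd | shamt | funct.
static uint32_t encodeRType(unsigned Op, unsigned Rs, unsigned Rt, unsigned Rd,
                            unsigned Shamt, unsigned Funct) {
  return (Op << 26) | (Rs << 21) | (Rt << 16) | (Rd << 11) | (Shamt << 6) |
         Funct;
}

int main() {
  // "sll $zero, $zero, 0" encodes as the all-zero word 0x00000000, which is
  // why it serves as the canonical MIPS nop in the non-microMIPS path above.
  assert(encodeRType(/*Op=*/0, /*Rs=*/0, /*Rt=*/0, /*Rd=*/0, /*Shamt=*/0,
                     /*Funct=*/0) == 0u);
  return 0;
}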
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 6b0aa7756eab..814918d25e70 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -159,6 +159,7 @@ class SYNC_MMR6_ENC : POOL32A_SYNC_FM_MMR6;
class SYNCI_MMR6_ENC : POOL32I_SYNCI_FM_MMR6, MMR6Arch<"synci">;
class RDPGPR_MMR6_ENC : POOL32A_RDPGPR_FM_MMR6<0b1110000101>;
class SDBBP_MMR6_ENC : SDBBP_FM_MM, MMR6Arch<"sdbbp">;
+class SIGRIE_MMR6_ENC : SIGRIE_FM_MM, MMR6Arch<"sigrie">;
class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>;
class XORI_MMR6_ENC : ADDI_FM_MMR6<"xori", 0x1c>;
class ABS_S_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.s", 0, 0b0001101>;
@@ -1101,7 +1102,9 @@ class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16", II_BREAK>, MMR6Arch<"break16">
class LI16_MMR6_DESC : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>,
MMR6Arch<"li16">, IsAsCheapAsAMove;
class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"move16">;
-class MOVEP_MMR6_DESC : MovePMM16<"movep", GPRMM16OpndMoveP>, MMR6Arch<"movep">;
+class MOVEP_MMR6_DESC : MovePMM16<"movep", GPRMM16OpndMovePPairFirst,
+ GPRMM16OpndMovePPairSecond, GPRMM16OpndMoveP>,
+ MMR6Arch<"movep">;
class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, MMR6Arch<"sdbbp16">;
class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
MMR6Arch<"subu16"> {
@@ -1160,6 +1163,14 @@ class SDBBP_MMR6_DESC : MipsR6Inst {
InstrItinClass Itinerary = II_SDBBP;
}
+class SIGRIE_MMR6_DESC : MipsR6Inst {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins uimm16:$code_);
+ string AsmString = !strconcat("sigrie", "\t$code_");
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_SIGRIE;
+}
+
class LWM16_MMR6_DESC
: MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr),
!strconcat("lwm16", "\t$rt, $addr"), [],
@@ -1425,6 +1436,7 @@ def SYNCI_MMR6 : StdMMR6Rel, SYNCI_MMR6_DESC, SYNCI_MMR6_ENC, ISA_MICROMIPS32R6;
def RDPGPR_MMR6 : R6MMR6Rel, RDPGPR_MMR6_DESC, RDPGPR_MMR6_ENC,
ISA_MICROMIPS32R6;
def SDBBP_MMR6 : R6MMR6Rel, SDBBP_MMR6_DESC, SDBBP_MMR6_ENC, ISA_MICROMIPS32R6;
+def SIGRIE_MMR6 : R6MMR6Rel, SIGRIE_MMR6_DESC, SIGRIE_MMR6_ENC, ISA_MICROMIPS32R6;
def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6;
def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6;
let DecoderMethod = "DecodeMemMMImm16" in {
@@ -1633,6 +1645,7 @@ def B_MMR6_Pseudo : MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset),
}
def : MipsInstAlias<"sync", (SYNC_MMR6 0), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"sdbbp", (SDBBP_MMR6 0), 1>, ISA_MICROMIPS32R6;
+def : MipsInstAlias<"sigrie", (SIGRIE_MMR6 0), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"rdhwr $rt, $rs",
(RDHWR_MMR6 GPR32Opnd:$rt, HWRegsOpnd:$rs, 0), 1>,
ISA_MICROMIPS32R6;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index 84ae0eddf980..1731afc1961f 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -243,6 +243,8 @@ let DecoderNamespace = "MicroMipsFP64" in {
MFC1_FM_MM<0xe0>, ISA_MICROMIPS, FGR_64;
def MFHC1_D64_MM : MFC1_FT<"mfhc1", GPR32Opnd, FGR64Opnd, II_MFHC1>,
MFC1_FM_MM<0xc0>, ISA_MICROMIPS, FGR_64;
+ def MTC1_D64_MM : MTC1_FT<"mtc1", FGR64Opnd, GPR32Opnd, II_MTC1>,
+ MFC1_FM_MM<0xa0>, ISA_MICROMIPS, FGR_64;
}
let DecoderNamespace = "MicroMips" in {
@@ -405,6 +407,9 @@ let AddedComplexity = 40 in {
def : StoreRegImmPat<SWC1_MM, f32>, ISA_MICROMIPS;
}
+def : MipsPat<(MipsMTC1_D64 GPR32Opnd:$src),
+ (MTC1_D64_MM GPR32Opnd:$src)>, ISA_MICROMIPS, FGR_64;
+
def : MipsPat<(f32 fpimm0), (MTC1_MM ZERO)>, ISA_MICROMIPS32_NOT_MIPS32R6;
def : MipsPat<(f32 fpimm0neg), (FNEG_S_MM (MTC1_MM ZERO))>,
ISA_MICROMIPS32_NOT_MIPS32R6;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
index a9c53e08b810..2a4cc279ef0d 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -933,6 +933,17 @@ class SDBBP_FM_MM : MMArch {
let Inst{5-0} = 0x3c;
}
+class SIGRIE_FM_MM : MMArch {
+ bits<16> code_;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-22} = 0x0;
+ let Inst{21-6} = code_;
+ let Inst{5-0} = 0b111111;
+}
+
class RDHWR_FM_MM : MMArch {
bits<5> rt;
bits<5> rd;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index ebadb59a0432..af380a0ec71e 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -231,27 +231,14 @@ class StoreLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
bit mayStore = 1;
}
-/// A register pair used by movep instruction.
-def MovePRegPairAsmOperand : AsmOperandClass {
- let Name = "MovePRegPair";
- let ParserMethod = "parseMovePRegPair";
- let PredicateMethod = "isMovePRegPair";
-}
-
-def movep_regpair : Operand<i32> {
- let EncoderMethod = "getMovePRegPairOpValue";
- let ParserMatchClass = MovePRegPairAsmOperand;
- let PrintMethod = "printRegisterList";
- let DecoderMethod = "DecodeMovePRegPair";
- let MIOperandInfo = (ops ptr_rc, ptr_rc);
-}
-
-class MovePMM16<string opstr, RegisterOperand RO> :
-MicroMipsInst16<(outs movep_regpair:$dst_regs), (ins RO:$rs, RO:$rt),
- !strconcat(opstr, "\t$dst_regs, $rs, $rt"), [],
+class MovePMM16<string opstr, RegisterOperand RO1, RegisterOperand RO2,
+ RegisterOperand RO3> :
+MicroMipsInst16<(outs RO1:$rd1, RO2:$rd2), (ins RO3:$rs, RO3:$rt),
+ !strconcat(opstr, "\t$rd1, $rd2, $rs, $rt"), [],
NoItinerary, FrmR> {
let isReMaterializable = 1;
let isMoveReg = 1;
+ let DecoderMethod = "DecodeMovePOperands";
}
class StorePairMM<string opstr, ComplexPattern Addr = addr>
@@ -682,8 +669,9 @@ def MFLO16_MM : MoveFromHILOMM<"mflo16", GPR32Opnd, AC0>,
MFHILO_FM_MM16<0x12>, ISA_MICROMIPS32_NOT_MIPS32R6;
def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>,
ISA_MICROMIPS32_NOT_MIPS32R6;
-def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16,
- ISA_MICROMIPS32_NOT_MIPS32R6;
+def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMovePPairFirst,
+ GPRMM16OpndMovePPairSecond, GPRMM16OpndMoveP>,
+ MOVEP_FM_MM16, ISA_MICROMIPS32_NOT_MIPS32R6;
def LI16_MM : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>, LI_FM_MM16,
IsAsCheapAsAMove, ISA_MICROMIPS32_NOT_MIPS32R6;
def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>,
@@ -1116,6 +1104,27 @@ let DecoderNamespace = "MicroMips" in {
ISA_MICROMIPS32_NOT_MIPS32R6;
}
+let AdditionalPredicates = [NotDSP] in {
+ def PseudoMULT_MM : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, II_MULT>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMULTu_MM : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, II_MULTU>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMFHI_MM : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMFLO_MM : PseudoMFLOHI<GPR32, ACC64, MipsMFLO>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMTLOHI_MM : PseudoMTLOHI<ACC64, GPR32>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMADD_MM : MAddSubPseudo<MADD, MipsMAdd, II_MADD>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMADDU_MM : MAddSubPseudo<MADDU, MipsMAddu, II_MADDU>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMSUB_MM : MAddSubPseudo<MSUB, MipsMSub, II_MSUB>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMSUBU_MM : MAddSubPseudo<MSUBU, MipsMSubu, II_MSUBU>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+}
+
def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>, ISA_MIPS1_NOT_32R6_64R6;
def TAILCALLREG_MM : TailCallReg<JRC16_MM, GPR32Opnd>,
@@ -1262,6 +1271,8 @@ let AddedComplexity = 40 in
def : MipsPat<(bswap GPR32:$rt), (ROTR_MM (WSBH_MM GPR32:$rt), 16)>,
ISA_MICROMIPS;
+def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL_MM texternalsym:$dst)>, ISA_MICROMIPS32_NOT_MIPS32R6;
def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
(TAILCALL_MM tglobaladdr:$dst)>, ISA_MICROMIPS32_NOT_MIPS32R6;
def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp b/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
index 568cdfb5b110..f9062cc23da2 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -31,13 +31,14 @@ namespace {
/// Order of operands to transfer
// TODO: Will be extended when additional optimizations are added
enum OperandTransfer {
- OT_NA, ///< Not applicable
- OT_OperandsAll, ///< Transfer all operands
- OT_Operands02, ///< Transfer operands 0 and 2
- OT_Operand2, ///< Transfer just operand 2
- OT_OperandsXOR, ///< Transfer operands for XOR16
- OT_OperandsLwp, ///< Transfer operands for LWP
- OT_OperandsSwp, ///< Transfer operands for SWP
+ OT_NA, ///< Not applicable
+ OT_OperandsAll, ///< Transfer all operands
+ OT_Operands02, ///< Transfer operands 0 and 2
+ OT_Operand2, ///< Transfer just operand 2
+ OT_OperandsXOR, ///< Transfer operands for XOR16
+ OT_OperandsLwp, ///< Transfer operands for LWP
+ OT_OperandsSwp, ///< Transfer operands for SWP
+ OT_OperandsMovep, ///< Transfer operands for MOVEP
};
/// Reduction type
@@ -170,6 +171,10 @@ private:
// returns true on success.
static bool ReduceSXtoSX16(ReduceEntryFunArgs *Arguments);
+  // Attempts to reduce two MOVE instructions into a MOVEP instruction;
+  // returns true on success.
+ static bool ReduceMoveToMovep(ReduceEntryFunArgs *Arguments);
+
// Attempts to reduce arithmetic instructions, returns true on success.
static bool ReduceArithmeticInstructions(ReduceEntryFunArgs *Arguments);
@@ -243,6 +248,8 @@ ReduceEntryVector MicroMipsSizeReduce::ReduceTable = {
OpInfo(OT_OperandsLwp), ImmField(0, -2048, 2048, 2)},
{RT_OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceXWtoXWSP,
OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
+ {RT_TwoInstr, OpCodes(Mips::MOVE16_MM, Mips::MOVEP_MM), ReduceMoveToMovep,
+ OpInfo(OT_OperandsMovep), ImmField(0, 0, 0, -1)},
{RT_OneInstr, OpCodes(Mips::SB, Mips::SB16_MM), ReduceSXtoSX16,
OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)},
{RT_OneInstr, OpCodes(Mips::SB_MM, Mips::SB16_MM), ReduceSXtoSX16,
@@ -562,6 +569,89 @@ bool MicroMipsSizeReduce::ReduceSXtoSX16(ReduceEntryFunArgs *Arguments) {
return ReplaceInstruction(MI, Entry);
}
+// Returns true if Reg can be a source register
+// of the MOVEP instruction.
+static bool IsMovepSrcRegister(unsigned Reg) {
+
+ if (Reg == Mips::ZERO || Reg == Mips::V0 || Reg == Mips::V1 ||
+ Reg == Mips::S0 || Reg == Mips::S1 || Reg == Mips::S2 ||
+ Reg == Mips::S3 || Reg == Mips::S4)
+ return true;
+
+ return false;
+}
+
+// Returns true if Reg can be a destination register
+// of MOVEP instruction
+static bool IsMovepDestinationReg(unsigned Reg) {
+
+ if (Reg == Mips::A0 || Reg == Mips::A1 || Reg == Mips::A2 ||
+ Reg == Mips::A3 || Reg == Mips::S5 || Reg == Mips::S6)
+ return true;
+
+ return false;
+}
+
+// Returns true if the registers can be a pair of destination
+// registers in the MOVEP instruction.
+static bool IsMovepDestinationRegPair(unsigned R0, unsigned R1) {
+
+ if ((R0 == Mips::A0 && R1 == Mips::S5) ||
+ (R0 == Mips::A0 && R1 == Mips::S6) ||
+ (R0 == Mips::A0 && R1 == Mips::A1) ||
+ (R0 == Mips::A0 && R1 == Mips::A2) ||
+ (R0 == Mips::A0 && R1 == Mips::A3) ||
+ (R0 == Mips::A1 && R1 == Mips::A2) ||
+ (R0 == Mips::A1 && R1 == Mips::A3) ||
+ (R0 == Mips::A2 && R1 == Mips::A3))
+ return true;
+
+ return false;
+}
+
+bool MicroMipsSizeReduce::ReduceMoveToMovep(ReduceEntryFunArgs *Arguments) {
+
+ const ReduceEntry &Entry = Arguments->Entry;
+ MachineBasicBlock::instr_iterator &NextMII = Arguments->NextMII;
+ const MachineBasicBlock::instr_iterator &E =
+ Arguments->MI->getParent()->instr_end();
+
+ if (NextMII == E)
+ return false;
+
+ MachineInstr *MI1 = Arguments->MI;
+ MachineInstr *MI2 = &*NextMII;
+
+ unsigned RegDstMI1 = MI1->getOperand(0).getReg();
+ unsigned RegSrcMI1 = MI1->getOperand(1).getReg();
+
+ if (!IsMovepSrcRegister(RegSrcMI1))
+ return false;
+
+ if (!IsMovepDestinationReg(RegDstMI1))
+ return false;
+
+ if (MI2->getOpcode() != Entry.WideOpc())
+ return false;
+
+ unsigned RegDstMI2 = MI2->getOperand(0).getReg();
+ unsigned RegSrcMI2 = MI2->getOperand(1).getReg();
+
+ if (!IsMovepSrcRegister(RegSrcMI2))
+ return false;
+
+ bool ConsecutiveForward;
+ if (IsMovepDestinationRegPair(RegDstMI1, RegDstMI2)) {
+ ConsecutiveForward = true;
+ } else if (IsMovepDestinationRegPair(RegDstMI2, RegDstMI1)) {
+ ConsecutiveForward = false;
+ } else
+ return false;
+
+ NextMII = std::next(NextMII);
+ return ReplaceInstruction(MI1, Entry, MI2, ConsecutiveForward);
+}
+
bool MicroMipsSizeReduce::ReduceXORtoXOR16(ReduceEntryFunArgs *Arguments) {
MachineInstr *MI = Arguments->MI;
@@ -641,18 +731,25 @@ bool MicroMipsSizeReduce::ReplaceInstruction(MachineInstr *MI,
}
break;
}
+ case OT_OperandsMovep:
case OT_OperandsLwp:
case OT_OperandsSwp: {
if (ConsecutiveForward) {
MIB.add(MI->getOperand(0));
MIB.add(MI2->getOperand(0));
MIB.add(MI->getOperand(1));
- MIB.add(MI->getOperand(2));
+ if (OpTransfer == OT_OperandsMovep)
+ MIB.add(MI2->getOperand(1));
+ else
+ MIB.add(MI->getOperand(2));
} else { // consecutive backward
MIB.add(MI2->getOperand(0));
MIB.add(MI->getOperand(0));
MIB.add(MI2->getOperand(1));
- MIB.add(MI2->getOperand(2));
+ if (OpTransfer == OT_OperandsMovep)
+ MIB.add(MI->getOperand(1));
+ else
+ MIB.add(MI2->getOperand(2));
}
LLVM_DEBUG(dbgs() << "and converting 32-bit: " << *MI2
diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h
index ef3a807c7648..6bb7aecc867a 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.h
+++ b/contrib/llvm/lib/Target/Mips/Mips.h
@@ -38,6 +38,7 @@ namespace llvm {
FunctionPass *createMipsConstantIslandPass();
FunctionPass *createMicroMipsSizeReducePass();
FunctionPass *createMipsExpandPseudoPass();
+ FunctionPass *createMipsPreLegalizeCombiner();
InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &,
MipsSubtarget &,
@@ -46,6 +47,7 @@ namespace llvm {
void initializeMipsDelaySlotFillerPass(PassRegistry &);
void initializeMipsBranchExpansionPass(PassRegistry &);
void initializeMicroMipsSizeReducePass(PassRegistry &);
+ void initializeMipsPreLegalizerCombinerPass(PassRegistry&);
} // end namespace llvm;
#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index c310d9491af8..f237bb6d4006 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -74,16 +74,18 @@ static FPReturnVariant whichFPReturnVariant(Type *T) {
return FRet;
case Type::DoubleTyID:
return DRet;
- case Type::StructTyID:
- if (T->getStructNumElements() != 2)
+ case Type::StructTyID: {
+ StructType *ST = cast<StructType>(T);
+ if (ST->getNumElements() != 2)
break;
- if ((T->getContainedType(0)->isFloatTy()) &&
- (T->getContainedType(1)->isFloatTy()))
+ if ((ST->getElementType(0)->isFloatTy()) &&
+ (ST->getElementType(1)->isFloatTy()))
return CFRet;
- if ((T->getContainedType(0)->isDoubleTy()) &&
- (T->getContainedType(1)->isDoubleTy()))
+ if ((ST->getElementType(0)->isDoubleTy()) &&
+ (ST->getElementType(1)->isDoubleTy()))
return CDRet;
break;
+ }
default:
break;
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 8ce47e3f669d..79df622241a0 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -386,27 +386,22 @@ const char* Mips16TargetLowering::
}
else if (RetTy ->isDoubleTy()) {
result = dfMips16Helper[stubNum];
- }
- else if (RetTy->isStructTy()) {
+ } else if (StructType *SRetTy = dyn_cast<StructType>(RetTy)) {
// check if it's complex
- if (RetTy->getNumContainedTypes() == 2) {
- if ((RetTy->getContainedType(0)->isFloatTy()) &&
- (RetTy->getContainedType(1)->isFloatTy())) {
+ if (SRetTy->getNumElements() == 2) {
+ if ((SRetTy->getElementType(0)->isFloatTy()) &&
+ (SRetTy->getElementType(1)->isFloatTy())) {
result = scMips16Helper[stubNum];
- }
- else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
- (RetTy->getContainedType(1)->isDoubleTy())) {
+ } else if ((SRetTy->getElementType(0)->isDoubleTy()) &&
+ (SRetTy->getElementType(1)->isDoubleTy())) {
result = dcMips16Helper[stubNum];
- }
- else {
+ } else {
llvm_unreachable("Uncovered condition");
}
- }
- else {
+ } else {
llvm_unreachable("Uncovered condition");
}
- }
- else {
+ } else {
if (stubNum == 0) {
needHelper = false;
return "";
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index 219f1ad33586..efebc99b5dae 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -97,9 +97,9 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
}
-bool Mips16InstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool Mips16InstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
if (MI.isMoveReg()) {
Dest = &MI.getOperand(0);
Src = &MI.getOperand(1);
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
index 8190be6187ea..6a802e4cce5d 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
@@ -53,9 +53,6 @@ public:
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
-
void storeRegToStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -105,6 +102,14 @@ public:
void BuildAddiuSpImm
(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const;
+
+protected:
+  /// If the specific machine instruction is an instruction that moves/copies
+  /// a value from one register to another register, return true along with
+  /// the @Source machine operand and the @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
private:
unsigned getAnalyzableBrOpc(unsigned Opc) const override;
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
index e1d08cad88b7..623af570a5e6 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -87,6 +87,7 @@ def OPCODE5_BC1NEZ : OPCODE5<0b01101>;
def OPCODE5_BC2EQZ : OPCODE5<0b01001>;
def OPCODE5_BC2NEZ : OPCODE5<0b01101>;
def OPCODE5_BGEZAL : OPCODE5<0b10001>;
+def OPCODE5_SIGRIE : OPCODE5<0b10111>;
// The next four constants are unnamed in the spec. These names are taken from
// the OPGROUP names they are used with.
def OPCODE5_LDC2 : OPCODE5<0b01110>;
@@ -602,3 +603,12 @@ class SPECIAL3_GINV<bits<2> ginv> : MipsR6Inst {
let Inst{7-6} = ginv;
let Inst{5-0} = 0b111101;
}
+
+class SIGRIE_FM : MipsR6Inst {
+ bits<16> code_;
+
+ let Inst{31-26} = OPGROUP_REGIMM.Value;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = OPCODE5_SIGRIE.Value;
+ let Inst{15-0} = code_;
+}
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index d86fc3f658ae..2bd0cf2d59a6 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -200,6 +200,8 @@ class CRC32CW_ENC : SPECIAL3_2R_SZ_CRC<2,1>;
class GINVI_ENC : SPECIAL3_GINV<0>;
class GINVT_ENC : SPECIAL3_GINV<2>;
+class SIGRIE_ENC : SIGRIE_FM;
+
//===----------------------------------------------------------------------===//
//
// Instruction Multiclasses
@@ -846,6 +848,14 @@ class GINVI_DESC : GINV_DESC_BASE<"ginvi", GPR32Opnd, II_GINVI> {
}
class GINVT_DESC : GINV_DESC_BASE<"ginvt", GPR32Opnd, II_GINVT>;
+class SIGRIE_DESC {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins uimm16:$code_);
+ string AsmString = "sigrie\t$code_";
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_SIGRIE;
+}
+
//===----------------------------------------------------------------------===//
//
// Instruction Definitions
@@ -961,6 +971,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
def SEL_S : R6MMR6Rel, SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT;
def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6;
def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6;
+ def SIGRIE : SIGRIE_ENC, SIGRIE_DESC, ISA_MIPS32R6;
}
let AdditionalPredicates = [NotInMicroMips] in {
@@ -988,6 +999,7 @@ def : MipsInstAlias<"evp", (EVP ZERO), 0>, ISA_MIPS32R6;
let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6;
+def : MipsInstAlias<"sigrie", (SIGRIE 0)>, ISA_MIPS32R6;
def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6, GPR_32;
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
index b5317bec70c4..5729182deafb 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -416,6 +416,13 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
// long branches. See the comment in file MipsLongBranch.cpp for detailed
// explanation.
+// Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_LUi2Op_64 : PseudoSE<(outs GPR64Opnd:$dst),
+ (ins brtarget:$tgt), []>, GPR_64;
+// Expands to: daddiu $dst, $src, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_DADDiu2Op : PseudoSE<(outs GPR64Opnd:$dst),
+ (ins GPR64Opnd:$src, brtarget:$tgt), []>, GPR_64;
+
// Expands to: daddiu $dst, $src, %PART($tgt - $baltgt)
// where %PART may be %hi or %lo, depending on the relocation kind
// that $tgt is annotated with.
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 2e0c25de2bc8..362431fd42a6 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -561,6 +561,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
O << '$' << MipsInstPrinter::getRegisterName(Reg);
return false;
}
+ break;
}
case 'w':
// Print MSA registers for the 'f' constraint
@@ -1203,18 +1204,23 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// Emit .dtprelword or .dtpreldword directive
// and value for debug thread local expression.
-void MipsAsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
- unsigned Size) const {
- switch (Size) {
- case 4:
- OutStreamer->EmitDTPRel32Value(Value);
- break;
- case 8:
- OutStreamer->EmitDTPRel64Value(Value);
- break;
- default:
- llvm_unreachable("Unexpected size of expression value.");
+void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const {
+ if (auto *MipsExpr = dyn_cast<MipsMCExpr>(Value)) {
+ if (MipsExpr && MipsExpr->getKind() == MipsMCExpr::MEK_DTPREL) {
+ switch (Size) {
+ case 4:
+ OutStreamer->EmitDTPRel32Value(MipsExpr->getSubExpr());
+ break;
+ case 8:
+ OutStreamer->EmitDTPRel64Value(MipsExpr->getSubExpr());
+ break;
+ default:
+ llvm_unreachable("Unexpected size of expression value.");
+ }
+ return;
+ }
}
+ AsmPrinter::EmitDebugValue(Value, Size);
}
// Align all targets of indirect branches on bundle size. Used only if target
@@ -1240,8 +1246,12 @@ void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) {
bool MipsAsmPrinter::isLongBranchPseudo(int Opcode) const {
return (Opcode == Mips::LONG_BRANCH_LUi
+ || Opcode == Mips::LONG_BRANCH_LUi2Op
+ || Opcode == Mips::LONG_BRANCH_LUi2Op_64
|| Opcode == Mips::LONG_BRANCH_ADDiu
- || Opcode == Mips::LONG_BRANCH_DADDiu);
+ || Opcode == Mips::LONG_BRANCH_ADDiu2Op
+ || Opcode == Mips::LONG_BRANCH_DADDiu
+ || Opcode == Mips::LONG_BRANCH_DADDiu2Op);
}
// Force static initialization.
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
index 999b6f896bae..eb58234e3e77 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -160,7 +160,7 @@ public:
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
- void EmitDebugThreadLocal(const MCExpr *Value, unsigned Size) const override;
+ void EmitDebugValue(const MCExpr *Value, unsigned Size) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp b/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
index af936e6fc96b..e59267c4fd9b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
@@ -128,6 +128,7 @@ struct MBBInfo {
uint64_t Size = 0;
bool HasLongBranch = false;
MachineInstr *Br = nullptr;
+ uint64_t Offset = 0;
MBBInfo() = default;
};
@@ -154,8 +155,11 @@ private:
void splitMBB(MachineBasicBlock *MBB);
void initMBBInfo();
int64_t computeOffset(const MachineInstr *Br);
+ uint64_t computeOffsetFromTheBeginning(int MBB);
void replaceBranch(MachineBasicBlock &MBB, Iter Br, const DebugLoc &DL,
MachineBasicBlock *MBBOpnd);
+ bool buildProperJumpMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Pos, DebugLoc DL);
void expandToLongBranch(MBBInfo &Info);
bool handleForbiddenSlot();
bool handlePossibleLongBranch();
@@ -167,7 +171,6 @@ private:
SmallVector<MBBInfo, 16> MBBInfos;
bool IsPIC;
MipsABIInfo ABI;
- unsigned LongBranchSeqSize;
bool ForceLongBranchFirstPass = false;
};
@@ -176,7 +179,7 @@ private:
char MipsBranchExpansion::ID = 0;
INITIALIZE_PASS(MipsBranchExpansion, DEBUG_TYPE,
- "Expand out of range branch instructions and prevent forbidden"
+ "Expand out of range branch instructions and fix forbidden"
" slot hazards",
false, false)
@@ -268,7 +271,8 @@ void MipsBranchExpansion::splitMBB(MachineBasicBlock *MBB) {
// Insert NewMBB and fix control flow.
MachineBasicBlock *Tgt = getTargetMBB(*FirstBr);
NewMBB->transferSuccessors(MBB);
- NewMBB->removeSuccessor(Tgt, true);
+ if (Tgt != getTargetMBB(*LastBr))
+ NewMBB->removeSuccessor(Tgt, true);
MBB->addSuccessor(NewMBB);
MBB->addSuccessor(Tgt);
MFp->insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
@@ -294,14 +298,6 @@ void MipsBranchExpansion::initMBBInfo() {
for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin();
MI != MBB->instr_end(); ++MI)
MBBInfos[I].Size += TII->getInstSizeInBytes(*MI);
-
- // Search for MBB's branch instruction.
- ReverseIter End = MBB->rend();
- ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End);
-
- if ((Br != End) && !Br->isIndirectBranch() &&
- (Br->isConditionalBranch() || (Br->isUnconditionalBranch() && IsPIC)))
- MBBInfos[I].Br = &*Br;
}
}
@@ -326,6 +322,14 @@ int64_t MipsBranchExpansion::computeOffset(const MachineInstr *Br) {
return -Offset + 4;
}
+// Returns the distance in bytes from the beginning of the function up to MBB.
+uint64_t MipsBranchExpansion::computeOffsetFromTheBeginning(int MBB) {
+ uint64_t Offset = 0;
+ for (int N = 0; N < MBB; ++N)
+ Offset += MBBInfos[N].Size;
+ return Offset;
+}
+
// Replace Br with a branch which has the opposite condition code and a
// MachineBasicBlock operand MBBOpnd.
void MipsBranchExpansion::replaceBranch(MachineBasicBlock &MBB, Iter Br,
@@ -359,6 +363,35 @@ void MipsBranchExpansion::replaceBranch(MachineBasicBlock &MBB, Iter Br,
Br->eraseFromParent();
}
+bool MipsBranchExpansion::buildProperJumpMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Pos,
+ DebugLoc DL) {
+ bool HasR6 = ABI.IsN64() ? STI->hasMips64r6() : STI->hasMips32r6();
+ bool AddImm = HasR6 && !STI->useIndirectJumpsHazard();
+
+ unsigned JR = ABI.IsN64() ? Mips::JR64 : Mips::JR;
+ unsigned JIC = ABI.IsN64() ? Mips::JIC64 : Mips::JIC;
+ unsigned JR_HB = ABI.IsN64() ? Mips::JR_HB64 : Mips::JR_HB;
+ unsigned JR_HB_R6 = ABI.IsN64() ? Mips::JR_HB64_R6 : Mips::JR_HB_R6;
+
+ unsigned JumpOp;
+ if (STI->useIndirectJumpsHazard())
+ JumpOp = HasR6 ? JR_HB_R6 : JR_HB;
+ else
+ JumpOp = HasR6 ? JIC : JR;
+
+ if (JumpOp == Mips::JIC && STI->inMicroMipsMode())
+ JumpOp = Mips::JIC_MMR6;
+
+ unsigned ATReg = ABI.IsN64() ? Mips::AT_64 : Mips::AT;
+ MachineInstrBuilder Instr =
+ BuildMI(*MBB, Pos, DL, TII->get(JumpOp)).addReg(ATReg);
+ if (AddImm)
+ Instr.addImm(0);
+
+ return !AddImm;
+}
+
// Expand branch instructions to long branches.
// TODO: This function has to be fixed for beqz16 and bnez16, because it
// currently assumes that all branches have 16-bit offsets, and will produce
@@ -479,33 +512,21 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) {
// In NaCl, modifying the sp is not allowed in branch delay slot.
// For MIPS32R6, we can skip using a delay slot branch.
- if (STI->isTargetNaCl() ||
- (STI->hasMips32r6() && !STI->useIndirectJumpsHazard()))
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
+ bool hasDelaySlot = buildProperJumpMI(BalTgtMBB, Pos, DL);
+
+ if (STI->isTargetNaCl() || !hasDelaySlot) {
+ BuildMI(*BalTgtMBB, std::prev(Pos), DL, TII->get(Mips::ADDiu), Mips::SP)
.addReg(Mips::SP)
.addImm(8);
-
- if (STI->hasMips32r6() && !STI->useIndirectJumpsHazard()) {
- const unsigned JICOp =
- STI->inMicroMipsMode() ? Mips::JIC_MMR6 : Mips::JIC;
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(JICOp))
- .addReg(Mips::AT)
- .addImm(0);
-
- } else {
- unsigned JROp =
- STI->useIndirectJumpsHazard()
- ? (STI->hasMips32r6() ? Mips::JR_HB_R6 : Mips::JR_HB)
- : Mips::JR;
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT);
-
+ }
+ if (hasDelaySlot) {
if (STI->isTargetNaCl()) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP));
- } else
+ } else {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
.addReg(Mips::SP)
.addImm(8);
-
+ }
BalTgtMBB->rbegin()->bundleWithPred();
}
} else {
@@ -597,46 +618,94 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP_64)
.addImm(0);
- if (STI->hasMips64r6() && !STI->useIndirectJumpsHazard()) {
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
+ bool hasDelaySlot = buildProperJumpMI(BalTgtMBB, Pos, DL);
+  // If there is no delay slot, insert the stack adjustment before the jump.
+ if (!hasDelaySlot) {
+ BuildMI(*BalTgtMBB, std::prev(Pos), DL, TII->get(Mips::DADDiu),
+ Mips::SP_64)
.addReg(Mips::SP_64)
.addImm(16);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JIC64))
- .addReg(Mips::AT_64)
- .addImm(0);
} else {
- unsigned JROp =
- STI->useIndirectJumpsHazard()
- ? (STI->hasMips32r6() ? Mips::JR_HB64_R6 : Mips::JR_HB64)
- : Mips::JR64;
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT_64);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
.addReg(Mips::SP_64)
.addImm(16);
BalTgtMBB->rbegin()->bundleWithPred();
}
}
-
- assert(LongBrMBB->size() + BalTgtMBB->size() == LongBranchSeqSize);
- } else {
- // Pre R6: R6:
- // $longbr: $longbr:
- // j $tgt bc $tgt
- // nop $fallthrough
- // $fallthrough:
- //
+ } else { // Not PIC
Pos = LongBrMBB->begin();
LongBrMBB->addSuccessor(TgtMBB);
- if (STI->hasMips32r6())
+
+  // Compute the position of the potential jump instruction (size of the
+  // basic blocks before it, plus 4 for the jump instruction itself).
+ uint64_t JOffset = computeOffsetFromTheBeginning(MBB->getNumber()) +
+ MBBInfos[MBB->getNumber()].Size + 4;
+ uint64_t TgtMBBOffset = computeOffsetFromTheBeginning(TgtMBB->getNumber());
+  // If it's a forward jump, TgtMBBOffset will be shifted forward by the two
+  // instructions that are about to be inserted.
+ if (JOffset < TgtMBBOffset)
+ TgtMBBOffset += 2 * 4;
+  // Compare the upper 4 bits to check whether it's the same 256 MB segment.
+ bool SameSegmentJump = JOffset >> 28 == TgtMBBOffset >> 28;
+
+ if (STI->hasMips32r6() && TII->isBranchOffsetInRange(Mips::BC, I.Offset)) {
+ // R6:
+ // $longbr:
+ // bc $tgt
+ // $fallthrough:
+ //
BuildMI(*LongBrMBB, Pos, DL,
TII->get(STI->inMicroMipsMode() ? Mips::BC_MMR6 : Mips::BC))
.addMBB(TgtMBB);
- else
+ } else if (SameSegmentJump) {
+ // Pre R6:
+ // $longbr:
+ // j $tgt
+ // nop
+ // $fallthrough:
+ //
MIBundleBuilder(*LongBrMBB, Pos)
.append(BuildMI(*MFp, DL, TII->get(Mips::J)).addMBB(TgtMBB))
.append(BuildMI(*MFp, DL, TII->get(Mips::NOP)));
-
- assert(LongBrMBB->size() == LongBranchSeqSize);
+ } else {
+      // At this point, the branch target does not fit into the immediate
+      // field of the branch instruction and is not in the same segment as
+      // the jump instruction. Therefore we break the branch into several
+      // instructions: first load the target address into a register, and
+      // then branch through that register.
+ if (ABI.IsN64()) {
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi2Op_64),
+ Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_HIGHEST);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu2Op),
+ Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_HIGHER);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addImm(16);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu2Op),
+ Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_ABS_HI);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addImm(16);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu2Op),
+ Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_ABS_LO);
+ } else {
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi2Op),
+ Mips::AT)
+ .addMBB(TgtMBB, MipsII::MO_ABS_HI);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_ADDiu2Op),
+ Mips::AT)
+ .addReg(Mips::AT)
+ .addMBB(TgtMBB, MipsII::MO_ABS_LO);
+ }
+ buildProperJumpMI(LongBrMBB, Pos, DL);
+ }
}
if (I.Br->isUnconditionalBranch()) {
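For background on the SameSegmentJump test above: the MIPS j instruction encodes a 26-bit word index and inherits the upper 4 bits of the destination from PC + 4, so a direct jump can only reach targets inside the same 256 MB segment. A minimal standalone sketch of that address computation (illustration only, not code from the patch; the helper name is made up):

    #include <cstdint>
    // How `j target` forms its destination address.
    static uint32_t jTargetAddress(uint32_t PCPlus4, uint32_t Index26) {
      return (PCPlus4 & 0xF0000000u) // top 4 bits come from PC + 4
             | (Index26 << 2);       // 26-bit index, word aligned
    }
    // Two addresses are mutually reachable with `j` exactly when
    // (A >> 28) == (B >> 28), which is the SameSegmentJump check above.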
@@ -666,8 +735,6 @@ bool MipsBranchExpansion::handleForbiddenSlot() {
if (!STI->hasMips32r6() || STI->inMicroMipsMode())
return false;
- const MipsInstrInfo *TII = STI->getInstrInfo();
-
bool Changed = false;
for (MachineFunction::iterator FI = MFp->begin(); FI != MFp->end(); ++FI) {
@@ -704,66 +771,65 @@ bool MipsBranchExpansion::handleForbiddenSlot() {
}
bool MipsBranchExpansion::handlePossibleLongBranch() {
-
- LongBranchSeqSize = IsPIC ? ((ABI.IsN64() || STI->isTargetNaCl()) ? 10 : 9)
- : (STI->hasMips32r6() ? 1 : 2);
-
if (STI->inMips16Mode() || !STI->enableLongBranchPass())
return false;
if (SkipLongBranch)
return false;
- initMBBInfo();
-
- SmallVectorImpl<MBBInfo>::iterator I, E = MBBInfos.end();
bool EverMadeChange = false, MadeChange = true;
while (MadeChange) {
MadeChange = false;
- for (I = MBBInfos.begin(); I != E; ++I) {
- // Skip if this MBB doesn't have a branch or the branch has already been
- // converted to a long branch.
- if (!I->Br || I->HasLongBranch)
- continue;
+ initMBBInfo();
- int64_t Offset = computeOffset(I->Br);
+ for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) {
+ MachineBasicBlock *MBB = MFp->getBlockNumbered(I);
+ // Search for MBB's branch instruction.
+ ReverseIter End = MBB->rend();
+ ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End);
- if (STI->isTargetNaCl()) {
- // The offset calculation does not include sandboxing instructions
- // that will be added later in the MC layer. Since at this point we
- // don't know the exact amount of code that "sandboxing" will add, we
- // conservatively estimate that code will not grow more than 100%.
- Offset *= 2;
- }
+ if ((Br != End) && Br->isBranch() && !Br->isIndirectBranch() &&
+ (Br->isConditionalBranch() ||
+ (Br->isUnconditionalBranch() && IsPIC))) {
+ int64_t Offset = computeOffset(&*Br);
- // Check if offset fits into the immediate field of the branch.
- if (!ForceLongBranchFirstPass &&
- TII->isBranchOffsetInRange(I->Br->getOpcode(), Offset))
- continue;
+ if (STI->isTargetNaCl()) {
+ // The offset calculation does not include sandboxing instructions
+ // that will be added later in the MC layer. Since at this point we
+ // don't know the exact amount of code that "sandboxing" will add, we
+ // conservatively estimate that code will not grow more than 100%.
+ Offset *= 2;
+ }
- I->HasLongBranch = true;
- I->Size += LongBranchSeqSize * 4;
- ++LongBranches;
- EverMadeChange = MadeChange = true;
- }
- }
+ if (ForceLongBranchFirstPass ||
+ !TII->isBranchOffsetInRange(Br->getOpcode(), Offset)) {
+ MBBInfos[I].Offset = Offset;
+ MBBInfos[I].Br = &*Br;
+ }
+ }
+ } // End for
- ForceLongBranchFirstPass = false;
+ ForceLongBranchFirstPass = false;
- if (!EverMadeChange)
- return false;
+ SmallVectorImpl<MBBInfo>::iterator I, E = MBBInfos.end();
+
+ for (I = MBBInfos.begin(); I != E; ++I) {
+ // Skip if this MBB doesn't have a branch or the branch has already been
+ // converted to a long branch.
+ if (!I->Br)
+ continue;
- // Do the expansion.
- for (I = MBBInfos.begin(); I != E; ++I)
- if (I->HasLongBranch) {
expandToLongBranch(*I);
+ ++LongBranches;
+ EverMadeChange = MadeChange = true;
}
- MFp->RenumberBlocks();
+ MFp->RenumberBlocks();
+ }
- return true;
+ return EverMadeChange;
}
bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
index 81a1cced93b7..90cb3f437bd5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
@@ -24,10 +24,10 @@ static bool isF128SoftLibCall(const char *CallSym) {
"__lttf2", "__multf3", "__netf2", "__powitf2",
"__subtf3", "__trunctfdf2", "__trunctfsf2", "__unordtf2",
"ceill", "copysignl", "cosl", "exp2l",
- "expl", "floorl", "fmal", "fmodl",
- "log10l", "log2l", "logl", "nearbyintl",
- "powl", "rintl", "roundl", "sinl",
- "sqrtl", "truncl"};
+ "expl", "floorl", "fmal", "fmaxl",
+ "fmodl", "log10l", "log2l", "logl",
+ "nearbyintl", "powl", "rintl", "roundl",
+ "sinl", "sqrtl", "truncl"};
// Check that LibCalls is sorted alphabetically.
auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; };
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
index a705ebb6b193..c550fadf6632 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -16,6 +16,7 @@
#include "MipsCallLowering.h"
#include "MipsCCState.h"
#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;
@@ -23,48 +24,89 @@ using namespace llvm;
MipsCallLowering::MipsCallLowering(const MipsTargetLowering &TLI)
: CallLowering(&TLI) {}
-bool MipsCallLowering::MipsHandler::assign(const CCValAssign &VA,
- unsigned vreg) {
+bool MipsCallLowering::MipsHandler::assign(unsigned VReg,
+ const CCValAssign &VA) {
if (VA.isRegLoc()) {
- assignValueToReg(vreg, VA.getLocReg());
+ assignValueToReg(VReg, VA);
} else if (VA.isMemLoc()) {
- unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
- unsigned Offset = VA.getLocMemOffset();
- MachinePointerInfo MPO;
- unsigned StackAddr = getStackAddress(Size, Offset, MPO);
- assignValueToAddress(vreg, StackAddr, Size, MPO);
+ assignValueToAddress(VReg, VA);
} else {
return false;
}
return true;
}
+bool MipsCallLowering::MipsHandler::assignVRegs(ArrayRef<unsigned> VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex) {
+ for (unsigned i = 0; i < VRegs.size(); ++i)
+ if (!assign(VRegs[i], ArgLocs[ArgLocsStartIndex + i]))
+ return false;
+ return true;
+}
+
+void MipsCallLowering::MipsHandler::setLeastSignificantFirst(
+ SmallVectorImpl<unsigned> &VRegs) {
+ if (!MIRBuilder.getMF().getDataLayout().isLittleEndian())
+ std::reverse(VRegs.begin(), VRegs.end());
+}
+
+bool MipsCallLowering::MipsHandler::handle(
+ ArrayRef<CCValAssign> ArgLocs, ArrayRef<CallLowering::ArgInfo> Args) {
+ SmallVector<unsigned, 4> VRegs;
+ unsigned SplitLength;
+ const Function &F = MIRBuilder.getMF().getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const MipsTargetLowering &TLI = *static_cast<const MipsTargetLowering *>(
+ MIRBuilder.getMF().getSubtarget().getTargetLowering());
+
+ for (unsigned ArgsIndex = 0, ArgLocsIndex = 0; ArgsIndex < Args.size();
+ ++ArgsIndex, ArgLocsIndex += SplitLength) {
+ EVT VT = TLI.getValueType(DL, Args[ArgsIndex].Ty);
+ SplitLength = TLI.getNumRegistersForCallingConv(F.getContext(),
+ F.getCallingConv(), VT);
+ if (SplitLength > 1) {
+ VRegs.clear();
+ MVT RegisterVT = TLI.getRegisterTypeForCallingConv(
+ F.getContext(), F.getCallingConv(), VT);
+ for (unsigned i = 0; i < SplitLength; ++i)
+ VRegs.push_back(MRI.createGenericVirtualRegister(LLT{RegisterVT}));
+
+ if (!handleSplit(VRegs, ArgLocs, ArgLocsIndex, Args[ArgsIndex].Reg))
+ return false;
+ } else {
+ if (!assign(Args[ArgsIndex].Reg, ArgLocs[ArgLocsIndex]))
+ return false;
+ }
+ }
+ return true;
+}
+
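A rough sketch of the split-value path that MipsHandler::handle above dispatches to (the incoming and outgoing handleSplit implementations appear further down), under the assumption of an i64 value on a 32-bit MIPS target; illustration only, not code from the patch:

    // SplitLength == 2, so two s32 virtual registers are created.
    // Incoming values (formal arguments, call results):
    //   assignVRegs(VRegs, ArgLocs, Idx);        // load/copy each 32-bit part
    //   setLeastSignificantFirst(VRegs);         // reverse order on big-endian
    //   MIRBuilder.buildMerge(ArgsReg, VRegs);   // G_MERGE_VALUES -> one s64
    // Outgoing values (call arguments, return values):
    //   MIRBuilder.buildUnmerge(VRegs, ArgsReg); // one s64 -> two s32 parts
    //   setLeastSignificantFirst(VRegs);
    //   assignVRegs(VRegs, ArgLocs, Idx);        // copy/store each part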
namespace {
class IncomingValueHandler : public MipsCallLowering::MipsHandler {
public:
IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
: MipsHandler(MIRBuilder, MRI) {}
- bool handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args);
-
private:
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg) override;
+ void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override;
+
+ unsigned getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) override;
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) override;
+ void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) override;
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
- MachinePointerInfo &MPO) override;
+ bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs, unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) override;
virtual void markPhysRegUsed(unsigned PhysReg) {
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
- void buildLoad(unsigned Val, unsigned Addr, uint64_t Size, unsigned Alignment,
- MachinePointerInfo &MPO) {
- MachineMemOperand *MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOLoad, Size, Alignment);
+ void buildLoad(unsigned Val, const CCValAssign &VA) {
+ MachineMemOperand *MMO;
+ unsigned Addr = getStackAddress(VA, MMO);
MIRBuilder.buildLoad(Val, Addr, *MMO);
}
};
@@ -86,17 +128,34 @@ private:
} // end anonymous namespace
void IncomingValueHandler::assignValueToReg(unsigned ValVReg,
- unsigned PhysReg) {
- MIRBuilder.buildCopy(ValVReg, PhysReg);
+ const CCValAssign &VA) {
+ unsigned PhysReg = VA.getLocReg();
+ switch (VA.getLocInfo()) {
+ case CCValAssign::LocInfo::SExt:
+ case CCValAssign::LocInfo::ZExt:
+ case CCValAssign::LocInfo::AExt: {
+ auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ break;
+ }
+ default:
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ break;
+ }
markPhysRegUsed(PhysReg);
}
-unsigned IncomingValueHandler::getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) {
+unsigned IncomingValueHandler::getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) {
+ unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+ unsigned Offset = VA.getLocMemOffset();
MachineFrameInfo &MFI = MIRBuilder.getMF().getFrameInfo();
int FI = MFI.CreateFixedObject(Size, Offset, true);
- MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ MachinePointerInfo MPO =
+ MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ MMO = MIRBuilder.getMF().getMachineMemOperand(MPO, MachineMemOperand::MOLoad,
+ Size, /* Alignment */ 0);
unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 32));
MIRBuilder.buildFrameIndex(AddrReg, FI);
@@ -104,19 +163,26 @@ unsigned IncomingValueHandler::getStackAddress(uint64_t Size, int64_t Offset,
return AddrReg;
}
-void IncomingValueHandler::assignValueToAddress(unsigned ValVReg, unsigned Addr,
- uint64_t Size,
- MachinePointerInfo &MPO) {
- // If the value is not extended, a simple load will suffice.
- buildLoad(ValVReg, Addr, Size, /* Alignment */ 0, MPO);
+void IncomingValueHandler::assignValueToAddress(unsigned ValVReg,
+ const CCValAssign &VA) {
+ if (VA.getLocInfo() == CCValAssign::SExt ||
+ VA.getLocInfo() == CCValAssign::ZExt ||
+ VA.getLocInfo() == CCValAssign::AExt) {
+ unsigned LoadReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ buildLoad(LoadReg, VA);
+ MIRBuilder.buildTrunc(ValVReg, LoadReg);
+ } else
+ buildLoad(ValVReg, VA);
}
-bool IncomingValueHandler::handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args) {
- for (unsigned i = 0, ArgsSize = Args.size(); i < ArgsSize; ++i) {
- if (!assign(ArgLocs[i], Args[i].Reg))
- return false;
- }
+bool IncomingValueHandler::handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) {
+ if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex))
+ return false;
+ setLeastSignificantFirst(VRegs);
+ MIRBuilder.buildMerge(ArgsReg, VRegs);
return true;
}
@@ -127,103 +193,179 @@ public:
MachineInstrBuilder &MIB)
: MipsHandler(MIRBuilder, MRI), MIB(MIB) {}
- bool handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args);
-
private:
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg) override;
+ void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override;
+
+ unsigned getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) override;
+
+ void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) override;
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) override;
+ bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs, unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) override;
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
- MachinePointerInfo &MPO) override;
+ unsigned extendRegister(unsigned ValReg, const CCValAssign &VA);
MachineInstrBuilder &MIB;
};
} // end anonymous namespace
void OutgoingValueHandler::assignValueToReg(unsigned ValVReg,
- unsigned PhysReg) {
- MIRBuilder.buildCopy(PhysReg, ValVReg);
+ const CCValAssign &VA) {
+ unsigned PhysReg = VA.getLocReg();
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
-unsigned OutgoingValueHandler::getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) {
+unsigned OutgoingValueHandler::getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) {
LLT p0 = LLT::pointer(0, 32);
LLT s32 = LLT::scalar(32);
unsigned SPReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildCopy(SPReg, Mips::SP);
unsigned OffsetReg = MRI.createGenericVirtualRegister(s32);
+ unsigned Offset = VA.getLocMemOffset();
MIRBuilder.buildConstant(OffsetReg, Offset);
unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
- MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ MachinePointerInfo MPO =
+ MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+ MMO = MIRBuilder.getMF().getMachineMemOperand(MPO, MachineMemOperand::MOStore,
+ Size, /* Alignment */ 0);
+
return AddrReg;
}
-void OutgoingValueHandler::assignValueToAddress(unsigned ValVReg, unsigned Addr,
- uint64_t Size,
- MachinePointerInfo &MPO) {
- MachineMemOperand *MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOStore, Size, /* Alignment */ 0);
- MIRBuilder.buildStore(ValVReg, Addr, *MMO);
+void OutgoingValueHandler::assignValueToAddress(unsigned ValVReg,
+ const CCValAssign &VA) {
+ MachineMemOperand *MMO;
+ unsigned Addr = getStackAddress(VA, MMO);
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
-bool OutgoingValueHandler::handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args) {
- for (unsigned i = 0; i < Args.size(); ++i) {
- if (!assign(ArgLocs[i], Args[i].Reg))
- return false;
+unsigned OutgoingValueHandler::extendRegister(unsigned ValReg,
+ const CCValAssign &VA) {
+ LLT LocTy{VA.getLocVT()};
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt: {
+ unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildSExt(ExtReg, ValReg);
+ return ExtReg;
+ }
+ case CCValAssign::ZExt: {
+ unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildZExt(ExtReg, ValReg);
+ return ExtReg;
}
+ case CCValAssign::AExt: {
+ unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildAnyExt(ExtReg, ValReg);
+ return ExtReg;
+ }
+  // TODO: handle upper extends.
+ case CCValAssign::Full:
+ return ValReg;
+ default:
+ break;
+ }
+ llvm_unreachable("unable to extend register");
+}
+
+bool OutgoingValueHandler::handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) {
+ MIRBuilder.buildUnmerge(VRegs, ArgsReg);
+ setLeastSignificantFirst(VRegs);
+ if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex))
+ return false;
+
return true;
}
static bool isSupportedType(Type *T) {
- if (T->isIntegerTy() && T->getScalarSizeInBits() == 32)
+ if (T->isIntegerTy())
return true;
if (T->isPointerTy())
return true;
return false;
}
+static CCValAssign::LocInfo determineLocInfo(const MVT RegisterVT, const EVT VT,
+ const ISD::ArgFlagsTy &Flags) {
+  // VT being wider than RegisterVT does not mean loss of information here:
+  // such a value is split across multiple registers of type RegisterVT.
+ if (VT.getSizeInBits() >= RegisterVT.getSizeInBits())
+ return CCValAssign::LocInfo::Full;
+ if (Flags.isSExt())
+ return CCValAssign::LocInfo::SExt;
+ if (Flags.isZExt())
+ return CCValAssign::LocInfo::ZExt;
+ return CCValAssign::LocInfo::AExt;
+}
+
+template <typename T>
+static void setLocInfo(SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<T> &Arguments) {
+ for (unsigned i = 0; i < ArgLocs.size(); ++i) {
+ const CCValAssign &VA = ArgLocs[i];
+ CCValAssign::LocInfo LocInfo = determineLocInfo(
+ Arguments[i].VT, Arguments[i].ArgVT, Arguments[i].Flags);
+ if (VA.isMemLoc())
+ ArgLocs[i] =
+ CCValAssign::getMem(VA.getValNo(), VA.getValVT(),
+ VA.getLocMemOffset(), VA.getLocVT(), LocInfo);
+ else
+ ArgLocs[i] = CCValAssign::getReg(VA.getValNo(), VA.getValVT(),
+ VA.getLocReg(), VA.getLocVT(), LocInfo);
+ }
+}
+
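To make the rule in determineLocInfo above concrete, a few assumed examples (illustration only, not part of the patch):

    //   determineLocInfo(/*RegisterVT=*/MVT::i32, /*VT=*/MVT::i64, Flags)
    //     -> CCValAssign::Full   // i64 is split into two i32 parts, not truncated
    //   determineLocInfo(MVT::i32, MVT::i8, Flags) with Flags.isSExt() set
    //     -> CCValAssign::SExt   // narrow value is sign-extended to 32 bits
    //   determineLocInfo(MVT::i32, MVT::i8, Flags) with no extension flags
    //     -> CCValAssign::AExt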
bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(Mips::RetRA);
- if (Val != nullptr) {
- if (!isSupportedType(Val->getType()))
- return false;
+ if (Val != nullptr && !isSupportedType(Val->getType()))
+ return false;
+ if (!VRegs.empty()) {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = MF.getDataLayout();
const MipsTargetLowering &TLI = *getTLI<MipsTargetLowering>();
+ LLVMContext &Ctx = Val->getType()->getContext();
+
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
SmallVector<ArgInfo, 8> RetInfos;
SmallVector<unsigned, 8> OrigArgIndices;
- ArgInfo ArgRetInfo(VReg, Val->getType());
- setArgFlags(ArgRetInfo, AttributeList::ReturnIndex, DL, F);
- splitToValueTypes(ArgRetInfo, 0, RetInfos, OrigArgIndices);
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+ splitToValueTypes(CurArgInfo, 0, RetInfos, OrigArgIndices);
+ }
SmallVector<ISD::OutputArg, 8> Outs;
- subTargetRegTypeForCallingConv(
- MIRBuilder, RetInfos, OrigArgIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used,
- unsigned origIdx, unsigned partOffs) {
- Outs.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, RetInfos, OrigArgIndices, Outs);
SmallVector<CCValAssign, 16> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
F.getContext());
CCInfo.AnalyzeReturn(Outs, TLI.CCAssignFnForReturn());
+ setLocInfo(ArgLocs, Outs);
OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
if (!RetHandler.handle(ArgLocs, RetInfos)) {
@@ -266,12 +408,7 @@ bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
}
SmallVector<ISD::InputArg, 8> Ins;
- subTargetRegTypeForCallingConv(
- MIRBuilder, ArgInfos, OrigArgIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used, unsigned origIdx,
- unsigned partOffs) {
- Ins.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, ArgInfos, OrigArgIndices, Ins);
SmallVector<CCValAssign, 16> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
@@ -283,6 +420,7 @@ bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(F.getCallingConv()),
1);
CCInfo.AnalyzeFormalArguments(Ins, TLI.CCAssignFnForCall());
+ setLocInfo(ArgLocs, Ins);
IncomingValueHandler Handler(MIRBuilder, MF.getRegInfo());
if (!Handler.handle(ArgLocs, ArgInfos))
@@ -347,12 +485,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
}
SmallVector<ISD::OutputArg, 8> Outs;
- subTargetRegTypeForCallingConv(
- MIRBuilder, ArgInfos, OrigArgIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used, unsigned origIdx,
- unsigned partOffs) {
- Outs.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, ArgInfos, OrigArgIndices, Outs);
SmallVector<CCValAssign, 8> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
@@ -361,6 +494,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
const char *Call = Callee.isSymbol() ? Callee.getSymbolName() : nullptr;
CCInfo.AnalyzeCallOperands(Outs, TLI.CCAssignFnForCall(), FuncOrigArgs, Call);
+ setLocInfo(ArgLocs, Outs);
OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), MIB);
if (!RetHandler.handle(ArgLocs, ArgInfos)) {
@@ -383,18 +517,14 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
splitToValueTypes(OrigRet, 0, ArgInfos, OrigRetIndices);
SmallVector<ISD::InputArg, 8> Ins;
- subTargetRegTypeForCallingConv(
- MIRBuilder, ArgInfos, OrigRetIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used,
- unsigned origIdx, unsigned partOffs) {
- Ins.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, ArgInfos, OrigRetIndices, Ins);
SmallVector<CCValAssign, 8> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
F.getContext());
CCInfo.AnalyzeCallResult(Ins, TLI.CCAssignFnForReturn(), OrigRet.Ty, Call);
+ setLocInfo(ArgLocs, Ins);
CallReturnHandler Handler(MIRBuilder, MF.getRegInfo(), MIB);
if (!Handler.handle(ArgLocs, ArgInfos))
@@ -406,11 +536,10 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return true;
}
+template <typename T>
void MipsCallLowering::subTargetRegTypeForCallingConv(
- MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
- ArrayRef<unsigned> OrigArgIndices, const FunTy &PushBack) const {
- MachineFunction &MF = MIRBuilder.getMF();
- const Function &F = MF.getFunction();
+ const Function &F, ArrayRef<ArgInfo> Args,
+ ArrayRef<unsigned> OrigArgIndices, SmallVectorImpl<T> &ISDArgs) const {
const DataLayout &DL = F.getParent()->getDataLayout();
const MipsTargetLowering &TLI = *getTLI<MipsTargetLowering>();
@@ -420,12 +549,20 @@ void MipsCallLowering::subTargetRegTypeForCallingConv(
EVT VT = TLI.getValueType(DL, Arg.Ty);
MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(),
F.getCallingConv(), VT);
+ unsigned NumRegs = TLI.getNumRegistersForCallingConv(
+ F.getContext(), F.getCallingConv(), VT);
- ISD::ArgFlagsTy Flags = Arg.Flags;
- Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL));
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ ISD::ArgFlagsTy Flags = Arg.Flags;
- PushBack(Flags, RegisterVT, VT, true, OrigArgIndices[ArgNo], 0);
+ if (i == 0)
+ Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL));
+ else
+ Flags.setOrigAlign(1);
+ ISDArgs.emplace_back(Flags, RegisterVT, VT, true, OrigArgIndices[ArgNo],
+ 0);
+ }
++ArgNo;
}
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallLowering.h b/contrib/llvm/lib/Target/Mips/MipsCallLowering.h
index e23c10cec563..9916b04ef50c 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCallLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsCallLowering.h
@@ -31,27 +31,38 @@ public:
virtual ~MipsHandler() = default;
+ bool handle(ArrayRef<CCValAssign> ArgLocs,
+ ArrayRef<CallLowering::ArgInfo> Args);
+
protected:
- bool assign(const CCValAssign &VA, unsigned vreg);
+ bool assignVRegs(ArrayRef<unsigned> VRegs, ArrayRef<CCValAssign> ArgLocs,
+ unsigned Index);
+
+ void setLeastSignificantFirst(SmallVectorImpl<unsigned> &VRegs);
MachineIRBuilder &MIRBuilder;
MachineRegisterInfo &MRI;
private:
- virtual unsigned getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) = 0;
+ bool assign(unsigned VReg, const CCValAssign &VA);
+
+ virtual unsigned getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) = 0;
- virtual void assignValueToReg(unsigned ValVReg, unsigned PhysReg) = 0;
+ virtual void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) = 0;
- virtual void assignValueToAddress(unsigned ValVReg, unsigned Addr,
- uint64_t Size,
- MachinePointerInfo &MPO) = 0;
+ virtual void assignValueToAddress(unsigned ValVReg,
+ const CCValAssign &VA) = 0;
+
+ virtual bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex, unsigned ArgsReg) = 0;
};
MipsCallLowering(const MipsTargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
@@ -61,21 +72,16 @@ public:
ArrayRef<ArgInfo> OrigArgs) const override;
private:
- using FunTy =
- std::function<void(ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used,
- unsigned origIdx, unsigned partOffs)>;
-
/// Based on registers available on target machine split or extend
/// type if needed, also change pointer type to appropriate integer
- /// type. Lambda will fill some info so we can tell MipsCCState to
- /// assign physical registers.
- void subTargetRegTypeForCallingConv(MachineIRBuilder &MIRBuilder,
- ArrayRef<ArgInfo> Args,
+ /// type.
+ template <typename T>
+ void subTargetRegTypeForCallingConv(const Function &F, ArrayRef<ArgInfo> Args,
ArrayRef<unsigned> OrigArgIndices,
- const FunTy &PushBack) const;
+ SmallVectorImpl<T> &ISDArgs) const;
/// Split structures and arrays, save original argument indices since
- /// Mips calling conv needs info about original argument type.
+ /// Mips calling convention needs info about original argument type.
void splitToValueTypes(const ArgInfo &OrigArg, unsigned OriginalIndex,
SmallVectorImpl<ArgInfo> &SplitArgs,
SmallVectorImpl<unsigned> &SplitArgsOrigIndices) const;
diff --git a/contrib/llvm/lib/Target/Mips/MipsCondMov.td b/contrib/llvm/lib/Target/Mips/MipsCondMov.td
index 39dc2654aa6a..0d7e3e200b5f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCondMov.td
+++ b/contrib/llvm/lib/Target/Mips/MipsCondMov.td
@@ -296,3 +296,13 @@ def PseudoSELECTFP_F_I64 : SelectFP_Pseudo_F<GPR64Opnd>;
def PseudoSELECTFP_F_S : SelectFP_Pseudo_F<FGR32Opnd>;
def PseudoSELECTFP_F_D32 : SelectFP_Pseudo_F<AFGR64Opnd>, FGR_32;
def PseudoSELECTFP_F_D64 : SelectFP_Pseudo_F<FGR64Opnd>, FGR_64;
+
+let usesCustomInserter = 1 in {
+class D_SELECT_CLASS<RegisterOperand RC> :
+ PseudoSE<(outs RC:$dst1, RC:$dst2),
+ (ins GPR32Opnd:$cond, RC:$a1, RC:$a2, RC:$b1, RC:$b2), []>,
+ ISA_MIPS1_NOT_4_32;
+}
+
+def PseudoD_SELECT_I : D_SELECT_CLASS<GPR32Opnd>;
+def PseudoD_SELECT_I64 : D_SELECT_CLASS<GPR64Opnd>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 33f03b954a8c..e3823e0dfdb8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -728,9 +728,10 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
(Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
continue;
- // Instructions LWP/SWP should not be in a delay slot as that
+ // Instructions LWP/SWP and MOVEP should not be in a delay slot as that
// results in unpredictable behaviour
- if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM))
+ if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM ||
+ Opcode == Mips::MOVEP_MM))
continue;
Filler = CurrI;
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 0677d378a115..8c2a364cdfa9 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -1396,6 +1396,9 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case Mips::PseudoSELECTFP_T_D32:
case Mips::PseudoSELECTFP_T_D64:
return emitPseudoSELECT(MI, BB, true, Mips::BC1T);
+ case Mips::PseudoD_SELECT_I:
+ case Mips::PseudoD_SELECT_I64:
+ return emitPseudoD_SELECT(MI, BB);
}
}
@@ -2427,6 +2430,16 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32));
SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi,
DAG.getConstant(VT.getSizeInBits() - 1, DL, VT));
+
+ if (!(Subtarget.hasMips4() || Subtarget.hasMips32())) {
+ SDVTList VTList = DAG.getVTList(VT, VT);
+ return DAG.getNode(Subtarget.isGP64bit() ? Mips::PseudoD_SELECT_I64
+ : Mips::PseudoD_SELECT_I,
+ DL, VTList, Cond, ShiftRightHi,
+ IsSRA ? Ext : DAG.getConstant(0, DL, VT), Or,
+ ShiftRightHi);
+ }
+
Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or);
Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond,
IsSRA ? Ext : DAG.getConstant(0, DL, VT), ShiftRightHi);
@@ -2563,10 +2576,12 @@ static SDValue lowerUnalignedIntStore(StoreSDNode *SD, SelectionDAG &DAG,
}
// Lower (store (fp_to_sint $fp) $ptr) to (store (TruncIntFP $fp), $ptr).
-static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG) {
+static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG,
+ bool SingleFloat) {
SDValue Val = SD->getValue();
- if (Val.getOpcode() != ISD::FP_TO_SINT)
+ if (Val.getOpcode() != ISD::FP_TO_SINT ||
+ (Val.getValueSizeInBits() > 32 && SingleFloat))
return SDValue();
EVT FPTy = EVT::getFloatingPointVT(Val.getValueSizeInBits());
@@ -2587,7 +2602,7 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
((MemVT == MVT::i32) || (MemVT == MVT::i64)))
return lowerUnalignedIntStore(SD, DAG, Subtarget.isLittle());
- return lowerFP_TO_SINT_STORE(SD, DAG);
+ return lowerFP_TO_SINT_STORE(SD, DAG, Subtarget.isSingleFloat());
}
SDValue MipsTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
@@ -2603,6 +2618,9 @@ SDValue MipsTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
+ if (Op.getValueSizeInBits() > 32 && Subtarget.isSingleFloat())
+ return SDValue();
+
EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
SDValue Trunc = DAG.getNode(MipsISD::TruncIntFP, SDLoc(Op), FPTy,
Op.getOperand(0));
@@ -4340,6 +4358,81 @@ MachineBasicBlock *MipsTargetLowering::emitPseudoSELECT(MachineInstr &MI,
return BB;
}
+MachineBasicBlock *MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ assert(!(Subtarget.hasMips4() || Subtarget.hasMips32()) &&
+         "Subtarget already supports SELECT nodes with the use of "
+ "conditional-move instructions.");
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+  // D_SELECT substitutes two SELECT nodes that follow one another and share
+  // the same condition operand. On machines without a conditional-move
+  // instruction, this avoids the redundant branches that lowering the two
+  // SELECT pseudo instructions as two separate diamond patterns would
+  // otherwise produce.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // bne rs, $0, sinkMBB
+ BuildMI(BB, DL, TII->get(Mips::BNE))
+ .addReg(MI.getOperand(2).getReg())
+ .addReg(Mips::ZERO)
+ .addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+  // Use two PHI nodes to select the two results.
+ BuildMI(*BB, BB->begin(), DL, TII->get(Mips::PHI), MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(3).getReg())
+ .addMBB(thisMBB)
+ .addReg(MI.getOperand(5).getReg())
+ .addMBB(copy0MBB);
+ BuildMI(*BB, BB->begin(), DL, TII->get(Mips::PHI), MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(4).getReg())
+ .addMBB(thisMBB)
+ .addReg(MI.getOperand(6).getReg())
+ .addMBB(copy0MBB);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+
+ return BB;
+}
+
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT,
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index 5a0de45c44f3..e043f133a09f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -699,6 +699,8 @@ class TargetRegisterClass;
MachineBasicBlock *emitSEL_D(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *emitPseudoSELECT(MachineInstr &MI, MachineBasicBlock *BB,
bool isFPCmp, unsigned Opc) const;
+ MachineBasicBlock *emitPseudoD_SELECT(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
};
/// Create MipsTargetLowering objects.
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
index e986942ad8fa..4cb8574e08f6 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -130,6 +130,15 @@ class ABSS_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
HARDFLOAT,
NeverHasSideEffects;
+class CVT_PS_S_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC, InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fd), (ins SrcRC:$fs, SrcRC:$ft),
+ !strconcat(opstr, "\t$fd, $fs, $ft"),
+ [(set DstRC:$fd, (OpNode SrcRC:$fs, SrcRC:$ft))], Itin, FrmFR, opstr>,
+ HARDFLOAT {
+ let isCommutable = IsComm;
+}
+
multiclass ABSS_M<string opstr, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> {
def _D32 : MMRel, ABSS_FT<opstr, AFGR64Opnd, AFGR64Opnd, Itin, OpNode>,
@@ -432,6 +441,29 @@ let AdditionalPredicates = [NotInMicroMips] in {
def CVT_D32_W : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, II_CVT>,
ABSS_FM<0x21, 20>, ISA_MIPS1, FGR_32;
}
+
+let DecoderNamespace = "MipsFP64" in {
+ let AdditionalPredicates = [NotInMicroMips] in {
+ def PLL_PS64 : ADDS_FT<"pll.ps", FGR64Opnd, II_CVT, 0>,
+ ADDS_FM<0x2C, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def PLU_PS64 : ADDS_FT<"plu.ps", FGR64Opnd, II_CVT, 0>,
+ ADDS_FM<0x2D, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+
+ def CVT_S_PU64 : ABSS_FT<"cvt.s.pu", FGR32Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x20, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def CVT_S_PL64 : ABSS_FT<"cvt.s.pl", FGR32Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x28, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+
+ def CVT_PS_S64 : CVT_PS_S_FT<"cvt.ps.s", FGR64Opnd, FGR32Opnd, II_CVT, 0>,
+ ADDS_FM<0x26, 16>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ }
+}
+
let DecoderNamespace = "MipsFP64" in {
let AdditionalPredicates = [NotInMicroMips] in {
def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>,
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 0e0e712dba19..bfb4c775205d 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -280,6 +280,8 @@ bool MipsInstrInfo::isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset)
switch (BranchOpc) {
case Mips::B:
case Mips::BAL:
+ case Mips::BAL_BR:
+ case Mips::BAL_BR_MM:
case Mips::BC1F:
case Mips::BC1FL:
case Mips::BC1T:
@@ -661,8 +663,7 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
}
MIB.copyImplicitOps(*I);
-
- MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
+ MIB.cloneMemRefs(*I);
return MIB;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index 0faa13d4d63f..d9398b7d6024 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -2002,13 +2002,19 @@ let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
// branches. See the comment in file MipsLongBranch.cpp for detailed
// explanation.
-// Expands to: lui $dst, %hi($tgt - $baltgt)
+// Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt - $baltgt)
def LONG_BRANCH_LUi : PseudoSE<(outs GPR32Opnd:$dst),
(ins brtarget:$tgt, brtarget:$baltgt), []>;
+// Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_LUi2Op : PseudoSE<(outs GPR32Opnd:$dst),
+ (ins brtarget:$tgt), []>;
-// Expands to: addiu $dst, $src, %lo($tgt - $baltgt)
+// Expands to: addiu $dst, $src, %highest/%higher/%hi/%lo($tgt - $baltgt)
def LONG_BRANCH_ADDiu : PseudoSE<(outs GPR32Opnd:$dst),
(ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>;
+// Expands to: addiu $dst, $src, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_ADDiu2Op : PseudoSE<(outs GPR32Opnd:$dst),
+ (ins GPR32Opnd:$src, brtarget:$tgt), []>;
//===----------------------------------------------------------------------===//
// Instruction definition
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index 6c5b83021f74..b041590ee343 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -15,6 +15,7 @@
#include "MipsRegisterBankInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#define DEBUG_TYPE "mips-isel"
@@ -144,6 +145,42 @@ bool MipsInstructionSelector::select(MachineInstr &I,
.addMemOperand(*I.memoperands_begin());
break;
}
+ case G_UDIV:
+ case G_UREM:
+ case G_SDIV:
+ case G_SREM: {
+ unsigned HILOReg = MRI.createVirtualRegister(&Mips::ACC64RegClass);
+ bool IsSigned = I.getOpcode() == G_SREM || I.getOpcode() == G_SDIV;
+ bool IsDiv = I.getOpcode() == G_UDIV || I.getOpcode() == G_SDIV;
+
+ MachineInstr *PseudoDIV, *PseudoMove;
+ PseudoDIV = BuildMI(MBB, I, I.getDebugLoc(),
+ TII.get(IsSigned ? Mips::PseudoSDIV : Mips::PseudoUDIV))
+ .addDef(HILOReg)
+ .add(I.getOperand(1))
+ .add(I.getOperand(2));
+ if (!constrainSelectedInstRegOperands(*PseudoDIV, TII, TRI, RBI))
+ return false;
+
+ PseudoMove = BuildMI(MBB, I, I.getDebugLoc(),
+ TII.get(IsDiv ? Mips::PseudoMFLO : Mips::PseudoMFHI))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(HILOReg);
+ if (!constrainSelectedInstRegOperands(*PseudoMove, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
+ case G_SELECT: {
+ // Handle operands with pointer type.
+ MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::MOVN_I_I))
+ .add(I.getOperand(0))
+ .add(I.getOperand(2))
+ .add(I.getOperand(1))
+ .add(I.getOperand(3));
+ break;
+ }
case G_CONSTANT: {
int Imm = I.getOperand(1).getCImm()->getValue().getLimitedValue();
unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
@@ -193,7 +230,85 @@ bool MipsInstructionSelector::select(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case G_ICMP: {
+ struct Instr {
+ unsigned Opcode, Def, LHS, RHS;
+ Instr(unsigned Opcode, unsigned Def, unsigned LHS, unsigned RHS)
+ : Opcode(Opcode), Def(Def), LHS(LHS), RHS(RHS){};
+
+ bool hasImm() const {
+ if (Opcode == Mips::SLTiu || Opcode == Mips::XORi)
+ return true;
+ return false;
+ }
+ };
+
+ SmallVector<struct Instr, 2> Instructions;
+ unsigned ICMPReg = I.getOperand(0).getReg();
+ unsigned Temp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ unsigned LHS = I.getOperand(2).getReg();
+ unsigned RHS = I.getOperand(3).getReg();
+ CmpInst::Predicate Cond =
+ static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
+
+ switch (Cond) {
+ case CmpInst::ICMP_EQ: // LHS == RHS -> (LHS ^ RHS) < 1
+ Instructions.emplace_back(Mips::XOR, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::SLTiu, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_NE: // LHS != RHS -> 0 < (LHS ^ RHS)
+ Instructions.emplace_back(Mips::XOR, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::SLTu, ICMPReg, Mips::ZERO, Temp);
+ break;
+ case CmpInst::ICMP_UGT: // LHS > RHS -> RHS < LHS
+ Instructions.emplace_back(Mips::SLTu, ICMPReg, RHS, LHS);
+ break;
+ case CmpInst::ICMP_UGE: // LHS >= RHS -> !(LHS < RHS)
+ Instructions.emplace_back(Mips::SLTu, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_ULT: // LHS < RHS -> LHS < RHS
+ Instructions.emplace_back(Mips::SLTu, ICMPReg, LHS, RHS);
+ break;
+ case CmpInst::ICMP_ULE: // LHS <= RHS -> !(RHS < LHS)
+ Instructions.emplace_back(Mips::SLTu, Temp, RHS, LHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_SGT: // LHS > RHS -> RHS < LHS
+ Instructions.emplace_back(Mips::SLT, ICMPReg, RHS, LHS);
+ break;
+ case CmpInst::ICMP_SGE: // LHS >= RHS -> !(LHS < RHS)
+ Instructions.emplace_back(Mips::SLT, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_SLT: // LHS < RHS -> LHS < RHS
+ Instructions.emplace_back(Mips::SLT, ICMPReg, LHS, RHS);
+ break;
+ case CmpInst::ICMP_SLE: // LHS <= RHS -> !(RHS < LHS)
+ Instructions.emplace_back(Mips::SLT, Temp, RHS, LHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ default:
+ return false;
+ }
+
+ MachineIRBuilder B(I);
+ for (const struct Instr &Instruction : Instructions) {
+ MachineInstrBuilder MIB = B.buildInstr(
+ Instruction.Opcode, {Instruction.Def}, {Instruction.LHS});
+
+ if (Instruction.hasImm())
+ MIB.addImm(Instruction.RHS);
+ else
+ MIB.addUse(Instruction.RHS);
+ if (!MIB.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
+
+ I.eraseFromParent();
+ return true;
+ }
default:
return false;
}
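As an informal illustration of the G_ICMP mapping above (not part of the patch), the predicates with no single set-on-condition instruction are built from slt/sltu plus one extra instruction:

    // G_ICMP intpred(sge), %dst(s32), %lhs, %rhs selects roughly to:
    //   slt   $tmp, $lhs, $rhs    // tmp = (lhs < rhs) ? 1 : 0
    //   xori  $dst, $tmp, 1       // dst = !(lhs < rhs), i.e. lhs >= rhs
    // and an equality compare to:
    //   xor   $tmp, $lhs, $rhs
    //   sltiu $dst, $tmp, 1       // dst = ((lhs ^ rhs) < 1), i.e. lhs == rhs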
diff --git a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index fb259516be09..c629f02af00e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -13,23 +13,53 @@
#include "MipsLegalizerInfo.h"
#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
using namespace llvm;
MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
using namespace TargetOpcode;
+ const LLT s1 = LLT::scalar(1);
const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
const LLT p0 = LLT::pointer(0, 32);
- getActionDefinitionsBuilder(G_ADD).legalFor({s32});
+ getActionDefinitionsBuilder(G_ADD)
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+
+ getActionDefinitionsBuilder(G_UADDE)
+ .lowerFor({{s32, s1}});
getActionDefinitionsBuilder({G_LOAD, G_STORE})
.legalForCartesianProduct({p0, s32}, {p0});
- getActionDefinitionsBuilder(G_CONSTANT)
+ getActionDefinitionsBuilder(G_SELECT)
+ .legalForCartesianProduct({p0, s32}, {s32})
+ .minScalar(0, s32)
+ .minScalar(1, s32);
+
+ getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+
+ getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
.legalFor({s32});
+ getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UREM, G_UDIV})
+ .legalFor({s32})
+ .minScalar(0, s32)
+ .libcallFor({s64});
+
+ getActionDefinitionsBuilder(G_ICMP)
+ .legalFor({{s32, s32}})
+ .minScalar(0, s32);
+
+ getActionDefinitionsBuilder(G_CONSTANT)
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+
getActionDefinitionsBuilder(G_GEP)
.legalFor({{p0, s32}});
@@ -42,3 +72,15 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
computeTables();
verify(*ST.getInstrInfo());
}
+
+bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
+
+ using namespace TargetOpcode;
+
+ MIRBuilder.setInstr(MI);
+
+ return false;
+}
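A short gloss on the rule builders used in the constructor above, with assumed example MIR (my reading of the GlobalISel builders, not text from the patch): legalFor lists the type combinations accepted as-is, clampScalar(0, s32, s32) widens or narrows operand 0 so that only s32 survives, minScalar only widens, and libcallFor routes the listed types to runtime library calls.

    //   %a:_(s8)  = G_ADD %x, %y   -> widened to s32 by clampScalar(0, s32, s32)
    //   %c:_(s1)  = G_ICMP ...     -> result widened to s32 by minScalar(0, s32)
    //   %d:_(s64) = G_SDIV %p, %q  -> emitted as a library call (e.g. __divdi3)
    //                                 because of libcallFor({s64})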
diff --git a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h
index 36dd39c8c1c1..75fadd6cf613 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_MIPS_MIPSMACHINELEGALIZER_H
#define LLVM_LIB_TARGET_MIPS_MIPSMACHINELEGALIZER_H
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
namespace llvm {
@@ -24,6 +25,10 @@ class MipsSubtarget;
class MipsLegalizerInfo : public LegalizerInfo {
public:
MipsLegalizerInfo(const MipsSubtarget &ST);
+
+ bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const override;
};
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
index 2b7f64099923..46b37ceae391 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -298,12 +298,16 @@ bool MipsMCInstLower::lowerLongBranch(const MachineInstr *MI,
default:
return false;
case Mips::LONG_BRANCH_LUi:
+ case Mips::LONG_BRANCH_LUi2Op:
+ case Mips::LONG_BRANCH_LUi2Op_64:
lowerLongBranchLUi(MI, OutMI);
return true;
case Mips::LONG_BRANCH_ADDiu:
+ case Mips::LONG_BRANCH_ADDiu2Op:
lowerLongBranchADDiu(MI, OutMI, Mips::ADDiu);
return true;
case Mips::LONG_BRANCH_DADDiu:
+ case Mips::LONG_BRANCH_DADDiu2Op:
lowerLongBranchADDiu(MI, OutMI, Mips::DADDiu);
return true;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index d83f75ffa1c1..eecc7c573df1 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -107,6 +107,18 @@ class vfsetcc_type<ValueType ResTy, ValueType OpTy, CondCode CC> :
(ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>;
// ISD::SETFALSE cannot occur
+def vfseteq_v4f32 : vfsetcc_type<v4i32, v4f32, SETEQ>;
+def vfseteq_v2f64 : vfsetcc_type<v2i64, v2f64, SETEQ>;
+def vfsetge_v4f32 : vfsetcc_type<v4i32, v4f32, SETGE>;
+def vfsetge_v2f64 : vfsetcc_type<v2i64, v2f64, SETGE>;
+def vfsetgt_v4f32 : vfsetcc_type<v4i32, v4f32, SETGT>;
+def vfsetgt_v2f64 : vfsetcc_type<v2i64, v2f64, SETGT>;
+def vfsetle_v4f32 : vfsetcc_type<v4i32, v4f32, SETLE>;
+def vfsetle_v2f64 : vfsetcc_type<v2i64, v2f64, SETLE>;
+def vfsetlt_v4f32 : vfsetcc_type<v4i32, v4f32, SETLT>;
+def vfsetlt_v2f64 : vfsetcc_type<v2i64, v2f64, SETLT>;
+def vfsetne_v4f32 : vfsetcc_type<v4i32, v4f32, SETNE>;
+def vfsetne_v2f64 : vfsetcc_type<v2i64, v2f64, SETNE>;
def vfsetoeq_v4f32 : vfsetcc_type<v4i32, v4f32, SETOEQ>;
def vfsetoeq_v2f64 : vfsetcc_type<v2i64, v2f64, SETOEQ>;
def vfsetoge_v4f32 : vfsetcc_type<v4i32, v4f32, SETOGE>;
@@ -4038,3 +4050,20 @@ def : MSAPat<
(SPLAT_D v2f64:$ws,
(COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
sub_64))>;
+
+def : MSAPat<(vfseteq_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCEQ_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfseteq_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCEQ_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
+def : MSAPat<(vfsetle_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCLE_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfsetle_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCLE_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
+def : MSAPat<(vfsetlt_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCLT_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfsetlt_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCLT_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
+def : MSAPat<(vfsetne_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCNE_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfsetne_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCNE_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..1cff1c8396ea
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -0,0 +1,92 @@
+//=== lib/Target/Mips/MipsPreLegalizerCombiner.cpp ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+#define DEBUG_TYPE "mips-prelegalizer-combiner"
+
+using namespace llvm;
+
+namespace {
+class MipsPreLegalizerCombinerInfo : public CombinerInfo {
+public:
+ MipsPreLegalizerCombinerInfo()
+ : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr) {}
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ return false;
+}
+
+// Pass boilerplate
+// ================
+
+class MipsPreLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MipsPreLegalizerCombiner();
+
+ StringRef getPassName() const override { return "MipsPreLegalizerCombiner"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+} // end anonymous namespace
+
+void MipsPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MipsPreLegalizerCombiner::MipsPreLegalizerCombiner() : MachineFunctionPass(ID) {
+ initializeMipsPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool MipsPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ MipsPreLegalizerCombinerInfo PCInfo;
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, nullptr);
+}
+
+char MipsPreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(MipsPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine Mips machine instrs before legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(MipsPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine Mips machine instrs before legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createMipsPreLegalizeCombiner() {
+ return new MipsPreLegalizerCombiner();
+}
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 351135079217..6af1f10189df 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -57,7 +57,10 @@ const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass(
switch (RC.getID()) {
case Mips::GPR32RegClassID:
case Mips::CPU16Regs_and_GPRMM16ZeroRegClassID:
+ case Mips::GPRMM16MovePPairFirstRegClassID:
+ case Mips::CPU16Regs_and_GPRMM16MovePPairSecondRegClassID:
case Mips::GPRMM16MoveP_and_CPU16Regs_and_GPRMM16ZeroRegClassID:
+ case Mips::GPRMM16MovePPairFirst_and_GPRMM16MovePPairSecondRegClassID:
case Mips::SP32RegClassID:
return getRegBank(Mips::GPRBRegBankID);
default:
@@ -84,6 +87,16 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_LOAD:
case G_STORE:
case G_GEP:
+ case G_AND:
+ case G_OR:
+ case G_XOR:
+ case G_SHL:
+ case G_ASHR:
+ case G_LSHR:
+ case G_SDIV:
+ case G_UDIV:
+ case G_SREM:
+ case G_UREM:
OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
break;
case G_CONSTANT:
@@ -92,6 +105,19 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr});
break;
+ case G_ICMP:
+ OperandsMapping =
+ getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr,
+ &Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ break;
+ case G_SELECT:
+ OperandsMapping =
+ getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ break;
default:
return getInvalidInstructionMapping();
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
index 4cc50fb981ba..b84aaad05eb5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -57,8 +57,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
index c85ee20273c0..a943a0ad4094 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -335,6 +335,16 @@ def GPRMM16MoveP : RegisterClass<"Mips", [i32], 32, (add
// Callee save
S0, S2, S3, S4)>;
+def GPRMM16MovePPairFirst : RegisterClass<"Mips", [i32], 32, (add
+ // Arguments
+ A0, A1, A2)>;
+
+def GPRMM16MovePPairSecond : RegisterClass<"Mips", [i32], 32, (add
+ // Arguments
+ A1, A2, A3,
+ // Callee save
+ S5, S6)>;
+
def GPR64 : RegisterClass<"Mips", [i64], 64, (add
// Reserved
ZERO_64, AT_64,
@@ -522,6 +532,16 @@ def GPRMM16AsmOperandMoveP : MipsAsmRegOperand {
let PredicateMethod = "isMM16AsmRegMoveP";
}
+def GPRMM16AsmOperandMovePPairFirst : MipsAsmRegOperand {
+ let Name = "GPRMM16AsmRegMovePPairFirst";
+ let PredicateMethod = "isMM16AsmRegMovePPairFirst";
+}
+
+def GPRMM16AsmOperandMovePPairSecond : MipsAsmRegOperand {
+ let Name = "GPRMM16AsmRegMovePPairSecond";
+ let PredicateMethod = "isMM16AsmRegMovePPairSecond";
+}
+
def ACC64DSPAsmOperand : MipsAsmRegOperand {
let Name = "ACC64DSPAsmReg";
let PredicateMethod = "isACCAsmReg";
@@ -613,6 +633,14 @@ def GPRMM16OpndMoveP : RegisterOperand<GPRMM16MoveP> {
let EncoderMethod = "getMovePRegSingleOpValue";
}
+def GPRMM16OpndMovePPairFirst : RegisterOperand<GPRMM16MovePPairFirst> {
+ let ParserMatchClass = GPRMM16AsmOperandMovePPairFirst;
+}
+
+def GPRMM16OpndMovePPairSecond : RegisterOperand<GPRMM16MovePPairSecond> {
+ let ParserMatchClass = GPRMM16AsmOperandMovePPairSecond;
+}
+
def GPR64Opnd : RegisterOperand<GPR64> {
let ParserMatchClass = GPR64AsmOperand;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index cf2899dd375e..cf196b597278 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -244,7 +244,7 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true));
break;
}
- // fallthrough
+ LLVM_FALLTHROUGH;
case Mips::BuildPairF64:
case Mips::ExtractElementF64:
if (Subtarget->isABI_FPXX() && !Subtarget->hasMTHC1())
@@ -795,6 +795,24 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
switch(Opcode) {
default: break;
+ case Mips::PseudoD_SELECT_I:
+ case Mips::PseudoD_SELECT_I64: {
+ MVT VT = Subtarget->isGP64bit() ? MVT::i64 : MVT::i32;
+ SDValue cond = Node->getOperand(0);
+ SDValue Hi1 = Node->getOperand(1);
+ SDValue Lo1 = Node->getOperand(2);
+ SDValue Hi2 = Node->getOperand(3);
+ SDValue Lo2 = Node->getOperand(4);
+
+ SDValue ops[] = {cond, Hi1, Lo1, Hi2, Lo2};
+ EVT NodeTys[] = {VT, VT};
+ ReplaceNode(Node, CurDAG->getMachineNode(Subtarget->isGP64bit()
+ ? Mips::PseudoD_SELECT_I64
+ : Mips::PseudoD_SELECT_I,
+ DL, NodeTys, ops));
+ return true;
+ }
+
case ISD::ADDE: {
selectAddE(Node, DL);
return true;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index f625a2903bd7..a78e544c35f0 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -158,8 +158,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
@@ -2360,24 +2360,6 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
}
}
-/// Check if the given BuildVectorSDNode is a splat.
-/// This method currently relies on DAG nodes being reused when equivalent,
-/// so it's possible for this to return false even when isConstantSplat returns
-/// true.
-static bool isSplatVector(const BuildVectorSDNode *N) {
- unsigned int nOps = N->getNumOperands();
- assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector");
-
- SDValue Operand0 = N->getOperand(0);
-
- for (unsigned int i = 1; i < nOps; ++i) {
- if (N->getOperand(i) != Operand0)
- return false;
- }
-
- return true;
-}
-
// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
//
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
@@ -2488,7 +2470,7 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
return Result;
- } else if (isSplatVector(Node))
+ } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
return Op;
else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
// Use INSERT_VECTOR_ELT operations rather than expand to stores.
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index e8589fc53492..c7ab90ed2a3b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -222,9 +222,9 @@ static bool isReadOrWriteToDSPReg(const MachineInstr &MI, bool &isWrite) {
/// We check for the common case of 'or', as it's MIPS' preferred instruction
/// for GPRs but we have to check the operands to ensure that is the case.
/// Other move instructions for MIPS are directly identifiable.
-bool MipsSEInstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool MipsSEInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
bool isDSPControlWrite = false;
// Condition is made to match the creation of WRDSP/RDDSP copy instruction
// from copyPhysReg function.
@@ -421,12 +421,16 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandERet(MBB, MI);
break;
case Mips::PseudoMFHI:
- Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI;
- expandPseudoMFHiLo(MBB, MI, Opc);
+ expandPseudoMFHiLo(MBB, MI, Mips::MFHI);
+ break;
+ case Mips::PseudoMFHI_MM:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFHI16_MM);
break;
case Mips::PseudoMFLO:
- Opc = isMicroMips ? Mips::MFLO16_MM : Mips::MFLO;
- expandPseudoMFHiLo(MBB, MI, Opc);
+ expandPseudoMFHiLo(MBB, MI, Mips::MFLO);
+ break;
+ case Mips::PseudoMFLO_MM:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFLO16_MM);
break;
case Mips::PseudoMFHI64:
expandPseudoMFHiLo(MBB, MI, Mips::MFHI64);
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
index fc55716d598a..fce0fe5f58ad 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -47,9 +47,6 @@ public:
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
-
void storeRegToStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -79,6 +76,13 @@ public:
MachineBasicBlock::iterator II, const DebugLoc &DL,
unsigned *NewImm) const;
+protected:
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another, return true along with the
+ /// @Source machine operand and the @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
private:
unsigned getAnalyzableBrOpc(unsigned Opc) const override;
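The MipsSEInstrInfo change above moves the copy-recognition hook from a public isCopyInstr override to a protected isCopyInstrImpl. Below is a minimal standalone sketch of that wrapper/Impl split, assuming the base class keeps a public entry point that simply delegates to the protected virtual; the class names, opcode value, and register encoding here are invented for illustration and are not LLVM code.

#include <iostream>

struct Instr { int Opcode; int Dst; int Src; };

class TargetInstrInfoBase {
public:
  // Public entry point: may add target-independent checks before delegating.
  bool isCopyInstr(const Instr &MI, int &Src, int &Dst) const {
    return isCopyInstrImpl(MI, Src, Dst);
  }
  virtual ~TargetInstrInfoBase() = default;

protected:
  virtual bool isCopyInstrImpl(const Instr &, int &, int &) const { return false; }
};

class MipsLikeInstrInfo : public TargetInstrInfoBase {
protected:
  // Hypothetical opcode 1 stands for "or rd, rs, $zero", which acts as a GPR copy.
  bool isCopyInstrImpl(const Instr &MI, int &Src, int &Dst) const override {
    if (MI.Opcode != 1)
      return false;
    Src = MI.Src;
    Dst = MI.Dst;
    return true;
  }
};

int main() {
  MipsLikeInstrInfo TII;
  int S = 0, D = 0;
  Instr Copy{1, /*Dst=*/4, /*Src=*/7};
  std::cout << TII.isCopyInstr(Copy, S, D) << " " << S << " -> " << D << "\n"; // prints: 1 7 -> 4
}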
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
index 64db815a0f4c..410fa655a225 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSchedule.td
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -154,6 +154,7 @@ def II_DERET : InstrItinClass;
def II_ERETNC : InstrItinClass;
def II_EHB : InstrItinClass;
def II_SDBBP : InstrItinClass;
+def II_SIGRIE : InstrItinClass;
def II_SSNOP : InstrItinClass;
def II_SYSCALL : InstrItinClass;
def II_PAUSE : InstrItinClass;
@@ -546,6 +547,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_ERETNC , [InstrStage<1, [ALU]>]>,
InstrItinData<II_EHB , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SDBBP , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_SIGRIE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SSNOP , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SYSCALL , [InstrStage<1, [ALU]>]>,
InstrItinData<II_PAUSE , [InstrStage<1, [ALU]>]>,
diff --git a/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td
index 79c55dbb9e03..80ffe7ada7c8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td
@@ -179,7 +179,7 @@ def GenericWriteTrap : SchedWriteRes<[GenericIssueCTISTD]>;
def : ItinRW<[GenericWriteTrap], [II_BREAK, II_SYSCALL, II_TEQ, II_TEQI,
II_TGE, II_TGEI, II_TGEIU, II_TGEU, II_TNE,
II_TNEI, II_TLT, II_TLTI, II_TLTU, II_TTLTIU,
- II_TRAP, II_SDBBP]>;
+ II_TRAP, II_SDBBP, II_SIGRIE]>;
// COP0 Pipeline
// =============
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
index 896dd0eb0a5e..ad8f4848b870 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -295,8 +295,10 @@ public:
bool inMips16HardFloat() const {
return inMips16Mode() && InMips16HardFloat;
}
- bool inMicroMipsMode() const { return InMicroMipsMode; }
- bool inMicroMips32r6Mode() const { return InMicroMipsMode && hasMips32r6(); }
+ bool inMicroMipsMode() const { return InMicroMipsMode && !InMips16Mode; }
+ bool inMicroMips32r6Mode() const {
+ return inMicroMipsMode() && hasMips32r6();
+ }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
bool hasDSPR3() const { return HasDSPR3; }
@@ -312,14 +314,14 @@ public:
}
bool useSmallSection() const { return UseSmallSection; }
- bool hasStandardEncoding() const { return !inMips16Mode(); }
+ bool hasStandardEncoding() const { return !InMips16Mode && !InMicroMipsMode; }
bool useSoftFloat() const { return IsSoftFloat; }
bool useLongCalls() const { return UseLongCalls; }
bool enableLongBranchPass() const {
- return hasStandardEncoding() || allowMixed16_32();
+ return hasStandardEncoding() || inMicroMipsMode() || allowMixed16_32();
}
/// Features related to the presence of specific instructions.
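The MipsSubtarget change above makes MIPS16 take precedence over microMIPS when both feature bits are set and tightens hasStandardEncoding() to exclude both compressed ISAs. A small standalone sketch of the new predicate logic (plain C++, field names are illustrative only):

#include <cassert>

struct Modes { bool Mips16; bool MicroMips; };

static bool inMicroMipsMode(Modes M)     { return M.MicroMips && !M.Mips16; }
static bool hasStandardEncoding(Modes M) { return !M.Mips16 && !M.MicroMips; }

int main() {
  Modes Both{true, true};                 // both feature bits set
  assert(!inMicroMipsMode(Both));         // MIPS16 wins; microMIPS is reported off
  assert(!hasStandardEncoding(Both));     // but this is still not standard MIPS
  Modes Micro{false, true};
  assert(inMicroMipsMode(Micro) && !hasStandardEncoding(Micro));
  Modes Plain{false, false};
  assert(hasStandardEncoding(Plain));
}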
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 1e6fe2b9f7e7..8466298cf36f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -56,6 +56,7 @@ extern "C" void LLVMInitializeMipsTarget() {
initializeMipsDelaySlotFillerPass(*PR);
initializeMipsBranchExpansionPass(*PR);
initializeMicroMipsSizeReducePass(*PR);
+ initializeMipsPreLegalizerCombinerPass(*PR);
}
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -101,12 +102,6 @@ static Reloc::Model getEffectiveRelocModel(bool JIT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
// On function prologue, the stack is created by decrementing
// its pointer. Once decremented, all references are done with positive
// offset from the stack/frame pointer, using StackGrowsUp enables
@@ -121,7 +116,7 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT,
bool isLittle)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(JIT, RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
isLittle(isLittle), TLOF(llvm::make_unique<MipsTargetObjectFile>()),
ABI(MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)),
Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this,
@@ -240,8 +235,8 @@ public:
bool addInstSelector() override;
void addPreEmitPass() override;
void addPreRegAlloc() override;
- void addPreEmit2() ;
bool addIRTranslator() override;
+ void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
@@ -286,9 +281,6 @@ MipsTargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(BasicTTIImpl(this, F));
}
-void MipsPassConfig::addPreEmit2() {
-}
-
// Implemented by targets that want to run passes immediately before
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
@@ -322,6 +314,10 @@ bool MipsPassConfig::addIRTranslator() {
return false;
}
+void MipsPassConfig::addPreLegalizeMachineIR() {
+ addPass(createMipsPreLegalizeCombiner());
+}
+
bool MipsPassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
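The MipsTargetMachine change above (and the matching NVPTX change further below) drops a file-local getEffectiveCodeModel() helper in favor of a shared one that takes an explicit default. A minimal standalone sketch of the intended behavior, using std::optional in place of llvm::Optional; the enum values here are illustrative, not the real CodeModel definition:

#include <cassert>
#include <optional>

enum class CodeModel { Small, Medium, Large };

// Pick the user-supplied code model if present, otherwise the target default.
static CodeModel getEffectiveCodeModel(std::optional<CodeModel> CM, CodeModel Default) {
  return CM ? *CM : Default;
}

int main() {
  assert(getEffectiveCodeModel(std::nullopt, CodeModel::Small) == CodeModel::Small);
  assert(getEffectiveCodeModel(CodeModel::Large, CodeModel::Small) == CodeModel::Large);
}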
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index f767c8321988..f53ee0631b5e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -10,6 +10,7 @@
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
+#include "MCTargetDesc/MipsMCExpr.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -189,6 +190,7 @@ const MCExpr *
MipsTargetObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
const MCExpr *Expr =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- return MCBinaryExpr::createAdd(
+ Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(0x8000, getContext()), getContext());
+ return MipsMCExpr::create(MipsMCExpr::MEK_DTPREL, Expr, getContext());
}
diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index ab494d5bf41b..22be564b6502 100644
--- a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -32,17 +32,18 @@ Target &llvm::getTheMips64elTarget() {
extern "C" void LLVMInitializeMipsTargetInfo() {
RegisterTarget<Triple::mips,
/*HasJIT=*/true>
- X(getTheMipsTarget(), "mips", "Mips", "Mips");
+ X(getTheMipsTarget(), "mips", "MIPS (32-bit big endian)", "Mips");
RegisterTarget<Triple::mipsel,
/*HasJIT=*/true>
- Y(getTheMipselTarget(), "mipsel", "Mipsel", "Mips");
+ Y(getTheMipselTarget(), "mipsel", "MIPS (32-bit little endian)", "Mips");
RegisterTarget<Triple::mips64,
/*HasJIT=*/true>
- A(getTheMips64Target(), "mips64", "Mips64 [experimental]", "Mips");
+ A(getTheMips64Target(), "mips64", "MIPS (64-bit big endian)", "Mips");
RegisterTarget<Triple::mips64el,
/*HasJIT=*/true>
- B(getTheMips64elTarget(), "mips64el", "Mips64el [experimental]", "Mips");
+ B(getTheMips64elTarget(), "mips64el", "MIPS (64-bit little endian)",
+ "Mips");
}
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index aeb90eca3a05..f7b4cf3a0f72 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -25,6 +25,12 @@ NVPTXTargetStreamer::NVPTXTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
NVPTXTargetStreamer::~NVPTXTargetStreamer() = default;
+void NVPTXTargetStreamer::outputDwarfFileDirectives() {
+ for (const std::string &S : DwarfFiles)
+ getStreamer().EmitRawText(S.data());
+ DwarfFiles.clear();
+}
+
void NVPTXTargetStreamer::emitDwarfFileDirective(StringRef Directive) {
DwarfFiles.emplace_back(Directive);
}
@@ -82,9 +88,7 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection,
OS << "//\t}\n";
if (isDwarfSection(FI, Section)) {
// Emit DWARF .file directives in the outermost scope.
- for (const std::string &S : DwarfFiles)
- getStreamer().EmitRawText(S.data());
- DwarfFiles.clear();
+ outputDwarfFileDirectives();
OS << "//\t.section";
Section->PrintSwitchToSection(*getStreamer().getContext().getAsmInfo(),
FI->getTargetTriple(), OS, SubSection);
@@ -92,3 +96,30 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection,
OS << "//\t{\n";
}
}
+
+void NVPTXTargetStreamer::emitRawBytes(StringRef Data) {
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+ const char *Directive = MAI->getData8bitsDirective();
+ unsigned NumElements = Data.size();
+ const unsigned MaxLen = 40;
+ unsigned NumChunks = 1 + ((NumElements - 1) / MaxLen);
+ // Split very long directives into several parts so that no single line
+ // carries more than MaxLen values.
+ for (unsigned I = 0; I < NumChunks; ++I) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
+ const char *Label = Directive;
+ for (auto It = std::next(Data.bytes_begin(), I * MaxLen),
+ End = (I == NumChunks - 1)
+ ? Data.bytes_end()
+ : std::next(Data.bytes_begin(), (I + 1) * MaxLen);
+ It != End; ++It) {
+ OS << Label << (unsigned)*It;
+ if (Label == Directive)
+ Label = ",";
+ }
+ Streamer.EmitRawText(OS.str());
+ }
+}
+
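The emitRawBytes() hunk above emits data as 8-bit data directives, breaking long runs into chunks of at most 40 values per line. Below is a self-contained sketch of the same chunking arithmetic in plain C++; the ".b8" directive string is an assumption standing in for whatever MAI->getData8bitsDirective() returns:

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
  const unsigned MaxLen = 40;
  std::vector<unsigned char> Data(100, 0xAB);             // 100 bytes -> 3 lines
  unsigned NumChunks = 1 + ((Data.size() - 1) / MaxLen);  // same formula as the patch
  for (unsigned I = 0; I < NumChunks; ++I) {
    std::string Line;
    const char *Label = ".b8 ";                           // directive first, then commas
    unsigned End = std::min<unsigned>((I + 1) * MaxLen, Data.size());
    for (unsigned J = I * MaxLen; J < End; ++J) {
      Line += Label + std::to_string((unsigned)Data[J]);
      Label = ",";
    }
    std::cout << Line << "\n";                            // 40, 40 and 20 values per line
  }
}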
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
index 30831ab8bbeb..f18e61cdca57 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
@@ -24,6 +24,9 @@ public:
NVPTXTargetStreamer(MCStreamer &S);
~NVPTXTargetStreamer() override;
+ /// Outputs the list of the DWARF '.file' directives to the streamer.
+ void outputDwarfFileDirectives();
+
/// Record DWARF file directives for later output.
/// According to PTX ISA, CUDA Toolkit documentation, 11.5.3. Debugging
/// Directives: .file
@@ -39,6 +42,10 @@ public:
void emitDwarfFileDirective(StringRef Directive) override;
void changeSection(const MCSection *CurSection, MCSection *Section,
const MCExpr *SubSection, raw_ostream &OS) override;
+ /// Emit the bytes in \p Data into the output.
+ ///
+ /// This is used to emit bytes in \p Data as sequence of .byte directives.
+ void emitRawBytes(StringRef Data) override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index 902d1b25e7dd..07bfc58a8da7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -46,13 +46,14 @@ FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
-FunctionPass *createNVVMReflectPass();
+FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM);
BasicBlockPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();
+MachineFunctionPass *createNVPTXProxyRegErasurePass();
Target &getTheNVPTXTarget32();
Target &getTheNVPTXTarget64();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
index 6494c46f54ab..3731b2f37f6c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
@@ -54,6 +54,8 @@ def SM70 : SubtargetFeature<"sm_70", "SmVersion", "70",
"Target SM 7.0">;
def SM72 : SubtargetFeature<"sm_72", "SmVersion", "72",
"Target SM 7.2">;
+def SM75 : SubtargetFeature<"sm_75", "SmVersion", "75",
+ "Target SM 7.5">;
// PTX Versions
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
@@ -72,6 +74,8 @@ def PTX60 : SubtargetFeature<"ptx60", "PTXVersion", "60",
"Use PTX version 6.0">;
def PTX61 : SubtargetFeature<"ptx61", "PTXVersion", "61",
"Use PTX version 6.1">;
+def PTX63 : SubtargetFeature<"ptx63", "PTXVersion", "63",
+ "Use PTX version 6.3">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -94,6 +98,7 @@ def : Proc<"sm_61", [SM61, PTX50]>;
def : Proc<"sm_62", [SM62, PTX50]>;
def : Proc<"sm_70", [SM70, PTX60]>;
def : Proc<"sm_72", [SM72, PTX61]>;
+def : Proc<"sm_75", [SM75, PTX63]>;
def NVPTXInstrInfo : InstrInfo {
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index bed52293197d..bf922eb8a195 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -41,7 +41,7 @@ public:
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
bool functionModified = false;
Function::iterator I = function.begin();
- TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
+ Instruction *firstTerminatorInst = (I++)->getTerminator();
for (Function::iterator E = function.end(); I != E; ++I) {
for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index a966b9928400..6284ad8b82e8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -16,6 +16,7 @@
#include "InstPrinter/NVPTXInstPrinter.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "MCTargetDesc/NVPTXTargetStreamer.h"
#include "NVPTX.h"
#include "NVPTXMCExpr.h"
#include "NVPTXMachineFunctionInfo.h"
@@ -199,7 +200,7 @@ bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
// Ewwww
- TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
+ LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
const char *Sym = MFI->getImageHandleSymbol(Index);
@@ -218,11 +219,12 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
return;
}
+ const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
- if (!nvptxSubtarget->hasImageHandles()) {
+ if (!STI.hasImageHandles()) {
if (lowerImageHandleOperand(MI, i, MCOp)) {
OutMI.addOperand(MCOp);
continue;
@@ -328,11 +330,12 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
- const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
+ const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
+ const TargetLowering *TLI = STI.getTargetLowering();
Type *Ty = F->getReturnType();
- bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
if (Ty->getTypeID() == Type::VoidTyID)
return;
@@ -473,7 +476,6 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
}
bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
- nvptxSubtarget = &F.getSubtarget<NVPTXSubtarget>();
bool Result = AsmPrinter::runOnMachineFunction(F);
// Emit closing brace for the body of function F.
// The closing brace must be emitted here because we need to emit additional
@@ -507,8 +509,9 @@ void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
OutStreamer->AddComment(Twine("implicit-def: ") +
getVirtualRegisterName(RegNo));
} else {
+ const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
OutStreamer->AddComment(Twine("implicit-def: ") +
- nvptxSubtarget->getRegisterInfo()->getName(RegNo));
+ STI.getRegisterInfo()->getName(RegNo));
}
OutStreamer->AddBlankLine();
}
@@ -727,6 +730,11 @@ void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = &*FI;
+ if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
+ emitDeclaration(F, O);
+ continue;
+ }
+
if (F->isDeclaration()) {
if (F->use_empty())
continue;
@@ -785,11 +793,8 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
// Construct a default subtarget off of the TargetMachine defaults. The
// rest of NVPTX isn't friendly to change subtargets per function and
// so the default TargetMachine will have all of the options.
- const Triple &TT = TM.getTargetTriple();
- StringRef CPU = TM.getTargetCPU();
- StringRef FS = TM.getTargetFeatureString();
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
- const NVPTXSubtarget STI(TT, CPU, FS, NTM);
+ const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
if (M.alias_size()) {
report_fatal_error("Module has aliases, which NVPTX does not support.");
@@ -813,7 +818,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
bool Result = AsmPrinter::doInitialization(M);
// Emit header before any dwarf directives are emitted below.
- emitHeader(M, OS1, STI);
+ emitHeader(M, OS1, *STI);
OutStreamer->EmitRawText(OS1.str());
// Emit module-level inline asm if it exists.
@@ -880,8 +885,22 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
if (NTM.getDrvInterface() == NVPTX::NVCL)
O << ", texmode_independent";
+ bool HasFullDebugInfo = false;
+ for (DICompileUnit *CU : M.debug_compile_units()) {
+ switch(CU->getEmissionKind()) {
+ case DICompileUnit::NoDebug:
+ case DICompileUnit::DebugDirectivesOnly:
+ break;
+ case DICompileUnit::LineTablesOnly:
+ case DICompileUnit::FullDebug:
+ HasFullDebugInfo = true;
+ break;
+ }
+ if (HasFullDebugInfo)
+ break;
+ }
// FIXME: remove comment once debug info is properly supported.
- if (MMI && MMI->hasDebugInfo())
+ if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
O << "//, debug";
O << "\n";
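The emitHeader() hunk above only appends the "//, debug" marker when at least one compile unit actually carries line tables or full debug info; directives-only and no-debug units do not count. A standalone restatement of that filter (plain C++; the enum names mirror the DICompileUnit emission kinds):

#include <cassert>
#include <vector>

enum class EmissionKind { NoDebug, DebugDirectivesOnly, LineTablesOnly, FullDebug };

// True when some compile unit carries line tables or full debug info.
static bool hasFullDebugInfo(const std::vector<EmissionKind> &CUs) {
  for (EmissionKind K : CUs)
    if (K == EmissionKind::LineTablesOnly || K == EmissionKind::FullDebug)
      return true;
  return false;
}

int main() {
  assert(!hasFullDebugInfo({}));                                   // no debug info at all
  assert(!hasFullDebugInfo({EmissionKind::DebugDirectivesOnly}));  // directives only: no marker
  assert(hasFullDebugInfo({EmissionKind::NoDebug, EmissionKind::FullDebug}));
}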
@@ -938,6 +957,10 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
if (HasDebugInfo)
OutStreamer->EmitRawText("//\t}");
+ // Output last DWARF .file directives, if any.
+ static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
+ ->outputDwarfFileDirectives();
+
return ret;
//bool Result = AsmPrinter::doFinalization(M);
@@ -1412,12 +1435,14 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
const AttributeList &PAL = F->getAttributes();
- const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
+ const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
+ const TargetLowering *TLI = STI.getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
bool isKernelFunc = isKernelFunction(*F);
- bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
+ bool hasImageHandles = STI.hasImageHandles();
MVT thePointerTy = TLI->getPointerTy(DL);
if (F->arg_empty()) {
@@ -1441,7 +1466,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isImage(*I)) {
std::string sname = I->getName();
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
- if (nvptxSubtarget->hasImageHandles())
+ if (hasImageHandles)
O << "\t.param .u64 .ptr .surfref ";
else
O << "\t.param .surfref ";
@@ -1449,7 +1474,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "_param_" << paramIndex;
}
else { // Default image is read_only
- if (nvptxSubtarget->hasImageHandles())
+ if (hasImageHandles)
O << "\t.param .u64 .ptr .texref ";
else
O << "\t.param .texref ";
@@ -1457,7 +1482,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "_param_" << paramIndex;
}
} else {
- if (nvptxSubtarget->hasImageHandles())
+ if (hasImageHandles)
O << "\t.param .u64 .ptr .samplerref ";
else
O << "\t.param .samplerref ";
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index efe98003b1c8..44a09f5fe513 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -258,9 +258,6 @@ private:
typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap;
VRegRCMap VRegMapping;
- // Cache the subtarget here.
- const NVPTXSubtarget *nvptxSubtarget;
-
// List of variables demoted to a function scope.
std::map<const Function *, std::vector<const GlobalVariable *>> localDecls;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 21939d836dc7..ffc6a59cd6c8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -981,9 +981,8 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
if (!NVPTXLD)
return false;
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});
ReplaceNode(N, NVPTXLD);
return true;
@@ -1221,9 +1220,8 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
}
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
ReplaceNode(N, LD);
return true;
@@ -1659,9 +1657,8 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
}
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = Mem->getMemOperand();
- cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = Mem->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
// For automatic generation of LDG (through SelectLoad[Vector], not the
// intrinsics), we may have an extending load like:
@@ -1864,9 +1861,8 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!NVPTXST)
return false;
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
ReplaceNode(N, NVPTXST);
return true;
}
@@ -2088,9 +2084,8 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
ReplaceNode(N, ST);
return true;
@@ -2236,9 +2231,8 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
return false;
SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
ReplaceNode(N, Ret);
return true;
@@ -2341,9 +2335,8 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
SDNode *Ret =
CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
ReplaceNode(N, Ret);
return true;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 2536623fb853..bec8ece29050 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -180,6 +180,18 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
return;
}
+ // Given a struct type, recursively traverse the elements with custom
+ // ComputePTXValueVTs.
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ auto const *SL = DL.getStructLayout(STy);
+ auto ElementNum = 0;
+ for(auto *EI : STy->elements()) {
+ ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(ElementNum));
+ ++ElementNum;
+ }
+ return;
+ }
+
ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
EVT VT = TempVTs[i];
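The struct case added to ComputePTXValueVTs above recurses into each element and adds the element's offset within the struct to the offset passed in by the caller, so nested aggregates flatten into one list of (type, byte offset) pairs. Below is a self-contained model of that recursion in plain C++ with a toy layout; the type names and offsets are illustrative, not LLVM data structures:

#include <cstdint>
#include <iostream>
#include <string>
#include <variant>
#include <vector>

struct Struct;
using Type = std::variant<std::string, const Struct *>;  // scalar name or nested struct
struct Field { Type Ty; uint64_t Offset; };               // offset inside the enclosing struct
struct Struct { std::vector<Field> Fields; };

static void flatten(const Type &Ty, uint64_t StartingOffset,
                    std::vector<std::pair<std::string, uint64_t>> &Out) {
  if (auto *S = std::get_if<const Struct *>(&Ty)) {
    for (const Field &F : (*S)->Fields)
      flatten(F.Ty, StartingOffset + F.Offset, Out);       // recurse, accumulating offsets
    return;
  }
  Out.push_back({std::get<std::string>(Ty), StartingOffset});
}

int main() {
  // struct Inner { float a; float b; };  struct Outer { int x; Inner in; };
  Struct Inner{{{std::string("float"), 0}, {std::string("float"), 4}}};
  Struct Outer{{{std::string("i32"), 0}, {&Inner, 4}}};
  std::vector<std::pair<std::string, uint64_t>> Flat;
  flatten(Type(&Outer), /*StartingOffset=*/0, Flat);
  for (const auto &P : Flat)
    std::cout << P.first << " @ " << P.second << "\n";     // i32 @ 0, float @ 4, float @ 8
}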
@@ -560,8 +572,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
}
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
// No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
// No FPOW or FREM in PTX.
@@ -651,6 +663,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::CallSeqEnd";
case NVPTXISD::CallPrototype:
return "NVPTXISD::CallPrototype";
+ case NVPTXISD::ProxyReg:
+ return "NVPTXISD::ProxyReg";
case NVPTXISD::LoadV2:
return "NVPTXISD::LoadV2";
case NVPTXISD::LoadV4:
@@ -1170,7 +1184,7 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
TargetLoweringBase::LegalizeTypeAction
-NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
+NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
return TypeSplitVector;
if (VT == MVT::v2f16)
@@ -1649,7 +1663,24 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- if (!Func) {
+ // Both indirect calls and libcalls have nullptr Func. In order to distinguish
+ // between them we must rely on the call site value which is valid for
+ // indirect calls but is always null for libcalls.
+ bool isIndirectCall = !Func && CS;
+
+ if (isa<ExternalSymbolSDNode>(Callee)) {
+ Function* CalleeFunc = nullptr;
+
+ // Try to find the callee in the current module.
+ Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
+ assert(CalleeFunc != nullptr && "Libcall callee must be set.");
+
+ // Set the "libcall callee" attribute to indicate that the function
+ // must always have a declaration.
+ CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
+ }
+
+ if (isIndirectCall) {
// This is indirect function call case : PTX requires a prototype of the
// form
// proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
@@ -1673,7 +1704,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
};
// We model convergent calls as separate opcodes.
- unsigned Opcode = Func ? NVPTXISD::PrintCallUni : NVPTXISD::PrintCall;
+ unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
if (CLI.IsConvergent)
Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
: NVPTXISD::PrintConvergentCall;
@@ -1707,12 +1738,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgEndOps[] = { Chain,
- DAG.getConstant(Func ? 1 : 0, dl, MVT::i32),
+ DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32),
InFlag };
Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
InFlag = Chain.getValue(1);
- if (!Func) {
+ if (isIndirectCall) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue PrototypeOps[] = { Chain,
DAG.getConstant(uniqueCallSite, dl, MVT::i32),
@@ -1721,6 +1752,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
}
+ SmallVector<SDValue, 16> ProxyRegOps;
+ SmallVector<Optional<MVT>, 16> ProxyRegTruncates;
+
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
SmallVector<EVT, 16> VTs;
@@ -1791,11 +1825,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineMemOperand::MOLoad);
for (unsigned j = 0; j < NumElts; ++j) {
- SDValue Ret = RetVal.getValue(j);
+ ProxyRegOps.push_back(RetVal.getValue(j));
+
if (needTruncate)
- Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret);
- InVals.push_back(Ret);
+ ProxyRegTruncates.push_back(Optional<MVT>(Ins[VecIdx + j].VT));
+ else
+ ProxyRegTruncates.push_back(Optional<MVT>());
}
+
Chain = RetVal.getValue(NumElts);
InFlag = RetVal.getValue(NumElts + 1);
@@ -1811,8 +1848,29 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
true),
InFlag, dl);
+ InFlag = Chain.getValue(1);
uniqueCallSite++;
+ // Append ProxyReg instructions to the chain to make sure that `callseq_end`
+ // will not get lost. Otherwise, during libcall expansion, the nodes can become
+ // dangling.
+ for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
+ SDValue Ret = DAG.getNode(
+ NVPTXISD::ProxyReg, dl,
+ DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
+ { Chain, ProxyRegOps[i], InFlag }
+ );
+
+ Chain = Ret.getValue(1);
+ InFlag = Ret.getValue(2);
+
+ if (ProxyRegTruncates[i].hasValue()) {
+ Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret);
+ }
+
+ InVals.push_back(Ret);
+ }
+
// set isTailCall to false for now, until we figure out how to express
// tail call optimization in PTX
isTailCall = false;
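The LowerCall() changes above distinguish libcalls from true indirect calls: both arrive with a null Function*, but only an indirect call has a call site. A minimal truth table for that predicate (plain C++; booleans stand in for the Func and CS values from the patch):

#include <cassert>

// Mirrors `bool isIndirectCall = !Func && CS;` from the hunk above.
static bool isIndirectCall(bool HasFunc, bool HasCallSite) {
  return !HasFunc && HasCallSite;
}

int main() {
  assert(!isIndirectCall(true,  true));   // direct call to a known Function
  assert( isIndirectCall(false, true));   // call through a function pointer
  assert(!isIndirectCall(false, false));  // libcall introduced by legalization
}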
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index ef04a8573d45..66fab2b6f480 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -51,6 +51,7 @@ enum NodeType : unsigned {
CallSeqBegin,
CallSeqEnd,
CallPrototype,
+ ProxyReg,
FUN_SHFL_CLAMP,
FUN_SHFR_CLAMP,
MUL_WIDE_SIGNED,
@@ -511,7 +512,7 @@ public:
}
TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const override;
+ getPreferredVectorAction(MVT VT) const override;
// Get the degree of precision we want from 32-bit floating point division
// operations.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 443b077184c7..02a40b9f5262 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1318,9 +1318,6 @@ def ROTR64reg_sw :
// Create SDNodes so they can be used in the DAG code, e.g.
// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
-def SDTIntShiftDOp :
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisInt<3>]>;
def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
@@ -1888,6 +1885,7 @@ def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
+def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;
def DeclareParam :
SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
@@ -1975,6 +1973,9 @@ def PseudoUseParam :
def RETURNNode :
SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
[SDNPHasChain, SDNPSideEffect]>;
+def ProxyReg :
+ SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
let mayLoad = 1 in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
@@ -2252,6 +2253,21 @@ def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
+class ProxyRegInst<string SzStr, NVPTXRegClass regclass> :
+ NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
+ !strconcat("mov.", SzStr, " \t$dst, $src;"),
+ [(set regclass:$dst, (ProxyReg regclass:$src))]>;
+
+let isCodeGenOnly=1, isPseudo=1 in {
+ def ProxyRegI1 : ProxyRegInst<"pred", Int1Regs>;
+ def ProxyRegI16 : ProxyRegInst<"b16", Int16Regs>;
+ def ProxyRegI32 : ProxyRegInst<"b32", Int32Regs>;
+ def ProxyRegI64 : ProxyRegInst<"b64", Int64Regs>;
+ def ProxyRegF16 : ProxyRegInst<"b16", Float16Regs>;
+ def ProxyRegF32 : ProxyRegInst<"f32", Float32Regs>;
+ def ProxyRegF64 : ProxyRegInst<"f64", Float64Regs>;
+ def ProxyRegF16x2 : ProxyRegInst<"b32", Float16x2Regs>;
+}
//
// Load / Store Handling
@@ -2544,7 +2560,7 @@ let mayStore=1, hasSideEffects=0 in {
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
NVPTXRegClass regclassOut> :
NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
- !strconcat("mov.b", !strconcat(SzStr, " \t$d, $a;")),
+ !strconcat("mov.b", SzStr, " \t$d, $a;"),
[(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
def BITCONVERT_16_I2F : F_BITCONVERT<"16", Int16Regs, Float16Regs>;
@@ -2625,32 +2641,20 @@ def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
def : Pat<(i1 (fp_to_sint Float16Regs:$a)),
(SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
- (CVT_s16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
(CVT_s16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
- (CVT_s32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
(CVT_s32_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
- (CVT_s64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
(CVT_s64_f16 Float16Regs:$a, CvtRZI)>;
// f16 -> uint
def : Pat<(i1 (fp_to_uint Float16Regs:$a)),
(SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
- (CVT_u16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
(CVT_u16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
- (CVT_u32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
(CVT_u32_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
- (CVT_u64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
(CVT_u64_f16 Float16Regs:$a, CvtRZI)>;
// f32 -> sint
@@ -2948,14 +2952,10 @@ def : Pat<(i32 (zext (ctpop Int16Regs:$a))),
// fpround f32 -> f16
def : Pat<(f16 (fpround Float32Regs:$a)),
- (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f16 (fpround Float32Regs:$a)),
(CVT_f16_f32 Float32Regs:$a, CvtRN)>;
// fpround f64 -> f16
def : Pat<(f16 (fpround Float64Regs:$a)),
- (CVT_f16_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f16 (fpround Float64Regs:$a)),
(CVT_f16_f64 Float64Regs:$a, CvtRN)>;
// fpround f64 -> f32
@@ -2972,8 +2972,6 @@ def : Pat<(f32 (fpextend Float16Regs:$a)),
// fpextend f16 -> f64
def : Pat<(f64 (fpextend Float16Regs:$a)),
- (CVT_f64_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f64 (fpextend Float16Regs:$a)),
(CVT_f64_f16 Float16Regs:$a, CvtNONE)>;
// fpextend f32 -> f64
@@ -2988,9 +2986,7 @@ def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
// fceil, ffloor, fround, ftrunc.
def : Pat<(fceil Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(fceil Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRPI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRPI)>;
def : Pat<(fceil Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fceil Float32Regs:$a),
@@ -2999,9 +2995,7 @@ def : Pat<(fceil Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
def : Pat<(ffloor Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(ffloor Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRMI)>;
def : Pat<(ffloor Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ffloor Float32Regs:$a),
@@ -3009,10 +3003,8 @@ def : Pat<(ffloor Float32Regs:$a),
def : Pat<(ffloor Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
-def : Pat<(fround Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f16 (fround Float16Regs:$a)),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(fround Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f32 (fround Float32Regs:$a)),
@@ -3021,9 +3013,7 @@ def : Pat<(f64 (fround Float64Regs:$a)),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(ftrunc Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(ftrunc Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(ftrunc Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ftrunc Float32Regs:$a),
@@ -3036,9 +3026,7 @@ def : Pat<(ftrunc Float64Regs:$a),
// matches what CUDA's "libm" does.
def : Pat<(fnearbyint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(fnearbyint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(fnearbyint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fnearbyint Float32Regs:$a),
@@ -3047,9 +3035,7 @@ def : Pat<(fnearbyint Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(frint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(frint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(frint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(frint Float32Regs:$a),
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 5bb4fc3edd09..2ca0ccf2dfa7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -60,6 +61,24 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (!MI.getOperand(i).isFI())
continue;
+
+ // Frame indices in debug values are encoded in a target-independent
+ // way, using just the frame index and an offset rather than any
+ // target-specific addressing mode.
+ if (MI.isDebugValue()) {
+ assert(i == 0 && "Frame indices can only appear as the first "
+ "operand of a DBG_VALUE machine instruction");
+ unsigned Reg;
+ int64_t Offset =
+ TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
+ MI.getOperand(0).ChangeToRegister(Reg, /*isDef=*/false);
+ MI.getOperand(0).setIsDebug();
+ auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
+ DIExpression::NoDeref, Offset);
+ MI.getOperand(3).setMetadata(DIExpr);
+ continue;
+ }
+
TRI.eliminateFrameIndex(MI, 0, i, nullptr);
Modified = true;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
new file mode 100644
index 000000000000..f60d841c1683
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
@@ -0,0 +1,122 @@
+//===- NVPTXProxyRegErasure.cpp - NVPTX Proxy Register Instruction Erasure -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass removes ProxyReg instructions and restores the related
+// registers. The instructions were needed at the instruction selection stage
+// to make sure that callseq_end nodes are not removed as "dead nodes". This
+// can happen when we expand instructions into libcalls and the call site does
+// not care about the libcall chain. The call site cares only about data flow,
+// and the last data flow node happens to sit before callseq_end, so the
+// callseq_end node becomes dangling and "dead". ProxyReg acts as an additional
+// data flow node *after* callseq_end in the chain and ensures that the call
+// sequence is preserved.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXProxyRegErasurePass(PassRegistry &);
+}
+
+namespace {
+
+struct NVPTXProxyRegErasure : public MachineFunctionPass {
+public:
+ static char ID;
+ NVPTXProxyRegErasure() : MachineFunctionPass(ID) {
+ initializeNVPTXProxyRegErasurePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "NVPTX Proxy Register Instruction Erasure";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ void replaceMachineInstructionUsage(MachineFunction &MF, MachineInstr &MI);
+
+ void replaceRegisterUsage(MachineInstr &Instr, MachineOperand &From,
+ MachineOperand &To);
+};
+
+} // namespace
+
+char NVPTXProxyRegErasure::ID = 0;
+
+INITIALIZE_PASS(NVPTXProxyRegErasure, "nvptx-proxyreg-erasure", "NVPTX ProxyReg Erasure", false, false)
+
+bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
+ SmallVector<MachineInstr *, 16> RemoveList;
+
+ for (auto &BB : MF) {
+ for (auto &MI : BB) {
+ switch (MI.getOpcode()) {
+ case NVPTX::ProxyRegI1:
+ case NVPTX::ProxyRegI16:
+ case NVPTX::ProxyRegI32:
+ case NVPTX::ProxyRegI64:
+ case NVPTX::ProxyRegF16:
+ case NVPTX::ProxyRegF16x2:
+ case NVPTX::ProxyRegF32:
+ case NVPTX::ProxyRegF64:
+ replaceMachineInstructionUsage(MF, MI);
+ RemoveList.push_back(&MI);
+ break;
+ }
+ }
+ }
+
+ for (auto *MI : RemoveList) {
+ MI->eraseFromParent();
+ }
+
+ return !RemoveList.empty();
+}
+
+void NVPTXProxyRegErasure::replaceMachineInstructionUsage(MachineFunction &MF,
+ MachineInstr &MI) {
+ auto &InOp = *MI.uses().begin();
+ auto &OutOp = *MI.defs().begin();
+
+ assert(InOp.isReg() && "ProxyReg input operand should be a register.");
+ assert(OutOp.isReg() && "ProxyReg output operand should be a register.");
+
+ for (auto &BB : MF) {
+ for (auto &I : BB) {
+ replaceRegisterUsage(I, OutOp, InOp);
+ }
+ }
+}
+
+void NVPTXProxyRegErasure::replaceRegisterUsage(MachineInstr &Instr,
+ MachineOperand &From,
+ MachineOperand &To) {
+ for (auto &Op : Instr.uses()) {
+ if (Op.isReg() && Op.getReg() == From.getReg()) {
+ Op.setReg(To.getReg());
+ }
+ }
+}
+
+MachineFunctionPass *llvm::createNVPTXProxyRegErasurePass() {
+ return new NVPTXProxyRegErasure();
+}
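The new pass above rewrites every use of a ProxyReg's destination register to its source register and then deletes the proxy. Below is a self-contained model of that rewrite on a toy instruction list in plain C++; the opcode and register names are invented for illustration:

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Inst { std::string Op; std::vector<std::string> Regs; };  // Regs[0] is the def

int main() {
  std::vector<Inst> Prog = {
      {"call",  {"%r1"}},              // %r1 = call @f()
      {"proxy", {"%r2", "%r1"}},       // %r2 = ProxyReg %r1   <- to be erased
      {"add",   {"%r3", "%r2", "%r2"}}};
  // Pass 1: remember what each proxy def stands for.
  std::map<std::string, std::string> Fwd;
  for (const Inst &I : Prog)
    if (I.Op == "proxy")
      Fwd[I.Regs[0]] = I.Regs[1];
  // Pass 2: rewrite uses and drop the proxies themselves.
  std::vector<Inst> Out;
  for (Inst I : Prog) {
    if (I.Op == "proxy")
      continue;
    for (size_t i = 1; i < I.Regs.size(); ++i) {                 // operands after the def are uses
      auto It = Fwd.find(I.Regs[i]);
      if (It != Fwd.end())
        I.Regs[i] = It->second;
    }
    Out.push_back(I);
  }
  for (const Inst &I : Out) {
    std::cout << I.Regs[0] << " = " << I.Op;
    for (size_t i = 1; i < I.Regs.size(); ++i)
      std::cout << " " << I.Regs[i];
    std::cout << "\n";                                           // %r1 = call / %r3 = add %r1 %r1
  }
}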
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a1b160441df3..8ec0ddb9b3d5 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -68,6 +68,7 @@ void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
+void initializeNVPTXProxyRegErasurePass(PassRegistry &);
} // end namespace llvm
@@ -87,6 +88,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVPTXLowerArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);
+ initializeNVPTXProxyRegErasurePass(PR);
}
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
@@ -102,12 +104,6 @@ static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
return Ret;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -118,7 +114,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
// specified, as it is the only relocation model currently supported.
: LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
CPU, FS, Options, Reloc::PIC_,
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
@@ -166,6 +162,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
+ void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addMachineSSAOptimization() override;
@@ -195,7 +192,7 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
Builder.addExtension(
PassManagerBuilder::EP_EarlyAsPossible,
[&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
- PM.add(createNVVMReflectPass());
+ PM.add(createNVVMReflectPass(Subtarget.getSmVersion()));
PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
});
}
@@ -258,7 +255,8 @@ void NVPTXPassConfig::addIRPasses() {
// it here does nothing. But since we need it for correctness when lowering
// to NVPTX, run it here too, in case whoever built our pass pipeline didn't
// call addEarlyAsPossiblePasses.
- addPass(createNVVMReflectPass());
+ const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
+ addPass(createNVVMReflectPass(ST.getSmVersion()));
if (getOptLevel() != CodeGenOpt::None)
addPass(createNVPTXImageOptimizerPass());
@@ -306,6 +304,11 @@ bool NVPTXPassConfig::addInstSelector() {
return false;
}
+void NVPTXPassConfig::addPreRegAlloc() {
+ // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
+ addPass(createNVPTXProxyRegErasurePass());
+}
+
void NVPTXPassConfig::addPostRegAlloc() {
addPass(createNVPTXPrologEpilogPass(), false);
if (getOptLevel() != CodeGenOpt::None) {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index a631055d36a0..14e93f7447dd 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -49,6 +49,19 @@ public:
return AddressSpace::ADDRESS_SPACE_GENERIC;
}
+ // Loads and stores can be vectorized if the alignment is at least as big as
+ // the load/store we want to vectorize.
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return Alignment >= ChainSizeInBytes;
+ }
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
+ }
+
// NVPTX has infinite registers of all kinds, but the actual machine doesn't.
// We conservatively return 1 here which is just enough to enable the
// vectorizers but disables heuristics based on the number of registers.
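The legality hooks added above reduce load/store chain vectorization to an alignment comparison. A self-contained illustration of that check (hypothetical helper name; it mirrors the logic, not the LLVM interface):

#include <cassert>

// A chain of scalar accesses may be combined into one wide access only if
// the wide access would still be naturally aligned.
static bool isLegalToVectorizeChain(unsigned ChainSizeInBytes, unsigned Alignment) {
  return Alignment >= ChainSizeInBytes;
}

int main() {
  assert(isLegalToVectorizeChain(16, 16));  // four adjacent i32 loads, align 16: OK
  assert(!isLegalToVectorizeChain(16, 4));  // same chain with only align 4: rejected
  return 0;
}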
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 60971b48adfc..64c262664fda 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -50,7 +50,9 @@ namespace {
class NVVMReflect : public FunctionPass {
public:
static char ID;
- NVVMReflect() : FunctionPass(ID) {
+ unsigned int SmVersion;
+ NVVMReflect() : NVVMReflect(0) {}
+ explicit NVVMReflect(unsigned int Sm) : FunctionPass(ID), SmVersion(Sm) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
}
@@ -58,7 +60,9 @@ public:
};
}
-FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); }
+FunctionPass *llvm::createNVVMReflectPass(unsigned int SmVersion) {
+ return new NVVMReflect(SmVersion);
+}
static cl::opt<bool>
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
@@ -163,6 +167,8 @@ bool NVVMReflect::runOnFunction(Function &F) {
if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
ReflectVal = Flag->getSExtValue();
+ } else if (ReflectArg == "__CUDA_ARCH") {
+ ReflectVal = SmVersion * 10;
}
Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal));
ToRemove.push_back(Call);
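With the change above, NVVMReflect folds __nvvm_reflect("__CUDA_ARCH") to ten times the subtarget's SM version, matching the __CUDA_ARCH__ value defined for that architecture. A tiny check of the arithmetic (illustrative only):

#include <cassert>

int main() {
  unsigned SmVersion = 70;              // e.g. an sm_70 subtarget
  unsigned ReflectVal = SmVersion * 10; // value substituted for __CUDA_ARCH
  assert(ReflectVal == 700);
  return 0;
}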
diff --git a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp b/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp
deleted file mode 100644
index de0a5f9e84ea..000000000000
--- a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- Nios2InstPrinter.cpp - Convert Nios2 MCInst to assembly syntax-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an Nios2 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2InstPrinter.h"
-
-#include "Nios2InstrInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#define PRINT_ALIAS_INSTR
-#include "Nios2GenAsmWriter.inc"
-
-void Nios2InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
-}
-
-void Nios2InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
- // Try to print any aliases first.
- if (!printAliasInstr(MI, STI, O))
- printInstruction(MI, STI, O);
- printAnnotation(O, Annot);
-}
-
-void Nios2InstPrinter::printOperand(const MCInst *MI, int OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- printRegName(O, Op.getReg());
- return;
- }
-
- if (Op.isImm()) {
- O << Op.getImm();
- return;
- }
-
- assert(Op.isExpr() && "unknown operand kind in printOperand");
- Op.getExpr()->print(O, &MAI, true);
-}
-
-void Nios2InstPrinter::printMemOperand(const MCInst *MI, int opNum,
- const MCSubtargetInfo &STI,
- raw_ostream &O, const char *Modifier) {
- // Load/Store memory operands -- imm($reg)
- printOperand(MI, opNum + 1, STI, O);
- O << "(";
- printOperand(MI, opNum, STI, O);
- O << ")";
-}
diff --git a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h b/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h
deleted file mode 100644
index 43a12951baea..000000000000
--- a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//= Nios2InstPrinter.h - Convert Nios2 MCInst to assembly syntax -*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a Nios2 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_INSTPRINTER_NIOS2INSTPRINTER_H
-#define LLVM_LIB_TARGET_NIOS2_INSTPRINTER_NIOS2INSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class Nios2InstPrinter : public MCInstPrinter {
-public:
- Nios2InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
-
- void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
-
- // Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
- bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
-
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx,
- const MCSubtargetInfo &STI, raw_ostream &O);
- void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
- raw_ostream &OS);
- void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
- raw_ostream &OS, const char *Modifier = nullptr);
-};
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp
deleted file mode 100644
index 8ac08c6837d9..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-//===-- Nios2AsmBackend.cpp - Nios2 Asm Backend --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Nios2AsmBackend class.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "MCTargetDesc/Nios2AsmBackend.h"
-#include "MCTargetDesc/Nios2FixupKinds.h"
-#include "MCTargetDesc/Nios2MCTargetDesc.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCFixupKindInfo.h"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-
-using namespace llvm;
-
-// Prepare the value so it fits the target's encoding for this fixup.
-static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value) {
-
- unsigned Kind = Fixup.getKind();
-
- // Add/subtract and shift
- switch (Kind) {
- default:
- return 0;
- case Nios2::fixup_Nios2_LO16:
- break;
- case Nios2::fixup_Nios2_HI16:
- // Get the higher 16-bits. Also add 1 if bit 15 is 1.
- Value = ((Value + 0x8000) >> 16) & 0xffff;
- break;
- }
-
- return Value;
-}
-
-// Calculate index for Nios2 specific little endian byte order
-static unsigned calculateLEIndex(unsigned i) {
- assert(i <= 3 && "Index out of range!");
-
- return (1 - i / 2) * 2 + i % 2;
-}
-
-/// ApplyFixup - Apply the \p Value for given \p Fixup into the provided
-/// data fragment, at the offset specified by the fixup and following the
-/// fixup kind as appropriate.
-void Nios2AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) const {
- MCFixupKind Kind = Fixup.getKind();
- Value = adjustFixupValue(Fixup, Value);
-
- if (!Value)
- return; // Doesn't change encoding.
-
- // Where do we start in the object
- unsigned Offset = Fixup.getOffset();
- // Number of bytes we need to fixup
- unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
- // Grab current value, if any, from bits.
- uint64_t CurVal = 0;
-
- for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = calculateLEIndex(i);
- CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i * 8);
- }
-
- uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
- CurVal |= Value & Mask;
-
- // Write out the fixed up bytes back to the code/data bits.
- for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = calculateLEIndex(i);
- Data[Offset + Idx] = (uint8_t)((CurVal >> (i * 8)) & 0xff);
- }
-}
-
-Optional<MCFixupKind> Nios2AsmBackend::getFixupKind(StringRef Name) const {
- return StringSwitch<Optional<MCFixupKind>>(Name)
- .Case("R_NIOS2_NONE", (MCFixupKind)Nios2::fixup_Nios2_32)
- .Case("R_NIOS2_32", FK_Data_4)
- .Default(MCAsmBackend::getFixupKind(Name));
-}
-
-//@getFixupKindInfo {
-const MCFixupKindInfo &
-Nios2AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[Nios2::NumTargetFixupKinds] = {
- // This table *must* be in same the order of fixup_* kinds in
- // Nios2FixupKinds.h.
- //
- // name offset bits flags
- {"fixup_Nios2_32", 0, 32, 0},
- {"fixup_Nios2_HI16", 0, 16, 0},
- {"fixup_Nios2_LO16", 0, 16, 0}};
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
-}
-
-std::unique_ptr<MCObjectTargetWriter>
-Nios2AsmBackend::createObjectTargetWriter() const {
- return createNios2ELFObjectWriter(MCELFObjectTargetWriter::getOSABI(OSType));
-}
-
-bool Nios2AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
- return true;
-}
-
-// MCAsmBackend
-MCAsmBackend *llvm::createNios2AsmBackend(const Target &T,
- const MCSubtargetInfo &STI,
- const MCRegisterInfo &MRI,
- const MCTargetOptions &Options) {
- return new Nios2AsmBackend(T, STI.getTargetTriple().getOS());
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h
deleted file mode 100644
index 1f114bd869b1..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h
+++ /dev/null
@@ -1,81 +0,0 @@
-//===-- Nios2AsmBackend.h - Nios2 Asm Backend ----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Nios2AsmBackend class.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2ASMBACKEND_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2ASMBACKEND_H
-
-#include "MCTargetDesc/Nios2FixupKinds.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAsmBackend.h"
-
-namespace llvm {
-
-class MCAssembler;
-struct MCFixupKindInfo;
-class Target;
-class MCObjectWriter;
-
-class Nios2AsmBackend : public MCAsmBackend {
- Triple::OSType OSType;
-
-public:
- Nios2AsmBackend(const Target &T, Triple::OSType OSType)
- : MCAsmBackend(support::little), OSType(OSType) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override;
-
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
-
- void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsResolved) const override;
-
- Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
-
- unsigned getNumFixupKinds() const override {
- return Nios2::NumTargetFixupKinds;
- }
-
- /// MayNeedRelaxation - Check whether the given instruction may need
- /// relaxation.
- ///
- /// \param Inst - The instruction to test.
- bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
-
- /// fixupNeedsRelaxation - Target specific predicate for whether a given
- /// fixup requires the associated instruction to be relaxed.
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override {
- // FIXME.
- llvm_unreachable("RelaxInstruction() unimplemented");
- return false;
- }
-
- /// RelaxInstruction - Relax the instruction in the given fragment
- /// to the next wider instruction.
- ///
- /// \param Inst - The instruction to relax, which may be the same
- /// as the output.
- /// \param [out] Res On return, the relaxed instruction.
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {}
-
-}; // class Nios2AsmBackend
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h
deleted file mode 100644
index 225671ebc8d8..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- Nios2BaseInfo.h - Top level definitions for NIOS2 MC ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone helper functions and enum definitions for
-// the Nios2 target useful for the compiler back-end and the MC libraries.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2BASEINFO_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2BASEINFO_H
-
-namespace llvm {
-
-/// Nios2FG - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-namespace Nios2FG {
-/// Target Operand Flag enum.
-enum TOF {
- //===------------------------------------------------------------------===//
- // Nios2 Specific MachineOperand flags.
-
- MO_NO_FLAG,
-
- /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
- /// address.
- MO_ABS_HI,
- MO_ABS_LO,
-
-};
-} // namespace Nios2FG
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp
deleted file mode 100644
index db432d15120d..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- Nios2ELFObjectWriter.cpp - Nios2 ELF Writer -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/Nios2FixupKinds.h"
-#include "MCTargetDesc/Nios2MCExpr.h"
-#include "MCTargetDesc/Nios2MCTargetDesc.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCObjectWriter.h"
-
-using namespace llvm;
-
-namespace {
-class Nios2ELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- Nios2ELFObjectWriter(uint8_t OSABI)
- : MCELFObjectTargetWriter(false, OSABI, ELF::EM_ALTERA_NIOS2, false) {}
-
- ~Nios2ELFObjectWriter() override;
-
- unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
- const MCFixup &Fixup, bool IsPCRel) const override;
-};
-} // namespace
-
-Nios2ELFObjectWriter::~Nios2ELFObjectWriter() {}
-
-unsigned Nios2ELFObjectWriter::getRelocType(MCContext &Ctx,
- const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- return 0;
-}
-
-std::unique_ptr<MCObjectTargetWriter>
-llvm::createNios2ELFObjectWriter(uint8_t OSABI) {
- return llvm::make_unique<Nios2ELFObjectWriter>(OSABI);
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h
deleted file mode 100644
index c169a1b19371..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===-- Nios2FixupKinds.h - Nios2 Specific Fixup Entries --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2FIXUPKINDS_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2FIXUPKINDS_H
-
-#include "llvm/MC/MCFixup.h"
-
-namespace llvm {
-namespace Nios2 {
-// Although most of the current fixup types reflect a unique relocation,
-// one can have multiple fixup types for a given relocation, and thus they
-// need to be uniquely named.
-//
-// This table *must* be in the same order as
-// MCFixupKindInfo Infos[Nios2::NumTargetFixupKinds]
-// in Nios2AsmBackend.cpp.
-enum Fixups {
- // Pure upper 32 bit fixup resulting in - R_NIOS2_32.
- fixup_Nios2_32 = FirstTargetFixupKind,
-
- // Pure upper 16 bit fixup resulting in - R_NIOS2_HI16.
- fixup_Nios2_HI16,
-
- // Pure lower 16 bit fixup resulting in - R_NIOS2_LO16.
- fixup_Nios2_LO16,
-
- // Marker
- LastTargetFixupKind,
- NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
-};
-} // namespace Nios2
-} // namespace llvm
-
-#endif // LLVM_NIOS2_NIOS2FIXUPKINDS_H
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp
deleted file mode 100644
index e3c66e6776c2..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-//===-- Nios2MCAsmInfo.cpp - Nios2 Asm Properties -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the Nios2MCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2MCAsmInfo.h"
-
-#include "llvm/ADT/Triple.h"
-
-using namespace llvm;
-
-void Nios2MCAsmInfo::anchor() {}
-
-Nios2MCAsmInfo::Nios2MCAsmInfo(const Triple &TheTriple) {
- if ((TheTriple.getArch() == Triple::nios2))
- IsLittleEndian = true; // the default of IsLittleEndian is true
-
- AlignmentIsInBytes = false;
- Data16bitsDirective = "\t.2byte\t";
- Data32bitsDirective = "\t.4byte\t";
- Data64bitsDirective = "\t.8byte\t";
- PrivateLabelPrefix = ".LC";
- CommentString = "#";
- ZeroDirective = "\t.space\t";
- GPRel32Directive = "\t.gpword\t";
- GPRel64Directive = "\t.gpdword\t";
- WeakRefDirective = "\t.weak\t";
- GlobalDirective = "\t.global\t";
- AscizDirective = "\t.string\t";
- UseAssignmentForEHBegin = true;
-
- SupportsDebugInformation = true;
- ExceptionsType = ExceptionHandling::DwarfCFI;
- DwarfRegNumForCFI = true;
- UsesELFSectionDirectiveForBSS = true;
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h
deleted file mode 100644
index 0c81276f84d8..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- Nios2MCAsmInfo.h - Nios2 Asm Info ----------------------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the Nios2MCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCASMINFO_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCASMINFO_H
-
-#include "llvm/MC/MCAsmInfoELF.h"
-
-namespace llvm {
-class Triple;
-
-class Nios2MCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
-
-public:
- explicit Nios2MCAsmInfo(const Triple &TheTriple);
-};
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp
deleted file mode 100644
index 0f12c9e93378..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-//===-- Nios2MCExpr.cpp - Nios2 specific MC expression classes ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2.h"
-
-#include "Nios2MCExpr.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSymbolELF.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2mcexpr"
-
-const Nios2MCExpr *Nios2MCExpr::create(Nios2MCExpr::Nios2ExprKind Kind,
- const MCExpr *Expr, MCContext &Ctx) {
- return new (Ctx) Nios2MCExpr(Kind, Expr);
-}
-
-const Nios2MCExpr *Nios2MCExpr::create(const MCSymbol *Symbol,
- Nios2MCExpr::Nios2ExprKind Kind,
- MCContext &Ctx) {
- const MCSymbolRefExpr *MCSym =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
- return new (Ctx) Nios2MCExpr(Kind, MCSym);
-}
-
-void Nios2MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
-
- switch (Kind) {
- case CEK_None:
- case CEK_Special:
- llvm_unreachable("CEK_None and CEK_Special are invalid");
- break;
- case CEK_ABS_HI:
- OS << "%hiadj";
- break;
- case CEK_ABS_LO:
- OS << "%lo";
- break;
- }
-
- OS << '(';
- Expr->print(OS, MAI, true);
- OS << ')';
-}
-
-bool Nios2MCExpr::evaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout,
- const MCFixup *Fixup) const {
- return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
-}
-
-void Nios2MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
- Streamer.visitUsedExpr(*getSubExpr());
-}
-
-void Nios2MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
- switch (getKind()) {
- case CEK_None:
- case CEK_Special:
- llvm_unreachable("CEK_None and CEK_Special are invalid");
- break;
- case CEK_ABS_HI:
- case CEK_ABS_LO:
- break;
- }
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h
deleted file mode 100644
index 5b49005eb648..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Nios2MCExpr.h - Nios2 specific MC expression classes ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCEXPR_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCEXPR_H
-
-#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCValue.h"
-
-namespace llvm {
-
-class Nios2MCExpr : public MCTargetExpr {
-public:
- enum Nios2ExprKind {
- CEK_None,
- CEK_ABS_HI,
- CEK_ABS_LO,
- CEK_Special,
- };
-
-private:
- const Nios2ExprKind Kind;
- const MCExpr *Expr;
-
- explicit Nios2MCExpr(Nios2ExprKind Kind, const MCExpr *Expr)
- : Kind(Kind), Expr(Expr) {}
-
-public:
- static const Nios2MCExpr *create(Nios2ExprKind Kind, const MCExpr *Expr,
- MCContext &Ctx);
- static const Nios2MCExpr *create(const MCSymbol *Symbol,
- Nios2MCExpr::Nios2ExprKind Kind,
- MCContext &Ctx);
-
- /// Get the kind of this expression.
- Nios2ExprKind getKind() const { return Kind; }
-
- /// Get the child of this expression.
- const MCExpr *getSubExpr() const { return Expr; }
-
- void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
- bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
- const MCFixup *Fixup) const override;
- void visitUsedExpr(MCStreamer &Streamer) const override;
- MCFragment *findAssociatedFragment() const override {
- return getSubExpr()->findAssociatedFragment();
- }
-
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
-};
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp
deleted file mode 100644
index e57b44d3cfdc..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-//===-- Nios2MCTargetDesc.cpp - Nios2 Target Descriptions -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Nios2 specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2MCTargetDesc.h"
-#include "InstPrinter/Nios2InstPrinter.h"
-#include "Nios2MCAsmInfo.h"
-#include "Nios2TargetStreamer.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-#define GET_INSTRINFO_MC_DESC
-#include "Nios2GenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "Nios2GenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "Nios2GenRegisterInfo.inc"
-
-static MCInstrInfo *createNios2MCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitNios2MCInstrInfo(X); // defined in Nios2GenInstrInfo.inc
- return X;
-}
-
-static MCRegisterInfo *createNios2MCRegisterInfo(const Triple &TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitNios2MCRegisterInfo(X, Nios2::R15); // defined in Nios2GenRegisterInfo.inc
- return X;
-}
-
-static MCSubtargetInfo *
-createNios2MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- if (CPU.empty() || CPU == "generic")
- CPU = "nios2r1";
- return createNios2MCSubtargetInfoImpl(TT, CPU, FS);
- // createNios2MCSubtargetInfoImpl defined in Nios2GenSubtargetInfo.inc
-}
-
-static MCAsmInfo *createNios2MCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TT) {
- MCAsmInfo *MAI = new Nios2MCAsmInfo(TT);
-
- unsigned SP = MRI.getDwarfRegNum(Nios2::SP, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0);
- MAI->addInitialFrameState(Inst);
-
- return MAI;
-}
-
-static MCInstPrinter *createNios2MCInstPrinter(const Triple &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI) {
- return new Nios2InstPrinter(MAI, MII, MRI);
-}
-
-static MCTargetStreamer *createNios2AsmTargetStreamer(MCStreamer &S,
- formatted_raw_ostream &OS,
- MCInstPrinter *InstPrint,
- bool isVerboseAsm) {
- return new Nios2TargetAsmStreamer(S, OS);
-}
-
-extern "C" void LLVMInitializeNios2TargetMC() {
- Target *T = &getTheNios2Target();
-
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(*T, createNios2MCAsmInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(*T, createNios2MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(*T, createNios2MCRegisterInfo);
-
- // Register the asm target streamer.
- TargetRegistry::RegisterAsmTargetStreamer(*T, createNios2AsmTargetStreamer);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(*T, createNios2MCSubtargetInfo);
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(*T, createNios2MCInstPrinter);
-
- // Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(*T, createNios2AsmBackend);
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h
deleted file mode 100644
index a7c4b16c6a3b..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===-- Nios2MCTargetDesc.h - Nios2 Target Descriptions ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Nios2 specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H
-
-#include <memory>
-
-namespace llvm {
-class MCAsmBackend;
-class MCObjectTargetWriter;
-class MCRegisterInfo;
-class MCSubtargetInfo;
-class MCTargetOptions;
-class Target;
-class Triple;
-class StringRef;
-class raw_pwrite_stream;
-
-Target &getTheNios2Target();
-
-MCAsmBackend *createNios2AsmBackend(const Target &T, const MCSubtargetInfo &STI,
- const MCRegisterInfo &MRI,
- const MCTargetOptions &Options);
-
-std::unique_ptr<MCObjectTargetWriter> createNios2ELFObjectWriter(uint8_t OSABI);
-
-} // namespace llvm
-
-// Defines symbolic names for Nios2 registers. This defines a mapping from
-// register name to register number.
-#define GET_REGINFO_ENUM
-#include "Nios2GenRegisterInfo.inc"
-
-// Defines symbolic names for the Nios2 instructions.
-#define GET_INSTRINFO_ENUM
-#include "Nios2GenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "Nios2GenSubtargetInfo.inc"
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp
deleted file mode 100644
index 795fd0084aa3..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===-- Nios2TargetStreamer.cpp - Nios2 Target Streamer Methods -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Nios2 specific target streamer methods.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2TargetStreamer.h"
-
-using namespace llvm;
-
-Nios2TargetStreamer::Nios2TargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
-
-Nios2TargetAsmStreamer::Nios2TargetAsmStreamer(MCStreamer &S,
- formatted_raw_ostream &OS)
- : Nios2TargetStreamer(S) {}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2.h b/contrib/llvm/lib/Target/Nios2/Nios2.h
deleted file mode 100644
index d6c5c1e49662..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===-- Nios2.h - Top-level interface for Nios2 representation --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in
-// the LLVM Nios2 back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2_H
-
-#include "MCTargetDesc/Nios2MCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-class FunctionPass;
-class formatted_raw_ostream;
-class Nios2TargetMachine;
-class AsmPrinter;
-class MachineInstr;
-class MCInst;
-
-FunctionPass *createNios2ISelDag(Nios2TargetMachine &TM,
- CodeGenOpt::Level OptLevel);
-void LowerNios2MachineInstToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP);
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2.td b/contrib/llvm/lib/Target/Nios2/Nios2.td
deleted file mode 100644
index 1acf4c70c42c..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2.td
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- Nios2.td - Describe the Nios2 Target Machine -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Calling Conv, Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-include "Nios2RegisterInfo.td"
-include "Nios2Schedule.td"
-include "Nios2InstrInfo.td"
-include "Nios2CallingConv.td"
-
-//===----------------------------------------------------------------------===//
-// Nios2 Subtarget features
-//===----------------------------------------------------------------------===//
-def FeatureNios2r1 : SubtargetFeature<"nios2r1", "Nios2ArchVersion",
- "Nios2r1", "Nios2 R1 ISA Support">;
-def FeatureNios2r2 : SubtargetFeature<"nios2r2", "Nios2ArchVersion",
- "Nios2r2", "Nios2 R2 ISA Support">;
-
-//===----------------------------------------------------------------------===//
-// Nios2 processors supported.
-//===----------------------------------------------------------------------===//
-
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, Nios2GenericItineraries, Features>;
-
-def : Proc<"nios2r1", [FeatureNios2r1]>;
-def : Proc<"nios2r2", [FeatureNios2r2]>;
-
-def Nios2InstrInfo : InstrInfo;
-
-def Nios2AsmParser : AsmParser {
- let ShouldEmitMatchRegisterName = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Declare the target which we are implementing
-//===----------------------------------------------------------------------===//
-
-def Nios2AsmWriter : AsmWriter {
- string AsmWriterClassName = "InstPrinter";
- int PassSubtarget = 1;
- int Variant = 0;
-}
-
-def Nios2 : Target {
-// def Nios2InstrInfo : InstrInfo as before.
- let InstructionSet = Nios2InstrInfo;
- let AssemblyParsers = [Nios2AsmParser];
- let AssemblyWriters = [Nios2AsmWriter];
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp b/contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp
deleted file mode 100644
index 1abf19591774..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===-- Nios2AsmPrinter.cpp - Nios2 LLVM Assembly Printer -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format NIOS2 assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#include "InstPrinter/Nios2InstPrinter.h"
-#include "MCTargetDesc/Nios2BaseInfo.h"
-#include "Nios2.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2-asm-printer"
-
-namespace {
-
-class Nios2AsmPrinter : public AsmPrinter {
-
-public:
- explicit Nios2AsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {}
-
- StringRef getPassName() const override { return "Nios2 Assembly Printer"; }
-
-  //- EmitInstruction() must exist or there will be a run-time error.
- void EmitInstruction(const MachineInstr *MI) override;
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void EmitFunctionEntryLabel() override;
-};
-} // namespace
-
-//- EmitInstruction() must exist or there will be a run-time error.
-void Nios2AsmPrinter::EmitInstruction(const MachineInstr *MI) {
-
-  // Print out both the ordinary instruction and any bundled instructions.
- MachineBasicBlock::const_instr_iterator I = MI->getIterator();
- MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
-
- do {
-
- if (I->isPseudo()) {
- llvm_unreachable("Pseudo opcode found in EmitInstruction()");
- }
-
- MCInst TmpInst0;
- LowerNios2MachineInstToMCInst(&*I, TmpInst0, *this);
- EmitToStreamer(*OutStreamer, TmpInst0);
- } while ((++I != E) && I->isInsideBundle()); // Delay slot check
-}
-
-// .type main,@function
-//-> .ent main # @main
-// main:
-void Nios2AsmPrinter::EmitFunctionEntryLabel() {
- OutStreamer->EmitLabel(CurrentFnSym);
-}
-
-// Print out an operand for an inline asm expression.
-bool Nios2AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
- printOperand(MI, OpNum, O);
- return false;
-}
-
-bool Nios2AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNum, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier
-
- const MachineOperand &MO = MI->getOperand(OpNum);
- assert(MO.isReg() && "unexpected inline asm memory operand");
- O << "($" << Nios2InstPrinter::getRegisterName(MO.getReg()) << ")";
-
- return false;
-}
-
-void Nios2AsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(opNum);
- bool closeP = false;
-
- if (MO.getTargetFlags())
- closeP = true;
-
- switch (MO.getTargetFlags()) {
- case Nios2FG::MO_ABS_HI:
- O << "%hiadj(";
- break;
- case Nios2FG::MO_ABS_LO:
- O << "%lo(";
- break;
- }
-
- switch (MO.getType()) {
- case MachineOperand::MO_Register:
- O << '$'
- << StringRef(Nios2InstPrinter::getRegisterName(MO.getReg())).lower();
- break;
-
- case MachineOperand::MO_Immediate:
- O << MO.getImm();
- break;
-
- case MachineOperand::MO_MachineBasicBlock:
- MO.getMBB()->getSymbol()->print(O, MAI);
- return;
-
- case MachineOperand::MO_GlobalAddress:
- getSymbol(MO.getGlobal())->print(O, MAI);
- break;
-
- case MachineOperand::MO_BlockAddress:
- O << GetBlockAddressSymbol(MO.getBlockAddress())->getName();
- break;
-
- case MachineOperand::MO_ExternalSymbol:
- O << MO.getSymbolName();
- break;
-
- default:
- llvm_unreachable("<unknown operand type>");
- }
-
- if (closeP)
- O << ")";
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeNios2AsmPrinter() {
- RegisterAsmPrinter<Nios2AsmPrinter> X(getTheNios2Target());
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td b/contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td
deleted file mode 100644
index f0b172f8422d..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td
+++ /dev/null
@@ -1,34 +0,0 @@
-//===- Nios2CallingConv.td - Calling Conventions for Nios2 -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This describes the calling conventions for Nios2 architecture.
-//===----------------------------------------------------------------------===//
-
-/// CCIfSubtarget - Match if the current subtarget has a feature F.
-class CCIfSubtarget<string F, CCAction A>:
- CCIf<!strconcat("State.getTarget().getSubtarget<Nios2Subtarget>().", F), A>;
-
-def CC_Nios2 : CallingConv<[
- // i32 f32 arguments get passed in integer registers if there is space.
- CCIfType<[i32, f32], CCAssignToReg<[R4, R5, R6, R7]>>,
-
- // Alternatively, they are assigned to the stack in 4-byte aligned units.
- CCAssignToStack<4, 4>
-]>;
-
-def RetCC_Nios2EABI : CallingConv<[
- // i32 are returned in registers R2, R3
- CCIfType<[i32], CCAssignToReg<[R2, R3]>>,
- // In case of floating point (FPH2 instr.) also use the same register set
- CCIfType<[f32], CCAssignToReg<[R2, R3]>>,
- CCIfByVal<CCPassByVal<4, 4>>,
- // Stack parameter slots for i32 is 32-bit words and 4-byte aligned.
- CCIfType<[i32], CCAssignToStack<4, 4>>
-]>;
-
-def CSR : CalleeSavedRegs<(add RA, FP, (sequence "R%u", 16, 23))>;
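The deleted CC_Nios2 convention passes the first four i32/f32 arguments in R4..R7 and assigns the rest to 4-byte aligned stack slots, with i32 results returned in R2/R3. A rough simulation of the argument assignment (illustrative only; register names as in the TableGen above):

#include <cstdio>

int main() {
  const char *ArgRegs[] = {"R4", "R5", "R6", "R7"};
  unsigned NumArgs = 6;
  unsigned StackOffset = 0;
  for (unsigned I = 0; I != NumArgs; ++I) {
    if (I < 4) {
      std::printf("arg%u -> %s\n", I, ArgRegs[I]);
    } else {
      std::printf("arg%u -> stack+%u\n", I, StackOffset);
      StackOffset += 4;
    }
  }
  return 0;
}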
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp b/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp
deleted file mode 100644
index 6fb28a6fd638..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- Nios2FrameLowering.cpp - Nios2 Frame Information ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2FrameLowering.h"
-
-#include "Nios2Subtarget.h"
-#include "llvm/CodeGen/MachineFunction.h"
-
-using namespace llvm;
-
-bool Nios2FrameLowering::hasFP(const MachineFunction &MF) const { return true; }
-
-void Nios2FrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
-
-void Nios2FrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h b/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h
deleted file mode 100644
index 4ffb01dda36a..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h
+++ /dev/null
@@ -1,39 +0,0 @@
-//===-- Nios2FrameLowering.h - Define frame lowering for Nios2 --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2FRAMELOWERING_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2FRAMELOWERING_H
-
-#include "Nios2.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
-
-namespace llvm {
-class Nios2Subtarget;
-
-class Nios2FrameLowering : public TargetFrameLowering {
-protected:
- const Nios2Subtarget &STI;
-
-public:
- explicit Nios2FrameLowering(const Nios2Subtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0, 4),
- STI(sti) {}
-
- bool hasFP(const MachineFunction &MF) const override;
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp
deleted file mode 100644
index 5f9679466115..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-//===-- Nios2ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Nios2 ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the NIOS2 target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2-isel"
-
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Nios2DAGToDAGISel - NIOS2 specific code to select NIOS2 machine
-// instructions for SelectionDAG operations.
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-class Nios2DAGToDAGISel : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the Nios2 Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const Nios2Subtarget *Subtarget;
-
-public:
- explicit Nios2DAGToDAGISel(Nios2TargetMachine &TM, CodeGenOpt::Level OL)
- : SelectionDAGISel(TM, OL) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- Subtarget = &MF.getSubtarget<Nios2Subtarget>();
- return SelectionDAGISel::runOnMachineFunction(MF);
- }
-
- void Select(SDNode *N) override;
-
- // Pass Name
- StringRef getPassName() const override {
- return "NIOS2 DAG->DAG Pattern Instruction Selection";
- }
-
-#include "Nios2GenDAGISel.inc"
-};
-} // namespace
-
-// Select instructions that have no custom selection. Used for
-// expanded, promoted and normal instructions.
-void Nios2DAGToDAGISel::Select(SDNode *Node) {
-
- // If we have a custom node, we already have selected!
- if (Node->isMachineOpcode()) {
- LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
- Node->setNodeId(-1);
- return;
- }
-
- // Select the default instruction
- SelectCode(Node);
-}
-
-FunctionPass *llvm::createNios2ISelDag(Nios2TargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new Nios2DAGToDAGISel(TM, OptLevel);
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp b/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp
deleted file mode 100644
index 008ce1570722..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-//===-- Nios2ISelLowering.cpp - Nios2 DAG Lowering Implementation ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the interfaces that Nios2 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2ISelLowering.h"
-#include "Nios2MachineFunction.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "Nios2GenCallingConv.inc"
-
-SDValue
-Nios2TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool IsVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SDLoc &DL, SelectionDAG &DAG) const {
- // CCValAssign - represent the assignment of
- // the return value to a location
- SmallVector<CCValAssign, 16> RVLocs;
- MachineFunction &MF = DAG.getMachineFunction();
-
- // CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
- // Analyze return values.
- CCInfo.CheckReturn(Outs, RetCC_Nios2EABI);
-
- SDValue Flag;
- SmallVector<SDValue, 4> RetOps(1, Chain);
-
- // Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- SDValue Val = OutVals[i];
- CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
-
- if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
- Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val);
-
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
-
- // Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
- }
-
- if (Flag.getNode())
- RetOps.push_back(Flag);
-
- return DAG.getNode(Nios2ISD::Ret, DL, MVT::Other, RetOps);
-}
-
-// addLiveIn - This helper function adds the specified physical register to the
-// MachineFunction as a live in value. It also creates a corresponding
-// virtual register for it.
-static unsigned addLiveIn(MachineFunction &MF, unsigned PReg,
- const TargetRegisterClass *RC) {
- unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
- MF.getRegInfo().addLiveIn(PReg, VReg);
- return VReg;
-}
-
-//===----------------------------------------------------------------------===//
-// Formal Arguments Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-// LowerFormalArguments - transform physical registers into virtual registers
-// and generate load operations for arguments places on the stack.
-SDValue Nios2TargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo &MFI = MF.getFrameInfo();
-
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
-
- CCInfo.AnalyzeFormalArguments(Ins, CC_Nios2);
-
-  // Used with varargs to accumulate store chains.
- std::vector<SDValue> OutChains;
-
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
-
- EVT ValVT = VA.getValVT();
-
- // Arguments stored on registers
- if (VA.isRegLoc()) {
- MVT RegVT = VA.getLocVT();
- unsigned ArgReg = VA.getLocReg();
- const TargetRegisterClass *RC = getRegClassFor(RegVT);
-
- // Transform the arguments stored on
- // physical registers into virtual ones
- unsigned Reg = addLiveIn(MF, ArgReg, RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it has been passed promoted
- // to 32 bits. Insert an assert[sz]ext to capture this, then
- // truncate to the right size.
- if (VA.getLocInfo() != CCValAssign::Full) {
- unsigned Opcode = 0;
- if (VA.getLocInfo() == CCValAssign::SExt)
- Opcode = ISD::AssertSext;
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- Opcode = ISD::AssertZext;
- if (Opcode)
- ArgValue =
- DAG.getNode(Opcode, DL, RegVT, ArgValue, DAG.getValueType(ValVT));
- ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue);
- }
-
- // Handle floating point arguments passed in integer registers.
- if ((RegVT == MVT::i32 && ValVT == MVT::f32) ||
- (RegVT == MVT::i64 && ValVT == MVT::f64))
- ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
- InVals.push_back(ArgValue);
- } else { // VA.isRegLoc()
- MVT LocVT = VA.getLocVT();
-
- // sanity check
- assert(VA.isMemLoc());
-
- // The stack pointer offset is relative to the caller stack frame.
- int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
- VA.getLocMemOffset(), true);
-
- // Create load nodes to retrieve arguments from the stack
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue Load = DAG.getLoad(
- LocVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- InVals.push_back(Load);
- OutChains.push_back(Load.getValue(1));
- }
- }
- if (!OutChains.empty()) {
- OutChains.push_back(Chain);
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
- }
-
- return Chain;
-}
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation
-//===----------------------------------------------------------------------===//
-
-Nios2TargetLowering::Nios2TargetLowering(const TargetMachine &TM,
- const Nios2Subtarget &STI)
- : TargetLowering(TM), Subtarget(&STI) {
-
- addRegisterClass(MVT::i32, &Nios2::CPURegsRegClass);
- computeRegisterProperties(Subtarget->getRegisterInfo());
-}
-
-const char *Nios2TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- case Nios2ISD::Hi:
- return "Nios2ISD::Hi";
- case Nios2ISD::Lo:
- return "Nios2ISD::Lo";
- case Nios2ISD::Ret:
- return "Nios2ISD::Ret";
- }
- return nullptr;
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h b/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h
deleted file mode 100644
index c3c8179054bb..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h
+++ /dev/null
@@ -1,63 +0,0 @@
-//===-- Nios2ISelLowering.h - Nios2 DAG Lowering Interface ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that Nios2 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2ISELLOWERING_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2ISELLOWERING_H
-
-#include "Nios2.h"
-#include "llvm/CodeGen/TargetLowering.h"
-
-namespace llvm {
-class Nios2Subtarget;
-
-namespace Nios2ISD {
-enum NodeType {
- // Start the numbering from where ISD NodeType finishes.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // Get the Higher 16 bits from a 32-bit immediate
- // No relation with Nios2 Hi register
- Hi,
- // Get the Lower 16 bits from a 32-bit immediate
- // No relation with Nios2 Lo register
- Lo,
- // Return
- Ret
-};
-}
-
-class Nios2TargetLowering : public TargetLowering {
- const Nios2Subtarget *Subtarget;
-
-public:
- Nios2TargetLowering(const TargetMachine &TM, const Nios2Subtarget &STI);
-
- /// getTargetNodeName - This method returns the name of a target specific
- // DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
- SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
- bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const override;
-
- SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
- SelectionDAG &DAG) const override;
-};
-} // end namespace llvm
-
-#endif // NIOS2_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td b/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td
deleted file mode 100644
index f57bf03bba3c..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td
+++ /dev/null
@@ -1,235 +0,0 @@
-//===-- Nios2InstrFormats.td - Nios2 Instruction Formats ---*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Describe NIOS2 instructions format
-//
-//
-//===----------------------------------------------------------------------===//
-
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-class Format<bits<6> val> {
- bits<6> Value = val;
-}
-
-def Pseudo : Format<0>;
-// Nios2 R1 instr formats:
-def FrmI : Format<1>;
-def FrmR : Format<2>;
-def FrmJ : Format<3>;
-def FrmOther : Format<4>; // Instruction w/ a custom format
-// Nios2 R2 instr 32-bit formats:
-def FrmL26 : Format<5>; // corresponds to J format in R1
-def FrmF2I16 : Format<6>; // corresponds to I format in R1
-def FrmF2X4I12 : Format<7>;
-def FrmF1X4I12 : Format<8>;
-def FrmF1X4L17 : Format<9>;
-def FrmF3X6L5 : Format<10>; // corresponds to R format in R1
-def FrmF2X6L10 : Format<11>;
-def FrmF3X6 : Format<12>; // corresponds to R format in R1
-def FrmF3X8 : Format<13>; // corresponds to custom format in R1
-// Nios2 R2 instr 16-bit formats:
-def FrmI10 : Format<14>;
-def FrmT1I7 : Format<15>;
-def FrmT2I4 : Format<16>;
-def FrmT1X1I6 : Format<17>;
-def FrmX1I7 : Format<18>;
-def FrmL5I4X1 : Format<19>;
-def FrmT2X1L3 : Format<20>;
-def FrmT2X1I3 : Format<21>;
-def FrmT3X1 : Format<22>;
-def FrmT2X3 : Format<23>;
-def FrmF1X1 : Format<24>;
-def FrmX2L5 : Format<25>;
-def FrmF1I5 : Format<26>;
-def FrmF2 : Format<27>;
-
-//===----------------------------------------------------------------------===//
-// Instruction Predicates:
-//===----------------------------------------------------------------------===//
-
-def isNios2r1 : Predicate<"Subtarget->isNios2r1()">;
-def isNios2r2 : Predicate<"Subtarget->isNios2r2()">;
-
-class PredicateControl {
- // Predicates related to specific target CPU features
- list<Predicate> FeaturePredicates = [];
- // Predicates for the instruction group membership in given ISA
- list<Predicate> InstrPredicates = [];
-
- list<Predicate> Predicates = !listconcat(FeaturePredicates, InstrPredicates);
-}
-
-//===----------------------------------------------------------------------===//
-// Base classes for 32-bit, 16-bit and pseudo instructions
-//===----------------------------------------------------------------------===//
-
-class Nios2Inst32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format f>: Instruction,
- PredicateControl {
- field bits<32> Inst;
- Format Form = f;
-
- let Namespace = "Nios2";
- let Size = 4;
-
- bits<6> Opcode = 0;
-
- // Bottom 6 bits are the 'opcode' field
- let Inst{5-0} = Opcode;
-
- let OutOperandList = outs;
- let InOperandList = ins;
-
- let AsmString = asmstr;
- let Pattern = pattern;
- let Itinerary = itin;
-
- // Attributes specific to Nios2 instructions:
-
- // TSFlags layout should be kept in sync with Nios2InstrInfo.h.
- let TSFlags{5-0} = Form.Value;
- let DecoderNamespace = "Nios2";
- field bits<32> SoftFail = 0;
-}
-
-class Nios2Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass Itin = IIPseudo>:
- Nios2Inst32<outs, ins, asmstr, pattern, Itin, Pseudo> {
-
- let isCodeGenOnly = 1;
- let isPseudo = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// Base classes for R1 and R2 instructions
-//===----------------------------------------------------------------------===//
-
-class Nios2R1Inst32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format f>:
- Nios2Inst32<outs, ins, asmstr, pattern, itin, f> {
- let DecoderNamespace = "Nios2";
- let InstrPredicates = [isNios2r1];
-}
-
-class Nios2R2Inst32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format f>:
- Nios2Inst32<outs, ins, asmstr, pattern, itin, f> {
- let DecoderNamespace = "Nios2r2";
- let InstrPredicates = [isNios2r2];
-}
-
-//===----------------------------------------------------------------------===//
-// Format I instruction class in Nios2 : <|A|B|immediate|opcode|>
-//===----------------------------------------------------------------------===//
-
-class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: Nios2R1Inst32<outs, ins, asmstr,
- pattern, itin, FrmI> {
-
- bits<5> rA;
- bits<5> rB;
- bits<16> imm;
-
- let Opcode = op;
-
- let Inst{31-27} = rA;
- let Inst{26-22} = rB;
- let Inst{21-6} = imm;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Format R instruction : <|A|B|C|opx|imm|opcode|>
-//===----------------------------------------------------------------------===//
-
-class FR<bits<6> opx, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: Nios2R1Inst32<outs, ins, asmstr,
- pattern, itin, FrmR> {
- bits<5> rA;
- bits<5> rB;
- bits<5> rC;
- bits<5> imm = 0;
-
- let Opcode = 0x3a; /* opcode is always 0x3a for R instr. */
-
- let Inst{31-27} = rA;
- let Inst{26-22} = rB;
- let Inst{21-17} = rC;
- let Inst{16-11} = opx; /* opx stands for opcode extension */
- let Inst{10-6} = imm; /* optional 5-bit immediate value */
-}
-
-//===----------------------------------------------------------------------===//
-// Format J instruction class in Nios2 : <|address|opcode|>
-//===----------------------------------------------------------------------===//
-
-class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>:
- Nios2R1Inst32<outs, ins, asmstr, pattern, itin, FrmJ> {
- bits<26> addr;
- let Opcode = op;
- let Inst{31-6} = addr;
-}
-
-//===----------------------------------------------------------------------===//
-// Format F3X6 (R2) instruction : <|opx|RSV|C|B|A|opcode|>
-//===----------------------------------------------------------------------===//
-
-class F3X6<bits<6> opx, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>:
- Nios2R2Inst32<outs, ins, asmstr, pattern, itin, FrmF3X6> {
- bits<5> rC;
- bits<5> rB;
- bits<5> rA;
- bits<5> rsv = 0;
-
- let Opcode = 0x20; /* opcode is always 0x20 (OPX group) for F3X6 instr. */
-
- let Inst{31-26} = opx; /* opx stands for opcode extension */
- let Inst{25-21} = rsv;
- let Inst{20-16} = rC;
- let Inst{15-11} = rB;
- let Inst{10-6} = rA;
-}
-
-//===----------------------------------------------------------------------===//
-// Multiclasses for common instructions of both R1 and R2:
-//===----------------------------------------------------------------------===//
-
-// Multiclass for instructions that have R format in R1 and F3X6 format in R2
-// and their opx values differ between R1 and R2
-multiclass CommonInstr_R_F3X6_opx<bits<6> opxR1, bits<6> opxR2, dag outs,
- dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin> {
- def NAME#_R1 : FR<opxR1, outs, ins, asmstr, pattern, itin>;
- def NAME#_R2 : F3X6<opxR2, outs, ins, asmstr, pattern, itin>;
-}
-
-// Multiclass for instructions that have R format in R1 and F3X6 format in R2
-// and their opx values are the same in R1 and R2
-multiclass CommonInstr_R_F3X6<bits<6> opx, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin> :
- CommonInstr_R_F3X6_opx<opx, opx, outs, ins, asmstr, pattern, itin>;
-
-// Multiclass for instructions that have I format in R1 and F2I16 format in R2
-// and their op code values differ between R1 and R2
-multiclass CommonInstr_I_F2I16_op<bits<6> opR1, bits<6> opR2, dag outs, dag ins,
- string asmstr, list<dag> pattern,
- InstrItinClass itin> {
- def NAME#_R1 : FI<opR1, outs, ins, asmstr, pattern, itin>;
-}
-
-// Multiclass for instructions that have I format in R1 and F2I16 format in R2
-// and their op code values are the same in R1 and R2
-multiclass CommonInstr_I_F2I16<bits<6> op, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin> :
- CommonInstr_I_F2I16_op<op, op, outs, ins, asmstr, pattern, itin>;
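
For reference, the FR class above fixes the R1 R-format layout as rA in bits 31-27, rB in 26-22, rC in 21-17, the opcode extension in 16-11, an optional 5-bit immediate in 10-6, and the constant opcode 0x3a in 5-0. A small illustrative encoder following that layout, written for this note rather than taken from the backend:

#include <cstdint>

// Illustrative only: pack a Nios2 R1 R-format word using the bit ranges
// declared by the deleted FR TableGen class (opcode is always 0x3a).
constexpr uint32_t encodeRFormat(unsigned rA, unsigned rB, unsigned rC,
                                 unsigned opx, unsigned imm5 = 0) {
  return (rA & 0x1f) << 27 | (rB & 0x1f) << 22 | (rC & 0x1f) << 17 |
         (opx & 0x3f) << 11 | (imm5 & 0x1f) << 6 | 0x3a;
}

// Example: "add r4, r2, r3" -- opx 0x31 per the deleted Nios2InstrInfo.td.
constexpr uint32_t AddWord = encodeRFormat(/*rA=*/2, /*rB=*/3, /*rC=*/4,
                                           /*opx=*/0x31);
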
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp b/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp
deleted file mode 100644
index 9700cba3595b..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===-- Nios2InstrInfo.cpp - Nios2 Instruction Information ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2InstrInfo.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-
-using namespace llvm;
-
-#define GET_INSTRINFO_CTOR_DTOR
-#include "Nios2GenInstrInfo.inc"
-
-// Pin the vtable to this file.
-void Nios2InstrInfo::anchor() {}
-
-Nios2InstrInfo::Nios2InstrInfo(Nios2Subtarget &ST)
- : Nios2GenInstrInfo(), RI(ST), Subtarget(ST) {}
-
-/// Expand Pseudo instructions into real backend instructions
-bool Nios2InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
- MachineBasicBlock &MBB = *MI.getParent();
-
- switch (MI.getDesc().getOpcode()) {
- default:
- return false;
- case Nios2::RetRA:
- BuildMI(MBB, MI, MI.getDebugLoc(), get(Nios2::RET_R1)).addReg(Nios2::RA);
- break;
- }
-
- MBB.erase(MI);
- return true;
-}
-
-void Nios2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
- unsigned opc = Subtarget.hasNios2r2() ? Nios2::ADD_R2 : Nios2::ADD_R1;
- BuildMI(MBB, I, DL, get(opc))
- .addReg(DestReg, RegState::Define)
- .addReg(Nios2::ZERO)
- .addReg(SrcReg, getKillRegState(KillSrc));
-}
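
copyPhysReg above has no dedicated move instruction to draw on; it emits an ADD of the source register with the hard-wired zero register ("add Dst, zero, Src"), choosing the R2 or R1 opcode by subtarget. A tiny illustration of the same idiom outside the MachineInstr API (the Cpu struct and copyReg are invented for this note):

#include <cstdint>

// Illustrative only: a register copy expressed as dst = zero + src,
// mirroring what the deleted copyPhysReg builds.
struct Cpu { uint32_t Regs[32]; };  // Regs[0] models the hard-wired zero register

static void copyReg(Cpu &C, unsigned Dst, unsigned Src) {
  C.Regs[Dst] = C.Regs[0] + C.Regs[Src]; // add Dst, zero, Src
}
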
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h b/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h
deleted file mode 100644
index 52f6e7e9c7c8..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- Nios2InstrInfo.h - Nios2 Instruction Information --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2INSTRINFO_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2INSTRINFO_H
-
-#include "Nios2RegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "Nios2GenInstrInfo.inc"
-
-namespace llvm {
-
-class Nios2Subtarget;
-
-class Nios2InstrInfo : public Nios2GenInstrInfo {
- const Nios2RegisterInfo RI;
- const Nios2Subtarget &Subtarget;
- virtual void anchor();
-
-public:
- explicit Nios2InstrInfo(Nios2Subtarget &ST);
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- const Nios2RegisterInfo &getRegisterInfo() const { return RI; }
-
- bool expandPostRAPseudo(MachineInstr &MI) const override;
-
- void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const override;
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td b/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td
deleted file mode 100644
index dee84f74bcbe..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td
+++ /dev/null
@@ -1,109 +0,0 @@
-//===- Nios2InstrInfo.td - Target Description for Nios2 ------*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Instruction format superclass
-//===----------------------------------------------------------------------===//
-
-include "Nios2InstrFormats.td"
-
-
-//===----------------------------------------------------------------------===//
-// Nios2 Operand, Complex Patterns and Transformations Definitions.
-//===----------------------------------------------------------------------===//
-
-def simm16 : Operand<i32> {
- let DecoderMethod= "DecodeSimm16";
-}
-
-// Immediate node that fits as a 16-bit sign-extended value in the target
-// instruction's immediate field, e.g. addi, andi.
-def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
-
-// Custom return SDNode
-def Nios2Ret : SDNode<"Nios2ISD::Ret", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
-//===----------------------------------------------------------------------===//
-// Instructions specific format
-//===----------------------------------------------------------------------===//
-
-// Arithmetic and logical instructions with 2 registers and 16-bit immediate
-// value.
-multiclass ArithLogicRegImm16<bits<6> op, string mnemonic, SDNode opNode,
- Operand immOp, PatLeaf immType>:
- CommonInstr_I_F2I16<op, (outs CPURegs:$rB),
- (ins CPURegs:$rA, immOp:$imm),
- !strconcat(mnemonic, "\t$rB, $rA, $imm"),
- [(set CPURegs:$rB,
- (opNode CPURegs:$rA, immType:$imm))],
- IIAlu>;
-
-// Arithmetic and logical instructions with 3 register operands.
-// Defines R1 and R2 instruction at the same time.
-multiclass ArithLogicReg<bits<6> opx, string mnemonic,
- SDNode opNode>:
- CommonInstr_R_F3X6<opx, (outs CPURegs:$rC),
- (ins CPURegs:$rA, CPURegs:$rB),
- !strconcat(mnemonic, "\t$rC, $rA, $rB"),
- [(set CPURegs:$rC, (opNode CPURegs:$rA, CPURegs:$rB))],
- IIAlu>;
-
-multiclass Return<bits<6> opx, dag outs, dag ins, string mnemonic> {
- let rB = 0, rC = 0,
- isReturn = 1,
- isCodeGenOnly = 1,
- hasCtrlDep = 1,
- hasExtraSrcRegAllocReq = 1 in {
- defm NAME# : CommonInstr_R_F3X6<opx, outs, ins, mnemonic, [], IIBranch>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Nios2 Instructions
-//===----------------------------------------------------------------------===//
-
-/// Arithmetic instructions operating on registers.
-let isCommutable = 1 ,
- isReMaterializable = 1 in {
- defm ADD : ArithLogicReg<0x31, "add", add>;
- defm AND : ArithLogicReg<0x0e, "and", and>;
- defm OR : ArithLogicReg<0x16, "or", or>;
- defm XOR : ArithLogicReg<0x1e, "xor", xor>;
- defm MUL : ArithLogicReg<0x27, "mul", mul>;
-}
-
-let isReMaterializable = 1 in {
- defm SUB : ArithLogicReg<0x39, "sub", sub>;
-}
-
-defm DIVU : ArithLogicReg<0x24, "divu", udiv>;
-defm DIV : ArithLogicReg<0x25, "div", sdiv>;
-
-defm SLL : ArithLogicReg<0x13, "sll", shl>;
-defm SRL : ArithLogicReg<0x1b, "srl", srl>;
-defm SRA : ArithLogicReg<0x3b, "sra", sra>;
-
-/// Arithmetic Instructions (ALU Immediate)
-defm ADDI : ArithLogicRegImm16<0x04, "addi", add, simm16, immSExt16>;
-
-// Returns:
-defm RET : Return<0x05, (outs), (ins CPURegs:$rA), "ret">;
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions
-//===----------------------------------------------------------------------===//
-
-// Return RA.
-let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
-def RetRA : Nios2Pseudo<(outs), (ins), "", [(Nios2Ret)]>;
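
The immSExt16 pattern leaf above only matches immediates for which isInt<16> holds, i.e. values representable in a signed 16-bit field. A standalone version of that range check with a couple of sample values (fitsSimm16 is written for this note; it is not the LLVM helper itself):

#include <cassert>
#include <cstdint>

// Illustrative only: the same range test isInt<16> performs, i.e. the
// value fits in a signed 16-bit field (-32768 .. 32767).
static bool fitsSimm16(int64_t V) { return V >= -32768 && V <= 32767; }

int main() {
  assert(fitsSimm16(100));     // selectable by the addi pattern
  assert(fitsSimm16(-32768));
  assert(!fitsSimm16(40000));  // too wide; needs the hi/lo split shown earlier
  return 0;
}
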
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp b/contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp
deleted file mode 100644
index c43af879b8a6..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-//===-- Nios2MCInstLower.cpp - Convert Nios2 MachineInstr to MCInst -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to lower Nios2 MachineInstrs to their corresponding
-// MCInst records.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/Nios2BaseInfo.h"
-#include "MCTargetDesc/Nios2MCExpr.h"
-#include "Nios2.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
-
-using namespace llvm;
-
-static MCOperand LowerSymbolOperand(const MachineOperand &MO, AsmPrinter &AP) {
- MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
- Nios2MCExpr::Nios2ExprKind TargetKind = Nios2MCExpr::CEK_None;
- const MCSymbol *Symbol;
-
- switch (MO.getTargetFlags()) {
- default:
- llvm_unreachable("Invalid target flag!");
- case Nios2FG::MO_NO_FLAG:
- break;
- case Nios2FG::MO_ABS_HI:
- TargetKind = Nios2MCExpr::CEK_ABS_HI;
- break;
- case Nios2FG::MO_ABS_LO:
- TargetKind = Nios2MCExpr::CEK_ABS_LO;
- break;
- }
-
- switch (MO.getType()) {
- case MachineOperand::MO_GlobalAddress:
- Symbol = AP.getSymbol(MO.getGlobal());
- break;
-
- case MachineOperand::MO_MachineBasicBlock:
- Symbol = MO.getMBB()->getSymbol();
- break;
-
- case MachineOperand::MO_BlockAddress:
- Symbol = AP.GetBlockAddressSymbol(MO.getBlockAddress());
- break;
-
- case MachineOperand::MO_ExternalSymbol:
- Symbol = AP.GetExternalSymbolSymbol(MO.getSymbolName());
- break;
-
- case MachineOperand::MO_JumpTableIndex:
- Symbol = AP.GetJTISymbol(MO.getIndex());
- break;
-
- case MachineOperand::MO_ConstantPoolIndex:
- Symbol = AP.GetCPISymbol(MO.getIndex());
- break;
-
- default:
- llvm_unreachable("<unknown operand type>");
- }
-
- const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, AP.OutContext);
-
- if (TargetKind != Nios2MCExpr::CEK_None)
- Expr = Nios2MCExpr::create(TargetKind, Expr, AP.OutContext);
-
- return MCOperand::createExpr(Expr);
-}
-
-static MCOperand LowerOperand(const MachineOperand &MO, AsmPrinter &AP) {
-
- switch (MO.getType()) {
- default:
- llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit())
- break;
- return MCOperand::createReg(MO.getReg());
- case MachineOperand::MO_Immediate:
- return MCOperand::createImm(MO.getImm());
- case MachineOperand::MO_MachineBasicBlock:
- case MachineOperand::MO_ExternalSymbol:
- case MachineOperand::MO_JumpTableIndex:
- case MachineOperand::MO_BlockAddress:
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ConstantPoolIndex:
- return LowerSymbolOperand(MO, AP);
- case MachineOperand::MO_RegisterMask:
- break;
- }
-
- return MCOperand();
-}
-
-void llvm::LowerNios2MachineInstToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP) {
-
- OutMI.setOpcode(MI->getOpcode());
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- MCOperand MCOp = LowerOperand(MO, AP);
-
- if (MCOp.isValid())
- OutMI.addOperand(MCOp);
- }
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp b/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp
deleted file mode 100644
index be5b8829fe36..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//===-- Nios2MachineFunctionInfo.cpp - Private data used for Nios2 --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2MachineFunction.h"
-
-using namespace llvm;
-
-void Nios2FunctionInfo::anchor() {}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h b/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h
deleted file mode 100644
index 73baf9694790..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h
+++ /dev/null
@@ -1,62 +0,0 @@
-//===-- Nios2MachineFunctionInfo.h - Private data used for Nios2 --*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Nios2 specific subclass of MachineFunctionInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2MACHINEFUNCTION_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2MACHINEFUNCTION_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// Nios2FunctionInfo - This class is derived from MachineFunctionInfo and
-/// holds private Nios2 target-specific information for each MachineFunction.
-class Nios2FunctionInfo : public MachineFunctionInfo {
- virtual void anchor();
-
-private:
- unsigned GlobalBaseReg;
-
- /// VarArgsFrameOffset - Frame offset to start of varargs area.
- int VarArgsFrameOffset;
-
- /// SRetReturnReg - Holds the virtual register into which the sret
- /// argument is passed.
- unsigned SRetReturnReg;
-
- /// IsLeafProc - True if the function is a leaf procedure.
- bool IsLeafProc;
-
-public:
- Nios2FunctionInfo()
- : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
- IsLeafProc(false) {}
- explicit Nios2FunctionInfo(MachineFunction &MF)
- : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
- IsLeafProc(false) {}
-
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
-
- int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
- void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
-
- unsigned getSRetReturnReg() const { return SRetReturnReg; }
- void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
-
- void setLeafProc(bool rhs) { IsLeafProc = rhs; }
- bool isLeafProc() const { return IsLeafProc; }
-};
-
-} // end of namespace llvm
-
-#endif // LLVM_LIB_TARGET_NIOS2_NIOS2MACHINEFUNCTION_H
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp b/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp
deleted file mode 100644
index 9b892f917535..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- Nios2RegisterInfo.cpp - Nios2 Register Information ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "nios2-reg-info"
-
-#include "Nios2RegisterInfo.h"
-
-#include "Nios2.h"
-#include "Nios2Subtarget.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "Nios2GenRegisterInfo.inc"
-
-using namespace llvm;
-
-Nios2RegisterInfo::Nios2RegisterInfo(const Nios2Subtarget &ST)
- : Nios2GenRegisterInfo(Nios2::RA), Subtarget(ST) {}
-
-const TargetRegisterClass *Nios2RegisterInfo::intRegClass(unsigned Size) const {
- return &Nios2::CPURegsRegClass;
-}
-
-const MCPhysReg *
-Nios2RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- return CSR_SaveList;
-}
-
-BitVector Nios2RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- static const MCPhysReg ReservedCPURegs[] = {Nios2::ZERO, Nios2::AT, Nios2::SP,
- Nios2::RA, Nios2::PC, Nios2::GP};
- BitVector Reserved(getNumRegs());
-
- for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I)
- Reserved.set(ReservedCPURegs[I]);
-
- return Reserved;
-}
-
-void Nios2RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {}
-
-unsigned Nios2RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return Nios2::SP;
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h b/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h
deleted file mode 100644
index 3658343b1d2e..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===-- Nios2RegisterInfo.h - Nios2 Register Information Impl ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2REGISTERINFO_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2REGISTERINFO_H
-
-#include "Nios2.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-
-#define GET_REGINFO_HEADER
-#include "Nios2GenRegisterInfo.inc"
-
-namespace llvm {
-class Nios2Subtarget;
-class TargetInstrInfo;
-class Type;
-
-class Nios2RegisterInfo : public Nios2GenRegisterInfo {
-protected:
- const Nios2Subtarget &Subtarget;
-
-public:
- Nios2RegisterInfo(const Nios2Subtarget &Subtarget);
-
- const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
-
- BitVector getReservedRegs(const MachineFunction &MF) const override;
-
- /// Stack Frame Processing Methods
- void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- unsigned FIOperandNum,
- RegScavenger *RS = nullptr) const override;
-
- /// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
-
- /// Return GPR register class.
- const TargetRegisterClass *intRegClass(unsigned Size) const;
-};
-
-} // end namespace llvm
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td b/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td
deleted file mode 100644
index 1808815816f3..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Nios2RegisterInfo.td - Nios2 Register defs ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-// We have a bank of 32 registers.
-class Nios2Reg<string n> : Register<n> {
- field bits<5> Num;
- let Namespace = "Nios2";
-}
-
-// Nios2 CPU Registers
-class Nios2GPRReg<bits<5> num, string n> : Nios2Reg<n> {
- let Num = num;
-}
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-
-let Namespace = "Nios2" in {
- // General Purpose Registers
- def ZERO : Nios2GPRReg<0, "zero">, DwarfRegNum<[ 0 ]>;
- def AT : Nios2GPRReg<1, "at">, DwarfRegNum<[ 1 ]>;
- foreach RegNum = 2 - 23 in {
- def R #RegNum : Nios2GPRReg<RegNum, "r" #RegNum>, DwarfRegNum<[ RegNum ]>;
- }
- def ET : Nios2GPRReg<24, "et">, DwarfRegNum<[ 24 ]>;
- def BT : Nios2GPRReg<25, "bt">, DwarfRegNum<[ 25 ]>;
- def GP : Nios2GPRReg<26, "gp">, DwarfRegNum<[ 26 ]>;
- def SP : Nios2GPRReg<27, "sp">, DwarfRegNum<[ 27 ]>;
- def FP : Nios2GPRReg<28, "fp">, DwarfRegNum<[ 28 ]>;
- def EA : Nios2GPRReg<29, "ea">, DwarfRegNum<[ 29 ]>;
- def BA : Nios2GPRReg<30, "ba">, DwarfRegNum<[ 30 ]>;
- def RA : Nios2GPRReg<31, "ra">, DwarfRegNum<[ 31 ]>;
- def PC : Nios2Reg<"pc">, DwarfRegNum<[ 32 ]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Register Classes
-//===----------------------------------------------------------------------===//
-
-def CPURegs : RegisterClass<"Nios2", [ i32 ], 32,
- (add
- // Reserved
- ZERO,
- AT,
- // Return Values and Arguments
- (sequence "R%u", 2, 7),
- // Not preserved across procedure calls
- // Caller saved
- (sequence "R%u", 8, 15),
- // Callee saved
- (sequence "R%u", 16, 23),
- // Reserved
- ET, BT, GP, SP, FP, EA, BA, RA, PC)>;
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2Schedule.td b/contrib/llvm/lib/Target/Nios2/Nios2Schedule.td
deleted file mode 100644
index 2d1d9d5e5f3f..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2Schedule.td
+++ /dev/null
@@ -1,39 +0,0 @@
-//===-- Nios2Schedule.td - Nios2 Scheduling Definitions ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Functional units across Nios2 chip sets. Based on GCC/Nios2 backend files.
-//===----------------------------------------------------------------------===//
-def ALU : FuncUnit;
-def IMULDIV : FuncUnit;
-
-//===----------------------------------------------------------------------===//
-// Instruction Itinerary classes used for Nios2
-//===----------------------------------------------------------------------===//
-def IIAlu : InstrItinClass;
-def IILoad : InstrItinClass;
-def IIStore : InstrItinClass;
-def IIFlush : InstrItinClass;
-def IIIdiv : InstrItinClass;
-def IIBranch : InstrItinClass;
-
-def IIPseudo : InstrItinClass;
-
-//===----------------------------------------------------------------------===//
-// Nios2 Generic instruction itineraries.
-//===----------------------------------------------------------------------===//
-//@ http://llvm.org/docs/doxygen/html/structllvm_1_1InstrStage.html
-def Nios2GenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
- InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
- InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
- InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
- InstrItinData<IIFlush , [InstrStage<1, [ALU]>]>,
- InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
- InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>
-]>;
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp b/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp
deleted file mode 100644
index 196bed20cdcc..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- Nios2Subtarget.cpp - Nios2 Subtarget Information ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Nios2 specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2Subtarget.h"
-#include "Nios2.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2-subtarget"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "Nios2GenSubtargetInfo.inc"
-
-void Nios2Subtarget::anchor() {}
-
-Nios2Subtarget::Nios2Subtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM)
- :
-
- // Nios2GenSubtargetInfo lists the available features when invoked as
- // llc -march=nios2 -mcpu=help
- Nios2GenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(*this) {}
-
-Nios2Subtarget &Nios2Subtarget::initializeSubtargetDependencies(StringRef CPU,
- StringRef FS) {
- if (TargetTriple.getArch() == Triple::nios2) {
- if (CPU != "nios2r2") {
- CPU = "nios2r1";
- Nios2ArchVersion = Nios2r1;
- } else {
- Nios2ArchVersion = Nios2r2;
- }
- } else {
- errs() << "!!!Error, TargetTriple.getArch() = " << TargetTriple.getArch()
- << "CPU = " << CPU << "\n";
- exit(0);
- }
-
- // Parse features string.
- ParseSubtargetFeatures(CPU, FS);
-
- return *this;
-}
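
initializeSubtargetDependencies above falls back to the nios2r1 CPU for anything other than an explicit nios2r2 request. The same defaulting rule in isolation, as a sketch only (pickArch and Nios2Arch are names invented for this note):

#include <string>

enum Nios2Arch { Nios2r1, Nios2r2 };

// Illustrative only: mirrors the CPU defaulting done by the deleted
// initializeSubtargetDependencies.
static Nios2Arch pickArch(std::string &CPU) {
  if (CPU != "nios2r2") {
    CPU = "nios2r1";   // any other (or empty) CPU string selects the R1 ISA
    return Nios2r1;
  }
  return Nios2r2;
}
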
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h b/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h
deleted file mode 100644
index a822dff33b5b..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- Nios2Subtarget.h - Define Subtarget for the Nios2 -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Nios2 specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2SUBTARGET_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2SUBTARGET_H
-
-#include "Nios2FrameLowering.h"
-#include "Nios2ISelLowering.h"
-#include "Nios2InstrInfo.h"
-#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-
-#define GET_SUBTARGETINFO_HEADER
-#include "Nios2GenSubtargetInfo.inc"
-
-namespace llvm {
-class StringRef;
-
-class Nios2TargetMachine;
-
-class Nios2Subtarget : public Nios2GenSubtargetInfo {
- virtual void anchor();
-
-public:
- // Nios2 R2 features
- // Bit manipulation instructions extension
- bool HasBMX;
- // Code Density instructions extension
- bool HasCDX;
- // Multi-Processor instructions extension
- bool HasMPX;
- // New mandatory instructions
- bool HasR2Mandatory;
-
-protected:
- enum Nios2ArchEnum {
- // Nios2 R1 ISA
- Nios2r1,
- // Nios2 R2 ISA
- Nios2r2
- };
-
- // Nios2 architecture version
- Nios2ArchEnum Nios2ArchVersion;
-
- Triple TargetTriple;
-
- Nios2InstrInfo InstrInfo;
- Nios2TargetLowering TLInfo;
- SelectionDAGTargetInfo TSInfo;
- Nios2FrameLowering FrameLowering;
-
-public:
- /// This constructor initializes the data members to match that
- /// of the specified triple.
- Nios2Subtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- bool hasNios2r1() const { return Nios2ArchVersion >= Nios2r1; }
- bool isNios2r1() const { return Nios2ArchVersion == Nios2r1; }
- bool hasNios2r2() const { return Nios2ArchVersion >= Nios2r2; }
- bool isNios2r2() const { return Nios2ArchVersion == Nios2r2; }
-
- Nios2Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
-
- const Nios2InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
- const Nios2RegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
- }
- const Nios2TargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
- const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp b/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp
deleted file mode 100644
index b7594dde709d..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-//===-- Nios2TargetMachine.cpp - Define TargetMachine for Nios2 -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implements the info about Nios2 target spec.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2TargetMachine.h"
-#include "Nios2.h"
-#include "Nios2TargetObjectFile.h"
-
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2"
-
-extern "C" void LLVMInitializeNios2Target() {
- // Register the target.
- RegisterTargetMachine<Nios2TargetMachine> X(getTheNios2Target());
-}
-
-static std::string computeDataLayout() {
- return "e-p:32:32:32-i8:8:32-i16:16:32-n32";
-}
-
-static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
-}
-
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- Reloc::Model RM, bool JIT) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
-Nios2TargetMachine::Nios2TargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- Optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
- : LLVMTargetMachine(
- T, computeDataLayout(), TT, CPU, FS, Options,
- getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL),
- TLOF(make_unique<Nios2TargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this) {
- initAsmInfo();
-}
-
-Nios2TargetMachine::~Nios2TargetMachine() {}
-
-const Nios2Subtarget *
-Nios2TargetMachine::getSubtargetImpl(const Function &F) const {
- Attribute CPUAttr = F.getFnAttribute("target-cpu");
- Attribute FSAttr = F.getFnAttribute("target-features");
-
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
-
- auto &I = SubtargetMap[CPU + FS];
- if (!I) {
- // This needs to be done before we create a new subtarget since any
- // creation will depend on the TM and the code generation flags on the
- // function that reside in TargetOptions.
- resetTargetOptions(F);
- I = llvm::make_unique<Nios2Subtarget>(TargetTriple, CPU, FS, *this);
- }
- return I.get();
-}
-
-namespace {
-/// Nios2 Code Generator Pass Configuration Options.
-class Nios2PassConfig : public TargetPassConfig {
-public:
- Nios2PassConfig(Nios2TargetMachine &TM, PassManagerBase *PM)
- : TargetPassConfig(TM, *PM) {}
-
- Nios2TargetMachine &getNios2TargetMachine() const {
- return getTM<Nios2TargetMachine>();
- }
-
- void addCodeGenPrepare() override;
- bool addInstSelector() override;
- void addIRPasses() override;
-};
-} // namespace
-
-TargetPassConfig *Nios2TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new Nios2PassConfig(*this, &PM);
-}
-
-void Nios2PassConfig::addCodeGenPrepare() {
- TargetPassConfig::addCodeGenPrepare();
-}
-
-void Nios2PassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); }
-
-// Install an instruction selector pass using
-// the ISelDag to generate Nios2 code.
-bool Nios2PassConfig::addInstSelector() {
- addPass(createNios2ISelDag(getNios2TargetMachine(), getOptLevel()));
- return false;
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h b/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h
deleted file mode 100644
index 1ebfb397383e..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===-- Nios2TargetMachine.h - Define TargetMachine for Nios2 ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Nios2 specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H
-
-#include "Nios2Subtarget.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-class Nios2TargetMachine : public LLVMTargetMachine {
- mutable StringMap<std::unique_ptr<Nios2Subtarget>> SubtargetMap;
- std::unique_ptr<TargetLoweringObjectFile> TLOF;
- Nios2Subtarget Subtarget;
-
-public:
- Nios2TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT);
- ~Nios2TargetMachine() override;
-
- const Nios2Subtarget *getSubtargetImpl() const { return &Subtarget; }
- const Nios2Subtarget *getSubtargetImpl(const Function &F) const override;
-
- TargetLoweringObjectFile *getObjFileLowering() const override {
- return TLOF.get();
- }
-
- // Pass Pipeline Configuration
- TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp b/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp
deleted file mode 100644
index 5fc85ef487e6..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//===-- Nios2TargetObjectFile.cpp - Nios2 Object Files --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2TargetObjectFile.h"
-
-using namespace llvm;
-
-void Nios2TargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h b/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h
deleted file mode 100644
index e9ed6e31d937..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h
+++ /dev/null
@@ -1,26 +0,0 @@
-//===-- llvm/Target/Nios2TargetObjectFile.h - Nios2 Object Info -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETOBJECTFILE_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETOBJECTFILE_H
-
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-
-namespace llvm {
-
-class Nios2TargetObjectFile : public TargetLoweringObjectFileELF {
-public:
- Nios2TargetObjectFile() : TargetLoweringObjectFileELF() {}
-
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
-};
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h b/contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h
deleted file mode 100644
index 1520ac27e94f..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- Nios2TargetStreamer.h - Nios2 Target Streamer ----------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETSTREAMER_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETSTREAMER_H
-
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-
-namespace llvm {
-
-class Nios2TargetStreamer : public MCTargetStreamer {
-public:
- Nios2TargetStreamer(MCStreamer &S);
-};
-
-// This part is for ASCII assembly output
-class Nios2TargetAsmStreamer : public Nios2TargetStreamer {
-public:
- Nios2TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
-};
-
-} // namespace llvm
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp b/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp
deleted file mode 100644
index d808a96db772..000000000000
--- a/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-//===-- Nios2TargetInfo.cpp - Nios2 Target Implementation -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-Target &llvm::getTheNios2Target() {
- static Target TheNios2Target;
- return TheNios2Target;
-}
-
-extern "C" void LLVMInitializeNios2TargetInfo() {
- RegisterTarget<Triple::nios2,
- /*HasJIT=*/true>
- X(getTheNios2Target(), "nios2", "Nios2", "Nios2");
-}
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 56307a84f2e5..8b3480f772e9 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -21,7 +21,6 @@
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
@@ -31,169 +30,7 @@
using namespace llvm;
-static const MCPhysReg RRegs[32] = {
- PPC::R0, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-static const MCPhysReg RRegsNoR0[32] = {
- PPC::ZERO,
- PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-static const MCPhysReg XRegs[32] = {
- PPC::X0, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-static const MCPhysReg XRegsNoX0[32] = {
- PPC::ZERO8,
- PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-static const MCPhysReg FRegs[32] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31
-};
-static const MCPhysReg SPERegs[32] = {
- PPC::S0, PPC::S1, PPC::S2, PPC::S3,
- PPC::S4, PPC::S5, PPC::S6, PPC::S7,
- PPC::S8, PPC::S9, PPC::S10, PPC::S11,
- PPC::S12, PPC::S13, PPC::S14, PPC::S15,
- PPC::S16, PPC::S17, PPC::S18, PPC::S19,
- PPC::S20, PPC::S21, PPC::S22, PPC::S23,
- PPC::S24, PPC::S25, PPC::S26, PPC::S27,
- PPC::S28, PPC::S29, PPC::S30, PPC::S31
-};
-static const MCPhysReg VFRegs[32] = {
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static const MCPhysReg VRegs[32] = {
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-static const MCPhysReg VSRegs[64] = {
- PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
- PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
- PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
- PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
- PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
- PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
- PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
- PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
-
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-static const MCPhysReg VSFRegs[64] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static const MCPhysReg VSSRegs[64] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static unsigned QFRegs[32] = {
- PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
- PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
- PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
- PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
- PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
- PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
- PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
-};
-static const MCPhysReg CRBITRegs[32] = {
- PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
- PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
- PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
- PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
- PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
-};
-static const MCPhysReg CRRegs[8] = {
- PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
- PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
-};
+DEFINE_PPC_REGCLASSES;
// Evaluate an expression containing condition register
// or condition register field symbols. Returns positive
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index db01271b87e1..26869f250823 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -17,6 +17,8 @@
using namespace llvm;
+DEFINE_PPC_REGCLASSES;
+
#define DEBUG_TYPE "ppc-disassembler"
typedef MCDisassembler::DecodeStatus DecodeStatus;
@@ -62,184 +64,9 @@ extern "C" void LLVMInitializePowerPCDisassembler() {
// FIXME: These can be generated by TableGen from the existing register
// encoding values!
-static const unsigned CRRegs[] = {
- PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
- PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
-};
-
-static const unsigned CRBITRegs[] = {
- PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
- PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
- PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
- PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
- PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
-};
-
-static const unsigned FRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31
-};
-
-static const unsigned VFRegs[] = {
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned VRegs[] = {
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
-static const unsigned VSRegs[] = {
- PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
- PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
- PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
- PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
- PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
- PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
- PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
- PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
-
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
-static const unsigned VSFRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned VSSRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned GPRegs[] = {
- PPC::R0, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-
-static const unsigned GP0Regs[] = {
- PPC::ZERO, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-
-static const unsigned G8Regs[] = {
- PPC::X0, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-
-static const unsigned G80Regs[] = {
- PPC::ZERO8, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-
-static const unsigned QFRegs[] = {
- PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
- PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
- PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
- PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
- PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
- PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
- PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
-};
-
-static const unsigned SPERegs[] = {
- PPC::S0, PPC::S1, PPC::S2, PPC::S3,
- PPC::S4, PPC::S5, PPC::S6, PPC::S7,
- PPC::S8, PPC::S9, PPC::S10, PPC::S11,
- PPC::S12, PPC::S13, PPC::S14, PPC::S15,
- PPC::S16, PPC::S17, PPC::S18, PPC::S19,
- PPC::S20, PPC::S21, PPC::S22, PPC::S23,
- PPC::S24, PPC::S25, PPC::S26, PPC::S27,
- PPC::S28, PPC::S29, PPC::S30, PPC::S31
-};
-
template <std::size_t N>
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
- const unsigned (&Regs)[N]) {
+ const MCPhysReg (&Regs)[N]) {
assert(RegNo < N && "Invalid register number");
Inst.addOperand(MCOperand::createReg(Regs[RegNo]));
return MCDisassembler::Success;
@@ -308,25 +135,25 @@ static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GPRegs);
+ return decodeRegisterClass(Inst, RegNo, RRegs);
}
static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GP0Regs);
+ return decodeRegisterClass(Inst, RegNo, RRegsNoR0);
}
static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, G8Regs);
+ return decodeRegisterClass(Inst, RegNo, XRegs);
}
static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, G80Regs);
+ return decodeRegisterClass(Inst, RegNo, XRegsNoX0);
}
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
@@ -341,7 +168,7 @@ static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GPRegs);
+ return decodeRegisterClass(Inst, RegNo, RRegs);
}
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
@@ -388,19 +215,19 @@ static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
case PPC::LFSU:
case PPC::LFDU:
// Add the tied output operand.
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
break;
case PPC::STBU:
case PPC::STHU:
case PPC::STWU:
case PPC::STFSU:
case PPC::STFDU:
- Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base]));
break;
}
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -416,12 +243,12 @@ static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
if (Inst.getOpcode() == PPC::LDU)
// Add the tied output operand.
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
else if (Inst.getOpcode() == PPC::STDU)
- Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base]));
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 2)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -436,7 +263,7 @@ static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 4)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -451,7 +278,7 @@ static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 3));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -466,7 +293,7 @@ static DecodeStatus decodeSPE4Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 2));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -481,7 +308,7 @@ static DecodeStatus decodeSPE2Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 1));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index fd7f81591426..fc29e4effbb1 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -499,43 +499,14 @@ bool PPCInstPrinter::showRegistersWithPrefix() const {
return TT.isOSDarwin() || FullRegNamesWithPercent || FullRegNames;
}
-/// stripRegisterPrefix - This method strips the character prefix from a
-/// register name so that only the number is left.
-static const char *stripRegisterPrefix(const char *RegName) {
- switch (RegName[0]) {
- case 'r':
- case 'f':
- case 'q': // for QPX
- case 'v':
- if (RegName[1] == 's')
- return RegName + 2;
- return RegName + 1;
- case 'c': if (RegName[1] == 'r') return RegName + 2;
- }
-
- return RegName;
-}
-
void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
-
- // There are VSX instructions that use VSX register numbering (vs0 - vs63)
- // as well as those that use VMX register numbering (v0 - v31 which
- // correspond to vs32 - vs63). If we have an instruction that uses VSX
- // numbering, we need to convert the VMX registers to VSX registers.
- // Namely, we print 32-63 when the instruction operates on one of the
- // VMX registers.
- // (Please synchronize with PPCAsmPrinter::printOperand)
- if ((MII.get(MI->getOpcode()).TSFlags & PPCII::UseVSXReg) &&
- !ShowVSRNumsAsVR) {
- if (PPCInstrInfo::isVRRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::V0);
- else if (PPCInstrInfo::isVFRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::VF0);
- }
+ if (!ShowVSRNumsAsVR)
+ Reg = PPCInstrInfo::getRegNumForOperand(MII.get(MI->getOpcode()),
+ Reg, OpNo);
const char *RegName;
RegName = getVerboseConditionRegName(Reg, MRI.getEncodingValue(Reg));
@@ -544,7 +515,7 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (showRegistersWithPercentPrefix(RegName))
O << "%";
if (!showRegistersWithPrefix())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName;
return;
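
Editorial note: the hunk above drops PPCInstPrinter's private stripRegisterPrefix helper in favour of the shared PPCRegisterInfo::stripRegisterPrefix. A minimal standalone sketch of the stripping behaviour, adapted from the helper deleted above (the sketch's name and the main() driver are editorial, not part of the patch):

#include <cassert>
#include <cstring>

// Strip the mnemonic prefix from a PPC register name so only the number is
// left: "r31" -> "31", "vs12" -> "12", "cr7" -> "7"; anything unrecognised is
// returned unchanged.
static const char *stripRegisterPrefixSketch(const char *RegName) {
  switch (RegName[0]) {
  case 'r':
  case 'f':
  case 'q': // QPX registers
  case 'v':
    if (RegName[1] == 's')
      return RegName + 2;
    return RegName + 1;
  case 'c':
    if (RegName[1] == 'r')
      return RegName + 2;
  }
  return RegName;
}

int main() {
  assert(std::strcmp(stripRegisterPrefixSketch("r31"), "31") == 0);
  assert(std::strcmp(stripRegisterPrefixSketch("vs12"), "12") == 0);
  assert(std::strcmp(stripRegisterPrefixSketch("cr7"), "7") == 0);
  return 0;
}
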
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 57bda1403c62..8c15ade6f9c4 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -13,18 +13,13 @@
#include "MCTargetDesc/PPCFixupKinds.h"
#include "PPCInstrInfo.h"
+#include "PPCMCCodeEmitter.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
@@ -39,117 +34,6 @@ using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
-namespace {
-
-class PPCMCCodeEmitter : public MCCodeEmitter {
- const MCInstrInfo &MCII;
- const MCContext &CTX;
- bool IsLittleEndian;
-
-public:
- PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : MCII(mcii), CTX(ctx),
- IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
- PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
- void operator=(const PPCMCCodeEmitter &) = delete;
- ~PPCMCCodeEmitter() override = default;
-
- unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- // getBinaryCodeForInstr - TableGen'erated function for getting the
- // binary encoding for an instruction.
- uint64_t getBinaryCodeForInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
- unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MCII.get(Opcode);
-
- uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
-
- // Output the constant in big/little endian byte order.
- unsigned Size = Desc.getSize();
- support::endianness E = IsLittleEndian ? support::little : support::big;
- switch (Size) {
- case 0:
- break;
- case 4:
- support::endian::write<uint32_t>(OS, Bits, E);
- break;
- case 8:
- // If we emit a pair of instructions, the first one is
- // always in the top 32 bits, even on little-endian.
- support::endian::write<uint32_t>(OS, Bits >> 32, E);
- support::endian::write<uint32_t>(OS, Bits, E);
- break;
- default:
- llvm_unreachable("Invalid instruction size");
- }
-
- ++MCNumEmitted; // Keep track of the # of mi's emitted.
- }
-
-private:
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
-};
-
-} // end anonymous namespace
-
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
@@ -264,10 +148,16 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12;
const MCOperand &MO = MI.getOperand(OpNo);
- assert(MO.isImm() && !(MO.getImm() % 16) &&
- "Expecting an immediate that is a multiple of 16");
+ if (MO.isImm()) {
+ assert(!(MO.getImm() % 16) &&
+ "Expecting an immediate that is a multiple of 16");
+ return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
+ }
- return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
+ // Otherwise add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16ds));
+ return RegBits;
}
unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
@@ -354,6 +244,20 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
}
+// Get the index for this operand in this instruction. This is needed for
+// computing the register number in PPCInstrInfo::getRegNumForOperand() for
+// any instructions that use a different numbering scheme for registers in
+// different operands.
+static unsigned getOpIdxForMO(const MCInst &MI, const MCOperand &MO) {
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ const MCOperand &Op = MI.getOperand(i);
+ if (&Op == &MO)
+ return i;
+ }
+ llvm_unreachable("This operand is not part of this instruction");
+ return ~0U; // Silence any warnings about no return.
+}
+
unsigned PPCMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
@@ -364,14 +268,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- unsigned Reg = MO.getReg();
- unsigned Encode = CTX.getRegisterInfo()->getEncodingValue(Reg);
-
- if ((MCII.get(MI.getOpcode()).TSFlags & PPCII::UseVSXReg))
- if (PPCInstrInfo::isVRRegister(Reg))
- Encode += 32;
-
- return Encode;
+ unsigned OpNo = getOpIdxForMO(MI, MO);
+ unsigned Reg =
+ PPCInstrInfo::getRegNumForOperand(MCII.get(MI.getOpcode()),
+ MO.getReg(), OpNo);
+ return CTX.getRegisterInfo()->getEncodingValue(Reg);
}
assert(MO.isImm() &&
@@ -379,5 +280,42 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return MO.getImm();
}
+void PPCMCCodeEmitter::encodeInstruction(
+ const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+
+ // Output the constant in big/little endian byte order.
+ unsigned Size = getInstSizeInBytes(MI);
+ support::endianness E = IsLittleEndian ? support::little : support::big;
+ switch (Size) {
+ case 0:
+ break;
+ case 4:
+ support::endian::write<uint32_t>(OS, Bits, E);
+ break;
+ case 8:
+ // If we emit a pair of instructions, the first one is
+ // always in the top 32 bits, even on little-endian.
+ support::endian::write<uint32_t>(OS, Bits >> 32, E);
+ support::endian::write<uint32_t>(OS, Bits, E);
+ break;
+ default:
+ llvm_unreachable("Invalid instruction size");
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+// Get the number of bytes used to encode the given MCInst.
+unsigned PPCMCCodeEmitter::getInstSizeInBytes(const MCInst &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ return Desc.getSize();
+}
+
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
new file mode 100644
index 000000000000..a4bcff4b9450
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -0,0 +1,109 @@
+//===-- PPCMCCodeEmitter.h - Convert PPC code to machine code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
+#define LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+
+class PPCMCCodeEmitter : public MCCodeEmitter {
+ const MCInstrInfo &MCII;
+ const MCContext &CTX;
+ bool IsLittleEndian;
+
+public:
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), CTX(ctx),
+ IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
+ void operator=(const PPCMCCodeEmitter &) = delete;
+ ~PPCMCCodeEmitter() override = default;
+
+ unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ // Get the number of bytes used to encode the given MCInst.
+ unsigned getInstSizeInBytes(const MCInst &MI) const;
+
+private:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 316fd2ccf358..d6e450cba0d7 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -17,6 +17,7 @@
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <memory>
@@ -104,4 +105,63 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
#define GET_SUBTARGETINFO_ENUM
#include "PPCGenSubtargetInfo.inc"
+#define PPC_REGS0_31(X) \
+ { \
+ X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
+ X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
+ X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \
+ }
+
+#define PPC_REGS_NO0_31(Z, X) \
+ { \
+ Z, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
+ X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
+ X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \
+ }
+
+#define PPC_REGS_LO_HI(LO, HI) \
+ { \
+ LO##0, LO##1, LO##2, LO##3, LO##4, LO##5, LO##6, LO##7, LO##8, LO##9, \
+ LO##10, LO##11, LO##12, LO##13, LO##14, LO##15, LO##16, LO##17, \
+ LO##18, LO##19, LO##20, LO##21, LO##22, LO##23, LO##24, LO##25, \
+ LO##26, LO##27, LO##28, LO##29, LO##30, LO##31, HI##0, HI##1, HI##2, \
+ HI##3, HI##4, HI##5, HI##6, HI##7, HI##8, HI##9, HI##10, HI##11, \
+ HI##12, HI##13, HI##14, HI##15, HI##16, HI##17, HI##18, HI##19, \
+ HI##20, HI##21, HI##22, HI##23, HI##24, HI##25, HI##26, HI##27, \
+ HI##28, HI##29, HI##30, HI##31 \
+ }
+
+using llvm::MCPhysReg;
+
+#define DEFINE_PPC_REGCLASSES \
+ static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \
+ static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \
+ static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \
+ static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
+ static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
+ static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
+ static const MCPhysReg QFRegs[32] = PPC_REGS0_31(PPC::QF); \
+ static const MCPhysReg RRegsNoR0[32] = \
+ PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
+ static const MCPhysReg XRegsNoX0[32] = \
+ PPC_REGS_NO0_31(PPC::ZERO8, PPC::X); \
+ static const MCPhysReg VSRegs[64] = \
+ PPC_REGS_LO_HI(PPC::VSL, PPC::V); \
+ static const MCPhysReg VSFRegs[64] = \
+ PPC_REGS_LO_HI(PPC::F, PPC::VF); \
+ static const MCPhysReg VSSRegs[64] = \
+ PPC_REGS_LO_HI(PPC::F, PPC::VF); \
+ static const MCPhysReg CRBITRegs[32] = { \
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, \
+ PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, \
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, \
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, \
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, \
+ PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \
+ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \
+ static const MCPhysReg CRRegs[8] = { \
+ PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, \
+ PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7}
+
#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
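
Editorial note: the PPC_REGS0_31 / PPC_REGS_NO0_31 macros above replace the hand-written 32-entry tables deleted from the disassembler at the top of this diff. A self-contained sketch of the expansion mechanics, using a stand-in enum in place of the PPC::R0..PPC::R31 enumerators (MyReg, the array names, and the assertions are editorial, not from the patch):

#include <cassert>
#include <cstdint>

using MCPhysReg = uint16_t;

#define PPC_REGS0_31(X) \
  { X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
    X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
    X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 }

#define PPC_REGS_NO0_31(Z, X) \
  { Z, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
    X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
    X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 }

// Stand-in enumerators; the real header pastes onto PPC::R, PPC::X, PPC::F...
enum MyReg : uint16_t {
  ZERO,
  R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15,
  R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
  R30, R31
};

static const MCPhysReg RRegs[32] = PPC_REGS0_31(R);
static const MCPhysReg RRegsNoR0[32] = PPC_REGS_NO0_31(ZERO, R);

int main() {
  // Decoders index the encoded register number straight into these tables;
  // the "NoR0" variant substitutes ZERO for R0, as decodeMemRI* expects.
  assert(RRegs[0] == R0 && RRegs[31] == R31);
  assert(RRegsNoR0[0] == ZERO && RRegsNoR0[5] == R5);
  return 0;
}
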
diff --git a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
index c6cbb9037ede..17c37964c562 100644
--- a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -111,11 +111,11 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
(instregex "POPCNT(D|W)$"),
(instregex "CMPB(8)?$"),
+ (instregex "SETB(8)?$"),
XSTDIVDP,
XSTSQRTDP,
XSXSIGDP,
XSCVSPDPN,
- SETB,
BPERMD
)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index 80ad4962a20f..98e6e98e6974 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -305,11 +305,11 @@ def : Processor<"generic", G3Itineraries, [Directive32, FeatureHardFloat,
FeatureMFTB]>;
def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureICBT, FeatureBookE,
+ FeatureICBT, FeatureBookE,
FeatureMSYNC, FeatureMFTB]>;
def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureICBT, FeatureBookE,
+ FeatureICBT, FeatureBookE,
FeatureMSYNC, FeatureMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601, FeatureFPU]>;
def : Processor<"602", G3Itineraries, [Directive602, FeatureFPU,
@@ -348,7 +348,7 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE,
FeatureMFTB]>;
def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
- FeatureFRES, FeatureFRSQRTE,
+ FeatureFRES, FeatureFRSQRTE,
FeatureMFTB]>;
def : ProcessorModel<"970", G5Model,
@@ -369,11 +369,11 @@ def : ProcessorModel<"e500", PPCE500Model,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc,
- FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
- FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"a2", PPCA2Model,
[DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
@@ -428,7 +428,7 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
-def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>;
+def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat,
FeatureMFTB]>;
def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat,
@@ -478,3 +478,9 @@ def PPC : Target {
let AssemblyParserVariants = [PPCAsmParserVariant];
let AllowRegisterRenaming = 1;
}
+
+//===----------------------------------------------------------------------===//
+// Pfm Counters
+//===----------------------------------------------------------------------===//
+
+include "PPCPfmCounters.td"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a9da64cc216f..04aa3c9b1e22 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -158,23 +158,6 @@ public:
} // end anonymous namespace
-/// stripRegisterPrefix - This method strips the character prefix from a
-/// register name so that only the number is left. Used by for linux asm.
-static const char *stripRegisterPrefix(const char *RegName) {
- switch (RegName[0]) {
- case 'r':
- case 'f':
- case 'q': // for QPX
- case 'v':
- if (RegName[1] == 's')
- return RegName + 2;
- return RegName + 1;
- case 'c': if (RegName[1] == 'r') return RegName + 2;
- }
-
- return RegName;
-}
-
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
const DataLayout &DL = getDataLayout();
@@ -182,27 +165,15 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
switch (MO.getType()) {
case MachineOperand::MO_Register: {
- unsigned Reg = MO.getReg();
-
- // There are VSX instructions that use VSX register numbering (vs0 - vs63)
- // as well as those that use VMX register numbering (v0 - v31 which
- // correspond to vs32 - vs63). If we have an instruction that uses VSX
- // numbering, we need to convert the VMX registers to VSX registers.
- // Namely, we print 32-63 when the instruction operates on one of the
- // VMX registers.
- // (Please synchronize with PPCInstPrinter::printOperand)
- if (MI->getDesc().TSFlags & PPCII::UseVSXReg) {
- if (PPCInstrInfo::isVRRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::V0);
- else if (PPCInstrInfo::isVFRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::VF0);
- }
+ unsigned Reg = PPCInstrInfo::getRegNumForOperand(MI->getDesc(),
+ MO.getReg(), OpNo);
+
const char *RegName = PPCInstPrinter::getRegisterName(Reg);
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
if (!Subtarget->isDarwin())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName;
return;
}
@@ -279,6 +250,21 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (MI->getOperand(OpNo).isImm())
O << "i";
return false;
+ case 'x':
+ if(!MI->getOperand(OpNo).isReg())
+ return true;
+ // This operand uses VSX numbering.
+ // If the operand is a VMX register, convert it to a VSX register.
+ unsigned Reg = MI->getOperand(OpNo).getReg();
+ if (PPCInstrInfo::isVRRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::V0);
+ else if (PPCInstrInfo::isVFRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ const char *RegName;
+ RegName = PPCInstPrinter::getRegisterName(Reg);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
+ O << RegName;
+ return false;
}
}
@@ -303,7 +289,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
{
const char *RegName = "r0";
if (!Subtarget->isDarwin())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName << ", ";
printOperand(MI, OpNo, O);
return false;
@@ -341,7 +327,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
}
void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
}
void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
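
Editorial note: the new 'x' inline-asm print modifier above converts a VMX register operand to its VSX number before printing, using the mapping described in the comment removed from printOperand: v0-v31 alias vs32-vs63, so v<N> is shown as 32+N. A tiny sketch of that arithmetic (the function name is invented for illustration):

#include <cassert>

// v0-v31 occupy the upper half of the 64 VSX registers, so when an operand
// must be printed with VSX numbering, v<N> becomes vs<32 + N>.
static unsigned vsxNumberForVMX(unsigned VMXNum) {
  assert(VMXNum < 32 && "only v0-v31 alias VSX registers");
  return 32 + VMXNum;
}

int main() {
  assert(vsxNumberForVMX(0) == 32);  // v0  -> vs32
  assert(vsxNumberForVMX(31) == 63); // v31 -> vs63
  return 0;
}
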
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 12c581023234..22842d516e7d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -338,7 +338,7 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
// coldcc calling convection marks most registers as non-volatile.
// Do not include r1 since the stack pointer is never considered a CSR.
// Do not include r2, since it is the TOC register and is added depending
-// on wether or not the function uses the TOC and is a non-leaf.
+// on whether or not the function uses the TOC and is a non-leaf.
// Do not include r0,r11,r13 as they are optional in functional linkage
// and value may be altered by inter-library calls.
// Do not include r12 as it is used as a scratch register.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
index fe41e1b36a5d..a03e691ef5bb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -392,7 +392,7 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
// liveness state at the end of MBB (liveOut of MBB) as the liveIn for
// NewSuccessor. Otherwise, will cause cyclic dependence.
LivePhysRegs LPR(*MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
- SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 2> Clobbers;
for (MachineInstr &MI : *MBB)
LPR.stepForward(MI, Clobbers);
for (auto &LI : LPR)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index f212894035db..3b2d92db78b9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -861,8 +861,20 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
}
}
+ unsigned SrcReg1 = getRegForValue(SrcValue1);
+ if (SrcReg1 == 0)
+ return false;
+
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(SrcValue2);
+ if (SrcReg2 == 0)
+ return false;
+ }
+
unsigned CmpOpc;
bool NeedsExt = false;
+ auto RC = MRI.getRegClass(SrcReg1);
switch (SrcVT.SimpleTy) {
default: return false;
case MVT::f32:
@@ -879,8 +891,15 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
CmpOpc = PPC::EFSCMPGT;
break;
}
- } else
+ } else {
CmpOpc = PPC::FCMPUS;
+ if (isVSSRCRegClass(RC)) {
+ unsigned TmpReg = createResultReg(&PPC::F4RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg1);
+ SrcReg1 = TmpReg;
+ }
+ }
break;
case MVT::f64:
if (HasSPE) {
@@ -896,14 +915,17 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
CmpOpc = PPC::EFDCMPGT;
break;
}
- } else
+ } else if (isVSFRCRegClass(RC)) {
+ CmpOpc = PPC::XSCMPUDP;
+ } else {
CmpOpc = PPC::FCMPUD;
+ }
break;
case MVT::i1:
case MVT::i8:
case MVT::i16:
NeedsExt = true;
- // Intentional fall-through.
+ LLVM_FALLTHROUGH;
case MVT::i32:
if (!UseImm)
CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
@@ -918,17 +940,6 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
break;
}
- unsigned SrcReg1 = getRegForValue(SrcValue1);
- if (SrcReg1 == 0)
- return false;
-
- unsigned SrcReg2 = 0;
- if (!UseImm) {
- SrcReg2 = getRegForValue(SrcValue2);
- if (SrcReg2 == 0)
- return false;
- }
-
if (NeedsExt) {
unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
@@ -2354,7 +2365,8 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
return true;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 84dacf396462..8263954994d2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -17,6 +17,7 @@
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,6 +29,16 @@
using namespace llvm;
+#define DEBUG_TYPE "framelowering"
+STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
+STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
+STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+
+static cl::opt<bool>
+EnablePEVectorSpills("ppc-enable-pe-vector-spills",
+ cl::desc("Enable spills in prologue to vector registers."),
+ cl::init(false), cl::Hidden);
+
/// VRRegNo - Map from a numbered VR register to its enum value.
///
static const MCPhysReg VRRegNo[] = {
@@ -466,6 +477,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Check whether we can skip adjusting the stack pointer (by using red zone)
if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
+ NumNoNeedForFrame++;
// No need for frame
if (UpdateMF)
MFI.setStackSize(0);
@@ -1213,11 +1225,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
continue;
}
- int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (CSI[I].isSpilledToReg()) {
+ unsigned SpilledReg = CSI[I].getDstReg();
+ unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
+ nullptr, MRI->getDwarfRegNum(Reg, true),
+ MRI->getDwarfRegNum(SpilledReg, true)));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIRegister);
+ } else {
+ int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
}
}
}
@@ -1822,17 +1843,19 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
// Move general register save area spill slots down, taking into account
// the size of the Floating-point register save area.
for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
- int FI = GPRegs[i].getFrameIdx();
-
- MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ if (!GPRegs[i].isSpilledToReg()) {
+ int FI = GPRegs[i].getFrameIdx();
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ }
}
// Move general register save area spill slots down, taking into account
// the size of the Floating-point register save area.
for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
- int FI = G8Regs[i].getFrameIdx();
-
- MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ if (!G8Regs[i].isSpilledToReg()) {
+ int FI = G8Regs[i].getFrameIdx();
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ }
}
unsigned MinReg =
@@ -1947,6 +1970,64 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
}
}
+// This function checks if a callee saved gpr can be spilled to a volatile
+// vector register. This occurs for leaf functions when the option
+// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
+// which were not spilled to vectors, return false so the target independent
+// code can handle them by assigning a FrameIdx to a stack slot.
+bool PPCFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+
+ if (CSI.empty())
+ return true; // Early exit if no callee saved registers are modified!
+
+ // Early exit if cannot spill gprs to volatile vector registers.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
+ return false;
+
+ // Build a BitVector of VSRs that can be used for spilling GPRs.
+ BitVector BVAllocatable = TRI->getAllocatableSet(MF);
+ BitVector BVCalleeSaved(TRI->getNumRegs());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ BVCalleeSaved.set(CSRegs[i]);
+
+ for (unsigned Reg : BVAllocatable.set_bits()) {
+ // Set to 0 if the register is not a volatile VF/F8 register, or if it is
+ // used in the function.
+ if (BVCalleeSaved[Reg] ||
+ (!PPC::F8RCRegClass.contains(Reg) &&
+ !PPC::VFRCRegClass.contains(Reg)) ||
+ (MF.getRegInfo().isPhysRegUsed(Reg)))
+ BVAllocatable.reset(Reg);
+ }
+
+ bool AllSpilledToReg = true;
+ for (auto &CS : CSI) {
+ if (BVAllocatable.none())
+ return false;
+
+ unsigned Reg = CS.getReg();
+ if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
+ AllSpilledToReg = false;
+ continue;
+ }
+
+ unsigned VolatileVFReg = BVAllocatable.find_first();
+ if (VolatileVFReg < BVAllocatable.size()) {
+ CS.setDstReg(VolatileVFReg);
+ BVAllocatable.reset(VolatileVFReg);
+ } else {
+ AllSpilledToReg = false;
+ }
+ }
+ return AllSpilledToReg;
+}
+
+
bool
PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -2012,12 +2093,18 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CSI[i].getFrameIdx()));
}
} else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- // Use !IsLiveIn for the kill flag.
- // We do not want to kill registers that are live in this function
- // before their use because they will become undefined registers.
- TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
- CSI[i].getFrameIdx(), RC, TRI);
+ if (CSI[i].isSpilledToReg()) {
+ NumPESpillVSR++;
+ BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
+ .addReg(Reg, getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ // Use !IsLiveIn for the kill flag.
+ // We do not want to kill registers that are live in this function
+ // before their use because they will become undefined registers.
+ TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
}
}
return true;
@@ -2157,13 +2244,19 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
CR2Spilled = CR3Spilled = CR4Spilled = false;
}
- // Default behavior for non-CR saves.
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
- RC, TRI);
- assert(I != MBB.begin() &&
- "loadRegFromStackSlot didn't insert any code!");
+ if (CSI[i].isSpilledToReg()) {
+ DebugLoc DL;
+ NumPEReloadVSR++;
+ BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
+ .addReg(CSI[i].getDstReg(), getKillRegState(true));
+ } else {
+ // Default behavior for non-CR saves.
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ assert(I != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
}
+ }
// Insert in reverse order.
if (AtStart)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 01c155594c44..69bd1484d6e5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -99,6 +99,13 @@ public:
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override;
+ /// This function will assign callee saved gprs to volatile vector registers
+ /// for prologue spills when applicable. It returns false if there are any
+ /// registers which were not spilled to volatile vector registers.
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 793a4dd7f624..5f6966cecd61 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -103,7 +103,7 @@ bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
case PPC::Sched::IIC_LdStLHA:
case PPC::Sched::IIC_LdStLHAU:
case PPC::Sched::IIC_LdStLWA:
- case PPC::Sched::IIC_LdStSTDU:
+ case PPC::Sched::IIC_LdStSTU:
case PPC::Sched::IIC_LdStSTFDU:
NSlots = 2;
break;
@@ -112,7 +112,7 @@ bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
case PPC::Sched::IIC_LdStLHAUX:
case PPC::Sched::IIC_LdStLWARX:
case PPC::Sched::IIC_LdStLDARX:
- case PPC::Sched::IIC_LdStSTDUX:
+ case PPC::Sched::IIC_LdStSTUX:
case PPC::Sched::IIC_LdStSTDCX:
case PPC::Sched::IIC_LdStSTWCX:
case PPC::Sched::IIC_BrMCRX: // mtcr
@@ -180,9 +180,8 @@ void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
CurGroup.clear();
CurSlots = CurBranches = 0;
} else {
- LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << SU->NodeNum
- << "): ");
- LLVM_DEBUG(DAG->dumpNode(SU));
+ LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: ");
+ LLVM_DEBUG(DAG->dumpNode(*SU));
unsigned NSlots;
bool MustBeFirst = mustComeFirst(MCID, NSlots);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6cec664d1e66..31acd0ff870f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -81,6 +81,8 @@ STATISTIC(NumLogicOpsOnComparison,
"Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
"Number of compares not eliminated as they have non-extending uses.");
+STATISTIC(NumP9Setb,
+ "Number of compares lowered to setb.");
// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
@@ -327,7 +329,6 @@ private:
bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
void transferMemOperands(SDNode *N, SDNode *Result);
- MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBit = nullptr);
};
} // end anonymous namespace
@@ -490,7 +491,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
- const TerminatorInst *BBTerm = BB->getTerminator();
+ const Instruction *BBTerm = BB->getTerminator();
if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
@@ -687,9 +688,8 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
- KnownBits LKnown, RKnown;
- CurDAG->computeKnownBits(Op0, LKnown);
- CurDAG->computeKnownBits(Op1, RKnown);
+ KnownBits LKnown = CurDAG->computeKnownBits(Op0);
+ KnownBits RKnown = CurDAG->computeKnownBits(Op1);
unsigned TargetMask = LKnown.Zero.getZExtValue();
unsigned InsertMask = RKnown.Zero.getZExtValue();
@@ -733,8 +733,7 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
// The AND mask might not be a constant, and we need to make sure that
// if we're going to fold the masking with the insert, all bits not
// know to be zero in the mask are known to be one.
- KnownBits MKnown;
- CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
+ KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
unsigned SHOpc = Op1.getOperand(0).getOpcode();
@@ -1083,9 +1082,14 @@ class BitPermutationSelector {
// lowest-order bit.
unsigned Idx;
+ // ConstZero means a bit we need to mask off.
+    // Variable is a bit that comes from an input variable.
+    // VariableKnownToBeZero is also a bit that comes from an input variable,
+    // but it is known to be already zero, so we do not need to mask it.
enum Kind {
ConstZero,
- Variable
+ Variable,
+ VariableKnownToBeZero
} K;
ValueBit(SDValue V, unsigned I, Kind K = Variable)
@@ -1094,11 +1098,11 @@ class BitPermutationSelector {
: V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
bool isZero() const {
- return K == ConstZero;
+ return K == ConstZero || K == VariableKnownToBeZero;
}
bool hasValue() const {
- return K == Variable;
+ return K == Variable || K == VariableKnownToBeZero;
}
SDValue getValue() const {
@@ -1248,8 +1252,14 @@ class BitPermutationSelector {
for (unsigned i = 0; i < NumBits; ++i)
if (((Mask >> i) & 1) == 1)
Bits[i] = (*LHSBits)[i];
- else
- Bits[i] = ValueBit(ValueBit::ConstZero);
+ else {
+ // AND instruction masks this bit. If the input is already zero,
+ // we have nothing to do here. Otherwise, make the bit ConstZero.
+ if ((*LHSBits)[i].isZero())
+ Bits[i] = (*LHSBits)[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+ }
return std::make_pair(Interesting, &Bits);
}
@@ -1259,8 +1269,26 @@ class BitPermutationSelector {
const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
bool AllDisjoint = true;
- for (unsigned i = 0; i < NumBits; ++i)
- if (LHSBits[i].isZero())
+ SDValue LastVal = SDValue();
+ unsigned LastIdx = 0;
+ for (unsigned i = 0; i < NumBits; ++i) {
+ if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
+ // If both inputs are known to be zero and one is ConstZero and
+ // another is VariableKnownToBeZero, we can select whichever
+ // we like. To minimize the number of bit groups, we select
+ // VariableKnownToBeZero if this bit is the next bit of the same
+ // input variable from the previous bit. Otherwise, we select
+ // ConstZero.
+ if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
+ LHSBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = LHSBits[i];
+ else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
+ RHSBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = RHSBits[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+ }
+ else if (LHSBits[i].isZero())
Bits[i] = RHSBits[i];
else if (RHSBits[i].isZero())
Bits[i] = LHSBits[i];
@@ -1268,6 +1296,16 @@ class BitPermutationSelector {
AllDisjoint = false;
break;
}
+ // We remember the value and bit index of this bit.
+ if (Bits[i].hasValue()) {
+ LastVal = Bits[i].getValue();
+ LastIdx = Bits[i].getValueBitIndex();
+ }
+ else {
+ if (LastVal) LastVal = SDValue();
+ LastIdx = 0;
+ }
+ }
if (!AllDisjoint)
break;
@@ -1293,6 +1331,72 @@ class BitPermutationSelector {
return std::make_pair(Interesting, &Bits);
}
+ case ISD::TRUNCATE: {
+ EVT FromType = V.getOperand(0).getValueType();
+ EVT ToType = V.getValueType();
+ // We support only the case with truncate from i64 to i32.
+ if (FromType != MVT::i64 || ToType != MVT::i32)
+ break;
+ const unsigned NumAllBits = FromType.getSizeInBits();
+ SmallVector<ValueBit, 64> *InBits;
+ std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
+ NumAllBits);
+ const unsigned NumValidBits = ToType.getSizeInBits();
+
+ // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
+ // So, we cannot include this truncate.
+ bool UseUpper32bit = false;
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
+ UseUpper32bit = true;
+ break;
+ }
+ if (UseUpper32bit)
+ break;
+
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = (*InBits)[i];
+
+ return std::make_pair(Interesting, &Bits);
+ }
+ case ISD::AssertZext: {
+ // For AssertZext, we look through the operand and
+ // mark the bits known to be zero.
+ const SmallVector<ValueBit, 64> *LHSBits;
+ std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
+ NumBits);
+
+ EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
+ const unsigned NumValidBits = FromType.getSizeInBits();
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = (*LHSBits)[i];
+
+ // These bits are known to be zero.
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = ValueBit((*LHSBits)[i].getValue(),
+ (*LHSBits)[i].getValueBitIndex(),
+ ValueBit::VariableKnownToBeZero);
+
+ return std::make_pair(Interesting, &Bits);
+ }
+ case ISD::LOAD:
+ LoadSDNode *LD = cast<LoadSDNode>(V);
+ if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
+ EVT VT = LD->getMemoryVT();
+ const unsigned NumValidBits = VT.getSizeInBits();
+
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = ValueBit(V, i);
+
+ // These bits are known to be zero.
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
+
+ // Zero-extending load itself cannot be optimized. So, it is not
+ // interesting by itself though it gives useful information.
+ return std::make_pair(Interesting = false, &Bits);
+ }
+ break;
}
for (unsigned i = 0; i < NumBits; ++i)
@@ -1304,7 +1408,7 @@ class BitPermutationSelector {
// For each value (except the constant ones), compute the left-rotate amount
// to get it from its original to final position.
void computeRotationAmounts() {
- HasZeros = false;
+ NeedMask = false;
RLAmt.resize(Bits.size());
for (unsigned i = 0; i < Bits.size(); ++i)
if (Bits[i].hasValue()) {
@@ -1314,7 +1418,7 @@ class BitPermutationSelector {
else
RLAmt[i] = Bits.size() - (VBI - i);
} else if (Bits[i].isZero()) {
- HasZeros = true;
+ NeedMask = true;
RLAmt[i] = UINT32_MAX;
} else {
llvm_unreachable("Unknown value bit type");
@@ -1330,6 +1434,7 @@ class BitPermutationSelector {
unsigned LastRLAmt = RLAmt[0];
SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
unsigned LastGroupStartIdx = 0;
+ bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
for (unsigned i = 1; i < Bits.size(); ++i) {
unsigned ThisRLAmt = RLAmt[i];
SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
@@ -1342,10 +1447,20 @@ class BitPermutationSelector {
LastGroupStartIdx = 0;
}
+ // If this bit is known to be zero and the current group is a bit group
+      // of zeros, we do not need to terminate the current bit group even if the
+      // Value or RLAmt does not match here. Instead, we terminate this group
+ // when the first non-zero bit appears later.
+ if (IsGroupOfZeros && Bits[i].isZero())
+ continue;
+
// If this bit has the same underlying value and the same rotate factor as
// the last one, then they're part of the same group.
if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
- continue;
+        // We cannot continue the current group if this bit is not known to
+        // be zero in a bit group of zeros.
+ if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
+ continue;
if (LastValue.getNode())
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1353,6 +1468,7 @@ class BitPermutationSelector {
LastRLAmt = ThisRLAmt;
LastValue = ThisValue;
LastGroupStartIdx = i;
+ IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
}
if (LastValue.getNode())
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1401,7 +1517,7 @@ class BitPermutationSelector {
for (auto &I : ValueRots) {
ValueRotsVec.push_back(I.second);
}
- llvm::sort(ValueRotsVec.begin(), ValueRotsVec.end());
+ llvm::sort(ValueRotsVec);
}
// In 64-bit mode, rlwinm and friends have a rotation operator that
@@ -1588,6 +1704,17 @@ class BitPermutationSelector {
return ExtVal;
}
+ SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
+ if (V.getValueSizeInBits() == 32)
+ return V;
+
+ assert(V.getValueSizeInBits() == 64);
+ SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+ SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
+ MVT::i32, V, SubRegIdx), 0);
+ return SubVal;
+ }
+
// Depending on the number of groups for a particular value, it might be
// better to rotate, mask explicitly (using andi/andis), and then or the
// result. Select this part of the result first.
@@ -1646,12 +1773,12 @@ class BitPermutationSelector {
SDValue VRot;
if (VRI.RLAmt) {
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
- getI32Imm(31, dl) };
+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+ getI32Imm(0, dl), getI32Imm(31, dl) };
VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
Ops), 0);
} else {
- VRot = VRI.V;
+ VRot = TruncateToInt32(VRI.V, dl);
}
SDValue ANDIVal, ANDISVal;
@@ -1698,17 +1825,17 @@ class BitPermutationSelector {
// If we've not yet selected a 'starting' instruction, and we have no zeros
// to fill in, select the (Value, RLAmt) with the highest priority (largest
// number of groups), and start with this rotated value.
- if ((!HasZeros || LateMask) && !Res) {
+ if ((!NeedMask || LateMask) && !Res) {
ValueRotInfo &VRI = ValueRotsVec[0];
if (VRI.RLAmt) {
if (InstCnt) *InstCnt += 1;
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
- getI32Imm(31, dl) };
+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+ getI32Imm(0, dl), getI32Imm(31, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
0);
} else {
- Res = VRI.V;
+ Res = TruncateToInt32(VRI.V, dl);
}
// Now, remove all groups with this underlying value and rotation factor.
@@ -1723,13 +1850,13 @@ class BitPermutationSelector {
for (auto &BG : BitGroups) {
if (!Res) {
SDValue Ops[] =
- { BG.V, getI32Imm(BG.RLAmt, dl),
+ { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
} else {
SDValue Ops[] =
- { Res, BG.V, getI32Imm(BG.RLAmt, dl),
+ { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
@@ -2077,7 +2204,7 @@ class BitPermutationSelector {
// If we've not yet selected a 'starting' instruction, and we have no zeros
// to fill in, select the (Value, RLAmt) with the highest priority (largest
// number of groups), and start with this rotated value.
- if ((!HasZeros || LateMask) && !Res) {
+ if ((!NeedMask || LateMask) && !Res) {
// If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
// groups will come first, and so the VRI representing the largest number
// of groups might not be first (it might be the first Repl32 groups).
@@ -2230,7 +2357,7 @@ class BitPermutationSelector {
SmallVector<ValueBit, 64> Bits;
- bool HasZeros;
+ bool NeedMask;
SmallVector<unsigned, 64> RLAmt;
SmallVector<BitGroup, 16> BitGroups;
@@ -2259,10 +2386,10 @@ public:
" selection for: ");
LLVM_DEBUG(N->dump(CurDAG));
- // Fill it RLAmt and set HasZeros.
+ // Fill in RLAmt and set NeedMask.
computeRotationAmounts();
- if (!HasZeros)
+ if (!NeedMask)
return Select(N, false);
// We currently have two techniques for handling results with zeros: early
@@ -4045,54 +4172,148 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
-/// This method returns a node after flipping the MSB of each element
-/// of vector integer type. Additionally, if SignBitVec is non-null,
-/// this method sets a node with one at MSB of all elements
-/// and zero at other bits in SignBitVec.
-MachineSDNode *
-PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) {
- SDLoc dl(N);
- EVT VecVT = N.getValueType();
- if (VecVT == MVT::v4i32) {
- if (SignBitVec) {
- SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32);
- *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT,
- SDValue(ZV, 0));
- }
- return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N);
- }
- else if (VecVT == MVT::v8i16) {
- SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32,
- getI32Imm(0x8000, dl));
- SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32,
- SDValue(Hi, 0),
- getI32Imm(0x8000, dl));
- SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT,
- SDValue(ScaImm, 0));
- /*
- Alternatively, we can do this as follow to use VRF instead of GPR.
- vspltish 5, 1
- vspltish 6, 15
- vslh 5, 6, 5
- */
- if (SignBitVec) *SignBitVec = VecImm;
- return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N,
- SDValue(VecImm, 0));
- }
- else if (VecVT == MVT::v16i8) {
- SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32,
- getI32Imm(0x80, dl));
- if (SignBitVec) *SignBitVec = VecImm;
- return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N,
- SDValue(VecImm, 0));
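+// Check whether this SELECT_CC node matches the semantics of the ISA 3.0
+// setb instruction, which produces -1, 0, or 1 for less-than, equal, or
+// greater-than respectively. On success, NeedSwapOps reports whether the
+// compared operands must be swapped and IsUnCmp whether the comparison is
+// unsigned.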
+static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
+ bool &NeedSwapOps, bool &IsUnCmp) {
+
+ assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue TrueRes = N->getOperand(2);
+ SDValue FalseRes = N->getOperand(3);
+ ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
+ if (!TrueConst)
+ return false;
+
+ assert((N->getSimpleValueType(0) == MVT::i64 ||
+ N->getSimpleValueType(0) == MVT::i32) &&
+ "Expecting either i64 or i32 here.");
+
+ // We are looking for any of:
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
+ int64_t TrueResVal = TrueConst->getSExtValue();
+ if ((TrueResVal < -1 || TrueResVal > 1) ||
+ (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
+ (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
+ (TrueResVal == 0 &&
+ (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
+ return false;
+
+ bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
+ SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
+ if (SetOrSelCC.getOpcode() != ISD::SETCC &&
+ SetOrSelCC.getOpcode() != ISD::SELECT_CC)
+ return false;
+
+ // Without this setb optimization, the outer SELECT_CC will be manually
+ // selected to a SELECT_CC_I4/SELECT_CC_I8 pseudo, and the expand-isel-pseudos
+ // pass then turns that pseudo into an isel instruction. When the result has
+ // more than one use (e.g. through zext/sext), this optimization would only
+ // replace the isel with a setb without any significant gain. Since setb has
+ // a longer latency than the original isel, we should avoid it in that case.
+ // Another point is that setb requires the comparison to always be kept,
+ // which could prevent us from eliminating the comparison in the future.
+ if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
+ return false;
+
+ SDValue InnerLHS = SetOrSelCC.getOperand(0);
+ SDValue InnerRHS = SetOrSelCC.getOperand(1);
+ ISD::CondCode InnerCC =
+ cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
+ // If the inner comparison is a select_cc, make sure the true/false values are
+ // 1/-1 and canonicalize it if needed.
+ if (InnerIsSel) {
+ ConstantSDNode *SelCCTrueConst =
+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
+ ConstantSDNode *SelCCFalseConst =
+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
+ if (!SelCCTrueConst || !SelCCFalseConst)
+ return false;
+ int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
+ int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
+ // The values must be -1/1 (requiring a swap) or 1/-1.
+ if (SelCCTVal == -1 && SelCCFVal == 1) {
+ std::swap(InnerLHS, InnerRHS);
+ } else if (SelCCTVal != 1 || SelCCFVal != -1)
+ return false;
}
- else
- llvm_unreachable("Unsupported vector data type for flipSignBit");
+
+ // Canonicalize unsigned case
+ if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
+ IsUnCmp = true;
+ InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
+ }
+
+ bool InnerSwapped = false;
+ if (LHS == InnerRHS && RHS == InnerLHS)
+ InnerSwapped = true;
+ else if (LHS != InnerLHS || RHS != InnerRHS)
+ return false;
+
+ switch (CC) {
+ // (select_cc lhs, rhs, 0, \
+ // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
+ case ISD::SETEQ:
+ if (!InnerIsSel)
+ return false;
+ if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
+ return false;
+ NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
+ break;
+
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
+ case ISD::SETULT:
+ if (!IsUnCmp && InnerCC != ISD::SETNE)
+ return false;
+ IsUnCmp = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETLT:
+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
+ (InnerCC == ISD::SETLT && InnerSwapped))
+ NeedSwapOps = (TrueResVal == 1);
+ else
+ return false;
+ break;
+
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
+ case ISD::SETUGT:
+ if (!IsUnCmp && InnerCC != ISD::SETNE)
+ return false;
+ IsUnCmp = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETGT:
+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
+ (InnerCC == ISD::SETGT && InnerSwapped))
+ NeedSwapOps = (TrueResVal == -1);
+ else
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
+ LLVM_DEBUG(N->dump());
+
+ return true;
}
// Select - Convert the specified operand from a target-independent to a
@@ -4429,8 +4650,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
- KnownBits LHSKnown;
- CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);
+ KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
// If this is equivalent to an add, then we can fold it with the
// FrameIndex calculation.
@@ -4557,6 +4777,31 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
N->getOperand(0).getValueType() == MVT::i1)
break;
+ if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
+ bool NeedSwapOps = false;
+ bool IsUnCmp = false;
+ if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (NeedSwapOps)
+ std::swap(LHS, RHS);
+
+ // Make use of SelectCC to generate the comparison that sets the CR bits.
+ // For equality comparisons with one literal operand, SelectCC may not
+ // materialize the whole literal and instead check it with xoris first; in
+ // that case the resulting comparison cannot exactly represent the GT/LT
+ // relationship. To avoid this, we specify SETGT/SETUGT here instead of
+ // SETEQ.
+ SDValue GenCC =
+ SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
+ CurDAG->SelectNodeTo(
+ N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
+ N->getValueType(0), GenCC);
+ NumP9Setb++;
+ return;
+ }
+ }
+
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
if (!isPPC64)
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
@@ -4648,14 +4893,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
return;
}
- case ISD::VSELECT:
- if (PPCSubTarget->hasVSX()) {
- SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
- CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
- return;
- }
- break;
-
case ISD::VECTOR_SHUFFLE:
if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
@@ -4683,11 +4920,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
+ MachineMemOperand *MemOp = LD->getMemOperand();
SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
N->getValueType(0), Ops);
- cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
return;
}
}
@@ -4753,6 +4989,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
}
+ // A signed comparison of i1 values produces the opposite result to an
+ // unsigned one if the condition code includes less-than or greater-than.
+ // This is because 1 is the most negative signed i1 number and the most
+ // positive unsigned i1 number. The CR-logical operations used for such
+ // comparisons are non-commutative so for signed comparisons vs. unsigned
+ // ones, the input operands just need to be swapped.
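+ // For example, the i1 value 1 is -1 when interpreted as signed, so
+ // 1 < 0 signed but 1 > 0 unsigned.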
+ if (ISD::isSignedIntSetCC(CC))
+ Swap = !Swap;
+
SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
N->getOperand(Swap ? 3 : 2),
N->getOperand(Swap ? 2 : 3)), 0);
@@ -4809,9 +5054,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDValue TOCbase = N->getOperand(1);
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
-
- if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
- CModel == CodeModel::Large) {
+ if (PPCLowering->isAccessedAsGotIndirect(GA)) {
+ // If the address is accessed GOT-indirectly, we need an extra LD to load
+ // the address.
SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
transferMemOperands(N, MN);
@@ -4819,18 +5064,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
- const GlobalValue *GV = G->getGlobal();
- unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
- if (GVFlags & PPCII::MO_NLP_FLAG) {
- SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
- transferMemOperands(N, MN);
- ReplaceNode(N, MN);
- return;
- }
- }
-
+ // Build the address relative to the TOC pointer.
ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA));
return;
@@ -4916,55 +5150,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
}
- case ISD::ABS: {
- assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector");
-
- // For vector absolute difference, we use VABSDUW instruction of POWER9.
- // Since VABSDU instructions are for unsigned integers, we need adjustment
- // for signed integers.
- // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000).
- // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.
- // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000).
- EVT VecVT = N->getOperand(0).getValueType();
- SDNode *AbsOp = nullptr;
- unsigned AbsOpcode;
-
- if (VecVT == MVT::v4i32)
- AbsOpcode = PPC::VABSDUW;
- else if (VecVT == MVT::v8i16)
- AbsOpcode = PPC::VABSDUH;
- else if (VecVT == MVT::v16i8)
- AbsOpcode = PPC::VABSDUB;
- else
- llvm_unreachable("Unsupported vector data type for ISD::ABS");
-
- // Even for signed integers, we can skip adjustment if all values are
- // known to be positive (as signed integer) due to zero-extended inputs.
- if (N->getOperand(0).getOpcode() == ISD::SUB &&
- N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
- N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND) {
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
- SDValue(N->getOperand(0)->getOperand(0)),
- SDValue(N->getOperand(0)->getOperand(1)));
- ReplaceNode(N, AbsOp);
- return;
- }
- if (N->getOperand(0).getOpcode() == ISD::SUB) {
- SDValue SubVal = N->getOperand(0);
- SDNode *Op0 = flipSignBit(SubVal->getOperand(0));
- SDNode *Op1 = flipSignBit(SubVal->getOperand(1));
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
- SDValue(Op0, 0), SDValue(Op1, 0));
- }
- else {
- SDNode *Op1 = nullptr;
- SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1);
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0),
- SDValue(Op1, 0));
- }
- ReplaceNode(N, AbsOp);
- return;
- }
}
SelectCode(N);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b5bdf47ce37a..39608cb74bee 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -251,12 +251,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UREM, MVT::i64, Expand);
}
- if (Subtarget.hasP9Vector()) {
- setOperationAction(ISD::ABS, MVT::v4i32, Legal);
- setOperationAction(ISD::ABS, MVT::v8i16, Legal);
- setOperationAction(ISD::ABS, MVT::v16i8, Legal);
- }
-
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
@@ -323,12 +317,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// to speed up scalar BSWAP64.
// CTPOP or CTTZ were introduced in P8/P9 respectively
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- if (Subtarget.isISA3_0()) {
+ if (Subtarget.hasP9Vector())
setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
+ else
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ if (Subtarget.isISA3_0()) {
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
} else {
- setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
}
@@ -554,6 +550,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Custom);
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
@@ -586,6 +583,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::SELECT_CC, VT, Promote);
AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
setOperationAction(ISD::STORE, VT, Promote);
@@ -626,7 +624,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
@@ -659,6 +656,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ // Without hasP8Altivec set, v2i64 SMAX isn't available.
+ // But ABS custom lowering requires SMAX support.
+ if (!Subtarget.hasP8Altivec())
+ setOperationAction(ISD::ABS, MVT::v2i64, Expand);
+
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
@@ -727,12 +729,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
- setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
- setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
-
// Share the Altivec comparison restrictions.
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
@@ -792,12 +788,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
- // Vector operation legalization checks the result type of
- // SIGN_EXTEND_INREG, overall legalization checks the inner type.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+ // Custom handling for partial vectors of integers converted to
+ // floating point. We already have optimal handling for v2i32 through
+ // the DAG combine, so those aren't necessary.
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
@@ -1055,6 +1056,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
@@ -1076,6 +1078,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::TRUNCATE);
+
if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SETCC);
@@ -1088,6 +1092,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::FSQRT);
}
+ if (Subtarget.hasP9Altivec()) {
+ setTargetDAGCombine(ISD::ABS);
+ setTargetDAGCombine(ISD::VSELECT);
+ }
+
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -1348,6 +1357,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::RFEBB: return "PPCISD::RFEBB";
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
+ case PPCISD::VABSD: return "PPCISD::VABSD";
case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
@@ -1355,6 +1365,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::QBFLT: return "PPCISD::QBFLT";
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
+ case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
}
return nullptr;
}
@@ -2214,11 +2225,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are provably
// disjoint.
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(N.getOperand(0), LHSKnown);
+ KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if (LHSKnown.Zero.getBoolValue()) {
- DAG.computeKnownBits(N.getOperand(1), RHSKnown);
+ KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
@@ -2317,8 +2327,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
- KnownBits LHSKnown;
- DAG.computeKnownBits(N.getOperand(0), LHSKnown);
+ KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
@@ -2405,6 +2414,28 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
return true;
}
+/// Returns true if we should use a direct load-into-vector instruction
+/// (such as lxsd or lfd) instead of a load into a GPR plus a direct move.
+static bool usePartialVectorLoads(SDNode *N) {
+ if (!N->hasOneUse())
+ return false;
+
+ // If there are any uses other than a scalar_to_vector, then we should keep
+ // it as a scalar load -> direct move pattern to prevent multiple loads.
+ // Currently, we only check for i64 since we have lxsd/lfd to do this
+ // efficiently, but no update-form equivalent.
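+ // For example, a (scalar_to_vector (load i64)) whose load has no other
+ // users is better served by a single lxsd/lfd than by a GPR load followed
+ // by a direct move.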
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ EVT MemVT = LD->getMemoryVT();
+ if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) {
+ SDNode *User = *(LD->use_begin());
+ if (User->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -2430,6 +2461,13 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
} else
return false;
+ // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
+ // instructions, because we can fold these into a more efficient instruction
+ // instead (such as LXSD).
+ if (isLoad && usePartialVectorLoads(N)) {
+ return false;
+ }
+
// PowerPC doesn't have preinc load/store instructions for vectors (except
// for QPX, which does have preinc r+r forms).
if (VT.isVector()) {
@@ -2674,7 +2712,8 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual BlockAddress is stored in the TOC.
- if (Subtarget.isSVR4ABI() && isPositionIndependent()) {
+ if (Subtarget.isSVR4ABI() &&
+ (Subtarget.isPPC64() || isPositionIndependent())) {
if (Subtarget.isPPC64())
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
@@ -3480,9 +3519,14 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Argument stored in memory.
assert(VA.isMemLoc());
+ // Get the extended size of the argument type on the stack.
unsigned ArgSize = VA.getLocVT().getStoreSize();
- int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- isImmutable);
+ // Get the actual size of the argument type
+ unsigned ObjSize = VA.getValVT().getStoreSize();
+ unsigned ArgOffset = VA.getLocMemOffset();
+ // Stack objects in PPC32 are right justified.
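+ // e.g. a 1-byte value in a 4-byte slot is located at slot offset 3.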
+ ArgOffset += ArgSize - ObjSize;
+ int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -3935,7 +3979,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
"Invalid QPX parameter type");
- /* fall through */
+ LLVM_FALLTHROUGH;
case MVT::v4f64:
case MVT::v4i1:
@@ -5053,9 +5097,15 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
// All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
// into the call.
- if (isSVR4ABI && isPPC64 && !isPatchPoint) {
+ // We do need to reserve X2 to appease the verifier for the PATCHPOINT.
+ if (isSVR4ABI && isPPC64) {
setUsesTOCBasePtr(DAG);
- Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+
+ // We cannot add X2 as an operand here for PATCHPOINT, because there is no
+ // way to mark dependencies as implicit here. We will add the X2 dependency
+ // in EmitInstrWithCustomInserter.
+ if (!isPatchPoint)
+ Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
}
return CallOpc;
@@ -5437,10 +5487,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
Arg = PtrOff;
}
- if (VA.isRegLoc()) {
- if (Arg.getValueType() == MVT::i1)
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+ // When useCRBits() is true, there can be i1 arguments.
+ // This is because getRegisterType(MVT::i1) => MVT::i1,
+ // while for other integer types getRegisterType() => MVT::i32.
+ // Extend i1 here so the callee always receives an i32.
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, MVT::i32, Arg);
+ if (VA.isRegLoc()) {
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -6073,7 +6128,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
"Invalid QPX parameter type");
- /* fall through */
+ LLVM_FALLTHROUGH;
case MVT::v4f64:
case MVT::v4i1: {
bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
@@ -7228,10 +7283,83 @@ SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
return FP;
}
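+// Widen a vector that is narrower than 128 bits to full width by
+// concatenating it with undef vectors of the same type, e.g.
+// v4i8 -> v16i8 becomes concat_vectors(Vec, undef, undef, undef).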
+static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
+
+ EVT VecVT = Vec.getValueType();
+ assert(VecVT.isVector() && "Expected a vector type.");
+ assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
+
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned WideNumElts = 128 / EltVT.getSizeInBits();
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
+
+ unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = Vec;
+ SDValue UndefVec = DAG.getUNDEF(VecVT);
+ for (unsigned i = 1; i < NumConcat; ++i)
+ Ops[i] = UndefVec;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
+}
+
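+// Lower SINT_TO_FP/UINT_TO_FP from a partial vector of integers (such as
+// v2i8, v4i8, v2i16 or v4i16) by widening the input to a full 128-bit
+// vector, shuffling the significant elements into place, extending them to
+// v2i64/v4i32, and then performing the conversion on the extended vector.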
+SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
+ const SDLoc &dl) const {
+
+ unsigned Opc = Op.getOpcode();
+ assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
+ "Unexpected conversion type");
+ assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
+ "Supports conversions to v2f64/v4f32 only.");
+
+ bool SignedConv = Opc == ISD::SINT_TO_FP;
+ bool FourEltRes = Op.getValueType() == MVT::v4f32;
+
+ SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
+ EVT WideVT = Wide.getValueType();
+ unsigned WideNumElts = WideVT.getVectorNumElements();
+ MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
+
+ SmallVector<int, 16> ShuffV;
+ for (unsigned i = 0; i < WideNumElts; ++i)
+ ShuffV.push_back(i + WideNumElts);
+
+ int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
+ int SaveElts = FourEltRes ? 4 : 2;
+ if (Subtarget.isLittleEndian())
+ for (int i = 0; i < SaveElts; i++)
+ ShuffV[i * Stride] = i;
+ else
+ for (int i = 1; i <= SaveElts; i++)
+ ShuffV[i * Stride - 1] = i - 1;
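+ // For example, converting an unsigned v4i8 to v4f32 on little-endian places
+ // the four input bytes in lanes 0, 4, 8 and 12 of a v16i8 whose remaining
+ // lanes come from the zero vector, so the bitcast to v4i32 below yields the
+ // zero-extended integers ready for the final conversion.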
+
+ SDValue ShuffleSrc2 =
+ SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
+ SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
+ unsigned ExtendOp =
+ SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
+
+ SDValue Extend;
+ if (!Subtarget.hasP9Altivec() && SignedConv) {
+ Arrange = DAG.getBitcast(IntermediateVT, Arrange);
+ Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
+ DAG.getValueType(Op.getOperand(0).getValueType()));
+ } else
+ Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
+
+ return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
+}
+
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+ EVT InVT = Op.getOperand(0).getValueType();
+ EVT OutVT = Op.getValueType();
+ if (OutVT.isVector() && OutVT.isFloatingPoint() &&
+ isOperationCustom(Op.getOpcode(), InVT))
+ return LowerINT_TO_FPVector(Op, DAG, dl);
+
// Conversions to f128 are legal.
if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
return Op;
@@ -8902,35 +9030,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(PPC::R2, MVT::i32);
}
- // We are looking for absolute values here.
- // The idea is to try to fit one of two patterns:
- // max (a, (0-a)) OR max ((0-a), a)
- if (Subtarget.hasP9Vector() &&
- (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw ||
- IntrinsicID == Intrinsic::ppc_altivec_vmaxsh ||
- IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) {
- SDValue V1 = Op.getOperand(1);
- SDValue V2 = Op.getOperand(2);
- if (V1.getSimpleValueType() == V2.getSimpleValueType() &&
- (V1.getSimpleValueType() == MVT::v4i32 ||
- V1.getSimpleValueType() == MVT::v8i16 ||
- V1.getSimpleValueType() == MVT::v16i8)) {
- if ( V1.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
- V1.getOperand(1) == V2 ) {
- // Generate the abs instruction with the operands
- return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2);
- }
-
- if ( V2.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
- V2.getOperand(1) == V1 ) {
- // Generate the abs instruction with the operands
- return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1);
- }
- }
- }
-
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
int CompareOpc;
@@ -9081,30 +9180,6 @@ SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
-SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc dl(Op);
- // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
- // instructions), but for smaller types, we need to first extend up to v2i32
- // before doing going farther.
- if (Op.getValueType() == MVT::v2i64) {
- EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- if (ExtVT != MVT::v2i32) {
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
- Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
- DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
- ExtVT.getVectorElementType(), 4)));
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
- Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
- DAG.getValueType(MVT::v2i32));
- }
-
- return Op;
- }
-
- return SDValue();
-}
-
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9495,6 +9570,44 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
}
}
+SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+
+ assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
+
+ EVT VT = Op.getValueType();
+ assert(VT.isVector() &&
+ "Only set vector abs as custom, scalar abs shouldn't reach here!");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ VT == MVT::v16i8) &&
+ "Unexpected vector element type!");
+ assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
+ "Current subtarget doesn't support smax v2i64!");
+
+ // For vector abs, it can be lowered to:
+ // abs x
+ // ==>
+ // y = -x
+ // smax(x, y)
+
+ SDLoc dl(Op);
+ SDValue X = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
+
+ // The SMAX patch (https://reviews.llvm.org/D47332) hasn't landed yet, so
+ // use the intrinsic here for now.
+ // TODO: Use ISD::SMAX directly once the SMAX patch has landed.
+ Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
+ if (VT == MVT::v2i64)
+ BifID = Intrinsic::ppc_altivec_vmaxsd;
+ else if (VT == MVT::v8i16)
+ BifID = Intrinsic::ppc_altivec_vmaxsh;
+ else if (VT == MVT::v16i8)
+ BifID = Intrinsic::ppc_altivec_vmaxsb;
+
+ return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9544,10 +9657,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
- case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, DAG);
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
@@ -9624,6 +9737,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
+ case ISD::BITCAST:
+ // Don't handle bitcast here.
+ return;
}
}
@@ -9787,17 +9903,14 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
-MachineBasicBlock *
-PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
- MachineBasicBlock *BB,
- bool is8bit, // operation
- unsigned BinOpcode,
- unsigned CmpOpcode,
- unsigned CmpPred) const {
+MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
+ MachineInstr &MI, MachineBasicBlock *BB,
+ bool is8bit, // operation
+ unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
// If we support part-word atomic mnemonics, just use them
if (Subtarget.hasPartwordAtomics())
- return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
- CmpOpcode, CmpPred);
+ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
+ CmpPred);
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -9821,7 +9934,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB =
- CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
+ CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
if (CmpOpcode)
@@ -9832,22 +9945,25 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
- : &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC =
+ is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
unsigned ShiftReg =
- isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
- unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
- unsigned MaskReg = RegInfo.createVirtualRegister(RC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
unsigned Ptr1Reg;
- unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+ unsigned TmpReg =
+ (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
// thisMBB:
// ...
@@ -9876,82 +9992,107 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
- .addReg(ptrA).addReg(ptrB);
+ .addReg(ptrA)
+ .addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
- BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
- .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ // We need to use a 32-bit subregister here to avoid a register class
+ // mismatch in 64-bit mode.
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
+ .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
+ .addImm(3)
+ .addImm(27)
+ .addImm(is8bit ? 28 : 27);
if (!isLittleEndian)
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg)
+ .addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(61);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
- BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
- .addReg(incr).addReg(ShiftReg);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
- BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg)
+ .addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
- .addReg(Mask2Reg).addReg(ShiftReg);
+ .addReg(Mask2Reg)
+ .addReg(ShiftReg);
BB = loopMBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
- .addReg(Incr2Reg).addReg(TmpDestReg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
- .addReg(TmpDestReg).addReg(MaskReg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
- .addReg(TmpReg).addReg(MaskReg);
+ .addReg(Incr2Reg)
+ .addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
if (CmpOpcode) {
// For unsigned comparisons, we can directly compare the shifted values.
// For signed comparisons we shift and sign extend.
- unsigned SReg = RegInfo.createVirtualRegister(RC);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
- .addReg(TmpDestReg).addReg(MaskReg);
+ unsigned SReg = RegInfo.createVirtualRegister(GPRC);
+ BuildMI(BB, dl, TII->get(PPC::AND), SReg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
unsigned ValueReg = SReg;
unsigned CmpReg = Incr2Reg;
if (CmpOpcode == PPC::CMPW) {
- ValueReg = RegInfo.createVirtualRegister(RC);
+ ValueReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
- .addReg(SReg).addReg(ShiftReg);
- unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
+ .addReg(SReg)
+ .addReg(ShiftReg);
+ unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
- .addReg(ValueReg);
+ .addReg(ValueReg);
ValueReg = ValueSReg;
CmpReg = incr;
}
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
- .addReg(CmpReg).addReg(ValueReg);
+ .addReg(CmpReg)
+ .addReg(ValueReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
+ .addImm(CmpPred)
+ .addReg(PPC::CR0)
+ .addMBB(exitMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(exitMBB);
BB = loop2MBB;
}
- BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
- .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
BuildMI(BB, dl, TII->get(PPC::STWCX))
- .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
+ .addReg(Tmp4Reg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
- BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
- .addReg(ShiftReg);
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
+ .addReg(TmpDestReg)
+ .addReg(ShiftReg);
return BB;
}
@@ -9968,10 +10109,6 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
-
unsigned DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
@@ -10034,10 +10171,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
- .addReg(PPC::X2)
- .addImm(TOCOffset)
- .addReg(BufReg);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addReg(PPC::X2)
+ .addImm(TOCOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
}
// Naked functions never have a base pointer, and so we use r1. For all
@@ -10052,8 +10189,8 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
.addReg(BaseReg)
.addImm(BPOffset)
- .addReg(BufReg);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
@@ -10086,8 +10223,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
.addImm(LabelOffset)
.addReg(BufReg);
}
-
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
mainMBB->addSuccessor(sinkMBB);
@@ -10111,10 +10247,6 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
-
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
@@ -10152,7 +10284,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(0)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload IP
if (PVT == MVT::i64) {
@@ -10164,7 +10296,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(LabelOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload SP
if (PVT == MVT::i64) {
@@ -10176,7 +10308,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(SPOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload BP
if (PVT == MVT::i64) {
@@ -10188,16 +10320,15 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(BPOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload TOC
if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
- .addImm(TOCOffset)
- .addReg(BufReg);
-
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addImm(TOCOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
}
// Jump
@@ -10221,7 +10352,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// way to mark the dependence as implicit there, and so the stackmap code
// will confuse it with a regular operand. Instead, add the dependence
// here.
- setUsesTOCBasePtr(*BB->getParent());
MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
}
@@ -10246,8 +10376,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction *F = BB->getParent();
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
- MI.getOpcode() == PPC::SELECT_CC_I8 ||
- MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
+ MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
+ MI.getOpcode() == PPC::SELECT_I8) {
SmallVector<MachineOperand, 2> Cond;
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8)
@@ -10392,9 +10522,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
- .addReg(HiReg).addReg(ReadAgainReg);
+ .addReg(HiReg)
+ .addReg(ReadAgainReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(CmpReg)
+ .addMBB(readMBB);
BB->addSuccessor(readMBB);
BB->addSuccessor(sinkMBB);
@@ -10564,27 +10697,35 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// st[bhwd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
- .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
- .addReg(oldval).addReg(dest);
+ .addReg(oldval)
+ .addReg(dest);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(midMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(midMBB);
BB = loop2MBB;
BuildMI(BB, dl, TII->get(StoreMnemonic))
- .addReg(newval).addReg(ptrA).addReg(ptrB);
+ .addReg(newval)
+ .addReg(ptrA)
+ .addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
BB = midMBB;
BuildMI(BB, dl, TII->get(StoreMnemonic))
- .addReg(dest).addReg(ptrA).addReg(ptrB);
+ .addReg(dest)
+ .addReg(ptrA)
+ .addReg(ptrB);
BB->addSuccessor(exitMBB);
// exitMBB:
@@ -10619,24 +10760,26 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
- : &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC =
+ is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
unsigned ShiftReg =
- isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
- unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
- unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
- unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
- unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
- unsigned MaskReg = RegInfo.createVirtualRegister(RC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
unsigned Ptr1Reg;
- unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpReg = RegInfo.createVirtualRegister(GPRC);
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
@@ -10673,74 +10816,107 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
- .addReg(ptrA).addReg(ptrB);
+ .addReg(ptrA)
+ .addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
- BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
- .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+
+ // We need to use a 32-bit subregister here to avoid a register class
+ // mismatch in 64-bit mode.
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
+ .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
+ .addImm(3)
+ .addImm(27)
+ .addImm(is8bit ? 28 : 27);
if (!isLittleEndian)
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg)
+ .addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(61);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(29);
BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
- .addReg(newval).addReg(ShiftReg);
+ .addReg(newval)
+ .addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
- .addReg(oldval).addReg(ShiftReg);
+ .addReg(oldval)
+ .addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
- .addReg(Mask3Reg).addImm(65535);
+ .addReg(Mask3Reg)
+ .addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
- .addReg(Mask2Reg).addReg(ShiftReg);
+ .addReg(Mask2Reg)
+ .addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
- .addReg(NewVal2Reg).addReg(MaskReg);
+ .addReg(NewVal2Reg)
+ .addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
- .addReg(OldVal2Reg).addReg(MaskReg);
+ .addReg(OldVal2Reg)
+ .addReg(MaskReg);
BB = loop1MBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
- BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
- .addReg(TmpDestReg).addReg(MaskReg);
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
- .addReg(TmpReg).addReg(OldVal3Reg);
+ .addReg(TmpReg)
+ .addReg(OldVal3Reg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(midMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(midMBB);
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
- .addReg(TmpDestReg).addReg(MaskReg);
- BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
- .addReg(Tmp2Reg).addReg(NewVal3Reg);
- BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
- .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
+ .addReg(Tmp2Reg)
+ .addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(Tmp4Reg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
BB = midMBB;
- BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(TmpDestReg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
- BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
- .addReg(ShiftReg);
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
+ .addReg(TmpReg)
+ .addReg(ShiftReg);
} else if (MI.getOpcode() == PPC::FADDrtz) {
// This pseudo performs an FADD with rounding mode temporarily forced
// to round-to-zero. We emit this via custom inserter since the FPSCR
@@ -10777,9 +10953,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
- &PPC::GPRCRegClass :
- &PPC::G8RCRegClass);
+ unsigned Dest = RegInfo.createVirtualRegister(
+ Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
DebugLoc dl = MI.getDebugLoc();
BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
@@ -11231,9 +11406,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
} else {
// This is neither a signed nor an unsigned comparison, just make sure
// that the high bits are equal.
- KnownBits Op1Known, Op2Known;
- DAG.computeKnownBits(N->getOperand(0), Op1Known);
- DAG.computeKnownBits(N->getOperand(1), Op2Known);
+ KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
+ KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
// We don't really care about what is known about the first bit (if
// anything), so clear it in all masks prior to comparing them.
@@ -11750,6 +11924,37 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
ShiftCst);
}
+SDValue PPCTargetLowering::combineSetCC(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert(N->getOpcode() == ISD::SETCC &&
+ "Should be called with a SETCC node");
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (CC == ISD::SETNE || CC == ISD::SETEQ) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
+ if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
+ LHS.hasOneUse())
+ std::swap(LHS, RHS);
+
+ // x == 0-y --> x+y == 0
+ // x != 0-y --> x+y != 0
+ if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
+ RHS.hasOneUse()) {
+ SDLoc DL(N);
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ EVT OpVT = LHS.getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
+ return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
+ }
+ }
+
+ return DAGCombineTruncBoolExt(N, DCI);
+}
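// A minimal standalone sketch (illustration only; the helper names below are
// made up) of the identity combineSetCC relies on: in two's-complement
// arithmetic, x == 0 - y holds exactly when x + y == 0, so a compare against
// a negation can be rewritten as a compare of a sum against zero.
static bool eqViaSub(unsigned long long X, unsigned long long Y) {
  return X == 0 - Y; // x == -y (mod 2^64)
}
static bool eqViaAdd(unsigned long long X, unsigned long long Y) {
  return X + Y == 0; // x + y == 0 (mod 2^64)
}
// Both helpers agree for every input pair, which is why the node can be
// rewritten without changing the comparison result.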
+
// Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
@@ -11869,7 +12074,8 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
}
// Not a build vector of (possibly fp_rounded) loads.
- if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
+ if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
+ N->getNumOperands() == 1)
return SDValue();
for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
@@ -12450,6 +12656,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDLoc dl(N);
switch (N->getOpcode()) {
default: break;
+ case ISD::ADD:
+ return combineADD(N, DCI);
case ISD::SHL:
return combineSHL(N, DCI);
case ISD::SRA:
@@ -12476,7 +12684,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND:
return DAGCombineExtBoolTrunc(N, DCI);
case ISD::TRUNCATE:
+ return combineTRUNCATE(N, DCI);
case ISD::SETCC:
+ if (SDValue CSCC = combineSetCC(N, DCI))
+ return CSCC;
+ LLVM_FALLTHROUGH;
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
case ISD::SINT_TO_FP:
@@ -12499,9 +12711,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
(Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
(Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
- // STBRX can only handle simple types.
+ // STBRX can only handle simple types and it makes no sense to store less
+ // than two bytes in byte-reversed order.
EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
- if (mVT.isExtended())
+ if (mVT.isExtended() || mVT.getSizeInBits() < 16)
break;
SDValue BSwapOp = N->getOperand(1).getOperand(0);
@@ -12877,6 +13090,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+
+ // Combine vmaxsw/h/b(a, a's negation) to abs(a)
+ // Expose the vabsduw/h/b opportunity for downstream combines
+ if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
+ (IID == Intrinsic::ppc_altivec_vmaxsw ||
+ IID == Intrinsic::ppc_altivec_vmaxsh ||
+ IID == Intrinsic::ppc_altivec_vmaxsb)) {
+ SDValue V1 = N->getOperand(1);
+ SDValue V2 = N->getOperand(2);
+ if ((V1.getSimpleValueType() == MVT::v4i32 ||
+ V1.getSimpleValueType() == MVT::v8i16 ||
+ V1.getSimpleValueType() == MVT::v16i8) &&
+ V1.getSimpleValueType() == V2.getSimpleValueType()) {
+ // (0-a, a)
+ if (V1.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
+ V1.getOperand(1) == V2) {
+ return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
+ }
+ // (a, 0-a)
+ if (V2.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
+ V2.getOperand(1) == V1) {
+ return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+ }
+ // (x-y, y-x)
+ if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
+ V1.getOperand(0) == V2.getOperand(1) &&
+ V1.getOperand(1) == V2.getOperand(0)) {
+ return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+ }
+ }
+ }
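// Why max(a, 0 - a) matches ISD::ABS even at the most negative value (a
// sketch of the reasoning, assuming wrapping 32-bit arithmetic): for
// a = INT32_MIN, 0 - a wraps back to INT32_MIN, so the signed max is
// INT32_MIN, which is also what a wrapping abs produces; for any other a,
// one of {a, 0 - a} is the ordinary absolute value.
static_assert(0u - 0x80000000u == 0x80000000u,
              "negating the minimum 32-bit value wraps back to itself");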
}
break;
@@ -13109,6 +13355,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::BUILD_VECTOR:
return DAGCombineBuildVector(N, DCI);
+ case ISD::ABS:
+ return combineABS(N, DCI);
+ case ISD::VSELECT:
+ return combineVSelect(N, DCI);
}
return SDValue();
@@ -13251,7 +13501,8 @@ PPCTargetLowering::getConstraintType(StringRef Constraint) const {
} else if (Constraint == "wc") { // individual CR bits.
return C_RegisterClass;
} else if (Constraint == "wa" || Constraint == "wd" ||
- Constraint == "wf" || Constraint == "ws") {
+ Constraint == "wf" || Constraint == "ws" ||
+ Constraint == "wi") {
return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
@@ -13281,6 +13532,8 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
return CW_Register;
else if (StringRef(constraint) == "ws" && type->isDoubleTy())
return CW_Register;
+ else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
+ return CW_Register; // just holds 64-bit integer data.
switch (*constraint) {
default:
@@ -13363,7 +13616,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// An individual CR bit.
return std::make_pair(0U, &PPC::CRBITRCRegClass);
} else if ((Constraint == "wa" || Constraint == "wd" ||
- Constraint == "wf") && Subtarget.hasVSX()) {
+ Constraint == "wf" || Constraint == "wi") &&
+ Subtarget.hasVSX()) {
return std::make_pair(0U, &PPC::VSRCRegClass);
} else if (Constraint == "ws" && Subtarget.hasVSX()) {
if (VT == MVT::f32 && Subtarget.hasP8Vector())
@@ -13598,6 +13852,35 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
report_fatal_error("Invalid register name global variable");
}
+bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
+ // The 32-bit SVR4 ABI accesses everything as got-indirect.
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
+ return true;
+
+ CodeModel::Model CModel = getTargetMachine().getCodeModel();
+ // Under the small and large code models, module locals are accessed
+ // indirectly by loading their address from .toc/.got. The difference
+ // is that for large code model we have ADDISTocHa + LDtocL and for
+ // small code model we simply have LDtoc.
+ if (CModel == CodeModel::Small || CModel == CodeModel::Large)
+ return true;
+
+ // JumpTable and BlockAddress are accessed as got-indirect.
+ if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
+ return true;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+ const GlobalValue *GV = G->getGlobal();
+ unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
+ // The NLP flag indicates that a global access has to use an
+ // extra indirection.
+ if (GVFlags & PPCII::MO_NLP_FLAG)
+ return true;
+ }
+
+ return false;
+}
+
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The PowerPC target isn't yet aware of offsets.
@@ -14116,7 +14399,30 @@ SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
return Value;
- return SDValue();
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Subtarget.isISA3_0() ||
+ N0.getOpcode() != ISD::SIGN_EXTEND ||
+ N0.getOperand(0).getValueType() != MVT::i32 ||
+ CN1 == nullptr || N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ // We can't save an operation here if the value is already extended, and
+ // the existing shift is easier to combine.
+ SDValue ExtsSrc = N0.getOperand(0);
+ if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
+ ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
+ return SDValue();
+
+ SDLoc DL(N0);
+ SDValue ShiftBy = SDValue(CN1, 0);
+ // We want the shift amount to be i32 on the extswli, but the shift could
+ // have an i64.
+ if (ShiftBy.getValueType() == MVT::i64)
+ ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
+
+ return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
+ ShiftBy);
}
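// Hedged example of the source-level shape the combine above targets (the
// function name is made up): shifting a sign-extended i32 on a 64-bit
// target.
static long long shiftExtendedWord(int X) {
  // (shl (sign_extend i32 X), 3): on ISA 3.0 this can be emitted as a
  // single extswsli instead of extsw followed by sldi.
  return (long long)X << 3;
}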
SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
@@ -14133,6 +14439,152 @@ SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
return SDValue();
}
+// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
+// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
+// When C is zero, the equation (addi Z, -C) can be simplified to Z
+// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
+static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ if (!Subtarget.isPPC64())
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ auto isZextOfCompareWithConstant = [](SDValue Op) {
+ if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
+ Op.getValueType() != MVT::i64)
+ return false;
+
+ SDValue Cmp = Op.getOperand(0);
+ if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
+ Cmp.getOperand(0).getValueType() != MVT::i64)
+ return false;
+
+ if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
+ int64_t NegConstant = 0 - Constant->getSExtValue();
+ // Due to the limitations of the addi instruction,
+ // -C is required to be in [-32768, 32767].
+ return isInt<16>(NegConstant);
+ }
+
+ return false;
+ };
+
+ bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
+ bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
+
+ // If there is a pattern, canonicalize a zext operand to the RHS.
+ if (LHSHasPattern && !RHSHasPattern)
+ std::swap(LHS, RHS);
+ else if (!LHSHasPattern && !RHSHasPattern)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
+ SDValue Cmp = RHS.getOperand(0);
+ SDValue Z = Cmp.getOperand(0);
+ auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
+
+ assert(Constant && "Constant should not be a null pointer.");
+ int64_t NegConstant = 0 - Constant->getSExtValue();
+
+ switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
+ default: break;
+ case ISD::SETNE: {
+ // when C == 0
+ // --> addze X, (addic Z, -1).carry
+ // /
+ // add X, (zext(setne Z, C))--
+ // \ when -32768 <= -C <= 32767 && C != 0
+ // --> addze X, (addic (addi Z, -C), -1).carry
+ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
+ DAG.getConstant(NegConstant, DL, MVT::i64));
+ SDValue AddOrZ = NegConstant != 0 ? Add : Z;
+ SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
+ AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
+ return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
+ SDValue(Addc.getNode(), 1));
+ }
+ case ISD::SETEQ: {
+ // when C == 0
+ // --> addze X, (subfic Z, 0).carry
+ // /
+ // add X, (zext(sete Z, C))--
+ // \ when -32768 <= -C <= 32767 && C != 0
+ // --> addze X, (subfic (addi Z, -C), 0).carry
+ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
+ DAG.getConstant(NegConstant, DL, MVT::i64));
+ SDValue AddOrZ = NegConstant != 0 ? Add : Z;
+ SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
+ DAG.getConstant(0, DL, MVT::i64), AddOrZ);
+ return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
+ SDValue(Subc.getNode(), 1));
+ }
+ }
+
+ return SDValue();
+}
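// A small reference model (illustration only; the helper names are made up)
// of the carry trick used above: "addic W, -1" sets CA exactly when W != 0,
// and "subfic W, 0" sets CA exactly when W == 0, so the following addze adds
// precisely the zero-extended comparison result to X.
static unsigned carryOfAddicMinusOne(unsigned long long W) {
  return W != 0; // W + 0xFFFF...F carries out iff W is non-zero.
}
static unsigned carryOfSubficFromZero(unsigned long long W) {
  return W == 0; // 0 - W borrows unless W == 0; CA is the inverted borrow.
}
// With W = Z - C, addze X therefore computes X + (Z != C) or X + (Z == C).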
+
+SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
+ if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
+ return Value;
+
+ return SDValue();
+}
+
+// Detect TRUNCATE operations on bitcasts of float128 values.
+// What we are looking for here is the situation where we extract a subset
+// of bits from a 128-bit float.
+// This can be of two forms:
+// 1) BITCAST of f128 feeding TRUNCATE
+// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
+// The reason this is required is that we do not have a legal i128 type
+// and so we want to prevent having to store the f128 and then reload part
+// of it.
+SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ // If we are using CRBits then try that first.
+ if (Subtarget.useCRBits()) {
+ // Check if CRBits did anything and return that if it did.
+ if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
+ return CRTruncValue;
+ }
+
+ SDLoc dl(N);
+ SDValue Op0 = N->getOperand(0);
+
+ // Looking for a truncate of i128 to i64.
+ if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
+
+ // SRL feeding TRUNCATE.
+ if (Op0.getOpcode() == ISD::SRL) {
+ ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ // The right shift has to be by 64 bits.
+ if (!ConstNode || ConstNode->getZExtValue() != 64)
+ return SDValue();
+
+ // Switch the element number to extract.
+ EltToExtract = EltToExtract ? 0 : 1;
+ // Update Op0 past the SRL.
+ Op0 = Op0.getOperand(0);
+ }
+
+ // BITCAST feeding a TRUNCATE possibly via SRL.
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op0.getValueType() == MVT::i128 &&
+ Op0.getOperand(0).getValueType() == MVT::f128) {
+ SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
+ return DCI.DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
+ DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
+ }
+ return SDValue();
+}
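// Hedged example of the idiom this combine targets (assumes a compiler that
// provides __float128 and unsigned __int128; the helper name is made up):
// extracting the high 64 bits of a binary128 value.
static unsigned long long highBitsOfF128(__float128 F) {
  unsigned __int128 Bits;
  __builtin_memcpy(&Bits, &F, sizeof(Bits));
  // This is the BITCAST + SRL-by-64 + TRUNCATE shape matched above; the
  // combine turns it into an EXTRACT_VECTOR_ELT of a v2i64 bitcast rather
  // than a store and partial reload of the f128 value.
  return (unsigned long long)(Bits >> 64);
}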
+
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
@@ -14168,6 +14620,15 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}
+bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
+ if (!Subtarget.hasVSX())
+ return false;
+ if (Subtarget.hasP9Vector() && VT == MVT::f128)
+ return true;
+ return VT == MVT::f32 || VT == MVT::f64 ||
+ VT == MVT::v4f32 || VT == MVT::v2f64;
+}
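// What "bit-preserving FP logic" means here (a sketch assuming an IEEE-754
// binary64 layout; the helper name is made up): bitwise logic applied to the
// raw bits of an FP value, such as clearing the sign bit to compute fabs.
static double fabsViaBitLogic(double X) {
  unsigned long long Bits;
  __builtin_memcpy(&Bits, &X, sizeof(Bits));
  Bits &= ~(1ULL << 63); // clear the sign bit
  __builtin_memcpy(&X, &Bits, sizeof(Bits));
  return X;
}
// Returning true above allows such logic to stay in VSX registers for the
// listed types instead of bouncing the value through GPRs.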
+
bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
const Value *Mask = AndI.getOperand(1);
@@ -14184,3 +14645,109 @@ isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
// For non-constant masks, we can always use the record-form and.
return true;
}
+
+// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
+// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
+// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
+// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
+// Transform (abs (sub a, b)) to (vabsd a b 1) if a and b are of type v4i32
+SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
+ assert(Subtarget.hasP9Altivec() &&
+ "Only combine this when P9 altivec supported!");
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ if (N->getOperand(0).getOpcode() == ISD::SUB) {
+ // Even for a signed ABS this is safe: both inputs are zero-extended, so
+ // the difference cannot overflow and the signed absolute value equals the
+ // unsigned absolute difference that VABSD computes.
+ unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
+ unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
+ if ((SubOpcd0 == ISD::ZERO_EXTEND ||
+ SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ (SubOpcd1 == ISD::ZERO_EXTEND ||
+ SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0)->getOperand(0),
+ N->getOperand(0)->getOperand(1),
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+
+ // For type v4i32, it can be optimized with xvnegsp + vabsduw
+ if (N->getOperand(0).getValueType() == MVT::v4i32 &&
+ N->getOperand(0).hasOneUse()) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0)->getOperand(0),
+ N->getOperand(0)->getOperand(1),
+ DAG.getTargetConstant(1, dl, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
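// Scalar reference model of the VABSD semantics relied on above (a sketch;
// the helper names are made up): an unsigned absolute difference per
// element, with the xvnegsp-style sign-bit flip making it usable for signed
// v4i32 inputs.
static unsigned absdu32(unsigned A, unsigned B) {
  return A > B ? A - B : B - A;
}
static unsigned absdSigned32(int A, int B) {
  // Adding 0x80000000 (equivalent to flipping the sign bit, which is what
  // xvnegsp does per word) maps the signed range onto the unsigned range
  // while preserving order, so the unsigned difference equals |A - B|.
  return absdu32((unsigned)A + 0x80000000u, (unsigned)B + 0x80000000u);
}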
+
+// For type v4i32/v8i16/v16i8, transform
+// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
+// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
+SDValue PPCTargetLowering::combineVSelect(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
+ assert(Subtarget.hasP9Altivec() &&
+ "Only combine this when P9 altivec supported!");
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue TrueOpnd = N->getOperand(1);
+ SDValue FalseOpnd = N->getOperand(2);
+ EVT VT = N->getOperand(1).getValueType();
+
+ if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
+ FalseOpnd.getOpcode() != ISD::SUB)
+ return SDValue();
+
+ // ABSD only available for type v4i32/v8i16/v16i8
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+
+ // Require at least one single-use operand so the transform actually saves
+ // a dependent computation.
+ if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
+ return SDValue();
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // Can only handle unsigned comparison here
+ switch (CC) {
+ default:
+ return SDValue();
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ std::swap(TrueOpnd, FalseOpnd);
+ break;
+ }
+
+ SDValue CmpOpnd1 = Cond.getOperand(0);
+ SDValue CmpOpnd2 = Cond.getOperand(1);
+
+ // SETCC CmpOpnd1 CmpOpnd2 cond
+ // TrueOpnd = CmpOpnd1 - CmpOpnd2
+ // FalseOpnd = CmpOpnd2 - CmpOpnd1
+ if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
+ TrueOpnd.getOperand(1) == CmpOpnd2 &&
+ FalseOpnd.getOperand(0) == CmpOpnd2 &&
+ FalseOpnd.getOperand(1) == CmpOpnd1) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
+ CmpOpnd1, CmpOpnd2,
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+
+ return SDValue();
+}
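// Hedged example of source code that lowers to the vselect pattern matched
// above (the loop and names are made up): each element is an unsigned
// absolute difference, which is exactly what a single vabsdu[bhw] provides.
static void absDiff(const unsigned *A, const unsigned *B, unsigned *C, int N) {
  for (int I = 0; I < N; ++I)
    C[I] = A[I] > B[I] ? A[I] - B[I] : B[I] - A[I];
}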
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index f174943a8004..30acd60eba6f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -149,6 +149,10 @@ namespace llvm {
/// For vector types, only the last n bits are used. See vsld.
SRL, SRA, SHL,
+ /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
+ /// word and shift left immediate.
+ EXTSWSLI,
+
/// The combination of sra[wd]i and addze used to implemented signed
/// integer division by a power of 2. The first operand is the dividend,
/// and the second is the constant shift amount (representing the
@@ -369,6 +373,21 @@ namespace llvm {
/// An SDNode for swaps that are not associated with any loads/stores
/// and thereby have no chain.
SWAP_NO_CHAIN,
+
+ /// An SDNode for Power9 vector absolute value difference.
+ /// operand #0 vector
+ /// operand #1 vector
+ /// operand #2 constant i32 0 or 1, indicating whether the most
+ /// significant bit needs to be patched for signed i32 elements
+ ///
+ /// The Power9 VABSD* instructions are designed for unsigned integer
+ /// vectors (byte/halfword/word). To use them for signed integer vectors,
+ /// we have to flip their sign bits first. Flipping the sign bit of a
+ /// byte/halfword vector would be inefficient, but for a word vector we
+ /// can leverage XVNEGSP to do it efficiently, e.g.:
+ /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
+ /// => VABSDUW((XVNEGSP a), (XVNEGSP b))
+ VABSD,
/// QVFPERM = This corresponds to the QPX qvfperm instruction.
QVFPERM,
@@ -557,6 +576,11 @@ namespace llvm {
/// DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+ bool isSelectSupported(SelectSupportKind Kind) const override {
+ // PowerPC does not support scalar condition selects on vectors.
+ return (Kind != SelectSupportKind::ScalarCondVectorVal);
+ }
+
/// getPreferredVectorAction - The code we generate when vector types are
/// legalized by promoting the integer element type is often much worse
/// than code we generate if we widen the type for applicable vector types.
@@ -565,7 +589,7 @@ namespace llvm {
/// of v4i8's and shuffle them. This will turn into a mess of 8 extending
/// loads, moves back into VSR's (or memory ops if we don't have moves) and
/// then the VPERM for the shuffle. All in all a very slow sequence.
- TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
if (VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
@@ -785,6 +809,9 @@ namespace llvm {
return true;
}
+ // Returns true if the address of the global is stored in a TOC entry.
+ bool isAccessedAsGotIndirect(SDValue N) const;
+
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
@@ -923,6 +950,9 @@ namespace llvm {
SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
+ SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
+ const SDLoc &dl) const;
+
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -988,6 +1018,7 @@ namespace llvm {
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -1088,6 +1119,11 @@ namespace llvm {
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it
@@ -1122,6 +1158,7 @@ namespace llvm {
// tail call. This will cause the optimizers to attempt to move, or
// duplicate return instructions to help enable tail call optimizations.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
}; // end class PPCTargetLowering
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index cdd57c6a1118..2ce6ad3293eb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -94,7 +94,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
let Defs = [LR8] in
- def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
+ def MovePCtoLR8 : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR8", []>,
PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
@@ -199,47 +199,45 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
-let usesCustomInserter = 1 in {
- let Defs = [CR0] in {
- def ATOMIC_LOAD_ADD_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
- [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_SUB_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
- [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_OR_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
- [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_XOR_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
- [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_AND_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64",
- [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_NAND_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
- [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_MIN_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64",
- [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_MAX_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64",
- [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_UMIN_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64",
- [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_UMAX_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64",
- [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>;
-
- def ATOMIC_CMP_SWAP_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
- [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
-
- def ATOMIC_SWAP_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
- [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
- }
+let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
+ [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
+ [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
+ [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
+ [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64",
+ [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
+ [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_MIN_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64",
+ [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_MAX_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64",
+ [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64",
+ [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64",
+ [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
+ [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
+
+ def ATOMIC_SWAP_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
+ [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
}
// Instructions to support atomic operations
@@ -269,18 +267,18 @@ def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNdi8 :Pseudo< (outs),
+def TCRETURNdi8 :PPCEmitTimePseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd8 $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai8 :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+def TCRETURNai8 :PPCEmitTimePseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
"#TC_RETURNa8 $func $offset",
[(PPCtc_return (i64 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
+def TCRETURNri8 : PPCEmitTimePseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
@@ -347,14 +345,19 @@ def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
} // hasExtraSrcRegAllocReq = 1
} // hasSideEffects = 0
-let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
+// is not.
+let hasSideEffects = 1 in {
let Defs = [CTR8] in
- def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ def EH_SjLj_SetJmp64 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP64",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[In64BitMode]>;
+}
+
+let hasSideEffects = 1, isBarrier = 1 in {
let isTerminator = 1 in
- def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ def EH_SjLj_LongJmp64 : PPCCustomInserterPseudo<(outs), (ins memr:$buf),
"#EH_SJLJ_LONGJMP64",
[(PPCeh_sjlj_longjmp addr:$buf)]>,
Requires<[In64BitMode]>;
@@ -396,10 +399,10 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
// the POWER3.
let Defs = [X1], Uses = [X1] in
-def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
+def DYNALLOC8 : PPCEmitTimePseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
[(set i64:$result,
(PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
-def DYNAREAOFFSET8 : Pseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
+def DYNAREAOFFSET8 : PPCEmitTimePseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
[(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
let Defs = [LR8] in {
@@ -717,9 +720,10 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
"sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
-defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH),
"extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
- []>, isPPC64;
+ [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
+ isPPC64, Requires<[IsISA3_0]>;
// For fast-isel:
let isCodeGenOnly = 1, Defs = [CARRY] in
@@ -773,8 +777,12 @@ def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC)
"maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
def MADDLD : VAForm_1a<51, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def SETB : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
- "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA),
+ "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
+ "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+}
def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins i32imm:$L),
"darn $RT, $L", IIC_LdStLD>, isPPC64;
def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D),
@@ -1018,19 +1026,19 @@ def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
-def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtoc: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtoc",
[(set i64:$rD,
(PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
-def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocJTI: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocJTI",
[(set i64:$rD,
(PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
-def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocCPT: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
-def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
@@ -1071,40 +1079,40 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src),
// Support for medium and large code model.
let hasSideEffects = 0 in {
let isReMaterializable = 1 in {
-def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+def ADDIStocHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDIStocHA", []>, isPPC64;
-def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDItocL", []>, isPPC64;
}
let mayLoad = 1 in
-def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
+def LDtocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
"#LDtocL", []>, isPPC64;
}
// Support for thread-local storage.
-def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDISgotTprelHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISgotTprelHA",
[(set i64:$rD,
(PPCaddisGotTprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
+def LDgotTprelL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
"#LDgotTprelL",
[(set i64:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
isPPC64;
-let isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in
-def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
+let Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE8 : PPCPostRAExpPseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
(ADD8TLS $in, tglobaltlsaddr:$g)>;
-def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIStlsgdHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsgdHA",
[(set i64:$rD,
(PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDItlsgdL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDItlsgdL",
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1115,7 +1123,7 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
// correct because the branch select pass is relying on it.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+def GETtlsADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsADDR",
[(set i64:$rD,
(PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
@@ -1125,7 +1133,7 @@ def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
in
-def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
+def ADDItlsgdLADDR : PPCEmitTimePseudo<(outs g8rc:$rD),
(ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
"#ADDItlsgdLADDR",
[(set i64:$rD,
@@ -1133,12 +1141,12 @@ def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIStlsldHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
(PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDItlsldL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDItlsldL",
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1147,7 +1155,7 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+def GETtlsldADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsldADDR",
[(set i64:$rD,
(PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
@@ -1157,7 +1165,7 @@ def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
in
-def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
+def ADDItlsldLADDR : PPCEmitTimePseudo<(outs g8rc:$rD),
(ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
"#ADDItlsldLADDR",
[(set i64:$rD,
@@ -1165,13 +1173,13 @@ def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDISdtprelHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
(PPCaddisDtprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIdtprelL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIdtprelL",
[(set i64:$rD,
(PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1221,30 +1229,30 @@ def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STBUX8: XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX8: XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
@@ -1252,13 +1260,13 @@ def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrix:$dst),
- "stdu $rS, $dst", IIC_LdStSTDU, []>,
+ "stdu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
isPPC64;
def STDUX : XForm_8_memOp<31, 181, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stdux $rS, $dst", IIC_LdStSTDUX, []>,
+ "stdux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 24969d7ef853..69b19e45c3e9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1051,6 +1051,20 @@ def : Pat<(v4f32 (ftrunc v4f32:$vA)),
def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
(VRFIN $vA)>;
+// Vector selection
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v4i32 (vselect v4i32:$vA, v4i32:$vB, v4i32:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v2i64 (vselect v2i64:$vA, v2i64:$vB, v2i64:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v4f32 (vselect v4i32:$vA, v4f32:$vB, v4f32:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v2f64 (vselect v2i64:$vA, v2f64:$vB, v2f64:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+
} // end HasAltivec
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index f5f4b46344cf..2fe765dd99e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -2153,7 +2153,9 @@ class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
}
//===----------------------------------------------------------------------===//
-class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+// An EmitTimePseudo won't have encoding information for the [MC]CodeEmitter
+// machinery.
+class PPCEmitTimePseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
let isCodeGenOnly = 1;
let PPC64 = 0;
@@ -2162,6 +2164,21 @@ class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
let hasNoSchedulingInfo = 1;
}
+// Instructions that require custom insertion support (a.k.a. ISelPseudos);
+// note that these won't have isPseudo set.
+class PPCCustomInserterPseudo<dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : PPCEmitTimePseudo<OOL, IOL, asmstr, pattern> {
+ let usesCustomInserter = 1;
+}
+
+// PostRAPseudos are expanded in expandPostRAPseudo; the isPseudo flag in
+// the .td files is set only for PostRAPseudos.
+class PPCPostRAExpPseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : PPCEmitTimePseudo<OOL, IOL, asmstr, pattern> {
+ let isPseudo = 1;
+}
+
class PseudoXFormMemOp<dag OOL, dag IOL, string asmstr, list<dag> pattern>
- : Pseudo<OOL, IOL, asmstr, pattern>, XFormMemOp;
+ : PPCPostRAExpPseudo<OOL, IOL, asmstr, pattern>, XFormMemOp;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 6c4e2129087c..0efe797c765d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -20,8 +20,8 @@ def HTM_get_imm : SDNodeXForm<imm, [{
return getI32Imm (N->getZExtValue(), SDLoc(N));
}]>;
-let hasSideEffects = 1, usesCustomInserter = 1 in {
-def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+let hasSideEffects = 1 in {
+def TCHECK_RET : PPCCustomInserterPseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 0930f7d3b8d7..d754ce2990d2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -987,7 +987,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::XXLOR;
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
PPC::VSSRCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::XXLORf;
+ Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMR;
else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
@@ -1429,17 +1429,15 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
: (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
} else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI.setDesc(get(PPC::BCLR));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MI.setDesc(get(PPC::BCLRn));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
} else {
MI.setDesc(get(PPC::BCCLR));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg());
+ .add(Pred[1]);
}
return true;
@@ -1454,7 +1452,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BC));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
@@ -1462,7 +1460,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BCn));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
} else {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
@@ -1471,13 +1469,13 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BCC));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
}
return true;
- } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 ||
- OpC == PPC::BCTRL || OpC == PPC::BCTRL8) {
+ } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
+ OpC == PPC::BCTRL8) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
@@ -1487,14 +1485,12 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
: (setLR ? PPC::BCCTRL : PPC::BCCTR)));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
return true;
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
: (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
return true;
}
@@ -1502,7 +1498,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
: (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg());
+ .add(Pred[1]);
return true;
}
@@ -1822,7 +1818,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
int NewOpC = -1;
int MIOpC = MI->getOpcode();
- if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
+ if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8 ||
+ MIOpC == PPC::ANDISo || MIOpC == PPC::ANDISo8)
NewOpC = MIOpC;
else {
NewOpC = PPC::getRecordFormOpcode(MIOpC);
@@ -1912,14 +1909,36 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// compare).
// Rotates are expensive instructions. If we're emitting a record-form
- // rotate that can just be an andi, we should just emit the andi.
- if ((MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) &&
- MI->getOperand(2).getImm() == 0) {
+ // rotate that can just be an andi/andis, we should just emit that.
+ if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
+ unsigned GPRRes = MI->getOperand(0).getReg();
+ int64_t SH = MI->getOperand(2).getImm();
int64_t MB = MI->getOperand(3).getImm();
int64_t ME = MI->getOperand(4).getImm();
- if (MB < ME && MB >= 16) {
- uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
- NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIo : PPC::ANDIo8;
+ // We can only do this if both the start and end of the mask are in the
+ // same halfword.
+ bool MBInLoHWord = MB >= 16;
+ bool MEInLoHWord = ME >= 16;
+ uint64_t Mask = ~0LLU;
+
+ if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
+ Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
+ // The mask value needs to be shifted right by 16 if we're emitting andis.
+ Mask >>= MBInLoHWord ? 0 : 16;
+ NewOpC = MIOpC == PPC::RLWINM ?
+ (MBInLoHWord ? PPC::ANDIo : PPC::ANDISo) :
+ (MBInLoHWord ? PPC::ANDIo8 :PPC::ANDISo8);
+ } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
+ (ME - MB + 1 == SH) && (MB >= 16)) {
+ // If we are rotating by exactly as many bits as are in the mask
+ // and the mask is in the least significant bits of the register,
+ // that's just an andis. (as long as the GPR result has no uses).
+ Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
+ Mask >>= 16;
+ NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDISo :PPC::ANDISo8;
+ }
+ // If we've set the mask, we can transform.
+ if (Mask != ~0LLU) {
MI->RemoveOperand(4);
MI->RemoveOperand(3);
MI->getOperand(2).setImm(Mask);
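// Worked instance of the mask computation above (illustrative values only):
// with SH = 0, MB = 16, ME = 31 the mask is
//   ((1 << (32 - 16)) - 1) & ~((1 << (31 - 31)) - 1) = 0xFFFF,
// so the record-form rotate becomes "andi. rD, rS, 0xFFFF"; with MB = 0,
// ME = 15 the mask is 0xFFFF0000, which is shifted right by 16 to form the
// immediate for "andis.".
static_assert((((1ULL << 16) - 1) & ~((1ULL << 0) - 1)) == 0xFFFFULL,
              "andi. example");
static_assert((((1ULL << 32) - 1) & ~((1ULL << 16) - 1)) == 0xFFFF0000ULL,
              "andis. example");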
@@ -2088,11 +2107,9 @@ bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
return true;
}
-#ifndef NDEBUG
static bool isAnImmediateOperand(const MachineOperand &MO) {
return MO.isCPI() || MO.isGlobal() || MO.isImm();
}
-#endif
bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
auto &MBB = *MI.getParent();
@@ -2231,6 +2248,35 @@ static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
return PPC::NoRegister;
}
+void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,
+ unsigned OpNo,
+ int64_t Imm) const {
+ assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
+ // Replace the REG with the Immediate.
+ unsigned InUseReg = MI.getOperand(OpNo).getReg();
+ MI.getOperand(OpNo).ChangeToImmediate(Imm);
+
+ if (empty(MI.implicit_operands()))
+ return;
+
+ // We need to make sure that the MI didn't have any implicit use
+ // of this REG any more.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, false, TRI);
+ if (UseOpIdx >= 0) {
+ MachineOperand &MO = MI.getOperand(UseOpIdx);
+ if (MO.isImplicit())
+ // The operands must always be in the following order:
+ // - explicit reg defs,
+ // - other explicit operands (reg uses, immediates, etc.),
+ // - implicit reg defs
+ // - implicit reg uses
+ // Therefore, removing the implicit operand won't change the explicit
+ // operands layout.
+ MI.RemoveOperand(UseOpIdx);
+ }
+}
+
// Replace an instruction with one that materializes a constant (and sets
// CR0 if the original instruction was a record-form instruction).
void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
@@ -2256,10 +2302,11 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
.addImm(LII.Imm);
}
-MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
- unsigned &ConstOp,
- bool &SeenIntermediateUse) const {
- ConstOp = ~0U;
+MachineInstr *PPCInstrInfo::getForwardingDefMI(
+ MachineInstr &MI,
+ unsigned &OpNoForForwarding,
+ bool &SeenIntermediateUse) const {
+ OpNoForForwarding = ~0U;
MachineInstr *DefMI = nullptr;
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2276,7 +2323,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
- ConstOp = i;
+ OpNoForForwarding = i;
break;
}
}
@@ -2297,7 +2344,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
- if (!instrHasImmForm(MI, III) && !ConvertibleImmForm)
+ if (!instrHasImmForm(MI, III, true) && !ConvertibleImmForm)
return nullptr;
// Don't convert or %X, %Y, %Y since that's just a register move.
@@ -2319,15 +2366,22 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
if (PPC::G8RCRegClass.contains(Reg))
Reg = Reg - PPC::X0 + PPC::R0;
- // Is this register defined by a load-immediate in this block?
+ // Is this register defined by some form of add-immediate (including
+ // load-immediate) within this basic block?
for ( ; It != E; ++It) {
if (It->modifiesRegister(Reg, &getRegisterInfo())) {
- if (It->getOpcode() == PPC::LI || It->getOpcode() == PPC::LI8) {
- ConstOp = i;
+ switch (It->getOpcode()) {
+ default: break;
+ case PPC::LI:
+ case PPC::LI8:
+ case PPC::ADDItocL:
+ case PPC::ADDI:
+ case PPC::ADDI8:
+ OpNoForForwarding = i;
return &*It;
- } else
- break;
- } else if (It->readsRegister(Reg, &getRegisterInfo()))
+ }
+ break;
+ } else if (It->readsRegister(Reg, &getRegisterInfo()))
// If we see another use of this reg between the def and the MI,
// we want to flag it so the def isn't deleted.
SeenIntermediateUse = true;
@@ -2335,7 +2389,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
}
}
}
- return ConstOp == ~0U ? nullptr : DefMI;
+ return OpNoForForwarding == ~0U ? nullptr : DefMI;
}
const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const {
@@ -2371,35 +2425,48 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
}
// If this instruction has an immediate form and one of its operands is a
-// result of a load-immediate, convert it to the immediate form if the constant
-// is in range.
+// result of a load-immediate or an add-immediate, convert it to
+// the immediate form if the constant is in range.
bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef) const {
MachineFunction *MF = MI.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
bool PostRA = !MRI->isSSA();
bool SeenIntermediateUse = true;
- unsigned ConstantOperand = ~0U;
- MachineInstr *DefMI = getConstantDefMI(MI, ConstantOperand,
- SeenIntermediateUse);
- if (!DefMI || !DefMI->getOperand(1).isImm())
+ unsigned ForwardingOperand = ~0U;
+ MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
+ SeenIntermediateUse);
+ if (!DefMI)
+ return false;
+ assert(ForwardingOperand < MI.getNumOperands() &&
+ "The forwarding operand needs to be valid at this point");
+ bool KillFwdDefMI = !SeenIntermediateUse &&
+ MI.getOperand(ForwardingOperand).isKill();
+ if (KilledDef && KillFwdDefMI)
+ *KilledDef = DefMI;
+
+ ImmInstrInfo III;
+ bool HasImmForm = instrHasImmForm(MI, III, PostRA);
+ // If this is a reg+reg instruction that has a reg+imm form,
+ // and one of the operands is produced by an add-immediate,
+ // try to convert it.
+ if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand,
+ *DefMI, KillFwdDefMI))
+ return true;
+
+ if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) ||
+ !DefMI->getOperand(1).isImm())
return false;
- assert(ConstantOperand < MI.getNumOperands() &&
- "The constant operand needs to be valid at this point");
int64_t Immediate = DefMI->getOperand(1).getImm();
// Sign-extend to 64-bits.
int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
(Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
- if (KilledDef && MI.getOperand(ConstantOperand).isKill() &&
- !SeenIntermediateUse)
- *KilledDef = DefMI;
-
- // If this is a reg+reg instruction that has a reg+imm form, convert it now.
- ImmInstrInfo III;
- if (instrHasImmForm(MI, III))
- return transformToImmForm(MI, III, ConstantOperand, SExtImm);
+ // If this is a reg+reg instruction that has a reg+imm form,
+ // and one of the operands is produced by LI, convert it now.
+ if (HasImmForm)
+ return transformToImmFormFedByLI(MI, III, ForwardingOperand, SExtImm);
bool ReplaceWithLI = false;
bool Is64BitLI = false;
@@ -2443,7 +2510,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
// Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
- CompareUseMI.getOperand(1).ChangeToImmediate(0);
+ replaceInstrOperandWithImm(CompareUseMI, 1, 0);
CompareUseMI.RemoveOperand(3);
CompareUseMI.RemoveOperand(2);
continue;
@@ -2602,18 +2669,23 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
+static bool isVFReg(unsigned Reg) {
+ return PPC::VFRCRegClass.contains(Reg);
+}
+
bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
- ImmInstrInfo &III) const {
+ ImmInstrInfo &III, bool PostRA) const {
unsigned Opc = MI.getOpcode();
// The vast majority of the instructions would need their operand 2 replaced
// with an immediate when switching to the reg+imm form. A marked exception
// are the update form loads/stores for which a constant operand 2 would need
// to turn into a displacement and move operand 1 to the operand 2 position.
III.ImmOpNo = 2;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
III.ImmWidth = 16;
III.ImmMustBeMultipleOf = 1;
III.TruncateImmTo = 0;
+ III.IsSummingOperands = false;
switch (Opc) {
default: return false;
case PPC::ADD4:
@@ -2622,6 +2694,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 1;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
break;
case PPC::ADDC:
@@ -2630,6 +2703,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
break;
case PPC::ADDCo:
@@ -2637,6 +2711,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = PPC::ADDICo;
break;
case PPC::SUBFC:
@@ -2809,8 +2884,9 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpNo = 1;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
@@ -2866,8 +2942,9 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 2;
III.ZeroIsSpecialNew = 3;
III.IsCommutative = false;
+ III.IsSummingOperands = true;
III.ImmOpNo = 2;
- III.ConstantOpNo = 3;
+ III.OpNoForForwarding = 3;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
@@ -2898,21 +2975,30 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
}
break;
- // Power9 only.
+ // Power9 and up only. For some of these, the X-Form version has access to all
+ // 64 VSRs whereas the D-Form only has access to the VRs. We replace those
+ // with pseudo-ops pre-RA, and post-RA we check that the register loaded
+ // into or stored from is one of the VR registers.
case PPC::LXVX:
case PPC::LXSSPX:
case PPC::LXSDX:
case PPC::STXVX:
case PPC::STXSSPX:
case PPC::STXSDX:
+ case PPC::XFLOADf32:
+ case PPC::XFLOADf64:
+ case PPC::XFSTOREf32:
+ case PPC::XFSTOREf64:
if (!Subtarget.hasP9Vector())
return false;
III.SignedImm = true;
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpNo = 1;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
+ III.ImmMustBeMultipleOf = 4;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LXVX:
@@ -2920,24 +3006,64 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ImmMustBeMultipleOf = 16;
break;
case PPC::LXSSPX:
- III.ImmOpcode = PPC::LXSSP;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::LXSSP;
+ else {
+ III.ImmOpcode = PPC::LFS;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFLOADf32:
+ III.ImmOpcode = PPC::DFLOADf32;
break;
case PPC::LXSDX:
- III.ImmOpcode = PPC::LXSD;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::LXSD;
+ else {
+ III.ImmOpcode = PPC::LFD;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFLOADf64:
+ III.ImmOpcode = PPC::DFLOADf64;
break;
case PPC::STXVX:
III.ImmOpcode = PPC::STXV;
III.ImmMustBeMultipleOf = 16;
break;
case PPC::STXSSPX:
- III.ImmOpcode = PPC::STXSSP;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::STXSSP;
+ else {
+ III.ImmOpcode = PPC::STFS;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFSTOREf32:
+ III.ImmOpcode = PPC::DFSTOREf32;
break;
case PPC::STXSDX:
- III.ImmOpcode = PPC::STXSD;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::STXSD;
+ else {
+ III.ImmOpcode = PPC::STFD;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFSTOREf64:
+ III.ImmOpcode = PPC::DFSTOREf64;
break;
}
break;
@@ -2984,13 +3110,264 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
}
}
-bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo,
- int64_t Imm) const {
+// Check whether 'MI', with the forwarding candidate at operand index
+// OpNoForForwarding, meets the requirements described in the ImmInstrInfo.
+bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned OpNoForForwarding
+ ) const {
+ // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
+ // would not work pre-RA, we can only do the check post RA.
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA())
+ return false;
+
+ // Cannot do the transform if MI isn't summing the operands.
+ if (!III.IsSummingOperands)
+ return false;
+
+ // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
+ if (!III.ZeroIsSpecialOrig)
+ return false;
+
+ // We cannot do the transform if the operand we are trying to replace
+ // isn't the same as the operand the instruction allows.
+ if (OpNoForForwarding != III.OpNoForForwarding)
+ return false;
+
+ // Check if the instruction we are trying to transform really has
+ // the special zero register as its operand.
+ if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
+ MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
+ return false;
+
+ // This machine instruction is convertible if:
+ // 1. it is summing the operands,
+ // 2. one of the operands is the special zero register, and
+ // 3. the operand we are trying to replace is the one the MI allows.
+ return true;
+}
+
+// Check if the DefMI is the add inst and set the ImmMO and RegMO
+// accordingly.
+bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ MachineOperand *&ImmMO,
+ MachineOperand *&RegMO) const {
+ unsigned Opc = DefMI.getOpcode();
+ if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8)
+ return false;
+
+ assert(DefMI.getNumOperands() >= 3 &&
+ "Add inst must have at least three operands");
+ RegMO = &DefMI.getOperand(1);
+ ImmMO = &DefMI.getOperand(2);
+
+ // This DefMI is eligible for forwarding if it is:
+ // 1. an add instruction, and
+ // 2. one of its operands is an Imm/CPI/Global.
+ return isAnImmediateOperand(*ImmMO);
+}
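+// Illustrative sketch (example registers and immediate): for a DefMI such as
+//   %r4 = ADDI %r5, 24
+// RegMO is bound to operand 1 (%r5) and ImmMO to operand 2 (24); an ADDItocL
+// whose operand 2 is a global or constant-pool reference is also accepted via
+// isAnImmediateOperand.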
+
+bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
+ const MachineInstr &DefMI,
+ const MachineInstr &MI,
+ bool KillDefMI
+ ) const {
+ // x = addi y, imm
+ // ...
+ // z = lfdx 0, x -> z = lfd imm(y)
+ // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
+ // of "y" between the DEF of "x" and "z".
+ // The query is only valid post RA.
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA())
+ return false;
+
+ // MachineInstr::readsRegister only returns true if the machine
+ // instruction reads the exact register or its super-register. It
+ // does not consider uses of sub-registers which seems like strange
+ // behaviour. Nonetheless, if we end up with a 64-bit register here,
+ // get the corresponding 32-bit register to check.
+ unsigned Reg = RegMO.getReg();
+ if (PPC::G8RCRegClass.contains(Reg))
+ Reg = Reg - PPC::X0 + PPC::R0;
+
+ // Walk the instructions in reverse (MI --> DefMI) to find the last DEF of Reg.
+ MachineBasicBlock::const_reverse_iterator It = MI;
+ MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
+ It++;
+ for (; It != E; ++It) {
+ if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+ return false;
+ // Made it to DefMI without encountering a clobber.
+ if ((&*It) == &DefMI)
+ break;
+ }
+ assert((&*It) == &DefMI && "DefMI is missing");
+
+ // If DefMI also uses the register to be forwarded, we can only forward it
+ // if DefMI is being erased.
+ if (DefMI.readsRegister(Reg, &getRegisterInfo()))
+ return KillDefMI;
+
+ return true;
+}
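+// Illustrative counter-example (sketch): forwarding must be rejected when the
+// base register is clobbered in between, e.g.
+//   %r4 = ADDI %r5, 8        ; DefMI
+//   %r5 = LI 0               ; redefines %r5
+//   %r3 = LBZX $zero, %r4    ; MI -- can no longer be rewritten as LBZ 8(%r5)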
+
+bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
+ const MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ int64_t &Imm) const {
+ assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
+ if (DefMI.getOpcode() == PPC::ADDItocL) {
+ // The operand of ADDItocL is a CPI (constant pool index), which is not an
+ // immediate at compile time. However, we know that it is 16 bits wide and
+ // has an alignment of 4. Check whether the instruction meets those
+ // requirements.
+ if (III.ImmMustBeMultipleOf > 4 ||
+ III.TruncateImmTo || III.ImmWidth != 16)
+ return false;
+
+ // Going from X-Form to D-Form loads means that the displacement needs to be
+ // not just an immediate but also a multiple of 4, or of 16 depending on the
+ // load. A D-Form load cannot be represented if the displacement is only a
+ // multiple of, say, 2. X-Form loads do not have this restriction.
+ if (ImmMO.isGlobal() &&
+ ImmMO.getGlobal()->getAlignment() < III.ImmMustBeMultipleOf)
+ return false;
+
+ return true;
+ }
+
+ if (ImmMO.isImm()) {
+ // It is an immediate; we need to check whether it fits in the range.
+ int64_t Immediate = ImmMO.getImm();
+ // Sign-extend to 64-bits.
+ Imm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
+ (Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
+
+ if (Imm % III.ImmMustBeMultipleOf)
+ return false;
+ if (III.TruncateImmTo)
+ Imm &= ((1 << III.TruncateImmTo) - 1);
+ if (III.SignedImm) {
+ APInt ActualValue(64, Imm, true);
+ if (!ActualValue.isSignedIntN(III.ImmWidth))
+ return false;
+ } else {
+ uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
+ if ((uint64_t)Imm > UnsignedMax)
+ return false;
+ }
+ }
+ else
+ return false;
+
+ // This ImmMO can be forwarded if it meets the requirements described
+ // in the ImmInstrInfo.
+ return true;
+}
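+// Rough numeric sketch (assumed values, for illustration): with a target such
+// as LXSD (ImmWidth == 16, SignedImm, ImmMustBeMultipleOf == 4), an added
+// immediate of 40 is accepted, 42 fails the multiple-of-4 check, and 40000
+// fails the signed 16-bit range check.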
+
+// If an X-Form instruction is fed by an add-immediate and one of its operands
+// is the literal zero, attempt to forward the source of the add-immediate to
+// the corresponding D-Form instruction with the displacement coming from
+// the immediate being added.
+bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned OpNoForForwarding,
+ MachineInstr &DefMI,
+ bool KillDefMI) const {
+ // RegMO ImmMO
+ // | |
+ // x = addi reg, imm <----- DefMI
+ // y = op 0 , x <----- MI
+ // |
+ // OpNoForForwarding
+ // Check whether the MI meets the requirements described in the III.
+ if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
+ return false;
+
+ // Check whether the DefMI meets the requirements
+ // described in the III. If it does, set the ImmMO and RegMO accordingly.
+ MachineOperand *ImmMO = nullptr;
+ MachineOperand *RegMO = nullptr;
+ if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
+ return false;
+ assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
+
+ // Now that we have the Imm operand, check whether the ImmMO meets
+ // the requirements described in the III. If it does, set the Imm.
+ int64_t Imm = 0;
+ if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
+ return false;
+
+ // Check if the RegMO can be forwarded to MI.
+ if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI))
+ return false;
+
+ // We know that both the MI and the DefMI match the pattern and that the Imm
+ // also meets the requirements of the new imm-form instruction.
+ // It is safe to do the transformation now.
+ LLVM_DEBUG(dbgs() << "Replacing instruction:\n");
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "Fed by:\n");
+ LLVM_DEBUG(DefMI.dump());
+
+ // Update the base reg first.
+ MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
+ false, false,
+ RegMO->isKill());
+
+ // Then, update the imm.
+ if (ImmMO->isImm()) {
+ // If the ImmMO is Imm, change the operand that has ZERO to that Imm
+ // directly.
+ replaceInstrOperandWithImm(MI, III.ZeroIsSpecialOrig, Imm);
+ }
+ else {
+ // Otherwise, it is a constant pool index (CPI) or a global, which is in
+ // fact a relocation. We need to replace the special zero register with
+ // ImmMO.
+ // Before that, we need to fix up the target flags for the immediate; for
+ // some reason the flag is not set on the ImmMO when it is a CPI.
+ if (DefMI.getOpcode() == PPC::ADDItocL)
+ ImmMO->setTargetFlags(PPCII::MO_TOC_LO);
+
+ // MachineInstr has MI.getOperand(i) but no MI.setOperand(i) interface.
+ // To replace the ZERO MachineOperand with ImmMO, we remove the ZERO operand
+ // and all the operands behind it, add the ImmMO, and then add back all the
+ // operands that followed ZERO.
+ SmallVector<MachineOperand, 2> MOps;
+ for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
+ MOps.push_back(MI.getOperand(i));
+ MI.RemoveOperand(i);
+ }
+
+ // Remove the last MO in the list, which is in fact the ZERO operand.
+ MOps.pop_back();
+ // Add the imm operand.
+ MI.addOperand(*ImmMO);
+ // Now add the rest back.
+ for (auto &MO : MOps)
+ MI.addOperand(MO);
+ }
+
+ // Update the opcode.
+ MI.setDesc(get(III.ImmOpcode));
+
+ LLVM_DEBUG(dbgs() << "With:\n");
+ LLVM_DEBUG(MI.dump());
+
+ return true;
+}
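+// End-to-end sketch of the transformation above (illustrative registers and
+// immediate, mirroring the diagram in the function header):
+//   %r4 = ADDI %r5, 16          ; DefMI
+//   %r3 = LBZX $zero, %r4       ; MI, X-Form use of the special zero register
+// becomes
+//   %r3 = LBZ 16, %r5           ; D-Form with the forwarded base register and
+//                               ;   the added immediate as the displacement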
+
+bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned ConstantOpNo,
+ int64_t Imm) const {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
bool PostRA = !MRI.isSSA();
// Exit early if we can't convert this.
- if ((ConstantOpNo != III.ConstantOpNo) && !III.IsCommutative)
+ if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
return false;
if (Imm % III.ImmMustBeMultipleOf)
return false;
@@ -3035,7 +3412,7 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo;
MI.setDesc(get(III.ImmOpcode));
- if (ConstantOpNo == III.ConstantOpNo) {
+ if (ConstantOpNo == III.OpNoForForwarding) {
// Converting shifts to immediate form is a bit tricky since they may do
// one of three things:
// 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
@@ -3063,42 +3440,47 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
uint64_t SH = RightShift ? 32 - ShAmt : ShAmt;
uint64_t MB = RightShift ? ShAmt : 0;
uint64_t ME = RightShift ? 31 : 31 - ShAmt;
- MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
+ replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
.addImm(ME);
} else {
// Left shifts use (N, 63-N), right shifts use (64-N, N).
uint64_t SH = RightShift ? 64 - ShAmt : ShAmt;
uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
- MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
+ replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
}
}
} else
- MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
+ replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
}
// Convert commutative instructions (switch the operands and convert the
// desired one to an immediate).
else if (III.IsCommutative) {
- MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
- swapMIOperands(MI, ConstantOpNo, III.ConstantOpNo);
+ replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
+ swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
} else
llvm_unreachable("Should have exited early!");
// For instructions for which the constant register replaces a different
// operand than where the immediate goes, we need to swap them.
- if (III.ConstantOpNo != III.ImmOpNo)
- swapMIOperands(MI, III.ConstantOpNo, III.ImmOpNo);
+ if (III.OpNoForForwarding != III.ImmOpNo)
+ swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo);
- // If the R0/X0 register is special for the original instruction and not for
- // the new instruction (or vice versa), we need to fix up the register class.
+ // If the special R0/X0 register indices differ between the original and the
+ // new instruction, we need to fix up the register class of the new
+ // instruction.
if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
- if (!III.ZeroIsSpecialOrig) {
+ if (III.ZeroIsSpecialNew) {
+ // If the operand at III.ZeroIsSpecialNew is a physical register (e.g.
+ // ZERO/ZERO8), there is no need to fix up the register class.
unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
- const TargetRegisterClass *NewRC =
- MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
- &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
- MRI.setRegClass(RegToModify, NewRC);
+ if (TargetRegisterInfo::isVirtualRegister(RegToModify)) {
+ const TargetRegisterClass *NewRC =
+ MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
+ &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
+ MRI.setRegClass(RegToModify, NewRC);
+ }
}
}
return true;
@@ -3140,6 +3522,7 @@ static bool isSignExtendingOp(const MachineInstr &MI) {
Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo ||
Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 ||
Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo ||
+ Opcode == PPC::SETB || Opcode == PPC::SETB8 ||
Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 ||
Opcode == PPC::EXTSB8_32_64)
return true;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index ba82f56a2464..7ed558b835af 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -91,8 +91,8 @@ struct ImmInstrInfo {
uint64_t ZeroIsSpecialNew : 3;
// Is the operation commutative?
uint64_t IsCommutative : 1;
- // The operand number to check for load immediate.
- uint64_t ConstantOpNo : 3;
+ // The operand number to check for add-immediate def.
+ uint64_t OpNoForForwarding : 3;
// The operand number for the immediate.
uint64_t ImmOpNo : 3;
// The opcode of the new instruction.
@@ -101,6 +101,8 @@ struct ImmInstrInfo {
uint64_t ImmWidth : 5;
// The immediate should be truncated to N bits.
uint64_t TruncateImmTo : 5;
+ // Is the instruction summing the operands?
+ uint64_t IsSummingOperands : 1;
};
// Information required to convert an instruction to just a materialized
@@ -123,10 +125,42 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
- bool transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo, int64_t Imm) const;
- MachineInstr *getConstantDefMI(MachineInstr &MI, unsigned &ConstOp,
- bool &SeenIntermediateUse) const;
+
+ // If the inst has an imm-form and one of its operands is produced by an LI,
+ // put the imm into the inst directly and remove the LI if possible.
+ bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned ConstantOpNo, int64_t Imm) const;
+ // If the inst has an imm-form and one of its operands is produced by an
+ // add-immediate, try to transform it when possible.
+ bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned ConstantOpNo,
+ MachineInstr &DefMI,
+ bool KillDefMI) const;
+ // Try to find out whether the instruction 'MI' contains any operand that
+ // could be forwarded from an instruction that feeds it. If so, return the
+ // Def of that operand, and set OpNoForForwarding to the operand index in
+ // 'MI' for that Def. If we see another use of this Def between
+ // the Def and the MI, SeenIntermediateUse becomes 'true'.
+ MachineInstr *getForwardingDefMI(MachineInstr &MI,
+ unsigned &OpNoForForwarding,
+ bool &SeenIntermediateUse) const;
+
+ // Can the user MI have its source at index \p OpNoForForwarding
+ // forwarded from an add-immediate that feeds it?
+ bool isUseMIElgibleForForwarding(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned OpNoForForwarding) const;
+ bool isDefMIElgibleForForwarding(MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ MachineOperand *&ImmMO,
+ MachineOperand *&RegMO) const;
+ bool isImmElgibleForForwarding(const MachineOperand &ImmMO,
+ const MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ int64_t &Imm) const;
+ bool isRegElgibleForForwarding(const MachineOperand &RegMO,
+ const MachineInstr &DefMI,
+ const MachineInstr &MI,
+ bool KillDefMI) const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
virtual void anchor();
@@ -158,6 +192,16 @@ public:
bool isXFormMemOp(unsigned Opcode) const {
return get(Opcode).TSFlags & PPCII::XFormMemOp;
}
+ static bool isSameClassPhysRegCopy(unsigned Opcode) {
+ unsigned CopyOpcodes[] =
+ { PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
+ PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb,
+ PPC::CROR, PPC::EVOR, -1U };
+ for (int i = 0; CopyOpcodes[i] != -1U; i++)
+ if (Opcode == CopyOpcodes[i])
+ return true;
+ return false;
+ }
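+ // For example, PPC::OR and PPC::FMR are in the list above since
+ // "or ra, rb, rb" and "fmr" act as same-class register copies, while an
+ // arithmetic opcode such as PPC::ADD4 is not.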
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
@@ -369,8 +413,30 @@ public:
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
-
- bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const;
+ void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
+ int64_t Imm) const;
+
+ bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III,
+ bool PostRA) const;
+
+ /// getRegNumForOperand - some operands use different numbering schemes
+ /// for the same registers. For example, a VSX instruction may have any of
+ /// vs0-vs63 allocated whereas an Altivec instruction could only have
+ /// vs32-vs63 allocated (numbered as v0-v31). This function returns the actual
+ /// register number needed for the opcode/operand number combination.
+ /// The operand number argument will be useful when we need to extend this
+ /// to instructions that use both Altivec and VSX numbering (for different
+ /// operands).
+ static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
+ unsigned OpNo) {
+ if (Desc.TSFlags & PPCII::UseVSXReg) {
+ if (isVRRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::V0);
+ else if (isVFRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ }
+ return Reg;
+ }
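+ // For example, when UseVSXReg is set in the descriptor, V0..V31 map onto
+ // VSX32..VSX63 (and VF0..VF31 likewise), so an operand recorded as V2 would
+ // be encoded with the vs34 numbering.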
};
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 1a43037e4a4b..dd3f1ac79089 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -114,6 +114,10 @@ def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [
SDTCisVec<0>, SDTCisPtrTy<1>
]>;
+def SDT_PPCextswsli : SDTypeProfile<1, 2, [ // extswsli
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2>
+]>;
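+// EXTSWSLI ("extend sign word and shift left immediate") produces, roughly,
+// (shl (sext i32:$src), $sh); the profile above only constrains the operand
+// and result types.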
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@@ -218,6 +222,8 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>;
+
// Move 2 i64 values into a VSX register
def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
SDTypeProfile<1, 2,
@@ -1189,77 +1195,76 @@ multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
//===----------------------------------------------------------------------===//
// PowerPC Instruction Definitions.
-// Pseudo-instructions:
+// Pseudo instructions:
let hasCtrlDep = 1 in {
let Defs = [R1], Uses = [R1] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+def ADJCALLSTACKDOWN : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
"#ADJCALLSTACKDOWN $amt1 $amt2",
[(callseq_start timm:$amt1, timm:$amt2)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+def ADJCALLSTACKUP : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
"#ADJCALLSTACKUP $amt1 $amt2",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS),
+def UPDATE_VRSAVE : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$rS),
"UPDATE_VRSAVE $rD, $rS", []>;
}
let Defs = [R1], Uses = [R1] in
-def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
+def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
[(set i32:$result,
(PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
-def DYNAREAOFFSET : Pseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
+def DYNAREAOFFSET : PPCEmitTimePseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
[(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
-let usesCustomInserter = 1, // Expanded after instruction selection.
- PPC970_Single = 1 in {
+let PPC970_Single = 1 in {
// Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
// because either operand might become the first operand in an isel, and
// that operand cannot be r0.
- def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond,
+ def SELECT_CC_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crrc:$cond,
gprc_nor0:$T, gprc_nor0:$F,
i32imm:$BROPC), "#SELECT_CC_I4",
[]>;
- def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond,
+ def SELECT_CC_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crrc:$cond,
g8rc_nox0:$T, g8rc_nox0:$F,
i32imm:$BROPC), "#SELECT_CC_I8",
[]>;
- def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ def SELECT_CC_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
i32imm:$BROPC), "#SELECT_CC_F4",
[]>;
- def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ def SELECT_CC_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
i32imm:$BROPC), "#SELECT_CC_F8",
[]>;
- def SELECT_CC_F16 : Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
+ def SELECT_CC_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_F16",
[]>;
- def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
+ def SELECT_CC_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_VRRC",
[]>;
// SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
// register bit directly.
- def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond,
+ def SELECT_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crbitrc:$cond,
gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4",
[(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>;
- def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
+ def SELECT_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8",
[(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>;
let Predicates = [HasFPU] in {
- def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
+ def SELECT_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
f4rc:$T, f4rc:$F), "#SELECT_F4",
[(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
- def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
+ def SELECT_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
f8rc:$T, f8rc:$F), "#SELECT_F8",
[(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
- def SELECT_F16 : Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ def SELECT_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
vrrc:$T, vrrc:$F), "#SELECT_F16",
[(set f128:$dst, (select i1:$cond, f128:$T, f128:$F))]>;
}
- def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ def SELECT_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
vrrc:$T, vrrc:$F), "#SELECT_VRRC",
[(set v4i32:$dst,
(select i1:$cond, v4i32:$T, v4i32:$F))]>;
@@ -1268,18 +1273,18 @@ let Predicates = [HasFPU] in {
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
let mayStore = 1 in {
-def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F),
+def SPILL_CR : PPCEmitTimePseudo<(outs), (ins crrc:$cond, memri:$F),
"#SPILL_CR", []>;
-def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F),
+def SPILL_CRBIT : PPCEmitTimePseudo<(outs), (ins crbitrc:$cond, memri:$F),
"#SPILL_CRBIT", []>;
}
// RESTORE_CR - Indicate that we're restoring the CR register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in {
-def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F),
+def RESTORE_CR : PPCEmitTimePseudo<(outs crrc:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
-def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F),
+def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F),
"#RESTORE_CRBIT", []>;
}
@@ -1305,10 +1310,10 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
let Defs = [LR] in
- def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
+ def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
let Defs = [LR] in
- def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>,
+ def MoveGOTtoLR : PPCEmitTimePseudo<(outs), (ins), "#MoveGOTtoLR", []>,
PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
@@ -1506,19 +1511,19 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNdi :Pseudo< (outs),
+def TCRETURNdi :PPCEmitTimePseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+def TCRETURNai :PPCEmitTimePseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
"#TC_RETURNa $func $offset",
[(PPCtc_return (i32 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
+def TCRETURNri : PPCEmitTimePseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
"#TC_RETURNr $dst $offset",
[]>;
@@ -1544,14 +1549,19 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
}
-let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
+// is not.
+let hasSideEffects = 1 in {
let Defs = [CTR] in
- def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ def EH_SjLj_SetJmp32 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP32",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[In32BitMode]>;
+}
+
+let hasSideEffects = 1, isBarrier = 1 in {
let isTerminator = 1 in
- def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ def EH_SjLj_LongJmp32 : PPCCustomInserterPseudo<(outs), (ins memr:$buf),
"#EH_SJLJ_LONGJMP32",
[(PPCeh_sjlj_longjmp addr:$buf)]>,
Requires<[In32BitMode]>;
@@ -1561,7 +1571,7 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
// a terminator. Size is set to 0 to prevent the builtin assembler
// from emitting it.
let isBranch = 1, isTerminator = 1, Size = 0 in {
- def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ def EH_SjLj_Setup : PPCEmitTimePseudo<(outs), (ins directbrtarget:$dst),
"#EH_SjLj_Setup\t$dst", []>;
}
@@ -1648,119 +1658,117 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
-let usesCustomInserter = 1 in {
- let Defs = [CR0] in {
- def ATOMIC_LOAD_ADD_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
- [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
- [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
- [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
- [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8",
- [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
- [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8",
- [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8",
- [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8",
- [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8",
- [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_ADD_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
- [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
- [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
- [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
- [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
- [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
- [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16",
- [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16",
- [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16",
- [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16",
- [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_ADD_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
- [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
- [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
- [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
- [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
- [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
- [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32",
- [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32",
- [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32",
- [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32",
- [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>;
-
- def ATOMIC_CMP_SWAP_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
- [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
- def ATOMIC_CMP_SWAP_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
- [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
- def ATOMIC_CMP_SWAP_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
- [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
-
- def ATOMIC_SWAP_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8",
- [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
- def ATOMIC_SWAP_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
- [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
- def ATOMIC_SWAP_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
- [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
- }
+let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
+ [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
+ [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
+ [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
+ [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8",
+ [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
+ [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8",
+ [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8",
+ [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8",
+ [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8",
+ [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
+ [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
+ [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
+ [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
+ [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
+ [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
+ [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16",
+ [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16",
+ [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16",
+ [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16",
+ [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
+ [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
+ [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
+ [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
+ [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
+ [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
+ [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32",
+ [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32",
+ [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32",
+ [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32",
+ [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
+ [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
+
+ def ATOMIC_SWAP_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8",
+ [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
+ [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
+ [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
}
def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
@@ -1988,15 +1996,15 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
// Unindexed (r+i) Stores.
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
-def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
- "stb $rS, $src", IIC_LdStStore,
- [(truncstorei8 i32:$rS, iaddr:$src)]>;
-def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src),
- "sth $rS, $src", IIC_LdStStore,
- [(truncstorei16 i32:$rS, iaddr:$src)]>;
-def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src),
- "stw $rS, $src", IIC_LdStStore,
- [(store i32:$rS, iaddr:$src)]>;
+def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$dst),
+ "stb $rS, $dst", IIC_LdStStore,
+ [(truncstorei8 i32:$rS, iaddr:$dst)]>;
+def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$dst),
+ "sth $rS, $dst", IIC_LdStStore,
+ [(truncstorei16 i32:$rS, iaddr:$dst)]>;
+def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$dst),
+ "stw $rS, $dst", IIC_LdStStore,
+ [(store i32:$rS, iaddr:$dst)]>;
let Predicates = [HasFPU] in {
def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
"stfs $rS, $dst", IIC_LdStSTFD,
@@ -2010,13 +2018,13 @@ def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
// Unindexed (r+i) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
let Predicates = [HasFPU] in {
def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
@@ -2084,19 +2092,19 @@ def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBUX : XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX : XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX : XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
@@ -2543,8 +2551,8 @@ def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT),
// A pseudo-instruction used to implement the read of the 64-bit cycle counter
// on a 32-bit target.
-let hasSideEffects = 1, usesCustomInserter = 1 in
-def ReadTB : Pseudo<(outs gprc:$lo, gprc:$hi), (ins),
+let hasSideEffects = 1 in
+def ReadTB : PPCCustomInserterPseudo<(outs gprc:$lo, gprc:$hi), (ins),
"#ReadTB", []>;
let Uses = [CTR] in {
@@ -2603,13 +2611,13 @@ def : InstAlias<"mfvrsave $rS", (MFVRSAVE gprc:$rS)>;
// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
// so we'll need to scavenge a register for it.
let mayStore = 1 in
-def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+def SPILL_VRSAVE : PPCEmitTimePseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
"#SPILL_VRSAVE", []>;
// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in
-def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+def RESTORE_VRSAVE : PPCEmitTimePseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
"#RESTORE_VRSAVE", []>;
let hasSideEffects = 0 in {
@@ -2648,9 +2656,9 @@ def MCRXRX : X_BF3<31, 576, (outs crrc:$BF), (ins),
} // hasSideEffects = 0
let Predicates = [HasFPU] in {
-// Pseudo instruction to perform FADD in round-to-zero mode.
-let usesCustomInserter = 1, Uses = [RM] in {
- def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
+// Custom inserter instruction to perform FADD in round-to-zero mode.
+let Uses = [RM] in {
+ def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
[(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
}
@@ -3022,23 +3030,23 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS $in, tblockaddress:$g)>;
// Support for thread-local storage.
-def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
+def PPC32GOT: PPCEmitTimePseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
[(set i32:$rD, (PPCppc32GOT))]>;
// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
// This uses two output registers, the first as the real output, the second as a
// temporary register, used internally in code generation.
-def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+def PPC32PICGOT: PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
[]>, NoEncode<"$rT">;
-def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
+def LDgotTprelL32: PPCEmitTimePseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
"#LDgotTprelL32",
[(set i32:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
(ADD4TLS $in, tglobaltlsaddr:$g)>;
-def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDItlsgdL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsgdL32",
[(set i32:$rD,
(PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
@@ -3046,7 +3054,7 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+def GETtlsADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsADDR32",
[(set i32:$rD,
(PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
@@ -3054,14 +3062,14 @@ def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD),
+def ADDItlsgdLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD),
(ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
"#ADDItlsgdLADDR32",
[(set i32:$rD,
(PPCaddiTlsgdLAddr i32:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>;
-def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDItlsldL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsldL32",
[(set i32:$rD,
(PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
@@ -3069,7 +3077,7 @@ def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+def GETtlsldADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsldADDR32",
[(set i32:$rD,
(PPCgetTlsldAddr i32:$reg,
@@ -3078,31 +3086,31 @@ def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD),
+def ADDItlsldLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD),
(ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
"#ADDItlsldLADDR32",
[(set i32:$rD,
(PPCaddiTlsldLAddr i32:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>;
-def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDIdtprelL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDIdtprelL32",
[(set i32:$rD,
(PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
-def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDISdtprelHA32",
[(set i32:$rD,
(PPCaddisDtprelHA i32:$reg,
tglobaltlsaddr:$disp))]>;
// Support for Position-independent code
-def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
"#LWZtoc",
[(set i32:$rD,
(PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
// Get Global (GOT) Base Register offset, from the word immediately preceding
// the function label.
-def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
// Standard shifts. These are represented separately from the real shifts above
@@ -3930,21 +3938,19 @@ def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)),
def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
(SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-let usesCustomInserter = 1 in {
-def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+def ANDIo_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
"#ANDIo_1_EQ_BIT",
[(set i1:$dst, (trunc (not i32:$in)))]>;
-def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+def ANDIo_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
"#ANDIo_1_GT_BIT",
[(set i1:$dst, (trunc i32:$in))]>;
-def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+def ANDIo_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
"#ANDIo_1_EQ_BIT8",
[(set i1:$dst, (trunc (not i64:$in)))]>;
-def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+def ANDIo_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
"#ANDIo_1_GT_BIT8",
[(set i1:$dst, (trunc i64:$in))]>;
-}
def : Pat<(i1 (not (trunc i32:$in))),
(ANDIo_1_EQ_BIT $in)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
index c4bb02695b36..ef589ad01fd7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -245,32 +245,30 @@ let Uses = [RM] in {
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
- let usesCustomInserter = 1 in {
- def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QFRC",
- []>;
- def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QSRC",
- []>;
- def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QBRC",
- []>;
-
- // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
- // register bit directly.
- def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
- qfrc:$T, qfrc:$F), "#SELECT_QFRC",
- [(set v4f64:$dst,
- (select i1:$cond, v4f64:$T, v4f64:$F))]>;
- def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
- qsrc:$T, qsrc:$F), "#SELECT_QSRC",
- [(set v4f32:$dst,
- (select i1:$cond, v4f32:$T, v4f32:$F))]>;
- def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
- qbrc:$T, qbrc:$F), "#SELECT_QBRC",
- [(set v4i1:$dst,
- (select i1:$cond, v4i1:$T, v4i1:$F))]>;
- }
+ def SELECT_CC_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QFRC",
+ []>;
+ def SELECT_CC_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QSRC",
+ []>;
+ def SELECT_CC_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QBRC",
+ []>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
+ qfrc:$T, qfrc:$F), "#SELECT_QFRC",
+ [(set v4f64:$dst,
+ (select i1:$cond, v4f64:$T, v4f64:$F))]>;
+ def SELECT_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
+ qsrc:$T, qsrc:$F), "#SELECT_QSRC",
+ [(set v4f32:$dst,
+ (select i1:$cond, v4f32:$T, v4f32:$F))]>;
+ def SELECT_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
+ qbrc:$T, qbrc:$F), "#SELECT_QBRC",
+ [(set v4i1:$dst,
+ (select i1:$cond, v4i1:$T, v4i1:$F))]>;
// Convert and Round Instructions
def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 96649efdc1bc..9f5891a45f22 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -831,22 +831,20 @@ def : Pat<(f64 (fpextend f32:$src)),
}
let Predicates = [HasSPE] in {
- let usesCustomInserter = 1 in {
-def SELECT_CC_SPE4 : Pseudo<(outs spe4rc:$dst),
+def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst),
(ins crrc:$cond, spe4rc:$T, spe4rc:$F,
i32imm:$BROPC), "#SELECT_CC_SPE4",
[]>;
-def SELECT_CC_SPE : Pseudo<(outs sperc:$dst),
+def SELECT_CC_SPE : PPCCustomInserterPseudo<(outs sperc:$dst),
(ins crrc:$cond, sperc:$T, sperc:$F, i32imm:$BROPC),
"#SELECT_CC_SPE",
[]>;
-def SELECT_SPE4 : Pseudo<(outs spe4rc:$dst), (ins crbitrc:$cond,
+def SELECT_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst), (ins crbitrc:$cond,
spe4rc:$T, spe4rc:$F), "#SELECT_SPE4",
[(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
-def SELECT_SPE : Pseudo<(outs sperc:$dst), (ins crbitrc:$cond,
+def SELECT_SPE : PPCCustomInserterPseudo<(outs sperc:$dst), (ins crbitrc:$cond,
sperc:$T, sperc:$F), "#SELECT_SPE",
[(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
- }
def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
(SELECT_SPE4 (CRANDC $lhs, $rhs), $tval, $fval)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 781a3277441a..0f073388dc74 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -67,6 +67,10 @@ def SDT_PPCxxswapd : SDTypeProfile<1, 1, [
def SDTVecConv : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>
]>;
+def SDTVabsd : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>
+]>;
+
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -79,6 +83,7 @@ def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
+def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
@@ -132,7 +137,7 @@ let Uses = [RM] in {
[]>;
// Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later
- let isPseudo = 1, CodeSize = 3 in
+ let CodeSize = 3 in
def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
"#XFLOADf64",
[(set f64:$XT, (load xoaddr:$src))]>;
@@ -163,7 +168,7 @@ let Uses = [RM] in {
[]>;
// Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later
- let isPseudo = 1, CodeSize = 3 in
+ let CodeSize = 3 in
def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
"#XFSTOREf64",
[(store f64:$XT, xoaddr:$dst)]>;
@@ -898,37 +903,36 @@ let Uses = [RM] in {
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
-let usesCustomInserter = 1, // Expanded after instruction selection.
- PPC970_Single = 1 in {
+let PPC970_Single = 1 in {
- def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst),
+ def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
(ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
"#SELECT_CC_VSRC",
[]>;
- def SELECT_VSRC: Pseudo<(outs vsrc:$dst),
+ def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
(ins crbitrc:$cond, vsrc:$T, vsrc:$F),
"#SELECT_VSRC",
[(set v2f64:$dst,
(select i1:$cond, v2f64:$T, v2f64:$F))]>;
- def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst),
+ def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
(ins crrc:$cond, f8rc:$T, f8rc:$F,
i32imm:$BROPC), "#SELECT_CC_VSFRC",
[]>;
- def SELECT_VSFRC: Pseudo<(outs f8rc:$dst),
+ def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
(ins crbitrc:$cond, f8rc:$T, f8rc:$F),
"#SELECT_VSFRC",
[(set f64:$dst,
(select i1:$cond, f64:$T, f64:$F))]>;
- def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst),
+ def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
(ins crrc:$cond, f4rc:$T, f4rc:$F,
i32imm:$BROPC), "#SELECT_CC_VSSRC",
[]>;
- def SELECT_VSSRC: Pseudo<(outs f4rc:$dst),
+ def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
(ins crbitrc:$cond, f4rc:$T, f4rc:$F),
"#SELECT_VSSRC",
[(set f32:$dst,
(select i1:$cond, f32:$T, f32:$F))]>;
-} // usesCustomInserter
+}
} // AddedComplexity
def : InstAlias<"xvmovdp $XT, $XB",
@@ -1040,17 +1044,14 @@ def : Pat<(v2f64 (bitconvert v1i128:$A)),
def : Pat<(v1i128 (bitconvert v2f64:$A)),
(COPY_TO_REGCLASS $A, VRRC)>;
-// sign extension patterns
-// To extend "in place" from v2i32 to v2i64, we have input data like:
-// | undef | i32 | undef | i32 |
-// but xvcvsxwdp expects the input in big-Endian format:
-// | i32 | undef | i32 | undef |
-// so we need to shift everything to the left by one i32 (word) before
-// the conversion.
-def : Pat<(sext_inreg v2i64:$C, v2i32),
- (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>;
-def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
- (XVCVSXWDP (XXSLDWI $C, $C, 1))>;
+def : Pat<(v2i64 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
(v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
@@ -1069,10 +1070,6 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
// Stores.
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
(STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
}
let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
@@ -1159,6 +1156,26 @@ def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
(XVRSQRTEDP $A)>;
+// Vector selection
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+
let Predicates = [IsLittleEndian] in {
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
(f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
@@ -1200,6 +1217,27 @@ def ScalarLoads {
dag Li32 = (i32 (load xoaddr:$src));
}
+def DWToSPExtractConv {
+ dag El0US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag El0SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
+ dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
+}
+
// The following VSX instructions were introduced in Power ISA 2.07
/* FIXME: if the operands are v2i64, these patterns will not match.
we should define new patterns or otherwise match the same patterns
@@ -1241,23 +1279,19 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
"lxsiwzx $XT, $src", IIC_LdStLFD, []>;
- // Please note let isPseudo = 1 is not part of class Pseudo<>. Missing it
- // would cause these Pseudos are not expanded in expandPostRAPseudos()
- let isPseudo = 1 in {
- // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
- let CodeSize = 3 in
- def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
- "#XFLOADf32",
- [(set f32:$XT, (load xoaddr:$src))]>;
- // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
- def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
- "#LIWAX",
- [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
- // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
- def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
- "#LIWZX",
- [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
- }
+ // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
+ let CodeSize = 3 in
+ def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
+ "#XFLOADf32",
+ [(set f32:$XT, (load xoaddr:$src))]>;
+ // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
+ def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWAX",
+ [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+ // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
+ def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWZX",
+ [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
@@ -1268,19 +1302,15 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
"stxsiwx $XT, $dst", IIC_LdStSTFD, []>;
- // Please note let isPseudo = 1 is not part of class Pseudo<>. Missing it
- // would cause these Pseudos are not expanded in expandPostRAPseudos()
- let isPseudo = 1 in {
- // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
- let CodeSize = 3 in
- def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
- "#XFSTOREf32",
- [(store f32:$XT, xoaddr:$dst)]>;
- // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
- def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
- "#STIWX",
- [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
- }
+ // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
+ let CodeSize = 3 in
+ def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
+ "#XFSTOREf32",
+ [(store f32:$XT, xoaddr:$dst)]>;
+ // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
+ def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
+ "#STIWX",
+ [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
} // mayStore
} // UseVSXReg = 1
@@ -1443,35 +1473,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // UseVSXReg = 1
let Predicates = [IsLittleEndian] in {
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+ def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1SS1,
(f32 (XSCVSXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1US1,
(f32 (XSCVUXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
}
let Predicates = [IsBigEndian] in {
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
}
// Instructions for converting float to i64 feeding a store.
@@ -1993,6 +2015,10 @@ let Predicates = [IsLittleEndian, HasVSX] in
def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
(f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
+def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -2671,6 +2697,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
"xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
+ def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)),
+ (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;
+
// Extract Exponent/Significand DP/QP
def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
@@ -2678,6 +2707,10 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
+ def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)),
+ (i64 (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (XSXEXPQP $vA)), sub_64)))>;
+
// Vector Insert Word
let UseVSXReg = 1 in {
// XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
@@ -3238,20 +3271,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(f64 (PPCVexts f64:$A, 2)),
(f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
- let isPseudo = 1 in {
- def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
- "#DFLOADf32",
- [(set f32:$XT, (load ixaddr:$src))]>;
- def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src),
- "#DFLOADf64",
- [(set f64:$XT, (load ixaddr:$src))]>;
- def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst),
- "#DFSTOREf32",
- [(store f32:$XT, ixaddr:$dst)]>;
- def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
- "#DFSTOREf64",
- [(store f64:$XT, ixaddr:$dst)]>;
- }
+ def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
+ "#DFLOADf32",
+ [(set f32:$XT, (load ixaddr:$src))]>;
+ def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
+ "#DFLOADf64",
+ [(set f64:$XT, (load ixaddr:$src))]>;
+ def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
+ "#DFSTOREf32",
+ [(store f32:$XT, ixaddr:$dst)]>;
+ def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
+ "#DFSTOREf64",
+ [(store f64:$XT, ixaddr:$dst)]>;
+
def : Pat<(f64 (extloadf32 ixaddr:$src)),
(COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
@@ -3533,22 +3565,20 @@ let AddedComplexity = 400 in {
}
let Predicates = [HasP9Vector] in {
- let isPseudo = 1 in {
- let mayStore = 1 in {
- def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
- (ins spilltovsrrc:$XT, memrr:$dst),
- "#SPILLTOVSR_STX", []>;
- def SPILLTOVSR_ST : Pseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
- "#SPILLTOVSR_ST", []>;
- }
- let mayLoad = 1 in {
- def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
- (ins memrr:$src),
- "#SPILLTOVSR_LDX", []>;
- def SPILLTOVSR_LD : Pseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
- "#SPILLTOVSR_LD", []>;
+ let mayStore = 1 in {
+ def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
+ (ins spilltovsrrc:$XT, memrr:$dst),
+ "#SPILLTOVSR_STX", []>;
+ def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
+ "#SPILLTOVSR_ST", []>;
+ }
+ let mayLoad = 1 in {
+ def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
+ (ins memrr:$src),
+ "#SPILLTOVSR_LDX", []>;
+ def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
+ "#SPILLTOVSR_LD", []>;
- }
}
}
// Integer extend helper dags 32 -> 64
@@ -3797,6 +3827,15 @@ let AddedComplexity = 400 in {
(XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
}
+ let Predicates = [IsBigEndian, HasP8Vector] in {
+ def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
+ (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
+ def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
+ (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+ }
+
// Big endian, available on all targets with VSX
let Predicates = [IsBigEndian, HasVSX] in {
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3825,6 +3864,15 @@ let AddedComplexity = 400 in {
(v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
}
+ let Predicates = [IsLittleEndian, HasP8Vector] in {
+ def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
+ (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
+ def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
+ (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+ }
+
let Predicates = [IsLittleEndian, HasVSX] in {
// Little endian, available on all targets with VSX
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3869,10 +3917,11 @@ let AddedComplexity = 400 in {
(COPY_TO_REGCLASS (MTVSRD $A), VSRC),
(COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0),
- (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>;
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
}
@@ -3884,10 +3933,11 @@ let AddedComplexity = 400 in {
(COPY_TO_REGCLASS (MTVSRD $B), VSRC),
(COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0),
- (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>;
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
}
@@ -3940,10 +3990,9 @@ let AddedComplexity = 400 in {
def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
(v2i64 (MTVSRDD $rB, $rA))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW
- (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC)),
- (v4i32
- (COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC)))>;
+ (MTVSRDD
+ (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
+ (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
}
let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
@@ -3953,10 +4002,9 @@ let AddedComplexity = 400 in {
def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
(v2i64 (MTVSRDD $rB, $rA))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW
- (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC)),
- (v4i32
- (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC)))>;
+ (MTVSRDD
+ (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
+ (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
}
// P9 Altivec instructions that can be used to build vectors.
// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
@@ -4005,3 +4053,21 @@ let AddedComplexity = 400 in {
}
}
+// Put this P9Altivec-related definition here since it may be selected to the
+// VSX instruction xvnegsp; keeping it here avoids a possible undef.
+let Predicates = [HasP9Altivec] in {
+
+ def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
+ (v4i32 (VABSDUW $A, $B))>;
+
+ def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
+ (v8i16 (VABSDUH $A, $B))>;
+
+ def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
+ (v16i8 (VABSDUB $A, $B))>;
+
+ // As the PPCVABSD description says, the last operand indicates whether to
+ // do the sign-bit flip before taking the difference.
+ def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
+ (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+}
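
For context on the sign-flip trick the comment above relies on: flipping the sign bit of each 32-bit lane (which is effectively what xvnegsp does) maps signed ordering onto unsigned ordering, so the unsigned absolute-difference instructions can compute a signed absolute difference. A minimal standalone C++ sketch of that equivalence, not taken from the patch; the helper names absdu and signedAbsDiff are invented for illustration:

#include <cassert>
#include <cstdint>

// Models one 32-bit lane of vabsduw: unsigned absolute difference.
static uint32_t absdu(uint32_t a, uint32_t b) {
  return a > b ? a - b : b - a;
}

// Signed absolute difference via the sign-bit flip, mirroring the
// (PPCvabsd ..., (i32 1)) -> VABSDUW (XVNEGSP ...), (XVNEGSP ...) pattern.
static uint32_t signedAbsDiff(int32_t a, int32_t b) {
  uint32_t ua = static_cast<uint32_t>(a) ^ 0x80000000u; // sign-bit flip
  uint32_t ub = static_cast<uint32_t>(b) ^ 0x80000000u;
  return absdu(ua, ub);
}

int main() {
  assert(signedAbsDiff(-5, 7) == 12);
  assert(signedAbsDiff(100, -100) == 200);
  assert(absdu(5u, 7u) == 2); // the (i32 0) case: operands are already unsigned
  return 0;
}
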
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td
index 230a04628504..d2a09f30c0f3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td
@@ -1,4 +1,4 @@
-//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
+//===-- PPCPfmCounters.td - PPC Hardware Counters ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,10 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines intrinsics that are used by all hw codegen targets.
+// This describes the available hardware counters for PPC.
//
//===----------------------------------------------------------------------===//
-let TargetPrefix = "AMDGPU", isTarget = 1 in {
- def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
+def CpuCyclesPfmCounter : PfmCounter<"CYCLES">;
+
+def DefaultPfmCounters : ProcPfmCounters {
+ let CycleCounter = CpuCyclesPfmCounter;
}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 1892d1e3dc26..4458b92ceb5e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -33,6 +34,8 @@ STATISTIC(NumRRConvertedInPreEmit,
"Number of r+r instructions converted to r+i in pre-emit peephole");
STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
+STATISTIC(NumberOfSelfCopies,
+ "Number of self copy instructions eliminated");
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
@@ -60,9 +63,32 @@ namespace {
return false;
bool Changed = false;
const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
+ unsigned Opc = MI.getOpcode();
+ // Detect self copies - these can result from running AADB.
+ if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.getNumOperands() == 3 &&
+ MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
+ MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
+ NumberOfSelfCopies++;
+ LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
+ LLVM_DEBUG(MI.dump());
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
+ else if (MCID.getNumOperands() == 2 &&
+ MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ NumberOfSelfCopies++;
+ LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
+ LLVM_DEBUG(MI.dump());
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
+ }
MachineInstr *DefMIToErase = nullptr;
if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
Changed = true;
@@ -74,6 +100,75 @@ namespace {
}
}
}
+
+ // Eliminate a conditional branch that depends on a CR bit set to a constant
+ // by CRSET or CRUNSET: either remove the conditional branch or convert it
+ // into an unconditional branch. Also, if the CR bit is not used by any
+ // other instruction, eliminate the CRSET/CRUNSET as well.
+ auto I = MBB.getFirstInstrTerminator();
+ if (I == MBB.instr_end())
+ continue;
+ MachineInstr *Br = &*I;
+ if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
+ continue;
+ MachineInstr *CRSetMI = nullptr;
+ unsigned CRBit = Br->getOperand(0).getReg();
+ unsigned CRReg = getCRFromCRBit(CRBit);
+ bool SeenUse = false;
+ MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
+ for (It++; It != Er; It++) {
+ if (It->modifiesRegister(CRBit, TRI)) {
+ if ((It->getOpcode() == PPC::CRUNSET ||
+ It->getOpcode() == PPC::CRSET) &&
+ It->getOperand(0).getReg() == CRBit)
+ CRSetMI = &*It;
+ break;
+ }
+ if (It->readsRegister(CRBit, TRI))
+ SeenUse = true;
+ }
+ if (!CRSetMI) continue;
+
+ unsigned CRSetOp = CRSetMI->getOpcode();
+ if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
+ (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) {
+ // Remove this branch since it cannot be taken.
+ InstrsToErase.push_back(Br);
+ MBB.removeSuccessor(Br->getOperand(1).getMBB());
+ }
+ else {
+ // This conditional branch is always taken. So, remove all branches
+ // and insert an unconditional branch to the destination of this.
+ MachineBasicBlock::iterator It = Br, Er = MBB.end();
+ for (; It != Er; It++) {
+ if (It->isDebugInstr()) continue;
+ assert(It->isTerminator() && "Non-terminator after a terminator");
+ InstrsToErase.push_back(&*It);
+ }
+ if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
+ ArrayRef<MachineOperand> NoCond;
+ TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
+ NoCond, Br->getDebugLoc());
+ }
+ for (auto &Succ : MBB.successors())
+ if (Succ != Br->getOperand(1).getMBB()) {
+ MBB.removeSuccessor(Succ);
+ break;
+ }
+ }
+
+ // If the CRBit is not used by another instruction, we can eliminate
+ // the CRSET/CRUNSET instruction.
+ if (!SeenUse) {
+ // We need to check use of the CRBit in successors.
+ for (auto &SuccMBB : MBB.successors())
+ if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
+ SeenUse = true;
+ break;
+ }
+ if (!SeenUse)
+ InstrsToErase.push_back(CRSetMI);
+ }
}
for (MachineInstr *MI : InstrsToErase) {
LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
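
To restate the branch-folding decision in the hunk above in isolation: once the CR bit is known to be a constant, a BC/BCn terminator is either never taken (and is deleted along with its successor edge) or always taken (and all terminators are replaced by an unconditional branch). A hedged standalone sketch of just that decision; the enum and function name are invented here, only the opcode pairing mirrors the code above:

#include <cassert>

// Stand-ins for the PPC:: opcodes referenced in the hunk above.
enum Opcode { BC, BCn, CRSET, CRUNSET };

// True if the branch can never be taken (the peephole deletes it); false if
// it is always taken (the peephole emits an unconditional branch instead).
static bool branchNeverTaken(Opcode BrOpc, Opcode CRSetOpc) {
  bool BitIsOne = (CRSetOpc == CRSET); // BC branches on 1, BCn branches on 0.
  bool Taken = (BrOpc == BC) ? BitIsOne : !BitIsOne;
  return !Taken;
}

int main() {
  assert(branchNeverTaken(BCn, CRSET));    // bit is 1, BCn does not branch
  assert(branchNeverTaken(BC, CRUNSET));   // bit is 0, BC does not branch
  assert(!branchNeverTaken(BC, CRSET));    // always taken
  assert(!branchNeverTaken(BCn, CRUNSET)); // always taken
  return 0;
}
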
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 96923a97a82c..3d067aa8e621 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -673,12 +673,15 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned SrcReg = MI.getOperand(0).getReg();
- BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL),
- getCRFromCRBit(SrcReg))
- .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
-
+ // We need to move the CR field that contains the CR bit we are spilling.
+ // The super register may not be explicitly defined (i.e. it can be defined
+ // by a CR-logical that only defines the subreg) so we state that the CR
+ // field is undef. Also, in order to preserve the kill flag on the CR bit,
+ // we add it as an implicit use.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
- .addReg(getCRFromCRBit(SrcReg));
+ .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
+ .addReg(SrcReg,
+ RegState::Implicit | getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0LT, shift the bits left so that the bit to
// store is the first one. Mask all but that bit.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 91a98ee4efc7..e93fe4ce3453 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -85,8 +85,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
/// We require the register scavenger.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
@@ -141,6 +139,23 @@ public:
// Base pointer (stack realignment) support.
unsigned getBaseRegister(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
+
+ /// stripRegisterPrefix - This method strips the character prefix from a
+ /// register name so that only the number is left. Used for Linux asm printing.
+ static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'q': // for QPX
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+ }
};
} // end namespace llvm
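
A small usage sketch for the stripRegisterPrefix() helper added above; the body is copied from the hunk so the example is self-contained, and the expected results follow directly from it:

#include <cassert>
#include <cstring>

// Copied from the PPCRegisterInfo.h hunk above: strip the character prefix
// from a register name so that only the number is left.
static const char *stripRegisterPrefix(const char *RegName) {
  switch (RegName[0]) {
  case 'r':
  case 'f':
  case 'q': // for QPX
  case 'v':
    if (RegName[1] == 's')
      return RegName + 2;
    return RegName + 1;
  case 'c':
    if (RegName[1] == 'r')
      return RegName + 2;
  }
  return RegName;
}

int main() {
  assert(!strcmp(stripRegisterPrefix("r3"), "3"));    // GPR
  assert(!strcmp(stripRegisterPrefix("vs34"), "34")); // VSX
  assert(!strcmp(stripRegisterPrefix("cr7"), "7"));   // condition register
  assert(!strcmp(stripRegisterPrefix("f31"), "31"));  // FPR
  assert(!strcmp(stripRegisterPrefix("lr"), "lr"));   // no prefix to strip
  return 0;
}
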
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 0e641cf9e00a..d0d29b6d2c7d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -85,6 +85,12 @@ class VSRL<FPR SubReg, string n> : PPCReg<n> {
let SubRegIndices = [sub_64];
}
+// VSXReg - One of the VSX registers in the range vs32-vs63 with numbering
+// and encoding to match.
+class VSXReg<bits<6> num, string n> : PPCReg<n> {
+ let HWEncoding{5-0} = num;
+}
+
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
@@ -148,7 +154,7 @@ foreach Index = 0-31 in {
// Dummy VSX registers; this defines the strings "vs32"-"vs63" and is only used for
// asm printing.
foreach Index = 32-63 in {
- def VSX#Index : PPCReg<"vs"#Index>;
+ def VSX#Index : VSXReg<Index, "vs"#Index>;
}
// The representation of r0 when treated as the constant 0.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
index 5ad0a517c117..c8fe7d7eea78 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -42,7 +42,6 @@ def IIC_LdStLoad : InstrItinClass;
def IIC_LdStLoadUpd : InstrItinClass;
def IIC_LdStLoadUpdX : InstrItinClass;
def IIC_LdStStore : InstrItinClass;
-def IIC_LdStStoreUpd : InstrItinClass;
def IIC_LdStDSS : InstrItinClass;
def IIC_LdStICBI : InstrItinClass;
def IIC_LdStLD : InstrItinClass;
@@ -63,8 +62,8 @@ def IIC_LdStSLBIA : InstrItinClass;
def IIC_LdStSLBIE : InstrItinClass;
def IIC_LdStSTD : InstrItinClass;
def IIC_LdStSTDCX : InstrItinClass;
-def IIC_LdStSTDU : InstrItinClass;
-def IIC_LdStSTDUX : InstrItinClass;
+def IIC_LdStSTU : InstrItinClass;
+def IIC_LdStSTUX : InstrItinClass;
def IIC_LdStSTFD : InstrItinClass;
def IIC_LdStSTFDU : InstrItinClass;
def IIC_LdStSTVEBX : InstrItinClass;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
index 2455e5e52de5..646822eedbe0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
@@ -280,13 +280,6 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [P440_LWB]>],
[1, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
- InstrStage<1, [P440_LRACC]>,
- InstrStage<1, [P440_AGEN]>,
- InstrStage<1, [P440_CRD]>,
- InstrStage<2, [P440_LWB]>],
- [2, 1, 1, 1],
- [NoBypass, P440_GPR_Bypass]>,
InstrItinData<IIC_LdStICBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
@@ -373,14 +366,14 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [P440_LWB]>],
[4, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
InstrStage<1, [P440_CRD]>,
InstrStage<2, [P440_LWB]>],
[2, 1, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
InstrStage<1, [P440_CRD]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
index 54cfae5d74b7..f34c1accc0fd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -81,8 +81,6 @@ def PPCA2Itineraries : ProcessorItineraries<
[6, 0, 0]>,
InstrItinData<IIC_LdStStore, [InstrStage<1, [A2_XU]>],
[0, 0, 0]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [A2_XU]>],
- [2, 0, 0, 0]>,
InstrItinData<IIC_LdStICBI, [InstrStage<1, [A2_XU]>],
[16, 0, 0]>,
InstrItinData<IIC_LdStSTFD, [InstrStage<1, [A2_XU]>],
@@ -105,9 +103,9 @@ def PPCA2Itineraries : ProcessorItineraries<
[82, 0, 0]>, // L2 latency
InstrItinData<IIC_LdStSTD, [InstrStage<1, [A2_XU]>],
[0, 0, 0]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [A2_XU]>],
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [A2_XU]>],
[2, 0, 0, 0]>,
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [A2_XU]>],
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [A2_XU]>],
[2, 0, 0, 0]>,
InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [A2_XU]>],
[82, 0, 0]>, // L2 latency
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
index d7c2bd15a258..479a970b2537 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
@@ -144,7 +144,13 @@ def PPCE500Itineraries : ProcessorItineraries<
InstrStage<1, [E500_LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, E500_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SU0, E500_SU1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
InstrStage<1, [E500_SU0, E500_SU1], 0>,
InstrStage<1, [E500_LSU_0]>],
[6, 1], // Latency = 3
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index 5f95f2a79f66..d8bda073833f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -157,7 +157,13 @@ def PPCE500mcItineraries : ProcessorItineraries<
InstrStage<1, [E500mc_LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, E500mc_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+ InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>,
+ InstrStage<1, [E500mc_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500mc_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>,
InstrStage<1, [E500mc_LSU_0]>],
[6, 1], // Latency = 3
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index 32f8e652dd56..3e50803955c4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -206,12 +206,6 @@ def PPCE5500Itineraries : ProcessorItineraries<
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
- InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
- InstrStage<1, [E5500_LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, E5500_GPR_Bypass],
- 2>, // 2 micro-ops
InstrItinData<IIC_LdStICBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
@@ -281,13 +275,13 @@ def PPCE5500Itineraries : ProcessorItineraries<
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass],
2>, // 2 micro-ops
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
index 21efd8f8f6c9..0995b7200d93 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -43,7 +43,8 @@ def G3Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<2, [G3_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<3, [G3_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G3_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
index 340773ef7876..1b15c7b3c7ad 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -48,7 +48,8 @@ def G4Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<2, [G4_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G4_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 1d9f13fcb850..0044c3c6a449 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -56,7 +56,6 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<3, [G4P_IU2]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<3, [G4P_SLU]>]>,
@@ -73,8 +72,8 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTD , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTDCX , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTVEBX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTWCX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSync , [InstrStage<35, [G4P_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
index b5a9f96d45ae..c802b80170fb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -54,7 +54,6 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>,
@@ -76,8 +75,8 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStSLBIA , [InstrStage<40, [G5_SLU]>]>, // needs work
InstrItinData<IIC_LdStSLBIE , [InstrStage<2, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTD , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTDCX , [InstrStage<11, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTVEBX , [InstrStage<5, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTWCX , [InstrStage<11, [G5_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index a8678f56900e..1d6e509819da 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -114,6 +114,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[4, 1, 1]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
@@ -126,6 +130,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
@@ -253,13 +261,13 @@ def P7Itineraries : ProcessorItineraries<
InstrStage<1, [P7_LS1, P7_LS2], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[1, 1, 1]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<1, [P7_DU1], 0>,
InstrStage<1, [P7_DU2], 0>,
InstrStage<1, [P7_LS1, P7_LS2], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[2, 1, 1, 1]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P7_DU1], 0>,
InstrStage<1, [P7_DU2], 0>,
InstrStage<1, [P7_DU3], 0>,
InstrStage<1, [P7_DU4], 0>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index 79963dd6a3e9..ff39dfda7016 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -90,6 +90,10 @@ def P8Itineraries : ProcessorItineraries<
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[4, 1, 1]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1, 1]>,
InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
@@ -102,6 +106,10 @@ def P8Itineraries : ProcessorItineraries<
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
@@ -259,14 +267,14 @@ def P8Itineraries : ProcessorItineraries<
InstrStage<1, [P8_LU1, P8_LU2,
P8_LSU1, P8_LSU2]>]
[1, 1, 1]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<1, [P8_DU1], 0>,
InstrStage<1, [P8_DU2], 0>,
InstrStage<1, [P8_LU1, P8_LU2,
P8_LSU1, P8_LSU2], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[2, 1, 1, 1]>,
// First+last
- InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P8_DU1], 0>,
InstrStage<1, [P8_DU2], 0>,
InstrStage<1, [P8_DU3], 0>,
InstrStage<1, [P8_DU4], 0>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index e1a480117315..a1e625c855e0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -33,6 +33,12 @@ def P9Model : SchedMachineModel {
// A dispatch group is 6 instructions.
let LoopMicroOpBufferSize = 60;
+ // As iops are dispatched to a slice, they are held in an independent slice
+ // issue queue until all register sources and other dependencies have been
+ // resolved and they can be issued. Each of four execution slices has an
+ // 11-entry iop issue queue.
+ let MicroOpBufferSize = 44;
+
let CompleteModel = 1;
// Do not support QPX (Quad Processing eXtension) or SPE (Signal Processing
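
The new MicroOpBufferSize value follows from the comment above: four execution slices, each with an 11-entry iop issue queue. A trivial sketch of that arithmetic; the constant names are invented here, only the numbers come from the comment:

// Sketch only; names are invented, the numbers come from the comment above.
constexpr unsigned NumExecutionSlices = 4;
constexpr unsigned IopIssueQueueEntries = 11;
static_assert(NumExecutionSlices * IopIssueQueueEntries == 44,
              "matches 'let MicroOpBufferSize = 44' in the P9 model");
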
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index a8d7955ef548..580d057602f5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -181,6 +181,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
const TargetOptions &Options) {
+ if (TT.isOSDarwin())
+ report_fatal_error("Darwin is no longer supported for PowerPC");
+
if (Options.MCOptions.getABIName().startswith("elfv1"))
return PPCTargetMachine::PPC_ABI_ELFv1;
else if (Options.MCOptions.getABIName().startswith("elfv2"))
@@ -211,19 +214,24 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
if (TT.isOSDarwin())
return Reloc::DynamicNoPIC;
- // Non-darwin 64-bit platforms are PIC by default.
- if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)
+ // Big Endian PPC is PIC by default.
+ if (TT.getArch() == Triple::ppc64)
return Reloc::PIC_;
- // 32-bit is static by default.
+ // Rest are static by default.
return Reloc::Static;
}
-static CodeModel::Model getEffectiveCodeModel(const Triple &TT,
- Optional<CodeModel::Model> CM,
- bool JIT) {
- if (CM)
+static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
+ Optional<CodeModel::Model> CM,
+ bool JIT) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
return *CM;
+ }
if (!TT.isOSDarwin() && !JIT &&
(TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
return CodeModel::Medium;
@@ -243,7 +251,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
computeFSAdditions(FS, OL, TT), Options,
getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(TT, CM, JIT), OL),
+ getEffectivePPCCodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())),
TargetABI(computeTargetABI(TT, Options)) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index b0da9b5a6d70..bc9bcab83a0a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -473,7 +473,14 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
+
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 2ee2b3eb8084..9221a910288a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -90,7 +90,9 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
/// @}
};
diff --git a/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 9a455c105482..1d1112cc5124 100644
--- a/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -7,12 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "MCTargetDesc/RISCVAsmBackend.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVTargetStreamer.h"
+#include "Utils/RISCVBaseInfo.h"
+#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -39,6 +43,8 @@ namespace {
struct RISCVOperand;
class RISCVAsmParser : public MCTargetAsmParser {
+ SmallVector<FeatureBitset, 4> FeatureBitStack;
+
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); }
@@ -73,6 +79,9 @@ class RISCVAsmParser : public MCTargetAsmParser {
// synthesize the desired immediate value into the destination register.
void emitLoadImm(unsigned DestReg, int64_t Value, MCStreamer &Out);
+ // Helper to emit pseudo instruction "lla" used in PC-rel addressing.
+ void emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
/// Helper for processing MC instructions that have been successfully matched
/// by MatchAndEmitInstruction. Modifications to the emitted instructions,
/// like the expansion of pseudo instructions (e.g., "li"), can be performed
@@ -83,13 +92,16 @@ class RISCVAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "RISCVGenAsmMatcher.inc"
+ OperandMatchResultTy parseCSRSystemRegister(OperandVector &Operands);
OperandMatchResultTy parseImmediate(OperandVector &Operands);
OperandMatchResultTy parseRegister(OperandVector &Operands,
bool AllowParens = false);
OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands);
OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
+ OperandMatchResultTy parseBareSymbol(OperandVector &Operands);
+ OperandMatchResultTy parseJALOffset(OperandVector &Operands);
- bool parseOperand(OperandVector &Operands, bool ForceImmediate);
+ bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
bool parseDirectiveOption();
@@ -108,6 +120,21 @@ class RISCVAsmParser : public MCTargetAsmParser {
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
}
+
+ void pushFeatureBits() {
+ FeatureBitStack.push_back(getSTI().getFeatureBits());
+ }
+
+ bool popFeatureBits() {
+ if (FeatureBitStack.empty())
+ return true;
+
+ FeatureBitset FeatureBits = FeatureBitStack.pop_back_val();
+ copySTI().setFeatureBits(FeatureBits);
+ setAvailableFeatures(ComputeAvailableFeatures(FeatureBits));
+
+ return false;
+ }
public:
enum RISCVMatchResultTy {
Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
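The FeatureBitStack and the pushFeatureBits/popFeatureBits helpers added above back the new .option push/.option pop directives handled later in this patch: push saves the current subtarget feature bits, and pop restores them, reporting an error when there is no matching push. A rough sketch of the intended assembler input (illustrative only):

    .option push          # save the current option state (e.g. rvc, relax)
    .option rvc           # enable the compressed extension for this region only
    c.addi  a0, 1
    .option pop           # restore whatever was saved by the matching push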
@@ -139,6 +166,7 @@ struct RISCVOperand : public MCParsedAsmOperand {
Token,
Register,
Immediate,
+ SystemRegister
} Kind;
bool IsRV64;
@@ -151,11 +179,20 @@ struct RISCVOperand : public MCParsedAsmOperand {
const MCExpr *Val;
};
+ struct SysRegOp {
+ const char *Data;
+ unsigned Length;
+ unsigned Encoding;
+ // FIXME: Add the Encoding parsed fields as needed for checks,
+ // e.g.: read/write or user/supervisor/machine privileges.
+ };
+
SMLoc StartLoc, EndLoc;
union {
StringRef Tok;
RegOp Reg;
ImmOp Imm;
+ struct SysRegOp SysReg;
};
RISCVOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -176,6 +213,9 @@ public:
case Token:
Tok = o.Tok;
break;
+ case SystemRegister:
+ SysReg = o.SysReg;
+ break;
}
}
@@ -183,19 +223,22 @@ public:
bool isReg() const override { return Kind == Register; }
bool isImm() const override { return Kind == Immediate; }
bool isMem() const override { return false; }
+ bool isSystemRegister() const { return Kind == SystemRegister; }
- bool evaluateConstantImm(int64_t &Imm, RISCVMCExpr::VariantKind &VK) const {
- const MCExpr *Val = getImm();
- bool Ret = false;
- if (auto *RE = dyn_cast<RISCVMCExpr>(Val)) {
- Ret = RE->evaluateAsConstant(Imm);
+ static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm,
+ RISCVMCExpr::VariantKind &VK) {
+ if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
VK = RE->getKind();
- } else if (auto CE = dyn_cast<MCConstantExpr>(Val)) {
- Ret = true;
+ return RE->evaluateAsConstant(Imm);
+ }
+
+ if (auto CE = dyn_cast<MCConstantExpr>(Expr)) {
VK = RISCVMCExpr::VK_RISCV_None;
Imm = CE->getValue();
+ return true;
}
- return Ret;
+
+ return false;
}
// True if operand is a symbol with no modifiers, or a constant with no
@@ -205,7 +248,7 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
bool IsValid;
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
@@ -220,12 +263,14 @@ public:
int64_t Imm;
RISCVMCExpr::VariantKind VK;
// Must be of 'immediate' type but not a constant.
- if (!isImm() || evaluateConstantImm(Imm, VK))
+ if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isCSRSystemRegister() const { return isSystemRegister(); }
+
/// Return true if the operand is valid for the fence instruction, e.g.
/// ('iorw').
bool isFenceArg() const {
@@ -265,12 +310,14 @@ public:
return RISCVFPRndMode::stringToRoundingMode(Str) != RISCVFPRndMode::Invalid;
}
- bool isImmXLen() const {
+ bool isImmXLenLI() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ if (VK == RISCVMCExpr::VK_RISCV_LO || VK == RISCVMCExpr::VK_RISCV_PCREL_LO)
+ return true;
// Given only Imm, ensuring that the actually specified constant is either
// a signed or unsigned 64-bit number is unfortunately impossible.
bool IsInRange = isRV64() ? true : isInt<32>(Imm) || isUInt<32>(Imm);
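With the rename to isImmXLenLI, the li-specific predicate above now also accepts %lo/%pcrel_lo expressions (they return true before the range check), while plain constants on RV32 must still fit in 32 bits as either a signed or an unsigned value. Illustrative inputs (sym is a placeholder symbol):

    li a0, 0xdeadbeef     # accepted on RV32: fits as an unsigned 32-bit value
    li a0, %lo(sym)       # accepted by the new check; expanded later to an addi from x0
    li a0, 0x123456789    # rejected on RV32: neither a signed nor an unsigned 32-bit value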
@@ -282,7 +329,8 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- if (!evaluateConstantImm(Imm, VK) || VK != RISCVMCExpr::VK_RISCV_None)
+ if (!evaluateConstantImm(getImm(), Imm, VK) ||
+ VK != RISCVMCExpr::VK_RISCV_None)
return false;
return (isRV64() && isUInt<6>(Imm)) || isUInt<5>(Imm);
}
@@ -292,7 +340,8 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- if (!evaluateConstantImm(Imm, VK) || VK != RISCVMCExpr::VK_RISCV_None)
+ if (!evaluateConstantImm(getImm(), Imm, VK) ||
+ VK != RISCVMCExpr::VK_RISCV_None)
return false;
if (Imm == 0)
return false;
@@ -304,7 +353,7 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -313,66 +362,68 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isUInt<5>(Imm) && (Imm != 0) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6() const {
+ if (!isImm())
+ return false;
RISCVMCExpr::VariantKind VK;
int64_t Imm;
- bool IsValid;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- if (!IsConstantImm)
- IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
- else
- IsValid = isInt<6>(Imm);
- return IsValid &&
- (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isInt<6>(Imm) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6NonZero() const {
+ if (!isImm())
+ return false;
RISCVMCExpr::VariantKind VK;
int64_t Imm;
- bool IsValid;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- if (!IsConstantImm)
- IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
- else
- IsValid = ((Imm != 0) && isInt<6>(Imm));
- return IsValid &&
- (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isInt<6>(Imm) && (Imm != 0) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isCLUIImm() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && (Imm != 0) &&
(isUInt<5>(Imm) || (Imm >= 0xfffe0 && Imm <= 0xfffff)) &&
- VK == RISCVMCExpr::VK_RISCV_None;
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm7Lsb00() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<5, 2>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm8Lsb00() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<6, 2>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm8Lsb000() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<5, 3>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -380,17 +431,21 @@ public:
bool isSImm9Lsb0() const { return isBareSimmNLsb0<9>(); }
bool isUImm9Lsb000() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<6, 3>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm10Lsb00NonZero() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<8, 2>(Imm) && (Imm != 0) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -401,54 +456,63 @@ public:
bool IsValid;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
else
IsValid = isInt<12>(Imm);
- return IsValid && (VK == RISCVMCExpr::VK_RISCV_None ||
+ return IsValid && ((IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None) ||
VK == RISCVMCExpr::VK_RISCV_LO ||
VK == RISCVMCExpr::VK_RISCV_PCREL_LO);
}
bool isSImm12Lsb0() const { return isBareSimmNLsb0<12>(); }
- bool isUImm12() const {
- int64_t Imm;
- RISCVMCExpr::VariantKind VK;
- if (!isImm())
- return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- return IsConstantImm && isUInt<12>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
- }
-
bool isSImm13Lsb0() const { return isBareSimmNLsb0<13>(); }
bool isSImm10Lsb0000NonZero() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && (Imm != 0) && isShiftedInt<6, 4>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
- bool isUImm20() const {
+ bool isUImm20LUI() const {
RISCVMCExpr::VariantKind VK;
int64_t Imm;
bool IsValid;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- if (!IsConstantImm)
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ if (!IsConstantImm) {
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
- else
- IsValid = isUInt<20>(Imm);
- return IsValid && (VK == RISCVMCExpr::VK_RISCV_None ||
- VK == RISCVMCExpr::VK_RISCV_HI ||
- VK == RISCVMCExpr::VK_RISCV_PCREL_HI);
+ return IsValid && VK == RISCVMCExpr::VK_RISCV_HI;
+ } else {
+ return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
+ VK == RISCVMCExpr::VK_RISCV_HI);
+ }
+ }
+
+ bool isUImm20AUIPC() const {
+ RISCVMCExpr::VariantKind VK;
+ int64_t Imm;
+ bool IsValid;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ if (!IsConstantImm) {
+ IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
+ return IsValid && VK == RISCVMCExpr::VK_RISCV_PCREL_HI;
+ } else {
+ return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
+ VK == RISCVMCExpr::VK_RISCV_PCREL_HI);
+ }
}
- bool isSImm21Lsb0() const { return isBareSimmNLsb0<21>(); }
+ bool isSImm21Lsb0JAL() const { return isBareSimmNLsb0<21>(); }
/// getStartLoc - Gets location of the first token of this operand
SMLoc getStartLoc() const override { return StartLoc; }
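Splitting isUImm20 into isUImm20LUI and isUImm20AUIPC ties each relocation modifier to the instruction it belongs with: lui takes %hi() and auipc takes %pcrel_hi(), while bare integers in [0, 2^20 - 1] remain valid for both. For example (sym is a placeholder symbol):

    lui   a0, %hi(sym)         # valid: %hi pairs with lui
    auipc a0, %pcrel_hi(sym)   # valid: %pcrel_hi pairs with auipc
    lui   a0, %pcrel_hi(sym)   # now rejected, reported via Match_InvalidUImm20LUI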
@@ -462,6 +526,11 @@ public:
return Reg.RegNum;
}
+ StringRef getSysReg() const {
+ assert(Kind == SystemRegister && "Invalid access!");
+ return StringRef(SysReg.Data, SysReg.Length);
+ }
+
const MCExpr *getImm() const {
assert(Kind == Immediate && "Invalid type access!");
return Imm.Val;
@@ -484,6 +553,9 @@ public:
case Token:
OS << "'" << getToken() << "'";
break;
+ case SystemRegister:
+ OS << "<sysreg: " << getSysReg() << '>';
+ break;
}
}
@@ -517,16 +589,22 @@ public:
return Op;
}
+ static std::unique_ptr<RISCVOperand>
+ createSysReg(StringRef Str, SMLoc S, unsigned Encoding, bool IsRV64) {
+ auto Op = make_unique<RISCVOperand>(SystemRegister);
+ Op->SysReg.Data = Str.data();
+ Op->SysReg.Length = Str.size();
+ Op->SysReg.Encoding = Encoding;
+ Op->StartLoc = S;
+ Op->IsRV64 = IsRV64;
+ return Op;
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
assert(Expr && "Expr shouldn't be null!");
int64_t Imm = 0;
- bool IsConstant = false;
- if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
- IsConstant = RE->evaluateAsConstant(Imm);
- } else if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
- IsConstant = true;
- Imm = CE->getValue();
- }
+ RISCVMCExpr::VariantKind VK;
+ bool IsConstant = evaluateConstantImm(Expr, Imm, VK);
if (IsConstant)
Inst.addOperand(MCOperand::createImm(Imm));
@@ -553,16 +631,22 @@ public:
unsigned Imm = 0;
for (char c : SE->getSymbol().getName()) {
switch (c) {
- default: llvm_unreachable("FenceArg must contain only [iorw]");
- case 'i': Imm |= RISCVFenceField::I; break;
- case 'o': Imm |= RISCVFenceField::O; break;
- case 'r': Imm |= RISCVFenceField::R; break;
- case 'w': Imm |= RISCVFenceField::W; break;
+ default:
+ llvm_unreachable("FenceArg must contain only [iorw]");
+ case 'i': Imm |= RISCVFenceField::I; break;
+ case 'o': Imm |= RISCVFenceField::O; break;
+ case 'r': Imm |= RISCVFenceField::R; break;
+ case 'w': Imm |= RISCVFenceField::W; break;
}
}
Inst.addOperand(MCOperand::createImm(Imm));
}
+ void addCSRSystemRegisterOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(SysReg.Encoding));
+ }
+
// Returns the rounding mode represented by this RISCVOperand. Should only
// be called after checking isFRMArg.
RISCVFPRndMode::RoundingMode getRoundingMode() const {
@@ -590,40 +674,40 @@ public:
// information from TableGen.
unsigned convertFPR32ToFPR64(unsigned Reg) {
switch (Reg) {
- default:
- llvm_unreachable("Not a recognised FPR32 register");
- case RISCV::F0_32: return RISCV::F0_64;
- case RISCV::F1_32: return RISCV::F1_64;
- case RISCV::F2_32: return RISCV::F2_64;
- case RISCV::F3_32: return RISCV::F3_64;
- case RISCV::F4_32: return RISCV::F4_64;
- case RISCV::F5_32: return RISCV::F5_64;
- case RISCV::F6_32: return RISCV::F6_64;
- case RISCV::F7_32: return RISCV::F7_64;
- case RISCV::F8_32: return RISCV::F8_64;
- case RISCV::F9_32: return RISCV::F9_64;
- case RISCV::F10_32: return RISCV::F10_64;
- case RISCV::F11_32: return RISCV::F11_64;
- case RISCV::F12_32: return RISCV::F12_64;
- case RISCV::F13_32: return RISCV::F13_64;
- case RISCV::F14_32: return RISCV::F14_64;
- case RISCV::F15_32: return RISCV::F15_64;
- case RISCV::F16_32: return RISCV::F16_64;
- case RISCV::F17_32: return RISCV::F17_64;
- case RISCV::F18_32: return RISCV::F18_64;
- case RISCV::F19_32: return RISCV::F19_64;
- case RISCV::F20_32: return RISCV::F20_64;
- case RISCV::F21_32: return RISCV::F21_64;
- case RISCV::F22_32: return RISCV::F22_64;
- case RISCV::F23_32: return RISCV::F23_64;
- case RISCV::F24_32: return RISCV::F24_64;
- case RISCV::F25_32: return RISCV::F25_64;
- case RISCV::F26_32: return RISCV::F26_64;
- case RISCV::F27_32: return RISCV::F27_64;
- case RISCV::F28_32: return RISCV::F28_64;
- case RISCV::F29_32: return RISCV::F29_64;
- case RISCV::F30_32: return RISCV::F30_64;
- case RISCV::F31_32: return RISCV::F31_64;
+ default:
+ llvm_unreachable("Not a recognised FPR32 register");
+ case RISCV::F0_32: return RISCV::F0_64;
+ case RISCV::F1_32: return RISCV::F1_64;
+ case RISCV::F2_32: return RISCV::F2_64;
+ case RISCV::F3_32: return RISCV::F3_64;
+ case RISCV::F4_32: return RISCV::F4_64;
+ case RISCV::F5_32: return RISCV::F5_64;
+ case RISCV::F6_32: return RISCV::F6_64;
+ case RISCV::F7_32: return RISCV::F7_64;
+ case RISCV::F8_32: return RISCV::F8_64;
+ case RISCV::F9_32: return RISCV::F9_64;
+ case RISCV::F10_32: return RISCV::F10_64;
+ case RISCV::F11_32: return RISCV::F11_64;
+ case RISCV::F12_32: return RISCV::F12_64;
+ case RISCV::F13_32: return RISCV::F13_64;
+ case RISCV::F14_32: return RISCV::F14_64;
+ case RISCV::F15_32: return RISCV::F15_64;
+ case RISCV::F16_32: return RISCV::F16_64;
+ case RISCV::F17_32: return RISCV::F17_64;
+ case RISCV::F18_32: return RISCV::F18_64;
+ case RISCV::F19_32: return RISCV::F19_64;
+ case RISCV::F20_32: return RISCV::F20_64;
+ case RISCV::F21_32: return RISCV::F21_64;
+ case RISCV::F22_32: return RISCV::F22_64;
+ case RISCV::F23_32: return RISCV::F23_64;
+ case RISCV::F24_32: return RISCV::F24_64;
+ case RISCV::F25_32: return RISCV::F25_64;
+ case RISCV::F26_32: return RISCV::F26_64;
+ case RISCV::F27_32: return RISCV::F27_64;
+ case RISCV::F28_32: return RISCV::F28_64;
+ case RISCV::F29_32: return RISCV::F29_64;
+ case RISCV::F30_32: return RISCV::F30_64;
+ case RISCV::F31_32: return RISCV::F31_64;
}
}
@@ -663,7 +747,9 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
bool MatchingInlineAsm) {
MCInst Inst;
- switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
+ auto Result =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+ switch (Result) {
default:
break;
case Match_Success:
@@ -684,7 +770,21 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
return Error(ErrorLoc, "invalid operand for instruction");
}
- case Match_InvalidImmXLen:
+ }
+
+ // Handle the case when the error message is of a specific type
+ // other than the generic Match_InvalidOperand, and the
+ // corresponding operand is missing.
+ if (Result > FIRST_TARGET_MATCH_RESULT_TY) {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U && ErrorInfo >= Operands.size())
+ return Error(ErrorLoc, "too few operands for instruction");
+ }
+
+ switch(Result) {
+ default:
+ break;
+ case Match_InvalidImmXLenLI:
if (isRV64()) {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a constant 64-bit integer");
@@ -706,8 +806,8 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5),
(1 << 5) - 1);
case Match_InvalidSImm6NonZero:
- return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5),
- (1 << 5) - 1,
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, -(1 << 5), (1 << 5) - 1,
"immediate must be non-zero in the range");
case Match_InvalidCLUIImm:
return generateImmOutOfRangeError(
@@ -742,24 +842,36 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Operands, ErrorInfo, -(1 << 9), (1 << 9) - 16,
"immediate must be a multiple of 16 bytes and non-zero in the range");
case Match_InvalidSImm12:
- return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 11),
- (1 << 11) - 1);
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, -(1 << 11), (1 << 11) - 1,
+ "operand must be a symbol with %lo/%pcrel_lo modifier or an integer in "
+ "the range");
case Match_InvalidSImm12Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 11), (1 << 11) - 2,
"immediate must be a multiple of 2 bytes in the range");
- case Match_InvalidUImm12:
- return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1);
case Match_InvalidSImm13Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 12), (1 << 12) - 2,
"immediate must be a multiple of 2 bytes in the range");
- case Match_InvalidUImm20:
- return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1);
- case Match_InvalidSImm21Lsb0:
+ case Match_InvalidUImm20LUI:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1,
+ "operand must be a symbol with %hi() "
+ "modifier or an integer in the range");
+ case Match_InvalidUImm20AUIPC:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 20) - 1,
+ "operand must be a symbol with %pcrel_hi() modifier or an integer in "
+ "the range");
+ case Match_InvalidSImm21Lsb0JAL:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 20), (1 << 20) - 2,
"immediate must be a multiple of 2 bytes in the range");
+ case Match_InvalidCSRSystemRegister: {
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1,
+ "operand must be a valid system register "
+ "name or an integer in the range");
+ }
case Match_InvalidFenceArg: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(
@@ -842,9 +954,9 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
return MatchOperand_Success;
}
-OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
+OperandMatchResultTy
+RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
const MCExpr *Res;
switch (getLexer().getKind()) {
@@ -854,18 +966,77 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Integer:
- case AsmToken::String:
+ case AsmToken::String: {
if (getParser().parseExpression(Res))
return MatchOperand_ParseFail;
- break;
+
+ auto *CE = dyn_cast<MCConstantExpr>(Res);
+ if (CE) {
+ int64_t Imm = CE->getValue();
+ if (isUInt<12>(Imm)) {
+ auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
+ // Accept an immediate representing a named or un-named Sys Reg
+ // if the range is valid, regardless of the required features.
+ Operands.push_back(RISCVOperand::createSysReg(
+ SysReg ? SysReg->Name : "", S, Imm, isRV64()));
+ return MatchOperand_Success;
+ }
+ }
+
+ Twine Msg = "immediate must be an integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
case AsmToken::Identifier: {
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
return MatchOperand_ParseFail;
- MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
- Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- break;
+
+ auto SysReg = RISCVSysReg::lookupSysRegByName(Identifier);
+ // Accept a named Sys Reg if the required features are present.
+ if (SysReg) {
+ if (!SysReg->haveRequiredFeatures(getSTI().getFeatureBits())) {
+ Error(S, "system register use requires an option to be enabled");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(RISCVOperand::createSysReg(
+ Identifier, S, SysReg->Encoding, isRV64()));
+ return MatchOperand_Success;
+ }
+
+ Twine Msg = "operand must be a valid system register name "
+ "or an integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
+ case AsmToken::Percent: {
+ // Discard operand with modifier.
+ Twine Msg = "immediate must be an integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
}
+
+ return MatchOperand_NoMatch;
+}
+
+OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
+
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_NoMatch;
+ case AsmToken::LParen:
+ case AsmToken::Minus:
+ case AsmToken::Plus:
+ case AsmToken::Integer:
+ case AsmToken::String:
+ case AsmToken::Identifier:
+ if (getParser().parseExpression(Res))
+ return MatchOperand_ParseFail;
+ break;
case AsmToken::Percent:
return parseOperandWithModifier(Operands);
}
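The new parseCSRSystemRegister routine above accepts either a CSR name, resolved through lookupSysRegByName and checked against the required subtarget features, or a bare integer in [0, 4095], which is kept even when it does not correspond to a named register. Roughly, all of the following should now parse (the 0x305 encoding is shown purely for illustration; it is the mtvec address):

    csrr  a0, mstatus      # named CSR, found in the lookup table
    csrrw x0, 0x305, a1    # raw 12-bit encoding, accepted regardless of features
    csrr  a0, 0xfff        # any encoding in [0, 4095] is accepted, named or not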
@@ -914,6 +1085,41 @@ RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) {
return MatchOperand_Success;
}
+OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
+
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return MatchOperand_NoMatch;
+
+ StringRef Identifier;
+ if (getParser().parseIdentifier(Identifier))
+ return MatchOperand_ParseFail;
+
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return MatchOperand_Success;
+}
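parseBareSymbol gives operands that are plain symbols, previously covered by the ForceImmediate special case for call and tail, a dedicated custom parser: the identifier is wrapped in an ordinary MCSymbolRefExpr with no modifier. Illustrative uses (foo and bar are placeholder symbols):

    call foo
    tail bar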
+
+OperandMatchResultTy RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
+ // Parsing jal operands is fiddly due to the `jal foo` and `jal ra, foo`
+ // both being acceptable forms. When parsing `jal ra, foo` this function
+ // will be called for the `ra` register operand in an attempt to match the
+ // single-operand alias. parseJALOffset must fail for this case. It would
+ // seem logical to try to parse the operand using parseImmediate and return
+ // NoMatch if the next token is a comma (meaning we must be parsing a jal in
+ // the second form rather than the first). We can't do this as there's no
+ // way of rewinding the lexer state. Instead, return NoMatch if this operand
+ // is an identifier and is followed by a comma.
+ if (getLexer().is(AsmToken::Identifier) &&
+ getLexer().peekTok().is(AsmToken::Comma))
+ return MatchOperand_NoMatch;
+
+ return parseImmediate(Operands);
+}
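As the comment above explains, parseJALOffset must not consume the register of the two-operand form, so it backs off when an identifier is immediately followed by a comma. Both spellings therefore parse, for example:

    jal  foo          # one-operand alias: foo is matched as the offset
    jal  ra, foo      # two-operand form: ra is a register, foo the offset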
+
OperandMatchResultTy
RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
if (getLexer().isNot(AsmToken::LParen)) {
@@ -942,13 +1148,19 @@ RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
/// Looks at a token type and creates the relevant operand from this
/// information, adding to Operands. If operand was parsed, returns false, else
-/// true. If ForceImmediate is true, no attempt will be made to parse the
-/// operand as a register, which is needed for pseudoinstructions such as
-/// call.
-bool RISCVAsmParser::parseOperand(OperandVector &Operands,
- bool ForceImmediate) {
- // Attempt to parse token as register, unless ForceImmediate.
- if (!ForceImmediate && parseRegister(Operands, true) == MatchOperand_Success)
+/// true.
+bool RISCVAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy Result =
+ MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/true);
+ if (Result == MatchOperand_Success)
+ return false;
+ if (Result == MatchOperand_ParseFail)
+ return true;
+
+ // Attempt to parse token as a register.
+ if (parseRegister(Operands, true) == MatchOperand_Success)
return false;
// Attempt to parse token as an immediate
@@ -967,6 +1179,21 @@ bool RISCVAsmParser::parseOperand(OperandVector &Operands,
bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
+ // Ensure that if the instruction occurs when relaxation is enabled,
+ // relocations are forced for the file. Ideally this would be done when there
+ // is enough information to reliably determine if the instruction itself may
+ // cause relaxations. Unfortunately, the instruction processing stage occurs in the
+ // same pass as relocation emission, so it's too late to set a 'sticky bit'
+ // for the entire file.
+ if (getSTI().getFeatureBits()[RISCV::FeatureRelax]) {
+ auto *Assembler = getTargetStreamer().getStreamer().getAssemblerPtr();
+ if (Assembler != nullptr) {
+ RISCVAsmBackend &MAB =
+ static_cast<RISCVAsmBackend &>(Assembler->getBackend());
+ MAB.setForceRelocs();
+ }
+ }
+
// First operand is token for instruction
Operands.push_back(RISCVOperand::createToken(Name, NameLoc, isRV64()));
@@ -975,18 +1202,20 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false;
// Parse first operand
- bool ForceImmediate = (Name == "call" || Name == "tail");
- if (parseOperand(Operands, ForceImmediate))
+ if (parseOperand(Operands, Name))
return true;
// Parse until end of statement, consuming commas between operands
+ unsigned OperandIdx = 1;
while (getLexer().is(AsmToken::Comma)) {
// Consume comma token
getLexer().Lex();
// Parse next operand
- if (parseOperand(Operands, false))
+ if (parseOperand(Operands, Name))
return true;
+
+ ++OperandIdx;
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -1068,6 +1297,33 @@ bool RISCVAsmParser::parseDirectiveOption() {
StringRef Option = Tok.getIdentifier();
+ if (Option == "push") {
+ getTargetStreamer().emitDirectiveOptionPush();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ pushFeatureBits();
+ return false;
+ }
+
+ if (Option == "pop") {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ getTargetStreamer().emitDirectiveOptionPop();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ if (popFeatureBits())
+ return Error(StartLoc, ".option pop with no .option push");
+
+ return false;
+ }
+
if (Option == "rvc") {
getTargetStreamer().emitDirectiveOptionRVC();
@@ -1092,9 +1348,34 @@ bool RISCVAsmParser::parseDirectiveOption() {
return false;
}
+ if (Option == "relax") {
+ getTargetStreamer().emitDirectiveOptionRelax();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ setFeatureBits(RISCV::FeatureRelax, "relax");
+ return false;
+ }
+
+ if (Option == "norelax") {
+ getTargetStreamer().emitDirectiveOptionNoRelax();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ clearFeatureBits(RISCV::FeatureRelax, "relax");
+ return false;
+ }
+
// Unknown option.
Warning(Parser.getTok().getLoc(),
- "unknown option, expected 'rvc' or 'norvc'");
+ "unknown option, expected 'push', 'pop', 'rvc', 'norvc', 'relax' or "
+ "'norelax'");
Parser.eatToEndOfStatement();
return false;
}
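The relax and norelax options handled above toggle RISCV::FeatureRelax from assembly, mirroring the push/pop handling of the other option state. A common, purely illustrative pattern is turning relaxation off around the global-pointer setup:

    .option push
    .option norelax        # gp is not usable yet, so this la must not be gp-relaxed
    la      gp, __global_pointer$
    .option pop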
@@ -1108,80 +1389,54 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
void RISCVAsmParser::emitLoadImm(unsigned DestReg, int64_t Value,
MCStreamer &Out) {
- if (isInt<32>(Value)) {
- // Emits the MC instructions for loading a 32-bit constant into a register.
- //
- // Depending on the active bits in the immediate Value v, the following
- // instruction sequences are emitted:
- //
- // v == 0 : ADDI(W)
- // v[0,12) != 0 && v[12,32) == 0 : ADDI(W)
- // v[0,12) == 0 && v[12,32) != 0 : LUI
- // v[0,32) != 0 : LUI+ADDI(W)
- //
- int64_t Hi20 = ((Value + 0x800) >> 12) & 0xFFFFF;
- int64_t Lo12 = SignExtend64<12>(Value);
- unsigned SrcReg = RISCV::X0;
-
- if (Hi20) {
- emitToStreamer(Out,
- MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Hi20));
- SrcReg = DestReg;
+ RISCVMatInt::InstSeq Seq;
+ RISCVMatInt::generateInstSeq(Value, isRV64(), Seq);
+
+ unsigned SrcReg = RISCV::X0;
+ for (RISCVMatInt::Inst &Inst : Seq) {
+ if (Inst.Opc == RISCV::LUI) {
+ emitToStreamer(
+ Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm));
+ } else {
+ emitToStreamer(
+ Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
+ Inst.Imm));
}
- if (Lo12 || Hi20 == 0) {
- unsigned AddiOpcode =
- STI->hasFeature(RISCV::Feature64Bit) ? RISCV::ADDIW : RISCV::ADDI;
- emitToStreamer(Out, MCInstBuilder(AddiOpcode)
- .addReg(DestReg)
- .addReg(SrcReg)
- .addImm(Lo12));
- }
- return;
- }
- assert(STI->hasFeature(RISCV::Feature64Bit) &&
- "Target must be 64-bit to support a >32-bit constant");
-
- // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
- // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emmitted. Note
- // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
- // while the following ADDI instructions contribute up to 12 bits each.
- //
- // On the first glance, implementing this seems to be possible by simply
- // emitting the most significant 32 bits (LUI+ADDIW) followed by as many left
- // shift (SLLI) and immediate additions (ADDI) as needed. However, due to the
- // fact that ADDI performs a sign extended addition, doing it like that would
- // only be possible when at most 11 bits of the ADDI instructions are used.
- // Using all 12 bits of the ADDI instructions, like done by GAS, actually
- // requires that the constant is processed starting with the least significant
- // bit.
- //
- // In the following, constants are processed from LSB to MSB but instruction
- // emission is performed from MSB to LSB by recursively calling
- // emitLoadImm. In each recursion, first the lowest 12 bits are removed
- // from the constant and the optimal shift amount, which can be greater than
- // 12 bits if the constant is sparse, is determined. Then, the shifted
- // remaining constant is processed recursively and gets emitted as soon as it
- // fits into 32 bits. The emission of the shifts and additions is subsequently
- // performed when the recursion returns.
- //
- int64_t Lo12 = SignExtend64<12>(Value);
- int64_t Hi52 = (Value + 0x800) >> 12;
- int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
- Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
-
- emitLoadImm(DestReg, Hi52, Out);
-
- emitToStreamer(Out, MCInstBuilder(RISCV::SLLI)
- .addReg(DestReg)
- .addReg(DestReg)
- .addImm(ShiftAmount));
-
- if (Lo12)
- emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
- .addReg(DestReg)
- .addReg(DestReg)
- .addImm(Lo12));
+ // Only the first instruction has X0 as its source.
+ SrcReg = DestReg;
+ }
+}
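emitLoadImm now delegates to RISCVMatInt::generateInstSeq, which produces the LUI/ADDI(W)/SLLI/ADDI sequence for an arbitrary XLEN-sized constant and replaces the hand-rolled 32-bit path deleted above. Roughly, for example:

    li a0, 0x12345678
    # expands on RV32 to something like:
    #   lui  a0, 0x12345
    #   addi a0, a0, 0x678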
+
+void RISCVAsmParser::emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out) {
+ // The local load address pseudo-instruction "lla" is used in PC-relative
+ // addressing of symbols:
+ // lla rdest, symbol
+ // expands to
+ // TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
+ // ADDI rdest, %pcrel_lo(TmpLabel)
+ MCContext &Ctx = getContext();
+
+ MCSymbol *TmpLabel = Ctx.createTempSymbol(
+ "pcrel_hi", /* AlwaysAddSuffix */ true, /* CanBeUnnamed */ false);
+ Out.EmitLabel(TmpLabel);
+
+ MCOperand DestReg = Inst.getOperand(0);
+ const RISCVMCExpr *Symbol = RISCVMCExpr::create(
+ Inst.getOperand(1).getExpr(), RISCVMCExpr::VK_RISCV_PCREL_HI, Ctx);
+
+ emitToStreamer(
+ Out, MCInstBuilder(RISCV::AUIPC).addOperand(DestReg).addExpr(Symbol));
+
+ const MCExpr *RefToLinkTmpLabel =
+ RISCVMCExpr::create(MCSymbolRefExpr::create(TmpLabel, Ctx),
+ RISCVMCExpr::VK_RISCV_PCREL_LO, Ctx);
+
+ emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
+ .addOperand(DestReg)
+ .addOperand(DestReg)
+ .addExpr(RefToLinkTmpLabel));
}
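The expansion sketched in the comment above yields an auipc/addi pair whose %pcrel_lo operand refers back to the temporary label placed on the auipc, e.g. (label name illustrative):

    lla a0, sym
    # becomes roughly:
    # .Lpcrel_hi0:
    #   auipc a0, %pcrel_hi(sym)
    #   addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)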
bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
@@ -1189,7 +1444,17 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Inst.setLoc(IDLoc);
if (Inst.getOpcode() == RISCV::PseudoLI) {
- auto Reg = Inst.getOperand(0).getReg();
+ unsigned Reg = Inst.getOperand(0).getReg();
+ const MCOperand &Op1 = Inst.getOperand(1);
+ if (Op1.isExpr()) {
+ // We must have li reg, %lo(sym) or li reg, %pcrel_lo(sym) or similar.
+ // Just convert to an addi. This allows compatibility with gas.
+ emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
+ .addReg(Reg)
+ .addReg(RISCV::X0)
+ .addExpr(Op1.getExpr()));
+ return false;
+ }
int64_t Imm = Inst.getOperand(1).getImm();
// On RV32 the immediate here can either be a signed or an unsigned
// 32-bit number. Sign extension has to be performed to ensure that Imm
@@ -1198,6 +1463,9 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Imm = SignExtend64<32>(Imm);
emitLoadImm(Reg, Imm, Out);
return false;
+ } else if (Inst.getOpcode() == RISCV::PseudoLLA) {
+ emitLoadLocalAddress(Inst, IDLoc, Out);
+ return false;
}
emitToStreamer(Out, Inst);
diff --git a/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 7bbb371a757f..eafa09d56315 100644
--- a/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -211,6 +212,15 @@ static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
}
template <unsigned N>
+static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ if (Imm == 0)
+ return MCDisassembler::Fail;
+ return decodeUImmOperand<N>(Inst, Imm, Address, Decoder);
+}
+
+template <unsigned N>
static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
@@ -221,6 +231,15 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
}
template <unsigned N>
+static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ if (Imm == 0)
+ return MCDisassembler::Fail;
+ return decodeSImmOperand<N>(Inst, Imm, Address, Decoder);
+}
+
+template <unsigned N>
static DecodeStatus decodeSImmOperandAndLsl1(MCInst &Inst, uint64_t Imm,
int64_t Address,
const void *Decoder) {
@@ -243,6 +262,17 @@ static DecodeStatus decodeCLUIImmOperand(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ assert(isUInt<3>(Imm) && "Invalid immediate");
+ if (!llvm::RISCVFPRndMode::isValidRoundingMode(Imm))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
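decodeFRMArg rejects the reserved rounding-mode encodings so the disassembler fails cleanly instead of printing a bogus mode. For reference, the standard frm encodings are 0=rne, 1=rtz, 2=rdn, 3=rup, 4=rmm and 7=dyn, with 5 and 6 reserved; for example:

    fcvt.w.s a0, fa0, rtz    # rtz occupies the frm field with encoding 1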
+
#include "RISCVGenDisassemblerTables.inc"
DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
@@ -257,11 +287,19 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// It's a 32 bit instruction if bit 0 and 1 are 1.
if ((Bytes[0] & 0x3) == 0x3) {
+ if (Bytes.size() < 4) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
Insn = support::endian::read32le(Bytes.data());
LLVM_DEBUG(dbgs() << "Trying RISCV32 table :\n");
Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
Size = 4;
} else {
+ if (Bytes.size() < 2) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
Insn = support::endian::read16le(Bytes.data());
if (!STI.getFeatureBits()[RISCV::Feature64Bit]) {
diff --git a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
index 300e6fd9750a..979c8f4e2fa7 100644
--- a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "RISCVInstPrinter.h"
-#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCExpr.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -36,10 +36,9 @@ using namespace llvm;
#include "RISCVGenCompressInstEmitter.inc"
static cl::opt<bool>
-NoAliases("riscv-no-aliases",
- cl::desc("Disable the emission of assembler pseudo instructions"),
- cl::init(false),
- cl::Hidden);
+ NoAliases("riscv-no-aliases",
+ cl::desc("Disable the emission of assembler pseudo instructions"),
+ cl::init(false), cl::Hidden);
void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot, const MCSubtargetInfo &STI) {
@@ -49,7 +48,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (!NoAliases)
Res = uncompressInst(UncompressedMI, *MI, MRI, STI);
if (Res)
- NewMI = const_cast<MCInst*>(&UncompressedMI);
+ NewMI = const_cast<MCInst *>(&UncompressedMI);
if (NoAliases || !printAliasInstr(NewMI, STI, O))
printInstruction(NewMI, STI, O);
printAnnotation(O, Annot);
@@ -60,8 +59,8 @@ void RISCVInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
}
void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O, const char *Modifier) {
+ const MCSubtargetInfo &STI, raw_ostream &O,
+ const char *Modifier) {
assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
const MCOperand &MO = MI->getOperand(OpNo);
@@ -79,10 +78,23 @@ void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
MO.getExpr()->print(O, &MAI);
}
+void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNo).getImm();
+ auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
+ if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits()))
+ O << SysReg->Name;
+ else
+ O << Imm;
+}
+
void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned FenceArg = MI->getOperand(OpNo).getImm();
+ assert (((FenceArg >> 4) == 0) && "Invalid immediate in printFenceArg");
+
if ((FenceArg & RISCVFenceField::I) != 0)
O << 'i';
if ((FenceArg & RISCVFenceField::O) != 0)
@@ -91,11 +103,12 @@ void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo,
O << 'r';
if ((FenceArg & RISCVFenceField::W) != 0)
O << 'w';
+ if (FenceArg == 0)
+ O << "unknown";
}
void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
auto FRMArg =
static_cast<RISCVFPRndMode::RoundingMode>(MI->getOperand(OpNo).getImm());
O << RISCVFPRndMode::roundingModeToString(FRMArg);
diff --git a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
index 241be8daf113..0f9bed184996 100644
--- a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
+++ b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
@@ -32,6 +32,8 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O, const char *Modifier = nullptr);
+ void printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printFenceArg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -48,6 +50,6 @@ public:
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = RISCV::ABIRegAltName);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 9ba7ebd0eb0f..7672fea5d95b 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -7,114 +7,58 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/RISCVFixupKinds.h"
-#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCVAsmBackend.h"
+#include "RISCVMCExpr.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace {
-class RISCVAsmBackend : public MCAsmBackend {
- const MCSubtargetInfo &STI;
- uint8_t OSABI;
- bool Is64Bit;
+// If linker relaxation is enabled, or the relax option had previously been
+// enabled, always emit relocations even if the fixup can be resolved. This is
+// necessary for correctness as offsets may change during relaxation.
+bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
+ bool ShouldForce = false;
-public:
- RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI),
- Is64Bit(Is64Bit) {}
- ~RISCVAsmBackend() override {}
-
- // Generate diff expression relocations if the relax feature is enabled,
- // otherwise it is safe for the assembler to calculate these internally.
- bool requiresDiffExpressionRelocations() const override {
- return STI.getFeatureBits()[RISCV::FeatureRelax];
- }
- void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsResolved,
- const MCSubtargetInfo *STI) const override;
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override;
-
- // If linker relaxation is enabled, always emit relocations even if the fixup
- // can be resolved. This is necessary for correctness as offsets may change
- // during relaxation.
- bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override {
- return STI.getFeatureBits()[RISCV::FeatureRelax];
- }
-
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override {
- llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
- }
-
- bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
- uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout,
- const bool WasForced) const override;
-
- unsigned getNumFixupKinds() const override {
- return RISCV::NumTargetFixupKinds;
- }
-
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
- const static MCFixupKindInfo Infos[] = {
- // This table *must* be in the order that the fixup_* kinds are defined in
- // RISCVFixupKinds.h.
- //
- // name offset bits flags
- { "fixup_riscv_hi20", 12, 20, 0 },
- { "fixup_riscv_lo12_i", 20, 12, 0 },
- { "fixup_riscv_lo12_s", 0, 32, 0 },
- { "fixup_riscv_pcrel_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_pcrel_lo12_i", 20, 12, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_pcrel_lo12_s", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_jal", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_call", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_relax", 0, 0, 0 }
- };
- static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
- "Not all fixup kinds added to Infos array");
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ break;
+ case RISCV::fixup_riscv_pcrel_lo12_i:
+ case RISCV::fixup_riscv_pcrel_lo12_s:
+ // For pcrel_lo12, force a relocation if the target of the corresponding
+ // pcrel_hi20 is not in the same fragment.
+ const MCFixup *T = cast<RISCVMCExpr>(Fixup.getValue())->getPCRelHiFixup();
+ if (!T) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "could not find corresponding %pcrel_hi");
+ return false;
+ }
+
+ switch ((unsigned)T->getKind()) {
+ default:
+ llvm_unreachable("Unexpected fixup kind for pcrel_lo12");
+ break;
+ case RISCV::fixup_riscv_pcrel_hi20:
+ ShouldForce = T->getValue()->findAssociatedFragment() !=
+ Fixup.getValue()->findAssociatedFragment();
+ break;
+ }
+ break;
}
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override;
- unsigned getRelaxedOpcode(unsigned Op) const;
-
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override;
-
-
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
-};
-
+ return ShouldForce || STI.getFeatureBits()[RISCV::FeatureRelax] ||
+ ForceRelocs;
+}
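Relocations have to be forced here because linker relaxation can shrink earlier code and shift every later offset, so even a fixup the assembler could resolve must be left to the linker; the pcrel_lo12 cases are additionally forced whenever the paired %pcrel_hi lives in a different fragment. A rough illustration of why offsets cannot be baked in under relaxation:

    call func              # may later relax from auipc+jalr to a single jal
    beq  a0, zero, 1f      # this branch distance changes if the call above shrinks
1: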
bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
bool Resolved,
@@ -348,8 +292,6 @@ RISCVAsmBackend::createObjectTargetWriter() const {
return createRISCVELFObjectWriter(OSABI, Is64Bit);
}
-} // end anonymous namespace
-
MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
new file mode 100644
index 000000000000..b98e45f4053f
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -0,0 +1,113 @@
+//===-- RISCVAsmBackend.h - RISCV Assembler Backend -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVASMBACKEND_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVASMBACKEND_H
+
+#include "MCTargetDesc/RISCVFixupKinds.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+class MCAssembler;
+class MCObjectTargetWriter;
+class raw_ostream;
+
+class RISCVAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo &STI;
+ uint8_t OSABI;
+ bool Is64Bit;
+ bool ForceRelocs = false;
+
+public:
+ RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
+ : MCAsmBackend(support::little), STI(STI), OSABI(OSABI),
+ Is64Bit(Is64Bit) {}
+ ~RISCVAsmBackend() override {}
+
+ void setForceRelocs() { ForceRelocs = true; }
+
+ // Generate diff expression relocations if the relax feature is enabled or had
+ // previously been enabled, otherwise it is safe for the assembler to
+ // calculate these internally.
+ bool requiresDiffExpressionRelocations() const override {
+ return STI.getFeatureBits()[RISCV::FeatureRelax] || ForceRelocs;
+ }
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override;
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override;
+
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
+ }
+
+ bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout,
+ const bool WasForced) const override;
+
+ unsigned getNumFixupKinds() const override {
+ return RISCV::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo Infos[] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // RISCVFixupKinds.h.
+ //
+ // name offset bits flags
+ { "fixup_riscv_hi20", 12, 20, 0 },
+ { "fixup_riscv_lo12_i", 20, 12, 0 },
+ { "fixup_riscv_lo12_s", 0, 32, 0 },
+ { "fixup_riscv_pcrel_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_pcrel_lo12_i", 20, 12, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_pcrel_lo12_s", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_jal", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_call", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_relax", 0, 0, 0 }
+ };
+ static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
+ "Not all fixup kinds added to Infos array");
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
+ unsigned getRelaxedOpcode(unsigned Op) const;
+
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override;
+
+
+ bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 6428b11cfe9c..a6ba1e41e964 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -38,5 +38,9 @@ MCELFStreamer &RISCVTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
+void RISCVTargetELFStreamer::emitDirectiveOptionPush() {}
+void RISCVTargetELFStreamer::emitDirectiveOptionPop() {}
void RISCVTargetELFStreamer::emitDirectiveOptionRVC() {}
void RISCVTargetELFStreamer::emitDirectiveOptionNoRVC() {}
+void RISCVTargetELFStreamer::emitDirectiveOptionRelax() {}
+void RISCVTargetELFStreamer::emitDirectiveOptionNoRelax() {}
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index daa7abfe1336..1f36bbc43882 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -20,8 +20,12 @@ public:
MCELFStreamer &getStreamer();
RISCVTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+ virtual void emitDirectiveOptionPush();
+ virtual void emitDirectiveOptionPop();
virtual void emitDirectiveOptionRVC();
virtual void emitDirectiveOptionNoRVC();
+ virtual void emitDirectiveOptionRelax();
+ virtual void emitDirectiveOptionNoRelax();
};
}
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 8a796a014b33..c5a4ffc0e360 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVFixupKinds.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -196,7 +196,7 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
MCInstrDesc const &Desc = MCII.get(MI.getOpcode());
unsigned MIFrm = Desc.TSFlags & RISCVII::InstFormatMask;
- // If the destination is an immediate, there is nothing to do
+ // If the destination is an immediate, there is nothing to do.
if (MO.isImm())
return MO.getImm();
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index 085dcd4e5f66..53648a5922c8 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -14,12 +14,12 @@
#include "RISCV.h"
#include "RISCVMCExpr.h"
+#include "RISCVFixupKinds.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/ELF.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -41,9 +41,90 @@ void RISCVMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
OS << ')';
}
+const MCFixup *RISCVMCExpr::getPCRelHiFixup() const {
+ MCValue AUIPCLoc;
+ if (!getSubExpr()->evaluateAsRelocatable(AUIPCLoc, nullptr, nullptr))
+ return nullptr;
+
+ const MCSymbolRefExpr *AUIPCSRE = AUIPCLoc.getSymA();
+ if (!AUIPCSRE)
+ return nullptr;
+
+ const auto *DF =
+ dyn_cast_or_null<MCDataFragment>(AUIPCSRE->findAssociatedFragment());
+ if (!DF)
+ return nullptr;
+
+ const MCSymbol *AUIPCSymbol = &AUIPCSRE->getSymbol();
+ for (const MCFixup &F : DF->getFixups()) {
+ if (F.getOffset() != AUIPCSymbol->getOffset())
+ continue;
+
+ switch ((unsigned)F.getKind()) {
+ default:
+ continue;
+ case RISCV::fixup_riscv_pcrel_hi20:
+ return &F;
+ }
+ }
+
+ return nullptr;
+}
+
+bool RISCVMCExpr::evaluatePCRelLo(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ // VK_RISCV_PCREL_LO has to be handled specially. The MCExpr inside is
+ // actually the location of an auipc instruction with a VK_RISCV_PCREL_HI fixup
+ // pointing to the real target. We need to generate an MCValue in the form of
+ // (<real target> + <offset from this fixup to the auipc fixup>). The Fixup
+ // is pcrel relative to the VK_RISCV_PCREL_LO fixup, so we need to add the
+ // offset to the VK_RISCV_PCREL_HI Fixup from VK_RISCV_PCREL_LO to correct.
+ MCValue AUIPCLoc;
+ if (!getSubExpr()->evaluateAsValue(AUIPCLoc, *Layout))
+ return false;
+
+ const MCSymbolRefExpr *AUIPCSRE = AUIPCLoc.getSymA();
+ // Don't try to evaluate %pcrel_hi/%pcrel_lo pairs that cross fragment
+ // boundaries.
+ if (!AUIPCSRE ||
+ findAssociatedFragment() != AUIPCSRE->findAssociatedFragment())
+ return false;
+
+ const MCSymbol *AUIPCSymbol = &AUIPCSRE->getSymbol();
+ if (!AUIPCSymbol)
+ return false;
+
+ const MCFixup *TargetFixup = getPCRelHiFixup();
+ if (!TargetFixup)
+ return false;
+
+ if ((unsigned)TargetFixup->getKind() != RISCV::fixup_riscv_pcrel_hi20)
+ return false;
+
+ MCValue Target;
+ if (!TargetFixup->getValue()->evaluateAsValue(Target, *Layout))
+ return false;
+
+ if (!Target.getSymA() || !Target.getSymA()->getSymbol().isInSection())
+ return false;
+
+ if (&Target.getSymA()->getSymbol().getSection() !=
+ findAssociatedFragment()->getParent())
+ return false;
+
+ uint64_t AUIPCOffset = AUIPCSymbol->getOffset();
+
+ Res = MCValue::get(Target.getSymA(), nullptr,
+ Target.getConstant() + (Fixup->getOffset() - AUIPCOffset));
+ return true;
+}
+
bool RISCVMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
+ if (Kind == VK_RISCV_PCREL_LO && evaluatePCRelLo(Res, Layout, Fixup))
+ return true;
+
if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup))
return false;
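The offset correction performed by evaluatePCRelLo above reduces to plain address arithmetic. A minimal standalone sketch, with made-up offsets (0x10 and 0x18 are hypothetical; the names only mirror the variables in the code):

#include <cassert>
#include <cstdint>

int main() {
  // Assume the auipc carrying the %pcrel_hi fixup sits at offset 0x10 of the
  // fragment and the instruction carrying the %pcrel_lo fixup sits at 0x18.
  uint64_t AUIPCOffset = 0x10;
  uint64_t LoFixupOffset = 0x18;
  int64_t TargetAddend = 0; // constant part of the real target

  // The %pcrel_lo fixup is resolved relative to its own location, so the
  // distance back to the auipc is folded into the addend, matching
  // Target.getConstant() + (Fixup->getOffset() - AUIPCOffset) above.
  int64_t LoAddend = TargetAddend + (int64_t)(LoFixupOffset - AUIPCOffset);
  assert(LoAddend == 0x8);
  return 0;
}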
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
index d2e0f6b6cdae..4eafcc08b51f 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
@@ -39,6 +39,9 @@ private:
int64_t evaluateAsInt64(int64_t Value) const;
+ bool evaluatePCRelLo(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const;
+
explicit RISCVMCExpr(const MCExpr *Expr, VariantKind Kind)
: Expr(Expr), Kind(Kind) {}
@@ -50,6 +53,13 @@ public:
const MCExpr *getSubExpr() const { return Expr; }
+ /// Get the VK_RISCV_PCREL_HI Fixup that the VK_RISCV_PCREL_LO
+ /// points to.
+ ///
+ /// \returns nullptr if this isn't a VK_RISCV_PCREL_LO pointing to a
+ /// VK_RISCV_PCREL_HI.
+ const MCFixup *getPCRelHiFixup() const;
+
void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 2d5205aa7ef7..8d5ef3dbd17f 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -23,6 +23,14 @@ RISCVTargetAsmStreamer::RISCVTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
: RISCVTargetStreamer(S), OS(OS) {}
+void RISCVTargetAsmStreamer::emitDirectiveOptionPush() {
+ OS << "\t.option\tpush\n";
+}
+
+void RISCVTargetAsmStreamer::emitDirectiveOptionPop() {
+ OS << "\t.option\tpop\n";
+}
+
void RISCVTargetAsmStreamer::emitDirectiveOptionRVC() {
OS << "\t.option\trvc\n";
}
@@ -30,3 +38,11 @@ void RISCVTargetAsmStreamer::emitDirectiveOptionRVC() {
void RISCVTargetAsmStreamer::emitDirectiveOptionNoRVC() {
OS << "\t.option\tnorvc\n";
}
+
+void RISCVTargetAsmStreamer::emitDirectiveOptionRelax() {
+ OS << "\t.option\trelax\n";
+}
+
+void RISCVTargetAsmStreamer::emitDirectiveOptionNoRelax() {
+ OS << "\t.option\tnorelax\n";
+}
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
index 525c20810f24..74ec9e303933 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
@@ -18,8 +18,12 @@ class RISCVTargetStreamer : public MCTargetStreamer {
public:
RISCVTargetStreamer(MCStreamer &S);
+ virtual void emitDirectiveOptionPush() = 0;
+ virtual void emitDirectiveOptionPop() = 0;
virtual void emitDirectiveOptionRVC() = 0;
virtual void emitDirectiveOptionNoRVC() = 0;
+ virtual void emitDirectiveOptionRelax() = 0;
+ virtual void emitDirectiveOptionNoRelax() = 0;
};
// This part is for ascii assembly output
@@ -29,8 +33,12 @@ class RISCVTargetAsmStreamer : public RISCVTargetStreamer {
public:
RISCVTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+ void emitDirectiveOptionPush() override;
+ void emitDirectiveOptionPop() override;
void emitDirectiveOptionRVC() override;
void emitDirectiveOptionNoRVC() override;
+ void emitDirectiveOptionRelax() override;
+ void emitDirectiveOptionNoRelax() override;
};
}
diff --git a/contrib/llvm/lib/Target/RISCV/RISCV.h b/contrib/llvm/lib/Target/RISCV/RISCV.h
index 2e4f536aca35..b25aee46200d 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCV.h
+++ b/contrib/llvm/lib/Target/RISCV/RISCV.h
@@ -15,7 +15,8 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCV_H
#define LLVM_LIB_TARGET_RISCV_RISCV_H
-#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "Utils/RISCVBaseInfo.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
class RISCVTargetMachine;
@@ -36,6 +37,9 @@ FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
FunctionPass *createRISCVMergeBaseOffsetOptPass();
void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
+
+FunctionPass *createRISCVExpandPseudoPass();
+void initializeRISCVExpandPseudoPass(PassRegistry &);
}
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/RISCV.td b/contrib/llvm/lib/Target/RISCV/RISCV.td
index 281378cb2eee..0e86e2bc5e98 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCV.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCV.td
@@ -68,6 +68,12 @@ include "RISCVCallingConv.td"
include "RISCVInstrInfo.td"
//===----------------------------------------------------------------------===//
+// Named operands for CSR instructions.
+//===----------------------------------------------------------------------===//
+
+include "RISCVSystemOperands.td"
+
+//===----------------------------------------------------------------------===//
// RISC-V processors supported.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
new file mode 100644
index 000000000000..35c185aa5edd
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -0,0 +1,556 @@
+//===-- RISCVExpandPseudoInsts.cpp - Expand pseudo instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVTargetMachine.h"
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define RISCV_EXPAND_PSEUDO_NAME "RISCV pseudo instruction expansion pass"
+
+namespace {
+
+class RISCVExpandPseudo : public MachineFunctionPass {
+public:
+ const RISCVInstrInfo *TII;
+ static char ID;
+
+ RISCVExpandPseudo() : MachineFunctionPass(ID) {
+ initializeRISCVExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return RISCV_EXPAND_PSEUDO_NAME; }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicBinOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
+ bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, bool IsMasked,
+ int Width, MachineBasicBlock::iterator &NextMBBI);
+};
+
+char RISCVExpandPseudo::ID = 0;
+
+bool RISCVExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+ return Modified;
+}
+
+bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ switch (MBBI->getOpcode()) {
+ case RISCV::PseudoAtomicLoadNand32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicSwap32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadAdd32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadSub32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadNand32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadUMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadUMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
+ NextMBBI);
+ case RISCV::PseudoCmpXchg32:
+ return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
+ case RISCV::PseudoMaskedCmpXchg32:
+ return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+ }
+
+ return false;
+}
+
+static unsigned getLRForRMW32(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::LR_W;
+ case AtomicOrdering::Acquire:
+ return RISCV::LR_W_AQ;
+ case AtomicOrdering::Release:
+ return RISCV::LR_W;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::LR_W_AQ;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::LR_W_AQ_RL;
+ }
+}
+
+static unsigned getSCForRMW32(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::SC_W;
+ case AtomicOrdering::Acquire:
+ return RISCV::SC_W;
+ case AtomicOrdering::Release:
+ return RISCV::SC_W_RL;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::SC_W_RL;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::SC_W_AQ_RL;
+ }
+}
+
+static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
+ DebugLoc DL, MachineBasicBlock *ThisMBB,
+ MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB,
+ AtomicRMWInst::BinOp BinOp, int Width) {
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned IncrReg = MI.getOperand(3).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
+
+ // .loop:
+ // lr.w dest, (addr)
+ // binop scratch, dest, val
+ // sc.w scratch, scratch, (addr)
+ // bnez scratch, loop
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Nand:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-1);
+ break;
+ }
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopMBB);
+}
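As a rough illustration of what the lr.w/sc.w retry loop above implements (not how the backend emits it, and ignoring the memory-ordering selection), the effect of the PseudoAtomicLoadNand32 expansion corresponds to a compare-exchange loop like this sketch:

#include <atomic>
#include <cstdint>

// Semantic sketch only: the hardware loop uses lr.w/sc.w rather than CAS.
uint32_t atomicNand32(std::atomic<uint32_t> &Mem, uint32_t Incr) {
  uint32_t Old = Mem.load();
  // "binop scratch, dest, val" is the AND + XORI(-1) pair above; retrying on
  // sc.w failure corresponds to compare_exchange_weak failing here.
  while (!Mem.compare_exchange_weak(Old, ~(Old & Incr)))
    ;
  return Old; // the pseudo's result operand receives the original value
}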
+
+static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, unsigned DestReg,
+ unsigned OldValReg, unsigned NewValReg,
+ unsigned MaskReg, unsigned ScratchReg) {
+ assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
+ assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
+ assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
+
+ // We select bits from newval and oldval using:
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
+ // r = oldval ^ ((oldval ^ newval) & masktargetdata);
+ BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
+ .addReg(OldValReg)
+ .addReg(NewValReg);
+ BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(MaskReg);
+ BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
+ .addReg(OldValReg)
+ .addReg(ScratchReg);
+}
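The Stanford bit-hacks identity used above can be checked in isolation; this small hypothetical test only verifies that r = old ^ ((old ^ new) & mask) takes bits from new where mask is set and from old elsewhere:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Old = 0x12345678, New = 0xCAFEBABE, Mask = 0x00FF0000;
  uint32_t R = Old ^ ((Old ^ New) & Mask);
  // Inside the mask the bits come from New, outside it from Old.
  assert(R == ((New & Mask) | (Old & ~Mask)));
  assert(R == 0x12FE5678);
  return 0;
}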
+
+static void doMaskedAtomicBinOpExpansion(
+ const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned IncrReg = MI.getOperand(3).getReg();
+ unsigned MaskReg = MI.getOperand(4).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
+
+ // .loop:
+ // lr.w destreg, (alignedaddr)
+ // binop scratch, destreg, incr
+ // xor scratch, destreg, scratch
+ // and scratch, scratch, masktargetdata
+ // xor scratch, destreg, scratch
+ // sc.w scratch, scratch, (alignedaddr)
+ // bnez scratch, loop
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(RISCV::X0)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Add:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Sub:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Nand:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-1);
+ break;
+ }
+
+ insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
+ ScratchReg);
+
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopMBB);
+}
+
+bool RISCVExpandPseudo::expandAtomicBinOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineFunction *MF = MBB.getParent();
+ auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopMBB);
+ MF->insert(++LoopMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopMBB);
+
+ if (!IsMasked)
+ doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
+ else
+ doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
+ Width);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, unsigned ValReg,
+ unsigned ShamtReg) {
+ BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+ BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+}
+
+bool RISCVExpandPseudo::expandAtomicMinMaxOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+ assert(IsMasked == true &&
+ "Should only need to expand masked atomic max/min");
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+ MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Scratch1Reg = MI.getOperand(1).getReg();
+ unsigned Scratch2Reg = MI.getOperand(2).getReg();
+ unsigned AddrReg = MI.getOperand(3).getReg();
+ unsigned IncrReg = MI.getOperand(4).getReg();
+ unsigned MaskReg = MI.getOperand(5).getReg();
+ bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
+
+ //
+ // .loophead:
+ // lr.w destreg, (alignedaddr)
+ // and scratch2, destreg, mask
+ // mv scratch1, destreg
+ // [sext scratch2 if signed min/max]
+ // ifnochangeneeded scratch2, incr, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
+ .addReg(DestReg)
+ .addImm(0);
+
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Max: {
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::Min: {
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::UMax:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ case AtomicRMWInst::UMin:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+
+ // .loopifbody:
+ // xor scratch1, destreg, incr
+ // and scratch1, scratch1, mask
+ // xor scratch1, destreg, scratch1
+ insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
+ MaskReg, Scratch1Reg);
+
+ // .looptail:
+ // sc.w scratch1, scratch1, (addr)
+ // bnez scratch1, loop
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
+ .addReg(AddrReg)
+ .addReg(Scratch1Reg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(Scratch1Reg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+bool RISCVExpandPseudo::expandAtomicCmpXchg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
+ int Width, MachineBasicBlock::iterator &NextMBBI) {
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopHeadMBB->addSuccessor(DoneMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned CmpValReg = MI.getOperand(3).getReg();
+ unsigned NewValReg = MI.getOperand(4).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
+
+ if (!IsMasked) {
+ // .loophead:
+ // lr.w dest, (addr)
+ // bne dest, cmpval, done
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
+ .addReg(DestReg)
+ .addReg(CmpValReg)
+ .addMBB(DoneMBB);
+ // .looptail:
+ // sc.w scratch, newval, (addr)
+ // bnez scratch, loophead
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(NewValReg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+ } else {
+ // .loophead:
+ // lr.w dest, (addr)
+ // and scratch, dest, mask
+ // bne scratch, cmpval, done
+ unsigned MaskReg = MI.getOperand(5).getReg();
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(CmpValReg)
+ .addMBB(DoneMBB);
+
+ // .looptail:
+ // xor scratch, dest, newval
+ // and scratch, scratch, mask
+ // xor scratch, dest, scratch
+ // sc.w scratch, scratch, (addr)
+ // bnez scratch, loophead
+ insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
+ MaskReg, ScratchReg);
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+ }
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo",
+ RISCV_EXPAND_PSEUDO_NAME, false, false)
+namespace llvm {
+
+FunctionPass *createRISCVExpandPseudoPass() { return new RISCVExpandPseudo(); }
+
+} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index a816028f9d8b..74417899c8da 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -148,8 +148,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
// Skip to before the restores of callee-saved registers
// FIXME: assumes exactly one instruction is used to restore each
// callee-saved register.
- MachineBasicBlock::iterator LastFrameDestroy = MBBI;
- std::advance(LastFrameDestroy, -MFI.getCalleeSavedInfo().size());
+ auto LastFrameDestroy = std::prev(MBBI, MFI.getCalleeSavedInfo().size());
uint64_t StackSize = MFI.getStackSize();
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 04441b9a9b15..aa80365feb83 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -11,9 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "RISCV.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCV.h"
#include "RISCVTargetMachine.h"
+#include "Utils/RISCVMatInt.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Debug.h"
@@ -56,20 +57,47 @@ public:
private:
void doPeepholeLoadStoreADDI();
- void doPeepholeBuildPairF64SplitF64();
};
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
doPeepholeLoadStoreADDI();
- doPeepholeBuildPairF64SplitF64();
}
-void RISCVDAGToDAGISel::Select(SDNode *Node) {
- unsigned Opcode = Node->getOpcode();
- MVT XLenVT = Subtarget->getXLenVT();
+static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
+ MVT XLenVT) {
+ RISCVMatInt::InstSeq Seq;
+ RISCVMatInt::generateInstSeq(Imm, XLenVT == MVT::i64, Seq);
+
+ SDNode *Result;
+ SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
+ for (RISCVMatInt::Inst &Inst : Seq) {
+ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
+ if (Inst.Opc == RISCV::LUI)
+ Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
+ else
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
+
+ // Only the first instruction has X0 as its source.
+ SrcReg = SDValue(Result, 0);
+ }
+
+ return Result;
+}
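For 32-bit constants the sequence produced via RISCVMatInt typically boils down to the classic lui/addi split. A hedged standalone sketch of that split only (splitHiLo is a hypothetical helper, not the RISCVMatInt implementation; it assumes two's-complement arithmetic):

#include <cassert>
#include <cstdint>

// Split a 32-bit constant into lui's hi20 and addi's sign-extended lo12 such
// that (Hi20 << 12) + Lo12 == Value modulo 2^32. Adding 0x800 before shifting
// compensates for lo12 being sign-extended by addi.
static void splitHiLo(uint32_t Value, uint32_t &Hi20, int32_t &Lo12) {
  Hi20 = (Value + 0x800) >> 12;
  Lo12 = (int32_t)(Value << 20) >> 20;
}

int main() {
  uint32_t Hi;
  int32_t Lo;
  splitHiLo(0x12345FFF, Hi, Lo);
  assert(Hi == 0x12346 && Lo == -1);
  assert((uint32_t)((Hi << 12) + Lo) == 0x12345FFF);
  return 0;
}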
+
+// Returns true if the Node is an ISD::AND with a constant argument. If so,
+// set Mask to that constant value.
+static bool isConstantMask(SDNode *Node, uint64_t &Mask) {
+ if (Node->getOpcode() == ISD::AND &&
+ Node->getOperand(1).getOpcode() == ISD::Constant) {
+ Mask = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ return true;
+ }
+ return false;
+}
- // If we have a custom node, we have already selected
+void RISCVDAGToDAGISel::Select(SDNode *Node) {
+ // If we have a custom node, we have already selected.
if (Node->isMachineOpcode()) {
LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
Node->setNodeId(-1);
@@ -78,27 +106,58 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Instruction Selection not handled by the auto-generated tablegen selection
// should be handled here.
+ unsigned Opcode = Node->getOpcode();
+ MVT XLenVT = Subtarget->getXLenVT();
+ SDLoc DL(Node);
EVT VT = Node->getValueType(0);
- if (Opcode == ISD::Constant && VT == XLenVT) {
- auto *ConstNode = cast<ConstantSDNode>(Node);
- // Materialize zero constants as copies from X0. This allows the coalescer
- // to propagate these into other instructions.
- if (ConstNode->isNullValue()) {
+
+ switch (Opcode) {
+ case ISD::Constant: {
+ auto ConstNode = cast<ConstantSDNode>(Node);
+ if (VT == XLenVT && ConstNode->isNullValue()) {
SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
RISCV::X0, XLenVT);
ReplaceNode(Node, New.getNode());
return;
}
+ int64_t Imm = ConstNode->getSExtValue();
+ if (XLenVT == MVT::i64) {
+ ReplaceNode(Node, selectImm(CurDAG, SDLoc(Node), Imm, XLenVT));
+ return;
+ }
+ break;
}
- if (Opcode == ISD::FrameIndex) {
- SDLoc DL(Node);
+ case ISD::FrameIndex: {
SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
- EVT VT = Node->getValueType(0);
SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
return;
}
+ case ISD::SRL: {
+ if (!Subtarget->is64Bit())
+ break;
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ uint64_t Mask;
+ // Match (srl (and val, mask), imm) where the result would be a
+ // zero-extended 32-bit integer, i.e. the mask is 0xffffffff or the result
+ // is equivalent to that (SimplifyDemandedBits may have removed lower bits
+ // from the mask that aren't necessary due to the right-shifting).
+ if (Op1.getOpcode() == ISD::Constant &&
+ isConstantMask(Op0.getNode(), Mask)) {
+ uint64_t ShAmt = cast<ConstantSDNode>(Op1.getNode())->getZExtValue();
+
+ if ((Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff) {
+ SDValue ShAmtVal =
+ CurDAG->getTargetConstant(ShAmt, SDLoc(Node), XLenVT);
+ CurDAG->SelectNodeTo(Node, RISCV::SRLIW, XLenVT, Op0.getOperand(0),
+ ShAmtVal);
+ return;
+ }
+ }
+ }
+ }
// Select the default instruction.
SelectCode(Node);
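The mask test in the ISD::SRL case above is easy to sanity-check on concrete values. A small hypothetical check, with a local stand-in for llvm::maskTrailingOnes rather than the LLVM helper itself:

#include <cassert>
#include <cstdint>

// N ones in the low bits, like llvm::maskTrailingOnes<uint64_t>(N).
static uint64_t lowOnes(unsigned N) { return N == 0 ? 0 : (~0ULL >> (64 - N)); }

int main() {
  // (srl (and x, 0xFFFFFFF0), 4): the four low bits are shifted out anyway,
  // so the AND still acts as a zero-extension from i32 and SRLIW applies.
  assert((0xFFFFFFF0ULL | lowOnes(4)) == 0xFFFFFFFFULL);
  // (srl (and x, 0xFFFF0000), 4): bits 4-15 are cleared yet still visible in
  // the result, so this is not equivalent to a 32-bit zero-extension.
  assert((0xFFFF0000ULL | lowOnes(4)) != 0xFFFFFFFFULL);
  return 0;
}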
@@ -216,43 +275,6 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
}
}
-// Remove redundant BuildPairF64+SplitF64 pairs. i.e. cases where an f64 is
-// built of two i32 values, only to be split apart again. This must be done
-// here as a peephole optimisation as the DAG has not been fully legalized at
-// the point BuildPairF64/SplitF64 nodes are created in RISCVISelLowering, so
-// some nodes would not yet have been replaced with libcalls.
-void RISCVDAGToDAGISel::doPeepholeBuildPairF64SplitF64() {
- SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
- ++Position;
-
- while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = &*--Position;
- // Skip dead nodes and any nodes other than SplitF64Pseudo.
- if (N->use_empty() || !N->isMachineOpcode() ||
- !(N->getMachineOpcode() == RISCV::SplitF64Pseudo))
- continue;
-
- // If the operand to SplitF64 is a BuildPairF64, the split operation is
- // redundant. Just use the operands to BuildPairF64 as the result.
- SDValue F64Val = N->getOperand(0);
- if (F64Val.isMachineOpcode() &&
- F64Val.getMachineOpcode() == RISCV::BuildPairF64Pseudo) {
- LLVM_DEBUG(
- dbgs() << "Removing redundant SplitF64Pseudo and replacing uses "
- "with BuildPairF64Pseudo operands:\n");
- LLVM_DEBUG(dbgs() << "N: ");
- LLVM_DEBUG(N->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "F64Val: ");
- LLVM_DEBUG(F64Val->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\n");
- SDValue From[] = {SDValue(N, 0), SDValue(N, 1)};
- SDValue To[] = {F64Val.getOperand(0), F64Val.getOperand(1)};
- CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
- }
- }
- CurDAG->RemoveDeadNodes();
-}
-
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 87796e5b1097..508dcbd009ed 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -80,6 +80,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ if (Subtarget.is64Bit()) {
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ }
+
if (!Subtarget.hasStdExtM()) {
setOperationAction(ISD::MUL, XLenVT, Expand);
setOperationAction(ISD::MULHS, XLenVT, Expand);
@@ -111,6 +118,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
ISD::SETGT, ISD::SETGE, ISD::SETNE};
+ ISD::NodeType FPOpToExtend[] = {
+ ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
+
if (Subtarget.hasStdExtF()) {
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
@@ -119,6 +129,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ for (auto Op : FPOpToExtend)
+ setOperationAction(Op, MVT::f32, Expand);
}
if (Subtarget.hasStdExtD()) {
@@ -131,16 +143,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ for (auto Op : FPOpToExtend)
+ setOperationAction(Op, MVT::f64, Expand);
}
setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
- if (Subtarget.hasStdExtA())
+ if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
- else
+ setMinCmpXchgSizeInBits(32);
+ } else {
setMaxAtomicSizeInBitsSupported(0);
+ }
setBooleanContents(ZeroOrOneBooleanContent);
@@ -160,6 +176,34 @@ EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
return VT.changeVectorElementTypeToInteger();
}
+bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ default:
+ return false;
+ case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
+ case Intrinsic::riscv_masked_atomicrmw_add_i32:
+ case Intrinsic::riscv_masked_atomicrmw_sub_i32:
+ case Intrinsic::riscv_masked_atomicrmw_nand_i32:
+ case Intrinsic::riscv_masked_atomicrmw_max_i32:
+ case Intrinsic::riscv_masked_atomicrmw_min_i32:
+ case Intrinsic::riscv_masked_atomicrmw_umax_i32:
+ case Intrinsic::riscv_masked_atomicrmw_umin_i32:
+ case Intrinsic::riscv_masked_cmpxchg_i32:
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ return true;
+ }
+}
+
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS,
@@ -228,6 +272,10 @@ bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return TargetLowering::isZExtFree(Val, VT2);
}
+bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
+ return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
+}
+
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
@@ -283,9 +331,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VASTART:
return lowerVASTART(Op, DAG);
case ISD::FRAMEADDR:
- return LowerFRAMEADDR(Op, DAG);
+ return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR:
- return LowerRETURNADDR(Op, DAG);
+ return lowerRETURNADDR(Op, DAG);
}
}
@@ -298,7 +346,7 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
int64_t Offset = N->getOffset();
MVT XLenVT = Subtarget.getXLenVT();
- if (isPositionIndependent() || Subtarget.is64Bit())
+ if (isPositionIndependent())
report_fatal_error("Unable to lowerGlobalAddress");
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
@@ -323,7 +371,7 @@ SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
const BlockAddress *BA = N->getBlockAddress();
int64_t Offset = N->getOffset();
- if (isPositionIndependent() || Subtarget.is64Bit())
+ if (isPositionIndependent())
report_fatal_error("Unable to lowerBlockAddress");
SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
@@ -357,26 +405,6 @@ SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
}
}
-SDValue RISCVTargetLowering::lowerExternalSymbol(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
- ExternalSymbolSDNode *N = cast<ExternalSymbolSDNode>(Op);
- const char *Sym = N->getSymbol();
-
- // TODO: should also handle gp-relative loads.
-
- if (isPositionIndependent() || Subtarget.is64Bit())
- report_fatal_error("Unable to lowerExternalSymbol");
-
- SDValue GAHi = DAG.getTargetExternalSymbol(Sym, Ty, RISCVII::MO_HI);
- SDValue GALo = DAG.getTargetExternalSymbol(Sym, Ty, RISCVII::MO_LO);
- SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
- SDValue MNLo =
- SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
- return MNLo;
-}
-
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CondV = Op.getOperand(0);
SDValue TrueV = Op.getOperand(1);
@@ -432,7 +460,7 @@ SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV));
}
-SDValue RISCVTargetLowering::LowerFRAMEADDR(SDValue Op,
+SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
@@ -455,7 +483,7 @@ SDValue RISCVTargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
-SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op,
+SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
@@ -472,7 +500,7 @@ SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
int Off = -XLenInBytes;
- SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(Off, DL, VT);
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
@@ -485,6 +513,84 @@ SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
+// Return true if the given node is a shift with a non-constant shift amount.
+static bool isVariableShift(SDValue Val) {
+ switch (Val.getOpcode()) {
+ default:
+ return false;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return Val.getOperand(1).getOpcode() != ISD::Constant;
+ }
+}
+
+// Returns true if the given node is an sdiv, udiv, or urem with non-constant
+// operands.
+static bool isVariableSDivUDivURem(SDValue Val) {
+ switch (Val.getOpcode()) {
+ default:
+ return false;
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::UREM:
+ return Val.getOperand(0).getOpcode() != ISD::Constant &&
+ Val.getOperand(1).getOpcode() != ISD::Constant;
+ }
+}
+
+SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA: {
+ assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only");
+ if (!DCI.isBeforeLegalize())
+ break;
+ SDValue RHS = N->getOperand(1);
+ if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant ||
+ (RHS->getOpcode() == ISD::AssertZext &&
+ cast<VTSDNode>(RHS->getOperand(1))->getVT().getSizeInBits() <= 5))
+ break;
+ SDValue LHS = N->getOperand(0);
+ SDLoc DL(N);
+ SDValue NewRHS =
+ DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5)));
+ return DCI.CombineTo(
+ N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS));
+ }
+ case ISD::ANY_EXTEND: {
+ // If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64,
+ // then instead sign-extend in order to increase the chance of being able
+ // to select the sllw/srlw/sraw/divw/divuw/remuw instructions.
+ SDValue Src = N->getOperand(0);
+ if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
+ break;
+ if (!isVariableShift(Src) &&
+ !(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
+ break;
+ SDLoc DL(N);
+ return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src));
+ }
+ case RISCVISD::SplitF64: {
+ // If the input to SplitF64 is just BuildPairF64 then the operation is
+ // redundant. Instead, use BuildPairF64's operands directly.
+ SDValue Op0 = N->getOperand(0);
+ if (Op0->getOpcode() != RISCVISD::BuildPairF64)
+ break;
+ return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
@@ -807,10 +913,14 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
if (Reg) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- } else {
- State.addLoc(
- CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+ return false;
}
+
+ if (ValVT == MVT::f32) {
+ LocVT = MVT::f32;
+ LocInfo = CCValAssign::Full;
+ }
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
return false;
}
@@ -859,6 +969,22 @@ void RISCVTargetLowering::analyzeOutputArgs(
}
}
+// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
+// values.
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
+ const CCValAssign &VA, const SDLoc &DL) {
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unexpected CCValAssign::LocInfo");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+ break;
+ }
+ return Val;
+}
+
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
@@ -866,21 +992,29 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
EVT LocVT = VA.getLocVT();
- EVT ValVT = VA.getValVT();
SDValue Val;
unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return Val;
+
+ return convertLocVTToValVT(DAG, Val, VA, DL);
+}
+
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
+ const CCValAssign &VA, const SDLoc &DL) {
+ EVT LocVT = VA.getLocVT();
+
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
- case CCValAssign::Indirect:
break;
case CCValAssign::BCvt:
- Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
break;
}
return Val;
@@ -995,7 +1129,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- assert(VA.getLocVT() == XLenVT && "Unhandled argument type");
SDValue ArgValue;
// Passing f64 on RV32D with a soft float ABI must be handled as a special
// case.
@@ -1282,13 +1415,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Promote the value if needed.
// For now, only handle fully promoted and indirect arguments.
- switch (VA.getLocInfo()) {
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), ArgValue);
- break;
- case CCValAssign::Indirect: {
+ if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
@@ -1310,10 +1437,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
++i;
}
ArgValue = SpillSlot;
- break;
- }
- default:
- llvm_unreachable("Unknown loc info!");
+ } else {
+ ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
}
// Use local copy if it is a byval arg.
@@ -1415,6 +1540,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Glue the RetValue to the end of the call sequence
Chain = RetValue.getValue(1);
Glue = RetValue.getValue(2);
+
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
SDValue RetValue2 =
@@ -1425,15 +1551,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
RetValue2);
}
- switch (VA.getLocInfo()) {
- default:
- llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- RetValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), RetValue);
- break;
- }
+ RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
InVals.push_back(RetValue);
}
@@ -1456,22 +1574,6 @@ bool RISCVTargetLowering::CanLowerReturn(
return true;
}
-static SDValue packIntoRegLoc(SelectionDAG &DAG, SDValue Val,
- const CCValAssign &VA, const SDLoc &DL) {
- EVT LocVT = VA.getLocVT();
-
- switch (VA.getLocInfo()) {
- default:
- llvm_unreachable("Unexpected CCValAssign::LocInfo");
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
- break;
- }
- return Val;
-}
-
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
@@ -1514,7 +1616,7 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
} else {
// Handle a 'normal' return.
- Val = packIntoRegLoc(DAG, Val, VA, DL);
+ Val = convertValVTToLocVT(DAG, Val, VA, DL);
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
// Guarantee that all emitted copies are stuck together.
@@ -1616,3 +1718,83 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
return Builder.CreateFence(AtomicOrdering::Acquire);
return nullptr;
}
+
+TargetLowering::AtomicExpansionKind
+RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+ if (Size == 8 || Size == 16)
+ return AtomicExpansionKind::MaskedIntrinsic;
+ return AtomicExpansionKind::None;
+}
+
+static Intrinsic::ID
+getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
+ case AtomicRMWInst::Add:
+ return Intrinsic::riscv_masked_atomicrmw_add_i32;
+ case AtomicRMWInst::Sub:
+ return Intrinsic::riscv_masked_atomicrmw_sub_i32;
+ case AtomicRMWInst::Nand:
+ return Intrinsic::riscv_masked_atomicrmw_nand_i32;
+ case AtomicRMWInst::Max:
+ return Intrinsic::riscv_masked_atomicrmw_max_i32;
+ case AtomicRMWInst::Min:
+ return Intrinsic::riscv_masked_atomicrmw_min_i32;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::riscv_masked_atomicrmw_umax_i32;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::riscv_masked_atomicrmw_umin_i32;
+ }
+}
+
+Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
+ IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+ Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+ Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
+ Type *Tys[] = {AlignedAddr->getType()};
+ Function *LrwOpScwLoop = Intrinsic::getDeclaration(
+ AI->getModule(),
+ getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);
+
+ // Must pass the shift amount needed to sign extend the loaded value prior
+ // to performing a signed comparison for min/max. ShiftAmt is the number of
+ // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
+ // is the number of bits to left+right shift the value in order to
+ // sign-extend.
+ if (AI->getOperation() == AtomicRMWInst::Min ||
+ AI->getOperation() == AtomicRMWInst::Max) {
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+ unsigned ValWidth =
+ DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
+ Value *SextShamt = Builder.CreateSub(
+ Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
+ return Builder.CreateCall(LrwOpScwLoop,
+ {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+ }
+
+ return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+}
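The SextShamt arithmetic above (XLen - ValWidth - ShiftAmt) can be checked with concrete numbers. A hypothetical i8 example, with a local helper mirroring the sll+sra pair that insertSext emits (assumes two's complement and arithmetic right shift, which holds on the targets of interest):

#include <cassert>
#include <cstdint>

// Sign-extend a narrow field in place within a 32-bit word.
static int32_t sextInPlace(uint32_t Word, unsigned Shamt) {
  return (int32_t)(Word << Shamt) >> Shamt;
}

int main() {
  // i8 atomic min/max on RV32 with the byte at bit 16 of the aligned word:
  // ShiftAmt == 16, ValWidth == 8, XLen == 32.
  unsigned XLen = 32, ValWidth = 8, ShiftAmt = 16;
  unsigned SextShamt = XLen - ValWidth - ShiftAmt; // the value passed above
  assert(SextShamt == 8);

  uint32_t Word = 0x00800000; // target byte is 0x80, i.e. negative as an i8
  assert((uint32_t)sextInPlace(Word, SextShamt) == 0xFF800000u);
  return 0;
}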
+
+TargetLowering::AtomicExpansionKind
+RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *CI) const {
+ unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
+ if (Size == 8 || Size == 16)
+ return AtomicExpansionKind::MaskedIntrinsic;
+ return AtomicExpansionKind::None;
+}
+
+Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
+ IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
+ Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+ Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord));
+ Type *Tys[] = {AlignedAddr->getType()};
+ Function *MaskedCmpXchg = Intrinsic::getDeclaration(
+ CI->getModule(), Intrinsic::riscv_masked_cmpxchg_i32, Tys);
+ return Builder.CreateCall(MaskedCmpXchg,
+ {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+}
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 280adb29fd02..6970900bb062 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -43,6 +43,9 @@ public:
explicit RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI);
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I = nullptr) const override;
@@ -51,10 +54,13 @@ public:
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
@@ -107,15 +113,27 @@ private:
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
bool IsEligibleForTailCallOptimization(CCState &CCInfo,
CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const;
+
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ virtual Value *emitMaskedAtomicRMWIntrinsic(
+ IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+ Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override;
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
+ virtual Value *
+ emitMaskedAtomicCmpXchgIntrinsic(IRBuilder<> &Builder, AtomicCmpXchgInst *CI,
+ Value *AlignedAddr, Value *CmpVal,
+ Value *NewVal, Value *Mask,
+ AtomicOrdering Ord) const override;
};
}
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 529e048045c6..ebd676a6056e 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -45,11 +45,12 @@ def InstFormatCSS : InstFormat<10>;
def InstFormatCIW : InstFormat<11>;
def InstFormatCL : InstFormat<12>;
def InstFormatCS : InstFormat<13>;
-def InstFormatCB : InstFormat<14>;
-def InstFormatCJ : InstFormat<15>;
-def InstFormatOther : InstFormat<16>;
+def InstFormatCA : InstFormat<14>;
+def InstFormatCB : InstFormat<15>;
+def InstFormatCJ : InstFormat<16>;
+def InstFormatOther : InstFormat<17>;
-// The following opcode names and match those given in Table 19.1 in the
+// The following opcode names match those given in Table 19.1 in the
// RISC-V User-level ISA specification ("RISC-V base opcode map").
class RISCVOpcode<bits<7> val> {
bits<7> Value = val;
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
index 6abcbd7cc8a1..bda8bbb558eb 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
@@ -118,6 +118,19 @@ class RVInst16CS<bits<3> funct3, bits<2> opcode, dag outs, dag ins,
let Inst{1-0} = opcode;
}
+class RVInst16CA<bits<6> funct6, bits<2> funct2, bits<2> opcode, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCA> {
+ bits<3> rs2;
+ bits<3> rs1;
+
+ let Inst{15-10} = funct6;
+ let Inst{9-7} = rs1;
+ let Inst{6-5} = funct2;
+ let Inst{4-2} = rs2;
+ let Inst{1-0} = opcode;
+}
+
class RVInst16CB<bits<3> funct3, bits<2> opcode, dag outs, dag ins,
string opcodestr, string argstr>
: RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCB> {
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 327e4a7d615f..76c74368ca11 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -362,9 +362,8 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
- const auto &STI = MF->getSubtarget<RISCVSubtarget>();
- if (TM.isPositionIndependent() || STI.is64Bit())
+ if (TM.isPositionIndependent())
report_fatal_error("Unable to insert indirect branch");
if (!isInt<32>(BrOffset))
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index b51e4e70330d..d7cc13d4fabd 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -125,11 +125,6 @@ def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> {
}];
}
-def uimm12 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<12>;
- let DecoderMethod = "decodeUImmOperand<12>";
-}
-
// A 13-bit signed immediate where the least significant bit is zero.
def simm13_lsb0 : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<13, "Lsb0">;
@@ -143,8 +138,7 @@ def simm13_lsb0 : Operand<OtherVT> {
}];
}
-def uimm20 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<20>;
+class UImm20Operand : Operand<XLenVT> {
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<20>";
let MCOperandPredicate = [{
@@ -155,9 +149,20 @@ def uimm20 : Operand<XLenVT> {
}];
}
+def uimm20_lui : UImm20Operand {
+ let ParserMatchClass = UImmAsmOperand<20, "LUI">;
+}
+def uimm20_auipc : UImm20Operand {
+ let ParserMatchClass = UImmAsmOperand<20, "AUIPC">;
+}
+
+def Simm21Lsb0JALAsmOperand : SImmAsmOperand<21, "Lsb0JAL"> {
+ let ParserMethod = "parseJALOffset";
+}
+
// A 21-bit signed immediate where the least significant bit is zero.
-def simm21_lsb0 : Operand<OtherVT> {
- let ParserMatchClass = SImmAsmOperand<21, "Lsb0">;
+def simm21_lsb0_jal : Operand<OtherVT> {
+ let ParserMatchClass = Simm21Lsb0JALAsmOperand;
let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<21>";
let MCOperandPredicate = [{
@@ -172,24 +177,42 @@ def BareSymbol : AsmOperandClass {
let Name = "BareSymbol";
let RenderMethod = "addImmOperands";
let DiagnosticType = "InvalidBareSymbol";
+ let ParserMethod = "parseBareSymbol";
}
// A bare symbol.
def bare_symbol : Operand<XLenVT> {
let ParserMatchClass = BareSymbol;
- let MCOperandPredicate = [{
- return MCOp.isBareSymbolRef();
- }];
+}
+
+def CSRSystemRegister : AsmOperandClass {
+ let Name = "CSRSystemRegister";
+ let ParserMethod = "parseCSRSystemRegister";
+ let DiagnosticType = "InvalidCSRSystemRegister";
+}
+
+def csr_sysreg : Operand<XLenVT> {
+ let ParserMatchClass = CSRSystemRegister;
+ let PrintMethod = "printCSRSystemRegister";
+ let DecoderMethod = "decodeUImmOperand<12>";
}
// A parameterized register class alternative to i32imm/i64imm from Target.td.
-def ixlenimm : Operand<XLenVT> {
- let ParserMatchClass = ImmXLenAsmOperand<"">;
+def ixlenimm : Operand<XLenVT>;
+
+def ixlenimm_li : Operand<XLenVT> {
+ let ParserMatchClass = ImmXLenAsmOperand<"", "LI">;
}
// Standalone (codegen-only) immleaf patterns.
def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>;
def simm32hi20 : ImmLeaf<XLenVT, [{return isShiftedInt<20, 12>(Imm);}]>;
+// A mask value that won't affect significant shift bits.
+def immbottomxlenset : ImmLeaf<XLenVT, [{
+ if (Subtarget->is64Bit())
+ return countTrailingOnes<uint64_t>(Imm) >= 6;
+ return countTrailingOnes<uint64_t>(Imm) >= 5;
+}]>;
// Addressing modes.
// Necessary because a frameindex can't be matched directly in a pattern.
@@ -255,13 +278,13 @@ class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
class CSR_ir<bits<3> funct3, string opcodestr>
- : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins uimm12:$imm12, GPR:$rs1),
+ : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins csr_sysreg:$imm12, GPR:$rs1),
opcodestr, "$rd, $imm12, $rs1">;
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
class CSR_ii<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd),
- (ins uimm12:$imm12, uimm5:$rs1),
+ (ins csr_sysreg:$imm12, uimm5:$rs1),
opcodestr, "$rd, $imm12, $rs1">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -285,14 +308,14 @@ class Priv<string opcodestr, bits<7> funct7>
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, isReMaterializable = 1, mayLoad = 0, mayStore = 0 in {
-def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20:$imm20),
+def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20_lui:$imm20),
"lui", "$rd, $imm20">;
-def AUIPC : RVInstU<OPC_AUIPC, (outs GPR:$rd), (ins uimm20:$imm20),
+def AUIPC : RVInstU<OPC_AUIPC, (outs GPR:$rd), (ins uimm20_auipc:$imm20),
"auipc", "$rd, $imm20">;
let isCall = 1 in
-def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0:$imm20),
+def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0_jal:$imm20),
"jal", "$rd, $imm20">;
let isCall = 1 in
@@ -379,6 +402,15 @@ def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> {
let rd = 0;
let imm12 = 1;
}
+
+// This is a de facto standard (as set by GNU binutils) 32-bit unimplemented
+// instruction (i.e., it should always trap, if your implementation has invalid
+// instruction traps).
+def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", ""> {
+ let rs1 = 0;
+ let rd = 0;
+ let imm12 = 0b110000000000;
+}
} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
def CSRRW : CSR_ir<0b001, "csrrw">;
@@ -467,7 +499,7 @@ def : InstAlias<"nop", (ADDI X0, X0, 0)>;
// expanded to real instructions immediately.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 32,
isCodeGenOnly = 0, isAsmParserOnly = 1 in
-def PseudoLI : Pseudo<(outs GPR:$rd), (ins ixlenimm:$imm), [],
+def PseudoLI : Pseudo<(outs GPR:$rd), (ins ixlenimm_li:$imm), [],
"li", "$rd, $imm">;
def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>;
@@ -516,8 +548,8 @@ def : InstAlias<"bleu $rs, $rt, $offset",
(BGEU GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>;
// "ret" has more weight since "ret" and "jr" alias the same "jalr" instruction.
-def : InstAlias<"j $offset", (JAL X0, simm21_lsb0:$offset)>;
-def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0:$offset)>;
+def : InstAlias<"j $offset", (JAL X0, simm21_lsb0_jal:$offset)>;
+def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0_jal:$offset)>;
def : InstAlias<"jr $rs", (JALR X0, GPR:$rs, 0)>;
def : InstAlias<"jalr $rs", (JALR X1, GPR:$rs, 0)>;
def : InstAlias<"ret", (JALR X0, X1, 0), 2>;
@@ -538,18 +570,67 @@ def : InstAlias<"rdcycleh $rd", (CSRRS GPR:$rd, 0xC80, X0)>;
def : InstAlias<"rdtimeh $rd", (CSRRS GPR:$rd, 0xC81, X0)>;
} // Predicates = [IsRV32]
-def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, uimm12:$csr, X0)>;
-def : InstAlias<"csrw $csr, $rs", (CSRRW X0, uimm12:$csr, GPR:$rs)>;
-def : InstAlias<"csrs $csr, $rs", (CSRRS X0, uimm12:$csr, GPR:$rs)>;
-def : InstAlias<"csrc $csr, $rs", (CSRRC X0, uimm12:$csr, GPR:$rs)>;
+def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, csr_sysreg:$csr, X0)>;
+def : InstAlias<"csrw $csr, $rs", (CSRRW X0, csr_sysreg:$csr, GPR:$rs)>;
+def : InstAlias<"csrs $csr, $rs", (CSRRS X0, csr_sysreg:$csr, GPR:$rs)>;
+def : InstAlias<"csrc $csr, $rs", (CSRRC X0, csr_sysreg:$csr, GPR:$rs)>;
-def : InstAlias<"csrwi $csr, $imm", (CSRRWI X0, uimm12:$csr, uimm5:$imm)>;
-def : InstAlias<"csrsi $csr, $imm", (CSRRSI X0, uimm12:$csr, uimm5:$imm)>;
-def : InstAlias<"csrci $csr, $imm", (CSRRCI X0, uimm12:$csr, uimm5:$imm)>;
+def : InstAlias<"csrwi $csr, $imm", (CSRRWI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrsi $csr, $imm", (CSRRSI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrci $csr, $imm", (CSRRCI X0, csr_sysreg:$csr, uimm5:$imm)>;
+
+let EmitPriority = 0 in {
+def : InstAlias<"csrw $csr, $imm", (CSRRWI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrs $csr, $imm", (CSRRSI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrc $csr, $imm", (CSRRCI X0, csr_sysreg:$csr, uimm5:$imm)>;
+
+def : InstAlias<"csrrw $rd, $csr, $imm", (CSRRWI GPR:$rd, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrrs $rd, $csr, $imm", (CSRRSI GPR:$rd, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrrc $rd, $csr, $imm", (CSRRCI GPR:$rd, csr_sysreg:$csr, uimm5:$imm)>;
+}
def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0)>;
def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>;
+let EmitPriority = 0 in {
+def : InstAlias<"add $rd, $rs1, $imm12",
+ (ADDI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"and $rd, $rs1, $imm12",
+ (ANDI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"xor $rd, $rs1, $imm12",
+ (XORI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"or $rd, $rs1, $imm12",
+ (ORI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"sll $rd, $rs1, $shamt",
+ (SLLI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : InstAlias<"srl $rd, $rs1, $shamt",
+ (SRLI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : InstAlias<"sra $rd, $rs1, $shamt",
+ (SRAI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt)>;
+let Predicates = [IsRV64] in {
+def : InstAlias<"addw $rd, $rs1, $imm12",
+ (ADDIW GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"sllw $rd, $rs1, $shamt",
+ (SLLIW GPR:$rd, GPR:$rs1, uimm5:$shamt)>;
+def : InstAlias<"srlw $rd, $rs1, $shamt",
+ (SRLIW GPR:$rd, GPR:$rs1, uimm5:$shamt)>;
+def : InstAlias<"sraw $rd, $rs1, $shamt",
+ (SRAIW GPR:$rd, GPR:$rs1, uimm5:$shamt)>;
+} // Predicates = [IsRV64]
+def : InstAlias<"slt $rd, $rs1, $imm12",
+ (SLTI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"sltu $rd, $rs1, $imm12",
+ (SLTIU GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+}
+
+def : MnemonicAlias<"move", "mv">;
+
+// The SCALL and SBREAK instructions were renamed to ECALL and EBREAK in
+// version 2.1 of the user-level ISA. Like the GNU toolchain, we still accept
+// the old name for backwards compatibility.
+def : MnemonicAlias<"scall", "ecall">;
+def : MnemonicAlias<"sbreak", "ebreak">;
+
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//
@@ -560,7 +641,7 @@ def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>;
/// Generic pattern classes
-class PatGprGpr<SDPatternOperator OpNode, RVInstR Inst>
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst>
: Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>;
class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst>
: Pat<(OpNode GPR:$rs1, simm12:$imm12), (Inst GPR:$rs1, simm12:$imm12)>;
@@ -573,12 +654,37 @@ class PatGprUimmLog2XLen<SDPatternOperator OpNode, RVInstIShift Inst>
def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
return isOrEquivalentToAdd(N);
}]>;
+def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+}]>;
+def sexti32 : PatFrags<(ops node:$src),
+ [(sext_inreg node:$src, i32),
+ (assertsexti32 node:$src)]>;
+def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+}]>;
+def assertzexti5 : PatFrag<(ops node:$src), (assertzext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits() <= 5;
+}]>;
+def zexti32 : PatFrags<(ops node:$src),
+ [(and node:$src, 0xffffffff),
+ (assertzexti32 node:$src)]>;
+// Defines a legal mask for (assertzexti5 (and src, mask)) to be combinable
+// with a shiftw operation. The mask mustn't modify the lower 5 bits or the
+// upper 32 bits.
+def shiftwamt_mask : ImmLeaf<XLenVT, [{
+ return countTrailingOnes<uint64_t>(Imm) >= 5 && isUInt<32>(Imm);
+}]>;
+def shiftwamt : PatFrags<(ops node:$src),
+ [(assertzexti5 (and node:$src, shiftwamt_mask)),
+ (assertzexti5 node:$src)]>;
/// Immediates
def : Pat<(simm12:$imm), (ADDI X0, simm12:$imm)>;
def : Pat<(simm32hi20:$imm), (LUI (HI20 imm:$imm))>;
-def : Pat<(simm32:$imm), (ADDI (LUI (HI20 imm:$imm)), (LO12Sext imm:$imm))>;
+def : Pat<(simm32:$imm), (ADDI (LUI (HI20 imm:$imm)), (LO12Sext imm:$imm))>,
+ Requires<[IsRV32]>;
/// Simple arithmetic operations
@@ -591,13 +697,23 @@ def : PatGprGpr<and, AND>;
def : PatGprSimm12<and, ANDI>;
def : PatGprGpr<xor, XOR>;
def : PatGprSimm12<xor, XORI>;
-def : PatGprGpr<shl, SLL>;
def : PatGprUimmLog2XLen<shl, SLLI>;
-def : PatGprGpr<srl, SRL>;
def : PatGprUimmLog2XLen<srl, SRLI>;
-def : PatGprGpr<sra, SRA>;
def : PatGprUimmLog2XLen<sra, SRAI>;
+// Match both a plain shift and one where the shift amount is masked (this is
+// typically introduced when the legalizer promotes the shift amount and
+// zero-extends it). For RISC-V, the mask is unnecessary as shifts in the base
+// ISA only read the least significant 5 bits (RV32I) or 6 bits (RV64I).
+class shiftop<SDPatternOperator operator>
+ : PatFrags<(ops node:$val, node:$count),
+ [(operator node:$val, node:$count),
+ (operator node:$val, (and node:$count, immbottomxlenset))]>;
+
+def : PatGprGpr<shiftop<shl>, SLL>;
+def : PatGprGpr<shiftop<srl>, SRL>;
+def : PatGprGpr<shiftop<sra>, SRA>;
+
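A small sketch of why the masked form matched by shiftop is safe to fold away, assuming RV32 (RV64 reads shamt[5:0] instead, hence the 6 in immbottomxlenset):

    #include <cstdint>
    // Base-ISA semantics on RV32: SLL/SRL/SRA only read shamt[4:0].
    static uint32_t sllRV32(uint32_t Val, uint32_t Amt) {
      return Val << (Amt & 0x1f);
    }
    // sllRV32(V, Amt) == sllRV32(V, Amt & Mask) for any Mask whose low five
    // bits are all ones, which is exactly the property immbottomxlenset checks.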
/// FrameIndex calculations
def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12),
@@ -614,7 +730,9 @@ def : PatGprSimm12<setult, SLTIU>;
// Define pattern expansions for setcc operations that aren't directly
// handled by a RISC-V instruction.
+def : Pat<(seteq GPR:$rs1, 0), (SLTIU GPR:$rs1, 1)>;
def : Pat<(seteq GPR:$rs1, GPR:$rs2), (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;
+def : Pat<(setne GPR:$rs1, 0), (SLTU X0, GPR:$rs1)>;
def : Pat<(setne GPR:$rs1, GPR:$rs2), (SLTU X0, (XOR GPR:$rs1, GPR:$rs2))>;
def : Pat<(setugt GPR:$rs1, GPR:$rs2), (SLTU GPR:$rs2, GPR:$rs1)>;
def : Pat<(setuge GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>;
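A quick numeric check of the seteq/setne expansions above, sketched in C++ terms:

    #include <cstdint>
    // seteq a, b  ->  SLTIU (XOR a, b), 1 : the xor is zero exactly when a == b
    static uint64_t setEQ(uint64_t A, uint64_t B) { return (A ^ B) < 1; }
    // setne a, b  ->  SLTU x0, (XOR a, b) : any nonzero xor is above zero
    static uint64_t setNE(uint64_t A, uint64_t B) { return 0 < (A ^ B); }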
@@ -638,7 +756,7 @@ def Select_GPR_Using_CC_GPR : SelectCC_rrirr<GPR, GPR>;
// Match `(brcond (CondOp ..), ..)` and lower to the appropriate RISC-V branch
// instruction.
class BccPat<PatFrag CondOp, RVInstB Inst>
- : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
+ : Pat<(brcond (XLenVT (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
(Inst GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12)>;
def : BccPat<seteq, BEQ>;
@@ -649,7 +767,7 @@ def : BccPat<setult, BLTU>;
def : BccPat<setuge, BGEU>;
class BccSwapPat<PatFrag CondOp, RVInst InstBcc>
- : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
+ : Pat<(brcond (XLenVT (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
(InstBcc GPR:$rs2, GPR:$rs1, bb:$imm12)>;
// Condition codes that don't have matching RISC-V branch instructions, but
@@ -664,8 +782,8 @@ def : BccSwapPat<setule, BGEU>;
def : Pat<(brcond GPR:$cond, bb:$imm12), (BNE GPR:$cond, X0, bb:$imm12)>;
let isBarrier = 1, isBranch = 1, isTerminator = 1 in
-def PseudoBR : Pseudo<(outs), (ins simm21_lsb0:$imm20), [(br bb:$imm20)]>,
- PseudoInstExpansion<(JAL X0, simm21_lsb0:$imm20)>;
+def PseudoBR : Pseudo<(outs), (ins simm21_lsb0_jal:$imm20), [(br bb:$imm20)]>,
+ PseudoInstExpansion<(JAL X0, simm21_lsb0_jal:$imm20)>;
let isCall = 1, Defs=[X1] in
let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in
@@ -720,6 +838,11 @@ def : Pat<(Tail (iPTR tglobaladdr:$dst)),
def : Pat<(Tail (iPTR texternalsym:$dst)),
(PseudoTAIL texternalsym:$dst)>;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
+ isAsmParserOnly = 1 in
+def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+ "lla", "$dst, $src">;
+
/// Loads
multiclass LdPat<PatFrag LoadOp, RVInst Inst> {
@@ -737,7 +860,7 @@ defm : LdPat<sextloadi8, LB>;
defm : LdPat<extloadi8, LB>;
defm : LdPat<sextloadi16, LH>;
defm : LdPat<extloadi16, LH>;
-defm : LdPat<load, LW>;
+defm : LdPat<load, LW>, Requires<[IsRV32]>;
defm : LdPat<zextloadi8, LBU>;
defm : LdPat<zextloadi16, LHU>;
@@ -756,7 +879,7 @@ multiclass StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy> {
defm : StPat<truncstorei8, SB, GPR>;
defm : StPat<truncstorei16, SH, GPR>;
-defm : StPat<store, SW, GPR>;
+defm : StPat<store, SW, GPR>, Requires<[IsRV32]>;
/// Fences
@@ -764,13 +887,13 @@ defm : StPat<store, SW, GPR>;
// Manual: Volume I.
// fence acquire -> fence r, rw
-def : Pat<(atomic_fence (i32 4), (imm)), (FENCE 0b10, 0b11)>;
+def : Pat<(atomic_fence (XLenVT 4), (imm)), (FENCE 0b10, 0b11)>;
// fence release -> fence rw, w
-def : Pat<(atomic_fence (i32 5), (imm)), (FENCE 0b11, 0b1)>;
+def : Pat<(atomic_fence (XLenVT 5), (imm)), (FENCE 0b11, 0b1)>;
// fence acq_rel -> fence.tso
-def : Pat<(atomic_fence (i32 6), (imm)), (FENCE_TSO)>;
+def : Pat<(atomic_fence (XLenVT 6), (imm)), (FENCE_TSO)>;
// fence seq_cst -> fence rw, rw
-def : Pat<(atomic_fence (i32 7), (imm)), (FENCE 0b11, 0b11)>;
+def : Pat<(atomic_fence (XLenVT 7), (imm)), (FENCE 0b11, 0b11)>;
// Lowering for atomic load and store is defined in RISCVInstrInfoA.td.
// Although these are lowered to fence+load/store instructions defined in the
@@ -788,6 +911,66 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
[(CallSeqEnd timm:$amt1, timm:$amt2)]>;
} // Defs = [X2], Uses = [X2]
+/// RV64 patterns
+
+let Predicates = [IsRV64] in {
+
+/// sext and zext
+
+def : Pat<(sext_inreg GPR:$rs1, i32), (ADDIW GPR:$rs1, 0)>;
+def : Pat<(and GPR:$rs1, 0xffffffff), (SRLI (SLLI GPR:$rs1, 32), 32)>;
+
+/// ALU operations
+
+def : Pat<(sext_inreg (add GPR:$rs1, GPR:$rs2), i32),
+ (ADDW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (add GPR:$rs1, simm12:$imm12), i32),
+ (ADDIW GPR:$rs1, simm12:$imm12)>;
+def : Pat<(sext_inreg (sub GPR:$rs1, GPR:$rs2), i32),
+ (SUBW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (shl GPR:$rs1, uimm5:$shamt), i32),
+ (SLLIW GPR:$rs1, uimm5:$shamt)>;
+// (srl (zexti32 ...), uimm5:$shamt) is matched with custom code due to the
+// need to undo manipulation of the mask value performed by DAGCombine.
+def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
+ (SRAIW GPR:$rs1, uimm5:$shamt)>;
+
+// For variable-length shifts, we rely on assertzexti5 being inserted during
+// lowering (see RISCVTargetLowering::PerformDAGCombine). This enables us to
+// guarantee that selecting a 32-bit variable shift is legal (as the variable
+// shift is known to be <= 32). We must also be careful not to create
+// semantically incorrect patterns. For instance, selecting SRLW for
+// (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
+// is not guaranteed to be safe, as we don't know whether the upper 32 bits of
+// the result are used or not (in the case where rs2=0, this is a
+// sign-extension operation).
+
+def : Pat<(sext_inreg (shl GPR:$rs1, (shiftwamt GPR:$rs2)), i32),
+ (SLLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (shl GPR:$rs1, (shiftwamt GPR:$rs2))),
+ (SRLI (SLLI (SLLW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : Pat<(sext_inreg (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)), i32),
+ (SRLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2))),
+ (SRLI (SLLI (SRLW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : Pat<(sra (sexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
+ (SRAW GPR:$rs1, GPR:$rs2)>;
+
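A numeric illustration of the SRLW caveat above, assuming the RV64 *W shift semantics (operate on the low 32 bits, then sign-extend the 32-bit result):

    #include <cstdint>
    static uint64_t srlw(uint64_t Rs1, uint64_t Rs2) {
      uint32_t Lo = static_cast<uint32_t>(Rs1) >> (Rs2 & 0x1f);
      return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(Lo)));
    }
    // For Rs1 = 0x0000000080000000 and Rs2 = 0, a plain 64-bit srl leaves the
    // value unchanged, but srlw() returns 0xffffffff80000000. SRLW is therefore
    // only selected when the upper 32 bits of the result are known not to
    // matter, or are explicitly re-zeroed as in the SRLI/SLLI wrappers above.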
+/// Loads
+
+defm : LdPat<sextloadi32, LW>;
+defm : LdPat<extloadi32, LW>;
+defm : LdPat<zextloadi32, LWU>;
+defm : LdPat<load, LD>;
+
+/// Stores
+
+defm : StPat<truncstorei32, SW, GPR>;
+defm : StPat<store, SD, GPR>;
+} // Predicates = [IsRV64]
+
//===----------------------------------------------------------------------===//
// Standard extensions
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 379322060438..9cb1d2f0b627 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -44,6 +44,17 @@ multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> {
def _AQ_RL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">;
}
+multiclass AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy> {
+ def : Pat<(StoreOp GPR:$rs1, StTy:$rs2), (Inst StTy:$rs2, GPR:$rs1, 0)>;
+ def : Pat<(StoreOp AddrFI:$rs1, StTy:$rs2), (Inst StTy:$rs2, AddrFI:$rs1, 0)>;
+ def : Pat<(StoreOp (add GPR:$rs1, simm12:$imm12), StTy:$rs2),
+ (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
+ def : Pat<(StoreOp (add AddrFI:$rs1, simm12:$imm12), StTy:$rs2),
+ (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>;
+ def : Pat<(StoreOp (IsOrAdd AddrFI:$rs1, simm12:$imm12), StTy:$rs2),
+ (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>;
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -91,7 +102,177 @@ defm : LdPat<atomic_load_8, LB>;
defm : LdPat<atomic_load_16, LH>;
defm : LdPat<atomic_load_32, LW>;
-defm : StPat<atomic_store_8, SB, GPR>;
-defm : StPat<atomic_store_16, SH, GPR>;
-defm : StPat<atomic_store_32, SW, GPR>;
-} // Predicates = [HasStdExtF]
+defm : AtomicStPat<atomic_store_8, SB, GPR>;
+defm : AtomicStPat<atomic_store_16, SH, GPR>;
+defm : AtomicStPat<atomic_store_32, SW, GPR>;
+
+/// AMOs
+
+multiclass AMOPat<string AtomicOp, string BaseInst> {
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst)>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst#"_AQ")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst#"_RL")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst#"_AQ_RL")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst#"_AQ_RL")>;
+}
+
+defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">;
+defm : AMOPat<"atomic_load_add_32", "AMOADD_W">;
+defm : AMOPat<"atomic_load_and_32", "AMOAND_W">;
+defm : AMOPat<"atomic_load_or_32", "AMOOR_W">;
+defm : AMOPat<"atomic_load_xor_32", "AMOXOR_W">;
+defm : AMOPat<"atomic_load_max_32", "AMOMAX_W">;
+defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
+defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
+defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
+
+def : Pat<(atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr),
+ (AMOADD_W GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_acquire GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_release GPR:$addr, GPR:$incr),
+ (AMOADD_W_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+
+/// Pseudo AMOs
+
+class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$incr, ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def PseudoAtomicLoadNand32 : PseudoAMO;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+def : Pat<(atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(atomic_load_nand_32_acquire GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(atomic_load_nand_32_release GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;
+
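The ordering immediates used above (2, 4, 5, 6, 7) mirror llvm::AtomicOrdering; a sketch of the assumed numbering, with AtomicOrdering.h remaining the authoritative definition:

    enum class AtomicOrdering : unsigned {
      NotAtomic = 0,
      Unordered = 1,
      Monotonic = 2,
      // 3 is unused (no consume ordering)
      Acquire = 4,
      Release = 5,
      AcquireRelease = 6,
      SequentiallyConsistent = 7
    };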
+class PseudoMaskedAMO
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOMinMax
+ : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$sextshamt,
+ ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+ "@earlyclobber $scratch2";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOUMinUMax
+ : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+ "@earlyclobber $scratch2";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst>
+ : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering),
+ (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>;
+
+class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
+ : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ imm:$ordering),
+ (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ imm:$ordering)>;
+
+def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
+ PseudoMaskedAtomicSwap32>;
+def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32,
+ PseudoMaskedAtomicLoadAdd32>;
+def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32,
+ PseudoMaskedAtomicLoadSub32>;
+def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32,
+ PseudoMaskedAtomicLoadNand32>;
+def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32,
+ PseudoMaskedAtomicLoadMax32>;
+def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32,
+ PseudoMaskedAtomicLoadMin32>;
+def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32,
+ PseudoMaskedAtomicLoadUMax32>;
+def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
+ PseudoMaskedAtomicLoadUMin32>;
+
+/// Compare and exchange
+
+class PseudoCmpXchg
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, i32imm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst> {
+ def : Pat<(!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
+ def : Pat<(!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
+ def : Pat<(!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
+ def : Pat<(!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
+ def : Pat<(!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
+}
+
+def PseudoCmpXchg32 : PseudoCmpXchg;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
+
+def PseudoMaskedCmpXchg32
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
+ i32imm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def : Pat<(int_riscv_masked_cmpxchg_i32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
+ (PseudoMaskedCmpXchg32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
+
+} // Predicates = [HasStdExtA]
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 5d1c62c0b653..ad68b5a7dc97 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -167,7 +167,7 @@ def uimm10_lsb00nonzero : Operand<XLenVT>,
[{return isShiftedUInt<8, 2>(Imm) && (Imm != 0);}]> {
let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">;
let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmOperand<10>";
+ let DecoderMethod = "decodeUImmNonZeroOperand<10>";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -182,12 +182,12 @@ def simm10_lsb0000nonzero : Operand<XLenVT>,
[{return (Imm != 0) && isShiftedInt<6, 4>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">;
let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmOperand<10>";
+ let DecoderMethod = "decodeSImmNonZeroOperand<10>";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
return false;
- return isShiftedInt<6, 4>(Imm);
+ return isShiftedInt<6, 4>(Imm) && (Imm != 0);
}];
}
@@ -258,16 +258,13 @@ class Shift_right<bits<2> funct2, string OpcodeStr, RegisterClass cls,
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class CS_ALU<bits<2> funct2, string OpcodeStr, RegisterClass cls,
- bit RV64only>
- : RVInst16CS<0b100, 0b01, (outs cls:$rd_wb), (ins cls:$rd, cls:$rs2),
+class CS_ALU<bits<6> funct6, bits<2> funct2, string OpcodeStr,
+ RegisterClass cls>
+ : RVInst16CA<funct6, funct2, 0b01, (outs cls:$rd_wb), (ins cls:$rd, cls:$rs2),
OpcodeStr, "$rd, $rs2"> {
bits<3> rd;
let Constraints = "$rd = $rd_wb";
- let Inst{12} = RV64only;
- let Inst{11-10} = 0b11;
let Inst{9-7} = rd;
- let Inst{6-5} = funct2;
}
//===----------------------------------------------------------------------===//
@@ -411,14 +408,14 @@ def C_ANDI : RVInst16CB<0b100, 0b01, (outs GPRC:$rs1_wb), (ins GPRC:$rs1, simm6:
let Inst{6-2} = imm{4-0};
}
-def C_SUB : CS_ALU<0b00, "c.sub", GPRC, 0>;
-def C_XOR : CS_ALU<0b01, "c.xor", GPRC, 0>;
-def C_OR : CS_ALU<0b10, "c.or" , GPRC, 0>;
-def C_AND : CS_ALU<0b11, "c.and", GPRC, 0>;
+def C_SUB : CS_ALU<0b100011, 0b00, "c.sub", GPRC>;
+def C_XOR : CS_ALU<0b100011, 0b01, "c.xor", GPRC>;
+def C_OR : CS_ALU<0b100011, 0b10, "c.or" , GPRC>;
+def C_AND : CS_ALU<0b100011, 0b11, "c.and", GPRC>;
let Predicates = [HasStdExtC, IsRV64] in {
-def C_SUBW : CS_ALU<0b00, "c.subw", GPRC, 1>;
-def C_ADDW : CS_ALU<0b01, "c.addw", GPRC, 1>;
+def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>;
+def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -478,7 +475,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2),
"c.mv", "$rs1, $rs2">;
-let rs1 = 0, rs2 = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let rs1 = 0, rs2 = 0, hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
def C_EBREAK : RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
@@ -517,6 +514,13 @@ def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> {
let Inst{9-7} = imm{8-6};
}
+// The all-zeros pattern isn't a valid RISC-V instruction. It's used by GNU
+// binutils as a 16-bit instruction known to be unimplemented (i.e., trapping).
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> {
+ let Inst{15-0} = 0;
+}
+
} // Predicates = [HasStdExtC]
//===----------------------------------------------------------------------===//
@@ -625,6 +629,8 @@ def : CompressPat<(AND GPRC:$rs1, GPRC:$rs2, GPRC:$rs1),
} // Predicates = [HasStdExtC]
let Predicates = [HasStdExtC, IsRV64] in {
+def : CompressPat<(ADDIW GPRNoX0:$rd, X0, simm6:$imm),
+ (C_LI GPRNoX0:$rd, simm6:$imm)>;
def : CompressPat<(SUBW GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
(C_SUBW GPRC:$rs1, GPRC:$rs2)>;
def : CompressPat<(ADDW GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
@@ -678,6 +684,7 @@ def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, X0),
def : CompressPat<(ADDI GPRNoX0:$rs1, GPRNoX0:$rs2, 0),
(C_MV GPRNoX0:$rs1, GPRNoX0:$rs2)>;
def : CompressPat<(EBREAK), (C_EBREAK)>;
+def : CompressPat<(UNIMP), (C_UNIMP)>;
def : CompressPat<(JALR X1, GPRNoX0:$rs1, 0),
(C_JALR GPRNoX0:$rs1)>;
def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs1, GPRNoX0:$rs2),
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 06b834d55ade..9f1cd50de595 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -212,13 +212,8 @@ let Predicates = [HasStdExtD] in {
def : Pat<(fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>;
def : Pat<(fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>;
-// FP->[u]int. Round-to-zero must be used
-def : Pat<(fp_to_sint FPR64:$rs1), (FCVT_W_D FPR64:$rs1, 0b001)>;
-def : Pat<(fp_to_uint FPR64:$rs1), (FCVT_WU_D FPR64:$rs1, 0b001)>;
-
-// [u]int->fp
-def : Pat<(sint_to_fp GPR:$rs1), (FCVT_D_W GPR:$rs1)>;
-def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_WU GPR:$rs1)>;
+// [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so
+// are defined later.
/// Float arithmetic operations
@@ -235,6 +230,22 @@ def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>;
def : PatFpr64Fpr64<fcopysign, FSGNJ_D>;
def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>;
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3),
+ (FMADD_D $rs1, $rs2, $rs3, 0b111)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(fma FPR64:$rs1, FPR64:$rs2, (fneg FPR64:$rs3)),
+ (FMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3),
+ (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
+ (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+
// The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
// canonical NaN when giving a signaling NaN. This doesn't match the LLVM
// behaviour (see https://bugs.llvm.org/show_bug.cgi?id=27363). However, the
@@ -287,3 +298,13 @@ def SplitF64Pseudo
[(set GPR:$dst1, GPR:$dst2, (RISCVSplitF64 FPR64:$src))]>;
} // Predicates = [HasStdExtD]
+
+let Predicates = [HasStdExtD, IsRV32] in {
+// double->[u]int. Round-to-zero must be used.
+def : Pat<(fp_to_sint FPR64:$rs1), (FCVT_W_D FPR64:$rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR64:$rs1), (FCVT_WU_D FPR64:$rs1, 0b001)>;
+
+// [u]int->double.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_D_W GPR:$rs1)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_WU GPR:$rs1)>;
+} // Predicates = [HasStdExtD, IsRV32]
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 6d7c59becf24..03bdac45873d 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -27,7 +27,7 @@ def FRMArg : AsmOperandClass {
def frmarg : Operand<XLenVT> {
let ParserMatchClass = FRMArg;
let PrintMethod = "printFRMArg";
- let DecoderMethod = "decodeUImmOperand<3>";
+ let DecoderMethod = "decodeFRMArg";
}
//===----------------------------------------------------------------------===//
@@ -252,13 +252,8 @@ let Predicates = [HasStdExtF] in {
def : Pat<(bitconvert GPR:$rs1), (FMV_W_X GPR:$rs1)>;
def : Pat<(bitconvert FPR32:$rs1), (FMV_X_W FPR32:$rs1)>;
-// FP->[u]int. Round-to-zero must be used
-def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
-def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
-
-// [u]int->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
-def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
+// [u]int32<->float conversion patterns must be gated on IsRV32 or IsRV64, so
+// are defined later.
/// Float arithmetic operations
@@ -275,6 +270,22 @@ def : Pat<(fabs FPR32:$rs1), (FSGNJX_S $rs1, $rs1)>;
def : PatFpr32Fpr32<fcopysign, FSGNJ_S>;
def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>;
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3),
+ (FMADD_S $rs1, $rs2, $rs3, 0b111)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)),
+ (FMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3),
+ (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
+ (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
// The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
// canonical NaN when given a signaling NaN. This doesn't match the LLVM
// behaviour (see https://bugs.llvm.org/show_bug.cgi?id=27363). However, the
@@ -313,3 +324,13 @@ defm : LdPat<load, FLW>;
defm : StPat<store, FSW, FPR32>;
} // Predicates = [HasStdExtF]
+
+let Predicates = [HasStdExtF, IsRV32] in {
+// float->[u]int. Round-to-zero must be used.
+def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
+
+// [u]int->float. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
+} // Predicates = [HasStdExtF, IsRV32]
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index 2dd10ada4003..05dd3311ad54 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -49,3 +49,34 @@ def : PatGprGpr<udiv, DIVU>;
def : PatGprGpr<srem, REM>;
def : PatGprGpr<urem, REMU>;
} // Predicates = [HasStdExtM]
+
+let Predicates = [HasStdExtM, IsRV64] in {
+def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
+ (MULW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (sdiv (sexti32 GPR:$rs1),
+ (sexti32 GPR:$rs2)), i32),
+ (DIVW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (sdiv (sexti32 GPR:$rs1),
+ (sexti32 GPR:$rs2))),
+ (SRLI (SLLI (DIVW GPR:$rs1, GPR:$rs2), 32), 32)>;
+def : Pat<(sext_inreg (udiv (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
+ (DIVUW GPR:$rs1, GPR:$rs2)>;
+// It's cheaper to perform a divuw and zero-extend the result than to
+// zero-extend both inputs to a udiv.
+def : Pat<(udiv (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
+ (SRLI (SLLI (DIVUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+// Although the sexti32 operands may not have originated from an i32 srem,
+// this pattern is safe as it is impossible for two sign extended inputs to
+// produce a result where res[63:32]=0 and res[31]=1.
+def : Pat<(srem (sexti32 GPR:$rs1), (sexti32 GPR:$rs2)),
+ (REMW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (srem (sexti32 GPR:$rs1),
+ (sexti32 GPR:$rs2)), i32),
+ (REMW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (urem (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
+ (REMUW GPR:$rs1, GPR:$rs2)>;
+// It's cheaper to perform a remuw and zero-extend the result than to
+// zero-extend both inputs to a urem.
+def : Pat<(urem (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
+ (SRLI (SLLI (REMUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+} // Predicates = [HasStdExtM, IsRV64]
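A numeric check of the divuw/remuw remark above, assuming the *W semantics of dividing the low 32 bits and sign-extending the 32-bit result (divide-by-zero handling omitted from this sketch):

    #include <cstdint>
    static uint64_t divuw(uint64_t Rs1, uint64_t Rs2) {
      uint32_t Q = static_cast<uint32_t>(Rs1) / static_cast<uint32_t>(Rs2);
      return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(Q)));
    }
    // divuw(0xffffffff, 1) == 0xffffffffffffffff, so the SRLI(SLLI(..., 32), 32)
    // wrapper in the pattern is what restores the zero-extended value
    // 0x00000000ffffffff that the original i64 udiv expects.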
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index b8fa8a97d41a..cea009c5447d 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -62,7 +62,7 @@ private:
MachineRegisterInfo *MRI;
std::set<MachineInstr *> DeadInstrs;
};
-}; // end anonymous namespace
+} // end anonymous namespace
char RISCVMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, "riscv-merge-base-offset",
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td
new file mode 100644
index 000000000000..f1b7984ffe6b
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -0,0 +1,352 @@
+//===- RISCVSystemOperands.td ----------------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the symbolic operands permitted for various kinds of
+// RISC-V system instruction.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/TableGen/SearchableTable.td"
+
+//===----------------------------------------------------------------------===//
+// CSR (control and status register read/write) instruction options.
+//===----------------------------------------------------------------------===//
+
+class SysReg<string name, bits<12> op> {
+ string Name = name;
+ bits<12> Encoding = op;
+ // FIXME: add these additional fields when needed.
+ // Privilege Access: Read and Write = 0, 1, 2; Read-Only = 3.
+ // Privilege Mode: User = 0, System = 1 or Machine = 3.
+ // bits<2> ReadWrite = op{11 - 10};
+ // bits<2> XMode = op{9 - 8};
+ // Check Extra field name and what bits 7-6 correspond to.
+ // bits<2> Extra = op{7 - 6};
+ // Register number without the privilege bits.
+ // bits<6> Number = op{5 - 0};
+ code FeaturesRequired = [{ {} }];
+ bit isRV32Only = 0;
+}
+
+def SysRegsList : GenericTable {
+ let FilterClass = "SysReg";
+ // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed.
+ let Fields = [ "Name", "Encoding", "FeaturesRequired", "isRV32Only" ];
+
+ let PrimaryKey = [ "Encoding" ];
+ let PrimaryKeyName = "lookupSysRegByEncoding";
+}
+
+def lookupSysRegByName : SearchIndex {
+ let Table = SysRegsList;
+ let Key = [ "Name" ];
+}
+
+// The following CSR encodings match those given in Tables 2.2,
+// 2.3, 2.4 and 2.5 in the RISC-V Instruction Set Manual
+// Volume II: Privileged Architecture.
+
+//===--------------------------
+// User Trap Setup
+//===--------------------------
+def : SysReg<"ustatus", 0x000>;
+def : SysReg<"uie", 0x004>;
+def : SysReg<"utvec", 0x005>;
+
+//===--------------------------
+// User Trap Handling
+//===--------------------------
+def : SysReg<"uscratch", 0x040>;
+def : SysReg<"uepc", 0x041>;
+def : SysReg<"ucause", 0x042>;
+def : SysReg<"utval", 0x043>;
+def : SysReg<"uip", 0x044>;
+
+//===--------------------------
+// User Floating-Point CSRs
+//===--------------------------
+
+let FeaturesRequired = [{ {RISCV::FeatureStdExtF} }] in {
+def : SysReg<"fflags", 0x001>;
+def : SysReg<"frm", 0x002>;
+def : SysReg<"fcsr", 0x003>;
+}
+
+//===--------------------------
+// User Counter/Timers
+//===--------------------------
+def : SysReg<"cycle", 0xC00>;
+def : SysReg<"time", 0xC01>;
+def : SysReg<"instret", 0xC02>;
+
+def : SysReg<"hpmcounter3", 0xC03>;
+def : SysReg<"hpmcounter4", 0xC04>;
+def : SysReg<"hpmcounter5", 0xC05>;
+def : SysReg<"hpmcounter6", 0xC06>;
+def : SysReg<"hpmcounter7", 0xC07>;
+def : SysReg<"hpmcounter8", 0xC08>;
+def : SysReg<"hpmcounter9", 0xC09>;
+def : SysReg<"hpmcounter10", 0xC0A>;
+def : SysReg<"hpmcounter11", 0xC0B>;
+def : SysReg<"hpmcounter12", 0xC0C>;
+def : SysReg<"hpmcounter13", 0xC0D>;
+def : SysReg<"hpmcounter14", 0xC0E>;
+def : SysReg<"hpmcounter15", 0xC0F>;
+def : SysReg<"hpmcounter16", 0xC10>;
+def : SysReg<"hpmcounter17", 0xC11>;
+def : SysReg<"hpmcounter18", 0xC12>;
+def : SysReg<"hpmcounter19", 0xC13>;
+def : SysReg<"hpmcounter20", 0xC14>;
+def : SysReg<"hpmcounter21", 0xC15>;
+def : SysReg<"hpmcounter22", 0xC16>;
+def : SysReg<"hpmcounter23", 0xC17>;
+def : SysReg<"hpmcounter24", 0xC18>;
+def : SysReg<"hpmcounter25", 0xC19>;
+def : SysReg<"hpmcounter26", 0xC1A>;
+def : SysReg<"hpmcounter27", 0xC1B>;
+def : SysReg<"hpmcounter28", 0xC1C>;
+def : SysReg<"hpmcounter29", 0xC1D>;
+def : SysReg<"hpmcounter30", 0xC1E>;
+def : SysReg<"hpmcounter31", 0xC1F>;
+
+let isRV32Only = 1 in {
+def: SysReg<"cycleh", 0xC80>;
+def: SysReg<"timeh", 0xC81>;
+def: SysReg<"instreth", 0xC82>;
+
+def: SysReg<"hpmcounter3h", 0xC83>;
+def: SysReg<"hpmcounter4h", 0xC84>;
+def: SysReg<"hpmcounter5h", 0xC85>;
+def: SysReg<"hpmcounter6h", 0xC86>;
+def: SysReg<"hpmcounter7h", 0xC87>;
+def: SysReg<"hpmcounter8h", 0xC88>;
+def: SysReg<"hpmcounter9h", 0xC89>;
+def: SysReg<"hpmcounter10h", 0xC8A>;
+def: SysReg<"hpmcounter11h", 0xC8B>;
+def: SysReg<"hpmcounter12h", 0xC8C>;
+def: SysReg<"hpmcounter13h", 0xC8D>;
+def: SysReg<"hpmcounter14h", 0xC8E>;
+def: SysReg<"hpmcounter15h", 0xC8F>;
+def: SysReg<"hpmcounter16h", 0xC90>;
+def: SysReg<"hpmcounter17h", 0xC91>;
+def: SysReg<"hpmcounter18h", 0xC92>;
+def: SysReg<"hpmcounter19h", 0xC93>;
+def: SysReg<"hpmcounter20h", 0xC94>;
+def: SysReg<"hpmcounter21h", 0xC95>;
+def: SysReg<"hpmcounter22h", 0xC96>;
+def: SysReg<"hpmcounter23h", 0xC97>;
+def: SysReg<"hpmcounter24h", 0xC98>;
+def: SysReg<"hpmcounter25h", 0xC99>;
+def: SysReg<"hpmcounter26h", 0xC9A>;
+def: SysReg<"hpmcounter27h", 0xC9B>;
+def: SysReg<"hpmcounter28h", 0xC9C>;
+def: SysReg<"hpmcounter29h", 0xC9D>;
+def: SysReg<"hpmcounter30h", 0xC9E>;
+def: SysReg<"hpmcounter31h", 0xC9F>;
+}
+
+//===--------------------------
+// Supervisor Trap Setup
+//===--------------------------
+def : SysReg<"sstatus", 0x100>;
+def : SysReg<"sedeleg", 0x102>;
+def : SysReg<"sideleg", 0x103>;
+def : SysReg<"sie", 0x104>;
+def : SysReg<"stvec", 0x105>;
+def : SysReg<"scounteren", 0x106>;
+
+//===--------------------------
+// Supervisor Trap Handling
+//===--------------------------
+def : SysReg<"sscratch", 0x140>;
+def : SysReg<"sepc", 0x141>;
+def : SysReg<"scause", 0x142>;
+def : SysReg<"stval", 0x143>;
+def : SysReg<"sip", 0x144>;
+
+//===-------------------------------------
+// Supervisor Protection and Translation
+//===-------------------------------------
+def : SysReg<"satp", 0x180>;
+
+//===-----------------------------
+// Machine Information Registers
+//===-----------------------------
+
+def : SysReg<"mvendorid", 0xF11>;
+def : SysReg<"marchid", 0xF12>;
+def : SysReg<"mimpid", 0xF13>;
+def : SysReg<"mhartid", 0xF14>;
+
+//===-----------------------------
+// Machine Trap Setup
+//===-----------------------------
+def : SysReg<"mstatus", 0x300>;
+def : SysReg<"misa", 0x301>;
+def : SysReg<"medeleg", 0x302>;
+def : SysReg<"mideleg", 0x303>;
+def : SysReg<"mie", 0x304>;
+def : SysReg<"mtvec", 0x305>;
+def : SysReg<"mcounteren", 0x306>;
+
+//===-----------------------------
+// Machine Trap Handling
+//===-----------------------------
+def : SysReg<"mscratch", 0x340>;
+def : SysReg<"mepc", 0x341>;
+def : SysReg<"mcause", 0x342>;
+def : SysReg<"mtval", 0x343>;
+def : SysReg<"mip", 0x344>;
+
+//===----------------------------------
+// Machine Protection and Translation
+//===----------------------------------
+def : SysReg<"pmpcfg0", 0x3A0>;
+def : SysReg<"pmpcfg2", 0x3A2>;
+let isRV32Only = 1 in {
+def : SysReg<"pmpcfg1", 0x3A1>;
+def : SysReg<"pmpcfg3", 0x3A3>;
+}
+
+def : SysReg<"pmpaddr0", 0x3B0>;
+def : SysReg<"pmpaddr1", 0x3B1>;
+def : SysReg<"pmpaddr2", 0x3B2>;
+def : SysReg<"pmpaddr3", 0x3B3>;
+def : SysReg<"pmpaddr4", 0x3B4>;
+def : SysReg<"pmpaddr5", 0x3B5>;
+def : SysReg<"pmpaddr6", 0x3B6>;
+def : SysReg<"pmpaddr7", 0x3B7>;
+def : SysReg<"pmpaddr8", 0x3B8>;
+def : SysReg<"pmpaddr9", 0x3B9>;
+def : SysReg<"pmpaddr10", 0x3BA>;
+def : SysReg<"pmpaddr11", 0x3BB>;
+def : SysReg<"pmpaddr12", 0x3BC>;
+def : SysReg<"pmpaddr13", 0x3BD>;
+def : SysReg<"pmpaddr14", 0x3BE>;
+def : SysReg<"pmpaddr15", 0x3BF>;
+
+
+//===--------------------------
+// Machine Counter and Timers
+//===--------------------------
+def : SysReg<"mcycle", 0xB00>;
+def : SysReg<"minstret", 0xB02>;
+
+def : SysReg<"mhpmcounter3", 0xB03>;
+def : SysReg<"mhpmcounter4", 0xB04>;
+def : SysReg<"mhpmcounter5", 0xB05>;
+def : SysReg<"mhpmcounter6", 0xB06>;
+def : SysReg<"mhpmcounter7", 0xB07>;
+def : SysReg<"mhpmcounter8", 0xB08>;
+def : SysReg<"mhpmcounter9", 0xB09>;
+def : SysReg<"mhpmcounter10", 0xB0A>;
+def : SysReg<"mhpmcounter11", 0xB0B>;
+def : SysReg<"mhpmcounter12", 0xB0C>;
+def : SysReg<"mhpmcounter13", 0xB0D>;
+def : SysReg<"mhpmcounter14", 0xB0E>;
+def : SysReg<"mhpmcounter15", 0xB0F>;
+def : SysReg<"mhpmcounter16", 0xB10>;
+def : SysReg<"mhpmcounter17", 0xB11>;
+def : SysReg<"mhpmcounter18", 0xB12>;
+def : SysReg<"mhpmcounter19", 0xB13>;
+def : SysReg<"mhpmcounter20", 0xB14>;
+def : SysReg<"mhpmcounter21", 0xB15>;
+def : SysReg<"mhpmcounter22", 0xB16>;
+def : SysReg<"mhpmcounter23", 0xB17>;
+def : SysReg<"mhpmcounter24", 0xB18>;
+def : SysReg<"mhpmcounter25", 0xB19>;
+def : SysReg<"mhpmcounter26", 0xB1A>;
+def : SysReg<"mhpmcounter27", 0xB1B>;
+def : SysReg<"mhpmcounter28", 0xB1C>;
+def : SysReg<"mhpmcounter29", 0xB1D>;
+def : SysReg<"mhpmcounter30", 0xB1E>;
+def : SysReg<"mhpmcounter31", 0xB1F>;
+
+let isRV32Only = 1 in {
+def: SysReg<"mcycleh", 0xB80>;
+def: SysReg<"minstreth", 0xB82>;
+
+def: SysReg<"mhpmcounter3h", 0xB83>;
+def: SysReg<"mhpmcounter4h", 0xB84>;
+def: SysReg<"mhpmcounter5h", 0xB85>;
+def: SysReg<"mhpmcounter6h", 0xB86>;
+def: SysReg<"mhpmcounter7h", 0xB87>;
+def: SysReg<"mhpmcounter8h", 0xB88>;
+def: SysReg<"mhpmcounter9h", 0xB89>;
+def: SysReg<"mhpmcounter10h", 0xB8A>;
+def: SysReg<"mhpmcounter11h", 0xB8B>;
+def: SysReg<"mhpmcounter12h", 0xB8C>;
+def: SysReg<"mhpmcounter13h", 0xB8D>;
+def: SysReg<"mhpmcounter14h", 0xB8E>;
+def: SysReg<"mhpmcounter15h", 0xB8F>;
+def: SysReg<"mhpmcounter16h", 0xB90>;
+def: SysReg<"mhpmcounter17h", 0xB91>;
+def: SysReg<"mhpmcounter18h", 0xB92>;
+def: SysReg<"mhpmcounter19h", 0xB93>;
+def: SysReg<"mhpmcounter20h", 0xB94>;
+def: SysReg<"mhpmcounter21h", 0xB95>;
+def: SysReg<"mhpmcounter22h", 0xB96>;
+def: SysReg<"mhpmcounter23h", 0xB97>;
+def: SysReg<"mhpmcounter24h", 0xB98>;
+def: SysReg<"mhpmcounter25h", 0xB99>;
+def: SysReg<"mhpmcounter26h", 0xB9A>;
+def: SysReg<"mhpmcounter27h", 0xB9B>;
+def: SysReg<"mhpmcounter28h", 0xB9C>;
+def: SysReg<"mhpmcounter29h", 0xB9D>;
+def: SysReg<"mhpmcounter30h", 0xB9E>;
+def: SysReg<"mhpmcounter31h", 0xB9F>;
+}
+
+//===--------------------------
+// Machine Counter Setup
+//===--------------------------
+def : SysReg<"mhpmevent3", 0x323>;
+def : SysReg<"mhpmevent4", 0x324>;
+def : SysReg<"mhpmevent5", 0x325>;
+def : SysReg<"mhpmevent6", 0x326>;
+def : SysReg<"mhpmevent7", 0x327>;
+def : SysReg<"mhpmevent8", 0x328>;
+def : SysReg<"mhpmevent9", 0x329>;
+def : SysReg<"mhpmevent10", 0x32A>;
+def : SysReg<"mhpmevent11", 0x32B>;
+def : SysReg<"mhpmevent12", 0x32C>;
+def : SysReg<"mhpmevent13", 0x32D>;
+def : SysReg<"mhpmevent14", 0x32E>;
+def : SysReg<"mhpmevent15", 0x32F>;
+def : SysReg<"mhpmevent16", 0x330>;
+def : SysReg<"mhpmevent17", 0x331>;
+def : SysReg<"mhpmevent18", 0x332>;
+def : SysReg<"mhpmevent19", 0x333>;
+def : SysReg<"mhpmevent20", 0x334>;
+def : SysReg<"mhpmevent21", 0x335>;
+def : SysReg<"mhpmevent22", 0x336>;
+def : SysReg<"mhpmevent23", 0x337>;
+def : SysReg<"mhpmevent24", 0x338>;
+def : SysReg<"mhpmevent25", 0x339>;
+def : SysReg<"mhpmevent26", 0x33A>;
+def : SysReg<"mhpmevent27", 0x33B>;
+def : SysReg<"mhpmevent28", 0x33C>;
+def : SysReg<"mhpmevent29", 0x33D>;
+def : SysReg<"mhpmevent30", 0x33E>;
+def : SysReg<"mhpmevent31", 0x33F>;
+
+//===-----------------------------------------------
+// Debug/Trace Registers (shared with Debug Mode)
+//===-----------------------------------------------
+def : SysReg<"tselect", 0x7A0>;
+def : SysReg<"tdata1", 0x7A1>;
+def : SysReg<"tdata2", 0x7A2>;
+def : SysReg<"tdata3", 0x7A3>;
+
+//===-----------------------------------------------
+// Debug Mode Registers
+//===-----------------------------------------------
+def : SysReg<"dcsr", 0x7B0>;
+def : SysReg<"dpc", 0x7B1>;
+def : SysReg<"dscratch", 0x7B2>;
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index a2ebf5bf3e6b..8937ec200bd7 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -27,6 +27,8 @@ using namespace llvm;
extern "C" void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
+ auto PR = PassRegistry::getPassRegistry();
+ initializeRISCVExpandPseudoPass(*PR);
}
static std::string computeDataLayout(const Triple &TT) {
@@ -45,12 +47,6 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -59,7 +55,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<RISCVELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
@@ -78,6 +74,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
void addPreEmitPass() override;
+ void addPreEmitPass2() override;
void addPreRegAlloc() override;
};
}
@@ -99,6 +96,13 @@ bool RISCVPassConfig::addInstSelector() {
void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
+void RISCVPassConfig::addPreEmitPass2() {
+ // Schedule the expansion of AMOs at the last possible moment, avoiding the
+ // possibility for other passes to break the requirements for forward
+ // progress in the LR/SC block.
+ addPass(createRISCVExpandPseudoPass());
+}
+
void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVMergeBaseOffsetOptPass());
}
diff --git a/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
new file mode 100644
index 000000000000..964af1f74cec
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
@@ -0,0 +1,9 @@
+#include "RISCVBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace RISCVSysReg {
+#define GET_SysRegsList_IMPL
+#include "RISCVGenSystemOperands.inc"
+} // namespace RISCVSysReg
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
index b278a2ed3903..372e0e80bbaf 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
@@ -14,9 +14,10 @@
#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
-#include "RISCVMCTargetDesc.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/SubtargetFeature.h"
namespace llvm {
@@ -38,9 +39,10 @@ enum {
InstFormatCIW = 11,
InstFormatCL = 12,
InstFormatCS = 13,
- InstFormatCB = 14,
- InstFormatCJ = 15,
- InstFormatOther = 16,
+ InstFormatCA = 14,
+ InstFormatCB = 15,
+ InstFormatCJ = 16,
+ InstFormatOther = 17,
InstFormatMask = 31
};
@@ -104,7 +106,53 @@ inline static RoundingMode stringToRoundingMode(StringRef Str) {
.Case("dyn", RISCVFPRndMode::DYN)
.Default(RISCVFPRndMode::Invalid);
}
+
+inline static bool isValidRoundingMode(unsigned Mode) {
+ switch (Mode) {
+ default:
+ return false;
+ case RISCVFPRndMode::RNE:
+ case RISCVFPRndMode::RTZ:
+ case RISCVFPRndMode::RDN:
+ case RISCVFPRndMode::RUP:
+ case RISCVFPRndMode::RMM:
+ case RISCVFPRndMode::DYN:
+ return true;
+ }
+}
} // namespace RISCVFPRndMode
+
+namespace RISCVSysReg {
+struct SysReg {
+ const char *Name;
+ unsigned Encoding;
+ // FIXME: add these additional fields when needed.
+ // Privilege Access: Read, Write, Read-Only.
+ // unsigned ReadWrite;
+ // Privilege Mode: User, System or Machine.
+ // unsigned Mode;
+ // Check field name.
+ // unsigned Extra;
+ // Register number without the privilege bits.
+ // unsigned Number;
+ FeatureBitset FeaturesRequired;
+ bool isRV32Only;
+
+ bool haveRequiredFeatures(FeatureBitset ActiveFeatures) const {
+ // Not in 32-bit mode.
+ if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit])
+ return false;
+ // No required feature associated with the system register.
+ if (FeaturesRequired.none())
+ return true;
+ return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
+ }
+};
+
+#define GET_SysRegsList_DECL
+#include "RISCVGenSystemOperands.inc"
+} // end namespace RISCVSysReg
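A rough usage sketch of the searchable-table accessors declared above; lookupSysRegByEncoding/lookupSysRegByName come from the generated RISCVGenSystemOperands.inc, and the helper name printCSRName is assumed for illustration:

    #include "RISCVBaseInfo.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/Support/raw_ostream.h"

    // Hypothetical printer helper: prefer the symbolic CSR name when the
    // register exists and its required features are enabled.
    static void printCSRName(unsigned Encoding, const llvm::MCSubtargetInfo &STI,
                             llvm::raw_ostream &OS) {
      const auto *Reg = llvm::RISCVSysReg::lookupSysRegByEncoding(Encoding);
      if (Reg && Reg->haveRequiredFeatures(STI.getFeatureBits()))
        OS << Reg->Name;      // e.g. "mstatus" for encoding 0x300
      else
        OS << Encoding;       // fall back to the raw immediate
    }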
+
} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
new file mode 100644
index 000000000000..3dc298246bc5
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
@@ -0,0 +1,79 @@
+//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMatInt.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdint>
+
+namespace llvm {
+
+namespace RISCVMatInt {
+void generateInstSeq(int64_t Val, bool Is64Bit, InstSeq &Res) {
+ if (isInt<32>(Val)) {
+ // Depending on the active bits in the immediate Value v, the following
+ // instruction sequences are emitted:
+ //
+ // v == 0 : ADDI
+ // v[0,12) != 0 && v[12,32) == 0 : ADDI
+ // v[0,12) == 0 && v[12,32) != 0 : LUI
+ // v[0,32) != 0 : LUI+ADDI(W)
+ int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
+ int64_t Lo12 = SignExtend64<12>(Val);
+
+ if (Hi20)
+ Res.push_back(Inst(RISCV::LUI, Hi20));
+
+ if (Lo12 || Hi20 == 0) {
+ unsigned AddiOpc = (Is64Bit && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
+ Res.push_back(Inst(AddiOpc, Lo12));
+ }
+ return;
+ }
+
+ assert(Is64Bit && "Can't emit >32-bit imm for non-RV64 target");
+
+ // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
+ // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
+ // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
+ // while the following ADDI instructions contribute up to 12 bits each.
+ //
+ // At first glance, implementing this seems possible by simply
+ // emitting the most significant 32 bits (LUI+ADDIW) followed by as many left
+ // shift (SLLI) and immediate additions (ADDI) as needed. However, due to the
+ // fact that ADDI performs a sign extended addition, doing it like that would
+ // only be possible when at most 11 bits of the ADDI instructions are used.
+ // Using all 12 bits of the ADDI instructions, like done by GAS, actually
+ // requires that the constant is processed starting with the least significant
+ // bit.
+ //
+ // In the following, constants are processed from LSB to MSB but instruction
+ // emission is performed from MSB to LSB by recursively calling
+ // generateInstSeq. In each recursion, first the lowest 12 bits are removed
+ // from the constant and the optimal shift amount, which can be greater than
+ // 12 bits if the constant is sparse, is determined. Then, the shifted
+ // remaining constant is processed recursively and gets emitted as soon as it
+ // fits into 32 bits. The emission of the shifts and additions is subsequently
+ // performed when the recursion returns.
+
+ int64_t Lo12 = SignExtend64<12>(Val);
+ int64_t Hi52 = (Val + 0x800) >> 12;
+ int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
+ Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
+
+ generateInstSeq(Hi52, Is64Bit, Res);
+
+ Res.push_back(Inst(RISCV::SLLI, ShiftAmount));
+ if (Lo12)
+ Res.push_back(Inst(RISCV::ADDI, Lo12));
+}
+} // namespace RISCVMatInt
+} // namespace llvm
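A quick worked example of the recursion above (illustration only, not part of the patch): for Val = 0x100000001, Lo12 is 1, the remaining bits shift down by 32, the inner value 1 fits in 32 bits and becomes a single ADDI, and the shift plus low part are re-emitted on the way back out, giving ADDI 1; SLLI 32; ADDI 1. The decomposition can be checked standalone, with a GCC/Clang builtin standing in for findFirstSet:

#include <cassert>
#include <cstdint>

int main() {
  int64_t Val = 0x100000001;                        // does not fit in 32 bits
  int64_t Lo12 = ((Val & 0xFFF) ^ 0x800) - 0x800;   // SignExtend64<12>(Val) == 1
  int64_t Hi52 = (Val + 0x800) >> 12;               // == 0x100000
  int Shift = 12 + __builtin_ctzll((uint64_t)Hi52); // findFirstSet -> 32
  int64_t Inner = Hi52 >> (Shift - 12);             // == 1, handled by one ADDI
  // Emitted sequence: ADDI 1; SLLI 32; ADDI 1 -- verified by the identity below.
  assert((Inner << Shift) + Lo12 == Val);
  return 0;
}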
diff --git a/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h
new file mode 100644
index 000000000000..49d1d89adc7a
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h
@@ -0,0 +1,36 @@
+//===- RISCVMatInt.h - Immediate materialisation ---------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_MATINT_H
+#define LLVM_LIB_TARGET_RISCV_MATINT_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MachineValueType.h"
+#include <cstdint>
+
+namespace llvm {
+
+namespace RISCVMatInt {
+struct Inst {
+ unsigned Opc;
+ int64_t Imm;
+
+ Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {}
+};
+using InstSeq = SmallVector<Inst, 8>;
+
+// Helper to generate an instruction sequence that will materialise the given
+// immediate value into a register. A sequence of instructions represented by
+// a simple struct is produced rather than directly emitting the instructions in
+// order to allow this helper to be used from both the MC layer and during
+// instruction selection.
+void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res);
+} // namespace RISCVMatInt
+} // namespace llvm
+#endif
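A hypothetical caller of this interface would look roughly like the sketch below; emitOne() is a placeholder for whatever emitter the MC layer or instruction selection supplies, so this is a usage illustration rather than code from the tree:

// Hypothetical use of RISCVMatInt::generateInstSeq (emitOne is a placeholder).
RISCVMatInt::InstSeq Seq;
RISCVMatInt::generateInstSeq(0x100000001, /*IsRV64=*/true, Seq);
for (const RISCVMatInt::Inst &Instr : Seq) {
  // Instr.Opc is one of RISCV::LUI / ADDI / ADDIW / SLLI and Instr.Imm is its
  // immediate operand; register operands are chosen by the caller.
  emitOne(Instr.Opc, Instr.Imm);
}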
diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 35f52f7d279b..691421e533ea 100644
--- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -78,6 +78,8 @@ class SparcAsmParser : public MCTargetAsmParser {
// Custom parse functions for Sparc specific operands.
OperandMatchResultTy parseMEMOperand(OperandVector &Operands);
+ OperandMatchResultTy parseMembarTag(OperandVector &Operands);
+
OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name);
OperandMatchResultTy
@@ -256,6 +258,7 @@ public:
bool isMem() const override { return isMEMrr() || isMEMri(); }
bool isMEMrr() const { return Kind == k_MemoryReg; }
bool isMEMri() const { return Kind == k_MemoryImm; }
+ bool isMembarTag() const { return Kind == k_Immediate; }
bool isIntReg() const {
return (Kind == k_Register && Reg.Kind == rk_IntReg);
@@ -366,6 +369,12 @@ public:
addExpr(Inst, Expr);
}
+ void addMembarTagOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCExpr *Expr = getImm();
+ addExpr(Inst, Expr);
+ }
+
static std::unique_ptr<SparcOperand> CreateToken(StringRef Str, SMLoc S) {
auto Op = make_unique<SparcOperand>(k_Token);
Op->Tok.Data = Str.data();
@@ -742,6 +751,52 @@ SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
return MatchOperand_Success;
}
+OperandMatchResultTy SparcAsmParser::parseMembarTag(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const MCExpr *EVal;
+ int64_t ImmVal = 0;
+
+ std::unique_ptr<SparcOperand> Mask;
+ if (parseSparcAsmOperand(Mask) == MatchOperand_Success) {
+ if (!Mask->isImm() || !Mask->getImm()->evaluateAsAbsolute(ImmVal) ||
+ ImmVal < 0 || ImmVal > 127) {
+ Error(S, "invalid membar mask number");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ while (getLexer().getKind() == AsmToken::Hash) {
+ SMLoc TagStart = getLexer().getLoc();
+ Parser.Lex(); // Eat the '#'.
+ unsigned MaskVal = StringSwitch<unsigned>(Parser.getTok().getString())
+ .Case("LoadLoad", 0x1)
+ .Case("StoreLoad", 0x2)
+ .Case("LoadStore", 0x4)
+ .Case("StoreStore", 0x8)
+ .Case("Lookaside", 0x10)
+ .Case("MemIssue", 0x20)
+ .Case("Sync", 0x40)
+ .Default(0);
+
+ Parser.Lex(); // Eat the identifier token.
+
+ if (!MaskVal) {
+ Error(TagStart, "unknown membar tag");
+ return MatchOperand_ParseFail;
+ }
+
+ ImmVal |= MaskVal;
+
+ if (getLexer().getKind() == AsmToken::Pipe)
+ Parser.Lex(); // Eat the '|'.
+ }
+
+ EVal = MCConstantExpr::create(ImmVal, getContext());
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(SparcOperand::CreateImm(EVal, S, E));
+ return MatchOperand_Success;
+}
+
OperandMatchResultTy
SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
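The tag names accepted by parseMembarTag() above correspond to the SPARC V9 membar mask bits, and several '#tag' tokens joined with '|' simply OR together into the final immediate, which the printer hunk further down renders back into the same names. A self-contained sketch of that accumulation (illustration only; the real parser consumes AsmTokens rather than C strings):

#include <cassert>
#include <cstring>

static unsigned membarTagBit(const char *Tag) {
  if (!strcmp(Tag, "LoadLoad"))   return 0x1;
  if (!strcmp(Tag, "StoreLoad"))  return 0x2;
  if (!strcmp(Tag, "LoadStore"))  return 0x4;
  if (!strcmp(Tag, "StoreStore")) return 0x8;
  if (!strcmp(Tag, "Lookaside"))  return 0x10;
  if (!strcmp(Tag, "MemIssue"))   return 0x20;
  if (!strcmp(Tag, "Sync"))       return 0x40;
  return 0; // unknown tag -> parse error in the real parser
}

int main() {
  // "membar #LoadLoad | #StoreStore" assembles to the immediate 0x9.
  unsigned Imm = membarTagBit("LoadLoad") | membarTagBit("StoreStore");
  assert(Imm == 0x9);
  return 0;
}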
diff --git a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 3e30dae1537f..0045e63a824e 100644
--- a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -11,9 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
-#include "SparcRegisterInfo.h"
-#include "SparcSubtarget.h"
+#include "MCTargetDesc/SparcMCTargetDesc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index c1512cbdc44f..d152efae6d1f 100644
--- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -195,3 +195,26 @@ bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum,
llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX.");
return true;
}
+
+void SparcInstPrinter::printMembarTag(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ static const char *const TagNames[] = {
+ "#LoadLoad", "#StoreLoad", "#LoadStore", "#StoreStore",
+ "#Lookaside", "#MemIssue", "#Sync"};
+
+ unsigned Imm = MI->getOperand(opNum).getImm();
+
+ if (Imm > 127) {
+ O << Imm;
+ return;
+ }
+
+ bool First = true;
+ for (unsigned i = 0; i < sizeof(TagNames) / sizeof(char *); i++) {
+ if (Imm & (1 << i)) {
+ O << (First ? "" : " | ") << TagNames[i];
+ First = false;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index 6f06d1ddae32..89015eb137c2 100644
--- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -49,6 +49,8 @@ public:
raw_ostream &OS);
bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &OS);
+ void printMembarTag(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/LeonFeatures.td b/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
index a7dea068cb11..61e5f16e0a1e 100755
--- a/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
+++ b/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
@@ -58,3 +58,7 @@ def FixAllFDIVSQRT : SubtargetFeature<
"true",
"LEON erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store"
>;
+
+def LeonCycleCounter
+ : SubtargetFeature<"leoncyclecounter", "HasLeonCycleCounter", "true",
+ "Use the Leon cycle counter register">;
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 5f5e2ef7d45a..d7f1e3a1ab1d 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -100,6 +100,20 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
}
}
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ return 4;
+ case FK_Data_1:
+ return 1;
+ case FK_Data_2:
+ return 2;
+ case FK_Data_8:
+ return 8;
+ }
+}
+
namespace {
class SparcAsmBackend : public MCAsmBackend {
protected:
@@ -290,13 +304,13 @@ namespace {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
unsigned Offset = Fixup.getOffset();
-
// For each byte of the fragment that the fixup touches, mask in the bits
// from the fixup value. The Value has been "split up" into the
// appropriate bitfields above.
- for (unsigned i = 0; i != 4; ++i) {
- unsigned Idx = Endian == support::little ? i : 3 - i;
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = Endian == support::little ? i : (NumBytes - 1) - i;
Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
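The rewritten loop above now touches only as many bytes as the fixup kind covers and indexes them back to front on a big-endian target. A standalone sketch of the same byte placement for a two-byte FK_Data_2 fixup (illustrative values; SPARC object files are big-endian):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t Data[4] = {0, 0, 0, 0};
  uint64_t Value = 0x1234;   // fixup value after adjustFixupValue
  unsigned NumBytes = 2;     // getFixupKindNumBytes(FK_Data_2)
  unsigned Offset = 1;
  bool IsLittle = false;     // big-endian, as on SPARC
  for (unsigned i = 0; i != NumBytes; ++i) {
    unsigned Idx = IsLittle ? i : (NumBytes - 1) - i;
    Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
  }
  assert(Data[1] == 0x12 && Data[2] == 0x34); // most significant byte first
  return 0;
}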
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index f736a37a266c..4ddb72643a91 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "SparcMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/Object/ELF.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetStreamer.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
index 3b503503abce..8bb418e39ab4 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetStreamer.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_SPARC_SPARCTARGETSTREAMER_H
-#define LLVM_LIB_TARGET_SPARC_SPARCTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCTARGETSTREAMER_H
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCStreamer.h"
@@ -33,7 +33,6 @@ public:
SparcTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
void emitSparcRegisterIgnore(unsigned reg) override;
void emitSparcRegisterScratch(unsigned reg) override;
-
};
// This part is for ELF object output
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm/lib/Target/Sparc/Sparc.td
index 2f9b57f76041..0412215be8ab 100644
--- a/contrib/llvm/lib/Target/Sparc/Sparc.td
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.td
@@ -49,6 +49,9 @@ def FeatureVIS3
def FeatureLeon
: SubtargetFeature<"leon", "IsLeon", "true",
"Enable LEON extensions">;
+def FeaturePWRPSR
+ : SubtargetFeature<"leonpwrpsr", "HasPWRPSR", "true",
+ "Enable the PWRPSR instruction">;
def FeatureHardQuad
: SubtargetFeature<"hard-quad-float", "HasHardQuad", "true",
@@ -159,7 +162,8 @@ def : Processor<"leon4", LEON4Itineraries,
// LEON 4 FT (GR740)
// TO DO: Place-holder: Processor specific features will be added *very* soon here.
def : Processor<"gr740", LEON4Itineraries,
- [FeatureLeon, UMACSMACSupport, LeonCASA]>;
+ [FeatureLeon, UMACSMACSupport, LeonCASA, LeonCycleCounter,
+ FeaturePWRPSR]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 19fb94534b25..5f0e359a3b00 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -14,10 +14,10 @@
#include "InstPrinter/SparcInstPrinter.h"
#include "MCTargetDesc/SparcMCExpr.h"
+#include "MCTargetDesc/SparcTargetStreamer.h"
#include "Sparc.h"
#include "SparcInstrInfo.h"
#include "SparcTargetMachine.h"
-#include "SparcTargetStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index b04c6b112682..ae2257618a55 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -780,6 +780,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
const unsigned StackOffset = 92;
bool hasStructRetAttr = false;
+ unsigned SRetArgSize = 0;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
i != e;
@@ -824,6 +825,11 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
hasStructRetAttr = true;
+ // sret only allowed on first argument
+ assert(Outs[realArgIdx].OrigArgIndex == 0);
+ PointerType *Ty = cast<PointerType>(CLI.getArgs()[0].Ty);
+ Type *ElementTy = Ty->getElementType();
+ SRetArgSize = DAG.getDataLayout().getTypeAllocSize(ElementTy);
continue;
}
@@ -846,12 +852,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
if (VA.getLocVT() == MVT::f64) {
// Move from the float value from float registers into the
// integer registers.
-
- // TODO: The f64 -> v2i32 conversion is super-inefficient for
- // constants: it sticks them in the constant pool, then loads
- // to a fp register, then stores to temp memory, then loads to
- // integer registers.
- Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg))
+ Arg = bitcastConstantFPToInt(C, dl, DAG);
+ else
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
}
SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
@@ -932,7 +936,6 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
}
- unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0;
bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CS);
// If the callee is a GlobalAddress node (quite common, every direct call is)
@@ -1032,51 +1035,6 @@ unsigned SparcTargetLowering::getRegisterByName(const char* RegName, EVT VT,
report_fatal_error("Invalid register name global variable");
}
-// This functions returns true if CalleeName is a ABI function that returns
-// a long double (fp128).
-static bool isFP128ABICall(const char *CalleeName)
-{
- static const char *const ABICalls[] =
- { "_Q_add", "_Q_sub", "_Q_mul", "_Q_div",
- "_Q_sqrt", "_Q_neg",
- "_Q_itoq", "_Q_stoq", "_Q_dtoq", "_Q_utoq",
- "_Q_lltoq", "_Q_ulltoq",
- nullptr
- };
- for (const char * const *I = ABICalls; *I != nullptr; ++I)
- if (strcmp(CalleeName, *I) == 0)
- return true;
- return false;
-}
-
-unsigned
-SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
-{
- const Function *CalleeFn = nullptr;
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- CalleeFn = dyn_cast<Function>(G->getGlobal());
- } else if (ExternalSymbolSDNode *E =
- dyn_cast<ExternalSymbolSDNode>(Callee)) {
- const Function &F = DAG.getMachineFunction().getFunction();
- const Module *M = F.getParent();
- const char *CalleeName = E->getSymbol();
- CalleeFn = M->getFunction(CalleeName);
- if (!CalleeFn && isFP128ABICall(CalleeName))
- return 16; // Return sizeof(fp128)
- }
-
- if (!CalleeFn)
- return 0;
-
- // It would be nice to check for the sret attribute on CalleeFn here,
- // but since it is not part of the function type, any check will misfire.
-
- PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
- Type *ElementTy = Ty->getElementType();
- return DAG.getDataLayout().getTypeAllocSize(ElementTy);
-}
-
-
// Fixup floating point arguments in the ... part of a varargs call.
//
// The SPARC v9 ABI requires that floating point arguments are treated the same
@@ -1587,9 +1545,6 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
- setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
- setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
-
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
@@ -1841,6 +1796,13 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMUL, MVT::f32, Promote);
}
+ // Custom combine bitcast between f64 and v2i32
+ if (!Subtarget->is64Bit())
+ setTargetDAGCombine(ISD::BITCAST);
+
+ if (Subtarget->hasLeonCycleCounter())
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setMinFunctionAlignment(2);
@@ -1863,8 +1825,6 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC";
case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
- case SPISD::EH_SJLJ_SETJMP: return "SPISD::EH_SJLJ_SETJMP";
- case SPISD::EH_SJLJ_LONGJMP: return "SPISD::EH_SJLJ_LONGJMP";
case SPISD::Hi: return "SPISD::Hi";
case SPISD::Lo: return "SPISD::Lo";
case SPISD::FTOI: return "SPISD::FTOI";
@@ -1906,8 +1866,8 @@ void SparcTargetLowering::computeKnownBitsForTargetNode
case SPISD::SELECT_ICC:
case SPISD::SELECT_XCC:
case SPISD::SELECT_FCC:
- DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1);
- DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1);
+ Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
@@ -2537,20 +2497,6 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(SPCC, dl, MVT::i32), CompareFlag);
}
-SDValue SparcTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const {
- SDLoc DL(Op);
- return DAG.getNode(SPISD::EH_SJLJ_SETJMP, DL,
- DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1));
-
-}
-
-SDValue SparcTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const {
- SDLoc DL(Op);
- return DAG.getNode(SPISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), Op.getOperand(1));
-}
-
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI) {
MachineFunction &MF = DAG.getMachineFunction();
@@ -2666,7 +2612,8 @@ static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
}
static SDValue getFRAMEADDR(uint64_t depth, SDValue Op, SelectionDAG &DAG,
- const SparcSubtarget *Subtarget) {
+ const SparcSubtarget *Subtarget,
+ bool AlwaysFlush = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
@@ -2676,17 +2623,11 @@ static SDValue getFRAMEADDR(uint64_t depth, SDValue Op, SelectionDAG &DAG,
unsigned stackBias = Subtarget->getStackPointerBias();
SDValue FrameAddr;
-
- if (depth == 0) {
- FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
- if (Subtarget->is64Bit())
- FrameAddr = DAG.getNode(ISD::ADD, dl, VT, FrameAddr,
- DAG.getIntPtrConstant(stackBias, dl));
- return FrameAddr;
- }
+ SDValue Chain;
// flush first to make sure the windowed registers' values are in stack
- SDValue Chain = getFLUSHW(Op, DAG);
+ Chain = (depth || AlwaysFlush) ? getFLUSHW(Op, DAG) : DAG.getEntryNode();
+
FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
unsigned Offset = (Subtarget->is64Bit()) ? (stackBias + 112) : 56;
@@ -2735,7 +2676,7 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
}
// Need frame address to find return address of the caller.
- SDValue FrameAddr = getFRAMEADDR(depth - 1, Op, DAG, Subtarget);
+ SDValue FrameAddr = getFRAMEADDR(depth - 1, Op, DAG, Subtarget, true);
unsigned Offset = (Subtarget->is64Bit()) ? 120 : 60;
SDValue Ptr = DAG.getNode(ISD::ADD,
@@ -3085,8 +3026,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
hasHardQuad);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this,
hasHardQuad);
- case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG, *this);
- case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG, *this);
case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
@@ -3120,6 +3059,40 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
+SDValue SparcTargetLowering::bitcastConstantFPToInt(ConstantFPSDNode *C,
+ const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ APInt V = C->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(V.zextOrTrunc(32), DL, MVT::i32);
+ SDValue Hi = DAG.getConstant(V.lshr(32).zextOrTrunc(32), DL, MVT::i32);
+ if (DAG.getDataLayout().isLittleEndian())
+ std::swap(Lo, Hi);
+ return DAG.getBuildVector(MVT::v2i32, DL, {Hi, Lo});
+}
+
+SDValue SparcTargetLowering::PerformBITCASTCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDLoc dl(N);
+ SDValue Src = N->getOperand(0);
+
+ if (isa<ConstantFPSDNode>(Src) && N->getSimpleValueType(0) == MVT::v2i32 &&
+ Src.getSimpleValueType() == MVT::f64)
+ return bitcastConstantFPToInt(cast<ConstantFPSDNode>(Src), dl, DCI.DAG);
+
+ return SDValue();
+}
+
+SDValue SparcTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BITCAST:
+ return PerformBITCASTCombine(N, DCI);
+ }
+ return SDValue();
+}
+
MachineBasicBlock *
SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
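The new bitcastConstantFPToInt() splits an f64 constant into its two 32-bit halves directly, so constant operands no longer take the constant-pool/load/store round trip described in the removed TODO. A standalone check of the split for the constant 1.0 (on big-endian SPARC no swap happens, so the resulting v2i32 is {0x3FF00000, 0x00000000}):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  double D = 1.0;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));  // bitcastToAPInt() equivalent
  uint32_t Lo = uint32_t(Bits);          // V.zextOrTrunc(32)
  uint32_t Hi = uint32_t(Bits >> 32);    // V.lshr(32).zextOrTrunc(32)
  assert(Hi == 0x3FF00000 && Lo == 0);   // {Hi, Lo} build vector on big-endian
  return 0;
}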
@@ -3135,13 +3108,6 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case SP::SELECT_CC_DFP_FCC:
case SP::SELECT_CC_QFP_FCC:
return expandSelectCC(MI, BB, SP::FBCOND);
- case SP::EH_SJLJ_SETJMP32ri:
- case SP::EH_SJLJ_SETJMP32rr:
- return emitEHSjLjSetJmp(MI, BB);
- case SP::EH_SJLJ_LONGJMP32rr:
- case SP::EH_SJLJ_LONGJMP32ri:
- return emitEHSjLjLongJmp(MI, BB);
-
}
}
@@ -3201,205 +3167,6 @@ SparcTargetLowering::expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB,
return SinkMBB;
}
-MachineBasicBlock *
-SparcTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- DebugLoc DL = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
-
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MachineInstrBuilder MIB;
-
- MVT PVT = getPointerTy(MF->getDataLayout());
- unsigned RegSize = PVT.getStoreSize();
- assert(PVT == MVT::i32 && "Invalid Pointer Size!");
-
- unsigned Buf = MI.getOperand(0).getReg();
- unsigned JmpLoc = MRI.createVirtualRegister(&SP::IntRegsRegClass);
-
- // TO DO: If we do 64-bit handling, this perhaps should be FLUSHW, not TA 3
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::TRAPri), SP::G0).addImm(3).addImm(SPCC::ICC_A);
-
- // Instruction to restore FP
- const unsigned FP = SP::I6;
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(FP)
- .addReg(Buf)
- .addImm(0);
-
- // Instruction to load jmp location
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(JmpLoc, RegState::Define)
- .addReg(Buf)
- .addImm(RegSize);
-
- // Instruction to restore SP
- const unsigned SP = SP::O6;
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(SP)
- .addReg(Buf)
- .addImm(2 * RegSize);
-
- // Instruction to restore I7
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(SP::I7)
- .addReg(Buf, RegState::Kill)
- .addImm(3 * RegSize);
-
- // Jump to JmpLoc
- BuildMI(*MBB, MI, DL, TII->get(SP::JMPLrr)).addReg(SP::G0).addReg(JmpLoc, RegState::Kill).addReg(SP::G0);
-
- MI.eraseFromParent();
- return MBB;
-}
-
-MachineBasicBlock *
-SparcTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- DebugLoc DL = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
-
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MachineInstrBuilder MIB;
-
- MVT PVT = getPointerTy(MF->getDataLayout());
- unsigned RegSize = PVT.getStoreSize();
- assert(PVT == MVT::i32 && "Invalid Pointer Size!");
-
- unsigned DstReg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
- (void)TRI;
- unsigned mainDstReg = MRI.createVirtualRegister(RC);
- unsigned restoreDstReg = MRI.createVirtualRegister(RC);
-
- // For v = setjmp(buf), we generate
- //
- // thisMBB:
- // buf[0] = FP
- // buf[RegSize] = restoreMBB <-- takes address of restoreMBB
- // buf[RegSize * 2] = O6
- // buf[RegSize * 3] = I7
- // Ensure restoreMBB remains in the relocations list (done using a bn instruction)
- // b mainMBB
- //
- // mainMBB:
- // v_main = 0
- // b sinkMBB
- //
- // restoreMBB:
- // v_restore = 1
- // --fall through--
- //
- // sinkMBB:
- // v = phi(main, restore)
-
- const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator It = ++MBB->getIterator();
- MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
-
- MF->insert(It, mainMBB);
- MF->insert(It, restoreMBB);
- MF->insert(It, sinkMBB);
- restoreMBB->setHasAddressTaken();
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)),
- MBB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
-
- unsigned LabelReg = MRI.createVirtualRegister(&SP::IntRegsRegClass);
- unsigned LabelReg2 = MRI.createVirtualRegister(&SP::IntRegsRegClass);
- unsigned BufReg = MI.getOperand(1).getReg();
-
- // Instruction to store FP
- const unsigned FP = SP::I6;
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(0)
- .addReg(FP);
-
- // Instructions to store jmp location
- MIB = BuildMI(thisMBB, DL, TII->get(SP::SETHIi))
- .addReg(LabelReg, RegState::Define)
- .addMBB(restoreMBB, SparcMCExpr::VK_Sparc_HI);
-
- MIB = BuildMI(thisMBB, DL, TII->get(SP::ORri))
- .addReg(LabelReg2, RegState::Define)
- .addReg(LabelReg, RegState::Kill)
- .addMBB(restoreMBB, SparcMCExpr::VK_Sparc_LO);
-
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(RegSize)
- .addReg(LabelReg2, RegState::Kill);
-
- // Instruction to store SP
- const unsigned SP = SP::O6;
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(2 * RegSize)
- .addReg(SP);
-
- // Instruction to store I7
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(3 * RegSize)
- .addReg(SP::I7);
-
-
- // FIX ME: This next instruction ensures that the restoreMBB block address remains
- // valid through optimization passes and serves no other purpose. The ICC_N ensures
- // that the branch is never taken. This commented-out code here was an alternative
- // attempt to achieve this which brought myriad problems.
- //MIB = BuildMI(thisMBB, DL, TII->get(SP::EH_SjLj_Setup)).addMBB(restoreMBB, SparcMCExpr::VK_Sparc_None);
- MIB = BuildMI(thisMBB, DL, TII->get(SP::BCOND))
- .addMBB(restoreMBB)
- .addImm(SPCC::ICC_N);
-
- MIB = BuildMI(thisMBB, DL, TII->get(SP::BCOND))
- .addMBB(mainMBB)
- .addImm(SPCC::ICC_A);
-
- thisMBB->addSuccessor(mainMBB);
- thisMBB->addSuccessor(restoreMBB);
-
-
- // mainMBB:
- MIB = BuildMI(mainMBB, DL, TII->get(SP::ORrr))
- .addReg(mainDstReg, RegState::Define)
- .addReg(SP::G0)
- .addReg(SP::G0);
- MIB = BuildMI(mainMBB, DL, TII->get(SP::BCOND)).addMBB(sinkMBB).addImm(SPCC::ICC_A);
-
- mainMBB->addSuccessor(sinkMBB);
-
-
- // restoreMBB:
- MIB = BuildMI(restoreMBB, DL, TII->get(SP::ORri))
- .addReg(restoreDstReg, RegState::Define)
- .addReg(SP::G0)
- .addImm(1);
- //MIB = BuildMI(restoreMBB, DL, TII->get(SP::BCOND)).addMBB(sinkMBB).addImm(SPCC::ICC_A);
- restoreMBB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- MIB = BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(SP::PHI), DstReg)
- .addReg(mainDstReg).addMBB(mainMBB)
- .addReg(restoreDstReg).addMBB(restoreMBB);
-
- MI.eraseFromParent();
- return sinkMBB;
-}
-
//===----------------------------------------------------------------------===//
// Sparc Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -3494,23 +3261,23 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
else
return std::make_pair(0U, &SP::IntRegsRegClass);
case 'f':
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &SP::FPRegsRegClass);
- else if (VT == MVT::f64)
+ else if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &SP::LowDFPRegsRegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &SP::LowQFPRegsRegClass);
- llvm_unreachable("Unknown ValueType for f-register-type!");
- break;
+ // This will generate an error message
+ return std::make_pair(0U, nullptr);
case 'e':
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &SP::FPRegsRegClass);
- else if (VT == MVT::f64)
+ else if (VT == MVT::f64 || VT == MVT::i64 )
return std::make_pair(0U, &SP::DFPRegsRegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &SP::QFPRegsRegClass);
- llvm_unreachable("Unknown ValueType for e-register-type!");
- break;
+ // This will generate an error message
+ return std::make_pair(0U, nullptr);
}
} else if (!Constraint.empty() && Constraint.size() <= 5
&& Constraint[0] == '{' && *(Constraint.end()-1) == '}') {
@@ -3587,7 +3354,16 @@ void SparcTargetLowering::ReplaceNodeResults(SDNode *N,
getLibcallName(libCall),
1));
return;
-
+ case ISD::READCYCLECOUNTER: {
+ assert(Subtarget->hasLeonCycleCounter());
+ SDValue Lo = DAG.getCopyFromReg(N->getOperand(0), dl, SP::ASR23, MVT::i32);
+ SDValue Hi = DAG.getCopyFromReg(Lo, dl, SP::G0, MVT::i32);
+ SDValue Ops[] = { Lo, Hi };
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops);
+ Results.push_back(Pair);
+ Results.push_back(N->getOperand(0));
+ return;
+ }
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
// Custom lower only if it involves f128 or i64.
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
index 0cbbda787881..718851db25bf 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -33,9 +33,6 @@ namespace llvm {
SELECT_XCC, // Select between two values using the current XCC flags.
SELECT_FCC, // Select between two values using the current FCC flags.
- EH_SJLJ_SETJMP, // builtin setjmp operation
- EH_SJLJ_LONGJMP, // builtin longjmp operation
-
Hi, Lo, // Hi/Lo operations, typically on a global address.
FTOI, // FP to Int within a FP register.
@@ -171,12 +168,6 @@ namespace llvm {
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const ;
- SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const ;
-
- unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
SelectionDAG &DAG) const;
@@ -192,6 +183,13 @@ namespace llvm {
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue PerformBITCASTCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ SDValue bitcastConstantFPToInt(ConstantFPSDNode *C, const SDLoc &DL,
+ SelectionDAG &DAG) const;
+
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
bool ShouldShrinkFPConstant(EVT VT) const override {
// Do not shrink FP constpool if VT == MVT::f128.
// (ldd, call _Q_fdtoq) is more expensive than two ldds.
@@ -213,10 +211,6 @@ namespace llvm {
MachineBasicBlock *expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB,
unsigned BROpcode) const;
- MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const;
- MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index f6518c936ebc..0b94c6b614eb 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -239,7 +239,7 @@ let Predicates = [Is64Bit] in {
let DecoderMethod = "DecodeLoadInt" in
defm LDX : Load<"ldx", 0b001011, load, I64Regs, i64>;
-let mayLoad = 1, isCodeGenOnly = 1, isAsmParserOnly = 1 in
+let mayLoad = 1, isAsmParserOnly = 1 in
def TLS_LDXrr : F3_1<3, 0b001011,
(outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym),
"ldx [$addr], $dst, $sym",
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
index 352090ed92c1..35987390d7ba 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
@@ -470,10 +470,15 @@ def : InstAlias<"wr $simm13, %wim", (WRWIMri G0, i32imm:$simm13), 0>;
def : InstAlias<"wr $rs2, %tbr", (WRTBRrr G0, IntRegs:$rs2), 0>;
def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>;
+def : InstAlias<"pwr $rs2, %psr", (PWRPSRrr G0, IntRegs:$rs2), 0>;
+def : InstAlias<"pwr $simm13, %psr", (PWRPSRri G0, i32imm:$simm13), 0>;
// flush -> flush %g0
def : InstAlias<"flush", (FLUSH), 0>;
+// unimp -> unimp 0
+def : InstAlias<"unimp", (UNIMP 0), 0>;
+
def : MnemonicAlias<"iflush", "flush">;
def : MnemonicAlias<"stub", "stb">;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5b7fb3c485e8..558b37aeebcb 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -56,6 +56,11 @@ def HasHardQuad : Predicate<"Subtarget->hasHardQuad()">;
// instruction
def HasLeonCASA : Predicate<"Subtarget->hasLeonCasa()">;
+// HasPWRPSR - This is true when the target processor supports partial
+// writes to the PSR register that only affect the ET field.
+def HasPWRPSR : Predicate<"Subtarget->hasPWRPSR()">,
+ AssemblerPredicate<"FeaturePWRPSR">;
+
// HasUMAC_SMAC - This is true when the target processor supports the
// UMAC and SMAC instructions
def HasUMAC_SMAC : Predicate<"Subtarget->hasUmacSmac()">;
@@ -89,10 +94,22 @@ def HI22 : SDNodeXForm<imm, [{
MVT::i32);
}]>;
+// Return the complement of a HI22 immediate value.
+def HI22_not : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(unsigned)N->getZExtValue() >> 10, SDLoc(N),
+ MVT::i32);
+}]>;
+
def SETHIimm : PatLeaf<(imm), [{
return isShiftedUInt<22, 10>(N->getZExtValue());
}], HI22>;
+// The N->hasOneUse() prevents the immediate from being instantiated in both
+// normal and complement form.
+def SETHIimm_not : PatLeaf<(i32 imm), [{
+ return N->hasOneUse() && isShiftedUInt<22, 10>(~(unsigned)N->getZExtValue());
+}], HI22_not>;
+
// Addressing modes.
def ADDRrr : ComplexPattern<iPTR, 2, "SelectADDRrr", [], []>;
def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [frameindex], []>;
@@ -121,6 +138,16 @@ def MEMri : Operand<iPTR> {
def TLSSym : Operand<iPTR>;
+def SparcMembarTagAsmOperand : AsmOperandClass {
+ let Name = "MembarTag";
+ let ParserMethod = "parseMembarTag";
+}
+
+def MembarTag : Operand<i32> {
+ let PrintMethod = "printMembarTag";
+ let ParserMatchClass = SparcMembarTagAsmOperand;
+}
+
// Branch targets have OtherVT type.
def brtarget : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
@@ -169,9 +196,6 @@ SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDTSPtlsld :
SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
-def SDTSPeh_sjlj_setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
-def SDTSPeh_sjlj_longjmp: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-
def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>;
def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
@@ -190,13 +214,6 @@ def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>;
def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
-def SPsjlj_setjmp: SDNode<"SPISD::EH_SJLJ_SETJMP",
- SDTSPeh_sjlj_setjmp,
- [SDNPHasChain, SDNPSideEffect]>;
-def SPsjlj_longjmp: SDNode<"SPISD::EH_SJLJ_LONGJMP",
- SDTSPeh_sjlj_longjmp,
- [SDNPHasChain, SDNPSideEffect]>;
-
// These are target-independent nodes, but have target-specific formats.
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -473,27 +490,6 @@ let usesCustomInserter = 1, Uses = [FCC0] in {
[(set f128:$dst, (SPselectfcc f128:$T, f128:$F, imm:$Cond))]>;
}
-let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
- let Defs = [WIM] in
- def EH_SJLJ_SETJMP32ri : Pseudo<(outs IntRegs:$dst), (ins MEMri:$buf),
- "#EH_SJLJ_SETJMP32",
- [(set i32:$dst, (SPsjlj_setjmp ADDRri:$buf))]>,
- Requires<[Is32Bit]>;
- def EH_SJLJ_SETJMP32rr : Pseudo<(outs IntRegs:$dst), (ins MEMrr:$buf),
- "#EH_SJLJ_SETJMP32",
- [(set i32:$dst, (SPsjlj_setjmp ADDRrr:$buf))]>,
- Requires<[Is32Bit]>;
- let isTerminator = 1 in
- def EH_SJLJ_LONGJMP32ri : Pseudo<(outs), (ins MEMri:$buf),
- "#EH_SJLJ_LONGJMP32",
- [(SPsjlj_longjmp ADDRri:$buf)]>,
- Requires<[Is32Bit]>;
- def EH_SJLJ_LONGJMP32rr : Pseudo<(outs), (ins MEMrr:$buf),
- "#EH_SJLJ_LONGJMP32",
- [(SPsjlj_longjmp ADDRrr:$buf)]>,
- Requires<[Is32Bit]>;
-}
-
// Section B.1 - Load Integer Instructions, p. 90
let DecoderMethod = "DecodeLoadInt" in {
defm LDSB : LoadA<"ldsb", 0b001001, 0b011001, sextloadi8, IntRegs, i32>;
@@ -680,6 +676,12 @@ def XNORri : F3_2<2, 0b000111,
(outs IntRegs:$rd), (ins IntRegs:$rs1, simm13Op:$simm13),
"xnor $rs1, $simm13, $rd", []>;
+def : Pat<(and IntRegs:$rs1, SETHIimm_not:$rs2),
+ (ANDNrr i32:$rs1, (SETHIi SETHIimm_not:$rs2))>;
+
+def : Pat<(or IntRegs:$rs1, SETHIimm_not:$rs2),
+ (ORNrr i32:$rs1, (SETHIi SETHIimm_not:$rs2))>;
+
let Defs = [ICC] in {
defm ANDCC : F3_12np<"andcc", 0b010001>;
defm ANDNCC : F3_12np<"andncc", 0b010101>;
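The SETHIimm_not/HI22_not patterns above fold an AND or OR with an immediate whose complement fits a SETHI (the complement's low ten bits are zero) into a SETHI of the complement followed by ANDN or ORN. A standalone arithmetic check of the AND case (illustrative register value; SETHI deposits its 22-bit immediate into bits 31..10):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Imm = 0xFFFFF3FF;              // ~Imm == 0xC00 fits SETHI
  uint32_t SethiImm = ~Imm >> 10;         // HI22_not transform -> 3
  uint32_t Materialised = SethiImm << 10; // what SETHI actually produces
  uint32_t Reg = 0xDEADBEEF;
  assert((Reg & ~Materialised) == (Reg & Imm)); // ANDN equals AND with Imm
  return 0;
}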
@@ -1316,7 +1318,7 @@ let Defs = [FCC0], rd = 0, isCodeGenOnly = 1 in {
//===----------------------------------------------------------------------===//
// Instructions for Thread Local Storage(TLS).
//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 1, isAsmParserOnly = 1 in {
+let isAsmParserOnly = 1 in {
def TLS_ADDrr : F3_1<2, 0b000000,
(outs IntRegs:$rd),
(ins IntRegs:$rs1, IntRegs:$rs2, TLSSym:$sym),
@@ -1511,7 +1513,7 @@ def : Pat<(ctpop i32:$src),
(POPCrr (SRLri $src, 0))>;
let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in
- def MEMBARi : F3_2<2, 0b101000, (outs), (ins simm13Op:$simm13),
+ def MEMBARi : F3_2<2, 0b101000, (outs), (ins MembarTag:$simm13),
"membar $simm13", []>;
// The CAS instruction, unlike other instructions, only comes in a
@@ -1569,6 +1571,17 @@ let Predicates = [HasUMAC_SMAC], Defs = [Y, ASR18], Uses = [Y, ASR18] in {
[], IIC_smac_umac>;
}
+// The partial write WRPSR instruction has a non-zero destination
+// register value to distinguish it from the standard WRPSR encoding.
+let Predicates = [HasPWRPSR], Defs = [PSR], rd=1 in {
+ def PWRPSRrr : F3_1<2, 0b110001,
+ (outs), (ins IntRegs:$rs1, IntRegs:$rs2),
+ "pwr $rs1, $rs2, %psr", []>;
+ def PWRPSRri : F3_2<2, 0b110001,
+ (outs), (ins IntRegs:$rs1, simm13Op:$simm13),
+ "pwr $rs1, $simm13, %psr", []>;
+}
+
let Defs = [ICC] in {
defm TADDCC : F3_12np<"taddcc", 0b100000>;
defm TSUBCC : F3_12np<"tsubcc", 0b100001>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index b9647eaa3d51..33caa66154ff 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -95,6 +95,10 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
+ // Reserve ASR1-ASR31
+ for (unsigned n = 0; n < 31; n++)
+ Reserved.set(SP::ASR1 + n);
+
return Reserved;
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index 2a279dad5ae2..8dd2569d10de 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -35,8 +35,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index 40c5683f8495..5301fc30a006 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -44,9 +44,11 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
// Leon features
HasLeonCasa = false;
HasUmacSmac = false;
+ HasPWRPSR = false;
InsertNOPLoad = false;
FixAllFDIVSQRT = false;
DetectRoundChange = false;
+ HasLeonCycleCounter = false;
// Determine default and user specified characteristics
std::string CPUName = CPU;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
index 588a6765bcdf..24ea41a266e7 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -47,9 +47,11 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
// LEON features
bool HasUmacSmac;
bool HasLeonCasa;
+ bool HasPWRPSR;
bool InsertNOPLoad;
bool FixAllFDIVSQRT;
bool DetectRoundChange;
+ bool HasLeonCycleCounter;
SparcInstrInfo InstrInfo;
SparcTargetLowering TLInfo;
@@ -92,9 +94,11 @@ public:
// Leon options
bool hasUmacSmac() const { return HasUmacSmac; }
bool hasLeonCasa() const { return HasLeonCasa; }
+ bool hasPWRPSR() const { return HasPWRPSR; }
bool insertNOPLoad() const { return InsertNOPLoad; }
bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; }
bool detectRoundChange() const { return DetectRoundChange; }
+ bool hasLeonCycleCounter() const { return HasLeonCycleCounter; }
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 07f9e7250bd9..5b467235f809 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -70,11 +70,16 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
// pic32 PIC_ Medium GOT < 2^32 bytes
//
// All code models require that the text segment is smaller than 2GB.
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- Reloc::Model RM, bool Is64Bit,
- bool JIT) {
- if (CM)
+static CodeModel::Model
+getEffectiveSparcCodeModel(Optional<CodeModel::Model> CM, Reloc::Model RM,
+ bool Is64Bit, bool JIT) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
return *CM;
+ }
if (Is64Bit) {
if (JIT)
return CodeModel::Large;
@@ -88,11 +93,11 @@ SparcTargetMachine::SparcTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Optional<Reloc::Model> RM,
Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT, bool is64bit)
- : LLVMTargetMachine(
- T, computeDataLayout(TT, is64bit), TT, CPU, FS, Options,
- getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), is64bit, JIT),
- OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, is64bit), TT, CPU, FS, Options,
+ getEffectiveRelocModel(RM),
+ getEffectiveSparcCodeModel(
+ CM, getEffectiveRelocModel(RM), is64bit, JIT),
+ OL),
TLOF(make_unique<SparcELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this, is64bit), is64Bit(is64bit) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
index b0d76abeba7d..d1eb1d329a4c 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -40,10 +40,6 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
-
- bool isMachineVerifierClean() const override {
- return false;
- }
};
/// Sparc 32-bit target machine
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index bde067d6c129..91959b4151b3 100644
--- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "InstPrinter/SystemZInstPrinter.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -243,6 +244,11 @@ public:
return Kind == KindImmTLS;
}
+ const ImmTLSOp getImmTLS() const {
+ assert(Kind == KindImmTLS && "Not a TLS immediate");
+ return ImmTLS;
+ }
+
// Memory operands.
bool isMem() const override {
return Kind == KindMem;
@@ -270,6 +276,11 @@ public:
return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x100);
}
+ const MemOp& getMem() const {
+ assert(Kind == KindMem && "Not a Mem operand");
+ return Mem;
+ }
+
// Override MCParsedAsmOperand.
SMLoc getStartLoc() const override { return StartLoc; }
SMLoc getEndLoc() const override { return EndLoc; }
@@ -623,8 +634,61 @@ static struct InsnMatchEntry InsnMatchTable[] = {
{ MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } }
};
+static void printMCExpr(const MCExpr *E, raw_ostream &OS) {
+ if (!E)
+ return;
+ if (auto *CE = dyn_cast<MCConstantExpr>(E))
+ OS << *CE;
+ else if (auto *UE = dyn_cast<MCUnaryExpr>(E))
+ OS << *UE;
+ else if (auto *BE = dyn_cast<MCBinaryExpr>(E))
+ OS << *BE;
+ else if (auto *SRE = dyn_cast<MCSymbolRefExpr>(E))
+ OS << *SRE;
+ else
+ OS << *E;
+}
+
void SystemZOperand::print(raw_ostream &OS) const {
- llvm_unreachable("Not implemented");
+ switch (Kind) {
+ case KindToken:
+ OS << "Token:" << getToken();
+ break;
+ case KindReg:
+ OS << "Reg:" << SystemZInstPrinter::getRegisterName(getReg());
+ break;
+ case KindImm:
+ OS << "Imm:";
+ printMCExpr(getImm(), OS);
+ break;
+ case KindImmTLS:
+ OS << "ImmTLS:";
+ printMCExpr(getImmTLS().Imm, OS);
+ if (getImmTLS().Sym) {
+ OS << ", ";
+ printMCExpr(getImmTLS().Sym, OS);
+ }
+ break;
+ case KindMem: {
+ const MemOp &Op = getMem();
+ OS << "Mem:" << *cast<MCConstantExpr>(Op.Disp);
+ if (Op.Base) {
+ OS << "(";
+ if (Op.MemKind == BDLMem)
+ OS << *cast<MCConstantExpr>(Op.Length.Imm) << ",";
+ else if (Op.MemKind == BDRMem)
+ OS << SystemZInstPrinter::getRegisterName(Op.Length.Reg) << ",";
+ if (Op.Index)
+ OS << SystemZInstPrinter::getRegisterName(Op.Index) << ",";
+ OS << SystemZInstPrinter::getRegisterName(Op.Base);
+ OS << ")";
+ }
+ break;
+ }
+ case KindInvalid:
+ break;
+ }
}
// Parse one register of the form %<prefix><number>.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index bd99fabb48c9..e2de721be568 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -647,7 +647,7 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
}
// Force static initialization.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 9edd1fc36406..668a77ac014f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -294,11 +294,10 @@ bool SystemZElimCompare::convertToLoadAndTest(
return false;
// Rebuild to get the CC operand in the right place.
- MachineInstr *BuiltMI =
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode));
+ auto MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode));
for (const auto &MO : MI.operands())
- BuiltMI->addOperand(MO);
- BuiltMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.add(MO);
+ MIB.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index b9e5788cf018..8726b56bc94f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -49,14 +49,14 @@ getNumDecoderSlots(SUnit *SU) const {
if (!SC->isValid())
return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
- if (SC->BeginGroup) {
- if (!SC->EndGroup)
- return 2; // Cracked instruction
- else
- return 3; // Expanded/group-alone instruction
- }
-
- return 1; // Normal instruction
+ assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
+ "Only cracked instruction can have 2 uops.");
+ assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
+ "Expanded instructions always group alone.");
+ assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
+ "Expanded instructions fill the group(s).");
+
+ return SC->NumMicroOps;
}
unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
@@ -139,16 +139,21 @@ void SystemZHazardRecognizer::nextGroup() {
LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
LLVM_DEBUG(CurGroupDbg = "";);
- GrpCount++;
+ int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
+ assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
+ "Current decoder group bad.");
// Reset counter for next group.
CurrGroupSize = 0;
CurrGroupHas4RegOps = false;
- // Decrease counters for execution units by one.
+ GrpCount += ((unsigned) NumGroups);
+
+ // Decrease counters for execution units.
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
- if (ProcResourceCounters[i] > 0)
- ProcResourceCounters[i]--;
+ ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
+ ? (ProcResourceCounters[i] - NumGroups)
+ : 0);
// Clear CriticalResourceIdx if it is now below the threshold.
if (CriticalResourceIdx != UINT_MAX &&
@@ -323,13 +328,13 @@ EmitInstruction(SUnit *SU) {
// in current group.
CurrGroupSize += getNumDecoderSlots(SU);
CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
- unsigned GroupLim =
- ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
- assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
+ unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
+ assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
+ && "SU does not fit into decoder group!");
// Check if current group is now full/ended. If so, move on to next
// group to be ready to evaluate more candidates.
- if (CurrGroupSize == GroupLim || SC->EndGroup)
+ if (CurrGroupSize >= GroupLim || SC->EndGroup)
nextGroup();
}
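With getNumDecoderSlots() now returning the scheduling-class micro-op count, an expanded instruction can account for several whole decode groups at once, and nextGroup() advances the group counter and the per-unit resource counters by that many groups. A small arithmetic check of that accounting (illustrative numbers only):

#include <cassert>

int main() {
  int CurrGroupSize = 6; // decoder slots taken by an expanded instruction
  int NumGroups = (CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1;
  assert(NumGroups == 2); // two full three-slot decode groups completed
  int ProcResourceCounter = 1; // outstanding uses of one execution unit
  ProcResourceCounter =
      (ProcResourceCounter > NumGroups) ? ProcResourceCounter - NumGroups : 0;
  assert(ProcResourceCounter == 0); // decremented per group, floored at zero
  return 0;
}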
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 5425f1d16e5e..5bc2ab0ef2d8 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -71,19 +71,19 @@ struct SystemZAddressingMode {
// True if the address can (and must) include ADJDYNALLOC.
bool isDynAlloc() { return Form == FormBDXDynAlloc; }
- void dump() {
+ void dump(const llvm::SelectionDAG *DAG) {
errs() << "SystemZAddressingMode " << this << '\n';
errs() << " Base ";
if (Base.getNode())
- Base.getNode()->dump();
+ Base.getNode()->dump(DAG);
else
errs() << "null\n";
if (hasIndexField()) {
errs() << " Index ";
if (Index.getNode())
- Index.getNode()->dump();
+ Index.getNode()->dump(DAG);
else
errs() << "null\n";
}
@@ -589,7 +589,7 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
if (AM.isDynAlloc() && !AM.IncludesDynAlloc)
return false;
- LLVM_DEBUG(AM.dump());
+ LLVM_DEBUG(AM.dump(CurDAG));
return true;
}
@@ -728,8 +728,7 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
// The inner check covers all cases but is more expensive.
uint64_t Used = allOnes(Op.getValueSizeInBits());
if (Used != (AndMask | InsertMask)) {
- KnownBits Known;
- CurDAG->computeKnownBits(Op.getOperand(0), Known);
+ KnownBits Known = CurDAG->computeKnownBits(Op.getOperand(0));
if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue()))
return false;
}
@@ -787,8 +786,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
// If some bits of Input are already known zeros, those bits will have
// been removed from the mask. See if adding them back in makes the
// mask suitable.
- KnownBits Known;
- CurDAG->computeKnownBits(Input, Known);
+ KnownBits Known = CurDAG->computeKnownBits(Input);
Mask |= Known.Zero.getZExtValue();
if (!refineRxSBGMask(RxSBG, Mask))
return false;
@@ -811,8 +809,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
// If some bits of Input are already known ones, those bits will have
// been removed from the mask. See if adding them back in makes the
// mask suitable.
- KnownBits Known;
- CurDAG->computeKnownBits(Input, Known);
+ KnownBits Known = CurDAG->computeKnownBits(Input);
Mask &= ~Known.One.getZExtValue();
if (!refineRxSBGMask(RxSBG, Mask))
return false;
@@ -1147,7 +1144,7 @@ bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
return false;
auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
- if (!Load || !Load->hasOneUse())
+ if (!Load || !Load->hasNUsesOfValue(1, 0))
return false;
if (Load->getMemoryVT().getSizeInBits() !=
Load->getValueType(0).getSizeInBits())
@@ -1308,7 +1305,7 @@ bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
return false;
case SystemZISD::SSUBO:
NegateOperand = true;
- /* fall through */
+ LLVM_FALLTHROUGH;
case SystemZISD::SADDO:
if (MemVT == MVT::i32)
NewOpc = SystemZ::ASI;
@@ -1319,7 +1316,7 @@ bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
break;
case SystemZISD::USUBO:
NegateOperand = true;
- /* fall through */
+ LLVM_FALLTHROUGH;
case SystemZISD::UADDO:
if (MemVT == MVT::i32)
NewOpc = SystemZ::ALSI;
@@ -1354,11 +1351,8 @@ bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
SDValue Ops[] = { Base, Disp, Operand, InputChain };
MachineSDNode *Result =
CurDAG->getMachineNode(NewOpc, DL, MVT::i32, MVT::Other, Ops);
-
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
- MemOp[0] = StoreNode->getMemOperand();
- MemOp[1] = LoadNode->getMemOperand();
- Result->setMemRefs(MemOp, MemOp + 2);
+ CurDAG->setNodeMemRefs(
+ Result, {StoreNode->getMemOperand(), LoadNode->getMemOperand()});
ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index e76fa71dacd7..2a825c1316f3 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -452,29 +452,29 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f64, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f128, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f128, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
}
// We have fused multiply-addition for f32 and f64 but not f128.
@@ -523,10 +523,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+ setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::BSWAP);
+ setTargetDAGCombine(ISD::SDIV);
+ setTargetDAGCombine(ISD::UDIV);
+ setTargetDAGCombine(ISD::SREM);
+ setTargetDAGCombine(ISD::UREM);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -2213,8 +2219,7 @@ static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
if (!Mask)
return;
- KnownBits Known;
- DAG.computeKnownBits(C.Op0.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
return;
@@ -2912,12 +2917,12 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
DAG.getConstant(32, DL, MVT::i64));
}
SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
- return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
DL, MVT::f32, Out64);
}
if (InVT == MVT::f32 && ResVT == MVT::i32) {
SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
- SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
+ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
MVT::f64, SDValue(U64, 0), In);
SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
if (Subtarget.hasHighWord())
@@ -3160,10 +3165,9 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
// Get the known-zero masks for each operand.
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
- KnownBits Known[2];
- DAG.computeKnownBits(Ops[0], Known[0]);
- DAG.computeKnownBits(Ops[1], Known[1]);
+ SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
+ KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
+ DAG.computeKnownBits(Ops[1])};
// See if the upper 32 bits of one operand and the lower 32 bits of the
// other are known zero. They are the low and high operands respectively.
@@ -3346,8 +3350,7 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
}
// Get the known-zero mask for the operand.
- KnownBits Known;
- DAG.computeKnownBits(Op, Known);
+ KnownBits Known = DAG.computeKnownBits(Op);
unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
if (NumSignificantBits == 0)
return DAG.getConstant(0, DL, VT);
@@ -4475,6 +4478,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// Constants with undefs to get a full vector constant and use that
// as the starting point.
SDValue Result;
+ SDValue ReplicatedVal;
if (NumConstants > 0) {
for (unsigned I = 0; I < NumElements; ++I)
if (!Constants[I].getNode())
@@ -4485,17 +4489,21 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// avoid a false dependency on any previous contents of the vector
// register.
- // Use a VLREP if at least one element is a load.
- unsigned LoadElIdx = UINT_MAX;
+ // Use a VLREP if at least one element is a load. Make sure to replicate
+ // the load with the most elements having its value.
+ std::map<const SDNode*, unsigned> UseCounts;
+ SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
if (Elems[I].getOpcode() == ISD::LOAD &&
cast<LoadSDNode>(Elems[I])->isUnindexed()) {
- LoadElIdx = I;
- break;
+ SDNode *Ld = Elems[I].getNode();
+ UseCounts[Ld]++;
+ if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
+ LoadMaxUses = Ld;
}
- if (LoadElIdx != UINT_MAX) {
- Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, Elems[LoadElIdx]);
- Done[LoadElIdx] = true;
+ if (LoadMaxUses != nullptr) {
+ ReplicatedVal = SDValue(LoadMaxUses, 0);
+ Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
} else {
// Try to use VLVGP.
unsigned I1 = NumElements / 2 - 1;
@@ -4516,7 +4524,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// Use VLVGx to insert the other elements.
for (unsigned I = 0; I < NumElements; ++I)
- if (!Done[I] && !Elems[I].isUndef())
+ if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
DAG.getConstant(I, DL, MVT::i32));
return Result;
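[Illustrative sketch, not part of this patch] The hunk above makes buildVector() replicate the load that supplies the most elements rather than the first load it finds. A minimal standalone C++ sketch of that counting logic, with hypothetical names:

#include <map>
#include <vector>

// Return the value that occurs most often in Elems (first maximum wins),
// mirroring how the new code tracks UseCounts per load node and keeps the
// running maximum in LoadMaxUses.
int pickMostFrequent(const std::vector<int> &Elems) {
  std::map<int, unsigned> UseCounts;
  int Best = 0;
  bool HaveBest = false;
  for (int E : Elems) {
    UseCounts[E]++;
    if (!HaveBest || UseCounts[Best] < UseCounts[E]) {
      Best = E;
      HaveBest = true;
    }
  }
  return Best; // 0 if Elems is empty
}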
@@ -5359,6 +5367,46 @@ SDValue SystemZTargetLowering::combineMERGE(
return SDValue();
}
+SDValue SystemZTargetLowering::combineLOAD(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT LdVT = N->getValueType(0);
+ if (LdVT.isVector() || LdVT.isInteger())
+ return SDValue();
+ // Transform a scalar load that is REPLICATEd as well as having other
+ // use(s) to the form where the other use(s) use the first element of the
+ // REPLICATE instead of the load. Otherwise instruction selection will not
+ // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
+ // point loads.
+
+ SDValue Replicate;
+ SmallVector<SDNode*, 8> OtherUses;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == SystemZISD::REPLICATE) {
+ if (Replicate)
+ return SDValue(); // Should never happen
+ Replicate = SDValue(*UI, 0);
+ }
+ else if (UI.getUse().getResNo() == 0)
+ OtherUses.push_back(*UI);
+ }
+ if (!Replicate || OtherUses.empty())
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
+ Replicate, DAG.getConstant(0, DL, MVT::i32));
+ // Update uses of the loaded Value while preserving old chains.
+ for (SDNode *U : OtherUses) {
+ SmallVector<SDValue, 8> Ops;
+ for (SDValue Op : U->ops())
+ Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
+ DAG.UpdateNodeOperands(U, Ops);
+ }
+ return SDValue(N, 0);
+}
+
SDValue SystemZTargetLowering::combineSTORE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
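[Illustrative sketch, not part of this patch] The new combineLOAD() above rewrites the scalar uses of a load that is also REPLICATEd so that they read element 0 of the replicated vector, letting instruction selection emit a single VLREP. A hypothetical source-level pattern that would benefit, written as a standalone sketch using GNU vector extensions:

typedef double v2f64 __attribute__((vector_size(16))); // GNU vector extension

double splatAndAdd(const double *P, v2f64 *Out) {
  double S = *P;      // one scalar FP load ...
  v2f64 V = {S, S};   // ... replicated into both vector lanes (a VLREP candidate)
  *Out = V;
  return S + 1.0;     // ... and also used as a scalar; the combine redirects this
                      // use to an EXTRACT_VECTOR_ELT of lane 0 of the replicate
}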
@@ -5394,8 +5442,7 @@ SDValue SystemZTargetLowering::combineSTORE(
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
SDValue Ops[] = {
- N->getOperand(0), BSwapOp, N->getOperand(2),
- DAG.getValueType(Op1.getValueType())
+ N->getOperand(0), BSwapOp, N->getOperand(2)
};
return
@@ -5436,7 +5483,7 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
// (fpround (extract_vector_elt X 0))
// (fpround (extract_vector_elt X 1)) ->
// (extract_vector_elt (VROUND X) 0)
- // (extract_vector_elt (VROUND X) 1)
+ // (extract_vector_elt (VROUND X) 2)
//
// This is a special case since the target doesn't really support v2f32s.
SelectionDAG &DAG = DCI.DAG;
@@ -5478,6 +5525,53 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
return SDValue();
}
+SDValue SystemZTargetLowering::combineFP_EXTEND(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ // (fpextend (extract_vector_elt X 0))
+ // (fpextend (extract_vector_elt X 2)) ->
+ // (extract_vector_elt (VEXTEND X) 0)
+ // (extract_vector_elt (VEXTEND X) 1)
+ //
+ // This is a special case since the target doesn't really support v2f32s.
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f64 &&
+ Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getValueType() == MVT::v4f32 &&
+ Op0.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+ SDValue Vec = Op0.getOperand(0);
+ for (auto *U : Vec->uses()) {
+ if (U != Op0.getNode() &&
+ U->hasOneUse() &&
+ U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ U->getOperand(0) == Vec &&
+ U->getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
+ SDValue OtherExtend = SDValue(*U->use_begin(), 0);
+ if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
+ OtherExtend.getOperand(0) == SDValue(U, 0) &&
+ OtherExtend.getValueType() == MVT::f64) {
+ SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
+ MVT::v2f64, Vec);
+ DCI.AddToWorklist(VExtend.getNode());
+ SDValue Extract1 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
+ VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
+ DCI.AddToWorklist(Extract1.getNode());
+ DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
+ SDValue Extract0 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
+ VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ return Extract0;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
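[Illustrative sketch, not part of this patch] combineFP_EXTEND() above matches a pair of scalar extends of lanes 0 and 2 of a v4f32 and replaces them with one wide vector extend whose v2f64 result supplies lanes 0 and 1. A standalone, hypothetical example of the source-level pair it looks for:

struct Float4 { float v[4]; };

void extendPair(const Float4 &F, double &Lo, double &Hi) {
  Lo = (double)F.v[0]; // lane 0 of the f32 vector -> lane 0 of the f64 result
  Hi = (double)F.v[2]; // lane 2 of the f32 vector -> lane 1 of the f64 result
}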
@@ -5492,13 +5586,14 @@ SDValue SystemZTargetLowering::combineBSWAP(
// Create the byte-swapping load.
SDValue Ops[] = {
LD->getChain(), // Chain
- LD->getBasePtr(), // Ptr
- DAG.getValueType(N->getValueType(0)) // VT
+ LD->getBasePtr() // Ptr
};
+ EVT LoadVT = N->getValueType(0);
+ if (LoadVT == MVT::i16)
+ LoadVT = MVT::i32;
SDValue BSLoad =
DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
- DAG.getVTList(N->getValueType(0) == MVT::i64 ?
- MVT::i64 : MVT::i32, MVT::Other),
+ DAG.getVTList(LoadVT, MVT::Other),
Ops, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
@@ -5664,6 +5759,23 @@ SDValue SystemZTargetLowering::combineGET_CCMASK(
return Select->getOperand(4);
}
+SDValue SystemZTargetLowering::combineIntDIVREM(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // In the case where the divisor is a vector of constants a cheaper
+ // sequence of instructions can replace the divide. BuildSDIV is called to
+ // do this during DAG combining, but it only succeeds when it can build a
+ // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
+ // since it is not Legal but Custom it can only happen before
+ // legalization. Therefore we must scalarize this early before Combine
+ // 1. For widened vectors, this is already the result of type legalization.
+ if (VT.isVector() && isTypeLegal(VT) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
+ return DAG.UnrollVectorOp(N);
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch(N->getOpcode()) {
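[Illustrative sketch, not part of this patch] combineIntDIVREM() above unrolls a legal-typed vector division or remainder whose divisor is a constant build-vector into per-lane scalar operations, so that later combining can strength-reduce each lane. A standalone sketch of what that unrolling amounts to:

#include <array>

// Vector form: V / {3, 5, 7, 9}. The unrolled form is one scalar divide per
// lane, each of which can later become a multiply/shift sequence.
std::array<int, 4> divideByConstants(const std::array<int, 4> &V) {
  std::array<int, 4> R = { V[0] / 3, V[1] / 5, V[2] / 7, V[3] / 9 };
  return R;
}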
@@ -5673,14 +5785,20 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
case SystemZISD::MERGE_HIGH:
case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
+ case ISD::LOAD: return combineLOAD(N, DCI);
case ISD::STORE: return combineSTORE(N, DCI);
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
+ case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: return combineIntDIVREM(N, DCI);
}
return SDValue();
@@ -5791,10 +5909,10 @@ static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
unsigned OpNo) {
APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
- unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
- KnownBits LHSKnown(SrcBitWidth), RHSKnown(SrcBitWidth);
- DAG.computeKnownBits(Op.getOperand(OpNo), LHSKnown, Src0DemE, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(OpNo + 1), RHSKnown, Src1DemE, Depth + 1);
+ KnownBits LHSKnown =
+ DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
+ KnownBits RHSKnown =
+ DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
Known.One = LHSKnown.One & RHSKnown.One;
}
@@ -5860,9 +5978,8 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::s390_vuplf: {
SDValue SrcOp = Op.getOperand(1);
unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits();
- Known = KnownBits(SrcBitWidth);
APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
- DAG.computeKnownBits(SrcOp, Known, SrcDemE, Depth + 1);
+ Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
if (IsLogical) {
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(SrcBitWidth);
@@ -5881,7 +5998,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
case SystemZISD::REPLICATE: {
SDValue SrcOp = Op.getOperand(0);
- DAG.computeKnownBits(SrcOp, Known, Depth + 1);
+ Known = DAG.computeKnownBits(SrcOp, Depth + 1);
if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
Known = Known.sext(BitWidth); // VREPI sign extends the immedate.
break;
@@ -6852,7 +6969,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
.addImm(ThisLength)
.add(SrcBase)
.addImm(SrcDisp)
- ->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .setMemRefs(MI.memoperands());
DestDisp += ThisLength;
SrcDisp += ThisLength;
Length -= ThisLength;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 267e31a85216..622da32e418d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -337,18 +337,8 @@ enum NodeType : unsigned {
// Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
ATOMIC_CMP_SWAP_128,
- // Byte swapping load.
- //
- // Operand 0: the address to load from
- // Operand 1: the type of load (i16, i32, i64)
- LRV,
-
- // Byte swapping store.
- //
- // Operand 0: the value to store
- // Operand 1: the address to store to
- // Operand 2: the type of store (i16, i32, i64)
- STRV,
+ // Byte swapping load/store. Same operands as regular load/store.
+ LRV, STRV,
// Prefetch from the second operand using the 4-bit control code in
// the first operand. The code is 1 for a load prefetch and 2 for
@@ -389,7 +379,7 @@ public:
// want to clobber the upper 32 bits of a GPR unnecessarily.
return MVT::i32;
}
- TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
// Widen subvectors to the full width rather than promoting integer
// elements. This is better because:
@@ -597,14 +587,17 @@ private:
SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 4e47752ed122..1374ee91fa29 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -97,7 +97,7 @@ let Predicates = [FeatureNoVectorEnhancements1] in
(CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
let Predicates = [FeatureVectorEnhancements1] in
def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))),
- (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>;
+ (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
// fcopysign with an FP64 result.
let isCodeGenOnly = 1 in
@@ -110,7 +110,7 @@ let Predicates = [FeatureNoVectorEnhancements1] in
(CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
let Predicates = [FeatureVectorEnhancements1] in
def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))),
- (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>;
+ (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
// fcopysign with an FP128 result. Use "upper" as the high half and leave
// the low half as-is.
@@ -187,7 +187,7 @@ def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
let Predicates = [FeatureNoVectorEnhancements1] in {
def : Pat<(f32 (fpround FP128:$src)),
- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
def : Pat<(f64 (fpround FP128:$src)),
(EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
}
@@ -446,13 +446,13 @@ def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- FP32:$src1, subreg_r32), FP32:$src2)>;
+ FP32:$src1, subreg_h32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
def : Pat<(fmul (f64 (fpextend FP32:$src1)),
(f64 (extloadf32 bdxaddr12only:$addr))),
- (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32),
+ (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index e3f9a9645d13..1e904a86ea79 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2900,7 +2900,7 @@ multiclass UnaryExtraVRRaSPair<string mnemonic, bits<16> opcode,
}
multiclass UnaryExtraVRRaSPairGeneric<string mnemonic, bits<16> opcode> {
- let M4 = 0 in
+ let M4 = 0, Defs = [CC] in
def "" : InstVRRa<opcode, (outs VR128:$V1),
(ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M5),
mnemonic#"\t$V1, $V2, $M3, $M5", []>;
@@ -3472,7 +3472,9 @@ multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode,
class BinaryVRRbSPairGeneric<string mnemonic, bits<16> opcode>
: InstVRRb<opcode, (outs VR128:$V1),
(ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
- mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []> {
+ let Defs = [CC];
+}
// Declare a pair of instructions, one which sets CC and one which doesn't.
// The CC-setting form ends with "S" and sets the low bit of M5.
@@ -3496,9 +3498,10 @@ multiclass BinaryExtraVRRbSPair<string mnemonic, bits<16> opcode,
}
multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
- def "" : InstVRRb<opcode, (outs VR128:$V1),
- (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
- mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ let Defs = [CC] in
+ def "" : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
imm32zx4:$M4, 0)>;
@@ -4185,9 +4188,10 @@ multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,
}
multiclass TernaryOptVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
- def "" : InstVRRb<opcode, (outs VR128:$V1),
- (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
- mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ let Defs = [CC] in
+ def "" : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
imm32zx4:$M4, 0)>;
@@ -4385,7 +4389,8 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
}
multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
- def "" : QuaternaryVRRdGeneric<mnemonic, opcode>;
+ let Defs = [CC] in
+ def "" : QuaternaryVRRdGeneric<mnemonic, opcode>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
VR128:$V4, imm32zx4:$M5, 0)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index f0f9211efd5d..b03b4edaa4ab 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -880,10 +880,10 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SystemZ::FP128BitRegClass.contains(SrcReg)) {
unsigned SrcRegHi =
RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
unsigned SrcRegLo =
RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg)
.addReg(SrcRegHi, getKillRegState(KillSrc))
@@ -894,10 +894,10 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SystemZ::VR128BitRegClass.contains(SrcReg)) {
unsigned DestRegHi =
RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
unsigned DestRegLo =
RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
if (DestRegHi != SrcReg)
copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index bb5b7aae883b..8d3b1011d0a7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -756,16 +756,15 @@ def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>;
def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>;
def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>;
-// Byte-swapping loads. Unlike normal loads, these instructions are
-// allowed to access storage more than once.
-def LRVH : UnaryRXY<"lrvh", 0xE31F, z_lrvh, GR32, 2>;
-def LRV : UnaryRXY<"lrv", 0xE31E, z_lrv, GR32, 4>;
-def LRVG : UnaryRXY<"lrvg", 0xE30F, z_lrvg, GR64, 8>;
-
-// Likewise byte-swapping stores.
-def STRVH : StoreRXY<"strvh", 0xE33F, z_strvh, GR32, 2>;
-def STRV : StoreRXY<"strv", 0xE33E, z_strv, GR32, 4>;
-def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>;
+// Byte-swapping loads.
+def LRVH : UnaryRXY<"lrvh", 0xE31F, z_loadbswap16, GR32, 2>;
+def LRV : UnaryRXY<"lrv", 0xE31E, z_loadbswap32, GR32, 4>;
+def LRVG : UnaryRXY<"lrvg", 0xE30F, z_loadbswap64, GR64, 8>;
+
+// Byte-swapping stores.
+def STRVH : StoreRXY<"strvh", 0xE33F, z_storebswap16, GR32, 2>;
+def STRV : StoreRXY<"strv", 0xE33E, z_storebswap32, GR32, 4>;
+def STRVG : StoreRXY<"strvg", 0xE32F, z_storebswap64, GR64, 8>;
// Byte-swapping memory-to-memory moves.
let mayLoad = 1, mayStore = 1 in
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 92b86575235a..6c97b85277c3 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -151,13 +151,13 @@ let Predicates = [FeatureVector] in {
def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
- def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)),
+ def : Pat<(z_vllezf32 bdxaddr12only:$addr),
(VLLEZF bdxaddr12only:$addr)>;
- def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
+ def : Pat<(z_vllezf64 bdxaddr12only:$addr),
(VLLEZG bdxaddr12only:$addr)>;
let Predicates = [FeatureVectorEnhancements1] in {
def VLLEZLF : UnaryVRX<"vllezlf", 0xE704, z_vllezli32, v128f, 4, 6>;
- def : Pat<(v4f32 (z_vllezlf32 bdxaddr12only:$addr)),
+ def : Pat<(z_vllezlf32 bdxaddr12only:$addr),
(VLLEZLF bdxaddr12only:$addr)>;
}
@@ -1031,7 +1031,7 @@ let Predicates = [FeatureVector] in {
// Maximum.
multiclass VectorMax<Instruction insn, TypedReg tr> {
def : FPMinMax<insn, fmaxnum, tr, 4>;
- def : FPMinMax<insn, fmaxnan, tr, 1>;
+ def : FPMinMax<insn, fmaximum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
@@ -1055,7 +1055,7 @@ let Predicates = [FeatureVector] in {
// Minimum.
multiclass VectorMin<Instruction insn, TypedReg tr> {
def : FPMinMax<insn, fminnum, tr, 4>;
- def : FPMinMax<insn, fminnan, tr, 1>;
+ def : FPMinMax<insn, fminimum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
@@ -1405,8 +1405,8 @@ multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls,
(vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar,
subreg), 0)>;
}
-defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_r32>;
-defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
+defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_h32>;
+defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_h64>;
// Match v2f64 insertions. The AddedComplexity counters the 3 added by
// TableGen for the base register operand in VLVG-based integer insertions
@@ -1414,10 +1414,10 @@ defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
let AddedComplexity = 4 in {
def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0),
(VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
- subreg_r64), VR128:$vec, 1)>;
+ subreg_h64), VR128:$vec, 1)>;
def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1),
(VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
- subreg_r64), 0)>;
+ subreg_h64), 0)>;
}
// We extract floating-point element X by replicating (for elements other
@@ -1426,14 +1426,14 @@ let AddedComplexity = 4 in {
// extractions and ensures that this version is strictly better.
let AddedComplexity = 4 in {
def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)),
- (EXTRACT_SUBREG VR128:$vec, subreg_r32)>;
+ (EXTRACT_SUBREG VR128:$vec, subreg_h32)>;
def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)),
- (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_r32)>;
+ (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_h32)>;
def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)),
- (EXTRACT_SUBREG VR128:$vec, subreg_r64)>;
+ (EXTRACT_SUBREG VR128:$vec, subreg_h64)>;
def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)),
- (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>;
+ (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_h64)>;
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
index 5103867e2d9a..626675bfb70c 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -127,14 +127,6 @@ def SDT_ZIPM : SDTypeProfile<1, 1,
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
-def SDT_ZLoadBSwap : SDTypeProfile<1, 2,
- [SDTCisInt<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, OtherVT>]>;
-def SDT_ZStoreBSwap : SDTypeProfile<0, 3,
- [SDTCisInt<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, OtherVT>]>;
def SDT_ZTBegin : SDTypeProfile<1, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>,
@@ -283,9 +275,9 @@ def z_subcarry_1 : SDNode<"SystemZISD::SUBCARRY", SDT_ZBinaryWithCarry>;
def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
-def z_loadbswap : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap,
+def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def z_storebswap : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap,
+def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>;
@@ -429,16 +421,28 @@ def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
// Pattern fragments
//===----------------------------------------------------------------------===//
-def z_lrvh : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i16)>;
-def z_lrv : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i32)>;
-def z_lrvg : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i64)>;
+def z_loadbswap16 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_loadbswap32 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_loadbswap64 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
-def z_strvh : PatFrag<(ops node:$src, node:$addr),
- (z_storebswap node:$src, node:$addr, i16)>;
-def z_strv : PatFrag<(ops node:$src, node:$addr),
- (z_storebswap node:$src, node:$addr, i32)>;
-def z_strvg : PatFrag<(ops node:$src, node:$addr),
- (z_storebswap node:$src, node:$addr, i64)>;
+def z_storebswap16 : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_storebswap32 : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_storebswap64 : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
// Fragments including CC as an implicit source.
def z_br_ccmask
@@ -556,7 +560,6 @@ class NonvolatileLoad<SDPatternOperator load>
auto *Load = cast<LoadSDNode>(N);
return !Load->isVolatile();
}]>;
-def nonvolatile_load : NonvolatileLoad<load>;
def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
@@ -567,7 +570,6 @@ class NonvolatileStore<SDPatternOperator store>
auto *Store = cast<StoreSDNode>(N);
return !Store->isVolatile();
}]>;
-def nonvolatile_store : NonvolatileStore<store>;
def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>;
def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>;
def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>;
@@ -743,37 +745,37 @@ class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>;
def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
def z_vllezi32 : z_vllez<i32, load, 1>;
-def z_vllezi64 : PatFrag<(ops node:$addr),
- (z_join_dwords (i64 (load node:$addr)), (i64 0))>;
+def z_vllezi64 : PatFrags<(ops node:$addr),
+ [(z_vector_insert (z_vzero),
+ (i64 (load node:$addr)), (i32 0)),
+ (z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
// We use high merges to form a v4f32 from four f32s. Propagating zero
// into all elements but index 1 gives this expression.
def z_vllezf32 : PatFrag<(ops node:$addr),
- (bitconvert
- (z_merge_high
- (v2i64
- (z_unpackl_high
- (v4i32
- (bitconvert
- (v4f32 (scalar_to_vector
- (f32 (load node:$addr)))))))),
- (v2i64 (z_vzero))))>;
+ (z_merge_high
+ (v2i64
+ (z_unpackl_high
+ (v4i32
+ (bitconvert
+ (v4f32 (scalar_to_vector
+ (f32 (load node:$addr)))))))),
+ (v2i64 (z_vzero)))>;
def z_vllezf64 : PatFrag<(ops node:$addr),
(z_merge_high
- (scalar_to_vector (f64 (load node:$addr))),
+ (v2f64 (scalar_to_vector (f64 (load node:$addr)))),
(z_vzero))>;
// Similarly for the high element of a zeroed vector.
def z_vllezli32 : z_vllez<i32, load, 0>;
def z_vllezlf32 : PatFrag<(ops node:$addr),
- (bitconvert
- (z_merge_high
- (v2i64
- (bitconvert
- (z_merge_high
- (v4f32 (scalar_to_vector
- (f32 (load node:$addr)))),
- (v4f32 (z_vzero))))),
- (v2i64 (z_vzero))))>;
+ (z_merge_high
+ (v2i64
+ (bitconvert
+ (z_merge_high
+ (v4f32 (scalar_to_vector
+ (f32 (load node:$addr)))),
+ (v4f32 (z_vzero))))),
+ (v2i64 (z_vzero)))>;
// Store one element of a vector.
class z_vste<ValueType scalartype, SDPatternOperator store>
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 76ed6f80ba55..e9f9188048da 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -63,6 +63,10 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
+ VirtReg, Order, Hints, MF, VRM, Matrix);
+
if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) {
SmallVector<unsigned, 8> Worklist;
SmallSet<unsigned, 4> DoneRegs;
@@ -84,8 +88,18 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
getRC32(TrueMO, VRM, MRI));
if (RC && RC != &SystemZ::GRX32BitRegClass) {
+ // Pass the registers of RC as hints while making sure that if
+ // any of these registers are copy hints, hint them first.
+ SmallSet<unsigned, 4> CopyHints;
+ CopyHints.insert(Hints.begin(), Hints.end());
+ Hints.clear();
+ for (MCPhysReg Reg : Order)
+ if (CopyHints.count(Reg) &&
+ RC->contains(Reg) && !MRI->isReserved(Reg))
+ Hints.push_back(Reg);
for (MCPhysReg Reg : Order)
- if (RC->contains(Reg) && !MRI->isReserved(Reg))
+ if (!CopyHints.count(Reg) &&
+ RC->contains(Reg) && !MRI->isReserved(Reg))
Hints.push_back(Reg);
// Return true to make these hints the only regs available to
// RA. This may mean extra spilling but since the alternative is
@@ -102,8 +116,7 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
}
- return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
- VRM, Matrix);
+ return BaseImplRetVal;
}
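[Illustrative sketch, not part of this patch] The getRegAllocationHints() change above keeps the target-provided allocation order but moves registers that are already copy hints to the front of the hint list. A standalone sketch of that two-pass ordering, with hypothetical names:

#include <set>
#include <vector>

std::vector<unsigned> orderHints(const std::vector<unsigned> &Order,
                                 const std::set<unsigned> &CopyHints,
                                 const std::set<unsigned> &Allowed) {
  std::vector<unsigned> Hints;
  // First pass: allowed registers that are also copy hints, in allocation order.
  for (unsigned Reg : Order)
    if (CopyHints.count(Reg) && Allowed.count(Reg))
      Hints.push_back(Reg);
  // Second pass: the remaining allowed registers, in allocation order.
  for (unsigned Reg : Order)
    if (!CopyHints.count(Reg) && Allowed.count(Reg))
      Hints.push_back(Reg);
  return Hints;
}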
const MCPhysReg *
@@ -270,25 +283,30 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
// Check that the two virtual registers are local to MBB.
MachineBasicBlock *MBB = MI->getParent();
- if (LIS.isLiveInToMBB(IntGR128, MBB) || LIS.isLiveOutOfMBB(IntGR128, MBB) ||
- LIS.isLiveInToMBB(IntGRNar, MBB) || LIS.isLiveOutOfMBB(IntGRNar, MBB))
+ MachineInstr *FirstMI_GR128 =
+ LIS.getInstructionFromIndex(IntGR128.beginIndex());
+ MachineInstr *FirstMI_GRNar =
+ LIS.getInstructionFromIndex(IntGRNar.beginIndex());
+ MachineInstr *LastMI_GR128 = LIS.getInstructionFromIndex(IntGR128.endIndex());
+ MachineInstr *LastMI_GRNar = LIS.getInstructionFromIndex(IntGRNar.endIndex());
+ if ((!FirstMI_GR128 || FirstMI_GR128->getParent() != MBB) ||
+ (!FirstMI_GRNar || FirstMI_GRNar->getParent() != MBB) ||
+ (!LastMI_GR128 || LastMI_GR128->getParent() != MBB) ||
+ (!LastMI_GRNar || LastMI_GRNar->getParent() != MBB))
return false;
- // Find the first and last MIs of the registers.
- MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
+ MachineBasicBlock::iterator MII = nullptr, MEE = nullptr;
if (WideOpNo == 1) {
- FirstMI = LIS.getInstructionFromIndex(IntGR128.beginIndex());
- LastMI = LIS.getInstructionFromIndex(IntGRNar.endIndex());
+ MII = FirstMI_GR128;
+ MEE = LastMI_GRNar;
} else {
- FirstMI = LIS.getInstructionFromIndex(IntGRNar.beginIndex());
- LastMI = LIS.getInstructionFromIndex(IntGR128.endIndex());
+ MII = FirstMI_GRNar;
+ MEE = LastMI_GR128;
}
- assert (FirstMI && LastMI && "No instruction from index?");
// Check if coalescing seems safe by finding the set of clobbered physreg
// pairs in the region.
BitVector PhysClobbered(getNumRegs());
- MachineBasicBlock::iterator MII = FirstMI, MEE = LastMI;
MEE++;
for (; MII != MEE; ++MII) {
for (const MachineOperand &MO : MII->operands())
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 94781659a50a..9fd2e4ae4f00 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -57,8 +57,6 @@ public:
const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
// Override TargetRegisterInfo.h.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 79ba7534f92c..cea88c088b86 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -25,11 +25,8 @@ def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32.
def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
def subreg_l64 : SubRegIndex<64, 0>;
def subreg_h64 : SubRegIndex<64, 64>;
-def subreg_r32 : SubRegIndex<32, 32>; // Reinterpret a wider reg as 32 bits.
-def subreg_r64 : SubRegIndex<64, 64>; // Reinterpret a wider reg as 64 bits.
def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>;
def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>;
-def subreg_hr32 : ComposedSubRegIndex<subreg_h64, subreg_r32>;
}
// Define a register class that contains values of types TYPES and an
@@ -188,7 +185,7 @@ class FPR32<bits<16> num, string n> : SystemZReg<n> {
class FPR64<bits<16> num, string n, FPR32 high>
: SystemZRegWithSubregs<n, [high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_r32];
+ let SubRegIndices = [subreg_h32];
}
// 8 pairs of FPR64s, with a one-register gap inbetween.
@@ -231,7 +228,7 @@ defm FP128 : SystemZRegClass<"FP128", [f128], 128,
class VR128<bits<16> num, string n, FPR64 high>
: SystemZRegWithSubregs<n, [high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_r64];
+ let SubRegIndices = [subreg_h64];
}
// Full vector registers.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
index 385a94b5d6a9..83bf97e6841a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
@@ -12,11 +12,13 @@
// These resources are used to express decoder grouping rules. The number of
// decoder slots needed by an instructions is normally one, but there are
// exceptions.
-def NormalGr : SchedWrite;
-def Cracked : SchedWrite;
-def GroupAlone : SchedWrite;
-def BeginGroup : SchedWrite;
-def EndGroup : SchedWrite;
+def NormalGr : SchedWrite;
+def Cracked : SchedWrite;
+def GroupAlone : SchedWrite;
+def GroupAlone2 : SchedWrite;
+def GroupAlone3 : SchedWrite;
+def BeginGroup : SchedWrite;
+def EndGroup : SchedWrite;
// A SchedWrite added to other SchedWrites to make LSU latency parameterizable.
def LSULatency : SchedWrite;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
index 5d32232107af..74e1dad87908 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -48,6 +48,16 @@ def : WriteRes<GroupAlone, []> {
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -131,7 +141,7 @@ def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXa2, FXb2, GroupAlone],
+def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -474,13 +484,13 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXa2, LSU, GroupAlone],
//===----------------------------------------------------------------------===//
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "D$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>;
def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2],
(instregex "DSG(F)?$")>;
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>;
def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "DL(G)?$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
// Shifts
@@ -490,7 +500,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -597,9 +607,9 @@ def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone],
+def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
-def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, GroupAlone],
+def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, GroupAlone3],
(instregex "CDSG$")>;
// Compare and swap and store
@@ -620,7 +630,7 @@ def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>;
-def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone],
+def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2],
(instregex "TRT$")>;
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
@@ -643,21 +653,21 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD],
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone2],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone], (instregex "CVDG$")>;
-def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>;
-def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone],
+def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2],
(instregex "(A|S|ZA)P$")>;
-def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone], (instregex "(M|D)P$")>;
-def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone], (instregex "SRP$")>;
+def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "(M|D)P$")>;
+def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>;
def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>;
def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>;
def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>;
@@ -674,7 +684,7 @@ def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>;
// Load/store access multiple (not modeled precisely)
def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STAM(Y)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>;
//===----------------------------------------------------------------------===//
// Program mask and addressing mode
@@ -704,7 +714,7 @@ def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>;
//===----------------------------------------------------------------------===//
// Transaction begin
-def : InstRW<[WLat9, LSU2, FXb5, GroupAlone], (instregex "TBEGIN(C)?$")>;
+def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>;
// Transaction end
def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>;
@@ -813,9 +823,9 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>;
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked],
@@ -899,7 +909,7 @@ def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>;
// Test Data Class
def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>;
-def : InstRW<[WLat10, LSU2, VecDF4, GroupAlone], (instregex "TCXB$")>;
+def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>;
//===----------------------------------------------------------------------===//
// FP: Floating-point control register instructions
@@ -941,7 +951,7 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>;
@@ -1054,9 +1064,9 @@ def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CD(F|G)TR(A)?$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)TR(A)?$")>;
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)TR$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)TR$")>;
// Convert to fixed / logical
def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked],
@@ -1068,19 +1078,19 @@ def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Convert from / to zoned
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>;
// Convert from / to packed
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXPT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>;
@@ -1129,7 +1139,7 @@ def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>;
-def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1137,7 +1147,7 @@ def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
@@ -1200,7 +1210,7 @@ def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], (instregex "VLM$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
-def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "VSTM$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>;
def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
//===----------------------------------------------------------------------===//
@@ -1414,7 +1424,7 @@ def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STCT(L|G)$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "ESEA$")>;
@@ -1458,8 +1468,8 @@ def : InstRW<[WLat30, MCD], (instregex "TPROT$")>;
// System: Memory-move Instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone], (instregex "MVC(K|P|S)$")>;
-def : InstRW<[WLat1, FXa, LSU5, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>;
def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>;
def : InstRW<[WLat30, MCD], (instregex "MVPG$")>;
@@ -1491,8 +1501,8 @@ def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>;
def : InstRW<[WLat30, MCD], (instregex "PTFF$")>;
def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>;
def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>;
-def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone], (instregex "STCKE$")>;
+def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>;
+def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>;
def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index 515f968e5091..1962fdf3a1d1 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -48,6 +48,16 @@ def : WriteRes<GroupAlone, []> {
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -132,7 +142,7 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXa2, FXb2, GroupAlone],
+def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -483,13 +493,14 @@ def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "D$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>;
def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2],
(instregex "DSG(F)?$")>;
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>;
def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "DL(G)?$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2],
+ (instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
// Shifts
@@ -499,7 +510,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -606,10 +617,10 @@ def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone],
+def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3,
- GroupAlone], (instregex "CDSG$")>;
+ GroupAlone3], (instregex "CDSG$")>;
// Compare and swap and store
def : InstRW<[WLat30, MCD], (instregex "CSST$")>;
@@ -629,7 +640,7 @@ def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>;
-def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone],
+def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2],
(instregex "TRT$")>;
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
@@ -662,21 +673,21 @@ def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>;
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone2],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone], (instregex "CVDG$")>;
-def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>;
-def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone],
+def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2],
(instregex "(A|S|ZA)P$")>;
-def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone], (instregex "(M|D)P$")>;
-def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone], (instregex "SRP$")>;
+def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "(M|D)P$")>;
+def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>;
def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>;
def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>;
def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>;
@@ -693,7 +704,7 @@ def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>;
// Load/store access multiple (not modeled precisely)
def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STAM(Y)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>;
//===----------------------------------------------------------------------===//
// Program mask and addressing mode
@@ -723,7 +734,7 @@ def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>;
//===----------------------------------------------------------------------===//
// Transaction begin
-def : InstRW<[WLat9, LSU2, FXb5, GroupAlone], (instregex "TBEGIN(C)?$")>;
+def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>;
// Transaction end
def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>;
@@ -832,9 +843,9 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>;
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked],
@@ -918,7 +929,7 @@ def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>;
// Test Data Class
def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>;
-def : InstRW<[WLat10, LSU2, VecDF4, GroupAlone], (instregex "TCXB$")>;
+def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>;
//===----------------------------------------------------------------------===//
// FP: Floating-point control register instructions
@@ -960,7 +971,7 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>;
@@ -1071,9 +1082,9 @@ def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CD(F|G)TR(A)?$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)TR(A)?$")>;
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)TR$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)TR$")>;
// Convert to fixed / logical
def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked],
@@ -1085,19 +1096,19 @@ def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Convert from / to zoned
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>;
// Convert from / to packed
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXPT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>;
@@ -1146,7 +1157,7 @@ def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>;
-def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1154,7 +1165,7 @@ def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
@@ -1218,7 +1229,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
-def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "VSTM$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>;
def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>;
@@ -1469,7 +1480,7 @@ def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "VLIP$")>;
-def : InstRW<[WLat6, VecDFX, LSU, GroupAlone], (instregex "VPKZ$")>;
+def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>;
def : InstRW<[WLat1, VecDFX, FXb, LSU, Cracked], (instregex "VUPKZ$")>;
def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], (instregex "VCVB(G)?$")>;
def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], (instregex "VCVD(G)?$")>;
@@ -1489,7 +1500,7 @@ def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>;
-def : InstRW<[WLat20, GroupAlone], (instregex "LPSW(E)?$")>;
+def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>;
def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>;
def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>;
def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>;
@@ -1502,7 +1513,7 @@ def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STCT(L|G)$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "ESEA$")>;
@@ -1547,8 +1558,8 @@ def : InstRW<[WLat30, MCD], (instregex "TPROT$")>;
// System: Memory-move Instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone], (instregex "MVC(K|P|S)$")>;
-def : InstRW<[WLat1, FXa, LSU5, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>;
def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>;
def : InstRW<[WLat30, MCD], (instregex "MVPG$")>;
@@ -1580,8 +1591,8 @@ def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>;
def : InstRW<[WLat30, MCD], (instregex "PTFF$")>;
def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>;
def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>;
-def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone], (instregex "STCKE$")>;
+def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>;
+def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>;
def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
index 3012b565d5ef..7535739f813a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -39,15 +39,21 @@ let NumMicroOps = 1 in {
def : WriteRes<BeginGroup, []> { let BeginGroup = 1; }
def : WriteRes<EndGroup, []> { let EndGroup = 1; }
}
-def : WriteRes<Cracked, []> {
- let NumMicroOps = 2;
- let BeginGroup = 1;
-}
def : WriteRes<GroupAlone, []> {
let NumMicroOps = 3;
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -114,7 +120,7 @@ def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?B(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BRCT(G|H)?$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXU3, LSU, GroupAlone],
+def : InstRW<[WLat1, FXU3, LSU, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -439,14 +445,14 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXU2, LSU, GroupAlone],
// Division and remainder
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DR$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "D$")>;
-def : InstRW<[WLat30, FPU4, FXU4, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU4, GroupAlone3], (instregex "DSG(F)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone3],
(instregex "DSG(F)?$")>;
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DL(G)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DL(G)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
@@ -457,7 +463,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -560,7 +566,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, FXU2, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
def : InstRW<[WLat12, WLat12, FXU6, LSU2, GroupAlone],
(instregex "CDSG$")>;
@@ -604,12 +610,12 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(KIMD|KLMD|KMAC|PCC)$")>
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
-def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone],
+def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone2],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone], (instregex "CVDG$")>;
-def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone3], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
def : InstRW<[WLat10, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
@@ -701,13 +707,13 @@ def : InstRW<[], (instregex "Insn.*")>;
// Load zero
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LZ(DR|ER)$")>;
-def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LZXR$")>;
+def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LZXR$")>;
// Load
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LER$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LD(R|R32|GR)$")>;
def : InstRW<[WLat3, FXU, NormalGr], (instregex "LGDR$")>;
-def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LXR$")>;
+def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LXR$")>;
// Load and Test
def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
@@ -747,10 +753,10 @@ def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CX(F|G)BR(A?)$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)BR(A?)$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CEL(F|G)BR$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CDL(F|G)BR$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -874,7 +880,7 @@ def : InstRW<[WLat9, FPU4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat10, FXU, FPU4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat10, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -986,11 +992,11 @@ def : InstRW<[WLat6, DFU4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDFTR$")>;
def : InstRW<[WLat30, FXU, DFU, GroupAlone], (instregex "CDGTR(A)?$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXFTR(A)?$")>;
-def : InstRW<[WLat30, FXU, DFU4, GroupAlone], (instregex "CXGTR(A)?$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXFTR(A)?$")>;
+def : InstRW<[WLat30, FXU, DFU4, GroupAlone2], (instregex "CXGTR(A)?$")>;
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat9, FXU, DFU4, GroupAlone], (instregex "CXLFTR$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXLGTR$")>;
+def : InstRW<[WLat9, FXU, DFU4, GroupAlone2], (instregex "CXLFTR$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXLGTR$")>;
// Convert to fixed / logical
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CFDTR(A)?$")>;
@@ -1002,9 +1008,9 @@ def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat8, FXU2, DFU4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat8, FXU2, DFU4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat12, FXU2, DFU4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat12, FXU2, DFU4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Perform floating-point operation
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>;
@@ -1051,7 +1057,7 @@ def : InstRW<[WLat10, WLat10, DFU4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "RRDTR$")>;
-def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat7LSU, LSU, DFU, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1059,7 +1065,7 @@ def : InstRW<[WLat11LSU, LSU, DFU4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "IEDTR$")>;
-def : InstRW<[WLat7, FXU, DFU4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat7, FXU, DFU4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 892f493570d1..a21d2c4cef70 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -39,15 +39,21 @@ let NumMicroOps = 1 in {
def : WriteRes<BeginGroup, []> { let BeginGroup = 1; }
def : WriteRes<EndGroup, []> { let EndGroup = 1; }
}
-def : WriteRes<Cracked, []> {
- let NumMicroOps = 2;
- let BeginGroup = 1;
-}
def : WriteRes<GroupAlone, []> {
let NumMicroOps = 3;
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -119,7 +125,7 @@ def : InstRW<[WLat1, LSU, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXU, EndGroup], (instregex "BRCT(G)?$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BRCTH$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXU3, LSU, GroupAlone],
+def : InstRW<[WLat1, FXU3, LSU, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -450,14 +456,14 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXU2, LSU, GroupAlone],
// Division and remainder
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DR$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "D$")>;
-def : InstRW<[WLat30, FPU4, FXU4, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU4, GroupAlone3], (instregex "DSG(F)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone3],
(instregex "DSG(F)?$")>;
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DL(G)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DL(G)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
@@ -468,7 +474,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -572,7 +578,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, FXU2, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
def : InstRW<[WLat12, WLat12, FXU6, LSU2, GroupAlone],
(instregex "CDSG$")>;
@@ -595,7 +601,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat1, LSU, GroupAlone], (instregex "TR$")>;
-def : InstRW<[WLat30, WLat30, WLat30, FXU3, LSU2, GroupAlone],
+def : InstRW<[WLat30, WLat30, WLat30, FXU3, LSU2, GroupAlone2],
(instregex "TRT$")>;
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
@@ -617,11 +623,11 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(KIMD|KLMD|KMAC|PCC)$")>
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone3], (instregex "CVDG$")>;
def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
@@ -785,10 +791,10 @@ def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CX(F|G)BR(A?)$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)BR(A?)$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CEL(F|G)BR$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CDL(F|G)BR$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -912,7 +918,7 @@ def : InstRW<[WLat9, FPU4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat10, FXU, FPU4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat10, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -1024,11 +1030,11 @@ def : InstRW<[WLat6, DFU4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDFTR$")>;
def : InstRW<[WLat30, FXU, DFU, GroupAlone], (instregex "CDGTR(A)?$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXFTR(A)?$")>;
-def : InstRW<[WLat30, FXU, DFU4, GroupAlone], (instregex "CXGTR(A)?$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXFTR(A)?$")>;
+def : InstRW<[WLat30, FXU, DFU4, GroupAlone2], (instregex "CXGTR(A)?$")>;
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat9, FXU, DFU4, GroupAlone], (instregex "CXLFTR$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXLGTR$")>;
+def : InstRW<[WLat9, FXU, DFU4, GroupAlone2], (instregex "CXLFTR$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXLGTR$")>;
// Convert to fixed / logical
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CFDTR(A)?$")>;
@@ -1040,13 +1046,13 @@ def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat8, FXU2, DFU4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat8, FXU2, DFU4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat12, FXU2, DFU4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat12, FXU2, DFU4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Convert from / to zoned
def : InstRW<[WLat4LSU, LSU, DFU2, GroupAlone], (instregex "CDZT$")>;
-def : InstRW<[WLat11LSU, LSU2, DFU4, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[WLat11LSU, LSU2, DFU4, GroupAlone3], (instregex "CXZT$")>;
def : InstRW<[WLat1, FXU, LSU, DFU2, GroupAlone], (instregex "CZDT$")>;
def : InstRW<[WLat1, FXU, LSU, DFU2, GroupAlone], (instregex "CZXT$")>;
@@ -1095,7 +1101,7 @@ def : InstRW<[WLat10, WLat10, DFU4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "RRDTR$")>;
-def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat7LSU, LSU, DFU, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1103,7 +1109,7 @@ def : InstRW<[WLat11LSU, LSU, DFU4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "IEDTR$")>;
-def : InstRW<[WLat7, FXU, DFU4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat7, FXU, DFU4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
@@ -1223,7 +1229,7 @@ def : InstRW<[WLat30, MCD], (instregex "SCKPF$")>;
def : InstRW<[WLat30, MCD], (instregex "SCKC$")>;
def : InstRW<[WLat30, MCD], (instregex "SPT$")>;
def : InstRW<[WLat9, FXU, LSU2, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[WLat20, LSU4, FXU2, GroupAlone], (instregex "STCKE$")>;
+def : InstRW<[WLat20, LSU4, FXU2, GroupAlone2], (instregex "STCKE$")>;
def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
def : InstRW<[WLat30, MCD], (instregex "STPT$")>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 9cd09b0f911e..fb030a207bc7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -19,6 +19,11 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "SystemZGenSubtargetInfo.inc"
+static cl::opt<bool> UseSubRegLiveness(
+ "systemz-subreg-liveness",
+ cl::desc("Enable subregister liveness tracking for SystemZ (experimental)"),
+ cl::Hidden);
+
// Pin the vtable to this file.
void SystemZSubtarget::anchor() {}
@@ -54,6 +59,11 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(), FrameLowering() {}
+
+bool SystemZSubtarget::enableSubRegLiveness() const {
+ return UseSubRegLiveness;
+}
+
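// The option above is a plain cl::opt<bool> with no cl::init, so it should
// default to false; subregister liveness tracking stays off unless the hidden
// flag is passed explicitly, e.g. "llc -systemz-subreg-liveness file.ll"
// (illustrative invocation, assuming the usual cl::opt handling in llc).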
bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
CodeModel::Model CM) const {
// PC32DBL accesses require the low bit to be clear. Note that a zero
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 8285b4277d11..cb6b21a1d465 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -102,6 +102,9 @@ public:
// Always enable the early if-conversion pass.
bool enableEarlyIfConversion() const override { return true; }
+ // Enable tracking of subregister liveness in register allocator.
+ bool enableSubRegLiveness() const override;
+
// Automatically generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index f3620dcf3b92..9596a2b6388d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -128,10 +128,16 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
// in range of LARL. However, the JIT environment has no equivalent
// of copy relocs, so locally-binding data symbols might not be in
// the range of LARL. We need the Medium model in that case.
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- Reloc::Model RM, bool JIT) {
- if (CM)
+static CodeModel::Model
+getEffectiveSystemZCodeModel(Optional<CodeModel::Model> CM, Reloc::Model RM,
+ bool JIT) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
return *CM;
+ }
if (JIT)
return RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium;
return CodeModel::Small;
@@ -146,7 +152,8 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(
T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL),
+ getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),
+ OL),
TLOF(llvm::make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index c5cdc22f2099..129610fe095b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -328,6 +328,25 @@ bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
}
+// Return the bit size for the scalar type or vector element
+// type. getScalarSizeInBits() returns 0 for a pointer type.
+static unsigned getScalarSizeInBits(Type *Ty) {
+ unsigned Size =
+ (Ty->isPtrOrPtrVectorTy() ? 64U : Ty->getScalarSizeInBits());
+ assert(Size > 0 && "Element must have non-zero size.");
+ return Size;
+}
+
+// getNumberOfParts() calls getTypeLegalizationCost() which splits the vector
+// type until it is legal. This would e.g. return 4 for <6 x i64>, instead of
+// 3.
+static unsigned getNumVectorRegs(Type *Ty) {
+ assert(Ty->isVectorTy() && "Expected vector type");
+ unsigned WideBits = getScalarSizeInBits(Ty) * Ty->getVectorNumElements();
+ assert(WideBits > 0 && "Could not compute size of vector");
+ return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+}
+
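A standalone sketch of the rounding this helper performs (plain integers and a made-up function name, independent of the LLVM types above): <6 x i64> occupies 384 bits and therefore three 128-bit vector registers, which is exactly the case the comment contrasts with getNumberOfParts().

#include <cassert>

// Same ceiling computation as getNumVectorRegs(), on plain integers.
unsigned numVectorRegsFor(unsigned ScalarBits, unsigned NumElts) {
  unsigned WideBits = ScalarBits * NumElts;
  assert(WideBits > 0 && "Could not compute size of vector");
  return (WideBits % 128U) ? (WideBits / 128U) + 1 : (WideBits / 128U);
}

int main() {
  assert(numVectorRegsFor(64, 6) == 3);  // <6 x i64>: 384 bits -> 3 registers
  assert(numVectorRegsFor(32, 3) == 1);  // <3 x i32>:  96 bits -> 1 register
  assert(numVectorRegsFor(8, 32) == 2);  // <32 x i8>: 256 bits -> 2 registers
  return 0;
}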
int SystemZTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
@@ -343,44 +362,59 @@ int SystemZTTIImpl::getArithmeticInstrCost(
unsigned ScalarBits = Ty->getScalarSizeInBits();
- // Div with a constant which is a power of 2 will be converted by
- // DAGCombiner to use shifts. With vector shift-element instructions, a
- // vector sdiv costs about as much as a scalar one.
- const unsigned SDivCostEstimate = 4;
- bool SDivPow2 = false;
- bool UDivPow2 = false;
- if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) &&
- Args.size() == 2) {
- const ConstantInt *CI = nullptr;
+ // There are three cases of division and remainder: Dividing with a register
+ // needs a divide instruction. A divisor which is a power of two constant
+ // can be implemented with a sequence of shifts. Any other constant needs a
+ // multiply and shifts.
+ const unsigned DivInstrCost = 20;
+ const unsigned DivMulSeqCost = 10;
+ const unsigned SDivPow2Cost = 4;
+
+ bool SignedDivRem =
+ Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
+ bool UnsignedDivRem =
+ Opcode == Instruction::UDiv || Opcode == Instruction::URem;
+
+ // Check for a constant divisor.
+ bool DivRemConst = false;
+ bool DivRemConstPow2 = false;
+ if ((SignedDivRem || UnsignedDivRem) && Args.size() == 2) {
if (const Constant *C = dyn_cast<Constant>(Args[1])) {
- if (C->getType()->isVectorTy())
- CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue());
+ const ConstantInt *CVal =
+ (C->getType()->isVectorTy()
+ ? dyn_cast_or_null<const ConstantInt>(C->getSplatValue())
+ : dyn_cast<const ConstantInt>(C));
+ if (CVal != nullptr &&
+ (CVal->getValue().isPowerOf2() || (-CVal->getValue()).isPowerOf2()))
+ DivRemConstPow2 = true;
else
- CI = dyn_cast<const ConstantInt>(C);
- }
- if (CI != nullptr &&
- (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) {
- if (Opcode == Instruction::SDiv)
- SDivPow2 = true;
- else
- UDivPow2 = true;
+ DivRemConst = true;
}
}
if (Ty->isVectorTy()) {
- assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
+ assert(ST->hasVector() &&
+ "getArithmeticInstrCost() called with vector type.");
unsigned VF = Ty->getVectorNumElements();
- unsigned NumVectors = getNumberOfParts(Ty);
+ unsigned NumVectors = getNumVectorRegs(Ty);
// These vector operations are custom handled, but are still supported
// with one instruction per vector, regardless of element size.
if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
- Opcode == Instruction::AShr || UDivPow2) {
+ Opcode == Instruction::AShr) {
return NumVectors;
}
- if (SDivPow2)
- return (NumVectors * SDivCostEstimate);
+ if (DivRemConstPow2)
+ return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
+ if (DivRemConst)
+ return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args);
+ if ((SignedDivRem || UnsignedDivRem) && VF > 4)
+ // Temporary hack: disable high vectorization factors with integer
+ // division/remainder, which will get scalarized and handled with
+ // GR128 registers. The MI scheduler is not clever enough to avoid
+ // spilling yet.
+ return 1000;
// These FP operations are supported with a single vector instruction for
// double (base implementation assumes float generally costs 2). For
@@ -395,7 +429,8 @@ int SystemZTTIImpl::getArithmeticInstrCost(
return NumVectors;
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ unsigned ScalarCost =
+ getArithmeticInstrCost(Opcode, Ty->getScalarType());
unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
// FIXME: VF 2 for these FP operations are currently just as
// expensive as for VF 4.
@@ -432,30 +467,22 @@ int SystemZTTIImpl::getArithmeticInstrCost(
if (Opcode == Instruction::FRem)
return LIBCALL_COST;
- if (Opcode == Instruction::LShr || Opcode == Instruction::AShr)
- return (ScalarBits >= 32 ? 1 : 2 /*ext*/);
-
// Or requires one instruction, although it has custom handling for i64.
if (Opcode == Instruction::Or)
return 1;
- if (Opcode == Instruction::Xor && ScalarBits == 1)
- // 2 * ipm sequences ; xor ; shift ; compare
- return 7;
-
- if (UDivPow2)
- return 1;
- if (SDivPow2)
- return SDivCostEstimate;
-
- // An extra extension for narrow types is needed.
- if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
- // sext of op(s) for narrow types
- return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1));
+ if (Opcode == Instruction::Xor && ScalarBits == 1) {
+ if (ST->hasLoadStoreOnCond2())
+ return 5; // 2 * (li 0; loc 1); xor
+ return 7; // 2 * ipm sequences ; xor ; shift ; compare
+ }
- if (Opcode == Instruction::UDiv || Opcode == Instruction::URem)
- // Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r
- return (ScalarBits < 32 ? 4 : 2);
+ if (DivRemConstPow2)
+ return (SignedDivRem ? SDivPow2Cost : 1);
+ if (DivRemConst)
+ return DivMulSeqCost;
+ if (SignedDivRem || UnsignedDivRem)
+ return DivInstrCost;
}
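A minimal sketch of the scalar division/remainder cost selection above, reusing the same constants; the enum and helper are hypothetical and only restate the three returns for a register divisor, a power-of-two constant, and any other constant.

// Hypothetical helper mirroring the scalar DivRem cost returns above.
enum class DivisorKind { Register, PowerOfTwoConstant, OtherConstant };

unsigned scalarDivRemCost(bool IsSigned, DivisorKind Divisor) {
  const unsigned DivInstrCost = 20;  // divide instruction needed
  const unsigned DivMulSeqCost = 10; // multiply-and-shift sequence
  const unsigned SDivPow2Cost = 4;   // signed power of two: shift sequence
  switch (Divisor) {
  case DivisorKind::PowerOfTwoConstant:
    return IsSigned ? SDivPow2Cost : 1; // unsigned power of two is one shift
  case DivisorKind::OtherConstant:
    return DivMulSeqCost;
  case DivisorKind::Register:
    return DivInstrCost;
  }
  return DivInstrCost;
}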
// Fallback to the default implementation.
@@ -463,12 +490,11 @@ int SystemZTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo, Args);
}
-
int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
assert (Tp->isVectorTy());
assert (ST->hasVector() && "getShuffleCost() called.");
- unsigned NumVectors = getNumberOfParts(Tp);
+ unsigned NumVectors = getNumVectorRegs(Tp);
// TODO: Since fp32 is expanded, the shuffle cost should always be 0.
@@ -523,7 +549,7 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {
// TODO: Since fp32 is expanded, the extract cost should always be 0.
- unsigned NumParts = getNumberOfParts(SrcTy);
+ unsigned NumParts = getNumVectorRegs(SrcTy);
if (NumParts <= 2)
// Up to 2 vector registers can be truncated efficiently with pack or
// permute. The latter requires an immediate mask to be loaded, which
@@ -566,7 +592,7 @@ getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
// The bitmask will be truncated.
PackCost = getVectorTruncCost(SrcTy, DstTy);
else if (SrcScalarBits < DstScalarBits) {
- unsigned DstNumParts = getNumberOfParts(DstTy);
+ unsigned DstNumParts = getNumVectorRegs(DstTy);
// Each vector select needs its part of the bitmask unpacked.
PackCost = Log2Diff * DstNumParts;
// Extra cost for moving part of mask before unpacking.
@@ -602,6 +628,25 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
return nullptr;
}
+// Get the cost of converting a boolean vector to a vector with same width
+// and element size as Dst, plus the cost of zero extending if needed.
+unsigned SystemZTTIImpl::
+getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
+ const Instruction *I) {
+ assert (Dst->isVectorTy());
+ unsigned VF = Dst->getVectorNumElements();
+ unsigned Cost = 0;
+ // If we know the widths of the compared operands, get any cost of
+ // converting it to match Dst. Otherwise assume same widths.
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+ if (CmpOpTy != nullptr)
+ Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
+ if (Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP)
+ // One 'vn' per dst vector with an immediate mask.
+ Cost += getNumVectorRegs(Dst);
+ return Cost;
+}
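// Illustrative reading of the helper above: for "zext <4 x i1> %c to <4 x i32>"
// where %c compares two <4 x double> vectors, the i64-wide compare mask first
// pays getVectorBitmaskConversionCost() to be narrowed to i32 lanes, and the
// ZExt then adds one 'vn' with an immediate mask per destination vector
// register (here getNumVectorRegs(<4 x i32>) == 1).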
+
int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I) {
unsigned DstScalarBits = Dst->getScalarSizeInBits();
@@ -611,8 +656,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
assert (Dst->isVectorTy());
unsigned VF = Src->getVectorNumElements();
- unsigned NumDstVectors = getNumberOfParts(Dst);
- unsigned NumSrcVectors = getNumberOfParts(Src);
+ unsigned NumDstVectors = getNumVectorRegs(Dst);
+ unsigned NumSrcVectors = getNumVectorRegs(Src);
if (Opcode == Instruction::Trunc) {
if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits())
@@ -633,19 +678,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
}
- else if (SrcScalarBits == 1) {
- // This should be extension of a compare i1 result.
- // If we know what the widths of the compared operands, get the
- // cost of converting it to Dst. Otherwise assume same widths.
- unsigned Cost = 0;
- Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
- if (CmpOpTy != nullptr)
- Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
- if (Opcode == Instruction::ZExt)
- // One 'vn' per dst vector with an immediate mask.
- Cost += NumDstVectors;
- return Cost;
- }
+ else if (SrcScalarBits == 1)
+ return getBoolVecToIntConversionCost(Opcode, Dst, I);
}
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
@@ -654,8 +688,13 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// (seems to miss on differentiating on scalar/vector types).
// Only 64 bit vector conversions are natively supported.
- if (SrcScalarBits == 64 && DstScalarBits == 64)
- return NumDstVectors;
+ if (DstScalarBits == 64) {
+ if (SrcScalarBits == 64)
+ return NumDstVectors;
+
+ if (SrcScalarBits == 1)
+ return getBoolVecToIntConversionCost(Opcode, Dst, I) + NumDstVectors;
+ }
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values. Base implementation does not
@@ -672,7 +711,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
(Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
NeedsExtracts = false;
- TotCost += getScalarizationOverhead(Dst, NeedsInserts, NeedsExtracts);
+ TotCost += getScalarizationOverhead(Src, false, NeedsExtracts);
+ TotCost += getScalarizationOverhead(Dst, NeedsInserts, false);
// FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
@@ -702,11 +742,18 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
else { // Scalar
assert (!Dst->isVectorTy());
- if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
- return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
+ if (SrcScalarBits >= 32 ||
+ (I != nullptr && isa<LoadInst>(I->getOperand(0))))
+ return 1;
+ return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
+ }
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
Src->isIntegerTy(1)) {
+ if (ST->hasLoadStoreOnCond2())
+ return 2; // li 0; loc 1
+
// This should be extension of a compare i1 result, which is done with
// ipm and a varying sequence of instructions.
unsigned Cost = 0;
@@ -718,7 +765,6 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
// If operands of an fp-type was compared, this costs +1.
Cost++;
-
return Cost;
}
}
@@ -726,8 +772,20 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
}
-int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- const Instruction *I) {
+// Scalar i8 / i16 operations will typically be made after first extending
+// the operands to i32.
+static unsigned getOperandsExtensionCost(const Instruction *I) {
+ unsigned ExtCost = 0;
+ for (Value *Op : I->operands())
+ // A load of i8 or i16 sign/zero extends to i32.
+ if (!isa<LoadInst>(Op) && !isa<ConstantInt>(Op))
+ ExtCost++;
+
+ return ExtCost;
+}
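// Worked example for the helper above (illustrative): an i16 icmp starts at
// cost 1 (see getCmpSelInstrCost below) and pays one extra unit per operand
// that is neither a load nor a constant, since i8/i16 loads already
// sign/zero-extend to i32:
//   icmp slt i16 %loaded, 42   -> 1 + 0
//   icmp slt i16 %a, %b        -> 1 + 2 (both register operands need extending)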
+
+int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy, const Instruction *I) {
if (ValTy->isVectorTy()) {
assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
unsigned VF = ValTy->getVectorNumElements();
@@ -759,7 +817,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondT
// Float is handled with 2*vmr[lh]f + 2*vldeb + vfchdb for each pair of
// floats. FIXME: <2 x float> generates same code as <4 x float>.
unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1);
- unsigned NumVecs_cmp = getNumberOfParts(ValTy);
+ unsigned NumVecs_cmp = getNumVectorRegs(ValTy);
unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));
return Cost;
@@ -775,20 +833,30 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondT
PackCost =
getVectorBitmaskConversionCost(CmpOpTy, ValTy);
- return getNumberOfParts(ValTy) /*vsel*/ + PackCost;
+ return getNumVectorRegs(ValTy) /*vsel*/ + PackCost;
}
}
else { // Scalar
switch (Opcode) {
case Instruction::ICmp: {
+ // A loaded value compared with 0 with multiple users becomes Load and
+ // Test. The load is then not foldable, so return 0 cost for the ICmp.
+ unsigned ScalarBits = ValTy->getScalarSizeInBits();
+ if (I != nullptr && ScalarBits >= 32)
+ if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
+ C->getZExtValue() == 0)
+ return 0;
+
unsigned Cost = 1;
if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
- Cost += 2; // extend both operands
+ Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
return Cost;
}
case Instruction::Select:
if (ValTy->isFloatingPointTy())
- return 4; // No load on condition for FP, so this costs a conditional jump.
+ return 4; // No load on condition for FP - costs a conditional jump.
return 1; // Load On Condition.
}
}
@@ -804,7 +872,7 @@ getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return ((Index % 2 == 0) ? 1 : 0);
if (Opcode == Instruction::ExtractElement) {
- int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1);
+ int Cost = ((getScalarSizeInBits(Val) == 1) ? 2 /*+test-under-mask*/ : 1);
// Give a slight penalty for moving out of vector pipeline to FXU unit.
if (Index == 0 && Val->isIntOrIntVectorTy())
@@ -816,58 +884,147 @@ getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
+// Check if a load may be folded as a memory operand in its user.
+bool SystemZTTIImpl::
+isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) {
+ if (!Ld->hasOneUse())
+ return false;
+ FoldedValue = Ld;
+ const Instruction *UserI = cast<Instruction>(*Ld->user_begin());
+ unsigned LoadedBits = getScalarSizeInBits(Ld->getType());
+ unsigned TruncBits = 0;
+ unsigned SExtBits = 0;
+ unsigned ZExtBits = 0;
+ if (UserI->hasOneUse()) {
+ unsigned UserBits = UserI->getType()->getScalarSizeInBits();
+ if (isa<TruncInst>(UserI))
+ TruncBits = UserBits;
+ else if (isa<SExtInst>(UserI))
+ SExtBits = UserBits;
+ else if (isa<ZExtInst>(UserI))
+ ZExtBits = UserBits;
+ }
+ if (TruncBits || SExtBits || ZExtBits) {
+ FoldedValue = UserI;
+ UserI = cast<Instruction>(*UserI->user_begin());
+ // Load (single use) -> trunc/extend (single use) -> UserI
+ }
+ if ((UserI->getOpcode() == Instruction::Sub ||
+ UserI->getOpcode() == Instruction::SDiv ||
+ UserI->getOpcode() == Instruction::UDiv) &&
+ UserI->getOperand(1) != FoldedValue)
+ return false; // Not commutative, only RHS foldable.
+ // LoadOrTruncBits holds the number of effectively loaded bits, but 0 if an
+ // extension was made of the load.
+ unsigned LoadOrTruncBits =
+ ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits));
+ switch (UserI->getOpcode()) {
+ case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. ZE: 32->64
+ case Instruction::Sub:
+ case Instruction::ICmp:
+ if (LoadedBits == 32 && ZExtBits == 64)
+ return true;
+ LLVM_FALLTHROUGH;
+ case Instruction::Mul: // SE: 16->32, 32->64, z14:16->64
+ if (UserI->getOpcode() != Instruction::ICmp) {
+ if (LoadedBits == 16 &&
+ (SExtBits == 32 ||
+ (SExtBits == 64 && ST->hasMiscellaneousExtensions2())))
+ return true;
+ if (LoadOrTruncBits == 16)
+ return true;
+ }
+ LLVM_FALLTHROUGH;
+ case Instruction::SDiv:// SE: 32->64
+ if (LoadedBits == 32 && SExtBits == 64)
+ return true;
+ LLVM_FALLTHROUGH;
+ case Instruction::UDiv:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // This also makes sense for float operations, but disabled for now due
+ // to regressions.
+ // case Instruction::FCmp:
+ // case Instruction::FAdd:
+ // case Instruction::FSub:
+ // case Instruction::FMul:
+ // case Instruction::FDiv:
+
+ // All possible extensions of memory checked above.
+
+ // Comparison between memory and immediate.
+ if (UserI->getOpcode() == Instruction::ICmp)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(UserI->getOperand(1)))
+ if (isUInt<16>(CI->getZExtValue()))
+ return true;
+ return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
+ break;
+ }
+ return false;
+}
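// Illustrative walk-through of the function above: for the chain
//   %v = load i32, i32* %p          ; single use
//   %e = sext i32 %v to i64         ; single use
//   %r = sdiv i64 %x, %e
// LoadedBits is 32 and SExtBits is 64, the folded value is operand 1 of the
// non-commutative sdiv, and the SDiv case accepts a 32->64 sign extension, so
// isFoldableLoad() returns true (the divide is expected to use the memory
// operand directly). With the operands of the sdiv swapped it returns false.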
+
+static bool isBswapIntrinsicCall(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ if (auto *CI = dyn_cast<CallInst>(I))
+ if (auto *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::bswap)
+ return true;
+ return false;
+}
+
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace,
const Instruction *I) {
assert(!Src->isVoidTy() && "Invalid type");
- if (!Src->isVectorTy() && Opcode == Instruction::Load &&
- I != nullptr && I->hasOneUse()) {
- const Instruction *UserI = cast<Instruction>(*I->user_begin());
- unsigned Bits = Src->getScalarSizeInBits();
- bool FoldsLoad = false;
- switch (UserI->getOpcode()) {
- case Instruction::ICmp:
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- // This also makes sense for float operations, but disabled for now due
- // to regressions.
- // case Instruction::FCmp:
- // case Instruction::FAdd:
- // case Instruction::FSub:
- // case Instruction::FMul:
- // case Instruction::FDiv:
- FoldsLoad = (Bits == 32 || Bits == 64);
- break;
+ if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {
+ // Store the load or its truncated or extended value in FoldedValue.
+ const Instruction *FoldedValue = nullptr;
+ if (isFoldableLoad(cast<LoadInst>(I), FoldedValue)) {
+ const Instruction *UserI = cast<Instruction>(*FoldedValue->user_begin());
+ assert (UserI->getNumOperands() == 2 && "Expected a binop.");
+
+ // UserI can't fold two loads, so in that case return 0 cost only
+ // half of the time.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (UserI->getOperand(i) == FoldedValue)
+ continue;
+
+ if (Instruction *OtherOp = dyn_cast<Instruction>(UserI->getOperand(i))){
+ LoadInst *OtherLoad = dyn_cast<LoadInst>(OtherOp);
+ if (!OtherLoad &&
+ (isa<TruncInst>(OtherOp) || isa<SExtInst>(OtherOp) ||
+ isa<ZExtInst>(OtherOp)))
+ OtherLoad = dyn_cast<LoadInst>(OtherOp->getOperand(0));
+ if (OtherLoad && isFoldableLoad(OtherLoad, FoldedValue/*dummy*/))
+ return i == 0; // Both operands foldable.
+ }
}
- if (FoldsLoad) {
- assert (UserI->getNumOperands() == 2 &&
- "Expected to only handle binops.");
-
- // UserI can't fold two loads, so in that case return 0 cost only
- // half of the time.
- for (unsigned i = 0; i < 2; ++i) {
- if (UserI->getOperand(i) == I)
- continue;
- if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) {
- if (LI->hasOneUse())
- return i == 0;
- }
- }
+ return 0; // Only I is foldable in user.
+ }
+ }
+ unsigned NumOps =
+ (Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
+
+ // Store/Load reversed saves one instruction.
+ if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) {
+ if (Opcode == Instruction::Load && I->hasOneUse()) {
+ const Instruction *LdUser = cast<Instruction>(*I->user_begin());
+ // In case of load -> bswap -> store, return normal cost for the load.
+ if (isBswapIntrinsicCall(LdUser) &&
+ (!LdUser->hasOneUse() || !isa<StoreInst>(*LdUser->user_begin())))
return 0;
- }
+ }
+ else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ const Value *StoredVal = SI->getValueOperand();
+ if (StoredVal->hasOneUse() && isBswapIntrinsicCall(StoredVal))
+ return 0;
+ }
}
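// Illustrative summary of the byte-swap special case above: a load whose only
// user is a bswap that is not merely stored costs 0, on the assumption that a
// reversed load can perform the swap; a store of a single-use bswap result
// likewise costs 0. For a load -> bswap -> store chain only the store side is
// free, so the pair is not double-counted.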
- unsigned NumOps = getNumberOfParts(Src);
-
if (Src->getScalarSizeInBits() == 128)
// 128 bit scalars are held in a pair of two 64 bit registers.
NumOps *= 2;
@@ -875,34 +1032,94 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return NumOps;
}
+// The generic implementation of getInterleavedMemoryOpCost() is based on
+// adding costs of the memory operations plus all the extracts and inserts
+// needed for using / defining the vector operands. The SystemZ version does
+// roughly the same but bases the computations on vector permutations
+// instead.
int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
- unsigned WideBits = (VecTy->isPtrOrPtrVectorTy() ?
- (64U * VecTy->getVectorNumElements()) : VecTy->getPrimitiveSizeInBits());
- assert (WideBits > 0 && "Could not compute size of vector");
- int NumWideParts =
- ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+ // Return the ceiling of dividing A by B.
+ auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
+
+ unsigned NumElts = VecTy->getVectorNumElements();
+ assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
+ unsigned VF = NumElts / Factor;
+ unsigned NumEltsPerVecReg = (128U / getScalarSizeInBits(VecTy));
+ unsigned NumVectorMemOps = getNumVectorRegs(VecTy);
+ unsigned NumPermutes = 0;
+
+ if (Opcode == Instruction::Load) {
+ // Loading interleave groups may have gaps, which may mean fewer
+ // loads. Find out how many vectors will be loaded in total, and how
+ // many of them each value will be in.
+ BitVector UsedInsts(NumVectorMemOps, false);
+ std::vector<BitVector> ValueVecs(Factor, BitVector(NumVectorMemOps, false));
+ for (unsigned Index : Indices)
+ for (unsigned Elt = 0; Elt < VF; ++Elt) {
+ unsigned Vec = (Index + Elt * Factor) / NumEltsPerVecReg;
+ UsedInsts.set(Vec);
+ ValueVecs[Index].set(Vec);
+ }
+ NumVectorMemOps = UsedInsts.count();
+
+ for (unsigned Index : Indices) {
+ // Estimate that each loaded source vector containing this Index
+ // requires one operation, except that vperm can handle two input
+ // registers the first time for each dst vector.
+ unsigned NumSrcVecs = ValueVecs[Index].count();
+ unsigned NumDstVecs = ceil(VF * getScalarSizeInBits(VecTy), 128U);
+ assert (NumSrcVecs >= NumDstVecs && "Expected at least as many sources");
+ NumPermutes += std::max(1U, NumSrcVecs - NumDstVecs);
+ }
+ } else {
+ // Estimate the permutes for each stored vector as the smaller of the
+ // number of elements and the number of source vectors. Subtract one per
+ // dst vector for vperm (see above).
+ unsigned NumSrcVecs = std::min(NumEltsPerVecReg, Factor);
+ unsigned NumDstVecs = NumVectorMemOps;
+ assert (NumSrcVecs > 1 && "Expected at least two source vectors.");
+ NumPermutes += (NumDstVecs * NumSrcVecs) - NumDstVecs;
+ }
- // How many source vectors are handled to produce a vectorized operand?
- int NumElsPerVector = (VecTy->getVectorNumElements() / NumWideParts);
- int NumSrcParts =
- ((NumWideParts > NumElsPerVector) ? NumElsPerVector : NumWideParts);
+ // Cost of load/store operations and the permutations needed.
+ return NumVectorMemOps + NumPermutes;
+}
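
To make the new model concrete, a worked example (illustrative numbers, not taken from the patch): loading an interleave group with Factor = 2 from an <8 x i32> vector gives VF = 4, four elements per 128-bit register and NumVectorMemOps = 2. Each of the two indices has its elements spread across both loaded registers (NumSrcVecs = 2) and fits into one destination register (NumDstVecs = 1), so each index costs max(1, 2 - 1) = 1 permute. The returned cost is 2 memory operations + 2 permutes = 4.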
- // A Load group may have gaps.
- unsigned NumOperands =
- ((Opcode == Instruction::Load) ? Indices.size() : Factor);
+static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
+ if (RetTy->isVectorTy() && ID == Intrinsic::bswap)
+ return getNumVectorRegs(RetTy); // VPERM
+ return -1;
+}
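
For instance, under this model a bswap whose result is an <8 x i32> vector occupies two 128-bit vector registers and is costed as 2 (one VPERM per register); scalar bswaps and all other intrinsics fall through to the base implementation via the overloads below.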
- // Each needed permute takes two vectors as input.
- if (NumSrcParts > 1)
- NumSrcParts--;
- int NumPermutes = NumSrcParts * NumOperands;
+int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args,
+ FastMathFlags FMF, unsigned VF) {
+ int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
+ if (Cost != -1)
+ return Cost;
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+}
- // Cost of load/store operations and the permutations needed.
- return NumWideParts + NumPermutes;
+int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys,
+ FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
+ int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
+ if (Cost != -1)
+ return Cost;
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys,
+ FMF, ScalarizationCostPassed);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 4b11a6f0a837..e79bee1ea3a8 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -37,6 +37,8 @@ public:
/// \name Scalar TTI Implementations
/// @{
+ unsigned getInliningThresholdMultiplier() { return 3; }
+
int getIntImmCost(const APInt &Imm, Type *Ty);
int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
@@ -78,11 +80,14 @@ public:
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
+ unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
+ const Instruction *I);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
@@ -90,7 +95,16 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
+
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1);
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
/// @}
};
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 6bcf60fafc3e..bb937923b47e 100644
--- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -45,6 +45,9 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
Mang = new Mangler();
InitMCObjectFileInfo(TM.getTargetTriple(), TM.isPositionIndependent(), *Ctx,
TM.getCodeModel() == CodeModel::Large);
+
+ // Reset various EH DWARF encodings.
+ PersonalityEncoding = LSDAEncoding = TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
index 092f5ea4104b..39d5705b2a53 100644
--- a/contrib/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -40,12 +40,7 @@ TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
RequireStructuredCFG(false), DefaultOptions(Options), Options(Options) {
}
-TargetMachine::~TargetMachine() {
- delete AsmInfo;
- delete MRI;
- delete MII;
- delete STI;
-}
+TargetMachine::~TargetMachine() = default;
bool TargetMachine::isPositionIndependent() const {
return getRelocationModel() == Reloc::PIC_;
@@ -141,6 +136,15 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
if (GV && GV->hasDLLImportStorageClass())
return false;
+ // On MinGW, variables that haven't been declared with DLLImport may still
+ // end up automatically imported by the linker. To make this feasible,
+ // don't assume the variables to be DSO local unless we actually know
+ // that for sure. This only has to be done for variables; for functions
+ // the linker can insert thunks for calling functions from another DLL.
+ if (TT.isWindowsGNUEnvironment() && GV && GV->isDeclarationForLinker() &&
+ isa<GlobalVariable>(GV))
+ return false;
+
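
A small illustration of the case being carved out here (assumed example, not part of the change): a global that is only declared in the current module may end up auto-imported from a DLL by the MinGW linker, so its address cannot be assumed DSO-local, while calls to undefined functions can still be redirected through linker-generated thunks.

    // Illustration only: on MinGW, 'x' may be auto-imported from a DLL,
    // so its address must not be treated as DSO-local.
    extern int x;
    int load_x() { return x; }    // may read through an import pointer
    void callee();                // declared but not defined here
    void call_it() { callee(); }  // fine: the linker can insert a thunk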
// Every other GV is local on COFF.
// Make an exception for windows OS in the triple: Some firmware builds use
// *-win32-macho triples. This (accidentally?) produced windows relocations
diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp
index 37d398d580f8..bae45ae28c45 100644
--- a/contrib/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm/lib/Target/TargetMachineC.cpp
@@ -115,6 +115,15 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
case LLVMRelocDynamicNoPic:
RM = Reloc::DynamicNoPIC;
break;
+ case LLVMRelocROPI:
+ RM = Reloc::ROPI;
+ break;
+ case LLVMRelocRWPI:
+ RM = Reloc::RWPI;
+ break;
+ case LLVMRelocROPI_RWPI:
+ RM = Reloc::ROPI_RWPI;
+ break;
default:
break;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 2d92b93ca704..0a5908f43790 100644
--- a/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -18,13 +18,14 @@
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"
@@ -34,27 +35,10 @@ using namespace llvm;
namespace {
-// We store register types as SimpleValueType to retain SIMD layout
-// information, but must also be able to supply them as the (unnamed)
-// register enum from WebAssemblyRegisterInfo.td/.inc.
-static unsigned MVTToWasmReg(MVT::SimpleValueType Type) {
- switch(Type) {
- case MVT::i32: return WebAssembly::I32_0;
- case MVT::i64: return WebAssembly::I64_0;
- case MVT::f32: return WebAssembly::F32_0;
- case MVT::f64: return WebAssembly::F64_0;
- case MVT::v16i8: return WebAssembly::V128_0;
- case MVT::v8i16: return WebAssembly::V128_0;
- case MVT::v4i32: return WebAssembly::V128_0;
- case MVT::v4f32: return WebAssembly::V128_0;
- default: return MVT::INVALID_SIMPLE_VALUE_TYPE;
- }
-}
-
/// WebAssemblyOperand - Instances of this class represent the operands in a
/// parsed WASM machine instruction.
struct WebAssemblyOperand : public MCParsedAsmOperand {
- enum KindTy { Token, Local, Stack, Integer, Float, Symbol } Kind;
+ enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;
SMLoc StartLoc, EndLoc;
@@ -62,19 +46,6 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
StringRef Tok;
};
- struct RegOp {
- // This is a (virtual) local or stack register represented as 0..
- unsigned RegNo;
- // In most targets, the register number also encodes the type, but for
- // wasm we have to track that seperately since we have an unbounded
- // number of registers.
- // This has the unfortunate side effect that we supply a different value
- // to the table-gen matcher at different times in the process (when it
- // calls getReg() or addRegOperands().
- // TODO: While this works, it feels brittle. and would be nice to clean up.
- MVT::SimpleValueType Type;
- };
-
struct IntOp {
int64_t Val;
};
@@ -87,37 +58,45 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
const MCExpr *Exp;
};
+ struct BrLOp {
+ std::vector<unsigned> List;
+ };
+
union {
struct TokOp Tok;
- struct RegOp Reg;
struct IntOp Int;
struct FltOp Flt;
struct SymOp Sym;
+ struct BrLOp BrL;
};
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
- : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
- WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, RegOp R)
- : Kind(K), StartLoc(Start), EndLoc(End), Reg(R) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
- : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
- : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
- : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
+ WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End)
+ : Kind(K), StartLoc(Start), EndLoc(End), BrL() {}
+
+ ~WebAssemblyOperand() {
+ if (isBrList())
+ BrL.~BrLOp();
+ }
bool isToken() const override { return Kind == Token; }
- bool isImm() const override { return Kind == Integer ||
- Kind == Float ||
- Kind == Symbol; }
- bool isReg() const override { return Kind == Local || Kind == Stack; }
+ bool isImm() const override {
+ return Kind == Integer || Kind == Float || Kind == Symbol;
+ }
bool isMem() const override { return false; }
+ bool isReg() const override { return false; }
+ bool isBrList() const { return Kind == BrList; }
unsigned getReg() const override {
- assert(isReg());
- // This is called from the tablegen matcher (MatchInstructionImpl)
- // where it expects to match the type of register, see RegOp above.
- return MVTToWasmReg(Reg.Type);
+ llvm_unreachable("Assembly inspects a register operand");
+ return 0;
}
StringRef getToken() const {
@@ -128,19 +107,9 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
SMLoc getStartLoc() const override { return StartLoc; }
SMLoc getEndLoc() const override { return EndLoc; }
- void addRegOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- assert(isReg() && "Not a register operand!");
- // This is called from the tablegen matcher (MatchInstructionImpl)
- // where it expects to output the actual register index, see RegOp above.
- unsigned R = Reg.RegNo;
- if (Kind == Stack) {
- // A stack register is represented as a large negative number.
- // See WebAssemblyRegNumbering::runOnMachineFunction and
- // getWARegStackId for why this | is needed.
- R |= INT32_MIN;
- }
- Inst.addOperand(MCOperand::createReg(R));
+ void addRegOperands(MCInst &, unsigned) const {
+ // Required by the assembly matcher.
+ llvm_unreachable("Assembly matcher creates register operands");
}
void addImmOperands(MCInst &Inst, unsigned N) const {
@@ -155,17 +124,17 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
llvm_unreachable("Should be immediate or symbol!");
}
+ void addBrListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && isBrList() && "Invalid BrList!");
+ for (auto Br : BrL.List)
+ Inst.addOperand(MCOperand::createImm(Br));
+ }
+
void print(raw_ostream &OS) const override {
switch (Kind) {
case Token:
OS << "Tok:" << Tok.Tok;
break;
- case Local:
- OS << "Loc:" << Reg.RegNo << ":" << static_cast<int>(Reg.Type);
- break;
- case Stack:
- OS << "Stk:" << Reg.RegNo << ":" << static_cast<int>(Reg.Type);
- break;
case Integer:
OS << "Int:" << Int.Val;
break;
@@ -175,6 +144,9 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
case Symbol:
OS << "Sym:" << Sym.Exp;
break;
+ case BrList:
+ OS << "BrList:" << BrL.List.size();
+ break;
}
}
};
@@ -182,352 +154,526 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
class WebAssemblyAsmParser final : public MCTargetAsmParser {
MCAsmParser &Parser;
MCAsmLexer &Lexer;
- // These are for the current function being parsed:
- // These are vectors since register assignments are so far non-sparse.
- // Replace by map if necessary.
- std::vector<MVT::SimpleValueType> LocalTypes;
- std::vector<MVT::SimpleValueType> StackTypes;
- MCSymbol *LastLabel;
+
+ // Much like WebAssemblyAsmPrinter in the backend, we have to own these.
+ std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
+
+ // The order of labels, directives and instructions in a .s file has no
+ // syntactical enforcement. This class is a callback from the actual parser,
+ // and yet we have to feed data to the streamer in a very particular
+ // order to ensure a correct binary encoding that matches the regular backend
+ // (the streamer does not enforce this). This "state machine" enum helps
+ // guarantee that correct order.
+ enum ParserState {
+ FileStart,
+ Label,
+ FunctionStart,
+ FunctionLocals,
+ Instructions,
+ } CurrentState = FileStart;
+
+ // For ensuring blocks are properly nested.
+ enum NestingType {
+ Function,
+ Block,
+ Loop,
+ Try,
+ If,
+ Else,
+ Undefined,
+ };
+ std::vector<NestingType> NestingStack;
+
+ // We track this to see if a .functype following a label is the same,
+ // as this is how we recognize the start of a function.
+ MCSymbol *LastLabel = nullptr;
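
As an example of the ordering these members enforce (an illustration based on the directive handling further down, not new behavior): a function in a .s file is recognized when a label is immediately followed by a .functype for the same symbol; an optional .local directive is only accepted while still in the FunctionStart state, the first instruction moves the parser to the Instructions state, and end_function must pop the Function entry pushed when the function started.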
public:
- WebAssemblyAsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
- const MCInstrInfo &mii, const MCTargetOptions &Options)
- : MCTargetAsmParser(Options, sti, mii), Parser(Parser),
- Lexer(Parser.getLexer()), LastLabel(nullptr) {
+ WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI, MII), Parser(Parser),
+ Lexer(Parser.getLexer()) {
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
#define GET_ASSEMBLER_HEADER
#include "WebAssemblyGenAsmMatcher.inc"
// TODO: This is required to be implemented, but appears unused.
- bool ParseRegister(unsigned &/*RegNo*/, SMLoc &/*StartLoc*/,
- SMLoc &/*EndLoc*/) override {
+ bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/,
+ SMLoc & /*EndLoc*/) override {
llvm_unreachable("ParseRegister is not implemented.");
}
- bool Error(const StringRef &msg, const AsmToken &tok) {
- return Parser.Error(tok.getLoc(), msg + tok.getString());
+ bool error(const Twine &Msg, const AsmToken &Tok) {
+ return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
+ }
+
+ bool error(const Twine &Msg) {
+ return Parser.Error(Lexer.getTok().getLoc(), Msg);
+ }
+
+ void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
+ Signatures.push_back(std::move(Sig));
+ }
+
+ std::pair<StringRef, StringRef> nestingString(NestingType NT) {
+ switch (NT) {
+ case Function:
+ return {"function", "end_function"};
+ case Block:
+ return {"block", "end_block"};
+ case Loop:
+ return {"loop", "end_loop"};
+ case Try:
+ return {"try", "end_try"};
+ case If:
+ return {"if", "end_if"};
+ case Else:
+ return {"else", "end_if"};
+ default:
+ llvm_unreachable("unknown NestingType");
+ }
+ }
+
+ void push(NestingType NT) { NestingStack.push_back(NT); }
+
+ bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) {
+ if (NestingStack.empty())
+ return error(Twine("End of block construct with no start: ") + Ins);
+ auto Top = NestingStack.back();
+ if (Top != NT1 && Top != NT2)
+ return error(Twine("Block construct type mismatch, expected: ") +
+ nestingString(Top).second + ", instead got: " + Ins);
+ NestingStack.pop_back();
+ return false;
+ }
+
+ bool ensureEmptyNestingStack() {
+ auto err = !NestingStack.empty();
+ while (!NestingStack.empty()) {
+ error(Twine("Unmatched block construct(s) at function end: ") +
+ nestingString(NestingStack.back()).first);
+ NestingStack.pop_back();
+ }
+ return err;
}
- bool IsNext(AsmToken::TokenKind Kind) {
- auto ok = Lexer.is(Kind);
- if (ok) Parser.Lex();
- return ok;
+ bool isNext(AsmToken::TokenKind Kind) {
+ auto Ok = Lexer.is(Kind);
+ if (Ok)
+ Parser.Lex();
+ return Ok;
}
- bool Expect(AsmToken::TokenKind Kind, const char *KindName) {
- if (!IsNext(Kind))
- return Error(std::string("Expected ") + KindName + ", instead got: ",
+ bool expect(AsmToken::TokenKind Kind, const char *KindName) {
+ if (!isNext(Kind))
+ return error(std::string("Expected ") + KindName + ", instead got: ",
Lexer.getTok());
return false;
}
- MVT::SimpleValueType ParseRegType(const StringRef &RegType) {
- // Derive type from .param .local decls, or the instruction itself.
- return StringSwitch<MVT::SimpleValueType>(RegType)
- .Case("i32", MVT::i32)
- .Case("i64", MVT::i64)
- .Case("f32", MVT::f32)
- .Case("f64", MVT::f64)
- .Case("i8x16", MVT::v16i8)
- .Case("i16x8", MVT::v8i16)
- .Case("i32x4", MVT::v4i32)
- .Case("f32x4", MVT::v4f32)
- .Default(MVT::INVALID_SIMPLE_VALUE_TYPE);
+ StringRef expectIdent() {
+ if (!Lexer.is(AsmToken::Identifier)) {
+ error("Expected identifier, got: ", Lexer.getTok());
+ return StringRef();
+ }
+ auto Name = Lexer.getTok().getString();
+ Parser.Lex();
+ return Name;
}
- MVT::SimpleValueType &GetType(
- std::vector<MVT::SimpleValueType> &Types, size_t i) {
- Types.resize(std::max(i + 1, Types.size()), MVT::INVALID_SIMPLE_VALUE_TYPE);
- return Types[i];
+ Optional<wasm::ValType> parseType(const StringRef &Type) {
+ // FIXME: can't use StringSwitch because wasm::ValType doesn't have a
+ // "invalid" value.
+ if (Type == "i32")
+ return wasm::ValType::I32;
+ if (Type == "i64")
+ return wasm::ValType::I64;
+ if (Type == "f32")
+ return wasm::ValType::F32;
+ if (Type == "f64")
+ return wasm::ValType::F64;
+ if (Type == "v128" || Type == "i8x16" || Type == "i16x8" ||
+ Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
+ Type == "f64x2")
+ return wasm::ValType::V128;
+ return Optional<wasm::ValType>();
}
- bool ParseReg(OperandVector &Operands, StringRef TypePrefix) {
- if (Lexer.is(AsmToken::Integer)) {
- auto &Local = Lexer.getTok();
- // This is a reference to a local, turn it into a virtual register.
- auto LocalNo = static_cast<unsigned>(Local.getIntVal());
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Local, Local.getLoc(),
- Local.getEndLoc(),
- WebAssemblyOperand::RegOp{LocalNo,
- GetType(LocalTypes, LocalNo)}));
- Parser.Lex();
- } else if (Lexer.is(AsmToken::Identifier)) {
- auto &StackRegTok = Lexer.getTok();
- // These are push/pop/drop pseudo stack registers, which we turn
- // into virtual registers also. The stackify pass will later turn them
- // back into implicit stack references if possible.
- auto StackReg = StackRegTok.getString();
- auto StackOp = StackReg.take_while([](char c) { return isalpha(c); });
- auto Reg = StackReg.drop_front(StackOp.size());
- unsigned long long ParsedRegNo = 0;
- if (!Reg.empty() && getAsUnsignedInteger(Reg, 10, ParsedRegNo))
- return Error("Cannot parse stack register index: ", StackRegTok);
- unsigned RegNo = static_cast<unsigned>(ParsedRegNo);
- if (StackOp == "push") {
- // This defines a result, record register type.
- auto RegType = ParseRegType(TypePrefix);
- GetType(StackTypes, RegNo) = RegType;
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Stack,
- StackRegTok.getLoc(),
- StackRegTok.getEndLoc(),
- WebAssemblyOperand::RegOp{RegNo, RegType}));
- } else if (StackOp == "pop") {
- // This uses a previously defined stack value.
- auto RegType = GetType(StackTypes, RegNo);
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Stack,
- StackRegTok.getLoc(),
- StackRegTok.getEndLoc(),
- WebAssemblyOperand::RegOp{RegNo, RegType}));
- } else if (StackOp == "drop") {
- // This operand will be dropped, since it is part of an instruction
- // whose result is void.
- } else {
- return Error("Unknown stack register prefix: ", StackRegTok);
- }
+ WebAssembly::ExprType parseBlockType(StringRef ID) {
+ return StringSwitch<WebAssembly::ExprType>(ID)
+ .Case("i32", WebAssembly::ExprType::I32)
+ .Case("i64", WebAssembly::ExprType::I64)
+ .Case("f32", WebAssembly::ExprType::F32)
+ .Case("f64", WebAssembly::ExprType::F64)
+ .Case("v128", WebAssembly::ExprType::V128)
+ .Case("except_ref", WebAssembly::ExprType::ExceptRef)
+ .Case("void", WebAssembly::ExprType::Void)
+ .Default(WebAssembly::ExprType::Invalid);
+ }
+
+ bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) {
+ while (Lexer.is(AsmToken::Identifier)) {
+ auto Type = parseType(Lexer.getTok().getString());
+ if (!Type)
+ return true;
+ Types.push_back(Type.getValue());
Parser.Lex();
- } else {
- return Error(
- "Expected identifier/integer following $, instead got: ",
- Lexer.getTok());
+ if (!isNext(AsmToken::Comma))
+ break;
}
- IsNext(AsmToken::Equal);
return false;
}
- void ParseSingleInteger(bool IsNegative, OperandVector &Operands) {
+ void parseSingleInteger(bool IsNegative, OperandVector &Operands) {
auto &Int = Lexer.getTok();
int64_t Val = Int.getIntVal();
- if (IsNegative) Val = -Val;
+ if (IsNegative)
+ Val = -Val;
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, Int.getLoc(),
- Int.getEndLoc(), WebAssemblyOperand::IntOp{Val}));
+ WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
+ WebAssemblyOperand::IntOp{Val}));
Parser.Lex();
}
- bool ParseOperandStartingWithInteger(bool IsNegative,
- OperandVector &Operands,
- StringRef InstType) {
- ParseSingleInteger(IsNegative, Operands);
- if (Lexer.is(AsmToken::LParen)) {
- // Parse load/store operands of the form: offset($reg)align
- auto &LParen = Lexer.getTok();
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token,
- LParen.getLoc(),
- LParen.getEndLoc(),
- WebAssemblyOperand::TokOp{
- LParen.getString()}));
- Parser.Lex();
- if (Expect(AsmToken::Dollar, "register")) return true;
- if (ParseReg(Operands, InstType)) return true;
- auto &RParen = Lexer.getTok();
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token,
- RParen.getLoc(),
- RParen.getEndLoc(),
- WebAssemblyOperand::TokOp{
- RParen.getString()}));
- if (Expect(AsmToken::RParen, ")")) return true;
- if (Lexer.is(AsmToken::Integer)) {
- ParseSingleInteger(false, Operands);
+ bool parseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands,
+ StringRef InstName) {
+ parseSingleInteger(IsNegative, Operands);
+ // FIXME: there is probably a cleaner way to do this.
+ auto IsLoadStore = InstName.startswith("load") ||
+ InstName.startswith("store") ||
+ InstName.startswith("atomic_load") ||
+ InstName.startswith("atomic_store");
+ if (IsLoadStore) {
+ // Parse load/store operands of the form: offset align
+ auto &Offset = Lexer.getTok();
+ if (Offset.is(AsmToken::Integer)) {
+ parseSingleInteger(false, Operands);
} else {
// Alignment not specified.
// FIXME: correctly derive a default from the instruction.
+ // We can't just call WebAssembly::GetDefaultP2Align since we don't have
+ // an opcode until after the assembly matcher.
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, RParen.getLoc(),
- RParen.getEndLoc(), WebAssemblyOperand::IntOp{0}));
+ WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(),
+ WebAssemblyOperand::IntOp{0}));
}
}
return false;
}
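
Concretely (an illustration of the flow above, not a syntax reference): when a load or store mnemonic is followed by two integers, the first becomes the offset operand and the second the alignment operand; if only one integer is present, the offset is recorded and a placeholder alignment operand of 0 is pushed instead.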
- bool ParseInstruction(ParseInstructionInfo &/*Info*/, StringRef Name,
+ void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc,
+ WebAssembly::ExprType BT) {
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Integer, NameLoc, NameLoc,
+ WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)}));
+ }
+
+ bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override {
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token, NameLoc,
- SMLoc::getFromPointer(
- NameLoc.getPointer() + Name.size()),
- WebAssemblyOperand::TokOp{
- StringRef(NameLoc.getPointer(),
- Name.size())}));
+ // Note: Name does NOT point into the source code, but to a local copy, so
+ // use NameLoc instead.
+ Name = StringRef(NameLoc.getPointer(), Name.size());
+
+ // WebAssembly has instructions with / in them, which AsmLexer parses
+ // as separate tokens, so if we find such tokens immediately adjacent (no
+ // whitespace), expand the name to include them:
+ for (;;) {
+ auto &Sep = Lexer.getTok();
+ if (Sep.getLoc().getPointer() != Name.end() ||
+ Sep.getKind() != AsmToken::Slash)
+ break;
+ // Extend name with /
+ Name = StringRef(Name.begin(), Name.size() + Sep.getString().size());
+ Parser.Lex();
+ // We must now find another identifier, or error.
+ auto &Id = Lexer.getTok();
+ if (Id.getKind() != AsmToken::Identifier ||
+ Id.getLoc().getPointer() != Name.end())
+ return error("Incomplete instruction name: ", Id);
+ Name = StringRef(Name.begin(), Name.size() + Id.getString().size());
+ Parser.Lex();
+ }
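
For example, a mnemonic such as i32.wrap/i64 (an assumed instance of the slash-containing names of that era) reaches this code as the identifier i32.wrap, a slash token and the identifier i64; because the tokens are directly adjacent, the loop above glues them back into a single operand name.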
+
+ // Now construct the name as first operand.
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
+ WebAssemblyOperand::TokOp{Name}));
auto NamePair = Name.split('.');
// If no '.', there is no type prefix.
- if (NamePair.second.empty()) std::swap(NamePair.first, NamePair.second);
+ auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second;
+
+ // If this instruction is part of a control flow structure, ensure
+ // proper nesting.
+ bool ExpectBlockType = false;
+ if (BaseName == "block") {
+ push(Block);
+ ExpectBlockType = true;
+ } else if (BaseName == "loop") {
+ push(Loop);
+ ExpectBlockType = true;
+ } else if (BaseName == "try") {
+ push(Try);
+ ExpectBlockType = true;
+ } else if (BaseName == "if") {
+ push(If);
+ ExpectBlockType = true;
+ } else if (BaseName == "else") {
+ if (pop(BaseName, If))
+ return true;
+ push(Else);
+ } else if (BaseName == "catch") {
+ if (pop(BaseName, Try))
+ return true;
+ push(Try);
+ } else if (BaseName == "catch_all") {
+ if (pop(BaseName, Try))
+ return true;
+ push(Try);
+ } else if (BaseName == "end_if") {
+ if (pop(BaseName, If, Else))
+ return true;
+ } else if (BaseName == "end_try") {
+ if (pop(BaseName, Try))
+ return true;
+ } else if (BaseName == "end_loop") {
+ if (pop(BaseName, Loop))
+ return true;
+ } else if (BaseName == "end_block") {
+ if (pop(BaseName, Block))
+ return true;
+ } else if (BaseName == "end_function") {
+ if (pop(BaseName, Function) || ensureEmptyNestingStack())
+ return true;
+ }
+
while (Lexer.isNot(AsmToken::EndOfStatement)) {
auto &Tok = Lexer.getTok();
switch (Tok.getKind()) {
- case AsmToken::Dollar: {
- Parser.Lex();
- if (ParseReg(Operands, NamePair.first)) return true;
- break;
- }
case AsmToken::Identifier: {
auto &Id = Lexer.getTok();
- const MCExpr *Val;
- SMLoc End;
- if (Parser.parsePrimaryExpr(Val, End))
- return Error("Cannot parse symbol: ", Lexer.getTok());
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Symbol, Id.getLoc(),
- Id.getEndLoc(), WebAssemblyOperand::SymOp{Val}));
+ if (ExpectBlockType) {
+ // Assume this identifier is a block_type.
+ auto BT = parseBlockType(Id.getString());
+ if (BT == WebAssembly::ExprType::Invalid)
+ return error("Unknown block type: ", Id);
+ addBlockTypeOperand(Operands, NameLoc, BT);
+ Parser.Lex();
+ } else {
+ // Assume this identifier is a label.
+ const MCExpr *Val;
+ SMLoc End;
+ if (Parser.parsePrimaryExpr(Val, End))
+ return error("Cannot parse symbol: ", Lexer.getTok());
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
+ WebAssemblyOperand::SymOp{Val}));
+ }
break;
}
case AsmToken::Minus:
Parser.Lex();
if (Lexer.isNot(AsmToken::Integer))
- return Error("Expected integer instead got: ", Lexer.getTok());
- if (ParseOperandStartingWithInteger(true, Operands, NamePair.first))
+ return error("Expected integer instead got: ", Lexer.getTok());
+ if (parseOperandStartingWithInteger(true, Operands, BaseName))
return true;
break;
case AsmToken::Integer:
- if (ParseOperandStartingWithInteger(false, Operands, NamePair.first))
+ if (parseOperandStartingWithInteger(false, Operands, BaseName))
return true;
break;
case AsmToken::Real: {
double Val;
if (Tok.getString().getAsDouble(Val, false))
- return Error("Cannot parse real: ", Tok);
+ return error("Cannot parse real: ", Tok);
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Float, Tok.getLoc(),
- Tok.getEndLoc(), WebAssemblyOperand::FltOp{Val}));
+ WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(),
+ WebAssemblyOperand::FltOp{Val}));
+ Parser.Lex();
+ break;
+ }
+ case AsmToken::LCurly: {
Parser.Lex();
+ auto Op = make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc());
+ if (!Lexer.is(AsmToken::RCurly))
+ for (;;) {
+ Op->BrL.List.push_back(Lexer.getTok().getIntVal());
+ expect(AsmToken::Integer, "integer");
+ if (!isNext(AsmToken::Comma))
+ break;
+ }
+ expect(AsmToken::RCurly, "}");
+ Operands.push_back(std::move(Op));
break;
}
default:
- return Error("Unexpected token in operand: ", Tok);
+ return error("Unexpected token in operand: ", Tok);
}
if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Expect(AsmToken::Comma, ",")) return true;
- }
- }
- Parser.Lex();
- // Call instructions are vararg, but the tablegen matcher doesn't seem to
- // support that, so for now we strip these extra operands.
- // This is problematic if these arguments are not simple $pop stack
- // registers, since e.g. a local register would get lost, so we check for
- // this. This can be the case when using -disable-wasm-explicit-locals
- // which currently s2wasm requires.
- // TODO: Instead, we can move this code to MatchAndEmitInstruction below and
- // actually generate get_local instructions on the fly.
- // Or even better, improve the matcher to support vararg?
- auto IsIndirect = NamePair.second == "call_indirect";
- if (IsIndirect || NamePair.second == "call") {
- // Figure out number of fixed operands from the instruction.
- size_t CallOperands = 1; // The name token.
- if (!IsIndirect) CallOperands++; // The function index.
- if (!NamePair.first.empty()) CallOperands++; // The result register.
- if (Operands.size() > CallOperands) {
- // Ensure operands we drop are all $pop.
- for (size_t I = CallOperands; I < Operands.size(); I++) {
- auto Operand =
- reinterpret_cast<WebAssemblyOperand *>(Operands[I].get());
- if (Operand->Kind != WebAssemblyOperand::Stack)
- Parser.Error(NameLoc,
- "Call instruction has non-stack arguments, if this code was "
- "generated with -disable-wasm-explicit-locals please remove it");
- }
- // Drop unneeded operands.
- Operands.resize(CallOperands);
+ if (expect(AsmToken::Comma, ","))
+ return true;
}
}
- // Block instructions require a signature index, but these are missing in
- // assembly, so we add a dummy one explicitly (since we have no control
- // over signature tables here, we assume these will be regenerated when
- // the wasm module is generated).
- if (NamePair.second == "block" || NamePair.second == "loop") {
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, NameLoc,
- NameLoc, WebAssemblyOperand::IntOp{-1}));
- }
- // These don't specify the type, which has to derived from the local index.
- if (NamePair.second == "get_local" || NamePair.second == "tee_local") {
- if (Operands.size() >= 3 && Operands[1]->isReg() &&
- Operands[2]->isImm()) {
- auto Op1 = reinterpret_cast<WebAssemblyOperand *>(Operands[1].get());
- auto Op2 = reinterpret_cast<WebAssemblyOperand *>(Operands[2].get());
- auto Type = GetType(LocalTypes, static_cast<size_t>(Op2->Int.Val));
- Op1->Reg.Type = Type;
- GetType(StackTypes, Op1->Reg.RegNo) = Type;
- }
+ if (ExpectBlockType && Operands.size() == 1) {
+ // Support blocks with no operands as default to void.
+ addBlockTypeOperand(Operands, NameLoc, WebAssembly::ExprType::Void);
}
+ Parser.Lex();
return false;
}
void onLabelParsed(MCSymbol *Symbol) override {
LastLabel = Symbol;
+ CurrentState = Label;
}
+ bool parseSignature(wasm::WasmSignature *Signature) {
+ if (expect(AsmToken::LParen, "("))
+ return true;
+ if (parseRegTypeList(Signature->Params))
+ return true;
+ if (expect(AsmToken::RParen, ")"))
+ return true;
+ if (expect(AsmToken::MinusGreater, "->"))
+ return true;
+ if (expect(AsmToken::LParen, "("))
+ return true;
+ if (parseRegTypeList(Signature->Returns))
+ return true;
+ if (expect(AsmToken::RParen, ")"))
+ return true;
+ return false;
+ }
+
+ // This function processes wasm-specific directives streamed to
+ // WebAssemblyTargetStreamer, all others go to the generic parser
+ // (see WasmAsmParser).
bool ParseDirective(AsmToken DirectiveID) override {
+ // This function has a really weird return value behavior that is different
+ // from all the other parsing functions:
+ // - return true && no tokens consumed -> don't know this directive / let
+ // the generic parser handle it.
+ // - return true && tokens consumed -> a parsing error occurred.
+ // - return false -> processed this directive successfully.
assert(DirectiveID.getKind() == AsmToken::Identifier);
auto &Out = getStreamer();
- auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
- *Out.getTargetStreamer());
- // TODO: we're just parsing the subset of directives we're interested in,
- // and ignoring ones we don't recognise. We should ideally verify
- // all directives here.
- if (DirectiveID.getString() == ".type") {
- // This could be the start of a function, check if followed by
- // "label,@function"
- if (!(IsNext(AsmToken::Identifier) &&
- IsNext(AsmToken::Comma) &&
- IsNext(AsmToken::At) &&
- Lexer.is(AsmToken::Identifier)))
- return Error("Expected label,@type declaration, got: ", Lexer.getTok());
- if (Lexer.getTok().getString() == "function") {
- // Track locals from start of function.
- LocalTypes.clear();
- StackTypes.clear();
- }
- Parser.Lex();
- //Out.EmitSymbolAttribute(??, MCSA_ELF_TypeFunction);
- } else if (DirectiveID.getString() == ".param" ||
- DirectiveID.getString() == ".local") {
- // Track the number of locals, needed for correct virtual register
- // assignment elsewhere.
- // Also output a directive to the streamer.
- std::vector<MVT> Params;
- std::vector<MVT> Locals;
- while (Lexer.is(AsmToken::Identifier)) {
- auto RegType = ParseRegType(Lexer.getTok().getString());
- if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE) return true;
- LocalTypes.push_back(RegType);
- if (DirectiveID.getString() == ".param") {
- Params.push_back(RegType);
- } else {
- Locals.push_back(RegType);
- }
- Parser.Lex();
- if (!IsNext(AsmToken::Comma)) break;
+ auto &TOut =
+ reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
+
+ // TODO: any time we return an error, at least one token must have been
+ // consumed, otherwise this will not signal an error to the caller.
+ if (DirectiveID.getString() == ".globaltype") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ if (expect(AsmToken::Comma, ","))
+ return true;
+ auto TypeTok = Lexer.getTok();
+ auto TypeName = expectIdent();
+ if (TypeName.empty())
+ return true;
+ auto Type = parseType(TypeName);
+ if (!Type)
+ return error("Unknown type in .globaltype directive: ", TypeTok);
+ // Now set this symbol with the correct type.
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ WasmSym->setGlobalType(
+ wasm::WasmGlobalType{uint8_t(Type.getValue()), true});
+ // And emit the directive again.
+ TOut.emitGlobalType(WasmSym);
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".functype") {
+ // This code has to send things to the streamer similar to
+ // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
+ // TODO: would be good to factor this into a common function, but the
+ // assembler and backend really don't share any common code, and this code
+ // parses the locals separately.
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ if (CurrentState == Label && WasmSym == LastLabel) {
+ // This .functype indicates a start of a function.
+ if (ensureEmptyNestingStack())
+ return true;
+ CurrentState = FunctionStart;
+ push(Function);
}
- assert(LastLabel);
- TOut.emitParam(LastLabel, Params);
+ auto Signature = make_unique<wasm::WasmSignature>();
+ if (parseSignature(Signature.get()))
+ return true;
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ TOut.emitFunctionType(WasmSym);
+ // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".eventtype") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ auto Signature = make_unique<wasm::WasmSignature>();
+ if (parseRegTypeList(Signature->Params))
+ return true;
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT);
+ TOut.emitEventType(WasmSym);
+ // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".local") {
+ if (CurrentState != FunctionStart)
+ return error(".local directive should follow the start of a function",
+ Lexer.getTok());
+ SmallVector<wasm::ValType, 4> Locals;
+ if (parseRegTypeList(Locals))
+ return true;
TOut.emitLocal(Locals);
- } else {
- // For now, ignore anydirective we don't recognize:
- while (Lexer.isNot(AsmToken::EndOfStatement)) Parser.Lex();
+ CurrentState = FunctionLocals;
+ return expect(AsmToken::EndOfStatement, "EOL");
}
- return Expect(AsmToken::EndOfStatement, "EOL");
+
+ return true; // We didn't process this directive.
}
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &/*Opcode*/,
- OperandVector &Operands,
- MCStreamer &Out, uint64_t &ErrorInfo,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override {
MCInst Inst;
unsigned MatchResult =
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
case Match_Success: {
+ if (CurrentState == FunctionStart) {
+ // This is the first instruction in a function, but we haven't seen
+ // a .local directive yet. The streamer requires locals to be encoded
+ // as a prelude to the instructions, so emit an empty list of locals
+ // here.
+ auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
+ *Out.getTargetStreamer());
+ TOut.emitLocal(SmallVector<wasm::ValType, 0>());
+ }
+ CurrentState = Instructions;
Out.EmitInstruction(Inst, getSTI());
return false;
}
case Match_MissingFeature:
- return Parser.Error(IDLoc,
- "instruction requires a WASM feature not currently enabled");
+ return Parser.Error(
+ IDLoc, "instruction requires a WASM feature not currently enabled");
case Match_MnemonicFail:
return Parser.Error(IDLoc, "invalid instruction");
case Match_NearMisses:
@@ -547,6 +693,8 @@ public:
}
llvm_unreachable("Implement any new match types added!");
}
+
+ void onEndOfFile() override { ensureEmptyNestingStack(); }
};
} // end anonymous namespace
diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 2f0960271e30..6acc9b20eed2 100644
--- a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -16,7 +16,6 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -37,6 +36,8 @@ using DecodeStatus = MCDisassembler::DecodeStatus;
#include "WebAssemblyGenDisassemblerTables.inc"
namespace {
+static constexpr int WebAssemblyInstructionTableSize = 256;
+
class WebAssemblyDisassembler final : public MCDisassembler {
std::unique_ptr<const MCInstrInfo> MCII;
@@ -75,31 +76,43 @@ static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
return V;
}
-static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, bool Signed) {
+static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
+ bool Signed = false) {
unsigned N = 0;
const char *Error = nullptr;
- auto Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
- Bytes.data() + Bytes.size(), &Error)
- : static_cast<int64_t>(
- decodeULEB128(Bytes.data() + Size, &N,
- Bytes.data() + Bytes.size(), &Error));
+ Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
+ Bytes.data() + Bytes.size(), &Error)
+ : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
+ Bytes.data() + Bytes.size(),
+ &Error));
if (Error)
return false;
Size += N;
+ return true;
+}
+
+static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, bool Signed) {
+ int64_t Val;
+ if (!nextLEB(Val, Bytes, Size, Signed))
+ return false;
MI.addOperand(MCOperand::createImm(Val));
return true;
}
template <typename T>
-bool parseFPImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
+bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
if (Size + sizeof(T) > Bytes.size())
return false;
T Val;
memcpy(&Val, Bytes.data() + Size, sizeof(T));
support::endian::byte_swap<T, support::endianness::little>(Val);
Size += sizeof(T);
- MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
+ if (std::is_floating_point<T>::value) {
+ MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
+ } else {
+ MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
+ }
return true;
}
@@ -108,7 +121,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
raw_ostream & /*OS*/, raw_ostream &CS) const {
CommentStream = &CS;
Size = 0;
- auto Opc = nextByte(Bytes, Size);
+ int Opc = nextByte(Bytes, Size);
if (Opc < 0)
return MCDisassembler::Fail;
const auto *WasmInst = &InstructionTable0[Opc];
@@ -124,10 +137,12 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
}
if (!WasmInst)
return MCDisassembler::Fail;
- Opc = nextByte(Bytes, Size);
- if (Opc < 0)
+ int64_t PrefixedOpc;
+ if (!nextLEB(PrefixedOpc, Bytes, Size))
return MCDisassembler::Fail;
- WasmInst += Opc;
+ if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
+ return MCDisassembler::Fail;
+ WasmInst += PrefixedOpc;
}
if (WasmInst->ET == ET_Unused)
return MCDisassembler::Fail;
@@ -136,7 +151,8 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MI.setOpcode(WasmInst->Opcode);
// Parse any operands.
for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
- switch (WasmInst->Operands[OPI]) {
+ auto OT = OperandTable[WasmInst->OperandStart + OPI];
+ switch (OT) {
// ULEB operands:
case WebAssembly::OPERAND_BASIC_BLOCK:
case WebAssembly::OPERAND_LOCAL:
@@ -152,32 +168,68 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
}
// SLEB operands:
case WebAssembly::OPERAND_I32IMM:
- case WebAssembly::OPERAND_I64IMM:
- case WebAssembly::OPERAND_SIGNATURE: {
+ case WebAssembly::OPERAND_I64IMM: {
if (!parseLEBImmediate(MI, Size, Bytes, true))
return MCDisassembler::Fail;
break;
}
+ // block_type operands (uint8_t).
+ case WebAssembly::OPERAND_SIGNATURE: {
+ if (!parseImmediate<uint8_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
// FP operands.
case WebAssembly::OPERAND_F32IMM: {
- if (!parseFPImmediate<float>(MI, Size, Bytes))
+ if (!parseImmediate<float>(MI, Size, Bytes))
return MCDisassembler::Fail;
break;
}
case WebAssembly::OPERAND_F64IMM: {
- if (!parseFPImmediate<double>(MI, Size, Bytes))
+ if (!parseImmediate<double>(MI, Size, Bytes))
return MCDisassembler::Fail;
break;
}
- case MCOI::OPERAND_REGISTER: {
- // These are NOT actually in the instruction stream, but MC is going to
- // expect operands to be present for them!
- // FIXME: can MC re-generate register assignments or do we have to
- // do this? Since this function decodes a single instruction, we don't
- // have the proper context for tracking an operand stack here.
- MI.addOperand(MCOperand::createReg(0));
+ // Vector lane operands (not LEB encoded).
+ case WebAssembly::OPERAND_VEC_I8IMM: {
+ if (!parseImmediate<uint8_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_VEC_I16IMM: {
+ if (!parseImmediate<uint16_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_VEC_I32IMM: {
+ if (!parseImmediate<uint32_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_VEC_I64IMM: {
+ if (!parseImmediate<uint64_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_BRLIST: {
+ int64_t TargetTableLen;
+ if (!nextLEB(TargetTableLen, Bytes, Size, false))
+ return MCDisassembler::Fail;
+ for (int64_t I = 0; I < TargetTableLen; I++) {
+ if (!parseLEBImmediate(MI, Size, Bytes, false))
+ return MCDisassembler::Fail;
+ }
+ // Default case.
+ if (!parseLEBImmediate(MI, Size, Bytes, false))
+ return MCDisassembler::Fail;
break;
}
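
For example (an illustrative decode, not from the patch): a br_table whose branch table is {1, 2} with default target 0 is read here as the ULEB table length 2, the two ULEB targets 1 and 2, and finally the ULEB default target 0, each of which is appended as an immediate operand of the MCInst.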
+ case MCOI::OPERAND_REGISTER:
+ // The tablegen header currently does not have any register operands since
+ // we use only the stack (_S) instructions.
+ // If you hit this, it probably means a bad instruction definition in
+ // tablegen.
+ llvm_unreachable("Register operand in WebAssemblyDisassembler");
default:
llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
index 10fa798ac8d7..15532d7ff1a6 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -35,12 +35,12 @@ using namespace llvm;
WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI), ControlFlowCounter(0) {}
+ : MCInstPrinter(MAI, MII, MRI) {}
void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
unsigned RegNo) const {
assert(RegNo != WebAssemblyFunctionInfo::UnusedReg);
- // Note that there's an implicit get_local/set_local here!
+ // Note that there's an implicit local.get/local.set here!
OS << "$" << RegNo;
}
@@ -57,9 +57,9 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
// FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because
// we have an extra flags operand which is not currently printed, for
 // compatibility reasons.
- if (i != 0 &&
- (MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID ||
- i != Desc.getNumOperands()))
+ if (i != 0 && ((MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID &&
+ MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID_S) ||
+ i != Desc.getNumOperands()))
OS << ", ";
printOperand(MI, i, OS);
}
@@ -70,25 +70,76 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
if (CommentStream) {
// Observe any effects on the control flow stack, for use in annotating
// control flow label references.
- switch (MI->getOpcode()) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
default:
break;
- case WebAssembly::LOOP: {
+
+ case WebAssembly::LOOP:
+ case WebAssembly::LOOP_S:
printAnnotation(OS, "label" + utostr(ControlFlowCounter) + ':');
ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, true));
break;
- }
+
case WebAssembly::BLOCK:
+ case WebAssembly::BLOCK_S:
ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
break;
+
+ case WebAssembly::TRY:
+ case WebAssembly::TRY_S:
+ ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
+ EHPadStack.push_back(EHPadStackCounter++);
+ LastSeenEHInst = TRY;
+ break;
+
case WebAssembly::END_LOOP:
- // Have to guard against an empty stack, in case of mismatched pairs
- // in assembly parsing.
- if (!ControlFlowStack.empty()) ControlFlowStack.pop_back();
+ case WebAssembly::END_LOOP_S:
+ if (ControlFlowStack.empty()) {
+ printAnnotation(OS, "End marker mismatch!");
+ } else {
+ ControlFlowStack.pop_back();
+ }
break;
+
case WebAssembly::END_BLOCK:
- if (!ControlFlowStack.empty()) printAnnotation(
- OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ case WebAssembly::END_BLOCK_S:
+ if (ControlFlowStack.empty()) {
+ printAnnotation(OS, "End marker mismatch!");
+ } else {
+ printAnnotation(
+ OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ }
+ break;
+
+ case WebAssembly::END_TRY:
+ case WebAssembly::END_TRY_S:
+ if (ControlFlowStack.empty()) {
+ printAnnotation(OS, "End marker mismatch!");
+ } else {
+ printAnnotation(
+ OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ LastSeenEHInst = END_TRY;
+ }
+ break;
+
+ case WebAssembly::CATCH_I32:
+ case WebAssembly::CATCH_I32_S:
+ case WebAssembly::CATCH_I64:
+ case WebAssembly::CATCH_I64_S:
+ case WebAssembly::CATCH_ALL:
+ case WebAssembly::CATCH_ALL_S:
+ // There can be multiple catch instructions for one try instruction, so we
+ // print a label only for the first 'catch'.
+ if (LastSeenEHInst != CATCH) {
+ if (EHPadStack.empty()) {
+ printAnnotation(OS, "try-catch mismatch!");
+ } else {
+ printAnnotation(OS,
+ "catch" + utostr(EHPadStack.pop_back_val()) + ':');
+ }
+ }
+ LastSeenEHInst = CATCH;
break;
}
@@ -96,34 +147,61 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
unsigned NumFixedOperands = Desc.NumOperands;
SmallSet<uint64_t, 8> Printed;
for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- if (!(i < NumFixedOperands
- ? (Desc.OpInfo[i].OperandType ==
- WebAssembly::OPERAND_BASIC_BLOCK)
- : (Desc.TSFlags & WebAssemblyII::VariableOpImmediateIsLabel)))
- continue;
+ // See if this operand denotes a basic block target.
+ if (i < NumFixedOperands) {
+ // A non-variable_ops operand, check its type.
+ if (Desc.OpInfo[i].OperandType != WebAssembly::OPERAND_BASIC_BLOCK)
+ continue;
+ } else {
+ // A variable_ops operand, which currently can be immediates (used in
+ // br_table) which are basic block targets, or for call instructions
+ // when using -wasm-keep-registers (in which case they are registers,
+ // and should not be processed).
+ if (!MI->getOperand(i).isImm())
+ continue;
+ }
uint64_t Depth = MI->getOperand(i).getImm();
if (!Printed.insert(Depth).second)
continue;
- const auto &Pair = ControlFlowStack.rbegin()[Depth];
- printAnnotation(OS, utostr(Depth) + ": " + (Pair.second ? "up" : "down") +
- " to label" + utostr(Pair.first));
+
+ if (Opc == WebAssembly::RETHROW || Opc == WebAssembly::RETHROW_S) {
+ if (Depth > EHPadStack.size()) {
+ printAnnotation(OS, "Invalid depth argument!");
+ } else if (Depth == EHPadStack.size()) {
+ // This can happen when a rethrow instruction breaks out of all nests
+ // and throws up to the current function's caller.
+ printAnnotation(OS, utostr(Depth) + ": " + "to caller");
+ } else {
+ uint64_t CatchNo = EHPadStack.rbegin()[Depth];
+ printAnnotation(OS, utostr(Depth) + ": " + "down to catch" +
+ utostr(CatchNo));
+ }
+
+ } else {
+ if (Depth >= ControlFlowStack.size()) {
+ printAnnotation(OS, "Invalid depth argument!");
+ } else {
+ const auto &Pair = ControlFlowStack.rbegin()[Depth];
+ printAnnotation(OS, utostr(Depth) + ": " +
+ (Pair.second ? "up" : "down") + " to label" +
+ utostr(Pair.first));
+ }
+ }
}
}
}
static std::string toString(const APFloat &FP) {
// Print NaNs with custom payloads specially.
- if (FP.isNaN() &&
- !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) &&
+ if (FP.isNaN() && !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) &&
!FP.bitwiseIsEqual(
APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) {
APInt AI = FP.bitcastToAPInt();
- return
- std::string(AI.isNegative() ? "-" : "") + "nan:0x" +
- utohexstr(AI.getZExtValue() &
- (AI.getBitWidth() == 32 ? INT64_C(0x007fffff) :
- INT64_C(0x000fffffffffffff)),
- /*LowerCase=*/true);
+ return std::string(AI.isNegative() ? "-" : "") + "nan:0x" +
+ utohexstr(AI.getZExtValue() &
+ (AI.getBitWidth() == 32 ? INT64_C(0x007fffff)
+ : INT64_C(0x000fffffffffffff)),
+ /*LowerCase=*/true);
}
// Use C99's hexadecimal floating-point representation.
@@ -141,9 +219,6 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
- assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
- MII.get(MI->getOpcode()).TSFlags == 0) &&
- "WebAssembly variable_ops register ops don't use TSFlags");
unsigned WAReg = Op.getReg();
if (int(WAReg) >= 0)
printRegName(O, WAReg);
@@ -157,23 +232,9 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
O << '=';
} else if (Op.isImm()) {
- const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- assert((OpNo < Desc.getNumOperands() ||
- (Desc.TSFlags & WebAssemblyII::VariableOpIsImmediate)) &&
- "WebAssemblyII::VariableOpIsImmediate should be set for "
- "variable_ops immediate ops");
- (void)Desc;
- // TODO: (MII.get(MI->getOpcode()).TSFlags &
- // WebAssemblyII::VariableOpImmediateIsLabel)
- // can tell us whether this is an immediate referencing a label in the
- // control flow stack, and it may be nice to pretty-print.
O << Op.getImm();
} else if (Op.isFPImm()) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- assert(OpNo < Desc.getNumOperands() &&
- "Unexpected floating-point immediate as a non-fixed operand");
- assert(Desc.TSFlags == 0 &&
- "WebAssembly variable_ops floating point ops don't use TSFlags");
const MCOperandInfo &Info = Desc.OpInfo[OpNo];
if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
// TODO: MC converts all floating point immediate operands to double.
@@ -184,78 +245,66 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << ::toString(APFloat(Op.getFPImm()));
}
} else {
- assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
- (MII.get(MI->getOpcode()).TSFlags &
- WebAssemblyII::VariableOpIsImmediate)) &&
- "WebAssemblyII::VariableOpIsImmediate should be set for "
- "variable_ops expr ops");
assert(Op.isExpr() && "unknown operand kind in printOperand");
Op.getExpr()->print(O, &MAI);
}
}
-void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(
- const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+void WebAssemblyInstPrinter::printBrList(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << "{";
+ for (unsigned I = OpNo, E = MI->getNumOperands(); I != E; ++I) {
+ if (I != OpNo)
+ O << ", ";
+ O << MI->getOperand(I).getImm();
+ }
+ O << "}";
+}
+
+void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
int64_t Imm = MI->getOperand(OpNo).getImm();
if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode()))
return;
O << ":p2align=" << Imm;
}
-void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(
- const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- int64_t Imm = MI->getOperand(OpNo).getImm();
- switch (WebAssembly::ExprType(Imm)) {
- case WebAssembly::ExprType::Void: break;
- case WebAssembly::ExprType::I32: O << "i32"; break;
- case WebAssembly::ExprType::I64: O << "i64"; break;
- case WebAssembly::ExprType::F32: O << "f32"; break;
- case WebAssembly::ExprType::F64: O << "f64"; break;
- case WebAssembly::ExprType::I8x16: O << "i8x16"; break;
- case WebAssembly::ExprType::I16x8: O << "i16x8"; break;
- case WebAssembly::ExprType::I32x4: O << "i32x4"; break;
- case WebAssembly::ExprType::F32x4: O << "f32x4"; break;
- case WebAssembly::ExprType::B8x16: O << "b8x16"; break;
- case WebAssembly::ExprType::B16x8: O << "b16x8"; break;
- case WebAssembly::ExprType::B32x4: O << "b32x4"; break;
- case WebAssembly::ExprType::ExceptRef: O << "except_ref"; break;
- }
+void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ auto Imm = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
+ if (Imm != wasm::WASM_TYPE_NORESULT)
+ O << WebAssembly::anyTypeToString(Imm);
}
-const char *llvm::WebAssembly::TypeToString(MVT Ty) {
- switch (Ty.SimpleTy) {
- case MVT::i32:
+// We have various enums representing a subset of these types; use this
+// function to convert any of them to text.
+const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) {
+ switch (Ty) {
+ case wasm::WASM_TYPE_I32:
return "i32";
- case MVT::i64:
+ case wasm::WASM_TYPE_I64:
return "i64";
- case MVT::f32:
+ case wasm::WASM_TYPE_F32:
return "f32";
- case MVT::f64:
+ case wasm::WASM_TYPE_F64:
return "f64";
- case MVT::v16i8:
- case MVT::v8i16:
- case MVT::v4i32:
- case MVT::v4f32:
+ case wasm::WASM_TYPE_V128:
return "v128";
- case MVT::ExceptRef:
+ case wasm::WASM_TYPE_FUNCREF:
+ return "funcref";
+ case wasm::WASM_TYPE_FUNC:
+ return "func";
+ case wasm::WASM_TYPE_EXCEPT_REF:
return "except_ref";
+ case wasm::WASM_TYPE_NORESULT:
+ return "void";
default:
- llvm_unreachable("unsupported type");
+ return "invalid_type";
}
}
-const char *llvm::WebAssembly::TypeToString(wasm::ValType Type) {
- switch (Type) {
- case wasm::ValType::I32:
- return "i32";
- case wasm::ValType::I64:
- return "i64";
- case wasm::ValType::F32:
- return "f32";
- case wasm::ValType::F64:
- return "f64";
- case wasm::ValType::EXCEPT_REF:
- return "except_ref";
- }
- llvm_unreachable("unsupported type");
+const char *llvm::WebAssembly::typeToString(wasm::ValType Ty) {
+ return anyTypeToString(static_cast<unsigned>(Ty));
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index f5b890a7615e..5ad45c7d5c7f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -25,8 +25,13 @@ namespace llvm {
class MCSubtargetInfo;
class WebAssemblyInstPrinter final : public MCInstPrinter {
- uint64_t ControlFlowCounter;
- SmallVector<std::pair<uint64_t, bool>, 0> ControlFlowStack;
+ uint64_t ControlFlowCounter = 0;
+ uint64_t EHPadStackCounter = 0;
+ SmallVector<std::pair<uint64_t, bool>, 4> ControlFlowStack;
+ SmallVector<uint64_t, 4> EHPadStack;
+
+ enum EHInstKind { TRY, CATCH, END_TRY };
+ EHInstKind LastSeenEHInst = END_TRY;
public:
WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
@@ -38,6 +43,7 @@ public:
// Used by tblgen code.
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBrList(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O);
void printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo,
@@ -50,8 +56,8 @@ public:
namespace WebAssembly {
-const char *TypeToString(MVT Ty);
-const char *TypeToString(wasm::ValType Type);
+const char *typeToString(wasm::ValType Ty);
+const char *anyTypeToString(unsigned Ty);
} // end namespace WebAssembly
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 244c2189b455..0726dd481174 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -73,13 +73,13 @@ public:
const MCFixupKindInfo &
WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[WebAssembly::NumTargetFixupKinds] = {
- // This table *must* be in the order that the fixup_* kinds are defined in
- // WebAssemblyFixupKinds.h.
- //
- // Name Offset (bits) Size (bits) Flags
- { "fixup_code_sleb128_i32", 0, 5*8, 0 },
- { "fixup_code_sleb128_i64", 0, 10*8, 0 },
- { "fixup_code_uleb128_i32", 0, 5*8, 0 },
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // WebAssemblyFixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ {"fixup_code_sleb128_i32", 0, 5 * 8, 0},
+ {"fixup_code_sleb128_i64", 0, 10 * 8, 0},
+ {"fixup_code_uleb128_i32", 0, 5 * 8, 0},
};
if (Kind < FirstTargetFixupKind)
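
The reformatted table above only works because its rows stay in the exact order of
the fixup_* enumerators: once Kind is known to be a target fixup, getFixupKindInfo
indexes the table by Kind - FirstTargetFixupKind (generic kinds below that fall back
to the base MCAsmBackend). A self-contained sketch of that invariant with stand-in
types; FixupKindInfo here is a local struct, not the real MC declaration.

#include <cassert>

// Illustrative stand-ins for the MC types.
struct FixupKindInfo {
  const char *Name;
  unsigned Offset; // bits
  unsigned Size;   // bits
  unsigned Flags;
};

enum Fixups : unsigned {
  FirstTargetFixupKind = 0,
  fixup_code_sleb128_i32 = FirstTargetFixupKind,
  fixup_code_sleb128_i64,
  fixup_code_uleb128_i32,
  NumTargetFixupKinds,
};

// The table is indexed by (Kind - FirstTargetFixupKind), which is why its rows
// must appear in exactly the order the enumerators are declared.
const FixupKindInfo &getFixupKindInfo(unsigned Kind) {
  static const FixupKindInfo Infos[NumTargetFixupKinds] = {
      {"fixup_code_sleb128_i32", 0, 5 * 8, 0},
      {"fixup_code_sleb128_i64", 0, 10 * 8, 0},
      {"fixup_code_uleb128_i32", 0, 5 * 8, 0},
  };
  assert(Kind - FirstTargetFixupKind < NumTargetFixupKinds);
  return Infos[Kind - FirstTargetFixupKind];
}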
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
index b0af63c924bd..c2fac5f93a2f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
@@ -15,11 +15,9 @@
namespace llvm {
namespace WebAssembly {
enum Fixups {
- fixup_code_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
- fixup_code_sleb128_i64, // 64-bit signed
- fixup_code_uleb128_i32, // 32-bit unsigned
-
- fixup_code_global_index, // 32-bit unsigned
+ fixup_code_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
+ fixup_code_sleb128_i64, // 64-bit signed
+ fixup_code_uleb128_i32, // 32-bit unsigned
// Marker
LastTargetFixupKind,
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 94ca94e1e18c..065a4dc94ca6 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -67,13 +67,16 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
OS << uint8_t(Binary);
} else {
assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet");
- OS << uint8_t(Binary >> 8)
- << uint8_t(Binary);
+ OS << uint8_t(Binary >> 8);
+ encodeULEB128(uint8_t(Binary), OS);
}
// For br_table instructions, encode the size of the table. In the MCInst,
- // there's an index operand, one operand for each table entry, and the
- // default operand.
+ // there's an index operand (if not a stack instruction), one operand for
+ // each table entry, and the default operand.
+ if (MI.getOpcode() == WebAssembly::BR_TABLE_I32_S ||
+ MI.getOpcode() == WebAssembly::BR_TABLE_I64_S)
+ encodeULEB128(MI.getNumOperands() - 1, OS);
if (MI.getOpcode() == WebAssembly::BR_TABLE_I32 ||
MI.getOpcode() == WebAssembly::BR_TABLE_I64)
encodeULEB128(MI.getNumOperands() - 2, OS);
@@ -83,36 +86,47 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
/* nothing to encode */
+
} else if (MO.isImm()) {
if (i < Desc.getNumOperands()) {
- assert(Desc.TSFlags == 0 &&
- "WebAssembly non-variable_ops don't use TSFlags");
const MCOperandInfo &Info = Desc.OpInfo[i];
LLVM_DEBUG(dbgs() << "Encoding immediate: type="
<< int(Info.OperandType) << "\n");
- if (Info.OperandType == WebAssembly::OPERAND_I32IMM) {
+ switch (Info.OperandType) {
+ case WebAssembly::OPERAND_I32IMM:
encodeSLEB128(int32_t(MO.getImm()), OS);
- } else if (Info.OperandType == WebAssembly::OPERAND_OFFSET32) {
+ break;
+ case WebAssembly::OPERAND_OFFSET32:
encodeULEB128(uint32_t(MO.getImm()), OS);
- } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
+ break;
+ case WebAssembly::OPERAND_I64IMM:
encodeSLEB128(int64_t(MO.getImm()), OS);
- } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) {
- llvm_unreachable("wasm globals should only be accessed symbolicly");
- } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) {
+ break;
+ case WebAssembly::OPERAND_SIGNATURE:
OS << uint8_t(MO.getImm());
- } else {
+ break;
+ case WebAssembly::OPERAND_VEC_I8IMM:
+ support::endian::write<uint8_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_VEC_I16IMM:
+ support::endian::write<uint16_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_VEC_I32IMM:
+ support::endian::write<uint32_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_VEC_I64IMM:
+ support::endian::write<uint64_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_GLOBAL:
+ llvm_unreachable("wasm globals should only be accessed symbolicly");
+ default:
encodeULEB128(uint64_t(MO.getImm()), OS);
}
} else {
- assert(Desc.TSFlags == (WebAssemblyII::VariableOpIsImmediate |
- WebAssemblyII::VariableOpImmediateIsLabel));
encodeULEB128(uint64_t(MO.getImm()), OS);
}
+
} else if (MO.isFPImm()) {
- assert(i < Desc.getNumOperands() &&
- "Unexpected floating-point immediate as a non-fixed operand");
- assert(Desc.TSFlags == 0 &&
- "WebAssembly variable_ops floating point ops don't use TSFlags");
const MCOperandInfo &Info = Desc.OpInfo[i];
if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
// TODO: MC converts all floating point immediate operands to double.
@@ -124,27 +138,31 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
double d = MO.getFPImm();
support::endian::write<double>(OS, d, support::little);
}
+
} else if (MO.isExpr()) {
const MCOperandInfo &Info = Desc.OpInfo[i];
llvm::MCFixupKind FixupKind;
size_t PaddedSize = 5;
- if (Info.OperandType == WebAssembly::OPERAND_I32IMM) {
+ switch (Info.OperandType) {
+ case WebAssembly::OPERAND_I32IMM:
FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i32);
- } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
+ break;
+ case WebAssembly::OPERAND_I64IMM:
FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i64);
PaddedSize = 10;
- } else if (Info.OperandType == WebAssembly::OPERAND_FUNCTION32 ||
- Info.OperandType == WebAssembly::OPERAND_OFFSET32 ||
- Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
+ break;
+ case WebAssembly::OPERAND_FUNCTION32:
+ case WebAssembly::OPERAND_OFFSET32:
+ case WebAssembly::OPERAND_TYPEINDEX:
+ case WebAssembly::OPERAND_GLOBAL:
+ case WebAssembly::OPERAND_EVENT:
FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32);
- } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) {
- FixupKind = MCFixupKind(WebAssembly::fixup_code_global_index);
- } else {
+ break;
+ default:
llvm_unreachable("unexpected symbolic operand kind");
}
- Fixups.push_back(MCFixup::create(
- OS.tell() - Start, MO.getExpr(),
- FixupKind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(OS.tell() - Start, MO.getExpr(),
+ FixupKind, MI.getLoc()));
++MCNumFixups;
encodeULEB128(0, OS, PaddedSize);
} else {
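
The code emitter above leans entirely on LEB128: unsigned immediates, offsets, and
br_table sizes are written as ULEB128, and signed i32/i64 immediates as SLEB128, via
encodeULEB128/encodeSLEB128 from llvm/Support/LEB128.h. A minimal sketch of the two
encodings, writing into a byte vector instead of a raw_ostream (assumes the usual
arithmetic right shift for signed values):

#include <cstdint>
#include <vector>

// Unsigned LEB128: 7 payload bits per byte, high bit set while more follow.
void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}

// Signed LEB128: shift arithmetically and stop once the remaining value is all
// sign bits and the sign bit of the last emitted byte already matches.
void encodeSLEB128(int64_t Value, std::vector<uint8_t> &Out) {
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7; // arithmetic shift preserves the sign
    More = !((Value == 0 && (Byte & 0x40) == 0) ||
             (Value == -1 && (Byte & 0x40) != 0));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
}

// Example: encodeULEB128(624485, Out) appends 0xE5 0x8E 0x26.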
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index baf8a0c96c0a..390f367c2978 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -90,6 +90,10 @@ static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
return new WebAssemblyTargetAsmStreamer(S, OS);
}
+static MCTargetStreamer *createNullTargetStreamer(MCStreamer &S) {
+ return new WebAssemblyTargetNullStreamer(S);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeWebAssemblyTargetMC() {
for (Target *T :
@@ -120,16 +124,31 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() {
createObjectTargetStreamer);
// Register the asm target streamer.
TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
+ // Register the null target streamer.
+ TargetRegistry::RegisterNullTargetStreamer(*T, createNullTargetStreamer);
}
}
wasm::ValType WebAssembly::toValType(const MVT &Ty) {
switch (Ty.SimpleTy) {
- case MVT::i32: return wasm::ValType::I32;
- case MVT::i64: return wasm::ValType::I64;
- case MVT::f32: return wasm::ValType::F32;
- case MVT::f64: return wasm::ValType::F64;
- case MVT::ExceptRef: return wasm::ValType::EXCEPT_REF;
- default: llvm_unreachable("unexpected type");
+ case MVT::i32:
+ return wasm::ValType::I32;
+ case MVT::i64:
+ return wasm::ValType::I64;
+ case MVT::f32:
+ return wasm::ValType::F32;
+ case MVT::f64:
+ return wasm::ValType::F64;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return wasm::ValType::V128;
+ case MVT::ExceptRef:
+ return wasm::ValType::EXCEPT_REF;
+ default:
+ llvm_unreachable("unexpected type");
}
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index c1c8d243e920..a01517fb90c3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -59,6 +59,14 @@ enum OperandType {
OPERAND_F32IMM,
/// 64-bit floating-point immediates.
OPERAND_F64IMM,
+ /// 8-bit vector lane immediate
+ OPERAND_VEC_I8IMM,
+ /// 16-bit vector lane immediate
+ OPERAND_VEC_I16IMM,
+ /// 32-bit vector lane immediate
+ OPERAND_VEC_I32IMM,
+ /// 64-bit vector lane immediate
+ OPERAND_VEC_I64IMM,
/// 32-bit unsigned function indices.
OPERAND_FUNCTION32,
/// 32-bit unsigned memory offsets.
@@ -69,17 +77,24 @@ enum OperandType {
OPERAND_SIGNATURE,
/// type signature immediate for call_indirect.
OPERAND_TYPEINDEX,
+ /// Event index.
+ OPERAND_EVENT,
+ /// A list of branch targets for br_list.
+ OPERAND_BRLIST,
};
} // end namespace WebAssembly
namespace WebAssemblyII {
-enum {
- // For variadic instructions, this flag indicates whether an operand
- // in the variable_ops range is an immediate value.
- VariableOpIsImmediate = (1 << 0),
- // For immediate values in the variable_ops range, this flag indicates
- // whether the value represents a control-flow label.
- VariableOpImmediateIsLabel = (1 << 1)
+
+/// Target Operand Flag enum.
+enum TOF {
+ MO_NO_FLAG = 0,
+
+ // Flags to indicate the type of the symbol being referenced
+ MO_SYMBOL_FUNCTION = 0x1,
+ MO_SYMBOL_GLOBAL = 0x2,
+ MO_SYMBOL_EVENT = 0x4,
+ MO_SYMBOL_MASK = 0x7,
};
} // end namespace WebAssemblyII
@@ -149,6 +164,10 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW8_U_XCHG_I32_S:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32_S:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64_S:
return 0;
case WebAssembly::LOAD16_S_I32:
case WebAssembly::LOAD16_S_I32_S:
@@ -194,6 +213,10 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW16_U_XCHG_I32_S:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32_S:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64_S:
return 1;
case WebAssembly::LOAD_I32:
case WebAssembly::LOAD_I32_S:
@@ -241,6 +264,14 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW_XCHG_I32_S:
case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW32_U_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I32_S:
+ case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64_S:
+ case WebAssembly::ATOMIC_NOTIFY:
+ case WebAssembly::ATOMIC_NOTIFY_S:
+ case WebAssembly::ATOMIC_WAIT_I32:
+ case WebAssembly::ATOMIC_WAIT_I32_S:
return 2;
case WebAssembly::LOAD_I64:
case WebAssembly::LOAD_I64_S:
@@ -266,7 +297,36 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW_XOR_I64_S:
case WebAssembly::ATOMIC_RMW_XCHG_I64:
case WebAssembly::ATOMIC_RMW_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I64_S:
+ case WebAssembly::ATOMIC_WAIT_I64:
+ case WebAssembly::ATOMIC_WAIT_I64_S:
return 3;
+ case WebAssembly::LOAD_v16i8:
+ case WebAssembly::LOAD_v16i8_S:
+ case WebAssembly::LOAD_v8i16:
+ case WebAssembly::LOAD_v8i16_S:
+ case WebAssembly::LOAD_v4i32:
+ case WebAssembly::LOAD_v4i32_S:
+ case WebAssembly::LOAD_v2i64:
+ case WebAssembly::LOAD_v2i64_S:
+ case WebAssembly::LOAD_v4f32:
+ case WebAssembly::LOAD_v4f32_S:
+ case WebAssembly::LOAD_v2f64:
+ case WebAssembly::LOAD_v2f64_S:
+ case WebAssembly::STORE_v16i8:
+ case WebAssembly::STORE_v16i8_S:
+ case WebAssembly::STORE_v8i16:
+ case WebAssembly::STORE_v8i16_S:
+ case WebAssembly::STORE_v4i32:
+ case WebAssembly::STORE_v4i32_S:
+ case WebAssembly::STORE_v2i64:
+ case WebAssembly::STORE_v2i64_S:
+ case WebAssembly::STORE_v4f32:
+ case WebAssembly::STORE_v4f32_S:
+ case WebAssembly::STORE_v2f64:
+ case WebAssembly::STORE_v2f64_S:
+ return 4;
default:
llvm_unreachable("Only loads and stores have p2align values");
}
@@ -282,19 +342,14 @@ static const unsigned StoreP2AlignOperandNo = 0;
/// This is used to indicate block signatures.
enum class ExprType : unsigned {
- Void = 0x40,
- I32 = 0x7F,
- I64 = 0x7E,
- F32 = 0x7D,
- F64 = 0x7C,
- I8x16 = 0x7B,
- I16x8 = 0x7A,
- I32x4 = 0x79,
- F32x4 = 0x78,
- B8x16 = 0x77,
- B16x8 = 0x76,
- B32x4 = 0x75,
- ExceptRef = 0x68
+ Void = 0x40,
+ I32 = 0x7F,
+ I64 = 0x7E,
+ F32 = 0x7D,
+ F64 = 0x7C,
+ V128 = 0x7B,
+ ExceptRef = 0x68,
+ Invalid = 0x00
};
/// Instruction opcodes emitted via means other than CodeGen.
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 5272e188e1d0..50143fb0ece3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -39,70 +39,80 @@ WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer(
WebAssemblyTargetWasmStreamer::WebAssemblyTargetWasmStreamer(MCStreamer &S)
: WebAssemblyTargetStreamer(S) {}
-static void PrintTypes(formatted_raw_ostream &OS, ArrayRef<MVT> Types) {
+static void printTypes(formatted_raw_ostream &OS,
+ ArrayRef<wasm::ValType> Types) {
bool First = true;
- for (MVT Type : Types) {
+ for (auto Type : Types) {
if (First)
First = false;
else
OS << ", ";
- OS << WebAssembly::TypeToString(Type);
+ OS << WebAssembly::typeToString(Type);
}
OS << '\n';
}
-void WebAssemblyTargetAsmStreamer::emitParam(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
+void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) {
if (!Types.empty()) {
- OS << "\t.param \t";
-
- // FIXME: Currently this applies to the "current" function; it may
- // be cleaner to specify an explicit symbol as part of the directive.
-
- PrintTypes(OS, Types);
+ OS << "\t.local \t";
+ printTypes(OS, Types);
}
}
-void WebAssemblyTargetAsmStreamer::emitResult(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
- if (!Types.empty()) {
- OS << "\t.result \t";
+void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
- // FIXME: Currently this applies to the "current" function; it may
- // be cleaner to specify an explicit symbol as part of the directive.
+void WebAssemblyTargetAsmStreamer::emitSignature(
+ const wasm::WasmSignature *Sig) {
+ OS << "(";
+ emitParamList(Sig);
+ OS << ") -> (";
+ emitReturnList(Sig);
+ OS << ")";
+}
- PrintTypes(OS, Types);
+void WebAssemblyTargetAsmStreamer::emitParamList(
+ const wasm::WasmSignature *Sig) {
+ auto &Params = Sig->Params;
+ for (auto &Ty : Params) {
+ if (&Ty != &Params[0])
+ OS << ", ";
+ OS << WebAssembly::typeToString(Ty);
}
}
-void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) {
- if (!Types.empty()) {
- OS << "\t.local \t";
- PrintTypes(OS, Types);
+void WebAssemblyTargetAsmStreamer::emitReturnList(
+ const wasm::WasmSignature *Sig) {
+ auto &Returns = Sig->Returns;
+ for (auto &Ty : Returns) {
+ if (&Ty != &Returns[0])
+ OS << ", ";
+ OS << WebAssembly::typeToString(Ty);
}
}
-void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
+void WebAssemblyTargetAsmStreamer::emitFunctionType(const MCSymbolWasm *Sym) {
+ assert(Sym->isFunction());
+ OS << "\t.functype\t" << Sym->getName() << " ";
+ emitSignature(Sym->getSignature());
+ OS << "\n";
+}
-void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType(
- MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) {
- OS << "\t.functype\t" << Symbol->getName();
- if (Results.empty())
- OS << ", void";
- else {
- assert(Results.size() == 1);
- OS << ", " << WebAssembly::TypeToString(Results.front());
- }
- for (auto Ty : Params)
- OS << ", " << WebAssembly::TypeToString(Ty);
- OS << '\n';
+void WebAssemblyTargetAsmStreamer::emitGlobalType(const MCSymbolWasm *Sym) {
+ assert(Sym->isGlobal());
+ OS << "\t.globaltype\t" << Sym->getName() << ", "
+ << WebAssembly::typeToString(
+ static_cast<wasm::ValType>(Sym->getGlobalType().Type))
+ << '\n';
}
-void WebAssemblyTargetAsmStreamer::emitGlobalImport(StringRef name) {
- OS << "\t.import_global\t" << name << '\n';
+void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) {
+ assert(Sym->isEvent());
+ OS << "\t.eventtype\t" << Sym->getName() << " ";
+ emitParamList(Sym->getSignature());
+ OS << "\n";
}
-void WebAssemblyTargetAsmStreamer::emitImportModule(MCSymbolWasm *Sym,
+void WebAssemblyTargetAsmStreamer::emitImportModule(const MCSymbolWasm *Sym,
StringRef ModuleName) {
OS << "\t.import_module\t" << Sym->getName() << ", " << ModuleName << '\n';
}
@@ -111,27 +121,9 @@ void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) {
OS << "\t.indidx \t" << *Value << '\n';
}
-void WebAssemblyTargetWasmStreamer::emitParam(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
- SmallVector<wasm::ValType, 4> Params;
- for (MVT Ty : Types)
- Params.push_back(WebAssembly::toValType(Ty));
-
- cast<MCSymbolWasm>(Symbol)->setParams(std::move(Params));
-}
-
-void WebAssemblyTargetWasmStreamer::emitResult(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
- SmallVector<wasm::ValType, 4> Returns;
- for (MVT Ty : Types)
- Returns.push_back(WebAssembly::toValType(Ty));
-
- cast<MCSymbolWasm>(Symbol)->setReturns(std::move(Returns));
-}
-
-void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<MVT> Types) {
- SmallVector<std::pair<MVT, uint32_t>, 4> Grouped;
- for (MVT Type : Types) {
+void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) {
+ SmallVector<std::pair<wasm::ValType, uint32_t>, 4> Grouped;
+ for (auto Type : Types) {
if (Grouped.empty() || Grouped.back().first != Type)
Grouped.push_back(std::make_pair(Type, 1));
else
@@ -141,7 +133,7 @@ void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<MVT> Types) {
Streamer.EmitULEB128IntValue(Grouped.size());
for (auto Pair : Grouped) {
Streamer.EmitULEB128IntValue(Pair.second);
- emitValueType(WebAssembly::toValType(Pair.first));
+ emitValueType(Pair.first);
}
}
@@ -152,34 +144,3 @@ void WebAssemblyTargetWasmStreamer::emitEndFunc() {
void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) {
llvm_unreachable(".indidx encoding not yet implemented");
}
-
-void WebAssemblyTargetWasmStreamer::emitIndirectFunctionType(
- MCSymbol *Symbol, SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) {
- MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Symbol);
- if (WasmSym->isFunction()) {
- // Symbol already has its arguments and result set.
- return;
- }
-
- SmallVector<wasm::ValType, 4> ValParams;
- for (MVT Ty : Params)
- ValParams.push_back(WebAssembly::toValType(Ty));
-
- SmallVector<wasm::ValType, 1> ValResults;
- for (MVT Ty : Results)
- ValResults.push_back(WebAssembly::toValType(Ty));
-
- WasmSym->setParams(std::move(ValParams));
- WasmSym->setReturns(std::move(ValResults));
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
-}
-
-void WebAssemblyTargetWasmStreamer::emitGlobalImport(StringRef name) {
- llvm_unreachable(".global_import is not needed for direct wasm output");
-}
-
-void WebAssemblyTargetWasmStreamer::emitImportModule(MCSymbolWasm *Sym,
- StringRef ModuleName) {
- Sym->setModuleName(ModuleName);
-}
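
The Wasm streamer's emitLocal above run-length-groups consecutive identical local
types before writing them, because the binary format declares a function's locals as
(count, type) runs rather than one entry per local. A small stand-alone sketch of
that grouping, with plain ints standing in for wasm::ValType:

#include <cstdint>
#include <utility>
#include <vector>

// Group consecutive equal local types into (type, count) runs, the shape the
// wasm binary format expects for a function's local declarations.
std::vector<std::pair<int, uint32_t>>
groupLocals(const std::vector<int> &Types) {
  std::vector<std::pair<int, uint32_t>> Grouped;
  for (int Type : Types) {
    if (Grouped.empty() || Grouped.back().first != Type)
      Grouped.emplace_back(Type, 1);
    else
      ++Grouped.back().second;
  }
  return Grouped;
}

// For {i32, i32, f64, f64, f64} this yields {(i32, 2), (f64, 3)}; the streamer
// then emits the number of runs, followed by each run's count and type byte,
// using ULEB128 for the counts.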
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index cafcb04ccd11..3073938118b4 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -31,24 +31,21 @@ class WebAssemblyTargetStreamer : public MCTargetStreamer {
public:
explicit WebAssemblyTargetStreamer(MCStreamer &S);
- /// .param
- virtual void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) = 0;
- /// .result
- virtual void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) = 0;
/// .local
- virtual void emitLocal(ArrayRef<MVT> Types) = 0;
+ virtual void emitLocal(ArrayRef<wasm::ValType> Types) = 0;
/// .endfunc
virtual void emitEndFunc() = 0;
/// .functype
- virtual void emitIndirectFunctionType(MCSymbol *Symbol,
- SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) = 0;
+ virtual void emitFunctionType(const MCSymbolWasm *Sym) = 0;
/// .indidx
virtual void emitIndIdx(const MCExpr *Value) = 0;
- /// .import_global
- virtual void emitGlobalImport(StringRef name) = 0;
+ /// .globaltype
+ virtual void emitGlobalType(const MCSymbolWasm *Sym) = 0;
+ /// .eventtype
+ virtual void emitEventType(const MCSymbolWasm *Sym) = 0;
/// .import_module
- virtual void emitImportModule(MCSymbolWasm *Sym, StringRef ModuleName) = 0;
+ virtual void emitImportModule(const MCSymbolWasm *Sym,
+ StringRef ModuleName) = 0;
protected:
void emitValueType(wasm::ValType Type);
@@ -57,20 +54,20 @@ protected:
/// This part is for ascii assembly output
class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer {
formatted_raw_ostream &OS;
+ void emitSignature(const wasm::WasmSignature *Sig);
+ void emitParamList(const wasm::WasmSignature *Sig);
+ void emitReturnList(const wasm::WasmSignature *Sig);
public:
WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
- void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitLocal(ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<wasm::ValType> Types) override;
void emitEndFunc() override;
- void emitIndirectFunctionType(MCSymbol *Symbol,
- SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) override;
+ void emitFunctionType(const MCSymbolWasm *Sym) override;
void emitIndIdx(const MCExpr *Value) override;
- void emitGlobalImport(StringRef name) override;
- void emitImportModule(MCSymbolWasm *Sym, StringRef ModuleName) override;
+ void emitGlobalType(const MCSymbolWasm *Sym) override;
+ void emitEventType(const MCSymbolWasm *Sym) override;
+ void emitImportModule(const MCSymbolWasm *Sym, StringRef ModuleName) override;
};
/// This part is for Wasm object output
@@ -78,16 +75,29 @@ class WebAssemblyTargetWasmStreamer final : public WebAssemblyTargetStreamer {
public:
explicit WebAssemblyTargetWasmStreamer(MCStreamer &S);
- void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitLocal(ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<wasm::ValType> Types) override;
void emitEndFunc() override;
- void emitIndirectFunctionType(MCSymbol *Symbol,
- SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) override;
+ void emitFunctionType(const MCSymbolWasm *Sym) override {}
void emitIndIdx(const MCExpr *Value) override;
- void emitGlobalImport(StringRef name) override;
- void emitImportModule(MCSymbolWasm *Sym, StringRef ModuleName) override;
+ void emitGlobalType(const MCSymbolWasm *Sym) override {}
+ void emitEventType(const MCSymbolWasm *Sym) override {}
+ void emitImportModule(const MCSymbolWasm *Sym,
+ StringRef ModuleName) override {}
+};
+
+/// This part is for null output
+class WebAssemblyTargetNullStreamer final : public WebAssemblyTargetStreamer {
+public:
+ explicit WebAssemblyTargetNullStreamer(MCStreamer &S)
+ : WebAssemblyTargetStreamer(S) {}
+
+ void emitLocal(ArrayRef<wasm::ValType>) override {}
+ void emitEndFunc() override {}
+ void emitFunctionType(const MCSymbolWasm *) override {}
+ void emitIndIdx(const MCExpr *) override {}
+ void emitGlobalType(const MCSymbolWasm *) override {}
+ void emitEventType(const MCSymbolWasm *) override {}
+ void emitImportModule(const MCSymbolWasm *, StringRef) override {}
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 4fb12d40b01b..763e30be8e02 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -81,16 +81,23 @@ static const MCSection *GetFixupSection(const MCExpr *Expr) {
return nullptr;
}
-unsigned
-WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
- const MCFixup &Fixup) const {
+static bool IsGlobalType(const MCValue &Target) {
+ const MCSymbolRefExpr *RefA = Target.getSymA();
+ return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_GLOBAL;
+}
+
+static bool IsEventType(const MCValue &Target) {
+ const MCSymbolRefExpr *RefA = Target.getSymA();
+ return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_EVENT;
+}
+
+unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
+ const MCFixup &Fixup) const {
// WebAssembly functions are not allocated in the data address space. To
// resolve a pointer to a function, we must use a special relocation type.
bool IsFunction = IsFunctionExpr(Fixup.getValue());
switch (unsigned(Fixup.getKind())) {
- case WebAssembly::fixup_code_global_index:
- return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB;
case WebAssembly::fixup_code_sleb128_i32:
if (IsFunction)
return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB;
@@ -98,10 +105,14 @@ WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
case WebAssembly::fixup_code_sleb128_i64:
llvm_unreachable("fixup_sleb128_i64 not implemented yet");
case WebAssembly::fixup_code_uleb128_i32:
+ if (IsGlobalType(Target))
+ return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB;
if (IsFunctionType(Target))
return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB;
if (IsFunction)
return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB;
+ if (IsEventType(Target))
+ return wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB;
return wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB;
case FK_Data_4:
if (IsFunction)
diff --git a/contrib/llvm/lib/Target/WebAssembly/README.txt b/contrib/llvm/lib/Target/WebAssembly/README.txt
index ef0099f07efb..a154b4bf7ea8 100644
--- a/contrib/llvm/lib/Target/WebAssembly/README.txt
+++ b/contrib/llvm/lib/Target/WebAssembly/README.txt
@@ -94,10 +94,10 @@ WebAssemblyTargetLowering.
//===---------------------------------------------------------------------===//
Instead of the OptimizeReturned pass, which should consider preserving the
-"returned" attribute through to MachineInstrs and extending the StoreResults
-pass to do this optimization on calls too. That would also let the
-WebAssemblyPeephole pass clean up dead defs for such calls, as it does for
-stores.
+"returned" attribute through to MachineInstrs and extending the
+MemIntrinsicResults pass to do this optimization on calls too. That would also
+let the WebAssemblyPeephole pass clean up dead defs for such calls, as it does
+for stores.
//===---------------------------------------------------------------------===//
@@ -120,8 +120,8 @@ code like this:
It could be done with a smaller encoding like this:
i32.const $push5=, 0
- tee_local $push6=, $4=, $pop5
- copy_local $3=, $pop6
+ local.tee $push6=, $4=, $pop5
+ local.copy $3=, $pop6
//===---------------------------------------------------------------------===//
@@ -180,11 +180,11 @@ floating-point constants.
//===---------------------------------------------------------------------===//
The function @dynamic_alloca_redzone in test/CodeGen/WebAssembly/userstack.ll
-ends up with a tee_local in its prolog which has an unused result, requiring
+ends up with a local.tee in its prolog which has an unused result, requiring
an extra drop:
- get_global $push8=, 0
- tee_local $push9=, 1, $pop8
+ global.get $push8=, 0
+ local.tee $push9=, 1, $pop8
drop $pop9
[...]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
index 05b7b21fb597..45145c0a6527 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -39,10 +39,11 @@ FunctionPass *createWebAssemblyArgumentMove();
FunctionPass *createWebAssemblySetP2AlignOperands();
// Late passes.
+FunctionPass *createWebAssemblyEHRestoreStackPointer();
FunctionPass *createWebAssemblyReplacePhysRegs();
FunctionPass *createWebAssemblyPrepareForLiveIntervals();
FunctionPass *createWebAssemblyOptimizeLiveIntervals();
-FunctionPass *createWebAssemblyStoreResults();
+FunctionPass *createWebAssemblyMemIntrinsicResults();
FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegColoring();
FunctionPass *createWebAssemblyExplicitLocals();
@@ -63,10 +64,11 @@ void initializeFixFunctionBitcastsPass(PassRegistry &);
void initializeOptimizeReturnedPass(PassRegistry &);
void initializeWebAssemblyArgumentMovePass(PassRegistry &);
void initializeWebAssemblySetP2AlignOperandsPass(PassRegistry &);
+void initializeWebAssemblyEHRestoreStackPointerPass(PassRegistry &);
void initializeWebAssemblyReplacePhysRegsPass(PassRegistry &);
void initializeWebAssemblyPrepareForLiveIntervalsPass(PassRegistry &);
void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &);
-void initializeWebAssemblyStoreResultsPass(PassRegistry &);
+void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &);
void initializeWebAssemblyRegStackifyPass(PassRegistry &);
void initializeWebAssemblyRegColoringPass(PassRegistry &);
void initializeWebAssemblyExplicitLocalsPass(PassRegistry &);
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
index 2f301da8e422..6b218f8aa880 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -23,8 +23,15 @@ include "llvm/Target/Target.td"
// WebAssembly Subtarget features.
//===----------------------------------------------------------------------===//
-def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "true",
+def FeatureSIMD128 : SubtargetFeature<"simd128", "SIMDLevel", "SIMD128",
"Enable 128-bit SIMD">;
+
+def FeatureUnimplementedSIMD128 :
+ SubtargetFeature<"unimplemented-simd128",
+ "SIMDLevel", "UnimplementedSIMD128",
+ "Enable 128-bit SIMD not yet implemented in engines",
+ [FeatureSIMD128]>;
+
def FeatureAtomics : SubtargetFeature<"atomics", "HasAtomics", "true",
"Enable Atomics">;
def FeatureNontrappingFPToInt :
@@ -71,7 +78,8 @@ def : ProcessorModel<"generic", NoSchedModel, []>;
// Latest and greatest experimental version of WebAssembly. Bugs included!
def : ProcessorModel<"bleeding-edge", NoSchedModel,
- [FeatureSIMD128, FeatureAtomics]>;
+ [FeatureSIMD128, FeatureAtomics,
+ FeatureNontrappingFPToInt, FeatureSignExt]>;
//===----------------------------------------------------------------------===//
// Target Declaration
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
index 4af9cd150bf7..e49e2b67f435 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
@@ -24,10 +24,10 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-add-missing-prototypes"
@@ -60,16 +60,17 @@ ModulePass *llvm::createWebAssemblyAddMissingPrototypes() {
}
bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
- LLVM_DEBUG(dbgs() << "runnning AddMissingPrototypes\n");
+ LLVM_DEBUG(dbgs() << "********** Add Missing Prototypes **********\n");
- std::vector<std::pair<Function*, Function*>> Replacements;
+ std::vector<std::pair<Function *, Function *>> Replacements;
// Find all the prototype-less function declarations
for (Function &F : M) {
if (!F.isDeclaration() || !F.hasFnAttribute("no-prototype"))
continue;
- LLVM_DEBUG(dbgs() << "Found no-prototype function: " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Found no-prototype function: " << F.getName()
+ << "\n");
// When clang emits prototype-less C functions it uses (...), i.e. varargs
// functions that take no arguments (have no sentinel). When we see a
@@ -83,23 +84,29 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
"Functions with 'no-prototype' attribute should not have params: " +
F.getName());
-
// Create a function prototype based on the first call site (first bitcast)
// that we find.
FunctionType *NewType = nullptr;
- Function* NewF = nullptr;
+ Function *NewF = nullptr;
for (Use &U : F.uses()) {
LLVM_DEBUG(dbgs() << "prototype-less use: " << F.getName() << "\n");
- if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser())) {
- FunctionType *DestType =
- cast<FunctionType>(BC->getDestTy()->getPointerElementType());
-
- // Create a new function with the correct type
- NewType = DestType;
- NewF = Function::Create(NewType, F.getLinkage(), F.getName());
- NewF->setAttributes(F.getAttributes());
- NewF->removeFnAttr("no-prototype");
- break;
+ if (auto *BC = dyn_cast<BitCastOperator>(U.getUser())) {
+ if (auto *DestType = dyn_cast<FunctionType>(
+ BC->getDestTy()->getPointerElementType())) {
+ if (!NewType) {
+ // Create a new function with the correct type
+ NewType = DestType;
+ NewF = Function::Create(NewType, F.getLinkage(), F.getName());
+ NewF->setAttributes(F.getAttributes());
+ NewF->removeFnAttr("no-prototype");
+ } else {
+ if (NewType != DestType) {
+ report_fatal_error("Prototypeless function used with "
+ "conflicting signatures: " +
+ F.getName());
+ }
+ }
+ }
}
}
@@ -110,32 +117,42 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
continue;
}
- for (Use &U : F.uses()) {
- if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser())) {
- FunctionType *DestType =
- cast<FunctionType>(BC->getDestTy()->getPointerElementType());
- if (NewType != DestType) {
- report_fatal_error(
- "Prototypeless function used with conflicting signatures: " +
- F.getName());
- }
- BC->replaceAllUsesWith(NewF);
- Replacements.emplace_back(&F, NewF);
- } else {
- dbgs() << *U.getUser()->getType() << "\n";
+ SmallVector<Instruction *, 4> DeadInsts;
+
+ for (Use &US : F.uses()) {
+ User *U = US.getUser();
+ if (auto *BC = dyn_cast<BitCastOperator>(U)) {
+ if (auto *Inst = dyn_cast<BitCastInst>(U)) {
+ // Replace with a new bitcast
+ IRBuilder<> Builder(Inst);
+ Value *NewCast = Builder.CreatePointerCast(NewF, BC->getDestTy());
+ Inst->replaceAllUsesWith(NewCast);
+ DeadInsts.push_back(Inst);
+ } else if (auto *Const = dyn_cast<ConstantExpr>(U)) {
+ Constant *NewConst =
+ ConstantExpr::getPointerCast(NewF, BC->getDestTy());
+ Const->replaceAllUsesWith(NewConst);
+ } else {
+ dbgs() << *U->getType() << "\n";
#ifndef NDEBUG
- U.getUser()->dump();
+ U->dump();
#endif
- report_fatal_error(
- "unexpected use of prototypeless function: " + F.getName() + "\n");
+ report_fatal_error("unexpected use of prototypeless function: " +
+ F.getName() + "\n");
+ }
}
}
+
+ for (auto I : DeadInsts)
+ I->eraseFromParent();
+ Replacements.emplace_back(&F, NewF);
}
+
// Finally replace the old function declarations with the new ones
for (auto &Pair : Replacements) {
- Function* Old = Pair.first;
- Function* New = Pair.second;
+ Function *Old = Pair.first;
+ Function *New = Pair.second;
Old->eraseFromParent();
M.getFunctionList().push_back(New);
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 1f280e1d13fc..c4f03dfa7f9e 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -50,7 +50,7 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
const TargetRegisterClass *TRC = MRI->getRegClass(RegNo);
for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16,
- MVT::v4i32, MVT::v4f32})
+ MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64})
if (TRI->isTypeLegalForClass(*TRC, T))
return T;
LLVM_DEBUG(errs() << "Unknown type for register number: " << RegNo);
@@ -78,24 +78,45 @@ WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() {
//===----------------------------------------------------------------------===//
void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ for (auto &It : OutContext.getSymbols()) {
+ // Emit a .globaltype and .eventtype declaration.
+ auto Sym = cast<MCSymbolWasm>(It.getValue());
+ if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_GLOBAL)
+ getTargetStreamer()->emitGlobalType(Sym);
+ else if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_EVENT)
+ getTargetStreamer()->emitEventType(Sym);
+ }
+
for (const auto &F : M) {
// Emit function type info for all undefined functions
if (F.isDeclarationForLinker() && !F.isIntrinsic()) {
SmallVector<MVT, 4> Results;
SmallVector<MVT, 4> Params;
- ComputeSignatureVTs(F, TM, Params, Results);
- MCSymbol *Sym = getSymbol(&F);
- getTargetStreamer()->emitIndirectFunctionType(Sym, Params, Results);
+ ComputeSignatureVTs(F.getFunctionType(), F, TM, Params, Results);
+ auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ if (!Sym->getSignature()) {
+ auto Signature = SignatureFromMVTs(Results, Params);
+ Sym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ }
+ // FIXME: this was originally intended for post-linking and was only used
+ // for imports that were only called indirectly (i.e. s2wasm could not
+ // infer the type from a call). With object files it applies to all
+ // imports. So fix the names and the tests, or rethink how import
+ // declarations work in asm files.
+ getTargetStreamer()->emitFunctionType(Sym);
if (TM.getTargetTriple().isOSBinFormatWasm() &&
F.hasFnAttribute("wasm-import-module")) {
- MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
- StringRef Name = F.getFnAttribute("wasm-import-module")
- .getValueAsString();
- getTargetStreamer()->emitImportModule(WasmSym, Name);
+ StringRef Name =
+ F.getFnAttribute("wasm-import-module").getValueAsString();
+ Sym->setModuleName(Name);
+ getTargetStreamer()->emitImportModule(Sym, Name);
}
}
}
+
for (const auto &G : M.globals()) {
if (!G.hasInitializer() && G.hasExternalLinkage()) {
if (G.getValueType()->isSized()) {
@@ -137,10 +158,18 @@ void WebAssemblyAsmPrinter::EmitJumpTableInfo() {
}
void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
- getTargetStreamer()->emitParam(CurrentFnSym, MFI->getParams());
-
- SmallVector<MVT, 4> ResultVTs;
const Function &F = MF->getFunction();
+ SmallVector<MVT, 1> ResultVTs;
+ SmallVector<MVT, 4> ParamVTs;
+ ComputeSignatureVTs(F.getFunctionType(), F, TM, ParamVTs, ResultVTs);
+ auto Signature = SignatureFromMVTs(ResultVTs, ParamVTs);
+ auto *WasmSym = cast<MCSymbolWasm>(CurrentFnSym);
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+
+ // FIXME: clean up how params and results are emitted (use signatures)
+ getTargetStreamer()->emitFunctionType(WasmSym);
// Emit the function index.
if (MDNode *Idx = F.getMetadata("wasm.index")) {
@@ -150,16 +179,9 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
cast<ConstantAsMetadata>(Idx->getOperand(0))->getValue()));
}
- ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs);
-
- // If the return type needs to be legalized it will get converted into
- // passing a pointer.
- if (ResultVTs.size() == 1)
- getTargetStreamer()->emitResult(CurrentFnSym, ResultVTs);
- else
- getTargetStreamer()->emitResult(CurrentFnSym, ArrayRef<MVT>());
-
- getTargetStreamer()->emitLocal(MFI->getLocals());
+ SmallVector<wasm::ValType, 16> Locals;
+ ValTypesFromMVTs(MFI->getLocals(), Locals);
+ getTargetStreamer()->emitLocal(Locals);
AsmPrinter::EmitFunctionBodyStart();
}
@@ -168,42 +190,63 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
LLVM_DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
switch (MI->getOpcode()) {
- case WebAssembly::ARGUMENT_I32:
- case WebAssembly::ARGUMENT_I64:
- case WebAssembly::ARGUMENT_F32:
- case WebAssembly::ARGUMENT_F64:
+ case WebAssembly::ARGUMENT_i32:
+ case WebAssembly::ARGUMENT_i32_S:
+ case WebAssembly::ARGUMENT_i64:
+ case WebAssembly::ARGUMENT_i64_S:
+ case WebAssembly::ARGUMENT_f32:
+ case WebAssembly::ARGUMENT_f32_S:
+ case WebAssembly::ARGUMENT_f64:
+ case WebAssembly::ARGUMENT_f64_S:
case WebAssembly::ARGUMENT_v16i8:
+ case WebAssembly::ARGUMENT_v16i8_S:
case WebAssembly::ARGUMENT_v8i16:
+ case WebAssembly::ARGUMENT_v8i16_S:
case WebAssembly::ARGUMENT_v4i32:
+ case WebAssembly::ARGUMENT_v4i32_S:
+ case WebAssembly::ARGUMENT_v2i64:
+ case WebAssembly::ARGUMENT_v2i64_S:
case WebAssembly::ARGUMENT_v4f32:
+ case WebAssembly::ARGUMENT_v4f32_S:
+ case WebAssembly::ARGUMENT_v2f64:
+ case WebAssembly::ARGUMENT_v2f64_S:
// These represent values which are live into the function entry, so there's
// no instruction to emit.
break;
case WebAssembly::FALLTHROUGH_RETURN_I32:
+ case WebAssembly::FALLTHROUGH_RETURN_I32_S:
case WebAssembly::FALLTHROUGH_RETURN_I64:
+ case WebAssembly::FALLTHROUGH_RETURN_I64_S:
case WebAssembly::FALLTHROUGH_RETURN_F32:
+ case WebAssembly::FALLTHROUGH_RETURN_F32_S:
case WebAssembly::FALLTHROUGH_RETURN_F64:
+ case WebAssembly::FALLTHROUGH_RETURN_F64_S:
case WebAssembly::FALLTHROUGH_RETURN_v16i8:
+ case WebAssembly::FALLTHROUGH_RETURN_v16i8_S:
case WebAssembly::FALLTHROUGH_RETURN_v8i16:
+ case WebAssembly::FALLTHROUGH_RETURN_v8i16_S:
case WebAssembly::FALLTHROUGH_RETURN_v4i32:
- case WebAssembly::FALLTHROUGH_RETURN_v4f32: {
+ case WebAssembly::FALLTHROUGH_RETURN_v4i32_S:
+ case WebAssembly::FALLTHROUGH_RETURN_v2i64:
+ case WebAssembly::FALLTHROUGH_RETURN_v2i64_S:
+ case WebAssembly::FALLTHROUGH_RETURN_v4f32:
+ case WebAssembly::FALLTHROUGH_RETURN_v4f32_S:
+ case WebAssembly::FALLTHROUGH_RETURN_v2f64:
+ case WebAssembly::FALLTHROUGH_RETURN_v2f64_S: {
// These instructions represent the implicit return at the end of a
- // function body. The operand is always a pop.
- assert(MFI->isVRegStackified(MI->getOperand(0).getReg()));
-
+ // function body. Always pops one value off the stack.
if (isVerbose()) {
- OutStreamer->AddComment("fallthrough-return: $pop" +
- Twine(MFI->getWARegStackId(
- MFI->getWAReg(MI->getOperand(0).getReg()))));
+ OutStreamer->AddComment("fallthrough-return-value");
OutStreamer->AddBlankLine();
}
break;
}
case WebAssembly::FALLTHROUGH_RETURN_VOID:
+ case WebAssembly::FALLTHROUGH_RETURN_VOID_S:
// This instruction represents the implicit return at the end of a
// function body with no return value.
if (isVerbose()) {
- OutStreamer->AddComment("fallthrough-return");
+ OutStreamer->AddComment("fallthrough-return-void");
OutStreamer->AddBlankLine();
}
break;
@@ -244,6 +287,9 @@ bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
OS << MO.getImm();
return false;
case MachineOperand::MO_Register:
+ // FIXME: the only opcode that still contains registers, as required by
+ // MachineInstr::getDebugVariable().
+ assert(MI->getOpcode() == WebAssembly::INLINEASM);
OS << regToString(MO);
return false;
case MachineOperand::MO_GlobalAddress:
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
index 23817b4e5126..f6cb5610bad3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -25,18 +25,23 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyAsmPrinter final : public AsmPrinter {
const WebAssemblySubtarget *Subtarget;
const MachineRegisterInfo *MRI;
WebAssemblyFunctionInfo *MFI;
+ // TODO: Do the uniquing of Signatures here instead of ObjectFileWriter?
+ std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
public:
explicit WebAssemblyAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)),
- Subtarget(nullptr), MRI(nullptr), MFI(nullptr) {}
+ : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr), MRI(nullptr),
+ MFI(nullptr) {}
StringRef getPassName() const override {
return "WebAssembly Assembly Printer";
}
const WebAssemblySubtarget &getSubtarget() const { return *Subtarget; }
+ void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
+ Signatures.push_back(std::move(Sig));
+ }
//===------------------------------------------------------------------===//
// MachineFunctionPass Implementation.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index 267a51433cd1..fc827e9d5780 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -11,14 +11,15 @@
/// This file implements a CFG sorting pass.
///
/// This pass reorders the blocks in a function to put them into topological
-/// order, ignoring loop backedges, and without any loop being interrupted
-/// by a block not dominated by the loop header, with special care to keep the
-/// order as similar as possible to the original order.
+/// order, ignoring loop backedges, and without any loop or exception being
+/// interrupted by a block not dominated by its header, with special care
+/// to keep the order as similar as possible to the original order.
///
////===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblyExceptionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
#include "llvm/ADT/PriorityQueue.h"
@@ -35,6 +36,73 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-cfg-sort"
namespace {
+
+// Wrapper for loops and exceptions
+class Region {
+public:
+ virtual ~Region() = default;
+ virtual MachineBasicBlock *getHeader() const = 0;
+ virtual bool contains(const MachineBasicBlock *MBB) const = 0;
+ virtual unsigned getNumBlocks() const = 0;
+ using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator;
+ virtual iterator_range<block_iterator> blocks() const = 0;
+ virtual bool isLoop() const = 0;
+};
+
+template <typename T> class ConcreteRegion : public Region {
+ const T *Region;
+
+public:
+ ConcreteRegion(const T *Region) : Region(Region) {}
+ MachineBasicBlock *getHeader() const override { return Region->getHeader(); }
+ bool contains(const MachineBasicBlock *MBB) const override {
+ return Region->contains(MBB);
+ }
+ unsigned getNumBlocks() const override { return Region->getNumBlocks(); }
+ iterator_range<block_iterator> blocks() const override {
+ return Region->blocks();
+ }
+ bool isLoop() const override { return false; }
+};
+
+template <> bool ConcreteRegion<MachineLoop>::isLoop() const { return true; }
+
+// This class holds information about nested Regions; this is analogous to what
+// LoopInfo is for loops.
+class RegionInfo {
+ const MachineLoopInfo &MLI;
+ const WebAssemblyExceptionInfo &WEI;
+ std::vector<const Region *> Regions;
+ DenseMap<const MachineLoop *, std::unique_ptr<Region>> LoopMap;
+ DenseMap<const WebAssemblyException *, std::unique_ptr<Region>> ExceptionMap;
+
+public:
+ RegionInfo(const MachineLoopInfo &MLI, const WebAssemblyExceptionInfo &WEI)
+ : MLI(MLI), WEI(WEI) {}
+
+ // Returns the smallest loop or exception that contains MBB
+ const Region *getRegionFor(const MachineBasicBlock *MBB) {
+ const auto *ML = MLI.getLoopFor(MBB);
+ const auto *WE = WEI.getExceptionFor(MBB);
+ if (!ML && !WE)
+ return nullptr;
+ if ((ML && !WE) || (ML && WE && ML->getNumBlocks() < WE->getNumBlocks())) {
+ // If the smallest region containing MBB is a loop
+ if (LoopMap.count(ML))
+ return LoopMap[ML].get();
+ LoopMap[ML] = llvm::make_unique<ConcreteRegion<MachineLoop>>(ML);
+ return LoopMap[ML].get();
+ } else {
+ // If the smallest region containing MBB is an exception
+ if (ExceptionMap.count(WE))
+ return ExceptionMap[WE].get();
+ ExceptionMap[WE] =
+ llvm::make_unique<ConcreteRegion<WebAssemblyException>>(WE);
+ return ExceptionMap[WE].get();
+ }
+ }
+};
+
class WebAssemblyCFGSort final : public MachineFunctionPass {
StringRef getPassName() const override { return "WebAssembly CFG Sort"; }
@@ -44,6 +112,8 @@ class WebAssemblyCFGSort final : public MachineFunctionPass {
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<WebAssemblyExceptionInfo>();
+ AU.addPreserved<WebAssemblyExceptionInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
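
The Region/ConcreteRegion pair introduced above is a small type-erasure wrapper:
MachineLoop and WebAssemblyException share no base class, but both expose
getHeader/contains/getNumBlocks/blocks, so a templated adapter lets the sorter treat
them uniformly, with only the loop specialization reporting isLoop(). A stripped-down
sketch of the pattern, using made-up Loop/Exception stand-ins rather than the real
machine-level classes:

// Two unrelated classes with a compatible interface.
struct Loop {
  int header() const { return 1; }
  unsigned numBlocks() const { return 4; }
};
struct Exception {
  int header() const { return 7; }
  unsigned numBlocks() const { return 2; }
};

// Abstract interface the sorter works against.
class Region {
public:
  virtual ~Region() = default;
  virtual int getHeader() const = 0;
  virtual unsigned getNumBlocks() const = 0;
  virtual bool isLoop() const = 0;
};

// Templated adapter forwarding to whichever concrete class it wraps.
template <typename T> class ConcreteRegion : public Region {
  const T *R;

public:
  explicit ConcreteRegion(const T *R) : R(R) {}
  int getHeader() const override { return R->header(); }
  unsigned getNumBlocks() const override { return R->numBlocks(); }
  bool isLoop() const override { return false; }
};

// Only the loop specialization reports true, mirroring the patch.
template <> bool ConcreteRegion<Loop>::isLoop() const { return true; }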
@@ -81,10 +151,48 @@ static void MaybeUpdateTerminator(MachineBasicBlock *MBB) {
}
namespace {
+// EH pads are selected first regardless of the block comparison order.
+// When only one of the BBs is an EH pad, we give a higher priority to it, to
+// prevent common mismatches between possibly throwing calls and ehpads they
+// unwind to, as in the example below:
+//
+// bb0:
+// call @foo // If this throws, unwind to bb2
+// bb1:
+// call @bar // If this throws, unwind to bb3
+// bb2 (ehpad):
+// handler_bb2
+// bb3 (ehpad):
+// handler_bb3
+// continuing code
+//
+// Because this pass tries to preserve the original BB order, this order will
+// not change. But this will result in this try-catch structure in CFGStackify,
+// resulting in a mismatch:
+// try
+// try
+// call @foo
+// call @bar // This should unwind to bb3, not bb2!
+// catch
+// handler_bb2
+// end
+// catch
+// handler_bb3
+// end
+// continuing code
+//
+// If we give a higher priority to an EH pad whenever it is ready in this
+// example, when both bb1 and bb2 are ready, we would pick up bb2 first.
+
/// Sort blocks by their number.
struct CompareBlockNumbers {
bool operator()(const MachineBasicBlock *A,
const MachineBasicBlock *B) const {
+ if (A->isEHPad() && !B->isEHPad())
+ return false;
+ if (!A->isEHPad() && B->isEHPad())
+ return true;
+
return A->getNumber() > B->getNumber();
}
};
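
CompareBlockNumbers above is written against std::priority_queue semantics: the queue
pops the element the comparator ranks greatest, so returning false when A is an EH
pad (and B is not) keeps EH pads on top, and the final Number comparison makes
lower-numbered blocks pop first among non-pads. A self-contained sketch of the same
comparator shape; Block here is a toy stand-in for MachineBasicBlock.

#include <cstdio>
#include <queue>
#include <vector>

struct Block {
  int Number;
  bool IsEHPad;
};

// Pops EH pads first, then the remaining blocks in increasing number order.
struct PreferEHPadsThenLowNumbers {
  bool operator()(const Block &A, const Block &B) const {
    if (A.IsEHPad && !B.IsEHPad)
      return false; // A is not "less", so it stays at the top
    if (!A.IsEHPad && B.IsEHPad)
      return true;
    return A.Number > B.Number; // lower numbers rank higher
  }
};

int main() {
  std::priority_queue<Block, std::vector<Block>, PreferEHPadsThenLowNumbers> Q;
  Q.push({0, false});
  Q.push({2, true}); // EH pad
  Q.push({1, false});
  while (!Q.empty()) {
    std::printf("bb%d\n", Q.top().Number); // prints bb2, bb0, bb1
    Q.pop();
  }
}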
@@ -92,29 +200,36 @@ struct CompareBlockNumbers {
struct CompareBlockNumbersBackwards {
bool operator()(const MachineBasicBlock *A,
const MachineBasicBlock *B) const {
+ // We give a higher priority to an EH pad
+ if (A->isEHPad() && !B->isEHPad())
+ return false;
+ if (!A->isEHPad() && B->isEHPad())
+ return true;
+
return A->getNumber() < B->getNumber();
}
};
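An aside, not part of the patch: a standalone sketch of how a comparator shaped like the two above behaves in a priority queue (std::priority_queue here standing in for llvm::PriorityQueue; Block and CompareBlocks are hypothetical stand-ins for MachineBasicBlock and the real comparators). Returning false whenever A is the EH pad and B is not means an EH pad is never ranked below an ordinary block, so it is popped first whichever way the block-number tiebreak points.

    #include <cstdio>
    #include <queue>
    #include <vector>

    // Hypothetical stand-in for MachineBasicBlock.
    struct Block {
      int Number;
      bool IsEHPad;
    };

    // Same shape as CompareBlockNumbers above: EH pads are never considered
    // lower-priority than ordinary blocks; otherwise order by block number,
    // with lower numbers popped first.
    struct CompareBlocks {
      bool operator()(const Block *A, const Block *B) const {
        if (A->IsEHPad && !B->IsEHPad)
          return false;
        if (!A->IsEHPad && B->IsEHPad)
          return true;
        return A->Number > B->Number;
      }
    };

    int main() {
      Block B0{0, false}, B1{1, false}, B2{2, true};
      std::priority_queue<Block *, std::vector<Block *>, CompareBlocks> Q;
      Q.push(&B1);
      Q.push(&B2);
      Q.push(&B0);
      while (!Q.empty()) {
        std::printf("bb%d\n", Q.top()->Number); // bb2 (the EH pad) first, then bb0, bb1
        Q.pop();
      }
      return 0;
    }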
-/// Bookkeeping for a loop to help ensure that we don't mix blocks not dominated
-/// by the loop header among the loop's blocks.
+/// Bookkeeping for a region to help ensure that we don't mix blocks not
+/// dominated by its header among its blocks.
struct Entry {
- const MachineLoop *Loop;
+ const Region *TheRegion;
unsigned NumBlocksLeft;
/// List of blocks not dominated by Loop's header that are deferred until
/// after all of Loop's blocks have been seen.
std::vector<MachineBasicBlock *> Deferred;
- explicit Entry(const MachineLoop *L)
- : Loop(L), NumBlocksLeft(L->getNumBlocks()) {}
+ explicit Entry(const class Region *R)
+ : TheRegion(R), NumBlocksLeft(R->getNumBlocks()) {}
};
} // end anonymous namespace
-/// Sort the blocks, taking special care to make sure that loops are not
+/// Sort the blocks, taking special care to make sure that regions are not
/// interrupted by blocks not dominated by their header.
/// TODO: There are many opportunities for improving the heuristics here.
/// Explore them.
static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
+ const WebAssemblyExceptionInfo &WEI,
const MachineDominatorTree &MDT) {
// Prepare for a topological sort: Record the number of predecessors each
// block has, ignoring loop backedges.
@@ -131,35 +246,39 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
}
// Topological sort the CFG, with additional constraints:
- // - Between a loop header and the last block in the loop, there can be
- // no blocks not dominated by the loop header.
+ // - Between a region header and the last block in the region, there can be
+ // no blocks not dominated by its header.
// - It's desirable to preserve the original block order when possible.
// We use two ready lists; Preferred and Ready. Preferred has recently
// processed successors, to help preserve block sequences from the original
- // order. Ready has the remaining ready blocks.
+ // order. Ready has the remaining ready blocks. EH blocks are picked first
+ // from both queues.
PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
CompareBlockNumbers>
Preferred;
PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
CompareBlockNumbersBackwards>
Ready;
- SmallVector<Entry, 4> Loops;
+
+ RegionInfo SUI(MLI, WEI);
+ SmallVector<Entry, 4> Entries;
for (MachineBasicBlock *MBB = &MF.front();;) {
- const MachineLoop *L = MLI.getLoopFor(MBB);
- if (L) {
- // If MBB is a loop header, add it to the active loop list. We can't put
- // any blocks that it doesn't dominate until we see the end of the loop.
- if (L->getHeader() == MBB)
- Loops.push_back(Entry(L));
- // For each active loop the block is in, decrement the count. If MBB is
- // the last block in an active loop, take it off the list and pick up any
- // blocks deferred because the header didn't dominate them.
- for (Entry &E : Loops)
- if (E.Loop->contains(MBB) && --E.NumBlocksLeft == 0)
+ const Region *R = SUI.getRegionFor(MBB);
+ if (R) {
+ // If MBB is a region header, add it to the active region list. We can't
+ // put any blocks that it doesn't dominate until we see the end of the
+ // region.
+ if (R->getHeader() == MBB)
+ Entries.push_back(Entry(R));
+ // For each active region the block is in, decrement the count. If MBB is
+ // the last block in an active region, take it off the list and pick up
+ // any blocks deferred because the header didn't dominate them.
+ for (Entry &E : Entries)
+ if (E.TheRegion->contains(MBB) && --E.NumBlocksLeft == 0)
for (auto DeferredBlock : E.Deferred)
Ready.push(DeferredBlock);
- while (!Loops.empty() && Loops.back().NumBlocksLeft == 0)
- Loops.pop_back();
+ while (!Entries.empty() && Entries.back().NumBlocksLeft == 0)
+ Entries.pop_back();
}
// The main topological sort logic.
for (MachineBasicBlock *Succ : MBB->successors()) {
@@ -177,19 +296,19 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
while (!Preferred.empty()) {
Next = Preferred.top();
Preferred.pop();
- // If X isn't dominated by the top active loop header, defer it until that
- // loop is done.
- if (!Loops.empty() &&
- !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
- Loops.back().Deferred.push_back(Next);
+ // If X isn't dominated by the top active region header, defer it until
+ // that region is done.
+ if (!Entries.empty() &&
+ !MDT.dominates(Entries.back().TheRegion->getHeader(), Next)) {
+ Entries.back().Deferred.push_back(Next);
Next = nullptr;
continue;
}
// If Next was originally ordered before MBB, and it isn't because it was
// loop-rotated above the header, it's not preferred.
if (Next->getNumber() < MBB->getNumber() &&
- (!L || !L->contains(Next) ||
- L->getHeader()->getNumber() < Next->getNumber())) {
+ (!R || !R->contains(Next) ||
+ R->getHeader()->getNumber() < Next->getNumber())) {
Ready.push(Next);
Next = nullptr;
continue;
@@ -207,11 +326,11 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
for (;;) {
Next = Ready.top();
Ready.pop();
- // If Next isn't dominated by the top active loop header, defer it until
- // that loop is done.
- if (!Loops.empty() &&
- !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
- Loops.back().Deferred.push_back(Next);
+ // If Next isn't dominated by the top active region header, defer it
+ // until that region is done.
+ if (!Entries.empty() &&
+ !MDT.dominates(Entries.back().TheRegion->getHeader(), Next)) {
+ Entries.back().Deferred.push_back(Next);
continue;
}
break;
@@ -222,11 +341,11 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
MaybeUpdateTerminator(MBB);
MBB = Next;
}
- assert(Loops.empty() && "Active loop list not finished");
+ assert(Entries.empty() && "Active sort region list not finished");
MF.RenumberBlocks();
#ifndef NDEBUG
- SmallSetVector<MachineLoop *, 8> OnStack;
+ SmallSetVector<const Region *, 8> OnStack;
// Insert a sentinel representing the degenerate loop that starts at the
// function entry block and includes the entire function as a "loop" that
@@ -235,29 +354,39 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
for (auto &MBB : MF) {
assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
+ const Region *Region = SUI.getRegionFor(&MBB);
+
+ if (Region && &MBB == Region->getHeader()) {
+ if (Region->isLoop()) {
+ // Loop header. The loop predecessor should be sorted above, and the
+ // other predecessors should be backedges below.
+ for (auto Pred : MBB.predecessors())
+ assert(
+ (Pred->getNumber() < MBB.getNumber() || Region->contains(Pred)) &&
+ "Loop header predecessors must be loop predecessors or "
+ "backedges");
+ } else {
+ // Not a loop header. All predecessors should be sorted above.
+ for (auto Pred : MBB.predecessors())
+ assert(Pred->getNumber() < MBB.getNumber() &&
+ "Non-loop-header predecessors should be topologically sorted");
+ }
+ assert(OnStack.insert(Region) &&
+ "Regions should be declared at most once.");
- MachineLoop *Loop = MLI.getLoopFor(&MBB);
- if (Loop && &MBB == Loop->getHeader()) {
- // Loop header. The loop predecessor should be sorted above, and the other
- // predecessors should be backedges below.
- for (auto Pred : MBB.predecessors())
- assert(
- (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
- "Loop header predecessors must be loop predecessors or backedges");
- assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
} else {
// Not a loop header. All predecessors should be sorted above.
for (auto Pred : MBB.predecessors())
assert(Pred->getNumber() < MBB.getNumber() &&
"Non-loop-header predecessors should be topologically sorted");
- assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
- "Blocks must be nested in their loops");
+ assert(OnStack.count(SUI.getRegionFor(&MBB)) &&
+ "Blocks must be nested in their regions");
}
while (OnStack.size() > 1 && &MBB == WebAssembly::getBottom(OnStack.back()))
OnStack.pop_back();
}
assert(OnStack.pop_back_val() == nullptr &&
- "The function entry block shouldn't actually be a loop header");
+ "The function entry block shouldn't actually be a region header");
assert(OnStack.empty() &&
"Control flow stack pushes and pops should be balanced.");
#endif
@@ -269,12 +398,13 @@ bool WebAssemblyCFGSort::runOnMachineFunction(MachineFunction &MF) {
<< MF.getName() << '\n');
const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
auto &MDT = getAnalysis<MachineDominatorTree>();
// Liveness is not tracked for VALUE_STACK physreg.
MF.getRegInfo().invalidateLiveness();
- // Sort the blocks, with contiguous loops.
- SortBlocks(MF, MLI, MDT);
+ // Sort the blocks, with contiguous sort regions.
+ SortBlocks(MF, MLI, WEI, MDT);
return true;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 70ce40cefed7..f8f5f4040c86 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -10,16 +10,21 @@
/// \file
/// This file implements a CFG stacking pass.
///
-/// This pass inserts BLOCK and LOOP markers to mark the start of scopes, since
-/// scope boundaries serve as the labels for WebAssembly's control transfers.
+/// This pass inserts BLOCK, LOOP, and TRY markers to mark the start of scopes,
+/// since scope boundaries serve as the labels for WebAssembly's control
+/// transfers.
///
/// This is sufficient to convert arbitrary CFGs into a form that works on
/// WebAssembly, provided that all loops are single-entry.
///
+/// When exceptions are used, this pass also fixes mismatches in unwind
+/// destinations created while transforming the CFG into wasm's structured format.
+///
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblyExceptionInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
@@ -29,6 +34,8 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -40,26 +47,57 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass {
StringRef getPassName() const override { return "WebAssembly CFG Stackify"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<WebAssemblyExceptionInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF) override;
+ // For each block whose label represents the end of a scope, record the block
+ // which holds the beginning of the scope. This will allow us to quickly skip
+ // over scoped regions when walking blocks.
+ SmallVector<MachineBasicBlock *, 8> ScopeTops;
+
+ void placeMarkers(MachineFunction &MF);
+ void placeBlockMarker(MachineBasicBlock &MBB);
+ void placeLoopMarker(MachineBasicBlock &MBB);
+ void placeTryMarker(MachineBasicBlock &MBB);
+ void rewriteDepthImmediates(MachineFunction &MF);
+ void fixEndsAtEndOfFunction(MachineFunction &MF);
+
+ // For each BLOCK|LOOP|TRY, the corresponding END_(BLOCK|LOOP|TRY).
+ DenseMap<const MachineInstr *, MachineInstr *> BeginToEnd;
+ // For each END_(BLOCK|LOOP|TRY), the corresponding BLOCK|LOOP|TRY.
+ DenseMap<const MachineInstr *, MachineInstr *> EndToBegin;
+ // <TRY marker, EH pad> map
+ DenseMap<const MachineInstr *, MachineBasicBlock *> TryToEHPad;
+ // <EH pad, TRY marker> map
+ DenseMap<const MachineBasicBlock *, MachineInstr *> EHPadToTry;
+ // <LOOP|TRY marker, Loop/exception bottom BB> map
+ DenseMap<const MachineInstr *, MachineBasicBlock *> BeginToBottom;
+
+ // Helper functions to register scope information created by marker
+ // instructions.
+ void registerScope(MachineInstr *Begin, MachineInstr *End);
+ void registerTryScope(MachineInstr *Begin, MachineInstr *End,
+ MachineBasicBlock *EHPad);
+
+ MachineBasicBlock *getBottom(const MachineInstr *Begin);
+
public:
static char ID; // Pass identification, replacement for typeid
WebAssemblyCFGStackify() : MachineFunctionPass(ID) {}
+ ~WebAssemblyCFGStackify() override { releaseMemory(); }
+ void releaseMemory() override;
};
} // end anonymous namespace
char WebAssemblyCFGStackify::ID = 0;
INITIALIZE_PASS(WebAssemblyCFGStackify, DEBUG_TYPE,
- "Insert BLOCK and LOOP markers for WebAssembly scopes",
- false, false)
+ "Insert BLOCK and LOOP markers for WebAssembly scopes", false,
+ false)
FunctionPass *llvm::createWebAssemblyCFGStackify() {
return new WebAssemblyCFGStackify();
@@ -73,34 +111,121 @@ FunctionPass *llvm::createWebAssemblyCFGStackify() {
static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
MachineBasicBlock *MBB) {
for (MachineInstr &MI : Pred->terminators())
- for (MachineOperand &MO : MI.explicit_operands())
- if (MO.isMBB() && MO.getMBB() == MBB)
- return true;
+ // Even if a rethrow takes a BB argument, it is not a branch
+ if (!WebAssembly::isRethrow(MI))
+ for (MachineOperand &MO : MI.explicit_operands())
+ if (MO.isMBB() && MO.getMBB() == MBB)
+ return true;
return false;
}
+// Returns an iterator to the earliest position possible within the MBB,
+// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
+// contains instructions that should go before the marker, and AfterSet contains
+// ones that should go after the marker. In this function, AfterSet is only
+// used for sanity checking.
+static MachineBasicBlock::iterator
+GetEarliestInsertPos(MachineBasicBlock *MBB,
+ const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
+ const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
+ auto InsertPos = MBB->end();
+ while (InsertPos != MBB->begin()) {
+ if (BeforeSet.count(&*std::prev(InsertPos))) {
+#ifndef NDEBUG
+ // Sanity check
+ for (auto Pos = InsertPos, E = MBB->begin(); Pos != E; --Pos)
+ assert(!AfterSet.count(&*std::prev(Pos)));
+#endif
+ break;
+ }
+ --InsertPos;
+ }
+ return InsertPos;
+}
+
+// Returns an iterator to the latest position possible within the MBB,
+// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
+// contains instructions that should go before the marker, and AfterSet contains
+// ones that should go after the marker. In this function, BeforeSet is only
+// used for sanity checking.
+static MachineBasicBlock::iterator
+GetLatestInsertPos(MachineBasicBlock *MBB,
+ const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
+ const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
+ auto InsertPos = MBB->begin();
+ while (InsertPos != MBB->end()) {
+ if (AfterSet.count(&*InsertPos)) {
+#ifndef NDEBUG
+ // Sanity check
+ for (auto Pos = InsertPos, E = MBB->end(); Pos != E; ++Pos)
+ assert(!BeforeSet.count(&*Pos));
+#endif
+ break;
+ }
+ ++InsertPos;
+ }
+ return InsertPos;
+}
+
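An aside, not part of the patch: a toy illustration of the contract the two helpers above implement, using ints in a std::vector in place of instructions in an MBB (earliestPos and latestPos are illustrative names, not the pass's functions). The earliest legal slot is just past the last element that must precede the marker; the latest legal slot is right before the first element that must follow it.

    #include <cstdio>
    #include <set>
    #include <vector>

    using Iter = std::vector<int>::iterator;

    // Earliest legal slot: just past the last element that must stay before the
    // marker (mirrors the search direction of GetEarliestInsertPos above).
    Iter earliestPos(std::vector<int> &Seq, const std::set<int> &Before) {
      Iter Pos = Seq.end();
      while (Pos != Seq.begin() && !Before.count(*std::prev(Pos)))
        --Pos;
      return Pos;
    }

    // Latest legal slot: right before the first element that must stay after the
    // marker (mirrors the search direction of GetLatestInsertPos above).
    Iter latestPos(std::vector<int> &Seq, const std::set<int> &After) {
      Iter Pos = Seq.begin();
      while (Pos != Seq.end() && !After.count(*Pos))
        ++Pos;
      return Pos;
    }

    int main() {
      std::vector<int> Seq{10, 20, 30, 40, 50};
      std::set<int> Before{20};    // 20 must precede the marker
      std::set<int> After{40, 50}; // 40 and 50 must follow it
      std::printf("earliest index: %td\n", earliestPos(Seq, Before) - Seq.begin()); // 2
      std::printf("latest index:   %td\n", latestPos(Seq, After) - Seq.begin());    // 3
      return 0;
    }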
+void WebAssemblyCFGStackify::registerScope(MachineInstr *Begin,
+ MachineInstr *End) {
+ BeginToEnd[Begin] = End;
+ EndToBegin[End] = Begin;
+}
+
+void WebAssemblyCFGStackify::registerTryScope(MachineInstr *Begin,
+ MachineInstr *End,
+ MachineBasicBlock *EHPad) {
+ registerScope(Begin, End);
+ TryToEHPad[Begin] = EHPad;
+ EHPadToTry[EHPad] = Begin;
+}
+
+// Given a LOOP/TRY marker, returns its bottom BB. Use cached information if any
+// to prevent recomputation.
+MachineBasicBlock *
+WebAssemblyCFGStackify::getBottom(const MachineInstr *Begin) {
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
+ if (BeginToBottom.count(Begin))
+ return BeginToBottom[Begin];
+ if (Begin->getOpcode() == WebAssembly::LOOP) {
+ MachineLoop *L = MLI.getLoopFor(Begin->getParent());
+ assert(L);
+ BeginToBottom[Begin] = WebAssembly::getBottom(L);
+ } else if (Begin->getOpcode() == WebAssembly::TRY) {
+ WebAssemblyException *WE = WEI.getExceptionFor(TryToEHPad[Begin]);
+ assert(WE);
+ BeginToBottom[Begin] = WebAssembly::getBottom(WE);
+ } else
+ assert(false);
+ return BeginToBottom[Begin];
+}
+
/// Insert a BLOCK marker for branches to MBB (if needed).
-static void PlaceBlockMarker(
- MachineBasicBlock &MBB, MachineFunction &MF,
- SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
- DenseMap<const MachineInstr *, MachineInstr *> &BlockTops,
- DenseMap<const MachineInstr *, MachineInstr *> &LoopTops,
- const WebAssemblyInstrInfo &TII,
- const MachineLoopInfo &MLI,
- MachineDominatorTree &MDT,
- WebAssemblyFunctionInfo &MFI) {
+void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
+ // This should have been handled in placeTryMarker.
+ if (MBB.isEHPad())
+ return;
+
+ MachineFunction &MF = *MBB.getParent();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
// First compute the nearest common dominator of all forward non-fallthrough
// predecessors so that we minimize the time that the BLOCK is on the stack,
// which reduces overall stack height.
MachineBasicBlock *Header = nullptr;
bool IsBranchedTo = false;
int MBBNumber = MBB.getNumber();
- for (MachineBasicBlock *Pred : MBB.predecessors())
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
if (Pred->getNumber() < MBBNumber) {
Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
if (ExplicitlyBranchesTo(Pred, &MBB))
IsBranchedTo = true;
}
+ }
if (!Header)
return;
if (!IsBranchedTo)
@@ -125,43 +250,93 @@ static void PlaceBlockMarker(
}
// Decide where in Header to put the BLOCK.
- MachineBasicBlock::iterator InsertPos;
- MachineLoop *HeaderLoop = MLI.getLoopFor(Header);
- if (HeaderLoop &&
- MBB.getNumber() > WebAssembly::getBottom(HeaderLoop)->getNumber()) {
- // Header is the header of a loop that does not lexically contain MBB, so
- // the BLOCK needs to be above the LOOP, after any END constructs.
- InsertPos = Header->begin();
- while (InsertPos->getOpcode() == WebAssembly::END_BLOCK ||
- InsertPos->getOpcode() == WebAssembly::END_LOOP)
- ++InsertPos;
- } else {
- // Otherwise, insert the BLOCK as late in Header as we can, but before the
- // beginning of the local expression tree and any nested BLOCKs.
- InsertPos = Header->getFirstTerminator();
- while (InsertPos != Header->begin() &&
- WebAssembly::isChild(*std::prev(InsertPos), MFI) &&
- std::prev(InsertPos)->getOpcode() != WebAssembly::LOOP &&
- std::prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK &&
- std::prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP)
- --InsertPos;
+
+ // Instructions that should go before the BLOCK.
+ SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+ // Instructions that should go after the BLOCK.
+ SmallPtrSet<const MachineInstr *, 4> AfterSet;
+ for (const auto &MI : *Header) {
+ // If there is a previously placed LOOP/TRY marker and the bottom block of
+ // the loop/exception is above MBB, it should be after the BLOCK, because
+ // the loop/exception is nested in this block. Otherwise it should be before
+ // the BLOCK.
+ if (MI.getOpcode() == WebAssembly::LOOP ||
+ MI.getOpcode() == WebAssembly::TRY) {
+ if (MBB.getNumber() > getBottom(&MI)->getNumber())
+ AfterSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ BeforeSet.insert(&MI);
+#endif
+ }
+
+ // All previously inserted BLOCK markers should be after the BLOCK because
+ // they are all nested blocks.
+ if (MI.getOpcode() == WebAssembly::BLOCK)
+ AfterSet.insert(&MI);
+
+#ifndef NDEBUG
+ // All END_(BLOCK|LOOP|TRY) markers should be before the BLOCK.
+ if (MI.getOpcode() == WebAssembly::END_BLOCK ||
+ MI.getOpcode() == WebAssembly::END_LOOP ||
+ MI.getOpcode() == WebAssembly::END_TRY)
+ BeforeSet.insert(&MI);
+#endif
+
+ // Terminators should go after the BLOCK.
+ if (MI.isTerminator())
+ AfterSet.insert(&MI);
+ }
+
+ // Local expression tree should go after the BLOCK.
+ for (auto I = Header->getFirstTerminator(), E = Header->begin(); I != E;
+ --I) {
+ if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
+ continue;
+ if (WebAssembly::isChild(*std::prev(I), MFI))
+ AfterSet.insert(&*std::prev(I));
+ else
+ break;
}
// Add the BLOCK.
+ auto InsertPos = GetLatestInsertPos(Header, BeforeSet, AfterSet);
MachineInstr *Begin =
BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
TII.get(WebAssembly::BLOCK))
.addImm(int64_t(WebAssembly::ExprType::Void));
+ // Decide where in Header to put the END_BLOCK.
+ BeforeSet.clear();
+ AfterSet.clear();
+ for (auto &MI : MBB) {
+#ifndef NDEBUG
+ // END_BLOCK should precede existing LOOP and TRY markers.
+ if (MI.getOpcode() == WebAssembly::LOOP ||
+ MI.getOpcode() == WebAssembly::TRY)
+ AfterSet.insert(&MI);
+#endif
+
+ // If there is a previously placed END_LOOP marker and the header of the
+ // loop is above this block's header, the END_LOOP should be placed after
+ // the BLOCK, because the loop contains this block. Otherwise the END_LOOP
+ // should be placed before the BLOCK. The same for END_TRY.
+ if (MI.getOpcode() == WebAssembly::END_LOOP ||
+ MI.getOpcode() == WebAssembly::END_TRY) {
+ if (EndToBegin[&MI]->getParent()->getNumber() >= Header->getNumber())
+ BeforeSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ AfterSet.insert(&MI);
+#endif
+ }
+ }
+
// Mark the end of the block.
- InsertPos = MBB.begin();
- while (InsertPos != MBB.end() &&
- InsertPos->getOpcode() == WebAssembly::END_LOOP &&
- LoopTops[&*InsertPos]->getParent()->getNumber() >= Header->getNumber())
- ++InsertPos;
+ InsertPos = GetEarliestInsertPos(&MBB, BeforeSet, AfterSet);
MachineInstr *End = BuildMI(MBB, InsertPos, MBB.findPrevDebugLoc(InsertPos),
TII.get(WebAssembly::END_BLOCK));
- BlockTops[End] = Begin;
+ registerScope(Begin, End);
// Track the farthest-spanning scope that ends at this point.
int Number = MBB.getNumber();
@@ -171,11 +346,11 @@ static void PlaceBlockMarker(
}
/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header).
-static void PlaceLoopMarker(
- MachineBasicBlock &MBB, MachineFunction &MF,
- SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
- DenseMap<const MachineInstr *, MachineInstr *> &LoopTops,
- const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) {
+void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
+ MachineFunction &MF = *MBB.getParent();
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
MachineLoop *Loop = MLI.getLoopFor(&MBB);
if (!Loop || Loop->getHeader() != &MBB)
return;
@@ -193,22 +368,43 @@ static void PlaceLoopMarker(
}
MachineBasicBlock *AfterLoop = &*Iter;
- // Mark the beginning of the loop (after the end of any existing loop that
- // ends here).
- auto InsertPos = MBB.begin();
- while (InsertPos != MBB.end() &&
- InsertPos->getOpcode() == WebAssembly::END_LOOP)
- ++InsertPos;
+ // Decide where in Header to put the LOOP.
+ SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+ SmallPtrSet<const MachineInstr *, 4> AfterSet;
+ for (const auto &MI : MBB) {
+ // LOOP marker should be after any existing loop that ends here. Otherwise
+ // we assume the instruction belongs to the loop.
+ if (MI.getOpcode() == WebAssembly::END_LOOP)
+ BeforeSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ AfterSet.insert(&MI);
+#endif
+ }
+
+ // Mark the beginning of the loop.
+ auto InsertPos = GetEarliestInsertPos(&MBB, BeforeSet, AfterSet);
MachineInstr *Begin = BuildMI(MBB, InsertPos, MBB.findDebugLoc(InsertPos),
TII.get(WebAssembly::LOOP))
.addImm(int64_t(WebAssembly::ExprType::Void));
- // Mark the end of the loop (using arbitrary debug location that branched
- // to the loop end as its location).
+ // Decide where in Header to put the END_LOOP.
+ BeforeSet.clear();
+ AfterSet.clear();
+#ifndef NDEBUG
+ for (const auto &MI : MBB)
+ // Existing END_LOOP markers belong to parent loops of this loop
+ if (MI.getOpcode() == WebAssembly::END_LOOP)
+ AfterSet.insert(&MI);
+#endif
+
+ // Mark the end of the loop (using arbitrary debug location that branched to
+ // the loop end as its location).
+ InsertPos = GetEarliestInsertPos(AfterLoop, BeforeSet, AfterSet);
DebugLoc EndDL = (*AfterLoop->pred_rbegin())->findBranchDebugLoc();
- MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), EndDL,
- TII.get(WebAssembly::END_LOOP));
- LoopTops[End] = Begin;
+ MachineInstr *End =
+ BuildMI(*AfterLoop, InsertPos, EndDL, TII.get(WebAssembly::END_LOOP));
+ registerScope(Begin, End);
assert((!ScopeTops[AfterLoop->getNumber()] ||
ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
@@ -217,6 +413,183 @@ static void PlaceLoopMarker(
ScopeTops[AfterLoop->getNumber()] = &MBB;
}
+void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
+ if (!MBB.isEHPad())
+ return;
+
+ // A catch_all terminate pad is grouped together with its catch terminate pad
+ // and does not need separate TRY and END_TRY markers.
+ if (WebAssembly::isCatchAllTerminatePad(MBB))
+ return;
+
+ MachineFunction &MF = *MBB.getParent();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
+ const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+ // Compute the nearest common dominator of all unwind predecessors
+ MachineBasicBlock *Header = nullptr;
+ int MBBNumber = MBB.getNumber();
+ for (auto *Pred : MBB.predecessors()) {
+ if (Pred->getNumber() < MBBNumber) {
+ Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
+ assert(!ExplicitlyBranchesTo(Pred, &MBB) &&
+ "Explicit branch to an EH pad!");
+ }
+ }
+ if (!Header)
+ return;
+
+ // If this try is at the bottom of the function, insert a dummy block at the
+ // end.
+ WebAssemblyException *WE = WEI.getExceptionFor(&MBB);
+ assert(WE);
+ MachineBasicBlock *Bottom = WebAssembly::getBottom(WE);
+
+ auto Iter = std::next(MachineFunction::iterator(Bottom));
+ if (Iter == MF.end()) {
+ MachineBasicBlock *Label = MF.CreateMachineBasicBlock();
+ // Give it a fake predecessor so that AsmPrinter prints its label.
+ Label->addSuccessor(Label);
+ MF.push_back(Label);
+ Iter = std::next(MachineFunction::iterator(Bottom));
+ }
+ MachineBasicBlock *AfterTry = &*Iter;
+
+ assert(AfterTry != &MF.front());
+ MachineBasicBlock *LayoutPred =
+ &*std::prev(MachineFunction::iterator(AfterTry));
+
+ // If the nearest common dominator is inside a more deeply nested context,
+ // walk out to the nearest scope which isn't more deeply nested.
+ for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) {
+ if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
+ if (ScopeTop->getNumber() > Header->getNumber()) {
+ // Skip over an intervening scope.
+ I = std::next(MachineFunction::iterator(ScopeTop));
+ } else {
+ // We found a scope level at an appropriate depth.
+ Header = ScopeTop;
+ break;
+ }
+ }
+ }
+
+ // Decide where in Header to put the TRY.
+
+ // Instructions that should go before the TRY.
+ SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+ // Instructions that should go after the TRY.
+ SmallPtrSet<const MachineInstr *, 4> AfterSet;
+ for (const auto &MI : *Header) {
+ // If there is a previously placed LOOP marker and the bottom block of
+ // the loop is above MBB, the LOOP should be after the TRY, because the
+ // loop is nested in this try. Otherwise it should be before the TRY.
+ if (MI.getOpcode() == WebAssembly::LOOP) {
+ if (MBB.getNumber() > Bottom->getNumber())
+ AfterSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ BeforeSet.insert(&MI);
+#endif
+ }
+
+ // All previously inserted TRY markers should be after the TRY because they
+ // are all nested tries.
+ if (MI.getOpcode() == WebAssembly::TRY)
+ AfterSet.insert(&MI);
+
+#ifndef NDEBUG
+ // All END_(LOOP/TRY) markers should be before the TRY.
+ if (MI.getOpcode() == WebAssembly::END_LOOP ||
+ MI.getOpcode() == WebAssembly::END_TRY)
+ BeforeSet.insert(&MI);
+#endif
+
+ // Terminators should go after the TRY.
+ if (MI.isTerminator())
+ AfterSet.insert(&MI);
+ }
+
+ // Local expression tree should go after the TRY.
+ for (auto I = Header->getFirstTerminator(), E = Header->begin(); I != E;
+ --I) {
+ if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
+ continue;
+ if (WebAssembly::isChild(*std::prev(I), MFI))
+ AfterSet.insert(&*std::prev(I));
+ else
+ break;
+ }
+
+ // If Header unwinds to MBB (= Header contains 'invoke'), the try block should
+ // contain the call within it. So the call should go after the TRY. The
+ // exception is when the header's terminator is a rethrow instruction, in
+ // which case that instruction, not a call instruction before it, is the one
+ // that will throw.
+ if (MBB.isPredecessor(Header)) {
+ auto TermPos = Header->getFirstTerminator();
+ if (TermPos == Header->end() || !WebAssembly::isRethrow(*TermPos)) {
+ for (const auto &MI : reverse(*Header)) {
+ if (MI.isCall()) {
+ AfterSet.insert(&MI);
+ break;
+ }
+ }
+ }
+ }
+
+ // Add the TRY.
+ auto InsertPos = GetLatestInsertPos(Header, BeforeSet, AfterSet);
+ MachineInstr *Begin =
+ BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
+ TII.get(WebAssembly::TRY))
+ .addImm(int64_t(WebAssembly::ExprType::Void));
+
+ // Decide where in Header to put the END_TRY.
+ BeforeSet.clear();
+ AfterSet.clear();
+ for (const auto &MI : *AfterTry) {
+#ifndef NDEBUG
+ // END_TRY should precede existing LOOP markers.
+ if (MI.getOpcode() == WebAssembly::LOOP)
+ AfterSet.insert(&MI);
+
+ // All END_TRY markers placed earlier belong to exceptions that contain
+ // this one.
+ if (MI.getOpcode() == WebAssembly::END_TRY)
+ AfterSet.insert(&MI);
+#endif
+
+ // If there is a previously placed END_LOOP marker and its header is after
+ // where TRY marker is, this loop is contained within the 'catch' part, so
+ // the END_TRY marker should go after that. Otherwise, the whole try-catch
+ // is contained within this loop, so the END_TRY should go before that.
+ if (MI.getOpcode() == WebAssembly::END_LOOP) {
+ if (EndToBegin[&MI]->getParent()->getNumber() >= Header->getNumber())
+ BeforeSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ AfterSet.insert(&MI);
+#endif
+ }
+ }
+
+ // Mark the end of the TRY.
+ InsertPos = GetEarliestInsertPos(AfterTry, BeforeSet, AfterSet);
+ MachineInstr *End =
+ BuildMI(*AfterTry, InsertPos, Bottom->findBranchDebugLoc(),
+ TII.get(WebAssembly::END_TRY));
+ registerTryScope(Begin, End, &MBB);
+
+ // Track the farthest-spanning scope that ends at this point.
+ int Number = AfterTry->getNumber();
+ if (!ScopeTops[Number] ||
+ ScopeTops[Number]->getNumber() > Header->getNumber())
+ ScopeTops[Number] = Header;
+}
+
static unsigned
GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
const MachineBasicBlock *MBB) {
@@ -237,11 +610,8 @@ GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
/// that end at the function end need to have a return type signature that
/// matches the function signature, even though it's unreachable. This function
/// checks for such cases and fixes up the signatures.
-static void FixEndsAtEndOfFunction(
- MachineFunction &MF,
- const WebAssemblyFunctionInfo &MFI,
- DenseMap<const MachineInstr *, MachineInstr *> &BlockTops,
- DenseMap<const MachineInstr *, MachineInstr *> &LoopTops) {
+void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
+ const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
assert(MFI.getResults().size() <= 1);
if (MFI.getResults().empty())
@@ -249,16 +619,31 @@ static void FixEndsAtEndOfFunction(
WebAssembly::ExprType retType;
switch (MFI.getResults().front().SimpleTy) {
- case MVT::i32: retType = WebAssembly::ExprType::I32; break;
- case MVT::i64: retType = WebAssembly::ExprType::I64; break;
- case MVT::f32: retType = WebAssembly::ExprType::F32; break;
- case MVT::f64: retType = WebAssembly::ExprType::F64; break;
- case MVT::v16i8: retType = WebAssembly::ExprType::I8x16; break;
- case MVT::v8i16: retType = WebAssembly::ExprType::I16x8; break;
- case MVT::v4i32: retType = WebAssembly::ExprType::I32x4; break;
- case MVT::v4f32: retType = WebAssembly::ExprType::F32x4; break;
- case MVT::ExceptRef: retType = WebAssembly::ExprType::ExceptRef; break;
- default: llvm_unreachable("unexpected return type");
+ case MVT::i32:
+ retType = WebAssembly::ExprType::I32;
+ break;
+ case MVT::i64:
+ retType = WebAssembly::ExprType::I64;
+ break;
+ case MVT::f32:
+ retType = WebAssembly::ExprType::F32;
+ break;
+ case MVT::f64:
+ retType = WebAssembly::ExprType::F64;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ retType = WebAssembly::ExprType::V128;
+ break;
+ case MVT::ExceptRef:
+ retType = WebAssembly::ExprType::ExceptRef;
+ break;
+ default:
+ llvm_unreachable("unexpected return type");
}
for (MachineBasicBlock &MBB : reverse(MF)) {
@@ -266,11 +651,11 @@ static void FixEndsAtEndOfFunction(
if (MI.isPosition() || MI.isDebugInstr())
continue;
if (MI.getOpcode() == WebAssembly::END_BLOCK) {
- BlockTops[&MI]->getOperand(0).setImm(int32_t(retType));
+ EndToBegin[&MI]->getOperand(0).setImm(int32_t(retType));
continue;
}
if (MI.getOpcode() == WebAssembly::END_LOOP) {
- LoopTops[&MI]->getOperand(0).setImm(int32_t(retType));
+ EndToBegin[&MI]->getOperand(0).setImm(int32_t(retType));
continue;
}
// Something other than an `end`. We're done.
@@ -281,60 +666,108 @@ static void FixEndsAtEndOfFunction(
// WebAssembly functions end with an end instruction, as if the function body
// were a block.
-static void AppendEndToFunction(
- MachineFunction &MF,
- const WebAssemblyInstrInfo &TII) {
+static void AppendEndToFunction(MachineFunction &MF,
+ const WebAssemblyInstrInfo &TII) {
BuildMI(MF.back(), MF.back().end(),
MF.back().findPrevDebugLoc(MF.back().end()),
TII.get(WebAssembly::END_FUNCTION));
}
-/// Insert LOOP and BLOCK markers at appropriate places.
-static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
- const WebAssemblyInstrInfo &TII,
- MachineDominatorTree &MDT,
- WebAssemblyFunctionInfo &MFI) {
- // For each block whose label represents the end of a scope, record the block
- // which holds the beginning of the scope. This will allow us to quickly skip
- // over scoped regions when walking blocks. We allocate one more than the
- // number of blocks in the function to accommodate for the possible fake block
- // we may insert at the end.
- SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);
-
- // For each LOOP_END, the corresponding LOOP.
- DenseMap<const MachineInstr *, MachineInstr *> LoopTops;
-
- // For each END_BLOCK, the corresponding BLOCK.
- DenseMap<const MachineInstr *, MachineInstr *> BlockTops;
-
- for (auto &MBB : MF) {
- // Place the LOOP for MBB if MBB is the header of a loop.
- PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI);
-
- // Place the BLOCK for MBB if MBB is branched to from above.
- PlaceBlockMarker(MBB, MF, ScopeTops, BlockTops, LoopTops, TII, MLI, MDT, MFI);
- }
+/// Insert LOOP/TRY/BLOCK markers at appropriate places.
+void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) {
+ const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo();
+ // We allocate one more than the number of blocks in the function to
+ // accommodate the possible fake block we may insert at the end.
+ ScopeTops.resize(MF.getNumBlockIDs() + 1);
+ // Place the LOOP for MBB if MBB is the header of a loop.
+ for (auto &MBB : MF)
+ placeLoopMarker(MBB);
+ // Place the TRY for MBB if MBB is the EH pad of an exception.
+ if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
+ MF.getFunction().hasPersonalityFn())
+ for (auto &MBB : MF)
+ placeTryMarker(MBB);
+ // Place the BLOCK for MBB if MBB is branched to from above.
+ for (auto &MBB : MF)
+ placeBlockMarker(MBB);
+}
+void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
// Now rewrite references to basic blocks to be depth immediates.
+ // We need two stacks: one for normal scopes and the other for EH pad scopes.
+ // The EH pad stack is used to rewrite depths in rethrow instructions.
SmallVector<const MachineBasicBlock *, 8> Stack;
+ SmallVector<const MachineBasicBlock *, 8> EHPadStack;
for (auto &MBB : reverse(MF)) {
- for (auto &MI : reverse(MBB)) {
+ for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
+ MachineInstr &MI = *I;
switch (MI.getOpcode()) {
case WebAssembly::BLOCK:
- assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <= MBB.getNumber() &&
- "Block should be balanced");
+ assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <=
+ MBB.getNumber() &&
+ "Block/try should be balanced");
Stack.pop_back();
break;
+
+ case WebAssembly::TRY:
+ assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <=
+ MBB.getNumber() &&
+ "Block/try marker should be balanced");
+ Stack.pop_back();
+ EHPadStack.pop_back();
+ break;
+
+ case WebAssembly::CATCH_I32:
+ case WebAssembly::CATCH_I64:
+ case WebAssembly::CATCH_ALL:
+ // Currently the only case in which there is more than one catch for a try
+ // is the catch terminate pad, which has the form
+ // try
+ // catch
+ // call @__clang_call_terminate
+ // unreachable
+ // catch_all
+ // call @std::terminate
+ // unreachable
+ // end
+ // So we shouldn't push the current BB for the second catch_all block
+ // here.
+ if (!WebAssembly::isCatchAllTerminatePad(MBB))
+ EHPadStack.push_back(&MBB);
+ break;
+
case WebAssembly::LOOP:
assert(Stack.back() == &MBB && "Loop top should be balanced");
Stack.pop_back();
break;
+
case WebAssembly::END_BLOCK:
+ case WebAssembly::END_TRY:
Stack.push_back(&MBB);
break;
+
case WebAssembly::END_LOOP:
- Stack.push_back(LoopTops[&MI]->getParent());
+ Stack.push_back(EndToBegin[&MI]->getParent());
+ break;
+
+ case WebAssembly::RETHROW: {
+ // Rewrite MBB operands to be depth immediates.
+ unsigned EHPadDepth = GetDepth(EHPadStack, MI.getOperand(0).getMBB());
+ MI.RemoveOperand(0);
+ MI.addOperand(MF, MachineOperand::CreateImm(EHPadDepth));
break;
+ }
+
+ case WebAssembly::RETHROW_TO_CALLER: {
+ MachineInstr *Rethrow =
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(WebAssembly::RETHROW))
+ .addImm(EHPadStack.size());
+ MI.eraseFromParent();
+ I = MachineBasicBlock::reverse_iterator(Rethrow);
+ break;
+ }
+
default:
if (MI.isTerminator()) {
// Rewrite MBB operands to be depth immediates.
@@ -352,13 +785,15 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
}
}
assert(Stack.empty() && "Control flow should be balanced");
+}
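An aside, not part of the patch: a rough sketch of what the depth immediates computed above mean, assuming the scope stack holds the currently open scopes from outermost to innermost (depthOf is a hypothetical helper, not the pass's GetDepth). A branch to the innermost enclosing scope gets depth 0, the next one out depth 1, and so on.

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Depth of Target counted from the innermost (topmost) entry of the scope
    // stack; assumes Target is somewhere on the stack.
    unsigned depthOf(const std::vector<int> &ScopeStack, int Target) {
      unsigned Depth = 0;
      for (auto I = ScopeStack.rbegin(), E = ScopeStack.rend(); I != E; ++I, ++Depth)
        if (*I == Target)
          return Depth;
      assert(false && "branch destination should be in scope");
      return 0;
    }

    int main() {
      // Scopes currently open, outermost first: block %outer, loop %mid, try %inner.
      std::vector<int> ScopeStack{1, 2, 3};
      std::printf("br to %%inner -> depth %u\n", depthOf(ScopeStack, 3)); // 0
      std::printf("br to %%outer -> depth %u\n", depthOf(ScopeStack, 1)); // 2
      return 0;
    }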
- // Fix up block/loop signatures at the end of the function to conform to
- // WebAssembly's rules.
- FixEndsAtEndOfFunction(MF, MFI, BlockTops, LoopTops);
-
- // Add an end instruction at the end of the function body.
- AppendEndToFunction(MF, TII);
+void WebAssemblyCFGStackify::releaseMemory() {
+ ScopeTops.clear();
+ BeginToEnd.clear();
+ EndToBegin.clear();
+ TryToEHPad.clear();
+ EHPadToTry.clear();
+ BeginToBottom.clear();
}
bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
@@ -366,15 +801,27 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
"********** Function: "
<< MF.getName() << '\n');
- const auto &MLI = getAnalysis<MachineLoopInfo>();
- auto &MDT = getAnalysis<MachineDominatorTree>();
+ releaseMemory();
+
// Liveness is not tracked for VALUE_STACK physreg.
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
MF.getRegInfo().invalidateLiveness();
- // Place the BLOCK and LOOP markers to indicate the beginnings of scopes.
- PlaceMarkers(MF, MLI, TII, MDT, MFI);
+ // Place the BLOCK/LOOP/TRY markers to indicate the beginnings of scopes.
+ placeMarkers(MF);
+
+ // Convert MBB operands in terminators to relative depth immediates.
+ rewriteDepthImmediates(MF);
+
+ // Fix up block/loop/try signatures at the end of the function to conform to
+ // WebAssembly's rules.
+ fixEndsAtEndOfFunction(MF);
+
+ // Add an end instruction at the end of the function body.
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ if (!MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple()
+ .isOSBinFormatELF())
+ AppendEndToFunction(MF, TII);
return true;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
index c1820bf66bc0..aaa6d286598f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -64,16 +64,30 @@ FunctionPass *llvm::createWebAssemblyCallIndirectFixup() {
static unsigned GetNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
switch (MI.getOpcode()) {
using namespace WebAssembly;
- case PCALL_INDIRECT_VOID: return CALL_INDIRECT_VOID;
- case PCALL_INDIRECT_I32: return CALL_INDIRECT_I32;
- case PCALL_INDIRECT_I64: return CALL_INDIRECT_I64;
- case PCALL_INDIRECT_F32: return CALL_INDIRECT_F32;
- case PCALL_INDIRECT_F64: return CALL_INDIRECT_F64;
- case PCALL_INDIRECT_v16i8: return CALL_INDIRECT_v16i8;
- case PCALL_INDIRECT_v8i16: return CALL_INDIRECT_v8i16;
- case PCALL_INDIRECT_v4i32: return CALL_INDIRECT_v4i32;
- case PCALL_INDIRECT_v4f32: return CALL_INDIRECT_v4f32;
- default: return INSTRUCTION_LIST_END;
+ case PCALL_INDIRECT_VOID:
+ return CALL_INDIRECT_VOID;
+ case PCALL_INDIRECT_I32:
+ return CALL_INDIRECT_I32;
+ case PCALL_INDIRECT_I64:
+ return CALL_INDIRECT_I64;
+ case PCALL_INDIRECT_F32:
+ return CALL_INDIRECT_F32;
+ case PCALL_INDIRECT_F64:
+ return CALL_INDIRECT_F64;
+ case PCALL_INDIRECT_v16i8:
+ return CALL_INDIRECT_v16i8;
+ case PCALL_INDIRECT_v8i16:
+ return CALL_INDIRECT_v8i16;
+ case PCALL_INDIRECT_v4i32:
+ return CALL_INDIRECT_v4i32;
+ case PCALL_INDIRECT_v2i64:
+ return CALL_INDIRECT_v2i64;
+ case PCALL_INDIRECT_v4f32:
+ return CALL_INDIRECT_v4f32;
+ case PCALL_INDIRECT_v2f64:
+ return CALL_INDIRECT_v2f64;
+ default:
+ return INSTRUCTION_LIST_END;
}
}
@@ -84,7 +98,7 @@ static bool IsPseudoCallIndirect(const MachineInstr &MI) {
bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** Fixing up CALL_INDIRECTs **********\n"
- << MF.getName() << '\n');
+ << "********** Function: " << MF.getName() << '\n');
bool Changed = false;
const WebAssemblyInstrInfo *TII =
@@ -110,10 +124,8 @@ bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
Ops.push_back(MachineOperand::CreateImm(0));
for (const MachineOperand &MO :
- make_range(MI.operands_begin() +
- MI.getDesc().getNumDefs() + 1,
- MI.operands_begin() +
- MI.getNumExplicitOperands()))
+ make_range(MI.operands_begin() + MI.getDesc().getNumDefs() + 1,
+ MI.operands_begin() + MI.getNumExplicitOperands()))
Ops.push_back(MO);
Ops.push_back(MI.getOperand(MI.getDesc().getNumDefs()));
@@ -133,4 +145,3 @@ bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
new file mode 100644
index 000000000000..8ecc159951ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
@@ -0,0 +1,46 @@
+//===-- WebAssemblyDebugValueManager.cpp - WebAssembly DebugValue Manager -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the manager for MachineInstr DebugValues.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyDebugValueManager.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+using namespace llvm;
+
+WebAssemblyDebugValueManager::WebAssemblyDebugValueManager(
+ MachineInstr *Instr) {
+ Instr->collectDebugValues(DbgValues);
+}
+
+void WebAssemblyDebugValueManager::move(MachineInstr *Insert) {
+ MachineBasicBlock *MBB = Insert->getParent();
+ for (MachineInstr *DBI : reverse(DbgValues))
+ MBB->splice(Insert, DBI->getParent(), DBI);
+}
+
+void WebAssemblyDebugValueManager::updateReg(unsigned Reg) {
+ for (auto *DBI : DbgValues)
+ DBI->getOperand(0).setReg(Reg);
+}
+
+void WebAssemblyDebugValueManager::clone(MachineInstr *Insert,
+ unsigned NewReg) {
+ MachineBasicBlock *MBB = Insert->getParent();
+ MachineFunction *MF = MBB->getParent();
+ for (MachineInstr *DBI : reverse(DbgValues)) {
+ MachineInstr *Clone = MF->CloneMachineInstr(DBI);
+ Clone->getOperand(0).setReg(NewReg);
+ MBB->insert(Insert, Clone);
+ }
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
new file mode 100644
index 000000000000..73f317214058
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
@@ -0,0 +1,38 @@
+// WebAssemblyDebugValueManager.h - WebAssembly DebugValue Manager -*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the WebAssembly-specific
+/// manager for DebugValues associated with a specific MachineInstr.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYDEBUGVALUEMANAGER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYDEBUGVALUEMANAGER_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class MachineInstr;
+
+class WebAssemblyDebugValueManager {
+ SmallVector<MachineInstr *, 2> DbgValues;
+
+public:
+ WebAssemblyDebugValueManager(MachineInstr *Instr);
+
+ void move(MachineInstr *Insert);
+ void updateReg(unsigned Reg);
+ void clone(MachineInstr *Insert, unsigned NewReg);
+};
+
+} // end namespace llvm
+
+#endif
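An aside, not code from this commit: a hedged sketch of the call pattern a pass would presumably use with the new helper when it moves a def in front of some Insert instruction and wants the attached DBG_VALUEs to follow. It assumes the in-tree headers and the MachineBasicBlock::splice overload that accepts instruction pointers via the bundle-iterator conversion.

    #include "WebAssemblyDebugValueManager.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    // Illustrative only: reposition Def in front of Insert and keep the
    // DBG_VALUEs that refer to Def adjacent to it.
    static void moveDefWithDebugValues(MachineInstr *Def, MachineInstr *Insert,
                                       MachineBasicBlock &MBB) {
      WebAssemblyDebugValueManager DefDIs(Def); // gathers DBG_VALUEs tied to Def
      MBB.splice(Insert, Def->getParent(), Def); // move the def itself
      DefDIs.move(Insert);                       // re-splice the DBG_VALUEs before Insert
    }

The other two entry points in the header cover the remaining cases: updateReg retargets the collected DBG_VALUEs when the def's register is renamed, and clone duplicates them next to a cloned def.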
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp
new file mode 100644
index 000000000000..c86260ba408c
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp
@@ -0,0 +1,87 @@
+//===-- WebAssemblyEHRestoreStackPointer.cpp - __stack_pointer restoration ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// After the stack is unwound due to a thrown exception, the __stack_pointer
+/// global can point to an invalid address. This pass inserts instructions that
+/// restore the __stack_pointer global.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-eh-restore-stack-pointer"
+
+namespace {
+class WebAssemblyEHRestoreStackPointer final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyEHRestoreStackPointer() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "WebAssembly Restore Stack Pointer for Exception Handling";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+char WebAssemblyEHRestoreStackPointer::ID = 0;
+INITIALIZE_PASS(WebAssemblyEHRestoreStackPointer, DEBUG_TYPE,
+ "Restore Stack Pointer for Exception Handling", true, false)
+
+FunctionPass *llvm::createWebAssemblyEHRestoreStackPointer() {
+ return new WebAssemblyEHRestoreStackPointer();
+}
+
+bool WebAssemblyEHRestoreStackPointer::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** EH Restore Stack Pointer **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
+ if (!FrameLowering->needsPrologForEH(MF))
+ return false;
+ bool Changed = false;
+
+ for (auto &MBB : MF) {
+ if (!MBB.isEHPad())
+ continue;
+ Changed = true;
+
+ // Insert __stack_pointer restoring instructions at the beginning of each EH
+ // pad, after the catch instruction. (Catch instructions may have been
+ // reordered, and catch_all instructions have not been inserted yet, but
+ // those cases are handled in LateEHPrepare).
+ //
+ // Here it is safe to assume that SP32 holds the latest value of
+ // __stack_pointer, because the only exception for this case is when a
+ // function uses the red zone, but that only happens with leaf functions,
+ // and we don't restore __stack_pointer in leaf functions anyway.
+ auto InsertPos = MBB.begin();
+ if (WebAssembly::isCatch(*MBB.begin()))
+ InsertPos++;
+ FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos,
+ MBB.begin()->getDebugLoc());
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
index 84683d48a90a..6b3a3e765786 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyExceptionInfo.h"
-#include "WebAssemblyUtilities.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -32,7 +32,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
INITIALIZE_PASS_END(WebAssemblyExceptionInfo, DEBUG_TYPE,
"WebAssembly Exception Information", true, true)
-bool WebAssemblyExceptionInfo::runOnMachineFunction(MachineFunction &F) {
+bool WebAssemblyExceptionInfo::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** Exception Info Calculation **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
releaseMemory();
auto &MDT = getAnalysis<MachineDominatorTree>();
auto &MDF = getAnalysis<MachineDominanceFrontier>();
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 8619cbdcb5ee..27aabe6ba0bd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -11,7 +11,7 @@
/// This file converts any remaining registers into WebAssembly locals.
///
/// After register stackification and register coloring, convert non-stackified
-/// registers into locals, inserting explicit get_local and set_local
+/// registers into locals, inserting explicit local.get and local.set
/// instructions.
///
//===----------------------------------------------------------------------===//
@@ -31,12 +31,14 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-explicit-locals"
-// A command-line option to disable this pass. Note that this produces output
-// which is not valid WebAssembly, though it may be more convenient for writing
-// LLVM unit tests with.
-static cl::opt<bool> DisableWebAssemblyExplicitLocals(
- "disable-wasm-explicit-locals", cl::ReallyHidden,
- cl::desc("WebAssembly: Disable emission of get_local/set_local."),
+// A command-line option to disable this pass, and keep implicit locals
+// for the purpose of testing with lit/llc ONLY.
+// This produces output which is not valid WebAssembly, and is not supported
+// by assemblers/disassemblers and other MC based tools.
+static cl::opt<bool> WasmDisableExplicitLocals(
+ "wasm-disable-explicit-locals", cl::Hidden,
+ cl::desc("WebAssembly: output implicit locals in"
+ " instruction output for test purposes only."),
cl::init(false));
namespace {
@@ -94,54 +96,54 @@ static unsigned getDropOpcode(const TargetRegisterClass *RC) {
llvm_unreachable("Unexpected register class");
}
-/// Get the appropriate get_local opcode for the given register class.
+/// Get the appropriate local.get opcode for the given register class.
static unsigned getGetLocalOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
- return WebAssembly::GET_LOCAL_I32;
+ return WebAssembly::LOCAL_GET_I32;
if (RC == &WebAssembly::I64RegClass)
- return WebAssembly::GET_LOCAL_I64;
+ return WebAssembly::LOCAL_GET_I64;
if (RC == &WebAssembly::F32RegClass)
- return WebAssembly::GET_LOCAL_F32;
+ return WebAssembly::LOCAL_GET_F32;
if (RC == &WebAssembly::F64RegClass)
- return WebAssembly::GET_LOCAL_F64;
+ return WebAssembly::LOCAL_GET_F64;
if (RC == &WebAssembly::V128RegClass)
- return WebAssembly::GET_LOCAL_V128;
+ return WebAssembly::LOCAL_GET_V128;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::GET_LOCAL_EXCEPT_REF;
+ return WebAssembly::LOCAL_GET_EXCEPT_REF;
llvm_unreachable("Unexpected register class");
}
-/// Get the appropriate set_local opcode for the given register class.
+/// Get the appropriate local.set opcode for the given register class.
static unsigned getSetLocalOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
- return WebAssembly::SET_LOCAL_I32;
+ return WebAssembly::LOCAL_SET_I32;
if (RC == &WebAssembly::I64RegClass)
- return WebAssembly::SET_LOCAL_I64;
+ return WebAssembly::LOCAL_SET_I64;
if (RC == &WebAssembly::F32RegClass)
- return WebAssembly::SET_LOCAL_F32;
+ return WebAssembly::LOCAL_SET_F32;
if (RC == &WebAssembly::F64RegClass)
- return WebAssembly::SET_LOCAL_F64;
+ return WebAssembly::LOCAL_SET_F64;
if (RC == &WebAssembly::V128RegClass)
- return WebAssembly::SET_LOCAL_V128;
+ return WebAssembly::LOCAL_SET_V128;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::SET_LOCAL_EXCEPT_REF;
+ return WebAssembly::LOCAL_SET_EXCEPT_REF;
llvm_unreachable("Unexpected register class");
}
-/// Get the appropriate tee_local opcode for the given register class.
+/// Get the appropriate local.tee opcode for the given register class.
static unsigned getTeeLocalOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
- return WebAssembly::TEE_LOCAL_I32;
+ return WebAssembly::LOCAL_TEE_I32;
if (RC == &WebAssembly::I64RegClass)
- return WebAssembly::TEE_LOCAL_I64;
+ return WebAssembly::LOCAL_TEE_I64;
if (RC == &WebAssembly::F32RegClass)
- return WebAssembly::TEE_LOCAL_F32;
+ return WebAssembly::LOCAL_TEE_F32;
if (RC == &WebAssembly::F64RegClass)
- return WebAssembly::TEE_LOCAL_F64;
+ return WebAssembly::LOCAL_TEE_F64;
if (RC == &WebAssembly::V128RegClass)
- return WebAssembly::TEE_LOCAL_V128;
+ return WebAssembly::LOCAL_TEE_V128;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::TEE_LOCAL_EXCEPT_REF;
+ return WebAssembly::LOCAL_TEE_EXCEPT_REF;
llvm_unreachable("Unexpected register class");
}
@@ -155,6 +157,8 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
return MVT::f32;
if (RC == &WebAssembly::F64RegClass)
return MVT::f64;
+ if (RC == &WebAssembly::V128RegClass)
+ return MVT::v16i8;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
return MVT::ExceptRef;
llvm_unreachable("unrecognized register class");
@@ -162,7 +166,7 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
/// Given a MachineOperand of a stackified vreg, return the instruction at the
/// start of the expression tree.
-static MachineInstr *FindStartOfTree(MachineOperand &MO,
+static MachineInstr *findStartOfTree(MachineOperand &MO,
MachineRegisterInfo &MRI,
WebAssemblyFunctionInfo &MFI) {
unsigned Reg = MO.getReg();
@@ -173,7 +177,7 @@ static MachineInstr *FindStartOfTree(MachineOperand &MO,
for (MachineOperand &DefMO : Def->explicit_uses()) {
if (!DefMO.isReg())
continue;
- return FindStartOfTree(DefMO, MRI, MFI);
+ return findStartOfTree(DefMO, MRI, MFI);
}
// If there were no stackified uses, we've reached the start.
@@ -186,7 +190,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
<< MF.getName() << '\n');
// Disable this pass if directed to do so.
- if (DisableWebAssemblyExplicitLocals)
+ if (WasmDisableExplicitLocals)
return false;
bool Changed = false;
@@ -206,19 +210,19 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
break;
unsigned Reg = MI.getOperand(0).getReg();
assert(!MFI.isVRegStackified(Reg));
- Reg2Local[Reg] = MI.getOperand(1).getImm();
+ Reg2Local[Reg] = static_cast<unsigned>(MI.getOperand(1).getImm());
MI.eraseFromParent();
Changed = true;
}
// Start assigning local numbers after the last parameter.
- unsigned CurLocal = MFI.getParams().size();
+ unsigned CurLocal = static_cast<unsigned>(MFI.getParams().size());
// Precompute the set of registers that are unused, so that we can insert
// drops to their defs.
BitVector UseEmpty(MRI.getNumVirtRegs());
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i)
- UseEmpty[i] = MRI.use_empty(TargetRegisterInfo::index2VirtReg(i));
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I)
+ UseEmpty[I] = MRI.use_empty(TargetRegisterInfo::index2VirtReg(I));
// Visit each instruction in the function.
for (MachineBasicBlock &MBB : MF) {
@@ -229,8 +233,8 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (MI.isDebugInstr() || MI.isLabel())
continue;
- // Replace tee instructions with tee_local. The difference is that tee
- // instructins have two defs, while tee_local instructions have one def
+ // Replace tee instructions with local.tee. The difference is that tee
+ // instructions have two defs, while local.tee instructions have one def
// and an index of a local to write to.
if (WebAssembly::isTee(MI)) {
assert(MFI.isVRegStackified(MI.getOperand(0).getReg()));
@@ -249,7 +253,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
MFI.stackifyVReg(NewReg);
}
- // Replace the TEE with a TEE_LOCAL.
+ // Replace the TEE with a LOCAL_TEE.
unsigned LocalId =
getLocalId(Reg2Local, CurLocal, MI.getOperand(1).getReg());
unsigned Opc = getTeeLocalOpcode(RC);
@@ -263,7 +267,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- // Insert set_locals for any defs that aren't stackified yet. Currently
+ // Insert local.sets for any defs that aren't stackified yet. Currently
// we handle at most one def.
assert(MI.getDesc().getNumDefs() <= 1);
if (MI.getDesc().getNumDefs() == 1) {
@@ -292,15 +296,16 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
.addReg(NewReg);
}
MI.getOperand(0).setReg(NewReg);
- // This register operand is now being used by the inserted drop
- // instruction, so make it undead.
+ // This register operand of the original instruction is now being used
+ // by the inserted drop or local.set instruction, so make it not dead
+ // yet.
MI.getOperand(0).setIsDead(false);
MFI.stackifyVReg(NewReg);
Changed = true;
}
}
- // Insert get_locals for any uses that aren't stackified yet.
+ // Insert local.gets for any uses that aren't stackified yet.
MachineInstr *InsertPt = &MI;
for (MachineOperand &MO : reverse(MI.explicit_uses())) {
if (!MO.isReg())
@@ -314,15 +319,17 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (MO.isDef()) {
assert(MI.getOpcode() == TargetOpcode::INLINEASM);
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
- MRI.removeRegOperandFromUseList(&MO);
- MO = MachineOperand::CreateImm(LocalId);
+ // If this register operand is tied to another operand, we can't
+ // change it to an immediate. Untie it first.
+ MI.untieRegOperand(MI.getOperandNo(&MO));
+ MO.ChangeToImmediate(LocalId);
continue;
}
// If we see a stackified register, prepare to insert subsequent
- // get_locals before the start of its tree.
+ // local.gets before the start of its tree.
if (MFI.isVRegStackified(OldReg)) {
- InsertPt = FindStartOfTree(MO, MRI, MFI);
+ InsertPt = findStartOfTree(MO, MRI, MFI);
continue;
}
@@ -330,12 +337,13 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// indices as immediates.
if (MI.getOpcode() == TargetOpcode::INLINEASM) {
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
- MRI.removeRegOperandFromUseList(&MO);
- MO = MachineOperand::CreateImm(LocalId);
+ // Untie it first if this reg operand is tied to another operand.
+ MI.untieRegOperand(MI.getOperandNo(&MO));
+ MO.ChangeToImmediate(LocalId);
continue;
}
- // Insert a get_local.
+ // Insert a local.get.
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
unsigned NewReg = MRI.createVirtualRegister(RC);
@@ -361,13 +369,13 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Define the locals.
// TODO: Sort the locals for better compression.
MFI.setNumLocals(CurLocal - MFI.getParams().size());
- for (size_t i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- auto I = Reg2Local.find(Reg);
- if (I == Reg2Local.end() || I->second < MFI.getParams().size())
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ auto RL = Reg2Local.find(Reg);
+ if (RL == Reg2Local.end() || RL->second < MFI.getParams().size())
continue;
- MFI.setLocal(I->second - MFI.getParams().size(),
+ MFI.setLocal(RL->second - MFI.getParams().size(),
typeForRegClass(MRI.getRegClass(Reg)));
Changed = true;
}
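The hunks above track the WebAssembly spec's renaming of get_local/set_local/tee_local to local.get/local.set/local.tee. They also rely on a getLocalId helper that is not shown in this excerpt; a minimal sketch of the lazy local-index assignment the hunks assume (look the vreg up in Reg2Local and hand out CurLocal for vregs seen for the first time) could look like:

  // Return the local index for Reg, assigning a fresh index (and bumping
  // CurLocal) the first time a virtual register is seen. Sketch only; the
  // real helper lives elsewhere in WebAssemblyExplicitLocals.cpp.
  static unsigned getLocalId(DenseMap<unsigned, unsigned> &Reg2Local,
                             unsigned &CurLocal, unsigned Reg) {
    auto P = Reg2Local.insert(std::make_pair(Reg, CurLocal));
    if (P.second)
      ++CurLocal;
    return P.first->second;
  }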
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 566ef68c027d..3856700cca94 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -37,7 +37,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "wasm-fastisel"
@@ -114,8 +117,8 @@ private:
// Utility helper routines
MVT::SimpleValueType getSimpleType(Type *Ty) {
EVT VT = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
- return VT.isSimple() ? VT.getSimpleVT().SimpleTy :
- MVT::INVALID_SIMPLE_VALUE_TYPE;
+ return VT.isSimple() ? VT.getSimpleVT().SimpleTy
+ : MVT::INVALID_SIMPLE_VALUE_TYPE;
}
MVT::SimpleValueType getLegalType(MVT::SimpleValueType VT) {
switch (VT) {
@@ -138,6 +141,11 @@ private:
if (Subtarget->hasSIMD128())
return VT;
break;
+ case MVT::v2i64:
+ case MVT::v2f64:
+ if (Subtarget->hasUnimplementedSIMD128())
+ return VT;
+ break;
default:
break;
}
@@ -153,11 +161,9 @@ private:
MVT::SimpleValueType From);
unsigned signExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From);
- unsigned zeroExtend(unsigned Reg, const Value *V,
- MVT::SimpleValueType From,
+ unsigned zeroExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From,
MVT::SimpleValueType To);
- unsigned signExtend(unsigned Reg, const Value *V,
- MVT::SimpleValueType From,
+ unsigned signExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From,
MVT::SimpleValueType To);
unsigned getRegForUnsignedValue(const Value *V);
unsigned getRegForSignedValue(const Value *V);
@@ -374,14 +380,12 @@ void WebAssemblyFastISel::materializeLoadStoreOperands(Address &Addr) {
if (Addr.isRegBase()) {
unsigned Reg = Addr.getReg();
if (Reg == 0) {
- Reg = createResultReg(Subtarget->hasAddr64() ?
- &WebAssembly::I64RegClass :
- &WebAssembly::I32RegClass);
- unsigned Opc = Subtarget->hasAddr64() ?
- WebAssembly::CONST_I64 :
- WebAssembly::CONST_I32;
+ Reg = createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
+ : &WebAssembly::I32RegClass);
+ unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64
+ : WebAssembly::CONST_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), Reg)
- .addImm(0);
+ .addImm(0);
Addr.setReg(Reg);
}
}
@@ -419,9 +423,10 @@ unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) {
return getRegForValue(ICmp->getOperand(0));
}
- if (BinaryOperator::isNot(V) && V->getType()->isIntegerTy(32)) {
+ Value *NotV;
+ if (match(V, m_Not(m_Value(NotV))) && V->getType()->isIntegerTy(32)) {
Not = true;
- return getRegForValue(BinaryOperator::getNotArgument(V));
+ return getRegForValue(NotV);
}
Not = false;
@@ -438,13 +443,12 @@ unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V,
switch (From) {
case MVT::i1:
- // If the value is naturally an i1, we don't need to mask it.
- // TODO: Recursively examine selects, phis, and, or, xor, constants.
- if (From == MVT::i1 && V != nullptr) {
- if (isa<CmpInst>(V) ||
- (isa<Argument>(V) && cast<Argument>(V)->hasZExtAttr()))
- return copyValue(Reg);
- }
+ // If the value is naturally an i1, we don't need to mask it. We only know
+ // if a value is naturally an i1 if it is definitely lowered by FastISel,
+ // not a DAG ISel fallback.
+ if (V != nullptr && isa<Argument>(V) && cast<Argument>(V)->hasZExtAttr())
+ return copyValue(Reg);
+ break;
case MVT::i8:
case MVT::i16:
break;
@@ -457,13 +461,13 @@ unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V,
unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
- .addImm(~(~uint64_t(0) << MVT(From).getSizeInBits()));
+ .addImm(~(~uint64_t(0) << MVT(From).getSizeInBits()));
unsigned Result = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::AND_I32), Result)
- .addReg(Reg)
- .addReg(Imm);
+ .addReg(Reg)
+ .addReg(Imm);
return Result;
}
@@ -487,19 +491,19 @@ unsigned WebAssemblyFastISel::signExtendToI32(unsigned Reg, const Value *V,
unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
- .addImm(32 - MVT(From).getSizeInBits());
+ .addImm(32 - MVT(From).getSizeInBits());
unsigned Left = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHL_I32), Left)
- .addReg(Reg)
- .addReg(Imm);
+ .addReg(Reg)
+ .addReg(Imm);
unsigned Right = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHR_S_I32), Right)
- .addReg(Left)
- .addReg(Imm);
+ .addReg(Left)
+ .addReg(Imm);
return Right;
}
@@ -562,8 +566,7 @@ unsigned WebAssemblyFastISel::getRegForSignedValue(const Value *V) {
unsigned WebAssemblyFastISel::getRegForPromotedValue(const Value *V,
bool IsSigned) {
- return IsSigned ? getRegForSignedValue(V) :
- getRegForUnsignedValue(V);
+ return IsSigned ? getRegForSignedValue(V) : getRegForUnsignedValue(V);
}
unsigned WebAssemblyFastISel::notValue(unsigned Reg) {
@@ -572,15 +575,15 @@ unsigned WebAssemblyFastISel::notValue(unsigned Reg) {
unsigned NotReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::EQZ_I32), NotReg)
- .addReg(Reg);
+ .addReg(Reg);
return NotReg;
}
unsigned WebAssemblyFastISel::copyValue(unsigned Reg) {
unsigned ResultReg = createResultReg(MRI.getRegClass(Reg));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(WebAssembly::COPY), ResultReg)
- .addReg(Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(WebAssembly::COPY),
+ ResultReg)
+ .addReg(Reg);
return ResultReg;
}
@@ -589,12 +592,11 @@ unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- unsigned ResultReg = createResultReg(Subtarget->hasAddr64() ?
- &WebAssembly::I64RegClass :
- &WebAssembly::I32RegClass);
- unsigned Opc = Subtarget->hasAddr64() ?
- WebAssembly::COPY_I64 :
- WebAssembly::COPY_I32;
+ unsigned ResultReg =
+ createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
+ : &WebAssembly::I32RegClass);
+ unsigned Opc =
+ Subtarget->hasAddr64() ? WebAssembly::COPY_I64 : WebAssembly::COPY_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addFrameIndex(SI->second);
return ResultReg;
@@ -605,14 +607,13 @@ unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
- unsigned ResultReg = createResultReg(Subtarget->hasAddr64() ?
- &WebAssembly::I64RegClass :
- &WebAssembly::I32RegClass);
- unsigned Opc = Subtarget->hasAddr64() ?
- WebAssembly::CONST_I64 :
- WebAssembly::CONST_I32;
+ unsigned ResultReg =
+ createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
+ : &WebAssembly::I32RegClass);
+ unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64
+ : WebAssembly::CONST_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addGlobalAddress(GV);
+ .addGlobalAddress(GV);
return ResultReg;
}
@@ -651,19 +652,19 @@ bool WebAssemblyFastISel::fastLowerArguments() {
case MVT::i8:
case MVT::i16:
case MVT::i32:
- Opc = WebAssembly::ARGUMENT_I32;
+ Opc = WebAssembly::ARGUMENT_i32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i64:
- Opc = WebAssembly::ARGUMENT_I64;
+ Opc = WebAssembly::ARGUMENT_i64;
RC = &WebAssembly::I64RegClass;
break;
case MVT::f32:
- Opc = WebAssembly::ARGUMENT_F32;
+ Opc = WebAssembly::ARGUMENT_f32;
RC = &WebAssembly::F32RegClass;
break;
case MVT::f64:
- Opc = WebAssembly::ARGUMENT_F64;
+ Opc = WebAssembly::ARGUMENT_f64;
RC = &WebAssembly::F64RegClass;
break;
case MVT::v16i8:
@@ -678,12 +679,20 @@ bool WebAssemblyFastISel::fastLowerArguments() {
Opc = WebAssembly::ARGUMENT_v4i32;
RC = &WebAssembly::V128RegClass;
break;
+ case MVT::v2i64:
+ Opc = WebAssembly::ARGUMENT_v2i64;
+ RC = &WebAssembly::V128RegClass;
+ break;
case MVT::v4f32:
Opc = WebAssembly::ARGUMENT_v4f32;
RC = &WebAssembly::V128RegClass;
break;
+ case MVT::v2f64:
+ Opc = WebAssembly::ARGUMENT_v2f64;
+ RC = &WebAssembly::V128RegClass;
+ break;
case MVT::ExceptRef:
- Opc = WebAssembly::ARGUMENT_EXCEPT_REF;
+ Opc = WebAssembly::ARGUMENT_ExceptRef;
RC = &WebAssembly::EXCEPT_REFRegClass;
break;
default:
@@ -691,7 +700,7 @@ bool WebAssemblyFastISel::fastLowerArguments() {
}
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addImm(i);
+ .addImm(i);
updateValueMap(&Arg, ResultReg);
++i;
@@ -710,7 +719,8 @@ bool WebAssemblyFastISel::fastLowerArguments() {
}
if (!F->getReturnType()->isVoidTy()) {
- MVT::SimpleValueType RetTy = getLegalType(getSimpleType(F->getReturnType()));
+ MVT::SimpleValueType RetTy =
+ getLegalType(getSimpleType(F->getReturnType()));
if (RetTy == MVT::INVALID_SIMPLE_VALUE_TYPE) {
MFI->clearParamsAndResults();
return false;
@@ -768,23 +778,33 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
ResultReg = createResultReg(&WebAssembly::F64RegClass);
break;
case MVT::v16i8:
- Opc =
- IsDirect ? WebAssembly::CALL_v16i8 : WebAssembly::PCALL_INDIRECT_v16i8;
+ Opc = IsDirect ? WebAssembly::CALL_v16i8
+ : WebAssembly::PCALL_INDIRECT_v16i8;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v8i16:
- Opc =
- IsDirect ? WebAssembly::CALL_v8i16 : WebAssembly::PCALL_INDIRECT_v8i16;
+ Opc = IsDirect ? WebAssembly::CALL_v8i16
+ : WebAssembly::PCALL_INDIRECT_v8i16;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4i32:
- Opc =
- IsDirect ? WebAssembly::CALL_v4i32 : WebAssembly::PCALL_INDIRECT_v4i32;
+ Opc = IsDirect ? WebAssembly::CALL_v4i32
+ : WebAssembly::PCALL_INDIRECT_v4i32;
+ ResultReg = createResultReg(&WebAssembly::V128RegClass);
+ break;
+ case MVT::v2i64:
+ Opc = IsDirect ? WebAssembly::CALL_v2i64
+ : WebAssembly::PCALL_INDIRECT_v2i64;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4f32:
- Opc =
- IsDirect ? WebAssembly::CALL_v4f32 : WebAssembly::PCALL_INDIRECT_v4f32;
+ Opc = IsDirect ? WebAssembly::CALL_v4f32
+ : WebAssembly::PCALL_INDIRECT_v4f32;
+ ResultReg = createResultReg(&WebAssembly::V128RegClass);
+ break;
+ case MVT::v2f64:
+ Opc = IsDirect ? WebAssembly::CALL_v2f64
+ : WebAssembly::PCALL_INDIRECT_v2f64;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::ExceptRef:
@@ -853,11 +873,11 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
const SelectInst *Select = cast<SelectInst>(I);
bool Not;
- unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
+ unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
if (CondReg == 0)
return false;
- unsigned TrueReg = getRegForValue(Select->getTrueValue());
+ unsigned TrueReg = getRegForValue(Select->getTrueValue());
if (TrueReg == 0)
return false;
@@ -900,9 +920,9 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(TrueReg)
- .addReg(FalseReg)
- .addReg(CondReg);
+ .addReg(TrueReg)
+ .addReg(FalseReg)
+ .addReg(CondReg);
updateValueMap(Select, ResultReg);
return true;
@@ -1002,7 +1022,8 @@ bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
Opc = I32 ? WebAssembly::LE_S_I32 : WebAssembly::LE_S_I64;
isSigned = true;
break;
- default: return false;
+ default:
+ return false;
}
unsigned LHS = getRegForPromotedValue(ICmp->getOperand(0), isSigned);
@@ -1210,7 +1231,8 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) {
case MVT::f64:
Opc = WebAssembly::STORE_F64;
break;
- default: return false;
+ default:
+ return false;
}
materializeLoadStoreOperands(Addr);
@@ -1275,8 +1297,10 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
unsigned Opc;
switch (getSimpleType(RV->getType())) {
- case MVT::i1: case MVT::i8:
- case MVT::i16: case MVT::i32:
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
Opc = WebAssembly::RETURN_I32;
break;
case MVT::i64:
@@ -1297,13 +1321,20 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
case MVT::v4i32:
Opc = WebAssembly::RETURN_v4i32;
break;
+ case MVT::v2i64:
+ Opc = WebAssembly::RETURN_v2i64;
+ break;
case MVT::v4f32:
Opc = WebAssembly::RETURN_v4f32;
break;
+ case MVT::v2f64:
+ Opc = WebAssembly::RETURN_v2f64;
+ break;
case MVT::ExceptRef:
Opc = WebAssembly::RETURN_EXCEPT_REF;
break;
- default: return false;
+ default:
+ return false;
}
unsigned Reg;
@@ -1333,19 +1364,32 @@ bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) {
if (selectCall(I))
return true;
break;
- case Instruction::Select: return selectSelect(I);
- case Instruction::Trunc: return selectTrunc(I);
- case Instruction::ZExt: return selectZExt(I);
- case Instruction::SExt: return selectSExt(I);
- case Instruction::ICmp: return selectICmp(I);
- case Instruction::FCmp: return selectFCmp(I);
- case Instruction::BitCast: return selectBitCast(I);
- case Instruction::Load: return selectLoad(I);
- case Instruction::Store: return selectStore(I);
- case Instruction::Br: return selectBr(I);
- case Instruction::Ret: return selectRet(I);
- case Instruction::Unreachable: return selectUnreachable(I);
- default: break;
+ case Instruction::Select:
+ return selectSelect(I);
+ case Instruction::Trunc:
+ return selectTrunc(I);
+ case Instruction::ZExt:
+ return selectZExt(I);
+ case Instruction::SExt:
+ return selectSExt(I);
+ case Instruction::ICmp:
+ return selectICmp(I);
+ case Instruction::FCmp:
+ return selectFCmp(I);
+ case Instruction::BitCast:
+ return selectBitCast(I);
+ case Instruction::Load:
+ return selectLoad(I);
+ case Instruction::Store:
+ return selectStore(I);
+ case Instruction::Br:
+ return selectBr(I);
+ case Instruction::Ret:
+ return selectRet(I);
+ case Instruction::Unreachable:
+ return selectUnreachable(I);
+ default:
+ break;
}
// Fall back to target-independent instruction selection.
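The zeroExtendToI32 hunk above computes its AND mask as ~(~uint64_t(0) << bits). A small standalone check of that expression (the helper name here is made up for illustration, not part of the patch):

  #include <cassert>
  #include <cstdint>

  // Mask for zero-extending a sub-32-bit value to i32: 0xFF for i8,
  // 0xFFFF for i16. Only meaningful for widths strictly below 64.
  static uint64_t zextMask(unsigned FromBits) {
    return ~(~uint64_t(0) << FromBits);
  }

  int main() {
    assert(zextMask(1) == 0x1u);
    assert(zextMask(8) == 0xFFu);
    assert(zextMask(16) == 0xFFFFu);
    return 0;
  }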
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index d5e47ee82513..1a416520f97d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -36,10 +36,10 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-fix-function-bitcasts"
-static cl::opt<bool> TemporaryWorkarounds(
- "wasm-temporary-workarounds",
- cl::desc("Apply certain temporary workarounds"),
- cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ TemporaryWorkarounds("wasm-temporary-workarounds",
+ cl::desc("Apply certain temporary workarounds"),
+ cl::init(true), cl::Hidden);
namespace {
class FixFunctionBitcasts final : public ModulePass {
@@ -103,14 +103,29 @@ static void FindUses(Value *V, Function &F,
// - Return value is not needed: drop it
// - Return value needed but not present: supply an undef
//
-// For now, return nullptr without creating a wrapper if the wrapper cannot
-// be generated due to incompatible types.
+// If all the argument types are trivially castable to one another (i.e.
+// I32 vs pointer type) then we don't create a wrapper at all (return nullptr
+// instead).
+//
+// If there is a type mismatch that we know would result in an invalid wasm
+// module then generate a wrapper that contains unreachable (i.e. abort at
+// runtime). Such programs are deep into undefined behaviour territory,
+// but we choose to fail at runtime rather than generate an invalid module
+// or fail at compile time. The reason we delay the error is that we want
+// to support CMake, which expects to be able to compile and link programs
+// that refer to functions with entirely incorrect signatures (this is how
+// CMake detects the existence of a function in a toolchain).
+//
+// For bitcasts that involve struct types we don't know at this stage if they
+// would be equivalent at the wasm level and so we can't know if we need to
+// generate a wrapper.
static Function *CreateWrapper(Function *F, FunctionType *Ty) {
Module *M = F->getParent();
- Function *Wrapper =
- Function::Create(Ty, Function::PrivateLinkage, "bitcast", M);
+ Function *Wrapper = Function::Create(Ty, Function::PrivateLinkage,
+ F->getName() + "_bitcast", M);
BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
+ const DataLayout &DL = BB->getModule()->getDataLayout();
// Determine what arguments to pass.
SmallVector<Value *, 4> Args;
@@ -118,38 +133,103 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) {
Function::arg_iterator AE = Wrapper->arg_end();
FunctionType::param_iterator PI = F->getFunctionType()->param_begin();
FunctionType::param_iterator PE = F->getFunctionType()->param_end();
+ bool TypeMismatch = false;
+ bool WrapperNeeded = false;
+
+ Type *ExpectedRtnType = F->getFunctionType()->getReturnType();
+ Type *RtnType = Ty->getReturnType();
+
+ if ((F->getFunctionType()->getNumParams() != Ty->getNumParams()) ||
+ (F->getFunctionType()->isVarArg() != Ty->isVarArg()) ||
+ (ExpectedRtnType != RtnType))
+ WrapperNeeded = true;
+
for (; AI != AE && PI != PE; ++AI, ++PI) {
- if (AI->getType() != *PI) {
- Wrapper->eraseFromParent();
- return nullptr;
+ Type *ArgType = AI->getType();
+ Type *ParamType = *PI;
+
+ if (ArgType == ParamType) {
+ Args.push_back(&*AI);
+ } else {
+ if (CastInst::isBitOrNoopPointerCastable(ArgType, ParamType, DL)) {
+ Instruction *PtrCast =
+ CastInst::CreateBitOrPointerCast(AI, ParamType, "cast");
+ BB->getInstList().push_back(PtrCast);
+ Args.push_back(PtrCast);
+ } else if (ArgType->isStructTy() || ParamType->isStructTy()) {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: struct param type in bitcast: "
+ << F->getName() << "\n");
+ WrapperNeeded = false;
+ } else {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: arg type mismatch calling: "
+ << F->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Arg[" << Args.size() << "] Expected: "
+ << *ParamType << " Got: " << *ArgType << "\n");
+ TypeMismatch = true;
+ break;
+ }
}
- Args.push_back(&*AI);
}
- for (; PI != PE; ++PI)
- Args.push_back(UndefValue::get(*PI));
- if (F->isVarArg())
- for (; AI != AE; ++AI)
- Args.push_back(&*AI);
- CallInst *Call = CallInst::Create(F, Args, "", BB);
-
- // Determine what value to return.
- if (Ty->getReturnType()->isVoidTy())
- ReturnInst::Create(M->getContext(), BB);
- else if (F->getFunctionType()->getReturnType()->isVoidTy())
- ReturnInst::Create(M->getContext(), UndefValue::get(Ty->getReturnType()),
- BB);
- else if (F->getFunctionType()->getReturnType() == Ty->getReturnType())
- ReturnInst::Create(M->getContext(), Call, BB);
- else {
+ if (WrapperNeeded && !TypeMismatch) {
+ for (; PI != PE; ++PI)
+ Args.push_back(UndefValue::get(*PI));
+ if (F->isVarArg())
+ for (; AI != AE; ++AI)
+ Args.push_back(&*AI);
+
+ CallInst *Call = CallInst::Create(F, Args, "", BB);
+
+ Type *ExpectedRtnType = F->getFunctionType()->getReturnType();
+ Type *RtnType = Ty->getReturnType();
+ // Determine what value to return.
+ if (RtnType->isVoidTy()) {
+ ReturnInst::Create(M->getContext(), BB);
+ } else if (ExpectedRtnType->isVoidTy()) {
+ LLVM_DEBUG(dbgs() << "Creating dummy return: " << *RtnType << "\n");
+ ReturnInst::Create(M->getContext(), UndefValue::get(RtnType), BB);
+ } else if (RtnType == ExpectedRtnType) {
+ ReturnInst::Create(M->getContext(), Call, BB);
+ } else if (CastInst::isBitOrNoopPointerCastable(ExpectedRtnType, RtnType,
+ DL)) {
+ Instruction *Cast =
+ CastInst::CreateBitOrPointerCast(Call, RtnType, "cast");
+ BB->getInstList().push_back(Cast);
+ ReturnInst::Create(M->getContext(), Cast, BB);
+ } else if (RtnType->isStructTy() || ExpectedRtnType->isStructTy()) {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: struct return type in bitcast: "
+ << F->getName() << "\n");
+ WrapperNeeded = false;
+ } else {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: return type mismatch calling: "
+ << F->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Expected: " << *ExpectedRtnType
+ << " Got: " << *RtnType << "\n");
+ TypeMismatch = true;
+ }
+ }
+
+ if (TypeMismatch) {
+ // Create a new wrapper that simply contains `unreachable`.
+ Wrapper->eraseFromParent();
+ Wrapper = Function::Create(Ty, Function::PrivateLinkage,
+ F->getName() + "_bitcast_invalid", M);
+ BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
+ new UnreachableInst(M->getContext(), BB);
+ Wrapper->setName(F->getName() + "_bitcast_invalid");
+ } else if (!WrapperNeeded) {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: no wrapper needed: " << F->getName()
+ << "\n");
Wrapper->eraseFromParent();
return nullptr;
}
-
+ LLVM_DEBUG(dbgs() << "CreateWrapper: " << F->getName() << "\n");
return Wrapper;
}
bool FixFunctionBitcasts::runOnModule(Module &M) {
+ LLVM_DEBUG(dbgs() << "********** Fix Function Bitcasts **********\n");
+
Function *Main = nullptr;
CallInst *CallMain = nullptr;
SmallVector<std::pair<Use *, Function *>, 0> Uses;
@@ -166,19 +246,17 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (!TemporaryWorkarounds && !F.isDeclaration() && F.getName() == "main") {
Main = &F;
LLVMContext &C = M.getContext();
- Type *MainArgTys[] = {
- PointerType::get(Type::getInt8PtrTy(C), 0),
- Type::getInt32Ty(C)
- };
+ Type *MainArgTys[] = {Type::getInt32Ty(C),
+ PointerType::get(Type::getInt8PtrTy(C), 0)};
FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys,
/*isVarArg=*/false);
if (F.getFunctionType() != MainTy) {
- Value *Args[] = {
- UndefValue::get(MainArgTys[0]),
- UndefValue::get(MainArgTys[1])
- };
- Value *Casted = ConstantExpr::getBitCast(Main,
- PointerType::get(MainTy, 0));
+ LLVM_DEBUG(dbgs() << "Found `main` function with incorrect type: "
+ << *F.getFunctionType() << "\n");
+ Value *Args[] = {UndefValue::get(MainArgTys[0]),
+ UndefValue::get(MainArgTys[1])};
+ Value *Casted =
+ ConstantExpr::getBitCast(Main, PointerType::get(MainTy, 0));
CallMain = CallInst::Create(Casted, Args, "call_main");
Use *UseMain = &CallMain->getOperandUse(2);
Uses.push_back(std::make_pair(UseMain, &F));
@@ -200,11 +278,6 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (!Ty)
continue;
- // Bitcasted vararg functions occur in Emscripten's implementation of
- // EM_ASM, so suppress wrappers for them for now.
- if (TemporaryWorkarounds && (Ty->isVarArg() || F->isVarArg()))
- continue;
-
auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
if (Pair.second)
Pair.first->second = CreateWrapper(F, Ty);
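For reference, the reordered MainArgTys in the hunk above correspond to the standard C prototype of main (an i32 argc followed by an i8** argv); the previous order did not match any standard signature:

  // The signature the pass now synthesizes for `main`: argc first, then argv.
  int main(int argc, char **argv);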
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
index bea027be7711..108f2879a071 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements a pass that transforms irreducible control flow
-/// into reducible control flow. Irreducible control flow means multiple-entry
+/// This file implements a pass that transforms irreducible control flow into
+/// reducible control flow. Irreducible control flow means multiple-entry
/// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo
/// due to being unnatural.
///
@@ -17,12 +17,36 @@
/// it linearizes control flow, turning diamonds into two triangles, which is
/// both unnecessary and undesirable for WebAssembly.
///
-/// TODO: The transformation implemented here handles all irreducible control
-/// flow, without exponential code-size expansion, though it does so by creating
-/// inefficient code in many cases. Ideally, we should add other
-/// transformations, including code-duplicating cases, which can be more
-/// efficient in common cases, and they can fall back to this conservative
-/// implementation as needed.
+/// The big picture: Ignoring natural loops (seeing them monolithically), we
+/// find all the blocks which can return to themselves ("loopers"). Loopers
+/// reachable from the non-loopers are loop entries: if there are 2 or more,
+/// then we have irreducible control flow. We fix that as follows: a new block
+/// is created that can dispatch to each of the loop entries, based on the
+/// value of a label "helper" variable, and we replace direct branches to the
+/// entries with assignments to the label variable and a branch to the dispatch
+/// block. Then the dispatch block is the single entry in a new natural loop.
+///
+/// This is similar to what the Relooper [1] does, both identify looping code
+/// that requires multiple entries, and resolve it in a similar way. In
+/// Relooper terminology, we implement a Multiple shape in a Loop shape. Note
+/// also that like the Relooper, we implement a "minimal" intervention: we only
+/// use the "label" helper for the blocks we absolutely must and no others. We
+/// also prioritize code size and do not perform node splitting (i.e. we don't
+/// duplicate code in order to resolve irreducibility).
+///
+/// The difference between this code and the Relooper is that the Relooper also
+/// generates ifs and loops and works in a recursive manner, knowing at each
+/// point what the entries are, and recursively breaks down the problem. Here
+/// we just want to resolve irreducible control flow, and we also want to use
+/// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to
+/// identify natural loops, etc., and we start with the whole CFG and must
+/// identify both the looping code and its entries.
+///
+/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
+/// Proceedings of the ACM international conference companion on Object oriented
+/// programming systems languages and applications companion (SPLASH '11). ACM,
+/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
+/// http://doi.acm.org/10.1145/2048147.2048224
///
//===----------------------------------------------------------------------===//
@@ -46,141 +70,203 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
namespace {
-class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
- StringRef getPassName() const override {
- return "WebAssembly Fix Irreducible Control Flow";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- bool VisitLoop(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop);
-
-public:
- static char ID; // Pass identification, replacement for typeid
- WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
-};
-} // end anonymous namespace
-
-char WebAssemblyFixIrreducibleControlFlow::ID = 0;
-INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
- "Removes irreducible control flow", false, false)
-
-FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
- return new WebAssemblyFixIrreducibleControlFlow();
-}
-
-namespace {
-
-/// A utility for walking the blocks of a loop, handling a nested inner
-/// loop as a monolithic conceptual block.
-class MetaBlock {
- MachineBasicBlock *Block;
- SmallVector<MachineBasicBlock *, 2> Preds;
- SmallVector<MachineBasicBlock *, 2> Succs;
+class LoopFixer {
public:
- explicit MetaBlock(MachineBasicBlock *MBB)
- : Block(MBB), Preds(MBB->pred_begin(), MBB->pred_end()),
- Succs(MBB->succ_begin(), MBB->succ_end()) {}
-
- explicit MetaBlock(MachineLoop *Loop) : Block(Loop->getHeader()) {
- Loop->getExitBlocks(Succs);
- for (MachineBasicBlock *Pred : Block->predecessors())
- if (!Loop->contains(Pred))
- Preds.push_back(Pred);
+ LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop)
+ : MF(MF), MLI(MLI), Loop(Loop) {}
+
+ // Run the fixer on the given inputs. Returns whether changes were made.
+ bool run();
+
+private:
+ MachineFunction &MF;
+ MachineLoopInfo &MLI;
+ MachineLoop *Loop;
+
+ MachineBasicBlock *Header;
+ SmallPtrSet<MachineBasicBlock *, 4> LoopBlocks;
+
+ using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
+ DenseMap<MachineBasicBlock *, BlockSet> Reachable;
+
+ // The worklist contains pairs of recent additions, (a, b), where we just
+ // added a link a => b.
+ using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
+ SmallVector<BlockPair, 4> WorkList;
+
+ // Get a canonical block to represent a block or a loop: the block, or if in
+ // an inner loop, the loop header; if it is in an outer loop scope, we can
+ // ignore it. We need to call this on all blocks we work on.
+ MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) {
+ MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
+ if (InnerLoop == Loop) {
+ return MBB;
+ } else {
+ // This is either in an outer or an inner loop, and not in ours.
+ if (!LoopBlocks.count(MBB)) {
+ // It's in outer code, ignore it.
+ return nullptr;
+ }
+ assert(InnerLoop);
+ // It's in an inner loop, canonicalize it to the header of that loop.
+ return InnerLoop->getHeader();
+ }
}
- MachineBasicBlock *getBlock() const { return Block; }
-
- const SmallVectorImpl<MachineBasicBlock *> &predecessors() const {
- return Preds;
- }
- const SmallVectorImpl<MachineBasicBlock *> &successors() const {
- return Succs;
+ // A successor can additionally be ignored if it's a branch back to a
+ // natural loop top: when we are in the scope of a loop, we only care
+ // about internal irreducibility and can ignore the loop we are in. We need
+ // to call this on all blocks in a context where they are a successor.
+ MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) {
+ if (Loop && MBB == Loop->getHeader()) {
+ // Ignore branches going to the loop's natural header.
+ return nullptr;
+ }
+ return canonicalize(MBB);
}
- bool operator==(const MetaBlock &MBB) { return Block == MBB.Block; }
- bool operator!=(const MetaBlock &MBB) { return Block != MBB.Block; }
+ // Potentially insert a new reachable edge, and if so, note it as further
+ // work.
+ void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ assert(MBB == canonicalize(MBB));
+ assert(Succ);
+ // Succ may not be interesting as a successor.
+ Succ = canonicalizeSuccessor(Succ);
+ if (!Succ)
+ return;
+ if (Reachable[MBB].insert(Succ).second) {
+ // For there to be further work, it means that we have
+ // X => MBB => Succ
+ // for some other X, and in that case X => Succ would be a new edge for
+ // us to discover later. However, if we don't care about MBB as a
+ // successor, then we don't care about that anyhow.
+ if (canonicalizeSuccessor(MBB)) {
+ WorkList.emplace_back(MBB, Succ);
+ }
+ }
+ }
};
-class SuccessorList final : public MetaBlock {
- size_t Index;
- size_t Num;
+bool LoopFixer::run() {
+ Header = Loop ? Loop->getHeader() : &*MF.begin();
-public:
- explicit SuccessorList(MachineBasicBlock *MBB)
- : MetaBlock(MBB), Index(0), Num(successors().size()) {}
+ // Identify all the blocks in this loop scope.
+ if (Loop) {
+ for (auto *MBB : Loop->getBlocks()) {
+ LoopBlocks.insert(MBB);
+ }
+ } else {
+ for (auto &MBB : MF) {
+ LoopBlocks.insert(&MBB);
+ }
+ }
- explicit SuccessorList(MachineLoop *Loop)
- : MetaBlock(Loop), Index(0), Num(successors().size()) {}
+ // Compute which (canonicalized) blocks each block can reach.
- bool HasNext() const { return Index != Num; }
+ // Add all the initial work.
+ for (auto *MBB : LoopBlocks) {
+ MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
- MachineBasicBlock *Next() {
- assert(HasNext());
- return successors()[Index++];
+ if (InnerLoop == Loop) {
+ for (auto *Succ : MBB->successors()) {
+ maybeInsert(MBB, Succ);
+ }
+ } else {
+ // It can't be in an outer loop - we loop on LoopBlocks - and so it must
+ // be an inner loop.
+ assert(InnerLoop);
+ // Check if we are the canonical block for this loop.
+ if (canonicalize(MBB) != MBB) {
+ continue;
+ }
+ // The successors are those of the loop.
+ SmallVector<MachineBasicBlock *, 2> ExitBlocks;
+ InnerLoop->getExitBlocks(ExitBlocks);
+ for (auto *Succ : ExitBlocks) {
+ maybeInsert(MBB, Succ);
+ }
+ }
}
-};
-} // end anonymous namespace
+ // Do work until we are all done.
+ while (!WorkList.empty()) {
+ MachineBasicBlock *MBB;
+ MachineBasicBlock *Succ;
+ std::tie(MBB, Succ) = WorkList.pop_back_val();
+ // The worklist item is an edge we just added, so it must have valid blocks
+ // (and not something canonicalized to nullptr).
+ assert(MBB);
+ assert(Succ);
+ // The successor in that pair must also be a valid successor.
+ assert(MBB == canonicalizeSuccessor(MBB));
+ // We recently added MBB => Succ, and that means we may have enabled
+ // Pred => MBB => Succ. Check all the predecessors. Note that our loop here
+ // is correct for both a block and a block representing a loop, as the loop
+ // is natural and so the predecessors are all predecessors of the loop
+ // header, which is the block we have here.
+ for (auto *Pred : MBB->predecessors()) {
+ // Canonicalize, make sure it's relevant, and check it's not the same
+ // block (an update to the block itself doesn't help compute that same
+ // block).
+ Pred = canonicalize(Pred);
+ if (Pred && Pred != MBB) {
+ maybeInsert(Pred, Succ);
+ }
+ }
+ }
-bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
- MachineLoopInfo &MLI,
- MachineLoop *Loop) {
- MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin();
- SetVector<MachineBasicBlock *> RewriteSuccs;
-
- // DFS through Loop's body, looking for irreducible control flow. Loop is
- // natural, and we stay in its body, and we treat any nested loops
- // monolithically, so any cycles we encounter indicate irreducibility.
- SmallPtrSet<MachineBasicBlock *, 8> OnStack;
- SmallPtrSet<MachineBasicBlock *, 8> Visited;
- SmallVector<SuccessorList, 4> LoopWorklist;
- LoopWorklist.push_back(SuccessorList(Header));
- OnStack.insert(Header);
- Visited.insert(Header);
- while (!LoopWorklist.empty()) {
- SuccessorList &Top = LoopWorklist.back();
- if (Top.HasNext()) {
- MachineBasicBlock *Next = Top.Next();
- if (Next == Header || (Loop && !Loop->contains(Next)))
- continue;
- if (LLVM_LIKELY(OnStack.insert(Next).second)) {
- if (!Visited.insert(Next).second) {
- OnStack.erase(Next);
- continue;
- }
- MachineLoop *InnerLoop = MLI.getLoopFor(Next);
- if (InnerLoop != Loop)
- LoopWorklist.push_back(SuccessorList(InnerLoop));
- else
- LoopWorklist.push_back(SuccessorList(Next));
- } else {
- RewriteSuccs.insert(Top.getBlock());
+ // It's now trivial to identify the loopers.
+ SmallPtrSet<MachineBasicBlock *, 4> Loopers;
+ for (auto MBB : LoopBlocks) {
+ if (Reachable[MBB].count(MBB)) {
+ Loopers.insert(MBB);
+ }
+ }
+ // The header cannot be a looper. At the toplevel, LLVM does not allow the
+ // entry to be in a loop, and in a natural loop we should ignore the header.
+ assert(Loopers.count(Header) == 0);
+
+ // Find the entries, loopers reachable from non-loopers.
+ SmallPtrSet<MachineBasicBlock *, 4> Entries;
+ SmallVector<MachineBasicBlock *, 4> SortedEntries;
+ for (auto *Looper : Loopers) {
+ for (auto *Pred : Looper->predecessors()) {
+ Pred = canonicalize(Pred);
+ if (Pred && !Loopers.count(Pred)) {
+ Entries.insert(Looper);
+ SortedEntries.push_back(Looper);
+ break;
}
- continue;
}
- OnStack.erase(Top.getBlock());
- LoopWorklist.pop_back();
}
- // Most likely, we didn't find any irreducible control flow.
- if (LLVM_LIKELY(RewriteSuccs.empty()))
+ // Check if we found irreducible control flow.
+ if (LLVM_LIKELY(Entries.size() <= 1))
return false;
- LLVM_DEBUG(dbgs() << "Irreducible control flow detected!\n");
+ // Sort the entries to ensure a deterministic build.
+ llvm::sort(SortedEntries,
+ [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+ auto ANum = A->getNumber();
+ auto BNum = B->getNumber();
+ return ANum < BNum;
+ });
+
+#ifndef NDEBUG
+ for (auto Block : SortedEntries)
+ assert(Block->getNumber() != -1);
+ if (SortedEntries.size() > 1) {
+ for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1;
+ I != E; ++I) {
+ auto ANum = (*I)->getNumber();
+ auto BNum = (*(std::next(I)))->getNumber();
+ assert(ANum != BNum);
+ }
+ }
+#endif
- // Ok. We have irreducible control flow! Create a dispatch block which will
- // contains a jump table to any block in the problematic set of blocks.
+ // Create a dispatch block which will contain a jump table to the entries.
MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
MF.insert(MF.end(), Dispatch);
MLI.changeLoopFor(Dispatch, Loop);
@@ -196,43 +282,43 @@ bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
MIB.addReg(Reg);
- // Collect all the blocks which need to have their successors rewritten,
- // add the successors to the jump table, and remember their index.
+ // Compute the indices in the superheader, one for each bad block, and
+ // add them as successors.
DenseMap<MachineBasicBlock *, unsigned> Indices;
- SmallVector<MachineBasicBlock *, 4> SuccWorklist(RewriteSuccs.begin(),
- RewriteSuccs.end());
- while (!SuccWorklist.empty()) {
- MachineBasicBlock *MBB = SuccWorklist.pop_back_val();
+ for (auto *MBB : SortedEntries) {
auto Pair = Indices.insert(std::make_pair(MBB, 0));
- if (!Pair.second)
+ if (!Pair.second) {
continue;
+ }
unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
- LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index
- << "\n");
-
Pair.first->second = Index;
- for (auto Pred : MBB->predecessors())
- RewriteSuccs.insert(Pred);
MIB.addMBB(MBB);
Dispatch->addSuccessor(MBB);
+ }
- MetaBlock Meta(MBB);
- for (auto *Succ : Meta.successors())
- if (Succ != Header && (!Loop || Loop->contains(Succ)))
- SuccWorklist.push_back(Succ);
+ // Rewrite the problematic successors for every block that wants to reach the
+ // bad blocks. For simplicity, we just introduce a new block for every edge
+ // we need to rewrite. (Fancier things are possible.)
+
+ SmallVector<MachineBasicBlock *, 4> AllPreds;
+ for (auto *MBB : SortedEntries) {
+ for (auto *Pred : MBB->predecessors()) {
+ if (Pred != Dispatch) {
+ AllPreds.push_back(Pred);
+ }
+ }
}
- // Rewrite the problematic successors for every block in RewriteSuccs.
- // For simplicity, we just introduce a new block for every edge we need to
- // rewrite. Fancier things are possible.
- for (MachineBasicBlock *MBB : RewriteSuccs) {
+ for (MachineBasicBlock *MBB : AllPreds) {
DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
for (auto *Succ : MBB->successors()) {
- if (!Indices.count(Succ))
+ if (!Entries.count(Succ)) {
continue;
+ }
+ // This is a successor we need to rewrite.
MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
: MF.end(),
@@ -266,6 +352,55 @@ bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
return true;
}
+class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
+ StringRef getPassName() const override {
+ return "WebAssembly Fix Irreducible Control Flow";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) {
+ // Visit the function body, which is identified as a null loop.
+ if (LoopFixer(MF, MLI, nullptr).run()) {
+ return true;
+ }
+
+ // Visit all the loops.
+ SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
+ while (!Worklist.empty()) {
+ MachineLoop *Loop = Worklist.pop_back_val();
+ Worklist.append(Loop->begin(), Loop->end());
+ if (LoopFixer(MF, MLI, Loop).run()) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyFixIrreducibleControlFlow::ID = 0;
+INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
+ "Removes irreducible control flow", false, false)
+
+FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
+ return new WebAssemblyFixIrreducibleControlFlow();
+}
+
bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
@@ -275,24 +410,19 @@ bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
bool Changed = false;
auto &MLI = getAnalysis<MachineLoopInfo>();
- // Visit the function body, which is identified as a null loop.
- Changed |= VisitLoop(MF, MLI, nullptr);
-
- // Visit all the loops.
- SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
- while (!Worklist.empty()) {
- MachineLoop *CurLoop = Worklist.pop_back_val();
- Worklist.append(CurLoop->begin(), CurLoop->end());
- Changed |= VisitLoop(MF, MLI, CurLoop);
- }
-
- // If we made any changes, completely recompute everything.
- if (LLVM_UNLIKELY(Changed)) {
- LLVM_DEBUG(dbgs() << "Recomputing dominators and loops.\n");
+ // When we modify something, bail out and recompute MLI, then start again, as
+ // we create a new natural loop when we resolve irreducible control flow, and
+ // other loops may become nested in it, etc. In practice this is not an issue
+ // because irreducible control flow is rare; only a few iterations of this
+ // loop are needed here.
+ while (LLVM_UNLIKELY(runIteration(MF, MLI))) {
+ // We rewrote part of the function; recompute MLI and start again.
+ LLVM_DEBUG(dbgs() << "Recomputing loops.\n");
MF.getRegInfo().invalidateLiveness();
MF.RenumberBlocks();
getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
MLI.runOnMachineFunction(MF);
+ Changed = true;
}
return Changed;
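As a rough illustration of the rewrite described in this file's header comment, two irreducible loop entries A and B become reachable only through one dispatch header driven by a label variable. A conceptual C++ sketch (not the MIR the pass emits; all names are illustrative):

  // After the fix: the dispatch header is the single entry of a natural loop,
  // and the original entries are selected by the label variable (br_table).
  static int fixedLoop(bool EnterAtA, int N) {
    int Label = EnterAtA ? 0 : 1; // set on each rewritten edge into the loop
    int Acc = 0;
    while (N-- > 0) {             // the new single-entry natural loop
      switch (Label) {            // the dispatch block's jump table
      case 0: Acc += 1; Label = 1; break; // former entry A, then go to B
      case 1: Acc += 2; Label = 0; break; // former entry B, then go to A
      }
    }
    return Acc;
  }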
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index 052c94e9d6a9..2d5aff28d27b 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -42,8 +43,7 @@ using namespace llvm;
/// require stricter alignment than the stack pointer itself. Because we need
/// to shift the stack pointer by some unknown amount to force the alignment,
/// we need to record the value of the stack pointer on entry to the function.
-bool WebAssemblyFrameLowering::hasBP(
- const MachineFunction &MF) const {
+bool WebAssemblyFrameLowering::hasBP(const MachineFunction &MF) const {
const auto *RegInfo =
MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
return RegInfo->needsStackRealignment(MF);
@@ -78,36 +78,60 @@ bool WebAssemblyFrameLowering::hasReservedCallFrame(
return !MF.getFrameInfo().hasVarSizedObjects();
}
+// Returns true if this function needs a local user-space stack pointer for its
+// local frame (not for exception handling).
+bool WebAssemblyFrameLowering::needsSPForLocalFrame(
+ const MachineFunction &MF) const {
+ auto &MFI = MF.getFrameInfo();
+ return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF);
+}
+
+// In function with EH pads, we need to make a copy of the value of
+// __stack_pointer global in SP32 register, in order to use it when restoring
+// __stack_pointer after an exception is caught.
+bool WebAssemblyFrameLowering::needsPrologForEH(
+ const MachineFunction &MF) const {
+ auto EHType = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType();
+ return EHType == ExceptionHandling::Wasm &&
+ MF.getFunction().hasPersonalityFn() && MF.getFrameInfo().hasCalls();
+}
/// Returns true if this function needs a local user-space stack pointer.
/// Unlike a machine stack pointer, the wasm user stack pointer is a global
/// variable, so it is loaded into a register in the prolog.
-bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF,
- const MachineFrameInfo &MFI) const {
- return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF);
+bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF) const {
+ return needsSPForLocalFrame(MF) || needsPrologForEH(MF);
}
/// Returns true if the local user-space stack pointer needs to be written back
-/// to memory by this function (this is not meaningful if needsSP is false). If
-/// false, the stack red zone can be used and only a local SP is needed.
+/// to __stack_pointer global by this function (this is not meaningful if
+/// needsSP is false). If false, the stack red zone can be used and only a local
+/// SP is needed.
bool WebAssemblyFrameLowering::needsSPWriteback(
- const MachineFunction &MF, const MachineFrameInfo &MFI) const {
- assert(needsSP(MF, MFI));
- return MFI.getStackSize() > RedZoneSize || MFI.hasCalls() ||
- MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
+ const MachineFunction &MF) const {
+ auto &MFI = MF.getFrameInfo();
+ assert(needsSP(MF));
+ // When we don't need a local stack pointer for its local frame but only to
+ // support EH, we don't need to write SP back in the epilog, because we don't
+ // bump down the stack pointer in the prolog. We need to write SP back in the
+ // epilog only if
+ // 1. We need SP not only for EH support but also because we actually use
+ // stack or we have a frame address taken.
+ // 2. We cannot use the red zone.
+ bool CanUseRedZone = MFI.getStackSize() <= RedZoneSize && !MFI.hasCalls() &&
+ !MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
+ return needsSPForLocalFrame(MF) && !CanUseRedZone;
}
-static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &InsertAddr,
- MachineBasicBlock::iterator &InsertStore,
- const DebugLoc &DL) {
+void WebAssemblyFrameLowering::writeSPToGlobal(
+ unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const {
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
const char *ES = "__stack_pointer";
auto *SPSymbol = MF.createExternalSymbolName(ES);
- BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::SET_GLOBAL_I32))
- .addExternalSymbol(SPSymbol)
+ BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32))
+ .addExternalSymbol(SPSymbol, WebAssemblyII::MO_SYMBOL_GLOBAL)
.addReg(SrcReg);
}
@@ -119,9 +143,9 @@ WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
"Call frame pseudos should only be used for dynamic stack adjustment");
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
if (I->getOpcode() == TII->getCallFrameDestroyOpcode() &&
- needsSPWriteback(MF, MF.getFrameInfo())) {
+ needsSPWriteback(MF)) {
DebugLoc DL = I->getDebugLoc();
- writeSPToMemory(WebAssembly::SP32, MF, MBB, I, I, DL);
+ writeSPToGlobal(WebAssembly::SP32, MF, MBB, I, DL);
}
return MBB.erase(I);
}
@@ -133,7 +157,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
assert(MFI.getCalleeSavedInfo().empty() &&
"WebAssembly should not have callee-saved registers");
- if (!needsSP(MF, MFI)) return;
+ if (!needsSP(MF))
+ return;
uint64_t StackSize = MFI.getStackSize();
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
@@ -152,8 +177,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
const char *ES = "__stack_pointer";
auto *SPSymbol = MF.createExternalSymbolName(ES);
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GET_GLOBAL_I32), SPReg)
- .addExternalSymbol(SPSymbol);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg)
+ .addExternalSymbol(SPSymbol, WebAssemblyII::MO_SYMBOL_GLOBAL);
bool HasBP = hasBP(MF);
if (HasBP) {
@@ -177,7 +202,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
unsigned BitmaskReg = MRI.createVirtualRegister(PtrRC);
unsigned Alignment = MFI.getMaxAlignment();
assert((1u << countTrailingZeros(Alignment)) == Alignment &&
- "Alignment must be a power of 2");
+ "Alignment must be a power of 2");
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg)
.addImm((int)~(Alignment - 1));
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32),
@@ -189,20 +214,19 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
// Unlike most conventional targets (where FP points to the saved FP),
// FP points to the bottom of the fixed-size locals, so we can use positive
// offsets in load/store instructions.
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY),
- WebAssembly::FP32)
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32)
.addReg(WebAssembly::SP32);
}
- if (StackSize && needsSPWriteback(MF, MFI)) {
- writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL);
+ if (StackSize && needsSPWriteback(MF)) {
+ writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPt, DL);
}
}
void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- auto &MFI = MF.getFrameInfo();
- uint64_t StackSize = MFI.getStackSize();
- if (!needsSP(MF, MFI) || !needsSPWriteback(MF, MFI)) return;
+ uint64_t StackSize = MF.getFrameInfo().getStackSize();
+ if (!needsSP(MF) || !needsSPWriteback(MF))
+ return;
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
auto &MRI = MF.getRegInfo();
auto InsertPt = MBB.getFirstTerminator();
@@ -214,7 +238,6 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
// Restore the stack pointer. If we had fixed-size locals, add the offset
// subtracted in the prolog.
unsigned SPReg = 0;
- MachineBasicBlock::iterator InsertAddr = InsertPt;
if (hasBP(MF)) {
auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
SPReg = FI->getBasePointerVreg();
@@ -222,9 +245,8 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
- InsertAddr =
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
- .addImm(StackSize);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addImm(StackSize);
// In the epilog we don't need to write the result back to the SP32 physreg
// because it won't be used again. We can use a stackified register instead.
SPReg = MRI.createVirtualRegister(PtrRC);
@@ -235,5 +257,5 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32;
}
- writeSPToMemory(SPReg, MF, MBB, InsertAddr, InsertPt, DL);
+ writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL);
}
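
Illustration (not from the commit; helper name is assumed): the over-alignment path in emitPrologue builds a CONST_I32 of ~(Alignment - 1) and ANDs it into SP32, which simply rounds the stack pointer down to a multiple of Alignment when Alignment is a power of two:

    #include <cassert>
    #include <cstdint>

    // Rounds SP down to a multiple of Alignment, mirroring the
    // CONST_I32 ~(Alignment - 1) / AND_I32 sequence in emitPrologue.
    uint32_t alignDown(uint32_t SP, uint32_t Alignment) {
      assert((Alignment & (Alignment - 1)) == 0 && "Alignment must be a power of 2");
      return SP & ~(Alignment - 1);
    }
    // alignDown(0xFFF7, 16) == 0xFFF0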
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index fe23e418a3f1..c6fa8261b03f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -22,9 +22,10 @@ namespace llvm {
class MachineFrameInfo;
class WebAssemblyFrameLowering final : public TargetFrameLowering {
- public:
+public:
/// Size of the red zone for the user stack (leaf functions can use this much
- /// space below the stack pointer without writing it back to memory).
+ /// space below the stack pointer without writing it back to __stack_pointer
+ /// global).
// TODO: (ABI) Revisit and decide how large it should be.
static const size_t RedZoneSize = 128;
@@ -34,9 +35,9 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering {
/*TransientStackAlignment=*/16,
/*StackRealignable=*/true) {}
- MachineBasicBlock::iterator eliminateCallFramePseudoInstr(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
/// These methods insert prolog and epilog code into the function.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
@@ -45,13 +46,21 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering {
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- private:
+ bool needsPrologForEH(const MachineFunction &MF) const;
+
+ /// Write SP back to __stack_pointer global.
+ void writeSPToGlobal(unsigned SrcReg, MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &InsertStore,
+ const DebugLoc &DL) const;
+
+private:
bool hasBP(const MachineFunction &MF) const;
- bool needsSP(const MachineFunction &MF, const MachineFrameInfo &MFI) const;
- bool needsSPWriteback(const MachineFunction &MF,
- const MachineFrameInfo &MFI) const;
+ bool needsSPForLocalFrame(const MachineFunction &MF) const;
+ bool needsSP(const MachineFunction &MF) const;
+ bool needsSPWriteback(const MachineFunction &MF) const;
};
-} // end namespace llvm
+} // end namespace llvm
#endif
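
A minimal sketch of the red-zone rule described in the RedZoneSize comment above (an assumed reading for illustration, not the commit's needsSPWriteback implementation): a leaf function whose frame fits in the 128-byte red zone can adjust SP without writing it back to the __stack_pointer global.

    #include <cstdint>

    // Assumed illustrative helper: when a frame can stay in the red zone and
    // therefore skip writing SP back to the __stack_pointer global.
    static bool mayStayInRedZone(uint64_t StackSize, bool HasCalls) {
      constexpr uint64_t RedZoneSize = 128; // as declared above
      return !HasCalls && StackSize <= RedZoneSize;
    }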
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index c12550feabbb..e987d7f7f43a 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -21,5 +21,10 @@ HANDLE_NODETYPE(ARGUMENT)
HANDLE_NODETYPE(Wrapper)
HANDLE_NODETYPE(BR_IF)
HANDLE_NODETYPE(BR_TABLE)
+HANDLE_NODETYPE(SHUFFLE)
+HANDLE_NODETYPE(VEC_SHL)
+HANDLE_NODETYPE(VEC_SHR_S)
+HANDLE_NODETYPE(VEC_SHR_U)
+HANDLE_NODETYPE(THROW)
// add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
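
The new SHUFFLE node type carries the two input vectors plus sixteen byte-index operands built by LowerVECTOR_SHUFFLE in the WebAssemblyISelLowering.cpp hunk below. A standalone illustration of that index expansion (assumed helper, not the commit's code):

    #include <cstdint>
    #include <vector>

    // Each lane index in the shuffle mask expands to LaneBytes consecutive
    // byte indices; undef lanes (-1 in the mask) are lowered to byte 0.
    std::vector<uint32_t> expandShuffleMask(const std::vector<int> &Mask,
                                            unsigned LaneBytes) {
      std::vector<uint32_t> Bytes;
      for (int Lane : Mask)
        for (unsigned J = 0; J < LaneBytes; ++J)
          Bytes.push_back(Lane == -1 ? 0 : uint32_t(Lane) * LaneBytes + J);
      return Bytes;
    }
    // Mask {2, 0, 3, 1} with LaneBytes = 4 (a v4i32 shuffle) yields
    // {8,9,10,11, 0,1,2,3, 12,13,14,15, 4,5,6,7}.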
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index fdf3a30a5c0e..0a7464cedc90 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -48,6 +48,10 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ LLVM_DEBUG(dbgs() << "********** ISelDAGToDAG **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
ForCodeSize = MF.getFunction().hasFnAttribute(Attribute::OptimizeForSize) ||
MF.getFunction().hasFnAttribute(Attribute::MinSize);
Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 283e703e1f6c..003848e34227 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -21,8 +21,10 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
@@ -42,6 +44,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Booleans always contain 0 or 1.
setBooleanContents(ZeroOrOneBooleanContent);
+ // Except in SIMD vectors
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// WebAssembly does not produce floating-point exceptions on normal floating
// point operations.
setHasFloatingPointExceptions(false);
@@ -60,6 +64,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
+ addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
+ }
}
// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -77,7 +85,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
- for (auto T : {MVT::f32, MVT::f64}) {
+ for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
// Don't expand the floating-point types to constant pools.
setOperationAction(ISD::ConstantFP, T, Legal);
// Expand floating-point comparisons.
@@ -85,17 +93,17 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
setCondCodeAction(CC, T, Expand);
// Expand floating-point library function operators.
- for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
- ISD::FMA})
+ for (auto Op :
+ {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
setOperationAction(Op, T, Expand);
// Note supported floating-point library function operators that otherwise
// default to expand.
for (auto Op :
{ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
setOperationAction(Op, T, Legal);
- // Support minnan and maxnan, which otherwise default to expand.
- setOperationAction(ISD::FMINNAN, T, Legal);
- setOperationAction(ISD::FMAXNAN, T, Legal);
+ // Support minimum and maximum, which otherwise default to expand.
+ setOperationAction(ISD::FMINIMUM, T, Legal);
+ setOperationAction(ISD::FMAXIMUM, T, Legal);
// WebAssembly currently has no builtin f16 support.
setOperationAction(ISD::FP16_TO_FP, T, Expand);
setOperationAction(ISD::FP_TO_FP16, T, Expand);
@@ -103,24 +111,75 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setTruncStoreAction(T, MVT::f16, Expand);
}
- for (auto T : {MVT::i32, MVT::i64}) {
- // Expand unavailable integer operations.
- for (auto Op :
- {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
- ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS,
- ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC,
- ISD::SUBE}) {
+ // Support saturating add for i8x16 and i16x8
+ if (Subtarget->hasSIMD128())
+ for (auto T : {MVT::v16i8, MVT::v8i16})
+ for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
+ setOperationAction(Op, T, Legal);
+
+ // Expand unavailable integer operations.
+ for (auto Op :
+ {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
+ ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
+ ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
+ for (auto T : {MVT::i32, MVT::i64}) {
setOperationAction(Op, T, Expand);
}
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
+ setOperationAction(Op, T, Expand);
+ }
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ setOperationAction(Op, MVT::v2i64, Expand);
+ }
+ }
+ }
+
+ // There is no i64x2.mul instruction
+ setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+
+ // We have custom shuffle lowering to expose the shuffle mask
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
+ setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
+ }
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ }
+ }
+
+ // Custom lowering since wasm shifts must have a scalar shift amount
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+ for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+ setOperationAction(Op, T, Custom);
+ if (Subtarget->hasUnimplementedSIMD128())
+ for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+ setOperationAction(Op, MVT::v2i64, Custom);
}
+ // There are no select instructions for vectors
+ if (Subtarget->hasSIMD128())
+ for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+ setOperationAction(Op, T, Expand);
+ if (Subtarget->hasUnimplementedSIMD128())
+ for (auto T : {MVT::v2i64, MVT::v2f64})
+ setOperationAction(Op, T, Expand);
+ }
+
// As a special case, these operators use the type to mean the type to
// sign-extend from.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!Subtarget->hasSignExt()) {
+ // Sign extends are legal only when extending a vector extract
+ auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
for (auto T : {MVT::i8, MVT::i16, MVT::i32})
- setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
}
+ for (auto T : MVT::integer_vector_valuetypes())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
// Dynamic stack allocation: use the default expansion.
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
@@ -142,21 +201,72 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// - Floating-point extending loads.
// - Floating-point truncating stores.
// - i1 extending loads.
+ // - extending/truncating SIMD loads/stores
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
for (auto T : MVT::integer_valuetypes())
for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
setLoadExtAction(Ext, T, MVT::i1, Promote);
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
+ MVT::v2f64}) {
+ for (auto MemT : MVT::vector_valuetypes()) {
+ if (MVT(T) != MemT) {
+ setTruncStoreAction(T, MemT, Expand);
+ for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
+ setLoadExtAction(Ext, T, MemT, Expand);
+ }
+ }
+ }
+ }
+
+ // Expand additional SIMD ops that V8 hasn't implemented yet
+ if (Subtarget->hasSIMD128() && !Subtarget->hasUnimplementedSIMD128()) {
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ }
+
+ // Custom lower lane accesses to expand out variable indices
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
+ }
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ for (auto T : {MVT::v2i64, MVT::v2f64}) {
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
+ }
+ }
+ }
// Trap lowers to wasm unreachable
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// Exception handling intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setMaxAtomicSizeInBitsSupported(64);
}
+TargetLowering::AtomicExpansionKind
+WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ // We have wasm instructions for these
+ switch (AI->getOperation()) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ case AtomicRMWInst::Xchg:
+ return AtomicExpansionKind::None;
+ default:
+ break;
+ }
+ return AtomicExpansionKind::CmpXChg;
+}
+
FastISel *WebAssemblyTargetLowering::createFastISel(
FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
return WebAssembly::createFastISel(FuncInfo, LibInfo);
@@ -171,7 +281,8 @@ bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
EVT VT) const {
unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
- if (BitWidth > 1 && BitWidth < 8) BitWidth = 8;
+ if (BitWidth > 1 && BitWidth < 8)
+ BitWidth = 8;
if (BitWidth > 64) {
// The shift will be lowered to a libcall, and compiler-rt libcalls expect
@@ -190,17 +301,11 @@ MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
-static MachineBasicBlock *
-LowerFPToInt(
- MachineInstr &MI,
- DebugLoc DL,
- MachineBasicBlock *BB,
- const TargetInstrInfo &TII,
- bool IsUnsigned,
- bool Int64,
- bool Float64,
- unsigned LoweredOpcode
-) {
+static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *BB,
+ const TargetInstrInfo &TII,
+ bool IsUnsigned, bool Int64,
+ bool Float64, unsigned LoweredOpcode) {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned OutReg = MI.getOperand(0).getReg();
@@ -232,8 +337,7 @@ LowerFPToInt(
// Transfer the remainder of BB and its successor edges to DoneMBB.
DoneMBB->splice(DoneMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(TrueMBB);
@@ -255,45 +359,33 @@ LowerFPToInt(
if (IsUnsigned) {
Tmp0 = InReg;
} else {
- BuildMI(BB, DL, TII.get(Abs), Tmp0)
- .addReg(InReg);
+ BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
}
BuildMI(BB, DL, TII.get(FConst), Tmp1)
.addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
- BuildMI(BB, DL, TII.get(LT), CmpReg)
- .addReg(Tmp0)
- .addReg(Tmp1);
+ BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
// For unsigned numbers, we have to do a separate comparison with zero.
if (IsUnsigned) {
Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
- unsigned SecondCmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ unsigned SecondCmpReg =
+ MRI.createVirtualRegister(&WebAssembly::I32RegClass);
unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
BuildMI(BB, DL, TII.get(FConst), Tmp1)
.addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
- BuildMI(BB, DL, TII.get(GE), SecondCmpReg)
- .addReg(Tmp0)
- .addReg(Tmp1);
- BuildMI(BB, DL, TII.get(And), AndReg)
- .addReg(CmpReg)
- .addReg(SecondCmpReg);
+ BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
+ BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
CmpReg = AndReg;
}
- BuildMI(BB, DL, TII.get(Eqz), EqzReg)
- .addReg(CmpReg);
+ BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
// Create the CFG diamond to select between doing the conversion or using
// the substitute value.
- BuildMI(BB, DL, TII.get(WebAssembly::BR_IF))
- .addMBB(TrueMBB)
- .addReg(EqzReg);
- BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg)
- .addReg(InReg);
- BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR))
- .addMBB(DoneMBB);
- BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg)
- .addImm(Substitute);
+ BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
+ BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
+ BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
+ BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
.addReg(FalseReg)
.addMBB(FalseMBB)
@@ -303,16 +395,14 @@ LowerFPToInt(
return DoneMBB;
}
-MachineBasicBlock *
-WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
- MachineInstr &MI,
- MachineBasicBlock *BB
-) const {
+MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unexpected instr type to insert");
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
case WebAssembly::FP_TO_SINT_I32_F32:
return LowerFPToInt(MI, DL, BB, TII, false, false, false,
WebAssembly::I32_TRUNC_S_F32);
@@ -337,17 +427,17 @@ WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
case WebAssembly::FP_TO_UINT_I64_F64:
return LowerFPToInt(MI, DL, BB, TII, true, true, true,
WebAssembly::I64_TRUNC_U_F64);
- llvm_unreachable("Unexpected instruction to emit with custom inserter");
+ llvm_unreachable("Unexpected instruction to emit with custom inserter");
}
}
-const char *WebAssemblyTargetLowering::getTargetNodeName(
- unsigned Opcode) const {
+const char *
+WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
- case WebAssemblyISD::FIRST_NUMBER:
- break;
-#define HANDLE_NODETYPE(NODE) \
- case WebAssemblyISD::NODE: \
+ case WebAssemblyISD::FIRST_NUMBER:
+ break;
+#define HANDLE_NODETYPE(NODE) \
+ case WebAssemblyISD::NODE: \
return "WebAssemblyISD::" #NODE;
#include "WebAssemblyISD.def"
#undef HANDLE_NODETYPE
@@ -362,21 +452,21 @@ WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
// WebAssembly register class.
if (Constraint.size() == 1) {
switch (Constraint[0]) {
- case 'r':
- assert(VT != MVT::iPTR && "Pointer MVT not expected here");
- if (Subtarget->hasSIMD128() && VT.isVector()) {
- if (VT.getSizeInBits() == 128)
- return std::make_pair(0U, &WebAssembly::V128RegClass);
- }
- if (VT.isInteger() && !VT.isVector()) {
- if (VT.getSizeInBits() <= 32)
- return std::make_pair(0U, &WebAssembly::I32RegClass);
- if (VT.getSizeInBits() <= 64)
- return std::make_pair(0U, &WebAssembly::I64RegClass);
- }
- break;
- default:
- break;
+ case 'r':
+ assert(VT != MVT::iPTR && "Pointer MVT not expected here");
+ if (Subtarget->hasSIMD128() && VT.isVector()) {
+ if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, &WebAssembly::V128RegClass);
+ }
+ if (VT.isInteger() && !VT.isVector()) {
+ if (VT.getSizeInBits() <= 32)
+ return std::make_pair(0U, &WebAssembly::I32RegClass);
+ if (VT.getSizeInBits() <= 64)
+ return std::make_pair(0U, &WebAssembly::I64RegClass);
+ }
+ break;
+ default:
+ break;
}
}
@@ -395,16 +485,17 @@ bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM,
- Type *Ty,
- unsigned AS,
+ Type *Ty, unsigned AS,
Instruction *I) const {
// WebAssembly offsets are added as unsigned without wrapping. The
// isLegalAddressingMode gives us no way to determine if wrapping could be
// happening, so we approximate this by accepting only non-negative offsets.
- if (AM.BaseOffs < 0) return false;
+ if (AM.BaseOffs < 0)
+ return false;
// WebAssembly has no scale register operands.
- if (AM.Scale != 0) return false;
+ if (AM.Scale != 0)
+ return false;
// Everything else is legal.
return true;
@@ -418,7 +509,8 @@ bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
// for the kinds of things that LLVM uses this for (merging adjacent stores
// of constants, etc.), WebAssembly implementations will either want the
// unaligned access or they'll split anyway.
- if (Fast) *Fast = true;
+ if (Fast)
+ *Fast = true;
return true;
}
@@ -438,6 +530,46 @@ EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
return TargetLowering::getSetCCResultType(DL, C, VT);
}
+bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::wasm_atomic_notify:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ // atomic.notify instruction does not really load the memory specified with
+ // this argument, but MachineMemOperand should either be load or store, so
+ // we set this to a load.
+ // FIXME Volatile isn't really correct, but currently all LLVM atomic
+ // instructions are treated as volatiles in the backend, so we should be
+ // consistent. The same applies for wasm_atomic_wait intrinsics too.
+ Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::wasm_atomic_wait_i32:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::wasm_atomic_wait_i64:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
+ return true;
+ default:
+ return false;
+ }
+}
+
//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//
@@ -465,8 +597,9 @@ static bool CallingConvSupported(CallingConv::ID CallConv) {
CallConv == CallingConv::CXX_FAST_TLS;
}
-SDValue WebAssemblyTargetLowering::LowerCall(
- CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const {
+SDValue
+WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc DL = CLI.DL;
SDValue Chain = CLI.Chain;
@@ -568,9 +701,9 @@ SDValue WebAssemblyTargetLowering::LowerCall(
FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
DAG.getConstant(Offset, DL, PtrVT));
- Chains.push_back(DAG.getStore(
- Chain, DL, Arg, Add,
- MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
+ Chains.push_back(
+ DAG.getStore(Chain, DL, Arg, Add,
+ MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
}
if (!Chains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
@@ -588,7 +721,8 @@ SDValue WebAssemblyTargetLowering::LowerCall(
Ops.append(OutVals.begin(),
IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
// Add a pointer to the vararg buffer.
- if (IsVarArg) Ops.push_back(FINode);
+ if (IsVarArg)
+ Ops.push_back(FINode);
SmallVector<EVT, 8> InTys;
for (const auto &In : Ins) {
@@ -682,11 +816,10 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
// Ignore In.getOrigAlign() because all our arguments are passed in
// registers.
- InVals.push_back(
- In.Used
- ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
- DAG.getTargetConstant(InVals.size(), DL, MVT::i32))
- : DAG.getUNDEF(In.VT));
+ InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
+ DAG.getTargetConstant(InVals.size(),
+ DL, MVT::i32))
+ : DAG.getUNDEF(In.VT));
// Record the number and types of arguments.
MFI->addParam(In.VT);
@@ -706,12 +839,18 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
MFI->addParam(PtrVT);
}
- // Record the number and types of results.
+ // Record the number and types of arguments and results.
SmallVector<MVT, 4> Params;
SmallVector<MVT, 4> Results;
- ComputeSignatureVTs(MF.getFunction(), DAG.getTarget(), Params, Results);
+ ComputeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
+ DAG.getTarget(), Params, Results);
for (MVT VT : Results)
MFI->addResult(VT);
+ // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
+ // the param logic here with ComputeSignatureVTs
+ assert(MFI->getParams().size() == Params.size() &&
+ std::equal(MFI->getParams().begin(), MFI->getParams().end(),
+ Params.begin()));
return Chain;
}
@@ -724,34 +863,47 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
switch (Op.getOpcode()) {
- default:
- llvm_unreachable("unimplemented operation lowering");
- return SDValue();
- case ISD::FrameIndex:
- return LowerFrameIndex(Op, DAG);
- case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG);
- case ISD::ExternalSymbol:
- return LowerExternalSymbol(Op, DAG);
- case ISD::JumpTable:
- return LowerJumpTable(Op, DAG);
- case ISD::BR_JT:
- return LowerBR_JT(Op, DAG);
- case ISD::VASTART:
- return LowerVASTART(Op, DAG);
- case ISD::BlockAddress:
- case ISD::BRIND:
- fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
- return SDValue();
- case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
- fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
- return SDValue();
- case ISD::FRAMEADDR:
- return LowerFRAMEADDR(Op, DAG);
- case ISD::CopyToReg:
- return LowerCopyToReg(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN:
- return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operation lowering");
+ return SDValue();
+ case ISD::FrameIndex:
+ return LowerFrameIndex(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::ExternalSymbol:
+ return LowerExternalSymbol(Op, DAG);
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG);
+ case ISD::BR_JT:
+ return LowerBR_JT(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG);
+ case ISD::BlockAddress:
+ case ISD::BRIND:
+ fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
+ return SDValue();
+ case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
+ fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
+ return SDValue();
+ case ISD::FRAMEADDR:
+ return LowerFRAMEADDR(Op, DAG);
+ case ISD::CopyToReg:
+ return LowerCopyToReg(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::INSERT_VECTOR_ELT:
+ return LowerAccessVectorElement(Op, DAG);
+ case ISD::INTRINSIC_VOID:
+ return LowerINTRINSIC_VOID(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG:
+ return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return LowerShift(Op, DAG);
}
}
@@ -763,21 +915,20 @@ SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
// the FI to some LEA-like instruction, but since we don't have that, we
// need to insert some kind of instruction that can take an FI operand and
// produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
- // copy_local between Op and its FI operand.
+ // local.copy between Op and its FI operand.
SDValue Chain = Op.getOperand(0);
SDLoc DL(Op);
unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
EVT VT = Src.getValueType();
- SDValue Copy(
- DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
- : WebAssembly::COPY_I64,
- DL, VT, Src),
- 0);
+ SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
+ : WebAssembly::COPY_I64,
+ DL, VT, Src),
+ 0);
return Op.getNode()->getNumValues() == 1
? DAG.getCopyToReg(Chain, DL, Reg, Copy)
- : DAG.getCopyToReg(Chain, DL, Reg, Copy, Op.getNumOperands() == 4
- ? Op.getOperand(3)
- : SDValue());
+ : DAG.getCopyToReg(Chain, DL, Reg, Copy,
+ Op.getNumOperands() == 4 ? Op.getOperand(3)
+ : SDValue());
}
return SDValue();
}
@@ -817,8 +968,9 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
}
-SDValue WebAssemblyTargetLowering::LowerExternalSymbol(
- SDValue Op, SelectionDAG &DAG) const {
+SDValue
+WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
const auto *ES = cast<ExternalSymbolSDNode>(Op);
EVT VT = Op.getValueType();
@@ -829,9 +981,10 @@ SDValue WebAssemblyTargetLowering::LowerExternalSymbol(
// we don't know anything about the symbol other than its name, because all
// external symbols used in target-independent SelectionDAG code are for
// functions.
- return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
- DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
- /*TargetFlags=*/0x1));
+ return DAG.getNode(
+ WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
+ WebAssemblyII::MO_SYMBOL_FUNCTION));
}
SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
@@ -860,7 +1013,8 @@ SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
// Add an operand for each case.
- for (auto MBB : MBBs) Ops.push_back(DAG.getBasicBlock(MBB));
+ for (auto MBB : MBBs)
+ Ops.push_back(DAG.getBasicBlock(MBB));
// TODO: For now, we just pick something arbitrary for a default case for now.
// We really want to sniff out the guard and put in the real default case (and
@@ -893,10 +1047,181 @@ WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
default:
return {}; // Don't custom lower most intrinsics.
- case Intrinsic::wasm_lsda:
- // TODO For now, just return 0 not to crash
- return DAG.getConstant(0, DL, Op.getValueType());
+ case Intrinsic::wasm_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT VT = Op.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ auto &Context = MF.getMMI().getContext();
+ MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
+ Twine(MF.getFunctionNumber()));
+ return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getMCSymbol(S, PtrVT));
+ }
+ }
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ SDLoc DL(Op);
+
+ switch (IntNo) {
+ default:
+ return {}; // Don't custom lower most intrinsics.
+
+ case Intrinsic::wasm_throw: {
+ int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
+ switch (Tag) {
+ case CPP_EXCEPTION: {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ const char *SymName = MF.createExternalSymbolName("__cpp_exception");
+ SDValue SymNode =
+ DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
+ DAG.getTargetExternalSymbol(
+ SymName, PtrVT, WebAssemblyII::MO_SYMBOL_EVENT));
+ return DAG.getNode(WebAssemblyISD::THROW, DL,
+ MVT::Other, // outchain type
+ {
+ Op.getOperand(0), // inchain
+ SymNode, // exception symbol
+ Op.getOperand(3) // thrown value
+ });
+ }
+ default:
+ llvm_unreachable("Invalid tag!");
+ }
+ break;
+ }
+ }
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ // If sign extension operations are disabled, allow sext_inreg only if operand
+ // is a vector extract. SIMD does not depend on sign extension operations, but
+ // allowing sext_inreg in this context lets us have simple patterns to select
+ // extract_lane_s instructions. Expanding sext_inreg everywhere would be
+ // simpler in this file, but would necessitate large and brittle patterns to
+ // undo the expansion and select extract_lane_s instructions.
+ assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
+ if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT)
+ return Op;
+ // Otherwise expand
+ return SDValue();
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
+ MVT VecType = Op.getOperand(0).getSimpleValueType();
+ assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
+ size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
+
+ // Space for two vector args and sixteen mask indices
+ SDValue Ops[18];
+ size_t OpIdx = 0;
+ Ops[OpIdx++] = Op.getOperand(0);
+ Ops[OpIdx++] = Op.getOperand(1);
+
+ // Expand mask indices to byte indices and materialize them as operands
+ for (size_t I = 0, Lanes = Mask.size(); I < Lanes; ++I) {
+ for (size_t J = 0; J < LaneBytes; ++J) {
+ // Lower undefs (represented by -1 in mask) to zero
+ uint64_t ByteIndex =
+ Mask[I] == -1 ? 0 : (uint64_t)Mask[I] * LaneBytes + J;
+ Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
+ }
+ }
+
+ return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Allow constant lane indices, expand variable lane indices
+ SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
+ if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
+ return Op;
+ else
+ // Perform default expansion
+ return SDValue();
+}
+
+static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
+ EVT LaneT = Op.getSimpleValueType().getVectorElementType();
+ // 32-bit and 64-bit unrolled shifts will have proper semantics
+ if (LaneT.bitsGE(MVT::i32))
+ return DAG.UnrollVectorOp(Op.getNode());
+ // Otherwise mask the shift value to get proper semantics from 32-bit shift
+ SDLoc DL(Op);
+ SDValue ShiftVal = Op.getOperand(1);
+ uint64_t MaskVal = LaneT.getSizeInBits() - 1;
+ SDValue MaskedShiftVal = DAG.getNode(
+ ISD::AND, // mask opcode
+ DL, ShiftVal.getValueType(), // masked value type
+ ShiftVal, // original shift value operand
+ DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
+ );
+
+ return DAG.UnrollVectorOp(
+ DAG.getNode(Op.getOpcode(), // original shift opcode
+ DL, Op.getValueType(), // original return type
+ Op.getOperand(0), // original vector operand,
+ MaskedShiftVal // new masked shift value operand
+ )
+ .getNode());
+}
+
+SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ // Only manually lower vector shifts
+ assert(Op.getSimpleValueType().isVector());
+
+ // Expand all vector shifts until V8 fixes its implementation
+ // TODO: remove this once V8 is fixed
+ if (!Subtarget->hasUnimplementedSIMD128())
+ return UnrollVectorShift(Op, DAG);
+
+ // Unroll non-splat vector shifts
+ BuildVectorSDNode *ShiftVec;
+ SDValue SplatVal;
+ if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
+ !(SplatVal = ShiftVec->getSplatValue()))
+ return UnrollVectorShift(Op, DAG);
+
+ // All splats except i64x2 const splats are handled by patterns
+ ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
+ if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
+ return Op;
+
+ // i64x2 const splats are custom lowered to avoid unnecessary wraps
+ unsigned Opcode;
+ switch (Op.getOpcode()) {
+ case ISD::SHL:
+ Opcode = WebAssemblyISD::VEC_SHL;
+ break;
+ case ISD::SRA:
+ Opcode = WebAssemblyISD::VEC_SHR_S;
+ break;
+ case ISD::SRL:
+ Opcode = WebAssemblyISD::VEC_SHR_U;
+ break;
+ default:
+ llvm_unreachable("unexpected opcode");
}
+ APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32);
+ return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
+ DAG.getConstant(Shift, DL, MVT::i32));
}
//===----------------------------------------------------------------------===//
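
Illustration of the masking in UnrollVectorShift above (assumed helper, not part of the commit): lanes narrower than 32 bits are shifted as 32-bit values after unrolling, so the shift amount is first reduced modulo the lane width (MaskVal = lane bits - 1) to keep per-lane behaviour.

    #include <cstdint>

    // An i8 lane shifted by 9: masking with (8 - 1) gives the per-lane result.
    uint8_t shiftI8LaneLeft(uint8_t Lane, uint32_t Amount) {
      uint32_t Masked = Amount & (8 - 1);              // 9 & 7 == 1
      return uint8_t((uint32_t(Lane) << Masked) & 0xFF);
    }
    // shiftI8LaneLeft(1, 9) == 2; the unmasked 32-bit shift (1 << 9) truncated
    // to 8 bits would give 0 instead.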
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 79819493ac6a..59f4230ed889 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -29,21 +29,22 @@ enum NodeType : unsigned {
#undef HANDLE_NODETYPE
};
-} // end namespace WebAssemblyISD
+} // end namespace WebAssemblyISD
class WebAssemblySubtarget;
class WebAssemblyTargetMachine;
class WebAssemblyTargetLowering final : public TargetLowering {
- public:
+public:
WebAssemblyTargetLowering(const TargetMachine &TM,
const WebAssemblySubtarget &STI);
- private:
+private:
/// Keep a pointer to the WebAssemblySubtarget around so that we can make the
/// right decision when generating code for different targets.
const WebAssemblySubtarget *Subtarget;
+ AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
@@ -52,9 +53,9 @@ class WebAssemblyTargetLowering final : public TargetLowering {
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
- std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI, StringRef Constraint,
- MVT VT) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -66,6 +67,9 @@ class WebAssemblyTargetLowering final : public TargetLowering {
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const override;
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
@@ -94,13 +98,18 @@ class WebAssemblyTargetLowering final : public TargetLowering {
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
};
namespace WebAssembly {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
-} // end namespace WebAssembly
+} // end namespace WebAssembly
-} // end namespace llvm
+} // end namespace llvm
#endif
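
The shouldExpandAtomicRMWInIR override added above returns AtomicExpansionKind::CmpXChg for read-modify-write operations WebAssembly has no instruction for; LLVM's atomic-expansion pass then rewrites them into a compare-exchange retry loop, which selects to the atomic cmpxchg instructions defined in the WebAssemblyInstrAtomics.td changes below. A rough C++ equivalent of that loop for an atomic nand (illustration only, assumed names):

    #include <atomic>
    #include <cstdint>

    uint32_t atomicNand(std::atomic<uint32_t> &Addr, uint32_t Val) {
      uint32_t Expected = Addr.load();
      // compare_exchange_weak refreshes Expected with the current value on
      // failure, so the loop retries with up-to-date data.
      while (!Addr.compare_exchange_weak(Expected, ~(Expected & Val)))
        ;
      return Expected; // the old value, as atomicrmw returns
    }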
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index d879932b3232..5fb8ef90bc43 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -16,10 +16,16 @@
// Atomic loads
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
+multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> inst = -1> {
+ defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
+ inst>,
+ Requires<[HasAtomics]>;
+}
+
defm ATOMIC_LOAD_I32 : WebAssemblyLoad<I32, "i32.atomic.load", 0xfe10>;
defm ATOMIC_LOAD_I64 : WebAssemblyLoad<I64, "i64.atomic.load", 0xfe11>;
-} // Defs = [ARGUMENTS]
// Select loads with no constant offset.
let Predicates = [HasAtomics] in {
@@ -54,13 +60,11 @@ def : LoadPatExternSymOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
// Extending loads. Note that there are only zero-extending atomic loads, no
// sign-extending loads.
-let Defs = [ARGUMENTS] in {
defm ATOMIC_LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load8_u", 0xfe12>;
defm ATOMIC_LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load16_u", 0xfe13>;
defm ATOMIC_LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load8_u", 0xfe14>;
defm ATOMIC_LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load16_u", 0xfe15>;
defm ATOMIC_LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load32_u", 0xfe16>;
-} // Defs = [ARGUMENTS]
// Fragments for extending loads. These are different from regular loads because
// the SDNodes are derived from AtomicSDNode rather than LoadSDNode and
@@ -110,7 +114,7 @@ def : LoadPatNoOffset<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatNoOffset<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatNoOffset<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatNoOffset<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-// 32->64 sext load gets selected as i32.atomic.load, i64.extend_s/i32
+// 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s
// Zero-extending loads with constant offset
def : LoadPatImmOff<i32, zext_aload_8_32, regPlusImm, ATOMIC_LOAD8_U_I32>;
@@ -192,10 +196,8 @@ def : LoadPatExternSymOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
// Atomic stores
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
defm ATOMIC_STORE_I32 : WebAssemblyStore<I32, "i32.atomic.store", 0xfe17>;
defm ATOMIC_STORE_I64 : WebAssemblyStore<I64, "i64.atomic.store", 0xfe18>;
-} // Defs = [ARGUMENTS]
// We need an 'atomic' version of store patterns because store and atomic_store
// nodes have different operand orders:
@@ -255,13 +257,11 @@ def : AStorePatExternSymOffOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
} // Predicates = [HasAtomics]
// Truncating stores.
-let Defs = [ARGUMENTS] in {
defm ATOMIC_STORE8_I32 : WebAssemblyStore<I32, "i32.atomic.store8", 0xfe19>;
defm ATOMIC_STORE16_I32 : WebAssemblyStore<I32, "i32.atomic.store16", 0xfe1a>;
defm ATOMIC_STORE8_I64 : WebAssemblyStore<I64, "i64.atomic.store8", 0xfe1b>;
defm ATOMIC_STORE16_I64 : WebAssemblyStore<I64, "i64.atomic.store16", 0xfe1c>;
defm ATOMIC_STORE32_I64 : WebAssemblyStore<I64, "i64.atomic.store32", 0xfe1d>;
-} // Defs = [ARGUMENTS]
// Fragments for truncating stores.
@@ -333,8 +333,6 @@ def : AStorePatExternSymOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
// Atomic binary read-modify-writes
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
defm "" : I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
@@ -346,83 +344,82 @@ multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0xfe1e>;
defm ATOMIC_RMW_ADD_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.add", 0xfe1f>;
defm ATOMIC_RMW8_U_ADD_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.add", 0xfe20>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.add_u", 0xfe20>;
defm ATOMIC_RMW16_U_ADD_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.add", 0xfe21>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.add_u", 0xfe21>;
defm ATOMIC_RMW8_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.add", 0xfe22>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.add_u", 0xfe22>;
defm ATOMIC_RMW16_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.add", 0xfe23>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.add_u", 0xfe23>;
defm ATOMIC_RMW32_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.add", 0xfe24>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.add_u", 0xfe24>;
defm ATOMIC_RMW_SUB_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.sub", 0xfe25>;
defm ATOMIC_RMW_SUB_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.sub", 0xfe26>;
defm ATOMIC_RMW8_U_SUB_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.sub", 0xfe27>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.sub_u", 0xfe27>;
defm ATOMIC_RMW16_U_SUB_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.sub", 0xfe28>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.sub_u", 0xfe28>;
defm ATOMIC_RMW8_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.sub", 0xfe29>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.sub_u", 0xfe29>;
defm ATOMIC_RMW16_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.sub", 0xfe2a>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.sub_u", 0xfe2a>;
defm ATOMIC_RMW32_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.sub", 0xfe2b>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.sub_u", 0xfe2b>;
defm ATOMIC_RMW_AND_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.and", 0xfe2c>;
defm ATOMIC_RMW_AND_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.and", 0xfe2d>;
defm ATOMIC_RMW8_U_AND_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.and", 0xfe2e>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.and_u", 0xfe2e>;
defm ATOMIC_RMW16_U_AND_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.and", 0xfe2f>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.and_u", 0xfe2f>;
defm ATOMIC_RMW8_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.and", 0xfe30>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.and_u", 0xfe30>;
defm ATOMIC_RMW16_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.and", 0xfe31>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.and_u", 0xfe31>;
defm ATOMIC_RMW32_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.and", 0xfe32>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.and_u", 0xfe32>;
defm ATOMIC_RMW_OR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.or", 0xfe33>;
defm ATOMIC_RMW_OR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.or", 0xfe34>;
defm ATOMIC_RMW8_U_OR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.or", 0xfe35>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.or_u", 0xfe35>;
defm ATOMIC_RMW16_U_OR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.or", 0xfe36>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.or_u", 0xfe36>;
defm ATOMIC_RMW8_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.or", 0xfe37>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.or_u", 0xfe37>;
defm ATOMIC_RMW16_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.or", 0xfe38>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.or_u", 0xfe38>;
defm ATOMIC_RMW32_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.or", 0xfe39>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.or_u", 0xfe39>;
defm ATOMIC_RMW_XOR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.xor", 0xfe3a>;
defm ATOMIC_RMW_XOR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.xor", 0xfe3b>;
defm ATOMIC_RMW8_U_XOR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.xor", 0xfe3c>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xor_u", 0xfe3c>;
defm ATOMIC_RMW16_U_XOR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.xor", 0xfe3d>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xor_u", 0xfe3d>;
defm ATOMIC_RMW8_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.xor", 0xfe3e>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xor_u", 0xfe3e>;
defm ATOMIC_RMW16_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.xor", 0xfe3f>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xor_u", 0xfe3f>;
defm ATOMIC_RMW32_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.xor", 0xfe40>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xor_u", 0xfe40>;
defm ATOMIC_RMW_XCHG_I32 :
WebAssemblyBinRMW<I32, "i32.atomic.rmw.xchg", 0xfe41>;
defm ATOMIC_RMW_XCHG_I64 :
WebAssemblyBinRMW<I64, "i64.atomic.rmw.xchg", 0xfe42>;
defm ATOMIC_RMW8_U_XCHG_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.xchg", 0xfe43>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xchg_u", 0xfe43>;
defm ATOMIC_RMW16_U_XCHG_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.xchg", 0xfe44>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xchg_u", 0xfe44>;
defm ATOMIC_RMW8_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.xchg", 0xfe45>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xchg_u", 0xfe45>;
defm ATOMIC_RMW16_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.xchg", 0xfe46>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xchg_u", 0xfe46>;
defm ATOMIC_RMW32_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.xchg", 0xfe47>;
-}
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xchg_u", 0xfe47>;
// Select binary RMWs with no constant offset.
class BinRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
@@ -533,7 +530,7 @@ class sext_bin_rmw_8_64<PatFrag kind> :
PatFrag<(ops node:$addr, node:$val),
(anyext (i32 (kind node:$addr, (i32 (trunc (i64 node:$val))))))>;
class sext_bin_rmw_16_64<PatFrag kind> : sext_bin_rmw_8_64<kind>;
-// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_s/i32
+// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_i32_s
// Patterns for various addressing modes for truncating-extending binary RMWs.
multiclass BinRMWTruncExtPattern<
@@ -655,3 +652,368 @@ defm : BinRMWTruncExtPattern<
ATOMIC_RMW8_U_XCHG_I32, ATOMIC_RMW16_U_XCHG_I32,
ATOMIC_RMW8_U_XCHG_I64, ATOMIC_RMW16_U_XCHG_I64, ATOMIC_RMW32_U_XCHG_I64>;
} // Predicates = [HasAtomics]
+
+//===----------------------------------------------------------------------===//
+// Atomic ternary read-modify-writes
+//===----------------------------------------------------------------------===//
+
+// TODO LLVM IR's cmpxchg instruction returns a pair of {loaded value, success
+// flag}. When we use the success flag or both values, we can't make use of i64
+// truncate/extend versions of instructions for now, which is suboptimal.
+// Consider adding a pass after instruction selection that optimizes this case
+// if it is frequent.
+
+multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
+ defm "" : I<(outs rc:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
+ rc:$new),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new"),
+ !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+}
+
+defm ATOMIC_RMW_CMPXCHG_I32 :
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw.cmpxchg", 0xfe48>;
+defm ATOMIC_RMW_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw.cmpxchg", 0xfe49>;
+defm ATOMIC_RMW8_U_CMPXCHG_I32 :
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw8.cmpxchg_u", 0xfe4a>;
+defm ATOMIC_RMW16_U_CMPXCHG_I32 :
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw16.cmpxchg_u", 0xfe4b>;
+defm ATOMIC_RMW8_U_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw8.cmpxchg_u", 0xfe4c>;
+defm ATOMIC_RMW16_U_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw16.cmpxchg_u", 0xfe4d>;
+defm ATOMIC_RMW32_U_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw32.cmpxchg_u", 0xfe4e>;
+
+// Select ternary RMWs with no constant offset.
+class TerRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)),
+ (inst 0, 0, I32:$addr, ty:$exp, ty:$new)>;
+
+// Select ternary RMWs with a constant offset.
+
+// Pattern with address + immediate offset
+class TerRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
+ Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)),
+ (inst 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>;
+
+class TerRMWPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+ ty:$exp, ty:$new)),
+ (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, ty:$new)>;
+
+class TerRMWPatExternalSym<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
+ ty:$exp, ty:$new)),
+ (inst 0, texternalsym:$off, I32:$addr, ty:$exp, ty:$new)>;
+
+// Select ternary RMWs with just a constant offset.
+class TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
+ (inst 0, imm:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+
+class TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
+ (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+
+class TerRMWPatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (WebAssemblywrapper texternalsym:$off), ty:$exp, ty:$new)),
+ (inst 0, texternalsym:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+
+// Patterns for various addressing modes.
+multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
+ NI inst_64> {
+ def : TerRMWPatNoOffset<i32, rmw_32, inst_32>;
+ def : TerRMWPatNoOffset<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
+ def : TerRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
+ def : TerRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
+ def : TerRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
+
+ def : TerRMWPatGlobalAddr<i32, rmw_32, inst_32>;
+ def : TerRMWPatGlobalAddr<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatExternalSym<i32, rmw_32, inst_32>;
+ def : TerRMWPatExternalSym<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatOffsetOnly<i32, rmw_32, inst_32>;
+ def : TerRMWPatOffsetOnly<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatExternSymOffOnly<i32, rmw_32, inst_32>;
+ def : TerRMWPatExternSymOffOnly<i64, rmw_64, inst_64>;
+}
+
+let Predicates = [HasAtomics] in {
+defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64,
+ ATOMIC_RMW_CMPXCHG_I32, ATOMIC_RMW_CMPXCHG_I64>;
+} // Predicates = [HasAtomics]
+
+// Truncating & zero-extending ternary RMW patterns.
+// DAG legalization & optimization before instruction selection may introduce
+// additional nodes such as anyext or assertzext depending on operand types.
+class zext_ter_rmw_8_32<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (and (i32 (kind node:$addr, node:$exp, node:$new)), 255)>;
+class zext_ter_rmw_16_32<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (and (i32 (kind node:$addr, node:$exp, node:$new)), 65535)>;
+class zext_ter_rmw_8_64<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (zext (i32 (assertzext (i32 (kind node:$addr,
+ (i32 (trunc (i64 node:$exp))),
+ (i32 (trunc (i64 node:$new))))))))>;
+class zext_ter_rmw_16_64<PatFrag kind> : zext_ter_rmw_8_64<kind>;
+class zext_ter_rmw_32_64<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (zext (i32 (kind node:$addr,
+ (i32 (trunc (i64 node:$exp))),
+ (i32 (trunc (i64 node:$new))))))>;
+
+// Truncating & sign-extending ternary RMW patterns.
+// We match subword RMWs (for 32-bit) and anyext RMWs (for 64-bit) and select a
+// zext RMW; the next instruction will be sext_inreg which is selected by
+// itself.
+class sext_ter_rmw_8_32<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (kind node:$addr, node:$exp, node:$new)>;
+class sext_ter_rmw_16_32<PatFrag kind> : sext_ter_rmw_8_32<kind>;
+class sext_ter_rmw_8_64<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (anyext (i32 (assertzext (i32
+ (kind node:$addr,
+ (i32 (trunc (i64 node:$exp))),
+ (i32 (trunc (i64 node:$new))))))))>;
+class sext_ter_rmw_16_64<PatFrag kind> : sext_ter_rmw_8_64<kind>;
+// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_i32_s
+
+// Patterns for various addressing modes for truncating-extending ternary RMWs.
+multiclass TerRMWTruncExtPattern<
+ PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
+ NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> {
+ // Truncating-extending ternary RMWs with no constant offset
+ def : TerRMWPatNoOffset<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatNoOffset<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatNoOffset<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatNoOffset<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatNoOffset<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatNoOffset<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatNoOffset<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatNoOffset<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatNoOffset<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ // Truncating-extending ternary RMWs with a constant offset
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, regPlusImm, inst32_64>;
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
+
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+
+ def : TerRMWPatGlobalAddr<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddr<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatGlobalAddr<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddr<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ def : TerRMWPatExternalSym<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternalSym<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternalSym<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternalSym<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatExternalSym<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatExternalSym<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternalSym<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternalSym<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternalSym<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ // Truncating-extending ternary RMWs with just a constant offset
+ def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ def : TerRMWPatExternSymOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternSymOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatExternSymOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternSymOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternSymOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternSymOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+}
+
+let Predicates = [HasAtomics] in {
+defm : TerRMWTruncExtPattern<
+ atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64,
+ ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
+ ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
+ ATOMIC_RMW32_U_CMPXCHG_I64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic wait / notify
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1 in {
+defm ATOMIC_NOTIFY :
+ I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
+ "atomic.notify \t${off}, ${p2align}", 0xfe00>;
+let mayLoad = 1 in {
+defm ATOMIC_WAIT_I32 :
+ I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp, I64:$timeout),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "i32.atomic.wait \t${off}, ${p2align}", 0xfe01>;
+defm ATOMIC_WAIT_I64 :
+ I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp, I64:$timeout),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "i64.atomic.wait \t${off}, ${p2align}", 0xfe02>;
+} // mayLoad = 1
+} // hasSideEffects = 1
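+
+// As a rough usage sketch (assuming the usual int_wasm_* -> llvm.wasm.* name
+// mapping), IR such as
+//   %res = call i32 @llvm.wasm.atomic.wait.i32(i32* %p, i32 %exp, i64 %timeout)
+//   %woken = call i32 @llvm.wasm.atomic.notify(i32* %p, i32 %count)
+// is selected as ATOMIC_WAIT_I32 (i32.atomic.wait) and ATOMIC_NOTIFY
+// (atomic.notify) through the patterns below.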
+
+let Predicates = [HasAtomics] in {
+// Select notifys with no constant offset.
+class NotifyPatNoOffset<Intrinsic kind> :
+ Pat<(i32 (kind I32:$addr, I32:$count)),
+ (ATOMIC_NOTIFY 0, 0, I32:$addr, I32:$count)>;
+def : NotifyPatNoOffset<int_wasm_atomic_notify>;
+
+// Select notifys with a constant offset.
+
+// Pattern with address + immediate offset
+class NotifyPatImmOff<Intrinsic kind, PatFrag operand> :
+ Pat<(i32 (kind (operand I32:$addr, imm:$off), I32:$count)),
+ (ATOMIC_NOTIFY 0, imm:$off, I32:$addr, I32:$count)>;
+def : NotifyPatImmOff<int_wasm_atomic_notify, regPlusImm>;
+def : NotifyPatImmOff<int_wasm_atomic_notify, or_is_add>;
+
+class NotifyPatGlobalAddr<Intrinsic kind> :
+ Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+ I32:$count)),
+ (ATOMIC_NOTIFY 0, tglobaladdr:$off, I32:$addr, I32:$count)>;
+def : NotifyPatGlobalAddr<int_wasm_atomic_notify>;
+
+class NotifyPatExternalSym<Intrinsic kind> :
+ Pat<(i32 (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
+ I32:$count)),
+ (ATOMIC_NOTIFY 0, texternalsym:$off, I32:$addr, I32:$count)>;
+def : NotifyPatExternalSym<int_wasm_atomic_notify>;
+
+// Select notifys with just a constant offset.
+class NotifyPatOffsetOnly<Intrinsic kind> :
+ Pat<(i32 (kind imm:$off, I32:$count)),
+ (ATOMIC_NOTIFY 0, imm:$off, (CONST_I32 0), I32:$count)>;
+def : NotifyPatOffsetOnly<int_wasm_atomic_notify>;
+
+class NotifyPatGlobalAddrOffOnly<Intrinsic kind> :
+ Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), I32:$count)),
+ (ATOMIC_NOTIFY 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>;
+def : NotifyPatGlobalAddrOffOnly<int_wasm_atomic_notify>;
+
+class NotifyPatExternSymOffOnly<Intrinsic kind> :
+ Pat<(i32 (kind (WebAssemblywrapper texternalsym:$off), I32:$count)),
+ (ATOMIC_NOTIFY 0, texternalsym:$off, (CONST_I32 0), I32:$count)>;
+def : NotifyPatExternSymOffOnly<int_wasm_atomic_notify>;
+
+// Select waits with no constant offset.
+class WaitPatNoOffset<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)),
+ (inst 0, 0, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+// Select waits with a constant offset.
+
+// Pattern with address + immediate offset
+class WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, NI inst> :
+ Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)),
+ (inst 0, imm:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, ATOMIC_WAIT_I64>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, ATOMIC_WAIT_I64>;
+
+class WaitPatGlobalAddr<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+ ty:$exp, I64:$timeout)),
+ (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatGlobalAddr<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatGlobalAddr<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatExternalSym<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
+ ty:$exp, I64:$timeout)),
+ (inst 0, texternalsym:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatExternalSym<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatExternalSym<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+// Select waits with just a constant offset.
+class WaitPatOffsetOnly<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
+ (inst 0, imm:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)),
+ (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatExternSymOffOnly<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (WebAssemblywrapper texternalsym:$off), ty:$exp,
+ I64:$timeout)),
+ (inst 0, texternalsym:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatExternSymOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatExternSymOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+} // Predicates = [HasAtomics]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 34262752430c..07839b790114 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -15,8 +15,6 @@
// TODO: addr64: These currently assume the callee address is 32-bit.
// FIXME: add $type to first call_indirect asmstr (and maybe $flags)
-let Defs = [ARGUMENTS] in {
-
// Call sequence markers. These have an immediate which represents the amount of
// stack space to allocate or free, which is used for varargs lowering.
let Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 in {
@@ -52,34 +50,35 @@ multiclass CALL<WebAssemblyRegClass vt, string prefix> {
}
multiclass SIMD_CALL<ValueType vt, string prefix> {
- defm CALL_#vt : SIMD_I<(outs V128:$dst), (ins function32_op:$callee,
- variable_ops),
- (outs), (ins function32_op:$callee),
- [(set (vt V128:$dst),
- (WebAssemblycall1 (i32 imm:$callee)))],
- !strconcat(prefix, "call\t$dst, $callee"),
- !strconcat(prefix, "call\t$callee"),
- 0x10>;
+
+ defm CALL_#vt : I<(outs V128:$dst), (ins function32_op:$callee, variable_ops),
+ (outs), (ins function32_op:$callee),
+ [(set (vt V128:$dst),
+ (WebAssemblycall1 (i32 imm:$callee)))],
+ !strconcat(prefix, "call\t$dst, $callee"),
+ !strconcat(prefix, "call\t$callee"),
+ 0x10>,
+ Requires<[HasSIMD128]>;
let isCodeGenOnly = 1 in {
- defm PCALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
- (ins I32:$callee, variable_ops),
- (outs), (ins I32:$callee),
- [(set (vt V128:$dst),
- (WebAssemblycall1 I32:$callee))],
- "PSEUDO CALL INDIRECT\t$callee",
- "PSEUDO CALL INDIRECT\t$callee">;
+ defm PCALL_INDIRECT_#vt : I<(outs V128:$dst),
+ (ins I32:$callee, variable_ops),
+ (outs), (ins I32:$callee),
+ [(set (vt V128:$dst),
+ (WebAssemblycall1 I32:$callee))],
+ "PSEUDO CALL INDIRECT\t$callee",
+ "PSEUDO CALL INDIRECT\t$callee">,
+ Requires<[HasSIMD128]>;
} // isCodeGenOnly = 1
- defm CALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
- (ins TypeIndex:$type, i32imm:$flags,
- variable_ops),
- (outs), (ins TypeIndex:$type, i32imm:$flags),
- [],
- !strconcat(prefix,
- "call_indirect\t$dst"),
- !strconcat(prefix, "call_indirect\t$type"),
- 0x11>;
+ defm CALL_INDIRECT_#vt : I<(outs V128:$dst),
+ (ins TypeIndex:$type, i32imm:$flags, variable_ops),
+ (outs), (ins TypeIndex:$type, i32imm:$flags),
+ [],
+ !strconcat(prefix, "call_indirect\t$dst"),
+ !strconcat(prefix, "call_indirect\t$type"),
+ 0x11>,
+ Requires<[HasSIMD128]>;
}
let Uses = [SP32, SP64], isCall = 1 in {
@@ -88,10 +87,12 @@ let Uses = [SP32, SP64], isCall = 1 in {
defm "" : CALL<F32, "f32.">;
defm "" : CALL<F64, "f64.">;
defm "" : CALL<EXCEPT_REF, "except_ref.">;
- defm "" : SIMD_CALL<v16i8, "i8x16.">;
- defm "" : SIMD_CALL<v8i16, "i16x8.">;
- defm "" : SIMD_CALL<v4i32, "i32x4.">;
- defm "" : SIMD_CALL<v4f32, "f32x4.">;
+ defm "" : SIMD_CALL<v16i8, "v128.">;
+ defm "" : SIMD_CALL<v8i16, "v128.">;
+ defm "" : SIMD_CALL<v4i32, "v128.">;
+ defm "" : SIMD_CALL<v2i64, "v128.">;
+ defm "" : SIMD_CALL<v4f32, "v128.">;
+ defm "" : SIMD_CALL<v2f64, "v128.">;
defm CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops),
(outs), (ins function32_op:$callee),
@@ -115,8 +116,6 @@ let Uses = [SP32, SP64], isCall = 1 in {
0x11>;
} // Uses = [SP32,SP64], isCall = 1
-} // Defs = [ARGUMENTS]
-
// Patterns for matching a direct call to a global address.
def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_I32 tglobaladdr:$callee)>;
@@ -132,8 +131,12 @@ def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v8i16 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v4i32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_v2i64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_v2f64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(ExceptRef
(WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_EXCEPT_REF tglobaladdr:$callee)>;
@@ -155,8 +158,12 @@ def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v8i16 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v4i32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_v2i64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_v2f64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(ExceptRef
(WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_EXCEPT_REF texternalsym:$callee)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index d90244b90662..7eb6cbf4d249 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -12,8 +12,6 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
// The condition operand is a boolean value which WebAssembly represents as i32.
defm BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond),
@@ -30,47 +28,37 @@ defm BR : NRI<(outs), (ins bb_op:$dst),
} // isBarrier = 1
} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1
-} // Defs = [ARGUMENTS]
-
def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst),
(BR_IF bb_op:$dst, I32:$cond)>;
def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst),
(BR_UNLESS bb_op:$dst, I32:$cond)>;
-let Defs = [ARGUMENTS] in {
+// A list of branch targets enclosed in {} and separated by commas.
+// Used by br_table only.
+def BrListAsmOperand : AsmOperandClass { let Name = "BrList"; }
+let OperandNamespace = "WebAssembly" in {
+let OperandType = "OPERAND_BRLIST" in {
+def brlist : Operand<i32> {
+ let ParserMatchClass = BrListAsmOperand;
+ let PrintMethod = "printBrList";
+}
+} // OPERAND_BRLIST
+} // OperandNamespace = "WebAssembly"
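+
+// As an illustrative sketch (the operand values here are hypothetical), the
+// stack form of br_table prints its targets through printBrList, e.g.
+//   br_table {0, 1, 2, 0}
+// where each entry is a relative branch depth and the final entry is the
+// default target.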
// TODO: SelectionDAG's lowering insists on using a pointer as the index for
// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
// currently.
-// Set TSFlags{0} to 1 to indicate that the variable_ops are immediates.
-// Set TSFlags{1} to 1 to indicate that the immediates represent labels.
-// FIXME: this can't inherit from I<> since there is no way to inherit from a
-// multiclass and still have the let statements.
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
-def BR_TABLE_I32 : NI<(outs), (ins I32:$index, variable_ops),
- [(WebAssemblybr_table I32:$index)], 0,
- "br_table \t$index", 0x0e> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
-def BR_TABLE_I32_S : NI<(outs), (ins I32:$index),
- [], 1,
- "br_table \t$index", 0x0e> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
-def BR_TABLE_I64 : NI<(outs), (ins I64:$index, variable_ops),
- [(WebAssemblybr_table I64:$index)], 0,
- "br_table \t$index"> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
-def BR_TABLE_I64_S : NI<(outs), (ins I64:$index),
- [], 1,
- "br_table \t$index"> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
+defm BR_TABLE_I32 : I<(outs), (ins I32:$index, variable_ops),
+ (outs), (ins brlist:$brl),
+ [(WebAssemblybr_table I32:$index)],
+ "br_table \t$index", "br_table \t$brl",
+ 0x0e>;
+defm BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops),
+ (outs), (ins brlist:$brl),
+ [(WebAssemblybr_table I64:$index)],
+ "br_table \t$index", "br_table \t$brl",
+ 0x0e>;
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
// This is technically a control-flow instruction, since all it affects is the
@@ -81,13 +69,19 @@ defm NOP : NRI<(outs), (ins), [], "nop", 0x01>;
// These use/clobber VALUE_STACK to prevent them from being moved into the
// middle of an expression tree.
let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
-defm BLOCK : NRI<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>;
-defm LOOP : NRI<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>;
+defm BLOCK : NRI<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>;
+defm LOOP : NRI<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>;
+
+defm IF : I<(outs), (ins Signature:$sig, I32:$cond),
+ (outs), (ins Signature:$sig),
+ [], "if \t$sig, $cond", "if \t$sig", 0x04>;
+defm ELSE : NRI<(outs), (ins), [], "else", 0x05>;
-// END_BLOCK, END_LOOP, and END_FUNCTION are represented with the same opcode in
-// wasm.
+// END_BLOCK, END_LOOP, END_IF and END_FUNCTION are represented with the same
+// opcode in wasm.
defm END_BLOCK : NRI<(outs), (ins), [], "end_block", 0x0b>;
defm END_LOOP : NRI<(outs), (ins), [], "end_loop", 0x0b>;
+defm END_IF : NRI<(outs), (ins), [], "end_if", 0x0b>;
let isTerminator = 1, isBarrier = 1 in
defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
@@ -103,14 +97,16 @@ multiclass RETURN<WebAssemblyRegClass vt> {
}
multiclass SIMD_RETURN<ValueType vt> {
- defm RETURN_#vt : SIMD_I<(outs), (ins V128:$val), (outs), (ins),
- [(WebAssemblyreturn (vt V128:$val))],
- "return \t$val", "return", 0x0f>;
+ defm RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins),
+ [(WebAssemblyreturn (vt V128:$val))],
+ "return \t$val", "return", 0x0f>,
+ Requires<[HasSIMD128]>;
// Equivalent to RETURN_#vt, for use at the end of a function when wasm
// semantics return by falling off the end of the block.
let isCodeGenOnly = 1 in
- defm FALLTHROUGH_RETURN_#vt : SIMD_I<(outs), (ins V128:$val), (outs), (ins),
- []>;
+ defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins),
+ []>,
+ Requires<[HasSIMD128]>;
}
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
@@ -124,7 +120,9 @@ let isReturn = 1 in {
defm "": SIMD_RETURN<v16i8>;
defm "": SIMD_RETURN<v8i16>;
defm "": SIMD_RETURN<v4i32>;
+ defm "": SIMD_RETURN<v2i64>;
defm "": SIMD_RETURN<v4f32>;
+ defm "": SIMD_RETURN<v2f64>;
defm RETURN_VOID : NRI<(outs), (ins), [(WebAssemblyreturn)], "return", 0x0f>;
@@ -144,14 +142,16 @@ let Predicates = [HasExceptionHandling] in {
// Throwing an exception: throw / rethrow
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
-defm THROW_I32 : I<(outs), (ins i32imm:$tag, I32:$val),
- (outs), (ins i32imm:$tag),
- [(int_wasm_throw imm:$tag, I32:$val)],
+defm THROW_I32 : I<(outs), (ins event_op:$tag, I32:$val),
+ (outs), (ins event_op:$tag),
+ [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag),
+ I32:$val)],
"throw \t$tag, $val", "throw \t$tag",
0x08>;
-defm THROW_I64 : I<(outs), (ins i32imm:$tag, I64:$val),
- (outs), (ins i32imm:$tag),
- [(int_wasm_throw imm:$tag, I64:$val)],
+defm THROW_I64 : I<(outs), (ins event_op:$tag, I64:$val),
+ (outs), (ins event_op:$tag),
+ [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag),
+ I64:$val)],
"throw \t$tag, $val", "throw \t$tag",
0x08>;
defm RETHROW : NRI<(outs), (ins bb_op:$dst), [], "rethrow \t$dst", 0x09>;
@@ -168,7 +168,7 @@ defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
// Catching an exception: catch / catch_all
-let hasCtrlDep = 1 in {
+let hasCtrlDep = 1, hasSideEffects = 1 in {
defm CATCH_I32 : I<(outs I32:$dst), (ins i32imm:$tag),
(outs), (ins i32imm:$tag),
[(set I32:$dst, (int_wasm_catch imm:$tag))],
@@ -181,14 +181,10 @@ defm CATCH_ALL : NRI<(outs), (ins), [], "catch_all", 0x05>;
}
// Pseudo instructions: cleanupret / catchret
-// They are not return instructions in wasm, but setting 'isReturn' to true as
-// in X86 is necessary for computing EH scope membership.
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
- isCodeGenOnly = 1, isReturn = 1 in {
+ isCodeGenOnly = 1, isEHScopeReturn = 1 in {
defm CLEANUPRET : NRI<(outs), (ins), [(cleanupret)], "", 0>;
defm CATCHRET : NRI<(outs), (ins bb_op:$dst, bb_op:$from),
[(catchret bb:$dst, bb:$from)], "", 0>;
}
}
-
-} // Defs = [ARGUMENTS]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
index c89c1b549816..e128656a142c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -13,19 +13,17 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
defm I32_WRAP_I64 : I<(outs I32:$dst), (ins I64:$src), (outs), (ins),
[(set I32:$dst, (trunc I64:$src))],
- "i32.wrap/i64\t$dst, $src", "i32.wrap/i64", 0xa7>;
+ "i32.wrap_i64\t$dst, $src", "i32.wrap_i64", 0xa7>;
defm I64_EXTEND_S_I32 : I<(outs I64:$dst), (ins I32:$src), (outs), (ins),
[(set I64:$dst, (sext I32:$src))],
- "i64.extend_s/i32\t$dst, $src", "i64.extend_s/i32",
+ "i64.extend_i32_s\t$dst, $src", "i64.extend_i32_s",
0xac>;
defm I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src), (outs), (ins),
[(set I64:$dst, (zext I32:$src))],
- "i64.extend_u/i32\t$dst, $src", "i64.extend_u/i32",
+ "i64.extend_i32_u\t$dst, $src", "i64.extend_i32_u",
0xad>;
let Predicates = [HasSignExt] in {
@@ -51,58 +49,72 @@ defm I64_EXTEND32_S_I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins),
0xc4>;
} // Predicates = [HasSignExt]
-} // defs = [ARGUMENTS]
-
// Expand a "don't care" extend into zero-extend (chosen over sign-extend
// somewhat arbitrarily, although it favors popular hardware architectures
// and is conceptually a simpler operation).
def : Pat<(i64 (anyext I32:$src)), (I64_EXTEND_U_I32 I32:$src)>;
-let Defs = [ARGUMENTS] in {
-
// Conversion from floating point to integer instructions which don't trap on
// overflow or invalid.
defm I32_TRUNC_S_SAT_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
[(set I32:$dst, (fp_to_sint F32:$src))],
- "i32.trunc_s:sat/f32\t$dst, $src",
- "i32.trunc_s:sat/f32", 0xfc00>,
+ "i32.trunc_sat_f32_s\t$dst, $src",
+ "i32.trunc_sat_f32_s", 0xfc00>,
Requires<[HasNontrappingFPToInt]>;
defm I32_TRUNC_U_SAT_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
[(set I32:$dst, (fp_to_uint F32:$src))],
- "i32.trunc_u:sat/f32\t$dst, $src",
- "i32.trunc_u:sat/f32", 0xfc01>,
+ "i32.trunc_sat_f32_u\t$dst, $src",
+ "i32.trunc_sat_f32_u", 0xfc01>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_S_SAT_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
[(set I64:$dst, (fp_to_sint F32:$src))],
- "i64.trunc_s:sat/f32\t$dst, $src",
- "i64.trunc_s:sat/f32", 0xfc04>,
+ "i64.trunc_sat_f32_s\t$dst, $src",
+ "i64.trunc_sat_f32_s", 0xfc04>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_U_SAT_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
[(set I64:$dst, (fp_to_uint F32:$src))],
- "i64.trunc_u:sat/f32\t$dst, $src",
- "i64.trunc_u:sat/f32", 0xfc05>,
+ "i64.trunc_sat_f32_u\t$dst, $src",
+ "i64.trunc_sat_f32_u", 0xfc05>,
Requires<[HasNontrappingFPToInt]>;
defm I32_TRUNC_S_SAT_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
[(set I32:$dst, (fp_to_sint F64:$src))],
- "i32.trunc_s:sat/f64\t$dst, $src",
- "i32.trunc_s:sat/f64", 0xfc02>,
+ "i32.trunc_sat_f64_s\t$dst, $src",
+ "i32.trunc_sat_f64_s", 0xfc02>,
Requires<[HasNontrappingFPToInt]>;
defm I32_TRUNC_U_SAT_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
[(set I32:$dst, (fp_to_uint F64:$src))],
- "i32.trunc_u:sat/f64\t$dst, $src",
- "i32.trunc_u:sat/f64", 0xfc03>,
+ "i32.trunc_sat_f64_u\t$dst, $src",
+ "i32.trunc_sat_f64_u", 0xfc03>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_S_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
[(set I64:$dst, (fp_to_sint F64:$src))],
- "i64.trunc_s:sat/f64\t$dst, $src",
- "i64.trunc_s:sat/f64", 0xfc06>,
+ "i64.trunc_sat_f64_s\t$dst, $src",
+ "i64.trunc_sat_f64_s", 0xfc06>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_U_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
[(set I64:$dst, (fp_to_uint F64:$src))],
- "i64.trunc_u:sat/f64\t$dst, $src",
- "i64.trunc_u:sat/f64", 0xfc07>,
+ "i64.trunc_sat_f64_u\t$dst, $src",
+ "i64.trunc_sat_f64_u", 0xfc07>,
Requires<[HasNontrappingFPToInt]>;
+// Lower llvm.wasm.trunc.saturate.* to saturating instructions
+def : Pat<(int_wasm_trunc_saturate_signed F32:$src),
+ (I32_TRUNC_S_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src),
+ (I32_TRUNC_U_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_signed F64:$src),
+ (I32_TRUNC_S_SAT_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src),
+ (I32_TRUNC_U_SAT_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_saturate_signed F32:$src),
+ (I64_TRUNC_S_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src),
+ (I64_TRUNC_U_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_signed F64:$src),
+ (I64_TRUNC_S_SAT_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src),
+ (I64_TRUNC_U_SAT_F64 F64:$src)>;
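+
+// As an illustrative sketch (assuming the usual intrinsic name mangling), a
+// call such as
+//   %r = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float %x)
+// is selected as I32_TRUNC_S_SAT_F32 and emitted as i32.trunc_sat_f32_s.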
+
// Conversion from floating point to integer pseudo-instructions which don't
// trap on overflow or invalid.
let usesCustomInserter = 1, isCodeGenOnly = 1 in {
@@ -135,88 +147,86 @@ defm FP_TO_UINT_I64_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
// Conversion from floating point to integer traps on overflow and invalid.
let hasSideEffects = 1 in {
defm I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
- [], "i32.trunc_s/f32\t$dst, $src", "i32.trunc_s/f32",
+ [], "i32.trunc_f32_s\t$dst, $src", "i32.trunc_f32_s",
0xa8>;
defm I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
- [], "i32.trunc_u/f32\t$dst, $src", "i32.trunc_u/f32",
+ [], "i32.trunc_f32_u\t$dst, $src", "i32.trunc_f32_u",
0xa9>;
defm I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
- [], "i64.trunc_s/f32\t$dst, $src", "i64.trunc_s/f32",
+ [], "i64.trunc_f32_s\t$dst, $src", "i64.trunc_f32_s",
0xae>;
defm I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
- [], "i64.trunc_u/f32\t$dst, $src", "i64.trunc_u/f32",
+ [], "i64.trunc_f32_u\t$dst, $src", "i64.trunc_f32_u",
0xaf>;
defm I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
- [], "i32.trunc_s/f64\t$dst, $src", "i32.trunc_s/f64",
+ [], "i32.trunc_f64_s\t$dst, $src", "i32.trunc_f64_s",
0xaa>;
defm I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
- [], "i32.trunc_u/f64\t$dst, $src", "i32.trunc_u/f64",
+ [], "i32.trunc_f64_u\t$dst, $src", "i32.trunc_f64_u",
0xab>;
defm I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
- [], "i64.trunc_s/f64\t$dst, $src", "i64.trunc_s/f64",
+ [], "i64.trunc_f64_s\t$dst, $src", "i64.trunc_f64_s",
0xb0>;
defm I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
- [], "i64.trunc_u/f64\t$dst, $src", "i64.trunc_u/f64",
+ [], "i64.trunc_f64_u\t$dst, $src", "i64.trunc_f64_u",
0xb1>;
} // hasSideEffects = 1
defm F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins),
[(set F32:$dst, (sint_to_fp I32:$src))],
- "f32.convert_s/i32\t$dst, $src", "f32.convert_s/i32",
+ "f32.convert_i32_s\t$dst, $src", "f32.convert_i32_s",
0xb2>;
defm F32_CONVERT_U_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins),
[(set F32:$dst, (uint_to_fp I32:$src))],
- "f32.convert_u/i32\t$dst, $src", "f32.convert_u/i32",
+ "f32.convert_i32_u\t$dst, $src", "f32.convert_i32_u",
0xb3>;
defm F64_CONVERT_S_I32 : I<(outs F64:$dst), (ins I32:$src), (outs), (ins),
[(set F64:$dst, (sint_to_fp I32:$src))],
- "f64.convert_s/i32\t$dst, $src", "f64.convert_s/i32",
+ "f64.convert_i32_s\t$dst, $src", "f64.convert_i32_s",
0xb7>;
defm F64_CONVERT_U_I32 : I<(outs F64:$dst), (ins I32:$src), (outs), (ins),
[(set F64:$dst, (uint_to_fp I32:$src))],
- "f64.convert_u/i32\t$dst, $src", "f64.convert_u/i32",
+ "f64.convert_i32_u\t$dst, $src", "f64.convert_i32_u",
0xb8>;
defm F32_CONVERT_S_I64 : I<(outs F32:$dst), (ins I64:$src), (outs), (ins),
[(set F32:$dst, (sint_to_fp I64:$src))],
- "f32.convert_s/i64\t$dst, $src", "f32.convert_s/i64",
+ "f32.convert_i64_s\t$dst, $src", "f32.convert_i64_s",
0xb4>;
defm F32_CONVERT_U_I64 : I<(outs F32:$dst), (ins I64:$src), (outs), (ins),
[(set F32:$dst, (uint_to_fp I64:$src))],
- "f32.convert_u/i64\t$dst, $src", "f32.convert_u/i64",
+ "f32.convert_i64_u\t$dst, $src", "f32.convert_i64_u",
0xb5>;
defm F64_CONVERT_S_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins),
[(set F64:$dst, (sint_to_fp I64:$src))],
- "f64.convert_s/i64\t$dst, $src", "f64.convert_s/i64",
+ "f64.convert_i64_s\t$dst, $src", "f64.convert_i64_s",
0xb9>;
defm F64_CONVERT_U_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins),
[(set F64:$dst, (uint_to_fp I64:$src))],
- "f64.convert_u/i64\t$dst, $src", "f64.convert_u/i64",
+ "f64.convert_i64_u\t$dst, $src", "f64.convert_i64_u",
0xba>;
defm F64_PROMOTE_F32 : I<(outs F64:$dst), (ins F32:$src), (outs), (ins),
[(set F64:$dst, (fpextend F32:$src))],
- "f64.promote/f32\t$dst, $src", "f64.promote/f32",
+ "f64.promote_f32\t$dst, $src", "f64.promote_f32",
0xbb>;
defm F32_DEMOTE_F64 : I<(outs F32:$dst), (ins F64:$src), (outs), (ins),
[(set F32:$dst, (fpround F64:$src))],
- "f32.demote/f64\t$dst, $src", "f32.demote/f64",
+ "f32.demote_f64\t$dst, $src", "f32.demote_f64",
0xb6>;
defm I32_REINTERPRET_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
[(set I32:$dst, (bitconvert F32:$src))],
- "i32.reinterpret/f32\t$dst, $src",
- "i32.reinterpret/f32", 0xbc>;
+ "i32.reinterpret_f32\t$dst, $src",
+ "i32.reinterpret_f32", 0xbc>;
defm F32_REINTERPRET_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins),
[(set F32:$dst, (bitconvert I32:$src))],
- "f32.reinterpret/i32\t$dst, $src",
- "f32.reinterpret/i32", 0xbe>;
+ "f32.reinterpret_i32\t$dst, $src",
+ "f32.reinterpret_i32", 0xbe>;
defm I64_REINTERPRET_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
[(set I64:$dst, (bitconvert F64:$src))],
- "i64.reinterpret/f64\t$dst, $src",
- "i64.reinterpret/f64", 0xbd>;
+ "i64.reinterpret_f64\t$dst, $src",
+ "i64.reinterpret_f64", 0xbd>;
defm F64_REINTERPRET_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins),
[(set F64:$dst, (bitconvert I64:$src))],
- "f64.reinterpret/i64\t$dst, $src",
- "f64.reinterpret/i64", 0xbf>;
-
-} // Defs = [ARGUMENTS]
+ "f64.reinterpret_i64\t$dst, $src",
+ "f64.reinterpret_i64", 0xbf>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
index 41b39f69e51c..a251d60b89ee 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
@@ -12,8 +12,6 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
defm SELECT_EXCEPT_REF : I<(outs EXCEPT_REF:$dst),
(ins EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond),
(outs), (ins),
@@ -23,8 +21,6 @@ defm SELECT_EXCEPT_REF : I<(outs EXCEPT_REF:$dst),
"except_ref.select\t$dst, $lhs, $rhs, $cond",
"except_ref.select", 0x1b>;
-} // Defs = [ARGUMENTS]
-
def : Pat<(select (i32 (setne I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
(SELECT_EXCEPT_REF EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond)>;
def : Pat<(select (i32 (seteq I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index 8db75d38942b..c5290f00b431 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -12,7 +12,38 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
+multiclass UnaryFP<SDNode node, string name, bits<32> f32Inst,
+ bits<32> f64Inst> {
+ defm _F32 : I<(outs F32:$dst), (ins F32:$src), (outs), (ins),
+ [(set F32:$dst, (node F32:$src))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("f32.", name), f32Inst>;
+ defm _F64 : I<(outs F64:$dst), (ins F64:$src), (outs), (ins),
+ [(set F64:$dst, (node F64:$src))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("f64.", name), f64Inst>;
+}
+multiclass BinaryFP<SDNode node, string name, bits<32> f32Inst,
+ bits<32> f64Inst> {
+ defm _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
+ [(set F32:$dst, (node F32:$lhs, F32:$rhs))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f32.", name), f32Inst>;
+ defm _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
+ [(set F64:$dst, (node F64:$lhs, F64:$rhs))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f64.", name), f64Inst>;
+}
+multiclass ComparisonFP<CondCode cond, string name, bits<32> f32Inst, bits<32> f64Inst> {
+ defm _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f32.", name), f32Inst>;
+ defm _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f64.", name), f64Inst>;
+}
let isCommutable = 1 in
defm ADD : BinaryFP<fadd, "add ", 0x92, 0xa0>;
@@ -27,8 +58,8 @@ defm NEG : UnaryFP<fneg, "neg ", 0x8c, 0x9a>;
defm COPYSIGN : BinaryFP<fcopysign, "copysign", 0x98, 0xa6>;
let isCommutable = 1 in {
-defm MIN : BinaryFP<fminnan, "min ", 0x96, 0xa4>;
-defm MAX : BinaryFP<fmaxnan, "max ", 0x97, 0xa5>;
+defm MIN : BinaryFP<fminimum, "min ", 0x96, 0xa4>;
+defm MAX : BinaryFP<fmaximum, "max ", 0x97, 0xa5>;
} // isCommutable = 1
defm CEIL : UnaryFP<fceil, "ceil", 0x8d, 0x9b>;
@@ -36,8 +67,6 @@ defm FLOOR : UnaryFP<ffloor, "floor", 0x8e, 0x9c>;
defm TRUNC : UnaryFP<ftrunc, "trunc", 0x8f, 0x9d>;
defm NEAREST : UnaryFP<fnearbyint, "nearest", 0x90, 0x9e>;
-} // Defs = [ARGUMENTS]
-
// DAGCombine oddly folds casts into the rhs of copysign. Unfold them.
def : Pat<(fcopysign F64:$lhs, F32:$rhs),
(COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>;
@@ -48,8 +77,6 @@ def : Pat<(fcopysign F32:$lhs, F64:$rhs),
def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>;
def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>;
-let Defs = [ARGUMENTS] in {
-
let isCommutable = 1 in {
defm EQ : ComparisonFP<SETOEQ, "eq ", 0x5b, 0x61>;
defm NE : ComparisonFP<SETUNE, "ne ", 0x5c, 0x62>;
@@ -59,8 +86,6 @@ defm LE : ComparisonFP<SETOLE, "le ", 0x5f, 0x65>;
defm GT : ComparisonFP<SETOGT, "gt ", 0x5e, 0x64>;
defm GE : ComparisonFP<SETOGE, "ge ", 0x60, 0x66>;
-} // Defs = [ARGUMENTS]
-
// Don't care floating-point comparisons, supported via other comparisons.
def : Pat<(seteq f32:$lhs, f32:$rhs), (EQ_F32 f32:$lhs, f32:$rhs)>;
def : Pat<(setne f32:$lhs, f32:$rhs), (NE_F32 f32:$lhs, f32:$rhs)>;
@@ -75,8 +100,6 @@ def : Pat<(setle f64:$lhs, f64:$rhs), (LE_F64 f64:$lhs, f64:$rhs)>;
def : Pat<(setgt f64:$lhs, f64:$rhs), (GT_F64 f64:$lhs, f64:$rhs)>;
def : Pat<(setge f64:$lhs, f64:$rhs), (GE_F64 f64:$lhs, f64:$rhs)>;
-let Defs = [ARGUMENTS] in {
-
defm SELECT_F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs, I32:$cond),
(outs), (ins),
[(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))],
@@ -86,8 +109,6 @@ defm SELECT_F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs, I32:$cond),
[(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))],
"f64.select\t$dst, $lhs, $rhs, $cond", "f64.select", 0x1b>;
-} // Defs = [ARGUMENTS]
-
// ISD::SELECT requires its operand to conform to getBooleanContents, but
// WebAssembly's select interprets any non-zero value as true, so we can fold
// a setne with 0 into a select.
@@ -101,3 +122,10 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), F32:$lhs, F32:$rhs),
(SELECT_F32 F32:$rhs, F32:$lhs, I32:$cond)>;
def : Pat<(select (i32 (seteq I32:$cond, 0)), F64:$lhs, F64:$rhs),
(SELECT_F64 F64:$rhs, F64:$lhs, I32:$cond)>;
+
+// The legalizer inserts an unnecessary `and 1` to make the input conform
+// to getBooleanContents, which we can lower away.
+def : Pat<(select (i32 (and I32:$cond, 1)), F32:$lhs, F32:$rhs),
+ (SELECT_F32 F32:$lhs, F32:$rhs, I32:$cond)>;
+def : Pat<(select (i32 (and I32:$cond, 1)), F64:$lhs, F64:$rhs),
+ (SELECT_F64 F64:$lhs, F64:$rhs, I32:$cond)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 403152c80660..15a9714a55a1 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -15,21 +15,24 @@
// WebAssembly Instruction Format.
// We instantiate 2 of these for every actual instruction (register based
// and stack based), see below.
-class WebAssemblyInst<bits<32> inst, string asmstr, bit stack> : Instruction {
- field bits<32> Inst = inst; // Instruction encoding.
- field bit StackBased = stack;
+class WebAssemblyInst<bits<32> inst, string asmstr, string stack> : StackRel,
+ Instruction {
+ bits<32> Inst = inst; // Instruction encoding.
+ string StackBased = stack;
+ string BaseName = NAME;
let Namespace = "WebAssembly";
let Pattern = [];
let AsmString = asmstr;
}
// Normal instructions. Default instantiation of a WebAssemblyInst.
-class NI<dag oops, dag iops, list<dag> pattern, bit stack, string asmstr = "",
- bits<32> inst = -1>
+class NI<dag oops, dag iops, list<dag> pattern, string stack,
+ string asmstr = "", bits<32> inst = -1>
: WebAssemblyInst<inst, asmstr, stack> {
dag OutOperandList = oops;
dag InOperandList = iops;
let Pattern = pattern;
+ let Defs = [ARGUMENTS];
}
// Generates both register and stack based versions of one actual instruction.
@@ -37,10 +40,10 @@ class NI<dag oops, dag iops, list<dag> pattern, bit stack, string asmstr = "",
// based version of this instruction, as well as the corresponding asmstr.
// The register versions have virtual-register operands which correspond to wasm
// locals or stack locations. Each use and def of the register corresponds to an
-// implicit get_local / set_local or access of stack operands in wasm. These
+// implicit local.get / local.set or access of stack operands in wasm. These
// instructions are used for ISel and all MI passes. The stack versions of the
// instructions do not have register operands (they implicitly operate on the
-// stack), and get_locals and set_locals are explicit. The register instructions
+// stack), and local.gets and local.sets are explicit. The register instructions
// are converted to their corresponding stack instructions before lowering to
// MC.
// Every instruction should want to be based on this multi-class to guarantee
@@ -48,8 +51,10 @@ class NI<dag oops, dag iops, list<dag> pattern, bit stack, string asmstr = "",
multiclass I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
list<dag> pattern_r, string asmstr_r = "", string asmstr_s = "",
bits<32> inst = -1> {
- def "" : NI<oops_r, iops_r, pattern_r, 0, asmstr_r, inst>;
- def _S : NI<oops_s, iops_s, [], 1, asmstr_s, inst>;
+ let isCodeGenOnly = 1 in
+ def "" : NI<oops_r, iops_r, pattern_r, "false", asmstr_r, inst>;
+ let BaseName = NAME in
+ def _S : NI<oops_s, iops_s, [], "true", asmstr_s, inst>;
}
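+// For example (an illustrative sketch): the integer add, defined via BinaryInt
+// in WebAssemblyInstrInteger.td, expands through this multiclass into a
+// register-based ADD_I32 (isCodeGenOnly; used by ISel and the MI passes) and a
+// stack-based ADD_I32_S. Both share the same BaseName, which the
+// getStackOpcode InstrMapping in WebAssemblyInstrInfo.td keys on.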
// For instructions that have no register ops, so both sets are the same.
@@ -57,111 +62,3 @@ multiclass NRI<dag oops, dag iops, list<dag> pattern, string asmstr = "",
bits<32> inst = -1> {
defm "": I<oops, iops, oops, iops, pattern, asmstr, asmstr, inst>;
}
-
-multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
- list<dag> pattern_r, string asmstr_r = "",
- string asmstr_s = "", bits<32> inst = -1> {
- defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
- inst>,
- Requires<[HasSIMD128]>;
-}
-
-multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
- list<dag> pattern_r, string asmstr_r = "",
- string asmstr_s = "", bits<32> inst = -1> {
- defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
- inst>,
- Requires<[HasAtomics]>;
-}
-
-// Unary and binary instructions, for the local types that WebAssembly supports.
-multiclass UnaryInt<SDNode node, string name, bits<32> i32Inst,
- bits<32> i64Inst> {
- defm _I32 : I<(outs I32:$dst), (ins I32:$src), (outs), (ins),
- [(set I32:$dst, (node I32:$src))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $src")),
- !strconcat("i32.", name), i32Inst>;
- defm _I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins),
- [(set I64:$dst, (node I64:$src))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $src")),
- !strconcat("i64.", name), i64Inst>;
-}
-multiclass BinaryInt<SDNode node, string name, bits<32> i32Inst,
- bits<32> i64Inst> {
- defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
- [(set I32:$dst, (node I32:$lhs, I32:$rhs))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i32.", name), i32Inst>;
- defm _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
- [(set I64:$dst, (node I64:$lhs, I64:$rhs))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i64.", name), i64Inst>;
-}
-multiclass UnaryFP<SDNode node, string name, bits<32> f32Inst,
- bits<32> f64Inst> {
- defm _F32 : I<(outs F32:$dst), (ins F32:$src), (outs), (ins),
- [(set F32:$dst, (node F32:$src))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $src")),
- !strconcat("f32.", name), f32Inst>;
- defm _F64 : I<(outs F64:$dst), (ins F64:$src), (outs), (ins),
- [(set F64:$dst, (node F64:$src))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $src")),
- !strconcat("f64.", name), f64Inst>;
-}
-multiclass BinaryFP<SDNode node, string name, bits<32> f32Inst,
- bits<32> f64Inst> {
- defm _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
- [(set F32:$dst, (node F32:$lhs, F32:$rhs))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f32.", name), f32Inst>;
- defm _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
- [(set F64:$dst, (node F64:$lhs, F64:$rhs))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f64.", name), f64Inst>;
-}
-multiclass SIMDBinary<SDNode node, SDNode fnode, string name> {
- defm _I8x16 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v16i8 V128:$dst), (node V128:$lhs, V128:$rhs))],
- !strconcat("i8x16.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i8x16.", name)>;
- defm _I16x8 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v8i16 V128:$dst), (node V128:$lhs, V128:$rhs))],
- !strconcat("i16x8.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i16x8.", name)>;
- defm _I32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v4i32 V128:$dst), (node V128:$lhs, V128:$rhs))],
- !strconcat("i32x4.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i32x4.", name)>;
- defm _F32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v4f32 V128:$dst), (fnode V128:$lhs, V128:$rhs))],
- !strconcat("f32x4.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f32x4.", name)>;
-}
-multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32> i64Inst> {
- defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i32.", name), i32Inst>;
- defm _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i64.", name), i64Inst>;
-}
-multiclass ComparisonFP<CondCode cond, string name, bits<32> f32Inst, bits<32> f64Inst> {
- defm _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f32.", name), f32Inst>;
- defm _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f64.", name), f64Inst>;
-}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index cd49bd1682ad..5efff32d6167 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -70,6 +70,8 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
CopyOpcode = WebAssembly::COPY_F32;
else if (RC == &WebAssembly::F64RegClass)
CopyOpcode = WebAssembly::COPY_F64;
+ else if (RC == &WebAssembly::V128RegClass)
+ CopyOpcode = WebAssembly::COPY_V128;
else
llvm_unreachable("Unexpected register class");
@@ -77,10 +79,8 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, KillSrc ? RegState::Kill : 0);
}
-MachineInstr *
-WebAssemblyInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
- unsigned OpIdx1,
- unsigned OpIdx2) const {
+MachineInstr *WebAssemblyInstrInfo::commuteInstructionImpl(
+ MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const {
// If the operands are stackified, we can't reorder them.
WebAssemblyFunctionInfo &MFI =
*MI.getParent()->getParent()->getInfo<WebAssemblyFunctionInfo>();
@@ -165,12 +165,9 @@ unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB,
return Count;
}
-unsigned WebAssemblyInstrInfo::insertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL,
- int *BytesAdded) const {
+unsigned WebAssemblyInstrInfo::insertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
assert(!BytesAdded && "code size not handled");
if (Cond.empty()) {
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index aeb282a7febb..e3d795f2aab1 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -20,6 +20,9 @@ def HasAddr32 : Predicate<"!Subtarget->hasAddr64()">;
def HasAddr64 : Predicate<"Subtarget->hasAddr64()">;
def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
AssemblerPredicate<"FeatureSIMD128", "simd128">;
+def HasUnimplementedSIMD128 :
+ Predicate<"Subtarget->hasUnimplementedSIMD128()">,
+ AssemblerPredicate<"FeatureUnimplementedSIMD128", "unimplemented-simd128">;
def HasAtomics : Predicate<"Subtarget->hasAtomics()">,
AssemblerPredicate<"FeatureAtomics", "atomics">;
def HasNontrappingFPToInt :
@@ -64,6 +67,7 @@ def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>;
def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyThrow : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
//===----------------------------------------------------------------------===//
// WebAssembly-specific DAG Nodes.
@@ -90,6 +94,8 @@ def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN",
SDT_WebAssemblyReturn, [SDNPHasChain]>;
def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
SDT_WebAssemblyWrapper>;
+def WebAssemblythrow : SDNode<"WebAssemblyISD::THROW", SDT_WebAssemblyThrow,
+ [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// WebAssembly-specific Operands.
@@ -118,6 +124,18 @@ def f32imm_op : Operand<f32>;
let OperandType = "OPERAND_F64IMM" in
def f64imm_op : Operand<f64>;
+let OperandType = "OPERAND_VEC_I8IMM" in
+def vec_i8imm_op : Operand<i32>;
+
+let OperandType = "OPERAND_VEC_I16IMM" in
+def vec_i16imm_op : Operand<i32>;
+
+let OperandType = "OPERAND_VEC_I32IMM" in
+def vec_i32imm_op : Operand<i32>;
+
+let OperandType = "OPERAND_VEC_I64IMM" in
+def vec_i64imm_op : Operand<i64>;
+
let OperandType = "OPERAND_FUNCTION32" in
def function32_op : Operand<i32>;
@@ -128,6 +146,10 @@ let OperandType = "OPERAND_P2ALIGN" in {
def P2Align : Operand<i32> {
let PrintMethod = "printWebAssemblyP2AlignOperand";
}
+
+let OperandType = "OPERAND_EVENT" in
+def event_op : Operand<i32>;
+
} // OperandType = "OPERAND_P2ALIGN"
let OperandType = "OPERAND_SIGNATURE" in {
@@ -142,6 +164,19 @@ def TypeIndex : Operand<i32>;
} // OperandNamespace = "WebAssembly"
//===----------------------------------------------------------------------===//
+// WebAssembly Register to Stack instruction mapping
+//===----------------------------------------------------------------------===//
+
+class StackRel;
+def getStackOpcode : InstrMapping {
+ let FilterClass = "StackRel";
+ let RowFields = ["BaseName"];
+ let ColFields = ["StackBased"];
+ let KeyCol = ["false"];
+ let ValueCols = [["true"]];
+}
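+
+// As a usage sketch (assuming the usual TableGen InstrMapping output), this
+// generates a lookup function in WebAssemblyGenInstrInfo.inc roughly of the
+// form
+//   int WebAssembly::getStackOpcode(uint16_t Opcode);
+// so that, for instance, passing the register-based ADD_I32 yields ADD_I32_S,
+// which the MC lowering uses when emitting the stack-machine form.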
+
+//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format Definitions.
//===----------------------------------------------------------------------===//
@@ -151,74 +186,62 @@ include "WebAssemblyInstrFormats.td"
// Additional instructions.
//===----------------------------------------------------------------------===//
-multiclass ARGUMENT<WebAssemblyRegClass vt> {
- let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in
- defm ARGUMENT_#vt : I<(outs vt:$res), (ins i32imm:$argno),
- (outs), (ins i32imm:$argno),
- [(set vt:$res, (WebAssemblyargument timm:$argno))]>;
+multiclass ARGUMENT<WebAssemblyRegClass reg, ValueType vt> {
+ let hasSideEffects = 1, isCodeGenOnly = 1,
+ Defs = []<Register>, Uses = [ARGUMENTS] in
+ defm ARGUMENT_#vt :
+ I<(outs reg:$res), (ins i32imm:$argno), (outs), (ins i32imm:$argno),
+ [(set (vt reg:$res), (WebAssemblyargument timm:$argno))]>;
}
-multiclass SIMD_ARGUMENT<ValueType vt> {
- let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in
- defm ARGUMENT_#vt : SIMD_I<(outs V128:$res), (ins i32imm:$argno),
- (outs), (ins i32imm:$argno),
- [(set (vt V128:$res),
- (WebAssemblyargument timm:$argno))]>;
-}
-defm "": ARGUMENT<I32>;
-defm "": ARGUMENT<I64>;
-defm "": ARGUMENT<F32>;
-defm "": ARGUMENT<F64>;
-defm "": ARGUMENT<EXCEPT_REF>;
-defm "": SIMD_ARGUMENT<v16i8>;
-defm "": SIMD_ARGUMENT<v8i16>;
-defm "": SIMD_ARGUMENT<v4i32>;
-defm "": SIMD_ARGUMENT<v4f32>;
-
-let Defs = [ARGUMENTS] in {
-
-// get_local and set_local are not generated by instruction selection; they
+defm "": ARGUMENT<I32, i32>;
+defm "": ARGUMENT<I64, i64>;
+defm "": ARGUMENT<F32, f32>;
+defm "": ARGUMENT<F64, f64>;
+defm "": ARGUMENT<EXCEPT_REF, ExceptRef>;
+
+// local.get and local.set are not generated by instruction selection; they
// are implied by virtual register uses and defs.
multiclass LOCAL<WebAssemblyRegClass vt> {
let hasSideEffects = 0 in {
- // COPY is not an actual instruction in wasm, but since we allow get_local and
- // set_local to be implicit during most of codegen, we can have a COPY which
- // is actually a no-op because all the work is done in the implied get_local
- // and set_local. COPYs are eliminated (and replaced with
- // get_local/set_local) in the ExplicitLocals pass.
+ // COPY is not an actual instruction in wasm, but since we allow local.get and
+ // local.set to be implicit during most of codegen, we can have a COPY which
+ // is actually a no-op because all the work is done in the implied local.get
+ // and local.set. COPYs are eliminated (and replaced with
+ // local.get/local.set) in the ExplicitLocals pass.
let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in
defm COPY_#vt : I<(outs vt:$res), (ins vt:$src), (outs), (ins), [],
- "copy_local\t$res, $src", "copy_local">;
+ "local.copy\t$res, $src", "local.copy">;
// TEE is similar to COPY, but writes two copies of its result. Typically
// this would be used to stackify one result and write the other result to a
// local.
let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in
defm TEE_#vt : I<(outs vt:$res, vt:$also), (ins vt:$src), (outs), (ins), [],
- "tee_local\t$res, $also, $src", "tee_local">;
+ "local.tee\t$res, $also, $src", "local.tee">;
- // This is the actual get_local instruction in wasm. These are made explicit
+ // This is the actual local.get instruction in wasm. These are made explicit
// by the ExplicitLocals pass. It has mayLoad because it reads from a wasm
// local, which is a side effect not otherwise modeled in LLVM.
let mayLoad = 1, isAsCheapAsAMove = 1 in
- defm GET_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local),
+ defm LOCAL_GET_#vt : I<(outs vt:$res), (ins local_op:$local),
(outs), (ins local_op:$local), [],
- "get_local\t$res, $local", "get_local\t$local", 0x20>;
+ "local.get\t$res, $local", "local.get\t$local", 0x20>;
- // This is the actual set_local instruction in wasm. These are made explicit
+ // This is the actual local.set instruction in wasm. These are made explicit
// by the ExplicitLocals pass. It has mayStore because it writes to a wasm
// local, which is a side effect not otherwise modeled in LLVM.
let mayStore = 1, isAsCheapAsAMove = 1 in
- defm SET_LOCAL_#vt : I<(outs), (ins local_op:$local, vt:$src),
+ defm LOCAL_SET_#vt : I<(outs), (ins local_op:$local, vt:$src),
(outs), (ins local_op:$local), [],
- "set_local\t$local, $src", "set_local\t$local", 0x21>;
+ "local.set\t$local, $src", "local.set\t$local", 0x21>;
- // This is the actual tee_local instruction in wasm. TEEs are turned into
- // TEE_LOCALs by the ExplicitLocals pass. It has mayStore for the same reason
- // as SET_LOCAL.
+ // This is the actual local.tee instruction in wasm. TEEs are turned into
+ // LOCAL_TEEs by the ExplicitLocals pass. It has mayStore for the same reason
+ // as LOCAL_SET.
let mayStore = 1, isAsCheapAsAMove = 1 in
- defm TEE_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local, vt:$src),
+ defm LOCAL_TEE_#vt : I<(outs vt:$res), (ins local_op:$local, vt:$src),
(outs), (ins local_op:$local), [],
- "tee_local\t$res, $local, $src", "tee_local\t$local",
+ "local.tee\t$res, $local, $src", "local.tee\t$local",
0x22>;
// Unused values must be dropped in some contexts.
@@ -226,15 +249,15 @@ let hasSideEffects = 0 in {
"drop\t$src", "drop", 0x1a>;
let mayLoad = 1 in
- defm GET_GLOBAL_#vt : I<(outs vt:$res), (ins global_op:$local),
+ defm GLOBAL_GET_#vt : I<(outs vt:$res), (ins global_op:$local),
(outs), (ins global_op:$local), [],
- "get_global\t$res, $local", "get_global\t$local",
+ "global.get\t$res, $local", "global.get\t$local",
0x23>;
let mayStore = 1 in
- defm SET_GLOBAL_#vt : I<(outs), (ins global_op:$local, vt:$src),
+ defm GLOBAL_SET_#vt : I<(outs), (ins global_op:$local, vt:$src),
(outs), (ins global_op:$local), [],
- "set_global\t$local, $src", "set_global\t$local",
+ "global.set\t$local, $src", "global.set\t$local",
0x24>;
} // hasSideEffects = 0
@@ -265,12 +288,12 @@ defm CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
"f64.const\t$res, $imm", "f64.const\t$imm", 0x44>;
} // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1
-} // Defs = [ARGUMENTS]
-
def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
(CONST_I32 tglobaladdr:$addr)>;
def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
(CONST_I32 texternalsym:$addr)>;
+def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>;
+def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>;
//===----------------------------------------------------------------------===//
// Additional sets of instructions.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index f9f21fd1d754..d5b63d643697 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -12,7 +12,38 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
+multiclass UnaryInt<SDNode node, string name, bits<32> i32Inst,
+ bits<32> i64Inst> {
+ defm _I32 : I<(outs I32:$dst), (ins I32:$src), (outs), (ins),
+ [(set I32:$dst, (node I32:$src))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("i32.", name), i32Inst>;
+ defm _I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins),
+ [(set I64:$dst, (node I64:$src))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("i64.", name), i64Inst>;
+}
+multiclass BinaryInt<SDNode node, string name, bits<32> i32Inst,
+ bits<32> i64Inst> {
+ defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
+ [(set I32:$dst, (node I32:$lhs, I32:$rhs))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i32.", name), i32Inst>;
+ defm _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
+ [(set I64:$dst, (node I64:$lhs, I64:$rhs))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i64.", name), i64Inst>;
+}
+multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32> i64Inst> {
+ defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i32.", name), i32Inst>;
+ defm _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i64.", name), i64Inst>;
+}
// The spaces after the names are for aesthetic purposes only, to make
// operands line up vertically after tab expansion.
@@ -63,16 +94,12 @@ defm EQZ_I64 : I<(outs I32:$dst), (ins I64:$src), (outs), (ins),
[(set I32:$dst, (setcc I64:$src, 0, SETEQ))],
"i64.eqz \t$dst, $src", "i64.eqz", 0x50>;
-} // Defs = [ARGUMENTS]
-
// Optimize away an explicit mask on a rotate count.
def : Pat<(rotl I32:$lhs, (and I32:$rhs, 31)), (ROTL_I32 I32:$lhs, I32:$rhs)>;
def : Pat<(rotr I32:$lhs, (and I32:$rhs, 31)), (ROTR_I32 I32:$lhs, I32:$rhs)>;
def : Pat<(rotl I64:$lhs, (and I64:$rhs, 63)), (ROTL_I64 I64:$lhs, I64:$rhs)>;
def : Pat<(rotr I64:$lhs, (and I64:$rhs, 63)), (ROTR_I64 I64:$lhs, I64:$rhs)>;
-let Defs = [ARGUMENTS] in {
-
defm SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond),
(outs), (ins),
[(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))],
@@ -82,8 +109,6 @@ defm SELECT_I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs, I32:$cond),
[(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))],
"i64.select\t$dst, $lhs, $rhs, $cond", "i64.select", 0x1b>;
-} // Defs = [ARGUMENTS]
-
// ISD::SELECT requires its operand to conform to getBooleanContents, but
// WebAssembly's select interprets any non-zero value as true, so we can fold
// a setne with 0 into a select.
@@ -97,3 +122,10 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
(SELECT_I32 I32:$rhs, I32:$lhs, I32:$cond)>;
def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
(SELECT_I64 I64:$rhs, I64:$lhs, I32:$cond)>;
+
+// The legalizer inserts an unnecessary `and 1` to make the input conform
+// to getBooleanContents, which we can lower away.
+def : Pat<(select (i32 (and I32:$cond, 1)), I32:$lhs, I32:$rhs),
+ (SELECT_I32 I32:$lhs, I32:$rhs, I32:$cond)>;
+def : Pat<(select (i32 (and I32:$cond, 1)), I64:$lhs, I64:$rhs),
+ (SELECT_I64 I64:$lhs, I64:$rhs, I32:$cond)>;
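
The select patterns just above are sound only because, as the comment notes, the legalizer emits the `and $cond, 1` mask exclusively in front of conditions that are already 0 or 1. A minimal standalone sketch of that equivalence, using an illustrative helper rather than any LLVM API:

#include <cassert>
#include <cstdint>

// WebAssembly-style select: any non-zero condition picks the first operand.
static int32_t wasmSelect(int32_t a, int32_t b, int32_t cond) {
  return cond != 0 ? a : b;
}

int main() {
  for (int32_t cond : {0, 1}) {    // boolean values produced by a setcc
    int32_t masked = cond & 1;     // the `and 1` the legalizer inserts
    assert(wasmSelect(7, 9, masked) == wasmSelect(7, 9, cond));
  }
}
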
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 8a49325af2bd..518f81c61dc4 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -33,10 +33,8 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- KnownBits Known0;
- CurDAG->computeKnownBits(N->getOperand(0), Known0, 0);
- KnownBits Known1;
- CurDAG->computeKnownBits(N->getOperand(1), Known1, 0);
+ KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
+ KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
return (~Known0.Zero & ~Known1.Zero) == 0;
}]>;
@@ -53,15 +51,14 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off),
// We don't need a regPlusES because external symbols never have constant
// offsets folded into them, so we can just use add.
-let Defs = [ARGUMENTS] in {
-
// Defines atomic and non-atomic loads, regular and extending.
multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> {
+ let mayLoad = 1 in
defm "": I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off),
[], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"),
- !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+ !strconcat(Name, "\t${off}${p2align}"), Opcode>;
}
// Basic load.
@@ -72,8 +69,6 @@ defm LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29>;
defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a>;
defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b>;
-} // Defs = [ARGUMENTS]
-
// Select loads with no constant offset.
class LoadPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind I32:$addr)), (inst 0, 0, I32:$addr)>;
@@ -143,8 +138,6 @@ def : LoadPatExternSymOffOnly<i64, load, LOAD_I64>;
def : LoadPatExternSymOffOnly<f32, load, LOAD_F32>;
def : LoadPatExternSymOffOnly<f64, load, LOAD_F64>;
-let Defs = [ARGUMENTS] in {
-
// Extending load.
defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c>;
defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d>;
@@ -157,8 +150,6 @@ defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33>;
defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34>;
defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35>;
-} // Defs = [ARGUMENTS]
-
// Select extending loads with no constant offset.
def : LoadPatNoOffset<i32, sextloadi8, LOAD8_S_I32>;
def : LoadPatNoOffset<i32, zextloadi8, LOAD8_U_I32>;
@@ -302,17 +293,15 @@ def : LoadPatExternSymOffOnly<i64, extloadi8, LOAD8_U_I64>;
def : LoadPatExternSymOffOnly<i64, extloadi16, LOAD16_U_I64>;
def : LoadPatExternSymOffOnly<i64, extloadi32, LOAD32_U_I64>;
-
-let Defs = [ARGUMENTS] in {
-
// Defines atomic and non-atomic stores, regular and truncating
multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> {
+ let mayStore = 1 in
defm "" : I<(outs),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
(outs),
(ins P2Align:$p2align, offset32_op:$off), [],
!strconcat(Name, "\t${off}(${addr})${p2align}, $val"),
- !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+ !strconcat(Name, "\t${off}${p2align}"), Opcode>;
}
// Basic store.
// Note: WebAssembly inverts SelectionDAG's usual operand order.
@@ -321,8 +310,6 @@ defm STORE_I64 : WebAssemblyStore<I64, "i64.store", 0x37>;
defm STORE_F32 : WebAssemblyStore<F32, "f32.store", 0x38>;
defm STORE_F64 : WebAssemblyStore<F64, "f64.store", 0x39>;
-} // Defs = [ARGUMENTS]
-
// Select stores with no constant offset.
class StorePatNoOffset<ValueType ty, PatFrag node, NI inst> :
Pat<(node ty:$val, I32:$addr), (inst 0, 0, I32:$addr, ty:$val)>;
@@ -387,9 +374,6 @@ def : StorePatExternSymOffOnly<i64, store, STORE_I64>;
def : StorePatExternSymOffOnly<f32, store, STORE_F32>;
def : StorePatExternSymOffOnly<f64, store, STORE_F64>;
-
-let Defs = [ARGUMENTS] in {
-
// Truncating store.
defm STORE8_I32 : WebAssemblyStore<I32, "i32.store8", 0x3a>;
defm STORE16_I32 : WebAssemblyStore<I32, "i32.store16", 0x3b>;
@@ -397,8 +381,6 @@ defm STORE8_I64 : WebAssemblyStore<I64, "i64.store8", 0x3c>;
defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>;
defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>;
-} // Defs = [ARGUMENTS]
-
// Select truncating stores with no constant offset.
def : StorePatNoOffset<i32, truncstorei8, STORE8_I32>;
def : StorePatNoOffset<i32, truncstorei16, STORE16_I32>;
@@ -446,8 +428,6 @@ def : StorePatExternSymOffOnly<i64, truncstorei8, STORE8_I64>;
def : StorePatExternSymOffOnly<i64, truncstorei16, STORE16_I64>;
def : StorePatExternSymOffOnly<i64, truncstorei32, STORE32_I64>;
-let Defs = [ARGUMENTS] in {
-
// Current memory size.
defm MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
(outs), (ins i32imm:$flags),
@@ -456,44 +436,13 @@ defm MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
"memory.size\t$dst, $flags", "memory.size\t$flags",
0x3f>,
Requires<[HasAddr32]>;
-defm MEM_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
- (outs), (ins i32imm:$flags),
- [(set I32:$dst, (int_wasm_mem_size (i32 imm:$flags)))],
- "mem.size\t$dst, $flags", "mem.size\t$flags", 0x3f>,
- Requires<[HasAddr32]>;
-defm CURRENT_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
- (outs), (ins i32imm:$flags),
- [],
- "current_memory\t$dst",
- "current_memory\t$flags", 0x3f>,
- Requires<[HasAddr32]>;
// Grow memory.
defm MEMORY_GROW_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
- (outs), (ins i32imm:$flags, I32:$delta),
+ (outs), (ins i32imm:$flags),
[(set I32:$dst,
(int_wasm_memory_grow (i32 imm:$flags),
I32:$delta))],
"memory.grow\t$dst, $flags, $delta",
- "memory.grow\t$flags, $delta", 0x3f>,
+ "memory.grow\t$flags", 0x40>,
Requires<[HasAddr32]>;
-defm MEM_GROW_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
- (outs), (ins i32imm:$flags),
- [(set I32:$dst,
- (int_wasm_mem_grow (i32 imm:$flags), I32:$delta))],
- "mem.grow\t$dst, $flags, $delta", "mem.grow\t$flags",
- 0x3f>,
- Requires<[HasAddr32]>;
-defm GROW_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
- (outs), (ins i32imm:$flags),
- [],
- "grow_memory\t$dst, $delta", "grow_memory\t$flags",
- 0x40>,
- Requires<[HasAddr32]>;
-
-} // Defs = [ARGUMENTS]
-
-def : Pat<(int_wasm_current_memory),
- (CURRENT_MEMORY_I32 0)>;
-def : Pat<(int_wasm_grow_memory I32:$delta),
- (GROW_MEMORY_I32 0, $delta)>;
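
The surviving memory.size/memory.grow definitions above are what the int_wasm_memory_size/int_wasm_memory_grow intrinsics select into. A small usage sketch from C++, assuming a wasm32 Clang target where the __builtin_wasm_memory_size/__builtin_wasm_memory_grow builtins (which expand to those intrinsics) are available:

#include <cstddef>

// Grow linear memory 0 by `pages` 64 KiB pages.
// memory.grow returns the previous size in pages, or (size_t)-1 on failure.
// Returns the new total size in pages, or 0 if the request failed.
std::size_t grow_heap_by_pages(std::size_t pages) {
  std::size_t result = __builtin_wasm_memory_grow(0, pages);   // memory.grow
  if (result == static_cast<std::size_t>(-1))
    return 0;
  return __builtin_wasm_memory_size(0);                        // memory.size
}
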
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 7d1edccdeb3c..587515c5b299 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -12,8 +12,796 @@
///
//===----------------------------------------------------------------------===//
+// Instructions requiring HasSIMD128 and the simd128 prefix byte
+multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> simdop = -1> {
+ defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
+ !or(0xfd00, !and(0xff, simdop))>,
+ Requires<[HasSIMD128]>;
+}
+
+defm "" : ARGUMENT<V128, v16i8>;
+defm "" : ARGUMENT<V128, v8i16>;
+defm "" : ARGUMENT<V128, v4i32>;
+defm "" : ARGUMENT<V128, v2i64>;
+defm "" : ARGUMENT<V128, v4f32>;
+defm "" : ARGUMENT<V128, v2f64>;
+
+// Constrained immediate argument types
+foreach SIZE = [8, 16] in
+def ImmI#SIZE : ImmLeaf<i32,
+ "return ((uint64_t)Imm & ((1UL << "#SIZE#") - 1)) == (uint64_t)Imm;"
+>;
+foreach SIZE = [2, 4, 8, 16, 32] in
+def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
+
+//===----------------------------------------------------------------------===//
+// Load and store
+//===----------------------------------------------------------------------===//
+
+// Load: v128.load
+multiclass SIMDLoad<ValueType vec_t> {
+ let mayLoad = 1 in
+ defm LOAD_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins P2Align:$align, offset32_op:$off, I32:$addr),
+ (outs), (ins P2Align:$align, offset32_op:$off), [],
+ "v128.load\t$dst, ${off}(${addr})$align",
+ "v128.load\t$off$align", 0>;
+}
+
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+defm "" : SIMDLoad<vec_t>;
+
+// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
+def : LoadPatNoOffset<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatImmOff<vec_t, load, regPlusImm, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatImmOff<vec_t, load, or_is_add, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatGlobalAddr<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatExternalSym<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatOffsetOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatGlobalAddrOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatExternSymOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+}
+
+// Store: v128.store
+multiclass SIMDStore<ValueType vec_t> {
+ let mayStore = 1 in
+ defm STORE_#vec_t :
+ SIMD_I<(outs), (ins P2Align:$align, offset32_op:$off, I32:$addr, V128:$vec),
+ (outs), (ins P2Align:$align, offset32_op:$off), [],
+ "v128.store\t${off}(${addr})$align, $vec",
+ "v128.store\t$off$align", 1>;
+}
+
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+defm "" : SIMDStore<vec_t>;
+
+// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
+def : StorePatNoOffset<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatImmOff<vec_t, store, regPlusImm, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatImmOff<vec_t, store, or_is_add, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatGlobalAddr<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatExternalSym<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatOffsetOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatGlobalAddrOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatExternSymOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Constructing SIMD values
+//===----------------------------------------------------------------------===//
+
+// Constant: v128.const
+multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
+ let isMoveImm = 1, isReMaterializable = 1,
+ Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+ defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
+ [(set V128:$dst, (vec_t pat))],
+ "v128.const\t$dst, "#args,
+ "v128.const\t"#args, 2>;
+}
+
+defm "" : ConstVec<v16i8,
+ (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
+ vec_i8imm_op:$i2, vec_i8imm_op:$i3,
+ vec_i8imm_op:$i4, vec_i8imm_op:$i5,
+ vec_i8imm_op:$i6, vec_i8imm_op:$i7,
+ vec_i8imm_op:$i8, vec_i8imm_op:$i9,
+ vec_i8imm_op:$iA, vec_i8imm_op:$iB,
+ vec_i8imm_op:$iC, vec_i8imm_op:$iD,
+ vec_i8imm_op:$iE, vec_i8imm_op:$iF),
+ (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
+ ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
+ ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
+ ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
+ !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
+ "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
+defm "" : ConstVec<v8i16,
+ (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
+ vec_i16imm_op:$i2, vec_i16imm_op:$i3,
+ vec_i16imm_op:$i4, vec_i16imm_op:$i5,
+ vec_i16imm_op:$i6, vec_i16imm_op:$i7),
+ (build_vector
+ ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
+ ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
+ "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
+defm "" : ConstVec<v4i32,
+ (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
+ vec_i32imm_op:$i2, vec_i32imm_op:$i3),
+ (build_vector (i32 imm:$i0), (i32 imm:$i1),
+ (i32 imm:$i2), (i32 imm:$i3)),
+ "$i0, $i1, $i2, $i3">;
+defm "" : ConstVec<v2i64,
+ (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
+ (build_vector (i64 imm:$i0), (i64 imm:$i1)),
+ "$i0, $i1">;
+defm "" : ConstVec<v4f32,
+ (ins f32imm_op:$i0, f32imm_op:$i1,
+ f32imm_op:$i2, f32imm_op:$i3),
+ (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
+ (f32 fpimm:$i2), (f32 fpimm:$i3)),
+ "$i0, $i1, $i2, $i3">;
+defm "" : ConstVec<v2f64,
+ (ins f64imm_op:$i0, f64imm_op:$i1),
+ (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
+ "$i0, $i1">;
+
+// Shuffle lanes: shuffle
+defm SHUFFLE :
+ SIMD_I<(outs V128:$dst),
+ (ins V128:$x, V128:$y,
+ vec_i8imm_op:$m0, vec_i8imm_op:$m1,
+ vec_i8imm_op:$m2, vec_i8imm_op:$m3,
+ vec_i8imm_op:$m4, vec_i8imm_op:$m5,
+ vec_i8imm_op:$m6, vec_i8imm_op:$m7,
+ vec_i8imm_op:$m8, vec_i8imm_op:$m9,
+ vec_i8imm_op:$mA, vec_i8imm_op:$mB,
+ vec_i8imm_op:$mC, vec_i8imm_op:$mD,
+ vec_i8imm_op:$mE, vec_i8imm_op:$mF),
+ (outs),
+ (ins
+ vec_i8imm_op:$m0, vec_i8imm_op:$m1,
+ vec_i8imm_op:$m2, vec_i8imm_op:$m3,
+ vec_i8imm_op:$m4, vec_i8imm_op:$m5,
+ vec_i8imm_op:$m6, vec_i8imm_op:$m7,
+ vec_i8imm_op:$m8, vec_i8imm_op:$m9,
+ vec_i8imm_op:$mA, vec_i8imm_op:$mB,
+ vec_i8imm_op:$mC, vec_i8imm_op:$mD,
+ vec_i8imm_op:$mE, vec_i8imm_op:$mF),
+ [],
+ "v8x16.shuffle\t$dst, $x, $y, "#
+ "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
+ "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
+ "v8x16.shuffle\t"#
+ "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
+ "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
+ 3>;
+
+// Shuffles after custom lowering
+def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
+def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
+ (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
+ (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
+ (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
+ (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
+ (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
+ (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
+ (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
+ (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
+ (vec_t (SHUFFLE (vec_t V128:$x), (vec_t V128:$y),
+ (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
+ (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
+ (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
+ (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
+ (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
+ (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
+ (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
+ (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>;
+}
+
+// Create vector with identical lanes: splat
+def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>;
+def splat4 : PatFrag<(ops node:$x), (build_vector
+ node:$x, node:$x, node:$x, node:$x)>;
+def splat8 : PatFrag<(ops node:$x), (build_vector
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x)>;
+def splat16 : PatFrag<(ops node:$x), (build_vector
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x)>;
+
+multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
+ PatFrag splat_pat, bits<32> simdop> {
+ // Prefer splats over v128.const for const splats (65 is lowest that works)
+ let AddedComplexity = 65 in
+ defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins),
+ [(set (vec_t V128:$dst), (splat_pat reg_t:$x))],
+ vec#".splat\t$dst, $x", vec#".splat", simdop>;
+}
+
+defm "" : Splat<v16i8, "i8x16", I32, splat16, 4>;
+defm "" : Splat<v8i16, "i16x8", I32, splat8, 8>;
+defm "" : Splat<v4i32, "i32x4", I32, splat4, 12>;
+defm "" : Splat<v2i64, "i64x2", I64, splat2, 15>;
+defm "" : Splat<v4f32, "f32x4", F32, splat4, 18>;
+defm "" : Splat<v2f64, "f64x2", F64, splat2, 21>;
+
+//===----------------------------------------------------------------------===//
+// Accessing lanes
+//===----------------------------------------------------------------------===//
+
+// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
+multiclass ExtractLane<ValueType vec_t, string vec, ImmLeaf imm_t,
+ WebAssemblyRegClass reg_t, bits<32> simdop,
+ string suffix = "", SDNode extract = vector_extract> {
+ defm EXTRACT_LANE_#vec_t#suffix :
+ SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
+ (outs), (ins vec_i8imm_op:$idx),
+ [(set reg_t:$dst, (extract (vec_t V128:$vec), (i32 imm_t:$idx)))],
+ vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
+ vec#".extract_lane"#suffix#"\t$idx", simdop>;
+}
+
+multiclass ExtractPat<ValueType lane_t, int mask> {
+ def _s : PatFrag<(ops node:$vec, node:$idx),
+ (i32 (sext_inreg
+ (i32 (vector_extract
+ node:$vec,
+ node:$idx
+ )),
+ lane_t
+ ))>;
+ def _u : PatFrag<(ops node:$vec, node:$idx),
+ (i32 (and
+ (i32 (vector_extract
+ node:$vec,
+ node:$idx
+ )),
+ (i32 mask)
+ ))>;
+}
+
+defm extract_i8x16 : ExtractPat<i8, 0xff>;
+defm extract_i16x8 : ExtractPat<i16, 0xffff>;
+
+multiclass ExtractLaneExtended<string sign, bits<32> baseInst> {
+ defm "" : ExtractLane<v16i8, "i8x16", LaneIdx16, I32, baseInst, sign,
+ !cast<PatFrag>("extract_i8x16"#sign)>;
+ defm "" : ExtractLane<v8i16, "i16x8", LaneIdx8, I32, !add(baseInst, 4), sign,
+ !cast<PatFrag>("extract_i16x8"#sign)>;
+}
+
+defm "" : ExtractLaneExtended<"_s", 5>;
+let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+defm "" : ExtractLaneExtended<"_u", 6>;
+defm "" : ExtractLane<v4i32, "i32x4", LaneIdx4, I32, 13>;
+defm "" : ExtractLane<v2i64, "i64x2", LaneIdx2, I64, 16>;
+defm "" : ExtractLane<v4f32, "f32x4", LaneIdx4, F32, 19>;
+defm "" : ExtractLane<v2f64, "f64x2", LaneIdx2, F64, 22>;
+
+// It would be more conventional to use unsigned extracts, but V8
+// doesn't implement them yet.
+def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))),
+ (EXTRACT_LANE_v16i8_s V128:$vec, (i32 LaneIdx16:$idx))>;
+def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))),
+ (EXTRACT_LANE_v8i16_s V128:$vec, (i32 LaneIdx8:$idx))>;
+
+// Lower undef lane indices to zero
+def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)),
+ (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
+def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)),
+ (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
+def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)),
+ (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
+def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)),
+ (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
+def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8),
+ (EXTRACT_LANE_v16i8_s V128:$vec, 0)>;
+def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16),
+ (EXTRACT_LANE_v8i16_s V128:$vec, 0)>;
+def : Pat<(vector_extract (v4i32 V128:$vec), undef),
+ (EXTRACT_LANE_v4i32 V128:$vec, 0)>;
+def : Pat<(vector_extract (v2i64 V128:$vec), undef),
+ (EXTRACT_LANE_v2i64 V128:$vec, 0)>;
+def : Pat<(vector_extract (v4f32 V128:$vec), undef),
+ (EXTRACT_LANE_v4f32 V128:$vec, 0)>;
+def : Pat<(vector_extract (v2f64 V128:$vec), undef),
+ (EXTRACT_LANE_v2f64 V128:$vec, 0)>;
+
+// Replace lane value: replace_lane
+multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
+ WebAssemblyRegClass reg_t, ValueType lane_t,
+ bits<32> simdop> {
+ defm REPLACE_LANE_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, reg_t:$x),
+ (outs), (ins vec_i8imm_op:$idx),
+ [(set V128:$dst, (vector_insert
+ (vec_t V128:$vec), (lane_t reg_t:$x), (i32 imm_t:$idx)))],
+ vec#".replace_lane\t$dst, $vec, $idx, $x",
+ vec#".replace_lane\t$idx", simdop>;
+}
+
+defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 7>;
+defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 11>;
+defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 14>;
+defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 17>;
+defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 20>;
+defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 23>;
+
+// Lower undef lane indices to zero
+def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
+ (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>;
+def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
+ (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>;
+def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
+ (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;
+
+// Arbitrary other BUILD_VECTOR patterns
+def : Pat<(v16i8 (build_vector
+ (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
+ (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7),
+ (i32 I32:$x8), (i32 I32:$x9), (i32 I32:$x10), (i32 I32:$x11),
+ (i32 I32:$x12), (i32 I32:$x13), (i32 I32:$x14), (i32 I32:$x15)
+ )),
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (SPLAT_v16i8 (i32 I32:$x0))),
+ 1, I32:$x1
+ )),
+ 2, I32:$x2
+ )),
+ 3, I32:$x3
+ )),
+ 4, I32:$x4
+ )),
+ 5, I32:$x5
+ )),
+ 6, I32:$x6
+ )),
+ 7, I32:$x7
+ )),
+ 8, I32:$x8
+ )),
+ 9, I32:$x9
+ )),
+ 10, I32:$x10
+ )),
+ 11, I32:$x11
+ )),
+ 12, I32:$x12
+ )),
+ 13, I32:$x13
+ )),
+ 14, I32:$x14
+ )),
+ 15, I32:$x15
+ ))>;
+def : Pat<(v8i16 (build_vector
+ (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
+ (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7)
+ )),
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (SPLAT_v8i16 (i32 I32:$x0))),
+ 1, I32:$x1
+ )),
+ 2, I32:$x2
+ )),
+ 3, I32:$x3
+ )),
+ 4, I32:$x4
+ )),
+ 5, I32:$x5
+ )),
+ 6, I32:$x6
+ )),
+ 7, I32:$x7
+ ))>;
+def : Pat<(v4i32 (build_vector
+ (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3)
+ )),
+ (v4i32 (REPLACE_LANE_v4i32
+ (v4i32 (REPLACE_LANE_v4i32
+ (v4i32 (REPLACE_LANE_v4i32
+ (v4i32 (SPLAT_v4i32 (i32 I32:$x0))),
+ 1, I32:$x1
+ )),
+ 2, I32:$x2
+ )),
+ 3, I32:$x3
+ ))>;
+def : Pat<(v2i64 (build_vector (i64 I64:$x0), (i64 I64:$x1))),
+ (v2i64 (REPLACE_LANE_v2i64
+ (v2i64 (SPLAT_v2i64 (i64 I64:$x0))), 1, I64:$x1))>;
+def : Pat<(v4f32 (build_vector
+ (f32 F32:$x0), (f32 F32:$x1), (f32 F32:$x2), (f32 F32:$x3)
+ )),
+ (v4f32 (REPLACE_LANE_v4f32
+ (v4f32 (REPLACE_LANE_v4f32
+ (v4f32 (REPLACE_LANE_v4f32
+ (v4f32 (SPLAT_v4f32 (f32 F32:$x0))),
+ 1, F32:$x1
+ )),
+ 2, F32:$x2
+ )),
+ 3, F32:$x3
+ ))>;
+def : Pat<(v2f64 (build_vector (f64 F64:$x0), (f64 F64:$x1))),
+ (v2f64 (REPLACE_LANE_v2f64
+ (v2f64 (SPLAT_v2f64 (f64 F64:$x0))), 1, F64:$x1))>;
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDCondition<ValueType vec_t, ValueType out_t, string vec,
+ string name, CondCode cond, bits<32> simdop> {
+ defm _#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
+ [(set (out_t V128:$dst),
+ (setcc (vec_t V128:$lhs), (vec_t V128:$rhs), cond)
+ )],
+ vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, simdop>;
+}
+
+multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> {
+ defm "" : SIMDCondition<v16i8, v16i8, "i8x16", name, cond, baseInst>;
+ defm "" : SIMDCondition<v8i16, v8i16, "i16x8", name, cond,
+ !add(baseInst, 10)>;
+ defm "" : SIMDCondition<v4i32, v4i32, "i32x4", name, cond,
+ !add(baseInst, 20)>;
+}
+
+multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
+ defm "" : SIMDCondition<v4f32, v4i32, "f32x4", name, cond, baseInst>;
+ defm "" : SIMDCondition<v2f64, v2i64, "f64x2", name, cond,
+ !add(baseInst, 6)>;
+}
+
+// Equality: eq
+let isCommutable = 1 in {
+defm EQ : SIMDConditionInt<"eq", SETEQ, 24>;
+defm EQ : SIMDConditionFP<"eq", SETOEQ, 64>;
+} // isCommutable = 1
+
+// Non-equality: ne
let isCommutable = 1 in {
-defm ADD : SIMDBinary<add, fadd, "add ">;
-defm MUL: SIMDBinary<mul, fmul, "mul ">;
+defm NE : SIMDConditionInt<"ne", SETNE, 25>;
+defm NE : SIMDConditionFP<"ne", SETUNE, 65>;
} // isCommutable = 1
-defm SUB: SIMDBinary<sub, fsub, "sub ">;
+
+// Less than: lt_s / lt_u / lt
+defm LT_S : SIMDConditionInt<"lt_s", SETLT, 26>;
+defm LT_U : SIMDConditionInt<"lt_u", SETULT, 27>;
+defm LT : SIMDConditionFP<"lt", SETOLT, 66>;
+
+// Greater than: gt_s / gt_u / gt
+defm GT_S : SIMDConditionInt<"gt_s", SETGT, 28>;
+defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 29>;
+defm GT : SIMDConditionFP<"gt", SETOGT, 67>;
+
+// Less than or equal: le_s / le_u / le
+defm LE_S : SIMDConditionInt<"le_s", SETLE, 30>;
+defm LE_U : SIMDConditionInt<"le_u", SETULE, 31>;
+defm LE : SIMDConditionFP<"le", SETOLE, 68>;
+
+// Greater than or equal: ge_s / ge_u / ge
+defm GE_S : SIMDConditionInt<"ge_s", SETGE, 32>;
+defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 33>;
+defm GE : SIMDConditionFP<"ge", SETOGE, 69>;
+
+// Lower float comparisons that don't care about NaN to standard WebAssembly
+// float comparisons. These instructions are generated in the target-independent
+// expansion of unordered comparisons and ordered ne.
+def : Pat<(v4i32 (seteq (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+ (v4i32 (EQ_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
+def : Pat<(v4i32 (setne (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+ (v4i32 (NE_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
+def : Pat<(v2i64 (seteq (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+ (v2i64 (EQ_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
+def : Pat<(v2i64 (setne (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+ (v2i64 (NE_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
+
+//===----------------------------------------------------------------------===//
+// Bitwise operations
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDBinary<ValueType vec_t, string vec, SDNode node, string name,
+ bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+ (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (node (vec_t V128:$lhs), (vec_t V128:$rhs))
+ )],
+ vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name,
+ simdop>;
+}
+
+multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop> {
+ defm "" : SIMDBinary<v16i8, "v128", node, name, simdop>;
+ defm "" : SIMDBinary<v8i16, "v128", node, name, simdop>;
+ defm "" : SIMDBinary<v4i32, "v128", node, name, simdop>;
+ defm "" : SIMDBinary<v2i64, "v128", node, name, simdop>;
+}
+
+multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name,
+ bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (vec_t (node (vec_t V128:$vec)))
+ )],
+ vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
+}
+
+// Bitwise logic: v128.not
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
+defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 76>;
+
+// Bitwise logic: v128.and / v128.or / v128.xor
+let isCommutable = 1 in {
+defm AND : SIMDBitwise<and, "and", 77>;
+defm OR : SIMDBitwise<or, "or", 78>;
+defm XOR : SIMDBitwise<xor, "xor", 79>;
+} // isCommutable = 1
+
+// Bitwise select: v128.bitselect
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
+ defm BITSELECT_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (vec_t (int_wasm_bitselect
+ (vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c)
+ ))
+ )],
+ "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 80>;
+
+// Bitselect is equivalent to (c & v1) | (~c & v2)
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
+ def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)),
+ (and (vnot V128:$c), (vec_t V128:$v2)))),
+ (!cast<Instruction>("BITSELECT_"#vec_t)
+ V128:$v1, V128:$v2, V128:$c)>;
+
+//===----------------------------------------------------------------------===//
+// Integer unary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>;
+ defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 17)>;
+ defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 34)>;
+ defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 51)>;
+}
+
+multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name,
+ bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
+ [(set I32:$dst, (i32 (op (vec_t V128:$vec))))],
+ vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
+}
+
+multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> {
+ defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>;
+ defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 17)>;
+ defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 34)>;
+ defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 51)>;
+}
+
+// Integer vector negation
+def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
+
+// Integer negation: neg
+defm NEG : SIMDUnaryInt<ivneg, "neg", 81>;
+
+// Any lane true: any_true
+defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 82>;
+
+// All lanes true: all_true
+defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 83>;
+
+//===----------------------------------------------------------------------===//
+// Bit shifts
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, dag shift_vec,
+ string name, bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
+ (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (node V128:$vec, (vec_t shift_vec)))],
+ vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>;
+}
+
+multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDShift<v16i8, "i8x16", node, (splat16 I32:$x), name, baseInst>;
+ defm "" : SIMDShift<v8i16, "i16x8", node, (splat8 I32:$x), name,
+ !add(baseInst, 17)>;
+ defm "" : SIMDShift<v4i32, "i32x4", node, (splat4 I32:$x), name,
+ !add(baseInst, 34)>;
+ defm "" : SIMDShift<v2i64, "i64x2", node, (splat2 (i64 (zext I32:$x))),
+ name, !add(baseInst, 51)>;
+}
+
+// Left shift by scalar: shl
+defm SHL : SIMDShiftInt<shl, "shl", 84>;
+
+// Right shift by scalar: shr_s / shr_u
+defm SHR_S : SIMDShiftInt<sra, "shr_s", 85>;
+defm SHR_U : SIMDShiftInt<srl, "shr_u", 86>;
+
+// Truncate i64 shift operands to i32s
+foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in
+def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), (v2i64 (splat2 I64:$x)))),
+ (v2i64 (shifts[1] (v2i64 V128:$vec), (I32_WRAP_I64 I64:$x)))>;
+
+// 2xi64 shifts with constant shift amounts are custom lowered to avoid wrapping
+def wasm_shift_t : SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]
+>;
+def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
+def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
+def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;
+foreach shifts = [[wasm_shl, SHL_v2i64],
+ [wasm_shr_s, SHR_S_v2i64],
+ [wasm_shr_u, SHR_U_v2i64]] in
+def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), I32:$x)),
+ (v2i64 (shifts[1] (v2i64 V128:$vec), I32:$x))>;
+
+//===----------------------------------------------------------------------===//
+// Integer binary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDBinaryIntSmall<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>;
+ defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 17)>;
+}
+
+multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
+ defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 34)>;
+}
+
+multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
+ defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 51)>;
+}
+
+// Integer addition: add / add_saturate_s / add_saturate_u
+let isCommutable = 1 in {
+defm ADD : SIMDBinaryInt<add, "add", 87>;
+defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 88>;
+defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 89>;
+} // isCommutable = 1
+
+// Integer subtraction: sub / sub_saturate_s / sub_saturate_u
+defm SUB : SIMDBinaryInt<sub, "sub", 90>;
+defm SUB_SAT_S :
+ SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 91>;
+defm SUB_SAT_U :
+ SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 92>;
+
+// Integer multiplication: mul
+defm MUL : SIMDBinaryIntNoI64x2<mul, "mul", 93>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point unary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>;
+ defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 11)>;
+}
+
+// Absolute value: abs
+defm ABS : SIMDUnaryFP<fabs, "abs", 149>;
+
+// Negation: neg
+defm NEG : SIMDUnaryFP<fneg, "neg", 150>;
+
+// Square root: sqrt
+let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 151>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point binary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>;
+ defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 11)>;
+}
+
+// Addition: add
+let isCommutable = 1 in
+defm ADD : SIMDBinaryFP<fadd, "add", 154>;
+
+// Subtraction: sub
+defm SUB : SIMDBinaryFP<fsub, "sub", 155>;
+
+// Multiplication: mul
+let isCommutable = 1 in
+defm MUL : SIMDBinaryFP<fmul, "mul", 156>;
+
+// Division: div
+let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+defm DIV : SIMDBinaryFP<fdiv, "div", 157>;
+
+// NaN-propagating minimum: min
+defm MIN : SIMDBinaryFP<fminimum, "min", 158>;
+
+// NaN-propagating maximum: max
+defm MAX : SIMDBinaryFP<fmaximum, "max", 159>;
+
+//===----------------------------------------------------------------------===//
+// Conversions
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDConvert<ValueType vec_t, ValueType arg_t, SDNode op,
+ string name, bits<32> simdop> {
+ defm op#_#vec_t#_#arg_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
+ [(set (vec_t V128:$dst), (vec_t (op (arg_t V128:$vec))))],
+ name#"\t$dst, $vec", name, simdop>;
+}
+
+// Integer to floating point: convert
+defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 175>;
+defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 176>;
+defm "" : SIMDConvert<v2f64, v2i64, sint_to_fp, "f64x2.convert_i64x2_s", 177>;
+defm "" : SIMDConvert<v2f64, v2i64, uint_to_fp, "f64x2.convert_i64x2_u", 178>;
+
+// Floating point to integer with saturation: trunc_sat
+defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 171>;
+defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 172>;
+defm "" : SIMDConvert<v2i64, v2f64, fp_to_sint, "i64x2.trunc_sat_f64x2_s", 173>;
+defm "" : SIMDConvert<v2i64, v2f64, fp_to_uint, "i64x2.trunc_sat_f64x2_u", 174>;
+
+// Lower llvm.wasm.trunc.saturate.* to saturating instructions
+def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
+ (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
+def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
+ (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
+def : Pat<(v2i64 (int_wasm_trunc_saturate_signed (v2f64 V128:$src))),
+ (fp_to_sint_v2i64_v2f64 (v2f64 V128:$src))>;
+def : Pat<(v2i64 (int_wasm_trunc_saturate_unsigned (v2f64 V128:$src))),
+ (fp_to_uint_v2i64_v2f64 (v2f64 V128:$src))>;
+
+// Bitcasts are nops
+// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
+foreach t1 = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
+foreach t2 = !foldl(
+ []<ValueType>, [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ acc, cur, !if(!eq(!cast<string>(t1), !cast<string>(cur)),
+ acc, !listconcat(acc, [cur])
+ )
+) in
+def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;
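
All of the SIMD definitions above funnel through the SIMD_I multiclass, whose !or(0xfd00, !and(0xff, simdop)) expression prepends the 0xfd SIMD prefix byte to each per-instruction sub-opcode. A minimal sketch of that arithmetic (the function name is illustrative, not part of the LLVM sources):

#include <cstdint>

// Mirrors !or(0xfd00, !and(0xff, simdop)) from the SIMD_I multiclass.
constexpr uint32_t encodeSimdOpcode(uint32_t simdop) {
  return 0xfd00u | (simdop & 0xffu);
}

static_assert(encodeSimdOpcode(0)  == 0xfd00, "v128.load");
static_assert(encodeSimdOpcode(4)  == 0xfd04, "i8x16.splat");
static_assert(encodeSimdOpcode(80) == 0xfd50, "v128.bitselect");
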
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index e42dcbc0a8ac..ad838dfb574a 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -31,6 +31,7 @@ class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
+ bool removeUnnecessaryUnreachables(MachineFunction &MF);
bool replaceFuncletReturns(MachineFunction &MF);
bool hoistCatches(MachineFunction &MF);
bool addCatchAlls(MachineFunction &MF);
@@ -47,7 +48,7 @@ public:
char WebAssemblyLateEHPrepare::ID = 0;
INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE,
- "WebAssembly Exception Preparation", false, false)
+ "WebAssembly Late Exception Preparation", false, false)
FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
return new WebAssemblyLateEHPrepare();
@@ -59,7 +60,7 @@ FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
// possible search paths should be the same.
// Returns nullptr in case it does not find any EH pad in the search, or finds
// multiple different EH pads.
-MachineBasicBlock *GetMatchingEHPad(MachineInstr *MI) {
+static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) {
MachineFunction *MF = MI->getParent()->getParent();
SmallVector<MachineBasicBlock *, 2> WL;
SmallPtrSet<MachineBasicBlock *, 2> Visited;
@@ -83,29 +84,35 @@ MachineBasicBlock *GetMatchingEHPad(MachineInstr *MI) {
return EHPad;
}
-// Erases the given BB and all its children from the function. If other BBs have
-// this BB as a successor, the successor relationships will be deleted as well.
-static void EraseBBAndChildren(MachineBasicBlock *MBB) {
- SmallVector<MachineBasicBlock *, 8> WL;
- WL.push_back(MBB);
+// Erase each of the specified BBs if it has no remaining predecessors, and
+// also erase all of its dead children.
+template <typename Container>
+static void eraseDeadBBsAndChildren(const Container &MBBs) {
+ SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end());
while (!WL.empty()) {
MachineBasicBlock *MBB = WL.pop_back_val();
- for (auto *Pred : MBB->predecessors())
- Pred->removeSuccessor(MBB);
- for (auto *Succ : MBB->successors()) {
- WL.push_back(Succ);
+ if (!MBB->pred_empty())
+ continue;
+ SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+ WL.append(MBB->succ_begin(), MBB->succ_end());
+ for (auto *Succ : Succs)
MBB->removeSuccessor(Succ);
- }
MBB->eraseFromParent();
}
}
bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** Late EH Prepare **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() !=
ExceptionHandling::Wasm)
return false;
bool Changed = false;
+ Changed |= removeUnnecessaryUnreachables(MF);
Changed |= addRethrows(MF);
if (!MF.getFunction().hasPersonalityFn())
return Changed;
@@ -118,6 +125,31 @@ bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
+ MachineFunction &MF) {
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (!WebAssembly::isThrow(MI))
+ continue;
+ Changed = true;
+
+ // The instruction after the throw should be an unreachable or a branch to
+ // another BB that should eventually lead to an unreachable. Delete it
+ // because throw itself is a terminator, and also delete successors if
+ // any.
+ MBB.erase(std::next(MachineBasicBlock::iterator(MI)), MBB.end());
+ SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
+ MBB.succ_end());
+ for (auto *Succ : Succs)
+ MBB.removeSuccessor(Succ);
+ eraseDeadBBsAndChildren(Succs);
+ }
+ }
+
+ return Changed;
+}
+
bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
bool Changed = false;
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
@@ -179,7 +211,7 @@ bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
Catches.push_back(&MI);
for (auto *Catch : Catches) {
- MachineBasicBlock *EHPad = GetMatchingEHPad(Catch);
+ MachineBasicBlock *EHPad = getMatchingEHPad(Catch);
assert(EHPad && "No matching EH pad for catch");
if (EHPad->begin() == Catch)
continue;
@@ -238,14 +270,18 @@ bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
TII.get(WebAssembly::RETHROW_TO_CALLER));
- // Becasue __cxa_rethrow does not return, the instruction after the
+ // Because __cxa_rethrow does not return, the instruction after the
// rethrow should be an unreachable or a branch to another BB that should
// eventually lead to an unreachable. Delete it because rethrow itself is
// a terminator, and also delete non-EH pad successors if any.
MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end());
+ SmallVector<MachineBasicBlock *, 8> NonPadSuccessors;
for (auto *Succ : MBB.successors())
if (!Succ->isEHPad())
- EraseBBAndChildren(Succ);
+ NonPadSuccessors.push_back(Succ);
+ for (auto *Succ : NonPadSuccessors)
+ MBB.removeSuccessor(Succ);
+ eraseDeadBBsAndChildren(NonPadSuccessors);
}
return Changed;
}
@@ -255,7 +291,7 @@ bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
// %exn = catch 0
// call @__clang_call_terminate(%exn)
// unreachable
-// (There can be set_local and get_locals before the call if we didn't run
+// (There can be local.set and local.gets before the call if we didn't run
// RegStackify)
// But code transformations can change or add more control flow, so the call to
// __clang_call_terminate() function may not be in the original EH pad anymore.
@@ -277,7 +313,7 @@ bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
bool Changed = false;
for (auto *Call : ClangCallTerminateCalls) {
- MachineBasicBlock *EHPad = GetMatchingEHPad(Call);
+ MachineBasicBlock *EHPad = getMatchingEHPad(Call);
assert(EHPad && "No matching EH pad for catch");
// If it is already the form we want, skip it
@@ -294,7 +330,7 @@ bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
// This runs after hoistCatches(), so catch instruction should be at the top
assert(WebAssembly::isCatch(*Catch));
// Takes the result register of the catch instruction as argument. There may
- // have been some other set_local/get_locals in between, but at this point
+ // have been some other local.set/local.gets in between, but at this point
// we don't care.
Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
@@ -302,8 +338,11 @@ bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
TII.get(WebAssembly::UNREACHABLE));
EHPad->erase(InsertPos, EHPad->end());
- for (auto *Succ : EHPad->successors())
- EraseBBAndChildren(Succ);
+ SmallVector<MachineBasicBlock *, 8> Succs(EHPad->succ_begin(),
+ EHPad->succ_end());
+ for (auto *Succ : Succs)
+ EHPad->removeSuccessor(Succ);
+ eraseDeadBBsAndChildren(Succs);
}
return Changed;
}
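
The eraseDeadBBsAndChildren helper introduced above only erases a block once no predecessors remain, then re-queues its successors. A toy, self-contained sketch of the same worklist pattern over a plain adjacency map (illustrative types only, not the MachineBasicBlock API):

#include <map>
#include <set>
#include <vector>

using Graph = std::map<int, std::set<int>>;   // node -> successors

void eraseDeadNodesAndChildren(Graph &G, std::vector<int> Seeds) {
  auto hasPred = [&](int N) {
    for (const auto &KV : G)
      if (KV.second.count(N))
        return true;
    return false;
  };
  std::vector<int> WL(Seeds);
  while (!WL.empty()) {
    int N = WL.back();
    WL.pop_back();
    if (!G.count(N) || hasPred(N))
      continue;                 // still reachable, or already erased
    for (int Succ : G[N])
      WL.push_back(Succ);       // children may now be dead too
    G.erase(N);
  }
}
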
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index 5fb97e38939a..c9a3527d3fbd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -78,30 +78,102 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *Def = MRI.getVRegDef(Cond);
switch (Def->getOpcode()) {
using namespace WebAssembly;
- case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break;
- case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break;
- case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break;
- case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break;
- case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break;
- case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break;
- case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break;
- case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break;
- case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break;
- case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break;
- case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break;
- case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break;
- case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break;
- case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break;
- case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break;
- case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break;
- case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break;
- case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break;
- case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break;
- case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break;
- case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break;
- case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break;
- case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break;
- case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break;
+ case EQ_I32:
+ Def->setDesc(TII.get(NE_I32));
+ Inverted = true;
+ break;
+ case NE_I32:
+ Def->setDesc(TII.get(EQ_I32));
+ Inverted = true;
+ break;
+ case GT_S_I32:
+ Def->setDesc(TII.get(LE_S_I32));
+ Inverted = true;
+ break;
+ case GE_S_I32:
+ Def->setDesc(TII.get(LT_S_I32));
+ Inverted = true;
+ break;
+ case LT_S_I32:
+ Def->setDesc(TII.get(GE_S_I32));
+ Inverted = true;
+ break;
+ case LE_S_I32:
+ Def->setDesc(TII.get(GT_S_I32));
+ Inverted = true;
+ break;
+ case GT_U_I32:
+ Def->setDesc(TII.get(LE_U_I32));
+ Inverted = true;
+ break;
+ case GE_U_I32:
+ Def->setDesc(TII.get(LT_U_I32));
+ Inverted = true;
+ break;
+ case LT_U_I32:
+ Def->setDesc(TII.get(GE_U_I32));
+ Inverted = true;
+ break;
+ case LE_U_I32:
+ Def->setDesc(TII.get(GT_U_I32));
+ Inverted = true;
+ break;
+ case EQ_I64:
+ Def->setDesc(TII.get(NE_I64));
+ Inverted = true;
+ break;
+ case NE_I64:
+ Def->setDesc(TII.get(EQ_I64));
+ Inverted = true;
+ break;
+ case GT_S_I64:
+ Def->setDesc(TII.get(LE_S_I64));
+ Inverted = true;
+ break;
+ case GE_S_I64:
+ Def->setDesc(TII.get(LT_S_I64));
+ Inverted = true;
+ break;
+ case LT_S_I64:
+ Def->setDesc(TII.get(GE_S_I64));
+ Inverted = true;
+ break;
+ case LE_S_I64:
+ Def->setDesc(TII.get(GT_S_I64));
+ Inverted = true;
+ break;
+ case GT_U_I64:
+ Def->setDesc(TII.get(LE_U_I64));
+ Inverted = true;
+ break;
+ case GE_U_I64:
+ Def->setDesc(TII.get(LT_U_I64));
+ Inverted = true;
+ break;
+ case LT_U_I64:
+ Def->setDesc(TII.get(GE_U_I64));
+ Inverted = true;
+ break;
+ case LE_U_I64:
+ Def->setDesc(TII.get(GT_U_I64));
+ Inverted = true;
+ break;
+ case EQ_F32:
+ Def->setDesc(TII.get(NE_F32));
+ Inverted = true;
+ break;
+ case NE_F32:
+ Def->setDesc(TII.get(EQ_F32));
+ Inverted = true;
+ break;
+ case EQ_F64:
+ Def->setDesc(TII.get(NE_F64));
+ Inverted = true;
+ break;
+ case NE_F64:
+ Def->setDesc(TII.get(EQ_F64));
+ Inverted = true;
+ break;
case EQZ_I32: {
// Invert an eqz by replacing it with its operand.
Cond = Def->getOperand(1).getReg();
@@ -109,7 +181,8 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
Inverted = true;
break;
}
- default: break;
+ default:
+ break;
}
}
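
The reflowed switch above still encodes the same fact: every integer comparison has an exact single-instruction inverse, while for floats only eq/ne are inverted, since the ordered inequalities have no complement that behaves correctly for NaN inputs. A condensed sketch of that inversion table, with a toy enum in place of the WebAssembly opcode names:

enum class Cmp { Eq, Ne, LtS, LeS, GtS, GeS, LtU, LeU, GtU, GeU };

// Returns the comparison whose result is the logical negation of C.
constexpr Cmp invert(Cmp C) {
  switch (C) {
  case Cmp::Eq:  return Cmp::Ne;
  case Cmp::Ne:  return Cmp::Eq;
  case Cmp::LtS: return Cmp::GeS;
  case Cmp::LeS: return Cmp::GtS;
  case Cmp::GtS: return Cmp::LeS;
  case Cmp::GeS: return Cmp::LtS;
  case Cmp::LtU: return Cmp::GeU;
  case Cmp::LeU: return Cmp::GtU;
  case Cmp::GtU: return Cmp::LeU;
  case Cmp::GeU: return Cmp::LtU;
  }
  return C;   // unreachable; keeps compilers quiet
}

static_assert(invert(invert(Cmp::LtU)) == Cmp::LtU, "inversion is an involution");
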
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index e9cb7c10113b..0491f71cea7f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -50,24 +50,21 @@
///
 /// In detail, this pass does the following things:
///
-/// 1) Create three global variables: __THREW__, __threwValue, and __tempRet0.
-/// __tempRet0 will be set within __cxa_find_matching_catch() function in
-/// JS library, and __THREW__ and __threwValue will be set in invoke wrappers
+/// 1) Assumes the existence of global variables: __THREW__ and __threwValue.
+/// __THREW__ and __threwValue will be set in invoke wrappers
/// in JS glue code. For what invoke wrappers are, refer to 3). These
/// variables are used for both exceptions and setjmp/longjmps.
/// __THREW__ indicates whether an exception or a longjmp occurred or not. 0
/// means nothing occurred, 1 means an exception occurred, and other numbers
/// mean a longjmp occurred. In the case of longjmp, __threwValue variable
/// indicates the corresponding setjmp buffer the longjmp corresponds to.
-/// In exception handling, __tempRet0 indicates the type of an exception
-/// caught, and in setjmp/longjmp, it means the second argument to longjmp
-/// function.
///
/// * Exception handling
///
-/// 2) Create setThrew and setTempRet0 functions.
-/// The global variables created in 1) will exist in wasm address space,
-/// but their values should be set in JS code, so we provide these functions
+/// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions
+/// at link time.
+/// The global variables in 1) will exist in wasm address space,
+/// but their values should be set in JS code, so these functions serve
/// as interfaces to JS glue code. These functions are equivalent to the
/// following JS functions, which actually exist in asm.js version of JS
/// library.
@@ -78,10 +75,12 @@
/// __threwValue = value;
/// }
/// }
+///
+/// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
///
-/// function setTempRet0(value) {
-/// __tempRet0 = value;
-/// }
+/// In exception handling, getTempRet0 indicates the type of an exception
+/// caught, and in setjmp/longjmp, it means the second argument to longjmp
+/// function.
///
/// 3) Lower
/// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad
@@ -118,11 +117,10 @@
/// ... use %val ...
/// into
/// %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...)
-/// %val = {%fmc, __tempRet0}
+/// %val = {%fmc, getTempRet0()}
/// ... use %val ...
/// Here N is a number calculated based on the number of clauses.
-/// Global variable __tempRet0 is set within __cxa_find_matching_catch() in
-/// JS glue code.
+/// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
///
/// 5) Lower
/// resume {%a, %b}
@@ -138,7 +136,17 @@
///
/// * Setjmp / Longjmp handling
///
-/// 7) In the function entry that calls setjmp, initialize setjmpTable and
+/// In case calls to longjmp() exist
+///
+/// 1) Lower
+/// longjmp(buf, value)
+/// into
+/// emscripten_longjmp_jmpbuf(buf, value)
+/// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later.
+///
+/// In case calls to setjmp() exist
+///
+/// 2) In the function entry that calls setjmp, initialize setjmpTable and
/// setjmpTableSize as follows:
/// setjmpTableSize = 4;
/// setjmpTable = (int *) malloc(40);
@@ -146,27 +154,22 @@
/// setjmpTable and setjmpTableSize are used in saveSetjmp() function in JS
/// code.
///
-/// 8) Lower
+/// 3) Lower
/// setjmp(buf)
/// into
/// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
-/// setjmpTableSize = __tempRet0;
+/// setjmpTableSize = getTempRet0();
/// For each dynamic setjmp call, setjmpTable stores its ID (a number which
/// is incrementally assigned from 0) and its label (a unique number that
/// represents each callsite of setjmp). When we need more entries in
/// setjmpTable, it is reallocated in saveSetjmp() in JS code and it will
/// return the new table address, and assign the new table size in
-/// __tempRet0. saveSetjmp also stores the setjmp's ID into the buffer buf.
-/// A BB with setjmp is split into two after setjmp call in order to make the
-/// post-setjmp BB the possible destination of longjmp BB.
+/// setTempRet0(). saveSetjmp also stores the setjmp's ID into the buffer
+/// buf. A BB with setjmp is split into two after setjmp call in order to
+/// make the post-setjmp BB the possible destination of longjmp BB.
///
-/// 9) Lower
-/// longjmp(buf, value)
-/// into
-/// emscripten_longjmp_jmpbuf(buf, value)
-/// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later.
///
-/// 10) Lower every call that might longjmp into
+/// 4) Lower every call that might longjmp into
/// __THREW__ = 0;
/// call @__invoke_SIG(func, arg1, arg2)
/// %__THREW__.val = __THREW__;
@@ -176,32 +179,32 @@
/// setjmpTableSize);
/// if (%label == 0)
/// emscripten_longjmp(%__THREW__.val, __threwValue);
-/// __tempRet0 = __threwValue;
+/// setTempRet0(__threwValue);
/// } else {
/// %label = -1;
/// }
-/// longjmp_result = __tempRet0;
+/// longjmp_result = getTempRet0();
/// switch label {
/// label 1: goto post-setjmp BB 1
/// label 2: goto post-setjmp BB 2
/// ...
/// default: goto split next BB
/// }
-/// testSetjmp examines setjmpTable to see if there is a matching setjmp
-/// call. After calling an invoke wrapper, if a longjmp occurred, __THREW__
-/// will be the address of matching jmp_buf buffer and __threwValue be the
-/// second argument to longjmp. mem[__THREW__.val] is a setjmp ID that is
-/// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to
-/// each setjmp callsite. Label 0 means this longjmp buffer does not
-/// correspond to one of the setjmp callsites in this function, so in this
-/// case we just chain the longjmp to the caller. (Here we call
-/// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf.
-/// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while
-/// emscripten_longjmp takes an int. Both of them will eventually be lowered
-/// to emscripten_longjmp in s2wasm, but here we need two signatures - we
-/// can't translate an int value to a jmp_buf.)
-/// Label -1 means no longjmp occurred. Otherwise we jump to the right
-/// post-setjmp BB based on the label.
+/// testSetjmp examines setjmpTable to see if there is a matching setjmp
+/// call. After calling an invoke wrapper, if a longjmp occurred, __THREW__
+/// will be the address of matching jmp_buf buffer and __threwValue be the
+/// second argument to longjmp. mem[__THREW__.val] is a setjmp ID that is
+/// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to
+/// each setjmp callsite. Label 0 means this longjmp buffer does not
+/// correspond to one of the setjmp callsites in this function, so in this
+/// case we just chain the longjmp to the caller. (Here we call
+/// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf.
+/// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while
+/// emscripten_longjmp takes an int. Both of them will eventually be lowered
+/// to emscripten_longjmp in s2wasm, but here we need two signatures - we
+/// can't translate an int value to a jmp_buf.)
+/// Label -1 means no longjmp occurred. Otherwise we jump to the right
+/// post-setjmp BB based on the label.
///
///===----------------------------------------------------------------------===//
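
As a concrete (if simplified) picture of step 3) of the setjmp handling described above, a lowered setjmp call site behaves roughly like the following C++ sketch; saveSetjmp and getTempRet0 are the library/JS-glue functions this pass assumes exist at link time, and the surrounding variables are placeholders:

extern "C" int *saveSetjmp(void *buf, int label, int *table, int size);
extern "C" int getTempRet0();

static int *setjmpTable;     // per-function table initialized in the entry
static int setjmpTableSize;  // current capacity of setjmpTable

static void lowerOneSetjmp(void *buf) {
  // setjmp(buf) becomes a saveSetjmp call; a reallocated table comes back as
  // the return value, and its new size is reported through getTempRet0().
  setjmpTable = saveSetjmp(buf, /*label=*/1, setjmpTable, setjmpTableSize);
  setjmpTableSize = getTempRet0();
}
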
@@ -239,7 +242,8 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
GlobalVariable *ThrewGV;
GlobalVariable *ThrewValueGV;
- GlobalVariable *TempRet0GV;
+ Function *GetTempRet0Func;
+ Function *SetTempRet0Func;
Function *ResumeF;
Function *EHTypeIDF;
Function *EmLongjmpF;
@@ -272,9 +276,6 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); }
bool canLongjmp(Module &M, const Value *Callee) const;
- void createSetThrewFunction(Module &M);
- void createSetTempRet0Function(Module &M);
-
void rebuildSSA(Function &F);
public:
@@ -282,9 +283,10 @@ public:
WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true)
: ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj),
- ThrewGV(nullptr), ThrewValueGV(nullptr), TempRet0GV(nullptr),
- ResumeF(nullptr), EHTypeIDF(nullptr), EmLongjmpF(nullptr),
- EmLongjmpJmpbufF(nullptr), SaveSetjmpF(nullptr), TestSetjmpF(nullptr) {
+ ThrewGV(nullptr), ThrewValueGV(nullptr), GetTempRet0Func(nullptr),
+ SetTempRet0Func(nullptr), ResumeF(nullptr), EHTypeIDF(nullptr),
+ EmLongjmpF(nullptr), EmLongjmpJmpbufF(nullptr), SaveSetjmpF(nullptr),
+ TestSetjmpF(nullptr) {
EHWhitelistSet.insert(EHWhitelist.begin(), EHWhitelist.end());
}
bool runOnModule(Module &M) override;
@@ -333,13 +335,15 @@ static bool canThrow(const Value *V) {
return true;
}
-static GlobalVariable *createGlobalVariableI32(Module &M, IRBuilder<> &IRB,
- const char *Name) {
+// Get a global variable with the given name. If it doesn't exist, declare it,
+// which will generate an import, assuming that it will exist at link time.
+static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB,
+ const char *Name) {
if (M.getNamedGlobal(Name))
report_fatal_error(Twine("variable name is reserved: ") + Name);
return new GlobalVariable(M, IRB.getInt32Ty(), false,
- GlobalValue::WeakODRLinkage, IRB.getInt32(0), Name);
+ GlobalValue::ExternalLinkage, nullptr, Name);
}
// Simple function name mangler.
@@ -508,7 +512,8 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
Function *ThrowF = M.getFunction("__cxa_throw");
Function *TerminateF = M.getFunction("__clang_call_terminate");
if (Callee == BeginCatchF || Callee == EndCatchF ||
- Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF)
+ Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF ||
+ Callee == GetTempRet0Func || Callee == SetTempRet0Func)
return false;
// Otherwise we don't know
@@ -521,11 +526,11 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
// %label = _testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
// if (%label == 0)
// emscripten_longjmp(%__THREW__.val, threwValue);
-// __tempRet0 = threwValue;
+// setTempRet0(threwValue);
// } else {
// %label = -1;
// }
-// %longjmp_result = __tempRet0;
+// %longjmp_result = getTempRet0();
//
// As output parameters. returns %label, %longjmp_result, and the BB the last
// instruction (%longjmp_result = ...) is in.
@@ -569,15 +574,15 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
IRB.CreateCall(EmLongjmpF, {Threw, ThrewValue});
IRB.CreateUnreachable();
- // __tempRet0 = threwValue;
+ // setTempRet0(threwValue);
IRB.SetInsertPoint(EndBB2);
- IRB.CreateStore(ThrewValue, TempRet0GV);
+ IRB.CreateCall(SetTempRet0Func, ThrewValue);
IRB.CreateBr(EndBB1);
IRB.SetInsertPoint(ElseBB1);
IRB.CreateBr(EndBB1);
- // longjmp_result = __tempRet0;
+ // longjmp_result = getTempRet0();
IRB.SetInsertPoint(EndBB1);
PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label");
LabelPHI->addIncoming(ThenLabel, EndBB2);
@@ -587,68 +592,7 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
// Output parameter assignment
Label = LabelPHI;
EndBB = EndBB1;
- LongjmpResult = IRB.CreateLoad(TempRet0GV, "longjmp_result");
-}
-
-// Create setThrew function
-// function setThrew(threw, value) {
-// if (__THREW__ == 0) {
-// __THREW__ = threw;
-// __threwValue = value;
-// }
-// }
-void WebAssemblyLowerEmscriptenEHSjLj::createSetThrewFunction(Module &M) {
- LLVMContext &C = M.getContext();
- IRBuilder<> IRB(C);
-
- if (M.getNamedGlobal("setThrew"))
- report_fatal_error("setThrew already exists");
-
- Type *Params[] = {IRB.getInt32Ty(), IRB.getInt32Ty()};
- FunctionType *FTy = FunctionType::get(IRB.getVoidTy(), Params, false);
- Function *F =
- Function::Create(FTy, GlobalValue::WeakODRLinkage, "setThrew", &M);
- Argument *Arg1 = &*(F->arg_begin());
- Argument *Arg2 = &*std::next(F->arg_begin());
- Arg1->setName("threw");
- Arg2->setName("value");
- BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
- BasicBlock *ThenBB = BasicBlock::Create(C, "if.then", F);
- BasicBlock *EndBB = BasicBlock::Create(C, "if.end", F);
-
- IRB.SetInsertPoint(EntryBB);
- Value *Threw = IRB.CreateLoad(ThrewGV, ThrewGV->getName() + ".val");
- Value *Cmp = IRB.CreateICmpEQ(Threw, IRB.getInt32(0), "cmp");
- IRB.CreateCondBr(Cmp, ThenBB, EndBB);
-
- IRB.SetInsertPoint(ThenBB);
- IRB.CreateStore(Arg1, ThrewGV);
- IRB.CreateStore(Arg2, ThrewValueGV);
- IRB.CreateBr(EndBB);
-
- IRB.SetInsertPoint(EndBB);
- IRB.CreateRetVoid();
-}
-
-// Create setTempRet0 function
-// function setTempRet0(value) {
-// __tempRet0 = value;
-// }
-void WebAssemblyLowerEmscriptenEHSjLj::createSetTempRet0Function(Module &M) {
- LLVMContext &C = M.getContext();
- IRBuilder<> IRB(C);
-
- if (M.getNamedGlobal("setTempRet0"))
- report_fatal_error("setTempRet0 already exists");
- Type *Params[] = {IRB.getInt32Ty()};
- FunctionType *FTy = FunctionType::get(IRB.getVoidTy(), Params, false);
- Function *F =
- Function::Create(FTy, GlobalValue::WeakODRLinkage, "setTempRet0", &M);
- F->arg_begin()->setName("value");
- BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
- IRB.SetInsertPoint(EntryBB);
- IRB.CreateStore(&*F->arg_begin(), TempRet0GV);
- IRB.CreateRetVoid();
+ LongjmpResult = IRB.CreateCall(GetTempRet0Func, None, "longjmp_result");
}
void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
@@ -679,6 +623,8 @@ void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
}
bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
+ LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n");
+
LLVMContext &C = M.getContext();
IRBuilder<> IRB(C);
@@ -688,11 +634,19 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty();
bool DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed);
- // Create global variables __THREW__, threwValue, and __tempRet0, which are
- // used in common for both exception handling and setjmp/longjmp handling
- ThrewGV = createGlobalVariableI32(M, IRB, "__THREW__");
- ThrewValueGV = createGlobalVariableI32(M, IRB, "__threwValue");
- TempRet0GV = createGlobalVariableI32(M, IRB, "__tempRet0");
+ // Declare (or get) the global variables __THREW__ and __threwValue, and the
+ // getTempRet0/setTempRet0 functions, which are used in common for both
+ // exception handling and setjmp/longjmp handling
+ ThrewGV = getGlobalVariableI32(M, IRB, "__THREW__");
+ ThrewValueGV = getGlobalVariableI32(M, IRB, "__threwValue");
+ GetTempRet0Func =
+ Function::Create(FunctionType::get(IRB.getInt32Ty(), false),
+ GlobalValue::ExternalLinkage, "getTempRet0", &M);
+ SetTempRet0Func = Function::Create(
+ FunctionType::get(IRB.getVoidTy(), IRB.getInt32Ty(), false),
+ GlobalValue::ExternalLinkage, "setTempRet0", &M);
+ GetTempRet0Func->setDoesNotThrow();
+ SetTempRet0Func->setDoesNotThrow();
bool Changed = false;
@@ -721,22 +675,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
if (DoSjLj) {
Changed = true; // We have setjmp or longjmp somewhere
- // Register saveSetjmp function
- FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
- SmallVector<Type *, 4> Params = {SetjmpFTy->getParamType(0),
- IRB.getInt32Ty(), Type::getInt32PtrTy(C),
- IRB.getInt32Ty()};
- FunctionType *FTy =
- FunctionType::get(Type::getInt32PtrTy(C), Params, false);
- SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
- SaveSetjmpFName, &M);
-
- // Register testSetjmp function
- Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()};
- FTy = FunctionType::get(IRB.getInt32Ty(), Params, false);
- TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
- TestSetjmpFName, &M);
-
if (LongjmpF) {
// Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is
// defined in JS code
@@ -746,27 +684,43 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF);
}
- FTy = FunctionType::get(IRB.getVoidTy(),
- {IRB.getInt32Ty(), IRB.getInt32Ty()}, false);
- EmLongjmpF =
- Function::Create(FTy, GlobalValue::ExternalLinkage, EmLongjmpFName, &M);
-
- // Only traverse functions that uses setjmp in order not to insert
- // unnecessary prep / cleanup code in every function
- SmallPtrSet<Function *, 8> SetjmpUsers;
- for (User *U : SetjmpF->users()) {
- auto *UI = cast<Instruction>(U);
- SetjmpUsers.insert(UI->getFunction());
+
+ if (SetjmpF) {
+ // Register saveSetjmp function
+ FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
+ SmallVector<Type *, 4> Params = {SetjmpFTy->getParamType(0),
+ IRB.getInt32Ty(), Type::getInt32PtrTy(C),
+ IRB.getInt32Ty()};
+ FunctionType *FTy =
+ FunctionType::get(Type::getInt32PtrTy(C), Params, false);
+ SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ SaveSetjmpFName, &M);
+
+ // Register testSetjmp function
+ Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()};
+ FTy = FunctionType::get(IRB.getInt32Ty(), Params, false);
+ TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ TestSetjmpFName, &M);
+
+ FTy = FunctionType::get(IRB.getVoidTy(),
+ {IRB.getInt32Ty(), IRB.getInt32Ty()}, false);
+ EmLongjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ EmLongjmpFName, &M);
+
+ // Only traverse functions that use setjmp in order not to insert
+ // unnecessary prep / cleanup code in every function
+ SmallPtrSet<Function *, 8> SetjmpUsers;
+ for (User *U : SetjmpF->users()) {
+ auto *UI = cast<Instruction>(U);
+ SetjmpUsers.insert(UI->getFunction());
+ }
+ for (Function *F : SetjmpUsers)
+ runSjLjOnFunction(*F);
}
- for (Function *F : SetjmpUsers)
- runSjLjOnFunction(*F);
}
if (!Changed) {
// Delete unused global variables and functions
- ThrewGV->eraseFromParent();
- ThrewValueGV->eraseFromParent();
- TempRet0GV->eraseFromParent();
if (ResumeF)
ResumeF->eraseFromParent();
if (EHTypeIDF)
@@ -780,12 +734,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
return false;
}
- // If we have made any changes while doing exception handling or
- // setjmp/longjmp handling, we have to create these functions for JavaScript
- // to call.
- createSetThrewFunction(M);
- createSetTempRet0Function(M);
-
return true;
}
@@ -908,8 +856,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc");
Value *Undef = UndefValue::get(LPI->getType());
Value *Pair0 = IRB.CreateInsertValue(Undef, FMCI, 0, "pair0");
- Value *TempRet0 =
- IRB.CreateLoad(TempRet0GV, TempRet0GV->getName() + ".val");
+ Value *TempRet0 = IRB.CreateCall(GetTempRet0Func, None, "tempret0");
Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1");
LPI->replaceAllUsesWith(Pair1);
@@ -990,7 +937,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
Instruction *NewSetjmpTable =
IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable");
Instruction *NewSetjmpTableSize =
- IRB.CreateLoad(TempRet0GV, "setjmpTableSize");
+ IRB.CreateCall(GetTempRet0Func, None, "setjmpTableSize");
SetjmpTableInsts.push_back(NewSetjmpTable);
SetjmpTableSizeInsts.push_back(NewSetjmpTableSize);
ToErase.push_back(CI);
@@ -1098,7 +1045,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// Free setjmpTable buffer before each return instruction
for (BasicBlock &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (isa<ReturnInst>(TI))
CallInst::CreateFree(SetjmpTable, TI);
}
@@ -1112,7 +1059,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// ...
// somebb:
// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
- // setjmpTableSize = __tempRet0;
+ // setjmpTableSize = getTempRet0();
// So we need to make sure the SSA for these variables is valid so that every
// saveSetjmp and testSetjmp call has the correct arguments.
SSAUpdater SetjmpTableSSA;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
index ee708d637b25..84c877cb8d02 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
@@ -18,15 +18,15 @@
//===----------------------------------------------------------------------===//
#include "WebAssembly.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Pass.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-lower-global-dtors"
@@ -59,6 +59,8 @@ ModulePass *llvm::createWebAssemblyLowerGlobalDtors() {
}
bool LowerGlobalDtors::runOnModule(Module &M) {
+ LLVM_DEBUG(dbgs() << "********** Lower Global Destructors **********\n");
+
GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors");
if (!GV)
return false;
@@ -77,18 +79,20 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
// Collect the contents of @llvm.global_dtors, collated by priority and
// associated symbol.
- std::map<uint16_t, MapVector<Constant *, std::vector<Constant *> > > DtorFuncs;
+ std::map<uint16_t, MapVector<Constant *, std::vector<Constant *>>> DtorFuncs;
for (Value *O : InitList->operands()) {
ConstantStruct *CS = dyn_cast<ConstantStruct>(O);
- if (!CS) continue; // Malformed.
+ if (!CS)
+ continue; // Malformed.
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
- if (!Priority) continue; // Malformed.
+ if (!Priority)
+ continue; // Malformed.
uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX);
Constant *DtorFunc = CS->getOperand(1);
if (DtorFunc->isNullValue())
- break; // Found a null terminator, skip the rest.
+ break; // Found a null terminator, skip the rest.
Constant *Associated = CS->getOperand(2);
Associated = cast<Constant>(Associated->stripPointerCastsNoFollowAliases());
@@ -101,31 +105,23 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
// extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
LLVMContext &C = M.getContext();
PointerType *VoidStar = Type::getInt8PtrTy(C);
- Type *AtExitFuncArgs[] = { VoidStar };
- FunctionType *AtExitFuncTy = FunctionType::get(
- Type::getVoidTy(C),
- AtExitFuncArgs,
- /*isVarArg=*/false);
-
- Type *AtExitArgs[] = {
- PointerType::get(AtExitFuncTy, 0),
- VoidStar,
- VoidStar
- };
- FunctionType *AtExitTy = FunctionType::get(
- Type::getInt32Ty(C),
- AtExitArgs,
- /*isVarArg=*/false);
+ Type *AtExitFuncArgs[] = {VoidStar};
+ FunctionType *AtExitFuncTy =
+ FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs,
+ /*isVarArg=*/false);
+
+ Type *AtExitArgs[] = {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar};
+ FunctionType *AtExitTy = FunctionType::get(Type::getInt32Ty(C), AtExitArgs,
+ /*isVarArg=*/false);
Constant *AtExit = M.getOrInsertFunction("__cxa_atexit", AtExitTy);
// Declare __dso_local.
Constant *DsoHandle = M.getNamedValue("__dso_handle");
if (!DsoHandle) {
Type *DsoHandleTy = Type::getInt8Ty(C);
- GlobalVariable *Handle =
- new GlobalVariable(M, DsoHandleTy, /*isConstant=*/true,
- GlobalVariable::ExternalWeakLinkage,
- nullptr, "__dso_handle");
+ GlobalVariable *Handle = new GlobalVariable(
+ M, DsoHandleTy, /*isConstant=*/true,
+ GlobalVariable::ExternalWeakLinkage, nullptr, "__dso_handle");
Handle->setVisibility(GlobalVariable::HiddenVisibility);
DsoHandle = Handle;
}
@@ -139,13 +135,13 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
Constant *Associated = AssociatedAndMore.first;
Function *CallDtors = Function::Create(
- AtExitFuncTy, Function::PrivateLinkage,
- "call_dtors" +
- (Priority != UINT16_MAX ?
- (Twine(".") + Twine(Priority)) : Twine()) +
- (!Associated->isNullValue() ?
- (Twine(".") + Associated->getName()) : Twine()),
- &M);
+ AtExitFuncTy, Function::PrivateLinkage,
+ "call_dtors" +
+ (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority))
+ : Twine()) +
+ (!Associated->isNullValue() ? (Twine(".") + Associated->getName())
+ : Twine()),
+ &M);
BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors);
for (auto Dtor : AssociatedAndMore.second)
@@ -155,29 +151,29 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C),
/*isVarArg=*/false);
Function *RegisterCallDtors = Function::Create(
- VoidVoid, Function::PrivateLinkage,
- "register_call_dtors" +
- (Priority != UINT16_MAX ?
- (Twine(".") + Twine(Priority)) : Twine()) +
- (!Associated->isNullValue() ?
- (Twine(".") + Associated->getName()) : Twine()),
- &M);
+ VoidVoid, Function::PrivateLinkage,
+ "register_call_dtors" +
+ (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority))
+ : Twine()) +
+ (!Associated->isNullValue() ? (Twine(".") + Associated->getName())
+ : Twine()),
+ &M);
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", RegisterCallDtors);
BasicBlock *FailBB = BasicBlock::Create(C, "fail", RegisterCallDtors);
BasicBlock *RetBB = BasicBlock::Create(C, "return", RegisterCallDtors);
Value *Null = ConstantPointerNull::get(VoidStar);
- Value *Args[] = { CallDtors, Null, DsoHandle };
+ Value *Args[] = {CallDtors, Null, DsoHandle};
Value *Res = CallInst::Create(AtExit, Args, "call", EntryBB);
Value *Cmp = new ICmpInst(*EntryBB, ICmpInst::ICMP_NE, Res,
Constant::getNullValue(Res->getType()));
BranchInst::Create(FailBB, RetBB, Cmp, EntryBB);
// If `__cxa_atexit` hits out-of-memory, trap, so that we don't misbehave.
- // This should be very rare, because if the process is running out of memory
- // before main has even started, something is wrong.
- CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap),
- "", FailBB);
+ // This should be very rare, because if the process is running out of
+ // memory before main has even started, something is wrong.
+ CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap), "",
+ FailBB);
new UnreachableInst(C, FailBB);
ReturnInst::Create(C, RetBB);
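
For each (priority, associated-symbol) group the pass synthesizes a call_dtors.* helper and a register_call_dtors.* wrapper; their behavior corresponds roughly to the following C++ sketch (dtor_a/dtor_b are placeholder destructors, not names the pass emits):

extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
extern "C" char __dso_handle;

static void dtor_a() {}  // stand-ins for entries collected from
static void dtor_b() {}  // @llvm.global_dtors

static void call_dtors(void *) {      // corresponds to "call_dtors.<priority>"
  dtor_a();
  dtor_b();
}

static void register_call_dtors() {   // "register_call_dtors.<priority>"
  if (__cxa_atexit(call_dtors, nullptr, &__dso_handle) != 0)
    __builtin_trap();  // registration failed (OOM); trap rather than misbehave
}
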
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index d85db14fc679..fa862fbaa634 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -30,6 +30,21 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+// Defines llvm::WebAssembly::getStackOpcode to convert register instructions to
+// stack instructions
+#define GET_INSTRMAP_INFO 1
+#include "WebAssemblyGenInstrInfo.inc"
+
+// This disables the removal of registers when lowering into MC, as required
+// by some current tests.
+static cl::opt<bool>
+ WasmKeepRegisters("wasm-keep-registers", cl::Hidden,
+ cl::desc("WebAssembly: output stack registers in"
+ " instruction output for test purposes only."),
+ cl::init(false));
+
+static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI);
+
MCSymbol *
WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
const GlobalValue *Global = MO.getGlobal();
@@ -40,35 +55,13 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
const TargetMachine &TM = MF.getTarget();
const Function &CurrentFunc = MF.getFunction();
- SmallVector<wasm::ValType, 4> Returns;
- SmallVector<wasm::ValType, 4> Params;
-
- wasm::ValType iPTR =
- MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() ?
- wasm::ValType::I64 :
- wasm::ValType::I32;
-
- SmallVector<MVT, 4> ResultMVTs;
- ComputeLegalValueVTs(CurrentFunc, TM, FuncTy->getReturnType(), ResultMVTs);
- // WebAssembly can't currently handle returning tuples.
- if (ResultMVTs.size() <= 1)
- for (MVT ResultMVT : ResultMVTs)
- Returns.push_back(WebAssembly::toValType(ResultMVT));
- else
- Params.push_back(iPTR);
-
- for (Type *Ty : FuncTy->params()) {
- SmallVector<MVT, 4> ParamMVTs;
- ComputeLegalValueVTs(CurrentFunc, TM, Ty, ParamMVTs);
- for (MVT ParamMVT : ParamMVTs)
- Params.push_back(WebAssembly::toValType(ParamMVT));
- }
-
- if (FuncTy->isVarArg())
- Params.push_back(iPTR);
+ SmallVector<MVT, 1> ResultMVTs;
+ SmallVector<MVT, 4> ParamMVTs;
+ ComputeSignatureVTs(FuncTy, CurrentFunc, TM, ParamMVTs, ResultMVTs);
- WasmSym->setReturns(std::move(Returns));
- WasmSym->setParams(std::move(Params));
+ auto Signature = SignatureFromMVTs(ResultMVTs, ParamMVTs);
+ WasmSym->setSignature(Signature.get());
+ Printer.addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
}
@@ -82,10 +75,10 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
cast<MCSymbolWasm>(Printer.GetExternalSymbolSymbol(Name));
const WebAssemblySubtarget &Subtarget = Printer.getSubtarget();
- // __stack_pointer is a global variable; all other external symbols used by
- // CodeGen are functions. It's OK to hardcode knowledge of specific symbols
- // here; this method is precisely there for fetching the signatures of known
- // Clang-provided symbols.
+ // Except for the two exceptions (__stack_pointer and __cpp_exception), all
+ // other external symbols used by CodeGen are functions. It's OK to hardcode
+ // knowledge of specific symbols here; this method is precisely there for
+ // fetching the signatures of known Clang-provided symbols.
if (strcmp(Name, "__stack_pointer") == 0) {
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
WasmSym->setGlobalType(wasm::WasmGlobalType{
@@ -97,27 +90,55 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
SmallVector<wasm::ValType, 4> Returns;
SmallVector<wasm::ValType, 4> Params;
- GetSignature(Subtarget, Name, Returns, Params);
+ if (strcmp(Name, "__cpp_exception") == 0) {
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT);
+ // We can't confirm its signature index for now because there can be
+ // imported exceptions. Set it to be 0 for now.
+ WasmSym->setEventType(
+ {wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION, /* SigIndex */ 0});
+ // We may have multiple C++ compilation units to be linked together, each of
+ // which defines the exception symbol. To resolve them, we declare them as
+ // weak.
+ WasmSym->setWeak(true);
+ WasmSym->setExternal(true);
- WasmSym->setReturns(std::move(Returns));
- WasmSym->setParams(std::move(Params));
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ // All C++ exceptions are assumed to have a single i32 (for wasm32) or i64
+ // (for wasm64) param type and void return type. The reason is, all C++
+ // exception values are pointers, and to share the type section with
+ // functions, exceptions are assumed to have void return type.
+ Params.push_back(Subtarget.hasAddr64() ? wasm::ValType::I64
+ : wasm::ValType::I32);
+ } else { // Function symbols
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ GetLibcallSignature(Subtarget, Name, Returns, Params);
+ }
+ auto Signature =
+ make_unique<wasm::WasmSignature>(std::move(Returns), std::move(Params));
+ WasmSym->setSignature(Signature.get());
+ Printer.addSignature(std::move(Signature));
return WasmSym;
}
MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
int64_t Offset,
- bool IsFunc) const {
+ bool IsFunc, bool IsGlob,
+ bool IsEvent) const {
MCSymbolRefExpr::VariantKind VK =
IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION
- : MCSymbolRefExpr::VK_None;
+ : IsGlob ? MCSymbolRefExpr::VK_WebAssembly_GLOBAL
+ : IsEvent ? MCSymbolRefExpr::VK_WebAssembly_EVENT
+ : MCSymbolRefExpr::VK_None;
const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx);
if (Offset != 0) {
if (IsFunc)
report_fatal_error("Function addresses with offsets not supported");
+ if (IsGlob)
+ report_fatal_error("Global indexes with offsets not supported");
+ if (IsEvent)
+ report_fatal_error("Event indexes with offsets not supported");
Expr =
MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx);
}
@@ -135,6 +156,8 @@ static wasm::ValType getType(const TargetRegisterClass *RC) {
return wasm::ValType::F32;
if (RC == &WebAssembly::F64RegClass)
return wasm::ValType::F64;
+ if (RC == &WebAssembly::V128RegClass)
+ return wasm::ValType::V128;
llvm_unreachable("Unexpected register class");
}
@@ -187,8 +210,10 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
Params.pop_back();
MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
- WasmSym->setReturns(std::move(Returns));
- WasmSym->setParams(std::move(Params));
+ auto Signature = make_unique<wasm::WasmSignature>(std::move(Returns),
+ std::move(Params));
+ WasmSym->setSignature(Signature.get());
+ Printer.addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
const MCExpr *Expr = MCSymbolRefExpr::create(
@@ -212,21 +237,68 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
break;
}
case MachineOperand::MO_GlobalAddress:
- assert(MO.getTargetFlags() == 0 &&
+ assert(MO.getTargetFlags() == WebAssemblyII::MO_NO_FLAG &&
"WebAssembly does not use target flags on GlobalAddresses");
MCOp = LowerSymbolOperand(GetGlobalAddressSymbol(MO), MO.getOffset(),
- MO.getGlobal()->getValueType()->isFunctionTy());
+ MO.getGlobal()->getValueType()->isFunctionTy(),
+ false, false);
break;
case MachineOperand::MO_ExternalSymbol:
// The target flag indicates whether this is a symbol for a
// variable or a function.
- assert((MO.getTargetFlags() & -2) == 0 &&
- "WebAssembly uses only one target flag bit on ExternalSymbols");
- MCOp = LowerSymbolOperand(GetExternalSymbolSymbol(MO), /*Offset=*/0,
- MO.getTargetFlags() & 1);
+ assert((MO.getTargetFlags() & ~WebAssemblyII::MO_SYMBOL_MASK) == 0 &&
+ "WebAssembly uses only symbol flags on ExternalSymbols");
+ MCOp = LowerSymbolOperand(
+ GetExternalSymbolSymbol(MO), /*Offset=*/0,
+ (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_FUNCTION) != 0,
+ (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_GLOBAL) != 0,
+ (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_EVENT) != 0);
+ break;
+ case MachineOperand::MO_MCSymbol:
+ // This is currently used only for LSDA symbols (GCC_except_table),
+ // because global addresses or other external symbols are handled above.
+ assert(MO.getTargetFlags() == 0 &&
+ "WebAssembly does not use target flags on MCSymbol");
+ MCOp = LowerSymbolOperand(MO.getMCSymbol(), /*Offset=*/0, false, false,
+ false);
break;
}
OutMI.addOperand(MCOp);
}
+
+ if (!WasmKeepRegisters)
+ removeRegisterOperands(MI, OutMI);
+}
+
+static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI) {
+ // Remove all uses of stackified registers to bring the instruction format
+ // into its final stack form used throughout MC, and transition opcodes to
+ // their _S variant.
+ // We do this separately from the above code that still may need these
+ // registers for e.g. call_indirect signatures.
+ // See comments in lib/Target/WebAssembly/WebAssemblyInstrFormats.td for
+ // details.
+ // TODO: the code above creates new registers which are then removed here.
+ // That code could be slightly simplified by not doing that, though maybe
+ // it is simpler conceptually to keep the code above in "register mode"
+ // until this transition point.
+ // FIXME: we are not processing inline assembly, which contains register
+ // operands, because it is used by later target generic code.
+ if (MI->isDebugInstr() || MI->isLabel() || MI->isInlineAsm())
+ return;
+
+ // Transform to _S instruction.
+ auto RegOpcode = OutMI.getOpcode();
+ auto StackOpcode = WebAssembly::getStackOpcode(RegOpcode);
+ assert(StackOpcode != -1 && "Failed to stackify instruction");
+ OutMI.setOpcode(StackOpcode);
+
+ // Remove register operands.
+ for (auto I = OutMI.getNumOperands(); I; --I) {
+ auto &MO = OutMI.getOperand(I - 1);
+ if (MO.isReg()) {
+ OutMI.erase(&MO);
+ }
+ }
}
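
Conceptually, removeRegisterOperands turns each register-form MCInst into its stack (_S) form: the opcode is remapped via getStackOpcode and every register operand is dropped, since from this point on operands live on the wasm value stack (passing -wasm-keep-registers, added above, suppresses this so tests can still see registers). A self-contained toy sketch of the same erase-register-operands idea, not using the MCInst API:

#include <string>
#include <vector>

struct Operand { bool IsReg; long Value; };
struct Inst { std::string Opcode; std::vector<Operand> Ops; };

// Switch to the "_S" (stack) opcode and erase all register operands;
// immediates and symbol operands are kept.
void toStackForm(Inst &I) {
  I.Opcode += "_S";
  for (auto It = I.Ops.begin(); It != I.Ops.end();) {
    if (It->IsReg)
      It = I.Ops.erase(It);
    else
      ++It;
  }
}
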
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
index 41b4313bb38c..fa7a0ea61b3b 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -33,8 +33,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
- MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset,
- bool IsFunc) const;
+ MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset, bool IsFunc,
+ bool IsGlob, bool IsEvent) const;
public:
WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer)
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index e511e574050f..0157af0f8510 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -43,20 +43,38 @@ void llvm::ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
}
}
-void llvm::ComputeSignatureVTs(const Function &F, const TargetMachine &TM,
+void llvm::ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
+ const TargetMachine &TM,
SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results) {
- ComputeLegalValueVTs(F, TM, F.getReturnType(), Results);
+ ComputeLegalValueVTs(F, TM, Ty->getReturnType(), Results);
+ MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits());
if (Results.size() > 1) {
// WebAssembly currently can't lower returns of multiple values without
// demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So
// replace multiple return values with a pointer parameter.
Results.clear();
- Params.push_back(
- MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits()));
+ Params.push_back(PtrVT);
}
- for (auto &Arg : F.args())
- ComputeLegalValueVTs(F, TM, Arg.getType(), Params);
+ for (auto *Param : Ty->params())
+ ComputeLegalValueVTs(F, TM, Param, Params);
+ if (Ty->isVarArg())
+ Params.push_back(PtrVT);
+}
+
+void llvm::ValTypesFromMVTs(const ArrayRef<MVT> &In,
+ SmallVectorImpl<wasm::ValType> &Out) {
+ for (MVT Ty : In)
+ Out.push_back(WebAssembly::toValType(Ty));
+}
+
+std::unique_ptr<wasm::WasmSignature>
+llvm::SignatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+ const SmallVectorImpl<MVT> &Params) {
+ auto Sig = make_unique<wasm::WasmSignature>();
+ ValTypesFromMVTs(Results, Sig->Returns);
+ ValTypesFromMVTs(Params, Sig->Params);
+ return Sig;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index a60b10fc5309..4be4beb85d04 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -17,7 +17,9 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCSymbolWasm.h"
namespace llvm {
@@ -50,7 +52,7 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
// overaligned values on the user stack.
unsigned BasePtrVreg = -1U;
- public:
+public:
explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
~WebAssemblyFunctionInfo() override;
@@ -60,7 +62,10 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
void addResult(MVT VT) { Results.push_back(VT); }
const std::vector<MVT> &getResults() const { return Results; }
- void clearParamsAndResults() { Params.clear(); Results.clear(); }
+ void clearParamsAndResults() {
+ Params.clear();
+ Results.clear();
+ }
void setNumLocals(size_t NumLocals) { Locals.resize(NumLocals, MVT::i32); }
void setLocal(size_t i, MVT VT) { Locals[i] = VT; }
@@ -115,13 +120,22 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
}
};
-void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
- Type *Ty, SmallVectorImpl<MVT> &ValueVTs);
+void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
+ SmallVectorImpl<MVT> &ValueVTs);
-void ComputeSignatureVTs(const Function &F, const TargetMachine &TM,
- SmallVectorImpl<MVT> &Params,
+// Compute the signature for a given FunctionType (Ty). Note that it's not the
+// signature for F (F is just used to get various context).
+void ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
+ const TargetMachine &TM, SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results);
+void ValTypesFromMVTs(const ArrayRef<MVT> &In,
+ SmallVectorImpl<wasm::ValType> &Out);
+
+std::unique_ptr<wasm::WasmSignature>
+SignatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+ const SmallVectorImpl<MVT> &Params);
+
} // end namespace llvm
#endif
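
The rules ComputeSignatureVTs encodes — demote a multi-value return to a leading sret-style pointer parameter, and append a pointer parameter for varargs — can be sketched without the LLVM types as follows (a standalone illustration under those assumptions, not the actual API):

#include <vector>

enum class VT { I32, I64, F32, F64 };

struct Sig {
  std::vector<VT> Params;
  std::vector<VT> Results;
};

Sig computeSignature(std::vector<VT> Results, std::vector<VT> Params,
                     bool IsVarArg, VT PtrVT) {
  Sig S;
  if (Results.size() > 1) {
    // Multi-value returns can't be lowered here, so demote them to an
    // sret-style pointer parameter placed before the regular parameters.
    Results.clear();
    Params.insert(Params.begin(), PtrVT);
  }
  if (IsVarArg)
    Params.push_back(PtrVT); // pointer to the caller-allocated vararg buffer
  S.Params = std::move(Params);
  S.Results = std::move(Results);
  return S;
}
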
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
index 893e8484c4c6..c4b5e96db0c7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -1,4 +1,4 @@
-//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===//
+//== WebAssemblyMemIntrinsicResults.cpp - Optimize memory intrinsic results ==//
//
// The LLVM Compiler Infrastructure
//
@@ -8,19 +8,22 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements an optimization pass using store result values.
+/// This file implements an optimization pass using memory intrinsic results.
///
-/// WebAssembly's store instructions return the stored value. This is to enable
-/// an optimization wherein uses of the stored value can be replaced by uses of
-/// the store's result value, making the stored value register more likely to
-/// be single-use, thus more likely to be useful to register stackifying, and
-/// potentially also exposing the store to register stackifying. These both can
-/// reduce get_local/set_local traffic.
+/// Calls to memory intrinsics (memcpy, memmove, memset) return the destination
+/// address. They are in the form of
+/// %dst_new = call @memcpy %dst, %src, %len
+/// where %dst and %dst_new registers contain the same value.
///
-/// This pass also performs this optimization for memcpy, memmove, and memset
-/// calls, since the LLVM intrinsics for these return void so they can't use the
-/// returned attribute and consequently aren't handled by the OptimizeReturned
-/// pass.
+/// This is to enable an optimization wherein uses of the %dst register used in
+/// the parameter can be replaced by uses of the %dst_new register used in the
+/// result, making the %dst register more likely to be single-use, thus more
+/// likely to be useful to register stackifying, and potentially also exposing
+/// the call instruction itself to register stackifying. These both can reduce
+/// local.get/local.set traffic.
+///
+/// The LLVM intrinsics for these return void so they can't use the returned
+/// attribute and consequently aren't handled by the OptimizeReturned pass.
///
//===----------------------------------------------------------------------===//
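
At the source level the property being exploited is simply that memcpy (and memmove/memset) returns its destination, so later uses of the destination can be redirected to the call's result; a minimal C++ illustration of the rewrite the pass performs on machine instructions (assumes n >= 1):

#include <cstddef>
#include <cstring>

int sumEnds(char *dst, const char *src, std::size_t n) {
  // Using the returned pointer below leaves the original `dst` register
  // single-use, which is what makes it attractive to register stackification.
  char *dstNew = static_cast<char *>(std::memcpy(dst, src, n));
  return dstNew[0] + dstNew[n - 1];
}
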
@@ -38,15 +41,17 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define DEBUG_TYPE "wasm-store-results"
+#define DEBUG_TYPE "wasm-mem-intrinsic-results"
namespace {
-class WebAssemblyStoreResults final : public MachineFunctionPass {
+class WebAssemblyMemIntrinsicResults final : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- WebAssemblyStoreResults() : MachineFunctionPass(ID) {}
+ WebAssemblyMemIntrinsicResults() : MachineFunctionPass(ID) {}
- StringRef getPassName() const override { return "WebAssembly Store Results"; }
+ StringRef getPassName() const override {
+ return "WebAssembly Memory Intrinsic Results";
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -67,12 +72,13 @@ private:
};
} // end anonymous namespace
-char WebAssemblyStoreResults::ID = 0;
-INITIALIZE_PASS(WebAssemblyStoreResults, DEBUG_TYPE,
- "Optimize store result values for WebAssembly", false, false)
+char WebAssemblyMemIntrinsicResults::ID = 0;
+INITIALIZE_PASS(WebAssemblyMemIntrinsicResults, DEBUG_TYPE,
+ "Optimize memory intrinsic result values for WebAssembly",
+ false, false)
-FunctionPass *llvm::createWebAssemblyStoreResults() {
- return new WebAssemblyStoreResults();
+FunctionPass *llvm::createWebAssemblyMemIntrinsicResults() {
+ return new WebAssemblyMemIntrinsicResults();
}
// Replace uses of FromReg with ToReg if they are dominated by MI.
@@ -91,7 +97,8 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
SmallVector<SlotIndex, 4> Indices;
- for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end(); I != E;) {
+ for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end();
+ I != E;) {
MachineOperand &O = *I++;
MachineInstr *Where = O.getParent();
@@ -132,9 +139,9 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
// If we replaced all dominated uses, FromReg is now killed at MI.
if (!FromLI->liveAt(FromIdx.getDeadSlot()))
- MI.addRegisterKilled(FromReg,
- MBB.getParent()->getSubtarget<WebAssemblySubtarget>()
- .getRegisterInfo());
+ MI.addRegisterKilled(FromReg, MBB.getParent()
+ ->getSubtarget<WebAssemblySubtarget>()
+ .getRegisterInfo());
}
return Changed;
@@ -142,8 +149,7 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
const MachineRegisterInfo &MRI,
- MachineDominatorTree &MDT,
- LiveIntervals &LIS,
+ MachineDominatorTree &MDT, LiveIntervals &LIS,
const WebAssemblyTargetLowering &TLI,
const TargetLibraryInfo &LibInfo) {
MachineOperand &Op1 = MI.getOperand(1);
@@ -164,14 +170,14 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
unsigned FromReg = MI.getOperand(2).getReg();
unsigned ToReg = MI.getOperand(0).getReg();
if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
- report_fatal_error("Store results: call to builtin function with wrong "
- "signature, from/to mismatch");
+ report_fatal_error("Memory Intrinsic results: call to builtin function "
+ "with wrong signature, from/to mismatch");
return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
}
-bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
+bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG({
- dbgs() << "********** Store Results **********\n"
+ dbgs() << "********** Memory Intrinsic Results **********\n"
<< "********** Function: " << MF.getName() << '\n';
});
@@ -186,7 +192,8 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
// We don't preserve SSA form.
MRI.leaveSSA();
- assert(MRI.tracksLiveness() && "StoreResults expects liveness tracking");
+ assert(MRI.tracksLiveness() &&
+ "MemIntrinsicResults expects liveness tracking");
for (auto &MBB : MF) {
LLVM_DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n');
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 04ac22a589ea..3d0a15244ee0 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -65,7 +65,8 @@ FunctionPass *llvm::createWebAssemblyOptimizeLiveIntervals() {
return new WebAssemblyOptimizeLiveIntervals();
}
-bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
+bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
+ MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** Optimize LiveIntervals **********\n"
"********** Function: "
<< MF.getName() << '\n');
@@ -76,11 +77,10 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF)
// We don't preserve SSA form.
MRI.leaveSSA();
- assert(MRI.tracksLiveness() &&
- "OptimizeLiveIntervals expects liveness");
+ assert(MRI.tracksLiveness() && "OptimizeLiveIntervals expects liveness");
// Split multiple-VN LiveIntervals into multiple LiveIntervals.
- SmallVector<LiveInterval*, 4> SplitLIs;
+ SmallVector<LiveInterval *, 4> SplitLIs;
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (MRI.reg_nodbg_empty(Reg))
@@ -94,7 +94,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF)
// instructions to satisfy LiveIntervals' requirement that all uses be
// dominated by defs. Now that LiveIntervals has computed which of these
// defs are actually needed and which are dead, remove the dead ones.
- for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE; ) {
+ for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE;) {
MachineInstr *MI = &*MII++;
if (MI->isImplicitDef() && MI->getOperand(0).isDead()) {
LiveInterval &LI = LIS.getInterval(MI->getOperand(0).getReg());
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 113ee2532bce..2c018d0785a7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -74,6 +74,10 @@ void OptimizeReturned::visitCallSite(CallSite CS) {
}
bool OptimizeReturned::runOnFunction(Function &F) {
+ LLVM_DEBUG(dbgs() << "********** Optimize returned Attributes **********\n"
+ "********** Function: "
+ << F.getName() << '\n');
+
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
visit(F);
return true;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index a54484407805..2dfd85953f14 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -192,11 +192,21 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4i32,
WebAssembly::COPY_V128);
break;
+ case WebAssembly::RETURN_v2i64:
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2i64,
+ WebAssembly::COPY_V128);
+ break;
case WebAssembly::RETURN_v4f32:
Changed |= MaybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4f32,
WebAssembly::COPY_V128);
break;
+ case WebAssembly::RETURN_v2f64:
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2f64,
+ WebAssembly::COPY_V128);
+ break;
case WebAssembly::RETURN_VOID:
Changed |= MaybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_VOID,
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index e44e7057e233..0be0ba657830 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -70,7 +70,8 @@ static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
return false;
}
-bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
+bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
+ MachineFunction &MF) {
LLVM_DEBUG({
dbgs() << "********** Prepare For LiveIntervals **********\n"
<< "********** Function: " << MF.getName() << '\n';
@@ -112,7 +113,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &M
// Move ARGUMENT_* instructions to the top of the entry block, so that their
// liveness reflects the fact that these really are live-in values.
- for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE; ) {
+ for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE;) {
MachineInstr &MI = *MII++;
if (WebAssembly::isArgument(MI)) {
MI.removeFromParent();
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index d69a27937105..d97b13a8d699 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -118,16 +118,15 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
// registers), by weight next, and then by position.
// TODO: Investigate more intelligent sorting heuristics. For starters, we
// should try to coalesce adjacent live intervals before non-adjacent ones.
- llvm::sort(SortedIntervals.begin(), SortedIntervals.end(),
- [MRI](LiveInterval *LHS, LiveInterval *RHS) {
- if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg))
- return MRI->isLiveIn(LHS->reg);
- if (LHS->weight != RHS->weight)
- return LHS->weight > RHS->weight;
- if (LHS->empty() || RHS->empty())
- return !LHS->empty() && RHS->empty();
- return *LHS < *RHS;
- });
+ llvm::sort(SortedIntervals, [MRI](LiveInterval *LHS, LiveInterval *RHS) {
+ if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg))
+ return MRI->isLiveIn(LHS->reg);
+ if (LHS->weight != RHS->weight)
+ return LHS->weight > RHS->weight;
+ if (LHS->empty() || RHS->empty())
+ return !LHS->empty() && RHS->empty();
+ return *LHS < *RHS;
+ });
LLVM_DEBUG(dbgs() << "Coloring register intervals:\n");
SmallVector<unsigned, 16> SlotMapping(SortedIntervals.size(), -1u);
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 9f5d5bd87831..1eb32ed64494 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -22,9 +22,11 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
#include "WebAssembly.h"
+#include "WebAssemblyDebugValueManager.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -97,11 +99,11 @@ static void ImposeStackOrdering(MachineInstr *MI) {
static void ConvertImplicitDefToConstZero(MachineInstr *MI,
MachineRegisterInfo &MRI,
const TargetInstrInfo *TII,
- MachineFunction &MF) {
+ MachineFunction &MF,
+ LiveIntervals &LIS) {
assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);
- const auto *RegClass =
- MRI.getRegClass(MI->getOperand(0).getReg());
+ const auto *RegClass = MRI.getRegClass(MI->getOperand(0).getReg());
if (RegClass == &WebAssembly::I32RegClass) {
MI->setDesc(TII->get(WebAssembly::CONST_I32));
MI->addOperand(MachineOperand::CreateImm(0));
@@ -118,6 +120,14 @@ static void ConvertImplicitDefToConstZero(MachineInstr *MI,
ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
Type::getDoubleTy(MF.getFunction().getContext())));
MI->addOperand(MachineOperand::CreateFPImm(Val));
+ } else if (RegClass == &WebAssembly::V128RegClass) {
+ unsigned TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32));
+ MI->addOperand(MachineOperand::CreateReg(TempReg, false));
+ MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(WebAssembly::CONST_I32), TempReg)
+ .addImm(0);
+ LIS.InsertMachineInstrInMaps(*Const);
} else {
llvm_unreachable("Unexpected reg class");
}
@@ -172,29 +182,24 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
// Check for stores.
if (MI.mayStore()) {
Write = true;
-
- // Check for stores to __stack_pointer.
- for (auto MMO : MI.memoperands()) {
- const MachinePointerInfo &MPI = MMO->getPointerInfo();
- if (MPI.V.is<const PseudoSourceValue *>()) {
- auto PSV = MPI.V.get<const PseudoSourceValue *>();
- if (const ExternalSymbolPseudoSourceValue *EPSV =
- dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
- if (StringRef(EPSV->getSymbol()) == "__stack_pointer") {
- StackPointer = true;
- }
- }
- }
} else if (MI.hasOrderedMemoryRef()) {
switch (MI.getOpcode()) {
- case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
- case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
- case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
- case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
- case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
- case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
- case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
- case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+ case WebAssembly::DIV_S_I32:
+ case WebAssembly::DIV_S_I64:
+ case WebAssembly::REM_S_I32:
+ case WebAssembly::REM_S_I64:
+ case WebAssembly::DIV_U_I32:
+ case WebAssembly::DIV_U_I64:
+ case WebAssembly::REM_U_I32:
+ case WebAssembly::REM_U_I64:
+ case WebAssembly::I32_TRUNC_S_F32:
+ case WebAssembly::I64_TRUNC_S_F32:
+ case WebAssembly::I32_TRUNC_S_F64:
+ case WebAssembly::I64_TRUNC_S_F64:
+ case WebAssembly::I32_TRUNC_U_F32:
+ case WebAssembly::I64_TRUNC_U_F32:
+ case WebAssembly::I32_TRUNC_U_F64:
+ case WebAssembly::I64_TRUNC_U_F64:
// These instructions have hasUnmodeledSideEffects() returning true
// because they trap on overflow and invalid so they can't be arbitrarily
// moved, however hasOrderedMemoryRef() interprets this plus their lack
@@ -214,14 +219,22 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
// Check for side effects.
if (MI.hasUnmodeledSideEffects()) {
switch (MI.getOpcode()) {
- case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
- case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
- case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
- case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
- case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
- case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
- case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
- case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+ case WebAssembly::DIV_S_I32:
+ case WebAssembly::DIV_S_I64:
+ case WebAssembly::REM_S_I32:
+ case WebAssembly::REM_S_I64:
+ case WebAssembly::DIV_U_I32:
+ case WebAssembly::DIV_U_I64:
+ case WebAssembly::REM_U_I32:
+ case WebAssembly::REM_U_I64:
+ case WebAssembly::I32_TRUNC_S_F32:
+ case WebAssembly::I64_TRUNC_S_F32:
+ case WebAssembly::I32_TRUNC_S_F64:
+ case WebAssembly::I64_TRUNC_S_F64:
+ case WebAssembly::I32_TRUNC_U_F32:
+ case WebAssembly::I64_TRUNC_U_F32:
+ case WebAssembly::I32_TRUNC_U_F64:
+ case WebAssembly::I64_TRUNC_U_F64:
// These instructions have hasUnmodeledSideEffects() returning true
// because they trap on overflow and invalid so they can't be arbitrarily
// moved, however in the specific case of register stackifying, it is safe
@@ -233,22 +246,15 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
}
}
+ // Check for writes to __stack_pointer global.
+ if (MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 &&
+ strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
+ StackPointer = true;
+
// Analyze calls.
if (MI.isCall()) {
- switch (MI.getOpcode()) {
- case WebAssembly::CALL_VOID:
- case WebAssembly::CALL_INDIRECT_VOID:
- QueryCallee(MI, 0, Read, Write, Effects, StackPointer);
- break;
- case WebAssembly::CALL_I32: case WebAssembly::CALL_I64:
- case WebAssembly::CALL_F32: case WebAssembly::CALL_F64:
- case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64:
- case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64:
- QueryCallee(MI, 1, Read, Write, Effects, StackPointer);
- break;
- default:
- llvm_unreachable("unexpected call opcode");
- }
+ unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI);
+ QueryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
}
}
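
As a hedged illustration of the call handling above (a toy, not LLVM's API): getCalleeOpNo collapses the old per-opcode switch because void calls carry the callee in operand 0, while value-returning calls put their result in operand 0 and the callee in operand 1.

#include <cassert>

// Toy stand-ins for the call opcodes; the real values come from the generated
// WebAssembly instruction tables.
enum ToyOpcode { CALL_VOID, CALL_INDIRECT_VOID, CALL_I32, CALL_INDIRECT_F64 };

static unsigned toyCalleeOpNo(ToyOpcode Op) {
  switch (Op) {
  case CALL_VOID:
  case CALL_INDIRECT_VOID:
    return 0; // no result operand, the callee comes first
  default:
    return 1; // operand 0 is the result, the callee follows
  }
}

int main() {
  assert(toyCalleeOpNo(CALL_VOID) == 0);
  assert(toyCalleeOpNo(CALL_I32) == 1);
  assert(toyCalleeOpNo(CALL_INDIRECT_F64) == 1);
  return 0;
}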
@@ -263,8 +269,7 @@ static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
// LiveIntervals to handle complex cases.
static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
const MachineRegisterInfo &MRI,
- const LiveIntervals &LIS)
-{
+ const LiveIntervals &LIS) {
// Most registers are in SSA form here so we try a quick MRI query first.
if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
return Def;
@@ -280,17 +285,16 @@ static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
// Test whether Reg, as defined at Def, has exactly one use. This is a
// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
// to handle complex cases.
-static bool HasOneUse(unsigned Reg, MachineInstr *Def,
- MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
- LiveIntervals &LIS) {
+static bool HasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
+ MachineDominatorTree &MDT, LiveIntervals &LIS) {
// Most registers are in SSA form here so we try a quick MRI query first.
if (MRI.hasOneUse(Reg))
return true;
bool HasOne = false;
const LiveInterval &LI = LIS.getInterval(Reg);
- const VNInfo *DefVNI = LI.getVNInfoAt(
- LIS.getInstructionIndex(*Def).getRegSlot());
+ const VNInfo *DefVNI =
+ LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot());
assert(DefVNI);
for (auto &I : MRI.use_nodbg_operands(Reg)) {
const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
@@ -403,7 +407,6 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
if (UseVNI != OneUseVNI)
continue;
- const MachineInstr *OneUseInst = OneUse.getParent();
if (UseInst == OneUseInst) {
// Another use in the same instruction. We need to ensure that the one
// selected use happens "before" it.
@@ -415,8 +418,8 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
// Actually, dominating is over-conservative. Test that the use would
// happen after the one selected use in the stack evaluation order.
//
- // This is needed as a consequence of using implicit get_locals for
- // uses and implicit set_locals for defs.
+ // This is needed as a consequence of using implicit local.gets for
+ // uses and implicit local.sets for defs.
if (UseInst->getDesc().getNumDefs() == 0)
return false;
const MachineOperand &MO = UseInst->getOperand(0);
@@ -426,8 +429,8 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
!MFI.isVRegStackified(DefReg))
return false;
- assert(MRI.hasOneUse(DefReg));
- const MachineOperand &NewUse = *MRI.use_begin(DefReg);
+ assert(MRI.hasOneNonDBGUse(DefReg));
+ const MachineOperand &NewUse = *MRI.use_nodbg_begin(DefReg);
const MachineInstr *NewUseInst = NewUse.getParent();
if (NewUseInst == OneUseInst) {
if (&OneUse > &NewUse)
@@ -459,22 +462,23 @@ static unsigned GetTeeOpcode(const TargetRegisterClass *RC) {
// Shrink LI to its uses, cleaning up LI.
static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
if (LIS.shrinkToUses(&LI)) {
- SmallVector<LiveInterval*, 4> SplitLIs;
+ SmallVector<LiveInterval *, 4> SplitLIs;
LIS.splitSeparateComponents(LI, SplitLIs);
}
}
/// A single-use def in the same block with no intervening memory or register
/// dependencies; move the def down and nest it with the current instruction.
-static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
- MachineInstr *Def,
- MachineBasicBlock &MBB,
+static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
+ MachineInstr *Def, MachineBasicBlock &MBB,
MachineInstr *Insert, LiveIntervals &LIS,
WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI) {
LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump());
+ WebAssemblyDebugValueManager DefDIs(Def);
MBB.splice(Insert, &MBB, Def);
+ DefDIs.move(Insert);
LIS.handleMove(*Def);
if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
@@ -499,6 +503,8 @@ static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
MFI.stackifyVReg(NewReg);
+ DefDIs.updateReg(NewReg);
+
LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
}
@@ -516,6 +522,8 @@ static MachineInstr *RematerializeCheapDef(
LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
LLVM_DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
+ WebAssemblyDebugValueManager DefDIs(&Def);
+
unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
Op.setReg(NewReg);
@@ -536,6 +544,7 @@ static MachineInstr *RematerializeCheapDef(
}
// If that was the last use of the original, delete the original.
+ // Move or clone corresponding DBG_VALUEs to the 'Insert' location.
if (IsDead) {
LLVM_DEBUG(dbgs() << " - Deleting original\n");
SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
@@ -543,6 +552,11 @@ static MachineInstr *RematerializeCheapDef(
LIS.removeInterval(Reg);
LIS.RemoveMachineInstrFromMaps(Def);
Def.eraseFromParent();
+
+ DefDIs.move(&*Insert);
+ DefDIs.updateReg(NewReg);
+ } else {
+ DefDIs.clone(&*Insert, NewReg);
}
return Clone;
@@ -566,7 +580,7 @@ static MachineInstr *RematerializeCheapDef(
/// INST ..., Reg, ...
/// INST ..., Reg, ...
///
-/// with DefReg and TeeReg stackified. This eliminates a get_local from the
+/// with DefReg and TeeReg stackified. This eliminates a local.get from the
/// resulting code.
static MachineInstr *MoveAndTeeForMultiUse(
unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
@@ -574,6 +588,8 @@ static MachineInstr *MoveAndTeeForMultiUse(
MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
+ WebAssemblyDebugValueManager DefDIs(Def);
+
// Move Def into place.
MBB.splice(Insert, &MBB, Def);
LIS.handleMove(*Def);
@@ -592,6 +608,8 @@ static MachineInstr *MoveAndTeeForMultiUse(
SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();
+ DefDIs.move(Insert);
+
// Tell LiveIntervals we moved the original vreg def from Def to Tee.
LiveInterval &LI = LIS.getInterval(Reg);
LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
@@ -608,6 +626,9 @@ static MachineInstr *MoveAndTeeForMultiUse(
ImposeStackOrdering(Def);
ImposeStackOrdering(Tee);
+ DefDIs.clone(Tee, DefReg);
+ DefDIs.clone(Insert, TeeReg);
+
LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
return Def;
@@ -672,8 +693,8 @@ public:
/// operand in the tree that we haven't visited yet. Moving a definition of
/// Reg to a point in the tree after that would change its value.
///
- /// This is needed as a consequence of using implicit get_locals for
- /// uses and implicit set_locals for defs.
+ /// This is needed as a consequence of using implicit local.gets for
+ /// uses and implicit local.sets for defs.
bool IsOnStack(unsigned Reg) const {
for (const RangeTy &Range : Worklist)
for (const MachineOperand &MO : Range)
@@ -687,9 +708,9 @@ public:
/// tried for the current instruction and didn't work.
class CommutingState {
/// There are effectively three states: the initial state where we haven't
- /// started commuting anything and we don't know anything yet, the tenative
+ /// started commuting anything and we don't know anything yet, the tentative
/// state where we've commuted the operands of the current instruction and are
- /// revisting it, and the declined state where we've reverted the operands
+ /// revisiting it, and the declined state where we've reverted the operands
/// back to their original order and will no longer commute it further.
bool TentativelyCommuting;
bool Declined;
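
The comment above describes a small state machine; here is a hedged, standalone analogue (plain C++, invented names, no LLVM types) of the three states — initial, tentatively commuted, and declined — to make the transitions concrete:

#include <cassert>
#include <utility>

// Minimal stand-in for an instruction whose first two operands may commute.
struct ToyInstr { int Ops[2]; };

class ToyCommutingState {
  bool TentativelyCommuting = false;
  bool Declined = false;

public:
  // Initial state: swap the operands once and revisit the instruction.
  void maybeCommute(ToyInstr &MI) {
    if (Declined || TentativelyCommuting)
      return;
    std::swap(MI.Ops[0], MI.Ops[1]);
    TentativelyCommuting = true;
  }

  // Revisiting did not help: revert the operands and stop commuting for good.
  void decline(ToyInstr &MI) {
    if (!TentativelyCommuting)
      return;
    std::swap(MI.Ops[0], MI.Ops[1]);
    TentativelyCommuting = false;
    Declined = true;
  }
};

int main() {
  ToyInstr MI{{1, 2}};
  ToyCommutingState S;
  S.maybeCommute(MI); // tentative state: operands swapped
  assert(MI.Ops[0] == 2);
  S.decline(MI);      // declined state: original order restored
  assert(MI.Ops[0] == 1);
  S.maybeCommute(MI); // no further commuting once declined
  assert(MI.Ops[0] == 1);
  return 0;
}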
@@ -831,7 +852,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// to a constant 0 so that the def is explicit, and the push/pop
// correspondence is maintained.
if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
- ConvertImplicitDefToConstZero(Insert, MRI, TII, MF);
+ ConvertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);
// We stackified an operand. Add the defining instruction's operands to
// the worklist stack now to continue to build an ever deeper tree.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index b6481ac2d4ae..1f0870865b06 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -22,9 +22,9 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index 29f42b96b249..a7c3d177724d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -63,6 +63,6 @@ def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32, I32_0)>;
def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64, I64_0)>;
def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
-def V128 : WebAssemblyRegClass<[v4f32, v4i32, v16i8, v8i16], 128, (add V128_0)>;
+def V128 : WebAssemblyRegClass<[v4f32, v2f64, v2i64, v4i32, v16i8, v8i16], 128,
+ (add V128_0)>;
def EXCEPT_REF : WebAssemblyRegClass<[ExceptRef], 0, (add EXCEPT_REF_0)>;
-
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index f432b367d156..e5a3e47a3bcd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -54,8 +54,8 @@ private:
char WebAssemblyReplacePhysRegs::ID = 0;
INITIALIZE_PASS(WebAssemblyReplacePhysRegs, DEBUG_TYPE,
- "Replace physical registers with virtual registers",
- false, false)
+ "Replace physical registers with virtual registers", false,
+ false)
FunctionPass *llvm::createWebAssemblyReplacePhysRegs() {
return new WebAssemblyReplacePhysRegs();
@@ -86,7 +86,7 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
// Replace explicit uses of the physical register with a virtual register.
const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg);
unsigned VReg = WebAssembly::NoRegister;
- for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E; ) {
+ for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E;) {
MachineOperand &MO = *I++;
if (!MO.isImplicit()) {
if (VReg == WebAssembly::NoRegister)
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index fe8a5e4c06f1..6cf81a9d77b3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -88,7 +88,6 @@ enum RuntimeLibcallSignature {
unsupported
};
-
struct RuntimeLibcallSignatureTable {
std::vector<RuntimeLibcallSignature> Table;
@@ -486,18 +485,17 @@ struct StaticLibcallNameMap {
} // end anonymous namespace
-
-
-void llvm::GetSignature(const WebAssemblySubtarget &Subtarget,
- RTLIB::Libcall LC, SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params) {
+void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ RTLIB::Libcall LC,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params) {
assert(Rets.empty());
assert(Params.empty());
wasm::ValType iPTR =
Subtarget.hasAddr64() ? wasm::ValType::I64 : wasm::ValType::I32;
- auto& Table = RuntimeLibcallSignatures->Table;
+ auto &Table = RuntimeLibcallSignatures->Table;
switch (Table[LC]) {
case func:
break;
@@ -834,11 +832,12 @@ void llvm::GetSignature(const WebAssemblySubtarget &Subtarget,
static ManagedStatic<StaticLibcallNameMap> LibcallNameMap;
// TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unused
// other than here, just roll its logic into this version.
-void llvm::GetSignature(const WebAssemblySubtarget &Subtarget, const char *Name,
- SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params) {
- auto& Map = LibcallNameMap->Map;
+void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ const char *Name,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params) {
+ auto &Map = LibcallNameMap->Map;
auto val = Map.find(Name);
assert(val != Map.end() && "unexpected runtime library name");
- return GetSignature(Subtarget, val->second, Rets, Params);
+ return GetLibcallSignature(Subtarget, val->second, Rets, Params);
}
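
A hedged, standalone analogue of the renamed entry points (std::map and std::vector stand in for the LLVM containers; the libcall names and signatures shown are illustrative): the name-taking flavor only resolves the runtime-library name to the enum value and forwards to the enum-taking flavor.

#include <cassert>
#include <map>
#include <string>
#include <vector>

enum class ToyValType { I32, I64, F32, F64 };
enum class ToyLibcall { MemCpy, FPToSInt64 };

// Enum-taking flavor: fill in the return and parameter types for the libcall.
static void toyGetLibcallSignature(ToyLibcall LC, std::vector<ToyValType> &Rets,
                                   std::vector<ToyValType> &Params) {
  assert(Rets.empty() && Params.empty());
  switch (LC) {
  case ToyLibcall::MemCpy: // void *memcpy(void *, const void *, size_t), wasm32
    Rets = {ToyValType::I32};
    Params = {ToyValType::I32, ToyValType::I32, ToyValType::I32};
    break;
  case ToyLibcall::FPToSInt64: // i64 __fixdfdi(f64)
    Rets = {ToyValType::I64};
    Params = {ToyValType::F64};
    break;
  }
}

// Name-taking flavor: look the name up, then forward to the enum flavor.
static void toyGetLibcallSignature(const std::string &Name,
                                   std::vector<ToyValType> &Rets,
                                   std::vector<ToyValType> &Params) {
  static const std::map<std::string, ToyLibcall> Map = {
      {"memcpy", ToyLibcall::MemCpy}, {"__fixdfdi", ToyLibcall::FPToSInt64}};
  auto It = Map.find(Name);
  assert(It != Map.end() && "unexpected runtime library name");
  toyGetLibcallSignature(It->second, Rets, Params);
}

int main() {
  std::vector<ToyValType> Rets, Params;
  toyGetLibcallSignature("memcpy", Rets, Params);
  assert(Rets.size() == 1 && Params.size() == 3);
  return 0;
}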
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
index 2ba65ff5b716..7fa70bea96de 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
@@ -23,14 +23,15 @@ namespace llvm {
class WebAssemblySubtarget;
-extern void GetSignature(const WebAssemblySubtarget &Subtarget,
- RTLIB::Libcall LC,
- SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params);
-
-extern void GetSignature(const WebAssemblySubtarget &Subtarget,
- const char *Name, SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params);
+extern void GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ RTLIB::Libcall LC,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params);
+
+extern void GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ const char *Name,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params);
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
index 14221993603a..c95af88c6f43 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
@@ -60,8 +60,7 @@ static void RewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
assert(MI.hasOneMemOperand() &&
"Load and store instructions have exactly one mem operand");
assert((*MI.memoperands_begin())->getSize() ==
- (UINT64_C(1)
- << WebAssembly::GetDefaultP2Align(MI.getOpcode())) &&
+ (UINT64_C(1) << WebAssembly::GetDefaultP2Align(MI.getOpcode())) &&
"Default p2align value should be natural");
assert(MI.getDesc().OpInfo[OperandNo].OperandType ==
WebAssembly::OPERAND_P2ALIGN &&
@@ -69,8 +68,8 @@ static void RewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
uint64_t P2Align = Log2_64((*MI.memoperands_begin())->getAlignment());
// WebAssembly does not currently support supernatural alignment.
- P2Align = std::min(
- P2Align, uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode())));
+ P2Align = std::min(P2Align,
+ uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode())));
MI.getOperand(OperandNo).setImm(P2Align);
}
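
A hedged, standalone sketch of the computation above: take log2 of the memory operand's alignment and clamp it at the instruction's natural (default) p2align, since WebAssembly does not encode supernatural alignment.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Integer log2 of a power-of-two alignment (stand-in for llvm::Log2_64).
static uint64_t log2u64(uint64_t V) {
  uint64_t R = 0;
  while (V >>= 1)
    ++R;
  return R;
}

// Clamp the encoded p2align to the natural alignment of the access.
static uint64_t computeP2Align(uint64_t AlignBytes, uint64_t DefaultP2Align) {
  return std::min(log2u64(AlignBytes), DefaultP2Align);
}

int main() {
  // A 4-byte-aligned i32 load: natural p2align is 2, so the operand stays 2.
  assert(computeP2Align(4, 2) == 2);
  // A 1-byte-aligned i32 load: the encoded value drops to 0.
  assert(computeP2Align(1, 2) == 0);
  // A 16-byte-aligned i32 load is still capped at its natural alignment.
  assert(computeP2Align(16, 2) == 2);
  return 0;
}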
@@ -90,6 +89,12 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::LOAD_I64:
case WebAssembly::LOAD_F32:
case WebAssembly::LOAD_F64:
+ case WebAssembly::LOAD_v16i8:
+ case WebAssembly::LOAD_v8i16:
+ case WebAssembly::LOAD_v4i32:
+ case WebAssembly::LOAD_v2i64:
+ case WebAssembly::LOAD_v4f32:
+ case WebAssembly::LOAD_v2f64:
case WebAssembly::LOAD8_S_I32:
case WebAssembly::LOAD8_U_I32:
case WebAssembly::LOAD16_S_I32:
@@ -119,6 +124,8 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
@@ -131,6 +138,8 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW_ADD_I32:
case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
case WebAssembly::ATOMIC_RMW_SUB_I32:
@@ -143,18 +152,30 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
case WebAssembly::ATOMIC_RMW_XCHG_I32:
case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW_ADD_I64:
case WebAssembly::ATOMIC_RMW_SUB_I64:
case WebAssembly::ATOMIC_RMW_AND_I64:
case WebAssembly::ATOMIC_RMW_OR_I64:
case WebAssembly::ATOMIC_RMW_XOR_I64:
case WebAssembly::ATOMIC_RMW_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_NOTIFY:
+ case WebAssembly::ATOMIC_WAIT_I32:
+ case WebAssembly::ATOMIC_WAIT_I64:
RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo);
break;
case WebAssembly::STORE_I32:
case WebAssembly::STORE_I64:
case WebAssembly::STORE_F32:
case WebAssembly::STORE_F64:
+ case WebAssembly::STORE_v16i8:
+ case WebAssembly::STORE_v8i16:
+ case WebAssembly::STORE_v4i32:
+ case WebAssembly::STORE_v2i64:
+ case WebAssembly::STORE_v4f32:
+ case WebAssembly::STORE_v2f64:
case WebAssembly::STORE8_I32:
case WebAssembly::STORE16_I32:
case WebAssembly::STORE8_I64:
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index d6af0fb219d7..98133e2153a0 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -40,10 +40,9 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
const std::string &CPU,
const std::string &FS,
const TargetMachine &TM)
- : WebAssemblyGenSubtargetInfo(TT, CPU, FS), HasSIMD128(false),
- HasAtomics(false), HasNontrappingFPToInt(false), HasSignExt(false),
- HasExceptionHandling(false), CPUString(CPU), TargetTriple(TT),
- FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
+ : WebAssemblyGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
+ TargetTriple(TT), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
TLInfo(TM, *this) {}
bool WebAssemblySubtarget::enableMachineScheduler() const {
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index b170dbff3b32..0a0c04609ac4 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -29,11 +29,16 @@
namespace llvm {
class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
- bool HasSIMD128;
- bool HasAtomics;
- bool HasNontrappingFPToInt;
- bool HasSignExt;
- bool HasExceptionHandling;
+ enum SIMDEnum {
+ NoSIMD,
+ SIMD128,
+ UnimplementedSIMD128,
+ } SIMDLevel = NoSIMD;
+
+ bool HasAtomics = false;
+ bool HasNontrappingFPToInt = false;
+ bool HasSignExt = false;
+ bool HasExceptionHandling = false;
/// String name of used CPU.
std::string CPUString;
@@ -77,7 +82,10 @@ public:
// Predicates used by WebAssemblyInstrInfo.td.
bool hasAddr64() const { return TargetTriple.isArch64Bit(); }
- bool hasSIMD128() const { return HasSIMD128; }
+ bool hasSIMD128() const { return SIMDLevel >= SIMD128; }
+ bool hasUnimplementedSIMD128() const {
+ return SIMDLevel >= UnimplementedSIMD128;
+ }
bool hasAtomics() const { return HasAtomics; }
bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; }
bool hasSignExt() const { return HasSignExt; }
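
A hedged, standalone sketch of the feature-level pattern above: one ordered enum replaces the old HasSIMD128 flag, so hasSIMD128() and hasUnimplementedSIMD128() become simple >= comparisons against increasing levels.

#include <cassert>

// Ordered SIMD support levels: each level implies everything below it.
enum ToySIMDEnum { NoSIMD, SIMD128, UnimplementedSIMD128 };

struct ToySubtarget {
  ToySIMDEnum SIMDLevel = NoSIMD;
  bool hasSIMD128() const { return SIMDLevel >= SIMD128; }
  bool hasUnimplementedSIMD128() const {
    return SIMDLevel >= UnimplementedSIMD128;
  }
};

int main() {
  ToySubtarget ST;
  assert(!ST.hasSIMD128());

  ST.SIMDLevel = SIMD128;
  assert(ST.hasSIMD128() && !ST.hasUnimplementedSIMD128());

  // Enabling the experimental level also implies the stable SIMD128 level.
  ST.SIMDLevel = UnimplementedSIMD128;
  assert(ST.hasSIMD128() && ST.hasUnimplementedSIMD128());
  return 0;
}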
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 7c10f022cbbc..3bf8dd40892c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -58,10 +58,11 @@ extern "C" void LLVMInitializeWebAssemblyTarget() {
initializeOptimizeReturnedPass(PR);
initializeWebAssemblyArgumentMovePass(PR);
initializeWebAssemblySetP2AlignOperandsPass(PR);
+ initializeWebAssemblyEHRestoreStackPointerPass(PR);
initializeWebAssemblyReplacePhysRegsPass(PR);
initializeWebAssemblyPrepareForLiveIntervalsPass(PR);
initializeWebAssemblyOptimizeLiveIntervalsPass(PR);
- initializeWebAssemblyStoreResultsPass(PR);
+ initializeWebAssemblyMemIntrinsicResultsPass(PR);
initializeWebAssemblyRegStackifyPass(PR);
initializeWebAssemblyRegColoringPass(PR);
initializeWebAssemblyExplicitLocalsPass(PR);
@@ -81,8 +82,12 @@ extern "C" void LLVMInitializeWebAssemblyTarget() {
//===----------------------------------------------------------------------===//
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::PIC_;
+ if (!RM.hasValue()) {
+ // Default to static relocation model. This should always be more optimal
+ // than PIC since the static linker can determine all global addresses and
+ // assume direct function calls.
+ return Reloc::Static;
+ }
return *RM;
}
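
A hedged, standalone sketch of the defaulting logic above, using std::optional in place of llvm::Optional: when no relocation model is requested, fall back to static rather than PIC.

#include <cassert>
#include <optional>

enum class ToyRelocModel { Static, PIC_, DynamicNoPIC };

// With no explicit request, default to the static relocation model, letting
// the static linker resolve all global addresses and direct calls.
static ToyRelocModel getEffectiveRelocModel(std::optional<ToyRelocModel> RM) {
  return RM.value_or(ToyRelocModel::Static);
}

int main() {
  assert(getEffectiveRelocModel(std::nullopt) == ToyRelocModel::Static);
  assert(getEffectiveRelocModel(ToyRelocModel::PIC_) == ToyRelocModel::PIC_);
  return 0;
}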
@@ -96,7 +101,7 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128"
: "e-m:e-p:32:32-i64:64-n32:64-S128",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
- CM ? *CM : CodeModel::Large, OL),
+ getEffectiveCodeModel(CM, CodeModel::Large), OL),
TLOF(new WebAssemblyTargetObjectFile()) {
// WebAssembly type-checks instructions, but a noreturn function with a return
// type that doesn't match the context will cause a check failure. So we lower
@@ -149,7 +154,7 @@ class StripThreadLocal final : public ModulePass {
// pass just converts all GlobalVariables to NotThreadLocal
static char ID;
- public:
+public:
StripThreadLocal() : ModulePass(ID) {}
bool runOnModule(Module &M) override {
for (auto &GV : M.globals())
@@ -280,6 +285,9 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
void WebAssemblyPassConfig::addPreEmitPass() {
TargetPassConfig::addPreEmitPass();
+ // Restore __stack_pointer global after an exception is thrown.
+ addPass(createWebAssemblyEHRestoreStackPointer());
+
// Now that we have a prologue and epilogue and all frame indices are
// rewritten, eliminate SP and FP. This allows them to be stackified,
// colored, and numbered with the rest of the registers.
@@ -290,6 +298,12 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// order of the arguments.
addPass(createWebAssemblyCallIndirectFixup());
+ // Eliminate multiple-entry loops.
+ addPass(createWebAssemblyFixIrreducibleControlFlow());
+
+ // Do various transformations for exception handling.
+ addPass(createWebAssemblyLateEHPrepare());
+
if (getOptLevel() != CodeGenOpt::None) {
// LiveIntervals isn't commonly run this late. Re-establish preconditions.
addPass(createWebAssemblyPrepareForLiveIntervals());
@@ -297,13 +311,14 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// Depend on LiveIntervals and perform some optimizations on it.
addPass(createWebAssemblyOptimizeLiveIntervals());
- // Prepare store instructions for register stackifying.
- addPass(createWebAssemblyStoreResults());
+ // Prepare memory intrinsic calls for register stackifying.
+ addPass(createWebAssemblyMemIntrinsicResults());
// Mark registers as representing wasm's value stack. This is a key
// code-compression technique in WebAssembly. We run this pass (and
- // StoreResults above) very late, so that it sees as much code as possible,
- // including code emitted by PEI and expanded by late tail duplication.
+ // MemIntrinsicResults above) very late, so that it sees as much code as
+ // possible, including code emitted by PEI and expanded by late tail
+ // duplication.
addPass(createWebAssemblyRegStackify());
// Run the register coloring pass to reduce the total number of registers.
@@ -312,17 +327,9 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyRegColoring());
}
- // Eliminate multiple-entry loops. Do this before inserting explicit get_local
- // and set_local operators because we create a new variable that we want
- // converted into a local.
- addPass(createWebAssemblyFixIrreducibleControlFlow());
-
- // Insert explicit get_local and set_local operators.
+ // Insert explicit local.get and local.set operators.
addPass(createWebAssemblyExplicitLocals());
- // Do various transformations for exception handling
- addPass(createWebAssemblyLateEHPrepare());
-
// Sort the blocks of the CFG into topological order, a prerequisite for
// BLOCK and LOOP markers.
addPass(createWebAssemblyCFGSort());
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index 5944cea5abd1..ada6fb9a96d7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -27,14 +27,26 @@ const char *const WebAssembly::PersonalityWrapperFn =
bool WebAssembly::isArgument(const MachineInstr &MI) {
switch (MI.getOpcode()) {
- case WebAssembly::ARGUMENT_I32:
- case WebAssembly::ARGUMENT_I64:
- case WebAssembly::ARGUMENT_F32:
- case WebAssembly::ARGUMENT_F64:
+ case WebAssembly::ARGUMENT_i32:
+ case WebAssembly::ARGUMENT_i32_S:
+ case WebAssembly::ARGUMENT_i64:
+ case WebAssembly::ARGUMENT_i64_S:
+ case WebAssembly::ARGUMENT_f32:
+ case WebAssembly::ARGUMENT_f32_S:
+ case WebAssembly::ARGUMENT_f64:
+ case WebAssembly::ARGUMENT_f64_S:
case WebAssembly::ARGUMENT_v16i8:
+ case WebAssembly::ARGUMENT_v16i8_S:
case WebAssembly::ARGUMENT_v8i16:
+ case WebAssembly::ARGUMENT_v8i16_S:
case WebAssembly::ARGUMENT_v4i32:
+ case WebAssembly::ARGUMENT_v4i32_S:
+ case WebAssembly::ARGUMENT_v2i64:
+ case WebAssembly::ARGUMENT_v2i64_S:
case WebAssembly::ARGUMENT_v4f32:
+ case WebAssembly::ARGUMENT_v4f32_S:
+ case WebAssembly::ARGUMENT_v2f64:
+ case WebAssembly::ARGUMENT_v2f64_S:
return true;
default:
return false;
@@ -44,9 +56,15 @@ bool WebAssembly::isArgument(const MachineInstr &MI) {
bool WebAssembly::isCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::COPY_I32:
+ case WebAssembly::COPY_I32_S:
case WebAssembly::COPY_I64:
+ case WebAssembly::COPY_I64_S:
case WebAssembly::COPY_F32:
+ case WebAssembly::COPY_F32_S:
case WebAssembly::COPY_F64:
+ case WebAssembly::COPY_F64_S:
+ case WebAssembly::COPY_V128:
+ case WebAssembly::COPY_V128_S:
return true;
default:
return false;
@@ -56,9 +74,15 @@ bool WebAssembly::isCopy(const MachineInstr &MI) {
bool WebAssembly::isTee(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::TEE_I32:
+ case WebAssembly::TEE_I32_S:
case WebAssembly::TEE_I64:
+ case WebAssembly::TEE_I64_S:
case WebAssembly::TEE_F32:
+ case WebAssembly::TEE_F32_S:
case WebAssembly::TEE_F64:
+ case WebAssembly::TEE_F64_S:
+ case WebAssembly::TEE_V128:
+ case WebAssembly::TEE_V128_S:
return true;
default:
return false;
@@ -81,15 +105,29 @@ bool WebAssembly::isChild(const MachineInstr &MI,
bool WebAssembly::isCallDirect(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CALL_VOID:
+ case WebAssembly::CALL_VOID_S:
case WebAssembly::CALL_I32:
+ case WebAssembly::CALL_I32_S:
case WebAssembly::CALL_I64:
+ case WebAssembly::CALL_I64_S:
case WebAssembly::CALL_F32:
+ case WebAssembly::CALL_F32_S:
case WebAssembly::CALL_F64:
+ case WebAssembly::CALL_F64_S:
case WebAssembly::CALL_v16i8:
+ case WebAssembly::CALL_v16i8_S:
case WebAssembly::CALL_v8i16:
+ case WebAssembly::CALL_v8i16_S:
case WebAssembly::CALL_v4i32:
+ case WebAssembly::CALL_v4i32_S:
+ case WebAssembly::CALL_v2i64:
+ case WebAssembly::CALL_v2i64_S:
case WebAssembly::CALL_v4f32:
+ case WebAssembly::CALL_v4f32_S:
+ case WebAssembly::CALL_v2f64:
+ case WebAssembly::CALL_v2f64_S:
case WebAssembly::CALL_EXCEPT_REF:
+ case WebAssembly::CALL_EXCEPT_REF_S:
return true;
default:
return false;
@@ -99,15 +137,29 @@ bool WebAssembly::isCallDirect(const MachineInstr &MI) {
bool WebAssembly::isCallIndirect(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CALL_INDIRECT_VOID:
+ case WebAssembly::CALL_INDIRECT_VOID_S:
case WebAssembly::CALL_INDIRECT_I32:
+ case WebAssembly::CALL_INDIRECT_I32_S:
case WebAssembly::CALL_INDIRECT_I64:
+ case WebAssembly::CALL_INDIRECT_I64_S:
case WebAssembly::CALL_INDIRECT_F32:
+ case WebAssembly::CALL_INDIRECT_F32_S:
case WebAssembly::CALL_INDIRECT_F64:
+ case WebAssembly::CALL_INDIRECT_F64_S:
case WebAssembly::CALL_INDIRECT_v16i8:
+ case WebAssembly::CALL_INDIRECT_v16i8_S:
case WebAssembly::CALL_INDIRECT_v8i16:
+ case WebAssembly::CALL_INDIRECT_v8i16_S:
case WebAssembly::CALL_INDIRECT_v4i32:
+ case WebAssembly::CALL_INDIRECT_v4i32_S:
+ case WebAssembly::CALL_INDIRECT_v2i64:
+ case WebAssembly::CALL_INDIRECT_v2i64_S:
case WebAssembly::CALL_INDIRECT_v4f32:
+ case WebAssembly::CALL_INDIRECT_v4f32_S:
+ case WebAssembly::CALL_INDIRECT_v2f64:
+ case WebAssembly::CALL_INDIRECT_v2f64_S:
case WebAssembly::CALL_INDIRECT_EXCEPT_REF:
+ case WebAssembly::CALL_INDIRECT_EXCEPT_REF_S:
return true;
default:
return false;
@@ -117,18 +169,54 @@ bool WebAssembly::isCallIndirect(const MachineInstr &MI) {
unsigned WebAssembly::getCalleeOpNo(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CALL_VOID:
+ case WebAssembly::CALL_VOID_S:
case WebAssembly::CALL_INDIRECT_VOID:
+ case WebAssembly::CALL_INDIRECT_VOID_S:
return 0;
case WebAssembly::CALL_I32:
+ case WebAssembly::CALL_I32_S:
case WebAssembly::CALL_I64:
+ case WebAssembly::CALL_I64_S:
case WebAssembly::CALL_F32:
+ case WebAssembly::CALL_F32_S:
case WebAssembly::CALL_F64:
+ case WebAssembly::CALL_F64_S:
+ case WebAssembly::CALL_v16i8:
+ case WebAssembly::CALL_v16i8_S:
+ case WebAssembly::CALL_v8i16:
+ case WebAssembly::CALL_v8i16_S:
+ case WebAssembly::CALL_v4i32:
+ case WebAssembly::CALL_v4i32_S:
+ case WebAssembly::CALL_v2i64:
+ case WebAssembly::CALL_v2i64_S:
+ case WebAssembly::CALL_v4f32:
+ case WebAssembly::CALL_v4f32_S:
+ case WebAssembly::CALL_v2f64:
+ case WebAssembly::CALL_v2f64_S:
case WebAssembly::CALL_EXCEPT_REF:
+ case WebAssembly::CALL_EXCEPT_REF_S:
case WebAssembly::CALL_INDIRECT_I32:
+ case WebAssembly::CALL_INDIRECT_I32_S:
case WebAssembly::CALL_INDIRECT_I64:
+ case WebAssembly::CALL_INDIRECT_I64_S:
case WebAssembly::CALL_INDIRECT_F32:
+ case WebAssembly::CALL_INDIRECT_F32_S:
case WebAssembly::CALL_INDIRECT_F64:
+ case WebAssembly::CALL_INDIRECT_F64_S:
+ case WebAssembly::CALL_INDIRECT_v16i8:
+ case WebAssembly::CALL_INDIRECT_v16i8_S:
+ case WebAssembly::CALL_INDIRECT_v8i16:
+ case WebAssembly::CALL_INDIRECT_v8i16_S:
+ case WebAssembly::CALL_INDIRECT_v4i32:
+ case WebAssembly::CALL_INDIRECT_v4i32_S:
+ case WebAssembly::CALL_INDIRECT_v2i64:
+ case WebAssembly::CALL_INDIRECT_v2i64_S:
+ case WebAssembly::CALL_INDIRECT_v4f32:
+ case WebAssembly::CALL_INDIRECT_v4f32_S:
+ case WebAssembly::CALL_INDIRECT_v2f64:
+ case WebAssembly::CALL_INDIRECT_v2f64_S:
case WebAssembly::CALL_INDIRECT_EXCEPT_REF:
+ case WebAssembly::CALL_INDIRECT_EXCEPT_REF_S:
return 1;
default:
llvm_unreachable("Not a call instruction");
@@ -138,11 +226,17 @@ unsigned WebAssembly::getCalleeOpNo(const MachineInstr &MI) {
bool WebAssembly::isMarker(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::BLOCK:
+ case WebAssembly::BLOCK_S:
case WebAssembly::END_BLOCK:
+ case WebAssembly::END_BLOCK_S:
case WebAssembly::LOOP:
+ case WebAssembly::LOOP_S:
case WebAssembly::END_LOOP:
+ case WebAssembly::END_LOOP_S:
case WebAssembly::TRY:
+ case WebAssembly::TRY_S:
case WebAssembly::END_TRY:
+ case WebAssembly::END_TRY_S:
return true;
default:
return false;
@@ -152,7 +246,9 @@ bool WebAssembly::isMarker(const MachineInstr &MI) {
bool WebAssembly::isThrow(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::THROW_I32:
+ case WebAssembly::THROW_I32_S:
case WebAssembly::THROW_I64:
+ case WebAssembly::THROW_I64_S:
return true;
default:
return false;
@@ -162,7 +258,9 @@ bool WebAssembly::isThrow(const MachineInstr &MI) {
bool WebAssembly::isRethrow(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::RETHROW:
+ case WebAssembly::RETHROW_S:
case WebAssembly::RETHROW_TO_CALLER:
+ case WebAssembly::RETHROW_TO_CALLER_S:
return true;
default:
return false;
@@ -172,8 +270,11 @@ bool WebAssembly::isRethrow(const MachineInstr &MI) {
bool WebAssembly::isCatch(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CATCH_I32:
+ case WebAssembly::CATCH_I32_S:
case WebAssembly::CATCH_I64:
+ case WebAssembly::CATCH_I64_S:
case WebAssembly::CATCH_ALL:
+ case WebAssembly::CATCH_ALL_S:
return true;
default:
return false;
@@ -183,8 +284,11 @@ bool WebAssembly::isCatch(const MachineInstr &MI) {
bool WebAssembly::mayThrow(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::THROW_I32:
+ case WebAssembly::THROW_I32_S:
case WebAssembly::THROW_I64:
+ case WebAssembly::THROW_I64_S:
case WebAssembly::RETHROW:
+ case WebAssembly::RETHROW_S:
return true;
}
if (isCallIndirect(MI))
@@ -212,7 +316,9 @@ bool WebAssembly::isCatchTerminatePad(const MachineBasicBlock &MBB) {
bool SeenCatch = false;
for (auto &MI : MBB) {
if (MI.getOpcode() == WebAssembly::CATCH_I32 ||
- MI.getOpcode() == WebAssembly::CATCH_I64)
+ MI.getOpcode() == WebAssembly::CATCH_I64 ||
+ MI.getOpcode() == WebAssembly::CATCH_I32_S ||
+ MI.getOpcode() == WebAssembly::CATCH_I64_S)
SeenCatch = true;
if (SeenCatch && MI.isCall()) {
const MachineOperand &CalleeOp = MI.getOperand(getCalleeOpNo(MI));
@@ -229,7 +335,8 @@ bool WebAssembly::isCatchAllTerminatePad(const MachineBasicBlock &MBB) {
return false;
bool SeenCatchAll = false;
for (auto &MI : MBB) {
- if (MI.getOpcode() == WebAssembly::CATCH_ALL)
+ if (MI.getOpcode() == WebAssembly::CATCH_ALL ||
+ MI.getOpcode() == WebAssembly::CATCH_ALL_S)
SeenCatchAll = true;
if (SeenCatchAll && MI.isCall()) {
const MachineOperand &CalleeOp = MI.getOperand(getCalleeOpNo(MI));
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8b7b250e1a09..899b50d0f78f 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -804,8 +804,8 @@ private:
return Parser.Error(L, Msg, Range);
}
- std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
- Error(Loc, Msg);
+ std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) {
+ Error(Loc, Msg, R);
return nullptr;
}
@@ -835,7 +835,10 @@ private:
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End);
- std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc MemStart);
+ std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg,
+ const MCExpr *&Disp,
+ const SMLoc &StartLoc,
+ SMLoc &EndLoc);
bool ParseIntelMemoryOperandSize(unsigned &Size);
std::unique_ptr<X86Operand>
@@ -852,6 +855,7 @@ private:
bool parseDirectiveFPOSetFrame(SMLoc L);
bool parseDirectiveFPOPushReg(SMLoc L);
bool parseDirectiveFPOStackAlloc(SMLoc L);
+ bool parseDirectiveFPOStackAlign(SMLoc L);
bool parseDirectiveFPOEndPrologue(SMLoc L);
bool parseDirectiveFPOEndProc(SMLoc L);
bool parseDirectiveFPOData(SMLoc L);
@@ -1010,8 +1014,7 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
// and then only in non-64-bit modes.
if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
(Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
- BaseReg != X86::SI && BaseReg != X86::DI)) &&
- BaseReg != X86::DX) {
+ BaseReg != X86::SI && BaseReg != X86::DI))) {
ErrMsg = "invalid 16-bit base register";
return true;
}
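
A hedged, standalone sketch of the rule tightened above: in 16-bit addressing only BX, BP, SI, and DI are legal base registers, 16-bit bases are never legal in 64-bit mode, and the old DX carve-out is gone (the (%dx) form is handled separately as a special operand later in the parser).

#include <cassert>
#include <string>

// Toy mirror of the 16-bit base-register rule; register names are strings
// here only for the sake of a self-contained example.
static bool isValid16BitBase(const std::string &Reg, bool Is64BitMode) {
  if (Is64BitMode)
    return false;
  return Reg == "bx" || Reg == "bp" || Reg == "si" || Reg == "di";
}

int main() {
  assert(isValid16BitBase("bx", /*Is64BitMode=*/false));
  assert(!isValid16BitBase("dx", /*Is64BitMode=*/false)); // no DX exception now
  assert(!isValid16BitBase("bx", /*Is64BitMode=*/true));
  return 0;
}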
@@ -1102,10 +1105,13 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (RegNo == X86::RIZ || RegNo == X86::RIP ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
- X86II::isX86_64ExtendedReg(RegNo))
- return Error(StartLoc, "register %"
- + Tok.getString() + " is only available in 64-bit mode",
+ X86II::isX86_64ExtendedReg(RegNo)) {
+ StringRef RegName = Tok.getString();
+ Parser.Lex(); // Eat register name.
+ return Error(StartLoc,
+ "register %" + RegName + " is only available in 64-bit mode",
SMRange(StartLoc, EndLoc));
+ }
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
@@ -1935,49 +1941,61 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
- default:
- // Parse a memory operand with no segment register.
- return ParseMemOperand(0, Parser.getTok().getLoc());
- case AsmToken::Percent: {
- // Read the register.
- unsigned RegNo;
- SMLoc Start, End;
- if (ParseRegister(RegNo, Start, End)) return nullptr;
- if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
- Error(Start, "%eiz and %riz can only be used as index registers",
- SMRange(Start, End));
- return nullptr;
- }
- if (RegNo == X86::RIP) {
- Error(Start, "%rip can only be used as a base register",
- SMRange(Start, End));
- return nullptr;
- }
-
- // If this is a segment register followed by a ':', then this is the start
- // of a memory reference, otherwise this is a normal register reference.
- if (getLexer().isNot(AsmToken::Colon))
- return X86Operand::CreateReg(RegNo, Start, End);
-
- if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
- return ErrorOperand(Start, "invalid segment register");
-
- getParser().Lex(); // Eat the colon.
- return ParseMemOperand(RegNo, Start);
- }
case AsmToken::Dollar: {
- // $42 -> immediate.
+ // $42 or $ID -> immediate.
SMLoc Start = Parser.getTok().getLoc(), End;
Parser.Lex();
const MCExpr *Val;
- if (getParser().parseExpression(Val, End))
+ // This is an immediate, so we should not parse a register. Do a precheck
+ // for '%' to supersede intra-register parse errors.
+ SMLoc L = Parser.getTok().getLoc();
+ if (check(getLexer().is(AsmToken::Percent), L,
+ "expected immediate expression") ||
+ getParser().parseExpression(Val, End) ||
+ check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
return nullptr;
return X86Operand::CreateImm(Val, Start, End);
}
- case AsmToken::LCurly:{
+ case AsmToken::LCurly: {
SMLoc Start = Parser.getTok().getLoc();
return ParseRoundingModeOp(Start);
}
+ default: {
+ // This is a memory operand or a register. We have some parsing complications
+ // as a '(' may be part of an immediate expression or the addressing mode
+ // block. This is complicated by the fact that an assembler-level variable
+ // may refer either to a register or an immediate expression.
+
+ SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
+ const MCExpr *Expr = nullptr;
+ unsigned Reg = 0;
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // No '(' so this is either a displacement expression or a register.
+ if (Parser.parseExpression(Expr, EndLoc))
+ return nullptr;
+ if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
+ // Segment Register. Reset Expr and copy value to register.
+ Expr = nullptr;
+ Reg = RE->getRegNo();
+
+ // Sanity check register.
+ if (Reg == X86::EIZ || Reg == X86::RIZ)
+ return ErrorOperand(
+ Loc, "%eiz and %riz can only be used as index registers",
+ SMRange(Loc, EndLoc));
+ if (Reg == X86::RIP)
+ return ErrorOperand(Loc, "%rip can only be used as a base register",
+ SMRange(Loc, EndLoc));
+ // Return registers that are not segment prefixes immediately.
+ if (!Parser.parseOptionalToken(AsmToken::Colon))
+ return X86Operand::CreateReg(Reg, Loc, EndLoc);
+ if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
+ return ErrorOperand(Loc, "invalid segment register");
+ }
+ }
+ // This is a Memory operand.
+ return ParseMemOperand(Reg, Expr, Loc, EndLoc);
+ }
}
}
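
To make the rewritten ParseATTOperand flow concrete, a hedged classification of example operands (the examples are assumptions, not tests from this patch): '$...' is an immediate, '{...}' is a rounding-mode operand, and everything else — registers, bare displacements, and full addressing expressions — now funnels through the default case into ParseMemOperand.

#include <cassert>
#include <string>

enum class OperandPath { Immediate, RoundingMode, MemOrReg };

// Very rough classifier of which ParseATTOperand branch an operand enters,
// keyed only off its first character.
static OperandPath classify(const std::string &Text) {
  if (!Text.empty() && Text[0] == '$')
    return OperandPath::Immediate;    // "$42", "$sym"
  if (!Text.empty() && Text[0] == '{')
    return OperandPath::RoundingMode; // "{rn-sae}"
  return OperandPath::MemOrReg;       // "%eax", "4(%ebp)", "(%eax,%ebx,2)", "foo"
}

int main() {
  assert(classify("$42") == OperandPath::Immediate);
  assert(classify("{rn-sae}") == OperandPath::RoundingMode);
  assert(classify("%eax") == OperandPath::MemOrReg); // register, resolved later
  assert(classify("8(%ebx,%esi,4)") == OperandPath::MemOrReg);
  return 0;
}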
@@ -2086,199 +2104,201 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
return false;
}
-/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
-/// has already been parsed if present.
+/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
+/// has already been parsed if present. disp may be provided as well.
std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
- SMLoc MemStart) {
-
+ const MCExpr *&Disp,
+ const SMLoc &StartLoc,
+ SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
- // We have to disambiguate a parenthesized expression "(4+5)" from the start
- // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
- // only way to do this without lookahead is to eat the '(' and see what is
- // after it.
- const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
- if (getLexer().isNot(AsmToken::LParen)) {
- SMLoc ExprEnd;
- if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
- // Disp may be a variable, handle register values.
- if (auto *RE = dyn_cast<X86MCExpr>(Disp))
- return X86Operand::CreateReg(RE->getRegNo(), MemStart, ExprEnd);
-
- // After parsing the base expression we could either have a parenthesized
- // memory address or not. If not, return now. If so, eat the (.
- if (getLexer().isNot(AsmToken::LParen)) {
- // Unless we have a segment register, treat this as an immediate.
- if (SegReg == 0)
- return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
- return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
- MemStart, ExprEnd);
+ SMLoc Loc;
+ // Based on the values passed in, we are in one of these cases (the current
+ // position is marked with (*)):
+
+ // 1. seg : * disp (base-index-scale-expr)
+ // 2. seg : *(disp) (base-index-scale-expr)
+ // 3. seg : *(base-index-scale-expr)
+ // 4. disp *(base-index-scale-expr)
+ // 5. *(disp) (base-index-scale-expr)
+ // 6. *(base-index-scale-expr)
+ // 7. disp *
+ // 8. *(disp)
+
+ // If we do not have a displacement yet, check if we're in cases 4 or 6 by
+ // checking if the first object after the parenthesis is a register (or an
+ // identifier referring to a register) and parse the displacement or default
+ // to 0 as appropriate.
+ auto isAtMemOperand = [this]() {
+ if (this->getLexer().isNot(AsmToken::LParen))
+ return false;
+ AsmToken Buf[2];
+ StringRef Id;
+ auto TokCount = this->getLexer().peekTokens(Buf, true);
+ if (TokCount == 0)
+ return false;
+ switch (Buf[0].getKind()) {
+ case AsmToken::Percent:
+ case AsmToken::Comma:
+ return true;
+ // The cases below are effectively doing a peekIdentifier.
+ case AsmToken::At:
+ case AsmToken::Dollar:
+ if ((TokCount > 1) &&
+ (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
+ (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
+ Id = StringRef(Buf[0].getLoc().getPointer(),
+ Buf[1].getIdentifier().size() + 1);
+ break;
+ case AsmToken::Identifier:
+ case AsmToken::String:
+ Id = Buf[0].getIdentifier();
+ break;
+ default:
+ return false;
}
+ // We have an ID. Check if it is bound to a register.
+ if (!Id.empty()) {
+ MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
+ if (Sym->isVariable()) {
+ auto V = Sym->getVariableValue(/*SetUsed*/ false);
+ return isa<X86MCExpr>(V);
+ }
+ }
+ return false;
+ };
- // Eat the '('.
- Parser.Lex();
- } else {
- // Okay, we have a '('. We don't know if this is an expression or not, but
- // so we have to eat the ( to see beyond it.
- SMLoc LParenLoc = Parser.getTok().getLoc();
- Parser.Lex(); // Eat the '('.
-
- if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
- // Nothing to do here, fall into the code below with the '(' part of the
- // memory operand consumed.
- } else {
- SMLoc ExprEnd;
- getLexer().UnLex(AsmToken(AsmToken::LParen, "("));
-
- // It must be either an parenthesized expression, or an expression that
- // begins from a parenthesized expression, parse it now. Example: (1+2) or
- // (1+2)+3
- if (getParser().parseExpression(Disp, ExprEnd))
+ if (!Disp) {
+ // Parse immediate if we're not at a mem operand yet.
+ if (!isAtMemOperand()) {
+ if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
return nullptr;
+ assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
+ } else {
+ // Disp is implicitly zero if we haven't parsed it yet.
+ Disp = MCConstantExpr::create(0, Parser.getContext());
+ }
+ }
- // After parsing the base expression we could either have a parenthesized
- // memory address or not. If not, return now. If so, eat the (.
- if (getLexer().isNot(AsmToken::LParen)) {
- // Unless we have a segment register, treat this as an immediate.
- if (SegReg == 0)
- return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
- ExprEnd);
- return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
- MemStart, ExprEnd);
- }
+ // We are now either at the end of the operand or at the '(' at the start of a
+ // base-index-scale-expr.
- // Eat the '('.
- Parser.Lex();
- }
+ if (!parseOptionalToken(AsmToken::LParen)) {
+ if (SegReg == 0)
+ return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
+ return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
+ StartLoc, EndLoc);
}
- // If we reached here, then we just ate the ( of the memory operand. Process
+ // If we reached here, then we have just eaten the '('. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
- SMLoc IndexLoc, BaseLoc;
-
- if (getLexer().is(AsmToken::Percent)) {
- SMLoc StartLoc, EndLoc;
- BaseLoc = Parser.getTok().getLoc();
- if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
- if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
- Error(StartLoc, "eiz and riz can only be used as index registers",
- SMRange(StartLoc, EndLoc));
+ SMLoc BaseLoc = getLexer().getLoc();
+ const MCExpr *E;
+ StringRef ErrMsg;
+
+ // Parse BaseReg if one is provided.
+ if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
+ if (Parser.parseExpression(E, EndLoc) ||
+ check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
return nullptr;
- }
- }
- if (getLexer().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat the comma.
- IndexLoc = Parser.getTok().getLoc();
+ // Sanity check register.
+ BaseReg = cast<X86MCExpr>(E)->getRegNo();
+ if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
+ return ErrorOperand(BaseLoc,
+ "eiz and riz can only be used as index registers",
+ SMRange(BaseLoc, EndLoc));
+ }
+ if (parseOptionalToken(AsmToken::Comma)) {
// Following the comma we should have either an index register, or a scale
// value. We don't support the latter form, but we want to parse it
// correctly.
//
- // Not that even though it would be completely consistent to support syntax
- // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
- if (getLexer().is(AsmToken::Percent)) {
- SMLoc L;
- if (ParseRegister(IndexReg, L, L))
+ // Even though it would be completely consistent to support syntax like
+ // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
+ if (getLexer().isNot(AsmToken::RParen)) {
+ if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
return nullptr;
- if (BaseReg == X86::RIP) {
- Error(IndexLoc, "%rip as base register can not have an index register");
- return nullptr;
- }
- if (IndexReg == X86::RIP) {
- Error(IndexLoc, "%rip is not allowed as an index register");
- return nullptr;
- }
-
- if (getLexer().isNot(AsmToken::RParen)) {
- // Parse the scale amount:
- // ::= ',' [scale-expression]
- if (parseToken(AsmToken::Comma, "expected comma in scale expression"))
- return nullptr;
- if (getLexer().isNot(AsmToken::RParen)) {
- SMLoc Loc = Parser.getTok().getLoc();
-
- int64_t ScaleVal;
- if (getParser().parseAbsoluteExpression(ScaleVal)){
- Error(Loc, "expected scale expression");
- return nullptr;
- }
-
- // Validate the scale amount.
- if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
- ScaleVal != 1) {
- Error(Loc, "scale factor in 16-bit address must be 1");
- return nullptr;
- }
- if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
- ScaleVal != 8) {
- Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
- return nullptr;
+ if (!isa<X86MCExpr>(E)) {
+ // We've parsed an unexpected Scale Value instead of an index
+ // register. Interpret it as an absolute.
+ int64_t ScaleVal;
+ if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
+ return ErrorOperand(Loc, "expected absolute expression");
+ if (ScaleVal != 1)
+ Warning(Loc, "scale factor without index register is ignored");
+ Scale = 1;
+ } else { // IndexReg Found.
+ IndexReg = cast<X86MCExpr>(E)->getRegNo();
+
+ if (BaseReg == X86::RIP)
+ return ErrorOperand(
+ Loc, "%rip as base register can not have an index register");
+ if (IndexReg == X86::RIP)
+ return ErrorOperand(Loc, "%rip is not allowed as an index register");
+
+ if (parseOptionalToken(AsmToken::Comma)) {
+ // Parse the scale amount:
+ // ::= ',' [scale-expression]
+
+ // A scale amount without an index is ignored.
+ if (getLexer().isNot(AsmToken::RParen)) {
+ int64_t ScaleVal;
+ if (Parser.parseTokenLoc(Loc) ||
+ Parser.parseAbsoluteExpression(ScaleVal))
+ return ErrorOperand(Loc, "expected scale expression");
+ Scale = (unsigned)ScaleVal;
+ // Validate the scale amount.
+ if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
+ Scale != 1)
+ return ErrorOperand(Loc,
+ "scale factor in 16-bit address must be 1");
+ if (checkScale(Scale, ErrMsg))
+ return ErrorOperand(Loc, ErrMsg);
}
- Scale = (unsigned)ScaleVal;
}
}
- } else if (getLexer().isNot(AsmToken::RParen)) {
- // A scale amount without an index is ignored.
- // index.
- SMLoc Loc = Parser.getTok().getLoc();
-
- int64_t Value;
- if (getParser().parseAbsoluteExpression(Value))
- return nullptr;
-
- if (Value != 1)
- Warning(Loc, "scale factor without index register is ignored");
- Scale = 1;
}
}
// Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
- SMLoc MemEnd = Parser.getTok().getEndLoc();
if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
return nullptr;
- // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
- // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
- // documented form in various unofficial manuals, so a lot of code uses it.
- if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 &&
- SegReg == 0 && isa<MCConstantExpr>(Disp) &&
- cast<MCConstantExpr>(Disp)->getValue() == 0)
+ // This is to support the otherwise illegal operand (%dx), found in examples
+ // in various unofficial manuals (e.g. "out[s]?[bwl]? %al, (%dx)"), which must
+ // now be supported. Mark such DX variants separately and fix them only in
+ // special cases.
+ if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
+ isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0)
return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
- StringRef ErrMsg;
if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
- ErrMsg)) {
- Error(BaseLoc, ErrMsg);
- return nullptr;
- }
+ ErrMsg))
+ return ErrorOperand(BaseLoc, ErrMsg);
if (SegReg || BaseReg || IndexReg)
return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
- IndexReg, Scale, MemStart, MemEnd);
- return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
+ IndexReg, Scale, StartLoc, EndLoc);
+ return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
}
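
A hedged, standalone illustration of the seg:disp(base,index,scale) decomposition the rewritten ParseMemOperand produces; the example operands are assumptions chosen for illustration, not test cases from the patch.

#include <cassert>
#include <string>

// The five components an AT&T memory operand decomposes into.
struct ToyMemOperand {
  std::string Seg, Base, Index;
  long Disp = 0;
  unsigned Scale = 1;
};

int main() {
  // %es:8(%ebx,%esi,4) -> seg=es, disp=8, base=ebx, index=esi, scale=4
  ToyMemOperand M1{"es", "ebx", "esi", 8, 4};
  assert(M1.Scale == 4 && M1.Disp == 8);

  // (,%eax,2) -> no segment, no base, displacement defaults to 0
  ToyMemOperand M2{"", "", "eax", 0, 2};
  assert(M2.Base.empty() && M2.Disp == 0);

  // A bare displacement such as "label" keeps base/index empty and scale 1
  // (symbolic displacements are not modeled in this toy struct).
  ToyMemOperand M3{"", "", "", 0, 1};
  assert(M3.Index.empty() && M3.Scale == 1);
  return 0;
}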
// Parse either a standard primary expression or a register.
bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
- if (Parser.parsePrimaryExpr(Res, EndLoc)) {
+ // See if this is a register first.
+ if (getTok().is(AsmToken::Percent) ||
+ (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
+ MatchRegisterName(Parser.getTok().getString()))) {
SMLoc StartLoc = Parser.getTok().getLoc();
- // Normal Expression parse fails, check if it could be a register.
unsigned RegNo;
- bool TryRegParse =
- getTok().is(AsmToken::Percent) ||
- (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier));
- if (!TryRegParse || ParseRegister(RegNo, StartLoc, EndLoc))
+ if (ParseRegister(RegNo, StartLoc, EndLoc))
return true;
- // Clear previous parse error and return correct expression.
- Parser.clearPendingErrors();
Res = X86MCExpr::create(RegNo, Parser.getContext());
return false;
}
-
- return false;
+ return Parser.parsePrimaryExpr(Res, EndLoc);
}
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -3282,7 +3302,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
else if (IDVal.startswith(".att_syntax")) {
- getParser().setParsingInlineAsm(false);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "prefix")
Parser.Lex();
@@ -3295,7 +3314,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
} else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
- getParser().setParsingInlineAsm(true);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "noprefix")
Parser.Lex();
@@ -3315,6 +3333,8 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveFPOPushReg(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_stackalloc")
return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
+ else if (IDVal == ".cv_fpo_stackalign")
+ return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_endprologue")
return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_endproc")
@@ -3429,6 +3449,16 @@ bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
return getTargetStreamer().emitFPOStackAlloc(Offset, L);
}
+// .cv_fpo_stackalign 8
+bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ int64_t Offset;
+ if (Parser.parseIntToken(Offset, "expected offset") ||
+ Parser.parseEOL("unexpected tokens"))
+ return addErrorSuffix(" in '.cv_fpo_stackalign' directive");
+ return getTargetStreamer().emitFPOStackAlign(Offset, L);
+}
+
// .cv_fpo_endprologue
bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
MCAsmParser &Parser = getParser();
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 1ac304f3be03..54d550b60652 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -983,45 +983,18 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
insn->opcode == 0xE3)
attrMask ^= ATTR_ADSIZE;
- /*
- * In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix
- * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes
- */
-
- if ((insn->mode == MODE_64BIT) && insn->hasOpSize) {
- switch (insn->opcode) {
- case 0xE8:
- case 0xE9:
- // Take care of psubsb and other mmx instructions.
- if (insn->opcodeType == ONEBYTE) {
- attrMask ^= ATTR_OPSIZE;
- insn->immediateSize = 4;
- insn->displacementSize = 4;
- }
- break;
- case 0x82:
- case 0x83:
- case 0x84:
- case 0x85:
- case 0x86:
- case 0x87:
- case 0x88:
- case 0x89:
- case 0x8A:
- case 0x8B:
- case 0x8C:
- case 0x8D:
- case 0x8E:
- case 0x8F:
- // Take care of lea and three byte ops.
- if (insn->opcodeType == TWOBYTE) {
- attrMask ^= ATTR_OPSIZE;
- insn->immediateSize = 4;
- insn->displacementSize = 4;
- }
- break;
- }
- }
+ // If we're in 16-bit mode and this is one of the relative jumps and opsize
+ // prefix isn't present, we need to force the opsize attribute since the
+ // prefix is inverted relative to 32-bit mode.
+ if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
+ insn->opcodeType == ONEBYTE &&
+ (insn->opcode == 0xE8 || insn->opcode == 0xE9))
+ attrMask |= ATTR_OPSIZE;
+
+ if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
+ insn->opcodeType == TWOBYTE &&
+ insn->opcode >= 0x80 && insn->opcode <= 0x8F)
+ attrMask |= ATTR_OPSIZE;
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
@@ -1420,7 +1393,7 @@ static int readModRM(struct InternalInstruction* insn) {
break;
case 0x1:
insn->displacementSize = 1;
- /* FALLTHROUGH */
+ LLVM_FALLTHROUGH;
case 0x2:
insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
switch (rm & 7) {
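A minimal standalone sketch, not part of this change, of the 16-bit-mode condition the rewritten hunk encodes: relative CALL/JMP (one-byte 0xE8/0xE9) and Jcc rel (two-byte 0x80..0x8F) have ATTR_OPSIZE forced when no 0x66 prefix is present, since the prefix meaning is inverted relative to 32-bit mode. The opcode-map kind is passed as booleans here instead of the decoder's internal enum.

// Sketch only: mirrors the decoder's new 16-bit-mode special case.
static bool forcesOpSizeAttr16Bit(bool HasOpSize, bool IsOneByteMap,
                                  bool IsTwoByteMap, unsigned Opcode) {
  if (HasOpSize)
    return false;
  if (IsOneByteMap && (Opcode == 0xE8 || Opcode == 0xE9))
    return true; // CALL rel / JMP rel
  if (IsTwoByteMap && Opcode >= 0x80 && Opcode <= 0x8F)
    return true; // Jcc rel
  return false;
}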
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 0e4c4398e49d..64e6fb9f0375 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -540,7 +540,6 @@ protected:
unsigned InstrOffset = 0;
unsigned StackAdjust = 0;
unsigned StackSize = 0;
- unsigned PrevStackSize = 0;
unsigned NumDefCFAOffsets = 0;
for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
@@ -588,7 +587,6 @@ protected:
// L0:
// .cfi_def_cfa_offset 80
//
- PrevStackSize = StackSize;
StackSize = std::abs(Inst.getOffset()) / StackDivide;
++NumDefCFAOffsets;
break;
@@ -635,16 +633,6 @@ protected:
CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
} else {
- // If the amount of the stack allocation is the size of a register, then
- // we "push" the RAX/EAX register onto the stack instead of adjusting the
- // stack pointer with a SUB instruction. We don't support the push of the
- // RAX/EAX register with compact unwind. So we check for that situation
- // here.
- if ((NumDefCFAOffsets == SavedRegIdx + 1 &&
- StackSize - PrevStackSize == 1) ||
- (Instrs.size() == 1 && NumDefCFAOffsets == 1 && StackSize == 2))
- return CU::UNWIND_MODE_DWARF;
-
SubtractInstrIdx += InstrOffset;
++StackAdjust;
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 497e29fe628e..c85ce9bbd5a4 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -231,6 +231,11 @@ namespace X86II {
/// to be an absolute symbol in range [0,128), so we can use the @ABS8
/// symbol modifier.
MO_ABS8,
+
+ /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the ".refptr.FOO" symbol. This is used for
+ /// stub symbols on Windows.
+ MO_COFFSTUB,
};
enum : uint64_t {
@@ -261,12 +266,12 @@ namespace X86II {
RawFrmSrc = 4,
/// RawFrmDst - This form is for instructions that use the destination index
- /// register DI/EDI/ESI.
+ /// register DI/EDI/RDI.
RawFrmDst = 5,
- /// RawFrmSrc - This form is for instructions that use the source index
- /// register SI/ESI/ERI with a possible segment override, and also the
- /// destination index register DI/ESI/RDI.
+ /// RawFrmDstSrc - This form is for instructions that use the source index
+ /// register SI/ESI/RSI with a possible segment override, and also the
+ /// destination index register DI/EDI/RDI.
RawFrmDstSrc = 6,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index f1d15e66918b..ea4aaf14223d 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
#include "X86GenRegisterInfo.inc"
#define GET_INSTRINFO_MC_DESC
-#define GET_GENINSTRINFO_MC_HELPERS
+#define GET_INSTRINFO_MC_HELPERS
#include "X86GenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
@@ -81,120 +81,193 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
codeview::RegisterId CVReg;
MCPhysReg Reg;
} RegMap[] = {
- { codeview::RegisterId::CVRegAL, X86::AL},
- { codeview::RegisterId::CVRegCL, X86::CL},
- { codeview::RegisterId::CVRegDL, X86::DL},
- { codeview::RegisterId::CVRegBL, X86::BL},
- { codeview::RegisterId::CVRegAH, X86::AH},
- { codeview::RegisterId::CVRegCH, X86::CH},
- { codeview::RegisterId::CVRegDH, X86::DH},
- { codeview::RegisterId::CVRegBH, X86::BH},
- { codeview::RegisterId::CVRegAX, X86::AX},
- { codeview::RegisterId::CVRegCX, X86::CX},
- { codeview::RegisterId::CVRegDX, X86::DX},
- { codeview::RegisterId::CVRegBX, X86::BX},
- { codeview::RegisterId::CVRegSP, X86::SP},
- { codeview::RegisterId::CVRegBP, X86::BP},
- { codeview::RegisterId::CVRegSI, X86::SI},
- { codeview::RegisterId::CVRegDI, X86::DI},
- { codeview::RegisterId::CVRegEAX, X86::EAX},
- { codeview::RegisterId::CVRegECX, X86::ECX},
- { codeview::RegisterId::CVRegEDX, X86::EDX},
- { codeview::RegisterId::CVRegEBX, X86::EBX},
- { codeview::RegisterId::CVRegESP, X86::ESP},
- { codeview::RegisterId::CVRegEBP, X86::EBP},
- { codeview::RegisterId::CVRegESI, X86::ESI},
- { codeview::RegisterId::CVRegEDI, X86::EDI},
-
- { codeview::RegisterId::CVRegEFLAGS, X86::EFLAGS},
-
- { codeview::RegisterId::CVRegST0, X86::FP0},
- { codeview::RegisterId::CVRegST1, X86::FP1},
- { codeview::RegisterId::CVRegST2, X86::FP2},
- { codeview::RegisterId::CVRegST3, X86::FP3},
- { codeview::RegisterId::CVRegST4, X86::FP4},
- { codeview::RegisterId::CVRegST5, X86::FP5},
- { codeview::RegisterId::CVRegST6, X86::FP6},
- { codeview::RegisterId::CVRegST7, X86::FP7},
-
- { codeview::RegisterId::CVRegXMM0, X86::XMM0},
- { codeview::RegisterId::CVRegXMM1, X86::XMM1},
- { codeview::RegisterId::CVRegXMM2, X86::XMM2},
- { codeview::RegisterId::CVRegXMM3, X86::XMM3},
- { codeview::RegisterId::CVRegXMM4, X86::XMM4},
- { codeview::RegisterId::CVRegXMM5, X86::XMM5},
- { codeview::RegisterId::CVRegXMM6, X86::XMM6},
- { codeview::RegisterId::CVRegXMM7, X86::XMM7},
-
- { codeview::RegisterId::CVRegXMM8, X86::XMM8},
- { codeview::RegisterId::CVRegXMM9, X86::XMM9},
- { codeview::RegisterId::CVRegXMM10, X86::XMM10},
- { codeview::RegisterId::CVRegXMM11, X86::XMM11},
- { codeview::RegisterId::CVRegXMM12, X86::XMM12},
- { codeview::RegisterId::CVRegXMM13, X86::XMM13},
- { codeview::RegisterId::CVRegXMM14, X86::XMM14},
- { codeview::RegisterId::CVRegXMM15, X86::XMM15},
-
- { codeview::RegisterId::CVRegSIL, X86::SIL},
- { codeview::RegisterId::CVRegDIL, X86::DIL},
- { codeview::RegisterId::CVRegBPL, X86::BPL},
- { codeview::RegisterId::CVRegSPL, X86::SPL},
- { codeview::RegisterId::CVRegRAX, X86::RAX},
- { codeview::RegisterId::CVRegRBX, X86::RBX},
- { codeview::RegisterId::CVRegRCX, X86::RCX},
- { codeview::RegisterId::CVRegRDX, X86::RDX},
- { codeview::RegisterId::CVRegRSI, X86::RSI},
- { codeview::RegisterId::CVRegRDI, X86::RDI},
- { codeview::RegisterId::CVRegRBP, X86::RBP},
- { codeview::RegisterId::CVRegRSP, X86::RSP},
- { codeview::RegisterId::CVRegR8, X86::R8},
- { codeview::RegisterId::CVRegR9, X86::R9},
- { codeview::RegisterId::CVRegR10, X86::R10},
- { codeview::RegisterId::CVRegR11, X86::R11},
- { codeview::RegisterId::CVRegR12, X86::R12},
- { codeview::RegisterId::CVRegR13, X86::R13},
- { codeview::RegisterId::CVRegR14, X86::R14},
- { codeview::RegisterId::CVRegR15, X86::R15},
- { codeview::RegisterId::CVRegR8B, X86::R8B},
- { codeview::RegisterId::CVRegR9B, X86::R9B},
- { codeview::RegisterId::CVRegR10B, X86::R10B},
- { codeview::RegisterId::CVRegR11B, X86::R11B},
- { codeview::RegisterId::CVRegR12B, X86::R12B},
- { codeview::RegisterId::CVRegR13B, X86::R13B},
- { codeview::RegisterId::CVRegR14B, X86::R14B},
- { codeview::RegisterId::CVRegR15B, X86::R15B},
- { codeview::RegisterId::CVRegR8W, X86::R8W},
- { codeview::RegisterId::CVRegR9W, X86::R9W},
- { codeview::RegisterId::CVRegR10W, X86::R10W},
- { codeview::RegisterId::CVRegR11W, X86::R11W},
- { codeview::RegisterId::CVRegR12W, X86::R12W},
- { codeview::RegisterId::CVRegR13W, X86::R13W},
- { codeview::RegisterId::CVRegR14W, X86::R14W},
- { codeview::RegisterId::CVRegR15W, X86::R15W},
- { codeview::RegisterId::CVRegR8D, X86::R8D},
- { codeview::RegisterId::CVRegR9D, X86::R9D},
- { codeview::RegisterId::CVRegR10D, X86::R10D},
- { codeview::RegisterId::CVRegR11D, X86::R11D},
- { codeview::RegisterId::CVRegR12D, X86::R12D},
- { codeview::RegisterId::CVRegR13D, X86::R13D},
- { codeview::RegisterId::CVRegR14D, X86::R14D},
- { codeview::RegisterId::CVRegR15D, X86::R15D},
- { codeview::RegisterId::CVRegAMD64_YMM0, X86::YMM0},
- { codeview::RegisterId::CVRegAMD64_YMM1, X86::YMM1},
- { codeview::RegisterId::CVRegAMD64_YMM2, X86::YMM2},
- { codeview::RegisterId::CVRegAMD64_YMM3, X86::YMM3},
- { codeview::RegisterId::CVRegAMD64_YMM4, X86::YMM4},
- { codeview::RegisterId::CVRegAMD64_YMM5, X86::YMM5},
- { codeview::RegisterId::CVRegAMD64_YMM6, X86::YMM6},
- { codeview::RegisterId::CVRegAMD64_YMM7, X86::YMM7},
- { codeview::RegisterId::CVRegAMD64_YMM8, X86::YMM8},
- { codeview::RegisterId::CVRegAMD64_YMM9, X86::YMM9},
- { codeview::RegisterId::CVRegAMD64_YMM10, X86::YMM10},
- { codeview::RegisterId::CVRegAMD64_YMM11, X86::YMM11},
- { codeview::RegisterId::CVRegAMD64_YMM12, X86::YMM12},
- { codeview::RegisterId::CVRegAMD64_YMM13, X86::YMM13},
- { codeview::RegisterId::CVRegAMD64_YMM14, X86::YMM14},
- { codeview::RegisterId::CVRegAMD64_YMM15, X86::YMM15},
+ {codeview::RegisterId::AL, X86::AL},
+ {codeview::RegisterId::CL, X86::CL},
+ {codeview::RegisterId::DL, X86::DL},
+ {codeview::RegisterId::BL, X86::BL},
+ {codeview::RegisterId::AH, X86::AH},
+ {codeview::RegisterId::CH, X86::CH},
+ {codeview::RegisterId::DH, X86::DH},
+ {codeview::RegisterId::BH, X86::BH},
+ {codeview::RegisterId::AX, X86::AX},
+ {codeview::RegisterId::CX, X86::CX},
+ {codeview::RegisterId::DX, X86::DX},
+ {codeview::RegisterId::BX, X86::BX},
+ {codeview::RegisterId::SP, X86::SP},
+ {codeview::RegisterId::BP, X86::BP},
+ {codeview::RegisterId::SI, X86::SI},
+ {codeview::RegisterId::DI, X86::DI},
+ {codeview::RegisterId::EAX, X86::EAX},
+ {codeview::RegisterId::ECX, X86::ECX},
+ {codeview::RegisterId::EDX, X86::EDX},
+ {codeview::RegisterId::EBX, X86::EBX},
+ {codeview::RegisterId::ESP, X86::ESP},
+ {codeview::RegisterId::EBP, X86::EBP},
+ {codeview::RegisterId::ESI, X86::ESI},
+ {codeview::RegisterId::EDI, X86::EDI},
+
+ {codeview::RegisterId::EFLAGS, X86::EFLAGS},
+
+ {codeview::RegisterId::ST0, X86::FP0},
+ {codeview::RegisterId::ST1, X86::FP1},
+ {codeview::RegisterId::ST2, X86::FP2},
+ {codeview::RegisterId::ST3, X86::FP3},
+ {codeview::RegisterId::ST4, X86::FP4},
+ {codeview::RegisterId::ST5, X86::FP5},
+ {codeview::RegisterId::ST6, X86::FP6},
+ {codeview::RegisterId::ST7, X86::FP7},
+
+ {codeview::RegisterId::XMM0, X86::XMM0},
+ {codeview::RegisterId::XMM1, X86::XMM1},
+ {codeview::RegisterId::XMM2, X86::XMM2},
+ {codeview::RegisterId::XMM3, X86::XMM3},
+ {codeview::RegisterId::XMM4, X86::XMM4},
+ {codeview::RegisterId::XMM5, X86::XMM5},
+ {codeview::RegisterId::XMM6, X86::XMM6},
+ {codeview::RegisterId::XMM7, X86::XMM7},
+
+ {codeview::RegisterId::XMM8, X86::XMM8},
+ {codeview::RegisterId::XMM9, X86::XMM9},
+ {codeview::RegisterId::XMM10, X86::XMM10},
+ {codeview::RegisterId::XMM11, X86::XMM11},
+ {codeview::RegisterId::XMM12, X86::XMM12},
+ {codeview::RegisterId::XMM13, X86::XMM13},
+ {codeview::RegisterId::XMM14, X86::XMM14},
+ {codeview::RegisterId::XMM15, X86::XMM15},
+
+ {codeview::RegisterId::SIL, X86::SIL},
+ {codeview::RegisterId::DIL, X86::DIL},
+ {codeview::RegisterId::BPL, X86::BPL},
+ {codeview::RegisterId::SPL, X86::SPL},
+ {codeview::RegisterId::RAX, X86::RAX},
+ {codeview::RegisterId::RBX, X86::RBX},
+ {codeview::RegisterId::RCX, X86::RCX},
+ {codeview::RegisterId::RDX, X86::RDX},
+ {codeview::RegisterId::RSI, X86::RSI},
+ {codeview::RegisterId::RDI, X86::RDI},
+ {codeview::RegisterId::RBP, X86::RBP},
+ {codeview::RegisterId::RSP, X86::RSP},
+ {codeview::RegisterId::R8, X86::R8},
+ {codeview::RegisterId::R9, X86::R9},
+ {codeview::RegisterId::R10, X86::R10},
+ {codeview::RegisterId::R11, X86::R11},
+ {codeview::RegisterId::R12, X86::R12},
+ {codeview::RegisterId::R13, X86::R13},
+ {codeview::RegisterId::R14, X86::R14},
+ {codeview::RegisterId::R15, X86::R15},
+ {codeview::RegisterId::R8B, X86::R8B},
+ {codeview::RegisterId::R9B, X86::R9B},
+ {codeview::RegisterId::R10B, X86::R10B},
+ {codeview::RegisterId::R11B, X86::R11B},
+ {codeview::RegisterId::R12B, X86::R12B},
+ {codeview::RegisterId::R13B, X86::R13B},
+ {codeview::RegisterId::R14B, X86::R14B},
+ {codeview::RegisterId::R15B, X86::R15B},
+ {codeview::RegisterId::R8W, X86::R8W},
+ {codeview::RegisterId::R9W, X86::R9W},
+ {codeview::RegisterId::R10W, X86::R10W},
+ {codeview::RegisterId::R11W, X86::R11W},
+ {codeview::RegisterId::R12W, X86::R12W},
+ {codeview::RegisterId::R13W, X86::R13W},
+ {codeview::RegisterId::R14W, X86::R14W},
+ {codeview::RegisterId::R15W, X86::R15W},
+ {codeview::RegisterId::R8D, X86::R8D},
+ {codeview::RegisterId::R9D, X86::R9D},
+ {codeview::RegisterId::R10D, X86::R10D},
+ {codeview::RegisterId::R11D, X86::R11D},
+ {codeview::RegisterId::R12D, X86::R12D},
+ {codeview::RegisterId::R13D, X86::R13D},
+ {codeview::RegisterId::R14D, X86::R14D},
+ {codeview::RegisterId::R15D, X86::R15D},
+ {codeview::RegisterId::AMD64_YMM0, X86::YMM0},
+ {codeview::RegisterId::AMD64_YMM1, X86::YMM1},
+ {codeview::RegisterId::AMD64_YMM2, X86::YMM2},
+ {codeview::RegisterId::AMD64_YMM3, X86::YMM3},
+ {codeview::RegisterId::AMD64_YMM4, X86::YMM4},
+ {codeview::RegisterId::AMD64_YMM5, X86::YMM5},
+ {codeview::RegisterId::AMD64_YMM6, X86::YMM6},
+ {codeview::RegisterId::AMD64_YMM7, X86::YMM7},
+ {codeview::RegisterId::AMD64_YMM8, X86::YMM8},
+ {codeview::RegisterId::AMD64_YMM9, X86::YMM9},
+ {codeview::RegisterId::AMD64_YMM10, X86::YMM10},
+ {codeview::RegisterId::AMD64_YMM11, X86::YMM11},
+ {codeview::RegisterId::AMD64_YMM12, X86::YMM12},
+ {codeview::RegisterId::AMD64_YMM13, X86::YMM13},
+ {codeview::RegisterId::AMD64_YMM14, X86::YMM14},
+ {codeview::RegisterId::AMD64_YMM15, X86::YMM15},
+ {codeview::RegisterId::AMD64_YMM16, X86::YMM16},
+ {codeview::RegisterId::AMD64_YMM17, X86::YMM17},
+ {codeview::RegisterId::AMD64_YMM18, X86::YMM18},
+ {codeview::RegisterId::AMD64_YMM19, X86::YMM19},
+ {codeview::RegisterId::AMD64_YMM20, X86::YMM20},
+ {codeview::RegisterId::AMD64_YMM21, X86::YMM21},
+ {codeview::RegisterId::AMD64_YMM22, X86::YMM22},
+ {codeview::RegisterId::AMD64_YMM23, X86::YMM23},
+ {codeview::RegisterId::AMD64_YMM24, X86::YMM24},
+ {codeview::RegisterId::AMD64_YMM25, X86::YMM25},
+ {codeview::RegisterId::AMD64_YMM26, X86::YMM26},
+ {codeview::RegisterId::AMD64_YMM27, X86::YMM27},
+ {codeview::RegisterId::AMD64_YMM28, X86::YMM28},
+ {codeview::RegisterId::AMD64_YMM29, X86::YMM29},
+ {codeview::RegisterId::AMD64_YMM30, X86::YMM30},
+ {codeview::RegisterId::AMD64_YMM31, X86::YMM31},
+ {codeview::RegisterId::AMD64_ZMM0, X86::ZMM0},
+ {codeview::RegisterId::AMD64_ZMM1, X86::ZMM1},
+ {codeview::RegisterId::AMD64_ZMM2, X86::ZMM2},
+ {codeview::RegisterId::AMD64_ZMM3, X86::ZMM3},
+ {codeview::RegisterId::AMD64_ZMM4, X86::ZMM4},
+ {codeview::RegisterId::AMD64_ZMM5, X86::ZMM5},
+ {codeview::RegisterId::AMD64_ZMM6, X86::ZMM6},
+ {codeview::RegisterId::AMD64_ZMM7, X86::ZMM7},
+ {codeview::RegisterId::AMD64_ZMM8, X86::ZMM8},
+ {codeview::RegisterId::AMD64_ZMM9, X86::ZMM9},
+ {codeview::RegisterId::AMD64_ZMM10, X86::ZMM10},
+ {codeview::RegisterId::AMD64_ZMM11, X86::ZMM11},
+ {codeview::RegisterId::AMD64_ZMM12, X86::ZMM12},
+ {codeview::RegisterId::AMD64_ZMM13, X86::ZMM13},
+ {codeview::RegisterId::AMD64_ZMM14, X86::ZMM14},
+ {codeview::RegisterId::AMD64_ZMM15, X86::ZMM15},
+ {codeview::RegisterId::AMD64_ZMM16, X86::ZMM16},
+ {codeview::RegisterId::AMD64_ZMM17, X86::ZMM17},
+ {codeview::RegisterId::AMD64_ZMM18, X86::ZMM18},
+ {codeview::RegisterId::AMD64_ZMM19, X86::ZMM19},
+ {codeview::RegisterId::AMD64_ZMM20, X86::ZMM20},
+ {codeview::RegisterId::AMD64_ZMM21, X86::ZMM21},
+ {codeview::RegisterId::AMD64_ZMM22, X86::ZMM22},
+ {codeview::RegisterId::AMD64_ZMM23, X86::ZMM23},
+ {codeview::RegisterId::AMD64_ZMM24, X86::ZMM24},
+ {codeview::RegisterId::AMD64_ZMM25, X86::ZMM25},
+ {codeview::RegisterId::AMD64_ZMM26, X86::ZMM26},
+ {codeview::RegisterId::AMD64_ZMM27, X86::ZMM27},
+ {codeview::RegisterId::AMD64_ZMM28, X86::ZMM28},
+ {codeview::RegisterId::AMD64_ZMM29, X86::ZMM29},
+ {codeview::RegisterId::AMD64_ZMM30, X86::ZMM30},
+ {codeview::RegisterId::AMD64_ZMM31, X86::ZMM31},
+ {codeview::RegisterId::AMD64_K0, X86::K0},
+ {codeview::RegisterId::AMD64_K1, X86::K1},
+ {codeview::RegisterId::AMD64_K2, X86::K2},
+ {codeview::RegisterId::AMD64_K3, X86::K3},
+ {codeview::RegisterId::AMD64_K4, X86::K4},
+ {codeview::RegisterId::AMD64_K5, X86::K5},
+ {codeview::RegisterId::AMD64_K6, X86::K6},
+ {codeview::RegisterId::AMD64_K7, X86::K7},
+ {codeview::RegisterId::AMD64_XMM16, X86::XMM16},
+ {codeview::RegisterId::AMD64_XMM17, X86::XMM17},
+ {codeview::RegisterId::AMD64_XMM18, X86::XMM18},
+ {codeview::RegisterId::AMD64_XMM19, X86::XMM19},
+ {codeview::RegisterId::AMD64_XMM20, X86::XMM20},
+ {codeview::RegisterId::AMD64_XMM21, X86::XMM21},
+ {codeview::RegisterId::AMD64_XMM22, X86::XMM22},
+ {codeview::RegisterId::AMD64_XMM23, X86::XMM23},
+ {codeview::RegisterId::AMD64_XMM24, X86::XMM24},
+ {codeview::RegisterId::AMD64_XMM25, X86::XMM25},
+ {codeview::RegisterId::AMD64_XMM26, X86::XMM26},
+ {codeview::RegisterId::AMD64_XMM27, X86::XMM27},
+ {codeview::RegisterId::AMD64_XMM28, X86::XMM28},
+ {codeview::RegisterId::AMD64_XMM29, X86::XMM29},
+ {codeview::RegisterId::AMD64_XMM30, X86::XMM30},
+ {codeview::RegisterId::AMD64_XMM31, X86::XMM31},
+
};
for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
@@ -307,83 +380,19 @@ class X86MCInstrAnalysis : public MCInstrAnalysis {
public:
X86MCInstrAnalysis(const MCInstrInfo *MCII) : MCInstrAnalysis(MCII) {}
- bool isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const override;
+#define GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS
+#include "X86GenSubtargetInfo.inc"
+
bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst,
APInt &Mask) const override;
+ std::vector<std::pair<uint64_t, uint64_t>>
+ findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotSectionVA,
+ const Triple &TargetTriple) const override;
};
-bool X86MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const {
- if (STI.getCPU() == "btver2") {
- // Reference: Agner Fog's microarchitecture.pdf - Section 20 "AMD Bobcat and
- // Jaguar pipeline", subsection 8 "Dependency-breaking instructions".
- switch (Inst.getOpcode()) {
- default:
- return false;
- case X86::SUB32rr:
- case X86::SUB64rr:
- case X86::SBB32rr:
- case X86::SBB64rr:
- case X86::XOR32rr:
- case X86::XOR64rr:
- case X86::XORPSrr:
- case X86::XORPDrr:
- case X86::VXORPSrr:
- case X86::VXORPDrr:
- case X86::ANDNPSrr:
- case X86::VANDNPSrr:
- case X86::ANDNPDrr:
- case X86::VANDNPDrr:
- case X86::PXORrr:
- case X86::VPXORrr:
- case X86::PANDNrr:
- case X86::VPANDNrr:
- case X86::PSUBBrr:
- case X86::PSUBWrr:
- case X86::PSUBDrr:
- case X86::PSUBQrr:
- case X86::VPSUBBrr:
- case X86::VPSUBWrr:
- case X86::VPSUBDrr:
- case X86::VPSUBQrr:
- case X86::PCMPEQBrr:
- case X86::PCMPEQWrr:
- case X86::PCMPEQDrr:
- case X86::PCMPEQQrr:
- case X86::VPCMPEQBrr:
- case X86::VPCMPEQWrr:
- case X86::VPCMPEQDrr:
- case X86::VPCMPEQQrr:
- case X86::PCMPGTBrr:
- case X86::PCMPGTWrr:
- case X86::PCMPGTDrr:
- case X86::PCMPGTQrr:
- case X86::VPCMPGTBrr:
- case X86::VPCMPGTWrr:
- case X86::VPCMPGTDrr:
- case X86::VPCMPGTQrr:
- case X86::MMX_PXORirr:
- case X86::MMX_PANDNirr:
- case X86::MMX_PSUBBirr:
- case X86::MMX_PSUBDirr:
- case X86::MMX_PSUBQirr:
- case X86::MMX_PSUBWirr:
- case X86::MMX_PCMPGTBirr:
- case X86::MMX_PCMPGTDirr:
- case X86::MMX_PCMPGTWirr:
- case X86::MMX_PCMPEQBirr:
- case X86::MMX_PCMPEQDirr:
- case X86::MMX_PCMPEQWirr:
- return Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg();
- case X86::CMP32rr:
- case X86::CMP64rr:
- return Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg();
- }
- }
-
- return false;
-}
+#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS
+#include "X86GenSubtargetInfo.inc"
bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
const MCInst &Inst,
@@ -437,6 +446,64 @@ bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
return Mask.getBoolValue();
}
+static std::vector<std::pair<uint64_t, uint64_t>>
+findX86PltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotPltSectionVA) {
+ // Do a lightweight parsing of PLT entries.
+ std::vector<std::pair<uint64_t, uint64_t>> Result;
+ for (uint64_t Byte = 0, End = PltContents.size(); Byte + 6 < End; ) {
+ // Recognize a jmp.
+ if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0xa3) {
+ // The jmp instruction at the beginning of each PLT entry jumps to the
+ // address of the base of the .got.plt section plus the immediate.
+ uint32_t Imm = support::endian::read32le(PltContents.data() + Byte + 2);
+ Result.push_back(
+ std::make_pair(PltSectionVA + Byte, GotPltSectionVA + Imm));
+ Byte += 6;
+ } else if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0x25) {
+ // The jmp instruction at the beginning of each PLT entry jumps to the
+ // immediate.
+ uint32_t Imm = support::endian::read32le(PltContents.data() + Byte + 2);
+ Result.push_back(std::make_pair(PltSectionVA + Byte, Imm));
+ Byte += 6;
+ } else
+ Byte++;
+ }
+ return Result;
+}
+
+static std::vector<std::pair<uint64_t, uint64_t>>
+findX86_64PltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents) {
+ // Do a lightweight parsing of PLT entries.
+ std::vector<std::pair<uint64_t, uint64_t>> Result;
+ for (uint64_t Byte = 0, End = PltContents.size(); Byte + 6 < End; ) {
+ // Recognize a jmp.
+ if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0x25) {
+ // The jmp instruction at the beginning of each PLT entry jumps to the
+ // address of the next instruction plus the immediate.
+ uint32_t Imm = support::endian::read32le(PltContents.data() + Byte + 2);
+ Result.push_back(
+ std::make_pair(PltSectionVA + Byte, PltSectionVA + Byte + 6 + Imm));
+ Byte += 6;
+ } else
+ Byte++;
+ }
+ return Result;
+}
+
+std::vector<std::pair<uint64_t, uint64_t>> X86MCInstrAnalysis::findPltEntries(
+ uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotPltSectionVA, const Triple &TargetTriple) const {
+ switch (TargetTriple.getArch()) {
+ case Triple::x86:
+ return findX86PltEntries(PltSectionVA, PltContents, GotPltSectionVA);
+ case Triple::x86_64:
+ return findX86_64PltEntries(PltSectionVA, PltContents);
+ default:
+ return {};
+ }
+}
+
} // end of namespace X86_MC
} // end of namespace llvm
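Not part of the commit: a self-contained sketch of the arithmetic findX86_64PltEntries() performs for each entry. An x86-64 PLT slot that begins with the bytes ff 25 imm32 is a RIP-relative jmp, so the referenced GOT address is the address of the next instruction (entry start + 6) plus the little-endian immediate.

#include <cstdint>
#include <cstring>

// Sketch only: compute the GOT address an x86-64 PLT entry jumps through.
// Assumes a little-endian host and at least 6 readable bytes at EntryBytes.
static uint64_t gotTargetOfPltEntry(uint64_t EntryVA, const uint8_t *EntryBytes) {
  uint32_t Imm;
  std::memcpy(&Imm, EntryBytes + 2, sizeof(Imm)); // bytes 2..5: disp32
  return EntryVA + 6 + Imm;
}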
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 595c26d31e3f..4e9f5ba60d2e 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -134,7 +134,7 @@ unsigned getX86SubSuperRegisterOrZero(unsigned, unsigned,
// Defines symbolic names for the X86 instructions.
//
#define GET_INSTRINFO_ENUM
-#define GET_GENINSTRINFO_MC_DECL
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "X86GenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
index 8d38cd32b82c..10a282dd2962 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
@@ -26,6 +26,7 @@ public:
virtual bool emitFPOData(const MCSymbol *ProcSym, SMLoc L = {}) = 0;
virtual bool emitFPOPushReg(unsigned Reg, SMLoc L = {}) = 0;
virtual bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L = {}) = 0;
+ virtual bool emitFPOStackAlign(unsigned Align, SMLoc L = {}) = 0;
virtual bool emitFPOSetFrame(unsigned Reg, SMLoc L = {}) = 0;
};
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index a5e115e5ff4d..2aec695b2dbf 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -79,7 +79,8 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
case FK_SecRel_4:
return COFF::IMAGE_REL_AMD64_SECREL;
default:
- llvm_unreachable("unsupported relocation type");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
+ return COFF::IMAGE_REL_AMD64_ADDR32;
}
} else if (getMachine() == COFF::IMAGE_FILE_MACHINE_I386) {
switch (FixupKind) {
@@ -100,7 +101,8 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
case FK_SecRel_4:
return COFF::IMAGE_REL_I386_SECREL;
default:
- llvm_unreachable("unsupported relocation type");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
+ return COFF::IMAGE_REL_I386_DIR32;
}
} else
llvm_unreachable("Unsupported COFF machine type.");
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
index 093dab4f2f96..bee9b7046338 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
@@ -38,6 +38,7 @@ public:
bool emitFPOData(const MCSymbol *ProcSym, SMLoc L) override;
bool emitFPOPushReg(unsigned Reg, SMLoc L) override;
bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) override;
+ bool emitFPOStackAlign(unsigned Align, SMLoc L) override;
bool emitFPOSetFrame(unsigned Reg, SMLoc L) override;
};
@@ -47,6 +48,7 @@ struct FPOInstruction {
enum Operation {
PushReg,
StackAlloc,
+ StackAlign,
SetFrame,
} Op;
unsigned RegOrOffset;
@@ -90,6 +92,7 @@ public:
bool emitFPOData(const MCSymbol *ProcSym, SMLoc L) override;
bool emitFPOPushReg(unsigned Reg, SMLoc L) override;
bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) override;
+ bool emitFPOStackAlign(unsigned Align, SMLoc L) override;
bool emitFPOSetFrame(unsigned Reg, SMLoc L) override;
};
} // end namespace
@@ -133,6 +136,11 @@ bool X86WinCOFFAsmTargetStreamer::emitFPOStackAlloc(unsigned StackAlloc,
return false;
}
+bool X86WinCOFFAsmTargetStreamer::emitFPOStackAlign(unsigned Align, SMLoc L) {
+ OS << "\t.cv_fpo_stackalign\t" << Align << '\n';
+ return false;
+}
+
bool X86WinCOFFAsmTargetStreamer::emitFPOSetFrame(unsigned Reg, SMLoc L) {
OS << "\t.cv_fpo_setframe\t";
InstPrinter.printRegName(OS, Reg);
@@ -226,6 +234,24 @@ bool X86WinCOFFTargetStreamer::emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) {
return false;
}
+bool X86WinCOFFTargetStreamer::emitFPOStackAlign(unsigned Align, SMLoc L) {
+ if (checkInFPOPrologue(L))
+ return true;
+ if (!llvm::any_of(CurFPOData->Instructions, [](const FPOInstruction &Inst) {
+ return Inst.Op == FPOInstruction::SetFrame;
+ })) {
+ getContext().reportError(
+ L, "a frame register must be established before aligning the stack");
+ return true;
+ }
+ FPOInstruction Inst;
+ Inst.Label = emitFPOLabel();
+ Inst.Op = FPOInstruction::StackAlign;
+ Inst.RegOrOffset = Align;
+ CurFPOData->Instructions.push_back(Inst);
+ return false;
+}
+
bool X86WinCOFFTargetStreamer::emitFPOEndPrologue(SMLoc L) {
if (checkInFPOPrologue(L))
return true;
@@ -250,6 +276,8 @@ struct FPOStateMachine {
unsigned CurOffset = 0;
unsigned LocalSize = 0;
unsigned SavedRegSize = 0;
+ unsigned StackOffsetBeforeAlign = 0;
+ unsigned StackAlign = 0;
unsigned Flags = 0; // FIXME: Set HasSEH / HasEH.
SmallString<128> FrameFunc;
@@ -291,24 +319,39 @@ void FPOStateMachine::emitFrameDataRecord(MCStreamer &OS, MCSymbol *Label) {
FrameFunc.clear();
raw_svector_ostream FuncOS(FrameFunc);
const MCRegisterInfo *MRI = OS.getContext().getRegisterInfo();
+ assert((StackAlign == 0 || FrameReg != 0) &&
+ "cannot align stack without frame reg");
+ StringRef CFAVar = StackAlign == 0 ? "$T0" : "$T1";
+
if (FrameReg) {
// CFA is FrameReg + FrameRegOff.
- FuncOS << "$T0 " << printFPOReg(MRI, FrameReg) << " " << FrameRegOff
+ FuncOS << CFAVar << ' ' << printFPOReg(MRI, FrameReg) << ' ' << FrameRegOff
<< " + = ";
+
+ // Assign $T0, the VFRAME register, the value of ESP after it is aligned.
+ // Starting from the CFA, we subtract the size of all pushed registers, and
+ // align the result. While we don't store any CSRs in this area, $T0 is used
+ // by S_DEFRANGE_FRAMEPOINTER_REL records to find local variables.
+ if (StackAlign) {
+ FuncOS << "$T0 " << CFAVar << ' ' << StackOffsetBeforeAlign << " - "
+ << StackAlign << " @ = ";
+ }
} else {
// The address of return address is ESP + CurOffset, but we use .raSearch to
// match MSVC. This seems to ask the debugger to subtract some combination
// of LocalSize and SavedRegSize from ESP and grovel around in that memory
// to find the address of a plausible return address.
- FuncOS << "$T0 .raSearch = ";
+ FuncOS << CFAVar << " .raSearch = ";
}
// Caller's $eip should be dereferenced CFA, and $esp should be CFA plus 4.
- FuncOS << "$eip $T0 ^ = $esp $T0 4 + = ";
+ FuncOS << "$eip " << CFAVar << " ^ = ";
+ FuncOS << "$esp " << CFAVar << " 4 + = ";
// Each saved register is stored at an unchanging negative CFA offset.
for (RegSaveOffset RO : RegSaveOffsets)
- FuncOS << printFPOReg(MRI, RO.Reg) << " $T0 " << RO.Offset << " - ^ = ";
+ FuncOS << printFPOReg(MRI, RO.Reg) << ' ' << CFAVar << ' ' << RO.Offset
+ << " - ^ = ";
// Add it to the CV string table.
CodeViewContext &CVCtx = OS.getContext().getCVContext();
@@ -380,6 +423,10 @@ bool X86WinCOFFTargetStreamer::emitFPOData(const MCSymbol *ProcSym, SMLoc L) {
FSM.FrameReg = Inst.RegOrOffset;
FSM.FrameRegOff = FSM.CurOffset;
break;
+ case FPOInstruction::StackAlign:
+ FSM.StackOffsetBeforeAlign = FSM.CurOffset;
+ FSM.StackAlign = Inst.RegOrOffset;
+ break;
case FPOInstruction::StackAlloc:
FSM.CurOffset += Inst.RegOrOffset;
FSM.LocalSize += Inst.RegOrOffset;
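Not part of the commit: a standalone sketch of how the FrameFunc CodeView program reads once a .cv_fpo_stackalign is recorded. $T1 names the CFA and $T0 the aligned virtual frame pointer; the frame register, offsets, and alignment below are illustrative values, not taken from any particular function.

#include <string>

// Sketch only: compose the postfix FrameFunc string for an aligned frame.
static std::string buildAlignedFrameFunc(unsigned FrameRegOff,
                                         unsigned PushedRegSize,
                                         unsigned StackAlign) {
  std::string S;
  // CFA = frame register + its recorded offset.
  S += "$T1 $ebp " + std::to_string(FrameRegOff) + " + = ";
  // VFRAME = align(CFA - size of pushed registers, StackAlign).
  S += "$T0 $T1 " + std::to_string(PushedRegSize) + " - " +
       std::to_string(StackAlign) + " @ = ";
  // Caller EIP is at [CFA]; caller ESP is CFA + 4.
  S += "$eip $T1 ^ = $esp $T1 4 + = ";
  return S;
}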
diff --git a/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp b/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp
index 9a39455f9dd5..ab2cebcb58ee 100644
--- a/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp
+++ b/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp
@@ -31,10 +31,6 @@
using namespace llvm;
-namespace llvm {
-void initializeShadowCallStackPass(PassRegistry &);
-}
-
namespace {
class ShadowCallStack : public MachineFunctionPass {
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index fe567f4cece8..bed940d0d0e9 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -304,12 +304,12 @@ void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
}
}
-void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
for (int i = 0, e = RawMask.size(); i < e; ++i) {
uint64_t M = RawMask[i];
- if (M == (uint64_t)SM_SentinelUndef) {
- ShuffleMask.push_back(M);
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
// For 256/512-bit vectors the base of the shuffle is the 128-bit
@@ -336,7 +336,7 @@ void DecodeBLENDMask(unsigned NumElts, unsigned Imm,
}
}
-void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");
@@ -354,12 +354,12 @@ void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
// 6 - Most significant bit of source byte replicated in all bit positions.
// 7 - Invert most significant bit of source byte and replicate in all bit positions.
for (int i = 0, e = RawMask.size(); i < e; ++i) {
- uint64_t M = RawMask[i];
- if (M == (uint64_t)SM_SentinelUndef) {
- ShuffleMask.push_back(M);
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
+ uint64_t M = RawMask[i];
uint64_t PermuteOp = (M >> 5) & 0x7;
if (PermuteOp == 4) {
ShuffleMask.push_back(SM_SentinelZero);
@@ -490,7 +490,7 @@ void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
}
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
unsigned VecSize = NumElts * ScalarBits;
unsigned NumLanes = VecSize / 128;
@@ -500,6 +500,10 @@ void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
uint64_t M = RawMask[i];
M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
@@ -508,7 +512,7 @@ void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
}
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
unsigned VecSize = NumElts * ScalarBits;
unsigned NumLanes = VecSize / 128;
@@ -518,6 +522,11 @@ void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
assert((NumElts == RawMask.size()) && "Unexpected mask size");
for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+
// VPERMIL2 Operation.
// Bits[3] - Match Bit.
// Bits[2:1] - (Per Lane) PD Shuffle Mask.
@@ -548,19 +557,29 @@ void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
}
}
-void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
uint64_t EltMaskSize = RawMask.size() - 1;
- for (auto M : RawMask) {
+ for (int i = 0, e = RawMask.size(); i != e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t M = RawMask[i];
M &= EltMaskSize;
ShuffleMask.push_back((int)M);
}
}
-void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
- for (auto M : RawMask) {
+ for (int i = 0, e = RawMask.size(); i != e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t M = RawMask[i];
M &= EltMaskSize;
ShuffleMask.push_back((int)M);
}
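Not part of the commit: a minimal illustration of the new decode contract shared by these functions. An element flagged in the undef mask becomes the undef sentinel instead of being interpreted; only defined elements are masked into the valid index range. A std::vector<bool> stands in for APInt here.

#include <cstdint>
#include <vector>

// Sketch only: VPERMV-style decode with per-element undef tracking.
static std::vector<int> decodeWithUndefs(const std::vector<uint64_t> &RawMask,
                                         const std::vector<bool> &UndefElts) {
  const int SentinelUndef = -1;              // stand-in for SM_SentinelUndef
  uint64_t EltMaskSize = RawMask.size() - 1; // assumes power-of-two mask size
  std::vector<int> ShuffleMask;
  for (size_t i = 0; i != RawMask.size(); ++i)
    ShuffleMask.push_back(UndefElts[i] ? SentinelUndef
                                       : int(RawMask[i] & EltMaskSize));
  return ShuffleMask;
}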
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 6d13bd58a127..85cde14a3241 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
#define LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
//===----------------------------------------------------------------------===//
@@ -108,7 +109,7 @@ void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
/// Decode a PSHUFB mask from a raw array of constants such as from
/// BUILD_VECTOR.
-void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a BLEND immediate mask into a shuffle mask.
@@ -131,7 +132,7 @@ void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
/// BUILD_VECTOR.
/// This can only decode basic masks (permutes + zeros), not any of the other
/// operations that VPPERM can perform.
-void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a zero extension instruction as a shuffle mask.
@@ -156,20 +157,20 @@ void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
/// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
-void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
-void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index 73bb0f2af285..1c8813815b86 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -75,6 +75,9 @@ FunctionPass *createX86OptimizeLEAs();
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
FunctionPass *createX86FixupSetCC();
+/// Return a pass that folds conditional branch jumps.
+FunctionPass *createX86CondBrFolding();
+
/// Return a pass that avoids creating store forward block issues in the hardware.
FunctionPass *createX86AvoidStoreForwardingBlocks();
@@ -112,8 +115,6 @@ FunctionPass *createX86FixupBWInsts();
/// to another, when profitable.
FunctionPass *createX86DomainReassignmentPass();
-void initializeFixupBWInstPassPass(PassRegistry &);
-
/// This pass replaces EVEX encoding of AVX-512 instructions by VEX
/// encoding when possible in order to reduce code size.
FunctionPass *createX86EvexToVexInsts();
@@ -121,14 +122,33 @@ FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
FunctionPass *createX86RetpolineThunksPass();
+/// This pass ensures instructions featuring a memory operand
+/// have distinctive <LineNumber, Discriminator> (with respect to each other).
+FunctionPass *createX86DiscriminateMemOpsPass();
+
+/// This pass applies profiling information to insert cache prefetches.
+FunctionPass *createX86InsertPrefetchPass();
+
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
X86RegisterBankInfo &);
-void initializeEvexToVexInstPassPass(PassRegistry &);
-
FunctionPass *createX86SpeculativeLoadHardeningPass();
+void initializeEvexToVexInstPassPass(PassRegistry &);
+void initializeFixupBWInstPassPass(PassRegistry &);
+void initializeFixupLEAPassPass(PassRegistry &);
+void initializeShadowCallStackPass(PassRegistry &);
+void initializeWinEHStatePassPass(PassRegistry &);
+void initializeX86AvoidSFBPassPass(PassRegistry &);
+void initializeX86CallFrameOptimizationPass(PassRegistry &);
+void initializeX86CmovConverterPassPass(PassRegistry &);
+void initializeX86CondBrFoldingPassPass(PassRegistry &);
+void initializeX86DomainReassignmentPass(PassRegistry &);
+void initializeX86ExecutionDomainFixPass(PassRegistry &);
+void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
+
} // End llvm namespace
#endif
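Not part of the commit: the initializer declarations added above are normally invoked once from the target's LLVMInitializeX86Target() against the global PassRegistry so the machine passes can be referenced by name from pass-management tooling. A hedged sketch, assuming X86.h is on the include path:

#include "X86.h"
#include "llvm/PassRegistry.h"

// Sketch only: register a couple of the newly declared passes; the full
// target initializer would call every initialize*Pass() listed above.
static void registerSomeX86Passes() {
  llvm::PassRegistry &PR = *llvm::PassRegistry::getPassRegistry();
  llvm::initializeX86CondBrFoldingPassPass(PR);
  llvm::initializeX86SpeculativeLoadHardeningPassPass(PR);
}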
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 63c2dc4da6cc..6b1749fc7500 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -59,10 +59,7 @@ def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
"Support xsaves instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
- "Enable SSE instructions",
- // SSE codegen depends on cmovs, and all
- // SSE1+ processors support them.
- [FeatureCMOV]>;
+ "Enable SSE instructions">;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
@@ -91,17 +88,19 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
[Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
-// without disabling 64-bit mode.
+// without disabling 64-bit mode. Nothing should imply this feature bit. It
+// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
- "Support 64-bit instructions",
- [FeatureCMOV]>;
+ "Support 64-bit instructions">;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
- "64-bit with cmpxchg16b",
- [Feature64Bit]>;
+ "64-bit with cmpxchg16b">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
"PMULLD instruction is slow">;
+def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
+ "true",
+ "PMADDWD is slower than PMULLD">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUAMem16Slow", "true",
@@ -362,17 +361,30 @@ def FeaturePrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
-// Enable mitigation of some aspects of speculative execution related
-// vulnerabilities by removing speculatable indirect branches. This disables
-// jump-table formation, rewrites explicit `indirectbr` instructions into
-// `switch` instructions, and uses a special construct called a "retpoline" to
-// prevent speculation of the remaining indirect branches (indirect calls and
-// tail calls).
+// Lower indirect calls using a special construct called a `retpoline` to
+// mitigate potential Spectre v2 attacks against them.
+def FeatureRetpolineIndirectCalls
+ : SubtargetFeature<
+ "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
+ "Remove speculation of indirect calls from the generated code.">;
+
+// Lower indirect branches and switches either using conditional branch trees
+// or using a special construct called a `retpoline` to mitigate potential
+// Spectre v2 attacks against them.
+def FeatureRetpolineIndirectBranches
+ : SubtargetFeature<
+ "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
+ "Remove speculation of indirect branches from the generated code.">;
+
+// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
+// `retpoline-indirect-branches` above.
def FeatureRetpoline
- : SubtargetFeature<"retpoline", "UseRetpoline", "true",
+ : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
"Remove speculation of indirect branches from the "
"generated code, either by avoiding them entirely or "
- "lowering them with a speculation blocking construct.">;
+ "lowering them with a speculation blocking construct.",
+ [FeatureRetpolineIndirectCalls,
+ FeatureRetpolineIndirectBranches]>;
// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
@@ -380,8 +392,10 @@ def FeatureRetpoline
def FeatureRetpolineExternalThunk
: SubtargetFeature<
"retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
- "Enable retpoline, but with an externally provided thunk.",
- [FeatureRetpoline]>;
+ "When lowering an indirect call or branch using a `retpoline`, rely "
+ "on the specified user provided thunk rather than emitting one "
+ "ourselves. Only has effect when combined with some other retpoline "
+ "feature.", [FeatureRetpolineIndirectCalls]>;
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
@@ -389,6 +403,25 @@ def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
"Support movdir64b instruction">;
+def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
+ "Indicates that the BEXTR instruction is implemented as a single uop "
+ "with good throughput.">;
+
+// Combine vector math operations with shuffles into horizontal math
+// instructions if a CPU implements horizontal operations (introduced with
+// SSE3) with better latency/throughput than the alternative sequence.
+def FeatureFastHorizontalOps
+ : SubtargetFeature<
+ "fast-hops", "HasFastHorizontalOps", "true",
+ "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
+ "normal vector instructions with shuffles", [FeatureSSE3]>;
+
+// Merge branches using three-way conditional code.
+def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
+ "ThreewayBranchProfitable", "true",
+ "Merge branches to a three-way "
+ "conditional branch">;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -416,6 +449,7 @@ include "X86SchedHaswell.td"
include "X86SchedBroadwell.td"
include "X86ScheduleSLM.td"
include "X86ScheduleZnver1.td"
+include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
@@ -430,22 +464,6 @@ def ProcIntelGLP : SubtargetFeature<"glp", "X86ProcFamily", "IntelGLP",
"Intel Goldmont Plus processors">;
def ProcIntelTRM : SubtargetFeature<"tremont", "X86ProcFamily", "IntelTRM",
"Intel Tremont processors">;
-def ProcIntelHSW : SubtargetFeature<"haswell", "X86ProcFamily",
- "IntelHaswell", "Intel Haswell processors">;
-def ProcIntelBDW : SubtargetFeature<"broadwell", "X86ProcFamily",
- "IntelBroadwell", "Intel Broadwell processors">;
-def ProcIntelSKL : SubtargetFeature<"skylake", "X86ProcFamily",
- "IntelSkylake", "Intel Skylake processors">;
-def ProcIntelKNL : SubtargetFeature<"knl", "X86ProcFamily",
- "IntelKNL", "Intel Knights Landing processors">;
-def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily",
- "IntelSKX", "Intel Skylake Server processors">;
-def ProcIntelCNL : SubtargetFeature<"cannonlake", "X86ProcFamily",
- "IntelCannonlake", "Intel Cannonlake processors">;
-def ProcIntelICL : SubtargetFeature<"icelake-client", "X86ProcFamily",
- "IntelIcelakeClient", "Intel Icelake processors">;
-def ProcIntelICX : SubtargetFeature<"icelake-server", "X86ProcFamily",
- "IntelIcelakeServer", "Intel Icelake Server processors">;
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
@@ -466,7 +484,7 @@ def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
foreach P = ["pentium3", "pentium3m"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
}
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -481,12 +499,12 @@ foreach P = ["pentium3", "pentium3m"] in {
def : ProcessorModel<"pentium-m", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureNOPL]>;
+ FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
foreach P = ["pentium4", "pentium4m"] in {
def : ProcessorModel<P, GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureNOPL]>;
+ FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
}
// Intel Quark.
@@ -495,19 +513,21 @@ def : Proc<"lakemont", []>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
// NetBurst.
def : ProcessorModel<"prescott", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
def : ProcessorModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE3,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B
]>;
@@ -515,10 +535,12 @@ def : ProcessorModel<"nocona", GenericPostRAModel, [
def : ProcessorModel<"core2", SandyBridgeModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureLAHFSAHF,
FeatureMacroFusion
@@ -526,10 +548,12 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE41,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureLAHFSAHF,
FeatureMacroFusion
@@ -540,10 +564,12 @@ class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
ProcIntelAtom,
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureMOVBE,
FeatureLEAForSP,
@@ -560,15 +586,16 @@ def : BonnellProc<"atom">; // Pin the generic name to the baseline.
class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
ProcIntelSLM,
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureMOVBE,
FeaturePOPCNT,
FeaturePCLMUL,
- FeatureAES,
FeatureSlowDivide64,
FeatureSlowTwoMemOps,
FeaturePRFCHW,
@@ -594,10 +621,12 @@ class ProcModel<string Name, SchedMachineModel Model,
def GLMFeatures : ProcessorFeatures<[], [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureMOVBE,
FeaturePOPCNT,
@@ -653,10 +682,12 @@ def : TremontProc<"tremont">;
// "Arrandale" along with corei3 and corei5
class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePOPCNT,
FeatureLAHFSAHF,
@@ -669,13 +700,14 @@ def : NehalemProc<"corei7">;
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePOPCNT,
- FeatureAES,
FeaturePCLMUL,
FeatureLAHFSAHF,
FeatureMacroFusion
@@ -686,13 +718,14 @@ def : WestmereProc<"westmere">;
// rather than a superset.
def SNBFeatures : ProcessorFeatures<[], [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePOPCNT,
- FeatureAES,
FeatureSlowDivide64,
FeaturePCLMUL,
FeatureXSAVE,
@@ -702,6 +735,7 @@ def SNBFeatures : ProcessorFeatures<[], [
FeatureFastScalarFSQRT,
FeatureFastSHLDRotate,
FeatureSlowIncDec,
+ FeatureMergeToThreeWayBranch,
FeatureMacroFusion
]>;
@@ -741,7 +775,6 @@ def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
HSWFeatures.Value, [
- ProcIntelHSW,
FeaturePOPCNTFalseDeps,
FeatureLZCNTFalseDeps
]>;
@@ -755,15 +788,14 @@ def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
]>;
class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel,
BDWFeatures.Value, [
- ProcIntelBDW,
FeaturePOPCNTFalseDeps,
FeatureLZCNTFalseDeps
]>;
def : BroadwellProc<"broadwell">;
def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
+ FeatureAES,
FeatureMPX,
- FeatureRTM,
FeatureXSAVEC,
FeatureXSAVES,
FeatureCLFLUSHOPT,
@@ -772,14 +804,32 @@ def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
SKLFeatures.Value, [
- ProcIntelSKL,
FeatureHasFastGather,
FeaturePOPCNTFalseDeps,
FeatureSGX
]>;
def : SkylakeClientProc<"skylake">;
-def KNLFeatures : ProcessorFeatures<IVBFeatures.Value, [
+def KNLFeatures : ProcessorFeatures<[], [
+ FeatureX87,
+ FeatureCMOV,
+ FeatureMMX,
+ FeatureFXSR,
+ FeatureNOPL,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeaturePOPCNT,
+ FeatureSlowDivide64,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureLAHFSAHF,
+ FeatureSlow3OpsLEA,
+ FeatureSlowIncDec,
+ FeatureAES,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
FeatureAVX512,
FeatureERI,
FeatureCDI,
@@ -798,19 +848,19 @@ def KNLFeatures : ProcessorFeatures<IVBFeatures.Value, [
// FIXME: define KNL model
class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
KNLFeatures.Value, [
- ProcIntelKNL,
FeatureSlowTwoMemOps,
FeatureFastPartialYMMorZMMWrite,
- FeatureHasFastGather
+ FeatureHasFastGather,
+ FeatureSlowPMADDWD
]>;
def : KnightsLandingProc<"knl">;
class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
KNLFeatures.Value, [
- ProcIntelKNL,
FeatureSlowTwoMemOps,
FeatureFastPartialYMMorZMMWrite,
FeatureHasFastGather,
+ FeatureSlowPMADDWD,
FeatureVPOPCNTDQ
]>;
def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features
@@ -827,13 +877,23 @@ def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
SKXFeatures.Value, [
- ProcIntelSKX,
FeatureHasFastGather,
FeaturePOPCNTFalseDeps
]>;
def : SkylakeServerProc<"skylake-avx512">;
def : SkylakeServerProc<"skx">; // Legacy alias.
+def CLXFeatures : ProcessorFeatures<SKXFeatures.Value, [
+ FeatureVNNI
+]>;
+
+class CascadelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
+ CLXFeatures.Value, [
+ FeatureHasFastGather,
+ FeaturePOPCNTFalseDeps
+]>;
+def : CascadelakeProc<"cascadelake">;
+
def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
FeatureAVX512,
FeatureCDI,
@@ -849,7 +909,6 @@ def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
class CannonlakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
CNLFeatures.Value, [
- ProcIntelCNL,
FeatureHasFastGather
]>;
def : CannonlakeProc<"cannonlake">;
@@ -868,14 +927,12 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
class IcelakeClientProc<string Name> : ProcModel<Name, SkylakeServerModel,
ICLFeatures.Value, [
- ProcIntelICL,
FeatureHasFastGather
]>;
def : IcelakeClientProc<"icelake-client">;
class IcelakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
ICLFeatures.Value, [
- ProcIntelICX,
FeaturePCONFIG,
FeatureWBNOINVD,
FeatureHasFastGather
@@ -889,39 +946,43 @@ def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
foreach P = ["athlon", "athlon-tbird"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, Feature3DNowA,
FeatureNOPL, FeatureSlowSHLD]>;
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, FeatureSSE1,
Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD]>;
+ FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
+ FeatureCMOV]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD,
+ FeatureCMOV, Feature64Bit]>;
}
foreach P = ["amdfam10", "barcelona"] in {
def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowSHLD, FeatureLAHFSAHF]>;
+ FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit]>;
}
// Bobcat
def : Proc<"btver1", [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureSSE4A,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePRFCHW,
FeatureLZCNT,
@@ -934,11 +995,13 @@ def : Proc<"btver1", [
// Jaguar
def : ProcessorModel<"btver2", BtVer2Model, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX,
FeatureFXSR,
FeatureNOPL,
FeatureSSE4A,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePRFCHW,
FeatureAES,
@@ -954,14 +1017,18 @@ def : ProcessorModel<"btver2", BtVer2Model, [
FeatureSlowSHLD,
FeatureLAHFSAHF,
FeatureFast15ByteNOP,
- FeatureFastPartialYMMorZMMWrite
+ FeatureFastBEXTR,
+ FeatureFastPartialYMMorZMMWrite,
+ FeatureFastHorizontalOps
]>;
// Bulldozer
-def : Proc<"bdver1", [
+def : ProcessorModel<"bdver1", BdVer2Model, [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -981,10 +1048,12 @@ def : Proc<"bdver1", [
FeatureMacroFusion
]>;
// Piledriver
-def : Proc<"bdver2", [
+def : ProcessorModel<"bdver2", BdVer2Model, [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -1005,14 +1074,17 @@ def : Proc<"bdver2", [
FeatureSlowSHLD,
FeatureLAHFSAHF,
FeatureFast11ByteNOP,
+ FeatureFastBEXTR,
FeatureMacroFusion
]>;
// Steamroller
def : Proc<"bdver3", [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -1035,18 +1107,21 @@ def : Proc<"bdver3", [
FeatureFSGSBase,
FeatureLAHFSAHF,
FeatureFast11ByteNOP,
+ FeatureFastBEXTR,
FeatureMacroFusion
]>;
// Excavator
def : Proc<"bdver4", [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX2,
FeatureFXSR,
FeatureNOPL,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -1064,6 +1139,7 @@ def : Proc<"bdver4", [
FeatureSlowSHLD,
FeatureFSGSBase,
FeatureLAHFSAHF,
+ FeatureFastBEXTR,
FeatureFast11ByteNOP,
FeatureMWAITX,
FeatureMacroFusion
@@ -1078,6 +1154,8 @@ def: ProcessorModel<"znver1", Znver1Model, [
FeatureBMI2,
FeatureCLFLUSHOPT,
FeatureCLZERO,
+ FeatureCMOV,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureF16C,
FeatureFMA,
@@ -1087,6 +1165,7 @@ def: ProcessorModel<"znver1", Znver1Model, [
FeatureFastLZCNT,
FeatureLAHFSAHF,
FeatureLZCNT,
+ FeatureFastBEXTR,
FeatureFast15ByteNOP,
FeatureMacroFusion,
FeatureMMX,
@@ -1112,7 +1191,7 @@ def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE1, FeatureFXSR]>;
+ FeatureSSE1, FeatureFXSR, FeatureCMOV]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -1126,6 +1205,7 @@ def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
// forming a common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE2,
FeatureFXSR,
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 7d8f7b9dfe46..36cef98a1ef5 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -88,19 +88,19 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
void X86AsmPrinter::EmitFunctionBodyStart() {
if (EmitFPOData) {
- X86TargetStreamer *XTS =
- static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
- unsigned ParamsSize =
- MF->getInfo<X86MachineFunctionInfo>()->getArgumentStackSize();
- XTS->emitFPOProc(CurrentFnSym, ParamsSize);
+ if (auto *XTS =
+ static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()))
+ XTS->emitFPOProc(
+ CurrentFnSym,
+ MF->getInfo<X86MachineFunctionInfo>()->getArgumentStackSize());
}
}
void X86AsmPrinter::EmitFunctionBodyEnd() {
if (EmitFPOData) {
- X86TargetStreamer *XTS =
- static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
- XTS->emitFPOEndProc();
+ if (auto *XTS =
+ static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()))
+ XTS->emitFPOEndProc();
}
}
@@ -129,6 +129,9 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
GVSym =
P.OutContext.getOrCreateSymbol(Twine("__imp_") + GVSym->getName());
+ else if (MO.getTargetFlags() == X86II::MO_COFFSTUB)
+ GVSym =
+ P.OutContext.getOrCreateSymbol(Twine(".refptr.") + GVSym->getName());
if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
@@ -161,6 +164,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
break;
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DLLIMPORT:
+ case X86II::MO_COFFSTUB:
// These affect the name of the symbol, not any suffix.
break;
case X86II::MO_GOT_ABSOLUTE_ADDRESS:
@@ -568,9 +572,9 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
// Emitting an Elf_Prop for the CET properties.
OutStreamer->EmitIntValue(ELF::GNU_PROPERTY_X86_FEATURE_1_AND, 4);
- OutStreamer->EmitIntValue(WordSize, 4); // data size
- OutStreamer->EmitIntValue(FeatureFlagsAnd, WordSize); // data
- EmitAlignment(WordSize == 4 ? 2 : 3); // padding
+ OutStreamer->EmitIntValue(4, 4); // data size
+ OutStreamer->EmitIntValue(FeatureFlagsAnd, 4); // data
+ EmitAlignment(WordSize == 4 ? 2 : 3); // padding
OutStreamer->endSection(Nt);
OutStreamer->SwitchSection(Cur);
@@ -583,21 +587,28 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
if (TT.isOSBinFormatCOFF()) {
// Emit an absolute @feat.00 symbol. This appears to be some kind of
// compiler features bitfield read by link.exe.
+ MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00"));
+ OutStreamer->BeginCOFFSymbolDef(S);
+ OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
+ OutStreamer->EndCOFFSymbolDef();
+ int64_t Feat00Flags = 0;
+
if (TT.getArch() == Triple::x86) {
- MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00"));
- OutStreamer->BeginCOFFSymbolDef(S);
- OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
- OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
- OutStreamer->EndCOFFSymbolDef();
// According to the PE-COFF spec, the LSB of this value marks the object
// for "registered SEH". This means that all SEH handler entry points
// must be registered in .sxdata. Use of any unregistered handlers will
// cause the process to terminate immediately. LLVM does not know how to
// register any SEH handlers, so its object files should be safe.
- OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
- OutStreamer->EmitAssignment(
- S, MCConstantExpr::create(int64_t(1), MMI->getContext()));
+ Feat00Flags |= 1;
}
+
+ if (M.getModuleFlag("cfguardtable"))
+ Feat00Flags |= 0x800; // Object is CFG-aware.
+
+ OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
+ OutStreamer->EmitAssignment(
+ S, MCConstantExpr::create(Feat00Flags, MMI->getContext()));
}
OutStreamer->EmitSyntaxDirective();
@@ -663,7 +674,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
emitNonLazyStubs(MMI, *OutStreamer);
// Emit stack and fault map information.
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
FM.serializeToFaultMapSection();
// This flag tells the linker that no global symbols contain code that fall
@@ -684,12 +695,12 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
if (TT.isOSBinFormatCOFF()) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
return;
}
if (TT.isOSBinFormatELF()) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
FM.serializeToFaultMapSection();
return;
}
diff --git a/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index ab2cbfc33e17..627a6cb14514 100644
--- a/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -52,10 +52,6 @@ using namespace llvm;
#define DEBUG_TYPE "x86-avoid-SFB"
-namespace llvm {
-void initializeX86AvoidSFBPassPass(PassRegistry &);
-} // end namespace llvm
-
static cl::opt<bool> DisableX86AvoidStoreForwardBlocks(
"x86-disable-avoid-SFB", cl::Hidden,
cl::desc("X86: Disable Store Forwarding Blocks fixup."), cl::init(false));
@@ -590,7 +586,7 @@ void X86AvoidSFBPass::breakBlockedCopies(
StDisp2 += OverlapDelta;
Size2 -= OverlapDelta;
}
- Size1 = std::abs(std::abs(LdDisp2) - std::abs(LdDisp1));
+ Size1 = LdDisp2 - LdDisp1;
// Build a copy for the point until the current blocking store's
// displacement.
@@ -645,21 +641,22 @@ removeRedundantBlockingStores(DisplacementSizeMap &BlockingStoresDispSizeMap) {
if (BlockingStoresDispSizeMap.size() <= 1)
return;
- int64_t PrevDisp = BlockingStoresDispSizeMap.begin()->first;
- unsigned PrevSize = BlockingStoresDispSizeMap.begin()->second;
- SmallVector<int64_t, 2> ForRemoval;
- for (auto DispSizePair = std::next(BlockingStoresDispSizeMap.begin());
- DispSizePair != BlockingStoresDispSizeMap.end(); ++DispSizePair) {
- int64_t CurrDisp = DispSizePair->first;
- unsigned CurrSize = DispSizePair->second;
- if (CurrDisp + CurrSize <= PrevDisp + PrevSize) {
- ForRemoval.push_back(PrevDisp);
+ SmallVector<std::pair<int64_t, unsigned>, 0> DispSizeStack;
+ for (auto DispSizePair : BlockingStoresDispSizeMap) {
+ int64_t CurrDisp = DispSizePair.first;
+ unsigned CurrSize = DispSizePair.second;
+ while (DispSizeStack.size()) {
+ int64_t PrevDisp = DispSizeStack.back().first;
+ unsigned PrevSize = DispSizeStack.back().second;
+ if (CurrDisp + CurrSize > PrevDisp + PrevSize)
+ break;
+ DispSizeStack.pop_back();
}
- PrevDisp = CurrDisp;
- PrevSize = CurrSize;
+ DispSizeStack.push_back(DispSizePair);
}
- for (auto Disp : ForRemoval)
- BlockingStoresDispSizeMap.erase(Disp);
+ BlockingStoresDispSizeMap.clear();
+ for (auto Disp : DispSizeStack)
+ BlockingStoresDispSizeMap.insert(Disp);
}
bool X86AvoidSFBPass::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index dea95f56f4d5..903d24c9984a 100644
--- a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -56,10 +56,6 @@ static cl::opt<bool>
cl::desc("Avoid optimizing x86 call frames for size"),
cl::init(false), cl::Hidden);
-namespace llvm {
-void initializeX86CallFrameOptimizationPass(PassRegistry &);
-}
-
namespace {
class X86CallFrameOptimization : public MachineFunctionPass {
diff --git a/contrib/llvm/lib/Target/X86/X86CallLowering.cpp b/contrib/llvm/lib/Target/X86/X86CallLowering.cpp
index 96ea64dc8c48..1dc83b76595d 100644
--- a/contrib/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -65,10 +65,8 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
- if (SplitVTs.size() != 1) {
- // TODO: support struct/array split
- return false;
- }
+ if (OrigArg.Ty->isVoidTy())
+ return true;
EVT VT = SplitVTs[0];
unsigned NumParts = TLI.getNumRegisters(Context, VT);
@@ -185,27 +183,36 @@ protected:
} // end anonymous namespace
-bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
- assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
-
+bool X86CallLowering::lowerReturn(
+ MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
+ assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
+ "Return value without a vreg");
auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
- if (VReg) {
+ if (!VRegs.empty()) {
MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
auto &DL = MF.getDataLayout();
- const Function &F = MF.getFunction();
+ LLVMContext &Ctx = Val->getType()->getContext();
+ const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();
- ArgInfo OrigArg{VReg, Val->getType()};
- setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
SmallVector<ArgInfo, 8> SplitArgs;
- if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs) {
- MIRBuilder.buildUnmerge(Regs, VReg);
- }))
- return false;
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+ if (!splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI,
+ [&](ArrayRef<unsigned> Regs) {
+ MIRBuilder.buildUnmerge(Regs, VRegs[i]);
+ }))
+ return false;
+ }
OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
diff --git a/contrib/llvm/lib/Target/X86/X86CallLowering.h b/contrib/llvm/lib/Target/X86/X86CallLowering.h
index 6c9dc1565dad..f5f8f9a3ef6d 100644
--- a/contrib/llvm/lib/Target/X86/X86CallLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86CallLowering.h
@@ -29,8 +29,8 @@ class X86CallLowering : public CallLowering {
public:
X86CallLowering(const X86TargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
index fcc9a296de93..fe49c9ffbd95 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -590,9 +590,11 @@ def CC_X86_64_HHVM_C : CallingConv<[
// Calling convention used on Win64
def CC_X86_Win64_C : CallingConv<[
- // FIXME: Handle byval stuff.
// FIXME: Handle varargs.
+ // Byval aggregates are passed by pointer
+ CCIfByVal<CCPassIndirect<i64>>,
+
// Promote i1/v1i1 arguments to i8.
CCIfType<[i1, v1i1], CCPromoteToType<i8>>,
diff --git a/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
index 1c5f110d8c60..c3e76fd2a856 100644
--- a/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -81,12 +81,6 @@ STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");
-namespace llvm {
-
-void initializeX86CmovConverterPassPass(PassRegistry &);
-
-} // end namespace llvm
-
// This internal switch can be used to turn off the cmov/branch optimization.
static cl::opt<bool>
EnableCmovConverter("x86-cmov-converter",
diff --git a/contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp b/contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp
new file mode 100644
index 000000000000..7ce443c4656a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp
@@ -0,0 +1,585 @@
+//===---- X86CondBrFolding.cpp - optimize conditional branches ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file defines a pass that optimizes conditional branches on x86 by
+// taking advantage of the three-way conditional code generated by compare
+// instructions.
+// Currently, it tries to hoist EQ and NE conditional branches to a dominating
+// conditional branch where the same EQ/NE condition code is computed. An
+// example:
+// bb_0:
+// cmp %0, 19
+// jg bb_1
+// jmp bb_2
+// bb_1:
+// cmp %0, 40
+// jg bb_3
+// jmp bb_4
+// bb_4:
+// cmp %0, 20
+// je bb_5
+// jmp bb_6
+// Here we could combine the two compares in bb_0 and bb_4 and have the
+// following code:
+// bb_0:
+// cmp %0, 20
+// jg bb_1
+// jl bb_2
+// jmp bb_5
+// bb_1:
+// cmp %0, 40
+// jg bb_3
+// jmp bb_6
+// For the case of %0 == 20 (bb_5), we eliminate two jumps, and the control
+// height for bb_6 is also reduced. bb_4 is gone after the optimization.
+//
+// There are plenty of these code patterns, especially from switch-case
+// lowering, where we generate a compare against "pivot-1" for the inner nodes
+// of the binary search tree; a sketch of such a source pattern is shown below.
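+//
+// As an illustrative sketch only (not taken from a test case), a switch over
+// a few sparse case values such as
+//
+//   int classify(int v) {
+//     switch (v) {
+//     case 20: return 1;
+//     case 40: return 2;
+//     default: return 0;
+//     }
+//   }
+//
+// tends to be lowered into a compare chain like the bb_0/bb_1/bb_4 example
+// above, which this pass can then fold.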
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-condbr-folding"
+
+STATISTIC(NumFixedCondBrs, "Number of x86 condbr folded");
+
+namespace {
+class X86CondBrFoldingPass : public MachineFunctionPass {
+public:
+ X86CondBrFoldingPass() : MachineFunctionPass(ID) {
+ initializeX86CondBrFoldingPassPass(*PassRegistry::getPassRegistry());
+ }
+ StringRef getPassName() const override { return "X86 CondBr Folding"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ }
+
+public:
+ static char ID;
+};
+} // namespace
+
+char X86CondBrFoldingPass::ID = 0;
+INITIALIZE_PASS(X86CondBrFoldingPass, "X86CondBrFolding", "X86CondBrFolding",
+                false, false)
+
+FunctionPass *llvm::createX86CondBrFolding() {
+ return new X86CondBrFoldingPass();
+}
+
+namespace {
+// A class that stores the auxiliary information for each MBB.
+struct TargetMBBInfo {
+ MachineBasicBlock *TBB;
+ MachineBasicBlock *FBB;
+ MachineInstr *BrInstr;
+ MachineInstr *CmpInstr;
+ X86::CondCode BranchCode;
+ unsigned SrcReg;
+ int CmpValue;
+ bool Modified;
+ bool CmpBrOnly;
+};
+
+// A class that optimizes conditional branches by hoisting and merging CondCodes.
+class X86CondBrFolding {
+public:
+ X86CondBrFolding(const X86InstrInfo *TII,
+ const MachineBranchProbabilityInfo *MBPI,
+ MachineFunction &MF)
+ : TII(TII), MBPI(MBPI), MF(MF) {}
+ bool optimize();
+
+private:
+ const X86InstrInfo *TII;
+ const MachineBranchProbabilityInfo *MBPI;
+ MachineFunction &MF;
+ std::vector<std::unique_ptr<TargetMBBInfo>> MBBInfos;
+ SmallVector<MachineBasicBlock *, 4> RemoveList;
+
+ void optimizeCondBr(MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineBasicBlock *> &BranchPath);
+ void fixBranchProb(MachineBasicBlock *NextMBB, MachineBasicBlock *RootMBB,
+ SmallVectorImpl<MachineBasicBlock *> &BranchPath);
+ void replaceBrDest(MachineBasicBlock *MBB, MachineBasicBlock *OrigDest,
+ MachineBasicBlock *NewDest);
+ void fixupModifiedCond(MachineBasicBlock *MBB);
+ std::unique_ptr<TargetMBBInfo> analyzeMBB(MachineBasicBlock &MBB);
+ static bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
+ int &CmpValue);
+ bool findPath(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &BranchPath);
+ TargetMBBInfo *getMBBInfo(MachineBasicBlock *MBB) const {
+ return MBBInfos[MBB->getNumber()].get();
+ }
+};
+} // namespace
+
+// Find a valid path along which we can reuse the CondCode.
+// The resulting path (if the return value is true) is stored in BranchPath.
+// Return value:
+//  false: no valid path is found.
+//  true: a valid path is found and the target MBB can be reached.
+bool X86CondBrFolding::findPath(
+ MachineBasicBlock *MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
+ assert(MBBInfo && "Expecting a candidate MBB");
+ int CmpValue = MBBInfo->CmpValue;
+
+ MachineBasicBlock *PredMBB = *MBB->pred_begin();
+ MachineBasicBlock *SaveMBB = MBB;
+ while (PredMBB) {
+ TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
+ if (!PredMBBInfo || PredMBBInfo->SrcReg != MBBInfo->SrcReg)
+ return false;
+
+ assert(SaveMBB == PredMBBInfo->TBB || SaveMBB == PredMBBInfo->FBB);
+ bool IsFalseBranch = (SaveMBB == PredMBBInfo->FBB);
+
+ X86::CondCode CC = PredMBBInfo->BranchCode;
+ assert(CC == X86::COND_L || CC == X86::COND_G || CC == X86::COND_E);
+ int PredCmpValue = PredMBBInfo->CmpValue;
+ bool ValueCmpTrue = ((CmpValue < PredCmpValue && CC == X86::COND_L) ||
+ (CmpValue > PredCmpValue && CC == X86::COND_G) ||
+ (CmpValue == PredCmpValue && CC == X86::COND_E));
+ // Check if both the result of value compare and the branch target match.
+ if (!(ValueCmpTrue ^ IsFalseBranch)) {
+ LLVM_DEBUG(dbgs() << "Dead BB detected!\n");
+ return false;
+ }
+
+ BranchPath.push_back(PredMBB);
+ // These are the conditions on which we could combine the compares.
+ if ((CmpValue == PredCmpValue) ||
+ (CmpValue == PredCmpValue - 1 && CC == X86::COND_L) ||
+ (CmpValue == PredCmpValue + 1 && CC == X86::COND_G))
+ return true;
+
+ // If PredMBB has more than one predecessor, or is not a pure cmp-and-br
+ // block, bail out.
+ if (PredMBB->pred_size() != 1 || !PredMBBInfo->CmpBrOnly)
+ return false;
+
+ SaveMBB = PredMBB;
+ PredMBB = *PredMBB->pred_begin();
+ }
+ return false;
+}
+
+// Fix up any PHI node in the successor of MBB.
+static void fixPHIsInSucc(MachineBasicBlock *MBB, MachineBasicBlock *OldMBB,
+ MachineBasicBlock *NewMBB) {
+ if (NewMBB == OldMBB)
+ return;
+ for (auto MI = MBB->instr_begin(), ME = MBB->instr_end();
+ MI != ME && MI->isPHI(); ++MI)
+ for (unsigned i = 2, e = MI->getNumOperands() + 1; i != e; i += 2) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.getMBB() == OldMBB)
+ MO.setMBB(NewMBB);
+ }
+}
+
+// Utility function to set branch probability for edge MBB->SuccMBB.
+static inline bool setBranchProb(MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccMBB,
+ BranchProbability Prob) {
+ auto MBBI = std::find(MBB->succ_begin(), MBB->succ_end(), SuccMBB);
+ if (MBBI == MBB->succ_end())
+ return false;
+ MBB->setSuccProbability(MBBI, Prob);
+ return true;
+}
+
+// Utility function to find the unconditional br instruction in MBB.
+static inline MachineBasicBlock::iterator
+findUncondBrI(MachineBasicBlock *MBB) {
+ return std::find_if(MBB->begin(), MBB->end(), [](MachineInstr &MI) -> bool {
+ return MI.getOpcode() == X86::JMP_1;
+ });
+}
+
+// Replace MBB's original successor, OrigDest, with NewDest.
+// Also update the MBBInfo for MBB.
+void X86CondBrFolding::replaceBrDest(MachineBasicBlock *MBB,
+ MachineBasicBlock *OrigDest,
+ MachineBasicBlock *NewDest) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
+ MachineInstr *BrMI;
+ if (MBBInfo->TBB == OrigDest) {
+ BrMI = MBBInfo->BrInstr;
+ unsigned JNCC = GetCondBranchFromCond(MBBInfo->BranchCode);
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI), TII->get(JNCC))
+ .addMBB(NewDest);
+ MBBInfo->TBB = NewDest;
+ MBBInfo->BrInstr = MIB.getInstr();
+ } else { // Should be the unconditional jump stmt.
+ MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
+ BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
+ .addMBB(NewDest);
+ MBBInfo->FBB = NewDest;
+ BrMI = &*UncondBrI;
+ }
+ fixPHIsInSucc(NewDest, OrigDest, MBB);
+ BrMI->eraseFromParent();
+ MBB->addSuccessor(NewDest);
+ setBranchProb(MBB, NewDest, MBPI->getEdgeProbability(MBB, OrigDest));
+ MBB->removeSuccessor(OrigDest);
+}
+
+// Change the CondCode and BrInstr according to MBBInfo.
+void X86CondBrFolding::fixupModifiedCond(MachineBasicBlock *MBB) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
+ if (!MBBInfo->Modified)
+ return;
+
+ MachineInstr *BrMI = MBBInfo->BrInstr;
+ X86::CondCode CC = MBBInfo->BranchCode;
+ MachineInstrBuilder MIB = BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI),
+ TII->get(GetCondBranchFromCond(CC)))
+ .addMBB(MBBInfo->TBB);
+ BrMI->eraseFromParent();
+ MBBInfo->BrInstr = MIB.getInstr();
+
+ MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
+ BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
+ .addMBB(MBBInfo->FBB);
+ MBB->erase(UncondBrI);
+ MBBInfo->Modified = false;
+}
+
+//
+// Apply the transformation:
+// RootMBB -1-> ... PredMBB -3-> MBB -5-> TargetMBB
+// \-2-> \-4-> \-6-> FalseMBB
+// ==>
+// RootMBB -1-> ... PredMBB -7-> FalseMBB
+// TargetMBB <-8-/ \-2-> \-4->
+//
+// Note that PredMBB and RootMBB could be the same.
+// And in the case of dead TargetMBB, we will not have TargetMBB and edge 8.
+//
+// There is some special handling when RootMBB is COND_E, in which case we
+// directly short-cycle the branch instruction.
+//
+void X86CondBrFolding::optimizeCondBr(
+ MachineBasicBlock &MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
+
+ X86::CondCode CC;
+ TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
+ assert(MBBInfo && "Expecting a candidate MBB");
+ MachineBasicBlock *TargetMBB = MBBInfo->TBB;
+ BranchProbability TargetProb = MBPI->getEdgeProbability(&MBB, MBBInfo->TBB);
+
+ // Forward the jump from MBB's predecessor to MBB's false target.
+ MachineBasicBlock *PredMBB = BranchPath.front();
+ TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
+ assert(PredMBBInfo && "Expecting a candidate MBB");
+ if (PredMBBInfo->Modified)
+ fixupModifiedCond(PredMBB);
+ CC = PredMBBInfo->BranchCode;
+ // Don't do this if the depth of BranchPath is 1 and PredMBB uses COND_E.
+ // We will short-cycle it directly in that case.
+ if (!(CC == X86::COND_E && BranchPath.size() == 1))
+ replaceBrDest(PredMBB, &MBB, MBBInfo->FBB);
+
+ MachineBasicBlock *RootMBB = BranchPath.back();
+ TargetMBBInfo *RootMBBInfo = getMBBInfo(RootMBB);
+ assert(RootMBBInfo && "Expecting a candidate MBB");
+ if (RootMBBInfo->Modified)
+ fixupModifiedCond(RootMBB);
+ CC = RootMBBInfo->BranchCode;
+
+ if (CC != X86::COND_E) {
+ MachineBasicBlock::iterator UncondBrI = findUncondBrI(RootMBB);
+ // RootMBB: Cond jump to the original not-taken MBB.
+ X86::CondCode NewCC;
+ switch (CC) {
+ case X86::COND_L:
+ NewCC = X86::COND_G;
+ break;
+ case X86::COND_G:
+ NewCC = X86::COND_L;
+ break;
+ default:
+ llvm_unreachable("unexpected condtional code.");
+ }
+ BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
+ TII->get(GetCondBranchFromCond(NewCC)))
+ .addMBB(RootMBBInfo->FBB);
+
+ // RootMBB: Jump to TargetMBB
+ BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
+ TII->get(X86::JMP_1))
+ .addMBB(TargetMBB);
+ RootMBB->addSuccessor(TargetMBB);
+ fixPHIsInSucc(TargetMBB, &MBB, RootMBB);
+ RootMBB->erase(UncondBrI);
+ } else {
+ replaceBrDest(RootMBB, RootMBBInfo->TBB, TargetMBB);
+ }
+
+ // Fix RootMBB's CmpValue to match MBB's CmpValue for TargetMBB. Don't set the
+ // immediate directly; move MBB's compare here, as the opcode might differ.
+ if (RootMBBInfo->CmpValue != MBBInfo->CmpValue) {
+ MachineInstr *NewCmp = MBBInfo->CmpInstr;
+ NewCmp->removeFromParent();
+ RootMBB->insert(RootMBBInfo->CmpInstr, NewCmp);
+ RootMBBInfo->CmpInstr->eraseFromParent();
+ }
+
+ // Fix branch Probabilities.
+ auto fixBranchProb = [&](MachineBasicBlock *NextMBB) {
+ BranchProbability Prob;
+ for (auto &I : BranchPath) {
+ MachineBasicBlock *ThisMBB = I;
+ if (!ThisMBB->hasSuccessorProbabilities() ||
+ !ThisMBB->isSuccessor(NextMBB))
+ break;
+ Prob = MBPI->getEdgeProbability(ThisMBB, NextMBB);
+ if (Prob.isUnknown())
+ break;
+ TargetProb = Prob * TargetProb;
+ Prob = Prob - TargetProb;
+ setBranchProb(ThisMBB, NextMBB, Prob);
+ if (ThisMBB == RootMBB) {
+ setBranchProb(ThisMBB, TargetMBB, TargetProb);
+ }
+ ThisMBB->normalizeSuccProbs();
+ if (ThisMBB == RootMBB)
+ break;
+ NextMBB = ThisMBB;
+ }
+ return true;
+ };
+ if (CC != X86::COND_E && !TargetProb.isUnknown())
+ fixBranchProb(MBBInfo->FBB);
+
+ if (CC != X86::COND_E)
+ RemoveList.push_back(&MBB);
+
+ // Invalidate MBBInfo just in case.
+ MBBInfos[MBB.getNumber()] = nullptr;
+ MBBInfos[RootMBB->getNumber()] = nullptr;
+
+ LLVM_DEBUG(dbgs() << "After optimization:\nRootMBB is: " << *RootMBB << "\n");
+ if (BranchPath.size() > 1)
+ LLVM_DEBUG(dbgs() << "PredMBB is: " << *(BranchPath[0]) << "\n");
+}
+
+// Driver function for optimization: find the valid candidate and apply
+// the transformation.
+bool X86CondBrFolding::optimize() {
+ bool Changed = false;
+ LLVM_DEBUG(dbgs() << "***** X86CondBr Folding on Function: " << MF.getName()
+ << " *****\n");
+ // Setup data structures.
+ MBBInfos.resize(MF.getNumBlockIDs());
+ for (auto &MBB : MF)
+ MBBInfos[MBB.getNumber()] = analyzeMBB(MBB);
+
+ for (auto &MBB : MF) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
+ if (!MBBInfo || !MBBInfo->CmpBrOnly)
+ continue;
+ if (MBB.pred_size() != 1)
+ continue;
+ LLVM_DEBUG(dbgs() << "Work on MBB." << MBB.getNumber()
+ << " CmpValue: " << MBBInfo->CmpValue << "\n");
+ SmallVector<MachineBasicBlock *, 4> BranchPath;
+ if (!findPath(&MBB, BranchPath))
+ continue;
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Found one path (len=" << BranchPath.size() << "):\n");
+ int Index = 1;
+ LLVM_DEBUG(dbgs() << "Target MBB is: " << MBB << "\n");
+ for (auto I = BranchPath.rbegin(); I != BranchPath.rend(); ++I, ++Index) {
+ MachineBasicBlock *PMBB = *I;
+ TargetMBBInfo *PMBBInfo = getMBBInfo(PMBB);
+ LLVM_DEBUG(dbgs() << "Path MBB (" << Index << " of " << BranchPath.size()
+ << ") is " << *PMBB);
+ LLVM_DEBUG(dbgs() << "CC=" << PMBBInfo->BranchCode
+ << " Val=" << PMBBInfo->CmpValue
+ << " CmpBrOnly=" << PMBBInfo->CmpBrOnly << "\n\n");
+ }
+#endif
+ optimizeCondBr(MBB, BranchPath);
+ Changed = true;
+ }
+ NumFixedCondBrs += RemoveList.size();
+ for (auto MBBI : RemoveList) {
+ while (!MBBI->succ_empty())
+ MBBI->removeSuccessor(MBBI->succ_end() - 1);
+
+ MBBI->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+// Analyze instructions that generate CondCode and extract information.
+bool X86CondBrFolding::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
+ int &CmpValue) {
+ unsigned SrcRegIndex = 0;
+ unsigned ValueIndex = 0;
+ switch (MI.getOpcode()) {
+ // TODO: handle test instructions.
+ default:
+ return false;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri:
+ SrcRegIndex = 0;
+ ValueIndex = 1;
+ break;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ SrcRegIndex = 1;
+ ValueIndex = 2;
+ break;
+ }
+ SrcReg = MI.getOperand(SrcRegIndex).getReg();
+ if (!MI.getOperand(ValueIndex).isImm())
+ return false;
+ CmpValue = MI.getOperand(ValueIndex).getImm();
+ return true;
+}
+
+// Analyze a candidate MBB and extract all the information needed.
+// A valid candidate will have two successors.
+// It should also end with a sequence of
+//     CmpInstr,
+//     CondBr,
+//     UnCondBr.
+// Return TargetMBBInfo if MBB is a valid candidate and nullptr otherwise.
+std::unique_ptr<TargetMBBInfo>
+X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) {
+ MachineBasicBlock *TBB;
+ MachineBasicBlock *FBB;
+ MachineInstr *BrInstr;
+ MachineInstr *CmpInstr;
+ X86::CondCode CC;
+ unsigned SrcReg;
+ int CmpValue;
+ bool Modified;
+ bool CmpBrOnly;
+
+ if (MBB.succ_size() != 2)
+ return nullptr;
+
+ CmpBrOnly = true;
+ FBB = TBB = nullptr;
+ CmpInstr = nullptr;
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() == X86::JMP_1) {
+ if (FBB)
+ return nullptr;
+ FBB = I->getOperand(0).getMBB();
+ continue;
+ }
+ if (I->isBranch()) {
+ if (TBB)
+ return nullptr;
+ CC = X86::getCondFromBranchOpc(I->getOpcode());
+ switch (CC) {
+ default:
+ return nullptr;
+ case X86::COND_E:
+ case X86::COND_L:
+ case X86::COND_G:
+ case X86::COND_NE:
+ case X86::COND_LE:
+ case X86::COND_GE:
+ break;
+ }
+ TBB = I->getOperand(0).getMBB();
+ BrInstr = &*I;
+ continue;
+ }
+ if (analyzeCompare(*I, SrcReg, CmpValue)) {
+ if (CmpInstr)
+ return nullptr;
+ CmpInstr = &*I;
+ continue;
+ }
+ CmpBrOnly = false;
+ break;
+ }
+
+ if (!TBB || !FBB || !CmpInstr)
+ return nullptr;
+
+ // Simplify CondCode. Note this is only to simplify the findPath logic
+ // and will not change the instruction here.
+ switch (CC) {
+ case X86::COND_NE:
+ CC = X86::COND_E;
+ std::swap(TBB, FBB);
+ Modified = true;
+ break;
+ case X86::COND_LE:
+ if (CmpValue == INT_MAX)
+ return nullptr;
+ CC = X86::COND_L;
+ CmpValue += 1;
+ Modified = true;
+ break;
+ case X86::COND_GE:
+ if (CmpValue == INT_MIN)
+ return nullptr;
+ CC = X86::COND_G;
+ CmpValue -= 1;
+ Modified = true;
+ break;
+ default:
+ Modified = false;
+ break;
+ }
+ return llvm::make_unique<TargetMBBInfo>(TargetMBBInfo{
+ TBB, FBB, BrInstr, CmpInstr, CC, SrcReg, CmpValue, Modified, CmpBrOnly});
+}
+
+bool X86CondBrFoldingPass::runOnMachineFunction(MachineFunction &MF) {
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ if (!ST.threewayBranchProfitable())
+ return false;
+ const X86InstrInfo *TII = ST.getInstrInfo();
+ const MachineBranchProbabilityInfo *MBPI =
+ &getAnalysis<MachineBranchProbabilityInfo>();
+
+ X86CondBrFolding CondBr(TII, MBPI, MF);
+ return CondBr.optimize();
+}
diff --git a/contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp b/contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
new file mode 100644
index 000000000000..3654bf04f4e9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
@@ -0,0 +1,156 @@
+//===- X86DiscriminateMemOps.cpp - Unique IDs for Mem Ops -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass aids profile-driven cache prefetch insertion by ensuring all
+/// instructions that have a memory operand are distinguishable from each other.
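+///
+/// As an illustrative sketch (the names below are made up), a statement with
+/// two memory operands on a single source line, e.g.
+///     sum = a[i] + b[i];
+/// would otherwise yield two loads with identical debug locations; this pass
+/// bumps the DWARF discriminator of one of them so that a sample profile can
+/// refer to each memory access individually.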
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/IPO/SampleProfile.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-discriminate-memops"
+
+namespace {
+
+using Location = std::pair<StringRef, unsigned>;
+
+Location diToLocation(const DILocation *Loc) {
+ return std::make_pair(Loc->getFilename(), Loc->getLine());
+}
+
+/// Ensure each instruction having a memory operand has a distinct <LineNumber,
+/// Discriminator> pair.
+void updateDebugInfo(MachineInstr *MI, const DILocation *Loc) {
+ DebugLoc DL(Loc);
+ MI->setDebugLoc(DL);
+}
+
+class X86DiscriminateMemOps : public MachineFunctionPass {
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override {
+ return "X86 Discriminate Memory Operands";
+ }
+
+public:
+ static char ID;
+
+ /// Default construct and initialize the pass.
+ X86DiscriminateMemOps();
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char X86DiscriminateMemOps::ID = 0;
+
+/// Default construct and initialize the pass.
+X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {}
+
+bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
+ DISubprogram *FDI = MF.getFunction().getSubprogram();
+ if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling())
+ return false;
+
+ // Have a default DILocation, if we find instructions with memops that don't
+ // have any debug info.
+ const DILocation *ReferenceDI =
+ DILocation::get(FDI->getContext(), FDI->getLine(), 0, FDI);
+
+ DenseMap<Location, unsigned> MemOpDiscriminators;
+ MemOpDiscriminators[diToLocation(ReferenceDI)] = 0;
+
+ // Figure out the largest discriminator issued for each Location. When we
+ // issue new discriminators, we can thus avoid issuing discriminators
+ // belonging to instructions that don't have memops. This isn't a requirement
+ // for the goals of this pass; however, it avoids unnecessary ambiguity.
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ const auto &DI = MI.getDebugLoc();
+ if (!DI)
+ continue;
+ Location Loc = diToLocation(DI);
+ MemOpDiscriminators[Loc] =
+ std::max(MemOpDiscriminators[Loc], DI->getBaseDiscriminator());
+ }
+ }
+
+ // Keep track of the discriminators seen at each Location. If an instruction's
+ // DebugInfo has a Location and discriminator we've already seen, replace its
+ // discriminator with a new one, to guarantee uniqueness.
+ DenseMap<Location, DenseSet<unsigned>> Seen;
+
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (X86II::getMemoryOperandNo(MI.getDesc().TSFlags) < 0)
+ continue;
+ const DILocation *DI = MI.getDebugLoc();
+ if (!DI) {
+ DI = ReferenceDI;
+ }
+ Location L = diToLocation(DI);
+ DenseSet<unsigned> &Set = Seen[L];
+ const std::pair<DenseSet<unsigned>::iterator, bool> TryInsert =
+ Set.insert(DI->getBaseDiscriminator());
+ if (!TryInsert.second) {
+ unsigned BF, DF, CI = 0;
+ DILocation::decodeDiscriminator(DI->getDiscriminator(), BF, DF, CI);
+ Optional<unsigned> EncodedDiscriminator = DILocation::encodeDiscriminator(
+ MemOpDiscriminators[L] + 1, DF, CI);
+
+ if (!EncodedDiscriminator) {
+ // FIXME(mtrofin): The assumption is that this scenario is infrequent/OK
+ // not to support. If evidence points otherwise, we can explore synthesizing
+ // unique DIs by adding fake line numbers, or by constructing 64-bit
+ // discriminators.
+ LLVM_DEBUG(dbgs() << "Unable to create a unique discriminator "
+ "for instruction with memory operand in: "
+ << DI->getFilename() << " Line: " << DI->getLine()
+ << " Column: " << DI->getColumn()
+ << ". This is likely due to a large macro expansion. \n");
+ continue;
+ }
+ // Since we were able to encode, bump the MemOpDiscriminators.
+ ++MemOpDiscriminators[L];
+ DI = DI->cloneWithDiscriminator(EncodedDiscriminator.getValue());
+ updateDebugInfo(&MI, DI);
+ Changed = true;
+ std::pair<DenseSet<unsigned>::iterator, bool> MustInsert =
+ Set.insert(DI->getBaseDiscriminator());
+ (void)MustInsert; // Silence warning in release build.
+ assert(MustInsert.second && "New discriminator shouldn't be present in set");
+ }
+
+ // Bump the reference DI to avoid cramming discriminators on line 0.
+ // FIXME(mtrofin): pin ReferenceDI on blocks or first instruction with DI
+ // in a block. It's more consistent than just relying on the last memop
+ // instruction we happened to see.
+ ReferenceDI = DI;
+ }
+ }
+ return Changed;
+}
+
+FunctionPass *llvm::createX86DiscriminateMemOpsPass() {
+ return new X86DiscriminateMemOps();
+}
diff --git a/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp b/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 62588e9509d3..d9ebbb506ca4 100644
--- a/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -31,10 +31,6 @@
using namespace llvm;
-namespace llvm {
-void initializeX86DomainReassignmentPass(PassRegistry &);
-}
-
#define DEBUG_TYPE "x86-domain-reassignment"
STATISTIC(NumClosuresConverted, "Number of closures converted by the pass");
@@ -736,7 +732,10 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
STI = &MF.getSubtarget<X86Subtarget>();
// GPR->K is the only transformation currently supported, bail out early if no
// AVX512.
- if (!STI->hasAVX512())
+ // TODO: We're also bailing out if AVX512BW isn't supported since we use VK32
+ // and VK64 for GR32/GR64, but those aren't legal classes on KNL. If the
+ // register coalescer doesn't clean it up and we generate a spill, we will
+ // crash.
+ if (!STI->hasAVX512() || !STI->hasBWI())
return false;
MRI = &MF.getRegInfo();
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index d082b42eefa9..9dd3f2652543 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1278,7 +1278,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
- unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
RetRegs.push_back(RetReg);
@@ -2900,23 +2900,15 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
- bool UseIncDec = false;
- if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
- UseIncDec = true;
-
unsigned BaseOpc, CondOpc;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
- BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
- CondOpc = X86::SETOr;
- break;
+ BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
case Intrinsic::uadd_with_overflow:
BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
case Intrinsic::ssub_with_overflow:
- BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
- CondOpc = X86::SETOr;
- break;
+ BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
@@ -2938,9 +2930,11 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
{ X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
};
- if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
+ if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
+ CondOpc == X86::SETOr) {
+ // We can use INC/DEC.
ResultReg = createResultReg(TLI.getRegClassFor(VT));
- bool IsDec = BaseOpc == X86ISD::DEC;
+ bool IsDec = BaseOpc == ISD::SUB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
.addReg(LHSReg, getKillRegState(LHSIsKill));
@@ -3222,8 +3216,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
(CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
return false;
- // Functions using retpoline should use SDISel for calls.
- if (Subtarget->useRetpoline())
+ // Functions using retpoline for indirect calls need to use SDISel.
+ if (Subtarget->useRetpolineIndirectCalls())
return false;
// Handle only C, fastcc, and webkit_js calling conventions for now.
@@ -3734,9 +3728,6 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
case MVT::i1:
- // TODO: Support this properly.
- if (Subtarget->hasAVX512())
- return 0;
VT = MVT::i8;
LLVM_FALLTHROUGH;
case MVT::i8: Opc = X86::MOV8ri; break;
@@ -3744,7 +3735,7 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
case MVT::i32: Opc = X86::MOV32ri; break;
case MVT::i64: {
if (isUInt<32>(Imm))
- Opc = X86::MOV32ri;
+ Opc = X86::MOV32ri64;
else if (isInt<32>(Imm))
Opc = X86::MOV64ri32;
else
@@ -3752,14 +3743,6 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
break;
}
}
- if (VT == MVT::i64 && Opc == X86::MOV32ri) {
- unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
- unsigned ResultReg = createResultReg(&X86::GR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
- .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
- return ResultReg;
- }
return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}
@@ -4009,7 +3992,8 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
}
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
return true;
}
diff --git a/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index d9bf60c2c9fb..ed297e678203 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -288,7 +288,7 @@ MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
for (unsigned i = 1; i < NumArgs; ++i)
MIB.add(MI->getOperand(i));
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
return MIB;
}
diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
index f3f7f6a37360..a346085a52cb 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -25,10 +25,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace llvm {
-void initializeFixupLEAPassPass(PassRegistry &);
-}
-
#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"
@@ -43,8 +39,8 @@ class FixupLEAPass : public MachineFunctionPass {
/// Loop over all of the instructions in the basic block
/// replacing applicable instructions with LEA instructions,
/// where appropriate.
- bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
-
+ bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI,
+ bool IsSlowLEA, bool IsSlow3OpsLEA);
/// Given a machine register, look for the instruction
/// which writes it in the current basic block. If found,
@@ -62,10 +58,9 @@ class FixupLEAPass : public MachineFunctionPass {
MachineFunction::iterator MFI);
/// Given a LEA instruction which is unprofitable
- /// on Silvermont try to replace it with an equivalent ADD instruction
- void processInstructionForSLM(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI);
-
+ /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
+ void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI);
/// Given a LEA instruction which is unprofitable
/// on SNB+ try to replace it with other instructions.
@@ -197,8 +192,11 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
+ bool IsSlowLEA = ST.slowLEA();
+ bool IsSlow3OpsLEA = ST.slow3OpsLEA();
+
OptIncDec = !ST.slowIncDec() || Func.getFunction().optForMinSize();
- OptLEA = ST.LEAusesAG() || ST.slowLEA() || ST.slow3OpsLEA();
+ OptLEA = ST.LEAusesAG() || IsSlowLEA || IsSlow3OpsLEA;
if (!OptLEA && !OptIncDec)
return false;
@@ -209,7 +207,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
// Process all basic blocks.
for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I)
- processBasicBlock(Func, I);
+ processBasicBlock(Func, I, IsSlowLEA, IsSlow3OpsLEA);
LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
return true;
@@ -278,14 +276,16 @@ static inline bool isLEA(const int Opcode) {
}
static inline bool isInefficientLEAReg(unsigned int Reg) {
- return Reg == X86::EBP || Reg == X86::RBP || Reg == X86::R13;
+ return Reg == X86::EBP || Reg == X86::RBP ||
+ Reg == X86::R13D || Reg == X86::R13;
}
static inline bool isRegOperand(const MachineOperand &Op) {
return Op.isReg() && Op.getReg() != X86::NoRegister;
}
-/// hasIneffecientLEARegs - LEA that uses base and index registers
-/// where the base is EBP, RBP, or R13
+
+/// Returns true if this LEA uses base and index registers, and the base register
+/// is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
@@ -333,13 +333,12 @@ static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) {
static inline bool isLEASimpleIncOrDec(MachineInstr &LEA) {
unsigned SrcReg = LEA.getOperand(1 + X86::AddrBaseReg).getReg();
unsigned DstReg = LEA.getOperand(0).getReg();
- unsigned AddrDispOp = 1 + X86::AddrDisp;
+ const MachineOperand &AddrDisp = LEA.getOperand(1 + X86::AddrDisp);
return SrcReg == DstReg &&
LEA.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
LEA.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
- LEA.getOperand(AddrDispOp).isImm() &&
- (LEA.getOperand(AddrDispOp).getImm() == 1 ||
- LEA.getOperand(AddrDispOp).getImm() == -1);
+ AddrDisp.isImm() &&
+ (AddrDisp.getImm() == 1 || AddrDisp.getImm() == -1);
}
bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
@@ -351,7 +350,7 @@ bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(*MFI, I)) {
int NewOpcode;
- bool isINC = MI.getOperand(4).getImm() == 1;
+ bool isINC = MI.getOperand(1 + X86::AddrDisp).getImm() == 1;
switch (Opcode) {
case X86::LEA16r:
NewOpcode = isINC ? X86::INC16r : X86::DEC16r;
@@ -368,7 +367,7 @@ bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
MachineInstr *NewMI =
BuildMI(*MFI, I, MI.getDebugLoc(), TII->get(NewOpcode))
.add(MI.getOperand(0))
- .add(MI.getOperand(1));
+ .add(MI.getOperand(1 + X86::AddrBaseReg));
MFI->erase(I);
I = static_cast<MachineBasicBlock::iterator>(NewMI);
return true;
@@ -414,21 +413,29 @@ void FixupLEAPass::seekLEAFixup(MachineOperand &p,
}
}
-void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI) {
+void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI) {
MachineInstr &MI = *I;
const int Opcode = MI.getOpcode();
if (!isLEA(Opcode))
return;
- if (MI.getOperand(5).getReg() != 0 || !MI.getOperand(4).isImm() ||
+
+ const MachineOperand &Dst = MI.getOperand(0);
+ const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
+ const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
+ const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
+ const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
+ const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
+
+ if (Segment.getReg() != 0 || !Offset.isImm() ||
!TII->isSafeToClobberEFLAGS(*MFI, I))
return;
- const unsigned DstR = MI.getOperand(0).getReg();
- const unsigned SrcR1 = MI.getOperand(1).getReg();
- const unsigned SrcR2 = MI.getOperand(3).getReg();
+ const unsigned DstR = Dst.getReg();
+ const unsigned SrcR1 = Base.getReg();
+ const unsigned SrcR2 = Index.getReg();
if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
return;
- if (MI.getOperand(2).getImm() > 1)
+ if (Scale.getImm() > 1)
return;
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
@@ -436,19 +443,19 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
// Make ADD instruction for two registers writing to LEA's destination
if (SrcR1 != 0 && SrcR2 != 0) {
const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
- const MachineOperand &Src = MI.getOperand(SrcR1 == DstR ? 3 : 1);
+ const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
NewMI =
BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
LLVM_DEBUG(NewMI->dump(););
}
// Make ADD instruction for immediate
- if (MI.getOperand(4).getImm() != 0) {
+ if (Offset.getImm() != 0) {
const MCInstrDesc &ADDri =
- TII->get(getADDriFromLEA(Opcode, MI.getOperand(4)));
- const MachineOperand &SrcR = MI.getOperand(SrcR1 == DstR ? 1 : 3);
+ TII->get(getADDriFromLEA(Opcode, Offset));
+ const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR)
.add(SrcR)
- .addImm(MI.getOperand(4).getImm());
+ .addImm(Offset.getImm());
LLVM_DEBUG(NewMI->dump(););
}
if (NewMI) {
@@ -465,12 +472,12 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
if (!isLEA(LEAOpcode))
return nullptr;
- const MachineOperand &Dst = MI.getOperand(0);
- const MachineOperand &Base = MI.getOperand(1);
- const MachineOperand &Scale = MI.getOperand(2);
- const MachineOperand &Index = MI.getOperand(3);
- const MachineOperand &Offset = MI.getOperand(4);
- const MachineOperand &Segment = MI.getOperand(5);
+ const MachineOperand &Dst = MI.getOperand(0);
+ const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
+ const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
+ const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
+ const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
+ const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
if (!(TII->isThreeOperandsLEA(MI) ||
hasInefficientLEABaseReg(Base, Index)) ||
@@ -570,26 +577,28 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
}
bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
- MachineFunction::iterator MFI) {
-
+ MachineFunction::iterator MFI,
+ bool IsSlowLEA, bool IsSlow3OpsLEA) {
for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
if (OptIncDec)
if (fixupIncDec(I, MFI))
continue;
if (OptLEA) {
- if (MF.getSubtarget<X86Subtarget>().slowLEA())
- processInstructionForSLM(I, MFI);
-
- else {
- if (MF.getSubtarget<X86Subtarget>().slow3OpsLEA()) {
- if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
- MFI->erase(I);
- I = NewMI;
- }
- } else
- processInstruction(I, MFI);
+ if (IsSlowLEA) {
+ processInstructionForSlowLEA(I, MFI);
+ continue;
}
+
+ if (IsSlow3OpsLEA) {
+ if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
+ MFI->erase(I);
+ I = NewMI;
+ }
+ continue;
+ }
+
+ processInstruction(I, MFI);
}
}
return false;
diff --git a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index d2f2f21542a9..778aa505b2d9 100644
--- a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -1053,7 +1053,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
MIB.addReg(CondReg);
- MIB->setMemRefs(SetCCI.memoperands_begin(), SetCCI.memoperands_end());
+ MIB.setMemRefs(SetCCI.memoperands());
SetCCI.eraseFromParent();
return;
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index e207c343fac8..984db12201ed 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -185,7 +185,8 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
}
for (auto CS : AvailableRegs)
- if (!Uses.count(CS) && CS != X86::RIP)
+ if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP &&
+ CS != X86::ESP)
return CS;
}
}
@@ -765,7 +766,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
// FIXME: Add retpoline support and remove this.
- if (Is64Bit && IsLargeCodeModel && STI.useRetpoline())
+ if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
report_fatal_error("Emitting stack probe calls on 64-bit with the large "
"code model and retpoline not yet implemented.");
@@ -1103,15 +1104,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
NumBytes = alignTo(NumBytes, MaxAlign);
- // Get the offset of the stack slot for the EBP register, which is
- // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
- // Update the frame offset adjustment.
- if (!IsFunclet)
- MFI.setOffsetAdjustment(-NumBytes);
- else
- assert(MFI.getOffsetAdjustment() == -(int)NumBytes &&
- "should calculate same local variable offset for funclets");
-
// Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
.addReg(MachineFramePtr, RegState::Kill)
@@ -1167,6 +1159,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
+ // Update the offset adjustment, which is mainly used by codeview to translate
+ // from ESP to VFRAME relative local variable offsets.
+ if (!IsFunclet) {
+ if (HasFP && TRI->needsStackRealignment(MF))
+ MFI.setOffsetAdjustment(-NumBytes);
+ else
+ MFI.setOffsetAdjustment(-StackSize);
+ }
+
// For EH funclets, only allocate enough space for outgoing calls. Save the
// NumBytes value that we would've used for the parent frame.
unsigned ParentFrameNumBytes = NumBytes;
@@ -1208,6 +1209,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
+
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
+ .addImm(MaxAlign)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
// If there is an SUB32ri of ESP immediately before this instruction, merge
@@ -1983,6 +1991,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
}
X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
+ MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
// Assign slots for XMMs.
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -2262,9 +2271,15 @@ void X86FrameLowering::adjustForSegmentedStacks(
// Do not generate a prologue for leaf functions with a stack of size zero.
// For non-leaf functions we have to allow for the possibility that the
- // call is to a non-split function, as in PR37807.
- if (StackSize == 0 && !MFI.hasTailCall())
+ // call is to a non-split function, as in PR37807. This function could also
+ // take the address of a non-split function. When the linker tries to adjust
+ // its non-existent prologue, it would fail with an error. Mark the object
+ // file so that such failures are not errors. See this Go language bug-report
+ // https://go-review.googlesource.com/c/go/+/148819/
+ if (StackSize == 0 && !MFI.hasTailCall()) {
+ MF.getMMI().setHasNosplitStack(true);
return;
+ }
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
@@ -2437,7 +2452,7 @@ void X86FrameLowering::adjustForSegmentedStacks(
// is laid out within 2^31 bytes of each function body, but this seems
// to be sufficient for JIT.
// FIXME: Add retpoline support and remove the error here..
- if (STI.useRetpoline())
+ if (STI.useRetpolineIndirectCalls())
report_fatal_error("Emitting morestack calls on 64-bit with the large "
"code model and retpoline not yet implemented.");
BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
@@ -2463,8 +2478,8 @@ void X86FrameLowering::adjustForSegmentedStacks(
allocMBB->addSuccessor(&PrologueMBB);
- checkMBB->addSuccessor(allocMBB);
- checkMBB->addSuccessor(&PrologueMBB);
+ checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
+ checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
#ifdef EXPENSIVE_CHECKS
MF.verify();
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index a28d4eac8393..5ac153244df9 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -41,6 +41,10 @@ using namespace llvm;
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
+ cl::desc("Enable setting constant bits to reduce size of mask immediates"),
+ cl::Hidden);
+
//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//
@@ -161,6 +165,9 @@ namespace {
/// If true, selector should try to optimize for minimum code size.
bool OptForMinSize;
+ /// Disable direct TLS access through segment registers.
+ bool IndirectTlsSegRefs;
+
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), OptForSize(false),
@@ -173,6 +180,8 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.
Subtarget = &MF.getSubtarget<X86Subtarget>();
+ IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
+ "indirect-tls-seg-refs");
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -235,12 +244,6 @@ namespace {
return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
}
- // Try to fold a vector load. This makes sure the load isn't non-temporal.
- bool tryFoldVecLoad(SDNode *Root, SDNode *P, SDValue N,
- SDValue &Base, SDValue &Scale,
- SDValue &Index, SDValue &Disp,
- SDValue &Segment);
-
/// Implement addressing mode selection for inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
@@ -443,6 +446,9 @@ namespace {
switch (StoreSize) {
default: llvm_unreachable("Unsupported store size");
+ case 4:
+ case 8:
+ return false;
case 16:
return Subtarget->hasSSE41();
case 32:
@@ -453,15 +459,23 @@ namespace {
}
bool foldLoadStoreIntoMemOperand(SDNode *Node);
- bool matchBEXTRFromAnd(SDNode *Node);
+ MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
+ bool matchBitExtract(SDNode *Node);
bool shrinkAndImmediate(SDNode *N);
bool isMaskZeroExtended(SDNode *N) const;
+ bool tryShiftAmountMod(SDNode *N);
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node);
MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node,
SDValue &InFlag);
+
+ bool tryOptimizeRem8Extend(SDNode *N);
+
+ bool onlyUsesZeroFlag(SDValue Flags) const;
+ bool hasNoSignFlagUses(SDValue Flags) const;
+ bool hasNoCarryFlagUses(SDValue Flags) const;
};
}
@@ -512,12 +526,18 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
if (N.getOpcode() != ISD::LOAD)
return true;
+ // Don't fold non-temporal loads if we have an instruction for them.
+ if (useNonTemporalLoad(cast<LoadSDNode>(N)))
+ return false;
+
// If N is a load, do additional profitability checks.
if (U == Root) {
switch (U->getOpcode()) {
default: break;
case X86ISD::ADD:
+ case X86ISD::ADC:
case X86ISD::SUB:
+ case X86ISD::SBB:
case X86ISD::AND:
case X86ISD::XOR:
case X86ISD::OR:
@@ -724,7 +744,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (OptLevel != CodeGenOpt::None &&
// Only do this when the target can fold the load into the call or
// jmp.
- !Subtarget->useRetpoline() &&
+ !Subtarget->useRetpolineIndirectCalls() &&
((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
(Subtarget->is64Bit() ||
@@ -827,24 +847,144 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
}
}
+// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
+bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
+ Opc != X86::MOVSX64rr8)
+ return false;
+
+ SDValue N0 = N->getOperand(0);
+
+ // We need to be extracting the low byte of an extend.
+ if (!N0.isMachineOpcode() ||
+ N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
+ N0.getConstantOperandVal(1) != X86::sub_8bit)
+ return false;
+
+ // We're looking for either a movsx or movzx to match the original opcode.
+ unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
+ : X86::MOVSX32rr8_NOREX;
+ SDValue N00 = N0.getOperand(0);
+ if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
+ return false;
+
+ if (Opc == X86::MOVSX64rr8) {
+ // We had a sign extend from 8 to 64 bits; we still need to go from 32
+ // to 64.
+ MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
+ MVT::i64, N00);
+ ReplaceUses(N, Extend);
+ } else {
+ // Ok we can drop this extend and just use the original extend.
+ ReplaceUses(N, N00.getNode());
+ }
+
+ return true;
+}
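
A quick value-level sanity check of the peephole above (a standalone C++ sketch, not part of this change, and not using the LLVM APIs): once an 8-bit unsigned divrem result has been zero-extended into a 32-bit register, re-extracting its low byte and zero-extending again cannot change the value, which is why the outer MOVZX can be dropped. The MOVSX case is analogous.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 256; ++a)
    for (uint32_t b = 1; b < 256; ++b) {
      // The quotient of an 8-bit unsigned division already fits in 8 bits.
      uint32_t q = a / b;
      // Re-extracting the low byte and zero-extending it again is a no-op.
      assert(static_cast<uint32_t>(static_cast<uint8_t>(q)) == q);
    }
  return 0;
}
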
void X86DAGToDAGISel::PostprocessISelDAG() {
// Skip peepholes at -O0.
if (TM.getOptLevel() == CodeGenOpt::None)
return;
- // Attempt to remove vectors moves that were inserted to zero upper bits.
-
- SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
- ++Position;
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+ bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
- if (N->getMachineOpcode() != TargetOpcode::SUBREG_TO_REG)
+ if (tryOptimizeRem8Extend(N)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Look for a TESTrr+ANDrr pattern where both operands of the test are
+ // the same. Rewrite to remove the AND.
+ unsigned Opc = N->getMachineOpcode();
+ if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
+ Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
+ N->getOperand(0) == N->getOperand(1) &&
+ N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+ N->getOperand(0).isMachineOpcode()) {
+ SDValue And = N->getOperand(0);
+ unsigned N0Opc = And.getMachineOpcode();
+ if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
+ N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
+ MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
+ MVT::i32,
+ And.getOperand(0),
+ And.getOperand(1));
+ ReplaceUses(N, Test);
+ MadeChange = true;
+ continue;
+ }
+ if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
+ N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
+ unsigned NewOpc;
+ switch (N0Opc) {
+ case X86::AND8rm: NewOpc = X86::TEST8mr; break;
+ case X86::AND16rm: NewOpc = X86::TEST16mr; break;
+ case X86::AND32rm: NewOpc = X86::TEST32mr; break;
+ case X86::AND64rm: NewOpc = X86::TEST64mr; break;
+ }
+
+ // Need to swap the memory and register operand.
+ SDValue Ops[] = { And.getOperand(1),
+ And.getOperand(2),
+ And.getOperand(3),
+ And.getOperand(4),
+ And.getOperand(5),
+ And.getOperand(0),
+ And.getOperand(6) /* Chain */ };
+ MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
+ MVT::i32, MVT::Other, Ops);
+ ReplaceUses(N, Test);
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
+ // used. We're doing this late so we can prefer to fold the AND into masked
+ // comparisons. Doing that can be better for the live range of the mask
+ // register.
+ if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
+ Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
+ N->getOperand(0) == N->getOperand(1) &&
+ N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+ N->getOperand(0).isMachineOpcode() &&
+ onlyUsesZeroFlag(SDValue(N, 0))) {
+ SDValue And = N->getOperand(0);
+ unsigned N0Opc = And.getMachineOpcode();
+ // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
+ // KAND instructions and KTEST use the same ISA feature.
+ if (N0Opc == X86::KANDBrr ||
+ (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
+ N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
+ unsigned NewOpc;
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
+ case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
+ case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
+ case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
+ }
+ MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
+ MVT::i32,
+ And.getOperand(0),
+ And.getOperand(1));
+ ReplaceUses(N, KTest);
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ // Attempt to remove vectors moves that were inserted to zero upper bits.
+ if (Opc != TargetOpcode::SUBREG_TO_REG)
continue;
unsigned SubRegIdx = N->getConstantOperandVal(2);
@@ -881,14 +1021,22 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
continue;
+ // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
+ // the SHA instructions which use a legacy encoding.
+ uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
+ if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
+ (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
+ (TSFlags & X86II::EncodingMask) != X86II::XOP)
+ continue;
+
// Producing instruction is another vector instruction. We can drop the
// move.
CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
-
- // If the move is now dead, delete it.
- if (Move.getNode()->use_empty())
- CurDAG->RemoveDeadNode(Move.getNode());
+ MadeChange = true;
}
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
}
@@ -964,6 +1112,7 @@ bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
// For more information see http://people.redhat.com/drepper/tls.pdf
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
+ !IndirectTlsSegRefs &&
(Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetFuchsia()))
switch (N->getPointerInfo().getAddrSpace()) {
@@ -1291,8 +1440,7 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
}
APInt MaskedHighBits =
APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
- KnownBits Known;
- DAG.computeKnownBits(X, Known);
+ KnownBits Known = DAG.computeKnownBits(X);
if (MaskedHighBits != Known.Zero) return true;
// We've identified a pattern that can be transformed into a single shift
@@ -1327,6 +1475,64 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
return false;
}
+// Transform "(X >> SHIFT) & (MASK << C1)" to
+// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
+// matched to a BEXTR later. Returns false if the simplification is performed.
+static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM,
+ const X86Subtarget &Subtarget) {
+ if (Shift.getOpcode() != ISD::SRL ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)) ||
+ !Shift.hasOneUse() || !N.hasOneUse())
+ return true;
+
+ // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
+ if (!Subtarget.hasTBM() &&
+ !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
+ return true;
+
+ // We need to ensure that mask is a continuous run of bits.
+ if (!isShiftedMask_64(Mask)) return true;
+
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+
+ // The amount of shift we're trying to fit into the addressing mode is taken
+ // from the trailing zeros of the mask.
+ unsigned AMShiftAmt = countTrailingZeros(Mask);
+
+ // There is nothing we can do here unless the mask is removing some bits.
+ // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
+ if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
+
+ MVT VT = N.getSimpleValueType();
+ SDLoc DL(N);
+ SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
+ SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
+ SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
+ SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ insertDAGNode(DAG, N, NewSRLAmt);
+ insertDAGNode(DAG, N, NewSRL);
+ insertDAGNode(DAG, N, NewMask);
+ insertDAGNode(DAG, N, NewAnd);
+ insertDAGNode(DAG, N, NewSHLAmt);
+ insertDAGNode(DAG, N, NewSHL);
+ DAG.ReplaceAllUsesWith(N, NewSHL);
+
+ AM.Scale = 1 << AMShiftAmt;
+ AM.IndexReg = NewAnd;
+ return false;
+}
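
A standalone check (not part of the commit) of the algebraic identity that foldMaskedShiftToBEXTR relies on: for a contiguous mask with C1 trailing zero bits, masking after a shift by SHIFT equals shifting right by SHIFT + C1, masking with the narrowed mask, and shifting left by C1. The values are chosen so every shift count stays below 64.

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t X = 0x0123456789ABCDEFULL;
  for (unsigned Shift = 0; Shift < 16; ++Shift)
    for (unsigned C1 = 1; C1 <= 3; ++C1) {        // addressing-mode scales 2/4/8
      const uint64_t Mask = 0xFFULL << C1;        // contiguous run with C1 trailing zeros
      const uint64_t Before = (X >> Shift) & Mask;
      const uint64_t After = ((X >> (Shift + C1)) & (Mask >> C1)) << C1;
      assert(Before == After);
    }
  return 0;
}
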
+
bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
SDLoc dl(N);
@@ -1607,6 +1813,11 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// a scale on the outside of the mask.
if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
return false;
+
+ // Try to fold the mask and shift into BEXTR and scale.
+ if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
+ return false;
+
break;
}
}
@@ -2039,20 +2250,6 @@ bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
-bool X86DAGToDAGISel::tryFoldVecLoad(SDNode *Root, SDNode *P, SDValue N,
- SDValue &Base, SDValue &Scale,
- SDValue &Index, SDValue &Disp,
- SDValue &Segment) {
- if (!ISD::isNON_EXTLoad(N.getNode()) ||
- useNonTemporalLoad(cast<LoadSDNode>(N)) ||
- !IsProfitableToFold(N, P, Root) ||
- !IsLegalToFold(N, P, Root, OptLevel))
- return false;
-
- return selectAddr(N.getNode(),
- N.getOperand(1), Base, Scale, Index, Disp, Segment);
-}
-
/// Return an SDNode that returns the value of the global base register.
/// Output instructions required to initialize the global base register,
/// if necessary.
@@ -2077,18 +2274,30 @@ bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
CR->getSignedMax().slt(1ull << Width);
}
-/// Test whether the given X86ISD::CMP node has any uses which require the SF
-/// or OF bits to be accurate.
-static bool hasNoSignedComparisonUses(SDNode *N) {
+static X86::CondCode getCondFromOpc(unsigned Opc) {
+ X86::CondCode CC = X86::COND_INVALID;
+ if (CC == X86::COND_INVALID)
+ CC = X86::getCondFromBranchOpc(Opc);
+ if (CC == X86::COND_INVALID)
+ CC = X86::getCondFromSETOpc(Opc);
+ if (CC == X86::COND_INVALID)
+ CC = X86::getCondFromCMovOpc(Opc);
+
+ return CC;
+}
+
+/// Test whether the given X86ISD::CMP node has any users that use a flag
+/// other than ZF.
+bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
// Examine each user of the node.
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end(); UI != UE; ++UI) {
- // Only examine CopyToReg uses.
- if (UI->getOpcode() != ISD::CopyToReg)
- return false;
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ // Only check things that use the flags.
+ if (UI.getUse().getResNo() != Flags.getResNo())
+ continue;
// Only examine CopyToReg uses that copy to EFLAGS.
- if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
- X86::EFLAGS)
+ if (UI->getOpcode() != ISD::CopyToReg ||
+ cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
return false;
// Examine each user of the CopyToReg use.
for (SDNode::use_iterator FlagUI = UI->use_begin(),
@@ -2097,105 +2306,52 @@ static bool hasNoSignedComparisonUses(SDNode *N) {
if (FlagUI.getUse().getResNo() != 1) continue;
// Anything unusual: assume conservatively.
if (!FlagUI->isMachineOpcode()) return false;
- // Examine the opcode of the user.
- switch (FlagUI->getMachineOpcode()) {
- // These comparisons don't treat the most significant bit specially.
- case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
- case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
- case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
- case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
- case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
- case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
- case X86::CMOVA16rr: case X86::CMOVA16rm:
- case X86::CMOVA32rr: case X86::CMOVA32rm:
- case X86::CMOVA64rr: case X86::CMOVA64rm:
- case X86::CMOVAE16rr: case X86::CMOVAE16rm:
- case X86::CMOVAE32rr: case X86::CMOVAE32rm:
- case X86::CMOVAE64rr: case X86::CMOVAE64rm:
- case X86::CMOVB16rr: case X86::CMOVB16rm:
- case X86::CMOVB32rr: case X86::CMOVB32rm:
- case X86::CMOVB64rr: case X86::CMOVB64rm:
- case X86::CMOVBE16rr: case X86::CMOVBE16rm:
- case X86::CMOVBE32rr: case X86::CMOVBE32rm:
- case X86::CMOVBE64rr: case X86::CMOVBE64rm:
- case X86::CMOVE16rr: case X86::CMOVE16rm:
- case X86::CMOVE32rr: case X86::CMOVE32rm:
- case X86::CMOVE64rr: case X86::CMOVE64rm:
- case X86::CMOVNE16rr: case X86::CMOVNE16rm:
- case X86::CMOVNE32rr: case X86::CMOVNE32rm:
- case X86::CMOVNE64rr: case X86::CMOVNE64rm:
- case X86::CMOVNP16rr: case X86::CMOVNP16rm:
- case X86::CMOVNP32rr: case X86::CMOVNP32rm:
- case X86::CMOVNP64rr: case X86::CMOVNP64rm:
- case X86::CMOVP16rr: case X86::CMOVP16rm:
- case X86::CMOVP32rr: case X86::CMOVP32rm:
- case X86::CMOVP64rr: case X86::CMOVP64rm:
+ // Examine the condition code of the user.
+ X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+
+ switch (CC) {
+ // Comparisons which only use the zero flag.
+ case X86::COND_E: case X86::COND_NE:
continue;
// Anything else: assume conservatively.
- default: return false;
+ default:
+ return false;
}
}
}
return true;
}
-/// Test whether the given node which sets flags has any uses which require the
-/// CF flag to be accurate.
-static bool hasNoCarryFlagUses(SDNode *N) {
+/// Test whether the given X86ISD::CMP node has any uses which require the SF
+/// flag to be accurate.
+bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
// Examine each user of the node.
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
- ++UI) {
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
// Only check things that use the flags.
- if (UI.getUse().getResNo() != 1)
+ if (UI.getUse().getResNo() != Flags.getResNo())
continue;
- // Only examine CopyToReg uses.
- if (UI->getOpcode() != ISD::CopyToReg)
- return false;
// Only examine CopyToReg uses that copy to EFLAGS.
- if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
+ if (UI->getOpcode() != ISD::CopyToReg ||
+ cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
return false;
// Examine each user of the CopyToReg use.
- for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
- FlagUI != FlagUE; ++FlagUI) {
+ for (SDNode::use_iterator FlagUI = UI->use_begin(),
+ FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
// Only examine the Flag result.
- if (FlagUI.getUse().getResNo() != 1)
- continue;
+ if (FlagUI.getUse().getResNo() != 1) continue;
// Anything unusual: assume conservatively.
- if (!FlagUI->isMachineOpcode())
- return false;
- // Examine the opcode of the user.
- switch (FlagUI->getMachineOpcode()) {
- // Comparisons which don't examine the CF flag.
- case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr:
- case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr:
- case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr:
- case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1:
- case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1:
- case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1:
- case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
- case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm:
- case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr:
- case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm:
- case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
- case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm:
- case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
- case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm:
- case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
- case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm:
- case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
- case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm:
- case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
- case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm:
- case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
- case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm:
- case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
- case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm:
- case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
- case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm:
- case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
- case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm:
- case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
- case X86::CMOVG16rm: case X86::CMOVG32rm: case X86::CMOVG64rm:
+ if (!FlagUI->isMachineOpcode()) return false;
+ // Examine the condition code of the user.
+ X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+
+ switch (CC) {
+ // Comparisons which don't examine the SF flag.
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ case X86::COND_E: case X86::COND_NE:
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_P: case X86::COND_NP:
continue;
// Anything else: assume conservatively.
default:
@@ -2206,23 +2362,96 @@ static bool hasNoCarryFlagUses(SDNode *N) {
return true;
}
+static bool mayUseCarryFlag(X86::CondCode CC) {
+ switch (CC) {
+ // Comparisons which don't examine the CF flag.
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_E: case X86::COND_NE:
+ case X86::COND_S: case X86::COND_NS:
+ case X86::COND_P: case X86::COND_NP:
+ case X86::COND_L: case X86::COND_GE:
+ case X86::COND_G: case X86::COND_LE:
+ return false;
+ // Anything else: assume conservatively.
+ default:
+ return true;
+ }
+}
+
+/// Test whether the given node which sets flags has any uses which require the
+/// CF flag to be accurate.
+ bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
+ // Examine each user of the node.
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ // Only check things that use the flags.
+ if (UI.getUse().getResNo() != Flags.getResNo())
+ continue;
+
+ unsigned UIOpc = UI->getOpcode();
+
+ if (UIOpc == ISD::CopyToReg) {
+ // Only examine CopyToReg uses that copy to EFLAGS.
+ if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
+ return false;
+ // Examine each user of the CopyToReg use.
+ for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
+ FlagUI != FlagUE; ++FlagUI) {
+ // Only examine the Flag result.
+ if (FlagUI.getUse().getResNo() != 1)
+ continue;
+ // Anything unusual: assume conservatively.
+ if (!FlagUI->isMachineOpcode())
+ return false;
+ // Examine the condition code of the user.
+ X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+
+ if (mayUseCarryFlag(CC))
+ return false;
+ }
+
+ // This CopyToReg is ok. Move on to the next user.
+ continue;
+ }
+
+ // This might be an unselected node. So look for the pre-isel opcodes that
+ // use flags.
+ unsigned CCOpNo;
+ switch (UIOpc) {
+ default:
+ // Something unusual. Be conservative.
+ return false;
+ case X86ISD::SETCC: CCOpNo = 0; break;
+ case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
+ case X86ISD::CMOV: CCOpNo = 2; break;
+ case X86ISD::BRCOND: CCOpNo = 2; break;
+ }
+
+ X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
+ if (mayUseCarryFlag(CC))
+ return false;
+ }
+ return true;
+}
+
/// Check whether or not the chain ending in StoreNode is suitable for doing
/// the {load; op; store} to modify transformation.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
SDValue StoredVal, SelectionDAG *CurDAG,
+ unsigned LoadOpNo,
LoadSDNode *&LoadNode,
SDValue &InputChain) {
- // is the stored value result 0 of the load?
+ // Is the stored value result 0 of the operation?
if (StoredVal.getResNo() != 0) return false;
- // are there other uses of the loaded value than the inc or dec?
+ // Are there other uses of the operation other than the store?
if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
- // is the store non-extending and non-indexed?
+ // Is the store non-extending and non-indexed?
if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
return false;
- SDValue Load = StoredVal->getOperand(0);
+ SDValue Load = StoredVal->getOperand(LoadOpNo);
// Is the stored value a non-extending and non-indexed load?
if (!ISD::isNormalLoad(Load.getNode())) return false;
@@ -2351,26 +2580,37 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
MemVT != MVT::i8)
return false;
+
+ bool IsCommutable = false;
switch (Opc) {
default:
return false;
- case X86ISD::INC:
- case X86ISD::DEC:
- case X86ISD::ADD:
- case X86ISD::ADC:
case X86ISD::SUB:
case X86ISD::SBB:
+ break;
+ case X86ISD::ADD:
+ case X86ISD::ADC:
case X86ISD::AND:
case X86ISD::OR:
case X86ISD::XOR:
+ IsCommutable = true;
break;
}
+ unsigned LoadOpNo = 0;
LoadSDNode *LoadNode = nullptr;
SDValue InputChain;
- if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode,
- InputChain))
- return false;
+ if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
+ LoadNode, InputChain)) {
+ if (!IsCommutable)
+ return false;
+
+ // This operation is commutable, try the other operand.
+ LoadOpNo = 1;
+ if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
+ LoadNode, InputChain))
+ return false;
+ }
SDValue Base, Scale, Index, Disp, Segment;
if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
@@ -2395,20 +2635,27 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
MachineSDNode *Result;
switch (Opc) {
- case X86ISD::INC:
- case X86ISD::DEC: {
- unsigned NewOpc =
- Opc == X86ISD::INC
- ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
- : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
- const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
- Result =
- CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
- break;
- }
case X86ISD::ADD:
- case X86ISD::ADC:
case X86ISD::SUB:
+ // Try to match inc/dec.
+ if (!Subtarget->slowIncDec() ||
+ CurDAG->getMachineFunction().getFunction().optForSize()) {
+ bool IsOne = isOneConstant(StoredVal.getOperand(1));
+ bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
+ // ADD/SUB by 1/-1 can use INC/DEC when the carry flag is not used.
+ if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
+ unsigned NewOpc =
+ ((Opc == X86ISD::ADD) == IsOne)
+ ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
+ : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
+ const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
+ Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
+ MVT::Other, Ops);
+ break;
+ }
+ }
+ LLVM_FALLTHROUGH;
+ case X86ISD::ADC:
case X86ISD::SBB:
case X86ISD::AND:
case X86ISD::OR:
@@ -2488,7 +2735,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
};
unsigned NewOpc = SelectRegOpcode(Opc);
- SDValue Operand = StoredVal->getOperand(1);
+ SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
// See if the operand is a constant that we can fold into an immediate
// operand.
@@ -2503,7 +2750,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
(-OperandV).getMinSignedBits() <= 8) ||
(MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
(-OperandV).getMinSignedBits() <= 32)) &&
- hasNoCarryFlagUses(StoredVal.getNode())) {
+ hasNoCarryFlagUses(StoredVal.getValue(1))) {
OperandV = -OperandV;
Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
}
@@ -2541,10 +2788,9 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
llvm_unreachable("Invalid opcode!");
}
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
- MemOp[0] = StoreNode->getMemOperand();
- MemOp[1] = LoadNode->getMemOperand();
- Result->setMemRefs(MemOp, MemOp + 2);
+ MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
+ LoadNode->getMemOperand()};
+ CurDAG->setNodeMemRefs(Result, MemOps);
// Update Load Chain uses as well.
ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
@@ -2554,39 +2800,273 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
return true;
}
+// See if this is an X & Mask that we can match to BEXTR/BZHI.
+// Where Mask is one of the following patterns:
+// a) x & (1 << nbits) - 1
+// b) x & ~(-1 << nbits)
+// c) x & (-1 >> (32 - y))
+// d) x << (32 - y) >> (32 - y)
+bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
+ assert(
+ (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
+ "Should be either an and-mask, or right-shift after clearing high bits.");
+
+ // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
+ if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
+ return false;
+
+ MVT NVT = Node->getSimpleValueType(0);
+
+ // Only supported for 32 and 64 bits.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ return false;
+
+ unsigned Size = NVT.getSizeInBits();
+
+ SDValue NBits;
+
+ // If we have BMI2's BZHI, we are ok with multi-use patterns.
+ // Else, if we only have BMI1's BEXTR, we require one-use.
+ const bool CanHaveExtraUses = Subtarget->hasBMI2();
+ auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
+ return CanHaveExtraUses ||
+ Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
+ };
+ auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
+ auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
+
+ // a) x & ((1 << nbits) + (-1))
+ auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool {
+ // Match `add`. Must only have one use!
+ if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
+ return false;
+ // We should be adding all-ones constant (i.e. subtracting one.)
+ if (!isAllOnesConstant(Mask->getOperand(1)))
+ return false;
+ // Match `1 << nbits`. Must only have one use!
+ SDValue M0 = Mask->getOperand(0);
+ if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
+ return false;
+ if (!isOneConstant(M0->getOperand(0)))
+ return false;
+ NBits = M0->getOperand(1);
+ return true;
+ };
+
+ // b) x & ~(-1 << nbits)
+ auto matchPatternB = [&checkOneUse, &NBits](SDValue Mask) -> bool {
+ // Match `~()`. Must only have one use!
+ if (!isBitwiseNot(Mask) || !checkOneUse(Mask))
+ return false;
+ // Match `-1 << nbits`. Must only have one use!
+ SDValue M0 = Mask->getOperand(0);
+ if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
+ return false;
+ if (!isAllOnesConstant(M0->getOperand(0)))
+ return false;
+ NBits = M0->getOperand(1);
+ return true;
+ };
+
+ // Match potentially-truncated (bitwidth - y)
+ auto matchShiftAmt = [checkOneUse, Size, &NBits](SDValue ShiftAmt) {
+ // Skip over a truncate of the shift amount.
+ if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
+ ShiftAmt = ShiftAmt.getOperand(0);
+ // The trunc should have been the only user of the real shift amount.
+ if (!checkOneUse(ShiftAmt))
+ return false;
+ }
+ // Match the shift amount as: (bitwidth - y). It should go away, too.
+ if (ShiftAmt.getOpcode() != ISD::SUB)
+ return false;
+ auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
+ if (!V0 || V0->getZExtValue() != Size)
+ return false;
+ NBits = ShiftAmt.getOperand(1);
+ return true;
+ };
+
+ // c) x & (-1 >> (32 - y))
+ auto matchPatternC = [&checkOneUse, matchShiftAmt](SDValue Mask) -> bool {
+ // Match `l>>`. Must only have one use!
+ if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
+ return false;
+ // We should be shifting all-ones constant.
+ if (!isAllOnesConstant(Mask.getOperand(0)))
+ return false;
+ SDValue M1 = Mask.getOperand(1);
+ // The shift amount should not be used externally.
+ if (!checkOneUse(M1))
+ return false;
+ return matchShiftAmt(M1);
+ };
+
+ SDValue X;
+
+ // d) x << (32 - y) >> (32 - y)
+ auto matchPatternD = [&checkOneUse, &checkTwoUse, matchShiftAmt,
+ &X](SDNode *Node) -> bool {
+ if (Node->getOpcode() != ISD::SRL)
+ return false;
+ SDValue N0 = Node->getOperand(0);
+ if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
+ return false;
+ SDValue N1 = Node->getOperand(1);
+ SDValue N01 = N0->getOperand(1);
+ // Both of the shifts must be by the exact same value.
+ // There should not be any uses of the shift amount outside of the pattern.
+ if (N1 != N01 || !checkTwoUse(N1))
+ return false;
+ if (!matchShiftAmt(N1))
+ return false;
+ X = N0->getOperand(0);
+ return true;
+ };
+
+ auto matchLowBitMask = [&matchPatternA, &matchPatternB,
+ &matchPatternC](SDValue Mask) -> bool {
+ // FIXME: pattern c.
+ return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
+ };
+
+ if (Node->getOpcode() == ISD::AND) {
+ X = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+
+ if (matchLowBitMask(Mask)) {
+ // Great.
+ } else {
+ std::swap(X, Mask);
+ if (!matchLowBitMask(Mask))
+ return false;
+ }
+ } else if (!matchPatternD(Node))
+ return false;
+
+ SDLoc DL(Node);
+
+ // If we do *NOT* have BMI2, let's find out whether 'X' is *logically*
+ // shifted (potentially with a one-use trunc in between), and if so look
+ // past the one-use truncation.
+ MVT XVT = NVT;
+ if (!Subtarget->hasBMI2() && X.getOpcode() == ISD::TRUNCATE &&
+ X.hasOneUse() && X.getOperand(0).getOpcode() == ISD::SRL) {
+ assert(NVT == MVT::i32 && "Expected target valuetype to be i32");
+ X = X.getOperand(0);
+ XVT = X.getSimpleValueType();
+ assert(XVT == MVT::i64 && "Expected truncation from i64");
+ }
+
+ SDValue OrigNBits = NBits;
+ if (NBits.getValueType() != XVT) {
+ // Truncate the shift amount.
+ NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
+ insertDAGNode(*CurDAG, OrigNBits, NBits);
+
+ // Insert 8-bit NBits into lowest 8 bits of XVT-sized (32 or 64-bit)
+ // register. All the other bits are undefined, we do not care about them.
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, XVT), 0);
+ insertDAGNode(*CurDAG, OrigNBits, ImplDef);
+ NBits =
+ CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, XVT, ImplDef, NBits);
+ insertDAGNode(*CurDAG, OrigNBits, NBits);
+ }
+
+ if (Subtarget->hasBMI2()) {
+ // Great, just emit the BZHI.
+ SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, XVT, X, NBits);
+ ReplaceNode(Node, Extract.getNode());
+ SelectCode(Extract.getNode());
+ return true;
+ }
+
+ // Else, emitting BEXTR requires one more step.
+ // The 'control' of BEXTR has the pattern of:
+ // [15...8 bit][ 7...0 bit] location
+ // [ bit count][ shift] name
+ // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
+
+ // Shift NBits left by 8 bits, thus producing 'control'.
+ // This makes the low 8 bits zero.
+ SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
+ SDValue Control = CurDAG->getNode(ISD::SHL, DL, XVT, NBits, C8);
+ insertDAGNode(*CurDAG, OrigNBits, Control);
+
+ // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
+ if (X.getOpcode() == ISD::SRL) {
+ SDValue ShiftAmt = X.getOperand(1);
+ X = X.getOperand(0);
+
+ assert(ShiftAmt.getValueType() == MVT::i8 &&
+ "Expected shift amount to be i8");
+
+ // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
+ SDValue OrigShiftAmt = ShiftAmt;
+ ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, XVT, ShiftAmt);
+ insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
+
+ // And now 'or' these low 8 bits of the shift amount into the 'control'.
+ Control = CurDAG->getNode(ISD::OR, DL, XVT, Control, ShiftAmt);
+ insertDAGNode(*CurDAG, OrigNBits, Control);
+ }
+
+ // And finally, form the BEXTR itself.
+ SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
+
+ // The 'X' was originally truncated. Do that now.
+ if (XVT != NVT) {
+ insertDAGNode(*CurDAG, OrigNBits, Extract);
+ Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
+ }
+
+ ReplaceNode(Node, Extract.getNode());
+ SelectCode(Extract.getNode());
+
+ return true;
+}
+
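
A standalone sketch (not part of the commit, plain C++ rather than the LLVM API) checking that the low-bit-mask shapes (a)-(d) matched above all select the same low n bits for 0 < n < 32, which is the value BZHI (or a BEXTR control with start 0 and length n) produces for such n. Keeping n strictly below the width avoids shifting by the full operand size.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t x = 0xDEADBEEF;
  for (uint32_t n = 1; n < 32; ++n) {
    const uint32_t a = x & ((1u << n) - 1);          // pattern a
    const uint32_t b = x & ~(~0u << n);              // pattern b
    const uint32_t c = x & (~0u >> (32 - n));        // pattern c
    const uint32_t d = (x << (32 - n)) >> (32 - n);  // pattern d
    assert(a == b && b == c && c == d);
  }
  return 0;
}
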
// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
-bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
+MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
SDLoc dl(Node);
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- if (!Subtarget->hasBMI() && !Subtarget->hasTBM())
- return false;
+ // If we have TBM we can use an immediate for the control. If we have BMI
+ // we should only do this if the BEXTR instruction is implemented well.
+ // Otherwise moving the control into a register makes this more costly.
+ // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
+ // hoisting the move immediate would make it worthwhile with a less optimal
+ // BEXTR?
+ if (!Subtarget->hasTBM() &&
+ !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
+ return nullptr;
// Must have a shift right.
if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
- return false;
+ return nullptr;
// Shift can't have additional users.
if (!N0->hasOneUse())
- return false;
+ return nullptr;
// Only supported for 32 and 64 bits.
if (NVT != MVT::i32 && NVT != MVT::i64)
- return false;
+ return nullptr;
// Shift amount and RHS of and must be constant.
ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (!MaskCst || !ShiftCst)
- return false;
+ return nullptr;
// And RHS must be a mask.
uint64_t Mask = MaskCst->getZExtValue();
if (!isMask_64(Mask))
- return false;
+ return nullptr;
uint64_t Shift = ShiftCst->getZExtValue();
uint64_t MaskSize = countPopulation(Mask);
@@ -2594,20 +3074,41 @@ bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
// Don't interfere with something that can be handled by extracting AH.
// TODO: If we are able to fold a load, BEXTR might still be better than AH.
if (Shift == 8 && MaskSize == 8)
- return false;
+ return nullptr;
// Make sure we are only using bits that were in the original value, not
// shifted in.
if (Shift + MaskSize > NVT.getSizeInBits())
- return false;
+ return nullptr;
+
+ SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
+ unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
+ unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
+
+ // BMI requires the immediate to be placed in a register.
+ if (!Subtarget->hasTBM()) {
+ ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
+ MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
+ unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
+ New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
+ }
- // Create a BEXTR node and run it through selection.
- SDValue C = CurDAG->getConstant(Shift | (MaskSize << 8), dl, NVT);
- SDValue New = CurDAG->getNode(X86ISD::BEXTR, dl, NVT,
- N0->getOperand(0), C);
- ReplaceNode(Node, New.getNode());
- SelectCode(New.getNode());
- return true;
+ MachineSDNode *NewNode;
+ SDValue Input = N0->getOperand(0);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
+ NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ // Update the chain.
+ ReplaceUses(Input.getValue(1), SDValue(NewNode, 1));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
+ } else {
+ NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New);
+ }
+
+ return NewNode;
}
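
A standalone model (not part of the commit; the bextr() helper below is a hypothetical stand-in for the hardware instruction) of the control word this routine builds as Shift | (MaskSize << 8). BEXTR takes the start bit from bits 7:0 of the control and the length from bits 15:8, so the selected instruction computes the same value as the original shift-and-mask, provided Shift + MaskSize stays within the operand width.

#include <cassert>
#include <cstdint>

// Value-level model of BEXTR: (src >> start) & ((1 << len) - 1),
// with start = control[7:0] and len = control[15:8].
static uint64_t bextr(uint64_t Src, uint64_t Control) {
  const unsigned Start = Control & 0xFF;
  const unsigned Len = (Control >> 8) & 0xFF;
  return (Src >> Start) & ((1ULL << Len) - 1);
}

int main() {
  const uint64_t Src = 0x0123456789ABCDEFULL;
  const unsigned Shift = 12, MaskSize = 20;           // Shift + MaskSize <= 64
  const uint64_t Control = Shift | (MaskSize << 8);   // as built above
  const uint64_t Mask = (1ULL << MaskSize) - 1;       // isMask_64, popcount == MaskSize
  assert(bextr(Src, Control) == ((Src >> Shift) & Mask));
  return 0;
}
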
// Emit a PCMISTR(I/M) instruction.
@@ -2620,23 +3121,17 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
- // If there is a load, it will be behind a bitcast. We don't need to check
- // alignment on this load.
+ // Try to fold a load. No need to check alignment.
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (MayFoldLoad && N1->getOpcode() == ISD::BITCAST && N1->hasOneUse() &&
- tryFoldVecLoad(Node, N1.getNode(), N1.getOperand(0), Tmp0, Tmp1, Tmp2,
- Tmp3, Tmp4)) {
- SDValue Load = N1.getOperand(0);
+ if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
- Load.getOperand(0) };
+ N1.getOperand(0) };
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Update the chain.
- ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(Load)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
return CNode;
}
@@ -2659,24 +3154,18 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
- // If there is a load, it will be behind a bitcast. We don't need to check
- // alignment on this load.
+ // Try to fold a load. No need to check alignment.
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (MayFoldLoad && N2->getOpcode() == ISD::BITCAST && N2->hasOneUse() &&
- tryFoldVecLoad(Node, N2.getNode(), N2.getOperand(0), Tmp0, Tmp1, Tmp2,
- Tmp3, Tmp4)) {
- SDValue Load = N2.getOperand(0);
+ if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
- Load.getOperand(0), InFlag };
+ N2.getOperand(0), InFlag };
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
InFlag = SDValue(CNode, 3);
// Update the chain.
- ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
+ ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(Load)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
return CNode;
}
@@ -2687,6 +3176,93 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
return CNode;
}
+bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ // Only handle scalar shifts.
+ if (VT.isVector())
+ return false;
+
+ // Narrower shifts only mask to 5 bits in hardware.
+ unsigned Size = VT == MVT::i64 ? 64 : 32;
+
+ SDValue OrigShiftAmt = N->getOperand(1);
+ SDValue ShiftAmt = OrigShiftAmt;
+ SDLoc DL(N);
+
+ // Skip over a truncate of the shift amount.
+ if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
+ ShiftAmt = ShiftAmt->getOperand(0);
+
+ // This function is called after X86DAGToDAGISel::matchBitExtract(),
+ // so we are not afraid that we might mess up BZHI/BEXTR pattern.
+
+ SDValue NewShiftAmt;
+ if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
+ SDValue Add0 = ShiftAmt->getOperand(0);
+ SDValue Add1 = ShiftAmt->getOperand(1);
+ // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
+ // to avoid the ADD/SUB.
+ if (isa<ConstantSDNode>(Add1) &&
+ cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
+ NewShiftAmt = Add0;
+ // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+ // generate a NEG instead of a SUB of a constant.
+ } else if (ShiftAmt->getOpcode() == ISD::SUB &&
+ isa<ConstantSDNode>(Add0) &&
+ cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
+ cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
+ // Insert a negate op.
+ // TODO: This isn't guaranteed to replace the sub if there is a logic cone
+ // that uses it that's not a shift.
+ EVT SubVT = ShiftAmt.getValueType();
+ SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
+ SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
+ NewShiftAmt = Neg;
+
+ // Insert these operands into a valid topological order so they can
+ // get selected independently.
+ insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
+ insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
+ } else
+ return false;
+ } else
+ return false;
+
+ if (NewShiftAmt.getValueType() != MVT::i8) {
+ // Need to truncate the shift amount.
+ NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
+ // Add to a correct topological ordering.
+ insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+ }
+
+ // Insert a new mask to keep the shift amount legal. This should be removed
+ // by isel patterns.
+ NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
+ CurDAG->getConstant(Size - 1, DL, MVT::i8));
+ // Place in a correct topological ordering.
+ insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+
+ SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
+ NewShiftAmt);
+ if (UpdatedNode != N) {
+ // If we found an existing node, we should replace ourselves with that node
+ // and wait for it to be selected after its other users.
+ ReplaceNode(N, UpdatedNode);
+ return true;
+ }
+
+ // If the original shift amount is now dead, delete it so that we don't run
+ // it through isel.
+ if (OrigShiftAmt.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
+
+ // Now that we've optimized the shift amount, defer to normal isel to get
+ // load folding and legacy vs BMI2 selection without repeating it here.
+ SelectCode(N);
+ return true;
+}
+
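
A standalone illustration (not part of the commit) of the masking fact tryShiftAmountMod exploits: x86 32-bit shifts use only the low 5 bits of the count (64-bit shifts use the low 6), so adding or subtracting a multiple of the width to the count does not change the result. The shl32() helper below models that hardware masking explicitly with an & 31.

#include <cassert>
#include <cstdint>

// Models a 32-bit x86 shift, which masks the count to 5 bits in hardware.
static uint32_t shl32(uint32_t x, uint32_t amt) { return x << (amt & 31); }

int main() {
  const uint32_t x = 0xCAFEF00D;
  for (uint32_t y = 0; y < 32; ++y) {
    assert(shl32(x, y + 32) == shl32(x, y));              // count = y + N, N % 32 == 0
    assert(shl32(x, 32 - y) == shl32(x, (0u - y) & 31));  // count = N - y becomes NEG plus mask
  }
  return 0;
}
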
/// If the high bits of an 'and' operand are known zero, try setting the
/// high bits of an 'and' constant operand to produce a smaller encoding by
/// creating a small, sign-extended negative immediate rather than a large
@@ -2795,9 +3371,18 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, getGlobalBaseReg());
return;
- case X86ISD::SELECT:
- case X86ISD::SHRUNKBLEND: {
- // SHRUNKBLEND selects like a regular VSELECT. Same with X86ISD::SELECT.
+ case ISD::BITCAST:
+ // Just drop all 128/256/512-bit bitcasts.
+ if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
+ NVT == MVT::f128) {
+ ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ break;
+
+ case X86ISD::BLENDV: {
+ // BLENDV selects like a regular VSELECT.
SDValue VSelect = CurDAG->getNode(
ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2));
@@ -2807,10 +3392,25 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
}
+ case ISD::SRL:
+ if (matchBitExtract(Node))
+ return;
+ LLVM_FALLTHROUGH;
+ case ISD::SRA:
+ case ISD::SHL:
+ if (tryShiftAmountMod(Node))
+ return;
+ break;
+
case ISD::AND:
- if (matchBEXTRFromAnd(Node))
+ if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
+ ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ if (matchBitExtract(Node))
return;
- if (shrinkAndImmediate(Node))
+ if (AndImmShrink && shrinkAndImmediate(Node))
return;
LLVM_FALLTHROUGH;
@@ -2898,45 +3498,85 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
getI8Imm(ShlVal, dl));
return;
}
- case X86ISD::UMUL8:
- case X86ISD::SMUL8: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
-
- unsigned Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
-
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
- N0, SDValue()).getValue(1);
-
- SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
- SDValue Ops[] = {N1, InFlag};
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
-
- ReplaceNode(Node, CNode);
- return;
- }
-
+ case X86ISD::SMUL:
+ // i16/i32/i64 are handled with isel patterns.
+ if (NVT != MVT::i8)
+ break;
+ LLVM_FALLTHROUGH;
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- unsigned LoReg, Opc;
+ unsigned LoReg, ROpc, MOpc;
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
- // MVT::i8 is handled by X86ISD::UMUL8.
- case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
- case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
- case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ case MVT::i8:
+ LoReg = X86::AL;
+ ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
+ MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX;
+ ROpc = X86::MUL16r;
+ MOpc = X86::MUL16m;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX;
+ ROpc = X86::MUL32r;
+ MOpc = X86::MUL32m;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX;
+ ROpc = X86::MUL64r;
+ MOpc = X86::MUL64m;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiply is commutative.
+ if (!FoldedLoad) {
+ FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (FoldedLoad)
+ std::swap(N0, N1);
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
- SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
- SDValue Ops[] = {N1, InFlag};
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ MachineSDNode *CNode;
+ if (FoldedLoad) {
+ // i16/i32/i64 use an instruction that produces a low and high result even
+ // though only the low result is used.
+ SDVTList VTs;
+ if (NVT == MVT::i8)
+ VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
+ else
+ VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
+
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
+ } else {
+ // i16/i32/i64 use an instruction that produces a low and high result even
+ // though only the low result is used.
+ SDVTList VTs;
+ if (NVT == MVT::i8)
+ VTs = CurDAG->getVTList(NVT, MVT::i32);
+ else
+ VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+
+ CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
+ }
- ReplaceNode(Node, CNode);
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
+ CurDAG->RemoveDeadNode(Node);
return;
}
@@ -2947,14 +3587,11 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
unsigned Opc, MOpc;
bool isSigned = Opcode == ISD::SMUL_LOHI;
- bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
- case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
- MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
- case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
- MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
+ case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
}
} else {
switch (NVT.SimpleTy) {
@@ -2975,12 +3612,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case X86::MUL64r:
SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
break;
- case X86::MULX32rr:
- SrcReg = X86::EDX; LoReg = HiReg = 0;
- break;
- case X86::MULX64rr:
- SrcReg = X86::RDX; LoReg = HiReg = 0;
- break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2994,68 +3625,43 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
N0, SDValue()).getValue(1);
- SDValue ResHi, ResLo;
-
if (foldedLoad) {
SDValue Chain;
MachineSDNode *CNode = nullptr;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
- if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
- SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
- CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
- ResHi = SDValue(CNode, 0);
- ResLo = SDValue(CNode, 1);
- Chain = SDValue(CNode, 2);
- InFlag = SDValue(CNode, 3);
- } else {
- SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
- CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
- Chain = SDValue(CNode, 0);
- InFlag = SDValue(CNode, 1);
- }
+ SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ Chain = SDValue(CNode, 0);
+ InFlag = SDValue(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), Chain);
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
SDValue Ops[] = { N1, InFlag };
- if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
- SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
- ResHi = SDValue(CNode, 0);
- ResLo = SDValue(CNode, 1);
- InFlag = SDValue(CNode, 2);
- } else {
- SDVTList VTs = CurDAG->getVTList(MVT::Glue);
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
- InFlag = SDValue(CNode, 0);
- }
+ SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ InFlag = SDValue(CNode, 0);
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- if (!ResLo.getNode()) {
- assert(LoReg && "Register for low half is not defined!");
- ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
- InFlag);
- InFlag = ResLo.getValue(2);
- }
+ assert(LoReg && "Register for low half is not defined!");
+ SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
+ NVT, InFlag);
+ InFlag = ResLo.getValue(2);
ReplaceUses(SDValue(Node, 0), ResLo);
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- if (!ResHi.getNode()) {
- assert(HiReg && "Register for high half is not defined!");
- ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
- InFlag);
- InFlag = ResHi.getValue(2);
- }
+ assert(HiReg && "Register for high half is not defined!");
+ SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
+ NVT, InFlag);
+ InFlag = ResHi.getValue(2);
ReplaceUses(SDValue(Node, 1), ResHi);
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
dbgs() << '\n');
@@ -3066,15 +3672,12 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
}
case ISD::SDIVREM:
- case ISD::UDIVREM:
- case X86ISD::SDIVREM8_SEXT_HREG:
- case X86ISD::UDIVREM8_ZEXT_HREG: {
+ case ISD::UDIVREM: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
unsigned Opc, MOpc;
- bool isSigned = (Opcode == ISD::SDIVREM ||
- Opcode == X86ISD::SDIVREM8_SEXT_HREG);
+ bool isSigned = Opcode == ISD::SDIVREM;
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
@@ -3124,20 +3727,22 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
+ MachineSDNode *Move;
if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
- Move =
- SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
- MVT::Other, Ops), 0);
- Chain = Move.getValue(1);
+ Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
+ MVT::Other, Ops);
+ Chain = SDValue(Move, 1);
ReplaceUses(N0.getValue(1), Chain);
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
} else {
- Move =
- SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
+ Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
Chain = CurDAG->getEntryNode();
}
- Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
+ SDValue());
InFlag = Chain.getValue(1);
} else {
InFlag =
@@ -3188,9 +3793,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
InFlag =
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
@@ -3213,13 +3816,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Result(RNode, 0);
InFlag = SDValue(RNode, 1);
- if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
- Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
- assert(Node->getValueType(1) == MVT::i32 && "Unexpected result type!");
- } else {
- Result =
- CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
- }
+ Result =
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
+
ReplaceUses(SDValue(Node, 1), Result);
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
dbgs() << '\n');
@@ -3250,8 +3849,31 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
- hasNoSignedComparisonUses(Node))
+ // Optimizations for TEST compares.
+ if (!isNullConstant(N1))
+ break;
+
+ // Save the original VT of the compare.
+ MVT CmpVT = N0.getSimpleValueType();
+
+    // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
+ // by a test instruction. The test should be removed later by
+ // analyzeCompare if we are using only the zero flag.
+ // TODO: Should we check the users and use the BEXTR flags directly?
+ if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
+ unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
+ : X86::TEST32rr;
+ SDValue BEXTR = SDValue(NewNode, 0);
+ NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
+ ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ }
+
+ // We can peek through truncates, but we need to be careful below.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
N0 = N0.getOperand(0);
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
@@ -3259,33 +3881,75 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Look past the truncate if CMP is the only use of it.
if (N0.getOpcode() == ISD::AND &&
N0.getNode()->hasOneUse() &&
- N0.getValueType() != MVT::i8 &&
- X86::isZeroNode(N1)) {
+ N0.getValueType() != MVT::i8) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C) break;
uint64_t Mask = C->getZExtValue();
+ // Check if we can replace AND+IMM64 with a shift. This is possible for
+      // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
+ // flag.
+ if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
+ onlyUsesZeroFlag(SDValue(Node, 0))) {
+ if (isMask_64(~Mask)) {
+ unsigned TrailingZeros = countTrailingZeros(Mask);
+ SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
+ SDValue Shift =
+ SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64,
+ N0.getOperand(0), Imm), 0);
+ MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
+ MVT::i32, Shift, Shift);
+ ReplaceNode(Node, Test);
+ return;
+ }
+ if (isMask_64(Mask)) {
+ unsigned LeadingZeros = countLeadingZeros(Mask);
+ SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
+ SDValue Shift =
+ SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64,
+ N0.getOperand(0), Imm), 0);
+ MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
+ MVT::i32, Shift, Shift);
+ ReplaceNode(Node, Test);
+ return;
+ }
+ }
+
MVT VT;
int SubRegOp;
- unsigned Op;
+ unsigned ROpc, MOpc;
+
+      // For each of these checks we need to be careful if the sign flag is
+      // being used. It is only safe to use the sign flag in two cases:
+      // either the sign bit in the shrunken mask is zero or the final test
+      // size is equal to the original compare size.
if (isUInt<8>(Mask) &&
- (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) {
+ (!(Mask & 0x80) || CmpVT == MVT::i8 ||
+ hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, convert "testl %eax, $8" to "testb %al, $8"
VT = MVT::i8;
SubRegOp = X86::sub_8bit;
- Op = X86::TEST8ri;
+ ROpc = X86::TEST8ri;
+ MOpc = X86::TEST8mi;
} else if (OptForMinSize && isUInt<16>(Mask) &&
- (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
+ (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
+ hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, "testl %eax, $32776" to "testw %ax, $32776".
// NOTE: We only want to form TESTW instructions if optimizing for
// min size. Otherwise we only save one byte and possibly get a length
// changing prefix penalty in the decoders.
VT = MVT::i16;
SubRegOp = X86::sub_16bit;
- Op = X86::TEST16ri;
+ ROpc = X86::TEST16ri;
+ MOpc = X86::TEST16mi;
} else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
- (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
+ ((!(Mask & 0x80000000) &&
+ // Without minsize 16-bit Cmps can get here so we need to
+ // be sure we calculate the correct sign flag if needed.
+ (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
+ CmpVT == MVT::i32 ||
+ hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
// NOTE: We only want to run that transform if N0 is 32 or 64 bits.
       // Otherwise, we find ourselves in a position where we have to do
@@ -3293,21 +3957,37 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// they had a good reason not to and do not promote here.
VT = MVT::i32;
SubRegOp = X86::sub_32bit;
- Op = X86::TEST32ri;
+ ROpc = X86::TEST32ri;
+ MOpc = X86::TEST32mi;
} else {
// No eligible transformation was found.
break;
}
+ // FIXME: We should be able to fold loads here.
+
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
SDValue Reg = N0.getOperand(0);
- // Extract the subregister if necessary.
- if (N0.getValueType() != VT)
- Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
-
// Emit a testl or testw.
- SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm);
+ MachineSDNode *NewNode;
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
+ Reg.getOperand(0) };
+ NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
+ // Update the chain.
+ ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(NewNode,
+ {cast<LoadSDNode>(Reg)->getMemOperand()});
+ } else {
+ // Extract the subregister if necessary.
+ if (N0.getValueType() != VT)
+ Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
+
+ NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
+ }
// Replace CMP with TEST.
ReplaceNode(Node, NewNode);
return;
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 67a127fe0a2b..b6a692ee187d 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19,7 +19,6 @@
#include "X86InstrBuilder.h"
#include "X86IntrinsicsInfo.h"
#include "X86MachineFunctionInfo.h"
-#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -196,6 +195,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
+ // Funnel shifts.
+ for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
+ setOperationAction(ShiftOp , MVT::i16 , Custom);
+ setOperationAction(ShiftOp , MVT::i32 , Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ShiftOp , MVT::i64 , Custom);
+ }
+
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
@@ -533,6 +540,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Use ANDPD and ORPD to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+ // These might be better off as horizontal vector ops.
+ setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::FSUB, VT, Custom);
+
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
@@ -543,15 +554,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
- // Expand FP immediates into loads from the stack, except for the special
- // cases we handle.
- addLegalFPImmediate(APFloat(+0.0)); // xorpd
- addLegalFPImmediate(APFloat(+0.0f)); // xorps
- } else if (UseX87 && X86ScalarSSEf32) {
+ } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
- addRegisterClass(MVT::f64, &X86::RFP64RegClass);
+ if (UseX87)
+ addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
setOperationAction(ISD::FABS , MVT::f32, Custom);
@@ -559,10 +567,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f32, Custom);
- setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ if (UseX87)
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
// Use ANDPS and ORPS to simulate FCOPYSIGN.
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ if (UseX87)
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
@@ -570,17 +580,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
- // Special cases we handle for FP constants.
- addLegalFPImmediate(APFloat(+0.0f)); // xorps
- addLegalFPImmediate(APFloat(+0.0)); // FLD0
- addLegalFPImmediate(APFloat(+1.0)); // FLD1
- addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
-
- // Always expand sin/cos functions even though x87 has an instruction.
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ if (UseX87) {
+ // Always expand sin/cos functions even though x87 has an instruction.
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ }
} else if (UseX87) {
// f32 and f64 in x87.
// Set up the FP register classes.
@@ -596,14 +601,27 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
- addLegalFPImmediate(APFloat(+0.0)); // FLD0
- addLegalFPImmediate(APFloat(+1.0)); // FLD1
- addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- addLegalFPImmediate(APFloat(+0.0f)); // FLD0
- addLegalFPImmediate(APFloat(+1.0f)); // FLD1
- addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ }
+
+ // Expand FP32 immediates into loads from the stack, save special cases.
+ if (isTypeLegal(MVT::f32)) {
+ if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ } else // SSE immediates.
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ }
+ // Expand FP64 immediates into loads from the stack, save special cases.
+ if (isTypeLegal(MVT::f64)) {
+ if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ } else // SSE immediates.
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
}
// We don't support FMA.
@@ -613,7 +631,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Long double always uses X87, except f128 in MMX.
if (UseX87) {
if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
- addRegisterClass(MVT::f128, &X86::VR128RegClass);
+ addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
setOperationAction(ISD::FABS , MVT::f128, Custom);
setOperationAction(ISD::FNEG , MVT::f128, Custom);
@@ -778,11 +797,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
+ for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
+ MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::SREM, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ setOperationAction(ISD::UREM, VT, Custom);
+ }
+
+ setOperationAction(ISD::MUL, MVT::v2i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i8, Custom);
+
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
@@ -799,6 +833,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
}
+ setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
+
+ if (!ExperimentalVectorWideningLegalization) {
+ // Use widening instead of promotion.
+ for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
+ MVT::v4i16, MVT::v2i16 }) {
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
+ }
+ }
+
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
@@ -813,7 +867,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
+ setOperationAction(ISD::ABS, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -834,9 +888,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// scalars) and extend in-register to a legal 128-bit vector type. For sext
// loads these must work with a single scalar load.
for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
@@ -857,21 +908,36 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
- // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
- setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
- }
-
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
+
+ // Custom legalize these to avoid over promotion or custom promotion.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+
+    // By marking FP_TO_SINT v8i16 as Custom, we trick type legalization into
+ // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
+ // split again based on the input type, this will cause an AssertSExt i16 to
+ // be emitted instead of an AssertZExt. This will allow packssdw followed by
+ // packuswb to be used to truncate to v8i8. This is necessary since packusdw
+ // isn't available until sse4.1.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
@@ -887,6 +953,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
+ // We want to legalize this to an f64 load rather than an i64 load on
+ // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
+ // store.
+ setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
+ setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
+ setOperationAction(ISD::STORE, MVT::v2f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v8i8, Custom);
+
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
@@ -897,6 +975,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
+ if (ExperimentalVectorWideningLegalization) {
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+
+ setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
+ } else {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
+ }
+
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
@@ -907,7 +998,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
- setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
+
+ // With AVX512, expanding (and promoting the shifts) is better.
+ if (!Subtarget.hasAVX512())
+ setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
@@ -919,6 +1013,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
+
+ // These might be better off as horizontal vector ops.
+ setOperationAction(ISD::ADD, MVT::i16, Custom);
+ setOperationAction(ISD::ADD, MVT::i32, Custom);
+ setOperationAction(ISD::SUB, MVT::i16, Custom);
+ setOperationAction(ISD::SUB, MVT::i32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
@@ -953,17 +1053,22 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
- for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+ if (!ExperimentalVectorWideningLegalization) {
+ // Avoid narrow result types when widening. The legal types are listed
+ // in the next loop.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+ }
}
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
+ if (!ExperimentalVectorWideningLegalization)
+ setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
@@ -1039,12 +1144,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRA, VT, Custom);
}
+ if (ExperimentalVectorWideningLegalization) {
+ // These types need custom splitting if their input is a 128-bit vector.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+ }
+
setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
- setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
+
+    // With BWI, expanding (and promoting the shifts) is better.
+ if (!Subtarget.hasBWI())
+ setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
@@ -1061,9 +1180,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
+      // TODO - remove this once 256-bit X86ISD::ANDNP is correctly split.
+ setOperationAction(ISD::CTTZ, VT, HasInt256 ? Expand : Custom);
+
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
@@ -1086,19 +1207,28 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
- setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
-
+ setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
+ setOperationAction(ISD::ABS, MVT::v4i64, Custom);
setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
@@ -1107,11 +1237,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
}
- if (HasInt256) {
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
+ for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+ }
+ if (HasInt256) {
// The custom lowering for UINT_TO_FP for v8i32 becomes interesting
// when we have a 256bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
@@ -1156,15 +1287,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (HasInt256)
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
- // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
- setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
- }
-
if (HasInt256) {
// Custom legalize 2x32 to get a little better code.
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
@@ -1224,6 +1346,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -1307,6 +1433,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ if (ExperimentalVectorWideningLegalization) {
+ // Need to custom widen this if we don't have AVX512BW.
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
+ }
+
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
@@ -1315,12 +1448,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, VT, Legal);
}
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
-
// Without BWI we need to use custom lowering to handle MVT::v64i8 input.
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
- setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
+ for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+ }
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
@@ -1330,11 +1462,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
- setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v32i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v64i8, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
@@ -1347,7 +1482,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1358,13 +1492,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETLE, VT, Custom);
}
- // Need to promote to 64-bit even though we have 32-bit masked instructions
- // because the IR optimizers rearrange bitcasts around logic ops leaving
- // too many variations to handle if we don't promote them.
- setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
- setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
- setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
-
if (Subtarget.hasDQI()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
@@ -1378,7 +1505,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
setOperationAction(ISD::CTLZ, VT, Legal);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
} // Subtarget.hasCDI()
@@ -1407,16 +1533,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
- for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
- setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
- setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
- }
-
// Need to custom split v32i16/v64i8 bitcasts.
if (!Subtarget.hasBWI()) {
setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
}
+
+ if (Subtarget.hasVBMI2()) {
+ for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
+ }
+ }
}// has AVX-512
// This block controls legalization for operations that don't have
@@ -1468,7 +1596,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasCDI()) {
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::CTLZ, VT, Legal);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
} // Subtarget.hasCDI()
@@ -1490,6 +1617,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1550,6 +1681,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
@@ -1563,17 +1695,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::SADDSAT, VT, Legal);
+ setOperationAction(ISD::USUBSAT, VT, Legal);
+ setOperationAction(ISD::SSUBSAT, VT, Legal);
- setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
- setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
- setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
+ // The condition codes aren't legal in SSE/AVX and under AVX512 we use
+ // setcc all the way to isel and prefer SETGT in some isel patterns.
+ setCondCodeAction(ISD::SETLT, VT, Custom);
+ setCondCodeAction(ISD::SETLE, VT, Custom);
}
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
@@ -1584,6 +1720,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v64i8, MVT::v32i16 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
+
+ if (Subtarget.hasVBMI2()) {
+ setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
+ setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
+ }
}
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
@@ -1630,6 +1771,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
+
+ if (Subtarget.hasVBMI2()) {
+ // TODO: Make these legal even without VLX?
+ for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
+ MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
+ }
+ }
}
// We want to custom lower some of our intrinsics.
@@ -1731,8 +1881,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
- setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
@@ -1787,13 +1935,13 @@ SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
}
TargetLoweringBase::LegalizeTypeAction
-X86TargetLowering::getPreferredVectorAction(EVT VT) const {
+X86TargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return TypeSplitVector;
if (ExperimentalVectorWideningLegalization &&
VT.getVectorNumElements() != 1 &&
- VT.getVectorElementType().getSimpleVT() != MVT::i1)
+ VT.getVectorElementType() != MVT::i1)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
@@ -1926,7 +2074,8 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
if (Subtarget.hasSSE2())
return MVT::v16i8;
// TODO: Can SSE1 handle a byte vector?
- if (Subtarget.hasSSE1())
+ // If we have SSE1 registers we should be able to use them.
+ if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()))
return MVT::v4f32;
} else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
!Subtarget.is64Bit() && Subtarget.hasSSE2()) {
@@ -3138,7 +3287,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
}
// If value is passed via pointer - do a load.
- if (VA.getLocInfo() == CCValAssign::Indirect)
+ if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
ArgValue =
DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
@@ -3621,13 +3770,29 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg = DAG.getBitcast(RegVT, Arg);
break;
case CCValAssign::Indirect: {
- // Store the argument.
- SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
- int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- Chain = DAG.getStore(
- Chain, dl, Arg, SpillSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- Arg = SpillSlot;
+ if (isByVal) {
+ // Memcpy the argument to a temporary stack slot to prevent
+ // the caller from seeing any modifications the callee may make
+ // as guaranteed by the `byval` attribute.
+ int FrameIdx = MF.getFrameInfo().CreateStackObject(
+ Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
+ false);
+ SDValue StackSlot =
+ DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
+ Chain =
+ CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
+        // From now on, treat this as a regular pointer.
+ Arg = StackSlot;
+ isByVal = false;
+ } else {
+ // Store the argument.
+ SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ Chain = DAG.getStore(
+ Chain, dl, Arg, SpillSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ Arg = SpillSlot;
+ }
break;
}
}
@@ -4405,6 +4570,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
case X86ISD::VPERMV3:
return true;
// 'Faux' Target Shuffles.
+ case ISD::OR:
case ISD::AND:
case X86ISD::ANDNP:
return true;
@@ -4686,6 +4852,14 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
+bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+ // If we are using XMM registers in the ABI and the condition of the select is
+ // a floating-point compare and we have blendv or conditional move, then it is
+ // cheaper to select instead of doing a cross-register move and creating a
+ // load that depends on the compare result.
+ return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
+}
+
bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
// TODO: It might be a win to ease or lift this restriction, but the generic
// folds in DAGCombiner conflict with vector folds for an AVX512 target.
@@ -4695,6 +4869,31 @@ bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
return true;
}
+bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
+ // TODO: We handle scalars using custom code, but generic combining could make
+ // that unnecessary.
+ APInt MulC;
+ if (!ISD::isConstantSplatVector(C.getNode(), MulC))
+ return false;
+
+ // If vector multiply is legal, assume that's faster than shl + add/sub.
+  // TODO: Multiply is a complex op with higher latency and lower throughput in
+ // most implementations, so this check could be loosened based on type
+ // and/or a CPU attribute.
+ if (isOperationLegal(ISD::MUL, VT))
+ return false;
+
+ // shl+add, shl+sub, shl+add+neg
+ return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
+ (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
+}
+
+bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
+ bool IsSigned) const {
+  // f80 FP_TO_UINT is more efficient using Strict code if FCMOV is available.
+ return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
+}
+
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
@@ -4709,6 +4908,18 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
return (Index % ResVT.getVectorNumElements()) == 0;
}
+bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+ // If the vector op is not supported, try to convert to scalar.
+ EVT VecVT = VecOp.getValueType();
+ if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
+ return true;
+
+ // If the vector op is supported, but the scalar op is not, the transform may
+ // not be worthwhile.
+ EVT ScalarVT = VecVT.getScalarType();
+ return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
+}
+
bool X86TargetLowering::isCheapToSpeculateCttz() const {
// Speculate cttz only if we can directly use TZCNT.
return Subtarget.hasBMI();
@@ -4721,7 +4932,11 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
EVT BitcastVT) const {
- if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1)
+ if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
+ BitcastVT.getVectorElementType() == MVT::i1)
+ return false;
+
+ if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
return false;
return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
@@ -4763,17 +4978,14 @@ bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
if (VT != MVT::i32 && VT != MVT::i64)
return false;
- // A mask and compare against constant is ok for an 'andn' too
- // even though the BMI instruction doesn't have an immediate form.
-
- return true;
+ return !isa<ConstantSDNode>(Y);
}
bool X86TargetLowering::hasAndNot(SDValue Y) const {
EVT VT = Y.getValueType();
- if (!VT.isVector()) // x86 can't form 'andn' with an immediate.
- return !isa<ConstantSDNode>(Y) && hasAndNotCompare(Y);
+ if (!VT.isVector())
+ return hasAndNotCompare(Y);
// Vector.
@@ -4800,6 +5012,12 @@ bool X86TargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
return true;
}
+bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
+ // Any legal vector type can be splatted more efficiently than
+ // loading/spilling from memory.
+ return isTypeLegal(VT);
+}
+
MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
MVT VT = MVT::getIntegerVT(NumBits);
if (isTypeLegal(VT))
@@ -5408,24 +5626,29 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
return DAG.getBitcast(VT, Vec);
}
-static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
+static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In,
SelectionDAG &DAG) {
EVT InVT = In.getValueType();
- assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode");
-
- if (VT.is128BitVector() && InVT.is128BitVector())
- return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
- : DAG.getZeroExtendVectorInReg(In, DL, VT);
+ assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
// For 256-bit vectors, we only need the lower (128-bit) input half.
// For 512-bit vectors, we only need the lower input half or quarter.
- if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
- int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
+ if (InVT.getSizeInBits() > 128) {
+ assert(VT.getSizeInBits() == InVT.getSizeInBits() &&
+ "Expected VTs to be the same size!");
+ unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
In = extractSubVector(In, 0, DAG, DL,
- std::max(128, (int)VT.getSizeInBits() / Scale));
+ std::max(128U, VT.getSizeInBits() / Scale));
+ InVT = In.getValueType();
}
- return DAG.getNode(Opc, DL, VT, In);
+ if (VT.getVectorNumElements() == InVT.getVectorNumElements())
+ return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ DL, VT, In);
+
+ return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG
+ : ISD::ZERO_EXTEND_VECTOR_INREG,
+ DL, VT, In);
}
/// Returns a vector_shuffle node for an unpackl operation.
@@ -5463,19 +5686,6 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
-static SDValue peekThroughBitcasts(SDValue V) {
- while (V.getNode() && V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
- return V;
-}
-
-static SDValue peekThroughOneUseBitcasts(SDValue V) {
- while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
- V.getOperand(0).hasOneUse())
- V = V.getOperand(0);
- return V;
-}
-
// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
@@ -5496,10 +5706,10 @@ static const Constant *getTargetConstantFromNode(SDValue Op) {
Ptr = Ptr->getOperand(0);
auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
- if (!CNode || CNode->isMachineConstantPoolEntry())
+ if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
return nullptr;
- return dyn_cast<Constant>(CNode->getConstVal());
+ return CNode->getConstVal();
}
// Extract raw constant bits from constant pools.
@@ -5632,15 +5842,34 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
}
return CastBitData(UndefSrcElts, SrcEltBits);
}
+ if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) {
+ unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
+ APInt UndefSrcElts(NumSrcElts, 0);
+ SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ const SDValue &Src = Op.getOperand(i);
+ if (Src.isUndef()) {
+ UndefSrcElts.setBit(i);
+ continue;
+ }
+ auto *Cst = cast<ConstantFPSDNode>(Src);
+ APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
+ SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits);
+ }
+ return CastBitData(UndefSrcElts, SrcEltBits);
+ }
// Extract constant bits from constant pool vector.
if (auto *Cst = getTargetConstantFromNode(Op)) {
Type *CstTy = Cst->getType();
- if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
+ unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
+ if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0)
return false;
unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
- unsigned NumSrcElts = CstTy->getVectorNumElements();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
@@ -5685,19 +5914,107 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
return CastBitData(UndefSrcElts, SrcEltBits);
}
+ // Extract constant bits from a subvector's source.
+ if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isa<ConstantSDNode>(Op.getOperand(1))) {
+ // TODO - support extract_subvector through bitcasts.
+ if (EltSizeInBits != VT.getScalarSizeInBits())
+ return false;
+
+ if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+ UndefElts, EltBits, AllowWholeUndefs,
+ AllowPartialUndefs)) {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ unsigned NumSubElts = VT.getVectorNumElements();
+ unsigned BaseIdx = Op.getConstantOperandVal(1);
+ UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
+ if ((BaseIdx + NumSubElts) != NumSrcElts)
+ EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
+ if (BaseIdx != 0)
+ EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
+ return true;
+ }
+ }
+
+ // Extract constant bits from shuffle node sources.
+ if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Op)) {
+ // TODO - support shuffle through bitcasts.
+ if (EltSizeInBits != VT.getScalarSizeInBits())
+ return false;
+
+ ArrayRef<int> Mask = SVN->getMask();
+ if ((!AllowWholeUndefs || !AllowPartialUndefs) &&
+ llvm::any_of(Mask, [](int M) { return M < 0; }))
+ return false;
+
+ APInt UndefElts0, UndefElts1;
+ SmallVector<APInt, 32> EltBits0, EltBits1;
+ if (isAnyInRange(Mask, 0, NumElts) &&
+ !getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+ UndefElts0, EltBits0, AllowWholeUndefs,
+ AllowPartialUndefs))
+ return false;
+ if (isAnyInRange(Mask, NumElts, 2 * NumElts) &&
+ !getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
+ UndefElts1, EltBits1, AllowWholeUndefs,
+ AllowPartialUndefs))
+ return false;
+
+ UndefElts = APInt::getNullValue(NumElts);
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ UndefElts.setBit(i);
+ EltBits.push_back(APInt::getNullValue(EltSizeInBits));
+ } else if (M < (int)NumElts) {
+ if (UndefElts0[M])
+ UndefElts.setBit(i);
+ EltBits.push_back(EltBits0[M]);
+ } else {
+ if (UndefElts1[M - NumElts])
+ UndefElts.setBit(i);
+ EltBits.push_back(EltBits1[M - NumElts]);
+ }
+ }
+ return true;
+ }
+
return false;
}
-static bool getTargetShuffleMaskIndices(SDValue MaskNode,
- unsigned MaskEltSizeInBits,
- SmallVectorImpl<uint64_t> &RawMask) {
+static bool isConstantSplat(SDValue Op, APInt &SplatVal) {
APInt UndefElts;
- SmallVector<APInt, 64> EltBits;
+ SmallVector<APInt, 16> EltBits;
+ if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(),
+ UndefElts, EltBits, true, false)) {
+ int SplatIndex = -1;
+ for (int i = 0, e = EltBits.size(); i != e; ++i) {
+ if (UndefElts[i])
+ continue;
+ if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex]) {
+ SplatIndex = -1;
+ break;
+ }
+ SplatIndex = i;
+ }
+ if (0 <= SplatIndex) {
+ SplatVal = EltBits[SplatIndex];
+ return true;
+ }
+ }
+
+ return false;
+}
+static bool getTargetShuffleMaskIndices(SDValue MaskNode,
+ unsigned MaskEltSizeInBits,
+ SmallVectorImpl<uint64_t> &RawMask,
+ APInt &UndefElts) {
// Extract the raw target constant bits.
- // FIXME: We currently don't support UNDEF bits or mask entries.
+ SmallVector<APInt, 64> EltBits;
if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
- EltBits, /* AllowWholeUndefs */ false,
+ EltBits, /* AllowWholeUndefs */ true,
/* AllowPartialUndefs */ false))
return false;
@@ -5726,6 +6043,31 @@ static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
}
}
+// Split the demanded elts of a PACKSS/PACKUS node between its operands.
+static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
+ APInt &DemandedLHS, APInt &DemandedRHS) {
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumElts = DemandedElts.getBitWidth();
+ int NumInnerElts = NumElts / 2;
+ int NumEltsPerLane = NumElts / NumLanes;
+ int NumInnerEltsPerLane = NumInnerElts / NumLanes;
+
+ DemandedLHS = APInt::getNullValue(NumInnerElts);
+ DemandedRHS = APInt::getNullValue(NumInnerElts);
+
+ // Map DemandedElts to the packed operands.
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ for (int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) {
+ int OuterIdx = (Lane * NumEltsPerLane) + Elt;
+ int InnerIdx = (Lane * NumInnerEltsPerLane) + Elt;
+ if (DemandedElts[OuterIdx])
+ DemandedLHS.setBit(InnerIdx);
+ if (DemandedElts[OuterIdx + NumInnerEltsPerLane])
+ DemandedRHS.setBit(InnerIdx);
+ }
+ }
+}
+
/// Calculates the shuffle mask corresponding to the target-specific opcode.
/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
/// operands in \p Ops, and returns true.
@@ -5737,6 +6079,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
SmallVectorImpl<SDValue> &Ops,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
+ unsigned MaskEltSize = VT.getScalarSizeInBits();
+ SmallVector<uint64_t, 32> RawMask;
+ APInt RawUndefs;
SDValue ImmN;
assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector");
@@ -5744,26 +6089,26 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
IsUnary = false;
bool IsFakeUnary = false;
- switch(N->getOpcode()) {
+ switch (N->getOpcode()) {
case X86ISD::BLENDI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeBLENDMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::SHUFP:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeSHUFPMask(NumElems, VT.getScalarSizeInBits(),
+ ImmN = N->getOperand(N->getNumOperands() - 1);
+ DecodeSHUFPMask(NumElems, MaskEltSize,
cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::INSERTPS:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
@@ -5773,8 +6118,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
isa<ConstantSDNode>(N->getOperand(2))) {
int BitLen = N->getConstantOperandVal(1);
int BitIdx = N->getConstantOperandVal(2);
- DecodeEXTRQIMask(NumElems, VT.getScalarSizeInBits(), BitLen, BitIdx,
- Mask);
+ DecodeEXTRQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask);
IsUnary = true;
}
break;
@@ -5785,21 +6129,20 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
isa<ConstantSDNode>(N->getOperand(3))) {
int BitLen = N->getConstantOperandVal(2);
int BitIdx = N->getConstantOperandVal(3);
- DecodeINSERTQIMask(NumElems, VT.getScalarSizeInBits(), BitLen, BitIdx,
- Mask);
+ DecodeINSERTQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
}
break;
case X86ISD::UNPCKH:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- DecodeUNPCKHMask(NumElems, VT.getScalarSizeInBits(), Mask);
+ DecodeUNPCKHMask(NumElems, MaskEltSize, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKL:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- DecodeUNPCKLMask(NumElems, VT.getScalarSizeInBits(), Mask);
+ DecodeUNPCKLMask(NumElems, MaskEltSize, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVHLPS:
@@ -5818,7 +6161,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePALIGNRMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -5844,21 +6187,21 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::PSHUFD:
case X86ISD::VPERMILPI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFMask(NumElems, VT.getScalarSizeInBits(),
+ ImmN = N->getOperand(N->getNumOperands() - 1);
+ DecodePSHUFMask(NumElems, MaskEltSize,
cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFHW:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePSHUFHWMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePSHUFLWMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = true;
@@ -5891,14 +6234,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
- unsigned MaskEltSize = VT.getScalarSizeInBits();
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
- DecodeVPERMILPMask(NumElems, VT.getScalarSizeInBits(), RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMILPMask(C, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -5909,20 +6247,15 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
- DecodePSHUFBMask(RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodePSHUFBMask(C, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
+ DecodePSHUFBMask(RawMask, RawUndefs, Mask);
break;
}
return false;
}
case X86ISD::VPERMI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeVPERMMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
@@ -5935,7 +6268,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::VPERM2X128:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeVPERM2X128Mask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -5943,10 +6276,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::SHUF128:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
- decodeVSHUF64x2FamilyMask(NumElems, VT.getScalarSizeInBits(),
- cast<ConstantSDNode>(ImmN)->getZExtValue(),
- Mask);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
+ decodeVSHUF64x2FamilyMask(NumElems, MaskEltSize,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVSLDUP:
@@ -5968,19 +6300,14 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
- unsigned MaskEltSize = VT.getScalarSizeInBits();
SDValue MaskNode = N->getOperand(2);
SDValue CtrlNode = N->getOperand(3);
if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
unsigned CtrlImm = CtrlOp->getZExtValue();
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
- DecodeVPERMIL2PMask(NumElems, VT.getScalarSizeInBits(), CtrlImm,
- RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMIL2PMask(NumElems, MaskEltSize, CtrlImm, RawMask, RawUndefs,
+ Mask);
break;
}
}
@@ -5991,13 +6318,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
SDValue MaskNode = N->getOperand(2);
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
- DecodeVPPERMMask(RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPPERMMask(C, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
+ DecodeVPPERMMask(RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -6008,14 +6330,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
// Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
Ops.push_back(N->getOperand(1));
SDValue MaskNode = N->getOperand(0);
- SmallVector<uint64_t, 32> RawMask;
- unsigned MaskEltSize = VT.getScalarSizeInBits();
- if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
- DecodeVPERMVMask(RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMVMask(C, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMVMask(RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -6028,9 +6345,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
Ops.push_back(N->getOperand(0));
Ops.push_back(N->getOperand(2));
SDValue MaskNode = N->getOperand(1);
- unsigned MaskEltSize = VT.getScalarSizeInBits();
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMV3Mask(C, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMV3Mask(RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -6147,6 +6464,12 @@ static bool setTargetShuffleZeroElements(SDValue N,
return true;
}
+// Forward declaration (for getFauxShuffleMask recursive check).
+static bool resolveTargetShuffleInputs(SDValue Op,
+ SmallVectorImpl<SDValue> &Inputs,
+ SmallVectorImpl<int> &Mask,
+ const SelectionDAG &DAG);
+
// Attempt to decode ops that could be represented as a shuffle mask.
// The decoded shuffle mask may contain a different number of elements to the
// destination value type.
@@ -6200,6 +6523,78 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
Ops.push_back(IsAndN ? N1 : N0);
return true;
}
+ case ISD::OR: {
+ // Handle the OR(SHUFFLE,SHUFFLE) case, where for each element one source
+ // is known zero and the other supplies a valid shuffle index.
+ SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
+ SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
+ if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
+ return false;
+ SmallVector<int, 64> SrcMask0, SrcMask1;
+ SmallVector<SDValue, 2> SrcInputs0, SrcInputs1;
+ if (!resolveTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG) ||
+ !resolveTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG))
+ return false;
+ int MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
+ SmallVector<int, 64> Mask0, Mask1;
+ scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
+ scaleShuffleMask<int>(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
+ for (int i = 0; i != MaskSize; ++i) {
+ if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
+ Mask.push_back(SM_SentinelUndef);
+ else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
+ Mask.push_back(SM_SentinelZero);
+ else if (Mask1[i] == SM_SentinelZero)
+ Mask.push_back(Mask0[i]);
+ else if (Mask0[i] == SM_SentinelZero)
+ Mask.push_back(Mask1[i] + (MaskSize * SrcInputs0.size()));
+ else
+ return false;
+ }
+ for (SDValue &Op : SrcInputs0)
+ Ops.push_back(Op);
+ for (SDValue &Op : SrcInputs1)
+ Ops.push_back(Op);
+ return true;
+ }
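// Editorial sketch (not part of the patch): the per-element merge performed by
// the OR case above, over plain int masks that have already been scaled to a
// common size. An OR of two shuffles is itself a shuffle only if, for every
// element, at least one side is known zero (or both are undef). The sentinel
// values mirror SM_SentinelUndef/SM_SentinelZero; names are illustrative only.
#include <vector>

enum { SentinelUndef = -1, SentinelZero = -2 };

static bool mergeOrShuffleMasks(const std::vector<int> &Mask0,
                                const std::vector<int> &Mask1,
                                int NumInputs0,   // Number of sources behind Mask0.
                                std::vector<int> &Merged) {
  int Size = (int)Mask0.size();
  Merged.clear();
  for (int i = 0; i != Size; ++i) {
    if (Mask0[i] == SentinelUndef && Mask1[i] == SentinelUndef)
      Merged.push_back(SentinelUndef);
    else if (Mask0[i] == SentinelZero && Mask1[i] == SentinelZero)
      Merged.push_back(SentinelZero);
    else if (Mask1[i] == SentinelZero)
      Merged.push_back(Mask0[i]);                    // OR with zero keeps the LHS element.
    else if (Mask0[i] == SentinelZero)
      Merged.push_back(Mask1[i] + Size * NumInputs0); // Rebase past the LHS inputs.
    else
      return false;                                  // Both sides contribute bits.
  }
  return true;
}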
+ case ISD::INSERT_SUBVECTOR: {
+ // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(EXTRACT_SUBVECTOR(SRC1))) where
+ // SRC0/SRC1 are both of the same value type VT.
+ // TODO - add peekThroughOneUseBitcasts support.
+ SDValue Src = N.getOperand(0);
+ SDValue Sub = N.getOperand(1);
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ if (!isa<ConstantSDNode>(N.getOperand(2)) ||
+ !N->isOnlyUserOf(Sub.getNode()))
+ return false;
+ SmallVector<int, 64> SubMask;
+ SmallVector<SDValue, 2> SubInputs;
+ if (!resolveTargetShuffleInputs(Sub, SubInputs, SubMask, DAG) ||
+ SubMask.size() != NumSubElts)
+ return false;
+ Ops.push_back(Src);
+ for (SDValue &SubInput : SubInputs) {
+ if (SubInput.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ SubInput.getOperand(0).getValueType() != VT ||
+ !isa<ConstantSDNode>(SubInput.getOperand(1)))
+ return false;
+ Ops.push_back(SubInput.getOperand(0));
+ }
+ int InsertIdx = N.getConstantOperandVal(2);
+ for (int i = 0; i != (int)NumElts; ++i)
+ Mask.push_back(i);
+ for (int i = 0; i != (int)NumSubElts; ++i) {
+ int M = SubMask[i];
+ if (0 <= M) {
+ int InputIdx = M / NumSubElts;
+ int ExtractIdx = SubInputs[InputIdx].getConstantOperandVal(1);
+ M = (NumElts * (1 + InputIdx)) + ExtractIdx + (M % NumSubElts);
+ }
+ Mask[i + InsertIdx] = M;
+ }
+ return true;
+ }
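// Editorial sketch (not part of the patch): how the INSERT_SUBVECTOR case
// composes its mask. Start from the identity mask over the destination, then
// overwrite NumSubElts entries at InsertIdx with the sub-shuffle's indices,
// rebased to refer to the extra source operands appended after the base
// vector. ExtractIdx holds each sub-input's extract offset; names are
// illustrative only.
#include <vector>

static std::vector<int> composeInsertSubvectorMask(int NumElts, int NumSubElts,
                                                   int InsertIdx,
                                                   const std::vector<int> &SubMask,
                                                   const std::vector<int> &ExtractIdx) {
  std::vector<int> Mask;
  for (int i = 0; i != NumElts; ++i)
    Mask.push_back(i);                        // Identity: keep the base vector.
  for (int i = 0; i != NumSubElts; ++i) {
    int M = SubMask[i];
    if (0 <= M) {
      int InputIdx = M / NumSubElts;          // Which sub-shuffle input it uses.
      // Operand 0 is the base vector, so sub-input InputIdx starts at
      // NumElts * (1 + InputIdx); the extract offset is then re-added.
      M = NumElts * (1 + InputIdx) + ExtractIdx[InputIdx] + (M % NumSubElts);
    }
    Mask[i + InsertIdx] = M;                  // Negative sentinels pass through.
  }
  return Mask;
}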
case ISD::SCALAR_TO_VECTOR: {
// Match against a scalar_to_vector of an extract from a vector,
// for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
@@ -6334,14 +6729,14 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
return true;
}
case ISD::ZERO_EXTEND_VECTOR_INREG:
- case X86ISD::VZEXT: {
+ case ISD::ZERO_EXTEND: {
// TODO - add support for VPMOVZX with smaller input vector types.
SDValue Src = N.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
if (NumSizeInBits != SrcVT.getSizeInBits())
break;
- DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), VT.getScalarSizeInBits(),
- VT.getVectorNumElements(), Mask);
+ DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts,
+ Mask);
Ops.push_back(Src);
return true;
}
@@ -6586,6 +6981,26 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ // If this is a splat of a pair of elements, use MOVDDUP (unless the target
+ // has XOP; in that case defer lowering to potentially use VPERMIL2PS).
+ // Because we're creating a less complicated build vector here, we may enable
+ // further folding of the MOVDDUP via shuffle transforms.
+ if (Subtarget.hasSSE3() && !Subtarget.hasXOP() &&
+ Op.getOperand(0) == Op.getOperand(2) &&
+ Op.getOperand(1) == Op.getOperand(3) &&
+ Op.getOperand(0) != Op.getOperand(1)) {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ // Create a new build vector with the first 2 elements followed by undef
+ // padding, bitcast to v2f64, duplicate, and bitcast back.
+ SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
+ DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
+ SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops));
+ SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV);
+ return DAG.getBitcast(VT, Dup);
+ }
+
// Find all zeroable elements.
std::bitset<4> Zeroable;
for (int i=0; i < 4; ++i) {
@@ -7059,9 +7474,9 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
}
}
- // We need a splat of a single value to use broadcast, and it doesn't
- // make any sense if the value is only in one element of the vector.
- if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumUndefElts = UndefElements.count();
+ if (!Ld || (NumElts - NumUndefElts) <= 1) {
APInt SplatValue, Undef;
unsigned SplatBitSize;
bool HasUndef;
@@ -7137,7 +7552,17 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
}
}
}
- return SDValue();
+
+ // If we are moving a scalar into a vector (Ld must be set and all elements
+ // but 1 are undef) and that operation is not obviously supported by
+ // vmovd/vmovq/vmovss/vmovsd, then keep trying to form a broadcast.
+ // That's better than general shuffling and may eliminate a load to GPR and
+ // move from scalar to vector register.
+ if (!Ld || NumElts - NumUndefElts != 1)
+ return SDValue();
+ unsigned ScalarSize = Ld.getValueSizeInBits();
+ if (!(UndefElements[0] || (ScalarSize != 32 && ScalarSize != 64)))
+ return SDValue();
}
bool ConstSplatVal =
@@ -7434,13 +7859,14 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
return DstVec;
}
-/// Return true if \p N implements a horizontal binop and return the
-/// operands for the horizontal binop into V0 and V1.
-///
/// This is a helper function of LowerToHorizontalOp().
/// This function checks that the build_vector \p N in input implements a
-/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
-/// operation to match.
+/// 128-bit partial horizontal operation on a 256-bit vector, but that operation
+/// may not match the layout of an x86 256-bit horizontal instruction.
+/// In other words, if this returns true, then some extraction/insertion will
+/// be required to produce a valid horizontal instruction.
+///
+/// Parameter \p Opcode defines the kind of horizontal operation to match.
/// For example, if \p Opcode is equal to ISD::ADD, then this function
/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
/// is equal to ISD::SUB, then this function checks if this is a horizontal
@@ -7448,12 +7874,17 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
///
/// This function only analyzes elements of \p N whose indices are
/// in range [BaseIdx, LastIdx).
-static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
- SelectionDAG &DAG,
- unsigned BaseIdx, unsigned LastIdx,
- SDValue &V0, SDValue &V1) {
+///
+/// TODO: This function was originally used to match both real and fake partial
+/// horizontal operations, but the index-matching logic is incorrect for that.
+/// See the corrected implementation in isHopBuildVector(). Can we reduce this
+/// code because it is only used for partial h-op matching now?
+static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode,
+ SelectionDAG &DAG,
+ unsigned BaseIdx, unsigned LastIdx,
+ SDValue &V0, SDValue &V1) {
EVT VT = N->getValueType(0);
-
+ assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops");
assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
"Invalid Vector in input!");
@@ -7623,7 +8054,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
// adding/subtracting two integer/float elements.
// Even-numbered elements in the input build vector are obtained from
// subtracting/adding two integer/float elements.
- unsigned Opc[2] {0, 0};
+ unsigned Opc[2] = {0, 0};
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Op = BV->getOperand(i);
@@ -7794,17 +8225,158 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
+static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
+ unsigned &HOpcode, SDValue &V0, SDValue &V1) {
+ // Initialize outputs to known values.
+ MVT VT = BV->getSimpleValueType(0);
+ HOpcode = ISD::DELETED_NODE;
+ V0 = DAG.getUNDEF(VT);
+ V1 = DAG.getUNDEF(VT);
+
+ // x86 256-bit horizontal ops are defined in a non-obvious way. Each 128-bit
+ // half of the result is calculated independently from the 128-bit halves of
+ // the inputs, so that makes the index-checking logic below more complicated.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned GenericOpcode = ISD::DELETED_NODE;
+ unsigned Num128BitChunks = VT.is256BitVector() ? 2 : 1;
+ unsigned NumEltsIn128Bits = NumElts / Num128BitChunks;
+ unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2;
+ for (unsigned i = 0; i != Num128BitChunks; ++i) {
+ for (unsigned j = 0; j != NumEltsIn128Bits; ++j) {
+ // Ignore undef elements.
+ SDValue Op = BV->getOperand(i * NumEltsIn128Bits + j);
+ if (Op.isUndef())
+ continue;
+
+ // If there's an opcode mismatch, we're done.
+ if (HOpcode != ISD::DELETED_NODE && Op.getOpcode() != GenericOpcode)
+ return false;
+
+ // Initialize horizontal opcode.
+ if (HOpcode == ISD::DELETED_NODE) {
+ GenericOpcode = Op.getOpcode();
+ switch (GenericOpcode) {
+ case ISD::ADD: HOpcode = X86ISD::HADD; break;
+ case ISD::SUB: HOpcode = X86ISD::HSUB; break;
+ case ISD::FADD: HOpcode = X86ISD::FHADD; break;
+ case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
+ default: return false;
+ }
+ }
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Op0.getOperand(0) != Op1.getOperand(0) ||
+ !isa<ConstantSDNode>(Op0.getOperand(1)) ||
+ !isa<ConstantSDNode>(Op1.getOperand(1)) || !Op.hasOneUse())
+ return false;
+
+ // The source vector is chosen based on which 64-bit half of the
+ // destination vector is being calculated.
+ if (j < NumEltsIn64Bits) {
+ if (V0.isUndef())
+ V0 = Op0.getOperand(0);
+ } else {
+ if (V1.isUndef())
+ V1 = Op0.getOperand(0);
+ }
+
+ SDValue SourceVec = (j < NumEltsIn64Bits) ? V0 : V1;
+ if (SourceVec != Op0.getOperand(0))
+ return false;
+
+ // op (extract_vector_elt A, I), (extract_vector_elt A, I+1)
+ unsigned ExtIndex0 = Op0.getConstantOperandVal(1);
+ unsigned ExtIndex1 = Op1.getConstantOperandVal(1);
+ unsigned ExpectedIndex = i * NumEltsIn128Bits +
+ (j % NumEltsIn64Bits) * 2;
+ if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1)
+ continue;
+
+ // If this is not a commutative op, this does not match.
+ if (GenericOpcode != ISD::ADD && GenericOpcode != ISD::FADD)
+ return false;
+
+ // Addition is commutative, so try swapping the extract indexes.
+ // op (extract_vector_elt A, I+1), (extract_vector_elt A, I)
+ if (ExpectedIndex == ExtIndex1 && ExtIndex0 == ExtIndex1 + 1)
+ continue;
+
+ // Extract indexes do not match horizontal requirement.
+ return false;
+ }
+ }
+ // We matched. Opcode and operands are returned by reference as arguments.
+ return true;
+}
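// Editorial sketch (not part of the patch): the expected-index rule checked
// above. For an x86 horizontal add/sub, result element j of 128-bit chunk i is
// computed from a consecutive source pair starting at an even offset within
// that chunk; this helper computes that expected first extract index. Names
// are illustrative only.
static unsigned expectedHopExtractIndex(unsigned Chunk, unsigned EltInChunk,
                                        unsigned NumEltsIn128Bits) {
  unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2;
  return Chunk * NumEltsIn128Bits + (EltInChunk % NumEltsIn64Bits) * 2;
}
// For example, with v8i32 (two 128-bit chunks of 4 elements), result element 5
// has Chunk=1, EltInChunk=1, so it must be Src[6] op Src[7] of the first
// source (EltInChunk < NumEltsIn64Bits selects V0).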
+
+static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
+ SelectionDAG &DAG, unsigned HOpcode,
+ SDValue V0, SDValue V1) {
+ // If either input vector is not the same size as the build vector,
+ // extract/insert the low bits to the correct size.
+ // This is free (examples: zmm --> xmm, xmm --> ymm).
+ MVT VT = BV->getSimpleValueType(0);
+ unsigned Width = VT.getSizeInBits();
+ if (V0.getValueSizeInBits() > Width)
+ V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), Width);
+ else if (V0.getValueSizeInBits() < Width)
+ V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, SDLoc(BV), Width);
+
+ if (V1.getValueSizeInBits() > Width)
+ V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), Width);
+ else if (V1.getValueSizeInBits() < Width)
+ V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
+
+ return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1);
+}
+
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ // We need at least 2 non-undef elements to make this worthwhile by default.
+ unsigned NumNonUndefs = 0;
+ for (const SDValue &V : BV->op_values())
+ if (!V.isUndef())
+ ++NumNonUndefs;
+
+ if (NumNonUndefs < 2)
+ return SDValue();
+
+ // There are 4 sets of horizontal math operations distinguished by type:
+ // int/FP at 128-bit/256-bit. Each type was introduced with a different
+ // subtarget feature. Try to match those "native" patterns first.
MVT VT = BV->getSimpleValueType(0);
+ unsigned HOpcode;
+ SDValue V0, V1;
+ if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ if ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ if ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ if ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ // Try harder to match 256-bit ops by using extract/concat.
+ if (!Subtarget.hasAVX() || !VT.is256BitVector())
+ return SDValue();
+
+ // Count the number of UNDEF operands in the input build_vector.
unsigned NumElts = VT.getVectorNumElements();
+ unsigned Half = NumElts / 2;
unsigned NumUndefsLO = 0;
unsigned NumUndefsHI = 0;
- unsigned Half = NumElts/2;
-
- // Count the number of UNDEF operands in the build_vector in input.
for (unsigned i = 0, e = Half; i != e; ++i)
if (BV->getOperand(i)->isUndef())
NumUndefsLO++;
@@ -7813,96 +8385,61 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
if (BV->getOperand(i)->isUndef())
NumUndefsHI++;
- // Early exit if this is either a build_vector of all UNDEFs or all the
- // operands but one are UNDEF.
- if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
- return SDValue();
-
SDLoc DL(BV);
SDValue InVec0, InVec1;
- if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) {
- // Try to match an SSE3 float HADD/HSUB.
- if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
-
- if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
- } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget.hasSSSE3()) {
- // Try to match an SSSE3 integer HADD/HSUB.
- if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
-
- if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
- }
-
- if (!Subtarget.hasAVX())
- return SDValue();
-
- if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
- // Try to match an AVX horizontal add/sub of packed single/double
- // precision floating point values from 256-bit vectors.
- SDValue InVec2, InVec3;
- if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
-
- if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
- } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
- // Try to match an AVX2 horizontal add/sub of signed integers.
+ if (VT == MVT::v8i32 || VT == MVT::v16i16) {
SDValue InVec2, InVec3;
unsigned X86Opcode;
bool CanFold = true;
- if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
+ if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
+ isHorizontalBinOpPart(BV, ISD::ADD, DAG, Half, NumElts, InVec2,
+ InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
+ else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, Half, InVec0,
+ InVec1) &&
+ isHorizontalBinOpPart(BV, ISD::SUB, DAG, Half, NumElts, InVec2,
+ InVec3) &&
+ ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
+ ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HSUB;
else
CanFold = false;
if (CanFold) {
- // Fold this build_vector into a single horizontal add/sub.
- // Do this only if the target has AVX2.
- if (Subtarget.hasAVX2())
- return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
-
// Do not try to expand this build_vector into a pair of horizontal
// add/sub if we can emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
return SDValue();
- // Convert this build_vector into a pair of horizontal binop followed by
- // a concat vector.
+ // Convert this build_vector into a pair of horizontal binops followed by
+ // a concat vector. We must adjust the outputs from the partial horizontal
+ // matching calls above to account for undefined vector halves.
+ SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
+ SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
+ assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
bool isUndefLO = NumUndefsLO == Half;
bool isUndefHI = NumUndefsHI == Half;
- return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
- isUndefLO, isUndefHI);
+ return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
+ isUndefHI);
}
}
- if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
- VT == MVT::v16i16) && Subtarget.hasAVX()) {
+ if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
+ VT == MVT::v16i16) {
unsigned X86Opcode;
- if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
+ if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
+ else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, NumElts, InVec0,
+ InVec1))
X86Opcode = X86ISD::HSUB;
- else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
+ else if (isHorizontalBinOpPart(BV, ISD::FADD, DAG, 0, NumElts, InVec0,
+ InVec1))
X86Opcode = X86ISD::FHADD;
- else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
+ else if (isHorizontalBinOpPart(BV, ISD::FSUB, DAG, 0, NumElts, InVec0,
+ InVec1))
X86Opcode = X86ISD::FHSUB;
else
return SDValue();
@@ -8370,9 +8907,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If we are inserting one variable into a vector of non-zero constants, try
// to avoid loading each constant element as a scalar. Load the constants as a
// vector and then insert the variable scalar element. If insertion is not
- // supported, we assume that we will fall back to a shuffle to get the scalar
- // blended with the constants. Insertion into a zero vector is handled as a
- // special-case somewhere below here.
+ // supported, fall back to a shuffle to get the scalar blended with the
+ // constants. Insertion into a zero vector is handled as a special case
+ // further below.
if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
(isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||
isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {
@@ -8410,7 +8947,21 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI);
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);
+ unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue();
+ unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits();
+ if (InsertC < NumEltsInLow128Bits)
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);
+
+ // There's no good way to insert into the high elements of a >128-bit
+ // vector, so use shuffles to avoid an extract/insert sequence.
+ assert(VT.getSizeInBits() > 128 && "Invalid insertion index?");
+ assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector");
+ SmallVector<int, 8> ShuffleMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShuffleMask.push_back(i == InsertC ? NumElts : i);
+ SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt);
+ return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask);
}
// Special case for single non-zero, non-undef, element.
@@ -9097,6 +9648,28 @@ static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask,
return TargetMask;
}
+// Attempt to create a shuffle mask from a VSELECT condition mask.
+static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
+ SDValue Cond) {
+ if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
+ return false;
+
+ unsigned Size = Cond.getValueType().getVectorNumElements();
+ Mask.resize(Size, SM_SentinelUndef);
+
+ for (int i = 0; i != (int)Size; ++i) {
+ SDValue CondElt = Cond.getOperand(i);
+ Mask[i] = i;
+ // Arbitrarily choose from the 2nd operand if the select condition element
+ // is undef.
+ // TODO: Can we do better by matching patterns such as even/odd?
+ if (CondElt.isUndef() || isNullConstant(CondElt))
+ Mask[i] += Size;
+ }
+
+ return true;
+}
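// Editorial sketch (not part of the patch): the VSELECT-condition-to-shuffle
// translation above. Element i selects operand 1 when the condition element is
// non-zero and operand 2 when it is zero, so the mask entry is either i or
// i + Size; an undef condition element arbitrarily picks operand 2, as in the
// code above. The plain int representation (-1 for undef) and names are
// illustrative only.
#include <vector>

static std::vector<int> maskFromSelectCondition(const std::vector<int> &CondElts) {
  int Size = (int)CondElts.size();
  std::vector<int> Mask(Size);
  for (int i = 0; i != Size; ++i) {
    Mask[i] = i;                       // Default: take element i of operand 1.
    if (CondElts[i] == 0 || CondElts[i] == -1)
      Mask[i] += Size;                 // Zero or undef condition: operand 2.
  }
  return Mask;
}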
+
// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
// instructions.
static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
@@ -9664,11 +10237,7 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps);
V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask);
- // We have to cast V2 around.
- MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
- V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
- DAG.getBitcast(MaskVT, V1Mask),
- DAG.getBitcast(MaskVT, V2)));
+ V2 = DAG.getNode(X86ISD::ANDNP, DL, VT, V1Mask, V2);
return DAG.getNode(ISD::OR, DL, VT, V1, V2);
}
@@ -9762,7 +10331,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
case MVT::v8f32:
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
-
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
@@ -9794,7 +10362,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8)));
}
-
case MVT::v16i16: {
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
@@ -9808,6 +10375,20 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
}
+ // Use PBLENDW for lower/upper lanes and then blend lanes.
+ // TODO - we should allow 2 PBLENDW here and leave shuffle combine to
+ // merge to VSELECT where useful.
+ uint64_t LoMask = BlendMask & 0xFF;
+ uint64_t HiMask = (BlendMask >> 8) & 0xFF;
+ if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
+ SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
+ DAG.getConstant(LoMask, DL, MVT::i8));
+ SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
+ DAG.getConstant(HiMask, DL, MVT::i8));
+ return DAG.getVectorShuffle(
+ MVT::v16i16, DL, Lo, Hi,
+ {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
+ }
LLVM_FALLTHROUGH;
}
case MVT::v16i8:
@@ -9815,6 +10396,11 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
+ // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
+ if (SDValue Masked =
+ lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
+ return Masked;
+
if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
@@ -9822,11 +10408,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
- // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
- if (SDValue Masked =
- lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
- return Masked;
-
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
@@ -9834,6 +10415,15 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
// type.
MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ // x86 allows load folding with blendvb from the 2nd source operand. But
+ // we are still using LLVM select here (see comment below), so that's V1.
+ // If V2 can be load-folded and V1 cannot be load-folded, then commute to
+ // allow that load-folding possibility.
+ if (!ISD::isNormalLoad(V1.getNode()) && ISD::isNormalLoad(V2.getNode())) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(V1, V2);
+ }
+
// Compute the VSELECT mask. Note that VSELECT is really confusing in the
// mix of LLVM's code generator and the x86 backend. We tell the code
// generator that boolean values in the elements of an x86 vector register
@@ -9884,7 +10474,8 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ bool ImmBlends = false) {
// We build up the blend mask while checking whether a blend is a viable way
// to reduce the shuffle.
SmallVector<int, 32> BlendMask(Mask.size(), -1);
@@ -9904,10 +10495,168 @@ static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
PermuteMask[i] = Mask[i] % Size;
}
+ // If we are restricted to immediate blends, bail if the blend mask can't be
+ // widened to i16.
+ unsigned EltSize = VT.getScalarSizeInBits();
+ if (ImmBlends && EltSize == 8 && !canWidenShuffleElements(BlendMask))
+ return SDValue();
+
SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);
}
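// Editorial sketch (not part of the patch): the blend+permute decomposition
// used by the function above. First build a blend that, for every referenced
// element, keeps that element at its original position (only possible if the
// two inputs never both contribute to the same slot), then apply a single
// single-input permute that moves the blended elements to their final slots.
// Names are illustrative only; -1 means "don't care".
#include <vector>

static bool blendThenPermuteMasks(const std::vector<int> &Mask, int Size,
                                  std::vector<int> &BlendMask,
                                  std::vector<int> &PermuteMask) {
  BlendMask.assign(Size, -1);
  PermuteMask.assign(Size, -1);
  for (int i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;
    int Pos = M % Size;                        // Position within either input.
    if (BlendMask[Pos] != -1 && BlendMask[Pos] != M)
      return false;                            // Both inputs want this slot.
    BlendMask[Pos] = M;                        // Blend keeps the element in place.
    PermuteMask[i] = Pos;                      // Permute then moves it to slot i.
  }
  return true;
}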
+/// Try to lower as an unpack of elements from two inputs followed by
+/// a single-input permutation.
+///
+/// This matches the pattern where we can unpack elements from two inputs and
+/// then reduce the shuffle to a single-input (wider) permutation.
+static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ int NumElts = Mask.size();
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumLaneElts = NumElts / NumLanes;
+ int NumHalfLaneElts = NumLaneElts / 2;
+
+ bool MatchLo = true, MatchHi = true;
+ SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
+
+ // Determine UNPCKL/UNPCKH type and operand order.
+ for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
+ for (int Elt = 0; Elt != NumLaneElts; ++Elt) {
+ int M = Mask[Lane + Elt];
+ if (M < 0)
+ continue;
+
+ SDValue &Op = Ops[Elt & 1];
+ if (M < NumElts && (Op.isUndef() || Op == V1))
+ Op = V1;
+ else if (NumElts <= M && (Op.isUndef() || Op == V2))
+ Op = V2;
+ else
+ return SDValue();
+
+ int Lo = Lane, Mid = Lane + NumHalfLaneElts, Hi = Lane + NumLaneElts;
+ MatchLo &= isUndefOrInRange(M, Lo, Mid) ||
+ isUndefOrInRange(M, NumElts + Lo, NumElts + Mid);
+ MatchHi &= isUndefOrInRange(M, Mid, Hi) ||
+ isUndefOrInRange(M, NumElts + Mid, NumElts + Hi);
+ if (!MatchLo && !MatchHi)
+ return SDValue();
+ }
+ }
+ assert((MatchLo ^ MatchHi) && "Failed to match UNPCKLO/UNPCKHI");
+
+ // Now check that each pair of elts comes from the same unpack pair
+ // and set the permute mask based on each pair.
+ // TODO - Investigate cases where we permute individual elements.
+ SmallVector<int, 32> PermuteMask(NumElts, -1);
+ for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
+ for (int Elt = 0; Elt != NumLaneElts; Elt += 2) {
+ int M0 = Mask[Lane + Elt + 0];
+ int M1 = Mask[Lane + Elt + 1];
+ if (0 <= M0 && 0 <= M1 &&
+ (M0 % NumHalfLaneElts) != (M1 % NumHalfLaneElts))
+ return SDValue();
+ if (0 <= M0)
+ PermuteMask[Lane + Elt + 0] = Lane + (2 * (M0 % NumHalfLaneElts));
+ if (0 <= M1)
+ PermuteMask[Lane + Elt + 1] = Lane + (2 * (M1 % NumHalfLaneElts)) + 1;
+ }
+ }
+
+ unsigned UnpckOp = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
+ SDValue Unpck = DAG.getNode(UnpckOp, DL, VT, Ops);
+ return DAG.getVectorShuffle(VT, DL, Unpck, DAG.getUNDEF(VT), PermuteMask);
+}
+
+/// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then
+/// permuting the elements of the result in place.
+static SDValue lowerVectorShuffleAsByteRotateAndPermute(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
+ if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) ||
+ (VT.is256BitVector() && !Subtarget.hasAVX2()) ||
+ (VT.is512BitVector() && !Subtarget.hasBWI()))
+ return SDValue();
+
+ // We don't currently support lane crossing permutes.
+ if (is128BitLaneCrossingShuffleMask(VT, Mask))
+ return SDValue();
+
+ int Scale = VT.getScalarSizeInBits() / 8;
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumElts = VT.getVectorNumElements();
+ int NumEltsPerLane = NumElts / NumLanes;
+
+ // Determine range of mask elts.
+ bool Blend1 = true;
+ bool Blend2 = true;
+ std::pair<int, int> Range1 = std::make_pair(INT_MAX, INT_MIN);
+ std::pair<int, int> Range2 = std::make_pair(INT_MAX, INT_MIN);
+ for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
+ for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
+ int M = Mask[Lane + Elt];
+ if (M < 0)
+ continue;
+ if (M < NumElts) {
+ Blend1 &= (M == (Lane + Elt));
+ assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
+ M = M % NumEltsPerLane;
+ Range1.first = std::min(Range1.first, M);
+ Range1.second = std::max(Range1.second, M);
+ } else {
+ M -= NumElts;
+ Blend2 &= (M == (Lane + Elt));
+ assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
+ M = M % NumEltsPerLane;
+ Range2.first = std::min(Range2.first, M);
+ Range2.second = std::max(Range2.second, M);
+ }
+ }
+ }
+
+ // Bail if we don't need both elements.
+ // TODO - it might be worth doing this for unary shuffles if the permute
+ // can be widened.
+ if (!(0 <= Range1.first && Range1.second < NumEltsPerLane) ||
+ !(0 <= Range2.first && Range2.second < NumEltsPerLane))
+ return SDValue();
+
+ if (VT.getSizeInBits() > 128 && (Blend1 || Blend2))
+ return SDValue();
+
+ // Rotate the 2 ops so we can access both ranges, then permute the result.
+ auto RotateAndPermute = [&](SDValue Lo, SDValue Hi, int RotAmt, int Ofs) {
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ SDValue Rotate = DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi),
+ DAG.getBitcast(ByteVT, Lo),
+ DAG.getConstant(Scale * RotAmt, DL, MVT::i8)));
+ SmallVector<int, 64> PermMask(NumElts, SM_SentinelUndef);
+ for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
+ for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
+ int M = Mask[Lane + Elt];
+ if (M < 0)
+ continue;
+ if (M < NumElts)
+ PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane);
+ else
+ PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane);
+ }
+ }
+ return DAG.getVectorShuffle(VT, DL, Rotate, DAG.getUNDEF(VT), PermMask);
+ };
+
+ // Check if the ranges are small enough to rotate from either direction.
+ if (Range2.second < Range1.first)
+ return RotateAndPermute(V1, V2, Range1.first, 0);
+ if (Range1.second < Range2.first)
+ return RotateAndPermute(V2, V1, Range2.first, NumElts);
+ return SDValue();
+}
+
/// Generic routine to decompose a shuffle and blend into independent
/// blends and permutes.
///
@@ -9915,11 +10664,9 @@ static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
/// operations. It will try to pick the best arrangement of shuffles and
/// blends.
-static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL,
- MVT VT, SDValue V1,
- SDValue V2,
- ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
// Shuffle the input elements into the desired positions in V1 and V2 and
// blend them together.
SmallVector<int, 32> V1Mask(Mask.size(), -1);
@@ -9934,15 +10681,27 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL,
BlendMask[i] = i + Size;
}
- // Try to lower with the simpler initial blend strategy unless one of the
- // input shuffles would be a no-op. We prefer to shuffle inputs as the
- // shuffle may be able to fold with a load or other benefit. However, when
- // we'll have to do 2x as many shuffles in order to achieve this, blending
- // first is a better strategy.
- if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask))
+ // Try to lower with the simpler initial blend/unpack/rotate strategies unless
+ // one of the input shuffles would be a no-op. We prefer to shuffle inputs as
+ // the shuffle may be able to fold with a load or other benefit. However, when
+ // we'll have to do 2x as many shuffles in order to achieve this, a 2-input
+ // pre-shuffle first is a better strategy.
+ if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) {
+ // Only prefer immediate blends to unpack/rotate.
+ if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
+ DL, VT, V1, V2, Mask, DAG, true))
+ return BlendPerm;
+ if (SDValue UnpackPerm =
+ lowerVectorShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG))
+ return UnpackPerm;
+ if (SDValue RotatePerm = lowerVectorShuffleAsByteRotateAndPermute(
+ DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return RotatePerm;
+ // Unpack/rotate failed - try again with variable blends.
if (SDValue BlendPerm =
lowerVectorShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG))
return BlendPerm;
+ }
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
@@ -10452,7 +11211,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
InputV = ShuffleOffset(InputV);
- InputV = getExtendInVec(X86ISD::VZEXT, DL, ExtVT, InputV, DAG);
+ InputV = getExtendInVec(/*Signed*/false, DL, ExtVT, InputV, DAG);
return DAG.getBitcast(VT, InputV);
}
@@ -10930,7 +11689,8 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
continue;
}
case ISD::CONCAT_VECTORS: {
- int OperandSize = Mask.size() / V.getNumOperands();
+ int OperandSize =
+ V.getOperand(0).getSimpleValueType().getVectorNumElements();
V = V.getOperand(BroadcastIdx / OperandSize);
BroadcastIdx %= OperandSize;
continue;
@@ -10989,7 +11749,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
SDValue BC = peekThroughBitcasts(V);
// Also check the simpler case, where we can directly reuse the scalar.
- if (V.getOpcode() == ISD::BUILD_VECTOR ||
+ if ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
@@ -11204,10 +11964,9 @@ static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
/// because for floating point vectors we have a generalized SHUFPS lowering
/// strategy that handles everything that doesn't *exactly* match an unpack,
/// making this clever lowering unnecessary.
-static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static SDValue lowerVectorShuffleAsPermuteAndUnpack(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() &&
"This routine only supports integer vectors.");
assert(VT.is128BitVector() &&
@@ -11276,6 +12035,12 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
return Unpack;
+ // If we're shuffling with a zero vector then we're better off not doing
+ // VECTOR_SHUFFLE(UNPCK()) as we lose track of those zero elements.
+ if (ISD::isBuildVectorAllZeros(V1.getNode()) ||
+ ISD::isBuildVectorAllZeros(V2.getNode()))
+ return SDValue();
+
// If none of the unpack-rooted lowerings worked (or were profitable) try an
// initial unpack.
if (NumLoInputs == 0 || NumHiInputs == 0) {
@@ -11475,7 +12240,7 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
@@ -11785,11 +12550,11 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
- DL, MVT::v4i32, V1, V2, Mask, DAG))
+ DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Unpack;
}
@@ -12321,47 +13086,48 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
/// blend if only one input is used.
static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse,
- bool &V2InUse) {
- SDValue V1Mask[16];
- SDValue V2Mask[16];
+ const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) {
+ assert(!is128BitLaneCrossingShuffleMask(VT, Mask) &&
+ "Lane crossing shuffle masks not supported");
+
+ int NumBytes = VT.getSizeInBits() / 8;
+ int Size = Mask.size();
+ int Scale = NumBytes / Size;
+
+ SmallVector<SDValue, 64> V1Mask(NumBytes, DAG.getUNDEF(MVT::i8));
+ SmallVector<SDValue, 64> V2Mask(NumBytes, DAG.getUNDEF(MVT::i8));
V1InUse = false;
V2InUse = false;
- int Size = Mask.size();
- int Scale = 16 / Size;
- for (int i = 0; i < 16; ++i) {
- if (Mask[i / Scale] < 0) {
- V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
- } else {
- const int ZeroMask = 0x80;
- int V1Idx = Mask[i / Scale] < Size ? Mask[i / Scale] * Scale + i % Scale
- : ZeroMask;
- int V2Idx = Mask[i / Scale] < Size
- ? ZeroMask
- : (Mask[i / Scale] - Size) * Scale + i % Scale;
- if (Zeroable[i / Scale])
- V1Idx = V2Idx = ZeroMask;
- V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8);
- V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8);
- V1InUse |= (ZeroMask != V1Idx);
- V2InUse |= (ZeroMask != V2Idx);
- }
+ for (int i = 0; i < NumBytes; ++i) {
+ int M = Mask[i / Scale];
+ if (M < 0)
+ continue;
+
+ const int ZeroMask = 0x80;
+ int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask;
+ int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale;
+ if (Zeroable[i / Scale])
+ V1Idx = V2Idx = ZeroMask;
+
+ V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8);
+ V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8);
+ V1InUse |= (ZeroMask != V1Idx);
+ V2InUse |= (ZeroMask != V2Idx);
}
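// Editorial sketch (not part of the patch): the PSHUFB control construction in
// the loop above. Each result byte i maps to mask element i/Scale; a byte
// sourced from V1 gets its real byte index in the V1 control and 0x80 (the
// "write zero" flag) in the V2 control, and vice versa; zeroable elements zero
// both sides and undef elements are left undef. Names and the plain vector
// types are illustrative only.
#include <vector>

static void buildPshufbControls(const std::vector<int> &Mask,
                                const std::vector<bool> &Zeroable,
                                int NumBytes, std::vector<int> &V1Ctl,
                                std::vector<int> &V2Ctl) {
  const int ZeroMask = 0x80;                     // PSHUFB "write zero" flag.
  int Size = (int)Mask.size();
  int Scale = NumBytes / Size;                   // Bytes per mask element.
  V1Ctl.assign(NumBytes, -1);                    // -1 models an undef byte.
  V2Ctl.assign(NumBytes, -1);
  for (int i = 0; i < NumBytes; ++i) {
    int M = Mask[i / Scale];
    if (M < 0)
      continue;                                  // Undef element: leave undef.
    int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask;
    int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale;
    if (Zeroable[i / Scale])
      V1Idx = V2Idx = ZeroMask;                  // Known-zero element.
    V1Ctl[i] = V1Idx;
    V2Ctl[i] = V2Idx;
  }
}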
+ MVT ShufVT = MVT::getVectorVT(MVT::i8, NumBytes);
if (V1InUse)
- V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
- DAG.getBitcast(MVT::v16i8, V1),
- DAG.getBuildVector(MVT::v16i8, DL, V1Mask));
+ V1 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V1),
+ DAG.getBuildVector(ShufVT, DL, V1Mask));
if (V2InUse)
- V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
- DAG.getBitcast(MVT::v16i8, V2),
- DAG.getBuildVector(MVT::v16i8, DL, V2Mask));
+ V2 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V2),
+ DAG.getBuildVector(ShufVT, DL, V2Mask));
// If we need shuffled inputs from both, blend the two.
SDValue V;
if (V1InUse && V2InUse)
- V = DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
+ V = DAG.getNode(ISD::OR, DL, ShufVT, V1, V2);
else
V = V1InUse ? V1 : V2;
@@ -12484,8 +13250,8 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return BitBlend;
// Try to lower by permuting the inputs into an unpack instruction.
- if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1,
- V2, Mask, DAG))
+ if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
+ DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
return Unpack;
// If we can't directly blend but can use PSHUFB, that will be better as it
@@ -12499,7 +13265,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// We can always bit-blend if we have to so the fallback strategy is to
// decompose into single-input permutes and blends.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
}
/// Check whether a compaction lowering can be done by dropping even
@@ -12632,6 +13398,10 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
+ return V;
+
// Check whether we can widen this to an i16 shuffle by duplicating bytes.
// Notably, this handles splat and partial-splat shuffles more efficiently.
// However, it only makes sense if the pre-duplication shuffle simplifies
@@ -12769,12 +13539,18 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// shuffles will both be pshufb, in which case we shouldn't bother with
// this.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
- DL, MVT::v16i8, V1, V2, Mask, DAG))
+ DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Unpack;
// If we have VBMI we can use one VPERM instead of multiple PSHUFBs.
if (Subtarget.hasVBMI() && Subtarget.hasVLX())
return lowerVectorShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG);
+
+ // Use PALIGNR+Permute if possible - permute might become PSHUFB but the
+ // PALIGNR will be cheaper than the second PSHUFB+OR.
+ if (SDValue V = lowerVectorShuffleAsByteRotateAndPermute(
+ DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
+ return V;
}
return PSHUFB;
@@ -12830,7 +13606,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Handle multi-input cases by blending single-input shuffles.
if (NumV2Elements > 0)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// The fallback path for single-input shuffles widens this into two v8i16
// vectors with unpacks, shuffles those, and then pulls them back together
@@ -13043,6 +13819,7 @@ static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(!V2.isUndef() && "This routine must not be used to lower single-input "
"shuffles as it could then recurse on itself.");
@@ -13069,7 +13846,7 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
};
if (DoBothBroadcast())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
- DAG);
+ Subtarget, DAG);
// If the inputs all stem from a single 128-bit lane of each input, then we
// split them rather than blending because the split will decompose to
@@ -13087,7 +13864,62 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
// Otherwise, just fall back to decomposed shuffles and a blend. This requires
// that the decomposed single-input shuffles don't end up here.
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
+ Subtarget, DAG);
+}
+
+/// Lower a vector shuffle crossing multiple 128-bit lanes as
+/// a lane permutation followed by a per-lane permutation.
+///
+/// This is mainly for cases where we can have non-repeating permutes
+/// in each lane.
+///
+/// TODO: This is very similar to lowerVectorShuffleByMerging128BitLanes,
+/// we should investigate merging them.
+static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget) {
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumEltsPerLane = NumElts / NumLanes;
+
+ SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
+ SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
+ SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
+
+ for (int i = 0; i != NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+
+ // Ensure that each lane comes from a single source lane.
+ int SrcLane = M / NumEltsPerLane;
+ int DstLane = i / NumEltsPerLane;
+ if (!isUndefOrEqual(SrcLaneMask[DstLane], SrcLane))
+ return SDValue();
+ SrcLaneMask[DstLane] = SrcLane;
+
+ LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
+ PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
+ }
+
+ // If we're only shuffling a single lowest lane and the rest are identity
+ // then don't bother.
+ // TODO - isShuffleMaskInputInPlace could be extended to something like this.
+ int NumIdentityLanes = 0;
+ bool OnlyShuffleLowestLane = true;
+ for (int i = 0; i != NumLanes; ++i) {
+ if (isSequentialOrUndefInRange(PermMask, i * NumEltsPerLane, NumEltsPerLane,
+ i * NumEltsPerLane))
+ NumIdentityLanes++;
+ else if (SrcLaneMask[i] != 0 && SrcLaneMask[i] != NumLanes)
+ OnlyShuffleLowestLane = false;
+ }
+ if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
+ return SDValue();
+
+ SDValue LanePermute = DAG.getVectorShuffle(VT, DL, V1, V2, LaneMask);
+ return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask);
}
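// A minimal standalone sketch (hypothetical names, not part of this patch or
// of LLVM's API) of the mask split performed by the routine above: a
// cross-lane shuffle mask is decomposed into a lane-granular permute
// (LaneMask) followed by an in-lane permute (PermMask), assuming each
// destination lane reads from a single source lane; undef (negative) mask
// entries are ignored here for brevity.
#include <array>
#include <cassert>

int main() {
  constexpr int NumElts = 4, NumEltsPerLane = 2;     // e.g. a v4f64 shuffle
  std::array<int, NumElts> Mask = {2, 3, 1, 0};      // crosses 128-bit lanes
  std::array<int, NumElts> LaneMask{}, PermMask{};
  std::array<int, 2> SrcLane = {-1, -1};

  for (int i = 0; i != NumElts; ++i) {
    int Src = Mask[i] / NumEltsPerLane, Dst = i / NumEltsPerLane;
    assert(SrcLane[Dst] < 0 || SrcLane[Dst] == Src); // one source lane per lane
    SrcLane[Dst] = Src;
    LaneMask[i] = Src * NumEltsPerLane + i % NumEltsPerLane;
    PermMask[i] = Dst * NumEltsPerLane + Mask[i] % NumEltsPerLane;
  }

  // Composing the two permutes reproduces the original mask:
  // result[i] = V[LaneMask[PermMask[i]]] == V[Mask[i]].
  for (int i = 0; i != NumElts; ++i)
    assert(LaneMask[PermMask[i]] == Mask[i]);
  // Here LaneMask = {2,3,0,1} (swap the 128-bit lanes) and
  // PermMask = {0,1,3,2} (identity in lane 0, swap within lane 1).
}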
/// Lower a vector shuffle crossing multiple 128-bit lanes as
@@ -13248,79 +14080,174 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
/// Lower a vector shuffle by first fixing the 128-bit lanes and then
/// shuffling each lane.
///
-/// This will only succeed when the result of fixing the 128-bit lanes results
-/// in a single-input non-lane-crossing shuffle with a repeating shuffle mask in
-/// each 128-bit lanes. This handles many cases where we can quickly blend away
-/// the lane crosses early and then use simpler shuffles within each lane.
+/// This attempts to create a repeated lane shuffle where each lane uses one
+/// or two of the lanes of the inputs. The lanes of the input vectors are
+/// shuffled in one or two independent shuffles to get the lanes into the
+/// position needed by the final shuffle.
///
-/// FIXME: It might be worthwhile at some point to support this without
-/// requiring the 128-bit lane-relative shuffles to be repeating, but currently
-/// in x86 only floating point has interesting non-repeating shuffles, and even
-/// those are still *marginally* more expensive.
+/// FIXME: This should be generalized to 512-bit shuffles.
static SDValue lowerVectorShuffleByMerging128BitLanes(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!V2.isUndef() && "This is only useful with multiple inputs.");
+ if (is128BitLaneRepeatedShuffleMask(VT, Mask))
+ return SDValue();
+
int Size = Mask.size();
int LaneSize = 128 / VT.getScalarSizeInBits();
int NumLanes = Size / LaneSize;
- assert(NumLanes > 1 && "Only handles 256-bit and wider shuffles.");
+ assert(NumLanes == 2 && "Only handles 256-bit shuffles.");
+
+ SmallVector<int, 16> RepeatMask(LaneSize, -1);
+ int LaneSrcs[2][2] = { { -1, -1 }, { -1 , -1 } };
+
+ // First pass will try to fill in the RepeatMask from lanes that need two
+ // sources.
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ int Srcs[2] = { -1, -1 };
+ SmallVector<int, 16> InLaneMask(LaneSize, -1);
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = Mask[(Lane * LaneSize) + i];
+ if (M < 0)
+ continue;
+ // Determine which of the 4 possible input lanes (2 from each source)
+ // this element comes from. Assign that as one of the sources for this
+ // lane. We can assign up to 2 sources for this lane. If we run out of
+ // sources we can't do anything.
+ int LaneSrc = M / LaneSize;
+ int Src;
+ if (Srcs[0] < 0 || Srcs[0] == LaneSrc)
+ Src = 0;
+ else if (Srcs[1] < 0 || Srcs[1] == LaneSrc)
+ Src = 1;
+ else
+ return SDValue();
- // See if we can build a hypothetical 128-bit lane-fixing shuffle mask. Also
- // check whether the in-128-bit lane shuffles share a repeating pattern.
- SmallVector<int, 4> Lanes((unsigned)NumLanes, -1);
- SmallVector<int, 4> InLaneMask((unsigned)LaneSize, -1);
- for (int i = 0; i < Size; ++i) {
- if (Mask[i] < 0)
+ Srcs[Src] = LaneSrc;
+ InLaneMask[i] = (M % LaneSize) + Src * Size;
+ }
+
+ // If this lane has two sources, see if it fits with the repeat mask so far.
+ if (Srcs[1] < 0)
continue;
- int j = i / LaneSize;
+ LaneSrcs[Lane][0] = Srcs[0];
+ LaneSrcs[Lane][1] = Srcs[1];
- if (Lanes[j] < 0) {
- // First entry we've seen for this lane.
- Lanes[j] = Mask[i] / LaneSize;
- } else if (Lanes[j] != Mask[i] / LaneSize) {
- // This doesn't match the lane selected previously!
- return SDValue();
+ auto MatchMasks = [](ArrayRef<int> M1, ArrayRef<int> M2) {
+ assert(M1.size() == M2.size() && "Unexpected mask size");
+ for (int i = 0, e = M1.size(); i != e; ++i)
+ if (M1[i] >= 0 && M2[i] >= 0 && M1[i] != M2[i])
+ return false;
+ return true;
+ };
+
+ auto MergeMasks = [](ArrayRef<int> Mask, MutableArrayRef<int> MergedMask) {
+ assert(Mask.size() == MergedMask.size() && "Unexpected mask size");
+ for (int i = 0, e = MergedMask.size(); i != e; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ assert((MergedMask[i] < 0 || MergedMask[i] == M) &&
+ "Unexpected mask element");
+ MergedMask[i] = M;
+ }
+ };
+
+ if (MatchMasks(InLaneMask, RepeatMask)) {
+ // Merge this lane mask into the final repeat mask.
+ MergeMasks(InLaneMask, RepeatMask);
+ continue;
}
- // Check that within each lane we have a consistent shuffle mask.
- int k = i % LaneSize;
- if (InLaneMask[k] < 0) {
- InLaneMask[k] = Mask[i] % LaneSize;
- } else if (InLaneMask[k] != Mask[i] % LaneSize) {
- // This doesn't fit a repeating in-lane mask.
- return SDValue();
+ // Didn't find a match. Swap the operands and try again.
+ std::swap(LaneSrcs[Lane][0], LaneSrcs[Lane][1]);
+ ShuffleVectorSDNode::commuteMask(InLaneMask);
+
+ if (MatchMasks(InLaneMask, RepeatMask)) {
+ // Merge this lane mask into the final repeat mask.
+ MergeMasks(InLaneMask, RepeatMask);
+ continue;
}
+
+ // Couldn't find a match with the operands in either order.
+ return SDValue();
}
- // First shuffle the lanes into place.
- MVT LaneVT = MVT::getVectorVT(VT.isFloatingPoint() ? MVT::f64 : MVT::i64,
- VT.getSizeInBits() / 64);
- SmallVector<int, 8> LaneMask((unsigned)NumLanes * 2, -1);
- for (int i = 0; i < NumLanes; ++i)
- if (Lanes[i] >= 0) {
- LaneMask[2 * i + 0] = 2*Lanes[i] + 0;
- LaneMask[2 * i + 1] = 2*Lanes[i] + 1;
+ // Now handle any lanes with only one source.
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ // If this lane has already been processed, skip it.
+ if (LaneSrcs[Lane][0] >= 0)
+ continue;
+
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = Mask[(Lane * LaneSize) + i];
+ if (M < 0)
+ continue;
+
+ // If RepeatMask isn't defined yet we can define it ourselves.
+ if (RepeatMask[i] < 0)
+ RepeatMask[i] = M % LaneSize;
+
+ if (RepeatMask[i] < Size) {
+ if (RepeatMask[i] != M % LaneSize)
+ return SDValue();
+ LaneSrcs[Lane][0] = M / LaneSize;
+ } else {
+ if (RepeatMask[i] != ((M % LaneSize) + Size))
+ return SDValue();
+ LaneSrcs[Lane][1] = M / LaneSize;
+ }
}
- V1 = DAG.getBitcast(LaneVT, V1);
- V2 = DAG.getBitcast(LaneVT, V2);
- SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask);
+ if (LaneSrcs[Lane][0] < 0 && LaneSrcs[Lane][1] < 0)
+ return SDValue();
+ }
+
+ SmallVector<int, 16> NewMask(Size, -1);
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ int Src = LaneSrcs[Lane][0];
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = -1;
+ if (Src >= 0)
+ M = Src * LaneSize + i;
+ NewMask[Lane * LaneSize + i] = M;
+ }
+ }
+ SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+ // Ensure we didn't get back the shuffle we started with.
+ // FIXME: This is a hack to make up for some splat handling code in
+ // getVectorShuffle.
+ if (isa<ShuffleVectorSDNode>(NewV1) &&
+ cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask)
+ return SDValue();
- // Cast it back to the type we actually want.
- LaneShuffle = DAG.getBitcast(VT, LaneShuffle);
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ int Src = LaneSrcs[Lane][1];
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = -1;
+ if (Src >= 0)
+ M = Src * LaneSize + i;
+ NewMask[Lane * LaneSize + i] = M;
+ }
+ }
+ SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+ // Ensure we didn't get back the shuffle we started with.
+ // FIXME: This is a hack to make up for some splat handling code in
+ // getVectorShuffle.
+ if (isa<ShuffleVectorSDNode>(NewV2) &&
+ cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask)
+ return SDValue();
- // Now do a simple shuffle that isn't lane crossing.
- SmallVector<int, 8> NewMask((unsigned)Size, -1);
- for (int i = 0; i < Size; ++i)
- if (Mask[i] >= 0)
- NewMask[i] = (i / LaneSize) * LaneSize + Mask[i] % LaneSize;
- assert(!is128BitLaneCrossingShuffleMask(VT, NewMask) &&
- "Must not introduce lane crosses at this point!");
+ for (int i = 0; i != Size; ++i) {
+ NewMask[i] = RepeatMask[i % LaneSize];
+ if (NewMask[i] < 0)
+ continue;
- return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
+ NewMask[i] += (i / LaneSize) * LaneSize;
+ }
+ return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask);
}
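// Standalone sketch (hypothetical values, not part of this patch or of LLVM's
// API) of the two-step lowering built above for a v8i32-style shuffle: first
// shuffle whole 128-bit lanes of the inputs into place (NewV1/NewV2), then
// apply a single mask whose pattern repeats in every lane (the RepeatMask).
#include <array>
#include <cassert>

using V8 = std::array<int, 8>;

// Concatenated-operand shuffle: M[i] in 0..7 reads A, 8..15 reads B.
static V8 shuf(const V8 &A, const V8 &B, const V8 &M) {
  V8 R{};
  for (int i = 0; i != 8; ++i)
    R[i] = M[i] < 8 ? A[M[i]] : B[M[i] - 8];
  return R;
}

int main() {
  V8 V1 = {10, 11, 12, 13, 14, 15, 16, 17};
  V8 V2 = {20, 21, 22, 23, 24, 25, 26, 27};
  // Cross-lane mask: lane 0 interleaves V1/V2 lane 0, lane 1 interleaves
  // V2 lane 1 with V1 lane 1, so the per-lane sources differ.
  V8 Mask = {0, 8, 1, 9, 12, 4, 13, 5};

  // Step 1: move the needed 128-bit lanes into position.
  V8 NewV1 = shuf(V1, V2, {0, 1, 2, 3, 12, 13, 14, 15}); // V1 lane0 | V2 lane1
  V8 NewV2 = shuf(V1, V2, {8, 9, 10, 11, 4, 5, 6, 7});   // V2 lane0 | V1 lane1
  // Step 2: one shuffle whose RepeatMask {0,8,1,9} repeats in each lane.
  V8 Result = shuf(NewV1, NewV2, {0, 8, 1, 9, 4, 12, 5, 13});

  assert(Result == shuf(V1, V2, Mask)); // same result as the original shuffle
}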
/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
@@ -13731,6 +14658,11 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;
+ // Try to permute the lanes and then use a per-lane permute.
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v4f64, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask,
DAG, Subtarget);
@@ -13765,6 +14697,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return Result;
+
// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask,
@@ -13775,10 +14708,11 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 4-lane 64-bit integer shuffles.
@@ -13872,7 +14806,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Otherwise fall back on generic blend lowering.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
}
/// Handle lowering of 8-lane 32-bit floating point shuffles.
@@ -13961,17 +14895,18 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// vpunpckhwd instrs than vblend.
if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2,
- Mask, DAG))
+ Mask, Subtarget, DAG))
return V;
// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 8-lane 32-bit integer shuffles.
@@ -14000,8 +14935,8 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// vpunpcklwd and vpunpckhwd instrs.
if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
!Subtarget.hasAVX512())
- if (SDValue V =
- lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, DAG))
+ if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2,
+ Mask, Subtarget, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
@@ -14084,7 +15019,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Otherwise fall back on generic blend lowering.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
}
/// Handle lowering of 16-lane 16-bit integer shuffles.
@@ -14146,9 +15081,14 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (V2.isUndef()) {
// There are no generalized cross-lane shuffle operations available on i16
// element types.
- if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask))
+ if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) {
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
Mask, DAG, Subtarget);
+ }
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
@@ -14174,8 +15114,14 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return Result;
+ // Try to permute the lanes and then use a per-lane permute.
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 32-lane 8-bit integer shuffles.
@@ -14236,9 +15182,14 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are no generalized cross-lane shuffle operations available on i8
// element types.
- if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
+ if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) {
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
DAG, Subtarget);
+ }
if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
@@ -14254,8 +15205,14 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return Result;
+ // Try to permute the lanes and then use a per-lane permute.
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
+ Subtarget, DAG);
}
/// High-level routine to lower various 256-bit x86 vector shuffles.
@@ -14757,6 +15714,11 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
return V;
+ // Use dedicated pack instructions for masks that match their pattern.
+ if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG,
+ Subtarget))
+ return V;
+
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@@ -14845,6 +15807,39 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
}
+// Determine if this shuffle can be implemented with a KSHIFT instruction.
+// Returns the shift amount if possible or -1 if not. This is a simplified
+// version of matchVectorShuffleAsShift.
+static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask,
+ int MaskOffset, const APInt &Zeroable) {
+ int Size = Mask.size();
+
+ auto CheckZeros = [&](int Shift, bool Left) {
+ for (int j = 0; j < Shift; ++j)
+ if (!Zeroable[j + (Left ? 0 : (Size - Shift))])
+ return false;
+
+ return true;
+ };
+
+ auto MatchShift = [&](int Shift, bool Left) {
+ unsigned Pos = Left ? Shift : 0;
+ unsigned Low = Left ? 0 : Shift;
+ unsigned Len = Size - Shift;
+ return isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset);
+ };
+
+ for (int Shift = 1; Shift != Size; ++Shift)
+ for (bool Left : {true, false})
+ if (CheckZeros(Shift, Left) && MatchShift(Shift, Left)) {
+ Opcode = Left ? X86ISD::KSHIFTL : X86ISD::KSHIFTR;
+ return Shift;
+ }
+
+ return -1;
+}
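// Standalone sketch (hypothetical names, not part of this patch) of what the
// matcher above recognizes for an 8-element mask: a KSHIFTR/KSHIFTL of the
// whole mask register is just a shuffle that slides every element by Shift
// positions, provided the vacated positions are zeroable.
#include <array>
#include <cassert>

int main() {
  constexpr int Size = 8;
  // Elements 2..7 of the source move to positions 0..5; positions 6 and 7
  // must be zeroable. This is KSHIFTR by 2.
  std::array<int, Size> Mask = {2, 3, 4, 5, 6, 7, -1, -1};
  std::array<bool, Size> Zeroable = {false, false, false, false,
                                     false, false, true,  true};

  int Shift = 2;
  bool Right = true;
  // CheckZeros: the Shift positions at the open end must be zeroable.
  for (int j = 0; j != Shift; ++j)
    assert(Zeroable[j + (Right ? Size - Shift : 0)]);
  // MatchShift: the remaining positions form the sequence Low, Low+1, ...
  int Pos = Right ? 0 : Shift, Low = Right ? Shift : 0;
  for (int i = 0; i != Size - Shift; ++i)
    assert(Mask[Pos + i] < 0 || Mask[Pos + i] == Low + i);
  // The mirrored pattern {Z, Z, 0, 1, 2, 3, 4, 5} would match KSHIFTL by 2.
}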
+
+
// Lower vXi1 vector shuffles.
// There is no dedicated instruction on AVX-512 that shuffles the masks.
// The only way to shuffle bits is to sign-extend the mask vector to SIMD
@@ -14854,6 +15849,9 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ assert(Subtarget.hasAVX512() &&
+ "Cannot lower 512-bit vectors w/o basic ISA!");
+
unsigned NumElts = Mask.size();
// Try to recognize shuffles that are just padding a subvector with zeros.
@@ -14880,9 +15878,21 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
Extract, DAG.getIntPtrConstant(0, DL));
}
+ // Try to match KSHIFTs.
+ // TODO: Support narrower than legal shifts by widening and extracting.
+ if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) {
+ unsigned Offset = 0;
+ for (SDValue V : { V1, V2 }) {
+ unsigned Opcode;
+ int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
+ if (ShiftAmt >= 0)
+ return DAG.getNode(Opcode, DL, VT, V,
+ DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ Offset += NumElts; // Increment for next iteration.
+ }
+ }
+
- assert(Subtarget.hasAVX512() &&
- "Cannot lower 512-bit vectors w/o basic ISA!");
MVT ExtVT;
switch (VT.SimpleTy) {
default:
@@ -15069,6 +16079,14 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
canWidenShuffleElements(ZeroableMask, WidenedMask)) {
+ // Shuffle mask widening should not interfere with a broadcast opportunity
+ // by obfuscating the operands with bitcasts.
+ // TODO: Avoid lowering directly from this top-level function: make this
+ // a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
+ if (SDValue Broadcast =
+ lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return Broadcast;
+
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
: MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
@@ -15135,34 +16153,27 @@ static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
SDValue Cond = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
- if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
- return SDValue();
- auto *CondBV = cast<BuildVectorSDNode>(Cond);
-
// Only non-legal VSELECTs reach this lowering, convert those into generic
// shuffles and re-use the shuffle lowering path for blends.
SmallVector<int, 32> Mask;
- for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) {
- SDValue CondElt = CondBV->getOperand(i);
- int M = i;
- // We can't map undef to undef here. They have different meanings. Treat
- // as the same as zero.
- if (CondElt.isUndef() || isNullConstant(CondElt))
- M += Size;
- Mask.push_back(M);
- }
- return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask);
+ if (createShuffleMaskFromVSELECT(Mask, Cond))
+ return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask);
+
+ return SDValue();
}
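// Standalone sketch (hypothetical, not part of this patch) of the mapping a
// helper like createShuffleMaskFromVSELECT is expected to perform: a VSELECT
// whose condition is a build vector of constants is just a blend, so it can
// be expressed as a shuffle of LHS and RHS. A true lane keeps element i of
// LHS; a false or undef lane takes element i of RHS (undef must be treated as
// false here, not mapped to an undef shuffle element).
#include <array>
#include <cassert>

int main() {
  constexpr int Size = 4;
  // Condition lanes: true, false, true, false.
  std::array<bool, Size> Cond = {true, false, true, false};
  std::array<int, Size> ShuffleMask{};
  for (int i = 0; i != Size; ++i)
    ShuffleMask[i] = Cond[i] ? i : i + Size; // i -> LHS, i + Size -> RHS

  std::array<int, Size> Expected = {0, 5, 2, 7};
  assert(ShuffleMask == Expected);
}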
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+
// A vselect where all conditions and data are constants can be optimized into
// a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(0).getNode()) &&
- ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(1).getNode()) &&
- ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
+ if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
return SDValue();
// Try to lower this to a blend-style vector shuffle. This can handle all
@@ -15172,7 +16183,9 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
// If this VSELECT has a vector of i1 as a mask, it will be directly matched
// with patterns on the mask registers on AVX-512.
- if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1)
+ MVT CondVT = Cond.getSimpleValueType();
+ unsigned CondEltSize = Cond.getScalarValueSizeInBits();
+ if (CondEltSize == 1)
return Op;
// Variable blends are only legal from SSE4.1 onward.
@@ -15181,24 +16194,32 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned NumElts = VT.getVectorNumElements();
// If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition
// into an i1 condition so that we can use the mask-based 512-bit blend
// instructions.
if (VT.getSizeInBits() == 512) {
- SDValue Cond = Op.getOperand(0);
- // The vNi1 condition case should be handled above as it can be trivially
- // lowered.
- assert(Cond.getValueType().getScalarSizeInBits() ==
- VT.getScalarSizeInBits() &&
- "Should have a size-matched integer condition!");
// Build a mask by testing the condition against zero.
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond,
- getZeroVector(VT, Subtarget, DAG, dl),
+ DAG.getConstant(0, dl, CondVT),
ISD::SETNE);
// Now return a new VSELECT using the mask.
- return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2));
+ return DAG.getSelect(dl, VT, Mask, LHS, RHS);
+ }
+
+ // SEXT/TRUNC cases where the mask doesn't match the destination size.
+ if (CondEltSize != EltSize) {
+ // If we don't have a sign splat, rely on the expansion.
+ if (CondEltSize != DAG.ComputeNumSignBits(Cond))
+ return SDValue();
+
+ MVT NewCondSVT = MVT::getIntegerVT(EltSize);
+ MVT NewCondVT = MVT::getVectorVT(NewCondSVT, NumElts);
+ Cond = DAG.getSExtOrTrunc(Cond, dl, NewCondVT);
+ return DAG.getNode(ISD::VSELECT, dl, VT, Cond, LHS, RHS);
}
// Only some types will be legal on some subtargets. If we can emit a legal
@@ -15219,10 +16240,10 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
case MVT::v8i16:
case MVT::v16i16: {
// Bitcast everything to the vXi8 type and use a vXi8 vselect.
- MVT CastVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
- SDValue Cond = DAG.getBitcast(CastVT, Op->getOperand(0));
- SDValue LHS = DAG.getBitcast(CastVT, Op->getOperand(1));
- SDValue RHS = DAG.getBitcast(CastVT, Op->getOperand(2));
+ MVT CastVT = MVT::getVectorVT(MVT::i8, NumElts * 2);
+ Cond = DAG.getBitcast(CastVT, Cond);
+ LHS = DAG.getBitcast(CastVT, LHS);
+ RHS = DAG.getBitcast(CastVT, RHS);
SDValue Select = DAG.getNode(ISD::VSELECT, dl, CastVT, Cond, LHS, RHS);
return DAG.getBitcast(VT, Select);
}
@@ -15298,34 +16319,25 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0) // the operation is legal
+ return Op;
- // If the kshift instructions of the correct width aren't natively supported
- // then we need to promote the vector to the native size to get the correct
- // zeroing behavior.
- if (VecVT.getVectorNumElements() < 16) {
- VecVT = MVT::v16i1;
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
- DAG.getUNDEF(VecVT), Vec,
+ // Extend to natively supported kshift.
+ unsigned NumElems = VecVT.getVectorNumElements();
+ MVT WideVecVT = VecVT;
+ if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
+ WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
+ DAG.getUNDEF(WideVecVT), Vec,
DAG.getIntPtrConstant(0, dl));
}
- // Extracts from element 0 are always allowed.
- if (IdxVal != 0) {
- // Use kshiftr instruction to move to the lower element.
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
- }
-
- // Shrink to v16i1 since that's always legal.
- if (VecVT.getVectorNumElements() > 16) {
- VecVT = MVT::v16i1;
- Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Vec,
- DAG.getIntPtrConstant(0, dl));
- }
+ // Use kshiftr instruction to move to the lower element.
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
- // Convert to a bitcast+aext/trunc.
- MVT CastVT = MVT::getIntegerVT(VecVT.getVectorNumElements());
- return DAG.getAnyExtOrTrunc(DAG.getBitcast(CastVT, Vec), dl, EltVT);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+ DAG.getIntPtrConstant(0, dl));
}
SDValue
@@ -15793,7 +16805,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
- if (isPositionIndependent() && !Subtarget.is64Bit()) {
+ if (OpFlag) {
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
@@ -16173,6 +17185,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
/// Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
+/// TODO: Can this be moved to general expansion code?
static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
MVT VT = Op.getSimpleValueType();
@@ -16182,8 +17195,8 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- // X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the
- // generic ISD nodes haven't. Insert an AND to be safe, it's optimized away
+ // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
+ // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's optimized away
// during isel.
SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits - 1, dl, MVT::i8));
@@ -16193,10 +17206,10 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
- Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
- Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
+ Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
@@ -16220,6 +17233,56 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({ Lo, Hi }, dl);
}
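// Standalone sketch (hypothetical names, not part of this patch) of the
// SHL_PARTS lowering above on plain integers: a 64-bit shift-left of a value
// split into two i32 halves uses a funnel shift for the high half. The amount
// is masked to 0..31 for the plain shift, matching the AND inserted above; the
// amt >= 32 case is resolved by the select/CMOV emitted later in the function
// and is modelled here directly.
#include <cassert>
#include <cstdint>

// fshl(Hi, Lo, Amt) for i32: top 32 bits of (Hi:Lo) << Amt, Amt already 0..31.
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  return Amt == 0 ? Hi : (Hi << Amt) | (Lo >> (32 - Amt));
}

static uint64_t shl_parts(uint32_t Lo, uint32_t Hi, unsigned Amt) {
  unsigned Safe = Amt & 31;
  uint32_t Tmp2 = fshl32(Hi, Lo, Safe);   // candidate high half
  uint32_t Tmp3 = Lo << Safe;             // candidate low half
  uint32_t OutLo = (Amt & 32) ? 0 : Tmp3;
  uint32_t OutHi = (Amt & 32) ? Tmp3 : Tmp2;
  return ((uint64_t)OutHi << 32) | OutLo;
}

int main() {
  uint64_t X = 0x0123456789abcdefULL;
  for (unsigned Amt = 0; Amt != 64; ++Amt)
    assert(shl_parts((uint32_t)X, (uint32_t)(X >> 32), Amt) == X << Amt);
}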
+static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) &&
+ "Unexpected funnel shift opcode!");
+
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+
+ bool IsFSHR = Op.getOpcode() == ISD::FSHR;
+
+ if (VT.isVector()) {
+ assert(Subtarget.hasVBMI2() && "Expected VBMI2");
+
+ if (IsFSHR)
+ std::swap(Op0, Op1);
+
+ APInt APIntShiftAmt;
+ if (isConstantSplat(Amt, APIntShiftAmt)) {
+ uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
+ return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
+ Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ }
+
+ return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
+ Op0, Op1, Amt);
+ }
+
+ assert((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+ "Unexpected funnel shift type!");
+
+ // Expand slow SHLD/SHRD cases if we are not optimizing for size.
+ bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+ if (!OptForSize && Subtarget.isSHLDSlow())
+ return SDValue();
+
+ if (IsFSHR)
+ std::swap(Op0, Op1);
+
+ // i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
+ if (VT == MVT::i16)
+ Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
+ DAG.getConstant(15, DL, Amt.getValueType()));
+
+ unsigned SHDOp = (IsFSHR ? X86ISD::SHRD : X86ISD::SHLD);
+ return DAG.getNode(SHDOp, DL, VT, Op0, Op1, Amt);
+}
+
// Try to use a packed vector operation to handle i64 on 32-bit targets when
// AVX512DQ is enabled.
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
@@ -16271,9 +17334,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
// Legal.
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(VT))
return Op;
- if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && Subtarget.is64Bit()) {
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && Subtarget.is64Bit())
return Op;
- }
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
@@ -16331,7 +17393,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
Chain = Result.getValue(1);
SDValue InFlag = Result.getValue(2);
- // FIXME: Currently the FST is flagged to the FILD_FLAG. This
+ // FIXME: Currently the FST is glued to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
@@ -16412,13 +17474,11 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
SDValue Result;
if (Subtarget.hasSSE3()) {
- // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
+ // FIXME: The 'haddpd' instruction may be slower than 'shuffle + addsd'.
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
- SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub);
- SDValue Shuffle = DAG.getVectorShuffle(MVT::v4i32, dl, S2F, S2F, {2,3,0,1});
- Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
- DAG.getBitcast(MVT::v2f64, Shuffle), Sub);
+ SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
+ Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
@@ -16910,33 +17970,43 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
InVT.getVectorElementType() == MVT::i32) &&
"Unexpected element type");
+ // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
+ if (InVT == MVT::v8i8) {
+ if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+ return SDValue();
+
+ In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
+ MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8));
+ // FIXME: This should be ANY_EXTEND_VECTOR_INREG for ANY_EXTEND input.
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, VT, In);
+ }
+
if (Subtarget.hasInt256())
- return DAG.getNode(X86ISD::VZEXT, dl, VT, In);
+ return Op;
// Optimize vectors in AVX mode:
//
// v8i16 -> v8i32
- // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
+ // Use vpmovzxwd for 4 lower elements v8i16 -> v4i32.
// Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
// Concat upper and lower parts.
//
// v4i32 -> v4i64
- // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
+ // Use vpmovzxdq for 4 lower elements v4i32 -> v2i64.
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
// Concat upper and lower parts.
//
- SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+
+ SDValue OpLo = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, HalfVT, In);
+
+ SDValue ZeroVec = DAG.getConstant(0, dl, InVT);
SDValue Undef = DAG.getUNDEF(InVT);
bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
- SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
-
- MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements()/2);
-
- OpLo = DAG.getBitcast(HVT, OpLo);
- OpHi = DAG.getBitcast(HVT, OpHi);
+ OpHi = DAG.getBitcast(HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
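// A worked example (hypothetical values, not part of this patch) of the split
// used above for a v8i16 -> v8i32 zero extension without AVX2:
//   In                 = {a, b, c, d, e, f, g, h}        (eight i16)
//   OpLo               = zero_extend_vector_inreg(In)
//                      = {a, b, c, d}                    (four i32)
//   OpHi               = vpunpckhwd(In, 0)
//                      = {e, 0, f, 0, g, 0, h, 0}        (eight i16)
//                      = {e, f, g, h} after the bitcast  (four i32)
//   concat(OpLo, OpHi) = {a, b, c, d, e, f, g, h}        (eight i32)
// The zero word lands in the upper half of each i32 because of the
// little-endian lane layout. For ANY_EXTEND the unpack partner can be undef
// instead of zero, since the upper bits are don't-care.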
@@ -16965,7 +18035,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
SDLoc DL(Op);
unsigned NumElts = VT.getVectorNumElements();
- // For all vectors, but vXi8 we can just emit a sign_extend a shift. This
+ // For all vectors, but vXi8 we can just emit a sign_extend and a shift. This
// avoids a constant pool load.
if (VT.getVectorElementType() != MVT::i8) {
SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In);
@@ -16995,7 +18065,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
}
SDValue One = DAG.getConstant(1, DL, WideVT);
- SDValue Zero = getZeroVector(WideVT, Subtarget, DAG, DL);
+ SDValue Zero = DAG.getConstant(0, DL, WideVT);
SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero);
@@ -17035,9 +18105,10 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
const X86Subtarget &Subtarget) {
assert((Opcode == X86ISD::PACKSS || Opcode == X86ISD::PACKUS) &&
"Unexpected PACK opcode");
+ assert(DstVT.isVector() && "VT not a vector?");
// Requires SSE2 but AVX512 has fast vector truncate.
- if (!Subtarget.hasSSE2() || Subtarget.hasAVX512() || !DstVT.isVector())
+ if (!Subtarget.hasSSE2())
return SDValue();
EVT SrcVT = In.getValueType();
@@ -17203,10 +18274,8 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
}
// If we have DQI, emit a pattern that will be iseled as vpmovq2m/vpmovd2m.
if (Subtarget.hasDQI())
- return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT),
- In, ISD::SETGT);
- return DAG.getSetCC(DL, VT, In, getZeroVector(InVT, Subtarget, DAG, DL),
- ISD::SETNE);
+ return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT);
+ return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE);
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
@@ -17219,20 +18288,22 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
+ // If called by the legalizer just return.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
+ return SDValue();
+
if (VT.getVectorElementType() == MVT::i1)
return LowerTruncateVecI1(Op, DAG, Subtarget);
// vpmovqb/w/d, vpmovdb/w, vpmovwb
if (Subtarget.hasAVX512()) {
- // word to byte only under BWI
- if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) { // v16i16 -> v16i8
- // Make sure we're allowed to promote 512-bits.
- if (Subtarget.canExtendTo512DQ())
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
- } else {
+ // word to byte only under BWI. Otherwise we have to promote to v16i32
+ // and then truncate that. But we should only do that if we haven't been
+ // asked to avoid 512-bit vectors. The actual promotion to v16i32 will be
+ // handled by isel patterns.
+ if (InVT != MVT::v16i16 || Subtarget.hasBWI() ||
+ Subtarget.canExtendTo512DQ())
return Op;
- }
}
unsigned NumPackedSignBits = std::min<unsigned>(VT.getScalarSizeInBits(), 16);
@@ -17241,8 +18312,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// Truncate with PACKUS if we are truncating a vector with leading zero bits
// that extend all the way to the packed/truncated value.
// Pre-SSE41 we can only use PACKUSWB.
- KnownBits Known;
- DAG.computeKnownBits(In, Known);
+ KnownBits Known = DAG.computeKnownBits(In);
if ((InNumEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros())
if (SDValue V =
truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget))
@@ -17320,6 +18390,17 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBitcast(MVT::v8i16, res);
}
+ if (VT == MVT::v16i8 && InVT == MVT::v16i16) {
+ // Use an AND to zero upper bits for PACKUS.
+ In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(255, DL, InVT));
+
+ SDValue InLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue InHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
+ DAG.getIntPtrConstant(8, DL));
+ return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi);
+ }
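+ // A worked example (hypothetical values) of the AND+PACKUS step above:
+ // PACKUSWB treats each i16 lane as signed and saturates it to [0, 255], so
+ // masking with 255 first makes saturation a no-op and the pack acts as a
+ // plain element-wise truncation:
+ //   In       = {0x1234, 0x00ff, 0xff80, 0x0001, ...}
+ //   In & 255 = {0x0034, 0x00ff, 0x0080, 0x0001, ...}
+ //   PACKUS(lo half, hi half) -> {0x34, 0xff, 0x80, 0x01, ...} == trunc(In)
+ // Without the AND, 0x1234 would saturate to 0xff and 0xff80 (negative) to 0.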
+
// Handle truncation of V256 to V128 using shuffles.
assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");
@@ -17405,6 +18486,98 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
In, DAG.getUNDEF(SVT)));
}
+/// Horizontal vector math instructions may be slower than normal math with
+/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
+/// implementation, and likely shuffle complexity of the alternate sequence.
+static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ bool IsOptimizingSize = DAG.getMachineFunction().getFunction().optForSize();
+ bool HasFastHOps = Subtarget.hasFastHorizontalOps();
+ return !IsSingleSource || IsOptimizingSize || HasFastHOps;
+}
+
+/// Depending on uarch and/or optimizing for size, we might prefer to use a
+/// vector operation in place of the typical scalar operation.
+static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // If both operands have other uses, this is probably not profitable.
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ if (!LHS.hasOneUse() && !RHS.hasOneUse())
+ return Op;
+
+ // FP horizontal add/sub were added with SSE3. Integer with SSSE3.
+ bool IsFP = Op.getSimpleValueType().isFloatingPoint();
+ if (IsFP && !Subtarget.hasSSE3())
+ return Op;
+ if (!IsFP && !Subtarget.hasSSSE3())
+ return Op;
+
+ // Defer forming the minimal horizontal op if the vector source has more than
+ // the 2 extract element uses that we're matching here. In that case, we might
+ // form a horizontal op that includes more than 1 add/sub op.
+ if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ LHS.getOperand(0) != RHS.getOperand(0) ||
+ !LHS.getOperand(0)->hasNUsesOfValue(2, 0))
+ return Op;
+
+ if (!isa<ConstantSDNode>(LHS.getOperand(1)) ||
+ !isa<ConstantSDNode>(RHS.getOperand(1)) ||
+ !shouldUseHorizontalOp(true, DAG, Subtarget))
+ return Op;
+
+ // Allow commuted 'hadd' ops.
+ // TODO: Allow commuted (f)sub by negating the result of (F)HSUB?
+ unsigned HOpcode;
+ switch (Op.getOpcode()) {
+ case ISD::ADD: HOpcode = X86ISD::HADD; break;
+ case ISD::SUB: HOpcode = X86ISD::HSUB; break;
+ case ISD::FADD: HOpcode = X86ISD::FHADD; break;
+ case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
+ default:
+ llvm_unreachable("Trying to lower unsupported opcode to horizontal op");
+ }
+ unsigned LExtIndex = LHS.getConstantOperandVal(1);
+ unsigned RExtIndex = RHS.getConstantOperandVal(1);
+ if (LExtIndex == 1 && RExtIndex == 0 &&
+ (HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD))
+ std::swap(LExtIndex, RExtIndex);
+
+ // TODO: This can be extended to handle other adjacent extract pairs.
+ if (LExtIndex != 0 || RExtIndex != 1)
+ return Op;
+
+ SDValue X = LHS.getOperand(0);
+ EVT VecVT = X.getValueType();
+ unsigned BitWidth = VecVT.getSizeInBits();
+ assert((BitWidth == 128 || BitWidth == 256 || BitWidth == 512) &&
+ "Not expecting illegal vector widths here");
+
+ // Creating a 256-bit horizontal op would be wasteful, and there is no 512-bit
+ // equivalent, so extract the 256/512-bit source op to 128-bit.
+ // This is free: ymm/zmm -> xmm.
+ SDLoc DL(Op);
+ if (BitWidth == 256 || BitWidth == 512)
+ X = extract128BitVector(X, 0, DAG, DL);
+
+ // add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0
+ // add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0
+ // sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0
+ SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp,
+ DAG.getIntPtrConstant(0, DL));
+}
+
+/// Depending on uarch and/or optimizing for size, we might prefer to use a
+/// vector operation in place of the typical scalar operation.
+static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
+ "Only expecting float/double");
+ return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
+}
+
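// Standalone sketch (hypothetical, not part of this patch) of the
// scalar-to-horizontal rewrite above, using HADDPS semantics on a 4-element
// vector: haddps(X, X) puts X[0] + X[1] into element 0, so
//   add (extractelt X, 0), (extractelt X, 1) --> extractelt (hadd X, X), 0.
#include <array>
#include <cassert>

static std::array<float, 4> haddps(const std::array<float, 4> &A,
                                   const std::array<float, 4> &B) {
  // Element order of HADDPS: {A0+A1, A2+A3, B0+B1, B2+B3}.
  return {A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]};
}

int main() {
  std::array<float, 4> X = {1.0f, 2.0f, 3.0f, 4.0f};
  float Scalar = X[0] + X[1];         // the original scalar add
  float Horizontal = haddps(X, X)[0]; // the rewritten form
  assert(Scalar == Horizontal);
}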
/// The only differences between FABS and FNEG are the mask and the logic op.
/// FNEG also has a folding opportunity for FNEG(FABS(x)).
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
@@ -17424,43 +18597,36 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
bool IsF128 = (VT == MVT::f128);
+ assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
+ VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
+ VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
+ "Unexpected type in LowerFABSorFNEG");
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.
- MVT LogicVT;
- MVT EltVT;
-
- if (VT.isVector()) {
- LogicVT = VT;
- EltVT = VT.getVectorElementType();
- } else if (IsF128) {
- // SSE instructions are used for optimized f128 logical operations.
- LogicVT = MVT::f128;
- EltVT = VT;
- } else {
- // There are no scalar bitwise logical SSE/AVX instructions, so we
- // generate a 16-byte vector constant and logic op even for the scalar case.
- // Using a 16-byte mask allows folding the load of the mask with
- // the logic op, so it can save (~4 bytes) on code size.
+ // There are no scalar bitwise logical SSE/AVX instructions, so we
+ // generate a 16-byte vector constant and logic op even for the scalar case.
+ // Using a 16-byte mask allows folding the load of the mask with
+ // the logic op, so it can save (~4 bytes) on code size.
+ bool IsFakeVector = !VT.isVector() && !IsF128;
+ MVT LogicVT = VT;
+ if (IsFakeVector)
LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
- EltVT = VT;
- }
- unsigned EltBits = EltVT.getSizeInBits();
+ unsigned EltBits = VT.getScalarSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
- APInt MaskElt =
- IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits);
- const fltSemantics &Sem =
- EltVT == MVT::f64 ? APFloat::IEEEdouble() :
- (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
+ APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) :
+ APInt::getSignMask(EltBits);
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
- unsigned LogicOp =
- IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
+ unsigned LogicOp = IsFABS ? X86ISD::FAND :
+ IsFNABS ? X86ISD::FOR :
+ X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
if (VT.isVector() || IsF128)
@@ -17496,10 +18662,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
"Unexpected type in LowerFCOPYSIGN");
- MVT EltVT = VT.getScalarType();
- const fltSemantics &Sem =
- EltVT == MVT::f64 ? APFloat::IEEEdouble()
- : (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
// Perform all scalar logic operations as 16-byte vectors because there are no
// scalar FP logic instructions in SSE.
@@ -17516,7 +18679,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue SignMask = DAG.getConstantFP(
APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
SDValue MagMask = DAG.getConstantFP(
- APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
+ APFloat(Sem, APInt::getSignedMaxValue(EltSizeInBits)), dl, LogicVT);
// First, clear all bits but the sign bit from the second operand (sign).
if (IsFakeVector)
@@ -17527,7 +18690,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// TODO: If we had general constant folding for FP logic ops, this check
// wouldn't be necessary.
SDValue MagBits;
- if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Mag)) {
+ if (ConstantFPSDNode *Op0CN = isConstOrConstSplatFP(Mag)) {
APFloat APF = Op0CN->getValueAPF();
APF.clearSign();
MagBits = DAG.getConstantFP(APF, dl, LogicVT);
@@ -17572,7 +18735,8 @@ static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
// Check whether an OR'd tree is PTEST-able.
static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ SDValue &X86CC) {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget.hasSSE41())
@@ -17658,9 +18822,10 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
}
- SDValue Res = DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
- VecIns.back(), VecIns.back());
- return getSETCC(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE, Res, DL, DAG);
+ X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE,
+ DL, MVT::i8);
+ return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
+ VecIns.back(), VecIns.back());
}
/// return true if \c Op has a use that doesn't just read flags.
@@ -17684,8 +18849,8 @@ static bool hasNonFlagsUse(SDValue Op) {
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
-SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
- SelectionDAG &DAG) const {
+static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget) {
// CF and OF aren't always set the way we want. Determine which
// of these we need.
bool NeedCF = false;
@@ -17728,159 +18893,26 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
unsigned Opcode = 0;
unsigned NumOperands = 0;
- // Truncate operations may prevent the merge of the SETCC instruction
- // and the arithmetic instruction before it. Attempt to truncate the operands
- // of the arithmetic instruction and use a reduced bit-width instruction.
- bool NeedTruncation = false;
SDValue ArithOp = Op;
- if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
- SDValue Arith = Op->getOperand(0);
- // Both the trunc and the arithmetic op need to have one user each.
- if (Arith->hasOneUse())
- switch (Arith.getOpcode()) {
- default: break;
- case ISD::ADD:
- case ISD::SUB:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR: {
- NeedTruncation = true;
- ArithOp = Arith;
- }
- }
- }
-
- // Sometimes flags can be set either with an AND or with an SRL/SHL
- // instruction. SRL/SHL variant should be preferred for masks longer than this
- // number of bits.
- const int ShiftToAndMaxMaskWidth = 32;
- const bool ZeroCheck = (X86CC == X86::COND_E || X86CC == X86::COND_NE);
// NOTICE: In the code below we use ArithOp to hold the arithmetic operation
// which may be the result of a CAST. We use the variable 'Op', which is the
// non-casted variable when we check for possible users.
switch (ArithOp.getOpcode()) {
- case ISD::ADD:
- // We only want to rewrite this as a target-specific node with attached
- // flags if there is a reasonable chance of either using that to do custom
- // instructions selection that can fold some of the memory operands, or if
- // only the flags are used. If there are other uses, leave the node alone
- // and emit a test instruction.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg &&
- UI->getOpcode() != ISD::SETCC &&
- UI->getOpcode() != ISD::STORE)
- goto default_case;
-
- if (auto *C = dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) {
- // An add of one will be selected as an INC.
- if (C->isOne() &&
- (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- Opcode = X86ISD::INC;
- NumOperands = 1;
- break;
- }
-
- // An add of negative one (subtract of one) will be selected as a DEC.
- if (C->isAllOnesValue() &&
- (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- Opcode = X86ISD::DEC;
- NumOperands = 1;
- break;
- }
- }
-
- // Otherwise use a regular EFLAGS-setting add.
- Opcode = X86ISD::ADD;
- NumOperands = 2;
- break;
- case ISD::SHL:
- case ISD::SRL:
- // If we have a constant logical shift that's only used in a comparison
- // against zero turn it into an equivalent AND. This allows turning it into
- // a TEST instruction later.
- if (ZeroCheck && Op->hasOneUse() &&
- isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
- EVT VT = Op.getValueType();
- unsigned BitWidth = VT.getSizeInBits();
- unsigned ShAmt = Op->getConstantOperandVal(1);
- if (ShAmt >= BitWidth) // Avoid undefined shifts.
- break;
- APInt Mask = ArithOp.getOpcode() == ISD::SRL
- ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
- : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
- if (!Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
- break;
- Op = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
- DAG.getConstant(Mask, dl, VT));
- }
- break;
-
case ISD::AND:
// If the primary 'and' result isn't used, don't bother using X86ISD::AND,
- // because a TEST instruction will be better. However, AND should be
- // preferred if the instruction can be combined into ANDN.
- if (!hasNonFlagsUse(Op)) {
- SDValue Op0 = ArithOp->getOperand(0);
- SDValue Op1 = ArithOp->getOperand(1);
- EVT VT = ArithOp.getValueType();
- bool isAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1);
- bool isLegalAndnType = VT == MVT::i32 || VT == MVT::i64;
- bool isProperAndn = isAndn && isLegalAndnType && Subtarget.hasBMI();
-
- // If we cannot select an ANDN instruction, check if we can replace
- // AND+IMM64 with a shift before giving up. This is possible for masks
- // like 0xFF000000 or 0x00FFFFFF and if we care only about the zero flag.
- if (!isProperAndn) {
- if (!ZeroCheck)
- break;
-
- assert(!isa<ConstantSDNode>(Op0) && "AND node isn't canonicalized");
- auto *CN = dyn_cast<ConstantSDNode>(Op1);
- if (!CN)
- break;
-
- const APInt &Mask = CN->getAPIntValue();
- if (Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
- break; // Prefer TEST instruction.
-
- unsigned BitWidth = Mask.getBitWidth();
- unsigned LeadingOnes = Mask.countLeadingOnes();
- unsigned TrailingZeros = Mask.countTrailingZeros();
-
- if (LeadingOnes + TrailingZeros == BitWidth) {
- assert(TrailingZeros < VT.getSizeInBits() &&
- "Shift amount should be less than the type width");
- MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
- SDValue ShAmt = DAG.getConstant(TrailingZeros, dl, ShTy);
- Op = DAG.getNode(ISD::SRL, dl, VT, Op0, ShAmt);
- break;
- }
-
- unsigned LeadingZeros = Mask.countLeadingZeros();
- unsigned TrailingOnes = Mask.countTrailingOnes();
-
- if (LeadingZeros + TrailingOnes == BitWidth) {
- assert(LeadingZeros < VT.getSizeInBits() &&
- "Shift amount should be less than the type width");
- MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
- SDValue ShAmt = DAG.getConstant(LeadingZeros, dl, ShTy);
- Op = DAG.getNode(ISD::SHL, dl, VT, Op0, ShAmt);
- break;
- }
+ // because a TEST instruction will be better.
+ if (!hasNonFlagsUse(Op))
+ break;
- break;
- }
- }
LLVM_FALLTHROUGH;
+ case ISD::ADD:
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
- // Similar to ISD::ADD above, check if the uses will preclude useful
- // lowering of the target-specific node.
+ // Transform to an x86-specific ALU node with flags if there is a chance of
+ // using an RMW op or only the flags are used. Otherwise, leave
+ // the node alone and emit a 'test' instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
@@ -17891,6 +18923,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
+ case ISD::ADD: Opcode = X86ISD::ADD; break;
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
@@ -17901,8 +18934,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
break;
case X86ISD::ADD:
case X86ISD::SUB:
- case X86ISD::INC:
- case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
@@ -17912,36 +18943,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
break;
}
- // If we found that truncation is beneficial, perform the truncation and
- // update 'Op'.
- if (NeedTruncation) {
- EVT VT = Op.getValueType();
- SDValue WideVal = Op->getOperand(0);
- EVT WideVT = WideVal.getValueType();
- unsigned ConvertedOp = 0;
- // Use a target machine opcode to prevent further DAGCombine
- // optimizations that may separate the arithmetic operations
- // from the setcc node.
- switch (WideVal.getOpcode()) {
- default: break;
- case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
- case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
- case ISD::AND: ConvertedOp = X86ISD::AND; break;
- case ISD::OR: ConvertedOp = X86ISD::OR; break;
- case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
- }
-
- if (ConvertedOp) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
- SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
- SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- Op = DAG.getNode(ConvertedOp, dl, VTs, V0, V1);
- }
- }
- }
-
if (Opcode == 0) {
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
@@ -17960,17 +18961,17 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
const SDLoc &dl, SelectionDAG &DAG) const {
if (isNullConstant(Op1))
- return EmitTest(Op0, X86CC, dl, DAG);
-
- assert(!(isa<ConstantSDNode>(Op1) && Op0.getValueType() == MVT::i1) &&
- "Unexpected comparison operation for MVT::i1 operands");
+ return EmitTest(Op0, X86CC, dl, DAG, Subtarget);
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
// Only promote the compare up to I32 if it is a 16 bit operation
// with an immediate. 16 bit immediates are to be avoided.
- if ((Op0.getValueType() == MVT::i16 &&
- (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1))) &&
+ if (Op0.getValueType() == MVT::i16 &&
+ ((isa<ConstantSDNode>(Op0) &&
+ !cast<ConstantSDNode>(Op0)->getAPIntValue().isSignedIntN(8)) ||
+ (isa<ConstantSDNode>(Op1) &&
+ !cast<ConstantSDNode>(Op1)->getAPIntValue().isSignedIntN(8))) &&
!DAG.getMachineFunction().getFunction().optForMinSize() &&
!Subtarget.isAtom()) {
unsigned ExtendOp =
@@ -17983,6 +18984,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
return SDValue(Sub.getNode(), 1);
}
+ assert(Op0.getValueType().isFloatingPoint() && "Unexpected VT!");
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
@@ -18103,39 +19105,11 @@ unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
return 2;
}
-/// Create a BT (Bit Test) node - Test bit \p BitNo in \p Src and set condition
-/// according to equal/not-equal condition code \p CC.
-static SDValue getBitTestCondition(SDValue Src, SDValue BitNo, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG) {
- // If Src is i8, promote it to i32 with any_extend. There is no i8 BT
- // instruction. Since the shift amount is in-range-or-undefined, we know
- // that doing a bittest on the i32 value is ok. We extend to i32 because
- // the encoding for the i16 version is larger than the i32 version.
- // Also promote i16 to i32 for performance / code size reason.
- if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
- Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
-
- // See if we can use the 32-bit instruction instead of the 64-bit one for a
- // shorter encoding. Since the former takes the modulo 32 of BitNo and the
- // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
- // known to be zero.
- if (Src.getValueType() == MVT::i64 &&
- DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
- Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
-
- // If the operand types disagree, extend the shift amount to match. Since
- // BT ignores high bits (like shifts) we can use anyextend.
- if (Src.getValueType() != BitNo.getValueType())
- BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
-
- SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
- X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
- return getSETCC(Cond, BT, dl , DAG);
-}
-
/// Result of 'and' is compared against zero. Change to a BT node if possible.
+/// Returns the BT node and the condition code needed to use it.
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG) {
+ const SDLoc &dl, SelectionDAG &DAG,
+ SDValue &X86CC) {
assert(And.getOpcode() == ISD::AND && "Expected AND node!");
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
@@ -18144,7 +19118,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
if (Op1.getOpcode() == ISD::TRUNCATE)
Op1 = Op1.getOperand(0);
- SDValue LHS, RHS;
+ SDValue Src, BitNo;
if (Op1.getOpcode() == ISD::SHL)
std::swap(Op0, Op1);
if (Op0.getOpcode() == ISD::SHL) {
@@ -18154,13 +19128,12 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
unsigned BitWidth = Op0.getValueSizeInBits();
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
- KnownBits Known;
- DAG.computeKnownBits(Op0, Known);
+ KnownBits Known = DAG.computeKnownBits(Op0);
if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)
return SDValue();
}
- LHS = Op1;
- RHS = Op0.getOperand(1);
+ Src = Op1;
+ BitNo = Op0.getOperand(1);
}
} else if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
@@ -18168,24 +19141,49 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
SDValue AndLHS = Op0;
if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
- LHS = AndLHS.getOperand(0);
- RHS = AndLHS.getOperand(1);
+ Src = AndLHS.getOperand(0);
+ BitNo = AndLHS.getOperand(1);
} else {
// Use BT if the immediate can't be encoded in a TEST instruction or we
      // are optimizing for size and the immediate won't fit in a byte.
bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
isPowerOf2_64(AndRHSVal)) {
- LHS = AndLHS;
- RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, LHS.getValueType());
+ Src = AndLHS;
+ BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl,
+ Src.getValueType());
}
}
}
- if (LHS.getNode())
- return getBitTestCondition(LHS, RHS, CC, dl, DAG);
+ // No patterns found, give up.
+ if (!Src.getNode())
+ return SDValue();
- return SDValue();
+ // If Src is i8, promote it to i32 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i32 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ // Also promote i16 to i32 for performance / code size reasons.
+ if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
+ Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
+
+ // See if we can use the 32-bit instruction instead of the 64-bit one for a
+ // shorter encoding. Since the former takes the modulo 32 of BitNo and the
+ // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
+ // known to be zero.
+ if (Src.getValueType() == MVT::i64 &&
+ DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
+ Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (Src.getValueType() != BitNo.getValueType())
+ BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
+
+ X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
+ dl, MVT::i8);
+ return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
}
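The BT lowering above rests on a simple scalar identity: bit N of Src equals the carry flag produced by BT, so (X & (1 << N)) == 0 maps to COND_AE and ((X >> N) & 1) != 0 maps to COND_B, with the bit index taken modulo the operand width. A minimal sketch of that identity in plain C++ (btCarry is a made-up helper that only models the flag, not LLVM code):

#include <cassert>
#include <cstdint>

// Model of BT r32, r/m32: copy bit (BitNo mod 32) of Src into CF.
static bool btCarry(uint32_t Src, uint32_t BitNo) {
  return (Src >> (BitNo & 31)) & 1u;
}

int main() {
  uint32_t X = 0x00400020u;
  for (uint32_t N = 0; N < 32; ++N) {
    // (X & (1 << N)) == 0  <=>  CF clear after BT  (COND_AE).
    assert(((X & (1u << N)) == 0) == !btCarry(X, N));
    // ((X >> N) & 1) != 0  <=>  CF set after BT    (COND_B).
    assert((((X >> N) & 1u) != 0) == btCarry(X, N));
  }
  return 0;
}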
/// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
@@ -18292,34 +19290,32 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode);
}
-/// Try to turn a VSETULT into a VSETULE by modifying its second
-/// operand \p Op1. If non-trivial (for example because it's not constant)
-/// return an empty value.
-static SDValue ChangeVSETULTtoVSETULE(const SDLoc &dl, SDValue Op1,
- SelectionDAG &DAG) {
- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1.getNode());
+/// Given a simple buildvector constant, return a new vector constant with each
+/// element decremented. If decrementing would result in underflow or this
+/// is not a simple vector constant, return an empty value.
+static SDValue decrementVectorConstant(SDValue V, SelectionDAG &DAG) {
+ auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
if (!BV)
return SDValue();
- MVT VT = Op1.getSimpleValueType();
- MVT EVT = VT.getVectorElementType();
- unsigned n = VT.getVectorNumElements();
- SmallVector<SDValue, 8> ULTOp1;
-
- for (unsigned i = 0; i < n; ++i) {
- ConstantSDNode *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
- if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EVT)
+ MVT VT = V.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> NewVecC;
+ SDLoc DL(V);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ auto *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT)
return SDValue();
// Avoid underflow.
- APInt Val = Elt->getAPIntValue();
- if (Val == 0)
+ if (Elt->getAPIntValue().isNullValue())
return SDValue();
- ULTOp1.push_back(DAG.getConstant(Val - 1, dl, EVT));
+ NewVecC.push_back(DAG.getConstant(Elt->getAPIntValue() - 1, DL, EltVT));
}
- return DAG.getBuildVector(VT, dl, ULTOp1);
+ return DAG.getBuildVector(VT, DL, NewVecC);
}
/// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
@@ -18348,7 +19344,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// Only do this pre-AVX since vpcmp* is no longer destructive.
if (Subtarget.hasAVX())
return SDValue();
- SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG);
+ SDValue ULEOp1 = decrementVectorConstant(Op1, DAG);
if (!ULEOp1)
return SDValue();
Op1 = ULEOp1;
@@ -18362,9 +19358,9 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
break;
}
- SDValue Result = DAG.getNode(X86ISD::SUBUS, dl, VT, Op0, Op1);
+ SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1);
return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
- getZeroVector(VT, Subtarget, DAG, dl));
+ DAG.getConstant(0, dl, VT));
}
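The saturating-subtract rewrite works because usubsat(x, y) is zero exactly when x <=u y, and a strict unsigned less-than against a non-zero constant can be restated as a less-or-equal against the decremented constant. A small exhaustive check of that identity on i8 values (plain C++, illustrative only):

#include <cassert>
#include <cstdint>

// PSUBUS semantics: unsigned subtraction that clamps at zero.
static uint8_t usubsat(uint8_t A, uint8_t B) { return A > B ? A - B : 0; }

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 1; C < 256; ++C)
      // X <u C  <=>  X <=u C-1  <=>  usubsat(X, C-1) == 0   (needs C != 0).
      assert((X < C) == (usubsat(uint8_t(X), uint8_t(C - 1)) == 0));
  return 0;
}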
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
@@ -18527,13 +19523,26 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
!(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));
- // Special case: Use min/max operations for unsigned compares. We only want
- // to do this for unsigned compares if we need to flip signs or if it allows
- // use to avoid an invert.
+ // Special case: Use min/max operations for unsigned compares.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (ISD::isUnsignedIntSetCC(Cond) &&
(FlipSigns || ISD::isTrueWhenEqual(Cond)) &&
TLI.isOperationLegal(ISD::UMIN, VT)) {
+ // If we have a constant operand, increment/decrement it and change the
+ // condition to avoid an invert.
+ // TODO: This could be extended to handle a non-splat constant by checking
+ // that each element of the constant is not the max/null value.
+ APInt C;
+ if (Cond == ISD::SETUGT && isConstantSplat(Op1, C) && !C.isMaxValue()) {
+ // X > C --> X >= (C+1) --> X == umax(X, C+1)
+ Op1 = DAG.getConstant(C + 1, dl, VT);
+ Cond = ISD::SETUGE;
+ }
+ if (Cond == ISD::SETULT && isConstantSplat(Op1, C) && !C.isNullValue()) {
+ // X < C --> X <= (C-1) --> X == umin(X, C-1)
+ Op1 = DAG.getConstant(C - 1, dl, VT);
+ Cond = ISD::SETULE;
+ }
bool Invert = false;
unsigned Opc;
switch (Cond) {
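The constant adjustment in the block above is the usual trick for avoiding an inverted result: a strict unsigned compare against a constant becomes a non-strict compare against the constant moved by one, which UMIN/UMAX then expresses directly. A quick exhaustive check of the scalar identity (illustrative only):

#include <algorithm>
#include <cassert>

int main() {
  for (unsigned X = 0; X < 256; ++X) {
    // X >u C  -->  X >=u C+1  -->  X == umax(X, C+1)   (requires C != max).
    for (unsigned C = 0; C < 255; ++C)
      assert((X > C) == (X == std::max(X, C + 1)));
    // X <u C  -->  X <=u C-1  -->  X == umin(X, C-1)   (requires C != 0).
    for (unsigned C = 1; C < 256; ++C)
      assert((X < C) == (X == std::min(X, C - 1)));
  }
  return 0;
}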
@@ -18577,23 +19586,21 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
assert(Subtarget.hasSSE2() && "Don't know how to lower!");
- // First cast everything to the right type.
- Op0 = DAG.getBitcast(MVT::v4i32, Op0);
- Op1 = DAG.getBitcast(MVT::v4i32, Op1);
-
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
// compare is always unsigned.
SDValue SB;
if (FlipSigns) {
- SB = DAG.getConstant(0x80000000U, dl, MVT::v4i32);
+ SB = DAG.getConstant(0x8000000080000000ULL, dl, MVT::v2i64);
} else {
- SDValue Sign = DAG.getConstant(0x80000000U, dl, MVT::i32);
- SDValue Zero = DAG.getConstant(0x00000000U, dl, MVT::i32);
- SB = DAG.getBuildVector(MVT::v4i32, dl, {Sign, Zero, Sign, Zero});
+ SB = DAG.getConstant(0x0000000080000000ULL, dl, MVT::v2i64);
}
- Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB);
- Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB);
+ Op0 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op0, SB);
+ Op1 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op1, SB);
+
+ // Cast everything to the right type.
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
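The PCMPGTQ emulation named in the comment splits each 64-bit lane into 32-bit halves: the high halves decide the result unless they are equal, in which case the low halves, compared unsigned, do. A scalar model of that recombination, checked against a plain 64-bit signed compare (illustrative C++ assuming two's complement, not the vector code):

#include <cassert>
#include <cstdint>

// (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2)): high halves signed, low halves
// unsigned, which is why only the sign bits need flipping for unsigned input.
static bool sgt64(int64_t A, int64_t B) {
  int32_t HiA = int32_t(uint64_t(A) >> 32), HiB = int32_t(uint64_t(B) >> 32);
  uint32_t LoA = uint32_t(A), LoB = uint32_t(B);
  return (HiA > HiB) || (HiA == HiB && LoA > LoB);
}

int main() {
  const int64_t Vals[] = {0, 1, -1, 42, -42, INT64_MIN, INT64_MAX,
                          0x100000000LL, -0x100000000LL};
  for (int64_t A : Vals)
    for (int64_t B : Vals)
      assert(sgt64(A, B) == (A > B));
  return 0;
}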
@@ -18658,10 +19665,11 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
return Result;
}
-// Try to select this as a KTEST+SETCC if possible.
-static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+// Try to select this as a KORTEST+SETCC if possible.
+static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ SDValue &X86CC) {
// Only support equality comparisons.
if (CC != ISD::SETEQ && CC != ISD::SETNE)
return SDValue();
@@ -18677,12 +19685,12 @@ static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
!(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
return SDValue();
- X86::CondCode X86CC;
+ X86::CondCode X86Cond;
if (isNullConstant(Op1)) {
- X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
+ X86Cond = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
} else if (isAllOnesConstant(Op1)) {
// C flag is set for all ones.
- X86CC = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
+ X86Cond = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
} else
return SDValue();
@@ -18694,70 +19702,87 @@ static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
RHS = Op0.getOperand(1);
}
- SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
- return getSETCC(X86CC, KORTEST, dl, DAG);
+ X86CC = DAG.getConstant(X86Cond, dl, MVT::i8);
+ return DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
}
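EmitKORTEST relies on KORTEST setting ZF when the OR of the two mask registers is zero and CF when it is all ones, so a compare against 0 uses COND_E/COND_NE and a compare against all-ones uses COND_B/COND_AE. A scalar model of that flag behaviour for a 16-bit mask (illustrative only; the kortest lambda is invented for the sketch):

#include <cassert>
#include <cstdint>

int main() {
  auto kortest = [](uint16_t A, uint16_t B, bool &ZF, bool &CF) {
    uint16_t Or = A | B;
    ZF = (Or == 0);      // all-zero mask
    CF = (Or == 0xFFFF); // all-ones mask
  };
  bool ZF, CF;
  kortest(0x0000, 0x0000, ZF, CF); assert(ZF && !CF);
  kortest(0x00FF, 0xFF00, ZF, CF); assert(!ZF && CF);
  kortest(0x0001, 0x0000, ZF, CF); assert(!ZF && !CF);
  return 0;
}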
-SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
-
- MVT VT = Op.getSimpleValueType();
-
- if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
-
- assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- SDLoc dl(Op);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
+/// Emit flags for the given setcc condition and operands. Also returns the
+/// corresponding X86 condition code constant in X86CC.
+SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
+ ISD::CondCode CC, const SDLoc &dl,
+ SelectionDAG &DAG,
+ SDValue &X86CC) const {
// Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- if (SDValue NewSetCC = LowerAndToBT(Op0, CC, dl, DAG))
- return NewSetCC;
+ if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC))
+ return BT;
}
// Try to use PTEST for a tree ORs equality compared with 0.
// TODO: We could do AND tree with all 1s as well by using the C flag.
if (Op0.getOpcode() == ISD::OR && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- if (SDValue NewSetCC = LowerVectorAllZeroTest(Op0, CC, Subtarget, DAG))
- return NewSetCC;
+ if (SDValue PTEST = LowerVectorAllZeroTest(Op0, CC, Subtarget, DAG, X86CC))
+ return PTEST;
}
- // Try to lower using KTEST.
- if (SDValue NewSetCC = EmitKTEST(Op0, Op1, CC, dl, DAG, Subtarget))
- return NewSetCC;
+ // Try to lower using KORTEST.
+ if (SDValue KORTEST = EmitKORTEST(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
+ return KORTEST;
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
-
// If the input is a setcc, then reuse the input setcc or use a new one with
// the inverted condition.
if (Op0.getOpcode() == X86ISD::SETCC) {
- X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);
- if (!Invert)
- return Op0;
- CCode = X86::GetOppositeBranchCondition(CCode);
- return getSETCC(CCode, Op0.getOperand(1), dl, DAG);
+ X86CC = Op0.getOperand(0);
+ if (Invert) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ X86CC = DAG.getConstant(CCode, dl, MVT::i8);
+ }
+
+ return Op0.getOperand(1);
}
}
bool IsFP = Op1.getSimpleValueType().isFloatingPoint();
- X86::CondCode X86CC = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
- if (X86CC == X86::COND_INVALID)
+ X86::CondCode CondCode = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
+ if (CondCode == X86::COND_INVALID)
return SDValue();
- SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG);
+ SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
- return getSETCC(X86CC, EFLAGS, dl, DAG);
+ X86CC = DAG.getConstant(CondCode, dl, MVT::i8);
+ return EFLAGS;
+}
+
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+ MVT VT = Op.getSimpleValueType();
+
+ if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
+
+ assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDLoc dl(Op);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ SDValue X86CC;
+ SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
+ if (!EFLAGS)
+ return SDValue();
+
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
}
SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
@@ -18781,6 +19806,70 @@ SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const
return getSETCC(CC, Cmp.getValue(1), DL, DAG);
}
+// This function returns three things: the arithmetic computation itself
+// (Value), an EFLAGS result (Overflow), and a condition code (Cond). The
+// flag and the condition code define the case in which the arithmetic
+// computation overflows.
+static std::pair<SDValue, SDValue>
+getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getResNo() == 0 && "Unexpected result number!");
+ SDValue Value, Overflow;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ unsigned BaseOp = 0;
+ SDLoc DL(Op);
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown ovf instruction!");
+ case ISD::SADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SSUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_O;
+ break;
+ case ISD::USUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SMULO:
+ BaseOp = X86ISD::SMUL;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UMULO:
+ BaseOp = X86ISD::UMUL;
+ Cond = X86::COND_O;
+ break;
+ }
+
+ if (BaseOp) {
+ // Also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ Value = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
+ Overflow = Value.getValue(1);
+ }
+
+ return std::make_pair(Value, Overflow);
+}
+
+static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+ // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+ // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
+ // looks for this combo and may remove the "setcc" instruction if the "setcc"
+ // has only one use.
+ SDLoc DL(Op);
+ X86::CondCode Cond;
+ SDValue Value, Overflow;
+ std::tie(Value, Overflow) = getX86XALUOOp(Cond, Op, DAG);
+
+ SDValue SetCC = getSETCC(Cond, Overflow, DL, DAG);
+ return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC);
+}
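The mapping encoded in getX86XALUOOp pairs each overflowing operation with the flag that reports it: unsigned add/sub overflow shows up in CF (COND_B), signed and multiply overflow in OF (COND_O). A scalar sketch of the two add cases in portable C++ (helper names are invented for the example; two's complement assumed):

#include <cassert>
#include <cstdint>

// UADDO: carry out of an unsigned add; the wrapped sum drops below an input.
static bool uaddo(uint32_t A, uint32_t B, uint32_t &Sum) {
  Sum = A + B;
  return Sum < A;
}

// SADDO: signed overflow; the operands agree in sign but the sum does not.
static bool saddo(int32_t A, int32_t B, int32_t &Sum) {
  Sum = int32_t(uint32_t(A) + uint32_t(B));
  return ((A ^ Sum) & (B ^ Sum)) < 0;
}

int main() {
  uint32_t U; int32_t S;
  assert(uaddo(0xFFFFFFFFu, 1u, U) && U == 0u);
  assert(!uaddo(10u, 20u, U) && U == 30u);
  assert(saddo(INT32_MAX, 1, S));
  assert(!saddo(100, 200, S) && S == 300);
  return 0;
}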
+
+/// Return true if opcode is an X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getOpcode();
@@ -18789,12 +19878,8 @@ static bool isX86LogicalCmp(SDValue Op) {
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
- Opc == X86ISD::SBB || Opc == X86ISD::SMUL ||
- Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR ||
- Opc == X86ISD::XOR || Opc == X86ISD::AND))
- return true;
-
- if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
+ Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
+ Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND))
return true;
return false;
@@ -18845,7 +19930,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// of 3 logic instructions for size savings and potentially speed.
// Unfortunately, there is no scalar form of VBLENDV.
- // If either operand is a constant, don't try this. We can expect to
+ // If either operand is a +0.0 constant, don't try this. We can expect to
// optimize away at least one of the logic instructions later in that
// case, so that sequence would be faster than a variable blend.
@@ -18853,13 +19938,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// uses XMM0 as the selection register. That may need just as many
// instructions as the AND/ANDN/OR sequence due to register moves, so
// don't bother.
-
- if (Subtarget.hasAVX() &&
- !isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) {
-
+ if (Subtarget.hasAVX() && !isNullFPConstant(Op1) &&
+ !isNullFPConstant(Op2)) {
// Convert to vectors, do a VSELECT, and convert back to scalar.
// All of the conversions should be optimized away.
-
MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
@@ -18919,16 +20001,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (VT == MVT::v4i1 || VT == MVT::v2i1) {
- SDValue zeroConst = DAG.getIntPtrConstant(0, DL);
- Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
- DAG.getUNDEF(MVT::v8i1), Op1, zeroConst);
- Op2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
- DAG.getUNDEF(MVT::v8i1), Op2, zeroConst);
- SDValue newSelect = DAG.getSelect(DL, MVT::v8i1, Cond, Op1, Op2);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
- }
-
if (Cond.getOpcode() == ISD::SETCC) {
if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
Cond = NewCond;
@@ -18963,22 +20035,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select (x == 0), 0, -1) -> neg & sbb
if (isNullConstant(Y) &&
(isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
- SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0);
- SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
- SDValue(Neg.getNode(), 1));
- return Res;
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ Zero = DAG.getConstant(0, DL, Op.getValueType());
+ return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
}
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SDValue Zero = DAG.getConstant(0, DL, Op.getValueType());
SDValue Res = // Res = 0 or -1.
- DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp);
+ DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
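Both branches above lean on the same SBB idiom: after a compare that leaves the carry flag equal to the select condition, subtract-with-borrow of a register from itself (0 - 0 - CF) produces 0 or all-ones without a branch. A scalar model (plain C++, two's complement assumed; selectZeroOrAllOnes is a made-up name for the sketch):

#include <cassert>
#include <cstdint>

// After "cmp 0, x" the carry flag is set exactly when x != 0, so
// "sbb r, r" yields -1 for nonzero x and 0 otherwise.
static int32_t selectZeroOrAllOnes(uint32_t X) {
  uint32_t CF = (0u < X) ? 1u : 0u; // borrow out of 0 - X
  return int32_t(0u - 0u - CF);     // the SBB result
}

int main() {
  assert(selectZeroOrAllOnes(0) == 0);
  assert(selectZeroOrAllOnes(1) == -1);
  assert(selectZeroOrAllOnes(0xFFFFFFFFu) == -1);
  return 0;
}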
@@ -19055,34 +20126,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
} else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
- ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
- Cond.getOperand(0).getValueType() != MVT::i8)) {
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- unsigned X86Opcode;
- unsigned X86Cond;
- SDVTList VTs;
- switch (CondOpcode) {
- case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
- case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
- case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
- case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
- case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
- case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
- default: llvm_unreachable("unexpected overflowing operator");
- }
- if (CondOpcode == ISD::UMULO)
- VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
- MVT::i32);
- else
- VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
-
- SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS);
-
- if (CondOpcode == ISD::UMULO)
- Cond = X86Op.getValue(2);
- else
- Cond = X86Op.getValue(1);
+ CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) {
+ SDValue Value;
+ X86::CondCode X86Cond;
+ std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);
CC = DAG.getConstant(X86Cond, DL, MVT::i8);
AddTest = false;
@@ -19096,9 +20143,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- if (SDValue NewSetCC = LowerAndToBT(Cond, ISD::SETNE, DL, DAG)) {
- CC = NewSetCC.getOperand(0);
- Cond = NewSetCC.getOperand(1);
+ SDValue BTCC;
+ if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) {
+ CC = BTCC;
+ Cond = BT;
AddTest = false;
}
}
@@ -19106,7 +20154,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (AddTest) {
CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8);
- Cond = EmitTest(Cond, X86::COND_NE, DL, DAG);
+ Cond = EmitCmp(Cond, DAG.getConstant(0, DL, Cond.getValueType()),
+ X86::COND_NE, DL, DAG);
}
// a < b ? -1 : 0 -> RES = ~setcc_carry
@@ -19171,12 +20220,12 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
unsigned NumElts = VT.getVectorNumElements();
- // Extend VT if the scalar type is v8/v16 and BWI is not supported.
+ // Extend VT if the scalar type is i8/i16 and BWI is not supported.
MVT ExtVT = VT;
if (!Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16) {
// If v16i32 is to be avoided, we'll need to split and concatenate.
if (NumElts == 16 && !Subtarget.canExtendTo512DQ())
- return SplitAndExtendv16i1(ISD::SIGN_EXTEND, VT, In, dl, DAG);
+ return SplitAndExtendv16i1(Op.getOpcode(), VT, In, dl, DAG);
ExtVT = MVT::getVectorVT(MVT::i32, NumElts);
}
@@ -19195,10 +20244,10 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
MVT WideEltVT = WideVT.getVectorElementType();
if ((Subtarget.hasDQI() && WideEltVT.getSizeInBits() >= 32) ||
(Subtarget.hasBWI() && WideEltVT.getSizeInBits() <= 16)) {
- V = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, In);
+ V = DAG.getNode(Op.getOpcode(), dl, WideVT, In);
} else {
- SDValue NegOne = getOnesVector(WideVT, DAG, dl);
- SDValue Zero = getZeroVector(WideVT, Subtarget, DAG, dl);
+ SDValue NegOne = DAG.getConstant(-1, dl, WideVT);
+ SDValue Zero = DAG.getConstant(0, dl, WideVT);
V = DAG.getSelect(dl, WideVT, In, NegOne, Zero);
}
@@ -19238,7 +20287,6 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
SDValue In = Op->getOperand(0);
MVT VT = Op->getSimpleValueType(0);
MVT InVT = In.getSimpleValueType();
- assert(VT.getSizeInBits() == InVT.getSizeInBits());
MVT SVT = VT.getVectorElementType();
MVT InSVT = InVT.getVectorElementType();
@@ -19249,70 +20297,100 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
- !(VT.is256BitVector() && Subtarget.hasInt256()) &&
+ !(VT.is256BitVector() && Subtarget.hasAVX()) &&
!(VT.is512BitVector() && Subtarget.hasAVX512()))
return SDValue();
SDLoc dl(Op);
+ unsigned Opc = Op.getOpcode();
+ unsigned NumElts = VT.getVectorNumElements();
// For 256-bit vectors, we only need the lower (128-bit) half of the input.
// For 512-bit vectors, we need 128-bits or 256-bits.
- if (VT.getSizeInBits() > 128) {
+ if (InVT.getSizeInBits() > 128) {
// Input needs to be at least the same number of elements as output, and
// at least 128-bits.
- int InSize = InSVT.getSizeInBits() * VT.getVectorNumElements();
+ int InSize = InSVT.getSizeInBits() * NumElts;
In = extractSubVector(In, 0, DAG, dl, std::max(InSize, 128));
+ InVT = In.getSimpleValueType();
}
- assert((Op.getOpcode() != ISD::ZERO_EXTEND_VECTOR_INREG ||
- InVT == MVT::v64i8) && "Zero extend only for v64i8 input!");
-
- // SSE41 targets can use the pmovsx* instructions directly for 128-bit results,
+ // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit results,
// so are legal and shouldn't occur here. AVX2/AVX512 pmovsx* instructions still
// need to be handled here for 256/512-bit results.
if (Subtarget.hasInt256()) {
assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
- unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
- X86ISD::VSEXT : X86ISD::VZEXT;
+
+ if (InVT.getVectorNumElements() != NumElts)
+ return DAG.getNode(Op.getOpcode(), dl, VT, In);
+
+ // FIXME: Apparently we create inreg operations that could be regular
+ // extends.
+ unsigned ExtOpc =
+ Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, VT, In);
}
+ // pre-AVX2 256-bit extensions need to be split into 128-bit instructions.
+ if (Subtarget.hasAVX()) {
+ assert(VT.is256BitVector() && "256-bit vector expected");
+ int HalfNumElts = NumElts / 2;
+ MVT HalfVT = MVT::getVectorVT(SVT, HalfNumElts);
+
+ unsigned NumSrcElts = InVT.getVectorNumElements();
+ SmallVector<int, 16> HiMask(NumSrcElts, SM_SentinelUndef);
+ for (int i = 0; i != HalfNumElts; ++i)
+ HiMask[i] = HalfNumElts + i;
+
+ SDValue Lo = DAG.getNode(Opc, dl, HalfVT, In);
+ SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, DAG.getUNDEF(InVT), HiMask);
+ Hi = DAG.getNode(Opc, dl, HalfVT, Hi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ }
+
// We should only get here for sign extend.
- assert(Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
- "Unexpected opcode!");
+ assert(Opc == ISD::SIGN_EXTEND_VECTOR_INREG && "Unexpected opcode!");
+ assert(VT.is128BitVector() && InVT.is128BitVector() && "Unexpected VTs");
// pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
SDValue Curr = In;
- MVT CurrVT = InVT;
+ SDValue SignExt = Curr;
// As SRAI is only available on i16/i32 types, we expand only up to i32
// and handle i64 separately.
- while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) {
- Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
- MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
- CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
- Curr = DAG.getBitcast(CurrVT, Curr);
- }
+ if (InVT != MVT::v4i32) {
+ MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
- SDValue SignExt = Curr;
- if (CurrVT != InVT) {
- unsigned SignExtShift =
- CurrVT.getScalarSizeInBits() - InSVT.getSizeInBits();
- SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
+ unsigned DestWidth = DestVT.getScalarSizeInBits();
+ unsigned Scale = DestWidth / InSVT.getSizeInBits();
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned DestElts = DestVT.getVectorNumElements();
+
+ // Build a shuffle mask that takes each input element and places it in the
+ // MSBs of the new element size.
+ SmallVector<int, 16> Mask(InNumElts, SM_SentinelUndef);
+ for (unsigned i = 0; i != DestElts; ++i)
+ Mask[i * Scale + (Scale - 1)] = i;
+
+ Curr = DAG.getVectorShuffle(InVT, dl, In, In, Mask);
+ Curr = DAG.getBitcast(DestVT, Curr);
+
+ unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
+ SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
DAG.getConstant(SignExtShift, dl, MVT::i8));
}
- if (CurrVT == VT)
- return SignExt;
-
- if (VT == MVT::v2i64 && CurrVT == MVT::v4i32) {
- SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
- DAG.getConstant(31, dl, MVT::i8));
- SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5});
- return DAG.getBitcast(VT, Ext);
+ if (VT == MVT::v2i64) {
+ assert(Curr.getValueType() == MVT::v4i32 && "Unexpected input VT");
+ SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
+ SDValue Sign = DAG.getSetCC(dl, MVT::v4i32, Zero, Curr, ISD::SETGT);
+ SignExt = DAG.getVectorShuffle(MVT::v4i32, dl, SignExt, Sign, {0, 4, 1, 5});
+ SignExt = DAG.getBitcast(VT, SignExt);
}
- return SDValue();
+ return SignExt;
}
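The pre-SSE4.1 path above shuffles each narrow element into the most significant bits of the wider lane and then arithmetic-shifts it back down, which is exactly scalar sign extension done with shifts. A scaled-down scalar illustration (plain C++, assuming two's complement and an arithmetic right shift on signed values):

#include <cassert>
#include <cstdint>

// Sign-extend i8 -> i32 the way the vector lowering does: place the byte in
// the top of the lane, then shift it back with an arithmetic shift (VSRAI).
static int32_t sext8to32(uint8_t V) {
  int32_t InMSBs = int32_t(uint32_t(V) << 24); // byte now occupies bits 31..24
  return InMSBs >> 24;                         // arithmetic shift fills the sign
}

int main() {
  assert(sext8to32(0x7F) == 127);
  assert(sext8to32(0x80) == -128);
  assert(sext8to32(0xFF) == -1);
  return 0;
}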
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
@@ -19337,38 +20415,40 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
InVT.getVectorElementType() == MVT::i32) &&
"Unexpected element type");
+ // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
+ if (InVT == MVT::v8i8) {
+ if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+ return SDValue();
+
+ In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
+ MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8));
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, VT, In);
+ }
+
if (Subtarget.hasInt256())
- return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+ return Op;
// Optimize vectors in AVX mode
// Sign extend v8i16 to v8i32 and
// v4i32 to v4i64
//
// Divide input vector into two parts
- // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
+ // for v4i32 the high shuffle mask will be {2, 3, -1, -1}
// use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
// concat the vectors to original VT
- unsigned NumElems = InVT.getVectorNumElements();
- SDValue Undef = DAG.getUNDEF(InVT);
-
- SmallVector<int,8> ShufMask1(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask1[i] = i;
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
- SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask1);
+ SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In);
- SmallVector<int,8> ShufMask2(NumElems, -1);
+ unsigned NumElems = InVT.getVectorNumElements();
+ SmallVector<int,8> ShufMask(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask2[i] = i + NumElems/2;
+ ShufMask[i] = i + NumElems/2;
- SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask2);
-
- MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
-
- OpLo = DAG.getSignExtendVectorInReg(OpLo, dl, HalfVT);
- OpHi = DAG.getSignExtendVectorInReg(OpHi, dl, HalfVT);
+ SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
+ OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
@@ -19379,19 +20459,47 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(St);
SDValue StoredVal = St->getValue();
- // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
- assert(StoredVal.getValueType().isVector() &&
- StoredVal.getValueType().getVectorElementType() == MVT::i1 &&
- StoredVal.getValueType().getVectorNumElements() <= 8 &&
- "Unexpected VT");
- assert(!St->isTruncatingStore() && "Expected non-truncating store");
- assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
- "Expected AVX512F without AVX512DQI");
+ // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
+ if (StoredVal.getValueType().isVector() &&
+ StoredVal.getValueType().getVectorElementType() == MVT::i1) {
+ assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
+ "Unexpected VT");
+ assert(!St->isTruncatingStore() && "Expected non-truncating store");
+ assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
+ "Expected AVX512F without AVX512DQI");
+
+ StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+ DAG.getUNDEF(MVT::v16i1), StoredVal,
+ DAG.getIntPtrConstant(0, dl));
+ StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
+ StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
+
+ return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
+ }
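Without AVX512DQ the short mask vector is widened to v16i1, bitcast to i16 and truncated to i8 before being stored, i.e. the i1 elements end up packed into the low bits of a byte. A scalar sketch of that packing (illustrative only; packMask is a made-up helper):

#include <cassert>
#include <cstdint>

// Pack eight i1 elements into a byte, element 0 in bit 0, as the v8i1 store
// lowering effectively does via the v16i1-to-i16 bitcast.
static uint8_t packMask(const bool Bits[8]) {
  uint8_t Out = 0;
  for (int I = 0; I < 8; ++I)
    Out |= uint8_t(Bits[I] ? 1u << I : 0u);
  return Out;
}

int main() {
  bool Mask[8] = {true, false, true, true, false, false, false, true};
  assert(packMask(Mask) == 0x8D); // 0b10001101
  return 0;
}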
+
+ if (St->isTruncatingStore())
+ return SDValue();
- StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
- DAG.getUNDEF(MVT::v8i1), StoredVal,
+ MVT StoreVT = StoredVal.getSimpleValueType();
+ assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
+ "Unexpected VT");
+ if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
+ TargetLowering::TypeWidenVector)
+ return SDValue();
+
+ // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
+ // and store it.
+ MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
+ StoreVT.getVectorNumElements() * 2);
+ StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
+ DAG.getUNDEF(StoreVT));
+ MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
+ MVT CastVT = MVT::getVectorVT(StVT, 2);
+ StoredVal = DAG.getBitcast(CastVT, StoredVal);
+ StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
DAG.getIntPtrConstant(0, dl));
- StoredVal = DAG.getBitcast(MVT::i8, StoredVal);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
@@ -19400,7 +20508,7 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
// Lower vector extended loads using a shuffle. If SSSE3 is not available we
// may emit an illegal shuffle but the expansion is still better than scalar
-// code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise
+// code. We generate sext/sext_invec for SEXTLOADs if it's available, otherwise
// we'll emit a shuffle and an arithmetic shift.
// FIXME: Is the expansion actually better than scalar code? It doesn't seem so.
// TODO: It is possible to support ZExt by zeroing the undef values during
@@ -19408,16 +20516,16 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT RegVT = Op.getSimpleValueType();
- assert(RegVT.isVector() && "We only custom lower vector sext loads.");
+ assert(RegVT.isVector() && "We only custom lower vector loads.");
assert(RegVT.isInteger() &&
- "We only custom lower integer vector sext loads.");
+ "We only custom lower integer vector loads.");
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
EVT MemVT = Ld->getMemoryVT();
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
- if (RegVT.isVector() && RegVT.getVectorElementType() == MVT::i1) {
+ if (RegVT.getVectorElementType() == MVT::i1) {
assert(EVT(RegVT) == MemVT && "Expected non-extending load");
assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT");
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
@@ -19429,12 +20537,12 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
// Replace chain users with the new chain.
assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewLd.getValue(1));
- SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
- DAG.getBitcast(MVT::v8i1, NewLd),
- DAG.getIntPtrConstant(0, dl));
- return DAG.getMergeValues({Extract, NewLd.getValue(1)}, dl);
+ SDValue Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, NewLd);
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
+ DAG.getBitcast(MVT::v16i1, Val),
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl);
}
// Nothing useful we can do without SSE2 shuffles.
@@ -19490,10 +20598,10 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
// Finally, do a normal sign-extend to the desired register.
- return DAG.getSExtOrTrunc(Load, dl, RegVT);
+ SDValue SExt = DAG.getSExtOrTrunc(Load, dl, RegVT);
+ return DAG.getMergeValues({SExt, Load.getValue(1)}, dl);
}
// All sizes must be a power of two.
@@ -19521,26 +20629,26 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
assert((Ext != ISD::SEXTLOAD || NumLoads == 1) &&
"Can only lower sext loads with a single scalar load!");
- unsigned loadRegZize = RegSz;
+ unsigned loadRegSize = RegSz;
if (Ext == ISD::SEXTLOAD && RegSz >= 256)
- loadRegZize = 128;
+ loadRegSize = 128;
// If we don't have BWI we won't be able to create the shuffle needed for
// v8i8->v8i64.
if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
MemVT == MVT::v8i8)
- loadRegZize = 128;
+ loadRegSize = 128;
// Represent our vector as a sequence of elements which are the
// largest scalar that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(
- *DAG.getContext(), SclrLoadTy, loadRegZize / SclrLoadTy.getSizeInBits());
+ *DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits());
// Represent the data using the same element type that is stored in
// memory. In practice, we ''widen'' MemVT.
EVT WideVecVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- loadRegZize / MemVT.getScalarSizeInBits());
+ loadRegSize / MemVT.getScalarSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
@@ -19551,15 +20659,20 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
SmallVector<SDValue, 8> Chains;
SDValue Ptr = Ld->getBasePtr();
- SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl,
+ unsigned OffsetInc = SclrLoadTy.getSizeInBits() / 8;
+ SDValue Increment = DAG.getConstant(OffsetInc, dl,
TLI.getPointerTy(DAG.getDataLayout()));
SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
+ unsigned Offset = 0;
for (unsigned i = 0; i < NumLoads; ++i) {
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), Offset);
+
// Perform a single load.
SDValue ScalarLoad =
- DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
- Ld->getAlignment(), Ld->getMemOperand()->getFlags());
+ DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo().getWithOffset(Offset),
+ NewAlign, Ld->getMemOperand()->getFlags());
Chains.push_back(ScalarLoad.getValue(1));
// Create the first element type using SCALAR_TO_VECTOR in order to avoid
// another round of DAGCombining.
@@ -19570,6 +20683,7 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
ScalarLoad, DAG.getIntPtrConstant(i, dl));
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ Offset += OffsetInc;
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
@@ -19580,28 +20694,14 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
unsigned SizeRatio = RegSz / MemSz;
if (Ext == ISD::SEXTLOAD) {
- // If we have SSE4.1, we can directly emit a VSEXT node.
- if (Subtarget.hasSSE41()) {
- SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, RegVT, SlicedVec, DAG);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Sext;
- }
-
- // Otherwise we'll use SIGN_EXTEND_VECTOR_INREG to sign extend the lowest
- // lanes.
- assert(TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND_VECTOR_INREG, RegVT) &&
- "We can't implement a sext load without SIGN_EXTEND_VECTOR_INREG!");
-
- SDValue Shuff = DAG.getSignExtendVectorInReg(SlicedVec, dl, RegVT);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Shuff;
+ SDValue Sext = getExtendInVec(/*Signed*/true, dl, RegVT, SlicedVec, DAG);
+ return DAG.getMergeValues({Sext, TF}, dl);
}
if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
MemVT == MVT::v8i8) {
- SDValue Sext = getExtendInVec(X86ISD::VZEXT, dl, RegVT, SlicedVec, DAG);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Sext;
+ SDValue Sext = getExtendInVec(/*Signed*/false, dl, RegVT, SlicedVec, DAG);
+ return DAG.getMergeValues({Sext, TF}, dl);
}
// Redistribute the loaded elements into the different locations.
@@ -19614,8 +20714,7 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
// Bitcast to the requested type.
Shuff = DAG.getBitcast(RegVT, Shuff);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Shuff;
+ return DAG.getMergeValues({Shuff, TF}, dl);
}
/// Return true if node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes
@@ -19712,49 +20811,13 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
CondOpcode = Cond.getOpcode();
if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
- ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
- Cond.getOperand(0).getValueType() != MVT::i8)) {
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- unsigned X86Opcode;
- unsigned X86Cond;
- SDVTList VTs;
- // Keep this in sync with LowerXALUO, otherwise we might create redundant
- // instructions that can't be removed afterwards (i.e. X86ISD::ADD and
- // X86ISD::INC).
- switch (CondOpcode) {
- case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
- case ISD::SADDO:
- if (isOneConstant(RHS)) {
- X86Opcode = X86ISD::INC; X86Cond = X86::COND_O;
- break;
- }
- X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
- case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
- case ISD::SSUBO:
- if (isOneConstant(RHS)) {
- X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O;
- break;
- }
- X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
- case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
- case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
- default: llvm_unreachable("unexpected overflowing operator");
- }
- if (Inverted)
- X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond);
- if (CondOpcode == ISD::UMULO)
- VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
- MVT::i32);
- else
- VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
-
- SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS);
+ CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) {
+ SDValue Value;
+ X86::CondCode X86Cond;
+ std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);
- if (CondOpcode == ISD::UMULO)
- Cond = X86Op.getValue(2);
- else
- Cond = X86Op.getValue(1);
+ if (Inverted)
+ X86Cond = X86::GetOppositeBranchCondition(X86Cond);
CC = DAG.getConstant(X86Cond, dl, MVT::i8);
addTest = false;
@@ -19855,34 +20918,17 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
} else if (Cond.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
// For FCMP_UNE, we can emit
- // two branches instead of an explicit AND instruction with a
- // separate test. However, we only do this if this block doesn't
- // have a fall-through edge, because this requires an explicit
- // jmp when the condition is false.
- if (Op.getNode()->hasOneUse()) {
- SDNode *User = *Op.getNode()->use_begin();
- // Look for an unconditional branch following this conditional branch.
- // We need this because we need to reverse the successors in order
- // to implement FCMP_UNE.
- if (User->getOpcode() == ISD::BR) {
- SDValue FalseBB = User->getOperand(1);
- SDNode *NewBR =
- DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
- assert(NewBR == User);
- (void)NewBR;
-
- SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
- Cond.getOperand(0), Cond.getOperand(1));
- Cmp = ConvertCmpIfNecessary(Cmp, DAG);
- CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
- Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
- Chain, Dest, CC, Cmp);
- CC = DAG.getConstant(X86::COND_NP, dl, MVT::i8);
- Cond = Cmp;
- addTest = false;
- Dest = FalseBB;
- }
- }
+ // two branches instead of an explicit OR instruction with a
+ // separate test.
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ Cond.getOperand(0), Cond.getOperand(1));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
+ CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = DAG.getConstant(X86::COND_P, dl, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
}
}
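On x86 an unordered floating-point compare sets the parity flag, so FCMP_UNE splits into one branch on "not equal" (COND_NE) and one on "parity set" (COND_P), both targeting the true destination. The predicate being implemented, spelled out in portable C++ (illustrative; note that C++ operator!= is already true for NaN operands, the two terms are written out only to mirror the two branches):

#include <cassert>
#include <cmath>

// FCMP_UNE: unordered (a NaN is present) OR not equal.
static bool fcmpUNE(double A, double B) {
  return std::isunordered(A, B) || A != B;
}

int main() {
  assert(fcmpUNE(1.0, 2.0));          // taken via the COND_NE branch
  assert(!fcmpUNE(1.0, 1.0));         // neither branch taken
  assert(fcmpUNE(std::nan(""), 1.0)); // taken via the COND_P branch
  return 0;
}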
@@ -19894,9 +20940,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- if (SDValue NewSetCC = LowerAndToBT(Cond, ISD::SETNE, dl, DAG)) {
- CC = NewSetCC.getOperand(0);
- Cond = NewSetCC.getOperand(1);
+ SDValue BTCC;
+ if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, dl, DAG, BTCC)) {
+ CC = BTCC;
+ Cond = BT;
addTest = false;
}
}
@@ -19905,7 +20952,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (addTest) {
X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
CC = DAG.getConstant(X86Cond, dl, MVT::i8);
- Cond = EmitTest(Cond, X86Cond, dl, DAG);
+ Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()),
+ X86Cond, dl, DAG);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
@@ -20141,6 +21189,25 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
+// Helper to get immediate/variable SSE shift opcode from other shift opcodes.
+static unsigned getTargetVShiftUniformOpcode(unsigned Opc, bool IsVariable) {
+ switch (Opc) {
+ case ISD::SHL:
+ case X86ISD::VSHL:
+ case X86ISD::VSHLI:
+ return IsVariable ? X86ISD::VSHL : X86ISD::VSHLI;
+ case ISD::SRL:
+ case X86ISD::VSRL:
+ case X86ISD::VSRLI:
+ return IsVariable ? X86ISD::VSRL : X86ISD::VSRLI;
+ case ISD::SRA:
+ case X86ISD::VSRA:
+ case X86ISD::VSRAI:
+ return IsVariable ? X86ISD::VSRA : X86ISD::VSRAI;
+ }
+ llvm_unreachable("Unknown target vector shift node");
+}
+
/// Handle vector element shifts where the shift amount is a constant.
/// Takes immediate version of shift as input.
static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
@@ -20236,46 +21303,57 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
CShAmt->getZExtValue(), DAG);
- // Change opcode to non-immediate version
- switch (Opc) {
- default: llvm_unreachable("Unknown target vector shift node");
- case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
- case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
- case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
- }
+ // Change opcode to non-immediate version.
+ Opc = getTargetVShiftUniformOpcode(Opc, true);
// Need to build a vector containing shift amount.
// SSE/AVX packed shifts only use the lower 64-bit of the shift count.
- // +=================+============+=======================================+
- // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
- // +=================+============+=======================================+
- // | i64 | Yes, No | Use ShAmt as lowest elt |
- // | i32 | Yes | zero-extend in-reg |
- // | (i32 zext(i16)) | Yes | zero-extend in-reg |
- // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) |
- // +=================+============+=======================================+
+ // +====================+============+=======================================+
+ // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
+ // +====================+============+=======================================+
+ // | i64 | Yes, No | Use ShAmt as lowest elt |
+ // | i32 | Yes | zero-extend in-reg |
+ // | (i32 zext(i16/i8)) | Yes | zero-extend in-reg |
+ // | (i32 zext(i16/i8)) | No | byte-shift-in-reg |
+ // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud) |
+ // +====================+============+=======================================+
if (SVT == MVT::i64)
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
- else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
- ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
+ else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
+ ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
+ ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
ShAmt = ShAmt.getOperand(0);
- ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt);
- ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
+ MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
+ if (Subtarget.hasSSE41())
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
+ MVT::v2i64, ShAmt);
+ else {
+ SDValue ByteShift = DAG.getConstant(
+ (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
+ ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
+ ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
+ ByteShift);
+ ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
+ ByteShift);
+ }
} else if (Subtarget.hasSSE41() &&
ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
- ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
+ MVT::v2i64, ShAmt);
} else {
- SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT),
- DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
+ SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
+ DAG.getUNDEF(SVT)};
ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType();
- MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
+ MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());
ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
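For targets without SSE4.1 the shift amount is zero-extended inside the vector register by shifting the whole 128-bit value left and then right by (128 - element bits) / 8 bytes. The same idea scaled down to a 64-bit scalar, purely as an illustration:

#include <cassert>
#include <cstdint>

int main() {
  // A 16-bit shift amount sitting in the low element of a "register" full of
  // junk; shifting left then right by (width - 16) bits clears everything
  // above it, which is what the VSHLDQ/VSRLDQ pair does byte-wise on an XMM.
  uint64_t Reg = 0xDEADBEEF0000F00Dull;
  uint64_t ZExtAmt = (Reg << 48) >> 48;
  assert(ZExtAmt == 0xF00Dull);
  return 0;
}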
@@ -20292,11 +21370,7 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
if (X86::isZeroNode(Mask))
return DAG.getConstant(0, dl, MaskVT);
- if (MaskVT.bitsGT(Mask.getSimpleValueType())) {
- // Mask should be extended
- Mask = DAG.getNode(ISD::ANY_EXTEND, dl,
- MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask);
- }
+ assert(MaskVT.bitsLE(Mask.getSimpleValueType()) && "Unexpected mask size!");
if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
assert(MaskVT == MVT::v64i1 && "Expected v64i1 mask!");
@@ -20340,24 +21414,6 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- switch (Op.getOpcode()) {
- default: break;
- case X86ISD::CMPM:
- case X86ISD::CMPM_RND:
- case X86ISD::VPSHUFBITQMB:
- case X86ISD::VFPCLASS:
- return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
- case ISD::TRUNCATE:
- case X86ISD::VTRUNC:
- case X86ISD::VTRUNCS:
- case X86ISD::VTRUNCUS:
- case X86ISD::CVTPS2PH:
- // We can't use ISD::VSELECT here because it is not always "Legal"
- // for the destination type. For example vpmovqb require only AVX512
- // and vselect that can operate on byte element type require BWI
- OpcodeSelect = X86ISD::SELECT;
- break;
- }
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
@@ -20383,7 +21439,9 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDLoc dl(Op);
assert(Mask.getValueType() == MVT::i8 && "Unexpect type");
- SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask);
+ SDValue IMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i1,
+ DAG.getBitcast(MVT::v8i1, Mask),
+ DAG.getIntPtrConstant(0, dl));
if (Op.getOpcode() == X86ISD::FSETCCM ||
Op.getOpcode() == X86ISD::FSETCCM_RND ||
Op.getOpcode() == X86ISD::VFPCLASSS)
@@ -20486,13 +21544,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
}
- case INTR_TYPE_2OP:
- case INTR_TYPE_2OP_IMM8: {
+ case INTR_TYPE_2OP: {
SDValue Src2 = Op.getOperand(2);
- if (IntrData->Type == INTR_TYPE_2OP_IMM8)
- Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2);
-
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -20724,38 +21778,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Swap Src1 and Src2 in the node creation
return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
}
- case FMA_OP_MASKZ:
- case FMA_OP_MASK: {
- SDValue Src1 = Op.getOperand(1);
- SDValue Src2 = Op.getOperand(2);
- SDValue Src3 = Op.getOperand(3);
- SDValue Mask = Op.getOperand(4);
- MVT VT = Op.getSimpleValueType();
- SDValue PassThru = SDValue();
-
- // set PassThru element
- if (IntrData->Type == FMA_OP_MASKZ)
- PassThru = getZeroVector(VT, Subtarget, DAG, dl);
- else
- PassThru = Src1;
-
- // We specify 2 possible opcodes for intrinsics with rounding modes.
- // First, we check if the intrinsic may have non-default rounding mode,
- // (IntrData->Opc1 != 0), then we check the rounding mode operand.
- unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
- SDValue Rnd = Op.getOperand(5);
- if (!isRoundModeCurDirection(Rnd))
- return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, Op.getValueType(),
- Src1, Src2, Src3, Rnd),
- Mask, PassThru, Subtarget, DAG);
- }
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
- dl, Op.getValueType(),
- Src1, Src2, Src3),
- Mask, PassThru, Subtarget, DAG);
- }
case IFMA_OP:
// NOTE: We need to swizzle the operands to pass the multiply operands
// first.
@@ -20766,7 +21788,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// does not change the value. Set it to 0 since it can change.
return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
DAG.getIntPtrConstant(0, dl));
- case CVTPD2PS_MASK: {
+ case CVTPD2PS_RND_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
@@ -20790,13 +21812,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getIntPtrConstant(0, dl)),
Mask, PassThru, Subtarget, DAG);
}
- case FPCLASS: {
- // FPclass intrinsics
- SDValue Src1 = Op.getOperand(1);
- MVT MaskVT = Op.getSimpleValueType();
- SDValue Imm = Op.getOperand(2);
- return DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm);
- }
case FPCLASSS: {
SDValue Src1 = Op.getOperand(1);
SDValue Imm = Op.getOperand(2);
@@ -20811,32 +21826,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
FPclassMask, DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(MVT::i8, Ins);
}
- case CMP_MASK: {
- // Comparison intrinsics with masks.
- // Example of transformation:
- // (i8 (int_x86_avx512_mask_pcmpeq_q_128
- // (v2i64 %a), (v2i64 %b), (i8 %mask))) ->
- // (i8 (bitcast
- // (v8i1 (insert_subvector zero,
- // (v2i1 (and (PCMPEQM %a, %b),
- // (extract_subvector
- // (v8i1 (bitcast %mask)), 0))), 0))))
- MVT VT = Op.getOperand(1).getSimpleValueType();
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
- SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3);
- MVT BitcastVT = MVT::getVectorVT(MVT::i1,
- Mask.getSimpleValueType().getSizeInBits());
- SDValue Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2));
- SDValue CmpMask = getVectorMaskingNode(Cmp, Mask, SDValue(),
- Subtarget, DAG);
- // Need to fill with zeros to ensure the bitcast will produce zeroes
- // for the upper bits in the v2i1/v4i1 case.
- SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
- DAG.getConstant(0, dl, BitcastVT),
- CmpMask, DAG.getIntPtrConstant(0, dl));
- return DAG.getBitcast(Op.getValueType(), Res);
- }
case CMP_MASK_CC: {
MVT MaskVT = Op.getSimpleValueType();
@@ -21007,6 +21996,59 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), RoundingMode);
}
+ // ADC/ADCX/SBB
+ case ADX: {
+ SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
+ SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32);
+
+ SDValue Res;
+ // If the carry in is zero, then we should just use ADD/SUB instead of
+ // ADC/SBB.
+ if (isNullConstant(Op.getOperand(1))) {
+ Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2),
+ Op.getOperand(3));
+ } else {
+ SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1),
+ DAG.getConstant(-1, dl, MVT::i8));
+ Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2),
+ Op.getOperand(3), GenCF.getValue(1));
+ }
+ SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
+ SDValue Results[] = { SetCC, Res };
+ return DAG.getMergeValues(Results, dl);
+ }
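// A standalone plain-C++ sketch (illustrative only, not SelectionDAG code) of
// the addcarry semantics the ADX case above lowers: the returned flag is the
// carry out of a 32-bit add that also consumes a carry in. Names here are
// hypothetical.
#include <cassert>
#include <cstdint>

// Model of an _addcarry_u32-style helper: store the 32-bit sum, return carry.
static unsigned char addCarry32(unsigned char CarryIn, uint32_t A, uint32_t B,
                                uint32_t *Out) {
  // Widen to 64 bits so the carry-out is simply bit 32 of the exact sum,
  // mirroring how the lowering reads CF (X86::COND_B) after ADC.
  uint64_t Sum = uint64_t(A) + uint64_t(B) + (CarryIn ? 1 : 0);
  *Out = uint32_t(Sum);
  return (unsigned char)(Sum >> 32);
}

int main() {
  uint32_t Out;
  // A constant-zero carry-in behaves like a plain ADD, which is why the code
  // above selects Opc1 (ADD/SUB) for isNullConstant carry operands.
  assert(addCarry32(0, 0xFFFFFFFFu, 1, &Out) == 1 && Out == 0);
  // A non-zero carry-in is first converted back into CF (by adding -1 to the
  // i8 carry value) and then consumed by ADC; the scalar model just adds it.
  assert(addCarry32(1, 0xFFFFFFFEu, 1, &Out) == 1 && Out == 0);
  assert(addCarry32(1, 2, 3, &Out) == 0 && Out == 6);
  return 0;
}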
+ case CVTPD2PS_MASK:
+ case CVTPD2I_MASK:
+ case TRUNCATE_TO_REG: {
+ SDValue Src = Op.getOperand(1);
+ SDValue PassThru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+
+ if (isAllOnesConstant(Mask))
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
+
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
+ Mask);
+ }
+ case CVTPS2PH_MASK: {
+ SDValue Src = Op.getOperand(1);
+ SDValue Rnd = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+
+ if (isAllOnesConstant(Mask))
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src, Rnd);
+
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, Rnd,
+ PassThru, Mask);
+
+ }
default:
break;
}
@@ -21018,6 +22060,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
+ case Intrinsic::x86_avx512_ktestc_b:
+ case Intrinsic::x86_avx512_ktestc_w:
+ case Intrinsic::x86_avx512_ktestc_d:
+ case Intrinsic::x86_avx512_ktestc_q:
+ case Intrinsic::x86_avx512_ktestz_b:
+ case Intrinsic::x86_avx512_ktestz_w:
+ case Intrinsic::x86_avx512_ktestz_d:
+ case Intrinsic::x86_avx512_ktestz_q:
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestnzc:
@@ -21036,15 +22086,30 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestz_pd_256:
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
- bool IsTestPacked = false;
+ unsigned TestOpc = X86ISD::PTEST;
X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_avx512_ktestc_b:
+ case Intrinsic::x86_avx512_ktestc_w:
+ case Intrinsic::x86_avx512_ktestc_d:
+ case Intrinsic::x86_avx512_ktestc_q:
+ // CF = 1
+ TestOpc = X86ISD::KTEST;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_avx512_ktestz_b:
+ case Intrinsic::x86_avx512_ktestz_w:
+ case Intrinsic::x86_avx512_ktestz_d:
+ case Intrinsic::x86_avx512_ktestz_q:
+ TestOpc = X86ISD::KTEST;
+ X86CC = X86::COND_E;
+ break;
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256:
@@ -21055,7 +22120,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256:
@@ -21066,7 +22131,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256:
@@ -21077,7 +22142,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
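// A plain-C++ sketch (illustrative only) of the flag-to-boolean mapping these
// ptest/vtest/ktest intrinsics lower to: ZF is set when A & B is zero and CF
// when ~A & B is zero (consult the instruction reference for the exact
// operand roles of each form). testz returns ZF (COND_E), testc returns CF
// (COND_B), and testnzc returns !ZF && !CF (COND_A).
#include <cstdint>

constexpr bool testZF(uint64_t A, uint64_t B) { return (A & B) == 0; }
constexpr bool testCF(uint64_t A, uint64_t B) { return (~A & B) == 0; }

static_assert(testZF(0b0011, 0b1100), "disjoint masks set ZF (the *testz case)");
static_assert(testCF(0b1111, 0b0101), "B contained in A sets CF (the *testc case)");
static_assert(!testZF(0b0110, 0b1100) && !testCF(0b0110, 0b1100),
              "partial overlap leaves both flags clear (the *testnzc case)");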
@@ -21196,14 +22260,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);
}
- case Intrinsic::x86_seh_recoverfp: {
+ case Intrinsic::eh_recoverfp: {
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
- "llvm.x86.seh.recoverfp must take a function as the first argument");
+ "llvm.eh.recoverfp must take a function as the first argument");
return recoverFramePointer(DAG, Fn, IncomingFPOp);
}
@@ -21251,25 +22315,31 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
- MVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getSimpleValueType().getVectorNumElements());
+ unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
+ VT.getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
+
+ // We support two versions of the gather intrinsics. One with scalar mask and
+ // one with vXi1 mask. Convert scalar to vXi1 if necessary.
+ if (Mask.getValueType() != MaskVT)
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
- if (Src.isUndef() || ISD::isBuildVectorAllOnes(VMask.getNode()))
+ if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
- SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain};
+ SDValue Ops[] = {Src, Mask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
return DAG.getMergeValues(RetOps, dl);
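// A plain-C++ sketch (illustrative only, hypothetical names) of the masked
// gather semantics getGatherNode wires up: active lanes load through the
// index vector, inactive lanes keep the pass-through value, and the lane
// count is the minimum of the index and data widths (MinElts above). Scale
// is modeled as simple element indexing.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

template <std::size_t N>
std::array<int32_t, N> gatherLanes(const int32_t *Base,
                                   const std::array<int32_t, N> &Index,
                                   const std::array<bool, N> &Mask,
                                   const std::array<int32_t, N> &PassThru) {
  std::array<int32_t, N> Result{};
  for (std::size_t I = 0; I != N; ++I)
    // The lowering substitutes a zero vector for an undef source to break
    // the false register dependency; the per-lane semantics stay the same.
    Result[I] = Mask[I] ? Base[Index[I]] : PassThru[I];
  return Result;
}

int main() {
  int32_t Mem[8] = {10, 11, 12, 13, 14, 15, 16, 17};
  auto R = gatherLanes<4>(Mem, {7, 0, 3, 5}, {true, false, true, true},
                          {-1, -2, -3, -4});
  assert(R[0] == 17 && R[1] == -2 && R[2] == 13 && R[3] == 15);
  return 0;
}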
@@ -21287,12 +22357,17 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- MVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getSimpleValueType().getVectorNumElements());
+ unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
+ Src.getSimpleValueType().getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
+
+ // We support two versions of the scatter intrinsics. One with scalar mask and
+ // one with vXi1 mask. Convert scalar to vXi1 if necessary.
+ if (Mask.getValueType() != MaskVT)
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
- SDValue Ops[] = {Base, Scale, Index, Disp, Segment, VMask, Src, Chain};
+ SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
return SDValue(Res, 1);
}
@@ -21433,39 +22508,39 @@ static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
}
SDValue Chain = HI.getValue(1);
+ SDValue TSC;
+ if (Subtarget.is64Bit()) {
+ // The EDX register is loaded with the high-order 32 bits of the MSR, and
+ // the EAX register is loaded with the low-order 32 bits.
+ TSC = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
+ DAG.getConstant(32, DL, MVT::i8));
+ TSC = DAG.getNode(ISD::OR, DL, MVT::i64, LO, TSC);
+ } else {
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ TSC = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, { LO, HI });
+ }
+
if (Opcode == X86ISD::RDTSCP_DAG) {
- assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->getNumOperands() == 2 && "Unexpected number of operands!");
// Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
// the ECX register. Add 'ecx' explicitly to the chain.
SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
HI.getValue(2));
- // Explicitly store the content of ECX at the location passed in input
- // to the 'rdtscp' intrinsic.
- Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
- MachinePointerInfo());
- }
- if (Subtarget.is64Bit()) {
- // The EDX register is loaded with the high-order 32 bits of the MSR, and
- // the EAX register is loaded with the low-order 32 bits.
- SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
- DAG.getConstant(32, DL, MVT::i8));
- Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
- Results.push_back(Chain);
+ Results.push_back(TSC);
+ Results.push_back(ecx);
+ Results.push_back(ecx.getValue(1));
return;
}
- // Use a buildpair to merge the two 32-bit values into a 64-bit one.
- SDValue Ops[] = { LO, HI };
- SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
- Results.push_back(Pair);
+ Results.push_back(TSC);
Results.push_back(Chain);
}
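// The EDX:EAX merge performed above, as a plain-C++ sketch (illustrative
// only): on 64-bit targets the two 32-bit halves are combined with a
// shift-and-or, while 32-bit targets keep them as a value pair instead.
#include <cstdint>

constexpr uint64_t combineTSC(uint32_t Lo, uint32_t Hi) {
  // EDX holds the high-order 32 bits of the counter, EAX the low-order bits.
  return (uint64_t(Hi) << 32) | Lo;
}

static_assert(combineTSC(0xDDCCBBAAu, 0x00000001u) == 0x1DDCCBBAAull,
              "the high half lands in bits 63:32");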
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- SmallVector<SDValue, 2> Results;
+ SmallVector<SDValue, 3> Results;
SDLoc DL(Op);
getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
@@ -21529,7 +22604,7 @@ EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
MachineMemOperand *MMO, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
- SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ SDValue Ops[] = { Chain, Val, Ptr, Mask };
return SignedSat ?
DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
@@ -21689,20 +22764,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
}
- // ADC/ADCX/SBB
- case ADX: {
- SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
- SDVTList VTs = DAG.getVTList(Op.getOperand(3).getValueType(), MVT::i32);
- SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
- DAG.getConstant(-1, dl, MVT::i8));
- SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
- Op.getOperand(4), GenCF.getValue(1));
- SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
- Op.getOperand(5), MachinePointerInfo());
- SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
- SDValue Results[] = { SetCC, Store };
- return DAG.getMergeValues(Results, dl);
- }
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
@@ -22255,11 +23316,10 @@ static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
// we just take the hi result (by masking the lo result to zero before the
// add).
SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0));
- SDValue Zero = getZeroVector(CurrVT, Subtarget, DAG, DL);
+ SDValue Zero = DAG.getConstant(0, DL, CurrVT);
- SDValue NibbleMask = DAG.getConstant(0xF, DL, CurrVT);
SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT);
- SDValue Lo = DAG.getNode(ISD::AND, DL, CurrVT, Op0, NibbleMask);
+ SDValue Lo = Op0;
SDValue Hi = DAG.getNode(ISD::SRL, DL, CurrVT, Op0, NibbleShift);
SDValue HiZ;
if (CurrVT.is512BitVector()) {
@@ -22377,38 +23437,23 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}
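// A plain-C++ sketch (illustrative only) of the classic cttz identities the
// (removed) vector path of LowerCTTZ below relied on: x & -x isolates the
// lowest set bit, and cttz(x) == popcount((x & -x) - 1), which conveniently
// also returns the bit width for x == 0.
#include <cstdint>

constexpr unsigned popcount32(uint32_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1)
    ++N;
  return N;
}

constexpr unsigned cttz32(uint32_t X) {
  // (X & -X) keeps only the lowest set bit; subtracting 1 turns it into a
  // mask of exactly cttz(X) ones, which popcount then counts.
  return popcount32((X & (0u - X)) - 1u);
}

static_assert(cttz32(0x00000001u) == 0, "bit 0 set");
static_assert(cttz32(0x00500000u) == 20, "lowest set bit at position 20");
static_assert(cttz32(0u) == 32, "zero input yields the bit width");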
-static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
unsigned NumBits = VT.getScalarSizeInBits();
+ SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
- if (VT.isVector()) {
- SDValue N0 = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, dl, VT);
-
- // lsb(x) = (x & -x)
- SDValue LSB = DAG.getNode(ISD::AND, dl, VT, N0,
- DAG.getNode(ISD::SUB, dl, VT, Zero, N0));
-
- // cttz_undef(x) = (width - 1) - ctlz(lsb)
- if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
- SDValue WidthMinusOne = DAG.getConstant(NumBits - 1, dl, VT);
- return DAG.getNode(ISD::SUB, dl, VT, WidthMinusOne,
- DAG.getNode(ISD::CTLZ, dl, VT, LSB));
- }
-
- // cttz(x) = ctpop(lsb - 1)
- SDValue One = DAG.getConstant(1, dl, VT);
- return DAG.getNode(ISD::CTPOP, dl, VT,
- DAG.getNode(ISD::SUB, dl, VT, LSB, One));
- }
+ // Decompose 256-bit ops into smaller 128-bit ops.
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
+ return Lower256IntUnary(Op, DAG);
- assert(Op.getOpcode() == ISD::CTTZ &&
+ assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ &&
"Only scalar CTTZ requires custom lowering");
// Issue a bsf (scan bits forward) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
- Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op.getOperand(0));
+ Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0);
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {
@@ -22422,7 +23467,7 @@ static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
/// Break a 256-bit integer operation into two new 128-bit ones and then
/// concatenate the result back.
-static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
+static SDValue split256IntArith(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is256BitVector() && VT.isInteger() &&
@@ -22451,7 +23496,7 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
/// Break a 512-bit integer operation into two new 256-bit ones and then
/// concatenate the result back.
-static SDValue Lower512IntArith(SDValue Op, SelectionDAG &DAG) {
+static SDValue split512IntArith(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is512BitVector() && VT.isInteger() &&
@@ -22478,18 +23523,46 @@ static SDValue Lower512IntArith(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
-static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
+static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
+ if (VT == MVT::i16 || VT == MVT::i32)
+ return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
+
if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::XOR, SDLoc(Op), VT,
Op.getOperand(0), Op.getOperand(1));
+
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
}
-static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ if (VT.getScalarType() == MVT::i1) {
+ SDLoc dl(Op);
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Expected saturated arithmetic opcode");
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
+ DAG.getNOT(dl, Op.getOperand(1), VT));
+ }
+ }
+
+ assert(Op.getSimpleValueType().is256BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return split256IntArith(Op, DAG);
+}
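// A plain-C++ check (illustrative only) of the 1-bit identities used above:
// modular add/sub on i1 is XOR, saturating add clamps to 1 (OR), and
// saturating sub clamps to 0 (AND NOT). The loop covers all four inputs.
#include <algorithm>
#include <cassert>

int main() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B) {
      assert(((A + B) & 1) == (A ^ B));        // add/sub mod 2 == xor
      assert(std::min(A + B, 1) == (A | B));   // uaddsat/saddsat == or
      assert(std::max(A - B, 0) == (A & ~B));  // usubsat/ssubsat == andnot
    }
  return 0;
}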
+
+static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
// Since X86 does not have CMOV for 8-bit integer, we don't convert
@@ -22503,10 +23576,23 @@ static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::CMOV, DL, VT, Ops);
}
- assert(Op.getSimpleValueType().is256BitVector() &&
- Op.getSimpleValueType().isInteger() &&
- "Only handle AVX 256-bit vector integer operation");
- return Lower256IntUnary(Op, DAG);
+ // ABS(vXi64 X) --> VPBLENDVPD(X, 0-X, X).
+ if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.hasSSE41()) {
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
+ return DAG.getNode(X86ISD::BLENDV, DL, VT, Src, Sub, Src);
+ }
+
+ if (VT.is256BitVector() && !Subtarget.hasInt256()) {
+ assert(VT.isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntUnary(Op, DAG);
+ }
+
+ // Default to expand.
+ return SDValue();
}
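// A scalar plain-C++ model (illustrative only) of the ABS(vXi64 X) -->
// BLENDV(X, 0-X, X) pattern above: BLENDV picks the second source for lanes
// whose mask lane has its sign bit set, so using X itself as the mask selects
// the negated value exactly for negative lanes.
#include <cstdint>

constexpr int64_t blendvLane(int64_t IfClear, int64_t IfSet, int64_t Mask) {
  // Only the sign bit of the mask lane matters to VBLENDVPD/PBLENDVB.
  return Mask < 0 ? IfSet : IfClear;
}

constexpr int64_t absViaBlendv(int64_t X) {
  return blendvLane(/*IfClear=*/X, /*IfSet=*/0 - X, /*Mask=*/X);
}

static_assert(absViaBlendv(42) == 42, "positive lanes pass through");
static_assert(absViaBlendv(-7) == 7, "negative lanes take 0 - X");
static_assert(absViaBlendv(0) == 0, "zero is unchanged");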
static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
@@ -22514,7 +23600,7 @@ static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
// For AVX1 cases, split to use legal ops (everything but v4i64).
if (VT.getScalarType() != MVT::i64 && VT.is256BitVector())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
@@ -22556,9 +23642,9 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
- // Decompose 256-bit ops into smaller 128-bit ops.
+ // Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
@@ -22566,53 +23652,49 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Lower v16i8/v32i8/v64i8 mul as sign-extension to v8i16/v16i16/v32i16
// vector pairs, multiply and truncate.
if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {
- if (Subtarget.hasInt256()) {
- // For 512-bit vectors, split into 256-bit vectors to allow the
- // sign-extension to occur.
- if (VT == MVT::v64i8)
- return Lower512IntArith(Op, DAG);
-
- // For 256-bit vectors, split into 128-bit vectors to allow the
- // sign-extension to occur. We don't need this on AVX512BW as we can
- // safely sign-extend to v32i16.
- if (VT == MVT::v32i8 && !Subtarget.hasBWI())
- return Lower256IntArith(Op, DAG);
+ unsigned NumElts = VT.getVectorNumElements();
+ if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
+ (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
return DAG.getNode(
ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::MUL, dl, ExVT,
- DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, A),
- DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, B)));
+ DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, A),
+ DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, B)));
}
- assert(VT == MVT::v16i8 &&
- "Pre-AVX2 support only supports v16i8 multiplication");
- MVT ExVT = MVT::v8i16;
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- // Extract the lo parts and sign extend to i16
+ // Extract the lo/hi parts and any-extend to i16.
// We're going to mask off the low byte of each result element of the
// pmullw, so it doesn't matter what's in the high byte of each 16-bit
// element.
- const int LoShufMask[] = {0, -1, 1, -1, 2, -1, 3, -1,
- 4, -1, 5, -1, 6, -1, 7, -1};
- SDValue ALo = DAG.getVectorShuffle(VT, dl, A, A, LoShufMask);
- SDValue BLo = DAG.getVectorShuffle(VT, dl, B, B, LoShufMask);
- ALo = DAG.getBitcast(ExVT, ALo);
- BLo = DAG.getBitcast(ExVT, BLo);
-
- // Extract the hi parts and sign extend to i16
- // We're going to mask off the low byte of each result element of the
- // pmullw, so it doesn't matter what's in the high byte of each 16-bit
- // element.
- const int HiShufMask[] = {8, -1, 9, -1, 10, -1, 11, -1,
- 12, -1, 13, -1, 14, -1, 15, -1};
- SDValue AHi = DAG.getVectorShuffle(VT, dl, A, A, HiShufMask);
- SDValue BHi = DAG.getVectorShuffle(VT, dl, B, B, HiShufMask);
- AHi = DAG.getBitcast(ExVT, AHi);
- BHi = DAG.getBitcast(ExVT, BHi);
-
- // Multiply, mask the lower 8bits of the lo/hi results and pack
+ SDValue Undef = DAG.getUNDEF(VT);
+ SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef));
+ SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef));
+
+ SDValue BLo, BHi;
+ if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
+ // If the RHS is a constant, manually unpackl/unpackh.
+ SmallVector<SDValue, 16> LoOps, HiOps;
+ for (unsigned i = 0; i != NumElts; i += 16) {
+ for (unsigned j = 0; j != 8; ++j) {
+ LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl,
+ MVT::i16));
+ HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl,
+ MVT::i16));
+ }
+ }
+
+ BLo = DAG.getBuildVector(ExVT, dl, LoOps);
+ BHi = DAG.getBuildVector(ExVT, dl, HiOps);
+ } else {
+ BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));
+ BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));
+ }
+
+ // Multiply, mask the lower 8bits of the lo/hi results and pack.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT));
@@ -22661,9 +23743,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
//
// Hi = psllqi(AloBhi + AhiBlo, 32);
// return AloBlo + Hi;
- KnownBits AKnown, BKnown;
- DAG.computeKnownBits(A, AKnown);
- DAG.computeKnownBits(B, BKnown);
+ KnownBits AKnown = DAG.computeKnownBits(A);
+ KnownBits BKnown = DAG.computeKnownBits(B);
APInt LowerBitsMask = APInt::getLowBitsSet(64, 32);
bool ALoIsZero = LowerBitsMask.isSubsetOf(AKnown.Zero);
@@ -22673,7 +23754,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
bool AHiIsZero = UpperBitsMask.isSubsetOf(AKnown.Zero);
bool BHiIsZero = UpperBitsMask.isSubsetOf(BKnown.Zero);
- SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
// Only multiply lo/hi halves that aren't known to be zero.
SDValue AloBlo = Zero;
@@ -22702,10 +23783,79 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ bool IsSigned = Op->getOpcode() == ISD::MULHS;
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
- // Decompose 256-bit ops into smaller 128-bit ops.
+ // Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
+
+ if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
+ assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
+ (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
+ (VT == MVT::v16i32 && Subtarget.hasAVX512()));
+
+ // PMULxD operations multiply each even value (starting at 0) of LHS with
+ // the related value of RHS and produce a widened result.
+ // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
+ // => <2 x i64> <ae|cg>
+ //
+ // In other words, to have all the results, we need to perform two PMULxD:
+ // 1. one with the even values.
+ // 2. one with the odd values.
+ // To achieve #2, we need to place the odd values at an even position.
+ //
+ // Place the odd value at an even position (basically, shift all values 1
+ // step to the left):
+ const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
+ 9, -1, 11, -1, 13, -1, 15, -1};
+ // <a|b|c|d> => <b|undef|d|undef>
+ SDValue Odd0 = DAG.getVectorShuffle(VT, dl, A, A,
+ makeArrayRef(&Mask[0], NumElts));
+ // <e|f|g|h> => <f|undef|h|undef>
+ SDValue Odd1 = DAG.getVectorShuffle(VT, dl, B, B,
+ makeArrayRef(&Mask[0], NumElts));
+
+ // Emit two multiplies, one for the lower 2 ints and one for the higher 2
+ // ints.
+ MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
+ unsigned Opcode =
+ (IsSigned && Subtarget.hasSSE41()) ? X86ISD::PMULDQ : X86ISD::PMULUDQ;
+ // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
+ // => <2 x i64> <ae|cg>
+ SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
+ DAG.getBitcast(MulVT, A),
+ DAG.getBitcast(MulVT, B)));
+ // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
+ // => <2 x i64> <bf|dh>
+ SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
+ DAG.getBitcast(MulVT, Odd0),
+ DAG.getBitcast(MulVT, Odd1)));
+
+ // Shuffle it back into the right order.
+ SmallVector<int, 16> ShufMask(NumElts);
+ for (int i = 0; i != (int)NumElts; ++i)
+ ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
+
+ SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask);
+
+ // If we have a signed multiply but no PMULDQ, fix up the result of an
+ // unsigned multiply.
+ if (IsSigned && !Subtarget.hasSSE41()) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B);
+ SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A);
+
+ SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);
+ }
+
+ return Res;
+ }
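// A per-lane plain-C++ check (illustrative only) of the signed-high-multiply
// fixup used above when PMULDQ is unavailable: the high 32 bits of a signed
// product equal the unsigned high bits minus B where A is negative and minus
// A where B is negative, exactly what the two SETGT/AND terms compute.
#include <cassert>
#include <cstdint>

static int32_t mulhs(int32_t A, int32_t B) {
  return int32_t((int64_t(A) * int64_t(B)) >> 32);
}

static int32_t mulhsViaUnsigned(int32_t A, int32_t B) {
  // Unsigned widening multiply, as PMULUDQ produces for each even/odd lane.
  uint64_t Wide = uint64_t(uint32_t(A)) * uint64_t(uint32_t(B));
  uint32_t HiU = uint32_t(Wide >> 32);
  // Fixup terms: subtract the other operand wherever an input is negative.
  uint32_t Fix = (A < 0 ? uint32_t(B) : 0u) + (B < 0 ? uint32_t(A) : 0u);
  return int32_t(HiU - Fix);
}

int main() {
  const int32_t Vals[] = {0, 1, -1, 7, -13, 100000, -100000,
                          INT32_MAX, INT32_MIN};
  for (int32_t A : Vals)
    for (int32_t B : Vals)
      assert(mulhs(A, B) == mulhsViaUnsigned(A, B));
  return 0;
}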
// Only i8 vectors should need custom lowering after this.
assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
@@ -22714,123 +23864,141 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
// Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
// logical shift down the upper half and pack back to i8.
- SDValue A = Op.getOperand(0);
- SDValue B = Op.getOperand(1);
// With SSE41 we can use sign/zero extend, but for pre-SSE41 we unpack
// and then ashr/lshr the upper bits down to the lower bits before multiply.
- unsigned Opcode = Op.getOpcode();
- unsigned ExShift = (ISD::MULHU == Opcode ? ISD::SRL : ISD::SRA);
- unsigned ExAVX = (ISD::MULHU == Opcode ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
+ unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
+ if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
+ (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
+ SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
+ SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);
+ Mul = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
+ }
- // For 512-bit vectors, split into 256-bit vectors to allow the
+ // For signed 512-bit vectors, split into 256-bit vectors to allow the
// sign-extension to occur.
- if (VT == MVT::v64i8)
- return Lower512IntArith(Op, DAG);
+ if (VT == MVT::v64i8 && IsSigned)
+ return split512IntArith(Op, DAG);
- // AVX2 implementations - extend xmm subvectors to ymm.
- if (Subtarget.hasInt256()) {
- unsigned NumElems = VT.getVectorNumElements();
+ // Signed AVX2 implementation - extend xmm subvectors to ymm.
+ if (VT == MVT::v32i8 && IsSigned) {
SDValue Lo = DAG.getIntPtrConstant(0, dl);
- SDValue Hi = DAG.getIntPtrConstant(NumElems / 2, dl);
-
- if (VT == MVT::v32i8) {
- if (Subtarget.canExtendTo512BW()) {
- SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v32i16, A);
- SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v32i16, B);
- SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v32i16, ExA, ExB);
- Mul = DAG.getNode(ISD::SRL, dl, MVT::v32i16, Mul,
- DAG.getConstant(8, dl, MVT::v32i16));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
- }
- SDValue ALo = extract128BitVector(A, 0, DAG, dl);
- SDValue BLo = extract128BitVector(B, 0, DAG, dl);
- SDValue AHi = extract128BitVector(A, NumElems / 2, DAG, dl);
- SDValue BHi = extract128BitVector(B, NumElems / 2, DAG, dl);
- ALo = DAG.getNode(ExAVX, dl, MVT::v16i16, ALo);
- BLo = DAG.getNode(ExAVX, dl, MVT::v16i16, BLo);
- AHi = DAG.getNode(ExAVX, dl, MVT::v16i16, AHi);
- BHi = DAG.getNode(ExAVX, dl, MVT::v16i16, BHi);
- Lo = DAG.getNode(ISD::SRL, dl, MVT::v16i16,
- DAG.getNode(ISD::MUL, dl, MVT::v16i16, ALo, BLo),
- DAG.getConstant(8, dl, MVT::v16i16));
- Hi = DAG.getNode(ISD::SRL, dl, MVT::v16i16,
- DAG.getNode(ISD::MUL, dl, MVT::v16i16, AHi, BHi),
- DAG.getConstant(8, dl, MVT::v16i16));
- // The ymm variant of PACKUS treats the 128-bit lanes separately, so before
- // using PACKUS we need to permute the inputs to the correct lo/hi xmm lane.
- const int LoMask[] = {0, 1, 2, 3, 4, 5, 6, 7,
- 16, 17, 18, 19, 20, 21, 22, 23};
- const int HiMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
- 24, 25, 26, 27, 28, 29, 30, 31};
- return DAG.getNode(X86ISD::PACKUS, dl, VT,
- DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, LoMask),
- DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask));
- }
-
- assert(VT == MVT::v16i8 && "Unexpected VT");
-
- SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v16i16, A);
- SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v16i16, B);
- SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB);
- Mul = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul,
- DAG.getConstant(8, dl, MVT::v16i16));
- // If we have BWI we can use truncate instruction.
- if (Subtarget.hasBWI())
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Lo);
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Hi);
- return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
- }
-
- assert(VT == MVT::v16i8 &&
- "Pre-AVX2 support only supports v16i8 multiplication");
- MVT ExVT = MVT::v8i16;
- unsigned ExSSE41 = ISD::MULHU == Opcode ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG;
+ SDValue Hi = DAG.getIntPtrConstant(NumElts / 2, dl);
+
+ MVT ExVT = MVT::v16i16;
+ SDValue ALo = extract128BitVector(A, 0, DAG, dl);
+ SDValue BLo = extract128BitVector(B, 0, DAG, dl);
+ SDValue AHi = extract128BitVector(A, NumElts / 2, DAG, dl);
+ SDValue BHi = extract128BitVector(B, NumElts / 2, DAG, dl);
+ ALo = DAG.getNode(ExAVX, dl, ExVT, ALo);
+ BLo = DAG.getNode(ExAVX, dl, ExVT, BLo);
+ AHi = DAG.getNode(ExAVX, dl, ExVT, AHi);
+ BHi = DAG.getNode(ExAVX, dl, ExVT, BHi);
+ Lo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
+ Hi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
+ Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Lo, 8, DAG);
+ Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Hi, 8, DAG);
+
+ // Bitcast back to VT and then pack all the even elements from Lo and Hi.
+ // Shuffle lowering should turn this into PACKUS+PERMQ
+ Lo = DAG.getBitcast(VT, Lo);
+ Hi = DAG.getBitcast(VT, Hi);
+ return DAG.getVectorShuffle(VT, dl, Lo, Hi,
+ { 0, 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 26, 28, 30,
+ 32, 34, 36, 38, 40, 42, 44, 46,
+ 48, 50, 52, 54, 56, 58, 60, 62});
+ }
+
+ // For signed v16i8 and all unsigned vXi8 we will unpack the low and high
+ // half of each 128-bit lane to widen to a vXi16 type. Do the multiplies,
+ // shift the results and pack the half lane results back together.
+
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+
+ static const int PSHUFDMask[] = { 8, 9, 10, 11, 12, 13, 14, 15,
+ -1, -1, -1, -1, -1, -1, -1, -1};
// Extract the lo parts and zero/sign extend to i16.
- SDValue ALo, BLo;
- if (Subtarget.hasSSE41()) {
- ALo = DAG.getNode(ExSSE41, dl, ExVT, A);
- BLo = DAG.getNode(ExSSE41, dl, ExVT, B);
+ // Only use SSE4.1 instructions for signed v16i8 where using unpack requires
+ // shifts to sign extend. Using unpack for unsigned only requires an xor to
+ // create zeros and a copy due to tied register constraints pre-AVX. But using
+ // zero_extend_vector_inreg would require an additional pshufd for the high
+ // part.
+
+ SDValue ALo, AHi;
+ if (IsSigned && VT == MVT::v16i8 && Subtarget.hasSSE41()) {
+ ALo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, A);
+
+ AHi = DAG.getVectorShuffle(VT, dl, A, A, PSHUFDMask);
+ AHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, AHi);
+ } else if (IsSigned) {
+ ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), A));
+ AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), A));
+
+ ALo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, ALo, 8, DAG);
+ AHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, AHi, 8, DAG);
} else {
- const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
- -1, 4, -1, 5, -1, 6, -1, 7};
- ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
- BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- ALo = DAG.getBitcast(ExVT, ALo);
- BLo = DAG.getBitcast(ExVT, BLo);
- ALo = DAG.getNode(ExShift, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT));
- BLo = DAG.getNode(ExShift, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT));
- }
-
- // Extract the hi parts and zero/sign extend to i16.
- SDValue AHi, BHi;
- if (Subtarget.hasSSE41()) {
- const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
- -1, -1, -1, -1, -1, -1, -1, -1};
- AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
- BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getNode(ExSSE41, dl, ExVT, AHi);
- BHi = DAG.getNode(ExSSE41, dl, ExVT, BHi);
+ ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A,
+ DAG.getConstant(0, dl, VT)));
+ AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A,
+ DAG.getConstant(0, dl, VT)));
+ }
+
+ SDValue BLo, BHi;
+ if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
+ // If the RHS is a constant, manually unpackl/unpackh and extend.
+ SmallVector<SDValue, 16> LoOps, HiOps;
+ for (unsigned i = 0; i != NumElts; i += 16) {
+ for (unsigned j = 0; j != 8; ++j) {
+ SDValue LoOp = B.getOperand(i + j);
+ SDValue HiOp = B.getOperand(i + j + 8);
+
+ if (IsSigned) {
+ LoOp = DAG.getSExtOrTrunc(LoOp, dl, MVT::i16);
+ HiOp = DAG.getSExtOrTrunc(HiOp, dl, MVT::i16);
+ } else {
+ LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16);
+ HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16);
+ }
+
+ LoOps.push_back(LoOp);
+ HiOps.push_back(HiOp);
+ }
+ }
+
+ BLo = DAG.getBuildVector(ExVT, dl, LoOps);
+ BHi = DAG.getBuildVector(ExVT, dl, HiOps);
+ } else if (IsSigned && VT == MVT::v16i8 && Subtarget.hasSSE41()) {
+ BLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, B);
+
+ BHi = DAG.getVectorShuffle(VT, dl, B, B, PSHUFDMask);
+ BHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, BHi);
+ } else if (IsSigned) {
+ BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), B));
+ BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), B));
+
+ BLo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BLo, 8, DAG);
+ BHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BHi, 8, DAG);
} else {
- const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
- -1, 12, -1, 13, -1, 14, -1, 15};
- AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
- BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getBitcast(ExVT, AHi);
- BHi = DAG.getBitcast(ExVT, BHi);
- AHi = DAG.getNode(ExShift, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT));
- BHi = DAG.getNode(ExShift, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT));
+ BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B,
+ DAG.getConstant(0, dl, VT)));
+ BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B,
+ DAG.getConstant(0, dl, VT)));
}
// Multiply, lshr the upper 8bits to the lower 8bits of the lo/hi results and
- // pack back to v16i8.
+ // pack back to vXi8.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
- RLo = DAG.getNode(ISD::SRL, dl, ExVT, RLo, DAG.getConstant(8, dl, ExVT));
- RHi = DAG.getNode(ISD::SRL, dl, ExVT, RHi, DAG.getConstant(8, dl, ExVT));
+ RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RLo, 8, DAG);
+ RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RHi, 8, DAG);
+
+ // Bitcast back to VT and then pack all the even elements from Lo and Hi.
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
@@ -22890,105 +24058,6 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
return DAG.getBitcast(VT, CallInfo.first);
}
-static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
- MVT VT = Op0.getSimpleValueType();
- SDLoc dl(Op);
-
- // Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.is256BitVector() && !Subtarget.hasInt256()) {
- unsigned Opcode = Op.getOpcode();
- unsigned NumElems = VT.getVectorNumElements();
- MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), NumElems / 2);
- SDValue Lo0 = extract128BitVector(Op0, 0, DAG, dl);
- SDValue Lo1 = extract128BitVector(Op1, 0, DAG, dl);
- SDValue Hi0 = extract128BitVector(Op0, NumElems / 2, DAG, dl);
- SDValue Hi1 = extract128BitVector(Op1, NumElems / 2, DAG, dl);
- SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Lo0, Lo1);
- SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Hi0, Hi1);
- SDValue Ops[] = {
- DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(0), Hi.getValue(0)),
- DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(1), Hi.getValue(1))
- };
- return DAG.getMergeValues(Ops, dl);
- }
-
- assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
- (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
- (VT == MVT::v16i32 && Subtarget.hasAVX512()));
-
- int NumElts = VT.getVectorNumElements();
-
- // PMULxD operations multiply each even value (starting at 0) of LHS with
- // the related value of RHS and produce a widen result.
- // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
- // => <2 x i64> <ae|cg>
- //
- // In other word, to have all the results, we need to perform two PMULxD:
- // 1. one with the even values.
- // 2. one with the odd values.
- // To achieve #2, with need to place the odd values at an even position.
- //
- // Place the odd value at an even position (basically, shift all values 1
- // step to the left):
- const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1};
- // <a|b|c|d> => <b|undef|d|undef>
- SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0,
- makeArrayRef(&Mask[0], NumElts));
- // <e|f|g|h> => <f|undef|h|undef>
- SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1,
- makeArrayRef(&Mask[0], NumElts));
-
- // Emit two multiplies, one for the lower 2 ints and one for the higher 2
- // ints.
- MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
- bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
- unsigned Opcode =
- (!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
- // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
- // => <2 x i64> <ae|cg>
- SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
- DAG.getBitcast(MulVT, Op0),
- DAG.getBitcast(MulVT, Op1)));
- // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
- // => <2 x i64> <bf|dh>
- SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
- DAG.getBitcast(MulVT, Odd0),
- DAG.getBitcast(MulVT, Odd1)));
-
- // Shuffle it back into the right order.
- SmallVector<int, 16> HighMask(NumElts);
- SmallVector<int, 16> LowMask(NumElts);
- for (int i = 0; i != NumElts; ++i) {
- HighMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
- LowMask[i] = (i / 2) * 2 + ((i % 2) * NumElts);
- }
-
- SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
-
- // If we have a signed multiply but no PMULDQ fix up the high parts of a
- // unsigned multiply.
- if (IsSigned && !Subtarget.hasSSE41()) {
- SDValue ShAmt = DAG.getConstant(
- 31, dl,
- DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout()));
- SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
- SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0);
-
- SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
- Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
- }
-
- // The first result of MUL_LOHI is actually the low value, followed by the
- // high value.
- SDValue Ops[] = {Lows, Highs};
- return DAG.getMergeValues(Ops, dl);
-}
-
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
@@ -23042,9 +24111,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
-
- unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI :
- (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
+ unsigned X86Opc = getTargetVShiftUniformOpcode(Op.getOpcode(), false);
auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
@@ -23055,8 +24122,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
"Unsupported PCMPGT op");
- return DAG.getNode(X86ISD::PCMPGT, dl, VT,
- getZeroVector(VT, Subtarget, DAG, dl), R);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R);
}
if (ShiftAmt >= 32) {
@@ -23071,7 +24137,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
{9, 1, 11, 3, 13, 5, 15, 7});
} else {
- // SRA upper i32, SHL whole i64 and select lower i32.
+ // SRA upper i32, SRL whole i64 and select lower i32.
SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
ShiftAmt, DAG);
SDValue Lower =
@@ -23087,199 +24153,123 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
};
// Optimize shl/srl/sra with constant shift amount.
- if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
- if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
- uint64_t ShiftAmt = ShiftConst->getZExtValue();
-
- if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
- return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
-
- // i64 SRA needs to be performed as partial shifts.
- if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
- (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
- Op.getOpcode() == ISD::SRA)
- return ArithmeticShiftRight64(ShiftAmt);
-
- if (VT == MVT::v16i8 ||
- (Subtarget.hasInt256() && VT == MVT::v32i8) ||
- VT == MVT::v64i8) {
- unsigned NumElts = VT.getVectorNumElements();
- MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
-
- // Simple i8 add case
- if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
- return DAG.getNode(ISD::ADD, dl, VT, R, R);
-
- // ashr(R, 7) === cmp_slt(R, 0)
- if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
- SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
- if (VT.is512BitVector()) {
- assert(VT == MVT::v64i8 && "Unexpected element type!");
- SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R,
- ISD::SETGT);
- return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
- }
- return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
- }
+ APInt APIntShiftAmt;
+ if (!isConstantSplat(Amt, APIntShiftAmt))
+ return SDValue();
+ uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
- // XOP can shift v16i8 directly instead of as shift v8i16 + mask.
- if (VT == MVT::v16i8 && Subtarget.hasXOP())
- return SDValue();
+ if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
+ return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
- if (Op.getOpcode() == ISD::SHL) {
- // Make a large shift.
- SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT,
- R, ShiftAmt, DAG);
- SHL = DAG.getBitcast(VT, SHL);
- // Zero out the rightmost bits.
- return DAG.getNode(ISD::AND, dl, VT, SHL,
- DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
- }
- if (Op.getOpcode() == ISD::SRL) {
- // Make a large shift.
- SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT,
- R, ShiftAmt, DAG);
- SRL = DAG.getBitcast(VT, SRL);
- // Zero out the leftmost bits.
- return DAG.getNode(ISD::AND, dl, VT, SRL,
- DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
- }
- if (Op.getOpcode() == ISD::SRA) {
- // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
- SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
-
- SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
- Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
- Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
- return Res;
- }
- llvm_unreachable("Unknown shift opcode.");
- }
- }
- }
+ // i64 SRA needs to be performed as partial shifts.
+ if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
+ (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
+ Op.getOpcode() == ISD::SRA)
+ return ArithmeticShiftRight64(ShiftAmt);
- // Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
- // TODO: Replace constant extraction with getTargetConstantBitsFromNode.
- if (!Subtarget.hasXOP() &&
- (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) ||
- (Subtarget.hasAVX512() && VT == MVT::v8i64))) {
+ if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
+ VT == MVT::v64i8) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- // AVX1 targets maybe extracting a 128-bit vector from a 256-bit constant.
- unsigned SubVectorScale = 1;
- if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- SubVectorScale =
- Amt.getOperand(0).getValueSizeInBits() / Amt.getValueSizeInBits();
- Amt = Amt.getOperand(0);
- }
+ // Simple i8 add case
+ if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
+ return DAG.getNode(ISD::ADD, dl, VT, R, R);
- // Peek through any splat that was introduced for i64 shift vectorization.
- int SplatIndex = -1;
- if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
- if (SVN->isSplat()) {
- SplatIndex = SVN->getSplatIndex();
- Amt = Amt.getOperand(0);
- assert(SplatIndex < (int)VT.getVectorNumElements() &&
- "Splat shuffle referencing second operand");
+ // ashr(R, 7) === cmp_slt(R, 0)
+ if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
+ SDValue Zeros = DAG.getConstant(0, dl, VT);
+ if (VT.is512BitVector()) {
+ assert(VT == MVT::v64i8 && "Unexpected element type!");
+ SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT);
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
}
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
- if (Amt.getOpcode() != ISD::BITCAST ||
- Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR)
+ // XOP can shift v16i8 directly instead of as shift v8i16 + mask.
+ if (VT == MVT::v16i8 && Subtarget.hasXOP())
return SDValue();
- Amt = Amt.getOperand(0);
- unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
- (SubVectorScale * VT.getVectorNumElements());
- unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
- uint64_t ShiftAmt = 0;
- unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
- for (unsigned i = 0; i != Ratio; ++i) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + BaseOp));
- if (!C)
- return SDValue();
- // 6 == Log2(64)
- ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
- }
-
- // Check remaining shift amounts (if not a splat).
- if (SplatIndex < 0) {
- for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
- uint64_t ShAmt = 0;
- for (unsigned j = 0; j != Ratio; ++j) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
- if (!C)
- return SDValue();
- // 6 == Log2(64)
- ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
- }
- if (ShAmt != ShiftAmt)
- return SDValue();
- }
+ if (Op.getOpcode() == ISD::SHL) {
+ // Make a large shift.
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R,
+ ShiftAmt, DAG);
+ SHL = DAG.getBitcast(VT, SHL);
+ // Zero out the rightmost bits.
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
+ }
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R,
+ ShiftAmt, DAG);
+ SRL = DAG.getBitcast(VT, SRL);
+ // Zero out the leftmost bits.
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+
+ SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
}
-
- if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
- return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
-
- if (Op.getOpcode() == ISD::SRA)
- return ArithmeticShiftRight64(ShiftAmt);
+ llvm_unreachable("Unknown shift opcode.");
}
return SDValue();
}
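// A plain-C++ check (illustrative only, assuming the usual two's-complement
// arithmetic right shift) of the vXi8 shift tricks above, modeled on one
// 16-bit lane holding two bytes: the wide shift leaks bits from the
// neighbouring byte, the AND masks strip exactly those leaked bits, and
// arithmetic shift is rebuilt from a logical shift via
// ashr(x, amt) == ((x >> amt) ^ m) - m with m = 0x80 >> amt.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned Amt = 0; Amt != 8; ++Amt) {
    uint8_t ShlMask = uint8_t(0xFFu << Amt); // zero out the rightmost bits
    uint8_t SrlMask = uint8_t(0xFFu >> Amt); // zero out the leftmost bits
    uint8_t SraMask = uint8_t(0x80u >> Amt); // sign bit after the shift
    for (unsigned Lo = 0; Lo != 256; ++Lo) {
      for (unsigned Hi = 0; Hi != 256; ++Hi) {
        uint16_t W = uint16_t((Hi << 8) | Lo);
        // SHL as one wide 16-bit shift plus a per-byte mask.
        uint16_t S = uint16_t(W << Amt);
        assert(uint8_t((S & 0xFF) & ShlMask) == uint8_t(Lo << Amt));
        assert(uint8_t((S >> 8) & ShlMask) == uint8_t(Hi << Amt));
        // SRL as one wide 16-bit shift plus a per-byte mask.
        uint16_t R = uint16_t(W >> Amt);
        assert(uint8_t((R & 0xFF) & SrlMask) == uint8_t(Lo >> Amt));
        assert(uint8_t((R >> 8) & SrlMask) == uint8_t(Hi >> Amt));
        // SRA rebuilt from SRL: xor with, then subtract, the shifted sign bit.
        uint8_t L = uint8_t(uint8_t(Lo) >> Amt);
        assert(int8_t(uint8_t((L ^ SraMask) - SraMask)) ==
               int8_t(int8_t(Lo) >> Amt));
      }
    }
  }
  return 0;
}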
-// Determine if V is a splat value, and return the scalar.
-static SDValue IsSplatValue(MVT VT, SDValue V, const SDLoc &dl,
- SelectionDAG &DAG, const X86Subtarget &Subtarget,
- unsigned Opcode) {
- V = peekThroughEXTRACT_SUBVECTORs(V);
-
- // Check if this is a splat build_vector node.
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V)) {
- SDValue SplatAmt = BV->getSplatValue();
- if (SplatAmt && SplatAmt.isUndef())
- return SDValue();
- return SplatAmt;
- }
-
- // Check for SUB(SPLAT_BV, SPLAT) cases from rotate patterns.
- if (V.getOpcode() == ISD::SUB &&
- !SupportedVectorVarShift(VT, Subtarget, Opcode)) {
- SDValue LHS = peekThroughEXTRACT_SUBVECTORs(V.getOperand(0));
- SDValue RHS = peekThroughEXTRACT_SUBVECTORs(V.getOperand(1));
+// If V is a splat value, return the source vector and splat index.
+static SDValue IsSplatVector(SDValue V, int &SplatIdx, SelectionDAG &DAG) {
+ V = peekThroughEXTRACT_SUBVECTORs(V);
- // Ensure that the corresponding splat BV element is not UNDEF.
- BitVector UndefElts;
- BuildVectorSDNode *BV0 = dyn_cast<BuildVectorSDNode>(LHS);
- ShuffleVectorSDNode *SVN1 = dyn_cast<ShuffleVectorSDNode>(RHS);
- if (BV0 && SVN1 && BV0->getSplatValue(&UndefElts) && SVN1->isSplat()) {
- unsigned SplatIdx = (unsigned)SVN1->getSplatIndex();
- if (!UndefElts[SplatIdx])
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getVectorElementType(), V,
- DAG.getIntPtrConstant(SplatIdx, dl));
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+ switch (Opcode) {
+ default: {
+ APInt UndefElts;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (DAG.isSplatValue(V, DemandedElts, UndefElts)) {
+ // Handle case where all demanded elements are UNDEF.
+ if (DemandedElts.isSubsetOf(UndefElts)) {
+ SplatIdx = 0;
+ return DAG.getUNDEF(VT);
+ }
+ SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
+ return V;
}
+ break;
}
-
- // Check if this is a shuffle node doing a splat.
- ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V);
- if (!SVN || !SVN->isSplat())
- return SDValue();
-
- unsigned SplatIdx = (unsigned)SVN->getSplatIndex();
- SDValue InVec = V.getOperand(0);
- if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- assert((SplatIdx < VT.getVectorNumElements()) &&
- "Unexpected shuffle index found!");
- return InVec.getOperand(SplatIdx);
- } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2)))
- if (C->getZExtValue() == SplatIdx)
- return InVec.getOperand(1);
+ case ISD::VECTOR_SHUFFLE: {
+ // Check if this is a shuffle node doing a splat.
+ // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
+ // getTargetVShiftNode currently struggles without the splat source.
+ auto *SVN = cast<ShuffleVectorSDNode>(V);
+ if (!SVN->isSplat())
+ break;
+ int Idx = SVN->getSplatIndex();
+ int NumElts = V.getValueType().getVectorNumElements();
+ SplatIdx = Idx % NumElts;
+ return V.getOperand(Idx / NumElts);
}
+ }
+
+ return SDValue();
+}
- // Avoid introducing an extract element from a shuffle.
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getVectorElementType(), InVec,
- DAG.getIntPtrConstant(SplatIdx, dl));
+static SDValue GetSplatValue(SDValue V, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ int SplatIdx;
+ if (SDValue SrcVector = IsSplatVector(V, SplatIdx, DAG))
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ SrcVector.getValueType().getScalarType(), SrcVector,
+ DAG.getIntPtrConstant(SplatIdx, dl));
+ return SDValue();
}
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
@@ -23289,17 +24279,11 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
+ unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
+ unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
- unsigned X86OpcI = (Opcode == ISD::SHL) ? X86ISD::VSHLI :
- (Opcode == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
-
- unsigned X86OpcV = (Opcode == ISD::SHL) ? X86ISD::VSHL :
- (Opcode == ISD::SRL) ? X86ISD::VSRL : X86ISD::VSRA;
-
- Amt = peekThroughEXTRACT_SUBVECTORs(Amt);
-
- if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
- if (SDValue BaseShAmt = IsSplatValue(VT, Amt, dl, DAG, Subtarget, Opcode)) {
+ if (SDValue BaseShAmt = GetSplatValue(Amt, dl, DAG)) {
+ if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
MVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
@@ -23309,6 +24293,50 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
+
+ // vXi8 shifts - shift as v8i16 + mask result.
+ if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
+ (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
+ VT == MVT::v64i8) &&
+ !Subtarget.hasXOP()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ if (SupportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
+ unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
+ unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+ // Create the mask using vXi16 shifts. For shift-rights we need to move
+ // the upper byte down before splatting the vXi8 mask.
+ SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
+ BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask,
+ BaseShAmt, Subtarget, DAG);
+ if (Opcode != ISD::SHL)
+ BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask,
+ 8, DAG);
+ BitMask = DAG.getBitcast(VT, BitMask);
+ BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask,
+ SmallVector<int, 64>(NumElts, 0));
+
+ SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
+ DAG.getBitcast(ExtVT, R), BaseShAmt,
+ Subtarget, DAG);
+ Res = DAG.getBitcast(VT, Res);
+ Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);
+
+ if (Opcode == ISD::SRA) {
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask)
+ // SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW.
+ SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
+ SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask,
+ BaseShAmt, Subtarget, DAG);
+ SignMask = DAG.getBitcast(VT, SignMask);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);
+ }
+ return Res;
+ }
+ }
}
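The vXi8 block above performs the shift in vXi16 lanes and masks off the bits that leak across byte boundaries, then rebuilds the arithmetic case with ashr(x, s) == sub(xor(lshr(x, s), m), m), where m is the shifted sign bit. A small scalar sketch of both steps, assuming plain C++ (names illustrative):

    #include <cstdint>

    // One vXi16 lane holds two packed bytes [hi:lo]. A 16-bit logical right
    // shift (PSRLW) leaks bits of 'hi' into 'lo'; AND-ing with the per-byte
    // mask (0xFF >> s, the BitMask built above) restores the byte-wise result.
    uint16_t lshrBytesInI16Lane(uint16_t Lane, unsigned S) {
      uint16_t Shifted = static_cast<uint16_t>(Lane >> S);
      uint16_t Mask = static_cast<uint16_t>(0xFFu >> S);
      Mask = static_cast<uint16_t>(Mask | (Mask << 8));  // splat to both bytes
      return static_cast<uint16_t>(Shifted & Mask);
    }

    // The SRA case then recovers the sign from the logical shift:
    //   ashr(x, s) == ((lshr(x, s) ^ m) - m)   with m = 0x80 >> s (the SignMask).
    int8_t ashrFromLshr(uint8_t X, unsigned S) {
      uint8_t L = static_cast<uint8_t>(X >> S);          // logical shift
      uint8_t M = static_cast<uint8_t>(0x80u >> S);
      return static_cast<int8_t>(static_cast<uint8_t>((L ^ M) - M));
    }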
// Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
@@ -23379,7 +24407,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
// AVX2 can more effectively perform this as a zext/trunc to/from v8i32.
if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) {
- SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue Z = DAG.getConstant(0, dl, VT);
SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z));
SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z));
Lo = convertShiftLeftToScale(Lo, dl, Subtarget, DAG);
@@ -23401,8 +24429,13 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
+ unsigned Opc = Op.getOpcode();
+ unsigned X86OpcV = getTargetVShiftUniformOpcode(Opc, true);
+ unsigned X86OpcI = getTargetVShiftUniformOpcode(Opc, false);
+
assert(VT.isVector() && "Custom lowering only for vector shifts!");
assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!");
@@ -23412,31 +24445,31 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
return V;
- if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode()))
+ if (SupportedVectorVarShift(VT, Subtarget, Opc))
return Op;
// XOP has 128-bit variable logical/arithmetic shifts.
// +ve/-ve Amt = shift left/right.
if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||
VT == MVT::v8i16 || VT == MVT::v16i8)) {
- if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) {
+ if (Opc == ISD::SRL || Opc == ISD::SRA) {
SDValue Zero = DAG.getConstant(0, dl, VT);
Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt);
}
- if (Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL)
+ if (Opc == ISD::SHL || Opc == ISD::SRL)
return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);
- if (Op.getOpcode() == ISD::SRA)
+ if (Opc == ISD::SRA)
return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);
}
// 2i64 vector logical shifts can efficiently avoid scalarization - do the
// shifts per-lane and then shuffle the partial results back together.
- if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) {
+ if (VT == MVT::v2i64 && Opc != ISD::SRA) {
// Splat the shift amounts so the scalar shifts above will catch it.
SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
- SDValue R0 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt0);
- SDValue R1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt1);
+ SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
+ SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
}
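The v2i64 block above avoids scalarising a non-uniform logical shift by splatting each amount lane, shifting the whole vector twice, and recombining lane 0 of the first result with lane 1 of the second (the {0, 3} shuffle). A tiny sketch of the recombination, with illustrative names:

    #include <array>
    #include <cstdint>

    // shift(x, <a0, a1>) == shuffle(shift(x, <a0, a0>), shift(x, <a1, a1>), {0, 3})
    std::array<uint64_t, 2> shiftPerLane(std::array<uint64_t, 2> X,
                                         std::array<uint64_t, 2> Amt) {
      std::array<uint64_t, 2> R0{X[0] >> Amt[0], X[1] >> Amt[0]};  // splat amt[0]
      std::array<uint64_t, 2> R1{X[0] >> Amt[1], X[1] >> Amt[1]};  // splat amt[1]
      return {R0[0], R1[1]};  // {0, 3} keeps each lane shifted by its own amount
    }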
@@ -23444,7 +24477,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// M = lshr(SIGN_MASK, Amt)
// ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
- Op.getOpcode() == ISD::SRA) {
+ Opc == ISD::SRA) {
SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
@@ -23489,36 +24522,34 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// Only perform this blend if we can perform it without loading a mask.
if (ShuffleMask.size() == NumElts && Amt1 && Amt2 &&
- isa<ConstantSDNode>(Amt1) && isa<ConstantSDNode>(Amt2) &&
(VT != MVT::v16i16 ||
is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) &&
- (VT == MVT::v4i32 || Subtarget.hasSSE41() ||
- Op.getOpcode() != ISD::SHL || canWidenShuffleElements(ShuffleMask))) {
- SDValue Splat1 =
- DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
- SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
- SDValue Splat2 =
- DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), dl, VT);
- SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
- return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);
+ (VT == MVT::v4i32 || Subtarget.hasSSE41() || Opc != ISD::SHL ||
+ canWidenShuffleElements(ShuffleMask))) {
+ auto *Cst1 = dyn_cast<ConstantSDNode>(Amt1);
+ auto *Cst2 = dyn_cast<ConstantSDNode>(Amt2);
+ if (Cst1 && Cst2 && Cst1->getAPIntValue().ult(EltSizeInBits) &&
+ Cst2->getAPIntValue().ult(EltSizeInBits)) {
+ SDValue Shift1 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
+ Cst1->getZExtValue(), DAG);
+ SDValue Shift2 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
+ Cst2->getZExtValue(), DAG);
+ return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);
+ }
}
}
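The rewritten blend path now also checks that the two constant amounts are in range before emitting the two immediate shifts that get recombined by the shuffle mask. The underlying trick, sketched for a <1, 1, 3, 3> amount vector (illustrative values):

    #include <array>
    #include <cstdint>

    // Shift the whole vector by each of the two constants, then blend lanes
    // according to which constant each lane used (the ShuffleMask above).
    std::array<uint32_t, 4> twoConstShift(std::array<uint32_t, 4> X) {
      std::array<uint32_t, 4> S1, S3;
      for (int i = 0; i != 4; ++i) {
        S1[i] = X[i] << 1;
        S3[i] = X[i] << 3;
      }
      return {S1[0], S1[1], S3[2], S3[3]};   // per-lane blend, e.g. PBLENDW
    }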
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
- if (Op.getOpcode() == ISD::SHL)
+ if (Opc == ISD::SHL)
if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
- // Constant ISD::SRL can be performed efficiently on vXi8/vXi16 vectors as we
+ // Constant ISD::SRL can be performed efficiently on vXi16 vectors as we
// can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt).
- // TODO: Improve support for the shift by zero special case.
- if (Op.getOpcode() == ISD::SRL && ConstantAmt &&
- ((Subtarget.hasSSE41() && VT == MVT::v8i16) ||
- DAG.isKnownNeverZero(Amt)) &&
- (VT == MVT::v16i8 || VT == MVT::v8i16 ||
- ((VT == MVT::v32i8 || VT == MVT::v16i16) && Subtarget.hasInt256()))) {
- SDValue EltBits = DAG.getConstant(VT.getScalarSizeInBits(), dl, VT);
+ if (Opc == ISD::SRL && ConstantAmt &&
+ (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256()))) {
+ SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
@@ -23528,13 +24559,36 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
}
}
+ // Constant ISD::SRA can be performed efficiently on vXi16 vectors as we
+ // can replace with ISD::MULHS, creating scale factor from (NumEltBits - Amt).
+ // TODO: Special case handling for shift by 0/1, really we can afford either
+ // of these cases in pre-SSE41/XOP/AVX512 but not both.
+ if (Opc == ISD::SRA && ConstantAmt &&
+ (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256())) &&
+ ((Subtarget.hasSSE41() && !Subtarget.hasXOP() &&
+ !Subtarget.hasAVX512()) ||
+ DAG.isKnownNeverZero(Amt))) {
+ SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
+ SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
+ if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
+ SDValue Amt0 =
+ DAG.getSetCC(dl, VT, Amt, DAG.getConstant(0, dl, VT), ISD::SETEQ);
+ SDValue Amt1 =
+ DAG.getSetCC(dl, VT, Amt, DAG.getConstant(1, dl, VT), ISD::SETEQ);
+ SDValue Sra1 =
+ getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, 1, DAG);
+ SDValue Res = DAG.getNode(ISD::MULHS, dl, VT, R, Scale);
+ Res = DAG.getSelect(dl, VT, Amt0, R, Res);
+ return DAG.getSelect(dl, VT, Amt1, Sra1, Res);
+ }
+ }
+
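Both of the constant vXi16 paths above turn a right shift into a high-half multiply: for 0 < s < 16, mulhu(x, 1 << (16 - s)) == lshr(x, s) and mulhs(x, 1 << (16 - s)) == ashr(x, s). The signed case cannot represent 1 << 15 as a positive i16, which is why shifts by 0 and 1 are caught by the SETCC/select guards in the SRA block. A short scalar check, assuming plain C++ and the usual arithmetic behaviour of >> on signed values:

    #include <cassert>
    #include <cstdint>

    // High halves of a 16x16->32 multiply (MULHU / MULHS).
    uint16_t mulhu16(uint16_t A, uint16_t B) {
      return static_cast<uint16_t>((uint32_t(A) * uint32_t(B)) >> 16);
    }
    int16_t mulhs16(int16_t A, int16_t B) {
      return static_cast<int16_t>((int32_t(A) * int32_t(B)) >> 16);
    }

    void checkScaleIdentity() {
      for (unsigned S = 1; S < 16; ++S) {
        uint16_t Scale = static_cast<uint16_t>(1u << (16 - S));
        assert(mulhu16(0xBEEF, Scale) == (0xBEEFu >> S));
        if (S >= 2)  // 1 << 15 is not a positive i16: hence the shift-by-1 select
          assert(mulhs16(int16_t(-12345), int16_t(Scale)) ==
                 int16_t(-12345) >> S);
      }
    }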
// v4i32 Non Uniform Shifts.
// If the shift amount is constant we can shift each lane using the SSE2
// immediate shifts, else we need to zero-extend each lane to the lower i64
// and shift using the SSE2 variable shifts.
// The separate results can then be blended together.
if (VT == MVT::v4i32) {
- unsigned Opc = Op.getOpcode();
SDValue Amt0, Amt1, Amt2, Amt3;
if (ConstantAmt) {
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
@@ -23542,26 +24596,12 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});
Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});
} else {
- // ISD::SHL is handled above but we include it here for completeness.
- switch (Opc) {
- default:
- llvm_unreachable("Unknown target vector shift node");
- case ISD::SHL:
- Opc = X86ISD::VSHL;
- break;
- case ISD::SRL:
- Opc = X86ISD::VSRL;
- break;
- case ISD::SRA:
- Opc = X86ISD::VSRA;
- break;
- }
// The SSE2 shifts use the lower i64 as the same shift amount for
// all lanes and the upper i64 is ignored. On AVX we're better off
// just zero-extending, but for SSE just duplicating the top 16-bits is
// cheaper and has the same effect for out of range values.
if (Subtarget.hasAVX()) {
- SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue Z = DAG.getConstant(0, dl, VT);
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
@@ -23581,10 +24621,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
}
}
- SDValue R0 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt0));
- SDValue R1 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt1));
- SDValue R2 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt2));
- SDValue R3 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt3));
+ unsigned ShOpc = ConstantAmt ? Opc : X86OpcV;
+ SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0));
+ SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1));
+ SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2));
+ SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3));
// Merge the shifted lane results optimally with/without PBLENDW.
// TODO - ideally shuffle combining would handle this.
@@ -23611,19 +24652,66 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
"Unexpected vector type");
MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32;
MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
- unsigned ExtOpc =
- Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ unsigned ExtOpc = Opc == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
+ DAG.getNode(Opc, dl, ExtVT, R, Amt));
+ }
+
+ // Constant ISD::SRA/SRL can be performed efficiently on vXi8 vectors as we
+ // extend to vXi16 to perform a MUL scale effectively as a MUL_LOHI.
+ if (ConstantAmt && (Opc == ISD::SRA || Opc == ISD::SRL) &&
+ (VT == MVT::v16i8 || VT == MVT::v64i8 ||
+ (VT == MVT::v32i8 && Subtarget.hasInt256())) &&
+ !Subtarget.hasXOP()) {
+ int NumElts = VT.getVectorNumElements();
+ SDValue Cst8 = DAG.getConstant(8, dl, MVT::i8);
+
+ // Extend constant shift amount to vXi16 (it doesn't matter if the type
+ // isn't legal).
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
+ Amt = DAG.getZExtOrTrunc(Amt, dl, ExVT);
+ Amt = DAG.getNode(ISD::SUB, dl, ExVT, DAG.getConstant(8, dl, ExVT), Amt);
+ Amt = DAG.getNode(ISD::SHL, dl, ExVT, DAG.getConstant(1, dl, ExVT), Amt);
+ assert(ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()) &&
+ "Constant build vector expected");
+
+ if (VT == MVT::v16i8 && Subtarget.hasInt256()) {
+ R = Opc == ISD::SRA ? DAG.getSExtOrTrunc(R, dl, ExVT)
+ : DAG.getZExtOrTrunc(R, dl, ExVT);
+ R = DAG.getNode(ISD::MUL, dl, ExVT, R, Amt);
+ R = DAG.getNode(X86ISD::VSRLI, dl, ExVT, R, Cst8);
+ return DAG.getZExtOrTrunc(R, dl, VT);
+ }
+
+ SmallVector<SDValue, 16> LoAmt, HiAmt;
+ for (int i = 0; i != NumElts; i += 16) {
+ for (int j = 0; j != 8; ++j) {
+ LoAmt.push_back(Amt.getOperand(i + j));
+ HiAmt.push_back(Amt.getOperand(i + j + 8));
+ }
+ }
+
+ MVT VT16 = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ SDValue LoA = DAG.getBuildVector(VT16, dl, LoAmt);
+ SDValue HiA = DAG.getBuildVector(VT16, dl, HiAmt);
+
+ SDValue LoR = DAG.getBitcast(VT16, getUnpackl(DAG, dl, VT, R, R));
+ SDValue HiR = DAG.getBitcast(VT16, getUnpackh(DAG, dl, VT, R, R));
+ LoR = DAG.getNode(X86OpcI, dl, VT16, LoR, Cst8);
+ HiR = DAG.getNode(X86OpcI, dl, VT16, HiR, Cst8);
+ LoR = DAG.getNode(ISD::MUL, dl, VT16, LoR, LoA);
+ HiR = DAG.getNode(ISD::MUL, dl, VT16, HiR, HiA);
+ LoR = DAG.getNode(X86ISD::VSRLI, dl, VT16, LoR, Cst8);
+ HiR = DAG.getNode(X86ISD::VSRLI, dl, VT16, HiR, Cst8);
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR);
}
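This hunk lowers constant (possibly non-uniform) vXi8 right shifts by extending each byte into an i16 lane, multiplying by 1 << (8 - s), and reading the result byte back out with a logical shift by 8 before PACKUS. The scalar form of the identity, valid for 0 <= s < 8 (plain C++, illustrative names):

    #include <cstdint>

    //   lshr(x, s) == lowbyte((zext16(x) * (1 << (8 - s))) >> 8)
    //   ashr(x, s) == lowbyte((sext16(x) * (1 << (8 - s))) >> 8)   (logical >> 8)
    uint8_t lshr8ViaMul(uint8_t X, unsigned S) {
      uint16_t Scale = static_cast<uint16_t>(1u << (8 - S));   // the Amt built above
      uint16_t Prod = static_cast<uint16_t>(uint16_t(X) * Scale);
      return static_cast<uint8_t>(Prod >> 8);
    }
    int8_t ashr8ViaMul(int8_t X, unsigned S) {
      uint16_t Scale = static_cast<uint16_t>(1u << (8 - S));
      uint16_t Prod = static_cast<uint16_t>(uint16_t(int16_t(X)) * Scale);
      return static_cast<int8_t>(static_cast<uint8_t>(Prod >> 8));
    }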
if (VT == MVT::v16i8 ||
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
- unsigned ShiftOpcode = Op->getOpcode();
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (VT.is512BitVector()) {
@@ -23648,7 +24736,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
// and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
- SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl);
+ SDValue Z = DAG.getConstant(0, dl, SelVT);
SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
return DAG.getSelect(dl, SelVT, C, V0, V1);
};
@@ -23657,49 +24745,46 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// We can safely do this using i16 shifts as we're only interested in
// the 3 lower bits of each byte.
Amt = DAG.getBitcast(ExtVT, Amt);
- Amt = DAG.getNode(ISD::SHL, dl, ExtVT, Amt, DAG.getConstant(5, dl, ExtVT));
+ Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, Amt, 5, DAG);
Amt = DAG.getBitcast(VT, Amt);
- if (Op->getOpcode() == ISD::SHL || Op->getOpcode() == ISD::SRL) {
+ if (Opc == ISD::SHL || Opc == ISD::SRL) {
// r = VSELECT(r, shift(r, 4), a);
- SDValue M =
- DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
+ SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
+ M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(2, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
+ M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(1, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
return R;
}
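The SHL/SRL ladder above implements a fully variable byte shift without per-lane shift instructions: the 3-bit amount is moved into the sign-bit position (the shift left by 5), and the vector is conditionally shifted by 4, 2 and 1, selecting on one amount bit per step. Scalar equivalent of one lane, with illustrative names:

    #include <cstdint>

    // Variable shift decomposed into conditional shifts by 4, 2 and 1,
    // selected by the bits of the 3-bit amount - the select is what
    // PBLENDVB / PCMPGT-against-zero perform per lane in the vector code.
    uint8_t varShl8(uint8_t R, uint8_t Amt) {
      if (Amt & 4) R = static_cast<uint8_t>(R << 4);
      if (Amt & 2) R = static_cast<uint8_t>(R << 2);
      if (Amt & 1) R = static_cast<uint8_t>(R << 1);
      return R;   // == R << (Amt & 7)
    }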
- if (Op->getOpcode() == ISD::SRA) {
+ if (Opc == ISD::SRA) {
// For SRA we need to unpack each byte to the higher byte of a i16 vector
// so we can correctly sign extend. We don't care what happens to the
// lower byte.
- SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), Amt);
- SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), Amt);
- SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), R);
- SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), R);
+ SDValue ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
+ SDValue AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
+ SDValue RLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), R);
+ SDValue RHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), R);
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
// r = VSELECT(r, shift(r, 4), a);
- SDValue MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(4, dl, ExtVT));
- SDValue MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(4, dl, ExtVT));
+ SDValue MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 4, DAG);
+ SDValue MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 4, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
@@ -23708,10 +24793,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 2), a);
- MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(2, dl, ExtVT));
- MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(2, dl, ExtVT));
+ MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 2, DAG);
+ MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 2, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
@@ -23720,45 +24803,38 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 1), a);
- MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(1, dl, ExtVT));
- MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(1, dl, ExtVT));
+ MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 1, DAG);
+ MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 1, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// Logical shift the result back to the lower byte, leaving a zero upper
- // byte
- // meaning that we can safely pack with PACKUSWB.
- RLo =
- DAG.getNode(ISD::SRL, dl, ExtVT, RLo, DAG.getConstant(8, dl, ExtVT));
- RHi =
- DAG.getNode(ISD::SRL, dl, ExtVT, RHi, DAG.getConstant(8, dl, ExtVT));
+ // byte meaning that we can safely pack with PACKUSWB.
+ RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RLo, 8, DAG);
+ RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RHi, 8, DAG);
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
}
if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
MVT ExtVT = MVT::v8i32;
- SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
- SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
- SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z);
- SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Z, R);
- SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Z, R);
+ SDValue Z = DAG.getConstant(0, dl, VT);
+ SDValue ALo = getUnpackl(DAG, dl, VT, Amt, Z);
+ SDValue AHi = getUnpackh(DAG, dl, VT, Amt, Z);
+ SDValue RLo = getUnpackl(DAG, dl, VT, Z, R);
+ SDValue RHi = getUnpackh(DAG, dl, VT, Z, R);
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
- SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo);
- SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi);
- Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT));
- Hi = DAG.getNode(ISD::SRL, dl, ExtVT, Hi, DAG.getConstant(16, dl, ExtVT));
+ SDValue Lo = DAG.getNode(Opc, dl, ExtVT, RLo, ALo);
+ SDValue Hi = DAG.getNode(Opc, dl, ExtVT, RHi, AHi);
+ Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Lo, 16, DAG);
+ Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Hi, 16, DAG);
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
if (VT == MVT::v8i16) {
- unsigned ShiftOpcode = Op->getOpcode();
-
// If we have a constant shift amount, the non-SSE41 path is best as
// avoiding bitcasts make it easier to constant fold and reduce to PBLENDW.
bool UseSSE41 = Subtarget.hasSSE41() &&
@@ -23778,7 +24854,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// set all bits of the lanes to true and VSELECT uses that in
// its OR(AND(V0,C),AND(V1,~C)) lowering.
SDValue C =
- DAG.getNode(ISD::SRA, dl, VT, Sel, DAG.getConstant(15, dl, VT));
+ getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Sel, 15, DAG);
return DAG.getSelect(dl, VT, C, V0, V1);
};
@@ -23788,42 +24864,42 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// bytes for PBLENDVB.
Amt = DAG.getNode(
ISD::OR, dl, VT,
- DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(4, dl, VT)),
- DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT)));
+ getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 4, DAG),
+ getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG));
} else {
- Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT));
+ Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG);
}
// r = VSELECT(r, shift(r, 8), a);
- SDValue M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(8, dl, VT));
+ SDValue M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 8, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 4), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
+ M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 4, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
+ M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 2, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
+ M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 1, DAG);
R = SignBitSelect(Amt, M, R);
return R;
}
- // Decompose 256-bit shifts into smaller 128-bit shifts.
+ // Decompose 256-bit shifts into 128-bit shifts.
if (VT.is256BitVector())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
return SDValue();
}
@@ -23838,20 +24914,31 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SDValue Amt = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ int NumElts = VT.getVectorNumElements();
+
+ // Check for constant splat rotation amount.
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ int CstSplatIndex = -1;
+ if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits))
+ for (int i = 0; i != NumElts; ++i)
+ if (!UndefElts[i]) {
+ if (CstSplatIndex < 0 || EltBits[i] == EltBits[CstSplatIndex]) {
+ CstSplatIndex = i;
+ continue;
+ }
+ CstSplatIndex = -1;
+ break;
+ }
+ // AVX512 implicitly uses modulo rotation amounts.
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
// Attempt to rotate by immediate.
- APInt UndefElts;
- SmallVector<APInt, 16> EltBits;
- if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits)) {
- if (!UndefElts && llvm::all_of(EltBits, [EltBits](APInt &V) {
- return EltBits[0] == V;
- })) {
- unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
- uint64_t RotateAmt = EltBits[0].urem(EltSizeInBits);
- return DAG.getNode(Op, DL, VT, R,
- DAG.getConstant(RotateAmt, DL, MVT::i8));
- }
+ if (0 <= CstSplatIndex) {
+ unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
+ uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
+ return DAG.getNode(Op, DL, VT, R,
+ DAG.getConstant(RotateAmt, DL, MVT::i8));
}
// Else, fall-back on VPROLV/VPRORV.
@@ -23862,20 +24949,17 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
+ // XOP implicitly uses modulo rotation amounts.
if (Subtarget.hasXOP()) {
- // Split 256-bit integers.
if (VT.is256BitVector())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
// Attempt to rotate by immediate.
- if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
- if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
- uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
- assert(RotateAmt < EltSizeInBits && "Rotation out of range");
- return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
- DAG.getConstant(RotateAmt, DL, MVT::i8));
- }
+ if (0 <= CstSplatIndex) {
+ uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
+ return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
+ DAG.getConstant(RotateAmt, DL, MVT::i8));
}
// Use general rotate by variable (per-element).
@@ -23884,7 +24968,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// Split 256-bit integers on pre-AVX2 targets.
if (VT.is256BitVector() && !Subtarget.hasAVX2())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
@@ -23892,44 +24976,19 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
"Only vXi32/vXi16/vXi8 vector rotates supported");
  // Rotate by a uniform constant - expand back to shifts.
- // TODO - legalizers should be able to handle this.
- if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
- if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
- uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
- assert(RotateAmt < EltSizeInBits && "Rotation out of range");
- if (RotateAmt == 0)
- return R;
-
- SDValue AmtR = DAG.getConstant(EltSizeInBits - RotateAmt, DL, VT);
- SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
- SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
- return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
- }
- }
+ if (0 <= CstSplatIndex)
+ return SDValue();
- // Rotate by splat - expand back to shifts.
- // TODO - legalizers should be able to handle this.
- if ((EltSizeInBits >= 16 || Subtarget.hasBWI()) &&
- IsSplatValue(VT, Amt, DL, DAG, Subtarget, Opcode)) {
- SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
- AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
- SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
- SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
- return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
- }
+ bool IsSplatAmt = DAG.isSplatValue(Amt);
// v16i8/v32i8: Split rotation into rot4/rot2/rot1 stages and select by
// the amount bit.
- if (EltSizeInBits == 8) {
- if (Subtarget.hasBWI()) {
- SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
- AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
- SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
- SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
- return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
- }
+ if (EltSizeInBits == 8 && !IsSplatAmt) {
+ if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()))
+ return SDValue();
- MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
+ // We don't need ModuloAmt here as we just peek at individual bits.
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (Subtarget.hasSSE41()) {
@@ -23943,7 +25002,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
// and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
- SDValue Z = getZeroVector(SelVT, Subtarget, DAG, DL);
+ SDValue Z = DAG.getConstant(0, DL, SelVT);
SDValue C = DAG.getNode(X86ISD::PCMPGT, DL, SelVT, Z, Sel);
return DAG.getSelect(DL, SelVT, C, V0, V1);
};
@@ -23984,14 +25043,17 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return SignBitSelect(VT, Amt, M, R);
}
+ // ISD::ROT* uses modulo rotate amounts.
+ Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
+ DAG.getConstant(EltSizeInBits - 1, DL, VT));
+
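Masking the amount with EltSizeInBits - 1 makes the ISD::ROT* modulo semantics explicit before the shift-based fallback below, which relies on rotl(x, s) == (x << s) | (x >> (w - s)). A guarded scalar version for one 16-bit lane (the s == 0 case is special-cased only because a full-width shift is not defined in scalar C++):

    #include <cstdint>

    uint16_t rotl16(uint16_t X, unsigned Amt) {
      unsigned S = Amt & 15;                       // modulo rotate amount
      if (S == 0)
        return X;
      return static_cast<uint16_t>((X << S) | (X >> (16 - S)));
    }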
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
SupportedVectorVarShift(VT, Subtarget, ISD::SRL);
- // Best to fallback for all supported variable shifts.
- // AVX2 - best to fallback for non-constants as well.
- // TODO - legalizers should be able to handle this.
- if (LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) {
+ // Fallback for splats + all supported variable shifts.
+ // Fallback for non-constants AVX2 vXi16 as well.
+ if (IsSplatAmt || LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) {
SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
@@ -24032,78 +25094,6 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
DAG.getVectorShuffle(VT, DL, Res02, Res13, {1, 5, 3, 7}));
}
-static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
- // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
- // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
- // looks for this combo and may remove the "setcc" instruction if the "setcc"
- // has only one use.
- SDNode *N = Op.getNode();
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- unsigned BaseOp = 0;
- X86::CondCode Cond;
- SDLoc DL(Op);
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Unknown ovf instruction!");
- case ISD::SADDO:
- // A subtract of one will be selected as a INC. Note that INC doesn't
- // set CF, so we can't do this for UADDO.
- if (isOneConstant(RHS)) {
- BaseOp = X86ISD::INC;
- Cond = X86::COND_O;
- break;
- }
- BaseOp = X86ISD::ADD;
- Cond = X86::COND_O;
- break;
- case ISD::UADDO:
- BaseOp = X86ISD::ADD;
- Cond = X86::COND_B;
- break;
- case ISD::SSUBO:
- // A subtract of one will be selected as a DEC. Note that DEC doesn't
- // set CF, so we can't do this for USUBO.
- if (isOneConstant(RHS)) {
- BaseOp = X86ISD::DEC;
- Cond = X86::COND_O;
- break;
- }
- BaseOp = X86ISD::SUB;
- Cond = X86::COND_O;
- break;
- case ISD::USUBO:
- BaseOp = X86ISD::SUB;
- Cond = X86::COND_B;
- break;
- case ISD::SMULO:
- BaseOp = N->getValueType(0) == MVT::i8 ? X86ISD::SMUL8 : X86ISD::SMUL;
- Cond = X86::COND_O;
- break;
- case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
- if (N->getValueType(0) == MVT::i8) {
- BaseOp = X86ISD::UMUL8;
- Cond = X86::COND_O;
- break;
- }
- SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
- MVT::i32);
- SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
-
- SDValue SetCC = getSETCC(X86::COND_O, SDValue(Sum.getNode(), 2), DL, DAG);
-
- return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
- }
- }
-
- // Also sets EFLAGS.
- SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
- SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
-
- SDValue SetCC = getSETCC(Cond, SDValue(Sum.getNode(), 1), DL, DAG);
-
- return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
-}
-
/// Returns true if the operand type is exactly twice the native width, and
/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
@@ -24246,7 +25236,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
+ SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
DAG.getTargetConstant(1, dl, MVT::i8), // Scale
@@ -24256,7 +25246,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
Zero,
Chain
};
- SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
+ SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, dl, MVT::Other, Ops);
return SDValue(Res, 0);
}
@@ -24369,40 +25359,32 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
SrcVT == MVT::i64) {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- if (DstVT != MVT::f64)
+ if (DstVT != MVT::f64 && DstVT != MVT::i64 &&
+ !(DstVT == MVT::x86mmx && SrcVT.isVector()))
// This conversion needs to be expanded.
return SDValue();
- SmallVector<SDValue, 16> Elts;
SDLoc dl(Op);
- unsigned NumElts;
- MVT SVT;
if (SrcVT.isVector()) {
- NumElts = SrcVT.getVectorNumElements();
- SVT = SrcVT.getVectorElementType();
-
// Widen the vector in input in the case of MVT::v2i32.
// Example: from MVT::v2i32 to MVT::v4i32.
- for (unsigned i = 0, e = NumElts; i != e; ++i)
- Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Src,
- DAG.getIntPtrConstant(i, dl)));
+ MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(),
+ SrcVT.getVectorNumElements() * 2);
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src,
+ DAG.getUNDEF(SrcVT));
} else {
assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
"Unexpected source type in LowerBITCAST");
- Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
- DAG.getIntPtrConstant(0, dl)));
- Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
- DAG.getIntPtrConstant(1, dl)));
- NumElts = 2;
- SVT = MVT::i32;
- }
- // Explicitly mark the extra elements as Undef.
- Elts.append(NumElts, DAG.getUNDEF(SVT));
-
- EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
- SDValue BV = DAG.getBuildVector(NewVT, dl, Elts);
- SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
+ Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
+ }
+
+ MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
+ Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src);
+
+ if (DstVT == MVT::x86mmx)
+ return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src,
DAG.getIntPtrConstant(0, dl));
}
@@ -24445,7 +25427,7 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
// PSADBW instruction horizontally add all bytes and leave the result in i64
// chunks, thus directly computes the pop count for v2i64 and v4i64.
if (EltVT == MVT::i64) {
- SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
+ SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
return DAG.getBitcast(VT, V);
@@ -24457,13 +25439,13 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
// this is that it lines up the results of two PSADBW instructions to be
// two v2i64 vectors which concatenated are the 4 population counts. We can
// then use PACKUSWB to shrink and concatenate them into a v4i32 again.
- SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL);
+ SDValue Zeros = DAG.getConstant(0, DL, VT);
SDValue V32 = DAG.getBitcast(VT, V);
- SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V32, Zeros);
- SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V32, Zeros);
+ SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros);
+ SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros);
// Do the horizontal sums into two v2i64s.
- Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
+ Zeros = DAG.getConstant(0, DL, ByteVecVT);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, Low), Zeros);
@@ -24498,7 +25480,9 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
- unsigned VecSize = VT.getSizeInBits();
+ int NumElts = VT.getVectorNumElements();
+ (void)EltVT;
+ assert(EltVT == MVT::i8 && "Only vXi8 vector CTPOP lowering supported.");
// Implement a lookup table in register by using an algorithm based on:
// http://wm.ite.pl/articles/sse-popcount.html
@@ -24510,109 +25494,30 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
// masked out higher ones) for each byte. PSHUFB is used separately with both
// to index the in-register table. Next, both are added and the result is a
// i8 vector where each element contains the pop count for input byte.
- //
- // To obtain the pop count for elements != i8, we follow up with the same
- // approach and use additional tricks as described below.
- //
const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
/* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
/* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
/* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4};
- int NumByteElts = VecSize / 8;
- MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts);
- SDValue In = DAG.getBitcast(ByteVecVT, Op);
SmallVector<SDValue, 64> LUTVec;
- for (int i = 0; i < NumByteElts; ++i)
+ for (int i = 0; i < NumElts; ++i)
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
- SDValue InRegLUT = DAG.getBuildVector(ByteVecVT, DL, LUTVec);
- SDValue M0F = DAG.getConstant(0x0F, DL, ByteVecVT);
+ SDValue InRegLUT = DAG.getBuildVector(VT, DL, LUTVec);
+ SDValue M0F = DAG.getConstant(0x0F, DL, VT);
// High nibbles
- SDValue FourV = DAG.getConstant(4, DL, ByteVecVT);
- SDValue HighNibbles = DAG.getNode(ISD::SRL, DL, ByteVecVT, In, FourV);
+ SDValue FourV = DAG.getConstant(4, DL, VT);
+ SDValue HiNibbles = DAG.getNode(ISD::SRL, DL, VT, Op, FourV);
// Low nibbles
- SDValue LowNibbles = DAG.getNode(ISD::AND, DL, ByteVecVT, In, M0F);
+ SDValue LoNibbles = DAG.getNode(ISD::AND, DL, VT, Op, M0F);
// The input vector is used as the shuffle mask that index elements into the
// LUT. After counting low and high nibbles, add the vector to obtain the
// final pop count per i8 element.
- SDValue HighPopCnt =
- DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, HighNibbles);
- SDValue LowPopCnt =
- DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, LowNibbles);
- SDValue PopCnt = DAG.getNode(ISD::ADD, DL, ByteVecVT, HighPopCnt, LowPopCnt);
-
- if (EltVT == MVT::i8)
- return PopCnt;
-
- return LowerHorizontalByteSum(PopCnt, VT, Subtarget, DAG);
-}
-
-static SDValue LowerVectorCTPOPBitmath(SDValue Op, const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
- assert(VT.is128BitVector() &&
- "Only 128-bit vector bitmath lowering supported.");
-
- int VecSize = VT.getSizeInBits();
- MVT EltVT = VT.getVectorElementType();
- int Len = EltVT.getSizeInBits();
-
- // This is the vectorized version of the "best" algorithm from
- // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
- // with a minor tweak to use a series of adds + shifts instead of vector
- // multiplications. Implemented for all integer vector types. We only use
- // this when we don't have SSSE3 which allows a LUT-based lowering that is
- // much faster, even faster than using native popcnt instructions.
-
- auto GetShift = [&](unsigned OpCode, SDValue V, int Shifter) {
- MVT VT = V.getSimpleValueType();
- SDValue ShifterV = DAG.getConstant(Shifter, DL, VT);
- return DAG.getNode(OpCode, DL, VT, V, ShifterV);
- };
- auto GetMask = [&](SDValue V, APInt Mask) {
- MVT VT = V.getSimpleValueType();
- SDValue MaskV = DAG.getConstant(Mask, DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, V, MaskV);
- };
-
- // We don't want to incur the implicit masks required to SRL vNi8 vectors on
- // x86, so set the SRL type to have elements at least i16 wide. This is
- // correct because all of our SRLs are followed immediately by a mask anyways
- // that handles any bits that sneak into the high bits of the byte elements.
- MVT SrlVT = Len > 8 ? VT : MVT::getVectorVT(MVT::i16, VecSize / 16);
-
- SDValue V = Op;
-
- // v = v - ((v >> 1) & 0x55555555...)
- SDValue Srl =
- DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 1));
- SDValue And = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x55)));
- V = DAG.getNode(ISD::SUB, DL, VT, V, And);
-
- // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
- SDValue AndLHS = GetMask(V, APInt::getSplat(Len, APInt(8, 0x33)));
- Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 2));
- SDValue AndRHS = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x33)));
- V = DAG.getNode(ISD::ADD, DL, VT, AndLHS, AndRHS);
-
- // v = (v + (v >> 4)) & 0x0F0F0F0F...
- Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 4));
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, V, Srl);
- V = GetMask(Add, APInt::getSplat(Len, APInt(8, 0x0F)));
-
- // At this point, V contains the byte-wise population count, and we are
- // merely doing a horizontal sum if necessary to get the wider element
- // counts.
- if (EltVT == MVT::i8)
- return V;
-
- return LowerHorizontalByteSum(
- DAG.getBitcast(MVT::getVectorVT(MVT::i8, VecSize / 8), V), VT, Subtarget,
- DAG);
+ SDValue HiPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, HiNibbles);
+ SDValue LoPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, LoNibbles);
+ return DAG.getNode(ISD::ADD, DL, VT, HiPopCnt, LoPopCnt);
}
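The rewritten LowerVectorCTPOPInRegLUT now only produces per-byte pop counts: the in-register table holds the pop count of every 4-bit value and is indexed once for each nibble of a byte (the two PSHUFBs). Scalar rendering of the same lookup, in plain C++:

    #include <cstdint>

    // Pop count of one byte via the 16-entry nibble LUT used above.
    uint8_t popcnt8Lut(uint8_t X) {
      static const uint8_t LUT[16] = {0, 1, 1, 2, 1, 2, 2, 3,
                                      1, 2, 2, 3, 2, 3, 3, 4};
      uint8_t Lo = X & 0x0F;        // low nibble   (AND with M0F)
      uint8_t Hi = X >> 4;          // high nibble  (SRL by FourV)
      return static_cast<uint8_t>(LUT[Lo] + LUT[Hi]);  // two PSHUFBs + ADD
    }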
// Please ensure that any codegen change from LowerVectorCTPOP is reflected in
@@ -24638,12 +25543,6 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
}
}
- if (!Subtarget.hasSSSE3()) {
- // We can't use the fast LUT approach, so fall back on vectorized bitmath.
- assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!");
- return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG);
- }
-
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntUnary(Op, DAG);
@@ -24652,6 +25551,18 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is512BitVector() && !Subtarget.hasBWI())
return Lower512IntUnary(Op, DAG);
+ // For element types greater than i8, do vXi8 pop counts and a bytesum.
+ if (VT.getScalarType() != MVT::i8) {
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ SDValue ByteOp = DAG.getBitcast(ByteVT, Op0);
+ SDValue PopCnt8 = DAG.getNode(ISD::CTPOP, DL, ByteVT, ByteOp);
+ return LowerHorizontalByteSum(PopCnt8, VT, Subtarget, DAG);
+ }
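For element types wider than i8 the lowering now bitcasts to bytes, takes the vXi8 pop count, and lets LowerHorizontalByteSum fold the byte counts back into each original lane (PSADBW against zero for i64, unpack plus PSADBW for i32). The reduction it reconstructs, stated for one 32-bit lane (any per-byte popcount will do; names illustrative):

    #include <cstdint>

    uint8_t popcnt8(uint8_t X) {               // any per-byte pop count
      uint8_t C = 0;
      for (; X; X = static_cast<uint8_t>(X & (X - 1)))
        ++C;
      return C;
    }

    // ctpop of a 32-bit lane == sum of the pop counts of its four bytes,
    // which is exactly what the horizontal byte sum rebuilds per lane.
    uint32_t popcnt32ViaBytes(uint32_t X) {
      return popcnt8(X & 0xFF) + popcnt8((X >> 8) & 0xFF) +
             popcnt8((X >> 16) & 0xFF) + popcnt8((X >> 24) & 0xFF);
    }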
+
+ // We can't use the fast LUT approach, so fall back on LegalizeDAG.
+ if (!Subtarget.hasSSSE3())
+ return SDValue();
+
return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
}
@@ -24759,8 +25670,7 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
}
static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- bool AllowIncDec = true) {
+ const X86Subtarget &Subtarget) {
unsigned NewOpc = 0;
switch (N->getOpcode()) {
case ISD::ATOMIC_LOAD_ADD:
@@ -24784,25 +25694,6 @@ static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
- if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
- // Convert to inc/dec if they aren't slow or we are optimizing for size.
- if (AllowIncDec && (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- if ((NewOpc == X86ISD::LADD && C->isOne()) ||
- (NewOpc == X86ISD::LSUB && C->isAllOnesValue()))
- return DAG.getMemIntrinsicNode(X86ISD::LINC, SDLoc(N),
- DAG.getVTList(MVT::i32, MVT::Other),
- {N->getOperand(0), N->getOperand(1)},
- /*MemVT=*/N->getSimpleValueType(0), MMO);
- if ((NewOpc == X86ISD::LSUB && C->isOne()) ||
- (NewOpc == X86ISD::LADD && C->isAllOnesValue()))
- return DAG.getMemIntrinsicNode(X86ISD::LDEC, SDLoc(N),
- DAG.getVTList(MVT::i32, MVT::Other),
- {N->getOperand(0), N->getOperand(1)},
- /*MemVT=*/N->getSimpleValueType(0), MMO);
- }
- }
-
return DAG.getMemIntrinsicNode(
NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1), N->getOperand(2)},
@@ -25120,8 +26011,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
// VLX the vector should be widened to 512 bit
unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
- SDValue Src0 = N->getSrc0();
- Src0 = ExtendToType(Src0, WideDataVT, DAG);
+ SDValue PassThru = ExtendToType(N->getPassThru(), WideDataVT, DAG);
// Mask element has to be i1.
assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
@@ -25131,7 +26021,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
- N->getBasePtr(), Mask, Src0,
+ N->getBasePtr(), Mask, PassThru,
N->getMemoryVT(), N->getMemOperand(),
N->getExtensionType(),
N->isExpandingLoad());
@@ -25194,7 +26084,7 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
MVT VT = Op.getSimpleValueType();
SDValue Index = N->getIndex();
SDValue Mask = N->getMask();
- SDValue Src0 = N->getValue();
+ SDValue PassThru = N->getPassThru();
MVT IndexVT = Index.getSimpleValueType();
MVT MaskVT = Mask.getSimpleValueType();
@@ -25219,12 +26109,12 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
- Src0 = ExtendToType(Src0, VT, DAG);
+ PassThru = ExtendToType(PassThru, VT, DAG);
Index = ExtendToType(Index, IndexVT, DAG);
Mask = ExtendToType(Mask, MaskVT, DAG, true);
}
- SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index,
+ SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
N->getScale() };
SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(VT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(),
@@ -25308,6 +26198,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
+ case ISD::FSHL:
+ case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
@@ -25322,6 +26214,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
+ case ISD::FADD:
+ case ISD::FSUB: return lowerFaddFsub(Op, DAG, Subtarget);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -25354,12 +26248,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG);
case ISD::CTTZ:
- case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, DAG);
+ case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, Subtarget, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::MULHS:
case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG);
- case ISD::UMUL_LOHI:
- case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
case ISD::ROTL:
case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG);
case ISD::SRA:
@@ -25376,12 +26268,16 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
- case ISD::SUB: return LowerADD_SUB(Op, DAG);
+ case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget);
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, DAG);
- case ISD::ABS: return LowerABS(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
@@ -25421,32 +26317,70 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::MUL: {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "Unexpected VT");
+ if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger &&
+ VT.getVectorNumElements() == 2) {
+ // Promote to a pattern that will be turned into PMULUDQ.
+ SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
+ N->getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
+ N->getOperand(1));
+ SDValue Mul = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, N0, N1);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul));
+ } else if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ VT.getVectorElementType() == MVT::i8) {
+ // Pre-promote these to vXi16 to avoid op legalization thinking all 16
+ // elements are needed.
+ MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
+ SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ unsigned NumConcats = 16 / VT.getVectorNumElements();
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps);
+ Results.push_back(Res);
+ }
+ return;
+ }
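The new ISD::MUL case promotes a v2i32 multiply to PMULUDQ, which multiplies only the low 32 bits of each 64-bit lane, so whatever ANY_EXTEND leaves in the upper halves is ignored and truncating the 64-bit product back to i32 yields the correctly wrapped result. A scalar check of why the truncation is safe (illustrative names):

    #include <cstdint>

    uint32_t mul32ViaPmuludq(uint32_t A, uint32_t B) {
      // PMULUDQ semantics per lane: full 64-bit product of the low 32 bits.
      uint64_t Prod = static_cast<uint64_t>(A) * static_cast<uint64_t>(B);
      return static_cast<uint32_t>(Prod);   // ISD::TRUNCATE back to i32
    }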
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ case X86ISD::VPMADDWD:
case X86ISD::AVG: {
- // Legalize types for X86ISD::AVG by expanding vectors.
+ // Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
+ // X86ISD::AVG/VPMADDWD by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- auto InVT = N->getValueType(0);
- assert(InVT.getSizeInBits() < 128);
- assert(128 % InVT.getSizeInBits() == 0);
+ EVT VT = N->getValueType(0);
+ EVT InVT = N->getOperand(0).getValueType();
+ assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 &&
+ "Expected a VT that divides into 128 bits.");
unsigned NumConcat = 128 / InVT.getSizeInBits();
- EVT RegVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(),
- NumConcat * InVT.getVectorNumElements());
+ EVT InWideVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(),
+ NumConcat * InVT.getVectorNumElements());
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ NumConcat * VT.getVectorNumElements());
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = N->getOperand(0);
- SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+ SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
Ops[0] = N->getOperand(1);
- SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+ SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
- SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1);
- if (getTypeAction(*DAG.getContext(), InVT) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res,
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1);
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
@@ -25456,7 +26390,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// setCC result type is v2i1 because type legalzation will end up with
// a v4i1 setcc plus an extend.
assert(N->getValueType(0) == MVT::v2i32 && "Unexpected type");
- if (N->getOperand(0).getValueType() != MVT::v2f32)
+ if (N->getOperand(0).getValueType() != MVT::v2f32 ||
+ getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector)
return;
SDValue UNDEF = DAG.getUNDEF(MVT::v2f32);
SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
@@ -25465,9 +26400,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
N->getOperand(1), UNDEF);
SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS,
N->getOperand(2));
- if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
+ DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
}
@@ -25489,13 +26423,198 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
- case ISD::UREM:
+ case ISD::UREM: {
+ EVT VT = N->getValueType(0);
+ if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector) {
+ // If this RHS is a constant splat vector we can widen this and let
+ // division/remainder by constant optimize it.
+ // TODO: Can we do something for non-splat?
+ APInt SplatVal;
+ if (ISD::isConstantSplatVector(N->getOperand(1).getNode(), SplatVal)) {
+ unsigned NumConcats = 128 / VT.getSizeInBits();
+ SmallVector<SDValue, 8> Ops0(NumConcats, DAG.getUNDEF(VT));
+ Ops0[0] = N->getOperand(0);
+ EVT ResVT = getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Ops0);
+ SDValue N1 = DAG.getConstant(SplatVal, dl, ResVT);
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, ResVT, N0, N1);
+ Results.push_back(Res);
+ }
+ return;
+ }
+
+ if (VT == MVT::v2i32) {
+ // Legalize v2i32 div/rem by unrolling. Otherwise we promote to the
+ // v2i64 and unroll later. But then we create i64 scalar ops which
+ // might be slow in 64-bit mode or require a libcall in 32-bit mode.
+ Results.push_back(DAG.UnrollVectorOp(N));
+ return;
+ }
+
+ if (VT.isVector())
+ return;
+
+ LLVM_FALLTHROUGH;
+ }
case ISD::SDIVREM:
case ISD::UDIVREM: {
SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
Results.push_back(V);
return;
}
+ case ISD::TRUNCATE: {
+ MVT VT = N->getSimpleValueType(0);
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
+
+ // The generic legalizer will try to widen the input type to the same
+ // number of elements as the widened result type. But this isn't always
+ // the best thing so do some custom legalization to avoid some cases.
+ MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+
+ unsigned InBits = InVT.getSizeInBits();
+ if (128 % InBits == 0) {
+ // 128 bit and smaller inputs should avoid truncate all together and
+ // just use a build_vector that will become a shuffle.
+ // TODO: Widen and use a shuffle directly?
+ MVT InEltVT = InVT.getSimpleVT().getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ // Use the original element count so we don't do more scalar opts than
+ // necessary.
+ unsigned MinElts = VT.getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In,
+ DAG.getIntPtrConstant(i, dl));
+ Ops[i] = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Val);
+ }
+ Results.push_back(DAG.getBuildVector(WidenVT, dl, Ops));
+ return;
+ }
+ // With AVX512 there are some cases that can use a target specific
+ // truncate node to go from 256/512 to less than 128 with zeros in the
+ // upper elements of the 128 bit result.
+ if (Subtarget.hasAVX512() && isTypeLegal(InVT)) {
+ // We can use VTRUNC directly if for 256 bits with VLX or for any 512.
+ if ((InBits == 256 && Subtarget.hasVLX()) || InBits == 512) {
+ Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In));
+ return;
+ }
+ // There's one case we can widen to 512 bits and use VTRUNC.
+ if (InVT == MVT::v4i64 && VT == MVT::v4i8 && isTypeLegal(MVT::v8i64)) {
+ In = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i64, In,
+ DAG.getUNDEF(MVT::v4i64));
+ Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In));
+ return;
+ }
+ }
+ return;
+ }
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ if (ExperimentalVectorWideningLegalization)
+ return;
+
+ EVT VT = N->getValueType(0);
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+ if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
+ (InVT == MVT::v16i16 || InVT == MVT::v32i8)) {
+ // Custom split this so we can extend i8/i16->i32 invec. This is better
+ // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
+ // sra, followed by an extend from i32 to i64 using pcmpgt. By custom
+ // splitting we allow the sra from the extend to i32 to be shared by the
+ // split.
+ EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(),
+ InVT.getVectorNumElements() / 2);
+ MVT ExtendVT = MVT::getVectorVT(MVT::i32,
+ VT.getVectorNumElements());
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT,
+ In, DAG.getIntPtrConstant(0, dl));
+ In = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, MVT::v4i32, In);
+
+ // Fill a vector with sign bits for each element.
+ SDValue Zero = DAG.getConstant(0, dl, ExtendVT);
+ SDValue SignBits = DAG.getSetCC(dl, ExtendVT, Zero, In, ISD::SETGT);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // Create an unpackl and unpackh to interleave the sign bits then bitcast
+ // to vXi64.
+ SDValue Lo = getUnpackl(DAG, dl, ExtendVT, In, SignBits);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ SDValue Hi = getUnpackh(DAG, dl, ExtendVT, In, SignBits);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Results.push_back(Res);
+ return;
+ }
+ return;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ if (!ExperimentalVectorWideningLegalization)
+ return;
+
+ EVT VT = N->getValueType(0);
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+ if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
+ (InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
+ // Custom split this so we can extend i8/i16->i32 invec. This is better
+ // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
+ // sra, followed by an extend from i32 to i64 using pcmpgt. By custom
+ // splitting we allow the sra from the extend to i32 to be shared by the
+ // split.
+ In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In);
+
+ // Fill a vector with sign bits for each element.
+ SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
+ SDValue SignBits = DAG.getSetCC(dl, MVT::v4i32, Zero, In, ISD::SETGT);
+
+ // Create an unpackl and unpackh to interleave the sign bits then bitcast
+ // to v2i64.
+ SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
+ {0, 4, 1, 5});
+ Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo);
+ SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
+ {2, 6, 3, 7});
+ Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Results.push_back(Res);
+ return;
+ }
+
+ if ((VT == MVT::v16i32 || VT == MVT::v8i64) && InVT.is128BitVector()) {
+ // Perform custom splitting instead of the two stage extend we would get
+ // by default.
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ assert(isTypeLegal(LoVT) && "Split VT not legal?");
+
+ bool IsSigned = N->getOpcode() == ISD::SIGN_EXTEND;
+
+ SDValue Lo = getExtendInVec(IsSigned, dl, LoVT, In, DAG);
+
+ // We need to shift the input over by half the number of elements.
+ unsigned NumElts = InVT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ SmallVector<int, 16> ShufMask(NumElts, SM_SentinelUndef);
+ for (unsigned i = 0; i != HalfNumElts; ++i)
+ ShufMask[i] = i + HalfNumElts;
+
+ SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
+ Hi = getExtendInVec(IsSigned, dl, HiVT, Hi, DAG);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Results.push_back(Res);
+ }
+ return;
+ }
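The sign-bit trick used by the custom split above can be checked in scalar form: interleaving a 32-bit value with a lane that is all-ones exactly when the value is negative reproduces the i64 sign extension. A minimal sketch (plain C++, hypothetical helper name):

#include <cassert>
#include <cstdint>

// Rebuild sign_extend(i32 -> i64) from the low 32 bits plus a "sign bits"
// word (0 or 0xFFFFFFFF), mirroring the setcc + unpack sequence above.
int64_t sextViaSignBits(int32_t X) {
  uint32_t SignBits = (0 > X) ? 0xFFFFFFFFu : 0u;                // setcc(0, X, SETGT)
  uint64_t Interleaved = (uint64_t)(uint32_t)X | ((uint64_t)SignBits << 32);  // unpack lo/hi
  return (int64_t)Interleaved;
}

// Quick check against the native sign extension.
int main() {
  for (int32_t X : {0, 1, -1, 42, -12345, INT32_MIN, INT32_MAX})
    assert(sextViaSignBits(X) == (int64_t)X);
}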
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
@@ -25503,38 +26622,90 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
+ // Promote these manually to avoid over-promotion to v2i64. Type
+ // legalization will revisit the v2i32 operation for more cleanup.
+ if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
+ getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) {
+ // AVX512DQ provides instructions that produce a v2i64 result.
+ if (Subtarget.hasDQI())
+ return;
+
+ SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src);
+ Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, MVT::v2i32, Res,
+ DAG.getValueType(VT.getVectorElementType()));
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ Results.push_back(Res);
+ return;
+ }
+
+ if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
+
+ // Try to create a 128 bit vector, but don't exceed a 32 bit element.
+ unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
+ MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth),
+ VT.getVectorNumElements());
+ SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
+
+ // Preserve what we know about the size of the original result, except
+ // when the result is v2i32, since we can't widen the assert.
+ if (PromoteVT != MVT::v2i32)
+ Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, PromoteVT, Res,
+ DAG.getValueType(VT.getVectorElementType()));
+
+ // Truncate back to the original width.
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+
+ // Now widen to 128 bits.
+ unsigned NumConcats = 128 / VT.getSizeInBits();
+ MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),
+ VT.getVectorNumElements() * NumConcats);
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
+ Results.push_back(Res);
+ return;
+ }
+
if (VT == MVT::v2i32) {
assert((IsSigned || Subtarget.hasAVX512()) &&
"Can only handle signed conversion without AVX512");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
+ bool Widenv2i32 =
+ getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector;
if (Src.getValueType() == MVT::v2f64) {
- MVT ResVT = MVT::v4i32;
unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
if (!IsSigned && !Subtarget.hasVLX()) {
- // Widen to 512-bits.
- ResVT = MVT::v8i32;
+ // If v2i32 is widened, we can defer to the generic legalizer.
+ if (Widenv2i32)
+ return;
+ // Custom widen by doubling to a legal vector width. Isel will
+ // further widen to v8f64.
Opc = ISD::FP_TO_UINT;
- Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
- DAG.getUNDEF(MVT::v8f64),
- Src, DAG.getIntPtrConstant(0, dl));
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64,
+ Src, DAG.getUNDEF(MVT::v2f64));
}
- SDValue Res = DAG.getNode(Opc, dl, ResVT, Src);
- bool WidenType = getTypeAction(*DAG.getContext(),
- MVT::v2i32) == TypeWidenVector;
- ResVT = WidenType ? MVT::v4i32 : MVT::v2i32;
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Res,
- DAG.getIntPtrConstant(0, dl));
+ SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
+ if (!Widenv2i32)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
+ DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
}
- if (SrcVT == MVT::v2f32) {
+ if (SrcVT == MVT::v2f32 &&
+ getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
: ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
- if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
}
@@ -25610,7 +26781,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_ROUND: {
- if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
+ if (!isTypeLegal(N->getOperand(0).getValueType()))
return;
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
@@ -25780,29 +26951,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
if (SrcVT != MVT::f64 ||
- (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8))
+ (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8) ||
+ getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector)
return;
unsigned NumElts = DstVT.getVectorNumElements();
EVT SVT = DstVT.getVectorElementType();
EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
- SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
- MVT::v2f64, N->getOperand(0));
- SDValue ToVecInt = DAG.getBitcast(WiderVT, Expanded);
-
- if (getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector) {
- // If we are legalizing vectors by widening, we already have the desired
- // legal vector type, just return it.
- Results.push_back(ToVecInt);
- return;
- }
-
- SmallVector<SDValue, 8> Elts;
- for (unsigned i = 0, e = NumElts; i != e; ++i)
- Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT,
- ToVecInt, DAG.getIntPtrConstant(i, dl)));
-
- Results.push_back(DAG.getBuildVector(DstVT, dl, Elts));
+ SDValue Res;
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0));
+ Res = DAG.getBitcast(WiderVT, Res);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
return;
}
case ISD::MGATHER: {
@@ -25814,9 +26975,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
SDValue Mask = Gather->getMask();
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
- Gather->getValue(),
- DAG.getUNDEF(MVT::v2f32));
+ SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
+ Gather->getPassThru(),
+ DAG.getUNDEF(MVT::v2f32));
if (!Subtarget.hasVLX()) {
// We need to widen the mask, but the instruction will only use 2
// of its elements. So we can use undef.
@@ -25824,8 +26985,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getUNDEF(MVT::v2i1));
Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
}
- SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
- Index, Gather->getScale() };
+ SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
+ Gather->getBasePtr(), Index, Gather->getScale() };
SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl,
Gather->getMemoryVT(), Gather->getMemOperand());
@@ -25838,9 +26999,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Index = Gather->getIndex();
SDValue Mask = Gather->getMask();
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
- Gather->getValue(),
- DAG.getUNDEF(MVT::v2i32));
+ SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
+ Gather->getPassThru(),
+ DAG.getUNDEF(MVT::v2i32));
// If the index is v2i64 we can use it directly.
if (Index.getValueType() == MVT::v2i64 &&
(Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
@@ -25851,8 +27012,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getUNDEF(MVT::v2i1));
Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
}
- SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
- Index, Gather->getScale() };
+ SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
+ Gather->getBasePtr(), Index, Gather->getScale() };
SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl,
Gather->getMemoryVT(), Gather->getMemOperand());
@@ -25864,28 +27025,56 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Chain);
return;
}
- EVT IndexVT = Index.getValueType();
- EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(),
- IndexVT.getScalarType(), 4);
- // Otherwise we need to custom widen everything to avoid promotion.
- Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
- DAG.getUNDEF(IndexVT));
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
- DAG.getConstant(0, dl, MVT::v2i1));
- SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
- Index, Gather->getScale() };
- SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other),
- Gather->getMemoryVT(), dl, Ops,
- Gather->getMemOperand());
- SDValue Chain = Res.getValue(1);
- if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- Results.push_back(Chain);
- return;
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
+ EVT IndexVT = Index.getValueType();
+ EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ IndexVT.getScalarType(), 4);
+ // Otherwise we need to custom widen everything to avoid promotion.
+ Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
+ DAG.getUNDEF(IndexVT));
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
+ DAG.getConstant(0, dl, MVT::v2i1));
+ SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
+ Gather->getBasePtr(), Index, Gather->getScale() };
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other),
+ Gather->getMemoryVT(), dl, Ops,
+ Gather->getMemOperand());
+ SDValue Chain = Res.getValue(1);
+ if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
+ }
}
- break;
+ return;
+ }
+ case ISD::LOAD: {
+ // Use an f64/i64 load and a scalar_to_vector for v2f32/v2i32 loads. This
+ // avoids scalarizing in 32-bit mode. In 64-bit mode this avoids an int->fp
+ // cast, since type legalization will try to use an i64 load.
+ MVT VT = N->getSimpleValueType(0);
+ assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT");
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
+ if (!ISD::isNON_EXTLoad(N))
+ return;
+ auto *Ld = cast<LoadSDNode>(N);
+ MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
+ SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(),
+ Ld->getAlignment(),
+ Ld->getMemOperand()->getFlags());
+ SDValue Chain = Res.getValue(1);
+ MVT WideVT = MVT::getVectorVT(LdVT, 2);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
+ MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() * 2);
+ Res = DAG.getBitcast(CastVT, Res);
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
}
}
}
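The single f64/i64 load in the new ISD::LOAD case is equivalent, on a little-endian target, to loading the two 32-bit elements separately and then widening. A scalar sketch of that equivalence using memcpy as the "bitcast" (hypothetical helper, little-endian assumed):

#include <cassert>
#include <cstdint>
#include <cstring>

void checkWideLoad(const uint32_t Mem[2]) {
  uint64_t Wide;
  std::memcpy(&Wide, Mem, sizeof(Wide));        // one 64-bit load
  uint32_t Lanes[2];
  std::memcpy(Lanes, &Wide, sizeof(Lanes));     // reinterpret as two 32-bit lanes
  assert(Lanes[0] == Mem[0] && Lanes[1] == Mem[1]);
}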
@@ -25943,9 +27132,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
- case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND";
- case X86ISD::ADDUS: return "X86ISD::ADDUS";
- case X86ISD::SUBUS: return "X86ISD::SUBUS";
+ case X86ISD::BLENDV: return "X86ISD::BLENDV";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
@@ -25988,15 +27175,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::LOR: return "X86ISD::LOR";
case X86ISD::LXOR: return "X86ISD::LXOR";
case X86ISD::LAND: return "X86ISD::LAND";
- case X86ISD::LINC: return "X86ISD::LINC";
- case X86ISD::LDEC: return "X86ISD::LDEC";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
- case X86ISD::VZEXT: return "X86ISD::VZEXT";
- case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
+ case X86ISD::VMTRUNC: return "X86ISD::VMTRUNC";
+ case X86ISD::VMTRUNCS: return "X86ISD::VMTRUNCS";
+ case X86ISD::VMTRUNCUS: return "X86ISD::VMTRUNCUS";
case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
@@ -26005,6 +27191,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
+ case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
@@ -26029,16 +27216,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
- case X86ISD::SMUL8: return "X86ISD::SMUL8";
- case X86ISD::UMUL8: return "X86ISD::UMUL8";
- case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG";
- case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG";
- case X86ISD::INC: return "X86ISD::INC";
- case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::BEXTR: return "X86ISD::BEXTR";
+ case X86ISD::BZHI: return "X86ISD::BZHI";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::MOVMSK: return "X86ISD::MOVMSK";
case X86ISD::PTEST: return "X86ISD::PTEST";
@@ -26136,7 +27318,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::XTEST: return "X86ISD::XTEST";
case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
case X86ISD::EXPAND: return "X86ISD::EXPAND";
- case X86ISD::SELECT: return "X86ISD::SELECT";
case X86ISD::SELECTS: return "X86ISD::SELECTS";
case X86ISD::ADDSUB: return "X86ISD::ADDSUB";
case X86ISD::RCP14: return "X86ISD::RCP14";
@@ -26162,16 +27343,18 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND";
case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
- case X86ISD::ADDS: return "X86ISD::ADDS";
- case X86ISD::SUBS: return "X86ISD::SUBS";
case X86ISD::AVG: return "X86ISD::AVG";
case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI";
case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI";
+ case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI";
+ case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI";
case X86ISD::CVTTP2SI_RND: return "X86ISD::CVTTP2SI_RND";
case X86ISD::CVTTP2UI_RND: return "X86ISD::CVTTP2UI_RND";
+ case X86ISD::CVTTS2SI: return "X86ISD::CVTTS2SI";
+ case X86ISD::CVTTS2UI: return "X86ISD::CVTTS2UI";
case X86ISD::CVTTS2SI_RND: return "X86ISD::CVTTS2SI_RND";
case X86ISD::CVTTS2UI_RND: return "X86ISD::CVTTS2UI_RND";
case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P";
@@ -26182,12 +27365,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND";
case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
+ case X86ISD::MCVTPS2PH: return "X86ISD::MCVTPS2PH";
case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
case X86ISD::CVTPH2PS_RND: return "X86ISD::CVTPH2PS_RND";
case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI";
+ case X86ISD::MCVTP2SI: return "X86ISD::MCVTP2SI";
+ case X86ISD::MCVTP2UI: return "X86ISD::MCVTP2UI";
case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND";
case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND";
+ case X86ISD::CVTS2SI: return "X86ISD::CVTS2SI";
+ case X86ISD::CVTS2UI: return "X86ISD::CVTS2UI";
case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
case X86ISD::LWPINS: return "X86ISD::LWPINS";
@@ -26321,6 +27509,10 @@ bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<32>(Imm);
}
+bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const {
+ return isInt<32>(Imm);
+}
+
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
@@ -26434,7 +27626,7 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
// If the subtarget is using retpolines, we need to not generate jump tables.
- if (Subtarget.useRetpoline())
+ if (Subtarget.useRetpolineIndirectBranches())
return false;
// Otherwise, fallback on the generic logic.
@@ -26633,8 +27825,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Memory Reference
assert(MI.hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 1> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
// Machine Information
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -26732,7 +27924,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
@@ -26757,7 +27949,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, 16)
.add(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// Zero-extend the offset
unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
@@ -26785,7 +27977,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
.addReg(NextOffsetReg)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// Jump to endMBB
BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
@@ -26804,7 +27996,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, 8)
.add(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// If we need to align it, do so. Otherwise, just copy the address
// to OverflowDestReg.
@@ -26841,7 +28033,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.addDisp(Disp, 8)
.add(Segment)
.addReg(NextAddrReg)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
@@ -26981,19 +28173,17 @@ static bool isCMOVPseudo(MachineInstr &MI) {
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
- case X86::CMOV_V2F64:
- case X86::CMOV_V2I64:
- case X86::CMOV_V4F32:
- case X86::CMOV_V4F64:
- case X86::CMOV_V4I64:
- case X86::CMOV_V16F32:
- case X86::CMOV_V8F32:
- case X86::CMOV_V8F64:
- case X86::CMOV_V8I64:
- case X86::CMOV_V8I1:
- case X86::CMOV_V16I1:
- case X86::CMOV_V32I1:
- case X86::CMOV_V64I1:
+ case X86::CMOV_VR128:
+ case X86::CMOV_VR128X:
+ case X86::CMOV_VR256:
+ case X86::CMOV_VR256X:
+ case X86::CMOV_VR512:
+ case X86::CMOV_VK2:
+ case X86::CMOV_VK4:
+ case X86::CMOV_VK8:
+ case X86::CMOV_VK16:
+ case X86::CMOV_VK32:
+ case X86::CMOV_VK64:
return true;
default:
@@ -27815,8 +29005,8 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MachineInstrBuilder MIB;
// Memory Reference.
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
// Initialize a register with zero.
MVT PVT = getPointerTy(MF->getDataLayout());
@@ -27845,7 +29035,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MIB.add(MI.getOperand(MemOpndSlot + i));
}
MIB.addReg(SSPCopyReg);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
}
MachineBasicBlock *
@@ -27861,8 +29051,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineFunction::iterator I = ++MBB->getIterator();
// Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
unsigned DstReg;
unsigned MemOpndSlot = 0;
@@ -27956,7 +29146,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MIB.addReg(LabelReg);
else
MIB.addMBB(restoreMBB);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
emitSetJmpShadowStackFix(MI, thisMBB);
@@ -28017,8 +29207,8 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
MVT PVT = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
@@ -28100,12 +29290,16 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineInstrBuilder MIB =
BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
- MIB.addDisp(MI.getOperand(i), SPPOffset);
+ MIB.addDisp(MO, SPPOffset);
+ else if (MO.isReg()) // Don't add the whole operand; we don't want to
+ // preserve kill flags.
+ MIB.addReg(MO.getReg());
else
- MIB.add(MI.getOperand(i));
+ MIB.add(MO);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
// Subtract the current SSP from the previous SSP.
unsigned SspSubReg = MRI.createVirtualRegister(PtrRC);
@@ -28189,8 +29383,8 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
@@ -28221,19 +29415,29 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
// Reload FP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.add(MI.getOperand(i));
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) // Don't add the whole operand; we don't want to
+ // preserve kill flags.
+ MIB.addReg(MO.getReg());
+ else
+ MIB.add(MO);
+ }
+ MIB.setMemRefs(MMOs);
// Reload IP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
- MIB.addDisp(MI.getOperand(i), LabelOffset);
+ MIB.addDisp(MO, LabelOffset);
+ else if (MO.isReg()) // Don't add the whole operand; we don't want to
+ // preserve kill flags.
+ MIB.addReg(MO.getReg());
else
- MIB.add(MI.getOperand(i));
+ MIB.add(MO);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
// Reload SP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP);
@@ -28241,9 +29445,10 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPOffset);
else
- MIB.add(MI.getOperand(i));
+ MIB.add(MI.getOperand(i)); // We can preserve the kill flags here; it's
+ // the last instruction of the expansion.
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
// Jump
BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
@@ -28562,26 +29767,23 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_FR32:
case X86::CMOV_FR64:
- case X86::CMOV_F128:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
- case X86::CMOV_V2F64:
- case X86::CMOV_V2I64:
- case X86::CMOV_V4F32:
- case X86::CMOV_V4F64:
- case X86::CMOV_V4I64:
- case X86::CMOV_V16F32:
- case X86::CMOV_V8F32:
- case X86::CMOV_V8F64:
- case X86::CMOV_V8I64:
- case X86::CMOV_V8I1:
- case X86::CMOV_V16I1:
- case X86::CMOV_V32I1:
- case X86::CMOV_V64I1:
+ case X86::CMOV_VR128:
+ case X86::CMOV_VR128X:
+ case X86::CMOV_VR256:
+ case X86::CMOV_VR256X:
+ case X86::CMOV_VR512:
+ case X86::CMOV_VK2:
+ case X86::CMOV_VK4:
+ case X86::CMOV_VK8:
+ case X86::CMOV_VK16:
+ case X86::CMOV_VK32:
+ case X86::CMOV_VK64:
return EmitLoweredSelect(MI, BB);
case X86::RDFLAGS32:
@@ -28890,11 +30092,12 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
EVT SrcVT = Src.getValueType();
APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
Op.getConstantOperandVal(1));
- DAG.computeKnownBits(Src, Known, DemandedElt, Depth + 1);
+ Known = DAG.computeKnownBits(Src, DemandedElt, Depth + 1);
Known = Known.zextOrTrunc(BitWidth);
Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits());
break;
}
+ case X86ISD::VSRAI:
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
@@ -28903,72 +30106,62 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
}
- DAG.computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned ShAmt = ShiftImm->getZExtValue();
if (Opc == X86ISD::VSHLI) {
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// Low bits are known zero.
Known.Zero.setLowBits(ShAmt);
- } else {
+ } else if (Opc == X86ISD::VSRLI) {
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits are known zero.
Known.Zero.setHighBits(ShAmt);
+ } else {
+ Known.Zero.ashrInPlace(ShAmt);
+ Known.One.ashrInPlace(ShAmt);
}
}
break;
}
case X86ISD::PACKUS: {
// PACKUS is just a truncation if the upper half is zero.
- // TODO: Add DemandedElts support.
+ APInt DemandedLHS, DemandedRHS;
+ getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
+
+ Known.One = APInt::getAllOnesValue(BitWidth * 2);
+ Known.Zero = APInt::getAllOnesValue(BitWidth * 2);
+
KnownBits Known2;
- DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(1), Known2, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ if (!!DemandedLHS) {
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedLHS, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ if (!!DemandedRHS) {
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedRHS, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+
if (Known.countMinLeadingZeros() < BitWidth)
Known.resetAll();
Known = Known.trunc(BitWidth);
break;
}
- case X86ISD::VZEXT: {
- // TODO: Add DemandedElts support.
- SDValue N0 = Op.getOperand(0);
- unsigned NumElts = VT.getVectorNumElements();
-
- EVT SrcVT = N0.getValueType();
- unsigned InNumElts = SrcVT.getVectorNumElements();
- unsigned InBitWidth = SrcVT.getScalarSizeInBits();
- assert(InNumElts >= NumElts && "Illegal VZEXT input");
-
- Known = KnownBits(InBitWidth);
- APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts);
- DAG.computeKnownBits(N0, Known, DemandedSrcElts, Depth + 1);
- Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InBitWidth);
- break;
- }
case X86ISD::CMOV: {
- DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1);
+ Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
- KnownBits Known2;
- DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1);
+ KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
}
- case X86ISD::UDIVREM8_ZEXT_HREG:
- // TODO: Support more than just the zero extended bits?
- if (Op.getResNo() != 1)
- break;
- // The remainder is zero extended.
- Known.Zero.setBitsFrom(8);
- break;
}
// Handle target shuffles.
@@ -29013,8 +30206,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
for (unsigned i = 0; i != NumOps && !Known.isUnknown(); ++i) {
if (!DemandedOps[i])
continue;
- KnownBits Known2;
- DAG.computeKnownBits(Ops[i], Known2, DemandedOps[i], Depth + 1);
+ KnownBits Known2 =
+ DAG.computeKnownBits(Ops[i], DemandedOps[i], Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -29033,14 +30226,6 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
return VTBits;
- case X86ISD::VSEXT: {
- // TODO: Add DemandedElts support.
- SDValue Src = Op.getOperand(0);
- unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
- Tmp += VTBits - Src.getScalarValueSizeInBits();
- return Tmp;
- }
-
case X86ISD::VTRUNC: {
// TODO: Add DemandedElts support.
SDValue Src = Op.getOperand(0);
@@ -29054,10 +30239,16 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
case X86ISD::PACKSS: {
// PACKSS is just a truncation if the sign bits extend to the packed size.
- // TODO: Add DemandedElts support.
+ APInt DemandedLHS, DemandedRHS;
+ getPackDemandedElts(Op.getValueType(), DemandedElts, DemandedLHS,
+ DemandedRHS);
+
unsigned SrcBits = Op.getOperand(0).getScalarValueSizeInBits();
- unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
- unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ unsigned Tmp0 = SrcBits, Tmp1 = SrcBits;
+ if (!!DemandedLHS)
+ Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
+ if (!!DemandedRHS)
+ Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
unsigned Tmp = std::min(Tmp0, Tmp1);
if (Tmp > (SrcBits - VTBits))
return Tmp - (SrcBits - VTBits);
@@ -29099,12 +30290,6 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1);
return std::min(Tmp0, Tmp1);
}
- case X86ISD::SDIVREM8_SEXT_HREG:
- // TODO: Support more than just the sign extended bits?
- if (Op.getResNo() != 1)
- break;
- // The remainder is sign extended.
- return VTBits - 7;
}
// Fallback case.
@@ -29117,21 +30302,6 @@ SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
return N;
}
-/// Returns true (and the GlobalValue and the offset) if the node is a
-/// GlobalAddress + offset.
-bool X86TargetLowering::isGAPlusOffset(SDNode *N,
- const GlobalValue* &GA,
- int64_t &Offset) const {
- if (N->getOpcode() == X86ISD::Wrapper) {
- if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
- GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
- Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
- return true;
- }
- }
- return TargetLowering::isGAPlusOffset(N, GA, Offset);
-}
-
// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
@@ -29170,10 +30340,12 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
MVT::getIntegerVT(MaskEltSize);
SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
- if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits()) {
+ if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
- Shuffle = unsigned(X86ISD::VZEXT);
- } else
+
+ if (SrcVT.getVectorNumElements() == NumDstElts)
+ Shuffle = unsigned(ISD::ZERO_EXTEND);
+ else
Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
@@ -29430,9 +30602,10 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- // Attempt to match against either a unary or binary PACKSS/PACKUS shuffle.
- // TODO add support for 256/512-bit types.
- if ((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) {
+ // Attempt to match against either a unary or binary PACKSS/PACKUS shuffle.
+ if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) ||
+ ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) ||
+ ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
if (matchVectorShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
Subtarget)) {
DstVT = MaskVT;
@@ -29622,7 +30795,8 @@ static bool matchBinaryPermuteVectorShuffle(
/// instruction but should only be used to replace chains over a certain depth.
static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
ArrayRef<int> BaseMask, int Depth,
- bool HasVariableMask, SelectionDAG &DAG,
+ bool HasVariableMask,
+ bool AllowVariableMask, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!");
assert((Inputs.size() == 1 || Inputs.size() == 2) &&
@@ -29835,7 +31009,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Depth threshold above which we can efficiently use variable mask shuffles.
int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3;
- bool AllowVariableMask = (Depth >= VariableShuffleDepth) || HasVariableMask;
+ AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
bool MaskContainsZeros =
any_of(Mask, [](int M) { return M == SM_SentinelZero; });
@@ -30169,7 +31343,8 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
static SDValue combineX86ShufflesRecursively(
ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
- bool HasVariableMask, SelectionDAG &DAG, const X86Subtarget &Subtarget) {
+ bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
const unsigned MaxRecursionDepth = 8;
@@ -30195,30 +31370,36 @@ static SDValue combineX86ShufflesRecursively(
if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
return SDValue();
- assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
+ // TODO - Add support for more than 2 inputs.
+ if (2 < OpInputs.size())
+ return SDValue();
+
SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue());
SDValue Input1 = (OpInputs.size() > 1 ? OpInputs[1] : SDValue());
// Add the inputs to the Ops list, avoiding duplicates.
SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
- int InputIdx0 = -1, InputIdx1 = -1;
- for (int i = 0, e = Ops.size(); i < e; ++i) {
- SDValue BC = peekThroughBitcasts(Ops[i]);
- if (Input0 && BC == peekThroughBitcasts(Input0))
- InputIdx0 = i;
- if (Input1 && BC == peekThroughBitcasts(Input1))
- InputIdx1 = i;
- }
+ auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {
+ if (!Input)
+ return -1;
+ // Attempt to find an existing match.
+ SDValue InputBC = peekThroughBitcasts(Input);
+ for (int i = 0, e = Ops.size(); i < e; ++i)
+ if (InputBC == peekThroughBitcasts(Ops[i]))
+ return i;
+ // Match failed - should we replace an existing Op?
+ if (InsertionPoint >= 0) {
+ Ops[InsertionPoint] = Input;
+ return InsertionPoint;
+ }
+ // Add to the end of the Ops list.
+ Ops.push_back(Input);
+ return Ops.size() - 1;
+ };
- if (Input0 && InputIdx0 < 0) {
- InputIdx0 = SrcOpIndex;
- Ops[SrcOpIndex] = Input0;
- }
- if (Input1 && InputIdx1 < 0) {
- InputIdx1 = Ops.size();
- Ops.push_back(Input1);
- }
+ int InputIdx0 = AddOp(Input0, SrcOpIndex);
+ int InputIdx1 = AddOp(Input1, -1);
assert(((RootMask.size() > OpMask.size() &&
RootMask.size() % OpMask.size() == 0) ||
@@ -30324,18 +31505,23 @@ static SDValue combineX86ShufflesRecursively(
CombinedNodes.push_back(Op.getNode());
// See if we can recurse into each shuffle source op (if it's a target
- // shuffle). The source op should only be combined if it either has a
- // single use (i.e. current Op) or all its users have already been combined.
+ // shuffle). The source op should generally only be combined if it either has
+ // a single use (i.e. the current Op) or all its users have already been combined;
+ // if not, we can still combine it, but should prevent generation of variable
+ // shuffles to avoid constant pool bloat.
// Don't recurse if we already have more source ops than we can combine in
// the remaining recursion depth.
if (Ops.size() < (MaxRecursionDepth - Depth)) {
- for (int i = 0, e = Ops.size(); i < e; ++i)
+ for (int i = 0, e = Ops.size(); i < e; ++i) {
+ bool AllowVar = false;
if (Ops[i].getNode()->hasOneUse() ||
SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
- if (SDValue Res = combineX86ShufflesRecursively(
- Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask,
- DAG, Subtarget))
- return Res;
+ AllowVar = AllowVariableMask;
+ if (SDValue Res = combineX86ShufflesRecursively(
+ Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask,
+ AllowVar, DAG, Subtarget))
+ return Res;
+ }
}
// Attempt to constant fold all of the constant source ops.
@@ -30365,8 +31551,8 @@ static SDValue combineX86ShufflesRecursively(
}
// Finally, try to combine into a single shuffle instruction.
- return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG,
- Subtarget);
+ return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask,
+ AllowVariableMask, DAG, Subtarget);
}
/// Get the PSHUF-style mask from PSHUF node.
@@ -30545,74 +31731,6 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
return V;
}
-/// Search for a combinable shuffle across a chain ending in pshuflw or
-/// pshufhw.
-///
-/// We walk up the chain, skipping shuffles of the other half and looking
-/// through shuffles which switch halves trying to find a shuffle of the same
-/// pair of dwords.
-static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
- SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
- assert(
- (N.getOpcode() == X86ISD::PSHUFLW || N.getOpcode() == X86ISD::PSHUFHW) &&
- "Called with something other than an x86 128-bit half shuffle!");
- SDLoc DL(N);
- unsigned CombineOpcode = N.getOpcode();
-
- // Walk up a single-use chain looking for a combinable shuffle.
- SDValue V = N.getOperand(0);
- for (; V.hasOneUse(); V = V.getOperand(0)) {
- switch (V.getOpcode()) {
- default:
- return false; // Nothing combined!
-
- case ISD::BITCAST:
- // Skip bitcasts as we always know the type for the target specific
- // instructions.
- continue;
-
- case X86ISD::PSHUFLW:
- case X86ISD::PSHUFHW:
- if (V.getOpcode() == CombineOpcode)
- break;
-
- // Other-half shuffles are no-ops.
- continue;
- }
- // Break out of the loop if we break out of the switch.
- break;
- }
-
- if (!V.hasOneUse())
- // We fell out of the loop without finding a viable combining instruction.
- return false;
-
- // Combine away the bottom node as its shuffle will be accumulated into
- // a preceding shuffle.
- DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
-
- // Record the old value.
- SDValue Old = V;
-
- // Merge this node's mask and our incoming mask (adjusted to account for all
- // the pshufd instructions encountered).
- SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
- for (int &M : Mask)
- M = VMask[M];
- V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
- getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
-
- // Check that the shuffles didn't cancel each other out. If not, we need to
- // combine to the new one.
- if (Old != V)
- // Replace the combinable shuffle with the combined one, updating all users
- // so that we re-evaluate the chain here.
- DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
-
- return true;
-}
-
/// Try to combine x86 target specific shuffles.
static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -30667,7 +31785,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
DemandedMask[i] = i;
if (SDValue Res = combineX86ShufflesRecursively(
{BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
}
@@ -30679,40 +31797,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
Mask = getPSHUFShuffleMask(N);
assert(Mask.size() == 4);
break;
- case X86ISD::UNPCKL: {
- // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in
- // which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE
- // moves upper half elements into the lower half part. For example:
- //
- // t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1,
- // undef:v16i8
- // t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2
- //
- // will be combined to:
- //
- // t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1
-
- // This is only for 128-bit vectors. From SSE4.1 onward this combine may not
- // happen due to advanced instructions.
- if (!VT.is128BitVector())
- return SDValue();
-
- auto Op0 = N.getOperand(0);
- auto Op1 = N.getOperand(1);
- if (Op0.isUndef() && Op1.getOpcode() == ISD::VECTOR_SHUFFLE) {
- ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask();
-
- unsigned NumElts = VT.getVectorNumElements();
- SmallVector<int, 8> ExpectedMask(NumElts, -1);
- std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2,
- NumElts / 2);
-
- auto ShufOp = Op1.getOperand(0);
- if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask))
- return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp);
- }
- return SDValue();
- }
case X86ISD::MOVSD:
case X86ISD::MOVSS: {
SDValue N0 = N.getOperand(0);
@@ -30844,9 +31928,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
case X86ISD::PSHUFHW:
assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!");
- if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
- return SDValue(); // We combined away this shuffle, so we're done.
-
// See if this reduces to a PSHUFD which is no more expensive and can
// combine with more operations. Note that it has to at least flip the
// dwords as otherwise it would have been removed as a no-op.
@@ -31286,13 +32367,404 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// a particular chain.
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
+
+ // Simplify source operands based on shuffle mask.
+ // TODO - merge this into combineX86ShufflesRecursively.
+ APInt KnownUndef, KnownZero;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, DCI))
+ return SDValue(N, 0);
+ }
+
+ // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
+ // operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
+ // FIXME: This can probably go away once we default to widening legalization.
+ if (Subtarget.hasSSE41() && VT == MVT::v4i32 &&
+ N->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N->getOperand(0).getOpcode() == ISD::BITCAST &&
+ N->getOperand(0).getOperand(0).getOpcode() == X86ISD::PMULUDQ) {
+ SDValue BC = N->getOperand(0);
+ SDValue MULUDQ = BC.getOperand(0);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> Mask = SVOp->getMask();
+ if (BC.hasOneUse() && MULUDQ.hasOneUse() &&
+ Mask[0] == 0 && Mask[1] == 2 && Mask[2] == -1 && Mask[3] == -1) {
+ SDValue Op0 = MULUDQ.getOperand(0);
+ SDValue Op1 = MULUDQ.getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op0.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
+ Op0.getOperand(0).getValueType() == MVT::v4i32) {
+ ShuffleVectorSDNode *SVOp0 =
+ cast<ShuffleVectorSDNode>(Op0.getOperand(0));
+ ArrayRef<int> Mask2 = SVOp0->getMask();
+ if (Mask2[0] == 0 && Mask2[1] == -1 &&
+ Mask2[2] == 1 && Mask2[3] == -1) {
+ Op0 = SVOp0->getOperand(0);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
+ Op1 = DAG.getVectorShuffle(MVT::v4i32, dl, Op1, Op1, Mask);
+ return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
+ }
+ }
+ if (Op1.getOpcode() == ISD::BITCAST &&
+ Op1.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
+ Op1.getOperand(0).getValueType() == MVT::v4i32) {
+ ShuffleVectorSDNode *SVOp1 =
+ cast<ShuffleVectorSDNode>(Op1.getOperand(0));
+ ArrayRef<int> Mask2 = SVOp1->getMask();
+ if (Mask2[0] == 0 && Mask2[1] == -1 &&
+ Mask2[2] == 1 && Mask2[3] == -1) {
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op0 = DAG.getVectorShuffle(MVT::v4i32, dl, Op0, Op0, Mask);
+ Op1 = SVOp1->getOperand(0);
+ return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
+ }
+ }
+ }
}
return SDValue();
}
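The PMULUDQ-to-pmulld rewrite above is justified because the truncating shuffle only keeps the low 32 bits of each 64-bit product, and multiplication is congruent modulo 2^32. A scalar sketch of that claim (hypothetical values, plain C++):

#include <cassert>
#include <cstdint>

uint32_t lowBitsOfWideMul(uint32_t A, uint32_t B) {
  uint64_t Wide = (uint64_t)A * (uint64_t)B;    // what PMULUDQ computes per lane
  return (uint32_t)Wide;                        // the truncating shuffle keeps this
}

int main() {
  for (uint32_t A : {0u, 1u, 7u, 0xFFFFFFFFu, 0x80000001u})
    for (uint32_t B : {0u, 3u, 0x10001u, 0xFFFFFFFFu})
      assert(lowBitsOfWideMul(A, B) == A * B);  // pmulld-style 32-bit multiply
}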
+bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
+ SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
+ TargetLoweringOpt &TLO, unsigned Depth) const {
+ int NumElts = DemandedElts.getBitWidth();
+ unsigned Opc = Op.getOpcode();
+ EVT VT = Op.getValueType();
+
+ // Handle special case opcodes.
+ switch (Opc) {
+ case X86ISD::VSHL:
+ case X86ISD::VSRL:
+ case X86ISD::VSRA: {
+ // We only need the bottom 64-bits of the (128-bit) shift amount.
+ SDValue Amt = Op.getOperand(1);
+ MVT AmtVT = Amt.getSimpleValueType();
+ assert(AmtVT.is128BitVector() && "Unexpected value type");
+ APInt AmtUndef, AmtZero;
+ unsigned NumAmtElts = AmtVT.getVectorNumElements();
+ APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
+ if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
+ Depth + 1))
+ return true;
+ LLVM_FALLTHROUGH;
+ }
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI: {
+ SDValue Src = Op.getOperand(0);
+ APInt SrcUndef;
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, SrcUndef, KnownZero, TLO,
+ Depth + 1))
+ return true;
+ // TODO convert SrcUndef to KnownUndef.
+ break;
+ }
+ case X86ISD::CVTSI2P:
+ case X86ISD::CVTUI2P: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ APInt SrcUndef, SrcZero;
+ APInt SrcElts = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::PACKSS:
+ case X86ISD::PACKUS: {
+ APInt DemandedLHS, DemandedRHS;
+ getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
+
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::VBROADCAST: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ if (!SrcVT.isVector())
+ return false;
+ // Don't bother broadcasting if we just need the 0'th element.
+ if (DemandedElts == 1) {
+ if (Src.getValueType() != VT)
+ Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG,
+ SDLoc(Op));
+ return TLO.CombineTo(Op, Src);
+ }
+ APInt SrcUndef, SrcZero;
+ APInt SrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0);
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::PSHUFB: {
+ // TODO - simplify other variable shuffle masks.
+ SDValue Mask = Op.getOperand(1);
+ APInt MaskUndef, MaskZero;
+ if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ }
+
+ // Simplify target shuffles.
+ if (!isTargetShuffle(Opc) || !VT.isSimple())
+ return false;
+
+ // Get target shuffle mask.
+ bool IsUnary;
+ SmallVector<int, 64> OpMask;
+ SmallVector<SDValue, 2> OpInputs;
+ if (!getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, OpInputs,
+ OpMask, IsUnary))
+ return false;
+
+ // Shuffle inputs must be the same type as the result.
+ if (llvm::any_of(OpInputs,
+ [VT](SDValue V) { return VT != V.getValueType(); }))
+ return false;
+
+ // Clear known elts that might have been set above.
+ KnownZero.clearAllBits();
+ KnownUndef.clearAllBits();
+
+ // Check if shuffle mask can be simplified to undef/zero/identity.
+ int NumSrcs = OpInputs.size();
+ for (int i = 0; i != NumElts; ++i) {
+ int &M = OpMask[i];
+ if (!DemandedElts[i])
+ M = SM_SentinelUndef;
+ else if (0 <= M && OpInputs[M / NumElts].isUndef())
+ M = SM_SentinelUndef;
+ }
+
+ if (isUndefInRange(OpMask, 0, NumElts)) {
+ KnownUndef.setAllBits();
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ }
+ if (isUndefOrZeroInRange(OpMask, 0, NumElts)) {
+ KnownZero.setAllBits();
+ return TLO.CombineTo(
+ Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));
+ }
+ for (int Src = 0; Src != NumSrcs; ++Src)
+ if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts))
+ return TLO.CombineTo(Op, OpInputs[Src]);
+
+ // Attempt to simplify inputs.
+ for (int Src = 0; Src != NumSrcs; ++Src) {
+ int Lo = Src * NumElts;
+ APInt SrcElts = APInt::getNullValue(NumElts);
+ for (int i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ int M = OpMask[i] - Lo;
+ if (0 <= M && M < NumElts)
+ SrcElts.setBit(M);
+ }
+
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+ }
+
+ // Extract known zero/undef elements.
+ // TODO - Propagate input undef/zero elts.
+ for (int i = 0; i != NumElts; ++i) {
+ if (OpMask[i] == SM_SentinelUndef)
+ KnownUndef.setBit(i);
+ if (OpMask[i] == SM_SentinelZero)
+ KnownZero.setBit(i);
+ }
+
+ return false;
+}
+
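The PACKSS/PACKUS handling above relies on mapping each demanded output element back to one element of one input, which is what getPackDemandedElts computes. For reference, the per-lane mapping looks roughly like this (a sketch with a hypothetical helper, 16-bit-to-8-bit packs assumed):

#include <utility>

// For a pack of two vXi16 inputs into one v(2X)i8 output, each 128-bit lane
// takes its first 8 bytes from the LHS lane and its last 8 from the RHS lane.
// Returns {which input (0 = LHS, 1 = RHS), element index within that input}.
std::pair<int, int> packSourceOfOutputElt(int OutIdx) {
  int Lane = OutIdx / 16;          // 128-bit lane of the result
  int InLane = OutIdx % 16;        // byte position within that lane
  int Input = InLane < 8 ? 0 : 1;  // low half from LHS, high half from RHS
  int Elt = Lane * 8 + (InLane % 8);
  return {Input, Elt};
}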
+bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
+ SDValue Op, const APInt &OriginalDemandedBits,
+ const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = OriginalDemandedBits.getBitWidth();
+ unsigned Opc = Op.getOpcode();
+ switch(Opc) {
+ case X86ISD::PMULDQ:
+ case X86ISD::PMULUDQ: {
+ // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
+ KnownBits KnownOp;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // FIXME: Can we bound this better?
+ APInt DemandedMask = APInt::getLowBitsSet(64, 32);
+ if (SimplifyDemandedBits(LHS, DemandedMask, KnownOp, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(RHS, DemandedMask, KnownOp, TLO, Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::VSHLI: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op1)) {
+ if (ShiftImm->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (Op0.getOpcode() == X86ISD::VSRLI &&
+ OriginalDemandedBits.countTrailingZeros() >= ShAmt) {
+ if (auto *Shift2Imm = dyn_cast<ConstantSDNode>(Op0.getOperand(1))) {
+ if (Shift2Imm->getAPIntValue().ult(BitWidth)) {
+ int Diff = ShAmt - Shift2Imm->getZExtValue();
+ if (Diff == 0)
+ return TLO.CombineTo(Op, Op0.getOperand(0));
+
+ unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI;
+ SDValue NewShift = TLO.DAG.getNode(
+ NewOpc, SDLoc(Op), VT, Op0.getOperand(0),
+ TLO.DAG.getConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
+ return TLO.CombineTo(Op, NewShift);
+ }
+ }
+ }
+
+ if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
+ TLO, Depth + 1))
+ return true;
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero <<= ShAmt;
+ Known.One <<= ShAmt;
+
+ // Low bits known zero.
+ Known.Zero.setLowBits(ShAmt);
+ }
+ break;
+ }
+ case X86ISD::VSRLI: {
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ShiftImm->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ APInt DemandedMask = OriginalDemandedBits << ShAmt;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
+ OriginalDemandedElts, Known, TLO, Depth + 1))
+ return true;
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
+
+ // High bits known zero.
+ Known.Zero.setHighBits(ShAmt);
+ }
+ break;
+ }
+ case X86ISD::VSRAI: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op1)) {
+ if (ShiftImm->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ APInt DemandedMask = OriginalDemandedBits << ShAmt;
+
+ // If we just want the sign bit then we don't need to shift it.
+ if (OriginalDemandedBits.isSignMask())
+ return TLO.CombineTo(Op, Op0);
+
+ // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
+ if (Op0.getOpcode() == X86ISD::VSHLI && Op1 == Op0.getOperand(1)) {
+ SDValue Op00 = Op0.getOperand(0);
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op00, OriginalDemandedElts);
+ if (ShAmt < NumSignBits)
+ return TLO.CombineTo(Op, Op00);
+ }
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ if (OriginalDemandedBits.countLeadingZeros() < ShAmt)
+ DemandedMask.setSignBit();
+
+ if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
+ TLO, Depth + 1))
+ return true;
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (Known.Zero[BitWidth - ShAmt - 1] ||
+ OriginalDemandedBits.countLeadingZeros() >= ShAmt)
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1));
+
+ // High bits are known one.
+ if (Known.One[BitWidth - ShAmt - 1])
+ Known.One.setHighBits(ShAmt);
+ }
+ break;
+ }
+ case X86ISD::MOVMSK: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ unsigned SrcBits = SrcVT.getScalarSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+
+ // If we don't need the sign bits at all just return zero.
+ if (OriginalDemandedBits.countTrailingZeros() >= NumElts)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
+ // Only demand the vector elements of the sign bits we need.
+ APInt KnownUndef, KnownZero;
+ APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
+ return true;
+
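+ // MOVMSK only defines the low NumElts bits of the result; the upper bits
+ // are known zero.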
+ Known.Zero = KnownZero.zextOrSelf(BitWidth);
+ Known.Zero.setHighBits(BitWidth - NumElts);
+
+ // MOVMSK only uses the MSB from each vector element.
+ KnownBits KnownSrc;
+ if (SimplifyDemandedBits(Src, APInt::getSignMask(SrcBits), DemandedElts,
+ KnownSrc, TLO, Depth + 1))
+ return true;
+
+ if (KnownSrc.One[SrcBits - 1])
+ Known.One.setLowBits(NumElts);
+ else if (KnownSrc.Zero[SrcBits - 1])
+ Known.Zero.setLowBits(NumElts);
+ return false;
+ }
+ }
+
+ return TargetLowering::SimplifyDemandedBitsForTargetNode(
+ Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
+}
+
/// Check if a vector extract from a target-specific shuffle of a load can be
/// folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
@@ -31344,9 +32816,13 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (Idx == SM_SentinelUndef)
return DAG.getUNDEF(EltVT);
+ // Bail if any mask element is SM_SentinelZero - getVectorShuffle below
+ // won't handle it.
+ if (llvm::any_of(ShuffleMask, [](int M) { return M == SM_SentinelZero; }))
+ return SDValue();
+
assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
- SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0]
- : ShuffleOps[1];
+ SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0] : ShuffleOps[1];
// If inputs to shuffle are the same for both ops, then allow 2 uses
unsigned AllowedUses =
@@ -31407,9 +32883,18 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
if (!VT.isScalarInteger() || !VecVT.isSimple())
return SDValue();
+ // If the input is a truncate from v16i8 or v32i8, go ahead and use a
+ // movmskb even with avx512. This will be better than truncating to vXi1 and
+ // using a kmov. This can especially help KNL if the input is a v16i8/v32i8
+ // vpcmpeqb/vpcmpgtb.
+ bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ (N0.getOperand(0).getValueType() == MVT::v16i8 ||
+ N0.getOperand(0).getValueType() == MVT::v32i8 ||
+ N0.getOperand(0).getValueType() == MVT::v64i8);
+
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
- if (Subtarget.hasAVX512() || !Subtarget.hasSSE2())
+ if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !IsTruncated))
return SDValue();
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
@@ -31423,23 +32908,19 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
// For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
// (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
MVT SExtVT;
- MVT FPCastVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
switch (VecVT.getSimpleVT().SimpleTy) {
default:
return SDValue();
case MVT::v2i1:
SExtVT = MVT::v2i64;
- FPCastVT = MVT::v2f64;
break;
case MVT::v4i1:
SExtVT = MVT::v4i32;
- FPCastVT = MVT::v4f32;
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
// sign-extend to a 256-bit operation to avoid truncation.
if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
N0->getOperand(0).getValueType().is256BitVector()) {
SExtVT = MVT::v4i64;
- FPCastVT = MVT::v4f64;
}
break;
case MVT::v8i1:
@@ -31453,7 +32934,6 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
(N0->getOperand(0).getValueType().is256BitVector() ||
N0->getOperand(0).getValueType().is512BitVector())) {
SExtVT = MVT::v8i32;
- FPCastVT = MVT::v8f32;
}
break;
case MVT::v16i1:
@@ -31466,26 +32946,37 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
case MVT::v32i1:
SExtVT = MVT::v32i8;
break;
+ case MVT::v64i1:
+ // If we have AVX512F but not AVX512BW, and the input is truncated from
+ // v64i8 (checked earlier), split the input and make two pmovmskbs.
+ if (Subtarget.hasAVX512() && !Subtarget.hasBWI()) {
+ SExtVT = MVT::v64i8;
+ break;
+ }
+ return SDValue();
};
SDLoc DL(BitCast);
- SDValue V = DAG.getSExtOrTrunc(N0, DL, SExtVT);
+ SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, N0);
- if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) {
+ if (SExtVT == MVT::v64i8) {
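+ // Split into two v32i8 halves, MOVMSK each half, and merge the two 32-bit
+ // results into an i64.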
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
+ Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo);
+ Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
+ DAG.getConstant(32, DL, MVT::i8));
+ V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
+ } else if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) {
V = getPMOVMSKB(DL, V, DAG, Subtarget);
- return DAG.getZExtOrTrunc(V, DL, VT);
+ } else {
+ if (SExtVT == MVT::v8i16)
+ V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
+ DAG.getUNDEF(MVT::v8i16));
+ V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
}
-
- if (SExtVT == MVT::v8i16) {
- assert(16 == DAG.ComputeNumSignBits(V) && "Expected all/none bit vector");
- V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
- DAG.getUNDEF(MVT::v8i16));
- } else
- assert(SExtVT.getScalarType() != MVT::i16 &&
- "Vectors of i16 must be packed");
- if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
- V = DAG.getBitcast(FPCastVT, V);
- V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
return DAG.getZExtOrTrunc(V, DL, VT);
}
@@ -31806,65 +33297,6 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// Match a binop + shuffle pyramid that represents a horizontal reduction over
-// the elements of a vector.
-// Returns the vector that is being reduced on, or SDValue() if a reduction
-// was not matched.
-static SDValue matchBinOpReduction(SDNode *Extract, unsigned &BinOp,
- ArrayRef<ISD::NodeType> CandidateBinOps) {
- // The pattern must end in an extract from index 0.
- if ((Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ||
- !isNullConstant(Extract->getOperand(1)))
- return SDValue();
-
- SDValue Op = Extract->getOperand(0);
- unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
-
- // Match against one of the candidate binary ops.
- if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) {
- return Op.getOpcode() == unsigned(BinOp);
- }))
- return SDValue();
-
- // At each stage, we're looking for something that looks like:
- // %s = shufflevector <8 x i32> %op, <8 x i32> undef,
- // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
- // i32 undef, i32 undef, i32 undef, i32 undef>
- // %a = binop <8 x i32> %op, %s
- // Where the mask changes according to the stage. E.g. for a 3-stage pyramid,
- // we expect something like:
- // <4,5,6,7,u,u,u,u>
- // <2,3,u,u,u,u,u,u>
- // <1,u,u,u,u,u,u,u>
- unsigned CandidateBinOp = Op.getOpcode();
- for (unsigned i = 0; i < Stages; ++i) {
- if (Op.getOpcode() != CandidateBinOp)
- return SDValue();
-
- ShuffleVectorSDNode *Shuffle =
- dyn_cast<ShuffleVectorSDNode>(Op.getOperand(0).getNode());
- if (Shuffle) {
- Op = Op.getOperand(1);
- } else {
- Shuffle = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(1).getNode());
- Op = Op.getOperand(0);
- }
-
- // The first operand of the shuffle should be the same as the other operand
- // of the binop.
- if (!Shuffle || Shuffle->getOperand(0) != Op)
- return SDValue();
-
- // Verify the shuffle has the expected (at this stage of the pyramid) mask.
- for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
- if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
- return SDValue();
- }
-
- BinOp = CandidateBinOp;
- return Op;
-}
-
// Given a select, detect the following pattern:
// 1: %2 = zext <N x i8> %0 to <N x i32>
// 2: %3 = zext <N x i8> %1 to <N x i32>
@@ -31979,8 +33411,8 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
return SDValue();
// Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
- unsigned BinOp;
- SDValue Src = matchBinOpReduction(
+ ISD::NodeType BinOp;
+ SDValue Src = DAG.matchBinOpReduction(
Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN});
if (!Src)
return SDValue();
@@ -32027,7 +33459,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
// ready for the PHMINPOS.
if (ExtractVT == MVT::i8) {
SDValue Upper = DAG.getVectorShuffle(
- SrcVT, DL, MinPos, getZeroVector(MVT::v16i8, Subtarget, DAG, DL),
+ SrcVT, DL, MinPos, DAG.getConstant(0, DL, MVT::v16i8),
{1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper);
}
@@ -32059,8 +33491,8 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
return SDValue();
// Check for OR(any_of) and AND(all_of) horizontal reduction patterns.
- unsigned BinOp = 0;
- SDValue Match = matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
+ ISD::NodeType BinOp;
+ SDValue Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
if (!Match)
return SDValue();
@@ -32142,8 +33574,8 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
return SDValue();
// Match shuffle + add pyramid.
- unsigned BinOp = 0;
- SDValue Root = matchBinOpReduction(Extract, BinOp, {ISD::ADD});
+ ISD::NodeType BinOp;
+ SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD});
// The operand is expected to be zero extended from i8
// (verified in detectZextAbsDiff).
@@ -32238,6 +33670,15 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
scaleShuffleMask<int>(Scale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
} else if ((Mask.size() % NumSrcElts) == 0) {
+ // Simplify Mask based on demanded element.
+ int ExtractIdx = (int)N->getConstantOperandVal(1);
+ int Scale = Mask.size() / NumSrcElts;
+ int Lo = Scale * ExtractIdx;
+ int Hi = Scale * (ExtractIdx + 1);
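+ // Mask elements outside the extracted element's source range cannot
+ // affect the result, so mark them undef.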
+ for (int i = 0, e = (int)Mask.size(); i != e; ++i)
+ if (i < Lo || Hi <= i)
+ Mask[i] = SM_SentinelUndef;
+
SmallVector<int, 16> WidenedMask;
while (Mask.size() > NumSrcElts &&
canWidenShuffleElements(Mask, WidenedMask))
@@ -32532,11 +33973,14 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
/// If this is a *dynamic* select (non-constant condition) and we can match
/// this node with one of the variable blend instructions, restructure the
/// condition so that blends can use the high (sign) bit of each element.
-static SDValue combineVSelectToShrunkBlend(SDNode *N, SelectionDAG &DAG,
+/// This function will also call SimplifyDemandedBits on already-created
+/// BLENDV nodes to perform additional simplifications.
+static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
- if (N->getOpcode() != ISD::VSELECT ||
+ if ((N->getOpcode() != ISD::VSELECT &&
+ N->getOpcode() != X86ISD::BLENDV) ||
ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
@@ -32578,7 +34022,9 @@ static SDValue combineVSelectToShrunkBlend(SDNode *N, SelectionDAG &DAG,
// TODO: Add other opcodes eventually lowered into BLEND.
for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
UI != UE; ++UI)
- if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0)
+ if ((UI->getOpcode() != ISD::VSELECT &&
+ UI->getOpcode() != X86ISD::BLENDV) ||
+ UI.getOperandNo() != 0)
return SDValue();
APInt DemandedMask(APInt::getSignMask(BitWidth));
@@ -32594,9 +34040,13 @@ static SDValue combineVSelectToShrunkBlend(SDNode *N, SelectionDAG &DAG,
// optimizations as we messed with the actual expectation for the vector
// boolean values.
for (SDNode *U : Cond->uses()) {
- SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U), U->getValueType(0),
+ if (U->getOpcode() == X86ISD::BLENDV)
+ continue;
+
+ SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0),
Cond, U->getOperand(1), U->getOperand(2));
DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
+ DCI.AddToWorklist(U);
}
DCI.CommitTargetLoweringOpt(TLO);
return SDValue(N, 0);
@@ -32608,9 +34058,14 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue Cond = N->getOperand(0);
- // Get the LHS/RHS of the select.
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
+
+ // Try simplification again because we use this function to optimize
+ // BLENDV nodes that are not handled by the generic combiner.
+ if (SDValue V = DAG.simplifySelect(Cond, LHS, RHS))
+ return V;
+
EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -32618,18 +34073,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Convert vselects with constant condition into shuffles.
if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
DCI.isBeforeLegalizeOps()) {
- SmallVector<int, 64> Mask(VT.getVectorNumElements(), -1);
- for (int i = 0, Size = Mask.size(); i != Size; ++i) {
- SDValue CondElt = Cond->getOperand(i);
- Mask[i] = i;
- // Arbitrarily choose from the 2nd operand if the select condition element
- // is undef.
- // TODO: Can we do better by matching patterns such as even/odd?
- if (CondElt.isUndef() || isNullConstant(CondElt))
- Mask[i] += Size;
- }
-
- return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
+ SmallVector<int, 64> Mask;
+ if (createShuffleMaskFromVSELECT(Mask, Cond))
+ return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
}
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
@@ -32814,7 +34260,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Since SKX these selects have a proper lowering.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1 &&
- VT.getVectorNumElements() > 4 &&
+ (ExperimentalVectorWideningLegalization ||
+ VT.getVectorNumElements() > 4) &&
(VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16)) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
@@ -32855,15 +34302,13 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}
- // Early exit check
- if (!TLI.isTypeLegal(VT))
- return SDValue();
-
// Match VSELECTs into subs with unsigned saturation.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
- // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
- ((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
- (Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+ // psubus is available in SSE2 for i8 and i16 vectors.
+ Subtarget.hasSSE2() && VT.getVectorNumElements() >= 2 &&
+ isPowerOf2_32(VT.getVectorNumElements()) &&
+ (VT.getVectorElementType() == MVT::i8 ||
+ VT.getVectorElementType() == MVT::i16)) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// Check if one of the arms of the VSELECT is a zero vector. If it's on the
@@ -32877,37 +34322,31 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
if (Other.getNode() && Other->getNumOperands() == 2 &&
- DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
+ Other->getOperand(0) == Cond.getOperand(0)) {
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
SDValue CondRHS = Cond->getOperand(1);
- auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
- };
-
// Look for a general sub with unsigned saturation first.
// x >= y ? x-y : 0 --> subus x, y
// x > y ? x-y : 0 --> subus x, y
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
- Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ Other->getOpcode() == ISD::SUB && OpRHS == CondRHS)
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
- if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
+ if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
if (isa<BuildVectorSDNode>(CondRHS)) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x > C-1 ? x+-C : 0 --> subus x, C
- auto MatchSUBUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ // TODO: Handle build_vectors with undef elements.
+ auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1);
};
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
- ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchSUBUS)) {
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT)) {
OpRHS = DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), OpRHS);
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
// Another special case: If C was a sign bit, the sub has been
@@ -32915,24 +34354,82 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// FIXME: Would it be better to use computeKnownBits to determine
// whether it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> subus x, C
- if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode())
+ if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
if (CC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
OpRHSConst->getAPIntValue().isSignMask()) {
- OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
// Note that we have to rebuild the RHS constant here to ensure we
// don't rely on particular values of undef lanes.
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
+ }
}
+ }
+ }
+ }
+
+ // Match VSELECTs into add with unsigned saturation.
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ // paddus is available in SSE2 for i8 and i16 vectors.
+ Subtarget.hasSSE2() && VT.getVectorNumElements() >= 2 &&
+ isPowerOf2_32(VT.getVectorNumElements()) &&
+ (VT.getVectorElementType() == MVT::i8 ||
+ VT.getVectorElementType() == MVT::i16)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ SDValue CondLHS = Cond->getOperand(0);
+ SDValue CondRHS = Cond->getOperand(1);
+
+ // Check if one of the arms of the VSELECT is vector with all bits set.
+ // If it's on the left side invert the predicate to simplify logic below.
+ SDValue Other;
+ if (ISD::isBuildVectorAllOnes(LHS.getNode())) {
+ Other = RHS;
+ CC = ISD::getSetCCInverse(CC, true);
+ } else if (ISD::isBuildVectorAllOnes(RHS.getNode())) {
+ Other = LHS;
+ }
+
+ if (Other.getNode() && Other.getOpcode() == ISD::ADD) {
+ SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
+
+ // Canonicalize condition operands.
+ if (CC == ISD::SETUGE) {
+ std::swap(CondLHS, CondRHS);
+ CC = ISD::SETULE;
+ }
+
+ // We can test against either of the addition operands.
+ // x <= x+y ? x+y : ~0 --> addus x, y
+ // x+y >= x ? x+y : ~0 --> addus x, y
+ if (CC == ISD::SETULE && Other == CondRHS &&
+ (OpLHS == CondLHS || OpRHS == CondLHS))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+
+ if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
+ CondLHS == OpLHS) {
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x > ~C ? x+C : ~0 --> addus x, C
+ auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return Cond->getAPIntValue() == ~Op->getAPIntValue();
+ };
+ if (CC == ISD::SETULE &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+ }
}
}
+ // Early exit check
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
return V;
- if (SDValue V = combineVSelectToShrunkBlend(N, DAG, DCI, Subtarget))
+ if (SDValue V = combineVSelectToBLENDV(N, DAG, DCI, Subtarget))
return V;
// Custom action for SELECT MMX
@@ -33014,16 +34511,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
/*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
/*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
AN->getMemOperand());
- // If the comparision uses the CF flag we can't use INC/DEC instructions.
- bool NeedCF = false;
- switch (CC) {
- default: break;
- case X86::COND_A: case X86::COND_AE:
- case X86::COND_B: case X86::COND_BE:
- NeedCF = true;
- break;
- }
- auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget, !NeedCF);
+ auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
@@ -33453,10 +34941,13 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
SDValue Add = TrueOp;
SDValue Const = FalseOp;
// Canonicalize the condition code for easier matching and output.
- if (CC == X86::COND_E) {
+ if (CC == X86::COND_E)
std::swap(Add, Const);
- CC = X86::COND_NE;
- }
+
+ // We might have replaced the constant in the cmov with the LHS of the
+ // compare. If so change it to the RHS of the compare.
+ if (Const == Cond.getOperand(0))
+ Const = Cond.getOperand(1);
// Ok, now make sure that Add is (add (cttz X), C2) and Const is a constant.
if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD &&
@@ -33468,7 +34959,8 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
// This should constant fold.
SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1));
SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0),
- DAG.getConstant(CC, DL, MVT::i8), Cond);
+ DAG.getConstant(X86::COND_NE, DL, MVT::i8),
+ Cond);
return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1));
}
}
@@ -33490,40 +34982,8 @@ static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) {
for (unsigned i = 0; i < 2; i++) {
SDValue Opd = N->getOperand(i);
- // DAG.ComputeNumSignBits return 1 for ISD::ANY_EXTEND, so we need to
- // compute signbits for it separately.
- if (Opd.getOpcode() == ISD::ANY_EXTEND) {
- // For anyextend, it is safe to assume an appropriate number of leading
- // sign/zero bits.
- if (Opd.getOperand(0).getValueType().getVectorElementType() == MVT::i8)
- SignBits[i] = 25;
- else if (Opd.getOperand(0).getValueType().getVectorElementType() ==
- MVT::i16)
- SignBits[i] = 17;
- else
- return false;
- IsPositive[i] = true;
- } else if (Opd.getOpcode() == ISD::BUILD_VECTOR) {
- // All the operands of BUILD_VECTOR need to be int constant.
- // Find the smallest value range which all the operands belong to.
- SignBits[i] = 32;
- IsPositive[i] = true;
- for (const SDValue &SubOp : Opd.getNode()->op_values()) {
- if (SubOp.isUndef())
- continue;
- auto *CN = dyn_cast<ConstantSDNode>(SubOp);
- if (!CN)
- return false;
- APInt IntVal = CN->getAPIntValue();
- if (IntVal.isNegative())
- IsPositive[i] = false;
- SignBits[i] = std::min(SignBits[i], IntVal.getNumSignBits());
- }
- } else {
- SignBits[i] = DAG.ComputeNumSignBits(Opd);
- if (Opd.getOpcode() == ISD::ZERO_EXTEND)
- IsPositive[i] = true;
- }
+ SignBits[i] = DAG.ComputeNumSignBits(Opd);
+ IsPositive[i] = DAG.SignBitIsZero(Opd);
}
bool AllPositive = IsPositive[0] && IsPositive[1];
@@ -33608,90 +35068,90 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);
- if (NumElts >= OpsVT.getVectorNumElements()) {
+ if (ExperimentalVectorWideningLegalization ||
+ NumElts >= OpsVT.getVectorNumElements()) {
// Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
// lower part is needed.
SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
- if (Mode == MULU8 || Mode == MULS8) {
+ if (Mode == MULU8 || Mode == MULS8)
return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
DL, VT, MulLo);
- } else {
- MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
- // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
- // the higher part is also needed.
- SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
- ReducedVT, NewN0, NewN1);
-
- // Repack the lower part and higher part result of mul into a wider
- // result.
- // Generate shuffle functioning as punpcklwd.
- SmallVector<int, 16> ShuffleMask(NumElts);
- for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
- ShuffleMask[2 * i] = i;
- ShuffleMask[2 * i + 1] = i + NumElts;
- }
- SDValue ResLo =
- DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
- ResLo = DAG.getBitcast(ResVT, ResLo);
- // Generate shuffle functioning as punpckhwd.
- for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
- ShuffleMask[2 * i] = i + NumElts / 2;
- ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
- }
- SDValue ResHi =
- DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
- ResHi = DAG.getBitcast(ResVT, ResHi);
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
- }
- } else {
- // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want
- // to legalize the mul explicitly because implicit legalization for type
- // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack
- // instructions which will not exist when we explicitly legalize it by
- // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with
- // <4 x i16> undef).
- //
- // Legalize the operands of mul.
- // FIXME: We may be able to handle non-concatenated vectors by insertion.
- unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
- if ((RegSize % ReducedSizeInBits) != 0)
- return SDValue();
- SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
- DAG.getUNDEF(ReducedVT));
- Ops[0] = NewN0;
- NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
- Ops[0] = NewN1;
- NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
-
- if (Mode == MULU8 || Mode == MULS8) {
- // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower
- // part is needed.
- SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
-
- // convert the type of mul result to VT.
- MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
- SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG,
- DL, ResVT, Mul);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getIntPtrConstant(0, DL));
- } else {
- // Generate the lower and higher part of mul: pmulhw/pmulhuw. For
- // MULU16/MULS16, both parts are needed.
- SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
- SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
- OpsVT, NewN0, NewN1);
-
- // Repack the lower part and higher part result of mul into a wider
- // result. Make sure the type of mul result is VT.
- MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
- SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi);
- Res = DAG.getBitcast(ResVT, Res);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getIntPtrConstant(0, DL));
- }
+ MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
+ // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
+ // the higher part is also needed.
+ SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
+ ReducedVT, NewN0, NewN1);
+
+ // Repack the lower part and higher part result of mul into a wider
+ // result.
+ // Generate shuffle functioning as punpcklwd.
+ SmallVector<int, 16> ShuffleMask(NumElts);
+ for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+ ShuffleMask[2 * i] = i;
+ ShuffleMask[2 * i + 1] = i + NumElts;
+ }
+ SDValue ResLo =
+ DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+ ResLo = DAG.getBitcast(ResVT, ResLo);
+ // Generate shuffle functioning as punpckhwd.
+ for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+ ShuffleMask[2 * i] = i + NumElts / 2;
+ ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
+ }
+ SDValue ResHi =
+ DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+ ResHi = DAG.getBitcast(ResVT, ResHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
+ }
+
+ // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want
+ // to legalize the mul explicitly because implicit legalization for type
+ // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack
+ // instructions which will not exist when we explicitly legalize it by
+ // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with
+ // <4 x i16> undef).
+ //
+ // Legalize the operands of mul.
+ // FIXME: We may be able to handle non-concatenated vectors by insertion.
+ unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
+ if ((RegSize % ReducedSizeInBits) != 0)
+ return SDValue();
+
+ SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
+ DAG.getUNDEF(ReducedVT));
+ Ops[0] = NewN0;
+ NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
+ Ops[0] = NewN1;
+ NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
+
+ if (Mode == MULU8 || Mode == MULS8) {
+ // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower
+ // part is needed.
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
+
+ // Convert the type of the mul result to VT.
+ MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
+ SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG
+ : ISD::SIGN_EXTEND_VECTOR_INREG,
+ DL, ResVT, Mul);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
}
+
+ // Generate the lower and higher part of mul: pmulhw/pmulhuw. For
+ // MULU16/MULS16, both parts are needed.
+ SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
+ SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
+ OpsVT, NewN0, NewN1);
+
+ // Repack the lower part and higher part result of mul into a wider
+ // result. Make sure the type of mul result is VT.
+ MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
+ SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi);
+ Res = DAG.getBitcast(ResVT, Res);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
}
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
@@ -33781,13 +35241,13 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
}
// If the upper 17 bits of each element are zero then we can use PMADDWD,
-// which is always at least as quick as PMULLD, expect on KNL.
+// which is always at least as quick as PMULLD, except on KNL.
static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasSSE2())
return SDValue();
- if (Subtarget.getProcFamily() == X86Subtarget::IntelKNL)
+ if (Subtarget.isPMADDWDSlow())
return SDValue();
EVT VT = N->getValueType(0);
@@ -33797,12 +35257,24 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Make sure the vXi16 type is legal. This covers the AVX512 without BWI case.
+ // Also allow v2i32 if it will be widened.
MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());
- if (!DAG.getTargetLoweringInfo().isTypeLegal(WVT))
+ if (!((ExperimentalVectorWideningLegalization && VT == MVT::v2i32) ||
+ DAG.getTargetLoweringInfo().isTypeLegal(WVT)))
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+
+ // If we are zero extending in two steps without SSE4.1, it's better to reduce
+ // the vmul width instead.
+ if (!Subtarget.hasSSE41() &&
+ (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
+ (N1.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() <= 8))
+ return SDValue();
+
APInt Mask17 = APInt::getHighBitsSet(32, 17);
if (!DAG.MaskedValueIsZero(N1, Mask17) ||
!DAG.MaskedValueIsZero(N0, Mask17))
@@ -33828,7 +35300,8 @@ static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG,
// Only support vXi64 vectors.
if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 ||
- !DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ VT.getVectorNumElements() < 2 ||
+ !isPowerOf2_32(VT.getVectorNumElements()))
return SDValue();
SDValue N0 = N->getOperand(0);
@@ -33929,10 +35402,12 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
(SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
if (isPowerOf2_64(MulAmt2) &&
- !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
+ !(SignMulAmt >= 0 && N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::ADD))
// If the second multiplier is pow2, issue it first. We want the multiply by
// 3, 5, or 9 to be folded into the addressing mode unless the lone use
- // is an add.
+ // is an add. Only do this for positive multiply amounts since the
+ // negate would prevent it from being used as an address mode anyway.
std::swap(MulAmt1, MulAmt2);
if (isPowerOf2_64(MulAmt1))
@@ -34197,6 +35672,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
"Unexpected PACKSS/PACKUS input type");
+ bool IsSigned = (X86ISD::PACKSS == Opcode);
+
// Constant Folding.
APInt UndefElts0, UndefElts1;
SmallVector<APInt, 32> EltBits0, EltBits1;
@@ -34209,7 +35686,6 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
unsigned NumSrcElts = NumDstElts / 2;
unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
- bool IsSigned = (X86ISD::PACKSS == Opcode);
APInt Undefs(NumDstElts, 0);
SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
@@ -34253,16 +35729,58 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
}
+ // Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
+ // truncate to create a larger truncate.
+ if (Subtarget.hasAVX512() &&
+ N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
+ N0.getOperand(0).getValueType() == MVT::v8i32) {
+ if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
+ (!IsSigned &&
+ DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) {
+ if (Subtarget.hasVLX())
+ return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+ // Widen input to v16i32 so we can truncate that.
+ SDLoc dl(N);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i32,
+ N0.getOperand(0), DAG.getUNDEF(MVT::v8i32));
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat);
+ }
+ }
+
// Attempt to combine as shuffle.
SDValue Op(N, 0);
if (SDValue Res =
combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false,
+ /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
return SDValue();
}
+static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() ||
+ X86ISD::VSRL == N->getOpcode()) &&
+ "Unexpected shift opcode");
+ EVT VT = N->getValueType(0);
+
+ // Shift zero -> zero.
+ if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -34277,13 +35795,14 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
"Unexpected value type");
+ assert(N1.getValueType() == MVT::i8 && "Unexpected shift amount type");
// Out of range logical bit shifts are guaranteed to be zero.
// Out of range arithmetic bit shifts splat the sign bit.
- APInt ShiftVal = cast<ConstantSDNode>(N1)->getAPIntValue();
- if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) {
+ unsigned ShiftVal = cast<ConstantSDNode>(N1)->getZExtValue();
+ if (ShiftVal >= NumBitsPerElt) {
if (LogicalShift)
- return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+ return DAG.getConstant(0, SDLoc(N), VT);
else
ShiftVal = NumBitsPerElt - 1;
}
@@ -34294,30 +35813,25 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
// Shift zero -> zero.
if (ISD::isBuildVectorAllZeros(N0.getNode()))
- return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
-
- // fold (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31).
- // This VSRLI only looks at the sign bit, which is unmodified by VSRAI.
- // TODO - support other sra opcodes as needed.
- if (Opcode == X86ISD::VSRLI && (ShiftVal + 1) == NumBitsPerElt &&
- N0.getOpcode() == X86ISD::VSRAI)
- return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1);
-
- // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
- if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSHLI &&
- N1 == N0.getOperand(1)) {
- SDValue N00 = N0.getOperand(0);
- unsigned NumSignBits = DAG.ComputeNumSignBits(N00);
- if (ShiftVal.ult(NumSignBits))
- return N00;
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // Fold (VSRAI (VSRAI X, C1), C2) --> (VSRAI X, (C1 + C2)) with (C1 + C2)
+ // clamped to (NumBitsPerElt - 1).
+ if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSRAI) {
+ unsigned ShiftVal2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ unsigned NewShiftVal = ShiftVal + ShiftVal2;
+ if (NewShiftVal >= NumBitsPerElt)
+ NewShiftVal = NumBitsPerElt - 1;
+ return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8));
}
// We can decode 'whole byte' logical bit shifts as shuffles.
- if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) {
+ if (LogicalShift && (ShiftVal % 8) == 0) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
@@ -34328,18 +35842,22 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
assert(EltBits.size() == VT.getVectorNumElements() &&
"Unexpected shift value type");
- unsigned ShiftImm = ShiftVal.getZExtValue();
for (APInt &Elt : EltBits) {
if (X86ISD::VSHLI == Opcode)
- Elt <<= ShiftImm;
+ Elt <<= ShiftVal;
else if (X86ISD::VSRAI == Opcode)
- Elt.ashrInPlace(ShiftImm);
+ Elt.ashrInPlace(ShiftVal);
else
- Elt.lshrInPlace(ShiftImm);
+ Elt.lshrInPlace(ShiftVal);
}
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
}
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+ APInt::getAllOnesValue(NumBitsPerElt), DCI))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -34356,7 +35874,8 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
if (SDValue Res =
combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false,
+ /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
return SDValue();
@@ -34468,42 +35987,31 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// Try to match (and (xor X, -1), Y) logic pattern for (andnp X, Y) combines.
-static bool matchANDXORWithAllOnesAsANDNP(SDNode *N, SDValue &X, SDValue &Y) {
- if (N->getOpcode() != ISD::AND)
- return false;
+/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
+static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::AND);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ MVT VT = N->getSimpleValueType(0);
+ if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
+ return SDValue();
+
+ SDValue X, Y;
+ SDValue N0 = peekThroughBitcasts(N->getOperand(0));
+ SDValue N1 = peekThroughBitcasts(N->getOperand(1));
if (N0.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) {
X = N0.getOperand(0);
Y = N1;
- return true;
- }
- if (N1.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) {
+ } else if (N1.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) {
X = N1.getOperand(0);
Y = N0;
- return true;
- }
-
- return false;
-}
-
-/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
-static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
- assert(N->getOpcode() == ISD::AND);
-
- EVT VT = N->getValueType(0);
- if (VT != MVT::v2i64 && VT != MVT::v4i64 && VT != MVT::v8i64)
+ } else
return SDValue();
- SDValue X, Y;
- if (matchANDXORWithAllOnesAsANDNP(N, X, Y))
- return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y);
-
- return SDValue();
+ X = DAG.getBitcast(VT, X);
+ Y = DAG.getBitcast(VT, Y);
+ return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y);
}
// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
@@ -34512,8 +36020,8 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
// some of the transition sequences.
// Even with AVX-512 this is still useful for removing casts around logical
// operations on vXi1 mask types.
-static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
assert(VT.isVector() && "Expected vector type");
@@ -34628,6 +36136,10 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
!SplatVal.isMask())
return SDValue();
+ // Don't prevent creation of ANDN.
+ if (isBitwiseNot(Op0))
+ return SDValue();
+
if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
return SDValue();
@@ -34761,6 +36273,73 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
return SDValue();
}
+// Look for (and (ctpop X), 1) which is the IR form of __builtin_parity.
+// Turn it into series of XORs and a setnp.
+static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+
+ // We only support 64-bit and 32-bit. 64-bit requires special handling
+ // unless the 64-bit popcnt instruction is legal.
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isTypeLegal(VT) && TLI.isOperationLegal(ISD::CTPOP, VT))
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // LHS needs to be a single use CTPOP.
+ if (N0.getOpcode() != ISD::CTPOP || !N0.hasOneUse())
+ return SDValue();
+
+ // RHS needs to be 1.
+ if (!isOneConstant(N1))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue X = N0.getOperand(0);
+
+ // If this is 64-bit, it's always best to xor the two 32-bit pieces together
+ // even if we have popcnt.
+ if (VT == MVT::i64) {
+ SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
+ DAG.getNode(ISD::SRL, DL, VT, X,
+ DAG.getConstant(32, DL, MVT::i8)));
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+ X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi);
+ // Generate a 32-bit parity idiom. This will bring us back here if we need
+ // to expand it too.
+ SDValue Parity = DAG.getNode(ISD::AND, DL, MVT::i32,
+ DAG.getNode(ISD::CTPOP, DL, MVT::i32, X),
+ DAG.getConstant(1, DL, MVT::i32));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Parity);
+ }
+ assert(VT == MVT::i32 && "Unexpected VT!");
+
+ // Xor the high and low 16-bits together using a 32-bit operation.
+ SDValue Hi16 = DAG.getNode(ISD::SRL, DL, VT, X,
+ DAG.getConstant(16, DL, MVT::i8));
+ X = DAG.getNode(ISD::XOR, DL, VT, X, Hi16);
+
+ // Finally, xor the low 2 bytes together and use an 8-bit flag-setting xor.
+ // This should allow an h-reg to be used to save a shift.
+ // FIXME: We only get an h-reg in 32-bit mode.
+ SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
+ DAG.getNode(ISD::SRL, DL, VT, X,
+ DAG.getConstant(8, DL, MVT::i8)));
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
+ SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32);
+ SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1);
+
+ // Copy the inverse of the parity flag into a register with setcc.
+ SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
+ // Zero extend to original type.
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), Setnp);
+}
+
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -34788,6 +36367,10 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
}
}
+ // This must be done before legalization has expanded the ctpop.
+ if (SDValue V = combineParity(N, DAG, Subtarget))
+ return V;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -34811,7 +36394,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
@@ -34848,7 +36431,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue Shuffle = combineX86ShufflesRecursively(
{SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
N->getOperand(0).getOperand(1));
}
@@ -34978,7 +36561,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasSSE41())
return SDValue();
- MVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
+ MVT BlendVT = VT.is256BitVector() ? MVT::v32i8 : MVT::v16i8;
X = DAG.getBitcast(BlendVT, X);
Y = DAG.getBitcast(BlendVT, Y);
@@ -35122,11 +36705,21 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
return R;
+ // Attempt to recursively combine an OR of shuffles.
+ if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+ SDValue Op(N, 0);
+ if (SDValue Res = combineX86ShufflesRecursively(
+ {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+ return Res;
+ }
+
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+ unsigned Bits = VT.getScalarSizeInBits();
// SHLD/SHRD instructions have lower register pressure, but on some
// platforms they have higher latency than the equivalent
@@ -35149,6 +36742,23 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
SDValue ShAmt1 = N1.getOperand(1);
if (ShAmt1.getValueType() != MVT::i8)
return SDValue();
+
+ // Peek through any modulo shift masks.
+ SDValue ShMsk0;
+ if (ShAmt0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(ShAmt0.getOperand(1)) &&
+ ShAmt0.getConstantOperandVal(1) == (Bits - 1)) {
+ ShMsk0 = ShAmt0;
+ ShAmt0 = ShAmt0.getOperand(0);
+ }
+ SDValue ShMsk1;
+ if (ShAmt1.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(ShAmt1.getOperand(1)) &&
+ ShAmt1.getConstantOperandVal(1) == (Bits - 1)) {
+ ShMsk1 = ShAmt1;
+ ShAmt1 = ShAmt1.getOperand(0);
+ }
+
if (ShAmt0.getOpcode() == ISD::TRUNCATE)
ShAmt0 = ShAmt0.getOperand(0);
if (ShAmt1.getOpcode() == ISD::TRUNCATE)
@@ -35163,27 +36773,29 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
Opc = X86ISD::SHRD;
std::swap(Op0, Op1);
std::swap(ShAmt0, ShAmt1);
+ std::swap(ShMsk0, ShMsk1);
}
// OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C )
// OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C )
// OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C )
// OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C )
- unsigned Bits = VT.getSizeInBits();
+ // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> SHLD( X, Y, C )
+ // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> SHRD( X, Y, C )
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
- if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
+ if (auto *SumC = dyn_cast<ConstantSDNode>(Sum)) {
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE)
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
- if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
- return DAG.getNode(Opc, DL, VT,
- Op0, Op1,
- DAG.getNode(ISD::TRUNCATE, DL,
- MVT::i8, ShAmt0));
- }
- } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
- ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
+ if ((SumC->getAPIntValue() == Bits ||
+ (SumC->getAPIntValue() == 0 && ShMsk1)) &&
+ ShAmt1Op1 == ShAmt0)
+ return DAG.getNode(Opc, DL, VT, Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ }
+ } else if (auto *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
+ auto *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits)
return DAG.getNode(Opc, DL, VT,
N0.getOperand(0), N1.getOperand(0),
@@ -35191,12 +36803,13 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
MVT::i8, ShAmt0));
} else if (ShAmt1.getOpcode() == ISD::XOR) {
SDValue Mask = ShAmt1.getOperand(1);
- if (ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
+ if (auto *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL);
SDValue ShAmt1Op0 = ShAmt1.getOperand(0);
if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE)
ShAmt1Op0 = ShAmt1Op0.getOperand(0);
- if (MaskC->getSExtValue() == (Bits - 1) && ShAmt1Op0 == ShAmt0) {
+ if (MaskC->getSExtValue() == (Bits - 1) &&
+ (ShAmt1Op0 == ShAmt0 || ShAmt1Op0 == ShMsk0)) {
if (Op1.getOpcode() == InnerShift &&
isa<ConstantSDNode>(Op1.getOperand(1)) &&
Op1.getConstantOperandVal(1) == 1) {
@@ -35207,7 +36820,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD &&
Op1.getOperand(0) == Op1.getOperand(1)) {
return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
- DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
}
}
}
@@ -35478,6 +37091,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
}
if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) &&
+ !Subtarget.hasAVX512() &&
(SVT == MVT::i8 || SVT == MVT::i16) &&
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
if (auto USatVal = detectSSatPattern(In, VT, true)) {
@@ -35514,7 +37128,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
EVT ScalarVT = VT.getVectorElementType();
if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
- isPowerOf2_32(NumElems)))
+ NumElems >= 2 && isPowerOf2_32(NumElems)))
return SDValue();
// InScalarVT is the intermediate type in AVG pattern and it should be greater
@@ -35752,8 +37366,8 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
Alignment, ML->getMemOperand()->getFlags());
// Insert the loaded element into the appropriate place in the vector.
- SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, ML->getSrc0(),
- Load, VecIndex);
+ SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
+ ML->getPassThru(), Load, VecIndex);
return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
}
@@ -35776,7 +37390,8 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
if (LoadFirstElt && LoadLastElt) {
SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
ML->getMemOperand());
- SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd, ML->getSrc0());
+ SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd,
+ ML->getPassThru());
return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true);
}
@@ -35786,7 +37401,7 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
// Don't try this if the pass-through operand is already undefined. That would
// cause an infinite loop because that's what we're about to create.
- if (ML->getSrc0().isUndef())
+ if (ML->getPassThru().isUndef())
return SDValue();
// The new masked load has an undef pass-through operand. The select uses the
@@ -35795,7 +37410,8 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
ML->getMask(), DAG.getUNDEF(VT),
ML->getMemoryVT(), ML->getMemOperand(),
ML->getExtensionType());
- SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getSrc0());
+ SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
+ ML->getPassThru());
return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
}
@@ -35842,9 +37458,9 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
LdVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
- // Convert Src0 value.
- SDValue WideSrc0 = DAG.getBitcast(WideVecVT, Mld->getSrc0());
- if (!Mld->getSrc0().isUndef()) {
+ // Convert PassThru value.
+ SDValue WidePassThru = DAG.getBitcast(WideVecVT, Mld->getPassThru());
+ if (!Mld->getPassThru().isUndef()) {
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
@@ -35852,7 +37468,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
// Can't shuffle using an illegal type.
assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
"WideVecVT should be legal");
- WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
+ WidePassThru = DAG.getVectorShuffle(WideVecVT, dl, WidePassThru,
DAG.getUNDEF(WideVecVT), ShuffleVec);
}
@@ -35885,10 +37501,10 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
}
SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
- Mld->getBasePtr(), NewMask, WideSrc0,
+ Mld->getBasePtr(), NewMask, WidePassThru,
Mld->getMemoryVT(), Mld->getMemOperand(),
ISD::NON_EXTLOAD);
- SDValue NewVec = getExtendInVec(X86ISD::VSEXT, dl, VT, WideLd, DAG);
+ SDValue NewVec = getExtendInVec(/*Signed*/true, dl, VT, WideLd, DAG);
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
}
@@ -35920,31 +37536,25 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
}
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
-
if (Mst->isCompressingStore())
return SDValue();
+ EVT VT = Mst->getValue().getValueType();
if (!Mst->isTruncatingStore()) {
if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
return ScalarStore;
- // If the mask is checking (0 > X), we're creating a vector with all-zeros
- // or all-ones elements based on the sign bits of X. AVX1 masked store only
- // cares about the sign bit of each mask element, so eliminate the compare:
- // mstore val, ptr, (pcmpgt 0, X) --> mstore val, ptr, X
- // Note that by waiting to match an x86-specific PCMPGT node, we're
- // eliminating potentially more complex matching of a setcc node which has
- // a full range of predicates.
+ // If the mask value has been legalized to a non-boolean vector, try to
+ // simplify ops leading up to it. We only demand the MSB of each lane.
SDValue Mask = Mst->getMask();
- if (Mask.getOpcode() == X86ISD::PCMPGT &&
- ISD::isBuildVectorAllZeros(Mask.getOperand(0).getNode())) {
- assert(Mask.getValueType() == Mask.getOperand(1).getValueType() &&
- "Unexpected type for PCMPGT");
- return DAG.getMaskedStore(
- Mst->getChain(), SDLoc(N), Mst->getValue(), Mst->getBasePtr(),
- Mask.getOperand(1), Mst->getMemoryVT(), Mst->getMemOperand());
+ if (Mask.getScalarValueSizeInBits() != 1) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
+ return SDValue(N, 0);
}
// TODO: AVX512 targets should also be able to simplify something like the
@@ -35955,7 +37565,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
}
// Resolve truncating stores.
- EVT VT = Mst->getValue().getValueType();
unsigned NumElems = VT.getVectorNumElements();
EVT StVT = Mst->getMemoryVT();
SDLoc dl(Mst);
@@ -36043,6 +37652,18 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Convert a store of vXi1 into a store of iX and a bitcast.
+ if (!Subtarget.hasAVX512() && VT == StVT && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1) {
+
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
+ StoredVal = DAG.getBitcast(NewVT, StoredVal);
+
+ return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
+ }
+
// If this is a store of a scalar_to_vector to v1i1, just use a scalar store.
// This will avoid a copy to k-register.
if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
@@ -36269,7 +37890,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget.is64Bit() || F64IsLegal) {
- MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
+ MVT LdVT = (Subtarget.is64Bit() &&
+ (!VT.isFloatingPoint() || !F64IsLegal)) ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getMemOperand());
@@ -36343,10 +37965,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
/// A horizontal-op B, for some already available A and B, and if so then LHS is
/// set to A, RHS to B, and the routine returns 'true'.
-/// Note that the binary operation should have the property that if one of the
-/// operands is UNDEF then the result is UNDEF.
static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
- // Look for the following pattern: if
+ // If either operand is undef, bail out. The binop should be simplified.
+ if (LHS.isUndef() || RHS.isUndef())
+ return false;
+
+ // Look for the following pattern:
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
// and
@@ -36361,25 +37985,15 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
return false;
MVT VT = LHS.getSimpleValueType();
-
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
- // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to
- // operate independently on 128-bit lanes.
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts / NumLanes;
- assert((NumLaneElts % 2 == 0) &&
- "Vector type should have an even number of elements in each lane");
- unsigned HalfLaneElts = NumLaneElts/2;
-
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
- // If LHS is not a shuffle then pretend it is the shuffle
+ // If LHS is not a shuffle, then pretend it is the identity shuffle:
// LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
- // NOTE: in what follows a default initialized SDValue represents an UNDEF of
- // type VT.
+ // NOTE: A default initialized SDValue represents an UNDEF of type VT.
+ unsigned NumElts = VT.getVectorNumElements();
SDValue A, B;
SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
@@ -36388,10 +38002,9 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
if (!LHS.getOperand(1).isUndef())
B = LHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
- std::copy(Mask.begin(), Mask.end(), LMask.begin());
+ llvm::copy(Mask, LMask.begin());
} else {
- if (!LHS.isUndef())
- A = LHS;
+ A = LHS;
for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
@@ -36406,45 +38019,51 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
if (!RHS.getOperand(1).isUndef())
D = RHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
- std::copy(Mask.begin(), Mask.end(), RMask.begin());
+ llvm::copy(Mask, RMask.begin());
} else {
- if (!RHS.isUndef())
- C = RHS;
+ C = RHS;
for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
+ // If A and B occur in reverse order in RHS, then canonicalize by commuting
+ // RHS operands and shuffle mask.
+ if (A != C) {
+ std::swap(C, D);
+ ShuffleVectorSDNode::commuteMask(RMask);
+ }
// Check that the shuffles are both shuffling the same vectors.
- if (!(A == C && B == D) && !(A == D && B == C))
- return false;
-
- // If everything is UNDEF then bail out: it would be better to fold to UNDEF.
- if (!A.getNode() && !B.getNode())
+ if (!(A == C && B == D))
return false;
- // If A and B occur in reverse order in RHS, then "swap" them (which means
- // rewriting the mask).
- if (A != C)
- ShuffleVectorSDNode::commuteMask(RMask);
-
- // At this point LHS and RHS are equivalent to
- // LHS = VECTOR_SHUFFLE A, B, LMask
- // RHS = VECTOR_SHUFFLE A, B, RMask
+ // LHS and RHS are now:
+ // LHS = shuffle A, B, LMask
+ // RHS = shuffle A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0; i != NumLaneElts; ++i) {
- int LIdx = LMask[i+l], RIdx = RMask[i+l];
-
- // Ignore any UNDEF components.
+ // AVX defines horizontal add/sub to operate independently on 128-bit lanes,
+ // so we just repeat the inner loop if this is a 256-bit op.
+ unsigned Num128BitChunks = VT.getSizeInBits() / 128;
+ unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks;
+ assert((NumEltsPer128BitChunk % 2 == 0) &&
+ "Vector type should have an even number of elements in each lane");
+ for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
+ for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
+ // Ignore undefined components.
+ int LIdx = LMask[i + j], RIdx = RMask[i + j];
if (LIdx < 0 || RIdx < 0 ||
(!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
(!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
- // Check that successive elements are being operated on. If not, this is
+ // The low half of the 128-bit result must choose from A.
+ // The high half of the 128-bit result must choose from B,
+ // unless B is undef. In that case, we are always choosing from A.
+ unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
+ unsigned Src = B.getNode() ? i >= NumEltsPer64BitChunk : 0;
+
+ // Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
- unsigned Src = (i/HalfLaneElts); // each lane is split between srcs
- int Index = 2*(i%HalfLaneElts) + NumElts*Src + l;
+ int Index = 2 * (i % NumEltsPer64BitChunk) + NumElts * Src + j;
if (!(LIdx == Index && RIdx == Index + 1) &&
!(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
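A minimal standalone sketch (illustrative only, not part of the patch) of the index arithmetic the rewritten loop performs for a 128-bit v4f32 HADD, assuming LHS = shuffle(A, B, <0,2,4,6>) and RHS = shuffle(A, B, <1,3,5,7>):

#include <cassert>

int main() {
  // Model of the horizontal-op mask check for one 128-bit chunk of v4f32.
  const unsigned NumElts = 4, NumEltsPer128BitChunk = 4;
  const unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
  int LMask[] = {0, 2, 4, 6}; // even elements of A and B
  int RMask[] = {1, 3, 5, 7}; // odd elements of A and B
  for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
    for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
      unsigned Src = i >= NumEltsPer64BitChunk; // low half from A, high from B
      int Index = 2 * (i % NumEltsPer64BitChunk) + NumElts * Src + j;
      // Successive elements must be operated on: LIdx == Index, RIdx == Index + 1.
      assert(LMask[i + j] == Index && RMask[i + j] == Index + 1);
    }
  }
  return 0;
}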
@@ -36463,21 +38082,24 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
bool IsFadd = N->getOpcode() == ISD::FADD;
+ auto HorizOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB;
assert((IsFadd || N->getOpcode() == ISD::FSUB) && "Wrong opcode");
// Try to synthesize horizontal add/sub from adds/subs of shuffles.
if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
- isHorizontalBinOp(LHS, RHS, IsFadd)) {
- auto NewOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB;
- return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
- }
+ isHorizontalBinOp(LHS, RHS, IsFadd) &&
+ shouldUseHorizontalOp(LHS == RHS, DAG, Subtarget))
+ return DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
+
return SDValue();
}
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
+/// TODO: This overlaps with the generic combiner's visitTRUNCATE. Remove
+/// anything that is guaranteed to be transformed by DAGCombiner.
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
@@ -36489,34 +38111,20 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
- auto IsRepeatedOpOrFreeTruncation = [VT](SDValue Op0, SDValue Op1) {
+ auto IsFreeTruncation = [VT](SDValue Op) {
unsigned TruncSizeInBits = VT.getScalarSizeInBits();
- // Repeated operand, so we are only trading one output truncation for
- // one input truncation.
- if (Op0 == Op1)
- return true;
-
- // See if either operand has been extended from a smaller/equal size to
+ // See if this has been extended from a smaller/equal size to
// the truncation size, allowing a truncation to combine with the extend.
- unsigned Opcode0 = Op0.getOpcode();
- if ((Opcode0 == ISD::ANY_EXTEND || Opcode0 == ISD::SIGN_EXTEND ||
- Opcode0 == ISD::ZERO_EXTEND) &&
- Op0.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
- return true;
-
- unsigned Opcode1 = Op1.getOpcode();
- if ((Opcode1 == ISD::ANY_EXTEND || Opcode1 == ISD::SIGN_EXTEND ||
- Opcode1 == ISD::ZERO_EXTEND) &&
- Op1.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
+ unsigned Opcode = Op.getOpcode();
+ if ((Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND ||
+ Opcode == ISD::ZERO_EXTEND) &&
+ Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
return true;
- // See if either operand is a single use constant which can be constant
- // folded.
- SDValue BC0 = peekThroughOneUseBitcasts(Op0);
- SDValue BC1 = peekThroughOneUseBitcasts(Op1);
- return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
- ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
+ // See if this is a single use constant which can be constant folded.
+ SDValue BC = peekThroughOneUseBitcasts(Op);
+ return ISD::isBuildVectorOfConstantSDNodes(BC.getNode());
};
auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
@@ -36526,7 +38134,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
};
// Don't combine if the operation has other uses.
- if (!N->isOnlyUserOf(Src.getNode()))
+ if (!Src.hasOneUse())
return SDValue();
// Only support vector truncation for now.
@@ -36544,7 +38152,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
- IsRepeatedOpOrFreeTruncation(Op0, Op1))
+ (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
return TruncateArithmetic(Op0, Op1);
break;
}
@@ -36557,11 +38165,20 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
case ISD::ADD: {
- // TODO: ISD::SUB should be here but interferes with combineSubToSubus.
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(Opcode, VT) &&
- IsRepeatedOpOrFreeTruncation(Op0, Op1))
+ (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
+ return TruncateArithmetic(Op0, Op1);
+ break;
+ }
+ case ISD::SUB: {
+ // TODO: For ISD::SUB, we are conservative and require both sides to be freely
+ // truncatable to avoid interfering with combineSubToSubus.
+ SDValue Op0 = Src.getOperand(0);
+ SDValue Op1 = Src.getOperand(1);
+ if (TLI.isOperationLegal(Opcode, VT) &&
+ (Op0 == Op1 || (IsFreeTruncation(Op0) && IsFreeTruncation(Op1))))
return TruncateArithmetic(Op0, Op1);
break;
}
@@ -36701,8 +38318,7 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
// Use PACKUS if the input has zero-bits that extend all the way to the
// packed/truncated value. e.g. masks, zext_in_reg, etc.
- KnownBits Known;
- DAG.computeKnownBits(In, Known);
+ KnownBits Known = DAG.computeKnownBits(In);
unsigned NumLeadingZeroBits = Known.countMinLeadingZeros();
if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits))
return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget);
@@ -36733,9 +38349,11 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
if (!Subtarget.hasSSE2())
return SDValue();
- // Only handle vXi16 types that are at least 128-bits.
+ // Only handle vXi16 types that are at least 128-bits unless they will be
+ // widened.
if (!VT.isVector() || VT.getVectorElementType() != MVT::i16 ||
- VT.getVectorNumElements() < 8)
+ (!ExperimentalVectorWideningLegalization &&
+ VT.getVectorNumElements() < 8))
return SDValue();
// Input type should be vXi32.
@@ -36951,29 +38569,72 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
/// Returns the negated value if the node \p N flips sign of FP value.
///
-/// FP-negation node may have different forms: FNEG(x) or FXOR (x, 0x80000000).
+/// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000)
+/// or FSUB(0, x)
/// AVX512F does not have FXOR, so FNEG is lowered as
/// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))).
/// In this case we go though all bitcasts.
-static SDValue isFNEG(SDNode *N) {
+/// This also recognizes splat of a negated value and returns the splat of that
+/// value.
+static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
if (N->getOpcode() == ISD::FNEG)
return N->getOperand(0);
SDValue Op = peekThroughBitcasts(SDValue(N, 0));
- if (Op.getOpcode() != X86ISD::FXOR && Op.getOpcode() != ISD::XOR)
+ auto VT = Op->getValueType(0);
+ if (auto SVOp = dyn_cast<ShuffleVectorSDNode>(Op.getNode())) {
+ // For a VECTOR_SHUFFLE(VEC1, VEC2), if VEC2 is undef, then the negate
+ // of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.
+ if (!SVOp->getOperand(1).isUndef())
+ return SDValue();
+ if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode()))
+ return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
+ SVOp->getMask());
+ return SDValue();
+ }
+ unsigned Opc = Op.getOpcode();
+ if (Opc == ISD::INSERT_VECTOR_ELT) {
+ // Negate of INSERT_VECTOR_ELT(UNDEF, V, INDEX) is INSERT_VECTOR_ELT(UNDEF,
+ // -V, INDEX).
+ SDValue InsVector = Op.getOperand(0);
+ SDValue InsVal = Op.getOperand(1);
+ if (!InsVector.isUndef())
+ return SDValue();
+ if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode()))
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
+ NegInsVal, Op.getOperand(2));
+ return SDValue();
+ }
+
+ if (Opc != X86ISD::FXOR && Opc != ISD::XOR && Opc != ISD::FSUB)
return SDValue();
SDValue Op1 = peekThroughBitcasts(Op.getOperand(1));
if (!Op1.getValueType().isFloatingPoint())
return SDValue();
- // Extract constant bits and see if they are all sign bit masks.
+ SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
+
+ // For XOR and FXOR, we want to check if constant bits of Op1 are sign bit
+ // masks. For FSUB, we have to check if constant bits of Op0 are sign bit
+ // masks and hence we swap the operands.
+ if (Opc == ISD::FSUB)
+ std::swap(Op0, Op1);
+
APInt UndefElts;
SmallVector<APInt, 16> EltBits;
+ // Extract constant bits and see if they are all sign bit masks. Ignore the
+ // undef elements.
if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(),
- UndefElts, EltBits, false, false))
- if (llvm::all_of(EltBits, [](APInt &I) { return I.isSignMask(); }))
- return peekThroughBitcasts(Op.getOperand(0));
+ UndefElts, EltBits,
+ /* AllowWholeUndefs */ true,
+ /* AllowPartialUndefs */ false)) {
+ for (unsigned I = 0, E = EltBits.size(); I < E; I++)
+ if (!UndefElts[I] && !EltBits[I].isSignMask())
+ return SDValue();
+
+ return peekThroughBitcasts(Op0);
+ }
return SDValue();
}
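A tiny standalone sketch (illustrative only, not part of the patch) of why the new FSUB form is only accepted when the first operand's constant bits are the sign mask (-0.0): a plain 0.0 - x is not a negation for x == +0.0.

#include <cassert>
#include <cmath>

int main() {
  float X = 0.0f;
  assert(std::signbit(-0.0f - X));  // -0.0 - (+0.0) == -0.0, i.e. fneg(+0.0)
  assert(!std::signbit(0.0f - X));  // +0.0 - (+0.0) == +0.0, not a negation
  return 0;
}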
@@ -36982,8 +38643,9 @@ static SDValue isFNEG(SDNode *N) {
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT OrigVT = N->getValueType(0);
- SDValue Arg = isFNEG(N);
- assert(Arg.getNode() && "N is expected to be an FNEG node");
+ SDValue Arg = isFNEG(DAG, N);
+ if (!Arg)
+ return SDValue();
EVT VT = Arg.getValueType();
EVT SVT = VT.getScalarType();
@@ -37033,25 +38695,27 @@ static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
// If we have integer vector types available, use the integer opcodes.
- if (VT.isVector() && Subtarget.hasSSE2()) {
- SDLoc dl(N);
+ if (!VT.isVector() || !Subtarget.hasSSE2())
+ return SDValue();
- MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+ SDLoc dl(N);
- SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
- SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
- unsigned IntOpcode;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected FP logic op");
- case X86ISD::FOR: IntOpcode = ISD::OR; break;
- case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
- case X86ISD::FAND: IntOpcode = ISD::AND; break;
- case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
- }
- SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
- return DAG.getBitcast(VT, IntOp);
+ unsigned IntBits = VT.getScalarSizeInBits();
+ MVT IntSVT = MVT::getIntegerVT(IntBits);
+ MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits);
+
+ SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
+ SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
+ unsigned IntOpcode;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected FP logic op");
+ case X86ISD::FOR: IntOpcode = ISD::OR; break;
+ case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+ case X86ISD::FAND: IntOpcode = ISD::AND; break;
+ case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
}
- return SDValue();
+ SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
+ return DAG.getBitcast(VT, IntOp);
}
@@ -37098,9 +38762,7 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
- if (isFNEG(N))
- return combineFneg(N, DAG, Subtarget);
- return SDValue();
+ return combineFneg(N, DAG, Subtarget);
}
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
@@ -37112,8 +38774,6 @@ static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
unsigned NumBits = VT.getSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
// TODO - Constant Folding.
if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
@@ -37127,12 +38787,9 @@ static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
}
// Only bottom 16-bits of the control bits are required.
- KnownBits Known;
APInt DemandedMask(APInt::getLowBitsSet(NumBits, 16));
- if (TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
+ if (TLI.SimplifyDemandedBits(Op1, DemandedMask, DCI))
return SDValue(N, 0);
- }
return SDValue();
}
@@ -37233,9 +38890,8 @@ static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
if (isNullFPScalarOrVectorConst(N->getOperand(1)))
return N->getOperand(0);
- if (isFNEG(N))
- if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
- return NewVal;
+ if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
+ return NewVal;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
@@ -37320,26 +38976,47 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);
}
+static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ APInt KnownUndef, KnownZero;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
/// Do target-specific dag combines on X86ISD::ANDNP nodes.
static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ MVT VT = N->getSimpleValueType(0);
+
// ANDNP(0, x) -> x
if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
return N->getOperand(1);
// ANDNP(x, 0) -> 0
if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
- return getZeroVector(N->getSimpleValueType(0), Subtarget, DAG, SDLoc(N));
+ return DAG.getConstant(0, SDLoc(N), VT);
- EVT VT = N->getValueType(0);
+ // Turn ANDNP back to AND if input is inverted.
+ if (VT.isVector() && N->getOperand(0).getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N->getOperand(0).getOperand(1).getNode())) {
+ return DAG.getNode(ISD::AND, SDLoc(N), VT,
+ N->getOperand(0).getOperand(0), N->getOperand(1));
+ }
// Attempt to recursively combine a bitmask ANDNP with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
@@ -37502,36 +39179,6 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags);
}
-/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) ->
-/// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y)
-/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly
-/// extends from AH (which we otherwise need to do contortions to access).
-static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) {
- SDValue N0 = N->getOperand(0);
- auto OpcodeN = N->getOpcode();
- auto OpcodeN0 = N0.getOpcode();
- if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) ||
- (OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM)))
- return SDValue();
-
- EVT VT = N->getValueType(0);
- EVT InVT = N0.getValueType();
- if (N0.getResNo() != 1 || InVT != MVT::i8 ||
- !(VT == MVT::i32 || VT == MVT::i64))
- return SDValue();
-
- SDVTList NodeTys = DAG.getVTList(MVT::i8, MVT::i32);
- auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG
- : X86ISD::UDIVREM8_ZEXT_HREG;
- SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0),
- N0.getOperand(1));
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
- // If this was a 64-bit extend, complete it.
- if (VT == MVT::i64)
- return DAG.getNode(OpcodeN, SDLoc(N), VT, R.getValue(1));
- return R.getValue(1);
-}
-
// If we face {ANY,SIGN,ZERO}_EXTEND that is applied to a CMOV with constant
// operands and the result of CMOV is not used anywhere else - promote CMOV
// itself instead of promoting its result. This could be beneficial, because:
@@ -37685,6 +39332,9 @@ combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ if (ExperimentalVectorWideningLegalization)
+ return SDValue();
+
unsigned Opcode = N->getOpcode();
if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND)
return SDValue();
@@ -37699,17 +39349,33 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
EVT InVT = N0.getValueType();
EVT InSVT = InVT.getScalarType();
+ // FIXME: Generic DAGCombiner previously had a bug that would cause a
+ // sign_extend of setcc to sometimes return the original node, tricking it
+ // into thinking CombineTo was used and preventing the target combines from
+ // running.
+ // Early out here to avoid regressions like this:
+ // (v4i32 (sext (v4i1 (setcc (v4i16)))))
+ // Becomes
+ // (v4i32 (sext_invec (v8i16 (concat (v4i16 (setcc (v4i16))), undef))))
+ // Type legalized to
+ // (v4i32 (sext_invec (v8i16 (trunc_invec (v4i32 (setcc (v4i32)))))))
+ // Leading to a packssdw+pmovsxwd
+ // We could write a DAG combine to fix this, but really we shouldn't be
+ // creating sext_invec that's forcing v8i16 into the DAG.
+ if (N0.getOpcode() == ISD::SETCC)
+ return SDValue();
+
// Input type must be a vector and we must be extending legal integer types.
- if (!VT.isVector())
+ if (!VT.isVector() || VT.getVectorNumElements() < 2)
return SDValue();
if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
return SDValue();
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
- // On AVX2+ targets, if the input/output types are both legal then we will be
- // able to use SIGN_EXTEND/ZERO_EXTEND directly.
- if (Subtarget.hasInt256() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ // If the input/output types are both legal then we have at least AVX1 and
+ // we will be able to use SIGN_EXTEND/ZERO_EXTEND directly.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
DAG.getTargetLoweringInfo().isTypeLegal(InVT))
return SDValue();
@@ -37737,16 +39403,16 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, DL));
}
- // If target-size is 128-bits (or 256-bits on AVX2 target), then convert to
+ // If target-size is 128-bits (or 256-bits on AVX target), then convert to
// ISD::*_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::V*EXT.
// Also use this if we don't have SSE41 to allow the legalizer do its job.
if (!Subtarget.hasSSE41() || VT.is128BitVector() ||
- (VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is256BitVector() && Subtarget.hasAVX()) ||
(VT.is512BitVector() && Subtarget.useAVX512Regs())) {
SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
- return Opcode == ISD::SIGN_EXTEND
- ? DAG.getSignExtendVectorInReg(ExOp, DL, VT)
- : DAG.getZeroExtendVectorInReg(ExOp, DL, VT);
+ Opcode = Opcode == ISD::SIGN_EXTEND ? ISD::SIGN_EXTEND_VECTOR_INREG
+ : ISD::ZERO_EXTEND_VECTOR_INREG;
+ return DAG.getNode(Opcode, DL, VT, ExOp);
}
auto SplitAndExtendInReg = [&](unsigned SplitSize) {
@@ -37755,22 +39421,23 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
+ unsigned IROpc = Opcode == ISD::SIGN_EXTEND ? ISD::SIGN_EXTEND_VECTOR_INREG
+ : ISD::ZERO_EXTEND_VECTOR_INREG;
+
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
DAG.getIntPtrConstant(Offset, DL));
SrcVec = ExtendVecSize(DL, SrcVec, SplitSize);
- SrcVec = Opcode == ISD::SIGN_EXTEND
- ? DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT)
- : DAG.getZeroExtendVectorInReg(SrcVec, DL, SubVT);
+ SrcVec = DAG.getNode(IROpc, DL, SubVT, SrcVec);
Opnds.push_back(SrcVec);
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
};
- // On pre-AVX2 targets, split into 128-bit nodes of
+ // On pre-AVX targets, split into 128-bit nodes of
// ISD::*_EXTEND_VECTOR_INREG.
- if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128))
+ if (!Subtarget.hasAVX() && !(VT.getSizeInBits() % 128))
return SplitAndExtendInReg(128);
// On pre-AVX512 targets, split into 256-bit nodes of
@@ -37832,9 +39499,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
EVT InVT = N0.getValueType();
SDLoc DL(N);
- if (SDValue DivRem8 = getDivRem8(N, DAG))
- return DivRem8;
-
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
return NewCMov;
@@ -37861,7 +39525,7 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
return V;
if (VT.isVector())
- if (SDValue R = WidenMaskArithmetic(N, DAG, Subtarget))
+ if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
return R;
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
@@ -37920,7 +39584,7 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
SDValue C = N->getOperand(2);
auto invertIfNegative = [&DAG](SDValue &V) {
- if (SDValue NegVal = isFNEG(V.getNode())) {
+ if (SDValue NegVal = isFNEG(DAG, V.getNode())) {
V = DAG.getBitcast(V.getValueType(), NegVal);
return true;
}
@@ -37928,7 +39592,7 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
// new extract from the FNEG input.
if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isNullConstant(V.getOperand(1))) {
- if (SDValue NegVal = isFNEG(V.getOperand(0).getNode())) {
+ if (SDValue NegVal = isFNEG(DAG, V.getOperand(0).getNode())) {
NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal);
V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(),
NegVal, V.getOperand(1));
@@ -37961,7 +39625,7 @@ static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
SDLoc dl(N);
EVT VT = N->getValueType(0);
- SDValue NegVal = isFNEG(N->getOperand(2).getNode());
+ SDValue NegVal = isFNEG(DAG, N->getOperand(2).getNode());
if (!NegVal)
return SDValue();
@@ -38032,12 +39696,9 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
return V;
if (VT.isVector())
- if (SDValue R = WidenMaskArithmetic(N, DAG, Subtarget))
+ if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
return R;
- if (SDValue DivRem8 = getDivRem8(N, DAG))
- return DivRem8;
-
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
@@ -38079,12 +39740,15 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
return SDValue();
// TODO: Use PXOR + PTEST for SSE4.1 or later?
- // TODO: Add support for AVX-512.
EVT VT = SetCC->getValueType(0);
SDLoc DL(SetCC);
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && Subtarget.hasAVX2())) {
- EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
+ (OpSize == 256 && Subtarget.hasAVX2()) ||
+ (OpSize == 512 && Subtarget.useAVX512Regs())) {
+ EVT VecVT = OpSize == 512 ? MVT::v16i32 :
+ OpSize == 256 ? MVT::v32i8 :
+ MVT::v16i8;
+ EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT;
SDValue Cmp;
if (IsOrXorXorCCZero) {
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
@@ -38095,14 +39759,18 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1));
SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0));
SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1));
- SDValue Cmp1 = DAG.getSetCC(DL, VecVT, A, B, ISD::SETEQ);
- SDValue Cmp2 = DAG.getSetCC(DL, VecVT, C, D, ISD::SETEQ);
- Cmp = DAG.getNode(ISD::AND, DL, VecVT, Cmp1, Cmp2);
+ SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
+ SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ);
+ Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2);
} else {
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);
- Cmp = DAG.getSetCC(DL, VecVT, VecX, VecY, ISD::SETEQ);
+ Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
}
+ // For 512-bits we want to emit a setcc that will lower to kortest.
+ if (OpSize == 512)
+ return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp),
+ DAG.getConstant(0xFFFF, DL, MVT::i16), CC);
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
@@ -38181,7 +39849,9 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
// NOTE: The element count check is to ignore operand types that need to
// go through type promotion to a 128-bit vector.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
- VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() > 4 &&
+ VT.getVectorElementType() == MVT::i1 &&
+ (ExperimentalVectorWideningLegalization ||
+ VT.getVectorNumElements() > 4) &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS,
@@ -38202,10 +39872,11 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue Src = N->getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
+ MVT VT = N->getSimpleValueType(0);
// Perform constant folding.
if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) {
- assert(N->getValueType(0) == MVT::i32 && "Unexpected result type");
+ assert(VT == MVT::i32 && "Unexpected result type");
APInt Imm(32, 0);
for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) {
SDValue In = Src.getOperand(Idx);
@@ -38213,20 +39884,53 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
cast<ConstantSDNode>(In)->getAPIntValue().isNegative())
Imm.setBit(Idx);
}
- return DAG.getConstant(Imm, SDLoc(N), N->getValueType(0));
+ return DAG.getConstant(Imm, SDLoc(N), VT);
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
+ // Look through int->fp bitcasts that don't change the element width.
+ if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse() &&
+ SrcVT.isFloatingPoint() &&
+ Src.getOperand(0).getValueType() ==
+ EVT(SrcVT).changeVectorElementTypeToInteger())
+ Src = Src.getOperand(0);
- // MOVMSK only uses the MSB from each vector element.
- KnownBits Known;
- APInt DemandedMask(APInt::getSignMask(SrcVT.getScalarSizeInBits()));
- if (TLI.SimplifyDemandedBits(Src, DemandedMask, Known, TLO)) {
- DCI.AddToWorklist(Src.getNode());
- DCI.CommitTargetLoweringOpt(TLO);
+ // Simplify the inputs.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
+
+ // Combine (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)).
+ // Only do this when the setcc input and output types are the same and the
+ // setcc and the 'and' node have a single use.
+ // FIXME: Support 256-bits with AVX1. The movmsk is split, but the and isn't.
+ APInt SplatVal;
+ if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() &&
+ Src.getOperand(0).getValueType() == Src.getValueType() &&
+ cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETNE &&
+ ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
+ Src.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue And = Src.getOperand(0);
+ if (And.hasOneUse() &&
+ ISD::isConstantSplatVector(And.getOperand(1).getNode(), SplatVal) &&
+ SplatVal.isPowerOf2()) {
+ MVT VT = Src.getSimpleValueType();
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ unsigned ShAmt = BitWidth - SplatVal.logBase2() - 1;
+ SDLoc DL(And);
+ SDValue X = And.getOperand(0);
+ // If the element type is i8, we need to bitcast to i16 to use a legal
+ // shift. If we wait until lowering we end up with an extra AND to stop bits
+ // from crossing the 8-bit elements, but we don't care about that here.
+ if (VT.getVectorElementType() == MVT::i8) {
+ VT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
+ X = DAG.getBitcast(VT, X);
+ }
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
+ DAG.getConstant(ShAmt, DL, VT));
+ SDValue Cast = DAG.getBitcast(SrcVT, Shl);
+ return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), N->getValueType(0), Cast);
+ }
}
return SDValue();
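A standalone sketch (illustrative only, not part of the patch) of the per-lane identity behind the new MOVMSK fold, assuming 16-bit lanes and mask bit C = 3: shifting bit C up to the MSB reproduces exactly the bit that the setne-against-zero lane would contribute to the mask.

#include <cassert>
#include <cstdint>

int main() {
  const unsigned BitWidth = 16, C = 3;
  for (uint32_t X = 0; X < 0x10000; ++X) {
    // The setcc lane is all-ones (MSB set) iff bit C of X is set.
    bool SetccMSB = (X & (1u << C)) != 0;
    // The replacement shifts bit C into the MSB; MOVMSK only reads the MSB.
    uint16_t Shifted = (uint16_t)(X << (BitWidth - 1 - C));
    bool ShiftMSB = (Shifted >> (BitWidth - 1)) & 1;
    assert(SetccMSB == ShiftMSB);
  }
  return 0;
}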
@@ -38296,16 +40000,10 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
// With AVX2 we only demand the upper bit of the mask.
if (!Subtarget.hasAVX512()) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
SDValue Mask = N->getOperand(2);
- KnownBits Known;
APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
- if (TLI.SimplifyDemandedBits(Mask, DemandedMask, Known, TLO)) {
- DCI.AddToWorklist(Mask.getNode());
- DCI.CommitTargetLoweringOpt(TLO);
+ if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
return SDValue(N, 0);
- }
}
return SDValue();
@@ -38396,7 +40094,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
- // UINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
+ // UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))
// UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
// UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
@@ -38460,7 +40158,8 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
- if (!Subtarget.useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasX87() &&
+ Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT LdVT = Ld->getValueType(0);
@@ -38485,6 +40184,159 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static bool needCarryOrOverflowFlag(SDValue Flags) {
+ assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
+
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+
+ X86::CondCode CC;
+ switch (User->getOpcode()) {
+ default:
+ // Be conservative.
+ return true;
+ case X86ISD::SETCC:
+ case X86ISD::SETCC_CARRY:
+ CC = (X86::CondCode)User->getConstantOperandVal(0);
+ break;
+ case X86ISD::BRCOND:
+ CC = (X86::CondCode)User->getConstantOperandVal(2);
+ break;
+ case X86ISD::CMOV:
+ CC = (X86::CondCode)User->getConstantOperandVal(2);
+ break;
+ }
+
+ switch (CC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool onlyZeroFlagUsed(SDValue Flags) {
+ assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
+
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+
+ unsigned CCOpNo;
+ switch (User->getOpcode()) {
+ default:
+ // Be conservative.
+ return false;
+ case X86ISD::SETCC: CCOpNo = 0; break;
+ case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
+ case X86ISD::BRCOND: CCOpNo = 2; break;
+ case X86ISD::CMOV: CCOpNo = 2; break;
+ }
+
+ X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo);
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return false;
+ }
+
+ return true;
+}
+
+static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
+ // Only handle test patterns.
+ if (!isNullConstant(N->getOperand(1)))
+ return SDValue();
+
+ // If we have a CMP of a truncated binop, see if we can make a smaller binop
+ // and use its flags directly.
+ // TODO: Maybe we should try promoting compares that only use the zero flag
+ // first if we can prove the upper bits with computeKnownBits?
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ EVT VT = Op.getValueType();
+
+ // If we have a constant logical shift that's only used in a comparison
+ // against zero turn it into an equivalent AND. This allows turning it into
+ // a TEST instruction later.
+ if ((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) &&
+ Op.hasOneUse() && isa<ConstantSDNode>(Op.getOperand(1)) &&
+ onlyZeroFlagUsed(SDValue(N, 0))) {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ unsigned ShAmt = Op.getConstantOperandVal(1);
+ if (ShAmt < BitWidth) { // Avoid undefined shifts.
+ APInt Mask = Op.getOpcode() == ISD::SRL
+ ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
+ : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
+ if (Mask.isSignedIntN(32)) {
+ Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
+ DAG.getConstant(Mask, dl, VT));
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, dl, VT));
+ }
+ }
+ }
+
+
+ // Look for a truncate with a single use.
+ if (Op.getOpcode() != ISD::TRUNCATE || !Op.hasOneUse())
+ return SDValue();
+
+ Op = Op.getOperand(0);
+
+ // Arithmetic op can only have one use.
+ if (!Op.hasOneUse())
+ return SDValue();
+
+ unsigned NewOpc;
+ switch (Op.getOpcode()) {
+ default: return SDValue();
+ case ISD::AND:
+ // Skip and with constant. We have special handling for and with immediate
+ // during isel to generate test instructions.
+ if (isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ NewOpc = X86ISD::AND;
+ break;
+ case ISD::OR: NewOpc = X86ISD::OR; break;
+ case ISD::XOR: NewOpc = X86ISD::XOR; break;
+ case ISD::ADD:
+ // If the carry or overflow flag is used, we can't truncate.
+ if (needCarryOrOverflowFlag(SDValue(N, 0)))
+ return SDValue();
+ NewOpc = X86ISD::ADD;
+ break;
+ case ISD::SUB:
+ // If the carry or overflow flag is used, we can't truncate.
+ if (needCarryOrOverflowFlag(SDValue(N, 0)))
+ return SDValue();
+ NewOpc = X86ISD::SUB;
+ break;
+ }
+
+ // We found an op we can narrow. Truncate its inputs.
+ SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1));
+
+ // Use a X86 specific opcode to avoid DAG combine messing with it.
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ Op = DAG.getNode(NewOpc, dl, VTs, Op0, Op1);
+
+ // For AND, keep a CMP so that we can match the test pattern.
+ if (NewOpc == X86ISD::AND)
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, dl, VT));
+
+ // Return the flags.
+ return Op.getValue(1);
+}
+
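A standalone sketch (illustrative only, not part of the patch) of the property combineCMP relies on: flags that depend only on the low bits (such as ZF, the only ones left after the carry/overflow/signed-order checks above) are identical for the wide op and for the narrowed op on truncated operands.

#include <cassert>
#include <cstdint>
#include <random>

int main() {
  std::mt19937_64 Rng(0);
  for (int I = 0; I < 100000; ++I) {
    uint64_t X = Rng(), Y = Rng();
    // The low 32 bits of an add depend only on the low 32 bits of its
    // operands, so "trunc(X + Y) == 0" and "trunc(X) + trunc(Y) == 0" agree.
    bool WideZero = (uint32_t)(X + Y) == 0;
    bool NarrowZero = (uint32_t)((uint32_t)X + (uint32_t)Y) == 0;
    assert(WideZero == NarrowZero);
  }
  return 0;
}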
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) {
MVT VT = N->getSimpleValueType(0);
@@ -38531,21 +40383,6 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit
-/// which is more useful than 0/1 in some cases.
-static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) {
- SDLoc DL(N);
- // "Condition code B" is also known as "the carry flag" (CF).
- SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8);
- SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS);
- MVT VT = N->getSimpleValueType(0);
- if (VT == MVT::i8)
- return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT));
-
- assert(VT == MVT::i1 && "Unexpected type for SETCC node");
- return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB);
-}
-
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
@@ -38616,13 +40453,11 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
}
if (CC == X86::COND_B) {
- // X + SETB Z --> X + (mask SBB Z, Z)
- // X - SETB Z --> X - (mask SBB Z, Z)
- // TODO: Produce ADC/SBB here directly and avoid SETCC_CARRY?
- SDValue SBB = materializeSBB(Y.getNode(), Y.getOperand(1), DAG);
- if (SBB.getValueSizeInBits() != VT.getSizeInBits())
- SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
- return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ // X + SETB Z --> adc X, 0
+ // X - SETB Z --> sbb X, 0
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), Y.getOperand(1));
}
if (CC == X86::COND_A) {
@@ -38640,10 +40475,9 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
- SDValue SBB = materializeSBB(Y.getNode(), NewEFLAGS, DAG);
- if (SBB.getValueSizeInBits() != VT.getSizeInBits())
- SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
- return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), NewEFLAGS);
}
}
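A minimal sketch (illustrative only, not part of the patch) of the identity used above: adding a materialized carry bit to X is the same as an add-with-carry of zero, so no SETCC_CARRY + AND mask is needed.

#include <cassert>
#include <cstdint>

// Scalar model of "adc X, 0" with a given carry-in.
static uint32_t adc(uint32_t A, uint32_t B, bool CarryIn) {
  return A + B + (CarryIn ? 1u : 0u);
}

int main() {
  uint32_t Xs[] = {0u, 1u, 12345u, 0xFFFFFFFFu};
  for (uint32_t X : Xs)
    for (int CF = 0; CF <= 1; ++CF) {
      uint32_t SetB = CF ? 1u : 0u;      // what SETB would materialize
      assert(X + SetB == adc(X, 0, CF != 0));
    }
  return 0;
}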
@@ -38713,23 +40547,23 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasSSE2())
return SDValue();
- SDValue MulOp = N->getOperand(0);
- SDValue Phi = N->getOperand(1);
-
- if (MulOp.getOpcode() != ISD::MUL)
- std::swap(MulOp, Phi);
- if (MulOp.getOpcode() != ISD::MUL)
- return SDValue();
-
- ShrinkMode Mode;
- if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16)
- return SDValue();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// If the vector size is less than 128, or greater than the supported RegSize,
// do not use PMADD.
- if (VT.getVectorNumElements() < 8)
+ if (!VT.isVector() || VT.getVectorNumElements() < 8)
+ return SDValue();
+
+ if (Op0.getOpcode() != ISD::MUL)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ ShrinkMode Mode;
+ if (!canReduceVMulWidth(Op0.getNode(), DAG, Mode) || Mode == MULU16)
return SDValue();
SDLoc DL(N);
@@ -38738,22 +40572,34 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
VT.getVectorNumElements() / 2);
- // Shrink the operands of mul.
- SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(0));
- SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1));
-
// Madd vector size is half of the original vector size
auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Ops);
};
- SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 },
- PMADDWDBuilder);
- // Fill the rest of the output with 0
- SDValue Zero = getZeroVector(Madd.getSimpleValueType(), Subtarget, DAG, DL);
- SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, Zero);
- return DAG.getNode(ISD::ADD, DL, VT, Concat, Phi);
+
+ auto BuildPMADDWD = [&](SDValue Mul) {
+ // Shrink the operands of mul.
+ SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(1));
+
+ SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 },
+ PMADDWDBuilder);
+ // Fill the rest of the output with 0
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd,
+ DAG.getConstant(0, DL, MAddVT));
+ };
+
+ Op0 = BuildPMADDWD(Op0);
+
+ // It's possible that Op1 is also a mul we can reduce.
+ if (Op1.getOpcode() == ISD::MUL &&
+ canReduceVMulWidth(Op1.getNode(), DAG, Mode) && Mode != MULU16) {
+ Op1 = BuildPMADDWD(Op1);
+ }
+
+ return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
}
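A scalar model (illustrative only, not part of the patch) of why routing the shrunk multiplies through PMADDWD is safe inside a reduction add: pairing the 32-bit products early, as PMADDWD does, leaves the final sum unchanged when the operands fit in i16.

#include <cassert>
#include <cstdint>

// One VPMADDWD output lane: two adjacent signed i16 products, summed in i32.
static int32_t PmaddwdLane(int16_t A0, int16_t A1, int16_t B0, int16_t B1) {
  return (int32_t)A0 * B0 + (int32_t)A1 * B1;
}

int main() {
  int16_t A[8] = {1, -2, 3, 4, -5, 6, 7, 8};
  int16_t B[8] = {8, 7, -6, 5, 4, 3, 2, -1};
  int64_t WideSum = 0;
  for (int I = 0; I < 8; ++I)
    WideSum += (int32_t)A[I] * (int32_t)B[I];
  int64_t MaddSum = 0;
  for (int I = 0; I < 8; I += 2)
    MaddSum += PmaddwdLane(A[I], A[I + 1], B[I], B[I + 1]);
  assert(WideSum == MaddSum);
  return 0;
}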
static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
@@ -38786,45 +40632,53 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
// We know N is a reduction add, which means one of its operands is a phi.
// To match SAD, we need the other operand to be a vector select.
- SDValue SelectOp, Phi;
- if (Op0.getOpcode() == ISD::VSELECT) {
- SelectOp = Op0;
- Phi = Op1;
- } else if (Op1.getOpcode() == ISD::VSELECT) {
- SelectOp = Op1;
- Phi = Op0;
- } else
- return SDValue();
+ if (Op0.getOpcode() != ISD::VSELECT)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) {
+ // SAD pattern detected. Now build a SAD instruction and an addition for
+ // reduction. Note that the number of elements of the result of SAD is less
+ // than the number of elements of its input. Therefore, we can only update
+ // part of the elements in the reduction vector.
+ SDValue Sad = createPSADBW(DAG, Op0, Op1, DL, Subtarget);
+
+ // The output of PSADBW is a vector of i64.
+ // We need to turn the vector of i64 into a vector of i32.
+ // If the reduction vector is at least as wide as the psadbw result, just
+ // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
+ // anyway.
+ MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
+ if (VT.getSizeInBits() >= ResVT.getSizeInBits())
+ Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
+ else
+ Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
+
+ if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
+ // Fill the upper elements with zero to match the add width.
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
+ return Sad;
+ };
// Check whether we have an abs-diff pattern feeding into the select.
- if(!detectZextAbsDiff(SelectOp, Op0, Op1))
- return SDValue();
-
- // SAD pattern detected. Now build a SAD instruction and an addition for
- // reduction. Note that the number of elements of the result of SAD is less
- // than the number of elements of its input. Therefore, we could only update
- // part of elements in the reduction vector.
- SDValue Sad = createPSADBW(DAG, Op0, Op1, DL, Subtarget);
-
- // The output of PSADBW is a vector of i64.
- // We need to turn the vector of i64 into a vector of i32.
- // If the reduction vector is at least as wide as the psadbw result, just
- // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
- // anyway.
- MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
- if (VT.getSizeInBits() >= ResVT.getSizeInBits())
- Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
- else
- Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
+ SDValue SadOp0, SadOp1;
+ if (!detectZextAbsDiff(Op0, SadOp0, SadOp1))
+ return SDValue();
- if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
- // Fill the upper elements with zero to match the add width.
- SDValue Zero = DAG.getConstant(0, DL, VT);
- Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad,
- DAG.getIntPtrConstant(0, DL));
+ Op0 = BuildPSADBW(SadOp0, SadOp1);
+
+ // It's possible we have a sad on the other side too.
+ if (Op1.getOpcode() == ISD::VSELECT &&
+ detectZextAbsDiff(Op1, SadOp0, SadOp1)) {
+ Op1 = BuildPSADBW(SadOp0, SadOp1);
}
- return DAG.getNode(ISD::ADD, DL, VT, Sad, Phi);
+ return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
}
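A scalar model (illustrative only, not part of the patch) of one PSADBW lane, and of why the high half of each i64 result can be treated as zero when the reduction type is narrower: the lane value is bounded by 8 * 255.

#include <cassert>
#include <cstdint>

// One PSADBW output lane: sum of absolute differences of eight unsigned
// bytes, zero-extended into an i64 lane.
static uint64_t PsadbwLane(const uint8_t *A, const uint8_t *B) {
  uint64_t Sum = 0;
  for (int I = 0; I < 8; ++I)
    Sum += A[I] > B[I] ? A[I] - B[I] : B[I] - A[I];
  return Sum;
}

int main() {
  uint8_t A[8] = {10, 200, 30, 40, 5, 60, 70, 255};
  uint8_t B[8] = {20, 100, 30, 45, 50, 60, 0, 1};
  assert(PsadbwLane(A, B) == 484u);      // 10+100+0+5+45+0+70+254
  assert(PsadbwLane(A, B) <= 8u * 255u); // always fits in the low 32 bits
  return 0;
}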
/// Convert vector increment or decrement to sub/add with an all-ones constant:
@@ -38843,10 +40697,8 @@ static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
return SDValue();
- SDNode *N1 = N->getOperand(1).getNode();
APInt SplatVal;
- if (!ISD::isConstantSplatVector(N1, SplatVal) ||
- !SplatVal.isOneValue())
+ if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue())
return SDValue();
SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
@@ -38963,6 +40815,39 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
PMADDBuilder);
}
+// Try to turn (add (umax X, C), -C) into (psubus X, C)
+static SDValue combineAddToSUBUS(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasSSE2())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // psubus is available in SSE2 for i8 and i16 vectors.
+ if (!VT.isVector() || VT.getVectorNumElements() < 2 ||
+ !isPowerOf2_32(VT.getVectorNumElements()) ||
+ !(VT.getVectorElementType() == MVT::i8 ||
+ VT.getVectorElementType() == MVT::i16))
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() != ISD::UMAX)
+ return SDValue();
+
+ // The add should have a constant that is the negative of the max.
+ // TODO: Handle build_vectors with undef elements.
+ auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
+ return Max->getAPIntValue() == (-Op->getAPIntValue());
+ };
+ if (!ISD::matchBinaryPredicate(Op0.getOperand(1), Op1, MatchUSUBSAT))
+ return SDValue();
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, Op0.getOperand(0),
+ Op0.getOperand(1));
+}
+
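A standalone check (illustrative only, not part of the patch) of the per-element identity behind combineAddToSUBUS for unsigned i8 elements: because umax(X, C) >= C, adding the two's complement of C never wraps, and the result is exactly the unsigned saturating subtract.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; ++C) {
      uint8_t Add = (uint8_t)(std::max(X, C) + (uint8_t)(0 - C)); // umax + (-C)
      uint8_t USubSat = X >= C ? (uint8_t)(X - C) : 0;            // usubsat X, C
      assert(Add == USubSat);
    }
  return 0;
}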
// Attempt to turn this pattern into PMADDWD.
// (mul (add (zext (build_vector)), (zext (build_vector))),
// (add (zext (build_vector)), (zext (build_vector)))
@@ -39105,7 +40990,8 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal adds from adds of shuffles.
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
- Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, true)) {
+ Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, true) &&
+ shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops);
@@ -39117,6 +41003,9 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineIncDecVector(N, DAG))
return V;
+ if (SDValue V = combineAddToSUBUS(N, DAG, Subtarget))
+ return V;
+
return combineAddOrSubToADCOrSBB(N, DAG);
}
@@ -39162,23 +41051,22 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
} else
return SDValue();
- auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
+ auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops);
};
// PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with
// special preprocessing in some cases.
if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64)
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
- { SubusLHS, SubusRHS }, SUBUSBuilder);
+ { SubusLHS, SubusRHS }, USUBSATBuilder);
// Special preprocessing case can be only applied
// if the value was zero extended from 16 bit,
// so we require first 16 bits to be zeros for 32 bit
// values, or first 48 bits for 64 bit values.
- KnownBits Known;
- DAG.computeKnownBits(SubusLHS, Known);
+ KnownBits Known = DAG.computeKnownBits(SubusLHS);
unsigned NumZeros = Known.countMinLeadingZeros();
if ((VT == MVT::v8i64 && NumZeros < 48) || NumZeros < 16)
return SDValue();
@@ -39203,7 +41091,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType);
SDValue Psubus =
SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType,
- { NewSubusLHS, NewSubusRHS }, SUBUSBuilder);
+ { NewSubusLHS, NewSubusRHS }, USUBSATBuilder);
// Zero-extend the result; it may be used somewhere as a 32-bit value.
// If not, the zext and the following trunc will be shrunk away.
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType);
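A standalone sketch of why the narrowing above is safe, assuming (as the comment requires) that the LHS is known zero-extended from 16 bits; usubsat32/usubsat16 are hypothetical helpers, not LLVM APIs:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t usubsat32(uint32_t X, uint32_t Y) { return X > Y ? X - Y : 0; }
static uint16_t usubsat16(uint16_t X, uint16_t Y) { return X > Y ? X - Y : 0; }

int main() {
  for (uint32_t X = 0; X < 0x10000; X += 0x123)   // any LHS that fits in 16 bits
    for (uint32_t Y : {0u, 0x123u, 0xFFFFu, 0x1FFFFu, 0xFFFFFFFFu}) {
      uint32_t Clamped = std::min(Y, X);          // NewSubusRHS = umin(RHS, LHS)
      // The clamped RHS also fits in 16 bits, so a 16-bit psubus is exact.
      assert(usubsat32(X, Y) == usubsat16((uint16_t)X, (uint16_t)Clamped));
    }
  return 0;
}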
@@ -39236,7 +41124,8 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
- Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, false)) {
+ Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, false) &&
+ shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops);
@@ -39255,98 +41144,6 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
return combineAddOrSubToADCOrSBB(N, DAG);
}
-static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- SDLoc DL(N);
- unsigned Opcode = N->getOpcode();
- MVT VT = N->getSimpleValueType(0);
- MVT SVT = VT.getVectorElementType();
- unsigned NumElts = VT.getVectorNumElements();
- unsigned EltSizeInBits = SVT.getSizeInBits();
-
- SDValue Op = N->getOperand(0);
- MVT OpVT = Op.getSimpleValueType();
- MVT OpEltVT = OpVT.getVectorElementType();
- unsigned OpEltSizeInBits = OpEltVT.getSizeInBits();
- unsigned InputBits = OpEltSizeInBits * NumElts;
-
- // Perform any constant folding.
- // FIXME: Reduce constant pool usage and don't fold when OptSize is enabled.
- APInt UndefElts;
- SmallVector<APInt, 64> EltBits;
- if (getTargetConstantBitsFromNode(Op, OpEltSizeInBits, UndefElts, EltBits)) {
- APInt Undefs(NumElts, 0);
- SmallVector<APInt, 4> Vals(NumElts, APInt(EltSizeInBits, 0));
- bool IsZEXT =
- (Opcode == X86ISD::VZEXT) || (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG);
- for (unsigned i = 0; i != NumElts; ++i) {
- if (UndefElts[i]) {
- Undefs.setBit(i);
- continue;
- }
- Vals[i] = IsZEXT ? EltBits[i].zextOrTrunc(EltSizeInBits)
- : EltBits[i].sextOrTrunc(EltSizeInBits);
- }
- return getConstVector(Vals, Undefs, VT, DAG, DL);
- }
-
- // (vzext (bitcast (vzext (x)) -> (vzext x)
- // TODO: (vsext (bitcast (vsext (x)) -> (vsext x)
- SDValue V = peekThroughBitcasts(Op);
- if (Opcode == X86ISD::VZEXT && V != Op && V.getOpcode() == X86ISD::VZEXT) {
- MVT InnerVT = V.getSimpleValueType();
- MVT InnerEltVT = InnerVT.getVectorElementType();
-
- // If the element sizes match exactly, we can just do one larger vzext. This
- // is always an exact type match as vzext operates on integer types.
- if (OpEltVT == InnerEltVT) {
- assert(OpVT == InnerVT && "Types must match for vzext!");
- return DAG.getNode(X86ISD::VZEXT, DL, VT, V.getOperand(0));
- }
-
- // The only other way we can combine them is if only a single element of the
- // inner vzext is used in the input to the outer vzext.
- if (InnerEltVT.getSizeInBits() < InputBits)
- return SDValue();
-
- // In this case, the inner vzext is completely dead because we're going to
- // only look at bits inside of the low element. Just do the outer vzext on
- // a bitcast of the input to the inner.
- return DAG.getNode(X86ISD::VZEXT, DL, VT, DAG.getBitcast(OpVT, V));
- }
-
- // Check if we can bypass extracting and re-inserting an element of an input
- // vector. Essentially:
- // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
- // TODO: Add X86ISD::VSEXT support
- if (Opcode == X86ISD::VZEXT &&
- V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
- SDValue ExtractedV = V.getOperand(0);
- SDValue OrigV = ExtractedV.getOperand(0);
- if (isNullConstant(ExtractedV.getOperand(1))) {
- MVT OrigVT = OrigV.getSimpleValueType();
- // Extract a subvector if necessary...
- if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
- int Ratio = OrigVT.getSizeInBits() / OpVT.getSizeInBits();
- OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(),
- OrigVT.getVectorNumElements() / Ratio);
- OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV,
- DAG.getIntPtrConstant(0, DL));
- }
- Op = DAG.getBitcast(OpVT, OrigV);
- return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
- }
- }
-
- return SDValue();
-}
-
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
@@ -39354,9 +41151,9 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
if (N->getOperand(0) == N->getOperand(1)) {
if (N->getOpcode() == X86ISD::PCMPEQ)
- return getOnesVector(VT, DAG, DL);
+ return DAG.getConstant(-1, DL, VT);
if (N->getOpcode() == X86ISD::PCMPGT)
- return getZeroVector(VT, Subtarget, DAG, DL);
+ return DAG.getConstant(0, DL, VT);
}
return SDValue();
@@ -39487,11 +41284,10 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
return Ld;
}
}
- // If lower/upper loads are the same and the only users of the load, then
- // lower to a VBROADCASTF128/VBROADCASTI128/etc.
+ // If lower/upper loads are the same and there's no other use of the lower
+ // load, then splat the loaded value with a broadcast.
if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2)))
- if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
- SDNode::areOnlyUsersOf({N, Vec.getNode()}, SubVec2.getNode()))
+ if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) && Vec.hasOneUse())
return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
// If this is subv_broadcast insert into both halves, use a larger
@@ -39528,6 +41324,39 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ // For AVX1 only, if we are extracting from a 256-bit and+not (which will
+ // eventually get combined/lowered into ANDNP) with a concatenated operand,
+ // split the 'and' into 128-bit ops to avoid the concatenate and extract.
+ // We let generic combining take over from there to simplify the
+ // insert/extract and 'not'.
+ // This pattern emerges during AVX1 legalization. We handle it before lowering
+ // to avoid complications like splitting constant vector loads.
+
+ // Capture the original wide type in the likely case that we need to bitcast
+ // back to this type.
+ EVT VT = N->getValueType(0);
+ EVT WideVecVT = N->getOperand(0).getValueType();
+ SDValue WideVec = peekThroughBitcasts(N->getOperand(0));
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (Subtarget.hasAVX() && !Subtarget.hasAVX2() &&
+ TLI.isTypeLegal(WideVecVT) &&
+ WideVecVT.getSizeInBits() == 256 && WideVec.getOpcode() == ISD::AND) {
+ auto isConcatenatedNot = [] (SDValue V) {
+ V = peekThroughBitcasts(V);
+ if (!isBitwiseNot(V))
+ return false;
+ SDValue NotOp = V->getOperand(0);
+ return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS;
+ };
+ if (isConcatenatedNot(WideVec.getOperand(0)) ||
+ isConcatenatedNot(WideVec.getOperand(1))) {
+ // extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1
+ SDValue Concat = split256IntArith(WideVec, DAG);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
+ DAG.getBitcast(WideVecVT, Concat), N->getOperand(1));
+ }
+ }
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -39565,13 +41394,32 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), OpVT, InVec.getOperand(0));
}
}
- if ((InOpcode == X86ISD::VZEXT || InOpcode == X86ISD::VSEXT) &&
+ if ((InOpcode == ISD::ZERO_EXTEND || InOpcode == ISD::SIGN_EXTEND) &&
OpVT.is128BitVector() &&
InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
- unsigned ExtOp = InOpcode == X86ISD::VZEXT ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG;
+ unsigned ExtOp =
+ InOpcode == ISD::ZERO_EXTEND ? ISD::ZERO_EXTEND_VECTOR_INREG
+ : ISD::SIGN_EXTEND_VECTOR_INREG;
return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0));
}
+ if ((InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) &&
+ OpVT.is128BitVector() &&
+ InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
+ return DAG.getNode(InOpcode, SDLoc(N), OpVT, InVec.getOperand(0));
+ }
+ if (InOpcode == ISD::BITCAST) {
+ // TODO - do this for target shuffles in general.
+ SDValue InVecBC = peekThroughOneUseBitcasts(InVec);
+ if (InVecBC.getOpcode() == X86ISD::PSHUFB && OpVT.is128BitVector()) {
+ SDLoc DL(N);
+ SDValue SubPSHUFB =
+ DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
+ extract128BitVector(InVecBC.getOperand(0), 0, DAG, DL),
+ extract128BitVector(InVecBC.getOperand(1), 0, DAG, DL));
+ return DAG.getBitcast(OpVT, SubPSHUFB);
+ }
+ }
}
return SDValue();
@@ -39591,6 +41439,15 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1,
Src.getOperand(0));
+ // Combine scalar_to_vector of an extract_vector_elt into an extract_subvec.
+ if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
+ Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+ if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
+ if (C->isNullValue())
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
+ Src.getOperand(0), Src.getOperand(1));
+
return SDValue();
}
@@ -39600,23 +41457,28 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ // Canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(RHS))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS);
+
+ // Multiply by zero.
+ if (ISD::isBuildVectorAllZeros(RHS.getNode()))
+ return RHS;
+
+ // Aggressively peek through ops to get at the demanded low bits.
+ APInt DemandedMask = APInt::getLowBitsSet(64, 32);
+ SDValue DemandedLHS = DAG.GetDemandedBits(LHS, DemandedMask);
+ SDValue DemandedRHS = DAG.GetDemandedBits(RHS, DemandedMask);
+ if (DemandedLHS || DemandedRHS)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ DemandedLHS ? DemandedLHS : LHS,
+ DemandedRHS ? DemandedRHS : RHS);
+
+ // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- APInt DemandedMask(APInt::getLowBitsSet(64, 32));
-
- // PMULQDQ/PMULUDQ only uses lower 32 bits from each vector element.
- KnownBits LHSKnown;
- if (TLI.SimplifyDemandedBits(LHS, DemandedMask, LHSKnown, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI))
return SDValue(N, 0);
- }
-
- KnownBits RHSKnown;
- if (TLI.SimplifyDemandedBits(RHS, DemandedMask, RHSKnown, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
- return SDValue(N, 0);
- }
return SDValue();
}
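A scalar model of the instruction semantics the combine above relies on (a sketch for one 64-bit lane; the helper names are hypothetical): PMULUDQ/PMULDQ read only the low 32 bits of each 64-bit element, which is why a demanded-bits mask of the low 32 bits is sufficient.

#include <cstdint>

// One 64-bit lane of PMULUDQ: only the low 32 bits of each source matter.
static uint64_t pmuludq_lane(uint64_t A, uint64_t B) {
  return (uint64_t)(uint32_t)A * (uint64_t)(uint32_t)B;
}

// One 64-bit lane of PMULDQ: the low 32 bits of each source, sign-extended.
static int64_t pmuldq_lane(uint64_t A, uint64_t B) {
  return (int64_t)(int32_t)(uint32_t)A * (int64_t)(int32_t)(uint32_t)B;
}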
@@ -39638,9 +41500,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return combineExtractSubvector(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
case ISD::SELECT:
- case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
+ case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
+ case X86ISD::CMP: return combineCMP(N, DAG);
case ISD::ADD: return combineAdd(N, DAG, Subtarget);
case ISD::SUB: return combineSub(N, DAG, Subtarget);
case X86ISD::SBB: return combineSBB(N, DAG);
@@ -39656,7 +41519,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, Subtarget);
- case ISD::MSTORE: return combineMaskedStore(N, DAG, Subtarget);
+ case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget);
case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
@@ -39672,6 +41535,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
case ISD::FMINNUM:
case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
+ case X86ISD::CVTSI2P:
+ case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
case X86ISD::BT: return combineBT(N, DAG, DCI);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
@@ -39682,14 +41547,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::PACKSS:
case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
+ case X86ISD::VSHL:
+ case X86ISD::VSRA:
+ case X86ISD::VSRL:
+ return combineVectorShiftVar(N, DAG, DCI, Subtarget);
case X86ISD::VSHLI:
case X86ISD::VSRAI:
case X86ISD::VSRLI:
return combineVectorShiftImm(N, DAG, DCI, Subtarget);
- case ISD::SIGN_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND_VECTOR_INREG:
- case X86ISD::VSEXT:
- case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
case X86ISD::PINSRB:
case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
@@ -39751,10 +41616,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
-/// Return true if the target has native support for the specified value type
-/// and it is 'desirable' to use the type for the given node type. e.g. On x86
-/// i16 is legal, but undesirable since i16 instruction encodings are longer and
-/// some i16 instructions are slow.
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (!isTypeLegal(VT))
return false;
@@ -39763,26 +41624,37 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
return false;
- if (VT != MVT::i16)
- return true;
-
- switch (Opc) {
- default:
- return true;
- case ISD::LOAD:
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- case ISD::SHL:
- case ISD::SRL:
- case ISD::SUB:
- case ISD::ADD:
- case ISD::MUL:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
+ // 8-bit multiply is probably not much cheaper than 32-bit multiply, and
+ // we have specializations to turn 32-bit multiply into LEA or other ops.
+ // Also, see the comment in "IsDesirableToPromoteOp" - where we additionally
+ // check for a constant operand to the multiply.
+ if (Opc == ISD::MUL && VT == MVT::i8)
return false;
+
+ // i16 instruction encodings are longer and some i16 instructions are slow,
+ // so those are not desirable.
+ if (VT == MVT::i16) {
+ switch (Opc) {
+ default:
+ break;
+ case ISD::LOAD:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SUB:
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return false;
+ }
}
+
+ // Any legal type not explicitly accounted for above here is desirable.
+ return true;
}
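A hedged illustration of the multiply-by-constant reasoning above (an example, not from the patch; exact codegen depends on the subtarget): doing the math at 32 bits keeps the low 8 bits identical while letting the backend use LEA-style address arithmetic instead of an 8-bit multiply.

#include <cstdint>

// x * 9 == x + x * 8, which a 32-bit LEA can compute in one instruction.
uint8_t mul9(uint8_t X) {
  return (uint8_t)((uint32_t)X * 9); // same low 8 bits as an i8 multiply
}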
SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
@@ -39801,12 +41673,16 @@ SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG);
}
-/// This method query the target whether it is beneficial for dag combiner to
-/// promote the specified node. If true, it should return the desired promotion
-/// type by reference.
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
EVT VT = Op.getValueType();
- if (VT != MVT::i16)
+ bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
+ isa<ConstantSDNode>(Op.getOperand(1));
+
+ // i16 is legal, but undesirable since i16 instruction encodings are longer
+ // and some i16 instructions are slow.
+ // 8-bit multiply-by-constant can usually be expanded to something cheaper
+ // using LEA and/or other ALU ops.
+ if (VT != MVT::i16 && !Is8BitMulByConstant)
return false;
auto IsFoldableRMW = [](SDValue Load, SDValue Op) {
@@ -39820,6 +41696,19 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
return Ld->getBasePtr() == St->getBasePtr();
};
+ auto IsFoldableAtomicRMW = [](SDValue Load, SDValue Op) {
+ if (!Load.hasOneUse() || Load.getOpcode() != ISD::ATOMIC_LOAD)
+ return false;
+ if (!Op.hasOneUse())
+ return false;
+ SDNode *User = *Op->use_begin();
+ if (User->getOpcode() != ISD::ATOMIC_STORE)
+ return false;
+ auto *Ld = cast<AtomicSDNode>(Load);
+ auto *St = cast<AtomicSDNode>(User);
+ return Ld->getBasePtr() == St->getBasePtr();
+ };
+
bool Commute = false;
switch (Op.getOpcode()) {
default: return false;
@@ -39854,6 +41743,9 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
((Commute && !isa<ConstantSDNode>(N1)) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
return false;
+ if (IsFoldableAtomicRMW(N0, Op) ||
+ (Commute && IsFoldableAtomicRMW(N1, Op)))
+ return false;
}
}
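A source-level shape that IsFoldableAtomicRMW appears to target (an assumption about the motivating case, not spelled out in the patch): a relaxed 16-bit atomic load, an arithmetic op, and a store back to the same address, which is cheaper to keep at i16 so it can be selected as a single memory-destination instruction.

#include <atomic>

// ATOMIC_LOAD feeding an ADD feeding an ATOMIC_STORE with the same base
// pointer, the pattern the lambda above recognizes.
void bump(std::atomic<unsigned short> &Counter) {
  unsigned short V = Counter.load(std::memory_order_relaxed);
  Counter.store((unsigned short)(V + 1), std::memory_order_relaxed);
}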
@@ -40593,44 +42485,33 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (!Res.second) {
// Map st(0) -> st(7) -> ST0
if (Constraint.size() == 7 && Constraint[0] == '{' &&
- tolower(Constraint[1]) == 's' &&
- tolower(Constraint[2]) == 't' &&
+ tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
Constraint[3] == '(' &&
(Constraint[4] >= '0' && Constraint[4] <= '7') &&
- Constraint[5] == ')' &&
- Constraint[6] == '}') {
-
- Res.first = X86::FP0+Constraint[4]-'0';
- Res.second = &X86::RFP80RegClass;
- return Res;
+ Constraint[5] == ')' && Constraint[6] == '}') {
+ // st(7) is not allocatable and thus not a member of RFP80. Return
+ // singleton class in cases where we have a reference to it.
+ if (Constraint[4] == '7')
+ return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
+ return std::make_pair(X86::FP0 + Constraint[4] - '0',
+ &X86::RFP80RegClass);
}
// GCC allows "st(0)" to be called just plain "st".
- if (StringRef("{st}").equals_lower(Constraint)) {
- Res.first = X86::FP0;
- Res.second = &X86::RFP80RegClass;
- return Res;
- }
+ if (StringRef("{st}").equals_lower(Constraint))
+ return std::make_pair(X86::FP0, &X86::RFP80RegClass);
// flags -> EFLAGS
- if (StringRef("{flags}").equals_lower(Constraint)) {
- Res.first = X86::EFLAGS;
- Res.second = &X86::CCRRegClass;
- return Res;
- }
+ if (StringRef("{flags}").equals_lower(Constraint))
+ return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);
// 'A' means [ER]AX + [ER]DX.
if (Constraint == "A") {
- if (Subtarget.is64Bit()) {
- Res.first = X86::RAX;
- Res.second = &X86::GR64_ADRegClass;
- } else {
- assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
- "Expecting 64, 32 or 16 bit subtarget");
- Res.first = X86::EAX;
- Res.second = &X86::GR32_ADRegClass;
- }
- return Res;
+ if (Subtarget.is64Bit())
+ return std::make_pair(X86::RAX, &X86::GR64_ADRegClass);
+ assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
+ "Expecting 64, 32 or 16 bit subtarget");
+ return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
}
return Res;
}
@@ -40640,18 +42521,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
(isFRClass(*Res.second) || isGRClass(*Res.second)) &&
TRI->getEncodingValue(Res.first) >= 8) {
// Register requires REX prefix, but we're in 32-bit mode.
- Res.first = 0;
- Res.second = nullptr;
- return Res;
+ return std::make_pair(0, nullptr);
}
// Make sure it isn't a register that requires AVX512.
if (!Subtarget.hasAVX512() && isFRClass(*Res.second) &&
TRI->getEncodingValue(Res.first) & 0x10) {
// Register requires EVEX prefix.
- Res.first = 0;
- Res.second = nullptr;
- return Res;
+ return std::make_pair(0, nullptr);
}
// Otherwise, check to see if this is a register class of the wrong value
@@ -40679,14 +42556,36 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass)
: Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
: Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
- : &X86::GR64RegClass;
- if (RC->contains(DestReg))
- Res = std::make_pair(DestReg, RC);
- } else {
- // No register found/type mismatch.
- Res.first = 0;
- Res.second = nullptr;
+ : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr)
+ : nullptr;
+ if (Size == 64 && !is64Bit) {
+ // Model GCC's behavior here and select a fixed pair of 32-bit
+ // registers.
+ switch (Res.first) {
+ case X86::EAX:
+ return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
+ case X86::EDX:
+ return std::make_pair(X86::EDX, &X86::GR32_DCRegClass);
+ case X86::ECX:
+ return std::make_pair(X86::ECX, &X86::GR32_CBRegClass);
+ case X86::EBX:
+ return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass);
+ case X86::ESI:
+ return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass);
+ case X86::EDI:
+ return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass);
+ case X86::EBP:
+ return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass);
+ default:
+ return std::make_pair(0, nullptr);
+ }
+ }
+ if (RC && RC->contains(DestReg))
+ return std::make_pair(DestReg, RC);
+ return Res;
}
+ // No register found/type mismatch.
+ return std::make_pair(0, nullptr);
} else if (isFRClass(*Class)) {
// Handle references to XMM physical registers that got mapped into the
// wrong class. This can happen with constraints like {xmm0} where the
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index ff5006d208e5..910acd80e8b8 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -98,7 +98,7 @@ namespace llvm {
SETCC,
/// X86 Select
- SELECT, SELECTS,
+ SELECTS,
// Same as SETCC except it's materialized with a sbb and the value is all
// ones or all zeros.
@@ -203,8 +203,9 @@ namespace llvm {
/// Dynamic (non-constant condition) vector blend where only the sign bits
/// of the condition elements are used. This is used to enforce that the
- /// condition mask is not valid for generic VSELECT optimizations.
- SHRUNKBLEND,
+ /// condition mask is not valid for generic VSELECT optimizations. This
+ /// can also be used to implement the intrinsics.
+ BLENDV,
/// Combined add and sub on an FP vector.
ADDSUB,
@@ -226,14 +227,6 @@ namespace llvm {
SCALEF,
SCALEFS,
- // Integer add/sub with unsigned saturation.
- ADDUS,
- SUBUS,
-
- // Integer add/sub with signed saturation.
- ADDS,
- SUBS,
-
// Unsigned Integer average.
AVG,
@@ -295,22 +288,27 @@ namespace llvm {
// Vector move to low scalar and zero higher vector elements.
VZEXT_MOVL,
- // Vector integer zero-extend.
- VZEXT,
- // Vector integer signed-extend.
- VSEXT,
-
// Vector integer truncate.
VTRUNC,
// Vector integer truncate with unsigned/signed saturation.
VTRUNCUS, VTRUNCS,
+ // Masked version of the above. Used when less than a 128-bit result is
+ // produced since the mask only applies to the lower elements and can't
+ // be represented by a select.
+ // SRC, PASSTHRU, MASK
+ VMTRUNC, VMTRUNCUS, VMTRUNCS,
+
// Vector FP extend.
VFPEXT, VFPEXT_RND, VFPEXTS_RND,
// Vector FP round.
VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
+ // Masked version of above. Used for v2f64->v4f32.
+ // SRC, PASSTHRU, MASK
+ VMFPROUND,
+
// 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -349,21 +347,14 @@ namespace llvm {
CMPM_RND,
// Arithmetic operations with FLAGS results.
- ADD, SUB, ADC, SBB, SMUL,
- INC, DEC, OR, XOR, AND,
+ ADD, SUB, ADC, SBB, SMUL, UMUL,
+ OR, XOR, AND,
// Bit field extract.
BEXTR,
- // LOW, HI, FLAGS = umul LHS, RHS.
- UMUL,
-
- // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
- SMUL8, UMUL8,
-
- // 8-bit divrem that zero-extend the high result (AH).
- UDIVREM8_ZEXT_HREG,
- SDIVREM8_SEXT_HREG,
+ // Zero High Bits Starting with Specified Bit Position.
+ BZHI,
// X86-specific multiply by immediate.
MUL_IMM,
@@ -513,16 +504,20 @@ namespace llvm {
// Vector float/double to signed/unsigned integer.
CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
// Scalar float/double to signed/unsigned integer.
- CVTS2SI_RND, CVTS2UI_RND,
+ CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
// Vector float/double to signed/unsigned integer with truncation.
CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
// Scalar float/double to signed/unsigned integer with truncation.
- CVTTS2SI_RND, CVTTS2UI_RND,
+ CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,
// Vector signed/unsigned integer to float/double.
CVTSI2P, CVTUI2P,
+ // Masked versions of above. Used for v2f64->v4f32.
+ // SRC, PASSTHRU, MASK
+ MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
+
// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,
@@ -570,6 +565,10 @@ namespace llvm {
// Conversions between float and half-float.
CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
+ // Masked version of above.
+ // SRC, RND, PASSTHRU, MASK
+ MCVTPS2PH,
+
// Galois Field Arithmetic Instructions
GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
@@ -588,7 +587,7 @@ namespace llvm {
/// LOCK-prefixed arithmetic read-modify-write instructions.
/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
- LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
+ LADD, LSUB, LOR, LXOR, LAND,
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
@@ -833,6 +832,8 @@ namespace llvm {
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
}
+ bool shouldSplatInsEltVarIndex(EVT VT) const override;
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
@@ -866,10 +867,21 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth) const override;
- SDValue unwrapAddress(SDValue N) const override;
+ bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &KnownUndef,
+ APInt &KnownZero,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const override;
- bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
- int64_t &Offset) const override;
+ bool SimplifyDemandedBitsForTargetNode(SDValue Op,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const override;
+
+ SDValue unwrapAddress(SDValue N) const override;
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
@@ -932,6 +944,8 @@ namespace llvm {
/// the immediate into a register.
bool isLegalAddImmediate(int64_t Imm) const override;
+ bool isLegalStoreImmediate(int64_t Imm) const override;
+
/// Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
@@ -1030,13 +1044,25 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
+
bool convertSelectOfConstantsToMath(EVT VT) const override;
+ bool decomposeMulByConstant(EVT VT, SDValue C) const override;
+
+ bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
+ bool IsSigned) const override;
+
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
/// with this index.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
+ /// Scalar ops always have equal or better analysis/performance/power than
+ /// the vector equivalent, so this always makes sense if the scalar op is
+ /// supported.
+ bool shouldScalarizeBinop(SDValue) const override;
+
bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
unsigned AddrSpace) const override {
// If we can replace more than 2 scalar stores, there will be a reduction
@@ -1095,7 +1121,7 @@ namespace llvm {
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
/// Customize the preferred legalization strategy for certain types.
- LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
+ LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
EVT VT) const override;
@@ -1347,11 +1373,6 @@ namespace llvm {
MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- /// Emit nodes that will be selected as "test Op0,Op0", or something
- /// equivalent, for use with the given x86 condition code.
- SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
- SelectionDAG &DAG) const;
-
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
@@ -1360,6 +1381,13 @@ namespace llvm {
/// Convert a comparison if required by the subtarget.
SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
+ /// Emit flags for the given setcc condition and operands. Also returns the
+ /// corresponding X86 condition code constant in X86CC.
+ SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
+ ISD::CondCode CC, const SDLoc &dl,
+ SelectionDAG &DAG,
+ SDValue &X86CC) const;
+
/// Check if replacement of SQRT with RSQRT should be disabled.
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
@@ -1407,9 +1435,9 @@ namespace llvm {
MachineMemOperand *MMO)
: MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
- const SDValue &getBasePtr() const { return getOperand(1); }
- const SDValue &getMask() const { return getOperand(2); }
- const SDValue &getValue() const { return getOperand(3); }
+ const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getBasePtr() const { return getOperand(2); }
+ const SDValue &getMask() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
@@ -1480,7 +1508,6 @@ namespace llvm {
const SDValue &getBasePtr() const { return getOperand(3); }
const SDValue &getIndex() const { return getOperand(4); }
const SDValue &getMask() const { return getOperand(2); }
- const SDValue &getValue() const { return getOperand(1); }
const SDValue &getScale() const { return getOperand(5); }
static bool classof(const SDNode *N) {
@@ -1496,6 +1523,8 @@ namespace llvm {
: X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
MMO) {}
+ const SDValue &getPassThru() const { return getOperand(1); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::MGATHER;
}
@@ -1508,6 +1537,8 @@ namespace llvm {
: X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
MMO) {}
+ const SDValue &getValue() const { return getOperand(1); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::MSCATTER;
}
diff --git a/contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp
new file mode 100644
index 000000000000..30b46a09ef0f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp
@@ -0,0 +1,253 @@
+//===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts cache prefetch instructions based on a profile. The pass
+// assumes DiscriminateMemOps ran immediately before, to ensure debug info
+// matches the one used at profile generation time. The profile is encoded in
+// afdo format (text or binary). It contains prefetch hint recommendations.
+// Each recommendation is made in terms of debug info locations, a type (i.e.
+// nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a
+// memory operand (see X86DiscriminateMemOps). The prefetch will be made for
+// a location at that memory operand + the delta specified in the
+// recommendation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Transforms/IPO/SampleProfile.h"
+using namespace llvm;
+using namespace sampleprof;
+
+static cl::opt<std::string>
+ PrefetchHintsFile("prefetch-hints-file",
+ cl::desc("Path to the prefetch hints profile."),
+ cl::Hidden);
+namespace {
+
+class X86InsertPrefetch : public MachineFunctionPass {
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool doInitialization(Module &) override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ struct PrefetchInfo {
+ unsigned InstructionID;
+ int64_t Delta;
+ };
+ typedef SmallVectorImpl<PrefetchInfo> Prefetches;
+ bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI,
+ Prefetches &prefetches) const;
+
+public:
+ static char ID;
+ X86InsertPrefetch(const std::string &PrefetchHintsFilename);
+ StringRef getPassName() const override {
+ return "X86 Insert Cache Prefetches";
+ }
+
+private:
+ std::string Filename;
+ std::unique_ptr<SampleProfileReader> Reader;
+};
+
+using PrefetchHints = SampleRecord::CallTargetMap;
+
+// Return any prefetching hints for the specified MachineInstruction. The hints
+// are returned as pairs (name, delta).
+ErrorOr<PrefetchHints> getPrefetchHints(const FunctionSamples *TopSamples,
+ const MachineInstr &MI) {
+ if (const auto &Loc = MI.getDebugLoc())
+ if (const auto *Samples = TopSamples->findFunctionSamples(Loc))
+ return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc),
+ Loc->getBaseDiscriminator());
+ return std::error_code();
+}
+
+// The prefetch instruction can't take memory operands involving vector
+// registers.
+bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) {
+ unsigned BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg();
+ unsigned IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg();
+ return (BaseReg == 0 ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) &&
+ (IndexReg == 0 ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg));
+}
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char X86InsertPrefetch::ID = 0;
+
+X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename)
+ : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {}
+
+/// Return true if the provided MachineInstruction has cache prefetch hints. In
+/// that case, the prefetch hints are stored, in order, in the Prefetches
+/// vector.
+bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
+ const MachineInstr &MI,
+ Prefetches &Prefetches) const {
+ assert(Prefetches.empty() &&
+ "Expected caller passed empty PrefetchInfo vector.");
+ static const std::pair<const StringRef, unsigned> HintTypes[] = {
+ {"_nta_", X86::PREFETCHNTA},
+ {"_t0_", X86::PREFETCHT0},
+ {"_t1_", X86::PREFETCHT1},
+ {"_t2_", X86::PREFETCHT2},
+ };
+ static const char *SerializedPrefetchPrefix = "__prefetch";
+
+ const ErrorOr<PrefetchHints> T = getPrefetchHints(TopSamples, MI);
+ if (!T)
+ return false;
+ int16_t max_index = -1;
+ // Convert serialized prefetch hints into PrefetchInfo objects, and populate
+ // the Prefetches vector.
+ for (const auto &S_V : *T) {
+ StringRef Name = S_V.getKey();
+ if (Name.consume_front(SerializedPrefetchPrefix)) {
+ int64_t D = static_cast<int64_t>(S_V.second);
+ unsigned IID = 0;
+ for (const auto &HintType : HintTypes) {
+ if (Name.startswith(HintType.first)) {
+ Name = Name.drop_front(HintType.first.size());
+ IID = HintType.second;
+ break;
+ }
+ }
+ if (IID == 0)
+ return false;
+ uint8_t index = 0;
+ Name.consumeInteger(10, index);
+
+ if (index >= Prefetches.size())
+ Prefetches.resize(index + 1);
+ Prefetches[index] = {IID, D};
+ max_index = std::max(max_index, static_cast<int16_t>(index));
+ }
+ }
+ assert(max_index + 1 >= 0 &&
+ "Possible overflow: max_index + 1 should be positive.");
+ assert(static_cast<size_t>(max_index + 1) == Prefetches.size() &&
+ "The number of prefetch hints received should match the number of "
+ "PrefetchInfo objects returned");
+ return !Prefetches.empty();
+}
+
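To make the name decoding in findPrefetchInfo concrete, a hypothetical pair of profile entries (the on-disk profile syntax is not shown in this patch, only the call-target names it yields):

// Hypothetical call-target entries and how findPrefetchInfo decodes them:
//   "__prefetch_nta_0" with value 64  -> Prefetches[0] = {X86::PREFETCHNTA, 64}
//   "__prefetch_t2_1"  with value 128 -> Prefetches[1] = {X86::PREFETCHT2, 128}
// The trailing integer is the slot in the Prefetches vector; the sample value
// becomes the Delta added to the matched memory operand's displacement.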
+bool X86InsertPrefetch::doInitialization(Module &M) {
+ if (Filename.empty())
+ return false;
+
+ LLVMContext &Ctx = M.getContext();
+ ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr =
+ SampleProfileReader::create(Filename, Ctx);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg,
+ DiagnosticSeverity::DS_Warning));
+ return false;
+ }
+ Reader = std::move(ReaderOrErr.get());
+ Reader->read();
+ return true;
+}
+
+void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+}
+
+bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) {
+ if (!Reader)
+ return false;
+ const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction());
+ if (!Samples)
+ return false;
+
+ bool Changed = false;
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<PrefetchInfo, 4> Prefetches;
+ for (auto &MBB : MF) {
+ for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) {
+ auto Current = MI;
+ ++MI;
+
+ int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags);
+ if (Offset < 0)
+ continue;
+ unsigned Bias = X86II::getOperandBias(Current->getDesc());
+ int MemOpOffset = Offset + Bias;
+ // FIXME(mtrofin): ORE message when the recommendation cannot be taken.
+ if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset))
+ continue;
+ Prefetches.clear();
+ if (!findPrefetchInfo(Samples, *Current, Prefetches))
+ continue;
+ assert(!Prefetches.empty() &&
+ "The Prefetches vector should contain at least a value if "
+ "findPrefetchInfo returned true.");
+ for (auto &PrefInfo : Prefetches) {
+ unsigned PFetchInstrID = PrefInfo.InstructionID;
+ int64_t Delta = PrefInfo.Delta;
+ const MCInstrDesc &Desc = TII->get(PFetchInstrID);
+ MachineInstr *PFetch =
+ MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, PFetch);
+
+ assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 &&
+ X86::AddrIndexReg == 2 && X86::AddrDisp == 3 &&
+ X86::AddrSegmentReg == 4 &&
+ "Unexpected change in X86 operand offset order.");
+
+ // This assumes X86::AddrBaseReg = 0, X86::AddrScaleAmt = 1, etc.
+ // FIXME(mtrofin): consider adding a:
+ // MachineInstrBuilder::set(unsigned offset, op).
+ MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg())
+ .addImm(
+ Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm())
+ .addReg(
+ Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg())
+ .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() +
+ Delta)
+ .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg)
+ .getReg());
+
+ if (!Current->memoperands_empty()) {
+ MachineMemOperand *CurrentOp = *(Current->memoperands_begin());
+ MIB.addMemOperand(MF.getMachineMemOperand(
+ CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize()));
+ }
+
+ // Insert before Current. This is because Current may clobber some of
+ // the registers used to describe the input memory operand.
+ MBB.insert(Current, PFetch);
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+FunctionPass *llvm::createX86InsertPrefetchPass() {
+ return new X86InsertPrefetch(PrefetchHintsFile);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
index 46dc6bf7661a..49e9e924887a 100644
--- a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
+++ b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -38,7 +38,7 @@ multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
@@ -51,7 +51,7 @@ multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn))
(bitconvert (load_mmx addr:$src))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index 2d95061a8213..7423cb85acd2 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -66,21 +66,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
!if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
// Load patterns
- // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
- // due to load promotion during legalization
- PatFrag LdFrag = !cast<PatFrag>("load" #
- !if (!eq (TypeVariantName, "i"),
- !if (!eq (Size, 128), "v2i64",
- !if (!eq (Size, 256), "v4i64",
- !if (!eq (Size, 512), "v8i64",
- VTName))), VTName));
-
- PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
- !if (!eq (TypeVariantName, "i"),
- !if (!eq (Size, 128), "v2i64",
- !if (!eq (Size, 256), "v4i64",
- !if (!eq (Size, 512), "v8i64",
- VTName))), VTName));
+ PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
+
+ PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
@@ -107,10 +95,6 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
- // A vector tye of the same width with element type i64. This is used to
- // create patterns for logic ops.
- ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
-
// A vector type of the same width with element type i32. This is used to
// create the canonical constant zero node ImmAllZerosV.
ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
@@ -518,13 +502,13 @@ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
- (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm)),
(vinsert_for_mask:$src3 (To.VT To.RC:$src1),
- (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm))>, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<From.EltSize, From.CD8TupleForm>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -547,7 +531,7 @@ multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
def : Pat<(vinsert_insert:$ins
(To.VT To.RC:$src1),
- (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rm")
To.RC:$src1, addr:$src2,
@@ -680,9 +664,7 @@ let Predicates = p in {
(vselect Cast.KRCWM:$mask,
(bitconvert
(vinsert_insert:$ins (To.VT To.RC:$src1),
- (From.VT
- (bitconvert
- (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm))),
Cast.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#"rmkz")
@@ -783,7 +765,7 @@ def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>,
EVEX_4V, EVEX_CD8<32, CD8VT1>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
//===----------------------------------------------------------------------===//
@@ -1374,7 +1356,7 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(_Dst.VT (X86SubVBroadcast
- (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
+ (_Src.VT (_Src.LdFrag addr:$src))))>,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
@@ -1389,7 +1371,7 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(null_frag),
(_Dst.VT (X86SubVBroadcast
- (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
+ (_Src.VT (_Src.LdFrag addr:$src))))>,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
@@ -1442,11 +1424,11 @@ defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
(VBROADCASTF64X4rm addr:$src)>;
-def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
+def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
(VBROADCASTI64X4rm addr:$src)>;
-def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
+def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
(VBROADCASTI64X4rm addr:$src)>;
-def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
+def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
(VBROADCASTI64X4rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
@@ -1474,9 +1456,9 @@ def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
-def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
-def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
// Patterns for selects of bitcasted operations.
@@ -1506,11 +1488,11 @@ def : Pat<(vselect VK8WM:$mask,
VR512:$src0),
(VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
(bc_v8i64 (v16i32 immAllZerosV))),
(VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
VR512:$src0),
(VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
@@ -1527,9 +1509,9 @@ def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Patterns for selects of bitcasted operations.
@@ -1591,11 +1573,11 @@ def : Pat<(vselect VK4WM:$mask,
VR256X:$src0),
(VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
(bc_v4i64 (v8i32 immAllZerosV))),
(VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
VR256X:$src0),
(VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
@@ -1641,11 +1623,11 @@ def : Pat<(vselect VK8WM:$mask,
VR512:$src0),
(VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
(bc_v8i64 (v16i32 immAllZerosV))),
(VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
VR512:$src0),
(VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
@@ -1741,8 +1723,8 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
- (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.VT (_.LdFrag addr:$src3)))), 1>,
+ EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -1758,7 +1740,7 @@ multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
(_.VT (X86VPermt2 _.RC:$src2,
IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
@@ -1859,8 +1841,8 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
(ins IdxVT.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
- (bitconvert (_.LdFrag addr:$src3)))), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.LdFrag addr:$src3))), 1>,
+ EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
@@ -1874,7 +1856,7 @@ multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
(_.VT (X86VPermt2 _.RC:$src1,
IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
@@ -1955,19 +1937,19 @@ multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
}
}
}
@@ -1980,7 +1962,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
@@ -1988,7 +1970,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2),
@@ -1996,7 +1978,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2067,7 +2049,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
@@ -2094,7 +2076,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
@@ -2123,7 +2105,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
(_.ScalarLdFrag addr:$src2),
imm:$cc))]>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2149,8 +2131,8 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.VT (_.LdFrag addr:$src2))))]>,
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = IsCommutable in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
@@ -2165,9 +2147,8 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert
- (_.LdFrag addr:$src2))))))]>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.VT (_.LdFrag addr:$src2)))))]>,
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
@@ -2180,7 +2161,7 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
"|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2),
@@ -2192,7 +2173,7 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))]>,
EVEX_4V, EVEX_K, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
@@ -2291,9 +2272,9 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
[(set _.KRC:$dst, (_.KVT
(Frag:$cc
(_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ (_.VT (_.LdFrag addr:$src2)),
cond)))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
@@ -2316,10 +2297,9 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
(_.KVT
(Frag:$cc
(_.VT _.RC:$src1),
- (_.VT (bitconvert
- (_.LdFrag addr:$src2))),
+ (_.VT (_.LdFrag addr:$src2)),
cond))))]>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
@@ -2333,7 +2313,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"), []>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
u8imm:$cc),
@@ -2348,17 +2328,17 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"), []>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
- def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
+ def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond)),
(!cast<Instruction>(Name#_.ZSuffix#"rmi")
_.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
def : Pat<(and _.KRCWM:$mask,
- (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
+ (_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmik")
_.KRCWM:$mask, _.RC:$src1, addr:$src2,
@@ -2380,7 +2360,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
cond)))]>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, AVX512ICC:$cc),
@@ -2393,7 +2373,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
cond))))]>,
- EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
@@ -2403,7 +2383,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
@@ -2411,7 +2391,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
@@ -2544,9 +2524,9 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ (_.VT (_.LdFrag addr:$src2)),
imm:$cc)>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
@@ -2557,7 +2537,7 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
(X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
imm:$cc)>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
@@ -2573,7 +2553,7 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
@@ -2582,7 +2562,7 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"vcmp"#_.Suffix,
"$cc, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $cc">,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
@@ -2694,7 +2674,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set _.KRC:$dst,
(OpNode _.ScalarIntMemCPat:$src1,
(i32 imm:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
@@ -2702,7 +2682,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(OpNode _.ScalarIntMemCPat:$src1,
(i32 imm:$src2))))]>,
- EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2732,17 +2712,17 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr##_.Suffix##mem#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode
- (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (_.VT (_.LdFrag addr:$src1)),
(i32 imm:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
- (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (_.VT (_.LdFrag addr:$src1)),
(i32 imm:$src2))))]>,
- EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
@@ -2752,7 +2732,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2)))]>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
@@ -2762,7 +2742,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))))]>,
- EVEX_B, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2907,8 +2887,15 @@ let Predicates = [HasDQI] in {
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
+ def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
+ (KMOVWkm addr:$src)>;
}
+def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
+ SDTCVecEltisVT<1, i1>,
+ SDTCisPtrTy<2>]>>;
+
let Predicates = [HasAVX512] in {
multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
def : Pat<(maskVT (scalar_to_vector GR32:$src)),
@@ -2916,6 +2903,12 @@ let Predicates = [HasAVX512] in {
def : Pat<(maskVT (scalar_to_vector GR8:$src)),
(COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
+
+ def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
+
+ def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
+ (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
}
defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
@@ -3353,7 +3346,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
!if(NoRMPattern, [],
[(set _.RC:$dst,
- (_.VT (bitconvert (ld_frag addr:$src))))]),
+ (_.VT (ld_frag addr:$src)))]),
_.ExeDomain>, EVEX, Sched<[Sched.RM]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
@@ -3372,7 +3365,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
"${dst} {${mask}}, $src1}"),
[(set _.RC:$dst, (_.VT
(vselect _.KRCWM:$mask,
- (_.VT (bitconvert (ld_frag addr:$src1))),
+ (_.VT (ld_frag addr:$src1)),
(_.VT _.RC:$src0))))], _.ExeDomain>,
EVEX, EVEX_K, Sched<[Sched.RM]>;
}
@@ -3381,7 +3374,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
"${dst} {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
- (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
+ (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
_.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
}
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
@@ -3474,7 +3467,7 @@ multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
[], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
NotMemoryFoldable;
- def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
+ def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
(!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
_.KRCWM:$mask, _.RC:$src)>;
@@ -3681,6 +3674,20 @@ let Predicates = [HasBWI, NoVLX] in {
}
let Predicates = [HasAVX512] in {
+ // 512-bit load.
+ def : Pat<(alignedloadv16i32 addr:$src),
+ (VMOVDQA64Zrm addr:$src)>;
+ def : Pat<(alignedloadv32i16 addr:$src),
+ (VMOVDQA64Zrm addr:$src)>;
+ def : Pat<(alignedloadv64i8 addr:$src),
+ (VMOVDQA64Zrm addr:$src)>;
+ def : Pat<(loadv16i32 addr:$src),
+ (VMOVDQU64Zrm addr:$src)>;
+ def : Pat<(loadv32i16 addr:$src),
+ (VMOVDQU64Zrm addr:$src)>;
+ def : Pat<(loadv64i8 addr:$src),
+ (VMOVDQU64Zrm addr:$src)>;
+
// 512-bit store.
def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
@@ -3697,6 +3704,20 @@ let Predicates = [HasAVX512] in {
}
let Predicates = [HasVLX] in {
+ // 128-bit load.
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVDQA64Z128rm addr:$src)>;
+ def : Pat<(alignedloadv8i16 addr:$src),
+ (VMOVDQA64Z128rm addr:$src)>;
+ def : Pat<(alignedloadv16i8 addr:$src),
+ (VMOVDQA64Z128rm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVDQU64Z128rm addr:$src)>;
+ def : Pat<(loadv8i16 addr:$src),
+ (VMOVDQU64Z128rm addr:$src)>;
+ def : Pat<(loadv16i8 addr:$src),
+ (VMOVDQU64Z128rm addr:$src)>;
+
// 128-bit store.
def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
@@ -3711,6 +3732,20 @@ let Predicates = [HasVLX] in {
def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
+ // 256-bit load.
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVDQA64Z256rm addr:$src)>;
+ def : Pat<(alignedloadv16i16 addr:$src),
+ (VMOVDQA64Z256rm addr:$src)>;
+ def : Pat<(alignedloadv32i8 addr:$src),
+ (VMOVDQA64Z256rm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVDQU64Z256rm addr:$src)>;
+ def : Pat<(loadv16i16 addr:$src),
+ (VMOVDQU64Z256rm addr:$src)>;
+ def : Pat<(loadv32i8 addr:$src),
+ (VMOVDQU64Z256rm addr:$src)>;
+
// 256-bit store.
def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
@@ -4029,10 +4064,10 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
-def : Pat<(masked_store addr:$dst, Mask,
+def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
- (iPTR 0)))),
+ (iPTR 0))), addr:$dst, Mask),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
@@ -4044,10 +4079,10 @@ multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
dag Mask, RegisterClass MaskRC,
SubRegIndex subreg> {
-def : Pat<(masked_store addr:$dst, Mask,
+def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
- (iPTR 0)))),
+ (iPTR 0))), addr:$dst, Mask),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
@@ -4064,16 +4099,16 @@ multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
SubRegIndex subreg> {
// AVX512F pattern.
-def : Pat<(masked_store addr:$dst, Mask512,
+def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
- (iPTR 0)))),
+ (iPTR 0))), addr:$dst, Mask512),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// AVX512VL pattern.
-def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)),
+def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
@@ -4421,8 +4456,6 @@ let Predicates = [HasAVX512] in {
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
- def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
@@ -4497,7 +4530,7 @@ let Predicates = [HasAVX512] in {
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
@@ -4593,6 +4626,12 @@ let Predicates = [HasAVX512], AddedComplexity = 400 in {
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v8i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
+ def : Pat<(v16i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZrm addr:$src)>;
+ def : Pat<(v32i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZrm addr:$src)>;
+ def : Pat<(v64i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZrm addr:$src)>;
}
let Predicates = [HasVLX], AddedComplexity = 400 in {
@@ -4609,6 +4648,12 @@ let Predicates = [HasVLX], AddedComplexity = 400 in {
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v4i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
+ def : Pat<(v8i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ256rm addr:$src)>;
+ def : Pat<(v16i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ256rm addr:$src)>;
+ def : Pat<(v32i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
@@ -4623,6 +4668,12 @@ let Predicates = [HasVLX], AddedComplexity = 400 in {
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
+ def : Pat<(v4i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ128rm addr:$src)>;
+ def : Pat<(v8i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ128rm addr:$src)>;
+ def : Pat<(v16i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ128rm addr:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -4641,10 +4692,9 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2))))>,
+ (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4659,7 +4709,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4773,9 +4823,9 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
- (bitconvert (_Src.LdFrag addr:$src2))))>,
+ (_Src.LdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
@@ -4786,20 +4836,20 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
(_Brdct.VT (X86VBroadcast
(_Brdct.ScalarLdFrag addr:$src2))))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
SchedWriteVecALU, 0>;
-defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
+defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
SchedWriteVecALU, HasBWI, 1>;
-defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
+defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
SchedWriteVecALU, HasBWI, 0>;
-defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
+defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
SchedWriteVecALU, HasBWI, 1>;
-defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
+defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
SchedWritePMULLD, HasAVX512, 1>, T8PD;
@@ -4859,7 +4909,7 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_Src.VT (X86VBroadcast
(_Src.ScalarLdFrag addr:$src2))))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
@@ -4878,9 +4928,9 @@ multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
- (bitconvert (_Src.LdFrag addr:$src2))))>,
+ (_Src.LdFrag addr:$src2)))>,
EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
@@ -5046,95 +5096,356 @@ let Predicates = [HasAVX512, NoVLX] in {
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
-// OpNodeMsk is the OpNode to use when element size is important. OpNode will
-// be set to null_frag for 32-bit elements.
-multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
- SDNode OpNodeMsk, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, bit IsCommutable = 0> {
- let hasSideEffects = 0 in
- defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
- "$src2, $src1", "$src1, $src2",
- (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
- (bitconvert (_.VT _.RC:$src2)))),
- (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
- _.RC:$src2)))),
- IsCommutable>, AVX512BIBase, EVEX_4V,
- Sched<[sched]>;
+defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
+ SchedWriteVecLogic, HasAVX512, 1>;
+defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
+ SchedWriteVecLogic, HasAVX512, 1>;
+defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
+ SchedWriteVecLogic, HasAVX512, 1>;
+defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
+ SchedWriteVecLogic, HasAVX512>;
- let hasSideEffects = 0, mayLoad = 1 in
- defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
- "$src2, $src1", "$src1, $src2",
- (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
- (bitconvert (_.LdFrag addr:$src2)))),
- (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2))))))>,
- AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+let Predicates = [HasVLX] in {
+ def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
+ (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
+ (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
+ (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
+ (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
+ (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
+ (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
+ (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
+ (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPORQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPORQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDDZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(or VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPORDZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(xor VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPXORDZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDQZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(or VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPORQZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(xor VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPXORQZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
+ (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
+ (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
+ (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
+ (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
+ (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
+ (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
+ (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
+ (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPORQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPORQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDDZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(or VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPORDZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(xor VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPXORDZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDQZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(or VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPORQZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(xor VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPXORQZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>;
}
-// OpNodeMsk is the OpNode to use where element size is important. So use
-// for all of the broadcast patterns.
-multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
- SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _,
- bit IsCommutable = 0> :
- avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _,
- IsCommutable> {
- defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
- (_.i64VT (OpNodeMsk _.RC:$src1,
- (bitconvert
- (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))))),
- (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
- (bitconvert
- (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))))))>,
- AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+let Predicates = [HasAVX512] in {
+ def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
+ (VPANDQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
+ (VPANDQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
+ (VPORQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
+ (VPORQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
+ (VPXORQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
+ (VPXORQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
+ (VPANDNQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
+ (VPANDNQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPANDQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPANDQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPORQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPORQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPXORQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPXORQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPANDNQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPANDNQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(and VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDDZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(or VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPORDZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(xor VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPXORDZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDNDZrmb VR512:$src1, addr:$src2)>;
+
+ def : Pat<(and VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDQZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(or VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPORQZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(xor VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPXORQZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDNQZrmb VR512:$src1, addr:$src2)>;
+}
+
+// Patterns to catch vselect with different type than logic op.
+multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo IntInfo> {
+ // Masked register-register logical operations.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, _.RC:$src2)>;
+
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
+ _.RC:$src2)>;
+
+ // Masked register-memory logical operations.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
+ (load addr:$src2)))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
+ (load addr:$src2)))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
+ addr:$src2)>;
}
-multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
- SDNode OpNodeMsk, X86SchedWriteWidths sched,
- AVX512VLVectorVTInfo VTInfo,
- bit IsCommutable = 0> {
- let Predicates = [HasAVX512] in
- defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
- VTInfo.info512, IsCommutable>, EVEX_V512;
+multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo IntInfo> {
+ // Register-broadcast logical operations.
+ def : Pat<(IntInfo.VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))),
+ (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert
+ (IntInfo.VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert
+ (IntInfo.VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+}
- let Predicates = [HasAVX512, HasVLX] in {
- defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
- VTInfo.info256, IsCommutable>, EVEX_V256;
- defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
- VTInfo.info128, IsCommutable>, EVEX_V128;
- }
+multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
+ AVX512VLVectorVTInfo SelectInfo,
+ AVX512VLVectorVTInfo IntInfo> {
+let Predicates = [HasVLX] in {
+ defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
+ IntInfo.info128>;
+ defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
+ IntInfo.info256>;
+}
+let Predicates = [HasAVX512] in {
+ defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
+ IntInfo.info512>;
+}
}
-multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
- SDNode OpNode, X86SchedWriteWidths sched,
- bit IsCommutable = 0> {
- defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
- avx512vl_i64_info, IsCommutable>,
- VEX_W, EVEX_CD8<64, CD8VF>;
- defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched,
- avx512vl_i32_info, IsCommutable>,
- EVEX_CD8<32, CD8VF>;
-}
-
-defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
- SchedWriteVecLogic, 1>;
-defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
- SchedWriteVecLogic, 1>;
-defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
- SchedWriteVecLogic, 1>;
-defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
- SchedWriteVecLogic>;
+multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
+ AVX512VLVectorVTInfo SelectInfo,
+ AVX512VLVectorVTInfo IntInfo> {
+let Predicates = [HasVLX] in {
+ defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
+ SelectInfo.info128, IntInfo.info128>;
+ defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
+ SelectInfo.info256, IntInfo.info256>;
+}
+let Predicates = [HasAVX512] in {
+ defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
+ SelectInfo.info512, IntInfo.info512>;
+}
+}
+
+multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
+ // i64 vselect with i32/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
+ avx512vl_i8_info>;
+
+ // i32 vselect with i64/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
+ avx512vl_i64_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
+ avx512vl_i8_info>;
+
+ // f32 vselect with i64/i32/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i64_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i8_info>;
+
+ // f64 vselect with i64/i32/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i64_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i8_info>;
+
+ defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
+ avx512vl_f32_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
+ avx512vl_f64_info,
+ avx512vl_i64_info>;
+}
+
+defm : avx512_logical_lowering_types<"VPAND", and>;
+defm : avx512_logical_lowering_types<"VPOR", or>;
+defm : avx512_logical_lowering_types<"VPXOR", xor>;
+defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
@@ -5157,7 +5468,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
@@ -5171,7 +5482,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
@@ -5202,7 +5513,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -5217,7 +5528,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -5285,7 +5596,7 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
@@ -5320,7 +5631,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
@@ -5328,7 +5639,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))))>,
EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
@@ -5439,73 +5750,6 @@ defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
-// Patterns catch floating point selects with bitcasted integer logic ops.
-multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
- X86VectorVTInfo _, Predicate prd> {
-let Predicates = [prd] in {
- // Masked register-register logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
- _.RC:$src1, _.RC:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
- _.RC:$src2)>;
- // Masked register-memory logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1,
- (load addr:$src2)))),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
- _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
- addr:$src2)>;
- // Register-broadcast logical operations.
- def : Pat<(_.i64VT (OpNode _.RC:$src1,
- (bitconvert (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))))),
- (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert
- (_.i64VT (OpNode _.RC:$src1,
- (bitconvert (_.VT
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))))),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
- _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert
- (_.i64VT (OpNode _.RC:$src1,
- (bitconvert (_.VT
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
- _.RC:$src1, addr:$src2)>;
-}
-}
-
-multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
- defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
- defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
-}
-
-defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
-defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
-defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
-defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
-
let Predicates = [HasVLX,HasDQI] in {
// Use packed logical operations for scalar ops.
def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
@@ -5563,7 +5807,7 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
@@ -5571,7 +5815,7 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5588,7 +5832,7 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5635,18 +5879,15 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV)>,
+ (OpNode (and _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)>,
EVEX_4V, Sched<[sched]>;
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (bitconvert
- (_.i64VT (and _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2))))),
+ (OpNode (and _.RC:$src1, (_.LdFrag addr:$src2)),
_.ImmAllZerosV)>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Patterns for compare with 0 that just use the same source twice.
@@ -5671,13 +5912,13 @@ multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
(_.ScalarLdFrag addr:$src2))),
_.ImmAllZerosV)>,
EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Use 512bit version to implement 128/256 bit in case NoVLX.
multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
X86VectorVTInfo _, string Name> {
- def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+ def : Pat<(_.KVT (OpNode (and _.RC:$src1, _.RC:$src2),
_.ImmAllZerosV)),
(_.KVT (COPY_TO_REGCLASS
(!cast<Instruction>(Name # "Zrr")
@@ -5688,7 +5929,7 @@ multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
_.KRC))>;
def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+ (OpNode (and _.RC:$src1, _.RC:$src2),
_.ImmAllZerosV))),
(COPY_TO_REGCLASS
(!cast<Instruction>(Name # "Zrrk")
@@ -5765,7 +6006,7 @@ multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
v16i8x_info, NAME#"B">, EVEX_V128;
}
- let Predicates = [HasAVX512, NoVLX] in {
+ let Predicates = [HasBWI, NoVLX] in {
defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
@@ -5791,6 +6032,125 @@ defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
SchedWriteVecLogic>, T8XS;
+
+multiclass avx512_vptest_lowering_pats<string InstrStr, PatFrag OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo AndInfo> {
+ def : Pat<(_.KVT (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr # "rr") _.RC:$src1, _.RC:$src2)>;
+
+ def : Pat<(_.KVT (and _.KRC:$mask,
+ (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV))),
+ (!cast<Instruction>(InstrStr # "rrk") _.KRC:$mask, _.RC:$src1,
+ _.RC:$src2)>;
+
+ def : Pat<(_.KVT (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1,
+ (AndInfo.LdFrag addr:$src2)))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr # "rm") _.RC:$src1, addr:$src2)>;
+
+ def : Pat<(_.KVT (and _.KRC:$mask,
+ (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1,
+ (AndInfo.LdFrag addr:$src2)))),
+ _.ImmAllZerosV))),
+ (!cast<Instruction>(InstrStr # "rmk") _.KRC:$mask, _.RC:$src1,
+ addr:$src2)>;
+}
+
+// Patterns to use 512-bit instructions when 128/256 are not available.
+multiclass avx512_vptest_lowering_wide_pats<string InstrStr, PatFrag OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo AndInfo,
+ X86VectorVTInfo ExtendInfo> {
+ def : Pat<(_.KVT (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
+ (_.KVT (COPY_TO_REGCLASS
+ (!cast<Instruction>(InstrStr#"rr")
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src1, _.SubRegIdx),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src2, _.SubRegIdx)),
+ _.KRC))>;
+
+ def : Pat<(_.KVT (and _.KRC:$mask,
+ (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(InstrStr#"rrk")
+ (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src1, _.SubRegIdx),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src2, _.SubRegIdx)),
+ _.KRC)>;
+}
+
+multiclass avx512_vptest_lowering_sizes<string InstrStr, PatFrag OpNode,
+ Predicate prd,
+ AVX512VLVectorVTInfo CmpInfo,
+ AVX512VLVectorVTInfo AndInfo> {
+let Predicates = [prd, HasVLX] in {
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z128", OpNode,
+ CmpInfo.info128, AndInfo.info128>;
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z256", OpNode,
+ CmpInfo.info256, AndInfo.info256>;
+}
+let Predicates = [prd] in {
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z", OpNode,
+ CmpInfo.info512, AndInfo.info512>;
+}
+
+let Predicates = [prd, NoVLX] in {
+ defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
+ CmpInfo.info128, AndInfo.info128,
+ CmpInfo.info512>;
+ defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
+ CmpInfo.info256, AndInfo.info256,
+ CmpInfo.info512>;
+}
+}
+
+multiclass avx512_vptest_lowering_types<string InstrStr, PatFrag OpNode> {
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+ avx512vl_i8_info, avx512vl_i16_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+ avx512vl_i8_info, avx512vl_i32_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+ avx512vl_i8_info, avx512vl_i64_info>;
+
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ avx512vl_i16_info, avx512vl_i8_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ avx512vl_i16_info, avx512vl_i32_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ avx512vl_i16_info, avx512vl_i64_info>;
+
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ avx512vl_i32_info, avx512vl_i8_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ avx512vl_i32_info, avx512vl_i16_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ avx512vl_i32_info, avx512vl_i64_info>;
+
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ avx512vl_i64_info, avx512vl_i8_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ avx512vl_i64_info, avx512vl_i16_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ avx512vl_i64_info, avx512vl_i32_info>;
+}
+
+defm : avx512_vptest_lowering_types<"VPTESTM", X86pcmpnem>;
+defm : avx512_vptest_lowering_types<"VPTESTNM", X86pcmpeqm>;
+
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
@@ -5807,7 +6167,7 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
(i8 imm:$src2)))>,
Sched<[sched.Folded]>;
}
@@ -5826,7 +6186,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, ValueType SrcVT,
- PatFrag bc_frag, X86VectorVTInfo _> {
+ X86VectorVTInfo _> {
// src2 is always 128-bit
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -5837,26 +6197,26 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>,
+ (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
AVX512BIBase,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, ValueType SrcVT,
- PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
+ AVX512VLVectorVTInfo VTInfo,
Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
- bc_frag, VTInfo.info512>, EVEX_V512,
+ VTInfo.info512>, EVEX_V512,
EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
- bc_frag, VTInfo.info256>, EVEX_V256,
+ VTInfo.info256>, EVEX_V256,
EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
- bc_frag, VTInfo.info128>, EVEX_V128,
+ VTInfo.info128>, EVEX_V128,
EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
}
}
@@ -5866,12 +6226,12 @@ multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
X86SchedWriteWidths sched,
bit NotEVEX2VEXConvertibleQ = 0> {
defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
- bc_v4i32, avx512vl_i32_info, HasAVX512>;
+ avx512vl_i32_info, HasAVX512>;
let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
- bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
+ avx512vl_i64_info, HasAVX512>, VEX_W;
defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
- bc_v2i64, avx512vl_i16_info, HasBWI>;
+ avx512vl_i16_info, HasBWI>;
}
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
@@ -5991,9 +6351,9 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
- (_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
+ (_.VT (_.LdFrag addr:$src2))))>,
AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6007,7 +6367,7 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))>,
AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -6091,7 +6451,7 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
_.RC:$src2)>;
- def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
+ def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rm)
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
@@ -6099,7 +6459,7 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
(!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
+ (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
@@ -6108,7 +6468,7 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
(!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
_.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
+ (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
_.RC:$src1, addr:$src2)>;
@@ -6333,9 +6693,9 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode
_.RC:$src1,
- (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
+ (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
@@ -6345,7 +6705,7 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
(Ctrl.VT (X86VBroadcast
(Ctrl.ScalarLdFrag addr:$src2)))))>,
T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
@@ -6448,7 +6808,7 @@ multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
(OpNode _.RC:$src1,
(_.VT (bitconvert
(v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}
// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
@@ -6524,7 +6884,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6532,7 +6892,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
_.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6598,7 +6958,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6607,7 +6967,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (OpNode _.RC:$src2,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6675,7 +7035,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
                          // Pattern is in 312 order so that the load is in a different place from the
                          // 213 and 231 patterns. This helps tablegen's duplicate pattern detection.
@@ -6685,7 +7045,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6750,7 +7110,7 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
@@ -6767,7 +7127,7 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
+ [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
@@ -7069,7 +7429,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -7078,7 +7438,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src2,
(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
_.RC:$src1)>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
} // Constraints = "$src1 = $dst"
@@ -7120,7 +7480,7 @@ multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched
def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
let isCodeGenOnly = 1 in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
@@ -7139,7 +7499,7 @@ multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched
(OpNode (DstVT.VT DstVT.RC:$src1),
(ld_frag addr:$src2),
(i32 FROUND_CURRENT)))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}//isCodeGenOnly = 1
}
@@ -7246,26 +7606,26 @@ def : Pat<(f64 (uint_to_fp GR64:$src)),
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
+ SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
string aliasStr,
bit CodeGenOnly = 1> {
let Predicates = [HasAVX512] in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
+ [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
EVEX, VEX_LIG, Sched<[sched]>;
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
- [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
+ [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
EVEX, VEX_LIG, EVEX_B, EVEX_RC,
Sched<[sched]>;
let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
- (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
- (i32 FROUND_CURRENT)))]>,
- EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
@@ -7276,9 +7636,10 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
+ SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
string aliasStr> :
- avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, sched, asm, aliasStr, 0> {
+ avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, OpNodeRnd, sched, asm, aliasStr, 0> {
let Predicates = [HasAVX512] in {
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
@@ -7287,52 +7648,31 @@ multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
}
// Convert float/double to signed/unsigned int 32/64
-defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
- X86cvts2si, WriteCvtSS2I, "cvtss2si", "{l}">,
+defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
- X86cvts2si, WriteCvtSS2I, "cvtss2si", "{q}">,
+defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
- X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{l}">,
+defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
- X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{q}">,
+defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
- X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{l}">,
+defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
- X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{q}">,
+defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
- X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{l}">,
+defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
- X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{q}">,
+defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-// The SSE version of these instructions are disabled for AVX512.
-// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
-let Predicates = [HasAVX512] in {
- def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
- (VCVTSS2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
- (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
- def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
- (VCVTSS2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
- (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
- (VCVTSD2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
- (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
- (VCVTSD2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
- (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
-} // HasAVX512
-
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
@@ -7420,8 +7760,9 @@ def : Pat<(v2f64 (X86Movsd
// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
- SDNode OpNodeRnd, X86FoldableSchedWrite sched,
- string aliasStr, bit CodeGenOnly = 1>{
+ SDNode OpNodeInt, SDNode OpNodeRnd,
+ X86FoldableSchedWrite sched, string aliasStr,
+ bit CodeGenOnly = 1>{
let Predicates = [HasAVX512] in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
@@ -7431,13 +7772,12 @@ let Predicates = [HasAVX512] in {
def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
- EVEX, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
- (i32 FROUND_CURRENT)))]>,
+ [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
EVEX, VEX_LIG, Sched<[sched]>;
def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
@@ -7448,10 +7788,9 @@ let Predicates = [HasAVX512] in {
def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
(ins _SrcRC.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set _DstRC.RC:$dst, (OpNodeRnd
- (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
- (i32 FROUND_CURRENT)))]>,
- EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ [(set _DstRC.RC:$dst,
+ (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
@@ -7463,9 +7802,10 @@ let Predicates = [HasAVX512] in {
multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
- SDNode OpNodeRnd, X86FoldableSchedWrite sched,
+ SDNode OpNodeInt, SDNode OpNodeRnd,
+ X86FoldableSchedWrite sched,
string aliasStr> :
- avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, sched,
+ avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeInt, OpNodeRnd, sched,
aliasStr, 0> {
let Predicates = [HasAVX512] in {
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
@@ -7475,49 +7815,30 @@ let Predicates = [HasAVX512] in {
}
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{l}">,
- XS, EVEX_CD8<32, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
+ "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{q}">,
- VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
+ "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{l}">,
- XD, EVEX_CD8<64, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
+ "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{q}">,
- VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
+ "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{l}">,
- XS, EVEX_CD8<32, CD8VT1>;
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
+ "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{q}">,
- XS,VEX_W, EVEX_CD8<32, CD8VT1>;
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
+ "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{l}">,
- XD, EVEX_CD8<64, CD8VT1>;
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
+ "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{q}">,
- XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-
-let Predicates = [HasAVX512] in {
- def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
- (VCVTTSS2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
- (VCVTTSS2SIZrm_Int ssmem:$src)>;
- def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
- (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
- (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
- (VCVTTSD2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
- (VCVTTSD2SIZrm_Int sdmem:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
- (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
- (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
-} // HasAVX512
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
+ "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512 Convert form float to double and back
@@ -7540,7 +7861,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
(_Src.VT _Src.ScalarIntMemCPat:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -7551,7 +7872,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -7648,26 +7969,53 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
- string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
+ string Alias = "", X86MemOperand MemOp = _Src.MemOp,
+ RegisterClass MaskRC = _.KRCWM> {
- defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
- (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
+ defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src),
+ (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
+ (ins MaskRC:$mask, _Src.RC:$src),
+ OpcodeStr, "$src", "$src",
+ (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+ (vselect MaskRC:$mask,
+ (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
EVEX, Sched<[sched]>;
- defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
+ defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins MemOp:$src),
+ (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
+ (ins MaskRC:$mask, MemOp:$src),
+ OpcodeStr#Alias, "$src", "$src",
(_.VT (OpNode (_Src.VT
- (bitconvert (_Src.LdFrag addr:$src)))))>,
+ (_Src.LdFrag addr:$src)))),
+ (vselect MaskRC:$mask,
+ (_.VT (OpNode (_Src.VT
+ (_Src.LdFrag addr:$src)))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
EVEX, Sched<[sched.Folded]>;
- defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _Src.ScalarMemOp:$src), OpcodeStr,
+ defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _Src.ScalarMemOp:$src),
+ (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
+ (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
+ OpcodeStr,
"${src}"##Broadcast, "${src}"##Broadcast,
(_.VT (OpNode (_Src.VT
(X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
- ))>, EVEX, EVEX_B,
- Sched<[sched.Folded]>;
+ )),
+ (vselect MaskRC:$mask,
+ (_.VT
+ (OpNode
+ (_Src.VT
+ (X86VBroadcast
+ (_Src.ScalarLdFrag addr:$src))))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
+ EVEX, EVEX_B, Sched<[sched.Folded]>;
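+
+  // The masked forms are written out through AVX512_maskable_common so that
+  // the mask register class can be overridden via MaskRC; e.g. the
+  // v2f64->v4f32 Z128 instantiations below pass VK2WM rather than the
+  // destination's natural KRCWM (and null_frag for OpNode, with masking
+  // recovered by the separate X86vmfpround / X86mcvt* patterns).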
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -7718,7 +8066,8 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
- X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
@@ -7752,6 +8101,35 @@ let Predicates = [HasVLX] in {
(VCVTPS2PDZ128rm addr:$src)>;
def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDZ256rm addr:$src)>;
+
+ // Special patterns to allow use of X86vmfpround for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(X86vfpround (v2f64 VR128X:$src)),
+ (VCVTPD2PSZ128rr VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(X86vfpround (loadv2f64 addr:$src)),
+ (VCVTPD2PSZ128rm addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
+ (VCVTPD2PSZ128rmb addr:$src)>;
+ def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4f32 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4f32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
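+
+  // Usage sketch, assuming the usual clang lowering of the masked intrinsics:
+  // _mm_mask_cvtpd_ps(src0, k, a) reaches isel as (X86vmfpround a, src0, k)
+  // and selects VCVTPD2PSZ128rrk above, while _mm_maskz_cvtpd_ps(k, a)
+  // selects the corresponding rrkz form.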
}
// Convert Signed/Unsigned Doubleword to Double
@@ -7836,7 +8214,8 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
- OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
@@ -7865,8 +8244,9 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
- sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
@@ -8149,6 +8529,122 @@ let Predicates = [HasVLX] in {
(VCVTTPD2UDQZ256rr VR256X:$src)>;
def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
(VCVTTPD2UDQZ256rm addr:$src)>;
+
+ // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
+ (VCVTPD2DQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
+ (VCVTPD2DQZ128rm addr:$src)>;
+ def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTPD2DQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+ // Special patterns to allow use of X86mcvttp2si for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
+ (VCVTTPD2DQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
+ (VCVTTPD2DQZ128rm addr:$src)>;
+ def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTTPD2DQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+ // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
+ (VCVTPD2UDQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
+ (VCVTPD2UDQZ128rm addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTPD2UDQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
+ (VCVTTPD2UDQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
+ (VCVTTPD2UDQZ128rm addr:$src)>;
+ def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTTPD2UDQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}
let Predicates = [HasDQI] in {
@@ -8365,8 +8861,7 @@ multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
(ins x86memop:$src), "vcvtph2ps", "$src", "$src",
(X86cvtph2ps (_src.VT
- (bitconvert
- (ld_frag addr:$src))))>,
+ (ld_frag addr:$src)))>,
T8PD, Sched<[sched.Folded]>;
}
@@ -8381,17 +8876,17 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
}
let Predicates = [HasAVX512] in
- defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
+ defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
WriteCvtPH2PSZ>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
- loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
+ load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
EVEX_CD8<32, CD8VH>;
defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
- loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
+ load, WriteCvtPH2PS>, EVEX, EVEX_V128,
EVEX_CD8<32, CD8VH>;
// Pattern match vcvtph2ps of a scalar i64 load.
@@ -8406,12 +8901,28 @@ let Predicates = [HasVLX] in {
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
- defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
- (ins _src.RC:$src1, i32u8imm:$src2),
- "vcvtps2ph", "$src2, $src1", "$src1, $src2",
- (X86cvtps2ph (_src.VT _src.RC:$src1),
- (i32 imm:$src2)), 0, 0>,
- AVX512AIi8Base, Sched<[RR]>;
+let ExeDomain = GenericDomain in {
+ def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
+ Sched<[RR]>;
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+ _dest.RC:$src0, _src.KRCWM:$mask))]>,
+ Sched<[RR]>, EVEX_K;
+ def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+ _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
+ Sched<[RR]>, EVEX_KZ;
let hasSideEffects = 0, mayStore = 1 in {
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
@@ -8423,6 +8934,7 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
EVEX_K, Sched<[MR]>, NotMemoryFoldable;
}
}
+}
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
SchedWrite Sched> {
@@ -8483,7 +8995,7 @@ let Predicates = [HasVLX] in {
(v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}
-// Unordered/Ordered scalar fp compare with Sea and set EFLAGS
+// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
@@ -8549,7 +9061,7 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
_.ScalarIntMemCPat:$src2)>, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8578,13 +9090,13 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT
(bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
- EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8638,7 +9150,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8674,7 +9186,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(OpNode (_.VT
(bitconvert (_.LdFrag addr:$src))),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
@@ -8682,7 +9194,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(OpNode (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
(i32 FROUND_CURRENT))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -8749,13 +9261,13 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(fsqrt (_.VT
(bitconvert (_.LdFrag addr:$src))))>, EVEX,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(fsqrt (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
- EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8810,7 +9322,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
(X86fsqrtRnds (_.VT _.RC:$src1),
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -8828,7 +9340,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8881,7 +9393,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1,
_.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -8893,7 +9405,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9000,14 +9512,47 @@ defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
// Integer truncate and extend operations
//-------------------------------------------------
+// PatFrags that contain a select and a truncate op. They take operands in the
+// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
+// either to the multiclasses.
+def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect node:$mask,
+ (trunc node:$src), node:$src0)>;
+def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect node:$mask,
+ (X86vtruncs node:$src), node:$src0)>;
+def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect node:$mask,
+ (X86vtruncus node:$src), node:$src0)>;
+
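+// Illustrative equivalence (a sketch, not an additional pattern): both
+//   (select_trunc V, Src0, K)   i.e. (vselect K, (trunc V), Src0)
+// and
+//   (X86vmtrunc V, Src0, K)
+// describe the same masked truncate, so a multiclass parameterized on
+// MaskNode can be instantiated with either kind of node.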
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
- let ExeDomain = DestInfo.ExeDomain in
- defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
- (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
- (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
- EVEX, T8XS, Sched<[sched]>;
+ let ExeDomain = DestInfo.ExeDomain in {
+ def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src),
+ OpcodeStr # "\t{$src, $dst|$dst, $src}",
+ [(set DestInfo.RC:$dst,
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
+ EVEX, Sched<[sched]>;
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
+ (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ [(set DestInfo.RC:$dst,
+ (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
+ (DestInfo.VT DestInfo.RC:$src0),
+ SrcInfo.KRCWM:$mask))]>,
+ EVEX, EVEX_K, Sched<[sched]>;
+ def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ [(set DestInfo.RC:$dst,
+ (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
+ DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
+ EVEX, EVEX_KZ, Sched<[sched]>;
+ }
let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
def mr : AVX512XS8I<opc, MRMDestMem, (outs),
@@ -9031,14 +9576,18 @@ multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
(!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
addr:$dst, SrcInfo.RC:$src)>;
- def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
- (SrcInfo.VT SrcInfo.RC:$src)),
+ def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
+ SrcInfo.KRCWM:$mask),
(!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
- SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
+ SDNode OpNode256, SDNode OpNode512,
+ SDPatternOperator MaskNode128,
+ SDPatternOperator MaskNode256,
+ SDPatternOperator MaskNode512,
+ X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTSrcInfo,
X86VectorVTInfo DestInfoZ128,
X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
@@ -9047,118 +9596,167 @@ multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
PatFrag mtruncFrag, Predicate prd = HasAVX512>{
let Predicates = [HasVLX, prd] in {
- defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
+ defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
truncFrag, mtruncFrag, NAME>, EVEX_V128;
- defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
+ defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
truncFrag, mtruncFrag, NAME>, EVEX_V256;
}
let Predicates = [prd] in
- defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
+ defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
VTSrcInfo.info512, DestInfoZ, x86memopZ>,
avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
truncFrag, mtruncFrag, NAME>, EVEX_V512;
}
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
+ InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
avx512vl_i64_info, v16i8x_info, v16i8x_info,
v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
+ InVecMaskNode, InVecMaskNode, MaskNode, sched,
avx512vl_i64_info, v8i16x_info, v8i16x_info,
v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
+ InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i64_info, v4i32x_info, v4i32x_info,
v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
+ InVecMaskNode, InVecMaskNode, MaskNode, sched,
avx512vl_i32_info, v16i8x_info, v16i8x_info,
v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
+ InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i32_info, v8i16x_info, v8i16x_info,
v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
- sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
+ InVecMaskNode, MaskNode, MaskNode, sched,
+ avx512vl_i16_info, v16i8x_info, v16i8x_info,
v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
-defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, WriteShuffle256,
- truncstorevi8, masked_truncstorevi8, X86vtrunc>;
-defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, WriteShuffle256,
- truncstore_s_vi8, masked_truncstore_s_vi8>;
-defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
- truncstore_us_vi8, masked_truncstore_us_vi8>;
-
-defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, WriteShuffle256,
- truncstorevi16, masked_truncstorevi16, X86vtrunc>;
-defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, WriteShuffle256,
- truncstore_s_vi16, masked_truncstore_s_vi16>;
-defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
- truncstore_us_vi16, masked_truncstore_us_vi16>;
-
-defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, WriteShuffle256,
- truncstorevi32, masked_truncstorevi32, X86vtrunc>;
-defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, WriteShuffle256,
- truncstore_s_vi32, masked_truncstore_s_vi32>;
-defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
- truncstore_us_vi32, masked_truncstore_us_vi32>;
-
-defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256,
- truncstorevi8, masked_truncstorevi8, X86vtrunc>;
-defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, WriteShuffle256,
- truncstore_s_vi8, masked_truncstore_s_vi8>;
-defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256,
- truncstore_us_vi8, masked_truncstore_us_vi8>;
-
-defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256,
- truncstorevi16, masked_truncstorevi16, X86vtrunc>;
-defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, WriteShuffle256,
- truncstore_s_vi16, masked_truncstore_s_vi16>;
-defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256,
- truncstore_us_vi16, masked_truncstore_us_vi16>;
-
-defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256,
- truncstorevi8, masked_truncstorevi8, X86vtrunc>;
-defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, WriteShuffle256,
- truncstore_s_vi8, masked_truncstore_s_vi8>;
-defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256,
- truncstore_us_vi8, masked_truncstore_us_vi8>;
+defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
+ WriteShuffle256, truncstorevi8,
+ masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
+defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi8,
+ masked_truncstore_s_vi8, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi8, masked_truncstore_us_vi8,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
+ WriteShuffle256, truncstorevi16,
+ masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
+defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi16,
+ masked_truncstore_s_vi16, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi16, masked_truncstore_us_vi16,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
+ WriteShuffle256, truncstorevi32,
+ masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
+defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi32,
+ masked_truncstore_s_vi32, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi32, masked_truncstore_us_vi32,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
+ WriteShuffle256, truncstorevi8,
+ masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
+defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi8,
+ masked_truncstore_s_vi8, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi8, masked_truncstore_us_vi8,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
+ WriteShuffle256, truncstorevi16,
+ masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
+defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi16,
+ masked_truncstore_s_vi16, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi16, masked_truncstore_us_vi16,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
+ WriteShuffle256, truncstorevi8,
+ masked_truncstorevi8, X86vtrunc,
+ X86vmtrunc>;
+defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi8,
+ masked_truncstore_s_vi8, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi8, masked_truncstore_us_vi8,
+ X86vtruncus, X86vmtruncus>;
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
@@ -9177,6 +9775,44 @@ def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
VR256X:$src, sub_ymm))), sub_xmm))>;
}
+// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
+multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
+ X86VectorVTInfo DestInfo,
+ X86VectorVTInfo SrcInfo> {
+ def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
+ DestInfo.RC:$src0,
+ SrcInfo.KRCWM:$mask)),
+ (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
+ SrcInfo.KRCWM:$mask,
+ SrcInfo.RC:$src)>;
+
+ def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
+ DestInfo.ImmAllZerosV,
+ SrcInfo.KRCWM:$mask)),
+ (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
+ SrcInfo.RC:$src)>;
+}
+
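+// Expansion sketch: an instantiation such as
+//   defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
+// produces patterns mapping (v8i16 (X86vmtrunc (v8i32 V), Src0, K)) onto
+// VPMOVDWZ256rrk, and the zero-masked form onto VPMOVDWZ256rrkz.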
+let Predicates = [HasVLX] in {
+defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
+defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
+defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
+}
+
+let Predicates = [HasAVX512] in {
+defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
+
+defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
+
+defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
+defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
+defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
+}
+
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
@@ -9221,7 +9857,7 @@ multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
- v16i8x_info, i64mem, LdFrag, OpNode>,
+ v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
@@ -9240,12 +9876,12 @@ multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
- v16i8x_info, i32mem, LdFrag, OpNode>,
+ v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
- v16i8x_info, i64mem, LdFrag, OpNode>,
+ v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
}
}
@@ -9278,7 +9914,7 @@ multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
- v8i16x_info, i64mem, LdFrag, OpNode>,
+ v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
@@ -9308,23 +9944,107 @@ multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
}
}
-defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
+
+defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
+
-defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>;
+// Patterns that we also need any extend versions of. aext_vector_inreg
+// is currently legalized to zext_vector_inreg.
+multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
+ // 256-bit patterns
+ let Predicates = [HasVLX, HasBWI] in {
+ def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ }
+
+ let Predicates = [HasVLX] in {
+ def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ }
+
+ // 512-bit patterns
+ let Predicates = [HasBWI] in {
+ def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
+ }
+ let Predicates = [HasAVX512] in {
+ def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
+ def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
+ (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
+ }
+}
+
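+// Sketch of why an any-extend can reuse these zero-extend patterns, assuming
+// the aext multiclass below is instantiated with the VPMOVZX* opcode prefix:
+// in (v16i16 (anyext (loadv16i8 addr))) the high bits of each element are
+// undefined, so selecting the zero-extending OpcPrefix#BWZ256rm form is
+// always a correct choice.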
+multiclass AVX512_pmovx_patterns_aext<string OpcPrefix, SDNode ExtOp> :
+ AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
+ let Predicates = [HasVLX, HasBWI] in {
+ def : Pat<(v16i16 (ExtOp (v16i8 VR128X:$src))),
+ (!cast<I>(OpcPrefix#BWZ256rr) VR128X:$src)>;
+ }
+
+ let Predicates = [HasVLX] in {
+ def : Pat<(v8i32 (ExtOp (v8i16 VR128X:$src))),
+ (!cast<I>(OpcPrefix#WDZ256rr) VR128X:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (v4i32 VR128X:$src))),
+ (!cast<I>(OpcPrefix#DQZ256rr) VR128X:$src)>;
+ }
+
+ // 512-bit patterns
+ let Predicates = [HasBWI] in {
+ def : Pat<(v32i16 (ExtOp (v32i8 VR256X:$src))),
+ (!cast<I>(OpcPrefix#BWZrr) VR256X:$src)>;
+ }
+ let Predicates = [HasAVX512] in {
+ def : Pat<(v16i32 (ExtOp (v16i8 VR128X:$src))),
+ (!cast<I>(OpcPrefix#BDZrr) VR128X:$src)>;
+ def : Pat<(v16i32 (ExtOp (v16i16 VR256X:$src))),
+ (!cast<I>(OpcPrefix#WDZrr) VR256X:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (v8i16 VR128X:$src))),
+ (!cast<I>(OpcPrefix#WQZrr) VR128X:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (v8i32 VR256X:$src))),
+ (!cast<I>(OpcPrefix#DQZrr) VR256X:$src)>;
+ }
+}
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
- SDNode InVecOp> {
+ SDNode InVecOp> :
+ AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
// 128-bit patterns
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -9335,7 +10055,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i16 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
@@ -9345,7 +10065,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
@@ -9354,7 +10074,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -9365,7 +10085,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
@@ -9374,7 +10094,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -9385,87 +10105,73 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
- // 256-bit patterns
- let Predicates = [HasVLX, HasBWI] in {
- def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
- }
let Predicates = [HasVLX] in {
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
}
// 512-bit patterns
- let Predicates = [HasBWI] in {
- def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
- }
let Predicates = [HasAVX512] in {
- def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
-
- def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
- def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
+ }
+}
- def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
-
- def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
+defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
+defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
+defm : AVX512_pmovx_patterns_aext<"VPMOVZX", anyext>;
- def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
- }
+// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
+// ext+trunc aggressively, making it impossible to legalize the DAG to this
+// pattern directly.
+let Predicates = [HasAVX512, NoBWI] in {
+def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
+ (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
+def: Pat<(v16i8 (trunc (bc_v16i16 (loadv4i64 addr:$src)))),
+ (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
+def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst),
+ (VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
}
-defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
-defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
@@ -9651,6 +10357,10 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
!strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
+
+// Also need a pattern for anyextend.
+def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
+ (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
}
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
@@ -9724,6 +10434,19 @@ let Predicates = [HasDQI, NoBWI] in {
(VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
(VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+
+ def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
+ (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+ def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
+ (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+}
+
+let Predicates = [HasDQI, NoBWI, HasVLX] in {
+ def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
+ (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
+
+ def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
+ (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
//===----------------------------------------------------------------------===//
@@ -9753,8 +10476,7 @@ multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
}
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
- def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
- (_.VT _.RC:$src)),
+ def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix##mrk)
addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
@@ -9798,7 +10520,7 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
(_.VT (X86expand (_.VT (bitconvert
(_.LdFrag addr:$src1)))))>,
AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
@@ -9860,14 +10582,14 @@ multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNo
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr##", $src2",
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9923,7 +10645,7 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
(i32 imm:$src3))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
@@ -9931,7 +10653,7 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9955,7 +10677,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
(i8 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9974,7 +10696,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -9996,7 +10718,7 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10360,9 +11082,9 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(_.VT
(bitconvert
(CastInfo.VT (X86Shuf128 _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2)),
+ (CastInfo.LdFrag addr:$src2),
(i8 imm:$src3)))))>,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
@@ -10374,7 +11096,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(X86Shuf128 _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
(i8 imm:$src3)))))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10450,7 +11172,7 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
(_.VT (X86VAlign _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)),
(i8 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<"VPALIGNRrmi">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -10460,7 +11182,7 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
(X86VAlign _.RC:$src1,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10526,7 +11248,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
- (bitconvert (To.LdFrag addr:$src2)),
+ (From.LdFrag addr:$src2),
imm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
@@ -10536,7 +11258,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
- (bitconvert (To.LdFrag addr:$src2)),
+ (From.LdFrag addr:$src2),
imm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
@@ -10824,6 +11546,8 @@ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
(VMOVDDUPZ128rm addr:$src)>;
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+ (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(v2f64 VR128X:$src0)),
@@ -10954,7 +11678,7 @@ multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
- EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -11027,7 +11751,7 @@ multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
[(set _.RC:$dst,(_.VT (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
@@ -11067,7 +11791,7 @@ multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
(OpNode (_src.VT _src.RC:$src1),
(_src.VT (bitconvert
(_src.LdFrag addr:$src2))))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
@@ -11169,7 +11893,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (bitconvert (_.LdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
@@ -11179,7 +11903,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
@@ -11343,19 +12067,68 @@ defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
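+// The patterns below all match (xor X, all-ones), i.e. a vector NOT; VPTERNLOGQ
+// with truth-table immediate 15 (0x0f) and all three sources tied to $src
+// computes ~$src.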
let Predicates = [HasAVX512] in {
- def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
+ def : Pat<(xor VR512:$src, (bc_v64i8 (v16i32 immAllOnesV))),
+ (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
+ def : Pat<(xor VR512:$src, (bc_v32i16 (v16i32 immAllOnesV))),
+ (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
+ def : Pat<(xor VR512:$src, (bc_v16i32 (v16i32 immAllOnesV))),
+ (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
+ def : Pat<(xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV))),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
let Predicates = [HasAVX512, NoVLX] in {
- def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
+ def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (i8 15)), sub_xmm)>;
+ def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (i8 15)), sub_xmm)>;
+ def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (i8 15)), sub_xmm)>;
+ def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
- def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
+
+ def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (i8 15)), sub_ymm)>;
+ def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (i8 15)), sub_ymm)>;
+ def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (i8 15)), sub_ymm)>;
+ def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
@@ -11365,9 +12138,22 @@ let Predicates = [HasAVX512, NoVLX] in {
}
let Predicates = [HasVLX] in {
- def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
+ def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+ (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
+ def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+ (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
+ def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+ (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
+ def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
- def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
+
+ def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+ (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
+ def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+ (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
+ def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+ (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
+ def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
@@ -11395,7 +12181,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
(TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
@@ -11405,7 +12191,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
(TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
}
@@ -11448,7 +12234,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_src3VT.VT _src3VT.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_NO_EXC))>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
@@ -11458,7 +12244,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_src3VT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -11680,9 +12466,9 @@ multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
+ (VTI.VT (VTI.LdFrag addr:$src3))))>,
AVX512FMA3Base,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -11698,7 +12484,7 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
@@ -11783,10 +12569,9 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (bitconvert
- (VTI.LdFrag addr:$src3)))))>,
+ (VTI.VT (VTI.LdFrag addr:$src3))))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
@@ -11795,7 +12580,7 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(VTI.VT (X86VBroadcast
(VTI.ScalarLdFrag addr:$src3))))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
- T8PD, Sched<[sched.Folded, ReadAfterLd]>;
+ T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
@@ -11840,9 +12625,9 @@ multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
"vpshufbitqmb",
"$src2, $src1", "$src1, $src2",
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
- (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
+ (VTI.VT (VTI.LdFrag addr:$src2)))>,
EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
@@ -11890,7 +12675,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
(OpNode (VTI.VT VTI.RC:$src1),
(bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
index c444fa761960..cb5a4e5b5d41 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -46,11 +46,11 @@ def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
// (and possibly third) value from a register.
// This is used for instructions that put the memory operands before other
// uses.
-class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+class SchedLoadReg<X86FoldableSchedWrite Sched> : Sched<[Sched.Folded,
// Memory operand.
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// Register reads (implicit or explicit).
- ReadAfterLd, ReadAfterLd]>;
+ Sched.ReadAfterFold, Sched.ReadAfterFold]>;
// Extra precision multiplication
@@ -63,18 +63,18 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)]>, Sched<[WriteIMul]>;
+ (implicit EFLAGS)]>, Sched<[WriteIMul8]>;
// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX], hasSideEffects = 0 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
"mul{w}\t$src",
- []>, OpSize16, Sched<[WriteIMul]>;
+ []>, OpSize16, Sched<[WriteIMul16]>;
// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], hasSideEffects = 0 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
"mul{l}\t$src",
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>,
- OpSize32, Sched<[WriteIMul]>;
+ OpSize32, Sched<[WriteIMul32]>;
// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], hasSideEffects = 0 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
@@ -89,20 +89,20 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>, SchedLoadReg<WriteIMul.Folded>;
+ (implicit EFLAGS)]>, SchedLoadReg<WriteIMul8>;
// AX,DX = AX*[mem16]
let mayLoad = 1, hasSideEffects = 0 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
- "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>;
+ "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
- "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>;
+ "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>,
+ "mul{q}\t$src", []>, SchedLoadReg<WriteIMul64>,
Requires<[In64BitMode]>;
}
@@ -110,15 +110,15 @@ let hasSideEffects = 0 in {
// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>,
- Sched<[WriteIMul]>;
+ Sched<[WriteIMul8]>;
// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
- OpSize16, Sched<[WriteIMul]>;
+ OpSize16, Sched<[WriteIMul16]>;
// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>,
- OpSize32, Sched<[WriteIMul]>;
+ OpSize32, Sched<[WriteIMul32]>;
// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>,
@@ -128,19 +128,19 @@ let mayLoad = 1 in {
// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>, SchedLoadReg<WriteIMul.Folded>;
+ "imul{b}\t$src", []>, SchedLoadReg<WriteIMul8>;
// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>;
+ "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>;
+ "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>,
+ "imul{q}\t$src", []>, SchedLoadReg<WriteIMul64>,
Requires<[In64BitMode]>;
}
} // hasSideEffects
@@ -156,18 +156,18 @@ def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, GR16:$src2))]>,
- Sched<[WriteIMul]>, TB, OpSize16;
+ Sched<[WriteIMul16Reg]>, TB, OpSize16;
def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, GR32:$src2))]>,
- Sched<[WriteIMul]>, TB, OpSize32;
+ Sched<[WriteIMul32Reg]>, TB, OpSize32;
def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, GR64:$src2))]>,
- Sched<[WriteIMul64]>, TB;
+ Sched<[WriteIMul64Reg]>, TB;
} // isCommutable
// Register-Memory Signed Integer Multiply
@@ -176,19 +176,19 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, (loadi16 addr:$src2)))]>,
- Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize16;
+ Sched<[WriteIMul16Reg.Folded, WriteIMul16Reg.ReadAfterFold]>, TB, OpSize16;
def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, (loadi32 addr:$src2)))]>,
- Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize32;
+ Sched<[WriteIMul32Reg.Folded, WriteIMul32Reg.ReadAfterFold]>, TB, OpSize32;
def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, (loadi64 addr:$src2)))]>,
- Sched<[WriteIMul64.Folded, ReadAfterLd]>, TB;
+ Sched<[WriteIMul64Reg.Folded, WriteIMul64Reg.ReadAfterFold]>, TB;
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
@@ -201,37 +201,37 @@ def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, imm:$src2))]>,
- Sched<[WriteIMul]>, OpSize16;
+ Sched<[WriteIMul16Imm]>, OpSize16;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
- Sched<[WriteIMul]>, OpSize16;
+ Sched<[WriteIMul16Imm]>, OpSize16;
def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, imm:$src2))]>,
- Sched<[WriteIMul]>, OpSize32;
+ Sched<[WriteIMul32Imm]>, OpSize32;
def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, i32immSExt8:$src2))]>,
- Sched<[WriteIMul]>, OpSize32;
+ Sched<[WriteIMul32Imm]>, OpSize32;
def IMUL64rri32 : RIi32S<0x69, MRMSrcReg, // GR64 = GR64*I32
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt32:$src2))]>,
- Sched<[WriteIMul64]>;
+ Sched<[WriteIMul64Imm]>;
def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt8:$src2))]>,
- Sched<[WriteIMul64]>;
+ Sched<[WriteIMul64Imm]>;
// Memory-Integer Signed Integer Multiply
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
@@ -239,41 +239,41 @@ def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (loadi16 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize16;
+ Sched<[WriteIMul16Imm.Folded]>, OpSize16;
def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (loadi16 addr:$src1),
i16immSExt8:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize16;
+ Sched<[WriteIMul16Imm.Folded]>, OpSize16;
def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
(outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (loadi32 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize32;
+ Sched<[WriteIMul32Imm.Folded]>, OpSize32;
def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (loadi32 addr:$src1),
i32immSExt8:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize32;
+ Sched<[WriteIMul32Imm.Folded]>, OpSize32;
def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem, // GR64 = [mem64]*I32
(outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (loadi64 addr:$src1),
i64immSExt32:$src2))]>,
- Sched<[WriteIMul64.Folded]>;
+ Sched<[WriteIMul64Imm.Folded]>;
def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (loadi64 addr:$src1),
i64immSExt8:$src2))]>,
- Sched<[WriteIMul64.Folded]>;
+ Sched<[WriteIMul64Imm.Folded]>;
} // Defs = [EFLAGS]
// unsigned division/remainder
@@ -295,17 +295,17 @@ def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", []>, SchedLoadReg<WriteDiv8.Folded>;
+ "div{b}\t$src", []>, SchedLoadReg<WriteDiv8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteDiv16.Folded>;
+ "div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteDiv16>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", []>, SchedLoadReg<WriteDiv32.Folded>, OpSize32;
+ "div{l}\t$src", []>, SchedLoadReg<WriteDiv32>, OpSize32;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", []>, SchedLoadReg<WriteDiv64.Folded>,
+ "div{q}\t$src", []>, SchedLoadReg<WriteDiv64>,
Requires<[In64BitMode]>;
}
@@ -327,16 +327,16 @@ def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", []>, SchedLoadReg<WriteIDiv8.Folded>;
+ "idiv{b}\t$src", []>, SchedLoadReg<WriteIDiv8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDiv16.Folded>;
+ "idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDiv16>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDiv32.Folded>;
+ "idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDiv32>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", []>, SchedLoadReg<WriteIDiv64.Folded>,
+ "idiv{q}\t$src", []>, SchedLoadReg<WriteIDiv64>,
Requires<[In64BitMode]>;
}
} // hasSideEffects = 0
@@ -422,22 +422,35 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
} // SchedRW
} // CodeSize
+def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86add_flag node:$lhs, node:$rhs), [{
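+ // Only match when the carry-flag result is unused, since INC does not update CF.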
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86sub_flag node:$lhs, node:$rhs), [{
+ // Only use DEC if the result is used.
+ return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86add_flag_nocf GR8:$src1, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, OpSize16;
+ [(set GR16:$dst, EFLAGS, (X86add_flag_nocf GR16:$src1, 1))]>,
+ OpSize16;
def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, OpSize32;
+ [(set GR32:$dst, EFLAGS, (X86add_flag_nocf GR32:$src1, 1))]>,
+ OpSize32;
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86add_flag_nocf GR64:$src1, 1))]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
@@ -474,16 +487,18 @@ let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86sub_flag_nocf GR8:$src1, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, OpSize16;
+ [(set GR16:$dst, EFLAGS, (X86sub_flag_nocf GR16:$src1, 1))]>,
+ OpSize16;
def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, OpSize32;
+ [(set GR32:$dst, EFLAGS, (X86sub_flag_nocf GR32:$src1, 1))]>,
+ OpSize32;
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86sub_flag_nocf GR64:$src1, 1))]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
@@ -691,7 +706,7 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// BinOpRM_F - Instructions like "cmp reg, [mem]".
class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -806,8 +821,8 @@ class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMR<opcode, mnemonic, typeinfo,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.RegClass:$src))]>,
- Sched<[WriteALULd, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault, ReadDefault, ReadAfterLd]>;
+ Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>;
// BinOpMI - Instructions like "add [mem], imm".
class BinOpMI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -839,7 +854,7 @@ class BinOpMI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMI<opcode, mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.ImmOperator:$src))]>,
- Sched<[WriteALULd]>;
+ Sched<[WriteALU.Folded]>;
// BinOpMI8 - Instructions like "add [mem], imm8".
class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
@@ -872,7 +887,7 @@ class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
: BinOpMI8<mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.Imm8Operator:$src))]>,
- Sched<[WriteALULd]>;
+ Sched<[WriteALU.Folded]>;
// BinOpAI - Instructions like "add %eax, %eax, imm", that imp-def EFLAGS.
class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -913,8 +928,8 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR in {
- def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
@@ -931,9 +946,9 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
- def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
-
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
@@ -1176,6 +1191,30 @@ let isCompare = 1 in {
defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
}
+// Patterns to recognize loads on the LHS of an ADC. We can't make X86adc_flag
+// commutable since it has EFLAGS as an input.
+def : Pat<(X86adc_flag (loadi8 addr:$src2), GR8:$src1, EFLAGS),
+ (ADC8rm GR8:$src1, addr:$src2)>;
+def : Pat<(X86adc_flag (loadi16 addr:$src2), GR16:$src1, EFLAGS),
+ (ADC16rm GR16:$src1, addr:$src2)>;
+def : Pat<(X86adc_flag (loadi32 addr:$src2), GR32:$src1, EFLAGS),
+ (ADC32rm GR32:$src1, addr:$src2)>;
+def : Pat<(X86adc_flag (loadi64 addr:$src2), GR64:$src1, EFLAGS),
+ (ADC64rm GR64:$src1, addr:$src2)>;
+
+// Patterns to recognize RMW ADC with loads in operand 1.
+def : Pat<(store (X86adc_flag GR8:$src, (loadi8 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (X86adc_flag GR16:$src, (loadi16 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (X86adc_flag GR32:$src, (loadi32 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC32mr addr:$dst, GR32:$src)>;
+def : Pat<(store (X86adc_flag GR64:$src, (loadi64 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC64mr addr:$dst, GR64:$src)>;
//===----------------------------------------------------------------------===//
// Semantically, test instructions are similar to AND, except they don't
@@ -1188,16 +1227,21 @@ def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
let isCompare = 1 in {
let Defs = [EFLAGS] in {
let isCommutable = 1 in {
- def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat>;
- def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat>;
- def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat>;
- def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat>;
+ // Avoid selecting these and instead use a test+and. Post processing will
+ // combine them. This gives a bunch of other patterns that start with
+ // an 'and' a chance to match.
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , null_frag>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, null_frag>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, null_frag>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, null_frag>;
} // isCommutable
- def TEST8mr : BinOpMR_F<0x84, "test", Xi8 , X86testpat>;
- def TEST16mr : BinOpMR_F<0x84, "test", Xi16, X86testpat>;
- def TEST32mr : BinOpMR_F<0x84, "test", Xi32, X86testpat>;
- def TEST64mr : BinOpMR_F<0x84, "test", Xi64, X86testpat>;
+ let hasSideEffects = 0, mayLoad = 1 in {
+ def TEST8mr : BinOpMR_F<0x84, "test", Xi8 , null_frag>;
+ def TEST16mr : BinOpMR_F<0x84, "test", Xi16, null_frag>;
+ def TEST32mr : BinOpMR_F<0x84, "test", Xi32, null_frag>;
+ def TEST64mr : BinOpMR_F<0x84, "test", Xi64, null_frag>;
+ }
def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
@@ -1235,7 +1279,7 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
- Sched<[WriteALULd, ReadAfterLd]>;
+ Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}
// Complexity is reduced to give and with immediate a chance to match first.
@@ -1275,7 +1319,7 @@ let hasSideEffects = 0 in {
let Predicates = [HasBMI2] in {
let Uses = [EDX] in
- defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul>;
+ defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul32>;
let Uses = [RDX] in
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W;
}
@@ -1283,22 +1327,18 @@ let Predicates = [HasBMI2] in {
//===----------------------------------------------------------------------===//
// ADCX and ADOX Instructions
//
+// We don't have patterns for these as there is no advantage over ADC for
+// most code.
let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
- Constraints = "$src1 = $dst", AddedComplexity = 10 in {
- let SchedRW = [WriteADC] in {
+ Constraints = "$src1 = $dst", hasSideEffects = 0 in {
+ let SchedRW = [WriteADC], isCommutable = 1 in {
def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
- "adcx{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86adc_flag GR32:$src1, GR32:$src2, EFLAGS))]>, T8PD;
+ "adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
def ADCX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
- "adcx{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86adc_flag GR64:$src1, GR64:$src2, EFLAGS))]>, T8PD;
+ "adcx{q}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
- // We don't have patterns for ADOX yet.
- let hasSideEffects = 0 in {
def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"adox{l}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
@@ -1306,26 +1346,17 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
- } // hasSideEffects = 0
} // SchedRW
- let mayLoad = 1, SchedRW = [WriteADCLd, ReadAfterLd] in {
+ let mayLoad = 1, SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold] in {
def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
- "adcx{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86adc_flag GR32:$src1, (loadi32 addr:$src2), EFLAGS))]>,
- T8PD;
+ "adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
def ADCX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
- "adcx{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86adc_flag GR64:$src1, (loadi64 addr:$src2), EFLAGS))]>,
- T8PD;
+ "adcx{q}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
- // We don't have patterns for ADOX yet.
- let hasSideEffects = 0 in {
def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"adox{l}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
@@ -1333,6 +1364,5 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
- } // hasSideEffects = 0
- } // mayLoad = 1, SchedRW = [WriteADCLd]
+ } // mayLoad, SchedRW
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index eda4ba5ae6f0..f5494fc0b13f 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -38,7 +38,7 @@ multiclass CMOV<bits<8> opc, string Mnemonic, X86FoldableSchedWrite Sched,
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
- SchedRW = [Sched.Folded, ReadAfterLd] in {
+ SchedRW = [Sched.Folded, Sched.ReadAfterFold] in {
def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
index 373f85020372..394dca8e7817 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -37,11 +37,6 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP, SSP],
def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
"", []>;
-// 64-bit large code model PIC base construction.
-let hasSideEffects = 0, mayLoad = 1, isNotDuplicable = 1, SchedRW = [WriteJump] in
- def MOVGOT64r : PseudoI<(outs GR64:$reg),
- (ins GR64:$scratch, i64i32imm_pcrel:$got), []>;
-
// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
// a stack adjustment and the codegen must know that they may modify the stack
// pointer before prolog-epilog rewriting occurs.
@@ -148,7 +143,7 @@ def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),
// These instructions XOR the frame pointer into a GPR. They are used in some
// stack protection schemes. These are post-RA pseudos because we only know the
// frame register after register allocation.
-let Constraints = "$src = $dst", isPseudo = 1, Defs = [EFLAGS] in {
+let Constraints = "$src = $dst", isMoveImm = 1, isPseudo = 1, Defs = [EFLAGS] in {
def XOR32_FP : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src),
"xorl\t$$FP, $src", []>,
Requires<[NotLP64]>, Sched<[WriteALU]>;
@@ -178,7 +173,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
}
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
- isCodeGenOnly = 1, isReturn = 1 in {
+ isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1 in {
def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>;
// CATCHRET needs a custom inserter for SEH.
@@ -240,6 +235,8 @@ let isPseudo = 1, SchedRW = [WriteSystem] in {
"#SEH_SaveXMM $reg, $dst", []>;
def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
"#SEH_StackAlloc $size", []>;
+ def SEH_StackAlign : I<0, Pseudo, (outs), (ins i32imm:$align),
+ "#SEH_StackAlign $align", []>;
def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
"#SEH_SetFrame $reg, $offset", []>;
def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
@@ -273,7 +270,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), "", []>;
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isPseudo = 1, AddedComplexity = 10 in
+ isPseudo = 1, isMoveImm = 1, AddedComplexity = 10 in
def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)]>, Sched<[WriteZero]>;
@@ -319,16 +316,14 @@ def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
// that would make it more difficult to rematerialize.
let isReMaterializable = 1, isAsCheapAsAMove = 1,
isPseudo = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
-def MOV32ri64 : I<0, Pseudo, (outs GR32:$dst), (ins i64i32imm:$src), "", []>;
+def MOV32ri64 : I<0, Pseudo, (outs GR64:$dst), (ins i64i32imm:$src), "", []>;
// This 64-bit pseudo-move can be used for both a 64-bit constant that is
// actually the zero-extension of a 32-bit constant and for labels in the
// x86-64 small code model.
def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [imm, X86Wrapper]>;
-let AddedComplexity = 1 in
-def : Pat<(i64 mov64imm32:$src),
- (SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>;
+def : Pat<(i64 mov64imm32:$src), (MOV32ri64 mov64imm32:$src)>;
// Use sbb to materialize carry bit.
let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
@@ -367,95 +362,109 @@ def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
(SETBr)>;
-// (add OP, SETB) -> (adc OP, 0)
-def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
- (ADC8ri GR8:$op, 0)>;
-def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
- (ADC32ri8 GR32:$op, 0)>;
-def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
- (ADC64ri8 GR64:$op, 0)>;
-
-// (sub OP, SETB) -> (sbb OP, 0)
-def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
- (SBB8ri GR8:$op, 0)>;
-def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
- (SBB32ri8 GR32:$op, 0)>;
-def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
- (SBB64ri8 GR64:$op, 0)>;
-
-// (sub OP, SETCC_CARRY) -> (adc OP, 0)
-def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
- (ADC8ri GR8:$op, 0)>;
-def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
- (ADC32ri8 GR32:$op, 0)>;
-def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
- (ADC64ri8 GR64:$op, 0)>;
+// Patterns to give priority when both inputs are zero so that we don't use
+// an immediate for the RHS.
+// TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out?
+def : Pat<(X86sbb_flag (i8 0), (i8 0), EFLAGS),
+ (SBB8rr (EXTRACT_SUBREG (MOV32r0), sub_8bit),
+ (EXTRACT_SUBREG (MOV32r0), sub_8bit))>;
+def : Pat<(X86sbb_flag (i16 0), (i16 0), EFLAGS),
+ (SBB16rr (EXTRACT_SUBREG (MOV32r0), sub_16bit),
+ (EXTRACT_SUBREG (MOV32r0), sub_16bit))>;
+def : Pat<(X86sbb_flag (i32 0), (i32 0), EFLAGS),
+ (SBB32rr (MOV32r0), (MOV32r0))>;
+def : Pat<(X86sbb_flag (i64 0), (i64 0), EFLAGS),
+ (SBB64rr (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit),
+ (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit))>;
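// Informal sketch of the trick: "sbb r, r" computes r - r - CF, so with both
// inputs zeroed the result is 0 when CF is clear and -1 (all ones) when CF is
// set, which materializes the carry bit without spending an immediate on the
// RHS.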
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
let SchedRW = [WriteMicrocoded] in {
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP,
- Requires<[Not64BitMode]>;
-def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize16,
- Requires<[Not64BitMode]>;
-def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP, OpSize32,
- Requires<[Not64BitMode]>;
+def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins),
+ "{rep;movsb (%esi), %es:(%edi)|rep movsb es:[edi], [esi]}",
+ [(X86rep_movs i8)]>, REP, AdSize32,
+ Requires<[NotLP64]>;
+def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsw (%esi), %es:(%edi)|rep movsw es:[edi], [esi]}",
+ [(X86rep_movs i16)]>, REP, AdSize32, OpSize16,
+ Requires<[NotLP64]>;
+def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsl (%esi), %es:(%edi)|rep movsd es:[edi], [esi]}",
+ [(X86rep_movs i32)]>, REP, AdSize32, OpSize32,
+ Requires<[NotLP64]>;
+def REP_MOVSQ_32 : RI<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsq (%esi), %es:(%edi)|rep movsq es:[edi], [esi]}",
+ [(X86rep_movs i64)]>, REP, AdSize32,
+ Requires<[NotLP64, In64BitMode]>;
}
let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
-def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP,
- Requires<[In64BitMode]>;
-def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize16,
- Requires<[In64BitMode]>;
-def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP, OpSize32,
- Requires<[In64BitMode]>;
-def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
- [(X86rep_movs i64)]>, REP,
- Requires<[In64BitMode]>;
+def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins),
+ "{rep;movsb (%rsi), %es:(%rdi)|rep movsb es:[rdi], [rsi]}",
+ [(X86rep_movs i8)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
+def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsw (%rsi), %es:(%rdi)|rep movsw es:[rdi], [rsi]}",
+ [(X86rep_movs i16)]>, REP, AdSize64, OpSize16,
+ Requires<[IsLP64]>;
+def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsl (%rsi), %es:(%rdi)|rep movsdi es:[rdi], [rsi]}",
+ [(X86rep_movs i32)]>, REP, AdSize64, OpSize32,
+ Requires<[IsLP64]>;
+def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsq (%rsi), %es:(%rdi)|rep movsq es:[rdi], [rsi]}",
+ [(X86rep_movs i64)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
}
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
let Uses = [AL,ECX,EDI] in
- def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP,
- Requires<[Not64BitMode]>;
+ def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins),
+ "{rep;stosb %al, %es:(%edi)|rep stosb es:[edi], al}",
+ [(X86rep_stos i8)]>, REP, AdSize32,
+ Requires<[NotLP64]>;
let Uses = [AX,ECX,EDI] in
- def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize16,
- Requires<[Not64BitMode]>;
+ def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosw %ax, %es:(%edi)|rep stosw es:[edi], ax}",
+ [(X86rep_stos i16)]>, REP, AdSize32, OpSize16,
+ Requires<[NotLP64]>;
let Uses = [EAX,ECX,EDI] in
- def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP, OpSize32,
- Requires<[Not64BitMode]>;
+ def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosl %eax, %es:(%edi)|rep stosd es:[edi], eax}",
+ [(X86rep_stos i32)]>, REP, AdSize32, OpSize32,
+ Requires<[NotLP64]>;
+ let Uses = [RAX,RCX,RDI] in
+ def REP_STOSQ_32 : RI<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosq %rax, %es:(%edi)|rep stosq es:[edi], rax}",
+ [(X86rep_stos i64)]>, REP, AdSize32,
+ Requires<[NotLP64, In64BitMode]>;
}
let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
let Uses = [AL,RCX,RDI] in
- def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP,
- Requires<[In64BitMode]>;
+ def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins),
+ "{rep;stosb %al, %es:(%rdi)|rep stosb es:[rdi], al}",
+ [(X86rep_stos i8)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
let Uses = [AX,RCX,RDI] in
- def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize16,
- Requires<[In64BitMode]>;
+ def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosw %ax, %es:(%rdi)|rep stosw es:[rdi], ax}",
+ [(X86rep_stos i16)]>, REP, AdSize64, OpSize16,
+ Requires<[IsLP64]>;
let Uses = [RAX,RCX,RDI] in
- def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP, OpSize32,
- Requires<[In64BitMode]>;
+ def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosl %eax, %es:(%rdi)|rep stosd es:[rdi], eax}",
+ [(X86rep_stos i32)]>, REP, AdSize64, OpSize32,
+ Requires<[IsLP64]>;
let Uses = [RAX,RCX,RDI] in
- def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
- [(X86rep_stos i64)]>, REP,
- Requires<[In64BitMode]>;
+ def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosq %rax, %es:(%rdi)|rep stosq es:[rdi], rax}",
+ [(X86rep_stos i64)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
}
} // SchedRW
@@ -567,22 +576,84 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
- defm _F128 : CMOVrr_PSEUDO<VR128, f128>;
- defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;
- defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;
- defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;
- defm _V8F32 : CMOVrr_PSEUDO<VR256, v8f32>;
- defm _V4F64 : CMOVrr_PSEUDO<VR256, v4f64>;
- defm _V4I64 : CMOVrr_PSEUDO<VR256, v4i64>;
- defm _V8I64 : CMOVrr_PSEUDO<VR512, v8i64>;
- defm _V8F64 : CMOVrr_PSEUDO<VR512, v8f64>;
- defm _V16F32 : CMOVrr_PSEUDO<VR512, v16f32>;
- defm _V8I1 : CMOVrr_PSEUDO<VK8, v8i1>;
- defm _V16I1 : CMOVrr_PSEUDO<VK16, v16i1>;
- defm _V32I1 : CMOVrr_PSEUDO<VK32, v32i1>;
- defm _V64I1 : CMOVrr_PSEUDO<VK64, v64i1>;
+ let Predicates = [NoVLX] in {
+ defm _VR128 : CMOVrr_PSEUDO<VR128, v2i64>;
+ defm _VR256 : CMOVrr_PSEUDO<VR256, v4i64>;
+ }
+ let Predicates = [HasVLX] in {
+ defm _VR128X : CMOVrr_PSEUDO<VR128X, v2i64>;
+ defm _VR256X : CMOVrr_PSEUDO<VR256X, v4i64>;
+ }
+ defm _VR512 : CMOVrr_PSEUDO<VR512, v8i64>;
+ defm _VK2 : CMOVrr_PSEUDO<VK2, v2i1>;
+ defm _VK4 : CMOVrr_PSEUDO<VK4, v4i1>;
+ defm _VK8 : CMOVrr_PSEUDO<VK8, v8i1>;
+ defm _VK16 : CMOVrr_PSEUDO<VK16, v16i1>;
+ defm _VK32 : CMOVrr_PSEUDO<VK32, v32i1>;
+ defm _VK64 : CMOVrr_PSEUDO<VK64, v64i1>;
} // usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS]
+def : Pat<(f128 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+
+let Predicates = [NoVLX] in {
+ def : Pat<(v16i8 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v8i16 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v4i32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+
+ def : Pat<(v32i8 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v16i16 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v8i32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+}
+let Predicates = [HasVLX] in {
+ def : Pat<(v16i8 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v8i16 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v2f64 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+
+ def : Pat<(v32i8 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v16i16 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v4f64 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+}
+
+def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+
//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -591,12 +662,11 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
// Memory barriers
-// TODO: Get this to fold the constant into the instruction.
let isCodeGenOnly = 1, Defs = [EFLAGS] in
-def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
- "or{l}\t{$zero, $dst|$dst, $zero}", []>,
- Requires<[Not64BitMode]>, OpSize32, LOCK,
- Sched<[WriteALULd, WriteRMW]>;
+def OR32mi8Locked : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$zero),
+ "or{l}\t{$zero, $dst|$dst, $zero}", []>,
+ Requires<[Not64BitMode]>, OpSize32, LOCK,
+ Sched<[WriteALURMW]>;
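// Sketch of the intended use: on targets without MFENCE a full memory fence
// can be lowered to this locked OR of zero against the top of the stack,
//   lock orl $0, (%esp)
// which orders surrounding memory accesses like a fence while leaving the
// stored value unchanged.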
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
@@ -610,7 +680,7 @@ def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
Format ImmMod, SDNode Op, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
- SchedRW = [WriteALULd, WriteRMW] in {
+ SchedRW = [WriteALURMW] in {
def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
@@ -706,53 +776,64 @@ defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;
defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;
defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;
-multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
- string frag, string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
- SchedRW = [WriteALULd, WriteRMW] in {
-def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
- !strconcat(mnemonic, "{b}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_8") addr:$dst))]>,
- LOCK;
-def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
- !strconcat(mnemonic, "{w}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_16") addr:$dst))]>,
- OpSize16, LOCK;
-def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
- !strconcat(mnemonic, "{l}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_32") addr:$dst))]>,
- OpSize32, LOCK;
-def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
- !strconcat(mnemonic, "{q}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_64") addr:$dst))]>,
- LOCK;
-}
-}
+def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_add node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
-multiclass unary_atomic_intrin<SDNode atomic_op> {
- def _8 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
- }]>;
- def _16 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
- }]>;
- def _32 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
- }]>;
- def _64 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
- }]>;
-}
+def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_sub node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
-defm X86lock_inc : unary_atomic_intrin<X86lock_inc>;
-defm X86lock_dec : unary_atomic_intrin<X86lock_dec>;
+let Predicates = [UseIncDec] in {
+ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALURMW] in {
+ def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "inc{b}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>,
+ LOCK;
+ def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "inc{w}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i16 1)))]>,
+ OpSize16, LOCK;
+ def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "inc{l}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>,
+ OpSize32, LOCK;
+ def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "inc{q}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>,
+ LOCK;
+
+ def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "dec{b}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i8 1)))]>,
+ LOCK;
+ def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "dec{w}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i16 1)))]>,
+ OpSize16, LOCK;
+ def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "dec{l}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>,
+ OpSize32, LOCK;
+ def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "dec{q}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>,
+ LOCK;
+ }
-defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "X86lock_inc", "inc">;
-defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "X86lock_dec", "dec">;
+ // Additional patterns for -1 constant.
+ def : Pat<(X86lock_add addr:$dst, (i8 -1)), (LOCK_DEC8m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
+}
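// Note on the no-carry-flag-uses predicate: INC and DEC leave CF untouched
// while ADD and SUB redefine it, so the substitution is only safe when no
// EFLAGS consumer reads CF. When it applies, an atomic increment whose flag
// result feeds only ZF/SF-style tests can be selected as, e.g.,
//   lock incl (%rdi)
// instead of "lock addl $1, (%rdi)", saving the immediate byte.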
// Atomic compare and swap.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
@@ -766,7 +847,7 @@ let isCodeGenOnly = 1, usesCustomInserter = 1 in {
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag> {
-let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
!strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
@@ -787,7 +868,7 @@ let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
- SchedRW = [WriteALULd, WriteRMW] in {
+ SchedRW = [WriteCMPXCHGRMW] in {
defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
}
@@ -811,7 +892,7 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
// the instruction and we are sure we will have a valid register to restore
// the value of RBX.
let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],
- SchedRW = [WriteALULd, WriteRMW], isCodeGenOnly = 1, isPseudo = 1,
+ SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1,
Constraints = "$ebx_save = $dst", usesCustomInserter = 1 in {
def LCMPXCHG8B_SAVE_EBX :
I<0, Pseudo, (outs GR32:$dst),
@@ -823,14 +904,14 @@ def LCMPXCHG8B_SAVE_EBX :
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
- Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
+ Predicates = [HasCmpxchg16b], SchedRW = [WriteCMPXCHGRMW] in {
defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
X86cas16, i128mem>, REX_W;
}
// Same as LCMPXCHG8B_SAVE_RBX but for the 16 Bytes variant.
let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
- Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW],
+ Predicates = [HasCmpxchg16b], SchedRW = [WriteCMPXCHGRMW],
isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",
usesCustomInserter = 1 in {
def LCMPXCHG16B_SAVE_RBX :
@@ -847,7 +928,7 @@ defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", X86cas>;
multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
string frag> {
let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
- SchedRW = [WriteALULd, WriteRMW] in {
+ SchedRW = [WriteALURMW] in {
def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
(ins GR8:$val, i8mem:$ptr),
!strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
@@ -887,42 +968,38 @@ defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add">, TB, LOCK;
* extremely late to prevent them from being accidentally reordered in the backend
* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
*/
-multiclass RELEASE_BINOP_MI<SDNode op> {
- def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
- "#BINOP "#NAME#"8mi PSEUDO!",
- [(atomic_store_8 addr:$dst, (op
- (atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
- def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src),
- "#BINOP "#NAME#"8mr PSEUDO!",
- [(atomic_store_8 addr:$dst, (op
- (atomic_load_8 addr:$dst), GR8:$src))]>;
- // NAME#16 is not generated as 16-bit arithmetic instructions are considered
- // costly and avoided as far as possible by this backend anyway
- def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
- "#BINOP "#NAME#"32mi PSEUDO!",
- [(atomic_store_32 addr:$dst, (op
- (atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
- def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
- "#BINOP "#NAME#"32mr PSEUDO!",
- [(atomic_store_32 addr:$dst, (op
- (atomic_load_32 addr:$dst), GR32:$src))]>;
- def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "#BINOP "#NAME#"64mi32 PSEUDO!",
- [(atomic_store_64 addr:$dst, (op
- (atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
- def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
- "#BINOP "#NAME#"64mr PSEUDO!",
- [(atomic_store_64 addr:$dst, (op
- (atomic_load_64 addr:$dst), GR64:$src))]>;
-}
-let Defs = [EFLAGS], SchedRW = [WriteMicrocoded] in {
- defm RELEASE_ADD : RELEASE_BINOP_MI<add>;
- defm RELEASE_AND : RELEASE_BINOP_MI<and>;
- defm RELEASE_OR : RELEASE_BINOP_MI<or>;
- defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;
- // Note: we don't deal with sub, because substractions of constants are
- // optimized into additions before this code can run.
+multiclass RELEASE_BINOP_MI<string Name, SDNode op> {
+ def : Pat<(atomic_store_8 addr:$dst,
+ (op (atomic_load_8 addr:$dst), (i8 imm:$src))),
+ (!cast<Instruction>(Name#"8mi") addr:$dst, imm:$src)>;
+ def : Pat<(atomic_store_16 addr:$dst,
+ (op (atomic_load_16 addr:$dst), (i16 imm:$src))),
+ (!cast<Instruction>(Name#"16mi") addr:$dst, imm:$src)>;
+ def : Pat<(atomic_store_32 addr:$dst,
+ (op (atomic_load_32 addr:$dst), (i32 imm:$src))),
+ (!cast<Instruction>(Name#"32mi") addr:$dst, imm:$src)>;
+ def : Pat<(atomic_store_64 addr:$dst,
+ (op (atomic_load_64 addr:$dst), (i64immSExt32:$src))),
+ (!cast<Instruction>(Name#"64mi32") addr:$dst, (i64immSExt32:$src))>;
+
+ def : Pat<(atomic_store_8 addr:$dst,
+ (op (atomic_load_8 addr:$dst), (i8 GR8:$src))),
+ (!cast<Instruction>(Name#"8mr") addr:$dst, GR8:$src)>;
+ def : Pat<(atomic_store_16 addr:$dst,
+ (op (atomic_load_16 addr:$dst), (i16 GR16:$src))),
+ (!cast<Instruction>(Name#"16mr") addr:$dst, GR16:$src)>;
+ def : Pat<(atomic_store_32 addr:$dst,
+ (op (atomic_load_32 addr:$dst), (i32 GR32:$src))),
+ (!cast<Instruction>(Name#"32mr") addr:$dst, GR32:$src)>;
+ def : Pat<(atomic_store_64 addr:$dst,
+ (op (atomic_load_64 addr:$dst), (i64 GR64:$src))),
+ (!cast<Instruction>(Name#"64mr") addr:$dst, GR64:$src)>;
}
+defm : RELEASE_BINOP_MI<"ADD", add>;
+defm : RELEASE_BINOP_MI<"AND", and>;
+defm : RELEASE_BINOP_MI<"OR", or>;
+defm : RELEASE_BINOP_MI<"XOR", xor>;
+defm : RELEASE_BINOP_MI<"SUB", sub>;
// Same as above, but for floating-point.
// FIXME: imm version.
@@ -947,91 +1024,64 @@ defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;
// FIXME: Add fsub, fmul, fdiv, ...
}
-multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
- def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
- "#UNOP "#NAME#"8m PSEUDO!",
- [(atomic_store_8 addr:$dst, dag8)]>;
- def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
- "#UNOP "#NAME#"16m PSEUDO!",
- [(atomic_store_16 addr:$dst, dag16)]>;
- def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
- "#UNOP "#NAME#"32m PSEUDO!",
- [(atomic_store_32 addr:$dst, dag32)]>;
- def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
- "#UNOP "#NAME#"64m PSEUDO!",
- [(atomic_store_64 addr:$dst, dag64)]>;
+multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
+ dag dag64> {
+ def : Pat<(atomic_store_8 addr:$dst, dag8),
+ (!cast<Instruction>(Name#8m) addr:$dst)>;
+ def : Pat<(atomic_store_16 addr:$dst, dag16),
+ (!cast<Instruction>(Name#16m) addr:$dst)>;
+ def : Pat<(atomic_store_32 addr:$dst, dag32),
+ (!cast<Instruction>(Name#32m) addr:$dst)>;
+ def : Pat<(atomic_store_64 addr:$dst, dag64),
+ (!cast<Instruction>(Name#64m) addr:$dst)>;
}
-let Defs = [EFLAGS], Predicates = [UseIncDec], SchedRW = [WriteMicrocoded] in {
- defm RELEASE_INC : RELEASE_UNOP<
+let Predicates = [UseIncDec] in {
+ defm : RELEASE_UNOP<"INC",
(add (atomic_load_8 addr:$dst), (i8 1)),
(add (atomic_load_16 addr:$dst), (i16 1)),
(add (atomic_load_32 addr:$dst), (i32 1)),
(add (atomic_load_64 addr:$dst), (i64 1))>;
- defm RELEASE_DEC : RELEASE_UNOP<
+ defm : RELEASE_UNOP<"DEC",
(add (atomic_load_8 addr:$dst), (i8 -1)),
(add (atomic_load_16 addr:$dst), (i16 -1)),
(add (atomic_load_32 addr:$dst), (i32 -1)),
(add (atomic_load_64 addr:$dst), (i64 -1))>;
}
-/*
-TODO: These don't work because the type inference of TableGen fails.
-TODO: find a way to fix it.
-let Defs = [EFLAGS] in {
- defm RELEASE_NEG : RELEASE_UNOP<
- (ineg (atomic_load_8 addr:$dst)),
- (ineg (atomic_load_16 addr:$dst)),
- (ineg (atomic_load_32 addr:$dst)),
- (ineg (atomic_load_64 addr:$dst))>;
-}
-// NOT doesn't set flags.
-defm RELEASE_NOT : RELEASE_UNOP<
- (not (atomic_load_8 addr:$dst)),
- (not (atomic_load_16 addr:$dst)),
- (not (atomic_load_32 addr:$dst)),
- (not (atomic_load_64 addr:$dst))>;
-*/
-let SchedRW = [WriteMicrocoded] in {
-def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
- "#RELEASE_MOV8mi PSEUDO!",
- [(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
-def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
- "#RELEASE_MOV16mi PSEUDO!",
- [(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
-def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
- "#RELEASE_MOV32mi PSEUDO!",
- [(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
-def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "#RELEASE_MOV64mi32 PSEUDO!",
- [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
-
-def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
- "#RELEASE_MOV8mr PSEUDO!",
- [(atomic_store_8 addr:$dst, GR8 :$src)]>;
-def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
- "#RELEASE_MOV16mr PSEUDO!",
- [(atomic_store_16 addr:$dst, GR16:$src)]>;
-def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
- "#RELEASE_MOV32mr PSEUDO!",
- [(atomic_store_32 addr:$dst, GR32:$src)]>;
-def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
- "#RELEASE_MOV64mr PSEUDO!",
- [(atomic_store_64 addr:$dst, GR64:$src)]>;
-
-def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
- "#ACQUIRE_MOV8rm PSEUDO!",
- [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
-def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
- "#ACQUIRE_MOV16rm PSEUDO!",
- [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
-def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
- "#ACQUIRE_MOV32rm PSEUDO!",
- [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
-def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
- "#ACQUIRE_MOV64rm PSEUDO!",
- [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
-} // SchedRW
+defm : RELEASE_UNOP<"NEG",
+ (ineg (i8 (atomic_load_8 addr:$dst))),
+ (ineg (i16 (atomic_load_16 addr:$dst))),
+ (ineg (i32 (atomic_load_32 addr:$dst))),
+ (ineg (i64 (atomic_load_64 addr:$dst)))>;
+defm : RELEASE_UNOP<"NOT",
+ (not (i8 (atomic_load_8 addr:$dst))),
+ (not (i16 (atomic_load_16 addr:$dst))),
+ (not (i32 (atomic_load_32 addr:$dst))),
+ (not (i64 (atomic_load_64 addr:$dst)))>;
+
+def : Pat<(atomic_store_8 addr:$dst, (i8 imm:$src)),
+ (MOV8mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_16 addr:$dst, (i16 imm:$src)),
+ (MOV16mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_32 addr:$dst, (i32 imm:$src)),
+ (MOV32mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_64 addr:$dst, (i64immSExt32:$src)),
+ (MOV64mi32 addr:$dst, i64immSExt32:$src)>;
+
+def : Pat<(atomic_store_8 addr:$dst, GR8:$src),
+ (MOV8mr addr:$dst, GR8:$src)>;
+def : Pat<(atomic_store_16 addr:$dst, GR16:$src),
+ (MOV16mr addr:$dst, GR16:$src)>;
+def : Pat<(atomic_store_32 addr:$dst, GR32:$src),
+ (MOV32mr addr:$dst, GR32:$src)>;
+def : Pat<(atomic_store_64 addr:$dst, GR64:$src),
+ (MOV64mr addr:$dst, GR64:$src)>;
+
+def : Pat<(i8 (atomic_load_8 addr:$src)), (MOV8rm addr:$src)>;
+def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>;
+def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
+def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;
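// These plain MOV selections lean on the x86-TSO memory model: naturally
// aligned loads and stores up to 64 bits are single-copy atomic, ordinary
// loads already have acquire semantics, and ordinary stores have release
// semantics, so e.g. a release store of an i32 can be just
//   movl %esi, (%rdi)
// with no extra fencing for the access itself.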
//===----------------------------------------------------------------------===//
// DAG Pattern Matching Rules
@@ -1041,12 +1091,12 @@ def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
// binary size compared to a regular MOV, but it introduces an unnecessary
// load, so is not suitable for regular or optsize functions.
let Predicates = [OptForMinSize] in {
-def : Pat<(store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;
-def : Pat<(store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;
-def : Pat<(store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;
-def : Pat<(store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;
-def : Pat<(store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
-def : Pat<(store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
+def : Pat<(nonvolatile_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;
+def : Pat<(nonvolatile_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;
+def : Pat<(nonvolatile_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;
+def : Pat<(nonvolatile_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;
+def : Pat<(nonvolatile_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
+def : Pat<(nonvolatile_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
}
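// Rough size comparison for the 32-bit case (register-indirect address):
//   movl $0, (%rax)    C7 00 00 00 00 00    6 bytes
//   andl $0, (%rax)    83 20 00             3 bytes
// The AND/OR forms with a sign-extended 8-bit immediate trade an extra
// load-modify-store for a smaller encoding, which only pays off at minsize.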
// In kernel code model, we can get the address of a label
@@ -1128,14 +1178,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, NotUseRetpoline]>;
+ Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[Not64BitMode, IsNotPIC, NotUseRetpoline]>;
+ Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
@@ -1147,21 +1197,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpoline]>;
+ Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpoline]>;
+ Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, UseRetpoline]>;
+ Requires<[In64BitMode, UseRetpolineIndirectCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, UseRetpoline]>;
+ Requires<[Not64BitMode, UseRetpolineIndirectCalls]>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
@@ -1226,7 +1276,8 @@ defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
// i1 stored in one byte in zero-extended form.
// Upper bits cleanup should be executed before Store.
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(zextloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
@@ -1237,9 +1288,11 @@ def : Pat<(zextloadi64i1 addr:$src),
// defined, avoiding partial-register updates.
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
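// Loose rationale for the 32-bit MOVZX plus EXTRACT_SUBREG forms above:
// writing only a 16-bit register merges with the stale upper bits and creates
// a partial-register dependence, whereas
//   movzbl (%rdi), %eax
// defines all of EAX, after which the 16-bit value is simply read as AX.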
@@ -1271,6 +1324,15 @@ def : Pat<(i64 (anyext GR16:$src)),
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, sub_32bit)>;
+// If this is an anyext of the remainder of an 8-bit sdivrem, use a MOVSX
+// instead of a MOVZX. The sdivrem lowering will emit a MOVSX to move
+// %ah to the lower byte of a register. By using a MOVSX here we allow a
+// post-isel peephole to merge the two MOVSX instructions into one.
+def anyext_sdiv : PatFrag<(ops node:$lhs), (anyext node:$lhs),[{
+ return (N->getOperand(0).getOpcode() == ISD::SDIVREM &&
+ N->getOperand(0).getResNo() == 1);
+}]>;
+def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>;
// Any instruction that defines a 32-bit result leaves the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
@@ -1305,17 +1367,15 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- KnownBits Known0;
- CurDAG->computeKnownBits(N->getOperand(0), Known0, 0);
- KnownBits Known1;
- CurDAG->computeKnownBits(N->getOperand(1), Known1, 0);
+ KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
+ KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
return (~Known0.Zero & ~Known1.Zero) == 0;
}]>;
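// Worked example of the predicate above: in (or (shl x, 8), (and y, 255)) the
// operands can never have a set bit in common, so OR and ADD produce the same
// result and the ADD form (which can become LEA or a three-address add) is
// usable.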
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
// Try this before the selecting to OR.
-let AddedComplexity = 5, SchedRW = [WriteALU] in {
+let SchedRW = [WriteALU] in {
let isConvertibleToThreeAddress = 1,
Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
@@ -1375,8 +1435,7 @@ def ADD64ri32_DB : I<0, Pseudo,
def sub_is_xor : PatFrag<(ops node:$lhs, node:$rhs), (sub node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- KnownBits Known;
- CurDAG->computeKnownBits(N->getOperand(1), Known);
+ KnownBits Known = CurDAG->computeKnownBits(N->getOperand(1));
// If all possible ones in the RHS are set in the LHS then there can't be
// a borrow and we can use xor.
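    // For instance, with LHS == 0xFF and an RHS known to fit in the low four
    // bits, every bit the RHS could set is already set in the LHS, so
    // 0xFF - r == 0xFF ^ r for all such r and the subtract can become an xor.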
@@ -1973,6 +2032,15 @@ let Predicates = [UseIncDec] in {
def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+
+ def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>;
+ def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>;
+ def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>;
+ def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>;
+ def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>;
+ def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>;
+ def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>;
+ def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>;
}
// or reg/reg.
@@ -2081,23 +2149,3 @@ def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
let Predicates = [HasMOVBE] in {
def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>;
}
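// Quick arithmetic check: rotating a 16-bit value left by 8 moves the low
// byte into the high byte and vice versa, e.g. 0x1234 -> 0x3412, so
//   rolw $8, %ax
// byte-swaps AX without needing a 16-bit bswap encoding.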
-
-// These patterns are selected by some custom code in X86ISelDAGToDAG.cpp that
-// custom combines and+srl into BEXTR. We use these patterns to avoid a bunch
-// of manual code for folding loads.
-let Predicates = [HasBMI, NoTBM] in {
- def : Pat<(X86bextr GR32:$src1, (i32 imm:$src2)),
- (BEXTR32rr GR32:$src1, (MOV32ri imm:$src2))>;
- def : Pat<(X86bextr (loadi32 addr:$src1), (i32 imm:$src2)),
- (BEXTR32rm addr:$src1, (MOV32ri imm:$src2))>;
- def : Pat<(X86bextr GR64:$src1, mov64imm32:$src2),
- (BEXTR64rr GR64:$src1,
- (SUBREG_TO_REG (i64 0),
- (MOV32ri64 mov64imm32:$src2),
- sub_32bit))>;
- def : Pat<(X86bextr (loadi64 addr:$src1), mov64imm32:$src2),
- (BEXTR64rm addr:$src1,
- (SUBREG_TO_REG (i64 0),
- (MOV32ri64 mov64imm32:$src2),
- sub_32bit))>;
-} // HasBMI, NoTBM
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
index 650bce74dcf2..a7c7aaab2285 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrControl.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -222,11 +222,13 @@ let isCall = 1 in
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
- Requires<[Not64BitMode,NotUseRetpoline]>, Sched<[WriteJump]>;
+ Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>,
+ Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
- Requires<[Not64BitMode,FavorMemIndirectCall,NotUseRetpoline]>,
+ Requires<[Not64BitMode,FavorMemIndirectCall,
+ NotUseRetpolineIndirectCalls]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT, use with caution.
@@ -320,11 +322,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,NotUseRetpoline]>;
+ Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
- NotUseRetpoline]>;
+ NotUseRetpolineIndirectCalls]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
@@ -379,11 +381,11 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
SchedRW = [WriteJump] in {
def RETPOLINE_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
- Requires<[Not64BitMode,UseRetpoline]>;
+ Requires<[Not64BitMode,UseRetpolineIndirectCalls]>;
def RETPOLINE_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,UseRetpoline]>;
+ Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
// Retpoline variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
index 421792c5599f..c24d6d5b8df1 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrExtension.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -163,6 +163,26 @@ def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
[(set GR64:$dst, (sextloadi64i32 addr:$src))]>,
Sched<[WriteALULd]>, Requires<[In64BitMode]>;
+// These instructions exist as a consequence of operand size prefix having
+// control of the destination size, but not the input size. Only support them
+// for the disassembler.
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
+def MOVSX16rr32: I<0x63, MRMSrcReg, (outs GR16:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALU]>, OpSize16, Requires<[In64BitMode]>;
+def MOVSX32rr32: I<0x63, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALU]>, OpSize32, Requires<[In64BitMode]>;
+let mayLoad = 1 in {
+def MOVSX16rm32: I<0x63, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALULd]>, OpSize16, Requires<[In64BitMode]>;
+def MOVSX32rm32: I<0x63, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALULd]>, OpSize32, Requires<[In64BitMode]>;
+} // mayLoad = 1
+} // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0
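+// Concrete sketch of the encodings involved: opcode 0x63 always reads a
+// 32-bit source in 64-bit mode, while the 0x66 operand-size prefix (or its
+// absence) selects a 16-bit or 32-bit destination, so byte sequences such as
+// "66 63 c8" are legal encodings no compiler emits but a disassembler must
+// still decode.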
+
// movzbq and movzwq encodings for the disassembler
let hasSideEffects = 0 in {
def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
index a559f62c8f38..1a8e529431af 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
@@ -51,7 +51,7 @@ multiclass fma3p_rm_213<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1,
(MemFrag addr:$src3))))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -71,7 +71,7 @@ multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3),
RC:$src1)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -93,7 +93,7 @@ multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1,
RC:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
@@ -192,7 +192,7 @@ multiclass fma3s_rm_213<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
@@ -212,7 +212,7 @@ multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, (load addr:$src3), RC:$src1))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
@@ -234,7 +234,7 @@ multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode (load addr:$src3), RC:$src1, RC:$src2))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
@@ -279,7 +279,7 @@ multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
(ins RC:$src1, RC:$src2, memopr:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
// The FMA 213 form is created for lowering of scalar FMA intrinscis
@@ -402,19 +402,19 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3)))]>, VEX_W, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : FMA4S<opc, MRMSrcReg, (outs RC:$dst),
@@ -438,19 +438,19 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
let mayLoad = 1 in
def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>,
- VEX_LIG, Sched<[sched.Folded, ReadAfterLd,
+ VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold,
// memop:$src2
ReadDefault, ReadDefault, ReadDefault,
ReadDefault, ReadDefault,
// VR128::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -477,19 +477,19 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
(ld_frag128 addr:$src3)))]>, VEX_W,
- Sched<[sched.XMM.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.XMM.Folded, sched.XMM.ReadAfterFold, sched.XMM.ReadAfterFold]>;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>,
- Sched<[sched.XMM.Folded, ReadAfterLd,
+ Sched<[sched.XMM.Folded, sched.XMM.ReadAfterFold,
// f128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128::$src3
- ReadAfterLd]>;
+ sched.XMM.ReadAfterFold]>;
let isCommutable = 1 in
def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
@@ -504,19 +504,19 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
(ld_frag256 addr:$src3)))]>, VEX_W, VEX_L,
- Sched<[sched.YMM.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.YMM.Folded, sched.YMM.ReadAfterFold, sched.YMM.ReadAfterFold]>;
def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L,
- Sched<[sched.YMM.Folded, ReadAfterLd,
+ Sched<[sched.YMM.Folded, sched.YMM.ReadAfterFold,
// f256mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR256::$src3
- ReadAfterLd]>;
+ sched.YMM.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
index cc81a919ec99..5912a3199613 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -592,10 +592,13 @@ def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
let SchedRW = [WriteFCom] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
+let Predicates = [FPStackf32, HasCMov] in
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
+let Predicates = [FPStackf64, HasCMov] in
def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
+let Predicates = [HasCMov] in
def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 739275907978..11a27ba90586 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -75,7 +75,7 @@ def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, i8>,
SDTCisSameSizeAs<0,1>,
- SDTCisSameAs<1,2>, SDTCisInt<3>]>>;
+ SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -103,23 +103,22 @@ def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86vzext : SDNode<"X86ISD::VZEXT",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisOpSmallerThanOp<1, 0>]>>;
-
-def X86vsext : SDNode<"X86ISD::VSEXT",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisOpSmallerThanOp<1, 0>]>>;
-
def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisOpSmallerThanOp<0, 1>]>;
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisOpSmallerThanOp<0, 1>]>;
+def SDTVmtrunc : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisOpSmallerThanOp<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>;
def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>;
def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>;
def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>;
+def X86vmtrunc : SDNode<"X86ISD::VMTRUNC", SDTVmtrunc>;
+def X86vmtruncs : SDNode<"X86ISD::VMTRUNCS", SDTVmtrunc>;
+def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
@@ -144,6 +143,14 @@ def X86fpextRnd : SDNode<"X86ISD::VFPEXTS_RND",
SDTCisSameSizeAs<0, 2>,
SDTCisVT<3, i32>]>>;
+def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
+ SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCisSameSizeAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>>;
+
def X86vshiftimm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVT<2, i8>, SDTCisInt<0>]>;
@@ -182,7 +189,7 @@ def X86phminpos: SDNode<"X86ISD::PHMINPOS",
def X86vshiftuniform : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>, SDTCisInt<0>,
- SDTCisInt<1>]>;
+ SDTCisInt<2>]>;
def X86vshl : SDNode<"X86ISD::VSHL", X86vshiftuniform>;
def X86vsrl : SDNode<"X86ISD::VSRL", X86vshiftuniform>;
@@ -237,10 +244,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
-def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>;
-def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
-def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
-def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
@@ -535,6 +538,8 @@ def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisFP<1>,
SDTCisVT<2, i32>]>;
+def SDTSFloatToInt: SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisFP<1>,
+ SDTCisVec<1>]>;
def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>,
SDTCisVec<1>, SDTCisVT<2, i32>]>;
@@ -548,11 +553,15 @@ def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
def X86SintToFpRnd : SDNode<"X86ISD::SCALAR_SINT_TO_FP_RND", SDTintToFPRound>;
def X86UintToFpRnd : SDNode<"X86ISD::SCALAR_UINT_TO_FP_RND", SDTintToFPRound>;
+def X86cvtts2Int : SDNode<"X86ISD::CVTTS2SI", SDTSFloatToInt>;
+def X86cvtts2UInt : SDNode<"X86ISD::CVTTS2UI", SDTSFloatToInt>;
def X86cvtts2IntRnd : SDNode<"X86ISD::CVTTS2SI_RND", SDTSFloatToIntRnd>;
def X86cvtts2UIntRnd : SDNode<"X86ISD::CVTTS2UI_RND", SDTSFloatToIntRnd>;
-def X86cvts2si : SDNode<"X86ISD::CVTS2SI_RND", SDTSFloatToIntRnd>;
-def X86cvts2usi : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
+def X86cvts2si : SDNode<"X86ISD::CVTS2SI", SDTSFloatToInt>;
+def X86cvts2usi : SDNode<"X86ISD::CVTS2UI", SDTSFloatToInt>;
+def X86cvts2siRnd : SDNode<"X86ISD::CVTS2SI_RND", SDTSFloatToIntRnd>;
+def X86cvts2usiRnd : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
// Vector with rounding mode
@@ -581,6 +590,19 @@ def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
+def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisFP<1>,
+ SDTCisSameSizeAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>;
+
+def X86mcvtp2Int : SDNode<"X86ISD::MCVTP2SI", SDTMFloatToInt>;
+def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>;
+def X86mcvttp2si : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
+def X86mcvttp2ui : SDNode<"X86ISD::MCVTTP2UI", SDTMFloatToInt>;
+
+
def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, i16>]> >;
@@ -594,6 +616,13 @@ def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>,
SDTCisVT<2, i32>]> >;
+def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
+ SDTypeProfile<1, 4, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisVT<2, i32>,
+ SDTCisSameAs<0, 3>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisSameNumEltsAs<1, 4>]> >;
def X86vfpextRnd : SDNode<"X86ISD::VFPEXT_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
SDTCVecEltisVT<1, f32>,
@@ -641,28 +670,29 @@ def sdmem : Operand<v2f64> {
// SSE pattern fragments
//===----------------------------------------------------------------------===//
-// Vector load wrappers to prevent folding of non-temporal aligned loads on
-// supporting targets.
-def vecload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return !useNonTemporalLoad(cast<LoadSDNode>(N));
-}]>;
-
// 128-bit load pattern fragments
-// NOTE: all 128-bit integer vector loads are promoted to v2i64
-def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vecload node:$ptr))>;
-def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vecload node:$ptr))>;
-def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vecload node:$ptr))>;
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
+def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
// 256-bit load pattern fragments
-// NOTE: all 256-bit integer vector loads are promoted to v4i64
-def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vecload node:$ptr))>;
-def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vecload node:$ptr))>;
-def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vecload node:$ptr))>;
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
+def loadv16i16 : PatFrag<(ops node:$ptr), (v16i16 (load node:$ptr))>;
+def loadv32i8 : PatFrag<(ops node:$ptr), (v32i8 (load node:$ptr))>;
// 512-bit load pattern fragments
-def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vecload node:$ptr))>;
-def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vecload node:$ptr))>;
-def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vecload node:$ptr))>;
+def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
+def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
+def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
+def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
+def loadv32i16 : PatFrag<(ops node:$ptr), (v32i16 (load node:$ptr))>;
+def loadv64i8 : PatFrag<(ops node:$ptr), (v64i8 (load node:$ptr))>;
// 128-/256-/512-bit extload pattern fragments
def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
@@ -676,46 +706,63 @@ def alignedstore : PatFrag<(ops node:$val, node:$ptr),
return St->getAlignment() >= St->getMemoryVT().getStoreSize();
}]>;
-// Like 'load', but always requires 128-bit vector alignment.
-def alignedvecload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+// Like 'load', but always requires vector size alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
auto *Ld = cast<LoadSDNode>(N);
- return Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize() &&
- !useNonTemporalLoad(cast<LoadSDNode>(N));
+ return Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
}]>;
// 128-bit aligned load pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
- (v4f32 (alignedvecload node:$ptr))>;
+ (v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
- (v2f64 (alignedvecload node:$ptr))>;
+ (v2f64 (alignedload node:$ptr))>;
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
- (v2i64 (alignedvecload node:$ptr))>;
+ (v2i64 (alignedload node:$ptr))>;
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
+ (v4i32 (alignedload node:$ptr))>;
+def alignedloadv8i16 : PatFrag<(ops node:$ptr),
+ (v8i16 (alignedload node:$ptr))>;
+def alignedloadv16i8 : PatFrag<(ops node:$ptr),
+ (v16i8 (alignedload node:$ptr))>;
// 256-bit aligned load pattern fragments
// NOTE: all 256-bit integer vector loads are promoted to v4i64
-def alignedloadv8f32 : PatFrag<(ops node:$ptr),
- (v8f32 (alignedvecload node:$ptr))>;
-def alignedloadv4f64 : PatFrag<(ops node:$ptr),
- (v4f64 (alignedvecload node:$ptr))>;
-def alignedloadv4i64 : PatFrag<(ops node:$ptr),
- (v4i64 (alignedvecload node:$ptr))>;
+def alignedloadv8f32 : PatFrag<(ops node:$ptr),
+ (v8f32 (alignedload node:$ptr))>;
+def alignedloadv4f64 : PatFrag<(ops node:$ptr),
+ (v4f64 (alignedload node:$ptr))>;
+def alignedloadv4i64 : PatFrag<(ops node:$ptr),
+ (v4i64 (alignedload node:$ptr))>;
+def alignedloadv8i32 : PatFrag<(ops node:$ptr),
+ (v8i32 (alignedload node:$ptr))>;
+def alignedloadv16i16 : PatFrag<(ops node:$ptr),
+ (v16i16 (alignedload node:$ptr))>;
+def alignedloadv32i8 : PatFrag<(ops node:$ptr),
+ (v32i8 (alignedload node:$ptr))>;
// 512-bit aligned load pattern fragments
def alignedloadv16f32 : PatFrag<(ops node:$ptr),
- (v16f32 (alignedvecload node:$ptr))>;
+ (v16f32 (alignedload node:$ptr))>;
def alignedloadv8f64 : PatFrag<(ops node:$ptr),
- (v8f64 (alignedvecload node:$ptr))>;
+ (v8f64 (alignedload node:$ptr))>;
def alignedloadv8i64 : PatFrag<(ops node:$ptr),
- (v8i64 (alignedvecload node:$ptr))>;
-
-// Like 'vecload', but uses special alignment checks suitable for use in
+ (v8i64 (alignedload node:$ptr))>;
+def alignedloadv16i32 : PatFrag<(ops node:$ptr),
+ (v16i32 (alignedload node:$ptr))>;
+def alignedloadv32i16 : PatFrag<(ops node:$ptr),
+ (v32i16 (alignedload node:$ptr))>;
+def alignedloadv64i8 : PatFrag<(ops node:$ptr),
+ (v64i8 (alignedload node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
// be naturally aligned on some targets but not on others. If the subtarget
// allows unaligned accesses, match any load, though this may require
// setting a feature bit in the processor (on startup, for example).
// Opteron 10h and later implement such a feature.
-def memop : PatFrag<(ops node:$ptr), (vecload node:$ptr), [{
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
auto *Ld = cast<LoadSDNode>(N);
return Subtarget->hasSSEUnalignedMem() ||
Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
@@ -726,6 +773,9 @@ def memop : PatFrag<(ops node:$ptr), (vecload node:$ptr), [{
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
+def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
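A minimal standalone restatement of the alignment rule that the memop fragment above encodes; the helper name and parameters are illustrative and not part of the patch. A memory operand is foldable either because the subtarget reports fast unaligned SSE memory or because the load is naturally aligned for the memory VT.

  // Sketch of the memop predicate: fold when unaligned access is cheap or
  // when the load's alignment covers the memory VT's store size.
  static bool sseMemopMatches(bool HasSSEUnalignedMem, unsigned LoadAlignBytes,
                              unsigned MemVTStoreSizeBytes) {
    return HasSSEUnalignedMem || LoadAlignBytes >= MemVTStoreSizeBytes;
  }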
def X86masked_gather : SDNode<"X86ISD::MGATHER",
SDTypeProfile<2, 3, [SDTCisVec<0>,
@@ -828,6 +878,7 @@ def bc_v4f64 : PatFrag<(ops node:$in), (v4f64 (bitconvert node:$in))>;
// 512-bit bitconvert pattern fragments
def bc_v64i8 : PatFrag<(ops node:$in), (v64i8 (bitconvert node:$in))>;
+def bc_v32i16 : PatFrag<(ops node:$in), (v32i16 (bitconvert node:$in))>;
def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index e56452362168..ab14ee7fadf2 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -411,8 +411,13 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- const MachineMemOperand *Dummy;
- return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (hasLoadFromStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return 1;
+ }
}
return 0;
}
@@ -441,8 +446,13 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- const MachineMemOperand *Dummy;
- return hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (hasStoreToStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return 1;
+ }
}
return 0;
}
@@ -708,7 +718,7 @@ bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
}
/// Check whether the shift count for a machine operand is non-zero.
-inline static unsigned getTruncatedShiftCount(MachineInstr &MI,
+inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
unsigned ShiftAmtOperandIdx) {
// The shift count is six bits with the REX.W prefix and five bits without.
unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
@@ -729,8 +739,7 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned Opc, bool AllowSP, unsigned &NewSrc,
- bool &isKill, bool &isUndef,
- MachineOperand &ImplicitOp,
+ bool &isKill, MachineOperand &ImplicitOp,
LiveVariables *LV) const {
MachineFunction &MF = *MI.getParent()->getParent();
const TargetRegisterClass *RC;
@@ -747,7 +756,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
if (Opc != X86::LEA64_32r) {
NewSrc = SrcReg;
isKill = Src.isKill();
- isUndef = Src.isUndef();
+ assert(!Src.isUndef() && "Undef op doesn't need optimization");
if (TargetRegisterInfo::isVirtualRegister(NewSrc) &&
!MF.getRegInfo().constrainRegClass(NewSrc, RC))
@@ -764,7 +773,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
isKill = Src.isKill();
- isUndef = Src.isUndef();
+ assert(!Src.isUndef() && "Undef op doesn't need optimization");
} else {
// Virtual register of the wrong class, we have to create a temporary 64-bit
// vreg to feed into the LEA.
@@ -776,7 +785,6 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
// Which is obviously going to be dead after we're done with it.
isKill = true;
- isUndef = false;
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *Copy);
@@ -786,88 +794,99 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
return true;
}
-/// Helper for convertToThreeAddress when 16-bit LEA is disabled, use 32-bit
-/// LEA to form 3-address code by promoting to a 32-bit superregister and then
-/// truncating back down to a 16-bit subregister.
MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
LiveVariables *LV) const {
- MachineBasicBlock::iterator MBBI = MI.getIterator();
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
- bool isDead = MI.getOperand(0).isDead();
- bool isKill = MI.getOperand(1).isKill();
-
+ // We handle 8-bit adds and various 16-bit opcodes in the switch below.
+ bool Is16BitOp = !(MIOpc == X86::ADD8rr || MIOpc == X86::ADD8ri);
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
- unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
- unsigned Opc, leaInReg;
- if (Subtarget.is64Bit()) {
- Opc = X86::LEA64_32r;
- leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
- } else {
- Opc = X86::LEA32r;
- leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
- }
+ assert((!Is16BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
+ *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
+ "Unexpected type for LEA transform");
+
+ // TODO: For a 32-bit target, we need to adjust the LEA variables with
+ // something like this:
+ // Opcode = X86::LEA32r;
+ // InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ // OutRegLEA =
+ // Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
+ // : RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ if (!Subtarget.is64Bit())
+ return nullptr;
+
+ unsigned Opcode = X86::LEA64_32r;
+ unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+ unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
+ // we will be shifting and then extracting the lower 8/16-bits.
// This has the potential to cause partial register stall. e.g.
// movw (%rbp,%rcx,2), %dx
// leal -65(%rdx), %esi
// But testing has shown this *does* help performance in 64-bit mode (at
// least on modern x86 machines).
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
+ unsigned Dest = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+ bool IsDead = MI.getOperand(0).isDead();
+ bool IsKill = MI.getOperand(1).isKill();
+ unsigned SubReg = Is16BitOp ? X86::sub_16bit : X86::sub_8bit;
+ assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
+ BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
MachineInstr *InsMI =
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
- .addReg(leaInReg, RegState::Define, X86::sub_16bit)
- .addReg(Src, getKillRegState(isKill));
+ .addReg(InRegLEA, RegState::Define, SubReg)
+ .addReg(Src, getKillRegState(IsKill));
MachineInstrBuilder MIB =
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opc), leaOutReg);
+ BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
case X86::SHL16ri: {
unsigned ShAmt = MI.getOperand(2).getImm();
MIB.addReg(0).addImm(1ULL << ShAmt)
- .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
+ .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
break;
}
case X86::INC16r:
- addRegOffset(MIB, leaInReg, true, 1);
+ addRegOffset(MIB, InRegLEA, true, 1);
break;
case X86::DEC16r:
- addRegOffset(MIB, leaInReg, true, -1);
+ addRegOffset(MIB, InRegLEA, true, -1);
break;
+ case X86::ADD8ri:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
- addRegOffset(MIB, leaInReg, true, MI.getOperand(2).getImm());
+ addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
break;
+ case X86::ADD8rr:
case X86::ADD16rr:
case X86::ADD16rr_DB: {
unsigned Src2 = MI.getOperand(2).getReg();
- bool isKill2 = MI.getOperand(2).isKill();
- unsigned leaInReg2 = 0;
+ bool IsKill2 = MI.getOperand(2).isKill();
+ assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
+ unsigned InRegLEA2 = 0;
MachineInstr *InsMI2 = nullptr;
if (Src == Src2) {
- // ADD16rr killed %reg1028, %reg1028
+ // ADD8rr/ADD16rr killed %reg1028, %reg1028
// just a single insert_subreg.
- addRegReg(MIB, leaInReg, true, leaInReg, false);
+ addRegReg(MIB, InRegLEA, true, InRegLEA, false);
} else {
if (Subtarget.is64Bit())
- leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+ InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
else
- leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ // we will be shifting and then extracting the lower 8/16-bits.
+ BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
- .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
- .addReg(Src2, getKillRegState(isKill2));
- addRegReg(MIB, leaInReg, true, leaInReg2, true);
+ .addReg(InRegLEA2, RegState::Define, SubReg)
+ .addReg(Src2, getKillRegState(IsKill2));
+ addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
}
- if (LV && isKill2 && InsMI2)
+ if (LV && IsKill2 && InsMI2)
LV->replaceKillInstruction(Src2, MI, *InsMI2);
break;
}
@@ -876,16 +895,16 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
MachineInstr *NewMI = MIB;
MachineInstr *ExtMI =
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
+ .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
+ .addReg(OutRegLEA, RegState::Kill, SubReg);
if (LV) {
- // Update live variables
- LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
- LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
- if (isKill)
+ // Update live variables.
+ LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
+ LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
+ if (IsKill)
LV->replaceKillInstruction(Src, MI, *InsMI);
- if (isDead)
+ if (IsDead)
LV->replaceKillInstruction(Dest, MI, *ExtMI);
}
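One step of reasoning the widening transform above depends on, restated as a tiny standalone C++ sketch (the helper is illustrative, not from the patch): addition is congruent modulo 2^N, so the low 8/16 bits of the 32-bit LEA result depend only on the low 8/16 bits of its operands, and the undefined upper bits of the IMPLICIT_DEF'd inputs cannot leak into the sub-register that is copied back out.

  #include <cstdint>
  // The low byte of a 32-bit add equals the 8-bit add of the low bytes,
  // regardless of what sits in the upper bits of the inputs.
  static uint8_t add8ViaWideAdd(uint32_t A, uint32_t B) {
    return static_cast<uint8_t>(A + B); // == uint8_t(uint8_t(A) + uint8_t(B))
  }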
@@ -916,12 +935,18 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
const MachineOperand &Dest = MI.getOperand(0);
const MachineOperand &Src = MI.getOperand(1);
+ // Ideally, operations with undef should be folded before we get here, but we
+ // can't guarantee it. Bail out because optimizing undefs is a waste of time.
+ // Without this, we have to forward undef state to new register operands to
+ // avoid machine verifier errors.
+ if (Src.isUndef())
+ return nullptr;
+ if (MI.getNumOperands() > 2)
+ if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef())
+ return nullptr;
+
MachineInstr *NewMI = nullptr;
- // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
- // we have better subtarget support, enable the 16-bit LEA generation here.
- // 16-bit LEA is also slow on Core2.
- bool DisableLEA16 = true;
- bool is64Bit = Subtarget.is64Bit();
+ bool Is64Bit = Subtarget.is64Bit();
unsigned MIOpc = MI.getOpcode();
switch (MIOpc) {
@@ -951,14 +976,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
- unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
// LEA can't handle ESP.
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ SrcReg, isKill, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
@@ -966,7 +991,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.add(Dest)
.addReg(0)
.addImm(1ULL << ShAmt)
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addReg(SrcReg, getKillRegState(isKill))
.addImm(0)
.addReg(0);
if (ImplicitOp.getReg() != 0)
@@ -978,37 +1003,26 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::SHL16ri: {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
-
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
- .add(Dest)
- .addReg(0)
- .addImm(1ULL << ShAmt)
- .add(Src)
- .addImm(0)
- .addReg(0);
- break;
+ if (!isTruncatedShiftCountForLEA(ShAmt))
+ return nullptr;
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
}
case X86::INC64r:
case X86::INC32r: {
assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
- unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
- : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- bool isKill, isUndef;
+ unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
+ (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
+ ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef));
+ .addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
@@ -1016,30 +1030,23 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::INC16r:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
- NewMI = addOffset(
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), 1);
- break;
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::DEC64r:
case X86::DEC32r: {
assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
- : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
+ ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
- .addReg(SrcReg, getUndefRegState(isUndef) |
- getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
@@ -1048,13 +1055,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::DEC16r:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
- NewMI = addOffset(
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), -1);
- break;
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::ADD64rr:
case X86::ADD64rr_DB:
case X86::ADD32rr:
@@ -1064,21 +1065,21 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
Opc = X86::LEA64r;
else
- Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ SrcReg, isKill, ImplicitOp, LV))
return nullptr;
const MachineOperand &Src2 = MI.getOperand(2);
- bool isKill2, isUndef2;
+ bool isKill2;
unsigned SrcReg2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
- SrcReg2, isKill2, isUndef2, ImplicitOp2, LV))
+ SrcReg2, isKill2, ImplicitOp2, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
@@ -1088,36 +1089,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MIB.add(ImplicitOp2);
NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
-
- // Preserve undefness of the operands.
- NewMI->getOperand(1).setIsUndef(isUndef);
- NewMI->getOperand(3).setIsUndef(isUndef2);
-
if (LV && Src2.isKill())
LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
break;
}
+ case X86::ADD8rr:
case X86::ADD16rr:
- case X86::ADD16rr_DB: {
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Src2 = MI.getOperand(2).getReg();
- bool isKill2 = MI.getOperand(2).isKill();
- NewMI = addRegReg(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest),
- Src.getReg(), Src.isKill(), Src2, isKill2);
-
- // Preserve undefness of the operands.
- bool isUndef = MI.getOperand(1).isUndef();
- bool isUndef2 = MI.getOperand(2).isUndef();
- NewMI->getOperand(1).setIsUndef(isUndef);
- NewMI->getOperand(3).setIsUndef(isUndef2);
-
- if (LV && isKill2)
- LV->replaceKillInstruction(Src2, MI, *NewMI);
- break;
- }
+ case X86::ADD16rr_DB:
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::ADD64ri32:
case X86::ADD64ri8:
case X86::ADD64ri32_DB:
@@ -1132,38 +1111,30 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD32ri_DB:
case X86::ADD32ri8_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ SrcReg, isKill, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
- .addReg(SrcReg, getUndefRegState(isUndef) |
- getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
NewMI = addOffset(MIB, MI.getOperand(2));
break;
}
+ case X86::ADD8ri:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addOffset(
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src),
- MI.getOperand(2));
- break;
-
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::VMOVDQU8Z128rmk:
case X86::VMOVDQU8Z256rmk:
case X86::VMOVDQU8Zrmk:
@@ -2540,7 +2511,7 @@ void X86InstrInfo::replaceBranchWithTailCall(
// call. This way they still appear live across the call.
LivePhysRegs LiveRegs(getRegisterInfo());
LiveRegs.addLiveOuts(MBB);
- SmallVector<std::pair<unsigned, const MachineOperand *>, 8> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;
LiveRegs.stepForward(*MIB, Clobbers);
for (const auto &C : Clobbers) {
MIB.addReg(C.first, RegState::Implicit);
@@ -2630,6 +2601,11 @@ bool X86InstrInfo::AnalyzeBranchImpl(
if (BranchCode == X86::COND_INVALID)
return true; // Can't handle indirect branch.
+ // In practice we should never have an undef eflags operand; if we do,
+ // abort here, as we are not prepared to preserve the flag.

+ if (I->getOperand(1).isUndef())
+ return true;
+
// Working from the bottom, handle the first conditional branch.
if (Cond.empty()) {
MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
@@ -3112,9 +3088,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
report_fatal_error("Cannot emit physreg copy instruction");
}
-bool X86InstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
if (MI.isMoveReg()) {
Dest = &MI.getOperand(0);
Src = &MI.getOperand(1);
@@ -3242,9 +3218,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
}
}
-bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool X86InstrInfo::getMemOperandWithOffset(
+ MachineInstr &MemOp, MachineOperand *&BaseOp, int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
const MCInstrDesc &Desc = MemOp.getDesc();
int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
if (MemRefBegin < 0)
@@ -3252,11 +3228,10 @@ bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
MemRefBegin += X86II::getOperandBias(Desc);
- MachineOperand &BaseMO = MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
- if (!BaseMO.isReg()) // Can be an MO_FrameIndex
+ BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
+ if (!BaseOp->isReg()) // Can be an MO_FrameIndex
return false;
- BaseReg = BaseMO.getReg();
if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
return false;
@@ -3272,6 +3247,8 @@ bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
Offset = DispMO.getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
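A hedged caller sketch for the renamed hook (the wrapper function and includes are assumptions for illustration, not part of the patch): the new signature hands back the base operand itself rather than just a register number, and only succeeds for a plain [base + disp] form with scale 1 and no index register.

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"
  #include "llvm/Support/Debug.h"
  using namespace llvm;

  static void dumpSimpleAddress(const TargetInstrInfo &TII, MachineInstr &MemMI,
                                const TargetRegisterInfo *TRI) {
    MachineOperand *BaseOp = nullptr;
    int64_t Offset = 0;
    if (TII.getMemOperandWithOffset(MemMI, BaseOp, Offset, TRI))
      dbgs() << "base reg " << BaseOp->getReg() << ", offset " << Offset << "\n";
  }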
@@ -3303,29 +3280,25 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
(Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
- DebugLoc DL = MBB.findDebugLoc(MI);
- addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
.addReg(SrcReg, getKillRegState(isKill));
}
-void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
+void X86InstrInfo::storeRegToAddr(
+ MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
+ ArrayRef<MachineMemOperand *> MMOs,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = MMOBegin != MMOEnd &&
- (*MMOBegin)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.add(Addr[i]);
MIB.addReg(SrcReg, getKillRegState(isKill));
- (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
NewMIs.push_back(MIB);
}
@@ -3341,26 +3314,23 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
(Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
- DebugLoc DL = MBB.findDebugLoc(MI);
- addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx);
}
-void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
+void X86InstrInfo::loadRegFromAddr(
+ MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
+ ArrayRef<MachineMemOperand *> MMOs,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = MMOBegin != MMOEnd &&
- (*MMOBegin)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.add(Addr[i]);
- (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
NewMIs.push_back(MIB);
}
@@ -3451,9 +3421,10 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
/// This function can be extended later on.
/// SrcReg, SrcRegs: register operands for FlagI.
/// ImmValue: immediate for FlagI if it takes an immediate.
-inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg,
- unsigned SrcReg2, int ImmMask,
- int ImmValue, MachineInstr &OI) {
+inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
+ unsigned SrcReg, unsigned SrcReg2,
+ int ImmMask, int ImmValue,
+ const MachineInstr &OI) {
if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
(FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
(FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
@@ -3484,7 +3455,9 @@ inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg,
/// Check whether the definition can be converted
/// to remove a comparison against zero.
-inline static bool isDefConvertible(MachineInstr &MI) {
+inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
+ NoSignFlag = false;
+
switch (MI.getOpcode()) {
default: return false;
@@ -3549,8 +3522,6 @@ inline static bool isDefConvertible(MachineInstr &MI) {
case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1:
case X86::ANDN32rr: case X86::ANDN32rm:
case X86::ANDN64rr: case X86::ANDN64rm:
- case X86::BEXTR32rr: case X86::BEXTR64rr:
- case X86::BEXTR32rm: case X86::BEXTR64rm:
case X86::BLSI32rr: case X86::BLSI32rm:
case X86::BLSI64rr: case X86::BLSI64rm:
case X86::BLSMSK32rr:case X86::BLSMSK32rm:
@@ -3568,8 +3539,6 @@ inline static bool isDefConvertible(MachineInstr &MI) {
case X86::TZCNT16rr: case X86::TZCNT16rm:
case X86::TZCNT32rr: case X86::TZCNT32rm:
case X86::TZCNT64rr: case X86::TZCNT64rm:
- case X86::BEXTRI32ri: case X86::BEXTRI32mi:
- case X86::BEXTRI64ri: case X86::BEXTRI64mi:
case X86::BLCFILL32rr: case X86::BLCFILL32rm:
case X86::BLCFILL64rr: case X86::BLCFILL64rm:
case X86::BLCI32rr: case X86::BLCI32rm:
@@ -3584,12 +3553,23 @@ inline static bool isDefConvertible(MachineInstr &MI) {
case X86::BLSFILL64rr: case X86::BLSFILL64rm:
case X86::BLSIC32rr: case X86::BLSIC32rm:
case X86::BLSIC64rr: case X86::BLSIC64rm:
+ case X86::T1MSKC32rr: case X86::T1MSKC32rm:
+ case X86::T1MSKC64rr: case X86::T1MSKC64rm:
+ case X86::TZMSK32rr: case X86::TZMSK32rm:
+ case X86::TZMSK64rr: case X86::TZMSK64rm:
+ return true;
+ case X86::BEXTR32rr: case X86::BEXTR64rr:
+ case X86::BEXTR32rm: case X86::BEXTR64rm:
+ case X86::BEXTRI32ri: case X86::BEXTRI32mi:
+ case X86::BEXTRI64ri: case X86::BEXTRI64mi:
+ // BEXTR doesn't update the sign flag so we can't use it.
+ NoSignFlag = true;
return true;
}
}
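A condensed restatement of how this hunk cooperates with the optimizeCompareInstr change further down (the helper is illustrative, not part of the patch): isDefConvertible now reports NoSignFlag for the BEXTR/BEXTRI family, and a compare against zero may only be removed for an EFLAGS user that reads SF when the defining instruction actually produces SF.

  // True when eliminating the compare is still allowed for this user.
  static bool mayDropCmpForUser(bool NoSignFlag, bool UserReadsSignFlag) {
    return !(UserReadsSignFlag && NoSignFlag);
  }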
/// Check whether the use can be converted to remove a comparison against zero.
-static X86::CondCode isUseDefConvertible(MachineInstr &MI) {
+static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default: return X86::COND_INVALID;
case X86::LZCNT16rr: case X86::LZCNT16rm:
@@ -3604,12 +3584,12 @@ static X86::CondCode isUseDefConvertible(MachineInstr &MI) {
case X86::TZCNT32rr: case X86::TZCNT32rm:
case X86::TZCNT64rr: case X86::TZCNT64rm:
return X86::COND_B;
- case X86::BSF16rr:
- case X86::BSF16rm:
- case X86::BSF32rr:
- case X86::BSF32rm:
- case X86::BSF64rr:
- case X86::BSF64rm:
+ case X86::BSF16rr: case X86::BSF16rm:
+ case X86::BSF32rr: case X86::BSF32rm:
+ case X86::BSF64rr: case X86::BSF64rm:
+ case X86::BSR16rr: case X86::BSR16rm:
+ case X86::BSR32rr: case X86::BSR32rm:
+ case X86::BSR64rr: case X86::BSR64rm:
return X86::COND_E;
}
}
@@ -3687,8 +3667,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// instruction we can eliminate the compare iff the use sets EFLAGS in the
// right way.
bool ShouldUpdateCC = false;
+ bool NoSignFlag = false;
X86::CondCode NewCC = X86::COND_INVALID;
- if (IsCmpZero && !isDefConvertible(*MI)) {
+ if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) {
// Scan forward from the use until we hit the use we're looking for or the
// compare instruction.
for (MachineBasicBlock::iterator J = MI;; ++J) {
@@ -3807,6 +3788,12 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
case X86::COND_O: case X86::COND_NO:
// CF and OF are used, we can't perform this optimization.
return false;
+ case X86::COND_S: case X86::COND_NS:
+ // If SF is used, but the instruction doesn't update the SF, then we
+ // can't do the optimization.
+ if (NoSignFlag)
+ return false;
+ break;
}
// If we're updating the condition code check if we have to reverse the
@@ -4267,9 +4254,14 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::VMOVUPSZ256mr_NOVLX:
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
- case X86::MOV32ri64:
+ case X86::MOV32ri64: {
+ unsigned Reg = MIB->getOperand(0).getReg();
+ unsigned Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
MI.setDesc(get(X86::MOV32ri));
+ MIB->getOperand(0).setReg(Reg32);
+ MIB.addReg(Reg, RegState::ImplicitDefine);
return true;
+ }
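A brief note on why this expansion is sound, with a trivial standalone sketch (the function is illustrative, not part of the patch): an x86-64 write to a 32-bit register zero-extends into the containing 64-bit register, so emitting MOV32ri to the sub_32bit register while marking the full register as implicitly defined yields the same 64-bit value the MOV32ri64 pseudo promised.

  #include <cstdint>
  static uint64_t mov32ri64Result(uint32_t Imm) {
    return static_cast<uint64_t>(Imm); // upper 32 bits are zero, as on hardware
  }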
// KNL does not recognize dependency-breaking idioms for mask registers,
// so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
@@ -5353,6 +5345,54 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
/*Size=*/0, Alignment, /*AllowCommute=*/true);
}
+static SmallVector<MachineMemOperand *, 2>
+extractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
+ SmallVector<MachineMemOperand *, 2> LoadMMOs;
+
+ for (MachineMemOperand *MMO : MMOs) {
+ if (!MMO->isLoad())
+ continue;
+
+ if (!MMO->isStore()) {
+ // Reuse the MMO.
+ LoadMMOs.push_back(MMO);
+ } else {
+ // Clone the MMO and unset the store flag.
+ LoadMMOs.push_back(MF.getMachineMemOperand(
+ MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOStore,
+ MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
+ MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getFailureOrdering()));
+ }
+ }
+
+ return LoadMMOs;
+}
+
+static SmallVector<MachineMemOperand *, 2>
+extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
+ SmallVector<MachineMemOperand *, 2> StoreMMOs;
+
+ for (MachineMemOperand *MMO : MMOs) {
+ if (!MMO->isStore())
+ continue;
+
+ if (!MMO->isLoad()) {
+ // Reuse the MMO.
+ StoreMMOs.push_back(MMO);
+ } else {
+ // Clone the MMO and unset the load flag.
+ StoreMMOs.push_back(MF.getMachineMemOperand(
+ MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOLoad,
+ MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
+ MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getFailureOrdering()));
+ }
+ }
+
+ return StoreMMOs;
+}
+
bool X86InstrInfo::unfoldMemoryOperand(
MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
@@ -5397,9 +5437,8 @@ bool X86InstrInfo::unfoldMemoryOperand(
// Emit the load instruction.
if (UnfoldLoad) {
- std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs =
- MF.extractLoadMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
+ auto MMOs = extractLoadMMOs(MI.memoperands(), MF);
+ loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs, NewMIs);
if (UnfoldStore) {
// Address operands cannot be marked isKill.
for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
@@ -5464,9 +5503,8 @@ bool X86InstrInfo::unfoldMemoryOperand(
// Emit the store instruction.
if (UnfoldStore) {
const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
- std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs =
- MF.extractStoreMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
+ auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
+ storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs, NewMIs);
}
return true;
@@ -5511,26 +5549,21 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
SDNode *Load = nullptr;
if (FoldedLoad) {
EVT VT = *TRI.legalclasstypes_begin(*RC);
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
- if (!(*MMOs.first) &&
- RC == &X86::VR128RegClass &&
+ auto MMOs = extractLoadMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
+ if (MMOs.empty() && RC == &X86::VR128RegClass &&
Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned load.
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = (*MMOs.first) &&
- (*MMOs.first)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
VT, MVT::Other, AddrOps);
NewNodes.push_back(Load);
// Preserve memory reference information.
- cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Load), MMOs);
}
// Emit the data processing instruction.
@@ -5580,27 +5613,22 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
AddrOps.pop_back();
AddrOps.push_back(SDValue(NewNode, 0));
AddrOps.push_back(Chain);
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
- if (!(*MMOs.first) &&
- RC == &X86::VR128RegClass &&
+ auto MMOs = extractStoreMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
+ if (MMOs.empty() && RC == &X86::VR128RegClass &&
Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned store.
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = (*MMOs.first) &&
- (*MMOs.first)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
SDNode *Store =
DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
dl, MVT::Other, AddrOps);
NewNodes.push_back(Store);
// Preserve memory reference information.
- cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Store), MMOs);
}
return true;
@@ -6511,6 +6539,19 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
// All domains are valid.
return 0xe;
+ case X86::MOVHLPSrr:
+ // We can swap domains when both inputs are the same register.
+ // FIXME: This doesn't catch all the cases we would like. If the input
+ // register isn't KILLed by the instruction, the two-address instruction
+ // pass puts a COPY on one input. The other input uses the original
+ // register. This prevents the same physical register from being used by
+ // both inputs.
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
+ MI.getOperand(0).getSubReg() == 0 &&
+ MI.getOperand(1).getSubReg() == 0 &&
+ MI.getOperand(2).getSubReg() == 0)
+ return 0x6;
+ return 0;
}
return 0;
}
@@ -6617,6 +6658,20 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
MI.setDesc(get(table[Domain - 1]));
return true;
}
+ case X86::UNPCKHPDrr:
+ case X86::MOVHLPSrr:
+ // We just need to commute the instruction which will switch the domains.
+ if (Domain != dom && Domain != 3 &&
+ MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
+ MI.getOperand(0).getSubReg() == 0 &&
+ MI.getOperand(1).getSubReg() == 0 &&
+ MI.getOperand(2).getSubReg() == 0) {
+ commuteInstruction(MI, false);
+ return true;
+ }
+ // We must always return true for MOVHLPSrr.
+ if (Opcode == X86::MOVHLPSrr)
+ return true;
}
return false;
}
@@ -7339,7 +7394,8 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
{MO_TLVP, "x86-tlvp"},
{MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
- {MO_SECREL, "x86-secrel"}};
+ {MO_SECREL, "x86-secrel"},
+ {MO_COFFSTUB, "x86-coffstub"}};
return makeArrayRef(TargetFlags);
}
@@ -7396,12 +7452,28 @@ namespace {
.addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
.addReg(0);
} else if (TM->getCodeModel() == CodeModel::Large) {
- // Loading the GOT in the large code model requires math with labels,
- // so we use a pseudo instruction and expand it during MC emission.
- unsigned Scratch = RegInfo.createVirtualRegister(&X86::GR64RegClass);
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVGOT64r), PC)
- .addReg(Scratch, RegState::Undef | RegState::Define)
- .addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
+ // In the large code model, we are aiming for this code, though the
+ // register allocation may vary:
+ // leaq .LN$pb(%rip), %rax
+ // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
+ // addq %rcx, %rax
+ // RAX now holds address of _GLOBAL_OFFSET_TABLE_.
+ unsigned PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ unsigned GOTReg =
+ RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addSym(MF.getPICBaseSymbol())
+ .addReg(0);
+ std::prev(MBBI)->setPreInstrSymbol(MF, MF.getPICBaseSymbol());
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOV64ri), GOTReg)
+ .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_PIC_BASE_OFFSET);
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD64rr), PC)
+ .addReg(PBReg, RegState::Kill)
+ .addReg(GOTReg, RegState::Kill);
} else {
llvm_unreachable("unexpected code model");
}
@@ -7736,3 +7808,6 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
return It;
}
+
+#define GET_INSTRINFO_HELPERS
+#include "X86GenInstrInfo.inc"
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index b1ceb767cce4..159cb50afc5c 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -117,6 +117,7 @@ inline static bool isGlobalStubReference(unsigned char TargetFlag) {
case X86II::MO_GOT: // normal GOT reference.
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref.
case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref.
+ case X86II::MO_COFFSTUB: // COFF .refptr stub.
return true;
default:
return false;
@@ -257,7 +258,7 @@ public:
/// operand to the LEA instruction.
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc,
- bool &isKill, bool &isUndef, MachineOperand &ImplicitOp,
+ bool &isKill, MachineOperand &ImplicitOp,
LiveVariables *LV) const;
/// convertToThreeAddress - This method must be implemented by targets that
@@ -326,9 +327,9 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
bool analyzeBranchPredicate(MachineBasicBlock &MBB,
TargetInstrInfo::MachineBranchPredicate &MBP,
bool AllowModify = false) const override;
@@ -348,8 +349,6 @@ public:
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, unsigned SrcReg,
bool isKill, int FrameIndex,
@@ -359,8 +358,7 @@ public:
void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
+ ArrayRef<MachineMemOperand *> MMOs,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -371,8 +369,7 @@ public:
void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
+ ArrayRef<MachineMemOperand *> MMOs,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
bool expandPostRAPseudo(MachineInstr &MI) const override;
@@ -561,6 +558,9 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
+#define GET_INSTRINFO_HELPER_DECLS
+#include "X86GenInstrInfo.inc"
+
protected:
/// Commutes the operands in the given instruction by changing the operands
/// order and/or changing the instruction's opcode and/or the immediate value
@@ -577,7 +577,16 @@ protected:
unsigned CommuteOpIdx1,
unsigned CommuteOpIdx2) const override;
+ /// If the specified machine instruction is an instruction that moves/copies
+ /// a value from one register to another, return true along with the
+ /// @Source machine operand and the @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
private:
+ /// This is a helper for convertToThreeAddress for 8- and 16-bit instructions.
+ /// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
+ /// super-register and then truncating back down to an 8/16-bit sub-register.
MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
MachineInstr &MI,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index bc7afd32d494..e53f83baa3c6 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -17,10 +17,6 @@
// X86 specific DAG Nodes.
//
-def SDTIntShiftDOp: SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisInt<3>]>;
-
def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
@@ -257,8 +253,6 @@ def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
-def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
-def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
@@ -282,15 +276,10 @@ def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
SDNPMemOperand]>;
-def X86lock_inc : SDNode<"X86ISD::LINC", SDTLockUnaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-def X86lock_dec : SDNode<"X86ISD::LDEC", SDTLockUnaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-
def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
+def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>;
+
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
@@ -955,8 +944,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
-def UseRetpoline : Predicate<"Subtarget->useRetpoline()">;
-def NotUseRetpoline : Predicate<"!Subtarget->useRetpoline()">;
+def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
+def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -1210,12 +1199,12 @@ def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>,
OpSize32, Requires<[Not64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayLoad, SchedRW
-let mayStore = 1, mayLoad = 1, SchedRW = [WriteRMW] in {
+let mayStore = 1, mayLoad = 1, SchedRW = [WriteCopy] in {
def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", []>,
OpSize16;
def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", []>,
OpSize32, Requires<[Not64BitMode]>;
-} // mayStore, mayLoad, WriteRMW
+} // mayStore, mayLoad, SchedRW
let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
@@ -1243,7 +1232,7 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
Requires<[Not64BitMode]>;
} // mayStore, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src", []>,
OpSize16;
def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", []>,
@@ -1302,7 +1291,7 @@ def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>,
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayLoad, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in
def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", []>,
OpSize32, Requires<[In64BitMode]>;
let mayStore = 1, SchedRW = [WriteStore] in {
@@ -1314,7 +1303,7 @@ def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>,
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayStore, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>,
OpSize32, Requires<[In64BitMode]>;
} // mayLoad, mayStore, SchedRW
@@ -1491,7 +1480,7 @@ def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, imm:$src)]>;
@@ -1505,7 +1494,7 @@ def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, i64immSExt32:$src)]>;
}
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isMoveImm = 1 in {
def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
"movabs{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, relocImm:$src)]>;
@@ -1771,7 +1760,7 @@ def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
// only for now. These instructions are also slow on modern CPUs so that's
// another reason to avoid generating them.
-let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
+let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteBitTestRegLd] in {
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[]>, OpSize16, TB, NotMemoryFoldable;
@@ -1799,7 +1788,7 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
// Note that these instructions aren't slow because that only applies when the
// other operand is in a register. When it's an immediate, bt is still fast.
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTestImmLd] in {
def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi16 addr:$src1),
@@ -1818,7 +1807,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
} // SchedRW
let hasSideEffects = 0 in {
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1830,7 +1819,7 @@ def BTC64rr : RI<0xBB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2
NotMemoryFoldable;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1842,7 +1831,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
@@ -1851,7 +1840,7 @@ def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$sr
"btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
@@ -1861,7 +1850,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1873,7 +1862,7 @@ def BTR64rr : RI<0xB3, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2
NotMemoryFoldable;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1885,7 +1874,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
@@ -1896,7 +1885,7 @@ def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$sr
"btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
@@ -1908,7 +1897,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1920,7 +1909,7 @@ def BTS64rr : RI<0xAB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2
NotMemoryFoldable;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1932,7 +1921,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
@@ -1941,7 +1930,7 @@ def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$sr
"bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
@@ -1994,7 +1983,7 @@ multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag>
defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap">, NotMemoryFoldable;
// Swap between registers.
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteXCHG] in {
let Constraints = "$src1 = $dst1, $src2 = $dst2", hasSideEffects = 0 in {
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst1, GR8:$dst2),
(ins GR8:$src1, GR8:$src2),
@@ -2027,7 +2016,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
} // SchedRW
let hasSideEffects = 0, Constraints = "$src1 = $dst1, $src2 = $dst2",
- Defs = [EFLAGS], SchedRW = [WriteALU] in {
+ Defs = [EFLAGS], SchedRW = [WriteXCHG] in {
def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst1, GR8:$dst2),
(ins GR8:$src1, GR8:$src2),
"xadd{b}\t{$src2, $src1|$src1, $src2}", []>, TB;
@@ -2061,7 +2050,7 @@ def XADD64rm : RI<0xC1, MRMSrcMem, (outs GR64:$dst),
}
-let SchedRW = [WriteALU], hasSideEffects = 0 in {
+let SchedRW = [WriteCMPXCHG], hasSideEffects = 0 in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB,
@@ -2080,7 +2069,7 @@ def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
NotMemoryFoldable;
} // SchedRW, hasSideEffects
-let SchedRW = [WriteALULd, WriteRMW], mayLoad = 1, mayStore = 1,
+let SchedRW = [WriteCMPXCHGRMW], mayLoad = 1, mayStore = 1,
hasSideEffects = 0 in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -2368,11 +2357,11 @@ multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
let hasSideEffects = 0 in {
def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[WriteALU]>;
+ T8PS, VEX_4V, Sched<[WriteBLS]>;
let mayLoad = 1 in
def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[WriteALULd]>;
+ T8PS, VEX_4V, Sched<[WriteBLS.Folded]>;
}
}
@@ -2389,6 +2378,16 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
// Pattern fragments to auto generate BMI instructions.
//===----------------------------------------------------------------------===//
+def or_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86or_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86xor_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
let Predicates = [HasBMI] in {
// FIXME: patterns for the load versions are not implemented
def : Pat<(and GR32:$src, (add GR32:$src, -1)),
@@ -2405,6 +2404,14 @@ let Predicates = [HasBMI] in {
(BLSI32rr GR32:$src)>;
def : Pat<(and GR64:$src, (ineg GR64:$src)),
(BLSI64rr GR64:$src)>;
+
+ // Versions to match flag producing ops.
+ // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
+ // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+ def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ (BLSMSK32rr GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ (BLSMSK64rr GR64:$src)>;
}
multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
@@ -2423,7 +2430,7 @@ multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ Sched.ReadAfterFold]>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -2449,14 +2456,14 @@ multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ Sched.ReadAfterFold]>;
}
let Predicates = [HasBMI2], Defs = [EFLAGS] in {
defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
- int_x86_bmi_bzhi_32, loadi32, WriteBZHI>;
+ X86bzhi, loadi32, WriteBZHI>;
defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
- int_x86_bmi_bzhi_64, loadi64, WriteBZHI>, VEX_W;
+ X86bzhi, loadi64, WriteBZHI>, VEX_W;
}
def CountTrailingOnes : SDNodeXForm<imm, [{
@@ -2497,84 +2504,6 @@ let Predicates = [HasBMI2, NoTBM] in {
(MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
}
-let Predicates = [HasBMI2] in {
- multiclass _bmi_bzhi_pattern<dag regpattern, dag mempattern, RegisterClass RC,
- ValueType VT, Instruction DstInst,
- Instruction DstMemInst> {
- def : Pat<regpattern,
- (DstInst RC:$src,
- (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
- def : Pat<mempattern,
- (DstMemInst addr:$src,
- (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
- }
-
- multiclass bmi_bzhi_patterns<RegisterClass RC, int bitwidth, ValueType VT,
- Instruction DstInst, X86MemOperand x86memop,
- Instruction DstMemInst> {
- // x & ((1 << y) - 1)
- defm : _bmi_bzhi_pattern<(and RC:$src, (add (shl 1, GR8:$lz), -1)),
- (and (x86memop addr:$src),
- (add (shl 1, GR8:$lz), -1)),
- RC, VT, DstInst, DstMemInst>;
-
- // x & ~(-1 << y)
- defm : _bmi_bzhi_pattern<(and RC:$src, (xor (shl -1, GR8:$lz), -1)),
- (and (x86memop addr:$src),
- (xor (shl -1, GR8:$lz), -1)),
- RC, VT, DstInst, DstMemInst>;
-
- // x & (-1 >> (bitwidth - y))
- defm : _bmi_bzhi_pattern<(and RC:$src, (srl -1, (sub bitwidth, GR8:$lz))),
- (and (x86memop addr:$src),
- (srl -1, (sub bitwidth, GR8:$lz))),
- RC, VT, DstInst, DstMemInst>;
-
- // x << (bitwidth - y) >> (bitwidth - y)
- defm : _bmi_bzhi_pattern<(srl (shl RC:$src, (sub bitwidth, GR8:$lz)),
- (sub bitwidth, GR8:$lz)),
- (srl (shl (x86memop addr:$src),
- (sub bitwidth, GR8:$lz)),
- (sub bitwidth, GR8:$lz)),
- RC, VT, DstInst, DstMemInst>;
- }
-
- defm : bmi_bzhi_patterns<GR32, 32, i32, BZHI32rr, loadi32, BZHI32rm>;
- defm : bmi_bzhi_patterns<GR64, 64, i64, BZHI64rr, loadi64, BZHI64rm>;
-
- // x & (-1 >> (32 - y))
- def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
- (BZHI32rr GR32:$src, GR32:$lz)>;
- def : Pat<(and (loadi32 addr:$src), (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
- (BZHI32rm addr:$src, GR32:$lz)>;
-
- // x & (-1 >> (64 - y))
- def : Pat<(and GR64:$src, (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
- (BZHI64rr GR64:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
- def : Pat<(and (loadi64 addr:$src), (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
- (BZHI64rm addr:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
-
- // x << (32 - y) >> (32 - y)
- def : Pat<(srl (shl GR32:$src, (i8 (trunc (sub 32, GR32:$lz)))),
- (i8 (trunc (sub 32, GR32:$lz)))),
- (BZHI32rr GR32:$src, GR32:$lz)>;
- def : Pat<(srl (shl (loadi32 addr:$src), (i8 (trunc (sub 32, GR32:$lz)))),
- (i8 (trunc (sub 32, GR32:$lz)))),
- (BZHI32rm addr:$src, GR32:$lz)>;
-
- // x << (64 - y) >> (64 - y)
- def : Pat<(srl (shl GR64:$src, (i8 (trunc (sub 64, GR32:$lz)))),
- (i8 (trunc (sub 64, GR32:$lz)))),
- (BZHI64rr GR64:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
- def : Pat<(srl (shl (loadi64 addr:$src), (i8 (trunc (sub 64, GR32:$lz)))),
- (i8 (trunc (sub 64, GR32:$lz)))),
- (BZHI64rm addr:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
-} // HasBMI2
-
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
X86MemOperand x86memop, Intrinsic Int,
PatFrag ld_frag> {
@@ -2585,7 +2514,7 @@ multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))]>,
- VEX_4V, Sched<[WriteALULd, ReadAfterLd]>;
+ VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}
let Predicates = [HasBMI2] in {
@@ -2881,6 +2810,45 @@ let Predicates = [HasTBM] in {
(TZMSK32rr GR32:$src)>;
def : Pat<(and (not GR64:$src), (add GR64:$src, -1)),
(TZMSK64rr GR64:$src)>;
+
+ // Patterns to match flag producing ops.
+ // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
+ // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+ def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))),
+ (BLCI64rr GR64:$src)>;
+
+ // Extra patterns because opt can optimize the above patterns to this.
+ def : Pat<(or_flag_nocf GR32:$src, (sub -2, GR32:$src)),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (sub -2, GR64:$src)),
+ (BLCI64rr GR64:$src)>;
+
+ def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCMSK32rr GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCMSK64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCS32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCS64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ (BLSFILL32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ (BLSFILL64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
+ (BLSIC32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
+ (BLSIC64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
+ (T1MSKC32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
+ (T1MSKC64rr GR64:$src)>;
} // HasTBM
//===----------------------------------------------------------------------===//
@@ -2976,6 +2944,8 @@ def : MnemonicAlias<"popf", "popfl", "att">, Requires<[In32BitMode]>;
def : MnemonicAlias<"popf", "popfq", "att">, Requires<[In64BitMode]>;
def : MnemonicAlias<"popf", "popfq", "intel">, Requires<[In64BitMode]>;
def : MnemonicAlias<"popfd", "popfl", "att">;
+def : MnemonicAlias<"popfw", "popf", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popfw", "popf", "intel">, Requires<[In64BitMode]>;
// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
// all modes. However: "push (addr)" and "push $42" should default to
@@ -2988,6 +2958,8 @@ def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[In32BitMode]>;
def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>;
def : MnemonicAlias<"pushf", "pushfq", "intel">, Requires<[In64BitMode]>;
def : MnemonicAlias<"pushfd", "pushfl", "att">;
+def : MnemonicAlias<"pushfw", "pushf", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushfw", "pushf", "intel">, Requires<[In64BitMode]>;
def : MnemonicAlias<"popad", "popal", "intel">, Requires<[Not64BitMode]>;
def : MnemonicAlias<"pushad", "pushal", "intel">, Requires<[Not64BitMode]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
index aefeffedfc1a..8f3357170576 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -47,7 +47,7 @@ let Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
@@ -64,7 +64,7 @@ let Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -105,7 +105,7 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
[(set VR64:$dst,
(IntId64 VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -122,7 +122,7 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -553,7 +553,7 @@ let Predicates = [HasMMX, HasSSE1] in {
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
imm:$src3))]>,
- Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index 6a9b20998210..e2bcd18ce660 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -35,7 +35,7 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
@@ -57,7 +57,7 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -81,7 +81,7 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
@@ -103,7 +103,7 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
pat_rm, d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -265,8 +265,6 @@ let Predicates = [UseAVX] in {
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
@@ -349,8 +347,6 @@ let Predicates = [UseSSE2] in {
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
}
@@ -593,8 +589,21 @@ let Predicates = [HasAVX, NoVLX] in {
// available and changing the domain is beneficial.
def : Pat<(alignedloadv4i64 addr:$src),
(VMOVAPSYrm addr:$src)>;
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(alignedloadv16i16 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(alignedloadv32i8 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
def : Pat<(loadv4i64 addr:$src),
(VMOVUPSYrm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(loadv16i16 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(loadv32i8 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+
def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
@@ -619,8 +628,20 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [UseSSE1] in {
def : Pat<(alignedloadv2i64 addr:$src),
(MOVAPSrm addr:$src)>;
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(alignedloadv8i16 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(alignedloadv16i8 addr:$src),
+ (MOVAPSrm addr:$src)>;
def : Pat<(loadv2i64 addr:$src),
(MOVUPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(loadv8i16 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(loadv16i8 addr:$src),
+ (MOVUPSrm addr:$src)>;
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, VR128:$src)>;
@@ -652,7 +673,7 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode pdnode,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
[], SSEPackedSingle>, PS,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
def PDrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
@@ -660,7 +681,7 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode pdnode,
[(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
SSEPackedDouble>, PD,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
@@ -820,19 +841,6 @@ let Constraints = "$src1 = $dst" in {
Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}
-// TODO: This is largely to trick fastisel into ignoring the pattern.
-def UnpckhUnary : PatFrag<(ops node:$src1, node:$src2),
- (X86Unpckh node:$src1, node:$src2), [{
- return N->getOperand(0) == N->getOperand(1);
-}]>;
-
-let Predicates = [UseSSE2] in {
- // TODO: This is a hack pattern to allow lowering to emit unpckh instead of
- // movhlps for sse2 without changing a bunch of tests.
- def : Pat<(v2f64 (UnpckhUnary VR128:$src, VR128:$src)),
- (MOVHLPSrr VR128:$src, VR128:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//
@@ -858,7 +866,7 @@ let hasSideEffects = 0 in {
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
[(set RC:$dst, (DstTy (sint_to_fp
- (SrcTy (bitconvert (ld_frag addr:$src))))))], d>,
+ (SrcTy (ld_frag addr:$src)))))], d>,
Sched<[sched.Folded]>;
}
}
@@ -874,7 +882,7 @@ let hasSideEffects = 0, Predicates = [UseAVX] in {
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
}
@@ -1001,18 +1009,17 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
-// FIXME: We probably want to match the rm form only when optimizing for
-// size, to avoid false depenendecies (see sse_fp_unop_s for details)
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
- string asm, X86FoldableSchedWrite sched> {
+ ValueType DstVT, ValueType SrcVT, SDNode OpNode,
+ Operand memop, ComplexPattern mem_cpat, string asm,
+ X86FoldableSchedWrite sched> {
def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))]>,
+ [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
Sched<[sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int mem_cpat:$src))]>,
+ [(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>,
Sched<[sched.Folded]>;
}
@@ -1032,21 +1039,21 @@ let hasSideEffects = 0 in {
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let Predicates = [UseAVX] in {
-defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
- int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
+defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
+ X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
WriteCvtSD2I>, XD, VEX, VEX_LIG;
-defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
+defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
+ X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
WriteCvtSD2I>, XD, VEX, VEX_W, VEX_LIG;
}
-defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD;
-defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W;
@@ -1078,60 +1085,60 @@ let isCodeGenOnly = 1 in {
// Aliases for intrinsics
let isCodeGenOnly = 1 in {
let Predicates = [UseAVX] in {
-defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
WriteCvtSS2I>, XS, VEX;
-defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
+ X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I>,
XS, VEX, VEX_W;
-defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
WriteCvtSS2I>, XD, VEX;
-defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
+ X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSS2I>,
XD, VEX, VEX_W;
}
-defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
WriteCvtSS2I>, XS;
-defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
+ X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I>, XS, REX_W;
-defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
WriteCvtSD2I>, XD;
-defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
+ X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSD2I>, XD, REX_W;
} // isCodeGenOnly = 1
let Predicates = [UseAVX] in {
-defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, VEX, VEX_LIG;
-defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG;
}
-defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS;
-defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, REX_W;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, loadv2i64,
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, loadv4i64,
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PSY>,
PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memopv2i64,
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
PS, Requires<[UseSSE2]>;
@@ -1186,7 +1193,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XD, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
def : Pat<(f32 (fpround FR64:$src)),
@@ -1217,7 +1224,7 @@ def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1231,7 +1238,7 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, Requires<[UseSSE2]>,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
} // isCodeGenOnly = 1
@@ -1248,7 +1255,7 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>,
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
Requires<[UseAVX, OptForSize]>;
}
@@ -1295,7 +1302,7 @@ def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1307,7 +1314,7 @@ def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[]>, XS, Requires<[UseSSE2]>,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
}
} // isCodeGenOnly = 1
@@ -1690,7 +1697,7 @@ let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>,
+ (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1700,7 +1707,7 @@ def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))))]>,
+ (v4f64 (sint_to_fp (loadv4i32 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
VEX_WIG;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
@@ -1714,7 +1721,7 @@ let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>,
+ (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
Sched<[WriteCvtI2PDLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1826,7 +1833,7 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
@@ -1836,7 +1843,7 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$cc), asm_alt, []>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
}
}
@@ -1878,7 +1885,7 @@ let mayLoad = 1 in
(ins VR128:$src1, memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
mem_cpat:$src, imm:$cc))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let isCodeGenOnly = 1 in {
@@ -1920,7 +1927,7 @@ let mayLoad = 1 in
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -1938,7 +1945,7 @@ let mayLoad = 1 in
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
mem_cpat:$src2))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Defs = [EFLAGS] in {
@@ -2003,7 +2010,7 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst,
(VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
@@ -2013,7 +2020,7 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
let mayLoad = 1 in
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
- asm_alt, [], d>, Sched<[sched.Folded, ReadAfterLd]>,
+ asm_alt, [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
@@ -2109,7 +2116,7 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
@@ -2165,58 +2172,58 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
(mem_frag addr:$src2))))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX] in {
-defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
+defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
-defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
+defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG;
-defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
+defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
-defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
+defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
-defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
+defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
-defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
+defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
-defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
+defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
-defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
+defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
}// Predicates = [HasAVX, NoVLX]
let Constraints = "$src1 = $dst" in {
- defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
- defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop,
VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
- defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop,
VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
- defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop,
VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
} // Constraints = "$src1 = $dst"
let Predicates = [HasAVX1Only] in {
- def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
@@ -2253,6 +2260,16 @@ let Predicates = [HasAVX] in {
SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG;
defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG;
+
+ // Also support integer VTs to avoid a int->fp bitcast in the DAG.
+ def : Pat<(X86movmsk (v4i32 VR128:$src)),
+ (VMOVMSKPSrr VR128:$src)>;
+ def : Pat<(X86movmsk (v2i64 VR128:$src)),
+ (VMOVMSKPDrr VR128:$src)>;
+ def : Pat<(X86movmsk (v8i32 VR256:$src)),
+ (VMOVMSKPSYrr VR256:$src)>;
+ def : Pat<(X86movmsk (v4i64 VR256:$src)),
+ (VMOVMSKPDYrr VR256:$src)>;
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
@@ -2260,6 +2277,14 @@ defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
SSEPackedDouble>, PD;
+let Predicates = [UseSSE2] in {
+ // Also support integer VTs to avoid a int->fp bitcast in the DAG.
+ def : Pat<(X86movmsk (v4i32 VR128:$src)),
+ (MOVMSKPSrr VR128:$src)>;
+ def : Pat<(X86movmsk (v2i64 VR128:$src)),
+ (MOVMSKPDrr VR128:$src)>;
+}
+
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
@@ -2284,9 +2309,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt
@@ -2296,16 +2320,16 @@ multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
Predicate prd> {
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
- VR128, loadv2i64, i128mem, sched.XMM,
+ VR128, load, i128mem, sched.XMM,
IsCommutable, 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
- memopv2i64, i128mem, sched.XMM, IsCommutable, 1>;
+ memop, i128mem, sched.XMM, IsCommutable, 1>;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
- OpVT256, VR256, loadv4i64, i256mem, sched.YMM,
+ OpVT256, VR256, load, i256mem, sched.YMM,
IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
}
@@ -2365,24 +2389,136 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
let isCommutable = 0 in
defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;
+let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
+ (VPORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
+ (VPORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
+ (VPORYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPANDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPANDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPANDYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPORYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPXORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPXORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPXORYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPANDNYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPANDNYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPANDNYrm VR256:$src1, addr:$src2)>;
+}
+
// If only AVX1 is supported, we need to handle integer operations with
// floating point instructions since the integer versions aren't available.
let Predicates = [HasAVX1Only] in {
+ def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
(VANDPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
(VORPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
(VXORPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
(VANDNPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
(VANDPSYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),
(VORPSYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)),
(VXORPSYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)),
(VANDNPSYrm VR256:$src1, addr:$src2)>;
}
@@ -2480,6 +2616,122 @@ let Predicates = [UseSSE2] in {
FR64)>;
}
+let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
+ (VPANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
+ (VPANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
+ (VPANDrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
+ (VPORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
+ (VPORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
+ (VPORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
+ (VPXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
+ (VPXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
+ (VPXORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPANDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPXORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPANDNrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [UseSSE2] in {
+ def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
+ (PANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
+ (PANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
+ (PANDrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
+ (PORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
+ (PORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
+ (PORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
+ (PXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
+ (PXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
+ (PXORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)),
+ (PANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)),
+ (PANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)),
+ (PANDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)),
+ (PORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)),
+ (PORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)),
+ (PORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)),
+ (PXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)),
+ (PXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)),
+ (PXORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)),
+ (PANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)),
+ (PANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)),
+ (PANDNrm VR128:$src1, addr:$src2)>;
+}
+
// Patterns for packed operations when we don't have integer type available.
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
(ANDPSrr VR128:$src1, VR128:$src2)>;
@@ -2713,7 +2965,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
!strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
[(set RC:$dst, (OpNode (load addr:$src1)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded]>,
Requires<[target, OptForSize]>;
let isCodeGenOnly = 1, Constraints = "$src1 = $dst", ExeDomain = d in {
@@ -2723,7 +2975,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
let mayLoad = 1 in
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2777,7 +3029,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], d>, Sched<[sched.Folded, ReadAfterLd]>;
+ [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, ExeDomain = d in {
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -2787,7 +3039,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, intmemop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -3306,6 +3558,19 @@ def : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}",
let Predicates = [HasAVX, NoVLX] in {
// Additional patterns for other integer sizes.
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVDQArm addr:$src)>;
+ def : Pat<(alignedloadv8i16 addr:$src),
+ (VMOVDQArm addr:$src)>;
+ def : Pat<(alignedloadv16i8 addr:$src),
+ (VMOVDQArm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVDQUrm addr:$src)>;
+ def : Pat<(loadv8i16 addr:$src),
+ (VMOVDQUrm addr:$src)>;
+ def : Pat<(loadv16i8 addr:$src),
+ (VMOVDQUrm addr:$src)>;
+
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
(VMOVDQAmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
@@ -3345,8 +3610,8 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt
@@ -3358,13 +3623,13 @@ defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
SchedWriteVecALU, 1, NoVLX>;
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
SchedWriteVecALU, 1, NoVLX>;
-defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
+defm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
+defm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDUSB : PDI_binop_all<0xDC, "paddusb", X86addus, v16i8, v32i8,
+defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDUSW : PDI_binop_all<0xDD, "paddusw", X86addus, v8i16, v16i16,
+defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
@@ -3380,13 +3645,13 @@ defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
SchedWriteVecALU, 0, NoVLX>;
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
SchedWriteVecALU, 0, NoVLX>;
-defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8,
+defm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16,
+defm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
+defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
+defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
@@ -3405,28 +3670,28 @@ defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
- loadv2i64, i128mem, SchedWriteVecIMul.XMM, 0>,
+ load, i128mem, SchedWriteVecIMul.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
- VR256, loadv4i64, i256mem, SchedWriteVecIMul.YMM,
+ VR256, load, i256mem, SchedWriteVecIMul.YMM,
0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
- memopv2i64, i128mem, SchedWriteVecIMul.XMM>;
+ memop, i128mem, SchedWriteVecIMul.XMM>;
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
- loadv2i64, i128mem, SchedWritePSADBW.XMM, 0>,
+ load, i128mem, SchedWritePSADBW.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
- loadv4i64, i256mem, SchedWritePSADBW.YMM, 0>,
+ load, i256mem, SchedWritePSADBW.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
- memopv2i64, i128mem, SchedWritePSADBW.XMM>;
+ memop, i128mem, SchedWritePSADBW.XMM>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
@@ -3453,8 +3718,8 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1,
- (SrcVT (bitconvert (ld_frag addr:$src2))))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (SrcVT (ld_frag addr:$src2)))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
!if(Is2Addr,
@@ -3473,16 +3738,16 @@ multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM,
- DstVT128, SrcVT, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ DstVT128, SrcVT, load, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM,
- DstVT256, SrcVT, loadv2i64, 0>, VEX_4V, VEX_L,
+ DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L,
VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT,
- memopv2i64>;
+ memop>;
}
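Two substitutions repeat throughout these integer multiclasses. First, the memory operand is matched with the width-only load/memop fragments, the element type now coming from the surrounding (SrcVT ...) or result-type cast rather than from an explicit bitconvert of a loadv2i64/loadv4i64/memopv2i64 node. Second, the folded-load alternatives take their read-advance from the scheduling class itself, sched.ReadAfterFold, instead of the single global ReadAfterLd token. A minimal sketch of the wrapper the latter presumably comes from (field names assumed, presumably in X86Schedule.td):

// Hedged sketch of the scheduling wrapper implied by 'sched.ReadAfterFold'.
class X86FoldableSchedWrite : SchedWrite {
  // SchedWrite to use when the load is folded into the instruction.
  SchedWrite Folded;
  // Read-advance applied to the register operands of the folded form.
  SchedRead ReadAfterFold;
}

Width bundles such as SchedWriteVecALU or SchedWriteVecTest then expose one such wrapper per vector width, which is why later hunks can also write SchedWriteVecTest.XMM.ReadAfterFold and WritePCmpIStrM.ReadAfterFold directly.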
multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
@@ -3582,7 +3847,7 @@ let Predicates = [HasAVX, prd] in {
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)),
+ (vt128 (OpNode (load addr:$src1),
(i8 imm:$src2))))]>, VEX,
Sched<[sched.XMM.Folded]>, VEX_WIG;
}
@@ -3600,7 +3865,7 @@ let Predicates = [HasAVX2, prd] in {
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)),
+ (vt256 (OpNode (load addr:$src1),
(i8 imm:$src2))))]>, VEX, VEX_L,
Sched<[sched.YMM.Folded]>, VEX_WIG;
}
@@ -3618,7 +3883,7 @@ let Predicates = [UseSSE2] in {
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (vt128 (OpNode (memop addr:$src1),
(i8 imm:$src2))))]>,
Sched<[sched.XMM.Folded]>;
}
@@ -3658,8 +3923,8 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OutVT (OpNode (ArgVT RC:$src1),
- (bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (ld_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
@@ -3683,53 +3948,53 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OutVT (OpNode (ArgVT RC:$src1),
- (bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (ld_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
}
} // ExeDomain = SSEPackedInt
@@ -3754,89 +4019,88 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1,
- (bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
}
} // ExeDomain = SSEPackedInt
@@ -3864,7 +4128,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
imm:$src3))]>,
- Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
// Extract
@@ -4155,7 +4419,7 @@ let Predicates = [UseAVX] in {
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(VMOVDI2PDIrm addr:$src)>;
@@ -4180,7 +4444,7 @@ let Predicates = [UseSSE2] in {
(MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(MOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
(MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(MOVDI2PDIrm addr:$src)>;
@@ -4335,30 +4599,30 @@ defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(VMOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
(VMOVSHDUPrm addr:$src)>;
def : Pat<(v4i32 (X86Movsldup VR128:$src)),
(VMOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
(VMOVSLDUPrm addr:$src)>;
def : Pat<(v8i32 (X86Movshdup VR256:$src)),
(VMOVSHDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (loadv4i64 addr:$src)))),
+ def : Pat<(v8i32 (X86Movshdup (load addr:$src))),
(VMOVSHDUPYrm addr:$src)>;
def : Pat<(v8i32 (X86Movsldup VR256:$src)),
(VMOVSLDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (loadv4i64 addr:$src)))),
+ def : Pat<(v8i32 (X86Movsldup (load addr:$src))),
(VMOVSLDUPYrm addr:$src)>;
}
let Predicates = [UseSSE3] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(MOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movshdup (memop addr:$src))),
(MOVSHDUPrm addr:$src)>;
def : Pat<(v4i32 (X86Movsldup VR128:$src)),
(MOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movsldup (memop addr:$src))),
(MOVSLDUPrm addr:$src)>;
}
@@ -4405,12 +4669,16 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}
let Predicates = [UseSSE3] in {
// No need for aligned memory as this only loads 64-bits.
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+ (MOVDDUPrm addr:$src)>;
}
//===---------------------------------------------------------------------===//
@@ -4453,7 +4721,7 @@ multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
@@ -4504,7 +4772,7 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode,
@@ -4522,7 +4790,7 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
@@ -4580,7 +4848,7 @@ multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
(ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (vt (OpNode (bitconvert (ld_frag addr:$src)))))]>,
+ (vt (OpNode (ld_frag addr:$src))))]>,
Sched<[sched.XMM.Folded]>;
}
@@ -4597,19 +4865,19 @@ multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
(ins i256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
- (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))]>,
+ (vt (OpNode (load addr:$src))))]>,
Sched<[sched.YMM.Folded]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
- loadv2i64>, VEX, VEX_WIG;
+ load>, VEX, VEX_WIG;
defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
- loadv2i64>, VEX, VEX_WIG;
+ load>, VEX, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
- loadv2i64>, VEX, VEX_WIG;
+ load>, VEX, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
@@ -4623,11 +4891,11 @@ let Predicates = [HasAVX2, NoVLX] in {
}
defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
- memopv2i64>;
+ memop>;
defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
- memopv2i64>;
+ memop>;
defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
- memopv2i64>;
+ memop>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
@@ -4652,9 +4920,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
- (DstVT (OpNode (OpVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (DstVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
@@ -4675,9 +4942,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
@@ -4693,83 +4959,83 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (IntId256 VR256:$src1, (load addr:$src2)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
- VR128, loadv2i64, i128mem,
+ VR128, load, i128mem,
SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG;
defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
- v16i8, VR128, loadv2i64, i128mem,
+ v16i8, VR128, load, i128mem,
SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
}
defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
- VR128, loadv2i64, i128mem,
+ VR128, load, i128mem,
SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
- SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128,
- SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128,
- SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
- SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
- SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
}
}
let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
- v32i8, VR256, loadv4i64, i256mem,
+ v32i8, VR256, load, i256mem,
SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
}
defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
- loadv4i64, i256mem,
+ load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
- loadv4i64, i256mem,
+ load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
@@ -4790,33 +5056,33 @@ let isCommutable = 0 in {
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
- SchedWriteVecALU.XMM, memopv2i64>;
+ SchedWriteVecALU.XMM, memop>;
defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
- SchedWriteVecALU.XMM, memopv2i64>;
+ SchedWriteVecALU.XMM, memop>;
defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
- SchedWriteVecALU.XMM, memopv2i64>;
+ SchedWriteVecALU.XMM, memop>;
defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
- memopv2i64, i128mem, SchedWriteVarShuffle.XMM>;
+ memop, i128mem, SchedWriteVarShuffle.XMM>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128,
- SchedWritePHAdd.XMM, memopv2i64>;
+ SchedWritePHAdd.XMM, memop>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128,
- SchedWritePHAdd.XMM, memopv2i64>;
+ SchedWritePHAdd.XMM, memop>;
defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
- v16i8, VR128, memopv2i64, i128mem,
+ v16i8, VR128, memop, i128mem,
SchedWriteVecIMul.XMM>;
}
defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
- VR128, memopv2i64, i128mem, SchedWriteVecIMul.XMM>;
+ VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
}
//===---------------------------------------------------------------------===//
@@ -4843,20 +5109,20 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (VT (X86PAlignr RC:$src1,
- (bitconvert (memop_frag addr:$src2)),
+ (memop_frag addr:$src2),
(i8 imm:$src3))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
- defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, loadv2i64, i128mem,
+ defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
- defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, loadv4i64, i256mem,
+ defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
- defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memopv2i64, i128mem,
+ defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
SchedWriteShuffle.XMM>;
//===---------------------------------------------------------------------===//
@@ -4936,34 +5202,72 @@ defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;
defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;
-// AVX2 Patterns
-multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtOp> {
+// Patterns that we also need for any_extend.
+// Any_extend_vector_inreg is currently legalized to zero_extend_vector_inreg.
+multiclass SS41I_pmovx_avx2_patterns_base<string OpcPrefix, SDNode ExtOp> {
// Register-Register patterns
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
- (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
+ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+ def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
+ (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
}
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v8i32 (ExtOp (v16i8 VR128:$src))),
+
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
+ (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
+ (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
+ }
+
+ // AVX2 Register-Memory patterns
+ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+ def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ }
+
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+ }
+}
+
+// AVX2 Patterns
+multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
+ SDNode ExtOp, SDNode InVecOp> :
+ SS41I_pmovx_avx2_patterns_base<OpcPrefix, ExtOp> {
+
+ // Register-Register patterns
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 VR128:$src))),
+ def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
- (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 VR128:$src))),
+ def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
(!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;
-
- def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
- (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
}
// Simple Register-Memory patterns
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
(!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
}
- let Predicates = [HasAVX, NoVLX] in {
+ let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
@@ -4979,60 +5283,39 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtO
}
// AVX2 Register-Memory patterns
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- }
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
}
}
-defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
-defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
+defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
+defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;
+defm : SS41I_pmovx_avx2_patterns_base<"VPMOVZX", anyext>;
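In the YMM extension patterns rewritten here, the X86-specific X86vsext/X86vzext nodes give way to the generic sext/zext nodes, while the cases that consume only the low lanes of the 128-bit source go through the separate InVecOp parameter (sext_invec/zext_invec). The zero-extend patterns are reused for anyext because, as the comment above says, any_extend_vector_inreg is currently legalized to zero_extend_vector_inreg. A rough sketch of the in-register extend nodes these patterns assume (SDTExtVecInReg is a placeholder name, not the real profile):

// Hedged sketch; the real declarations live alongside the other generic SDNodes.
// SDTExtVecInReg stands in for a profile whose result is a wider vector built
// from the low elements of the single vector operand.
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtVecInReg>;
def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtVecInReg>;

For example, (v4i64 (sext_invec (v16i8 VR128:$src))) sign-extends the low four bytes of $src and, per the pattern above, still selects VPMOVSXBQYrr just as the old X86vsext form did.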
// SSE4.1/AVX patterns.
multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
@@ -5082,7 +5365,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
}
let Predicates = [HasAVX, NoVLX] in {
@@ -5092,7 +5375,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
@@ -5101,7 +5384,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -5112,7 +5395,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
@@ -5121,7 +5404,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -5132,7 +5415,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
}
}
@@ -5298,8 +5581,8 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
- imm:$src3))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), imm:$src3))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoBWI] in
@@ -5324,8 +5607,8 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
- imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoDQI] in
@@ -5350,8 +5633,8 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
- imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoDQI] in
@@ -5383,7 +5666,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
(X86insertps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
let ExeDomain = SSEPackedSingle in {
@@ -5446,7 +5729,7 @@ let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
@@ -5461,7 +5744,7 @@ let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
@@ -5479,7 +5762,7 @@ let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
(outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
@@ -5494,7 +5777,7 @@ let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
(outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
@@ -5522,7 +5805,7 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1
let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
@@ -5545,7 +5828,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
}
@@ -5846,7 +6129,7 @@ def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM.Folded, ReadAfterLd]>,
+ Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>,
VEX, VEX_WIG;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
@@ -5856,7 +6139,7 @@ def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.YMM.Folded, ReadAfterLd]>,
+ Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>,
VEX, VEX_L, VEX_WIG;
}
@@ -5868,7 +6151,7 @@ def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"ptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>;
}
// The bit test instructions below are AVX only
@@ -5882,7 +6165,7 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>, VEX;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
}
let Defs = [EFLAGS], Predicates = [HasAVX] in {
@@ -5950,7 +6233,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
(ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (v8i16 (OpNode (v8i16 (bitconvert (ld_frag addr:$src))))))]>,
+ (v8i16 (OpNode (ld_frag addr:$src))))]>,
Sched<[Sched.Folded]>;
}
@@ -5958,10 +6241,10 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
// model, although the naming is misleading.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
- X86phminpos, loadv2i64,
+ X86phminpos, load,
WritePHMINPOS>, VEX, VEX_WIG;
defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",
- X86phminpos, memopv2i64,
+ X86phminpos, memop,
WritePHMINPOS>;
/// SS48I_binop_rm - Simple SSE41 binary operator.
@@ -5983,118 +6266,118 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128,
- loadv2i64, i128mem, SchedWriteVecIMul.XMM, 0>,
+ load, i128mem, SchedWriteVecIMul.XMM, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256,
- loadv4i64, i256mem, SchedWriteVecIMul.YMM, 0>,
+ load, i256mem, SchedWriteVecIMul.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMULDQ : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128,
- memopv2i64, i128mem, SchedWriteVecIMul.XMM, 1>;
+ memop, i128mem, SchedWriteVecIMul.XMM, 1>;
}
let Predicates = [HasAVX, NoVLX] in
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
- loadv2i64, i128mem, SchedWritePMULLD.XMM, 0>,
+ load, i128mem, SchedWritePMULLD.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX] in
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
- loadv4i64, i256mem, SchedWritePMULLD.YMM, 0>,
+ load, i256mem, SchedWritePMULLD.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Predicates = [HasAVX2] in
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in {
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
- memopv2i64, i128mem, SchedWritePMULLD.XMM, 1>;
+ memop, i128mem, SchedWritePMULLD.XMM, 1>;
defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
}
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
@@ -6120,9 +6403,8 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (IntId RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
@@ -6148,9 +6430,8 @@ multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def BlendCommuteImm2 : SDNodeXForm<imm, [{
@@ -6171,28 +6452,28 @@ def BlendCommuteImm8 : SDNodeXForm<imm, [{
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, loadv2i64, i128mem, 0,
+ VR128, load, i128mem, 0,
SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
}
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, loadv4f32, f128mem, 0,
+ VR128, load, f128mem, 0,
SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, loadv2f64, f128mem, 0,
+ VR128, load, f128mem, 0,
SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, loadv8f32, i256mem, 0,
+ VR256, load, i256mem, 0,
SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
- VR256, loadv4i64, i256mem, 0,
+ VR256, load, i256mem, 0,
SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
}
@@ -6200,17 +6481,17 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv2i64, i128mem, 1,
+ VR128, memop, i128mem, 1,
SchedWriteMPSAD.XMM>;
}
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv4f32, f128mem, 1,
+ VR128, memop, f128mem, 1,
SchedWriteDPPS.XMM>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv2f64, f128mem, 1,
+ VR128, memop, f128mem, 1,
SchedWriteDPPD.XMM>;
}
@@ -6238,56 +6519,54 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Pattern to commute if load is in first source.
- def : Pat<(OpVT (OpNode (bitconvert (memop_frag addr:$src2)),
- RC:$src1, imm:$src3)),
+ def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
(commuteXForm imm:$src3))>;
}
let Predicates = [HasAVX] in {
defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
- VR128, loadv4f32, f128mem, 0, SSEPackedSingle,
+ VR128, load, f128mem, 0, SSEPackedSingle,
SchedWriteFBlend.XMM, BlendCommuteImm4>,
VEX_4V, VEX_WIG;
defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
- VR256, loadv8f32, f256mem, 0, SSEPackedSingle,
+ VR256, load, f256mem, 0, SSEPackedSingle,
SchedWriteFBlend.YMM, BlendCommuteImm8>,
VEX_4V, VEX_L, VEX_WIG;
defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
- VR128, loadv2f64, f128mem, 0, SSEPackedDouble,
+ VR128, load, f128mem, 0, SSEPackedDouble,
SchedWriteFBlend.XMM, BlendCommuteImm2>,
VEX_4V, VEX_WIG;
defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
- VR256, loadv4f64, f256mem, 0, SSEPackedDouble,
+ VR256, load, f256mem, 0, SSEPackedDouble,
SchedWriteFBlend.YMM, BlendCommuteImm4>,
VEX_4V, VEX_L, VEX_WIG;
defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
- VR128, loadv2i64, i128mem, 0, SSEPackedInt,
+ VR128, load, i128mem, 0, SSEPackedInt,
SchedWriteBlend.XMM, BlendCommuteImm8>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2] in {
defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
- VR256, loadv4i64, i256mem, 0, SSEPackedInt,
+ VR256, load, i256mem, 0, SSEPackedInt,
SchedWriteBlend.YMM, BlendCommuteImm8>,
VEX_4V, VEX_L, VEX_WIG;
}
defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
- VR128, memopv4f32, f128mem, 1, SSEPackedSingle,
+ VR128, memop, f128mem, 1, SSEPackedSingle,
SchedWriteFBlend.XMM, BlendCommuteImm4>;
defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
- VR128, memopv2f64, f128mem, 1, SSEPackedDouble,
+ VR128, memop, f128mem, 1, SSEPackedDouble,
SchedWriteFBlend.XMM, BlendCommuteImm2>;
defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
- VR128, memopv2i64, i128mem, 1, SSEPackedInt,
+ VR128, memop, i128mem, 1, SSEPackedInt,
SchedWriteBlend.XMM, BlendCommuteImm8>;
// For insertion into the zero index (low half) of a 256-bit vector, it is
@@ -6321,20 +6600,20 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
+ (IntId RC:$src1, (mem_frag addr:$src2),
RC:$src3))], SSEPackedInt>, TAPD, VEX_4V,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
- loadv2f64, int_x86_sse41_blendvpd,
+ load, int_x86_sse41_blendvpd,
SchedWriteFVarBlend.XMM>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
loadv4f64, int_x86_avx_blendv_pd_256,
@@ -6342,20 +6621,20 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
- loadv4f32, int_x86_sse41_blendvps,
+ load, int_x86_sse41_blendvps,
SchedWriteFVarBlend.XMM>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
loadv8f32, int_x86_avx_blendv_ps_256,
SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- loadv2i64, int_x86_sse41_pblendvb,
+ load, int_x86_sse41_pblendvb,
SchedWriteVarBlend.XMM>;
}
let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
- loadv4i64, int_x86_avx2_pblendvb,
+ load, int_x86_avx2_pblendvb,
SchedWriteVarBlend.YMM>, VEX_L;
}
@@ -6486,18 +6765,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
"\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
[(set VR128:$dst,
(IntId VR128:$src1,
- (bitconvert (mem_frag addr:$src2)), XMM0))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (mem_frag addr:$src2), XMM0))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let ExeDomain = SSEPackedDouble in
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memop, f128mem,
int_x86_sse41_blendvpd, SchedWriteFVarBlend.XMM>;
let ExeDomain = SSEPackedSingle in
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memop, f128mem,
int_x86_sse41_blendvps, SchedWriteFVarBlend.XMM>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memop, i128mem,
int_x86_sse41_pblendvb, SchedWriteVarBlend.XMM>;
// Aliases with the implicit xmm0 argument
@@ -6553,6 +6832,12 @@ let Predicates = [HasAVX2, NoVLX] in {
(VMOVNTDQAYrm addr:$src)>;
def : Pat<(v4i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAYrm addr:$src)>;
+ def : Pat<(v8i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAYrm addr:$src)>;
+ def : Pat<(v16i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAYrm addr:$src)>;
+ def : Pat<(v32i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAYrm addr:$src)>;
}
let Predicates = [HasAVX, NoVLX] in {
@@ -6562,6 +6847,12 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVNTDQArm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(VMOVNTDQArm addr:$src)>;
+ def : Pat<(v4i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQArm addr:$src)>;
+ def : Pat<(v8i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQArm addr:$src)>;
+ def : Pat<(v16i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQArm addr:$src)>;
}
let Predicates = [UseSSE41] in {
@@ -6571,6 +6862,12 @@ let Predicates = [UseSSE41] in {
(MOVNTDQArm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(MOVNTDQArm addr:$src)>;
+ def : Pat<(v4i32 (alignednontemporalload addr:$src)),
+ (MOVNTDQArm addr:$src)>;
+ def : Pat<(v8i16 (alignednontemporalload addr:$src)),
+ (MOVNTDQArm addr:$src)>;
+ def : Pat<(v16i8 (alignednontemporalload addr:$src)),
+ (MOVNTDQArm addr:$src)>;
}
} // AddedComplexity
@@ -6598,22 +6895,22 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2] in
defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM>;
+ memop, i128mem, SchedWriteVecALU.XMM>;
//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
@@ -6628,7 +6925,7 @@ multiclass pcmpistrm_SS42AI<string asm> {
def rm :SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrM.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>;
}
let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
@@ -6646,7 +6943,7 @@ multiclass SS42AI_pcmpestrm<string asm> {
def rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrM.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>;
}
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
@@ -6664,7 +6961,7 @@ multiclass SS42AI_pcmpistri<string asm> {
def rm : SS42AI<0x63, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrI.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>;
}
let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
@@ -6682,7 +6979,7 @@ multiclass SS42AI_pcmpestri<string asm> {
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrI.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>;
}
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
@@ -6712,7 +7009,7 @@ class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
- Sched<[WriteCRC32.Folded, ReadAfterLd]>;
+ Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
@@ -6764,10 +7061,10 @@ multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
[!if(UsesXMM0,
(set VR128:$dst, (IntId VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
+ (memop addr:$src2), XMM0)),
(set VR128:$dst, (IntId VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (memop addr:$src2))))]>, T8,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
@@ -6783,9 +7080,10 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
"sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)),
+ (memop addr:$src2),
(i8 imm:$src3)))]>, TA,
- Sched<[SchedWriteVecIMul.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecIMul.XMM.Folded,
+ SchedWriteVecIMul.XMM.ReadAfterFold]>;
defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
SchedWriteVecIMul.XMM>;
@@ -6828,46 +7126,46 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, MemOp:$src2), "",
[(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
- Sched<[WriteAESDecEnc.Folded, ReadAfterLd]>;
+ Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
}
}
// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG;
defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG;
defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG;
defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG;
}
let Predicates = [NoVLX, HasVAES] in {
defm VAESENCY : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesenc_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
defm VAESENCLASTY : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesenclast_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
defm VAESDECY : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesdec_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
defm VAESDECLASTY : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesdeclast_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
- int_x86_aesni_aesenc, memopv2i64, 1>;
+ int_x86_aesni_aesenc, memop, 1>;
defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
- int_x86_aesni_aesenclast, memopv2i64, 1>;
+ int_x86_aesni_aesenclast, memop, 1>;
defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
- int_x86_aesni_aesdec, memopv2i64, 1>;
+ int_x86_aesni_aesdec, memop, 1>;
defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
- int_x86_aesni_aesdeclast, memopv2i64, 1>;
+ int_x86_aesni_aesdeclast, memop, 1>;
}
// Perform the AES InvMixColumn Transformation
@@ -6881,7 +7179,7 @@ let Predicates = [HasAVX, HasAES] in {
def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
@@ -6892,7 +7190,7 @@ def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"aesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>,
Sched<[WriteAESIMC.Folded]>;
// AES Round Key Generation Assist
@@ -6907,7 +7205,7 @@ let Predicates = [HasAVX, HasAES] in {
(ins i128mem:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
@@ -6920,7 +7218,7 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>;
//===----------------------------------------------------------------------===//
@@ -6948,12 +7246,12 @@ let Predicates = [NoAVX, HasPCLMUL] in {
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
- (int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2),
+ (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
imm:$src3))]>,
- Sched<[WriteCLMul.Folded, ReadAfterLd]>;
+ Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
- def : Pat<(int_x86_pclmulqdq (memopv2i64 addr:$src2), VR128:$src1,
+ def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
(i8 imm:$src3)),
(PCLMULQDQrm VR128:$src1, addr:$src2,
(PCLMULCommuteImm imm:$src3))>;
@@ -6986,7 +7284,7 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set RC:$dst,
(IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
- Sched<[WriteCLMul.Folded, ReadAfterLd]>;
+ Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
// We can commute a load in the first operand by swapping the sources and
// rotating the immediate.
@@ -6996,11 +7294,11 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
}
let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
-defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, loadv2i64,
+defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
int_x86_pclmulqdq>, VEX_4V, VEX_WIG;
let Predicates = [NoVLX, HasVPCLMULQDQ] in
-defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, loadv4i64,
+defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG;
multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
@@ -7156,11 +7454,11 @@ def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI128 addr:$src)>;
-def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))),
+def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
(VBROADCASTI128 addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTI128 addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI128 addr:$src)>;
}
@@ -7174,11 +7472,11 @@ def : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))),
let Predicates = [HasAVX1Only] in {
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))),
+def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTF128 addr:$src)>;
}
@@ -7194,7 +7492,7 @@ let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f128mem:$src2, u8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteFShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}
// To create a 256-bit all ones value, we should produce VCMPTRUEPS
@@ -7211,7 +7509,7 @@ multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
(!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
- (From (bitconvert (memop_frag addr:$src2))),
+ (From (memop_frag addr:$src2)),
(iPTR imm)),
(!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
@@ -7224,9 +7522,9 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [HasAVX1Only] in {
defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64, loadv2i64>;
- defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv2i64>;
- defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv2i64>;
- defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv4i32>;
+ defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv8i16>;
+ defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv16i8>;
}
//===----------------------------------------------------------------------===//
@@ -7315,7 +7613,7 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop_f,
- X86MemOperand x86memop_i, PatFrag i_frag,
+ X86MemOperand x86memop_i,
ValueType f_vt, ValueType i_vt,
X86FoldableSchedWrite sched,
X86FoldableSchedWrite varsched> {
@@ -7329,8 +7627,8 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
- (i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V,
- Sched<[varsched.Folded, ReadAfterLd]>;
+ (i_vt (load addr:$src2)))))]>, VEX_4V,
+ Sched<[varsched.Folded, sched.ReadAfterFold]>;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
@@ -7348,18 +7646,18 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- loadv2i64, v4f32, v4i32, SchedWriteFShuffle.XMM,
+ v4f32, v4i32, SchedWriteFShuffle.XMM,
SchedWriteFVarShuffle.XMM>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- loadv4i64, v8f32, v8i32, SchedWriteFShuffle.YMM,
+ v8f32, v8i32, SchedWriteFShuffle.YMM,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- loadv2i64, v2f64, v2i64, SchedWriteFShuffle.XMM,
+ v2f64, v2i64, SchedWriteFShuffle.XMM,
SchedWriteFVarShuffle.XMM>;
defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- loadv4i64, v4f64, v4i64, SchedWriteFShuffle.YMM,
+ v4f64, v4i64, SchedWriteFShuffle.YMM,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
@@ -7380,7 +7678,7 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V, VEX_L,
- Sched<[WriteFShuffle256Ld, ReadAfterLd]>;
+ Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}
// Immediate transform to help with commuting.
@@ -7440,8 +7738,7 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
let hasSideEffects = 0, mayLoad = 1 in
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
- [(set RC:$dst, (X86cvtph2ps (bc_v8i16
- (loadv2i64 addr:$src))))]>,
+ [(set RC:$dst, (X86cvtph2ps (loadv8i16 addr:$src)))]>,
T8PD, VEX, Sched<[sched.Folded]>;
}
@@ -7515,7 +7812,7 @@ let Predicates = [HasF16C, NoVLX] in {
/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, X86FoldableSchedWrite sched,
- RegisterClass RC, PatFrag memop_frag,
+ RegisterClass RC,
X86MemOperand x86memop, SDNodeXForm commuteXForm> {
let isCommutable = 1 in
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
@@ -7529,22 +7826,20 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>, VEX_4V;
+ (OpVT (OpNode RC:$src1, (load addr:$src2), imm:$src3)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
// Pattern to commute if load is in first source.
- def : Pat<(OpVT (OpNode (bitconvert (memop_frag addr:$src2)),
- RC:$src1, imm:$src3)),
+ def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
(commuteXForm imm:$src3))>;
}
defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
- SchedWriteBlend.XMM, VR128, loadv2i64, i128mem,
+ SchedWriteBlend.XMM, VR128, i128mem,
BlendCommuteImm4>;
defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
- SchedWriteBlend.YMM, VR256, loadv4i64, i256mem,
+ SchedWriteBlend.YMM, VR256, i256mem,
BlendCommuteImm8>, VEX_L;
// For insertion into the zero index (low half) of a 256-bit vector, it is
@@ -7743,6 +8038,8 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVDDUPrr VR128:$src)>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
(VMOVDDUPrm addr:$src)>;
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+ (VMOVDDUPrm addr:$src)>;
}
let Predicates = [HasAVX1Only] in {
@@ -7778,7 +8075,7 @@ let Predicates = [HasAVX1Only] in {
// VPERM - Permute instructions
//
-multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+multiclass avx2_perm<bits<8> opc, string OpcodeStr,
ValueType OpVT, X86FoldableSchedWrite Sched,
X86MemOperand memOp> {
let Predicates = [HasAVX2, NoVLX] in {
@@ -7795,16 +8092,14 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1,
- (bitconvert (mem_frag addr:$src2)))))]>,
- Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L;
+ (load addr:$src2))))]>,
+ Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
}
}
-defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteVarShuffle256,
- i256mem>;
+defm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFVarShuffle256,
- f256mem>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
ValueType OpVT, X86FoldableSchedWrite Sched,
@@ -7824,7 +8119,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
[(set VR256:$dst,
(OpVT (X86VPermi (mem_frag addr:$src1),
(i8 imm:$src2))))]>,
- Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L;
+ Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
}
}
@@ -7849,7 +8144,7 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
(i8 imm:$src3)))]>,
- Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
let Predicates = [HasAVX2] in
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
@@ -7869,14 +8164,14 @@ let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i128mem:$src2, u8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}
let Predicates = [HasAVX2, NoVLX] in {
defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64, loadv2i64>;
- defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv2i64>;
- defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv2i64>;
- defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv4i32>;
+ defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv8i16>;
+ defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv16i8>;
}
//===----------------------------------------------------------------------===//
@@ -7941,7 +8236,7 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
ValueType MaskVT, string BlendStr, ValueType ZeroVT> {
// masked store
- def: Pat<(X86mstore addr:$ptr, (MaskVT RC:$mask), (VT RC:$src)),
+ def: Pat<(X86mstore (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
(!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
// masked load
def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), undef)),
@@ -8035,8 +8330,9 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1,
- (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
- VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded, ReadAfterLd]>;
+ (vt128 (load addr:$src2)))))]>,
+ VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
+ SchedWriteVarVecShift.XMM.ReadAfterFold]>;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -8048,8 +8344,9 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1,
- (vt256 (bitconvert (loadv4i64 addr:$src2))))))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, ReadAfterLd]>;
+ (vt256 (load addr:$src2)))))]>,
+ VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
+ SchedWriteVarVecShift.YMM.ReadAfterFold]>;
}
let Predicates = [HasAVX2, NoVLX] in {
@@ -8061,13 +8358,11 @@ let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)),
(VPSRAVDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86vsrav VR128:$src1,
- (bitconvert (loadv2i64 addr:$src2)))),
+ def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))),
(VPSRAVDrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86vsrav VR256:$src1, VR256:$src2)),
(VPSRAVDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86vsrav VR256:$src1,
- (bitconvert (loadv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86vsrav VR256:$src1, (load addr:$src2))),
(VPSRAVDYrm VR256:$src1, addr:$src2)>;
}
@@ -8132,51 +8427,6 @@ let Predicates = [UseAVX2] in {
}
//===----------------------------------------------------------------------===//
-// Extra selection patterns for f128, f128mem
-
-// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
-def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 VR128:$src), VR128))>;
-def : Pat<(store (f128 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, (COPY_TO_REGCLASS (f128 VR128:$src), VR128))>;
-
-def : Pat<(alignedloadf128 addr:$src),
- (COPY_TO_REGCLASS (MOVAPSrm addr:$src), VR128)>;
-def : Pat<(loadf128 addr:$src),
- (COPY_TO_REGCLASS (MOVUPSrm addr:$src), VR128)>;
-
-// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
-def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (ANDPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (ANDPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (ORPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (ORPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (XORPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (XORPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-//===----------------------------------------------------------------------===//
// GFNI instructions
//===----------------------------------------------------------------------===//
@@ -8194,8 +8444,8 @@ multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
[(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
- (bitconvert (MemOpFrag addr:$src2)))))]>,
- Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>, T8PD;
+ (MemOpFrag addr:$src2))))]>,
+ Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD;
}
}
@@ -8212,9 +8462,9 @@ multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
[(set RC:$dst, (OpVT (OpNode RC:$src1,
- (bitconvert (MemOpFrag addr:$src2)),
+ (MemOpFrag addr:$src2),
imm:$src3)))], SSEPackedInt>,
- Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
}
}
@@ -8222,24 +8472,24 @@ multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
let Constraints = "$src1 = $dst",
Predicates = [HasGFNI, UseSSE2] in
defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
- VR128, loadv2i64, i128mem, 1>;
+ VR128, load, i128mem, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
defm V##NAME : GF2P8AFFINE_rmi<Op, "v"##OpStr, v16i8, OpNode, VR128,
- loadv2i64, i128mem>, VEX_4V, VEX_W;
+ load, i128mem>, VEX_4V, VEX_W;
defm V##NAME##Y : GF2P8AFFINE_rmi<Op, "v"##OpStr, v32i8, OpNode, VR256,
- loadv4i64, i256mem>, VEX_4V, VEX_L, VEX_W;
+ load, i256mem>, VEX_4V, VEX_L, VEX_W;
}
}
// GF2P8MULB
let Constraints = "$src1 = $dst",
Predicates = [HasGFNI, UseSSE2] in
-defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memopv2i64,
+defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
i128mem, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
- defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, loadv2i64,
+ defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
i128mem>, VEX_4V;
- defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, loadv4i64,
+ defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
i256mem>, VEX_4V, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 023137634df1..7cd63a6dd820 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -16,7 +16,7 @@
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCL] in {
def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
"shl{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (shl GR8:$src1, CL))]>;
@@ -29,7 +29,7 @@ def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t{%cl, $dst|$dst, cl}",
[(set GR64:$dst, (shl GR64:$src1, CL))]>;
-} // Uses = [CL]
+} // Uses = [CL], SchedRW
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
@@ -64,11 +64,9 @@ def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
} // hasSideEffects = 0
} // Constraints = "$src = $dst", SchedRW
-
-let SchedRW = [WriteShiftLd, WriteRMW] in {
// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
// using CL?
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in {
def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
"shl{b}\t{%cl, $dst|$dst, cl}",
[(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -85,6 +83,8 @@ def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
[(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, u8imm:$src),
"shl{b}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -120,7 +120,7 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
} // SchedRW
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCL] in {
def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
"shr{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (srl GR8:$src1, CL))]>;
@@ -166,8 +166,7 @@ def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t{%cl, $dst|$dst, cl}",
[(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -184,6 +183,8 @@ def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
[(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, u8imm:$src),
"shr{b}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -219,7 +220,7 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
} // SchedRW
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCL] in {
def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (sra GR8:$src1, CL))]>;
@@ -268,8 +269,7 @@ def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t{%cl, $dst|$dst, cl}",
[(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -286,6 +286,8 @@ def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
[(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src),
"sar{b}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -325,9 +327,9 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
-let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in {
-let Uses = [CL, EFLAGS] in {
+let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in {
def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t{%cl, $dst|$dst, cl}", []>;
def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
@@ -357,7 +359,7 @@ def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
} // Uses = [EFLAGS]
-let Uses = [CL, EFLAGS] in {
+let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in {
def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t{%cl, $dst|$dst, cl}", []>;
def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
@@ -389,7 +391,7 @@ def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
} // Constraints = "$src = $dst"
-let SchedRW = [WriteShiftLd, WriteRMW], mayStore = 1 in {
+let SchedRW = [WriteRotateLd, WriteRMW], mayStore = 1 in {
let Uses = [EFLAGS] in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t$dst", []>;
@@ -428,7 +430,7 @@ def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt),
Requires<[In64BitMode]>;
} // Uses = [EFLAGS]
-let Uses = [CL, EFLAGS] in {
+let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCLLd, WriteRMW] in {
def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t{%cl, $dst|$dst, cl}", []>;
def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
@@ -452,9 +454,9 @@ def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
} // SchedRW
} // hasSideEffects = 0
-let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in {
// FIXME: provide shorter instructions when imm8 == 1
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteRotateCL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"rol{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (rotl GR8:$src1, CL))]>;
@@ -498,8 +500,7 @@ def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t{%cl, $dst|$dst, cl}",
[(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -514,6 +515,8 @@ def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
[(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteRotateLd, WriteRMW] in {
def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, u8imm:$src1),
"rol{b}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
@@ -548,8 +551,8 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
Requires<[In64BitMode]>;
} // SchedRW
-let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in {
+let Uses = [CL], SchedRW = [WriteRotateCL] in {
def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (rotr GR8:$src1, CL))]>;
@@ -595,8 +598,7 @@ def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (rotl GR64:$src1, (i8 63)))]>;
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t{%cl, $dst|$dst, cl}",
[(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -611,6 +613,8 @@ def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
[(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteRotateLd, WriteRMW] in {
def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src),
"ror{b}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -822,6 +826,8 @@ def ROT64L2R_imm8 : SDNodeXForm<imm, [{
return getI8Imm(64 - N->getZExtValue(), SDLoc(N));
}]>;
+// NOTE: We use WriteShift for these rotates as they avoid the stalls
+// of many of the older x86 rotate instructions.
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
let hasSideEffects = 0 in {
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
@@ -844,12 +850,12 @@ let hasSideEffects = 0 in {
def rm : I<0xF7, MRMSrcMem4VOp3,
(outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX, Sched<[WriteShiftLd,
+ VEX, Sched<[WriteShift.Folded,
// x86memop:$src1
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ WriteShift.ReadAfterFold]>;
}
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td
index 322bdb74e2de..c417dc99b84d 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -13,126 +13,42 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// No op bitconverts
-//===----------------------------------------------------------------------===//
-
-// Bitcasts between 128-bit vector types. Return the original type since
-// no instruction is needed for the conversion
-def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
-
-// Bitcasts between 256-bit vector types. Return the original type since
-// no instruction is needed for the conversion
-def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
-
-// Bitcasts between 512-bit vector types. Return the original type since
-// no instruction is needed for the conversion.
-def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
-
-
-//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//
-// A vector extract of the first f32/f64 position is a subregister copy
-def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
- (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
-def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
- (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
+let Predicates = [NoAVX512] in {
+ // A vector extract of the first f32/f64 position is a subregister copy
+ def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
+ def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
+}
+
+let Predicates = [HasAVX512] in {
+ // A vector extract of the first f32/f64 position is a subregister copy
+ def : Pat<(f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X)>;
+ def : Pat<(f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X)>;
+}
-// Implicitly promote a 32-bit scalar to a vector.
-def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
- (COPY_TO_REGCLASS FR32:$src, VR128)>;
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (COPY_TO_REGCLASS FR64:$src, VR128)>;
+let Predicates = [NoVLX] in {
+ // Implicitly promote a 32-bit scalar to a vector.
+ def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+ (COPY_TO_REGCLASS FR32:$src, VR128)>;
+ // Implicitly promote a 64-bit scalar to a vector.
+ def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (COPY_TO_REGCLASS FR64:$src, VR128)>;
+}
+let Predicates = [HasVLX] in {
+ // Implicitly promote a 32-bit scalar to a vector.
+ def : Pat<(v4f32 (scalar_to_vector FR32X:$src)),
+ (COPY_TO_REGCLASS FR32X:$src, VR128X)>;
+ // Implicitly promote a 64-bit scalar to a vector.
+ def : Pat<(v2f64 (scalar_to_vector FR64X:$src)),
+ (COPY_TO_REGCLASS FR64X:$src, VR128X)>;
+}
//===----------------------------------------------------------------------===//
// Subvector tricks
@@ -509,3 +425,85 @@ let Predicates = [HasBWI, HasVLX] in {
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
(i8 60)), (i8 60))>;
}
+
+//===----------------------------------------------------------------------===//
+// Extra selection patterns for f128, f128mem
+
+// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
+let Predicates = [NoAVX] in {
+def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f128 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (MOVAPSrm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (MOVUPSrm addr:$src)>;
+}
+
+let Predicates = [HasAVX, NoVLX] in {
+def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f128 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+}
+
+let Predicates = [HasVLX] in {
+def : Pat<(alignedstore (f128 VR128X:$src), addr:$dst),
+ (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
+def : Pat<(store (f128 VR128X:$src), addr:$dst),
+ (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (VMOVAPSZ128rm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (VMOVUPSZ128rm addr:$src)>;
+}
+
+let Predicates = [UseSSE1] in {
+// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
+def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
+ (ANDPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
+ (ANDPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
+ (ORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
+ (ORPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
+ (XORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
+ (XORPSrr VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasAVX] in {
+// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
+def : Pat<(f128 (X86fand VR128:$src1, (loadf128 addr:$src2))),
+ (VANDPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
+ (VANDPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, (loadf128 addr:$src2))),
+ (VORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
+ (VORPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, (loadf128 addr:$src2))),
+ (VXORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
+ (VXORPSrr VR128:$src1, VR128:$src2)>;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
index ff3e3be48a24..9d810a675e3b 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
@@ -11,32 +11,32 @@
//
//===----------------------------------------------------------------------===//
-multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
+multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
- Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
+ [(set VR128:$dst, (Int (load addr:$src)))]>, XOP,
+ Sched<[SchedWritePHAdd.XMM.Folded, SchedWritePHAdd.XMM.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
- defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
- defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
- defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
- defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
- defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
- defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
- defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
- defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
- defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
- defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
- defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
- defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
- defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
- defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
- defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
+ defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd>;
+ defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq>;
+ defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw>;
+ defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq>;
+ defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd>;
+ defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq>;
+ defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd>;
+ defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq>;
+ defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw>;
+ defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq>;
+ defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd>;
+ defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq>;
+ defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw>;
+ defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq>;
+ defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd>;
}
// Scalar load 2 addr operand instructions
@@ -48,47 +48,47 @@ multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set VR128:$dst, (Int mem_cpat:$src))]>, XOP,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
- PatFrag memop, X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set VR128:$dst, (Int (load addr:$src)))]>, XOP,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
- PatFrag memop, X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched> {
def Yrr : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[sched]>;
def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set VR256:$dst, (Int (load addr:$src)))]>, XOP, VEX_L,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedSingle in {
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
- defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
+ defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps,
SchedWriteFRnd.XMM>;
- defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
+ defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256,
SchedWriteFRnd.YMM>;
}
let ExeDomain = SSEPackedDouble in {
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
- defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
+ defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd,
SchedWriteFRnd.XMM>;
- defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
+ defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256,
SchedWriteFRnd.YMM>;
}
@@ -105,15 +105,15 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
- (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd]>;
+ (vt128 (load addr:$src2)))))]>,
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold]>;
def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
+ (vt128 (OpNode (vt128 (load addr:$src1)),
(vt128 VR128:$src2))))]>,
- XOP, Sched<[sched.Folded, ReadAfterLd]>;
+ XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
@@ -150,8 +150,8 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>,
- XOP, Sched<[sched.Folded, ReadAfterLd]>;
+ (vt128 (OpNode (vt128 (load addr:$src1)), imm:$src2)))]>,
+ XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
@@ -181,8 +181,8 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
- VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ (Int VR128:$src1, (load addr:$src2),
+ VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
@@ -260,9 +260,9 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
- (vt128 (bitconvert (loadv2i64 addr:$src2))),
+ (vt128 (load addr:$src2)),
imm:$cc)))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
@@ -274,12 +274,12 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>,
+ []>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
- def : Pat<(OpNode (bitconvert (loadv2i64 addr:$src2)),
+ def : Pat<(OpNode (load addr:$src2),
(vt128 VR128:$src1), imm:$cc),
(!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
(CommuteVPCOMCC imm:$cc))>;
@@ -310,21 +310,21 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- (vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ (vt128 (load addr:$src3)))))]>,
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
+ (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (load addr:$src2)),
(vt128 VR128:$src3))))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd,
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold,
// 128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
@@ -350,25 +350,26 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, RC:$src2))))]>, XOP_4V,
Sched<[sched]>;
+ // FIXME: This pattern can't match.
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
(X86andnp (load addr:$src3), RC:$src2))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, (load addr:$src2)))))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd,
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
@@ -385,6 +386,48 @@ let ExeDomain = SSEPackedInt in {
SchedWriteShuffle.YMM>, VEX_L;
}
+let Predicates = [HasXOP] in {
+ def : Pat<(v16i8 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v8i16 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+
+ def : Pat<(or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, (bc_v16i8 (loadv2i64 addr:$src2)))),
+ (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
+ def : Pat<(or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, (bc_v8i16 (loadv2i64 addr:$src2)))),
+ (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
+ def : Pat<(or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, (bc_v4i32 (loadv2i64 addr:$src2)))),
+ (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+ def : Pat<(v32i8 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(v16i16 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(v8i32 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+
+ def : Pat<(or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, (bc_v32i8 (loadv4i64 addr:$src2)))),
+ (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
+ def : Pat<(or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, (bc_v16i16 (loadv4i64 addr:$src2)))),
+ (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
+ def : Pat<(or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
+}
+
multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
X86MemOperand intmemop, X86MemOperand fpmemop,
ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag,
@@ -401,10 +444,9 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
- (VT (X86vpermil2 RC:$src1, RC:$src2,
- (bitconvert (IntLdFrag addr:$src3)),
+ (VT (X86vpermil2 RC:$src1, RC:$src2, (IntLdFrag addr:$src3),
(i8 imm:$src4))))]>, VEX_W,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
@@ -412,11 +454,11 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
@@ -437,10 +479,10 @@ let ExeDomain = SSEPackedDouble in {
let ExeDomain = SSEPackedSingle in {
defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
- v4f32, loadv4f32, loadv2i64,
+ v4f32, loadv4f32, loadv4i32,
SchedWriteFVarShuffle.XMM>;
defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
- v8f32, loadv8f32, loadv4i64,
+ v8f32, loadv8f32, loadv8i32,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 36d36cb11d72..c20336387b2d 100644
--- a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -89,6 +89,8 @@ private:
MachineFunction &MF) const;
bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -114,8 +116,10 @@ private:
bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectShift(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
- bool selectSDiv(MachineInstr &I, MachineRegisterInfo &MRI,
- MachineFunction &MF) const;
+ bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+ bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
// emit insert subreg instruction and insert it before MachineInstr &I
bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
@@ -362,11 +366,14 @@ bool X86InstructionSelector::select(MachineInstr &I,
return selectAnyext(I, MRI, MF);
case TargetOpcode::G_ICMP:
return selectCmp(I, MRI, MF);
+ case TargetOpcode::G_FCMP:
+ return selectFCmp(I, MRI, MF);
case TargetOpcode::G_UADDE:
return selectUadde(I, MRI, MF);
case TargetOpcode::G_UNMERGE_VALUES:
return selectUnmergeValues(I, MRI, MF, CoverageInfo);
case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
return selectMergeValues(I, MRI, MF, CoverageInfo);
case TargetOpcode::G_EXTRACT:
return selectExtract(I, MRI, MF);
@@ -382,7 +389,12 @@ bool X86InstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_LSHR:
return selectShift(I, MRI, MF);
case TargetOpcode::G_SDIV:
- return selectSDiv(I, MRI, MF);
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM:
+ return selectDivRem(I, MRI, MF);
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ return selectIntrinsicWSideEffects(I, MRI, MF);
}
return false;
@@ -967,6 +979,98 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I,
return true;
}
+bool X86InstructionSelector::selectFCmp(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");
+
+ unsigned LhsReg = I.getOperand(2).getReg();
+ unsigned RhsReg = I.getOperand(3).getReg();
+ CmpInst::Predicate Predicate =
+ (CmpInst::Predicate)I.getOperand(1).getPredicate();
+
+ // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
+ static const uint16_t SETFOpcTable[2][3] = {
+ {X86::SETEr, X86::SETNPr, X86::AND8rr},
+ {X86::SETNEr, X86::SETPr, X86::OR8rr}};
+ const uint16_t *SETFOpc = nullptr;
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_OEQ:
+ SETFOpc = &SETFOpcTable[0][0];
+ break;
+ case CmpInst::FCMP_UNE:
+ SETFOpc = &SETFOpcTable[1][0];
+ break;
+ }
+
+ // Compute the opcode for the CMP instruction.
+ unsigned OpCmp;
+ LLT Ty = MRI.getType(LhsReg);
+ switch (Ty.getSizeInBits()) {
+ default:
+ return false;
+ case 32:
+ OpCmp = X86::UCOMISSrr;
+ break;
+ case 64:
+ OpCmp = X86::UCOMISDrr;
+ break;
+ }
+
+ unsigned ResultReg = I.getOperand(0).getReg();
+ RBI.constrainGenericRegister(
+ ResultReg,
+ *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
+ if (SETFOpc) {
+ MachineInstr &CmpInst =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
+ .addReg(LhsReg)
+ .addReg(RhsReg);
+
+ unsigned FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
+ unsigned FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
+ MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SETFOpc[0]), FlagReg1);
+ MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SETFOpc[1]), FlagReg2);
+ MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SETFOpc[2]), ResultReg)
+ .addReg(FlagReg1)
+ .addReg(FlagReg2);
+ constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set1, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set2, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set3, TII, TRI, RBI);
+
+ I.eraseFromParent();
+ return true;
+ }
+
+ X86::CondCode CC;
+ bool SwapArgs;
+ std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+ unsigned Opc = X86::getSETFromCond(CC);
+
+ if (SwapArgs)
+ std::swap(LhsReg, RhsReg);
+
+ // Emit a compare of LHS/RHS.
+ MachineInstr &CmpInst =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
+ .addReg(LhsReg)
+ .addReg(RhsReg);
+
+ MachineInstr &Set =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc), ResultReg);
+ constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
bool X86InstructionSelector::selectUadde(MachineInstr &I,
MachineRegisterInfo &MRI,
MachineFunction &MF) const {
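The selectFCmp addition above cannot express FCMP_OEQ or FCMP_UNE with a single SETcc because UCOMISS/UCOMISD fold "unordered" into the zero flag: ZF is set for equal or unordered operands, while PF is set only when the comparison is unordered. A minimal, self-contained C++ sketch of that flag logic (plain scalar code for illustration, not part of the patch):

// Flag semantics selectFCmp relies on after UCOMISS a, b:
//   ZF == 1  iff  a == b or either operand is NaN   ("equal or unordered")
//   PF == 1  iff  either operand is NaN             ("unordered")
// Hence OEQ needs SETE & SETNP, and UNE needs SETNE | SETP, as encoded in
// SETFOpcTable above.
static inline bool fcmpOEQ(float A, float B) {
  bool ZF = !(A < B) && !(A > B); // equal or unordered
  bool PF = (A != A) || (B != B); // unordered (a NaN is involved)
  return ZF && !PF;               // SETEr, SETNPr, AND8rr
}
static inline bool fcmpUNE(float A, float B) {
  bool ZF = !(A < B) && !(A > B);
  bool PF = (A != A) || (B != B);
  return !ZF || PF;               // SETNEr, SETPr, OR8rr
}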
@@ -1246,7 +1350,8 @@ bool X86InstructionSelector::selectUnmergeValues(
bool X86InstructionSelector::selectMergeValues(
MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF,
CodeGenCoverage &CoverageInfo) const {
- assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES) &&
+ assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
+ I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
"unexpected instruction");
// Split to inserts.
@@ -1485,23 +1590,33 @@ bool X86InstructionSelector::selectShift(MachineInstr &I,
return true;
}
-bool X86InstructionSelector::selectSDiv(MachineInstr &I,
- MachineRegisterInfo &MRI,
- MachineFunction &MF) const {
-
- assert(I.getOpcode() == TargetOpcode::G_SDIV && "unexpected instruction");
+bool X86InstructionSelector::selectDivRem(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ // The implementation of this function is taken from X86FastISel.
+ assert((I.getOpcode() == TargetOpcode::G_SDIV ||
+ I.getOpcode() == TargetOpcode::G_SREM ||
+ I.getOpcode() == TargetOpcode::G_UDIV ||
+ I.getOpcode() == TargetOpcode::G_UREM) &&
+ "unexpected instruction");
const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned DividentReg = I.getOperand(1).getReg();
- const unsigned DiviserReg = I.getOperand(2).getReg();
+ const unsigned Op1Reg = I.getOperand(1).getReg();
+ const unsigned Op2Reg = I.getOperand(2).getReg();
const LLT RegTy = MRI.getType(DstReg);
- assert(RegTy == MRI.getType(DividentReg) &&
- RegTy == MRI.getType(DiviserReg) &&
+ assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
"Arguments and return value types must match");
const RegisterBank &RegRB = *RBI.getRegBank(DstReg, MRI, TRI);
+ if (RegRB.getID() != X86::GPRRegBankID)
+ return false;
+ const static unsigned NumTypes = 4; // i8, i16, i32, i64
+ const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
+ const static bool S = true; // IsSigned
+ const static bool U = false; // !IsSigned
+ const static unsigned Copy = TargetOpcode::COPY;
// For the X86 IDIV instruction, in most cases the dividend
// (numerator) must be in a specific register pair highreg:lowreg,
// producing the quotient in lowreg and the remainder in highreg.
@@ -1510,56 +1625,182 @@ bool X86InstructionSelector::selectSDiv(MachineInstr &I,
// exception is i8, where the dividend is defined as a single register rather
// than a register pair, and we therefore directly sign-extend the dividend
// into lowreg, instead of copying, and ignore the highreg.
- const static struct SDivEntry {
+ const static struct DivRemEntry {
+ // The following portion depends only on the data type.
unsigned SizeInBits;
- unsigned QuotientReg;
- unsigned DividentRegUpper;
- unsigned DividentRegLower;
- unsigned OpSignExtend;
- unsigned OpCopy;
- unsigned OpDiv;
- } OpTable[] = {
- {8, X86::AL, X86::NoRegister, X86::AX, 0, X86::MOVSX16rr8,
- X86::IDIV8r}, // i8
- {16, X86::AX, X86::DX, X86::AX, X86::CWD, TargetOpcode::COPY,
- X86::IDIV16r}, // i16
- {32, X86::EAX, X86::EDX, X86::EAX, X86::CDQ, TargetOpcode::COPY,
- X86::IDIV32r}, // i32
- {64, X86::RAX, X86::RDX, X86::RAX, X86::CQO, TargetOpcode::COPY,
- X86::IDIV64r} // i64
+ unsigned LowInReg; // low part of the register pair
+ unsigned HighInReg; // high part of the register pair
+ // The following portion depends on both the data type and the operation.
+ struct DivRemResult {
+ unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
+ unsigned OpSignExtend; // Opcode for sign-extending lowreg into
+ // highreg, or copying a zero into highreg.
+ unsigned OpCopy; // Opcode for copying dividend into lowreg, or
+ // zero/sign-extending into lowreg for i8.
+ unsigned DivRemResultReg; // Register containing the desired result.
+ bool IsOpSigned; // Whether to use signed or unsigned form.
+ } ResultTable[NumOps];
+ } OpTable[NumTypes] = {
+ {8,
+ X86::AX,
+ 0,
+ {
+ {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
+ {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
+ {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U}, // UDiv
+ {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U}, // URem
+ }}, // i8
+ {16,
+ X86::AX,
+ X86::DX,
+ {
+ {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv
+ {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
+ }}, // i16
+ {32,
+ X86::EAX,
+ X86::EDX,
+ {
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
+ }}, // i32
+ {64,
+ X86::RAX,
+ X86::RDX,
+ {
+ {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S}, // SDiv
+ {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S}, // SRem
+ {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv
+ {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem
+ }}, // i64
};
- if (RegRB.getID() != X86::GPRRegBankID)
+ auto OpEntryIt = std::find_if(std::begin(OpTable), std::end(OpTable),
+ [RegTy](const DivRemEntry &El) {
+ return El.SizeInBits == RegTy.getSizeInBits();
+ });
+ if (OpEntryIt == std::end(OpTable))
return false;
- auto SDivEntryIt = std::find_if(
- std::begin(OpTable), std::end(OpTable), [RegTy](const SDivEntry &El) {
- return El.SizeInBits == RegTy.getSizeInBits();
- });
+ unsigned OpIndex;
+ switch (I.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected div/rem opcode");
+ case TargetOpcode::G_SDIV:
+ OpIndex = 0;
+ break;
+ case TargetOpcode::G_SREM:
+ OpIndex = 1;
+ break;
+ case TargetOpcode::G_UDIV:
+ OpIndex = 2;
+ break;
+ case TargetOpcode::G_UREM:
+ OpIndex = 3;
+ break;
+ }
- if (SDivEntryIt == std::end(OpTable))
- return false;
+ const DivRemEntry &TypeEntry = *OpEntryIt;
+ const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
const TargetRegisterClass *RegRC = getRegClass(RegTy, RegRB);
- if (!RBI.constrainGenericRegister(DividentReg, *RegRC, MRI) ||
- !RBI.constrainGenericRegister(DiviserReg, *RegRC, MRI) ||
+ if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
+ !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
<< " operand\n");
return false;
}
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SDivEntryIt->OpCopy),
- SDivEntryIt->DividentRegLower)
- .addReg(DividentReg);
- if (SDivEntryIt->DividentRegUpper != X86::NoRegister)
+ // Move op1 into low-order input register.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
+ TypeEntry.LowInReg)
+ .addReg(Op1Reg);
+ // Zero-extend or sign-extend into high-order input register.
+ if (OpEntry.OpSignExtend) {
+ if (OpEntry.IsOpSigned)
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(OpEntry.OpSignExtend));
+ else {
+ unsigned Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
+ Zero32);
+
+ // Copy the zero into the appropriate sub/super/identical physical
+ // register. Unfortunately the operations needed are not uniform enough
+ // to fit neatly into the table above.
+ if (RegTy.getSizeInBits() == 16) {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
+ TypeEntry.HighInReg)
+ .addReg(Zero32, 0, X86::sub_16bit);
+ } else if (RegTy.getSizeInBits() == 32) {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
+ TypeEntry.HighInReg)
+ .addReg(Zero32);
+ } else if (RegTy.getSizeInBits() == 64) {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
+ .addImm(0)
+ .addReg(Zero32)
+ .addImm(X86::sub_32bit);
+ }
+ }
+ }
+ // Generate the DIV/IDIV instruction.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpDivRem))
+ .addReg(Op2Reg);
+ // For i8 remainder, we can't reference ah directly, as we'll end
+ // up with bogus copies like %r9b = COPY %ah. Reference ax
+ // instead to prevent ah references in a rex instruction.
+ //
+ // The current assumption of the fast register allocator is that isel
+ // won't generate explicit references to the GR8_NOREX registers. If
+ // the allocator and/or the backend get enhanced to be more robust in
+ // that regard, this can be, and should be, removed.
+ if ((I.getOpcode() == Instruction::SRem ||
+ I.getOpcode() == Instruction::URem) &&
+ OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) {
+ unsigned SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
+ unsigned ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
+ .addReg(X86::AX);
+
+ // Shift AX right by 8 bits instead of using AH.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
+ ResultSuperReg)
+ .addReg(SourceSuperReg)
+ .addImm(8);
+
+ // Now reference the 8-bit subreg of the result.
BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(SDivEntryIt->OpSignExtend));
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SDivEntryIt->OpDiv))
- .addReg(DiviserReg);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
- DstReg)
- .addReg(SDivEntryIt->QuotientReg);
+ TII.get(TargetOpcode::SUBREG_TO_REG))
+ .addDef(DstReg)
+ .addImm(0)
+ .addReg(ResultSuperReg)
+ .addImm(X86::sub_8bit);
+ } else {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
+ DstReg)
+ .addReg(OpEntry.DivRemResultReg);
+ }
+ I.eraseFromParent();
+ return true;
+}
+
+bool X86InstructionSelector::selectIntrinsicWSideEffects(
+ MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const {
+
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
+ "unexpected instruction");
+
+ if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
+ return false;
+
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TRAP));
I.eraseFromParent();
return true;
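The selectDivRem table above encodes the x86 convention that DIV/IDIV divide the double-width value HighInReg:LowInReg by the explicit operand, leaving the quotient in LowInReg and the remainder in HighInReg (i8 instead uses AX as the dividend and AL/AH for the results). A minimal sketch of the 32-bit signed case in plain C++ (an illustration of the convention, not code from the patch):

#include <cstdint>

struct DivRem32 { int32_t Quotient, Remainder; };

// Conceptually what the selector emits for a 32-bit G_SDIV/G_SREM:
//   COPY    eax <- Dividend       (OpCopy)
//   CDQ                           (OpSignExtend: edx:eax = sext(eax))
//   IDIV32r Divisor               (OpDivRem)
//   COPY    result <- eax or edx  (DivRemResultReg: EAX for SDiv, EDX for SRem)
DivRem32 sdivrem32(int32_t Dividend, int32_t Divisor) {
  int64_t Wide = static_cast<int64_t>(Dividend); // the CDQ widening step
  return {static_cast<int32_t>(Wide / Divisor),  // quotient lands in EAX
          static_cast<int32_t>(Wide % Divisor)}; // remainder lands in EDX
}

The unsigned forms zero the high register instead of sign-extending, which is why the i16/i32/i64 UDiv/URem rows carry MOV32r0 as their OpSignExtend entry and need the sub/super-register copy handled after the table.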
diff --git a/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp
index 6c7fb9c339ac..28940754a203 100644
--- a/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -463,7 +463,7 @@ static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
// {DiffToJump,...,VF/2-1,VF,...,DiffToJump+VF-1}.
// Imm variable sets the offset amount. The result of the
// function is stored inside ShuffleMask vector and it built as described in
-// the begin of the description. AlignDirection is a boolean that indecat the
+// the begin of the description. AlignDirection is a boolean that indicates the
// direction of the alignment. (false - align to the "right" side while true -
// align to the "left" side)
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 2dd60a1b8b5a..151e1b9136c4 100644
--- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -20,18 +20,18 @@
namespace llvm {
enum IntrinsicType : uint16_t {
- GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASS, FPCLASSS,
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
- INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP_IMM8,
- CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
- CVTPD2PS, CVTPD2PS_MASK,
+ INTR_TYPE_3OP_IMM8,
+ CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
+ CVTPD2PS, CVTPD2PS_MASK, CVTPD2PS_RND_MASK,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_3OP_MASK,
- FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_SCALAR,
IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG,
+ TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2I_MASK,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, GATHER_AVX2,
@@ -64,11 +64,6 @@ struct IntrinsicData {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithChain[] = {
- X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
-
X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, X86::VPGATHERDDrm, 0),
X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, X86::VPGATHERDDYrm, 0),
X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, X86::VGATHERDPDrm, 0),
@@ -120,6 +115,31 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
@@ -229,6 +249,31 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_512, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dps_512, SCATTER, X86::VSCATTERDPSZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpd_512, SCATTER, X86::VSCATTERQPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
+
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
@@ -270,9 +315,6 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0),
X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0),
X86_INTRINSIC_DATA(rdtscp, RDTSC, X86ISD::RDTSCP_DAG, 0),
-
- X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0),
- X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0),
X86_INTRINSIC_DATA(xgetbv, XGETBV, X86::XGETBV, 0),
X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0),
};
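The comment above the IntrinsicsWithChain table insists on alphabetical order so that the numeric intrinsic IDs, which follow the sorted intrinsic names, stay ascending and lookup helpers such as getIntrinsicWithChain can binary-search the array. A self-contained sketch of that lookup pattern (the struct and field names here are hypothetical stand-ins, not the header's actual layout):

#include <algorithm>
#include <cstdint>

// Hypothetical stand-in for IntrinsicData; only the sorted Id field matters here.
struct Entry {
  unsigned Id;
  uint16_t Type;
  uint16_t Opc0, Opc1;
};

// Binary search over a table kept sorted by Id, in the spirit of
// getIntrinsicWithChain / getIntrinsicWithoutChain.
static const Entry *lookupEntry(const Entry *Begin, const Entry *End,
                                unsigned IntNo) {
  const Entry *It = std::lower_bound(
      Begin, End, IntNo,
      [](const Entry &E, unsigned N) { return E.Id < N; });
  return (It != End && It->Id == IntNo) ? It : nullptr;
}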
@@ -294,6 +336,8 @@ static const IntrinsicData* getIntrinsicWithChain(unsigned IntNo) {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithoutChain[] = {
+ X86_INTRINSIC_DATA(addcarry_32, ADX, X86ISD::ADC, X86ISD::ADD),
+ X86_INTRINSIC_DATA(addcarry_64, ADX, X86ISD::ADC, X86ISD::ADD),
X86_INTRINSIC_DATA(avx_addsub_pd_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
X86_INTRINSIC_DATA(avx_addsub_ps_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
@@ -325,10 +369,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
@@ -369,10 +409,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -384,14 +420,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
@@ -402,12 +438,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_b, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_d, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_q, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_w, INTR_TYPE_2OP, X86ISD::KADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FADDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
@@ -467,13 +507,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CONFLICT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), //er
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2I_MASK,
+ X86ISD::CVTP2SI, X86ISD::MCVTP2SI),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, INTR_TYPE_1OP_MASK,
- X86ISD::VFPROUND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, CVTPD2PS_MASK,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_RND_MASK,
ISD::FP_ROUND, X86ISD::VFPROUND_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, 0),
@@ -481,8 +521,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, CVTPD2I_MASK,
+ X86ISD::CVTP2UI, X86ISD::MCVTP2UI),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_512, INTR_TYPE_1OP_MASK,
@@ -531,8 +571,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VFPROUNDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::VFPEXTS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, CVTPD2I_MASK,
+ X86ISD::CVTTP2SI, X86ISD::MCVTTP2SI),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,
@@ -541,8 +581,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, CVTPD2I_MASK,
+ X86ISD::CVTTP2UI, X86ISD::MCVTTP2UI),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,
@@ -677,144 +717,114 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMULS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FMULS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_128, INTR_TYPE_2OP_MASK,
- X86ISD::MULTISHIFT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_256, INTR_TYPE_2OP_MASK,
- X86ISD::MULTISHIFT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_512, INTR_TYPE_2OP_MASK,
- X86ISD::MULTISHIFT, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, X86ISD::VRANGE_RND),
@@ -871,38 +881,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_RND),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
-
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
-
- X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_128, CMP_MASK,
- X86ISD::VPSHUFBITQMB, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_256, CMP_MASK,
- X86ISD::VPSHUFBITQMB, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_512, CMP_MASK,
- X86ISD::VPSHUFBITQMB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
@@ -921,25 +905,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
X86ISD::VFIXUPIMMS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
-
X86_INTRINSIC_DATA(avx512_max_pd_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_max_ps_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_min_pd_512, INTR_TYPE_2OP, X86ISD::FMIN, X86ISD::FMIN_RND),
@@ -967,30 +932,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_pmul_hr_sw_512, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(avx512_pmulh_w_512, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx512_pmulhu_w_512, INTR_TYPE_2OP, ISD::MULHU, 0),
- X86_INTRINSIC_DATA(avx512_prol_d_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_d_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_d_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_q_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_q_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_q_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prolv_d_128, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_d_256, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_d_512, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_q_128, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_q_256, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_q_512, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_pror_d_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_d_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_d_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_q_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_q_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_q_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_prorv_d_128, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_d_256, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_d_512, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_q_128, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_q_256, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_q_512, INTR_TYPE_2OP, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_pmultishift_qb_128, INTR_TYPE_2OP, X86ISD::MULTISHIFT, 0),
+ X86_INTRINSIC_DATA(avx512_pmultishift_qb_256, INTR_TYPE_2OP, X86ISD::MULTISHIFT, 0),
+ X86_INTRINSIC_DATA(avx512_pmultishift_qb_512, INTR_TYPE_2OP, X86ISD::MULTISHIFT, 0),
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx512_pshuf_b_512, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_psll_d_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
@@ -1068,14 +1012,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_sub_ps_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
- X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
X86_INTRINSIC_DATA(avx512_vfmadd_f32, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
X86_INTRINSIC_DATA(avx512_vfmadd_f64, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
X86_INTRINSIC_DATA(avx512_vfmadd_pd_512, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
@@ -1124,26 +1068,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_128 , IFMA_OP, X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_256 , IFMA_OP, X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_512 , IFMA_OP, X86ISD::VPMADD52L, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_d_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_d_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_d_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_q_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_q_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_q_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_w_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_w_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_w_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_d_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_d_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_d_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_q_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_q_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_q_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_w_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_w_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_w_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_vpshufbitqmb_128, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
+ X86_INTRINSIC_DATA(avx512_vpshufbitqmb_256, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
+ X86_INTRINSIC_DATA(avx512_vpshufbitqmb_512, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
+ X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),
+ X86_INTRINSIC_DATA(bmi_bzhi_64, INTR_TYPE_2OP, X86ISD::BZHI, 0),
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
@@ -1151,6 +1082,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse_comile_ss, COMI, X86ISD::COMI, ISD::SETLE),
X86_INTRINSIC_DATA(sse_comilt_ss, COMI, X86ISD::COMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse_cvtss2si, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+ X86_INTRINSIC_DATA(sse_cvtss2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+ X86_INTRINSIC_DATA(sse_cvttss2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
+ X86_INTRINSIC_DATA(sse_cvttss2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse_max_ss, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0),
@@ -1174,8 +1109,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_cvtpd2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
X86_INTRINSIC_DATA(sse2_cvtps2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvtsd2si, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvtsd2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvttsd2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse2_max_sd, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
@@ -1184,10 +1123,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(sse2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(sse2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
@@ -1209,10 +1144,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(sse2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(sse2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),
@@ -1241,6 +1172,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(subborrow_32, ADX, X86ISD::SBB, X86ISD::SUB),
+ X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB),
X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
@@ -1280,14 +1213,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpermil2ps_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0),
- X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotdi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(xop_vprotq, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotqi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(xop_vprotw, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotwi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
X86_INTRINSIC_DATA(xop_vpshab, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
X86_INTRINSIC_DATA(xop_vpshad, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
X86_INTRINSIC_DATA(xop_vpshaq, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
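For orientation: each X86_INTRINSIC_DATA row above binds an intrinsic to a lowering kind plus up to two SelectionDAG opcodes (the second typically a rounding or masked variant, 0 when unused). A minimal sketch of that table shape, assuming simplified field and enum names rather than the exact declarations in X86IntrinsicsInfo.h:

    // Simplified, illustrative mirror of the lowering-table entry shape;
    // names here are assumptions, not copied from X86IntrinsicsInfo.h.
    #include <cstdint>

    enum LoweringKind : uint16_t { INTR_TYPE_1OP_K, INTR_TYPE_2OP_K, CVTPS2PH_MASK_K };

    struct IntrinsicRow {
      uint16_t     IntrinsicID; // Intrinsic::x86_* enumerator value
      LoweringKind Kind;        // selects the generic lowering path
      uint16_t     Opcode0;     // primary ISD/X86ISD node
      uint16_t     Opcode1;     // secondary node (rounding/masked form), 0 if none
    };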
diff --git a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index d372cada8de8..4a49fa68dd06 100644
--- a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -133,7 +133,8 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
// Shifts and SDIV
- getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR, G_SDIV})
+ getActionDefinitionsBuilder(
+ {G_SHL, G_LSHR, G_ASHR, G_SDIV, G_SREM, G_UDIV, G_UREM})
.legalFor({s8, s16, s32})
.clampScalar(0, s8, s32);
}
@@ -219,13 +220,27 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0);
+ getActionDefinitionsBuilder(G_FPTOSI)
+ .legalForCartesianProduct({s32, s64})
+ .clampScalar(1, s32, s64)
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
+ .widenScalarToNextPow2(1);
+
// Comparison
setAction({G_ICMP, 1, s64}, Legal);
+ getActionDefinitionsBuilder(G_FCMP)
+ .legalForCartesianProduct({s8}, {s32, s64})
+ .clampScalar(0, s8, s8)
+ .clampScalar(1, s32, s64)
+ .widenScalarToNextPow2(1);
+
// Shifts and SDIV
- getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR, G_SDIV})
- .legalFor({s8, s16, s32, s64})
- .clampScalar(0, s8, s64);
+ getActionDefinitionsBuilder(
+ {G_SHL, G_LSHR, G_ASHR, G_SDIV, G_SREM, G_UDIV, G_UREM})
+ .legalFor({s8, s16, s32, s64})
+ .clampScalar(0, s8, s64);
// Merge/Unmerge
setAction({G_MERGE_VALUES, s128}, Legal);
@@ -256,7 +271,7 @@ void X86LegalizerInfo::setLegalizerInfoSSE1() {
// Merge/Unmerge
for (const auto &Ty : {v4s32, v2s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
setAction({G_MERGE_VALUES, 1, s64}, Legal);
@@ -292,17 +307,20 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() {
setAction({G_FPEXT, s64}, Legal);
setAction({G_FPEXT, 1, s32}, Legal);
+ setAction({G_FPTRUNC, s32}, Legal);
+ setAction({G_FPTRUNC, 1, s64}, Legal);
+
// Constants
setAction({TargetOpcode::G_FCONSTANT, s64}, Legal);
// Merge/Unmerge
for (const auto &Ty :
{v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
for (const auto &Ty : {v16s8, v8s16, v4s32, v2s64}) {
- setAction({G_MERGE_VALUES, 1, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
setAction({G_UNMERGE_VALUES, Ty}, Legal);
}
}
@@ -349,12 +367,12 @@ void X86LegalizerInfo::setLegalizerInfoAVX() {
// Merge/Unmerge
for (const auto &Ty :
{v32s8, v64s8, v16s16, v32s16, v8s32, v16s32, v4s64, v8s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
for (const auto &Ty :
{v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) {
- setAction({G_MERGE_VALUES, 1, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
setAction({G_UNMERGE_VALUES, Ty}, Legal);
}
}
@@ -382,11 +400,11 @@ void X86LegalizerInfo::setLegalizerInfoAVX2() {
// Merge/Unmerge
for (const auto &Ty : {v64s8, v32s16, v16s32, v8s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
for (const auto &Ty : {v32s8, v16s16, v8s32, v4s64}) {
- setAction({G_MERGE_VALUES, 1, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
setAction({G_UNMERGE_VALUES, Ty}, Legal);
}
}
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index d38c7b497965..2816f8c62bfb 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -132,6 +132,9 @@ MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
// Handle dllimport linkage.
Name += "__imp_";
break;
+ case X86II::MO_COFFSTUB:
+ Name += ".refptr.";
+ break;
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
Suffix = "$non_lazy_ptr";
@@ -160,6 +163,17 @@ MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
switch (MO.getTargetFlags()) {
default:
break;
+ case X86II::MO_COFFSTUB: {
+ MachineModuleInfoCOFF &MMICOFF =
+ MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
+ MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
+ if (!StubSym.getPointer()) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym = MachineModuleInfoImpl::StubValueTy(
+ AsmPrinter.getSymbol(MO.getGlobal()), true);
+ }
+ break;
+ }
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
MachineModuleInfoImpl::StubValueTy &StubSym =
@@ -191,6 +205,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// These affect the name of the symbol, not any suffix.
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DLLIMPORT:
+ case X86II::MO_COFFSTUB:
break;
case X86II::MO_TLVP:
@@ -512,7 +527,7 @@ ReSimplify:
}
case X86::CLEANUPRET: {
- // Replace CATCHRET with the appropriate RET.
+ // Replace CLEANUPRET with the appropriate RET.
OutMI = MCInst();
OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
break;
@@ -584,54 +599,6 @@ ReSimplify:
case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
- // Atomic load and store require a separate pseudo-inst because Acquire
- // implies mayStore and Release implies mayLoad; fix these to regular MOV
- // instructions here
- case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
- case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
- case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
- case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
- case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
- case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
- case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
- case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
- case X86::RELEASE_MOV8mi: OutMI.setOpcode(X86::MOV8mi); goto ReSimplify;
- case X86::RELEASE_MOV16mi: OutMI.setOpcode(X86::MOV16mi); goto ReSimplify;
- case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify;
- case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
- case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
- case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify;
- case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
- case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify;
- case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
- case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify;
- case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
- case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify;
- case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
- case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify;
- case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
- case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify;
- case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
- case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify;
- case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
- case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify;
- case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
- case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify;
- case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
- case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify;
- case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
- case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify;
- case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
- case X86::RELEASE_XOR64mr: OutMI.setOpcode(X86::XOR64mr); goto ReSimplify;
- case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify;
- case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify;
- case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify;
- case X86::RELEASE_INC64m: OutMI.setOpcode(X86::INC64m); goto ReSimplify;
- case X86::RELEASE_DEC8m: OutMI.setOpcode(X86::DEC8m); goto ReSimplify;
- case X86::RELEASE_DEC16m: OutMI.setOpcode(X86::DEC16m); goto ReSimplify;
- case X86::RELEASE_DEC32m: OutMI.setOpcode(X86::DEC32m); goto ReSimplify;
- case X86::RELEASE_DEC64m: OutMI.setOpcode(X86::DEC64m); goto ReSimplify;
-
// We don't currently select the correct instruction form for instructions
// which have a short %eax, etc. form. Handle this by custom lowering, for
// now.
@@ -946,7 +913,7 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
break;
case MachineOperand::MO_Register:
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpoline())
+ if (Subtarget->useRetpolineIndirectCalls())
report_fatal_error("Lowering register statepoints with retpoline not "
"yet implemented.");
CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
@@ -1103,7 +1070,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
EmitAndCountInstruction(
MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpoline())
+ if (Subtarget->useRetpolineIndirectCalls())
report_fatal_error(
"Lowering patchpoint with retpoline not yet implemented.");
EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
@@ -1412,7 +1379,7 @@ PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
static const Constant *getConstantFromPool(const MachineInstr &MI,
const MachineOperand &Op) {
- if (!Op.isCPI())
+ if (!Op.isCPI() || Op.getOffset() != 0)
return nullptr;
ArrayRef<MachineConstantPoolEntry> Constants =
@@ -1424,7 +1391,7 @@ static const Constant *getConstantFromPool(const MachineInstr &MI,
if (ConstantEntry.isMachineConstantPoolEntry())
return nullptr;
- auto *C = dyn_cast<Constant>(ConstantEntry.Val.ConstVal);
+ const Constant *C = ConstantEntry.Val.ConstVal;
assert((!C || ConstantEntry.getType() == C->getType()) &&
"Expected a constant of the same type!");
return C;
@@ -1515,27 +1482,35 @@ static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
return Comment;
}
+static void printConstant(const APInt &Val, raw_ostream &CS) {
+ if (Val.getBitWidth() <= 64) {
+ CS << Val.getZExtValue();
+ } else {
+ // print multi-word constant as (w0,w1)
+ CS << "(";
+ for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
+ if (i > 0)
+ CS << ",";
+ CS << Val.getRawData()[i];
+ }
+ CS << ")";
+ }
+}
+
+static void printConstant(const APFloat &Flt, raw_ostream &CS) {
+ SmallString<32> Str;
+ // Force scientific notation to distinguish from integers.
+ Flt.toString(Str, 0, 0);
+ CS << Str;
+}
+
static void printConstant(const Constant *COp, raw_ostream &CS) {
if (isa<UndefValue>(COp)) {
CS << "u";
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
- if (CI->getBitWidth() <= 64) {
- CS << CI->getZExtValue();
- } else {
- // print multi-word constant as (w0,w1)
- const auto &Val = CI->getValue();
- CS << "(";
- for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
- if (i > 0)
- CS << ",";
- CS << Val.getRawData()[i];
- }
- CS << ")";
- }
+ printConstant(CI->getValue(), CS);
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
- SmallString<32> Str;
- CF->getValueAPF().toString(Str);
- CS << Str;
+ printConstant(CF->getValueAPF(), CS);
} else {
CS << "?";
}
@@ -1558,6 +1533,9 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
case X86::SEH_StackAlloc:
XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
break;
+ case X86::SEH_StackAlign:
+ XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
+ break;
case X86::SEH_SetFrame:
assert(MI->getOperand(1).getImm() == 0 &&
".cv_fpo_setframe takes no offset");
@@ -1617,6 +1595,18 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
}
}
+static unsigned getRegisterWidth(const MCOperandInfo &Info) {
+ if (Info.RegClass == X86::VR128RegClassID ||
+ Info.RegClass == X86::VR128XRegClassID)
+ return 128;
+ if (Info.RegClass == X86::VR256RegClassID ||
+ Info.RegClass == X86::VR256XRegClassID)
+ return 256;
+ if (Info.RegClass == X86::VR512RegClassID)
+ return 512;
+ llvm_unreachable("Unknown register class!");
+}
+
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI =
@@ -1720,41 +1710,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- case X86::MOVGOT64r: {
- // Materializes the GOT for the 64-bit large code model.
- MCSymbol *DotSym = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(DotSym);
-
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned ScratchReg = MI->getOperand(1).getReg();
- MCSymbol *GOTSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
-
- // .LtmpN: leaq .LtmpN(%rip), %dst
- const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
- EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
- .addReg(DstReg) // dest
- .addReg(X86::RIP) // base
- .addImm(1) // scale
- .addReg(0) // index
- .addExpr(DotExpr) // disp
- .addReg(0)); // seg
-
- // movq $_GLOBAL_OFFSET_TABLE_ - .LtmpN, %scratch
- const MCExpr *GOTSymExpr = MCSymbolRefExpr::create(GOTSym, OutContext);
- const MCExpr *GOTDiffExpr =
- MCBinaryExpr::createSub(GOTSymExpr, DotExpr, OutContext);
- EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
- .addReg(ScratchReg) // dest
- .addExpr(GOTDiffExpr)); // disp
-
- // addq %scratch, %dst
- EmitAndCountInstruction(MCInstBuilder(X86::ADD64rr)
- .addReg(DstReg) // dest
- .addReg(DstReg) // dest
- .addReg(ScratchReg)); // src
- return;
- }
-
case X86::ADD32ri: {
// Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
@@ -1835,6 +1790,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::SEH_SaveReg:
case X86::SEH_SaveXMM:
case X86::SEH_StackAlloc:
+ case X86::SEH_StackAlign:
case X86::SEH_SetFrame:
case X86::SEH_PushFrame:
case X86::SEH_EndPrologue:
@@ -1901,8 +1857,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 64> Mask;
- DecodePSHUFBMask(C, Mask);
+ DecodePSHUFBMask(C, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
!EnablePrintSchedInfo);
@@ -1973,8 +1930,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 16> Mask;
- DecodeVPERMILPMask(C, ElSize, Mask);
+ DecodeVPERMILPMask(C, ElSize, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
!EnablePrintSchedInfo);
@@ -2004,8 +1962,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(6);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 16> Mask;
- DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
+ DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
!EnablePrintSchedInfo);
@@ -2021,8 +1980,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(6);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 16> Mask;
- DecodeVPPERMMask(C, Mask);
+ DecodeVPPERMMask(C, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
!EnablePrintSchedInfo);
@@ -2129,11 +2089,11 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (i != 0 || l != 0)
CS << ",";
if (CDS->getElementType()->isIntegerTy())
- CS << CDS->getElementAsInteger(i);
- else if (CDS->getElementType()->isFloatTy())
- CS << CDS->getElementAsFloat(i);
- else if (CDS->getElementType()->isDoubleTy())
- CS << CDS->getElementAsDouble(i);
+ printConstant(CDS->getElementAsAPInt(i), CS);
+ else if (CDS->getElementType()->isHalfTy() ||
+ CDS->getElementType()->isFloatTy() ||
+ CDS->getElementType()->isDoubleTy())
+ printConstant(CDS->getElementAsAPFloat(i), CS);
else
CS << "?";
}
@@ -2155,6 +2115,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
break;
+ case X86::MOVDDUPrm:
+ case X86::VMOVDDUPrm:
+ case X86::VMOVDDUPZ128rm:
case X86::VBROADCASTSSrm:
case X86::VBROADCASTSSYrm:
case X86::VBROADCASTSSZ128m:
@@ -2191,6 +2154,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
int NumElts;
switch (MI->getOpcode()) {
default: llvm_unreachable("Invalid opcode");
+ case X86::MOVDDUPrm: NumElts = 2; break;
+ case X86::VMOVDDUPrm: NumElts = 2; break;
+ case X86::VMOVDDUPZ128rm: NumElts = 2; break;
case X86::VBROADCASTSSrm: NumElts = 4; break;
case X86::VBROADCASTSSYrm: NumElts = 8; break;
case X86::VBROADCASTSSZ128m: NumElts = 4; break;
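As an aside on the shuffle-comment hunks above: the new Width argument, computed by getRegisterWidth from the destination operand's register class, lets the Decode*Mask helpers size the printed mask by the destination register rather than by the constant-pool entry. A small self-contained sketch of that idea, with an illustrative helper name rather than LLVM's:

    // Illustrative only: clamp a decoded mask to the lane count implied by the
    // destination register width, so an over-wide constant does not add lanes.
    #include <vector>

    std::vector<int> clampMaskToWidth(std::vector<int> Mask,
                                      unsigned RegBits, unsigned EltBits) {
      const unsigned NumLanes = RegBits / EltBits; // e.g. 128 / 8 = 16 for an xmm PSHUFB
      if (Mask.size() > NumLanes)
        Mask.resize(NumLanes);
      return Mask;
    }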
diff --git a/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp b/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp
index df3abb17014d..5c09597d0442 100644
--- a/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -79,53 +79,46 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::TEST8ri:
case X86::TEST16ri:
case X86::TEST32ri:
- case X86::TEST32i32:
- case X86::TEST64i32:
case X86::TEST64ri32:
case X86::TEST8mr:
case X86::TEST16mr:
case X86::TEST32mr:
case X86::TEST64mr:
- case X86::AND16i16:
case X86::AND16ri:
case X86::AND16ri8:
case X86::AND16rm:
case X86::AND16rr:
- case X86::AND32i32:
case X86::AND32ri:
case X86::AND32ri8:
case X86::AND32rm:
case X86::AND32rr:
- case X86::AND64i32:
case X86::AND64ri32:
case X86::AND64ri8:
case X86::AND64rm:
case X86::AND64rr:
- case X86::AND8i8:
case X86::AND8ri:
case X86::AND8rm:
case X86::AND8rr:
return true;
- case X86::CMP16i16:
case X86::CMP16ri:
case X86::CMP16ri8:
case X86::CMP16rm:
case X86::CMP16rr:
- case X86::CMP32i32:
+ case X86::CMP16mr:
case X86::CMP32ri:
case X86::CMP32ri8:
case X86::CMP32rm:
case X86::CMP32rr:
- case X86::CMP64i32:
+ case X86::CMP32mr:
case X86::CMP64ri32:
case X86::CMP64ri8:
case X86::CMP64rm:
case X86::CMP64rr:
- case X86::CMP8i8:
+ case X86::CMP64mr:
case X86::CMP8ri:
case X86::CMP8rm:
case X86::CMP8rr:
- case X86::ADD16i16:
+ case X86::CMP8mr:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri8_DB:
@@ -133,7 +126,6 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::ADD16rm:
case X86::ADD16rr:
case X86::ADD16rr_DB:
- case X86::ADD32i32:
case X86::ADD32ri:
case X86::ADD32ri8:
case X86::ADD32ri8_DB:
@@ -141,7 +133,6 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::ADD32rm:
case X86::ADD32rr:
case X86::ADD32rr_DB:
- case X86::ADD64i32:
case X86::ADD64ri32:
case X86::ADD64ri32_DB:
case X86::ADD64ri8:
@@ -149,28 +140,21 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::ADD64rm:
case X86::ADD64rr:
case X86::ADD64rr_DB:
- case X86::ADD8i8:
- case X86::ADD8mi:
- case X86::ADD8mr:
case X86::ADD8ri:
case X86::ADD8rm:
case X86::ADD8rr:
- case X86::SUB16i16:
case X86::SUB16ri:
case X86::SUB16ri8:
case X86::SUB16rm:
case X86::SUB16rr:
- case X86::SUB32i32:
case X86::SUB32ri:
case X86::SUB32ri8:
case X86::SUB32rm:
case X86::SUB32rr:
- case X86::SUB64i32:
case X86::SUB64ri32:
case X86::SUB64ri8:
case X86::SUB64rm:
case X86::SUB64rr:
- case X86::SUB8i8:
case X86::SUB8ri:
case X86::SUB8rm:
case X86::SUB8rr:
diff --git a/contrib/llvm/lib/Target/X86/X86MacroFusion.h b/contrib/llvm/lib/Target/X86/X86MacroFusion.h
index 13fa2d78a018..97ef1d6d3b61 100644
--- a/contrib/llvm/lib/Target/X86/X86MacroFusion.h
+++ b/contrib/llvm/lib/Target/X86/X86MacroFusion.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_X86_X86MACROFUSION_H
+#define LLVM_LIB_TARGET_X86_X86MACROFUSION_H
+
#include "llvm/CodeGen/MachineScheduler.h"
namespace llvm {
@@ -23,3 +26,5 @@ std::unique_ptr<ScheduleDAGMutation>
createX86MacroFusionDAGMutation();
} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index 42db51b3cf01..b56d02b6bfb6 100644
--- a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -510,12 +510,16 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
MemOpNo += X86II::getOperandBias(Desc);
+ // Do not call chooseBestLEA if there was no matching LEA
+ auto Insns = LEAs.find(getMemOpKey(MI, MemOpNo));
+ if (Insns == LEAs.end())
+ continue;
+
// Get the best LEA instruction to replace address calculation.
MachineInstr *DefMI;
int64_t AddrDispShift;
int Dist;
- if (!chooseBestLEA(LEAs[getMemOpKey(MI, MemOpNo)], MI, DefMI, AddrDispShift,
- Dist))
+ if (!chooseBestLEA(Insns->second, MI, DefMI, AddrDispShift, Dist))
continue;
// If LEA occurs before current instruction, we can freely replace
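A note on the X86OptimizeLEAs hunk above: replacing the LEAs[getMemOpKey(...)] subscript with a find() plus early continue both skips memory operands with no matching LEA and avoids inserting an empty entry into the map as a side effect. The same pattern in a generic, self-contained form (std::map stand-ins, not the pass's real types):

    #include <map>
    #include <string>
    #include <vector>

    using Key     = std::string;      // stand-in for the memory-operand key
    using LEAList = std::vector<int>; // stand-in for the candidate-LEA list

    // Return nullptr when there is nothing to choose from; indexing with
    // operator[] would instead default-construct and insert an empty list.
    const LEAList *findCandidates(const std::map<Key, LEAList> &LEAs, const Key &K) {
      auto It = LEAs.find(K);
      return It == LEAs.end() ? nullptr : &It->second;
    }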
diff --git a/contrib/llvm/lib/Target/X86/X86PfmCounters.td b/contrib/llvm/lib/Target/X86/X86PfmCounters.td
index 093fbafa3fba..a1a4210b5ebf 100644
--- a/contrib/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/contrib/llvm/lib/Target/X86/X86PfmCounters.td
@@ -11,67 +11,216 @@
//
//===----------------------------------------------------------------------===//
-let SchedModel = SandyBridgeModel in {
-def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def SBPort0Counter : PfmIssueCounter<SBPort0, ["uops_dispatched_port:port_0"]>;
-def SBPort1Counter : PfmIssueCounter<SBPort1, ["uops_dispatched_port:port_1"]>;
-def SBPort23Counter : PfmIssueCounter<SBPort23,
- ["uops_dispatched_port:port_2",
- "uops_dispatched_port:port_3"]>;
-def SBPort4Counter : PfmIssueCounter<SBPort4, ["uops_dispatched_port:port_4"]>;
-def SBPort5Counter : PfmIssueCounter<SBPort5, ["uops_dispatched_port:port_5"]>;
-}
-
-let SchedModel = HaswellModel in {
-def HWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def HWPort0Counter : PfmIssueCounter<HWPort0, ["uops_dispatched_port:port_0"]>;
-def HWPort1Counter : PfmIssueCounter<HWPort1, ["uops_dispatched_port:port_1"]>;
-def HWPort2Counter : PfmIssueCounter<HWPort2, ["uops_dispatched_port:port_2"]>;
-def HWPort3Counter : PfmIssueCounter<HWPort3, ["uops_dispatched_port:port_3"]>;
-def HWPort4Counter : PfmIssueCounter<HWPort4, ["uops_dispatched_port:port_4"]>;
-def HWPort5Counter : PfmIssueCounter<HWPort5, ["uops_dispatched_port:port_5"]>;
-def HWPort6Counter : PfmIssueCounter<HWPort6, ["uops_dispatched_port:port_6"]>;
-def HWPort7Counter : PfmIssueCounter<HWPort7, ["uops_dispatched_port:port_7"]>;
-}
-
-let SchedModel = BroadwellModel in {
-def BWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def BWPort0Counter : PfmIssueCounter<BWPort0, ["uops_executed_port:port_0"]>;
-def BWPort1Counter : PfmIssueCounter<BWPort1, ["uops_executed_port:port_1"]>;
-def BWPort2Counter : PfmIssueCounter<BWPort2, ["uops_executed_port:port_2"]>;
-def BWPort3Counter : PfmIssueCounter<BWPort3, ["uops_executed_port:port_3"]>;
-def BWPort4Counter : PfmIssueCounter<BWPort4, ["uops_executed_port:port_4"]>;
-def BWPort5Counter : PfmIssueCounter<BWPort5, ["uops_executed_port:port_5"]>;
-def BWPort6Counter : PfmIssueCounter<BWPort6, ["uops_executed_port:port_6"]>;
-def BWPort7Counter : PfmIssueCounter<BWPort7, ["uops_executed_port:port_7"]>;
-}
-
-let SchedModel = SkylakeClientModel in {
-def SKLCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def SKLPort0Counter : PfmIssueCounter<SKLPort0, ["uops_dispatched_port:port_0"]>;
-def SKLPort1Counter : PfmIssueCounter<SKLPort1, ["uops_dispatched_port:port_1"]>;
-def SKLPort2Counter : PfmIssueCounter<SKLPort2, ["uops_dispatched_port:port_2"]>;
-def SKLPort3Counter : PfmIssueCounter<SKLPort3, ["uops_dispatched_port:port_3"]>;
-def SKLPort4Counter : PfmIssueCounter<SKLPort4, ["uops_dispatched_port:port_4"]>;
-def SKLPort5Counter : PfmIssueCounter<SKLPort5, ["uops_dispatched_port:port_5"]>;
-def SKLPort6Counter : PfmIssueCounter<SKLPort6, ["uops_dispatched_port:port_6"]>;
-def SKLPort7Counter : PfmIssueCounter<SKLPort7, ["uops_dispatched_port:port_7"]>;
-}
-
-let SchedModel = SkylakeServerModel in {
-def SKXCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def SKXPort0Counter : PfmIssueCounter<SKXPort0, ["uops_dispatched_port:port_0"]>;
-def SKXPort1Counter : PfmIssueCounter<SKXPort1, ["uops_dispatched_port:port_1"]>;
-def SKXPort2Counter : PfmIssueCounter<SKXPort2, ["uops_dispatched_port:port_2"]>;
-def SKXPort3Counter : PfmIssueCounter<SKXPort3, ["uops_dispatched_port:port_3"]>;
-def SKXPort4Counter : PfmIssueCounter<SKXPort4, ["uops_dispatched_port:port_4"]>;
-def SKXPort5Counter : PfmIssueCounter<SKXPort5, ["uops_dispatched_port:port_5"]>;
-def SKXPort6Counter : PfmIssueCounter<SKXPort6, ["uops_dispatched_port:port_6"]>;
-def SKXPort7Counter : PfmIssueCounter<SKXPort7, ["uops_dispatched_port:port_7"]>;
-}
-
-let SchedModel = BtVer2Model in {
-def JCycleCounter : PfmCycleCounter<"cpu_clk_unhalted">;
-def JFPU0Counter : PfmIssueCounter<JFPU0, ["dispatched_fpu:pipe0"]>;
-def JFPU1Counter : PfmIssueCounter<JFPU1, ["dispatched_fpu:pipe1"]>;
+def UnhaltedCoreCyclesPfmCounter : PfmCounter<"unhalted_core_cycles">;
+def UopsIssuedPfmCounter : PfmCounter<"uops_issued:any">;
+
+// No default counters on X86.
+def DefaultPfmCounters : ProcPfmCounters {}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
+
+// Intel X86 Counters.
+def PentiumPfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"uops_retired">;
+}
+def : PfmCountersBinding<"pentiumpro", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium2", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium3", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium3m", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium-m", PentiumPfmCounters>;
+
+def CorePfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:any">;
+}
+def : PfmCountersBinding<"yonah", CorePfmCounters>;
+def : PfmCountersBinding<"prescott", CorePfmCounters>;
+def : PfmCountersBinding<"core2", CorePfmCounters>;
+def : PfmCountersBinding<"penryn", CorePfmCounters>;
+def : PfmCountersBinding<"nehalem", CorePfmCounters>;
+def : PfmCountersBinding<"corei7", CorePfmCounters>;
+def : PfmCountersBinding<"westmere", CorePfmCounters>;
+
+def AtomPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:any">;
+}
+def : PfmCountersBinding<"bonnell", AtomPfmCounters>;
+def : PfmCountersBinding<"atom", AtomPfmCounters>;
+
+def SLMPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:any">;
+}
+def : PfmCountersBinding<"silvermont", SLMPfmCounters>;
+def : PfmCountersBinding<"goldmont", SLMPfmCounters>;
+def : PfmCountersBinding<"goldmont-plus", SLMPfmCounters>;
+def : PfmCountersBinding<"tremont", SLMPfmCounters>;
+
+def KnightPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:all">;
+}
+def : PfmCountersBinding<"knl", KnightPfmCounters>;
+def : PfmCountersBinding<"knm", KnightPfmCounters>;
+
+def SandyBridgePfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"SBPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"SBPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"SBPort23", "uops_dispatched_port:port_2 + uops_dispatched_port:port_3">,
+ PfmIssueCounter<"SBPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"SBPort5", "uops_dispatched_port:port_5">
+ ];
+}
+def : PfmCountersBinding<"sandybridge", SandyBridgePfmCounters>;
+def : PfmCountersBinding<"ivybridge", SandyBridgePfmCounters>;
+
+def HaswellPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"HWPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"HWPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"HWPort2", "uops_dispatched_port:port_2">,
+ PfmIssueCounter<"HWPort3", "uops_dispatched_port:port_3">,
+ PfmIssueCounter<"HWPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"HWPort5", "uops_dispatched_port:port_5">,
+ PfmIssueCounter<"HWPort6", "uops_dispatched_port:port_6">,
+ PfmIssueCounter<"HWPort7", "uops_dispatched_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"haswell", HaswellPfmCounters>;
+
+def BroadwellPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"BWPort0", "uops_executed_port:port_0">,
+ PfmIssueCounter<"BWPort1", "uops_executed_port:port_1">,
+ PfmIssueCounter<"BWPort2", "uops_executed_port:port_2">,
+ PfmIssueCounter<"BWPort3", "uops_executed_port:port_3">,
+ PfmIssueCounter<"BWPort4", "uops_executed_port:port_4">,
+ PfmIssueCounter<"BWPort5", "uops_executed_port:port_5">,
+ PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">,
+ PfmIssueCounter<"BWPort7", "uops_executed_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"broadwell", BroadwellPfmCounters>;
+
+def SkylakeClientPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"SKLPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"SKLPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"SKLPort2", "uops_dispatched_port:port_2">,
+ PfmIssueCounter<"SKLPort3", "uops_dispatched_port:port_3">,
+ PfmIssueCounter<"SKLPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"SKLPort5", "uops_dispatched_port:port_5">,
+ PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">,
+ PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"skylake", SkylakeClientPfmCounters>;
+
+def SkylakeServerPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"SKXPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"SKXPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"SKXPort2", "uops_dispatched_port:port_2">,
+ PfmIssueCounter<"SKXPort3", "uops_dispatched_port:port_3">,
+ PfmIssueCounter<"SKXPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"SKXPort5", "uops_dispatched_port:port_5">,
+ PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">,
+ PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"cannonlake", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"icelake-client", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"icelake-server", SkylakeServerPfmCounters>;
+
+// AMD X86 Counters.
+// Set basic counters for AMD cpus that we know libpfm4 supports.
+def DefaultAMDPfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+}
+def : PfmCountersBinding<"athlon", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-tbird", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-4", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-xp", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-mp", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"k8", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"opteron", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon64", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-fx", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"k8-sse3", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"opteron-sse3", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon64-sse3", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"amdfam10", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"barcelona", DefaultAMDPfmCounters>;
+
+def BdVer2PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"PdFPU0", "dispatched_fpu_ops:ops_pipe0 + dispatched_fpu_ops:ops_dual_pipe0">,
+ PfmIssueCounter<"PdFPU1", "dispatched_fpu_ops:ops_pipe1 + dispatched_fpu_ops:ops_dual_pipe1">,
+ PfmIssueCounter<"PdFPU2", "dispatched_fpu_ops:ops_pipe2 + dispatched_fpu_ops:ops_dual_pipe2">,
+ PfmIssueCounter<"PdFPU3", "dispatched_fpu_ops:ops_pipe3 + dispatched_fpu_ops:ops_dual_pipe3">
+ ];
+}
+def : PfmCountersBinding<"bdver1", BdVer2PfmCounters>;
+def : PfmCountersBinding<"bdver2", BdVer2PfmCounters>;
+
+def BdVer3PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"SrFPU0", "dispatched_fpu_ops:ops_pipe0 + dispatched_fpu_ops:ops_dual_pipe0">,
+ PfmIssueCounter<"SrFPU1", "dispatched_fpu_ops:ops_pipe1 + dispatched_fpu_ops:ops_dual_pipe1">,
+ PfmIssueCounter<"SrFPU2", "dispatched_fpu_ops:ops_pipe2 + dispatched_fpu_ops:ops_dual_pipe2">
+ ];
+}
+def : PfmCountersBinding<"bdver3", BdVer3PfmCounters>;
+def : PfmCountersBinding<"bdver4", BdVer3PfmCounters>;
+
+def BtVer1PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"BtFPU0", "dispatched_fpu:pipe0">,
+ PfmIssueCounter<"BtFPU1", "dispatched_fpu:pipe1">
+ ];
+}
+def : PfmCountersBinding<"btver1", BtVer1PfmCounters>;
+
+def BtVer2PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"JFPU0", "dispatched_fpu:pipe0">,
+ PfmIssueCounter<"JFPU1", "dispatched_fpu:pipe1">
+ ];
+}
+def : PfmCountersBinding<"btver2", BtVer2PfmCounters>;
+
+def ZnVer1PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cycles_not_in_halt">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"ZnFPU0", "fpu_pipe_assignment:total0">,
+ PfmIssueCounter<"ZnFPU1", "fpu_pipe_assignment:total1">,
+ PfmIssueCounter<"ZnFPU2", "fpu_pipe_assignment:total2">,
+ PfmIssueCounter<"ZnFPU3", "fpu_pipe_assignment:total3">,
+ PfmIssueCounter<"ZnDivider", "div_op_count">
+ ];
}
+def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>;
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
index 246d6d5a58d0..355291916ee8 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -194,19 +194,40 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (Opc) {
case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FCONSTANT:
// Instruction having only floating-point operands (all scalars in VECRReg)
getInstrPartialMappingIdxs(MI, MRI, /* isFP */ true, OpRegBankIdx);
break;
- case TargetOpcode::G_SITOFP: {
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_FPTOSI: {
// Some of the floating-point instructions have mixed GPR and FP operands:
// fine-tune the computed mapping.
auto &Op0 = MI.getOperand(0);
auto &Op1 = MI.getOperand(1);
const LLT Ty0 = MRI.getType(Op0.getReg());
const LLT Ty1 = MRI.getType(Op1.getReg());
- OpRegBankIdx[0] = getPartialMappingIdx(Ty0, /* isFP */ true);
- OpRegBankIdx[1] = getPartialMappingIdx(Ty1, /* isFP */ false);
+
+ bool FirstArgIsFP = Opc == TargetOpcode::G_SITOFP;
+ bool SecondArgIsFP = Opc == TargetOpcode::G_FPTOSI;
+ OpRegBankIdx[0] = getPartialMappingIdx(Ty0, /* isFP */ FirstArgIsFP);
+ OpRegBankIdx[1] = getPartialMappingIdx(Ty1, /* isFP */ SecondArgIsFP);
+ break;
+ }
+ case TargetOpcode::G_FCMP: {
+ LLT Ty1 = MRI.getType(MI.getOperand(2).getReg());
+ LLT Ty2 = MRI.getType(MI.getOperand(3).getReg());
+ (void)Ty2;
+ assert(Ty1.getSizeInBits() == Ty2.getSizeInBits() &&
+ "Mismatched operand sizes for G_FCMP");
+
+ unsigned Size = Ty1.getSizeInBits();
+ (void)Size;
+ assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP");
+
+ auto FpRegBank = getPartialMappingIdx(Ty1, /* isFP */ true);
+ OpRegBankIdx = {PMI_GPR8,
+ /* Predicate */ PMI_None, FpRegBank, FpRegBank};
break;
}
case TargetOpcode::G_TRUNC:
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index ee9e7891f9f6..aa20273f89ab 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -436,11 +436,12 @@ def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>;
def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>;
def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>;
-def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
+def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, ESP)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
- R8, R9, R11, RIP)>;
+ R8, R9, R11, RIP, RSP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
- R8, R9, R10, R11, RIP)>;
+ R8, R9, R10, R11,
+ RIP, RSP)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
@@ -499,6 +500,16 @@ def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;
+// Classes to support the 64-bit assembler constraint tied to a fixed
+// register in 32-bit mode. The second register is always the next in
+// the list. Wrap around causes an error.
+def GR32_DC : RegisterClass<"X86", [i32], 32, (add EDX, ECX)>;
+def GR32_CB : RegisterClass<"X86", [i32], 32, (add ECX, EBX)>;
+def GR32_BSI : RegisterClass<"X86", [i32], 32, (add EBX, ESI)>;
+def GR32_SIDI : RegisterClass<"X86", [i32], 32, (add ESI, EDI)>;
+def GR32_DIBP : RegisterClass<"X86", [i32], 32, (add EDI, EBP)>;
+def GR32_BPSP : RegisterClass<"X86", [i32], 32, (add EBP, ESP)>;
+
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
@@ -511,10 +522,16 @@ def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
+
def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
+// st(7) may not be allocatable.
+def RFP80_7 : RegisterClass<"X86",[f80], 32, (add FP7)> {
+ let isAllocatable = 0;
+}
+
// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
diff --git a/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp b/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp
index 250deb3523b4..08994cccb21e 100644
--- a/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp
@@ -74,7 +74,7 @@ private:
void createThunkFunction(Module &M, StringRef Name);
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
- void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
+ void populateThunk(MachineFunction &MF, unsigned Reg);
};
} // end anonymous namespace
@@ -115,7 +115,9 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
// FIXME: It's a little silly to look at every function just to enumerate
// the subtargets, but eventually we'll want to look at them for indirect
// calls, so maybe this is OK.
- if (!STI->useRetpoline() || STI->useRetpolineExternalThunk())
+ if ((!STI->useRetpolineIndirectCalls() &&
+ !STI->useRetpolineIndirectBranches()) ||
+ STI->useRetpolineExternalThunk())
return false;
// Otherwise, we need to insert the thunk.
@@ -234,25 +236,33 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
}
void X86RetpolineThunks::populateThunk(MachineFunction &MF,
- Optional<unsigned> Reg) {
+ unsigned Reg) {
// Set MF properties. We never use vregs...
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+ // generate two bbs for the entry block.
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
+ while (MF.size() > 1)
+ MF.erase(std::next(MF.begin()));
MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+ MCSymbol *TargetSym = MF.getContext().createTempSymbol();
MF.push_back(CaptureSpec);
MF.push_back(CallTarget);
const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
- BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addMBB(CallTarget);
- Entry->addSuccessor(CallTarget);
+ Entry->addLiveIn(Reg);
+ BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
+
+ // The MIR verifier thinks that the CALL in the entry block will fall through
+ // to CaptureSpec, so mark it as the successor. Technically, CallTarget is
+ // the successor, but the MIR verifier doesn't know how to cope with that.
Entry->addSuccessor(CaptureSpec);
- CallTarget->setHasAddressTaken();
// In the capture loop for speculation, we want to stop the processor from
// speculating as fast as possible. On Intel processors, the PAUSE instruction
@@ -268,7 +278,10 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
CaptureSpec->setHasAddressTaken();
CaptureSpec->addSuccessor(CaptureSpec);
+ CallTarget->addLiveIn(Reg);
+ CallTarget->setHasAddressTaken();
CallTarget->setAlignment(4);
- insertRegReturnAddrClobber(*CallTarget, *Reg);
+ insertRegReturnAddrClobber(*CallTarget, Reg);
+ CallTarget->back().setPreInstrSymbol(MF, TargetSym);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
diff --git a/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td b/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
index 6334d9e89a60..971a50196e45 100755
--- a/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -72,10 +72,16 @@ def BWDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def BWFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/5/6 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/5/6 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 5>;
+def : ReadAdvance<ReadAfterVecYLd, 6>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -108,22 +114,47 @@ def : WriteRes<WriteRMW, [BWPort237,BWPort4]>;
// Arithmetic.
defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
defm : BWWriteResPair<WriteADC, [BWPort06], 1>; // Integer ALU + flags op.
-defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication.
-defm : BWWriteResPair<WriteIMul64, [BWPort1], 3>; // Integer 64-bit multiplication.
-defm : BWWriteResPair<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
+// Integer multiplication.
+defm : BWWriteResPair<WriteIMul8, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul16, [BWPort1,BWPort06,BWPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [BWPort1,BWPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [BWPort1,BWPort0156,BWPort23], 8, [1,1,1], 3>;
+defm : BWWriteResPair<WriteIMul16Reg, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
+defm : BWWriteResPair<WriteIMul32Imm, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul32Reg, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul64, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : BWWriteResPair<WriteIMul64Imm, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul64Reg, [BWPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+
+// TODO: Why isn't the BWDivider used consistently?
+defm : X86WriteRes<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10], 1>;
+defm : X86WriteRes<WriteDiv16, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+
+defm : X86WriteRes<WriteIDiv8, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv32, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv64, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+
+defm : X86WriteRes<WriteCMPXCHG,[BWPort06, BWPort0156], 5, [2, 3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[BWPort23, BWPort06, BWPort0156, BWPort237, BWPort4], 8, [1, 2, 1, 1, 1], 6>;
defm : X86WriteRes<WriteBSWAP32, [BWPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>;
+defm : X86WriteRes<WriteXCHG, [BWPort0156], 2, [3], 3>;
defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads.
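For readers skimming the reworked multiply/divide entries above: each record spells out the ports consumed, the latency, how many cycles each listed port is held (ResourceCycles), and the total micro-op count. The *ResPair multiclasses used throughout these models also define the folded-load sibling of a class (the Ld-suffixed form), which is why memory forms such as WriteDiv32Ld now get their own explicit X86WriteRes entries when their numbers differ. An annotated sketch with a hypothetical class name:

    defm : X86WriteRes<WriteMyOp,              // scheduling class (hypothetical)
                       [BWPort1, BWPort0156],  // ports consumed
                       4,                      // latency in cycles
                       [1, 2],                 // cycles each listed port is held
                       3>;                     // total micro-ops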
@@ -136,8 +167,14 @@ def : WriteRes<WriteSETCCStore, [BWPort06,BWPort4,BWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [BWPort06]>;
-def : WriteRes<WriteBitTest,[BWPort06]>; // Bit Test instrs
+
+defm : X86WriteRes<WriteLAHFSAHF, [BWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [BWPort06], 1, [1], 1>; // Bit Test instrs
+defm : X86WriteRes<WriteBitTestImmLd, [BWPort06,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [BWPort0156,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [BWPort06], 1, [1], 1>; // Bit Test + Set instrs
+defm : X86WriteRes<WriteBitTestSetImmLd, [BWPort06,BWPort23], 5, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [BWPort0156,BWPort23], 5, [1,1], 2>;
// Bit counts.
defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
@@ -147,7 +184,10 @@ defm : BWWriteResPair<WriteTZCNT, [BWPort1], 3>;
defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>;
// Integer shifts and rotates.
-defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
+defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
+defm : BWWriteResPair<WriteShiftCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
+defm : BWWriteResPair<WriteRotate, [BWPort06], 2, [2], 2>;
+defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>;
@@ -155,9 +195,10 @@ defm : X86WriteRes<WriteSHDrrcl,[BWPort1,BWPort06,BWPort0156], 6, [1, 1, 2], 4>;
defm : X86WriteRes<WriteSHDmri, [BWPort1,BWPort23,BWPort237,BWPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156], 11, [1, 1, 1, 1, 2], 6>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>;
-defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
+defm : BWWriteResPair<WriteBLS, [BWPort15], 1>;
+defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
@@ -582,7 +623,7 @@ def BWWriteResGroup3 : SchedWriteRes<[BWPort5]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[BWWriteResGroup3], (instrs MMX_MOVQ2DQrr)>;
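Much of the rest of this patch swaps instregex for instrs in InstRW bindings. Both attach opcodes to a SchedWriteRes group, but instregex matches instruction names against a regular expression while the scheduler tables are generated, whereas instrs references opcodes directly, so a stale or misspelled name becomes a hard TableGen error rather than a pattern that silently matches nothing, and TableGen avoids scanning every instruction name. A sketch with a hypothetical group (only one of the two bindings would be kept in a real model):

    def MyWriteGroup : SchedWriteRes<[BWPort5]> {
      let Latency = 1;
      let NumMicroOps = 1;
      let ResourceCycles = [1];
    }
    // Name-pattern form: binds every opcode whose name matches the regex.
    def: InstRW<[MyWriteGroup], (instregex "MMX_MOVQ2DQrr")>;
    // Direct form preferred after this change: binds the opcode itself.
    def: InstRW<[MyWriteGroup], (instrs MMX_MOVQ2DQrr)>;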
def BWWriteResGroup4 : SchedWriteRes<[BWPort6]> {
let Latency = 1;
@@ -610,10 +651,7 @@ def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup7], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[BWWriteResGroup7], (instregex "ANDN(32|64)rr")>;
def BWWriteResGroup8 : SchedWriteRes<[BWPort015]> {
let Latency = 1;
@@ -627,19 +665,19 @@ def BWWriteResGroup9 : SchedWriteRes<[BWPort0156]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup9], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+def: InstRW<[BWWriteResGroup9], (instrs SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def BWWriteResGroup10 : SchedWriteRes<[BWPort4,BWPort237]> {
let Latency = 1;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm",
- "ST_FP(32|64|80)m")>;
+def: InstRW<[BWWriteResGroup10], (instrs FBSTPm)>;
+def: InstRW<[BWWriteResGroup10], (instregex "ST_FP(32|64|80)m")>;
def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
let Latency = 2;
@@ -648,16 +686,6 @@ def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
}
def: InstRW<[BWWriteResGroup12], (instrs FDECSTP)>;
-def BWWriteResGroup13 : SchedWriteRes<[BWPort06]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[BWWriteResGroup13], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri")>;
-
def BWWriteResGroup14 : SchedWriteRes<[BWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -688,7 +716,7 @@ def BWWriteResGroup17 : SchedWriteRes<[BWPort01,BWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup17], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[BWWriteResGroup17], (instrs MMX_MOVDQ2Qrr)>;
def BWWriteResGroup18 : SchedWriteRes<[BWPort237,BWPort0156]> {
let Latency = 2;
@@ -702,11 +730,10 @@ def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup20], (instrs CWD)>;
-def: InstRW<[BWWriteResGroup20], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[BWWriteResGroup20], (instregex "ADC8i8",
- "ADC8ri",
- "SBB8i8",
+def: InstRW<[BWWriteResGroup20], (instrs CWD,
+ JCXZ, JECXZ, JRCXZ,
+ ADC8i8, SBB8i8)>;
+def: InstRW<[BWWriteResGroup20], (instregex "ADC8ri",
"SBB8ri",
"SET(A|BE)r")>;
@@ -729,53 +756,35 @@ def BWWriteResGroup25 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup25], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[BWWriteResGroup25], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)rmr")>;
def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup27], (instregex "MMX_CVTPI2PSirr",
- "PDEP(32|64)rr",
- "PEXT(32|64)rr",
+def: InstRW<[BWWriteResGroup27], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr",
"(V?)CVTDQ2PS(Y?)rr")>;
-def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup27_16], (instrs IMUL16rri, IMUL16rri8)>;
-
def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTBrr",
- "VPBROADCASTWrr")>;
-
-def BWWriteResGroup30 : SchedWriteRes<[BWPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[BWWriteResGroup30], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
+def: InstRW<[BWWriteResGroup28], (instrs VPBROADCASTBrr,
+ VPBROADCASTWrr)>;
def BWWriteResGroup33 : SchedWriteRes<[BWPort5,BWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[BWWriteResGroup33], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[BWWriteResGroup33], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def BWWriteResGroup34 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 3;
@@ -789,21 +798,8 @@ def BWWriteResGroup35 : SchedWriteRes<[BWPort06,BWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[BWWriteResGroup35], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
-
-def BWWriteResGroup36 : SchedWriteRes<[BWPort06,BWPort0156]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[BWWriteResGroup36], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
+def: InstRW<[BWWriteResGroup35], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def BWWriteResGroup37 : SchedWriteRes<[BWPort4,BWPort6,BWPort237,BWPort0156]> {
let Latency = 3;
@@ -835,7 +831,7 @@ def BWWriteResGroup40 : SchedWriteRes<[BWPort0,BWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup40], (instregex "VCVTPS2PDYrr")>;
+def: InstRW<[BWWriteResGroup40], (instrs VCVTPS2PDYrr)>;
def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> {
let Latency = 4;
@@ -849,9 +845,8 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup42], (instrs IMUL64r, MUL64r, MULX64rr)>;
-def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTPI2PDirr",
- "MMX_CVT(T?)PD2PIirr",
+def: InstRW<[BWWriteResGroup42], (instrs MMX_CVTPI2PDirr)>;
+def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIirr",
"MMX_CVT(T?)PS2PIirr",
"(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
@@ -861,13 +856,6 @@ def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTPI2PDirr",
"(V?)CVTSI2SSrr",
"(V?)CVT(T?)PD2DQrr")>;
-def BWWriteResGroup42_16 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[BWWriteResGroup42_16], (instrs IMUL16r, MUL16r)>;
-
def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -890,10 +878,10 @@ def BWWriteResGroup45 : SchedWriteRes<[BWPort0156]> {
}
def: InstRW<[BWWriteResGroup45], (instrs FNCLEX)>;
-def BWWriteResGroup46 : SchedWriteRes<[BWPort015,BWPort0156]> {
- let Latency = 4;
+def BWWriteResGroup46 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[BWWriteResGroup46], (instrs VZEROUPPER)>;
@@ -910,17 +898,14 @@ def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup49], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
- "VBROADCASTSSrm",
- "(V?)MOVDDUPrm",
- "(V?)MOVSHDUPrm",
- "(V?)MOVSLDUPrm",
- "VPBROADCASTDrm",
- "VPBROADCASTQrm")>;
+def: InstRW<[BWWriteResGroup49], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)")>;
+def: InstRW<[BWWriteResGroup49], (instrs VBROADCASTSSrm,
+ VMOVDDUPrm, MOVDDUPrm,
+ VMOVSHDUPrm, MOVSHDUPrm,
+ VMOVSLDUPrm, MOVSLDUPrm,
+ VPBROADCASTDrm,
+ VPBROADCASTQrm)>;
def BWWriteResGroup50 : SchedWriteRes<[BWPort1,BWPort5]> {
let Latency = 5;
@@ -936,13 +921,6 @@ def BWWriteResGroup51 : SchedWriteRes<[BWPort1,BWPort6,BWPort06]> {
}
def: InstRW<[BWWriteResGroup51], (instregex "STR(16|32|64)r")>;
-def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup52], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -957,13 +935,6 @@ def BWWriteResGroup55 : SchedWriteRes<[BWPort06,BWPort0156]> {
}
def: InstRW<[BWWriteResGroup55], (instrs XSETBV)>;
-def BWWriteResGroup56 : SchedWriteRes<[BWPort06,BWPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[BWWriteResGroup56], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def BWWriteResGroup57 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
@@ -976,50 +947,44 @@ def BWWriteResGroup58 : SchedWriteRes<[BWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup58], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
- "VPBROADCASTDYrm",
- "VPBROADCASTQYrm")>;
+def: InstRW<[BWWriteResGroup58], (instregex "LD_F(32|64|80)m")>;
+def: InstRW<[BWWriteResGroup58], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm,
+ VPBROADCASTDYrm,
+ VPBROADCASTQYrm)>;
def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup59], (instregex "(V?)CVTPS2PDrm",
- "(V?)CVTSS2SDrm",
- "VPSLLVQrm",
- "VPSRLVQrm")>;
+def: InstRW<[BWWriteResGroup59], (instrs CVTPS2PDrm, VCVTPS2PDrm,
+ CVTSS2SDrm, VCVTSS2SDrm,
+ VPSLLVQrm,
+ VPSRLVQrm)>;
def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup60], (instregex "VCVTDQ2PDYrr",
- "VCVTPD2PSYrr",
- "VCVT(T?)PD2DQYrr")>;
+def: InstRW<[BWWriteResGroup60], (instrs VCVTDQ2PDYrr,
+ VCVTPD2PSYrr,
+ VCVTPD2DQYrr,
+ VCVTTPD2DQYrr)>;
def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup62], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def BWWriteResGroup63 : SchedWriteRes<[BWPort23,BWPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup63], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[BWWriteResGroup62], (instrs FARJMP64)>;
+def: InstRW<[BWWriteResGroup62], (instregex "JMP(16|32|64)m")>;
def BWWriteResGroup64 : SchedWriteRes<[BWPort23,BWPort15]> {
let Latency = 6;
@@ -1027,9 +992,6 @@ def BWWriteResGroup64 : SchedWriteRes<[BWPort23,BWPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup64], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def BWWriteResGroup65 : SchedWriteRes<[BWPort23,BWPort015]> {
@@ -1037,9 +999,9 @@ def BWWriteResGroup65 : SchedWriteRes<[BWPort23,BWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup65], (instregex "VINSERTF128rm",
- "VINSERTI128rm",
- "VPBLENDDrmi")>;
+def: InstRW<[BWWriteResGroup65], (instrs VINSERTF128rm,
+ VINSERTI128rm,
+ VPBLENDDrmi)>;
def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> {
let Latency = 6;
@@ -1061,15 +1023,9 @@ def BWWriteResGroup69 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[BWWriteResGroup69], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def BWWriteResGroup70 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 6;
@@ -1091,8 +1047,8 @@ def BWWriteResGroup73 : SchedWriteRes<[BWPort0,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup73], (instregex "VPSLLVQYrm",
- "VPSRLVQYrm")>;
+def: InstRW<[BWWriteResGroup73], (instrs VPSLLVQYrm,
+ VPSRLVQYrm)>;
def BWWriteResGroup74 : SchedWriteRes<[BWPort1,BWPort23]> {
let Latency = 7;
@@ -1106,16 +1062,16 @@ def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup77], (instregex "VPBLENDDYrmi")>;
+def: InstRW<[BWWriteResGroup77], (instrs VPBLENDDYrmi)>;
def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 7;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[BWWriteResGroup79], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def BWWriteResGroup80 : SchedWriteRes<[BWPort23,BWPort0156]> {
let Latency = 7;
@@ -1144,10 +1100,8 @@ def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[BWWriteResGroup87], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[BWWriteResGroup87], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def BWWriteResGroup88 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 7;
@@ -1161,8 +1115,8 @@ def BWWriteResGroup89 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[BWWriteResGroup89], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[BWWriteResGroup89], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[BWWriteResGroup89], (instrs FARCALL64)>;
def BWWriteResGroup90 : SchedWriteRes<[BWPort6,BWPort06,BWPort15,BWPort0156]> {
let Latency = 7;
@@ -1176,54 +1130,31 @@ def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm",
- "PDEP(32|64)rm",
- "PEXT(32|64)rm",
- "(V?)CVTDQ2PSrm")>;
-
-def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup91_16], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def BWWriteResGroup91_16_2 : SchedWriteRes<[BWPort1, BWPort06, BWPort0156, BWPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[BWWriteResGroup91_16_2], (instrs IMUL16m, MUL16m)>;
+def: InstRW<[BWWriteResGroup91], (instrs MMX_CVTPI2PSirm,
+ CVTDQ2PSrm,
+ VCVTDQ2PSrm)>;
+def: InstRW<[BWWriteResGroup91], (instregex "P(DEP|EXT)(32|64)rm")>;
def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup92], (instregex "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXBWYrm",
- "VPMOVSXDQYrm",
- "VPMOVSXWDYrm",
- "VPMOVSXWQYrm",
- "VPMOVZXWDYrm")>;
+def: InstRW<[BWWriteResGroup92], (instrs VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVSXWQYrm,
+ VPMOVZXWDYrm)>;
def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[BWWriteResGroup97], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
-
-def BWWriteResGroup98 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[BWWriteResGroup98], (instregex "ROR(8|16|32|64)mCL")>;
+def: InstRW<[BWWriteResGroup97], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def BWWriteResGroup99 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 8;
@@ -1238,8 +1169,8 @@ def BWWriteResGroup100 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPo
let ResourceCycles = [1,1,1,2,1];
}
def : SchedAlias<WriteADCRMW, BWWriteResGroup100>;
-def: InstRW<[BWWriteResGroup100], (instregex "CMPXCHG(8|16|32|64)rm",
- "ROL(8|16|32|64)mCL",
+def: InstRW<[BWWriteResGroup100], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1250,9 +1181,9 @@ def BWWriteResGroup101 : SchedWriteRes<[BWPort1,BWPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
- "ILD_F(16|32|64)m",
- "VCVTPS2DQYrm",
- "VCVTTPS2DQYrm")>;
+ "ILD_F(16|32|64)m")>;
+def: InstRW<[BWWriteResGroup101], (instrs VCVTPS2DQYrm,
+ VCVTTPS2DQYrm)>;
def BWWriteResGroup105 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
let Latency = 9;
@@ -1270,18 +1201,18 @@ def BWWriteResGroup106 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup106], (instregex "VCVTPS2PDYrm")>;
+def: InstRW<[BWWriteResGroup106], (instrs VCVTPS2PDYrm)>;
def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup107], (instrs IMUL64m, MUL64m, MULX64rm)>;
-def: InstRW<[BWWriteResGroup107], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVTPI2PDirm",
- "MMX_CVT(T?)PD2PIirm",
+def: InstRW<[BWWriteResGroup107], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPI2PDirm)>;
+def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIirm",
"(V?)CVTDQ2PDrm",
"(V?)CVTSD2SSrm")>;
@@ -1298,7 +1229,7 @@ def BWWriteResGroup112 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
let NumMicroOps = 5;
let ResourceCycles = [1,1,3];
}
-def: InstRW<[BWWriteResGroup112], (instregex "RDRAND(16|32|64)r")>;
+def: InstRW<[BWWriteResGroup112], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
def BWWriteResGroup113 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> {
let Latency = 9;
@@ -1329,13 +1260,6 @@ def BWWriteResGroup120 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
}
def: InstRW<[BWWriteResGroup120], (instregex "CVTTSS2SI64rm")>;
-def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[BWWriteResGroup121], (instrs IMUL32m, MUL32m, MULX32rm)>;
-
def BWWriteResGroup122_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 11;
let NumMicroOps = 1;
@@ -1348,15 +1272,15 @@ def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m",
- "VPCMPGTQYrm")>;
+def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m")>;
+def: InstRW<[BWWriteResGroup123], (instrs VPCMPGTQYrm)>;
def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[BWWriteResGroup128], (instrs VCVTDQ2PDYrm)>;
def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
let Latency = 11;
@@ -1371,7 +1295,7 @@ def BWWriteResGroup132 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> {
let NumMicroOps = 9;
let ResourceCycles = [1,4,1,3];
}
-def: InstRW<[BWWriteResGroup132], (instregex "RCL8rCL")>;
+def: InstRW<[BWWriteResGroup132], (instrs RCL8rCL)>;
def BWWriteResGroup133 : SchedWriteRes<[BWPort06,BWPort0156]> {
let Latency = 11;
@@ -1414,7 +1338,7 @@ def BWWriteResGroup145 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> {
let NumMicroOps = 10;
let ResourceCycles = [2,3,1,4];
}
-def: InstRW<[BWWriteResGroup145], (instregex "RCR8rCL")>;
+def: InstRW<[BWWriteResGroup145], (instrs RCR8rCL)>;
def BWWriteResGroup146 : SchedWriteRes<[BWPort0,BWPort1,BWPort6,BWPort0156]> {
let Latency = 14;
@@ -1451,10 +1375,10 @@ def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPo
}
def: InstRW<[BWWriteResGroup153], (instrs CMPXCHG8B)>;
-def BWWriteResGroup154 : SchedWriteRes<[BWPort5]> {
- let Latency = 16;
- let NumMicroOps = 16;
- let ResourceCycles = [16];
+def BWWriteResGroup154 : SchedWriteRes<[BWPort5,BWPort6]> {
+ let Latency = 8;
+ let NumMicroOps = 20;
+ let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup154], (instrs VZEROALL)>;
@@ -1513,7 +1437,7 @@ def BWWriteResGroup172 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let NumMicroOps = 18;
let ResourceCycles = [1,1,16];
}
-def: InstRW<[BWWriteResGroup172], (instregex "POPF64")>;
+def: InstRW<[BWWriteResGroup172], (instrs POPF64)>;
def BWWriteResGroup176 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let Latency = 23;
@@ -1608,13 +1532,6 @@ def BWWriteResGroup186 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPor
def: InstRW<[BWWriteResGroup186], (instrs XSAVE)>;
def: InstRW<[BWWriteResGroup186], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
-def BWWriteResGroup190 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> {
- let Latency = 34;
- let NumMicroOps = 8;
- let ResourceCycles = [2,2,2,1,1];
-}
-def: InstRW<[BWWriteResGroup190], (instregex "DIV(8|16|32|64)m")>;
-
def BWWriteResGroup191 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort06,BWPort0156]> {
let Latency = 34;
let NumMicroOps = 23;
@@ -1623,13 +1540,6 @@ def BWWriteResGroup191 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort06,BWPort
def: InstRW<[BWWriteResGroup191], (instregex "IN(8|16|32)ri",
"IN(8|16|32)rr")>;
-def BWWriteResGroup193 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> {
- let Latency = 35;
- let NumMicroOps = 8;
- let ResourceCycles = [2,2,2,1,1];
-}
-def: InstRW<[BWWriteResGroup193], (instregex "IDIV(8|16|32|64)m")>;
-
def BWWriteResGroup194 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
@@ -1673,13 +1583,6 @@ def BWWriteResGroup200 : SchedWriteRes<[BWPort5,BWPort01,BWPort0156]> {
}
def: InstRW<[BWWriteResGroup200], (instrs FNINIT)>;
-def BWWriteResGroup201 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156]> {
- let Latency = 80;
- let NumMicroOps = 32;
- let ResourceCycles = [7,7,3,3,1,11];
-}
-def: InstRW<[BWWriteResGroup201], (instregex "DIV(16|32|64)r")>;
-
def BWWriteResGroup202 : SchedWriteRes<[BWPort0,BWPort1,BWPort4,BWPort5,BWPort6,BWPort237,BWPort06,BWPort0156]> {
let Latency = 115;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
index 876c3e4162cf..06a32fb0b1cd 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -77,10 +77,16 @@ def HWDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def HWFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -121,16 +127,32 @@ def : WriteRes<WriteZero, []>;
// Arithmetic.
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
defm : HWWriteResPair<WriteADC, [HWPort06, HWPort0156], 2, [1,1], 2>;
-defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
-defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>;
+
+// Integer multiplication.
+defm : HWWriteResPair<WriteIMul8, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul16, [HWPort1,HWPort06,HWPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [HWPort1,HWPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [HWPort1,HWPort0156,HWPort23], 8, [1,1,1], 3>;
+defm : HWWriteResPair<WriteIMul16Reg, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
+defm : HWWriteResPair<WriteIMul32Imm, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul32Reg, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul64, [HWPort1,HWPort6], 4, [1,1], 2>;
+defm : HWWriteResPair<WriteIMul64Imm, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul64Reg, [HWPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>;
-
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+defm : X86WriteRes<WriteCMPXCHG,[HWPort06, HWPort0156], 5, [2,3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[HWPort23,HWPort06,HWPort0156,HWPort237,HWPort4], 9, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteXCHG, [HWPort0156], 2, [3], 3>;
// Integer shifts and rotates.
-defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
+defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
+defm : HWWriteResPair<WriteShiftCL, [HWPort06, HWPort0156], 3, [2,1], 3>;
+defm : HWWriteResPair<WriteRotate, [HWPort06], 2, [2], 2>;
+defm : HWWriteResPair<WriteRotateCL, [HWPort06, HWPort0156], 3, [2,1], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [HWPort1], 3, [1], 1>;
@@ -149,8 +171,14 @@ def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [HWPort06]>;
-def : WriteRes<WriteBitTest,[HWPort06]>;
+
+defm : X86WriteRes<WriteLAHFSAHF, [HWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [HWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [HWPort06,HWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [], 1, [], 10>;
+defm : X86WriteRes<WriteBitTestSet, [HWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [HWPort06,HWPort23], 6, [1,1], 3>;
+//defm : X86WriteRes<WriteBitTestSetRegLd, [], 1, [], 11>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -164,18 +192,29 @@ defm : HWWriteResPair<WriteLZCNT, [HWPort1], 3>;
defm : HWWriteResPair<WriteTZCNT, [HWPort1], 3>;
defm : HWWriteResPair<WritePOPCNT, [HWPort1], 3>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : HWWriteResPair<WriteBEXTR, [HWPort06,HWPort15], 2, [1,1], 2>;
-defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>;
-
-defm : HWWriteResPair<WriteDiv8, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteDiv16, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteDiv32, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteDiv64, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv8, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv16, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv32, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv64, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+defm : HWWriteResPair<WriteBLS, [HWPort15], 1>;
+defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>;
+
+// TODO: Why isn't the HWDivider used?
+defm : X86WriteRes<WriteDiv8, [HWPort0,HWPort1,HWPort5,HWPort6], 22, [], 9>;
+defm : X86WriteRes<WriteDiv16, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156], 98, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156], 98, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156], 98, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv8Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv16Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv32Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv64Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+
+defm : X86WriteRes<WriteIDiv8, [HWPort0,HWPort1,HWPort5,HWPort6], 23, [], 9>;
+defm : X86WriteRes<WriteIDiv16, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156], 112, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv32, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156], 112, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv64, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156], 112, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv8Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteIDiv16Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteIDiv32Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteIDiv64Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [HWPort01], 1, [1], 1>;
@@ -614,35 +653,12 @@ def : InstRW<[HWWritePopA], (instregex "POPA(16|32)")>;
//-- Arithmetic instructions --//
-// DIV.
-// r8.
-def HWWriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
- let Latency = 22;
- let NumMicroOps = 9;
-}
-def : InstRW<[HWWriteDiv8], (instregex "DIV8r")>;
-
-// IDIV.
-// r8.
-def HWWriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
- let Latency = 23;
- let NumMicroOps = 9;
-}
-def : InstRW<[HWWriteIDiv8], (instregex "IDIV8r")>;
-
-// BT.
-// m,r.
-def HWWriteBTmr : SchedWriteRes<[]> {
- let NumMicroOps = 10;
-}
-def : InstRW<[HWWriteBTmr], (instregex "BT(16|32|64)mr")>;
-
// BTR BTS BTC.
// m,r.
def HWWriteBTRSCmr : SchedWriteRes<[]> {
let NumMicroOps = 11;
}
-def : InstRW<[HWWriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>;
+def : SchedAlias<WriteBitTestSetRegRMW, HWWriteBTRSCmr>;
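The SchedAlias above replaces the removed per-instruction regex binding: it redirects the named scheduling class to this model's SchedWriteRes, so every instruction already tagged with WriteBitTestSetRegRMW picks up HWWriteBTRSCmr's micro-op count without listing opcodes in an InstRW override. A sketch of the same pattern with hypothetical names:

    // Hypothetical class and alias; neither name exists in this file.
    def HWWriteMyRMW : SchedWriteRes<[]> {
      let NumMicroOps = 10;
    }
    def : SchedAlias<WriteMyRMW, HWWriteMyRMW>;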
//-- Control transfer instructions --//
@@ -704,14 +720,14 @@ def HWWriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> {
let NumMicroOps = 17;
let ResourceCycles = [1, 16];
}
-def : InstRW<[HWWriteRDRAND], (instregex "RDRAND(16|32|64)r")>;
+def : InstRW<[HWWriteRDRAND], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
//=== Floating Point x87 Instructions ===//
//-- Move instructions --//
// FLD.
// m80.
-def : InstRW<[HWWriteP01], (instregex "LD_Frr")>;
+def : InstRW<[HWWriteP01], (instrs LD_Frr)>;
// FBLD.
// m80.
@@ -719,7 +735,7 @@ def HWWriteFBLD : SchedWriteRes<[]> {
let Latency = 47;
let NumMicroOps = 43;
}
-def : InstRW<[HWWriteFBLD], (instregex "FBLDm")>;
+def : InstRW<[HWWriteFBLD], (instrs FBLDm)>;
// FST(P).
// r.
@@ -732,13 +748,13 @@ def : InstRW<[HWWriteP01], (instregex "FFREE")>;
def HWWriteFNSAVE : SchedWriteRes<[]> {
let NumMicroOps = 147;
}
-def : InstRW<[HWWriteFNSAVE], (instregex "FSAVEm")>;
+def : InstRW<[HWWriteFNSAVE], (instrs FSAVEm)>;
// FRSTOR.
def HWWriteFRSTOR : SchedWriteRes<[]> {
let NumMicroOps = 90;
}
-def : InstRW<[HWWriteFRSTOR], (instregex "FRSTORm")>;
+def : InstRW<[HWWriteFRSTOR], (instrs FRSTORm)>;
//-- Arithmetic instructions --//
@@ -812,8 +828,8 @@ def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm",
- "(V?)MOVSHDUPrm",
+def: InstRW<[HWWriteResGroup0], (instrs VBROADCASTSSrm)>;
+def: InstRW<[HWWriteResGroup0], (instregex "(V?)MOVSHDUPrm",
"(V?)MOVSLDUPrm",
"VPBROADCAST(D|Q)rm")>;
@@ -822,14 +838,14 @@ def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
+def: InstRW<[HWWriteResGroup0_1], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm)>;
def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
"VPBROADCAST(D|Q)Yrm")>;
def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> {
@@ -837,11 +853,8 @@ def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
+def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)",
"(V?)MOVDDUPrm")>;
def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
@@ -849,9 +862,8 @@ def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup1], (instregex "FBSTPm",
- "ST_FP(32|64|80)m",
- "VMPTRSTm")>;
+def: InstRW<[HWWriteResGroup1], (instrs FBSTPm, VMPTRSTm)>;
+def: InstRW<[HWWriteResGroup1], (instregex "ST_FP(32|64|80)m")>;
def HWWriteResGroup2 : SchedWriteRes<[HWPort0]> {
let Latency = 1;
@@ -874,7 +886,7 @@ def HWWriteResGroup4 : SchedWriteRes<[HWPort5]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[HWWriteResGroup4], (instrs MMX_MOVQ2DQrr)>;
def HWWriteResGroup5 : SchedWriteRes<[HWPort6]> {
let Latency = 1;
@@ -902,10 +914,7 @@ def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup8], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[HWWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def HWWriteResGroup9 : SchedWriteRes<[HWPort015]> {
let Latency = 1;
@@ -920,12 +929,12 @@ def HWWriteResGroup10 : SchedWriteRes<[HWPort0156]> {
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup10], (instrs CBW, CWDE, CDQE,
- CMC, STC)>;
-def: InstRW<[HWWriteResGroup10], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+ CMC, STC,
+ SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def HWWriteResGroup11 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 6;
@@ -939,40 +948,23 @@ def HWWriteResGroup11_1 : SchedWriteRes<[HWPort0,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup11_1], (instregex "(V?)CVTSS2SDrm",
- "VPSLLVQrm",
- "VPSRLVQrm")>;
+def: InstRW<[HWWriteResGroup11_1], (instrs VPSLLVQrm, VPSRLVQrm)>;
+def: InstRW<[HWWriteResGroup11_1], (instregex "(V?)CVTSS2SDrm")>;
def HWWriteResGroup11_2 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup11_2], (instregex "VPSLLVQYrm",
- "VPSRLVQYrm")>;
+def: InstRW<[HWWriteResGroup11_2], (instrs VPSLLVQYrm, VPSRLVQYrm)>;
def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup12], (instregex "MMX_CVTPI2PSirm",
- "PDEP(32|64)rm",
- "PEXT(32|64)rm")>;
-
-def HWWriteResGroup12_1 : SchedWriteRes<[HWPort1,HWPort0156,HWPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup12_1], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def HWWriteResGroup12_2 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156,HWPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[HWWriteResGroup12_2], (instrs IMUL16m, MUL16m)>;
+def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSirm)>;
+def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>;
def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 6;
@@ -991,24 +983,17 @@ def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup13_1], (instregex "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXWQYrm")>;
+def: InstRW<[HWWriteResGroup13_1], (instrs VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXWQYrm)>;
def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup14], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def HWWriteResGroup15 : SchedWriteRes<[HWPort23,HWPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup15], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[HWWriteResGroup14], (instrs FARJMP64)>;
+def: InstRW<[HWWriteResGroup14], (instregex "JMP(16|32|64)m")>;
def HWWriteResGroup16 : SchedWriteRes<[HWPort23,HWPort15]> {
let Latency = 6;
@@ -1016,9 +1001,6 @@ def HWWriteResGroup16 : SchedWriteRes<[HWPort23,HWPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup16], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
@@ -1026,16 +1008,16 @@ def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup17], (instregex "VINSERTF128rm",
- "VINSERTI128rm",
- "VPBLENDDrmi")>;
+def: InstRW<[HWWriteResGroup17], (instrs VINSERTF128rm,
+ VINSERTI128rm,
+ VPBLENDDrmi)>;
def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup17_2], (instregex "VPBLENDDYrmi")>;
+def: InstRW<[HWWriteResGroup17_2], (instrs VPBLENDDYrmi)>;
def HWWriteResGroup18 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 6;
@@ -1078,25 +1060,18 @@ def HWWriteResGroup24 : SchedWriteRes<[HWPort4,HWPort237,HWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup24], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[HWWriteResGroup24], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[HWWriteResGroup24], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[HWWriteResGroup24], (instregex "PUSH(16|32|64)rmr")>;
def HWWriteResGroup25 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
let Latency = 7;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[HWWriteResGroup25], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[HWWriteResGroup25], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def HWWriteResGroup26 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 7;
@@ -1113,16 +1088,6 @@ def HWWriteResGroup28 : SchedWriteRes<[HWPort01]> {
}
def: InstRW<[HWWriteResGroup28], (instrs FDECSTP)>;
-def HWWriteResGroup29 : SchedWriteRes<[HWPort06]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[HWWriteResGroup29], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri")>;
-
def HWWriteResGroup30 : SchedWriteRes<[HWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -1153,7 +1118,7 @@ def HWWriteResGroup33 : SchedWriteRes<[HWPort01,HWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup33], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[HWWriteResGroup33], (instrs MMX_MOVDQ2Qrr)>;
def HWWriteResGroup35 : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 2;
@@ -1168,9 +1133,9 @@ def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[HWWriteResGroup36_2], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[HWWriteResGroup36_2], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def HWWriteResGroup37 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 7;
@@ -1214,10 +1179,8 @@ def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[HWWriteResGroup46], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[HWWriteResGroup46], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def HWWriteResGroup47 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 8;
@@ -1231,26 +1194,18 @@ def HWWriteResGroup48 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[HWWriteResGroup48], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[HWWriteResGroup48], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[HWWriteResGroup48], (instrs FARCALL64)>;
def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPI2PSirr",
- "PDEP(32|64)rr",
- "PEXT(32|64)rr",
+def: InstRW<[HWWriteResGroup50], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr",
"(V?)CVTDQ2PS(Y?)rr")>;
-def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup50_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> {
let Latency = 3;
let NumMicroOps = 1;
@@ -1272,38 +1227,29 @@ def HWWriteResGroup52_1 : SchedWriteRes<[HWPort1,HWPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
- "ILD_F(16|32|64)m",
- "VCVTDQ2PSYrm",
- "VCVTPS2DQYrm",
- "VCVTTPS2DQYrm")>;
+ "ILD_F(16|32|64)m")>;
+def: InstRW<[HWWriteResGroup52_1], (instrs VCVTDQ2PSYrm,
+ VCVTPS2DQYrm,
+ VCVTTPS2DQYrm)>;
def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup53_1], (instregex "VPMOVSXBWYrm",
- "VPMOVSXDQYrm",
- "VPMOVSXWDYrm",
- "VPMOVZXWDYrm")>;
-
-def HWWriteResGroup54 : SchedWriteRes<[HWPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[HWWriteResGroup54], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
+def: InstRW<[HWWriteResGroup53_1], (instrs VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVZXWDYrm)>;
def HWWriteResGroup57 : SchedWriteRes<[HWPort5,HWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[HWWriteResGroup57], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[HWWriteResGroup57], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def HWWriteResGroup58 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 3;
@@ -1317,21 +1263,8 @@ def HWWriteResGroup59 : SchedWriteRes<[HWPort06,HWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[HWWriteResGroup59], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
-
-def HWWriteResGroup60 : SchedWriteRes<[HWPort06,HWPort0156]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[HWWriteResGroup60], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
+def: InstRW<[HWWriteResGroup59], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def HWWriteResGroup61 : SchedWriteRes<[HWPort0,HWPort4,HWPort237]> {
let Latency = 4;
@@ -1353,17 +1286,8 @@ def HWWriteResGroup66 : SchedWriteRes<[HWPort23,HWPort237,HWPort06,HWPort0156]>
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[HWWriteResGroup66], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
-
-def HWWriteResGroup67 : SchedWriteRes<[HWPort23,HWPort237,HWPort06,HWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[HWWriteResGroup67], (instregex "ROR(8|16|32|64)mCL")>;
+def: InstRW<[HWWriteResGroup66], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def HWWriteResGroup68 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 9;
@@ -1377,8 +1301,8 @@ def HWWriteResGroup69 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPor
let NumMicroOps = 6;
let ResourceCycles = [1,1,1,2,1];
}
-def: InstRW<[HWWriteResGroup69], (instregex "CMPXCHG(8|16|32|64)rm",
- "ROL(8|16|32|64)mCL",
+def: InstRW<[HWWriteResGroup69], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1397,7 +1321,7 @@ def HWWriteResGroup71 : SchedWriteRes<[HWPort0,HWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup71], (instregex "VCVTPS2PDYrr")>;
+def: InstRW<[HWWriteResGroup71], (instrs VCVTPS2PDYrr)>;
def HWWriteResGroup72 : SchedWriteRes<[HWPort0,HWPort0156]> {
let Latency = 4;
@@ -1411,30 +1335,18 @@ def HWWriteResGroup73 : SchedWriteRes<[HWPort1,HWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup73], (instregex "MMX_CVTPI2PDirr",
- "MMX_CVT(T?)PD2PIirr",
- "MMX_CVT(T?)PS2PIirr",
- "(V?)CVTDQ2PDrr",
+def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPI2PDirr,
+ MMX_CVTPD2PIirr,
+ MMX_CVTPS2PIirr,
+ MMX_CVTTPD2PIirr,
+ MMX_CVTTPS2PIirr)>;
+def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
"(V?)CVTSD2SSrr",
"(V?)CVTSI(64)?2SDrr",
"(V?)CVTSI2SSrr",
"(V?)CVT(T?)PD2DQrr")>;
-def HWWriteResGroup74 : SchedWriteRes<[HWPort1,HWPort6]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup74], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def HWWriteResGroup74_16 : SchedWriteRes<[HWPort1, HWPort06, HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[HWWriteResGroup74_16], (instrs IMUL16r, MUL16r)>;
-
def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
@@ -1458,32 +1370,29 @@ def HWWriteResGroup77 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup77], (instregex "VCVTPS2PDYrm")>;
+def: InstRW<[HWWriteResGroup77], (instrs VCVTPS2PDYrm)>;
def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 10;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup78], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVT(T?)PD2PIirm",
- "(V?)CVTDQ2PDrm")>;
+def: InstRW<[HWWriteResGroup78], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTTPD2PIirm,
+ CVTDQ2PDrm,
+ VCVTDQ2PDrm)>;
def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup78_1], (instregex "MMX_CVTPI2PDirm",
- "(V?)CVTSD2SSrm")>;
-
-def HWWriteResGroup79 : SchedWriteRes<[HWPort1,HWPort6,HWPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup79], (instrs IMUL64m, MUL64m, MULX64rm)>;
+def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDirm,
+ CVTSD2SSrm,
+ VCVTSD2SSrm)>;
def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
let Latency = 9;
@@ -1499,10 +1408,10 @@ def HWWriteResGroup81 : SchedWriteRes<[HWPort0156]> {
}
def: InstRW<[HWWriteResGroup81], (instrs FNCLEX)>;
-def HWWriteResGroup82 : SchedWriteRes<[HWPort015,HWPort0156]> {
- let Latency = 4;
+def HWWriteResGroup82 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[HWWriteResGroup82], (instrs VZEROUPPER)>;
@@ -1548,8 +1457,8 @@ def HWWriteResGroup91_3 : SchedWriteRes<[HWPort0,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m",
- "VPCMPGTQYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m")>;
+def: InstRW<[HWWriteResGroup91_3], (instrs VPCMPGTQYrm)>;
def HWWriteResGroup93 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 5;
@@ -1565,13 +1474,6 @@ def HWWriteResGroup94 : SchedWriteRes<[HWPort1,HWPort6,HWPort06]> {
}
def: InstRW<[HWWriteResGroup94], (instregex "STR(16|32|64)r")>;
-def HWWriteResGroup95 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup95], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
let Latency = 10;
let NumMicroOps = 4;
@@ -1579,13 +1481,6 @@ def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
}
def: InstRW<[HWWriteResGroup97], (instregex "CVTTSS2SI64rm")>;
-def HWWriteResGroup98 : SchedWriteRes<[HWPort1,HWPort23,HWPort06,HWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup98], (instrs IMUL32m, MUL32m, MULX32rm)>;
-
def HWWriteResGroup99 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -1600,21 +1495,15 @@ def HWWriteResGroup100 : SchedWriteRes<[HWPort06,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup100], (instrs XSETBV)>;
-def HWWriteResGroup101 : SchedWriteRes<[HWPort06,HWPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[HWWriteResGroup101], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup102], (instregex "VCVTDQ2PDYrr",
- "VCVTPD2PSYrr",
- "VCVT(T?)PD2DQYrr")>;
+def: InstRW<[HWWriteResGroup102], (instrs VCVTDQ2PDYrr,
+ VCVTPD2PSYrr,
+ VCVTPD2DQYrr,
+ VCVTTPD2DQYrr)>;
def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 13;
@@ -1628,7 +1517,7 @@ def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup104], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[HWWriteResGroup104], (instrs VCVTDQ2PDYrm)>;
def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> {
let Latency = 6;
@@ -1678,7 +1567,7 @@ def HWWriteResGroup130 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> {
let NumMicroOps = 9;
let ResourceCycles = [1,4,1,3];
}
-def: InstRW<[HWWriteResGroup130], (instregex "RCL8rCL")>;
+def: InstRW<[HWWriteResGroup130], (instrs RCL8rCL)>;
def HWWriteResGroup131 : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 11;
@@ -1706,14 +1595,14 @@ def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> {
let NumMicroOps = 10;
let ResourceCycles = [2,3,1,4];
}
-def: InstRW<[HWWriteResGroup142], (instregex "RCR8rCL")>;
+def: InstRW<[HWWriteResGroup142], (instrs RCR8rCL)>;
def HWWriteResGroup143 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 19;
let NumMicroOps = 15;
let ResourceCycles = [1,14];
}
-def: InstRW<[HWWriteResGroup143], (instregex "POPF16")>;
+def: InstRW<[HWWriteResGroup143], (instrs POPF16)>;
def HWWriteResGroup144 : SchedWriteRes<[HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort06,HWPort0156]> {
let Latency = 21;
@@ -1722,10 +1611,10 @@ def HWWriteResGroup144 : SchedWriteRes<[HWPort4,HWPort5,HWPort6,HWPort23,HWPort2
}
def: InstRW<[HWWriteResGroup144], (instrs INSB, INSL, INSW)>;
-def HWWriteResGroup145 : SchedWriteRes<[HWPort5]> {
- let Latency = 16;
- let NumMicroOps = 16;
- let ResourceCycles = [16];
+def HWWriteResGroup145 : SchedWriteRes<[HWPort5, HWPort6]> {
+ let Latency = 8;
+ let NumMicroOps = 20;
+ let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup145], (instrs VZEROALL)>;
@@ -1879,20 +1768,6 @@ def HWWriteResGroup180 : SchedWriteRes<[HWPort5,HWPort01,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup180], (instrs FNINIT)>;
-def HWWriteResGroup181 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156]> {
- let Latency = 98;
- let NumMicroOps = 32;
- let ResourceCycles = [7,7,3,3,1,11];
-}
-def: InstRW<[HWWriteResGroup181], (instregex "DIV(16|32|64)r")>;
-
-def HWWriteResGroup182 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156]> {
- let Latency = 112;
- let NumMicroOps = 66;
- let ResourceCycles = [4,2,4,8,14,34];
-}
-def: InstRW<[HWWriteResGroup182], (instregex "IDIV(16|32|64)r")>;
-
def HWWriteResGroup183 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,HWPort237,HWPort06,HWPort0156]> {
let Latency = 115;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedPredicates.td b/contrib/llvm/lib/Target/X86/X86SchedPredicates.td
index 27aaeb193583..1c7f24375f61 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedPredicates.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedPredicates.td
@@ -19,11 +19,16 @@
// different zero-idioms.
def ZeroIdiomPredicate : CheckSameRegOperand<1, 2>;
-// A predicate used to check if an instruction is a LEA, and if it uses all
-// three source operands: base, index, and offset.
-def IsThreeOperandsLEAPredicate: CheckAll<[
- CheckOpcode<[LEA32r, LEA64r, LEA64_32r, LEA16r]>,
+// A predicate used to identify VPERM instructions that have bits 3 and 7 of their mask set.
+// On some processors, these VPERM instructions are zero-idioms.
+def ZeroIdiomVPERMPredicate : CheckAll<[
+ ZeroIdiomPredicate,
+ CheckImmOperand<3, 0x88>
+]>;
+// A predicate used to check if a LEA instruction uses all three source
+// operands: base, index, and offset.
+def IsThreeOperandsLEAPredicate: CheckAll<[
// isRegOperand(Base)
CheckIsRegOperand<1>,
CheckNot<CheckInvalidRegOperand<1>>,
@@ -42,8 +47,17 @@ def IsThreeOperandsLEAPredicate: CheckAll<[
]>
]>;
+def LEACases : MCOpcodeSwitchCase<
+ [LEA32r, LEA64r, LEA64_32r, LEA16r],
+ MCReturnStatement<IsThreeOperandsLEAPredicate>
+>;
+
+// Used to generate the body of a TII member function.
+def IsThreeOperandsLEABody :
+ MCOpcodeSwitchStatement<[LEACases], MCReturnStatement<FalsePred>>;
+
// This predicate evaluates to true only if the input machine instruction is a
// 3-operands LEA. Tablegen automatically generates a new method for it in
// X86GenInstrInfo.
def IsThreeOperandsLEAFn :
- TIIPredicate<"X86", "isThreeOperandsLEA", IsThreeOperandsLEAPredicate>;
+ TIIPredicate<"isThreeOperandsLEA", IsThreeOperandsLEABody>;
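As an aside on the new predicates: in ZeroIdiomVPERMPredicate the 0x88 immediate sets bits 3 and 7, which zero the low and high 128-bit lane of VPERM2F128/VPERM2I128, so the result is all zeros regardless of the source registers. For the LEA predicate, TableGen now emits the opcode switch (LEACases) plus the operand checks (IsThreeOperandsLEABody) as an isThreeOperandsLEA helper in X86GenInstrInfo. The hand-written C++ sketch below only illustrates what that check amounts to; the function name is invented, the operand indices assume the usual LEA operand layout (0 = dst, 1 = base, 2 = scale, 3 = index, 4 = displacement), and it is not the generated code.

#include "llvm/MC/MCInst.h"

// Sketch of the three-operand LEA test. The caller is assumed to have already
// matched one of LEA16r/LEA32r/LEA64r/LEA64_32r, which is what the LEACases
// opcode switch expresses in the generated function.
static bool looksLikeThreeOperandsLEA(const llvm::MCInst &MI) {
  const llvm::MCOperand &Base  = MI.getOperand(1);
  const llvm::MCOperand &Index = MI.getOperand(3);
  const llvm::MCOperand &Disp  = MI.getOperand(4);
  // Base and index must both be real (non-zero) registers...
  bool HasBase  = Base.isReg()  && Base.getReg()  != 0;
  bool HasIndex = Index.isReg() && Index.getReg() != 0;
  // ...and the displacement must contribute something: a non-zero immediate,
  // or a non-immediate operand such as a symbol reference.
  bool HasDisp  = !Disp.isImm() || Disp.getImm() != 0;
  return HasBase && HasIndex && HasDisp;
}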
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 6b7bbdea860a..9dbf0976989f 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -67,10 +67,16 @@ def SBDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def SBFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
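The ReadAfterLd and ReadAfterVec*Ld ReadAdvance values above mean that the consuming instruction does not actually read that register operand until 5, 6 or 7 cycles after it issues, so a producer feeding the operand appears correspondingly cheaper. A hypothetical helper, not an LLVM API, that captures the arithmetic these entries imply:

// Hypothetical helper, not part of LLVM: the latency a dependent instruction
// observes on an operand tagged with a ReadAdvance is the producer's write
// latency minus the advance, clamped at zero.
static unsigned observedLatency(unsigned ProducerLatency, unsigned ReadAdvance) {
  return ProducerLatency > ReadAdvance ? ProducerLatency - ReadAdvance : 0;
}
// Example: a 3-cycle producer feeding the register operand of a folded-load
// consumer whose read is tagged ReadAfterVecXLd (advance 6 here) is observed
// as 0 cycles, because the consumer's load part already hides that latency.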
@@ -109,11 +115,25 @@ def : WriteRes<WriteZero, []>;
// Arithmetic.
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteADC, [SBPort05,SBPort015], 2, [1,1], 2>;
-defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
-defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul8, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul16, [SBPort1,SBPort05,SBPort015], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SBPort1,SBPort015], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
+defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
+defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
+defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+
+defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>;
defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
-defm : X86WriteRes<WriteBSWAP64, [SBPort1,SBPort05], 2, [1,1], 2>;
+defm : X86WriteRes<WriteBSWAP64, [SBPort1, SBPort05], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCMPXCHG, [SBPort05, SBPort015], 5, [1,3], 4>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[SBPort015, SBPort5, SBPort23, SBPort4], 8, [1, 2, 2, 1], 6>;
defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
@@ -124,15 +144,17 @@ defm : SBWriteResPair<WriteIDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
-
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>;
defm : X86WriteRes<WriteSHDrrcl,[SBPort05, SBPort015], 4, [3, 1], 4>;
defm : X86WriteRes<WriteSHDmri, [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>;
defm : X86WriteRes<WriteSHDmrcl,[SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>;
-defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
+defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
+defm : SBWriteResPair<WriteShiftCL, [SBPort05], 3, [3], 3>;
+defm : SBWriteResPair<WriteRotate, [SBPort05], 2, [2], 2>;
+defm : SBWriteResPair<WriteRotateCL, [SBPort05], 3, [3], 3>;
+
defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;
@@ -144,8 +166,14 @@ def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [SBPort05]>;
-def : WriteRes<WriteBitTest,[SBPort05]>;
+
+defm : X86WriteRes<WriteLAHFSAHF, [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SBPort05,SBPort23], 6, [1,1], 2>;
+//defm : X86WriteRes<WriteBitTestRegLd, [SBPort05,SBPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SBPort05,SBPort23], 6, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SBPort05,SBPort23,SBPort5,SBPort015], 8, [1,1,1,1], 5>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -159,10 +187,11 @@ defm : SBWriteResPair<WriteLZCNT, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteTZCNT, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WritePOPCNT, [SBPort1], 3, [1], 1, 6>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
// NOTE: These don't exist on Sandy Bridge. Ports are guesses.
defm : SBWriteResPair<WriteBEXTR, [SBPort05,SBPort1], 2, [1,1], 2>;
-defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;
+defm : SBWriteResPair<WriteBLS, [SBPort015], 1>;
+defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [SBPort5], 1, [1], 1>;
@@ -577,21 +606,21 @@ def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABS(B|D|W)rr",
- "MMX_PADDQirr",
- "MMX_PALIGNRrri",
- "MMX_PSIGN(B|D|W)rr")>;
+def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr,
+ MMX_PABSDrr,
+ MMX_PABSWrr,
+ MMX_PADDQirr,
+ MMX_PALIGNRrri,
+ MMX_PSIGNBrr,
+ MMX_PSIGNDrr,
+ MMX_PSIGNWrr)>;
def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SBWriteResGroup9], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri",
- "SET(A|BE)r")>;
+def: InstRW<[SBWriteResGroup9], (instregex "SET(A|BE)r")>;
def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
let Latency = 2;
@@ -608,10 +637,7 @@ def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup12], (instregex "(V?)COMISDrr",
- "(V?)COMISSrr",
- "(V?)UCOMISDrr",
- "(V?)UCOMISSrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "(V?)(U?)COMI(SD|SS)rr")>;
def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
let Latency = 2;
@@ -626,22 +652,15 @@ def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ,
+ MMX_MOVDQ2Qrr)>;
def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup21], (instregex "PUSHFS64")>;
-
-def SBWriteResGroup21_16i : SchedWriteRes<[SBPort1, SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup21_16i], (instrs IMUL16rri, IMUL16rri8)>;
+def: InstRW<[SBWriteResGroup21], (instrs PUSHFS64)>;
def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
let Latency = 3;
@@ -650,25 +669,13 @@ def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
}
def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>;
-def SBWriteResGroup23_2 : SchedWriteRes<[SBPort05]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SBWriteResGroup23_2], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
-
-def SBWriteResGroup25 : SchedWriteRes<[SBPort015]> {
+def SBWriteResGroup23 : SchedWriteRes<[SBPort05]> {
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [3];
}
-def: InstRW<[SBWriteResGroup25], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
+def: InstRW<[SBWriteResGroup23], (instregex "RCL(8|16|32|64)r1",
+ "RCR(8|16|32|64)r1")>;
def SBWriteResGroup25_1 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 7;
@@ -684,33 +691,12 @@ def SBWriteResGroup26_2 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
}
def: InstRW<[SBWriteResGroup26_2], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
-def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup27], (instrs IMUL64r, MUL64r)>;
-
-def SBWriteResGroup27_1 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup27_1], (instrs IMUL32r, MUL32r)>;
-
-def SBWriteResGroup27_2 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup27_2], (instrs IMUL16r, MUL16r)>;
-
def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
let Latency = 4;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>;
+def: InstRW<[SBWriteResGroup29], (instrs MOV64sr)>;
def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
let Latency = 4;
@@ -724,7 +710,6 @@ def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup30], (instregex "(V?)PCMPGTQrr")>;
def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
let Latency = 5;
@@ -734,6 +719,14 @@ def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
def: InstRW<[SBWriteResGroup31], (instregex "MOVSX(16|32|64)rm(8|16|32)",
"MOVZX(16|32|64)rm(8|16)")>;
+def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+ let ResourceCycles = [8];
+}
+def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)r(i|CL)",
+ "RCR(8|16|32|64)r(i|CL)")>;
+
def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
let Latency = 5;
let NumMicroOps = 2;
@@ -753,8 +746,8 @@ def SBWriteResGroup35_2 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m",
- "PUSHGS64")>;
+def: InstRW<[SBWriteResGroup35_2], (instrs PUSHGS64)>;
+def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m")>;
def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
let Latency = 5;
@@ -779,13 +772,6 @@ def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
}
def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>;
-def SBWriteResGroup42 : SchedWriteRes<[SBPort05,SBPort015]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let Latency = 3;
let NumMicroOps = 4;
@@ -820,9 +806,9 @@ def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup48], (instregex "MMX_MOVD64from64rm",
- "POP(16|32|64)r",
- "VBROADCASTSSrm",
+def: InstRW<[SBWriteResGroup48], (instrs MMX_MOVD64from64rm,
+ VBROADCASTSSrm)>;
+def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r",
"(V?)MOV64toPQIrm",
"(V?)MOVDDUPrm",
"(V?)MOVDI2PDIrm",
@@ -837,23 +823,20 @@ def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup49], (instregex "MOV16sm")>;
-
-def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort05]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup50], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup49], (instrs MOV16sm)>;
def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABS(B|D|W)rm",
- "MMX_PALIGNRrmi",
- "MMX_PSIGN(B|D|W)rm")>;
+def: InstRW<[SBWriteResGroup51], (instrs MMX_PABSBrm,
+ MMX_PABSDrm,
+ MMX_PABSWrm,
+ MMX_PALIGNRrmi,
+ MMX_PSIGNBrm,
+ MMX_PSIGNDrm,
+ MMX_PSIGNWrm)>;
def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 6;
@@ -875,11 +858,11 @@ def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instrs VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm)>;
def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 7;
@@ -893,14 +876,14 @@ def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>;
+def: InstRW<[SBWriteResGroup59], (instrs MMX_PADDQirm)>;
def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
let Latency = 7;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SBWriteResGroup62], (instregex "VER(R|W)m")>;
+def: InstRW<[SBWriteResGroup62], (instrs VERRm, VERWm)>;
def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 7;
@@ -944,15 +927,9 @@ def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
}
-def: InstRW<[SBWriteResGroup69], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 8;
@@ -961,12 +938,12 @@ def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>;
-def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+def SBWriteResGroup81 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2, 1];
}
-def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16)B")>;
def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 8;
@@ -990,10 +967,8 @@ def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let NumMicroOps = 5;
let ResourceCycles = [1,2,2];
}
-def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 8;
@@ -1015,36 +990,7 @@ def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SBWriteResGroup93], (instregex "CVT(T?)SD2SI(64)?rm",
- "CVT(T?)SS2SI(64)?rm")>;
-
-def SBWriteResGroup93_1 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_1], (instrs IMUL64m, MUL64m)>;
-
-def SBWriteResGroup93_2 : SchedWriteRes<[SBPort1,SBPort23,SBPort05,SBPort015]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_2], (instrs IMUL32m, MUL32m)>;
-
-def SBWriteResGroup93_3 : SchedWriteRes<[SBPort1,SBPort05,SBPort015,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SBWriteResGroup93_3], (instrs IMUL16m, MUL16m)>;
-
-def SBWriteResGroup93_4 : SchedWriteRes<[SBPort1,SBPort015,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_4], (instrs IMUL16rmi, IMUL16rmi8)>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVT(T?)(SD|SS)2SI(64)?rm")>;
def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
let Latency = 9;
@@ -1092,10 +1038,7 @@ def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort05,SBPort
let NumMicroOps = 6;
let ResourceCycles = [1,1,2,1,1];
}
-def: InstRW<[SBWriteResGroup100], (instregex "BT(16|32|64)mr",
- "BTC(16|32|64)mr",
- "BTR(16|32|64)mr",
- "BTS(16|32|64)mr")>;
+def : SchedAlias<WriteBitTestRegLd, SBWriteResGroup100>; // TODO - this is incorrect - no RMW
def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
let Latency = 10;
@@ -1119,6 +1062,14 @@ def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup106], (instregex "FICOM(P?)(16|32)m")>;
+def SBWriteResGroup108 : SchedWriteRes<[SBPort05,SBPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 11;
+ let ResourceCycles = [7,4];
+}
+def: InstRW<[SBWriteResGroup108], (instregex "RCL(8|16|32|64)m",
+ "RCR(8|16|32|64)m")>;
+
def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 12;
let NumMicroOps = 2;
@@ -1154,6 +1105,71 @@ def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>;
+def SBWriteResGroupVzeroall : SchedWriteRes<[SBPort5]> {
+ let Latency = 9;
+ let NumMicroOps = 20;
+ let ResourceCycles = [2];
+}
+def: InstRW<[SBWriteResGroupVzeroall], (instrs VZEROALL)>;
+
+def SBWriteResGroupVzeroupper : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 4;
+ let ResourceCycles = [];
+}
+def: InstRW<[SBWriteResGroupVzeroupper], (instrs VZEROUPPER)>;
+
def: InstRW<[WriteZero], (instrs CLC)>;
+// Instruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and
+// renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def SBWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def SBWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[SBWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
+
+def SBWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogic]>
+]>;
+def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+ VXORPDrr)>;
+
+def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
+]>;
+def : InstRW<[SBWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def SBWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUX]>
+]>;
+def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+ PSUBDrr, VPSUBDrr,
+ PSUBQrr, VPSUBQrr,
+ PSUBWrr, VPSUBWrr,
+ PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [SBWriteResGroup30]>
+]>;
+def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>;
+
} // SchedModel
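The zero-idiom entries above key off ZeroIdiomPredicate, which is simply CheckSameRegOperand<1, 2>: the dependency-breaking form is recognized when both source registers are the same, as in the pxor and vxorpd examples given for llvm-exegesis. A hand-written equivalent of that check, shown only as a sketch (the function name is invented):

#include "llvm/MC/MCInst.h"

// Equivalent of ZeroIdiomPredicate (CheckSameRegOperand<1, 2>) for a
// two-source instruction such as XOR32rr or PXORrr, where operand 0 is the
// destination and operands 1 and 2 are the sources.
static bool isZeroIdiomForm(const llvm::MCInst &MI) {
  const llvm::MCOperand &Src1 = MI.getOperand(1);
  const llvm::MCOperand &Src2 = MI.getOperand(2);
  return Src1.isReg() && Src2.isReg() && Src1.getReg() == Src2.getReg();
}
// When the predicate matches, variants like SBWriteZeroIdiom pick
// SBWriteZeroLatency, so the result is modeled as ready immediately.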
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index bda088e1512f..2c9eb7516085 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -71,10 +71,16 @@ def SKLPortAny : ProcResGroup<[SKLPort0, SKLPort1, SKLPort2, SKLPort3, SKLPort4,
let BufferSize=60;
}
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -107,24 +113,47 @@ def : WriteRes<WriteRMW, [SKLPort237,SKLPort4]>;
// Arithmetic.
defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op.
defm : SKLWriteResPair<WriteADC, [SKLPort06], 1>; // Integer ALU + flags op.
-defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
-defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multiplication.
+
+// Integer multiplication.
+defm : SKLWriteResPair<WriteIMul8, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul16, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SKLPort1,SKLPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SKLPort1,SKLPort0156,SKLPort23], 8, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteIMul16Reg, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteIMul32Imm, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul32Reg, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
+defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>;
-
-defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteDiv32, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteDiv64, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv32, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv64, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteCMPXCHG,[SKLPort06, SKLPort0156], 5, [2,3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[SKLPort23,SKLPort06,SKLPort0156,SKLPort237,SKLPort4], 8, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteXCHG, [SKLPort0156], 2, [3], 3>;
+
+// TODO: Why isn't the SKLDivider used?
+defm : SKLWriteResPair<WriteDiv8, [SKLPort0,SKLDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteDiv16, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv16Ld, [SKLPort0,SKLPort23,SKLDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv32Ld, [SKLPort0,SKLPort23,SKLDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv64Ld, [SKLPort0,SKLPort23,SKLDivider], 29, [1,1,10], 2>;
+
+defm : X86WriteRes<WriteIDiv8, [SKLPort0,SKLDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv32, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv64, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv8Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads.
defm : SKLWriteResPair<WriteCMOV, [SKLPort06], 1, [1], 1>; // Conditional move.
@@ -135,8 +164,14 @@ def : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [SKLPort06]>;
-def : WriteRes<WriteBitTest,[SKLPort06]>; //
+
+defm : X86WriteRes<WriteLAHFSAHF, [SKLPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SKLPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SKLPort06,SKLPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [SKLPort0156,SKLPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [SKLPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SKLPort06,SKLPort23], 5, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SKLPort0156,SKLPort23], 5, [1,1], 2>;
// Bit counts.
defm : SKLWriteResPair<WriteBSF, [SKLPort1], 3>;
@@ -146,7 +181,10 @@ defm : SKLWriteResPair<WriteTZCNT, [SKLPort1], 3>;
defm : SKLWriteResPair<WritePOPCNT, [SKLPort1], 3>;
// Integer shifts and rotates.
-defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
+defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
+defm : SKLWriteResPair<WriteShiftCL, [SKLPort06], 3, [3], 3>;
+defm : SKLWriteResPair<WriteRotate, [SKLPort06], 2, [2], 2>;
+defm : SKLWriteResPair<WriteRotateCL, [SKLPort06], 3, [3], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKLPort1], 3, [1], 1>;
@@ -154,9 +192,10 @@ defm : X86WriteRes<WriteSHDrrcl,[SKLPort1,SKLPort06,SKLPort0156], 6, [1, 2, 1],
defm : X86WriteRes<WriteSHDmri, [SKLPort1,SKLPort23,SKLPort237,SKLPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156], 11, [1, 1, 1, 2, 1], 6>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>;
-defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>;
+defm : SKLWriteResPair<WriteBLS, [SKLPort15], 1>;
+defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [SKLPort23], 5, [1], 1>;
@@ -612,10 +651,7 @@ def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup8], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[SKLWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> {
let Latency = 1;
@@ -632,47 +668,42 @@ def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup10], (instrs CBW, CWDE, CDQE,
- CMC, STC)>;
-def: InstRW<[SKLWriteResGroup10], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+ CMC, STC,
+ SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def SKLWriteResGroup11 : SchedWriteRes<[SKLPort4,SKLPort237]> {
let Latency = 1;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm",
- "ST_FP(32|64|80)m",
- "VMPTRSTm")>;
+def: InstRW<[SKLWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
+def: InstRW<[SKLWriteResGroup11], (instregex "ST_FP(32|64|80)m")>;
def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[SKLWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup14], (instrs FDECSTP)>;
-def: InstRW<[SKLWriteResGroup14], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[SKLWriteResGroup14], (instrs FDECSTP,
+ MMX_MOVDQ2Qrr)>;
def SKLWriteResGroup15 : SchedWriteRes<[SKLPort06]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup15], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri",
- "SET(A|BE)r")>;
+def: InstRW<[SKLWriteResGroup15], (instregex "SET(A|BE)r")>;
def SKLWriteResGroup17 : SchedWriteRes<[SKLPort0156]> {
let Latency = 2;
@@ -702,11 +733,10 @@ def SKLWriteResGroup23 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup23], (instrs CWD)>;
-def: InstRW<[SKLWriteResGroup23], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[SKLWriteResGroup23], (instregex "ADC8i8",
- "ADC8ri",
- "SBB8i8",
+def: InstRW<[SKLWriteResGroup23], (instrs CWD,
+ JCXZ, JECXZ, JRCXZ,
+ ADC8i8, SBB8i8)>;
+def: InstRW<[SKLWriteResGroup23], (instregex "ADC8ri",
"SBB8ri")>;
def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> {
@@ -728,10 +758,9 @@ def SKLWriteResGroup28 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[SKLWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> {
let Latency = 3;
@@ -741,21 +770,13 @@ def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> {
def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
-def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup29_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
- "VPBROADCASTBrr",
- "VPBROADCASTWrr",
+ "VPBROADCAST(B|W)rr",
"(V?)PCMPGTQ(Y?)rr")>;
def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
@@ -765,26 +786,6 @@ def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup32], (instrs FNSTSW16r)>;
-def SKLWriteResGroup33 : SchedWriteRes<[SKLPort06]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKLWriteResGroup33], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
-
-def SKLWriteResGroup34 : SchedWriteRes<[SKLPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKLWriteResGroup34], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
-
def SKLWriteResGroup35 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -805,9 +806,9 @@ def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKLWriteResGroup39], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[SKLWriteResGroup39], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def SKLWriteResGroup40 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
let Latency = 3;
@@ -828,10 +829,8 @@ def SKLWriteResGroup42 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[SKLWriteResGroup42], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
+def: InstRW<[SKLWriteResGroup42], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def SKLWriteResGroup43 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> {
let Latency = 3;
@@ -876,20 +875,6 @@ def SKLWriteResGroup48 : SchedWriteRes<[SKLPort01]> {
def: InstRW<[SKLWriteResGroup48], (instregex "(V?)CVTDQ2PS(Y?)rr",
"(V?)CVT(T?)PS2DQ(Y?)rr")>;
-def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup51], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def SKLWriteResGroup51_16 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKLWriteResGroup51_16], (instrs IMUL16r, MUL16r)>;
-
def SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -912,10 +897,10 @@ def SKLWriteResGroup55 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup55], (instrs PAUSE)>;
-def SKLWriteResGroup56 : SchedWriteRes<[SKLPort015,SKLPort0156]> {
- let Latency = 4;
+def SKLWriteResGroup56 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[SKLWriteResGroup56], (instrs VZEROUPPER)>;
@@ -931,11 +916,8 @@ def SKLWriteResGroup58 : SchedWriteRes<[SKLPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
+def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)",
"(V?)MOVDDUPrm")>; // TODO: Should this be SKLWriteResGroup67?
def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> {
@@ -943,8 +925,9 @@ def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup59], (instregex "MMX_CVTPI2PDirr",
- "(V?)CVTDQ2PDrr")>;
+def: InstRW<[SKLWriteResGroup59], (instrs MMX_CVTPI2PDirr,
+ CVTDQ2PDrr,
+ VCVTDQ2PDrr)>;
def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> {
let Latency = 5;
@@ -969,13 +952,6 @@ def SKLWriteResGroup61 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> {
}
def: InstRW<[SKLWriteResGroup61], (instregex "STR(16|32|64)r")>;
-def SKLWriteResGroup62 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup62], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -983,13 +959,6 @@ def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup63], (instrs XSETBV)>;
-def SKLWriteResGroup64 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[SKLWriteResGroup64], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def SKLWriteResGroup65 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
@@ -1002,44 +971,44 @@ def SKLWriteResGroup67 : SchedWriteRes<[SKLPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup67], (instregex "VBROADCASTSSrm",
- "(V?)MOVSHDUPrm",
- "(V?)MOVSLDUPrm",
- "VPBROADCASTDrm",
- "VPBROADCASTQrm")>;
+def: InstRW<[SKLWriteResGroup67], (instrs VBROADCASTSSrm,
+ VPBROADCASTDrm,
+ VPBROADCASTQrm)>;
+def: InstRW<[SKLWriteResGroup67], (instregex "(V?)MOVSHDUPrm",
+ "(V?)MOVSLDUPrm")>;
def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup68], (instregex "MMX_CVTPI2PSirr")>;
+def: InstRW<[SKLWriteResGroup68], (instrs MMX_CVTPI2PSirr)>;
def SKLWriteResGroup69 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PADDSBirm",
- "MMX_PADDSWirm",
- "MMX_PADDUSBirm",
- "MMX_PADDUSWirm",
- "MMX_PAVGBirm",
- "MMX_PAVGWirm",
- "MMX_PCMPEQBirm",
- "MMX_PCMPEQDirm",
- "MMX_PCMPEQWirm",
- "MMX_PCMPGTBirm",
- "MMX_PCMPGTDirm",
- "MMX_PCMPGTWirm",
- "MMX_PMAXSWirm",
- "MMX_PMAXUBirm",
- "MMX_PMINSWirm",
- "MMX_PMINUBirm",
- "MMX_PSUBSBirm",
- "MMX_PSUBSWirm",
- "MMX_PSUBUSBirm",
- "MMX_PSUBUSWirm")>;
+def: InstRW<[SKLWriteResGroup69], (instrs MMX_PADDSBirm,
+ MMX_PADDSWirm,
+ MMX_PADDUSBirm,
+ MMX_PADDUSWirm,
+ MMX_PAVGBirm,
+ MMX_PAVGWirm,
+ MMX_PCMPEQBirm,
+ MMX_PCMPEQDirm,
+ MMX_PCMPEQWirm,
+ MMX_PCMPGTBirm,
+ MMX_PCMPGTDirm,
+ MMX_PCMPGTWirm,
+ MMX_PMAXSWirm,
+ MMX_PMAXUBirm,
+ MMX_PMINSWirm,
+ MMX_PMINUBirm,
+ MMX_PSUBSBirm,
+ MMX_PSUBSWirm,
+ MMX_PSUBUSBirm,
+ MMX_PSUBUSWirm)>;
def SKLWriteResGroup70 : SchedWriteRes<[SKLPort0,SKLPort01]> {
let Latency = 6;
@@ -1054,15 +1023,8 @@ def SKLWriteResGroup72 : SchedWriteRes<[SKLPort6,SKLPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup72], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def SKLWriteResGroup74 : SchedWriteRes<[SKLPort23,SKLPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup74], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[SKLWriteResGroup72], (instrs FARJMP64)>;
+def: InstRW<[SKLWriteResGroup72], (instregex "JMP(16|32|64)m")>;
def SKLWriteResGroup75 : SchedWriteRes<[SKLPort23,SKLPort15]> {
let Latency = 6;
@@ -1070,9 +1032,6 @@ def SKLWriteResGroup75 : SchedWriteRes<[SKLPort23,SKLPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup75], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def SKLWriteResGroup76 : SchedWriteRes<[SKLPort23,SKLPort0156]> {
@@ -1102,15 +1061,9 @@ def SKLWriteResGroup82 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[SKLWriteResGroup82], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup82], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def SKLWriteResGroup83 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 6;
@@ -1132,23 +1085,23 @@ def SKLWriteResGroup85 : SchedWriteRes<[SKLPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup85], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
- "VPBROADCASTDYrm",
- "VPBROADCASTQYrm")>;
+def: InstRW<[SKLWriteResGroup85], (instregex "LD_F(32|64|80)m")>;
+def: InstRW<[SKLWriteResGroup85], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm,
+ VPBROADCASTDYrm,
+ VPBROADCASTQYrm)>;
def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>;
+def: InstRW<[SKLWriteResGroup86], (instrs VCVTDQ2PDYrr)>;
def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 6;
@@ -1167,19 +1120,21 @@ def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPD2PSYrr",
- "VCVTPS2PDYrr",
- "VCVT(T?)PD2DQYrr")>;
+def: InstRW<[SKLWriteResGroup89], (instrs VCVTPD2PSYrr,
+ VCVTPS2PDYrr,
+ VCVTPD2DQYrr,
+ VCVTTPD2DQYrr)>;
def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup91], (instregex "(V?)INSERTF128rm",
- "(V?)INSERTI128rm",
- "(V?)PADD(B|D|Q|W)rm",
- "(V?)PBLENDDrmi",
+def: InstRW<[SKLWriteResGroup91], (instrs VINSERTF128rm,
+ VINSERTI128rm,
+ VPBLENDDrmi)>;
+def: InstRW<[SKLWriteResGroup91, ReadAfterVecXLd],
+ (instregex "(V?)PADD(B|D|Q|W)rm",
"(V?)PSUB(B|D|Q|W)rm")>;
def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> {
@@ -1187,9 +1142,9 @@ def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKLWriteResGroup92], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[SKLWriteResGroup92], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def SKLWriteResGroup94 : SchedWriteRes<[SKLPort23,SKLPort0156]> {
let Latency = 7;
@@ -1225,10 +1180,8 @@ def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKLWriteResGroup100], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup100], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def SKLWriteResGroup101 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 7;
@@ -1242,8 +1195,8 @@ def SKLWriteResGroup102 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[SKLWriteResGroup102], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[SKLWriteResGroup102], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[SKLWriteResGroup102], (instrs FARCALL64)>;
def SKLWriteResGroup103 : SchedWriteRes<[SKLPort6,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 7;
@@ -1260,39 +1213,26 @@ def SKLWriteResGroup107 : SchedWriteRes<[SKLPort1,SKLPort23]> {
def: InstRW<[SKLWriteResGroup107], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
-def SKLWriteResGroup107_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup107_16], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def SKLWriteResGroup107_16_2 : SchedWriteRes<[SKLPort1, SKLPort06, SKLPort0156, SKLPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SKLWriteResGroup107_16_2], (instrs IMUL16m, MUL16m)>;
-
def SKLWriteResGroup108 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup108], (instregex "FCOM(P?)(32|64)m",
- "VPBROADCASTBYrm",
- "VPBROADCASTWYrm",
- "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXWQYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "FCOM(P?)(32|64)m")>;
+def: InstRW<[SKLWriteResGroup108], (instrs VPBROADCASTBYrm,
+ VPBROADCASTWYrm,
+ VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXWQYrm)>;
def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup110], (instregex "VPADD(B|D|Q|W)Yrm",
- "VPBLENDDYrmi",
+def: InstRW<[SKLWriteResGroup110], (instrs VPBLENDDYrmi)>;
+def: InstRW<[SKLWriteResGroup110, ReadAfterVecYLd],
+ (instregex "VPADD(B|D|Q|W)Yrm",
"VPSUB(B|D|Q|W)Yrm")>;
def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
@@ -1302,22 +1242,13 @@ def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
}
def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKLWriteResGroup115 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKLWriteResGroup115], (instregex "ROR(8|16|32|64)mCL")>;
-
def SKLWriteResGroup116 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKLWriteResGroup116], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup116], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def SKLWriteResGroup117 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
let Latency = 8;
@@ -1325,6 +1256,7 @@ def SKLWriteResGroup117 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
let ResourceCycles = [1,1,1,3];
}
def: InstRW<[SKLWriteResGroup117], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1335,25 +1267,25 @@ def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
let ResourceCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, SKLWriteResGroup119>;
-def: InstRW<[SKLWriteResGroup119], (instregex "CMPXCHG(8|16|32|64)rm")>;
def SKLWriteResGroup120 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup120], (instregex "MMX_CVTPI2PSirm")>;
+def: InstRW<[SKLWriteResGroup120], (instrs MMX_CVTPI2PSirm)>;
def SKLWriteResGroup121 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup121], (instregex "(V?)PCMPGTQrm",
- "VPMOVSXBWYrm",
- "VPMOVSXDQYrm",
- "VPMOVSXWDYrm",
- "VPMOVZXWDYrm")>;
+def: InstRW<[SKLWriteResGroup121], (instrs PCMPGTQrm,
+ VPCMPGTQrm,
+ VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVZXWDYrm)>;
def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let Latency = 9;
@@ -1363,13 +1295,6 @@ def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVT(T?)PS2PIirm",
"(V?)CVTPS2PDrm")>;
-def SKLWriteResGroup127 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup127], (instrs IMUL64m, MUL64m, MULX64rm)>;
-
def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -1392,8 +1317,8 @@ def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
- "ILD_F(16|32|64)m",
- "VPCMPGTQYrm")>;
+ "ILD_F(16|32|64)m")>;
+def: InstRW<[SKLWriteResGroup133], (instrs VPCMPGTQYrm)>;
def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 10;
@@ -1410,7 +1335,7 @@ def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup138], (instregex "MMX_CVTPI2PDirm")>;
+def: InstRW<[SKLWriteResGroup138], (instrs MMX_CVTPI2PDirm)>;
def SKLWriteResGroup139 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let Latency = 10;
@@ -1424,15 +1349,8 @@ def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
-def: InstRW<[SKLWriteResGroup140], (instregex "VPHADDSWYrm",
- "VPHSUBSWYrm")>;
-
-def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup142], (instrs IMUL32m, MUL32m, MULX32rm)>;
+def: InstRW<[SKLWriteResGroup140], (instrs VPHADDSWYrm,
+ VPHSUBSWYrm)>;
def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 10;
@@ -1460,9 +1378,10 @@ def SKLWriteResGroup147 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup147], (instregex "VCVTDQ2PSYrm",
- "VCVTPS2PDYrm",
- "VCVT(T?)PS2DQYrm")>;
+def: InstRW<[SKLWriteResGroup147], (instrs VCVTDQ2PSYrm,
+ VCVTPS2PDYrm,
+ VCVTPS2DQYrm,
+ VCVTTPS2DQYrm)>;
def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 11;
@@ -1493,9 +1412,11 @@ def SKLWriteResGroup152 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup152], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVT(T?)PD2PIirm")>;
+def: InstRW<[SKLWriteResGroup152], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTTPD2PIirm)>;
def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
let Latency = 11;
@@ -1510,7 +1431,7 @@ def SKLWriteResGroup155 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort015
let NumMicroOps = 9;
let ResourceCycles = [1,5,1,2];
}
-def: InstRW<[SKLWriteResGroup155], (instregex "RCL8rCL")>;
+def: InstRW<[SKLWriteResGroup155], (instrs RCL8rCL)>;
def SKLWriteResGroup156 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 11;
@@ -1538,7 +1459,7 @@ def SKLWriteResGroup163 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup163], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[SKLWriteResGroup163], (instrs VCVTDQ2PDYrm)>;
def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
let Latency = 14;
@@ -1567,7 +1488,7 @@ def SKLWriteResGroup170 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort015
let NumMicroOps = 10;
let ResourceCycles = [2,4,1,3];
}
-def: InstRW<[SKLWriteResGroup170], (instregex "RCR8rCL")>;
+def: InstRW<[SKLWriteResGroup170], (instrs RCR8rCL)>;
def SKLWriteResGroup171 : SchedWriteRes<[SKLPort0]> {
let Latency = 15;
@@ -1723,13 +1644,6 @@ def SKLWriteResGroup206 : SchedWriteRes<[SKLPort0,SKLPort23]> {
}
def: InstRW<[SKLWriteResGroup206], (instregex "DIVR_F(32|64)m")>;
-def SKLWriteResGroup207 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort0156]> {
- let Latency = 28;
- let NumMicroOps = 8;
- let ResourceCycles = [2,4,1,1];
-}
-def: InstRW<[SKLWriteResGroup207], (instregex "IDIV(8|16|32|64)m")>;
-
def SKLWriteResGroup208 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 30;
let NumMicroOps = 3;
@@ -1824,20 +1738,6 @@ def SKLWriteResGroup220 : SchedWriteRes<[SKLPort5,SKLPort05,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup220], (instrs FNINIT)>;
-def SKLWriteResGroup221 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
- let Latency = 76;
- let NumMicroOps = 32;
- let ResourceCycles = [7,2,8,3,1,11];
-}
-def: InstRW<[SKLWriteResGroup221], (instregex "DIV(16|32|64)r")>;
-
-def SKLWriteResGroup222 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> {
- let Latency = 102;
- let NumMicroOps = 66;
- let ResourceCycles = [4,2,4,8,14,34];
-}
-def: InstRW<[SKLWriteResGroup222], (instregex "IDIV(16|32|64)r")>;
-
def SKLWriteResGroup223 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 106;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 9d5f8555c505..ec8e4db02d8a 100755
--- a/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -71,10 +71,16 @@ def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3, SKXPort4,
let BufferSize=60;
}
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -107,24 +113,48 @@ def : WriteRes<WriteRMW, [SKXPort237,SKXPort4]>;
// Arithmetic.
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op.
-defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
-defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multiplication.
+
+// Integer multiplication.
+defm : SKXWriteResPair<WriteIMul8, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul16, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SKXPort1,SKXPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
+defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
+defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
-
-defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteDiv32, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteDiv64, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv32, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv64, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteCMPXCHG,[SKXPort06, SKXPort0156], 5, [2,3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[SKXPort23,SKXPort06,SKXPort0156,SKXPort237,SKXPort4], 8, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteXCHG, [SKXPort0156], 2, [3], 3>;
+
+// TODO: Why isn't the SKXDivider used?
+defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv16Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv32Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv64Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
+
+defm : X86WriteRes<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv8Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.
defm : SKXWriteResPair<WriteCMOV, [SKXPort06], 1, [1], 1>; // Conditional move.
@@ -135,11 +165,19 @@ def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [SKXPort06]>;
-def : WriteRes<WriteBitTest,[SKXPort06]>; //
+defm : X86WriteRes<WriteLAHFSAHF, [SKXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SKXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SKXPort06,SKXPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [SKXPort0156,SKXPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [SKXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SKXPort06,SKXPort23], 5, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SKXPort0156,SKXPort23], 5, [1,1], 2>;
// Integer shifts and rotates.
-defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
+defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
+defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3>;
+defm : SKXWriteResPair<WriteRotate, [SKXPort06], 2, [2], 2>;
+defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
@@ -154,9 +192,10 @@ defm : SKXWriteResPair<WriteLZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteTZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WritePOPCNT, [SKXPort1], 3>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>;
-defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
+defm : SKXWriteResPair<WriteBLS, [SKXPort15], 1>;
+defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
@@ -625,10 +664,7 @@ def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> {
let Latency = 1;
@@ -655,48 +691,43 @@ def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup10], (instrs CBW, CWDE, CDQE,
- CMC, STC)>;
-def: InstRW<[SKXWriteResGroup10], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+ CMC, STC,
+ SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> {
let Latency = 1;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm",
- "KMOV(B|D|Q|W)mk",
- "ST_FP(32|64|80)m",
- "VMPTRSTm")>;
+def: InstRW<[SKXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
+def: InstRW<[SKXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
+ "ST_FP(32|64|80)m")>;
def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[SKXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP)>;
-def: InstRW<[SKXWriteResGroup14], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP,
+ MMX_MOVDQ2Qrr)>;
def SKXWriteResGroup15 : SchedWriteRes<[SKXPort06]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup15], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri",
- "SET(A|BE)r")>;
+def: InstRW<[SKXWriteResGroup15], (instregex "SET(A|BE)r")>;
def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
let Latency = 2;
@@ -726,11 +757,10 @@ def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup23], (instrs CWD)>;
-def: InstRW<[SKXWriteResGroup23], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[SKXWriteResGroup23], (instregex "ADC8i8",
- "ADC8ri",
- "SBB8i8",
+def: InstRW<[SKXWriteResGroup23], (instrs CWD,
+ JCXZ, JECXZ, JRCXZ,
+ ADC8i8, SBB8i8)>;
+def: InstRW<[SKXWriteResGroup23], (instregex "ADC8ri",
"SBB8ri")>;
def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
@@ -752,10 +782,9 @@ def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[SKXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
def SKXWriteResGroup29 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> {
let Latency = 2;
@@ -781,39 +810,26 @@ def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
-def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup31_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
-
def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
+def: InstRW<[SKXWriteResGroup32], (instrs VPSADBWZrr)>; // TODO: 512-bit ops require ports 0/1 to be joined.
def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
"KADD(B|D|Q|W)rr",
"KSHIFTL(B|D|Q|W)ri",
"KSHIFTR(B|D|Q|W)ri",
- "KUNPCKBWrr",
- "KUNPCKDQrr",
- "KUNPCKWDrr",
+ "KUNPCK(BW|DQ|WD)rr",
"VALIGND(Z|Z128|Z256)rri",
"VALIGNQ(Z|Z128|Z256)rri",
"VCMPPD(Z|Z128|Z256)rri",
"VCMPPS(Z|Z128|Z256)rri",
- "VCMPSDZrr",
- "VCMPSSZrr",
+ "VCMP(SD|SS)Zrr",
"VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
- "VFPCLASSPD(Z|Z128|Z256)rr",
- "VFPCLASSPS(Z|Z128|Z256)rr",
- "VFPCLASSSDZrr",
- "VFPCLASSSSZrr",
- "VPBROADCASTBrr",
- "VPBROADCASTWrr",
+ "VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
+ "VFPCLASS(SD|SS)Zrr",
+ "VPBROADCAST(B|W)rr",
"VPCMPB(Z|Z128|Z256)rri",
"VPCMPD(Z|Z128|Z256)rri",
"VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
@@ -823,7 +839,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
"VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
"VPCMPW(Z|Z128|Z256)rri",
"VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr",
- "VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined.
"VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
@@ -833,26 +848,6 @@ def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup34], (instrs FNSTSW16r)>;
-def SKXWriteResGroup35 : SchedWriteRes<[SKXPort06]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKXWriteResGroup35], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
-
-def SKXWriteResGroup36 : SchedWriteRes<[SKXPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKXWriteResGroup36], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
-
def SKXWriteResGroup37 : SchedWriteRes<[SKXPort0,SKXPort5]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -872,9 +867,9 @@ def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKXWriteResGroup41], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[SKXWriteResGroup41], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 3;
@@ -895,10 +890,8 @@ def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[SKXWriteResGroup44], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
+def: InstRW<[SKXWriteResGroup44], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> {
let Latency = 3;
@@ -1000,20 +993,6 @@ def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
"VPMOVUSWB(Z|Z128|Z256)rr",
"VPMOVWB(Z|Z128|Z256)rr")>;
-def SKXWriteResGroup52 : SchedWriteRes<[SKXPort1,SKXPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup52], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def SKXWriteResGroup52_16 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKXWriteResGroup52_16], (instrs IMUL16r, MUL16r)>;
-
def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -1030,10 +1009,10 @@ def SKXWriteResGroup55 : SchedWriteRes<[SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup55], (instrs FNCLEX)>;
-def SKXWriteResGroup56 : SchedWriteRes<[SKXPort015,SKXPort0156]> {
- let Latency = 4;
+def SKXWriteResGroup56 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[SKXWriteResGroup56], (instrs VZEROUPPER)>;
@@ -1049,11 +1028,8 @@ def SKXWriteResGroup58 : SchedWriteRes<[SKXPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
+def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)",
"(V?)MOVDDUPrm")>; // TODO: Should this be SKXWriteResGroup71?
def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> {
@@ -1104,13 +1080,6 @@ def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> {
}
def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>;
-def SKXWriteResGroup64 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup64], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort015]> {
let Latency = 5;
let NumMicroOps = 3;
@@ -1150,13 +1119,6 @@ def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup67], (instrs XSETBV)>;
-def SKXWriteResGroup68 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[SKXWriteResGroup68], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
@@ -1169,19 +1131,21 @@ def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup71], (instregex "VBROADCASTSSrm",
- "(V?)MOVSHDUPrm",
- "(V?)MOVSLDUPrm",
- "VPBROADCASTDrm",
- "VPBROADCASTQrm")>;
+def: InstRW<[SKXWriteResGroup71], (instrs VBROADCASTSSrm,
+ VPBROADCASTDrm,
+ VPBROADCASTQrm,
+ VMOVSHDUPrm,
+ VMOVSLDUPrm,
+ MOVSHDUPrm,
+ MOVSLDUPrm)>;
def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup72], (instregex "MMX_CVTPI2PSirr",
- "VCOMPRESSPD(Z|Z128|Z256)rr",
+def: InstRW<[SKXWriteResGroup72], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
"VCOMPRESSPS(Z|Z128|Z256)rr",
"VPCOMPRESSD(Z|Z128|Z256)rr",
"VPCOMPRESSQ(Z|Z128|Z256)rr",
@@ -1192,41 +1156,34 @@ def SKXWriteResGroup73 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PADDSBirm",
- "MMX_PADDSWirm",
- "MMX_PADDUSBirm",
- "MMX_PADDUSWirm",
- "MMX_PAVGBirm",
- "MMX_PAVGWirm",
- "MMX_PCMPEQBirm",
- "MMX_PCMPEQDirm",
- "MMX_PCMPEQWirm",
- "MMX_PCMPGTBirm",
- "MMX_PCMPGTDirm",
- "MMX_PCMPGTWirm",
- "MMX_PMAXSWirm",
- "MMX_PMAXUBirm",
- "MMX_PMINSWirm",
- "MMX_PMINUBirm",
- "MMX_PSUBSBirm",
- "MMX_PSUBSWirm",
- "MMX_PSUBUSBirm",
- "MMX_PSUBUSWirm")>;
+def: InstRW<[SKXWriteResGroup73], (instrs MMX_PADDSBirm,
+ MMX_PADDSWirm,
+ MMX_PADDUSBirm,
+ MMX_PADDUSWirm,
+ MMX_PAVGBirm,
+ MMX_PAVGWirm,
+ MMX_PCMPEQBirm,
+ MMX_PCMPEQDirm,
+ MMX_PCMPEQWirm,
+ MMX_PCMPGTBirm,
+ MMX_PCMPGTDirm,
+ MMX_PCMPGTWirm,
+ MMX_PMAXSWirm,
+ MMX_PMAXUBirm,
+ MMX_PMINSWirm,
+ MMX_PMINUBirm,
+ MMX_PSUBSBirm,
+ MMX_PSUBSWirm,
+ MMX_PSUBUSBirm,
+ MMX_PSUBUSWirm)>;
def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup76], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def SKXWriteResGroup78 : SchedWriteRes<[SKXPort23,SKXPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup78], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64)>;
+def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
let Latency = 6;
@@ -1234,9 +1191,6 @@ def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup79], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> {
@@ -1244,8 +1198,8 @@ def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)",
- "VMOVDI2PDIZrm(b?)")>;
+def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
+def: InstRW<[SKXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
let Latency = 6;
@@ -1276,15 +1230,9 @@ def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[SKXWriteResGroup86], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[SKXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def SKXWriteResGroup87 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 6;
@@ -1306,23 +1254,23 @@ def SKXWriteResGroup89 : SchedWriteRes<[SKXPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
- "VPBROADCASTDYrm",
- "VPBROADCASTQYrm")>;
+def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
+def: InstRW<[SKXWriteResGroup89], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm,
+ VPBROADCASTDYrm,
+ VPBROADCASTQYrm)>;
def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup90], (instregex "VCVTDQ2PDYrr")>;
+def: InstRW<[SKXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;
def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 7;
@@ -1389,12 +1337,14 @@ def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)",
+def: InstRW<[SKXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
+ VPBLENDDrmi)>;
+def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd],
+ (instregex "VBLENDMPDZ128rm(b?)",
"VBLENDMPSZ128rm(b?)",
"VBROADCASTI32X2Z128m(b?)",
"VBROADCASTSSZ128m(b?)",
- "VINSERTF128rm",
- "VINSERTI128rm",
+ "VINSERT(F|I)128rm",
"VMOVAPDZ128rm(b?)",
"VMOVAPSZ128rm(b?)",
"VMOVDDUPZ128rm(b?)",
@@ -1404,14 +1354,12 @@ def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)",
"VMOVDQU32Z128rm(b?)",
"VMOVDQU64Z128rm(b?)",
"VMOVDQU8Z128rm(b?)",
- "VMOVNTDQAZ128rm(b?)",
"VMOVSHDUPZ128rm(b?)",
"VMOVSLDUPZ128rm(b?)",
"VMOVUPDZ128rm(b?)",
"VMOVUPSZ128rm(b?)",
"VPADD(B|D|Q|W)Z128rm(b?)",
"(V?)PADD(B|D|Q|W)rm",
- "VPBLENDDrmi",
"VPBLENDM(B|D|Q|W)Z128rm(b?)",
"VPBROADCASTDZ128m(b?)",
"VPBROADCASTQZ128m(b?)",
@@ -1425,9 +1373,9 @@ def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKXWriteResGroup96], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[SKXWriteResGroup96], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 7;
@@ -1495,10 +1443,8 @@ def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 7;
@@ -1512,8 +1458,8 @@ def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64)>;
def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 7;
@@ -1567,20 +1513,6 @@ def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> {
def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
-def SKXWriteResGroup118_16_1 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup118_16_1], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>;
-
-def SKXWriteResGroup118_16_2 : SchedWriteRes<[SKXPort1, SKXPort06, SKXPort0156, SKXPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SKXWriteResGroup118_16_2], (instrs IMUL16m, MUL16m)>;
-
def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
@@ -1588,20 +1520,23 @@ def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
"VFPCLASSSDZrm(b?)",
- "VPBROADCASTBYrm",
"VPBROADCASTB(Z|Z256)m(b?)",
- "VPBROADCASTWYrm",
- "VPBROADCASTW(Z|Z256)m(b?)",
- "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXWQYrm")>;
+ "VPBROADCASTW(Z|Z256)m(b?)")>;
+def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm,
+ VPBROADCASTWYrm,
+ VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXWQYrm)>;
def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPD(Z|Z256)rm(b?)",
+def: InstRW<[SKXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
+ VPBLENDDYrmi)>;
+def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
+ (instregex "VBLENDMPD(Z|Z256)rm(b?)",
"VBLENDMPS(Z|Z256)rm(b?)",
"VBROADCASTF32X2Z256m(b?)",
"VBROADCASTF32X2Zm(b?)",
@@ -1638,14 +1573,12 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPD(Z|Z256)rm(b?)",
"VMOVDQU32(Z|Z256)rm(b?)",
"VMOVDQU64(Z|Z256)rm(b?)",
"VMOVDQU8(Z|Z256)rm(b?)",
- "VMOVNTDQAZ256rm(b?)",
"VMOVSHDUP(Z|Z256)rm(b?)",
"VMOVSLDUP(Z|Z256)rm(b?)",
"VMOVUPD(Z|Z256)rm(b?)",
"VMOVUPS(Z|Z256)rm(b?)",
"VPADD(B|D|Q|W)Yrm",
"VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
- "VPBLENDDYrmi",
"VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
"VPBROADCASTD(Z|Z256)m(b?)",
"VPBROADCASTQ(Z|Z256)m(b?)",
@@ -1661,22 +1594,13 @@ def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKXWriteResGroup126 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKXWriteResGroup126], (instregex "ROR(8|16|32|64)mCL")>;
-
def SKXWriteResGroup127 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
+def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
let Latency = 8;
@@ -1684,6 +1608,7 @@ def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
let ResourceCycles = [1,1,1,3];
}
def: InstRW<[SKXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1694,7 +1619,6 @@ def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
let ResourceCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, SKXWriteResGroup130>;
-def: InstRW<[SKXWriteResGroup130], (instregex "CMPXCHG(8|16|32|64)rm")>;
def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 8;
@@ -1734,19 +1658,20 @@ def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup135], (instregex "MMX_CVTPI2PSirm")>;
+def: InstRW<[SKXWriteResGroup135], (instrs MMX_CVTPI2PSirm)>;
def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i",
- "VALIGNQZ128rm(b?)i",
- "VCMPPDZ128rm(b?)i",
- "VCMPPSZ128rm(b?)i",
- "VCMPSDZrm",
- "VCMPSSZrm",
+def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVZXWDYrm)>;
+def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
+ "VCMP(PD|PS)Z128rm(b?)i",
+ "VCMP(SD|SS)Zrm",
"VFPCLASSSSZrm(b?)",
"VPCMPBZ128rmi(b?)",
"VPCMPDZ128rmi(b?)",
@@ -1770,18 +1695,14 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i",
"VPMINUQZ128rm(b?)",
"VPMOVSXBDZ128rm(b?)",
"VPMOVSXBQZ128rm(b?)",
- "VPMOVSXBWYrm",
"VPMOVSXBWZ128rm(b?)",
- "VPMOVSXDQYrm",
"VPMOVSXDQZ128rm(b?)",
- "VPMOVSXWDYrm",
"VPMOVSXWDZ128rm(b?)",
"VPMOVSXWQZ128rm(b?)",
"VPMOVZXBDZ128rm(b?)",
"VPMOVZXBQZ128rm(b?)",
"VPMOVZXBWZ128rm(b?)",
"VPMOVZXDQZ128rm(b?)",
- "VPMOVZXWDYrm",
"VPMOVZXWDZ128rm(b?)",
"VPMOVZXWQZ128rm(b?)",
"VPTESTMBZ128rm(b?)",
@@ -1801,13 +1722,6 @@ def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> {
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
"(V?)CVTPS2PDrm")>;
-def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup142], (instrs IMUL64m, MUL64m, MULX64rm)>;
-
def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -1829,6 +1743,7 @@ def SKXWriteResGroup148 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
+def: InstRW<[SKXWriteResGroup148], (instrs VPCMPGTQYrm)>;
def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
"VALIGND(Z|Z256)rm(b?)i",
@@ -1843,7 +1758,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VPCMPEQW(Z|Z256)rm(b?)",
"VPCMPGTB(Z|Z256)rm(b?)",
"VPCMPGTD(Z|Z256)rm(b?)",
- "VPCMPGTQYrm",
"VPCMPGTQ(Z|Z256)rm(b?)",
"VPCMPGTW(Z|Z256)rm(b?)",
"VPCMPQ(Z|Z256)rmi(b?)",
@@ -1914,15 +1828,8 @@ def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
-def: InstRW<[SKXWriteResGroup154], (instregex "VPHADDSWYrm",
- "VPHSUBSWYrm")>;
-
-def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup156], (instrs IMUL32m, MUL32m, MULX32rm)>;
+def: InstRW<[SKXWriteResGroup154], (instrs VPHADDSWYrm,
+ VPHSUBSWYrm)>;
def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 10;
@@ -1950,11 +1857,10 @@ def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PD(Z|Z256)rm(b?)",
- "VCVTDQ2PSYrm",
- "VCVTDQ2PS(Z|Z256)rm(b?)",
+def: InstRW<[SKXWriteResGroup161], (instrs VCVTDQ2PSYrm,
+ VCVTPS2PDYrm)>;
+def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
"VCVTPH2PS(Z|Z256)rm(b?)",
- "VCVTPS2PDYrm",
"VCVTPS2PD(Z|Z256)rm(b?)",
"VCVTQQ2PD(Z|Z256)rm(b?)",
"VCVTQQ2PSZ256rm(b?)",
@@ -1965,8 +1871,7 @@ def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PD(Z|Z256)rm(b?)",
"VCVT(T?)PS2QQZ256rm(b?)",
"VCVT(T?)PS2UDQ(Z|Z256)rm(b?)",
"VCVT(T?)PS2UQQZ256rm(b?)",
- "VCVTUDQ2PD(Z|Z256)rm(b?)",
- "VCVTUDQ2PS(Z|Z256)rm(b?)",
+ "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)",
"VCVTUQQ2PD(Z|Z256)rm(b?)",
"VCVTUQQ2PSZ256rm(b?)")>;
@@ -2000,9 +1905,11 @@ def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup166], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVT(T?)PD2PIirm")>;
+def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTTPD2PIirm)>;
def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 11;
@@ -2024,7 +1931,7 @@ def SKXWriteResGroup170 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort015
let NumMicroOps = 9;
let ResourceCycles = [1,5,1,2];
}
-def: InstRW<[SKXWriteResGroup170], (instregex "RCL8rCL")>;
+def: InstRW<[SKXWriteResGroup170], (instrs RCL8rCL)>;
def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let Latency = 11;
@@ -2091,7 +1998,7 @@ def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup181], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[SKXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;
def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 13;
@@ -2151,7 +2058,7 @@ def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort015
let NumMicroOps = 10;
let ResourceCycles = [2,4,1,3];
}
-def: InstRW<[SKXWriteResGroup190], (instregex "RCR8rCL")>;
+def: InstRW<[SKXWriteResGroup190], (instrs RCR8rCL)>;
def SKXWriteResGroup191 : SchedWriteRes<[SKXPort0]> {
let Latency = 15;
@@ -2181,10 +2088,10 @@ def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
}
def: InstRW<[SKXWriteResGroup199], (instrs CMPXCHG8B)>;
-def SKXWriteResGroup200 : SchedWriteRes<[SKXPort0156]> {
- let Latency = 16;
- let NumMicroOps = 16;
- let ResourceCycles = [16];
+def SKXWriteResGroup200 : SchedWriteRes<[SKXPort1, SKXPort05, SKXPort6]> {
+ let Latency = 12;
+ let NumMicroOps = 34;
+ let ResourceCycles = [1, 4, 5];
}
def: InstRW<[SKXWriteResGroup200], (instrs VZEROALL)>;
@@ -2408,13 +2315,6 @@ def SKXWriteResGroup240 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01
def: InstRW<[SKXWriteResGroup240], (instrs VGATHERDPSZ256rm,
VPGATHERDDZ256rm)>;
-def SKXWriteResGroup241 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort0156]> {
- let Latency = 28;
- let NumMicroOps = 8;
- let ResourceCycles = [2,4,1,1];
-}
-def: InstRW<[SKXWriteResGroup241], (instregex "IDIV(8|16|32|64)m")>;
-
def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 29;
let NumMicroOps = 15;
@@ -2547,20 +2447,6 @@ def SKXWriteResGroup263 : SchedWriteRes<[SKXPort5,SKXPort05,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup263], (instrs FNINIT)>;
-def SKXWriteResGroup264 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
- let Latency = 76;
- let NumMicroOps = 32;
- let ResourceCycles = [7,2,8,3,1,11];
-}
-def: InstRW<[SKXWriteResGroup264], (instregex "DIV(16|32|64)r")>;
-
-def SKXWriteResGroup265 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> {
- let Latency = 102;
- let NumMicroOps = 66;
- let ResourceCycles = [4,2,4,8,14,34];
-}
-def: InstRW<[SKXWriteResGroup265], (instregex "IDIV(16|32|64)r")>;
-
def SKXWriteResGroup266 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 106;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86Schedule.td b/contrib/llvm/lib/Target/X86/X86Schedule.td
index ef9ce94706df..25aa83f96d3a 100644
--- a/contrib/llvm/lib/Target/X86/X86Schedule.td
+++ b/contrib/llvm/lib/Target/X86/X86Schedule.td
@@ -14,6 +14,9 @@
// but other register operands don't have to be read until the load is ready.
// These operands are marked with ReadAfterLd.
def ReadAfterLd : SchedRead;
+def ReadAfterVecLd : SchedRead;
+def ReadAfterVecXLd : SchedRead;
+def ReadAfterVecYLd : SchedRead;
// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
@@ -37,15 +40,19 @@ multiclass X86WriteRes<SchedWrite SchedRW,
class X86FoldableSchedWrite : SchedWrite {
// The SchedWrite to use when a load is folded into the instruction.
SchedWrite Folded;
+  // The SchedRead to tag register operands that don't need to be ready
+ // until the folded load has completed.
+ SchedRead ReadAfterFold;
}
// Multiclass that produces a linked pair of SchedWrites.
-multiclass X86SchedWritePair {
+multiclass X86SchedWritePair<SchedRead ReadAfter = ReadAfterLd> {
// Register-Memory operation.
def Ld : SchedWrite;
// Register-Register operation.
def NAME : X86FoldableSchedWrite {
let Folded = !cast<SchedWrite>(NAME#"Ld");
+ let ReadAfterFold = ReadAfter;
}
}
@@ -107,19 +114,33 @@ def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
def WriteStoreNT : SchedWrite;
def WriteMove : SchedWrite;
+def WriteCopy : WriteSequence<[WriteLoad, WriteStore]>; // mem->mem copy
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
defm WriteADC : X86SchedWritePair; // Integer ALU + flags op.
-def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
-def WriteADCRMW : WriteSequence<[WriteADCLd, WriteStore]>;
-defm WriteIMul : X86SchedWritePair; // Integer multiplication.
-defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
-def WriteIMulH : SchedWrite; // Integer multiplication, high part.
+def WriteALURMW : WriteSequence<[WriteALULd, WriteRMW]>;
+def WriteADCRMW : WriteSequence<[WriteADCLd, WriteRMW]>;
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+// Integer multiplication
+defm WriteIMul8 : X86SchedWritePair; // Integer 8-bit multiplication.
+defm WriteIMul16 : X86SchedWritePair; // Integer 16-bit multiplication.
+defm WriteIMul16Imm : X86SchedWritePair; // Integer 16-bit multiplication by immediate.
+defm WriteIMul16Reg : X86SchedWritePair; // Integer 16-bit multiplication by register.
+defm WriteIMul32 : X86SchedWritePair; // Integer 32-bit multiplication.
+defm WriteIMul32Imm : X86SchedWritePair; // Integer 32-bit multiplication by immediate.
+defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by register.
+defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
+defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
+defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
+def WriteIMulH : SchedWrite; // Integer multiplication, high part.
+
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
+defm WriteCMPXCHG : X86SchedWritePair; // Compare and set, compare and swap.
+def WriteCMPXCHGRMW : SchedWrite; // Compare and set, compare and swap.
+def WriteXCHG : SchedWrite; // Compare+Exchange - TODO RMW support.
// Integer division.
defm WriteDiv8 : X86SchedWritePair;
@@ -142,18 +163,32 @@ def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
-def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
+
+def WriteBitTest : SchedWrite; // Bit Test
+def WriteBitTestImmLd : SchedWrite;
+def WriteBitTestRegLd : SchedWrite;
+
+def WriteBitTestSet : SchedWrite; // Bit Test + Set
+def WriteBitTestSetImmLd : SchedWrite;
+def WriteBitTestSetRegLd : SchedWrite;
+def WriteBitTestSetImmRMW : WriteSequence<[WriteBitTestSetImmLd, WriteRMW]>;
+def WriteBitTestSetRegRMW : WriteSequence<[WriteBitTestSetRegLd, WriteRMW]>;
// Integer shifts and rotates.
-defm WriteShift : X86SchedWritePair;
+defm WriteShift : X86SchedWritePair;
+defm WriteShiftCL : X86SchedWritePair;
+defm WriteRotate : X86SchedWritePair;
+defm WriteRotateCL : X86SchedWritePair;
+
// Double shift instructions.
def WriteSHDrri : SchedWrite;
def WriteSHDrrcl : SchedWrite;
def WriteSHDmri : SchedWrite;
def WriteSHDmrcl : SchedWrite;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
+defm WriteBLS : X86SchedWritePair;
defm WriteBZHI : X86SchedWritePair;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
@@ -185,98 +220,98 @@ def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
-defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
-defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
-defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM).
-defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM).
-defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
-defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
-defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM).
-defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM).
-defm WriteFCmp : X86SchedWritePair; // Floating point compare.
-defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
-defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM).
-defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM).
-defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
-defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
-defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM).
-defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM).
-defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
-defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
-defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
-defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM).
-defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (YMM).
-defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
-defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
-defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM).
-defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
-defm WriteFDiv : X86SchedWritePair; // Floating point division.
-defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
-defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
-defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM).
-defm WriteFDiv64 : X86SchedWritePair; // Floating point double division.
-defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM).
-defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM).
-defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM).
-defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
-defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
-defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
-defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM).
-defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root.
-defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM).
-defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM).
-defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM).
-defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
-defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
-defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
-defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM).
-defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM).
-defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
-defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
-defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM).
-defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM).
-defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
-defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
-defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM).
-defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM).
-defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
-defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
-defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
-defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM).
-defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
-defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
-defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM).
-defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM).
-defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
-defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM).
-defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM).
-defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions.
-defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM).
-defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM).
-defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
-defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM).
-defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM).
-defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
-defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM).
-defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM).
-defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
-defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM).
-defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM).
-defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
-defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM).
-defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (YMZMM).
+defm WriteFAdd : X86SchedWritePair<ReadAfterVecLd>; // Floating point add/sub.
+defm WriteFAddX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point add/sub (XMM).
+defm WriteFAddY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (YMM).
+defm WriteFAddZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (ZMM).
+defm WriteFAdd64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double add/sub.
+defm WriteFAdd64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double add/sub (XMM).
+defm WriteFAdd64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (YMM).
+defm WriteFAdd64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (ZMM).
+defm WriteFCmp : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare.
+defm WriteFCmpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point compare (XMM).
+defm WriteFCmpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (YMM).
+defm WriteFCmpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (ZMM).
+defm WriteFCmp64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double compare.
+defm WriteFCmp64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double compare (XMM).
+defm WriteFCmp64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (YMM).
+defm WriteFCmp64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (ZMM).
+defm WriteFCom : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags.
+defm WriteFMul : X86SchedWritePair<ReadAfterVecLd>; // Floating point multiplication.
+defm WriteFMulX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point multiplication (XMM).
+defm WriteFMulY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (YMM).
+defm WriteFMulZ  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (ZMM).
+defm WriteFMul64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double multiplication.
+defm WriteFMul64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double multiplication (XMM).
+defm WriteFMul64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (YMM).
+defm WriteFMul64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (ZMM).
+defm WriteFDiv : X86SchedWritePair<ReadAfterVecLd>; // Floating point division.
+defm WriteFDivX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point division (XMM).
+defm WriteFDivY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (YMM).
+defm WriteFDivZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (ZMM).
+defm WriteFDiv64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double division.
+defm WriteFDiv64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double division (XMM).
+defm WriteFDiv64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (YMM).
+defm WriteFDiv64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (ZMM).
+defm WriteFSqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point square root.
+defm WriteFSqrtX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point square root (XMM).
+defm WriteFSqrtY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (YMM).
+defm WriteFSqrtZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (ZMM).
+defm WriteFSqrt64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double square root.
+defm WriteFSqrt64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double square root (XMM).
+defm WriteFSqrt64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (YMM).
+defm WriteFSqrt64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (ZMM).
+defm WriteFSqrt80 : X86SchedWritePair<ReadAfterVecLd>; // Floating point long double square root.
+defm WriteFRcp : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal estimate.
+defm WriteFRcpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal estimate (XMM).
+defm WriteFRcpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (YMM).
+defm WriteFRcpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (ZMM).
+defm WriteFRsqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal square root estimate.
+defm WriteFRsqrtX: X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal square root estimate (XMM).
+defm WriteFRsqrtY: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (YMM).
+defm WriteFRsqrtZ: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (ZMM).
+defm WriteFMA : X86SchedWritePair<ReadAfterVecLd>; // Fused Multiply Add.
+defm WriteFMAX : X86SchedWritePair<ReadAfterVecXLd>; // Fused Multiply Add (XMM).
+defm WriteFMAY : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (YMM).
+defm WriteFMAZ : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (ZMM).
+defm WriteDPPD : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double dot product.
+defm WriteDPPS : X86SchedWritePair<ReadAfterVecXLd>; // Floating point single dot product.
+defm WriteDPPSY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (YMM).
+defm WriteDPPSZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (ZMM).
+defm WriteFSign : X86SchedWritePair<ReadAfterVecLd>; // Floating point fabs/fchs.
+defm WriteFRnd : X86SchedWritePair<ReadAfterVecXLd>; // Floating point rounding.
+defm WriteFRndY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (YMM).
+defm WriteFRndZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (ZMM).
+defm WriteFLogic : X86SchedWritePair<ReadAfterVecXLd>; // Floating point and/or/xor logicals.
+defm WriteFLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (YMM).
+defm WriteFLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (ZMM).
+defm WriteFTest : X86SchedWritePair<ReadAfterVecXLd>; // Floating point TEST instructions.
+defm WriteFTestY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (YMM).
+defm WriteFTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (ZMM).
+defm WriteFShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector shuffles.
+defm WriteFShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (YMM).
+defm WriteFShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (ZMM).
+defm WriteFVarShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector variable shuffles.
+defm WriteFVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (YMM).
+defm WriteFVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (ZMM).
+defm WriteFBlend : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector blends.
+defm WriteFBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (YMM).
+defm WriteFBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (ZMM).
+defm WriteFVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Fp vector variable blends.
+defm WriteFVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (YMM).
+defm WriteFVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (ZMM).
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Horizontal Add/Sub (float and integer)
-defm WriteFHAdd : X86SchedWritePair;
-defm WriteFHAddY : X86SchedWritePair;
-defm WriteFHAddZ : X86SchedWritePair;
-defm WritePHAdd : X86SchedWritePair;
-defm WritePHAddX : X86SchedWritePair;
-defm WritePHAddY : X86SchedWritePair;
-defm WritePHAddZ : X86SchedWritePair;
+defm WriteFHAdd : X86SchedWritePair<ReadAfterVecXLd>;
+defm WriteFHAddY : X86SchedWritePair<ReadAfterVecYLd>;
+defm WriteFHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
+defm WritePHAdd : X86SchedWritePair<ReadAfterVecLd>;
+defm WritePHAddX : X86SchedWritePair<ReadAfterVecXLd>;
+defm WritePHAddY : X86SchedWritePair<ReadAfterVecYLd>;
+defm WritePHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
// Vector integer operations.
def WriteVecLoad : SchedWrite;
@@ -299,54 +334,54 @@ def WriteVecMoveY : SchedWrite;
def WriteVecMoveToGpr : SchedWrite;
def WriteVecMoveFromGpr : SchedWrite;
-defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
-defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
-defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM).
-defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM).
-defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
-defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
-defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM).
-defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM).
-defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
-defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM).
-defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM).
-defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
-defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
-defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM).
-defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM).
-defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default).
-defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
-defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM).
-defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM).
-defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default).
-defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM).
-defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM).
-defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM).
-defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
-defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM).
-defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM).
-defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
-defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
-defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM).
-defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM).
-defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
-defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
-defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM).
-defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM).
-defm WriteBlend : X86SchedWritePair; // Vector blends.
-defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM).
-defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM).
-defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
-defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM).
-defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM).
-defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
-defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
-defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM).
-defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM).
-defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
-defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM).
-defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM).
-defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
+defm WriteVecALU : X86SchedWritePair<ReadAfterVecLd>; // Vector integer ALU op, no logicals.
+defm WriteVecALUX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer ALU op, no logicals (XMM).
+defm WriteVecALUY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (YMM).
+defm WriteVecALUZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (ZMM).
+defm WriteVecLogic : X86SchedWritePair<ReadAfterVecLd>; // Vector integer and/or/xor logicals.
+defm WriteVecLogicX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer and/or/xor logicals (XMM).
+defm WriteVecLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (YMM).
+defm WriteVecLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (ZMM).
+defm WriteVecTest : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer TEST instructions.
+defm WriteVecTestY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (YMM).
+defm WriteVecTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (ZMM).
+defm WriteVecShift : X86SchedWritePair<ReadAfterVecLd>; // Vector integer shifts (default).
+defm WriteVecShiftX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer shifts (XMM).
+defm WriteVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (YMM).
+defm WriteVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (ZMM).
+defm WriteVecShiftImm : X86SchedWritePair<ReadAfterVecLd>; // Vector integer immediate shifts (default).
+defm WriteVecShiftImmX: X86SchedWritePair<ReadAfterVecXLd>; // Vector integer immediate shifts (XMM).
+defm WriteVecShiftImmY: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (YMM).
+defm WriteVecShiftImmZ: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (ZMM).
+defm WriteVecIMul : X86SchedWritePair<ReadAfterVecLd>; // Vector integer multiply (default).
+defm WriteVecIMulX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer multiply (XMM).
+defm WriteVecIMulY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (YMM).
+defm WriteVecIMulZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (ZMM).
+defm WritePMULLD : X86SchedWritePair<ReadAfterVecXLd>; // Vector PMULLD.
+defm WritePMULLDY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (YMM).
+defm WritePMULLDZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (ZMM).
+defm WriteShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector shuffles.
+defm WriteShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector shuffles (XMM).
+defm WriteShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (YMM).
+defm WriteShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (ZMM).
+defm WriteVarShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector variable shuffles.
+defm WriteVarShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable shuffles (XMM).
+defm WriteVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (YMM).
+defm WriteVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (ZMM).
+defm WriteBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector blends.
+defm WriteBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (YMM).
+defm WriteBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (ZMM).
+defm WriteVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable blends.
+defm WriteVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (YMM).
+defm WriteVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (ZMM).
+defm WritePSADBW : X86SchedWritePair<ReadAfterVecLd>; // Vector PSADBW.
+defm WritePSADBWX : X86SchedWritePair<ReadAfterVecXLd>; // Vector PSADBW (XMM).
+defm WritePSADBWY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (YMM).
+defm WritePSADBWZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (ZMM).
+defm WriteMPSAD : X86SchedWritePair<ReadAfterVecXLd>; // Vector MPSAD.
+defm WriteMPSADY : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (YMM).
+defm WriteMPSADZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (ZMM).
+defm WritePHMINPOS : X86SchedWritePair<ReadAfterVecXLd>; // Vector PHMINPOS.
// Vector insert/extract operations.
defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
@@ -360,39 +395,39 @@ def WriteVecMOVMSKY : SchedWrite;
def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
-defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer.
-defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM).
-defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM).
-defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM).
-
-defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer.
-defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM).
-defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM).
-defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM).
-
-defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double.
-defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM).
-defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM).
-defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM).
-
-defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float.
-defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM).
-defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM).
-defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM).
-
-defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
-defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
-defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM).
-defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM).
-
-defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
-defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
-defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM).
-defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM).
-
-defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
-defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM).
-defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM).
+defm WriteCvtSD2I : X86SchedWritePair<ReadAfterVecLd>; // Double -> Integer.
+defm WriteCvtPD2I : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Integer (XMM).
+defm WriteCvtPD2IY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (YMM).
+defm WriteCvtPD2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (ZMM).
+
+defm WriteCvtSS2I : X86SchedWritePair<ReadAfterVecLd>; // Float -> Integer.
+defm WriteCvtPS2I : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Integer (XMM).
+defm WriteCvtPS2IY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (YMM).
+defm WriteCvtPS2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (ZMM).
+
+defm WriteCvtI2SD : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Double.
+defm WriteCvtI2PD : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Double (XMM).
+defm WriteCvtI2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (YMM).
+defm WriteCvtI2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (ZMM).
+
+defm WriteCvtI2SS : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Float.
+defm WriteCvtI2PS : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Float (XMM).
+defm WriteCvtI2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (YMM).
+defm WriteCvtI2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (ZMM).
+
+defm WriteCvtSS2SD : X86SchedWritePair<ReadAfterVecLd>; // Float -> Double size conversion.
+defm WriteCvtPS2PD : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Double size conversion (XMM).
+defm WriteCvtPS2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (YMM).
+defm WriteCvtPS2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (ZMM).
+
+defm WriteCvtSD2SS : X86SchedWritePair<ReadAfterVecLd>; // Double -> Float size conversion.
+defm WriteCvtPD2PS : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Float size conversion (XMM).
+defm WriteCvtPD2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (YMM).
+defm WriteCvtPD2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (ZMM).
+
+defm WriteCvtPH2PS : X86SchedWritePair<ReadAfterVecXLd>; // Half -> Float size conversion.
+defm WriteCvtPH2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (YMM).
+defm WriteCvtPH2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (ZMM).
def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM).
@@ -402,25 +437,25 @@ def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion
def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).
// CRC32 instruction.
-defm WriteCRC32 : X86SchedWritePair;
+defm WriteCRC32 : X86SchedWritePair<ReadAfterLd>;
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
-defm WritePCmpIStrM : X86SchedWritePair;
+defm WritePCmpIStrM : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Explicit Length Strings, Return Mask
-defm WritePCmpEStrM : X86SchedWritePair;
+defm WritePCmpEStrM : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Implicit Length Strings, Return Index
-defm WritePCmpIStrI : X86SchedWritePair;
+defm WritePCmpIStrI : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Explicit Length Strings, Return Index
-defm WritePCmpEStrI : X86SchedWritePair;
+defm WritePCmpEStrI : X86SchedWritePair<ReadAfterVecXLd>;
// AES instructions.
-defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption.
-defm WriteAESIMC : X86SchedWritePair; // InvMixColumn.
-defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.
+defm WriteAESDecEnc : X86SchedWritePair<ReadAfterVecXLd>; // Decryption, encryption.
+defm WriteAESIMC : X86SchedWritePair<ReadAfterVecXLd>; // InvMixColumn.
+defm WriteAESKeyGen : X86SchedWritePair<ReadAfterVecXLd>; // Key Generation.
// Carry-less multiplication instructions.
-defm WriteCLMul : X86SchedWritePair;
+defm WriteCLMul : X86SchedWritePair<ReadAfterVecXLd>;
// EMMS/FEMMS
def WriteEMMS : SchedWrite;
@@ -433,13 +468,13 @@ def WriteSTMXCSR : SchedWrite;
def WriteSystem : SchedWrite;
// AVX2.
-defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
-defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles.
-defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
-defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
-defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
-defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM).
-defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM).
+defm WriteFShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width vector shuffles.
+defm WriteFVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width variable shuffles.
+defm WriteShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector shuffles.
+defm WriteVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector variable shuffles.
+defm WriteVarVecShift : X86SchedWritePair<ReadAfterVecXLd>; // Variable vector shifts.
+defm WriteVarVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (YMM).
+defm WriteVarVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (ZMM).
// Old microcoded instructions that nobody uses.
def WriteMicrocoded : SchedWrite;
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
index a7f461c456bd..1589ff2ef402 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -43,6 +43,9 @@ def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+def : ReadAdvance<ReadAfterVecLd, 3>;
+def : ReadAdvance<ReadAfterVecXLd, 3>;
+def : ReadAdvance<ReadAfterVecYLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
@@ -78,11 +81,24 @@ def : WriteRes<WriteRMW, [AtomPort0]>;
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
-defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
-defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : AtomWriteResPair<WriteIMul8, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
+defm : AtomWriteResPair<WriteIMul16, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WriteIMul16Imm, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul16Reg, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul32, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul32Imm, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteIMul32Reg, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort01], [AtomPort01], 14, 14, [14], [14]>;
+defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : X86WriteResUnsupported<WriteIMulH>;
+
+defm : X86WriteRes<WriteXCHG, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
+defm : AtomWriteResPair<WriteCMPXCHG, [AtomPort01], [AtomPort01], 15, 15, [15]>;
+defm : X86WriteRes<WriteCMPXCHGRMW, [AtomPort01, AtomPort0], 1, [1, 1], 1>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
@@ -108,32 +124,16 @@ def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
-def : WriteRes<WriteBitTest,[AtomPort01]>;
-
-defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteRes<WriteBitTest, [AtomPort1], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [AtomPort0], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestRegLd, [AtomPort01], 9, [9], 1>;
+defm : X86WriteRes<WriteBitTestSet, [AtomPort1], 1, [1], 1>;
+//defm : X86WriteRes<WriteBitTestSetImmLd, [AtomPort1], 1, [1], 1>;
+//defm : X86WriteRes<WriteBitTestSetRegLd, [AtomPort1], 1, [1], 1>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [AtomPort1]>;
-def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
- let Latency = 8;
- let ResourceCycles = [8];
-}
-def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
-
-def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
- let Latency = 6;
- let ResourceCycles = [6];
-}
-def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
-
-def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
- let Latency = 14;
- let ResourceCycles = [14];
-}
-def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
- IMUL64rmi8, IMUL64rmi32)>;
-
// Bit counts.
defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
@@ -141,15 +141,19 @@ defm : X86WriteResPairUnsupported<WritePOPCNT>;
defm : X86WriteResPairUnsupported<WriteLZCNT>;
defm : X86WriteResPairUnsupported<WriteTZCNT>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
+defm : X86WriteResPairUnsupported<WriteBLS>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
-defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteShiftCL, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteRotate, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteRotateCL, [AtomPort0], [AtomPort0]>;
defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>;
@@ -498,20 +502,13 @@ def : SchedAlias<WriteADCRMW, AtomWrite0_1>;
def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
"MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>;
-def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
- let Latency = 5;
- let ResourceCycles = [5];
-}
-def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>;
-
// Port1
def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
-def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r",
- "BT(C|R|S)?(16|32|64)(rr|ri8)")>;
+def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r")>;
def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
let Latency = 5;
@@ -563,16 +560,14 @@ def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
PUSH16rmm, PUSH32rmm, PUSH64rmm,
LODSB, LODSL, LODSQ, LODSW,
SCASB, SCASL, SCASQ, SCASW)>;
-def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
- "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
- "XADD(8|16|32|64)rr",
- "XCHG(8|16|32|64)(ar|rr)",
+def : InstRW<[AtomWrite01_2], (instregex "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
"(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
"MMX_P(ADD|SUB)Qirr",
"MOV(S|Z)X16rr8",
"MOV(UPS|UPD|DQU)mr",
"MASKMOVDQU(64)?",
"P(ADD|SUB)Qrr")>;
+def : SchedAlias<WriteBitTestSetImmRMW, AtomWrite01_2>;
def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
let Latency = 3;
@@ -616,8 +611,7 @@ def : InstRW<[AtomWrite01_6], (instrs CMPXCHG8rm, INTO, XLAT,
SHLD16rri8, SHRD16rri8,
SHLD16mrCL, SHRD16mrCL,
SHLD16mri8, SHRD16mri8)>;
-def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
- "IST_F(P)?(16|32|64)?m",
+def : InstRW<[AtomWrite01_6], (instregex "IST_F(P)?(16|32|64)?m",
"MMX_PH(ADD|SUB)S?Wrm")>;
def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
@@ -639,8 +633,7 @@ def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
let Latency = 9;
let ResourceCycles = [9];
}
-def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
- POPA16, POPA32,
+def : InstRW<[AtomWrite01_9], (instrs POPA16, POPA32,
PUSHF16, PUSHF32, PUSHF64,
SHLD64mrCL, SHRD64mrCL,
SHLD64mri8, SHRD64mri8,
@@ -663,7 +656,7 @@ def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let ResourceCycles = [11];
}
def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
-def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>;
+def : SchedAlias<WriteBitTestSetRegRMW, AtomWrite01_11>;
def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
let Latency = 13;
@@ -677,12 +670,6 @@ def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
}
def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
-def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> {
- let Latency = 15;
- let ResourceCycles = [15];
-}
-def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr)>;
-
def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
let Latency = 17;
let ResourceCycles = [17];
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td
new file mode 100644
index 000000000000..5798e1b2671b
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -0,0 +1,1282 @@
+//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for AMD bdver2 (Piledriver) to support
+// instruction scheduling and other instruction cost heuristics.
+// Based on:
+// * AMD Software Optimization Guide for AMD Family 15h Processors.
+// https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
+// * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
+// http://www.agner.org/optimize/microarchitecture.pdf
+// * https://www.realworldtech.com/bulldozer/
+// Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
+//
+//===----------------------------------------------------------------------===//
+
+def BdVer2Model : SchedMachineModel {
+ let IssueWidth = 4; // Up to 4 IPC can be decoded, issued, retired.
+ let MicroOpBufferSize = 128; // RCU reorder buffer size, which is unconfirmed.
+ let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer.
+ let LoadLatency = 4; // L1 data cache has a 4-cycle load-to-use latency.
+ let HighLatency = 25; // FIXME: any better choice?
+ let MispredictPenalty = 20; // Minimum branch misprediction penalty.
+
+ let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
+
+ // FIXME: Incomplete. This flag is set to allow the scheduler to assign
+ // a default model to unrecognized opcodes.
+ let CompleteModel = 0;
+} // SchedMachineModel
+
+let SchedModel = BdVer2Model in {
+
+
+//===----------------------------------------------------------------------===//
+// Pipes
+//===----------------------------------------------------------------------===//
+
+// There are a total of eight pipes.
+
+//===----------------------------------------------------------------------===//
+// Integer execution pipes
+//
+
+// Two EX (ALU) pipes.
+def PdEX0 : ProcResource<1>; // ALU, Integer Pipe0
+def PdEX1 : ProcResource<1>; // ALU, Integer Pipe1
+def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>;
+
+// Two AGLU pipes, identical.
+def PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23]
+
+//===----------------------------------------------------------------------===//
+// Floating point execution pipes
+//
+
+// Four FPU pipes.
+
+def PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0
+def PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1
+def PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2
+def PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3
+
+// FPU grouping
+def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>;
+def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>;
+
+
+//===----------------------------------------------------------------------===//
+// RCU
+//===----------------------------------------------------------------------===//
+
+// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle.
+// On the other hand, the RCU reorder buffer size for Piledriver does not
+// seem to be specified in any trustworthy source.
+// But as per https://www.realworldtech.com/bulldozer/6/, Bulldozer had an
+// RCU reorder buffer size of 128, so that is a good guess for now.
+def PdRCU : RetireControlUnit<128, 4>;
+
+
+//===----------------------------------------------------------------------===//
+// Pipelines
+//===----------------------------------------------------------------------===//
+
+// There are a total of two pipelines, each with its own scheduler.
+
+//===----------------------------------------------------------------------===//
+// Integer Pipeline Scheduling
+//
+
+// There is one Integer Scheduler per core.
+
+// The integer physical register file has 96 64-bit registers.
+def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>;
+
+// Unified Integer, Memory Scheduler has 40 entries.
+def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
+ // Up to 4 IPC can be decoded, issued, retired.
+ let BufferSize = 40;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FPU Pipeline Scheduling
+//
+
+// The FPU unit is shared between the two cores.
+
+// The FP physical register file has 160 128-bit registers.
+// Operations on 256-bit data types are cracked into two COPs.
+def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
+
+// The unified FP scheduler has 64 entries.
+def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
+ // Up to 4 IPC can be decoded, issued, retired.
+ let BufferSize = 64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Functional units
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Load-Store Units
+//
+
+let Super = PdAGLU01 in
+def PdLoad : ProcResource<2> {
+ // For Piledriver, the load queue is 40 entries deep.
+ let BufferSize = 40;
+}
+
+def PdLoadQueue : LoadQueue<PdLoad>;
+
+let Super = PdAGLU01 in
+def PdStore : ProcResource<1> {
+ // For Piledriver, the store queue is 24 entries deep.
+ let BufferSize = 24;
+}
+
+def PdStoreQueue : StoreQueue<PdStore>;
+
+//===----------------------------------------------------------------------===//
+// Integer Execution Units
+//
+
+def PdDiv : ProcResource<1>; // PdEX0; unpipelined integer division
+def PdCount : ProcResource<1>; // PdEX0; POPCNT, LZCOUNT
+
+def PdMul : ProcResource<1>; // PdEX1; integer multiplication
+def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches
+
+//===----------------------------------------------------------------------===//
+// Floating-Point Units
+//
+
+// Two FMAC/FPFMA units.
+def PdFPFMA : ProcResource<2>; // PdFPU0, PdFPU1
+
+// One 128-bit integer multiply-accumulate unit.
+def PdFPMMA : ProcResource<1>; // PdFPU0
+
+// One fp conversion unit.
+def PdFPCVT : ProcResource<1>; // PdFPU0
+
+// One unit for shuffles, packs, permutes, shifts.
+def PdFPXBR : ProcResource<1>; // PdFPU1
+
+// Two 128-bit packed integer units.
+def PdFPMAL : ProcResource<2>; // PdFPU2, PdFPU3
+
+// One FP store unit.
+def PdFPSTO : ProcResource<1>; // PdFPU3
+
+
+//===----------------------------------------------------------------------===//
+// Basic helper classes.
+//===----------------------------------------------------------------------===//
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when dispatched by the schedulers.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass PdWriteRes<SchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1> {
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+}
+
+multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat,
+ list<int> Res, int UOps,
+ int LoadLat, int LoadRes, int LoadUOps> {
+ defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
+
+ defm : PdWriteRes<SchedRW.Folded,
+ !listconcat([PdLoad], ExePorts),
+ !add(Lat, LoadLat),
+ !if(!and(!empty(Res), !eq(LoadRes, 1)),
+ [],
+ !listconcat([LoadRes], Res)),
+ !add(UOps, LoadUOps)>;
+}
+
+multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
+ defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ /*LoadLat*/4, /*LoadRes*/1, LoadUOps>;
+}
+
+multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
+ defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ /*LoadLat*/5, /*LoadRes*/1, LoadUOps>;
+}
+
+multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat,
+ list<int> Res, int UOps = 2,
+ int LoadUOps = 0> {
+ defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ /*LoadLat*/5, /*LoadRes*/2, LoadUOps>;
+}
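
As a rough editorial illustration (not part of the commit), an instantiation
such as defm : PdWriteResExPair<WriteALU, [PdEX01]>; with the defaults above
(Lat = 1, Res = [], UOps = 1) should expand into roughly the two records
below; the second is the folded-load variant (WriteALULd, i.e. WriteALU.Folded)
with the 4-cycle load latency and a PdLoad resource prepended.

// Editorial sketch of the expansion, assuming the defaults above.
def : WriteRes<WriteALU, [PdEX01]> {
  let Latency = 1;
  let ResourceCycles = [];
  let NumMicroOps = 1;
}
def : WriteRes<WriteALULd, [PdLoad, PdEX01]> { // WriteALU.Folded
  let Latency = 5;         // Lat (1) + LoadLat (4)
  let ResourceCycles = []; // Res is empty and LoadRes == 1, so left empty
  let NumMicroOps = 1;     // UOps (1) + LoadUOps (0)
}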
+
+//===----------------------------------------------------------------------===//
+// Here be dragons.
+//===----------------------------------------------------------------------===//
+
+// L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers
+// needn't be available until 4 cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 5>;
+def : ReadAdvance<ReadAfterVecYLd, 5>;
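
For a concrete feel of what these ReadAdvance entries buy (an editorial
sketch, assuming ADDPSrm is scheduled through WriteFAddX.Folded and reads its
register operand via ReadAfterVecXLd; WriteFMulX is defined with latency 5
further below):

// Editorial timing sketch, not part of the model:
//   mulps %xmm1, %xmm0   // MULPSrr -> WriteFMulX, %xmm0 ready 5 cycles later
//   addps (%rdi), %xmm0  // ADDPSrm -> WriteFAddX.Folded, 5-cycle folded load
// The %xmm0 read goes through ReadAfterVecXLd (advance 5), so the dependency
// on MULPSrr is shortened by 5 cycles: %xmm0 is not needed until the folded
// load completes, and the two instructions can issue back to back without a
// register-dependency stall.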
+
+// A folded store needs a cycle on the PdStore for the store data.
+def : WriteRes<WriteRMW, [PdStore]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Loads, stores, and moves, not folded with other operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; }
+def : WriteRes<WriteStore, [PdStore]>;
+def : WriteRes<WriteStoreNT, [PdStore]>;
+def : WriteRes<WriteMove, [PdEX01]>;
+
+// Load/store MXCSR.
+// FIXME: These are copied and pasted from WriteLoad/Store.
+def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; }
+def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; }
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteZero, [/*No ExePorts*/]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResExPair<WriteJump, [PdEX1, PdBranch]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Special case scheduling classes.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteSystem, [PdEX01]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [PdEX01]> { let Latency = 100; }
+def : WriteRes<WriteFence, [PdStore]>;
+
+def PdWriteXLAT : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+}
+def : InstRW<[PdWriteXLAT], (instrs XLAT)>;
+
+def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
+ let Latency = 184;
+ let NumMicroOps = 45;
+}
+def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
+ "LSL(16|32|64)rr")>;
+
+// Nops don't have dependencies, so there's no actual latency, but we set this
+// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
+def : WriteRes<WriteNop, [PdEX01]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Arithmetic.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResExPair<WriteALU, [PdEX01]>;
+
+def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;
+
+def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteBMI1],
+ (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr,
+ BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr,
+ BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr,
+ BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
+ TZMSK32rr, TZMSK64rr)>;
+
+def PdWriteBMI1m : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteBMI1m],
+ (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm,
+ BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm,
+ BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm,
+ BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm,
+ TZMSK32rm, TZMSK64rm)>;
+
+defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;
+
+defm : PdWriteRes<WriteBSWAP32, [PdEX1]>;
+defm : PdWriteRes<WriteBSWAP64, [PdEX1]>;
+defm : PdWriteRes<WriteCMPXCHG, [PdEX1], 3, [], 5>;
+defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [], 2>;
+defm : PdWriteRes<WriteXCHG, [PdEX1], 1, [], 2>;
+
+def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
+
+def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 5;
+}
+def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;
+
+def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 6;
+}
+def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
+ (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
+
+def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 18;
+}
+def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
+
+def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 22;
+}
+def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;
+
+def PdWriteXCHG16rr : SchedWriteRes<[PdEX1]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteXCHG16rr], (instrs XCHG16rr)>;
+
+def PdWriteXADD : SchedWriteRes<[PdEX1]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;
+
+def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;
+
+defm : PdWriteResExPair<WriteIMul8, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul16, [PdEX1, PdMul], 4, [], 2>;
+defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [], 2>;
+defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul32, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [], 1, 1>;
+defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul64, [PdEX1, PdMul], 6, [1, 4]>;
+defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4], 1, 1>;
+defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
+defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX
+
+defm : PdWriteResExPair<WriteDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
+defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;
+defm : PdWriteResExPair<WriteDiv32, [PdEX1, PdDiv], 14, [1, 14], 2>;
+defm : PdWriteResExPair<WriteDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
+
+defm : PdWriteResExPair<WriteIDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
+defm : PdWriteResExPair<WriteIDiv16, [PdEX1, PdDiv], 15, [1, 17], 2>;
+defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
+defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
+
+defm : PdWriteResExPair<WriteCRC32, [PdEX01], 3, [4], 3>;
+
+def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
+ let Latency = 5;
+ let ResourceCycles = [4];
+ let NumMicroOps = 5;
+}
+def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;
+
+def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+ let ResourceCycles = [4];
+ let NumMicroOps = 7;
+}
+def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;
+
+def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
+ let Latency = 10;
+ let ResourceCycles = [4];
+ let NumMicroOps = 11;
+}
+def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;
+
+defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.
+defm : PdWriteResExPair<WriteCMOV2, [PdEX01], 1, [], 1, 1>; // Conditional (CF + ZF flag) move.
+
+def : InstRW<[WriteCMOV2.Folded], (instrs CMOVG16rm, CMOVG32rm, CMOVG64rm,
+ CMOVGE16rm, CMOVGE32rm, CMOVGE64rm,
+ CMOVL16rm, CMOVL32rm, CMOVL64rm,
+ CMOVLE16rm, CMOVLE32rm, CMOVLE64rm)>;
+
+defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.
+
+def : WriteRes<WriteSETCC, [PdEX01]>; // Setcc.
+def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;
+
+def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteSETGEmSETGmSETLEmSETLm], (instrs SETGEm, SETGm,
+ SETLEm, SETLm)>;
+
+defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [], 2>;
+
+def WriteLAHF : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteLAHF], (instrs LAHF)>;
+
+def WriteSAHF : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteSAHF], (instrs SAHF)>;
+
+defm : PdWriteRes<WriteBitTest, [PdEX01], 1, [1], 1>;
+defm : PdWriteRes<WriteBitTestImmLd, [PdEX01, PdLoad], 5, [1, 1], 1>;
+defm : PdWriteRes<WriteBitTestRegLd, [PdEX01, PdLoad], 5, [1, 1], 7>;
+defm : PdWriteRes<WriteBitTestSet, [PdEX01], 2, [1], 2>;
+defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
+defm : PdWriteRes<WriteBitTestSetImmRMW, [PdEX01, PdLoad], 6, [1, 1], 4>;
+defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;
+defm : PdWriteRes<WriteBitTestSetRegRMW, [PdEX01, PdLoad], 6, [1, 1], 10>;
+
+// This is for simple LEAs with one or two input operands.
+// FIXME: SAGU 3-operand LEA
+def : WriteRes<WriteLEA, [PdEX01]> { let NumMicroOps = 2; }
+
+// Bit counts.
+defm : PdWriteResExPair<WriteBSF, [PdEX01], 3, [4], 6, 2>;
+defm : PdWriteResExPair<WriteBSR, [PdEX01], 4, [4], 7, 2>;
+defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4>;
+defm : PdWriteResExPair<WriteLZCNT, [PdEX01], 2, [], 2>;
+defm : PdWriteResExPair<WriteTZCNT, [PdEX01], 2, [2], 2>;
+
+// BMI1 BEXTR/BLS, BMI2 BZHI
+defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [], 2>;
+defm : PdWriteResExPair<WriteBLS, [PdEX01], 2, [], 2>;
+defm : PdWriteResExPair<WriteBZHI, [PdEX01]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer shifts and rotates.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResExPair<WriteShift, [PdEX01]>;
+defm : PdWriteResExPair<WriteShiftCL, [PdEX01]>;
+defm : PdWriteResExPair<WriteRotate, [PdEX01]>;
+defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;
+
+def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 12;
+ let NumMicroOps = 26;
+}
+def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;
+
+def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 12;
+ let NumMicroOps = 23;
+}
+def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;
+
+def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 11;
+ let NumMicroOps = 24;
+}
+def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;
+
+def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 10;
+ let NumMicroOps = 22;
+}
+def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;
+
+def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 10;
+ let NumMicroOps = 19;
+}
+def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;
+
+def PdWriteRCL32rCLRCL64rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 17;
+}
+def : InstRW<[PdWriteRCL32rCLRCL64rCL], (instrs RCL32rCL, RCL64rCL)>;
+
+def PdWriteRCR64rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteRCR64rCL], (instrs RCR64rCL)>;
+
+def PdWriteRCR32rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteRCR32rCL], (instrs RCR32rCL)>;
+
+def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 15;
+}
+def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;
+
+
+def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 9;
+ let NumMicroOps = 20;
+}
+def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;
+
+def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 11;
+ let NumMicroOps = 21;
+}
+def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;
+
+def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 8;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;
+
+def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 13;
+ let NumMicroOps = 25;
+}
+def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;
+
+// SHLD/SHRD.
+defm : PdWriteRes<WriteSHDrri, [PdEX01], 4, [6], 6>;
+defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 4, [8], 7>;
+
+def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
+ let Latency = 3;
+ let ResourceCycles = [6];
+ let NumMicroOps = 6;
+}
+def : InstRW<[PdWriteSHLD32rri8SHRD16rri8], (instrs SHLD32rri8, SHRD16rri8)>;
+
+def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 4;
+ let ResourceCycles = [8];
+ let NumMicroOps = 7;
+}
+def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
+ SHLD32rrCL,
+ SHRD32rrCL)>;
+
+defm : PdWriteRes<WriteSHDmri, [PdLoad, PdEX01], 4, [1, 22], 8>;
+defm : PdWriteRes<WriteSHDmrcl, [PdLoad, PdEX01], 4, [1, 22], 8>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Floating point. This covers both scalar and vector operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>;
+
+defm : PdWriteRes<WriteFLoad, [PdLoad, PdFPU01, PdFPFMA], 5>;
+defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5>;
+defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [], 2>;
+
+defm : PdWriteRes<WriteFMaskedLoad, [PdLoad, PdFPU01, PdFPFMA], 6, [1, 1, 2]>;
+defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [2, 2, 4], 2>;
+
+defm : PdWriteRes<WriteFStore, [PdStore, PdFPU1, PdFPSTO], 2>;
+defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU1, PdFPSTO]>;
+defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU1, PdFPSTO], 1, [], 4>;
+
+def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;
+
+def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>;
+
+defm : PdWriteRes<WriteFStoreNT, [PdStore, PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;
+
+defm : PdWriteRes<WriteFMaskedStore, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 4], 18>;
+defm : PdWriteRes<WriteFMaskedStoreY, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 4], 34>;
+
+defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>;
+defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA]>;
+defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+
+defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;
+
+defm : PdWriteResXMMPair<WriteFAdd, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFAddZ>;
+
+defm : PdWriteResXMMPair<WriteFAdd64, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
+
+defm : PdWriteResXMMPair<WriteFCmp, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFCmpZ>;
+
+defm : PdWriteResXMMPair<WriteFCmp64, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
+
+defm : PdWriteResXMMPair<WriteFCom, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
+
+def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
+ let Latency = 6;
+}
+def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>;
+
+def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
+def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;
+
+defm : PdWriteResXMMPair<WriteFMul, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFMulZ>;
+
+defm : PdWriteResXMMPair<WriteFMul64, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFMul64Z>;
+
+defm : PdWriteResXMMPair<WriteFMA, [PdFPU, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 1]>;
+defm : X86WriteResPairUnsupported<WriteFMAZ>;
+
+
+defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 3], 15, 2>;
+
+defm : PdWriteResXMMPair<WriteDPPS, [PdFPU1, PdFPFMA], 25, [1, 3], 16, 2>;
+defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 6], /*or 29*/ 25, 4>;
+defm : X86WriteResPairUnsupported<WriteDPPSZ>;
+
+def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 3];
+ let NumMicroOps = 17;
+}
+def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;
+
+defm : PdWriteResXMMPair<WriteFRcp, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
+
+defm : PdWriteResXMMPair<WriteFRsqrt, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
+
+defm : PdWriteResXMMPair<WriteFDiv, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 38]>;
+defm : X86WriteResPairUnsupported<WriteFDivZ>;
+
+defm : PdWriteResXMMPair<WriteFDiv64, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 38]>;
+defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
+
+defm : PdWriteResXMMPair<WriteFSqrt, [PdFPU1, PdFPFMA], 9, [1, 21]>;
+defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 21]>;
+defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 42]>;
+defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
+
+defm : PdWriteResXMMPair<WriteFSqrt64, [PdFPU1, PdFPFMA], 9, [1, 27]>;
+defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 27]>;
+defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 54]>;
+defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
+
+defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 35]>;
+defm : PdWriteResXMMPair<WriteFSign, [PdFPU1, PdFPFMA]>;
+
+defm : PdWriteResXMMPair<WriteFRnd, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
+defm : X86WriteResPairUnsupported<WriteFRndZ>;
+
+def PdWriteVFRCZ : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVFRCZ], (instrs VFRCZPDrr, VFRCZPSrr,
+ VFRCZSDrr, VFRCZSSrr)>;
+
+def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
+ VFRCZSDrm, VFRCZSSrm)>;
+
+def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 10;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;
+
+def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 15;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;
+
+defm : PdWriteResXMMPair<WriteFLogic, [PdFPU01, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU01, PdFPFMA], 2, [2, 2]>;
+defm : X86WriteResPairUnsupported<WriteFLogicZ>;
+
+defm : PdWriteResXMMPair<WriteFTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
+defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 2, 1], 4, 2>;
+defm : X86WriteResPairUnsupported<WriteFTestZ>;
+
+defm : PdWriteResXMMPair<WriteFShuffle, [PdFPU01, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
+
+def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>;
+
+defm : PdWriteResXMMPair<WriteFVarShuffle, [PdFPU01, PdFPFMA], 3, [1, 4]>;
+defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA], 3, [2, 6], 2>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
+
+defm : PdWriteResXMMPair<WriteFBlend, [PdFPU01, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+defm : X86WriteResPairUnsupported<WriteFBlendZ>;
+
+defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU01, PdFPFMA], 2, [1, 4]>;
+defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU01, PdFPFMA], 2, [2, 6], 2>;
+defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
+
+defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [], 2>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
+
+def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 2;
+}
+def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>;
+
+def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>;
+
+def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 4;
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>;
+
+def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 8; // 4 + 4
+ let NumMicroOps = 10;
+}
+def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Conversions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU1, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
+
+defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU1, PdFPSTO], 4, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
+
+defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU1, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
+
+defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
+
+def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>;
+
+// FIXME: f+3 ST, LD+STC latency
+defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU1, PdFPSTO], 4, [], 2>;
+// FIXME: The .Folded version is one NumMicroOp *fewer*.
+
+defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU1, PdFPSTO], 4, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
+
+defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU1, PdFPSTO], 4, [], 2>;
+// FIXME: The .Folded version is one NumMicroOp *fewer*.
+
+def WriteCVTSI642SDrr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 13;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteCVTSI642SDrr], (instrs CVTSI642SDrr, CVTSI642SSrr)>;
+
+defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
+
+defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU1, PdFPSTO], 4>;
+
+defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
+
+defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU1, PdFPSTO], 4>;
+
+defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
+
+def WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr,
+ MMX_CVTPI2PDirr)>;
+
+def WriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;
+
+defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU1, PdFPSTO], 8, [], 2, 1>;
+defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
+
+defm : PdWriteRes<WriteCvtPS2PH, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
+
+defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU1, PdFPSTO, PdStore], 4, [], 3>;
+defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU1, PdFPSTO, PdFPFMA, PdStore], 4, [2, 1, 1, 1], 4>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector integer operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5>;
+defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5>;
+defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [], 2>;
+
+defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5>;
+defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5>;
+
+defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [1, 1, 2]>;
+defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
+
+defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU1, PdFPSTO], 2>;
+defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU1, PdFPSTO]>;
+defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU1, PdFPSTO], 1, [], 4>;
+
+def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;
+
+defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
+defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;
+
+defm : PdWriteRes<WriteVecMaskedStore, [PdStore, PdFPU01, PdFPMAL], 6, [1, 1, 4]>;
+defm : PdWriteRes<WriteVecMaskedStoreY, [PdStore, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
+
+defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
+
+defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 10>;
+defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 10, [], 2>;
+
+defm : PdWriteResXMMPair<WriteVecALU, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteVecALUY>;
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
+
+defm : PdWriteResXMMPair<WriteVecShift, [PdFPU01, PdFPMAL], 3>;
+defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU01, PdFPMAL], 3>;
+defm : X86WriteResPairUnsupported<WriteVecShiftY>;
+defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
+
+defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
+
+defm : PdWriteResXMMPair<WriteVecIMul, [PdFPU0, PdFPMMA], 4>;
+defm : PdWriteResXMMPair<WriteVecIMulX, [PdFPU0, PdFPMMA], 4>;
+defm : X86WriteResPairUnsupported<WriteVecIMulY>;
+defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
+
+defm : PdWriteResXMMPair<WritePMULLD, [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>;
+defm : X86WriteResPairUnsupported<WritePMULLDY>;
+defm : X86WriteResPairUnsupported<WritePMULLDZ>;
+
+def JWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]> {
+ let Latency = 4;
+ let ResourceCycles = [2, 1, 2, 1];
+}
+def : InstRW<[JWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
+ VPMACSSDQLrr)>;
+
+defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 2], 9>;
+defm : X86WriteResPairUnsupported<WriteMPSADY>;
+defm : X86WriteResPairUnsupported<WriteMPSADZ>;
+
+defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [], 2>;
+defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [], 2>;
+defm : X86WriteResPairUnsupported<WritePSADBWY>;
+defm : X86WriteResPairUnsupported<WritePSADBWZ>;
+
+defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;
+
+defm : PdWriteResXMMPair<WriteShuffle, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU01, PdFPMAL], 2, [1, 1]>;
+defm : X86WriteResPairUnsupported<WriteShuffleZ>;
+
+defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU01, PdFPMAL], 3, [1, 4]>;
+defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU01, PdFPMAL], 3, [1, 4]>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
+
+defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteBlendY>;
+defm : X86WriteResPairUnsupported<WriteBlendZ>;
+
+defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU01, PdFPMAL], 2, [1, 4]>;
+defm : X86WriteResPairUnsupported<WriteVarBlendY>;
+defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
+
+defm : PdWriteResXMMPair<WriteVecLogic, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteVecLogicY>;
+defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
+
+defm : PdWriteResXMMPair<WriteVecTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
+defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 2, 1], 4, 2>;
+defm : X86WriteResPairUnsupported<WriteVecTestZ>;
+
+defm : PdWriteResXMMPair<WriteShuffle256, [PdFPU01, PdFPMAL]>;
+defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;
+
+defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU01, PdFPMAL], 3>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector insert/extract operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [], 2>;
+defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [], 2>;
+
+defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
+defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [], 2>;
+
+def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+ let Latency = 3;
+}
+def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// SSE42 String instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 14, [1, 2, 1], 7, 1>;
+defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 6, [1, 2, 1], 7, 2>;
+
+defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 15, [1, 2, 6, 4, 1, 1], 27, 1>;
+defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 2, 6, 4, 1, 1], 27, 1>;
+
+////////////////////////////////////////////////////////////////////////////////
+// MOVMSK Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;
+
+defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
+defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
+// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;
+
+defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;
+
+////////////////////////////////////////////////////////////////////////////////
+// AES Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteAESIMC, [PdFPU0, PdFPMMA], 5>;
+defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
+defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [], 3, 1>;
+defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [2, 1], 8, 2>;
+defm : X86WriteResPairUnsupported<WriteFHAddZ>;
+
+defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [], 3, 1>;
+defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WritePHAddY>;
+defm : X86WriteResPairUnsupported<WritePHAddZ>;
+
+def : InstRW<[WritePHAdd], (instrs PHADDDrr, PHSUBDrr,
+ PHADDWrr, PHSUBWrr,
+ PHADDSWrr, PHSUBSWrr,
+ VPHADDDrr, VPHSUBDrr,
+ VPHADDWrr, VPHSUBWrr,
+ VPHADDSWrr, VPHSUBSWrr)>;
+
+def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm,
+ PHADDWrm, PHSUBWrm,
+ PHADDSWrm, PHSUBSWrm,
+ VPHADDDrm, VPHSUBDrm,
+ VPHADDWrm, VPHSUBWrm,
+ VPHADDSWrm, VPHSUBSWrm)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Carry-less multiplication instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [], 5, 1>;
+
+def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
+ let Latency = 13;
+ let NumMicroOps = 6;
+}
+def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// SSE4A instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+ let Latency = 3;
+ let ResourceCycles = [1, 4];
+}
+def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// AVX instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 2, 4];
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
+ VBROADCASTSSYrm)>;
+
+def PdWriteVZEROALL : SchedWriteRes<[]> {
+ let Latency = 90;
+ let NumMicroOps = 32;
+}
+def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;
+
+def PdWriteVZEROUPPER : SchedWriteRes<[]> {
+ let Latency = 46;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;
+
+///////////////////////////////////////////////////////////////////////////////
+// SchedWriteVariant definitions.
+///////////////////////////////////////////////////////////////////////////////
+
+def PdWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def PdWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
+]>;
+def : InstRW<[PdWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
+
+def PdWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
+]>;
+def : InstRW<[PdWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
+ XORPDrr, VXORPDrr,
+ ANDNPSrr, VANDNPSrr,
+ ANDNPDrr, VANDNPDrr)>;
+
+// VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have a latency of 1.
+
+def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
+
+def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
+ PANDNrr, VPANDNrr)>;
+
+def PdWriteVZeroIdiomALU : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
+ MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PCMPGTBirr,
+ MMX_PCMPGTDirr,
+ MMX_PCMPGTWirr)>;
+
+def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+ PSUBDrr, VPSUBDrr,
+ PSUBQrr, VPSUBQrr,
+ PSUBWrr, VPSUBWrr,
+ PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+// VPCMPGTQ, but not PCMPGTQ!
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // MMX Zero-idioms.
+ DepBreakingClass<[
+ MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
+ MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
+ MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
+
+ // int variants.
+ PXORrr, PANDNrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
+
+ // xmm int variants.
+ VPXORrr, VPANDNrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
+ ], ZeroIdiomPredicate>
+]>;
+
+def : IsDepBreakingFunction<[
+ // GPR
+ DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
+ DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
+
+ // MMX
+ DepBreakingClass<[
+ MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE
+ DepBreakingClass<[
+ PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
+ // But not PCMPEQQrr.
+ ], ZeroIdiomPredicate>,
+
+ // AVX
+ DepBreakingClass<[
+ VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
+ // But not VPCMPEQQrr.
+ ], ZeroIdiomPredicate>
+]>;
+
+
+} // SchedModel
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 719e71cd25e5..33a6b01546d7 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -48,12 +48,22 @@ def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM
// part of it.
// Reference: Section 21.10 "AMD Bobcat and Jaguar pipeline: Partial register
// access" - Agner Fog's "microarchitecture.pdf".
-def JIntegerPRF : RegisterFile<64, [GR64, CCR]>;
+def JIntegerPRF : RegisterFile<64, [GR64, CCR], [1, 1], [1, 0],
+ 0, // Max moves that can be eliminated per cycle.
+ 1>; // Restrict move elimination to zero regs.
// The Jaguar FP Retire Queue renames SIMD and FP uOps onto a pool of 72 SSE
// registers. Operations on 256-bit data types are cracked into two COPs.
// Reference: www.realworldtech.com/jaguar/4/
-def JFpuPRF: RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>;
+
+// The PRF in the floating point unit can eliminate a move from an MMX or SSE
+// register that is known to be zero (i.e. it has been zeroed using a zero-idiom
+// dependency breaking instruction, or via VZEROALL).
+// Reference: Section 21.8 "AMD Bobcat and Jaguar pipeline: Dependency-breaking
+// instructions" - Agner Fog's "microarchitecture.pdf"
+def JFpuPRF: RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2], [1, 1, 0],
+ 0, // Max moves that can be eliminated per cycle.
+ 1>; // Restrict move elimination to zero regs.
// The retire control unit (RCU) can track up to 64 macro-ops in-flight. It can
// retire up to two macro-ops per cycle.
@@ -93,6 +103,12 @@ def JVALU : ProcResGroup<[JVALU0, JVALU1]>;
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available until 5
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 5>;
+def : ReadAdvance<ReadAfterVecYLd, 5>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
@@ -100,7 +116,8 @@ def : ReadAdvance<ReadAfterLd, 3>;
// folded loads.
multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [], int UOps = 1> {
+ int Lat, list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -113,13 +130,14 @@ multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 3);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [], int UOps = 1> {
+ int Lat, list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -132,13 +150,14 @@ multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [2], int UOps = 2> {
+ int Lat, list<int> Res = [2], int UOps = 2,
+ int LoadUOps = 0> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -151,12 +170,13 @@ multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !listconcat([2], Res);
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
-// A folded store needs a cycle on the SAGU for the store data.
-def : WriteRes<WriteRMW, [JSAGU]>;
+// A folded store needs a cycle on the SAGU for the store data;
+// most RMW instructions don't need an extra uop.
+defm : X86WriteRes<WriteRMW, [JSAGU], 1, [1], 0>;
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
@@ -164,12 +184,24 @@ def : WriteRes<WriteRMW, [JSAGU]>;
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteADC, [JALU01], 1, [2]>;
-defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
-defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
-defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHG,[JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[JALU01, JSAGU, JLAGU], 4, [1, 1, 1], 2>;
+defm : X86WriteRes<WriteXCHG, [JALU01], 1, [1], 1>;
+
+defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16Imm, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16Reg, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32Imm, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
+defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 2>;
+defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 2>;
+defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
@@ -188,27 +220,37 @@ defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional m
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
-def : WriteRes<WriteBitTest,[JALU01]>;
+
+defm : X86WriteRes<WriteBitTest, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [JALU01,JLAGU], 4, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestRegLd, [JALU01,JLAGU], 4, [1,1], 5>;
+defm : X86WriteRes<WriteBitTestSet, [JALU01], 1, [1], 2>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [JALU01,JLAGU], 4, [1,1], 4>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [JALU01,JLAGU], 4, [1,1], 8>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
// Bit counts.
-defm : JWriteResIntPair<WriteBSF, [JALU01], 5, [4], 8>;
-defm : JWriteResIntPair<WriteBSR, [JALU01], 5, [4], 8>;
+defm : JWriteResIntPair<WriteBSF, [JALU01], 4, [8], 7>;
+defm : JWriteResIntPair<WriteBSR, [JALU01], 5, [8], 8>;
defm : JWriteResIntPair<WritePOPCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteLZCNT, [JALU01], 1>;
-defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2]>;
+defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2], 2>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : JWriteResIntPair<WriteBEXTR, [JALU01], 1>;
+defm : JWriteResIntPair<WriteBLS, [JALU01], 2, [2], 2>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
+defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
+defm : JWriteResIntPair<WriteShiftCL, [JALU01], 1>;
+defm : JWriteResIntPair<WriteRotate, [JALU01], 1>;
+defm : JWriteResIntPair<WriteRotateCL, [JALU01], 1>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>;
@@ -269,8 +311,8 @@ defm : X86WriteRes<WriteFLDC, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadX, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
-defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
+defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 2, 2], 1>;
+defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 4, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
@@ -364,21 +406,21 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
-defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
-defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
+defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [4, 4], 3>;
+defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [6, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
-defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
+defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
+defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPU0, JFPA, JALU0], 7, [1,1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPS2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2IY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
-defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
+defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPU0, JFPA, JALU0], 7, [1,1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPD2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
@@ -423,8 +465,8 @@ defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1],
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
-defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
+defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 2, 2], 1>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 4, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
@@ -462,26 +504,26 @@ defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
-defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
+defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2], 3>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
-defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
+defm : JWriteResFpuPair<WritePHMINPOS, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
-defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
+defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 1], 1>;
defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
-defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
+defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [4, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
@@ -507,8 +549,8 @@ defm : X86WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU], 3, [1,1,1], 1>;
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JVALU1, JFPA, JALU0], 7, [1, 2, 1, 1], 3>;
-defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPA, JALU0], 8, [1, 2, 1, 1], 3>;
+defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JVALU1, JFPU0, JFPA, JALU0], 7, [2, 2, 1, 1, 1], 3>;
+defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPU0, JFPA, JALU0], 8, [2, 2, 1, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpEStrI, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
defm : JWriteResFpuPair<WritePCmpEStrM, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
@@ -527,7 +569,7 @@ def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
defm : JWriteResFpuPair<WriteAESIMC, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESKeyGen, [JFPU0, JVIMUL], 2>;
-defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1, 1], 2>;
+defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU01, JVALU, JFPU0, JVIMUL], 3, [1,1,1,1], 2>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
@@ -559,13 +601,17 @@ def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
+def JWriteVecExtractF128: SchedWriteRes<[JFPU01, JFPX]>;
+def : InstRW<[JWriteVecExtractF128], (instrs VEXTRACTF128rr)>;
+
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
let NumMicroOps = 2;
}
-def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
- VBROADCASTSSYrm)>;
+def : InstRW<[JWriteVBROADCASTYLd], (instrs VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VBROADCASTF128)>;
def JWriteJVZEROALL: SchedWriteRes<[]> {
let Latency = 90;
@@ -587,6 +633,10 @@ def JWriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
}
+def JWriteZeroIdiomYmm : SchedWriteRes<[JFPU01, JFPX]> {
+ let NumMicroOps = 2;
+}
+
// Certain instructions that use the same register for both source
// operands do not have a real dependency on the previous contents of the
// register, and thus, do not have to wait before completing. They can be
@@ -598,54 +648,73 @@ def JWriteZeroLatency : SchedWriteRes<[]> {
def JWriteZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
+ SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[JWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
XOR32rr, XOR64rr)>;
def JWriteFZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
+ SchedVar<NoSchedPred, [WriteFLogic]>
]>;
def : InstRW<[JWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr,
ANDNPSrr, VANDNPSrr,
ANDNPDrr, VANDNPDrr)>;
+def JWriteFZeroIdiomY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroIdiomYmm]>,
+ SchedVar<NoSchedPred, [WriteFLogicY]>
+]>;
+def : InstRW<[JWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
+ VANDNPSYrr, VANDNPDYrr)>;
+
def JWriteVZeroIdiomLogic : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
+ SchedVar<NoSchedPred, [WriteVecLogic]>
]>;
def : InstRW<[JWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
def JWriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
]>;
def : InstRW<[JWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
PANDNrr, VPANDNrr)>;
def JWriteVZeroIdiomALU : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
+ SchedVar<NoSchedPred, [WriteVecALU]>
]>;
def : InstRW<[JWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PSUBSBirr, MMX_PSUBSWirr,
+ MMX_PSUBUSBirr, MMX_PSUBUSWirr,
MMX_PCMPGTBirr, MMX_PCMPGTDirr,
MMX_PCMPGTWirr)>;
def JWriteVZeroIdiomALUX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
+ SchedVar<NoSchedPred, [WriteVecALUX]>
]>;
def : InstRW<[JWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
PSUBDrr, VPSUBDrr,
PSUBQrr, VPSUBQrr,
PSUBWrr, VPSUBWrr,
+ PSUBSBrr, VPSUBSBrr,
+ PSUBSWrr, VPSUBSWrr,
+ PSUBUSBrr, VPSUBUSBrr,
+ PSUBUSWrr, VPSUBUSWrr,
PCMPGTBrr, VPCMPGTBrr,
PCMPGTDrr, VPCMPGTDrr,
PCMPGTQrr, VPCMPGTQrr,
PCMPGTWrr, VPCMPGTWrr)>;
+def JWriteVPERM2F128 : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomVPERMPredicate>, [JWriteZeroIdiomYmm]>,
+ SchedVar<NoSchedPred, [WriteFShuffle256]>
+]>;
+def : InstRW<[JWriteVPERM2F128], (instrs VPERM2F128rr)>;
+
// This write is used for slow LEA instructions.
def JWrite3OpsLEA : SchedWriteRes<[JALU1, JSAGU]> {
let Latency = 2;
@@ -666,8 +735,8 @@ def JSlowLEAPredicate : MCSchedPredicate<
>;
def JWriteLEA : SchedWriteVariant<[
- SchedVar<JSlowLEAPredicate, [JWrite3OpsLEA]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteLEA]>
+ SchedVar<JSlowLEAPredicate, [JWrite3OpsLEA]>,
+ SchedVar<NoSchedPred, [WriteLEA]>
]>;
def : InstRW<[JWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
@@ -679,4 +748,91 @@ def JSlowLEA16r : SchedWriteRes<[JALU01]> {
def : InstRW<[JSlowLEA16r], (instrs LEA16r)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // MMX Zero-idioms.
+ DepBreakingClass<[
+ MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
+ MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
+ MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
+
+ // int variants.
+ PXORrr, PANDNrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
+
+ // xmm int variants.
+ VPXORrr, VPANDNrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
+ ], ZeroIdiomPredicate>,
+
+ DepBreakingClass<[ VPERM2F128rr ], ZeroIdiomVPERMPredicate>
+]>;
+
+def : IsDepBreakingFunction<[
+ // GPR
+ DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
+ DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
+
+ // MMX
+ DepBreakingClass<[
+ MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE
+ DepBreakingClass<[
+ PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX
+ DepBreakingClass<[
+ VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
+ ], ZeroIdiomPredicate>
+]>;
+
+def : IsOptimizableRegisterMove<[
+ InstructionEquivalenceClass<[
+ // GPR variants.
+ MOV32rr, MOV64rr,
+
+ // MMX variants.
+ MMX_MOVQ64rr,
+
+ // SSE variants.
+ MOVAPSrr, MOVUPSrr,
+ MOVAPDrr, MOVUPDrr,
+ MOVDQArr, MOVDQUrr,
+
+ // AVX variants.
+ VMOVAPSrr, VMOVUPSrr,
+ VMOVAPDrr, VMOVUPDrr,
+ VMOVDQArr, VMOVDQUrr
+ ], TruePred >
+]>;
+
} // SchedModel
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td b/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
index b1e843013707..fcaff7cf810f 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -49,6 +49,9 @@ def SLMFPDivider : ProcResource<1>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+def : ReadAdvance<ReadAfterVecLd, 3>;
+def : ReadAdvance<ReadAfterVecXLd, 3>;
+def : ReadAdvance<ReadAfterVecYLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
@@ -95,13 +98,28 @@ def : InstRW<[WriteMove], (instrs COPY)>;
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
-defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
+
+defm : SLMWriteResPair<WriteIMul8, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16Reg, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32Reg, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 3>;
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHG, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHGRMW, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1, 2], 2>;
+defm : X86WriteRes<WriteXCHG, [SLM_IEC_RSV01], 1, [1], 1>;
-defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteShiftCL, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteRotate, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteRotateCL, [SLM_IEC_RSV0], 1>;
defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>;
@@ -119,8 +137,13 @@ def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
// FIXME Latency and NumMicrOps?
let ResourceCycles = [2,1];
}
-def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
-def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
+defm : X86WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestSet, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1], 1>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -134,8 +157,9 @@ defm : SLMWriteResPair<WriteLZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WriteTZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
+defm : X86WriteResPairUnsupported<WriteBLS>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 7184b850a195..a866f843106b 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -87,9 +87,14 @@ def ZnMultiplier : ProcResource<1>;
// Integer division issued on ALU2.
def ZnDivider : ProcResource<1>;
-// 4 Cycles load-to use Latency is captured
+// 4 Cycles integer load-to-use Latency is captured
def : ReadAdvance<ReadAfterLd, 4>;
+// 8 Cycles vector load-to-use Latency is captured
+def : ReadAdvance<ReadAfterVecLd, 8>;
+def : ReadAdvance<ReadAfterVecXLd, 8>;
+def : ReadAdvance<ReadAfterVecYLd, 8>;
+
// The Integer PRF for Zen is 168 entries, and it holds the architectural and
// speculative version of the 64-bit integer registers.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
@@ -177,13 +182,28 @@ def : WriteRes<WriteZero, []>;
def : WriteRes<WriteLEA, [ZnALU]>;
defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
defm : ZnWriteResPair<WriteADC, [ZnALU], 1>;
-defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
-defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+
+defm : ZnWriteResPair<WriteIMul8, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16Imm, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16Reg, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32Imm, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32Reg, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+//defm : ZnWriteResPair<WriteIMul64Imm, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+//defm : ZnWriteResPair<WriteIMul64Reg, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
+defm : X86WriteRes<WriteCMPXCHG, [ZnALU], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[ZnALU,ZnAGU], 8, [1,1], 5>;
+defm : X86WriteRes<WriteXCHG, [ZnALU], 1, [2], 2>;
-defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteShiftCL, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteRotate, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteRotateCL, [ZnALU], 1>;
defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteSHDrrcl>;
@@ -198,7 +218,13 @@ defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>;
def : WriteRes<WriteSETCC, [ZnALU]>;
def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
-def : WriteRes<WriteBitTest,[ZnALU]>;
+
+defm : X86WriteRes<WriteBitTest, [ZnALU], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [ZnALU], 2, [1], 2>;
+//defm : X86WriteRes<WriteBitTestSetImmLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
+//defm : X86WriteRes<WriteBitTestSetRegLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
// Bit counts.
defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
@@ -210,9 +236,10 @@ defm : ZnWriteResPair<WritePOPCNT, [ZnALU], 1>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1>;
-defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;
+//defm : ZnWriteResPair<WriteBLS, [ZnALU], 2>;
+defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;
// IDIV
defm : ZnWriteResPair<WriteDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>;
@@ -492,21 +519,13 @@ def : SchedAlias<WriteSTMXCSR, ZnWriteMicrocoded>;
//-- Move instructions --//
// MOV.
// r16,m.
-def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>;
+def : InstRW<[WriteALULd, ReadAfterLd], (instrs MOV16rm)>;
// MOVSX, MOVZX.
// r,m.
def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
// XCHG.
-// r,r.
-def ZnWriteXCHG : SchedWriteRes<[ZnALU]> {
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-
-def : InstRW<[ZnWriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
-
// r,m.
def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> {
let Latency = 5;
@@ -522,7 +541,7 @@ def ZnWritePop16r : SchedWriteRes<[ZnAGU]>{
let Latency = 5;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWritePop16r], (instregex "POP16rmm")>;
+def : InstRW<[ZnWritePop16r], (instrs POP16rmm)>;
def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;
@@ -582,45 +601,51 @@ def : InstRW<[WriteALULd],
def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 3;
}
-def : InstRW<[ZnWriteMul16], (instrs IMUL16r, MUL16r)>;
-def : InstRW<[ZnWriteMul16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul16], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul16, ZnWriteMul16>;
+def : SchedAlias<WriteIMul16Imm, ZnWriteMul16>; // TODO: is this right?
+def : SchedAlias<WriteIMul16Reg, ZnWriteMul16>; // TODO: is this right?
+def : SchedAlias<WriteIMul16ImmLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul16RegLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
// m16.
def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instrs IMUL16m, MUL16m)>;
+def : SchedAlias<WriteIMul16Ld, ZnWriteMul16Ld>;
// r32.
def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 3;
}
-def : InstRW<[ZnWriteMul32], (instrs IMUL32r, MUL32r)>;
-def : InstRW<[ZnWriteMul32], (instrs IMUL32rr, IMUL32rri, IMUL32rri8)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul32], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul32, ZnWriteMul32>;
+def : SchedAlias<WriteIMul32Imm, ZnWriteMul32>; // TODO: is this right?
+def : SchedAlias<WriteIMul32Reg, ZnWriteMul32>; // TODO: is this right?
+def : SchedAlias<WriteIMul32ImmLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul32RegLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
// m32.
def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instrs IMUL32m, MUL32m)>;
+def : SchedAlias<WriteIMul32Ld, ZnWriteMul32Ld>;
// r64.
def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 4;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteMul64], (instrs IMUL64r, MUL64r)>;
-def : InstRW<[ZnWriteMul64], (instrs IMUL64rr, IMUL64rri8, IMUL64rri32)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul64], (instrs IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul64, ZnWriteMul64>;
+def : SchedAlias<WriteIMul64Imm, ZnWriteMul64>; // TODO: is this right?
+def : SchedAlias<WriteIMul64Reg, ZnWriteMul64>; // TODO: is this right?
+def : SchedAlias<WriteIMul64ImmLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul64RegLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
// m64.
def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 9;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instrs IMUL64m, MUL64m)>;
+def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
// MULX.
// r32,r32,r32.
@@ -696,31 +721,21 @@ def ZnWriteALULat2Ld : SchedWriteRes<[ZnAGU, ZnALU]> {
let Latency = 6;
}
-// BT.
-// m,i.
-def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
-
// BTR BTS BTC.
-// r,r,i.
-def ZnWriteBTRSC : SchedWriteRes<[ZnALU]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-def : InstRW<[ZnWriteBTRSC], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
-
// m,r,i.
def ZnWriteBTRSCm : SchedWriteRes<[ZnAGU, ZnALU]> {
let Latency = 6;
let NumMicroOps = 2;
}
// m,r,i.
-def : InstRW<[ZnWriteBTRSCm], (instregex "BT(R|S|C)(16|32|64)m(r|i8)")>;
+def : SchedAlias<WriteBitTestSetImmRMW, ZnWriteBTRSCm>;
+def : SchedAlias<WriteBitTestSetRegRMW, ZnWriteBTRSCm>;
// BLSI BLSMSK BLSR.
// r,r.
-def : InstRW<[ZnWriteALULat2], (instregex "BLS(I|MSK|R)(32|64)rr")>;
+def : SchedAlias<WriteBLS, ZnWriteALULat2>;
// r,m.
-def : InstRW<[ZnWriteALULat2Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;
+def : SchedAlias<WriteBLSLd, ZnWriteALULat2Ld>;
// CLD STD.
def : InstRW<[WriteALU], (instrs STD, CLD)>;
@@ -750,13 +765,6 @@ def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>;
def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
//-- Misc instructions --//
-// CMPXCHG.
-def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> {
- let Latency = 8;
- let NumMicroOps = 5;
-}
-def : InstRW<[ZnWriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;
-
// CMPXCHG8B.
def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> {
let NumMicroOps = 18;
@@ -782,10 +790,10 @@ def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;
def : InstRW<[WriteMicrocoded], (instrs RDPMC)>;
// RDRAND.
-def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>;
+def : InstRW<[WriteMicrocoded], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
// XGETBV.
-def : InstRW<[WriteMicrocoded], (instregex "XGETBV")>;
+def : InstRW<[WriteMicrocoded], (instrs XGETBV)>;
//-- String instructions --//
// CMPS.
@@ -807,6 +815,8 @@ def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;
def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;
// XADD.
+def ZnXADD : SchedWriteRes<[ZnALU]>;
+def : InstRW<[ZnXADD], (instregex "XADD(8|16|32|64)rr")>;
def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;
//=== Floating Point x87 Instructions ===//
@@ -821,16 +831,16 @@ def ZnWriteSTr: SchedWriteRes<[ZnFPU23]> {
// LD_F.
// r.
-def : InstRW<[ZnWriteFLDr], (instregex "LD_Frr")>;
+def : InstRW<[ZnWriteFLDr], (instrs LD_Frr)>;
// m.
def ZnWriteLD_F80m : SchedWriteRes<[ZnAGU, ZnFPU13]> {
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteLD_F80m], (instregex "LD_F80m")>;
+def : InstRW<[ZnWriteLD_F80m], (instrs LD_F80m)>;
// FBLD.
-def : InstRW<[WriteMicrocoded], (instregex "FBLDm")>;
+def : InstRW<[WriteMicrocoded], (instrs FBLDm)>;
// FST(P).
// r.
@@ -840,11 +850,11 @@ def : InstRW<[ZnWriteSTr], (instregex "ST_(F|FP)rr")>;
def ZnWriteST_FP80m : SchedWriteRes<[ZnAGU, ZnFPU23]> {
let Latency = 5;
}
-def : InstRW<[ZnWriteST_FP80m], (instregex "ST_FP80m")>;
+def : InstRW<[ZnWriteST_FP80m], (instrs ST_FP80m)>;
// FBSTP.
// m80.
-def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>;
+def : InstRW<[WriteMicrocoded], (instrs FBSTPm)>;
def ZnWriteFXCH : SchedWriteRes<[ZnFPU]>;
@@ -901,10 +911,10 @@ def : InstRW<[ZnWriteFPU3], (instrs FINCSTP, FDECSTP)>;
def : InstRW<[ZnWriteFPU3], (instregex "FFREE")>;
// FNSAVE.
-def : InstRW<[WriteMicrocoded], (instregex "FSAVEm")>;
+def : InstRW<[WriteMicrocoded], (instrs FSAVEm)>;
// FRSTOR.
-def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>;
+def : InstRW<[WriteMicrocoded], (instrs FRSTORm)>;
//-- Arithmetic instructions --//
@@ -1401,46 +1411,46 @@ def : InstRW<[ZnWriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
// SHA1MSG2
// x,x.
def ZnWriteSHA1MSG2r : SchedWriteRes<[ZnFPU12]> ;
-def : InstRW<[ZnWriteSHA1MSG2r], (instregex "SHA1MSG2rr")>;
+def : InstRW<[ZnWriteSHA1MSG2r], (instrs SHA1MSG2rr)>;
// x,m.
def ZnWriteSHA1MSG2Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteSHA1MSG2Ld], (instregex "SHA1MSG2rm")>;
+def : InstRW<[ZnWriteSHA1MSG2Ld], (instrs SHA1MSG2rm)>;
// SHA1NEXTE
// x,x.
def ZnWriteSHA1NEXTEr : SchedWriteRes<[ZnFPU1]> ;
-def : InstRW<[ZnWriteSHA1NEXTEr], (instregex "SHA1NEXTErr")>;
+def : InstRW<[ZnWriteSHA1NEXTEr], (instrs SHA1NEXTErr)>;
// x,m.
def ZnWriteSHA1NEXTELd : SchedWriteRes<[ZnAGU, ZnFPU1]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteSHA1NEXTELd], (instregex "SHA1NEXTErm")>;
+def : InstRW<[ZnWriteSHA1NEXTELd], (instrs SHA1NEXTErm)>;
// SHA1RNDS4
// x,x.
def ZnWriteSHA1RNDS4r : SchedWriteRes<[ZnFPU1]> {
let Latency = 6;
}
-def : InstRW<[ZnWriteSHA1RNDS4r], (instregex "SHA1RNDS4rr")>;
+def : InstRW<[ZnWriteSHA1RNDS4r], (instrs SHA1RNDS4rri)>;
// x,m.
def ZnWriteSHA1RNDS4Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
let Latency = 13;
}
-def : InstRW<[ZnWriteSHA1RNDS4Ld], (instregex "SHA1RNDS4rm")>;
+def : InstRW<[ZnWriteSHA1RNDS4Ld], (instrs SHA1RNDS4rmi)>;
// SHA256RNDS2
// x,x.
def ZnWriteSHA256RNDS2r : SchedWriteRes<[ZnFPU1]> {
let Latency = 4;
}
-def : InstRW<[ZnWriteSHA256RNDS2r], (instregex "SHA256RNDS2rr")>;
+def : InstRW<[ZnWriteSHA256RNDS2r], (instrs SHA256RNDS2rr)>;
// x,m.
def ZnWriteSHA256RNDS2Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
let Latency = 11;
}
-def : InstRW<[ZnWriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;
+def : InstRW<[ZnWriteSHA256RNDS2Ld], (instrs SHA256RNDS2rm)>;
//-- Arithmetic instructions --//
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index e131f1a1e4bd..008a9ec2ba3c 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -170,10 +170,11 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
InFlag = Chain.getValue(1);
}
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
+ bool Use64BitRegs = Subtarget.isTarget64BitLP64();
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
Count, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
@@ -249,20 +250,21 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
if (Repeats.BytesLeft() > 0 &&
DAG.getMachineFunction().getFunction().optForMinSize()) {
- // When agressively optimizing for size, avoid generating the code to
+ // When aggressively optimizing for size, avoid generating the code to
// handle BytesLeft.
Repeats.AVT = MVT::i8;
}
}
+ bool Use64BitRegs = Subtarget.isTarget64BitLP64();
SDValue InFlag;
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RSI : X86::ESI,
Src, InFlag);
InFlag = Chain.getValue(1);
diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index c7ddf93f8e85..720be8afa62c 100644
--- a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -112,11 +112,10 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
return true;
}
-void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+void DecodePSHUFBMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask) {
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
// The shuffle mask requires a byte vector.
@@ -125,7 +124,7 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / 8;
assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
"Unexpected number of vector elements.");
@@ -151,12 +150,10 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
}
}
-void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");
@@ -166,7 +163,7 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
unsigned NumEltsPerLane = 128 / ElSize;
assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
"Unexpected number of vector elements.");
@@ -189,11 +186,13 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
}
void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
+ unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
(void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256) && "Unexpected vector size.");
+ assert((MaskTySize == 128 || MaskTySize == 256) &&
+ Width >= MaskTySize && "Unexpected vector size.");
// The shuffle mask requires elements the same size as the target.
APInt UndefElts;
@@ -201,7 +200,7 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
unsigned NumEltsPerLane = 128 / ElSize;
assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
"Unexpected number of vector elements.");
@@ -242,9 +241,12 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
}
}
-void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
- assert(C->getType()->getPrimitiveSizeInBits() == 128 &&
- "Unexpected vector size.");
+void DecodeVPPERMMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+ (void)MaskTySize;
+ assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size.");
// The shuffle mask requires a byte vector.
APInt UndefElts;
@@ -252,7 +254,7 @@ void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / 8;
assert(NumElts == 16 && "Unexpected number of vector elements.");
for (unsigned i = 0; i != NumElts; ++i) {
@@ -291,12 +293,10 @@ void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
}
}
-void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMVMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
"Unexpected vector element size.");
@@ -307,7 +307,7 @@ void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
for (unsigned i = 0; i != NumElts; ++i) {
if (UndefElts[i]) {
@@ -319,12 +319,10 @@ void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
}
}
-void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
+void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
"Unexpected vector element size.");
@@ -335,7 +333,7 @@ void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
for (unsigned i = 0; i != NumElts; ++i) {
if (UndefElts[i]) {
diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
index b703cbbd2b29..b08c31935d28 100644
--- a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
+++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
@@ -26,25 +26,28 @@ class Constant;
class MVT;
/// Decode a PSHUFB mask from an IR-level vector constant.
-void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+void DecodePSHUFBMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMILP variable mask from an IR-level vector constant.
-void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMILP2 variable mask from an IR-level vector constant.
void DecodeVPERMIL2PMask(const Constant *C, unsigned MatchImm, unsigned ElSize,
+ unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPPERM variable mask from an IR-level vector constant.
-void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+void DecodeVPPERMMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMVMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
+void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 078fe1598f13..a729161a1beb 100644
--- a/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -61,7 +61,7 @@
using namespace llvm;
-#define PASS_KEY "x86-speculative-load-hardening"
+#define PASS_KEY "x86-slh"
#define DEBUG_TYPE PASS_KEY
STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
@@ -75,6 +75,11 @@ STATISTIC(NumCallsOrJumpsHardened,
STATISTIC(NumInstsInserted, "Number of instructions inserted");
STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
+static cl::opt<bool> EnableSpeculativeLoadHardening(
+ "x86-speculative-load-hardening",
+ cl::desc("Force enable speculative load hardening"), cl::init(false),
+ cl::Hidden);
+
static cl::opt<bool> HardenEdgesWithLFENCE(
PASS_KEY "-lfence",
cl::desc(
@@ -114,12 +119,6 @@ static cl::opt<bool> HardenIndirectCallsAndJumps(
"mitigate Spectre v1.2 style attacks."),
cl::init(true), cl::Hidden);
-namespace llvm {
-
-void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
-
-} // end namespace llvm
-
namespace {
class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
@@ -179,6 +178,9 @@ private:
void unfoldCallAndJumpLoads(MachineFunction &MF);
+ SmallVector<MachineInstr *, 16>
+ tracePredStateThroughIndirectBranches(MachineFunction &MF);
+
void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
unsigned saveEFLAGS(MachineBasicBlock &MBB,
@@ -401,6 +403,12 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
<< " **********\n");
+ // Only run if this pass is force-enabled or we detect the relevant function
+ // attribute requesting SLH.
+ if (!EnableSpeculativeLoadHardening &&
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ return false;
+
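For context, the attribute checked above is an ordinary LLVM function attribute, so a frontend or earlier pass can opt a single function into hardening without the force-enable flag. A minimal sketch of that (the helper name is illustrative, not part of this change):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Hedged sketch: mark one IR function so the gate above
// (hasFnAttribute(Attribute::SpeculativeLoadHardening)) fires for it.
void requestSLH(llvm::Function &F) {
  F.addFnAttr(llvm::Attribute::SpeculativeLoadHardening);
}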
Subtarget = &MF.getSubtarget<X86Subtarget>();
MRI = &MF.getRegInfo();
TII = Subtarget->getInstrInfo();
@@ -522,11 +530,16 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
}
}
- // If we are going to harden calls and jumps we need to unfold their memory
- // operands.
- if (HardenIndirectCallsAndJumps)
+ if (HardenIndirectCallsAndJumps) {
+ // If we are going to harden calls and jumps we need to unfold their memory
+ // operands.
unfoldCallAndJumpLoads(MF);
+ // Then we trace predicate state through the indirect branches.
+ auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
+ CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
+ }
+
// Now that we have the predicate state available at the start of each block
// in the CFG, trace it through each block, hardening vulnerable instructions
// as we go.
@@ -809,7 +822,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
"split above!");
// Sort and unique the codes to minimize them.
- llvm::sort(UncondCodeSeq.begin(), UncondCodeSeq.end());
+ llvm::sort(UncondCodeSeq);
UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
UncondCodeSeq.end());
@@ -925,6 +938,265 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
}
}
+/// Trace the predicate state through indirect branches, instrumenting them to
+/// poison the state if a target is reached that does not match the expected
+/// target.
+///
+/// This is designed to mitigate Spectre variant 1 attacks where an indirect
+/// branch is trained to predict a particular target and then mispredicts that
+/// target in a way that can leak data. Despite using an indirect branch, this
+/// is really a variant 1 style attack: it does not steer execution to an
+/// arbitrary or attacker controlled address, and it does not require any
+/// special code executing next to the victim. This attack can also be mitigated
+/// through retpolines, but those require either replacing indirect branches
+/// with conditional direct branches or lowering them through a device that
+/// blocks speculation. This mitigation can replace these retpoline-style
+/// mitigations for jump tables and other indirect branches within a function
+/// when variant 2 isn't a risk, while still allowing limited speculation. Indirect
+/// calls, however, cannot be mitigated through this technique without changing
+/// the ABI in a fundamental way.
+SmallVector<MachineInstr *, 16>
+X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
+ MachineFunction &MF) {
+ // We use the SSAUpdater to insert PHI nodes for the target addresses of
+ // indirect branches. We don't actually need the full power of the SSA updater
+ // in this particular case as we always have immediately available values, but
+ // this avoids us having to re-implement the PHI construction logic.
+ MachineSSAUpdater TargetAddrSSA(MF);
+ TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
+
+ // Track which blocks were terminated with an indirect branch.
+ SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
+
+ // We need to know what blocks end up reached via indirect branches. We
+ // expect this to be a subset of those whose address is taken and so track it
+ // directly via the CFG.
+ SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
+
+ // Walk all the blocks which end in an indirect branch and make the
+ // target address available.
+ for (MachineBasicBlock &MBB : MF) {
+ // Find the last terminator.
+ auto MII = MBB.instr_rbegin();
+ while (MII != MBB.instr_rend() && MII->isDebugInstr())
+ ++MII;
+ if (MII == MBB.instr_rend())
+ continue;
+ MachineInstr &TI = *MII;
+ if (!TI.isTerminator() || !TI.isBranch())
+ // No terminator or non-branch terminator.
+ continue;
+
+ unsigned TargetReg;
+
+ switch (TI.getOpcode()) {
+ default:
+ // Direct branch or conditional branch (leading to fallthrough).
+ continue;
+
+ case X86::FARJMP16m:
+ case X86::FARJMP32m:
+ case X86::FARJMP64:
+ // We cannot mitigate far jumps or calls, but we also don't expect them
+ // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
+ continue;
+
+ case X86::JMP16m:
+ case X86::JMP16m_NT:
+ case X86::JMP32m:
+ case X86::JMP32m_NT:
+ case X86::JMP64m:
+ case X86::JMP64m_NT:
+ // Mostly as documentation.
+ report_fatal_error("Memory operand jumps should have been unfolded!");
+
+ case X86::JMP16r:
+ report_fatal_error(
+ "Support for 16-bit indirect branches is not implemented.");
+ case X86::JMP32r:
+ report_fatal_error(
+ "Support for 32-bit indirect branches is not implemented.");
+
+ case X86::JMP64r:
+ TargetReg = TI.getOperand(0).getReg();
+ }
+
+ // We have definitely found an indirect branch. Verify that there are no
+ // preceding conditional branches as we don't yet support that.
+ if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
+ return !OtherTI.isDebugInstr() && &OtherTI != &TI;
+ })) {
+ LLVM_DEBUG({
+ dbgs() << "ERROR: Found other terminators in a block with an indirect "
+ "branch! This is not yet supported! Terminator sequence:\n";
+ for (MachineInstr &MI : MBB.terminators()) {
+ MI.dump();
+ dbgs() << '\n';
+ }
+ });
+ report_fatal_error("Unimplemented terminator sequence!");
+ }
+
+ // Make the target register an available value for this block.
+ TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
+ IndirectTerminatedMBBs.insert(&MBB);
+
+ // Add all the successors to our target candidates.
+ for (MachineBasicBlock *Succ : MBB.successors())
+ IndirectTargetMBBs.insert(Succ);
+ }
+
+ // Keep track of the cmov instructions we insert so we can return them.
+ SmallVector<MachineInstr *, 16> CMovs;
+
+ // If we didn't find any indirect branches with targets, nothing to do here.
+ if (IndirectTargetMBBs.empty())
+ return CMovs;
+
+ // We found indirect branches and targets that need to be instrumented to
+ // harden loads within them. Walk the blocks of the function (to get a stable
+ // ordering) and instrument each target of an indirect branch.
+ for (MachineBasicBlock &MBB : MF) {
+ // Skip the blocks that aren't candidate targets.
+ if (!IndirectTargetMBBs.count(&MBB))
+ continue;
+
+ // We don't expect EH pads to ever be reached via an indirect branch. If
+ // this is desired for some reason, we could simply skip them here rather
+ // than asserting.
+ assert(!MBB.isEHPad() &&
+ "Unexpected EH pad as target of an indirect branch!");
+
+ // We should never end up threading EFLAGS into a block to harden
+ // conditional jumps as there would be an additional successor via the
+ // indirect branch. As a consequence, all such edges would be split before
+ // reaching here, and the inserted block will handle the EFLAGS-based
+ // hardening.
+ assert(!MBB.isLiveIn(X86::EFLAGS) &&
+ "Cannot check within a block that already has live-in EFLAGS!");
+
+ // We can't handle having non-indirect edges into this block unless this is
+ // the only successor and we can synthesize the necessary target address.
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ // If we've already handled this by extracting the target directly,
+ // nothing to do.
+ if (IndirectTerminatedMBBs.count(Pred))
+ continue;
+
+ // Otherwise, we have to be the only successor. We generally expect this
+ // to be true as conditional branches should have had a critical edge
+ // split already. We don't however need to worry about EH pad successors
+ // as they'll happily ignore the target and their hardening strategy is
+ // resilient to all ways in which they could be reached speculatively.
+ if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
+ return Succ->isEHPad() || Succ == &MBB;
+ })) {
+ LLVM_DEBUG({
+ dbgs() << "ERROR: Found conditional entry to target of indirect "
+ "branch!\n";
+ Pred->dump();
+ MBB.dump();
+ });
+ report_fatal_error("Cannot harden a conditional entry to a target of "
+ "an indirect branch!");
+ }
+
+ // Now we need to compute the address of this block and install it as a
+ // synthetic target in the predecessor. We do this at the bottom of the
+ // predecessor.
+ auto InsertPt = Pred->getFirstTerminator();
+ unsigned TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // Directly materialize it into an immediate.
+ auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
+ TII->get(X86::MOV64ri32), TargetReg)
+ .addMBB(&MBB);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
+ dbgs() << "\n");
+ } else {
+ auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
+ TargetReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addMBB(&MBB)
+ .addReg(/*Segment*/ 0);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
+ dbgs() << "\n");
+ }
+ // And make this available.
+ TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
+ }
+
+ // Materialize the needed SSA value of the target. Note that we need the
+ // middle of the block as this block might at the bottom have an indirect
+ // branch back to itself. We can do this here because at this point, every
+ // predecessor of this block has an available value. This is basically just
+ // automating the construction of a PHI node for this target.
+ unsigned TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
+
+ // Insert a comparison of the incoming target register with this block's
+ // address. This also requires us to mark the block as having its address
+ // taken explicitly.
+ MBB.setHasAddressTaken();
+ auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // Check directly against a relocated immediate when we can.
+ auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
+ .addReg(TargetReg, RegState::Kill)
+ .addMBB(&MBB);
+ ++NumInstsInserted;
+ (void)CheckI;
+ LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+ } else {
+ // Otherwise compute the address into a register first.
+ unsigned AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ auto AddrI =
+ BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addMBB(&MBB)
+ .addReg(/*Segment*/ 0);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
+ auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
+ .addReg(TargetReg, RegState::Kill)
+ .addReg(AddrReg, RegState::Kill);
+ ++NumInstsInserted;
+ (void)CheckI;
+ LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+ }
+
+ // Now cmov over the predicate if the comparison wasn't equal.
+ int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
+ auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+ unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+ auto CMovI =
+ BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
+ .addReg(PS->InitialReg)
+ .addReg(PS->PoisonReg);
+ CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
+ ++NumInstsInserted;
+ LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
+ CMovs.push_back(&*CMovI);
+
+ // And put the new value into the available values for SSA form of our
+ // predicate state.
+ PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
+ }
+
+ // Return all the newly inserted cmov instructions of the predicate state.
+ return CMovs;
+}
+
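Conceptually, each block reachable through an indirect branch now re-derives its own address and compares it with the address the branch actually used, folding any mismatch into the predicate state. A minimal C++ model of that check, with illustrative names (the pass itself emits the LEA/CMP/CMOV machine instruction sequence shown above):

#include <cstdint>

// Hedged model: Target is the value the JMP64r jumped through (threaded to
// this block by MachineSSAUpdater), Here is the address of the block being
// entered, and State is the predicate state (zero on the architectural path,
// all-ones once misspeculation has been detected).
inline uint64_t checkIndirectTarget(uint64_t State, const void *Target,
                                    const void *Here) {
  // Architecturally Target == Here, so State passes through unchanged; under
  // misspeculation the compare fails and State becomes all-ones, which later
  // poisons every hardened load address.
  return Target == Here ? State : ~uint64_t(0);
}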
/// Returns true if the instruction has no behavior (specified or otherwise)
/// that is based on the value of any of its register operands
///
@@ -1498,13 +1770,6 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
// pass specifically so that we have the complete set of instructions for
// which we will do post-load hardening and can defer it in certain
// circumstances.
- //
- // FIXME: This could probably be made even more effective by doing it
- // across the entire function. Rather than just walking the flat list
- // backwards here, we could walk the function in PO and each block bottom
- // up, allowing us to in some cases sink hardening across block blocks. As
- // long as the in-block predicate state is used at the eventual hardening
- // site, this remains safe.
for (MachineInstr &MI : MBB) {
if (HardenLoads) {
// We cannot both require hardening the def of a load and its address.
@@ -1586,8 +1851,8 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
}
// Otherwise we have a call. We need to handle transferring the predicate
- // state into a call and recovering it after the call returns unless this
- // is a tail call.
+ // state into a call and recovering it after the call returns (unless this
+ // is a tail call).
assert(MI.isCall() && "Should only reach here for calls!");
tracePredStateThroughCall(MI);
}
@@ -2109,21 +2374,10 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
DebugLoc Loc = MI.getDebugLoc();
auto InsertPt = MI.getIterator();
- if (FenceCallAndRet) {
- // Simply forcibly block speculation of loads out of the function by using
- // an LFENCE. This is potentially a heavy-weight mitigation strategy, but
- // should be secure, is simple from an ABI perspective, and the cost can be
- // minimized through inlining.
- //
- // FIXME: We should investigate ways to establish a strong data-dependency
- // on the return. However, poisoning the stack pointer is unlikely to work
- // because the return is *predicted* rather than relying on the load of the
- // return address to actually resolve.
- BuildMI(MBB, InsertPt, Loc, TII->get(X86::LFENCE));
- ++NumInstsInserted;
- ++NumLFENCEsInserted;
+ if (FenceCallAndRet)
+ // No need to fence here as we'll fence at the return site itself. That
+ // handles more cases than we can handle here.
return;
- }
// Take our predicate state, shift it to the high 17 bits (so that we keep
// pointers canonical) and merge it into RSP. This will allow the caller to
@@ -2141,31 +2395,168 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
///
/// For tail calls, this is all we need to do.
///
-/// For calls where we might return to control flow, we further need to extract
-/// the predicate state built up within that function from the high bits of the
-/// stack pointer, and make that the newly available predicate state.
+/// For calls where we might return and resume the control flow, we need to
+/// extract the predicate state from the high bits of the stack pointer after
+/// control returns from the called function.
+///
+/// We also need to verify that we intended to return to this location in the
+/// code. An attacker might arrange for the processor to mispredict the return
+/// to some valid but incorrect return address in the program rather than the
+/// correct one. See the paper on this attack, called "ret2spec" by the
+/// researchers, here:
+/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
+///
+/// The way we verify that we returned to the correct location is by preserving
+/// the expected return address across the call. One technique involves taking
+/// advantage of the red-zone to load the return address from `-8(%rsp)` where it
+/// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
+/// directly save the address into a register that will be preserved across the
+/// call. We compare this intended return address against the address
+/// immediately following the call (the observed return address). If these
+/// mismatch, we have detected misspeculation and can poison our predicate
+/// state.
void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
auto InsertPt = MI.getIterator();
DebugLoc Loc = MI.getDebugLoc();
+ if (FenceCallAndRet) {
+ if (MI.isReturn())
+ // Tail call; we don't return to this function.
+ // FIXME: We should also handle noreturn calls.
+ return;
+
+ // We don't need to fence before the call because the called function should
+ // fence in its entry. However, we do need to fence after the call returns.
+ // Fencing before the return doesn't correctly handle cases where the return
+ // itself is mispredicted.
+ BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
+ ++NumInstsInserted;
+ ++NumLFENCEsInserted;
+ return;
+ }
+
// First, we transfer the predicate state into the called function by merging
// it into the stack pointer. This will kill the current def of the state.
unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
// If this call is also a return, it is a tail call and we don't need anything
- // else to handle it so just continue.
- // FIXME: We should also handle noreturn calls.
- if (MI.isReturn())
+ // else to handle it so just return. Also, if there are no further
+ // instructions and no successors, this call does not return so we can also
+ // bail.
+ if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
return;
- // We need to step past the call and recover the predicate state from SP after
- // the return, and make this new state available.
+ // Create a symbol to track the return address and attach it to the call
+ // machine instruction. We will lower extra symbols attached to call
+ // instructions as labels immediately following the call.
+ MCSymbol *RetSymbol =
+ MF.getContext().createTempSymbol("slh_ret_addr",
+ /*AlwaysAddSuffix*/ true);
+ MI.setPostInstrSymbol(MF, RetSymbol);
+
+ const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
+ unsigned ExpectedRetAddrReg = 0;
+
+ // If we have no red zones, or if the function returns twice (as setjmp does,
+ // possibly without using the `ret` instruction), we need to save the expected
+ // return address prior to the call.
+ if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone) ||
+ MF.exposesReturnsTwice()) {
+ // If we don't have red zones, we need to compute the expected return
+ // address prior to the call and store it in a register that lives across
+ // the call.
+ //
+ // In some ways, this is doubly satisfying as a mitigation because it will
+ // also successfully detect stack smashing bugs in some cases (typically,
+ // when a callee-saved register is used and the callee doesn't push it onto
+ // the stack). But that isn't our primary goal, so we only use it as
+ // a fallback.
+ //
+ // FIXME: It isn't clear that this is reliable in the face of
+ // rematerialization in the register allocator. We somehow need to force
+ // that to not occur for this particular instruction, and instead to spill
+ // or otherwise preserve the value computed *prior* to the call.
+ //
+ // FIXME: It is even less clear why MachineCSE can't just fold this when we
+ // end up having to use identical instructions both before and after the
+ // call to feed the comparison.
+ ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
+ .addSym(RetSymbol);
+ } else {
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addSym(RetSymbol)
+ .addReg(/*Segment*/ 0);
+ }
+ }
+
+ // Step past the call to handle when it returns.
++InsertPt;
+
+ // If we didn't pre-compute the expected return address into a register, then
+ // red zones are enabled and the return address is still available on the
+ // stack immediately after the call, so as the very first instruction after
+ // the call we load it into a register.
+ if (!ExpectedRetAddrReg) {
+ ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
+ .addReg(/*Base*/ X86::RSP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
+ // the return address is 8 bytes past it.
+ .addReg(/*Segment*/ 0);
+ }
+
+ // Now we extract the callee's predicate state from the stack pointer.
unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
- PS->SSA.AddAvailableValue(&MBB, NewStateReg);
+
+ // Test the expected return address against our actual address. If we can
+ // form this basic block's address as an immediate, this is easy. Otherwise
+ // we compute it.
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // FIXME: Could we fold this with the load? It would require careful EFLAGS
+ // management.
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
+ .addReg(ExpectedRetAddrReg, RegState::Kill)
+ .addSym(RetSymbol);
+ } else {
+ unsigned ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addSym(RetSymbol)
+ .addReg(/*Segment*/ 0);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
+ .addReg(ExpectedRetAddrReg, RegState::Kill)
+ .addReg(ActualRetAddrReg, RegState::Kill);
+ }
+
+ // Now conditionally update the predicate state we just extracted if we ended
+ // up at a different return address than expected.
+ int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
+ auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+
+ unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+ auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
+ .addReg(NewStateReg, RegState::Kill)
+ .addReg(PS->PoisonReg);
+ CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
+ ++NumInstsInserted;
+ LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
+
+ PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
}
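In short, the call site records where control should resume and distrusts the predicate state it recovers from RSP unless execution really did land there. A minimal C++ model of that comparison, with illustrative names (the pass itself emits the MOV/LEA/CMP/CMOV sequence built above):

#include <cstdint>

// Hedged model: Expected is the return address captured against the
// slh_ret_addr label (kept in a register when there is no red zone, otherwise
// reloaded from -8(%rsp) after the call), Actual is the address execution
// actually resumed at, and CalleeState is the state recovered from RSP.
inline uint64_t checkReturnAddress(uint64_t CalleeState, const void *Expected,
                                   const void *Actual) {
  // A ret2spec-style misprediction resumes at the wrong address, so the
  // mismatch poisons the state before any dependent load is hardened.
  return Expected == Actual ? CalleeState : ~uint64_t(0);
}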
/// An attacker may speculatively store over a value that is then speculatively
@@ -2237,9 +2628,9 @@ void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
++NumCallsOrJumpsHardened;
}
-INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
"X86 speculative load hardener", false, false)
-INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
+INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
"X86 speculative load hardener", false, false)
FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index 7e84323dda4c..0c9ce8802e1b 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -77,6 +77,8 @@ X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
if (isTargetELF()) {
switch (TM.getCodeModel()) {
// 64-bit small code model is simple: All rip-relative.
+ case CodeModel::Tiny:
+ llvm_unreachable("Tiny codesize model not supported on X86");
case CodeModel::Small:
case CodeModel::Kernel:
return X86II::MO_NO_FLAG;
@@ -139,8 +141,11 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
if (TM.shouldAssumeDSOLocal(M, GV))
return classifyLocalReference(GV);
- if (isTargetCOFF())
- return X86II::MO_DLLIMPORT;
+ if (isTargetCOFF()) {
+ if (GV->hasDLLImportStorageClass())
+ return X86II::MO_DLLIMPORT;
+ return X86II::MO_COFFSTUB;
+ }
if (is64Bit()) {
// ELF supports a large, truly PIC code model with non-PC relative GOT
@@ -220,14 +225,22 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUName.empty())
CPUName = "generic";
- // Make sure 64-bit features are available in 64-bit mode. (But make sure
- // SSE2 can be turned off explicitly.)
std::string FullFS = FS;
if (In64BitMode) {
+ // SSE2 should default to enabled in 64-bit mode, but can be turned off
+ // explicitly.
if (!FullFS.empty())
- FullFS = "+64bit,+sse2," + FullFS;
+ FullFS = "+sse2," + FullFS;
else
- FullFS = "+64bit,+sse2";
+ FullFS = "+sse2";
+
+ // If no CPU was specified, enable the 64bit feature to satisfy the later check.
+ if (CPUName == "generic") {
+ if (!FullFS.empty())
+ FullFS = "+64bit," + FullFS;
+ else
+ FullFS = "+64bit";
+ }
}
// LAHF/SAHF are always supported in non-64-bit mode.
@@ -262,8 +275,9 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel << ", 64bit "
<< HasX86_64 << "\n");
- assert((!In64BitMode || HasX86_64) &&
- "64-bit code requested on a subtarget that doesn't support it!");
+ if (In64BitMode && !HasX86_64)
+ report_fatal_error("64-bit code requested on a subtarget that doesn't "
+ "support it!");
// Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index 85e8256a6e94..b1103f823e7f 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -52,21 +52,15 @@ enum Style {
class X86Subtarget final : public X86GenSubtargetInfo {
public:
+ // NOTE: Do not add anything new to this list. Coarse, CPU name based flags
+ // are not a good idea. We should be migrating away from these.
enum X86ProcFamilyEnum {
Others,
IntelAtom,
IntelSLM,
IntelGLM,
IntelGLP,
- IntelTRM,
- IntelHaswell,
- IntelBroadwell,
- IntelSkylake,
- IntelKNL,
- IntelSKX,
- IntelCannonlake,
- IntelIcelakeClient,
- IntelIcelakeServer,
+ IntelTRM
};
protected:
@@ -229,6 +223,9 @@ protected:
// PMULUDQ.
bool IsPMULLDSlow = false;
+ /// True if the PMADDWD instruction is slow compared to PMULLD.
+ bool IsPMADDWDSlow = false;
+
/// True if unaligned memory accesses of 16-bytes are slow.
bool IsUAMem16Slow = false;
@@ -385,9 +382,23 @@ protected:
/// Processor supports PCONFIG instruction
bool HasPCONFIG = false;
+ /// Processor has a single uop BEXTR implementation.
+ bool HasFastBEXTR = false;
+
+ /// Try harder to combine to horizontal vector ops if they are fast.
+ bool HasFastHorizontalOps = false;
+
/// Use a retpoline thunk rather than indirect calls to block speculative
/// execution.
- bool UseRetpoline = false;
+ bool UseRetpolineIndirectCalls = false;
+
+ /// Use a retpoline thunk or remove any indirect branch to block speculative
+ /// execution.
+ bool UseRetpolineIndirectBranches = false;
+
+ /// Deprecated flag, query `UseRetpolineIndirectCalls` and
+ /// `UseRetpolineIndirectBranches` instead.
+ bool DeprecatedUseRetpoline = false;
/// When using a retpoline thunk, call an externally provided thunk rather
/// than emitting one inside the compiler.
@@ -408,6 +419,9 @@ protected:
/// Indicates target prefers 256 bit instructions.
bool Prefer256Bit = false;
+ /// Threeway branch is profitable in this subtarget.
+ bool ThreewayBranchProfitable = false;
+
/// What processor and OS we're targeting.
Triple TargetTriple;
@@ -534,7 +548,9 @@ public:
bool hasX87() const { return HasX87; }
bool hasNOPL() const { return HasNOPL; }
- bool hasCMov() const { return HasCMov; }
+ // SSE codegen depends on cmovs, and all SSE1+ processors support them.
+ // All 64-bit processors support cmov.
+ bool hasCMov() const { return HasCMov || X86SSELevel >= SSE1 || is64Bit(); }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
bool hasSSE3() const { return X86SSELevel >= SSE3; }
@@ -599,6 +615,7 @@ public:
bool hasPTWRITE() const { return HasPTWRITE; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isPMULLDSlow() const { return IsPMULLDSlow; }
+ bool isPMADDWDSlow() const { return IsPMADDWDSlow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
int getGatherOverhead() const { return GatherOverhead; }
@@ -619,6 +636,8 @@ public:
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+ bool hasFastBEXTR() const { return HasFastBEXTR; }
+ bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
bool hasMacroFusion() const { return HasMacroFusion; }
bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
@@ -648,8 +667,12 @@ public:
bool hasWAITPKG() const { return HasWAITPKG; }
bool hasPCONFIG() const { return HasPCONFIG; }
bool hasSGX() const { return HasSGX; }
+ bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
bool hasINVPCID() const { return HasINVPCID; }
- bool useRetpoline() const { return UseRetpoline; }
+ bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
+ bool useRetpolineIndirectBranches() const {
+ return UseRetpolineIndirectBranches;
+ }
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
@@ -804,7 +827,9 @@ public:
/// If we are using retpolines, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
- bool enableIndirectBrExpand() const override { return useRetpoline(); }
+ bool enableIndirectBrExpand() const override {
+ return useRetpolineIndirectBranches();
+ }
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index 374bf3daaf9b..afcb49dc2263 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -54,23 +54,10 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
-static cl::opt<bool> EnableSpeculativeLoadHardening(
- "x86-speculative-load-hardening",
- cl::desc("Enable speculative load hardening"), cl::init(false), cl::Hidden);
-
-namespace llvm {
-
-void initializeWinEHStatePassPass(PassRegistry &);
-void initializeFixupLEAPassPass(PassRegistry &);
-void initializeShadowCallStackPass(PassRegistry &);
-void initializeX86CallFrameOptimizationPass(PassRegistry &);
-void initializeX86CmovConverterPassPass(PassRegistry &);
-void initializeX86ExecutionDomainFixPass(PassRegistry &);
-void initializeX86DomainReassignmentPass(PassRegistry &);
-void initializeX86AvoidSFBPassPass(PassRegistry &);
-void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
-
-} // end namespace llvm
+static cl::opt<bool> EnableCondBrFoldingPass("x86-condbr-folding",
+ cl::desc("Enable the conditional branch "
+ "folding pass"),
+ cl::init(false), cl::Hidden);
extern "C" void LLVMInitializeX86Target() {
// Register the target.
@@ -89,7 +76,9 @@ extern "C" void LLVMInitializeX86Target() {
initializeX86ExecutionDomainFixPass(PR);
initializeX86DomainReassignmentPass(PR);
initializeX86AvoidSFBPassPass(PR);
+ initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
+ initializeX86CondBrFoldingPassPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -201,10 +190,13 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- bool JIT, bool Is64Bit) {
- if (CM)
+static CodeModel::Model getEffectiveX86CodeModel(Optional<CodeModel::Model> CM,
+ bool JIT, bool Is64Bit) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
return *CM;
+ }
if (JIT)
return Is64Bit ? CodeModel::Large : CodeModel::Small;
return CodeModel::Small;
@@ -221,7 +213,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(
T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, JIT, RM),
- getEffectiveCodeModel(CM, JIT, TT.getArch() == Triple::x86_64), OL),
+ getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64),
+ OL),
TLOF(createTLOF(getTargetTriple())) {
// Windows stack unwinder gets confused when execution flow "falls through"
// after a call to 'noreturn' function.
@@ -292,13 +285,14 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
}
}
- // Extract required-vector-width attribute.
+ // Extract min-legal-vector-width attribute.
unsigned RequiredVectorWidth = UINT32_MAX;
- if (F.hasFnAttribute("required-vector-width")) {
- StringRef Val = F.getFnAttribute("required-vector-width").getValueAsString();
+ if (F.hasFnAttribute("min-legal-vector-width")) {
+ StringRef Val =
+ F.getFnAttribute("min-legal-vector-width").getValueAsString();
unsigned Width;
if (!Val.getAsInteger(0, Width)) {
- Key += ",required-vector-width=";
+ Key += ",min-legal-vector-width=";
Key += Val;
RequiredVectorWidth = Width;
}
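For reference, the attribute read here is a plain string function attribute whose value is parsed with getAsInteger, so a caller could set it roughly like this (helper name is illustrative):

#include <string>
#include "llvm/IR/Function.h"

// Hedged sketch: tag a function with the string attribute consumed above,
// e.g. setMinLegalVectorWidth(F, 256) to request 256-bit legalization.
void setMinLegalVectorWidth(llvm::Function &F, unsigned Bits) {
  F.addFnAttr("min-legal-vector-width", std::to_string(Bits));
}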
@@ -449,6 +443,8 @@ bool X86PassConfig::addGlobalInstructionSelect() {
}
bool X86PassConfig::addILPOpts() {
+ if (EnableCondBrFoldingPass)
+ addPass(createX86CondBrFolding());
addPass(&EarlyIfConverterID);
if (EnableMachineCombinerPass)
addPass(&MachineCombinerID);
@@ -473,9 +469,7 @@ void X86PassConfig::addPreRegAlloc() {
addPass(createX86AvoidStoreForwardingBlocks());
}
- if (EnableSpeculativeLoadHardening)
- addPass(createX86SpeculativeLoadHardeningPass());
-
+ addPass(createX86SpeculativeLoadHardeningPass());
addPass(createX86FlagsCopyLoweringPass());
addPass(createX86WinAllocaExpander());
}
@@ -508,6 +502,8 @@ void X86PassConfig::addPreEmitPass() {
addPass(createX86FixupLEAs());
addPass(createX86EvexToVexInsts());
}
+ addPass(createX86DiscriminateMemOpsPass());
+ addPass(createX86InsertPrefetchPass());
}
void X86PassConfig::addPreEmitPass2() {
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
index 5b21cd82b5b1..f5b45da0c3dc 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -53,10 +53,6 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
-
- bool isMachineVerifierClean() const override {
- return false;
- }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 865462622627..36929a4f5439 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -290,11 +290,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v64i8, 2 }, // psllw + pand.
{ ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand.
{ ISD::SRA, MVT::v64i8, 4 }, // psrlw, pand, pxor, psubb.
-
- { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
- { ISD::SREM, MVT::v32i16, 8 }, // vpmulhw+mul+sub sequence
- { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
- { ISD::UREM, MVT::v32i16, 8 }, // vpmulhuw+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -308,11 +303,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v2i64, 1 },
{ ISD::SRA, MVT::v4i64, 1 },
{ ISD::SRA, MVT::v8i64, 1 },
-
- { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
- { ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence
- { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
- { ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -328,15 +318,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
-
- { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
- { ISD::SREM, MVT::v16i16, 8 }, // vpmulhw+mul+sub sequence
- { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
- { ISD::UREM, MVT::v16i16, 8 }, // vpmulhuw+mul+sub sequence
- { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
- { ISD::SREM, MVT::v8i32, 19 }, // vpmuldq+mul+sub sequence
- { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
- { ISD::UREM, MVT::v8i32, 19 }, // vpmuludq+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -354,7 +335,81 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v32i8, 4+2 }, // 2*(psllw + pand) + split.
{ ISD::SRL, MVT::v32i8, 4+2 }, // 2*(psrlw + pand) + split.
{ ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split.
+ };
+
+ // XOP has faster vXi8 shifts.
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasSSE2() && !ST->hasXOP()) {
+ if (const auto *Entry =
+ CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX512BWConstCostTable[] = {
+ { ISD::SDIV, MVT::v64i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::SREM, MVT::v64i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v64i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::UREM, MVT::v64i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
+ { ISD::SREM, MVT::v32i16, 8 }, // vpmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
+ { ISD::UREM, MVT::v32i16, 8 }, // vpmulhuw+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasBWI()) {
+ if (const auto *Entry =
+ CostTableLookup(AVX512BWConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX512ConstCostTable[] = {
+ { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
+ { ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence
+ { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
+ { ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasAVX512()) {
+ if (const auto *Entry =
+ CostTableLookup(AVX512ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+ static const CostTblEntry AVX2ConstCostTable[] = {
+ { ISD::SDIV, MVT::v32i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::SREM, MVT::v32i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v32i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::UREM, MVT::v32i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
+ { ISD::SREM, MVT::v16i16, 8 }, // vpmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
+ { ISD::UREM, MVT::v16i16, 8 }, // vpmulhuw+mul+sub sequence
+ { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
+ { ISD::SREM, MVT::v8i32, 19 }, // vpmuldq+mul+sub sequence
+ { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
+ { ISD::UREM, MVT::v8i32, 19 }, // vpmuludq+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasAVX2()) {
+ if (const auto *Entry = CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry SSE2ConstCostTable[] = {
+ { ISD::SDIV, MVT::v32i8, 28+2 }, // 4*ext+4*pmulhw sequence + split.
+ { ISD::SREM, MVT::v32i8, 32+2 }, // 4*ext+4*pmulhw+mul+sub sequence + split.
+ { ISD::SDIV, MVT::v16i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::SREM, MVT::v16i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v32i8, 28+2 }, // 4*ext+4*pmulhw sequence + split.
+ { ISD::UREM, MVT::v32i8, 32+2 }, // 4*ext+4*pmulhw+mul+sub sequence + split.
+ { ISD::UDIV, MVT::v16i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::UREM, MVT::v16i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
{ ISD::SDIV, MVT::v16i16, 12+2 }, // 2*pmulhw sequence + split.
{ ISD::SREM, MVT::v16i16, 16+2 }, // 2*pmulhw+mul+sub sequence + split.
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
@@ -373,7 +428,8 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::UREM, MVT::v4i32, 20 }, // pmuludq+mul+sub sequence
};
- if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
ST->hasSSE2()) {
// pmuldq sequence.
if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX())
@@ -385,12 +441,8 @@ int X86TTIImpl::getArithmeticInstrCost(
if (ISD == ISD::SREM && LT.second == MVT::v4i32 && ST->hasSSE41())
return LT.first * 20;
- // XOP has faster vXi8 shifts.
- if ((ISD != ISD::SHL && ISD != ISD::SRL && ISD != ISD::SRA) ||
- !ST->hasXOP())
- if (const auto *Entry =
- CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
- return LT.first * Entry->Cost;
+ if (const auto *Entry = CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
}
static const CostTblEntry AVX2UniformCostTable[] = {
@@ -560,9 +612,18 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for XOP lowering tricks.
- if (ST->hasXOP())
- if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
+ if (ST->hasXOP()) {
+ // If the right shift is constant then we'll fold the negation so
+ // it's as cheap as a left shift.
+ int ShiftISD = ISD;
+ if ((ShiftISD == ISD::SRL || ShiftISD == ISD::SRA) &&
+ (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
+ ShiftISD = ISD::SHL;
+ if (const auto *Entry =
+ CostTableLookup(XOPShiftCostTable, ShiftISD, LT.second))
return LT.first * Entry->Cost;
+ }
static const CostTblEntry SSE2UniformShiftCostTable[] = {
// Uniform splats are cheaper for the following instructions.
@@ -771,6 +832,12 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
{ ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/
{ ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/
+
+ { ISD::FADD, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
+ { ISD::FADD, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
+
+ { ISD::FSUB, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
+ { ISD::FSUB, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
};
if (ST->hasSSE2())
@@ -780,6 +847,20 @@ int X86TTIImpl::getArithmeticInstrCost(
static const CostTblEntry SSE1CostTable[] = {
{ ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
+
+ { ISD::FADD, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::FADD, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
+
+ { ISD::FSUB, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::FSUB, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
+
+ { ISD::ADD, MVT::i8, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::ADD, MVT::i16, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::ADD, MVT::i32, 1 }, // Pentium III from http://www.agner.org/
+
+ { ISD::SUB, MVT::i8, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::SUB, MVT::i16, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::SUB, MVT::i32, 1 }, // Pentium III from http://www.agner.org/
};
if (ST->hasSSE1())
@@ -810,12 +891,30 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ // Treat Transpose as 2-op shuffles - there's no difference in lowering.
+ if (Kind == TTI::SK_Transpose)
+ Kind = TTI::SK_PermuteTwoSrc;
+
// For Broadcasts we are splatting the first element from the first input
// register, so only need to reference that input and all the output
// registers are the same.
if (Kind == TTI::SK_Broadcast)
LT.first = 1;
+ // Subvector extractions are free if they start at the beginning of a
+ // vector and cheap if the subvectors are aligned.
+ if (Kind == TTI::SK_ExtractSubvector && LT.second.isVector()) {
+ int NumElts = LT.second.getVectorNumElements();
+ if ((Index % NumElts) == 0)
+ return 0;
+ std::pair<int, MVT> SubLT = TLI->getTypeLegalizationCost(DL, SubTp);
+ if (SubLT.second.isVector()) {
+ int NumSubElts = SubLT.second.getVectorNumElements();
+ if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
+ return SubLT.first;
+ }
+ }
+
// We are going to permute multiple sources and the result will be in multiple
// destinations. Providing an accurate cost only for splits where the element
// type remains the same.
@@ -853,15 +952,15 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
}
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
- { TTI::SK_Reverse, MVT::v32i8, 1 }, // vpermb
+ {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
+ {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb
- { TTI::SK_PermuteSingleSrc, MVT::v64i8, 1 }, // vpermb
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 1 }, // vpermb
+ {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb
- { TTI::SK_PermuteTwoSrc, MVT::v64i8, 1 }, // vpermt2b
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 1 }, // vpermt2b
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 } // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v64i8, 1}, // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 1}, // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1} // vpermt2b
};
if (ST->hasVBMI())
@@ -870,25 +969,25 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX512BWShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v32i16, 1 }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v64i8, 1 }, // vpbroadcastb
+ {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb
- { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v64i8, 2 }, // pshufb + vshufi64x2
+ {TTI::SK_Reverse, MVT::v32i16, 1}, // vpermw
+ {TTI::SK_Reverse, MVT::v16i16, 1}, // vpermw
+ {TTI::SK_Reverse, MVT::v64i8, 2}, // pshufb + vshufi64x2
- { TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v64i8, 8 }, // extend to v32i16
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 3 }, // vpermw + zext/trunc
+ {TTI::SK_PermuteSingleSrc, MVT::v32i16, 1}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 1}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, // extend to v32i16
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 3}, // vpermw + zext/trunc
- { TTI::SK_PermuteTwoSrc, MVT::v32i16, 1 }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 1 }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 3 }, // zext + vpermt2w + trunc
- { TTI::SK_PermuteTwoSrc, MVT::v64i8, 19 }, // 6 * v32i8 + 1
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 } // zext + vpermt2w + trunc
+ {TTI::SK_PermuteTwoSrc, MVT::v32i16, 1}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 1}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 3}, // zext + vpermt2w + trunc
+ {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3} // zext + vpermt2w + trunc
};
if (ST->hasBWI())
@@ -897,42 +996,42 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX512ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v8f64, 1 }, // vbroadcastpd
- { TTI::SK_Broadcast, MVT::v16f32, 1 }, // vbroadcastps
- { TTI::SK_Broadcast, MVT::v8i64, 1 }, // vpbroadcastq
- { TTI::SK_Broadcast, MVT::v16i32, 1 }, // vpbroadcastd
-
- { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
-
- { TTI::SK_PermuteSingleSrc, MVT::v8f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v16f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v8i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v16i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
-
- { TTI::SK_PermuteTwoSrc, MVT::v8f64, 1 }, // vpermt2pd
- { TTI::SK_PermuteTwoSrc, MVT::v16f32, 1 }, // vpermt2ps
- { TTI::SK_PermuteTwoSrc, MVT::v8i64, 1 }, // vpermt2q
- { TTI::SK_PermuteTwoSrc, MVT::v16i32, 1 }, // vpermt2d
- { TTI::SK_PermuteTwoSrc, MVT::v4f64, 1 }, // vpermt2pd
- { TTI::SK_PermuteTwoSrc, MVT::v8f32, 1 }, // vpermt2ps
- { TTI::SK_PermuteTwoSrc, MVT::v4i64, 1 }, // vpermt2q
- { TTI::SK_PermuteTwoSrc, MVT::v8i32, 1 }, // vpermt2d
- { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // vpermt2pd
- { TTI::SK_PermuteTwoSrc, MVT::v4f32, 1 }, // vpermt2ps
- { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // vpermt2q
- { TTI::SK_PermuteTwoSrc, MVT::v4i32, 1 } // vpermt2d
+ {TTI::SK_Broadcast, MVT::v8f64, 1}, // vbroadcastpd
+ {TTI::SK_Broadcast, MVT::v16f32, 1}, // vbroadcastps
+ {TTI::SK_Broadcast, MVT::v8i64, 1}, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v16i32, 1}, // vpbroadcastd
+
+ {TTI::SK_Reverse, MVT::v8f64, 1}, // vpermpd
+ {TTI::SK_Reverse, MVT::v16f32, 1}, // vpermps
+ {TTI::SK_Reverse, MVT::v8i64, 1}, // vpermq
+ {TTI::SK_Reverse, MVT::v16i32, 1}, // vpermd
+
+ {TTI::SK_PermuteSingleSrc, MVT::v8f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v16f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v4f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v8i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v16i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
+
+ {TTI::SK_PermuteTwoSrc, MVT::v8f64, 1}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v16f32, 1}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v8i64, 1}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v16i32, 1}, // vpermt2d
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, 1}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, 1}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, 1}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, 1}, // vpermt2d
+ {TTI::SK_PermuteTwoSrc, MVT::v2f64, 1}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v4f32, 1}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v2i64, 1}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v4i32, 1} // vpermt2d
};
if (ST->hasAVX512())
@@ -940,40 +1039,40 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX2ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f64, 1 }, // vbroadcastpd
- { TTI::SK_Broadcast, MVT::v8f32, 1 }, // vbroadcastps
- { TTI::SK_Broadcast, MVT::v4i64, 1 }, // vpbroadcastq
- { TTI::SK_Broadcast, MVT::v8i32, 1 }, // vpbroadcastd
- { TTI::SK_Broadcast, MVT::v16i16, 1 }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v32i8, 1 }, // vpbroadcastb
-
- { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
-
- { TTI::SK_Select, MVT::v16i16, 1 }, // vpblendvb
- { TTI::SK_Select, MVT::v32i8, 1 }, // vpblendvb
-
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 + 2*vpshufb
+ {TTI::SK_Broadcast, MVT::v4f64, 1}, // vbroadcastpd
+ {TTI::SK_Broadcast, MVT::v8f32, 1}, // vbroadcastps
+ {TTI::SK_Broadcast, MVT::v4i64, 1}, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v8i32, 1}, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v32i8, 1}, // vpbroadcastb
+
+ {TTI::SK_Reverse, MVT::v4f64, 1}, // vpermpd
+ {TTI::SK_Reverse, MVT::v8f32, 1}, // vpermps
+ {TTI::SK_Reverse, MVT::v4i64, 1}, // vpermq
+ {TTI::SK_Reverse, MVT::v8i32, 1}, // vpermd
+ {TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb
+ {TTI::SK_Reverse, MVT::v32i8, 2}, // vperm2i128 + pshufb
+
+ {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb
+ {TTI::SK_Select, MVT::v32i8, 1}, // vpblendvb
+
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 + 2*vpshufb
// + vpblendvb
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 }, // vperm2i128 + 2*vpshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vperm2i128 + 2*vpshufb
// + vpblendvb
- { TTI::SK_PermuteTwoSrc, MVT::v4f64, 3 }, // 2*vpermpd + vblendpd
- { TTI::SK_PermuteTwoSrc, MVT::v8f32, 3 }, // 2*vpermps + vblendps
- { TTI::SK_PermuteTwoSrc, MVT::v4i64, 3 }, // 2*vpermq + vpblendd
- { TTI::SK_PermuteTwoSrc, MVT::v8i32, 3 }, // 2*vpermd + vpblendd
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 7 }, // 2*vperm2i128 + 4*vpshufb
- // + vpblendvb
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 7 }, // 2*vperm2i128 + 4*vpshufb
- // + vpblendvb
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, // 2*vpermpd + vblendpd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, 3}, // 2*vpermps + vblendps
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, // 2*vpermq + vpblendd
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3}, // 2*vpermd + vpblendd
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb
+ // + vpblendvb
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7}, // 2*vperm2i128 + 4*vpshufb
+ // + vpblendvb
};
if (ST->hasAVX2())
@@ -981,21 +1080,21 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry XOPShuffleTbl[] = {
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2 }, // vperm2f128 + vpermil2pd
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 2 }, // vperm2f128 + vpermil2ps
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2 }, // vperm2f128 + vpermil2pd
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 2 }, // vperm2f128 + vpermil2ps
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vextractf128 + 2*vpperm
- // + vinsertf128
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 }, // vextractf128 + 2*vpperm
- // + vinsertf128
-
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 9 }, // 2*vextractf128 + 6*vpperm
- // + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpperm
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 9 }, // 2*vextractf128 + 6*vpperm
- // + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 }, // vpperm
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vpermil2pd
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 2}, // vperm2f128 + vpermil2ps
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 + vpermil2pd
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 2}, // vperm2f128 + vpermil2ps
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vextractf128 + 2*vpperm
+ // + vinsertf128
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vextractf128 + 2*vpperm
+ // + vinsertf128
+
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpperm
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9}, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1}, // vpperm
};
if (ST->hasXOP())
@@ -1003,46 +1102,46 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX1ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Broadcast, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Broadcast, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Broadcast, MVT::v16i16, 3 }, // vpshuflw + vpshufd + vinsertf128
- { TTI::SK_Broadcast, MVT::v32i8, 2 }, // vpshufb + vinsertf128
-
- { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
- { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
-
- { TTI::SK_Select, MVT::v4i64, 1 }, // vblendpd
- { TTI::SK_Select, MVT::v4f64, 1 }, // vblendpd
- { TTI::SK_Select, MVT::v8i32, 1 }, // vblendps
- { TTI::SK_Select, MVT::v8f32, 1 }, // vblendps
- { TTI::SK_Select, MVT::v16i16, 3 }, // vpand + vpandn + vpor
- { TTI::SK_Select, MVT::v32i8, 3 }, // vpand + vpandn + vpor
-
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2 }, // vperm2f128 + vshufpd
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2 }, // vperm2f128 + vshufpd
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 8 }, // vextractf128 + 4*pshufb
+ {TTI::SK_Broadcast, MVT::v4f64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8f32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v4i64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8i32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v32i8, 2}, // vpshufb + vinsertf128
+
+ {TTI::SK_Reverse, MVT::v4f64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8f32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v4i64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8i32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ {TTI::SK_Reverse, MVT::v32i8, 4}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+
+ {TTI::SK_Select, MVT::v4i64, 1}, // vblendpd
+ {TTI::SK_Select, MVT::v4f64, 1}, // vblendpd
+ {TTI::SK_Select, MVT::v8i32, 1}, // vblendps
+ {TTI::SK_Select, MVT::v8f32, 1}, // vblendps
+ {TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor
+ {TTI::SK_Select, MVT::v32i8, 3}, // vpand + vpandn + vpor
+
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vshufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 + vshufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 + 4*pshufb
// + 2*por + vinsertf128
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 8 }, // vextractf128 + 4*pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8}, // vextractf128 + 4*pshufb
// + 2*por + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v4f64, 3 }, // 2*vperm2f128 + vshufpd
- { TTI::SK_PermuteTwoSrc, MVT::v4i64, 3 }, // 2*vperm2f128 + vshufpd
- { TTI::SK_PermuteTwoSrc, MVT::v8f32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteTwoSrc, MVT::v8i32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 15 }, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 15 }, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, // 2*vperm2f128 + vshufpd
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, // 2*vperm2f128 + vshufpd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
};
if (ST->hasAVX())
@@ -1050,12 +1149,12 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry SSE41ShuffleTbl[] = {
- { TTI::SK_Select, MVT::v2i64, 1 }, // pblendw
- { TTI::SK_Select, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Select, MVT::v4i32, 1 }, // pblendw
- { TTI::SK_Select, MVT::v4f32, 1 }, // blendps
- { TTI::SK_Select, MVT::v8i16, 1 }, // pblendw
- { TTI::SK_Select, MVT::v16i8, 1 } // pblendvb
+ {TTI::SK_Select, MVT::v2i64, 1}, // pblendw
+ {TTI::SK_Select, MVT::v2f64, 1}, // movsd
+ {TTI::SK_Select, MVT::v4i32, 1}, // pblendw
+ {TTI::SK_Select, MVT::v4f32, 1}, // blendps
+ {TTI::SK_Select, MVT::v8i16, 1}, // pblendw
+ {TTI::SK_Select, MVT::v16i8, 1} // pblendvb
};
if (ST->hasSSE41())
@@ -1063,20 +1162,20 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry SSSE3ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_Broadcast, MVT::v16i8, 1 }, // pshufb
+ {TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb
- { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
+ {TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb
- { TTI::SK_Select, MVT::v8i16, 3 }, // 2*pshufb + por
- { TTI::SK_Select, MVT::v16i8, 3 }, // 2*pshufb + por
+ {TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por
+ {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por
- { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
- { TTI::SK_PermuteTwoSrc, MVT::v8i16, 3 }, // 2*pshufb + por
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 }, // 2*pshufb + por
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por
};
if (ST->hasSSSE3())
@@ -1084,29 +1183,29 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry SSE2ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_Broadcast, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_Broadcast, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd
- { TTI::SK_Broadcast, MVT::v16i8, 3 }, // unpck + pshuflw + pshufd
-
- { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
- { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + packus
-
- { TTI::SK_Select, MVT::v2i64, 1 }, // movsd
- { TTI::SK_Select, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Select, MVT::v4i32, 2 }, // 2*shufps
- { TTI::SK_Select, MVT::v8i16, 3 }, // pand + pandn + por
- { TTI::SK_Select, MVT::v16i8, 3 }, // pand + pandn + por
-
- { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_PermuteSingleSrc, MVT::v8i16, 5 }, // 2*pshuflw + 2*pshufhw
+ {TTI::SK_Broadcast, MVT::v2f64, 1}, // shufpd
+ {TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd
+ {TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd
+ {TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v16i8, 3}, // unpck + pshuflw + pshufd
+
+ {TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd
+ {TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd
+ {TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd
+ {TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd
+ {TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + packus
+
+ {TTI::SK_Select, MVT::v2i64, 1}, // movsd
+ {TTI::SK_Select, MVT::v2f64, 1}, // movsd
+ {TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps
+ {TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por
+ {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por
+
+ {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // pshufd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw
// + pshufd/unpck
{ TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + 2*packus
@@ -1145,6 +1244,27 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// FIXME: Need a better design of the cost table to handle non-simple types of
// potential massive combinations (elem_num x src_type x dst_type).
+ static const TypeConversionCostTblEntry AVX512BWConversionTbl[] {
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 },
+
+ // Mask sign extend has an instruction.
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 },
+
+ // Mask zero extend is a load + broadcast.
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 },
+ };
+
static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
@@ -1208,8 +1328,6 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
@@ -1231,12 +1349,16 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
+
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 },
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 },
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 },
@@ -1328,13 +1450,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 6 },
// The generic code to compute the scalar overhead is currently broken.
// Workaround this limitation by estimating the scalarization overhead
// here. We have roughly 10 instructions per scalar element.
// Multiply that by the vector width.
// FIXME: remove that when PR19268 is fixed.
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 10 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 20 },
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
@@ -1387,6 +1509,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
};
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
@@ -1408,11 +1531,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
+
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
@@ -1465,43 +1590,51 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (!SrcTy.isSimple() || !DstTy.isSimple())
return BaseT::getCastInstrCost(Opcode, Dst, Src);
- if (ST->hasDQI())
- if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
- return Entry->Cost;
+ MVT SimpleSrcTy = SrcTy.getSimpleVT();
+ MVT SimpleDstTy = DstTy.getSimpleVT();
- if (ST->hasAVX512())
- if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
- return Entry->Cost;
+ // Make sure that neither type is going to be split before using the
+ // AVX512 tables. This handles -mprefer-vector-width=256
+ // with -min-legal-vector-width<=256
+ if (TLI->getTypeAction(SimpleSrcTy) != TargetLowering::TypeSplitVector &&
+ TLI->getTypeAction(SimpleDstTy) != TargetLowering::TypeSplitVector) {
+ if (ST->hasBWI())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512BWConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+
+ if (ST->hasDQI())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+
+ if (ST->hasAVX512())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+ }
if (ST->hasAVX2()) {
if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
if (ST->hasAVX()) {
if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
if (ST->hasSSE41()) {
if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
if (ST->hasSSE2()) {
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
@@ -1629,6 +1762,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v16i32, 14 },
{ ISD::CTTZ, MVT::v32i16, 12 },
{ ISD::CTTZ, MVT::v64i8, 9 },
+ { ISD::SADDSAT, MVT::v32i16, 1 },
+ { ISD::SADDSAT, MVT::v64i8, 1 },
+ { ISD::SSUBSAT, MVT::v32i16, 1 },
+ { ISD::SSUBSAT, MVT::v64i8, 1 },
+ { ISD::UADDSAT, MVT::v32i16, 1 },
+ { ISD::UADDSAT, MVT::v64i8, 1 },
+ { ISD::USUBSAT, MVT::v32i16, 1 },
+ { ISD::USUBSAT, MVT::v64i8, 1 },
};
static const CostTblEntry AVX512CostTbl[] = {
{ ISD::BITREVERSE, MVT::v8i64, 36 },
@@ -1639,6 +1780,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTPOP, MVT::v16i32, 24 },
{ ISD::CTTZ, MVT::v8i64, 20 },
{ ISD::CTTZ, MVT::v16i32, 28 },
+ { ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
+ { ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq
+ { ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq
+ { ISD::USUBSAT, MVT::v8i64, 2 }, // pmaxuq + psubq
};
static const CostTblEntry XOPCostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, 4 },
@@ -1674,6 +1819,15 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v8i32, 14 },
{ ISD::CTTZ, MVT::v16i16, 12 },
{ ISD::CTTZ, MVT::v32i8, 9 },
+ { ISD::SADDSAT, MVT::v16i16, 1 },
+ { ISD::SADDSAT, MVT::v32i8, 1 },
+ { ISD::SSUBSAT, MVT::v16i16, 1 },
+ { ISD::SSUBSAT, MVT::v32i8, 1 },
+ { ISD::UADDSAT, MVT::v16i16, 1 },
+ { ISD::UADDSAT, MVT::v32i8, 1 },
+ { ISD::USUBSAT, MVT::v16i16, 1 },
+ { ISD::USUBSAT, MVT::v32i8, 1 },
+ { ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd
{ ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
@@ -1701,6 +1855,15 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v8i32, 30 }, // 2 x 128-bit Op + extract/insert
{ ISD::CTTZ, MVT::v16i16, 26 }, // 2 x 128-bit Op + extract/insert
{ ISD::CTTZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SSUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SSUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert
{ ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
@@ -1721,6 +1884,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd
};
static const CostTblEntry SSE42CostTbl[] = {
+ { ISD::USUBSAT, MVT::v4i32, 2 }, // pmaxud + psubd
{ ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
};
@@ -1765,6 +1929,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v4i32, 18 },
{ ISD::CTTZ, MVT::v8i16, 16 },
{ ISD::CTTZ, MVT::v16i8, 13 },
+ { ISD::SADDSAT, MVT::v8i16, 1 },
+ { ISD::SADDSAT, MVT::v16i8, 1 },
+ { ISD::SSUBSAT, MVT::v8i16, 1 },
+ { ISD::SSUBSAT, MVT::v16i8, 1 },
+ { ISD::UADDSAT, MVT::v8i16, 1 },
+ { ISD::UADDSAT, MVT::v16i8, 1 },
+ { ISD::USUBSAT, MVT::v8i16, 1 },
+ { ISD::USUBSAT, MVT::v16i8, 1 },
{ ISD::FSQRT, MVT::f64, 32 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/
};
@@ -1800,76 +1972,180 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::cttz:
ISD = ISD::CTTZ;
break;
+ case Intrinsic::sadd_sat:
+ ISD = ISD::SADDSAT;
+ break;
+ case Intrinsic::ssub_sat:
+ ISD = ISD::SSUBSAT;
+ break;
+ case Intrinsic::uadd_sat:
+ ISD = ISD::UADDSAT;
+ break;
+ case Intrinsic::usub_sat:
+ ISD = ISD::USUBSAT;
+ break;
case Intrinsic::sqrt:
ISD = ISD::FSQRT;
break;
}
- // Legalize the type.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
- MVT MTy = LT.second;
+ if (ISD != ISD::DELETED_NODE) {
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ MVT MTy = LT.second;
- // Attempt to lookup cost.
- if (ST->isGLM())
- if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ // Attempt to lookup cost.
+ if (ST->isGLM())
+ if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->isSLM())
- if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->isSLM())
+ if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasCDI())
- if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasCDI())
+ if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX512())
- if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasXOP())
- if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasXOP())
+ if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX2())
- if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSSE3())
- if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSSE3())
+ if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->is64Bit())
- if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->is64Bit())
+ if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+ }
return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF, ScalarizationCostPassed);
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF) {
+ static const CostTblEntry AVX512CostTbl[] = {
+ { ISD::ROTL, MVT::v8i64, 1 },
+ { ISD::ROTL, MVT::v4i64, 1 },
+ { ISD::ROTL, MVT::v2i64, 1 },
+ { ISD::ROTL, MVT::v16i32, 1 },
+ { ISD::ROTL, MVT::v8i32, 1 },
+ { ISD::ROTL, MVT::v4i32, 1 },
+ { ISD::ROTR, MVT::v8i64, 1 },
+ { ISD::ROTR, MVT::v4i64, 1 },
+ { ISD::ROTR, MVT::v2i64, 1 },
+ { ISD::ROTR, MVT::v16i32, 1 },
+ { ISD::ROTR, MVT::v8i32, 1 },
+ { ISD::ROTR, MVT::v4i32, 1 }
+ };
+ // XOP: ROTL = VPROT(X,Y), ROTR = VPROT(X,SUB(0,Y))
+ static const CostTblEntry XOPCostTbl[] = {
+ { ISD::ROTL, MVT::v4i64, 4 },
+ { ISD::ROTL, MVT::v8i32, 4 },
+ { ISD::ROTL, MVT::v16i16, 4 },
+ { ISD::ROTL, MVT::v32i8, 4 },
+ { ISD::ROTL, MVT::v2i64, 1 },
+ { ISD::ROTL, MVT::v4i32, 1 },
+ { ISD::ROTL, MVT::v8i16, 1 },
+ { ISD::ROTL, MVT::v16i8, 1 },
+ { ISD::ROTR, MVT::v4i64, 6 },
+ { ISD::ROTR, MVT::v8i32, 6 },
+ { ISD::ROTR, MVT::v16i16, 6 },
+ { ISD::ROTR, MVT::v32i8, 6 },
+ { ISD::ROTR, MVT::v2i64, 2 },
+ { ISD::ROTR, MVT::v4i32, 2 },
+ { ISD::ROTR, MVT::v8i16, 2 },
+ { ISD::ROTR, MVT::v16i8, 2 }
+ };
+ static const CostTblEntry X64CostTbl[] = { // 64-bit targets
+ { ISD::ROTL, MVT::i64, 1 },
+ { ISD::ROTR, MVT::i64, 1 },
+ { ISD::FSHL, MVT::i64, 4 }
+ };
+ static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
+ { ISD::ROTL, MVT::i32, 1 },
+ { ISD::ROTL, MVT::i16, 1 },
+ { ISD::ROTL, MVT::i8, 1 },
+ { ISD::ROTR, MVT::i32, 1 },
+ { ISD::ROTR, MVT::i16, 1 },
+ { ISD::ROTR, MVT::i8, 1 },
+ { ISD::FSHL, MVT::i32, 4 },
+ { ISD::FSHL, MVT::i16, 4 },
+ { ISD::FSHL, MVT::i8, 4 }
+ };
+
+ unsigned ISD = ISD::DELETED_NODE;
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::fshl:
+ ISD = ISD::FSHL;
+ if (Args[0] == Args[1])
+ ISD = ISD::ROTL;
+ break;
+ case Intrinsic::fshr:
+ // FSHR has the same costs, so don't duplicate.
+ ISD = ISD::FSHL;
+ if (Args[0] == Args[1])
+ ISD = ISD::ROTR;
+ break;
+ }
+
+ if (ISD != ISD::DELETED_NODE) {
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ MVT MTy = LT.second;
+
+ // Attempt to lookup cost.
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasXOP())
+ if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->is64Bit())
+ if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+ }
+
return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF);
}
@@ -2341,11 +2617,15 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return TTI::TCC_Free;
ImmIdx = 1;
break;
- case Instruction::Mul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
+ // Division by constant is typically expanded later into a different
+ // instruction sequence. This completely changes the constants.
+ // Report them as "free" to stop ConstantHoist from marking them as opaque.
+ return TTI::TCC_Free;
+ case Instruction::Mul:
case Instruction::Or:
case Instruction::Xor:
ImmIdx = 1;
@@ -2690,6 +2970,9 @@ X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
Options.LoadSizes.push_back(4);
Options.LoadSizes.push_back(2);
Options.LoadSizes.push_back(1);
+ // All GPR and vector loads can be unaligned. SIMD compare requires integer
+ // vectors (SSE2/AVX2).
+ Options.AllowOverlappingLoads = true;
return Options;
}();
return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions;
@@ -2718,7 +3001,14 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
// We currently support only fully-interleaved groups, with no gaps.
// TODO: Support also strided loads (interleaved-groups with gaps).
@@ -2827,7 +3117,14 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
// VecTy for interleave memop is <VF*Factor x Elt>.
// So, for VF=4, Interleave Factor = 3, Element type = i32 we have
@@ -2945,7 +3242,9 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
Type *EltTy = VecTy->getVectorElementType();
if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
@@ -2957,11 +3256,14 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
};
if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
if (ST->hasAVX2())
return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 3df899038820..1637592c81f8 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -101,13 +101,19 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
int getIntImmCost(int64_t);
diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
index dde9c734f492..185deda97c1f 100644
--- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -34,10 +34,6 @@ using namespace llvm;
#define DEBUG_TYPE "winehstate"
-namespace llvm {
-void initializeWinEHStatePassPass(PassRegistry &);
-}
-
namespace {
const int OverdefinedState = INT_MIN;
@@ -369,7 +365,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
// Insert an unlink before all returns.
for (BasicBlock &BB : *F) {
- TerminatorInst *T = BB.getTerminator();
+ Instruction *T = BB.getTerminator();
if (!isa<ReturnInst>(T))
continue;
Builder.SetInsertPoint(T);
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index b87c149a36dc..fff8a66d0e75 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -151,7 +151,7 @@ static void GetSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
Offset,
FramePtr));
}
- llvm::sort(SpillList.begin(), SpillList.end(), CompareSSIOffset);
+ llvm::sort(SpillList, CompareSSIOffset);
}
/// Creates an ordered list of EH info register 'spills'.
@@ -170,7 +170,7 @@ static void GetEHSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
SpillList.push_back(
StackSlotInfo(EHSlot[0], MFI.getObjectOffset(EHSlot[1]),
TL->getExceptionSelectorRegister(PersonalityFn)));
- llvm::sort(SpillList.begin(), SpillList.end(), CompareSSIOffset);
+ llvm::sort(SpillList, CompareSSIOffset);
}
static MachineMemOperand *getFrameIndexMMO(MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 1a56d1fd6e2f..1688c38efc1d 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -150,11 +150,10 @@ void XCoreDAGToDAGISel::Select(SDNode *N) {
SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
MVT::Other, CPIdx,
CurDAG->getEntryNode());
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] =
+ MachineMemOperand *MemOp =
MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
MachineMemOperand::MOLoad, 4, 4);
- cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(node), {MemOp});
ReplaceNode(N, node);
return;
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 99e76144cba3..75d7ae7048a1 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -403,8 +403,7 @@ SDValue XCoreTargetLowering::lowerLoadWordFromAlignedBasePlusOffset(
static bool isWordAligned(SDValue Value, SelectionDAG &DAG)
{
- KnownBits Known;
- DAG.computeKnownBits(Value, Known);
+ KnownBits Known = DAG.computeKnownBits(Value);
return Known.countMinTrailingZeros() >= 2;
}
@@ -1649,10 +1648,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
// low bit set
if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
- KnownBits Known;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.computeKnownBits(N2, Known);
+ KnownBits Known = DAG.computeKnownBits(N2);
if ((Known.Zero & Mask) == Mask) {
SDValue Carry = DAG.getConstant(0, dl, VT);
SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
@@ -1672,10 +1670,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
- KnownBits Known;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.computeKnownBits(N2, Known);
+ KnownBits Known = DAG.computeKnownBits(N2);
if ((Known.Zero & Mask) == Mask) {
SDValue Borrow = N2;
SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
@@ -1688,10 +1685,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
// low bit set
if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
- KnownBits Known;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.computeKnownBits(N2, Known);
+ KnownBits Known = DAG.computeKnownBits(N2);
if ((Known.Zero & Mask) == Mask) {
SDValue Borrow = DAG.getConstant(0, dl, VT);
SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
diff --git a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 1c93ba8fa14c..7455cd997ad6 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -129,7 +129,7 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
do {
SmallVector<WeakTrackingVH, 8> WUsers(CE->user_begin(), CE->user_end());
- llvm::sort(WUsers.begin(), WUsers.end());
+ llvm::sort(WUsers);
WUsers.erase(std::unique(WUsers.begin(), WUsers.end()), WUsers.end());
while (!WUsers.empty())
if (WeakTrackingVH WU = WUsers.pop_back_val()) {
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
index 9451a05d8d58..2e9fd98ed34f 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -33,8 +33,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 38925bfd51b0..2aa9932e2465 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -31,7 +31,8 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
+static CodeModel::Model
+getEffectiveXCoreCodeModel(Optional<CodeModel::Model> CM) {
if (CM) {
if (*CM != CodeModel::Small && *CM != CodeModel::Large)
report_fatal_error("Target only supports CodeModel Small or Large");
@@ -51,7 +52,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(
T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveXCoreCodeModel(CM), OL),
TLOF(llvm::make_unique<XCoreTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Testing/Support/SupportHelpers.cpp b/contrib/llvm/lib/Testing/Support/SupportHelpers.cpp
new file mode 100644
index 000000000000..5f53b2330b20
--- /dev/null
+++ b/contrib/llvm/lib/Testing/Support/SupportHelpers.cpp
@@ -0,0 +1,53 @@
+
+#include "llvm/Testing/Support/SupportHelpers.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::unittest;
+
+static std::pair<bool, SmallString<128>> findSrcDirMap(StringRef Argv0) {
+ SmallString<128> BaseDir = llvm::sys::path::parent_path(Argv0);
+
+ llvm::sys::fs::make_absolute(BaseDir);
+
+ SmallString<128> PathInSameDir = BaseDir;
+ llvm::sys::path::append(PathInSameDir, "llvm.srcdir.txt");
+
+ if (llvm::sys::fs::is_regular_file(PathInSameDir))
+ return std::make_pair(true, std::move(PathInSameDir));
+
+ SmallString<128> PathInParentDir = llvm::sys::path::parent_path(BaseDir);
+
+ llvm::sys::path::append(PathInParentDir, "llvm.srcdir.txt");
+ if (llvm::sys::fs::is_regular_file(PathInParentDir))
+ return std::make_pair(true, std::move(PathInParentDir));
+
+ return std::pair<bool, SmallString<128>>(false, {});
+}
+
+SmallString<128> llvm::unittest::getInputFileDirectory(const char *Argv0) {
+ bool Found = false;
+ SmallString<128> InputFilePath;
+ std::tie(Found, InputFilePath) = findSrcDirMap(Argv0);
+
+ EXPECT_TRUE(Found) << "Unit test source directory file does not exist.";
+
+ auto File = MemoryBuffer::getFile(InputFilePath);
+
+ EXPECT_TRUE(static_cast<bool>(File))
+ << "Could not open unit test source directory file.";
+
+ InputFilePath.clear();
+ InputFilePath.append((*File)->getBuffer().trim());
+ llvm::sys::path::append(InputFilePath, "Inputs");
+ llvm::sys::path::native(InputFilePath);
+ return InputFilePath;
+}
diff --git a/contrib/llvm/lib/TextAPI/ELF/ELFStub.cpp b/contrib/llvm/lib/TextAPI/ELF/ELFStub.cpp
new file mode 100644
index 000000000000..248a078a2404
--- /dev/null
+++ b/contrib/llvm/lib/TextAPI/ELF/ELFStub.cpp
@@ -0,0 +1,29 @@
+//===- ELFStub.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-----------------------------------------------------------------------===/
+
+#include "llvm/TextAPI/ELF/ELFStub.h"
+
+using namespace llvm;
+using namespace llvm::elfabi;
+
+ELFStub::ELFStub(ELFStub const &Stub) {
+ TbeVersion = Stub.TbeVersion;
+ Arch = Stub.Arch;
+ SoName = Stub.SoName;
+ NeededLibs = Stub.NeededLibs;
+ Symbols = Stub.Symbols;
+}
+
+ELFStub::ELFStub(ELFStub &&Stub) {
+ TbeVersion = std::move(Stub.TbeVersion);
+ Arch = std::move(Stub.Arch);
+ SoName = std::move(Stub.SoName);
+ NeededLibs = std::move(Stub.NeededLibs);
+ Symbols = std::move(Stub.Symbols);
+}
diff --git a/contrib/llvm/lib/TextAPI/ELF/TBEHandler.cpp b/contrib/llvm/lib/TextAPI/ELF/TBEHandler.cpp
new file mode 100644
index 000000000000..b621829d9358
--- /dev/null
+++ b/contrib/llvm/lib/TextAPI/ELF/TBEHandler.cpp
@@ -0,0 +1,161 @@
+//===- TBEHandler.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-----------------------------------------------------------------------===/
+
+#include "llvm/TextAPI/ELF/TBEHandler.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/TextAPI/ELF/ELFStub.h"
+
+using namespace llvm;
+using namespace llvm::elfabi;
+
+LLVM_YAML_STRONG_TYPEDEF(ELFArch, ELFArchMapper)
+
+namespace llvm {
+namespace yaml {
+
+/// YAML traits for ELFSymbolType.
+template <> struct ScalarEnumerationTraits<ELFSymbolType> {
+ static void enumeration(IO &IO, ELFSymbolType &SymbolType) {
+ IO.enumCase(SymbolType, "NoType", ELFSymbolType::NoType);
+ IO.enumCase(SymbolType, "Func", ELFSymbolType::Func);
+ IO.enumCase(SymbolType, "Object", ELFSymbolType::Object);
+ IO.enumCase(SymbolType, "TLS", ELFSymbolType::TLS);
+ IO.enumCase(SymbolType, "Unknown", ELFSymbolType::Unknown);
+ // Treat other symbol types as noise, and map to Unknown.
+ if (!IO.outputting() && IO.matchEnumFallback())
+ SymbolType = ELFSymbolType::Unknown;
+ }
+};
+
+/// YAML traits for ELFArch.
+template <> struct ScalarTraits<ELFArchMapper> {
+ static void output(const ELFArchMapper &Value, void *,
+ llvm::raw_ostream &Out) {
+ // Map from integer to architecture string.
+ switch (Value) {
+ case (ELFArch)ELF::EM_X86_64:
+ Out << "x86_64";
+ break;
+ case (ELFArch)ELF::EM_AARCH64:
+ Out << "AArch64";
+ break;
+ case (ELFArch)ELF::EM_NONE:
+ default:
+ Out << "Unknown";
+ }
+ }
+
+ static StringRef input(StringRef Scalar, void *, ELFArchMapper &Value) {
+ // Map from architecture string to integer.
+ Value = StringSwitch<ELFArch>(Scalar)
+ .Case("x86_64", ELF::EM_X86_64)
+ .Case("AArch64", ELF::EM_AARCH64)
+ .Case("Unknown", ELF::EM_NONE)
+ .Default(ELF::EM_NONE);
+
+ // Returning empty StringRef indicates successful parse.
+ return StringRef();
+ }
+
+ // Don't place quotation marks around architecture value.
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+
+/// YAML traits for TbeVersion.
+template <> struct ScalarTraits<VersionTuple> {
+ static void output(const VersionTuple &Value, void *,
+ llvm::raw_ostream &Out) {
+ Out << Value.getAsString();
+ }
+
+ static StringRef input(StringRef Scalar, void *, VersionTuple &Value) {
+ if (Value.tryParse(Scalar))
+ return StringRef("Can't parse version: invalid version format.");
+
+ if (Value > TBEVersionCurrent)
+ return StringRef("Unsupported TBE version.");
+
+ // Returning empty StringRef indicates successful parse.
+ return StringRef();
+ }
+
+ // Don't place quotation marks around version value.
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+
+/// YAML traits for ELFSymbol.
+template <> struct MappingTraits<ELFSymbol> {
+ static void mapping(IO &IO, ELFSymbol &Symbol) {
+ IO.mapRequired("Type", Symbol.Type);
+ // The need for symbol size depends on the symbol type.
+ if (Symbol.Type == ELFSymbolType::NoType) {
+ IO.mapOptional("Size", Symbol.Size, (uint64_t)0);
+ } else if (Symbol.Type == ELFSymbolType::Func) {
+ Symbol.Size = 0;
+ } else {
+ IO.mapRequired("Size", Symbol.Size);
+ }
+ IO.mapOptional("Undefined", Symbol.Undefined, false);
+ IO.mapOptional("Weak", Symbol.Weak, false);
+ IO.mapOptional("Warning", Symbol.Warning);
+ }
+
+ // Compacts symbol information into a single line.
+ static const bool flow = true;
+};
+
+/// YAML traits for set of ELFSymbols.
+template <> struct CustomMappingTraits<std::set<ELFSymbol>> {
+ static void inputOne(IO &IO, StringRef Key, std::set<ELFSymbol> &Set) {
+ ELFSymbol Sym(Key.str());
+ IO.mapRequired(Key.str().c_str(), Sym);
+ Set.insert(Sym);
+ }
+
+ static void output(IO &IO, std::set<ELFSymbol> &Set) {
+ for (auto &Sym : Set)
+ IO.mapRequired(Sym.Name.c_str(), const_cast<ELFSymbol &>(Sym));
+ }
+};
+
+/// YAML traits for ELFStub objects.
+template <> struct MappingTraits<ELFStub> {
+ static void mapping(IO &IO, ELFStub &Stub) {
+ if (!IO.mapTag("!tapi-tbe", true))
+ IO.setError("Not a .tbe YAML file.");
+ IO.mapRequired("TbeVersion", Stub.TbeVersion);
+ IO.mapOptional("SoName", Stub.SoName);
+ IO.mapRequired("Arch", (ELFArchMapper &)Stub.Arch);
+ IO.mapOptional("NeededLibs", Stub.NeededLibs);
+ IO.mapRequired("Symbols", Stub.Symbols);
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+Expected<std::unique_ptr<ELFStub>> elfabi::readTBEFromBuffer(StringRef Buf) {
+ yaml::Input YamlIn(Buf);
+ std::unique_ptr<ELFStub> Stub(new ELFStub());
+ YamlIn >> *Stub;
+ if (std::error_code Err = YamlIn.error())
+ return createStringError(Err, "YAML failed reading as TBE");
+
+ return std::move(Stub);
+}
+
+Error elfabi::writeTBEToOutputStream(raw_ostream &OS, const ELFStub &Stub) {
+ yaml::Output YamlOut(OS, NULL, /*WrapColumn =*/0);
+
+ YamlOut << const_cast<ELFStub &>(Stub);
+ return Error::success();
+}
diff --git a/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
index f8de7ca73924..c5a28d4f1c08 100644
--- a/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
+++ b/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -96,7 +96,8 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
// Handle when no input or output is specified
if (Args.hasArgNoClaim(OPT_INPUT) ||
(!Args.hasArgNoClaim(OPT_d) && !Args.hasArgNoClaim(OPT_l))) {
- Table.PrintHelp(outs(), ArgsArr[0], "dlltool", false);
+ Table.PrintHelp(outs(), "llvm-dlltool [options] file...", "llvm-dlltool",
+ false);
llvm::outs() << "\nTARGETS: i386, i386:x86-64, arm, arm64\n";
return 1;
}
diff --git a/contrib/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/contrib/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index d636dca7a2c7..64f4fe423f25 100644
--- a/contrib/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/contrib/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -123,7 +123,7 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
// Handle /help
if (Args.hasArg(OPT_help)) {
- Table.PrintHelp(outs(), ArgsArr[0], "LLVM Lib");
+ Table.PrintHelp(outs(), "llvm-lib [options] file...", "LLVM Lib");
return 0;
}
diff --git a/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index b622d018478a..c795866ec0f2 100644
--- a/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -16,7 +16,7 @@
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "AggressiveInstCombineInternal.h"
#include "llvm-c/Initialization.h"
-#include "llvm-c/Transforms/Scalar.h"
+#include "llvm-c/Transforms/AggressiveInstCombine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
@@ -59,6 +59,99 @@ public:
};
} // namespace
+/// Match a pattern for a bitwise rotate operation that partially guards
+/// against undefined behavior by branching around the rotation when the shift
+/// amount is 0.
+static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+ if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
+ return false;
+
+ // As with the one-use checks below, this is not strictly necessary, but we
+ // are being cautious to avoid potential perf regressions on targets that
+ // do not actually have a rotate instruction (where the funnel shift would be
+ // expanded back into math/shift/logic ops).
+ if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
+ return false;
+
+ // Match V to funnel shift left/right and capture the source operand and
+ // shift amount in X and Y.
+ auto matchRotate = [](Value *V, Value *&X, Value *&Y) {
+ Value *L0, *L1, *R0, *R1;
+ unsigned Width = V->getType()->getScalarSizeInBits();
+ auto Sub = m_Sub(m_SpecificInt(Width), m_Value(R1));
+
+ // rotate_left(X, Y) == (X << Y) | (X >> (Width - Y))
+ auto RotL = m_OneUse(
+ m_c_Or(m_Shl(m_Value(L0), m_Value(L1)), m_LShr(m_Value(R0), Sub)));
+ if (RotL.match(V) && L0 == R0 && L1 == R1) {
+ X = L0;
+ Y = L1;
+ return Intrinsic::fshl;
+ }
+
+ // rotate_right(X, Y) == (X >> Y) | (X << (Width - Y))
+ auto RotR = m_OneUse(
+ m_c_Or(m_LShr(m_Value(L0), m_Value(L1)), m_Shl(m_Value(R0), Sub)));
+ if (RotR.match(V) && L0 == R0 && L1 == R1) {
+ X = L0;
+ Y = L1;
+ return Intrinsic::fshr;
+ }
+
+ return Intrinsic::not_intrinsic;
+ };
+
+ // One phi operand must be a rotate operation, and the other phi operand must
+ // be the source value of that rotate operation:
+ // phi [ rotate(RotSrc, RotAmt), RotBB ], [ RotSrc, GuardBB ]
+ PHINode &Phi = cast<PHINode>(I);
+ Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
+ Value *RotSrc, *RotAmt;
+ Intrinsic::ID IID = matchRotate(P0, RotSrc, RotAmt);
+ if (IID == Intrinsic::not_intrinsic || RotSrc != P1) {
+ IID = matchRotate(P1, RotSrc, RotAmt);
+ if (IID == Intrinsic::not_intrinsic || RotSrc != P0)
+ return false;
+ assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
+ "Pattern must match funnel shift left or right");
+ }
+
+ // The incoming block with our source operand must be the "guard" block.
+ // That must contain a cmp+branch to avoid the rotate when the shift amount
+ // is equal to 0. The other incoming block is the block with the rotate.
+ BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
+ BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
+ Instruction *TermI = GuardBB->getTerminator();
+ BasicBlock *TrueBB, *FalseBB;
+ ICmpInst::Predicate Pred;
+ if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()), TrueBB,
+ FalseBB)))
+ return false;
+
+ BasicBlock *PhiBB = Phi.getParent();
+ if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB)
+ return false;
+
+ // We matched a variation of this IR pattern:
+ // GuardBB:
+ // %cmp = icmp eq i32 %RotAmt, 0
+ // br i1 %cmp, label %PhiBB, label %RotBB
+ // RotBB:
+ // %sub = sub i32 32, %RotAmt
+ // %shr = lshr i32 %X, %sub
+ // %shl = shl i32 %X, %RotAmt
+ // %rot = or i32 %shr, %shl
+ // br label %PhiBB
+ // PhiBB:
+ // %cond = phi i32 [ %rot, %RotBB ], [ %X, %GuardBB ]
+ // -->
+ // llvm.fshl.i32(i32 %X, i32 %RotAmt)
+ IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
+ Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
+ Phi.replaceAllUsesWith(Builder.CreateCall(F, {RotSrc, RotSrc, RotAmt}));
+ return true;
+}
+
/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
/// of 'and' ops, then we also need to capture the fact that we saw an
@@ -69,9 +162,9 @@ struct MaskOps {
bool MatchAndChain;
bool FoundAnd1;
- MaskOps(unsigned BitWidth, bool MatchAnds) :
- Root(nullptr), Mask(APInt::getNullValue(BitWidth)),
- MatchAndChain(MatchAnds), FoundAnd1(false) {}
+ MaskOps(unsigned BitWidth, bool MatchAnds)
+ : Root(nullptr), Mask(APInt::getNullValue(BitWidth)),
+ MatchAndChain(MatchAnds), FoundAnd1(false) {}
};
/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
@@ -152,8 +245,8 @@ static bool foldAnyOrAllBitsSet(Instruction &I) {
IRBuilder<> Builder(&I);
Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);
Value *And = Builder.CreateAnd(MOps.Root, Mask);
- Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask) :
- Builder.CreateIsNotNull(And);
+ Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask)
+ : Builder.CreateIsNotNull(And);
Value *Zext = Builder.CreateZExt(Cmp, I.getType());
I.replaceAllUsesWith(Zext);
return true;
@@ -174,8 +267,10 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
// Also, we want to avoid matching partial patterns.
// TODO: It would be more efficient if we removed dead instructions
// iteratively in this loop rather than waiting until the end.
- for (Instruction &I : make_range(BB.rbegin(), BB.rend()))
+ for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
MadeChange |= foldAnyOrAllBitsSet(I);
+ MadeChange |= foldGuardedRotateToFunnelShift(I);
+ }
}
// We're done with transforms, so remove dead instructions.
diff --git a/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h b/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
index 199374cdabf3..f3c8bde9f8ff 100644
--- a/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
+++ b/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
@@ -13,6 +13,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TRANSFORMS_AGGRESSIVEINSTCOMBINE_COMBINEINTERNAL_H
+#define LLVM_LIB_TRANSFORMS_AGGRESSIVEINSTCOMBINE_COMBINEINTERNAL_H
+
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -119,3 +122,5 @@ private:
void ReduceExpressionDag(Type *SclTy);
};
} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index dfe05c4b2a5e..58f952b54f3a 100644
--- a/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -157,7 +157,7 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
SmallPtrSet<Instruction *, 8> Terminators;
for (BasicBlock &B : *F) {
auto *TI = B.getTerminator();
- if (TI->getNumSuccessors() == 0 && !TI->isExceptional() &&
+ if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
!isa<UnreachableInst>(TI))
Terminators.insert(TI);
}
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index cf63b678b618..4cb0a52961cc 100644
--- a/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -49,7 +49,7 @@ public:
BlockToIndexMapping(Function &F) {
for (BasicBlock &BB : F)
V.push_back(&BB);
- llvm::sort(V.begin(), V.end());
+ llvm::sort(V);
}
size_t blockToIndex(BasicBlock *BB) const {
@@ -546,7 +546,8 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
} else {
// For all other values, the spill is placed immediately after
// the definition.
- assert(!isa<TerminatorInst>(E.def()) && "unexpected terminator");
+ assert(!cast<Instruction>(E.def())->isTerminator() &&
+ "unexpected terminator");
InsertPt = cast<Instruction>(E.def())->getNextNode();
}
@@ -600,7 +601,7 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
}
// Sets the unwind edge of an instruction to a particular successor.
-static void setUnwindEdgeTo(TerminatorInst *TI, BasicBlock *Succ) {
+static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
if (auto *II = dyn_cast<InvokeInst>(TI))
II->setUnwindDest(Succ);
else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 49acc5e93a39..9eeceb217ba8 100644
--- a/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -459,7 +459,7 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
DenseMap<Value *, Value *> ResolvedValues;
Instruction *I = InitialInst;
- while (isa<TerminatorInst>(I)) {
+ while (I->isTerminator()) {
if (isa<ReturnInst>(I)) {
if (I != InitialInst)
ReplaceInstWithInst(InitialInst, I->clone());
@@ -538,43 +538,92 @@ static void handleNoSuspendCoroutine(CoroBeginInst *CoroBegin, Type *FrameTy) {
CoroBegin->eraseFromParent();
}
-// look for a very simple pattern
-// coro.save
-// no other calls
-// resume or destroy call
-// coro.suspend
-//
-// If there are other calls between coro.save and coro.suspend, they can
-// potentially resume or destroy the coroutine, so it is unsafe to eliminate a
-// suspend point.
-static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
- CoroBeginInst *CoroBegin) {
- auto *Save = Suspend->getCoroSave();
- auto *BB = Suspend->getParent();
- if (BB != Save->getParent())
- return false;
+// SimplifySuspendPoint needs to check that there are no calls between
+// coro_save and coro_suspend, since any of those calls may potentially resume
+// the coroutine, in which case we cannot eliminate the suspend point.
+static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
+ for (Instruction *I = From; I != To; I = I->getNextNode()) {
+ // Assume that no intrinsic can resume the coroutine.
+ if (isa<IntrinsicInst>(I))
+ continue;
- CallSite SingleCallSite;
+ if (CallSite(I))
+ return true;
+ }
+ return false;
+}
- // Check that we have only one CallSite.
- for (Instruction *I = Save->getNextNode(); I != Suspend;
- I = I->getNextNode()) {
- if (isa<CoroFrameInst>(I))
- continue;
- if (isa<CoroSubFnInst>(I))
- continue;
- if (CallSite CS = CallSite(I)) {
- if (SingleCallSite)
- return false;
- else
- SingleCallSite = CS;
- }
+static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
+ SmallPtrSet<BasicBlock *, 8> Set;
+ SmallVector<BasicBlock *, 8> Worklist;
+
+ Set.insert(SaveBB);
+ Worklist.push_back(ResDesBB);
+
+ // Accumulate all blocks between SaveBB and ResDesBB. Because the coro.save
+ // intrinsic returns a token consumed by the suspend instruction, all blocks
+ // in between must eventually reach SaveBB when walking backwards from ResDesBB.
+ while (!Worklist.empty()) {
+ auto *BB = Worklist.pop_back_val();
+ Set.insert(BB);
+ for (auto *Pred : predecessors(BB))
+ if (Set.count(Pred) == 0)
+ Worklist.push_back(Pred);
}
- auto *CallInstr = SingleCallSite.getInstruction();
- if (!CallInstr)
+
+ // SaveBB and ResDesBB are checked separately in hasCallsBetween.
+ Set.erase(SaveBB);
+ Set.erase(ResDesBB);
+
+ for (auto *BB : Set)
+ if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
+ return true;
+
+ return false;
+}
+
+static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
+ auto *SaveBB = Save->getParent();
+ auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
+
+ if (SaveBB == ResumeOrDestroyBB)
+ return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);
+
+ // Any calls from Save to the end of the block?
+ if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
+ return true;
+
+ // Any calls from the beginning of the block up to ResumeOrDestroy?
+ if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
+ ResumeOrDestroy))
+ return true;
+
+ // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
+ if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
+ return true;
+
+ return false;
+}
+
+// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
+// suspend point and replace it with normal control flow.
+static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
+ CoroBeginInst *CoroBegin) {
+ Instruction *Prev = Suspend->getPrevNode();
+ if (!Prev) {
+ auto *Pred = Suspend->getParent()->getSinglePredecessor();
+ if (!Pred)
+ return false;
+ Prev = Pred->getTerminator();
+ }
+
+ CallSite CS{Prev};
+ if (!CS)
return false;
- auto *Callee = SingleCallSite.getCalledValue()->stripPointerCasts();
+ auto *CallInstr = CS.getInstruction();
+
+ auto *Callee = CS.getCalledValue()->stripPointerCasts();
// See if the callsite is for resumption or destruction of the coroutine.
auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
@@ -585,6 +634,13 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
if (SubFn->getFrame() != CoroBegin)
return false;
+ // See if the transformation is safe. Specifically, see if there are any
+ // calls in between Save and CallInstr. They can potentially resume the
+ // coroutine, rendering this optimization unsafe.
+ auto *Save = Suspend->getCoroSave();
+ if (hasCallsBetween(Save, CallInstr))
+ return false;
+
// Replace llvm.coro.suspend with the value that results in resumption over
// the resume or cleanup path.
Suspend->replaceAllUsesWith(SubFn->getRawIndex());
@@ -592,8 +648,20 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
Save->eraseFromParent();
// No longer need a call to coro.resume or coro.destroy.
+ if (auto *Invoke = dyn_cast<InvokeInst>(CallInstr)) {
+ BranchInst::Create(Invoke->getNormalDest(), Invoke);
+ }
+
+ // Grab the CalledValue from CS before erasing the CallInstr.
+ auto *CalledValue = CS.getCalledValue();
CallInstr->eraseFromParent();
+ // If it has no remaining users, remove it. Usually it is a bitcast of SubFn.
+ if (CalledValue != SubFn && CalledValue->user_empty())
+ if (auto *I = dyn_cast<Instruction>(CalledValue))
+ I->eraseFromParent();
+
+ // Now we are good to remove SubFn.
if (SubFn->user_empty())
SubFn->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 731faeb5dce4..cf84f916e24b 100644
--- a/contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Coroutines.h"
+#include "llvm-c/Transforms/Coroutines.h"
#include "CoroInstr.h"
#include "CoroInternal.h"
#include "llvm/ADT/SmallVector.h"
@@ -344,3 +345,19 @@ void coro::Shape::buildFrom(Function &F) {
for (CoroSaveInst *CoroSave : UnusedCoroSaves)
CoroSave->eraseFromParent();
}
+
+void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCoroEarlyPass());
+}
+
+void LLVMAddCoroSplitPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCoroSplitPass());
+}
+
+void LLVMAddCoroElidePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCoroElidePass());
+}
+
+void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCoroCleanupPass());
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 3b735ddd192e..07138718ce2c 100644
--- a/contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -150,7 +150,7 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) {
// declarations.
if (Callee && !Callee->isDeclaration() &&
CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
- return InlineCost::getAlways();
+ return InlineCost::getAlways("always inliner");
- return InlineCost::getNever();
+ return InlineCost::getNever("always inliner");
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index f2c2b55b1c5b..4663de0b049e 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -49,6 +49,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -213,7 +214,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
// Create the new function body and insert it into the module.
- Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
+ F->getName());
NF->copyAttributesFrom(F);
// Patch the pointer to LLVM function in debug info descriptor.
@@ -808,6 +810,21 @@ static bool canPaddingBeAccessed(Argument *arg) {
return false;
}
+static bool areFunctionArgsABICompatible(
+ const Function &F, const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<Argument *> &ArgsToPromote,
+ SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
+ for (const Use &U : F.uses()) {
+ CallSite CS(U.getUser());
+ const Function *Caller = CS.getCaller();
+ const Function *Callee = CS.getCalledFunction();
+ if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) ||
+ !TTI.areFunctionArgsABICompatible(Caller, Callee, ByValArgsToTransform))
+ return false;
+ }
+ return true;
+}
+
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
@@ -816,7 +833,8 @@ static Function *
promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
unsigned MaxElements,
Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
- ReplaceCallSite) {
+ ReplaceCallSite,
+ const TargetTransformInfo &TTI) {
// Don't perform argument promotion for naked functions; otherwise we can end
// up removing parameters that are seemingly 'not used' as they are referred
// to in the assembly.
@@ -845,7 +863,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// Second check: make sure that all callers are direct callers. We can't
// transform functions that have indirect callers. Also see if the function
- // is self-recursive.
+ // is self-recursive and check that target features are compatible.
bool isSelfRecursive = false;
for (Use &U : F->uses()) {
CallSite CS(U.getUser());
@@ -954,6 +972,10 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
return nullptr;
+ if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote,
+ ByValArgsToTransform))
+ return nullptr;
+
return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
}
@@ -979,7 +1001,9 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
return FAM.getResult<AAManager>(F);
};
- Function *NewF = promoteArguments(&OldF, AARGetter, MaxElements, None);
+ const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
+ Function *NewF =
+ promoteArguments(&OldF, AARGetter, MaxElements, None, TTI);
if (!NewF)
continue;
LocalChange = true;
@@ -1017,6 +1041,7 @@ struct ArgPromotion : public CallGraphSCCPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
getAAResultsAnalysisUsage(AU);
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -1042,6 +1067,7 @@ INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
@@ -1078,8 +1104,10 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
};
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*OldF);
if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
- {ReplaceCallSite})) {
+ {ReplaceCallSite}, TTI)) {
LocalChange = true;
// Update the call graph for the newly promoted function.
diff --git a/contrib/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/contrib/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index d642445b35de..de62cfc0c1db 100644
--- a/contrib/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -345,6 +345,9 @@ private:
void visitInst(Instruction &I,
DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues,
SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) {
+ // Simply bail if this instruction has no users.
+ if (I.use_empty())
+ return;
auto RegI = CVPLatticeKey(&I, IPOGrouping::Register);
ChangedValues[RegI] = getOverdefinedVal();
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index e0b1037053f0..81f3634eaf28 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -40,7 +40,7 @@ using namespace llvm;
#define DEBUG_TYPE "constmerge"
-STATISTIC(NumMerged, "Number of global constants merged");
+STATISTIC(NumIdenticalMerged, "Number of identical global constants merged");
/// Find values that are marked as llvm.used.
static void FindUsedValues(GlobalVariable *LLVMUsed,
@@ -91,6 +91,37 @@ static unsigned getAlignment(GlobalVariable *GV) {
return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
}
+enum class CanMerge { No, Yes };
+static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) {
+ if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr())
+ return CanMerge::No;
+ if (hasMetadataOtherThanDebugLoc(Old))
+ return CanMerge::No;
+ assert(!hasMetadataOtherThanDebugLoc(New));
+ if (!Old->hasGlobalUnnamedAddr())
+ New->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
+ return CanMerge::Yes;
+}
+
+static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) {
+ Constant *NewConstant = New;
+
+ LLVM_DEBUG(dbgs() << "Replacing global: @" << Old->getName() << " -> @"
+ << New->getName() << "\n");
+
+ // Bump the alignment if necessary.
+ if (Old->getAlignment() || New->getAlignment())
+ New->setAlignment(std::max(getAlignment(Old), getAlignment(New)));
+
+ copyDebugLocMetadata(Old, New);
+ Old->replaceAllUsesWith(NewConstant);
+
+ // Delete the global value from the module.
+ assert(Old->hasLocalLinkage() &&
+ "Refusing to delete an externally visible global variable.");
+ Old->eraseFromParent();
+}
+
static bool mergeConstants(Module &M) {
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -100,17 +131,18 @@ static bool mergeConstants(Module &M) {
// Map unique constants to globals.
DenseMap<Constant *, GlobalVariable *> CMap;
- // Replacements - This vector contains a list of replacements to perform.
- SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
+ SmallVector<std::pair<GlobalVariable *, GlobalVariable *>, 32>
+ SameContentReplacements;
- bool MadeChange = false;
+ size_t ChangesMade = 0;
+ size_t OldChangesMade = 0;
// Iterate constant merging while we are still making progress. Merging two
// constants together may allow us to merge other constants together if the
// second level constants have initializers which point to the globals that
// were just merged.
while (true) {
- // First: Find the canonical constants others will be merged with.
+ // Find the canonical constants others will be merged with.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
GlobalVariable *GV = &*GVI++;
@@ -119,6 +151,7 @@ static bool mergeConstants(Module &M) {
GV->removeDeadConstantUsers();
if (GV->use_empty() && GV->hasLocalLinkage()) {
GV->eraseFromParent();
+ ++ChangesMade;
continue;
}
@@ -148,12 +181,16 @@ static bool mergeConstants(Module &M) {
// If this is the first constant we find or if the old one is local,
// replace with the current one. If the current is externally visible
// it cannot be replace, but can be the canonical constant we merge with.
- if (!Slot || IsBetterCanonical(*GV, *Slot))
+ bool FirstConstantFound = !Slot;
+ if (FirstConstantFound || IsBetterCanonical(*GV, *Slot)) {
Slot = GV;
+ LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV->getName()
+ << (FirstConstantFound ? "\n" : " (updated)\n"));
+ }
}
- // Second: identify all globals that can be merged together, filling in
- // the Replacements vector. We cannot do the replacement in this pass
+ // Identify all globals that can be merged together, filling in the
+ // SameContentReplacements vector. We cannot do the replacement in this pass
// because doing so may cause initializers of other globals to be rewritten,
// invalidating the Constant* pointers in CMap.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
@@ -174,54 +211,43 @@ static bool mergeConstants(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- GlobalVariable *Slot = CMap[Init];
-
- if (!Slot || Slot == GV)
+ auto Found = CMap.find(Init);
+ if (Found == CMap.end())
continue;
- if (!Slot->hasGlobalUnnamedAddr() && !GV->hasGlobalUnnamedAddr())
+ GlobalVariable *Slot = Found->second;
+ if (Slot == GV)
continue;
- if (hasMetadataOtherThanDebugLoc(GV))
+ if (makeMergeable(GV, Slot) == CanMerge::No)
continue;
- if (!GV->hasGlobalUnnamedAddr())
- Slot->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
-
// Make all uses of the duplicate constant use the canonical version.
- Replacements.push_back(std::make_pair(GV, Slot));
+ LLVM_DEBUG(dbgs() << "Will replace: @" << GV->getName() << " -> @"
+ << Slot->getName() << "\n");
+ SameContentReplacements.push_back(std::make_pair(GV, Slot));
}
- if (Replacements.empty())
- return MadeChange;
- CMap.clear();
-
// Now that we have figured out which replacements must be made, do them all
// now. This avoid invalidating the pointers in CMap, which are unneeded
// now.
- for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
- // Bump the alignment if necessary.
- if (Replacements[i].first->getAlignment() ||
- Replacements[i].second->getAlignment()) {
- Replacements[i].second->setAlignment(
- std::max(getAlignment(Replacements[i].first),
- getAlignment(Replacements[i].second)));
- }
-
- copyDebugLocMetadata(Replacements[i].first, Replacements[i].second);
-
- // Eliminate any uses of the dead global.
- Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
-
- // Delete the global value from the module.
- assert(Replacements[i].first->hasLocalLinkage() &&
- "Refusing to delete an externally visible global variable.");
- Replacements[i].first->eraseFromParent();
+ for (unsigned i = 0, e = SameContentReplacements.size(); i != e; ++i) {
+ GlobalVariable *Old = SameContentReplacements[i].first;
+ GlobalVariable *New = SameContentReplacements[i].second;
+ replace(M, Old, New);
+ ++ChangesMade;
+ ++NumIdenticalMerged;
}
- NumMerged += Replacements.size();
- Replacements.clear();
+ if (ChangesMade == OldChangesMade)
+ break;
+ OldChangesMade = ChangesMade;
+
+ SameContentReplacements.clear();
+ CMap.clear();
}
+
+ return ChangesMade;
}
PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) {
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index cd2bd734eb26..cb30e8f46a54 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -24,7 +24,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -165,7 +164,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
unsigned NumArgs = Params.size();
// Create the new function body and insert it into the module...
- Function *NF = Function::Create(NFTy, Fn.getLinkage());
+ Function *NF = Function::Create(NFTy, Fn.getLinkage(), Fn.getAddressSpace());
NF->copyAttributesFrom(&Fn);
NF->setComdat(Fn.getComdat());
Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF);
@@ -289,16 +288,21 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
return false;
SmallVector<unsigned, 8> UnusedArgs;
+ bool Changed = false;
+
for (Argument &Arg : Fn.args()) {
- if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasByValOrInAllocaAttr())
+ if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasByValOrInAllocaAttr()) {
+ if (Arg.isUsedByMetadata()) {
+ Arg.replaceAllUsesWith(UndefValue::get(Arg.getType()));
+ Changed = true;
+ }
UnusedArgs.push_back(Arg.getArgNo());
+ }
}
if (UnusedArgs.empty())
return false;
- bool Changed = false;
-
for (Use &U : Fn.uses()) {
CallSite CS(U.getUser());
if (!CS || !CS.isCallee(&U))
@@ -859,7 +863,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
return false;
// Create the new function body and insert it into the module...
- Function *NF = Function::Create(NFTy, F->getLinkage());
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace());
NF->copyAttributesFrom(F);
NF->setComdat(F->getComdat());
NF->setAttributes(NewPAL);
@@ -949,16 +953,16 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
ArgAttrVec.clear();
Instruction *New = NewCS.getInstruction();
- if (!Call->use_empty()) {
+ if (!Call->use_empty() || Call->isUsedByMetadata()) {
if (New->getType() == Call->getType()) {
// Return type not changed? Just replace users then.
Call->replaceAllUsesWith(New);
New->takeName(Call);
} else if (New->getType()->isVoidTy()) {
- // Our return value has uses, but they will get removed later on.
- // Replace by null for now.
+ // If the return value is dead, replace any uses of it with undef
+ // (any non-debug value uses will get removed later on).
if (!Call->getType()->isX86_MMXTy())
- Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+ Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
} else {
assert((RetTy->isStructTy() || RetTy->isArrayTy()) &&
"Return type changed, but not into a void. The old return type"
@@ -1018,10 +1022,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
I2->takeName(&*I);
++I2;
} else {
- // If this argument is dead, replace any uses of it with null constants
- // (these are guaranteed to become unused later on).
+ // If this argument is dead, replace any uses of it with undef
+ // (any non-debug value uses will get removed later on).
if (!I->getType()->isX86_MMXTy())
- I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
}
// If we change the return value of the function we must rewrite any return
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index d45a88323910..a744d7f2d2d9 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -135,6 +135,7 @@ namespace {
llvm::Value *Declaration;
if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ CurI->getAddressSpace(),
CurI->getName(), &M);
} else {
diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 37273f975417..4dc1529ddbf5 100644
--- a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -58,6 +58,7 @@ static Attribute::AttrKind parseAttrKind(StringRef Kind) {
.Case("sanitize_hwaddress", Attribute::SanitizeHWAddress)
.Case("sanitize_memory", Attribute::SanitizeMemory)
.Case("sanitize_thread", Attribute::SanitizeThread)
+ .Case("speculative_load_hardening", Attribute::SpeculativeLoadHardening)
.Case("ssp", Attribute::StackProtect)
.Case("sspreq", Attribute::StackProtectReq)
.Case("sspstrong", Attribute::StackProtectStrong)
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 010b0a29807d..4e2a82b56eec 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
@@ -66,6 +67,7 @@ using namespace llvm;
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
+STATISTIC(NumWriteOnly, "Number of functions marked writeonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
@@ -113,27 +115,30 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
if (AliasAnalysis::onlyReadsMemory(MRB))
return MAK_ReadOnly;
- // Conservatively assume it writes to memory.
+ if (AliasAnalysis::doesNotReadMemory(MRB))
+ return MAK_WriteOnly;
+
+ // Conservatively assume it reads and writes to memory.
return MAK_MayWrite;
}
// Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
+ bool WritesMemory = false;
for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
Instruction *I = &*II;
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
- CallSite CS(cast<Value>(I));
- if (CS) {
+ if (auto *Call = dyn_cast<CallBase>(I)) {
// Ignore calls to functions in the same SCC, as long as the call sites
// don't have operand bundles. Calls with operand bundles are allowed to
// have memory effects not described by the memory effects of the call
// target.
- if (!CS.hasOperandBundles() && CS.getCalledFunction() &&
- SCCNodes.count(CS.getCalledFunction()))
+ if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
+ SCCNodes.count(Call->getCalledFunction()))
continue;
- FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS);
+ FunctionModRefBehavior MRB = AAR.getModRefBehavior(Call);
ModRefInfo MRI = createModRefInfo(MRB);
// If the call doesn't access memory, we're done.
@@ -141,9 +146,9 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
continue;
if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
- // The call could access any memory. If that includes writes, give up.
+ // The call could access any memory. If that includes writes, note it.
if (isModSet(MRI))
- return MAK_MayWrite;
+ WritesMemory = true;
// If it reads, note it.
if (isRefSet(MRI))
ReadsMemory = true;
@@ -152,7 +157,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Check whether all pointer arguments point to local memory, and
// ignore calls that only access local memory.
- for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ for (CallSite::arg_iterator CI = Call->arg_begin(), CE = Call->arg_end();
CI != CE; ++CI) {
Value *Arg = *CI;
if (!Arg->getType()->isPtrOrPtrVectorTy())
@@ -160,7 +165,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
AAMDNodes AAInfo;
I->getAAMetadata(AAInfo);
- MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);
+ MemoryLocation Loc(Arg, LocationSize::unknown(), AAInfo);
// Skip accesses to local or constant memory as they don't impact the
// externally visible mod/ref behavior.
@@ -168,8 +173,8 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
continue;
if (isModSet(MRI))
- // Writes non-local memory. Give up.
- return MAK_MayWrite;
+ // Writes non-local memory.
+ WritesMemory = true;
if (isRefSet(MRI))
// Ok, it reads non-local memory.
ReadsMemory = true;
@@ -198,14 +203,21 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Any remaining instructions need to be taken seriously! Check if they
// read or write memory.
- if (I->mayWriteToMemory())
- // Writes memory. Just give up.
- return MAK_MayWrite;
+ //
+ // If this instruction may write to memory, remember that.
+ WritesMemory |= I->mayWriteToMemory();
// If this instruction may read memory, remember that.
ReadsMemory |= I->mayReadFromMemory();
}
+ if (WritesMemory) {
+ if (!ReadsMemory)
+ return MAK_WriteOnly;
+ else
+ return MAK_MayWrite;
+ }
+
return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}
@@ -220,6 +232,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
+ bool WritesMemory = false;
for (Function *F : SCCNodes) {
// Call the callable parameter to look up AA results for this function.
AAResults &AAR = AARGetter(*F);
@@ -234,6 +247,9 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
case MAK_ReadOnly:
ReadsMemory = true;
break;
+ case MAK_WriteOnly:
+ WritesMemory = true;
+ break;
case MAK_ReadNone:
// Nothing to do!
break;
@@ -243,6 +259,9 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
bool MadeChange = false;
+
+ assert(!(ReadsMemory && WritesMemory) &&
+ "Function marked read-only and write-only");
for (Function *F : SCCNodes) {
if (F->doesNotAccessMemory())
// Already perfect!
@@ -252,16 +271,32 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// No change.
continue;
+ if (F->doesNotReadMemory() && WritesMemory)
+ continue;
+
MadeChange = true;
// Clear out any existing attributes.
F->removeFnAttr(Attribute::ReadOnly);
F->removeFnAttr(Attribute::ReadNone);
+ F->removeFnAttr(Attribute::WriteOnly);
+
+ if (!WritesMemory && !ReadsMemory) {
+ // Clear out any "access range attributes" if readnone was deduced.
+ F->removeFnAttr(Attribute::ArgMemOnly);
+ F->removeFnAttr(Attribute::InaccessibleMemOnly);
+ F->removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+ }
// Add in the new attribute.
- F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
+ if (WritesMemory && !ReadsMemory)
+ F->addFnAttr(Attribute::WriteOnly);
+ else
+ F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
- if (ReadsMemory)
+ if (WritesMemory && !ReadsMemory)
+ ++NumWriteOnly;
+ else if (ReadsMemory)
++NumReadOnly;
else
++NumReadNone;
@@ -1272,13 +1307,14 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
// If all of the calls in F are identifiable and are to norecurse functions, F
// is norecurse. This check also detects self-recursion as F is not currently
// marked norecurse, so any called from F to F will not be marked norecurse.
- for (Instruction &I : instructions(*F))
- if (auto CS = CallSite(&I)) {
- Function *Callee = CS.getCalledFunction();
- if (!Callee || Callee == F || !Callee->doesNotRecurse())
- // Function calls a potentially recursive function.
- return false;
- }
+ for (auto &BB : *F)
+ for (auto &I : BB.instructionsWithoutDebug())
+ if (auto CS = CallSite(&I)) {
+ Function *Callee = CS.getCalledFunction();
+ if (!Callee || Callee == F || !Callee->doesNotRecurse())
+ // Function calls a potentially recursive function.
+ return false;
+ }
// Every call was to a non-recursive function other than this function, and
// we have no indirect recursion as the SCC size is one. This function cannot
@@ -1286,6 +1322,31 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
return setDoesNotRecurse(*F);
}
+template <typename AARGetterT>
+static bool deriveAttrsInPostOrder(SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
+ bool HasUnknownCall) {
+ bool Changed = false;
+
+ // Bail if the SCC only contains optnone functions.
+ if (SCCNodes.empty())
+ return Changed;
+
+ Changed |= addArgumentReturnedAttrs(SCCNodes);
+ Changed |= addReadAttrs(SCCNodes, AARGetter);
+ Changed |= addArgumentAttrs(SCCNodes);
+
+ // If we have no external nodes participating in the SCC, we can deduce some
+ // more precise attributes as well.
+ if (!HasUnknownCall) {
+ Changed |= addNoAliasAttrs(SCCNodes);
+ Changed |= addNonNullAttrs(SCCNodes);
+ Changed |= inferAttrsFromFunctionBodies(SCCNodes);
+ Changed |= addNoRecurseAttrs(SCCNodes);
+ }
+
+ return Changed;
+}
+
PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
CGSCCAnalysisManager &AM,
LazyCallGraph &CG,
@@ -1328,21 +1389,10 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
SCCNodes.insert(&F);
}
- bool Changed = false;
- Changed |= addArgumentReturnedAttrs(SCCNodes);
- Changed |= addReadAttrs(SCCNodes, AARGetter);
- Changed |= addArgumentAttrs(SCCNodes);
-
- // If we have no external nodes participating in the SCC, we can deduce some
- // more precise attributes as well.
- if (!HasUnknownCall) {
- Changed |= addNoAliasAttrs(SCCNodes);
- Changed |= addNonNullAttrs(SCCNodes);
- Changed |= inferAttrsFromFunctionBodies(SCCNodes);
- Changed |= addNoRecurseAttrs(SCCNodes);
- }
+ if (deriveAttrsInPostOrder(SCCNodes, AARGetter, HasUnknownCall))
+ return PreservedAnalyses::none();
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+ return PreservedAnalyses::all();
}
namespace {
@@ -1382,7 +1432,6 @@ Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
- bool Changed = false;
// Fill SCCNodes with the elements of the SCC. Used for quickly looking up
// whether a given CallGraphNode is in this SCC. Also track whether there are
@@ -1403,24 +1452,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
SCCNodes.insert(F);
}
- // Skip it if the SCC only contains optnone functions.
- if (SCCNodes.empty())
- return Changed;
-
- Changed |= addArgumentReturnedAttrs(SCCNodes);
- Changed |= addReadAttrs(SCCNodes, AARGetter);
- Changed |= addArgumentAttrs(SCCNodes);
-
- // If we have no external nodes participating in the SCC, we can deduce some
- // more precise attributes as well.
- if (!ExternalNode) {
- Changed |= addNoAliasAttrs(SCCNodes);
- Changed |= addNonNullAttrs(SCCNodes);
- Changed |= inferAttrsFromFunctionBodies(SCCNodes);
- Changed |= addNoRecurseAttrs(SCCNodes);
- }
-
- return Changed;
+ return deriveAttrsInPostOrder(SCCNodes, AARGetter, ExternalNode);
}
bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
index ed97d342f348..1223a23512ed 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -60,8 +60,17 @@ using namespace llvm;
#define DEBUG_TYPE "function-import"
-STATISTIC(NumImportedFunctions, "Number of functions imported");
-STATISTIC(NumImportedGlobalVars, "Number of global variables imported");
+STATISTIC(NumImportedFunctionsThinLink,
+ "Number of functions thin link decided to import");
+STATISTIC(NumImportedHotFunctionsThinLink,
+ "Number of hot functions thin link decided to import");
+STATISTIC(NumImportedCriticalFunctionsThinLink,
+ "Number of critical functions thin link decided to import");
+STATISTIC(NumImportedGlobalVarsThinLink,
+ "Number of global variables thin link decided to import");
+STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
+STATISTIC(NumImportedGlobalVars,
+ "Number of global variables imported in backend");
STATISTIC(NumImportedModules, "Number of modules imported from");
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");
@@ -107,6 +116,10 @@ static cl::opt<float> ImportColdMultiplier(
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
cl::desc("Print imported functions"));
+static cl::opt<bool> PrintImportFailures(
+ "print-import-failures", cl::init(false), cl::Hidden,
+ cl::desc("Print information for functions rejected for importing"));
+
static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
cl::desc("Compute dead symbols"));
@@ -163,13 +176,18 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
static const GlobalValueSummary *
selectCallee(const ModuleSummaryIndex &Index,
ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
- unsigned Threshold, StringRef CallerModulePath) {
+ unsigned Threshold, StringRef CallerModulePath,
+ FunctionImporter::ImportFailureReason &Reason,
+ GlobalValue::GUID GUID) {
+ Reason = FunctionImporter::ImportFailureReason::None;
auto It = llvm::find_if(
CalleeSummaryList,
[&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
auto *GVSummary = SummaryPtr.get();
- if (!Index.isGlobalValueLive(GVSummary))
+ if (!Index.isGlobalValueLive(GVSummary)) {
+ Reason = FunctionImporter::ImportFailureReason::NotLive;
return false;
+ }
// For SamplePGO, in computeImportForFunction the OriginalId
// may have been used to locate the callee summary list (See
@@ -184,11 +202,15 @@ selectCallee(const ModuleSummaryIndex &Index,
// When this happens, the logic for SamplePGO kicks in and
// the static variable in 2) will be found, which needs to be
// filtered out.
- if (GVSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind)
+ if (GVSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind) {
+ Reason = FunctionImporter::ImportFailureReason::GlobalVar;
return false;
- if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
+ }
+ if (GlobalValue::isInterposableLinkage(GVSummary->linkage())) {
+ Reason = FunctionImporter::ImportFailureReason::InterposableLinkage;
// There is no point in importing these, we can't inline them
return false;
+ }
auto *Summary = cast<FunctionSummary>(GVSummary->getBaseObject());
@@ -204,14 +226,29 @@ selectCallee(const ModuleSummaryIndex &Index,
// a local in another module.
if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
CalleeSummaryList.size() > 1 &&
- Summary->modulePath() != CallerModulePath)
+ Summary->modulePath() != CallerModulePath) {
+ Reason =
+ FunctionImporter::ImportFailureReason::LocalLinkageNotInModule;
return false;
+ }
- if (Summary->instCount() > Threshold)
+ if (Summary->instCount() > Threshold) {
+ Reason = FunctionImporter::ImportFailureReason::TooLarge;
return false;
+ }
- if (Summary->notEligibleToImport())
+ // Skip if it isn't legal to import (e.g. may reference unpromotable
+ // locals).
+ if (Summary->notEligibleToImport()) {
+ Reason = FunctionImporter::ImportFailureReason::NotEligible;
return false;
+ }
+
+ // Don't bother importing if we can't inline it anyway.
+ if (Summary->fflags().NoInline) {
+ Reason = FunctionImporter::ImportFailureReason::NoInline;
+ return false;
+ }
return true;
});
@@ -256,12 +293,25 @@ static void computeImportForReferencedGlobals(
LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
+ // If this is a local variable, make sure we import the copy
+ // in the caller's module. The only time a local variable can
+ // share an entry in the index is if there is a local with the same name
+ // in another module that had the same source file name (in a different
+ // directory), where each was compiled in its own directory so there
+ // was no distinguishing path.
+ auto LocalNotInModule = [&](const GlobalValueSummary *RefSummary) -> bool {
+ return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
+ RefSummary->modulePath() != Summary.modulePath();
+ };
+
for (auto &RefSummary : VI.getSummaryList())
- if (RefSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind &&
- !RefSummary->notEligibleToImport() &&
- !GlobalValue::isInterposableLinkage(RefSummary->linkage()) &&
- RefSummary->refs().empty()) {
- ImportList[RefSummary->modulePath()].insert(VI.getGUID());
+ if (isa<GlobalVarSummary>(RefSummary.get()) &&
+ canImportGlobalVar(RefSummary.get()) &&
+ !LocalNotInModule(RefSummary.get())) {
+ auto ILI = ImportList[RefSummary->modulePath()].insert(VI.getGUID());
+ // Only update stat if we haven't already imported this variable.
+ if (ILI.second)
+ NumImportedGlobalVarsThinLink++;
if (ExportLists)
(*ExportLists)[RefSummary->modulePath()].insert(VI.getGUID());
break;
@@ -269,6 +319,29 @@ static void computeImportForReferencedGlobals(
}
}
+static const char *
+getFailureName(FunctionImporter::ImportFailureReason Reason) {
+ switch (Reason) {
+ case FunctionImporter::ImportFailureReason::None:
+ return "None";
+ case FunctionImporter::ImportFailureReason::GlobalVar:
+ return "GlobalVar";
+ case FunctionImporter::ImportFailureReason::NotLive:
+ return "NotLive";
+ case FunctionImporter::ImportFailureReason::TooLarge:
+ return "TooLarge";
+ case FunctionImporter::ImportFailureReason::InterposableLinkage:
+ return "InterposableLinkage";
+ case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
+ return "LocalLinkageNotInModule";
+ case FunctionImporter::ImportFailureReason::NotEligible:
+ return "NotEligible";
+ case FunctionImporter::ImportFailureReason::NoInline:
+ return "NoInline";
+ }
+ llvm_unreachable("invalid reason");
+}
+
/// Compute the list of functions to import for a given caller. Mark these
/// imported functions and the symbols they reference in their source module as
/// exported from their source module.
@@ -315,11 +388,17 @@ static void computeImportForFunction(
const auto NewThreshold =
Threshold * GetBonusMultiplier(Edge.second.getHotness());
- auto IT = ImportThresholds.insert(
- std::make_pair(VI.getGUID(), std::make_pair(NewThreshold, nullptr)));
+ auto IT = ImportThresholds.insert(std::make_pair(
+ VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
bool PreviouslyVisited = !IT.second;
- auto &ProcessedThreshold = IT.first->second.first;
- auto &CalleeSummary = IT.first->second.second;
+ auto &ProcessedThreshold = std::get<0>(IT.first->second);
+ auto &CalleeSummary = std::get<1>(IT.first->second);
+ auto &FailureInfo = std::get<2>(IT.first->second);
+
+ bool IsHotCallsite =
+ Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
+ bool IsCriticalCallsite =
+ Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;
const FunctionSummary *ResolvedCalleeSummary = nullptr;
if (CalleeSummary) {
@@ -344,16 +423,37 @@ static void computeImportForFunction(
LLVM_DEBUG(
dbgs() << "ignored! Target was already rejected with Threshold "
<< ProcessedThreshold << "\n");
+ if (PrintImportFailures) {
+ assert(FailureInfo &&
+ "Expected FailureInfo for previously rejected candidate");
+ FailureInfo->Attempts++;
+ }
continue;
}
+ FunctionImporter::ImportFailureReason Reason;
CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold,
- Summary.modulePath());
+ Summary.modulePath(), Reason, VI.getGUID());
if (!CalleeSummary) {
// Update with new larger threshold if this was a retry (otherwise
- // we would have already inserted with NewThreshold above).
- if (PreviouslyVisited)
+ // we would have already inserted with NewThreshold above). Also
+ // update failure info if requested.
+ if (PreviouslyVisited) {
ProcessedThreshold = NewThreshold;
+ if (PrintImportFailures) {
+ assert(FailureInfo &&
+ "Expected FailureInfo for previously rejected candidate");
+ FailureInfo->Reason = Reason;
+ FailureInfo->Attempts++;
+ FailureInfo->MaxHotness =
+ std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
+ }
+ } else if (PrintImportFailures) {
+ assert(!FailureInfo &&
+ "Expected no FailureInfo for newly rejected candidate");
+ FailureInfo = llvm::make_unique<FunctionImporter::ImportFailureInfo>(
+ VI, Edge.second.getHotness(), Reason, 1);
+ }
LLVM_DEBUG(
dbgs() << "ignored! No qualifying callee with summary found.\n");
continue;
@@ -371,6 +471,13 @@ static void computeImportForFunction(
// We previously decided to import this GUID definition if it was already
// inserted in the set of imports from the exporting module.
bool PreviouslyImported = !ILI.second;
+ if (!PreviouslyImported) {
+ NumImportedFunctionsThinLink++;
+ if (IsHotCallsite)
+ NumImportedHotFunctionsThinLink++;
+ if (IsCriticalCallsite)
+ NumImportedCriticalFunctionsThinLink++;
+ }
// Make exports in the source module.
if (ExportLists) {
@@ -404,8 +511,6 @@ static void computeImportForFunction(
return Threshold * ImportInstrFactor;
};
- bool IsHotCallsite =
- Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
ImportCount++;
@@ -420,7 +525,7 @@ static void computeImportForFunction(
/// another module (that may require promotion).
static void ComputeImportForModule(
const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
- FunctionImporter::ImportMapTy &ImportList,
+ StringRef ModName, FunctionImporter::ImportMapTy &ImportList,
StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
// Worklist contains the list of function imported in this module, for which
// we will analyse the callees and may import further down the callgraph.
@@ -460,6 +565,30 @@ static void ComputeImportForModule(
Worklist, ImportList, ExportLists,
ImportThresholds);
}
+
+ // Print stats about functions considered but rejected for importing
+ // when requested.
+ if (PrintImportFailures) {
+ dbgs() << "Missed imports into module " << ModName << "\n";
+ for (auto &I : ImportThresholds) {
+ auto &ProcessedThreshold = std::get<0>(I.second);
+ auto &CalleeSummary = std::get<1>(I.second);
+ auto &FailureInfo = std::get<2>(I.second);
+ if (CalleeSummary)
+ continue; // We are going to import.
+ assert(FailureInfo);
+ FunctionSummary *FS = nullptr;
+ if (!FailureInfo->VI.getSummaryList().empty())
+ FS = dyn_cast<FunctionSummary>(
+ FailureInfo->VI.getSummaryList()[0]->getBaseObject());
+ dbgs() << FailureInfo->VI
+ << ": Reason = " << getFailureName(FailureInfo->Reason)
+ << ", Threshold = " << ProcessedThreshold
+ << ", Size = " << (FS ? (int)FS->instCount() : -1)
+ << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
+ << ", Attempts = " << FailureInfo->Attempts << "\n";
+ }
+ }
}
#ifndef NDEBUG
@@ -497,7 +626,8 @@ void llvm::ComputeCrossModuleImport(
auto &ImportList = ImportLists[DefinedGVSummaries.first()];
LLVM_DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first() << "'\n");
- ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
+ ComputeImportForModule(DefinedGVSummaries.second, Index,
+ DefinedGVSummaries.first(), ImportList,
&ExportLists);
}
@@ -568,7 +698,7 @@ void llvm::ComputeCrossModuleImportForModule(
// Compute the import list for this module.
LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
- ComputeImportForModule(FunctionSummaryMap, Index, ImportList);
+ ComputeImportForModule(FunctionSummaryMap, Index, ModulePath, ImportList);
#ifndef NDEBUG
dumpImportListForModule(Index, ModulePath, ImportList);
@@ -647,29 +777,38 @@ void llvm::computeDeadSymbols(
VI = updateValueInfoForIndirectCalls(Index, VI);
if (!VI)
return;
- for (auto &S : VI.getSummaryList())
- if (S->isLive())
- return;
+
+ // We need to make sure all variants of the symbol are scanned, since an
+ // alias can make one (but not all) of them alive.
+ if (llvm::all_of(VI.getSummaryList(),
+ [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
+ return S->isLive();
+ }))
+ return;
// We only keep live symbols that are known to be non-prevailing if any are
- // available_externally. Those symbols are discarded later in the
- // EliminateAvailableExternally pass and setting them to not-live breaks
- // downstreams users of liveness information (PR36483).
+ // available_externally, linkonce_odr, or weak_odr. Those symbols are
+ // discarded later in the EliminateAvailableExternally pass, and setting them
+ // to not-live could break downstream users of liveness information (PR36483)
+ // or limit optimization opportunities.
if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
- bool AvailableExternally = false;
+ bool KeepAliveLinkage = false;
bool Interposable = false;
for (auto &S : VI.getSummaryList()) {
- if (S->linkage() == GlobalValue::AvailableExternallyLinkage)
- AvailableExternally = true;
+ if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
+ S->linkage() == GlobalValue::WeakODRLinkage ||
+ S->linkage() == GlobalValue::LinkOnceODRLinkage)
+ KeepAliveLinkage = true;
else if (GlobalValue::isInterposableLinkage(S->linkage()))
Interposable = true;
}
- if (!AvailableExternally)
+ if (!KeepAliveLinkage)
return;
if (Interposable)
- report_fatal_error("Interposable and available_externally symbol");
+ report_fatal_error(
+ "Interposable and available_externally/linkonce_odr/weak_odr symbol");
}
for (auto &S : VI.getSummaryList())
@@ -700,6 +839,25 @@ void llvm::computeDeadSymbols(
NumLiveSymbols += LiveSymbols;
}
+// Compute dead symbols and propagate constants in combined index.
+void llvm::computeDeadSymbolsWithConstProp(
+ ModuleSummaryIndex &Index,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
+ function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
+ bool ImportEnabled) {
+ computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
+ if (ImportEnabled) {
+ Index.propagateConstants(GUIDPreservedSymbols);
+ } else {
+ // If import is disabled we should drop read-only attribute
+ // from all summaries to prevent internalization.
+ for (auto &P : Index)
+ for (auto &S : P.second.SummaryList)
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(S.get()))
+ GVS->setReadOnly(false);
+ }
+}
+
/// Compute the set of summaries needed for a ThinLTO backend compilation of
/// \p ModulePath.
void llvm::gatherImportedSummariesForModule(
@@ -758,7 +916,8 @@ bool llvm::convertToDeclaration(GlobalValue &GV) {
if (GV.getValueType()->isFunctionTy())
NewGV =
Function::Create(cast<FunctionType>(GV.getValueType()),
- GlobalValue::ExternalLinkage, "", GV.getParent());
+ GlobalValue::ExternalLinkage, GV.getAddressSpace(),
+ "", GV.getParent());
else
NewGV =
new GlobalVariable(*GV.getParent(), GV.getValueType(),
@@ -773,8 +932,8 @@ bool llvm::convertToDeclaration(GlobalValue &GV) {
return true;
}
-/// Fixup WeakForLinker linkages in \p TheModule based on summary analysis.
-void llvm::thinLTOResolveWeakForLinkerModule(
+/// Fixup prevailing symbol linkages in \p TheModule based on summary analysis.
+void llvm::thinLTOResolvePrevailingInModule(
Module &TheModule, const GVSummaryMapTy &DefinedGlobals) {
auto updateLinkage = [&](GlobalValue &GV) {
// See if the global summary analysis computed a new resolved linkage.
@@ -791,13 +950,15 @@ void llvm::thinLTOResolveWeakForLinkerModule(
// as we need access to the resolution vectors for each input file in
// order to find which symbols have been redefined.
// We may consider reorganizing this code and moving the linkage recording
- // somewhere else, e.g. in thinLTOResolveWeakForLinkerInIndex.
+ // somewhere else, e.g. in thinLTOResolvePrevailingInIndex.
if (NewLinkage == GlobalValue::WeakAnyLinkage) {
GV.setLinkage(NewLinkage);
return;
}
- if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
+ if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
+ // In case it was dead and already converted to a declaration.
+ GV.isDeclaration())
return;
// Check for a non-prevailing def that has interposable linkage
// (e.g. non-odr weak or linkonce). In that case we can't simply
@@ -808,7 +969,7 @@ void llvm::thinLTOResolveWeakForLinkerModule(
GlobalValue::isInterposableLinkage(GV.getLinkage())) {
if (!convertToDeclaration(GV))
// FIXME: Change this to collect replaced GVs and later erase
- // them from the parent module once thinLTOResolveWeakForLinkerGUID is
+ // them from the parent module once thinLTOResolvePrevailingGUID is
// changed to enable this for aliases.
llvm_unreachable("Expected GV to be converted");
} else {
@@ -894,6 +1055,18 @@ static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
return NewFn;
}
+// Internalize values that we marked with a specific attribute
+// in processGlobalForThinLTO.
+static void internalizeImmutableGVs(Module &M) {
+ for (auto &GV : M.globals())
+ // Skip GVs which have been converted to declarations
+ // by dropDeadSymbols.
+ if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
+ GV.setLinkage(GlobalValue::InternalLinkage);
+ GV.setVisibility(GlobalValue::DefaultVisibility);
+ }
+}
+
// Automatically import functions in Module \p DestModule based on the summaries
// index.
Expected<bool> FunctionImporter::importFunctions(
@@ -1017,6 +1190,8 @@ Expected<bool> FunctionImporter::importFunctions(
NumImportedModules++;
}
+ internalizeImmutableGVs(DestModule);
+
NumImportedFunctions += (ImportedCount - ImportedGVCount);
NumImportedGlobalVars += ImportedGVCount;
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index ada9eb80e680..34de87433367 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
@@ -75,13 +76,17 @@ ModulePass *llvm::createGlobalDCEPass() {
return new GlobalDCELegacyPass();
}
-/// Returns true if F contains only a single "ret" instruction.
+/// Returns true if F is effectively empty.
static bool isEmptyFunction(Function *F) {
BasicBlock &Entry = F->getEntryBlock();
- if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front()))
- return false;
- ReturnInst &RI = cast<ReturnInst>(Entry.front());
- return RI.getReturnValue() == nullptr;
+ for (auto &I : Entry) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (auto *RI = dyn_cast<ReturnInst>(&I))
+ return !RI->getReturnValue();
+ break;
+ }
+ return false;
}
/// Compute the set of GlobalValue that depends from V.
@@ -165,7 +170,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// Functions with external linkage are needed if they have a body.
// Externally visible & appending globals are needed, if they have an
// initializer.
- if (!GO.isDeclaration() && !GO.hasAvailableExternallyLinkage())
+ if (!GO.isDeclaration())
if (!GO.isDiscardableIfUnused())
MarkLive(GO);
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 1761d7faff57..3005aafd06b1 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1710,19 +1710,25 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
}
}
- new StoreInst(StoreVal, NewGV, false, 0,
- SI->getOrdering(), SI->getSyncScopeID(), SI);
+ StoreInst *NSI =
+ new StoreInst(StoreVal, NewGV, false, 0, SI->getOrdering(),
+ SI->getSyncScopeID(), SI);
+ NSI->setDebugLoc(SI->getDebugLoc());
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
LI->getOrdering(), LI->getSyncScopeID(), LI);
- Value *NSI;
+ Instruction *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
else
NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
NSI->takeName(LI);
+ // Since LI is split into two instructions, NLI and NSI both inherit the
+ // same DebugLoc
+ NLI->setDebugLoc(LI->getDebugLoc());
+ NSI->setDebugLoc(LI->getDebugLoc());
LI->replaceAllUsesWith(NSI);
}
UI->eraseFromParent();
@@ -2107,6 +2113,13 @@ static bool hasChangeableCC(Function *F) {
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
return false;
+ // Don't break the invariant that the inalloca parameter is the only parameter
+ // passed in memory.
+ // FIXME: GlobalOpt should remove inalloca when possible and hoist the dynamic
+ // alloca it uses to the entry block if possible.
+ if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ return false;
+
// FIXME: Change CC for the whole chain of musttail calls when possible.
//
// Can't change CC of the function that either has musttail calls, or is a
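The TryToShrinkGlobalToBoolean hunk above follows the rule that when one instruction is rewritten into two, both replacements inherit the original DebugLoc so the debugger still maps them to the same source line. A tiny standalone model of that rule follows; Inst and splitLoad are invented for illustration and are not LLVM types.

#include <iostream>
#include <string>
#include <utility>

struct Inst {
  std::string Name;
  int DebugLine; // 0 means "no source location"
};

// Rewrite one load into a narrow load plus a widening instruction; both new
// instructions carry the original instruction's location.
static std::pair<Inst, Inst> splitLoad(const Inst &OldLoad) {
  Inst NarrowLoad{OldLoad.Name + ".b", OldLoad.DebugLine};
  Inst Widen{OldLoad.Name + ".zext", OldLoad.DebugLine};
  return {NarrowLoad, Widen};
}

int main() {
  Inst LI{"ld", 42};
  std::pair<Inst, Inst> Split = splitLoad(LI);
  std::cout << Split.first.Name << " keeps line " << Split.first.DebugLine << "\n"
            << Split.second.Name << " keeps line " << Split.second.DebugLine << "\n";
}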
diff --git a/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
new file mode 100644
index 000000000000..924a7d5fbd9c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -0,0 +1,643 @@
+//===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Outline cold regions to a separate function.
+// TODO: Update BFI and BPI
+// TODO: Add all the outlined functions to a separate section.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/HotColdSplitting.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+
+#define DEBUG_TYPE "hotcoldsplit"
+
+STATISTIC(NumColdRegionsFound, "Number of cold regions found.");
+STATISTIC(NumColdRegionsOutlined, "Number of cold regions outlined.");
+
+using namespace llvm;
+
+static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
+ cl::init(true), cl::Hidden);
+
+static cl::opt<int>
+ MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden,
+ cl::desc("Code size threshold for outlining within a "
+ "single BB (as a multiple of TCC_Basic)"));
+
+namespace {
+
+struct PostDomTree : PostDomTreeBase<BasicBlock> {
+ PostDomTree(Function &F) { recalculate(F); }
+};
+
+/// A sequence of basic blocks.
+///
+/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
+using BlockSequence = SmallVector<BasicBlock *, 0>;
+
+// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
+// this function unless you modify the MBB version as well.
+//
+/// A block with no successors that does not return probably ends in unreachable and is cold.
+/// Also consider a block that ends in an indirect branch to be a return block,
+/// since many targets use plain indirect branches to return.
+bool blockEndsInUnreachable(const BasicBlock &BB) {
+ if (!succ_empty(&BB))
+ return false;
+ if (BB.empty())
+ return true;
+ const Instruction *I = BB.getTerminator();
+ return !(isa<ReturnInst>(I) || isa<IndirectBrInst>(I));
+}
+
+bool unlikelyExecuted(BasicBlock &BB) {
+ // Exception handling blocks are unlikely executed.
+ if (BB.isEHPad())
+ return true;
+
+ // The block is cold if it calls/invokes a cold function.
+ for (Instruction &I : BB)
+ if (auto CS = CallSite(&I))
+ if (CS.hasFnAttr(Attribute::Cold))
+ return true;
+
+ // The block is cold if it has an unreachable terminator, unless it's
+ // preceded by a call to a (possibly warm) noreturn function (e.g. longjmp).
+ if (blockEndsInUnreachable(BB)) {
+ if (auto *CI =
+ dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
+ if (CI->hasFnAttr(Attribute::NoReturn))
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+/// Check whether it's safe to outline \p BB.
+static bool mayExtractBlock(const BasicBlock &BB) {
+ return !BB.hasAddressTaken() && !BB.isEHPad();
+}
+
+/// Check whether \p Region is profitable to outline.
+static bool isProfitableToOutline(const BlockSequence &Region,
+ TargetTransformInfo &TTI) {
+ if (Region.size() > 1)
+ return true;
+
+ int Cost = 0;
+ const BasicBlock &BB = *Region[0];
+ for (const Instruction &I : BB) {
+ if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
+ continue;
+
+ Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+ if (Cost >= (MinOutliningThreshold * TargetTransformInfo::TCC_Basic))
+ return true;
+ }
+ return false;
+}
+
+/// Mark \p F cold. Return true if it's changed.
+static bool markEntireFunctionCold(Function &F) {
+ assert(!F.hasFnAttribute(Attribute::OptimizeNone) && "Can't mark this cold");
+ bool Changed = false;
+ if (!F.hasFnAttribute(Attribute::MinSize)) {
+ F.addFnAttr(Attribute::MinSize);
+ Changed = true;
+ }
+ // TODO: Move this function into a cold section.
+ return Changed;
+}
+
+class HotColdSplitting {
+public:
+ HotColdSplitting(ProfileSummaryInfo *ProfSI,
+ function_ref<BlockFrequencyInfo *(Function &)> GBFI,
+ function_ref<TargetTransformInfo &(Function &)> GTTI,
+ std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
+ : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
+ bool run(Module &M);
+
+private:
+ bool shouldOutlineFrom(const Function &F) const;
+ bool outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
+ BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+ DominatorTree &DT, PostDomTree &PDT,
+ OptimizationRemarkEmitter &ORE);
+ Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
+ BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+ OptimizationRemarkEmitter &ORE, unsigned Count);
+ SmallPtrSet<const Function *, 2> OutlinedFunctions;
+ ProfileSummaryInfo *PSI;
+ function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
+ function_ref<TargetTransformInfo &(Function &)> GetTTI;
+ std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
+};
+
+class HotColdSplittingLegacyPass : public ModulePass {
+public:
+ static char ID;
+ HotColdSplittingLegacyPass() : ModulePass(ID) {
+ initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override;
+};
+
+} // end anonymous namespace
+
+// Returns false if the function should not be considered for hot-cold split
+// optimization.
+bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
+ // Do not try to outline again from an already outlined cold function.
+ if (OutlinedFunctions.count(&F))
+ return false;
+
+ if (F.size() <= 2)
+ return false;
+
+ // TODO: Consider only skipping functions marked `optnone` or `cold`.
+
+ if (F.hasAddressTaken())
+ return false;
+
+ if (F.hasFnAttribute(Attribute::AlwaysInline))
+ return false;
+
+ if (F.hasFnAttribute(Attribute::NoInline))
+ return false;
+
+ if (F.getCallingConv() == CallingConv::Cold)
+ return false;
+
+ if (PSI->isFunctionEntryCold(&F))
+ return false;
+ return true;
+}
+
+Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
+ DominatorTree &DT,
+ BlockFrequencyInfo *BFI,
+ TargetTransformInfo &TTI,
+ OptimizationRemarkEmitter &ORE,
+ unsigned Count) {
+ assert(!Region.empty());
+
+ // TODO: Pass BFI and BPI to update profile information.
+ CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
+ /* BPI */ nullptr, /* AllowVarArgs */ false,
+ /* AllowAlloca */ false,
+ /* Suffix */ "cold." + std::to_string(Count));
+
+ SetVector<Value *> Inputs, Outputs, Sinks;
+ CE.findInputsOutputs(Inputs, Outputs, Sinks);
+
+ // Do not extract regions that have live exit variables.
+ if (Outputs.size() > 0) {
+ LLVM_DEBUG(llvm::dbgs() << "Not outlining; live outputs\n");
+ return nullptr;
+ }
+
+ // TODO: Run MergeBasicBlockIntoOnlyPred on the outlined function.
+ Function *OrigF = Region[0]->getParent();
+ if (Function *OutF = CE.extractCodeRegion()) {
+ User *U = *OutF->user_begin();
+ CallInst *CI = cast<CallInst>(U);
+ CallSite CS(CI);
+ NumColdRegionsOutlined++;
+ if (TTI.useColdCCForColdCall(*OutF)) {
+ OutF->setCallingConv(CallingConv::Cold);
+ CS.setCallingConv(CallingConv::Cold);
+ }
+ CI->setIsNoInline();
+
+ // Try to make the outlined code as small as possible on the assumption
+ // that it's cold.
+ markEntireFunctionCold(*OutF);
+
+ LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "HotColdSplit",
+ &*Region[0]->begin())
+ << ore::NV("Original", OrigF) << " split cold code into "
+ << ore::NV("Split", OutF);
+ });
+ return OutF;
+ }
+
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
+ &*Region[0]->begin())
+ << "Failed to extract region at block "
+ << ore::NV("Block", Region.front());
+ });
+ return nullptr;
+}
+
+/// A pair of (basic block, score).
+using BlockTy = std::pair<BasicBlock *, unsigned>;
+
+namespace {
+/// A maximal outlining region. This contains all blocks post-dominated by a
+/// sink block, the sink block itself, and all blocks dominated by the sink.
+class OutliningRegion {
+ /// A list of (block, score) pairs. A block's score is non-zero iff it's a
+ /// viable sub-region entry point. Blocks with higher scores are better entry
+ /// points (i.e. they are more distant ancestors of the sink block).
+ SmallVector<BlockTy, 0> Blocks = {};
+
+ /// The suggested entry point into the region. If the region has multiple
+ /// entry points, not all blocks within the region may be reachable from this
+ /// entry point.
+ BasicBlock *SuggestedEntryPoint = nullptr;
+
+ /// Whether the entire function is cold.
+ bool EntireFunctionCold = false;
+
+ /// Whether or not \p BB could be the entry point of an extracted region.
+ static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
+
+ /// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
+ static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
+ return isViableEntryPoint(BB) ? Score : 0;
+ }
+
+ /// These scores should be lower than the score for predecessor blocks,
+ /// because regions starting at predecessor blocks are typically larger.
+ static constexpr unsigned ScoreForSuccBlock = 1;
+ static constexpr unsigned ScoreForSinkBlock = 1;
+
+ OutliningRegion(const OutliningRegion &) = delete;
+ OutliningRegion &operator=(const OutliningRegion &) = delete;
+
+public:
+ OutliningRegion() = default;
+ OutliningRegion(OutliningRegion &&) = default;
+ OutliningRegion &operator=(OutliningRegion &&) = default;
+
+ static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
+ const PostDomTree &PDT) {
+ OutliningRegion ColdRegion;
+
+ SmallPtrSet<BasicBlock *, 4> RegionBlocks;
+
+ auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
+ RegionBlocks.insert(BB);
+ ColdRegion.Blocks.emplace_back(BB, Score);
+ assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
+ };
+
+ // The ancestor farthest-away from SinkBB, and also post-dominated by it.
+ unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
+ ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
+ unsigned BestScore = SinkScore;
+
+ // Visit SinkBB's ancestors using inverse DFS.
+ auto PredIt = ++idf_begin(&SinkBB);
+ auto PredEnd = idf_end(&SinkBB);
+ while (PredIt != PredEnd) {
+ BasicBlock &PredBB = **PredIt;
+ bool SinkPostDom = PDT.dominates(&SinkBB, &PredBB);
+
+ // If the predecessor is cold and has no predecessors, the entire
+ // function must be cold.
+ if (SinkPostDom && pred_empty(&PredBB)) {
+ ColdRegion.EntireFunctionCold = true;
+ return ColdRegion;
+ }
+
+ // If SinkBB does not post-dominate a predecessor, do not mark the
+ // predecessor (or any of its predecessors) cold.
+ if (!SinkPostDom || !mayExtractBlock(PredBB)) {
+ PredIt.skipChildren();
+ continue;
+ }
+
+ // Keep track of the post-dominated ancestor farthest away from the sink.
+ // The path length is always >= 2, ensuring that predecessor blocks are
+ // considered as entry points before the sink block.
+ unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
+ if (PredScore > BestScore) {
+ ColdRegion.SuggestedEntryPoint = &PredBB;
+ BestScore = PredScore;
+ }
+
+ addBlockToRegion(&PredBB, PredScore);
+ ++PredIt;
+ }
+
+ // Add SinkBB to the cold region. It's considered as an entry point before
+ // any sink-successor blocks.
+ addBlockToRegion(&SinkBB, SinkScore);
+
+ // Find all successors of SinkBB dominated by SinkBB using DFS.
+ auto SuccIt = ++df_begin(&SinkBB);
+ auto SuccEnd = df_end(&SinkBB);
+ while (SuccIt != SuccEnd) {
+ BasicBlock &SuccBB = **SuccIt;
+ bool SinkDom = DT.dominates(&SinkBB, &SuccBB);
+
+ // Don't allow the backwards & forwards DFSes to mark the same block.
+ bool DuplicateBlock = RegionBlocks.count(&SuccBB);
+
+ // If SinkBB does not dominate a successor, do not mark the successor (or
+ // any of its successors) cold.
+ if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
+ SuccIt.skipChildren();
+ continue;
+ }
+
+ unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
+ if (SuccScore > BestScore) {
+ ColdRegion.SuggestedEntryPoint = &SuccBB;
+ BestScore = SuccScore;
+ }
+
+ addBlockToRegion(&SuccBB, SuccScore);
+ ++SuccIt;
+ }
+
+ return ColdRegion;
+ }
+
+ /// Whether this region has nothing to extract.
+ bool empty() const { return !SuggestedEntryPoint; }
+
+ /// The blocks in this region.
+ ArrayRef<std::pair<BasicBlock *, unsigned>> blocks() const { return Blocks; }
+
+ /// Whether the entire function containing this region is cold.
+ bool isEntireFunctionCold() const { return EntireFunctionCold; }
+
+ /// Remove a sub-region from this region and return it as a block sequence.
+ BlockSequence takeSingleEntrySubRegion(DominatorTree &DT) {
+ assert(!empty() && !isEntireFunctionCold() && "Nothing to extract");
+
+ // Remove blocks dominated by the suggested entry point from this region.
+ // During the removal, identify the next best entry point into the region.
+ // Ensure that the first extracted block is the suggested entry point.
+ BlockSequence SubRegion = {SuggestedEntryPoint};
+ BasicBlock *NextEntryPoint = nullptr;
+ unsigned NextScore = 0;
+ auto RegionEndIt = Blocks.end();
+ auto RegionStartIt = remove_if(Blocks, [&](const BlockTy &Block) {
+ BasicBlock *BB = Block.first;
+ unsigned Score = Block.second;
+ bool InSubRegion =
+ BB == SuggestedEntryPoint || DT.dominates(SuggestedEntryPoint, BB);
+ if (!InSubRegion && Score > NextScore) {
+ NextEntryPoint = BB;
+ NextScore = Score;
+ }
+ if (InSubRegion && BB != SuggestedEntryPoint)
+ SubRegion.push_back(BB);
+ return InSubRegion;
+ });
+ Blocks.erase(RegionStartIt, RegionEndIt);
+
+ // Update the suggested entry point.
+ SuggestedEntryPoint = NextEntryPoint;
+
+ return SubRegion;
+ }
+};
+} // namespace
+
+bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
+ BlockFrequencyInfo *BFI,
+ TargetTransformInfo &TTI,
+ DominatorTree &DT, PostDomTree &PDT,
+ OptimizationRemarkEmitter &ORE) {
+ bool Changed = false;
+
+ // The set of cold blocks.
+ SmallPtrSet<BasicBlock *, 4> ColdBlocks;
+
+ // The worklist of non-intersecting regions left to outline.
+ SmallVector<OutliningRegion, 2> OutliningWorklist;
+
+ // Set up an RPO traversal. Experimentally, this performs better (outlines
+ // more) than a PO traversal, because we prevent region overlap by keeping
+ // the first region to contain a block.
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+
+ // Find all cold regions.
+ for (BasicBlock *BB : RPOT) {
+ // Skip blocks which can't be outlined.
+ if (!mayExtractBlock(*BB))
+ continue;
+
+ // This block is already part of some outlining region.
+ if (ColdBlocks.count(BB))
+ continue;
+
+ bool Cold = PSI.isColdBlock(BB, BFI) ||
+ (EnableStaticAnalyis && unlikelyExecuted(*BB));
+ if (!Cold)
+ continue;
+
+ LLVM_DEBUG({
+ dbgs() << "Found a cold block:\n";
+ BB->dump();
+ });
+
+ auto Region = OutliningRegion::create(*BB, DT, PDT);
+ if (Region.empty())
+ continue;
+
+ if (Region.isEntireFunctionCold()) {
+ LLVM_DEBUG(dbgs() << "Entire function is cold\n");
+ return markEntireFunctionCold(F);
+ }
+
+ // If this outlining region intersects with another, drop the new region.
+ //
+ // TODO: It's theoretically possible to outline more by only keeping the
+ // largest region which contains a block, but the extra bookkeeping to do
+ // this is tricky/expensive.
+ bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
+ return !ColdBlocks.insert(Block.first).second;
+ });
+ if (RegionsOverlap)
+ continue;
+
+ OutliningWorklist.emplace_back(std::move(Region));
+ ++NumColdRegionsFound;
+ }
+
+ // Outline single-entry cold regions, splitting up larger regions as needed.
+ unsigned OutlinedFunctionID = 1;
+ while (!OutliningWorklist.empty()) {
+ OutliningRegion Region = OutliningWorklist.pop_back_val();
+ assert(!Region.empty() && "Empty outlining region in worklist");
+ do {
+ BlockSequence SubRegion = Region.takeSingleEntrySubRegion(DT);
+ if (!isProfitableToOutline(SubRegion, TTI)) {
+ LLVM_DEBUG({
+ dbgs() << "Skipping outlining; not profitable to outline\n";
+ SubRegion[0]->dump();
+ });
+ continue;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
+ for (BasicBlock *BB : SubRegion)
+ BB->dump();
+ });
+
+ Function *Outlined =
+ extractColdRegion(SubRegion, DT, BFI, TTI, ORE, OutlinedFunctionID);
+ if (Outlined) {
+ ++OutlinedFunctionID;
+ OutlinedFunctions.insert(Outlined);
+ Changed = true;
+ }
+ } while (!Region.empty());
+ }
+
+ return Changed;
+}
+
+bool HotColdSplitting::run(Module &M) {
+ bool Changed = false;
+ OutlinedFunctions.clear();
+ for (auto &F : M) {
+ if (!shouldOutlineFrom(F)) {
+ LLVM_DEBUG(llvm::dbgs() << "Skipping " << F.getName() << "\n");
+ continue;
+ }
+ LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n");
+ DominatorTree DT(F);
+ PostDomTree PDT(F);
+ PDT.recalculate(F);
+ BlockFrequencyInfo *BFI = GetBFI(F);
+ TargetTransformInfo &TTI = GetTTI(F);
+ OptimizationRemarkEmitter &ORE = (*GetORE)(F);
+ Changed |= outlineColdRegions(F, *PSI, BFI, TTI, DT, PDT, ORE);
+ }
+ return Changed;
+}
+
+bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+ ProfileSummaryInfo *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto GTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+ auto GBFI = [this](Function &F) {
+ return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+ };
+ std::unique_ptr<OptimizationRemarkEmitter> ORE;
+ std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
+ [&ORE](Function &F) -> OptimizationRemarkEmitter & {
+ ORE.reset(new OptimizationRemarkEmitter(&F));
+ return *ORE.get();
+ };
+
+ return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
+}
+
+PreservedAnalyses
+HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&FAM](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+
+ auto GBFI = [&FAM](Function &F) {
+ return &FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ std::function<TargetTransformInfo &(Function &)> GTTI =
+ [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ std::unique_ptr<OptimizationRemarkEmitter> ORE;
+ std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
+ [&ORE](Function &F) -> OptimizationRemarkEmitter & {
+ ORE.reset(new OptimizationRemarkEmitter(&F));
+ return *ORE.get();
+ };
+
+ ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+
+ if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+char HotColdSplittingLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
+ "Hot Cold Splitting", false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
+ "Hot Cold Splitting", false, false)
+
+ModulePass *llvm::createHotColdSplittingPass() {
+ return new HotColdSplittingLegacyPass();
+}
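The new pass keeps its outlining regions disjoint by visiting blocks in RPO order and letting the first region that claims a block win; any later candidate touching an already-claimed block is dropped. Below is a small standalone sketch of that bookkeeping; selectDisjointRegions and the string block names are illustrative stand-ins, not the pass's actual data structures.

#include <iostream>
#include <set>
#include <string>
#include <vector>

using Block = std::string;
using Region = std::vector<Block>;

static std::vector<Region>
selectDisjointRegions(const std::vector<Region> &Candidates) {
  std::set<Block> Claimed;
  std::vector<Region> Worklist;
  for (const Region &R : Candidates) {
    bool Overlaps = false;
    for (const Block &B : R) {
      // A failed insert means a region seen earlier (in RPO) owns this block.
      if (!Claimed.insert(B).second) {
        Overlaps = true;
        break;
      }
    }
    if (!Overlaps)
      Worklist.push_back(R);
  }
  return Worklist;
}

int main() {
  std::vector<Region> Candidates = {
      {"bb3", "bb4"}, // kept
      {"bb4", "bb5"}, // dropped: bb4 already claimed
      {"bb7"}};       // kept
  for (const Region &R : selectDisjointRegions(Candidates))
    std::cout << "outline region starting at " << R.front() << "\n";
}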
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index dce9ee076bc5..973382e2b097 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -34,6 +34,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeGlobalDCELegacyPassPass(Registry);
initializeGlobalOptLegacyPassPass(Registry);
initializeGlobalSplitPass(Registry);
+ initializeHotColdSplittingLegacyPassPass(Registry);
initializeIPCPPass(Registry);
initializeAlwaysInlinerLegacyPassPass(Registry);
initializeSimpleInlinerPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 3da0c2e83eb8..66a6f80f31e4 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -64,6 +64,7 @@
#include <algorithm>
#include <cassert>
#include <functional>
+#include <sstream>
#include <tuple>
#include <utility>
#include <vector>
@@ -112,6 +113,14 @@ static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
"printing of statistics for each inlined function")),
cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
+/// Flag to add inline messages as callsite attributes 'inline-remark'.
+static cl::opt<bool>
+ InlineRemarkAttribute("inline-remark-attribute", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable adding inline-remark attribute to"
+ " callsites processed by inliner but decided"
+ " to be not inlined"));
+
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
@@ -263,7 +272,7 @@ static void mergeInlinedArrayAllocas(
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
-static bool InlineCallIfPossible(
+static InlineResult InlineCallIfPossible(
CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
@@ -275,8 +284,9 @@ static bool InlineCallIfPossible(
// Try to inline the function. Get the list of static allocas that were
// inlined.
- if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
- return false;
+ InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime);
+ if (!IR)
+ return IR;
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
ImportedFunctionsStats.recordInline(*Caller, *Callee);
@@ -286,7 +296,7 @@ static bool InlineCallIfPossible(
if (!DisableInlinedAllocaMerging)
mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);
- return true;
+ return IR; // success
}
/// Return true if inlining of CS can block the caller from being
@@ -301,6 +311,11 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// For now we only handle local or inline functions.
if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
return false;
+ // If the cost of inlining CS is non-positive, it is not going to prevent the
+ // caller from being inlined into its callers and hence we don't need to
+ // defer.
+ if (IC.getCost() <= 0)
+ return false;
// Try to detect the case where the current inlining candidate caller (call
// it B) is a static or linkonce-ODR function and is an inlining candidate
// elsewhere, and the current candidate callee (call it C) is large enough
@@ -320,25 +335,31 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
TotalSecondaryCost = 0;
// The candidate cost to be imposed upon the current function.
int CandidateCost = IC.getCost() - 1;
- // This bool tracks what happens if we do NOT inline C into B.
- bool callerWillBeRemoved = Caller->hasLocalLinkage();
+ // If the caller has local linkage and can be inlined into all its callers, we
+ // can apply a huge negative bonus to TotalSecondaryCost.
+ bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse();
// This bool tracks what happens if we DO inline C into B.
bool inliningPreventsSomeOuterInline = false;
for (User *U : Caller->users()) {
+ // If the caller will not be removed (either because it does not have
+ // local linkage or because the LastCallToStaticBonus has already been
+ // applied), then we can exit the loop early.
+ if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost())
+ return false;
CallSite CS2(U);
// If this isn't a call to Caller (it could be some other sort
// of reference) skip it. Such references will prevent the caller
// from being removed.
if (!CS2 || CS2.getCalledFunction() != Caller) {
- callerWillBeRemoved = false;
+ ApplyLastCallBonus = false;
continue;
}
InlineCost IC2 = GetInlineCost(CS2);
++NumCallerCallersAnalyzed;
if (!IC2) {
- callerWillBeRemoved = false;
+ ApplyLastCallBonus = false;
continue;
}
if (IC2.isAlways())
@@ -356,7 +377,7 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// one is set very low by getInlineCost, in anticipation that Caller will
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
- if (callerWillBeRemoved && !Caller->hasOneUse())
+ if (ApplyLastCallBonus)
TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
@@ -365,6 +386,33 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
return false;
}
+static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R,
+ const ore::NV &Arg) {
+ return R << Arg.Val;
+}
+
+template <class RemarkT>
+RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
+ using namespace ore;
+ if (IC.isAlways()) {
+ R << "(cost=always)";
+ } else if (IC.isNever()) {
+ R << "(cost=never)";
+ } else {
+ R << "(cost=" << ore::NV("Cost", IC.getCost())
+ << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
+ }
+ if (const char *Reason = IC.getReason())
+ R << ": " << ore::NV("Reason", Reason);
+ return R;
+}
+
+static std::string inlineCostStr(const InlineCost &IC) {
+ std::stringstream Remark;
+ Remark << IC;
+ return Remark.str();
+}
+
/// Return the cost only if the inliner should attempt to inline at the given
/// CallSite. If we return the cost, we will emit an optimisation remark later
/// using that cost, so we won't do so from this function.
@@ -379,35 +427,32 @@ shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
Function *Caller = CS.getCaller();
if (IC.isAlways()) {
- LLVM_DEBUG(dbgs() << " Inlining: cost=always"
+ LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
<< ", Call: " << *CS.getInstruction() << "\n");
return IC;
}
if (IC.isNever()) {
- LLVM_DEBUG(dbgs() << " NOT Inlining: cost=never"
+ LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
<< ", Call: " << *CS.getInstruction() << "\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
<< NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller)
- << " because it should never be inlined (cost=never)";
+ << NV("Caller", Caller) << " because it should never be inlined "
+ << IC;
});
- return None;
+ return IC;
}
if (!IC) {
- LLVM_DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
- << ", thres=" << IC.getThreshold()
+ LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
<< ", Call: " << *CS.getInstruction() << "\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
<< NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller) << " because too costly to inline (cost="
- << NV("Cost", IC.getCost())
- << ", threshold=" << NV("Threshold", IC.getThreshold()) << ")";
+ << NV("Caller", Caller) << " because too costly to inline " << IC;
});
- return None;
+ return IC;
}
int TotalSecondaryCost = 0;
@@ -428,8 +473,7 @@ shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
return None;
}
- LLVM_DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
- << ", thres=" << IC.getThreshold()
+ LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
<< ", Call: " << *CS.getInstruction() << '\n');
return IC;
}
@@ -461,6 +505,26 @@ bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
return inlineCalls(SCC);
}
+static void emit_inlined_into(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc,
+ const BasicBlock *Block, const Function &Callee,
+ const Function &Caller, const InlineCost &IC) {
+ ORE.emit([&]() {
+ bool AlwaysInline = IC.isAlways();
+ StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
+ return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block)
+ << ore::NV("Callee", &Callee) << " inlined into "
+ << ore::NV("Caller", &Caller) << " with " << IC;
+ });
+}
+
+static void setInlineRemark(CallSite &CS, StringRef message) {
+ if (!InlineRemarkAttribute)
+ return;
+
+ Attribute attr = Attribute::get(CS->getContext(), "inline-remark", message);
+ CS.addAttribute(AttributeList::FunctionIndex, attr);
+}
+
static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
@@ -510,6 +574,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
if (Callee->isDeclaration()) {
using namespace ore;
+ setInlineRemark(CS, "unavailable definition");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
<< NV("Callee", Callee) << " will not be inlined into "
@@ -573,8 +638,10 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// infinitely inline.
InlineHistoryID = CallSites[CSi].second;
if (InlineHistoryID != -1 &&
- InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+ InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+ setInlineRemark(CS, "recursive");
continue;
+ }
}
// FIXME for new PM: because of the old PM we currently generate ORE and
@@ -585,8 +652,17 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
// If the policy determines that we should inline this function,
// delete the call instead.
- if (!OIC)
+ if (!OIC.hasValue()) {
+ setInlineRemark(CS, "deferred");
+ continue;
+ }
+
+ if (!OIC.getValue()) {
+ // shouldInline() call returned a negative inline cost that explains
+ // why this callsite should not be inlined.
+ setInlineRemark(CS, inlineCostStr(*OIC));
continue;
+ }
// If this call site is dead and it is to a readonly function, we should
// just delete the call instead of trying to inline it, regardless of
@@ -595,6 +671,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
if (IsTriviallyDead) {
LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
+ setInlineRemark(CS, "trivially dead");
CG[Caller]->removeCallEdgeFor(CS);
Instr->eraseFromParent();
++NumCallsDeleted;
@@ -606,34 +683,22 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// Attempt to inline the function.
using namespace ore;
- if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID, InsertLifetime, AARGetter,
- ImportedFunctionsStats)) {
+ InlineResult IR = InlineCallIfPossible(
+ CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
+ InsertLifetime, AARGetter, ImportedFunctionsStats);
+ if (!IR) {
+ setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
Block)
<< NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller);
+ << NV("Caller", Caller) << ": " << NV("Reason", IR.message);
});
continue;
}
++NumInlined;
- ORE.emit([&]() {
- bool AlwaysInline = OIC->isAlways();
- StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
- OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
- R << NV("Callee", Callee) << " inlined into ";
- R << NV("Caller", Caller);
- if (AlwaysInline)
- R << " with cost=always";
- else {
- R << " with cost=" << NV("Cost", OIC->getCost());
- R << " (threshold=" << NV("Threshold", OIC->getThreshold());
- R << ")";
- }
- return R;
- });
+ emit_inlined_into(ORE, DLoc, Block, *Callee, *Caller, *OIC);
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
@@ -692,7 +757,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
ACT = &getAnalysis<AssumptionCacheTracker>();
- PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
@@ -865,6 +930,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Calls.push_back({CS, -1});
else if (!isa<IntrinsicInst>(I)) {
using namespace ore;
+ setInlineRemark(CS, "unavailable definition");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
<< NV("Callee", Callee) << " will not be inlined into "
@@ -908,8 +974,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
- if (F.hasFnAttribute(Attribute::OptimizeNone))
+ if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+ setInlineRemark(Calls[i].first, "optnone attribute");
continue;
+ }
LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
@@ -953,8 +1021,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Function &Callee = *CS.getCalledFunction();
if (InlineHistoryID != -1 &&
- InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
+ InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
+ setInlineRemark(CS, "recursive");
continue;
+ }
// Check if this inlining may repeat breaking an SCC apart that has
// already been split once before. In that case, inlining here may
@@ -966,13 +1036,23 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
"previously split out of this SCC by inlining: "
<< F.getName() << " -> " << Callee.getName() << "\n");
+ setInlineRemark(CS, "recursive SCC split");
continue;
}
Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
// Check whether we want to inline this callsite.
- if (!OIC)
+ if (!OIC.hasValue()) {
+ setInlineRemark(CS, "deferred");
+ continue;
+ }
+
+ if (!OIC.getValue()) {
+ // shouldInline() call returned a negative inline cost that explains
+ // why this callsite should not be inlined.
+ setInlineRemark(CS, inlineCostStr(*OIC));
continue;
+ }
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
@@ -987,32 +1067,22 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
using namespace ore;
- if (!InlineFunction(CS, IFI)) {
+ InlineResult IR = InlineFunction(CS, IFI);
+ if (!IR) {
+ setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
<< NV("Callee", &Callee) << " will not be inlined into "
- << NV("Caller", &F);
+ << NV("Caller", &F) << ": " << NV("Reason", IR.message);
});
continue;
}
DidInline = true;
InlinedCallees.insert(&Callee);
- ORE.emit([&]() {
- bool AlwaysInline = OIC->isAlways();
- StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
- OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
- R << NV("Callee", &Callee) << " inlined into ";
- R << NV("Caller", &F);
- if (AlwaysInline)
- R << " with cost=always";
- else {
- R << " with cost=" << NV("Cost", OIC->getCost());
- R << " (threshold=" << NV("Threshold", OIC->getThreshold());
- R << ")";
- }
- return R;
- });
+ ++NumInlined;
+
+ emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC);
// Add any new callsites to defined functions to the worklist.
if (!IFI.InlinedCallSites.empty()) {
@@ -1099,10 +1169,19 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// SCC splits and merges. To avoid this, we capture the originating caller
// node and the SCC containing the call edge. This is a slight over
// approximation of the possible inlining decisions that must be avoided,
- // but is relatively efficient to store.
+ // but is relatively efficient to store. We use C != OldC to know when
+ // a new SCC is generated and the original SCC may be generated via merge
+ // in later iterations.
+ //
+ // It is also possible that even if no new SCC is generated
+ // (i.e., C == OldC), the original SCC could be split and then merged
+ // into the same one as itself, and the original SCC will be added into
+ // UR.CWorklist again; we want to catch such cases too.
+ //
// FIXME: This seems like a very heavyweight way of retaining the inline
// history, we should look for a more efficient way of tracking it.
- if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
+ if ((C != OldC || UR.CWorklist.count(OldC)) &&
+ llvm::any_of(InlinedCallees, [&](Function *Callee) {
return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
})) {
LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
@@ -1138,6 +1217,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// And delete the actual function from the module.
M.getFunctionList().erase(DeadF);
+ ++NumDeleted;
}
if (!Changed)
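The inliner changes above format an InlineCost once and reuse the text for debug output, optimization remarks, and the optional "inline-remark" callsite attribute. A standalone sketch of that formatting helper follows; CostDecision and costStr are invented stand-ins for InlineCost and inlineCostStr.

#include <iostream>
#include <sstream>
#include <string>

struct CostDecision {
  bool Always;
  bool Never;
  int Cost;
  int Threshold;
  const char *Reason; // may be null
};

static std::ostream &operator<<(std::ostream &OS, const CostDecision &IC) {
  if (IC.Always)
    OS << "(cost=always)";
  else if (IC.Never)
    OS << "(cost=never)";
  else
    OS << "(cost=" << IC.Cost << ", threshold=" << IC.Threshold << ")";
  if (IC.Reason)
    OS << ": " << IC.Reason;
  return OS;
}

// Capture the same rendering into a string, e.g. for a callsite attribute.
static std::string costStr(const CostDecision &IC) {
  std::stringstream SS;
  SS << IC;
  return SS.str();
}

int main() {
  CostDecision TooCostly{false, false, 120, 75, "cost over threshold"};
  std::cout << "remark attribute value: " << costStr(TooCostly) << "\n";
}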
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index 8c86f7cb806a..733235d45a09 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -104,8 +104,8 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
bool ShouldExtractLoop = false;
// Extract the loop if the entry block doesn't branch to the loop header.
- TerminatorInst *EntryTI =
- L->getHeader()->getParent()->getEntryBlock().getTerminator();
+ Instruction *EntryTI =
+ L->getHeader()->getParent()->getEntryBlock().getTerminator();
if (!isa<BranchInst>(EntryTI) ||
!cast<BranchInst>(EntryTI)->isUnconditional() ||
EntryTI->getSuccessor(0) != L->getHeader()) {
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 4f7571884707..87c65db09517 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -989,6 +989,7 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
if (F->isDSOLocal()) {
Function *RealF = Function::Create(F->getFunctionType(),
GlobalValue::ExternalLinkage,
+ F->getAddressSpace(),
Name + ".cfi", &M);
RealF->setVisibility(GlobalVariable::HiddenVisibility);
replaceDirectCalls(F, RealF);
@@ -1000,13 +1001,13 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
if (F->isDeclarationForLinker() && !isDefinition) {
// Declaration of an external function.
FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
- Name + ".cfi_jt", &M);
+ F->getAddressSpace(), Name + ".cfi_jt", &M);
FDecl->setVisibility(GlobalValue::HiddenVisibility);
} else if (isDefinition) {
F->setName(Name + ".cfi");
F->setLinkage(GlobalValue::ExternalLinkage);
FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
- Name, &M);
+ F->getAddressSpace(), Name, &M);
FDecl->setVisibility(Visibility);
Visibility = GlobalValue::HiddenVisibility;
@@ -1016,7 +1017,8 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
for (auto &U : F->uses()) {
if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) {
Function *AliasDecl = Function::Create(
- F->getFunctionType(), GlobalValue::ExternalLinkage, "", &M);
+ F->getFunctionType(), GlobalValue::ExternalLinkage,
+ F->getAddressSpace(), "", &M);
AliasDecl->takeName(A);
A->replaceAllUsesWith(AliasDecl);
ToErase.push_back(A);
@@ -1191,7 +1193,9 @@ void LowerTypeTestsModule::moveInitializerToModuleConstructor(
WeakInitializerFn = Function::Create(
FunctionType::get(Type::getVoidTy(M.getContext()),
/* IsVarArg */ false),
- GlobalValue::InternalLinkage, "__cfi_global_var_init", &M);
+ GlobalValue::InternalLinkage,
+ M.getDataLayout().getProgramAddressSpace(),
+ "__cfi_global_var_init", &M);
BasicBlock *BB =
BasicBlock::Create(M.getContext(), "entry", WeakInitializerFn);
ReturnInst::Create(M.getContext(), BB);
@@ -1234,7 +1238,8 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
// placeholder first.
Function *PlaceholderFn =
Function::Create(cast<FunctionType>(F->getValueType()),
- GlobalValue::ExternalWeakLinkage, "", &M);
+ GlobalValue::ExternalWeakLinkage,
+ F->getAddressSpace(), "", &M);
replaceCfiUses(F, PlaceholderFn, IsDefinition);
Constant *Target = ConstantExpr::getSelect(
@@ -1424,7 +1429,9 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
Function *JumpTableFn =
Function::Create(FunctionType::get(Type::getVoidTy(M.getContext()),
/* IsVarArg */ false),
- GlobalValue::PrivateLinkage, ".cfi.jumptable", &M);
+ GlobalValue::PrivateLinkage,
+ M.getDataLayout().getProgramAddressSpace(),
+ ".cfi.jumptable", &M);
ArrayType *JumpTableType =
ArrayType::get(getJumpTableEntryType(), Functions.size());
auto JumpTable =
@@ -1695,6 +1702,13 @@ bool LowerTypeTestsModule::lower() {
!ExportSummary && !ImportSummary)
return false;
+ // If only some of the modules were split, we cannot correctly handle
+ // code that contains type tests.
+ if (TypeTestFunc && !TypeTestFunc->use_empty() &&
+ ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+ (ImportSummary && ImportSummary->partiallySplitLTOUnits())))
+ report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test");
+
if (ImportSummary) {
if (TypeTestFunc) {
for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
@@ -1813,7 +1827,8 @@ bool LowerTypeTestsModule::lower() {
if (!F)
F = Function::Create(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
- GlobalVariable::ExternalLinkage, FunctionName, &M);
+ GlobalVariable::ExternalLinkage,
+ M.getDataLayout().getProgramAddressSpace(), FunctionName, &M);
// If the function is available_externally, remove its definition so
// that it is handled the same way as a declaration. Later we will try
@@ -1997,7 +2012,7 @@ bool LowerTypeTestsModule::lower() {
}
Sets.emplace_back(I, MaxUniqueId);
}
- llvm::sort(Sets.begin(), Sets.end(),
+ llvm::sort(Sets,
[](const std::pair<GlobalClassesTy::iterator, unsigned> &S1,
const std::pair<GlobalClassesTy::iterator, unsigned> &S2) {
return S1.second < S2.second;
@@ -2022,12 +2037,12 @@ bool LowerTypeTestsModule::lower() {
// Order type identifiers by unique ID for determinism. This ordering is
// stable as there is a one-to-one mapping between metadata and unique IDs.
- llvm::sort(TypeIds.begin(), TypeIds.end(), [&](Metadata *M1, Metadata *M2) {
+ llvm::sort(TypeIds, [&](Metadata *M1, Metadata *M2) {
return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId;
});
// Same for the branch funnels.
- llvm::sort(ICallBranchFunnels.begin(), ICallBranchFunnels.end(),
+ llvm::sort(ICallBranchFunnels,
[&](ICallBranchFunnel *F1, ICallBranchFunnel *F2) {
return F1->UniqueId < F2->UniqueId;
});
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 3bebb96c6d35..11efe95b10d4 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -136,6 +136,7 @@ using namespace llvm;
STATISTIC(NumFunctionsMerged, "Number of functions merged");
STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumAliasesWritten, "Number of aliases generated");
STATISTIC(NumDoubleWeak, "Number of new functions created");
static cl::opt<unsigned> NumFunctionsForSanityCheck(
@@ -165,6 +166,11 @@ static cl::opt<bool>
cl::desc("Preserve debug info in thunk when mergefunc "
"transformations are made."));
+static cl::opt<bool>
+ MergeFunctionsAliases("mergefunc-use-aliases", cl::Hidden,
+ cl::init(false),
+ cl::desc("Allow mergefunc to create aliases"));
+
namespace {
class FunctionNode {
@@ -272,6 +278,13 @@ private:
/// delete G.
void writeThunk(Function *F, Function *G);
+ // Replace G with an alias to F (deleting function G)
+ void writeAlias(Function *F, Function *G);
+
+ // Replace G with an alias to F if possible, or a thunk to F if
+ // profitable. Returns false if neither is the case.
+ bool writeThunkOrAlias(Function *F, Function *G);
+
/// Replace function F with function G in the function tree.
void replaceFunctionInTree(const FunctionNode &FN, Function *G);
@@ -284,7 +297,7 @@ private:
// modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
// dangling iterators into FnTree. The invariant that preserves this is that
// there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
- ValueMap<Function*, FnTreeType::iterator> FNodesInTree;
+ DenseMap<AssertingVH<Function>, FnTreeType::iterator> FNodesInTree;
};
} // end anonymous namespace
@@ -425,6 +438,7 @@ bool MergeFunctions::runOnModule(Module &M) {
} while (!Deferred.empty());
FnTree.clear();
+ FNodesInTree.clear();
GlobalNumbers.clear();
return Changed;
@@ -460,7 +474,7 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
NewPAL.getRetAttributes(),
NewArgAttrs));
- remove(CS.getInstruction()->getParent()->getParent());
+ remove(CS.getInstruction()->getFunction());
U->set(BitcastNew);
}
}
@@ -608,7 +622,7 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
LLVM_DEBUG(BI->print(dbgs()));
LLVM_DEBUG(dbgs() << "\n");
}
- } else if (dyn_cast<TerminatorInst>(BI) == GEntryBlock->getTerminator()) {
+ } else if (BI->isTerminator() && &*BI == GEntryBlock->getTerminator()) {
LLVM_DEBUG(dbgs() << " Will Include Terminator: ");
LLVM_DEBUG(BI->print(dbgs()));
LLVM_DEBUG(dbgs() << "\n");
@@ -679,8 +693,8 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
GEntryBlock->getTerminator()->eraseFromParent();
BB = GEntryBlock;
} else {
- NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
- G->getParent());
+ NewG = Function::Create(G->getFunctionType(), G->getLinkage(),
+ G->getAddressSpace(), "", G->getParent());
BB = BasicBlock::Create(F->getContext(), "", NewG);
}
@@ -734,27 +748,76 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
++NumThunksWritten;
}
+// Whether this function may be replaced by an alias
+static bool canCreateAliasFor(Function *F) {
+ if (!MergeFunctionsAliases || !F->hasGlobalUnnamedAddr())
+ return false;
+
+ // We should only see linkages supported by aliases here
+ assert(F->hasLocalLinkage() || F->hasExternalLinkage()
+ || F->hasWeakLinkage() || F->hasLinkOnceLinkage());
+ return true;
+}
+
+// Replace G with an alias to F (deleting function G)
+void MergeFunctions::writeAlias(Function *F, Function *G) {
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ PointerType *PtrType = G->getType();
+ auto *GA = GlobalAlias::create(
+ PtrType->getElementType(), PtrType->getAddressSpace(),
+ G->getLinkage(), "", BitcastF, G->getParent());
+
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ GA->takeName(G);
+ GA->setVisibility(G->getVisibility());
+ GA->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+ removeUsers(G);
+ G->replaceAllUsesWith(GA);
+ G->eraseFromParent();
+
+ LLVM_DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
+ ++NumAliasesWritten;
+}
+
+// Replace G with an alias to F if possible, or a thunk to F if
+// profitable. Returns false if neither is the case.
+bool MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+ if (canCreateAliasFor(G)) {
+ writeAlias(F, G);
+ return true;
+ }
+ if (isThunkProfitable(F)) {
+ writeThunk(F, G);
+ return true;
+ }
+ return false;
+}
+
// Merge two equivalent functions. Upon completion, Function G is deleted.
void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
if (F->isInterposable()) {
assert(G->isInterposable());
- if (!isThunkProfitable(F)) {
+ // Both writeThunkOrAlias() calls below must succeed, either because we can
+ // create aliases for G and NewF, or because a thunk for F is profitable.
+ // F here has the same signature as NewF below, so that's what we check.
+ if (!isThunkProfitable(F) && (!canCreateAliasFor(F) || !canCreateAliasFor(G))) {
return;
}
// Make them both thunks to the same internal function.
- Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
- F->getParent());
- H->copyAttributesFrom(F);
- H->takeName(F);
+ Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
+ F->getAddressSpace(), "", F->getParent());
+ NewF->copyAttributesFrom(F);
+ NewF->takeName(F);
removeUsers(F);
- F->replaceAllUsesWith(H);
+ F->replaceAllUsesWith(NewF);
- unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+ unsigned MaxAlignment = std::max(G->getAlignment(), NewF->getAlignment());
- writeThunk(F, G);
- writeThunk(F, H);
+ writeThunkOrAlias(F, G);
+ writeThunkOrAlias(F, NewF);
F->setAlignment(MaxAlignment);
F->setLinkage(GlobalValue::PrivateLinkage);
@@ -770,6 +833,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
GlobalNumbers.erase(G);
// If G's address is not significant, replace it entirely.
Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ removeUsers(G);
G->replaceAllUsesWith(BitcastF);
} else {
// Redirect direct callers of G to F. (See note on MergeFunctionsPDI
@@ -781,18 +845,15 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// If G was internal then we may have replaced all uses of G with F. If so,
// stop here and delete G. There's no need for a thunk. (See note on
// MergeFunctionsPDI above).
- if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) {
+ if (G->isDiscardableIfUnused() && G->use_empty() && !MergeFunctionsPDI) {
G->eraseFromParent();
++NumFunctionsMerged;
return;
}
- if (!isThunkProfitable(F)) {
- return;
+ if (writeThunkOrAlias(F, G)) {
+ ++NumFunctionsMerged;
}
-
- writeThunk(F, G);
- ++NumFunctionsMerged;
}
}
@@ -816,6 +877,24 @@ void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
FN.replaceBy(G);
}
+// Ordering for functions that are equal under FunctionComparator
+static bool isFuncOrderCorrect(const Function *F, const Function *G) {
+ if (F->isInterposable() != G->isInterposable()) {
+ // Strong before weak, because the weak function may call the strong
+ // one, but not the other way around.
+ return !F->isInterposable();
+ }
+ if (F->hasLocalLinkage() != G->hasLocalLinkage()) {
+ // External before local, because we definitely have to keep the external
+ // function, but may be able to drop the local one.
+ return !F->hasLocalLinkage();
+ }
+ // Impose a total order (by name) on the replacement of functions. This is
+ // important when operating on more than one module independently to prevent
+ // cycles of thunks calling each other when the modules are linked together.
+ return F->getName() <= G->getName();
+}
+
// Insert a ComparableFunction into the FnTree, or merge it away if equal to one
// that was already inserted.
bool MergeFunctions::insert(Function *NewFunction) {
@@ -832,14 +911,7 @@ bool MergeFunctions::insert(Function *NewFunction) {
const FunctionNode &OldF = *Result.first;
- // Impose a total order (by name) on the replacement of functions. This is
- // important when operating on more than one module independently to prevent
- // cycles of thunks calling each other when the modules are linked together.
- //
- // First of all, we process strong functions before weak functions.
- if ((OldF.getFunc()->isInterposable() && !NewFunction->isInterposable()) ||
- (OldF.getFunc()->isInterposable() == NewFunction->isInterposable() &&
- OldF.getFunc()->getName() > NewFunction->getName())) {
+ if (!isFuncOrderCorrect(OldF.getFunc(), NewFunction)) {
// Swap the two functions.
Function *F = OldF.getFunc();
replaceFunctionInTree(*Result.first, NewFunction);
@@ -882,7 +954,7 @@ void MergeFunctions::removeUsers(Value *V) {
for (User *U : V->users()) {
if (Instruction *I = dyn_cast<Instruction>(U)) {
- remove(I->getParent()->getParent());
+ remove(I->getFunction());
} else if (isa<GlobalValue>(U)) {
// do nothing
} else if (Constant *C = dyn_cast<Constant>(U)) {
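MergeFunctions now funnels its tie-breaking into isFuncOrderCorrect: strong before weak, external before local, then by name, so modules processed independently pick the same winner and cannot end up with thunks calling each other in a cycle once linked. A standalone sketch of that predicate follows; the Fn struct is an illustrative stand-in for llvm::Function.

#include <cassert>
#include <string>

struct Fn {
  std::string Name;
  bool Interposable;
  bool LocalLinkage;
};

static bool isFuncOrderCorrect(const Fn &F, const Fn &G) {
  if (F.Interposable != G.Interposable)
    return !F.Interposable; // strong before weak
  if (F.LocalLinkage != G.LocalLinkage)
    return !F.LocalLinkage; // external before local
  return F.Name <= G.Name;  // deterministic tie-break by name
}

int main() {
  Fn Strong{"foo", false, false}, Weak{"foo.weak", true, false};
  Fn External{"bar", false, false}, Local{"bar.local", false, true};
  assert(isFuncOrderCorrect(Strong, Weak));
  assert(!isFuncOrderCorrect(Weak, Strong));
  assert(isFuncOrderCorrect(External, Local));
}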
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 4907e4b30519..da214a1d3b44 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -359,7 +359,7 @@ struct PartialInlinerLegacyPass : public ModulePass {
TargetTransformInfoWrapperPass *TTIWP =
&getAnalysis<TargetTransformInfoWrapperPass>();
ProfileSummaryInfo *PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&ACT](Function &F) -> AssumptionCache & {
@@ -403,7 +403,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
BasicBlock *Dom = BlockList.front();
- return BlockList.size() > 1 && pred_size(Dom) == 1;
+ return BlockList.size() > 1 && Dom->hasNPredecessors(1);
};
auto IsSingleExit =
@@ -468,7 +468,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
// Only consider regions with predecessor blocks that are considered
// not-cold (default: part of the top 99.99% of all block counters)
// AND greater than our minimum block execution count (default: 100).
- if (PSI->isColdBB(thisBB, BFI) ||
+ if (PSI->isColdBlock(thisBB, BFI) ||
BBProfileCount(thisBB) < MinBlockCounterExecution)
continue;
for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
@@ -556,7 +556,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
};
auto IsReturnBlock = [](BasicBlock *BB) {
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
return isa<ReturnInst>(TI);
};
@@ -834,42 +834,37 @@ bool PartialInlinerImpl::shouldPartialInline(
int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
int InlineCost = 0;
const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- switch (I->getOpcode()) {
+ for (Instruction &I : BB->instructionsWithoutDebug()) {
+ // Skip free instructions.
+ switch (I.getOpcode()) {
case Instruction::BitCast:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::Alloca:
+ case Instruction::PHI:
continue;
case Instruction::GetElementPtr:
- if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+ if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
continue;
break;
default:
break;
}
- IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
- if (IntrInst) {
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
- IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
- continue;
- }
+ if (I.isLifetimeStartOrEnd())
+ continue;
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
InlineCost += getCallsiteCost(CallSite(CI), DL);
continue;
}
- if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
InlineCost += getCallsiteCost(CallSite(II), DL);
continue;
}
- if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
continue;
}
@@ -1251,7 +1246,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
if (PSI->isFunctionEntryCold(F))
return {false, nullptr};
- if (F->user_begin() == F->user_end())
+ if (empty(F->users()))
return {false, nullptr};
OptimizationRemarkEmitter ORE(F);
@@ -1357,7 +1352,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
return false;
}
- assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
+ assert(empty(Cloner.OrigFunc->users()) &&
"F's users should all be replaced!");
std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
@@ -1461,9 +1456,7 @@ bool PartialInlinerImpl::run(Module &M) {
std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
if (Result.second)
Worklist.push_back(Result.second);
- if (Result.first) {
- Changed = true;
- }
+ Changed |= Result.first;
}
return Changed;
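Editor's note on the PartialInlining hunks above: computeBBInlineCost now iterates instructionsWithoutDebug() and treats bitcasts, allocas, PHIs, zero-index GEPs and lifetime markers as free, while switches pay per successor. A rough standalone sketch of the same accounting (assumed instruction categories and cost constant, not the LLVM cost model):

#include <iostream>
#include <vector>

// Simplified instruction kinds for illustration only.
enum class Kind { Free, Call, Switch, Other };
constexpr int InstrCost = 5;  // assumed per-instruction cost, standing in for InlineConstants::InstrCost

struct Inst {
  Kind K;
  int NumSwitchCases = 0;  // only meaningful for Kind::Switch
};

// Count everything except "free" instructions; switches pay one edge per case plus the default.
static int blockInlineCost(const std::vector<Inst> &Block) {
  int Cost = 0;
  for (const Inst &I : Block) {
    switch (I.K) {
    case Kind::Free:
      continue;                                    // casts, allocas, PHIs, lifetime markers
    case Kind::Switch:
      Cost += (I.NumSwitchCases + 1) * InstrCost;  // mirrors (getNumCases() + 1) * InstrCost
      continue;
    case Kind::Call:  // the real code charges a call-site cost here instead
    case Kind::Other:
      Cost += InstrCost;
      break;
    }
  }
  return Cost;
}

int main() {
  std::vector<Inst> BB = {{Kind::Free}, {Kind::Other}, {Kind::Switch, 3}};
  std::cout << blockInlineCost(BB) << "\n"; // 5 + 4*5 = 25
  return 0;
}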
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 5ced6481996a..9764944dc332 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -104,6 +104,10 @@ static cl::opt<bool>
EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable preparation for ThinLTO."));
+cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden,
+ cl::desc("Enable hot-cold splitting pass"));
+
+
static cl::opt<bool> RunPGOInstrGen(
"profile-generate", cl::init(false), cl::Hidden,
cl::desc("Enable PGO instrumentation."));
@@ -152,6 +156,10 @@ static cl::opt<bool> EnableGVNSink(
"enable-gvn-sink", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN sinking pass (default = off)"));
+static cl::opt<bool>
+ EnableCHR("enable-chr", cl::init(true), cl::Hidden,
+ cl::desc("Enable control height reduction optimization (CHR)"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -367,13 +375,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
addExtensionsToPM(EP_LateLoopOptimizations, MPM);
MPM.add(createLoopDeletionPass()); // Delete dead loops
- if (EnableLoopInterchange) {
- // FIXME: These are function passes and break the loop pass pipeline.
+ if (EnableLoopInterchange)
MPM.add(createLoopInterchangePass()); // Interchange loops
- MPM.add(createCFGSimplificationPass());
- }
- if (!DisableUnrollLoops)
- MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
+
+ MPM.add(createSimpleLoopUnrollPass(OptLevel,
+ DisableUnrollLoops)); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
// This ends the loop pass pipelines.
@@ -411,6 +417,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Clean up after everything.
addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
+
+ if (EnableCHR && OptLevel >= 3 &&
+ (!PGOInstrUse.empty() || !PGOSampleUse.empty()))
+ MPM.add(createControlHeightReductionLegacyPass());
}
void PassManagerBuilder::populateModulePassManager(
@@ -452,12 +462,14 @@ void PassManagerBuilder::populateModulePassManager(
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
- // Rename anon globals to be able to export them in the summary.
- // This has to be done after we add the extensions to the pass manager
- // as there could be passes (e.g. Adddress sanitizer) which introduce
- // new unnamed globals.
- if (PrepareForLTO || PrepareForThinLTO)
+ if (PrepareForLTO || PrepareForThinLTO) {
+ MPM.add(createCanonicalizeAliasesPass());
+ // Rename anon globals to be able to export them in the summary.
+ // This has to be done after we add the extensions to the pass manager
+ // as there could be passes (e.g. Address sanitizer) which introduce
+ // new unnamed globals.
MPM.add(createNameAnonGlobalPass());
+ }
return;
}
@@ -575,6 +587,7 @@ void PassManagerBuilder::populateModulePassManager(
// Ensure we perform any last passes, but do so before renaming anonymous
// globals in case the passes add any.
addExtensionsToPM(EP_OptimizerLast, MPM);
+ MPM.add(createCanonicalizeAliasesPass());
// Rename anon globals to be able to export them in the summary.
MPM.add(createNameAnonGlobalPass());
return;
@@ -627,7 +640,7 @@ void PassManagerBuilder::populateModulePassManager(
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
MPM.add(createLoopDistributePass());
- MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops, !LoopVectorize));
// Eliminate loads by forwarding stores from the previous iteration to loads
// of the current iteration.
@@ -672,16 +685,17 @@ void PassManagerBuilder::populateModulePassManager(
addExtensionsToPM(EP_Peephole, MPM);
addInstructionCombiningPass(MPM);
- if (!DisableUnrollLoops) {
- if (EnableUnrollAndJam) {
- // Unroll and Jam. We do this before unroll but need to be in a separate
- // loop pass manager in order for the outer loop to be processed by
- // unroll and jam before the inner loop is unrolled.
- MPM.add(createLoopUnrollAndJamPass(OptLevel));
- }
+ if (EnableUnrollAndJam && !DisableUnrollLoops) {
+ // Unroll and Jam. We do this before unroll but need to be in a separate
+ // loop pass manager in order for the outer loop to be processed by
+ // unroll and jam before the inner loop is unrolled.
+ MPM.add(createLoopUnrollAndJamPass(OptLevel));
+ }
- MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops
+ MPM.add(createLoopUnrollPass(OptLevel,
+ DisableUnrollLoops)); // Unroll small loops
+ if (!DisableUnrollLoops) {
// LoopUnroll may generate some redundancy to clean up.
addInstructionCombiningPass(MPM);
@@ -690,7 +704,9 @@ void PassManagerBuilder::populateModulePassManager(
// outer loop. LICM pass can help to promote the runtime check out if the
// checked value is loop invariant.
MPM.add(createLICMPass());
- }
+ }
+
+ MPM.add(createWarnMissedTransformationsPass());
// After vectorization and unrolling, assume intrinsics may tell us more
// about pointer alignments.
@@ -722,18 +738,29 @@ void PassManagerBuilder::populateModulePassManager(
// flattening of blocks.
MPM.add(createDivRemPairsPass());
+ if (EnableHotColdSplit)
+ MPM.add(createHotColdSplittingPass());
+
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
MPM.add(createCFGSimplificationPass());
addExtensionsToPM(EP_OptimizerLast, MPM);
- // Rename anon globals to be able to handle them in the summary
- if (PrepareForLTO)
+ if (PrepareForLTO) {
+ MPM.add(createCanonicalizeAliasesPass());
+ // Rename anon globals to be able to handle them in the summary
MPM.add(createNameAnonGlobalPass());
+ }
}
void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
+ // Load sample profile before running the LTO optimization pipeline.
+ if (!PGOSampleUse.empty()) {
+ PM.add(createPruneEHPass());
+ PM.add(createSampleProfileLoaderPass(PGOSampleUse));
+ }
+
// Remove unused virtual tables to improve the quality of code generated by
// whole-program devirtualization and bitset lowering.
PM.add(createGlobalDCEPass());
@@ -851,12 +878,13 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
if (EnableLoopInterchange)
PM.add(createLoopInterchangePass());
- if (!DisableUnrollLoops)
- PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
- PM.add(createLoopVectorizePass(true, LoopVectorize));
+ PM.add(createSimpleLoopUnrollPass(OptLevel,
+ DisableUnrollLoops)); // Unroll small loops
+ PM.add(createLoopVectorizePass(true, !LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- PM.add(createLoopUnrollPass(OptLevel));
+ PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops));
+
+ PM.add(createWarnMissedTransformationsPass());
// Now that we've optimized loops (in particular loop induction variables),
// we may have exposed more scalar opportunities. Run parts of the scalar
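Editor's note on the PassManagerBuilder hunks above: they follow one pattern — a hidden cl::opt gates whether an extra pass (hot/cold splitting, CHR) is appended, and the unrolling passes are now always added with the "disabled" flag passed in rather than being skipped outright. A standalone sketch of that flag-gated pipeline-building style (simplified types and pass names, not the LLVM pass manager):

#include <iostream>
#include <string>
#include <vector>

// A "pass" here is just a named step; the real pipeline adds Pass objects.
using Pipeline = std::vector<std::string>;

// Assumed option values; in LLVM these come from cl::opt flags.
struct Options {
  bool EnableHotColdSplit = false;
  bool EnableCHR = true;
  bool DisableUnrollLoops = false;
  int OptLevel = 2;
  bool HaveProfile = false;
};

static void populate(Pipeline &PM, const Options &Opt) {
  // Unrolling is always added; the pass itself honours the disable flag,
  // mirroring createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops).
  PM.push_back(Opt.DisableUnrollLoops ? "loop-unroll(disabled)" : "loop-unroll");

  // CHR only at -O3 with profile data, as in the hunk above.
  if (Opt.EnableCHR && Opt.OptLevel >= 3 && Opt.HaveProfile)
    PM.push_back("chr");

  // Optional late pass gated by its own flag.
  if (Opt.EnableHotColdSplit)
    PM.push_back("hot-cold-split");
}

int main() {
  Options Opt;
  Opt.EnableHotColdSplit = true;
  Opt.OptLevel = 3;
  Opt.HaveProfile = true;
  Pipeline PM;
  populate(PM, Opt);
  for (const auto &P : PM)
    std::cout << P << "\n";
  return 0;
}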
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index 2be654258aa8..ae586c017471 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -107,7 +107,7 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
continue;
for (const BasicBlock &BB : *F) {
- const TerminatorInst *TI = BB.getTerminator();
+ const Instruction *TI = BB.getTerminator();
if (CheckUnwind && TI->mayThrow()) {
SCCMightUnwind = true;
} else if (CheckReturn && isa<ReturnInst>(TI)) {
@@ -255,7 +255,7 @@ static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG) {
}
if (TokenInst) {
- if (!isa<TerminatorInst>(TokenInst))
+ if (!TokenInst->isTerminator())
changeToUnreachable(TokenInst->getNextNode(), /*UseLLVMTrap=*/false);
} else {
// Get the list of successors of this block.
diff --git a/contrib/llvm/lib/Transforms/IPO/SCCP.cpp b/contrib/llvm/lib/Transforms/IPO/SCCP.cpp
index cc53c4b8c46f..d2c34abfc132 100644
--- a/contrib/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -1,4 +1,6 @@
#include "llvm/Transforms/IPO/SCCP.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar/SCCP.h"
@@ -8,9 +10,22 @@ using namespace llvm;
PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) {
const DataLayout &DL = M.getDataLayout();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
- if (!runIPSCCP(M, DL, &TLI))
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn {
+ DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ return {
+ make_unique<PredicateInfo>(F, DT, FAM.getResult<AssumptionAnalysis>(F)),
+ &DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F)};
+ };
+
+ if (!runIPSCCP(M, DL, &TLI, getAnalysis))
return PreservedAnalyses::all();
- return PreservedAnalyses::none();
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<PostDominatorTreeAnalysis>();
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ return PA;
}
namespace {
@@ -34,10 +49,25 @@ public:
const DataLayout &DL = M.getDataLayout();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- return runIPSCCP(M, DL, TLI);
+
+ auto getAnalysis = [this](Function &F) -> AnalysisResultsForFn {
+ DominatorTree &DT =
+ this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ return {
+ make_unique<PredicateInfo>(
+ F, DT,
+ this->getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+ F)),
+ nullptr, // We cannot preserve the DT or PDT with the legacy pass
+ nullptr}; // manager, so set them to nullptr.
+ };
+
+ return runIPSCCP(M, DL, TLI, getAnalysis);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
@@ -49,6 +79,7 @@ char IPSCCPLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(IPSCCPLegacyPass, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(IPSCCPLegacyPass, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index dcd24595f7ea..9f123c2b875e 100644
--- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -96,6 +96,13 @@ static cl::opt<std::string> SampleProfileFile(
"sample-profile-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+// The named file contains a set of transformations that may have been applied
+// to the symbol names between the program from which the sample data was
+// collected and the current program's symbols.
+static cl::opt<std::string> SampleProfileRemappingFile(
+ "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
+
static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
"sample-profile-max-propagate-iterations", cl::init(100),
cl::desc("Maximum number of iterations to go through when propagating "
@@ -116,6 +123,12 @@ static cl::opt<bool> NoWarnSampleUnused(
cl::desc("Use this option to turn off/on warnings about function with "
"samples but without debug information to use those samples. "));
+static cl::opt<bool> ProfileSampleAccurate(
+ "profile-sample-accurate", cl::Hidden, cl::init(false),
+ cl::desc("If the sample profile is accurate, we will mark all un-sampled "
+ "callsites and functions as having 0 samples. Otherwise, treat "
+ "un-sampled callsites and functions conservatively as unknown. "));
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -183,12 +196,12 @@ private:
class SampleProfileLoader {
public:
SampleProfileLoader(
- StringRef Name, bool IsThinLTOPreLink,
+ StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
: GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
- IsThinLTOPreLink(IsThinLTOPreLink) {}
+ RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -205,6 +218,7 @@ protected:
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
+ mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineCallInstruction(Instruction *I);
bool inlineHotFunctions(Function &F,
@@ -282,6 +296,9 @@ protected:
/// Name of the profile file to load.
std::string Filename;
+ /// Name of the profile remapping file to load.
+ std::string RemappingFilename;
+
/// Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid = false;
@@ -311,13 +328,14 @@ public:
SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
bool IsThinLTOPreLink = false)
- : ModulePass(ID), SampleLoader(Name, IsThinLTOPreLink,
- [&](Function &F) -> AssumptionCache & {
- return ACT->getAssumptionCache(F);
- },
- [&](Function &F) -> TargetTransformInfo & {
- return TTIWP->getTTI(F);
- }) {
+ : ModulePass(ID),
+ SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink,
+ [&](Function &F) -> AssumptionCache & {
+ return ACT->getAssumptionCache(F);
+ },
+ [&](Function &F) -> TargetTransformInfo & {
+ return TTIWP->getTTI(F);
+ }) {
initializeSampleProfileLoaderLegacyPassPass(
*PassRegistry::getPassRegistry());
}
@@ -527,10 +545,10 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
if (!FS)
return std::error_code();
- // Ignore all intrinsics and branch instructions.
- // Branch instruction usually contains debug info from sources outside of
+ // Ignore all intrinsics, phi nodes and branch instructions.
+ // Branch and phi-node instructions usually contain debug info from sources outside of
// the residing basic block, thus we ignore them during annotation.
- if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst))
+ if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
return std::error_code();
// If a direct call/invoke instruction is inlined in profile
@@ -643,8 +661,6 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
if (FS == nullptr)
return nullptr;
- std::string CalleeGUID;
- CalleeName = getRepInFormat(CalleeName, Reader->getFormat(), CalleeGUID);
return FS->findFunctionSamplesAt(LineLocation(FunctionSamples::getOffset(DIL),
DIL->getBaseDiscriminator()),
CalleeName);
@@ -683,10 +699,12 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
Sum += NameFS.second.getEntrySamples();
R.push_back(&NameFS.second);
}
- llvm::sort(R.begin(), R.end(),
- [](const FunctionSamples *L, const FunctionSamples *R) {
- return L->getEntrySamples() > R->getEntrySamples();
- });
+ llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
+ if (L->getEntrySamples() != R->getEntrySamples())
+ return L->getEntrySamples() > R->getEntrySamples();
+ return FunctionSamples::getGUID(L->getName()) <
+ FunctionSamples::getGUID(R->getName());
+ });
}
return R;
}
@@ -702,12 +720,14 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
- SmallVector<std::pair<LineLocation, StringRef>, 10> S;
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL)
return Samples;
- return Samples->findFunctionSamples(DIL);
+ auto it = DILocation2SampleMap.try_emplace(DIL, nullptr);
+ if (it.second)
+ it.first->second = Samples->findFunctionSamples(DIL);
+ return it.first->second;
}
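Editor's note on the hunk just above: findFunctionSamples now caches results per debug location — try_emplace inserts a null placeholder, and only on first insertion is the expensive lookup performed. A standalone sketch of that memoization idiom (std::map standing in for DenseMap, an invented expensive lookup):

#include <iostream>
#include <map>
#include <string>

struct Samples { int Count; };

// Pretend this walks the inline stack and is expensive.
static const Samples *expensiveLookup(const std::string &Loc) {
  static Samples S{42};
  std::cout << "computed for " << Loc << "\n";
  return &S;
}

static std::map<std::string, const Samples *> Cache;

static const Samples *findSamples(const std::string &Loc) {
  // try_emplace-style: insert a placeholder, fill it only if newly inserted.
  auto It = Cache.try_emplace(Loc, nullptr);
  if (It.second)
    It.first->second = expensiveLookup(Loc);
  return It.first->second;
}

int main() {
  findSamples("foo.c:10");  // computes
  findSamples("foo.c:10");  // served from the cache, no recompute
  return 0;
}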
bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
@@ -760,7 +780,6 @@ bool SampleProfileLoader::inlineHotFunctions(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
bool Changed = false;
- bool isCompact = (Reader->getFormat() == SPF_Compact_Binary);
while (true) {
bool LocalChanged = false;
SmallVector<Instruction *, 10> CIS;
@@ -792,19 +811,16 @@ bool SampleProfileLoader::inlineHotFunctions(
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
if (IsThinLTOPreLink) {
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
- PSI->getOrCompHotCountThreshold(),
- isCompact);
+ PSI->getOrCompHotCountThreshold());
continue;
}
- auto CalleeFunctionName = FS->getName();
+ auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());
// If it is a recursive call, we do not inline it as it could bloat
// the code exponentially. There is a way to better handle this, e.g.
// clone the caller first, and inline the cloned caller if it is
// recursive. As llvm does not inline recursive calls, we will
// simply ignore it instead of handling it explicitly.
- std::string FGUID;
- auto Fname = getRepInFormat(F.getName(), Reader->getFormat(), FGUID);
- if (CalleeFunctionName == Fname)
+ if (CalleeFunctionName == F.getName())
continue;
const char *Reason = "Callee function not available";
@@ -834,8 +850,7 @@ bool SampleProfileLoader::inlineHotFunctions(
LocalChanged = true;
} else if (IsThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions(
- InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold(),
- isCompact);
+ InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
}
}
if (LocalChanged) {
@@ -1177,14 +1192,13 @@ static SmallVector<InstrProfValueData, 2> SortCallTargets(
const SampleRecord::CallTargetMap &M) {
SmallVector<InstrProfValueData, 2> R;
for (auto I = M.begin(); I != M.end(); ++I)
- R.push_back({Function::getGUID(I->getKey()), I->getValue()});
- llvm::sort(R.begin(), R.end(),
- [](const InstrProfValueData &L, const InstrProfValueData &R) {
- if (L.Count == R.Count)
- return L.Value > R.Value;
- else
- return L.Count > R.Count;
- });
+ R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
+ llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
+ if (L.Count == R.Count)
+ return L.Value > R.Value;
+ else
+ return L.Count > R.Count;
+ });
return R;
}
@@ -1292,7 +1306,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
}
}
}
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 1)
continue;
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
@@ -1519,12 +1533,28 @@ bool SampleProfileLoader::doInitialization(Module &M) {
return false;
}
Reader = std::move(ReaderOrErr.get());
+ Reader->collectFuncsToUse(M);
ProfileIsValid = (Reader->read() == sampleprof_error::success);
+
+ if (!RemappingFilename.empty()) {
+ // Apply profile remappings to the loaded profile data if requested.
+ // For now, we only support remapping symbols encoded using the Itanium
+ // C++ ABI's name mangling scheme.
+ ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
+ RemappingFilename, Ctx, std::move(Reader));
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile remapping file: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false;
+ }
+ Reader = std::move(ReaderOrErr.get());
+ ProfileIsValid = (Reader->read() == sampleprof_error::success);
+ }
return true;
}
ModulePass *llvm::createSampleProfileLoaderPass() {
- return new SampleProfileLoaderLegacyPass(SampleProfileFile);
+ return new SampleProfileLoaderLegacyPass();
}
ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
@@ -1533,6 +1563,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI) {
+ FunctionSamples::GUIDToFuncNameMapper Mapper(M);
if (!ProfileIsValid)
return false;
@@ -1577,15 +1608,25 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
ACT = &getAnalysis<AssumptionCacheTracker>();
TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
ProfileSummaryInfo *PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
return SampleLoader.runOnModule(M, nullptr, PSI);
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
- // Initialize the entry count to -1, which will be treated conservatively
- // by getEntryCount as the same as unknown (None). If we have samples this
- // will be overwritten in emitAnnotations.
- F.setEntryCount(ProfileCount(-1, Function::PCT_Real));
+
+ DILocation2SampleMap.clear();
+ // By default the entry count is initialized to -1, which will be treated
+ // conservatively by getEntryCount as the same as unknown (None). This is
+ // to avoid newly added code to be treated as cold. If we have samples
+ // this will be overwritten in emitAnnotations.
+ // If ProfileSampleAccurate is true or F has profile-sample-accurate
+ // attribute, initialize the entry count to 0 so callsites or functions
+ // unsampled will be treated as cold.
+ uint64_t initialEntryCount =
+ (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
+ ? 0
+ : -1;
+ F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
auto &FAM =
@@ -1616,6 +1657,8 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
SampleProfileLoader SampleLoader(
ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
+ ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
+ : ProfileRemappingFileName,
IsThinLTOPreLink, GetAssumptionCache, GetTTI);
SampleLoader.doInitialization(M);
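Editor's note on the SampleProfile runOnFunction hunk above: the initial entry count now depends on whether the profile is declared accurate — 0 (unsampled code really is cold) versus -1 (unknown, so getEntryCount treats it as None). A standalone sketch of just that decision (invented flag and attribute query):

#include <cstdint>
#include <iostream>

// Assumed inputs: the global -profile-sample-accurate flag and a per-function
// "profile-sample-accurate" attribute, as described in the hunk above.
static uint64_t initialEntryCount(bool ProfileSampleAccurate, bool FnHasAccurateAttr) {
  // Accurate profile: unsampled functions/callsites are genuinely cold -> 0.
  // Otherwise: unknown, encoded as -1.
  return (ProfileSampleAccurate || FnHasAccurateAttr) ? 0 : static_cast<uint64_t>(-1);
}

int main() {
  std::cout << initialEntryCount(false, false) << "\n"; // 18446744073709551615, i.e. -1 / unknown
  std::cout << initialEntryCount(true, false) << "\n";  // 0, i.e. treat unsampled code as cold
  return 0;
}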
diff --git a/contrib/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
index 3c5ad37bced1..ba4efb3ff60d 100644
--- a/contrib/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
@@ -46,7 +47,7 @@ using ProfileCount = Function::ProfileCount;
#define DEBUG_TYPE "synthetic-counts-propagation"
/// Initial synthetic count assigned to functions.
-static cl::opt<int>
+cl::opt<int>
InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
cl::ZeroOrMore,
cl::desc("Initial value of synthetic entry count."));
@@ -98,13 +99,15 @@ PreservedAnalyses SyntheticCountsPropagation::run(Module &M,
ModuleAnalysisManager &MAM) {
FunctionAnalysisManager &FAM =
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- DenseMap<Function *, uint64_t> Counts;
+ DenseMap<Function *, Scaled64> Counts;
// Set initial entry counts.
- initializeCounts(M, [&](Function *F, uint64_t Count) { Counts[F] = Count; });
+ initializeCounts(
+ M, [&](Function *F, uint64_t Count) { Counts[F] = Scaled64(Count, 0); });
- // Compute the relative block frequency for a call edge. Use scaled numbers
- // and not integers since the relative block frequency could be less than 1.
- auto GetCallSiteRelFreq = [&](const CallGraphNode::CallRecord &Edge) {
+ // Edge includes information about the source. Hence ignore the first
+ // parameter.
+ auto GetCallSiteProfCount = [&](const CallGraphNode *,
+ const CallGraphNode::CallRecord &Edge) {
Optional<Scaled64> Res = None;
if (!Edge.first)
return Res;
@@ -112,29 +115,33 @@ PreservedAnalyses SyntheticCountsPropagation::run(Module &M,
CallSite CS(cast<Instruction>(Edge.first));
Function *Caller = CS.getCaller();
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
+
+ // Now compute the callsite count from relative frequency and
+ // entry count:
BasicBlock *CSBB = CS.getInstruction()->getParent();
Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
- Scaled64 BBFreq(BFI.getBlockFreq(CSBB).getFrequency(), 0);
- BBFreq /= EntryFreq;
- return Optional<Scaled64>(BBFreq);
+ Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0);
+ BBCount /= EntryFreq;
+ BBCount *= Counts[Caller];
+ return Optional<Scaled64>(BBCount);
};
CallGraph CG(M);
// Propagate the entry counts on the callgraph.
SyntheticCountsUtils<const CallGraph *>::propagate(
- &CG, GetCallSiteRelFreq,
- [&](const CallGraphNode *N) { return Counts[N->getFunction()]; },
- [&](const CallGraphNode *N, uint64_t New) {
+ &CG, GetCallSiteProfCount, [&](const CallGraphNode *N, Scaled64 New) {
auto F = N->getFunction();
if (!F || F->isDeclaration())
return;
+
Counts[F] += New;
});
// Set the counts as metadata.
- for (auto Entry : Counts)
- Entry.first->setEntryCount(
- ProfileCount(Entry.second, Function::PCT_Synthetic));
+ for (auto Entry : Counts) {
+ Entry.first->setEntryCount(ProfileCount(
+ Entry.second.template toInt<uint64_t>(), Function::PCT_Synthetic));
+ }
return PreservedAnalyses::all();
}
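Editor's note on the SyntheticCountsPropagation hunks above: the pass switches from propagating relative frequencies to propagating scaled counts, where each call edge contributes callerCount * blockFreq / entryFreq. A standalone numeric sketch of that formula (double standing in for ScaledNumber):

#include <cassert>
#include <map>
#include <string>

// Edge contribution as in GetCallSiteProfCount above:
// count(call site) = count(caller) * freq(block containing the call) / entryFreq(caller).
static double callSiteCount(double CallerCount, double BlockFreq, double EntryFreq) {
  return CallerCount * (BlockFreq / EntryFreq);
}

int main() {
  // Assumed initial synthetic counts for two functions.
  std::map<std::string, double> Counts = {{"caller", 10.0}, {"callee", 10.0}};
  // A call site sitting in a block that runs 3x per entry of the caller.
  Counts["callee"] += callSiteCount(Counts["caller"], /*BlockFreq=*/3000, /*EntryFreq=*/1000);
  assert(Counts["callee"] == 40.0);  // initial 10 plus 10*3 propagated along the edge
  return 0;
}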
diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 8fe7ae1282cc..510ecb516dc2 100644
--- a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -154,7 +154,8 @@ void simplifyExternals(Module &M) {
continue;
Function *NewF =
- Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
+ Function::Create(EmptyFT, GlobalValue::ExternalLinkage,
+ F.getAddressSpace(), "", &M);
NewF->setVisibility(F.getVisibility());
NewF->takeName(&F);
F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
@@ -237,7 +238,7 @@ void splitAndWriteThinLTOBitcode(
// sound because the virtual constant propagation optimizations effectively
// inline all implementations of the virtual function into each call site,
// rather than using function attributes to perform local optimization.
- std::set<const Function *> EligibleVirtualFns;
+ DenseSet<const Function *> EligibleVirtualFns;
// If any member of a comdat lives in MergedM, put all members of that
// comdat in MergedM to keep the comdat together.
DenseSet<const Comdat *> MergedMComdats;
@@ -417,8 +418,18 @@ void splitAndWriteThinLTOBitcode(
}
}
-// Returns whether this module needs to be split because it uses type metadata.
+// Returns whether this module needs to be split because splitting is
+// enabled and it uses type metadata.
bool requiresSplit(Module &M) {
+ // First check if the LTO Unit splitting has been enabled.
+ bool EnableSplitLTOUnit = false;
+ if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("EnableSplitLTOUnit")))
+ EnableSplitLTOUnit = MD->getZExtValue();
+ if (!EnableSplitLTOUnit)
+ return false;
+
+ // Module only needs to be split if it contains type metadata.
for (auto &GO : M.global_objects()) {
if (GO.hasMetadata(LLVMContext::MD_type))
return true;
@@ -430,7 +441,7 @@ bool requiresSplit(Module &M) {
void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
function_ref<AAResults &(Function &)> AARGetter,
Module &M, const ModuleSummaryIndex *Index) {
- // See if this module has any type metadata. If so, we need to split it.
+ // Split module if splitting is enabled and it contains any type metadata.
if (requiresSplit(M))
return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
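Editor's note on the ThinLTOBitcodeWriter hunks above: requiresSplit now needs two things — the EnableSplitLTOUnit module flag and type metadata on some global. A standalone sketch of that two-step gate (plain values standing in for the module flag and the metadata scan):

#include <iostream>
#include <optional>
#include <vector>

struct GlobalObject { bool HasTypeMetadata = false; };

// Mirrors the shape of requiresSplit(): an absent or zero module flag means
// "do not split", regardless of type metadata.
static bool requiresSplit(std::optional<int> EnableSplitLTOUnitFlag,
                          const std::vector<GlobalObject> &Globals) {
  if (!EnableSplitLTOUnitFlag.value_or(0))
    return false;
  for (const GlobalObject &GO : Globals)
    if (GO.HasTypeMetadata)
      return true;
  return false;
}

int main() {
  std::vector<GlobalObject> Globals = {{false}, {true}};
  std::cout << requiresSplit(std::nullopt, Globals) << "\n"; // 0: flag missing
  std::cout << requiresSplit(1, Globals) << "\n";            // 1: flag set and type metadata present
  return 0;
}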
diff --git a/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index d65da2504db4..48bd0cda759d 100644
--- a/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -58,6 +58,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
@@ -406,6 +407,7 @@ void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS,
struct DevirtModule {
Module &M;
function_ref<AAResults &(Function &)> AARGetter;
+ function_ref<DominatorTree &(Function &)> LookupDomTree;
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
@@ -433,10 +435,12 @@ struct DevirtModule {
DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
+ function_ref<DominatorTree &(Function &)> LookupDomTree,
ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary)
- : M(M), AARGetter(AARGetter), ExportSummary(ExportSummary),
- ImportSummary(ImportSummary), Int8Ty(Type::getInt8Ty(M.getContext())),
+ : M(M), AARGetter(AARGetter), LookupDomTree(LookupDomTree),
+ ExportSummary(ExportSummary), ImportSummary(ImportSummary),
+ Int8Ty(Type::getInt8Ty(M.getContext())),
Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
Int32Ty(Type::getInt32Ty(M.getContext())),
Int64Ty(Type::getInt64Ty(M.getContext())),
@@ -533,9 +537,10 @@ struct DevirtModule {
// Lower the module using the action and summary passed as command line
// arguments. For testing purposes only.
- static bool runForTesting(
- Module &M, function_ref<AAResults &(Function &)> AARGetter,
- function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
+ static bool
+ runForTesting(Module &M, function_ref<AAResults &(Function &)> AARGetter,
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
+ function_ref<DominatorTree &(Function &)> LookupDomTree);
};
struct WholeProgramDevirt : public ModulePass {
@@ -572,17 +577,23 @@ struct WholeProgramDevirt : public ModulePass {
return *ORE;
};
+ auto LookupDomTree = [this](Function &F) -> DominatorTree & {
+ return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ };
+
if (UseCommandLine)
- return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter);
+ return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter,
+ LookupDomTree);
- return DevirtModule(M, LegacyAARGetter(*this), OREGetter, ExportSummary,
- ImportSummary)
+ return DevirtModule(M, LegacyAARGetter(*this), OREGetter, LookupDomTree,
+ ExportSummary, ImportSummary)
.run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
}
};
@@ -592,6 +603,7 @@ INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt",
"Whole program devirtualization", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt",
"Whole program devirtualization", false, false)
char WholeProgramDevirt::ID = 0;
@@ -611,7 +623,11 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
};
- if (!DevirtModule(M, AARGetter, OREGetter, ExportSummary, ImportSummary)
+ auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+ if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
+ ImportSummary)
.run())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -619,7 +635,8 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
bool DevirtModule::runForTesting(
Module &M, function_ref<AAResults &(Function &)> AARGetter,
- function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
+ function_ref<DominatorTree &(Function &)> LookupDomTree) {
ModuleSummaryIndex Summary(/*HaveGVs=*/false);
// Handle the command-line summary arguments. This code is for testing
@@ -637,7 +654,7 @@ bool DevirtModule::runForTesting(
bool Changed =
DevirtModule(
- M, AARGetter, OREGetter,
+ M, AARGetter, OREGetter, LookupDomTree,
ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
.run();
@@ -665,7 +682,7 @@ void DevirtModule::buildTypeIdentifierMap(
for (GlobalVariable &GV : M.globals()) {
Types.clear();
GV.getMetadata(LLVMContext::MD_type, Types);
- if (Types.empty())
+ if (GV.isDeclaration() || Types.empty())
continue;
VTableBits *&BitsPtr = GVToBits[&GV];
@@ -755,7 +772,8 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
auto Apply = [&](CallSiteInfo &CSInfo) {
for (auto &&VCallSite : CSInfo.CallSites) {
if (RemarksEnabled)
- VCallSite.emitRemark("single-impl", TheFn->getName(), OREGetter);
+ VCallSite.emitRemark("single-impl",
+ TheFn->stripPointerCasts()->getName(), OREGetter);
VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
TheFn, VCallSite.CS.getCalledValue()->getType()));
// This use is no longer unsafe.
@@ -846,10 +864,13 @@ void DevirtModule::tryICallBranchFunnel(
Function *JT;
if (isa<MDString>(Slot.TypeID)) {
JT = Function::Create(FT, Function::ExternalLinkage,
+ M.getDataLayout().getProgramAddressSpace(),
getGlobalName(Slot, {}, "branch_funnel"), &M);
JT->setVisibility(GlobalValue::HiddenVisibility);
} else {
- JT = Function::Create(FT, Function::InternalLinkage, "branch_funnel", &M);
+ JT = Function::Create(FT, Function::InternalLinkage,
+ M.getDataLayout().getProgramAddressSpace(),
+ "branch_funnel", &M);
}
JT->addAttribute(1, Attribute::Nest);
@@ -891,7 +912,8 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
continue;
if (RemarksEnabled)
- VCallSite.emitRemark("branch-funnel", JT->getName(), OREGetter);
+ VCallSite.emitRemark("branch-funnel",
+ JT->stripPointerCasts()->getName(), OREGetter);
// Pass the address of the vtable in the nest register, which is r10 on
// x86_64.
@@ -1323,15 +1345,14 @@ void DevirtModule::rebuildGlobal(VTableBits &B) {
bool DevirtModule::areRemarksEnabled() {
const auto &FL = M.getFunctionList();
- if (FL.empty())
- return false;
- const Function &Fn = FL.front();
-
- const auto &BBL = Fn.getBasicBlockList();
- if (BBL.empty())
- return false;
- auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBL.front());
- return DI.isEnabled();
+ for (const Function &Fn : FL) {
+ const auto &BBL = Fn.getBasicBlockList();
+ if (BBL.empty())
+ continue;
+ auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBL.front());
+ return DI.isEnabled();
+ }
+ return false;
}
void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
@@ -1341,7 +1362,7 @@ void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
// points to a member of the type identifier %md. Group calls by (type ID,
// offset) pair (effectively the identity of the virtual function) and store
// to CallSlots.
- DenseSet<Value *> SeenPtrs;
+ DenseSet<CallSite> SeenCallSites;
for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end();
I != E;) {
auto CI = dyn_cast<CallInst>(I->getUser());
@@ -1352,19 +1373,22 @@ void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
// Search for virtual calls based on %p and add them to DevirtCalls.
SmallVector<DevirtCallSite, 1> DevirtCalls;
SmallVector<CallInst *, 1> Assumes;
- findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI);
+ auto &DT = LookupDomTree(*CI->getFunction());
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
- // If we found any, add them to CallSlots. Only do this if we haven't seen
- // the vtable pointer before, as it may have been CSE'd with pointers from
- // other call sites, and we don't want to process call sites multiple times.
+ // If we found any, add them to CallSlots.
if (!Assumes.empty()) {
Metadata *TypeId =
cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
- if (SeenPtrs.insert(Ptr).second) {
- for (DevirtCallSite Call : DevirtCalls) {
+ for (DevirtCallSite Call : DevirtCalls) {
+ // Only add this CallSite if we haven't seen it before. The vtable
+ // pointer may have been CSE'd with pointers from other call sites,
+ // and we don't want to process call sites multiple times. We can't
+ // just skip the vtable Ptr if it has been seen before, however, since
+ // it may be shared by type tests that dominate different calls.
+ if (SeenCallSites.insert(Call.CS).second)
CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS, nullptr);
- }
}
}
@@ -1398,8 +1422,9 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
SmallVector<Instruction *, 1> LoadedPtrs;
SmallVector<Instruction *, 1> Preds;
bool HasNonCallUses = false;
+ auto &DT = LookupDomTree(*CI->getFunction());
findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
- HasNonCallUses, CI);
+ HasNonCallUses, CI, DT);
// Start by generating "pessimistic" code that explicitly loads the function
// pointer from the vtable and performs the type check. If possible, we will
@@ -1538,6 +1563,17 @@ bool DevirtModule::run() {
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
+ // If only some of the modules were split, we cannot correctly handle
+ // code that contains type tests or type checked loads.
+ if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+ (ImportSummary && ImportSummary->partiallySplitLTOUnits())) {
+ if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
+ (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()))
+ report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test "
+ "or llvm.type.checked.load");
+ return false;
+ }
+
// Normally if there are no users of the devirtualization intrinsics in the
// module, this pass has nothing to do. But if we are exporting, we also need
// to handle any users that appear only in the function summaries.
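Editor's note on the scanTypeTestUsers hunk above: deduplication now keys on the call site rather than the vtable pointer, because a CSE'd pointer can feed several type tests that dominate different calls. A standalone sketch of the corrected dedup (invented call-site and slot types):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

// Stand-ins: a call site is just an id; a slot collects call sites to devirtualize.
using CallSiteId = int;
struct TypeTest { std::string VTablePtr; std::vector<CallSiteId> DevirtCalls; };

int main() {
  // Two type tests on the *same* (CSE'd) vtable pointer, guarding different calls.
  std::vector<TypeTest> Tests = {{"%p", {1}}, {"%p", {2}}};

  std::set<CallSiteId> SeenCallSites;  // key on the call site, not the pointer
  std::map<std::string, std::vector<CallSiteId>> CallSlots;

  for (const TypeTest &T : Tests)
    for (CallSiteId CS : T.DevirtCalls)
      if (SeenCallSites.insert(CS).second)  // each call site processed once
        CallSlots["slot"].push_back(CS);

  // 2: both calls are kept; keying on the shared %p would have dropped one.
  std::cout << CallSlots["slot"].size() << "\n";
  return 0;
}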
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 83054588a9aa..6e196bfdbd25 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -186,8 +186,6 @@ namespace {
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
- Value *performFactorization(Instruction *I);
-
/// Convert given addend to a Value
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
@@ -197,7 +195,6 @@ namespace {
Value *createFSub(Value *Opnd0, Value *Opnd1);
Value *createFAdd(Value *Opnd0, Value *Opnd1);
Value *createFMul(Value *Opnd0, Value *Opnd1);
- Value *createFDiv(Value *Opnd0, Value *Opnd1);
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst, bool NoNumber = false);
@@ -427,89 +424,6 @@ unsigned FAddend::drillAddendDownOneStep
return BreakNum;
}
-// Try to perform following optimization on the input instruction I. Return the
-// simplified expression if was successful; otherwise, return 0.
-//
-// Instruction "I" is Simplified into
-// -------------------------------------------------------
-// (x * y) +/- (x * z) x * (y +/- z)
-// (y / x) +/- (z / x) (y +/- z) / x
-Value *FAddCombine::performFactorization(Instruction *I) {
- assert((I->getOpcode() == Instruction::FAdd ||
- I->getOpcode() == Instruction::FSub) && "Expect add/sub");
-
- Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
- Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
-
- if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
- return nullptr;
-
- bool isMpy = false;
- if (I0->getOpcode() == Instruction::FMul)
- isMpy = true;
- else if (I0->getOpcode() != Instruction::FDiv)
- return nullptr;
-
- Value *Opnd0_0 = I0->getOperand(0);
- Value *Opnd0_1 = I0->getOperand(1);
- Value *Opnd1_0 = I1->getOperand(0);
- Value *Opnd1_1 = I1->getOperand(1);
-
- // Input Instr I Factor AddSub0 AddSub1
- // ----------------------------------------------
- // (x*y) +/- (x*z) x y z
- // (y/x) +/- (z/x) x y z
- Value *Factor = nullptr;
- Value *AddSub0 = nullptr, *AddSub1 = nullptr;
-
- if (isMpy) {
- if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
- Factor = Opnd0_0;
- else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
- Factor = Opnd0_1;
-
- if (Factor) {
- AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
- AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
- }
- } else if (Opnd0_1 == Opnd1_1) {
- Factor = Opnd0_1;
- AddSub0 = Opnd0_0;
- AddSub1 = Opnd1_0;
- }
-
- if (!Factor)
- return nullptr;
-
- FastMathFlags Flags;
- Flags.setFast();
- if (I0) Flags &= I->getFastMathFlags();
- if (I1) Flags &= I->getFastMathFlags();
-
- // Create expression "NewAddSub = AddSub0 +/- AddsSub1"
- Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
- createFAdd(AddSub0, AddSub1) :
- createFSub(AddSub0, AddSub1);
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
- const APFloat &F = CFP->getValueAPF();
- if (!F.isNormal())
- return nullptr;
- } else if (Instruction *II = dyn_cast<Instruction>(NewAddSub))
- II->setFastMathFlags(Flags);
-
- if (isMpy) {
- Value *RI = createFMul(Factor, NewAddSub);
- if (Instruction *II = dyn_cast<Instruction>(RI))
- II->setFastMathFlags(Flags);
- return RI;
- }
-
- Value *RI = createFDiv(NewAddSub, Factor);
- if (Instruction *II = dyn_cast<Instruction>(RI))
- II->setFastMathFlags(Flags);
- return RI;
-}
-
Value *FAddCombine::simplify(Instruction *I) {
assert(I->hasAllowReassoc() && I->hasNoSignedZeros() &&
"Expected 'reassoc'+'nsz' instruction");
@@ -594,8 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) {
return R;
}
- // step 6: Try factorization as the last resort,
- return performFactorization(I);
+ return nullptr;
}
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
@@ -772,13 +685,6 @@ Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
return V;
}
-Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
- Value *V = Builder.CreateFDiv(Opnd0, Opnd1);
- if (Instruction *I = dyn_cast<Instruction>(V))
- createInstPostProc(I);
- return V;
-}
-
void FAddCombine::createInstPostProc(Instruction *NewInstr, bool NoNumber) {
NewInstr->setDebugLoc(Instr->getDebugLoc());
@@ -1135,7 +1041,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// (A*B)+(A*C) -> A*(B+C) etc
@@ -1285,77 +1191,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
- // Check for (add (sext x), y), see if we can merge this into an
- // integer add followed by a sext.
- if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
- // (add (sext x), cst) --> (sext (add x, cst'))
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
- if (LHSConv->hasOneUse()) {
- Constant *CI =
- ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
- if (ConstantExpr::getSExt(CI, Ty) == RHSC &&
- willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
- // Insert the new, smaller add.
- Value *NewAdd =
- Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
- return new SExtInst(NewAdd, Ty);
- }
- }
- }
-
- // (add (sext x), (sext y)) --> (sext (add int x, y))
- if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
- // Only do this if x/y have the same type, if at least one of them has a
- // single use (so we don't increase the number of sexts), and if the
- // integer add will not overflow.
- if (LHSConv->getOperand(0)->getType() ==
- RHSConv->getOperand(0)->getType() &&
- (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- willNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), I)) {
- // Insert the new integer add.
- Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), "addconv");
- return new SExtInst(NewAdd, Ty);
- }
- }
- }
-
- // Check for (add (zext x), y), see if we can merge this into an
- // integer add followed by a zext.
- if (auto *LHSConv = dyn_cast<ZExtInst>(LHS)) {
- // (add (zext x), cst) --> (zext (add x, cst'))
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
- if (LHSConv->hasOneUse()) {
- Constant *CI =
- ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
- if (ConstantExpr::getZExt(CI, Ty) == RHSC &&
- willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) {
- // Insert the new, smaller add.
- Value *NewAdd =
- Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv");
- return new ZExtInst(NewAdd, Ty);
- }
- }
- }
-
- // (add (zext x), (zext y)) --> (zext (add int x, y))
- if (auto *RHSConv = dyn_cast<ZExtInst>(RHS)) {
- // Only do this if x/y have the same type, if at least one of them has a
- // single use (so we don't increase the number of zexts), and if the
- // integer add will not overflow.
- if (LHSConv->getOperand(0)->getType() ==
- RHSConv->getOperand(0)->getType() &&
- (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- willNotOverflowUnsignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), I)) {
- // Insert the new integer add.
- Value *NewAdd = Builder.CreateNUWAdd(
- LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv");
- return new ZExtInst(NewAdd, Ty);
- }
- }
- }
+ if (Instruction *Ext = narrowMathIfNoOverflow(I))
+ return Ext;
// (add (xor A, B) (and A, B)) --> (or A, B)
// (add (and A, B) (xor A, B)) --> (or A, B)
@@ -1391,6 +1228,45 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
+/// Factor a common operand out of fadd/fsub of fmul/fdiv.
+static Instruction *factorizeFAddFSub(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert((I.getOpcode() == Instruction::FAdd ||
+ I.getOpcode() == Instruction::FSub) && "Expecting fadd/fsub");
+ assert(I.hasAllowReassoc() && I.hasNoSignedZeros() &&
+ "FP factorization requires FMF");
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Value *X, *Y, *Z;
+ bool IsFMul;
+ if ((match(Op0, m_OneUse(m_FMul(m_Value(X), m_Value(Z)))) &&
+ match(Op1, m_OneUse(m_c_FMul(m_Value(Y), m_Specific(Z))))) ||
+ (match(Op0, m_OneUse(m_FMul(m_Value(Z), m_Value(X)))) &&
+ match(Op1, m_OneUse(m_c_FMul(m_Value(Y), m_Specific(Z))))))
+ IsFMul = true;
+ else if (match(Op0, m_OneUse(m_FDiv(m_Value(X), m_Value(Z)))) &&
+ match(Op1, m_OneUse(m_FDiv(m_Value(Y), m_Specific(Z)))))
+ IsFMul = false;
+ else
+ return nullptr;
+
+ // (X * Z) + (Y * Z) --> (X + Y) * Z
+ // (X * Z) - (Y * Z) --> (X - Y) * Z
+ // (X / Z) + (Y / Z) --> (X + Y) / Z
+ // (X / Z) - (Y / Z) --> (X - Y) / Z
+ bool IsFAdd = I.getOpcode() == Instruction::FAdd;
+ Value *XY = IsFAdd ? Builder.CreateFAddFMF(X, Y, &I)
+ : Builder.CreateFSubFMF(X, Y, &I);
+
+ // Bail out if we just created a denormal constant.
+ // TODO: This is copied from a previous implementation. Is it necessary?
+ const APFloat *C;
+ if (match(XY, m_APFloat(C)) && !C->isNormal())
+ return nullptr;
+
+ return IsFMul ? BinaryOperator::CreateFMulFMF(XY, Z, &I)
+ : BinaryOperator::CreateFDivFMF(XY, Z, &I);
+}
+
Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (Value *V = SimplifyFAddInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
@@ -1400,7 +1276,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I))
@@ -1478,6 +1354,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (I.hasAllowReassoc() && I.hasNoSignedZeros()) {
+ if (Instruction *F = factorizeFAddFSub(I, Builder))
+ return F;
if (Value *V = FAddCombine(Builder).simplify(&I))
return replaceInstUsesWith(I, V);
}
@@ -1577,7 +1455,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// (A*B)-(A*C) -> A*(B-C) etc
@@ -1771,19 +1649,51 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// X - A*-B -> X + A*B
// X - -A*B -> X + A*B
Value *A, *B;
- Constant *CI;
if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B)))))
return BinaryOperator::CreateAdd(Op0, Builder.CreateMul(A, B));
- // X - A*CI -> X + A*-CI
+ // X - A*C -> X + A*-C
// No need to handle commuted multiply because multiply handling will
// ensure constant will be move to the right hand side.
- if (match(Op1, m_Mul(m_Value(A), m_Constant(CI)))) {
- Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(CI));
+ if (match(Op1, m_Mul(m_Value(A), m_Constant(C))) && !isa<ConstantExpr>(C)) {
+ Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(C));
return BinaryOperator::CreateAdd(Op0, NewMul);
}
}
+ {
+ // ~A - Min/Max(~A, O) -> Max/Min(A, ~O) - A
+ // ~A - Min/Max(O, ~A) -> Max/Min(A, ~O) - A
+ // Min/Max(~A, O) - ~A -> A - Max/Min(A, ~O)
+ // Min/Max(O, ~A) - ~A -> A - Max/Min(A, ~O)
+ // So long as O here is freely invertible, this will be neutral or a win.
+ Value *LHS, *RHS, *A;
+ Value *NotA = Op0, *MinMax = Op1;
+ SelectPatternFlavor SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor;
+ if (!SelectPatternResult::isMinOrMax(SPF)) {
+ NotA = Op1;
+ MinMax = Op0;
+ SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor;
+ }
+ if (SelectPatternResult::isMinOrMax(SPF) &&
+ match(NotA, m_Not(m_Value(A))) && (NotA == LHS || NotA == RHS)) {
+ if (NotA == LHS)
+ std::swap(LHS, RHS);
+ // LHS is now O above and expected to have at least 2 uses (the min/max)
+ // NotA is expected to have 2 uses from the min/max and 1 from the sub.
+ if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
+ !NotA->hasNUsesOrMore(4)) {
+ // Note: We don't generate the inverse max/min, just create the not of
+ // it and let other folds do the rest.
+ Value *Not = Builder.CreateNot(MinMax);
+ if (NotA == Op0)
+ return BinaryOperator::CreateSub(Not, A);
+ else
+ return BinaryOperator::CreateSub(A, Not);
+ }
+ }
+ }
+
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
Value *LHSOp, *RHSOp;
@@ -1819,6 +1729,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return SelectInst::Create(Cmp, Neg, A);
}
+ if (Instruction *Ext = narrowMathIfNoOverflow(I))
+ return Ext;
+
bool Changed = false;
if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
Changed = true;
@@ -1838,7 +1751,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// Subtraction from -0.0 is the canonical form of fneg.
@@ -1847,13 +1760,27 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
if (I.hasNoSignedZeros() && match(Op0, m_PosZeroFP()))
return BinaryOperator::CreateFNegFMF(Op1, &I);
+ Value *X, *Y;
+ Constant *C;
+
+ // Fold negation into constant operand. This is limited with one-use because
+ // fneg is assumed better for analysis and cheaper in codegen than fmul/fdiv.
+ // -(X * C) --> X * (-C)
+ if (match(&I, m_FNeg(m_OneUse(m_FMul(m_Value(X), m_Constant(C))))))
+ return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
+ // -(X / C) --> X / (-C)
+ if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Value(X), m_Constant(C))))))
+ return BinaryOperator::CreateFDivFMF(X, ConstantExpr::getFNeg(C), &I);
+ // -(C / X) --> (-C) / X
+ if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Constant(C), m_Value(X))))))
+ return BinaryOperator::CreateFDivFMF(ConstantExpr::getFNeg(C), X, &I);
+
// If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
// Canonicalize to fadd to make analysis easier.
// This can also help codegen because fadd is commutative.
// Note that if this fsub was really an fneg, the fadd with -0.0 will get
// killed later. We still limit that particular transform with 'hasOneUse'
// because an fneg is assumed better/cheaper than a generic fsub.
- Value *X, *Y;
if (I.hasNoSignedZeros() || CannotBeNegativeZero(Op0, SQ.TLI)) {
if (match(Op1, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
Value *NewSub = Builder.CreateFSubFMF(Y, X, &I);
@@ -1869,7 +1796,6 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
// X - C --> X + (-C)
// But don't transform constant expressions because there's an inverse fold
// for X + (-Y) --> X - Y.
- Constant *C;
if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1))
return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I);
@@ -1879,21 +1805,46 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
// Similar to above, but look through a cast of the negated value:
// X - (fptrunc(-Y)) --> X + fptrunc(Y)
- if (match(Op1, m_OneUse(m_FPTrunc(m_FNeg(m_Value(Y)))))) {
- Value *TruncY = Builder.CreateFPTrunc(Y, I.getType());
- return BinaryOperator::CreateFAddFMF(Op0, TruncY, &I);
- }
+ Type *Ty = I.getType();
+ if (match(Op1, m_OneUse(m_FPTrunc(m_FNeg(m_Value(Y))))))
+ return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPTrunc(Y, Ty), &I);
+
// X - (fpext(-Y)) --> X + fpext(Y)
- if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y)))))) {
- Value *ExtY = Builder.CreateFPExt(Y, I.getType());
- return BinaryOperator::CreateFAddFMF(Op0, ExtY, &I);
- }
+ if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y))))))
+ return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPExt(Y, Ty), &I);
- // Handle specials cases for FSub with selects feeding the operation
+ // Handle special cases for FSub with selects feeding the operation
if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
return replaceInstUsesWith(I, V);
if (I.hasAllowReassoc() && I.hasNoSignedZeros()) {
+ // (Y - X) - Y --> -X
+ if (match(Op0, m_FSub(m_Specific(Op1), m_Value(X))))
+ return BinaryOperator::CreateFNegFMF(X, &I);
+
+ // Y - (X + Y) --> -X
+ // Y - (Y + X) --> -X
+ if (match(Op1, m_c_FAdd(m_Specific(Op0), m_Value(X))))
+ return BinaryOperator::CreateFNegFMF(X, &I);
+
+ // (X * C) - X --> X * (C - 1.0)
+ if (match(Op0, m_FMul(m_Specific(Op1), m_Constant(C)))) {
+ Constant *CSubOne = ConstantExpr::getFSub(C, ConstantFP::get(Ty, 1.0));
+ return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I);
+ }
+ // X - (X * C) --> X * (1.0 - C)
+ if (match(Op1, m_FMul(m_Specific(Op0), m_Constant(C)))) {
+ Constant *OneSubC = ConstantExpr::getFSub(ConstantFP::get(Ty, 1.0), C);
+ return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I);
+ }
+
+ if (Instruction *F = factorizeFAddFSub(I, Builder))
+ return F;
+
+ // TODO: This performs reassociative folds for FP ops. Some fraction of the
+ // functionality has been subsumed by simple pattern matching here and in
+ // InstSimplify. We should let a dedicated reassociation pass handle more
+ // complex pattern matching and remove this from InstCombine.
if (Value *V = FAddCombine(Builder).simplify(&I))
return replaceInstUsesWith(I, V);
}
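As a rough illustration of the new FSub folds added above, here is a small IR sketch (function names and the expected results are illustrative only, not part of this commit) that InstCombine should now reduce, e.g. via opt -instcombine:

  define float @fneg_of_fmul_const(float %x) {
    %mul = fmul float %x, 4.0
    %neg = fsub float -0.0, %mul      ; canonical fneg of (x * 4.0)
    ret float %neg                    ; expected: fmul float %x, -4.0
  }
  define float @fsub_of_fadd(float %x, float %y) {
    %add = fadd float %x, %y
    %sub = fsub reassoc nsz float %y, %add   ; y - (x + y)
    ret float %sub                           ; expected: fneg of %x (fsub from -0.0)
  }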
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3d758e2fe7c9..404c2ad7e6e7 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -53,11 +53,11 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC) {
/// operands into either a constant true or false, or a brand new ICmp
/// instruction. The sign is passed in to determine which kind of predicate to
/// use in the new icmp instruction.
-static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+static Value *getNewICmpValue(unsigned Code, bool Sign, Value *LHS, Value *RHS,
InstCombiner::BuilderTy &Builder) {
ICmpInst::Predicate NewPred;
- if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred))
- return NewConstant;
+ if (Constant *TorF = getPredForICmpCode(Code, Sign, LHS->getType(), NewPred))
+ return TorF;
return Builder.CreateICmp(NewPred, LHS, RHS);
}
@@ -898,6 +898,130 @@ Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
return nullptr;
}
+/// General pattern:
+/// X & Y
+///
+/// Where Y is checking that all the high bits (covered by a mask 4294967168)
+/// are uniform, i.e. %arg & 4294967168 can be either 4294967168 or 0
+/// Pattern can be one of:
+/// %t = add i32 %arg, 128
+/// %r = icmp ult i32 %t, 256
+/// Or
+/// %t0 = shl i32 %arg, 24
+/// %t1 = ashr i32 %t0, 24
+/// %r = icmp eq i32 %t1, %arg
+/// Or
+/// %t0 = trunc i32 %arg to i8
+/// %t1 = sext i8 %t0 to i32
+/// %r = icmp eq i32 %t1, %arg
+/// This pattern is a signed truncation check.
+///
+/// And X is checking that some bit in that same mask is zero.
+/// I.e. can be one of:
+/// %r = icmp sgt i32 %arg, -1
+/// Or
+/// %t = and i32 %arg, 2147483648
+/// %r = icmp eq i32 %t, 0
+///
+/// Since we are checking that all the bits in that mask are the same,
+/// and a particular bit is zero, what we are really checking is that all the
+/// masked bits are zero.
+/// So this should be transformed to:
+/// %r = icmp ult i32 %arg, 128
+static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
+ Instruction &CxtI,
+ InstCombiner::BuilderTy &Builder) {
+ assert(CxtI.getOpcode() == Instruction::And);
+
+ // Match icmp ult (add %arg, C01), C1 (C1 == C01 << 1; powers of two)
+ auto tryToMatchSignedTruncationCheck = [](ICmpInst *ICmp, Value *&X,
+ APInt &SignBitMask) -> bool {
+ CmpInst::Predicate Pred;
+ const APInt *I01, *I1; // powers of two; I1 == I01 << 1
+ if (!(match(ICmp,
+ m_ICmp(Pred, m_Add(m_Value(X), m_Power2(I01)), m_Power2(I1))) &&
+ Pred == ICmpInst::ICMP_ULT && I1->ugt(*I01) && I01->shl(1) == *I1))
+ return false;
+ // Which bit is the new sign bit as per the 'signed truncation' pattern?
+ SignBitMask = *I01;
+ return true;
+ };
+
+ // One icmp needs to be 'signed truncation check'.
+ // We need to match this first, else we will mismatch commutative cases.
+ Value *X1;
+ APInt HighestBit;
+ ICmpInst *OtherICmp;
+ if (tryToMatchSignedTruncationCheck(ICmp1, X1, HighestBit))
+ OtherICmp = ICmp0;
+ else if (tryToMatchSignedTruncationCheck(ICmp0, X1, HighestBit))
+ OtherICmp = ICmp1;
+ else
+ return nullptr;
+
+ assert(HighestBit.isPowerOf2() && "expected to be power of two (non-zero)");
+
+ // Try to match/decompose into: icmp eq (X & Mask), 0
+ auto tryToDecompose = [](ICmpInst *ICmp, Value *&X,
+ APInt &UnsetBitsMask) -> bool {
+ CmpInst::Predicate Pred = ICmp->getPredicate();
+ // Can it be decomposed into icmp eq (X & Mask), 0 ?
+ if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1),
+ Pred, X, UnsetBitsMask,
+ /*LookThruTrunc=*/false) &&
+ Pred == ICmpInst::ICMP_EQ)
+ return true;
+ // Is it icmp eq (X & Mask), 0 already?
+ const APInt *Mask;
+ if (match(ICmp, m_ICmp(Pred, m_And(m_Value(X), m_APInt(Mask)), m_Zero())) &&
+ Pred == ICmpInst::ICMP_EQ) {
+ UnsetBitsMask = *Mask;
+ return true;
+ }
+ return false;
+ };
+
+ // And the other icmp needs to be decomposable into a bit test.
+ Value *X0;
+ APInt UnsetBitsMask;
+ if (!tryToDecompose(OtherICmp, X0, UnsetBitsMask))
+ return nullptr;
+
+ assert(!UnsetBitsMask.isNullValue() && "empty mask makes no sense.");
+
+ // Are they working on the same value?
+ Value *X;
+ if (X1 == X0) {
+ // Ok as is.
+ X = X1;
+ } else if (match(X0, m_Trunc(m_Specific(X1)))) {
+ UnsetBitsMask = UnsetBitsMask.zext(X1->getType()->getScalarSizeInBits());
+ X = X1;
+ } else
+ return nullptr;
+
+ // So which bits should be uniform as per the 'signed truncation check'?
+ // (all the bits starting with (i.e. including) HighestBit)
+ APInt SignBitsMask = ~(HighestBit - 1U);
+
+ // UnsetBitsMask must have some common bits with SignBitsMask.
+ if (!UnsetBitsMask.intersects(SignBitsMask))
+ return nullptr;
+
+ // Does UnsetBitsMask contain any bits outside of SignBitsMask?
+ if (!UnsetBitsMask.isSubsetOf(SignBitsMask)) {
+ APInt OtherHighestBit = (~UnsetBitsMask) + 1U;
+ if (!OtherHighestBit.isPowerOf2())
+ return nullptr;
+ HighestBit = APIntOps::umin(HighestBit, OtherHighestBit);
+ }
+ // Else, if it does not, then all is ok as-is.
+
+ // %r = icmp ult %X, SignBit
+ return Builder.CreateICmpULT(X, ConstantInt::get(X->getType(), HighestBit),
+ CxtI.getName() + ".simplified");
+}
+
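An illustrative input for the signed-truncation-check fold documented above (names invented for this note); the two compares and the 'and' should collapse to a single unsigned compare:

  define i1 @signed_trunc_check(i32 %arg) {
    %t = add i32 %arg, 128
    %uniform = icmp ult i32 %t, 256   ; high bits of %arg are all equal
    %nonneg = icmp sgt i32 %arg, -1   ; sign bit of %arg is clear
    %r = and i1 %uniform, %nonneg
    ret i1 %r                         ; expected: icmp ult i32 %arg, 128
  }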
/// Fold (icmp)&(icmp) if possible.
Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Instruction &CxtI) {
@@ -909,7 +1033,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
- if (PredicatesFoldable(PredL, PredR)) {
+ if (predicatesFoldable(PredL, PredR)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
LHS->swapOperands();
@@ -917,8 +1041,8 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
LHS->getOperand(1) == RHS->getOperand(1)) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
- bool isSigned = LHS->isSigned() || RHS->isSigned();
- return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ bool IsSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder);
}
}
@@ -937,6 +1061,9 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, true, Builder))
return V;
+ if (Value *V = foldSignedTruncationCheck(LHS, RHS, CxtI, Builder))
+ return V;
+
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
@@ -1004,7 +1131,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return nullptr;
// We can't fold (ugt x, C) & (sgt x, C2).
- if (!PredicatesFoldable(PredL, PredR))
+ if (!predicatesFoldable(PredL, PredR))
return nullptr;
// Ensure that the larger constant is on the RHS.
@@ -1408,7 +1535,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// See if we can simplify any instructions used by the instruction whose sole
@@ -1635,10 +1762,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return nullptr;
}
-/// Given an OR instruction, check to see if this is a bswap idiom. If so,
-/// insert the new intrinsic and return it.
-Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+Instruction *InstCombiner::matchBSwap(BinaryOperator &Or) {
+ assert(Or.getOpcode() == Instruction::Or && "bswap requires an 'or'");
+ Value *Op0 = Or.getOperand(0), *Op1 = Or.getOperand(1);
// Look through zero extends.
if (Instruction *Ext = dyn_cast<ZExtInst>(Op0))
@@ -1674,7 +1800,7 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
return nullptr;
SmallVector<Instruction*, 4> Insts;
- if (!recognizeBSwapOrBitReverseIdiom(&I, true, false, Insts))
+ if (!recognizeBSwapOrBitReverseIdiom(&Or, true, false, Insts))
return nullptr;
Instruction *LastInst = Insts.pop_back_val();
LastInst->removeFromParent();
@@ -1684,6 +1810,57 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
return LastInst;
}
+/// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic.
+static Instruction *matchRotate(Instruction &Or) {
+ // TODO: Can we reduce the code duplication between this and the related
+ // rotate matching code under visitSelect and visitTrunc?
+ unsigned Width = Or.getType()->getScalarSizeInBits();
+ if (!isPowerOf2_32(Width))
+ return nullptr;
+
+ // First, find an or'd pair of opposite shifts with the same shifted operand:
+ // or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
+ Value *Or0 = Or.getOperand(0), *Or1 = Or.getOperand(1);
+ Value *ShVal, *ShAmt0, *ShAmt1;
+ if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
+ !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
+ return nullptr;
+
+ auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
+ auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+ if (ShiftOpcode0 == ShiftOpcode1)
+ return nullptr;
+
+ // Match the shift amount operands for a rotate pattern. This always matches
+ // a subtraction on the R operand.
+ auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+ // The shift amount may be masked with negation:
+ // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+ Value *X;
+ unsigned Mask = Width - 1;
+ if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+ match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+ return X;
+
+ return nullptr;
+ };
+
+ Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
+ bool SubIsOnLHS = false;
+ if (!ShAmt) {
+ ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
+ SubIsOnLHS = true;
+ }
+ if (!ShAmt)
+ return nullptr;
+
+ bool IsFshl = (!SubIsOnLHS && ShiftOpcode0 == BinaryOperator::Shl) ||
+ (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
+ Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+ Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
+ return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt });
+}
+
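A sketch of the UB-safe rotate idiom that matchRotate is meant to recognize (illustrative only); the masked opposite shifts should become a funnel-shift intrinsic:

  define i32 @rotl(i32 %x, i32 %amt) {
    %lowbits = and i32 %amt, 31
    %shl = shl i32 %x, %lowbits
    %neg = sub i32 0, %amt
    %lowbits2 = and i32 %neg, 31
    %lshr = lshr i32 %x, %lowbits2
    %r = or i32 %shl, %lshr
    ret i32 %r      ; expected: call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %amt)
  }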
/// If all elements of two constant vectors are 0/-1 and inverses, return true.
static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
unsigned NumElts = C1->getType()->getVectorNumElements();
@@ -1704,14 +1881,33 @@ static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
/// We have an expression of the form (A & C) | (B & D). If A is a scalar or
/// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
/// B, it can be used as the condition operand of a select instruction.
-static Value *getSelectCondition(Value *A, Value *B,
- InstCombiner::BuilderTy &Builder) {
- // If these are scalars or vectors of i1, A can be used directly.
+Value *InstCombiner::getSelectCondition(Value *A, Value *B) {
+ // Step 1: We may have peeked through bitcasts in the caller.
+ // Exit immediately if we don't have (vector) integer types.
Type *Ty = A->getType();
- if (match(A, m_Not(m_Specific(B))) && Ty->isIntOrIntVectorTy(1))
- return A;
+ if (!Ty->isIntOrIntVectorTy() || !B->getType()->isIntOrIntVectorTy())
+ return nullptr;
+
+ // Step 2: We need 0 or all-1's bitmasks.
+ if (ComputeNumSignBits(A) != Ty->getScalarSizeInBits())
+ return nullptr;
+
+ // Step 3: If B is the 'not' value of A, we have our answer.
+ if (match(A, m_Not(m_Specific(B)))) {
+ // If these are scalars or vectors of i1, A can be used directly.
+ if (Ty->isIntOrIntVectorTy(1))
+ return A;
+ return Builder.CreateTrunc(A, CmpInst::makeCmpResultType(Ty));
+ }
- // If A and B are sign-extended, look through the sexts to find the booleans.
+ // If both operands are constants, see if the constants are inverse bitmasks.
+ Constant *AConst, *BConst;
+ if (match(A, m_Constant(AConst)) && match(B, m_Constant(BConst)))
+ if (AConst == ConstantExpr::getNot(BConst))
+ return Builder.CreateZExtOrTrunc(A, CmpInst::makeCmpResultType(Ty));
+
+ // Look for more complex patterns. The 'not' op may be hidden behind various
+ // casts. Look through sexts and bitcasts to find the booleans.
Value *Cond;
Value *NotB;
if (match(A, m_SExt(m_Value(Cond))) &&
@@ -1727,36 +1923,29 @@ static Value *getSelectCondition(Value *A, Value *B,
if (!Ty->isVectorTy())
return nullptr;
- // If both operands are constants, see if the constants are inverse bitmasks.
- Constant *AC, *BC;
- if (match(A, m_Constant(AC)) && match(B, m_Constant(BC)) &&
- areInverseVectorBitmasks(AC, BC)) {
- return Builder.CreateZExtOrTrunc(AC, CmpInst::makeCmpResultType(Ty));
- }
-
// If both operands are xor'd with constants using the same sexted boolean
// operand, see if the constants are inverse bitmasks.
- if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AC)))) &&
- match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BC)))) &&
+ // TODO: Use ConstantExpr::getNot()?
+ if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AConst)))) &&
+ match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BConst)))) &&
Cond->getType()->isIntOrIntVectorTy(1) &&
- areInverseVectorBitmasks(AC, BC)) {
- AC = ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty));
- return Builder.CreateXor(Cond, AC);
+ areInverseVectorBitmasks(AConst, BConst)) {
+ AConst = ConstantExpr::getTrunc(AConst, CmpInst::makeCmpResultType(Ty));
+ return Builder.CreateXor(Cond, AConst);
}
return nullptr;
}
/// We have an expression of the form (A & C) | (B & D). Try to simplify this
/// to "A' ? C : D", where A' is a boolean or vector of booleans.
-static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
- InstCombiner::BuilderTy &Builder) {
+Value *InstCombiner::matchSelectFromAndOr(Value *A, Value *C, Value *B,
+ Value *D) {
// The potential condition of the select may be bitcasted. In that case, look
// through its bitcast and the corresponding bitcast of the 'not' condition.
Type *OrigType = A->getType();
A = peekThroughBitcast(A, true);
B = peekThroughBitcast(B, true);
-
- if (Value *Cond = getSelectCondition(A, B, Builder)) {
+ if (Value *Cond = getSelectCondition(A, B)) {
// ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
// The bitcasts will either all exist or all not exist. The builder will
// not create unnecessary casts if the types already match.
@@ -1838,7 +2027,7 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
}
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
- if (PredicatesFoldable(PredL, PredR)) {
+ if (predicatesFoldable(PredL, PredR)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
LHS->swapOperands();
@@ -1846,8 +2035,8 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
LHS->getOperand(1) == RHS->getOperand(1)) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
- bool isSigned = LHS->isSigned() || RHS->isSigned();
- return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ bool IsSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder);
}
}
@@ -1928,7 +2117,7 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return nullptr;
// We can't fold (ugt x, C) | (sgt x, C2).
- if (!PredicatesFoldable(PredL, PredR))
+ if (!predicatesFoldable(PredL, PredR))
return nullptr;
// Ensure that the larger constant is on the RHS.
@@ -2007,7 +2196,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// See if we can simplify any instructions used by the instruction whose sole
@@ -2029,37 +2218,25 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
return FoldedLogic;
- // Given an OR instruction, check to see if this is a bswap.
- if (Instruction *BSwap = MatchBSwap(I))
+ if (Instruction *BSwap = matchBSwap(I))
return BSwap;
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- {
- Value *A;
- const APInt *C;
- // (X^C)|Y -> (X|Y)^C iff Y&C == 0
- if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) &&
- MaskedValueIsZero(Op1, *C, 0, &I)) {
- Value *NOr = Builder.CreateOr(A, Op1);
- NOr->takeName(Op0);
- return BinaryOperator::CreateXor(NOr,
- ConstantInt::get(NOr->getType(), *C));
- }
+ if (Instruction *Rotate = matchRotate(I))
+ return Rotate;
- // Y|(X^C) -> (X|Y)^C iff Y&C == 0
- if (match(Op1, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) &&
- MaskedValueIsZero(Op0, *C, 0, &I)) {
- Value *NOr = Builder.CreateOr(A, Op0);
- NOr->takeName(Op0);
- return BinaryOperator::CreateXor(NOr,
- ConstantInt::get(NOr->getType(), *C));
- }
+ Value *X, *Y;
+ const APInt *CV;
+ if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
+ !CV->isAllOnesValue() && MaskedValueIsZero(Y, *CV, 0, &I)) {
+ // (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0
+ // The check for a 'not' op is for efficiency (if Y is known zero --> ~X).
+ Value *Or = Builder.CreateOr(X, Y);
+ return BinaryOperator::CreateXor(Or, ConstantInt::get(I.getType(), *CV));
}
- Value *A, *B;
-
// (A & C)|(B & D)
- Value *C = nullptr, *D = nullptr;
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Value *A, *B, *C, *D;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
match(Op1, m_And(m_Value(B), m_Value(D)))) {
ConstantInt *C1 = dyn_cast<ConstantInt>(C);
@@ -2122,21 +2299,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// 'or' that it is replacing.
if (Op0->hasOneUse() || Op1->hasOneUse()) {
// (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants.
- if (Value *V = matchSelectFromAndOr(A, C, B, D, Builder))
+ if (Value *V = matchSelectFromAndOr(A, C, B, D))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(A, C, D, B, Builder))
+ if (Value *V = matchSelectFromAndOr(A, C, D, B))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(C, A, B, D, Builder))
+ if (Value *V = matchSelectFromAndOr(C, A, B, D))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(C, A, D, B, Builder))
+ if (Value *V = matchSelectFromAndOr(C, A, D, B))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(B, D, A, C, Builder))
+ if (Value *V = matchSelectFromAndOr(B, D, A, C))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(B, D, C, A, Builder))
+ if (Value *V = matchSelectFromAndOr(B, D, C, A))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(D, B, A, C, Builder))
+ if (Value *V = matchSelectFromAndOr(D, B, A, C))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder))
+ if (Value *V = matchSelectFromAndOr(D, B, C, A))
return replaceInstUsesWith(I, V);
}
}
@@ -2251,12 +2428,12 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// be simplified by a later pass either, so we try swapping the inner/outer
// ORs in the hopes that we'll be able to simplify it this way.
// (X|C) | V --> (X|V) | C
- ConstantInt *C1;
+ ConstantInt *CI;
if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
- match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
+ match(Op0, m_Or(m_Value(A), m_ConstantInt(CI)))) {
Value *Inner = Builder.CreateOr(A, Op1);
Inner->takeName(Op0);
- return BinaryOperator::CreateOr(Inner, C1);
+ return BinaryOperator::CreateOr(Inner, CI);
}
// Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
@@ -2339,7 +2516,7 @@ static Instruction *foldXorToXor(BinaryOperator &I,
}
Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
- if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+ if (predicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
LHS->swapOperands();
@@ -2348,8 +2525,8 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
- bool isSigned = LHS->isSigned() || RHS->isSigned();
- return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ bool IsSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder);
}
}
@@ -2360,7 +2537,8 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
- LHS0->getType() == RHS0->getType()) {
+ LHS0->getType() == RHS0->getType() &&
+ LHS0->getType()->isIntOrIntVectorTy()) {
// (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
// (X < 0) ^ (Y < 0) --> (X ^ Y) < 0
if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
@@ -2452,6 +2630,32 @@ static Instruction *visitMaskedMerge(BinaryOperator &I,
return nullptr;
}
+// Transform
+// ~(x ^ y)
+// into:
+// (~x) ^ y
+// or into
+// x ^ (~y)
+static Instruction *sinkNotIntoXor(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *X, *Y;
+ // FIXME: one-use check is not needed in general, but currently we are unable
+ // to fold 'not' into 'icmp', if that 'icmp' has multiple uses. (D35182)
+ if (!match(&I, m_Not(m_OneUse(m_Xor(m_Value(X), m_Value(Y))))))
+ return nullptr;
+
+ // We only want to do the transform if it is free to do.
+ if (IsFreeToInvert(X, X->hasOneUse())) {
+ // Ok, good.
+ } else if (IsFreeToInvert(Y, Y->hasOneUse())) {
+ std::swap(X, Y);
+ } else
+ return nullptr;
+
+ Value *NotX = Builder.CreateNot(X, X->getName() + ".not");
+ return BinaryOperator::CreateXor(NotX, Y, I.getName() + ".demorgan");
+}
+
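A minimal sketch of the sinkNotIntoXor transform, assuming one xor operand is free to invert (here a compare); names are illustrative:

  define i1 @not_of_xor(i1 %a, i32 %p, i32 %q) {
    %cmp = icmp eq i32 %p, %q         ; a compare is free to invert
    %xor = xor i1 %a, %cmp
    %not = xor i1 %xor, true          ; ~(%a ^ %cmp)
    ret i1 %not                       ; expected: %a ^ (icmp ne i32 %p, %q)
  }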
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -2463,7 +2667,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *NewXor = foldXorToXor(I, Builder))
@@ -2481,9 +2685,15 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyBSwap(I, Builder))
return replaceInstUsesWith(I, V);
- // A^B --> A|B iff A and B have no bits set in common.
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (haveNoCommonBitsSet(Op0, Op1, DL, &AC, &I, &DT))
+
+ // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
+ // This is a special case in haveNoCommonBitsSet, but the computeKnownBits
+ // calls in there are unnecessary as SimplifyDemandedInstructionBits should
+ // have already taken care of those cases.
+ Value *M;
+ if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
+ m_c_And(m_Deferred(M), m_Value()))))
return BinaryOperator::CreateOr(Op0, Op1);
// Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
@@ -2528,8 +2738,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
// ~(X - Y) --> ~X + Y
- if (match(NotVal, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))))
- return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
+ if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
+ if (isa<Constant>(X) || NotVal->hasOneUse())
+ return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
// ~(~X >>s Y) --> (X >>s Y)
if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
@@ -2539,19 +2750,36 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// the 'not' by inverting the constant and using the opposite shift type.
// Canonicalization rules ensure that only a negative constant uses 'ashr',
// but we must check that in case that transform has not fired yet.
+
+ // ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
Constant *C;
if (match(NotVal, m_AShr(m_Constant(C), m_Value(Y))) &&
- match(C, m_Negative())) {
- // ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
- Constant *NotC = ConstantExpr::getNot(C);
- return BinaryOperator::CreateLShr(NotC, Y);
- }
+ match(C, m_Negative()))
+ return BinaryOperator::CreateLShr(ConstantExpr::getNot(C), Y);
+ // ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
if (match(NotVal, m_LShr(m_Constant(C), m_Value(Y))) &&
- match(C, m_NonNegative())) {
- // ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
- Constant *NotC = ConstantExpr::getNot(C);
- return BinaryOperator::CreateAShr(NotC, Y);
+ match(C, m_NonNegative()))
+ return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y);
+
+ // ~(X + C) --> -(C + 1) - X
+ if (match(Op0, m_Add(m_Value(X), m_Constant(C))))
+ return BinaryOperator::CreateSub(ConstantExpr::getNeg(AddOne(C)), X);
+ }
+
+ // Use DeMorgan and reassociation to eliminate a 'not' op.
+ Constant *C1;
+ if (match(Op1, m_Constant(C1))) {
+ Constant *C2;
+ if (match(Op0, m_OneUse(m_Or(m_Not(m_Value(X)), m_Constant(C2))))) {
+ // (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
+ Value *And = Builder.CreateAnd(X, ConstantExpr::getNot(C2));
+ return BinaryOperator::CreateXor(And, ConstantExpr::getNot(C1));
+ }
+ if (match(Op0, m_OneUse(m_And(m_Not(m_Value(X)), m_Constant(C2))))) {
+ // (~X & C2) ^ C1 --> ((X | ~C2) ^ -1) ^ C1 --> (X | ~C2) ^ ~C1
+ Value *Or = Builder.CreateOr(X, ConstantExpr::getNot(C2));
+ return BinaryOperator::CreateXor(Or, ConstantExpr::getNot(C1));
}
}
@@ -2567,28 +2795,15 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (match(Op1, m_APInt(RHSC))) {
Value *X;
const APInt *C;
- if (match(Op0, m_Sub(m_APInt(C), m_Value(X)))) {
- // ~(c-X) == X-c-1 == X+(-c-1)
- if (RHSC->isAllOnesValue()) {
- Constant *NewC = ConstantInt::get(I.getType(), -(*C) - 1);
- return BinaryOperator::CreateAdd(X, NewC);
- }
- if (RHSC->isSignMask()) {
- // (C - X) ^ signmask -> (C + signmask - X)
- Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
- return BinaryOperator::CreateSub(NewC, X);
- }
- } else if (match(Op0, m_Add(m_Value(X), m_APInt(C)))) {
- // ~(X-c) --> (-c-1)-X
- if (RHSC->isAllOnesValue()) {
- Constant *NewC = ConstantInt::get(I.getType(), -(*C) - 1);
- return BinaryOperator::CreateSub(NewC, X);
- }
- if (RHSC->isSignMask()) {
- // (X + C) ^ signmask -> (X + C + signmask)
- Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
- return BinaryOperator::CreateAdd(X, NewC);
- }
+ if (RHSC->isSignMask() && match(Op0, m_Sub(m_APInt(C), m_Value(X)))) {
+ // (C - X) ^ signmask -> (C + signmask - X)
+ Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
+ return BinaryOperator::CreateSub(NewC, X);
+ }
+ if (RHSC->isSignMask() && match(Op0, m_Add(m_Value(X), m_APInt(C)))) {
+ // (X + C) ^ signmask -> (X + C + signmask)
+ Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
+ return BinaryOperator::CreateAdd(X, NewC);
}
// (X|C1)^C2 -> X^(C1^C2) iff X&~C1 == 0
@@ -2635,82 +2850,52 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
return FoldedLogic;
- {
- Value *A, *B;
- if (match(Op1, m_OneUse(m_Or(m_Value(A), m_Value(B))))) {
- if (A == Op0) { // A^(A|B) == A^(B|A)
- cast<BinaryOperator>(Op1)->swapOperands();
- std::swap(A, B);
- }
- if (B == Op0) { // A^(B|A) == (B|A)^A
- I.swapOperands(); // Simplified below.
- std::swap(Op0, Op1);
- }
- } else if (match(Op1, m_OneUse(m_And(m_Value(A), m_Value(B))))) {
- if (A == Op0) { // A^(A&B) -> A^(B&A)
- cast<BinaryOperator>(Op1)->swapOperands();
- std::swap(A, B);
- }
- if (B == Op0) { // A^(B&A) -> (B&A)^A
- I.swapOperands(); // Simplified below.
- std::swap(Op0, Op1);
- }
- }
- }
-
- {
- Value *A, *B;
- if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B))))) {
- if (A == Op1) // (B|A)^B == (A|B)^B
- std::swap(A, B);
- if (B == Op1) // (A|B)^B == A & ~B
- return BinaryOperator::CreateAnd(A, Builder.CreateNot(Op1));
- } else if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B))))) {
- if (A == Op1) // (A&B)^A -> (B&A)^A
- std::swap(A, B);
- const APInt *C;
- if (B == Op1 && // (B&A)^A == ~B & A
- !match(Op1, m_APInt(C))) { // Canonical form is (B&C)^C
- return BinaryOperator::CreateAnd(Builder.CreateNot(A), Op1);
- }
- }
- }
-
- {
- Value *A, *B, *C, *D;
- // (A ^ C)^(A | B) -> ((~A) & B) ^ C
- if (match(Op0, m_Xor(m_Value(D), m_Value(C))) &&
- match(Op1, m_Or(m_Value(A), m_Value(B)))) {
- if (D == A)
- return BinaryOperator::CreateXor(
- Builder.CreateAnd(Builder.CreateNot(A), B), C);
- if (D == B)
- return BinaryOperator::CreateXor(
- Builder.CreateAnd(Builder.CreateNot(B), A), C);
- }
- // (A | B)^(A ^ C) -> ((~A) & B) ^ C
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1, m_Xor(m_Value(D), m_Value(C)))) {
- if (D == A)
- return BinaryOperator::CreateXor(
- Builder.CreateAnd(Builder.CreateNot(A), B), C);
- if (D == B)
- return BinaryOperator::CreateXor(
- Builder.CreateAnd(Builder.CreateNot(B), A), C);
- }
- // (A & B) ^ (A ^ B) -> (A | B)
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
- return BinaryOperator::CreateOr(A, B);
- // (A ^ B) ^ (A & B) -> (A | B)
- if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
- match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
- return BinaryOperator::CreateOr(A, B);
- }
+ // Y ^ (X | Y) --> X & ~Y
+ // Y ^ (Y | X) --> X & ~Y
+ if (match(Op1, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op0)))))
+ return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op0));
+ // (X | Y) ^ Y --> X & ~Y
+ // (Y | X) ^ Y --> X & ~Y
+ if (match(Op0, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op1)))))
+ return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op1));
+
+ // Y ^ (X & Y) --> ~X & Y
+ // Y ^ (Y & X) --> ~X & Y
+ if (match(Op1, m_OneUse(m_c_And(m_Value(X), m_Specific(Op0)))))
+ return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(X));
+ // (X & Y) ^ Y --> ~X & Y
+ // (Y & X) ^ Y --> ~X & Y
+ // Canonical form is (X & C) ^ C; don't touch that.
+ // TODO: A 'not' op is better for analysis and codegen, but demanded bits must
+ // be fixed to prefer that (otherwise we get infinite looping).
+ if (!match(Op1, m_Constant()) &&
+ match(Op0, m_OneUse(m_c_And(m_Value(X), m_Specific(Op1)))))
+ return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(X));
+
+ Value *A, *B, *C;
+ // (A ^ B) ^ (A | C) --> (~A & C) ^ B -- There are 4 commuted variants.
+ if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
+ m_OneUse(m_c_Or(m_Deferred(A), m_Value(C))))))
+ return BinaryOperator::CreateXor(
+ Builder.CreateAnd(Builder.CreateNot(A), C), B);
+
+ // (A ^ B) ^ (B | C) --> (~B & C) ^ A -- There are 4 commuted variants.
+ if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
+ m_OneUse(m_c_Or(m_Deferred(B), m_Value(C))))))
+ return BinaryOperator::CreateXor(
+ Builder.CreateAnd(Builder.CreateNot(B), C), A);
+
+ // (A & B) ^ (A ^ B) -> (A | B)
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateOr(A, B);
+ // (A ^ B) ^ (A & B) -> (A | B)
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateOr(A, B);
// (A & ~B) ^ ~A -> ~(A & B)
// (~B & A) ^ ~A -> ~(A & B)
- Value *A, *B;
if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Not(m_Specific(A))))
return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));
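One illustrative case of the rewritten xor-of-or folds above (names invented for this note):

  define i32 @xor_with_or(i32 %x, i32 %y) {
    %or = or i32 %x, %y
    %r = xor i32 %y, %or              ; Y ^ (X | Y)
    ret i32 %r                        ; expected: %x & ~%y
  }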
@@ -2759,23 +2944,41 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// %res = select i1 %cmp2, i32 %x, i32 %noty
//
// Same is applicable for smin/umax/umin.
- {
+ if (match(Op1, m_AllOnes()) && Op0->hasOneUse()) {
Value *LHS, *RHS;
SelectPatternFlavor SPF = matchSelectPattern(Op0, LHS, RHS).Flavor;
- if (Op0->hasOneUse() && SelectPatternResult::isMinOrMax(SPF) &&
- match(Op1, m_AllOnes())) {
-
- Value *X;
- if (match(RHS, m_Not(m_Value(X))))
- std::swap(RHS, LHS);
-
- if (match(LHS, m_Not(m_Value(X)))) {
+ if (SelectPatternResult::isMinOrMax(SPF)) {
+ // It's possible we get here before the not has been simplified, so make
+ // sure the input to the not isn't freely invertible.
+ if (match(LHS, m_Not(m_Value(X))) && !IsFreeToInvert(X, X->hasOneUse())) {
Value *NotY = Builder.CreateNot(RHS);
return SelectInst::Create(
Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
}
+
+ // It's possible we get here before the not has been simplified, so make
+ // sure the input to the not isn't freely invertible.
+ if (match(RHS, m_Not(m_Value(Y))) && !IsFreeToInvert(Y, Y->hasOneUse())) {
+ Value *NotX = Builder.CreateNot(LHS);
+ return SelectInst::Create(
+ Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y);
+ }
+
+ // If both sides are freely invertible, then we can get rid of the xor
+ // completely.
+ if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
+ IsFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) {
+ Value *NotLHS = Builder.CreateNot(LHS);
+ Value *NotRHS = Builder.CreateNot(RHS);
+ return SelectInst::Create(
+ Builder.CreateICmp(getInverseMinMaxPred(SPF), NotLHS, NotRHS),
+ NotLHS, NotRHS);
+ }
}
}
+ if (Instruction *NewXor = sinkNotIntoXor(I, Builder))
+ return NewXor;
+
return nullptr;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cbfbd8a53993..aeb25d530d71 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -136,6 +136,14 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
if (Size > 8 || (Size&(Size-1)))
return nullptr; // If not 1/2/4/8 bytes, exit.
+ // If it is an atomic and the alignment is less than the size, then we would
+ // introduce an unaligned memory access, which CodeGen would later transform
+ // into a libcall. That is not an evident performance gain, so disable the
+ // transform for now.
+ if (isa<AtomicMemTransferInst>(MI))
+ if (CopyDstAlign < Size || CopySrcAlign < Size)
+ return nullptr;
+
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
@@ -174,6 +182,9 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
if (LoopMemParallelMD)
L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
+ MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
+ if (AccessGroupMD)
+ L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
StoreInst *S = Builder.CreateStore(L, Dest);
// Alignment from the mem intrinsic will be better, so use it.
@@ -182,6 +193,8 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
if (LoopMemParallelMD)
S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
+ if (AccessGroupMD)
+ S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
// non-atomics can be volatile
@@ -215,6 +228,18 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
Alignment = MI->getDestAlignment();
assert(Len && "0-sized memory setting should be removed already.");
+ // Alignment 0 is identity for alignment 1 for memset, but not store.
+ if (Alignment == 0)
+ Alignment = 1;
+
+ // If it is an atomic and the alignment is less than the size, then we would
+ // introduce an unaligned memory access, which CodeGen would later transform
+ // into a libcall. That is not an evident performance gain, so disable the
+ // transform for now.
+ if (isa<AtomicMemSetInst>(MI))
+ if (Alignment < Len)
+ return nullptr;
+
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
@@ -224,9 +249,6 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
- // Alignment 0 is identity for alignment 1 for memset, but not store.
- if (Alignment == 0) Alignment = 1;
-
// Extract the fill value and store.
uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
@@ -648,7 +670,7 @@ static Value *simplifyX86round(IntrinsicInst &II,
}
Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
- Value *Res = Builder.CreateIntrinsic(ID, {Src}, &II);
+ Value *Res = Builder.CreateUnaryIntrinsic(ID, Src, &II);
if (!IsScalar) {
if (auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
@@ -675,7 +697,8 @@ static Value *simplifyX86round(IntrinsicInst &II,
return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
}
-static Value *simplifyX86movmsk(const IntrinsicInst &II) {
+static Value *simplifyX86movmsk(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
Value *Arg = II.getArgOperand(0);
Type *ResTy = II.getType();
Type *ArgTy = Arg->getType();
@@ -688,29 +711,46 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II) {
if (!ArgTy->isVectorTy())
return nullptr;
- auto *C = dyn_cast<Constant>(Arg);
- if (!C)
- return nullptr;
+ if (auto *C = dyn_cast<Constant>(Arg)) {
+ // Extract signbits of the vector input and pack into integer result.
+ APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
+ for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
+ auto *COp = C->getAggregateElement(I);
+ if (!COp)
+ return nullptr;
+ if (isa<UndefValue>(COp))
+ continue;
- // Extract signbits of the vector input and pack into integer result.
- APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
- for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
- auto *COp = C->getAggregateElement(I);
- if (!COp)
- return nullptr;
- if (isa<UndefValue>(COp))
- continue;
+ auto *CInt = dyn_cast<ConstantInt>(COp);
+ auto *CFp = dyn_cast<ConstantFP>(COp);
+ if (!CInt && !CFp)
+ return nullptr;
- auto *CInt = dyn_cast<ConstantInt>(COp);
- auto *CFp = dyn_cast<ConstantFP>(COp);
- if (!CInt && !CFp)
- return nullptr;
+ if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
+ Result.setBit(I);
+ }
+ return Constant::getIntegerValue(ResTy, Result);
+ }
- if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
- Result.setBit(I);
+ // Look for a sign-extended boolean source vector as the argument to this
+ // movmsk. If the argument is bitcast, look through that, but make sure the
+ // source of that bitcast is still a vector with the same number of elements.
+ // TODO: We can also convert a bitcast with wider elements, but that requires
+ // duplicating the bool source sign bits to match the number of elements
+ // expected by the movmsk call.
+ Arg = peekThroughBitcast(Arg);
+ Value *X;
+ if (Arg->getType()->isVectorTy() &&
+ Arg->getType()->getVectorNumElements() == ArgTy->getVectorNumElements() &&
+ match(Arg, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
+ // call iM movmsk(sext <N x i1> X) --> zext (bitcast <N x i1> X to iN) to iM
+ unsigned NumElts = X->getType()->getVectorNumElements();
+ Type *ScalarTy = Type::getIntNTy(Arg->getContext(), NumElts);
+ Value *BC = Builder.CreateBitCast(X, ScalarTy);
+ return Builder.CreateZExtOrTrunc(BC, ResTy);
}
- return Constant::getIntegerValue(ResTy, Result);
+ return nullptr;
}
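An illustrative movmsk input that the new sign-extended-boolean path should handle (the SSE2 variant is used here only as an example):

  declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
  define i32 @movmsk_of_sext(<16 x i1> %b) {
    %sext = sext <16 x i1> %b to <16 x i8>
    %r = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %sext)
    ret i32 %r      ; expected: zext (bitcast <16 x i1> %b to i16) to i32
  }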
static Value *simplifyX86insertps(const IntrinsicInst &II,
@@ -1133,82 +1173,6 @@ static Value *simplifyX86vpcom(const IntrinsicInst &II,
return nullptr;
}
-static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
- Value *Arg0 = II.getArgOperand(0);
- Value *Arg1 = II.getArgOperand(1);
-
- // fmin(x, x) -> x
- if (Arg0 == Arg1)
- return Arg0;
-
- const auto *C1 = dyn_cast<ConstantFP>(Arg1);
-
- // fmin(x, nan) -> x
- if (C1 && C1->isNaN())
- return Arg0;
-
- // This is the value because if undef were NaN, we would return the other
- // value and cannot return a NaN unless both operands are.
- //
- // fmin(undef, x) -> x
- if (isa<UndefValue>(Arg0))
- return Arg1;
-
- // fmin(x, undef) -> x
- if (isa<UndefValue>(Arg1))
- return Arg0;
-
- Value *X = nullptr;
- Value *Y = nullptr;
- if (II.getIntrinsicID() == Intrinsic::minnum) {
- // fmin(x, fmin(x, y)) -> fmin(x, y)
- // fmin(y, fmin(x, y)) -> fmin(x, y)
- if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
- if (Arg0 == X || Arg0 == Y)
- return Arg1;
- }
-
- // fmin(fmin(x, y), x) -> fmin(x, y)
- // fmin(fmin(x, y), y) -> fmin(x, y)
- if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
- if (Arg1 == X || Arg1 == Y)
- return Arg0;
- }
-
- // TODO: fmin(nnan x, inf) -> x
- // TODO: fmin(nnan ninf x, flt_max) -> x
- if (C1 && C1->isInfinity()) {
- // fmin(x, -inf) -> -inf
- if (C1->isNegative())
- return Arg1;
- }
- } else {
- assert(II.getIntrinsicID() == Intrinsic::maxnum);
- // fmax(x, fmax(x, y)) -> fmax(x, y)
- // fmax(y, fmax(x, y)) -> fmax(x, y)
- if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
- if (Arg0 == X || Arg0 == Y)
- return Arg1;
- }
-
- // fmax(fmax(x, y), x) -> fmax(x, y)
- // fmax(fmax(x, y), y) -> fmax(x, y)
- if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
- if (Arg1 == X || Arg1 == Y)
- return Arg0;
- }
-
- // TODO: fmax(nnan x, -inf) -> x
- // TODO: fmax(nnan ninf x, -flt_max) -> x
- if (C1 && C1->isInfinity()) {
- // fmax(x, inf) -> inf
- if (!C1->isNegative())
- return Arg1;
- }
- }
- return nullptr;
-}
-
static bool maskIsAllOneOrUndef(Value *Mask) {
auto *ConstMask = dyn_cast<Constant>(Mask);
if (!ConstMask)
@@ -1852,6 +1816,17 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
return nullptr;
}
+static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) {
+ assert(Call.getNumArgOperands() > 1 && "Need at least 2 args to swap");
+ Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
+ if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
+ Call.setArgOperand(0, Arg1);
+ Call.setArgOperand(1, Arg0);
+ return &Call;
+ }
+ return nullptr;
+}
+
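The helper above factors out an existing canonicalization; a sketch of its effect on one of the intrinsics that now use it (uadd.sat chosen only as an example):

  declare i32 @llvm.uadd.sat.i32(i32, i32)
  define i32 @canon_const_to_rhs(i32 %x) {
    %r = call i32 @llvm.uadd.sat.i32(i32 42, i32 %x)
    ret i32 %r      ; expected: call i32 @llvm.uadd.sat.i32(i32 %x, i32 42)
  }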
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallSite to do the heavy
/// lifting.
@@ -2005,18 +1980,49 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return I;
break;
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ const APInt *SA;
+ if (match(II->getArgOperand(2), m_APInt(SA))) {
+ Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
+ unsigned BitWidth = SA->getBitWidth();
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ assert(ShiftAmt != 0 && "SimplifyCall should have handled zero shift");
+ // Normalize to funnel shift left.
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
+
+ // fshl(X, 0, C) -> shl X, C
+ // fshl(X, undef, C) -> shl X, C
+ if (match(Op1, m_Zero()) || match(Op1, m_Undef()))
+ return BinaryOperator::CreateShl(
+ Op0, ConstantInt::get(II->getType(), ShiftAmt));
+
+ // fshl(0, X, C) -> lshr X, (BW-C)
+ // fshl(undef, X, C) -> lshr X, (BW-C)
+ if (match(Op0, m_Zero()) || match(Op0, m_Undef()))
+ return BinaryOperator::CreateLShr(
+ Op1, ConstantInt::get(II->getType(), BitWidth - ShiftAmt));
+ }
+
+ // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
+ // so only the low bits of the shift amount are demanded if the bitwidth is
+ // a power-of-2.
+ unsigned BitWidth = II->getType()->getScalarSizeInBits();
+ if (!isPowerOf2_32(BitWidth))
+ break;
+ APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
+ KnownBits Op2Known(BitWidth);
+ if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
+ return &CI;
+ break;
+ }
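A sketch of the new funnel-shift folds with a constant shift amount (illustrative only):

  declare i32 @llvm.fshl.i32(i32, i32, i32)
  define i32 @fshl_zero_op1(i32 %x) {
    %r = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 5)
    ret i32 %r      ; expected: shl i32 %x, 5
  }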
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
- if (isa<Constant>(II->getArgOperand(0)) &&
- !isa<Constant>(II->getArgOperand(1))) {
- // Canonicalize constants into the RHS.
- Value *LHS = II->getArgOperand(0);
- II->setArgOperand(0, II->getArgOperand(1));
- II->setArgOperand(1, LHS);
- return II;
- }
+ if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+ return I;
LLVM_FALLTHROUGH;
case Intrinsic::usub_with_overflow:
@@ -2034,34 +2040,164 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
- case Intrinsic::minnum:
- case Intrinsic::maxnum: {
+ case Intrinsic::uadd_sat:
+ case Intrinsic::sadd_sat:
+ if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+ return I;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::usub_sat:
+ case Intrinsic::ssub_sat: {
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
- // Canonicalize constants to the RHS.
- if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
- II->setArgOperand(0, Arg1);
- II->setArgOperand(1, Arg0);
- return II;
+ Intrinsic::ID IID = II->getIntrinsicID();
+
+ // Make use of known overflow information.
+ OverflowResult OR;
+ switch (IID) {
+ default:
+ llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::uadd_sat:
+ OR = computeOverflowForUnsignedAdd(Arg0, Arg1, II);
+ if (OR == OverflowResult::NeverOverflows)
+ return BinaryOperator::CreateNUWAdd(Arg0, Arg1);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return replaceInstUsesWith(*II,
+ ConstantInt::getAllOnesValue(II->getType()));
+ break;
+ case Intrinsic::usub_sat:
+ OR = computeOverflowForUnsignedSub(Arg0, Arg1, II);
+ if (OR == OverflowResult::NeverOverflows)
+ return BinaryOperator::CreateNUWSub(Arg0, Arg1);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return replaceInstUsesWith(*II,
+ ConstantInt::getNullValue(II->getType()));
+ break;
+ case Intrinsic::sadd_sat:
+ if (willNotOverflowSignedAdd(Arg0, Arg1, *II))
+ return BinaryOperator::CreateNSWAdd(Arg0, Arg1);
+ break;
+ case Intrinsic::ssub_sat:
+ if (willNotOverflowSignedSub(Arg0, Arg1, *II))
+ return BinaryOperator::CreateNSWSub(Arg0, Arg1);
+ break;
}
- // FIXME: Simplifications should be in instsimplify.
- if (Value *V = simplifyMinnumMaxnum(*II))
- return replaceInstUsesWith(*II, V);
+ // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
+ Constant *C;
+ if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
+ C->isNotMinSignedValue()) {
+ Value *NegVal = ConstantExpr::getNeg(C);
+ return replaceInstUsesWith(
+ *II, Builder.CreateBinaryIntrinsic(
+ Intrinsic::sadd_sat, Arg0, NegVal));
+ }
+
+ // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
+ // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
+ // if Val and Val2 have the same sign
+ if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
+ Value *X;
+ const APInt *Val, *Val2;
+ APInt NewVal;
+ bool IsUnsigned =
+ IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
+ if (Other->getIntrinsicID() == II->getIntrinsicID() &&
+ match(Arg1, m_APInt(Val)) &&
+ match(Other->getArgOperand(0), m_Value(X)) &&
+ match(Other->getArgOperand(1), m_APInt(Val2))) {
+ if (IsUnsigned)
+ NewVal = Val->uadd_sat(*Val2);
+ else if (Val->isNonNegative() == Val2->isNonNegative()) {
+ bool Overflow;
+ NewVal = Val->sadd_ov(*Val2, Overflow);
+ if (Overflow) {
+ // Both adds together may add more than SignedMaxValue
+ // without saturating the final result.
+ break;
+ }
+ } else {
+ // Cannot fold saturated addition with different signs.
+ break;
+ }
+ return replaceInstUsesWith(
+ *II, Builder.CreateBinaryIntrinsic(
+ IID, X, ConstantInt::get(II->getType(), NewVal)));
+ }
+ }
+ break;
+ }
+
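One illustrative case of the new saturating-arithmetic folds, assuming the constant operand is not the minimum signed value:

  declare i8 @llvm.ssub.sat.i8(i8, i8)
  define i8 @ssub_sat_const(i8 %x) {
    %r = call i8 @llvm.ssub.sat.i8(i8 %x, i8 10)
    ret i8 %r       ; expected: call i8 @llvm.sadd.sat.i8(i8 %x, i8 -10)
  }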
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum: {
+ if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+ return I;
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ Intrinsic::ID IID = II->getIntrinsicID();
Value *X, *Y;
if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
(Arg0->hasOneUse() || Arg1->hasOneUse())) {
// If both operands are negated, invert the call and negate the result:
- // minnum(-X, -Y) --> -(maxnum(X, Y))
- // maxnum(-X, -Y) --> -(minnum(X, Y))
- Intrinsic::ID NewIID = II->getIntrinsicID() == Intrinsic::maxnum ?
- Intrinsic::minnum : Intrinsic::maxnum;
- Value *NewCall = Builder.CreateIntrinsic(NewIID, { X, Y }, II);
+ // min(-X, -Y) --> -(max(X, Y))
+ // max(-X, -Y) --> -(min(X, Y))
+ Intrinsic::ID NewIID;
+ switch (IID) {
+ case Intrinsic::maxnum:
+ NewIID = Intrinsic::minnum;
+ break;
+ case Intrinsic::minnum:
+ NewIID = Intrinsic::maxnum;
+ break;
+ case Intrinsic::maximum:
+ NewIID = Intrinsic::minimum;
+ break;
+ case Intrinsic::minimum:
+ NewIID = Intrinsic::maximum;
+ break;
+ default:
+ llvm_unreachable("unexpected intrinsic ID");
+ }
+ Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
Instruction *FNeg = BinaryOperator::CreateFNeg(NewCall);
FNeg->copyIRFlags(II);
return FNeg;
}
+
+ // m(m(X, C2), C1) -> m(X, C)
+ const APFloat *C1, *C2;
+ if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
+ if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
+ ((match(M->getArgOperand(0), m_Value(X)) &&
+ match(M->getArgOperand(1), m_APFloat(C2))) ||
+ (match(M->getArgOperand(1), m_Value(X)) &&
+ match(M->getArgOperand(0), m_APFloat(C2))))) {
+ APFloat Res(0.0);
+ switch (IID) {
+ case Intrinsic::maxnum:
+ Res = maxnum(*C1, *C2);
+ break;
+ case Intrinsic::minnum:
+ Res = minnum(*C1, *C2);
+ break;
+ case Intrinsic::maximum:
+ Res = maximum(*C1, *C2);
+ break;
+ case Intrinsic::minimum:
+ Res = minimum(*C1, *C2);
+ break;
+ default:
+ llvm_unreachable("unexpected intrinsic ID");
+ }
+ Instruction *NewCall = Builder.CreateBinaryIntrinsic(
+ IID, X, ConstantFP::get(Arg0->getType(), Res));
+ NewCall->copyIRFlags(II);
+ return replaceInstUsesWith(*II, NewCall);
+ }
+ }
+
break;
}
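A sketch of the new m(m(X, C2), C1) constant folding for the min/max intrinsics (minnum chosen only as an example):

  declare float @llvm.minnum.f32(float, float)
  define float @minnum_chain(float %x) {
    %m1 = call float @llvm.minnum.f32(float %x, float 4.0)
    %m2 = call float @llvm.minnum.f32(float %m1, float 2.0)
    ret float %m2   ; expected: call float @llvm.minnum.f32(float %x, float 2.0)
  }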
case Intrinsic::fmuladd: {
@@ -2079,17 +2215,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
LLVM_FALLTHROUGH;
}
case Intrinsic::fma: {
- Value *Src0 = II->getArgOperand(0);
- Value *Src1 = II->getArgOperand(1);
-
- // Canonicalize constant multiply operand to Src1.
- if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
- II->setArgOperand(0, Src1);
- II->setArgOperand(1, Src0);
- std::swap(Src0, Src1);
- }
+ if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+ return I;
// fma fneg(x), fneg(y), z -> fma x, y, z
+ Value *Src0 = II->getArgOperand(0);
+ Value *Src1 = II->getArgOperand(1);
Value *X, *Y;
if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
II->setArgOperand(0, X);
@@ -2135,24 +2266,33 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *ExtSrc;
if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
// Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
- Value *NarrowII = Builder.CreateIntrinsic(II->getIntrinsicID(),
- { ExtSrc }, II);
+ Value *NarrowII =
+ Builder.CreateUnaryIntrinsic(II->getIntrinsicID(), ExtSrc, II);
return new FPExtInst(NarrowII, II->getType());
}
break;
}
case Intrinsic::cos:
case Intrinsic::amdgcn_cos: {
- Value *SrcSrc;
+ Value *X;
Value *Src = II->getArgOperand(0);
- if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
- match(Src, m_FAbs(m_Value(SrcSrc)))) {
+ if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
// cos(-x) -> cos(x)
// cos(fabs(x)) -> cos(x)
- II->setArgOperand(0, SrcSrc);
+ II->setArgOperand(0, X);
return II;
}
-
+ break;
+ }
+ case Intrinsic::sin: {
+ Value *X;
+ if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
+ // sin(-x) --> -sin(x)
+ Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
+ Instruction *FNeg = BinaryOperator::CreateFNeg(NewSin);
+ FNeg->copyFastMathFlags(II);
+ return FNeg;
+ }
break;
}
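An illustrative input for the new sin(-x) fold (the fneg is written in its canonical fsub form):

  declare float @llvm.sin.f32(float)
  define float @sin_of_fneg(float %x) {
    %neg = fsub float -0.0, %x
    %r = call float @llvm.sin.f32(float %neg)
    ret float %r    ; expected: sin of %x, with the result negated
  }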
case Intrinsic::ppc_altivec_lvx:
@@ -2382,7 +2522,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_avx_movmsk_ps_256:
case Intrinsic::x86_avx2_pmovmskb:
- if (Value *V = simplifyX86movmsk(*II))
+ if (Value *V = simplifyX86movmsk(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -2922,16 +3062,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx_blendv_ps_256:
case Intrinsic::x86_avx_blendv_pd_256:
case Intrinsic::x86_avx2_pblendvb: {
- // Convert blendv* to vector selects if the mask is constant.
- // This optimization is convoluted because the intrinsic is defined as
- // getting a vector of floats or doubles for the ps and pd versions.
- // FIXME: That should be changed.
-
+ // fold (blend A, A, Mask) -> A
Value *Op0 = II->getArgOperand(0);
Value *Op1 = II->getArgOperand(1);
Value *Mask = II->getArgOperand(2);
-
- // fold (blend A, A, Mask) -> A
if (Op0 == Op1)
return replaceInstUsesWith(CI, Op0);
@@ -2944,6 +3078,33 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
}
+
+ // Convert to a vector select if we can bypass casts and find a boolean
+ // vector condition value.
+ Value *BoolVec;
+ Mask = peekThroughBitcast(Mask);
+ if (match(Mask, m_SExt(m_Value(BoolVec))) &&
+ BoolVec->getType()->isVectorTy() &&
+ BoolVec->getType()->getScalarSizeInBits() == 1) {
+ assert(Mask->getType()->getPrimitiveSizeInBits() ==
+ II->getType()->getPrimitiveSizeInBits() &&
+ "Not expecting mask and operands with different sizes");
+
+ unsigned NumMaskElts = Mask->getType()->getVectorNumElements();
+ unsigned NumOperandElts = II->getType()->getVectorNumElements();
+ if (NumMaskElts == NumOperandElts)
+ return SelectInst::Create(BoolVec, Op1, Op0);
+
+ // If the mask has fewer elements than the operands, each mask bit maps to
+ // multiple elements of the operands. Bitcast back and forth.
+ if (NumMaskElts < NumOperandElts) {
+ Value *CastOp0 = Builder.CreateBitCast(Op0, Mask->getType());
+ Value *CastOp1 = Builder.CreateBitCast(Op1, Mask->getType());
+ Value *Sel = Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
+ return new BitCastInst(Sel, II->getType());
+ }
+ }
+
break;
}
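An illustrative blendv input for the new sign-extended-boolean path (SSE4.1 pblendvb used only as an example):

  declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
  define <16 x i8> @blendv_of_sext_mask(<16 x i8> %a, <16 x i8> %b, <16 x i1> %c) {
    %mask = sext <16 x i1> %c to <16 x i8>
    %r = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %mask)
    ret <16 x i8> %r   ; expected: select <16 x i1> %c, <16 x i8> %b, <16 x i8> %a
  }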
@@ -3275,6 +3436,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, FCmp);
}
+ if (Mask == (N_ZERO | P_ZERO)) {
+ // Equivalent of == 0.
+ Value *FCmp = Builder.CreateFCmpOEQ(
+ Src0, ConstantFP::get(Src0->getType(), 0.0));
+
+ FCmp->takeName(II);
+ return replaceInstUsesWith(*II, FCmp);
+ }
+
+ // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
+ if (((Mask & S_NAN) || (Mask & Q_NAN)) && isKnownNeverNaN(Src0, &TLI)) {
+ II->setArgOperand(1, ConstantInt::get(Src1->getType(),
+ Mask & ~(S_NAN | Q_NAN)));
+ return II;
+ }
+
const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
if (!CVal) {
if (isa<UndefValue>(Src0))
@@ -3384,22 +3561,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
- // TODO: Also emit sub if only width is constant.
- if (!CWidth && COffset && Offset == 0) {
- Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
- Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
- ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
-
- Value *Shl = Builder.CreateShl(Src, ShiftVal);
- Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
- : Builder.CreateLShr(Shl, ShiftVal);
- RightShift->takeName(II);
- return replaceInstUsesWith(*II, RightShift);
- }
-
if (!CWidth || !COffset)
break;
+ // The case of Width == 0 is handled above, which makes this transformation
+ // safe. If Width == 0, then the ashr and lshr instructions become a poison
+ // value since the shift amount would be equal to the bit size.
+ assert(Width != 0);
+
// TODO: This allows folding to undef when the hardware has specific
// behavior?
if (Offset + Width < IntSize) {
@@ -3603,6 +3772,38 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
+ Type *Ty = SrcLHS->getType();
+ if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
+ // Promote to next legal integer type.
+ unsigned Width = CmpType->getBitWidth();
+ unsigned NewWidth = Width;
+
+ // Don't do anything for i1 comparisons.
+ if (Width == 1)
+ break;
+
+ if (Width <= 16)
+ NewWidth = 16;
+ else if (Width <= 32)
+ NewWidth = 32;
+ else if (Width <= 64)
+ NewWidth = 64;
+ else if (Width > 64)
+ break; // Can't handle this.
+
+ if (Width != NewWidth) {
+ IntegerType *CmpTy = Builder.getIntNTy(NewWidth);
+ if (CmpInst::isSigned(SrcPred)) {
+ SrcLHS = Builder.CreateSExt(SrcLHS, CmpTy);
+ SrcRHS = Builder.CreateSExt(SrcRHS, CmpTy);
+ } else {
+ SrcLHS = Builder.CreateZExt(SrcLHS, CmpTy);
+ SrcRHS = Builder.CreateZExt(SrcRHS, CmpTy);
+ }
+ }
+ } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
+ break;
+
Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
SrcLHS->getType());
Value *Args[] = { SrcLHS, SrcRHS,
@@ -3661,7 +3862,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Scan down this block to see if there is another stack restore in the
// same block without an intervening call/alloca.
BasicBlock::iterator BI(II);
- TerminatorInst *TI = II->getParent()->getTerminator();
+ Instruction *TI = II->getParent()->getTerminator();
bool CannotRemove = false;
for (++BI; &*BI != TI; ++BI) {
if (isa<AllocaInst>(BI)) {
@@ -3788,8 +3989,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
// isKnownNonNull -> nonnull attribute
- if (isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT))
+ if (!II->hasRetAttr(Attribute::NonNull) &&
+ isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT)) {
II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ return II;
+ }
}
// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
@@ -3889,7 +4093,11 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
auto InstCombineRAUW = [this](Instruction *From, Value *With) {
replaceInstUsesWith(*From, With);
};
- LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW);
+ auto InstCombineErase = [this](Instruction *I) {
+ eraseInstFromFunction(*I);
+ };
+ LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW,
+ InstCombineErase);
if (Value *With = Simplifier.optimizeCall(CI)) {
++NumSimplified;
return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index fd59c3a7c0c3..1201ac196ec0 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -492,12 +492,19 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC) {
}
/// Rotate left/right may occur in a wider type than necessary because of type
-/// promotion rules. Try to narrow all of the component instructions.
+/// promotion rules. Try to narrow the inputs and convert to funnel shift.
Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
assert((isa<VectorType>(Trunc.getSrcTy()) ||
shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
"Don't narrow to an illegal scalar type");
+ // Bail out on strange types. It is possible to handle some of these patterns
+ // even with non-power-of-2 sizes, but it is not a likely scenario.
+ Type *DestTy = Trunc.getType();
+ unsigned NarrowWidth = DestTy->getScalarSizeInBits();
+ if (!isPowerOf2_32(NarrowWidth))
+ return nullptr;
+
// First, find an or'd pair of opposite shifts with the same shifted operand:
// trunc (or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1))
Value *Or0, *Or1;
@@ -514,22 +521,38 @@ Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
if (ShiftOpcode0 == ShiftOpcode1)
return nullptr;
- // The shift amounts must add up to the narrow bit width.
- Value *ShAmt;
- bool SubIsOnLHS;
- Type *DestTy = Trunc.getType();
- unsigned NarrowWidth = DestTy->getScalarSizeInBits();
- if (match(ShAmt0,
- m_OneUse(m_Sub(m_SpecificInt(NarrowWidth), m_Specific(ShAmt1))))) {
- ShAmt = ShAmt1;
- SubIsOnLHS = true;
- } else if (match(ShAmt1, m_OneUse(m_Sub(m_SpecificInt(NarrowWidth),
- m_Specific(ShAmt0))))) {
- ShAmt = ShAmt0;
- SubIsOnLHS = false;
- } else {
+ // Match the shift amount operands for a rotate pattern. This always matches
+ // a subtraction on the R operand.
+ auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+ // The shift amounts may add up to the narrow bit width:
+ // (shl ShVal, L) | (lshr ShVal, Width - L)
+ if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
+ return L;
+
+ // The shift amount may be masked with negation:
+ // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+ Value *X;
+ unsigned Mask = Width - 1;
+ if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+ match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+ return X;
+
+ // Same as above, but the shift amount may be extended after masking:
+ if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+ match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
+ return X;
+
return nullptr;
+ };
+
+ Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
+ bool SubIsOnLHS = false;
+ if (!ShAmt) {
+ ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
+ SubIsOnLHS = true;
}
+ if (!ShAmt)
+ return nullptr;
// The shifted value must have high zeros in the wide type. Typically, this
// will be a zext, but it could also be the result of an 'and' or 'shift'.
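For illustration (not part of the patch), the masked-negation form matched above is exactly what the portable C rotate idiom produces once integer promotion widens the arithmetic:

    #include <stdint.h>
    // Rotate-left of a narrow value; the shifts happen in 'int', producing the
    // wide rotate that narrowRotate() can now shrink to an 8-bit funnel shift.
    uint8_t rotl8(uint8_t x, unsigned n) {
      return (uint8_t)((x << (n & 7)) | (x >> ((-n) & 7)));
    }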
@@ -540,23 +563,15 @@ Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
// We have an unnecessarily wide rotate!
// trunc (or (lshr ShVal, ShAmt), (shl ShVal, BitWidth - ShAmt))
- // Narrow it down to eliminate the zext/trunc:
- // or (lshr trunc(ShVal), ShAmt0'), (shl trunc(ShVal), ShAmt1')
+ // Narrow the inputs and convert to funnel shift intrinsic:
+ // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
- Value *NegShAmt = Builder.CreateNeg(NarrowShAmt);
-
- // Mask both shift amounts to ensure there's no UB from oversized shifts.
- Constant *MaskC = ConstantInt::get(DestTy, NarrowWidth - 1);
- Value *MaskedShAmt = Builder.CreateAnd(NarrowShAmt, MaskC);
- Value *MaskedNegShAmt = Builder.CreateAnd(NegShAmt, MaskC);
-
- // Truncate the original value and use narrow ops.
Value *X = Builder.CreateTrunc(ShVal, DestTy);
- Value *NarrowShAmt0 = SubIsOnLHS ? MaskedNegShAmt : MaskedShAmt;
- Value *NarrowShAmt1 = SubIsOnLHS ? MaskedShAmt : MaskedNegShAmt;
- Value *NarrowSh0 = Builder.CreateBinOp(ShiftOpcode0, X, NarrowShAmt0);
- Value *NarrowSh1 = Builder.CreateBinOp(ShiftOpcode1, X, NarrowShAmt1);
- return BinaryOperator::CreateOr(NarrowSh0, NarrowSh1);
+ bool IsFshl = (!SubIsOnLHS && ShiftOpcode0 == BinaryOperator::Shl) ||
+ (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
+ Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+ Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
+ return IntrinsicInst::Create(F, { X, X, NarrowShAmt });
}
/// Try to narrow the width of math or bitwise logic instructions by pulling a
@@ -706,12 +721,35 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (SimplifyDemandedInstructionBits(CI))
return &CI;
- // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
if (DestTy->getScalarSizeInBits() == 1) {
- Constant *One = ConstantInt::get(SrcTy, 1);
- Src = Builder.CreateAnd(Src, One);
Value *Zero = Constant::getNullValue(Src->getType());
- return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+ if (DestTy->isIntegerTy()) {
+ // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
+ // TODO: We canonicalize to more instructions here because we are probably
+ // lacking equivalent analysis for trunc relative to icmp. There may also
+ // be codegen concerns. If those trunc limitations were removed, we could
+ // remove this transform.
+ Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
+ return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
+ }
+
+ // For vectors, we do not canonicalize all truncs to icmp, so optimize
+ // patterns that would be covered within visitICmpInst.
+ Value *X;
+ const APInt *C;
+ if (match(Src, m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) {
+ // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
+ APInt MaskC = APInt(SrcTy->getScalarSizeInBits(), 1).shl(*C);
+ Value *And = Builder.CreateAnd(X, ConstantInt::get(SrcTy, MaskC));
+ return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
+ }
+ if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_APInt(C)),
+ m_Deferred(X))))) {
+ // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
+ APInt MaskC = APInt(SrcTy->getScalarSizeInBits(), 1).shl(*C) | 1;
+ Value *And = Builder.CreateAnd(X, ConstantInt::get(SrcTy, MaskC));
+ return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
+ }
}
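A worked instance of the lshr fold (values picked for illustration; the patch applies it to vectors, lane-wise, but the arithmetic is easier to see on a scalar): with a 32-bit source and C = 3, the mask constant is 1 << 3 = 8.

    // trunc (lshr X, 3) to i1  -->  icmp ne (and X, 8), 0
    bool bit3(unsigned x)        { return (x >> 3) & 1; }
    bool bit3_folded(unsigned x) { return (x & 0x8u) != 0; }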
// FIXME: Maybe combine the next two transforms to handle the no cast case
@@ -1061,12 +1099,9 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
- // Attempt to extend the entire input expression tree to the destination
- // type. Only do this if the dest type is a simple type, don't convert the
- // expression tree to something weird like i93 unless the source is also
- // strange.
+ // Try to extend the entire expression tree to the wide destination type.
unsigned BitsToClear;
- if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
+ if (shouldChangeType(SrcTy, DestTy) &&
canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
"Can't clear more bits than in SrcTy");
@@ -1343,12 +1378,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
return replaceInstUsesWith(CI, ZExt);
}
- // Attempt to extend the entire input expression tree to the destination
- // type. Only do this if the dest type is a simple type, don't convert the
- // expression tree to something weird like i93 unless the source is also
- // strange.
- if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
- canEvaluateSExtd(Src, DestTy)) {
+ // Try to extend the entire expression tree to the wide destination type.
+ if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
LLVM_DEBUG(
dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -1589,8 +1620,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
}
// (fptrunc (fneg x)) -> (fneg (fptrunc x))
- if (BinaryOperator::isFNeg(OpI)) {
- Value *InnerTrunc = Builder.CreateFPTrunc(OpI->getOperand(1), Ty);
+ Value *X;
+ if (match(OpI, m_FNeg(m_Value(X)))) {
+ Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
return BinaryOperator::CreateFNegFMF(InnerTrunc, OpI);
}
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e1bae11b40d1..b5bbb09935e2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -522,11 +522,9 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
}
// Otherwise, there is an index. The computation we will do will be modulo
- // the pointer size, so get it.
- uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
- Offset &= PtrSizeMask;
- VariableScale &= PtrSizeMask;
+ // the pointer size.
+ Offset = SignExtend64(Offset, IntPtrWidth);
+ VariableScale = SignExtend64(VariableScale, IntPtrWidth);
// To do this transformation, any constant index must be a multiple of the
// variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
@@ -909,7 +907,8 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
// If all indices are the same, just compare the base pointers.
- if (IndicesTheSame)
+ Type *BaseType = GEPLHS->getOperand(0)->getType();
+ if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
// If we're comparing GEPs with two base pointers that only differ in type
@@ -976,7 +975,7 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
if (NumDifferences == 0) // SAME GEP?
return replaceInstUsesWith(I, // No comparison is needed here.
- Builder.getInt1(ICmpInst::isTrueWhenEqual(Cond)));
+ ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond)));
else if (NumDifferences == 1 && GEPsInBounds) {
Value *LHSV = GEPLHS->getOperand(DiffOperand);
@@ -1079,19 +1078,20 @@ Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI,
ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
}
-/// Fold "icmp pred (X+CI), X".
-Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
+/// Fold "icmp pred (X+C), X".
+Instruction *InstCombiner::foldICmpAddOpConst(Value *X, const APInt &C,
ICmpInst::Predicate Pred) {
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
// operators.
+ assert(!!C && "C should not be zero!");
// (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
// (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
// (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
- Value *R =
- ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+ Constant *R = ConstantInt::get(X->getType(),
+ APInt::getMaxValue(C.getBitWidth()) - C);
return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
}
@@ -1099,11 +1099,10 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
// (X+2) >u X --> X <u (0-2) --> X <u 254
// (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
- return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+ return new ICmpInst(ICmpInst::ICMP_ULT, X,
+ ConstantInt::get(X->getType(), -C));
- unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
- ConstantInt *SMax = ConstantInt::get(X->getContext(),
- APInt::getSignedMaxValue(BitWidth));
+ APInt SMax = APInt::getSignedMaxValue(C.getBitWidth());
// (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
// (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
@@ -1112,7 +1111,8 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
// (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
// (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
- return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+ return new ICmpInst(ICmpInst::ICMP_SGT, X,
+ ConstantInt::get(X->getType(), SMax - C));
// (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
// (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
@@ -1122,8 +1122,8 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
// (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
- Constant *C = Builder.getInt(CI->getValue() - 1);
- return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+ return new ICmpInst(ICmpInst::ICMP_SLT, X,
+ ConstantInt::get(X->getType(), SMax - (C - 1)));
}
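Concretely (unsigned, 32-bit, constants chosen for illustration): adding a nonzero constant and comparing against the original operand is a wraparound test, so it folds to a compare against a constant.

    // (X+1) <u X  -->  X >u (MAXUINT-1), i.e. X == UINT_MAX
    bool wraps(unsigned x)        { return x + 1 < x; }
    bool wraps_folded(unsigned x) { return x > 0xFFFFFFFEu; }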
/// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
@@ -1333,17 +1333,12 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
return nullptr;
}
-// Fold icmp Pred X, C.
+/// Fold icmp Pred X, C.
+/// TODO: This code structure does not make sense. The saturating add fold
+/// should be moved to some other helper and extended as noted below (it is also
+/// possible that code has been made unnecessary - do we canonicalize IR to
+/// overflow/saturating intrinsics or not?).
Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
- CmpInst::Predicate Pred = Cmp.getPredicate();
- Value *X = Cmp.getOperand(0);
-
- const APInt *C;
- if (!match(Cmp.getOperand(1), m_APInt(C)))
- return nullptr;
-
- Value *A = nullptr, *B = nullptr;
-
// Match the following pattern, which is a common idiom when writing
// overflow-safe integer arithmetic functions. The source performs an addition
// in wider type and explicitly checks for overflow using comparisons against
@@ -1355,37 +1350,62 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
//
// sum = a + b
// if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
- {
- ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
- if (Pred == ICmpInst::ICMP_UGT &&
- match(X, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
- if (Instruction *Res = processUGT_ADDCST_ADD(
- Cmp, A, B, CI2, cast<ConstantInt>(Cmp.getOperand(1)), *this))
- return Res;
- }
+ CmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *Op0 = Cmp.getOperand(0), *Op1 = Cmp.getOperand(1);
+ Value *A, *B;
+ ConstantInt *CI, *CI2; // I = icmp ugt (add (add A, B), CI2), CI
+ if (Pred == ICmpInst::ICMP_UGT && match(Op1, m_ConstantInt(CI)) &&
+ match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+ if (Instruction *Res = processUGT_ADDCST_ADD(Cmp, A, B, CI2, CI, *this))
+ return Res;
+
+ return nullptr;
+}
- // FIXME: Use m_APInt to allow folds for splat constants.
- ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1));
- if (!CI)
+/// Canonicalize icmp instructions based on dominating conditions.
+Instruction *InstCombiner::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
+ // This is a cheap/incomplete check for dominance - just match a single
+ // predecessor with a conditional branch.
+ BasicBlock *CmpBB = Cmp.getParent();
+ BasicBlock *DomBB = CmpBB->getSinglePredecessor();
+ if (!DomBB)
return nullptr;
- // Canonicalize icmp instructions based on dominating conditions.
- BasicBlock *Parent = Cmp.getParent();
- BasicBlock *Dom = Parent->getSinglePredecessor();
- auto *BI = Dom ? dyn_cast<BranchInst>(Dom->getTerminator()) : nullptr;
- ICmpInst::Predicate Pred2;
+ Value *DomCond;
BasicBlock *TrueBB, *FalseBB;
- ConstantInt *CI2;
- if (BI && match(BI, m_Br(m_ICmp(Pred2, m_Specific(X), m_ConstantInt(CI2)),
- TrueBB, FalseBB)) &&
- TrueBB != FalseBB) {
- ConstantRange CR =
- ConstantRange::makeAllowedICmpRegion(Pred, CI->getValue());
+ if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
+ return nullptr;
+
+ assert((TrueBB == CmpBB || FalseBB == CmpBB) &&
+ "Predecessor block does not point to successor?");
+
+ // The branch should get simplified. Don't bother simplifying this condition.
+ if (TrueBB == FalseBB)
+ return nullptr;
+
+ // Try to simplify this compare to T/F based on the dominating condition.
+ Optional<bool> Imp = isImpliedCondition(DomCond, &Cmp, DL, TrueBB == CmpBB);
+ if (Imp)
+ return replaceInstUsesWith(Cmp, ConstantInt::get(Cmp.getType(), *Imp));
+
+ CmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
+ ICmpInst::Predicate DomPred;
+ const APInt *C, *DomC;
+ if (match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))) &&
+ match(Y, m_APInt(C))) {
+ // We have 2 compares of a variable with constants. Calculate the constant
+ // ranges of those compares to see if we can transform the 2nd compare:
+ // DomBB:
+ // DomCond = icmp DomPred X, DomC
+ // br DomCond, CmpBB, FalseBB
+ // CmpBB:
+ // Cmp = icmp Pred X, C
+ ConstantRange CR = ConstantRange::makeAllowedICmpRegion(Pred, *C);
ConstantRange DominatingCR =
- (Parent == TrueBB)
- ? ConstantRange::makeExactICmpRegion(Pred2, CI2->getValue())
- : ConstantRange::makeExactICmpRegion(
- CmpInst::getInversePredicate(Pred2), CI2->getValue());
+ (CmpBB == TrueBB) ? ConstantRange::makeExactICmpRegion(DomPred, *DomC)
+ : ConstantRange::makeExactICmpRegion(
+ CmpInst::getInversePredicate(DomPred), *DomC);
ConstantRange Intersection = DominatingCR.intersectWith(CR);
ConstantRange Difference = DominatingCR.difference(CR);
if (Intersection.isEmptySet())
@@ -1393,23 +1413,20 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
if (Difference.isEmptySet())
return replaceInstUsesWith(Cmp, Builder.getTrue());
- // If this is a normal comparison, it demands all bits. If it is a sign
- // bit comparison, it only demands the sign bit.
- bool UnusedBit;
- bool IsSignBit = isSignBitCheck(Pred, CI->getValue(), UnusedBit);
-
// Canonicalizing a sign bit comparison that gets used in a branch,
// pessimizes codegen by generating branch on zero instruction instead
// of a test and branch. So we avoid canonicalizing in such situations
// because test and branch instruction has better branch displacement
// than compare and branch instruction.
+ bool UnusedBit;
+ bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit);
if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
return nullptr;
- if (auto *AI = Intersection.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI));
- if (auto *AD = Difference.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD));
+ if (const APInt *EqC = Intersection.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC));
+ if (const APInt *NeC = Difference.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC));
}
return nullptr;
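A source-level sketch (names are illustrative) of the two outcomes handled above: the dominating range either decides the second compare outright or collapses it to an equality test.

    int f(int x) {
      if (x < 3)            // on this edge x is in [INT_MIN, 2]
        return x < 10;      // difference of the ranges is empty -> 'true'
      return 0;
    }
    int g(int x) {
      if (x < 3)
        return x > 1;       // only x == 2 survives -> 'x == 2'
      return 0;
    }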
@@ -1498,16 +1515,25 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
}
}
- // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
- // iff -C is a power of 2
- if (Pred == ICmpInst::ICMP_UGT && *XorC == ~C && (C + 1).isPowerOf2())
- return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
-
- // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
- // iff -C is a power of 2
- if (Pred == ICmpInst::ICMP_ULT && *XorC == -C && C.isPowerOf2())
- return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
-
+ // Mask constant magic can eliminate an 'xor' with unsigned compares.
+ if (Pred == ICmpInst::ICMP_UGT) {
+ // (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2)
+ if (*XorC == ~C && (C + 1).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
+ // (xor X, C) >u C --> X >u C (when C+1 is a power of 2)
+ if (*XorC == C && (C + 1).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
+ }
+ if (Pred == ICmpInst::ICMP_ULT) {
+ // (xor X, -C) <u C --> X >u ~C (when C is a power of 2)
+ if (*XorC == -C && C.isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGT, X,
+ ConstantInt::get(X->getType(), ~C));
+ // (xor X, C) <u C --> X >u ~C (when -C is a power of 2)
+ if (*XorC == C && (-C).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGT, X,
+ ConstantInt::get(X->getType(), ~C));
+ }
return nullptr;
}
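As a concrete instance (C = 7, so C + 1 = 8 is a power of 2): xor by 7 only permutes values within each aligned block of 8, so it cannot move a value across the 'u> 7' boundary.

    // (X ^ 7) >u 7  -->  X >u 7
    bool above(unsigned x)        { return (x ^ 7u) > 7u; }
    bool above_folded(unsigned x) { return x > 7u; }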
@@ -1598,6 +1624,13 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
BinaryOperator *And,
const APInt &C1) {
+ // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
+ // TODO: We canonicalize to the longer form for scalars because we have
+ // better analysis/folds for icmp, and codegen may be better with icmp.
+ if (Cmp.getPredicate() == CmpInst::ICMP_NE && Cmp.getType()->isVectorTy() &&
+ C1.isNullValue() && match(And->getOperand(1), m_One()))
+ return new TruncInst(And->getOperand(0), Cmp.getType());
+
const APInt *C2;
if (!match(And->getOperand(1), m_APInt(C2)))
return nullptr;
@@ -2336,13 +2369,19 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
Type *Ty = Add->getType();
CmpInst::Predicate Pred = Cmp.getPredicate();
+ if (!Add->hasOneUse())
+ return nullptr;
+
// If the add does not wrap, we can always adjust the compare by subtracting
- // the constants. Equality comparisons are handled elsewhere. SGE/SLE are
- // canonicalized to SGT/SLT.
- if (Add->hasNoSignedWrap() &&
- (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) {
+ // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
+ // are canonicalized to SGT/SLT/UGT/ULT.
+ if ((Add->hasNoSignedWrap() &&
+ (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) ||
+ (Add->hasNoUnsignedWrap() &&
+ (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) {
bool Overflow;
- APInt NewC = C.ssub_ov(*C2, Overflow);
+ APInt NewC =
+ Cmp.isSigned() ? C.ssub_ov(*C2, Overflow) : C.usub_ov(*C2, Overflow);
// If there is overflow, the result must be true or false.
// TODO: Can we assert there is no overflow because InstSimplify always
// handles those cases?
@@ -2366,9 +2405,6 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
}
- if (!Add->hasOneUse())
- return nullptr;
-
// X+C <u C2 -> (X & -C2) == C
// iff C & (C2-1) == 0
// C2 is a power of 2
@@ -2729,6 +2765,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
// Handle icmp {eq|ne} <intrinsic>, Constant.
Type *Ty = II->getType();
+ unsigned BitWidth = C.getBitWidth();
switch (II->getIntrinsicID()) {
case Intrinsic::bswap:
Worklist.Add(II);
@@ -2737,21 +2774,39 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
return &Cmp;
case Intrinsic::ctlz:
- case Intrinsic::cttz:
+ case Intrinsic::cttz: {
// ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
- if (C == C.getBitWidth()) {
+ if (C == BitWidth) {
Worklist.Add(II);
Cmp.setOperand(0, II->getArgOperand(0));
Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
return &Cmp;
}
+
+ // ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set
+    // and Mask1 has the low C+1 bits set. Similar for ctlz, but for high bits.
+ // Limit to one use to ensure we don't increase instruction count.
+ unsigned Num = C.getLimitedValue(BitWidth);
+ if (Num != BitWidth && II->hasOneUse()) {
+ bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz;
+ APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1)
+ : APInt::getHighBitsSet(BitWidth, Num + 1);
+ APInt Mask2 = IsTrailing
+ ? APInt::getOneBitSet(BitWidth, Num)
+ : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
+ Cmp.setOperand(0, Builder.CreateAnd(II->getArgOperand(0), Mask1));
+ Cmp.setOperand(1, ConstantInt::get(Ty, Mask2));
+ Worklist.Add(II);
+ return &Cmp;
+ }
break;
+ }
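A worked example of the new cttz case (assuming a 32-bit value with at least one bit set; constants chosen for illustration): for C = 2, Mask1 is the low three bits and Mask2 is bit 2, so the count is 2 exactly when the low bits are '100'.

    // cttz(A) == 2  -->  (A & 0b111) == 0b100
    bool ctzIs2(unsigned a)        { return __builtin_ctz(a) == 2; }  // a != 0
    bool ctzIs2_folded(unsigned a) { return (a & 0x7u) == 0x4u; }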
case Intrinsic::ctpop: {
// popcount(A) == 0 -> A == 0 and likewise for !=
// popcount(A) == bitwidth(A) -> A == -1 and likewise for !=
bool IsZero = C.isNullValue();
- if (IsZero || C == C.getBitWidth()) {
+ if (IsZero || C == BitWidth) {
Worklist.Add(II);
Cmp.setOperand(0, II->getArgOperand(0));
auto *NewOp =
@@ -2870,15 +2925,25 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
/// In this case, we are looking for comparisons that look like
/// a check for a lossy truncation.
/// Folds:
-/// x & (-1 >> y) SrcPred x to x DstPred (-1 >> y)
+/// icmp SrcPred (x & Mask), x to icmp DstPred x, Mask
+/// Where Mask is some pattern that produces all-ones in low bits:
+/// (-1 >> y)
+/// ((-1 << y) >> y) <- non-canonical, has extra uses
+/// ~(-1 << y)
+/// ((1 << y) + (-1)) <- non-canonical, has extra uses
/// The Mask can be a constant, too.
/// For some predicates, the operands are commutative.
/// For others, x can only be on a specific side.
static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
InstCombiner::BuilderTy &Builder) {
ICmpInst::Predicate SrcPred;
- Value *X, *M;
- auto m_Mask = m_CombineOr(m_LShr(m_AllOnes(), m_Value()), m_LowBitMask());
+ Value *X, *M, *Y;
+ auto m_VariableMask = m_CombineOr(
+ m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())),
+ m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())),
+ m_CombineOr(m_LShr(m_AllOnes(), m_Value()),
+ m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y))));
+ auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask());
if (!match(&I, m_c_ICmp(SrcPred,
m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
m_Deferred(X))))
@@ -3042,6 +3107,18 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
return nullptr;
const CmpInst::Predicate Pred = I.getPredicate();
+ Value *X;
+
+ // Convert add-with-unsigned-overflow comparisons into a 'not' with compare.
+ // (Op1 + X) <u Op1 --> ~Op1 <u X
+ // Op0 >u (Op0 + X) --> X >u ~Op0
+ if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) &&
+ Pred == ICmpInst::ICMP_ULT)
+ return new ICmpInst(Pred, Builder.CreateNot(Op1), X);
+ if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) &&
+ Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
+
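A scalar illustration of the new fold (function names invented for the example): the sum wraps exactly when X exceeds what A can still absorb, which is ~A = UINT_MAX - A.

    // (A + X) <u A  -->  ~A <u X
    bool addWraps(unsigned a, unsigned x)        { return a + x < a; }
    bool addWraps_folded(unsigned a, unsigned x) { return ~a < x; }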
bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
if (BO0 && isa<OverflowingBinaryOperator>(BO0))
NoOp0WrapProblem =
@@ -4606,6 +4683,83 @@ static Instruction *canonicalizeICmpBool(ICmpInst &I,
}
}
+// Transform pattern like:
+// (1 << Y) u<= X or ~(-1 << Y) u< X or ((1 << Y)+(-1)) u< X
+// (1 << Y) u> X or ~(-1 << Y) u>= X or ((1 << Y)+(-1)) u>= X
+// Into:
+// (X l>> Y) != 0
+// (X l>> Y) == 0
+static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp,
+ InstCombiner::BuilderTy &Builder) {
+ ICmpInst::Predicate Pred, NewPred;
+ Value *X, *Y;
+ if (match(&Cmp,
+ m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) {
+ // We want X to be the icmp's second operand, so swap predicate if it isn't.
+ if (Cmp.getOperand(0) == X)
+ Pred = Cmp.getSwappedPredicate();
+
+ switch (Pred) {
+ case ICmpInst::ICMP_ULE:
+ NewPred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_UGT:
+ NewPred = ICmpInst::ICMP_EQ;
+ break;
+ default:
+ return nullptr;
+ }
+ } else if (match(&Cmp, m_c_ICmp(Pred,
+ m_OneUse(m_CombineOr(
+ m_Not(m_Shl(m_AllOnes(), m_Value(Y))),
+ m_Add(m_Shl(m_One(), m_Value(Y)),
+ m_AllOnes()))),
+ m_Value(X)))) {
+    // The variant with 'add' is not canonical (the variant with 'not' is);
+    // we only get it here because it has extra uses and can't be canonicalized.
+
+ // We want X to be the icmp's second operand, so swap predicate if it isn't.
+ if (Cmp.getOperand(0) == X)
+ Pred = Cmp.getSwappedPredicate();
+
+ switch (Pred) {
+ case ICmpInst::ICMP_ULT:
+ NewPred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_UGE:
+ NewPred = ICmpInst::ICMP_EQ;
+ break;
+ default:
+ return nullptr;
+ }
+ } else
+ return nullptr;
+
+ Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits");
+ Constant *Zero = Constant::getNullValue(NewX->getType());
+ return CmpInst::Create(Instruction::ICmp, NewPred, NewX, Zero);
+}
+
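In source form (sketch; assumes y < 32 so the shifts are defined), the pattern asks whether X has any bit set at or above position Y:

    // (1 << Y) u<= X  -->  (X l>> Y) != 0
    bool anyBitAtOrAbove(unsigned x, unsigned y)        { return (1u << y) <= x; }
    bool anyBitAtOrAbove_folded(unsigned x, unsigned y) { return (x >> y) != 0; }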
+static Instruction *foldVectorCmp(CmpInst &Cmp,
+ InstCombiner::BuilderTy &Builder) {
+ // If both arguments of the cmp are shuffles that use the same mask and
+ // shuffle within a single vector, move the shuffle after the cmp.
+ Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1);
+ Value *V1, *V2;
+ Constant *M;
+ if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(M))) &&
+ match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(M))) &&
+ V1->getType() == V2->getType() &&
+ (LHS->hasOneUse() || RHS->hasOneUse())) {
+ // cmp (shuffle V1, M), (shuffle V2, M) --> shuffle (cmp V1, V2), M
+ CmpInst::Predicate P = Cmp.getPredicate();
+ Value *NewCmp = isa<ICmpInst>(Cmp) ? Builder.CreateICmp(P, V1, V2)
+ : Builder.CreateFCmp(P, V1, V2);
+ return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()), M);
+ }
+ return nullptr;
+}
+
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -4653,6 +4807,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpWithConstant(I))
return Res;
+ if (Instruction *Res = foldICmpWithDominatingICmp(I))
+ return Res;
+
if (Instruction *Res = foldICmpUsingKnownBits(I))
return Res;
@@ -4865,16 +5022,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return ExtractValueInst::Create(ACXI, 1);
{
- Value *X; ConstantInt *Cst;
+ Value *X;
+ const APInt *C;
// icmp X+Cst, X
- if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
- return foldICmpAddOpConst(X, Cst, I.getPredicate());
+ if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
+ return foldICmpAddOpConst(X, *C, I.getPredicate());
// icmp X, X+Cst
- if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
- return foldICmpAddOpConst(X, Cst, I.getSwappedPredicate());
+ if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X)
+ return foldICmpAddOpConst(X, *C, I.getSwappedPredicate());
}
+ if (Instruction *Res = foldICmpWithHighBitMask(I, Builder))
+ return Res;
+
+ if (I.getType()->isVectorTy())
+ if (Instruction *Res = foldVectorCmp(I, Builder))
+ return Res;
+
return Changed ? &I : nullptr;
}
@@ -5117,6 +5282,117 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
}
+/// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary.
+static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC) {
+ // When C is not 0.0 and infinities are not allowed:
+ // (C / X) < 0.0 is a sign-bit test of X
+ // (C / X) < 0.0 --> X < 0.0 (if C is positive)
+ // (C / X) < 0.0 --> X > 0.0 (if C is negative, swap the predicate)
+ //
+ // Proof:
+ // Multiply (C / X) < 0.0 by X * X / C.
+ // - X is non zero, if it is the flag 'ninf' is violated.
+ // - C defines the sign of X * X * C. Thus it also defines whether to swap
+ // the predicate. C is also non zero by definition.
+ //
+ // Thus X * X / C is non zero and the transformation is valid. [qed]
+
+ FCmpInst::Predicate Pred = I.getPredicate();
+
+ // Check that predicates are valid.
+ if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
+ (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
+ return nullptr;
+
+ // Check that RHS operand is zero.
+ if (!match(RHSC, m_AnyZeroFP()))
+ return nullptr;
+
+ // Check fastmath flags ('ninf').
+ if (!LHSI->hasNoInfs() || !I.hasNoInfs())
+ return nullptr;
+
+ // Check the properties of the dividend. It must not be zero to avoid a
+ // division by zero (see Proof).
+ const APFloat *C;
+ if (!match(LHSI->getOperand(0), m_APFloat(C)))
+ return nullptr;
+
+ if (C->isZero())
+ return nullptr;
+
+ // Get swapped predicate if necessary.
+ if (C->isNegative())
+ Pred = I.getSwappedPredicate();
+
+ return new FCmpInst(Pred, LHSI->getOperand(1), RHSC, "", &I);
+}
+
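Source-level illustration (requires the 'ninf' fast-math flag, e.g. -ffinite-math-only; the constants are only examples):

    // ( 2.0 / X) < 0.0  -->  X < 0.0   (positive dividend)
    // (-2.0 / X) < 0.0  -->  X > 0.0   (negative dividend, predicate swapped)
    bool negRecip(float x)     { return  2.0f / x < 0.0f; }
    bool negRecipNegC(float x) { return -2.0f / x < 0.0f; }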
+/// Optimize fabs(X) compared with zero.
+static Instruction *foldFabsWithFcmpZero(FCmpInst &I) {
+ Value *X;
+ if (!match(I.getOperand(0), m_Intrinsic<Intrinsic::fabs>(m_Value(X))) ||
+ !match(I.getOperand(1), m_PosZeroFP()))
+ return nullptr;
+
+ auto replacePredAndOp0 = [](FCmpInst *I, FCmpInst::Predicate P, Value *X) {
+ I->setPredicate(P);
+ I->setOperand(0, X);
+ return I;
+ };
+
+ switch (I.getPredicate()) {
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OLT:
+ // fabs(X) >= 0.0 --> true
+ // fabs(X) < 0.0 --> false
+ llvm_unreachable("fcmp should have simplified");
+
+ case FCmpInst::FCMP_OGT:
+ // fabs(X) > 0.0 --> X != 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_ONE, X);
+
+ case FCmpInst::FCMP_UGT:
+ // fabs(X) u> 0.0 --> X u!= 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_UNE, X);
+
+ case FCmpInst::FCMP_OLE:
+ // fabs(X) <= 0.0 --> X == 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_OEQ, X);
+
+ case FCmpInst::FCMP_ULE:
+ // fabs(X) u<= 0.0 --> X u== 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_UEQ, X);
+
+ case FCmpInst::FCMP_OGE:
+ // fabs(X) >= 0.0 --> !isnan(X)
+ assert(!I.hasNoNaNs() && "fcmp should have simplified");
+ return replacePredAndOp0(&I, FCmpInst::FCMP_ORD, X);
+
+ case FCmpInst::FCMP_ULT:
+ // fabs(X) u< 0.0 --> isnan(X)
+ assert(!I.hasNoNaNs() && "fcmp should have simplified");
+ return replacePredAndOp0(&I, FCmpInst::FCMP_UNO, X);
+
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_ONE:
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ORD:
+ case FCmpInst::FCMP_UNO:
+ // Look through the fabs() because it doesn't change anything but the sign.
+ // fabs(X) == 0.0 --> X == 0.0,
+ // fabs(X) != 0.0 --> X != 0.0
+ // isnan(fabs(X)) --> isnan(X)
+  //      !isnan(fabs(X)) --> !isnan(X)
+ return replacePredAndOp0(&I, I.getPredicate(), X);
+
+ default:
+ return nullptr;
+ }
+}
+
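Two of the cases above in source form (sketch; std::fabs lowers to llvm.fabs, so the call disappears from the compare):

    #include <cmath>
    bool gt0(float x) { return std::fabs(x) >  0.0f; }  // --> x != 0.0f
    bool le0(float x) { return std::fabs(x) <= 0.0f; }  // --> x == 0.0f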
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
bool Changed = false;
@@ -5161,11 +5437,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand,
// then canonicalize the operand to 0.0.
if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
- if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0)) {
+ if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, &TLI)) {
I.setOperand(0, ConstantFP::getNullValue(Op0->getType()));
return &I;
}
- if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1)) {
+ if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1, &TLI)) {
I.setOperand(1, ConstantFP::getNullValue(Op0->getType()));
return &I;
}
@@ -5186,128 +5462,93 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return nullptr;
}
- // Handle fcmp with constant RHS
- if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
- if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
- switch (LHSI->getOpcode()) {
- case Instruction::FPExt: {
- // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
- FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
- ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
- if (!RHSF)
- break;
-
- const fltSemantics *Sem;
- // FIXME: This shouldn't be here.
- if (LHSExt->getSrcTy()->isHalfTy())
- Sem = &APFloat::IEEEhalf();
- else if (LHSExt->getSrcTy()->isFloatTy())
- Sem = &APFloat::IEEEsingle();
- else if (LHSExt->getSrcTy()->isDoubleTy())
- Sem = &APFloat::IEEEdouble();
- else if (LHSExt->getSrcTy()->isFP128Ty())
- Sem = &APFloat::IEEEquad();
- else if (LHSExt->getSrcTy()->isX86_FP80Ty())
- Sem = &APFloat::x87DoubleExtended();
- else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
- Sem = &APFloat::PPCDoubleDouble();
- else
- break;
-
- bool Lossy;
- APFloat F = RHSF->getValueAPF();
- F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
-
- // Avoid lossy conversions and denormals. Zero is a special case
- // that's OK to convert.
- APFloat Fabs = F;
- Fabs.clearSign();
- if (!Lossy &&
- ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) !=
- APFloat::cmpLessThan) || Fabs.isZero()))
-
- return new FCmpInst(Pred, LHSExt->getOperand(0),
- ConstantFP::get(RHSC->getContext(), F));
- break;
- }
- case Instruction::PHI:
- // Only fold fcmp into the PHI if the phi and fcmp are in the same
- // block. If in the same block, we're encouraging jump threading. If
- // not, we are just pessimizing the code by making an i1 phi.
- if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
- return NV;
- break;
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
+ // The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0:
+ // fcmp Pred X, -0.0 --> fcmp Pred X, 0.0
+ if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP())) {
+ I.setOperand(1, ConstantFP::getNullValue(Op1->getType()));
+ return &I;
+ }
+
+ // Handle fcmp with instruction LHS and constant RHS.
+ Instruction *LHSI;
+ Constant *RHSC;
+ if (match(Op0, m_Instruction(LHSI)) && match(Op1, m_Constant(RHSC))) {
+ switch (LHSI->getOpcode()) {
+ case Instruction::PHI:
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
return NV;
- break;
- case Instruction::FSub: {
- // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
- Value *Op;
- if (match(LHSI, m_FNeg(m_Value(Op))))
- return new FCmpInst(I.getSwappedPredicate(), Op,
- ConstantExpr::getFNeg(RHSC));
- break;
- }
- case Instruction::Load:
- if (GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
- !cast<LoadInst>(LHSI)->isVolatile())
- if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
- return Res;
- }
- break;
- case Instruction::Call: {
- if (!RHSC->isNullValue())
- break;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::FDiv:
+ if (Instruction *NV = foldFCmpReciprocalAndZero(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::Load:
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
+ if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ break;
+ }
+ }
- CallInst *CI = cast<CallInst>(LHSI);
- Intrinsic::ID IID = getIntrinsicForCallSite(CI, &TLI);
- if (IID != Intrinsic::fabs)
- break;
+ if (Instruction *R = foldFabsWithFcmpZero(I))
+ return R;
- // Various optimization for fabs compared with zero.
- switch (Pred) {
- default:
- break;
- // fabs(x) < 0 --> false
- case FCmpInst::FCMP_OLT:
- llvm_unreachable("handled by SimplifyFCmpInst");
- // fabs(x) > 0 --> x != 0
- case FCmpInst::FCMP_OGT:
- return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), RHSC);
- // fabs(x) <= 0 --> x == 0
- case FCmpInst::FCMP_OLE:
- return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), RHSC);
- // fabs(x) >= 0 --> !isnan(x)
- case FCmpInst::FCMP_OGE:
- return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), RHSC);
- // fabs(x) == 0 --> x == 0
- // fabs(x) != 0 --> x != 0
- case FCmpInst::FCMP_OEQ:
- case FCmpInst::FCMP_UEQ:
- case FCmpInst::FCMP_ONE:
- case FCmpInst::FCMP_UNE:
- return new FCmpInst(Pred, CI->getArgOperand(0), RHSC);
- }
- }
+ Value *X, *Y;
+ if (match(Op0, m_FNeg(m_Value(X)))) {
+ // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
+ if (match(Op1, m_FNeg(m_Value(Y))))
+ return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
+
+ // fcmp pred (fneg X), C --> fcmp swap(pred) X, -C
+ Constant *C;
+ if (match(Op1, m_Constant(C))) {
+ Constant *NegC = ConstantExpr::getFNeg(C);
+ return new FCmpInst(I.getSwappedPredicate(), X, NegC, "", &I);
+ }
+ }
+
+ if (match(Op0, m_FPExt(m_Value(X)))) {
+ // fcmp (fpext X), (fpext Y) -> fcmp X, Y
+ if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType())
+ return new FCmpInst(Pred, X, Y, "", &I);
+
+ // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless
+ const APFloat *C;
+ if (match(Op1, m_APFloat(C))) {
+ const fltSemantics &FPSem =
+ X->getType()->getScalarType()->getFltSemantics();
+ bool Lossy;
+ APFloat TruncC = *C;
+ TruncC.convert(FPSem, APFloat::rmNearestTiesToEven, &Lossy);
+
+ // Avoid lossy conversions and denormals.
+ // Zero is a special case that's OK to convert.
+ APFloat Fabs = TruncC;
+ Fabs.clearSign();
+ if (!Lossy &&
+ ((Fabs.compare(APFloat::getSmallestNormalized(FPSem)) !=
+ APFloat::cmpLessThan) || Fabs.isZero())) {
+ Constant *NewC = ConstantFP::get(X->getType(), TruncC);
+ return new FCmpInst(Pred, X, NewC, "", &I);
}
+ }
}
- // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
- Value *X, *Y;
- if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
- return new FCmpInst(I.getSwappedPredicate(), X, Y);
-
- // fcmp (fpext x), (fpext y) -> fcmp x, y
- if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
- if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
- if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
- return new FCmpInst(Pred, LHSExt->getOperand(0), RHSExt->getOperand(0));
+ if (I.getType()->isVectorTy())
+ if (Instruction *Res = foldVectorCmp(I, Builder))
+ return Res;
return Changed ? &I : nullptr;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 58ef3d41415c..2de41bd5bef5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -20,7 +20,6 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetFolder.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -33,6 +32,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
@@ -41,11 +41,14 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#define DEBUG_TYPE "instcombine"
+using namespace llvm::PatternMatch;
+
namespace llvm {
class APInt;
@@ -79,8 +82,8 @@ class User;
/// 5 -> Other instructions
static inline unsigned getComplexity(Value *V) {
if (isa<Instruction>(V)) {
- if (isa<CastInst>(V) || BinaryOperator::isNeg(V) ||
- BinaryOperator::isFNeg(V) || BinaryOperator::isNot(V))
+ if (isa<CastInst>(V) || match(V, m_Neg(m_Value())) ||
+ match(V, m_Not(m_Value())) || match(V, m_FNeg(m_Value())))
return 4;
return 5;
}
@@ -138,7 +141,7 @@ static inline Constant *SubOne(Constant *C) {
/// uses of V and only keep uses of ~V.
static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
// ~(~(X)) -> X.
- if (BinaryOperator::isNot(V))
+ if (match(V, m_Not(m_Value())))
return true;
// Constants can be considered to be not'ed values.
@@ -175,6 +178,10 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
if (isa<Constant>(BO->getOperand(0)) || isa<Constant>(BO->getOperand(1)))
return WillInvertAllUses;
+ // Selects with invertible operands are freely invertible
+ if (match(V, m_Select(m_Value(), m_Not(m_Value()), m_Not(m_Value()))))
+ return WillInvertAllUses;
+
return false;
}
@@ -496,6 +503,12 @@ private:
OverflowResult::NeverOverflows;
}
+ bool willNotOverflowAdd(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI, bool IsSigned) const {
+ return IsSigned ? willNotOverflowSignedAdd(LHS, RHS, CxtI)
+ : willNotOverflowUnsignedAdd(LHS, RHS, CxtI);
+ }
+
bool willNotOverflowSignedSub(const Value *LHS, const Value *RHS,
const Instruction &CxtI) const {
return computeOverflowForSignedSub(LHS, RHS, &CxtI) ==
@@ -508,6 +521,12 @@ private:
OverflowResult::NeverOverflows;
}
+ bool willNotOverflowSub(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI, bool IsSigned) const {
+ return IsSigned ? willNotOverflowSignedSub(LHS, RHS, CxtI)
+ : willNotOverflowUnsignedSub(LHS, RHS, CxtI);
+ }
+
bool willNotOverflowSignedMul(const Value *LHS, const Value *RHS,
const Instruction &CxtI) const {
return computeOverflowForSignedMul(LHS, RHS, &CxtI) ==
@@ -520,12 +539,29 @@ private:
OverflowResult::NeverOverflows;
}
+ bool willNotOverflowMul(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI, bool IsSigned) const {
+ return IsSigned ? willNotOverflowSignedMul(LHS, RHS, CxtI)
+ : willNotOverflowUnsignedMul(LHS, RHS, CxtI);
+ }
+
+ bool willNotOverflow(BinaryOperator::BinaryOps Opcode, const Value *LHS,
+ const Value *RHS, const Instruction &CxtI,
+ bool IsSigned) const {
+ switch (Opcode) {
+ case Instruction::Add: return willNotOverflowAdd(LHS, RHS, CxtI, IsSigned);
+ case Instruction::Sub: return willNotOverflowSub(LHS, RHS, CxtI, IsSigned);
+ case Instruction::Mul: return willNotOverflowMul(LHS, RHS, CxtI, IsSigned);
+ default: llvm_unreachable("Unexpected opcode for overflow query");
+ }
+ }
+
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
- Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
Instruction *narrowBinOp(TruncInst &Trunc);
Instruction *narrowMaskedBinOp(BinaryOperator &And);
+ Instruction *narrowMathIfNoOverflow(BinaryOperator &I);
Instruction *narrowRotate(TruncInst &Trunc);
Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
@@ -553,6 +589,9 @@ private:
Value *foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
bool JoinedByAnd, Instruction &CxtI);
+ Value *matchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D);
+ Value *getSelectCondition(Value *A, Value *B);
+
public:
/// Inserts an instruction \p New before instruction \p Old
///
@@ -763,13 +802,14 @@ private:
Value *simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DmaskIdx = -1);
+ int DmaskIdx = -1,
+ int TFCIdx = -1);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt &UndefElts, unsigned Depth = 0);
/// Canonicalize the position of binops relative to shufflevector.
- Instruction *foldShuffledBinop(BinaryOperator &Inst);
+ Instruction *foldVectorBinop(BinaryOperator &Inst);
/// Given a binary operator, cast instruction, or select which has a PHI node
/// as operand #0, see if we can fold the instruction into the PHI (which is
@@ -813,11 +853,12 @@ private:
ConstantInt *AndCst = nullptr);
Instruction *foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
Constant *RHSC);
- Instruction *foldICmpAddOpConst(Value *X, ConstantInt *CI,
+ Instruction *foldICmpAddOpConst(Value *X, const APInt &C,
ICmpInst::Predicate Pred);
Instruction *foldICmpWithCastAndCast(ICmpInst &ICI);
Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp);
+ Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp);
Instruction *foldICmpWithConstant(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstant(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);
@@ -880,8 +921,11 @@ private:
Value *insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
bool isSigned, bool Inside);
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
- Instruction *MatchBSwap(BinaryOperator &I);
- bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+ bool mergeStoreIntoSuccessor(StoreInst &SI);
+
+ /// Given an 'or' instruction, check to see if it is part of a bswap idiom.
+ /// If so, return the equivalent bswap intrinsic.
+ Instruction *matchBSwap(BinaryOperator &Or);
Instruction *SimplifyAnyMemTransfer(AnyMemTransferInst *MI);
Instruction *SimplifyAnyMemSet(AnyMemSetInst *MI);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 62769f077b47..76ab614090fa 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -19,6 +19,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
@@ -115,13 +116,10 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
}
// Lifetime intrinsics can be handled by the caller.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- assert(II->use_empty() && "Lifetime markers have no result to use!");
- ToDelete.push_back(II);
- continue;
- }
+ if (I->isLifetimeStartOrEnd()) {
+ assert(I->use_empty() && "Lifetime markers have no result to use!");
+ ToDelete.push_back(I);
+ continue;
}
// If this is isn't our memcpy/memmove, reject it as something we can't
@@ -197,30 +195,32 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
- New->setAlignment(AI.getAlignment());
-
- // Scan to the end of the allocation instructions, to skip over a block of
- // allocas if possible...also skip interleaved debug info
- //
- BasicBlock::iterator It(New);
- while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
- ++It;
-
- // Now that I is pointing to the first non-allocation-inst in the block,
- // insert our getelementptr instruction...
- //
- Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
- Value *NullIdx = Constant::getNullValue(IdxTy);
- Value *Idx[2] = {NullIdx, NullIdx};
- Instruction *GEP =
- GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
- IC.InsertNewInstBefore(GEP, *It);
-
- // Now make everything use the getelementptr instead of the original
- // allocation.
- return IC.replaceInstUsesWith(AI, GEP);
+ if (C->getValue().getActiveBits() <= 64) {
+ Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It(New);
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
+ ++It;
+
+ // Now that I is pointing to the first non-allocation-inst in the block,
+ // insert our getelementptr instruction...
+ //
+ Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = {NullIdx, NullIdx};
+ Instruction *GEP =
+ GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
+ IC.InsertNewInstBefore(GEP, *It);
+
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return IC.replaceInstUsesWith(AI, GEP);
+ }
}
if (isa<UndefValue>(AI.getArraySize()))
@@ -490,6 +490,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
case LLVMContext::MD_mem_parallel_loop_access:
+ case LLVMContext::MD_access_group:
// All of these directly apply.
NewLoad->setMetadata(ID, N);
break;
@@ -549,10 +550,10 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
case LLVMContext::MD_mem_parallel_loop_access:
+ case LLVMContext::MD_access_group:
// All of these directly apply.
NewStore->setMetadata(ID, N);
break;
-
case LLVMContext::MD_invariant_load:
case LLVMContext::MD_nonnull:
case LLVMContext::MD_range:
@@ -1024,7 +1025,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (Value *AvailableVal = FindAvailableLoadedValue(
&LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
if (IsLoadCSE)
- combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI);
+ combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
return replaceInstUsesWith(
LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
@@ -1496,64 +1497,45 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (isa<UndefValue>(Val))
return eraseInstFromFunction(SI);
- // If this store is the last instruction in the basic block (possibly
- // excepting debug info instructions), and if the block ends with an
- // unconditional branch, try to move it to the successor block.
+ // If this store is the second-to-last instruction in the basic block
+ // (excluding debug info and bitcasts of pointers) and if the block ends with
+ // an unconditional branch, try to move the store to the successor block.
BBI = SI.getIterator();
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
+
if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
if (BI->isUnconditional())
- if (SimplifyStoreAtEndOfBlock(SI))
- return nullptr; // xform done!
+ mergeStoreIntoSuccessor(SI);
return nullptr;
}
-/// SimplifyStoreAtEndOfBlock - Turn things like:
+/// Try to transform:
/// if () { *P = v1; } else { *P = v2 }
-/// into a phi node with a store in the successor.
-///
-/// Simplify things like:
+/// or:
/// *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
-///
-bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) {
assert(SI.isUnordered() &&
- "this code has not been auditted for volatile or ordered store case");
+ "This code has not been audited for volatile or ordered store case.");
+ // Check if the successor block has exactly 2 incoming edges.
BasicBlock *StoreBB = SI.getParent();
-
- // Check to see if the successor block has exactly two incoming edges. If
- // so, see if the other predecessor contains a store to the same location.
- // if so, insert a PHI node (if needed) and move the stores down.
BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-
- // Determine whether Dest has exactly two predecessors and, if so, compute
- // the other predecessor.
- pred_iterator PI = pred_begin(DestBB);
- BasicBlock *P = *PI;
- BasicBlock *OtherBB = nullptr;
-
- if (P != StoreBB)
- OtherBB = P;
-
- if (++PI == pred_end(DestBB))
+ if (!DestBB->hasNPredecessors(2))
return false;
- P = *PI;
- if (P != StoreBB) {
- if (OtherBB)
- return false;
- OtherBB = P;
- }
- if (++PI != pred_end(DestBB))
- return false;
+ // Capture the other block (the block that doesn't contain our store).
+ pred_iterator PredIter = pred_begin(DestBB);
+ if (*PredIter == StoreBB)
+ ++PredIter;
+ BasicBlock *OtherBB = *PredIter;
- // Bail out if all the relevant blocks aren't distinct (this can happen,
- // for example, if SI is in an infinite loop)
+ // Bail out if all of the relevant blocks aren't distinct. This can happen,
+ // for example, if SI is in an infinite loop.
if (StoreBB == DestBB || OtherBB == DestBB)
return false;
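
A rough C++ picture of the store-sinking fold documented above, assuming the simple if/else case (function and variable names are illustrative): the two stores collapse into one PHI of the stored value followed by a single store in the common successor.

#include <cassert>

static void before(bool c, int *p, int v1, int v2) {
  if (c)
    *p = v1; // store in the 'then' block
  else
    *p = v2; // store in the 'else' block
}

static void after(bool c, int *p, int v1, int v2) {
  int merged = c ? v1 : v2; // the "storemerge" PHI
  *p = merged;              // the single store in the successor block
}

int main() {
  int a = 0, b = 0;
  before(true, &a, 10, 20);
  after(true, &b, 10, 20);
  assert(a == b);
  return 0;
}
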
@@ -1564,7 +1546,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
// If the other block ends in an unconditional branch, check for the 'if then
- // else' case. there is an instruction before the branch.
+ // else' case. There is an instruction before the branch.
StoreInst *OtherStore = nullptr;
if (OtherBr->isUnconditional()) {
--BBI;
@@ -1589,7 +1571,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
// Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
- // if/then triangle. See if there is a store to the same ptr as SI that
+ // if/then triangle. See if there is a store to the same ptr as SI that
// lives in OtherBB.
for (;; --BBI) {
// Check to see if we find the matching store.
@@ -1600,15 +1582,14 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
break;
}
// If we find something that may be using or overwriting the stored
- // value, or if we run out of instructions, we can't do the xform.
+ // value, or if we run out of instructions, we can't do the transform.
if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
BBI->mayWriteToMemory() || BBI == OtherBB->begin())
return false;
}
- // In order to eliminate the store in OtherBr, we have to
- // make sure nothing reads or overwrites the stored value in
- // StoreBB.
+ // In order to eliminate the store in OtherBr, we have to make sure nothing
+ // reads or overwrites the stored value in StoreBB.
for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
// FIXME: This should really be AA driven.
if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
@@ -1618,24 +1599,24 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// Insert a PHI node now if we need it.
Value *MergedVal = OtherStore->getOperand(0);
+ // The debug locations of the original instructions might differ. Merge them.
+ DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
+ OtherStore->getDebugLoc());
if (MergedVal != SI.getOperand(0)) {
PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
PN->addIncoming(SI.getOperand(0), SI.getParent());
PN->addIncoming(OtherStore->getOperand(0), OtherBB);
MergedVal = InsertNewInstBefore(PN, DestBB->front());
+ PN->setDebugLoc(MergedLoc);
}
- // Advance to a place where it is safe to insert the new store and
- // insert it.
+ // Advance to a place where it is safe to insert the new store and insert it.
BBI = DestBB->getFirstInsertionPt();
StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
- SI.isVolatile(),
- SI.getAlignment(),
- SI.getOrdering(),
- SI.getSyncScopeID());
+ SI.isVolatile(), SI.getAlignment(),
+ SI.getOrdering(), SI.getSyncScopeID());
InsertNewInstBefore(NewSI, *BBI);
- // The debug locations of the original instructions might differ; merge them.
- NewSI->applyMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc());
+ NewSI->setDebugLoc(MergedLoc);
// If the two stores had AA tags, merge them.
AAMDNodes AATags;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 63761d427235..7e99f3e4e500 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -133,7 +133,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -171,14 +171,13 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
// Replace X*(2^C) with X << C, where C is either a scalar or a vector.
if (Constant *NewCst = getLogBase2(NewOp->getType(), C1)) {
- unsigned Width = NewCst->getType()->getPrimitiveSizeInBits();
BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
if (I.hasNoUnsignedWrap())
Shl->setHasNoUnsignedWrap();
if (I.hasNoSignedWrap()) {
const APInt *V;
- if (match(NewCst, m_APInt(V)) && *V != Width - 1)
+ if (match(NewCst, m_APInt(V)) && *V != V->getBitWidth() - 1)
Shl->setHasNoSignedWrap();
}
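
A minimal check of the power-of-two fold above, assuming 32-bit unsigned values (the concrete numbers are illustrative): X * (2^C) is the same value as X << C, which is why the multiply is rewritten as a shift.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 37;
  assert(x * 8u == (x << 3)); // 8 == 2^3, so the mul becomes a shl
  return 0;
}
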
@@ -245,6 +244,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return NewMul;
}
+ // -X * Y --> -(X * Y)
+ // X * -Y --> -(X * Y)
+ if (match(&I, m_c_Mul(m_OneUse(m_Neg(m_Value(X))), m_Value(Y))))
+ return BinaryOperator::CreateNeg(Builder.CreateMul(X, Y));
+
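
A small sanity check of the new negation fold above, using unsigned (wrapping) arithmetic as a stand-in for two's-complement integers; the values are illustrative.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 123, y = 456;
  assert((0u - x) * y == 0u - (x * y)); // -X * Y == -(X * Y)
  return 0;
}
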
// (X / Y) * Y = X - (X % Y)
// (X / Y) * -Y = (X % Y) - X
{
@@ -323,77 +327,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_LShr(m_Value(X), m_APInt(C))) && *C == C->getBitWidth() - 1)
return BinaryOperator::CreateAnd(Builder.CreateAShr(X, *C), Op0);
- // Check for (mul (sext x), y), see if we can merge this into an
- // integer mul followed by a sext.
- if (SExtInst *Op0Conv = dyn_cast<SExtInst>(Op0)) {
- // (mul (sext x), cst) --> (sext (mul x, cst'))
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
- if (Op0Conv->hasOneUse()) {
- Constant *CI =
- ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
- if (ConstantExpr::getSExt(CI, I.getType()) == Op1C &&
- willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
- // Insert the new, smaller mul.
- Value *NewMul =
- Builder.CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
- return new SExtInst(NewMul, I.getType());
- }
- }
- }
-
- // (mul (sext x), (sext y)) --> (sext (mul int x, y))
- if (SExtInst *Op1Conv = dyn_cast<SExtInst>(Op1)) {
- // Only do this if x/y have the same type, if at last one of them has a
- // single use (so we don't increase the number of sexts), and if the
- // integer mul will not overflow.
- if (Op0Conv->getOperand(0)->getType() ==
- Op1Conv->getOperand(0)->getType() &&
- (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
- willNotOverflowSignedMul(Op0Conv->getOperand(0),
- Op1Conv->getOperand(0), I)) {
- // Insert the new integer mul.
- Value *NewMul = Builder.CreateNSWMul(
- Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
- return new SExtInst(NewMul, I.getType());
- }
- }
- }
-
- // Check for (mul (zext x), y), see if we can merge this into an
- // integer mul followed by a zext.
- if (auto *Op0Conv = dyn_cast<ZExtInst>(Op0)) {
- // (mul (zext x), cst) --> (zext (mul x, cst'))
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
- if (Op0Conv->hasOneUse()) {
- Constant *CI =
- ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
- if (ConstantExpr::getZExt(CI, I.getType()) == Op1C &&
- willNotOverflowUnsignedMul(Op0Conv->getOperand(0), CI, I)) {
- // Insert the new, smaller mul.
- Value *NewMul =
- Builder.CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv");
- return new ZExtInst(NewMul, I.getType());
- }
- }
- }
-
- // (mul (zext x), (zext y)) --> (zext (mul int x, y))
- if (auto *Op1Conv = dyn_cast<ZExtInst>(Op1)) {
- // Only do this if x/y have the same type, if at last one of them has a
- // single use (so we don't increase the number of zexts), and if the
- // integer mul will not overflow.
- if (Op0Conv->getOperand(0)->getType() ==
- Op1Conv->getOperand(0)->getType() &&
- (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
- willNotOverflowUnsignedMul(Op0Conv->getOperand(0),
- Op1Conv->getOperand(0), I)) {
- // Insert the new integer mul.
- Value *NewMul = Builder.CreateNUWMul(
- Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
- return new ZExtInst(NewMul, I.getType());
- }
- }
- }
+ if (Instruction *Ext = narrowMathIfNoOverflow(I))
+ return Ext;
bool Changed = false;
if (!I.hasNoSignedWrap() && willNotOverflowSignedMul(Op0, Op1, I)) {
@@ -418,7 +353,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
@@ -503,7 +438,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
match(Op0, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(X)))) &&
match(Op1, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) {
Value *XY = Builder.CreateFMulFMF(X, Y, &I);
- Value *Sqrt = Builder.CreateIntrinsic(Intrinsic::sqrt, { XY }, &I);
+ Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I);
return replaceInstUsesWith(I, Sqrt);
}
@@ -933,7 +868,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// Handle the integer div common cases
@@ -1027,7 +962,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// Handle the integer div common cases
@@ -1175,7 +1110,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *R = foldFDivConstantDivisor(I))
@@ -1227,7 +1162,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
IRBuilder<>::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(I.getFastMathFlags());
AttributeList Attrs = CallSite(Op0).getCalledFunction()->getAttributes();
- Value *Res = emitUnaryFloatFnCall(X, TLI.getName(LibFunc_tan), B, Attrs);
+ Value *Res = emitUnaryFloatFnCall(X, &TLI, LibFunc_tan, LibFunc_tanf,
+ LibFunc_tanl, B, Attrs);
if (IsCot)
Res = B.CreateFDiv(ConstantFP::get(I.getType(), 1.0), Res);
return replaceInstUsesWith(I, Res);
@@ -1304,7 +1240,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *common = commonIRemTransforms(I))
@@ -1351,7 +1287,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// Handle the integer rem common cases
@@ -1425,7 +1361,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
return nullptr;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index e54a1dd05a24..7603cf4d7958 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -211,20 +211,20 @@ Instruction *InstCombiner::FoldIntegerTypedPHI(PHINode &PN) {
}
// If it requires a conversion for every PHI operand, do not do it.
- if (std::all_of(AvailablePtrVals.begin(), AvailablePtrVals.end(),
- [&](Value *V) {
- return (V->getType() != IntToPtr->getType()) ||
- isa<IntToPtrInst>(V);
- }))
+ if (all_of(AvailablePtrVals, [&](Value *V) {
+ return (V->getType() != IntToPtr->getType()) || isa<IntToPtrInst>(V);
+ }))
return nullptr;
// If any of the operand that requires casting is a terminator
// instruction, do not do it.
- if (std::any_of(AvailablePtrVals.begin(), AvailablePtrVals.end(),
- [&](Value *V) {
- return (V->getType() != IntToPtr->getType()) &&
- isa<TerminatorInst>(V);
- }))
+ if (any_of(AvailablePtrVals, [&](Value *V) {
+ if (V->getType() == IntToPtr->getType())
+ return false;
+
+ auto *Inst = dyn_cast<Instruction>(V);
+ return Inst && Inst->isTerminator();
+ }))
return nullptr;
PHINode *NewPtrPHI = PHINode::Create(
@@ -608,6 +608,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_access_group,
};
for (unsigned ID : KnownIDs)
@@ -616,7 +617,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// Add all operands to the new PHI and combine TBAA metadata.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
LoadInst *LI = cast<LoadInst>(PN.getIncomingValue(i));
- combineMetadata(NewLI, LI, KnownIDs);
+ combineMetadata(NewLI, LI, KnownIDs, true);
Value *NewInVal = LI->getOperand(0);
if (NewInVal != InVal)
InVal = nullptr;
@@ -649,7 +650,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) {
// We cannot create a new instruction after the PHI if the terminator is an
// EHPad because there is no valid insertion point.
- if (TerminatorInst *TI = Phi.getParent()->getTerminator())
+ if (Instruction *TI = Phi.getParent()->getTerminator())
if (TI->isEHPad())
return nullptr;
@@ -723,7 +724,7 @@ Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) {
Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// We cannot create a new instruction after the PHI if the terminator is an
// EHPad because there is no valid insertion point.
- if (TerminatorInst *TI = PN.getParent()->getTerminator())
+ if (Instruction *TI = PN.getParent()->getTerminator())
if (TI->isEHPad())
return nullptr;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 796b4021d273..faf58a08976d 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -54,34 +54,62 @@ static Value *createMinMax(InstCombiner::BuilderTy &Builder,
return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
}
-/// Fold
-/// %A = icmp eq/ne i8 %x, 0
-/// %B = op i8 %x, %z
-/// %C = select i1 %A, i8 %B, i8 %y
-/// To
-/// %C = select i1 %A, i8 %z, i8 %y
-/// OP: binop with an identity constant
-/// TODO: support for non-commutative and FP opcodes
-static Instruction *foldSelectBinOpIdentity(SelectInst &Sel) {
-
- Value *Cond = Sel.getCondition();
- Value *X, *Z;
+/// Replace a select operand based on an equality comparison with the identity
+/// constant of a binop.
+static Instruction *foldSelectBinOpIdentity(SelectInst &Sel,
+ const TargetLibraryInfo &TLI) {
+ // The select condition must be an equality compare with a constant operand.
+ Value *X;
Constant *C;
CmpInst::Predicate Pred;
- if (!match(Cond, m_ICmp(Pred, m_Value(X), m_Constant(C))) ||
- !ICmpInst::isEquality(Pred))
+ if (!match(Sel.getCondition(), m_Cmp(Pred, m_Value(X), m_Constant(C))))
return nullptr;
- bool IsEq = Pred == ICmpInst::ICMP_EQ;
- auto *BO =
- dyn_cast<BinaryOperator>(IsEq ? Sel.getTrueValue() : Sel.getFalseValue());
- // TODO: support for undefs
- if (BO && match(BO, m_c_BinOp(m_Specific(X), m_Value(Z))) &&
- ConstantExpr::getBinOpIdentity(BO->getOpcode(), X->getType()) == C) {
- Sel.setOperand(IsEq ? 1 : 2, Z);
- return &Sel;
+ bool IsEq;
+ if (ICmpInst::isEquality(Pred))
+ IsEq = Pred == ICmpInst::ICMP_EQ;
+ else if (Pred == FCmpInst::FCMP_OEQ)
+ IsEq = true;
+ else if (Pred == FCmpInst::FCMP_UNE)
+ IsEq = false;
+ else
+ return nullptr;
+
+ // A select operand must be a binop.
+ BinaryOperator *BO;
+ if (!match(Sel.getOperand(IsEq ? 1 : 2), m_BinOp(BO)))
+ return nullptr;
+
+ // The compare constant must be the identity constant for that binop.
+ // If this a floating-point compare with 0.0, any zero constant will do.
+ Type *Ty = BO->getType();
+ Constant *IdC = ConstantExpr::getBinOpIdentity(BO->getOpcode(), Ty, true);
+ if (IdC != C) {
+ if (!IdC || !CmpInst::isFPPredicate(Pred))
+ return nullptr;
+ if (!match(IdC, m_AnyZeroFP()) || !match(C, m_AnyZeroFP()))
+ return nullptr;
}
- return nullptr;
+
+ // Last, match the compare variable operand with a binop operand.
+ Value *Y;
+ if (!BO->isCommutative() && !match(BO, m_BinOp(m_Value(Y), m_Specific(X))))
+ return nullptr;
+ if (!match(BO, m_c_BinOp(m_Value(Y), m_Specific(X))))
+ return nullptr;
+
+ // +0.0 compares equal to -0.0, and so it does not behave as required for this
+ // transform. Bail out if we can not exclude that possibility.
+ if (isa<FPMathOperator>(BO))
+ if (!BO->hasNoSignedZeros() && !CannotBeNegativeZero(Y, &TLI))
+ return nullptr;
+
+ // BO = binop Y, X
+ // S = { select (cmp eq X, C), BO, ? } or { select (cmp ne X, C), ?, BO }
+ // =>
+ // S = { select (cmp eq X, C), Y, ? } or { select (cmp ne X, C), ?, Y }
+ Sel.setOperand(IsEq ? 1 : 2, Y);
+ return &Sel;
}
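
A source-level sketch of the identity-constant fold above, assuming an integer add (names and values are illustrative, not from the patch): when the condition proves x == 0, the selected arm x + y is just y, so the binop operand can be replaced directly.

#include <cassert>

static int before(int x, int y, int z) { return x == 0 ? x + y : z; }
static int after(int x, int y, int z) { return x == 0 ? y : z; } // fold applied

int main() {
  for (int x : {0, 5})
    assert(before(x, 7, 9) == after(x, 7, 9));
  return 0;
}
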
/// This folds:
@@ -343,13 +371,24 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
return nullptr;
}
+ // If the select condition is a vector, the operands of the original select's
+ // operands also must be vectors. This may not be the case for getelementptr
+ // for example.
+ if (SI.getCondition()->getType()->isVectorTy() &&
+ (!OtherOpT->getType()->isVectorTy() ||
+ !OtherOpF->getType()->isVectorTy()))
+ return nullptr;
+
// If we reach here, they do have operations in common.
Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
SI.getName() + ".v", &SI);
Value *Op0 = MatchIsOpZero ? MatchOp : NewSI;
Value *Op1 = MatchIsOpZero ? NewSI : MatchOp;
if (auto *BO = dyn_cast<BinaryOperator>(TI)) {
- return BinaryOperator::Create(BO->getOpcode(), Op0, Op1);
+ BinaryOperator *NewBO = BinaryOperator::Create(BO->getOpcode(), Op0, Op1);
+ NewBO->copyIRFlags(TI);
+ NewBO->andIRFlags(FI);
+ return NewBO;
}
if (auto *TGEP = dyn_cast<GetElementPtrInst>(TI)) {
auto *FGEP = cast<GetElementPtrInst>(FI);
@@ -670,17 +709,18 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
match(Count, m_Trunc(m_Value(V))))
Count = V;
+ // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
+ // input to the cttz/ctlz is used as LHS for the compare instruction.
+ if (!match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) &&
+ !match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS))))
+ return nullptr;
+
+ IntrinsicInst *II = cast<IntrinsicInst>(Count);
+
// Check if the value propagated on zero is a constant number equal to the
// sizeof in bits of 'Count'.
unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
- if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits)))
- return nullptr;
-
- // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
- // input to the cttz/ctlz is used as LHS for the compare instruction.
- if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
- match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
- IntrinsicInst *II = cast<IntrinsicInst>(Count);
+ if (match(ValueOnZero, m_SpecificInt(SizeOfInBits))) {
// Explicitly clear the 'undef_on_zero' flag.
IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
NewI->setArgOperand(1, ConstantInt::getFalse(NewI->getContext()));
@@ -688,6 +728,12 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
}
+ // If the ValueOnZero is not the bitwidth, we can at least make use of the
+ // fact that the cttz/ctlz result will not be used if the input is zero, so
+ // it's okay to relax it to undef for that case.
+ if (II->hasOneUse() && !match(II->getArgOperand(1), m_One()))
+ II->setArgOperand(1, ConstantInt::getTrue(II->getContext()));
+
return nullptr;
}
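
An illustration of the cttz/ctlz fold above, assuming a 32-bit cttz (the helper below is a portable stand-in, not an LLVM API): a count-trailing-zeros that is defined to return the bit width for a zero input already computes "x == 0 ? 32 : cttz(x)", so the compare and select around it are redundant.

#include <cassert>
#include <cstdint>

static unsigned cttz32(uint32_t x) {
  unsigned n = 0;
  while (n < 32 && !((x >> n) & 1u))
    ++n;
  return n; // 32 for x == 0, matching cttz with is_zero_undef = false
}

int main() {
  for (uint32_t x : {0u, 1u, 8u, 0x80000000u})
    assert((x == 0 ? 32u : cttz32(x)) == cttz32(x));
  return 0;
}
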
@@ -1054,11 +1100,13 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
if (C == A || C == B) {
// MAX(MAX(A, B), B) -> MAX(A, B)
// MIN(MIN(a, b), a) -> MIN(a, b)
+ // TODO: This could be done in instsimplify.
if (SPF1 == SPF2 && SelectPatternResult::isMinOrMax(SPF1))
return replaceInstUsesWith(Outer, Inner);
// MAX(MIN(a, b), a) -> a
// MIN(MAX(a, b), a) -> a
+ // TODO: This could be done in instsimplify.
if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
(SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
(SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
@@ -1071,6 +1119,7 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
if (match(B, m_APInt(CB)) && match(C, m_APInt(CC))) {
// MIN(MIN(A, 23), 97) -> MIN(A, 23)
// MAX(MAX(A, 97), 23) -> MAX(A, 97)
+ // TODO: This could be done in instsimplify.
if ((SPF1 == SPF_UMIN && CB->ule(*CC)) ||
(SPF1 == SPF_SMIN && CB->sle(*CC)) ||
(SPF1 == SPF_UMAX && CB->uge(*CC)) ||
@@ -1091,6 +1140,7 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
// ABS(ABS(X)) -> ABS(X)
// NABS(NABS(X)) -> NABS(X)
+ // TODO: This could be done in instsimplify.
if (SPF1 == SPF2 && (SPF1 == SPF_ABS || SPF1 == SPF_NABS)) {
return replaceInstUsesWith(Outer, Inner);
}
@@ -1503,6 +1553,60 @@ static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
return SelectInst::Create(CmpABC, MinMaxOp, ThirdOp);
}
+/// Try to reduce a rotate pattern that includes a compare and select into a
+/// funnel shift intrinsic. Example:
+/// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
+/// --> call llvm.fshl.i32(a, a, b)
+static Instruction *foldSelectRotate(SelectInst &Sel) {
+ // The false value of the select must be a rotate of the true value.
+ Value *Or0, *Or1;
+ if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_Value(Or0), m_Value(Or1)))))
+ return nullptr;
+
+ Value *TVal = Sel.getTrueValue();
+ Value *SA0, *SA1;
+ if (!match(Or0, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA0)))) ||
+ !match(Or1, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA1)))))
+ return nullptr;
+
+ auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
+ auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+ if (ShiftOpcode0 == ShiftOpcode1)
+ return nullptr;
+
+ // We have one of these patterns so far:
+ // select ?, TVal, (or (lshr TVal, SA0), (shl TVal, SA1))
+ // select ?, TVal, (or (shl TVal, SA0), (lshr TVal, SA1))
+ // This must be a power-of-2 rotate for a bitmasking transform to be valid.
+ unsigned Width = Sel.getType()->getScalarSizeInBits();
+ if (!isPowerOf2_32(Width))
+ return nullptr;
+
+ // Check the shift amounts to see if they are an opposite pair.
+ Value *ShAmt;
+ if (match(SA1, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(SA0)))))
+ ShAmt = SA0;
+ else if (match(SA0, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(SA1)))))
+ ShAmt = SA1;
+ else
+ return nullptr;
+
+ // Finally, see if the select is filtering out a shift-by-zero.
+ Value *Cond = Sel.getCondition();
+ ICmpInst::Predicate Pred;
+ if (!match(Cond, m_OneUse(m_ICmp(Pred, m_Specific(ShAmt), m_ZeroInt()))) ||
+ Pred != ICmpInst::ICMP_EQ)
+ return nullptr;
+
+ // This is a rotate that avoids shift-by-bitwidth UB in a suboptimal way.
+ // Convert to funnel shift intrinsic.
+ bool IsFshl = (ShAmt == SA0 && ShiftOpcode0 == BinaryOperator::Shl) ||
+ (ShAmt == SA1 && ShiftOpcode1 == BinaryOperator::Shl);
+ Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+ Function *F = Intrinsic::getDeclaration(Sel.getModule(), IID, Sel.getType());
+ return IntrinsicInst::Create(F, { TVal, TVal, ShAmt });
+}
+
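
The guarded-rotate source pattern quoted in the comment above, next to a branch-free form with llvm.fshl's modulo-width semantics; the 32-bit width and function names are illustrative. Both produce the same result for every shift amount, which is why the compare and select can be replaced by the funnel-shift intrinsic.

#include <cassert>
#include <cstdint>

static uint32_t rotl_guarded(uint32_t a, uint32_t b) {
  return b == 0 ? a : ((a >> (32 - b)) | (a << b)); // guards against a shift by 32
}

static uint32_t rotl_fshl(uint32_t a, uint32_t b) {
  b &= 31; // shift amount taken modulo the bit width, like llvm.fshl
  return (a << b) | (a >> ((32 - b) & 31));
}

int main() {
  for (uint32_t b = 0; b < 32; ++b)
    assert(rotl_guarded(0xDEADBEEFu, b) == rotl_fshl(0xDEADBEEFu, b));
  return 0;
}
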
Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@@ -1617,31 +1721,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// See if we are selecting two values based on a comparison of the two values.
if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
- // Transform (X == Y) ? X : Y -> Y
- if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
- // This is not safe in general for floating point:
- // consider X== -0, Y== +0.
- // It becomes safe if either operand is a nonzero constant.
- ConstantFP *CFPt, *CFPf;
- if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
- !CFPt->getValueAPF().isZero()) ||
- ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
- !CFPf->getValueAPF().isZero()))
- return replaceInstUsesWith(SI, FalseVal);
- }
- // Transform (X une Y) ? X : Y -> X
- if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
- // This is not safe in general for floating point:
- // consider X== -0, Y== +0.
- // It becomes safe if either operand is a nonzero constant.
- ConstantFP *CFPt, *CFPf;
- if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
- !CFPt->getValueAPF().isZero()) ||
- ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
- !CFPf->getValueAPF().isZero()))
- return replaceInstUsesWith(SI, TrueVal);
- }
-
// Canonicalize to use ordered comparisons by swapping the select
// operands.
//
@@ -1660,31 +1739,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// NOTE: if we wanted to, this is where to detect MIN/MAX
} else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
- // Transform (X == Y) ? Y : X -> X
- if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
- // This is not safe in general for floating point:
- // consider X== -0, Y== +0.
- // It becomes safe if either operand is a nonzero constant.
- ConstantFP *CFPt, *CFPf;
- if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
- !CFPt->getValueAPF().isZero()) ||
- ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
- !CFPf->getValueAPF().isZero()))
- return replaceInstUsesWith(SI, FalseVal);
- }
- // Transform (X une Y) ? Y : X -> Y
- if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
- // This is not safe in general for floating point:
- // consider X== -0, Y== +0.
- // It becomes safe if either operand is a nonzero constant.
- ConstantFP *CFPt, *CFPf;
- if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
- !CFPt->getValueAPF().isZero()) ||
- ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
- !CFPf->getValueAPF().isZero()))
- return replaceInstUsesWith(SI, TrueVal);
- }
-
// Canonicalize to use ordered comparisons by swapping the select
// operands.
//
@@ -1717,7 +1771,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X)))) ||
(X == TrueVal && Pred == FCmpInst::FCMP_OGT &&
match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(X))))) {
- Value *Fabs = Builder.CreateIntrinsic(Intrinsic::fabs, { X }, FCI);
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FCI);
return replaceInstUsesWith(SI, Fabs);
}
// With nsz:
@@ -1730,7 +1784,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
(Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE)) ||
(X == TrueVal && match(FalseVal, m_FNeg(m_Specific(X))) &&
(Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE)))) {
- Value *Fabs = Builder.CreateIntrinsic(Intrinsic::fabs, { X }, FCI);
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FCI);
return replaceInstUsesWith(SI, Fabs);
}
}
@@ -1759,10 +1813,23 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *FoldI = foldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
- Value *LHS, *RHS, *LHS2, *RHS2;
+ Value *LHS, *RHS;
Instruction::CastOps CastOp;
SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp);
auto SPF = SPR.Flavor;
+ if (SPF) {
+ Value *LHS2, *RHS2;
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor)
+ if (Instruction *R = foldSPFofSPF(cast<Instruction>(LHS), SPF2, LHS2,
+ RHS2, SI, SPF, RHS))
+ return R;
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor)
+ if (Instruction *R = foldSPFofSPF(cast<Instruction>(RHS), SPF2, LHS2,
+ RHS2, SI, SPF, LHS))
+ return R;
+ // TODO.
+ // ABS(-X) -> ABS(X)
+ }
if (SelectPatternResult::isMinOrMax(SPF)) {
// Canonicalize so that
@@ -1797,39 +1864,40 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// MAX(~a, ~b) -> ~MIN(a, b)
+ // MAX(~a, C) -> ~MIN(a, ~C)
// MIN(~a, ~b) -> ~MAX(a, b)
- Value *A, *B;
- if (match(LHS, m_Not(m_Value(A))) && match(RHS, m_Not(m_Value(B))) &&
- (LHS->getNumUses() <= 2 || RHS->getNumUses() <= 2)) {
- CmpInst::Predicate InvertedPred = getInverseMinMaxPred(SPF);
- Value *InvertedCmp = Builder.CreateICmp(InvertedPred, A, B);
- Value *NewSel = Builder.CreateSelect(InvertedCmp, A, B);
- return BinaryOperator::CreateNot(NewSel);
- }
+ // MIN(~a, C) -> ~MAX(a, ~C)
+ auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
+ Value *A;
+ if (match(X, m_Not(m_Value(A))) && !X->hasNUsesOrMore(3) &&
+ !IsFreeToInvert(A, A->hasOneUse()) &&
+ // Passing false to only consider m_Not and constants.
+ IsFreeToInvert(Y, false)) {
+ Value *B = Builder.CreateNot(Y);
+ Value *NewMinMax = createMinMax(Builder, getInverseMinMaxFlavor(SPF),
+ A, B);
+ // Copy the profile metadata.
+ if (MDNode *MD = SI.getMetadata(LLVMContext::MD_prof)) {
+ cast<SelectInst>(NewMinMax)->setMetadata(LLVMContext::MD_prof, MD);
+ // Swap the metadata if the operands are swapped.
+ if (X == SI.getFalseValue() && Y == SI.getTrueValue())
+ cast<SelectInst>(NewMinMax)->swapProfMetadata();
+ }
- if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder))
+ return BinaryOperator::CreateNot(NewMinMax);
+ }
+
+ return nullptr;
+ };
+
+ if (Instruction *I = moveNotAfterMinMax(LHS, RHS))
+ return I;
+ if (Instruction *I = moveNotAfterMinMax(RHS, LHS))
return I;
- }
- if (SPF) {
- // MAX(MAX(a, b), a) -> MAX(a, b)
- // MIN(MIN(a, b), a) -> MIN(a, b)
- // MAX(MIN(a, b), a) -> a
- // MIN(MAX(a, b), a) -> a
- // ABS(ABS(a)) -> ABS(a)
- // NABS(NABS(a)) -> NABS(a)
- if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor)
- if (Instruction *R = foldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
- SI, SPF, RHS))
- return R;
- if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor)
- if (Instruction *R = foldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
- SI, SPF, LHS))
- return R;
+ if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder))
+ return I;
}
-
- // TODO.
- // ABS(-X) -> ABS(X)
}
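
A quick check of the De Morgan-style identities behind the not-hoisting fold above, using unsigned 32-bit values (the constants are illustrative): max(~a, ~b) == ~min(a, b) and min(~a, ~b) == ~max(a, b).

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0x1234, b = 0xBEEF;
  assert(std::max(~a, ~b) == ~std::min(a, b)); // MAX(~a, ~b) -> ~MIN(a, b)
  assert(std::min(~a, ~b) == ~std::max(a, b)); // MIN(~a, ~b) -> ~MAX(a, b)
  return 0;
}
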
// See if we can fold the select into a phi node if the condition is a select.
@@ -1934,10 +2002,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
}
- if (BinaryOperator::isNot(CondVal)) {
- SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
+ Value *NotCond;
+ if (match(CondVal, m_Not(m_Value(NotCond)))) {
+ SI.setOperand(0, NotCond);
SI.setOperand(1, FalseVal);
SI.setOperand(2, TrueVal);
+ SI.swapProfMetadata();
return &SI;
}
@@ -1952,24 +2022,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
}
- // See if we can determine the result of this select based on a dominating
- // condition.
- BasicBlock *Parent = SI.getParent();
- if (BasicBlock *Dom = Parent->getSinglePredecessor()) {
- auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
- if (PBI && PBI->isConditional() &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1) &&
- (PBI->getSuccessor(0) == Parent || PBI->getSuccessor(1) == Parent)) {
- bool CondIsTrue = PBI->getSuccessor(0) == Parent;
- Optional<bool> Implication = isImpliedCondition(
- PBI->getCondition(), SI.getCondition(), DL, CondIsTrue);
- if (Implication) {
- Value *V = *Implication ? TrueVal : FalseVal;
- return replaceInstUsesWith(SI, V);
- }
- }
- }
-
// If we can compute the condition, there's no need for a select.
// Like the above fold, we are attempting to reduce compile-time cost by
// putting this fold here with limitations rather than in InstSimplify.
@@ -1991,8 +2043,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *Select = foldSelectCmpXchg(SI))
return Select;
- if (Instruction *Select = foldSelectBinOpIdentity(SI))
+ if (Instruction *Select = foldSelectBinOpIdentity(SI, TLI))
return Select;
+ if (Instruction *Rot = foldSelectRotate(SI))
+ return Rot;
+
return nullptr;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 1ca75f3989d4..c562d45a9e2b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -593,7 +593,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *V = commonShiftTransforms(I))
@@ -697,7 +697,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *R = commonShiftTransforms(I))
@@ -725,9 +725,9 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
Value *X;
const APInt *ShOp1;
- if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1)))) {
- unsigned ShlAmt = ShOp1->getZExtValue();
- if (ShlAmt < ShAmt) {
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1))) && ShOp1->ult(BitWidth)) {
+ if (ShOp1->ult(ShAmt)) {
+ unsigned ShlAmt = ShOp1->getZExtValue();
Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShlAmt);
if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
// (X <<nuw C1) >>u C2 --> X >>u (C2 - C1)
@@ -740,7 +740,8 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
}
- if (ShlAmt > ShAmt) {
+ if (ShOp1->ugt(ShAmt)) {
+ unsigned ShlAmt = ShOp1->getZExtValue();
Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmt - ShAmt);
if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
// (X <<nuw C1) >>u C2 --> X <<nuw (C1 - C2)
@@ -753,7 +754,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
}
- assert(ShlAmt == ShAmt);
+ assert(*ShOp1 == ShAmt);
// (X << C) >>u C --> X & (-1 >>u C)
APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
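
A worked instance of the equal-shift-amount case above, assuming 8-bit values and an illustrative shift of 3: (x << c) >> c keeps only the low (8 - c) bits, which is exactly x & (0xFF >> c).

#include <cassert>
#include <cstdint>

int main() {
  const unsigned c = 3;
  for (unsigned i = 0; i < 256; ++i) {
    uint8_t x = static_cast<uint8_t>(i);
    uint8_t shifted = static_cast<uint8_t>(static_cast<uint8_t>(x << c) >> c);
    assert(shifted == (x & (0xFFu >> c))); // (X << C) >>u C --> X & (-1 >>u C)
  }
  return 0;
}
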
@@ -825,7 +826,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *R = commonShiftTransforms(I))
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 425f5ce384be..9bf87d024607 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -314,11 +314,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known.One = std::move(IKnownOne);
break;
}
- case Instruction::Select:
- // If this is a select as part of a min/max pattern, don't simplify any
- // further in case we break the structure.
+ case Instruction::Select: {
Value *LHS, *RHS;
- if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
+ SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
+ if (SPF == SPF_UMAX) {
+ // UMax(A, C) == A if ...
+ // The lowest non-zero bit of DemandedMask is higher than the highest
+ // non-zero bit of C.
+ const APInt *C;
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ if (match(RHS, m_APInt(C)) && CTZ >= C->getActiveBits())
+ return LHS;
+ } else if (SPF == SPF_UMIN) {
+ // UMin(A, C) == A if ...
+ // The lowest non-zero bit of DemandedMask is higher than the highest
+ // non-one bit of C.
+ // This comes from using DeMorgans on the above umax example.
+ const APInt *C;
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ if (match(RHS, m_APInt(C)) &&
+ CTZ >= C->getBitWidth() - C->countLeadingOnes())
+ return LHS;
+ }
+
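
A concrete instance of the new umax rule above, with illustrative values: for DemandedMask = 0xF0 and C = 7, the lowest demanded bit (bit 4) is above the highest set bit of C (bit 2), so every demanded bit of umax(A, 7) equals the corresponding bit of A.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t DemandedMask = 0xF0;
  for (uint32_t a = 0; a < 1024; ++a)
    assert((std::max(a, 7u) & DemandedMask) == (a & DemandedMask));
  return 0;
}
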
+ // If this is a select as part of any other min/max pattern, don't simplify
+ // any further in case we break the structure.
+ if (SPF != SPF_UNKNOWN)
return nullptr;
if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
@@ -336,6 +357,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known.One = RHSKnown.One & LHSKnown.One;
Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
break;
+ }
case Instruction::ZExt:
case Instruction::Trunc: {
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
@@ -668,6 +690,30 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// TODO: Could compute known zero/one bits based on the input.
break;
}
+ case Intrinsic::fshr:
+ case Intrinsic::fshl: {
+ const APInt *SA;
+ if (!match(I->getOperand(2), m_APInt(SA)))
+ break;
+
+ // Normalize to funnel shift left. APInt shifts of BitWidth are well-
+ // defined, so no need to special-case zero shifts here.
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
+
+ APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
+ APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
+ if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
+ return I;
+
+ Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
+ RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
+ Known.One = LHSKnown.One.shl(ShiftAmt) |
+ RHSKnown.One.lshr(BitWidth - ShiftAmt);
+ break;
+ }
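
A reference model for the fshr-to-fshl normalization above, assuming 32-bit operands and a shift amount in [1, 31] (the zero case is covered by APInt's well-defined full-width shifts, as the comment notes): a funnel shift right by s equals a funnel shift left by 32 - s.

#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t a, uint32_t b, uint32_t s) { // s in [1, 31]
  return (a << s) | (b >> (32 - s));
}

static uint32_t fshr32(uint32_t a, uint32_t b, uint32_t s) { // s in [1, 31]
  return (b >> s) | (a << (32 - s));
}

int main() {
  for (uint32_t s = 1; s < 32; ++s)
    assert(fshr32(0x12345678u, 0x9ABCDEF0u, s) ==
           fshl32(0x12345678u, 0x9ABCDEF0u, 32 - s));
  return 0;
}
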
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
@@ -923,11 +969,24 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DMaskIdx) {
+ int DMaskIdx,
+ int TFCIdx) {
unsigned VWidth = II->getType()->getVectorNumElements();
if (VWidth == 1)
return nullptr;
+ // Need to change to new instruction format
+ ConstantInt *TFC = nullptr;
+ bool TFELWEEnabled = false;
+ if (TFCIdx > 0) {
+ TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx));
+ TFELWEEnabled = TFC->getZExtValue() & 0x1 // TFE
+ || TFC->getZExtValue() & 0x2; // LWE
+ }
+
+ if (TFELWEEnabled)
+ return nullptr; // TFE not yet supported
+
ConstantInt *NewDMask = nullptr;
if (DMaskIdx < 0) {
@@ -1052,8 +1111,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts = 0;
- // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
- if (Constant *C = dyn_cast<Constant>(V)) {
+ if (auto *C = dyn_cast<Constant>(V)) {
// Check if this is identity. If so, return 0 since we are not simplifying
// anything.
if (DemandedElts.isAllOnesValue())
@@ -1061,7 +1119,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Type *EltTy = cast<VectorType>(V->getType())->getElementType();
Constant *Undef = UndefValue::get(EltTy);
-
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0; i != VWidth; ++i) {
if (!DemandedElts[i]) { // If not demanded, set to undef.
@@ -1109,9 +1166,21 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (!I) return nullptr; // Only analyze instructions.
bool MadeChange = false;
+ auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
+ APInt Demanded, APInt &Undef) {
+ auto *II = dyn_cast<IntrinsicInst>(Inst);
+ Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
+ if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
+ if (II)
+ II->setArgOperand(OpNum, V);
+ else
+ Inst->setOperand(OpNum, V);
+ MadeChange = true;
+ }
+ };
+
APInt UndefElts2(VWidth, 0);
APInt UndefElts3(VWidth, 0);
- Value *TmpV;
switch (I->getOpcode()) {
default: break;
@@ -1122,9 +1191,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (!Idx) {
// Note that we can't propagate undef elt info, because we don't know
// which elt is getting updated.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts2);
break;
}
@@ -1134,9 +1201,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt PreInsertDemandedElts = DemandedElts;
if (IdxNo < VWidth)
PreInsertDemandedElts.clearBit(IdxNo);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), PreInsertDemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts);
// If this is inserting an element that isn't demanded, remove this
// insertelement.
@@ -1169,14 +1235,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
APInt LHSUndefElts(LHSVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- LHSUndefElts, Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
APInt RHSUndefElts(LHSVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
- RHSUndefElts, Depth + 1);
- if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);
bool NewUndefElts = false;
unsigned LHSIdx = -1u, LHSValIdx = -1u;
@@ -1260,32 +1322,43 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
case Instruction::Select: {
- APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
- if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
+ // If this is a vector select, try to transform the select condition based
+ // on the current demanded elements.
+ SelectInst *Sel = cast<SelectInst>(I);
+ if (Sel->getCondition()->getType()->isVectorTy()) {
+ // TODO: We are not doing anything with UndefElts based on this call.
+ // It is overwritten below based on the other select operands. If an
+ // element of the select condition is known undef, then we are free to
+ // choose the output value from either arm of the select. If we know that
+ // one of those values is undef, then the output can be undef.
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
+ }
+
+ // Next, see if we can transform the arms of the select.
+ APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
+ if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
for (unsigned i = 0; i < VWidth; i++) {
+ // isNullValue() always returns false when called on a ConstantExpr.
+ // Skip constant expressions to avoid propagating incorrect information.
Constant *CElt = CV->getAggregateElement(i);
- // Method isNullValue always returns false when called on a
- // ConstantExpr. If CElt is a ConstantExpr then skip it in order to
- // to avoid propagating incorrect information.
if (isa<ConstantExpr>(CElt))
continue;
+ // TODO: If a select condition element is undef, we can demand from
+ // either side. If one side is known undef, choosing that side would
+ // propagate undef.
if (CElt->isNullValue())
- LeftDemanded.clearBit(i);
+ DemandedLHS.clearBit(i);
else
- RightDemanded.clearBit(i);
+ DemandedRHS.clearBit(i);
}
}
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
- Depth + 1);
- if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
-
- TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
- UndefElts2, Depth + 1);
- if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2);
+ simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3);
- // Output elements are undefined if both are undefined.
- UndefElts &= UndefElts2;
+ // Output elements are undefined if the element from each arm is undefined.
+ // TODO: This can be improved. See comment in select condition handling.
+ UndefElts = UndefElts2 & UndefElts3;
break;
}
case Instruction::BitCast: {
@@ -1323,12 +1396,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) {
- I->setOperand(0, TmpV);
- MadeChange = true;
- }
+ simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2);
if (VWidth == InVWidth) {
UndefElts = UndefElts2;
@@ -1353,29 +1421,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
break;
}
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // div/rem demand all inputs, because they don't want divide by zero.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
- Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
-
- // Output elements are undefined if both are undefined. Consider things
- // like undef&0. The result is known zero, not undef.
- UndefElts &= UndefElts2;
- break;
case Instruction::FPTrunc:
case Instruction::FPExt:
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
- Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
break;
case Instruction::Call: {
@@ -1395,9 +1443,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only the lower element is used.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// Only the lower element is undefined. The high elements are zero.
UndefElts = UndefElts[0];
@@ -1406,9 +1452,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Unary scalar-as-vector operations that work column-wise.
case Intrinsic::x86_sse_rcp_ss:
case Intrinsic::x86_sse_rsqrt_ss:
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1428,9 +1472,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
case Intrinsic::x86_sse2_cmp_sd: {
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1440,9 +1482,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only lower element is used for operand 1.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
// Lower element is undefined if both lower elements are undefined.
// Consider things like undef&0. The result is known zero, not undef.
@@ -1459,9 +1499,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Don't use the low element of operand 0.
APInt DemandedElts2 = DemandedElts;
DemandedElts2.clearBit(0);
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts2, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1471,9 +1509,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only lower element is used for operand 1.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
// Take the high undef elements from operand 0 and take the lower element
// from operand 1.
@@ -1497,9 +1533,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_avx512_mask_sub_sd_round:
case Intrinsic::x86_avx512_mask_max_sd_round:
case Intrinsic::x86_avx512_mask_min_sd_round:
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1509,12 +1543,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only lower element is used for operand 1 and 2.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
- UndefElts3, Depth + 1);
- if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
+ simplifyAndSetOp(II, 2, DemandedElts, UndefElts3);
// Lower element is undefined if all three lower elements are undefined.
// Consider things like undef&0. The result is known zero, not undef.
@@ -1559,14 +1589,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
// Demand elements from the operand.
- auto *Op = II->getArgOperand(OpNum);
APInt OpUndefElts(InnerVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(Op, OpDemandedElts, OpUndefElts,
- Depth + 1);
- if (TmpV) {
- II->setArgOperand(OpNum, TmpV);
- MadeChange = true;
- }
+ simplifyAndSetOp(II, OpNum, OpDemandedElts, OpUndefElts);
// Pack the operand's UNDEF elements, one lane at a time.
OpUndefElts = OpUndefElts.zext(VWidth);
@@ -1594,10 +1618,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// PERMV
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps: {
- Value *Op1 = II->getArgOperand(1);
- TmpV = SimplifyDemandedVectorElts(Op1, DemandedElts, UndefElts,
- Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts);
break;
}
@@ -1611,16 +1632,40 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format:
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_buffer_load_format:
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_load_format:
return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
default: {
if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
- return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
+ return simplifyAMDGCNMemoryIntrinsicDemanded(
+ II, DemandedElts, 0, II->getNumArgOperands() - 2);
break;
}
- }
+ } // switch on IntrinsicID
break;
+ } // case Call
+ } // switch on Opcode
+
+ // TODO: We bail completely on integer div/rem and shifts because they have
+ // UB/poison potential, but that should be refined.
+ BinaryOperator *BO;
+ if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
+ simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);
+
+ // Any change to an instruction with potential poison must clear those flags
+ // because we can not guarantee those constraints now. Other analysis may
+ // determine that it is safe to re-apply the flags.
+ if (MadeChange)
+ BO->dropPoisonGeneratingFlags();
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef & 0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
}
- }
+
return MadeChange ? I : nullptr;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 1c2de6352fa5..0ad1fc0e791f 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -46,40 +46,34 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
/// Return true if the value is cheaper to scalarize than it is to leave as a
-/// vector operation. isConstant indicates whether we're extracting one known
-/// element. If false we're extracting a variable index.
-static bool cheapToScalarize(Value *V, bool isConstant) {
- if (Constant *C = dyn_cast<Constant>(V)) {
- if (isConstant) return true;
+/// vector operation. IsConstantExtractIndex indicates whether we are extracting
+/// one known element from a vector constant.
+///
+/// FIXME: It's possible to create more instructions than previously existed.
+static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
+ // If we can pick a scalar constant value out of a vector, that is free.
+ if (auto *C = dyn_cast<Constant>(V))
+ return IsConstantExtractIndex || C->getSplatValue();
+
+ // An insertelement to the same constant index as our extract will simplify
+ // to the scalar inserted element. An insertelement to a different constant
+ // index is irrelevant to our extract.
+ if (match(V, m_InsertElement(m_Value(), m_Value(), m_ConstantInt())))
+ return IsConstantExtractIndex;
+
+ if (match(V, m_OneUse(m_Load(m_Value()))))
+ return true;
- // If all elts are the same, we can extract it and use any of the values.
- if (Constant *Op0 = C->getAggregateElement(0U)) {
- for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e;
- ++i)
- if (C->getAggregateElement(i) != Op0)
- return false;
+ Value *V0, *V1;
+ if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
+ if (cheapToScalarize(V0, IsConstantExtractIndex) ||
+ cheapToScalarize(V1, IsConstantExtractIndex))
return true;
- }
- }
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return false;
- // Insert element gets simplified to the inserted element or is deleted if
- // this is constant idx extract element and its a constant idx insertelt.
- if (I->getOpcode() == Instruction::InsertElement && isConstant &&
- isa<ConstantInt>(I->getOperand(2)))
- return true;
- if (I->getOpcode() == Instruction::Load && I->hasOneUse())
- return true;
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
- if (BO->hasOneUse() &&
- (cheapToScalarize(BO->getOperand(0), isConstant) ||
- cheapToScalarize(BO->getOperand(1), isConstant)))
- return true;
- if (CmpInst *CI = dyn_cast<CmpInst>(I))
- if (CI->hasOneUse() &&
- (cheapToScalarize(CI->getOperand(0), isConstant) ||
- cheapToScalarize(CI->getOperand(1), isConstant)))
+ CmpInst::Predicate UnusedPred;
+ if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
+ if (cheapToScalarize(V0, IsConstantExtractIndex) ||
+ cheapToScalarize(V1, IsConstantExtractIndex))
return true;
return false;
@@ -166,92 +160,176 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
return &EI;
}
+static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
+ InstCombiner::BuilderTy &Builder,
+ bool IsBigEndian) {
+ Value *X;
+ uint64_t ExtIndexC;
+ if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
+ !X->getType()->isVectorTy() ||
+ !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
+ return nullptr;
+
+ // If this extractelement is using a bitcast from a vector of the same number
+ // of elements, see if we can find the source element from the source vector:
+ // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
+ Type *SrcTy = X->getType();
+ Type *DestTy = Ext.getType();
+ unsigned NumSrcElts = SrcTy->getVectorNumElements();
+ unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
+ if (NumSrcElts == NumElts)
+ if (Value *Elt = findScalarElement(X, ExtIndexC))
+ return new BitCastInst(Elt, DestTy);
+
+ // If the source elements are wider than the destination, try to shift and
+ // truncate a subset of scalar bits of an insert op.
+ if (NumSrcElts < NumElts) {
+ Value *Scalar;
+ uint64_t InsIndexC;
+ if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
+ m_ConstantInt(InsIndexC))))
+ return nullptr;
+
+ // The extract must be from the subset of vector elements that we inserted
+ // into. Example: if we inserted element 1 of a <2 x i64> and we are
+ // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
+ // of elements 4-7 of the bitcasted vector.
+ unsigned NarrowingRatio = NumElts / NumSrcElts;
+ if (ExtIndexC / NarrowingRatio != InsIndexC)
+ return nullptr;
+
+ // We are extracting part of the original scalar. How that scalar is
+ // inserted into the vector depends on the endian-ness. Example:
+ // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
+ // +--+--+--+--+--+--+--+--+
+ // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
+ // extelt <4 x i16> V', 3: | |S2|S3|
+ // +--+--+--+--+--+--+--+--+
+ // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
+ // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
+ // In this example, we must right-shift little-endian. Big-endian is just a
+ // truncate.
+ unsigned Chunk = ExtIndexC % NarrowingRatio;
+ if (IsBigEndian)
+ Chunk = NarrowingRatio - 1 - Chunk;
+
+ // Bail out if this is an FP vector to FP vector sequence. That would take
+ // more instructions than we started with unless there is no shift, and it
+ // may not be handled as well in the backend.
+ bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
+ bool NeedDestBitcast = DestTy->isFloatingPointTy();
+ if (NeedSrcBitcast && NeedDestBitcast)
+ return nullptr;
+
+ unsigned SrcWidth = SrcTy->getScalarSizeInBits();
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+ unsigned ShAmt = Chunk * DestWidth;
+
+ // TODO: This limitation is more strict than necessary. We could sum the
+ // number of new instructions and subtract the number eliminated to know if
+ // we can proceed.
+ if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
+ if (NeedSrcBitcast || NeedDestBitcast)
+ return nullptr;
+
+ if (NeedSrcBitcast) {
+ Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
+ Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
+ }
+
+ if (ShAmt) {
+ // Bail out if we could end with more instructions than we started with.
+ if (!Ext.getVectorOperand()->hasOneUse())
+ return nullptr;
+ Scalar = Builder.CreateLShr(Scalar, ShAmt);
+ }
+
+ if (NeedDestBitcast) {
+ Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
+ return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
+ }
+ return new TruncInst(Scalar, DestTy);
+ }
+
+ return nullptr;
+}
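
A hand-written sketch of the narrowing case handled by foldBitcastExtElt(), mirroring the byte diagram in the comment above (little-endian target assumed; names are illustrative):

    define i16 @ext_of_ins(<2 x i32> %v, i32 %s) {
      %ins = insertelement <2 x i32> %v, i32 %s, i32 1
      %bc = bitcast <2 x i32> %ins to <4 x i16>
      %ext = extractelement <4 x i16> %bc, i32 3
      ret i16 %ext
    }
    ; on a little-endian target this should reduce to roughly:
    ;   %sh = lshr i32 %s, 16
    ;   %ext = trunc i32 %sh to i16
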
+
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
- if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(),
- EI.getIndexOperand(),
+ Value *SrcVec = EI.getVectorOperand();
+ Value *Index = EI.getIndexOperand();
+ if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
SQ.getWithInstruction(&EI)))
return replaceInstUsesWith(EI, V);
- // If vector val is constant with all elements the same, replace EI with
- // that element. We handle a known element # below.
- if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
- if (cheapToScalarize(C, false))
- return replaceInstUsesWith(EI, C->getAggregateElement(0U));
-
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
- if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
- unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
+ auto *IndexC = dyn_cast<ConstantInt>(Index);
+ if (IndexC) {
+ unsigned NumElts = EI.getVectorOperandType()->getNumElements();
// InstSimplify should handle cases where the index is invalid.
- if (!IdxC->getValue().ule(VectorWidth))
+ if (!IndexC->getValue().ule(NumElts))
return nullptr;
- unsigned IndexVal = IdxC->getZExtValue();
-
// This instruction only demands the single element from the input vector.
// If the input vector has a single use, simplify it based on this use
// property.
- if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
- APInt UndefElts(VectorWidth, 0);
- APInt DemandedMask(VectorWidth, 0);
- DemandedMask.setBit(IndexVal);
- if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
+ if (SrcVec->hasOneUse() && NumElts != 1) {
+ APInt UndefElts(NumElts, 0);
+ APInt DemandedElts(NumElts, 0);
+ DemandedElts.setBit(IndexC->getZExtValue());
+ if (Value *V = SimplifyDemandedVectorElts(SrcVec, DemandedElts,
UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
}
- // If this extractelement is directly using a bitcast from a vector of
- // the same number of elements, see if we can find the source element from
- // it. In this case, we will end up needing to bitcast the scalars.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
- if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
- if (VT->getNumElements() == VectorWidth)
- if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal))
- return new BitCastInst(Elt, EI.getType());
- }
+ if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
+ return I;
// If there's a vector PHI feeding a scalar use through this extractelement
// instruction, try to scalarize the PHI.
- if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
- Instruction *scalarPHI = scalarizePHI(EI, PN);
- if (scalarPHI)
- return scalarPHI;
- }
+ if (auto *Phi = dyn_cast<PHINode>(SrcVec))
+ if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
+ return ScalarPHI;
}
- if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
- // Push extractelement into predecessor operation if legal and
- // profitable to do so.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- if (I->hasOneUse() &&
- cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
- Value *newEI0 =
- Builder.CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
- EI.getName()+".lhs");
- Value *newEI1 =
- Builder.CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
- EI.getName()+".rhs");
- return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(),
- newEI0, newEI1, BO);
- }
- } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+ BinaryOperator *BO;
+ if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
+ // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
+ Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
+ Value *E0 = Builder.CreateExtractElement(X, Index);
+ Value *E1 = Builder.CreateExtractElement(Y, Index);
+ return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
+ }
+
+ Value *X, *Y;
+ CmpInst::Predicate Pred;
+ if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
+ cheapToScalarize(SrcVec, IndexC)) {
+ // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
+ Value *E0 = Builder.CreateExtractElement(X, Index);
+ Value *E1 = Builder.CreateExtractElement(Y, Index);
+ return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
+ }
+
+ if (auto *I = dyn_cast<Instruction>(SrcVec)) {
+ if (auto *IE = dyn_cast<InsertElementInst>(I)) {
// Extracting the inserted element?
- if (IE->getOperand(2) == EI.getOperand(1))
+ if (IE->getOperand(2) == Index)
return replaceInstUsesWith(EI, IE->getOperand(1));
// If the inserted and extracted elements are constants, they must not
// be the same value, extract from the pre-inserted value instead.
- if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
- Worklist.AddValue(EI.getOperand(0));
+ if (isa<Constant>(IE->getOperand(2)) && IndexC) {
+ Worklist.AddValue(SrcVec);
EI.setOperand(0, IE->getOperand(0));
return &EI;
}
- } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
+ } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
// If this is extracting an element from a shufflevector, figure out where
// it came from and extract from the appropriate input element instead.
- if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ if (auto *Elt = dyn_cast<ConstantInt>(Index)) {
int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
Value *Src;
unsigned LHSWidth =
@@ -270,13 +348,12 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
ConstantInt::get(Int32Ty,
SrcIdx, false));
}
- } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ } else if (auto *CI = dyn_cast<CastInst>(I)) {
// Canonicalize extractelement(cast) -> cast(extractelement).
// Bitcasts can change the number of vector elements, and they cost
// nothing.
if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
- Value *EE = Builder.CreateExtractElement(CI->getOperand(0),
- EI.getIndexOperand());
+ Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
@@ -791,43 +868,62 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
replaceInstUsesWith(IE, VecOp);
- // If the inserted element was extracted from some other vector, and if the
- // indexes are constant, try to turn this into a shufflevector operation.
- if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
- if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
- unsigned NumInsertVectorElts = IE.getType()->getNumElements();
- unsigned NumExtractVectorElts =
- EI->getOperand(0)->getType()->getVectorNumElements();
- unsigned ExtractedIdx =
- cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
- unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
- if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
- return replaceInstUsesWith(IE, VecOp);
-
- if (InsertedIdx >= NumInsertVectorElts) // Out of range insert.
- return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
-
- // If we are extracting a value from a vector, then inserting it right
- // back into the same place, just use the input vector.
- if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
- return replaceInstUsesWith(IE, VecOp);
-
- // If this insertelement isn't used by some other insertelement, turn it
- // (and any insertelements it points to), into one big shuffle.
- if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
- SmallVector<Constant*, 16> Mask;
- ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
-
- // The proposed shuffle may be trivial, in which case we shouldn't
- // perform the combine.
- if (LR.first != &IE && LR.second != &IE) {
- // We now have a shuffle of LHS, RHS, Mask.
- if (LR.second == nullptr)
- LR.second = UndefValue::get(LR.first->getType());
- return new ShuffleVectorInst(LR.first, LR.second,
- ConstantVector::get(Mask));
- }
+ // If the inserted element was extracted from some other vector and both
+ // indexes are constant, try to turn this into a shuffle.
+ uint64_t InsertedIdx, ExtractedIdx;
+ Value *ExtVecOp;
+ if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
+ match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
+ m_ConstantInt(ExtractedIdx)))) {
+ unsigned NumInsertVectorElts = IE.getType()->getNumElements();
+ unsigned NumExtractVectorElts = ExtVecOp->getType()->getVectorNumElements();
+ if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
+ return replaceInstUsesWith(IE, VecOp);
+
+ if (InsertedIdx >= NumInsertVectorElts) // Out of range insert.
+ return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+ // If we are extracting a value from a vector, then inserting it right
+ // back into the same place, just use the input vector.
+ if (ExtVecOp == VecOp && ExtractedIdx == InsertedIdx)
+ return replaceInstUsesWith(IE, VecOp);
+
+ // TODO: Looking at the user(s) to determine if this insert is a
+ // fold-to-shuffle opportunity does not match the usual instcombine
+ // constraints. We should decide if the transform is worthy based only
+ // on this instruction and its operands, but that may not work currently.
+ //
+ // Here, we are trying to avoid creating shuffles before reaching
+ // the end of a chain of extract-insert pairs. This is complicated because
+ // we do not generally form arbitrary shuffle masks in instcombine
+ // (because those may codegen poorly), but collectShuffleElements() does
+ // exactly that.
+ //
+ // The rules for determining what is an acceptable target-independent
+ // shuffle mask are fuzzy because they evolve based on the backend's
+ // capabilities and real-world impact.
+ auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
+ if (!Insert.hasOneUse())
+ return true;
+ auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
+ if (!InsertUser)
+ return true;
+ return false;
+ };
+
+ // Try to form a shuffle from a chain of extract-insert ops.
+ if (isShuffleRootCandidate(IE)) {
+ SmallVector<Constant*, 16> Mask;
+ ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
+
+ // The proposed shuffle may be trivial, in which case we shouldn't
+ // perform the combine.
+ if (LR.first != &IE && LR.second != &IE) {
+ // We now have a shuffle of LHS, RHS, Mask.
+ if (LR.second == nullptr)
+ LR.second = UndefValue::get(LR.first->getType());
+ return new ShuffleVectorInst(LR.first, LR.second,
+ ConstantVector::get(Mask));
}
}
}
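
For reference, the extract/insert chain that collectShuffleElements() is meant to turn into one shuffle looks roughly like this (illustrative values; the exact operand order and mask of the result may differ):

    define <4 x float> @collect(<4 x float> %a, <4 x float> %b) {
      %a0 = extractelement <4 x float> %a, i32 0
      %b0 = extractelement <4 x float> %b, i32 0
      %v0 = insertelement <4 x float> undef, float %a0, i32 0
      %v1 = insertelement <4 x float> %v0, float %b0, i32 1
      ret <4 x float> %v1
    }
    ; expected to become a single shuffle selecting a[0] and b[0], e.g.:
    ;   %v1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
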
@@ -857,7 +953,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
/// Return true if we can evaluate the specified expression tree if the vector
/// elements were shuffled in a different order.
-static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
+static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
unsigned Depth = 5) {
// We can always reorder the elements of a constant.
if (isa<Constant>(V))
@@ -904,8 +1000,15 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::GetElementPtr: {
+ // Bail out if we would create longer vector ops. We could allow creating
+ // longer vector ops, but that may result in more expensive codegen. We
+ // would also need to limit the transform to avoid undefined behavior for
+ // integer div/rem.
+ Type *ITy = I->getType();
+ if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
+ return false;
for (Value *Operand : I->operands()) {
- if (!CanEvaluateShuffled(Operand, Mask, Depth-1))
+ if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
return false;
}
return true;
@@ -925,7 +1028,7 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
SeenOnce = true;
}
}
- return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
+ return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
}
}
return false;
@@ -1009,12 +1112,12 @@ static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
llvm_unreachable("failed to rebuild vector instructions");
}
-Value *
-InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
+static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
// Mask.size() does not need to be equal to the number of vector elements.
assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
Type *EltTy = V->getType()->getScalarType();
+ Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
if (isa<UndefValue>(V))
return UndefValue::get(VectorType::get(EltTy, Mask.size()));
@@ -1025,9 +1128,9 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
SmallVector<Constant *, 16> MaskValues;
for (int i = 0, e = Mask.size(); i != e; ++i) {
if (Mask[i] == -1)
- MaskValues.push_back(UndefValue::get(Builder.getInt32Ty()));
+ MaskValues.push_back(UndefValue::get(I32Ty));
else
- MaskValues.push_back(Builder.getInt32(Mask[i]));
+ MaskValues.push_back(ConstantInt::get(I32Ty, Mask[i]));
}
return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
ConstantVector::get(MaskValues));
@@ -1069,7 +1172,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
SmallVector<Value*, 8> NewOps;
bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
- Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
+ Value *V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
NewOps.push_back(V);
NeedsRebuild |= (V != I->getOperand(i));
}
@@ -1096,11 +1199,11 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
// If element is not in Mask, no need to handle the operand 1 (element to
// be inserted). Just evaluate values in operand 0 according to Mask.
if (!Found)
- return EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
+ return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
- Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
+ Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
return InsertElementInst::Create(V, I->getOperand(1),
- Builder.getInt32(Index), "", I);
+ ConstantInt::get(I32Ty, Index), "", I);
}
}
llvm_unreachable("failed to reorder elements of vector instruction!");
@@ -1350,12 +1453,144 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
return NewBO;
}
+/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
+/// narrowing (concatenating with undef and extracting back to the original
+/// length). This allows replacing the wide select with a narrow select.
+static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
+ InstCombiner::BuilderTy &Builder) {
+ // This must be a narrowing identity shuffle. It extracts the 1st N elements
+ // of the 1st vector operand of a shuffle.
+ if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
+ return nullptr;
+
+ // The vector being shuffled must be a vector select that we can eliminate.
+ // TODO: The one-use requirement could be eased if X and/or Y are constants.
+ Value *Cond, *X, *Y;
+ if (!match(Shuf.getOperand(0),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
+ return nullptr;
+
+ // We need a narrow condition value. It must be extended with undef elements
+ // and have the same number of elements as this shuffle.
+ unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements();
+ Value *NarrowCond;
+ if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(),
+ m_Constant()))) ||
+ NarrowCond->getType()->getVectorNumElements() != NarrowNumElts ||
+ !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
+ return nullptr;
+
+ // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask -->
+ // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
+ Value *Undef = UndefValue::get(X->getType());
+ Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask());
+ Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask());
+ return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
+}
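
A minimal sketch of the shuffle-select-shuffle pattern matched by narrowVectorSelect(), assuming the one-use constraints hold (names are illustrative):

    define <2 x i32> @narrow_sel(<2 x i1> %cond, <4 x i32> %x, <4 x i32> %y) {
      %widecond = shufflevector <2 x i1> %cond, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
      %sel = select <4 x i1> %widecond, <4 x i32> %x, <4 x i32> %y
      %narrow = shufflevector <4 x i32> %sel, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
      ret <2 x i32> %narrow
    }
    ; expected to become roughly:
    ;   %xn = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
    ;   %yn = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
    ;   %narrow = select <2 x i1> %cond, <2 x i32> %xn, <2 x i32> %yn
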
+
+/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
+static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
+ Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
+ if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1))
+ return nullptr;
+
+ Value *X, *Y;
+ Constant *Mask;
+ if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
+ return nullptr;
+
+ // We are extracting a subvector from a shuffle. Remove excess elements from
+ // the 1st shuffle mask to eliminate the extract.
+ //
+ // This transform is conservatively limited to identity extracts because we do
+ // not allow arbitrary shuffle mask creation as a target-independent transform
+ // (because we can't guarantee that will lower efficiently).
+ //
+ // If the extracting shuffle has an undef mask element, it transfers to the
+ // new shuffle mask. Otherwise, copy the original mask element. Example:
+ // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
+ // shuf X, Y, <C0, undef, C2, undef>
+ unsigned NumElts = Shuf.getType()->getVectorNumElements();
+ SmallVector<Constant *, 16> NewMask(NumElts);
+ assert(NumElts < Mask->getType()->getVectorNumElements() &&
+ "Identity with extract must have less elements than its inputs");
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *ExtractMaskElt = Shuf.getMask()->getAggregateElement(i);
+ Constant *MaskElt = Mask->getAggregateElement(i);
+ NewMask[i] = isa<UndefValue>(ExtractMaskElt) ? ExtractMaskElt : MaskElt;
+ }
+ return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
+}
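
For example, extracting the low half of a wider shuffle should now fold to a single narrower shuffle by trimming the inner mask (a sketch with illustrative values):

    define <4 x i32> @extract_of_shuffle(<8 x i32> %x, <8 x i32> %y) {
      %wide = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
      %narrow = shufflevector <8 x i32> %wide, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
      ret <4 x i32> %narrow
    }
    ; expected to become roughly:
    ;   %narrow = shufflevector <8 x i32> %x, <8 x i32> %y, <4 x i32> <i32 0, i32 8, i32 1, i32 9>
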
+
+/// Try to replace a shuffle with an insertelement.
+static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
+ Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
+ SmallVector<int, 16> Mask = Shuf.getShuffleMask();
+
+ // The shuffle must not change vector sizes.
+ // TODO: This restriction could be removed if the insert has only one use
+ // (because the transform would require a new length-changing shuffle).
+ int NumElts = Mask.size();
+ if (NumElts != (int)(V0->getType()->getVectorNumElements()))
+ return nullptr;
+
+ // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
+ auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
+ // We need an insertelement with a constant index.
+ if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar),
+ m_ConstantInt(IndexC))))
+ return false;
+
+ // Test the shuffle mask to see if it splices the inserted scalar into the
+ // operand 1 vector of the shuffle.
+ int NewInsIndex = -1;
+ for (int i = 0; i != NumElts; ++i) {
+ // Ignore undef mask elements.
+ if (Mask[i] == -1)
+ continue;
+
+ // The shuffle takes elements of operand 1 without lane changes.
+ if (Mask[i] == NumElts + i)
+ continue;
+
+ // The shuffle must choose the inserted scalar exactly once.
+ if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
+ return false;
+
+ // The shuffle is placing the inserted scalar into element i.
+ NewInsIndex = i;
+ }
+
+ assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
+
+ // Index is updated to the potentially translated insertion lane.
+ IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
+ return true;
+ };
+
+ // If the shuffle is unnecessary, insert the scalar operand directly into
+ // operand 1 of the shuffle. Example:
+ // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
+ Value *Scalar;
+ ConstantInt *IndexC;
+ if (isShufflingScalarIntoOp1(Scalar, IndexC))
+ return InsertElementInst::Create(V1, Scalar, IndexC);
+
+ // Try again after commuting shuffle. Example:
+ // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
+ // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
+ std::swap(V0, V1);
+ ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
+ if (isShufflingScalarIntoOp1(Scalar, IndexC))
+ return InsertElementInst::Create(V1, Scalar, IndexC);
+
+ return nullptr;
+}
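
The first example from the comment above, written out as IR (a sketch; the inserted scalar is the only element of the insert that the mask uses):

    define <4 x i32> @shuf_of_insert(i32 %s, <4 x i32> %v1) {
      %ins = insertelement <4 x i32> undef, i32 %s, i32 1
      %r = shufflevector <4 x i32> %ins, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 6, i32 7>
      ret <4 x i32> %r
    }
    ; expected to become roughly:
    ;   %r = insertelement <4 x i32> %v1, i32 %s, i32 0
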
+
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
- SmallVector<int, 16> Mask = SVI.getShuffleMask();
- Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
-
if (auto *V = SimplifyShuffleVectorInst(
LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
return replaceInstUsesWith(SVI, V);
@@ -1363,9 +1598,10 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
return I;
- bool MadeChange = false;
- unsigned VWidth = SVI.getType()->getVectorNumElements();
+ if (Instruction *I = narrowVectorSelect(SVI, Builder))
+ return I;
+ unsigned VWidth = SVI.getType()->getVectorNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
@@ -1374,18 +1610,22 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return &SVI;
}
+ if (Instruction *I = foldIdentityExtractShuffle(SVI))
+ return I;
+
+ // This transform has the potential to lose undef knowledge, so it is
+ // intentionally placed after SimplifyDemandedVectorElts().
+ if (Instruction *I = foldShuffleWithInsert(SVI))
+ return I;
+
+ SmallVector<int, 16> Mask = SVI.getShuffleMask();
+ Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
unsigned LHSWidth = LHS->getType()->getVectorNumElements();
+ bool MadeChange = false;
// Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
// Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
if (LHS == RHS || isa<UndefValue>(LHS)) {
- if (isa<UndefValue>(LHS) && LHS == RHS) {
- // shuffle(undef,undef,mask) -> undef.
- Value *Result = (VWidth == LHSWidth)
- ? LHS : UndefValue::get(SVI.getType());
- return replaceInstUsesWith(SVI, Result);
- }
-
// Remap any references to RHS to use LHS.
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
@@ -1421,8 +1661,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isRHSID) return replaceInstUsesWith(SVI, RHS);
}
- if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
- Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
+ if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
+ Value *V = evaluateInDifferentElementOrder(LHS, Mask);
return replaceInstUsesWith(SVI, V);
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index cff0d5447290..be7d43bbcf2c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -57,7 +57,6 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -97,6 +96,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -120,6 +120,10 @@ DEBUG_COUNTER(VisitCounter, "instcombine-visit",
"Controls which instructions are visited");
static cl::opt<bool>
+EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
+ cl::init(true));
+
+static cl::opt<bool>
EnableExpensiveCombines("expensive-combines",
cl::desc("Enable expensive instruction combines"));
@@ -179,7 +183,10 @@ bool InstCombiner::shouldChangeType(unsigned FromWidth,
/// a fundamental type in IR, and there are many specialized optimizations for
/// i1 types.
bool InstCombiner::shouldChangeType(Type *From, Type *To) const {
- assert(From->isIntegerTy() && To->isIntegerTy());
+ // TODO: This could be extended to allow vectors. Datalayout changes might be
+ // needed to properly support that.
+ if (!From->isIntegerTy() || !To->isIntegerTy())
+ return false;
unsigned FromWidth = From->getPrimitiveSizeInBits();
unsigned ToWidth = To->getPrimitiveSizeInBits();
@@ -747,8 +754,9 @@ Value *InstCombiner::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
/// constant zero (which is the 'negate' form).
Value *InstCombiner::dyn_castNegVal(Value *V) const {
- if (BinaryOperator::isNeg(V))
- return BinaryOperator::getNegArgument(V);
+ Value *NegV;
+ if (match(V, m_Neg(m_Value(NegV))))
+ return NegV;
// Constants can be considered to be negated values if they can be folded.
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
@@ -1351,22 +1359,46 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
} while (true);
}
-Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
+Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
if (!Inst.getType()->isVectorTy()) return nullptr;
+ BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
+ unsigned NumElts = cast<VectorType>(Inst.getType())->getNumElements();
+ Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
+ assert(cast<VectorType>(LHS->getType())->getNumElements() == NumElts);
+ assert(cast<VectorType>(RHS->getType())->getNumElements() == NumElts);
+
+ // If both operands of the binop are vector concatenations, then perform the
+ // narrow binop on each pair of the source operands followed by concatenation
+ // of the results.
+ Value *L0, *L1, *R0, *R1;
+ Constant *Mask;
+ if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) &&
+ match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) &&
+ LHS->hasOneUse() && RHS->hasOneUse() &&
+ cast<ShuffleVectorInst>(LHS)->isConcat()) {
+ // This transform does not have the speculative execution constraint as
+ // below because the shuffle is a concatenation. The new binops are
+ // operating on exactly the same elements as the existing binop.
+ // TODO: We could ease the mask requirement to allow different undef lanes,
+ // but that requires an analysis of the binop-with-undef output value.
+ Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
+ if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
+ BO->copyIRFlags(&Inst);
+ Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
+ if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
+ BO->copyIRFlags(&Inst);
+ return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
+ }
+
// It may not be safe to reorder shuffles and things like div, urem, etc.
// because we may trap when executing those ops on unknown vector elements.
// See PR20059.
if (!isSafeToSpeculativelyExecute(&Inst))
return nullptr;
- unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
- Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
- assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
- assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth);
-
auto createBinOpShuffle = [&](Value *X, Value *Y, Constant *M) {
- Value *XY = Builder.CreateBinOp(Inst.getOpcode(), X, Y);
+ Value *XY = Builder.CreateBinOp(Opcode, X, Y);
if (auto *BO = dyn_cast<BinaryOperator>(XY))
BO->copyIRFlags(&Inst);
return new ShuffleVectorInst(XY, UndefValue::get(XY->getType()), M);
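
A rough sketch of the new concatenation case in foldVectorBinop(): when both operands are one-use concatenations with the same mask, the binop is done on the narrow halves and the results are re-concatenated (illustrative names):

    define <4 x i32> @concat_add(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
      %lhs = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
      %rhs = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
      %r = add <4 x i32> %lhs, %rhs
      ret <4 x i32> %r
    }
    ; expected to become roughly:
    ;   %lo = add <2 x i32> %a, %c
    ;   %hi = add <2 x i32> %b, %d
    ;   %r = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
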
@@ -1375,7 +1407,6 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
// If both arguments of the binary operation are shuffles that use the same
// mask and shuffle within a single vector, move the shuffle after the binop.
Value *V1, *V2;
- Constant *Mask;
if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))) &&
match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(Mask))) &&
V1->getType() == V2->getType() &&
@@ -1393,42 +1424,69 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
if (match(&Inst, m_c_BinOp(
m_OneUse(m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))),
m_Constant(C))) &&
- V1->getType() == Inst.getType()) {
+ V1->getType()->getVectorNumElements() <= NumElts) {
+ assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
+ "Shuffle should not change scalar type");
+
// Find constant NewC that has property:
// shuffle(NewC, ShMask) = C
// If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
// reorder is not possible. A 1-to-1 mapping is not required. Example:
// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
+ bool ConstOp1 = isa<Constant>(RHS);
SmallVector<int, 16> ShMask;
ShuffleVectorInst::getShuffleMask(Mask, ShMask);
- SmallVector<Constant *, 16>
- NewVecC(VWidth, UndefValue::get(C->getType()->getScalarType()));
+ unsigned SrcVecNumElts = V1->getType()->getVectorNumElements();
+ UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
+ SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
bool MayChange = true;
- for (unsigned I = 0; I < VWidth; ++I) {
+ for (unsigned I = 0; I < NumElts; ++I) {
+ Constant *CElt = C->getAggregateElement(I);
if (ShMask[I] >= 0) {
- assert(ShMask[I] < (int)VWidth);
- Constant *CElt = C->getAggregateElement(I);
+ assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
Constant *NewCElt = NewVecC[ShMask[I]];
- if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt)) {
+ // Bail out if:
+ // 1. The constant vector contains a constant expression.
+ // 2. The shuffle needs an element of the constant vector that can't
+ // be mapped to a new constant vector.
+ // 3. This is a widening shuffle that copies elements of V1 into the
+ // extended elements (extending with undef is allowed).
+ if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
+ I >= SrcVecNumElts) {
MayChange = false;
break;
}
NewVecC[ShMask[I]] = CElt;
}
+ // If this is a widening shuffle, we must be able to extend with undef
+ // elements. If the original binop does not produce an undef in the high
+ // lanes, then this transform is not safe.
+ // TODO: We could shuffle those non-undef constant values into the
+ // result by using a constant vector (rather than an undef vector)
+ // as operand 1 of the new binop, but that might be too aggressive
+ // for target-independent shuffle creation.
+ if (I >= SrcVecNumElts) {
+ Constant *MaybeUndef =
+ ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
+ : ConstantExpr::get(Opcode, CElt, UndefScalar);
+ if (!isa<UndefValue>(MaybeUndef)) {
+ MayChange = false;
+ break;
+ }
+ }
}
if (MayChange) {
Constant *NewC = ConstantVector::get(NewVecC);
// It may not be safe to execute a binop on a vector with undef elements
// because the entire instruction can be folded to undef or create poison
// that did not exist in the original code.
- bool ConstOp1 = isa<Constant>(Inst.getOperand(1));
if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
- NewC = getSafeVectorConstantForBinop(Inst.getOpcode(), NewC, ConstOp1);
+ NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
// Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
// Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
- Value *NewLHS = isa<Constant>(LHS) ? NewC : V1;
- Value *NewRHS = isa<Constant>(LHS) ? V1 : NewC;
+ Value *NewLHS = ConstOp1 ? V1 : NewC;
+ Value *NewRHS = ConstOp1 ? NewC : V1;
return createBinOpShuffle(NewLHS, NewRHS, Mask);
}
}
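
The ShMask example from the comment above corresponds to IR roughly like this; the same-width case continues to move the binop ahead of the shuffle (illustrative):

    define <4 x i32> @shuffle_then_add(<4 x i32> %v) {
      %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
      %r = add <4 x i32> %s, <i32 5, i32 5, i32 6, i32 6>
      ret <4 x i32> %r
    }
    ; expected to become roughly:
    ;   %a = add <4 x i32> %v, <i32 undef, i32 5, i32 6, i32 undef>
    ;   %r = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
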
@@ -1436,6 +1494,62 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
return nullptr;
}
+/// Try to narrow the width of a binop if at least 1 operand is an extend
+/// of a value. This requires a potentially expensive known bits check to make
+/// sure the narrow op does not overflow.
+Instruction *InstCombiner::narrowMathIfNoOverflow(BinaryOperator &BO) {
+ // We need at least one extended operand.
+ Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
+
+ // If this is a sub, we swap the operands since we always want an extension
+ // on the RHS. The LHS can be an extension or a constant.
+ if (BO.getOpcode() == Instruction::Sub)
+ std::swap(Op0, Op1);
+
+ Value *X;
+ bool IsSext = match(Op0, m_SExt(m_Value(X)));
+ if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
+ return nullptr;
+
+ // If both operands are the same extension from the same source type and we
+ // can eliminate at least one (hasOneUse), this might work.
+ CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
+ Value *Y;
+ if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
+ cast<Operator>(Op1)->getOpcode() == CastOpc &&
+ (Op0->hasOneUse() || Op1->hasOneUse()))) {
+ // If that did not match, see if we have a suitable constant operand.
+ // Truncating and extending must produce the same constant.
+ Constant *WideC;
+ if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
+ return nullptr;
+ Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
+ if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
+ return nullptr;
+ Y = NarrowC;
+ }
+
+ // Swap back now that we found our operands.
+ if (BO.getOpcode() == Instruction::Sub)
+ std::swap(X, Y);
+
+ // Both operands have narrow versions. Last step: the math must not overflow
+ // in the narrow width.
+ if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
+ return nullptr;
+
+ // bo (ext X), (ext Y) --> ext (bo X, Y)
+ // bo (ext X), C --> ext (bo X, C')
+ Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
+ if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
+ if (IsSext)
+ NewBinOp->setHasNoSignedWrap();
+ else
+ NewBinOp->setHasNoUnsignedWrap();
+ }
+ return CastInst::Create(CastOpc, NarrowBO, BO.getType());
+}
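
A sketch of the kind of input narrowMathIfNoOverflow() targets: both operands of the wide add are zexts from the same narrow type, and known bits prove the narrow add cannot wrap (illustrative; the wrap flag on the narrow op depends on the extension kind):

    define i64 @narrow_add(i32 %a) {
      %x = lshr i32 %a, 7
      %y = lshr i32 %a, 9
      %xz = zext i32 %x to i64
      %yz = zext i32 %y to i64
      %r = add i64 %xz, %yz
      ret i64 %r
    }
    ; the i32 add of %x and %y cannot overflow, so this should become roughly:
    ;   %n = add nuw i32 %x, %y
    ;   %r = zext i32 %n to i64
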
+
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
Type *GEPType = GEP.getType();
@@ -1963,9 +2077,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
areMatchingArrayAndVecTypes(GEPEltType, SrcEltType)) ||
(GEPEltType->isVectorTy() && SrcEltType->isArrayTy() &&
areMatchingArrayAndVecTypes(SrcEltType, GEPEltType)))) {
- GEP.setOperand(0, SrcOp);
- GEP.setSourceElementType(SrcEltType);
- return &GEP;
+
+ // Create a new GEP here, as using `setOperand()` followed by
+ // `setSourceElementType()` won't actually update the type of the
+ // existing GEP Value, which causes issues if this Value is accessed when
+ // constructing an AddrSpaceCastInst.
+ Value *NGEP =
+ GEP.isInBounds()
+ ? Builder.CreateInBoundsGEP(nullptr, SrcOp, {Ops[1], Ops[2]})
+ : Builder.CreateGEP(nullptr, SrcOp, {Ops[1], Ops[2]});
+ NGEP->takeName(&GEP);
+
+ // Preserve GEP address space to satisfy users
+ if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+ return new AddrSpaceCastInst(NGEP, GEPType);
+
+ return replaceInstUsesWith(GEP, NGEP);
}
// See if we can simplify:
@@ -2137,14 +2264,21 @@ static bool isAllocSiteRemovable(Instruction *AI,
}
Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
- // If we have a malloc call which is only used in any amount of comparisons
- // to null and free calls, delete the calls and replace the comparisons with
- // true or false as appropriate.
+ // If we have a malloc call which is only used in any amount of comparisons to
+ // null and free calls, delete the calls and replace the comparisons with true
+ // or false as appropriate.
+
+ // This is based on the principle that we can substitute our own allocation
+ // function (which will never return null) rather than knowledge of the
+ // specific function being called. In some sense this can change the permitted
+ // outputs of a program (when we convert a malloc to an alloca, the fact that
+ // the allocation is now on the stack is potentially visible, for example),
+ // but we believe it happens in a permissible manner.
SmallVector<WeakTrackingVH, 64> Users;
// If we are removing an alloca with a dbg.declare, insert dbg.value calls
// before each store.
- TinyPtrVector<DbgInfoIntrinsic *> DIIs;
+ TinyPtrVector<DbgVariableIntrinsic *> DIIs;
std::unique_ptr<DIBuilder> DIB;
if (isa<AllocaInst>(MI)) {
DIIs = FindDbgAddrUses(&MI);
@@ -2215,14 +2349,14 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
/// The move is performed only if the block containing the call to free
/// will be removed, i.e.:
/// 1. it has only one predecessor P, and P has two successors
-/// 2. it contains the call and an unconditional branch
+/// 2. it contains the call, noops, and an unconditional branch
/// 3. its successor is the same as its predecessor's successor
///
/// The profitability is out-of concern here and this function should
/// be called only if the caller knows this transformation would be
/// profitable (e.g., for code size).
-static Instruction *
-tryToMoveFreeBeforeNullTest(CallInst &FI) {
+static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
+ const DataLayout &DL) {
Value *Op = FI.getArgOperand(0);
BasicBlock *FreeInstrBB = FI.getParent();
BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
@@ -2235,20 +2369,34 @@ tryToMoveFreeBeforeNullTest(CallInst &FI) {
return nullptr;
// Validate constraint #2: Does this block contains only the call to
- // free and an unconditional branch?
- // FIXME: We could check if we can speculate everything in the
- // predecessor block
- if (FreeInstrBB->size() != 2)
- return nullptr;
+ // free, noops, and an unconditional branch?
BasicBlock *SuccBB;
- if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB)))
+ Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
+ if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
return nullptr;
+ // If there are only 2 instructions in the block, at this point,
+ // they are the call to free and the unconditional branch.
+ // If there are more than 2 instructions, check that they are noops,
+ // i.e., they won't hurt the performance of the generated code.
+ if (FreeInstrBB->size() != 2) {
+ for (const Instruction &Inst : *FreeInstrBB) {
+ if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
+ continue;
+ auto *Cast = dyn_cast<CastInst>(&Inst);
+ if (!Cast || !Cast->isNoopCast(DL))
+ return nullptr;
+ }
+ }
// Validate the rest of constraint #1 by matching on the pred branch.
- TerminatorInst *TI = PredBB->getTerminator();
+ Instruction *TI = PredBB->getTerminator();
BasicBlock *TrueBB, *FalseBB;
ICmpInst::Predicate Pred;
- if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB)))
+ if (!match(TI, m_Br(m_ICmp(Pred,
+ m_CombineOr(m_Specific(Op),
+ m_Specific(Op->stripPointerCasts())),
+ m_Zero()),
+ TrueBB, FalseBB)))
return nullptr;
if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
return nullptr;
@@ -2259,7 +2407,17 @@ tryToMoveFreeBeforeNullTest(CallInst &FI) {
assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
"Broken CFG: missing edge from predecessor to successor");
- FI.moveBefore(TI);
+ // At this point, we know that everything in FreeInstrBB can be moved
+ // before TI.
+ for (BasicBlock::iterator It = FreeInstrBB->begin(), End = FreeInstrBB->end();
+ It != End;) {
+ Instruction &Instr = *It++;
+ if (&Instr == FreeInstrBBTerminator)
+ break;
+ Instr.moveBefore(TI);
+ }
+ assert(FreeInstrBB->size() == 1 &&
+ "Only the branch instruction should remain");
return &FI;
}
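
For reference, the transform (still gated on MinimizeSize in visitFree below) has roughly this shape; a later CFG cleanup is expected to remove the emptied block (illustrative IR):

    define void @maybe_free(i8* %p) minsize {
    entry:
      %isnull = icmp eq i8* %p, null
      br i1 %isnull, label %exit, label %dofree
    dofree:
      call void @free(i8* %p)
      br label %exit
    exit:
      ret void
    }
    declare void @free(i8*)
    ; expected to become roughly: the call to @free is moved into %entry,
    ; ahead of the (now redundant) null check.
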
@@ -2286,7 +2444,7 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
// into
// free(foo);
if (MinimizeSize)
- if (Instruction *I = tryToMoveFreeBeforeNullTest(FI))
+ if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
return I;
return nullptr;
@@ -2379,9 +2537,11 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
// Shrink the condition operand if the new type is smaller than the old type.
- // This may produce a non-standard type for the switch, but that's ok because
- // the backend should extend back to a legal type for the target.
- if (NewWidth > 0 && NewWidth < Known.getBitWidth()) {
+ // But do not shrink to a non-standard type, because backend can't generate
+ // good code for that yet.
+ // TODO: We can make it aggressive again after fixing PR39569.
+ if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
+ shouldChangeType(Known.getBitWidth(), NewWidth)) {
IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
Builder.SetInsertPoint(&SI);
Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
@@ -2902,7 +3062,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
if (isa<PHINode>(I) || I->isEHPad() || I->mayHaveSideEffects() ||
- isa<TerminatorInst>(I))
+ I->isTerminator())
return false;
// Do not sink alloca instructions out of the entry block.
@@ -2934,7 +3094,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
// Also sink all related debug uses from the source basic block. Otherwise we
// get debug use before the def.
- SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
findDbgUsers(DbgUsers, I);
for (auto *DII : DbgUsers) {
if (DII->getParent() == SrcBlock) {
@@ -3000,7 +3160,7 @@ bool InstCombiner::run() {
}
// See if we can trivially sink this instruction to a successor basic block.
- if (I->hasOneUse()) {
+ if (EnableCodeSinking && I->hasOneUse()) {
BasicBlock *BB = I->getParent();
Instruction *UserInst = cast<Instruction>(*I->user_begin());
BasicBlock *UserParent;
@@ -3183,7 +3343,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
// Recursively visit successors. If this is a branch or switch on a
// constant, only visit the reachable successor.
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
@@ -3198,7 +3358,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
}
}
- for (BasicBlock *SuccBB : TI->successors())
+ for (BasicBlock *SuccBB : successors(TI))
Worklist.push_back(SuccBB);
} while (!Worklist.empty());
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6af44354225c..f1558c75cb90 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -109,6 +109,7 @@ static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
static const uint64_t kNetBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46;
+static const uint64_t kNetBSDKasan_ShadowOffset64 = 0xdfff900000000000;
static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40;
static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
@@ -344,10 +345,14 @@ static cl::opt<uint32_t> ClForceExperiment(
cl::init(0));
static cl::opt<bool>
- ClUsePrivateAliasForGlobals("asan-use-private-alias",
- cl::desc("Use private aliases for global"
- " variables"),
- cl::Hidden, cl::init(false));
+ ClUsePrivateAlias("asan-use-private-alias",
+ cl::desc("Use private aliases for global variables"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+ ClUseOdrIndicator("asan-use-odr-indicator",
+ cl::desc("Use odr indicators to improve ODR reporting"),
+ cl::Hidden, cl::init(false));
static cl::opt<bool>
ClUseGlobalsGC("asan-globals-live-support",
@@ -436,8 +441,11 @@ public:
for (auto MDN : Globals->operands()) {
// Metadata node contains the global and the fields of "Entry".
assert(MDN->getNumOperands() == 5);
- auto *GV = mdconst::extract_or_null<GlobalVariable>(MDN->getOperand(0));
+ auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
// The optimizer may optimize away a global entirely.
+ if (!V) continue;
+ auto *StrippedV = V->stripPointerCasts();
+ auto *GV = dyn_cast<GlobalVariable>(StrippedV);
if (!GV) continue;
// We can already have an entry for GV if it was merged with another
// global.
@@ -538,11 +546,14 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
Mapping.Offset = kPPC64_ShadowOffset64;
else if (IsSystemZ)
Mapping.Offset = kSystemZ_ShadowOffset64;
- else if (IsFreeBSD)
+ else if (IsFreeBSD && !IsMIPS64)
Mapping.Offset = kFreeBSD_ShadowOffset64;
- else if (IsNetBSD)
- Mapping.Offset = kNetBSD_ShadowOffset64;
- else if (IsPS4CPU)
+ else if (IsNetBSD) {
+ if (IsKasan)
+ Mapping.Offset = kNetBSDKasan_ShadowOffset64;
+ else
+ Mapping.Offset = kNetBSD_ShadowOffset64;
+ } else if (IsPS4CPU)
Mapping.Offset = kPS4CPU_ShadowOffset64;
else if (IsLinux && IsX86_64) {
if (IsKasan)
@@ -731,9 +742,12 @@ public:
explicit AddressSanitizerModule(bool CompileKernel = false,
bool Recover = false,
- bool UseGlobalsGC = true)
- : ModulePass(ID),
- UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
+ bool UseGlobalsGC = true,
+ bool UseOdrIndicator = false)
+ : ModulePass(ID), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
+ // Enable aliases as they should have no downside with ODR indicators.
+ UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias),
+ UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator),
// Not a typo: ClWithComdat is almost completely pointless without
// ClUseGlobalsGC (because then it only works on modules without
// globals, which are rare); it is a prerequisite for ClUseGlobalsGC;
@@ -742,11 +756,10 @@ public:
// ClWithComdat and ClUseGlobalsGC unless the frontend says it's ok to
// do globals-gc.
UseCtorComdat(UseGlobalsGC && ClWithComdat) {
- this->Recover = ClRecover.getNumOccurrences() > 0 ?
- ClRecover : Recover;
- this->CompileKernel = ClEnableKasan.getNumOccurrences() > 0 ?
- ClEnableKasan : CompileKernel;
- }
+ this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
+ this->CompileKernel =
+ ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel;
+ }
bool runOnModule(Module &M) override;
StringRef getPassName() const override { return "AddressSanitizerModule"; }
@@ -790,6 +803,8 @@ private:
bool CompileKernel;
bool Recover;
bool UseGlobalsGC;
+ bool UsePrivateAlias;
+ bool UseOdrIndicator;
bool UseCtorComdat;
Type *IntptrTy;
LLVMContext *C;
@@ -990,7 +1005,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
if (ID == Intrinsic::localescape) LocalEscapeCall = &II;
if (!ASan.UseAfterScope)
return;
- if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end)
+ if (!II.isLifetimeStartOrEnd())
return;
// Found lifetime intrinsic, add ASan instrumentation if necessary.
ConstantInt *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
@@ -1089,9 +1104,11 @@ INITIALIZE_PASS(
ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel,
bool Recover,
- bool UseGlobalsGC) {
+ bool UseGlobalsGC,
+ bool UseOdrIndicator) {
assert(!CompileKernel || Recover);
- return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC);
+ return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC,
+ UseOdrIndicator);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -1100,25 +1117,11 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
return Res;
}
-// Create a constant for Str so that we can pass it to the run-time lib.
-static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
- bool AllowMerging) {
- Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
- // We use private linkage for module-local strings. If they can be merged
- // with another one, we set the unnamed_addr attribute.
- GlobalVariable *GV =
- new GlobalVariable(M, StrConst->getType(), true,
- GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix);
- if (AllowMerging) GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
- return GV;
-}
-
/// Create a global describing a source location.
static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M,
LocationMetadata MD) {
Constant *LocData[] = {
- createPrivateGlobalForString(M, MD.Filename, true),
+ createPrivateGlobalForString(M, MD.Filename, true, kAsanGenPrefix),
ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.LineNo),
ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.ColumnNo),
};
@@ -1132,6 +1135,10 @@ static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M,
/// Check if \p G has been created by a trusted compiler pass.
static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
+ // Do not instrument @llvm.global_ctors, @llvm.used, etc.
+ if (G->getName().startswith("llvm."))
+ return true;
+
// Do not instrument asan globals.
if (G->getName().startswith(kAsanGenPrefix) ||
G->getName().startswith(kSanCovGenPrefix) ||
@@ -1379,7 +1386,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
} else {
IRBuilder<> IRB(I);
Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
- TerminatorInst *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
+ Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
InsertBefore = ThenTerm;
}
@@ -1532,8 +1539,9 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Value *TagCheck =
IRB.CreateICmpEQ(Tag, ConstantInt::get(IntptrTy, kMyriadDDRTag));
- TerminatorInst *TagCheckTerm = SplitBlockAndInsertIfThen(
- TagCheck, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ Instruction *TagCheckTerm =
+ SplitBlockAndInsertIfThen(TagCheck, InsertBefore, false,
+ MDBuilder(*C).createBranchWeights(1, 100000));
assert(cast<BranchInst>(TagCheckTerm)->isUnconditional());
IRB.SetInsertPoint(TagCheckTerm);
InsertBefore = TagCheckTerm;
@@ -1549,12 +1557,12 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
size_t Granularity = 1ULL << Mapping.Scale;
- TerminatorInst *CrashTerm = nullptr;
+ Instruction *CrashTerm = nullptr;
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
// We use branch weights for the slow path check, to indicate that the slow
// path is rarely taken. This seems to be the case for SPEC benchmarks.
- TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen(
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000));
assert(cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
@@ -1653,14 +1661,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals.
- // Touch only those globals that will not be defined in other modules.
- // Don't handle ODR linkage types and COMDATs since other modules may be built
- // without ASan.
- if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
- G->getLinkage() != GlobalVariable::PrivateLinkage &&
- G->getLinkage() != GlobalVariable::InternalLinkage)
- return false;
- if (G->hasComdat()) return false;
// Two problems with thread-locals:
// - The address of the main thread's copy can't be computed at link-time.
// - Need to poison all copies, not just the main thread's one.
@@ -1668,6 +1668,33 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// For now, just ignore this Global if the alignment is large.
if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false;
+ // For non-COFF targets, only instrument globals known to be defined by this
+ // TU.
+ // FIXME: We can instrument comdat globals on ELF if we are using the
+ // GC-friendly metadata scheme.
+ if (!TargetTriple.isOSBinFormatCOFF()) {
+ if (!G->hasExactDefinition() || G->hasComdat())
+ return false;
+ } else {
+ // On COFF, don't instrument non-ODR linkages.
+ if (G->isInterposable())
+ return false;
+ }
+
+ // If a comdat is present, it must have a selection kind that implies ODR
+ // semantics: no duplicates, any, or exact match.
+ if (Comdat *C = G->getComdat()) {
+ switch (C->getSelectionKind()) {
+ case Comdat::Any:
+ case Comdat::ExactMatch:
+ case Comdat::NoDuplicates:
+ break;
+ case Comdat::Largest:
+ case Comdat::SameSize:
+ return false;
+ }
+ }
+
if (G->hasSection()) {
StringRef Section = G->getSection();
@@ -2082,7 +2109,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
// We shouldn't merge same module names, as this string serves as unique
// module ID in runtime.
GlobalVariable *ModuleName = createPrivateGlobalForString(
- M, M.getModuleIdentifier(), /*AllowMerging*/ false);
+ M, M.getModuleIdentifier(), /*AllowMerging*/ false, kAsanGenPrefix);
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
@@ -2094,7 +2121,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
// if it's available, otherwise just write the name of global variable).
GlobalVariable *Name = createPrivateGlobalForString(
M, MD.Name.empty() ? NameForGlobal : MD.Name,
- /*AllowMerging*/ true);
+ /*AllowMerging*/ true, kAsanGenPrefix);
Type *Ty = G->getValueType();
uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
@@ -2121,7 +2148,12 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
new GlobalVariable(M, NewTy, G->isConstant(), Linkage, NewInitializer,
"", G, G->getThreadLocalMode());
NewGlobal->copyAttributesFrom(G);
+ NewGlobal->setComdat(G->getComdat());
NewGlobal->setAlignment(MinRZ);
+ // Don't fold globals with redzones. The ODR violation detector and redzone
+ // poisoning implicitly create a dependence on the global's address, so it
+ // is no longer valid for it to be marked unnamed_addr.
+ NewGlobal->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
// Move null-terminated C strings to "__asan_cstring" section on Darwin.
if (TargetTriple.isOSBinFormatMachO() && !G->hasSection() &&
@@ -2162,12 +2194,18 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
bool CanUsePrivateAliases =
TargetTriple.isOSBinFormatELF() || TargetTriple.isOSBinFormatMachO() ||
TargetTriple.isOSBinFormatWasm();
- if (CanUsePrivateAliases && ClUsePrivateAliasForGlobals) {
+ if (CanUsePrivateAliases && UsePrivateAlias) {
// Create local alias for NewGlobal to avoid crash on ODR between
// instrumented and non-instrumented libraries.
- auto *GA = GlobalAlias::create(GlobalValue::InternalLinkage,
- NameForGlobal + M.getName(), NewGlobal);
+ InstrumentedGlobal =
+ GlobalAlias::create(GlobalValue::PrivateLinkage, "", NewGlobal);
+ }
+ // ODR violations should not happen for local linkage.
+ if (NewGlobal->hasLocalLinkage()) {
+ ODRIndicator = ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, -1),
+ IRB.getInt8PtrTy());
+ } else if (UseOdrIndicator) {
// With local aliases, we need to provide another externally visible
// symbol __odr_asan_XXX to detect ODR violation.
auto *ODRIndicatorSym =
@@ -2181,7 +2219,6 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass());
ODRIndicatorSym->setAlignment(1);
ODRIndicator = ODRIndicatorSym;
- InstrumentedGlobal = GA;
}
Constant *Initializer = ConstantStruct::get(
@@ -2996,7 +3033,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
IntptrPtrTy);
GlobalVariable *StackDescriptionGlobal =
createPrivateGlobalForString(*F.getParent(), DescriptionString,
- /*AllowMerging*/ true);
+ /*AllowMerging*/ true, kAsanGenPrefix);
Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
IRB.CreateStore(Description, BasePlus1);
// Write the PC to redzone[2].
@@ -3054,7 +3091,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
// <This is not a fake stack; unpoison the redzones>
Value *Cmp =
IRBRet.CreateICmpNE(FakeStack, Constant::getNullValue(IntptrTy));
- TerminatorInst *ThenTerm, *ElseTerm;
+ Instruction *ThenTerm, *ElseTerm;
SplitBlockAndInsertIfThenElse(Cmp, Ret, &ThenTerm, &ElseTerm);
IRBuilder<> IRBPoison(ThenTerm);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
index cc9b149d0b6a..e178ef386e68 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
+++ b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
@@ -119,7 +119,7 @@ public:
static const uint32_t CriticalEdgeMultiplier = 1000;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
uint64_t BBWeight =
(BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2);
uint64_t Weight = 2;
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/contrib/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index 9606b3da2475..cdcd01726906 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -88,11 +88,10 @@ void CGProfilePass::addModuleFlags(
std::vector<Metadata *> Nodes;
for (auto E : Counts) {
- SmallVector<Metadata *, 3> Vals;
- Vals.push_back(ValueAsMetadata::get(E.first.first));
- Vals.push_back(ValueAsMetadata::get(E.first.second));
- Vals.push_back(MDB.createConstant(
- ConstantInt::get(Type::getInt64Ty(Context), E.second)));
+ Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
+ ValueAsMetadata::get(E.first.second),
+ MDB.createConstant(ConstantInt::get(
+ Type::getInt64Ty(Context), E.second))};
Nodes.push_back(MDNode::get(Context, Vals));
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
new file mode 100644
index 000000000000..1ada0b713092
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -0,0 +1,2074 @@
+//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges conditional blocks of code and reduces the number of
+// conditional branches in the hot paths based on profiles.
+//
+//===----------------------------------------------------------------------===//
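+
+// A minimal sketch of the effect (hypothetical code, not from a real profile):
+// two back-to-back biased checks such as
+//
+//   if (a) { foo(); }   // taken ~99% of the time
+//   if (b) { bar(); }   // taken ~99% of the time
+//
+// become, on the hot path, a single merged branch:
+//
+//   if (a & b) { foo(); bar(); }   // hot clone; the inner branches fold away
+//   else       { /* original code with both branches kept */ }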
+
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "chr"
+
+#define CHR_DEBUG(X) LLVM_DEBUG(X)
+
+static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
+ cl::desc("Apply CHR for all functions"));
+
+static cl::opt<double> CHRBiasThreshold(
+ "chr-bias-threshold", cl::init(0.99), cl::Hidden,
+ cl::desc("CHR considers a branch bias greater than this ratio as biased"));
+
+static cl::opt<unsigned> CHRMergeThreshold(
+ "chr-merge-threshold", cl::init(2), cl::Hidden,
+ cl::desc("CHR merges a group of N branches/selects where N >= this value"));
+
+static cl::opt<std::string> CHRModuleList(
+ "chr-module-list", cl::init(""), cl::Hidden,
+ cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
+
+static cl::opt<std::string> CHRFunctionList(
+ "chr-function-list", cl::init(""), cl::Hidden,
+ cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
+
+static StringSet<> CHRModules;
+static StringSet<> CHRFunctions;
+
+static void parseCHRFilterFiles() {
+ if (!CHRModuleList.empty()) {
+ auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
+ if (!FileOrErr) {
+ errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
+ std::exit(1);
+ }
+ StringRef Buf = FileOrErr->get()->getBuffer();
+ SmallVector<StringRef, 0> Lines;
+ Buf.split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ Line = Line.trim();
+ if (!Line.empty())
+ CHRModules.insert(Line);
+ }
+ }
+ if (!CHRFunctionList.empty()) {
+ auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
+ if (!FileOrErr) {
+ errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
+ std::exit(1);
+ }
+ StringRef Buf = FileOrErr->get()->getBuffer();
+ SmallVector<StringRef, 0> Lines;
+ Buf.split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ Line = Line.trim();
+ if (!Line.empty())
+ CHRFunctions.insert(Line);
+ }
+ }
+}
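+
+// The filter files are plain text with one entry per line; e.g. a
+// chr-function-list file could contain mangled names such as
+//
+//   _Z3foov
+//   _Z7computei
+//
+// (hypothetical names). Blank lines and surrounding whitespace are ignored.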
+
+namespace {
+class ControlHeightReductionLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ ControlHeightReductionLegacyPass() : FunctionPass(ID) {
+ initializeControlHeightReductionLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ parseCHRFilterFiles();
+ }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<RegionInfoPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // end anonymous namespace
+
+char ControlHeightReductionLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
+ "chr",
+ "Reduce control height in the hot paths",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
+INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
+ "chr",
+ "Reduce control height in the hot paths",
+ false, false)
+
+FunctionPass *llvm::createControlHeightReductionLegacyPass() {
+ return new ControlHeightReductionLegacyPass();
+}
+
+namespace {
+
+struct CHRStats {
+ CHRStats() : NumBranches(0), NumBranchesDelta(0),
+ WeightedNumBranchesDelta(0) {}
+ void print(raw_ostream &OS) const {
+ OS << "CHRStats: NumBranches " << NumBranches
+ << " NumBranchesDelta " << NumBranchesDelta
+ << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
+ }
+ uint64_t NumBranches; // The original number of conditional branches /
+ // selects
+ uint64_t NumBranchesDelta; // The decrease of the number of conditional
+ // branches / selects in the hot paths due to CHR.
+ uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
+ // count at the scope entry.
+};
+
+// RegInfo - some properties of a Region.
+struct RegInfo {
+ RegInfo() : R(nullptr), HasBranch(false) {}
+ RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
+ Region *R;
+ bool HasBranch;
+ SmallVector<SelectInst *, 8> Selects;
+};
+
+typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
+
+// CHRScope - a sequence of regions to CHR together. It corresponds to a
+// sequence of conditional blocks. It can have subscopes which correspond to
+// nested conditional blocks. Nested CHRScopes form a tree.
+class CHRScope {
+ public:
+ CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
+ assert(RI.R && "Null RegionIn");
+ RegInfos.push_back(RI);
+ }
+
+ Region *getParentRegion() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ Region *Parent = RegInfos[0].R->getParent();
+ assert(Parent && "Unexpected to call this on the top-level region");
+ return Parent;
+ }
+
+ BasicBlock *getEntryBlock() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ return RegInfos.front().R->getEntry();
+ }
+
+ BasicBlock *getExitBlock() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ return RegInfos.back().R->getExit();
+ }
+
+ bool appendable(CHRScope *Next) {
+ // The next scope is appendable only if this scope is directly connected to
+ // it (which implies it post-dominates this scope) and this scope dominates
+ // it (no edge to the next scope outside this scope).
+ BasicBlock *NextEntry = Next->getEntryBlock();
+ if (getExitBlock() != NextEntry)
+ // Not directly connected.
+ return false;
+ Region *LastRegion = RegInfos.back().R;
+ for (BasicBlock *Pred : predecessors(NextEntry))
+ if (!LastRegion->contains(Pred))
+ // There's an edge going into the entry of the next scope from outside
+ // of this scope.
+ return false;
+ return true;
+ }
+
+ void append(CHRScope *Next) {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
+ assert(getParentRegion() == Next->getParentRegion() &&
+ "Must be siblings");
+ assert(getExitBlock() == Next->getEntryBlock() &&
+ "Must be adjacent");
+ for (RegInfo &RI : Next->RegInfos)
+ RegInfos.push_back(RI);
+ for (CHRScope *Sub : Next->Subs)
+ Subs.push_back(Sub);
+ }
+
+ void addSub(CHRScope *SubIn) {
+#ifndef NDEBUG
+ bool IsChild = false;
+ for (RegInfo &RI : RegInfos)
+ if (RI.R == SubIn->getParentRegion()) {
+ IsChild = true;
+ break;
+ }
+ assert(IsChild && "Must be a child");
+#endif
+ Subs.push_back(SubIn);
+ }
+
+ // Split this scope at the boundary region into two: the regions at and after
+ // the boundary go to the new tail scope, which is returned.
+ CHRScope *split(Region *Boundary) {
+ assert(Boundary && "Boundary null");
+ assert(RegInfos.begin()->R != Boundary &&
+ "Can't be split at beginning");
+ auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
+ [&Boundary](const RegInfo& RI) {
+ return Boundary == RI.R;
+ });
+ if (BoundaryIt == RegInfos.end())
+ return nullptr;
+ SmallVector<RegInfo, 8> TailRegInfos;
+ SmallVector<CHRScope *, 8> TailSubs;
+ TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
+ RegInfos.resize(BoundaryIt - RegInfos.begin());
+ DenseSet<Region *> TailRegionSet;
+ for (RegInfo &RI : TailRegInfos)
+ TailRegionSet.insert(RI.R);
+ for (auto It = Subs.begin(); It != Subs.end(); ) {
+ CHRScope *Sub = *It;
+ assert(Sub && "null Sub");
+ Region *Parent = Sub->getParentRegion();
+ if (TailRegionSet.count(Parent)) {
+ TailSubs.push_back(Sub);
+ It = Subs.erase(It);
+ } else {
+ assert(std::find_if(RegInfos.begin(), RegInfos.end(),
+ [&Parent](const RegInfo& RI) {
+ return Parent == RI.R;
+ }) != RegInfos.end() &&
+ "Must be in head");
+ ++It;
+ }
+ }
+ assert(HoistStopMap.empty() && "MapHoistStops must be empty");
+ return new CHRScope(TailRegInfos, TailSubs);
+ }
+
+ bool contains(Instruction *I) const {
+ BasicBlock *Parent = I->getParent();
+ for (const RegInfo &RI : RegInfos)
+ if (RI.R->contains(Parent))
+ return true;
+ return false;
+ }
+
+ void print(raw_ostream &OS) const;
+
+ SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
+ SmallVector<CHRScope *, 8> Subs; // Subscopes.
+
+ // The instruction at which to insert the CHR conditional branch (and hoist
+ // the dependent condition values).
+ Instruction *BranchInsertPoint;
+
+ // True-biased and false-biased regions (conditional blocks),
+ // respectively. Used only for the outermost scope and includes regions in
+ // subscopes. The rest are unbiased.
+ DenseSet<Region *> TrueBiasedRegions;
+ DenseSet<Region *> FalseBiasedRegions;
+ // Among the biased regions, the regions that get CHRed.
+ SmallVector<RegInfo, 8> CHRRegions;
+
+ // True-biased and false-biased selects, respectively. Used only for the
+ // outermost scope and includes ones in subscopes.
+ DenseSet<SelectInst *> TrueBiasedSelects;
+ DenseSet<SelectInst *> FalseBiasedSelects;
+
+ // Map from one of the above regions to the instructions to stop
+ // hoisting instructions at through use-def chains.
+ HoistStopMapTy HoistStopMap;
+
+ private:
+ CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
+ SmallVector<CHRScope *, 8> &SubsIn)
+ : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
+};
+
+class CHR {
+ public:
+ CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
+ ProfileSummaryInfo &PSIin, RegionInfo &RIin,
+ OptimizationRemarkEmitter &OREin)
+ : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
+
+ ~CHR() {
+ for (CHRScope *Scope : Scopes) {
+ delete Scope;
+ }
+ }
+
+ bool run();
+
+ private:
+ // See the comments in CHR::run() for the high level flow of the algorithm and
+ // what the following functions do.
+
+ void findScopes(SmallVectorImpl<CHRScope *> &Output) {
+ Region *R = RI.getTopLevelRegion();
+ CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
+ if (Scope) {
+ Output.push_back(Scope);
+ }
+ }
+ CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+ SmallVectorImpl<CHRScope *> &Scopes);
+ CHRScope *findScope(Region *R);
+ void checkScopeHoistable(CHRScope *Scope);
+
+ void splitScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+ SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
+ CHRScope *Outer,
+ DenseSet<Value *> *OuterConditionValues,
+ Instruction *OuterInsertPoint,
+ SmallVectorImpl<CHRScope *> &Output,
+ DenseSet<Instruction *> &Unhoistables);
+
+ void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
+ void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
+
+ void filterScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+
+ void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+ void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
+
+ void sortScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+
+ void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
+ void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
+ void cloneScopeBlocks(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BasicBlock *ExitBlock,
+ Region *LastRegion,
+ ValueToValueMapTy &VMap);
+ BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
+ BasicBlock *EntryBlock,
+ BasicBlock *NewEntryBlock,
+ ValueToValueMapTy &VMap);
+ void fixupBranchesAndSelects(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BranchInst *MergedBR,
+ uint64_t ProfileCount);
+ void fixupBranch(Region *R,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition, BranchProbability &CHRBranchBias);
+ void fixupSelect(SelectInst* SI,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition, BranchProbability &CHRBranchBias);
+ void addToMergedCondition(bool IsTrueBiased, Value *Cond,
+ Instruction *BranchOrSelect,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition);
+
+ Function &F;
+ BlockFrequencyInfo &BFI;
+ DominatorTree &DT;
+ ProfileSummaryInfo &PSI;
+ RegionInfo &RI;
+ OptimizationRemarkEmitter &ORE;
+ CHRStats Stats;
+
+ // All the true-biased regions in the function
+ DenseSet<Region *> TrueBiasedRegionsGlobal;
+ // All the false-biased regions in the function
+ DenseSet<Region *> FalseBiasedRegionsGlobal;
+ // All the true-biased selects in the function
+ DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
+ // All the false-biased selects in the function
+ DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
+ // A map from biased regions to their branch bias
+ DenseMap<Region *, BranchProbability> BranchBiasMap;
+ // A map from biased selects to their branch bias
+ DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
+ // All the scopes.
+ DenseSet<CHRScope *> Scopes;
+};
+
+} // end anonymous namespace
+
+static inline
+raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
+ const CHRStats &Stats) {
+ Stats.print(OS);
+ return OS;
+}
+
+static inline
+raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
+ Scope.print(OS);
+ return OS;
+}
+
+static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
+ if (ForceCHR)
+ return true;
+
+ if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
+ if (CHRModules.count(F.getParent()->getName()))
+ return true;
+ return CHRFunctions.count(F.getName());
+ }
+
+ assert(PSI.hasProfileSummary() && "Empty PSI?");
+ return PSI.isFunctionEntryHot(&F);
+}
+
+static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
+ CHRStats *Stats) {
+ StringRef FuncName = F.getName();
+ StringRef ModuleName = F.getParent()->getName();
+ (void)(FuncName); // Unused in release build.
+ (void)(ModuleName); // Unused in release build.
+ CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
+ << FuncName);
+ if (Stats)
+ CHR_DEBUG(dbgs() << " " << *Stats);
+ CHR_DEBUG(dbgs() << "\n");
+ CHR_DEBUG(F.dump());
+}
+
+void CHRScope::print(raw_ostream &OS) const {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ OS << "CHRScope[";
+ OS << RegInfos.size() << ", Regions[";
+ for (const RegInfo &RI : RegInfos) {
+ OS << RI.R->getNameStr();
+ if (RI.HasBranch)
+ OS << " B";
+ if (RI.Selects.size() > 0)
+ OS << " S" << RI.Selects.size();
+ OS << ", ";
+ }
+ if (RegInfos[0].R->getParent()) {
+ OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
+ } else {
+ // top level region
+ OS << "]";
+ }
+ OS << ", Subs[";
+ for (CHRScope *Sub : Subs) {
+ OS << *Sub << ", ";
+ }
+ OS << "]]";
+}
+
+// Return true if the given instruction type can be hoisted by CHR.
+static bool isHoistableInstructionType(Instruction *I) {
+ return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
+ isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+ isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
+ isa<InsertValueInst>(I);
+}
+
+// Return true if the given instruction can be hoisted by CHR.
+static bool isHoistable(Instruction *I, DominatorTree &DT) {
+ if (!isHoistableInstructionType(I))
+ return false;
+ return isSafeToSpeculativelyExecute(I, nullptr, &DT);
+}
+
+// Recursively traverse the use-def chains of the given value and return a set
+// of the unhoistable base values defined within the scope (excluding the
+// first-region entry block) or the (hoistable or unhoistable) base values that
+// are defined outside (including the first-region entry block) of the
+// scope. The returned set doesn't include constants.
+static std::set<Value *> getBaseValues(Value *V,
+ DominatorTree &DT) {
+ std::set<Value *> Result;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // We don't stop at a block that's not in the Scope because we would miss some
+ // instructions that are based on the same base values if we stop there.
+ if (!isHoistable(I, DT)) {
+ Result.insert(I);
+ return Result;
+ }
+ // I is hoistable above the Scope.
+ for (Value *Op : I->operands()) {
+ std::set<Value *> OpResult = getBaseValues(Op, DT);
+ Result.insert(OpResult.begin(), OpResult.end());
+ }
+ return Result;
+ }
+ if (isa<Argument>(V)) {
+ Result.insert(V);
+ return Result;
+ }
+ // We don't include other values such as constants because those won't give
+ // any chance of folding conditions (e.g. two bit checks merged into one
+ // check) after CHR.
+ return Result; // empty
+}
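+
+// E.g. for a branch condition computed as `((x >> 3) & 1) != 0`, where `x` is
+// a function argument (hypothetical, for illustration), the shift, the mask,
+// and the compare are all hoistable, so the returned base set is {x}; two
+// conditions whose base sets both contain `x` then have a chance of folding
+// together after CHR.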
+
+// Return true if V is already hoisted or can be hoisted (along with its
+// operands) above the insert point. When it returns true and HoistStops is
+// non-null, the instructions to stop hoisting at through the use-def chains are
+// inserted into HoistStops.
+static bool
+checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
+ DenseSet<Instruction *> &Unhoistables,
+ DenseSet<Instruction *> *HoistStops) {
+ assert(InsertPoint && "Null InsertPoint");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
+ assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
+ if (Unhoistables.count(I)) {
+ // Don't hoist if they are not to be hoisted.
+ return false;
+ }
+ if (DT.dominates(I, InsertPoint)) {
+ // We are already above the insert point. Stop here.
+ if (HoistStops)
+ HoistStops->insert(I);
+ return true;
+ }
+ // We aren't above the insert point; check whether we can hoist it above the
+ // insert point.
+ if (isHoistable(I, DT)) {
+ // Check operands first.
+ DenseSet<Instruction *> OpsHoistStops;
+ bool AllOpsHoisted = true;
+ for (Value *Op : I->operands()) {
+ if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
+ AllOpsHoisted = false;
+ break;
+ }
+ }
+ if (AllOpsHoisted) {
+ CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
+ if (HoistStops)
+ HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
+ return true;
+ }
+ }
+ return false;
+ }
+ // Non-instructions are considered hoistable.
+ return true;
+}
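+
+// Small illustration with hypothetical values: for V = `and i1 %a, %b` below
+// the insert point, where %a is defined above the insert point and %b is a
+// hoistable icmp of a function argument, both operands check out, so this
+// returns true and records %a as a hoist stop; hoistValue would later move %b
+// and the `and` above the insert point, stopping at %a.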
+
+// Returns true and sets the true and false branch probabilities from the
+// MD_prof metadata if it's well-formed.
+static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
+ BranchProbability &FalseProb) {
+ if (!MD) return false;
+ MDString *MDName = cast<MDString>(MD->getOperand(0));
+ if (MDName->getString() != "branch_weights" ||
+ MD->getNumOperands() != 3)
+ return false;
+ ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
+ ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
+ if (!TrueWeight || !FalseWeight)
+ return false;
+ uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
+ uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
+ uint64_t SumWt = TrueWt + FalseWt;
+
+ assert(SumWt >= TrueWt && SumWt >= FalseWt &&
+ "Overflow calculating branch probabilities.");
+
+ TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
+ FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
+ return true;
+}
+
+static BranchProbability getCHRBiasThreshold() {
+ return BranchProbability::getBranchProbability(
+ static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
+}
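+
+// E.g. hypothetical branch weights of 999:1 give TrueProb = 999/1000 = 0.999
+// and FalseProb = 0.001; with the default chr-bias-threshold of 0.99
+// (990000/1000000 here), such a branch is classified as true-biased below.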
+
+// A helper for checkBiasedBranch and checkBiasedSelect. If TrueProb >=
+// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
+// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
+// false.
+template <typename K, typename S, typename M>
+static bool checkBias(K *Key, BranchProbability TrueProb,
+ BranchProbability FalseProb, S &TrueSet, S &FalseSet,
+ M &BiasMap) {
+ BranchProbability Threshold = getCHRBiasThreshold();
+ if (TrueProb >= Threshold) {
+ TrueSet.insert(Key);
+ BiasMap[Key] = TrueProb;
+ return true;
+ } else if (FalseProb >= Threshold) {
+ FalseSet.insert(Key);
+ BiasMap[Key] = FalseProb;
+ return true;
+ }
+ return false;
+}
+
+// Returns true and inserts the region into the appropriate biased set and the
+// bias map if the branch of the region is biased.
+static bool checkBiasedBranch(BranchInst *BI, Region *R,
+ DenseSet<Region *> &TrueBiasedRegionsGlobal,
+ DenseSet<Region *> &FalseBiasedRegionsGlobal,
+ DenseMap<Region *, BranchProbability> &BranchBiasMap) {
+ if (!BI->isConditional())
+ return false;
+ BranchProbability ThenProb, ElseProb;
+ if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof),
+ ThenProb, ElseProb))
+ return false;
+ BasicBlock *IfThen = BI->getSuccessor(0);
+ BasicBlock *IfElse = BI->getSuccessor(1);
+ assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
+ IfThen != IfElse &&
+ "Invariant from findScopes");
+ if (IfThen == R->getExit()) {
+ // Swap them so that IfThen/ThenProb means going into the conditional code
+ // and IfElse/ElseProb means skipping it.
+ std::swap(IfThen, IfElse);
+ std::swap(ThenProb, ElseProb);
+ }
+ CHR_DEBUG(dbgs() << "BI " << *BI << " ");
+ CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
+ CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
+ return checkBias(R, ThenProb, ElseProb,
+ TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+ BranchBiasMap);
+}
+
+// Returns true and inserts the select into the appropriate biased set and the
+// bias map if the select is biased.
+static bool checkBiasedSelect(
+ SelectInst *SI, Region *R,
+ DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
+ DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
+ DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
+ BranchProbability TrueProb, FalseProb;
+ if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof),
+ TrueProb, FalseProb))
+ return false;
+ CHR_DEBUG(dbgs() << "SI " << *SI << " ");
+ CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
+ CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
+ return checkBias(SI, TrueProb, FalseProb,
+ TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
+ SelectBiasMap);
+}
+
+// Returns the instruction at which to hoist the dependent condition values and
+// insert the CHR branch for a region. This is the first select in the entry
+// block, if any, otherwise the terminator branch of the entry block.
+static Instruction* getBranchInsertPoint(RegInfo &RI) {
+ Region *R = RI.R;
+ BasicBlock *EntryBB = R->getEntry();
+ // The hoist point is by default the terminator of the entry block, which is
+ // the same as the branch instruction if RI.HasBranch is true.
+ Instruction *HoistPoint = EntryBB->getTerminator();
+ for (SelectInst *SI : RI.Selects) {
+ if (SI->getParent() == EntryBB) {
+ // Pick the first select in Selects in the entry block. Note Selects is
+ // sorted in the instruction order within a block (asserted below).
+ HoistPoint = SI;
+ break;
+ }
+ }
+ assert(HoistPoint && "Null HoistPoint");
+#ifndef NDEBUG
+ // Check that HoistPoint is the first one in Selects in the entry block,
+ // if any.
+ DenseSet<Instruction *> EntryBlockSelectSet;
+ for (SelectInst *SI : RI.Selects) {
+ if (SI->getParent() == EntryBB) {
+ EntryBlockSelectSet.insert(SI);
+ }
+ }
+ for (Instruction &I : *EntryBB) {
+ if (EntryBlockSelectSet.count(&I) > 0) {
+ assert(&I == HoistPoint &&
+ "HoistPoint must be the first one in Selects");
+ break;
+ }
+ }
+#endif
+ return HoistPoint;
+}
+
+// Find a CHR scope in the given region.
+CHRScope * CHR::findScope(Region *R) {
+ CHRScope *Result = nullptr;
+ BasicBlock *Entry = R->getEntry();
+ BasicBlock *Exit = R->getExit(); // null if top level.
+ assert(Entry && "Entry must not be null");
+ assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
+ "Only top level region has a null exit");
+ if (Entry)
+ CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "Entry null\n");
+ if (Exit)
+ CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "Exit null\n");
+ // Exclude cases where Entry is part of a subregion (hence it doesn't belong
+ // to this region).
+ bool EntryInSubregion = RI.getRegionFor(Entry) != R;
+ if (EntryInSubregion)
+ return nullptr;
+ // Exclude loops
+ for (BasicBlock *Pred : predecessors(Entry))
+ if (R->contains(Pred))
+ return nullptr;
+ if (Exit) {
+ // Try to find an if-then block (check if R is an if-then).
+ // if (cond) {
+ // ...
+ // }
+ auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
+ if (BI)
+ CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "BI null\n");
+ if (BI && BI->isConditional()) {
+ BasicBlock *S0 = BI->getSuccessor(0);
+ BasicBlock *S1 = BI->getSuccessor(1);
+ CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
+ CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
+ if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
+ RegInfo RI(R);
+ RI.HasBranch = checkBiasedBranch(
+ BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+ BranchBiasMap);
+ Result = new CHRScope(RI);
+ Scopes.insert(Result);
+ CHR_DEBUG(dbgs() << "Found a region with a branch\n");
+ ++Stats.NumBranches;
+ if (!RI.HasBranch) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
+ << "Branch not biased";
+ });
+ }
+ }
+ }
+ }
+ {
+ // Try to look for selects in the direct child blocks (as opposed to in
+ // subregions) of R.
+ // ...
+ // if (..) { // Some subregion
+ // ...
+ // }
+ // if (..) { // Some subregion
+ // ...
+ // }
+ // ...
+ // a = cond ? b : c;
+ // ...
+ SmallVector<SelectInst *, 8> Selects;
+ for (RegionNode *E : R->elements()) {
+ if (E->isSubRegion())
+ continue;
+ // This returns the basic block of E if E is a direct child of R (not a
+ // subregion).
+ BasicBlock *BB = E->getEntry();
+ // Need to push in the order to make it easier to find the first Select
+ // later.
+ for (Instruction &I : *BB) {
+ if (auto *SI = dyn_cast<SelectInst>(&I)) {
+ Selects.push_back(SI);
+ ++Stats.NumBranches;
+ }
+ }
+ }
+ if (Selects.size() > 0) {
+ auto AddSelects = [&](RegInfo &RI) {
+ for (auto *SI : Selects)
+ if (checkBiasedSelect(SI, RI.R,
+ TrueBiasedSelectsGlobal,
+ FalseBiasedSelectsGlobal,
+ SelectBiasMap))
+ RI.Selects.push_back(SI);
+ else
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
+ << "Select not biased";
+ });
+ };
+ if (!Result) {
+ CHR_DEBUG(dbgs() << "Found a select-only region\n");
+ RegInfo RI(R);
+ AddSelects(RI);
+ Result = new CHRScope(RI);
+ Scopes.insert(Result);
+ } else {
+ CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
+ AddSelects(Result->RegInfos[0]);
+ }
+ }
+ }
+
+ if (Result) {
+ checkScopeHoistable(Result);
+ }
+ return Result;
+}
+
+// Check whether the branch and the selects in the region can be hoisted above
+// the CHR branch insert point (the most dominating of them, either the branch
+// at the end of the first block or the first select in the first block). If
+// the branch can't be hoisted, drop the selects in the first block.
+//
+// For example, for the following scope/region with selects, we want to insert
+// the merged branch right before the first select in the first/entry block by
+// hoisting c1, c2, c3, and c4.
+//
+// // Branch insert point here.
+// a = c1 ? b : c; // Select 1
+// d = c2 ? e : f; // Select 2
+// if (c3) { // Branch
+// ...
+// c4 = foo() // A call.
+// g = c4 ? h : i; // Select 3
+// }
+//
+// But suppose we can't hoist c4 because it's dependent on the preceding
+// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
+// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
+void CHR::checkScopeHoistable(CHRScope *Scope) {
+ RegInfo &RI = Scope->RegInfos[0];
+ Region *R = RI.R;
+ BasicBlock *EntryBB = R->getEntry();
+ auto *Branch = RI.HasBranch ?
+ cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
+ SmallVector<SelectInst *, 8> &Selects = RI.Selects;
+ if (RI.HasBranch || !Selects.empty()) {
+ Instruction *InsertPoint = getBranchInsertPoint(RI);
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+ // Avoid a data dependence from a select or a branch to a(nother)
+ // select. Note that no instruction can data-depend on a branch (a branch
+ // instruction doesn't produce a value).
+ DenseSet<Instruction *> Unhoistables;
+ // Initialize Unhoistables with the selects.
+ for (SelectInst *SI : Selects) {
+ Unhoistables.insert(SI);
+ }
+ // Remove Selects that can't be hoisted.
+ for (auto it = Selects.begin(); it != Selects.end(); ) {
+ SelectInst *SI = *it;
+ if (SI == InsertPoint) {
+ ++it;
+ continue;
+ }
+ bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr);
+ if (!IsHoistable) {
+ CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "DropUnhoistableSelect", SI)
+ << "Dropped unhoistable select";
+ });
+ it = Selects.erase(it);
+ // Since we are dropping the select here, we also drop it from
+ // Unhoistables.
+ Unhoistables.erase(SI);
+ } else
+ ++it;
+ }
+ // Update InsertPoint after potentially removing selects.
+ InsertPoint = getBranchInsertPoint(RI);
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+ if (RI.HasBranch && InsertPoint != Branch) {
+ bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr);
+ if (!IsHoistable) {
+ // If the branch isn't hoistable, drop the selects in the entry
+ // block, preferring the branch, which makes the branch the hoist
+ // point.
+ assert(InsertPoint != Branch && "Branch must not be the hoist point");
+ CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
+ CHR_DEBUG(
+ for (SelectInst *SI : Selects) {
+ dbgs() << "SI " << *SI << "\n";
+ });
+ for (SelectInst *SI : Selects) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "DropSelectUnhoistableBranch", SI)
+ << "Dropped select due to unhoistable branch";
+ });
+ }
+ Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
+ [EntryBB](SelectInst *SI) {
+ return SI->getParent() == EntryBB;
+ }), Selects.end());
+ Unhoistables.clear();
+ InsertPoint = Branch;
+ }
+ }
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+#ifndef NDEBUG
+ if (RI.HasBranch) {
+ assert(!DT.dominates(Branch, InsertPoint) &&
+ "Branch can't be already above the hoist point");
+ assert(checkHoistValue(Branch->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr) &&
+ "checkHoistValue for branch");
+ }
+ for (auto *SI : Selects) {
+ assert(!DT.dominates(SI, InsertPoint) &&
+ "SI can't be already above the hoist point");
+ assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
+ Unhoistables, nullptr) &&
+ "checkHoistValue for selects");
+ }
+ CHR_DEBUG(dbgs() << "Result\n");
+ if (RI.HasBranch) {
+ CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
+ }
+ for (auto *SI : Selects) {
+ CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
+ }
+#endif
+ }
+}
+
+// Traverse the region tree, find all nested scopes and merge them if possible.
+CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+ SmallVectorImpl<CHRScope *> &Scopes) {
+ CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
+ CHRScope *Result = findScope(R);
+ // Visit subscopes.
+ CHRScope *ConsecutiveSubscope = nullptr;
+ SmallVector<CHRScope *, 8> Subscopes;
+ for (auto It = R->begin(); It != R->end(); ++It) {
+ const std::unique_ptr<Region> &SubR = *It;
+ auto NextIt = std::next(It);
+ Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
+ CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
+ << "\n");
+ CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
+ if (SubCHRScope) {
+ CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
+ } else {
+ CHR_DEBUG(dbgs() << "Subregion Scope null\n");
+ }
+ if (SubCHRScope) {
+ if (!ConsecutiveSubscope)
+ ConsecutiveSubscope = SubCHRScope;
+ else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ ConsecutiveSubscope = SubCHRScope;
+ } else
+ ConsecutiveSubscope->append(SubCHRScope);
+ } else {
+ if (ConsecutiveSubscope) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ }
+ ConsecutiveSubscope = nullptr;
+ }
+ }
+ if (ConsecutiveSubscope) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ }
+ for (CHRScope *Sub : Subscopes) {
+ if (Result) {
+ // Combine it with the parent.
+ Result->addSub(Sub);
+ } else {
+ // Push Subscopes as they won't be combined with the parent.
+ Scopes.push_back(Sub);
+ }
+ }
+ return Result;
+}
+
+static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
+ DenseSet<Value *> ConditionValues;
+ if (RI.HasBranch) {
+ auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
+ ConditionValues.insert(BI->getCondition());
+ }
+ for (SelectInst *SI : RI.Selects) {
+ ConditionValues.insert(SI->getCondition());
+ }
+ return ConditionValues;
+}
+
+// Determine whether to split a scope depending on the sets of the branch
+// condition values of the previous region and the current region. We split
+// (return true) if 1) the condition values of the inner/lower scope can't be
+// hoisted up to the outer/upper scope, or 2) the two sets of condition values
+// have an empty intersection (because the combined branch conditions probably
+// won't lead to a simpler combined condition).
+static bool shouldSplit(Instruction *InsertPoint,
+ DenseSet<Value *> &PrevConditionValues,
+ DenseSet<Value *> &ConditionValues,
+ DominatorTree &DT,
+ DenseSet<Instruction *> &Unhoistables) {
+ CHR_DEBUG(
+ dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
+ for (Value *V : PrevConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << " ConditionValues ";
+ for (Value *V : ConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ assert(InsertPoint && "Null InsertPoint");
+ // If any of Bases isn't hoistable to the hoist point, split.
+ for (Value *V : ConditionValues) {
+ if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
+ CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
+ return true; // Not hoistable, split.
+ }
+ }
+ // If PrevConditionValues or ConditionValues is empty, don't split to avoid
+ // unnecessary splits at scopes with no branch/selects. If
+ // PrevConditionValues and ConditionValues don't intersect at all, split.
+ if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
+ // Use std::set as DenseSet doesn't work with set_intersection.
+ std::set<Value *> PrevBases, Bases;
+ for (Value *V : PrevConditionValues) {
+ std::set<Value *> BaseValues = getBaseValues(V, DT);
+ PrevBases.insert(BaseValues.begin(), BaseValues.end());
+ }
+ for (Value *V : ConditionValues) {
+ std::set<Value *> BaseValues = getBaseValues(V, DT);
+ Bases.insert(BaseValues.begin(), BaseValues.end());
+ }
+ CHR_DEBUG(
+ dbgs() << "PrevBases ";
+ for (Value *V : PrevBases) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << " Bases ";
+ for (Value *V : Bases) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ std::set<Value *> Intersection;
+ std::set_intersection(PrevBases.begin(), PrevBases.end(),
+ Bases.begin(), Bases.end(),
+ std::inserter(Intersection, Intersection.begin()));
+ if (Intersection.empty()) {
+ // Empty intersection, split.
+ CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
+ return true;
+ }
+ }
+ CHR_DEBUG(dbgs() << "No split\n");
+ return false; // Don't split.
+}
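+
+// E.g. with hypothetical conditions: if the previous region tests `x & 1` and
+// the current one tests `x & 2`, both reduce to the base set {x}, the
+// intersection is non-empty, and the regions stay in one scope; if the current
+// one instead tests `y & 4`, the base sets {x} and {y} don't intersect and the
+// scope is split (assuming the values are hoistable in both cases).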
+
+static void getSelectsInScope(CHRScope *Scope,
+ DenseSet<Instruction *> &Output) {
+ for (RegInfo &RI : Scope->RegInfos)
+ for (SelectInst *SI : RI.Selects)
+ Output.insert(SI);
+ for (CHRScope *Sub : Scope->Subs)
+ getSelectsInScope(Sub, Output);
+}
+
+void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ assert(!Scope->BranchInsertPoint &&
+ "BranchInsertPoint must not be set");
+ DenseSet<Instruction *> Unhoistables;
+ getSelectsInScope(Scope, Unhoistables);
+ splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
+ }
+#ifndef NDEBUG
+ for (CHRScope *Scope : Output) {
+ assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
+ }
+#endif
+}
+
+SmallVector<CHRScope *, 8> CHR::splitScope(
+ CHRScope *Scope,
+ CHRScope *Outer,
+ DenseSet<Value *> *OuterConditionValues,
+ Instruction *OuterInsertPoint,
+ SmallVectorImpl<CHRScope *> &Output,
+ DenseSet<Instruction *> &Unhoistables) {
+ if (Outer) {
+ assert(OuterConditionValues && "Null OuterConditionValues");
+ assert(OuterInsertPoint && "Null OuterInsertPoint");
+ }
+ bool PrevSplitFromOuter = true;
+ DenseSet<Value *> PrevConditionValues;
+ Instruction *PrevInsertPoint = nullptr;
+ SmallVector<CHRScope *, 8> Splits;
+ SmallVector<bool, 8> SplitsSplitFromOuter;
+ SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
+ SmallVector<Instruction *, 8> SplitsInsertPoints;
+ SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
+ for (RegInfo &RI : RegInfos) {
+ Instruction *InsertPoint = getBranchInsertPoint(RI);
+ DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
+ CHR_DEBUG(
+ dbgs() << "ConditionValues ";
+ for (Value *V : ConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ if (RI.R == RegInfos[0].R) {
+ // First iteration. Check to see if we should split from the outer.
+ if (Outer) {
+ CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
+ CHR_DEBUG(dbgs() << "Should split from outer at "
+ << RI.R->getNameStr() << "\n");
+ if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
+ ConditionValues, DT, Unhoistables)) {
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "SplitScopeFromOuter",
+ RI.R->getEntry()->getTerminator())
+ << "Split scope from outer due to unhoistable branch/select "
+ << "and/or lack of common condition values";
+ });
+ } else {
+ // Not splitting from the outer. Use the outer bases and insert
+ // point. Union the bases.
+ PrevSplitFromOuter = false;
+ PrevConditionValues = *OuterConditionValues;
+ PrevConditionValues.insert(ConditionValues.begin(),
+ ConditionValues.end());
+ PrevInsertPoint = OuterInsertPoint;
+ }
+ } else {
+ CHR_DEBUG(dbgs() << "Outer null\n");
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ }
+ } else {
+ CHR_DEBUG(dbgs() << "Should split from prev at "
+ << RI.R->getNameStr() << "\n");
+ if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
+ DT, Unhoistables)) {
+ CHRScope *Tail = Scope->split(RI.R);
+ Scopes.insert(Tail);
+ Splits.push_back(Scope);
+ SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+ SplitsConditionValues.push_back(PrevConditionValues);
+ SplitsInsertPoints.push_back(PrevInsertPoint);
+ Scope = Tail;
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ PrevSplitFromOuter = true;
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "SplitScopeFromPrev",
+ RI.R->getEntry()->getTerminator())
+ << "Split scope from previous due to unhoistable branch/select "
+ << "and/or lack of common condition values";
+ });
+ } else {
+ // Not splitting. Union the bases. Keep the hoist point.
+ PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
+ }
+ }
+ }
+ Splits.push_back(Scope);
+ SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+ SplitsConditionValues.push_back(PrevConditionValues);
+ assert(PrevInsertPoint && "Null PrevInsertPoint");
+ SplitsInsertPoints.push_back(PrevInsertPoint);
+ assert(Splits.size() == SplitsConditionValues.size() &&
+ Splits.size() == SplitsSplitFromOuter.size() &&
+ Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
+ for (size_t I = 0; I < Splits.size(); ++I) {
+ CHRScope *Split = Splits[I];
+ DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
+ Instruction *SplitInsertPoint = SplitsInsertPoints[I];
+ SmallVector<CHRScope *, 8> NewSubs;
+ DenseSet<Instruction *> SplitUnhoistables;
+ getSelectsInScope(Split, SplitUnhoistables);
+ for (CHRScope *Sub : Split->Subs) {
+ SmallVector<CHRScope *, 8> SubSplits = splitScope(
+ Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
+ SplitUnhoistables);
+ NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
+ }
+ Split->Subs = NewSubs;
+ }
+ SmallVector<CHRScope *, 8> Result;
+ for (size_t I = 0; I < Splits.size(); ++I) {
+ CHRScope *Split = Splits[I];
+ if (SplitsSplitFromOuter[I]) {
+ // Split from the outer.
+ Output.push_back(Split);
+ Split->BranchInsertPoint = SplitsInsertPoints[I];
+ CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
+ << "\n");
+ } else {
+ // Connected to the outer.
+ Result.push_back(Split);
+ }
+ }
+ if (!Outer)
+ assert(Result.empty() &&
+ "If no outer (top-level), must return no nested ones");
+ return Result;
+}
+
+void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
+ for (CHRScope *Scope : Scopes) {
+ assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
+ classifyBiasedScopes(Scope, Scope);
+ CHR_DEBUG(
+ dbgs() << "classifyBiasedScopes " << *Scope << "\n";
+ dbgs() << "TrueBiasedRegions ";
+ for (Region *R : Scope->TrueBiasedRegions) {
+ dbgs() << R->getNameStr() << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "FalseBiasedRegions ";
+ for (Region *R : Scope->FalseBiasedRegions) {
+ dbgs() << R->getNameStr() << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "TrueBiasedSelects ";
+ for (SelectInst *SI : Scope->TrueBiasedSelects) {
+ dbgs() << *SI << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "FalseBiasedSelects ";
+ for (SelectInst *SI : Scope->FalseBiasedSelects) {
+ dbgs() << *SI << ", ";
+ }
+ dbgs() << "\n";);
+ }
+}
+
+void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
+ for (RegInfo &RI : Scope->RegInfos) {
+ if (RI.HasBranch) {
+ Region *R = RI.R;
+ if (TrueBiasedRegionsGlobal.count(R) > 0)
+ OutermostScope->TrueBiasedRegions.insert(R);
+ else if (FalseBiasedRegionsGlobal.count(R) > 0)
+ OutermostScope->FalseBiasedRegions.insert(R);
+ else
+ llvm_unreachable("Must be biased");
+ }
+ for (SelectInst *SI : RI.Selects) {
+ if (TrueBiasedSelectsGlobal.count(SI) > 0)
+ OutermostScope->TrueBiasedSelects.insert(SI);
+ else if (FalseBiasedSelectsGlobal.count(SI) > 0)
+ OutermostScope->FalseBiasedSelects.insert(SI);
+ else
+ llvm_unreachable("Must be biased");
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs) {
+ classifyBiasedScopes(Sub, OutermostScope);
+ }
+}
+
+static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
+ unsigned NumBiased = Scope->TrueBiasedRegions.size() +
+ Scope->FalseBiasedRegions.size() +
+ Scope->TrueBiasedSelects.size() +
+ Scope->FalseBiasedSelects.size();
+ return NumBiased >= CHRMergeThreshold;
+}
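+
+// E.g. with the default chr-merge-threshold of 2, one biased branch plus one
+// biased select in the scope is already enough to keep it.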
+
+void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ // Filter out scopes with fewer than CHRMergeThreshold biased branches or selects.
+ if (!hasAtLeastTwoBiasedBranches(Scope)) {
+ CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
+ << Scope->TrueBiasedRegions.size()
+ << " falsy-regions " << Scope->FalseBiasedRegions.size()
+ << " true-selects " << Scope->TrueBiasedSelects.size()
+ << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(
+ DEBUG_TYPE,
+ "DropScopeWithOneBranchOrSelect",
+ Scope->RegInfos[0].R->getEntry()->getTerminator())
+ << "Drop scope with < "
+ << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
+ << " biased branch(es) or select(s)";
+ });
+ continue;
+ }
+ Output.push_back(Scope);
+ }
+}
+
+void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
+ "Empty");
+ setCHRRegions(Scope, Scope);
+ Output.push_back(Scope);
+ CHR_DEBUG(
+ dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
+ for (auto pair : Scope->HoistStopMap) {
+ Region *R = pair.first;
+ dbgs() << "Region " << R->getNameStr() << "\n";
+ for (Instruction *I : pair.second) {
+ dbgs() << "HoistStop " << *I << "\n";
+ }
+ }
+ dbgs() << "CHRRegions" << "\n";
+ for (RegInfo &RI : Scope->CHRRegions) {
+ dbgs() << RI.R->getNameStr() << "\n";
+ });
+ }
+}
+
+void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
+ DenseSet<Instruction *> Unhoistables;
+ // Put the biased selects in Unhoistables because they should stay where they
+ // are and be constant-folded after CHR (in case one biased select or a
+ // branch can depend on another biased select).
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (SelectInst *SI : RI.Selects) {
+ Unhoistables.insert(SI);
+ }
+ }
+ Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
+ for (RegInfo &RI : Scope->RegInfos) {
+ Region *R = RI.R;
+ DenseSet<Instruction *> HoistStops;
+ bool IsHoisted = false;
+ if (RI.HasBranch) {
+ assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
+ OutermostScope->FalseBiasedRegions.count(R) > 0) &&
+ "Must be truthy or falsy");
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ // Note checkHoistValue fills in HoistStops.
+ bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
+ Unhoistables, &HoistStops);
+ assert(IsHoistable && "Must be hoistable");
+ (void)(IsHoistable); // Unused in release build
+ IsHoisted = true;
+ }
+ for (SelectInst *SI : RI.Selects) {
+ assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
+ OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
+ "Must be true or false biased");
+ // Note checkHoistValue fills in HoistStops.
+ bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
+ Unhoistables, &HoistStops);
+ assert(IsHoistable && "Must be hoistable");
+ (void)(IsHoistable); // Unused in release build
+ IsHoisted = true;
+ }
+ if (IsHoisted) {
+ OutermostScope->CHRRegions.push_back(RI);
+ OutermostScope->HoistStopMap[R] = HoistStops;
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs)
+ setCHRRegions(Sub, OutermostScope);
+}
+
+bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
+ return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
+}
+
+void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ Output.resize(Input.size());
+ llvm::copy(Input, Output.begin());
+ std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
+}
+
+// Hoist V (along with the operands it depends on) to just before the hoist
+// point, unless it's already hoisted, is one of the region's hoist stops, or
+// is a trivial phi inserted by a previous CHR scope.
+static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
+ HoistStopMapTy &HoistStopMap,
+ DenseSet<Instruction *> &HoistedSet,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ auto IT = HoistStopMap.find(R);
+ assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
+ DenseSet<Instruction *> &HoistStops = IT->second;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (I == HoistPoint)
+ return;
+ if (HoistStops.count(I))
+ return;
+ if (auto *PN = dyn_cast<PHINode>(I))
+ if (TrivialPHIs.count(PN))
+ // The trivial phi inserted by the previous CHR scope could replace a
+ // non-phi in HoistStops. Note that since this phi is at the exit of a
+ // previous CHR scope, which dominates this scope, it's safe to stop
+ // hoisting there.
+ return;
+ if (HoistedSet.count(I))
+ // Already hoisted, return.
+ return;
+ assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
+ for (Value *Op : I->operands()) {
+ hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
+ }
+ I->moveBefore(HoistPoint);
+ HoistedSet.insert(I);
+ CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
+ }
+}
+
+// Hoist the dependent condition values of the branches and the selects in the
+// scope to the insert point.
+static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<Instruction *> HoistedSet;
+ for (const RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+ if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+ HoistedSet, TrivialPHIs);
+ }
+ for (SelectInst *SI : RI.Selects) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+ if (!(IsTrueBiased || IsFalseBiased))
+ continue;
+ hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+ HoistedSet, TrivialPHIs);
+ }
+ }
+}
+
+// Negate the predicate of an ICmp if it's used only by branches or selects,
+// compensating by swapping the successors of those branches and the operands
+// of those selects. Returns true on success.
+static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
+ Instruction *ExcludedUser,
+ CHRScope *Scope) {
+ for (User *U : ICmp->users()) {
+ if (U == ExcludedUser)
+ continue;
+ if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
+ continue;
+ if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
+ continue;
+ return false;
+ }
+ for (User *U : ICmp->users()) {
+ if (U == ExcludedUser)
+ continue;
+ if (auto *BI = dyn_cast<BranchInst>(U)) {
+ assert(BI->isConditional() && "Must be conditional");
+ BI->swapSuccessors();
+ // Don't need to swap this in terms of
+ // TrueBiasedRegions/FalseBiasedRegions because true-biased/false-biased
+ // mean whether the branch is likely to go into the if-then rather than
+ // successor0/successor1, and because we can tell which edge is the then or
+ // the else one by comparing the destination to the region exit block.
+ continue;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(U)) {
+ // Swap operands
+ Value *TrueValue = SI->getTrueValue();
+ Value *FalseValue = SI->getFalseValue();
+ SI->setTrueValue(FalseValue);
+ SI->setFalseValue(TrueValue);
+ SI->swapProfMetadata();
+ if (Scope->TrueBiasedSelects.count(SI)) {
+ assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
+ "Must not be already in");
+ Scope->FalseBiasedSelects.insert(SI);
+ } else if (Scope->FalseBiasedSelects.count(SI)) {
+ assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
+ "Must not be already in");
+ Scope->TrueBiasedSelects.insert(SI);
+ }
+ continue;
+ }
+ llvm_unreachable("Must be a branch or a select");
+ }
+ ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
+ return true;
+}
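+
+// E.g. with hypothetical IR: for `%c = icmp eq i32 %x, 0` feeding
+// `br i1 %c, label %then, label %else`, the predicate becomes `ne` and the
+// branch successors are swapped, so overall behavior (and likewise any select
+// on %c, whose true/false values get swapped) is unchanged.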
+
+// A helper for transformScopes. Insert a trivial phi at the scope exit block
+// for a value that's defined in the scope but used outside it (meaning it's
+// alive at the exit block).
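+// For example (illustrative): if %v is defined inside the scope and used by an
+// instruction outside it, a phi such as
+//   %v.phi = phi i32 [ %v, %pred0 ], [ %v, %pred1 ]
+// is created at the exit block and the outside use is rewritten to %v.phi.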
+static void insertTrivialPHIs(CHRScope *Scope,
+ BasicBlock *EntryBlock, BasicBlock *ExitBlock,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<BasicBlock *> BlocksInScopeSet;
+ SmallVector<BasicBlock *, 8> BlocksInScopeVec;
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+ // sub-Scopes.
+ BlocksInScopeSet.insert(BB);
+ BlocksInScopeVec.push_back(BB);
+ }
+ }
+ CHR_DEBUG(
+ dbgs() << "Inserting redundant phis\n";
+ for (BasicBlock *BB : BlocksInScopeVec) {
+ dbgs() << "BlockInScope " << BB->getName() << "\n";
+ });
+ for (BasicBlock *BB : BlocksInScopeVec) {
+ for (Instruction &I : *BB) {
+ SmallVector<Instruction *, 8> Users;
+ for (User *U : I.users()) {
+ if (auto *UI = dyn_cast<Instruction>(U)) {
+ if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
+ // Unless there's already a phi for I at the exit block.
+ !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
+ CHR_DEBUG(dbgs() << "V " << I << "\n");
+ CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
+ Users.push_back(UI);
+ } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
+ // There's a loop backedge from a block that's dominated by this
+ // scope to the entry block.
+ CHR_DEBUG(dbgs() << "V " << I << "\n");
+ CHR_DEBUG(dbgs()
+ << "Used at entry block (for a back edge) by a phi user "
+ << *UI << "\n");
+ Users.push_back(UI);
+ }
+ }
+ }
+ if (Users.size() > 0) {
+ // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
+ // ExitBlock. Replace I with the new phi in UI unless UI is another
+ // phi at ExitBlock.
+ unsigned PredCount = std::distance(pred_begin(ExitBlock),
+ pred_end(ExitBlock));
+ PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
+ &ExitBlock->front());
+ for (BasicBlock *Pred : predecessors(ExitBlock)) {
+ PN->addIncoming(&I, Pred);
+ }
+ TrivialPHIs.insert(PN);
+ CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
+ for (Instruction *UI : Users) {
+ for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
+ if (UI->getOperand(J) == &I) {
+ UI->setOperand(J, PN);
+ }
+ }
+ CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
+ }
+ }
+ }
+ }
+}
+
+// Assert that all the CHR regions of the scope have a biased branch or select.
+static void LLVM_ATTRIBUTE_UNUSED
+assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
+#ifndef NDEBUG
+ auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
+ if (Scope->TrueBiasedRegions.count(RI.R) ||
+ Scope->FalseBiasedRegions.count(RI.R))
+ return true;
+ for (SelectInst *SI : RI.Selects)
+ if (Scope->TrueBiasedSelects.count(SI) ||
+ Scope->FalseBiasedSelects.count(SI))
+ return true;
+ return false;
+ };
+ for (RegInfo &RI : Scope->CHRRegions) {
+ assert(HasBiasedBranchOrSelect(RI, Scope) &&
+ "Must have biased branch or select");
+ }
+#endif
+}
+
+// Assert that all the condition values of the biased branches and selects have
+// been hoisted to the pre-entry block or outside of the scope.
+static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
+ CHRScope *Scope, BasicBlock *PreEntryBlock) {
+ CHR_DEBUG(dbgs() << "Biased regions condition values \n");
+ for (RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+ if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ Value *V = BI->getCondition();
+ CHR_DEBUG(dbgs() << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ (void)(I); // Unused in release build.
+ assert((I->getParent() == PreEntryBlock ||
+ !Scope->contains(I)) &&
+ "Must have been hoisted to PreEntryBlock or outside the scope");
+ }
+ }
+ for (SelectInst *SI : RI.Selects) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+ if (!(IsTrueBiased || IsFalseBiased))
+ continue;
+ Value *V = SI->getCondition();
+ CHR_DEBUG(dbgs() << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ (void)(I); // Unused in release build.
+ assert((I->getParent() == PreEntryBlock ||
+ !Scope->contains(I)) &&
+ "Must have been hoisted to PreEntryBlock or outside the scope");
+ }
+ }
+ }
+}
+
+void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
+ CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
+
+ assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
+ Region *FirstRegion = Scope->RegInfos[0].R;
+ BasicBlock *EntryBlock = FirstRegion->getEntry();
+ Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
+ BasicBlock *ExitBlock = LastRegion->getExit();
+ Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
+
+ if (ExitBlock) {
+ // Insert a trivial phi at the exit block (where the CHR hot path and the
+ // cold path merge) for a value that's defined in the scope but used
+ // outside it (meaning it's alive at the exit block). We will add the
+ // incoming values for the CHR cold paths to it below. Without this, we'd
+ // miss updating phis for such values unless there happens to already be a
+ // phi for that value there.
+ insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
+ }
+
+ // Split the entry block of the first region. The new block becomes the new
+ // entry block of the first region. The old entry block becomes the block to
+ // insert the CHR branch into. Note that DT gets updated through the split.
+ // Because we update the entry of the first region after the split, and
+ // because a Region only points to its entry and exit blocks rather than
+ // keeping all its blocks in a list or set, the block membership and the
+ // entry/exit blocks of the region remain valid after the split.
+ CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
+ << " at " << *Scope->BranchInsertPoint << "\n");
+ BasicBlock *NewEntryBlock =
+ SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ FirstRegion->replaceEntryRecursive(NewEntryBlock);
+ BasicBlock *PreEntryBlock = EntryBlock;
+
+ ValueToValueMapTy VMap;
+ // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
+ // hot path (originals) and a cold path (clones) and update the PHIs at the
+ // exit block.
+ cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
+
+ // Replace the old (placeholder) branch with the new (merged) conditional
+ // branch.
+ BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
+ NewEntryBlock, VMap);
+
+#ifndef NDEBUG
+ assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
+#endif
+
+ // Hoist the conditional values of the branches/selects.
+ hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
+
+#ifndef NDEBUG
+ assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
+#endif
+
+ // Create the combined branch condition and constant-fold the branches/selects
+ // in the hot path.
+ fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
+ ProfileCount ? ProfileCount.getValue() : 0);
+}
+
+// A helper for transformScopes. Clone the blocks in the scope (excluding the
+// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
+// at the exit block.
+void CHR::cloneScopeBlocks(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BasicBlock *ExitBlock,
+ Region *LastRegion,
+ ValueToValueMapTy &VMap) {
+ // Clone all the blocks. The original blocks will be the hot-path
+ // CHR-optimized code and the cloned blocks will be the original unoptimized
+ // code. This is so that the block pointers from the
+ // CHRScope/Region/RegionInfo stay valid, continuing to point to the hot-path
+ // code to which CHR should apply.
+ SmallVector<BasicBlock*, 8> NewBlocks;
+ for (RegInfo &RI : Scope->RegInfos)
+ for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+ // sub-Scopes.
+ assert(BB != PreEntryBlock && "Don't copy the pre-entry block");
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
+ NewBlocks.push_back(NewBB);
+ VMap[BB] = NewBB;
+ }
+
+ // Place the cloned blocks right after the original blocks (right before the
+ // exit block, if any).
+ if (ExitBlock)
+ F.getBasicBlockList().splice(ExitBlock->getIterator(),
+ F.getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F.end());
+
+ // Update the cloned blocks/instructions to refer to themselves.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+ for (Instruction &I : *NewBlocks[i])
+ RemapInstruction(&I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ // Add the cloned blocks to the PHIs of the exit block. ExitBlock is null for
+ // the top-level region, in which case we don't need to add PHIs. The trivial
+ // PHIs inserted above will be updated here.
+ if (ExitBlock)
+ for (PHINode &PN : ExitBlock->phis())
+ for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
+ ++I) {
+ BasicBlock *Pred = PN.getIncomingBlock(I);
+ if (LastRegion->contains(Pred)) {
+ Value *V = PN.getIncomingValue(I);
+ auto It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
+ assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
+ PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
+ }
+ }
+}
+
+// A helper for transformScope. Replace the old (placeholder) branch with the
+// new (merged) conditional branch.
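+// For example (illustrative): the unconditional branch created by SplitBlock is
+// replaced with "br i1 true, label %entry.split, label %entry.split.nonchr",
+// where the true (hot) successor is the original code, the false (cold)
+// successor is its clone, and the i1 true condition is a placeholder.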
+BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
+ BasicBlock *EntryBlock,
+ BasicBlock *NewEntryBlock,
+ ValueToValueMapTy &VMap) {
+ BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
+ assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
+ "SplitBlock did not work correctly!");
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ assert(VMap.find(NewEntryBlock) != VMap.end() &&
+ "NewEntryBlock must have been copied");
+ OldBR->dropAllReferences();
+ OldBR->eraseFromParent();
+ // The constant-true condition is a placeholder. It will be replaced later in
+ // fixupBranchesAndSelects().
+ BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
+ cast<BasicBlock>(VMap[NewEntryBlock]),
+ ConstantInt::getTrue(F.getContext()));
+ PreEntryBlock->getInstList().push_back(NewBR);
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ return NewBR;
+}
+
+// A helper for transformScopes. Create the combined branch condition and
+// constant-fold the branches/selects in the hot path.
+void CHR::fixupBranchesAndSelects(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BranchInst *MergedBR,
+ uint64_t ProfileCount) {
+ Value *MergedCondition = ConstantInt::getTrue(F.getContext());
+ BranchProbability CHRBranchBias(1, 1);
+ uint64_t NumCHRedBranches = 0;
+ IRBuilder<> IRB(PreEntryBlock->getTerminator());
+ for (RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ if (RI.HasBranch) {
+ fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
+ ++NumCHRedBranches;
+ }
+ for (SelectInst *SI : RI.Selects) {
+ fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
+ ++NumCHRedBranches;
+ }
+ }
+ Stats.NumBranchesDelta += NumCHRedBranches - 1;
+ Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE,
+ "CHR",
+ // Refer to the hot (original) path
+ MergedBR->getSuccessor(0)->getTerminator())
+ << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
+ << " branches or selects";
+ });
+ MergedBR->setCondition(MergedCondition);
+ SmallVector<uint32_t, 2> Weights;
+ Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
+ Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
+ MDBuilder MDB(F.getContext());
+ MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
+ << "\n");
+}
+
+// A helper for fixupBranchesAndSelects. Add to the combined branch condition
+// and constant-fold a branch in the hot path.
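+// For example (illustrative): for a true-biased branch on %c, %c is AND-ed into
+// the merged condition at the pre-entry block and the branch itself is folded
+// to a constant condition that keeps the hot (biased) successor, so the hot
+// path no longer tests %c at run time.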
+void CHR::fixupBranch(Region *R, CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition,
+ BranchProbability &CHRBranchBias) {
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
+ "Must be truthy or falsy");
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
+ "Must be in the bias map");
+ BranchProbability Bias = BranchBiasMap[R];
+ assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+ // Take the min.
+ if (CHRBranchBias > Bias)
+ CHRBranchBias = Bias;
+ BasicBlock *IfThen = BI->getSuccessor(1);
+ BasicBlock *IfElse = BI->getSuccessor(0);
+ BasicBlock *RegionExitBlock = R->getExit();
+ assert(RegionExitBlock && "Null ExitBlock");
+ assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
+ IfThen != IfElse && "Invariant from findScopes");
+ if (IfThen == RegionExitBlock) {
+ // Swap them so that IfThen means going into it and IfElse means skipping
+ // it.
+ std::swap(IfThen, IfElse);
+ }
+ CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
+ << " IfElse " << IfElse->getName() << "\n");
+ Value *Cond = BI->getCondition();
+ BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
+ bool ConditionTrue = HotTarget == BI->getSuccessor(0);
+ addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
+ MergedCondition);
+ // Constant-fold the branch at ClonedEntryBlock.
+ assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
+ "The successor shouldn't change");
+ Value *NewCondition = ConditionTrue ?
+ ConstantInt::getTrue(F.getContext()) :
+ ConstantInt::getFalse(F.getContext());
+ BI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranchesAndSelects. Add to the combined branch condition
+// and constant-fold a select in the hot path.
+void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition,
+ BranchProbability &CHRBranchBias) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ assert((IsTrueBiased ||
+ Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
+ assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
+ "Must be in the bias map");
+ BranchProbability Bias = SelectBiasMap[SI];
+ assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+ // Take the min.
+ if (CHRBranchBias > Bias)
+ CHRBranchBias = Bias;
+ Value *Cond = SI->getCondition();
+ addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
+ MergedCondition);
+ Value *NewCondition = IsTrueBiased ?
+ ConstantInt::getTrue(F.getContext()) :
+ ConstantInt::getFalse(F.getContext());
+ SI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
+// condition.
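+// For example (illustrative): with a true-biased condition %c1 and a
+// false-biased condition %c2, the merged condition is built roughly as
+//   %m1 = and i1 true, %c1
+//   %n2 = xor i1 true, %c2   ; skipped if the icmp %c2 is negated in place
+//   %m2 = and i1 %m1, %n2
+// and %m2 becomes the condition of the merged CHR branch.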
+void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
+ Instruction *BranchOrSelect,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition) {
+ if (IsTrueBiased) {
+ MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+ } else {
+ // If Cond is an icmp and all of its users except for BranchOrSelect are
+ // branches or selects, negate the icmp predicate and swap the branch
+ // targets (or select operands) to avoid inserting an Xor to negate Cond.
+ bool Done = false;
+ if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
+ if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
+ MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+ Done = true;
+ }
+ if (!Done) {
+ Value *Negate = IRB.CreateXor(
+ ConstantInt::getTrue(F.getContext()), Cond);
+ MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
+ }
+ }
+}
+
+void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
+ unsigned I = 0;
+ DenseSet<PHINode *> TrivialPHIs;
+ for (CHRScope *Scope : CHRScopes) {
+ transformScopes(Scope, TrivialPHIs);
+ CHR_DEBUG(
+ std::ostringstream oss;
+ oss << " after transformScopes " << I++;
+ dumpIR(F, oss.str().c_str(), nullptr));
+ (void)I;
+ }
+}
+
+static void LLVM_ATTRIBUTE_UNUSED
+dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
+ dbgs() << Label << " " << Scopes.size() << "\n";
+ for (CHRScope *Scope : Scopes) {
+ dbgs() << *Scope << "\n";
+ }
+}
+
+bool CHR::run() {
+ if (!shouldApply(F, PSI))
+ return false;
+
+ CHR_DEBUG(dumpIR(F, "before", nullptr));
+
+ bool Changed = false;
+ {
+ CHR_DEBUG(
+ dbgs() << "RegionInfo:\n";
+ RI.print(dbgs()));
+
+ // Recursively traverse the region tree and find regions that have biased
+ // branches and/or selects and create scopes.
+ SmallVector<CHRScope *, 8> AllScopes;
+ findScopes(AllScopes);
+ CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
+
+ // Split the scopes if 1) the condition values of the biased
+ // branches/selects of the inner/lower scope can't be hoisted up to the
+ // outermost/uppermost scope entry, or 2) the condition values of the biased
+ // branches/selects in a scope (including subscopes) don't share at least
+ // one common value.
+ SmallVector<CHRScope *, 8> SplitScopes;
+ splitScopes(AllScopes, SplitScopes);
+ CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
+
+ // After splitting, set the biased regions and selects of a scope (a tree
+ // root) that include those of the subscopes.
+ classifyBiasedScopes(SplitScopes);
+ CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
+
+ // Filter out the scopes that have only one biased region or select (CHR
+ // isn't useful in such a case).
+ SmallVector<CHRScope *, 8> FilteredScopes;
+ filterScopes(SplitScopes, FilteredScopes);
+ CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
+
+ // Set the regions to be CHR'ed and their hoist stops for each scope.
+ SmallVector<CHRScope *, 8> SetScopes;
+ setCHRRegions(FilteredScopes, SetScopes);
+ CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
+
+ // Sort CHRScopes by the depth so that outer CHRScopes come before inner
+ // ones. We need to apply CHR from outer to inner so that we apply CHR only
+ // to the hot path, rather than both hot and cold paths.
+ SmallVector<CHRScope *, 8> SortedScopes;
+ sortScopes(SetScopes, SortedScopes);
+ CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
+
+ CHR_DEBUG(
+ dbgs() << "RegionInfo:\n";
+ RI.print(dbgs()));
+
+ // Apply the CHR transformation.
+ if (!SortedScopes.empty()) {
+ transformScopes(SortedScopes);
+ Changed = true;
+ }
+ }
+
+ if (Changed) {
+ CHR_DEBUG(dumpIR(F, "after", &Stats));
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
+ << ore::NV("Function", &F) << " "
+ << "Reduced the number of branches in hot paths by "
+ << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
+ << " (static) and "
+ << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
+ << " (weighted by PGO count)";
+ });
+ }
+
+ return Changed;
+}
+
+bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
+ BlockFrequencyInfo &BFI =
+ getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ProfileSummaryInfo &PSI =
+ getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
+ std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
+ llvm::make_unique<OptimizationRemarkEmitter>(&F);
+ return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
+}
+
+namespace llvm {
+
+ControlHeightReductionPass::ControlHeightReductionPass() {
+ parseCHRFilterFiles();
+}
+
+PreservedAnalyses ControlHeightReductionPass::run(
+ Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ auto &MAM = MAMProxy.getManager();
+ auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index f03fcc9c4e2c..4c3c6c9added 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -645,8 +645,8 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
GlobalValue::LinkageTypes NewFLink,
FunctionType *NewFT) {
FunctionType *FT = F->getFunctionType();
- Function *NewF = Function::Create(NewFT, NewFLink, NewFName,
- F->getParent());
+ Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
+ NewFName, F->getParent());
NewF->copyAttributesFrom(F);
NewF->removeAttributes(
AttributeList::ReturnIndex,
@@ -819,7 +819,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
// easily identify cases of mismatching ABIs.
if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
FunctionType *NewFT = getArgsFunctionType(FT);
- Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M);
+ Function *NewF = Function::Create(NewFT, F.getLinkage(),
+ F.getAddressSpace(), "", &M);
NewF->copyAttributesFrom(&F);
NewF->removeAttributes(
AttributeList::ReturnIndex,
@@ -924,7 +925,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
Instruction *Next = Inst->getNextNode();
// DFSanVisitor may delete Inst, so keep track of whether it was a
// terminator.
- bool IsTerminator = isa<TerminatorInst>(Inst);
+ bool IsTerminator = Inst->isTerminator();
if (!DFSF.SkipInsts.count(Inst))
DFSanVisitor(DFSF).visit(Inst);
if (IsTerminator)
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
index 33f220a893df..db438e78ded9 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
@@ -144,21 +144,6 @@ OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
return Options;
}
-// Create a constant for Str so that we can pass it to the run-time lib.
-static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
- bool AllowMerging) {
- Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
- // We use private linkage for module-local strings. If they can be merged
- // with another one, we set the unnamed_addr attribute.
- GlobalVariable *GV =
- new GlobalVariable(M, StrConst->getType(), true,
- GlobalValue::PrivateLinkage, StrConst, "");
- if (AllowMerging)
- GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
- return GV;
-}
-
/// EfficiencySanitizer: instrument each module to find performance issues.
class EfficiencySanitizer : public ModulePass {
public:
@@ -902,7 +887,7 @@ bool EfficiencySanitizer::instrumentFastpathWorkingSet(
Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
// The AND and CMP will be turned into a TEST instruction by the compiler.
Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
- TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
+ Instruction *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
// FIXME: do I need to call SetCurrentDebugLocation?
IRB.SetInsertPoint(CmpTerm);
// We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 132e8089fe3b..9af64ed332cd 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -21,9 +21,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/UniqueVector.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
@@ -36,6 +36,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
@@ -96,30 +97,25 @@ private:
// profiling runtime to emit .gcda files when run.
bool emitProfileArcs();
+ bool isFunctionInstrumented(const Function &F);
+ std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
+ static bool doesFilenameMatchARegex(StringRef Filename,
+ std::vector<Regex> &Regexes);
+
// Get pointers to the functions in the runtime library.
Constant *getStartFileFunc();
- Constant *getIncrementIndirectCounterFunc();
Constant *getEmitFunctionFunc();
Constant *getEmitArcsFunc();
Constant *getSummaryInfoFunc();
Constant *getEndFileFunc();
- // Create or retrieve an i32 state value that is used to represent the
- // pred block number for certain non-trivial edges.
- GlobalVariable *getEdgeStateValue();
-
- // Produce a table of pointers to counters, by predecessor and successor
- // block number.
- GlobalVariable *buildEdgeLookupTable(Function *F, GlobalVariable *Counter,
- const UniqueVector<BasicBlock *> &Preds,
- const UniqueVector<BasicBlock *> &Succs);
-
// Add the function to write out all our counters to the global destructor
// list.
Function *
insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
Function *insertFlush(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
- void insertIndirectCounterIncrement();
+
+ void AddFlushBeforeForkAndExec();
enum class GCovFileType { GCNO, GCDA };
std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
@@ -135,6 +131,9 @@ private:
const TargetLibraryInfo *TLI;
LLVMContext *Ctx;
SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
+ std::vector<Regex> FilterRe;
+ std::vector<Regex> ExcludeRe;
+ StringMap<bool> InstrumentedFiles;
};
class GCOVProfilerLegacyPass : public ModulePass {
@@ -181,6 +180,21 @@ static StringRef getFunctionName(const DISubprogram *SP) {
return SP->getName();
}
+/// Extract a filename for a DISubprogram.
+///
+/// Prefer relative paths in the coverage notes. Clang also may split
+/// up absolute paths into a directory and filename component. When
+/// the relative path doesn't exist, reconstruct the absolute path.
+static SmallString<128> getFilename(const DISubprogram *SP) {
+ SmallString<128> Path;
+ StringRef RelPath = SP->getFilename();
+ if (sys::fs::exists(RelPath))
+ Path = RelPath;
+ else
+ sys::path::append(Path, SP->getDirectory(), SP->getFilename());
+ return Path;
+}
+
namespace {
class GCOVRecord {
protected:
@@ -257,7 +271,7 @@ namespace {
}
private:
- StringRef Filename;
+ std::string Filename;
SmallVector<uint32_t, 32> Lines;
};
@@ -287,11 +301,10 @@ namespace {
write(Len);
write(Number);
- llvm::sort(
- SortedLinesByFile.begin(), SortedLinesByFile.end(),
- [](StringMapEntry<GCOVLines> *LHS, StringMapEntry<GCOVLines> *RHS) {
- return LHS->getKey() < RHS->getKey();
- });
+ llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS,
+ StringMapEntry<GCOVLines> *RHS) {
+ return LHS->getKey() < RHS->getKey();
+ });
for (auto &I : SortedLinesByFile)
I->getValue().writeOut();
write(0);
@@ -379,8 +392,9 @@ namespace {
void writeOut() {
writeBytes(FunctionTag, 4);
+ SmallString<128> Filename = getFilename(SP);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
- 1 + lengthOfGCOVString(SP->getFilename()) + 1;
+ 1 + lengthOfGCOVString(Filename) + 1;
if (UseCfgChecksum)
++BlockLen;
write(BlockLen);
@@ -389,7 +403,7 @@ namespace {
if (UseCfgChecksum)
write(CfgChecksum);
writeGCOVString(getFunctionName(SP));
- writeGCOVString(SP->getFilename());
+ writeGCOVString(Filename);
write(SP->getLine());
// Emit count of blocks.
@@ -434,6 +448,72 @@ namespace {
};
}
+// RegexesStr is a string containing different regexes separated by semicolons.
+// For example "foo\..*$;bar\..*$".
+std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) {
+ std::vector<Regex> Regexes;
+ while (!RegexesStr.empty()) {
+ std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';');
+ if (!HeadTail.first.empty()) {
+ Regex Re(HeadTail.first);
+ std::string Err;
+ if (!Re.isValid(Err)) {
+ Ctx->emitError(Twine("Regex ") + HeadTail.first +
+ " is not valid: " + Err);
+ }
+ Regexes.emplace_back(std::move(Re));
+ }
+ RegexesStr = HeadTail.second;
+ }
+ return Regexes;
+}
+
+bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename,
+ std::vector<Regex> &Regexes) {
+ for (Regex &Re : Regexes) {
+ if (Re.match(Filename)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
+ if (FilterRe.empty() && ExcludeRe.empty()) {
+ return true;
+ }
+ SmallString<128> Filename = getFilename(F.getSubprogram());
+ auto It = InstrumentedFiles.find(Filename);
+ if (It != InstrumentedFiles.end()) {
+ return It->second;
+ }
+
+ SmallString<256> RealPath;
+ StringRef RealFilename;
+
+ // Path can be
+ // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
+ // such a case we must get the real_path.
+ if (sys::fs::real_path(Filename, RealPath)) {
+ // real_path can fail with path like "foo.c".
+ RealFilename = Filename;
+ } else {
+ RealFilename = RealPath;
+ }
+
+ bool ShouldInstrument;
+ if (FilterRe.empty()) {
+ ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe);
+ } else if (ExcludeRe.empty()) {
+ ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe);
+ } else {
+ ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) &&
+ !doesFilenameMatchARegex(RealFilename, ExcludeRe);
+ }
+ InstrumentedFiles[Filename] = ShouldInstrument;
+ return ShouldInstrument;
+}
+
std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
GCovFileType OutputType) {
bool Notes = OutputType == GCovFileType::GCNO;
@@ -481,6 +561,11 @@ bool GCOVProfiler::runOnModule(Module &M, const TargetLibraryInfo &TLI) {
this->TLI = &TLI;
Ctx = &M.getContext();
+ AddFlushBeforeForkAndExec();
+
+ FilterRe = createRegexesFromString(Options.Filter);
+ ExcludeRe = createRegexesFromString(Options.Exclude);
+
if (Options.EmitNotes) emitProfileNotes();
if (Options.EmitData) return emitProfileArcs();
return false;
@@ -537,6 +622,38 @@ static bool shouldKeepInEntry(BasicBlock::iterator It) {
return false;
}
+void GCOVProfiler::AddFlushBeforeForkAndExec() {
+ SmallVector<Instruction *, 2> ForkAndExecs;
+ for (auto &F : M->functions()) {
+ for (auto &I : instructions(F)) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ if (Function *Callee = CI->getCalledFunction()) {
+ LibFunc LF;
+ if (TLI->getLibFunc(*Callee, LF) &&
+ (LF == LibFunc_fork || LF == LibFunc_execl ||
+ LF == LibFunc_execle || LF == LibFunc_execlp ||
+ LF == LibFunc_execv || LF == LibFunc_execvp ||
+ LF == LibFunc_execve || LF == LibFunc_execvpe ||
+ LF == LibFunc_execvP)) {
+ ForkAndExecs.push_back(&I);
+ }
+ }
+ }
+ }
+ }
+
+ // We need to split the block after the fork/exec call
+ // because otherwise the counters for the lines after the call
+ // would be the same as for the lines before it.
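+ // For example (illustrative): a call to fork() becomes
+ //   __gcov_flush();
+ //   fork();   // the call now starts a new basic block
+ // so the lines after the call get counters of their own.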
+ for (auto I : ForkAndExecs) {
+ IRBuilder<> Builder(I);
+ FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
+ Constant *GCOVFlush = M->getOrInsertFunction("__gcov_flush", FTy);
+ Builder.CreateCall(GCOVFlush);
+ I->getParent()->splitBasicBlock(I);
+ }
+}
+
void GCOVProfiler::emitProfileNotes() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes) return;
@@ -566,7 +683,8 @@ void GCOVProfiler::emitProfileNotes() {
for (auto &F : M->functions()) {
DISubprogram *SP = F.getSubprogram();
if (!SP) continue;
- if (!functionHasLines(F)) continue;
+ if (!functionHasLines(F) || !isFunctionInstrumented(F))
+ continue;
// TODO: Functions using scope-based EH are currently not supported.
if (isUsingScopeBasedEH(F)) continue;
@@ -583,9 +701,15 @@ void GCOVProfiler::emitProfileNotes() {
Options.ExitBlockBeforeBody));
GCOVFunction &Func = *Funcs.back();
+ // Add the function line number to the lines of the entry block
+ // to have a counter for the function definition.
+ uint32_t Line = SP->getLine();
+ auto Filename = getFilename(SP);
+ Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
+
for (auto &BB : F) {
GCOVBlock &Block = Func.getBlock(&BB);
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (int successors = TI->getNumSuccessors()) {
for (int i = 0; i != successors; ++i) {
Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
@@ -594,7 +718,6 @@ void GCOVProfiler::emitProfileNotes() {
Block.addEdge(Func.getReturnBlock());
}
- uint32_t Line = 0;
for (auto &I : BB) {
// Debug intrinsic locations correspond to the location of the
// declaration, not necessarily any statements or expressions.
@@ -605,16 +728,18 @@ void GCOVProfiler::emitProfileNotes() {
continue;
// Artificial lines such as calls to the global constructors.
- if (Loc.getLine() == 0) continue;
+ if (Loc.getLine() == 0 || Loc.isImplicitCode())
+ continue;
if (Line == Loc.getLine()) continue;
Line = Loc.getLine();
if (SP != getDISubprogram(Loc.getScope()))
continue;
- GCOVLines &Lines = Block.getFile(SP->getFilename());
+ GCOVLines &Lines = Block.getFile(Filename);
Lines.addLine(Loc.getLine());
}
+ Line = 0;
}
EdgeDestinations += Func.getEdgeDestinations();
}
@@ -639,24 +764,28 @@ bool GCOVProfiler::emitProfileArcs() {
if (!CU_Nodes) return false;
bool Result = false;
- bool InsertIndCounterIncrCode = false;
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
for (auto &F : M->functions()) {
DISubprogram *SP = F.getSubprogram();
if (!SP) continue;
- if (!functionHasLines(F)) continue;
+ if (!functionHasLines(F) || !isFunctionInstrumented(F))
+ continue;
// TODO: Functions using scope-based EH are currently not supported.
if (isUsingScopeBasedEH(F)) continue;
if (!Result) Result = true;
+ DenseMap<std::pair<BasicBlock *, BasicBlock *>, unsigned> EdgeToCounter;
unsigned Edges = 0;
for (auto &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
- if (isa<ReturnInst>(TI))
- ++Edges;
- else
- Edges += TI->getNumSuccessors();
+ Instruction *TI = BB.getTerminator();
+ if (isa<ReturnInst>(TI)) {
+ EdgeToCounter[{&BB, nullptr}] = Edges++;
+ } else {
+ for (BasicBlock *Succ : successors(TI)) {
+ EdgeToCounter[{&BB, Succ}] = Edges++;
+ }
+ }
}
ArrayType *CounterTy =
@@ -668,63 +797,42 @@ bool GCOVProfiler::emitProfileArcs() {
"__llvm_gcov_ctr");
CountersBySP.push_back(std::make_pair(Counters, SP));
- UniqueVector<BasicBlock *> ComplexEdgePreds;
- UniqueVector<BasicBlock *> ComplexEdgeSuccs;
-
- unsigned Edge = 0;
+ // If a BB has several predecessors, use a PHINode to select
+ // the correct counter.
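+ // For example (illustrative): a block with two predecessors gets a phi like
+ //   %ctr = phi i64* [ %ctr.edge2, %pred0 ], [ %ctr.edge5, %pred1 ]
+ // where each incoming value is a GEP into __llvm_gcov_ctr for that edge's
+ // counter, followed by a single load/add/store through %ctr.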
for (auto &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
- int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
- if (Successors) {
- if (Successors == 1) {
- IRBuilder<> Builder(&*BB.getFirstInsertionPt());
- Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
- Edge);
- Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count, Builder.getInt64(1));
- Builder.CreateStore(Count, Counter);
- } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- IRBuilder<> Builder(BI);
- Value *Sel = Builder.CreateSelect(BI->getCondition(),
- Builder.getInt64(Edge),
- Builder.getInt64(Edge + 1));
- Value *Counter = Builder.CreateInBoundsGEP(
- Counters->getValueType(), Counters, {Builder.getInt64(0), Sel});
+ const unsigned EdgeCount =
+ std::distance(pred_begin(&BB), pred_end(&BB));
+ if (EdgeCount) {
+ // The phi node must be at the beginning of the BB.
+ IRBuilder<> BuilderForPhi(&*BB.begin());
+ Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
+ PHINode *Phi = BuilderForPhi.CreatePHI(Int64PtrTy, EdgeCount);
+ for (BasicBlock *Pred : predecessors(&BB)) {
+ auto It = EdgeToCounter.find({Pred, &BB});
+ assert(It != EdgeToCounter.end());
+ const unsigned Edge = It->second;
+ Value *EdgeCounter =
+ BuilderForPhi.CreateConstInBoundsGEP2_64(Counters, 0, Edge);
+ Phi->addIncoming(EdgeCounter, Pred);
+ }
+
+ // Skip phis, landingpads.
+ IRBuilder<> Builder(&*BB.getFirstInsertionPt());
+ Value *Count = Builder.CreateLoad(Phi);
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+ Builder.CreateStore(Count, Phi);
+
+ Instruction *TI = BB.getTerminator();
+ if (isa<ReturnInst>(TI)) {
+ auto It = EdgeToCounter.find({&BB, nullptr});
+ assert(It != EdgeToCounter.end());
+ const unsigned Edge = It->second;
+ Value *Counter =
+ Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge);
Value *Count = Builder.CreateLoad(Counter);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
- } else {
- ComplexEdgePreds.insert(&BB);
- for (int i = 0; i != Successors; ++i)
- ComplexEdgeSuccs.insert(TI->getSuccessor(i));
}
-
- Edge += Successors;
- }
- }
-
- if (!ComplexEdgePreds.empty()) {
- GlobalVariable *EdgeTable =
- buildEdgeLookupTable(&F, Counters,
- ComplexEdgePreds, ComplexEdgeSuccs);
- GlobalVariable *EdgeState = getEdgeStateValue();
-
- for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
- IRBuilder<> Builder(&*ComplexEdgePreds[i + 1]->getFirstInsertionPt());
- Builder.CreateStore(Builder.getInt32(i), EdgeState);
- }
-
- for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
- // Call runtime to perform increment.
- IRBuilder<> Builder(&*ComplexEdgeSuccs[i + 1]->getFirstInsertionPt());
- Value *CounterPtrArray =
- Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
- i * ComplexEdgePreds.size());
-
- // Build code to increment the counter.
- InsertIndCounterIncrCode = true;
- Builder.CreateCall(getIncrementIndirectCounterFunc(),
- {EdgeState, CounterPtrArray});
}
}
}
@@ -763,60 +871,9 @@ bool GCOVProfiler::emitProfileArcs() {
appendToGlobalCtors(*M, F, 0);
}
- if (InsertIndCounterIncrCode)
- insertIndirectCounterIncrement();
-
return Result;
}
-// All edges with successors that aren't branches are "complex", because it
-// requires complex logic to pick which counter to update.
-GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
- Function *F,
- GlobalVariable *Counters,
- const UniqueVector<BasicBlock *> &Preds,
- const UniqueVector<BasicBlock *> &Succs) {
- // TODO: support invoke, threads. We rely on the fact that nothing can modify
- // the whole-Module pred edge# between the time we set it and the time we next
- // read it. Threads and invoke make this untrue.
-
- // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]].
- size_t TableSize = Succs.size() * Preds.size();
- Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
- ArrayType *EdgeTableTy = ArrayType::get(Int64PtrTy, TableSize);
-
- std::unique_ptr<Constant * []> EdgeTable(new Constant *[TableSize]);
- Constant *NullValue = Constant::getNullValue(Int64PtrTy);
- for (size_t i = 0; i != TableSize; ++i)
- EdgeTable[i] = NullValue;
-
- unsigned Edge = 0;
- for (BasicBlock &BB : *F) {
- TerminatorInst *TI = BB.getTerminator();
- int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
- if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) {
- for (int i = 0; i != Successors; ++i) {
- BasicBlock *Succ = TI->getSuccessor(i);
- IRBuilder<> Builder(Succ);
- Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
- Edge + i);
- EdgeTable[((Succs.idFor(Succ) - 1) * Preds.size()) +
- (Preds.idFor(&BB) - 1)] = cast<Constant>(Counter);
- }
- }
- Edge += Successors;
- }
-
- GlobalVariable *EdgeTableGV =
- new GlobalVariable(
- *M, EdgeTableTy, true, GlobalValue::InternalLinkage,
- ConstantArray::get(EdgeTableTy,
- makeArrayRef(&EdgeTable[0],TableSize)),
- "__llvm_gcda_edge_table");
- EdgeTableGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- return EdgeTableGV;
-}
-
Constant *GCOVProfiler::getStartFileFunc() {
Type *Args[] = {
Type::getInt8PtrTy(*Ctx), // const char *orig_filename
@@ -832,17 +889,6 @@ Constant *GCOVProfiler::getStartFileFunc() {
}
-Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
- Type *Int32Ty = Type::getInt32Ty(*Ctx);
- Type *Int64Ty = Type::getInt64Ty(*Ctx);
- Type *Args[] = {
- Int32Ty->getPointerTo(), // uint32_t *predecessor
- Int64Ty->getPointerTo()->getPointerTo() // uint64_t **counters
- };
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
- return M->getOrInsertFunction("__llvm_gcov_indirect_counter_increment", FTy);
-}
-
Constant *GCOVProfiler::getEmitFunctionFunc() {
Type *Args[] = {
Type::getInt32Ty(*Ctx), // uint32_t ident
@@ -886,19 +932,6 @@ Constant *GCOVProfiler::getEndFileFunc() {
return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
}
-GlobalVariable *GCOVProfiler::getEdgeStateValue() {
- GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred");
- if (!GV) {
- GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false,
- GlobalValue::InternalLinkage,
- ConstantInt::get(Type::getInt32Ty(*Ctx),
- 0xffffffff),
- "__llvm_gcov_global_state_pred");
- GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- }
- return GV;
-}
-
Function *GCOVProfiler::insertCounterWriteout(
ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
@@ -1122,57 +1155,6 @@ Function *GCOVProfiler::insertCounterWriteout(
return WriteoutF;
}
-void GCOVProfiler::insertIndirectCounterIncrement() {
- Function *Fn =
- cast<Function>(GCOVProfiler::getIncrementIndirectCounterFunc());
- Fn->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- Fn->setLinkage(GlobalValue::InternalLinkage);
- Fn->addFnAttr(Attribute::NoInline);
- if (Options.NoRedZone)
- Fn->addFnAttr(Attribute::NoRedZone);
-
- // Create basic blocks for function.
- BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", Fn);
- IRBuilder<> Builder(BB);
-
- BasicBlock *PredNotNegOne = BasicBlock::Create(*Ctx, "", Fn);
- BasicBlock *CounterEnd = BasicBlock::Create(*Ctx, "", Fn);
- BasicBlock *Exit = BasicBlock::Create(*Ctx, "exit", Fn);
-
- // uint32_t pred = *predecessor;
- // if (pred == 0xffffffff) return;
- Argument *Arg = &*Fn->arg_begin();
- Arg->setName("predecessor");
- Value *Pred = Builder.CreateLoad(Arg, "pred");
- Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff));
- BranchInst::Create(Exit, PredNotNegOne, Cond, BB);
-
- Builder.SetInsertPoint(PredNotNegOne);
-
- // uint64_t *counter = counters[pred];
- // if (!counter) return;
- Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
- Arg = &*std::next(Fn->arg_begin());
- Arg->setName("counters");
- Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred);
- Value *Counter = Builder.CreateLoad(GEP, "counter");
- Cond = Builder.CreateICmpEQ(Counter,
- Constant::getNullValue(
- Builder.getInt64Ty()->getPointerTo()));
- Builder.CreateCondBr(Cond, Exit, CounterEnd);
-
- // ++*counter;
- Builder.SetInsertPoint(CounterEnd);
- Value *Add = Builder.CreateAdd(Builder.CreateLoad(Counter),
- Builder.getInt64(1));
- Builder.CreateStore(Add, Counter);
- Builder.CreateBr(Exit);
-
- // Fill in the exit block.
- Builder.SetInsertPoint(Exit);
- Builder.CreateRetVoid();
-}
-
Function *GCOVProfiler::
insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index d62598bb5d4f..d04c2b76288f 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -44,6 +44,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <sstream>
using namespace llvm;
@@ -63,6 +64,8 @@ static const uint64_t kDynamicShadowSentinel =
std::numeric_limits<uint64_t>::max();
static const unsigned kPointerTagShift = 56;
+static const unsigned kShadowBaseAlignment = 32;
+
static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
"hwasan-memory-access-callback-prefix",
cl::desc("Prefix for memory access callbacks"), cl::Hidden,
@@ -127,6 +130,32 @@ static cl::opt<unsigned long long> ClMappingOffset(
cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"), cl::Hidden,
cl::init(0));
+static cl::opt<bool>
+ ClWithIfunc("hwasan-with-ifunc",
+ cl::desc("Access dynamic shadow through an ifunc global on "
+ "platforms that support this"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClWithTls(
+ "hwasan-with-tls",
+ cl::desc("Access dynamic shadow through a thread-local pointer on "
+ "platforms that support this"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+ ClRecordStackHistory("hwasan-record-stack-history",
+ cl::desc("Record stack frames with tagged allocations "
+ "in a thread-local ring buffer"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool>
+ ClCreateFrameDescriptions("hwasan-create-frame-descriptions",
+ cl::desc("create static frame descriptions"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+ ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
+ cl::desc("instrument memory intrinsics"),
+ cl::Hidden, cl::init(true));
namespace {
/// An instrumentation pass implementing detection of addressability bugs
@@ -150,13 +179,14 @@ public:
void initializeCallbacks(Module &M);
- void maybeInsertDynamicShadowAtFunctionEntry(Function &F);
+ Value *getDynamicShadowNonTls(IRBuilder<> &IRB);
void untagPointerOperand(Instruction *I, Value *Addr);
Value *memToShadow(Value *Shadow, Type *Ty, IRBuilder<> &IRB);
void instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
unsigned AccessSizeIndex,
Instruction *InsertBefore);
+ void instrumentMemIntrinsic(MemIntrinsic *MI);
bool instrumentMemAccess(Instruction *I);
Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
uint64_t *TypeSize, unsigned *Alignment,
@@ -167,26 +197,53 @@ public:
Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
bool instrumentStack(SmallVectorImpl<AllocaInst *> &Allocas,
- SmallVectorImpl<Instruction *> &RetVec);
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
Value *getNextTagWithCall(IRBuilder<> &IRB);
Value *getStackBaseTag(IRBuilder<> &IRB);
Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
unsigned AllocaNo);
Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
+ Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
+ Value *emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
+
private:
LLVMContext *C;
+ std::string CurModuleUniqueId;
Triple TargetTriple;
+ Function *HWAsanMemmove, *HWAsanMemcpy, *HWAsanMemset;
+
+ // Frame description is a way to pass names/sizes of local variables
+ // to the run-time w/o adding extra executable code in every function.
+ // We do this by creating a separate section with {PC,Descr} pairs and passing
+ // the section beg/end to __hwasan_init_frames() at module init time.
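+ // For example (illustrative): a function with locals "int buf[4]; char c;"
+ // would get the description string "16 buf; 1 c; " paired with the function
+ // address in the __hwasan_frames section.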
+ std::string createFrameString(ArrayRef<AllocaInst*> Allocas);
+ void createFrameGlobal(Function &F, const std::string &FrameString);
+ // Get the section name for frame descriptions. Currently ELF-only.
+ const char *getFrameSection() { return "__hwasan_frames"; }
+ const char *getFrameSectionBeg() { return "__start___hwasan_frames"; }
+ const char *getFrameSectionEnd() { return "__stop___hwasan_frames"; }
+ GlobalVariable *createFrameSectionBound(Module &M, Type *Ty,
+ const char *Name) {
+ auto GV = new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, Name);
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ return GV;
+ }
/// This struct defines the shadow mapping using the rule:
/// shadow = (mem >> Scale) + Offset.
/// If InGlobal is true, then
/// extern char __hwasan_shadow[];
/// shadow = (mem >> Scale) + &__hwasan_shadow
+ /// If InTls is true, then
+ /// extern char *__hwasan_tls;
+ /// shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
struct ShadowMapping {
int Scale;
uint64_t Offset;
bool InGlobal;
+ bool InTls;
void init(Triple &TargetTriple);
unsigned getAllocaAlignment() const { return 1U << Scale; }
@@ -194,6 +251,7 @@ private:
ShadowMapping Mapping;
Type *IntptrTy;
+ Type *Int8PtrTy;
Type *Int8Ty;
bool CompileKernel;
@@ -206,10 +264,12 @@ private:
Function *HwasanTagMemoryFunc;
Function *HwasanGenerateTagFunc;
+ Function *HwasanThreadEnterFunc;
Constant *ShadowGlobal;
Value *LocalDynamicShadow = nullptr;
+ GlobalValue *ThreadPtrGlobal = nullptr;
};
} // end anonymous namespace
@@ -243,8 +303,10 @@ bool HWAddressSanitizer::doInitialization(Module &M) {
Mapping.init(TargetTriple);
C = &(M.getContext());
+ CurModuleUniqueId = getUniqueModuleId(&M);
IRBuilder<> IRB(*C);
IntptrTy = IRB.getIntPtrTy(DL);
+ Int8PtrTy = IRB.getInt8PtrTy();
Int8Ty = IRB.getInt8Ty();
HwasanCtorFunction = nullptr;
@@ -254,8 +316,38 @@ bool HWAddressSanitizer::doInitialization(Module &M) {
kHwasanInitName,
/*InitArgTypes=*/{},
/*InitArgs=*/{});
- appendToGlobalCtors(M, HwasanCtorFunction, 0);
+ Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
+ HwasanCtorFunction->setComdat(CtorComdat);
+ appendToGlobalCtors(M, HwasanCtorFunction, 0, HwasanCtorFunction);
+
+ // Create a zero-length global in __hwasan_frames so that the linker will
+ // always create start and stop symbols.
+ //
+ // N.B. If we ever start creating associated metadata in this pass this
+ // global will need to be associated with the ctor.
+ Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
+ auto GV =
+ new GlobalVariable(M, Int8Arr0Ty, /*isConstantGlobal*/ true,
+ GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(Int8Arr0Ty), "__hwasan");
+ GV->setSection(getFrameSection());
+ GV->setComdat(CtorComdat);
+ appendToCompilerUsed(M, GV);
+
+ IRBuilder<> IRBCtor(HwasanCtorFunction->getEntryBlock().getTerminator());
+ IRBCtor.CreateCall(
+ declareSanitizerInitFunction(M, "__hwasan_init_frames",
+ {Int8PtrTy, Int8PtrTy}),
+ {createFrameSectionBound(M, Int8Ty, getFrameSectionBeg()),
+ createFrameSectionBound(M, Int8Ty, getFrameSectionEnd())});
}
+
+ if (!TargetTriple.isAndroid())
+ appendToCompilerUsed(
+ M, ThreadPtrGlobal = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::ExternalLinkage, nullptr,
+ "__hwasan_tls", nullptr, GlobalVariable::InitialExecTLSModel));
+
return true;
}
@@ -281,21 +373,35 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
}
HwasanTagMemoryFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__hwasan_tag_memory", IRB.getVoidTy(), IntptrTy, Int8Ty, IntptrTy));
+ "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy));
HwasanGenerateTagFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty));
if (Mapping.InGlobal)
ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
ArrayType::get(IRB.getInt8Ty(), 0));
+
+ const std::string MemIntrinCallbackPrefix =
+ CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
+ HWAsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
+ HWAsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
+ HWAsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy));
+
+ HwasanThreadEnterFunc = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("__hwasan_thread_enter", IRB.getVoidTy()));
}
-void HWAddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) {
+Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
// Generate code only when dynamic addressing is needed.
if (Mapping.Offset != kDynamicShadowSentinel)
- return;
+ return nullptr;
- IRBuilder<> IRB(&F.front().front());
if (Mapping.InGlobal) {
// An empty inline asm with input reg == output reg.
// An opaque pointer-to-int cast, basically.
@@ -303,11 +409,12 @@ void HWAddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) {
FunctionType::get(IntptrTy, {ShadowGlobal->getType()}, false),
StringRef(""), StringRef("=r,0"),
/*hasSideEffects=*/false);
- LocalDynamicShadow = IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
+ return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
} else {
- Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
- kHwasanShadowMemoryDynamicAddress, IntptrTy);
- LocalDynamicShadow = IRB.CreateLoad(GlobalDynamicAddress);
+ Value *GlobalDynamicAddress =
+ IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
+ kHwasanShadowMemoryDynamicAddress, IntptrTy);
+ return IRB.CreateLoad(GlobalDynamicAddress);
}
}
@@ -421,8 +528,7 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
IRB.getInt8Ty());
Value *AddrLong = untagPointer(IRB, PtrLong);
Value *ShadowLong = memToShadow(AddrLong, PtrLong->getType(), IRB);
- Value *MemTag =
- IRB.CreateLoad(IRB.CreateIntToPtr(ShadowLong, IRB.getInt8PtrTy()));
+ Value *MemTag = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowLong, Int8PtrTy));
Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
int matchAllTag = ClMatchAllTag.getNumOccurrences() > 0 ?
@@ -433,7 +539,7 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
}
- TerminatorInst *CheckTerm =
+ Instruction *CheckTerm =
SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, !Recover,
MDBuilder(*C).createBranchWeights(1, 100000));
@@ -464,12 +570,36 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
IRB.CreateCall(Asm, PtrLong);
}
+void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+ IRBuilder<> IRB(MI);
+ if (isa<MemTransferInst>(MI)) {
+ IRB.CreateCall(
+ isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ } else if (isa<MemSetInst>(MI)) {
+ IRB.CreateCall(
+ HWAsanMemset,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ }
+ MI->eraseFromParent();
+}
+
bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
LLVM_DEBUG(dbgs() << "Instrumenting: " << *I << "\n");
bool IsWrite = false;
unsigned Alignment = 0;
uint64_t TypeSize = 0;
Value *MaybeMask = nullptr;
+
+ if (ClInstrumentMemIntrinsics && isa<MemIntrinsic>(I)) {
+ instrumentMemIntrinsic(cast<MemIntrinsic>(I));
+ return true;
+ }
+
Value *Addr =
isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask);
@@ -521,13 +651,13 @@ bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
if (ClInstrumentWithCalls) {
IRB.CreateCall(HwasanTagMemoryFunc,
- {IRB.CreatePointerCast(AI, IntptrTy), JustTag,
+ {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
ConstantInt::get(IntptrTy, Size)});
} else {
size_t ShadowSize = Size >> Mapping.Scale;
Value *ShadowPtr = IRB.CreateIntToPtr(
memToShadow(IRB.CreatePointerCast(AI, IntptrTy), AI->getType(), IRB),
- IRB.getInt8PtrTy());
+ Int8PtrTy);
// If this memset is not inlined, it will be intercepted in the hwasan
// runtime library. That's OK, because the interceptor skips the checks if
// the address is in the shadow region.
@@ -557,7 +687,7 @@ Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
if (ClGenerateTagsWithCalls)
- return nullptr;
+ return getNextTagWithCall(IRB);
// FIXME: use addressofreturnaddress (but implement it in aarch64 backend
// first).
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
@@ -625,15 +755,141 @@ Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
return UntaggedPtrLong;
}
-bool HWAddressSanitizer::instrumentStack(
- SmallVectorImpl<AllocaInst *> &Allocas,
- SmallVectorImpl<Instruction *> &RetVec) {
- Function *F = Allocas[0]->getParent()->getParent();
- Instruction *InsertPt = &*F->getEntryBlock().begin();
- IRBuilder<> IRB(InsertPt);
+Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
+ // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
+ // in Bionic's libc/private/bionic_tls.h.
+ Function *ThreadPointerFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
+ Value *SlotPtr = IRB.CreatePointerCast(
+ IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), 0x30),
+ Ty->getPointerTo(0));
+ return SlotPtr;
+ }
+ if (ThreadPtrGlobal)
+ return ThreadPtrGlobal;
+
+
+ return nullptr;
+}
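
On AArch64 Android the per-thread hwasan state lives in a fixed TLS slot 0x30 bytes past the thread pointer (TLS_SLOT_SANITIZER in bionic_tls.h), which is what the CreateConstGEP1_32(..., 0x30) above computes. A hedged, source-level sketch of that address computation (not code from the pass):

    #include <cstdint>

    // Sketch: the address the emitted IR reads on AArch64 Android.
    static uintptr_t *hwasanThreadSlot() {
    #if defined(__aarch64__)
      // __builtin_thread_pointer() yields TPIDR_EL0; the sanitizer slot is at +0x30.
      char *tp = static_cast<char *>(__builtin_thread_pointer());
      return reinterpret_cast<uintptr_t *>(tp + 0x30);
    #else
      return nullptr; // other configurations use ThreadPtrGlobal or have no slot
    #endif
    }
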
+
+// Creates a string with a description of the stack frame (set of Allocas).
+// The string is intended to be human readable.
+// The current form is: Size1 Name1; Size2 Name2; ...
+std::string
+HWAddressSanitizer::createFrameString(ArrayRef<AllocaInst *> Allocas) {
+ std::ostringstream Descr;
+ for (auto AI : Allocas)
+ Descr << getAllocaSizeInBytes(*AI) << " " << AI->getName().str() << "; ";
+ return Descr.str();
+}
- Value *StackTag = getStackBaseTag(IRB);
+// Creates a global in the frame section which consists of two pointers:
+// the function PC and the frame string constant.
+void HWAddressSanitizer::createFrameGlobal(Function &F,
+ const std::string &FrameString) {
+ Module &M = *F.getParent();
+ auto DescrGV = createPrivateGlobalForString(M, FrameString, true);
+ auto PtrPairTy = StructType::get(F.getType(), DescrGV->getType());
+ auto GV = new GlobalVariable(
+ M, PtrPairTy, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
+ ConstantStruct::get(PtrPairTy, (Constant *)&F, (Constant *)DescrGV),
+ "__hwasan");
+ GV->setSection(getFrameSection());
+ appendToCompilerUsed(M, GV);
+ // Put GV into F's Comdat so that if F is deleted GV can be deleted too.
+ if (auto Comdat =
+ GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
+ GV->setComdat(Comdat);
+}
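
Taken together, createFrameString() and createFrameGlobal() record, for each instrumented function, a (PC, description) pair in the hwasan frame section, where the description lists each alloca's size and name. A small self-contained sketch of the string format; the alloca names and sizes are made up for illustration:

    #include <sstream>
    #include <string>
    #include <utility>
    #include <vector>

    // Mirrors the "Size1 Name1; Size2 Name2; " format produced above.
    std::string buildFrameString(
        const std::vector<std::pair<unsigned, std::string>> &Allocas) {
      std::ostringstream Descr;
      for (const auto &A : Allocas)
        Descr << A.first << " " << A.second << "; ";
      return Descr.str();
    }
    // buildFrameString({{4, "x"}, {256, "buf"}}) == "4 x; 256 buf; "
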
+
+Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
+ bool WithFrameRecord) {
+ if (!Mapping.InTls)
+ return getDynamicShadowNonTls(IRB);
+
+ Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
+ assert(SlotPtr);
+
+ Instruction *ThreadLong = IRB.CreateLoad(SlotPtr);
+
+ Function *F = IRB.GetInsertBlock()->getParent();
+ if (F->getFnAttribute("hwasan-abi").getValueAsString() == "interceptor") {
+ Value *ThreadLongEqZero =
+ IRB.CreateICmpEQ(ThreadLong, ConstantInt::get(IntptrTy, 0));
+ auto *Br = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ ThreadLongEqZero, cast<Instruction>(ThreadLongEqZero)->getNextNode(),
+ false, MDBuilder(*C).createBranchWeights(1, 100000)));
+
+ IRB.SetInsertPoint(Br);
+ // FIXME: This should call a new runtime function with a custom calling
+ // convention to avoid needing to spill all arguments here.
+ IRB.CreateCall(HwasanThreadEnterFunc);
+ LoadInst *ReloadThreadLong = IRB.CreateLoad(SlotPtr);
+
+ IRB.SetInsertPoint(&*Br->getSuccessor(0)->begin());
+ PHINode *ThreadLongPhi = IRB.CreatePHI(IntptrTy, 2);
+ ThreadLongPhi->addIncoming(ThreadLong, ThreadLong->getParent());
+ ThreadLongPhi->addIncoming(ReloadThreadLong, ReloadThreadLong->getParent());
+ ThreadLong = ThreadLongPhi;
+ }
+
+ // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
+ Value *ThreadLongMaybeUntagged =
+ TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
+
+ if (WithFrameRecord) {
+ // Prepare ring buffer data.
+ auto PC = IRB.CreatePtrToInt(F, IntptrTy);
+ auto GetStackPointerFn =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress);
+ Value *SP = IRB.CreatePtrToInt(
+ IRB.CreateCall(GetStackPointerFn,
+ {Constant::getNullValue(IRB.getInt32Ty())}),
+ IntptrTy);
+ // Mix SP and PC. TODO: also add the tag to the mix.
+ // Assumptions:
+ // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
+ // SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
+ // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
+ // 0xSSSSPPPPPPPPPPPP
+ SP = IRB.CreateShl(SP, 44);
+
+ // Store data to ring buffer.
+ Value *RecordPtr =
+ IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
+ IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);
+
+ // Update the ring buffer. Top byte of ThreadLong defines the size of the
+ // buffer in pages, it must be a power of two, and the start of the buffer
+ // must be aligned by twice that much. Therefore wrap around of the ring
+ // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
+ // The use of AShr instead of LShr is due to
+ // https://bugs.llvm.org/show_bug.cgi?id=39030
+ // Runtime library makes sure not to use the highest bit.
+ Value *WrapMask = IRB.CreateXor(
+ IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
+ ConstantInt::get(IntptrTy, (uint64_t)-1));
+ Value *ThreadLongNew = IRB.CreateAnd(
+ IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
+ IRB.CreateStore(ThreadLongNew, SlotPtr);
+ }
+ // Get shadow base address by aligning RecordPtr up.
+ // Note: this is not correct if the pointer is already aligned.
+ // Runtime library will make sure this never happens.
+ Value *ShadowBase = IRB.CreateAdd(
+ IRB.CreateOr(
+ ThreadLongMaybeUntagged,
+ ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
+ ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
+ return ShadowBase;
+}
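
The arithmetic in emitPrologue() is easier to follow outside of IRBuilder calls. The sketch below redoes it with plain integers: the frame record packs the PC into the low bits and the stack pointer's interesting bits into the top 16, the ring-buffer pointer wraps via a mask derived from the top byte of ThreadLong, and the shadow base is ThreadLong rounded up to the next 2^kShadowBaseAlignment boundary (the alignment is a parameter here because only its use, not its value, appears in this hunk). Plain >> stands in for the AShr; the two agree because the runtime keeps the top bit clear, as the comment above notes.

    #include <cstdint>

    // PC | (SP << 44): PC keeps its 48 meaningful bits, SP contributes the top 16.
    static uint64_t makeFrameRecord(uint64_t PC, uint64_t SP) {
      return PC | (SP << 44);
    }

    // Advance by one 8-byte record; the top byte of ThreadLong is the buffer size
    // in pages, so the wrap mask is ~((ThreadLong >> 56) << 12).
    static uint64_t advanceRingBuffer(uint64_t ThreadLong) {
      uint64_t WrapMask = ~((ThreadLong >> 56) << 12);
      return (ThreadLong + 8) & WrapMask;
    }

    // Round ThreadLong up to the next 2^Align boundary; the runtime guarantees the
    // value is never already aligned, so the unconditional +1 is safe.
    static uint64_t shadowBase(uint64_t ThreadLong, unsigned Align) {
      return (ThreadLong | ((1ULL << Align) - 1)) + 1;
    }
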
+
+bool HWAddressSanitizer::instrumentStack(
+ SmallVectorImpl<AllocaInst *> &Allocas,
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
// Ideally, we want to calculate tagged stack base pointer, and rewrite all
// alloca addresses using that. Unfortunately, offsets are not known yet
// (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
@@ -641,7 +897,7 @@ bool HWAddressSanitizer::instrumentStack(
// This generates one extra instruction per alloca use.
for (unsigned N = 0; N < Allocas.size(); ++N) {
auto *AI = Allocas[N];
- IRB.SetInsertPoint(AI->getNextNode());
+ IRBuilder<> IRB(AI->getNextNode());
// Replace uses of the alloca with tagged address.
Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
@@ -696,12 +952,6 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
- initializeCallbacks(*F.getParent());
-
- assert(!LocalDynamicShadow);
- maybeInsertDynamicShadowAtFunctionEntry(F);
-
- bool Changed = false;
SmallVector<Instruction*, 16> ToInstrument;
SmallVector<AllocaInst*, 8> AllocasToInstrument;
SmallVector<Instruction*, 8> RetVec;
@@ -734,8 +984,28 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
}
}
- if (!AllocasToInstrument.empty())
- Changed |= instrumentStack(AllocasToInstrument, RetVec);
+ if (AllocasToInstrument.empty() && ToInstrument.empty())
+ return false;
+
+ if (ClCreateFrameDescriptions && !AllocasToInstrument.empty())
+ createFrameGlobal(F, createFrameString(AllocasToInstrument));
+
+ initializeCallbacks(*F.getParent());
+
+ assert(!LocalDynamicShadow);
+
+ Instruction *InsertPt = &*F.getEntryBlock().begin();
+ IRBuilder<> EntryIRB(InsertPt);
+ LocalDynamicShadow = emitPrologue(EntryIRB,
+ /*WithFrameRecord*/ ClRecordStackHistory &&
+ !AllocasToInstrument.empty());
+
+ bool Changed = false;
+ if (!AllocasToInstrument.empty()) {
+ Value *StackTag =
+ ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
+ Changed |= instrumentStack(AllocasToInstrument, RetVec, StackTag);
+ }
for (auto Inst : ToInstrument)
Changed |= instrumentMemAccess(Inst);
@@ -746,18 +1016,26 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
}
void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) {
- const bool IsAndroid = TargetTriple.isAndroid();
- const bool IsAndroidWithIfuncSupport =
- IsAndroid && !TargetTriple.isAndroidVersionLT(21);
-
Scale = kDefaultShadowScale;
-
- if (ClEnableKhwasan || ClInstrumentWithCalls || !IsAndroidWithIfuncSupport)
+ if (ClMappingOffset.getNumOccurrences() > 0) {
+ InGlobal = false;
+ InTls = false;
+ Offset = ClMappingOffset;
+ } else if (ClEnableKhwasan || ClInstrumentWithCalls) {
+ InGlobal = false;
+ InTls = false;
Offset = 0;
- else
+ } else if (ClWithIfunc) {
+ InGlobal = true;
+ InTls = false;
Offset = kDynamicShadowSentinel;
- if (ClMappingOffset.getNumOccurrences() > 0)
- Offset = ClMappingOffset;
-
- InGlobal = IsAndroidWithIfuncSupport;
+ } else if (ClWithTls) {
+ InGlobal = false;
+ InTls = true;
+ Offset = kDynamicShadowSentinel;
+ } else {
+ InGlobal = false;
+ InTls = false;
+ Offset = kDynamicShadowSentinel;
+ }
}
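
The rewritten ShadowMapping::init() resolves the shadow placement from the flags in a fixed precedence order: an explicitly requested mapping offset (ClMappingOffset) wins, then kernel or instrument-with-calls mode forces a zero static offset, then the ifunc-based global, then the TLS slot, and finally the generic dynamic-shadow fallback. A condensed sketch of that decision (parameter names abbreviated; the real cl::opt spellings are defined earlier in the pass):

    struct MappingChoice { bool InGlobal; bool InTls; unsigned long long Offset; };

    // Mirrors ShadowMapping::init(); Sentinel stands for kDynamicShadowSentinel,
    // i.e. "resolve the shadow base at run time".
    MappingChoice chooseMapping(bool ExplicitOffsetGiven,
                                unsigned long long ExplicitOffset, bool Kernel,
                                bool WithCalls, bool WithIfunc, bool WithTls,
                                unsigned long long Sentinel) {
      if (ExplicitOffsetGiven) return {false, false, ExplicitOffset};
      if (Kernel || WithCalls) return {false, false, 0};
      if (WithIfunc)           return {true,  false, Sentinel};
      if (WithTls)             return {false, true,  Sentinel};
      return {false, false, Sentinel};
    }
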
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 27fb0e4393af..58436c8560ad 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -19,7 +19,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
-#include "llvm/Analysis/IndirectCallSiteVisitor.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Attributes.h"
@@ -41,8 +41,8 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Error.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
@@ -269,7 +269,8 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(
LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", Inst)
- << "Cannot promote indirect call: target not found";
+ << "Cannot promote indirect call: target with md5sum "
+ << ore::NV("target md5sum", Target) << " not found";
});
break;
}
@@ -351,7 +352,7 @@ uint32_t ICallPromotionFunc::tryToPromote(
bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) {
bool Changed = false;
ICallPromotionAnalysis ICallAnalysis;
- for (auto &I : findIndirectCallSites(F)) {
+ for (auto &I : findIndirectCalls(F)) {
uint32_t NumVals, NumCandidates;
uint64_t TotalCount;
auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
@@ -426,7 +427,7 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
ProfileSummaryInfo *PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
// Command-line option has the priority for InLTO.
return promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 4d5dfb0aa66b..15b94388cbe5 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -96,6 +96,11 @@ cl::opt<double> NumCountersPerValueSite(
// is usually smaller than 2.
cl::init(1.0));
+cl::opt<bool> AtomicCounterUpdateAll(
+ "instrprof-atomic-counter-update-all", cl::ZeroOrMore,
+ cl::desc("Make all profile counter updates atomic (for testing only)"),
+ cl::init(false));
+
cl::opt<bool> AtomicCounterUpdatePromoted(
"atomic-counter-update-promoted", cl::ZeroOrMore,
cl::desc("Do counter update using atomic fetch add "
@@ -597,12 +602,17 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
IRBuilder<> Builder(Inc);
uint64_t Index = Inc->getIndex()->getZExtValue();
Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
- Value *Load = Builder.CreateLoad(Addr, "pgocount");
- auto *Count = Builder.CreateAdd(Load, Inc->getStep());
- auto *Store = Builder.CreateStore(Count, Addr);
- Inc->replaceAllUsesWith(Store);
- if (isCounterPromotionEnabled())
- PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
+
+ if (Options.Atomic || AtomicCounterUpdateAll) {
+ Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
+ AtomicOrdering::Monotonic);
+ } else {
+ Value *Load = Builder.CreateLoad(Addr, "pgocount");
+ auto *Count = Builder.CreateAdd(Load, Inc->getStep());
+ auto *Store = Builder.CreateStore(Count, Addr);
+ if (isCounterPromotionEnabled())
+ PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
+ }
Inc->eraseFromParent();
}
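
With the new -instrprof-atomic-counter-update-all flag (or Options.Atomic) the counter bump is emitted as a single monotonic atomicrmw add rather than the load/add/store sequence, which also takes it out of the counter-promotion path. At the source level the two lowerings correspond roughly to:

    #include <atomic>
    #include <cstdint>

    // Non-atomic lowering: plain load/add/store, eligible for promotion out of loops.
    inline void bumpCounterPlain(uint64_t &Counter, uint64_t Step) {
      Counter = Counter + Step;
    }

    // Atomic lowering: a relaxed fetch-add, matching AtomicOrdering::Monotonic.
    inline void bumpCounterAtomic(std::atomic<uint64_t> &Counter, uint64_t Step) {
      Counter.fetch_add(Step, std::memory_order_relaxed);
    }
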
@@ -691,6 +701,7 @@ static bool needsRuntimeRegistrationOfSectionRange(const Module &M) {
// Use linker script magic to get data/cnts/name start/end.
if (Triple(M.getTargetTriple()).isOSLinux() ||
Triple(M.getTargetTriple()).isOSFreeBSD() ||
+ Triple(M.getTargetTriple()).isOSNetBSD() ||
Triple(M.getTargetTriple()).isOSFuchsia() ||
Triple(M.getTargetTriple()).isPS4CPU())
return false;
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index 8e9eea96ced7..c3e323613c70 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -14,7 +14,9 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm-c/Initialization.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
@@ -53,21 +55,65 @@ BasicBlock::iterator llvm::PrepareToSplitEntryBlock(BasicBlock &BB,
return IP;
}
+// Create a constant for Str so that we can pass it to the run-time lib.
+GlobalVariable *llvm::createPrivateGlobalForString(Module &M, StringRef Str,
+ bool AllowMerging,
+ const char *NamePrefix) {
+ Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+ // We use private linkage for module-local strings. If they can be merged
+ // with another one, we set the unnamed_addr attribute.
+ GlobalVariable *GV =
+ new GlobalVariable(M, StrConst->getType(), true,
+ GlobalValue::PrivateLinkage, StrConst, NamePrefix);
+ if (AllowMerging)
+ GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
+ return GV;
+}
+
+Comdat *llvm::GetOrCreateFunctionComdat(Function &F, Triple &T,
+ const std::string &ModuleId) {
+ if (auto Comdat = F.getComdat()) return Comdat;
+ assert(F.hasName());
+ Module *M = F.getParent();
+ std::string Name = F.getName();
+
+ // Make a unique comdat name for internal linkage things on ELF. On COFF, the
+ // name of the comdat group identifies the leader symbol of the comdat group.
+ // The linkage of the leader symbol is considered during comdat resolution,
+ // and internal symbols with the same name from different objects will not be
+ // merged.
+ if (T.isOSBinFormatELF() && F.hasLocalLinkage()) {
+ if (ModuleId.empty())
+ return nullptr;
+ Name += ModuleId;
+ }
+
+ // Make a new comdat for the function. Use the "no duplicates" selection kind
+ // for non-weak symbols if the object file format supports it.
+ Comdat *C = M->getOrInsertComdat(Name);
+ if (T.isOSBinFormatCOFF() && !F.isWeakForLinker())
+ C->setSelectionKind(Comdat::NoDuplicates);
+ F.setComdat(C);
+ return C;
+}
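
createPrivateGlobalForString() and GetOrCreateFunctionComdat() are the shared helpers the sanitizer passes use for per-function metadata that should be discarded together with its function (createFrameGlobal() earlier in this diff is one caller). A hedged usage sketch, assuming both declarations live in llvm/Transforms/Instrumentation.h and that the caller already has a Triple and a module-unique id; the "func_descr" prefix is illustrative:

    #include <string>

    #include "llvm/ADT/Triple.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Instrumentation.h"

    // Attach a private description string to F's comdat so that if the linker
    // drops F, the metadata global goes away with it.
    void emitFunctionDescription(llvm::Function &F, llvm::Triple &TT,
                                 const std::string &ModuleUniqueId) {
      llvm::Module &M = *F.getParent();
      llvm::GlobalVariable *Descr = llvm::createPrivateGlobalForString(
          M, F.getName(), /*AllowMerging=*/true, /*NamePrefix=*/"func_descr");
      if (llvm::Comdat *C = llvm::GetOrCreateFunctionComdat(F, TT, ModuleUniqueId))
        Descr->setComdat(C);
    }
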
+
/// initializeInstrumentation - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeAddressSanitizerPass(Registry);
initializeAddressSanitizerModulePass(Registry);
initializeBoundsCheckingLegacyPassPass(Registry);
+ initializeControlHeightReductionLegacyPassPass(Registry);
initializeGCOVProfilerLegacyPassPass(Registry);
initializePGOInstrumentationGenLegacyPassPass(Registry);
initializePGOInstrumentationUseLegacyPassPass(Registry);
initializePGOIndirectCallPromotionLegacyPassPass(Registry);
initializePGOMemOPSizeOptLegacyPassPass(Registry);
initializeInstrProfilingLegacyPassPass(Registry);
- initializeMemorySanitizerPass(Registry);
+ initializeMemorySanitizerLegacyPassPass(Registry);
initializeHWAddressSanitizerPass(Registry);
- initializeThreadSanitizerPass(Registry);
+ initializeThreadSanitizerLegacyPassPass(Registry);
initializeSanitizerCoverageModulePass(Registry);
initializeDataFlowSanitizerPass(Registry);
initializeEfficiencySanitizerPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4bcef6972786..e6573af2077d 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -89,9 +89,58 @@
/// implementation ignores the load aspect of CAS/RMW, always returning a clean
/// value. It implements the store part as a simple atomic store by storing a
/// clean shadow.
-//
+///
+/// Instrumenting inline assembly.
+///
+/// For inline assembly code LLVM has little idea about which memory locations
+/// become initialized depending on the arguments. It may be possible to figure
+/// out which arguments are meant to point to inputs and outputs, but the
+/// actual semantics may only be visible at runtime. In the Linux kernel it's
+/// also possible that the arguments only indicate the offset for a base taken
+/// from a segment register, so it's dangerous to treat any asm() arguments as
+/// pointers. We take a conservative approach generating calls to
+/// __msan_instrument_asm_store(ptr, size)
+/// , which defers the memory unpoisoning to the runtime library.
+/// The latter can perform more complex address checks to figure out whether
+/// it's safe to touch the shadow memory.
+/// Like with atomic operations, we call __msan_instrument_asm_store() before
+/// the assembly call, so that changes to the shadow memory will be seen by
+/// other threads together with main memory initialization.
+///
+/// KernelMemorySanitizer (KMSAN) implementation.
+///
+/// The major differences between KMSAN and MSan instrumentation are:
+/// - KMSAN always tracks the origins and implies msan-keep-going=true;
+/// - KMSAN allocates shadow and origin memory for each page separately, so
+/// there are no explicit accesses to shadow and origin in the
+/// instrumentation.
+/// Shadow and origin values for a particular X-byte memory location
+/// (X=1,2,4,8) are accessed through pointers obtained via the
+/// __msan_metadata_ptr_for_load_X(ptr)
+/// __msan_metadata_ptr_for_store_X(ptr)
+/// functions. The corresponding functions check that the X-byte accesses
+/// are possible and return the pointers to shadow and origin memory.
+/// Arbitrary sized accesses are handled with:
+/// __msan_metadata_ptr_for_load_n(ptr, size)
+/// __msan_metadata_ptr_for_store_n(ptr, size);
+/// - TLS variables are stored in a single per-task struct. A call to a
+/// function __msan_get_context_state() returning a pointer to that struct
+/// is inserted into every instrumented function before the entry block;
+/// - __msan_warning() takes a 32-bit origin parameter;
+/// - local variables are poisoned with __msan_poison_alloca() upon function
+/// entry and unpoisoned with __msan_unpoison_alloca() before leaving the
+/// function;
+/// - the pass doesn't declare any global variables or add global constructors
+/// to the translation unit.
+///
+/// Also, KMSAN currently ignores uninitialized memory passed into inline asm
+/// calls, making sure we're on the safe side wrt. possible false positives.
+///
+/// KernelMemorySanitizer only supports X86_64 at the moment.
+///
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DepthFirstIterator.h"
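
The KMSAN scheme documented in the comment block above replaces direct shadow/origin address arithmetic with runtime callbacks that hand back a shadow/origin pointer pair per access; the declarations the pass actually builds appear later in createKernelApi(). The C-level shape of that interface is roughly the sketch below, where the struct name and by-value return are assumptions (in IR the return type is literally { i8*, i32* }):

    #include <cstdint>

    // Pair handed back by the KMSAN metadata helpers: where to read or write the
    // shadow and the origin for a given kernel address.
    struct MetadataPtrs {
      char *shadow;
      uint32_t *origin;
    };

    extern "C" {
    // Fixed-size accesses, X in {1, 2, 4, 8}.
    MetadataPtrs __msan_metadata_ptr_for_load_4(void *addr);
    MetadataPtrs __msan_metadata_ptr_for_store_4(void *addr);
    // Arbitrary-size accesses.
    MetadataPtrs __msan_metadata_ptr_for_load_n(void *addr, uintptr_t size);
    MetadataPtrs __msan_metadata_ptr_for_store_n(void *addr, uintptr_t size);
    }
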
@@ -101,7 +150,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -139,6 +187,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
@@ -206,10 +255,13 @@ static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
// passed into an assembly call. Note that this may cause false positives.
// Because it's impossible to figure out the array sizes, we can only unpoison
// the first sizeof(type) bytes for each type* pointer.
+// The instrumentation is only enabled in KMSAN builds, and only if
+// -msan-handle-asm-conservative is on. This is done because we may want to
+// quickly disable assembly instrumentation when it breaks.
static cl::opt<bool> ClHandleAsmConservative(
"msan-handle-asm-conservative",
cl::desc("conservative handling of inline assembly"), cl::Hidden,
- cl::init(false));
+ cl::init(true));
// This flag controls whether we check the shadow of the address
// operand of load or store. Such bugs are very rare, since load from
@@ -233,6 +285,11 @@ static cl::opt<int> ClInstrumentationWithCallThreshold(
"inline checks (-1 means never use callbacks)."),
cl::Hidden, cl::init(3500));
+static cl::opt<bool>
+ ClEnableKmsan("msan-kernel",
+ cl::desc("Enable KernelMemorySanitizer instrumentation"),
+ cl::Hidden, cl::init(false));
+
// This is an experiment to enable handling of cases where shadow is a non-zero
// compile-time constant. For some unexplainable reason they were silently
// ignored in the instrumentation.
@@ -264,7 +321,6 @@ static cl::opt<unsigned long long> ClOriginBase("msan-origin-base",
cl::desc("Define custom MSan OriginBase"),
cl::Hidden, cl::init(0));
-static const char *const kMsanModuleCtorName = "msan.module_ctor";
static const char *const kMsanInitName = "__msan_init";
namespace {
@@ -390,29 +446,35 @@ static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
namespace {
-/// An instrumentation pass implementing detection of uninitialized
-/// reads.
+/// Instrument functions of a module to detect uninitialized reads.
///
-/// MemorySanitizer: instrument the code in module to find
-/// uninitialized reads.
-class MemorySanitizer : public FunctionPass {
+/// Instantiating MemorySanitizer inserts the msan runtime library API function
+/// declarations into the module if they don't exist already. Instantiating
+/// ensures the __msan_init function is in the list of global constructors for
+/// the module.
+class MemorySanitizer {
public:
- // Pass identification, replacement for typeid.
- static char ID;
-
- MemorySanitizer(int TrackOrigins = 0, bool Recover = false)
- : FunctionPass(ID),
- TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
- Recover(Recover || ClKeepGoing) {}
-
- StringRef getPassName() const override { return "MemorySanitizer"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
+ MemorySanitizer(Module &M, int TrackOrigins = 0, bool Recover = false,
+ bool EnableKmsan = false) {
+ this->CompileKernel =
+ ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : EnableKmsan;
+ if (ClTrackOrigins.getNumOccurrences() > 0)
+ this->TrackOrigins = ClTrackOrigins;
+ else
+ this->TrackOrigins = this->CompileKernel ? 2 : TrackOrigins;
+ this->Recover = ClKeepGoing.getNumOccurrences() > 0
+ ? ClKeepGoing
+ : (this->CompileKernel | Recover);
+ initializeModule(M);
}
- bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
+ // MSan cannot be moved or copied because of MapParams.
+ MemorySanitizer(MemorySanitizer &&) = delete;
+ MemorySanitizer &operator=(MemorySanitizer &&) = delete;
+ MemorySanitizer(const MemorySanitizer &) = delete;
+ MemorySanitizer &operator=(const MemorySanitizer &) = delete;
+
+ bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
private:
friend struct MemorySanitizerVisitor;
@@ -421,9 +483,13 @@ private:
friend struct VarArgAArch64Helper;
friend struct VarArgPowerPC64Helper;
+ void initializeModule(Module &M);
void initializeCallbacks(Module &M);
+ void createKernelApi(Module &M);
void createUserspaceApi(Module &M);
+ /// True if we're compiling the Linux kernel.
+ bool CompileKernel;
/// Track origins (allocation points) of uninitialized values.
int TrackOrigins;
bool Recover;
@@ -432,29 +498,39 @@ private:
Type *IntptrTy;
Type *OriginTy;
+ // XxxTLS variables represent the per-thread state in MSan and per-task state
+ // in KMSAN.
+ // For the userspace these point to thread-local globals. In the kernel land
+ // they point to the members of a per-task struct obtained via a call to
+ // __msan_get_context_state().
+
/// Thread-local shadow storage for function parameters.
- GlobalVariable *ParamTLS;
+ Value *ParamTLS;
/// Thread-local origin storage for function parameters.
- GlobalVariable *ParamOriginTLS;
+ Value *ParamOriginTLS;
/// Thread-local shadow storage for function return value.
- GlobalVariable *RetvalTLS;
+ Value *RetvalTLS;
/// Thread-local origin storage for function return value.
- GlobalVariable *RetvalOriginTLS;
+ Value *RetvalOriginTLS;
/// Thread-local shadow storage for in-register va_arg function
/// parameters (x86_64-specific).
- GlobalVariable *VAArgTLS;
+ Value *VAArgTLS;
+
+ /// Thread-local shadow storage for in-register va_arg function
+ /// parameters (x86_64-specific).
+ Value *VAArgOriginTLS;
/// Thread-local shadow storage for va_arg overflow area
/// (x86_64-specific).
- GlobalVariable *VAArgOverflowSizeTLS;
+ Value *VAArgOverflowSizeTLS;
/// Thread-local space used to pass origin value to the UMR reporting
/// function.
- GlobalVariable *OriginTLS;
+ Value *OriginTLS;
/// Are the instrumentation callbacks set up?
bool CallbacksInitialized = false;
@@ -480,6 +556,22 @@ private:
/// MSan runtime replacements for memmove, memcpy and memset.
Value *MemmoveFn, *MemcpyFn, *MemsetFn;
+ /// KMSAN callback for task-local function argument shadow.
+ Value *MsanGetContextStateFn;
+
+ /// Functions for poisoning/unpoisoning local variables
+ Value *MsanPoisonAllocaFn, *MsanUnpoisonAllocaFn;
+
+ /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
+ /// pointers.
+ Value *MsanMetadataPtrForLoadN, *MsanMetadataPtrForStoreN;
+ Value *MsanMetadataPtrForLoad_1_8[4];
+ Value *MsanMetadataPtrForStore_1_8[4];
+ Value *MsanInstrumentAsmStoreFn;
+
+ /// Helper to choose between different MsanMetadataPtrXxx().
+ Value *getKmsanShadowOriginAccessFn(bool isStore, int size);
+
/// Memory map parameters used in application-to-shadow calculation.
const MemoryMapParams *MapParams;
@@ -494,24 +586,61 @@ private:
/// An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
+};
+
+/// A legacy function pass for msan instrumentation.
+///
+/// Instruments functions to detect uninitialized reads.
+struct MemorySanitizerLegacyPass : public FunctionPass {
+ // Pass identification, replacement for typeid.
+ static char ID;
- Function *MsanCtorFunction;
+ MemorySanitizerLegacyPass(int TrackOrigins = 0, bool Recover = false,
+ bool EnableKmsan = false)
+ : FunctionPass(ID), TrackOrigins(TrackOrigins), Recover(Recover),
+ EnableKmsan(EnableKmsan) {}
+ StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ return MSan->sanitizeFunction(
+ F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
+ }
+ bool doInitialization(Module &M) override;
+
+ Optional<MemorySanitizer> MSan;
+ int TrackOrigins;
+ bool Recover;
+ bool EnableKmsan;
};
} // end anonymous namespace
-char MemorySanitizer::ID = 0;
+PreservedAnalyses MemorySanitizerPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ MemorySanitizer Msan(*F.getParent(), TrackOrigins, Recover, EnableKmsan);
+ if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
-INITIALIZE_PASS_BEGIN(
- MemorySanitizer, "msan",
- "MemorySanitizer: detects uninitialized reads.", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(
- MemorySanitizer, "msan",
- "MemorySanitizer: detects uninitialized reads.", false, false)
+char MemorySanitizerLegacyPass::ID = 0;
-FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins, bool Recover) {
- return new MemorySanitizer(TrackOrigins, Recover);
+INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
+ "MemorySanitizer: detects uninitialized reads.", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
+ "MemorySanitizer: detects uninitialized reads.", false,
+ false)
+
+FunctionPass *llvm::createMemorySanitizerLegacyPassPass(int TrackOrigins,
+ bool Recover,
+ bool CompileKernel) {
+ return new MemorySanitizerLegacyPass(TrackOrigins, Recover, CompileKernel);
}
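
After this split the same MemorySanitizer implementation is reachable from both pass managers: MemorySanitizerPass::run() above serves the new pass manager, and MemorySanitizerLegacyPass sits behind the createMemorySanitizerLegacyPassPass() factory for the legacy one. A hedged sketch of creating the legacy pass from C++, assuming the factory is declared in the MemorySanitizer.h header added at the top of this file:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"

    void addMsanLegacy(llvm::legacy::FunctionPassManager &FPM) {
      // TrackOrigins=1, Recover=false, CompileKernel=false: classic userspace MSan
      // with one level of origin tracking.
      FPM.add(llvm::createMemorySanitizerLegacyPassPass(/*TrackOrigins=*/1,
                                                        /*Recover=*/false,
                                                        /*CompileKernel=*/false));
    }
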
/// Create a non-const global initialized with the given string.
@@ -526,6 +655,76 @@ static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
GlobalValue::PrivateLinkage, StrConst, "");
}
+/// Create KMSAN API callbacks.
+void MemorySanitizer::createKernelApi(Module &M) {
+ IRBuilder<> IRB(*C);
+
+ // These will be initialized in insertKmsanPrologue().
+ RetvalTLS = nullptr;
+ RetvalOriginTLS = nullptr;
+ ParamTLS = nullptr;
+ ParamOriginTLS = nullptr;
+ VAArgTLS = nullptr;
+ VAArgOriginTLS = nullptr;
+ VAArgOverflowSizeTLS = nullptr;
+ // OriginTLS is unused in the kernel.
+ OriginTLS = nullptr;
+
+ // __msan_warning() in the kernel takes an origin.
+ WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(),
+ IRB.getInt32Ty());
+ // Requests the per-task context state (kmsan_context_state*) from the
+ // runtime library.
+ MsanGetContextStateFn = M.getOrInsertFunction(
+ "__msan_get_context_state",
+ PointerType::get(
+ StructType::get(ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(),
+ kParamTLSSize / 8), /* va_arg_origin */
+ IRB.getInt64Ty(),
+ ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
+ OriginTy),
+ 0));
+
+ Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
+ PointerType::get(IRB.getInt32Ty(), 0));
+
+ for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
+ std::string name_load =
+ "__msan_metadata_ptr_for_load_" + std::to_string(size);
+ std::string name_store =
+ "__msan_metadata_ptr_for_store_" + std::to_string(size);
+ MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
+ name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
+ MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
+ name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
+ }
+
+ MsanMetadataPtrForLoadN = M.getOrInsertFunction(
+ "__msan_metadata_ptr_for_load_n", RetTy,
+ PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
+ MsanMetadataPtrForStoreN = M.getOrInsertFunction(
+ "__msan_metadata_ptr_for_store_n", RetTy,
+ PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
+
+ // Functions for poisoning and unpoisoning memory.
+ MsanPoisonAllocaFn =
+ M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
+ MsanUnpoisonAllocaFn = M.getOrInsertFunction(
+ "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
+}
+
+static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
+ return M.getOrInsertGlobal(Name, Ty, [&] {
+ return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, Name, nullptr,
+ GlobalVariable::InitialExecTLSModel);
+ });
+}
+
/// Insert declarations for userspace-specific functions and globals.
void MemorySanitizer::createUserspaceApi(Module &M) {
IRBuilder<> IRB(*C);
@@ -537,36 +736,31 @@ void MemorySanitizer::createUserspaceApi(Module &M) {
WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy());
// Create the global TLS variables.
- RetvalTLS = new GlobalVariable(
- M, ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8), false,
- GlobalVariable::ExternalLinkage, nullptr, "__msan_retval_tls", nullptr,
- GlobalVariable::InitialExecTLSModel);
-
- RetvalOriginTLS = new GlobalVariable(
- M, OriginTy, false, GlobalVariable::ExternalLinkage, nullptr,
- "__msan_retval_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
-
- ParamTLS = new GlobalVariable(
- M, ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), false,
- GlobalVariable::ExternalLinkage, nullptr, "__msan_param_tls", nullptr,
- GlobalVariable::InitialExecTLSModel);
-
- ParamOriginTLS = new GlobalVariable(
- M, ArrayType::get(OriginTy, kParamTLSSize / 4), false,
- GlobalVariable::ExternalLinkage, nullptr, "__msan_param_origin_tls",
- nullptr, GlobalVariable::InitialExecTLSModel);
-
- VAArgTLS = new GlobalVariable(
- M, ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), false,
- GlobalVariable::ExternalLinkage, nullptr, "__msan_va_arg_tls", nullptr,
- GlobalVariable::InitialExecTLSModel);
- VAArgOverflowSizeTLS = new GlobalVariable(
- M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
- "__msan_va_arg_overflow_size_tls", nullptr,
- GlobalVariable::InitialExecTLSModel);
- OriginTLS = new GlobalVariable(
- M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
- "__msan_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
+ RetvalTLS =
+ getOrInsertGlobal(M, "__msan_retval_tls",
+ ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
+
+ RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
+
+ ParamTLS =
+ getOrInsertGlobal(M, "__msan_param_tls",
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
+
+ ParamOriginTLS =
+ getOrInsertGlobal(M, "__msan_param_origin_tls",
+ ArrayType::get(OriginTy, kParamTLSSize / 4));
+
+ VAArgTLS =
+ getOrInsertGlobal(M, "__msan_va_arg_tls",
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
+
+ VAArgOriginTLS =
+ getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
+ ArrayType::get(OriginTy, kParamTLSSize / 4));
+
+ VAArgOverflowSizeTLS =
+ getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
+ OriginTLS = getOrInsertGlobal(M, "__msan_origin_tls", IRB.getInt32Ty());
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
@@ -615,14 +809,37 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
- createUserspaceApi(M);
+ MsanInstrumentAsmStoreFn =
+ M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
+ PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
+
+ if (CompileKernel) {
+ createKernelApi(M);
+ } else {
+ createUserspaceApi(M);
+ }
CallbacksInitialized = true;
}
+Value *MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore, int size) {
+ Value **Fns =
+ isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
+ switch (size) {
+ case 1:
+ return Fns[0];
+ case 2:
+ return Fns[1];
+ case 4:
+ return Fns[2];
+ case 8:
+ return Fns[3];
+ default:
+ return nullptr;
+ }
+}
+
/// Module-level initialization.
-///
-/// inserts a call to __msan_init to the module's constructor list.
-bool MemorySanitizer::doInitialization(Module &M) {
+void MemorySanitizer::initializeModule(Module &M) {
auto &DL = M.getDataLayout();
bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
@@ -695,27 +912,27 @@ bool MemorySanitizer::doInitialization(Module &M) {
ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
- std::tie(MsanCtorFunction, std::ignore) =
- createSanitizerCtorAndInitFunctions(M, kMsanModuleCtorName, kMsanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{});
- if (ClWithComdat) {
- Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
- MsanCtorFunction->setComdat(MsanCtorComdat);
- appendToGlobalCtors(M, MsanCtorFunction, 0, MsanCtorFunction);
- } else {
- appendToGlobalCtors(M, MsanCtorFunction, 0);
- }
-
-
- if (TrackOrigins)
- new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
- IRB.getInt32(TrackOrigins), "__msan_track_origins");
-
- if (Recover)
- new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
- IRB.getInt32(Recover), "__msan_keep_going");
+ if (!CompileKernel) {
+ getOrCreateInitFunction(M, kMsanInitName);
+
+ if (TrackOrigins)
+ M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
+ return new GlobalVariable(
+ M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(TrackOrigins), "__msan_track_origins");
+ });
+
+ if (Recover)
+ M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
+ return new GlobalVariable(M, IRB.getInt32Ty(), true,
+ GlobalValue::WeakODRLinkage,
+ IRB.getInt32(Recover), "__msan_keep_going");
+ });
+}
+}
+bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
+ MSan.emplace(M, TrackOrigins, Recover, EnableKmsan);
return true;
}
@@ -796,8 +1013,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
SmallVector<StoreInst *, 16> StoreList;
- MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
- : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
+ MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
+ const TargetLibraryInfo &TLI)
+ : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
InsertChecks = SanitizeFunction;
PropagateShadow = SanitizeFunction;
@@ -806,10 +1024,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// FIXME: Consider using SpecialCaseList to specify a list of functions that
// must always return fully initialized values. For now, we hardcode "main".
CheckReturnValue = SanitizeFunction && (F.getName() == "main");
- TLI = &MS.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
MS.initializeCallbacks(*F.getParent());
- ActualFnStart = &F.getEntryBlock();
+ if (MS.CompileKernel)
+ ActualFnStart = insertKmsanPrologue(F);
+ else
+ ActualFnStart = &F.getEntryBlock();
LLVM_DEBUG(if (!InsertChecks) dbgs()
<< "MemorySanitizer is not inserting checks into '"
@@ -883,7 +1103,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned TypeSizeInBits =
DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
- if (AsCall && SizeIndex < kNumberOfAccessSizes) {
+ if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
Value *ConvertedShadow2 = IRB.CreateZExt(
ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
@@ -932,10 +1152,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
if (!Origin)
Origin = (Value *)IRB.getInt32(0);
- if (MS.TrackOrigins) {
- IRB.CreateStore(Origin, MS.OriginTLS);
+ if (MS.CompileKernel) {
+ IRB.CreateCall(MS.WarningFn, Origin);
+ } else {
+ if (MS.TrackOrigins) {
+ IRB.CreateStore(Origin, MS.OriginTLS);
+ }
+ IRB.CreateCall(MS.WarningFn, {});
}
- IRB.CreateCall(MS.WarningFn, {});
IRB.CreateCall(MS.EmptyAsm, {});
// FIXME: Insert UnreachableInst if !MS.Recover?
// This may invalidate some of the following checks and needs to be done
@@ -961,7 +1185,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
- if (AsCall && SizeIndex < kNumberOfAccessSizes) {
+ if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
Value *Fn = MS.MaybeWarningFn[SizeIndex];
Value *ConvertedShadow2 =
IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
@@ -991,6 +1215,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
LLVM_DEBUG(dbgs() << "DONE:\n" << F);
}
+ BasicBlock *insertKmsanPrologue(Function &F) {
+ BasicBlock *ret =
+ SplitBlock(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHI());
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
+ Constant *Zero = IRB.getInt32(0);
+ MS.ParamTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(0)}, "param_shadow");
+ MS.RetvalTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(1)}, "retval_shadow");
+ MS.VAArgTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(2)}, "va_arg_shadow");
+ MS.VAArgOriginTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(3)}, "va_arg_origin");
+ MS.VAArgOverflowSizeTLS = IRB.CreateGEP(
+ ContextState, {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
+ MS.ParamOriginTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(5)}, "param_origin");
+ MS.RetvalOriginTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(6)}, "retval_origin");
+ return ret;
+ }
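
insertKmsanPrologue() fetches the per-task state once per function and then indexes its fields with the GEPs above. The struct the pass expects back from __msan_get_context_state() therefore has roughly the shape below; the field names are illustrative and the array sizes are placeholders (kParamTLSSize/kRetvalTLSSize are defined elsewhere in this file), but the field order matches GEP indices 0 through 6:

    #include <cstdint>

    constexpr unsigned kParamTLS = 800;  // placeholder for kParamTLSSize
    constexpr unsigned kRetvalTLS = 800; // placeholder for kRetvalTLSSize

    // Assumed mirror of the state returned by __msan_get_context_state().
    struct kmsan_context_state_sketch {
      uint64_t param_shadow[kParamTLS / 8];   // GEP index 0 -> param_shadow
      uint64_t retval_shadow[kRetvalTLS / 8]; // GEP index 1 -> retval_shadow
      uint64_t va_arg_shadow[kParamTLS / 8];  // GEP index 2 -> va_arg_shadow
      uint64_t va_arg_origin[kParamTLS / 8];  // GEP index 3 -> va_arg_origin
      uint64_t va_arg_overflow_size;          // GEP index 4
      uint32_t param_origin[kParamTLS / 4];   // GEP index 5 -> param_origin
      uint32_t retval_origin;                 // GEP index 6 -> retval_origin
      uint32_t origin;                        // last field, unused by the prologue
    };
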
+
/// Add MemorySanitizer instrumentation to a function.
bool runOnFunction() {
// In the presence of unreachable blocks, we may see Phi nodes with
@@ -1139,12 +1386,40 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return std::make_pair(ShadowPtr, OriginPtr);
}
+ std::pair<Value *, Value *>
+ getShadowOriginPtrKernel(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
+ unsigned Alignment, bool isStore) {
+ Value *ShadowOriginPtrs;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ int Size = DL.getTypeStoreSize(ShadowTy);
+
+ Value *Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
+ Value *AddrCast =
+ IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
+ if (Getter) {
+ ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
+ } else {
+ Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
+ ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
+ : MS.MsanMetadataPtrForLoadN,
+ {AddrCast, SizeVal});
+ }
+ Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
+ ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
+ Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
+
+ return std::make_pair(ShadowPtr, OriginPtr);
+ }
+
std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
Type *ShadowTy,
unsigned Alignment,
bool isStore) {
- std::pair<Value *, Value *> ret =
- getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
+ std::pair<Value *, Value *> ret;
+ if (MS.CompileKernel)
+ ret = getShadowOriginPtrKernel(Addr, IRB, ShadowTy, Alignment, isStore);
+ else
+ ret = getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
return ret;
}
@@ -1163,7 +1438,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Compute the origin address for a given function argument.
Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
int ArgOffset) {
- if (!MS.TrackOrigins) return nullptr;
+ if (!MS.TrackOrigins)
+ return nullptr;
Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
if (ArgOffset)
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
@@ -1303,6 +1579,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
/*isStore*/ true)
.first;
+ // TODO(glider): need to copy origins.
if (Overflow) {
// ParamTLS overflow.
EntryIRB.CreateMemSet(
@@ -2850,6 +3127,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorComparePackedIntrinsic(I);
break;
+ case Intrinsic::is_constant:
+ // The result of llvm.is.constant() is always defined.
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ break;
+
default:
if (!handleUnknownIntrinsic(I))
visitInstruction(I);
@@ -2868,7 +3151,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// outputs as clean. Note that any side effects of the inline asm that are
// not immediately visible in its constraints are not handled.
if (Call->isInlineAsm()) {
- if (ClHandleAsmConservative)
+ if (ClHandleAsmConservative && MS.CompileKernel)
visitAsmInstruction(I);
else
visitInstruction(I);
@@ -2921,12 +3204,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (ArgOffset + Size > kParamTLSSize) break;
unsigned ParamAlignment = CS.getParamAlignment(i);
unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
- Value *AShadowPtr = getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
- Alignment, /*isStore*/ false)
- .first;
+ Value *AShadowPtr =
+ getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
+ /*isStore*/ false)
+ .first;
Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
Alignment, Size);
+ // TODO(glider): need to copy origins.
} else {
Size = DL.getTypeAllocSize(A->getType());
if (ArgOffset + Size > kParamTLSSize) break;
@@ -2945,8 +3230,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
LLVM_DEBUG(dbgs() << " done with call args\n");
- FunctionType *FT =
- cast<FunctionType>(CS.getCalledValue()->getType()->getContainedType(0));
+ FunctionType *FT = CS.getFunctionType();
if (FT->isVarArg()) {
VAHelper->visitCallSite(CS, IRB);
}
@@ -3033,40 +3317,34 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
"_msphi_o"));
}
- void visitAllocaInst(AllocaInst &I) {
- setShadow(&I, getCleanShadow(&I));
- setOrigin(&I, getCleanOrigin());
- IRBuilder<> IRB(I.getNextNode());
- const DataLayout &DL = F.getParent()->getDataLayout();
- uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
- Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
- if (I.isArrayAllocation())
- Len = IRB.CreateMul(Len, I.getArraySize());
+ Value *getLocalVarDescription(AllocaInst &I) {
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ // We create a string with a description of the stack allocation and
+ // pass it into __msan_set_alloca_origin.
+ // It will be printed by the run-time if stack-originated UMR is found.
+ // The first 4 bytes of the string are set to '----' and will be replaced
+ // by __msan_va_arg_overflow_size_tls at the first call.
+ StackDescription << "----" << I.getName() << "@" << F.getName();
+ return createPrivateNonConstGlobalForString(*F.getParent(),
+ StackDescription.str());
+ }
+
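
getLocalVarDescription() produces the string handed to the poisoning callbacks; per the comment, its first four bytes are a "----" placeholder that the runtime rewrites. A tiny sketch of the format (the names are illustrative, not runtime output):

    #include <string>

    // Format: "----<alloca name>@<function name>".
    std::string localVarDescription(const std::string &AllocaName,
                                    const std::string &FnName) {
      return "----" + AllocaName + "@" + FnName;
    }
    // localVarDescription("buf", "main") == "----buf@main"
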
+ void instrumentAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
if (PoisonStack && ClPoisonStackWithCall) {
IRB.CreateCall(MS.MsanPoisonStackFn,
{IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
} else {
- Value *ShadowBase = getShadowOriginPtr(&I, IRB, IRB.getInt8Ty(),
- I.getAlignment(), /*isStore*/ true)
- .first;
+ Value *ShadowBase, *OriginBase;
+ std::tie(ShadowBase, OriginBase) =
+ getShadowOriginPtr(&I, IRB, IRB.getInt8Ty(), 1, /*isStore*/ true);
Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlignment());
}
if (PoisonStack && MS.TrackOrigins) {
- SmallString<2048> StackDescriptionStorage;
- raw_svector_ostream StackDescription(StackDescriptionStorage);
- // We create a string with a description of the stack allocation and
- // pass it into __msan_set_alloca_origin.
- // It will be printed by the run-time if stack-originated UMR is found.
- // The first 4 bytes of the string are set to '----' and will be replaced
- // by __msan_va_arg_overflow_size_tls at the first call.
- StackDescription << "----" << I.getName() << "@" << F.getName();
- Value *Descr =
- createPrivateNonConstGlobalForString(*F.getParent(),
- StackDescription.str());
-
+ Value *Descr = getLocalVarDescription(I);
IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
{IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
@@ -3074,6 +3352,34 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
+ void instrumentAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+ Value *Descr = getLocalVarDescription(I);
+ if (PoisonStack) {
+ IRB.CreateCall(MS.MsanPoisonAllocaFn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
+ IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
+ } else {
+ IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
+ }
+ }
+
+ void visitAllocaInst(AllocaInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ IRBuilder<> IRB(I.getNextNode());
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
+ Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
+ if (I.isArrayAllocation())
+ Len = IRB.CreateMul(Len, I.getArraySize());
+
+ if (MS.CompileKernel)
+ instrumentAllocaKmsan(I, IRB, Len);
+ else
+ instrumentAllocaUserspace(I, IRB, Len);
+ }
+
void visitSelectInst(SelectInst& I) {
IRBuilder<> IRB(&I);
// a = select b, c, d
@@ -3196,37 +3502,95 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Nothing to do here.
}
+ void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB,
+ const DataLayout &DL, bool isOutput) {
+ // For each assembly argument, we check its value for being initialized.
+ // If the argument is a pointer, we assume it points to a single element
+ // of the corresponding type (or to a 8-byte word, if the type is unsized).
+ // Each such pointer is instrumented with a call to the runtime library.
+ Type *OpType = Operand->getType();
+ // Check the operand value itself.
+ insertShadowCheck(Operand, &I);
+ if (!OpType->isPointerTy() || !isOutput) {
+ assert(!isOutput);
+ return;
+ }
+ Type *ElType = OpType->getPointerElementType();
+ if (!ElType->isSized())
+ return;
+ int Size = DL.getTypeStoreSize(ElType);
+ Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
+ Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
+ IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
+ }
+
+ /// Get the number of output arguments returned by pointers.
+ int getNumOutputArgs(InlineAsm *IA, CallInst *CI) {
+ int NumRetOutputs = 0;
+ int NumOutputs = 0;
+ Type *RetTy = dyn_cast<Value>(CI)->getType();
+ if (!RetTy->isVoidTy()) {
+ // Register outputs are returned via the CallInst return value.
+ StructType *ST = dyn_cast_or_null<StructType>(RetTy);
+ if (ST)
+ NumRetOutputs = ST->getNumElements();
+ else
+ NumRetOutputs = 1;
+ }
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+ for (size_t i = 0, n = Constraints.size(); i < n; i++) {
+ InlineAsm::ConstraintInfo Info = Constraints[i];
+ switch (Info.Type) {
+ case InlineAsm::isOutput:
+ NumOutputs++;
+ break;
+ default:
+ break;
+ }
+ }
+ return NumOutputs - NumRetOutputs;
+ }
+
void visitAsmInstruction(Instruction &I) {
// Conservative inline assembly handling: check for poisoned shadow of
// asm() arguments, then unpoison the result and all the memory locations
// pointed to by those arguments.
+ // An inline asm() statement in C++ contains lists of input and output
+ // arguments used by the assembly code. These are mapped to operands of the
+ // CallInst as follows:
+ // - nR register outputs ("=r") are returned by value in a single structure
+ // (SSA value of the CallInst);
+ // - nO other outputs ("=m" and others) are returned by pointer as first
+ // nO operands of the CallInst;
+ // - nI inputs ("r", "m" and others) are passed to CallInst as the
+ // remaining nI operands.
+ // The total number of asm() arguments in the source is nR+nO+nI, and the
+ // corresponding CallInst has nO+nI+1 operands (the last operand is the
+ // function to be called).
+ const DataLayout &DL = F.getParent()->getDataLayout();
CallInst *CI = dyn_cast<CallInst>(&I);
-
- for (size_t i = 0, n = CI->getNumOperands(); i < n; i++) {
+ IRBuilder<> IRB(&I);
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ int OutputArgs = getNumOutputArgs(IA, CI);
+ // The last operand of a CallInst is the function itself.
+ int NumOperands = CI->getNumOperands() - 1;
+
+ // Check input arguments. Doing so before unpoisoning output arguments, so
+ // that we won't overwrite uninit values before checking them.
+ for (int i = OutputArgs; i < NumOperands; i++) {
Value *Operand = CI->getOperand(i);
- if (Operand->getType()->isSized())
- insertShadowCheck(Operand, &I);
+ instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
}
- setShadow(&I, getCleanShadow(&I));
- setOrigin(&I, getCleanOrigin());
- IRBuilder<> IRB(&I);
- IRB.SetInsertPoint(I.getNextNode());
- for (size_t i = 0, n = CI->getNumOperands(); i < n; i++) {
+ // Unpoison output arguments. This must happen before the actual InlineAsm
+ // call, so that the shadow for memory published in the asm() statement
+ // remains valid.
+ for (int i = 0; i < OutputArgs; i++) {
Value *Operand = CI->getOperand(i);
- Type *OpType = Operand->getType();
- if (!OpType->isPointerTy())
- continue;
- Type *ElType = OpType->getPointerElementType();
- if (!ElType->isSized())
- continue;
- Value *ShadowPtr, *OriginPtr;
- std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
- Operand, IRB, ElType, /*Alignment*/ 1, /*isStore*/ true);
- Value *CShadow = getCleanShadow(ElType);
- IRB.CreateStore(
- CShadow,
- IRB.CreatePointerCast(ShadowPtr, CShadow->getType()->getPointerTo()));
+ instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
}
+
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
}
void visitInstruction(Instruction &I) {
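
getNumOutputArgs() depends on the operand layout spelled out in the comment above: register outputs come back as the CallInst's return value, pointer ("=m"-style) outputs occupy the first operands, inputs follow, and the callee is the final operand. A hypothetical statement that exercises all three kinds:

    // Two register outputs, one memory output, two inputs; the asm body is
    // irrelevant to the operand mapping.
    void asmOperandDemo(int a, int b, int *table) {
      int lo = 0, hi = 0;
      __asm__ volatile("nop"
                       : "=r"(lo), "=r"(hi), // nR = 2, returned as an {i32, i32} struct
                         "=m"(table[0])      // nO = 1, passed as CallInst operand 0
                       : "r"(a), "m"(b));    // nI = 2, operands 1 and 2
      (void)lo;
      (void)hi;
    }
    // getNumOutputArgs() reports 1 here: three output constraints minus the two
    // register outputs recovered from the return value.
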
@@ -3249,12 +3613,16 @@ struct VarArgAMD64Helper : public VarArgHelper {
// An unfortunate workaround for asymmetric lowering of va_arg stuff.
// See a comment in visitCallSite for more details.
static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
- static const unsigned AMD64FpEndOffset = 176;
+ static const unsigned AMD64FpEndOffsetSSE = 176;
+ // If SSE is disabled, fp_offset in va_list is zero.
+ static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
+ unsigned AMD64FpEndOffset;
Function &F;
MemorySanitizer &MS;
MemorySanitizerVisitor &MSV;
Value *VAArgTLSCopy = nullptr;
+ Value *VAArgTLSOriginCopy = nullptr;
Value *VAArgOverflowSize = nullptr;
SmallVector<CallInst*, 16> VAStartInstrumentationList;
@@ -3262,7 +3630,18 @@ struct VarArgAMD64Helper : public VarArgHelper {
enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
- MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
+ MemorySanitizerVisitor &MSV)
+ : F(F), MS(MS), MSV(MSV) {
+ AMD64FpEndOffset = AMD64FpEndOffsetSSE;
+ for (const auto &Attr : F.getAttributes().getFnAttributes()) {
+ if (Attr.isStringAttribute() &&
+ (Attr.getKindAsString() == "target-features")) {
+ if (Attr.getValueAsString().contains("-sse"))
+ AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
+ break;
+ }
+ }
+ }
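
The offsets this helper tracks mirror the x86_64 SysV va_list register save area: six 8-byte GP register slots occupy the first 48 bytes (AMD64GpEndOffset) and eight 16-byte SSE slots occupy the next 128, ending at 176 (AMD64FpEndOffsetSSE); when the target features contain "-sse" the FP area is empty and everything past 48 spills to the overflow area. A layout sketch with assumed field names:

    #include <cstdint>

    // Assumed shape of the register save area whose shadow the pass copies into
    // VAArgTLSCopy; the offsets match AMD64GpEndOffset / AMD64FpEndOffsetSSE.
    struct RegSaveAreaSketch {
      uint64_t gp[6];           // bytes  0..47: rdi, rsi, rdx, rcx, r8, r9
      unsigned char xmm[8][16]; // bytes 48..175: xmm0..xmm7
    };
    static_assert(sizeof(RegSaveAreaSketch) == 176,
                  "total size equals AMD64FpEndOffsetSSE");
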
ArgKind classifyArgument(Value* arg) {
// A very rough approximation of X86_64 argument classification rules.
@@ -3304,9 +3683,14 @@ struct VarArgAMD64Helper : public VarArgHelper {
assert(A->getType()->isPointerTy());
Type *RealTy = A->getType()->getPointerElementType();
uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
- Value *ShadowBase =
- getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
+ Value *ShadowBase = getShadowPtrForVAArgument(
+ RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
+ Value *OriginBase = nullptr;
+ if (MS.TrackOrigins)
+ OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
OverflowOffset += alignTo(ArgSize, 8);
+ if (!ShadowBase)
+ continue;
Value *ShadowPtr, *OriginPtr;
std::tie(ShadowPtr, OriginPtr) =
MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
@@ -3314,20 +3698,31 @@ struct VarArgAMD64Helper : public VarArgHelper {
IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
kShadowTLSAlignment, ArgSize);
+ if (MS.TrackOrigins)
+ IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
+ kShadowTLSAlignment, ArgSize);
} else {
ArgKind AK = classifyArgument(A);
if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
AK = AK_Memory;
if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
AK = AK_Memory;
- Value *ShadowBase;
+ Value *ShadowBase, *OriginBase = nullptr;
switch (AK) {
case AK_GeneralPurpose:
- ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset);
+ ShadowBase =
+ getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
+ if (MS.TrackOrigins)
+ OriginBase =
+ getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
GpOffset += 8;
break;
case AK_FloatingPoint:
- ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset);
+ ShadowBase =
+ getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
+ if (MS.TrackOrigins)
+ OriginBase =
+ getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
FpOffset += 16;
break;
case AK_Memory:
@@ -3335,15 +3730,27 @@ struct VarArgAMD64Helper : public VarArgHelper {
continue;
uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
ShadowBase =
- getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+ getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
+ if (MS.TrackOrigins)
+ OriginBase =
+ getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
OverflowOffset += alignTo(ArgSize, 8);
}
// Take fixed arguments into account for GpOffset and FpOffset,
// but don't actually store shadows for them.
+ // TODO(glider): don't call get*PtrForVAArgument() for them.
if (IsFixed)
continue;
- IRB.CreateAlignedStore(MSV.getShadow(A), ShadowBase,
- kShadowTLSAlignment);
+ if (!ShadowBase)
+ continue;
+ Value *Shadow = MSV.getShadow(A);
+ IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
+ if (MS.TrackOrigins) {
+ Value *Origin = MSV.getOrigin(A);
+ unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
+ MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
+ std::max(kShadowTLSAlignment, kMinOriginAlignment));
+ }
}
}
Constant *OverflowSize =
@@ -3353,11 +3760,25 @@ struct VarArgAMD64Helper : public VarArgHelper {
/// Compute the shadow address for a given va_arg.
Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- int ArgOffset) {
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
- "_msarg");
+ "_msarg_va_s");
+ }
+
+ /// Compute the origin address for a given va_arg.
+ Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
+ // getOriginPtrForVAArgument() is always called after
+ // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
+ // overflow.
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+ "_msarg_va_o");
}
void unpoisonVAListTagForInst(IntrinsicInst &I) {
@@ -3402,6 +3823,10 @@ struct VarArgAMD64Helper : public VarArgHelper {
VAArgOverflowSize);
VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
+ if (MS.TrackOrigins) {
+ VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSOriginCopy, 8, MS.VAArgOriginTLS, 8, CopySize);
+ }
}
// Instrument va_start.
@@ -3423,6 +3848,9 @@ struct VarArgAMD64Helper : public VarArgHelper {
Alignment, /*isStore*/ true);
IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
AMD64FpEndOffset);
+ if (MS.TrackOrigins)
+ IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
+ Alignment, AMD64FpEndOffset);
Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, 8)),
@@ -3436,6 +3864,12 @@ struct VarArgAMD64Helper : public VarArgHelper {
AMD64FpEndOffset);
IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
VAArgOverflowSize);
+ if (MS.TrackOrigins) {
+ SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
+ AMD64FpEndOffset);
+ IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
+ VAArgOverflowSize);
+ }
}
}
};
@@ -3469,9 +3903,11 @@ struct VarArgMIPS64Helper : public VarArgHelper {
if (ArgSize < 8)
VAArgOffset += (8 - ArgSize);
}
- Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset);
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
VAArgOffset += ArgSize;
VAArgOffset = alignTo(VAArgOffset, 8);
+ if (!Base)
+ continue;
IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
@@ -3483,7 +3919,10 @@ struct VarArgMIPS64Helper : public VarArgHelper {
/// Compute the shadow address for a given va_arg.
Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- int ArgOffset) {
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
@@ -3614,11 +4053,11 @@ struct VarArgAArch64Helper : public VarArgHelper {
Value *Base;
switch (AK) {
case AK_GeneralPurpose:
- Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset);
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
GrOffset += 8;
break;
case AK_FloatingPoint:
- Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset);
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
VrOffset += 16;
break;
case AK_Memory:
@@ -3627,7 +4066,8 @@ struct VarArgAArch64Helper : public VarArgHelper {
if (IsFixed)
continue;
uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
- Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
+ alignTo(ArgSize, 8));
OverflowOffset += alignTo(ArgSize, 8);
break;
}
@@ -3635,6 +4075,8 @@ struct VarArgAArch64Helper : public VarArgHelper {
// bother to actually store a shadow.
if (IsFixed)
continue;
+ if (!Base)
+ continue;
IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
Constant *OverflowSize =
@@ -3644,7 +4086,10 @@ struct VarArgAArch64Helper : public VarArgHelper {
/// Compute the shadow address for a given va_arg.
Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- int ArgOffset) {
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
@@ -3849,14 +4294,17 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
ArgAlign = 8;
VAArgOffset = alignTo(VAArgOffset, ArgAlign);
if (!IsFixed) {
- Value *Base = getShadowPtrForVAArgument(RealTy, IRB,
- VAArgOffset - VAArgBase);
- Value *AShadowPtr, *AOriginPtr;
- std::tie(AShadowPtr, AOriginPtr) = MSV.getShadowOriginPtr(
- A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment, /*isStore*/ false);
-
- IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
- kShadowTLSAlignment, ArgSize);
+ Value *Base = getShadowPtrForVAArgument(
+ RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
+ if (Base) {
+ Value *AShadowPtr, *AOriginPtr;
+ std::tie(AShadowPtr, AOriginPtr) =
+ MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
+ kShadowTLSAlignment, /*isStore*/ false);
+
+ IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
+ kShadowTLSAlignment, ArgSize);
+ }
}
VAArgOffset += alignTo(ArgSize, 8);
} else {
@@ -3884,8 +4332,9 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
}
if (!IsFixed) {
Base = getShadowPtrForVAArgument(A->getType(), IRB,
- VAArgOffset - VAArgBase);
- IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ VAArgOffset - VAArgBase, ArgSize);
+ if (Base)
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
VAArgOffset += ArgSize;
VAArgOffset = alignTo(VAArgOffset, 8);
@@ -3903,7 +4352,10 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
/// Compute the shadow address for a given va_arg.
Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- int ArgOffset) {
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
@@ -4005,10 +4457,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
return new VarArgNoOpHelper(Func, Msan, Visitor);
}
-bool MemorySanitizer::runOnFunction(Function &F) {
- if (&F == MsanCtorFunction)
- return false;
- MemorySanitizerVisitor Visitor(F, *this);
+bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
+ MemorySanitizerVisitor Visitor(F, *this, TLI);
// Clear out readonly/readnone attributes.
AttrBuilder B;
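
The recurring change across the VarArg*Helper classes above is a bounds check before any shadow is written into the __msan_va_arg_tls buffer: getShadowPtrForVAArgument() now returns nullptr when the argument would not fit, and every caller skips the store in that case. A minimal standalone sketch of that guard follows (the plain array and the offsets are illustrative stand-ins, not the pass's real TLS state):

#include <cstdint>
#include <cstdio>

// Illustrative only: MSan's vararg shadow TLS buffer is kParamTLSSize bytes;
// here it is modeled as an ordinary static array.
constexpr unsigned kParamTLSSize = 800;
static uint8_t VAArgTLS[kParamTLSSize];

// Mirrors the new getShadowPtrForVAArgument() contract: hand out nullptr when
// the argument's shadow would not fit, so the caller drops the store instead
// of writing past the end of the buffer.
uint8_t *getShadowPtrForVAArg(unsigned ArgOffset, unsigned ArgSize) {
  if (ArgOffset + ArgSize > kParamTLSSize)
    return nullptr;
  return &VAArgTLS[ArgOffset];
}

int main() {
  // An argument whose 16 bytes of shadow start at offset 792 would overflow,
  // so the helper refuses and the shadow store is skipped.
  if (uint8_t *P = getShadowPtrForVAArg(792, 16))
    std::printf("storing shadow at offset %td\n", P - VAArgTLS);
  else
    std::printf("argument shadow skipped: would overflow __msan_va_arg_tls\n");
}
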
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 307b7eaa2196..f043325f5bba 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -63,7 +63,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/IndirectCallSiteVisitor.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Attributes.h"
@@ -141,6 +141,11 @@ static cl::opt<std::string>
cl::value_desc("filename"),
cl::desc("Specify the path of profile data file. This is"
"mainly for test purpose."));
+static cl::opt<std::string> PGOTestProfileRemappingFile(
+ "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
+ cl::value_desc("filename"),
+ cl::desc("Specify the path of profile remapping file. This is mainly for "
+ "test purpose."));
// Command line option to disable value profiling. The default is false:
// i.e. value profiling is enabled by default. This is for debug purpose.
@@ -539,7 +544,7 @@ public:
MIVisitor.countMemIntrinsics(Func);
NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
- ValueSites[IPVK_IndirectCallTarget] = findIndirectCallSites(Func);
+ ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
FuncName = getPGOFuncName(F);
@@ -581,7 +586,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
std::vector<char> Indexes;
JamCRC JC;
for (auto &BB : F) {
- const TerminatorInst *TI = BB.getTerminator();
+ const Instruction *TI = BB.getTerminator();
for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
auto BI = findBBInfo(Succ);
@@ -693,7 +698,7 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
// Instrument the SrcBB if it has a single successor,
// otherwise, the DestBB if this is not a critical edge.
- TerminatorInst *TI = SrcBB->getTerminator();
+ Instruction *TI = SrcBB->getTerminator();
if (TI->getNumSuccessors() <= 1)
return SrcBB;
if (!E->IsCritical)
@@ -749,12 +754,12 @@ static void instrumentOneFunc(
if (DisableValueProfiling)
return;
- unsigned NumIndirectCallSites = 0;
+ unsigned NumIndirectCalls = 0;
for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) {
CallSite CS(I);
Value *Callee = CS.getCalledValue();
LLVM_DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = "
- << NumIndirectCallSites << "\n");
+ << NumIndirectCalls << "\n");
IRBuilder<> Builder(I);
assert(Builder.GetInsertPoint() != I->getParent()->end() &&
"Cannot get the Instrumentation point");
@@ -764,9 +769,9 @@ static void instrumentOneFunc(
Builder.getInt64(FuncInfo.FunctionHash),
Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()),
Builder.getInt32(IPVK_IndirectCallTarget),
- Builder.getInt32(NumIndirectCallSites++)});
+ Builder.getInt32(NumIndirectCalls++)});
}
- NumOfPGOICall += NumIndirectCallSites;
+ NumOfPGOICall += NumIndirectCalls;
// Now instrument memop intrinsic calls.
FuncInfo.MIVisitor.instrumentMemIntrinsics(
@@ -854,7 +859,7 @@ public:
FreqAttr(FFA_Normal) {}
// Read counts for the instrumented BB from profile.
- bool readCounters(IndexedInstrProfReader *PGOReader);
+ bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
// Populate the counts for all BBs.
void populateCounters();
@@ -899,6 +904,7 @@ public:
FuncInfo.dumpInfo(Str);
}
+ uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
private:
Function &F;
Module *M;
@@ -1008,7 +1014,7 @@ void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
// Read the profile from ProfileFileName and assign the value to the
// instrumented BB and the edges. This function also updates ProgramMaxCount.
// Return true if the profile is successfully read, and false on errors.
-bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) {
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) {
auto &Ctx = M->getContext();
Expected<InstrProfRecord> Result =
PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
@@ -1048,6 +1054,7 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) {
LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
ValueSum += CountFromProfile[I];
}
+ AllZeros = (ValueSum == 0);
LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
@@ -1162,7 +1169,7 @@ void PGOUseFunc::setBranchWeights() {
// Generate MD_prof metadata for every branch instruction.
LLVM_DEBUG(dbgs() << "\nSetting branch weights.\n");
for (auto &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (TI->getNumSuccessors() < 2)
continue;
if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
@@ -1208,7 +1215,7 @@ void PGOUseFunc::annotateIrrLoopHeaderWeights() {
// to become an irreducible loop header after the indirectbr tail
// duplication.
if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
const UseBBInfo &BBCountInfo = getBBInfo(&BB);
setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
}
@@ -1429,13 +1436,14 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
}
static bool annotateAllFunctions(
- Module &M, StringRef ProfileFileName,
+ Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
LLVM_DEBUG(dbgs() << "Read in profile counters: ");
auto &Ctx = M.getContext();
// Read the counter array from file.
- auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName);
+ auto ReaderOrErr =
+ IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
if (Error E = ReaderOrErr.takeError()) {
handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
Ctx.diagnose(
@@ -1471,8 +1479,15 @@ static bool annotateAllFunctions(
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
- if (!Func.readCounters(PGOReader.get()))
+ bool AllZeros = false;
+ if (!Func.readCounters(PGOReader.get(), AllZeros))
continue;
+ if (AllZeros) {
+ F.setEntryCount(ProfileCount(0, Function::PCT_Real));
+ if (Func.getProgramMaxCount() != 0)
+ ColdFunctions.push_back(&F);
+ continue;
+ }
Func.populateCounters();
Func.setBranchWeights();
Func.annotateValueSites();
@@ -1529,10 +1544,14 @@ static bool annotateAllFunctions(
return true;
}
-PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename)
- : ProfileFileName(std::move(Filename)) {
+PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
+ std::string RemappingFilename)
+ : ProfileFileName(std::move(Filename)),
+ ProfileRemappingFileName(std::move(RemappingFilename)) {
if (!PGOTestProfileFile.empty())
ProfileFileName = PGOTestProfileFile;
+ if (!PGOTestProfileRemappingFile.empty())
+ ProfileRemappingFileName = PGOTestProfileRemappingFile;
}
PreservedAnalyses PGOInstrumentationUse::run(Module &M,
@@ -1547,7 +1566,8 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
return &FAM.getResult<BlockFrequencyAnalysis>(F);
};
- if (!annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI))
+ if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
+ LookupBPI, LookupBFI))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -1564,7 +1584,7 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI);
+ return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI);
}
static std::string getSimpleNodeName(const BasicBlock *Node) {
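
The new AllZeros out-parameter of readCounters() lets annotateAllFunctions() treat functions whose counters are all zero specially: they receive a real entry count of 0 and, if the program as a whole did run (getProgramMaxCount() != 0), are queued as cold. A rough standalone sketch of that decision, with invented stand-in types rather than the pass's real data structures:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Invented stand-in for a function's profile record, just to show the control
// flow enabled by readCounters(PGOReader, AllZeros).
struct FuncProfile {
  std::string Name;
  std::vector<uint64_t> Counters;
};

int main() {
  uint64_t ProgramMaxCount = 1000; // assume something in the program executed
  std::vector<FuncProfile> Funcs = {
      {"hot", {120, 880}}, {"never_called", {0, 0}}};

  for (const auto &F : Funcs) {
    uint64_t Sum = 0;
    for (uint64_t C : F.Counters)
      Sum += C;
    bool AllZeros = (Sum == 0);
    if (AllZeros) {
      // Corresponds to F.setEntryCount(ProfileCount(0, PCT_Real)) plus
      // ColdFunctions.push_back(&F) when the whole program has nonzero counts.
      std::cout << F.Name << ": entry count 0"
                << (ProgramMaxCount != 0 ? ", marked cold" : "") << "\n";
      continue;
    }
    std::cout << F.Name << ": annotate branch weights from counters\n";
  }
}
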
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index a4dd48c8dd6a..0ba8d5765e8c 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
@@ -211,8 +212,8 @@ private:
bool IsLeafFunc = true);
Function *CreateInitCallsForSections(Module &M, const char *InitFunctionName,
Type *Ty, const char *Section);
- std::pair<GlobalVariable *, GlobalVariable *>
- CreateSecStartEnd(Module &M, const char *Section, Type *Ty);
+ std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section,
+ Type *Ty);
void SetNoSanitizeMetadata(Instruction *I) {
I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
@@ -234,6 +235,7 @@ private:
Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
*Int16Ty, *Int8Ty, *Int8PtrTy;
Module *CurModule;
+ std::string CurModuleUniqueId;
Triple TargetTriple;
LLVMContext *C;
const DataLayout *DL;
@@ -249,7 +251,7 @@ private:
} // namespace
-std::pair<GlobalVariable *, GlobalVariable *>
+std::pair<Value *, Value *>
SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section,
Type *Ty) {
GlobalVariable *SecStart =
@@ -260,22 +262,28 @@ SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section,
new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
nullptr, getSectionEnd(Section));
SecEnd->setVisibility(GlobalValue::HiddenVisibility);
-
- return std::make_pair(SecStart, SecEnd);
+ IRBuilder<> IRB(M.getContext());
+ Value *SecEndPtr = IRB.CreatePointerCast(SecEnd, Ty);
+ if (!TargetTriple.isOSBinFormatCOFF())
+ return std::make_pair(IRB.CreatePointerCast(SecStart, Ty), SecEndPtr);
+
+ // Account for the fact that on windows-msvc __start_* symbols actually
+ // point to a uint64_t before the start of the array.
+ auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy);
+ auto GEP = IRB.CreateGEP(SecStartI8Ptr,
+ ConstantInt::get(IntptrTy, sizeof(uint64_t)));
+ return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr);
}
-
Function *SanitizerCoverageModule::CreateInitCallsForSections(
Module &M, const char *InitFunctionName, Type *Ty,
const char *Section) {
- IRBuilder<> IRB(M.getContext());
auto SecStartEnd = CreateSecStartEnd(M, Section, Ty);
auto SecStart = SecStartEnd.first;
auto SecEnd = SecStartEnd.second;
Function *CtorFunc;
std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty},
- {IRB.CreatePointerCast(SecStart, Ty), IRB.CreatePointerCast(SecEnd, Ty)});
+ M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd});
if (TargetTriple.supportsCOMDAT()) {
// Use comdat to dedup CtorFunc.
@@ -284,6 +292,17 @@ Function *SanitizerCoverageModule::CreateInitCallsForSections(
} else {
appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
}
+
+ if (TargetTriple.isOSBinFormatCOFF()) {
+ // In COFF files, if the constructors are set as COMDAT (they are because
+ // COFF supports COMDAT) and the linker flag /OPT:REF (strip unreferenced
+ // functions and data) is used, the constructors get stripped. To prevent
+ // this, give the constructors weak ODR linkage and ensure the linker knows
+ // to include the sancov constructor. This way the linker can deduplicate
+ // the constructors but always leave one copy.
+ CtorFunc->setLinkage(GlobalValue::WeakODRLinkage);
+ appendToUsed(M, CtorFunc);
+ }
return CtorFunc;
}
@@ -293,6 +312,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
C = &(M.getContext());
DL = &M.getDataLayout();
CurModule = &M;
+ CurModuleUniqueId = getUniqueModuleId(CurModule);
TargetTriple = Triple(M.getTargetTriple());
FunctionGuardArray = nullptr;
Function8bitCounterArray = nullptr;
@@ -397,9 +417,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
Function *InitFunction = declareSanitizerInitFunction(
M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy});
IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
- IRBCtor.CreateCall(InitFunction,
- {IRB.CreatePointerCast(SecStartEnd.first, IntptrPtrTy),
- IRB.CreatePointerCast(SecStartEnd.second, IntptrPtrTy)});
+ IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
}
// We don't reference these arrays directly in any of our runtime functions,
// so we need to prevent them from being dead stripped.
@@ -549,11 +567,19 @@ GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection(
auto Array = new GlobalVariable(
*CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
Constant::getNullValue(ArrayTy), "__sancov_gen_");
- if (auto Comdat = F.getComdat())
- Array->setComdat(Comdat);
+
+ if (TargetTriple.supportsCOMDAT() && !F.isInterposable())
+ if (auto Comdat =
+ GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
+ Array->setComdat(Comdat);
Array->setSection(getSectionName(Section));
Array->setAlignment(Ty->isPointerTy() ? DL->getPointerSize()
: Ty->getPrimitiveSizeInBits() / 8);
+ GlobalsToAppendToUsed.push_back(Array);
+ GlobalsToAppendToCompilerUsed.push_back(Array);
+ MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F));
+ Array->addMetadata(LLVMContext::MD_associated, *MD);
+
return Array;
}
@@ -587,24 +613,16 @@ SanitizerCoverageModule::CreatePCArray(Function &F,
void SanitizerCoverageModule::CreateFunctionLocalArrays(
Function &F, ArrayRef<BasicBlock *> AllBlocks) {
- if (Options.TracePCGuard) {
+ if (Options.TracePCGuard)
FunctionGuardArray = CreateFunctionLocalArrayInSection(
AllBlocks.size(), F, Int32Ty, SanCovGuardsSectionName);
- GlobalsToAppendToUsed.push_back(FunctionGuardArray);
- }
- if (Options.Inline8bitCounters) {
+
+ if (Options.Inline8bitCounters)
Function8bitCounterArray = CreateFunctionLocalArrayInSection(
AllBlocks.size(), F, Int8Ty, SanCovCountersSectionName);
- GlobalsToAppendToCompilerUsed.push_back(Function8bitCounterArray);
- MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F));
- Function8bitCounterArray->addMetadata(LLVMContext::MD_associated, *MD);
- }
- if (Options.PCTable) {
+
+ if (Options.PCTable)
FunctionPCsArray = CreatePCArray(F, AllBlocks);
- GlobalsToAppendToCompilerUsed.push_back(FunctionPCsArray);
- MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F));
- FunctionPCsArray->addMetadata(LLVMContext::MD_associated, *MD);
- }
}
bool SanitizerCoverageModule::InjectCoverage(Function &F,
@@ -806,8 +824,13 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
std::string
SanitizerCoverageModule::getSectionName(const std::string &Section) const {
- if (TargetTriple.getObjectFormat() == Triple::COFF)
- return ".SCOV$M";
+ if (TargetTriple.isOSBinFormatCOFF()) {
+ if (Section == SanCovCountersSectionName)
+ return ".SCOV$CM";
+ if (Section == SanCovPCsSectionName)
+ return ".SCOVP$M";
+ return ".SCOV$GM"; // For SanCovGuardsSectionName.
+ }
if (TargetTriple.isOSBinFormatMachO())
return "__DATA,__" + Section;
return "__" + Section;
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index fa1e5a157a0f..077364e15c4f 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -19,6 +19,7 @@
// The rest is handled by the run-time library.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -86,15 +87,16 @@ static const char *const kTsanInitName = "__tsan_init";
namespace {
/// ThreadSanitizer: instrument the code in module to find races.
-struct ThreadSanitizer : public FunctionPass {
- ThreadSanitizer() : FunctionPass(ID) {}
- StringRef getPassName() const override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
- static char ID; // Pass identification, replacement for typeid.
-
- private:
+///
+/// Instantiating ThreadSanitizer inserts the tsan runtime library API function
+/// declarations into the module if they don't exist already. Instantiating
+/// ensures the __tsan_init function is in the list of global constructors for
+/// the module.
+struct ThreadSanitizer {
+ ThreadSanitizer(Module &M);
+ bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
+
+private:
void initializeCallbacks(Module &M);
bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
bool instrumentAtomic(Instruction *I, const DataLayout &DL);
@@ -130,27 +132,55 @@ struct ThreadSanitizer : public FunctionPass {
Function *MemmoveFn, *MemcpyFn, *MemsetFn;
Function *TsanCtorFunction;
};
+
+struct ThreadSanitizerLegacyPass : FunctionPass {
+ ThreadSanitizerLegacyPass() : FunctionPass(ID) {}
+ StringRef getPassName() const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+ bool doInitialization(Module &M) override;
+ static char ID; // Pass identification, replacement for typeid.
+private:
+ Optional<ThreadSanitizer> TSan;
+};
} // namespace
-char ThreadSanitizer::ID = 0;
-INITIALIZE_PASS_BEGIN(
- ThreadSanitizer, "tsan",
- "ThreadSanitizer: detects data races.",
- false, false)
+PreservedAnalyses ThreadSanitizerPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ ThreadSanitizer TSan(*F.getParent());
+ if (TSan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+char ThreadSanitizerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ThreadSanitizerLegacyPass, "tsan",
+ "ThreadSanitizer: detects data races.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(
- ThreadSanitizer, "tsan",
- "ThreadSanitizer: detects data races.",
- false, false)
+INITIALIZE_PASS_END(ThreadSanitizerLegacyPass, "tsan",
+ "ThreadSanitizer: detects data races.", false, false)
-StringRef ThreadSanitizer::getPassName() const { return "ThreadSanitizer"; }
+StringRef ThreadSanitizerLegacyPass::getPassName() const {
+ return "ThreadSanitizerLegacyPass";
+}
-void ThreadSanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
+void ThreadSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
-FunctionPass *llvm::createThreadSanitizerPass() {
- return new ThreadSanitizer();
+bool ThreadSanitizerLegacyPass::doInitialization(Module &M) {
+ TSan.emplace(M);
+ return true;
+}
+
+bool ThreadSanitizerLegacyPass::runOnFunction(Function &F) {
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TSan->sanitizeFunction(F, TLI);
+ return true;
+}
+
+FunctionPass *llvm::createThreadSanitizerLegacyPassPass() {
+ return new ThreadSanitizerLegacyPass();
}
void ThreadSanitizer::initializeCallbacks(Module &M) {
@@ -252,16 +282,16 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
IRB.getInt32Ty(), IntptrTy));
}
-bool ThreadSanitizer::doInitialization(Module &M) {
+ThreadSanitizer::ThreadSanitizer(Module &M) {
const DataLayout &DL = M.getDataLayout();
IntptrTy = DL.getIntPtrType(M.getContext());
- std::tie(TsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{},
- /*InitArgs=*/{});
-
- appendToGlobalCtors(M, TsanCtorFunction, 0);
-
- return true;
+ std::tie(TsanCtorFunction, std::ignore) =
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, Function *) { appendToGlobalCtors(M, Ctor, 0); });
}
static bool isVtableAccess(Instruction *I) {
@@ -402,7 +432,8 @@ void ThreadSanitizer::InsertRuntimeIgnores(Function &F) {
}
}
-bool ThreadSanitizer::runOnFunction(Function &F) {
+bool ThreadSanitizer::sanitizeFunction(Function &F,
+ const TargetLibraryInfo &TLI) {
// This is required to prevent instrumenting call to __tsan_init from within
// the module constructor.
if (&F == TsanCtorFunction)
@@ -416,8 +447,6 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
bool HasCalls = false;
bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread);
const DataLayout &DL = F.getParent()->getDataLayout();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// Traverse all instructions, collect loads/stores/returns, check for calls.
for (auto &BB : F) {
@@ -428,7 +457,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
LocalLoadsAndStores.push_back(&Inst);
else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
if (CallInst *CI = dyn_cast<CallInst>(&Inst))
- maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
+ maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
if (isa<MemIntrinsic>(Inst))
MemIntrinCalls.push_back(&Inst);
HasCalls = true;
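
With the split above, module-level setup (declaring the runtime callbacks, registering the ctor) moves into the ThreadSanitizer constructor, and the legacy pass merely holds an Optional<ThreadSanitizer> that it emplaces in doInitialization(). A stripped-down sketch of that wrapping pattern, using std::optional and invented placeholder Module/Function types:

#include <iostream>
#include <optional>
#include <string>

struct Module { std::string Name; };
struct Function { std::string Name; };

// Stand-in for ThreadSanitizer: construction performs the per-module setup,
// sanitizeFunction() does the per-function work.
struct Sanitizer {
  explicit Sanitizer(Module &M) {
    std::cout << "module setup for " << M.Name << "\n";
  }
  bool sanitizeFunction(Function &F) {
    std::cout << "instrumenting " << F.Name << "\n";
    return true;
  }
};

// Stand-in for ThreadSanitizerLegacyPass: construction is delayed until
// doInitialization(), when the Module first becomes available.
struct LegacyPass {
  std::optional<Sanitizer> TSan;
  bool doInitialization(Module &M) { TSan.emplace(M); return true; }
  bool runOnFunction(Function &F) { return TSan->sanitizeFunction(F); }
};

int main() {
  Module M{"m.ll"};
  Function F1{"foo"}, F2{"bar"};
  LegacyPass P;
  P.doInitialization(M); // module-level setup happens exactly once
  P.runOnFunction(F1);
  P.runOnFunction(F2);
}
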
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index ba4924c9cb2d..7f6b157304a3 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -26,6 +26,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
@@ -74,27 +75,27 @@ public:
switch (kind) {
case ARCRuntimeEntryPointKind::AutoreleaseRV:
- return getI8XRetI8XEntryPoint(AutoreleaseRV,
- "objc_autoreleaseReturnValue", true);
+ return getIntrinsicEntryPoint(AutoreleaseRV,
+ Intrinsic::objc_autoreleaseReturnValue);
case ARCRuntimeEntryPointKind::Release:
- return getVoidRetI8XEntryPoint(Release, "objc_release");
+ return getIntrinsicEntryPoint(Release, Intrinsic::objc_release);
case ARCRuntimeEntryPointKind::Retain:
- return getI8XRetI8XEntryPoint(Retain, "objc_retain", true);
+ return getIntrinsicEntryPoint(Retain, Intrinsic::objc_retain);
case ARCRuntimeEntryPointKind::RetainBlock:
- return getI8XRetI8XEntryPoint(RetainBlock, "objc_retainBlock", false);
+ return getIntrinsicEntryPoint(RetainBlock, Intrinsic::objc_retainBlock);
case ARCRuntimeEntryPointKind::Autorelease:
- return getI8XRetI8XEntryPoint(Autorelease, "objc_autorelease", true);
+ return getIntrinsicEntryPoint(Autorelease, Intrinsic::objc_autorelease);
case ARCRuntimeEntryPointKind::StoreStrong:
- return getI8XRetI8XXI8XEntryPoint(StoreStrong, "objc_storeStrong");
+ return getIntrinsicEntryPoint(StoreStrong, Intrinsic::objc_storeStrong);
case ARCRuntimeEntryPointKind::RetainRV:
- return getI8XRetI8XEntryPoint(RetainRV,
- "objc_retainAutoreleasedReturnValue", true);
+ return getIntrinsicEntryPoint(RetainRV,
+ Intrinsic::objc_retainAutoreleasedReturnValue);
case ARCRuntimeEntryPointKind::RetainAutorelease:
- return getI8XRetI8XEntryPoint(RetainAutorelease, "objc_retainAutorelease",
- true);
+ return getIntrinsicEntryPoint(RetainAutorelease,
+ Intrinsic::objc_retainAutorelease);
case ARCRuntimeEntryPointKind::RetainAutoreleaseRV:
- return getI8XRetI8XEntryPoint(RetainAutoreleaseRV,
- "objc_retainAutoreleaseReturnValue", true);
+ return getIntrinsicEntryPoint(RetainAutoreleaseRV,
+ Intrinsic::objc_retainAutoreleaseReturnValue);
}
llvm_unreachable("Switch should be a covered switch.");
@@ -131,54 +132,11 @@ private:
/// Declaration for objc_retainAutoreleaseReturnValue().
Constant *RetainAutoreleaseRV = nullptr;
- Constant *getVoidRetI8XEntryPoint(Constant *&Decl, StringRef Name) {
+ Constant *getIntrinsicEntryPoint(Constant *&Decl, Intrinsic::ID IntID) {
if (Decl)
return Decl;
- LLVMContext &C = TheModule->getContext();
- Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttributeList Attr = AttributeList().addAttribute(
- C, AttributeList::FunctionIndex, Attribute::NoUnwind);
- FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params,
- /*isVarArg=*/false);
- return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
- }
-
- Constant *getI8XRetI8XEntryPoint(Constant *&Decl, StringRef Name,
- bool NoUnwind = false) {
- if (Decl)
- return Decl;
-
- LLVMContext &C = TheModule->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *Fty = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeList Attr = AttributeList();
-
- if (NoUnwind)
- Attr = Attr.addAttribute(C, AttributeList::FunctionIndex,
- Attribute::NoUnwind);
-
- return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
- }
-
- Constant *getI8XRetI8XXI8XEntryPoint(Constant *&Decl, StringRef Name) {
- if (Decl)
- return Decl;
-
- LLVMContext &C = TheModule->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
- Type *Params[] = { I8XX, I8X };
-
- AttributeList Attr = AttributeList().addAttribute(
- C, AttributeList::FunctionIndex, Attribute::NoUnwind);
- Attr = Attr.addParamAttribute(C, 0, Attribute::NoCapture);
-
- FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params,
- /*isVarArg=*/false);
-
- return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
+ return Decl = Intrinsic::getDeclaration(TheModule, IntID);
}
};
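
ARCRuntimeEntryPoints still creates each runtime declaration lazily and caches it in a member; the rewrite only changes how the declaration is obtained (Intrinsic::getDeclaration() instead of hand-built function types). The caching idiom itself, sketched with invented types:

#include <iostream>

struct Decl { const char *Name; };

// One cached declaration, created on first use and reused afterwards -- the
// same `if (Decl) return Decl; return Decl = ...;` shape as the header above.
struct EntryPoints {
  Decl *Retain = nullptr;

  Decl *getRetain() {
    if (Retain)
      return Retain;
    return Retain = create("objc_retain");
  }

private:
  Decl *create(const char *Name) {
    std::cout << "creating declaration for " << Name << "\n";
    return new Decl{Name}; // leaked intentionally; this is only a sketch
  }
};

int main() {
  EntryPoints EP;
  EP.getRetain(); // creates the declaration
  EP.getRetain(); // reuses the cached one; no second "creating" line
}
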
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 464805051c65..4bd5fd1acd4c 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -45,18 +45,15 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
default: break;
}
- ImmutableCallSite CS(Inst);
- assert(CS && "Only calls can alter reference counts!");
+ const auto *Call = cast<CallBase>(Inst);
// See if AliasAnalysis can help us with the call.
- FunctionModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
+ FunctionModRefBehavior MRB = PA.getAA()->getModRefBehavior(Call);
if (AliasAnalysis::onlyReadsMemory(MRB))
return false;
if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
const DataLayout &DL = Inst->getModule()->getDataLayout();
- for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I) {
- const Value *Op = *I;
+ for (const Value *Op : Call->args()) {
if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
PA.related(Ptr, Op, DL))
return true;
@@ -266,13 +263,10 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
for (const BasicBlock *BB : Visited) {
if (BB == StartBB)
continue;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
- const BasicBlock *Succ = *SI;
+ for (const BasicBlock *Succ : successors(BB))
if (Succ != StartBB && !Visited.count(Succ)) {
DependingInsts.insert(reinterpret_cast<Instruction *>(-1));
return;
}
- }
}
}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 1dbe72c7569f..751c8f30e814 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -58,7 +58,7 @@ static inline void EraseInstruction(Instruction *CI) {
// Replace the return value with the argument.
assert((IsForwarding(GetBasicARCInstKind(CI)) ||
(IsNoopOnNull(GetBasicARCInstKind(CI)) &&
- isa<ConstantPointerNull>(OldArg))) &&
+ IsNullOrUndef(OldArg->stripPointerCasts()))) &&
"Can't delete non-forwarding instruction with users!");
CI->replaceAllUsesWith(OldArg);
}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 1f1ea9f58739..abe2871c0b8f 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -522,7 +522,7 @@ bool ObjCARCContract::tryToPeepholeInstruction(
TailOkForStoreStrongs = false;
return true;
case ARCInstKind::IntrinsicUser:
- // Remove calls to @clang.arc.use(...).
+ // Remove calls to @llvm.objc.clang.arc.use(...).
Inst->eraseFromParent();
return true;
default:
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 21e2848030fc..9a02174556fc 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -600,6 +600,17 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
}
}
+ // Track PHIs which are equivalent to our Arg.
+ SmallDenseSet<const Value*, 2> EquivalentArgs;
+ EquivalentArgs.insert(Arg);
+
+ // Add PHIs that are equivalent to Arg to ArgUsers.
+ if (const PHINode *PN = dyn_cast<PHINode>(Arg)) {
+ SmallVector<const Value *, 2> ArgUsers;
+ getEquivalentPHIs(*PN, ArgUsers);
+ EquivalentArgs.insert(ArgUsers.begin(), ArgUsers.end());
+ }
+
// Check for being preceded by an objc_autoreleaseReturnValue on the same
// pointer. In this case, we can delete the pair.
BasicBlock::iterator I = RetainRV->getIterator(),
@@ -609,7 +620,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
--I;
while (I != Begin && IsNoopInstruction(&*I));
if (GetBasicARCInstKind(&*I) == ARCInstKind::AutoreleaseRV &&
- GetArgRCIdentityRoot(&*I) == Arg) {
+ EquivalentArgs.count(GetArgRCIdentityRoot(&*I))) {
Changed = true;
++NumPeeps;
@@ -914,8 +925,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
GetRCIdentityRoot(PN->getIncomingValue(i));
if (IsNullOrUndef(Incoming))
HasNull = true;
- else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
- .getNumSuccessors() != 1) {
+ else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() !=
+ 1) {
HasCriticalEdges = true;
break;
}
@@ -1084,18 +1095,15 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
"Unknown top down sequence state.");
const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
bool NotAllSeqEqualButKnownSafe = false;
- succ_const_iterator SI(TI), SE(TI, false);
-
- for (; SI != SE; ++SI) {
+ for (const BasicBlock *Succ : successors(BB)) {
// If VisitBottomUp has pointer information for this successor, take
// what we know about it.
const DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
+ BBStates.find(Succ);
assert(BBI != BBStates.end());
const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
const Sequence SuccSSeq = SuccS.GetSeq();
@@ -1414,21 +1422,20 @@ ComputePostOrders(Function &F,
BasicBlock *EntryBB = &F.getEntryBlock();
BBState &MyStates = BBStates[EntryBB];
MyStates.SetAsEntry();
- TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back());
+ Instruction *EntryTI = EntryBB->getTerminator();
SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
Visited.insert(EntryBB);
OnStack.insert(EntryBB);
do {
dfs_next_succ:
BasicBlock *CurrBB = SuccStack.back().first;
- TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back());
- succ_iterator SE(TI, false);
+ succ_iterator SE(CurrBB->getTerminator(), false);
while (SuccStack.back().second != SE) {
BasicBlock *SuccBB = *SuccStack.back().second++;
if (Visited.insert(SuccBB).second) {
- TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back());
- SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
+ SuccStack.push_back(
+ std::make_pair(SuccBB, succ_iterator(SuccBB->getTerminator())));
BBStates[CurrBB].addSucc(SuccBB);
BBState &SuccStates = BBStates[SuccBB];
SuccStates.addPred(CurrBB);
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index ce09a477b5f5..b0602d96798c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -30,9 +30,10 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -102,7 +103,7 @@ struct BlockInfoType {
BasicBlock *BB = nullptr;
/// Cache of BB->getTerminator().
- TerminatorInst *Terminator = nullptr;
+ Instruction *Terminator = nullptr;
/// Post-order numbering of reverse control flow graph.
unsigned PostOrder;
@@ -115,7 +116,7 @@ class AggressiveDeadCodeElimination {
// ADCE does not use DominatorTree per se, but it updates it to preserve the
// analysis.
- DominatorTree &DT;
+ DominatorTree *DT;
PostDominatorTree &PDT;
/// Mapping of blocks to associated information, an element in BlockInfoVec.
@@ -190,7 +191,7 @@ class AggressiveDeadCodeElimination {
void makeUnconditional(BasicBlock *BB, BasicBlock *Target);
public:
- AggressiveDeadCodeElimination(Function &F, DominatorTree &DT,
+ AggressiveDeadCodeElimination(Function &F, DominatorTree *DT,
PostDominatorTree &PDT)
: F(F), DT(DT), PDT(PDT) {}
@@ -205,7 +206,7 @@ bool AggressiveDeadCodeElimination::performDeadCodeElimination() {
return removeDeadInstructions();
}
-static bool isUnconditionalBranch(TerminatorInst *Term) {
+static bool isUnconditionalBranch(Instruction *Term) {
auto *BR = dyn_cast<BranchInst>(Term);
return BR && BR->isUnconditional();
}
@@ -276,7 +277,7 @@ void AggressiveDeadCodeElimination::initialize() {
// treat all edges to a block already seen as loop back edges
// and mark the branch live if there is a back edge.
for (auto *BB: depth_first_ext(&F.getEntryBlock(), State)) {
- TerminatorInst *Term = BB->getTerminator();
+ Instruction *Term = BB->getTerminator();
if (isLive(Term))
continue;
@@ -330,7 +331,7 @@ bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) {
return false;
return true;
}
- if (!isa<TerminatorInst>(I))
+ if (!I.isTerminator())
return false;
if (RemoveControlFlowFlag && (isa<BranchInst>(I) || isa<SwitchInst>(I)))
return false;
@@ -507,7 +508,7 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() {
if (isLive(&I))
continue;
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I)) {
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
// Check if the scope of this variable location is alive.
if (AliveScopes.count(DII->getDebugLoc()->getScope()))
continue;
@@ -614,8 +615,8 @@ void AggressiveDeadCodeElimination::updateDeadRegions() {
}
}
- DT.applyUpdates(DeletedEdges);
- PDT.applyUpdates(DeletedEdges);
+ DomTreeUpdater(DT, &PDT, DomTreeUpdater::UpdateStrategy::Eager)
+ .applyUpdates(DeletedEdges);
NumBranchesRemoved += 1;
}
@@ -642,7 +643,7 @@ void AggressiveDeadCodeElimination::computeReversePostOrder() {
void AggressiveDeadCodeElimination::makeUnconditional(BasicBlock *BB,
BasicBlock *Target) {
- TerminatorInst *PredTerm = BB->getTerminator();
+ Instruction *PredTerm = BB->getTerminator();
// Collect the live debug info scopes attached to this instruction.
if (const DILocation *DL = PredTerm->getDebugLoc())
collectLiveScopes(*DL);
@@ -671,7 +672,9 @@ void AggressiveDeadCodeElimination::makeUnconditional(BasicBlock *BB,
//
//===----------------------------------------------------------------------===//
PreservedAnalyses ADCEPass::run(Function &F, FunctionAnalysisManager &FAM) {
- auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ // ADCE does not need DominatorTree, but requires DominatorTree here
+ // to update the analysis if it is already available.
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
auto &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
if (!AggressiveDeadCodeElimination(F, DT, PDT).performDeadCodeElimination())
return PreservedAnalyses::all();
@@ -697,15 +700,16 @@ struct ADCELegacyPass : public FunctionPass {
if (skipFunction(F))
return false;
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ // ADCE does not need DominatorTree, but requires DominatorTree here
+ // to update the analysis if it is already available.
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
return AggressiveDeadCodeElimination(F, DT, PDT)
.performDeadCodeElimination();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- // We require DominatorTree here only to update and thus preserve it.
- AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
if (!RemoveControlFlowFlag)
AU.setPreservesCFG();
@@ -723,7 +727,6 @@ char ADCELegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ADCELegacyPass, "adce",
"Aggressive Dead Code Elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination",
false, false)
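
The ADCE change makes DominatorTree an optional input: the pass asks only for a cached tree and routes the deleted CFG edges through a DomTreeUpdater, which applies them to whichever trees are actually present. A small sketch of that "update it only if it exists" shape, with invented stand-in types:

#include <cstdio>
#include <utility>
#include <vector>

// Invented stand-ins: the point is only that one batch of edge deletions is
// applied to every tree that was actually computed, which is what lets ADCE
// accept a possibly-null DominatorTree.
struct Tree {
  const char *Name;
  void erase(int From, int To) {
    std::printf("%s: delete edge %d->%d\n", Name, From, To);
  }
};

struct Updater {
  Tree *DT;  // may be null: only updated if someone already computed it
  Tree *PDT; // required by the pass
  void applyDeletions(const std::vector<std::pair<int, int>> &Edges) {
    for (const auto &E : Edges) {
      if (DT)
        DT->erase(E.first, E.second);
      if (PDT)
        PDT->erase(E.first, E.second);
    }
  }
};

int main() {
  Tree PDT{"PostDomTree"};
  Updater U{nullptr, &PDT}; // no cached DominatorTree available
  U.applyDeletions({{3, 5}, {4, 5}});
}
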
diff --git a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
index 3a8ef073cb48..d3c9b9a270aa 100644
--- a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -38,7 +38,8 @@ STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
/// instruction may need to be cleared of assumptions that can no longer be
/// guaranteed correct.
static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
- assert(I->getType()->isIntegerTy() && "Trivializing a non-integer value?");
+ assert(I->getType()->isIntOrIntVectorTy() &&
+ "Trivializing a non-integer value?");
// Initialize the worklist with eligible direct users.
SmallVector<Instruction *, 16> WorkList;
@@ -46,13 +47,13 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// If all bits of a user are demanded, then we know that nothing below that
// in the def-use chain needs to be changed.
auto *J = dyn_cast<Instruction>(JU);
- if (J && J->getType()->isSized() &&
+ if (J && J->getType()->isIntOrIntVectorTy() &&
!DB.getDemandedBits(J).isAllOnesValue())
WorkList.push_back(J);
- // Note that we need to check for unsized types above before asking for
+ // Note that we need to check for non-int types above before asking for
// demanded bits. Normally, the only way to reach an instruction with an
- // unsized type is via an instruction that has side effects (or otherwise
+ // non-int type is via an instruction that has side effects (or otherwise
// will demand its input bits). However, if we have a readnone function
// that returns an unsized type (e.g., void), we must avoid asking for the
// demanded bits of the function call's return value. A void-returning
@@ -78,7 +79,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// If all bits of a user are demanded, then we know that nothing below
// that in the def-use chain needs to be changed.
auto *K = dyn_cast<Instruction>(KU);
- if (K && !Visited.count(K) && K->getType()->isSized() &&
+ if (K && !Visited.count(K) && K->getType()->isIntOrIntVectorTy() &&
!DB.getDemandedBits(K).isAllOnesValue())
WorkList.push_back(K);
}
@@ -95,30 +96,41 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
if (I.mayHaveSideEffects() && I.use_empty())
continue;
- if (I.getType()->isIntegerTy() &&
- !DB.getDemandedBits(&I).getBoolValue()) {
- // For live instructions that have all dead bits, first make them dead by
- // replacing all uses with something else. Then, if they don't need to
- // remain live (because they have side effects, etc.) we can remove them.
- LLVM_DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
+ // Remove instructions that are dead, either because they were not reached
+ // during analysis or have no demanded bits.
+ if (DB.isInstructionDead(&I) ||
+ (I.getType()->isIntOrIntVectorTy() &&
+ DB.getDemandedBits(&I).isNullValue() &&
+ wouldInstructionBeTriviallyDead(&I))) {
+ salvageDebugInfo(I);
+ Worklist.push_back(&I);
+ I.dropAllReferences();
+ Changed = true;
+ continue;
+ }
+
+ for (Use &U : I.operands()) {
+ // DemandedBits only detects dead integer uses.
+ if (!U->getType()->isIntOrIntVectorTy())
+ continue;
+
+ if (!isa<Instruction>(U) && !isa<Argument>(U))
+ continue;
+
+ if (!DB.isUseDead(&U))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "BDCE: Trivializing: " << U << " (all bits dead)\n");
clearAssumptionsOfUsers(&I, DB);
// FIXME: In theory we could substitute undef here instead of zero.
// This should be reconsidered once we settle on the semantics of
// undef, poison, etc.
- Value *Zero = ConstantInt::get(I.getType(), 0);
+ U.set(ConstantInt::get(U->getType(), 0));
++NumSimplified;
- I.replaceNonMetadataUsesWith(Zero);
Changed = true;
}
- if (!DB.isInstructionDead(&I))
- continue;
-
- salvageDebugInfo(I);
- Worklist.push_back(&I);
- I.dropAllReferences();
- Changed = true;
}
for (Instruction *&I : Worklist) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 5ebfbf8a879b..a806d6faed60 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -149,14 +149,14 @@ static void recordCondition(CallSite CS, BasicBlock *From, BasicBlock *To,
/// Record ICmp conditions relevant to any argument in CS following Pred's
/// single predecessors. If there are conflicting conditions along a path, like
-/// x == 1 and x == 0, the first condition will be used.
+/// x == 1 and x == 0, the first condition will be used. We stop once we reach
+/// an edge to StopAt.
static void recordConditions(CallSite CS, BasicBlock *Pred,
- ConditionsTy &Conditions) {
- recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
+ ConditionsTy &Conditions, BasicBlock *StopAt) {
BasicBlock *From = Pred;
BasicBlock *To = Pred;
SmallPtrSet<BasicBlock *, 4> Visited;
- while (!Visited.count(From->getSinglePredecessor()) &&
+ while (To != StopAt && !Visited.count(From->getSinglePredecessor()) &&
(From = From->getSinglePredecessor())) {
recordCondition(CS, From, To, Conditions);
Visited.insert(From);
@@ -197,7 +197,7 @@ static bool canSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
isa<IndirectBrInst>(Preds[1]->getTerminator()))
return false;
- // BasicBlock::canSplitPredecessors is more agressive, so checking for
+ // BasicBlock::canSplitPredecessors is more aggressive, so checking for
// BasicBlock::isEHPad as well.
if (!CallSiteBB->canSplitPredecessors() || CallSiteBB->isEHPad())
return false;
@@ -248,7 +248,7 @@ static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
ReturnInst* RI = dyn_cast<ReturnInst>(&*II);
assert(RI && "`musttail` call must be followed by `ret` instruction");
- TerminatorInst *TI = SplitBB->getTerminator();
+ Instruction *TI = SplitBB->getTerminator();
Value *V = NewCI;
if (BCI)
V = cloneInstForMustTail(BCI, TI, V);
@@ -302,7 +302,7 @@ static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
static void splitCallSite(
CallSite CS,
const SmallVectorImpl<std::pair<BasicBlock *, ConditionsTy>> &Preds,
- DominatorTree *DT) {
+ DomTreeUpdater &DTU) {
Instruction *Instr = CS.getInstruction();
BasicBlock *TailBB = Instr->getParent();
bool IsMustTailCall = CS.isMustTailCall();
@@ -312,8 +312,10 @@ static void splitCallSite(
// `musttail` calls must be followed by optional `bitcast`, and `ret`. The
// split blocks will be terminated right after that so there're no users for
// this phi in a `TailBB`.
- if (!IsMustTailCall && !Instr->use_empty())
+ if (!IsMustTailCall && !Instr->use_empty()) {
CallPN = PHINode::Create(Instr->getType(), Preds.size(), "phi.call");
+ CallPN->setDebugLoc(Instr->getDebugLoc());
+ }
LLVM_DEBUG(dbgs() << "split call-site : " << *Instr << " into \n");
@@ -325,7 +327,7 @@ static void splitCallSite(
BasicBlock *PredBB = Preds[i].first;
BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween(
TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i],
- DT);
+ DTU);
assert(SplitBlock && "Unexpected new basic block split.");
Instruction *NewCI =
@@ -363,11 +365,13 @@ static void splitCallSite(
// attempting removal.
SmallVector<BasicBlock *, 2> Splits(predecessors((TailBB)));
assert(Splits.size() == 2 && "Expected exactly 2 splits!");
- for (unsigned i = 0; i < Splits.size(); i++)
+ for (unsigned i = 0; i < Splits.size(); i++) {
Splits[i]->getTerminator()->eraseFromParent();
+ DTU.deleteEdge(Splits[i], TailBB);
+ }
// Erase the tail block once done with musttail patching
- TailBB->eraseFromParent();
+ DTU.deleteBB(TailBB);
return;
}
@@ -394,6 +398,7 @@ static void splitCallSite(
if (isa<PHINode>(CurrentI))
continue;
PHINode *NewPN = PHINode::Create(CurrentI->getType(), Preds.size());
+ NewPN->setDebugLoc(CurrentI->getDebugLoc());
for (auto &Mapping : ValueToValueMaps)
NewPN->addIncoming(Mapping[CurrentI],
cast<Instruction>(Mapping[CurrentI])->getParent());
@@ -435,49 +440,73 @@ static bool isPredicatedOnPHI(CallSite CS) {
return false;
}
-static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree *DT) {
+using PredsWithCondsTy = SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2>;
+
+// Check if any of the arguments in CS are predicated on a PHI node and return
+// the set of predecessors we should use for splitting.
+static PredsWithCondsTy shouldSplitOnPHIPredicatedArgument(CallSite CS) {
if (!isPredicatedOnPHI(CS))
- return false;
+ return {};
auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
- SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS = {
- {Preds[0], {}}, {Preds[1], {}}};
- splitCallSite(CS, PredsCS, DT);
- return true;
+ return {{Preds[0], {}}, {Preds[1], {}}};
}
-static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree *DT) {
+// Checks if any of the arguments in CS are predicated in a predecessor and
+// returns a list of predecessors with the conditions that hold on their edges
+// to CS.
+static PredsWithCondsTy shouldSplitOnPredicatedArgument(CallSite CS,
+ DomTreeUpdater &DTU) {
auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
if (Preds[0] == Preds[1])
- return false;
+ return {};
+
+ // We can stop recording conditions once we reach the immediate dominator
+ // for the block containing the call site. Conditions in predecessors of
+ // that node will be the same for all paths to the call site, and splitting
+ // is not beneficial.
+ assert(DTU.hasDomTree() && "We need a DTU with a valid DT!");
+ auto *CSDTNode = DTU.getDomTree().getNode(CS.getInstruction()->getParent());
+ BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr;
SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS;
for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) {
ConditionsTy Conditions;
- recordConditions(CS, Pred, Conditions);
+ // Record condition on edge BB(CS) <- Pred
+ recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
+ // Record conditions following Pred's single predecessors.
+ recordConditions(CS, Pred, Conditions, StopAt);
PredsCS.push_back({Pred, Conditions});
}
- if (std::all_of(PredsCS.begin(), PredsCS.end(),
- [](const std::pair<BasicBlock *, ConditionsTy> &P) {
- return P.second.empty();
- }))
- return false;
+ if (all_of(PredsCS, [](const std::pair<BasicBlock *, ConditionsTy> &P) {
+ return P.second.empty();
+ }))
+ return {};
- splitCallSite(CS, PredsCS, DT);
- return true;
+ return PredsCS;
}
static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI,
- DominatorTree *DT) {
+ DomTreeUpdater &DTU) {
+ // Check if we can split the call site.
if (!CS.arg_size() || !canSplitCallSite(CS, TTI))
return false;
- return tryToSplitOnPredicatedArgument(CS, DT) ||
- tryToSplitOnPHIPredicatedArgument(CS, DT);
+
+ auto PredsWithConds = shouldSplitOnPredicatedArgument(CS, DTU);
+ if (PredsWithConds.empty())
+ PredsWithConds = shouldSplitOnPHIPredicatedArgument(CS);
+ if (PredsWithConds.empty())
+ return false;
+
+ splitCallSite(CS, PredsWithConds, DTU);
+ return true;
}
static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
- TargetTransformInfo &TTI, DominatorTree *DT) {
+ TargetTransformInfo &TTI, DominatorTree &DT) {
+
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
bool Changed = false;
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
BasicBlock &BB = *BI++;
@@ -501,7 +530,7 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
// Check if such path is possible before attempting the splitting.
bool IsMustTail = CS.isMustTailCall();
- Changed |= tryToSplitCallSite(CS, TTI, DT);
+ Changed |= tryToSplitCallSite(CS, TTI, DTU);
// There're no interesting instructions after this. The call site
// itself might have been erased on splitting.
@@ -522,6 +551,7 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
@@ -532,9 +562,8 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- return doCallSiteSplitting(F, TLI, TTI,
- DTWP ? &DTWP->getDomTree() : nullptr);
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return doCallSiteSplitting(F, TLI, TTI, DT);
}
};
} // namespace
@@ -544,6 +573,7 @@ INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
FunctionPass *llvm::createCallSiteSplittingPass() {
@@ -554,7 +584,7 @@ PreservedAnalyses CallSiteSplittingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
if (!doCallSiteSplitting(F, TLI, TTI, DT))
return PreservedAnalyses::all();
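The CallSiteSplitting hunks above move the pass from patching a raw DominatorTree to routing every CFG edit through a DomTreeUpdater. A minimal sketch of that pattern, modeled on the musttail cleanup in the hunk (the helper and its standalone shape are illustrative, not part of the patch; it assumes each Pred already ends in the copied ret ahead of the old branch, as the musttail patching guarantees):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/DomTreeUpdater.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    // Erase TailBB after its split predecessors have been re-terminated,
    // queuing dominator-tree updates instead of mutating DT directly.
    static void eraseTailBlock(ArrayRef<BasicBlock *> Splits, BasicBlock *TailBB,
                               DominatorTree &DT) {
      DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
      for (BasicBlock *Pred : Splits) {
        Pred->getTerminator()->eraseFromParent(); // CFG edge Pred -> TailBB is gone
        DTU.deleteEdge(Pred, TailBB);             // queue the matching DT update
      }
      DTU.deleteBB(TailBB); // deferred until the pending updates are flushed
      DTU.flush();          // apply everything queued above
    }

With the Lazy strategy the updates batch up, so repeated edge deletions inside a loop stay cheap and the tree is only rebuilt when someone actually queries it.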
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 55759e8b1661..beac0d967a98 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -82,6 +82,16 @@ static cl::opt<bool> ConstHoistWithBlockFrequency(
"chance to execute const materialization more frequently than "
"without hoisting."));
+static cl::opt<bool> ConstHoistGEP(
+ "consthoist-gep", cl::init(false), cl::Hidden,
+ cl::desc("Try hoisting constant gep expressions"));
+
+static cl::opt<unsigned>
+MinNumOfDependentToRebase("consthoist-min-num-to-rebase",
+ cl::desc("Do not rebase if number of dependent constants of a Base is less "
+ "than this number."),
+ cl::init(0), cl::Hidden);
+
namespace {
/// The constant hoisting pass.
@@ -340,7 +350,7 @@ SmallPtrSet<Instruction *, 8> ConstantHoistingPass::findConstantInsertionPoint(
///
/// The operand at index Idx is not necessarily the constant integer itself. It
/// could also be a cast instruction or a constant expression that uses the
-// constant integer.
+/// constant integer.
void ConstantHoistingPass::collectConstantCandidates(
ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx,
ConstantInt *ConstInt) {
@@ -358,12 +368,13 @@ void ConstantHoistingPass::collectConstantCandidates(
if (Cost > TargetTransformInfo::TCC_Basic) {
ConstCandMapType::iterator Itr;
bool Inserted;
- std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(ConstInt, 0));
+ ConstPtrUnionType Cand = ConstInt;
+ std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(Cand, 0));
if (Inserted) {
- ConstCandVec.push_back(ConstantCandidate(ConstInt));
- Itr->second = ConstCandVec.size() - 1;
+ ConstIntCandVec.push_back(ConstantCandidate(ConstInt));
+ Itr->second = ConstIntCandVec.size() - 1;
}
- ConstCandVec[Itr->second].addUser(Inst, Idx, Cost);
+ ConstIntCandVec[Itr->second].addUser(Inst, Idx, Cost);
LLVM_DEBUG(if (isa<ConstantInt>(Inst->getOperand(Idx))) dbgs()
<< "Collect constant " << *ConstInt << " from " << *Inst
<< " with cost " << Cost << '\n';
@@ -374,6 +385,48 @@ void ConstantHoistingPass::collectConstantCandidates(
}
}
+/// Record constant GEP expression for instruction Inst at operand index Idx.
+void ConstantHoistingPass::collectConstantCandidates(
+ ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx,
+ ConstantExpr *ConstExpr) {
+ // TODO: Handle vector GEPs
+ if (ConstExpr->getType()->isVectorTy())
+ return;
+
+ GlobalVariable *BaseGV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
+ if (!BaseGV)
+ return;
+
+ // Get offset from the base GV.
+ PointerType *GVPtrTy = dyn_cast<PointerType>(BaseGV->getType());
+ IntegerType *PtrIntTy = DL->getIntPtrType(*Ctx, GVPtrTy->getAddressSpace());
+ APInt Offset(DL->getTypeSizeInBits(PtrIntTy), /*val*/0, /*isSigned*/true);
+ auto *GEPO = cast<GEPOperator>(ConstExpr);
+ if (!GEPO->accumulateConstantOffset(*DL, Offset))
+ return;
+
+ if (!Offset.isIntN(32))
+ return;
+
+ // A constant GEP expression that has a GlobalVariable as its base pointer is
+ // usually lowered to a load from the constant pool. Such an operation is
+ // unlikely to be cheaper than computing it as <Base + Offset>, which can be
+ // lowered to an ADD instruction or folded into a Load/Store instruction.
+ int Cost = TTI->getIntImmCost(Instruction::Add, 1, Offset, PtrIntTy);
+ ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV];
+ ConstCandMapType::iterator Itr;
+ bool Inserted;
+ ConstPtrUnionType Cand = ConstExpr;
+ std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(Cand, 0));
+ if (Inserted) {
+ ExprCandVec.push_back(ConstantCandidate(
+ ConstantInt::get(Type::getInt32Ty(*Ctx), Offset.getLimitedValue()),
+ ConstExpr));
+ Itr->second = ExprCandVec.size() - 1;
+ }
+ ExprCandVec[Itr->second].addUser(Inst, Idx, Cost);
+}
+
/// Check the operand for instruction Inst at index Idx.
void ConstantHoistingPass::collectConstantCandidates(
ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx) {
@@ -402,6 +455,10 @@ void ConstantHoistingPass::collectConstantCandidates(
// Visit constant expressions that have constant integers.
if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
+ // Handle constant gep expressions.
+ if (ConstHoistGEP && ConstExpr->isGEPWithNoNotionalOverIndexing())
+ collectConstantCandidates(ConstCandMap, Inst, Idx, ConstExpr);
+
// Only visit constant cast expressions.
if (!ConstExpr->isCast())
return;
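The new ConstantExpr overload above folds a constant GEP into <BaseGV + byte offset> before costing it. The offset is obtained roughly the way this standalone helper would (a sketch; the helper name is made up and it only assumes the expression is a GEP off a GlobalVariable):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Operator.h"
    using namespace llvm;

    // Returns true and fills BaseGV/Offset when CE is a constant GEP whose
    // byte offset from a global variable can be computed at compile time.
    static bool getGEPBaseAndOffset(ConstantExpr *CE, const DataLayout &DL,
                                    GlobalVariable *&BaseGV, APInt &Offset) {
      auto *GEPO = dyn_cast<GEPOperator>(CE);
      if (!GEPO)
        return false;
      BaseGV = dyn_cast<GlobalVariable>(GEPO->getPointerOperand());
      if (!BaseGV)
        return false;
      unsigned AS = BaseGV->getType()->getPointerAddressSpace();
      Offset = APInt(DL.getIndexSizeInBits(AS), 0); // width must match the index size
      return GEPO->accumulateConstantOffset(DL, Offset);
    }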
@@ -544,7 +601,8 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
/// Find the base constant within the given range and rebase all other
/// constants with respect to the base constant.
void ConstantHoistingPass::findAndMakeBaseConstant(
- ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
+ ConstCandVecType::iterator S, ConstCandVecType::iterator E,
+ SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec) {
auto MaxCostItr = S;
unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr);
@@ -552,26 +610,37 @@ void ConstantHoistingPass::findAndMakeBaseConstant(
if (NumUses <= 1)
return;
+ ConstantInt *ConstInt = MaxCostItr->ConstInt;
+ ConstantExpr *ConstExpr = MaxCostItr->ConstExpr;
ConstantInfo ConstInfo;
- ConstInfo.BaseConstant = MaxCostItr->ConstInt;
- Type *Ty = ConstInfo.BaseConstant->getType();
+ ConstInfo.BaseInt = ConstInt;
+ ConstInfo.BaseExpr = ConstExpr;
+ Type *Ty = ConstInt->getType();
// Rebase the constants with respect to the base constant.
for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
- APInt Diff = ConstCand->ConstInt->getValue() -
- ConstInfo.BaseConstant->getValue();
+ APInt Diff = ConstCand->ConstInt->getValue() - ConstInt->getValue();
Constant *Offset = Diff == 0 ? nullptr : ConstantInt::get(Ty, Diff);
+ Type *ConstTy =
+ ConstCand->ConstExpr ? ConstCand->ConstExpr->getType() : nullptr;
ConstInfo.RebasedConstants.push_back(
- RebasedConstantInfo(std::move(ConstCand->Uses), Offset));
+ RebasedConstantInfo(std::move(ConstCand->Uses), Offset, ConstTy));
}
- ConstantVec.push_back(std::move(ConstInfo));
+ ConstInfoVec.push_back(std::move(ConstInfo));
}
/// Finds and combines constant candidates that can be easily
/// rematerialized with an add from a common base constant.
-void ConstantHoistingPass::findBaseConstants() {
+void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) {
+ // If BaseGV is nullptr, find base among candidate constant integers;
+ // Otherwise find base among constant GEPs that share the same BaseGV.
+ ConstCandVecType &ConstCandVec = BaseGV ?
+ ConstGEPCandMap[BaseGV] : ConstIntCandVec;
+ ConstInfoVecType &ConstInfoVec = BaseGV ?
+ ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
+
// Sort the constants by value and type. This invalidates the mapping!
- llvm::sort(ConstCandVec.begin(), ConstCandVec.end(),
+ std::stable_sort(ConstCandVec.begin(), ConstCandVec.end(),
[](const ConstantCandidate &LHS, const ConstantCandidate &RHS) {
if (LHS.ConstInt->getType() != RHS.ConstInt->getType())
return LHS.ConstInt->getType()->getBitWidth() <
@@ -585,20 +654,40 @@ void ConstantHoistingPass::findBaseConstants() {
for (auto CC = std::next(ConstCandVec.begin()), E = ConstCandVec.end();
CC != E; ++CC) {
if (MinValItr->ConstInt->getType() == CC->ConstInt->getType()) {
+ Type *MemUseValTy = nullptr;
+ for (auto &U : CC->Uses) {
+ auto *UI = U.Inst;
+ if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
+ MemUseValTy = LI->getType();
+ break;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+ // Make sure the constant is used as pointer operand of the StoreInst.
+ if (SI->getPointerOperand() == SI->getOperand(U.OpndIdx)) {
+ MemUseValTy = SI->getValueOperand()->getType();
+ break;
+ }
+ }
+ }
+
// Check if the constant is in range of an add with immediate.
APInt Diff = CC->ConstInt->getValue() - MinValItr->ConstInt->getValue();
if ((Diff.getBitWidth() <= 64) &&
- TTI->isLegalAddImmediate(Diff.getSExtValue()))
+ TTI->isLegalAddImmediate(Diff.getSExtValue()) &&
+ // Check if Diff can be used as an offset in the addressing mode of the
+ // user memory instruction.
+ (!MemUseValTy || TTI->isLegalAddressingMode(MemUseValTy,
+ /*BaseGV*/nullptr, /*BaseOffset*/Diff.getSExtValue(),
+ /*HasBaseReg*/true, /*Scale*/0)))
continue;
}
// We either have now a different constant type or the constant is not in
// range of an add with immediate anymore.
- findAndMakeBaseConstant(MinValItr, CC);
+ findAndMakeBaseConstant(MinValItr, CC, ConstInfoVec);
// Start a new base constant search.
MinValItr = CC;
}
// Finalize the last base constant search.
- findAndMakeBaseConstant(MinValItr, ConstCandVec.end());
+ findAndMakeBaseConstant(MinValItr, ConstCandVec.end(), ConstInfoVec);
}
/// Updates the operand at Idx in instruction Inst with the result of
@@ -633,12 +722,28 @@ static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat) {
/// users.
void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
Constant *Offset,
+ Type *Ty,
const ConstantUser &ConstUser) {
Instruction *Mat = Base;
+
+ // The same offset can be dereferenced to different types in a nested struct.
+ if (!Offset && Ty && Ty != Base->getType())
+ Offset = ConstantInt::get(Type::getInt32Ty(*Ctx), 0);
+
if (Offset) {
Instruction *InsertionPt = findMatInsertPt(ConstUser.Inst,
ConstUser.OpndIdx);
- Mat = BinaryOperator::Create(Instruction::Add, Base, Offset,
+ if (Ty) {
+ // Constant being rebased is a ConstantExpr.
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx,
+ cast<PointerType>(Ty)->getAddressSpace());
+ Base = new BitCastInst(Base, Int8PtrTy, "base_bitcast", InsertionPt);
+ Mat = GetElementPtrInst::Create(Int8PtrTy->getElementType(), Base,
+ Offset, "mat_gep", InsertionPt);
+ Mat = new BitCastInst(Mat, Ty, "mat_bitcast", InsertionPt);
+ } else
+ // Constant being rebased is a ConstantInt.
+ Mat = BinaryOperator::Create(Instruction::Add, Base, Offset,
"const_mat", InsertionPt);
LLVM_DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0)
@@ -682,6 +787,14 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
// Visit constant expression.
if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
+ if (ConstExpr->isGEPWithNoNotionalOverIndexing()) {
+ // Operand is a ConstantGEP, replace it.
+ updateOperand(ConstUser.Inst, ConstUser.OpndIdx, Mat);
+ return;
+ }
+
+ // Aside from constant GEPs, only constant cast expressions are collected.
+ assert(ConstExpr->isCast() && "ConstExpr should be a cast");
Instruction *ConstExprInst = ConstExpr->getAsInstruction();
ConstExprInst->setOperand(0, Mat);
ConstExprInst->insertBefore(findMatInsertPt(ConstUser.Inst,
@@ -705,28 +818,22 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
/// Hoist and hide the base constant behind a bitcast and emit
/// materialization code for derived constants.
-bool ConstantHoistingPass::emitBaseConstants() {
+bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
bool MadeChange = false;
- for (auto const &ConstInfo : ConstantVec) {
- // Hoist and hide the base constant behind a bitcast.
+ SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec =
+ BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
+ for (auto const &ConstInfo : ConstInfoVec) {
SmallPtrSet<Instruction *, 8> IPSet = findConstantInsertionPoint(ConstInfo);
assert(!IPSet.empty() && "IPSet is empty");
unsigned UsesNum = 0;
unsigned ReBasesNum = 0;
+ unsigned NotRebasedNum = 0;
for (Instruction *IP : IPSet) {
- IntegerType *Ty = ConstInfo.BaseConstant->getType();
- Instruction *Base =
- new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP);
-
- Base->setDebugLoc(IP->getDebugLoc());
-
- LLVM_DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant
- << ") to BB " << IP->getParent()->getName() << '\n'
- << *Base << '\n');
-
- // Emit materialization code for all rebased constants.
+ // First, collect constants depending on this IP of the base.
unsigned Uses = 0;
+ using RebasedUse = std::tuple<Constant *, Type *, ConstantUser>;
+ SmallVector<RebasedUse, 4> ToBeRebased;
for (auto const &RCI : ConstInfo.RebasedConstants) {
for (auto const &U : RCI.Uses) {
Uses++;
@@ -735,31 +842,64 @@ bool ConstantHoistingPass::emitBaseConstants() {
// If Base constant is to be inserted in multiple places,
// generate rebase for U using the Base dominating U.
if (IPSet.size() == 1 ||
- DT->dominates(Base->getParent(), OrigMatInsertBB)) {
- emitBaseConstants(Base, RCI.Offset, U);
- ReBasesNum++;
- }
-
- Base->setDebugLoc(DILocation::getMergedLocation(Base->getDebugLoc(), U.Inst->getDebugLoc()));
+ DT->dominates(IP->getParent(), OrigMatInsertBB))
+ ToBeRebased.push_back(RebasedUse(RCI.Offset, RCI.Ty, U));
}
}
UsesNum = Uses;
- // Use the same debug location as the last user of the constant.
+ // If only a few constants depend on this insertion point of the base, skip
+ // rebasing, assuming the base and the rebased constants cost the same to materialize.
+ if (ToBeRebased.size() < MinNumOfDependentToRebase) {
+ NotRebasedNum += ToBeRebased.size();
+ continue;
+ }
+
+ // Emit an instance of the base at this IP.
+ Instruction *Base = nullptr;
+ // Hoist and hide the base constant behind a bitcast.
+ if (ConstInfo.BaseExpr) {
+ assert(BaseGV && "A base constant expression must have a base GV");
+ Type *Ty = ConstInfo.BaseExpr->getType();
+ Base = new BitCastInst(ConstInfo.BaseExpr, Ty, "const", IP);
+ } else {
+ IntegerType *Ty = ConstInfo.BaseInt->getType();
+ Base = new BitCastInst(ConstInfo.BaseInt, Ty, "const", IP);
+ }
+
+ Base->setDebugLoc(IP->getDebugLoc());
+
+ LLVM_DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseInt
+ << ") to BB " << IP->getParent()->getName() << '\n'
+ << *Base << '\n');
+
+ // Emit materialization code for rebased constants depending on this IP.
+ for (auto const &R : ToBeRebased) {
+ Constant *Off = std::get<0>(R);
+ Type *Ty = std::get<1>(R);
+ ConstantUser U = std::get<2>(R);
+ emitBaseConstants(Base, Off, Ty, U);
+ ReBasesNum++;
+ // Use the same debug location as the last user of the constant.
+ Base->setDebugLoc(DILocation::getMergedLocation(
+ Base->getDebugLoc(), U.Inst->getDebugLoc()));
+ }
assert(!Base->use_empty() && "The use list is empty!?");
assert(isa<Instruction>(Base->user_back()) &&
"All uses should be instructions.");
}
(void)UsesNum;
(void)ReBasesNum;
+ (void)NotRebasedNum;
// Expect all uses are rebased after rebase is done.
- assert(UsesNum == ReBasesNum && "Not all uses are rebased");
+ assert(UsesNum == (ReBasesNum + NotRebasedNum) &&
+ "Not all uses are rebased");
NumConstantsHoisted++;
// Base constant is also included in ConstInfo.RebasedConstants, so
// deduct 1 from ConstInfo.RebasedConstants.size().
- NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1;
+ NumConstantsRebased += ConstInfo.RebasedConstants.size() - 1;
MadeChange = true;
}
@@ -781,25 +921,29 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
this->TTI = &TTI;
this->DT = &DT;
this->BFI = BFI;
+ this->DL = &Fn.getParent()->getDataLayout();
+ this->Ctx = &Fn.getContext();
this->Entry = &Entry;
// Collect all constant candidates.
collectConstantCandidates(Fn);
- // There are no constant candidates to worry about.
- if (ConstCandVec.empty())
- return false;
-
// Combine constants that can be easily materialized with an add from a common
// base constant.
- findBaseConstants();
-
- // There are no constants to emit.
- if (ConstantVec.empty())
- return false;
+ if (!ConstIntCandVec.empty())
+ findBaseConstants(nullptr);
+ for (auto &MapEntry : ConstGEPCandMap)
+ if (!MapEntry.second.empty())
+ findBaseConstants(MapEntry.first);
// Finally hoist the base constant and emit materialization code for dependent
// constants.
- bool MadeChange = emitBaseConstants();
+ bool MadeChange = false;
+ if (!ConstIntInfoVec.empty())
+ MadeChange = emitBaseConstants(nullptr);
+ for (auto MapEntry : ConstGEPInfoMap)
+ if (!MapEntry.second.empty())
+ MadeChange |= emitBaseConstants(MapEntry.first);
+
// Cleanup dead instructions.
deleteDeadCastInst();
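For the GEP candidates, emitBaseConstants above materializes each rebased use as bitcast-to-i8*, byte GEP, bitcast back to the original pointer type. The same shape written with IRBuilder, for readability (function and value names are illustrative only):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Re-derive a pointer of type Ty as (i8*)Base + Offset, next to InsertPt.
    static Value *rebaseFromHoistedBase(Instruction *Base, Value *Offset,
                                        Type *Ty, Instruction *InsertPt) {
      IRBuilder<> B(InsertPt);
      unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
      Value *RawBase = B.CreateBitCast(Base, B.getInt8PtrTy(AS), "base_bitcast");
      Value *Mat = B.CreateGEP(B.getInt8Ty(), RawBase, Offset, "mat_gep");
      return B.CreateBitCast(Mat, Ty, "mat_bitcast");
    }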
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index 46915889ce7c..51032b0625f8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -18,21 +18,25 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
-#include <set>
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "constprop"
STATISTIC(NumInstKilled, "Number of instructions killed");
+DEBUG_COUNTER(CPCounter, "constprop-transform",
+ "Controls which instructions are killed");
namespace {
struct ConstantPropagation : public FunctionPass {
@@ -66,9 +70,15 @@ bool ConstantPropagation::runOnFunction(Function &F) {
return false;
// Initialize the worklist to all of the instructions ready to process...
- std::set<Instruction*> WorkList;
- for (Instruction &I: instructions(&F))
+ SmallPtrSet<Instruction *, 16> WorkList;
+ // The SmallVector mirrors WorkList so that we iterate in a stable order.
+ // We use two containers rather than one SetVector, since remove is
+ // linear-time, and we don't care enough to remove from Vec.
+ SmallVector<Instruction *, 16> WorkListVec;
+ for (Instruction &I : instructions(&F)) {
WorkList.insert(&I);
+ WorkListVec.push_back(&I);
+ }
bool Changed = false;
const DataLayout &DL = F.getParent()->getDataLayout();
@@ -76,29 +86,36 @@ bool ConstantPropagation::runOnFunction(Function &F) {
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
while (!WorkList.empty()) {
- Instruction *I = *WorkList.begin();
- WorkList.erase(WorkList.begin()); // Get an element from the worklist...
-
- if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
- // Add all of the users of this instruction to the worklist, they might
- // be constant propagatable now...
- for (User *U : I->users())
- WorkList.insert(cast<Instruction>(U));
-
- // Replace all of the uses of a variable with uses of the constant.
- I->replaceAllUsesWith(C);
-
- // Remove the dead instruction.
- WorkList.erase(I);
- if (isInstructionTriviallyDead(I, TLI)) {
- I->eraseFromParent();
- ++NumInstKilled;
+ SmallVector<Instruction*, 16> NewWorkListVec;
+ for (auto *I : WorkListVec) {
+ WorkList.erase(I); // Remove element from the worklist...
+
+ if (!I->use_empty()) // Don't muck with dead instructions...
+ if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
+ if (!DebugCounter::shouldExecute(CPCounter))
+ continue;
+
+ // Add all of the users of this instruction to the worklist, they might
+ // be constant propagatable now...
+ for (User *U : I->users()) {
+ // If user not in the set, then add it to the vector.
+ if (WorkList.insert(cast<Instruction>(U)).second)
+ NewWorkListVec.push_back(cast<Instruction>(U));
+ }
+
+ // Replace all of the uses of a variable with uses of the constant.
+ I->replaceAllUsesWith(C);
+
+ if (isInstructionTriviallyDead(I, TLI)) {
+ I->eraseFromParent();
+ ++NumInstKilled;
+ }
+
+ // We made a change to the function...
+ Changed = true;
}
-
- // We made a change to the function...
- Changed = true;
- }
+ }
+ WorkListVec = std::move(NewWorkListVec);
}
return Changed;
}
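The constprop rewrite above pairs a membership set with an iteration vector instead of a single std::set. The general pattern, in stand-alone C++ (names are illustrative; in the pass the containers are SmallPtrSet and SmallVector):

    #include <unordered_set>
    #include <utility>
    #include <vector>

    // Generic worklist: InSet answers "already queued?", Vec gives a stable
    // processing order; nothing is ever erased from the middle of the vector.
    template <typename T, typename VisitFn>
    void runWorklist(std::vector<T> Initial, VisitFn Visit) {
      std::unordered_set<T> InSet(Initial.begin(), Initial.end());
      std::vector<T> Vec = std::move(Initial);
      while (!Vec.empty()) {
        std::vector<T> Next;
        for (T Item : Vec) {
          InSet.erase(Item);             // Item is being processed now
          for (T NewItem : Visit(Item))  // Visit returns follow-up work
            if (InSet.insert(NewItem).second)
              Next.push_back(NewItem);   // queue each item at most once
        }
        Vec = std::move(Next);
      }
    }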
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 2f2d7f620a29..d0105701c73f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -28,6 +27,7 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
@@ -44,6 +44,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <utility>
@@ -272,10 +273,11 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
/// information is sufficient to prove this comparison. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
-static bool processCmp(CmpInst *C, LazyValueInfo *LVI) {
- Value *Op0 = C->getOperand(0);
- Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
- if (!Op1) return false;
+static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
+ Value *Op0 = Cmp->getOperand(0);
+ auto *C = dyn_cast<Constant>(Cmp->getOperand(1));
+ if (!C)
+ return false;
// As a policy choice, we choose not to waste compile time on anything where
// the comparison is testing local values. While LVI can sometimes reason
@@ -283,20 +285,18 @@ static bool processCmp(CmpInst *C, LazyValueInfo *LVI) {
// the block local query for uses from terminator instructions, but that's
// handled in the code for each terminator.
auto *I = dyn_cast<Instruction>(Op0);
- if (I && I->getParent() == C->getParent())
+ if (I && I->getParent() == Cmp->getParent())
return false;
LazyValueInfo::Tristate Result =
- LVI->getPredicateAt(C->getPredicate(), Op0, Op1, C);
- if (Result == LazyValueInfo::Unknown) return false;
+ LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp);
+ if (Result == LazyValueInfo::Unknown)
+ return false;
++NumCmps;
- if (Result == LazyValueInfo::True)
- C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
- else
- C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
- C->eraseFromParent();
-
+ Constant *TorF = ConstantInt::get(Type::getInt1Ty(Cmp->getContext()), Result);
+ Cmp->replaceAllUsesWith(TorF);
+ Cmp->eraseFromParent();
return true;
}
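The rewritten processCmp above builds the replacement boolean straight from the Tristate. That works because LazyValueInfo::False is defined as 0 and LazyValueInfo::True as 1, so once Unknown has been filtered out the enum value can feed ConstantInt::get directly. A sketch (the helper name is made up):

    #include <cassert>
    #include "llvm/Analysis/LazyValueInfo.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Fold a comparison whose outcome LVI has already proven.
    static Constant *foldProvenCmp(CmpInst *Cmp, LazyValueInfo::Tristate Result) {
      assert(Result != LazyValueInfo::Unknown && "caller must filter Unknown");
      return ConstantInt::get(Type::getInt1Ty(Cmp->getContext()), Result);
    }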
@@ -307,7 +307,9 @@ static bool processCmp(CmpInst *C, LazyValueInfo *LVI) {
/// that cannot fire no matter what the incoming edge can safely be removed. If
/// a case fires on every incoming edge then the entire switch can be removed
/// and replaced with a branch to the case destination.
-static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI, DominatorTree *DT) {
+static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
+ DominatorTree *DT) {
+ DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
Value *Cond = SI->getCondition();
BasicBlock *BB = SI->getParent();
@@ -372,7 +374,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI, DominatorTree *DT)
++NumDeadCases;
Changed = true;
if (--SuccessorsCount[Succ] == 0)
- DT->deleteEdge(BB, Succ);
+ DTU.deleteEdge(BB, Succ);
continue;
}
if (State == LazyValueInfo::True) {
@@ -389,15 +391,11 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI, DominatorTree *DT)
++CI;
}
- if (Changed) {
+ if (Changed)
// If the switch has been simplified to the point where it can be replaced
// by a branch then do so now.
- DeferredDominance DDT(*DT);
ConstantFoldTerminator(BB, /*DeleteDeadConditions = */ false,
- /*TLI = */ nullptr, &DDT);
- DDT.flush();
- }
-
+ /*TLI = */ nullptr, &DTU);
return Changed;
}
@@ -432,23 +430,21 @@ static bool willNotOverflow(IntrinsicInst *II, LazyValueInfo *LVI) {
}
static void processOverflowIntrinsic(IntrinsicInst *II) {
+ IRBuilder<> B(II);
Value *NewOp = nullptr;
switch (II->getIntrinsicID()) {
default:
llvm_unreachable("Unexpected instruction.");
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- NewOp = BinaryOperator::CreateAdd(II->getOperand(0), II->getOperand(1),
- II->getName(), II);
+ NewOp = B.CreateAdd(II->getOperand(0), II->getOperand(1), II->getName());
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
- NewOp = BinaryOperator::CreateSub(II->getOperand(0), II->getOperand(1),
- II->getName(), II);
+ NewOp = B.CreateSub(II->getOperand(0), II->getOperand(1), II->getName());
break;
}
++NumOverflows;
- IRBuilder<> B(II);
Value *NewI = B.CreateInsertValue(UndefValue::get(II->getType()), NewOp, 0);
NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(II->getContext()), 1);
II->replaceAllUsesWith(NewI);
@@ -530,17 +526,17 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
return false;
++NumUDivs;
+ IRBuilder<> B{Instr};
auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
- auto *LHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(0), TruncTy,
- Instr->getName() + ".lhs.trunc", Instr);
- auto *RHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(1), TruncTy,
- Instr->getName() + ".rhs.trunc", Instr);
- auto *BO =
- BinaryOperator::Create(Instr->getOpcode(), LHS, RHS, Instr->getName(), Instr);
- auto *Zext = CastInst::Create(Instruction::ZExt, BO, Instr->getType(),
- Instr->getName() + ".zext", Instr);
- if (BO->getOpcode() == Instruction::UDiv)
- BO->setIsExact(Instr->isExact());
+ auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
+ Instr->getName() + ".lhs.trunc");
+ auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
+ Instr->getName() + ".rhs.trunc");
+ auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
+ auto *Zext = B.CreateZExt(BO, Instr->getType(), Instr->getName() + ".zext");
+ if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
+ if (BinOp->getOpcode() == Instruction::UDiv)
+ BinOp->setIsExact(Instr->isExact());
Instr->replaceAllUsesWith(Zext);
Instr->eraseFromParent();
@@ -554,6 +550,7 @@ static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
++NumSRems;
auto *BO = BinaryOperator::CreateURem(SDI->getOperand(0), SDI->getOperand(1),
SDI->getName(), SDI);
+ BO->setDebugLoc(SDI->getDebugLoc());
SDI->replaceAllUsesWith(BO);
SDI->eraseFromParent();
@@ -575,6 +572,7 @@ static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
++NumSDivs;
auto *BO = BinaryOperator::CreateUDiv(SDI->getOperand(0), SDI->getOperand(1),
SDI->getName(), SDI);
+ BO->setDebugLoc(SDI->getDebugLoc());
BO->setIsExact(SDI->isExact());
SDI->replaceAllUsesWith(BO);
SDI->eraseFromParent();
@@ -597,6 +595,7 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
++NumAShrs;
auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1),
SDI->getName(), SDI);
+ BO->setDebugLoc(SDI->getDebugLoc());
BO->setIsExact(SDI->isExact());
SDI->replaceAllUsesWith(BO);
SDI->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index 6078967a0f94..4c964e6e888c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -31,6 +32,8 @@ using namespace llvm;
STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
STATISTIC(DCEEliminated, "Number of insts removed");
+DEBUG_COUNTER(DCECounter, "dce-transform",
+ "Controls which instructions are eliminated");
namespace {
//===--------------------------------------------------------------------===//
@@ -50,6 +53,8 @@ namespace {
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
Instruction *Inst = &*DI++;
if (isInstructionTriviallyDead(Inst, TLI)) {
+ if (!DebugCounter::shouldExecute(DCECounter))
+ continue;
salvageDebugInfo(*Inst);
Inst->eraseFromParent();
Changed = true;
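DCE here (like constprop above and div-rem-pairs below) gains a DebugCounter, which makes individual transformations bisectable from the command line. The usage pattern, sketched outside any particular pass (pass and counter names are placeholders, and the flag spelling in the comment is the usual skip/count form):

    #include "llvm/Support/DebugCounter.h"
    using namespace llvm;

    DEBUG_COUNTER(MyCounter, "my-pass-transform",
                  "Controls which transformations in my-pass are applied");

    static bool maybeTransform(/* pass-specific state */) {
      // Counted on every candidate; with something like
      //   -debug-counter=my-pass-transform-skip=10,my-pass-transform-count=1
      // only the 11th candidate is actually rewritten.
      if (!DebugCounter::shouldExecute(MyCounter))
        return false;
      // ... perform the rewrite ...
      return true;
    }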
@@ -77,6 +82,9 @@ static bool DCEInstruction(Instruction *I,
SmallSetVector<Instruction *, 16> &WorkList,
const TargetLibraryInfo *TLI) {
if (isInstructionTriviallyDead(I, TLI)) {
+ if (!DebugCounter::shouldExecute(DCECounter))
+ return false;
+
salvageDebugInfo(*I);
// Null out all of the instruction's operands to see if any operand becomes
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 9a7405e98e7d..469930ca6a19 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -71,7 +71,7 @@ using namespace llvm;
STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
-STATISTIC(NumFastOther , "Number of other instrs removed");
+STATISTIC(NumFastOther, "Number of other instrs removed");
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
STATISTIC(NumModifiedStores, "Number of stores modified");
@@ -349,11 +349,14 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
InstOverlapIntervalsTy &IOL,
AliasAnalysis &AA,
const Function *F) {
- // If we don't know the sizes of either access, then we can't do a comparison.
- if (Later.Size == MemoryLocation::UnknownSize ||
- Earlier.Size == MemoryLocation::UnknownSize)
+ // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
+ // get imprecise values here, though (except for unknown sizes).
+ if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise())
return OW_Unknown;
+ const uint64_t LaterSize = Later.Size.getValue();
+ const uint64_t EarlierSize = Earlier.Size.getValue();
+
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -361,7 +364,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// the later store was larger than the earlier store.
if (P1 == P2 || AA.isMustAlias(P1, P2)) {
// Make sure that the Later size is >= the Earlier size.
- if (Later.Size >= Earlier.Size)
+ if (LaterSize >= EarlierSize)
return OW_Complete;
}
@@ -379,7 +382,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// If the "Later" store is to a recognizable object, get its size.
uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F);
if (ObjectSize != MemoryLocation::UnknownSize)
- if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
+ if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
return OW_Complete;
// Okay, we have stores to two completely different pointers. Try to
@@ -410,8 +413,8 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
if (EarlierOff >= LaterOff &&
- Later.Size >= Earlier.Size &&
- uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
+ LaterSize >= EarlierSize &&
+ uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
return OW_Complete;
// We may now overlap, although the overlap is not complete. There might also
@@ -420,21 +423,21 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// Note: The correctness of this logic depends on the fact that this function
// is not even called providing DepWrite when there are any intervening reads.
if (EnablePartialOverwriteTracking &&
- LaterOff < int64_t(EarlierOff + Earlier.Size) &&
- int64_t(LaterOff + Later.Size) >= EarlierOff) {
+ LaterOff < int64_t(EarlierOff + EarlierSize) &&
+ int64_t(LaterOff + LaterSize) >= EarlierOff) {
// Insert our part of the overlap into the map.
auto &IM = IOL[DepWrite];
LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff
- << ", " << int64_t(EarlierOff + Earlier.Size)
+ << ", " << int64_t(EarlierOff + EarlierSize)
<< ") Later [" << LaterOff << ", "
- << int64_t(LaterOff + Later.Size) << ")\n");
+ << int64_t(LaterOff + LaterSize) << ")\n");
// Make sure that we only insert non-overlapping intervals and combine
// adjacent intervals. The intervals are stored in the map with the ending
// offset as the key (in the half-open sense) and the starting offset as
// the value.
- int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + Later.Size;
+ int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize;
// Find any intervals ending at, or after, LaterIntStart which start
// before LaterIntEnd.
@@ -464,10 +467,10 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
ILI = IM.begin();
if (ILI->second <= EarlierOff &&
- ILI->first >= int64_t(EarlierOff + Earlier.Size)) {
+ ILI->first >= int64_t(EarlierOff + EarlierSize)) {
LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier ["
<< EarlierOff << ", "
- << int64_t(EarlierOff + Earlier.Size)
+ << int64_t(EarlierOff + EarlierSize)
<< ") Composite Later [" << ILI->second << ", "
<< ILI->first << ")\n");
++NumCompletePartials;
@@ -478,13 +481,13 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// Check for an earlier store which writes to all the memory locations that
// the later store writes to.
if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
- int64_t(EarlierOff + Earlier.Size) > LaterOff &&
- uint64_t(LaterOff - EarlierOff) + Later.Size <= Earlier.Size) {
+ int64_t(EarlierOff + EarlierSize) > LaterOff &&
+ uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) {
LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load ["
<< EarlierOff << ", "
- << int64_t(EarlierOff + Earlier.Size)
+ << int64_t(EarlierOff + EarlierSize)
<< ") by a later store [" << LaterOff << ", "
- << int64_t(LaterOff + Later.Size) << ")\n");
+ << int64_t(LaterOff + LaterSize) << ")\n");
// TODO: Maybe come up with a better name?
return OW_PartialEarlierWithFullLater;
}
@@ -498,8 +501,8 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// In this case we may want to trim the size of earlier to avoid generating
// writes to addresses which will definitely be overwritten later
if (!EnablePartialOverwriteTracking &&
- (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
- int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
+ (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) &&
+ int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize)))
return OW_End;
// Finally, we also need to check if the later store overwrites the beginning
@@ -512,9 +515,8 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// of earlier to avoid generating writes to addresses which will definitely
// be overwritten later.
if (!EnablePartialOverwriteTracking &&
- (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
- assert(int64_t(LaterOff + Later.Size) <
- int64_t(EarlierOff + Earlier.Size) &&
+ (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) {
+ assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) &&
"Expect to be handled as OW_Complete");
return OW_Begin;
}
@@ -641,7 +643,7 @@ static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
BasicBlock *Pred = *I;
if (Pred == BB) continue;
- TerminatorInst *PredTI = Pred->getTerminator();
+ Instruction *PredTI = Pred->getTerminator();
if (PredTI->getNumSuccessors() != 1)
continue;
@@ -832,7 +834,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
continue;
}
- if (auto CS = CallSite(&*BBI)) {
+ if (auto *Call = dyn_cast<CallBase>(&*BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
if (isAllocLikeFn(&*BBI, TLI))
@@ -840,15 +842,15 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
// If this call does not access memory, it can't be loading any of our
// pointers.
- if (AA->doesNotAccessMemory(CS))
+ if (AA->doesNotAccessMemory(Call))
continue;
// If the call might load from any of our allocas, then any store above
// the call is live.
DeadStackObjects.remove_if([&](Value *I) {
// See if the call site touches the value.
- return isRefSet(AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI,
- BB.getParent())));
+ return isRefSet(AA->getModRefInfo(
+ Call, I, getPointerSize(I, DL, *TLI, BB.getParent())));
});
// If all of the allocas were clobbered by the call then we're not going
@@ -1002,11 +1004,10 @@ static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
Instruction *EarlierWrite = OI.first;
MemoryLocation Loc = getLocForWrite(EarlierWrite);
assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
- assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc");
const Value *Ptr = Loc.Ptr->stripPointerCasts();
int64_t EarlierStart = 0;
- int64_t EarlierSize = int64_t(Loc.Size);
+ int64_t EarlierSize = int64_t(Loc.Size.getValue());
GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
OverlapIntervalsTy &IntervalMap = OI.second;
Changed |=
@@ -1203,8 +1204,9 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
"when partial-overwrite "
"tracking is enabled");
- int64_t EarlierSize = DepLoc.Size;
- int64_t LaterSize = Loc.Size;
+ // The overwrite result is known, so these must be known, too.
+ int64_t EarlierSize = DepLoc.Size.getValue();
+ int64_t LaterSize = Loc.Size.getValue();
bool IsOverwriteEnd = (OR == OW_End);
MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
InstWriteOffset, LaterSize, IsOverwriteEnd);
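The size handling above now goes through LocationSize (isPrecise()/getValue()), but the byte arithmetic behind a complete overwrite is unchanged. In isolation, the containment test the OW_Complete path relies on is just (stand-alone C++, names illustrative):

    #include <cstdint>

    // Does Later [LaterOff, LaterOff + LaterSize) completely cover
    // Earlier [EarlierOff, EarlierOff + EarlierSize)?
    static bool laterCoversEarlier(int64_t EarlierOff, uint64_t EarlierSize,
                                   int64_t LaterOff, uint64_t LaterSize) {
      return EarlierOff >= LaterOff && LaterSize >= EarlierSize &&
             uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize;
    }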
diff --git a/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
index e1bc590c5c9a..ffcf34f1cf7a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
using namespace llvm;
@@ -29,6 +30,8 @@ using namespace llvm;
STATISTIC(NumPairs, "Number of div/rem pairs");
STATISTIC(NumHoisted, "Number of instructions hoisted");
STATISTIC(NumDecomposed, "Number of instructions decomposed");
+DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
+ "Controls transformations in div-rem-pairs pass");
/// Find matching pairs of integer div/rem ops (they have the same numerator,
/// denominator, and signedness). If they exist in different basic blocks, bring
@@ -93,6 +96,9 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
if (!DivDominates && !DT.dominates(RemInst, DivInst))
continue;
+ if (!DebugCounter::shouldExecute(DRPCounter))
+ continue;
+
if (HasDivRemOp) {
// The target has a single div/rem operation. Hoist the lower instruction
// to make the matched pair visible to the backend.
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 533d16e088c8..1f09979b3382 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -54,6 +55,7 @@
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
#include <cassert>
#include <deque>
#include <memory>
@@ -602,6 +604,8 @@ private:
void removeMSSA(Instruction *Inst) {
if (!MSSA)
return;
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
// Removing a store here can leave MemorySSA in an unoptimized state by
// creating MemoryPhis that have identical arguments and by creating
// MemoryUses whose defining access is not an actual clobber. We handle the
@@ -808,7 +812,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
- salvageDebugInfo(*Inst);
+ if (!salvageDebugInfo(*Inst))
+ replaceDbgUsesWithUndef(Inst);
removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
@@ -863,7 +868,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
- if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
+ if (isGuard(Inst)) {
if (auto *CondI =
dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
if (SimpleValue::canHandle(CondI)) {
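EarlyCSE now asks isGuard() from Analysis/GuardUtils.h instead of pattern-matching the intrinsic inline. Semantically the predicate amounts to something like the following sketch (the real helper takes a const User* and uses the pattern matcher):

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    static bool looksLikeGuard(const Instruction *I) {
      if (auto *II = dyn_cast<IntrinsicInst>(I))
        return II->getIntrinsicID() == Intrinsic::experimental_guard;
      return false;
    }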
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 1e0a22cb14b3..9861948c8297 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -38,7 +38,6 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
@@ -48,6 +47,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -71,6 +71,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/VNCoercion.h"
#include <algorithm>
@@ -97,11 +98,16 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+static cl::opt<bool> EnableMemDep("enable-gvn-memdep", cl::init(true));
// Maximum allowed recursion depth.
static cl::opt<uint32_t>
-MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
- cl::desc("Max recurse depth (default = 1000)"));
+MaxRecurseDepth("gvn-max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
+ cl::desc("Max recurse depth in GVN (default = 1000)"));
+
+static cl::opt<uint32_t> MaxNumDeps(
+ "gvn-max-num-deps", cl::Hidden, cl::init(100), cl::ZeroOrMore,
+ cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
struct llvm::GVN::Expression {
uint32_t opcode;
@@ -392,18 +398,13 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
- } else if (AA->onlyReadsMemory(C)) {
+ } else if (MD && AA->onlyReadsMemory(C)) {
Expression exp = createExpr(C);
auto ValNum = assignExpNewValueNum(exp);
if (ValNum.second) {
valueNumbering[C] = ValNum.first;
return ValNum.first;
}
- if (!MD) {
- uint32_t e = assignExpNewValueNum(exp).first;
- valueNumbering[C] = e;
- return e;
- }
MemDepResult local_dep = MD->getDependency(C);
@@ -436,7 +437,7 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
// Non-local case.
const MemoryDependenceResults::NonLocalDepInfo &deps =
- MD->getNonLocalCallDependency(CallSite(C));
+ MD->getNonLocalCallDependency(C);
// FIXME: Move the checking logic to MemDep!
CallInst* cdep = nullptr;
@@ -677,7 +678,7 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
// Optimistically assume that the block is fully available and check to see
// if we already know about this block in one lookup.
- std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
+ std::pair<DenseMap<BasicBlock*, char>::iterator, bool> IV =
FullyAvailableBlocks.insert(std::make_pair(BB, 2));
// If the entry already existed for this block, return the precomputed value.
@@ -1074,15 +1075,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// because if the index is out of bounds we should deoptimize rather than
// access the array.
// Check that there is no guard in this block above our instruction.
- if (!IsSafeToSpeculativelyExecute) {
- auto It = FirstImplicitControlFlowInsts.find(TmpBB);
- if (It != FirstImplicitControlFlowInsts.end()) {
- assert(It->second->getParent() == TmpBB &&
- "Implicit control flow map broken?");
- if (OI->dominates(It->second, LI))
- return false;
- }
- }
+ if (!IsSafeToSpeculativelyExecute && ICF->isDominatedByICFIFromSameBlock(LI))
+ return false;
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1099,8 +1093,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
// Check that there is no implicit control flow in a block above.
- if (!IsSafeToSpeculativelyExecute &&
- FirstImplicitControlFlowInsts.count(TmpBB))
+ if (!IsSafeToSpeculativelyExecute && ICF->hasICF(TmpBB))
return false;
}
@@ -1322,7 +1315,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// dependencies, this load isn't worth worrying about. Optimizing
// it will be too expensive.
unsigned NumDeps = Deps.size();
- if (NumDeps > 100)
+ if (NumDeps > MaxNumDeps)
return false;
// If we had a phi translation failure, we'll have a single entry which is a
@@ -1451,37 +1444,6 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
return Changed;
}
-static void patchReplacementInstruction(Instruction *I, Value *Repl) {
- auto *ReplInst = dyn_cast<Instruction>(Repl);
- if (!ReplInst)
- return;
-
- // Patch the replacement so that it is not more restrictive than the value
- // being replaced.
- // Note that if 'I' is a load being replaced by some operation,
- // for example, by an arithmetic operation, then andIRFlags()
- // would just erase all math flags from the original arithmetic
- // operation, which is clearly not wanted and not needed.
- if (!isa<LoadInst>(I))
- ReplInst->andIRFlags(I);
-
- // FIXME: If both the original and replacement value are part of the
- // same control-flow region (meaning that the execution of one
- // guarantees the execution of the other), then we can combine the
- // noalias scopes here and do better than the general conservative
- // answer used in combineMetadata().
-
- // In general, GVN unifies expressions over different control-flow
- // regions, and so we need a conservative combination of the noalias
- // scopes.
- static const unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group};
- combineMetadata(ReplInst, I, KnownIDs);
-}
-
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
@@ -1683,10 +1645,12 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
}
void GVN::assignBlockRPONumber(Function &F) {
+ BlockRPONumber.clear();
uint32_t NextBlockNumber = 1;
ReversePostOrderTraversal<Function *> RPOT(&F);
for (BasicBlock *BB : RPOT)
BlockRPONumber[BB] = NextBlockNumber++;
+ InvalidBlockRPONumbers = false;
}
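assignBlockRPONumber above now clears and lazily recomputes the numbering (guarded by InvalidBlockRPONumbers). The numbering itself is a plain reverse-post-order walk: along forward edges a block is visited before its successors, so a predecessor numbered >= the current block marks a retreating edge, which performScalarPRE conservatively treats as a loop backedge. A stand-alone sketch of the walk:

    #include <cstdint>
    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    static DenseMap<BasicBlock *, uint32_t> numberBlocksRPO(Function &F) {
      DenseMap<BasicBlock *, uint32_t> RPONumber;
      uint32_t Next = 1;
      ReversePostOrderTraversal<Function *> RPOT(&F);
      for (BasicBlock *BB : RPOT)
        RPONumber[BB] = Next++; // smaller numbers come earlier in RPO
      return RPONumber;
    }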
// Tries to replace instruction with const, using information from
@@ -1778,6 +1742,9 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
+ // Cached information for anything that uses LHS will be invalid.
+ if (MD)
+ MD->invalidateCachedPointerInfo(LHS);
}
// Now try to deduce additional equalities from this one. For example, if
@@ -1853,6 +1820,9 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
Root.getStart());
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
+ // Cached information for anything that uses NotCmp will be invalid.
+ if (MD)
+ MD->invalidateCachedPointerInfo(NotCmp);
}
}
// Ensure that any instruction in scope that gets the "A < B" value number
@@ -1975,7 +1945,7 @@ bool GVN::processInstruction(Instruction *I) {
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
- if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
+ if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) {
addToLeaderTable(Num, I, I->getParent());
return false;
}
@@ -2020,20 +1990,22 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
TLI = &RunTLI;
VN.setAliasAnalysis(&RunAA);
MD = RunMD;
- OrderedInstructions OrderedInstrs(DT);
- OI = &OrderedInstrs;
+ ImplicitControlFlowTracking ImplicitCFT(DT);
+ ICF = &ImplicitCFT;
VN.setMemDep(MD);
ORE = RunORE;
+ InvalidBlockRPONumbers = true;
bool Changed = false;
bool ShouldContinue = true;
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
BasicBlock *BB = &*FI++;
- bool removedBlock = MergeBlockIntoPredecessor(BB, DT, LI, MD);
+ bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, nullptr, MD);
if (removedBlock)
++NumGVNBlocks;
@@ -2052,7 +2024,6 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
// Fabricate val-num for dead-code in order to suppress assertion in
// performPRE().
assignValNumForDeadCode();
- assignBlockRPONumber(F);
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
@@ -2104,27 +2075,16 @@ bool GVN::processBlock(BasicBlock *BB) {
if (!AtStart)
--BI;
- bool InvalidateImplicitCF = false;
- const Instruction *MaybeFirstICF = FirstImplicitControlFlowInsts.lookup(BB);
for (auto *I : InstrsToErase) {
assert(I->getParent() == BB && "Removing instruction from wrong block?");
LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n');
salvageDebugInfo(*I);
if (MD) MD->removeInstruction(I);
LLVM_DEBUG(verifyRemoved(I));
- if (MaybeFirstICF == I) {
- // We have erased the first ICF in block. The map needs to be updated.
- InvalidateImplicitCF = true;
- // Do not keep dangling pointer on the erased instruction.
- MaybeFirstICF = nullptr;
- }
+ ICF->removeInstruction(I);
I->eraseFromParent();
}
-
- OI->invalidateBlock(BB);
InstrsToErase.clear();
- if (InvalidateImplicitCF)
- fillImplicitControlFlowInfo(BB);
if (AtStart)
BI = BB->begin();
@@ -2184,7 +2144,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
}
bool GVN::performScalarPRE(Instruction *CurInst) {
- if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
+ if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
isa<DbgInfoIntrinsic>(CurInst))
@@ -2197,6 +2157,16 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
if (isa<CmpInst>(CurInst))
return false;
+ // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
+ // sinking the addressing mode computation back to its uses. Extending the
+ // GEP's live range increases the register pressure, and therefore it can
+ // introduce unnecessary spills.
+ //
+ // This doesn't prevent Load PRE. PHI translation will make the GEP available
+ // to the load by moving it to the predecessor block if necessary.
+ if (isa<GetElementPtrInst>(CurInst))
+ return false;
+
// We don't currently value number ANY inline asm calls.
if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
if (CallI->isInlineAsm())
@@ -2215,6 +2185,10 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
BasicBlock *PREPred = nullptr;
BasicBlock *CurrentBlock = CurInst->getParent();
+ // Update the RPO numbers for this function.
+ if (InvalidBlockRPONumbers)
+ assignBlockRPONumber(*CurrentBlock->getParent());
+
SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
for (BasicBlock *P : predecessors(CurrentBlock)) {
// We're not interested in PRE where blocks with predecessors that are
@@ -2226,6 +2200,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
// It is not safe to do PRE when P->CurrentBlock is a loop backedge, and
// when CurInst has operand defined in CurrentBlock (so it may be defined
// by phi in the loop header).
+ assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
+ "Invalid BlockRPONumber map.");
if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] &&
llvm::any_of(CurInst->operands(), [&](const Use &U) {
if (auto *Inst = dyn_cast<Instruction>(U.get()))
@@ -2268,13 +2244,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
// is always executed. An instruction with implicit control flow could
// prevent us from doing it. If we cannot speculate the execution, then
// PRE should be prohibited.
- auto It = FirstImplicitControlFlowInsts.find(CurrentBlock);
- if (It != FirstImplicitControlFlowInsts.end()) {
- assert(It->second->getParent() == CurrentBlock &&
- "Implicit control flow map broken?");
- if (OI->dominates(It->second, CurInst))
- return false;
- }
+ if (ICF->isDominatedByICFIFromSameBlock(CurInst))
+ return false;
}
// Don't do PRE across indirect branch.
@@ -2335,14 +2306,10 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
if (MD)
MD->removeInstruction(CurInst);
LLVM_DEBUG(verifyRemoved(CurInst));
- bool InvalidateImplicitCF =
- FirstImplicitControlFlowInsts.lookup(CurInst->getParent()) == CurInst;
// FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
// some assertion failures.
- OI->invalidateBlock(CurrentBlock);
+ ICF->removeInstruction(CurInst);
CurInst->eraseFromParent();
- if (InvalidateImplicitCF)
- fillImplicitControlFlowInfo(CurrentBlock);
++NumGVNInstr;
return true;
@@ -2382,6 +2349,7 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT));
if (MD)
MD->invalidateCachedPredecessors();
+ InvalidBlockRPONumbers = true;
return BB;
}
@@ -2391,11 +2359,12 @@ bool GVN::splitCriticalEdges() {
if (toSplit.empty())
return false;
do {
- std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
+ std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
SplitCriticalEdge(Edge.first, Edge.second,
CriticalEdgeSplittingOptions(DT));
} while (!toSplit.empty());
if (MD) MD->invalidateCachedPredecessors();
+ InvalidBlockRPONumbers = true;
return true;
}
@@ -2411,8 +2380,6 @@ bool GVN::iterateOnFunction(Function &F) {
ReversePostOrderTraversal<Function *> RPOT(&F);
for (BasicBlock *BB : RPOT)
- fillImplicitControlFlowInfo(BB);
- for (BasicBlock *BB : RPOT)
Changed |= processBlock(BB);
return Changed;
@@ -2423,48 +2390,8 @@ void GVN::cleanupGlobalSets() {
LeaderTable.clear();
BlockRPONumber.clear();
TableAllocator.Reset();
- FirstImplicitControlFlowInsts.clear();
-}
-
-void
-GVN::fillImplicitControlFlowInfo(BasicBlock *BB) {
- // Make sure that all marked instructions are actually deleted by this point,
- // so that we don't need to care about omitting them.
- assert(InstrsToErase.empty() && "Filling before removed all marked insns?");
- auto MayNotTransferExecutionToSuccessor = [&](const Instruction *I) {
- // If a block's instruction doesn't always pass the control to its successor
- // instruction, mark the block as having implicit control flow. We use them
- // to avoid wrong assumptions of sort "if A is executed and B post-dominates
- // A, then B is also executed". This is not true is there is an implicit
- // control flow instruction (e.g. a guard) between them.
- //
- // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
- // for volatile stores and loads because they can trap. The discussion on
- // whether or not it is correct is still ongoing. We might want to get rid
- // of this logic in the future. Anyways, trapping instructions shouldn't
- // introduce implicit control flow, so we explicitly allow them here. This
- // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
- if (isGuaranteedToTransferExecutionToSuccessor(I))
- return false;
- if (isa<LoadInst>(I)) {
- assert(cast<LoadInst>(I)->isVolatile() &&
- "Non-volatile load should transfer execution to successor!");
- return false;
- }
- if (isa<StoreInst>(I)) {
- assert(cast<StoreInst>(I)->isVolatile() &&
- "Non-volatile store should transfer execution to successor!");
- return false;
- }
- return true;
- };
- FirstImplicitControlFlowInsts.erase(BB);
-
- for (auto &I : *BB)
- if (MayNotTransferExecutionToSuccessor(&I)) {
- FirstImplicitControlFlowInsts[BB] = &I;
- break;
- }
+ ICF->clear();
+ InvalidBlockRPONumbers = true;
}
/// Verify that the specified instruction does not occur in our
@@ -2554,6 +2481,8 @@ void GVN::addDeadBlock(BasicBlock *BB) {
PHINode &Phi = cast<PHINode>(*II);
Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
UndefValue::get(Phi.getType()));
+ if (MD)
+ MD->invalidateCachedPointerInfo(&Phi);
}
}
}
@@ -2613,8 +2542,8 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- explicit GVNLegacyPass(bool NoLoads = false)
- : FunctionPass(ID), NoLoads(NoLoads) {
+ explicit GVNLegacyPass(bool NoMemDepAnalysis = !EnableMemDep)
+ : FunctionPass(ID), NoMemDepAnalysis(NoMemDepAnalysis) {
initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
}
@@ -2629,7 +2558,7 @@ public:
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
getAnalysis<AAResultsWrapperPass>().getAAResults(),
- NoLoads ? nullptr
+ NoMemDepAnalysis ? nullptr
: &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(),
LIWP ? &LIWP->getLoopInfo() : nullptr,
&getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
@@ -2639,7 +2568,7 @@ public:
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (!NoLoads)
+ if (!NoMemDepAnalysis)
AU.addRequired<MemoryDependenceWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
@@ -2650,7 +2579,7 @@ public:
}
private:
- bool NoLoads;
+ bool NoMemDepAnalysis;
GVN Impl;
};
@@ -2667,6 +2596,6 @@ INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
// The public interface to this file...
-FunctionPass *llvm::createGVNPass(bool NoLoads) {
- return new GVNLegacyPass(NoLoads);
+FunctionPass *llvm::createGVNPass(bool NoMemDepAnalysis) {
+ return new GVNLegacyPass(NoMemDepAnalysis);
}
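Two bookkeeping changes run through the GVN hunks above: implicit-control-flow information is now maintained incrementally through the ICF object, and the block RPO numbers are recomputed lazily whenever InvalidBlockRPONumbers has been set (after splitting critical edges, clearing global sets, and so on). A minimal sketch of that lazy-invalidation pattern, with simplified names that are not the pass's own:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    struct LazyRPONumbers {
      DenseMap<const BasicBlock *, unsigned> Number;
      bool Invalid = true;

      // Mutators only flip a flag; nothing is recomputed eagerly.
      void invalidate() { Invalid = true; }

      // Readers call this first; the traversal runs only when needed.
      void ensure(Function &F) {
        if (!Invalid)
          return;
        Number.clear();
        unsigned N = 0;
        ReversePostOrderTraversal<Function *> RPOT(&F);
        for (BasicBlock *BB : RPOT)
          Number[BB] = ++N;
        Invalid = false;
      }
    };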
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index 6d2b25cf6013..76a42d7fe750 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -246,8 +246,8 @@ static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias, LLVMContext::MD_range,
LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group};
- combineMetadata(ReplInst, I, KnownIDs);
+ LLVMContext::MD_invariant_group, LLVMContext::MD_access_group};
+ combineMetadata(ReplInst, I, KnownIDs, true);
}
// This pass hoists common computations across branches sharing common
@@ -365,7 +365,7 @@ private:
// Return true when a successor of BB dominates A.
bool successorDominate(const BasicBlock *BB, const BasicBlock *A) {
- for (const BasicBlock *Succ : BB->getTerminator()->successors())
+ for (const BasicBlock *Succ : successors(BB))
if (DT->dominates(Succ, A))
return true;
@@ -577,15 +577,15 @@ private:
// Returns the edge via which an instruction in BB will get the values from.
// Returns true when the values are flowing out to each edge.
- bool valueAnticipable(CHIArgs C, TerminatorInst *TI) const {
+ bool valueAnticipable(CHIArgs C, Instruction *TI) const {
if (TI->getNumSuccessors() > (unsigned)size(C))
return false; // Not enough args in this CHI.
for (auto CHI : C) {
BasicBlock *Dest = CHI.Dest;
// Find if all the edges have values flowing out of BB.
- bool Found = llvm::any_of(TI->successors(), [Dest](const BasicBlock *BB) {
- return BB == Dest; });
+ bool Found = llvm::any_of(
+ successors(TI), [Dest](const BasicBlock *BB) { return BB == Dest; });
if (!Found)
return false;
}
@@ -748,11 +748,9 @@ private:
// TODO: Remove fully-redundant expressions.
// Get instruction from the Map, assume that all the Instructions
// with same VNs have same rank (this is an approximation).
- llvm::sort(Ranks.begin(), Ranks.end(),
- [this, &Map](const VNType &r1, const VNType &r2) {
- return (rank(*Map.lookup(r1).begin()) <
- rank(*Map.lookup(r2).begin()));
- });
+ llvm::sort(Ranks, [this, &Map](const VNType &r1, const VNType &r2) {
+ return (rank(*Map.lookup(r1).begin()) < rank(*Map.lookup(r2).begin()));
+ });
// - Sort VNs according to their rank, and start with lowest ranked VN
// - Take a VN and for each instruction with same VN
@@ -784,6 +782,7 @@ private:
// which currently have dead terminators that are control
// dependence sources of a block which is in NewLiveBlocks.
IDFs.setDefiningBlocks(VNBlocks);
+ IDFBlocks.clear();
IDFs.calculate(IDFBlocks);
// Make a map of BB vs instructions to be hoisted.
@@ -792,7 +791,7 @@ private:
}
// Insert empty CHI node for this VN. This is used to factor out
// basic blocks where the ANTIC can potentially change.
- for (auto IDFB : IDFBlocks) { // TODO: Prune out useless CHI insertions.
+ for (auto IDFB : IDFBlocks) {
for (unsigned i = 0; i < V.size(); ++i) {
CHIArg C = {VN, nullptr, nullptr};
// Ignore spurious PDFs.
@@ -1100,7 +1099,7 @@ private:
break;
// Do not value number terminator instructions.
- if (isa<TerminatorInst>(&I1))
+ if (I1.isTerminator())
break;
if (auto *Load = dyn_cast<LoadInst>(&I1))
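Several of the hoisting and sinking hunks replace the iterator form llvm::sort(X.begin(), X.end(), ...) with the range form llvm::sort(X, ...). A small, self-contained illustration of the two spellings (the data is made up):

    #include "llvm/ADT/STLExtras.h"   // llvm::sort
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    void sortDemo() {
      SmallVector<int, 8> V = {3, 1, 2};
      // Old spelling: explicit iterator pair.
      llvm::sort(V.begin(), V.end(), [](int A, int B) { return A < B; });
      // New spelling: pass the whole range. In expensive-checks builds this
      // overload also shuffles the range first, which helps flush out
      // comparators that are not strict weak orderings.
      llvm::sort(V, [](int A, int B) { return A < B; });
    }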
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 8959038de596..1df5f5400c14 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -239,7 +239,7 @@ public:
SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
- llvm::sort(Ops.begin(), Ops.end());
+ llvm::sort(Ops);
for (auto &P : Ops) {
Blocks.push_back(P.first);
Values.push_back(P.second);
@@ -258,14 +258,14 @@ public:
/// Create a PHI from an array of incoming values and incoming blocks.
template <typename VArray, typename BArray>
ModelledPHI(const VArray &V, const BArray &B) {
- std::copy(V.begin(), V.end(), std::back_inserter(Values));
- std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+ llvm::copy(V, std::back_inserter(Values));
+ llvm::copy(B, std::back_inserter(Blocks));
}
/// Create a PHI from [I[OpNum] for I in Insts].
template <typename BArray>
ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
- std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+ llvm::copy(B, std::back_inserter(Blocks));
for (auto *I : Insts)
Values.push_back(I->getOperand(OpNum));
}
@@ -762,7 +762,7 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
}
if (Preds.size() < 2)
return 0;
- llvm::sort(Preds.begin(), Preds.end());
+ llvm::sort(Preds);
unsigned NumOrigPreds = Preds.size();
// We can only sink instructions through unconditional branches.
@@ -859,7 +859,7 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
// Update metadata and IR flags.
for (auto *I : Insts)
if (I != I0) {
- combineMetadataForCSE(I0, I);
+ combineMetadataForCSE(I0, I, true);
I0->andIRFlags(I);
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 055fcbc8436f..efc204d4f74b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -44,6 +44,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/PostDominators.h"
@@ -63,22 +65,69 @@ using namespace llvm;
#define DEBUG_TYPE "guard-widening"
STATISTIC(GuardsEliminated, "Number of eliminated guards");
+STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
+
+static cl::opt<bool> WidenFrequentBranches(
+ "guard-widening-widen-frequent-branches", cl::Hidden,
+ cl::desc("Widen conditions of explicit branches into dominating guards in "
+ "case if their taken frequency exceeds threshold set by "
+ "guard-widening-frequent-branch-threshold option"),
+ cl::init(false));
+
+static cl::opt<unsigned> FrequentBranchThreshold(
+ "guard-widening-frequent-branch-threshold", cl::Hidden,
+ cl::desc("When WidenFrequentBranches is set to true, this option is used "
+ "to determine which branches are frequently taken. The criteria "
+ "that a branch is taken more often than "
+ "((FrequentBranchThreshold - 1) / FrequentBranchThreshold), then "
+ "it is considered frequently taken"),
+ cl::init(1000));
+
namespace {
+// Get the condition of \p I. It can either be a guard or a conditional branch.
+static Value *getCondition(Instruction *I) {
+ if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
+ assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+ "Bad guard intrinsic?");
+ return GI->getArgOperand(0);
+ }
+ return cast<BranchInst>(I)->getCondition();
+}
+
+// Set the condition for \p I to \p NewCond. \p I can either be a guard or a
+// conditional branch.
+static void setCondition(Instruction *I, Value *NewCond) {
+ if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
+ assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+ "Bad guard intrinsic?");
+ GI->setArgOperand(0, NewCond);
+ return;
+ }
+ cast<BranchInst>(I)->setCondition(NewCond);
+}
+
+// Eliminates the guard instruction properly.
+static void eliminateGuard(Instruction *GuardInst) {
+ GuardInst->eraseFromParent();
+ ++GuardsEliminated;
+}
+
class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
LoopInfo &LI;
+ BranchProbabilityInfo *BPI;
/// Together, these describe the region of interest. This might be all of
/// the blocks within a function, or only a given loop's blocks and preheader.
DomTreeNode *Root;
std::function<bool(BasicBlock*)> BlockFilter;
- /// The set of guards whose conditions have been widened into dominating
- /// guards.
- SmallVector<Instruction *, 16> EliminatedGuards;
+ /// The set of guards and conditional branches whose conditions have been
+ /// widened into dominating guards.
+ SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;
/// The set of guards which have been widened to include conditions to other
/// guards.
@@ -91,19 +140,7 @@ class GuardWideningImpl {
bool eliminateGuardViaWidening(
Instruction *Guard, const df_iterator<DomTreeNode *> &DFSI,
const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
- GuardsPerBlock);
-
- // Get the condition from \p GuardInst.
- Value *getGuardCondition(Instruction *GuardInst);
-
- // Set the condition for \p GuardInst.
- void setGuardCondition(Instruction *GuardInst, Value *NewCond);
-
- // Whether or not the particular instruction is a guard.
- bool isGuard(const Instruction *I);
-
- // Eliminates the guard instruction properly.
- void eliminateGuard(Instruction *GuardInst);
+ GuardsPerBlock, bool InvertCondition = false);
/// Used to keep track of which widening potential is more effective.
enum WideningScore {
@@ -127,11 +164,13 @@ class GuardWideningImpl {
/// Compute the score for widening the condition in \p DominatedGuard
/// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in
- /// \p DominatingGuardLoop).
+ /// \p DominatingGuardLoop). If \p InvertCond is set, then we widen the
+ /// inverted condition of the dominating guard.
WideningScore computeWideningScore(Instruction *DominatedGuard,
Loop *DominatedGuardLoop,
Instruction *DominatingGuard,
- Loop *DominatingGuardLoop);
+ Loop *DominatingGuardLoop,
+ bool InvertCond);
/// Helper to check if \p V can be hoisted to \p InsertPos.
bool isAvailableAt(Value *V, Instruction *InsertPos) {
@@ -147,13 +186,14 @@ class GuardWideningImpl {
void makeAvailableAt(Value *V, Instruction *InsertPos);
/// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
- /// to generate an expression computing the logical AND of \p Cond0 and \p
- /// Cond1. Return true if the expression computing the AND is only as
+ /// to generate an expression computing the logical AND of \p Cond0 and (\p
+ /// Cond1 XOR \p InvertCondition).
+ /// Return true if the expression computing the AND is only as
/// expensive as computing one of the two. If \p InsertPt is true then
/// actually generate the resulting expression, make it available at \p
/// InsertPt and return it in \p Result (else no change to the IR is made).
bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
- Value *&Result);
+ Value *&Result, bool InvertCondition);
/// Represents a range check of the form \c Base + \c Offset u< \c Length,
/// with the constraint that \c Length is not negative. \c CheckInst is the
@@ -214,25 +254,31 @@ class GuardWideningImpl {
/// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
/// computing only one of the two expressions?
- bool isWideningCondProfitable(Value *Cond0, Value *Cond1) {
+ bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) {
Value *ResultUnused;
- return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused);
+ return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused,
+ InvertCond);
}
- /// Widen \p ToWiden to fail if \p NewCondition is false (in addition to
- /// whatever it is already checking).
- void widenGuard(Instruction *ToWiden, Value *NewCondition) {
+ /// If \p InvertCondition is false, Widen \p ToWiden to fail if
+ /// \p NewCondition is false, otherwise make it fail if \p NewCondition is
+ /// true (in addition to whatever it is already checking).
+ void widenGuard(Instruction *ToWiden, Value *NewCondition,
+ bool InvertCondition) {
Value *Result;
- widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result);
- setGuardCondition(ToWiden, Result);
+ widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result,
+ InvertCondition);
+ setCondition(ToWiden, Result);
}
public:
explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
- LoopInfo &LI, DomTreeNode *Root,
+ LoopInfo &LI, BranchProbabilityInfo *BPI,
+ DomTreeNode *Root,
std::function<bool(BasicBlock*)> BlockFilter)
- : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter) {}
+ : DT(DT), PDT(PDT), LI(LI), BPI(BPI), Root(Root), BlockFilter(BlockFilter)
+ {}
/// The entry point for this pass.
bool run();
@@ -242,6 +288,12 @@ public:
bool GuardWideningImpl::run() {
DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
bool Changed = false;
+ Optional<BranchProbability> LikelyTaken = None;
+ if (WidenFrequentBranches && BPI) {
+ unsigned Threshold = FrequentBranchThreshold;
+ assert(Threshold > 0 && "Zero threshold makes no sense!");
+ LikelyTaken = BranchProbability(Threshold - 1, Threshold);
+ }
for (auto DFI = df_begin(Root), DFE = df_end(Root);
DFI != DFE; ++DFI) {
@@ -257,12 +309,31 @@ bool GuardWideningImpl::run() {
for (auto *II : CurrentList)
Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
+ if (WidenFrequentBranches && BPI)
+ if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ if (BI->isConditional()) {
+ // If one of the branches of a conditional is likely taken, try to
+ // eliminate it.
+ if (BPI->getEdgeProbability(BB, 0U) >= *LikelyTaken)
+ Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock);
+ else if (BPI->getEdgeProbability(BB, 1U) >= *LikelyTaken)
+ Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock,
+ /*InvertCondition*/true);
+ }
}
- assert(EliminatedGuards.empty() || Changed);
- for (auto *II : EliminatedGuards)
- if (!WidenedGuards.count(II))
- eliminateGuard(II);
+ assert(EliminatedGuardsAndBranches.empty() || Changed);
+ for (auto *I : EliminatedGuardsAndBranches)
+ if (!WidenedGuards.count(I)) {
+ assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
+ if (isGuard(I))
+ eliminateGuard(I);
+ else {
+ assert(isa<BranchInst>(I) &&
+ "Eliminated something other than guard or branch?");
+ ++CondBranchEliminated;
+ }
+ }
return Changed;
}
@@ -270,7 +341,13 @@ bool GuardWideningImpl::run() {
bool GuardWideningImpl::eliminateGuardViaWidening(
Instruction *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
- GuardsInBlock) {
+ GuardsInBlock, bool InvertCondition) {
+ // Ignore trivial true or false conditions. These instructions will be
+ // trivially eliminated by any cleanup pass. Do not erase them because other
+ // guards can possibly be widened into them.
+ if (isa<ConstantInt>(getCondition(GuardInst)))
+ return false;
+
Instruction *BestSoFar = nullptr;
auto BestScoreSoFar = WS_IllegalOrNegative;
auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent());
@@ -304,7 +381,7 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
assert((i == (e - 1)) == (GuardInst->getParent() == CurBB) && "Bad DFS?");
- if (i == (e - 1)) {
+ if (i == (e - 1) && CurBB->getTerminator() != GuardInst) {
// Corner case: make sure we're only looking at guards strictly dominating
// GuardInst when visiting GuardInst->getParent().
auto NewEnd = std::find(I, E, GuardInst);
@@ -314,9 +391,10 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
for (auto *Candidate : make_range(I, E)) {
auto Score =
- computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop);
- LLVM_DEBUG(dbgs() << "Score between " << *getGuardCondition(GuardInst)
- << " and " << *getGuardCondition(Candidate) << " is "
+ computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop,
+ InvertCondition);
+ LLVM_DEBUG(dbgs() << "Score between " << *getCondition(GuardInst)
+ << " and " << *getCondition(Candidate) << " is "
<< scoreTypeToString(Score) << "\n");
if (Score > BestScoreSoFar) {
BestScoreSoFar = Score;
@@ -336,41 +414,19 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar
<< " with score " << scoreTypeToString(BestScoreSoFar)
<< "\n");
- widenGuard(BestSoFar, getGuardCondition(GuardInst));
- setGuardCondition(GuardInst, ConstantInt::getTrue(GuardInst->getContext()));
- EliminatedGuards.push_back(GuardInst);
+ widenGuard(BestSoFar, getCondition(GuardInst), InvertCondition);
+ auto NewGuardCondition = InvertCondition
+ ? ConstantInt::getFalse(GuardInst->getContext())
+ : ConstantInt::getTrue(GuardInst->getContext());
+ setCondition(GuardInst, NewGuardCondition);
+ EliminatedGuardsAndBranches.push_back(GuardInst);
WidenedGuards.insert(BestSoFar);
return true;
}
-Value *GuardWideningImpl::getGuardCondition(Instruction *GuardInst) {
- IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
- assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
- "Bad guard intrinsic?");
- return GI->getArgOperand(0);
-}
-
-void GuardWideningImpl::setGuardCondition(Instruction *GuardInst,
- Value *NewCond) {
- IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
- assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
- "Bad guard intrinsic?");
- GI->setArgOperand(0, NewCond);
-}
-
-bool GuardWideningImpl::isGuard(const Instruction* I) {
- using namespace llvm::PatternMatch;
- return match(I, m_Intrinsic<Intrinsic::experimental_guard>());
-}
-
-void GuardWideningImpl::eliminateGuard(Instruction *GuardInst) {
- GuardInst->eraseFromParent();
- ++GuardsEliminated;
-}
-
GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
Instruction *DominatedGuard, Loop *DominatedGuardLoop,
- Instruction *DominatingGuard, Loop *DominatingGuardLoop) {
+ Instruction *DominatingGuard, Loop *DominatingGuardLoop, bool InvertCond) {
bool HoistingOutOfLoop = false;
if (DominatingGuardLoop != DominatedGuardLoop) {
@@ -383,7 +439,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
HoistingOutOfLoop = true;
}
- if (!isAvailableAt(getGuardCondition(DominatedGuard), DominatingGuard))
+ if (!isAvailableAt(getCondition(DominatedGuard), DominatingGuard))
return WS_IllegalOrNegative;
// If the guard was conditional executed, it may never be reached
@@ -394,8 +450,8 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
// here. TODO: evaluate cost model for spurious deopt
// NOTE: As written, this also lets us hoist right over another guard which
// is essentially just another spelling for control flow.
- if (isWideningCondProfitable(getGuardCondition(DominatedGuard),
- getGuardCondition(DominatingGuard)))
+ if (isWideningCondProfitable(getCondition(DominatedGuard),
+ getCondition(DominatingGuard), InvertCond))
return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
if (HoistingOutOfLoop)
@@ -416,8 +472,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
return false;
// TODO: diamond, triangle cases
if (!PDT) return true;
- return !PDT->dominates(DominatedGuard->getParent(),
- DominatingGuard->getParent());
+ return !PDT->dominates(DominatedBlock, DominatingBlock);
};
return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
@@ -459,7 +514,8 @@ void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) {
}
bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
- Instruction *InsertPt, Value *&Result) {
+ Instruction *InsertPt, Value *&Result,
+ bool InvertCondition) {
using namespace llvm::PatternMatch;
{
@@ -469,6 +525,8 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
ICmpInst::Predicate Pred0, Pred1;
if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {
+ if (InvertCondition)
+ Pred1 = ICmpInst::getInversePredicate(Pred1);
ConstantRange CR0 =
ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
@@ -502,7 +560,9 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
{
SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
- if (parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
+ // TODO: Support InvertCondition case?
+ if (!InvertCondition &&
+ parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
combineRangeChecks(Checks, CombinedChecks)) {
if (InsertPt) {
Result = nullptr;
@@ -526,7 +586,8 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
if (InsertPt) {
makeAvailableAt(Cond0, InsertPt);
makeAvailableAt(Cond1, InsertPt);
-
+ if (InvertCondition)
+ Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt);
Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
}
@@ -636,9 +697,8 @@ bool GuardWideningImpl::combineRangeChecks(
// CurrentChecks.size() will typically be 3 here, but so far there has been
// no need to hard-code that fact.
- llvm::sort(CurrentChecks.begin(), CurrentChecks.end(),
- [&](const GuardWideningImpl::RangeCheck &LHS,
- const GuardWideningImpl::RangeCheck &RHS) {
+ llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS,
+ const GuardWideningImpl::RangeCheck &RHS) {
return LHS.getOffsetValue().slt(RHS.getOffsetValue());
});
@@ -728,7 +788,10 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- if (!GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
+ BranchProbabilityInfo *BPI = nullptr;
+ if (WidenFrequentBranches)
+ BPI = AM.getCachedResult<BranchProbabilityAnalysis>(F);
+ if (!GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(),
[](BasicBlock*) { return true; } ).run())
return PreservedAnalyses::all();
@@ -751,7 +814,10 @@ struct GuardWideningLegacyPass : public FunctionPass {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- return GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
+ BranchProbabilityInfo *BPI = nullptr;
+ if (WidenFrequentBranches)
+ BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ return GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(),
[](BasicBlock*) { return true; } ).run();
}
@@ -760,6 +826,8 @@ struct GuardWideningLegacyPass : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ if (WidenFrequentBranches)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
}
};
@@ -785,11 +853,16 @@ struct LoopGuardWideningLegacyPass : public LoopPass {
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L->contains(BB);
};
- return GuardWideningImpl(DT, PDT, LI,
+ BranchProbabilityInfo *BPI = nullptr;
+ if (WidenFrequentBranches)
+ BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ return GuardWideningImpl(DT, PDT, LI, BPI,
DT.getNode(RootBB), BlockFilter).run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ if (WidenFrequentBranches)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.setPreservesCFG();
getLoopAnalysisUsage(AU);
AU.addPreserved<PostDominatorTreeWrapperPass>();
@@ -805,6 +878,8 @@ INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+if (WidenFrequentBranches)
+ INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
false, false)
@@ -814,6 +889,8 @@ INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening",
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+if (WidenFrequentBranches)
+ INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening",
"Widen guards (within a single loop, as a loop pass)",
false, false)
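The frequent-branch widening added above treats an edge as "likely" once its probability reaches (Threshold - 1) / Threshold, i.e. 999/1000 with the default setting. A short sketch of that comparison, assuming a BranchProbabilityInfo is available (the helper name is illustrative):

    #include "llvm/Analysis/BranchProbabilityInfo.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/Support/BranchProbability.h"
    using namespace llvm;

    static bool isFrequentlyTaken(const BasicBlock *BB, unsigned SuccIdx,
                                  const BranchProbabilityInfo &BPI,
                                  unsigned Threshold = 1000) {
      // With Threshold == 1000 an edge qualifies at probability >= 999/1000.
      BranchProbability Likely(Threshold - 1, Threshold);
      return BPI.getEdgeProbability(BB, SuccIdx) >= Likely;
    }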
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 8656e88b79cb..48d8e457ba7c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -134,26 +134,23 @@ class IndVarSimplify {
const TargetTransformInfo *TTI;
SmallVector<WeakTrackingVH, 16> DeadInsts;
- bool Changed = false;
bool isValidRewrite(Value *FromVal, Value *ToVal);
- void handleFloatingPointIV(Loop *L, PHINode *PH);
- void rewriteNonIntegerIVs(Loop *L);
+ bool handleFloatingPointIV(Loop *L, PHINode *PH);
+ bool rewriteNonIntegerIVs(Loop *L);
- void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
+ bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
- void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
- void rewriteFirstIterationLoopExitValues(Loop *L);
-
- Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- PHINode *IndVar, SCEVExpander &Rewriter);
+ bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+ bool rewriteFirstIterationLoopExitValues(Loop *L);
+ bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
- void sinkUnusedInvariants(Loop *L);
+ bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar, SCEVExpander &Rewriter);
- Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
- Instruction *InsertPt, Type *Ty);
+ bool sinkUnusedInvariants(Loop *L);
public:
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
@@ -284,7 +281,7 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
/// is converted into
/// for(int i = 0; i < 10000; ++i)
/// bar((double)i);
-void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
+bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
unsigned BackEdge = IncomingEdge^1;
@@ -293,12 +290,12 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
int64_t InitValue;
if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
- return;
+ return false;
// Check IV increment. Reject this PN if increment operation is not
// an add or increment value can not be represented by an integer.
auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
- if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
+ if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
// If this is not an add of the PHI with a constantfp, or if the constant fp
// is not an integer, bail out.
@@ -306,15 +303,15 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
int64_t IncValue;
if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
!ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
- return;
+ return false;
// Check Incr uses. One user is PN and the other user is an exit condition
// used by the conditional terminator.
Value::user_iterator IncrUse = Incr->user_begin();
Instruction *U1 = cast<Instruction>(*IncrUse++);
- if (IncrUse == Incr->user_end()) return;
+ if (IncrUse == Incr->user_end()) return false;
Instruction *U2 = cast<Instruction>(*IncrUse++);
- if (IncrUse != Incr->user_end()) return;
+ if (IncrUse != Incr->user_end()) return false;
// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
// only used by a branch, we can't transform it.
@@ -323,7 +320,7 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
Compare = dyn_cast<FCmpInst>(U2);
if (!Compare || !Compare->hasOneUse() ||
!isa<BranchInst>(Compare->user_back()))
- return;
+ return false;
BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
@@ -335,7 +332,7 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
if (!L->contains(TheBr->getParent()) ||
(L->contains(TheBr->getSuccessor(0)) &&
L->contains(TheBr->getSuccessor(1))))
- return;
+ return false;
// If it isn't a comparison with an integer-as-fp (the exit value), we can't
// transform it.
@@ -343,12 +340,12 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
int64_t ExitValue;
if (ExitValueVal == nullptr ||
!ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
- return;
+ return false;
// Find new predicate for integer comparison.
CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
switch (Compare->getPredicate()) {
- default: return; // Unknown comparison.
+ default: return false; // Unknown comparison.
case CmpInst::FCMP_OEQ:
case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
case CmpInst::FCMP_ONE:
@@ -371,24 +368,24 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
// The start/stride/exit values must all fit in signed i32.
if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
- return;
+ return false;
// If not actually striding (add x, 0.0), avoid touching the code.
if (IncValue == 0)
- return;
+ return false;
// Positive and negative strides have different safety conditions.
if (IncValue > 0) {
// If we have a positive stride, we require the init to be less than the
// exit value.
if (InitValue >= ExitValue)
- return;
+ return false;
uint32_t Range = uint32_t(ExitValue-InitValue);
// Check for infinite loop, either:
// while (i <= Exit) or until (i > Exit)
if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
- if (++Range == 0) return; // Range overflows.
+ if (++Range == 0) return false; // Range overflows.
}
unsigned Leftover = Range % uint32_t(IncValue);
@@ -398,23 +395,23 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
// around and do things the fp IV wouldn't.
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
- return;
+ return false;
// If the stride would wrap around the i32 before exiting, we can't
// transform the IV.
if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
- return;
+ return false;
} else {
// If we have a negative stride, we require the init to be greater than the
// exit value.
if (InitValue <= ExitValue)
- return;
+ return false;
uint32_t Range = uint32_t(InitValue-ExitValue);
// Check for infinite loop, either:
// while (i >= Exit) or until (i < Exit)
if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
- if (++Range == 0) return; // Range overflows.
+ if (++Range == 0) return false; // Range overflows.
}
unsigned Leftover = Range % uint32_t(-IncValue);
@@ -424,12 +421,12 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
// around and do things the fp IV wouldn't.
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
- return;
+ return false;
// If the stride would wrap around the i32 before exiting, we can't
// transform the IV.
if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
- return;
+ return false;
}
IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
@@ -475,10 +472,10 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
}
- Changed = true;
+ return true;
}
-void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
+bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
// First step. Check to see if there are any floating-point recurrences.
// If there are, change them into integer recurrences, permitting analysis by
// the SCEV routines.
@@ -488,15 +485,17 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
for (PHINode &PN : Header->phis())
PHIs.push_back(&PN);
+ bool Changed = false;
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
- handleFloatingPointIV(L, PN);
+ Changed |= handleFloatingPointIV(L, PN);
// If the loop previously had floating-point IV, ScalarEvolution
// may not have been able to compute a trip count. Now that we've done some
// re-writing, the trip count may be computable.
if (Changed)
SE->forgetLoop(L);
+ return Changed;
}
namespace {
@@ -521,24 +520,34 @@ struct RewritePhi {
} // end anonymous namespace
-Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
- Loop *L, Instruction *InsertPt,
- Type *ResultTy) {
- // Before expanding S into an expensive LLVM expression, see if we can use an
- // already existing value as the expansion for S.
- if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L))
- if (ExistingValue->getType() == ResultTy)
- return ExistingValue;
-
- // We didn't find anything, fall back to using SCEVExpander.
- return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
-}
-
//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//
+bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
+ SmallPtrSet<const Instruction *, 8> Visited;
+ SmallVector<const Instruction *, 8> WorkList;
+ Visited.insert(I);
+ WorkList.push_back(I);
+ while (!WorkList.empty()) {
+ const Instruction *Curr = WorkList.pop_back_val();
+ // This use is outside the loop, nothing to do.
+ if (!L->contains(Curr))
+ continue;
+ // Do we assume it is a "hard" use which will not be eliminated easily?
+ if (Curr->mayHaveSideEffects())
+ return true;
+ // Otherwise, add all its users to worklist.
+ for (auto U : Curr->users()) {
+ auto *UI = cast<Instruction>(U);
+ if (Visited.insert(UI).second)
+ WorkList.push_back(UI);
+ }
+ }
+ return false;
+}
+
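hasHardUserWithinLoop above is a plain visited-set worklist walk over the transitive users of an instruction. The same pattern on a toy graph, self-contained and with the loop-containment filter omitted for brevity:

    #include <unordered_set>
    #include <vector>

    struct Node {
      std::vector<Node *> Users;
      bool HasSideEffects = false;
    };

    // Returns true if Root or any transitive user of it has side effects.
    static bool anyHardUser(Node *Root) {
      std::unordered_set<Node *> Visited{Root};
      std::vector<Node *> WorkList{Root};
      while (!WorkList.empty()) {
        Node *Curr = WorkList.back();
        WorkList.pop_back();
        if (Curr->HasSideEffects)
          return true;
        for (Node *U : Curr->Users)
          if (Visited.insert(U).second)   // enqueue each node only once
            WorkList.push_back(U);
      }
      return false;
    }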
/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
@@ -549,7 +558,7 @@ Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
-void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
+bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Check a pre-condition.
assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
"Indvars did not preserve LCSSA!");
@@ -610,48 +619,14 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
!isSafeToExpand(ExitValue, *SE))
continue;
- // Computing the value outside of the loop brings no benefit if :
- // - it is definitely used inside the loop in a way which can not be
- // optimized away.
- // - no use outside of the loop can take advantage of hoisting the
- // computation out of the loop
- if (ExitValue->getSCEVType()>=scMulExpr) {
- unsigned NumHardInternalUses = 0;
- unsigned NumSoftExternalUses = 0;
- unsigned NumUses = 0;
- for (auto IB = Inst->user_begin(), IE = Inst->user_end();
- IB != IE && NumUses <= 6; ++IB) {
- Instruction *UseInstr = cast<Instruction>(*IB);
- unsigned Opc = UseInstr->getOpcode();
- NumUses++;
- if (L->contains(UseInstr)) {
- if (Opc == Instruction::Call || Opc == Instruction::Ret)
- NumHardInternalUses++;
- } else {
- if (Opc == Instruction::PHI) {
- // Do not count the Phi as a use. LCSSA may have inserted
- // plenty of trivial ones.
- NumUses--;
- for (auto PB = UseInstr->user_begin(),
- PE = UseInstr->user_end();
- PB != PE && NumUses <= 6; ++PB, ++NumUses) {
- unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
- if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
- NumSoftExternalUses++;
- }
- continue;
- }
- if (Opc != Instruction::Call && Opc != Instruction::Ret)
- NumSoftExternalUses++;
- }
- }
- if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
- continue;
- }
+ // Computing the value outside of the loop brings no benefit if it is
+ // definitely used inside the loop in a way which can not be optimized
+ // away.
+ if (!isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
+ continue;
bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
- Value *ExitVal =
- expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());
+ Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
<< '\n'
@@ -662,6 +637,16 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
continue;
}
+#ifndef NDEBUG
+ // If we reuse an instruction from a loop which is neither L nor one of
+ // its containing loops, we end up breaking LCSSA form for this loop by
+ // creating a new use of its instruction.
+ if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
+ if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
+ if (EVL != L)
+ assert(EVL->contains(L) && "LCSSA breach detected!");
+#endif
+
// Collect all the candidate PHINodes to be rewritten.
RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
}
@@ -670,6 +655,7 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
+ bool Changed = false;
// Transformation.
for (const RewritePhi &Phi : RewritePhiSet) {
PHINode *PN = Phi.PN;
@@ -703,6 +689,7 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// The insertion point instruction may have been deleted; clear it out
// so that the rewriter doesn't trip over it later.
Rewriter.clearInsertPoint();
+ return Changed;
}
//===---------------------------------------------------------------------===//
@@ -714,7 +701,7 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// loop header.
-void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
+bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
// Verify the input to the pass is already in LCSSA form.
assert(L->isLCSSAForm(*DT));
@@ -723,6 +710,7 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
auto *LoopHeader = L->getHeader();
assert(LoopHeader && "Invalid loop");
+ bool MadeAnyChanges = false;
for (auto *ExitBB : ExitBlocks) {
// If there are no more PHI nodes in this exit block, then no more
// values defined inside the loop are used on this path.
@@ -769,12 +757,14 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
if (PreheaderIdx != -1) {
assert(ExitVal->getParent() == LoopHeader &&
"ExitVal must be in loop header");
+ MadeAnyChanges = true;
PN.setIncomingValue(IncomingValIdx,
ExitVal->getIncomingValue(PreheaderIdx));
}
}
}
}
+ return MadeAnyChanges;
}
/// Check whether it is possible to delete the loop after rewriting exit
@@ -1024,6 +1014,8 @@ protected:
Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
bool widenLoopCompare(NarrowIVDefUse DU);
+ bool widenWithVariantLoadUse(NarrowIVDefUse DU);
+ void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
@@ -1368,6 +1360,146 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
return true;
}
+/// If the narrow use is an instruction whose two operands are the defining
+/// instruction of DU and a load instruction, then we have the following:
+/// if the load is hoisted outside the loop, then we do not reach this function
+/// as scalar evolution analysis works fine in widenIVUse with variables
+/// hoisted outside the loop and efficient code is subsequently generated by
+/// not emitting truncate instructions. But when the load is not hoisted
+/// (whether due to a limitation in alias analysis or a true legality constraint),
+/// then scalar evolution can not proceed with loop variant values and
+/// inefficient code is generated. This function handles the non-hoisted load
+/// special case by making the optimization generate the same type of code for
+/// hoisted and non-hoisted load (widen use and eliminate sign extend
+/// instruction). This special case is important especially when the induction
+/// variables are affecting addressing mode in code generation.
+bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ // Handle the common case of add<nsw/nuw>
+ const unsigned OpCode = NarrowUse->getOpcode();
+ // Only Add/Sub/Mul instructions are supported.
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+ OpCode != Instruction::Mul)
+ return false;
+
+ // The operand that is not defined by NarrowDef of DU. Let's call it the
+ // other operand.
+ unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
+ assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
+ "bad DU");
+
+ const SCEV *ExtendOperExpr = nullptr;
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(NarrowUse);
+ ExtendKind ExtKind = getExtendKind(NarrowDef);
+ if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
+ ExtendOperExpr = SE->getSignExtendExpr(
+ SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
+ ExtendOperExpr = SE->getZeroExtendExpr(
+ SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else
+ return false;
+
+ // We are interested in the other operand being a load instruction.
+ // But, we should look into relaxing this restriction later on.
+ auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
+ if (I && I->getOpcode() != Instruction::Load)
+ return false;
+
+ // Verifying that Defining operand is an AddRec
+ const SCEV *Op1 = SE->getSCEV(WideDef);
+ const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
+ if (!AddRecOp1 || AddRecOp1->getLoop() != L)
+ return false;
+ // Verifying that other operand is an Extend.
+ if (ExtKind == SignExtended) {
+ if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
+ return false;
+ } else {
+ if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
+ return false;
+ }
+
+ if (ExtKind == SignExtended) {
+ for (Use &U : NarrowUse->uses()) {
+ SExtInst *User = dyn_cast<SExtInst>(U.getUser());
+ if (!User || User->getType() != WideType)
+ return false;
+ }
+ } else { // ExtKind == ZeroExtended
+ for (Use &U : NarrowUse->uses()) {
+ ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
+ if (!User || User->getType() != WideType)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// Special Case for widening with variant Loads (see
+/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
+void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ ExtendKind ExtKind = getExtendKind(NarrowDef);
+
+ LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+ // Generating a widening use instruction.
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ ExtKind, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ ExtKind, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+
+ if (ExtKind == SignExtended)
+ ExtendKindMap[NarrowUse] = SignExtended;
+ else
+ ExtendKindMap[NarrowUse] = ZeroExtended;
+
+ // Update the Use.
+ if (ExtKind == SignExtended) {
+ for (Use &U : NarrowUse->uses()) {
+ SExtInst *User = dyn_cast<SExtInst>(U.getUser());
+ if (User && User->getType() == WideType) {
+ LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
+ << *WideBO << "\n");
+ ++NumElimExt;
+ User->replaceAllUsesWith(WideBO);
+ DeadInsts.emplace_back(User);
+ }
+ }
+ } else { // ExtKind == ZeroExtended
+ for (Use &U : NarrowUse->uses()) {
+ ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
+ if (User && User->getType() == WideType) {
+ LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
+ << *WideBO << "\n");
+ ++NumElimExt;
+ User->replaceAllUsesWith(WideBO);
+ DeadInsts.emplace_back(User);
+ }
+ }
+ }
+}
+
/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
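The variant-load widening added above targets loops where a narrow induction variable is combined with a value loaded inside the loop, and the result is then sign- or zero-extended before feeding address arithmetic. A rough source-level illustration (hypothetical code, not taken from the patch):

    // On an LP64 target, 'i' is a 32-bit IV but the subscript wants 64 bits.
    // If b[i] cannot be hoisted, SCEV sees a loop-variant operand and the
    // widener would otherwise insert a truncate; with this change the add is
    // widened and the sign extension of Idx disappears.
    long sum(const int *a, const int *b, int n) {
      long S = 0;
      for (int i = 0; i < n; ++i) {
        int Idx = i + b[i];   // narrow add of the IV and a loop-variant load
        S += a[Idx];          // implicit sign extension of Idx for addressing
      }
      return S;
    }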
@@ -1465,6 +1597,16 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
if (widenLoopCompare(DU))
return nullptr;
+ // We are here about to generate a truncate instruction that may hurt
+ // performance because the scalar evolution expression computed earlier
+ // in WideAddRec.first does not indicate a polynomial induction expression.
+ // In that case, look at the operands of the use instruction to determine
+ // if we can still widen the use instead of truncating its operand.
+ if (widenWithVariantLoadUse(DU)) {
+ widenWithVariantLoadUseCodegen(DU);
+ return nullptr;
+ }
+
// This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
@@ -1781,7 +1923,7 @@ public:
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
-void IndVarSimplify::simplifyAndExtend(Loop *L,
+bool IndVarSimplify::simplifyAndExtend(Loop *L,
SCEVExpander &Rewriter,
LoopInfo *LI) {
SmallVector<WideIVInfo, 8> WideIVs;
@@ -1798,6 +1940,7 @@ void IndVarSimplify::simplifyAndExtend(Loop *L,
// for all current phis, then determines whether any IVs can be
// widened. Widening adds new phis to LoopPhis, inducing another round of
// simplification on the wide IVs.
+ bool Changed = false;
while (!LoopPhis.empty()) {
// Evaluate as many IV expressions as possible before widening any IVs. This
// forces SCEV to set no-wrap flags before evaluating sign/zero
@@ -1827,6 +1970,7 @@ void IndVarSimplify::simplifyAndExtend(Loop *L,
}
}
}
+ return Changed;
}
//===----------------------------------------------------------------------===//
@@ -2193,11 +2337,9 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
-Value *IndVarSimplify::
-linearFunctionTestReplace(Loop *L,
- const SCEV *BackedgeTakenCount,
- PHINode *IndVar,
- SCEVExpander &Rewriter) {
+bool IndVarSimplify::
+linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar, SCEVExpander &Rewriter) {
assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
// Initialize CmpIndVar and IVCount to their preincremented values.
@@ -2320,8 +2462,7 @@ linearFunctionTestReplace(Loop *L,
DeadInsts.push_back(OrigCond);
++NumLFTR;
- Changed = true;
- return Cond;
+ return true;
}
//===----------------------------------------------------------------------===//
@@ -2331,13 +2472,14 @@ linearFunctionTestReplace(Loop *L,
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
-void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
+bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
BasicBlock *ExitBlock = L->getExitBlock();
- if (!ExitBlock) return;
+ if (!ExitBlock) return false;
BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) return;
+ if (!Preheader) return false;
+ bool MadeAnyChanges = false;
BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
BasicBlock::iterator I(Preheader->getTerminator());
while (I != Preheader->begin()) {
@@ -2407,10 +2549,13 @@ void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
Done = true;
}
+ MadeAnyChanges = true;
ToMove->moveBefore(*ExitBlock, InsertPt);
if (Done) break;
InsertPt = ToMove->getIterator();
}
+
+ return MadeAnyChanges;
}
//===----------------------------------------------------------------------===//
@@ -2421,6 +2566,7 @@ bool IndVarSimplify::run(Loop *L) {
// We need (and expect!) the incoming loop to be in LCSSA.
assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
"LCSSA required to run indvars!");
+ bool Changed = false;
// If LoopSimplify form is not available, stay out of trouble. Some notes:
// - LSR currently only supports LoopSimplify-form loops. Indvars'
@@ -2436,7 +2582,7 @@ bool IndVarSimplify::run(Loop *L) {
// If there are any floating-point recurrences, attempt to
// transform them to use integer recurrences.
- rewriteNonIntegerIVs(L);
+ Changed |= rewriteNonIntegerIVs(L);
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
@@ -2453,7 +2599,7 @@ bool IndVarSimplify::run(Loop *L) {
// other expressions involving loop IVs have been evaluated. This helps SCEV
// set no-wrap flags before normalizing sign/zero extension.
Rewriter.disableCanonicalMode();
- simplifyAndExtend(L, Rewriter, LI);
+ Changed |= simplifyAndExtend(L, Rewriter, LI);
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -2463,7 +2609,7 @@ bool IndVarSimplify::run(Loop *L) {
//
if (ReplaceExitValue != NeverRepl &&
!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- rewriteLoopExitValues(L, Rewriter);
+ Changed |= rewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV cycles.
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
@@ -2484,8 +2630,8 @@ bool IndVarSimplify::run(Loop *L) {
// explicitly check any assumptions made by SCEV. Brittle.
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
if (!AR || AR->getLoop()->getLoopPreheader())
- (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
- Rewriter);
+ Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ Rewriter);
}
}
// Clear the rewriter cache, because values that are in the rewriter's cache
@@ -2498,18 +2644,18 @@ bool IndVarSimplify::run(Loop *L) {
while (!DeadInsts.empty())
if (Instruction *Inst =
dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
+ Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
// The Rewriter may not be used from this point on.
// Loop-invariant instructions in the preheader that aren't used in the
// loop may be sunk below the loop to reduce register pressure.
- sinkUnusedInvariants(L);
+ Changed |= sinkUnusedInvariants(L);
// rewriteFirstIterationLoopExitValues does not rely on the computation of
// trip count and therefore can further simplify exit values in addition to
// rewriteLoopExitValues.
- rewriteFirstIterationLoopExitValues(L);
+ Changed |= rewriteFirstIterationLoopExitValues(L);
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
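A rough, hypothetical source-level sketch of the situation sinkUnusedInvariants handles, and of what its new bool return now reports to run() (the function and variable names here are invented for illustration, not taken from this change):

    // 'scaled' is loop-invariant and unused inside the loop, so the pass can
    // sink it from the preheader down to the exit block.
    int sum_then_scale(const int *a, int n, int k) {
      int scaled = k * 100;   // computed before the loop, only used after it
      int sum = 0;
      for (int i = 0; i < n; ++i)
        sum += a[i];
      return sum + scaled;    // sinking the multiply here reduces register
    }                         // pressure across the whole loop

With the hunks above, each such sink (and each of the other sub-transforms called from run()) now feeds the pass's Changed flag instead of being silently dropped.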
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index c5ed6d5c1b87..1c701bbee185 100644
--- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -133,34 +133,16 @@ namespace {
/// taken by the containing loop's induction variable.
///
class InductiveRangeCheck {
- // Classifies a range check
- enum RangeCheckKind : unsigned {
- // Range check of the form "0 <= I".
- RANGE_CHECK_LOWER = 1,
-
- // Range check of the form "I < L" where L is known positive.
- RANGE_CHECK_UPPER = 2,
-
- // The logical and of the RANGE_CHECK_LOWER and RANGE_CHECK_UPPER
- // conditions.
- RANGE_CHECK_BOTH = RANGE_CHECK_LOWER | RANGE_CHECK_UPPER,
-
- // Unrecognized range check condition.
- RANGE_CHECK_UNKNOWN = (unsigned)-1
- };
-
- static StringRef rangeCheckKindToStr(RangeCheckKind);
const SCEV *Begin = nullptr;
const SCEV *Step = nullptr;
const SCEV *End = nullptr;
Use *CheckUse = nullptr;
- RangeCheckKind Kind = RANGE_CHECK_UNKNOWN;
bool IsSigned = true;
- static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
- ScalarEvolution &SE, Value *&Index,
- Value *&Length, bool &IsSigned);
+ static bool parseRangeCheckICmp(Loop *L, ICmpInst *ICI, ScalarEvolution &SE,
+ Value *&Index, Value *&Length,
+ bool &IsSigned);
static void
extractRangeChecksFromCond(Loop *L, ScalarEvolution &SE, Use &ConditionUse,
@@ -175,7 +157,6 @@ public:
void print(raw_ostream &OS) const {
OS << "InductiveRangeCheck:\n";
- OS << " Kind: " << rangeCheckKindToStr(Kind) << "\n";
OS << " Begin: ";
Begin->print(OS);
OS << " Step: ";
@@ -283,32 +264,11 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IRCELegacyPass, "irce", "Inductive range check elimination",
false, false)
-StringRef InductiveRangeCheck::rangeCheckKindToStr(
- InductiveRangeCheck::RangeCheckKind RCK) {
- switch (RCK) {
- case InductiveRangeCheck::RANGE_CHECK_UNKNOWN:
- return "RANGE_CHECK_UNKNOWN";
-
- case InductiveRangeCheck::RANGE_CHECK_UPPER:
- return "RANGE_CHECK_UPPER";
-
- case InductiveRangeCheck::RANGE_CHECK_LOWER:
- return "RANGE_CHECK_LOWER";
-
- case InductiveRangeCheck::RANGE_CHECK_BOTH:
- return "RANGE_CHECK_BOTH";
- }
-
- llvm_unreachable("unknown range check type!");
-}
-
/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI` cannot
-/// be interpreted as a range check, return `RANGE_CHECK_UNKNOWN` and set
-/// `Index` and `Length` to `nullptr`. Otherwise set `Index` to the value being
-/// range checked, and set `Length` to the upper limit `Index` is being range
-/// checked with if (and only if) the range check type is stronger or equal to
-/// RANGE_CHECK_UPPER.
-InductiveRangeCheck::RangeCheckKind
+/// be interpreted as a range check, return false and set `Index` and `Length`
+/// to `nullptr`. Otherwise set `Index` to the value being range checked, and
+/// set `Length` to the upper limit `Index` is being range checked against.
+bool
InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
ScalarEvolution &SE, Value *&Index,
Value *&Length, bool &IsSigned) {
@@ -322,7 +282,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
switch (Pred) {
default:
- return RANGE_CHECK_UNKNOWN;
+ return false;
case ICmpInst::ICMP_SLE:
std::swap(LHS, RHS);
@@ -331,9 +291,9 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
IsSigned = true;
if (match(RHS, m_ConstantInt<0>())) {
Index = LHS;
- return RANGE_CHECK_LOWER;
+ return true; // Lower.
}
- return RANGE_CHECK_UNKNOWN;
+ return false;
case ICmpInst::ICMP_SLT:
std::swap(LHS, RHS);
@@ -342,15 +302,15 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
IsSigned = true;
if (match(RHS, m_ConstantInt<-1>())) {
Index = LHS;
- return RANGE_CHECK_LOWER;
+ return true; // Lower.
}
if (IsLoopInvariant(LHS)) {
Index = RHS;
Length = LHS;
- return RANGE_CHECK_UPPER;
+ return true; // Upper.
}
- return RANGE_CHECK_UNKNOWN;
+ return false;
case ICmpInst::ICMP_ULT:
std::swap(LHS, RHS);
@@ -360,9 +320,9 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
if (IsLoopInvariant(LHS)) {
Index = RHS;
Length = LHS;
- return RANGE_CHECK_BOTH;
+ return true; // Both lower and upper.
}
- return RANGE_CHECK_UNKNOWN;
+ return false;
}
llvm_unreachable("default clause returns!");
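For context, a hedged source-level example of the range-check idiom parseRangeCheckICmp recognizes (the function and parameter names below are invented for illustration):

    void copy_checked(int *dst, const int *src, int len, int base, int n) {
      for (int i = 0; i < n; ++i) {
        int j = i + base;        // affine in the induction variable
        if (j >= 0 && j < len)   // the "0 <= I" and "I < L" comparisons parsed above
          dst[j] = src[j];
        else
          return;                // bounds-check failure path
      }
    }

After this change the parser only answers whether the comparison is a recognizable check at all; the lower/upper/both classification that the removed RangeCheckKind enum carried is no longer tracked.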
@@ -391,8 +351,7 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
Value *Length = nullptr, *Index;
bool IsSigned;
- auto RCKind = parseRangeCheckICmp(L, ICI, SE, Index, Length, IsSigned);
- if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
+ if (!parseRangeCheckICmp(L, ICI, SE, Index, Length, IsSigned))
return;
const auto *IndexAddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Index));
@@ -408,7 +367,6 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
if (Length)
End = SE.getSCEV(Length);
else {
- assert(RCKind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!");
// So far we can only reach this point for Signed range check. This may
// change in future. In this case we will need to pick Unsigned max for the
// unsigned range check.
@@ -422,7 +380,6 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
IRC.Begin = IndexAddRec->getStart();
IRC.Step = IndexAddRec->getStepRecurrence(SE);
IRC.CheckUse = &ConditionUse;
- IRC.Kind = RCKind;
IRC.IsSigned = IsSigned;
Checks.push_back(IRC);
}
@@ -689,17 +646,6 @@ void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
PN->setIncomingBlock(i, ReplaceBy);
}
-static bool CannotBeMaxInLoop(const SCEV *BoundSCEV, Loop *L,
- ScalarEvolution &SE, bool Signed) {
- unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
- APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) :
- APInt::getMaxValue(BitWidth);
- auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
- SE.isLoopEntryGuardedByCond(L, Predicate, BoundSCEV,
- SE.getConstant(Max));
-}
-
/// Given a loop with a decreasing induction variable, is it possible to
/// safely calculate the bounds of a new loop using the given Predicate.
static bool isSafeDecreasingBound(const SCEV *Start,
@@ -795,31 +741,6 @@ static bool isSafeIncreasingBound(const SCEV *Start,
SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit));
}
-static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L,
- ScalarEvolution &SE, bool Signed) {
- unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
- APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
- APInt::getMinValue(BitWidth);
- auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
- SE.isLoopEntryGuardedByCond(L, Predicate, BoundSCEV,
- SE.getConstant(Min));
-}
-
-static bool isKnownNonNegativeInLoop(const SCEV *BoundSCEV, const Loop *L,
- ScalarEvolution &SE) {
- const SCEV *Zero = SE.getZero(BoundSCEV->getType());
- return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
- SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, BoundSCEV, Zero);
-}
-
-static bool isKnownNegativeInLoop(const SCEV *BoundSCEV, const Loop *L,
- ScalarEvolution &SE) {
- const SCEV *Zero = SE.getZero(BoundSCEV->getType());
- return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
- SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, BoundSCEV, Zero);
-}
-
Optional<LoopStructure>
LoopStructure::parseLoopStructure(ScalarEvolution &SE,
BranchProbabilityInfo *BPI, Loop &L,
@@ -977,12 +898,12 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
// ... ...
// } }
if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
- CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/false)) {
+ cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/false)) {
Pred = ICmpInst::ICMP_UGT;
RightSCEV = SE.getMinusSCEV(RightSCEV,
SE.getOne(RightSCEV->getType()));
DecreasedRightValueByOne = true;
- } else if (CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/true)) {
+ } else if (cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/true)) {
Pred = ICmpInst::ICMP_SGT;
RightSCEV = SE.getMinusSCEV(RightSCEV,
SE.getOne(RightSCEV->getType()));
@@ -1042,11 +963,11 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
// ... ...
// } }
if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
- CannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) {
+ cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) {
Pred = ICmpInst::ICMP_ULT;
RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
IncreasedRightValueByOne = true;
- } else if (CannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) {
+ } else if (cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) {
Pred = ICmpInst::ICMP_SLT;
RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
IncreasedRightValueByOne = true;
@@ -1339,29 +1260,20 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
// EnterLoopCond - is it okay to start executing this `LS'?
Value *EnterLoopCond = nullptr;
- if (Increasing)
- EnterLoopCond = IsSignedPredicate
- ? B.CreateICmpSLT(LS.IndVarStart, ExitSubloopAt)
- : B.CreateICmpULT(LS.IndVarStart, ExitSubloopAt);
- else
- EnterLoopCond = IsSignedPredicate
- ? B.CreateICmpSGT(LS.IndVarStart, ExitSubloopAt)
- : B.CreateICmpUGT(LS.IndVarStart, ExitSubloopAt);
+ auto Pred =
+ Increasing
+ ? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
+ : (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+ EnterLoopCond = B.CreateICmp(Pred, LS.IndVarStart, ExitSubloopAt);
B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
PreheaderJump->eraseFromParent();
LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
B.SetInsertPoint(LS.LatchBr);
- Value *TakeBackedgeLoopCond = nullptr;
- if (Increasing)
- TakeBackedgeLoopCond = IsSignedPredicate
- ? B.CreateICmpSLT(LS.IndVarBase, ExitSubloopAt)
- : B.CreateICmpULT(LS.IndVarBase, ExitSubloopAt);
- else
- TakeBackedgeLoopCond = IsSignedPredicate
- ? B.CreateICmpSGT(LS.IndVarBase, ExitSubloopAt)
- : B.CreateICmpUGT(LS.IndVarBase, ExitSubloopAt);
+ Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, LS.IndVarBase,
+ ExitSubloopAt);
+
Value *CondForBranch = LS.LatchBrExitIdx == 1
? TakeBackedgeLoopCond
: B.CreateNot(TakeBackedgeLoopCond);
@@ -1373,15 +1285,7 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
// IterationsLeft - are there any more iterations left, given the original
// upper bound on the induction variable? If not, we branch to the "real"
// exit.
- Value *IterationsLeft = nullptr;
- if (Increasing)
- IterationsLeft = IsSignedPredicate
- ? B.CreateICmpSLT(LS.IndVarBase, LS.LoopExitAt)
- : B.CreateICmpULT(LS.IndVarBase, LS.LoopExitAt);
- else
- IterationsLeft = IsSignedPredicate
- ? B.CreateICmpSGT(LS.IndVarBase, LS.LoopExitAt)
- : B.CreateICmpUGT(LS.IndVarBase, LS.LoopExitAt);
+ Value *IterationsLeft = B.CreateICmp(Pred, LS.IndVarBase, LS.LoopExitAt);
B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
BranchInst *BranchToContinuation =
@@ -1513,16 +1417,14 @@ bool LoopConstrainer::run() {
if (Increasing)
ExitPreLoopAtSCEV = *SR.LowLimit;
+ else if (cannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE,
+ IsSignedPredicate))
+ ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
else {
- if (CannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE,
- IsSignedPredicate))
- ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
- else {
- LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
- << "preloop exit limit. HighLimit = "
- << *(*SR.HighLimit) << "\n");
- return false;
- }
+ LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
+ << "preloop exit limit. HighLimit = "
+ << *(*SR.HighLimit) << "\n");
+ return false;
}
if (!isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt, SE)) {
@@ -1542,16 +1444,14 @@ bool LoopConstrainer::run() {
if (Increasing)
ExitMainLoopAtSCEV = *SR.HighLimit;
+ else if (cannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE,
+ IsSignedPredicate))
+ ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
else {
- if (CannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE,
- IsSignedPredicate))
- ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
- else {
- LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
- << "mainloop exit limit. LowLimit = "
- << *(*SR.LowLimit) << "\n");
- return false;
- }
+ LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
+ << "mainloop exit limit. LowLimit = "
+ << *(*SR.LowLimit) << "\n");
+ return false;
}
if (!isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt, SE)) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 1d66472f93c8..48de56a02834 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -25,12 +25,12 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -38,6 +38,7 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -65,6 +66,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
@@ -285,7 +287,7 @@ bool JumpThreading::runOnFunction(Function &F) {
auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DeferredDominance DDT(*DT);
+ DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData = F.hasProfileData();
@@ -295,7 +297,7 @@ bool JumpThreading::runOnFunction(Function &F) {
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DDT, HasProfileData,
+ bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, HasProfileData,
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
@@ -312,7 +314,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
- DeferredDominance DDT(DT);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
@@ -322,7 +324,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = runImpl(F, &TLI, &LVI, &AA, &DDT, HasProfileData,
+ bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, HasProfileData,
std::move(BFI), std::move(BPI));
if (!Changed)
@@ -336,14 +338,14 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
LazyValueInfo *LVI_, AliasAnalysis *AA_,
- DeferredDominance *DDT_, bool HasProfileData_,
+ DomTreeUpdater *DTU_, bool HasProfileData_,
std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_) {
LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = TLI_;
LVI = LVI_;
AA = AA_;
- DDT = DDT_;
+ DTU = DTU_;
BFI.reset();
BPI.reset();
// When profile data is available, we need to update edge weights after
@@ -360,7 +362,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// JumpThreading must not process blocks unreachable from entry. It's a
// waste of compute time and can potentially lead to hangs.
SmallPtrSet<BasicBlock *, 16> Unreachable;
- DominatorTree &DT = DDT->flush();
+ assert(DTU && "DTU isn't passed into JumpThreading before using it.");
+ assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
+ DominatorTree &DT = DTU->getDomTree();
for (auto &BB : F)
if (!DT.isReachableFromEntry(&BB))
Unreachable.insert(&BB);
@@ -379,7 +383,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// Stop processing BB if it's the entry or is now deleted. The following
// routines attempt to eliminate BB, and locating a suitable replacement
// for the entry is non-trivial.
- if (&BB == &F.getEntryBlock() || DDT->pendingDeletedBB(&BB))
+ if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB))
continue;
if (pred_empty(&BB)) {
@@ -390,7 +394,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
<< '\n');
LoopHeaders.erase(&BB);
LVI->eraseBlock(&BB);
- DeleteDeadBlock(&BB, DDT);
+ DeleteDeadBlock(&BB, DTU);
Changed = true;
continue;
}
@@ -404,9 +408,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// Don't alter Loop headers and latches to ensure another pass can
// detect and transform nested loops later.
!LoopHeaders.count(&BB) && !LoopHeaders.count(BI->getSuccessor(0)) &&
- TryToSimplifyUncondBranchFromEmptyBlock(&BB, DDT)) {
- // BB is valid for cleanup here because we passed in DDT. F remains
- // BB's parent until a DDT->flush() event.
+ TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
+ // BB is valid for cleanup here because we passed in DTU. F remains
+ // BB's parent until a DTU->getDomTree() event.
LVI->eraseBlock(&BB);
Changed = true;
}
@@ -415,7 +419,8 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
} while (Changed);
LoopHeaders.clear();
- DDT->flush();
+ // Flush only the Dominator Tree.
+ DTU->getDomTree();
LVI->enableDT();
return EverChanged;
}
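The DeferredDominance-to-DomTreeUpdater migration keeps the same deferred-update model. Below is a fragment (not a standalone program) sketching the lazy-update call pattern used throughout this file, assuming DT (a DominatorTree), LVI, and the blocks BB, PredBB, NewBB, SuccBB from the surrounding pass; every call shown also appears in the hunks of this diff:

    // Queue CFG edge updates; the dominator tree is only rebuilt on demand.
    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
    DTU.applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
                      {DominatorTree::Insert, PredBB, NewBB},
                      {DominatorTree::Delete, PredBB, BB}});
    if (DTU.hasPendingDomTreeUpdates())   // decide whether LVI may rely on the tree
      LVI->disableDT();
    DominatorTree &Current = DTU.getDomTree();  // flushes all queued updates

DTU.deleteEdgeRelaxed(BB, Succ) is the drop-in replacement for the old DDT->deleteEdge(BB, Succ), and DTU.isBBPendingDeletion(BB) replaces DDT->pendingDeletedBB(BB).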
@@ -569,9 +574,11 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
/// BB in the result vector.
///
/// This returns true if there were any known values.
-bool JumpThreadingPass::ComputeValueKnownInPredecessors(
+bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
Value *V, BasicBlock *BB, PredValueInfo &Result,
- ConstantPreference Preference, Instruction *CxtI) {
+ ConstantPreference Preference,
+ DenseSet<std::pair<Value *, BasicBlock *>> &RecursionSet,
+ Instruction *CxtI) {
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
// prevent this, keep track of what (value, block) pairs we've already visited
@@ -579,10 +586,6 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
if (!RecursionSet.insert(std::make_pair(V, BB)).second)
return false;
- // An RAII help to remove this pair from the recursion set once the recursion
- // stack pops back out again.
- RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
-
// If V is a constant, then it is known in all predecessors.
if (Constant *KC = getKnownConstant(V, Preference)) {
for (BasicBlock *Pred : predecessors(BB))
@@ -609,7 +612,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
// Perhaps getConstantOnEdge should be smart enough to do this?
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -626,7 +629,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
/// If I is a PHI node, then we know the incoming values for any constants.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -652,7 +655,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
Value *Source = CI->getOperand(0);
if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
return false;
- ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
+ ComputeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
+ RecursionSet, CxtI);
if (Result.empty())
return false;
@@ -672,10 +676,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
I->getOpcode() == Instruction::And) {
PredValueInfoTy LHSVals, RHSVals;
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
- WantInteger, CxtI);
- ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
+ WantInteger, RecursionSet, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals,
+ WantInteger, RecursionSet, CxtI);
if (LHSVals.empty() && RHSVals.empty())
return false;
@@ -710,8 +714,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
if (I->getOpcode() == Instruction::Xor &&
isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isOne()) {
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
+ WantInteger, RecursionSet, CxtI);
if (Result.empty())
return false;
@@ -728,8 +732,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
&& "A binary operator creating a block address?");
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
PredValueInfoTy LHSVals;
- ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
+ WantInteger, RecursionSet, CxtI);
// Try to use constant folding to simplify the binary operator.
for (const auto &LHSVal : LHSVals) {
@@ -759,7 +763,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
const DataLayout &DL = PN->getModule()->getDataLayout();
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -806,7 +810,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
if (!isa<Instruction>(CmpLHS) ||
cast<Instruction>(CmpLHS)->getParent() != BB) {
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -838,7 +842,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
if (!isa<Instruction>(AddLHS) ||
cast<Instruction>(AddLHS)->getParent() != BB) {
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -874,8 +878,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// Try to find a constant value for the LHS of a comparison,
// and evaluate it statically if we can.
PredValueInfoTy LHSVals;
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
+ WantInteger, RecursionSet, CxtI);
for (const auto &LHSVal : LHSVals) {
Constant *V = LHSVal.first;
@@ -895,8 +899,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
PredValueInfoTy Conds;
if ((TrueVal || FalseVal) &&
- ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
- WantInteger, CxtI)) {
+ ComputeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
+ WantInteger, RecursionSet, CxtI)) {
for (auto &C : Conds) {
Constant *Cond = C.first;
@@ -923,7 +927,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
}
// If all else fails, see if LVI can figure out a constant value for us.
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -942,7 +946,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
/// Since we can pick an arbitrary destination, we pick the successor with the
/// fewest predecessors. This should reduce the in-degree of the others.
static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
- TerminatorInst *BBTerm = BB->getTerminator();
+ Instruction *BBTerm = BB->getTerminator();
unsigned MinSucc = 0;
BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
// Compute the successor with the minimum number of predecessors.
@@ -974,7 +978,7 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) {
bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// If the block is trivially dead, just return and let the caller nuke it.
// This simplifies other transformations.
- if (DDT->pendingDeletedBB(BB) ||
+ if (DTU->isBBPendingDeletion(BB) ||
(pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
return false;
@@ -983,15 +987,15 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
- const TerminatorInst *TI = SinglePred->getTerminator();
- if (!TI->isExceptional() && TI->getNumSuccessors() == 1 &&
+ const Instruction *TI = SinglePred->getTerminator();
+ if (!TI->isExceptionalTerminator() && TI->getNumSuccessors() == 1 &&
SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
// If SinglePred was a loop header, BB becomes one.
if (LoopHeaders.erase(SinglePred))
LoopHeaders.insert(BB);
LVI->eraseBlock(SinglePred);
- MergeBasicBlockIntoOnlyPred(BB, nullptr, DDT);
+ MergeBasicBlockIntoOnlyPred(BB, DTU);
// Now that BB is merged into SinglePred (i.e. SinglePred Code followed by
// BB code within one basic block `BB`), we need to invalidate the LVI
@@ -1075,7 +1079,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
std::vector<DominatorTree::UpdateType> Updates;
// Fold the branch/switch.
- TerminatorInst *BBTerm = BB->getTerminator();
+ Instruction *BBTerm = BB->getTerminator();
Updates.reserve(BBTerm->getNumSuccessors());
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
if (i == BestSucc) continue;
@@ -1088,7 +1092,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
<< "' folding undef terminator: " << *BBTerm << '\n');
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
return true;
}
@@ -1100,7 +1104,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
<< "' folding terminator: " << *BB->getTerminator()
<< '\n');
++NumFolds;
- ConstantFoldTerminator(BB, true, nullptr, DDT);
+ ConstantFoldTerminator(BB, true, nullptr, DTU);
return true;
}
@@ -1127,7 +1131,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// threading is concerned.
assert(CondBr->isConditional() && "Threading on unconditional terminator");
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -1156,7 +1160,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
ConstantInt::getFalse(CondCmp->getType());
ReplaceFoldableUses(CondCmp, CI);
}
- DDT->deleteEdge(BB, ToRemoveSucc);
+ DTU->deleteEdgeRelaxed(BB, ToRemoveSucc);
return true;
}
@@ -1167,6 +1171,9 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
}
}
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ TryToUnfoldSelect(SI, BB);
+
// Check for some cases that are worth simplifying. Right now we want to look
// for loads that are used by a switch or by the condition for the branch. If
// we see one, check to see if it's partially redundant. If so, insert a PHI
@@ -1240,7 +1247,7 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
RemoveSucc->removePredecessor(BB);
BranchInst::Create(KeepSucc, BI);
BI->eraseFromParent();
- DDT->deleteEdge(BB, RemoveSucc);
+ DTU->deleteEdgeRelaxed(BB, RemoveSucc);
return true;
}
CurrentBB = CurrentPred;
@@ -1296,7 +1303,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
if (IsLoadCSE) {
LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
- combineMetadataForCSE(NLoadI, LoadI);
+ combineMetadataForCSE(NLoadI, LoadI, false);
};
// If the returned value is the load itself, replace with an undef. This can
@@ -1486,7 +1493,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
}
for (LoadInst *PredLoadI : CSELoads) {
- combineMetadataForCSE(PredLoadI, LoadI);
+ combineMetadataForCSE(PredLoadI, LoadI, true);
}
LoadI->replaceAllUsesWith(PN);
@@ -1544,7 +1551,7 @@ FindMostPopularDest(BasicBlock *BB,
// successor list.
if (!SamePopularity.empty()) {
SamePopularity.push_back(MostPopularDest);
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
for (unsigned i = 0; ; ++i) {
assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
@@ -1664,10 +1671,10 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
}
// Finally update the terminator.
- TerminatorInst *Term = BB->getTerminator();
+ Instruction *Term = BB->getTerminator();
BranchInst::Create(OnlyDest, Term);
Term->eraseFromParent();
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
// If the condition is now dead due to the removal of the old terminator,
// erase it.
@@ -1945,7 +1952,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
<< "' with cost: " << JumpThreadCost
<< ", across block:\n " << *BB << "\n");
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -1974,7 +1981,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
- for (; !isa<TerminatorInst>(BI); ++BI) {
+ for (; !BI->isTerminator(); ++BI) {
Instruction *New = BI->clone();
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
@@ -2001,7 +2008,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// Update the terminator of PredBB to jump to NewBB instead of BB. This
// eliminates predecessors from BB, which requires us to simplify any PHI
// nodes in BB.
- TerminatorInst *PredTerm = PredBB->getTerminator();
+ Instruction *PredTerm = PredBB->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
if (PredTerm->getSuccessor(i) == BB) {
BB->removePredecessor(PredBB, true);
@@ -2009,7 +2016,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
}
// Enqueue required DT updates.
- DDT->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
+ DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
{DominatorTree::Insert, PredBB, NewBB},
{DominatorTree::Delete, PredBB, BB}});
@@ -2105,12 +2112,12 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
}
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
return NewBBs[0];
}
bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
assert(TI->getNumSuccessors() > 1 && "not a split");
MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
@@ -2378,12 +2385,78 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
++NumDupes;
return true;
}
+// Pred is a predecessor of BB with an unconditional branch to BB. SI is
+// a Select instruction in Pred. BB has other predecessors and SI is used in
+// a PHI node in BB. SI has no other use.
+// A new basic block, NewBB, is created and SI is converted into a compare and
+// conditional branch. SI is erased from its parent.
+void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
+ SelectInst *SI, PHINode *SIUse,
+ unsigned Idx) {
+ // Expand the select.
+ //
+ // Pred --
+ // | v
+ // | NewBB
+ // | |
+ // |-----
+ // v
+ // BB
+ BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
+ BB->getParent(), BB);
+ // Move the unconditional branch to NewBB.
+ PredTerm->removeFromParent();
+ NewBB->getInstList().insert(NewBB->end(), PredTerm);
+ // Create a conditional branch and update PHI nodes.
+ BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
+ SIUse->setIncomingValue(Idx, SI->getFalseValue());
+ SIUse->addIncoming(SI->getTrueValue(), NewBB);
+
+ // The select is now dead.
+ SI->eraseFromParent();
+ DTU->applyUpdates({{DominatorTree::Insert, NewBB, BB},
+ {DominatorTree::Insert, Pred, NewBB}});
+
+ // Update any other PHI nodes in BB.
+ for (BasicBlock::iterator BI = BB->begin();
+ PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
+ if (Phi != SIUse)
+ Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
+}
+
+bool JumpThreadingPass::TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
+ PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
+
+ if (!CondPHI || CondPHI->getParent() != BB)
+ return false;
+
+ for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
+ BasicBlock *Pred = CondPHI->getIncomingBlock(I);
+ SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
+
+    // The second and third conditions can potentially be relaxed. Currently
+    // the conditions help to simplify the code and allow us to reuse existing
+    // code, developed for TryToUnfoldSelect(CmpInst *, BasicBlock *).
+ if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
+ continue;
+
+ BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!PredTerm || !PredTerm->isUnconditional())
+ continue;
+
+ UnfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
+ return true;
+ }
+ return false;
+}
+
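A hedged source-level sketch of the shape the new TryToUnfoldSelect(SwitchInst *, BasicBlock *) overload looks for (identifiers invented for illustration): the select lives in a predecessor that ends in an unconditional branch, and its only use is the phi feeding the switch condition.

    int classify(const int *a, int n, bool flag) {
      int sel;
      if (n > 0)
        sel = flag ? 1 : 2;   // select in the predecessor, single use below
      else
        sel = 0;
      switch (sel) {          // condition is a phi of {select, 0}
      case 1: return a[0];
      case 2: return a[n - 1];
      default: return -1;
      }
    }

Unfolding turns the select into a conditional branch through a new block, after which each arm reaches the switch with a known constant and the edge becomes threadable.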
/// TryToUnfoldSelect - Look for blocks of the form
/// bb1:
/// %a = select
@@ -2421,7 +2494,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
// Now check if one of the select values would allow us to constant fold the
// terminator in BB. We don't do the transform if both sides fold, those
// cases will be threaded in any case.
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -2434,34 +2507,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
if ((LHSFolds != LazyValueInfo::Unknown ||
RHSFolds != LazyValueInfo::Unknown) &&
LHSFolds != RHSFolds) {
- // Expand the select.
- //
- // Pred --
- // | v
- // | NewBB
- // | |
- // |-----
- // v
- // BB
- BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
- BB->getParent(), BB);
- // Move the unconditional branch to NewBB.
- PredTerm->removeFromParent();
- NewBB->getInstList().insert(NewBB->end(), PredTerm);
- // Create a conditional branch and update PHI nodes.
- BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
- CondLHS->setIncomingValue(I, SI->getFalseValue());
- CondLHS->addIncoming(SI->getTrueValue(), NewBB);
- // The select is now dead.
- SI->eraseFromParent();
-
- DDT->applyUpdates({{DominatorTree::Insert, NewBB, BB},
- {DominatorTree::Insert, Pred, NewBB}});
- // Update any other PHI nodes in BB.
- for (BasicBlock::iterator BI = BB->begin();
- PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
- if (Phi != CondLHS)
- Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
+ UnfoldSelectInstr(Pred, BB, SI, CondLHS, I);
return true;
}
}
@@ -2533,7 +2579,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
if (!SI)
continue;
// Expand the select.
- TerminatorInst *Term =
+ Instruction *Term =
SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
BasicBlock *SplitBB = SI->getParent();
BasicBlock *NewBB = Term->getParent();
@@ -2548,12 +2594,12 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
Updates.push_back({DominatorTree::Insert, BB, SplitBB});
Updates.push_back({DominatorTree::Insert, BB, NewBB});
Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
- // BB's successors were moved to SplitBB, update DDT accordingly.
+ // BB's successors were moved to SplitBB, update DTU accordingly.
for (auto *Succ : successors(SplitBB)) {
Updates.push_back({DominatorTree::Delete, BB, Succ});
Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
}
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
return true;
}
return false;
@@ -2603,9 +2649,8 @@ bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
for (auto &I : *BB)
- if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
- if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
- return true;
+ if (isGuard(&I) && ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
+ return true;
return false;
}
@@ -2651,28 +2696,16 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,
// Duplicate all instructions before the guard and the guard itself to the
// branch where implication is not proved.
BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(
- BB, PredGuardedBlock, AfterGuard, GuardedMapping);
+ BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
assert(GuardedBlock && "Could not create the guarded block?");
// Duplicate all instructions before the guard in the unguarded branch.
// Since we have successfully duplicated the guarded block and this block
// has fewer instructions, we expect it to succeed.
BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(
- BB, PredUnguardedBlock, Guard, UnguardedMapping);
+ BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
assert(UnguardedBlock && "Could not create the unguarded block?");
LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
<< GuardedBlock->getName() << "\n");
- // DuplicateInstructionsInSplitBetween inserts a new block "BB.split" between
- // PredBB and BB. We need to perform two inserts and one delete for each of
- // the above calls to update Dominators.
- DDT->applyUpdates(
- {// Guarded block split.
- {DominatorTree::Delete, PredGuardedBlock, BB},
- {DominatorTree::Insert, PredGuardedBlock, GuardedBlock},
- {DominatorTree::Insert, GuardedBlock, BB},
- // Unguarded block split.
- {DominatorTree::Delete, PredUnguardedBlock, BB},
- {DominatorTree::Insert, PredUnguardedBlock, UnguardedBlock},
- {DominatorTree::Insert, UnguardedBlock, BB}});
// Some instructions before the guard may still have uses. For them, we need
// to create Phi nodes merging their copies in both guarded and unguarded
// branches. Those instructions that have no uses can be just removed.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index c4ea43a43249..d204654c3915 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -31,6 +31,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
@@ -38,16 +39,18 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -58,6 +61,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -65,6 +69,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
@@ -73,6 +78,8 @@ using namespace llvm;
#define DEBUG_TYPE "licm"
+STATISTIC(NumCreatedBlocks, "Number of blocks created");
+STATISTIC(NumClonedBranches, "Number of branches cloned");
STATISTIC(NumSunk, "Number of instructions sunk out of loop");
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
@@ -84,51 +91,81 @@ static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
cl::desc("Disable memory promotion in LICM pass"));
+static cl::opt<bool> ControlFlowHoisting(
+ "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
+ cl::desc("Enable control flow (and PHI) hoisting in LICM"));
+
static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "
"invariance in loop using invariant start (default = 8)"));
+// Default value of zero implies we use the regular alias set tracker mechanism
+// instead of the cross product using AA to identify aliasing of the memory
+// location we are interested in.
+static cl::opt<int>
+LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
+               cl::desc("How many instructions to cross product using AA"));
+
+// Experimental option to allow imprecision in LICM (use MemorySSA cap) in
+// pathological cases, in exchange for faster compile. This is to be removed
+// if MemorySSA starts to address the same issue. This flag applies only when
+// LICM uses MemorySSA instead of AliasSetTracker. When the flag is disabled
+// (default), LICM calls MemorySSAWalker's getClobberingMemoryAccess, which
+// gets perfect accuracy. When the flag is enabled, LICM calls MemorySSA's
+// getDefiningAccess, which may not be precise, since optimizeUses is capped.
+static cl::opt<bool> EnableLicmCap(
+ "enable-licm-cap", cl::init(false), cl::Hidden,
+ cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in "
+ "pathological cases, in exchange for faster compile"));
+
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
TargetTransformInfo *TTI, bool &FreeInLoop);
-static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE);
+static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
+ BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- const Loop *CurLoop, LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE, bool FreeInLoop);
+ const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
+ bool FreeInLoop);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE,
const Instruction *CtxI = nullptr);
-static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
- const AAMDNodes &AAInfo,
- AliasSetTracker *CurAST);
-static Instruction *
-CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
- const LoopInfo *LI,
- const LoopSafetyInfo *SafetyInfo);
+static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
+ AliasSetTracker *CurAST, Loop *CurLoop,
+ AliasAnalysis *AA);
+static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
+ Loop *CurLoop);
+static Instruction *CloneInstructionInExitBlock(
+ Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
+ const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
+
+static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
+ AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
+
+static void moveInstructionBefore(Instruction &I, Instruction &Dest,
+ ICFLoopSafetyInfo &SafetyInfo);
namespace {
struct LoopInvariantCodeMotion {
+ using ASTrackerMapTy = DenseMap<Loop *, std::unique_ptr<AliasSetTracker>>;
bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
ScalarEvolution *SE, MemorySSA *MSSA,
OptimizationRemarkEmitter *ORE, bool DeleteAST);
- DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() {
- return LoopToAliasSetMap;
- }
+ ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
private:
- DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap;
+ ASTrackerMapTy LoopToAliasSetMap;
- AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
- AliasAnalysis *AA);
+ std::unique_ptr<AliasSetTracker>
+ collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
};
struct LegacyLICMPass : public LoopPass {
@@ -142,8 +179,6 @@ struct LegacyLICMPass : public LoopPass {
// If we have run LICM on a previous loop but now we are skipping
// (because we've hit the opt-bisect limit), we need to clear the
// loop alias information.
- for (auto &LTAS : LICM.getLoopToAliasSetMap())
- delete LTAS.second;
LICM.getLoopToAliasSetMap().clear();
return false;
}
@@ -173,8 +208,10 @@ struct LegacyLICMPass : public LoopPass {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (EnableMSSALoopDependency)
+ if (EnableMSSALoopDependency) {
AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
}
@@ -254,14 +291,22 @@ bool LoopInvariantCodeMotion::runOnLoop(
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
- AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA);
+ std::unique_ptr<AliasSetTracker> CurAST;
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (!MSSA) {
+ LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
+ CurAST = collectAliasInfoForLoop(L, LI, AA);
+ } else {
+ LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA. Promotion disabled.\n");
+ MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+ }
// Get the preheader block to move instructions into...
BasicBlock *Preheader = L->getLoopPreheader();
// Compute loop safety information.
- LoopSafetyInfo SafetyInfo;
- computeLoopSafetyInfo(&SafetyInfo, L);
+ ICFLoopSafetyInfo SafetyInfo(DT);
+ SafetyInfo.computeLoopSafetyInfo(L);
// We want to visit all of the instructions in this loop... that are not parts
// of our subloops (they have already had their invariants hoisted out of
@@ -275,10 +320,10 @@ bool LoopInvariantCodeMotion::runOnLoop(
//
if (L->hasDedicatedExits())
Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
- CurAST, &SafetyInfo, ORE);
+ CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
- CurAST, &SafetyInfo, ORE);
+ CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -307,27 +352,30 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool Promoted = false;
- // Loop over all of the alias sets in the tracker object.
- for (AliasSet &AS : *CurAST) {
- // We can promote this alias set if it has a store, if it is a "Must"
- // alias set, if the pointer is loop invariant, and if we are not
- // eliminating any volatile loads or stores.
- if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
- AS.isVolatile() || !L->isLoopInvariant(AS.begin()->getValue()))
- continue;
-
- assert(
- !AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
-
- SmallSetVector<Value *, 8> PointerMustAliases;
- for (const auto &ASI : AS)
- PointerMustAliases.insert(ASI.getValue());
-
- Promoted |= promoteLoopAccessesToScalars(PointerMustAliases, ExitBlocks,
- InsertPts, PIC, LI, DT, TLI, L,
- CurAST, &SafetyInfo, ORE);
+ if (CurAST.get()) {
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSet &AS : *CurAST) {
+ // We can promote this alias set if it has a store, if it is a "Must"
+ // alias set, if the pointer is loop invariant, and if we are not
+ // eliminating any volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ !L->isLoopInvariant(AS.begin()->getValue()))
+ continue;
+
+ assert(
+ !AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+
+ SmallSetVector<Value *, 8> PointerMustAliases;
+ for (const auto &ASI : AS)
+ PointerMustAliases.insert(ASI.getValue());
+
+ Promoted |= promoteLoopAccessesToScalars(
+ PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L,
+ CurAST.get(), &SafetyInfo, ORE);
+ }
}
+ // FIXME: Promotion initially disabled when using MemorySSA.
// Once we have promoted values across the loop body we have to
// recursively reform LCSSA as any nested loop may now have values defined
@@ -351,10 +399,11 @@ bool LoopInvariantCodeMotion::runOnLoop(
// If this loop is nested inside of another one, save the alias information
// for when we process the outer loop.
- if (L->getParentLoop() && !DeleteAST)
- LoopToAliasSetMap[L] = CurAST;
- else
- delete CurAST;
+ if (CurAST.get() && L->getParentLoop() && !DeleteAST)
+ LoopToAliasSetMap[L] = std::move(CurAST);
+
+ if (MSSAU.get() && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
if (Changed && SE)
SE->forgetLoopDispositions(L);
@@ -369,13 +418,16 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI,
TargetTransformInfo *TTI, Loop *CurLoop,
- AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
+ AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
- "Unexpected input to sinkRegion");
+ CurLoop != nullptr && SafetyInfo != nullptr &&
+ "Unexpected input to sinkRegion.");
+ assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
+ "Either AliasSetTracker or MemorySSA should be initialized.");
// We want to visit children before parents. We will enque all the parents
// before their children in the worklist and process the worklist in reverse
@@ -399,8 +451,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
salvageDebugInfo(I);
++II;
- CurAST->deleteValue(&I);
- I.eraseFromParent();
+ eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
Changed = true;
continue;
}
@@ -412,21 +463,252 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
//
bool FreeInLoop = false;
if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {
- if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) {
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+ !I.mayHaveSideEffects()) {
+ if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
if (!FreeInLoop) {
++II;
- CurAST->deleteValue(&I);
- I.eraseFromParent();
+ eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
}
Changed = true;
}
}
}
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
return Changed;
}
+namespace {
+// This is a helper class for hoistRegion that lets it hoist control flow in
+// order to hoist phis. The way this works is that we initially
+// start hoisting to the loop preheader, and when we see a loop invariant branch
+// we make note of this. When we then come to hoist an instruction that's
+// conditional on such a branch we duplicate the branch and the relevant control
+// flow, then hoist the instruction into the block corresponding to its original
+// block in the duplicated control flow.
+class ControlFlowHoister {
+private:
+ // Information about the loop we are hoisting from
+ LoopInfo *LI;
+ DominatorTree *DT;
+ Loop *CurLoop;
+ MemorySSAUpdater *MSSAU;
+
+ // A map of blocks in the loop to the block their instructions will be hoisted
+ // to.
+ DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
+
+ // The branches that we can hoist, mapped to the block that marks a
+ // convergence point of their control flow.
+ DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
+
+public:
+ ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
+ MemorySSAUpdater *MSSAU)
+ : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
+
+ void registerPossiblyHoistableBranch(BranchInst *BI) {
+ // We can only hoist conditional branches with loop invariant operands.
+ if (!ControlFlowHoisting || !BI->isConditional() ||
+ !CurLoop->hasLoopInvariantOperands(BI))
+ return;
+
+ // The branch destinations need to be in the loop, and we don't gain
+ // anything by duplicating conditional branches with duplicate successors,
+ // as it's essentially the same as an unconditional branch.
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = BI->getSuccessor(1);
+ if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
+ TrueDest == FalseDest)
+ return;
+
+ // We can hoist BI if one branch destination is the successor of the other,
+ // or both have common successor which we check by seeing if the
+ // intersection of their successors is non-empty.
+ // TODO: This could be expanded to allowing branches where both ends
+ // eventually converge to a single block.
+ SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
+ TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
+ FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
+ BasicBlock *CommonSucc = nullptr;
+ if (TrueDestSucc.count(FalseDest)) {
+ CommonSucc = FalseDest;
+ } else if (FalseDestSucc.count(TrueDest)) {
+ CommonSucc = TrueDest;
+ } else {
+ set_intersect(TrueDestSucc, FalseDestSucc);
+ // If there's one common successor use that.
+ if (TrueDestSucc.size() == 1)
+ CommonSucc = *TrueDestSucc.begin();
+      // If there's more than one, pick whichever appears first in the block list
+      // (we can't use the value returned by TrueDestSucc.begin() as it's
+      // unpredictable which element gets returned).
+ else if (!TrueDestSucc.empty()) {
+ Function *F = TrueDest->getParent();
+ auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
+ auto It = std::find_if(F->begin(), F->end(), IsSucc);
+ assert(It != F->end() && "Could not find successor in function");
+ CommonSucc = &*It;
+ }
+ }
+ // The common successor has to be dominated by the branch, as otherwise
+ // there will be some other path to the successor that will not be
+ // controlled by this branch so any phi we hoist would be controlled by the
+ // wrong condition. This also takes care of avoiding hoisting of loop back
+ // edges.
+ // TODO: In some cases this could be relaxed if the successor is dominated
+ // by another block that's been hoisted and we can guarantee that the
+ // control flow has been replicated exactly.
+ if (CommonSucc && DT->dominates(BI, CommonSucc))
+ HoistableBranches[BI] = CommonSucc;
+ }
+
+ bool canHoistPHI(PHINode *PN) {
+ // The phi must have loop invariant operands.
+ if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
+ return false;
+ // We can hoist phis if the block they are in is the target of hoistable
+ // branches which cover all of the predecessors of the block.
+ SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
+ BasicBlock *BB = PN->getParent();
+ for (BasicBlock *PredBB : predecessors(BB))
+ PredecessorBlocks.insert(PredBB);
+    // If we have fewer predecessor blocks than predecessors then the phi will
+    // have more than one incoming value for the same block, which we can't
+    // handle.
+    // TODO: This could be handled by erasing some of the duplicate incoming
+    // values.
+ if (PredecessorBlocks.size() != pred_size(BB))
+ return false;
+ for (auto &Pair : HoistableBranches) {
+ if (Pair.second == BB) {
+ // Which blocks are predecessors via this branch depends on whether the
+ // branch is triangle-like or diamond-like.
+ if (Pair.first->getSuccessor(0) == BB) {
+ PredecessorBlocks.erase(Pair.first->getParent());
+ PredecessorBlocks.erase(Pair.first->getSuccessor(1));
+ } else if (Pair.first->getSuccessor(1) == BB) {
+ PredecessorBlocks.erase(Pair.first->getParent());
+ PredecessorBlocks.erase(Pair.first->getSuccessor(0));
+ } else {
+ PredecessorBlocks.erase(Pair.first->getSuccessor(0));
+ PredecessorBlocks.erase(Pair.first->getSuccessor(1));
+ }
+ }
+ }
+ // PredecessorBlocks will now be empty if for every predecessor of BB we
+ // found a hoistable branch source.
+ return PredecessorBlocks.empty();
+ }
+
+ BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
+ if (!ControlFlowHoisting)
+ return CurLoop->getLoopPreheader();
+ // If BB has already been hoisted, return the hoisted block.
+ if (HoistDestinationMap.count(BB))
+ return HoistDestinationMap[BB];
+
+ // Check if this block is conditional based on a pending branch
+ auto HasBBAsSuccessor =
+ [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
+ return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
+ Pair.first->getSuccessor(1) == BB);
+ };
+ auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
+ HasBBAsSuccessor);
+
+ // If not involved in a pending branch, hoist to preheader
+ BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
+ if (It == HoistableBranches.end()) {
+ LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
+ << " as hoist destination for " << BB->getName()
+ << "\n");
+ HoistDestinationMap[BB] = InitialPreheader;
+ return InitialPreheader;
+ }
+ BranchInst *BI = It->first;
+ assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
+ HoistableBranches.end() &&
+ "BB is expected to be the target of at most one branch");
+
+ LLVMContext &C = BB->getContext();
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = BI->getSuccessor(1);
+ BasicBlock *CommonSucc = HoistableBranches[BI];
+ BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
+
+ // Create hoisted versions of blocks that currently don't have them
+ auto CreateHoistedBlock = [&](BasicBlock *Orig) {
+ if (HoistDestinationMap.count(Orig))
+ return HoistDestinationMap[Orig];
+ BasicBlock *New =
+ BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
+ HoistDestinationMap[Orig] = New;
+ DT->addNewBlock(New, HoistTarget);
+ if (CurLoop->getParentLoop())
+ CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
+ ++NumCreatedBlocks;
+ LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
+ << " as hoist destination for " << Orig->getName()
+ << "\n");
+ return New;
+ };
+ BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
+ BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
+ BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
+
+ // Link up these blocks with branches.
+ if (!HoistCommonSucc->getTerminator()) {
+ // The new common successor we've generated will branch to whatever the
+ // hoist target branched to.
+ BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
+ assert(TargetSucc && "Expected hoist target to have a single successor");
+ HoistCommonSucc->moveBefore(TargetSucc);
+ BranchInst::Create(TargetSucc, HoistCommonSucc);
+ }
+ if (!HoistTrueDest->getTerminator()) {
+ HoistTrueDest->moveBefore(HoistCommonSucc);
+ BranchInst::Create(HoistCommonSucc, HoistTrueDest);
+ }
+ if (!HoistFalseDest->getTerminator()) {
+ HoistFalseDest->moveBefore(HoistCommonSucc);
+ BranchInst::Create(HoistCommonSucc, HoistFalseDest);
+ }
+
+ // If BI is being cloned to what was originally the preheader then
+ // HoistCommonSucc will now be the new preheader.
+ if (HoistTarget == InitialPreheader) {
+ // Phis in the loop header now need to use the new preheader.
+ InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
+ if (MSSAU)
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+ HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
+ // The new preheader dominates the loop header.
+ DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
+ DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
+ DT->changeImmediateDominator(HeaderNode, PreheaderNode);
+ // The preheader hoist destination is now the new preheader, with the
+ // exception of the hoist destination of this branch.
+ for (auto &Pair : HoistDestinationMap)
+ if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
+ Pair.second = HoistCommonSucc;
+ }
+
+ // Now finally clone BI.
+ ReplaceInstWithInst(
+ HoistTarget->getTerminator(),
+ BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
+ ++NumClonedBranches;
+
+ assert(CurLoop->getLoopPreheader() &&
+ "Hoisting blocks should not have destroyed preheader");
+ return HoistDestinationMap[BB];
+ }
+};
+} // namespace
+
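As a rough, source-level illustration of the shape the ControlFlowHoister above looks for (a hypothetical example, not taken from the patch): the branch condition and both conditionally computed values are loop invariant and the two arms reconverge immediately, forming the diamond that registerPossiblyHoistableBranch accepts, so the branch, both destination blocks, and the converging phi can all be recreated in the preheader.

// Hypothetical C++ input with a hoistable "diamond" in the loop body.
int sum_scaled(const int *a, int n, bool flag, int x, int y) {
  int s = 0;
  for (int i = 0; i < n; ++i) {
    int scale;
    if (flag)        // loop-invariant condition
      scale = x * 7; // loop-invariant value on the true arm
    else
      scale = y * 9; // loop-invariant value on the false arm
    // 'scale' is a phi whose incoming values are all loop invariant, so with
    // control-flow hoisting the branch, both arms, and the phi move to the
    // preheader instead of being re-evaluated on every iteration.
    s += a[i] * scale;
  }
  return s;
}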
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
@@ -434,30 +716,34 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
///
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
- AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
+ AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
- "Unexpected input to hoistRegion");
-
- // We want to visit parents before children. We will enque all the parents
- // before their children in the worklist and process the worklist in order.
- SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
-
+ CurLoop != nullptr && SafetyInfo != nullptr &&
+ "Unexpected input to hoistRegion.");
+ assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
+ "Either AliasSetTracker or MemorySSA should be initialized.");
+
+ ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
+
+ // Keep track of instructions that have been hoisted, as they may need to be
+ // re-hoisted if they end up not dominating all of their uses.
+ SmallVector<Instruction *, 16> HoistedInstructions;
+
+ // For PHI hoisting to work we need to hoist blocks before their successors.
+ // We can do this by iterating through the blocks in the loop in reverse
+ // post-order.
+ LoopBlocksRPO Worklist(CurLoop);
+ Worklist.perform(LI);
bool Changed = false;
- for (DomTreeNode *DTN : Worklist) {
- BasicBlock *BB = DTN->getBlock();
+ for (BasicBlock *BB : Worklist) {
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
if (inSubLoop(BB, CurLoop, LI))
continue;
- // Keep track of whether the prefix of instructions visited so far are such
- // that the next instruction visited is guaranteed to execute if the loop
- // is entered.
- bool IsMustExecute = CurLoop->getHeader() == BB;
-
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
Instruction &I = *II++;
// Try constant folding this instruction. If all the operands are
@@ -467,12 +753,12 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
&I, I.getModule()->getDataLayout(), TLI)) {
LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
<< '\n');
- CurAST->copyValue(&I, C);
+ if (CurAST)
+ CurAST->copyValue(&I, C);
+ // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
I.replaceAllUsesWith(C);
- if (isInstructionTriviallyDead(&I, TLI)) {
- CurAST->deleteValue(&I);
- I.eraseFromParent();
- }
+ if (isInstructionTriviallyDead(&I, TLI))
+ eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
Changed = true;
continue;
}
@@ -480,14 +766,18 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// Try hoisting the instruction out to the preheader. We can only do
// this if all of the operands of the instruction are loop invariant and
// if it is safe to hoist the instruction.
- //
+ // TODO: It may be safe to hoist if we are hoisting to a conditional block
+ // and we have accurately duplicated the control flow from the loop header
+ // to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) &&
- (IsMustExecute ||
- isSafeToExecuteUnconditionally(
- I, DT, CurLoop, SafetyInfo, ORE,
- CurLoop->getLoopPreheader()->getTerminator()))) {
- Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE);
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+ isSafeToExecuteUnconditionally(
+ I, DT, CurLoop, SafetyInfo, ORE,
+ CurLoop->getLoopPreheader()->getTerminator())) {
+ hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+ MSSAU, ORE);
+ HoistedInstructions.push_back(&I);
+ Changed = true;
continue;
}
@@ -500,24 +790,101 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
+ SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
ReciprocalDivisor->insertBefore(&I);
auto Product =
BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
Product->setFastMathFlags(I.getFastMathFlags());
+ SafetyInfo->insertInstructionTo(Product, I.getParent());
Product->insertAfter(&I);
I.replaceAllUsesWith(Product);
- I.eraseFromParent();
+ eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
- hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
+ hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
+ SafetyInfo, MSSAU, ORE);
+ HoistedInstructions.push_back(ReciprocalDivisor);
Changed = true;
continue;
}
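A minimal sketch (hypothetical code, not part of the patch) of what the block above does under fast-math: the division by a loop-invariant divisor becomes a multiplication by a reciprocal, which is then hoisted like any other invariant computation.

// Conceptually, with reciprocal math allowed:
void scale_down(float *a, int n, float d) {
  for (int i = 0; i < n; ++i)
    a[i] = a[i] / d; // becomes: t = 1.0f / d (hoisted); a[i] = a[i] * t;
}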
- if (IsMustExecute)
- IsMustExecute = isGuaranteedToTransferExecutionToSuccessor(&I);
+ using namespace PatternMatch;
+ if (((I.use_empty() &&
+ match(&I, m_Intrinsic<Intrinsic::invariant_start>())) ||
+ isGuard(&I)) &&
+ CurLoop->hasLoopInvariantOperands(&I) &&
+ SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
+ SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
+ hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+ MSSAU, ORE);
+ HoistedInstructions.push_back(&I);
+ Changed = true;
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+ if (CFH.canHoistPHI(PN)) {
+ // Redirect incoming blocks first to ensure that we create hoisted
+ // versions of those blocks before we hoist the phi.
+ for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
+ PN->setIncomingBlock(
+ i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
+ hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+ MSSAU, ORE);
+ assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
+ Changed = true;
+ continue;
+ }
+ }
+
+ // Remember possibly hoistable branches so we can actually hoist them
+ // later if needed.
+ if (BranchInst *BI = dyn_cast<BranchInst>(&I))
+ CFH.registerPossiblyHoistableBranch(BI);
+ }
+ }
+
+ // If we hoisted instructions to a conditional block they may not dominate
+ // their uses that weren't hoisted (such as phis where some operands are not
+ // loop invariant). If so, make them unconditional by moving them to their
+ // immediate dominator. We iterate through the instructions in reverse order,
+ // which ensures that when we rehoist an instruction we also rehoist its
+ // operands, and we keep track of where in the block we are rehoisting to, to
+ // make sure that we rehoist instructions before the instructions that use
+ // them.
+ Instruction *HoistPoint = nullptr;
+ if (ControlFlowHoisting) {
+ for (Instruction *I : reverse(HoistedInstructions)) {
+ if (!llvm::all_of(I->uses(),
+ [&](Use &U) { return DT->dominates(I, U); })) {
+ BasicBlock *Dominator =
+ DT->getNode(I->getParent())->getIDom()->getBlock();
+ if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
+ if (HoistPoint)
+ assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
+ "New hoist point expected to dominate old hoist point");
+ HoistPoint = Dominator->getTerminator();
+ }
+ LLVM_DEBUG(dbgs() << "LICM rehoisting to "
+ << HoistPoint->getParent()->getName()
+ << ": " << *I << "\n");
+ moveInstructionBefore(*I, *HoistPoint, *SafetyInfo);
+ HoistPoint = I;
+ Changed = true;
+ }
}
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ // Now that we've finished hoisting, make sure that LI and DT are still
+ // valid.
+#ifndef NDEBUG
+ if (Changed) {
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
+ "Dominator tree verification failed");
+ LI->verify(*DT);
+ }
+#endif
return Changed;
}
@@ -575,13 +942,68 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
return false;
}
+namespace {
+/// Return true if-and-only-if we know how to (mechanically) both hoist and
+/// sink a given instruction out of a loop. Does not address legality
+/// concerns such as aliasing or speculation safety.
+bool isHoistableAndSinkableInst(Instruction &I) {
+ // Only these instructions are hoistable/sinkable.
+ return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
+ isa<CallInst>(I) || isa<FenceInst>(I) ||
+ isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
+ isa<ExtractElementInst>(I) || isa<ShuffleVectorInst>(I) ||
+ isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
+}
+/// Return true if all of the alias sets within this AST are known not to
+/// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
+bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
+ const Loop *L) {
+ if (CurAST) {
+ for (AliasSet &AS : *CurAST) {
+ if (!AS.isForwardingAliasSet() && AS.isMod()) {
+ return false;
+ }
+ }
+ return true;
+ } else { /*MSSAU*/
+ for (auto *BB : L->getBlocks())
+ if (MSSAU->getMemorySSA()->getBlockDefs(BB))
+ return false;
+ return true;
+ }
+}
+
+/// Return true if I is the only Instruction with a MemoryAccess in L.
+bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
+ const MemorySSAUpdater *MSSAU) {
+ for (auto *BB : L->getBlocks())
+ if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
+ int NotAPhi = 0;
+ for (const auto &Acc : *Accs) {
+ if (isa<MemoryPhi>(&Acc))
+ continue;
+ const auto *MUD = cast<MemoryUseOrDef>(&Acc);
+ if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
+ return false;
+ }
+ }
+ return true;
+}
+}
+
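To make the intent of isReadOnly concrete, here is an illustrative (hypothetical) loop in which every memory operation in the body only reads: the alias set tracker sees no Mod set and MemorySSA records no MemoryDefs, so a read-only call such as strlen is a candidate for hoisting or sinking.

#include <cstring>

// Every access in the loop body is a read, so the loop is "read only" in the
// sense checked above and the strlen call can be moved out of the loop.
int sum_plus_len(const int *a, int n, const char *s) {
  int total = 0;
  for (int i = 0; i < n; ++i)
    total += a[i] + static_cast<int>(std::strlen(s));
  return total;
}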
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
- LoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU,
+ bool TargetExecutesOncePerLoop,
OptimizationRemarkEmitter *ORE) {
- // SafetyInfo is nullptr if we are checking for sinking from preheader to
- // loop body.
- const bool SinkingToLoopBody = !SafetyInfo;
+ // If we don't understand the instruction, bail early.
+ if (!isHoistableAndSinkableInst(I))
+ return false;
+
+ MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
+
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
@@ -594,23 +1016,20 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (LI->getMetadata(LLVMContext::MD_invariant_load))
return true;
- if (LI->isAtomic() && SinkingToLoopBody)
- return false; // Don't sink unordered atomic loads to loop body.
+ if (LI->isAtomic() && !TargetExecutesOncePerLoop)
+ return false; // Don't risk duplicating unordered loads
// This checks for an invariant.start dominating the load.
if (isLoadInvariantInLoop(LI, DT, CurLoop))
return true;
- // Don't hoist loads which have may-aliased stores in loop.
- uint64_t Size = 0;
- if (LI->getType()->isSized())
- Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType());
-
- AAMDNodes AAInfo;
- LI->getAAMetadata(AAInfo);
-
- bool Invalidated =
- pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST);
+ bool Invalidated;
+ if (CurAST)
+ Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
+ CurLoop, AA);
+ else
+ Invalidated = pointerInvalidatedByLoopWithMSSA(
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop);
// Check loop-invariant address because this may also be a sinkable load
// whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -631,6 +1050,11 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (CI->mayThrow())
return false;
+ using namespace PatternMatch;
+ if (match(CI, m_Intrinsic<Intrinsic::assume>()))
+ // Assumes don't actually alias anything or throw
+ return true;
+
// Handle simple cases by querying alias analysis.
FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == FMRB_DoesNotAccessMemory)
@@ -640,23 +1064,26 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// its arguments with arbitrary offsets. If we can prove there are no
// writes to this memory in the loop, we can hoist or sink.
if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
+ // TODO: expand to writeable arguments
for (Value *Op : CI->arg_operands())
- if (Op->getType()->isPointerTy() &&
- pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize,
- AAMDNodes(), CurAST))
- return false;
+ if (Op->getType()->isPointerTy()) {
+ bool Invalidated;
+ if (CurAST)
+ Invalidated = pointerInvalidatedByLoop(
+ MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()),
+ CurAST, CurLoop, AA);
+ else
+ Invalidated = pointerInvalidatedByLoopWithMSSA(
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop);
+ if (Invalidated)
+ return false;
+ }
return true;
}
+
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
- bool FoundMod = false;
- for (AliasSet &AS : *CurAST) {
- if (!AS.isForwardingAliasSet() && AS.isMod()) {
- FoundMod = true;
- break;
- }
- }
- if (!FoundMod)
+ if (isReadOnly(CurAST, MSSAU, CurLoop))
return true;
}
@@ -664,25 +1091,63 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// sink the call.
return false;
+ } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
+ // Fences alias (most) everything to provide ordering. For the moment,
+ // just give up if there are any other memory operations in the loop.
+ if (CurAST) {
+ auto Begin = CurAST->begin();
+ assert(Begin != CurAST->end() && "must contain FI");
+ if (std::next(Begin) != CurAST->end())
+ // Constant memory, for instance. TODO: handle this better.
+ return false;
+ auto *UniqueI = Begin->getUniqueInstruction();
+ if (!UniqueI)
+ // other memory op, give up
+ return false;
+ (void)FI; // suppress unused variable warning
+ assert(UniqueI == FI && "AS must contain FI");
+ return true;
+ } else // MSSAU
+ return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
+ } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ if (!SI->isUnordered())
+ return false; // Don't sink/hoist volatile or ordered atomic store!
+
+ // We can only hoist a store that we can prove writes a value which is not
+ // read or overwritten within the loop. For those cases, we fall back to
+ // load store promotion instead. TODO: We can extend this to cases where
+ // there is exactly one write to the location and that write dominates an
+ // arbitrary number of reads in the loop.
+ if (CurAST) {
+ auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
+
+ if (AS.isRef() || !AS.isMustAlias())
+ // Quick exit test, handled by the full path below as well.
+ return false;
+ auto *UniqueI = AS.getUniqueInstruction();
+ if (!UniqueI)
+ // other memory op, give up
+ return false;
+ assert(UniqueI == SI && "AS must contain SI");
+ return true;
+ } else { // MSSAU
+ if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
+ return true;
+ if (!EnableLicmCap) {
+ auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
+ if (MSSA->isLiveOnEntryDef(Source) ||
+ !CurLoop->contains(Source->getBlock()))
+ return true;
+ }
+ return false;
+ }
}
- // Only these instructions are hoistable/sinkable.
- if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) &&
- !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) &&
- !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
- !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) &&
- !isa<InsertValueInst>(I))
- return false;
-
- // If we are checking for sinking from preheader to loop body it will be
- // always safe as there is no speculative execution.
- if (SinkingToLoopBody)
- return true;
+ assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
- // TODO: Plumb the context instruction through to make hoisting and sinking
- // more powerful. Hoisting of loads already works due to the special casing
- // above.
- return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr);
+ // We've established mechanical ability and aliasing; it's up to the caller
+ // to check fault safety.
+ return true;
}
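The store case above is easier to see with a small (hypothetical) example: the store writes a loop-invariant value to a loop-invariant address that nothing in the loop reads or overwrites, so, assuming alias analysis can separate the two pointers, its clobbering access is live on entry and the store can be hoisted. If the flag were also read inside the loop, the code instead falls back to load/store promotion.

// Hypothetical input for the store-hoisting case described above; __restrict
// stands in for whatever lets alias analysis prove the pointers are distinct.
void mark_and_double(int *__restrict flag, int *__restrict a, int n) {
  for (int i = 0; i < n; ++i) {
    *flag = 1;       // same value every iteration, never read in the loop
    a[i] = a[i] * 2; // unrelated memory traffic
  }
}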
/// Returns true if a PHINode is a trivially replaceable with an
@@ -730,7 +1195,7 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
TargetTransformInfo *TTI, bool &FreeInLoop) {
- const auto &BlockColors = SafetyInfo->BlockColors;
+ const auto &BlockColors = SafetyInfo->getBlockColors();
bool IsFree = isFreeInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
@@ -759,13 +1224,12 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
return true;
}
-static Instruction *
-CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
- const LoopInfo *LI,
- const LoopSafetyInfo *SafetyInfo) {
+static Instruction *CloneInstructionInExitBlock(
+ Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
+ const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
Instruction *New;
if (auto *CI = dyn_cast<CallInst>(&I)) {
- const auto &BlockColors = SafetyInfo->BlockColors;
+ const auto &BlockColors = SafetyInfo->getBlockColors();
// Sinking call-sites need to be handled differently from other
// instructions. The cloned call-site needs a funclet bundle operand
@@ -798,6 +1262,21 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
if (!I.getName().empty())
New->setName(I.getName() + ".le");
+ MemoryAccess *OldMemAcc;
+ if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
+ // Create a new MemoryAccess and let MemorySSA set its defining access.
+ MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
+ New, nullptr, New->getParent(), MemorySSA::Beginning);
+ if (NewMemAcc) {
+ if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
+ MSSAU->insertDef(MemDef, /*RenameUses=*/true);
+ else {
+ auto *MemUse = cast<MemoryUse>(NewMemAcc);
+ MSSAU->insertUse(MemUse);
+ }
+ }
+ }
+
// Build LCSSA PHI nodes for any in-loop operands. Note that this is
// particularly cheap because we can rip off the PHI node that we're
// replacing for the number and blocks of the predecessors.
@@ -820,10 +1299,28 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
return New;
}
+static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
+ AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
+ if (AST)
+ AST->deleteValue(&I);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(&I);
+ SafetyInfo.removeInstruction(&I);
+ I.eraseFromParent();
+}
+
+static void moveInstructionBefore(Instruction &I, Instruction &Dest,
+ ICFLoopSafetyInfo &SafetyInfo) {
+ SafetyInfo.removeInstruction(&I);
+ SafetyInfo.insertInstructionTo(&I, Dest.getParent());
+ I.moveBefore(&Dest);
+}
+
static Instruction *sinkThroughTriviallyReplaceablePHI(
PHINode *TPN, Instruction *I, LoopInfo *LI,
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
- const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) {
+ const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
+ MemorySSAUpdater *MSSAU) {
assert(isTriviallyReplaceablePHI(*TPN, *I) &&
"Expect only trivially replaceable PHI");
BasicBlock *ExitBlock = TPN->getParent();
@@ -832,8 +1329,8 @@ static Instruction *sinkThroughTriviallyReplaceablePHI(
if (It != SunkCopies.end())
New = It->second;
else
- New = SunkCopies[ExitBlock] =
- CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo);
+ New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
+ *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
return New;
}
@@ -845,7 +1342,7 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
// it requires updating BlockColors for all offspring blocks accordingly. By
// skipping such corner case, we can make updating BlockColors after splitting
// predecessor fairly simple.
- if (!SafetyInfo->BlockColors.empty() && BB->getFirstNonPHI()->isEHPad())
+ if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
return false;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *BBPred = *PI;
@@ -857,7 +1354,8 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
LoopInfo *LI, const Loop *CurLoop,
- LoopSafetyInfo *SafetyInfo) {
+ LoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU) {
#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
@@ -899,7 +1397,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
// LE:
// %p = phi [%p1, %LE.split], [%p2, %LE.split2]
//
- auto &BlockColors = SafetyInfo->BlockColors;
+ const auto &BlockColors = SafetyInfo->getBlockColors();
SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
while (!PredBBs.empty()) {
BasicBlock *PredBB = *PredBBs.begin();
@@ -907,18 +1405,15 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
"Expect all predecessors are in the loop");
if (PN->getBasicBlockIndex(PredBB) >= 0) {
BasicBlock *NewPred = SplitBlockPredecessors(
- ExitBB, PredBB, ".split.loop.exit", DT, LI, true);
+ ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
// Since we do not allow splitting EH-block with BlockColors in
// canSplitPredecessors(), we can simply assign predecessor's color to
// the new block.
- if (!BlockColors.empty()) {
+ if (!BlockColors.empty())
// Grab a reference to the ColorVector to be inserted before getting the
// reference to the vector we are copying because inserting the new
// element in BlockColors might cause the map to be reallocated.
- ColorVector &ColorsForNewBlock = BlockColors[NewPred];
- ColorVector &ColorsForOldBlock = BlockColors[PredBB];
- ColorsForNewBlock = ColorsForOldBlock;
- }
+ SafetyInfo->copyColors(NewPred, PredBB);
}
PredBBs.remove(PredBB);
}
@@ -930,8 +1425,9 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- const Loop *CurLoop, LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
+ const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
+ bool FreeInLoop) {
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
@@ -983,7 +1479,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// Split predecessors of the PHI so that we can make users trivially
// replaceable.
- splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo);
+ splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
// Should rebuild the iterators, as they may be invalidated by
// splitPredecessorsOfLoopExit().
@@ -1018,10 +1514,10 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!");
// The PHI must be trivially replaceable.
- Instruction *New = sinkThroughTriviallyReplaceablePHI(PN, &I, LI, SunkCopies,
- SafetyInfo, CurLoop);
+ Instruction *New = sinkThroughTriviallyReplaceablePHI(
+ PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
PN->replaceAllUsesWith(New);
- PN->eraseFromParent();
+ eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
Changed = true;
}
return Changed;
@@ -1030,11 +1526,10 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
/// When an instruction is found to only use loop invariant operands and
/// is safe to hoist, this function is called to do the dirty work.
///
-static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE) {
- auto *Preheader = CurLoop->getLoopPreheader();
- LLVM_DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
+static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
+ BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
+ LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
<< "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
@@ -1049,11 +1544,22 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
// The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
// time in isGuaranteedToExecute if we don't actually have anything to
// drop. It is a compile time optimization, not required for correctness.
- !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo))
+ !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
I.dropUnknownNonDebugMetadata();
- // Move the new node to the Preheader, before its terminator.
- I.moveBefore(Preheader->getTerminator());
+ if (isa<PHINode>(I))
+ // Move the new node to the end of the phi list in the destination block.
+ moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo);
+ else
+ // Move the new node to the destination block, before its terminator.
+ moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo);
+ if (MSSAU) {
+ // If the instruction we just moved is a load or store, update MemorySSA.
+ MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&I));
+ if (OldMemAcc)
+ MSSAU->moveToPlace(OldMemAcc, Dest, MemorySSA::End);
+ }
// Do not retain debug locations when we are moving instructions to different
// basic blocks, because we want to avoid jumpy line tables. Calls, however,
@@ -1068,7 +1574,6 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
else if (isa<CallInst>(I))
++NumMovedCalls;
++NumHoisted;
- return true;
}
/// Only sink or hoist an instruction if it is not a trapping instruction,
@@ -1084,7 +1589,7 @@ static bool isSafeToExecuteUnconditionally(Instruction &Inst,
return true;
bool GuaranteedToExecute =
- isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);
+ SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
if (!GuaranteedToExecute) {
auto *LI = dyn_cast<LoadInst>(&Inst);
@@ -1113,6 +1618,7 @@ class LoopPromoter : public LoadAndStorePromoter {
int Alignment;
bool UnorderedAtomic;
AAMDNodes AATags;
+ ICFLoopSafetyInfo &SafetyInfo;
Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
if (Instruction *I = dyn_cast<Instruction>(V))
@@ -1135,11 +1641,13 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
- bool UnorderedAtomic, const AAMDNodes &AATags)
+ bool UnorderedAtomic, const AAMDNodes &AATags,
+ ICFLoopSafetyInfo &SafetyInfo)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
LI(li), DL(std::move(dl)), Alignment(alignment),
- UnorderedAtomic(UnorderedAtomic), AATags(AATags) {}
+ UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo)
+ {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
@@ -1176,7 +1684,10 @@ public:
// Update alias analysis.
AST.copyValue(LI, V);
}
- void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); }
+ void instructionDeleted(Instruction *I) const override {
+ SafetyInfo.removeInstruction(I);
+ AST.deleteValue(I);
+ }
};
@@ -1214,7 +1725,7 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVectorImpl<BasicBlock *> &ExitBlocks,
SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
- Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
+ Loop *CurLoop, AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
@@ -1277,7 +1788,7 @@ bool llvm::promoteLoopAccessesToScalars(
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
bool IsKnownThreadLocalObject = false;
- if (SafetyInfo->MayThrow) {
+ if (SafetyInfo->anyBlockMayThrow()) {
// If a loop can throw, we have to insert a store along each unwind edge.
// That said, we can't actually make the unwind edge explicit. Therefore,
// we have to prove that the store is dead along the unwind edge. We do
@@ -1310,7 +1821,6 @@ bool llvm::promoteLoopAccessesToScalars(
// If there is a non-load/store instruction in the loop, we can't promote
// it.
if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
- assert(!Load->isVolatile() && "AST broken");
if (!Load->isUnordered())
return false;
@@ -1325,7 +1835,6 @@ bool llvm::promoteLoopAccessesToScalars(
// pointer.
if (UI->getOperand(1) != ASIV)
continue;
- assert(!Store->isVolatile() && "AST broken");
if (!Store->isUnordered())
return false;
@@ -1344,7 +1853,7 @@ bool llvm::promoteLoopAccessesToScalars(
if (!DereferenceableInPH || !SafeToInsertStore ||
(InstAlignment > Alignment)) {
- if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
+ if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
DereferenceableInPH = true;
SafeToInsertStore = true;
Alignment = std::max(Alignment, InstAlignment);
@@ -1435,7 +1944,7 @@ bool llvm::promoteLoopAccessesToScalars(
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
InsertPts, PIC, *CurAST, *LI, DL, Alignment,
- SawUnorderedAtomic, AATags);
+ SawUnorderedAtomic, AATags, *SafetyInfo);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
@@ -1455,7 +1964,7 @@ bool llvm::promoteLoopAccessesToScalars(
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty())
- PreheaderLoad->eraseFromParent();
+ eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, nullptr);
return true;
}
@@ -1466,10 +1975,10 @@ bool llvm::promoteLoopAccessesToScalars(
/// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
/// from scratch for every loop. Hook up with the helper functions when
/// available in the new pass manager to avoid redundant computation.
-AliasSetTracker *
+std::unique_ptr<AliasSetTracker>
LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
AliasAnalysis *AA) {
- AliasSetTracker *CurAST = nullptr;
+ std::unique_ptr<AliasSetTracker> CurAST;
SmallVector<Loop *, 4> RecomputeLoops;
for (Loop *InnerL : L->getSubLoops()) {
auto MapI = LoopToAliasSetMap.find(InnerL);
@@ -1480,35 +1989,30 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
RecomputeLoops.push_back(InnerL);
continue;
}
- AliasSetTracker *InnerAST = MapI->second;
+ std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);
- if (CurAST != nullptr) {
+ if (CurAST) {
// What if InnerLoop was modified by other passes?
- CurAST->add(*InnerAST);
-
// Once we've incorporated the inner loop's AST into ours, we don't need
// the subloop's anymore.
- delete InnerAST;
+ CurAST->add(*InnerAST);
} else {
- CurAST = InnerAST;
+ CurAST = std::move(InnerAST);
}
LoopToAliasSetMap.erase(MapI);
}
- if (CurAST == nullptr)
- CurAST = new AliasSetTracker(*AA);
-
- auto mergeLoop = [&](Loop *L) {
- // Loop over the body of this loop, looking for calls, invokes, and stores.
- for (BasicBlock *BB : L->blocks())
- CurAST->add(*BB); // Incorporate the specified basic block
- };
+ if (!CurAST)
+ CurAST = make_unique<AliasSetTracker>(*AA);
// Add everything from the sub loops that are no longer directly available.
for (Loop *InnerL : RecomputeLoops)
- mergeLoop(InnerL);
+ for (BasicBlock *BB : InnerL->blocks())
+ CurAST->add(*BB);
- // And merge in this loop.
- mergeLoop(L);
+ // And merge in this loop (without anything from inner loops).
+ for (BasicBlock *BB : L->blocks())
+ if (LI->getLoopFor(BB) == L)
+ CurAST->add(*BB);
return CurAST;
}
@@ -1517,42 +2021,89 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
///
void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
Loop *L) {
- AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
- if (!AST)
+ auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
+ if (ASTIt == LICM.getLoopToAliasSetMap().end())
return;
- AST->copyValue(From, To);
+ ASTIt->second->copyValue(From, To);
}
/// Simple Analysis hook. Delete value V from alias set
///
void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
- AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
- if (!AST)
+ auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
+ if (ASTIt == LICM.getLoopToAliasSetMap().end())
return;
- AST->deleteValue(V);
+ ASTIt->second->deleteValue(V);
}
/// Simple Analysis hook. Delete value L from alias set map.
///
void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
- AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
- if (!AST)
+ if (!LICM.getLoopToAliasSetMap().count(L))
return;
- delete AST;
LICM.getLoopToAliasSetMap().erase(L);
}
-/// Return true if the body of this loop may store into the memory
-/// location pointed to by V.
-///
-static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
- const AAMDNodes &AAInfo,
- AliasSetTracker *CurAST) {
- // Check to see if any of the basic blocks in CurLoop invalidate *V.
- return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
+static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
+ AliasSetTracker *CurAST, Loop *CurLoop,
+ AliasAnalysis *AA) {
+ // First check to see if any of the basic blocks in CurLoop invalidate MemLoc.
+ bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
+
+ if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
+ return isInvalidatedAccordingToAST;
+
+ // Check with a diagnostic analysis if we can refine the information above.
+ // This is to identify the limitations of using the AST.
+ // The alias set mechanism used by LICM has a major weakness in that it
+ // combines all things which may alias into a single set *before* asking
+ // modref questions. As a result, a single readonly call within a loop will
+ // collapse all loads and stores into a single alias set and report
+ // invalidation if the loop contains any store. For example, readonly calls
+ // with deopt states have this form and create a general alias set with all
+ // loads and stores. In order to get any LICM in loops containing possible
+ // deopt states, we need a more precise invalidation check that queries the
+ // mod ref info of each instruction within the loop against LI. This has a
+ // complexity of O(N^2), so currently it is used only as a diagnostic tool
+ // since the
+ // default value of LICMN2Threshold is zero.
+
+ // Don't look at nested loops.
+ if (CurLoop->begin() != CurLoop->end())
+ return true;
+
+ int N = 0;
+ for (BasicBlock *BB : CurLoop->getBlocks())
+ for (Instruction &I : *BB) {
+ if (N >= LICMN2Theshold) {
+ LLVM_DEBUG(dbgs() << "Aliasing N2 threshold exhausted for "
+ << *(MemLoc.Ptr) << "\n");
+ return true;
+ }
+ N++;
+ auto Res = AA->getModRefInfo(&I, MemLoc);
+ if (isModSet(Res)) {
+ LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
+ << *(MemLoc.Ptr) << "\n");
+ return true;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
+ return false;
+}
+
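The weakness described above is roughly the following situation (a hypothetical sketch, not a guaranteed reproduction): a read-only call that may read through several pointers pulls the location of an otherwise invariant load into the same alias set as the loop's stores, so the set is reported as Mod and the AST path refuses to hoist the load; the per-instruction mod/ref walk enabled by a non-zero LICMN2Theshold is meant to diagnose exactly such cases.

// Hypothetical illustration of the alias-set limitation discussed above.
__attribute__((pure)) static int snapshot(const int *p, const int *q,
                                          const int *r) {
  return *p + *q + *r; // reads through several pointers, writes nothing
}

void fill(int *__restrict out, const int *p, const int *q, const int *r,
          int n) {
  for (int i = 0; i < n; ++i)
    out[i] = *p + snapshot(p, q, r); // *p is invariant, but the merged alias
                                     // set containing the out[i] stores makes
                                     // the AST treat it as invalidated
}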
+static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
+ Loop *CurLoop) {
+ MemoryAccess *Source;
+ // See declaration of EnableLicmCap for usage details.
+ if (EnableLicmCap)
+ Source = MU->getDefiningAccess();
+ else
+ Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
+ return !MSSA->isLiveOnEntryDef(Source) &&
+ CurLoop->contains(Source->getBlock());
}
/// Little predicate that returns true if the specified basic block is in
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 06083a4f5086..d797c9dc9e72 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -78,6 +78,18 @@ using namespace llvm;
#define LDIST_NAME "loop-distribute"
#define DEBUG_TYPE LDIST_NAME
+/// @{
+/// Metadata attribute names
+static const char *const LLVMLoopDistributeFollowupAll =
+ "llvm.loop.distribute.followup_all";
+static const char *const LLVMLoopDistributeFollowupCoincident =
+ "llvm.loop.distribute.followup_coincident";
+static const char *const LLVMLoopDistributeFollowupSequential =
+ "llvm.loop.distribute.followup_sequential";
+static const char *const LLVMLoopDistributeFollowupFallback =
+ "llvm.loop.distribute.followup_fallback";
+/// @}
+
static cl::opt<bool>
LDistVerify("loop-distribute-verify", cl::Hidden,
cl::desc("Turn on DominatorTree and LoopInfo verification "
@@ -186,7 +198,7 @@ public:
/// Returns the loop where this partition ends up after distribution.
/// If this partition is mapped to the original loop then use the block from
/// the loop.
- const Loop *getDistributedLoop() const {
+ Loop *getDistributedLoop() const {
return ClonedLoop ? ClonedLoop : OrigLoop;
}
@@ -443,6 +455,9 @@ public:
assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
"preheader not empty");
+ // Preserve the original loop ID for use after the transformation.
+ MDNode *OrigLoopID = L->getLoopID();
+
// Create a loop for each partition except the last. Clone the original
// loop before PH along with adding a preheader for the cloned loop. Then
// update PH to point to the newly added preheader.
@@ -457,9 +472,13 @@ public:
Part->getVMap()[ExitBlock] = TopPH;
Part->remapInstructions();
+ setNewLoopID(OrigLoopID, Part);
}
Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
+ // Also set a new loop ID for the last loop.
+ setNewLoopID(OrigLoopID, &PartitionContainer.back());
+
// Now go in forward order and update the immediate dominator for the
// preheaders with the exiting block of the previous loop. Dominance
// within the loop is updated in cloneLoopWithPreheader.
@@ -575,6 +594,19 @@ private:
}
}
}
+
+ /// Assign new LoopIDs for the partition's cloned loop.
+ void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
+ Optional<MDNode *> PartitionID = makeFollowupLoopID(
+ OrigLoopID,
+ {LLVMLoopDistributeFollowupAll,
+ Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
+ : LLVMLoopDistributeFollowupCoincident});
+ if (PartitionID.hasValue()) {
+ Loop *NewLoop = Part->getDistributedLoop();
+ NewLoop->setLoopID(PartitionID.getValue());
+ }
+ }
};
/// For each memory instruction, this class maintains difference of the
@@ -743,6 +775,9 @@ public:
return fail("TooManySCEVRuntimeChecks",
"too many SCEV run-time checks needed.\n");
+ if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
+ return fail("HeuristicDisabled", "distribution heuristic disabled");
+
LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
// We're done forming the partitions set up the reverse mapping from
// instructions to partitions.
@@ -762,6 +797,8 @@ public:
RtPtrChecking);
if (!Pred.isAlwaysTrue() || !Checks.empty()) {
+ MDNode *OrigLoopID = L->getLoopID();
+
LLVM_DEBUG(dbgs() << "\nPointers:\n");
LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
@@ -769,6 +806,17 @@ public:
LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
LVer.versionLoop(DefsUsedOutside);
LVer.annotateLoopWithNoAlias();
+
+ // The unversioned loop will not be changed, so we inherit all attributes
+ // from the original loop, but remove the loop distribution metadata to
+ // avoid distributing it again.
+ MDNode *UnversionedLoopID =
+ makeFollowupLoopID(OrigLoopID,
+ {LLVMLoopDistributeFollowupAll,
+ LLVMLoopDistributeFollowupFallback},
+ "llvm.loop.distribute.", true)
+ .getValue();
+ LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
}
// Create identical copies of the original loop for each partition and hook
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index eb3188711858..fbffa1920a84 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -26,7 +26,7 @@
// Future floating point idioms to recognize in -ffast-math mode:
// fpowi
// Future integer operation idioms to recognize:
-// ctpop, ctlz, cttz
+// ctpop
//
// Beware that isel's default lowering for ctpop is highly inefficient for
// i64 and larger types when i64 is legal and the value has few bits set. It
@@ -163,8 +163,9 @@ private:
void collectStores(BasicBlock *BB);
LegalStoreKind isLegalStore(StoreInst *SI);
+ enum class ForMemset { No, Yes };
bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
- bool ForMemset);
+ ForMemset For);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
@@ -186,9 +187,10 @@ private:
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
- bool recognizeAndInsertCTLZ();
- void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst,
- PHINode *CntPhi, Value *Var, Instruction *DefX,
+ bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
+ void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
+ Instruction *CntInst, PHINode *CntPhi,
+ Value *Var, Instruction *DefX,
const DebugLoc &DL, bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop);
@@ -319,9 +321,9 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
// The following transforms hoist stores/memsets into the loop pre-header.
// Give up if the loop has instructions that may throw.
- LoopSafetyInfo SafetyInfo;
- computeLoopSafetyInfo(&SafetyInfo, CurLoop);
- if (SafetyInfo.MayThrow)
+ SimpleLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(CurLoop);
+ if (SafetyInfo.anyBlockMayThrow())
return MadeChange;
// Scan all the blocks in the loop that are not in subloops.
@@ -347,6 +349,9 @@ static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
+ // FIXME: This could check for UndefValue because it can be merged into any
+ // other valid pattern.
+
// If the value isn't a constant, we can't promote it to being in a constant
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
@@ -543,10 +548,10 @@ bool LoopIdiomRecognize::runOnLoopBlock(
// optimized into a memset (memset_pattern). The latter most commonly happens
// with structs and hand-unrolled loops.
for (auto &SL : StoreRefsForMemset)
- MadeChange |= processLoopStores(SL.second, BECount, true);
+ MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
for (auto &SL : StoreRefsForMemsetPattern)
- MadeChange |= processLoopStores(SL.second, BECount, false);
+ MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
// Optimize the store into a memcpy, if it feeds a similarly strided load.
for (auto &SI : StoreRefsForMemcpy)
@@ -572,10 +577,9 @@ bool LoopIdiomRecognize::runOnLoopBlock(
return MadeChange;
}
-/// processLoopStores - See if this store(s) can be promoted to a memset.
+/// See if these stores can be promoted to a memset.
bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
- const SCEV *BECount,
- bool ForMemset) {
+ const SCEV *BECount, ForMemset For) {
// Try to find consecutive stores that can be transformed into memsets.
SetVector<StoreInst *> Heads, Tails;
SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
@@ -602,7 +606,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
Value *FirstSplatValue = nullptr;
Constant *FirstPatternValue = nullptr;
- if (ForMemset)
+ if (For == ForMemset::Yes)
FirstSplatValue = isBytewiseValue(FirstStoredVal);
else
FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
@@ -635,7 +639,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
Value *SecondSplatValue = nullptr;
Constant *SecondPatternValue = nullptr;
- if (ForMemset)
+ if (For == ForMemset::Yes)
SecondSplatValue = isBytewiseValue(SecondStoredVal);
else
SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
@@ -644,10 +648,14 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
"Expected either splat value or pattern value.");
if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
- if (ForMemset) {
+ if (For == ForMemset::Yes) {
+ if (isa<UndefValue>(FirstSplatValue))
+ FirstSplatValue = SecondSplatValue;
if (FirstSplatValue != SecondSplatValue)
continue;
} else {
+ if (isa<UndefValue>(FirstPatternValue))
+ FirstPatternValue = SecondPatternValue;
if (FirstPatternValue != SecondPatternValue)
continue;
}
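For context, this is the kind of consecutive-store loop that processLoopStores merges into a single memset (a hypothetical example, not from the patch); the change above additionally lets a store whose value is undef pair up with its neighbour instead of blocking the merge.

// Two adjacent byte stores of the same splat value per iteration; the pass
// recognizes the pair as one strided store and emits memset(p, 0, 2 * n).
struct Pair { char a, b; };

void clear_pairs(Pair *p, int n) {
  for (int i = 0; i < n; ++i) {
    p[i].a = 0;
    p[i].b = 0;
  }
}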
@@ -772,12 +780,13 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
- uint64_t AccessSize = MemoryLocation::UnknownSize;
+ LocationSize AccessSize = LocationSize::unknown();
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
+ AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
+ StoreSize);
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
@@ -1100,15 +1109,17 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
}
bool LoopIdiomRecognize::runOnNoncountableLoop() {
- return recognizePopcount() || recognizeAndInsertCTLZ();
+ return recognizePopcount() || recognizeAndInsertFFS();
}
/// Check if the given conditional branch is based on the comparison between
-/// a variable and zero, and if the variable is non-zero, the control yields to
-/// the loop entry. If the branch matches the behavior, the variable involved
-/// in the comparison is returned. This function will be called to see if the
-/// precondition and postcondition of the loop are in desirable form.
-static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
+/// a variable and zero, and if the variable is non-zero (or zero, when JmpOnZero
+/// is true), the control yields to the loop entry. If the branch matches the
+/// behavior, the variable involved in the comparison is returned. This function
+/// will be called to see if the precondition and postcondition of the loop are
+/// in desirable form.
+static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
+ bool JmpOnZero = false) {
if (!BI || !BI->isConditional())
return nullptr;
@@ -1120,9 +1131,14 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
if (!CmpZero || !CmpZero->isZero())
return nullptr;
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ if (JmpOnZero)
+ std::swap(TrueSucc, FalseSucc);
+
ICmpInst::Predicate Pred = Cond->getPredicate();
- if ((Pred == ICmpInst::ICMP_NE && BI->getSuccessor(0) == LoopEntry) ||
- (Pred == ICmpInst::ICMP_EQ && BI->getSuccessor(1) == LoopEntry))
+ if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
+ (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
return Cond->getOperand(0);
return nullptr;
@@ -1298,14 +1314,14 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
///
/// loop-exit:
/// \endcode
-static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
- Instruction *&CntInst, PHINode *&CntPhi,
- Instruction *&DefX) {
+static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
+ Intrinsic::ID &IntrinID, Value *&InitX,
+ Instruction *&CntInst, PHINode *&CntPhi,
+ Instruction *&DefX) {
BasicBlock *LoopEntry;
Value *VarX = nullptr;
DefX = nullptr;
- PhiX = nullptr;
CntInst = nullptr;
CntPhi = nullptr;
LoopEntry = *(CurLoop->block_begin());
@@ -1317,20 +1333,28 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
else
return false;
- // step 2: detect instructions corresponding to "x.next = x >> 1"
- if (!DefX || (DefX->getOpcode() != Instruction::AShr &&
- DefX->getOpcode() != Instruction::LShr))
+ // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
+ if (!DefX || !DefX->isShift())
return false;
+ IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
+ Intrinsic::ctlz;
ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
if (!Shft || !Shft->isOne())
return false;
VarX = DefX->getOperand(0);
// step 3: Check the recurrence of variable X
- PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
+ PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
if (!PhiX)
return false;
+ InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
+
+ // Make sure the initial value can't be negative, otherwise the ashr in the
+ // loop might never reach zero, which would make the loop infinite.
+ if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
+ return false;
+
// step 4: Find the instruction which counts the CTLZ: cnt.next = cnt + 1
// TODO: We can skip the step. If loop trip count is known (CTLZ),
// then all uses of "cnt.next" could be optimized to the trip count
@@ -1362,17 +1386,25 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
return true;
}
-/// Recognize CTLZ idiom in a non-countable loop and convert the loop
-/// to countable (with CTLZ trip count).
-/// If CTLZ inserted as a new trip count returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
+/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ / CTTZ trip count). If a CTLZ / CTTZ is inserted as
+/// a new trip count, returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertFFS() {
// Give up if the loop has multiple blocks or multiple backedges.
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
return false;
- Instruction *CntInst, *DefX;
- PHINode *CntPhi, *PhiX;
- if (!detectCTLZIdiom(CurLoop, PhiX, CntInst, CntPhi, DefX))
+ Intrinsic::ID IntrinID;
+ Value *InitX;
+ Instruction *DefX = nullptr;
+ PHINode *CntPhi = nullptr;
+ Instruction *CntInst = nullptr;
+ // Help decide if the transformation is profitable. For the ShiftUntilZero
+ // idiom, this is always 6.
+ size_t IdiomCanonicalSize = 6;
+
+ if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
+ CntInst, CntPhi, DefX))
return false;
bool IsCntPhiUsedOutsideLoop = false;
@@ -1399,12 +1431,6 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
// It is safe to assume the Preheader exists, as it was checked in the
// parent function RunOnLoop.
BasicBlock *PH = CurLoop->getLoopPreheader();
- Value *InitX = PhiX->getIncomingValueForBlock(PH);
-
- // Make sure the initial value can't be negative otherwise the ashr in the
- // loop might never reach zero which would make the loop infinite.
- if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL))
- return false;
// If we are using the count instruction outside the loop, make sure we
// have a zero check as a precondition. Without the check the loop would run
@@ -1422,8 +1448,10 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
ZeroCheck = true;
}
- // Check if CTLZ intrinsic is profitable. Assume it is always profitable
- // if we delete the loop (the loop has only 6 instructions):
+ // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
+ // profitable if we delete the loop.
+
+  // The loop has only 6 instructions:
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
// %shr = ashr %n.addr.0, 1
@@ -1434,12 +1462,12 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
const Value *Args[] =
{InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
: ConstantInt::getFalse(InitX->getContext())};
- if (CurLoop->getHeader()->size() != 6 &&
- TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) >
- TargetTransformInfo::TCC_Basic)
+ if (CurLoop->getHeader()->size() != IdiomCanonicalSize &&
+ TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
+ TargetTransformInfo::TCC_Basic)
return false;
- transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX,
+ transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
DefX->getDebugLoc(), ZeroCheck,
IsCntPhiUsedOutsideLoop);
return true;
@@ -1508,20 +1536,21 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
return CI;
}
-static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
- const DebugLoc &DL, bool ZeroCheck) {
+static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
+ const DebugLoc &DL, bool ZeroCheck,
+ Intrinsic::ID IID) {
Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
Type *Tys[] = {Val->getType()};
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
- Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys);
+ Value *Func = Intrinsic::getDeclaration(M, IID, Tys);
CallInst *CI = IRBuilder.CreateCall(Func, Ops);
CI->setDebugLoc(DL);
return CI;
}
-/// Transform the following loop:
+/// Transform the following loop (Using CTLZ, CTTZ is similar):
/// loop:
/// CntPhi = PHI [Cnt0, CntInst]
/// PhiX = PHI [InitX, DefX]
@@ -1553,19 +1582,19 @@ static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
/// If LOOP_BODY is empty the loop will be deleted.
/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
void LoopIdiomRecognize::transformLoopToCountable(
- BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX,
- Instruction *DefX, const DebugLoc &DL, bool ZeroCheck,
- bool IsCntPhiUsedOutsideLoop) {
+ Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
+ PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
+ bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
- // Step 1: Insert the CTLZ instruction at the end of the preheader block
- // Count = BitWidth - CTLZ(InitX);
- // If there are uses of CntPhi create:
- // CountPrev = BitWidth - CTLZ(InitX >> 1);
+ // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
IRBuilder<> Builder(PreheaderBr);
Builder.SetCurrentDebugLocation(DL);
- Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext;
+ Value *FFS, *Count, *CountPrev, *NewCount, *InitXNext;
+ // Count = BitWidth - CTLZ(InitX);
+ // If there are uses of CntPhi create:
+ // CountPrev = BitWidth - CTLZ(InitX >> 1);
if (IsCntPhiUsedOutsideLoop) {
if (DefX->getOpcode() == Instruction::AShr)
InitXNext =
@@ -1573,29 +1602,30 @@ void LoopIdiomRecognize::transformLoopToCountable(
else if (DefX->getOpcode() == Instruction::LShr)
InitXNext =
Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
+ else if (DefX->getOpcode() == Instruction::Shl) // cttz
+ InitXNext =
+ Builder.CreateShl(InitX, ConstantInt::get(InitX->getType(), 1));
else
llvm_unreachable("Unexpected opcode!");
} else
InitXNext = InitX;
- CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
+ FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
Count = Builder.CreateSub(
- ConstantInt::get(CTLZ->getType(),
- CTLZ->getType()->getIntegerBitWidth()),
- CTLZ);
+ ConstantInt::get(FFS->getType(),
+ FFS->getType()->getIntegerBitWidth()),
+ FFS);
if (IsCntPhiUsedOutsideLoop) {
CountPrev = Count;
Count = Builder.CreateAdd(
CountPrev,
ConstantInt::get(CountPrev->getType(), 1));
}
- if (IsCntPhiUsedOutsideLoop)
- NewCount = Builder.CreateZExtOrTrunc(CountPrev,
- cast<IntegerType>(CntInst->getType()));
- else
- NewCount = Builder.CreateZExtOrTrunc(Count,
- cast<IntegerType>(CntInst->getType()));
- // If the CTLZ counter's initial value is not zero, insert Add Inst.
+ NewCount = Builder.CreateZExtOrTrunc(
+ IsCntPhiUsedOutsideLoop ? CountPrev : Count,
+ cast<IntegerType>(CntInst->getType()));
+
+ // If the counter's initial value is not zero, insert Add Inst.
Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
if (!InitConst || !InitConst->isZero())
@@ -1631,8 +1661,7 @@ void LoopIdiomRecognize::transformLoopToCountable(
LbCond->setOperand(1, ConstantInt::get(Ty, 0));
// Step 3: All the references to the original counter outside
- // the loop are replaced with the NewCount -- the value returned from
- // __builtin_ctlz(x).
+ // the loop are replaced with the NewCount
if (IsCntPhiUsedOutsideLoop)
CntPhi->replaceUsesOutsideBlock(NewCount, Body);
else
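The LoopIdiomRecognize changes above generalize the old CTLZ-only recognizer to both count-leading-zeros and count-trailing-zeros loops. A minimal C++ illustration (not taken from the patch; the function and variable names are made up) of the shift-until-zero shape that detectShiftUntilZeroIdiom() matches:

    unsigned shiftUntilZero(unsigned x) {
      unsigned cnt = 0;
      while (x != 0) {   // not countable as written
        x >>= 1;         // "x.next = x >> 1": lshr selects Intrinsic::ctlz
        ++cnt;           // "cnt.next = cnt + 1"
      }
      return cnt;        // becomes BitWidth - ctlz(x) after the transform
    }

The x <<= 1 variant selects Intrinsic::cttz instead, and the ashr form is only accepted when isKnownNonNegative() proves the initial value cannot go negative, as the new check after step 3 above requires.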
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 71859efbf4bd..6f7dc2429c09 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -22,8 +22,9 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
@@ -36,6 +37,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <utility>
@@ -47,8 +49,8 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of redundant instructions simplified");
static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
- AssumptionCache &AC,
- const TargetLibraryInfo &TLI) {
+ AssumptionCache &AC, const TargetLibraryInfo &TLI,
+ MemorySSAUpdater *MSSAU) {
const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
SimplifyQuery SQ(DL, &TLI, &DT, &AC);
@@ -75,9 +77,12 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
// iterate.
LoopBlocksRPO RPOT(&L);
RPOT.perform(&LI);
+ MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
bool Changed = false;
for (;;) {
+ if (MSSAU && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
for (BasicBlock *BB : RPOT) {
for (Instruction &I : *BB) {
if (auto *PI = dyn_cast<PHINode>(&I))
@@ -129,6 +134,12 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
ToSimplify->insert(UserI);
}
+ if (MSSAU)
+ if (Instruction *SimpleI = dyn_cast_or_null<Instruction>(V))
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(&I))
+ if (MemoryAccess *ReplacementMA = MSSA->getMemoryAccess(SimpleI))
+ MA->replaceAllUsesWith(ReplacementMA);
+
assert(I.use_empty() && "Should always have replaced all uses!");
if (isInstructionTriviallyDead(&I, &TLI))
DeadInsts.push_back(&I);
@@ -141,9 +152,12 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
// iteration over all instructions in all the loop blocks.
if (!DeadInsts.empty()) {
Changed = true;
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, &TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, &TLI, MSSAU);
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
// If we never found a PHI that needs to be simplified in the next
// iteration, we're done.
if (Next->empty())
@@ -180,8 +194,15 @@ public:
*L->getHeader()->getParent());
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ MemorySSA *MSSA = nullptr;
+ Optional<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency) {
+ MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = MemorySSAUpdater(MSSA);
+ }
- return simplifyLoopInst(*L, DT, LI, AC, TLI);
+ return simplifyLoopInst(*L, DT, LI, AC, TLI,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -189,6 +210,10 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesCFG();
+ if (EnableMSSALoopDependency) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
getLoopAnalysisUsage(AU);
}
};
@@ -198,7 +223,13 @@ public:
PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
- if (!simplifyLoopInst(L, AR.DT, AR.LI, AR.AC, AR.TLI))
+ Optional<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA) {
+ MSSAU = MemorySSAUpdater(AR.MSSA);
+ AR.MSSA->verifyMemorySSA();
+ }
+ if (!simplifyLoopInst(L, AR.DT, AR.LI, AR.AC, AR.TLI,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
@@ -212,6 +243,7 @@ INITIALIZE_PASS_BEGIN(LoopInstSimplifyLegacyPass, "loop-instsimplify",
"Simplify instructions in loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LoopInstSimplifyLegacyPass, "loop-instsimplify",
"Simplify instructions in loops", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 2978165ed8a9..766e39b439a0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -17,9 +17,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -271,7 +271,7 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
return true;
}
-static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
+static LoopVector populateWorklist(Loop &L) {
LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() << " Loop: %"
<< L.getHeader()->getName() << '\n');
@@ -282,16 +282,15 @@ static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
// The current loop has multiple subloops in it hence it is not tightly
// nested.
// Discard all loops above it added into Worklist.
- if (Vec->size() != 1) {
- LoopList.clear();
- return;
- }
+ if (Vec->size() != 1)
+ return {};
+
LoopList.push_back(CurrentLoop);
CurrentLoop = Vec->front();
Vec = &CurrentLoop->getSubLoops();
}
LoopList.push_back(CurrentLoop);
- V.push_back(std::move(LoopList));
+ return LoopList;
}
static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
@@ -327,10 +326,8 @@ namespace {
class LoopInterchangeLegality {
public:
LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
- LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA,
OptimizationRemarkEmitter *ORE)
- : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
- PreserveLCSSA(PreserveLCSSA), ORE(ORE) {}
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
/// Check if the loops can be interchanged.
bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
@@ -342,29 +339,33 @@ public:
bool currentLimitations();
- bool hasInnerLoopReduction() { return InnerLoopHasReduction; }
+ const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
+ return OuterInnerReductions;
+ }
private:
bool tightlyNested(Loop *Outer, Loop *Inner);
- bool containsUnsafeInstructionsInHeader(BasicBlock *BB);
- bool areAllUsesReductions(Instruction *Ins, Loop *L);
- bool containsUnsafeInstructionsInLatch(BasicBlock *BB);
+ bool containsUnsafeInstructions(BasicBlock *BB);
+
+ /// Discover induction and reduction PHIs in the header of \p L. Induction
+ /// PHIs are added to \p Inductions, reductions are added to
+ /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
+ /// to be passed as \p InnerLoop.
bool findInductionAndReductions(Loop *L,
SmallVector<PHINode *, 8> &Inductions,
- SmallVector<PHINode *, 8> &Reductions);
+ Loop *InnerLoop);
Loop *OuterLoop;
Loop *InnerLoop;
ScalarEvolution *SE;
- LoopInfo *LI;
- DominatorTree *DT;
- bool PreserveLCSSA;
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
- bool InnerLoopHasReduction = false;
+  /// Set of reduction PHIs taking part in a reduction across the inner and
+ /// outer loop.
+ SmallPtrSet<PHINode *, 4> OuterInnerReductions;
};
/// LoopInterchangeProfitability checks if it is profitable to interchange the
@@ -398,10 +399,9 @@ public:
LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
BasicBlock *LoopNestExit,
- bool InnerLoopContainsReductions)
+ const LoopInterchangeLegality &LIL)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
- LoopExit(LoopNestExit),
- InnerLoopHasReduction(InnerLoopContainsReductions) {}
+ LoopExit(LoopNestExit), LIL(LIL) {}
/// Interchange OuterLoop and InnerLoop.
bool transform();
@@ -416,8 +416,6 @@ private:
bool adjustLoopLinks();
void adjustLoopPreheaders();
bool adjustLoopBranches();
- void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
- BasicBlock *NewPred);
Loop *OuterLoop;
Loop *InnerLoop;
@@ -428,41 +426,34 @@ private:
LoopInfo *LI;
DominatorTree *DT;
BasicBlock *LoopExit;
- bool InnerLoopHasReduction;
+
+ const LoopInterchangeLegality &LIL;
};
// Main LoopInterchange Pass.
-struct LoopInterchange : public FunctionPass {
+struct LoopInterchange : public LoopPass {
static char ID;
ScalarEvolution *SE = nullptr;
LoopInfo *LI = nullptr;
DependenceInfo *DI = nullptr;
DominatorTree *DT = nullptr;
- bool PreserveLCSSA;
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
- LoopInterchange() : FunctionPass(ID) {
+ LoopInterchange() : LoopPass(ID) {
initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DependenceAnalysisWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
+ getLoopAnalysisUsage(AU);
}
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L) || L->getParentLoop())
return false;
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
@@ -470,21 +461,8 @@ struct LoopInterchange : public FunctionPass {
DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
- // Build up a worklist of loop pairs to analyze.
- SmallVector<LoopVector, 8> Worklist;
-
- for (Loop *L : *LI)
- populateWorklist(*L, Worklist);
- LLVM_DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n");
- bool Changed = true;
- while (!Worklist.empty()) {
- LoopVector LoopList = Worklist.pop_back_val();
- Changed = processLoopList(LoopList, F);
- }
- return Changed;
+ return processLoopList(populateWorklist(*L));
}
bool isComputableLoopNest(LoopVector LoopList) {
@@ -512,7 +490,7 @@ struct LoopInterchange : public FunctionPass {
return LoopList.size() - 1;
}
- bool processLoopList(LoopVector LoopList, Function &F) {
+ bool processLoopList(LoopVector LoopList) {
bool Changed = false;
unsigned LoopNestDepth = LoopList.size();
if (LoopNestDepth < 2) {
@@ -580,8 +558,7 @@ struct LoopInterchange : public FunctionPass {
Loop *InnerLoop = LoopList[InnerLoopId];
Loop *OuterLoop = LoopList[OuterLoopId];
- LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT,
- PreserveLCSSA, ORE);
+ LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
return false;
@@ -600,8 +577,8 @@ struct LoopInterchange : public FunctionPass {
<< "Loop interchanged with enclosing loop.";
});
- LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
- LoopNestExit, LIL.hasInnerLoopReduction());
+ LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit,
+ LIL);
LIT.transform();
LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
LoopsInterchanged++;
@@ -611,42 +588,12 @@ struct LoopInterchange : public FunctionPass {
} // end anonymous namespace
-bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) {
- return llvm::none_of(Ins->users(), [=](User *U) -> bool {
- auto *UserIns = dyn_cast<PHINode>(U);
- RecurrenceDescriptor RD;
- return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD);
+bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
+ return any_of(*BB, [](const Instruction &I) {
+ return I.mayHaveSideEffects() || I.mayReadFromMemory();
});
}
-bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader(
- BasicBlock *BB) {
- for (Instruction &I : *BB) {
- // Load corresponding to reduction PHI's are safe while concluding if
- // tightly nested.
- if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
- if (!areAllUsesReductions(L, InnerLoop))
- return true;
- } else if (I.mayHaveSideEffects() || I.mayReadFromMemory())
- return true;
- }
- return false;
-}
-
-bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch(
- BasicBlock *BB) {
- for (Instruction &I : *BB) {
- // Stores corresponding to reductions are safe while concluding if tightly
- // nested.
- if (StoreInst *L = dyn_cast<StoreInst>(&I)) {
- if (!isa<PHINode>(L->getOperand(0)))
- return true;
- } else if (I.mayHaveSideEffects() || I.mayReadFromMemory())
- return true;
- }
- return false;
-}
-
bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
@@ -662,15 +609,16 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
if (!OuterLoopHeaderBI)
return false;
- for (BasicBlock *Succ : OuterLoopHeaderBI->successors())
- if (Succ != InnerLoopPreHeader && Succ != OuterLoopLatch)
+ for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
+ if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
+ Succ != OuterLoopLatch)
return false;
LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
// We do not have any basic block in between now make sure the outer header
// and outer loop latch doesn't contain any unsafe instructions.
- if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
- containsUnsafeInstructionsInLatch(OuterLoopLatch))
+ if (containsUnsafeInstructions(OuterLoopHeader) ||
+ containsUnsafeInstructions(OuterLoopLatch))
return false;
LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
@@ -702,9 +650,36 @@ bool LoopInterchangeLegality::isLoopStructureUnderstood(
return true;
}
+// If SV is a LCSSA PHI node with a single incoming value, return the incoming
+// value.
+static Value *followLCSSA(Value *SV) {
+ PHINode *PHI = dyn_cast<PHINode>(SV);
+ if (!PHI)
+ return SV;
+
+ if (PHI->getNumIncomingValues() != 1)
+ return SV;
+ return followLCSSA(PHI->getIncomingValue(0));
+}
+
+// Check V's users to see if it is involved in a reduction in L.
+static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
+ for (Value *User : V->users()) {
+ if (PHINode *PHI = dyn_cast<PHINode>(User)) {
+ if (PHI->getNumIncomingValues() == 1)
+ continue;
+ RecurrenceDescriptor RD;
+ if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
+ return PHI;
+ return nullptr;
+ }
+ }
+
+ return nullptr;
+}
+
bool LoopInterchangeLegality::findInductionAndReductions(
- Loop *L, SmallVector<PHINode *, 8> &Inductions,
- SmallVector<PHINode *, 8> &Reductions) {
+ Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
if (!L->getLoopLatch() || !L->getLoopPredecessor())
return false;
for (PHINode &PHI : L->getHeader()->phis()) {
@@ -712,12 +687,33 @@ bool LoopInterchangeLegality::findInductionAndReductions(
InductionDescriptor ID;
if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
Inductions.push_back(&PHI);
- else if (RecurrenceDescriptor::isReductionPHI(&PHI, L, RD))
- Reductions.push_back(&PHI);
else {
- LLVM_DEBUG(
- dbgs() << "Failed to recognize PHI as an induction or reduction.\n");
- return false;
+ // PHIs in inner loops need to be part of a reduction in the outer loop,
+ // discovered when checking the PHIs of the outer loop earlier.
+ if (!InnerLoop) {
+ if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) {
+ LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
+ "across the outer loop.\n");
+ return false;
+ }
+ } else {
+ assert(PHI.getNumIncomingValues() == 2 &&
+ "Phis in loop header should have exactly 2 incoming values");
+ // Check if we have a PHI node in the outer loop that has a reduction
+ // result from the inner loop as an incoming value.
+ Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
+ PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
+ if (!InnerRedPhi ||
+ !llvm::any_of(InnerRedPhi->incoming_values(),
+ [&PHI](Value *V) { return V == &PHI; })) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Failed to recognize PHI as an induction or reduction.\n");
+ return false;
+ }
+ OuterInnerReductions.insert(&PHI);
+ OuterInnerReductions.insert(InnerRedPhi);
+ }
}
}
return true;
@@ -766,81 +762,64 @@ bool LoopInterchangeLegality::currentLimitations() {
PHINode *InnerInductionVar;
SmallVector<PHINode *, 8> Inductions;
- SmallVector<PHINode *, 8> Reductions;
- if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) {
+ if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
LLVM_DEBUG(
- dbgs() << "Only inner loops with induction or reduction PHI nodes "
+ dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with induction or reduction PHI nodes can be"
- " interchange currently.";
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Only outer loops with induction or reduction PHI nodes can be"
+ " interchanged currently.";
});
return true;
}
// TODO: Currently we handle only loops with 1 induction variable.
if (Inductions.size() != 1) {
- LLVM_DEBUG(
- dbgs() << "We currently only support loops with 1 induction variable."
- << "Failed to interchange due to current limitation\n");
+ LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
+ << "supported currently.\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with 1 induction variable can be "
+ return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Only outer loops with 1 induction variable can be "
"interchanged currently.";
});
return true;
}
- if (Reductions.size() > 0)
- InnerLoopHasReduction = true;
- InnerInductionVar = Inductions.pop_back_val();
- Reductions.clear();
- if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) {
+ Inductions.clear();
+ if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
LLVM_DEBUG(
- dbgs() << "Only outer loops with induction or reduction PHI nodes "
+ dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Only outer loops with induction or reduction PHI nodes can be"
- " interchanged currently.";
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Only inner loops with induction or reduction PHI nodes can be"
+           " interchanged currently.";
});
return true;
}
- // Outer loop cannot have reduction because then loops will not be tightly
- // nested.
- if (!Reductions.empty()) {
- LLVM_DEBUG(dbgs() << "Outer loops with reductions are not supported "
- << "currently.\n");
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "ReductionsOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Outer loops with reductions cannot be interchangeed "
- "currently.";
- });
- return true;
- }
// TODO: Currently we handle only loops with 1 induction variable.
if (Inductions.size() != 1) {
- LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
- << "supported currently.\n");
+ LLVM_DEBUG(
+ dbgs() << "We currently only support loops with 1 induction variable."
+ << "Failed to interchange due to current limitation\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Only outer loops with 1 induction variable can be "
+ return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Only inner loops with 1 induction variable can be "
"interchanged currently.";
});
return true;
}
+ InnerInductionVar = Inductions.pop_back_val();
// TODO: Triangular loops are not handled for now.
if (!isLoopStructureUnderstood(InnerInductionVar)) {
@@ -1016,28 +995,6 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
return false;
}
- // Create unique Preheaders if we already do not have one.
- BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
- BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
-
- // Create a unique outer preheader -
- // 1) If OuterLoop preheader is not present.
- // 2) If OuterLoop Preheader is same as OuterLoop Header
- // 3) If OuterLoop Preheader is same as Header of the previous loop.
- // 4) If OuterLoop Preheader is Entry node.
- if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() ||
- isa<PHINode>(OuterLoopPreHeader->begin()) ||
- !OuterLoopPreHeader->getUniquePredecessor()) {
- OuterLoopPreHeader =
- InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA);
- }
-
- if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() ||
- InnerLoopPreHeader == OuterLoop->getHeader()) {
- InnerLoopPreHeader =
- InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA);
- }
-
// TODO: The loops could not be interchanged due to current limitations in the
// transform module.
if (currentLimitations()) {
@@ -1258,6 +1215,10 @@ void LoopInterchangeTransform::restructureLoops(
// outer loop.
NewOuter->addBlockEntry(OrigOuterPreHeader);
LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
+
+ // Tell SE that we move the loops around.
+ SE->forgetLoop(NewOuter);
+ SE->forgetLoop(NewInner);
}
bool LoopInterchangeTransform::transform() {
@@ -1319,9 +1280,8 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
FromBB->getTerminator()->getIterator());
}
-void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock,
- BasicBlock *OldPred,
- BasicBlock *NewPred) {
+static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
+ BasicBlock *NewPred) {
for (PHINode &PHI : CurrBlock->phis()) {
unsigned Num = PHI.getNumIncomingValues();
for (unsigned i = 0; i < Num; ++i) {
@@ -1336,7 +1296,7 @@ void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock,
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
BasicBlock *NewBB,
std::vector<DominatorTree::UpdateType> &DTUpdates) {
- assert(llvm::count_if(BI->successors(),
+ assert(llvm::count_if(successors(BI),
[OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 &&
"BI must jump to OldBB at most once.");
for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) {
@@ -1352,17 +1312,77 @@ static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
}
}
+// Move Lcssa PHIs to the right place.
+static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
+ BasicBlock *OuterLatch) {
+ SmallVector<PHINode *, 8> LcssaInnerExit;
+ for (PHINode &P : InnerExit->phis())
+ LcssaInnerExit.push_back(&P);
+
+ SmallVector<PHINode *, 8> LcssaInnerLatch;
+ for (PHINode &P : InnerLatch->phis())
+ LcssaInnerLatch.push_back(&P);
+
+ // Lcssa PHIs for values used outside the inner loop are in InnerExit.
+ // If a PHI node has users outside of InnerExit, it has a use outside the
+ // interchanged loop and we have to preserve it. We move these to
+ // InnerLatch, which will become the new exit block for the innermost
+ // loop after interchanging. For PHIs only used in InnerExit, we can just
+ // replace them with the incoming value.
+ for (PHINode *P : LcssaInnerExit) {
+ bool hasUsersOutside = false;
+ for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) {
+ Use &U = *UI;
+ ++UI;
+ auto *Usr = cast<Instruction>(U.getUser());
+ if (Usr->getParent() != InnerExit) {
+ hasUsersOutside = true;
+ continue;
+ }
+ U.set(P->getIncomingValueForBlock(InnerLatch));
+ }
+ if (hasUsersOutside)
+ P->moveBefore(InnerLatch->getFirstNonPHI());
+ else
+ P->eraseFromParent();
+ }
+
+ // If the inner loop latch contains LCSSA PHIs, those come from a child loop
+ // and we have to move them to the new inner latch.
+ for (PHINode *P : LcssaInnerLatch)
+ P->moveBefore(InnerExit->getFirstNonPHI());
+
+ // Now adjust the incoming blocks for the LCSSA PHIs.
+ // For PHIs moved from Inner's exit block, we need to replace Inner's latch
+ // with the new latch.
+ updateIncomingBlock(InnerLatch, InnerLatch, OuterLatch);
+}
+
bool LoopInterchangeTransform::adjustLoopBranches() {
LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
std::vector<DominatorTree::UpdateType> DTUpdates;
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+
+ assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
+ InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
+ InnerLoopPreHeader && "Guaranteed by loop-simplify form");
+ // Ensure that both preheaders do not contain PHI nodes and have single
+ // predecessors. This allows us to move them easily. We use
+ // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
+ // preheaders do not satisfy those conditions.
+ if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
+ !OuterLoopPreHeader->getUniquePredecessor())
+ OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, DT, LI, true);
+ if (InnerLoopPreHeader == OuterLoop->getHeader())
+ InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, DT, LI, true);
+
// Adjust the loop preheader
BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
- BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
- BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
BasicBlock *InnerLoopLatchPredecessor =
InnerLoopLatch->getUniquePredecessor();
@@ -1417,17 +1437,6 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
InnerLoopLatchSuccessor, DTUpdates);
- // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with
- // the value and remove this PHI node from inner loop.
- SmallVector<PHINode *, 8> LcssaVec;
- for (PHINode &P : InnerLoopLatchSuccessor->phis())
- LcssaVec.push_back(&P);
-
- for (PHINode *P : LcssaVec) {
- Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch);
- P->replaceAllUsesWith(Incoming);
- P->eraseFromParent();
- }
if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
@@ -1439,12 +1448,15 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
DTUpdates);
- updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
-
DT->applyUpdates(DTUpdates);
restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
OuterLoopPreHeader);
+ moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch);
+ // For PHIs in the exit block of the outer loop, outer's latch has been
+  // replaced by Inner's.
+ updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
+
// Now update the reduction PHIs in the inner and outer loop headers.
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1))
@@ -1452,26 +1464,21 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1))
OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
- for (PHINode *PHI : OuterLoopPHIs)
- PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
+ auto &OuterInnerReductions = LIL.getOuterInnerReductions();
+ (void)OuterInnerReductions;
- // Move the PHI nodes from the inner loop header to the outer loop header.
- // We have to deal with one kind of PHI nodes:
- // 1) PHI nodes that are part of inner loop-only reductions.
- // We only have to move the PHI node and update the incoming blocks.
+ // Now move the remaining reduction PHIs from outer to inner loop header and
+ // vice versa. The PHI nodes must be part of a reduction across the inner and
+  // outer loop, and all that remains to do is update the incoming blocks.
+ for (PHINode *PHI : OuterLoopPHIs) {
+ PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
+ assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
+ "Expected a reduction PHI node");
+ }
for (PHINode *PHI : InnerLoopPHIs) {
PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
- for (BasicBlock *InBB : PHI->blocks()) {
- if (InnerLoop->contains(InBB))
- continue;
-
- assert(!isa<PHINode>(PHI->getIncomingValueForBlock(InBB)) &&
- "Unexpected incoming PHI node, reductions in outer loop are not "
- "supported yet");
- PHI->replaceAllUsesWith(PHI->getIncomingValueForBlock(InBB));
- PHI->eraseFromParent();
- break;
- }
+ assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
+ "Expected a reduction PHI node");
}
// Update the incoming blocks for moved PHI nodes.
@@ -1514,13 +1521,8 @@ char LoopInterchange::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
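For the legality rework above, the OuterInnerReductions set records reduction PHIs that span both loops, which is the only PHI shape (besides inductions) the pass now accepts. An illustrative C++ loop nest of that shape (not from the patch; names are made up):

    // After lowering, Sum roughly becomes an outer-header reduction PHI fed by
    // the inner loop's reduction through a single-input LCSSA PHI, which
    // followLCSSA() looks through.
    int sumMatrix(const int *A, int N, int M) {
      int Sum = 0;
      for (int i = 0; i < N; ++i)
        for (int j = 0; j < M; ++j)
          Sum += A[i * M + j];
      return Sum;
    }

After interchanging, both reduction PHIs are simply moved between the headers and their incoming blocks updated, instead of the nest being rejected as it was previously whenever the outer loop carried a reduction.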
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 10f6fcdcfdb7..774ad7b945a0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -30,12 +30,26 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
if (DebugLogging)
dbgs() << "Starting Loop pass manager run.\n";
+ // Request PassInstrumentation from analysis manager, will use it to run
+ // instrumenting callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(L, AR);
for (auto &Pass : Passes) {
if (DebugLogging)
dbgs() << "Running pass: " << Pass->name() << " on " << L;
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, skip its execution completely if asked to (callback returns false).
+ if (!PI.runBeforePass<Loop>(*Pass, L))
+ continue;
+
PreservedAnalyses PassPA = Pass->run(L, AM, AR, U);
+ // do not pass deleted Loop into the instrumentation
+ if (U.skipCurrentLoop())
+ PI.runAfterPassInvalidated<Loop>(*Pass);
+ else
+ PI.runAfterPass<Loop>(*Pass, L);
+
// If the loop was deleted, abort the run and return to the outer walk.
if (U.skipCurrentLoop()) {
PA.intersect(std::move(PassPA));
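The loop pass manager now consults PassInstrumentation around each pass. A hedged sketch of driving the new BeforePass hook from the callbacks side; it assumes the registerBeforePassCallback interface of this LLVM import and uses a hypothetical pass name:

    #include "llvm/ADT/Any.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/PassInstrumentation.h"
    using namespace llvm;

    // Returning false from a BeforePass callback asks the pass manager to skip
    // that pass execution, which is what runBeforePass<Loop> above checks for.
    void installLoopPassFilter(PassInstrumentationCallbacks &PIC) {
      PIC.registerBeforePassCallback([](StringRef PassID, Any /*IR*/) {
        return PassID != "LoopFullUnrollPass"; // hypothetical pass to skip
      });
    }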
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index cbb6594cf8f4..5983c804c0c1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -178,7 +178,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopPredication.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -196,6 +198,9 @@
#define DEBUG_TYPE "loop-predication"
+STATISTIC(TotalConsidered, "Number of guards considered");
+STATISTIC(TotalWidened, "Number of checks widened");
+
using namespace llvm;
static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",
@@ -574,6 +579,8 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
LLVM_DEBUG(dbgs() << "Processing guard:\n");
LLVM_DEBUG(Guard->dump());
+ TotalConsidered++;
+
IRBuilder<> Builder(cast<Instruction>(Preheader->getTerminator()));
// The guard condition is expected to be in form of:
@@ -615,6 +622,8 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
if (NumWidened == 0)
return false;
+ TotalWidened += NumWidened;
+
// Emit the new guard condition
Builder.SetInsertPoint(Guard);
Value *LastCheck = nullptr;
@@ -812,9 +821,8 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
SmallVector<IntrinsicInst *, 4> Guards;
for (const auto BB : L->blocks())
for (auto &I : *BB)
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::experimental_guard)
- Guards.push_back(II);
+ if (isGuard(&I))
+ Guards.push_back(cast<IntrinsicInst>(&I));
if (Guards.empty())
return false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index eeaad39dc1d1..fd22128f7fe6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -15,6 +15,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Debug.h"
@@ -40,12 +42,19 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);
- bool Changed = LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ,
- false, Threshold, false);
+ Optional<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA)
+ MSSAU = MemorySSAUpdater(AR.MSSA);
+ bool Changed = LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ SQ, false, Threshold, false);
if (!Changed)
return PreservedAnalyses::all();
+ if (AR.MSSA && VerifyMemorySSA)
+ AR.MSSA->verifyMemorySSA();
+
return getLoopPassPreservedAnalyses();
}
@@ -68,6 +77,10 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (EnableMSSALoopDependency) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
getLoopAnalysisUsage(AU);
}
@@ -84,8 +97,14 @@ public:
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
- return LoopRotation(L, LI, TTI, AC, DT, SE, SQ, false, MaxHeaderSize,
- false);
+ Optional<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency) {
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = MemorySSAUpdater(MSSA);
+ }
+ return LoopRotation(L, LI, TTI, AC, DT, SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
+ false, MaxHeaderSize, false);
}
};
}
@@ -96,6 +115,7 @@ INITIALIZE_PASS_BEGIN(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
false)
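Both the LoopInstSimplify and LoopRotation changes above thread an optional MemorySSAUpdater into the pass and verify the analysis when requested. A minimal sketch of the bookkeeping this enables when a simplified instruction is replaced (an assumed helper, not part of the patch, using only APIs the hunks themselves call):

    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/Analysis/MemorySSAUpdater.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/Support/Casting.h"
    using namespace llvm;

    // When an instruction is replaced by another value, redirect its
    // MemoryAccess (if any) to the replacement's access so MemorySSA stays
    // consistent; the dead instruction can then be cleaned up via
    // RecursivelyDeleteTriviallyDeadInstructions(..., MSSAU).
    static void replaceAndUpdateMSSA(Instruction &Old, Value *New,
                                     MemorySSAUpdater *MSSAU) {
      Old.replaceAllUsesWith(New);
      if (!MSSAU)
        return;
      MemorySSA &MSSA = *MSSAU->getMemorySSA();
      if (auto *NewI = dyn_cast_or_null<Instruction>(New))
        if (MemoryAccess *OldMA = MSSA.getMemoryAccess(&Old))
          if (MemoryAccess *NewMA = MSSA.getMemoryAccess(NewI))
            OldMA->replaceAllUsesWith(NewMA);
    }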
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 2b83d3dc5f1b..2e5927f9a068 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -24,9 +24,12 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -38,9 +41,527 @@ using namespace llvm;
#define DEBUG_TYPE "loop-simplifycfg"
-static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
- ScalarEvolution &SE) {
+static cl::opt<bool> EnableTermFolding("enable-loop-simplifycfg-term-folding",
+ cl::init(false));
+
+STATISTIC(NumTerminatorsFolded,
+ "Number of terminators folded to unconditional branches");
+STATISTIC(NumLoopBlocksDeleted,
+ "Number of loop blocks deleted");
+STATISTIC(NumLoopExitsDeleted,
+ "Number of loop exiting edges deleted");
+
+/// If \p BB is a switch or a conditional branch, but only one of its successors
+/// can be reached from this block at runtime, return this successor. Otherwise,
+/// return nullptr.
+static BasicBlock *getOnlyLiveSuccessor(BasicBlock *BB) {
+ Instruction *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional())
+ return nullptr;
+ if (BI->getSuccessor(0) == BI->getSuccessor(1))
+ return BI->getSuccessor(0);
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ if (!Cond)
+ return nullptr;
+ return Cond->isZero() ? BI->getSuccessor(1) : BI->getSuccessor(0);
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ auto *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ if (!CI)
+ return nullptr;
+ for (auto Case : SI->cases())
+ if (Case.getCaseValue() == CI)
+ return Case.getCaseSuccessor();
+ return SI->getDefaultDest();
+ }
+
+ return nullptr;
+}
+
+namespace {
+/// Helper class that can turn branches and switches with constant conditions
+/// into unconditional branches.
+class ConstantTerminatorFoldingImpl {
+private:
+ Loop &L;
+ LoopInfo &LI;
+ DominatorTree &DT;
+ ScalarEvolution &SE;
+ MemorySSAUpdater *MSSAU;
+
+ // Whether or not the current loop has irreducible CFG.
+ bool HasIrreducibleCFG = false;
+ // Whether or not the current loop will still exist after terminator constant
+  // folding is done. In theory, there are two ways this can happen:
+ // 1. Loop's latch(es) become unreachable from loop header;
+ // 2. Loop's header becomes unreachable from method entry.
+ // In practice, the second situation is impossible because we only modify the
+  // current loop and its preheader and do not affect the preheader's
+  // reachability from any other block. So this variable being set to true
+  // means that the loop's latch has become unreachable from the loop header.
+ bool DeleteCurrentLoop = false;
+
+ // The blocks of the original loop that will still be reachable from entry
+ // after the constant folding.
+ SmallPtrSet<BasicBlock *, 8> LiveLoopBlocks;
+ // The blocks of the original loop that will become unreachable from entry
+ // after the constant folding.
+ SmallVector<BasicBlock *, 8> DeadLoopBlocks;
+ // The exits of the original loop that will still be reachable from entry
+ // after the constant folding.
+ SmallPtrSet<BasicBlock *, 8> LiveExitBlocks;
+ // The exits of the original loop that will become unreachable from entry
+ // after the constant folding.
+ SmallVector<BasicBlock *, 8> DeadExitBlocks;
+ // The blocks that will still be a part of the current loop after folding.
+ SmallPtrSet<BasicBlock *, 8> BlocksInLoopAfterFolding;
+ // The blocks that have terminators with constant condition that can be
+ // folded. Note: fold candidates should be in L but not in any of its
+ // subloops to avoid complex LI updates.
+ SmallVector<BasicBlock *, 8> FoldCandidates;
+
+ void dump() const {
+ dbgs() << "Constant terminator folding for loop " << L << "\n";
+ dbgs() << "After terminator constant-folding, the loop will";
+ if (!DeleteCurrentLoop)
+ dbgs() << " not";
+ dbgs() << " be destroyed\n";
+ auto PrintOutVector = [&](const char *Message,
+ const SmallVectorImpl<BasicBlock *> &S) {
+ dbgs() << Message << "\n";
+ for (const BasicBlock *BB : S)
+ dbgs() << "\t" << BB->getName() << "\n";
+ };
+ auto PrintOutSet = [&](const char *Message,
+ const SmallPtrSetImpl<BasicBlock *> &S) {
+ dbgs() << Message << "\n";
+ for (const BasicBlock *BB : S)
+ dbgs() << "\t" << BB->getName() << "\n";
+ };
+ PrintOutVector("Blocks in which we can constant-fold terminator:",
+ FoldCandidates);
+ PrintOutSet("Live blocks from the original loop:", LiveLoopBlocks);
+ PrintOutVector("Dead blocks from the original loop:", DeadLoopBlocks);
+ PrintOutSet("Live exit blocks:", LiveExitBlocks);
+ PrintOutVector("Dead exit blocks:", DeadExitBlocks);
+ if (!DeleteCurrentLoop)
+ PrintOutSet("The following blocks will still be part of the loop:",
+ BlocksInLoopAfterFolding);
+ }
+
+ /// Whether or not the current loop has irreducible CFG.
+ bool hasIrreducibleCFG(LoopBlocksDFS &DFS) {
+ assert(DFS.isComplete() && "DFS is expected to be finished");
+ // Index of a basic block in RPO traversal.
+ DenseMap<const BasicBlock *, unsigned> RPO;
+ unsigned Current = 0;
+ for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I)
+ RPO[*I] = Current++;
+
+ for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ for (auto *Succ : successors(BB))
+ if (L.contains(Succ) && !LI.isLoopHeader(Succ) && RPO[BB] > RPO[Succ])
+          // If an edge goes from a block with a greater order number into a
+          // block with a lesser number, and it is not a loop backedge, then it
+          // can only be part of an irreducible non-loop cycle.
+ return true;
+ }
+ return false;
+ }
+
+ /// Fill all information about status of blocks and exits of the current loop
+ /// if constant folding of all branches will be done.
+ void analyze() {
+ LoopBlocksDFS DFS(&L);
+ DFS.perform(&LI);
+ assert(DFS.isComplete() && "DFS is expected to be finished");
+
+ // TODO: The algorithm below relies on both RPO and Postorder traversals.
+ // When the loop has only reducible CFG inside, then the invariant "all
+ // predecessors of X are processed before X in RPO" is preserved. However
+ // an irreducible loop can break this invariant (e.g. latch does not have to
+ // be the last block in the traversal in this case, and the algorithm relies
+ // on this). We can later decide to support such cases by altering the
+ // algorithms, but so far we just give up analyzing them.
+ if (hasIrreducibleCFG(DFS)) {
+ HasIrreducibleCFG = true;
+ return;
+ }
+
+ // Collect live and dead loop blocks and exits.
+ LiveLoopBlocks.insert(L.getHeader());
+ for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ // If a loop block wasn't marked as live so far, then it's dead.
+ if (!LiveLoopBlocks.count(BB)) {
+ DeadLoopBlocks.push_back(BB);
+ continue;
+ }
+
+ BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(BB);
+
+      // If a block has only one live successor, it's a candidate for constant
+ // folding. Only handle blocks from current loop: branches in child loops
+ // are skipped because if they can be folded, they should be folded during
+ // the processing of child loops.
+ if (TheOnlySucc && LI.getLoopFor(BB) == &L)
+ FoldCandidates.push_back(BB);
+
+ // Handle successors.
+ for (BasicBlock *Succ : successors(BB))
+ if (!TheOnlySucc || TheOnlySucc == Succ) {
+ if (L.contains(Succ))
+ LiveLoopBlocks.insert(Succ);
+ else
+ LiveExitBlocks.insert(Succ);
+ }
+ }
+
+ // Sanity check: amount of dead and live loop blocks should match the total
+ // number of blocks in loop.
+ assert(L.getNumBlocks() == LiveLoopBlocks.size() + DeadLoopBlocks.size() &&
+ "Malformed block sets?");
+
+ // Now, all exit blocks that are not marked as live are dead.
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L.getExitBlocks(ExitBlocks);
+ for (auto *ExitBlock : ExitBlocks)
+ if (!LiveExitBlocks.count(ExitBlock))
+ DeadExitBlocks.push_back(ExitBlock);
+
+ // Whether or not the edge From->To will still be present in graph after the
+ // folding.
+ auto IsEdgeLive = [&](BasicBlock *From, BasicBlock *To) {
+ if (!LiveLoopBlocks.count(From))
+ return false;
+ BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(From);
+ return !TheOnlySucc || TheOnlySucc == To;
+ };
+
+ // The loop will not be destroyed if its latch is live.
+ DeleteCurrentLoop = !IsEdgeLive(L.getLoopLatch(), L.getHeader());
+
+ // If we are going to delete the current loop completely, no extra analysis
+ // is needed.
+ if (DeleteCurrentLoop)
+ return;
+
+ // Otherwise, we should check which blocks will still be a part of the
+ // current loop after the transform.
+ BlocksInLoopAfterFolding.insert(L.getLoopLatch());
+ // If the loop is live, then we should compute what blocks are still in
+ // loop after all branch folding has been done. A block is in loop if
+ // it has a live edge to another block that is in the loop; by definition,
+ // latch is in the loop.
+ auto BlockIsInLoop = [&](BasicBlock *BB) {
+ return any_of(successors(BB), [&](BasicBlock *Succ) {
+ return BlocksInLoopAfterFolding.count(Succ) && IsEdgeLive(BB, Succ);
+ });
+ };
+ for (auto I = DFS.beginPostorder(), E = DFS.endPostorder(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ if (BlockIsInLoop(BB))
+ BlocksInLoopAfterFolding.insert(BB);
+ }
+
+ // Sanity check: header must be in loop.
+ assert(BlocksInLoopAfterFolding.count(L.getHeader()) &&
+ "Header not in loop?");
+ assert(BlocksInLoopAfterFolding.size() <= LiveLoopBlocks.size() &&
+ "All blocks that stay in loop should be live!");
+ }
+
+  /// We need to preserve static reachability of all loop exit blocks (this is
+  /// required by the loop pass manager). In order to do it, we use the
+  /// following trick:
+ ///
+ /// preheader:
+ /// <preheader code>
+ /// br label %loop_header
+ ///
+ /// loop_header:
+ /// ...
+ /// br i1 false, label %dead_exit, label %loop_block
+ /// ...
+ ///
+ /// We cannot simply remove edge from the loop to dead exit because in this
+ /// case dead_exit (and its successors) may become unreachable. To avoid that,
+ /// we insert the following fictive preheader:
+ ///
+ /// preheader:
+ /// <preheader code>
+ /// switch i32 0, label %preheader-split,
+ /// [i32 1, label %dead_exit_1],
+ /// [i32 2, label %dead_exit_2],
+ /// ...
+ /// [i32 N, label %dead_exit_N],
+ ///
+ /// preheader-split:
+ /// br label %loop_header
+ ///
+ /// loop_header:
+ /// ...
+ /// br i1 false, label %dead_exit_N, label %loop_block
+ /// ...
+ ///
+  /// Doing so, we preserve static reachability of all dead exits and can later
+ /// remove edges from the loop to these blocks.
+ void handleDeadExits() {
+ // If no dead exits, nothing to do.
+ if (DeadExitBlocks.empty())
+ return;
+
+ // Construct split preheader and the dummy switch to thread edges from it to
+ // dead exits.
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ BasicBlock *Preheader = L.getLoopPreheader();
+ BasicBlock *NewPreheader = Preheader->splitBasicBlock(
+ Preheader->getTerminator(),
+ Twine(Preheader->getName()).concat("-split"));
+ DTU.deleteEdge(Preheader, L.getHeader());
+ DTU.insertEdge(NewPreheader, L.getHeader());
+ DTU.insertEdge(Preheader, NewPreheader);
+ IRBuilder<> Builder(Preheader->getTerminator());
+ SwitchInst *DummySwitch =
+ Builder.CreateSwitch(Builder.getInt32(0), NewPreheader);
+ Preheader->getTerminator()->eraseFromParent();
+
+ unsigned DummyIdx = 1;
+ for (BasicBlock *BB : DeadExitBlocks) {
+ SmallVector<Instruction *, 4> DeadPhis;
+ for (auto &PN : BB->phis())
+ DeadPhis.push_back(&PN);
+
+ // Eliminate all Phis from dead exits.
+ for (Instruction *PN : DeadPhis) {
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
+ }
+ assert(DummyIdx != 0 && "Too many dead exits!");
+ DummySwitch->addCase(Builder.getInt32(DummyIdx++), BB);
+ DTU.insertEdge(Preheader, BB);
+ ++NumLoopExitsDeleted;
+ }
+
+ assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
+ if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
+ OuterLoop->addBasicBlockToLoop(NewPreheader, LI);
+
+ // When we break dead edges, the outer loop may become unreachable from
+ // the current loop. We need to fix loop info accordingly. For this, we
+ // find the most nested loop that still contains L and remove L from all
+ // loops that are inside of it.
+ Loop *StillReachable = nullptr;
+ for (BasicBlock *BB : LiveExitBlocks) {
+ Loop *BBL = LI.getLoopFor(BB);
+ if (BBL && BBL->contains(L.getHeader()))
+ if (!StillReachable ||
+ BBL->getLoopDepth() > StillReachable->getLoopDepth())
+ StillReachable = BBL;
+ }
+
+ // Okay, our loop is no longer in the outer loop (and maybe not in some of
+ // its parents as well). Make the fixup.
+ if (StillReachable != OuterLoop) {
+ LI.changeLoopFor(NewPreheader, StillReachable);
+ for (Loop *NotContaining = OuterLoop; NotContaining != StillReachable;
+ NotContaining = NotContaining->getParentLoop()) {
+ NotContaining->removeBlockFromLoop(NewPreheader);
+ for (auto *BB : L.blocks())
+ NotContaining->removeBlockFromLoop(BB);
+ }
+ OuterLoop->removeChildLoop(&L);
+ if (StillReachable)
+ StillReachable->addChildLoop(&L);
+ else
+ LI.addTopLevelLoop(&L);
+ }
+ }
+ }
+
+ /// Delete loop blocks that have become unreachable after folding. Make all
+ /// relevant updates to DT and LI.
+ void deleteDeadLoopBlocks() {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ if (MSSAU) {
+ SmallPtrSet<BasicBlock *, 8> DeadLoopBlocksSet(DeadLoopBlocks.begin(),
+ DeadLoopBlocks.end());
+ MSSAU->removeBlocks(DeadLoopBlocksSet);
+ }
+ for (auto *BB : DeadLoopBlocks) {
+ assert(BB != L.getHeader() &&
+ "Header of the current loop cannot be dead!");
+ LLVM_DEBUG(dbgs() << "Deleting dead loop block " << BB->getName()
+ << "\n");
+ if (LI.isLoopHeader(BB)) {
+ assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!");
+ LI.erase(LI.getLoopFor(BB));
+ }
+ LI.removeBlock(BB);
+ DeleteDeadBlock(BB, &DTU);
+ ++NumLoopBlocksDeleted;
+ }
+ }
+
+ /// Constant-fold terminators of blocks accumulated in FoldCandidates into
+ /// unconditional branches.
+ void foldTerminators() {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+
+ for (BasicBlock *BB : FoldCandidates) {
+ assert(LI.getLoopFor(BB) == &L && "Should be a loop block!");
+ BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(BB);
+ assert(TheOnlySucc && "Should have one live successor!");
+
+ LLVM_DEBUG(dbgs() << "Replacing terminator of " << BB->getName()
+ << " with an unconditional branch to the block "
+ << TheOnlySucc->getName() << "\n");
+
+ SmallPtrSet<BasicBlock *, 2> DeadSuccessors;
+ // Remove all BB's successors except for the live one.
+ unsigned TheOnlySuccDuplicates = 0;
+ for (auto *Succ : successors(BB))
+ if (Succ != TheOnlySucc) {
+ DeadSuccessors.insert(Succ);
+ // If our successor lies in a different loop, we don't want to remove
+ // the one-input Phi because it is an LCSSA Phi.
+ bool PreserveLCSSAPhi = !L.contains(Succ);
+ Succ->removePredecessor(BB, PreserveLCSSAPhi);
+ if (MSSAU)
+ MSSAU->removeEdge(BB, Succ);
+ } else
+ ++TheOnlySuccDuplicates;
+
+ assert(TheOnlySuccDuplicates > 0 && "Should be!");
+ // If TheOnlySucc was BB's successor more than once, after transform it
+ // will be its successor only once. Remove redundant inputs from
+ // TheOnlySucc's Phis.
+ bool PreserveLCSSAPhi = !L.contains(TheOnlySucc);
+ for (unsigned Dup = 1; Dup < TheOnlySuccDuplicates; ++Dup)
+ TheOnlySucc->removePredecessor(BB, PreserveLCSSAPhi);
+ if (MSSAU && TheOnlySuccDuplicates > 1)
+ MSSAU->removeDuplicatePhiEdgesBetween(BB, TheOnlySucc);
+
+ IRBuilder<> Builder(BB->getContext());
+ Instruction *Term = BB->getTerminator();
+ Builder.SetInsertPoint(Term);
+ Builder.CreateBr(TheOnlySucc);
+ Term->eraseFromParent();
+
+ for (auto *DeadSucc : DeadSuccessors)
+ DTU.deleteEdge(BB, DeadSucc);
+
+ ++NumTerminatorsFolded;
+ }
+ }
+
+public:
+ ConstantTerminatorFoldingImpl(Loop &L, LoopInfo &LI, DominatorTree &DT,
+ ScalarEvolution &SE,
+ MemorySSAUpdater *MSSAU)
+ : L(L), LI(LI), DT(DT), SE(SE), MSSAU(MSSAU) {}
+ bool run() {
+ assert(L.getLoopLatch() && "Should be single latch!");
+
+ // Collect all available information about status of blocks after constant
+ // folding.
+ analyze();
+
+ LLVM_DEBUG(dbgs() << "In function " << L.getHeader()->getParent()->getName()
+ << ": ");
+
+ if (HasIrreducibleCFG) {
+ LLVM_DEBUG(dbgs() << "Loops with irreducible CFG are not supported!\n");
+ return false;
+ }
+
+ // Nothing to constant-fold.
+ if (FoldCandidates.empty()) {
+ LLVM_DEBUG(
+ dbgs() << "No constant terminator folding candidates found in loop "
+ << L.getHeader()->getName() << "\n");
+ return false;
+ }
+
+ // TODO: Support deletion of the current loop.
+ if (DeleteCurrentLoop) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Give up constant terminator folding in loop "
+ << L.getHeader()->getName()
+ << ": we don't currently support deletion of the current loop.\n");
+ return false;
+ }
+
+ // TODO: Support blocks that are not dead, but are also no longer in the loop
+ // after the folding.
+ if (BlocksInLoopAfterFolding.size() + DeadLoopBlocks.size() !=
+ L.getNumBlocks()) {
+ LLVM_DEBUG(
+ dbgs() << "Give up constant terminator folding in loop "
+ << L.getHeader()->getName()
+ << ": we don't currently"
+ " support blocks that are not dead, but will stop "
+ "being a part of the loop after constant-folding.\n");
+ return false;
+ }
+
+ SE.forgetTopmostLoop(&L);
+ // Dump analysis results.
+ LLVM_DEBUG(dump());
+
+ LLVM_DEBUG(dbgs() << "Constant-folding " << FoldCandidates.size()
+ << " terminators in loop " << L.getHeader()->getName()
+ << "\n");
+
+ // Make the actual transforms.
+ handleDeadExits();
+ foldTerminators();
+
+ if (!DeadLoopBlocks.empty()) {
+ LLVM_DEBUG(dbgs() << "Deleting " << DeadLoopBlocks.size()
+ << " dead blocks in loop " << L.getHeader()->getName()
+ << "\n");
+ deleteDeadLoopBlocks();
+ }
+
+#ifndef NDEBUG
+ // Make sure that we have preserved all data structures after the transform.
+ DT.verify();
+ assert(DT.isReachableFromEntry(L.getHeader()));
+ LI.verify(DT);
+#endif
+
+ return true;
+ }
+};
+} // namespace
+
+/// Turn branches and switches with known constant conditions into unconditional
+/// branches.
+static bool constantFoldTerminators(Loop &L, DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution &SE,
+ MemorySSAUpdater *MSSAU) {
+ if (!EnableTermFolding)
+ return false;
+
+ // To keep things simple, only process loops with a single latch. We
+ // canonicalize most loops to this form. We can support multi-latch if needed.
+ if (!L.getLoopLatch())
+ return false;
+
+ ConstantTerminatorFoldingImpl BranchFolder(L, LI, DT, SE, MSSAU);
+ return BranchFolder.run();
+}
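For illustration only (not part of this patch), a minimal LLVM IR sketch of the rewrite this folding enables; block and value names are invented. A conditional branch whose condition is known to be false can only take its false edge, so the terminator is replaced by an unconditional branch and the dead successor loses this in-loop predecessor:

  ; Before: %dead_exit can never be taken, %live_succ is the only live successor.
  bb:
    br i1 false, label %dead_exit, label %live_succ

  ; After foldTerminators: the terminator becomes an unconditional branch and
  ; %dead_exit no longer has %bb as a predecessor.
  bb:
    br label %live_succ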
+
+static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT,
+ LoopInfo &LI, MemorySSAUpdater *MSSAU) {
bool Changed = false;
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// Copy blocks into a temporary array to avoid iterator invalidation issues
// as we remove them.
SmallVector<WeakTrackingVH, 16> Blocks(L.blocks());
@@ -57,19 +578,38 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
continue;
// Merge Succ into Pred and delete it.
- MergeBlockIntoPredecessor(Succ, &DT, &LI);
+ MergeBlockIntoPredecessor(Succ, &DTU, &LI, MSSAU);
- SE.forgetLoop(&L);
Changed = true;
}
return Changed;
}
+static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution &SE, MemorySSAUpdater *MSSAU) {
+ bool Changed = false;
+
+ // Constant-fold terminators with known constant conditions.
+ Changed |= constantFoldTerminators(L, DT, LI, SE, MSSAU);
+
+ // Eliminate unconditional branches by merging blocks into their predecessors.
+ Changed |= mergeBlocksIntoPredecessors(L, DT, LI, MSSAU);
+
+ if (Changed)
+ SE.forgetTopmostLoop(&L);
+
+ return Changed;
+}
+
PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
- if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE))
+ Optional<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency && AR.MSSA)
+ MSSAU = MemorySSAUpdater(AR.MSSA);
+ if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
@@ -90,10 +630,22 @@ public:
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- return simplifyLoopCFG(*L, DT, LI, SE);
+ Optional<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency) {
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = MemorySSAUpdater(MSSA);
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
+ return simplifyLoopCFG(*L, DT, LI, SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ if (EnableMSSALoopDependency) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
AU.addPreserved<DependenceAnalysisWrapperPass>();
getLoopAnalysisUsage(AU);
}
@@ -104,6 +656,7 @@ char LoopSimplifyCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(LoopSimplifyCFGLegacyPass, "loop-simplifycfg",
"Simplify loop CFG", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LoopSimplifyCFGLegacyPass, "loop-simplifycfg",
"Simplify loop CFG", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
index 7d62349d4719..2f7ad2126ed3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -202,17 +202,22 @@ static bool sinkInstruction(Loop &L, Instruction &I,
if (BBsToSinkInto.empty())
return false;
+ // Do not sink if any of the candidate blocks to sink into is non-cold.
+ if (BBsToSinkInto.size() > 1) {
+ for (auto *BB : BBsToSinkInto)
+ if (!LoopBlockNumber.count(BB))
+ return false;
+ }
+
// Copy the final BBs into a vector and sort them using the total ordering
// of the loop block numbers as iterating the set doesn't give a useful
// order. No need to stable sort as the block numbers are a total ordering.
SmallVector<BasicBlock *, 2> SortedBBsToSinkInto;
SortedBBsToSinkInto.insert(SortedBBsToSinkInto.begin(), BBsToSinkInto.begin(),
BBsToSinkInto.end());
- llvm::sort(SortedBBsToSinkInto.begin(), SortedBBsToSinkInto.end(),
- [&](BasicBlock *A, BasicBlock *B) {
- return LoopBlockNumber.find(A)->second <
- LoopBlockNumber.find(B)->second;
- });
+ llvm::sort(SortedBBsToSinkInto, [&](BasicBlock *A, BasicBlock *B) {
+ return LoopBlockNumber.find(A)->second < LoopBlockNumber.find(B)->second;
+ });
BasicBlock *MoveBB = *SortedBBsToSinkInto.begin();
// FIXME: Optimize the efficiency for cloned value replacement. The current
@@ -275,6 +280,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// Compute alias set.
for (BasicBlock *BB : L.blocks())
CurAST.add(*BB);
+ CurAST.add(*Preheader);
// Sort loop's basic blocks by frequency
SmallVector<BasicBlock *, 10> ColdLoopBBs;
@@ -298,7 +304,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// No need to check whether the instruction's operands are loop invariant.
assert(L.hasLoopInvariantOperands(I) &&
"Insts in a loop's preheader should have loop invariant operands!");
- if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
+ if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false))
continue;
if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
Changed = true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index fa83b48210bc..773ffb9df0a2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -155,6 +155,11 @@ static cl::opt<bool> FilterSameScaledReg(
cl::desc("Narrow LSR search space by filtering non-optimal formulae"
" with the same ScaledReg and Scale"));
+static cl::opt<unsigned> ComplexityLimit(
+ "lsr-complexity-limit", cl::Hidden,
+ cl::init(std::numeric_limits<uint16_t>::max()),
+ cl::desc("LSR search space complexity limit"));
+
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -1487,7 +1492,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
- llvm::sort(Key.begin(), Key.end());
+ llvm::sort(Key);
return Uniquifier.count(Key);
}
@@ -1511,7 +1516,7 @@ bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
- llvm::sort(Key.begin(), Key.end());
+ llvm::sort(Key);
if (!Uniquifier.insert(Key).second)
return false;
@@ -3638,32 +3643,62 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
- if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
+ if (Base.BaseRegs.size() + (Base.Scale == 1) +
+ (Base.UnfoldedOffset != 0) <= 1)
return;
// Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
// processing the formula.
Base.unscale();
- Formula F = Base;
- F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
+ Formula NewBase = Base;
+ NewBase.BaseRegs.clear();
+ Type *CombinedIntegerType = nullptr;
for (const SCEV *BaseReg : Base.BaseRegs) {
if (SE.properlyDominates(BaseReg, L->getHeader()) &&
- !SE.hasComputableLoopEvolution(BaseReg, L))
+ !SE.hasComputableLoopEvolution(BaseReg, L)) {
+ if (!CombinedIntegerType)
+ CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());
Ops.push_back(BaseReg);
+ }
else
- F.BaseRegs.push_back(BaseReg);
+ NewBase.BaseRegs.push_back(BaseReg);
}
- if (Ops.size() > 1) {
- const SCEV *Sum = SE.getAddExpr(Ops);
+
+ // If no register is relevant, we're done.
+ if (Ops.size() == 0)
+ return;
+
+ // Utility function for generating the required variants of the combined
+ // registers.
+ auto GenerateFormula = [&](const SCEV *Sum) {
+ Formula F = NewBase;
+
// TODO: If Sum is zero, it probably means ScalarEvolution missed an
// opportunity to fold something. For now, just ignore such cases
// rather than proceed with zero in a register.
- if (!Sum->isZero()) {
- F.BaseRegs.push_back(Sum);
- F.canonicalize(*L);
- (void)InsertFormula(LU, LUIdx, F);
- }
+ if (Sum->isZero())
+ return;
+
+ F.BaseRegs.push_back(Sum);
+ F.canonicalize(*L);
+ (void)InsertFormula(LU, LUIdx, F);
+ };
+
+ // If we collected at least two registers, generate a formula combining them.
+ if (Ops.size() > 1) {
+ SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops.
+ GenerateFormula(SE.getAddExpr(OpsCopy));
+ }
+
+ // If we have an unfolded offset, generate a formula combining it with the
+ // registers collected.
+ if (NewBase.UnfoldedOffset) {
+ assert(CombinedIntegerType && "Missing a type for the unfolded offset");
+ Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset,
+ true));
+ NewBase.UnfoldedOffset = 0;
+ GenerateFormula(SE.getAddExpr(Ops));
}
}
@@ -4238,7 +4273,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for
// uniquifying.
- llvm::sort(Key.begin(), Key.end());
+ llvm::sort(Key);
std::pair<BestFormulaeTy::const_iterator, bool> P =
BestFormulae.insert(std::make_pair(Key, FIdx));
@@ -4281,9 +4316,6 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
});
}
-// This is a rough guess that seems to work fairly well.
-static const size_t ComplexityLimit = std::numeric_limits<uint16_t>::max();
-
/// Estimate the worst-case number of solutions the solver might have to
/// consider. It almost never considers this many solutions because it prunes the
/// search space, but the pruning isn't always sufficient.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 86c99aed4417..da46210b6fdd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -56,6 +56,20 @@ using namespace llvm;
#define DEBUG_TYPE "loop-unroll-and-jam"
+/// @{
+/// Metadata attribute names
+static const char *const LLVMLoopUnrollAndJamFollowupAll =
+ "llvm.loop.unroll_and_jam.followup_all";
+static const char *const LLVMLoopUnrollAndJamFollowupInner =
+ "llvm.loop.unroll_and_jam.followup_inner";
+static const char *const LLVMLoopUnrollAndJamFollowupOuter =
+ "llvm.loop.unroll_and_jam.followup_outer";
+static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner =
+ "llvm.loop.unroll_and_jam.followup_remainder_inner";
+static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter =
+ "llvm.loop.unroll_and_jam.followup_remainder_outer";
+/// @}
+
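These followup attribute names are consumed through makeFollowupLoopID further down in this file. As a rough, hypothetical sketch (the exact metadata node layout here is an assumption for illustration, not taken from this patch), an outer-loop latch branch requesting unroll-and-jam and asking that the jammed inner loop be marked with llvm.loop.unroll.disable afterwards might look like:

  br i1 %cmp, label %outer.header, label %exit, !llvm.loop !0

  !0 = distinct !{!0, !1, !2}
  !1 = !{!"llvm.loop.unroll_and_jam.enable"}
  !2 = !{!"llvm.loop.unroll_and_jam.followup_inner", !3}
  !3 = !{!"llvm.loop.unroll.disable"}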
static cl::opt<bool>
AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
cl::desc("Allows loops to be unroll-and-jammed."));
@@ -112,11 +126,6 @@ static bool HasUnrollAndJamEnablePragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
}
-// Returns true if the loop has an unroll_and_jam(disable) pragma.
-static bool HasUnrollAndJamDisablePragma(const Loop *L) {
- return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.disable");
-}
-
// If loop has an unroll_and_jam_count pragma return the (necessarily
// positive) value from the pragma. Otherwise return 0.
static unsigned UnrollAndJamCountPragmaValue(const Loop *L) {
@@ -149,7 +158,26 @@ static bool computeUnrollAndJamCount(
OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP) {
- // Check for explicit Count from the "unroll-and-jam-count" option.
+ // First, use computeUnrollCount from the loop unroller to get a count for
+ // unrolling the outer loop; any loops that require explicit unrolling are
+ // left to the unroller. This uses UP.Threshold / UP.PartialThreshold /
+ // UP.MaxCount to come up with sensible loop values.
+ // We have already checked that the loop has no unroll.* pragmas.
+ unsigned MaxTripCount = 0;
+ bool UseUpperBound = false;
+ bool ExplicitUnroll = computeUnrollCount(
+ L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
+ OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
+ if (ExplicitUnroll || UseUpperBound) {
+ // If the user explicitly set the loop to be unrolled, don't unroll-and-jam
+ // it. Leave it for the unroller instead.
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
+ "computeUnrollCount\n");
+ UP.Count = 0;
+ return false;
+ }
+
+ // Override with any explicit Count from the "unroll-and-jam-count" option.
bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
if (UserUnrollCount) {
UP.Count = UnrollAndJamCount;
@@ -174,80 +202,76 @@ static bool computeUnrollAndJamCount(
return true;
}
- // Use computeUnrollCount from the loop unroller to get a sensible count
- // for the unrolling the outer loop. This uses UP.Threshold /
- // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
- // We have already checked that the loop has no unroll.* pragmas.
- unsigned MaxTripCount = 0;
- bool UseUpperBound = false;
- bool ExplicitUnroll = computeUnrollCount(
- L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
- OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
- if (ExplicitUnroll || UseUpperBound) {
- // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
- // for the unroller instead.
- UP.Count = 0;
- return false;
- }
-
bool PragmaEnableUnroll = HasUnrollAndJamEnablePragma(L);
- ExplicitUnroll = PragmaCount > 0 || PragmaEnableUnroll || UserUnrollCount;
+ bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
+ bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
// If the loop has an unrolling pragma, we want to be more aggressive with
// unrolling limits.
- if (ExplicitUnroll && OuterTripCount != 0)
+ if (ExplicitUnrollAndJam)
UP.UnrollAndJamInnerLoopThreshold = PragmaUnrollAndJamThreshold;
if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
UP.UnrollAndJamInnerLoopThreshold) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
+ "inner loop too large\n");
UP.Count = 0;
return false;
}
+ // We have a sensible limit for the outer loop, now adjust it for the inner
+ // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
+ // explicitly, we want to stick to it.
+ if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
+ while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
+ UP.UnrollAndJamInnerLoopThreshold)
+ UP.Count--;
+ }
+
+ // If we are explicitly unrolling and jamming, we are done. Otherwise there
+ // are a number of extra performance heuristics to check.
+ if (ExplicitUnrollAndJam)
+ return true;
+
// If the inner loop count is known and small, leave the entire loop nest to
// the unroller.
- if (!ExplicitUnroll && InnerTripCount &&
- InnerLoopSize * InnerTripCount < UP.Threshold) {
+ if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
+ "being left for the unroller\n");
UP.Count = 0;
return false;
}
- // We have a sensible limit for the outer loop, now adjust it for the inner
- // loop and UP.UnrollAndJamInnerLoopThreshold.
- while (UP.Count != 0 && UP.AllowRemainder &&
- getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
- UP.UnrollAndJamInnerLoopThreshold)
- UP.Count--;
-
- if (!ExplicitUnroll) {
- // Check for situations where UnJ is likely to be unprofitable. Including
- // subloops with more than 1 block.
- if (SubLoop->getBlocks().size() != 1) {
- UP.Count = 0;
- return false;
- }
+ // Check for situations where UnJ is likely to be unprofitable, including
+ // subloops with more than 1 block.
+ if (SubLoop->getBlocks().size() != 1) {
+ LLVM_DEBUG(
+ dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
+ UP.Count = 0;
+ return false;
+ }
- // Limit to loops where there is something to gain from unrolling and
- // jamming the loop. In this case, look for loads that are invariant in the
- // outer loop and can become shared.
- unsigned NumInvariant = 0;
- for (BasicBlock *BB : SubLoop->getBlocks()) {
- for (Instruction &I : *BB) {
- if (auto *Ld = dyn_cast<LoadInst>(&I)) {
- Value *V = Ld->getPointerOperand();
- const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
- if (SE.isLoopInvariant(LSCEV, L))
- NumInvariant++;
- }
+ // Limit to loops where there is something to gain from unrolling and
+ // jamming the loop. In this case, look for loads that are invariant in the
+ // outer loop and can become shared.
+ unsigned NumInvariant = 0;
+ for (BasicBlock *BB : SubLoop->getBlocks()) {
+ for (Instruction &I : *BB) {
+ if (auto *Ld = dyn_cast<LoadInst>(&I)) {
+ Value *V = Ld->getPointerOperand();
+ const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
+ if (SE.isLoopInvariant(LSCEV, L))
+ NumInvariant++;
}
}
- if (NumInvariant == 0) {
- UP.Count = 0;
- return false;
- }
+ }
+ if (NumInvariant == 0) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
+ UP.Count = 0;
+ return false;
}
- return ExplicitUnroll;
+ return false;
}
static LoopUnrollResult
@@ -284,13 +308,16 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
<< L->getHeader()->getParent()->getName() << "] Loop %"
<< L->getHeader()->getName() << "\n");
+ TransformationMode EnableMode = hasUnrollAndJamTransformation(L);
+ if (EnableMode & TM_Disable)
+ return LoopUnrollResult::Unmodified;
+
// A loop with any unroll pragma (enabling/disabling/count/etc) is left for
// the unroller, so long as it does not explicitly have unroll_and_jam
// metadata. This means #pragma nounroll will disable unroll and jam as well
// as unrolling
- if (HasUnrollAndJamDisablePragma(L) ||
- (HasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
- !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam."))) {
+ if (HasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
+ !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n");
return LoopUnrollResult::Unmodified;
}
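For context, a hedged sketch (metadata layout assumed for illustration) of loop metadata that makes hasUnrollAndJamTransformation report TM_Disable, so the check above leaves the loop untouched:

  br i1 %cmp, label %header, label %exit, !llvm.loop !0

  !0 = distinct !{!0, !1}
  !1 = !{!"llvm.loop.unroll_and_jam.disable"}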
@@ -329,6 +356,19 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}
+ // Save original loop IDs for after the transformation.
+ MDNode *OrigOuterLoopID = L->getLoopID();
+ MDNode *OrigSubLoopID = SubLoop->getLoopID();
+
+ // Assign the epilogue's loop ID before unrolling so that it is applied to
+ // every inner loop of the epilogue. The loop ID for the jammed inner loop is
+ // applied later.
+ Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
+ OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
+ LLVMLoopUnrollAndJamFollowupRemainderInner});
+ if (NewInnerEpilogueLoopID.hasValue())
+ SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
+
// Find trip count and trip multiple
unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
@@ -344,9 +384,39 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
if (OuterTripCount && UP.Count > OuterTripCount)
UP.Count = OuterTripCount;
- LoopUnrollResult UnrollResult =
- UnrollAndJamLoop(L, UP.Count, OuterTripCount, OuterTripMultiple,
- UP.UnrollRemainder, LI, &SE, &DT, &AC, &ORE);
+ Loop *EpilogueOuterLoop = nullptr;
+ LoopUnrollResult UnrollResult = UnrollAndJamLoop(
+ L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
+ &SE, &DT, &AC, &ORE, &EpilogueOuterLoop);
+
+ // Assign new loop attributes.
+ if (EpilogueOuterLoop) {
+ Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
+ OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
+ LLVMLoopUnrollAndJamFollowupRemainderOuter});
+ if (NewOuterEpilogueLoopID.hasValue())
+ EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
+ }
+
+ Optional<MDNode *> NewInnerLoopID =
+ makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
+ LLVMLoopUnrollAndJamFollowupInner});
+ if (NewInnerLoopID.hasValue())
+ SubLoop->setLoopID(NewInnerLoopID.getValue());
+ else
+ SubLoop->setLoopID(OrigSubLoopID);
+
+ if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
+ Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
+ OrigOuterLoopID,
+ {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
+ if (NewOuterLoopID.hasValue()) {
+ L->setLoopID(NewOuterLoopID.getValue());
+
+ // Do not setLoopAlreadyUnrolled if a followup was given.
+ return UnrollResult;
+ }
+ }
// If the loop has an unroll count pragma or was unrolled by an explicitly set
// count, mark it as unrolled to prevent unrolling beyond the requested count.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index e955821effa0..38b80f48ed0e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -540,7 +540,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
}
}
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
@@ -661,11 +661,6 @@ static bool HasUnrollEnablePragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
}
-// Returns true if the loop has an unroll(disable) pragma.
-static bool HasUnrollDisablePragma(const Loop *L) {
- return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
-}
-
// Returns true if the loop has a runtime unroll(disable) pragma.
static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
@@ -713,12 +708,19 @@ static uint64_t getUnrolledLoopSize(
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
+// Unless IgnoreUser is true, this will also use metadata and command-line
+// options that are specific to the LoopUnroll pass (which, for instance, are
+// irrelevant for the LoopUnrollAndJam pass).
+// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
+// many LoopUnroll-specific options. The shared functionality should be
+// refactored into its own function.
bool llvm::computeUnrollCount(
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
+
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
@@ -801,7 +803,7 @@ bool llvm::computeUnrollCount(
}
}
- // 4th priority is loop peeling
+ // 4th priority is loop peeling.
computePeelCount(L, LoopSize, UP, TripCount, SE);
if (UP.PeelCount) {
UP.Runtime = false;
@@ -963,13 +965,15 @@ static LoopUnrollResult tryToUnrollLoop(
Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
const TargetTransformInfo &TTI, AssumptionCache &AC,
OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
- Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold,
- Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime,
- Optional<bool> ProvidedUpperBound, Optional<bool> ProvidedAllowPeeling) {
+ bool OnlyWhenForced, Optional<unsigned> ProvidedCount,
+ Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
+ Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
+ Optional<bool> ProvidedAllowPeeling) {
LLVM_DEBUG(dbgs() << "Loop Unroll: F["
<< L->getHeader()->getParent()->getName() << "] Loop %"
<< L->getHeader()->getName() << "\n");
- if (HasUnrollDisablePragma(L))
+ TransformationMode TM = hasUnrollTransformation(L);
+ if (TM & TM_Disable)
return LoopUnrollResult::Unmodified;
if (!L->isLoopSimplifyForm()) {
LLVM_DEBUG(
@@ -977,6 +981,11 @@ static LoopUnrollResult tryToUnrollLoop(
return LoopUnrollResult::Unmodified;
}
+ // When automatic unrolling is disabled, do not unroll unless overridden for
+ // this loop.
+ if (OnlyWhenForced && !(TM & TM_Enable))
+ return LoopUnrollResult::Unmodified;
+
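Roughly speaking (the metadata layout below is an assumption for illustration, not part of the patch), a loop only passes this OnlyWhenForced gate when its metadata explicitly requests unrolling, which makes hasUnrollTransformation report TM_Enable:

  br i1 %cmp, label %header, label %exit, !llvm.loop !0

  !0 = distinct !{!0, !1}
  !1 = !{!"llvm.loop.unroll.enable"}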
unsigned NumInlineCandidates;
bool NotDuplicatable;
bool Convergent;
@@ -1066,14 +1075,39 @@ static LoopUnrollResult tryToUnrollLoop(
if (TripCount && UP.Count > TripCount)
UP.Count = TripCount;
+ // Save loop properties before it is transformed.
+ MDNode *OrigLoopID = L->getLoopID();
+
// Unroll the loop.
+ Loop *RemainderLoop = nullptr;
LoopUnrollResult UnrollResult = UnrollLoop(
L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
- LI, &SE, &DT, &AC, &ORE, PreserveLCSSA);
+ LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
if (UnrollResult == LoopUnrollResult::Unmodified)
return LoopUnrollResult::Unmodified;
+ if (RemainderLoop) {
+ Optional<MDNode *> RemainderLoopID =
+ makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
+ LLVMLoopUnrollFollowupRemainder});
+ if (RemainderLoopID.hasValue())
+ RemainderLoop->setLoopID(RemainderLoopID.getValue());
+ }
+
+ if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
+ Optional<MDNode *> NewLoopID =
+ makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
+ LLVMLoopUnrollFollowupUnrolled});
+ if (NewLoopID.hasValue()) {
+ L->setLoopID(NewLoopID.getValue());
+
+ // Do not setLoopAlreadyUnrolled if loop attributes have been specified
+ // explicitly.
+ return UnrollResult;
+ }
+ }
+
// If the loop has an unroll count pragma or was unrolled by an explicitly set
// count, mark it as unrolled to prevent unrolling beyond the requested count.
// If the loop was peeled, we already "used up" the profile information
@@ -1092,6 +1126,12 @@ public:
static char ID; // Pass ID, replacement for typeid
int OptLevel;
+
+ /// If false, use a cost model to determine whether unrolling of a loop is
+ /// profitable. If true, only loops that explicitly request unrolling via
+ /// metadata are considered. All other loops are skipped.
+ bool OnlyWhenForced;
+
Optional<unsigned> ProvidedCount;
Optional<unsigned> ProvidedThreshold;
Optional<bool> ProvidedAllowPartial;
@@ -1099,15 +1139,16 @@ public:
Optional<bool> ProvidedUpperBound;
Optional<bool> ProvidedAllowPeeling;
- LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
+ LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
+ Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
Optional<bool> UpperBound = None,
Optional<bool> AllowPeeling = None)
- : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
- ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
- ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
- ProvidedAllowPeeling(AllowPeeling) {
+ : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
+ ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold),
+ ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime),
+ ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -1130,8 +1171,8 @@ public:
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
LoopUnrollResult Result = tryToUnrollLoop(
- L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, ProvidedCount,
- ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
+ L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
+ ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
ProvidedUpperBound, ProvidedAllowPeeling);
if (Result == LoopUnrollResult::FullyUnrolled)
@@ -1161,14 +1202,16 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
- int AllowPartial, int Runtime, int UpperBound,
+Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
+ int Threshold, int Count, int AllowPartial,
+ int Runtime, int UpperBound,
int AllowPeeling) {
// TODO: It would make more sense for this function to take the optionals
// directly, but that's dangerous since it would silently break out of tree
// callers.
return new LoopUnroll(
- OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
+ OptLevel, OnlyWhenForced,
+ Threshold == -1 ? None : Optional<unsigned>(Threshold),
Count == -1 ? None : Optional<unsigned>(Count),
AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
Runtime == -1 ? None : Optional<bool>(Runtime),
@@ -1176,8 +1219,8 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
}
-Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) {
- return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0);
+Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced) {
+ return createLoopUnrollPass(OptLevel, OnlyWhenForced, -1, -1, 0, 0, 0, 0);
}
PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -1207,7 +1250,8 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
bool Changed =
tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
- /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
+ /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
+ /*Count*/ None,
/*Threshold*/ None, /*AllowPartial*/ false,
/*Runtime*/ false, /*UpperBound*/ false,
/*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
@@ -1333,23 +1377,21 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
Loop *ParentL = L.getParentLoop();
#endif
- // The API here is quite complex to call, but there are only two interesting
- // states we support: partial and full (or "simple") unrolling. However, to
- // enable these things we actually pass "None" in for the optional to avoid
- // providing an explicit choice.
- Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam,
- AllowPeeling;
// Check if the profile summary indicates that the profiled application
// has a huge working set size, in which case we disable peeling to avoid
// bloating it further.
+ Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
if (PSI && PSI->hasHugeWorkingSetSize())
- AllowPeeling = false;
+ LocalAllowPeeling = false;
std::string LoopName = L.getName();
- LoopUnrollResult Result =
- tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE,
- /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
- /*Threshold*/ None, AllowPartialParam, RuntimeParam,
- UpperBoundParam, AllowPeeling);
+ // The API here is quite complex to call, and we allow selecting some flavors
+ // of unrolling at construction time (by setting UnrollOpts).
+ LoopUnrollResult Result = tryToUnrollLoop(
+ &L, DT, &LI, SE, TTI, AC, ORE,
+ /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
+ /*Count*/ None,
+ /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
+ UnrollOpts.AllowUpperBound, LocalAllowPeeling);
Changed |= Result != LoopUnrollResult::Unmodified;
// The parent must not be damaged by unrolling!
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 6aad077ff19e..4a089dfa7dbf 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -28,18 +28,19 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
@@ -65,8 +66,10 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
@@ -180,11 +183,13 @@ namespace {
Loop *currentLoop = nullptr;
DominatorTree *DT = nullptr;
+ MemorySSA *MSSA = nullptr;
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
BasicBlock *loopHeader = nullptr;
BasicBlock *loopPreheader = nullptr;
bool SanitizeMemory;
- LoopSafetyInfo SafetyInfo;
+ SimpleLoopSafetyInfo SafetyInfo;
// LoopBlocks contains all of the basic blocks of the loop, including the
// preheader of the loop, the body of the loop, and the exit blocks of the
@@ -214,8 +219,12 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (EnableMSSALoopDependency) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
if (hasBranchDivergence)
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
getLoopAnalysisUsage(AU);
}
@@ -237,11 +246,11 @@ namespace {
bool TryTrivialLoopUnswitch(bool &Changed);
bool UnswitchIfProfitable(Value *LoopCond, Constant *Val,
- TerminatorInst *TI = nullptr);
+ Instruction *TI = nullptr);
void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
- BasicBlock *ExitBlock, TerminatorInst *TI);
+ BasicBlock *ExitBlock, Instruction *TI);
void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
- TerminatorInst *TI);
+ Instruction *TI);
void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val, bool isEqual);
@@ -249,8 +258,7 @@ namespace {
void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
BasicBlock *TrueDest,
BasicBlock *FalseDest,
- BranchInst *OldBranch,
- TerminatorInst *TI);
+ BranchInst *OldBranch, Instruction *TI);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
@@ -383,7 +391,8 @@ INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
@@ -515,20 +524,33 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LPM = &LPM_Ref;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ if (EnableMSSALoopDependency) {
+ MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+ assert(DT && "Cannot update MemorySSA without a valid DomTree.");
+ }
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory);
if (SanitizeMemory)
- computeLoopSafetyInfo(&SafetyInfo, L);
+ SafetyInfo.computeLoopSafetyInfo(L);
+
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
bool Changed = false;
do {
assert(currentLoop->isLCSSAForm(*DT));
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
redoLoop = false;
Changed |= processCurrentLoop();
} while(redoLoop);
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
return Changed;
}
@@ -690,7 +712,7 @@ bool LoopUnswitch::processCurrentLoop() {
// loop.
for (Loop::block_iterator I = currentLoop->block_begin(),
E = currentLoop->block_end(); I != E; ++I) {
- TerminatorInst *TI = (*I)->getTerminator();
+ Instruction *TI = (*I)->getTerminator();
// Unswitching on a potentially uninitialized predicate is not
// MSan-friendly. Limit this to the cases when the original predicate is
@@ -699,7 +721,7 @@ bool LoopUnswitch::processCurrentLoop() {
// This is a workaround for the discrepancy between LLVM IR and MSan
// semantics. See PR28054 for more details.
if (SanitizeMemory &&
- !isGuaranteedToExecute(*TI, DT, currentLoop, &SafetyInfo))
+ !SafetyInfo.isGuaranteedToExecute(*TI, DT, currentLoop))
continue;
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -853,7 +875,7 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
/// simplify the loop. If we decide that this is profitable,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
- TerminatorInst *TI) {
+ Instruction *TI) {
// Check to see if it would be profitable to unswitch current loop.
if (!BranchesInfo.CostAllowsUnswitching()) {
LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
@@ -864,7 +886,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
return false;
}
if (hasBranchDivergence &&
- getAnalysis<DivergenceAnalysis>().isDivergent(LoopCond)) {
+ getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName()
<< " at non-trivial condition '" << *Val
@@ -908,7 +930,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
BasicBlock *TrueDest,
BasicBlock *FalseDest,
BranchInst *OldBranch,
- TerminatorInst *TI) {
+ Instruction *TI) {
assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
assert(TrueDest != FalseDest && "Branch targets should be different");
// Insert a conditional branch on LIC to the two preheaders. The original
@@ -952,13 +974,16 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) {
Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc});
}
-
DT->applyUpdates(Updates);
+
+ if (MSSAU)
+ MSSAU->applyUpdates(Updates, *DT);
}
// If either edge is critical, split it. This helps preserve LoopSimplify
// form for enclosing loops.
- auto Options = CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA();
+ auto Options =
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA();
SplitCriticalEdge(BI, 0, Options);
SplitCriticalEdge(BI, 1, Options);
}
@@ -970,7 +995,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
/// outside of the loop and updating loop info.
void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
BasicBlock *ExitBlock,
- TerminatorInst *TI) {
+ Instruction *TI) {
LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
<< loopHeader->getName() << " [" << L->getBlocks().size()
<< " blocks] in Function "
@@ -984,7 +1009,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
// First step, split the preheader, so that we know that there is a safe place
// to insert the conditional branch. We will change loopPreheader to have a
// conditional branch on Cond.
- BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, DT, LI);
+ BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, DT, LI, MSSAU.get());
// Now that we have a place to insert the conditional branch, create a place
// to branch to: this is the exit block out of the loop that we should
@@ -995,7 +1020,8 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
// without actually branching to it (the exit block should be dominated by the
// loop header, not the preheader).
assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
- BasicBlock *NewExit = SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI);
+ BasicBlock *NewExit =
+ SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get());
// Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
@@ -1015,6 +1041,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
// particular value, rewrite the loop with this info. We know that this will
// at least eliminate the old branch.
RewriteLoopBodyWithConditionConstant(L, Cond, Val, false);
+
++NumTrivial;
}
@@ -1026,7 +1053,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
/// condition.
bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
BasicBlock *CurrentBB = currentLoop->getHeader();
- TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+ Instruction *CurrentTerm = CurrentBB->getTerminator();
LLVMContext &Context = CurrentBB->getContext();
// If loop header has only one reachable successor (currently via an
@@ -1190,7 +1217,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
- SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI,
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(),
/*PreserveLCSSA*/ true);
}
}
@@ -1199,7 +1226,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
/// Split it into loop versions and test the condition outside of either loop.
/// Return the loops created as Out1/Out2.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
- Loop *L, TerminatorInst *TI) {
+ Loop *L, Instruction *TI) {
Function *F = loopHeader->getParent();
LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
<< loopHeader->getName() << " [" << L->getBlocks().size()
@@ -1216,7 +1243,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// First step, split the preheader and exit blocks, and add these blocks to
// the LoopBlocks list.
- BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, DT, LI);
+ BasicBlock *NewPreheader =
+ SplitEdge(loopPreheader, loopHeader, DT, LI, MSSAU.get());
LoopBlocks.push_back(NewPreheader);
// We want the loop to come after the preheader, but before the exit blocks.
@@ -1318,10 +1346,24 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
"Preheader splitting did not work correctly!");
+ if (MSSAU) {
+ // Update MemorySSA after cloning, and before splitting to unreachables,
+ // since that invalidates the 1:1 mapping of clones in VMap.
+ LoopBlocksRPO LBRPO(L);
+ LBRPO.perform(LI);
+ MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap);
+ }
+
// Emit the new branch that selects between the two versions of this loop.
EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
TI);
LPM->deleteSimpleAnalysisValue(OldBR, L);
+ if (MSSAU) {
+ // Update MemoryPhis in Exit blocks.
+ MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
// The OldBr was replaced by a new one and removed (but not erased) by
// EmitPreheaderBranchOnCondition. It is no longer needed, so delete it.
@@ -1347,6 +1389,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
LICHandle && !isa<Constant>(LICHandle))
RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
+
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
}
/// Remove all instances of I from the worklist vector specified.
@@ -1485,7 +1530,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// and hooked up so as to preserve the loop structure, because
// trying to update it is complicated. So instead we preserve the
// loop structure and put the block on a dead code path.
- SplitEdge(Switch, SISucc, DT, LI);
+ SplitEdge(Switch, SISucc, DT, LI, MSSAU.get());
// Compute the successors instead of relying on the return value
// of SplitEdge, since it may have split the switch successor
// after PHI nodes.
@@ -1539,6 +1584,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Worklist.push_back(Use);
LPM->deleteSimpleAnalysisValue(I, L);
RemoveFromWorklist(I, Worklist);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
I->eraseFromParent();
++NumSimplify;
continue;
@@ -1578,6 +1625,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// Move all of the successor contents from Succ to Pred.
Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
Succ->begin(), Succ->end());
+ if (MSSAU)
+ MSSAU->moveAllAfterMergeBlocks(Succ, Pred, BI);
LPM->deleteSimpleAnalysisValue(BI, L);
RemoveFromWorklist(BI, Worklist);
BI->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 06e86081e8a0..83861b98fbd8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -360,10 +360,11 @@ bool LoopVersioningLICM::legalLoopMemoryAccesses() {
bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
assert(I != nullptr && "Null instruction found!");
// Check function call safety
- if (isa<CallInst>(I) && !AA->doesNotAccessMemory(CallSite(I))) {
- LLVM_DEBUG(dbgs() << " Unsafe call site found.\n");
- return false;
- }
+ if (auto *Call = dyn_cast<CallBase>(I))
+ if (!AA->doesNotAccessMemory(Call)) {
+ LLVM_DEBUG(dbgs() << " Unsafe call site found.\n");
+ return false;
+ }
// Avoid loops with the possibility of a throw
if (I->mayThrow()) {
LLVM_DEBUG(dbgs() << " May throw instruction found in loop body\n");
@@ -594,6 +595,11 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipLoop(L))
return false;
+
+ // Do not do the transformation if disabled by metadata.
+ if (hasLICMVersioningTransformation(L) & TM_Disable)
+ return false;
+
// Get Analysis information.
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
@@ -628,6 +634,8 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Set Loop Versioning metaData for version loop.
addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData);
// Set "llvm.mem.parallel_loop_access" metaData to versioned loop.
+ // FIXME: "llvm.mem.parallel_loop_access" annotates memory access
+ // instructions, not loops.
addStringMetadataToLoop(LVer.getVersionedLoop(),
"llvm.mem.parallel_loop_access");
// Update version loop with aggressive aliasing assumption.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 070114a84cc5..4867b33d671f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -15,25 +15,19 @@
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
using namespace llvm;
-static cl::opt<uint32_t> PredicatePassBranchWeight(
- "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20),
- cl::desc("The probability of a guard failing is assumed to be the "
- "reciprocal of this value (default = 1 << 20)"));
-
namespace {
struct LowerGuardIntrinsicLegacyPass : public FunctionPass {
static char ID;
@@ -46,45 +40,6 @@ struct LowerGuardIntrinsicLegacyPass : public FunctionPass {
};
}
-static void MakeGuardControlFlowExplicit(Function *DeoptIntrinsic,
- CallInst *CI) {
- OperandBundleDef DeoptOB(*CI->getOperandBundle(LLVMContext::OB_deopt));
- SmallVector<Value *, 4> Args(std::next(CI->arg_begin()), CI->arg_end());
-
- auto *CheckBB = CI->getParent();
- auto *DeoptBlockTerm =
- SplitBlockAndInsertIfThen(CI->getArgOperand(0), CI, true);
-
- auto *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
-
- // SplitBlockAndInsertIfThen inserts control flow that branches to
- // DeoptBlockTerm if the condition is true. We want the opposite.
- CheckBI->swapSuccessors();
-
- CheckBI->getSuccessor(0)->setName("guarded");
- CheckBI->getSuccessor(1)->setName("deopt");
-
- if (auto *MD = CI->getMetadata(LLVMContext::MD_make_implicit))
- CheckBI->setMetadata(LLVMContext::MD_make_implicit, MD);
-
- MDBuilder MDB(CI->getContext());
- CheckBI->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(PredicatePassBranchWeight, 1));
-
- IRBuilder<> B(DeoptBlockTerm);
- auto *DeoptCall = B.CreateCall(DeoptIntrinsic, Args, {DeoptOB}, "");
-
- if (DeoptIntrinsic->getReturnType()->isVoidTy()) {
- B.CreateRetVoid();
- } else {
- DeoptCall->setName("deoptcall");
- B.CreateRet(DeoptCall);
- }
-
- DeoptCall->setCallingConv(CI->getCallingConv());
- DeoptBlockTerm->eraseFromParent();
-}
-
static bool lowerGuardIntrinsic(Function &F) {
// Check if we can cheaply rule out the possibility of not having any work to
// do.
@@ -95,10 +50,8 @@ static bool lowerGuardIntrinsic(Function &F) {
SmallVector<CallInst *, 8> ToLower;
for (auto &I : instructions(F))
- if (auto *CI = dyn_cast<CallInst>(&I))
- if (auto *F = CI->getCalledFunction())
- if (F->getIntrinsicID() == Intrinsic::experimental_guard)
- ToLower.push_back(CI);
+ if (isGuard(&I))
+ ToLower.push_back(cast<CallInst>(&I));
if (ToLower.empty())
return false;
@@ -108,7 +61,7 @@ static bool lowerGuardIntrinsic(Function &F) {
DeoptIntrinsic->setCallingConv(GuardDecl->getCallingConv());
for (auto *CI : ToLower) {
- MakeGuardControlFlowExplicit(DeoptIntrinsic, CI);
+ makeGuardControlFlowExplicit(DeoptIntrinsic, CI);
CI->eraseFromParent();
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/contrib/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
new file mode 100644
index 000000000000..1ba3994eba0e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
@@ -0,0 +1,120 @@
+//===- MakeGuardsExplicit.cpp - Turn guard intrinsics into guard branches -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the @llvm.experimental.guard intrinsic to the new form of
+// guard represented as a widenable explicit branch to the deopt block. The
+// difference between this pass and LowerGuardIntrinsic is that after this pass
+// the guard, represented as an intrinsic:
+//
+// call void(i1, ...) @llvm.experimental.guard(i1 %old_cond) [ "deopt"() ]
+//
+// is transformed into a guard represented as a widenable explicit branch:
+//
+// %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+// br i1 (%old_cond & %widenable_cond), label %guarded, label %deopt
+//
+// Here:
+// - The semantics of @llvm.experimental.widenable.condition allows replacing
+// %widenable_cond with the construction (%widenable_cond & %any_other_cond)
+// without loss of correctness;
+// - %guarded is the lower part of old guard intrinsic's parent block split by
+// the intrinsic call;
+// - %deopt is a block containing a sole call to @llvm.experimental.deoptimize
+// intrinsic.
+//
+// Therefore, this branch preserves the property of widenability.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
+
+using namespace llvm;
+
+namespace {
+struct MakeGuardsExplicitLegacyPass : public FunctionPass {
+ static char ID;
+ MakeGuardsExplicitLegacyPass() : FunctionPass(ID) {
+ initializeMakeGuardsExplicitLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+}
+
+static void turnToExplicitForm(CallInst *Guard, Function *DeoptIntrinsic) {
+ // Replace the guard with an explicit branch (just like in GuardWidening).
+ BasicBlock *BB = Guard->getParent();
+ makeGuardControlFlowExplicit(DeoptIntrinsic, Guard);
+ BranchInst *ExplicitGuard = cast<BranchInst>(BB->getTerminator());
+ assert(ExplicitGuard->isConditional() && "Must be!");
+
+  // We want the guard to be expressed as explicit control flow, but still be
+  // widenable. For that, we add a Widenable Condition intrinsic call to the
+  // guard's condition.
+ IRBuilder<> B(ExplicitGuard);
+ auto *WidenableCondition =
+ B.CreateIntrinsic(Intrinsic::experimental_widenable_condition,
+ {}, {}, nullptr, "widenable_cond");
+ WidenableCondition->setCallingConv(Guard->getCallingConv());
+ auto *NewCond =
+ B.CreateAnd(ExplicitGuard->getCondition(), WidenableCondition);
+ NewCond->setName("exiplicit_guard_cond");
+ ExplicitGuard->setCondition(NewCond);
+ Guard->eraseFromParent();
+}
+
+static bool explicifyGuards(Function &F) {
+ // Check if we can cheaply rule out the possibility of not having any work to
+ // do.
+ auto *GuardDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ if (!GuardDecl || GuardDecl->use_empty())
+ return false;
+
+ SmallVector<CallInst *, 8> GuardIntrinsics;
+ for (auto &I : instructions(F))
+ if (isGuard(&I))
+ GuardIntrinsics.push_back(cast<CallInst>(&I));
+
+ if (GuardIntrinsics.empty())
+ return false;
+
+ auto *DeoptIntrinsic = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::experimental_deoptimize, {F.getReturnType()});
+ DeoptIntrinsic->setCallingConv(GuardDecl->getCallingConv());
+
+ for (auto *Guard : GuardIntrinsics)
+ turnToExplicitForm(Guard, DeoptIntrinsic);
+
+ return true;
+}
+
+bool MakeGuardsExplicitLegacyPass::runOnFunction(Function &F) {
+ return explicifyGuards(F);
+}
+
+char MakeGuardsExplicitLegacyPass::ID = 0;
+INITIALIZE_PASS(MakeGuardsExplicitLegacyPass, "make-guards-explicit",
+ "Lower the guard intrinsic to explicit control flow form",
+ false, false)
+
+PreservedAnalyses MakeGuardsExplicitPass::run(Function &F,
+ FunctionAnalysisManager &) {
+ if (explicifyGuards(F))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
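The core of turnToExplicitForm() above is the conjunction of the original guard condition with a freshly created widenable condition. A reduced sketch of that step under the same headers (the helper name addWidenableCondition is illustrative, not part of the patch):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Given the conditional branch produced by makeGuardControlFlowExplicit(),
// AND a widenable condition into its predicate:
//   br i1 %cond ...  ==>  br i1 (%cond & %widenable_cond) ...
static void addWidenableCondition(BranchInst *ExplicitGuard) {
  IRBuilder<> B(ExplicitGuard);
  CallInst *WC = B.CreateIntrinsic(Intrinsic::experimental_widenable_condition,
                                   {}, {}, nullptr, "widenable_cond");
  Value *NewCond = B.CreateAnd(ExplicitGuard->getCondition(), WC);
  ExplicitGuard->setCondition(NewCond);
}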
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3b74421a47a0..ced923d6973d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -398,7 +398,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
MemsetRanges Ranges(DL);
BasicBlock::iterator BI(StartInst);
- for (++BI; !isa<TerminatorInst>(BI); ++BI) {
+ for (++BI; !BI->isTerminator(); ++BI) {
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
// If the instruction is readnone, ignore it, otherwise bail out. We
// don't even allow readonly here because we don't want something like:
@@ -413,7 +413,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!NextStore->isSimple()) break;
// Check to see if this stored value is of the same byte-splattable value.
- if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+ Value *StoredByte = isBytewiseValue(NextStore->getOperand(0));
+ if (isa<UndefValue>(ByteVal) && StoredByte)
+ ByteVal = StoredByte;
+ if (ByteVal != StoredByte)
break;
// Check to see if this store is to a constant offset from the start ptr.
@@ -543,8 +546,8 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
// Memory locations of lifted instructions.
SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
- // Lifted callsites.
- SmallVector<ImmutableCallSite, 8> CallSites;
+ // Lifted calls.
+ SmallVector<const CallBase *, 8> Calls;
const MemoryLocation LoadLoc = MemoryLocation::get(LI);
@@ -562,10 +565,9 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
});
if (!NeedLift)
- NeedLift =
- llvm::any_of(CallSites, [C, &AA](const ImmutableCallSite &CS) {
- return isModOrRefSet(AA.getModRefInfo(C, CS));
- });
+ NeedLift = llvm::any_of(Calls, [C, &AA](const CallBase *Call) {
+ return isModOrRefSet(AA.getModRefInfo(C, Call));
+ });
}
if (!NeedLift)
@@ -576,12 +578,12 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
// none of them may modify its source.
if (isModSet(AA.getModRefInfo(C, LoadLoc)))
return false;
- else if (auto CS = ImmutableCallSite(C)) {
+ else if (const auto *Call = dyn_cast<CallBase>(C)) {
// If we can't lift this before P, it's game over.
- if (isModOrRefSet(AA.getModRefInfo(P, CS)))
+ if (isModOrRefSet(AA.getModRefInfo(P, Call)))
return false;
- CallSites.push_back(CS);
+ Calls.push_back(Call);
} else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
// If we can't lift this before P, it's game over.
auto ML = MemoryLocation::get(C);
@@ -672,13 +674,11 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (UseMemMove)
M = Builder.CreateMemMove(
SI->getPointerOperand(), findStoreAlignment(DL, SI),
- LI->getPointerOperand(), findLoadAlignment(DL, LI), Size,
- SI->isVolatile());
+ LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);
else
M = Builder.CreateMemCpy(
SI->getPointerOperand(), findStoreAlignment(DL, SI),
- LI->getPointerOperand(), findLoadAlignment(DL, LI), Size,
- SI->isVolatile());
+ LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);
LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
<< *M << "\n");
@@ -767,8 +767,8 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!Align)
Align = DL.getABITypeAlignment(T);
IRBuilder<> Builder(SI);
- auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,
- Size, Align, SI->isVolatile());
+ auto *M =
+ Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, Align);
LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
@@ -916,8 +916,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
continue;
}
if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
- if (IT->getIntrinsicID() == Intrinsic::lifetime_start ||
- IT->getIntrinsicID() == Intrinsic::lifetime_end)
+ if (IT->isLifetimeStartOrEnd())
continue;
if (U != C && U != cpy)
@@ -942,10 +941,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
AliasAnalysis &AA = LookupAliasAnalysis();
- ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
+ ModRefInfo MR = AA.getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
// If necessary, perform additional analysis.
if (isModOrRefSet(MR))
- MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
+ MR = AA.callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), &DT);
if (isModOrRefSet(MR))
return false;
@@ -993,8 +992,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// handled here, but combineMetadata doesn't support them yet
unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias,
- LLVMContext::MD_invariant_group};
- combineMetadata(C, cpy, KnownIDs);
+ LLVMContext::MD_invariant_group,
+ LLVMContext::MD_access_group};
+ combineMetadata(C, cpy, KnownIDs, true);
// Remove the memcpy.
MD->removeInstruction(cpy);
@@ -1056,6 +1056,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
UseMemMove = true;
// If all checks passed, then we can transform M.
+ LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
+ << *MDep << '\n' << *M << '\n');
// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
@@ -1141,6 +1143,21 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
return true;
}
+/// Determine whether the instruction has undefined content for the given Size,
+/// either because it was freshly alloca'd or started its lifetime.
+static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
+ if (isa<AllocaInst>(I))
+ return true;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+ if (LTSize->getZExtValue() >= Size->getZExtValue())
+ return true;
+
+ return false;
+}
+
/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
@@ -1164,12 +1181,27 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
return false;
- ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+ // A known memset size is required.
ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
+ if (!MemSetSize)
+ return false;
+
// Make sure the memcpy doesn't read any more than what the memset wrote.
// Don't worry about sizes larger than i64.
- if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
- return false;
+ ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+ if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
+ // If the memcpy is larger than the memset, but the memory was undef prior
+ // to the memset, we can just ignore the tail. Technically we're only
+ // interested in the bytes from MemSetSize..CopySize here, but as we can't
+ // easily represent this location, we use the full 0..CopySize range.
+ MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
+ MemDepResult DepInfo = MD->getPointerDependencyFrom(
+ MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
+ if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+ CopySize = MemSetSize;
+ else
+ return false;
+ }
IRBuilder<> Builder(MemCpy);
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
@@ -1249,19 +1281,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
return processMemCpyMemCpyDependence(M, MDep);
} else if (SrcDepInfo.isDef()) {
- Instruction *I = SrcDepInfo.getInst();
- bool hasUndefContents = false;
-
- if (isa<AllocaInst>(I)) {
- hasUndefContents = true;
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
- if (LTSize->getZExtValue() >= CopySize->getZExtValue())
- hasUndefContents = true;
- }
-
- if (hasUndefContents) {
+ if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
MD->removeInstruction(M);
M->eraseFromParent();
++NumMemCpyInstr;
@@ -1320,7 +1340,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemoryLocation(ByValArg, ByValSize), true,
+ MemoryLocation(ByValArg, LocationSize::precise(ByValSize)), true,
CS.getInstruction()->getIterator(), CS.getInstruction()->getParent());
if (!DepInfo.isClobber())
return false;
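Several of the MemCpyOpt changes above replace a raw integer size with LocationSize::precise() when building memory locations for alias queries. A small sketch of the updated idiom (the helper name and parameters are illustrative, not from the patch):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instruction.h"
#include <cstdint>

using namespace llvm;

// Ask whether I may read or write exactly Bytes bytes starting at Ptr.
static bool mayTouchRange(AliasAnalysis &AA, const Instruction *I,
                          const Value *Ptr, uint64_t Bytes) {
  MemoryLocation Loc(Ptr, LocationSize::precise(Bytes));
  return isModOrRefSet(AA.getModRefInfo(I, Loc));
}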
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index ff0183a8ea2d..69fd8b163a07 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -41,6 +41,15 @@ namespace {
#define DEBUG_TYPE "mergeicmps"
+// Returns true if the instruction is a simple load or a simple store
+static bool isSimpleLoadOrStore(const Instruction *I) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isSimple();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isSimple();
+ return false;
+}
+
// A BCE atom.
struct BCEAtom {
BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
@@ -81,14 +90,15 @@ BCEAtom visitICmpLoadOperand(Value *const Val) {
LLVM_DEBUG(dbgs() << "used outside of block\n");
return {};
}
- if (LoadI->isVolatile()) {
- LLVM_DEBUG(dbgs() << "volatile\n");
+ // Do not optimize atomic loads to non-atomic memcmp
+ if (!LoadI->isSimple()) {
+ LLVM_DEBUG(dbgs() << "volatile or atomic\n");
return {};
}
Value *const Addr = LoadI->getOperand(0);
if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) {
LLVM_DEBUG(dbgs() << "GEP\n");
- if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
+ if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
LLVM_DEBUG(dbgs() << "used outside of block\n");
return {};
}
@@ -150,18 +160,19 @@ class BCECmpBlock {
// Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp
// instructions in the block.
- bool canSplit() const;
+ bool canSplit(AliasAnalysis *AA) const;
// Return true if all the relevant instructions in the BCE-cmp-block can
// be sunk below this instruction. By doing this, we know we can separate the
// BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the
// block.
- bool canSinkBCECmpInst(const Instruction *, DenseSet<Instruction *> &) const;
+ bool canSinkBCECmpInst(const Instruction *, DenseSet<Instruction *> &,
+ AliasAnalysis *AA) const;
// We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block
// instructions. Split the old block and move all non-BCE-cmp-insts into the
// new parent block.
- void split(BasicBlock *NewParent) const;
+ void split(BasicBlock *NewParent, AliasAnalysis *AA) const;
// The basic block where this comparison happens.
BasicBlock *BB = nullptr;
@@ -179,12 +190,21 @@ private:
};
bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
- DenseSet<Instruction *> &BlockInsts) const {
+ DenseSet<Instruction *> &BlockInsts,
+ AliasAnalysis *AA) const {
// If this instruction has side effects and it is in the middle of the BCE cmp
// block instructions, then bail for now.
- // TODO: use alias analysis to tell whether there is real interference.
- if (Inst->mayHaveSideEffects())
- return false;
+ if (Inst->mayHaveSideEffects()) {
+ // Bail if this is not a simple load or store
+ if (!isSimpleLoadOrStore(Inst))
+ return false;
+ // Disallow stores that might alias the BCE operands
+ MemoryLocation LLoc = MemoryLocation::get(Lhs_.LoadI);
+ MemoryLocation RLoc = MemoryLocation::get(Rhs_.LoadI);
+ if (isModSet(AA->getModRefInfo(Inst, LLoc)) ||
+ isModSet(AA->getModRefInfo(Inst, RLoc)))
+ return false;
+ }
// Make sure this instruction does not use any of the BCE cmp block
// instructions as operand.
for (auto BI : BlockInsts) {
@@ -194,14 +214,15 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
return true;
}
-void BCECmpBlock::split(BasicBlock *NewParent) const {
+void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
DenseSet<Instruction *> BlockInsts(
{Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
llvm::SmallVector<Instruction *, 4> OtherInsts;
for (Instruction &Inst : *BB) {
if (BlockInsts.count(&Inst))
continue;
- assert(canSinkBCECmpInst(&Inst, BlockInsts) && "Split unsplittable block");
+ assert(canSinkBCECmpInst(&Inst, BlockInsts, AA) &&
+ "Split unsplittable block");
// This is a non-BCE-cmp-block instruction. And it can be separated
// from the BCE-cmp-block instruction.
OtherInsts.push_back(&Inst);
@@ -213,12 +234,12 @@ void BCECmpBlock::split(BasicBlock *NewParent) const {
}
}
-bool BCECmpBlock::canSplit() const {
+bool BCECmpBlock::canSplit(AliasAnalysis *AA) const {
DenseSet<Instruction *> BlockInsts(
{Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
for (Instruction &Inst : *BB) {
if (!BlockInsts.count(&Inst)) {
- if (!canSinkBCECmpInst(&Inst, BlockInsts))
+ if (!canSinkBCECmpInst(&Inst, BlockInsts, AA))
return false;
}
}
@@ -262,8 +283,9 @@ BCECmpBlock visitICmp(const ICmpInst *const CmpI,
if (!Lhs.Base()) return {};
auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1));
if (!Rhs.Base()) return {};
+ const auto &DL = CmpI->getModule()->getDataLayout();
return BCECmpBlock(std::move(Lhs), std::move(Rhs),
- CmpI->getOperand(0)->getType()->getScalarSizeInBits());
+ DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
}
return {};
}
@@ -324,7 +346,8 @@ static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
// A chain of comparisons.
class BCECmpChain {
public:
- BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi);
+ BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
+ AliasAnalysis *AA);
int size() const { return Comparisons_.size(); }
@@ -332,7 +355,7 @@ class BCECmpChain {
void dump() const;
#endif // MERGEICMPS_DOT_ON
- bool simplify(const TargetLibraryInfo *const TLI);
+ bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
private:
static bool IsContiguous(const BCECmpBlock &First,
@@ -348,7 +371,7 @@ class BCECmpChain {
// null, the merged block will link to the phi block.
void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
BasicBlock *const NextBBInChain, PHINode &Phi,
- const TargetLibraryInfo *const TLI);
+ const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
PHINode &Phi_;
std::vector<BCECmpBlock> Comparisons_;
@@ -356,7 +379,8 @@ class BCECmpChain {
BasicBlock *EntryBlock_;
};
-BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi)
+BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
+ AliasAnalysis *AA)
: Phi_(Phi) {
assert(!Blocks.empty() && "a chain should have at least one block");
// Now look inside blocks to check for BCE comparisons.
@@ -388,7 +412,7 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi)
// and start anew.
//
// NOTE: we only handle block with single predecessor for now.
- if (Comparison.canSplit()) {
+ if (Comparison.canSplit(AA)) {
LLVM_DEBUG(dbgs()
<< "Split initial block '" << Comparison.BB->getName()
<< "' that does extra work besides compare\n");
@@ -442,10 +466,9 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi)
#endif // MERGEICMPS_DOT_ON
// Reorder blocks by LHS. We can do that without changing the
// semantics because we are only accessing dereferencable memory.
- llvm::sort(Comparisons_.begin(), Comparisons_.end(),
- [](const BCECmpBlock &a, const BCECmpBlock &b) {
- return a.Lhs() < b.Lhs();
- });
+ llvm::sort(Comparisons_, [](const BCECmpBlock &a, const BCECmpBlock &b) {
+ return a.Lhs() < b.Lhs();
+ });
#ifdef MERGEICMPS_DOT_ON
errs() << "AFTER REORDERING:\n\n";
dump();
@@ -475,7 +498,8 @@ void BCECmpChain::dump() const {
}
#endif // MERGEICMPS_DOT_ON
-bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) {
+bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
+ AliasAnalysis *AA) {
// First pass to check if there is at least one merge. If not, we don't do
// anything and we keep analysis passes intact.
{
@@ -523,13 +547,13 @@ bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) {
// Merge all previous comparisons and start a new merge block.
mergeComparisons(
makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
- Comparisons_[I].BB, Phi_, TLI);
+ Comparisons_[I].BB, Phi_, TLI, AA);
NumMerged = 1;
}
}
mergeComparisons(makeArrayRef(Comparisons_)
.slice(Comparisons_.size() - NumMerged, NumMerged),
- nullptr, Phi_, TLI);
+ nullptr, Phi_, TLI, AA);
return true;
}
@@ -537,7 +561,8 @@ bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) {
void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
BasicBlock *const NextBBInChain,
PHINode &Phi,
- const TargetLibraryInfo *const TLI) {
+ const TargetLibraryInfo *const TLI,
+ AliasAnalysis *AA) {
assert(!Comparisons.empty());
const auto &FirstComparison = *Comparisons.begin();
BasicBlock *const BB = FirstComparison.BB;
@@ -550,7 +575,7 @@ void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
auto C = std::find_if(Comparisons.begin(), Comparisons.end(),
[](const BCECmpBlock &B) { return B.RequireSplit; });
if (C != Comparisons.end())
- C->split(EntryBlock_);
+ C->split(EntryBlock_, AA);
LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
const auto TotalSize =
@@ -666,7 +691,8 @@ std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
return Blocks;
}
-bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) {
+bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
+ AliasAnalysis *AA) {
LLVM_DEBUG(dbgs() << "processPhi()\n");
if (Phi.getNumIncomingValues() <= 1) {
LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n");
@@ -724,14 +750,14 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) {
const auto Blocks =
getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues());
if (Blocks.empty()) return false;
- BCECmpChain CmpChain(Blocks, Phi);
+ BCECmpChain CmpChain(Blocks, Phi, AA);
if (CmpChain.size() < 2) {
LLVM_DEBUG(dbgs() << "skip: only one compare block\n");
return false;
}
- return CmpChain.simplify(TLI);
+ return CmpChain.simplify(TLI, AA);
}
class MergeICmps : public FunctionPass {
@@ -746,7 +772,8 @@ class MergeICmps : public FunctionPass {
if (skipFunction(F)) return false;
const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto PA = runImpl(F, &TLI, &TTI);
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto PA = runImpl(F, &TLI, &TTI, AA);
return !PA.areAllPreserved();
}
@@ -754,14 +781,16 @@ class MergeICmps : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
}
PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI);
+ const TargetTransformInfo *TTI, AliasAnalysis *AA);
};
PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ AliasAnalysis *AA) {
LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
// We only try merging comparisons if the target wants to expand memcmp later.
@@ -777,7 +806,7 @@ PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
// A Phi operation is always first in a basic block.
if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
- MadeChange |= processPhi(*Phi, TLI);
+ MadeChange |= processPhi(*Phi, TLI, AA);
}
if (MadeChange) return PreservedAnalyses::none();
@@ -791,6 +820,7 @@ INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
"Merge contiguous icmps into a memcmp", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
"Merge contiguous icmps into a memcmp", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 3464b759280f..ee21feca8d2c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -211,6 +211,7 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
auto *NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
&BB->front());
+ NewPN->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
NewPN->addIncoming(Opd1, S0->getParent());
NewPN->addIncoming(Opd2, S1->getParent());
return NewPN;
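The one-line MergedLoadStoreMotion change gives the sink-created PHI a merged debug location rather than whatever location it would otherwise inherit. A sketch of the surrounding pattern (names are illustrative, not from the patch):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Build the PHI that selects between the values of two stores being sunk
// into their common successor Succ.
static PHINode *mergeStoredValues(BasicBlock *Succ, StoreInst *S0,
                                  StoreInst *S1) {
  Value *V0 = S0->getValueOperand();
  Value *V1 = S1->getValueOperand();
  PHINode *PN = PHINode::Create(V0->getType(), 2, "sunk.val", &Succ->front());
  // Merge both stores' locations rather than arbitrarily picking one.
  PN->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
  PN->addIncoming(V0, S0->getParent());
  PN->addIncoming(V1, S1->getParent());
  return PN;
}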
diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 3e47e9441d15..7cbb0fe70f82 100644
--- a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -657,8 +657,8 @@ public:
TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,
const DataLayout &DL)
: F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL),
- PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)), SQ(DL, TLI, DT, AC) {
- }
+ PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)),
+ SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false) {}
bool runGVN();
@@ -777,7 +777,7 @@ private:
// Reachability handling.
void updateReachableEdge(BasicBlock *, BasicBlock *);
- void processOutgoingEdges(TerminatorInst *, BasicBlock *);
+ void processOutgoingEdges(Instruction *, BasicBlock *);
Value *findConditionEquivalence(Value *) const;
// Elimination.
@@ -959,8 +959,7 @@ static bool isCopyOfAPHI(const Value *V) {
// order. The BlockInstRange numbers are generated in an RPO walk of the basic
// blocks.
void NewGVN::sortPHIOps(MutableArrayRef<ValPair> Ops) const {
- llvm::sort(Ops.begin(), Ops.end(),
- [&](const ValPair &P1, const ValPair &P2) {
+ llvm::sort(Ops, [&](const ValPair &P1, const ValPair &P2) {
return BlockInstRange.lookup(P1.second).first <
BlockInstRange.lookup(P2.second).first;
});
@@ -1087,9 +1086,13 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E,
CongruenceClass *CC = ValueToClass.lookup(V);
if (CC) {
if (CC->getLeader() && CC->getLeader() != I) {
- // Don't add temporary instructions to the user lists.
- if (!AllTempInstructions.count(I))
- addAdditionalUsers(V, I);
+ // If we simplified to something else, we need to communicate
+ // that we're users of the value we simplified to.
+ if (I != V) {
+ // Don't add temporary instructions to the user lists.
+ if (!AllTempInstructions.count(I))
+ addAdditionalUsers(V, I);
+ }
return createVariableOrConstant(CC->getLeader());
}
if (CC->getDefiningExpr()) {
@@ -1752,7 +1755,7 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
return true;
});
// If we are left with no operands, it's dead.
- if (Filtered.begin() == Filtered.end()) {
+ if (empty(Filtered)) {
// If it has undef at this point, it means there are no non-undef arguments,
// and thus, the value of the phi node must be undef.
if (HasUndef) {
@@ -2484,7 +2487,7 @@ Value *NewGVN::findConditionEquivalence(Value *Cond) const {
}
// Process the outgoing edges of a block for reachability.
-void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) {
// Evaluate reachability of terminator instruction.
BranchInst *BR;
if ((BR = dyn_cast<BranchInst>(TI)) && BR->isConditional()) {
@@ -2925,7 +2928,7 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
PHINodeUses.insert(UInst);
// Don't insert void terminators into the class. We don't value number
// them, and they just end up sitting in TOP.
- if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
+ if (I.isTerminator() && I.getType()->isVoidTy())
continue;
TOPClass->insert(&I);
ValueToClass[&I] = TOPClass;
@@ -3134,7 +3137,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
auto *Symbolized = createUnknownExpression(I);
performCongruenceFinding(I, Symbolized);
}
- processOutgoingEdges(dyn_cast<TerminatorInst>(I), I->getParent());
+ processOutgoingEdges(I, I->getParent());
}
}
@@ -3172,12 +3175,8 @@ bool NewGVN::singleReachablePHIPath(
auto FilteredPhiArgs =
make_filter_range(MP->operands(), ReachableOperandPred);
SmallVector<const Value *, 32> OperandList;
- std::copy(FilteredPhiArgs.begin(), FilteredPhiArgs.end(),
- std::back_inserter(OperandList));
- bool Okay = OperandList.size() == 1;
- if (!Okay)
- Okay =
- std::equal(OperandList.begin(), OperandList.end(), OperandList.begin());
+ llvm::copy(FilteredPhiArgs, std::back_inserter(OperandList));
+ bool Okay = is_splat(OperandList);
if (Okay)
return singleReachablePHIPath(Visited, cast<MemoryAccess>(OperandList[0]),
Second);
@@ -3272,8 +3271,7 @@ void NewGVN::verifyMemoryCongruency() const {
const MemoryDef *MD = cast<MemoryDef>(U);
return ValueToClass.lookup(MD->getMemoryInst());
});
- assert(std::equal(PhiOpClasses.begin(), PhiOpClasses.end(),
- PhiOpClasses.begin()) &&
+ assert(is_splat(PhiOpClasses) &&
"All MemoryPhi arguments should be in the same class");
}
}
@@ -3501,9 +3499,11 @@ bool NewGVN::runGVN() {
if (!ToErase->use_empty())
ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType()));
- if (ToErase->getParent())
- ToErase->eraseFromParent();
+ assert(ToErase->getParent() &&
+ "BB containing ToErase deleted unexpectedly!");
+ ToErase->eraseFromParent();
}
+ Changed |= !InstructionsToErase.empty();
// Delete all unreachable blocks.
auto UnreachableBlockPred = [&](const BasicBlock &BB) {
@@ -3697,37 +3697,6 @@ void NewGVN::convertClassToLoadsAndStores(
}
}
-static void patchReplacementInstruction(Instruction *I, Value *Repl) {
- auto *ReplInst = dyn_cast<Instruction>(Repl);
- if (!ReplInst)
- return;
-
- // Patch the replacement so that it is not more restrictive than the value
- // being replaced.
- // Note that if 'I' is a load being replaced by some operation,
- // for example, by an arithmetic operation, then andIRFlags()
- // would just erase all math flags from the original arithmetic
- // operation, which is clearly not wanted and not needed.
- if (!isa<LoadInst>(I))
- ReplInst->andIRFlags(I);
-
- // FIXME: If both the original and replacement value are part of the
- // same control-flow region (meaning that the execution of one
- // guarantees the execution of the other), then we can combine the
- // noalias scopes here and do better than the general conservative
- // answer used in combineMetadata().
-
- // In general, GVN unifies expressions over different control-flow
- // regions, and so we need a conservative combination of the noalias
- // scopes.
- static const unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group};
- combineMetadata(ReplInst, I, KnownIDs);
-}
-
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
@@ -3988,7 +3957,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
convertClassToDFSOrdered(*CC, DFSOrderedSet, UseCounts, ProbablyDead);
// Sort the whole thing.
- llvm::sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
+ llvm::sort(DFSOrderedSet);
for (auto &VD : DFSOrderedSet) {
int MemberDFSIn = VD.DFSIn;
int MemberDFSOut = VD.DFSOut;
@@ -4124,10 +4093,13 @@ bool NewGVN::eliminateInstructions(Function &F) {
// It's about to be alive again.
if (LeaderUseCount == 0 && isa<Instruction>(DominatingLeader))
ProbablyDead.erase(cast<Instruction>(DominatingLeader));
- // Copy instructions, however, are still dead because we use their
- // operand as the leader.
- if (LeaderUseCount == 0 && isSSACopy)
- ProbablyDead.insert(II);
+ // For copy instructions, we use their operand as a leader,
+ // which means we remove a user of the copy and it may become dead.
+ if (isSSACopy) {
+ unsigned &IIUseCount = UseCounts[II];
+ if (--IIUseCount == 0)
+ ProbablyDead.insert(II);
+ }
++LeaderUseCount;
AnythingReplaced = true;
}
@@ -4151,7 +4123,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
// If we have possible dead stores to look at, try to eliminate them.
if (CC->getStoreCount() > 0) {
convertClassToLoadsAndStores(*CC, PossibleDeadStores);
- llvm::sort(PossibleDeadStores.begin(), PossibleDeadStores.end());
+ llvm::sort(PossibleDeadStores);
ValueDFSStack EliminationStack;
for (auto &VD : PossibleDeadStores) {
int MemberDFSIn = VD.DFSIn;
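Two of the NewGVN cleanups above swap an open-coded std::equal(begin, end, begin) for llvm::is_splat(), which reads as intended: the range is non-empty and every element equals the first. A tiny sketch (the function name is illustrative, not from the patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// True when all reachable PHI operands resolve to the same value.
static bool allOperandsIdentical(ArrayRef<const Value *> Ops) {
  return is_splat(Ops); // false for an empty range, true for a single element
}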
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 1748815c5941..05ea9144f66c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -24,6 +25,8 @@ using namespace llvm;
#define DEBUG_TYPE "partially-inline-libcalls"
+DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
+ "Controls transformations in partially-inline-libcalls");
static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
BasicBlock &CurrBB, Function::iterator &BB,
@@ -33,6 +36,9 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
if (Call->onlyReadsMemory())
return false;
+ if (!DebugCounter::shouldExecute(PILCounter))
+ return false;
+
// Do the following transformation:
//
// (before)
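The new DebugCounter hook makes the sqrt partial inlining bisectable: each candidate consumes one counter event and is transformed only when the counter allows it. A generic sketch with a hypothetical counter name (my-transform is illustrative, not from the patch):

#include "llvm/Support/DebugCounter.h"

using namespace llvm;

// With -debug-counter=my-transform-skip=N,my-transform-count=M the pass
// skips the first N candidates and then transforms only the next M.
DEBUG_COUNTER(MyCounter, "my-transform",
              "Controls which candidates are actually transformed");

static bool shouldTransformCandidate() {
  // Each call consumes one event; returning false skips this candidate.
  return DebugCounter::shouldExecute(MyCounter);
}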
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 8f30bccf48f1..fd2eb85fd7bf 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -105,7 +105,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
/// The output of the pass - gives a list of each backedge (described by
/// pointing at the branch) which need a poll inserted.
- std::vector<TerminatorInst *> PollLocations;
+ std::vector<Instruction *> PollLocations;
/// True unless we're running spp-no-calls in which case we need to disable
/// the call-dependent placement opts.
@@ -348,7 +348,7 @@ bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
// Safepoint insertion would involve creating a new basic block (as the
// target of the current backedge) which does the safepoint (of all live
// variables) and branches to the true header
- TerminatorInst *Term = Pred->getTerminator();
+ Instruction *Term = Pred->getTerminator();
LLVM_DEBUG(dbgs() << "[LSP] terminator instruction: " << *Term);
@@ -524,7 +524,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
};
// We need the order of list to be stable so that naming ends up stable
// when we split edges. This makes test cases much easier to write.
- llvm::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);
+ llvm::sort(PollLocations, OrderByBBName);
// We can sometimes end up with duplicate poll locations. This happens if
// a single loop is visited more than once. The fact this happens seems
@@ -535,7 +535,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
// Insert a poll at each point the analysis pass identified
// The poll location must be the terminator of a loop latch block.
- for (TerminatorInst *Term : PollLocations) {
+ for (Instruction *Term : PollLocations) {
// We are inserting a poll, the function is modified
Modified = true;
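PlaceSafepoints also picks up the range form of llvm::sort for the poll locations; the comparator ordering terminators by their parent block's name is unchanged. A sketch of the idiom (the helper name is illustrative, not from the patch):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include <vector>

using namespace llvm;

// Keep the poll insertion order stable by sorting on block names.
static void orderPollsByBlockName(std::vector<Instruction *> &PollLocations) {
  llvm::sort(PollLocations, [](const Instruction *A, const Instruction *B) {
    return A->getParent()->getName() < B->getParent()->getName();
  });
}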
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 1df0a9c49fb1..cb893eab1654 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -63,6 +63,7 @@
using namespace llvm;
using namespace reassociate;
+using namespace PatternMatch;
#define DEBUG_TYPE "reassociate"
@@ -125,10 +126,10 @@ XorOpnd::XorOpnd(Value *V) {
Value *V0 = I->getOperand(0);
Value *V1 = I->getOperand(1);
const APInt *C;
- if (match(V0, PatternMatch::m_APInt(C)))
+ if (match(V0, m_APInt(C)))
std::swap(V0, V1);
- if (match(V1, PatternMatch::m_APInt(C))) {
+ if (match(V1, m_APInt(C))) {
ConstPart = *C;
SymbolicPart = V0;
isOr = (I->getOpcode() == Instruction::Or);
@@ -204,10 +205,10 @@ unsigned ReassociatePass::getRank(Value *V) {
for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
Rank = std::max(Rank, getRank(I->getOperand(i)));
- // If this is a not or neg instruction, do not count it for rank. This
+ // If this is a 'not' or 'neg' instruction, do not count it for rank. This
// assures us that X and ~X will have the same rank.
- if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
- !BinaryOperator::isFNeg(I))
+ if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
+ !match(I, m_FNeg(m_Value())))
++Rank;
LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
@@ -573,8 +574,8 @@ static bool LinearizeExprTree(BinaryOperator *I,
// If this is a multiply expression, turn any internal negations into
// multiplies by -1 so they can be reassociated.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op))
- if ((Opcode == Instruction::Mul && BinaryOperator::isNeg(BO)) ||
- (Opcode == Instruction::FMul && BinaryOperator::isFNeg(BO))) {
+ if ((Opcode == Instruction::Mul && match(BO, m_Neg(m_Value()))) ||
+ (Opcode == Instruction::FMul && match(BO, m_FNeg(m_Value())))) {
LLVM_DEBUG(dbgs()
<< "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
BO = LowerNegateToMultiply(BO);
@@ -788,13 +789,7 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
// Discard any debug info related to the expressions that have changed (we
// can leave debug info related to the root, since the result of the
// expression tree should be the same even after reassociation).
- SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, ExpressionChanged);
- for (auto *DII : DbgUsers) {
- Value *Undef = UndefValue::get(ExpressionChanged->getType());
- DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
- ValueAsMetadata::get(Undef)));
- }
+ replaceDbgUsesWithUndef(ExpressionChanged);
ExpressionChanged->moveBefore(I);
ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
@@ -854,7 +849,7 @@ static Value *NegateValue(Value *V, Instruction *BI,
// Okay, we need to materialize a negated version of V with an instruction.
// Scan the use lists of V to see if we have one already.
for (User *U : V->users()) {
- if (!BinaryOperator::isNeg(U) && !BinaryOperator::isFNeg(U))
+ if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
continue;
// We found one! Now we have to make sure that the definition dominates
@@ -899,7 +894,7 @@ static Value *NegateValue(Value *V, Instruction *BI,
/// Return true if we should break up this subtract of X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
- if (BinaryOperator::isNeg(Sub) || BinaryOperator::isFNeg(Sub))
+ if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
return false;
// Don't breakup X - undef.
@@ -1113,8 +1108,8 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
// First, check for X and ~X in the operand list.
assert(i < Ops.size());
- if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^.
- Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
+ Value *X;
+ if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
unsigned FoundX = FindInOperandList(Ops, i, X);
if (FoundX != i) {
if (Opcode == Instruction::And) // ...&X&~X = 0
@@ -1304,7 +1299,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
Value *V = Ops[i].Op;
const APInt *C;
// TODO: Support non-splat vectors.
- if (match(V, PatternMatch::m_APInt(C))) {
+ if (match(V, m_APInt(C))) {
ConstOpnd ^= *C;
} else {
XorOpnd O(V);
@@ -1460,27 +1455,22 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
}
// Check for X and -X or X and ~X in the operand list.
- if (!BinaryOperator::isNeg(TheOp) && !BinaryOperator::isFNeg(TheOp) &&
- !BinaryOperator::isNot(TheOp))
+ Value *X;
+ if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
+ !match(TheOp, m_FNeg(m_Value(X))))
continue;
- Value *X = nullptr;
- if (BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp))
- X = BinaryOperator::getNegArgument(TheOp);
- else if (BinaryOperator::isNot(TheOp))
- X = BinaryOperator::getNotArgument(TheOp);
-
unsigned FoundX = FindInOperandList(Ops, i, X);
if (FoundX == i)
continue;
// Remove X and -X from the operand list.
if (Ops.size() == 2 &&
- (BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp)))
+ (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
return Constant::getNullValue(X->getType());
// Remove X and ~X from the operand list.
- if (Ops.size() == 2 && BinaryOperator::isNot(TheOp))
+ if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
return Constant::getAllOnesValue(X->getType());
Ops.erase(Ops.begin()+i);
@@ -1494,7 +1484,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
e -= 2; // Removed two elements.
// if X and ~X we append -1 to the operand list.
- if (BinaryOperator::isNot(TheOp)) {
+ if (match(TheOp, m_Not(m_Value()))) {
Value *V = Constant::getAllOnesValue(X->getType());
Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
e += 1;
@@ -2058,7 +2048,7 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
RedoInsts.insert(I);
MadeChange = true;
I = NI;
- } else if (BinaryOperator::isNeg(I)) {
+ } else if (match(I, m_Neg(m_Value()))) {
// Otherwise, this is a negation. See if the operand is a multiply tree
// and if this is not an inner node of a multiply tree.
if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
@@ -2082,7 +2072,7 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
RedoInsts.insert(I);
MadeChange = true;
I = NI;
- } else if (BinaryOperator::isFNeg(I)) {
+ } else if (match(I, m_FNeg(m_Value()))) {
// Otherwise, this is a negation. See if the operand is a multiply tree
// and if this is not an inner node of a multiply tree.
if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
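Throughout Reassociate the BinaryOperator::isNeg/isFNeg/isNot helpers are replaced with PatternMatch matchers, which also capture the negated or inverted operand that the old get*Argument() helpers returned. A compact sketch (the helper name is illustrative; m_FNeg matches the fsub -0.0, X form in this LLVM version):

#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace PatternMatch;

// Recognize a negation or bitwise-not and capture its operand in X.
static bool isNegNotOrFNeg(Instruction *I, Value *&X) {
  return match(I, m_Neg(m_Value(X))) ||   // sub 0, X
         match(I, m_FNeg(m_Value(X))) ||  // fsub -0.0, X
         match(I, m_Not(m_Value(X)));     // xor X, -1
}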
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 0de2bc72b522..42d7ed5bc534 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -28,7 +28,6 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -38,6 +37,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -65,6 +65,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
@@ -346,7 +347,7 @@ static bool containsGCPtrType(Type *Ty) {
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
- return llvm::any_of(ST->subtypes(), containsGCPtrType);
+ return llvm::any_of(ST->elements(), containsGCPtrType);
return false;
}
@@ -1824,7 +1825,7 @@ static void relocationViaAlloca(
}
}
- llvm::sort(Uses.begin(), Uses.end());
+ llvm::sort(Uses);
auto Last = std::unique(Uses.begin(), Uses.end());
Uses.erase(Last, Uses.end());
@@ -1850,13 +1851,13 @@ static void relocationViaAlloca(
StoreInst *Store = new StoreInst(Def, Alloca);
if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
- // InvokeInst is a TerminatorInst so the store need to be inserted
- // into its normal destination block.
+          // InvokeInst is a terminator, so the store needs to be inserted
+          // into its normal destination block.
BasicBlock *NormalDest = Invoke->getNormalDest();
Store->insertBefore(NormalDest->getFirstNonPHI());
} else {
assert(!Inst->isTerminator() &&
- "The only TerminatorInst that can produce a value is "
+ "The only terminator that can produce a value is "
"InvokeInst which is handled above.");
Store->insertAfter(Inst);
}
@@ -2534,9 +2535,10 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// Delete any unreachable statepoints so that we don't have unrewritten
// statepoints surviving this pass. This makes testing easier and the
// resulting IR less confusing to human readers.
- DeferredDominance DD(DT);
- bool MadeChange = removeUnreachableBlocks(F, nullptr, &DD);
- DD.flush();
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ bool MadeChange = removeUnreachableBlocks(F, nullptr, &DTU);
+ // Flush the Dominator Tree.
+ DTU.getDomTree();
// Gather all the statepoints which need rewritten. Be careful to only
// consider those in reachable code since we need to ask dominance queries
@@ -2582,7 +2584,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// increase the liveset of any statepoint we move over. This is profitable
// as long as all statepoints are in rare blocks. If we had in-register
// lowering for live values this would be a much safer transform.
- auto getConditionInst = [](TerminatorInst *TI) -> Instruction* {
+ auto getConditionInst = [](Instruction *TI) -> Instruction * {
if (auto *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional())
return dyn_cast<Instruction>(BI->getCondition());
@@ -2590,7 +2592,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
return nullptr;
};
for (BasicBlock &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (auto *Cond = getConditionInst(TI))
// TODO: Handle more than just ICmps here. We should be able to move
// most instructions without side effects or memory access.
@@ -2673,7 +2675,7 @@ static SetVector<Value *> computeKillSet(BasicBlock *BB) {
/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
/// sanity check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
- TerminatorInst *TI, bool TermOkay = false) {
+ Instruction *TI, bool TermOkay = false) {
for (Value *V : Live) {
if (auto *I = dyn_cast<Instruction>(V)) {
// The terminator can be a member of the LiveOut set. LLVM's definition
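RewriteStatepointsForGC drops the DeferredDominance helper in favor of a lazily updated DomTreeUpdater: utilities such as removeUnreachableBlocks() queue their CFG updates on it, and requesting the tree flushes them. A minimal sketch of that sequence (the function name is illustrative, not from the patch):

#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Remove unreachable blocks while keeping DT consistent via lazy updates.
static bool pruneUnreachable(Function &F, DominatorTree &DT) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  bool Changed = removeUnreachableBlocks(F, /*LVI=*/nullptr, &DTU);
  DTU.getDomTree(); // apply any queued updates before DT is used again
  return Changed;
}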
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 5e3ddeda2d49..2f6ed05c023b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -55,6 +55,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/PredicateInfo.h"
#include <cassert>
#include <utility>
#include <vector>
@@ -246,7 +247,27 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
using Edge = std::pair<BasicBlock *, BasicBlock *>;
DenseSet<Edge> KnownFeasibleEdges;
+ DenseMap<Function *, AnalysisResultsForFn> AnalysisResults;
+ DenseMap<Value *, SmallPtrSet<User *, 2>> AdditionalUsers;
+
public:
+ void addAnalysis(Function &F, AnalysisResultsForFn A) {
+ AnalysisResults.insert({&F, std::move(A)});
+ }
+
+ const PredicateBase *getPredicateInfoFor(Instruction *I) {
+ auto A = AnalysisResults.find(I->getParent()->getParent());
+ if (A == AnalysisResults.end())
+ return nullptr;
+ return A->second.PredInfo->getPredicateInfoFor(I);
+ }
+
+ DomTreeUpdater getDTU(Function &F) {
+ auto A = AnalysisResults.find(&F);
+ assert(A != AnalysisResults.end() && "Need analysis results for function.");
+ return {A->second.DT, A->second.PDT, DomTreeUpdater::UpdateStrategy::Lazy};
+ }
+
SCCPSolver(const DataLayout &DL, const TargetLibraryInfo *tli)
: DL(DL), TLI(tli) {}
@@ -548,7 +569,7 @@ private:
// getFeasibleSuccessors - Return a vector of booleans to indicate which
// successors are reachable from a given terminator instruction.
- void getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl<bool> &Succs);
+ void getFeasibleSuccessors(Instruction &TI, SmallVectorImpl<bool> &Succs);
// OperandChangedState - This method is invoked on all of the users of an
// instruction that was just changed state somehow. Based on this
@@ -558,6 +579,26 @@ private:
visit(*I);
}
+ // Add U as additional user of V.
+ void addAdditionalUser(Value *V, User *U) {
+ auto Iter = AdditionalUsers.insert({V, {}});
+ Iter.first->second.insert(U);
+ }
+
+ // Mark I's users as changed, including AdditionalUsers.
+ void markUsersAsChanged(Value *I) {
+ for (User *U : I->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ OperandChangedState(UI);
+
+ auto Iter = AdditionalUsers.find(I);
+ if (Iter != AdditionalUsers.end()) {
+ for (User *U : Iter->second)
+ if (auto *UI = dyn_cast<Instruction>(U))
+ OperandChangedState(UI);
+ }
+ }
+
private:
friend class InstVisitor<SCCPSolver>;
@@ -569,7 +610,7 @@ private:
// Terminators
void visitReturnInst(ReturnInst &I);
- void visitTerminatorInst(TerminatorInst &TI);
+ void visitTerminator(Instruction &TI);
void visitCastInst(CastInst &I);
void visitSelectInst(SelectInst &I);
@@ -580,7 +621,7 @@ private:
void visitCatchSwitchInst(CatchSwitchInst &CPI) {
markOverdefined(&CPI);
- visitTerminatorInst(CPI);
+ visitTerminator(CPI);
}
// Instructions that cannot be folded away.
@@ -595,12 +636,12 @@ private:
void visitInvokeInst (InvokeInst &II) {
visitCallSite(&II);
- visitTerminatorInst(II);
+ visitTerminator(II);
}
void visitCallSite (CallSite CS);
- void visitResumeInst (TerminatorInst &I) { /*returns void*/ }
- void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
+ void visitResumeInst (ResumeInst &I) { /*returns void*/ }
+ void visitUnreachableInst(UnreachableInst &I) { /*returns void*/ }
void visitFenceInst (FenceInst &I) { /*returns void*/ }
void visitInstruction(Instruction &I) {
@@ -615,7 +656,7 @@ private:
// getFeasibleSuccessors - Return a vector of booleans to indicate which
// successors are reachable from a given terminator instruction.
-void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
+void SCCPSolver::getFeasibleSuccessors(Instruction &TI,
SmallVectorImpl<bool> &Succs) {
Succs.resize(TI.getNumSuccessors());
if (auto *BI = dyn_cast<BranchInst>(&TI)) {
@@ -640,7 +681,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
}
// Unwinding instructions successors are always executable.
- if (TI.isExceptional()) {
+ if (TI.isExceptionalTerminator()) {
Succs.assign(TI.getNumSuccessors(), true);
return;
}
@@ -802,7 +843,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
}
}
-void SCCPSolver::visitTerminatorInst(TerminatorInst &TI) {
+void SCCPSolver::visitTerminator(Instruction &TI) {
SmallVector<bool, 16> SuccFeasible;
getFeasibleSuccessors(TI, SuccFeasible);
@@ -982,8 +1023,9 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
// Handle ICmpInst instruction.
void SCCPSolver::visitCmpInst(CmpInst &I) {
- LatticeVal &IV = ValueState[&I];
- if (IV.isOverdefined()) return;
+  // Do not cache this lookup; getValueState calls later in the function
+  // might invalidate the reference.
+ if (ValueState[&I].isOverdefined()) return;
Value *Op1 = I.getOperand(0);
Value *Op2 = I.getOperand(1);
@@ -1011,7 +1053,8 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
}
// If operands are still unknown, wait for it to resolve.
- if (!V1State.isOverdefined() && !V2State.isOverdefined() && !IV.isConstant())
+ if (!V1State.isOverdefined() && !V2State.isOverdefined() &&
+ !ValueState[&I].isConstant())
return;
markOverdefined(&I);
@@ -1119,6 +1162,65 @@ void SCCPSolver::visitCallSite(CallSite CS) {
Function *F = CS.getCalledFunction();
Instruction *I = CS.getInstruction();
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
+ if (ValueState[I].isOverdefined())
+ return;
+
+ auto *PI = getPredicateInfoFor(I);
+ if (!PI)
+ return;
+
+ Value *CopyOf = I->getOperand(0);
+ auto *PBranch = dyn_cast<PredicateBranch>(PI);
+ if (!PBranch) {
+ mergeInValue(ValueState[I], I, getValueState(CopyOf));
+ return;
+ }
+
+ Value *Cond = PBranch->Condition;
+
+ // Everything below relies on the condition being a comparison.
+ auto *Cmp = dyn_cast<CmpInst>(Cond);
+ if (!Cmp) {
+ mergeInValue(ValueState[I], I, getValueState(CopyOf));
+ return;
+ }
+
+ Value *CmpOp0 = Cmp->getOperand(0);
+ Value *CmpOp1 = Cmp->getOperand(1);
+ if (CopyOf != CmpOp0 && CopyOf != CmpOp1) {
+ mergeInValue(ValueState[I], I, getValueState(CopyOf));
+ return;
+ }
+
+ if (CmpOp0 != CopyOf)
+ std::swap(CmpOp0, CmpOp1);
+
+ LatticeVal OriginalVal = getValueState(CopyOf);
+ LatticeVal EqVal = getValueState(CmpOp1);
+ LatticeVal &IV = ValueState[I];
+ if (PBranch->TrueEdge && Cmp->getPredicate() == CmpInst::ICMP_EQ) {
+ addAdditionalUser(CmpOp1, I);
+ if (OriginalVal.isConstant())
+ mergeInValue(IV, I, OriginalVal);
+ else
+ mergeInValue(IV, I, EqVal);
+ return;
+ }
+ if (!PBranch->TrueEdge && Cmp->getPredicate() == CmpInst::ICMP_NE) {
+ addAdditionalUser(CmpOp1, I);
+ if (OriginalVal.isConstant())
+ mergeInValue(IV, I, OriginalVal);
+ else
+ mergeInValue(IV, I, EqVal);
+ return;
+ }
+
+ return (void)mergeInValue(IV, I, getValueState(CopyOf));
+ }
+ }
+
// The common case is that we aren't tracking the callee, either because we
// are not doing interprocedural analysis or the callee is indirect, or is
// external. Handle these cases first.
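The block above keys off @llvm.ssa.copy calls: PredicateInfo inserts them to rename a value along the edges of a dominating branch or assume, and IPSCCP then refines the copy's lattice value from the branch condition. A minimal recognizer of such a copy (the helper name is illustrative, not from the patch):

#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// If I is an @llvm.ssa.copy call, return the value it renames.
static Value *getSSACopyOriginal(Instruction *I) {
  if (auto *II = dyn_cast<IntrinsicInst>(I))
    if (II->getIntrinsicID() == Intrinsic::ssa_copy)
      return II->getOperand(0);
  return nullptr;
}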
@@ -1134,6 +1236,8 @@ CallOverdefined:
SmallVector<Constant*, 8> Operands;
for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
AI != E; ++AI) {
+ if (AI->get()->getType()->isStructTy())
+ return markOverdefined(I); // Can't handle struct args.
LatticeVal State = getValueState(*AI);
if (State.isUnknown())
@@ -1238,9 +1342,7 @@ void SCCPSolver::Solve() {
// since all of its users will have already been marked as overdefined
// Update all of the users of this instruction's value.
//
- for (User *U : I->users())
- if (auto *UI = dyn_cast<Instruction>(U))
- OperandChangedState(UI);
+ markUsersAsChanged(I);
}
// Process the instruction work list.
@@ -1257,9 +1359,7 @@ void SCCPSolver::Solve() {
// Update all of the users of this instruction's value.
//
if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
- for (User *U : I->users())
- if (auto *UI = dyn_cast<Instruction>(U))
- OperandChangedState(UI);
+ markUsersAsChanged(I);
}
// Process the basic block work list.
@@ -1522,7 +1622,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Check to see if we have a branch or switch on an undefined value. If so
// we force the branch to go one way or the other to make the successor
// values live. It doesn't really matter which way we force it.
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (auto *BI = dyn_cast<BranchInst>(TI)) {
if (!BI->isConditional()) continue;
if (!getValueState(BI->getCondition()).isUnknown())
@@ -1694,7 +1794,7 @@ static bool runSCCP(Function &F, const DataLayout &DL,
// constants if we have found them to be of constant values.
for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
Instruction *Inst = &*BI++;
- if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
+ if (Inst->getType()->isVoidTy() || Inst->isTerminator())
continue;
if (tryToReplaceWithConstant(Solver, Inst)) {
@@ -1798,8 +1898,44 @@ static void findReturnsToZap(Function &F,
}
}
-bool llvm::runIPSCCP(Module &M, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+// Update the condition for terminators that are branching on indeterminate
+// values, forcing them to use a specific edge.
+static void forceIndeterminateEdge(Instruction* I, SCCPSolver &Solver) {
+ BasicBlock *Dest = nullptr;
+ Constant *C = nullptr;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ if (!isa<ConstantInt>(SI->getCondition())) {
+ // Indeterminate switch; use first case value.
+ Dest = SI->case_begin()->getCaseSuccessor();
+ C = SI->case_begin()->getCaseValue();
+ }
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ if (!isa<ConstantInt>(BI->getCondition())) {
+ // Indeterminate branch; use false.
+ Dest = BI->getSuccessor(1);
+ C = ConstantInt::getFalse(BI->getContext());
+ }
+ } else if (IndirectBrInst *IBR = dyn_cast<IndirectBrInst>(I)) {
+ if (!isa<BlockAddress>(IBR->getAddress()->stripPointerCasts())) {
+ // Indeterminate indirectbr; use successor 0.
+ Dest = IBR->getSuccessor(0);
+ C = BlockAddress::get(IBR->getSuccessor(0));
+ }
+ } else {
+ llvm_unreachable("Unexpected terminator instruction");
+ }
+ if (C) {
+ assert(Solver.isEdgeFeasible(I->getParent(), Dest) &&
+ "Didn't find feasible edge?");
+ (void)Dest;
+
+ I->setOperand(0, C);
+ }
+}
+
+bool llvm::runIPSCCP(
+ Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ function_ref<AnalysisResultsForFn(Function &)> getAnalysis) {
SCCPSolver Solver(DL, TLI);
// Loop over all functions, marking arguments to those with their addresses
@@ -1808,6 +1944,8 @@ bool llvm::runIPSCCP(Module &M, const DataLayout &DL,
if (F.isDeclaration())
continue;
+ Solver.addAnalysis(F, getAnalysis(F));
+
// Determine if we can track the function's return values. If so, add the
// function to the solver's set of return-tracked functions.
if (canTrackReturnsInterprocedurally(&F))
@@ -1856,12 +1994,13 @@ bool llvm::runIPSCCP(Module &M, const DataLayout &DL,
// Iterate over all of the instructions in the module, replacing them with
// constants if we have found them to be of constant values.
- SmallVector<BasicBlock*, 512> BlocksToErase;
for (Function &F : M) {
if (F.isDeclaration())
continue;
+ SmallVector<BasicBlock *, 512> BlocksToErase;
+
if (Solver.isBlockExecutable(&F.front()))
for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;
++AI) {
@@ -1897,23 +2036,26 @@ bool llvm::runIPSCCP(Module &M, const DataLayout &DL,
}
}
- // Change dead blocks to unreachable. We do it after replacing constants in
- // all executable blocks, because changeToUnreachable may remove PHI nodes
- // in executable blocks we found values for. The function's entry block is
- // not part of BlocksToErase, so we have to handle it separately.
- for (BasicBlock *BB : BlocksToErase)
+ DomTreeUpdater DTU = Solver.getDTU(F);
+ // Change dead blocks to unreachable. We do it after replacing constants
+ // in all executable blocks, because changeToUnreachable may remove PHI
+ // nodes in executable blocks we found values for. The function's entry
+ // block is not part of BlocksToErase, so we have to handle it separately.
+ for (BasicBlock *BB : BlocksToErase) {
NumInstRemoved +=
- changeToUnreachable(BB->getFirstNonPHI(), /*UseLLVMTrap=*/false);
+ changeToUnreachable(BB->getFirstNonPHI(), /*UseLLVMTrap=*/false,
+ /*PreserveLCSSA=*/false, &DTU);
+ }
if (!Solver.isBlockExecutable(&F.front()))
NumInstRemoved += changeToUnreachable(F.front().getFirstNonPHI(),
- /*UseLLVMTrap=*/false);
+ /*UseLLVMTrap=*/false,
+ /*PreserveLCSSA=*/false, &DTU);
- // Now that all instructions in the function are constant folded, erase dead
- // blocks, because we can now use ConstantFoldTerminator to get rid of
- // in-edges.
- for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) {
+ // Now that all instructions in the function are constant folded,
+ // use ConstantFoldTerminator to get rid of in-edges, record DT updates and
+ // delete dead BBs.
+ for (BasicBlock *DeadBB : BlocksToErase) {
// If there are any PHI nodes in this successor, drop entries for BB now.
- BasicBlock *DeadBB = BlocksToErase[i];
for (Value::user_iterator UI = DeadBB->user_begin(),
UE = DeadBB->user_end();
UI != UE;) {
@@ -1925,41 +2067,34 @@ bool llvm::runIPSCCP(Module &M, const DataLayout &DL,
// Ignore blockaddress users; BasicBlock's dtor will handle them.
if (!I) continue;
- bool Folded = ConstantFoldTerminator(I->getParent());
- if (!Folded) {
- // If the branch can't be folded, we must have forced an edge
- // for an indeterminate value. Force the terminator to fold
- // to that edge.
- Constant *C;
- BasicBlock *Dest;
- if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
- Dest = SI->case_begin()->getCaseSuccessor();
- C = SI->case_begin()->getCaseValue();
- } else if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- Dest = BI->getSuccessor(1);
- C = ConstantInt::getFalse(BI->getContext());
- } else if (IndirectBrInst *IBR = dyn_cast<IndirectBrInst>(I)) {
- Dest = IBR->getSuccessor(0);
- C = BlockAddress::get(IBR->getSuccessor(0));
- } else {
- llvm_unreachable("Unexpected terminator instruction");
- }
- assert(Solver.isEdgeFeasible(I->getParent(), Dest) &&
- "Didn't find feasible edge?");
- (void)Dest;
-
- I->setOperand(0, C);
- Folded = ConstantFoldTerminator(I->getParent());
- }
+ // If we have forced an edge for an indeterminate value, then force the
+ // terminator to fold to that edge.
+ forceIndeterminateEdge(I, Solver);
+ bool Folded = ConstantFoldTerminator(I->getParent(),
+ /*DeleteDeadConditions=*/false,
+ /*TLI=*/nullptr, &DTU);
assert(Folded &&
"Expect TermInst on constantint or blockaddress to be folded");
(void) Folded;
}
+ // Mark dead BB for deletion.
+ DTU.deleteBB(DeadBB);
+ }
- // Finally, delete the basic block.
- F.getBasicBlockList().erase(DeadBB);
+ for (BasicBlock &BB : F) {
+ for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
+ Instruction *Inst = &*BI++;
+ if (Solver.getPredicateInfoFor(Inst)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
+ if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
+ Value *Op = II->getOperand(0);
+ Inst->replaceAllUsesWith(Op);
+ Inst->eraseFromParent();
+ }
+ }
+ }
+ }
}
- BlocksToErase.clear();
}
// If we inferred constant or undef return values for a function, we replaced
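Note on the SCCP hunks above: the rewrite routes every CFG mutation (changeToUnreachable, ConstantFoldTerminator, deleteBB) through a DomTreeUpdater so dominator-tree maintenance is queued and applied in batches instead of being recomputed per edit. The following self-contained sketch uses toy types to illustrate that batching idea only; it is not the real DomTreeUpdater interface.

#include <cstdio>
#include <string>
#include <vector>

// Toy stand-ins for basic blocks and an updater that queues CFG edge
// changes and applies them in one pass, mirroring the batching idea.
struct Block { std::string Name; };

struct EdgeUpdate {
  enum Kind { Insert, Delete } K;
  Block *From, *To;
};

struct ToyTreeUpdater {
  std::vector<EdgeUpdate> Pending;
  void recordEdge(EdgeUpdate::Kind K, Block *From, Block *To) {
    Pending.push_back({K, From, To});      // queue, don't recompute yet
  }
  void flush() {
    for (const EdgeUpdate &U : Pending)    // apply everything at once
      std::printf("%s edge %s -> %s\n",
                  U.K == EdgeUpdate::Insert ? "insert" : "delete",
                  U.From->Name.c_str(), U.To->Name.c_str());
    Pending.clear();
  }
};

int main() {
  Block Entry{"entry"}, Dead{"dead"}, Exit{"exit"};
  ToyTreeUpdater DTU;
  // A pass deleting the dead block records the edge removals first
  // and applies them in a single batch afterwards.
  DTU.recordEdge(EdgeUpdate::Delete, &Entry, &Dead);
  DTU.recordEdge(EdgeUpdate::Delete, &Dead, &Exit);
  DTU.flush();
  return 0;
}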
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index bf482bf5272e..eab77cf4cda9 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -913,8 +913,7 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
- if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
- II.getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (II.isLifetimeStartOrEnd()) {
ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
Length->getLimitedValue());
@@ -1060,7 +1059,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
- llvm::sort(Slices.begin(), Slices.end());
+ llvm::sort(Slices);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1211,7 +1210,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
- TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
+ Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
Value *InVal = PN.getIncomingValue(Idx);
// If the value is produced by the terminator of the predecessor (an
@@ -1275,7 +1274,7 @@ static void speculatePHINodeLoads(PHINode &PN) {
continue;
}
- TerminatorInst *TI = Pred->getTerminator();
+ Instruction *TI = Pred->getTerminator();
IRBuilderTy PredBuilder(TI);
LoadInst *Load = PredBuilder.CreateLoad(
@@ -1400,8 +1399,8 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
if (Ty == TargetTy)
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
- // Pointer size to use for the indices.
- unsigned PtrSize = DL.getPointerTypeSizeInBits(BasePtr->getType());
+ // Offset size to use for the indices.
+ unsigned OffsetSize = DL.getIndexTypeSizeInBits(BasePtr->getType());
// See if we can descend into a struct and locate a field with the correct
// type.
@@ -1413,7 +1412,7 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
ElementTy = ArrayTy->getElementType();
- Indices.push_back(IRB.getIntN(PtrSize, 0));
+ Indices.push_back(IRB.getIntN(OffsetSize, 0));
} else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
ElementTy = VectorTy->getElementType();
Indices.push_back(IRB.getInt32(0));
@@ -1807,8 +1806,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
} else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
@@ -1906,7 +1904,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
"All non-integer types eliminated!");
return RHSTy->getNumElements() < LHSTy->getNumElements();
};
- llvm::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes);
+ llvm::sort(CandidateTys, RankVectorTypes);
CandidateTys.erase(
std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
CandidateTys.end());
@@ -2029,8 +2027,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
} else {
return false;
@@ -2377,7 +2374,7 @@ private:
#endif
return getAdjustedPtr(IRB, DL, &NewAI,
- APInt(DL.getPointerTypeSizeInBits(PointerTy), Offset),
+ APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
PointerTy,
#ifndef NDEBUG
Twine(OldName) + "."
@@ -2593,7 +2590,8 @@ private:
}
V = convertValue(DL, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
- Store->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+ Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
if (AATags)
Store->setAAMetadata(AATags);
Pass.DeadInsts.insert(&SI);
@@ -2662,7 +2660,8 @@ private:
NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
SI.isVolatile());
}
- NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+ NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
if (AATags)
NewSI->setAAMetadata(AATags);
if (SI.isVolatile())
@@ -2899,8 +2898,8 @@ private:
unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
// Compute the relative offset for the other pointer within the transfer.
- unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
- APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
+ unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
+ APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
unsigned OtherAlign =
IsDest ? II.getSourceAlignment() : II.getDestAlignment();
OtherAlign = MinAlign(OtherAlign ? OtherAlign : 1,
@@ -3011,8 +3010,7 @@ private:
}
bool visitIntrinsicInst(IntrinsicInst &II) {
- assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
- II.getIntrinsicID() == Intrinsic::lifetime_end);
+ assert(II.isLifetimeStartOrEnd());
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getArgOperand(1) == OldPtr);
@@ -3164,7 +3162,12 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
/// value (as opposed to the user).
Use *U;
+ /// Used to calculate offsets, and hence alignment, of subobjects.
+ const DataLayout &DL;
+
public:
+ AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}
+
/// Rewrite loads and stores through a pointer and all pointers derived from
/// it.
bool rewrite(Instruction &I) {
@@ -3208,10 +3211,22 @@ private:
/// split operations.
Value *Ptr;
+ /// The base pointee type being GEPed into.
+ Type *BaseTy;
+
+ /// Known alignment of the base pointer.
+ unsigned BaseAlign;
+
+ /// To calculate offset of each component so we can correctly deduce
+ /// alignments.
+ const DataLayout &DL;
+
/// Initialize the splitter with an insertion point, Ptr and start with a
/// single zero GEP index.
- OpSplitter(Instruction *InsertionPoint, Value *Ptr)
- : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}
+ OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+ unsigned BaseAlign, const DataLayout &DL)
+ : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr),
+ BaseTy(BaseTy), BaseAlign(BaseAlign), DL(DL) {}
public:
/// Generic recursive split emission routine.
@@ -3228,8 +3243,11 @@ private:
/// \param Agg The aggregate value being built up or stored, depending on
/// whether this is splitting a load or a store respectively.
void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
- if (Ty->isSingleValueType())
- return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name);
+ if (Ty->isSingleValueType()) {
+ unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
+ return static_cast<Derived *>(this)->emitFunc(
+ Ty, Agg, MinAlign(BaseAlign, Offset), Name);
+ }
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned OldSize = Indices.size();
@@ -3268,17 +3286,19 @@ private:
struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
AAMDNodes AATags;
- LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags)
- : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {}
+ LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+ AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+ : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
+ DL), AATags(AATags) {}
/// Emit a leaf load of a single value. This is called at the leaves of the
/// recursive emission to actually load values.
- void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+ void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
Value *GEP =
IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
- LoadInst *Load = IRB.CreateLoad(GEP, Name + ".load");
+ LoadInst *Load = IRB.CreateAlignedLoad(GEP, Align, Name + ".load");
if (AATags)
Load->setAAMetadata(AATags);
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
@@ -3295,7 +3315,8 @@ private:
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
AAMDNodes AATags;
LI.getAAMetadata(AATags);
- LoadOpSplitter Splitter(&LI, *U, AATags);
+ LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,
+ getAdjustedAlignment(&LI, 0, DL), DL);
Value *V = UndefValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
LI.replaceAllUsesWith(V);
@@ -3304,13 +3325,15 @@ private:
}
struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
- StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags)
- : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {}
+ StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+ AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+ : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
+ DL),
+ AATags(AATags) {}
AAMDNodes AATags;
-
/// Emit a leaf store of a single value. This is called at the leaves of the
/// recursive emission to actually produce stores.
- void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+ void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
assert(Ty->isSingleValueType());
// Extract the single value and store it using the indices.
//
@@ -3320,7 +3343,8 @@ private:
IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
Value *InBoundsGEP =
IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
- StoreInst *Store = IRB.CreateStore(ExtractValue, InBoundsGEP);
+ StoreInst *Store =
+ IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align);
if (AATags)
Store->setAAMetadata(AATags);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
@@ -3338,7 +3362,8 @@ private:
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
AAMDNodes AATags;
SI.getAAMetadata(AATags);
- StoreOpSplitter Splitter(&SI, *U, AATags);
+ StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,
+ getAdjustedAlignment(&SI, 0, DL), DL);
Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
SI.eraseFromParent();
return true;
@@ -3772,7 +3797,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PartPtrTy, BasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
- PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+ PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
// Append this load onto the list of split loads so we can find it later
// to rewrite the stores.
@@ -3828,7 +3854,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
- PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+ PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
@@ -4221,7 +4248,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
if (!IsSorted)
- llvm::sort(AS.begin(), AS.end());
+ llvm::sort(AS);
/// Describes the allocas introduced by rewritePartition in order to migrate
/// the debug info.
@@ -4254,7 +4281,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
- TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
+ TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
if (!DbgDeclares.empty()) {
auto *Var = DbgDeclares.front()->getVariable();
auto *Expr = DbgDeclares.front()->getExpression();
@@ -4306,7 +4333,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
// Remove any existing intrinsics describing the same alloca.
- for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca))
+ for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca))
OldDII->eraseFromParent();
DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr,
@@ -4356,7 +4383,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
- AggLoadStoreRewriter AggRewriter;
+ AggLoadStoreRewriter AggRewriter(DL);
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
@@ -4421,7 +4448,7 @@ bool SROA::deleteDeadInstructions(
// not be able to find it.
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
DeletedAllocas.insert(AI);
- for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(AI))
+ for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(AI))
OldDII->eraseFromParent();
}
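A recurring detail in the SROA hunks above is deriving the alignment of each split element as MinAlign(BaseAlign, Offset): the largest power of two that divides both the base pointer's alignment and the element's byte offset. A minimal, self-contained illustration of that computation follows; the helper name is mine, not SROA's.

#include <cassert>
#include <cstdint>

// Largest power of two dividing both values: the lowest set bit of A | B.
// This is the quantity the rewritten OpSplitter computes per element.
static uint64_t minAlign(uint64_t A, uint64_t B) {
  return (A | B) & (~(A | B) + 1);
}

int main() {
  // For a 16-byte aligned aggregate: the field at offset 4 is only
  // 4-aligned, the field at offset 8 is 8-aligned, offset 0 keeps 16.
  assert(minAlign(16, 4) == 4);
  assert(minAlign(16, 8) == 8);
  assert(minAlign(16, 0) == 16);
  return 0;
}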
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index 526487d3477e..976daf4c78c2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -25,7 +25,9 @@
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
using namespace llvm;
@@ -42,7 +44,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeDCELegacyPassPass(Registry);
initializeDeadInstEliminationPass(Registry);
initializeDivRemPairsLegacyPassPass(Registry);
- initializeScalarizerPass(Registry);
+ initializeScalarizerLegacyPassPass(Registry);
initializeDSELegacyPassPass(Registry);
initializeGuardWideningLegacyPassPass(Registry);
initializeLoopGuardWideningLegacyPassPass(Registry);
@@ -50,6 +52,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeNewGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
+ initializeMakeGuardsExplicitLegacyPassPass(Registry);
initializeGVNHoistLegacyPassPass(Registry);
initializeGVNSinkLegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
@@ -72,6 +75,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopUnrollPass(Registry);
initializeLoopUnrollAndJamPass(Registry);
initializeLoopUnswitchPass(Registry);
+ initializeWarnMissedTransformationsLegacyPass(Registry);
initializeLoopVersioningLICMPass(Registry);
initializeLoopIdiomRecognizeLegacyPassPass(Registry);
initializeLowerAtomicLegacyPassPass(Registry);
@@ -194,6 +198,10 @@ void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopUnswitchPass());
}
+void LLVMAddLowerAtomicPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerAtomicPass());
+}
+
void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createMemCpyOptPass());
}
@@ -274,3 +282,7 @@ void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLowerExpectIntrinsicPass());
}
+
+void LLVMAddUnifyFunctionExitNodesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createUnifyFunctionExitNodesPass());
+}
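The Scalar.cpp hunk above adds C-API wrappers for two more passes. A rough usage sketch through the LLVM-C pass-manager interface might look like the following, assuming LLVMAddLowerAtomicPass is declared in llvm-c/Transforms/Scalar.h as this import suggests; treat it as an illustration rather than a vetted build recipe.

#include <llvm-c/Core.h>
#include <llvm-c/Transforms/Scalar.h>

int main() {
  // Build an empty module and a legacy pass manager through the C API.
  LLVMModuleRef M = LLVMModuleCreateWithName("demo");
  LLVMPassManagerRef PM = LLVMCreatePassManager();

  // The wrapper added by this change: schedule lower-atomic on the module.
  LLVMAddLowerAtomicPass(PM);

  LLVMRunPassManager(PM, M);   // run the scheduled passes over M

  LLVMDisposePassManager(PM);
  LLVMDisposeModule(M);
  return 0;
}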
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 34ed126155be..5eb3fdab6d5c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -14,6 +14,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -38,6 +39,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Options.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -48,6 +50,13 @@ using namespace llvm;
#define DEBUG_TYPE "scalarizer"
+// This is disabled by default because having separate loads and stores
+// makes it more likely that the -combiner-alias-analysis limits will be
+// reached.
+static cl::opt<bool>
+ ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden,
+ cl::desc("Allow the scalarizer pass to scalarize loads and store"));
(The option description above reads "loads and store" in the upstream source; "loads and stores" is presumably intended.)
+
namespace {
// Used to store the scattered form of a vector.
@@ -151,17 +160,13 @@ struct VectorLayout {
uint64_t ElemSize = 0;
};
-class Scalarizer : public FunctionPass,
- public InstVisitor<Scalarizer, bool> {
+class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
- static char ID;
-
- Scalarizer() : FunctionPass(ID) {
- initializeScalarizerPass(*PassRegistry::getPassRegistry());
+ ScalarizerVisitor(unsigned ParallelLoopAccessMDKind)
+ : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind) {
}
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
+ bool visit(Function &F);
// InstVisitor methods. They return true if the instruction was scalarized,
// false if nothing changed.
@@ -179,16 +184,6 @@ public:
bool visitStoreInst(StoreInst &SI);
bool visitCallInst(CallInst &ICI);
- static void registerOptions() {
- // This is disabled by default because having separate loads and stores
- // makes it more likely that the -combiner-alias-analysis limits will be
- // reached.
- OptionRegistry::registerOption<bool, Scalarizer,
- &Scalarizer::ScalarizeLoadStore>(
- "scalarize-load-store",
- "Allow the scalarizer pass to scalarize loads and store", false);
- }
-
private:
Scatterer scatter(Instruction *Point, Value *V);
void gather(Instruction *Op, const ValueVector &CV);
@@ -204,16 +199,28 @@ private:
ScatterMap Scattered;
GatherList Gathered;
+
unsigned ParallelLoopAccessMDKind;
- bool ScalarizeLoadStore;
};
-} // end anonymous namespace
+class ScalarizerLegacyPass : public FunctionPass {
+public:
+ static char ID;
-char Scalarizer::ID = 0;
+ ScalarizerLegacyPass() : FunctionPass(ID) {
+ initializeScalarizerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
-INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
- "Scalarize vector operations", false, false)
+ bool runOnFunction(Function &F) override;
+};
+
+} // end anonymous namespace
+
+char ScalarizerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
+ "Scalarize vector operations", false, false)
+INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
+ "Scalarize vector operations", false, false)
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
ValueVector *cachePtr)
@@ -277,22 +284,31 @@ Value *Scatterer::operator[](unsigned I) {
return CV[I];
}
-bool Scalarizer::doInitialization(Module &M) {
- ParallelLoopAccessMDKind =
+bool ScalarizerLegacyPass::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ Module &M = *F.getParent();
+ unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
- ScalarizeLoadStore =
- M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>();
- return false;
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
+ return Impl.visit(F);
}
-bool Scalarizer::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
+FunctionPass *llvm::createScalarizerPass() {
+ return new ScalarizerLegacyPass();
+}
+
+bool ScalarizerVisitor::visit(Function &F) {
assert(Gathered.empty() && Scattered.empty());
- for (BasicBlock &BB : F) {
- for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
+
+ // To ensure we replace gathered components correctly we need to do an ordered
+ // traversal of the basic blocks in the function.
+ ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
+ for (BasicBlock *BB : RPOT) {
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
Instruction *I = &*II;
- bool Done = visit(I);
+ bool Done = InstVisitor::visit(I);
++II;
if (Done && I->getType()->isVoidTy())
I->eraseFromParent();
@@ -303,7 +319,7 @@ bool Scalarizer::runOnFunction(Function &F) {
// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
-Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
+Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
if (Argument *VArg = dyn_cast<Argument>(V)) {
// Put the scattered form of arguments in the entry block,
// so that it can be used everywhere.
@@ -327,7 +343,7 @@ Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
-void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
+void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
// Since we're not deleting Op yet, stub out its operands, so that it
// doesn't make anything live unnecessarily.
for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
@@ -356,19 +372,20 @@ void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
// Return true if it is safe to transfer the given metadata tag from
// vector to scalar instructions.
-bool Scalarizer::canTransferMetadata(unsigned Tag) {
+bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
return (Tag == LLVMContext::MD_tbaa
|| Tag == LLVMContext::MD_fpmath
|| Tag == LLVMContext::MD_tbaa_struct
|| Tag == LLVMContext::MD_invariant_load
|| Tag == LLVMContext::MD_alias_scope
|| Tag == LLVMContext::MD_noalias
- || Tag == ParallelLoopAccessMDKind);
+ || Tag == ParallelLoopAccessMDKind
+ || Tag == LLVMContext::MD_access_group);
}
// Transfer metadata from Op to the instructions in CV if it is known
// to be safe to do so.
-void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
+void ScalarizerVisitor::transferMetadata(Instruction *Op, const ValueVector &CV) {
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
Op->getAllMetadataOtherThanDebugLoc(MDs);
for (unsigned I = 0, E = CV.size(); I != E; ++I) {
@@ -384,7 +401,7 @@ void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
// Try to fill in Layout from Ty, returning true on success. Alignment is
// the alignment of the vector, or 0 if the ABI default should be used.
-bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
+bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
VectorLayout &Layout, const DataLayout &DL) {
// Make sure we're dealing with a vector.
Layout.VecTy = dyn_cast<VectorType>(Ty);
@@ -408,7 +425,7 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
// to create an instruction like I with operands X and Y and name Name.
template<typename Splitter>
-bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
+bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
VectorType *VT = dyn_cast<VectorType>(I.getType());
if (!VT)
return false;
@@ -441,7 +458,7 @@ static Function *getScalarIntrinsicDeclaration(Module *M,
/// If a call to a vector typed intrinsic function, split into a scalar call per
/// element if possible for the intrinsic.
-bool Scalarizer::splitCall(CallInst &CI) {
+bool ScalarizerVisitor::splitCall(CallInst &CI) {
VectorType *VT = dyn_cast<VectorType>(CI.getType());
if (!VT)
return false;
@@ -499,7 +516,7 @@ bool Scalarizer::splitCall(CallInst &CI) {
return true;
}
-bool Scalarizer::visitSelectInst(SelectInst &SI) {
+bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
VectorType *VT = dyn_cast<VectorType>(SI.getType());
if (!VT)
return false;
@@ -529,19 +546,19 @@ bool Scalarizer::visitSelectInst(SelectInst &SI) {
return true;
}
-bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
+bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
return splitBinary(ICI, ICmpSplitter(ICI));
}
-bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
+bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
return splitBinary(FCI, FCmpSplitter(FCI));
}
-bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
+bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
return splitBinary(BO, BinarySplitter(BO));
}
-bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
if (!VT)
return false;
@@ -587,7 +604,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
return true;
}
-bool Scalarizer::visitCastInst(CastInst &CI) {
+bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
if (!VT)
return false;
@@ -605,7 +622,7 @@ bool Scalarizer::visitCastInst(CastInst &CI) {
return true;
}
-bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
+bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
if (!DstVT || !SrcVT)
@@ -660,7 +677,7 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
return true;
}
-bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
VectorType *VT = dyn_cast<VectorType>(SVI.getType());
if (!VT)
return false;
@@ -684,7 +701,7 @@ bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return true;
}
-bool Scalarizer::visitPHINode(PHINode &PHI) {
+bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
VectorType *VT = dyn_cast<VectorType>(PHI.getType());
if (!VT)
return false;
@@ -709,7 +726,7 @@ bool Scalarizer::visitPHINode(PHINode &PHI) {
return true;
}
-bool Scalarizer::visitLoadInst(LoadInst &LI) {
+bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
if (!ScalarizeLoadStore)
return false;
if (!LI.isSimple())
@@ -733,7 +750,7 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
return true;
}
-bool Scalarizer::visitStoreInst(StoreInst &SI) {
+bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
if (!ScalarizeLoadStore)
return false;
if (!SI.isSimple())
@@ -760,13 +777,13 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
return true;
}
-bool Scalarizer::visitCallInst(CallInst &CI) {
+bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
return splitCall(CI);
}
// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
-bool Scalarizer::finish() {
+bool ScalarizerVisitor::finish() {
// The presence of data in Gathered or Scattered indicates changes
// made to the Function.
if (Gathered.empty() && Scattered.empty())
@@ -797,6 +814,11 @@ bool Scalarizer::finish() {
return true;
}
-FunctionPass *llvm::createScalarizerPass() {
- return new Scalarizer();
+PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) {
+ Module &M = *F.getParent();
+ unsigned ParallelLoopAccessMDKind =
+ M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
+ bool Changed = Impl.visit(F);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
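The rewritten ScalarizerVisitor above walks blocks in reverse post-order so the scattered form of a value is created before any block that uses it is visited. The following self-contained sketch shows how such an order can be computed on a toy acyclic CFG (plain DFS post-order, then reversed); it is only an illustration of the ordering, not LLVM's ReversePostOrderTraversal.

#include <algorithm>
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

// Toy CFG: block name -> successor names.
using CFG = std::map<std::string, std::vector<std::string>>;

static void postOrder(const CFG &G, const std::string &BB,
                      std::set<std::string> &Seen,
                      std::vector<std::string> &Out) {
  if (!Seen.insert(BB).second)
    return;
  for (const std::string &Succ : G.at(BB))
    postOrder(G, Succ, Seen, Out);
  Out.push_back(BB);                  // emit a block after its successors
}

int main() {
  // entry -> {then, else}; then -> merge; else -> merge.
  CFG G = {{"entry", {"then", "else"}},
           {"then", {"merge"}},
           {"else", {"merge"}},
           {"merge", {}}};
  std::set<std::string> Seen;
  std::vector<std::string> PO;
  postOrder(G, "entry", Seen, PO);
  std::reverse(PO.begin(), PO.end()); // reverse post-order: entry first,
                                      // merge last, so defs precede uses
  for (const std::string &BB : PO)
    std::printf("%s\n", BB.c_str());
  return 0;
}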
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 5834b619046b..5a67178cef37 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -19,11 +19,14 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -59,7 +62,11 @@ using namespace llvm;
STATISTIC(NumBranches, "Number of branches unswitched");
STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
STATISTIC(NumTrivial, "Number of unswitches that are trivial");
+STATISTIC(
+ NumCostMultiplierSkipped,
+ "Number of unswitch candidates that had their cost multiplier skipped");
static cl::opt<bool> EnableNonTrivialUnswitch(
"enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
@@ -70,6 +77,22 @@ static cl::opt<int>
UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
cl::desc("The cost threshold for unswitching a loop."));
+static cl::opt<bool> EnableUnswitchCostMultiplier(
+ "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
+ cl::desc("Enable unswitch cost multiplier that prohibits exponential "
+ "explosion in nontrivial unswitch."));
+static cl::opt<int> UnswitchSiblingsToplevelDiv(
+ "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
+ cl::desc("Toplevel siblings divisor for cost multiplier."));
+static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
+ "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
+ cl::desc("Number of unswitch candidates that are ignored when calculating "
+ "cost multiplier."));
+static cl::opt<bool> UnswitchGuards(
+ "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
+ cl::desc("If enabled, simple loop unswitching will also consider "
+ "llvm.experimental.guard intrinsics as unswitch candidates."));
+
/// Collect all of the loop invariant input values transitively used by the
/// homogeneous instruction graph from a given root.
///
@@ -302,10 +325,11 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
formLCSSA(*OldContainingL, DT, &LI, nullptr);
// We shouldn't need to form dedicated exits because the exit introduced
- // here is the (just split by unswitching) preheader. As such, it is
- // necessarily dedicated.
- assert(OldContainingL->hasDedicatedExits() &&
- "Unexpected predecessor of hoisted loop preheader!");
+ // here is the (just split by unswitching) preheader. However, after trivial
+ // unswitching it is possible to get new non-dedicated exits out of parent
+ // loop so let's conservatively form dedicated exit blocks and figure out
+ // if we can optimize later.
+ formDedicatedExitBlocks(OldContainingL, &DT, &LI, /*PreserveLCSSA*/ true);
}
}
@@ -327,7 +351,8 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
- LoopInfo &LI, ScalarEvolution *SE) {
+ LoopInfo &LI, ScalarEvolution *SE,
+ MemorySSAUpdater *MSSAU) {
assert(BI.isConditional() && "Can only unswitch a conditional branch!");
LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
@@ -401,11 +426,14 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
SE->forgetTopmostLoop(&L);
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// Split the preheader, so that we know that there is a safe place to insert
// the conditional branch. We will change the preheader to have a conditional
// branch on LoopCond.
BasicBlock *OldPH = L.getLoopPreheader();
- BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
+ BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
// Now that we have a place to insert the conditional branch, create a place
// to branch to: this is the exit block out of the loop that we are
@@ -417,9 +445,13 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
"A branch's parent isn't a predecessor!");
UnswitchedBB = LoopExitBB;
} else {
- UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
+ UnswitchedBB =
+ SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// Actually move the invariant uses into the unswitched position. If possible,
// we do this by moving the instructions, but when doing partial unswitching
// we do it by building a new merge of the values in the unswitched position.
@@ -430,12 +462,17 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// its successors.
OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
BI);
+ if (MSSAU) {
+ // Temporarily clone the terminator, to make MSSA update cheaper by
+ // separating "insert edge" updates from "remove edge" ones.
+ ParentBB->getInstList().push_back(BI.clone());
+ } else {
+ // Create a new unconditional branch that will continue the loop as a new
+ // terminator.
+ BranchInst::Create(ContinueBB, ParentBB);
+ }
BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
-
- // Create a new unconditional branch that will continue the loop as a new
- // terminator.
- BranchInst::Create(ContinueBB, ParentBB);
} else {
// Only unswitching a subset of inputs to the condition, so we will need to
// build a new branch that merges the invariant inputs.
@@ -451,6 +488,32 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
*UnswitchedBB, *NewPH);
}
+ // Update the dominator tree with the added edge.
+ DT.insertEdge(OldPH, UnswitchedBB);
+
+ // After the dominator tree was updated with the added edge, update MemorySSA
+ // if available.
+ if (MSSAU) {
+ SmallVector<CFGUpdate, 1> Updates;
+ Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
+ MSSAU->applyInsertUpdates(Updates, DT);
+ }
+
+ // Finish updating dominator tree and memory ssa for full unswitch.
+ if (FullUnswitch) {
+ if (MSSAU) {
+ // Remove the cloned branch instruction.
+ ParentBB->getTerminator()->eraseFromParent();
+ // Create unconditional branch now.
+ BranchInst::Create(ContinueBB, ParentBB);
+ MSSAU->removeEdge(ParentBB, LoopExitBB);
+ }
+ DT.deleteEdge(ParentBB, LoopExitBB);
+ }
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// Rewrite the relevant PHI nodes.
if (UnswitchedBB == LoopExitBB)
rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
@@ -458,13 +521,6 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
*ParentBB, *OldPH, FullUnswitch);
- // Now we need to update the dominator tree.
- SmallVector<DominatorTree::UpdateType, 2> DTUpdates;
- DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
- if (FullUnswitch)
- DTUpdates.push_back({DT.Delete, ParentBB, LoopExitBB});
- DT.applyUpdates(DTUpdates);
-
// The constant we can replace all of our invariants with inside the loop
// body. If any of the invariants have a value other than this the loop won't
// be entered.
@@ -482,6 +538,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
if (FullUnswitch)
hoistLoopToNewParent(L, *NewPH, DT, LI);
+ LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
++NumTrivial;
++NumBranches;
return true;
@@ -514,7 +571,8 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
- LoopInfo &LI, ScalarEvolution *SE) {
+ LoopInfo &LI, ScalarEvolution *SE,
+ MemorySSAUpdater *MSSAU) {
LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
Value *LoopCond = SI.getCondition();
@@ -539,7 +597,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
else if (ExitCaseIndices.empty())
return false;
- LLVM_DEBUG(dbgs() << " unswitching trivial cases...\n");
+ LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
// We may need to invalidate SCEVs for the outermost loop reached by any of
// the exits.
@@ -603,7 +664,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// Split the preheader, so that we know that there is a safe place to insert
// the switch.
BasicBlock *OldPH = L.getLoopPreheader();
- BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
+ BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
OldPH->getTerminator()->eraseFromParent();
// Now add the unswitched switch.
@@ -626,9 +687,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
} else {
auto *SplitBB =
- SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
- rewritePHINodesForExitAndUnswitchedBlocks(
- *DefaultExitBB, *SplitBB, *ParentBB, *OldPH, /*FullUnswitch*/ true);
+ SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
+ rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
+ *ParentBB, *OldPH,
+ /*FullUnswitch*/ true);
DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
}
}
@@ -652,9 +714,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
if (!SplitExitBB) {
// If this is the first time we see this, do the split and remember it.
- SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
- rewritePHINodesForExitAndUnswitchedBlocks(
- *ExitBB, *SplitExitBB, *ParentBB, *OldPH, /*FullUnswitch*/ true);
+ SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
+ rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
+ *ParentBB, *OldPH,
+ /*FullUnswitch*/ true);
}
// Update the case pair to point to the split block.
CasePair.second = SplitExitBB;
@@ -731,6 +794,13 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
}
DT.applyUpdates(DTUpdates);
+
+ if (MSSAU) {
+ MSSAU->applyUpdates(DTUpdates, DT);
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
+
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
// We may have changed the nesting relationship for this loop so hoist it to
@@ -739,6 +809,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
++NumTrivial;
++NumSwitches;
+ LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
return true;
}
@@ -755,7 +826,8 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
- LoopInfo &LI, ScalarEvolution *SE) {
+ LoopInfo &LI, ScalarEvolution *SE,
+ MemorySSAUpdater *MSSAU) {
bool Changed = false;
// If loop header has only one reachable successor we should keep looking for
@@ -780,7 +852,7 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
[](Instruction &I) { return I.mayHaveSideEffects(); }))
return Changed;
- TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+ Instruction *CurrentTerm = CurrentBB->getTerminator();
if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
// Don't bother trying to unswitch past a switch with a constant
@@ -789,7 +861,7 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
if (isa<Constant>(SI->getCondition()))
return Changed;
- if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE))
+ if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
// Couldn't unswitch this one so we're done.
return Changed;
@@ -821,7 +893,7 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
// Found a trivial condition candidate: non-foldable conditional branch. If
// we fail to unswitch this, we can't do anything else that is trivial.
- if (!unswitchTrivialBranch(L, *BI, DT, LI, SE))
+ if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
return Changed;
// Mark that we managed to unswitch something.
@@ -874,7 +946,7 @@ static BasicBlock *buildClonedLoopBlocks(
const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
ValueToValueMapTy &VMap,
SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
- DominatorTree &DT, LoopInfo &LI) {
+ DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
SmallVector<BasicBlock *, 4> NewBlocks;
NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
@@ -919,7 +991,7 @@ static BasicBlock *buildClonedLoopBlocks(
// place to merge the CFG, so split the exit first. This is always safe to
// do because there cannot be any non-loop predecessors of a loop exit in
// loop simplified form.
- auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
+ auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
// Rearrange the names to make it easier to write test cases by having the
// exit block carry the suffix rather than the merge block carrying the
@@ -1262,11 +1334,10 @@ static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
// matter as we're just trying to build up the map from inside-out; we use
// the map in a more stably ordered way below.
auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
- llvm::sort(OrderedClonedExitsInLoops.begin(), OrderedClonedExitsInLoops.end(),
- [&](BasicBlock *LHS, BasicBlock *RHS) {
- return ExitLoopMap.lookup(LHS)->getLoopDepth() <
- ExitLoopMap.lookup(RHS)->getLoopDepth();
- });
+ llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
+ return ExitLoopMap.lookup(LHS)->getLoopDepth() <
+ ExitLoopMap.lookup(RHS)->getLoopDepth();
+ });
// Populate the existing ExitLoopMap with everything reachable from each
// exit, starting from the inner most exit.
@@ -1351,7 +1422,7 @@ static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
static void
deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
- DominatorTree &DT) {
+ DominatorTree &DT, MemorySSAUpdater *MSSAU) {
// Find all the dead clones, and remove them from their successors.
SmallVector<BasicBlock *, 16> DeadBlocks;
for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
@@ -1363,6 +1434,13 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
DeadBlocks.push_back(ClonedBB);
}
+ // Remove all MemorySSA in the dead blocks
+ if (MSSAU) {
+ SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
+ DeadBlocks.end());
+ MSSAU->removeBlocks(DeadBlockSet);
+ }
+
// Drop any remaining references to break cycles.
for (BasicBlock *BB : DeadBlocks)
BB->dropAllReferences();
@@ -1371,21 +1449,33 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
BB->eraseFromParent();
}
-static void
-deleteDeadBlocksFromLoop(Loop &L,
- SmallVectorImpl<BasicBlock *> &ExitBlocks,
- DominatorTree &DT, LoopInfo &LI) {
- // Find all the dead blocks, and remove them from their successors.
- SmallVector<BasicBlock *, 16> DeadBlocks;
- for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
- if (!DT.isReachableFromEntry(BB)) {
- for (BasicBlock *SuccBB : successors(BB))
+static void deleteDeadBlocksFromLoop(Loop &L,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ DominatorTree &DT, LoopInfo &LI,
+ MemorySSAUpdater *MSSAU) {
+ // Find all the dead blocks tied to this loop, and remove them from their
+ // successors.
+ SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
+
+ // Start with loop/exit blocks and get a transitive closure of reachable dead
+ // blocks.
+ SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
+ ExitBlocks.end());
+ DeathCandidates.append(L.blocks().begin(), L.blocks().end());
+ while (!DeathCandidates.empty()) {
+ auto *BB = DeathCandidates.pop_back_val();
+ if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
+ for (BasicBlock *SuccBB : successors(BB)) {
SuccBB->removePredecessor(BB);
- DeadBlocks.push_back(BB);
+ DeathCandidates.push_back(SuccBB);
+ }
+ DeadBlockSet.insert(BB);
}
+ }
- SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
- DeadBlocks.end());
+ // Remove all MemorySSA in the dead blocks
+ if (MSSAU)
+ MSSAU->removeBlocks(DeadBlockSet);
// Filter out the dead blocks from the exit blocks list so that it can be
// used in the caller.
@@ -1394,7 +1484,7 @@ deleteDeadBlocksFromLoop(Loop &L,
// Walk from this loop up through its parents removing all of the dead blocks.
for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
- for (auto *BB : DeadBlocks)
+ for (auto *BB : DeadBlockSet)
ParentL->getBlocksSet().erase(BB);
llvm::erase_if(ParentL->getBlocksVector(),
[&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
@@ -1419,7 +1509,7 @@ deleteDeadBlocksFromLoop(Loop &L,
// Remove the loop mappings for the dead blocks and drop all the references
// from these blocks to others to handle cyclic references as we start
// deleting the blocks themselves.
- for (auto *BB : DeadBlocks) {
+ for (auto *BB : DeadBlockSet) {
// Check that the dominator tree has already been updated.
assert(!DT.getNode(BB) && "Should already have cleared domtree!");
LI.changeLoopFor(BB, nullptr);
@@ -1428,7 +1518,7 @@ deleteDeadBlocksFromLoop(Loop &L,
// Actually delete the blocks now that they've been fully unhooked from the
// IR.
- for (auto *BB : DeadBlocks)
+ for (auto *BB : DeadBlockSet)
BB->eraseFromParent();
}
@@ -1782,11 +1872,11 @@ void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
} while (!DomWorklist.empty());
}
-static bool unswitchNontrivialInvariants(
- Loop &L, TerminatorInst &TI, ArrayRef<Value *> Invariants,
- DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
- function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE) {
+static void unswitchNontrivialInvariants(
+ Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks, DominatorTree &DT, LoopInfo &LI,
+ AssumptionCache &AC, function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
auto *ParentBB = TI.getParent();
BranchInst *BI = dyn_cast<BranchInst>(&TI);
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
@@ -1803,6 +1893,9 @@ static bool unswitchNontrivialInvariants(
assert(isa<Instruction>(BI->getCondition()) &&
"Partial unswitching requires an instruction as the condition!");
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// Constant and BBs tracking the cloned and continuing successor. When we are
// unswitching the entire condition, this can just be trivially chosen to
// unswitch towards `true`. However, when we are unswitching a set of
@@ -1841,19 +1934,12 @@ static bool unswitchNontrivialInvariants(
// whatever reason).
assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
- SmallVector<BasicBlock *, 4> ExitBlocks;
- L.getUniqueExitBlocks(ExitBlocks);
-
- // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
- // don't know how to split those exit blocks.
- // FIXME: We should teach SplitBlock to handle this and remove this
- // restriction.
- for (auto *ExitBB : ExitBlocks)
- if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI()))
- return false;
-
// Compute the parent loop now before we start hacking on things.
Loop *ParentL = L.getParentLoop();
+ // Get blocks in RPO order for MSSA update, before changing the CFG.
+ LoopBlocksRPO LBRPO(&L);
+ if (MSSAU)
+ LBRPO.perform(&LI);
// Compute the outer-most loop containing one of our exit blocks. This is the
// furthest up our loopnest which can be mutated, which we will use below to
@@ -1903,7 +1989,7 @@ static bool unswitchNontrivialInvariants(
// between the unswitched versions, and we will have a new preheader for the
// original loop.
BasicBlock *SplitBB = L.getLoopPreheader();
- BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI);
+ BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
// Keep track of the dominator tree updates needed.
SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
@@ -1916,7 +2002,7 @@ static bool unswitchNontrivialInvariants(
VMaps.emplace_back(new ValueToValueMapTy());
ClonedPHs[SuccBB] = buildClonedLoopBlocks(
L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
- DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI);
+ DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
}
// The stitching of the branched code back together depends on whether we're
@@ -1924,7 +2010,63 @@ static bool unswitchNontrivialInvariants(
// nuke the initial terminator placed in the split block.
SplitBB->getTerminator()->eraseFromParent();
if (FullUnswitch) {
- // First we need to unhook the successor relationship as we'll be replacing
+ // Splice the terminator from the original loop and rewrite its
+ // successors.
+ SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
+
+ // Keep a clone of the terminator for MSSA updates.
+ Instruction *NewTI = TI.clone();
+ ParentBB->getInstList().push_back(NewTI);
+
+ // First wire up the moved terminator to the preheaders.
+ if (BI) {
+ BasicBlock *ClonedPH = ClonedPHs.begin()->second;
+ BI->setSuccessor(ClonedSucc, ClonedPH);
+ BI->setSuccessor(1 - ClonedSucc, LoopPH);
+ DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
+ } else {
+ assert(SI && "Must either be a branch or switch!");
+
+ // Walk the cases and directly update their successors.
+ assert(SI->getDefaultDest() == RetainedSuccBB &&
+ "Not retaining default successor!");
+ SI->setDefaultDest(LoopPH);
+ for (auto &Case : SI->cases())
+ if (Case.getCaseSuccessor() == RetainedSuccBB)
+ Case.setSuccessor(LoopPH);
+ else
+ Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
+
+ // We need to use the set to populate domtree updates as even when there
+ // are multiple cases pointing at the same successor we only want to
+ // remove and insert one edge in the domtree.
+ for (BasicBlock *SuccBB : UnswitchedSuccBBs)
+ DTUpdates.push_back(
+ {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
+ }
+
+ if (MSSAU) {
+ DT.applyUpdates(DTUpdates);
+ DTUpdates.clear();
+
+ // Remove all but one edge to the retained block and all unswitched
+ // blocks. This is to avoid having duplicate entries in the cloned Phis,
+ // when we know we only keep a single edge for each case.
+ MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
+ for (BasicBlock *SuccBB : UnswitchedSuccBBs)
+ MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
+
+ for (auto &VMap : VMaps)
+ MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
+ /*IgnoreIncomingWithNoClones=*/true);
+ MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
+
+ // Remove all edges to unswitched blocks.
+ for (BasicBlock *SuccBB : UnswitchedSuccBBs)
+ MSSAU->removeEdge(ParentBB, SuccBB);
+ }
+
+ // Now unhook the successor relationship as we'll be replacing
// the terminator with a direct branch. This is much simpler for branches
// than switches so we handle those first.
if (BI) {
@@ -1942,9 +2084,10 @@ static bool unswitchNontrivialInvariants(
// is a duplicate edge to the retained successor as the retained successor
// is always the default successor and as we'll replace this with a direct
// branch we no longer need the duplicate entries in the PHI nodes.
- assert(SI->getDefaultDest() == RetainedSuccBB &&
+ SwitchInst *NewSI = cast<SwitchInst>(NewTI);
+ assert(NewSI->getDefaultDest() == RetainedSuccBB &&
"Not retaining default successor!");
- for (auto &Case : SI->cases())
+ for (auto &Case : NewSI->cases())
Case.getCaseSuccessor()->removePredecessor(
ParentBB,
/*DontDeleteUselessPHIs*/ true);
@@ -1956,34 +2099,8 @@ static bool unswitchNontrivialInvariants(
DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
}
- // Now that we've unhooked the successor relationship, splice the terminator
- // from the original loop to the split.
- SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
-
- // Now wire up the terminator to the preheaders.
- if (BI) {
- BasicBlock *ClonedPH = ClonedPHs.begin()->second;
- BI->setSuccessor(ClonedSucc, ClonedPH);
- BI->setSuccessor(1 - ClonedSucc, LoopPH);
- DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
- } else {
- assert(SI && "Must either be a branch or switch!");
-
- // Walk the cases and directly update their successors.
- SI->setDefaultDest(LoopPH);
- for (auto &Case : SI->cases())
- if (Case.getCaseSuccessor() == RetainedSuccBB)
- Case.setSuccessor(LoopPH);
- else
- Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
-
- // We need to use the set to populate domtree updates as even when there
- // are multiple cases pointing at the same successor we only want to
- // remove and insert one edge in the domtree.
- for (BasicBlock *SuccBB : UnswitchedSuccBBs)
- DTUpdates.push_back(
- {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
- }
+ // After MSSAU update, remove the cloned terminator instruction NewTI.
+ ParentBB->getTerminator()->eraseFromParent();
// Create a new unconditional branch to the continuing block (as opposed to
// the one cloned).
@@ -2002,12 +2119,19 @@ static bool unswitchNontrivialInvariants(
// Apply the updates accumulated above to get an up-to-date dominator tree.
DT.applyUpdates(DTUpdates);
+ if (!FullUnswitch && MSSAU) {
+ // Update MSSA for partial unswitch, after DT update.
+ SmallVector<CFGUpdate, 1> Updates;
+ Updates.push_back(
+ {cfg::UpdateKind::Insert, SplitBB, ClonedPHs.begin()->second});
+ MSSAU->applyInsertUpdates(Updates, DT);
+ }
// Now that we have an accurate dominator tree, first delete the dead cloned
// blocks so that we can accurately build any cloned loops. It is important to
// not delete the blocks from the original loop yet because we still want to
// reference the original loop to understand the cloned loop's structure.
- deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT);
+ deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
// Build the cloned loop structure itself. This may be substantially
// different from the original structure due to the simplified CFG. This also
@@ -2019,10 +2143,17 @@ static bool unswitchNontrivialInvariants(
// Now that our cloned loops have been built, we can update the original loop.
// First we delete the dead blocks from it and then we rebuild the loop
// structure taking these deletions into account.
- deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI);
+ deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
SmallVector<Loop *, 4> HoistedLoops;
bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// This transformation has a high risk of corrupting the dominator tree, and
// the below steps to rebuild loop structures will result in hard to debug
// errors in that case so verify that the dominator tree is sane first.
@@ -2038,6 +2169,18 @@ static bool unswitchNontrivialInvariants(
assert(UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!");
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
+
+  // When considering multiple partially-unswitched invariants
+  // we can't simply replace them with constants in both branches.
+  //
+  // For 'AND' we infer that the true branch ("continue") means true
+  // for each invariant operand.
+  // For 'OR' we infer that the false branch ("continue") means false
+  // for each invariant operand.
+  // Consequently, in the multiple-partial case we don't replace the
+  // invariants in the unswitched branch.
+ bool ReplaceUnswitched = FullUnswitch || (Invariants.size() == 1);
+
ConstantInt *UnswitchedReplacement =
Direction ? ConstantInt::getTrue(BI->getContext())
: ConstantInt::getFalse(BI->getContext());
@@ -2057,7 +2200,8 @@ static bool unswitchNontrivialInvariants(
// unswitched if in the cloned blocks.
if (DT.dominates(LoopPH, UserI->getParent()))
U->set(ContinueReplacement);
- else if (DT.dominates(ClonedPH, UserI->getParent()))
+ else if (ReplaceUnswitched &&
+ DT.dominates(ClonedPH, UserI->getParent()))
U->set(UnswitchedReplacement);
}
}
@@ -2134,8 +2278,13 @@ static bool unswitchNontrivialInvariants(
SibLoops.push_back(UpdatedL);
UnswitchCB(IsStillLoop, SibLoops);
- ++NumBranches;
- return true;
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ if (BI)
+ ++NumBranches;
+ else
+ ++NumSwitches;
}
/// Recursively compute the cost of a dominator subtree based on the per-block
@@ -2171,19 +2320,208 @@ computeDomSubtreeCost(DomTreeNode &N,
return Cost;
}
+/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
+/// making the following replacement:
+///
+/// --code before guard--
+/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+/// --code after guard--
+///
+/// into
+///
+/// --code before guard--
+/// br i1 %cond, label %guarded, label %deopt
+///
+/// guarded:
+/// --code after guard--
+///
+/// deopt:
+/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+/// unreachable
+///
+/// It also makes all relevant DT and LI updates, so that all structures are in
+/// valid state after this transform.
+static BranchInst *
+turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
+ SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
+ LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
+ BasicBlock *CheckBB = GI->getParent();
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+  // Remove all CheckBB's successors from DomTree. A block can appear among the
+  // successors more than once, but the corresponding DomTree update should be
+  // recorded only once.
+ SmallPtrSet<BasicBlock *, 4> Successors;
+ for (auto *Succ : successors(CheckBB))
+ if (Successors.insert(Succ).second)
+ DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
+
+ Instruction *DeoptBlockTerm =
+ SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
+ BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
+ // SplitBlockAndInsertIfThen inserts control flow that branches to
+ // DeoptBlockTerm if the condition is true. We want the opposite.
+ CheckBI->swapSuccessors();
+
+ BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
+ GuardedBlock->setName("guarded");
+ CheckBI->getSuccessor(1)->setName("deopt");
+ BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
+
+ // We now have a new exit block.
+ ExitBlocks.push_back(CheckBI->getSuccessor(1));
+
+ if (MSSAU)
+ MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
+
+ GI->moveBefore(DeoptBlockTerm);
+ GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
+
+ // Add new successors of CheckBB into DomTree.
+ for (auto *Succ : successors(CheckBB))
+ DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
+
+ // Now the blocks that used to be CheckBB's successors are GuardedBlock's
+ // successors.
+ for (auto *Succ : Successors)
+ DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
+
+ // Make proper changes to DT.
+ DT.applyUpdates(DTUpdates);
+ // Inform LI of a new loop block.
+ L.addBasicBlockToLoop(GuardedBlock, LI);
+
+ if (MSSAU) {
+ MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
+ MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::End);
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
+
+ ++NumGuards;
+ return CheckBI;
+}
+
+/// The cost multiplier is a way to limit potentially exponential behavior of
+/// loop-unswitch. The cost is multiplied in proportion to 2^(number of
+/// unswitch candidates available). It also accounts for the number of
+/// "sibling" loops, as a proxy for previous unswitches that already happened
+/// on this cluster of loops. The formula was deliberately kept simple, just
+/// enough to limit the worst-case behavior; even though it is no longer that
+/// simple, it is still not an attempt at a detailed heuristic size prediction.
+///
+/// TODO: Make a proper accounting of "explosion" effect for all kinds of
+/// unswitch candidates, making adequate predictions instead of wild guesses.
+/// That requires knowing not just the number of "remaining" candidates but
+/// also costs of unswitching for each of these candidates.
+static int calculateUnswitchCostMultiplier(
+ Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
+ ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>>
+ UnswitchCandidates) {
+
+  // Guards and other exiting conditions do not contribute to exponential
+  // explosion as long as they dominate the latch (otherwise there might be
+  // another path to the latch remaining that does not allow eliminating the
+  // loop copy on unswitch).
+ BasicBlock *Latch = L.getLoopLatch();
+ BasicBlock *CondBlock = TI.getParent();
+ if (DT.dominates(CondBlock, Latch) &&
+ (isGuard(&TI) ||
+ llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
+ return L.contains(SuccBB);
+ }) <= 1)) {
+ NumCostMultiplierSkipped++;
+ return 1;
+ }
+
+ auto *ParentL = L.getParentLoop();
+ int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
+ : std::distance(LI.begin(), LI.end()));
+  // Count the number of clones that all the candidates might cause during
+  // unswitching. A branch/guard counts as 1, a switch counts as log2 of its
+  // cases.
+ int UnswitchedClones = 0;
+ for (auto Candidate : UnswitchCandidates) {
+ Instruction *CI = Candidate.first;
+ BasicBlock *CondBlock = CI->getParent();
+ bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
+ if (isGuard(CI)) {
+ if (!SkipExitingSuccessors)
+ UnswitchedClones++;
+ continue;
+ }
+ int NonExitingSuccessors = llvm::count_if(
+ successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
+ return !SkipExitingSuccessors || L.contains(SuccBB);
+ });
+ UnswitchedClones += Log2_32(NonExitingSuccessors);
+ }
+
+ // Ignore up to the "unscaled candidates" number of unswitch candidates
+ // when calculating the power-of-two scaling of the cost. The main idea
+ // with this control is to allow a small number of unswitches to happen
+  // and rely more on the siblings multiplier (see below) when the number
+ // of candidates is small.
+ unsigned ClonesPower =
+ std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
+
+ // Allowing top-level loops to spread a bit more than nested ones.
+ int SiblingsMultiplier =
+ std::max((ParentL ? SiblingsCount
+ : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
+ 1);
+ // Compute the cost multiplier in a way that won't overflow by saturating
+ // at an upper bound.
+ int CostMultiplier;
+ if (ClonesPower > Log2_32(UnswitchThreshold) ||
+ SiblingsMultiplier > UnswitchThreshold)
+ CostMultiplier = UnswitchThreshold;
+ else
+ CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
+ (int)UnswitchThreshold);
+
+ LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
+ << " (siblings " << SiblingsMultiplier << " * clones "
+ << (1 << ClonesPower) << ")"
+ << " for unswitch candidate: " << TI << "\n");
+ return CostMultiplier;
+}
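To make the saturation logic above concrete, here is a small worked sketch of the same arithmetic with hypothetical inputs (the candidate counts and option values are assumptions made for illustration, not values taken from this patch):

    // Assume two unswitchable branches plus one 16-case switch, in a nest
    // whose parent loop has three sub-loops, with an assumed threshold of 50
    // and two "unscaled" candidates. Log2_32 is from llvm/Support/MathExtras.h.
    int UnswitchedClones = 1 + 1 + Log2_32(16);                    // == 6
    unsigned ClonesPower = std::max(UnswitchedClones - 2, 0);      // == 4
    int SiblingsMultiplier = std::max(3, 1);                       // == 3
    int Threshold = 50;                                            // assumed
    int CostMultiplier =
        (ClonesPower > Log2_32(Threshold) || SiblingsMultiplier > Threshold)
            ? Threshold
            : std::min(SiblingsMultiplier * (1 << ClonesPower), Threshold);
    // CostMultiplier == std::min(3 * 16, 50) == 48, so this candidate's cost
    // is scaled by 48 before being compared against the unswitch threshold.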
+
static bool
unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
AssumptionCache &AC, TargetTransformInfo &TTI,
function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
// Collect all invariant conditions within this loop (as opposed to an inner
// loop which would be handled when visiting that inner loop).
- SmallVector<std::pair<TerminatorInst *, TinyPtrVector<Value *>>, 4>
+ SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
UnswitchCandidates;
+
+ // Whether or not we should also collect guards in the loop.
+ bool CollectGuards = false;
+ if (UnswitchGuards) {
+ auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ if (GuardDecl && !GuardDecl->use_empty())
+ CollectGuards = true;
+ }
+
for (auto *BB : L.blocks()) {
if (LI.getLoopFor(BB) != &L)
continue;
+ if (CollectGuards)
+ for (auto &I : *BB)
+ if (isGuard(&I)) {
+ auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
+ // TODO: Support AND, OR conditions and partial unswitching.
+ if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
+ UnswitchCandidates.push_back({&I, {Cond}});
+ }
+
if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
// We can only consider fully loop-invariant switch conditions as we need
// to completely eliminate the switch after unswitching.
@@ -2231,6 +2569,19 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
return false;
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L.getUniqueExitBlocks(ExitBlocks);
+
+ // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
+ // don't know how to split those exit blocks.
+ // FIXME: We should teach SplitBlock to handle this and remove this
+ // restriction.
+ for (auto *ExitBB : ExitBlocks)
+ if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI())) {
+      LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad in exit block\n");
+ return false;
+ }
+
LLVM_DEBUG(
dbgs() << "Considering " << UnswitchCandidates.size()
<< " non-trivial loop invariant conditions for unswitching.\n");
@@ -2288,7 +2639,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
SmallDenseMap<DomTreeNode *, int, 4> DTCostMap;
// Given a terminator which might be unswitched, computes the non-duplicated
// cost for that terminator.
- auto ComputeUnswitchedCost = [&](TerminatorInst &TI, bool FullUnswitch) {
+ auto ComputeUnswitchedCost = [&](Instruction &TI, bool FullUnswitch) {
BasicBlock &BB = *TI.getParent();
SmallPtrSet<BasicBlock *, 4> Visited;
@@ -2335,22 +2686,40 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
// Now scale the cost by the number of unique successors minus one. We
// subtract one because there is already at least one copy of the entire
// loop. This is computing the new cost of unswitching a condition.
- assert(Visited.size() > 1 &&
+ // Note that guards always have 2 unique successors that are implicit and
+  // will be materialized if we decide to unswitch the guard.
+ int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
+ assert(SuccessorsCount > 1 &&
"Cannot unswitch a condition without multiple distinct successors!");
- return Cost * (Visited.size() - 1);
+ return Cost * (SuccessorsCount - 1);
};
- TerminatorInst *BestUnswitchTI = nullptr;
+ Instruction *BestUnswitchTI = nullptr;
int BestUnswitchCost;
ArrayRef<Value *> BestUnswitchInvariants;
for (auto &TerminatorAndInvariants : UnswitchCandidates) {
- TerminatorInst &TI = *TerminatorAndInvariants.first;
+ Instruction &TI = *TerminatorAndInvariants.first;
ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
BranchInst *BI = dyn_cast<BranchInst>(&TI);
int CandidateCost = ComputeUnswitchedCost(
TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
Invariants[0] == BI->getCondition()));
- LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
- << " for unswitch candidate: " << TI << "\n");
+    // Calculate the cost multiplier, which is a tool to limit potentially
+ // exponential behavior of loop-unswitch.
+ if (EnableUnswitchCostMultiplier) {
+ int CostMultiplier =
+ calculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
+ assert(
+ (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
+ "cost multiplier needs to be in the range of 1..UnswitchThreshold");
+ CandidateCost *= CostMultiplier;
+ LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
+ << " (multiplier: " << CostMultiplier << ")"
+ << " for unswitch candidate: " << TI << "\n");
+ } else {
+ LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
+ << " for unswitch candidate: " << TI << "\n");
+ }
+
if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
BestUnswitchTI = &TI;
BestUnswitchCost = CandidateCost;
@@ -2364,11 +2733,17 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
return false;
}
- LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
+ // If the best candidate is a guard, turn it into a branch.
+ if (isGuard(BestUnswitchTI))
+ BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
+ ExitBlocks, DT, LI, MSSAU);
+
+ LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "
<< BestUnswitchCost << ") terminator: " << *BestUnswitchTI
<< "\n");
- return unswitchNontrivialInvariants(
- L, *BestUnswitchTI, BestUnswitchInvariants, DT, LI, AC, UnswitchCB, SE);
+ unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
+ ExitBlocks, DT, LI, AC, UnswitchCB, SE, MSSAU);
+ return true;
}
/// Unswitch control flow predicated on loop invariant conditions.
@@ -2380,6 +2755,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
///
/// The `DT`, `LI`, `AC`, `TTI` parameters are required analyses that are also
/// updated based on the unswitch.
+/// The `MSSA` analysis is also updated if valid (i.e. its use is enabled).
///
/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
/// true, we will attempt to do non-trivial unswitching as well as trivial
@@ -2395,7 +2771,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
AssumptionCache &AC, TargetTransformInfo &TTI,
bool NonTrivial,
function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
bool Changed = false;
@@ -2405,7 +2781,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
return false;
// Try trivial unswitch first before loop over other basic blocks in the loop.
- if (unswitchAllTrivialConditions(L, DT, LI, SE)) {
+ if (unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
// If we unswitched successfully we will want to clean up the loop before
// processing it further so just mark it as unswitched and return.
UnswitchCB(/*CurrentLoopValid*/ true, {});
@@ -2426,7 +2802,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
// Try to unswitch the best invariant condition. We prefer this full unswitch to
// a partial unswitch when possible below the threshold.
- if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE))
+ if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE, MSSAU))
return true;
// No other opportunities to unswitch.
@@ -2460,10 +2836,19 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
U.markLoopAsDeleted(L, LoopName);
};
+ Optional<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA) {
+ MSSAU = MemorySSAUpdater(AR.MSSA);
+ if (VerifyMemorySSA)
+ AR.MSSA->verifyMemorySSA();
+ }
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, UnswitchCB,
- &AR.SE))
+ &AR.SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
return PreservedAnalyses::all();
+ if (AR.MSSA && VerifyMemorySSA)
+ AR.MSSA->verifyMemorySSA();
+
// Historically this pass has had issues with the dominator tree so verify it
// in asserts builds.
assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
@@ -2489,6 +2874,10 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (EnableMSSALoopDependency) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
getLoopAnalysisUsage(AU);
}
};
@@ -2508,6 +2897,12 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ MemorySSA *MSSA = nullptr;
+ Optional<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency) {
+ MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = MemorySSAUpdater(MSSA);
+ }
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
@@ -2527,7 +2922,14 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LPM.markLoopAsDeleted(*L);
};
- bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE);
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
+ bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
// If anything was unswitched, also clear any cached information about this
// loop.
@@ -2547,6 +2949,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
"Simple unswitch loops", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index ca6b93e0b4a9..c99da8f0737a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -72,18 +72,18 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA,
return false;
}
- if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst) || Inst->isEHPad() ||
+ if (Inst->isTerminator() || isa<PHINode>(Inst) || Inst->isEHPad() ||
Inst->mayThrow())
return false;
- if (auto CS = CallSite(Inst)) {
+ if (auto *Call = dyn_cast<CallBase>(Inst)) {
// Convergent operations cannot be made control-dependent on additional
// values.
- if (CS.hasFnAttr(Attribute::Convergent))
+ if (Call->hasFnAttr(Attribute::Convergent))
return false;
for (Instruction *S : Stores)
- if (isModSet(AA.getModRefInfo(S, CS)))
+ if (isModSet(AA.getModRefInfo(S, Call)))
return false;
}
@@ -104,7 +104,7 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo,
// It's never legal to sink an instruction into a block which terminates in an
// EH-pad.
- if (SuccToSinkTo->getTerminator()->isExceptional())
+ if (SuccToSinkTo->getTerminator()->isExceptionalTerminator())
return false;
// If the block has multiple predecessors, this would introduce computation
diff --git a/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index 6743e19a7c92..c0f75ddddbe0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -33,7 +33,7 @@ STATISTIC(NumSpeculatedInstructions,
STATISTIC(NumNewRedundantInstructions,
"Number of new, redundant instructions inserted");
-/// Check wether speculating the users of a PHI node around the PHI
+/// Check whether speculating the users of a PHI node around the PHI
/// will be safe.
///
/// This checks both that all of the users are safe and also that all of their
diff --git a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 2061db13639a..b5089b006bdd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -640,12 +640,12 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
switch (C.CandidateKind) {
case Candidate::Add:
- case Candidate::Mul:
+ case Candidate::Mul: {
// C = Basis + Bump
- if (BinaryOperator::isNeg(Bump)) {
+ Value *NegBump;
+ if (match(Bump, m_Neg(m_Value(NegBump)))) {
// If Bump is a neg instruction, emit C = Basis - (-Bump).
- Reduced =
- Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump));
+ Reduced = Builder.CreateSub(Basis.Ins, NegBump);
// We only use the negative argument of Bump, and Bump itself may be
// trivially dead.
RecursivelyDeleteTriviallyDeadInstructions(Bump);
@@ -662,6 +662,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
Reduced = Builder.CreateAdd(Basis.Ins, Bump);
}
break;
+ }
case Candidate::GEP:
{
Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
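For readers unfamiliar with the PatternMatch idiom that the hunk above switches to, here is a minimal stand-alone sketch; it is illustrative only and not part of the patch:

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Returns X when Bump is the negation "sub 0, X", otherwise nullptr. This
    // mirrors the check that replaced the removed BinaryOperator::isNeg helper.
    static Value *matchNegatedValue(Value *Bump) {
      Value *X;
      if (match(Bump, m_Neg(m_Value(X))))
        return X;
      return nullptr;
    }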
diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index d650264176aa..0db762d846f2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -13,7 +13,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
@@ -183,7 +184,7 @@ class StructurizeCFG : public RegionPass {
Function *Func;
Region *ParentRegion;
- DivergenceAnalysis *DA;
+ LegacyDivergenceAnalysis *DA;
DominatorTree *DT;
LoopInfo *LI;
@@ -269,7 +270,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
@@ -285,7 +286,7 @@ char StructurizeCFG::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
@@ -596,7 +597,8 @@ void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
/// Add the real PHI value as soon as everything is set up
void StructurizeCFG::setPhiValues() {
- SSAUpdater Updater;
+ SmallVector<PHINode *, 8> InsertedPhis;
+ SSAUpdater Updater(&InsertedPhis);
for (const auto &AddedPhi : AddedPhis) {
BasicBlock *To = AddedPhi.first;
const BBVector &From = AddedPhi.second;
@@ -632,11 +634,31 @@ void StructurizeCFG::setPhiValues() {
DeletedPhis.erase(To);
}
assert(DeletedPhis.empty());
+
+ // Simplify any phis inserted by the SSAUpdater if possible
+ bool Changed;
+ do {
+ Changed = false;
+
+ SimplifyQuery Q(Func->getParent()->getDataLayout());
+ Q.DT = DT;
+ for (size_t i = 0; i < InsertedPhis.size(); ++i) {
+ PHINode *Phi = InsertedPhis[i];
+ if (Value *V = SimplifyInstruction(Phi, Q)) {
+ Phi->replaceAllUsesWith(V);
+ Phi->eraseFromParent();
+ InsertedPhis[i] = InsertedPhis.back();
+ InsertedPhis.pop_back();
+ i--;
+ Changed = true;
+ }
+ }
+ } while (Changed);
}
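The clean-up pattern introduced above can be summarized in isolation as follows (a sketch assuming the function F, a DominatorTree pointer DT, and the updater's value bookkeeping already exist; the real code additionally iterates to a fixed point):

    // Collect every PHI the SSAUpdater materializes, then fold the trivial ones.
    SmallVector<PHINode *, 8> InsertedPhis;
    SSAUpdater Updater(&InsertedPhis);
    // ... Initialize / AddAvailableValue / GetValueInMiddleOfBlock calls ...

    SimplifyQuery Q(F.getParent()->getDataLayout());
    Q.DT = DT;
    for (PHINode *Phi : InsertedPhis)
      if (Value *V = SimplifyInstruction(Phi, Q)) {
        Phi->replaceAllUsesWith(V);
        Phi->eraseFromParent();
      }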
/// Remove phi values from all successors and then remove the terminator.
void StructurizeCFG::killTerminator(BasicBlock *BB) {
- TerminatorInst *Term = BB->getTerminator();
+ Instruction *Term = BB->getTerminator();
if (!Term)
return;
@@ -914,7 +936,7 @@ void StructurizeCFG::rebuildSSA() {
}
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
- const DivergenceAnalysis &DA) {
+ const LegacyDivergenceAnalysis &DA) {
for (auto E : R->elements()) {
if (!E->isSubRegion()) {
auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
@@ -962,7 +984,7 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
// but we shouldn't rely on metadata for correctness!
unsigned UniformMDKindID =
R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
- DA = &getAnalysis<DivergenceAnalysis>();
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index f8cd6c17a5a6..0f6db21f73b6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -61,6 +61,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
@@ -68,6 +69,8 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -124,6 +127,12 @@ struct AllocaDerivedValueTracker {
case Instruction::Call:
case Instruction::Invoke: {
CallSite CS(I);
+      // If the alloca-derived argument is passed byval, it is neither an escape
+      // point nor a use of the alloca. Calling with byval copies the contents
+ // of the alloca into argument registers or stack slots, which exist
+ // beyond the lifetime of the current frame.
+ if (CS.isArgOperand(U) && CS.isByValArgument(CS.getArgumentNo(U)))
+ continue;
bool IsNocapture =
CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U));
callUsesLocalStack(CS, IsNocapture);
@@ -488,12 +497,10 @@ static CallInst *findTRECandidate(Instruction *TI,
return CI;
}
-static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
- BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs,
- AliasAnalysis *AA,
- OptimizationRemarkEmitter *ORE) {
+static bool eliminateRecursiveTailCall(
+ CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs,
+ AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
// If we are introducing accumulator recursion to eliminate operations after
// the call instruction that are both associative and commutative, the initial
// value for the accumulator is placed in this variable. If this value is set
@@ -566,7 +573,8 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
NewEntry->takeName(OldEntry);
OldEntry->setName("tailrecurse");
- BranchInst::Create(OldEntry, NewEntry);
+ BranchInst *BI = BranchInst::Create(OldEntry, NewEntry);
+ BI->setDebugLoc(CI->getDebugLoc());
// If this tail call is marked 'tail' and if there are any allocas in the
// entry block, move them up to the new entry block.
@@ -592,6 +600,10 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
PN->addIncoming(&*I, NewEntry);
ArgumentPHIs.push_back(PN);
}
+ // The entry block was changed from OldEntry to NewEntry.
+ // The forward DominatorTree needs to be recalculated when the EntryBB is
+ // changed. In this corner-case we recalculate the entire tree.
+ DTU.recalculate(*NewEntry->getParent());
}
// If this function has self recursive calls in the tail position where some
@@ -667,6 +679,7 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BB->getInstList().erase(Ret); // Remove return.
BB->getInstList().erase(CI); // Remove call.
+ DTU.insertEdge(BB, OldEntry);
++NumEliminated;
return true;
}
@@ -675,7 +688,7 @@ static bool foldReturnAndProcessPred(
BasicBlock *BB, ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI,
- AliasAnalysis *AA, OptimizationRemarkEmitter *ORE) {
+ AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
bool Change = false;
// Make sure this block is a trivial return block.
@@ -689,7 +702,7 @@ static bool foldReturnAndProcessPred(
SmallVector<BranchInst*, 8> UncondBranchPreds;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *Pred = *PI;
- TerminatorInst *PTI = Pred->getTerminator();
+ Instruction *PTI = Pred->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
if (BI->isUnconditional())
UncondBranchPreds.push_back(BI);
@@ -701,17 +714,17 @@ static bool foldReturnAndProcessPred(
if (CallInst *CI = findTRECandidate(BI, CannotTailCallElimCallsMarkedTail, TTI)){
LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
<< "INTO UNCOND BRANCH PRED: " << *Pred);
- ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred);
+ ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU);
// Cleanup: if all predecessors of BB have been eliminated by
// FoldReturnIntoUncondBranch, delete it. It is important to empty it,
// because the ret instruction in there is still using a value which
// eliminateRecursiveTailCall will attempt to remove.
if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
- BB->eraseFromParent();
+ DTU.deleteBB(BB);
eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, AA, ORE);
+ ArgumentPHIs, AA, ORE, DTU);
++NumRetDuped;
Change = true;
}
@@ -720,24 +733,23 @@ static bool foldReturnAndProcessPred(
return Change;
}
-static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail,
- const TargetTransformInfo *TTI,
- AliasAnalysis *AA,
- OptimizationRemarkEmitter *ORE) {
+static bool processReturningBlock(
+ ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI,
+ AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI);
if (!CI)
return false;
return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, AA, ORE);
+ ArgumentPHIs, AA, ORE, DTU);
}
static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI,
AliasAnalysis *AA,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE,
+ DomTreeUpdater &DTU) {
if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
return false;
@@ -772,11 +784,11 @@ static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI,
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, !CanTRETailMarkedCall,
- TTI, AA, ORE);
+ TTI, AA, ORE, DTU);
if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change = foldReturnAndProcessPred(BB, Ret, OldEntry,
- TailCallsAreMarkedTail, ArgumentPHIs,
- !CanTRETailMarkedCall, TTI, AA, ORE);
+ Change = foldReturnAndProcessPred(
+ BB, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+ !CanTRETailMarkedCall, TTI, AA, ORE, DTU);
MadeChange |= Change;
}
}
@@ -809,16 +821,27 @@ struct TailCallElim : public FunctionPass {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<PostDominatorTreeWrapperPass>();
}
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
+ auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+    // There is no noticeable performance difference here between Lazy and Eager
+ // UpdateStrategy based on some test results. It is feasible to switch the
+ // UpdateStrategy to Lazy if we find it profitable later.
+ DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
+
return eliminateTailRecursion(
F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
&getAnalysis<AAResultsWrapperPass>().getAAResults(),
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), DTU);
}
};
}
@@ -842,12 +865,19 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
AliasAnalysis &AA = AM.getResult<AAManager>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-
- bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE);
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto *PDT = AM.getCachedResult<PostDominatorTreeAnalysis>(F);
+  // There is no noticeable performance difference here between Lazy and Eager
+ // UpdateStrategy based on some test results. It is feasible to switch the
+ // UpdateStrategy to Lazy if we find it profitable later.
+ DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
+ bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE, DTU);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<GlobalsAA>();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<PostDominatorTreeAnalysis>();
return PA;
}
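The DomTreeUpdater plumbing added above boils down to the following stand-alone pattern (the blocks, trees, and the rewired edge are assumed to exist; this is a sketch, not code from the patch):

    // Keep DT and PDT consistent while a transform rewires one CFG edge.
    DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
    // ... change the IR so that Pred now branches to NewSucc instead of OldSucc ...
    DTU.applyUpdates({{DominatorTree::Delete, Pred, OldSucc},
                      {DominatorTree::Insert, Pred, NewSucc}});
    // A block that became unreachable can be handed to the updater for deletion.
    DTU.deleteBB(DeadBB);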
diff --git a/contrib/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/contrib/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
new file mode 100644
index 000000000000..80f761e53774
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -0,0 +1,149 @@
+//===- LoopTransformWarning.cpp - ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit warnings if forced code transformations have not been performed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "transform-warning"
+
+/// Emit warnings for forced (i.e. user-defined) loop transformations which have
+/// still not been performed.
+static void warnAboutLeftoverTransformations(Loop *L,
+ OptimizationRemarkEmitter *ORE) {
+ if (hasUnrollTransformation(L) == TM_ForcedByUser) {
+ LLVM_DEBUG(dbgs() << "Leftover unroll transformation\n");
+ ORE->emit(
+ DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
+ "FailedRequestedUnrolling",
+ L->getStartLoc(), L->getHeader())
+ << "loop not unrolled: the optimizer was unable to perform the "
+ "requested transformation; the transformation might be disabled or "
+ "specified as part of an unsupported transformation ordering");
+ }
+
+ if (hasUnrollAndJamTransformation(L) == TM_ForcedByUser) {
+ LLVM_DEBUG(dbgs() << "Leftover unroll-and-jam transformation\n");
+ ORE->emit(
+ DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
+ "FailedRequestedUnrollAndJamming",
+ L->getStartLoc(), L->getHeader())
+ << "loop not unroll-and-jammed: the optimizer was unable to perform "
+ "the requested transformation; the transformation might be disabled "
+ "or specified as part of an unsupported transformation ordering");
+ }
+
+ if (hasVectorizeTransformation(L) == TM_ForcedByUser) {
+ LLVM_DEBUG(dbgs() << "Leftover vectorization transformation\n");
+ Optional<int> VectorizeWidth =
+ getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+ Optional<int> InterleaveCount =
+ getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
+
+ if (VectorizeWidth.getValueOr(0) != 1)
+ ORE->emit(
+ DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
+ "FailedRequestedVectorization",
+ L->getStartLoc(), L->getHeader())
+ << "loop not vectorized: the optimizer was unable to perform the "
+ "requested transformation; the transformation might be disabled "
+ "or specified as part of an unsupported transformation ordering");
+ else if (InterleaveCount.getValueOr(0) != 1)
+ ORE->emit(
+ DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
+ "FailedRequestedInterleaving",
+ L->getStartLoc(), L->getHeader())
+ << "loop not interleaved: the optimizer was unable to perform the "
+ "requested transformation; the transformation might be disabled "
+ "or specified as part of an unsupported transformation ordering");
+ }
+
+ if (hasDistributeTransformation(L) == TM_ForcedByUser) {
+ LLVM_DEBUG(dbgs() << "Leftover distribute transformation\n");
+ ORE->emit(
+ DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
+ "FailedRequestedDistribution",
+ L->getStartLoc(), L->getHeader())
+ << "loop not distributed: the optimizer was unable to perform the "
+ "requested transformation; the transformation might be disabled or "
+ "specified as part of an unsupported transformation ordering");
+ }
+}
+
+static void warnAboutLeftoverTransformations(Function *F, LoopInfo *LI,
+ OptimizationRemarkEmitter *ORE) {
+ for (auto *L : LI->getLoopsInPreorder())
+ warnAboutLeftoverTransformations(L, ORE);
+}
+
+// New pass manager boilerplate
+PreservedAnalyses
+WarnMissedTransformationsPass::run(Function &F, FunctionAnalysisManager &AM) {
+ // Do not warn about not applied transformations if optimizations are
+ // disabled.
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return PreservedAnalyses::all();
+
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+
+ warnAboutLeftoverTransformations(&F, &LI, &ORE);
+
+ return PreservedAnalyses::all();
+}
+
+// Legacy pass manager boilerplate
+namespace {
+class WarnMissedTransformationsLegacy : public FunctionPass {
+public:
+ static char ID;
+
+ explicit WarnMissedTransformationsLegacy() : FunctionPass(ID) {
+ initializeWarnMissedTransformationsLegacyPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ warnAboutLeftoverTransformations(&F, &LI, &ORE);
+ return false;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+
+ AU.setPreservesAll();
+ }
+};
+} // end anonymous namespace
+
+char WarnMissedTransformationsLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(WarnMissedTransformationsLegacy, "transform-warning",
+ "Warn about non-applied transformations", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(WarnMissedTransformationsLegacy, "transform-warning",
+ "Warn about non-applied transformations", false, false)
+
+Pass *llvm::createWarnMissedTransformationsPass() {
+ return new WarnMissedTransformationsLegacy();
+}
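For context, the kind of user request this pass reports on can be produced from source like the following (a hedged sketch; it assumes the front end records the pragma as a forced llvm.loop transformation, which is what hasVectorizeTransformation() checks for):

    // If the vectorizer cannot honour the forced request below, the
    // transform-warning pass emits a "loop not vectorized: ..." failure remark
    // pointing at this loop.
    void scale(float *A, int N) {
    #pragma clang loop vectorize(enable) interleave_count(4)
      for (int I = 0; I < N; ++I)
        A[I] *= 2.0f;
    }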
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index e3ef42362223..564537af0c2a 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -209,10 +209,18 @@ static bool addDiscriminators(Function &F) {
// Only the lowest 7 bits are used to represent a discriminator to fit
// it in 1 byte ULEB128 representation.
unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
- I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator));
- LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
- << DIL->getColumn() << ":" << Discriminator << " " << I
- << "\n");
+ auto NewDIL = DIL->setBaseDiscriminator(Discriminator);
+ if (!NewDIL) {
+ LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
+ << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " "
+ << I << "\n");
+ } else {
+ I.setDebugLoc(NewDIL.getValue());
+ LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " " << I
+ << "\n");
+ }
Changed = true;
}
}
@@ -224,23 +232,31 @@ static bool addDiscriminators(Function &F) {
for (BasicBlock &B : F) {
LocationSet CallLocations;
for (auto &I : B.getInstList()) {
- CallInst *Current = dyn_cast<CallInst>(&I);
// We bypass intrinsic calls for the following two reasons:
        // 1) We want to avoid a non-deterministic assignment of
// discriminators.
// 2) We want to minimize the number of base discriminators used.
- if (!Current || isa<IntrinsicInst>(&I))
+ if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))
continue;
- DILocation *CurrentDIL = Current->getDebugLoc();
+ DILocation *CurrentDIL = I.getDebugLoc();
if (!CurrentDIL)
continue;
Location L =
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
if (!CallLocations.insert(L).second) {
unsigned Discriminator = ++LDM[L];
- Current->setDebugLoc(CurrentDIL->setBaseDiscriminator(Discriminator));
- Changed = true;
+ auto NewDIL = CurrentDIL->setBaseDiscriminator(Discriminator);
+ if (!NewDIL) {
+ LLVM_DEBUG(dbgs()
+ << "Could not encode discriminator: "
+ << CurrentDIL->getFilename() << ":"
+ << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn()
+ << ":" << Discriminator << " " << I << "\n");
+ } else {
+ I.setDebugLoc(NewDIL.getValue());
+ Changed = true;
+ }
}
}
}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 516a785dce1e..7da768252fc1 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -20,11 +20,13 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -37,6 +39,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>
@@ -45,42 +48,58 @@
using namespace llvm;
-void llvm::DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT) {
- assert((pred_begin(BB) == pred_end(BB) ||
- // Can delete self loop.
- BB->getSinglePredecessor() == BB) && "Block is not dead!");
- TerminatorInst *BBTerm = BB->getTerminator();
- std::vector<DominatorTree::UpdateType> Updates;
+void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU) {
+ SmallVector<BasicBlock *, 1> BBs = {BB};
+ DeleteDeadBlocks(BBs, DTU);
+}
- // Loop through all of our successors and make sure they know that one
- // of their predecessors is going away.
- if (DDT)
- Updates.reserve(BBTerm->getNumSuccessors());
- for (BasicBlock *Succ : BBTerm->successors()) {
- Succ->removePredecessor(BB);
- if (DDT)
- Updates.push_back({DominatorTree::Delete, BB, Succ});
- }
+void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
+ DomTreeUpdater *DTU) {
+#ifndef NDEBUG
+  // Make sure that all predecessors of each dead block are also dead.
+ SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
+ assert(Dead.size() == BBs.size() && "Duplicating blocks?");
+ for (auto *BB : Dead)
+ for (BasicBlock *Pred : predecessors(BB))
+ assert(Dead.count(Pred) && "All predecessors must be dead!");
+#endif
+
+ SmallVector<DominatorTree::UpdateType, 4> Updates;
+ for (auto *BB : BBs) {
+ // Loop through all of our successors and make sure they know that one
+ // of their predecessors is going away.
+ for (BasicBlock *Succ : successors(BB)) {
+ Succ->removePredecessor(BB);
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
- // Zap all the instructions in the block.
- while (!BB->empty()) {
- Instruction &I = BB->back();
- // If this instruction is used, replace uses with an arbitrary value.
- // Because control flow can't get here, we don't care what we replace the
- // value with. Note that since this block is unreachable, and all values
- // contained within it must dominate their uses, that all uses will
- // eventually be removed (they are themselves dead).
- if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
- BB->getInstList().pop_back();
+ // Zap all the instructions in the block.
+ while (!BB->empty()) {
+ Instruction &I = BB->back();
+ // If this instruction is used, replace uses with an arbitrary value.
+ // Because control flow can't get here, we don't care what we replace the
+ // value with. Note that since this block is unreachable, and all values
+ // contained within it must dominate their uses, that all uses will
+ // eventually be removed (they are themselves dead).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ BB->getInstList().pop_back();
+ }
+ new UnreachableInst(BB->getContext(), BB);
+ assert(BB->getInstList().size() == 1 &&
+ isa<UnreachableInst>(BB->getTerminator()) &&
+ "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
}
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
- if (DDT) {
- DDT->applyUpdates(Updates);
- DDT->deleteBB(BB); // Deferred deletion of BB.
- } else {
- BB->eraseFromParent(); // Zap the block!
- }
+ for (BasicBlock *BB : BBs)
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
}
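A minimal caller of the new batched interface might look like this (illustrative; collecting the dead blocks and obtaining DT/PDT is assumed to happen in the surrounding pass):

    // Delete a set of unreachable blocks while keeping both dominator trees
    // valid. DeadBBs must already satisfy the invariant asserted above: every
    // predecessor of a dead block is itself dead.
    SmallVector<BasicBlock *, 8> DeadBBs; // filled in by the caller
    DomTreeUpdater DTU(&DT, &PDT, DomTreeUpdater::UpdateStrategy::Lazy);
    DeleteDeadBlocks(DeadBBs, &DTU);
    DTU.flush(); // force the lazily queued updates if the trees are needed now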
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
@@ -115,12 +134,9 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
return Changed;
}
-bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
- LoopInfo *LI,
- MemoryDependenceResults *MemDep,
- DeferredDominance *DDT) {
- assert(!(DT && DDT) && "Cannot call with both DT and DDT.");
-
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ MemoryDependenceResults *MemDep) {
if (BB->hasAddressTaken())
return false;
@@ -131,7 +147,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
// Don't break self-loops.
if (PredBB == BB) return false;
// Don't break unwinding instructions.
- if (PredBB->getTerminator()->isExceptional())
+ if (PredBB->getTerminator()->isExceptionalTerminator())
return false;
// Can't merge if there are multiple distinct successors.
@@ -154,10 +170,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
FoldSingleEntryPHINodes(BB, MemDep);
}
- // Deferred DT update: Collect all the edges that exit BB. These
- // dominator edges will be redirected from Pred.
+ // DTU update: Collect all the edges that exit BB.
+ // These dominator edges will be redirected from Pred.
std::vector<DominatorTree::UpdateType> Updates;
- if (DDT) {
+ if (DTU) {
Updates.reserve(1 + (2 * succ_size(BB)));
Updates.push_back({DominatorTree::Delete, PredBB, BB});
for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
@@ -166,6 +182,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
}
}
+ if (MSSAU)
+ MSSAU->moveAllAfterMergeBlocks(BB, PredBB, &*(BB->begin()));
+
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -175,6 +194,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
// Move all definitions in the successor to the predecessor...
PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+ new UnreachableInst(BB->getContext(), BB);
// Eliminate duplicate dbg.values describing the entry PHI node post-splice.
for (auto Incoming : IncomingValues) {
@@ -195,28 +215,24 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
if (!PredBB->hasName())
PredBB->takeName(BB);
- // Finally, erase the old block and update dominator info.
- if (DT)
- if (DomTreeNode *DTN = DT->getNode(BB)) {
- DomTreeNode *PredDTN = DT->getNode(PredBB);
- SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
- for (DomTreeNode *DI : Children)
- DT->changeImmediateDominator(DI, PredDTN);
-
- DT->eraseNode(BB);
- }
-
if (LI)
LI->removeBlock(BB);
if (MemDep)
MemDep->invalidateCachedPredecessors();
- if (DDT) {
- DDT->deleteBB(BB); // Deferred deletion of BB.
- DDT->applyUpdates(Updates);
- } else {
- BB->eraseFromParent(); // Nuke BB.
+ // Finally, erase the old block and update dominator info.
+ if (DTU) {
+ assert(BB->getInstList().size() == 1 &&
+ isa<UnreachableInst>(BB->getTerminator()) &&
+ "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->deleteBB(BB);
+ }
+
+ else {
+ BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
}
return true;
}
@@ -261,13 +277,14 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
}
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
- LoopInfo *LI) {
+ LoopInfo *LI, MemorySSAUpdater *MSSAU) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
// If this is a critical edge, let SplitCriticalEdge do it.
- TerminatorInst *LatchTerm = BB->getTerminator();
- if (SplitCriticalEdge(LatchTerm, SuccNum, CriticalEdgeSplittingOptions(DT, LI)
- .setPreserveLCSSA()))
+ Instruction *LatchTerm = BB->getTerminator();
+ if (SplitCriticalEdge(
+ LatchTerm, SuccNum,
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()))
return LatchTerm->getSuccessor(SuccNum);
// If the edge isn't critical, then BB has a single successor or Succ has a
@@ -277,14 +294,14 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
// block.
assert(SP == BB && "CFG broken");
SP = nullptr;
- return SplitBlock(Succ, &Succ->front(), DT, LI);
+ return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU);
}
// Otherwise, if BB has a single successor, split it at the bottom of the
// block.
assert(BB->getTerminator()->getNumSuccessors() == 1 &&
"Should have a single succ!");
- return SplitBlock(BB, BB->getTerminator(), DT, LI);
+ return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU);
}
unsigned
@@ -292,7 +309,7 @@ llvm::SplitAllCriticalEdges(Function &F,
const CriticalEdgeSplittingOptions &Options) {
unsigned NumBroken = 0;
for (BasicBlock &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (SplitCriticalEdge(TI, i, Options))
@@ -302,7 +319,8 @@ llvm::SplitAllCriticalEdges(Function &F,
}
BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
- DominatorTree *DT, LoopInfo *LI) {
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU) {
BasicBlock::iterator SplitIt = SplitPt->getIterator();
while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
++SplitIt;
@@ -324,6 +342,11 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
DT->changeImmediateDominator(I, NewNode);
}
+ // Move MemoryAccesses that are still tracked in Old but now belong to New,
+ // and update accesses in successor blocks accordingly.
+ if (MSSAU)
+ MSSAU->moveAllAfterSpliceBlocks(Old, New, &*(New->begin()));
+
return New;
}
@@ -331,6 +354,7 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds,
DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
bool PreserveLCSSA, bool &HasLoopExit) {
// Update dominator tree if available.
if (DT) {
@@ -343,6 +367,10 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
}
}
+ // Update MemoryPhis after split if MemorySSA is available
+ if (MSSAU)
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(OldBB, NewBB, Preds);
+
// The rest of the logic is only relevant for updating the loop structures.
if (!LI)
return;
@@ -483,7 +511,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
// Do not attempt to split that which cannot be split.
if (!BB->canSplitPredecessors())
return nullptr;
@@ -495,7 +524,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
std::string NewName = std::string(Suffix) + ".split-lp";
SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT,
- LI, PreserveLCSSA);
+ LI, MSSAU, PreserveLCSSA);
return NewBBs[0];
}
@@ -529,7 +558,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCSSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA,
+ UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, MSSAU, PreserveLCSSA,
HasLoopExit);
if (!Preds.empty()) {
@@ -545,6 +574,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
const char *Suffix1, const char *Suffix2,
SmallVectorImpl<BasicBlock *> &NewBBs,
DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
bool PreserveLCSSA) {
assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
@@ -570,7 +600,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
bool HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, PreserveLCSSA,
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, MSSAU, PreserveLCSSA,
HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB1.
@@ -606,7 +636,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
// Update DominatorTree, LoopInfo, and LCSSA analysis information.
HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI,
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, MSSAU,
PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB2.
@@ -644,7 +674,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
- BasicBlock *Pred) {
+ BasicBlock *Pred,
+ DomTreeUpdater *DTU) {
Instruction *UncondBranch = Pred->getTerminator();
// Clone the return and add it to the end of the predecessor.
Instruction *NewRet = RI->clone();
@@ -678,19 +709,24 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// longer branch to them.
BB->removePredecessor(Pred);
UncondBranch->eraseFromParent();
+
+ if (DTU)
+ DTU->deleteEdge(Pred, BB);
+
return cast<ReturnInst>(NewRet);
}
-TerminatorInst *
-llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
- bool Unreachable, MDNode *BranchWeights,
- DominatorTree *DT, LoopInfo *LI) {
+Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+ Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights,
+ DominatorTree *DT, LoopInfo *LI) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
- TerminatorInst *HeadOldTerm = Head->getTerminator();
+ Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
- TerminatorInst *CheckTerm;
+ Instruction *CheckTerm;
if (Unreachable)
CheckTerm = new UnreachableInst(C, ThenBlock);
else
@@ -725,12 +761,12 @@ llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
}
void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
- TerminatorInst **ThenTerm,
- TerminatorInst **ElseTerm,
+ Instruction **ThenTerm,
+ Instruction **ElseTerm,
MDNode *BranchWeights) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
- TerminatorInst *HeadOldTerm = Head->getTerminator();
+ Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
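The BasicBlockUtils changes above thread an optional MemorySSAUpdater through the splitting helpers and move MergeBlockIntoPredecessor onto a DomTreeUpdater. A minimal caller sketch, assuming the updated entry point is MergeBlockIntoPredecessor(BasicBlock *, DomTreeUpdater *, ...) with the trailing parameters defaulted (the mergeWithUpdater helper itself is illustrative, not part of the commit):

#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Merge BB into its single predecessor while keeping the dominator tree
// consistent; with a lazy DomTreeUpdater the block deletion is deferred.
static bool mergeWithUpdater(BasicBlock *BB, DominatorTree &DT) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  return MergeBlockIntoPredecessor(BB, &DTU);
}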
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 3e30c27a9f33..fafc9aaba5c9 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -129,7 +130,7 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
}
BasicBlock *
-llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
const CriticalEdgeSplittingOptions &Options) {
if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
return nullptr;
@@ -198,6 +199,11 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If we have nothing to update, just return.
auto *DT = Options.DT;
auto *LI = Options.LI;
+ auto *MSSAU = Options.MSSAU;
+ if (MSSAU)
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+ DestBB, NewBB, {TIBB}, Options.MergeIdenticalEdges);
+
if (!DT && !LI)
return NewBB;
@@ -283,7 +289,7 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (!LoopPreds.empty()) {
assert(!DestBB->isEHPad() && "We don't split edges to EH pads!");
BasicBlock *NewExitBB = SplitBlockPredecessors(
- DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA);
+ DestBB, LoopPreds, "split", DT, LI, MSSAU, Options.PreserveLCSSA);
if (Options.PreserveLCSSA)
createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
}
@@ -312,7 +318,7 @@ findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
BasicBlock *IBB = nullptr;
for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
BasicBlock *PredBB = PN->getIncomingBlock(Pred);
- TerminatorInst *PredTerm = PredBB->getTerminator();
+ Instruction *PredTerm = PredBB->getTerminator();
switch (PredTerm->getOpcode()) {
case Instruction::IndirectBr:
if (IBB)
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 245200362018..3466dedd3236 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -38,6 +38,7 @@ STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
+STATISTIC(NumReturnedArg, "Number of arguments inferred as returned");
static bool setDoesNotAccessMemory(Function &F) {
if (F.doesNotAccessMemory())
@@ -105,6 +106,14 @@ static bool setRetNonNull(Function &F) {
return true;
}
+static bool setReturnedArg(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::Returned))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::Returned);
+ ++NumReturnedArg;
+ return true;
+}
+
static bool setNonLazyBind(Function &F) {
if (F.hasFnAttribute(Attribute::NonLazyBind))
return false;
@@ -155,10 +164,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_strcpy:
- case LibFunc_stpcpy:
+ case LibFunc_strncpy:
case LibFunc_strcat:
case LibFunc_strncat:
- case LibFunc_strncpy:
+ Changed |= setReturnedArg(F, 0);
+ LLVM_FALLTHROUGH;
+ case LibFunc_stpcpy:
case LibFunc_stpncpy:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
@@ -270,9 +281,11 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_memcpy:
+ case LibFunc_memmove:
+ Changed |= setReturnedArg(F, 0);
+ LLVM_FALLTHROUGH;
case LibFunc_mempcpy:
case LibFunc_memccpy:
- case LibFunc_memmove:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
@@ -741,6 +754,8 @@ bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
LibFunc DoubleFn, LibFunc FloatFn,
LibFunc LongDoubleFn) {
switch (Ty->getTypeID()) {
+ case Type::HalfTyID:
+ return false;
case Type::FloatTyID:
return TLI->has(FloatFn);
case Type::DoubleTyID:
@@ -750,6 +765,24 @@ bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
}
}
+StringRef llvm::getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn) {
+ assert(hasUnaryFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
+ "Cannot get name for unavailable function!");
+
+ switch (Ty->getTypeID()) {
+ case Type::HalfTyID:
+ llvm_unreachable("No name for HalfTy!");
+ case Type::FloatTyID:
+ return TLI->getName(FloatFn);
+ case Type::DoubleTyID:
+ return TLI->getName(DoubleFn);
+ default:
+ return TLI->getName(LongDoubleFn);
+ }
+}
+
//- Emit LibCalls ------------------------------------------------------------//
Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
@@ -927,10 +960,10 @@ static void appendTypeSuffix(Value *Op, StringRef &Name,
}
}
-Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
- const AttributeList &Attrs) {
- SmallString<20> NameBuffer;
- appendTypeSuffix(Op, Name, NameBuffer);
+static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
+ IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
Module *M = B.GetInsertBlock()->getModule();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
@@ -949,8 +982,29 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
return CI;
}
+Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ SmallString<20> NameBuffer;
+ appendTypeSuffix(Op, Name, NameBuffer);
+
+ return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
+}
+
+Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn, IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ // Get the name of the function according to TLI.
+ StringRef Name = getUnaryFloatFn(TLI, Op->getType(),
+ DoubleFn, FloatFn, LongDoubleFn);
+
+ return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
+}
+
Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
IRBuilder<> &B, const AttributeList &Attrs) {
+ assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
+
SmallString<20> NameBuffer;
appendTypeSuffix(Op1, Name, NameBuffer);
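With the BuildLibCalls changes, attribute inference now marks the destination argument of routines such as strcpy, strncpy, strcat, strncat, memcpy and memmove as 'returned'. A small driver sketch, relying only on the inferLibFuncAttributes(Function &, const TargetLibraryInfo &) entry point visible in the hunks above (annotateLibCalls is an illustrative helper name):

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;

// Re-run library-call attribute inference over every known declaration,
// picking up the newly inferred 'returned' argument attributes.
static bool annotateLibCalls(Module &M, const TargetLibraryInfo &TLI) {
  bool Changed = false;
  for (Function &F : M)
    if (F.isDeclaration() && !F.isIntrinsic())
      Changed |= inferLibFuncAttributes(F, TLI);
  return Changed;
}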
diff --git a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 6d18d0614611..e58ddcf34667 100644
--- a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -177,8 +177,8 @@ static void createRetBitCast(CallSite CS, Type *RetTy, CastInst **RetBitCast) {
InsertBefore = &*std::next(CS.getInstruction()->getIterator());
// Bitcast the return value to the correct type.
- auto *Cast = CastInst::Create(Instruction::BitCast, CS.getInstruction(),
- RetTy, "", InsertBefore);
+ auto *Cast = CastInst::CreateBitOrPointerCast(CS.getInstruction(), RetTy, "",
+ InsertBefore);
if (RetBitCast)
*RetBitCast = Cast;
@@ -270,8 +270,8 @@ static Instruction *versionCallSite(CallSite CS, Value *Callee,
// Create an if-then-else structure. The original instruction is moved into
// the "else" block, and a clone of the original instruction is placed in the
// "then" block.
- TerminatorInst *ThenTerm = nullptr;
- TerminatorInst *ElseTerm = nullptr;
+ Instruction *ThenTerm = nullptr;
+ Instruction *ElseTerm = nullptr;
SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm,
BranchWeights);
BasicBlock *ThenBlock = ThenTerm->getParent();
@@ -321,12 +321,14 @@ bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
const char **FailureReason) {
assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+ auto &DL = Callee->getParent()->getDataLayout();
+
// Check the return type. The callee's return value type must be bitcast
// compatible with the call site's type.
Type *CallRetTy = CS.getInstruction()->getType();
Type *FuncRetTy = Callee->getReturnType();
if (CallRetTy != FuncRetTy)
- if (!CastInst::isBitCastable(FuncRetTy, CallRetTy)) {
+ if (!CastInst::isBitOrNoopPointerCastable(FuncRetTy, CallRetTy, DL)) {
if (FailureReason)
*FailureReason = "Return type mismatch";
return false;
@@ -351,7 +353,7 @@ bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
Type *ActualTy = CS.getArgument(I)->getType();
if (FormalTy == ActualTy)
continue;
- if (!CastInst::isBitCastable(ActualTy, FormalTy)) {
+ if (!CastInst::isBitOrNoopPointerCastable(ActualTy, FormalTy, DL)) {
if (FailureReason)
*FailureReason = "Argument type mismatch";
return false;
@@ -391,21 +393,46 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
// to the correct type.
auto CalleeType = Callee->getFunctionType();
auto CalleeParamNum = CalleeType->getNumParams();
+
+ LLVMContext &Ctx = Callee->getContext();
+ const AttributeList &CallerPAL = CS.getAttributes();
+ // The new list of argument attributes.
+ SmallVector<AttributeSet, 4> NewArgAttrs;
+ bool AttributeChanged = false;
+
for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) {
auto *Arg = CS.getArgument(ArgNo);
Type *FormalTy = CalleeType->getParamType(ArgNo);
Type *ActualTy = Arg->getType();
if (FormalTy != ActualTy) {
- auto *Cast = CastInst::Create(Instruction::BitCast, Arg, FormalTy, "",
- CS.getInstruction());
+ auto *Cast = CastInst::CreateBitOrPointerCast(Arg, FormalTy, "",
+ CS.getInstruction());
CS.setArgument(ArgNo, Cast);
- }
+
+ // Remove any incompatible attributes for the argument.
+ AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo));
+ ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
+ NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
+ AttributeChanged = true;
+ } else
+ NewArgAttrs.push_back(CallerPAL.getParamAttributes(ArgNo));
}
// If the return type of the call site doesn't match that of the callee, cast
// the returned value to the appropriate type.
- if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy)
+ // Remove any incompatible return value attribute.
+ AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
+ if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) {
createRetBitCast(CS, CallSiteRetTy, RetBitCast);
+ RAttrs.remove(AttributeFuncs::typeIncompatible(CalleeRetTy));
+ AttributeChanged = true;
+ }
+
+ // Set the new callsite attribute.
+ if (AttributeChanged)
+ CS.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttributes(),
+ AttributeSet::get(Ctx, RAttrs),
+ NewArgAttrs));
return CS.getInstruction();
}
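The CallPromotionUtils changes relax the compatibility checks to isBitOrNoopPointerCastable and strip attributes that become invalid after promotion. A hedged usage sketch, using only the isLegalToPromote/promoteCall entry points shown in the hunks above (tryPromote is an illustrative wrapper):

#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
using namespace llvm;

// Promote an indirect call site to a direct call of DirectCallee, but only
// after the legality check; FailureReason explains a rejection.
static bool tryPromote(CallSite CS, Function *DirectCallee) {
  const char *FailureReason = nullptr;
  if (!isLegalToPromote(CS, DirectCallee, &FailureReason))
    return false; // e.g. "Return type mismatch" or "Argument type mismatch"
  promoteCall(CS, DirectCallee);
  return true;
}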
diff --git a/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
new file mode 100644
index 000000000000..cf41fd2e14c0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -0,0 +1,105 @@
+//===- CanonicalizeAliases.cpp - ThinLTO Support: Canonicalize Aliases ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Currently this file implements partial alias canonicalization, to
+// flatten chains of aliases (this is also done by GlobalOpt, but that pass
+// is not run for -O0 compiles). E.g.
+// @a = alias i8, i8 *@b
+// @b = alias i8, i8 *@g
+//
+// will be converted to:
+// @a = alias i8, i8 *@g <-- @a is now an alias to base object @g
+// @b = alias i8, i8 *@g
+//
+// Eventually this file will implement full alias canonicalization, so that
+// all aliasees are private anonymous values. E.g.
+// @a = alias i8, i8 *@g
+// @g = global i8 0
+//
+// will be converted to:
+// @0 = private global
+// @a = alias i8, i8* @0
+// @g = alias i8, i8* @0
+//
+// This simplifies optimization and ThinLTO linking of the original symbols.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
+
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
+
+using namespace llvm;
+
+namespace {
+
+static Constant *canonicalizeAlias(Constant *C, bool &Changed) {
+ if (auto *GA = dyn_cast<GlobalAlias>(C)) {
+ auto *NewAliasee = canonicalizeAlias(GA->getAliasee(), Changed);
+ if (NewAliasee != GA->getAliasee()) {
+ GA->setAliasee(NewAliasee);
+ Changed = true;
+ }
+ return NewAliasee;
+ }
+
+ auto *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE)
+ return C;
+
+ std::vector<Constant *> Ops;
+ for (Use &U : CE->operands())
+ Ops.push_back(canonicalizeAlias(cast<Constant>(U), Changed));
+ return CE->getWithOperands(Ops);
+}
+
+/// Convert aliases to canonical form.
+static bool canonicalizeAliases(Module &M) {
+ bool Changed = false;
+ for (auto &GA : M.aliases())
+ canonicalizeAlias(&GA, Changed);
+ return Changed;
+}
+
+// Legacy pass that canonicalizes aliases.
+class CanonicalizeAliasesLegacyPass : public ModulePass {
+
+public:
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ /// Specify pass name for debug output
+ StringRef getPassName() const override { return "Canonicalize Aliases"; }
+
+ explicit CanonicalizeAliasesLegacyPass() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override { return canonicalizeAliases(M); }
+};
+char CanonicalizeAliasesLegacyPass::ID = 0;
+
+} // anonymous namespace
+
+PreservedAnalyses CanonicalizeAliasesPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!canonicalizeAliases(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+INITIALIZE_PASS_BEGIN(CanonicalizeAliasesLegacyPass, "canonicalize-aliases",
+ "Canonicalize aliases", false, false)
+INITIALIZE_PASS_END(CanonicalizeAliasesLegacyPass, "canonicalize-aliases",
+ "Canonicalize aliases", false, false)
+
+namespace llvm {
+ModulePass *createCanonicalizeAliasesPass() {
+ return new CanonicalizeAliasesLegacyPass();
+}
+} // namespace llvm
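A short sketch of how the new pass could be scheduled: the legacy factory createCanonicalizeAliasesPass is defined at the end of the file above, and its declaration is assumed here to live in llvm/Transforms/Utils.h; the runCanonicalizeAliases wrapper is illustrative only.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;

// Flatten alias chains in M by running the legacy CanonicalizeAliases pass.
static void runCanonicalizeAliases(Module &M) {
  legacy::PassManager PM;
  PM.add(createCanonicalizeAliasesPass());
  PM.run(M);
}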
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 9ae60962a631..8f8c601f5f13 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -18,11 +18,11 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
@@ -32,6 +32,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <map>
using namespace llvm;
@@ -235,8 +236,8 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
ArgTypes, F->getFunctionType()->isVarArg());
// Create the new function...
- Function *NewF =
- Function::Create(FTy, F->getLinkage(), F->getName(), F->getParent());
+ Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(),
+ F->getName(), F->getParent());
// Loop over the arguments, copying the names of the mapped arguments over...
Function::arg_iterator DestI = NewF->arg_begin();
@@ -365,7 +366,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
}
// Finally, clone over the terminator.
- const TerminatorInst *OldTI = BB->getTerminator();
+ const Instruction *OldTI = BB->getTerminator();
bool TerminatorDone = false;
if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
if (BI->isConditional()) {
@@ -414,8 +415,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
CodeInfo->OperandBundleCallSites.push_back(NewInst);
// Recursively clone any reachable successor blocks.
- const TerminatorInst *TI = BB->getTerminator();
- for (const BasicBlock *Succ : TI->successors())
+ const Instruction *TI = BB->getTerminator();
+ for (const BasicBlock *Succ : successors(TI))
ToClone.push_back(Succ);
}
@@ -795,11 +796,12 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
/// Duplicate non-Phi instructions from the beginning of the block up to the
/// StopAt instruction into a split block between BB and its predecessor.
-BasicBlock *
-llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
- Instruction *StopAt,
- ValueToValueMapTy &ValueMapping,
- DominatorTree *DT) {
+BasicBlock *llvm::DuplicateInstructionsInSplitBetween(
+ BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt,
+ ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU) {
+
+ assert(count(successors(PredBB), BB) == 1 &&
+ "There must be a single edge between PredBB and BB!");
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
@@ -807,10 +809,16 @@ llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
- BasicBlock *NewBB = SplitEdge(PredBB, BB, DT);
+ BasicBlock *NewBB = SplitEdge(PredBB, BB);
NewBB->setName(PredBB->getName() + ".split");
Instruction *NewTerm = NewBB->getTerminator();
+ // FIXME: SplitEdge does not yet take a DTU, so we include the split edge
+ // in the update set here.
+ DTU.applyUpdates({{DominatorTree::Delete, PredBB, BB},
+ {DominatorTree::Insert, PredBB, NewBB},
+ {DominatorTree::Insert, NewBB, BB}});
+
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
// Stop once we see the terminator too. This covers the case where BB's
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index c7d68bab8170..659993aa5478 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -74,8 +74,9 @@ std::unique_ptr<Module> llvm::CloneModule(
// Loop over the functions in the module, making external functions as before
for (const Function &I : M) {
- Function *NF = Function::Create(cast<FunctionType>(I.getValueType()),
- I.getLinkage(), I.getName(), New.get());
+ Function *NF =
+ Function::Create(cast<FunctionType>(I.getValueType()), I.getLinkage(),
+ I.getAddressSpace(), I.getName(), New.get());
NF->copyAttributesFrom(&I);
VMap[&I] = NF;
}
@@ -91,8 +92,8 @@ std::unique_ptr<Module> llvm::CloneModule(
GlobalValue *GV;
if (I->getValueType()->isFunctionTy())
GV = Function::Create(cast<FunctionType>(I->getValueType()),
- GlobalValue::ExternalLinkage, I->getName(),
- New.get());
+ GlobalValue::ExternalLinkage,
+ I->getAddressSpace(), I->getName(), New.get());
else
GV = new GlobalVariable(
*New, I->getValueType(), false, GlobalValue::ExternalLinkage,
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index cb349e34606c..25d4ae583ecc 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -57,6 +57,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -167,14 +168,22 @@ static bool isBlockValidForExtraction(const BasicBlock &BB,
continue;
}
- if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (const Function *F = CI->getCalledFunction())
- if (F->getIntrinsicID() == Intrinsic::vastart) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (const Function *F = CI->getCalledFunction()) {
+ auto IID = F->getIntrinsicID();
+ if (IID == Intrinsic::vastart) {
if (AllowVarArgs)
continue;
else
return false;
}
+
+ // Currently, we miscompile outlined copies of eh_typeid_for. There are
+ // proposals for fixing this in llvm.org/PR39545.
+ if (IID == Intrinsic::eh_typeid_for)
+ return false;
+ }
+ }
}
return true;
@@ -228,19 +237,21 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI, bool AllowVarArgs,
- bool AllowAlloca)
+ bool AllowAlloca, std::string Suffix)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
BPI(BPI), AllowVarArgs(AllowVarArgs),
- Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)) {}
+ Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
+ Suffix(Suffix) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI)
+ BranchProbabilityInfo *BPI, std::string Suffix)
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
BPI(BPI), AllowVarArgs(false),
Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
/* AllowVarArgs */ false,
- /* AllowAlloca */ false)) {}
+ /* AllowAlloca */ false)),
+ Suffix(Suffix) {}
/// definedInRegion - Return true if the specified value is defined in the
/// extracted region.
@@ -321,8 +332,7 @@ bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
default: {
IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
if (IntrInst) {
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
- IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ if (IntrInst->isLifetimeStartOrEnd())
break;
return false;
}
@@ -520,10 +530,10 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
}
}
-/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the
-/// region, we need to split the entry block of the region so that the PHI node
-/// is easier to deal with.
-void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+/// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside
+/// of the region, we need to split the entry block of the region so that the
+/// PHI node is easier to deal with.
+void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) {
unsigned NumPredsFromRegion = 0;
unsigned NumPredsOutsideRegion = 0;
@@ -566,7 +576,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// changing them to branch to NewBB instead.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (Blocks.count(PN->getIncomingBlock(i))) {
- TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+ Instruction *TI = PN->getIncomingBlock(i)->getTerminator();
TI->replaceUsesOfWith(OldPred, NewBB);
}
@@ -595,6 +605,56 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
}
}
+/// severSplitPHINodesOfExits - If PHI nodes in exit blocks have inputs from
+/// the outlined region, we split each such PHI in two: one with inputs from
+/// the region and the other with the remaining incoming blocks; the first
+/// PHIs are then placed inside the outlined region.
+void CodeExtractor::severSplitPHINodesOfExits(
+ const SmallPtrSetImpl<BasicBlock *> &Exits) {
+ for (BasicBlock *ExitBB : Exits) {
+ BasicBlock *NewBB = nullptr;
+
+ for (PHINode &PN : ExitBB->phis()) {
+ // Find all incoming values from the outlining region.
+ SmallVector<unsigned, 2> IncomingVals;
+ for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
+ if (Blocks.count(PN.getIncomingBlock(i)))
+ IncomingVals.push_back(i);
+
+ // Do not process the PHI if there is at most one predecessor from the
+ // region; with exactly one such predecessor, only that incoming block is
+ // replaced with the codeRepl block, so it is safe to skip the PHI.
+ if (IncomingVals.size() <= 1)
+ continue;
+
+ // Create block for new PHIs and add it to the list of outlined if it
+ // wasn't done before.
+ if (!NewBB) {
+ NewBB = BasicBlock::Create(ExitBB->getContext(),
+ ExitBB->getName() + ".split",
+ ExitBB->getParent(), ExitBB);
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBB),
+ pred_end(ExitBB));
+ for (BasicBlock *PredBB : Preds)
+ if (Blocks.count(PredBB))
+ PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB);
+ BranchInst::Create(ExitBB, NewBB);
+ Blocks.insert(NewBB);
+ }
+
+ // Split this PHI.
+ PHINode *NewPN =
+ PHINode::Create(PN.getType(), IncomingVals.size(),
+ PN.getName() + ".ce", NewBB->getFirstNonPHI());
+ for (unsigned i : IncomingVals)
+ NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i));
+ for (unsigned i : reverse(IncomingVals))
+ PN.removeIncomingValue(i, false);
+ PN.addIncoming(NewPN, NewBB);
+ }
+ }
+}
+
void CodeExtractor::splitReturnBlocks() {
for (BasicBlock *Block : Blocks)
if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
@@ -669,11 +729,14 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
FunctionType::get(RetTy, paramTy,
AllowVarArgs && oldFunction->isVarArg());
+ std::string SuffixToUse =
+ Suffix.empty()
+ ? (header->getName().empty() ? "extracted" : header->getName().str())
+ : Suffix;
// Create the new function
- Function *newFunction = Function::Create(funcType,
- GlobalValue::InternalLinkage,
- oldFunction->getName() + "_" +
- header->getName(), M);
+ Function *newFunction = Function::Create(
+ funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(),
+ oldFunction->getName() + "." + SuffixToUse, M);
// If the old function is no-throw, so is the new one.
if (oldFunction->doesNotThrow())
newFunction->setDoesNotThrow();
@@ -754,6 +817,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::SanitizeMemory:
case Attribute::SanitizeThread:
case Attribute::SanitizeHWAddress:
+ case Attribute::SpeculativeLoadHardening:
case Attribute::StackProtect:
case Attribute::StackProtectReq:
case Attribute::StackProtectStrong:
@@ -778,7 +842,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
- TerminatorInst *TI = newFunction->begin()->getTerminator();
+ Instruction *TI = newFunction->begin()->getTerminator();
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
@@ -808,10 +872,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
for (unsigned i = 0, e = Users.size(); i != e; ++i)
// The BasicBlock which contains the branch is not in the region
// modify the branch target to a new block
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
- if (!Blocks.count(TI->getParent()) &&
- TI->getParent()->getParent() == oldFunction)
- TI->replaceUsesOfWith(header, newHeader);
+ if (Instruction *I = dyn_cast<Instruction>(Users[i]))
+ if (I->isTerminator() && !Blocks.count(I->getParent()) &&
+ I->getParent()->getParent() == oldFunction)
+ I->replaceUsesOfWith(header, newHeader);
return newFunction;
}
@@ -819,9 +883,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
/// the call instruction, splitting any PHI nodes in the header block as
/// necessary.
-void CodeExtractor::
-emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
- ValueSet &inputs, ValueSet &outputs) {
+CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
+ BasicBlock *codeReplacer,
+ ValueSet &inputs,
+ ValueSet &outputs) {
// Emit a call to the new function, passing in: *pointer to struct (if
// aggregating parameters), or plain inputs and allocated memory for outputs
std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
@@ -829,6 +894,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Module *M = newFunction->getParent();
LLVMContext &Context = M->getContext();
const DataLayout &DL = M->getDataLayout();
+ CallInst *call = nullptr;
// Add inputs as params, or to be filled into the struct
for (Value *input : inputs)
@@ -879,8 +945,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
}
// Emit the call to the function
- CallInst *call = CallInst::Create(newFunction, params,
- NumExitBlocks > 1 ? "targetBlock" : "");
+ call = CallInst::Create(newFunction, params,
+ NumExitBlocks > 1 ? "targetBlock" : "");
// Add debug location to the new call, if the original function has debug
// info. In that case, the terminator of the entry block of the extracted
// function contains the first debug location of the extracted function,
@@ -925,11 +991,17 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
auto *OutI = dyn_cast<Instruction>(outputs[i]);
if (!OutI)
continue;
+
// Find proper insertion point.
- Instruction *InsertPt = OutI->getNextNode();
- // Let's assume that there is no other guy interleave non-PHI in PHIs.
- if (isa<PHINode>(InsertPt))
- InsertPt = InsertPt->getParent()->getFirstNonPHI();
+ BasicBlock::iterator InsertPt;
+ // In case OutI is an invoke, we insert the store at the beginning of the
+ // 'normal destination' BB. Otherwise we insert the store right after OutI.
+ if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
+ InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
+ else if (auto *Phi = dyn_cast<PHINode>(OutI))
+ InsertPt = Phi->getParent()->getFirstInsertionPt();
+ else
+ InsertPt = std::next(OutI->getIterator());
assert(OAI != newFunction->arg_end() &&
"Number of output arguments should match "
@@ -939,13 +1011,13 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertPt);
- new StoreInst(outputs[i], GEP, InsertPt);
+ StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), &*InsertPt);
+ new StoreInst(outputs[i], GEP, &*InsertPt);
// Since there should be only one struct argument aggregating
// all the output values, we shouldn't increment OAI, which always
// points to the struct argument, in this case.
} else {
- new StoreInst(outputs[i], &*OAI, InsertPt);
+ new StoreInst(outputs[i], &*OAI, &*InsertPt);
++OAI;
}
}
@@ -964,7 +1036,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
unsigned switchVal = 0;
for (BasicBlock *Block : Blocks) {
- TerminatorInst *TI = Block->getTerminator();
+ Instruction *TI = Block->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (!Blocks.count(TI->getSuccessor(i))) {
BasicBlock *OldTarget = TI->getSuccessor(i);
@@ -1046,6 +1118,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
break;
}
+
+ return call;
}
void CodeExtractor::moveCodeToFunction(Function *newFunction) {
@@ -1070,7 +1144,7 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
// Update the branch weights for the exit block.
- TerminatorInst *TI = CodeReplacer->getTerminator();
+ Instruction *TI = CodeReplacer->getTerminator();
SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);
// Block Frequency distribution with dummy node.
@@ -1107,6 +1181,71 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
}
+/// Scan the extraction region for lifetime markers which reference inputs.
+/// Erase these markers. Return the inputs which were referenced.
+///
+/// The extraction region is defined by a set of blocks (\p Blocks), and a set
+/// of allocas which will be moved from the caller function into the extracted
+/// function (\p SunkAllocas).
+static SetVector<Value *>
+eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
+ const SetVector<Value *> &SunkAllocas) {
+ SetVector<Value *> InputObjectsWithLifetime;
+ for (BasicBlock *BB : Blocks) {
+ for (auto It = BB->begin(), End = BB->end(); It != End;) {
+ auto *II = dyn_cast<IntrinsicInst>(&*It);
+ ++It;
+ if (!II || !II->isLifetimeStartOrEnd())
+ continue;
+
+ // Get the memory operand of the lifetime marker. If the underlying
+ // object is a sunk alloca, or is otherwise defined in the extraction
+ // region, the lifetime marker must not be erased.
+ Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
+ if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
+ continue;
+
+ InputObjectsWithLifetime.insert(Mem);
+ II->eraseFromParent();
+ }
+ }
+ return InputObjectsWithLifetime;
+}
+
+/// Insert lifetime start/end markers surrounding the call to the new function
+/// for objects defined in the caller.
+static void insertLifetimeMarkersSurroundingCall(
+ Module *M, const SetVector<Value *> &InputObjectsWithLifetime,
+ CallInst *TheCall) {
+ if (InputObjectsWithLifetime.empty())
+ return;
+
+ LLVMContext &Ctx = M->getContext();
+ auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
+ auto LifetimeStartFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
+ auto LifetimeEndFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
+ for (Value *Mem : InputObjectsWithLifetime) {
+ assert((!isa<Instruction>(Mem) ||
+ cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) &&
+ "Input memory not defined in original function");
+ Value *MemAsI8Ptr = nullptr;
+ if (Mem->getType() == Int8PtrTy)
+ MemAsI8Ptr = Mem;
+ else
+ MemAsI8Ptr =
+ CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
+
+ auto StartMarker =
+ CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr});
+ StartMarker->insertBefore(TheCall);
+ auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr});
+ EndMarker->insertAfter(TheCall);
+ }
+}
+
Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;
@@ -1150,13 +1289,33 @@ Function *CodeExtractor::extractCodeRegion() {
}
}
- // If we have to split PHI nodes or the entry block, do so now.
- severSplitPHINodes(header);
-
// If we have any return instructions in the region, split those blocks so
// that the return is not in the region.
splitReturnBlocks();
+ // Calculate the exit blocks for the extracted region and the total exit
+ // weights for each of those blocks.
+ DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
+ SmallPtrSet<BasicBlock *, 1> ExitBlocks;
+ for (BasicBlock *Block : Blocks) {
+ for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
+ ++SI) {
+ if (!Blocks.count(*SI)) {
+ // Update the branch weight for this successor.
+ if (BFI) {
+ BlockFrequency &BF = ExitWeights[*SI];
+ BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
+ }
+ ExitBlocks.insert(*SI);
+ }
+ }
+ }
+ NumExitBlocks = ExitBlocks.size();
+
+ // If we have to split PHI nodes of the entry or exit blocks, do so now.
+ severSplitPHINodesOfEntry(header);
+ severSplitPHINodesOfExits(ExitBlocks);
+
// This takes the place of the original loop
BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
"codeRepl", oldFunction,
@@ -1201,30 +1360,17 @@ Function *CodeExtractor::extractCodeRegion() {
cast<Instruction>(II)->moveBefore(TI);
}
- // Calculate the exit blocks for the extracted region and the total exit
- // weights for each of those blocks.
- DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
- SmallPtrSet<BasicBlock *, 1> ExitBlocks;
- for (BasicBlock *Block : Blocks) {
- for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
- ++SI) {
- if (!Blocks.count(*SI)) {
- // Update the branch weight for this successor.
- if (BFI) {
- BlockFrequency &BF = ExitWeights[*SI];
- BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
- }
- ExitBlocks.insert(*SI);
- }
- }
- }
- NumExitBlocks = ExitBlocks.size();
+ // Collect objects which are inputs to the extraction region and also
+ // referenced by lifetime start/end markers within it. The effects of these
+ // markers must be replicated in the calling function to prevent the stack
+ // coloring pass from merging slots which store input objects.
+ ValueSet InputObjectsWithLifetime =
+ eraseLifetimeMarkersOnInputs(Blocks, SinkingCands);
// Construct new function based on inputs/outputs & add allocas for all defs.
- Function *newFunction = constructFunction(inputs, outputs, header,
- newFuncRoot,
- codeReplacer, oldFunction,
- oldFunction->getParent());
+ Function *newFunction =
+ constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer,
+ oldFunction, oldFunction->getParent());
// Update the entry count of the function.
if (BFI) {
@@ -1235,10 +1381,16 @@ Function *CodeExtractor::extractCodeRegion() {
BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
}
- emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+ CallInst *TheCall =
+ emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
moveCodeToFunction(newFunction);
+ // Replicate the effects of any lifetime start/end markers which referenced
+ // input objects in the extraction region by placing markers around the call.
+ insertLifetimeMarkersSurroundingCall(oldFunction->getParent(),
+ InputObjectsWithLifetime, TheCall);
+
// Propagate personality info to the new function if there is one.
if (oldFunction->hasPersonalityFn())
newFunction->setPersonalityFn(oldFunction->getPersonalityFn());
@@ -1247,8 +1399,8 @@ Function *CodeExtractor::extractCodeRegion() {
if (BFI && NumExitBlocks > 1)
calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI);
- // Loop over all of the PHI nodes in the header block, and change any
- // references to the old incoming edge to be the new incoming edge.
+ // Loop over all of the PHI nodes in the header and exit blocks, and change
+ // any references to the old incoming edge to be the new incoming edge.
for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
@@ -1256,29 +1408,60 @@ Function *CodeExtractor::extractCodeRegion() {
PN->setIncomingBlock(i, newFuncRoot);
}
- // Look at all successors of the codeReplacer block. If any of these blocks
- // had PHI nodes in them, we need to update the "from" block to be the code
- // replacer, not the original block in the extracted region.
- std::vector<BasicBlock *> Succs(succ_begin(codeReplacer),
- succ_end(codeReplacer));
- for (unsigned i = 0, e = Succs.size(); i != e; ++i)
- for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- std::set<BasicBlock*> ProcessedPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (Blocks.count(PN->getIncomingBlock(i))) {
- if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
- PN->setIncomingBlock(i, codeReplacer);
- else {
- // There were multiple entries in the PHI for this block, now there
- // is only one, so remove the duplicated entries.
- PN->removeIncomingValue(i, false);
- --i; --e;
- }
- }
+ for (BasicBlock *ExitBB : ExitBlocks)
+ for (PHINode &PN : ExitBB->phis()) {
+ Value *IncomingCodeReplacerVal = nullptr;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ // Ignore incoming values from outside of the extracted region.
+ if (!Blocks.count(PN.getIncomingBlock(i)))
+ continue;
+
+ // Ensure that there is only one incoming value from codeReplacer.
+ if (!IncomingCodeReplacerVal) {
+ PN.setIncomingBlock(i, codeReplacer);
+ IncomingCodeReplacerVal = PN.getIncomingValue(i);
+ } else
+ assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) &&
+ "PHI has two incompatbile incoming values from codeRepl");
+ }
+ }
+
+ // Erase debug info intrinsics. Variable updates within the new function are
+ // invisible to debuggers. This could be improved by defining a DISubprogram
+ // for the new function.
+ for (BasicBlock &BB : *newFunction) {
+ auto BlockIt = BB.begin();
+ // Remove debug info intrinsics from the new function.
+ while (BlockIt != BB.end()) {
+ Instruction *Inst = &*BlockIt;
+ ++BlockIt;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ Inst->eraseFromParent();
}
+ // Remove debug info intrinsics which refer to values in the new function
+ // from the old function.
+ SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
+ for (Instruction &I : BB)
+ findDbgUsers(DbgUsers, &I);
+ for (DbgVariableIntrinsic *DVI : DbgUsers)
+ DVI->eraseFromParent();
+ }
- LLVM_DEBUG(if (verifyFunction(*newFunction))
- report_fatal_error("verifyFunction failed!"));
+ // Mark the new function `noreturn` if applicable. Terminators which resume
+ // exception propagation are treated as returning instructions. This is to
+ // avoid inserting traps after calls to outlined functions which unwind.
+ bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) {
+ const Instruction *Term = BB.getTerminator();
+ return isa<ReturnInst>(Term) || isa<ResumeInst>(Term);
+ });
+ if (doesNotReturn)
+ newFunction->setDoesNotReturn();
+
+ LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
+ newFunction->dump();
+ report_fatal_error("verification of newFunction failed!");
+ });
+ LLVM_DEBUG(if (verifyFunction(*oldFunction))
+ report_fatal_error("verification of oldFunction failed!"));
return newFunction;
}
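The CodeExtractor changes add an optional Suffix to both constructors, return the emitted call from emitCallAndSwitchStatement, split exit-block PHIs, and replicate lifetime markers around the call. A minimal outlining sketch against the constructor signature shown above (the "outlined" suffix string and the outlineRegion helper are illustrative, not part of the commit):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;

// Outline a single-entry region into a new function named <original>.outlined.
static Function *outlineRegion(ArrayRef<BasicBlock *> Region,
                               DominatorTree &DT) {
  CodeExtractor CE(Region, &DT, /*AggregateArgs=*/false, /*BFI=*/nullptr,
                   /*BPI=*/nullptr, /*AllowVarArgs=*/false,
                   /*AllowAlloca=*/false, /*Suffix=*/"outlined");
  return CE.isEligible() ? CE.extractCodeRegion() : nullptr;
}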
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
index 9a0240144d08..4e7da7d0449f 100644
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -22,11 +22,10 @@
#define DEBUG_TYPE "ctor_utils"
-namespace llvm {
+using namespace llvm;
-namespace {
/// Given a specified llvm.global_ctors list, remove the listed elements.
-void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
+static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
// Filter out the initializer elements to remove.
ConstantArray *OldCA = cast<ConstantArray>(GCL->getInitializer());
SmallVector<Constant *, 10> CAList;
@@ -64,7 +63,7 @@ void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
/// Given a llvm.global_ctors list that we can understand,
/// return a list of the functions and null terminator as a vector.
-std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
+static std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
if (GV->getInitializer()->isNullValue())
return std::vector<Function *>();
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
@@ -79,7 +78,7 @@ std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
/// Find the llvm.global_ctors list, verifying that all initializers have an
/// init priority of 65535.
-GlobalVariable *findGlobalCtors(Module &M) {
+static GlobalVariable *findGlobalCtors(Module &M) {
GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
if (!GV)
return nullptr;
@@ -112,12 +111,11 @@ GlobalVariable *findGlobalCtors(Module &M) {
return GV;
}
-} // namespace
/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
/// entries for which it returns true. Return true if anything changed.
-bool optimizeGlobalCtorsList(Module &M,
- function_ref<bool(Function *)> ShouldRemove) {
+bool llvm::optimizeGlobalCtorsList(
+ Module &M, function_ref<bool(Function *)> ShouldRemove) {
GlobalVariable *GlobalCtors = findGlobalCtors(M);
if (!GlobalCtors)
return false;
@@ -160,5 +158,3 @@ bool optimizeGlobalCtorsList(Module &M,
removeGlobalCtors(GlobalCtors, CtorsToRemove);
return true;
}
-
-} // End llvm namespace
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 56ff03c7f5e1..975b363859a9 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -90,7 +90,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
// careful if I is an invoke instruction, because we can't insert the store
// AFTER the terminator instruction.
BasicBlock::iterator InsertPt;
- if (!isa<TerminatorInst>(I)) {
+ if (!I.isTerminator()) {
InsertPt = ++I.getIterator();
for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
/* empty */; // Don't insert before PHI nodes or landingpad instrs.
diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
index c9c96fbe5da0..762a374c135c 100644
--- a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -37,7 +37,7 @@ IRBuilder<> *EscapeEnumerator::Next() {
// Branches and invokes do not escape, only unwind, resume, and return
// do.
- TerminatorInst *TI = CurBB->getTerminator();
+ Instruction *TI = CurBB->getTerminator();
if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
continue;
diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
index 7fd9425efed3..e875cd686b00 100644
--- a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -483,8 +483,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
}
}
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (II->isLifetimeStartOrEnd()) {
LLVM_DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
++CurInst;
continue;
@@ -578,7 +577,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
<< "Successfully evaluated function. Result: 0\n\n");
}
}
- } else if (isa<TerminatorInst>(CurInst)) {
+ } else if (CurInst->isTerminator()) {
LLVM_DEBUG(dbgs() << "Found a terminator instruction.\n");
if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 3c6c9c9a5df4..d9778f4a1fb7 100644
--- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -232,7 +232,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
return false;
- TerminatorInst *TBB = LastCondBlock->getTerminator();
+ Instruction *TBB = LastCondBlock->getTerminator();
BasicBlock *PS1 = TBB->getSuccessor(0);
BasicBlock *PS2 = TBB->getSuccessor(1);
BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
@@ -325,7 +325,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
BasicBlock *Block1,
BasicBlock *Block2) {
- TerminatorInst *PTI2 = Head2->getTerminator();
+ Instruction *PTI2 = Head2->getTerminator();
Instruction *PBI2 = &Head2->front();
bool eq1 = (Block1 == Head1);
@@ -421,7 +421,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
return false;
- TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
+ Instruction *PTI2 = SecondEntryBlock->getTerminator();
Instruction *PBI2 = &SecondEntryBlock->front();
if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 69203f9f2485..a717d9b72819 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -410,8 +410,6 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
switch (TyL->getTypeID()) {
default:
llvm_unreachable("Unknown type!");
- // Fall through in Release mode.
- LLVM_FALLTHROUGH;
case Type::IntegerTyID:
return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
cast<IntegerType>(TyR)->getBitWidth());
@@ -867,8 +865,8 @@ int FunctionComparator::compare() {
if (int Res = cmpBasicBlocks(BBL, BBR))
return Res;
- const TerminatorInst *TermL = BBL->getTerminator();
- const TerminatorInst *TermR = BBR->getTerminator();
+ const Instruction *TermL = BBL->getTerminator();
+ const Instruction *TermR = BBR->getTerminator();
assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
@@ -938,7 +936,7 @@ FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
for (auto &Inst : *BB) {
H.add(Inst.getOpcode());
}
- const TerminatorInst *Term = BB->getTerminator();
+ const Instruction *Term = BB->getTerminator();
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
continue;
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 479816a339d0..a9772e31da50 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -124,7 +124,6 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
return SGV->getLinkage();
switch (SGV->getLinkage()) {
- case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::ExternalLinkage:
// External and linkonce definitions are converted to available_externally
@@ -144,11 +143,13 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
// An imported available_externally declaration stays that way.
return SGV->getLinkage();
+ case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::WeakAnyLinkage:
- // Can't import weak_any definitions correctly, or we might change the
- // program semantics, since the linker will pick the first weak_any
- // definition and importing would change the order they are seen by the
- // linker. The module linking caller needs to enforce this.
+ // Can't import linkonce_any/weak_any definitions correctly, or we might
+ // change the program semantics, since the linker will pick the first
+ // linkonce_any/weak_any definition and importing would change the order
+ // they are seen by the linker. The module linking caller needs to enforce
+ // this.
assert(!doImportAsDefinition(SGV));
// If imported as a declaration, it becomes external_weak.
return SGV->getLinkage();
@@ -202,10 +203,26 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
- // Check the summaries to see if the symbol gets resolved to a known local
- // definition.
+ ValueInfo VI;
if (GV.hasName()) {
- ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID());
+ VI = ImportIndex.getValueInfo(GV.getGUID());
+ // Set synthetic function entry counts.
+ if (VI && ImportIndex.hasSyntheticEntryCounts()) {
+ if (Function *F = dyn_cast<Function>(&GV)) {
+ if (!F->isDeclaration()) {
+ for (auto &S : VI.getSummaryList()) {
+ FunctionSummary *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
+ if (FS->modulePath() == M.getModuleIdentifier()) {
+ F->setEntryCount(Function::ProfileCount(FS->entryCount(),
+ Function::PCT_Synthetic));
+ break;
+ }
+ }
+ }
+ }
+ }
+ // Check the summaries to see if the symbol gets resolved to a known local
+ // definition.
if (VI && VI.isDSOLocal()) {
GV.setDSOLocal(true);
if (GV.hasDLLImportStorageClass())
@@ -213,6 +230,22 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
}
}
+ // Mark read-only variables which can be imported with specific attribute.
+ // We can't internalize them now because IRMover will fail to link variable
+ // definitions to their external declarations during ThinLTO import. We'll
+ // internalize read-only variables later, after import is finished.
+ // See internalizeImmutableGVs.
+ //
+ // If global value dead stripping is not enabled in summary then
+ // propagateConstants hasn't been run. We can't internalize GV
+ // in such case.
+ if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) {
+ const auto &SL = VI.getSummaryList();
+ auto *GVS = SL.empty() ? nullptr : dyn_cast<GlobalVarSummary>(SL[0].get());
+ if (GVS && GVS->isReadOnly())
+ cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize");
+ }
+
bool DoPromote = false;
if (GV.hasLocalLinkage() &&
((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
@@ -230,7 +263,7 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
// Remove functions imported as available externally defs from comdats,
// as this is a declaration for the linker, and will be dropped eventually.
// It is illegal for comdats to contain declarations.
- auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
+ auto *GO = dyn_cast<GlobalObject>(&GV);
if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
// The IRMover should not have placed any imported declarations in
// a comdat, so the only declaration that should be in a comdat
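
The new processGlobalForThinLTO code above only tags importable read-only globals with a string attribute; the actual internalization is deferred until importing has finished (the internalizeImmutableGVs step the comment refers to). A minimal sketch of what that deferred step could look like, assuming GlobalVariable::hasAttribute accepts the same string kind used above; internalizeTaggedGlobals is an invented name and the in-tree pass does more bookkeeping than this.

#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// After ThinLTO importing is done, globals tagged earlier can safely be given
// local linkage; doing it before import would break linking of definitions to
// their external declarations, as the comment above notes.
static void internalizeTaggedGlobals(Module &M) {
  for (GlobalVariable &GV : M.globals())
    if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize"))
      GV.setLinkage(GlobalValue::InternalLinkage);
}
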
diff --git a/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp b/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp
new file mode 100644
index 000000000000..08de0a4c53e9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp
@@ -0,0 +1,64 @@
+//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform transformations related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/GuardUtils.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+static cl::opt<uint32_t> PredicatePassBranchWeight(
+ "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20),
+ cl::desc("The probability of a guard failing is assumed to be the "
+ "reciprocal of this value (default = 1 << 20)"));
+
+void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
+ CallInst *Guard) {
+ OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt));
+ SmallVector<Value *, 4> Args(std::next(Guard->arg_begin()), Guard->arg_end());
+
+ auto *CheckBB = Guard->getParent();
+ auto *DeoptBlockTerm =
+ SplitBlockAndInsertIfThen(Guard->getArgOperand(0), Guard, true);
+
+ auto *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
+
+ // SplitBlockAndInsertIfThen inserts control flow that branches to
+ // DeoptBlockTerm if the condition is true. We want the opposite.
+ CheckBI->swapSuccessors();
+
+ CheckBI->getSuccessor(0)->setName("guarded");
+ CheckBI->getSuccessor(1)->setName("deopt");
+
+ if (auto *MD = Guard->getMetadata(LLVMContext::MD_make_implicit))
+ CheckBI->setMetadata(LLVMContext::MD_make_implicit, MD);
+
+ MDBuilder MDB(Guard->getContext());
+ CheckBI->setMetadata(LLVMContext::MD_prof,
+ MDB.createBranchWeights(PredicatePassBranchWeight, 1));
+
+ IRBuilder<> B(DeoptBlockTerm);
+ auto *DeoptCall = B.CreateCall(DeoptIntrinsic, Args, {DeoptOB}, "");
+
+ if (DeoptIntrinsic->getReturnType()->isVoidTy()) {
+ B.CreateRetVoid();
+ } else {
+ DeoptCall->setName("deoptcall");
+ B.CreateRet(DeoptCall);
+ }
+
+ DeoptCall->setCallingConv(Guard->getCallingConv());
+ DeoptBlockTerm->eraseFromParent();
+}
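
GuardUtils.cpp is new in this import: makeGuardControlFlowExplicit rewrites an @llvm.experimental.guard call into an explicit conditional branch whose failing edge calls @llvm.experimental.deoptimize and returns. A hedged sketch of how a lowering pass might drive it follows; lowerGuardsIn and the collection loop are invented, the intrinsic IDs and the helper's signature come from the code above, and every guard is assumed to carry the usual "deopt" operand bundle.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
using namespace llvm;

static bool lowerGuardsIn(Function &F) {
  // Collect the guard calls first: the rewrite splits blocks, so the CFG must
  // not be mutated while it is being iterated.
  SmallVector<CallInst *, 8> Guards;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (auto *CI = dyn_cast<CallInst>(&I))
        if (CI->getCalledFunction() &&
            CI->getCalledFunction()->getIntrinsicID() ==
                Intrinsic::experimental_guard)
          Guards.push_back(CI);
  if (Guards.empty())
    return false;

  // The deoptimize intrinsic is instantiated with the caller's return type,
  // matching the ret/ret void emitted by makeGuardControlFlowExplicit.
  Function *DeoptIntrinsic = Intrinsic::getDeclaration(
      F.getParent(), Intrinsic::experimental_deoptimize, {F.getReturnType()});
  for (CallInst *Guard : Guards)
    makeGuardControlFlowExplicit(DeoptIntrinsic, Guard);
  return true;
}
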
diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index 8382220fc9e1..02482c550321 100644
--- a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -161,7 +161,7 @@ void ImportedFunctionsInliningStatistics::dump(const bool Verbose) {
void ImportedFunctionsInliningStatistics::calculateRealInlines() {
// Removing duplicated Callers.
- llvm::sort(NonImportedCallers.begin(), NonImportedCallers.end());
+ llvm::sort(NonImportedCallers);
NonImportedCallers.erase(
std::unique(NonImportedCallers.begin(), NonImportedCallers.end()),
NonImportedCallers.end());
@@ -190,17 +190,14 @@ ImportedFunctionsInliningStatistics::getSortedNodes() {
for (const NodesMapTy::value_type& Node : NodesMap)
SortedNodes.push_back(&Node);
- llvm::sort(
- SortedNodes.begin(), SortedNodes.end(),
- [&](const SortedNodesTy::value_type &Lhs,
- const SortedNodesTy::value_type &Rhs) {
- if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)
- return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines;
- if (Lhs->second->NumberOfRealInlines !=
- Rhs->second->NumberOfRealInlines)
- return Lhs->second->NumberOfRealInlines >
- Rhs->second->NumberOfRealInlines;
- return Lhs->first() < Rhs->first();
- });
+ llvm::sort(SortedNodes, [&](const SortedNodesTy::value_type &Lhs,
+ const SortedNodesTy::value_type &Rhs) {
+ if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)
+ return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines;
+ if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines)
+ return Lhs->second->NumberOfRealInlines >
+ Rhs->second->NumberOfRealInlines;
+ return Lhs->first() < Rhs->first();
+ });
return SortedNodes;
}
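
Both hunks switch from the iterator-pair form of llvm::sort to the range form from STLExtras.h. Below is a simplified, self-contained model of such a wrapper; sortRange is an invented name, and the in-tree llvm::sort additionally shuffles the range when expensive checks are enabled so that comparators which are not strict weak orderings get caught.

#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

// Range-based sort wrappers: callers pass the container once instead of a
// begin/end pair, which is all the two hunks above are taking advantage of.
template <typename Range, typename Compare>
void sortRange(Range &&R, Compare Cmp) {
  std::sort(std::begin(R), std::end(R), Cmp);
}

template <typename Range> void sortRange(Range &&R) {
  std::sort(std::begin(R), std::end(R));
}

int main() {
  std::vector<std::string> Callers = {"b", "a", "a", "c"};
  sortRange(Callers); // same shape as llvm::sort(NonImportedCallers)
  Callers.erase(std::unique(Callers.begin(), Callers.end()), Callers.end());
  return 0;
}
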
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index ddc6e07e2f59..623fe91a5a60 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -84,13 +85,15 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
cl::init(true), cl::Hidden,
cl::desc("Convert align attributes to assumptions during inlining."));
-bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR, bool InsertLifetime) {
+llvm::InlineResult llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR,
+ bool InsertLifetime) {
return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
}
-bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR, bool InsertLifetime) {
+llvm::InlineResult llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR,
+ bool InsertLifetime) {
return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
}
@@ -768,14 +771,16 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
UnwindDest->removePredecessor(InvokeBB);
}
-/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata,
-/// that metadata should be propagated to all memory-accessing cloned
-/// instructions.
+/// When inlining a call site that has !llvm.mem.parallel_loop_access or
+/// llvm.access.group metadata, that metadata should be propagated to all
+/// memory-accessing cloned instructions.
static void PropagateParallelLoopAccessMetadata(CallSite CS,
ValueToValueMapTy &VMap) {
MDNode *M =
CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
- if (!M)
+ MDNode *CallAccessGroup =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_access_group);
+ if (!M && !CallAccessGroup)
return;
for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
@@ -787,11 +792,20 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS,
if (!NI)
continue;
- if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
+ if (M) {
+ if (MDNode *PM =
+ NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
M = MDNode::concatenate(PM, M);
NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
- } else if (NI->mayReadOrWriteMemory()) {
- NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ } else if (NI->mayReadOrWriteMemory()) {
+ NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ }
+ }
+
+ if (NI->mayReadOrWriteMemory()) {
+ MDNode *UnitedAccGroups = uniteAccessGroups(
+ NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup);
+ NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups);
}
}
}
@@ -985,22 +999,22 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
PtrArgs.push_back(CXI->getPointerOperand());
else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
PtrArgs.push_back(RMWI->getPointerOperand());
- else if (ImmutableCallSite ICS = ImmutableCallSite(I)) {
+ else if (const auto *Call = dyn_cast<CallBase>(I)) {
// If we know that the call does not access memory, then we'll still
// know that about the inlined clone of this call site, and we don't
// need to add metadata.
- if (ICS.doesNotAccessMemory())
+ if (Call->doesNotAccessMemory())
continue;
IsFuncCall = true;
if (CalleeAAR) {
- FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS);
+ FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
if (MRB == FMRB_OnlyAccessesArgumentPointees ||
MRB == FMRB_OnlyReadsArgumentPointees)
IsArgMemOnlyCall = true;
}
- for (Value *Arg : ICS.args()) {
+ for (Value *Arg : Call->args()) {
// We need to check the underlying objects of all arguments, not just
// the pointer arguments, because we might be passing pointers as
// integers, etc.
@@ -1306,16 +1320,10 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// Check whether this Value is used by a lifetime intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
- for (User *U : V->users()) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
+ for (User *U : V->users())
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
+ if (II->isLifetimeStartOrEnd())
return true;
- }
- }
- }
return false;
}
@@ -1491,9 +1499,10 @@ static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
/// exists in the instruction stream. Similarly this will inline a recursive
/// function by one level.
-bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR, bool InsertLifetime,
- Function *ForwardVarArgsTo) {
+llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR,
+ bool InsertLifetime,
+ Function *ForwardVarArgsTo) {
Instruction *TheCall = CS.getInstruction();
assert(TheCall->getParent() && TheCall->getFunction()
&& "Instruction not in function!");
@@ -1504,7 +1513,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Function *CalledFunc = CS.getCalledFunction();
if (!CalledFunc || // Can't inline external function or indirect
CalledFunc->isDeclaration()) // call!
- return false;
+ return "external or indirect";
// The inliner does not know how to inline through calls with operand bundles
// in general ...
@@ -1518,7 +1527,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (Tag == LLVMContext::OB_funclet)
continue;
- return false;
+ return "unsupported operand bundle";
}
}
@@ -1537,7 +1546,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (!Caller->hasGC())
Caller->setGC(CalledFunc->getGC());
else if (CalledFunc->getGC() != Caller->getGC())
- return false;
+ return "incompatible GC";
}
// Get the personality function from the callee if it contains a landing pad.
@@ -1561,7 +1570,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// TODO: This isn't 100% true. Some personality functions are proper
// supersets of others and can be used in place of the other.
else if (CalledPersonality != CallerPersonality)
- return false;
+ return "incompatible personality";
}
// We need to figure out which funclet the callsite was in so that we may
@@ -1586,7 +1595,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// for catchpads.
for (const BasicBlock &CalledBB : *CalledFunc) {
if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
- return false;
+ return "catch in cleanup funclet";
}
}
} else if (isAsynchronousEHPersonality(Personality)) {
@@ -1594,7 +1603,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// funclet in the callee.
for (const BasicBlock &CalledBB : *CalledFunc) {
if (CalledBB.isEHPad())
- return false;
+ return "SEH in cleanup funclet";
}
}
}
@@ -2244,7 +2253,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Change the branch that used to go to AfterCallBB to branch to the first
// basic block of the inlined function.
//
- TerminatorInst *Br = OrigBB->getTerminator();
+ Instruction *Br = OrigBB->getTerminator();
assert(Br && Br->getOpcode() == Instruction::Br &&
"splitBasicBlock broken!");
Br->setOperand(0, &*FirstNewBlock);
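
InlineFunction now reports why inlining was refused instead of returning a bare bool, which is what makes early exits like return "incompatible GC"; above possible. Below is a simplified standalone model of such a result type; InlineResultModel and tryInline are invented names, and the in-tree llvm::InlineResult (declared in InlineCost.h) may differ in detail.

#include <cstdio>

// Success is the default; a failure implicitly converts from the reason
// string, so a refusing code path can simply return its message.
struct InlineResultModel {
  const char *Message = nullptr;                    // nullptr means success
  InlineResultModel() = default;                    // success
  InlineResultModel(const char *M) : Message(M) {}  // failure with a reason
  explicit operator bool() const { return Message == nullptr; }
};

static InlineResultModel tryInline(bool SameGC) {
  if (!SameGC)
    return "incompatible GC"; // mirrors the early returns in InlineFunction
  return {};
}

int main() {
  InlineResultModel IR = tryInline(false);
  if (!IR)
    std::printf("not inlining: %s\n", IR.Message);
  return 0;
}
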
diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 9832a6f24e1f..e1592c867636 100644
--- a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -487,7 +487,7 @@ void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
MDNode *BranchWeights =
MDBuilder(CI->getContext()).createBranchWeights(1, 2000);
- TerminatorInst *NewInst =
+ Instruction *NewInst =
SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT);
BasicBlock *CallBB = NewInst->getParent();
CallBB->setName("cdce.call");
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index ae3cb077a3af..499e611acb57 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -31,8 +31,10 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -47,6 +49,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -102,8 +105,8 @@ STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
/// DeleteDeadConditions is true.
bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
const TargetLibraryInfo *TLI,
- DeferredDominance *DDT) {
- TerminatorInst *T = BB->getTerminator();
+ DomTreeUpdater *DTU) {
+ Instruction *T = BB->getTerminator();
IRBuilder<> Builder(T);
// Branch - See if we are conditional jumping on constant
@@ -125,8 +128,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Replace the conditional branch with an unconditional one.
Builder.CreateBr(Destination);
BI->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, OldDest);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, OldDest);
return true;
}
@@ -201,8 +204,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
DefaultDest->removePredecessor(ParentBB);
i = SI->removeCase(i);
e = SI->case_end();
- if (DDT)
- DDT->deleteEdge(ParentBB, DefaultDest);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(ParentBB, DefaultDest);
continue;
}
@@ -229,17 +232,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
Builder.CreateBr(TheOnlyDest);
BasicBlock *BB = SI->getParent();
std::vector <DominatorTree::UpdateType> Updates;
- if (DDT)
+ if (DTU)
Updates.reserve(SI->getNumSuccessors() - 1);
// Remove entries from PHI nodes which we no longer branch to...
- for (BasicBlock *Succ : SI->successors()) {
+ for (BasicBlock *Succ : successors(SI)) {
// Found case matching a constant operand?
if (Succ == TheOnlyDest) {
TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
} else {
Succ->removePredecessor(BB);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Succ});
}
}
@@ -249,8 +252,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SI->eraseFromParent();
if (DeleteDeadConditions)
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
return true;
}
@@ -297,7 +300,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
BasicBlock *TheOnlyDest = BA->getBasicBlock();
std::vector <DominatorTree::UpdateType> Updates;
- if (DDT)
+ if (DTU)
Updates.reserve(IBI->getNumDestinations() - 1);
// Insert the new branch.
@@ -310,7 +313,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BasicBlock *ParentBB = IBI->getParent();
BasicBlock *DestBB = IBI->getDestination(i);
DestBB->removePredecessor(ParentBB);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, ParentBB, DestBB});
}
}
@@ -327,8 +330,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
new UnreachableInst(BB->getContext(), BB);
}
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
return true;
}
}
@@ -352,7 +355,7 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
const TargetLibraryInfo *TLI) {
- if (isa<TerminatorInst>(I))
+ if (I->isTerminator())
return false;
// We don't want the landingpad-like instructions removed by anything this
@@ -390,8 +393,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
return true;
// Lifetime intrinsics are dead when their right-hand is undef.
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end)
+ if (II->isLifetimeStartOrEnd())
return isa<UndefValue>(II->getArgOperand(1));
// Assumptions are dead if their condition is trivially true. Guards on
@@ -425,22 +427,22 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
/// trivially dead instruction, delete it. If that makes any of its operands
/// trivially dead, delete them too, recursively. Return true if any
/// instructions were deleted.
-bool
-llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
- const TargetLibraryInfo *TLI) {
+bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
+ Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
return false;
SmallVector<Instruction*, 16> DeadInsts;
DeadInsts.push_back(I);
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
return true;
}
void llvm::RecursivelyDeleteTriviallyDeadInstructions(
- SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI) {
+ SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI,
+ MemorySSAUpdater *MSSAU) {
// Process the dead instruction list until empty.
while (!DeadInsts.empty()) {
Instruction &I = *DeadInsts.pop_back_val();
@@ -467,11 +469,24 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(
if (isInstructionTriviallyDead(OpI, TLI))
DeadInsts.push_back(OpI);
}
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(&I);
I.eraseFromParent();
}
}
+bool llvm::replaceDbgUsesWithUndef(Instruction *I) {
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ findDbgUsers(DbgUsers, I);
+ for (auto *DII : DbgUsers) {
+ Value *Undef = UndefValue::get(I->getType());
+ DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
+ ValueAsMetadata::get(Undef)));
+ }
+ return !DbgUsers.empty();
+}
+
/// areAllUsesEqual - Check whether the uses of a value are all the same.
/// This is similar to Instruction::hasOneUse() except this will also return
/// true when there are no uses or multiple uses that all refer to the same
@@ -626,7 +641,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
/// .. and delete the predecessor corresponding to the '1', this will attempt to
/// recursively fold the and to 0.
void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DeferredDominance *DDT) {
+ DomTreeUpdater *DTU) {
// This only adjusts blocks with PHI nodes.
if (!isa<PHINode>(BB->begin()))
return;
@@ -649,17 +664,16 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
// of the block.
if (PhiIt != OldPhiIt) PhiIt = &BB->front();
}
- if (DDT)
- DDT->deleteEdge(Pred, BB);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(Pred, BB);
}
/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
-/// predecessor is known to have one successor (DestBB!). Eliminate the edge
+/// predecessor is known to have one successor (DestBB!). Eliminate the edge
/// between them, moving the instructions in the predecessor into DestBB and
/// deleting the predecessor block.
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT,
- DeferredDominance *DDT) {
- assert(!(DT && DDT) && "Cannot call with both DT and DDT.");
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
+ DomTreeUpdater *DTU) {
// If BB has single-entry PHI nodes, fold them.
while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
@@ -677,11 +691,11 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT,
if (PredBB == &DestBB->getParent()->getEntryBlock())
ReplaceEntryBB = true;
- // Deferred DT update: Collect all the edges that enter PredBB. These
- // dominator edges will be redirected to DestBB.
- std::vector <DominatorTree::UpdateType> Updates;
- if (DDT && !ReplaceEntryBB) {
- Updates.reserve(1 + (2 * pred_size(PredBB)));
+ // DTU updates: Collect all the edges that enter
+ // PredBB. These dominator edges will be redirected to DestBB.
+ SmallVector<DominatorTree::UpdateType, 32> Updates;
+
+ if (DTU) {
Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) {
Updates.push_back({DominatorTree::Delete, *I, PredBB});
@@ -708,33 +722,32 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT,
// Splice all the instructions from PredBB to DestBB.
PredBB->getTerminator()->eraseFromParent();
DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+ new UnreachableInst(PredBB->getContext(), PredBB);
// If the PredBB is the entry block of the function, move DestBB up to
// become the entry block after we erase PredBB.
if (ReplaceEntryBB)
DestBB->moveAfter(PredBB);
- if (DT) {
- // For some irreducible CFG we end up having forward-unreachable blocks
- // so check if getNode returns a valid node before updating the domtree.
- if (DomTreeNode *DTN = DT->getNode(PredBB)) {
- BasicBlock *PredBBIDom = DTN->getIDom()->getBlock();
- DT->changeImmediateDominator(DestBB, PredBBIDom);
- DT->eraseNode(PredBB);
+ if (DTU) {
+ assert(PredBB->getInstList().size() == 1 &&
+ isa<UnreachableInst>(PredBB->getTerminator()) &&
+ "The successor list of PredBB isn't empty before "
+ "applying corresponding DTU updates.");
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->deleteBB(PredBB);
+ // Recalculation of DomTree is needed when updating a forward DomTree and
+ // the Entry BB is replaced.
+ if (ReplaceEntryBB && DTU->hasDomTree()) {
+ // The entry block was removed and there is no external interface for
+ // the dominator tree to be notified of this change. In this corner-case
+ // we recalculate the entire tree.
+ DTU->recalculate(*(DestBB->getParent()));
}
}
- if (DDT) {
- DDT->deleteBB(PredBB); // Deferred deletion of BB.
- if (ReplaceEntryBB)
- // The entry block was removed and there is no external interface for the
- // dominator tree to be notified of this change. In this corner-case we
- // recalculate the entire tree.
- DDT->recalculate(*(DestBB->getParent()));
- else
- DDT->applyUpdates(Updates);
- } else {
- PredBB->eraseFromParent(); // Nuke BB.
+ else {
+ PredBB->eraseFromParent(); // Nuke BB if DTU is nullptr.
}
}
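
Across these Local.cpp hunks the DeferredDominance parameter is replaced by DomTreeUpdater, and the hunks converge on one protocol for removing a block: detach it from its successors, leave it with a lone UnreachableInst so its successor list matches the pending updates, apply the edge deletions with duplicates forced out, and only then let the updater delete the block eagerly or lazily. A hedged caller-side sketch of that protocol, using only calls that appear in the hunks above; removeBlock itself is an invented helper.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void removeBlock(BasicBlock *BB, DomTreeUpdater *DTU) {
  SmallVector<DominatorTree::UpdateType, 4> Updates;
  for (BasicBlock *Succ : successors(BB)) {
    Succ->removePredecessor(BB);
    Updates.push_back({DominatorTree::Delete, BB, Succ});
  }
  // Clear BB's successor list so it agrees with the updates before they are
  // applied; the block keeps a terminator so the IR stays well formed.
  BB->getTerminator()->eraseFromParent();
  new UnreachableInst(BB->getContext(), BB);
  if (DTU) {
    DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
    DTU->deleteBB(BB); // eager or deferred, depending on the update strategy
  } else {
    BB->eraseFromParent();
  }
}
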
@@ -945,7 +958,7 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
/// eliminate BB by rewriting all the predecessors to branch to the successor
/// block and return true. If we can't transform, return false.
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
- DeferredDominance *DDT) {
+ DomTreeUpdater *DTU) {
assert(BB != &BB->getParent()->getEntryBlock() &&
"TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
@@ -986,9 +999,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
- std::vector<DominatorTree::UpdateType> Updates;
- if (DDT) {
- Updates.reserve(1 + (2 * pred_size(BB)));
+ SmallVector<DominatorTree::UpdateType, 32> Updates;
+ if (DTU) {
Updates.push_back({DominatorTree::Delete, BB, Succ});
// All predecessors of BB will be moved to Succ.
for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
@@ -1044,9 +1056,16 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
BB->replaceAllUsesWith(Succ);
if (!Succ->hasName()) Succ->takeName(BB);
- if (DDT) {
- DDT->deleteBB(BB); // Deferred deletion of the old basic block.
- DDT->applyUpdates(Updates);
+ // Clear the successor list of BB to match updates applying to DTU later.
+ if (BB->getTerminator())
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB->getContext(), BB);
+ assert(succ_empty(BB) && "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
+
+ if (DTU) {
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->deleteBB(BB);
} else {
BB->eraseFromParent(); // Delete the old basic block.
}
@@ -1237,7 +1256,7 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
/// alloc size of the value when doing the comparison. E.g. an i1 value will be
/// identified as covering an n-bit fragment, if the store size of i1 is at
/// least n bits.
-static bool valueCoversEntireFragment(Type *ValTy, DbgInfoIntrinsic *DII) {
+static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
const DataLayout &DL = DII->getModule()->getDataLayout();
uint64_t ValueSize = DL.getTypeAllocSizeInBits(ValTy);
if (auto FragmentSize = DII->getFragmentSizeInBits())
@@ -1255,7 +1274,7 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgInfoIntrinsic *DII) {
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
StoreInst *SI, DIBuilder &Builder) {
assert(DII->isAddressOfVariable());
auto *DIVar = DII->getVariable();
@@ -1278,33 +1297,6 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
return;
}
- // If an argument is zero extended then use argument directly. The ZExt
- // may be zapped by an optimization pass in future.
- Argument *ExtendedArg = nullptr;
- if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
- ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
- if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
- ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
- if (ExtendedArg) {
- // If this DII was already describing only a fragment of a variable, ensure
- // that fragment is appropriately narrowed here.
- // But if a fragment wasn't used, describe the value as the original
- // argument (rather than the zext or sext) so that it remains described even
- // if the sext/zext is optimized away. This widens the variable description,
- // leaving it up to the consumer to know how the smaller value may be
- // represented in a larger register.
- if (auto Fragment = DIExpr->getFragmentInfo()) {
- unsigned FragmentOffset = Fragment->OffsetInBits;
- SmallVector<uint64_t, 3> Ops(DIExpr->elements_begin(),
- DIExpr->elements_end() - 3);
- Ops.push_back(dwarf::DW_OP_LLVM_fragment);
- Ops.push_back(FragmentOffset);
- const DataLayout &DL = DII->getModule()->getDataLayout();
- Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType()));
- DIExpr = Builder.createExpression(Ops);
- }
- DV = ExtendedArg;
- }
if (!LdStHasDebugValue(DIVar, DIExpr, SI))
Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(),
SI);
@@ -1312,7 +1304,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
LoadInst *LI, DIBuilder &Builder) {
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
@@ -1341,7 +1333,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
/// llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
PHINode *APN, DIBuilder &Builder) {
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
@@ -1443,7 +1435,7 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
// Map existing PHI nodes to their dbg.values.
ValueToValueMapTy DbgValueMap;
for (auto &I : *BB) {
- if (auto DbgII = dyn_cast<DbgInfoIntrinsic>(&I)) {
+ if (auto DbgII = dyn_cast<DbgVariableIntrinsic>(&I)) {
if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation()))
DbgValueMap.insert({Loc, DbgII});
}
@@ -1464,7 +1456,7 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
for (auto VI : PHI->operand_values()) {
auto V = DbgValueMap.find(VI);
if (V != DbgValueMap.end()) {
- auto *DbgII = cast<DbgInfoIntrinsic>(V->second);
+ auto *DbgII = cast<DbgVariableIntrinsic>(V->second);
Instruction *NewDbgII = DbgII->clone();
NewDbgII->setOperand(0, PhiMAV);
auto InsertionPt = Parent->getFirstInsertionPt();
@@ -1478,7 +1470,7 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
/// Finds all intrinsics declaring local variables as living in the memory that
/// 'V' points to. This may include a mix of dbg.declare and
/// dbg.addr intrinsics.
-TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
+TinyPtrVector<DbgVariableIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
// This function is hot. Check whether the value has any metadata to avoid a
// DenseMap lookup.
if (!V->isUsedByMetadata())
@@ -1490,9 +1482,9 @@ TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
if (!MDV)
return {};
- TinyPtrVector<DbgInfoIntrinsic *> Declares;
+ TinyPtrVector<DbgVariableIntrinsic *> Declares;
for (User *U : MDV->users()) {
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(U))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(U))
if (DII->isAddressOfVariable())
Declares.push_back(DII);
}
@@ -1512,7 +1504,7 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
DbgValues.push_back(DVI);
}
-void llvm::findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers,
+void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers,
Value *V) {
// This function is hot. Check whether the value has any metadata to avoid a
// DenseMap lookup.
@@ -1521,7 +1513,7 @@ void llvm::findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers,
if (auto *L = LocalAsMetadata::getIfExists(V))
if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
for (User *U : MDV->users())
- if (DbgInfoIntrinsic *DII = dyn_cast<DbgInfoIntrinsic>(U))
+ if (DbgVariableIntrinsic *DII = dyn_cast<DbgVariableIntrinsic>(U))
DbgUsers.push_back(DII);
}
@@ -1529,7 +1521,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
Instruction *InsertBefore, DIBuilder &Builder,
bool DerefBefore, int Offset, bool DerefAfter) {
auto DbgAddrs = FindDbgAddrUses(Address);
- for (DbgInfoIntrinsic *DII : DbgAddrs) {
+ for (DbgVariableIntrinsic *DII : DbgAddrs) {
DebugLoc Loc = DII->getDebugLoc();
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
@@ -1597,7 +1589,7 @@ static MetadataAsValue *wrapValueInMetadata(LLVMContext &C, Value *V) {
}
bool llvm::salvageDebugInfo(Instruction &I) {
- SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
findDbgUsers(DbgUsers, &I);
if (DbgUsers.empty())
return false;
@@ -1607,7 +1599,7 @@ bool llvm::salvageDebugInfo(Instruction &I) {
auto &Ctx = I.getContext();
auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); };
- auto doSalvage = [&](DbgInfoIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) {
+ auto doSalvage = [&](DbgVariableIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) {
auto *DIExpr = DII->getExpression();
if (!Ops.empty()) {
// Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
@@ -1621,13 +1613,13 @@ bool llvm::salvageDebugInfo(Instruction &I) {
LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
};
- auto applyOffset = [&](DbgInfoIntrinsic *DII, uint64_t Offset) {
+ auto applyOffset = [&](DbgVariableIntrinsic *DII, uint64_t Offset) {
SmallVector<uint64_t, 8> Ops;
DIExpression::appendOffset(Ops, Offset);
doSalvage(DII, Ops);
};
- auto applyOps = [&](DbgInfoIntrinsic *DII,
+ auto applyOps = [&](DbgVariableIntrinsic *DII,
std::initializer_list<uint64_t> Opcodes) {
SmallVector<uint64_t, 8> Ops(Opcodes);
doSalvage(DII, Ops);
@@ -1726,16 +1718,16 @@ using DbgValReplacement = Optional<DIExpression *>;
/// changes are made.
static bool rewriteDebugUsers(
Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT,
- function_ref<DbgValReplacement(DbgInfoIntrinsic &DII)> RewriteExpr) {
+ function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr) {
// Find debug users of From.
- SmallVector<DbgInfoIntrinsic *, 1> Users;
+ SmallVector<DbgVariableIntrinsic *, 1> Users;
findDbgUsers(Users, &From);
if (Users.empty())
return false;
// Prevent use-before-def of To.
bool Changed = false;
- SmallPtrSet<DbgInfoIntrinsic *, 1> DeleteOrSalvage;
+ SmallPtrSet<DbgVariableIntrinsic *, 1> DeleteOrSalvage;
if (isa<Instruction>(&To)) {
bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint;
@@ -1824,7 +1816,7 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
Type *FromTy = From.getType();
Type *ToTy = To.getType();
- auto Identity = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement {
+ auto Identity = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
return DII.getExpression();
};
@@ -1848,7 +1840,7 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
// The width of the result has shrunk. Use sign/zero extension to describe
// the source variable's high bits.
- auto SignOrZeroExt = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement {
+ auto SignOrZeroExt = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
DILocalVariable *Var = DII.getVariable();
// Without knowing signedness, sign/zero extension isn't possible.
@@ -1902,17 +1894,17 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
}
unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
- bool PreserveLCSSA, DeferredDominance *DDT) {
+ bool PreserveLCSSA, DomTreeUpdater *DTU) {
BasicBlock *BB = I->getParent();
std::vector <DominatorTree::UpdateType> Updates;
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
- if (DDT)
+ if (DTU)
Updates.reserve(BB->getTerminator()->getNumSuccessors());
for (BasicBlock *Successor : successors(BB)) {
Successor->removePredecessor(BB, PreserveLCSSA);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Successor});
}
// Insert a call to llvm.trap right before this. This turns the undefined
@@ -1923,7 +1915,8 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
CallTrap->setDebugLoc(I->getDebugLoc());
}
- new UnreachableInst(I->getContext(), I);
+ auto *UI = new UnreachableInst(I->getContext(), I);
+ UI->setDebugLoc(I->getDebugLoc());
// All instructions after this are dead.
unsigned NumInstrsRemoved = 0;
@@ -1934,13 +1927,13 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
BB->getInstList().erase(BBI++);
++NumInstrsRemoved;
}
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
return NumInstrsRemoved;
}
/// changeToCall - Convert the specified invoke into a normal call.
-static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) {
+static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
@@ -1950,6 +1943,7 @@ static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) {
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
NewCall->setDebugLoc(II->getDebugLoc());
+ NewCall->copyMetadata(*II);
II->replaceAllUsesWith(NewCall);
// Follow the call by a branch to the normal destination.
@@ -1961,8 +1955,8 @@ static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) {
BasicBlock *UnwindDestBB = II->getUnwindDest();
UnwindDestBB->removePredecessor(BB);
II->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, UnwindDestBB);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
}
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
@@ -2003,8 +1997,8 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
}
static bool markAliveBlocks(Function &F,
- SmallPtrSetImpl<BasicBlock*> &Reachable,
- DeferredDominance *DDT = nullptr) {
+ SmallPtrSetImpl<BasicBlock *> &Reachable,
+ DomTreeUpdater *DTU = nullptr) {
SmallVector<BasicBlock*, 128> Worklist;
BasicBlock *BB = &F.front();
Worklist.push_back(BB);
@@ -2029,7 +2023,7 @@ static bool markAliveBlocks(Function &F,
if (IntrinsicID == Intrinsic::assume) {
if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI, false, false, DDT);
+ changeToUnreachable(CI, false, false, DTU);
Changed = true;
break;
}
@@ -2046,7 +2040,7 @@ static bool markAliveBlocks(Function &F,
if (match(CI->getArgOperand(0), m_Zero()))
if (!isa<UnreachableInst>(CI->getNextNode())) {
changeToUnreachable(CI->getNextNode(), /*UseLLVMTrap=*/false,
- false, DDT);
+ false, DTU);
Changed = true;
break;
}
@@ -2054,7 +2048,7 @@ static bool markAliveBlocks(Function &F,
} else if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(CI->getFunction())) ||
isa<UndefValue>(Callee)) {
- changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DDT);
+ changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DTU);
Changed = true;
break;
}
@@ -2064,7 +2058,7 @@ static bool markAliveBlocks(Function &F,
// though.
if (!isa<UnreachableInst>(CI->getNextNode())) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI->getNextNode(), false, false, DDT);
+ changeToUnreachable(CI->getNextNode(), false, false, DTU);
Changed = true;
}
break;
@@ -2083,21 +2077,21 @@ static bool markAliveBlocks(Function &F,
(isa<ConstantPointerNull>(Ptr) &&
!NullPointerIsDefined(SI->getFunction(),
SI->getPointerAddressSpace()))) {
- changeToUnreachable(SI, true, false, DDT);
+ changeToUnreachable(SI, true, false, DTU);
Changed = true;
break;
}
}
}
- TerminatorInst *Terminator = BB->getTerminator();
+ Instruction *Terminator = BB->getTerminator();
if (auto *II = dyn_cast<InvokeInst>(Terminator)) {
// Turn invokes that call 'nounwind' functions into ordinary calls.
Value *Callee = II->getCalledValue();
if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(BB->getParent())) ||
isa<UndefValue>(Callee)) {
- changeToUnreachable(II, true, false, DDT);
+ changeToUnreachable(II, true, false, DTU);
Changed = true;
} else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {
if (II->use_empty() && II->onlyReadsMemory()) {
@@ -2107,10 +2101,10 @@ static bool markAliveBlocks(Function &F,
BranchInst::Create(NormalDestBB, II);
UnwindDestBB->removePredecessor(II->getParent());
II->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, UnwindDestBB);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
} else
- changeToCall(II, DDT);
+ changeToCall(II, DTU);
Changed = true;
}
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) {
@@ -2156,7 +2150,7 @@ static bool markAliveBlocks(Function &F,
}
}
- Changed |= ConstantFoldTerminator(BB, true, nullptr, DDT);
+ Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU);
for (BasicBlock *Successor : successors(BB))
if (Reachable.insert(Successor).second)
Worklist.push_back(Successor);
@@ -2164,15 +2158,15 @@ static bool markAliveBlocks(Function &F,
return Changed;
}
-void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) {
- TerminatorInst *TI = BB->getTerminator();
+void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
+ Instruction *TI = BB->getTerminator();
if (auto *II = dyn_cast<InvokeInst>(TI)) {
- changeToCall(II, DDT);
+ changeToCall(II, DTU);
return;
}
- TerminatorInst *NewTI;
+ Instruction *NewTI;
BasicBlock *UnwindDest;
if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
@@ -2196,8 +2190,8 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) {
UnwindDest->removePredecessor(BB);
TI->replaceAllUsesWith(NewTI);
TI->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, UnwindDest);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, UnwindDest);
}
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
@@ -2205,9 +2199,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) {
/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo
/// after modifying the CFG.
bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
- DeferredDominance *DDT) {
+ DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU) {
SmallPtrSet<BasicBlock*, 16> Reachable;
- bool Changed = markAliveBlocks(F, Reachable, DDT);
+ bool Changed = markAliveBlocks(F, Reachable, DTU);
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
@@ -2216,45 +2211,68 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
assert(Reachable.size() < F.size());
NumRemoved += F.size()-Reachable.size();
- // Loop over all of the basic blocks that are not reachable, dropping all of
- // their internal references. Update DDT and LVI if available.
- std::vector <DominatorTree::UpdateType> Updates;
+ SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
auto *BB = &*I;
if (Reachable.count(BB))
continue;
+ DeadBlockSet.insert(BB);
+ }
+
+ if (MSSAU)
+ MSSAU->removeBlocks(DeadBlockSet);
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references. Update DTU and LVI if available.
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (auto *BB : DeadBlockSet) {
for (BasicBlock *Successor : successors(BB)) {
- if (Reachable.count(Successor))
+ if (!DeadBlockSet.count(Successor))
Successor->removePredecessor(BB);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Successor});
}
if (LVI)
LVI->eraseBlock(BB);
BB->dropAllReferences();
}
-
for (Function::iterator I = ++F.begin(); I != F.end();) {
auto *BB = &*I;
if (Reachable.count(BB)) {
++I;
continue;
}
- if (DDT) {
- DDT->deleteBB(BB); // deferred deletion of BB.
+ if (DTU) {
+ // Remove the terminator of BB to clear the successor list of BB.
+ if (BB->getTerminator())
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB->getContext(), BB);
+ assert(succ_empty(BB) && "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
++I;
} else {
I = F.getBasicBlockList().erase(I);
}
}
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU) {
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ bool Deleted = false;
+ for (auto *BB : DeadBlockSet) {
+ if (DTU->isBBPendingDeletion(BB))
+ --NumRemoved;
+ else
+ Deleted = true;
+ DTU->deleteBB(BB);
+ }
+ if (!Deleted)
+ return false;
+ }
return true;
}
void llvm::combineMetadata(Instruction *K, const Instruction *J,
- ArrayRef<unsigned> KnownIDs) {
+ ArrayRef<unsigned> KnownIDs, bool DoesKMove) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
K->dropUnknownNonDebugMetadata(KnownIDs);
K->getAllMetadataOtherThanDebugLoc(Metadata);
@@ -2279,8 +2297,20 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
case LLVMContext::MD_mem_parallel_loop_access:
K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
break;
+ case LLVMContext::MD_access_group:
+ K->setMetadata(LLVMContext::MD_access_group,
+ intersectAccessGroups(K, J));
+ break;
case LLVMContext::MD_range:
- K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD));
+
+ // If K does move, use most generic range. Otherwise keep the range of
+ // K.
+ if (DoesKMove)
+ // FIXME: If K does move, we should drop the range info and nonnull.
+ // Currently this function is used with DoesKMove in passes
+ // doing hoisting/sinking and the current behavior of using the
+ // most generic range is correct in those cases.
+ K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD));
break;
case LLVMContext::MD_fpmath:
K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
@@ -2290,8 +2320,9 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind, JMD);
break;
case LLVMContext::MD_nonnull:
- // Only set the !nonnull if it is present in both instructions.
- K->setMetadata(Kind, JMD);
+ // If K does move, keep nonnull if it is present in both instructions.
+ if (DoesKMove)
+ K->setMetadata(Kind, JMD);
break;
case LLVMContext::MD_invariant_group:
// Preserve !invariant.group in K.
@@ -2318,15 +2349,49 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(LLVMContext::MD_invariant_group, JMD);
}
-void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) {
+void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
+ bool KDominatesJ) {
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias, LLVMContext::MD_range,
LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
LLVMContext::MD_invariant_group, LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null};
- combineMetadata(K, J, KnownIDs);
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_access_group};
+ combineMetadata(K, J, KnownIDs, KDominatesJ);
+}
+
+void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
+ auto *ReplInst = dyn_cast<Instruction>(Repl);
+ if (!ReplInst)
+ return;
+
+ // Patch the replacement so that it is not more restrictive than the value
+ // being replaced.
+ // Note that if 'I' is a load being replaced by some operation,
+ // for example, by an arithmetic operation, then andIRFlags()
+ // would just erase all math flags from the original arithmetic
+ // operation, which is clearly not wanted and not needed.
+ if (!isa<LoadInst>(I))
+ ReplInst->andIRFlags(I);
+
+ // FIXME: If both the original and replacement value are part of the
+ // same control-flow region (meaning that the execution of one
+ // guarantees the execution of the other), then we can combine the
+ // noalias scopes here and do better than the general conservative
+ // answer used in combineMetadata().
+
+ // In general, GVN unifies expressions over different control-flow
+ // regions, and so we need a conservative combination of the noalias
+ // scopes.
+ static const unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull,
+ LLVMContext::MD_access_group};
+ combineMetadata(ReplInst, I, KnownIDs, false);
}
template <typename RootType, typename DominatesFn>
@@ -2454,6 +2519,54 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
}
}
+void llvm::dropDebugUsers(Instruction &I) {
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ findDbgUsers(DbgUsers, &I);
+ for (auto *DII : DbgUsers)
+ DII->eraseFromParent();
+}
+
+void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
+ BasicBlock *BB) {
+ // Since we are moving the instructions out of its basic block, we do not
+ // retain their original debug locations (DILocations) and debug intrinsic
+ // instructions (dbg.values).
+ //
+ // Doing so would degrade the debugging experience and adversely affect the
+ // accuracy of profiling information.
+ //
+ // Currently, when hoisting the instructions, we take the following actions:
+ // - Remove their dbg.values.
+ // - Set their debug locations to the values from the insertion point.
+ //
+ // As per PR39141 (comment #8), the more fundamental reason why the dbg.values
+ // need to be deleted, is because there will not be any instructions with a
+ // DILocation in either branch left after performing the transformation. We
+ // can only insert a dbg.value after the two branches are joined again.
+ //
+ // See PR38762, PR39243 for more details.
+ //
+ // TODO: Extend llvm.dbg.value to take more than one SSA Value (PR39141) to
+ // encode predicated DIExpressions that yield different results on different
+ // code paths.
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
+ Instruction *I = &*II;
+ I->dropUnknownNonDebugMetadata();
+ if (I->isUsedByMetadata())
+ dropDebugUsers(*I);
+ if (isa<DbgVariableIntrinsic>(I)) {
+ // Remove DbgInfo Intrinsics.
+ II = I->eraseFromParent();
+ continue;
+ }
+ I->setDebugLoc(InsertPt->getDebugLoc());
+ ++II;
+ }
+ DomBlock->getInstList().splice(InsertPt->getIterator(), BB->getInstList(),
+ BB->begin(),
+ BB->getTerminator()->getIterator());
+}
+
namespace {
/// A potential constituent of a bitreverse or bswap expression. See
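
Local.cpp also threads a DoesKMove flag through combineMetadata and adds the combineMetadataForCSE/patchReplacementInstruction pair, so callers must now state whether the kept instruction dominates the one being dropped. A hedged caller-side sketch using the signatures added above; replaceRedundantLoad and the dominance assumption are invented for illustration.

#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Earlier is the load we keep (K) and it dominates the redundant load Later
// (J), so the CSE-style helper is told KDominatesJ = true before Later's uses
// are forwarded and the instruction is erased.
static void replaceRedundantLoad(LoadInst *Earlier, LoadInst *Later) {
  combineMetadataForCSE(Earlier, Later, /*KDominatesJ=*/true);
  Later->replaceAllUsesWith(Earlier);
  Later->eraseFromParent();
}
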
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 6e92e679f999..41f14a834617 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -20,13 +20,15 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -35,6 +37,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -53,6 +56,7 @@ class LoopRotate {
AssumptionCache *AC;
DominatorTree *DT;
ScalarEvolution *SE;
+ MemorySSAUpdater *MSSAU;
const SimplifyQuery &SQ;
bool RotationOnly;
bool IsUtilMode;
@@ -60,10 +64,11 @@ class LoopRotate {
public:
LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
- DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ,
- bool RotationOnly, bool IsUtilMode)
+ DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode)
: MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
- SQ(SQ), RotationOnly(RotationOnly), IsUtilMode(IsUtilMode) {}
+ MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
+ IsUtilMode(IsUtilMode) {}
bool processLoop(Loop *L);
private:
@@ -268,6 +273,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
SE->forgetTopmostLoop(L);
LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
// Find new Loop header. NewHeader is a Header's one and only successor
// that is inside loop. Header's other successor is outside the
@@ -298,18 +305,18 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
- TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+ Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
// Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
using DbgIntrinsicHash =
std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
- auto makeHash = [](DbgInfoIntrinsic *D) -> DbgIntrinsicHash {
+ auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
};
SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
I != E; ++I) {
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&*I))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
DbgIntrinsics.insert(makeHash(DII));
else
break;
@@ -325,7 +332,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// something that might trap, but isn't safe to hoist something that reads
// memory (without proving that the loop doesn't write).
if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
- !Inst->mayWriteToMemory() && !isa<TerminatorInst>(Inst) &&
+ !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
!isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
Inst->moveBefore(LoopEntryBranch);
continue;
@@ -339,7 +346,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
// Avoid inserting the same intrinsic twice.
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(C))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
if (DbgIntrinsics.count(makeHash(DII))) {
C->deleteValue();
continue;
@@ -374,8 +381,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Along with all the other instructions, we just cloned OrigHeader's
// terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
// successors by duplicating their incoming values for OrigHeader.
- TerminatorInst *TI = OrigHeader->getTerminator();
- for (BasicBlock *SuccBB : TI->successors())
+ for (BasicBlock *SuccBB : successors(OrigHeader))
for (BasicBlock::iterator BI = SuccBB->begin();
PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
@@ -385,6 +391,12 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// remove the corresponding incoming values from the PHI nodes in OrigHeader.
LoopEntryBranch->eraseFromParent();
+ // Update MemorySSA before the rewrite call below changes the 1:1
+ // instruction:cloned_instruction_or_value mapping in ValueMap.
+ if (MSSAU) {
+ ValueMap[OrigHeader] = OrigPreheader;
+ MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, ValueMap);
+ }
SmallVector<PHINode*, 2> InsertedPHIs;
// If there were any uses of instructions in the duplicated block outside the
@@ -411,6 +423,12 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
DT->applyUpdates(Updates);
+
+ if (MSSAU) {
+ MSSAU->applyUpdates(Updates, *DT);
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
}
// At this point, we've finished our major CFG changes. As part of cloning
@@ -433,7 +451,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Split the edge to form a real preheader.
BasicBlock *NewPH = SplitCriticalEdge(
OrigPreheader, NewHeader,
- CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
NewPH->setName(NewHeader->getName() + ".lr.ph");
// Preserve canonical loop form, which means that 'Exit' should have only
@@ -452,7 +470,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
SplitLatchEdge |= L->getLoopLatch() == ExitPred;
BasicBlock *ExitSplit = SplitCriticalEdge(
ExitPred, Exit,
- CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
ExitSplit->moveBefore(Exit);
}
assert(SplitLatchEdge &&
@@ -467,16 +485,27 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With our CFG finalized, update DomTree if it is available.
if (DT) DT->deleteEdge(OrigPreheader, Exit);
+
+ // Update MSSA too, if available.
+ if (MSSAU)
+ MSSAU->removeEdge(OrigPreheader, Exit);
}
assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// Now that the CFG and DomTree are in a consistent state again, try to merge
// the OrigHeader block into OrigLatch. This will succeed if they are
// connected by an unconditional branch. This is just a cleanup so the
// emitted code isn't too gross in this common case.
- MergeBlockIntoPredecessor(OrigHeader, DT, LI);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
@@ -585,9 +614,14 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
<< LastExit->getName() << "\n");
// Hoist the instructions from Latch into LastExit.
+ Instruction *FirstLatchInst = &*(Latch->begin());
LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
Latch->begin(), Jmp->getIterator());
+ // Update MemorySSA
+ if (MSSAU)
+ MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst);
+
unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
BasicBlock *Header = Jmp->getSuccessor(0);
assert(Header == L->getHeader() && "expected a backward branch");
@@ -603,6 +637,10 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
if (DT)
DT->eraseNode(Latch);
Latch->eraseFromParent();
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
return true;
}
@@ -635,11 +673,16 @@ bool LoopRotate::processLoop(Loop *L) {
/// The utility to convert a loop into a loop with bottom test.
bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
AssumptionCache *AC, DominatorTree *DT,
- ScalarEvolution *SE, const SimplifyQuery &SQ,
- bool RotationOnly = true,
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ const SimplifyQuery &SQ, bool RotationOnly = true,
unsigned Threshold = unsigned(-1),
bool IsUtilMode = true) {
- LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, SQ, RotationOnly, IsUtilMode);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
+ IsUtilMode);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
return LR.processLoop(L);
}
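
[Illustrative sketch, not part of this diff: it shows how a caller might thread a MemorySSAUpdater through the LoopRotation utility whose signature is extended above. The Optional-based plumbing, the include paths, and the surrounding pass state (MSSA, Threshold) are assumptions; only the LoopRotation parameter list comes from this change.]

    // Hypothetical call site: keep MemorySSA in sync during rotation when the
    // analysis is available, or pass nullptr to preserve the old behaviour.
    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/Analysis/MemorySSAUpdater.h"

    static bool rotateWithOptionalMSSA(Loop *L, LoopInfo *LI,
                                       const TargetTransformInfo *TTI,
                                       AssumptionCache *AC, DominatorTree *DT,
                                       ScalarEvolution *SE, MemorySSA *MSSA,
                                       const SimplifyQuery &SQ,
                                       unsigned Threshold) {
      Optional<MemorySSAUpdater> MSSAU;
      if (MSSA)
        MSSAU = MemorySSAUpdater(MSSA); // updater wraps the analysis result
      return LoopRotation(L, LI, TTI, AC, DT, SE,
                          MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
                          /*RotationOnly=*/true, Threshold,
                          /*IsUtilMode=*/false);
    }
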
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 970494eb4704..380f4fca54d9 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -137,7 +137,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
// Split out the loop pre-header.
BasicBlock *PreheaderBB;
PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
- LI, PreserveLCSSA);
+ LI, nullptr, PreserveLCSSA);
if (!PreheaderBB)
return nullptr;
@@ -251,7 +251,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
SE->forgetLoop(L);
BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
- DT, LI, PreserveLCSSA);
+ DT, LI, nullptr, PreserveLCSSA);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -435,7 +435,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
MDNode *LoopMD = nullptr;
for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
- TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+ Instruction *TI = BackedgeBlocks[i]->getTerminator();
if (!LoopMD)
LoopMD = TI->getMetadata(LoopMDKind);
TI->setMetadata(LoopMDKind, nullptr);
@@ -488,7 +488,7 @@ ReprocessLoop:
<< P->getName() << "\n");
// Zap the dead pred's terminator and replace it with unreachable.
- TerminatorInst *TI = P->getTerminator();
+ Instruction *TI = P->getTerminator();
changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA);
Changed = true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 04b8c1417e0a..da7ed2bd1652 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -54,10 +54,10 @@ UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
static cl::opt<bool>
UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
cl::desc("Verify domtree after unrolling"),
-#ifdef NDEBUG
- cl::init(false)
-#else
+#ifdef EXPENSIVE_CHECKS
cl::init(true)
+#else
+ cl::init(false)
#endif
);
@@ -275,8 +275,7 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
// inserted code, doing constant propagation and dead code elimination as we
// go.
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- const std::vector<BasicBlock *> &NewLoopBlocks = L->getBlocks();
- for (BasicBlock *BB : NewLoopBlocks) {
+ for (BasicBlock *BB : L->getBlocks()) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = &*I++;
@@ -330,12 +329,15 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
+///
+/// If RemainderLoop is non-null, it will receive the remainder loop (if
+/// required and not fully unrolled).
LoopUnrollResult llvm::UnrollLoop(
Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime,
bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst,
unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder,
LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) {
+ OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
@@ -469,7 +471,7 @@ LoopUnrollResult llvm::UnrollLoop(
if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
EpilogProfitability, UnrollRemainder, LI, SE,
- DT, AC, PreserveLCSSA)) {
+ DT, AC, PreserveLCSSA, RemainderLoop)) {
if (Force)
RuntimeTripCount = false;
else {
@@ -596,8 +598,15 @@ LoopUnrollResult llvm::UnrollLoop(
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
- if (const DILocation *DIL = I.getDebugLoc())
- I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+ if (const DILocation *DIL = I.getDebugLoc()) {
+ auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+ if (NewDIL)
+ I.setDebugLoc(NewDIL.getValue());
+ else
+ LLVM_DEBUG(dbgs()
+ << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ }
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
@@ -782,7 +791,7 @@ LoopUnrollResult llvm::UnrollLoop(
// there is no such latch.
NewIDom = Latches.back();
for (BasicBlock *IterLatch : Latches) {
- TerminatorInst *Term = IterLatch->getTerminator();
+ Instruction *Term = IterLatch->getTerminator();
if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
NewIDom = IterLatch;
break;
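
[Illustrative sketch, not part of this diff: with the new trailing Loop **RemainderLoop out-parameter on UnrollLoop, a caller can retrieve the runtime remainder loop instead of losing track of it. The unroll factors and the LI/SE/DT/AC/ORE/Worklist names below are assumed stand-ins for the calling pass's state.]

    // Hypothetical call site: request runtime unrolling and, if a remainder
    // loop had to be created, queue it for later processing.
    Loop *RemainderLoop = nullptr;
    LoopUnrollResult Result = UnrollLoop(
        L, /*Count*/ 4, TripCount, /*Force*/ false, /*AllowRuntime*/ true,
        /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ false,
        /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, /*PeelCount*/ 0,
        /*UnrollRemainder*/ false, LI, SE, DT, AC, &ORE, PreserveLCSSA,
        &RemainderLoop);
    if (Result != LoopUnrollResult::Unmodified && RemainderLoop)
      Worklist.push_back(RemainderLoop); // revisit the remainder separately
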
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index b919f73c3817..e26762639c13 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -72,7 +72,7 @@ static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
for (BasicBlock *BB : ForeBlocks) {
if (BB == SubLoopPreHeader)
continue;
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (!ForeBlocks.count(TI->getSuccessor(i)))
return false;
@@ -167,12 +167,14 @@ static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
isSafeToUnrollAndJam should be used prior to calling this to make sure the
   unrolling will be valid. Checking profitability is also advisable.
+
+ If EpilogueLoop is non-null, it receives the epilogue loop (if it was
+ necessary to create one and not fully unrolled).
*/
-LoopUnrollResult
-llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
- unsigned TripMultiple, bool UnrollRemainder,
- LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, OptimizationRemarkEmitter *ORE) {
+LoopUnrollResult llvm::UnrollAndJamLoop(
+ Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple,
+ bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
// When we enter here we should have already checked that it is safe
BasicBlock *Header = L->getHeader();
@@ -181,7 +183,7 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
// Don't enter the unroll code if there is nothing to do.
if (TripCount == 0 && Count < 2) {
- LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n");
return LoopUnrollResult::Unmodified;
}
@@ -196,7 +198,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
if (TripMultiple == 1 || TripMultiple % Count != 0) {
if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
/*UseEpilogRemainder*/ true,
- UnrollRemainder, LI, SE, DT, AC, true)) {
+ UnrollRemainder, LI, SE, DT, AC, true,
+ EpilogueLoop)) {
LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
"generated when assuming runtime trip count\n");
return LoopUnrollResult::Unmodified;
@@ -297,8 +300,15 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
- if (const DILocation *DIL = I.getDebugLoc())
- I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+ if (const DILocation *DIL = I.getDebugLoc()) {
+ auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+ if (NewDIL)
+ I.setDebugLoc(NewDIL.getValue());
+ else
+ LLVM_DEBUG(dbgs()
+ << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ }
// Copy all blocks
for (unsigned It = 1; It != Count; ++It) {
@@ -619,16 +629,28 @@ static bool checkDependencies(SmallVector<Value *, 4> &Earlier,
if (auto D = DI.depends(Src, Dst, true)) {
assert(D->isOrdered() && "Expected an output, flow or anti dep.");
- if (D->isConfused())
+ if (D->isConfused()) {
+ LLVM_DEBUG(dbgs() << " Confused dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
return false;
+ }
if (!InnerLoop) {
- if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT)
+ if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT) {
+ LLVM_DEBUG(dbgs() << " > dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
return false;
+ }
} else {
assert(LoopDepth + 1 <= D->getLevels());
if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT &&
- D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT)
+ D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT) {
+ LLVM_DEBUG(dbgs() << " < > dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
return false;
+ }
}
}
}
@@ -716,38 +738,45 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
if (SubLoopLatch != SubLoopExit)
return false;
- if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken())
+ if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
return false;
+ }
// Split blocks into Fore/SubLoop/Aft based on dominators
BasicBlockSet SubLoopBlocks;
BasicBlockSet ForeBlocks;
BasicBlockSet AftBlocks;
if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
- AftBlocks, &DT))
+ AftBlocks, &DT)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n");
return false;
+ }
// Aft blocks may need to move instructions to fore blocks, which becomes more
// difficult if there are multiple (potentially conditionally executed)
// blocks. For now we just exclude loops with multiple aft blocks.
- if (AftBlocks.size() != 1)
+ if (AftBlocks.size() != 1) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle "
+ "multiple blocks after the loop\n");
return false;
+ }
- // Check inner loop IV is consistent between all iterations
- const SCEV *SubLoopBECountSC = SE.getExitCount(SubLoop, SubLoopLatch);
- if (isa<SCEVCouldNotCompute>(SubLoopBECountSC) ||
- !SubLoopBECountSC->getType()->isIntegerTy())
- return false;
- ScalarEvolution::LoopDisposition LD =
- SE.getLoopDisposition(SubLoopBECountSC, L);
- if (LD != ScalarEvolution::LoopInvariant)
+ // Check inner loop backedge count is consistent on all iterations of the
+ // outer loop
+ if (!hasIterationCountInvariantInParent(SubLoop, SE)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is "
+ "not consistent on each iteration\n");
return false;
+ }
// Check the loop safety info for exceptions.
- LoopSafetyInfo LSI;
- computeLoopSafetyInfo(&LSI, L);
- if (LSI.MayThrow)
+ SimpleLoopSafetyInfo LSI;
+ LSI.computeLoopSafetyInfo(L);
+ if (LSI.anyBlockMayThrow()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n");
return false;
+ }
// We've ruled out the easy stuff and now need to check that there are no
// interdependencies which may prevent us from moving the:
@@ -772,14 +801,19 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
}
// Keep going
return true;
- }))
+ })) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required "
+ "instructions after subloop to before it\n");
return false;
+ }
// Check for memory dependencies which prohibit the unrolling we are doing.
// Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
// there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
- if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI))
+ if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n");
return false;
+ }
return true;
}
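
[Illustrative sketch, not part of this diff: UnrollAndJamLoop gains the same kind of out-parameter, Loop **EpilogueLoop, so the caller can see the epilogue loop created for a runtime trip count. Everything here other than the UnrollAndJamLoop parameter list is an assumed stand-in for the caller's state, and AddLoopToWorklist is hypothetical.]

    // Hypothetical call site for the extended UnrollAndJamLoop interface.
    Loop *EpilogueLoop = nullptr;
    LoopUnrollResult UJResult =
        UnrollAndJamLoop(L, /*Count*/ 2, TripCount, TripMultiple,
                         /*UnrollRemainder*/ false, LI, SE, DT, AC, &ORE,
                         &EpilogueLoop);
    if (EpilogueLoop)
      AddLoopToWorklist(EpilogueLoop); // hypothetical caller bookkeeping
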
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 78afe748e596..151a285af4e9 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -615,11 +615,17 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// the original loop body.
if (Iter == 0)
DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+#endif
}
- updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
+ auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
+ updateBranchWeights(InsertBot, LatchBRCopy, Iter,
PeelCount, ExitWeight);
+ // Remove Loop metadata from the latch branch instruction
+ // because it is not the Loop's latch branch anymore.
+ LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
InsertTop = InsertBot;
InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 0057b4ba7ce1..00d2fd2fdbac 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -70,6 +70,17 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *PreHeader, BasicBlock *NewPreHeader,
ValueToValueMapTy &VMap, DominatorTree *DT,
LoopInfo *LI, bool PreserveLCSSA) {
+ // Loop structure should be the following:
+ // Preheader
+ // PrologHeader
+ // ...
+ // PrologLatch
+ // PrologExit
+ // NewPreheader
+ // Header
+ // ...
+ // Latch
+ // LatchExit
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
@@ -83,14 +94,21 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
for (PHINode &PN : Succ->phis()) {
// Add a new PHI node to the prolog end block and add the
// appropriate incoming values.
+ // TODO: This code assumes that the PrologExit (or the LatchExit block for
+  // the prolog loop) contains only one predecessor from the loop, i.e. the
+ // PrologLatch. When supporting multiple-exiting block loops, we can have
+ // two or more blocks that have the LatchExit as the target in the
+ // original loop.
PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
PrologExit->getFirstNonPHI());
// Adding a value to the new PHI node from the original loop preheader.
// This is the value that skips all the prolog code.
if (L->contains(&PN)) {
+ // Succ is loop header.
NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
PreHeader);
} else {
+ // Succ is LatchExit.
NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
}
@@ -124,7 +142,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
PrologExitPreds.push_back(PredBB);
SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
- PreserveLCSSA);
+ nullptr, PreserveLCSSA);
}
// Create a branch around the original loop, which is taken if there are no
@@ -143,7 +161,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
- PreserveLCSSA);
+ nullptr, PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
InsertPt->eraseFromParent();
@@ -257,7 +275,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
assert(Exit && "Loop must have a single exit block only");
// Split the epilogue exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
- SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
+ SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolling loop)
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
@@ -267,7 +285,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// Split the main loop exit to maintain canonicalization guarantees.
SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
- SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI,
+ SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
PreserveLCSSA);
}
@@ -380,6 +398,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
}
if (CreateRemainderLoop) {
Loop *NewLoop = NewLoops[L];
+ MDNode *LoopID = NewLoop->getLoopID();
assert(NewLoop && "L should have been cloned");
// Only add loop metadata if the loop is not going to be completely
@@ -387,6 +406,16 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
if (UnrollRemainder)
return NewLoop;
+ Optional<MDNode *> NewLoopID = makeFollowupLoopID(
+ LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
+ if (NewLoopID.hasValue()) {
+ NewLoop->setLoopID(NewLoopID.getValue());
+
+ // Do not setLoopAlreadyUnrolled if loop attributes have been defined
+ // explicitly.
+ return NewLoop;
+ }
+
// Add unroll disable metadata to disable future unrolling for this loop.
NewLoop->setLoopAlreadyUnrolled();
return NewLoop;
@@ -525,10 +554,10 @@ static bool canProfitablyUnrollMultiExitLoop(
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder,
- bool UnrollRemainder,
- LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC,
- bool PreserveLCSSA) {
+ bool UnrollRemainder, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, bool PreserveLCSSA,
+ Loop **ResultLoop) {
LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
LLVM_DEBUG(L->dump());
LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -545,13 +574,27 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
BasicBlock *Header = L->getHeader();
BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+
+ if (!LatchBR || LatchBR->isUnconditional()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ LLVM_DEBUG(
+ dbgs()
+ << "Loop latch not terminated by a conditional branch.\n");
+ return false;
+ }
+
unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
- // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
- // targets of the Latch be an exit block out of the loop. This needs
- // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
- assert(!L->contains(LatchExit) &&
- "one of the loop latch successors should be the exit block!");
+
+ if (L->contains(LatchExit)) {
+ // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
+ // targets of the Latch be an exit block out of the loop.
+ LLVM_DEBUG(
+ dbgs()
+ << "One of the loop latch successors must be the exit block.\n");
+ return false;
+ }
+
// These are exit blocks other than the target of the latch exiting block.
SmallVector<BasicBlock *, 4> OtherExits;
bool isMultiExitUnrollingEnabled =
@@ -636,8 +679,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
NewPreHeader->setName(PreHeader->getName() + ".new");
// Split LatchExit to create phi nodes from branch above.
SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
- NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
- DT, LI, PreserveLCSSA);
+ NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
+ nullptr, PreserveLCSSA);
// NewExit gets its DebugLoc from LatchExit, which is not part of the
// original Loop.
// Fix this by setting Loop's DebugLoc to NewExit.
@@ -762,10 +805,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Now the loop blocks are cloned and the other exiting blocks from the
// remainder are connected to the original Loop's exit blocks. The remaining
// work is to update the phi nodes in the original loop, and take in the
- // values from the cloned region. Also update the dominator info for
- // OtherExits and their immediate successors, since we have new edges into
- // OtherExits.
- SmallPtrSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
+ // values from the cloned region.
for (auto *BB : OtherExits) {
for (auto &II : *BB) {
@@ -800,27 +840,30 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
"Breaks the definition of dedicated exits!");
}
#endif
- // Update the dominator info because the immediate dominator is no longer the
- // header of the original Loop. BB has edges both from L and remainder code.
- // Since the preheader determines which loop is run (L or directly jump to
- // the remainder code), we set the immediate dominator as the preheader.
- if (DT) {
- DT->changeImmediateDominator(BB, PreHeader);
- // Also update the IDom for immediate successors of BB. If the current
- // IDom is the header, update the IDom to be the preheader because that is
- // the nearest common dominator of all predecessors of SuccBB. We need to
- // check for IDom being the header because successors of exit blocks can
- // have edges from outside the loop, and we should not incorrectly update
- // the IDom in that case.
- for (BasicBlock *SuccBB: successors(BB))
- if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) {
- if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) {
- assert(!SuccBB->getSinglePredecessor() &&
- "BB should be the IDom then!");
- DT->changeImmediateDominator(SuccBB, PreHeader);
- }
- }
+ }
+
+ // Update the immediate dominator of the exit blocks and blocks that are
+ // reachable from the exit blocks. This is needed because we now have paths
+ // from both the original loop and the remainder code reaching the exit
+  // blocks. While the IDom of these exit blocks was previously inside the
+  // original loop, the IDom is now the preheader (which decides whether the
+  // original loop or the remainder code should run).
+ if (DT && !L->getExitingBlock()) {
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ // NB! We have to examine the dom children of all loop blocks, not just
+ // those which are the IDom of the exit blocks. This is because blocks
+ // reachable from the exit blocks can have their IDom as the nearest common
+ // dominator of the exit blocks.
+ for (auto *BB : L->blocks()) {
+ auto *DomNodeBB = DT->getNode(BB);
+ for (auto *DomChild : DomNodeBB->getChildren()) {
+ auto *DomChildBB = DomChild->getBlock();
+ if (!L->contains(LI->getLoopFor(DomChildBB)))
+ ChildrenToUpdate.push_back(DomChildBB);
+ }
}
+ for (auto *BB : ChildrenToUpdate)
+ DT->changeImmediateDominator(BB, PreHeader);
}
// Loop structure should be the following:
@@ -884,6 +927,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// of its parent loops, so the Scalar Evolution pass needs to be run again.
SE->forgetTopmostLoop(L);
+ // Verify that the Dom Tree is correct.
+#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
+ if (DT)
+ assert(DT->verify(DominatorTree::VerificationLevel::Full));
+#endif
+
// Canonicalize to LoopSimplifyForm both original and remainder loops. We
// cannot rely on the LoopUnrollPass to do this because it only does
// canonicalization for parent/subloops and not the sibling loops.
@@ -897,16 +946,20 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
}
+ auto UnrollResult = LoopUnrollResult::Unmodified;
if (remainderLoop && UnrollRemainder) {
LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
- UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
- /*Force*/ false, /*AllowRuntime*/ false,
- /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
- /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
- /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
- /*ORE*/ nullptr, PreserveLCSSA);
+ UnrollResult =
+ UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
+ /*Force*/ false, /*AllowRuntime*/ false,
+ /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
+ /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
+ /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
+ /*ORE*/ nullptr, PreserveLCSSA);
}
+ if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
+ *ResultLoop = remainderLoop;
NumRuntimeUnrolled++;
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 46af120a428b..a93d1aeb62ef 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -26,8 +26,11 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
@@ -41,1104 +44,7 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-utils"
-bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
- SmallPtrSetImpl<Instruction *> &Set) {
- for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
- if (!Set.count(dyn_cast<Instruction>(*Use)))
- return false;
- return true;
-}
-
-bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
- switch (Kind) {
- default:
- break;
- case RK_IntegerAdd:
- case RK_IntegerMult:
- case RK_IntegerOr:
- case RK_IntegerAnd:
- case RK_IntegerXor:
- case RK_IntegerMinMax:
- return true;
- }
- return false;
-}
-
-bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
- return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
-}
-
-bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
- switch (Kind) {
- default:
- break;
- case RK_IntegerAdd:
- case RK_IntegerMult:
- case RK_FloatAdd:
- case RK_FloatMult:
- return true;
- }
- return false;
-}
-
-/// Determines if Phi may have been type-promoted. If Phi has a single user
-/// that ANDs the Phi with a type mask, return the user. RT is updated to
-/// account for the narrower bit width represented by the mask, and the AND
-/// instruction is added to CI.
-static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
- SmallPtrSetImpl<Instruction *> &Visited,
- SmallPtrSetImpl<Instruction *> &CI) {
- if (!Phi->hasOneUse())
- return Phi;
-
- const APInt *M = nullptr;
- Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
-
- // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
- // with a new integer type of the corresponding bit width.
- if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
- int32_t Bits = (*M + 1).exactLogBase2();
- if (Bits > 0) {
- RT = IntegerType::get(Phi->getContext(), Bits);
- Visited.insert(Phi);
- CI.insert(J);
- return J;
- }
- }
- return Phi;
-}
-
-/// Compute the minimal bit width needed to represent a reduction whose exit
-/// instruction is given by Exit.
-static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
- DemandedBits *DB,
- AssumptionCache *AC,
- DominatorTree *DT) {
- bool IsSigned = false;
- const DataLayout &DL = Exit->getModule()->getDataLayout();
- uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
-
- if (DB) {
- // Use the demanded bits analysis to determine the bits that are live out
- // of the exit instruction, rounding up to the nearest power of two. If the
- // use of demanded bits results in a smaller bit width, we know the value
- // must be positive (i.e., IsSigned = false), because if this were not the
- // case, the sign bit would have been demanded.
- auto Mask = DB->getDemandedBits(Exit);
- MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
- }
-
- if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
- // If demanded bits wasn't able to limit the bit width, we can try to use
- // value tracking instead. This can be the case, for example, if the value
- // may be negative.
- auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
- auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
- MaxBitWidth = NumTypeBits - NumSignBits;
- KnownBits Bits = computeKnownBits(Exit, DL);
- if (!Bits.isNonNegative()) {
- // If the value is not known to be non-negative, we set IsSigned to true,
- // meaning that we will use sext instructions instead of zext
- // instructions to restore the original type.
- IsSigned = true;
- if (!Bits.isNegative())
- // If the value is not known to be negative, we don't known what the
- // upper bit is, and therefore, we don't know what kind of extend we
- // will need. In this case, just increase the bit width by one bit and
- // use sext.
- ++MaxBitWidth;
- }
- }
- if (!isPowerOf2_64(MaxBitWidth))
- MaxBitWidth = NextPowerOf2(MaxBitWidth);
-
- return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
- IsSigned);
-}
-
-/// Collect cast instructions that can be ignored in the vectorizer's cost
-/// model, given a reduction exit value and the minimal type in which the
-/// reduction can be represented.
-static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
- Type *RecurrenceType,
- SmallPtrSetImpl<Instruction *> &Casts) {
-
- SmallVector<Instruction *, 8> Worklist;
- SmallPtrSet<Instruction *, 8> Visited;
- Worklist.push_back(Exit);
-
- while (!Worklist.empty()) {
- Instruction *Val = Worklist.pop_back_val();
- Visited.insert(Val);
- if (auto *Cast = dyn_cast<CastInst>(Val))
- if (Cast->getSrcTy() == RecurrenceType) {
- // If the source type of a cast instruction is equal to the recurrence
- // type, it will be eliminated, and should be ignored in the vectorizer
- // cost model.
- Casts.insert(Cast);
- continue;
- }
-
- // Add all operands to the work list if they are loop-varying values that
- // we haven't yet visited.
- for (Value *O : cast<User>(Val)->operands())
- if (auto *I = dyn_cast<Instruction>(O))
- if (TheLoop->contains(I) && !Visited.count(I))
- Worklist.push_back(I);
- }
-}
-
-bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
- Loop *TheLoop, bool HasFunNoNaNAttr,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB,
- AssumptionCache *AC,
- DominatorTree *DT) {
- if (Phi->getNumIncomingValues() != 2)
- return false;
-
- // Reduction variables are only found in the loop header block.
- if (Phi->getParent() != TheLoop->getHeader())
- return false;
-
- // Obtain the reduction start value from the value that comes from the loop
- // preheader.
- Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
-
- // ExitInstruction is the single value which is used outside the loop.
- // We only allow for a single reduction value to be used outside the loop.
- // This includes users of the reduction, variables (which form a cycle
- // which ends in the phi node).
- Instruction *ExitInstruction = nullptr;
- // Indicates that we found a reduction operation in our scan.
- bool FoundReduxOp = false;
-
- // We start with the PHI node and scan for all of the users of this
- // instruction. All users must be instructions that can be used as reduction
- // variables (such as ADD). We must have a single out-of-block user. The cycle
- // must include the original PHI.
- bool FoundStartPHI = false;
-
- // To recognize min/max patterns formed by a icmp select sequence, we store
- // the number of instruction we saw from the recognized min/max pattern,
- // to make sure we only see exactly the two instructions.
- unsigned NumCmpSelectPatternInst = 0;
- InstDesc ReduxDesc(false, nullptr);
-
- // Data used for determining if the recurrence has been type-promoted.
- Type *RecurrenceType = Phi->getType();
- SmallPtrSet<Instruction *, 4> CastInsts;
- Instruction *Start = Phi;
- bool IsSigned = false;
-
- SmallPtrSet<Instruction *, 8> VisitedInsts;
- SmallVector<Instruction *, 8> Worklist;
-
- // Return early if the recurrence kind does not match the type of Phi. If the
- // recurrence kind is arithmetic, we attempt to look through AND operations
- // resulting from the type promotion performed by InstCombine. Vector
- // operations are not limited to the legal integer widths, so we may be able
- // to evaluate the reduction in the narrower width.
- if (RecurrenceType->isFloatingPointTy()) {
- if (!isFloatingPointRecurrenceKind(Kind))
- return false;
- } else {
- if (!isIntegerRecurrenceKind(Kind))
- return false;
- if (isArithmeticRecurrenceKind(Kind))
- Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
- }
-
- Worklist.push_back(Start);
- VisitedInsts.insert(Start);
-
- // A value in the reduction can be used:
- // - By the reduction:
- // - Reduction operation:
- // - One use of reduction value (safe).
- // - Multiple use of reduction value (not safe).
- // - PHI:
- // - All uses of the PHI must be the reduction (safe).
- // - Otherwise, not safe.
- // - By instructions outside of the loop (safe).
- // * One value may have several outside users, but all outside
- // uses must be of the same value.
- // - By an instruction that is not part of the reduction (not safe).
- // This is either:
- // * An instruction type other than PHI or the reduction operation.
- // * A PHI in the header other than the initial PHI.
- while (!Worklist.empty()) {
- Instruction *Cur = Worklist.back();
- Worklist.pop_back();
-
- // No Users.
- // If the instruction has no users then this is a broken chain and can't be
- // a reduction variable.
- if (Cur->use_empty())
- return false;
-
- bool IsAPhi = isa<PHINode>(Cur);
-
- // A header PHI use other than the original PHI.
- if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
- return false;
-
- // Reductions of instructions such as Div, and Sub is only possible if the
- // LHS is the reduction variable.
- if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
- !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
- !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
- return false;
-
- // Any reduction instruction must be of one of the allowed kinds. We ignore
- // the starting value (the Phi or an AND instruction if the Phi has been
- // type-promoted).
- if (Cur != Start) {
- ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
- if (!ReduxDesc.isRecurrence())
- return false;
- }
-
- // A reduction operation must only have one use of the reduction value.
- if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
- hasMultipleUsesOf(Cur, VisitedInsts))
- return false;
-
- // All inputs to a PHI node must be a reduction value.
- if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
- return false;
-
- if (Kind == RK_IntegerMinMax &&
- (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
- ++NumCmpSelectPatternInst;
- if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
- ++NumCmpSelectPatternInst;
-
- // Check whether we found a reduction operator.
- FoundReduxOp |= !IsAPhi && Cur != Start;
-
- // Process users of current instruction. Push non-PHI nodes after PHI nodes
- // onto the stack. This way we are going to have seen all inputs to PHI
- // nodes once we get to them.
- SmallVector<Instruction *, 8> NonPHIs;
- SmallVector<Instruction *, 8> PHIs;
- for (User *U : Cur->users()) {
- Instruction *UI = cast<Instruction>(U);
-
- // Check if we found the exit user.
- BasicBlock *Parent = UI->getParent();
- if (!TheLoop->contains(Parent)) {
- // If we already know this instruction is used externally, move on to
- // the next user.
- if (ExitInstruction == Cur)
- continue;
-
- // Exit if you find multiple values used outside or if the header phi
- // node is being used. In this case the user uses the value of the
- // previous iteration, in which case we would loose "VF-1" iterations of
- // the reduction operation if we vectorize.
- if (ExitInstruction != nullptr || Cur == Phi)
- return false;
-
- // The instruction used by an outside user must be the last instruction
- // before we feed back to the reduction phi. Otherwise, we loose VF-1
- // operations on the value.
- if (!is_contained(Phi->operands(), Cur))
- return false;
-
- ExitInstruction = Cur;
- continue;
- }
-
- // Process instructions only once (termination). Each reduction cycle
- // value must only be used once, except by phi nodes and min/max
- // reductions which are represented as a cmp followed by a select.
- InstDesc IgnoredVal(false, nullptr);
- if (VisitedInsts.insert(UI).second) {
- if (isa<PHINode>(UI))
- PHIs.push_back(UI);
- else
- NonPHIs.push_back(UI);
- } else if (!isa<PHINode>(UI) &&
- ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
- !isa<SelectInst>(UI)) ||
- !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
- return false;
-
- // Remember that we completed the cycle.
- if (UI == Phi)
- FoundStartPHI = true;
- }
- Worklist.append(PHIs.begin(), PHIs.end());
- Worklist.append(NonPHIs.begin(), NonPHIs.end());
- }
-
- // This means we have seen one but not the other instruction of the
- // pattern or more than just a select and cmp.
- if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
- NumCmpSelectPatternInst != 2)
- return false;
-
- if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
- return false;
-
- if (Start != Phi) {
- // If the starting value is not the same as the phi node, we speculatively
- // looked through an 'and' instruction when evaluating a potential
- // arithmetic reduction to determine if it may have been type-promoted.
- //
- // We now compute the minimal bit width that is required to represent the
- // reduction. If this is the same width that was indicated by the 'and', we
- // can represent the reduction in the smaller type. The 'and' instruction
- // will be eliminated since it will essentially be a cast instruction that
- // can be ignore in the cost model. If we compute a different type than we
- // did when evaluating the 'and', the 'and' will not be eliminated, and we
- // will end up with different kinds of operations in the recurrence
- // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
- // the case.
- //
- // The vectorizer relies on InstCombine to perform the actual
- // type-shrinking. It does this by inserting instructions to truncate the
- // exit value of the reduction to the width indicated by RecurrenceType and
- // then extend this value back to the original width. If IsSigned is false,
- // a 'zext' instruction will be generated; otherwise, a 'sext' will be
- // used.
- //
- // TODO: We should not rely on InstCombine to rewrite the reduction in the
- // smaller type. We should just generate a correctly typed expression
- // to begin with.
- Type *ComputedType;
- std::tie(ComputedType, IsSigned) =
- computeRecurrenceType(ExitInstruction, DB, AC, DT);
- if (ComputedType != RecurrenceType)
- return false;
-
- // The recurrence expression will be represented in a narrower type. If
- // there are any cast instructions that will be unnecessary, collect them
- // in CastInsts. Note that the 'and' instruction was already included in
- // this list.
- //
- // TODO: A better way to represent this may be to tag in some way all the
- // instructions that are a part of the reduction. The vectorizer cost
- // model could then apply the recurrence type to these instructions,
- // without needing a white list of instructions to ignore.
- collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
- }
-
- // We found a reduction var if we have reached the original phi node and we
- // only have a single instruction with out-of-loop users.
-
- // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
- // is saved as part of the RecurrenceDescriptor.
-
- // Save the description of this reduction variable.
- RecurrenceDescriptor RD(
- RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
- ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
- RedDes = RD;
-
- return true;
-}
-
-/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
-/// pattern corresponding to a min(X, Y) or max(X, Y).
-RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
-
- assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
- "Expect a select instruction");
- Instruction *Cmp = nullptr;
- SelectInst *Select = nullptr;
-
- // We must handle the select(cmp()) as a single instruction. Advance to the
- // select.
- if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
- if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
- return InstDesc(false, I);
- return InstDesc(Select, Prev.getMinMaxKind());
- }
-
- // Only handle single use cases for now.
- if (!(Select = dyn_cast<SelectInst>(I)))
- return InstDesc(false, I);
- if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
- !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
- return InstDesc(false, I);
- if (!Cmp->hasOneUse())
- return InstDesc(false, I);
-
- Value *CmpLeft;
- Value *CmpRight;
-
- // Look for a min/max pattern.
- if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_UIntMin);
- else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_UIntMax);
- else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_SIntMax);
- else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_SIntMin);
- else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMin);
- else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMax);
- else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMin);
- else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMax);
-
- return InstDesc(false, I);
-}
-
-RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
- InstDesc &Prev, bool HasFunNoNaNAttr) {
- bool FP = I->getType()->isFloatingPointTy();
- Instruction *UAI = Prev.getUnsafeAlgebraInst();
- if (!UAI && FP && !I->isFast())
- UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
-
- switch (I->getOpcode()) {
- default:
- return InstDesc(false, I);
- case Instruction::PHI:
- return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
- case Instruction::Sub:
- case Instruction::Add:
- return InstDesc(Kind == RK_IntegerAdd, I);
- case Instruction::Mul:
- return InstDesc(Kind == RK_IntegerMult, I);
- case Instruction::And:
- return InstDesc(Kind == RK_IntegerAnd, I);
- case Instruction::Or:
- return InstDesc(Kind == RK_IntegerOr, I);
- case Instruction::Xor:
- return InstDesc(Kind == RK_IntegerXor, I);
- case Instruction::FMul:
- return InstDesc(Kind == RK_FloatMult, I, UAI);
- case Instruction::FSub:
- case Instruction::FAdd:
- return InstDesc(Kind == RK_FloatAdd, I, UAI);
- case Instruction::FCmp:
- case Instruction::ICmp:
- case Instruction::Select:
- if (Kind != RK_IntegerMinMax &&
- (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
- return InstDesc(false, I);
- return isMinMaxSelectCmpPattern(I, Prev);
- }
-}
-
-bool RecurrenceDescriptor::hasMultipleUsesOf(
- Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
- unsigned NumUses = 0;
- for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
- ++Use) {
- if (Insts.count(dyn_cast<Instruction>(*Use)))
- ++NumUses;
- if (NumUses > 1)
- return true;
- }
-
- return false;
-}
-bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB, AssumptionCache *AC,
- DominatorTree *DT) {
-
- BasicBlock *Header = TheLoop->getHeader();
- Function &F = *Header->getParent();
- bool HasFunNoNaNAttr =
- F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
-
- if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
- DB, AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi
- << "\n");
- return true;
- }
- // Not a reduction of known type.
- return false;
-}
-
-bool RecurrenceDescriptor::isFirstOrderRecurrence(
- PHINode *Phi, Loop *TheLoop,
- DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
-
- // Ensure the phi node is in the loop header and has two incoming values.
- if (Phi->getParent() != TheLoop->getHeader() ||
- Phi->getNumIncomingValues() != 2)
- return false;
-
- // Ensure the loop has a preheader and a single latch block. The loop
- // vectorizer will need the latch to set up the next iteration of the loop.
- auto *Preheader = TheLoop->getLoopPreheader();
- auto *Latch = TheLoop->getLoopLatch();
- if (!Preheader || !Latch)
- return false;
-
- // Ensure the phi node's incoming blocks are the loop preheader and latch.
- if (Phi->getBasicBlockIndex(Preheader) < 0 ||
- Phi->getBasicBlockIndex(Latch) < 0)
- return false;
-
- // Get the previous value. The previous value comes from the latch edge while
- // the initial value comes form the preheader edge.
- auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
- if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
- SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
- return false;
-
- // Ensure every user of the phi node is dominated by the previous value.
- // The dominance requirement ensures the loop vectorizer will not need to
- // vectorize the initial value prior to the first iteration of the loop.
- // TODO: Consider extending this sinking to handle other kinds of instructions
- // and expressions, beyond sinking a single cast past Previous.
- if (Phi->hasOneUse()) {
- auto *I = Phi->user_back();
- if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
- DT->dominates(Previous, I->user_back())) {
- if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
- SinkAfter[I] = Previous;
- return true;
- }
- }
-
- for (User *U : Phi->users())
- if (auto *I = dyn_cast<Instruction>(U)) {
- if (!DT->dominates(Previous, I))
- return false;
- }
-
- return true;
-}
-
-/// This function returns the identity element (or neutral element) for
-/// the operation K.
-Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
- Type *Tp) {
- switch (K) {
- case RK_IntegerXor:
- case RK_IntegerAdd:
- case RK_IntegerOr:
- // Adding, Xoring, Oring zero to a number does not change it.
- return ConstantInt::get(Tp, 0);
- case RK_IntegerMult:
- // Multiplying a number by 1 does not change it.
- return ConstantInt::get(Tp, 1);
- case RK_IntegerAnd:
- // AND-ing a number with an all-1 value does not change it.
- return ConstantInt::get(Tp, -1, true);
- case RK_FloatMult:
- // Multiplying a number by 1 does not change it.
- return ConstantFP::get(Tp, 1.0L);
- case RK_FloatAdd:
- // Adding zero to a number does not change it.
- return ConstantFP::get(Tp, 0.0L);
- default:
- llvm_unreachable("Unknown recurrence kind");
- }
-}
-
-/// This function translates the recurrence kind to an LLVM binary operator.
-unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
- switch (Kind) {
- case RK_IntegerAdd:
- return Instruction::Add;
- case RK_IntegerMult:
- return Instruction::Mul;
- case RK_IntegerOr:
- return Instruction::Or;
- case RK_IntegerAnd:
- return Instruction::And;
- case RK_IntegerXor:
- return Instruction::Xor;
- case RK_FloatMult:
- return Instruction::FMul;
- case RK_FloatAdd:
- return Instruction::FAdd;
- case RK_IntegerMinMax:
- return Instruction::ICmp;
- case RK_FloatMinMax:
- return Instruction::FCmp;
- default:
- llvm_unreachable("Unknown recurrence operation");
- }
-}
-
-Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
- MinMaxRecurrenceKind RK,
- Value *Left, Value *Right) {
- CmpInst::Predicate P = CmpInst::ICMP_NE;
- switch (RK) {
- default:
- llvm_unreachable("Unknown min/max recurrence kind");
- case MRK_UIntMin:
- P = CmpInst::ICMP_ULT;
- break;
- case MRK_UIntMax:
- P = CmpInst::ICMP_UGT;
- break;
- case MRK_SIntMin:
- P = CmpInst::ICMP_SLT;
- break;
- case MRK_SIntMax:
- P = CmpInst::ICMP_SGT;
- break;
- case MRK_FloatMin:
- P = CmpInst::FCMP_OLT;
- break;
- case MRK_FloatMax:
- P = CmpInst::FCMP_OGT;
- break;
- }
-
- // We only match FP sequences that are 'fast', so we can unconditionally
- // set it on any generated instructions.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- FastMathFlags FMF;
- FMF.setFast();
- Builder.setFastMathFlags(FMF);
-
- Value *Cmp;
- if (RK == MRK_FloatMin || RK == MRK_FloatMax)
- Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
- else
- Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
-
- Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
- return Select;
-}
-
-InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
- const SCEV *Step, BinaryOperator *BOp,
- SmallVectorImpl<Instruction *> *Casts)
- : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
- assert(IK != IK_NoInduction && "Not an induction");
-
- // Start value type should match the induction kind and the value
- // itself should not be null.
- assert(StartValue && "StartValue is null");
- assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
- "StartValue is not a pointer for pointer induction");
- assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
- "StartValue is not an integer for integer induction");
-
- // Check the Step Value. It should be non-zero integer value.
- assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
- "Step value is zero");
-
- assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
- "Step value should be constant for pointer induction");
- assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
- "StepValue is not an integer");
-
- assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
- "StepValue is not FP for FpInduction");
- assert((IK != IK_FpInduction || (InductionBinOp &&
- (InductionBinOp->getOpcode() == Instruction::FAdd ||
- InductionBinOp->getOpcode() == Instruction::FSub))) &&
- "Binary opcode should be specified for FP induction");
-
- if (Casts) {
- for (auto &Inst : *Casts) {
- RedundantCasts.push_back(Inst);
- }
- }
-}
-
-int InductionDescriptor::getConsecutiveDirection() const {
- ConstantInt *ConstStep = getConstIntStepValue();
- if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
- return ConstStep->getSExtValue();
- return 0;
-}
-
-ConstantInt *InductionDescriptor::getConstIntStepValue() const {
- if (isa<SCEVConstant>(Step))
- return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
- return nullptr;
-}
-
-Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
- ScalarEvolution *SE,
- const DataLayout& DL) const {
-
- SCEVExpander Exp(*SE, DL, "induction");
- assert(Index->getType() == Step->getType() &&
- "Index type does not match StepValue type");
- switch (IK) {
- case IK_IntInduction: {
- assert(Index->getType() == StartValue->getType() &&
- "Index type does not match StartValue type");
-
- // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
- // and calculate (Start + Index * Step) for all cases, without
- // special handling for "isOne" and "isMinusOne".
- // But in practice the resulting code is worse: we would mix SCEV
- // expressions with ADD/SUB operations and end up with redundant
- // intermediate values computed in different ways, which InstCombine
- // is unable to reduce.
-
- if (getConstIntStepValue() &&
- getConstIntStepValue()->isMinusOne())
- return B.CreateSub(StartValue, Index);
- if (getConstIntStepValue() &&
- getConstIntStepValue()->isOne())
- return B.CreateAdd(StartValue, Index);
- const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
- SE->getMulExpr(Step, SE->getSCEV(Index)));
- return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
- }
- case IK_PtrInduction: {
- assert(isa<SCEVConstant>(Step) &&
- "Expected constant step for pointer induction");
- const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
- Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
- return B.CreateGEP(nullptr, StartValue, Index);
- }
- case IK_FpInduction: {
- assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
- assert(InductionBinOp &&
- (InductionBinOp->getOpcode() == Instruction::FAdd ||
- InductionBinOp->getOpcode() == Instruction::FSub) &&
- "Original bin op should be defined for FP induction");
-
- Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
-
- // Floating point operations had to be 'fast' to enable the induction.
- FastMathFlags Flags;
- Flags.setFast();
-
- Value *MulExp = B.CreateFMul(StepValue, Index);
- if (isa<Instruction>(MulExp))
- // We have to check because MulExp may fold to a constant.
- cast<Instruction>(MulExp)->setFastMathFlags(Flags);
-
- Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue,
- MulExp, "induction");
- if (isa<Instruction>(BOp))
- cast<Instruction>(BOp)->setFastMathFlags(Flags);
-
- return BOp;
- }
- case IK_NoInduction:
- return nullptr;
- }
- llvm_unreachable("invalid enum");
-}
-
-bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
- ScalarEvolution *SE,
- InductionDescriptor &D) {
-
- // Here we only handle FP induction variables.
- assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
-
- if (TheLoop->getHeader() != Phi->getParent())
- return false;
-
- // The loop may have multiple entrances or multiple exits; we can analyze
- // this phi if it has a unique entry value and a unique backedge value.
- if (Phi->getNumIncomingValues() != 2)
- return false;
- Value *BEValue = nullptr, *StartValue = nullptr;
- if (TheLoop->contains(Phi->getIncomingBlock(0))) {
- BEValue = Phi->getIncomingValue(0);
- StartValue = Phi->getIncomingValue(1);
- } else {
- assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
- "Unexpected Phi node in the loop");
- BEValue = Phi->getIncomingValue(1);
- StartValue = Phi->getIncomingValue(0);
- }
-
- BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
- if (!BOp)
- return false;
-
- Value *Addend = nullptr;
- if (BOp->getOpcode() == Instruction::FAdd) {
- if (BOp->getOperand(0) == Phi)
- Addend = BOp->getOperand(1);
- else if (BOp->getOperand(1) == Phi)
- Addend = BOp->getOperand(0);
- } else if (BOp->getOpcode() == Instruction::FSub)
- if (BOp->getOperand(0) == Phi)
- Addend = BOp->getOperand(1);
-
- if (!Addend)
- return false;
-
- // The addend should be loop invariant
- if (auto *I = dyn_cast<Instruction>(Addend))
- if (TheLoop->contains(I))
- return false;
-
- // FP Step has unknown SCEV
- const SCEV *Step = SE->getUnknown(Addend);
- D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
- return true;
-}
-
-/// This function is called when we suspect that the update-chain of a phi node
-/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts
-/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
-/// predicate P under which the SCEV expression for the phi can be the
-/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
-/// cast instructions that are involved in the update-chain of this induction.
-/// A caller that adds the required runtime predicate is then free to drop these
-/// cast instructions, and compute the phi using \p AR (instead of some scev
-/// expression with casts).
-///
-/// For example, without a predicate the scev expression can take the following
-/// form:
-/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
-///
-/// It corresponds to the following IR sequence:
-/// %for.body:
-/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
-/// %casted_phi = "ExtTrunc i64 %x"
-/// %add = add i64 %casted_phi, %step
-///
-/// where %x is given in \p PN,
-/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
-/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
-/// several forms, for example, such as:
-/// ExtTrunc1: %casted_phi = and %x, 2^n-1
-/// or:
-/// ExtTrunc2: %t = shl %x, m
-/// %casted_phi = ashr %t, m
-///
-/// If we are able to find such sequence, we return the instructions
-/// we found, namely %casted_phi and the instructions on its use-def chain up
-/// to the phi (not including the phi).
-static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
- const SCEVUnknown *PhiScev,
- const SCEVAddRecExpr *AR,
- SmallVectorImpl<Instruction *> &CastInsts) {
-
- assert(CastInsts.empty() && "CastInsts is expected to be empty.");
- auto *PN = cast<PHINode>(PhiScev->getValue());
- assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
- const Loop *L = AR->getLoop();
-
- // Find any cast instructions that participate in the def-use chain of
- // PhiScev in the loop.
- // FORNOW/TODO: We currently expect the def-use chain to include only
- // two-operand instructions, where one of the operands is an invariant.
- // createAddRecFromPHIWithCasts() currently does not support anything more
- // involved than that, so we keep the search simple. This can be
- // extended/generalized as needed.
-
- auto getDef = [&](const Value *Val) -> Value * {
- const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
- if (!BinOp)
- return nullptr;
- Value *Op0 = BinOp->getOperand(0);
- Value *Op1 = BinOp->getOperand(1);
- Value *Def = nullptr;
- if (L->isLoopInvariant(Op0))
- Def = Op1;
- else if (L->isLoopInvariant(Op1))
- Def = Op0;
- return Def;
- };
-
- // Look for the instruction that defines the induction via the
- // loop backedge.
- BasicBlock *Latch = L->getLoopLatch();
- if (!Latch)
- return false;
- Value *Val = PN->getIncomingValueForBlock(Latch);
- if (!Val)
- return false;
-
- // Follow the def-use chain until the induction phi is reached.
- // If on the way we encounter a Value that has the same SCEV Expr as the
- // phi node, we can consider the instructions we visit from that point
- // as part of the cast-sequence that can be ignored.
- bool InCastSequence = false;
- auto *Inst = dyn_cast<Instruction>(Val);
- while (Val != PN) {
- // If we encountered a phi node other than PN, or if we left the loop,
- // we bail out.
- if (!Inst || !L->contains(Inst)) {
- return false;
- }
- auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
- if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
- InCastSequence = true;
- if (InCastSequence) {
- // Only the last instruction in the cast sequence is expected to have
- // uses outside the induction def-use chain.
- if (!CastInsts.empty())
- if (!Inst->hasOneUse())
- return false;
- CastInsts.push_back(Inst);
- }
- Val = getDef(Val);
- if (!Val)
- return false;
- Inst = dyn_cast<Instruction>(Val);
- }
-
- return InCastSequence;
-}
-
-bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
- PredicatedScalarEvolution &PSE,
- InductionDescriptor &D,
- bool Assume) {
- Type *PhiTy = Phi->getType();
-
- // Handle integer and pointer induction variables.
- // FP inductions are also handled here, but without trying to form a
- // recurrent expression from the PHI node in place.
-
- if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() &&
- !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
- return false;
-
- if (PhiTy->isFloatingPointTy())
- return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
-
- const SCEV *PhiScev = PSE.getSCEV(Phi);
- const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
-
- // We need this expression to be an AddRecExpr.
- if (Assume && !AR)
- AR = PSE.getAsAddRec(Phi);
-
- if (!AR) {
- LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
- return false;
- }
-
- // Record any Cast instructions that participate in the induction update
- const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
- // If we started from an UnknownSCEV, and managed to build an AddRecurrence
- // only after enabling Assume with PSCEV, this means we may have encountered
- // cast instructions that required adding a runtime check in order to
- // guarantee the correctness of the AddRecurrence representation of the
- // induction.
- if (PhiScev != AR && SymbolicPhi) {
- SmallVector<Instruction *, 2> Casts;
- if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
- return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
- }
-
- return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
-}
-
-bool InductionDescriptor::isInductionPHI(
- PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
- InductionDescriptor &D, const SCEV *Expr,
- SmallVectorImpl<Instruction *> *CastsToIgnore) {
- Type *PhiTy = Phi->getType();
- // We only handle integer and pointer induction variables.
- if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
- return false;
-
- // Check that the PHI is consecutive.
- const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
-
- if (!AR) {
- LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
- return false;
- }
-
- if (AR->getLoop() != TheLoop) {
- // FIXME: We should treat this as a uniform. Unfortunately, we
- // don't currently know how to handle uniform PHIs.
- LLVM_DEBUG(
- dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
- return false;
- }
-
- Value *StartValue =
- Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
- const SCEV *Step = AR->getStepRecurrence(*SE);
- // Calculate the pointer stride and check if it is consecutive.
- // The stride may be a constant or a loop invariant integer value.
- const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
- if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
- return false;
-
- if (PhiTy->isIntegerTy()) {
- D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr,
- CastsToIgnore);
- return true;
- }
-
- assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
- // Pointer induction should be a constant.
- if (!ConstStep)
- return false;
-
- ConstantInt *CV = ConstStep->getValue();
- Type *PointerElementType = PhiTy->getPointerElementType();
- // The pointer stride cannot be determined if the pointer element type is not
- // sized.
- if (!PointerElementType->isSized())
- return false;
-
- const DataLayout &DL = Phi->getModule()->getDataLayout();
- int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
- if (!Size)
- return false;
-
- int64_t CVSize = CV->getSExtValue();
- if (CVSize % Size)
- return false;
- auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
- true /* signed */);
- D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
- return true;
-}
+static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA) {
@@ -1173,7 +79,7 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
return false;
auto *NewExitBB = SplitBlockPredecessors(
- BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);
+ BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA);
if (!NewExitBB)
LLVM_DEBUG(
@@ -1286,37 +192,231 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) {
/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
-Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
+Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop,
StringRef Name) {
- MDNode *LoopID = TheLoop->getLoopID();
- // Return none if LoopID is false.
- if (!LoopID)
+ MDNode *MD = findOptionMDForLoop(TheLoop, Name);
+ if (!MD)
return None;
+ switch (MD->getNumOperands()) {
+ case 1:
+ return nullptr;
+ case 2:
+ return &MD->getOperand(1);
+ default:
+ llvm_unreachable("loop metadata has 0 or 1 operand");
+ }
+}
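+// An illustrative sketch of the behavior: given loop metadata of the usual
+// shape
+//   !0 = distinct !{!0, !1, !2}
+//   !1 = !{!"llvm.loop.unroll.disable"}
+//   !2 = !{!"llvm.loop.unroll.count", i32 4}
+// querying "llvm.loop.unroll.count" yields a pointer to the i32 4 operand,
+// "llvm.loop.unroll.disable" yields nullptr (option present, no value), and
+// any other name yields None.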
- // First operand should refer to the loop id itself.
- assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
+ StringRef Name) {
+ MDNode *MD = findOptionMDForLoop(TheLoop, Name);
+ if (!MD)
+ return None;
+ switch (MD->getNumOperands()) {
+ case 1:
+ // When the value is absent it is interpreted as 'attribute set'.
+ return true;
+ case 2:
+ return mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get());
+ }
+ llvm_unreachable("unexpected number of options");
+}
- // Iterate over LoopID operands and look for MDString Metadata
- for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
- MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (!MD)
- continue;
- MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- if (!S)
+static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
+ return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
+}
+
+llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
+ StringRef Name) {
+ const MDOperand *AttrMD =
+ findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
+ if (!AttrMD)
+ return None;
+
+ ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get());
+ if (!IntMD)
+ return None;
+
+ return IntMD->getSExtValue();
+}
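+// For example (same metadata shape as above), querying
+// "llvm.loop.unroll.count" through getOptionalIntLoopAttribute returns 4; a
+// missing option or a non-integer operand yields None.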
+
+Optional<MDNode *> llvm::makeFollowupLoopID(
+ MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
+ const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
+ if (!OrigLoopID) {
+ if (AlwaysNew)
+ return nullptr;
+ return None;
+ }
+
+ assert(OrigLoopID->getOperand(0) == OrigLoopID);
+
+ bool InheritAllAttrs = !InheritOptionsExceptPrefix;
+ bool InheritSomeAttrs =
+ InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
+ SmallVector<Metadata *, 8> MDs;
+ MDs.push_back(nullptr);
+
+ bool Changed = false;
+ if (InheritAllAttrs || InheritSomeAttrs) {
+ for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) {
+ MDNode *Op = cast<MDNode>(Existing.get());
+
+ auto InheritThisAttribute = [InheritSomeAttrs,
+ InheritOptionsExceptPrefix](MDNode *Op) {
+ if (!InheritSomeAttrs)
+ return false;
+
+ // Skip malformed attribute metadata nodes.
+ if (Op->getNumOperands() == 0)
+ return true;
+ Metadata *NameMD = Op->getOperand(0).get();
+ if (!isa<MDString>(NameMD))
+ return true;
+ StringRef AttrName = cast<MDString>(NameMD)->getString();
+
+ // Do not inherit excluded attributes.
+ return !AttrName.startswith(InheritOptionsExceptPrefix);
+ };
+
+ if (InheritThisAttribute(Op) || InheritAllAttrs)
+ MDs.push_back(Op);
+ else
+ Changed = true;
+ }
+ } else {
+ // Modified if we dropped at least one attribute.
+ Changed = OrigLoopID->getNumOperands() > 1;
+ }
+
+ bool HasAnyFollowup = false;
+ for (StringRef OptionName : FollowupOptions) {
+ MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
+ if (!FollowupNode)
continue;
- // Return true if MDString holds expected MetaData.
- if (Name.equals(S->getString()))
- switch (MD->getNumOperands()) {
- case 1:
- return nullptr;
- case 2:
- return &MD->getOperand(1);
- default:
- llvm_unreachable("loop metadata has 0 or 1 operand");
- }
+
+ HasAnyFollowup = true;
+ for (const MDOperand &Option : drop_begin(FollowupNode->operands(), 1)) {
+ MDs.push_back(Option.get());
+ Changed = true;
+ }
}
- return None;
+
+ // Attributes of the followup loop were not specified explicitly, so signal
+ // to the transformation pass to add suitable attributes.
+ if (!AlwaysNew && !HasAnyFollowup)
+ return None;
+
+ // If no attributes were added or removed, the previous loop ID can be reused.
+ if (!AlwaysNew && !Changed)
+ return OrigLoopID;
+
+ // No attributes is equivalent to having no !llvm.loop metadata at all.
+ if (MDs.size() == 1)
+ return nullptr;
+
+ // Build the new loop ID.
+ MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
+ FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
+ return FollowupLoopID;
+}
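+// An illustrative sketch (the option names are examples): an unroll pass
+// might call
+//   makeFollowupLoopID(OrigLoopID, {"llvm.loop.unroll.followup_unrolled"},
+//                      "llvm.loop.unroll.")
+// which keeps every original attribute not starting with "llvm.loop.unroll."
+// and appends the operands of the followup option, if present. A result of
+// None tells the caller to derive the new attributes itself; nullptr means
+// the transformed loop should carry no !llvm.loop metadata at all.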
+
+bool llvm::hasDisableAllTransformsHint(const Loop *L) {
+ return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);
+}
+
+TransformationMode llvm::hasUnrollTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
+ return TM_SuppressedByUser;
+
+ Optional<int> Count =
+ getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
+ if (Count.hasValue())
+ return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
+ return TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
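+// A minimal usage sketch: callers typically honor the answer before running
+// their own cost model, e.g.
+//   if (hasUnrollTransformation(L) == TM_SuppressedByUser)
+//     return false; // the user disabled unrolling for this loop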
+
+TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
+ return TM_SuppressedByUser;
+
+ Optional<int> Count =
+ getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
+ if (Count.hasValue())
+ return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
+ Optional<bool> Enable =
+ getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
+
+ if (Enable == false)
+ return TM_SuppressedByUser;
+
+ Optional<int> VectorizeWidth =
+ getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+ Optional<int> InterleaveCount =
+ getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
+
+ if (Enable == true) {
+ // 'Forcing' vector width and interleave count to one effectively disables
+ // this transformation.
+ if (VectorizeWidth == 1 && InterleaveCount == 1)
+ return TM_SuppressedByUser;
+ return TM_ForcedByUser;
+ }
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
+ return TM_Disable;
+
+ if (VectorizeWidth == 1 && InterleaveCount == 1)
+ return TM_Disable;
+
+ if (VectorizeWidth > 1 || InterleaveCount > 1)
+ return TM_Enable;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
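+// Worked examples for the checks above (standard metadata spellings assumed):
+//   vectorize.width == 1 and interleave.count == 1 -> TM_Disable, or
+//     TM_SuppressedByUser when vectorize.enable is also set;
+//   vectorize.width > 1 or interleave.count > 1    -> TM_Enable;
+//   llvm.loop.isvectorized present                 -> TM_Disable.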
+
+TransformationMode llvm::hasDistributeTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
+ return TM_SuppressedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
}
/// Does a BFS from a given node to all of its children inside a given loop.
@@ -1425,14 +525,19 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
// Remove the old branch.
Preheader->getTerminator()->eraseFromParent();
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
if (DT) {
// Update the dominator tree by informing it about the new edge from the
// preheader to the exit.
- DT->insertEdge(Preheader, ExitBlock);
+ DTU.insertEdge(Preheader, ExitBlock);
// Inform the dominator tree about the removed edge.
- DT->deleteEdge(Preheader, L->getHeader());
+ DTU.deleteEdge(Preheader, L->getHeader());
}
+ // Use a set to de-duplicate and a vector to guarantee deterministic ordering.
+ llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet;
+ llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;
+
// Given LCSSA form is satisfied, we should not have users of instructions
// within the dead loop outside of the loop. However, LCSSA doesn't take
// unreachable uses into account. We handle them here.
@@ -1457,8 +562,27 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
"Unexpected user in reachable block");
U.set(Undef);
}
+ auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
+ if (!DVI)
+ continue;
+ auto Key = DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()});
+ if (Key != DeadDebugSet.end())
+ continue;
+ DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()});
+ DeadDebugInst.push_back(DVI);
}
+ // After the loop has been deleted, all the values defined and modified
+ // inside the loop become unavailable.
+ // Since the debug values in the loop have been deleted, insert an undef
+ // dbg.value at the point where the loop used to be; this truncates the range
+ // of any dbg.value defined before the loop, which is particularly important
+ // for constant values.
+ DIBuilder DIB(*ExitBlock->getModule());
+ for (auto *DVI : DeadDebugInst)
+ DIB.insertDbgValueIntrinsic(
+ UndefValue::get(Builder.getInt32Ty()), DVI->getVariable(),
+ DVI->getExpression(), DVI->getDebugLoc(), ExitBlock->getFirstNonPHI());
+
// Remove the block from the reference counting scheme, so that we can
// delete it freely later.
for (auto *Block : L->blocks())
@@ -1519,6 +643,28 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
return (FalseVal + (TrueVal / 2)) / TrueVal;
}
+bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
+ ScalarEvolution &SE) {
+ Loop *OuterL = InnerLoop->getParentLoop();
+ if (!OuterL)
+ return true;
+
+ // Get the backedge taken count for the inner loop
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
+ if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
+ !InnerLoopBECountSC->getType()->isIntegerTy())
+ return false;
+
+ // Check whether the count is invariant with respect to the outer loop.
+ ScalarEvolution::LoopDisposition LD =
+ SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
+ if (LD != ScalarEvolution::LoopInvariant)
+ return false;
+
+ return true;
+}
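+// Illustrative examples: for
+//   for (i = 0; i < n; ++i)
+//     for (j = 0; j < 16; ++j) ...
+// the inner backedge-taken count is invariant in the parent, so this returns
+// true; for
+//   for (i = 0; i < n; ++i)
+//     for (j = 0; j < i; ++j) ...
+// the inner count depends on i, so this returns false.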
+
/// Adds a 'fast' flag to floating point operations.
static Value *addFastMathFlag(Value *V) {
if (isa<FPMathOperator>(V)) {
@@ -1529,6 +675,51 @@ static Value *addFastMathFlag(Value *V) {
return V;
}
+Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
+ RecurrenceDescriptor::MinMaxRecurrenceKind RK,
+ Value *Left, Value *Right) {
+ CmpInst::Predicate P = CmpInst::ICMP_NE;
+ switch (RK) {
+ default:
+ llvm_unreachable("Unknown min/max recurrence kind");
+ case RecurrenceDescriptor::MRK_UIntMin:
+ P = CmpInst::ICMP_ULT;
+ break;
+ case RecurrenceDescriptor::MRK_UIntMax:
+ P = CmpInst::ICMP_UGT;
+ break;
+ case RecurrenceDescriptor::MRK_SIntMin:
+ P = CmpInst::ICMP_SLT;
+ break;
+ case RecurrenceDescriptor::MRK_SIntMax:
+ P = CmpInst::ICMP_SGT;
+ break;
+ case RecurrenceDescriptor::MRK_FloatMin:
+ P = CmpInst::FCMP_OLT;
+ break;
+ case RecurrenceDescriptor::MRK_FloatMax:
+ P = CmpInst::FCMP_OGT;
+ break;
+ }
+
+ // We only match FP sequences that are 'fast', so we can unconditionally
+ // set it on any generated instructions.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ FastMathFlags FMF;
+ FMF.setFast();
+ Builder.setFastMathFlags(FMF);
+
+ Value *Cmp;
+ if (RK == RecurrenceDescriptor::MRK_FloatMin ||
+ RK == RecurrenceDescriptor::MRK_FloatMax)
+ Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
+ else
+ Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
+ Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+ return Select;
+}
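+// For MRK_SIntMin on scalars, for example, the generated sequence is
+//   %rdx.minmax.cmp    = icmp slt <ty> %Left, %Right
+//   %rdx.minmax.select = select i1 %rdx.minmax.cmp, <ty> %Left, <ty> %Right
+// (for vector operands the compare and select are element-wise).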
+
// Helper to generate an ordered reduction.
Value *
llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
@@ -1550,8 +741,7 @@ llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
} else {
assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
"Invalid min/max");
- Result = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, Result,
- Ext);
+ Result = createMinMaxOp(Builder, MinMaxKind, Result, Ext);
}
if (!RedOps.empty())
@@ -1594,8 +784,7 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
} else {
assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
"Invalid min/max");
- TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec,
- Shuf);
+ TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);
}
if (!RedOps.empty())
propagateIRFlags(TmpVec, RedOps);
@@ -1613,7 +802,7 @@ Value *llvm::createSimpleTargetReduction(
assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
- std::function<Value*()> BuildFunc;
+ std::function<Value *()> BuildFunc;
using RD = RecurrenceDescriptor;
RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
// TODO: Support creating ordered reductions.
@@ -1739,3 +928,39 @@ void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
VecOp->andIRFlags(V);
}
}
+
+bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ const SCEV *Zero = SE.getZero(S->getType());
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
+}
+
+bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ const SCEV *Zero = SE.getZero(S->getType());
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
+}
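+// Both queries require S to be available at loop entry and then ask whether
+// entering L is guarded by the corresponding compare; e.g. a guard of the
+// form
+//   if (len >= 0) { ... loop L using len ... }
+// can let isKnownNonNegativeInLoop(SE.getSCEV(len), L, SE) return true.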
+
+bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed) {
+ unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
+ APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
+ APInt::getMinValue(BitWidth);
+ auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, Predicate, S,
+ SE.getConstant(Min));
+}
+
+bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed) {
+ unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
+ APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) :
+ APInt::getMaxValue(BitWidth);
+ auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, Predicate, S,
+ SE.getConstant(Max));
+}
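+// These helpers prove that, on entry to L, S is strictly above the minimum
+// (respectively strictly below the maximum) value representable in its bit
+// width; with Signed == true that is S > INT_MIN (resp. S < INT_MAX), the
+// usual precondition for showing that S - 1 or S + 1 cannot wrap.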
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 03006ef3a2d3..661b4fa5bcb7 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -301,7 +301,7 @@ static void createMemMoveLoop(Instruction *InsertBefore,
// the appropriate conditional branches when the loop is built.
ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
SrcAddr, DstAddr, "compare_src_dst");
- TerminatorInst *ThenTerm, *ElseTerm;
+ Instruction *ThenTerm, *ElseTerm;
SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
&ElseTerm);
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index e99ecfef19cd..d019a44fc705 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -372,7 +372,7 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
Case.getCaseSuccessor()));
- llvm::sort(Cases.begin(), Cases.end(), CaseCmp());
+ llvm::sort(Cases, CaseCmp());
// Merge case into clusters
if (Cases.size() >= 2) {
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index ba4b7f3cc263..ae5e72ea4d30 100644
--- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -174,6 +174,49 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
return std::make_pair(Ctor, InitFunction);
}
+std::pair<Function *, Function *>
+llvm::getOrCreateSanitizerCtorAndInitFunctions(
+ Module &M, StringRef CtorName, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+ function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
+ StringRef VersionCheckName) {
+ assert(!CtorName.empty() && "Expected ctor function name");
+
+ if (Function *Ctor = M.getFunction(CtorName))
+ // FIXME: Sink this logic into the module, similar to the handling of
+ // globals. This will make moving to a concurrent model much easier.
+ if (Ctor->arg_size() == 0 ||
+ Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
+ return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)};
+
+ Function *Ctor, *InitFunction;
+ std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
+ M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName);
+ FunctionsCreatedCallback(Ctor, InitFunction);
+ return std::make_pair(Ctor, InitFunction);
+}
+
+Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) {
+ assert(!Name.empty() && "Expected init function name");
+ if (Function *F = M.getFunction(Name)) {
+ if (F->arg_size() != 0 ||
+ F->getReturnType() != Type::getVoidTy(M.getContext())) {
+ std::string Err;
+ raw_string_ostream Stream(Err);
+ Stream << "Sanitizer interface function defined with wrong type: " << *F;
+ report_fatal_error(Err);
+ }
+ return F;
+ }
+ Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ Name, AttributeList(), Type::getVoidTy(M.getContext())));
+ F->setLinkage(Function::ExternalLinkage);
+
+ appendToGlobalCtors(M, F, 0);
+
+ return F;
+}
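+// A hedged usage sketch: a sanitizer pass can fetch (or lazily create) its
+// module init hook with, e.g.,
+//   Function *Init = getOrCreateInitFunction(M, "__tsan_init");
+// a newly created function is declared as "void()" with external linkage and
+// registered in llvm.global_ctors at priority 0.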
+
void llvm::filterDeadComdatFunctions(
Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) {
// Build a map from the comdat to the number of entries in that comdat we
diff --git a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 2923977b791a..585ce6b4c118 100644
--- a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -35,7 +35,6 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <algorithm>
#define DEBUG_TYPE "predicateinfo"
using namespace llvm;
@@ -523,7 +522,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
if (isa<PredicateWithEdge>(ValInfo)) {
IRBuilder<> B(getBranchTerminator(ValInfo));
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (IF->user_begin() == IF->user_end())
+ if (empty(IF->users()))
CreatedDeclarations.insert(IF);
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
@@ -535,7 +534,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
"Should not have gotten here without it being an assume");
IRBuilder<> B(PAssume->AssumeInst);
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (IF->user_begin() == IF->user_end())
+ if (empty(IF->users()))
CreatedDeclarations.insert(IF);
CallInst *PIC = B.CreateCall(IF, Op);
PredicateMap.insert({PIC, ValInfo});
@@ -570,7 +569,7 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
auto Comparator = [&](const Value *A, const Value *B) {
return valueComesBefore(OI, A, B);
};
- llvm::sort(OpsToRename.begin(), OpsToRename.end(), Comparator);
+ llvm::sort(OpsToRename, Comparator);
ValueDFS_Compare Compare(OI);
// Compute liveness, and rename in O(uses) per Op.
for (auto *Op : OpsToRename) {
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 86e15bbd7f22..91e4f4254b3e 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -82,8 +82,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
if (SI->isVolatile())
return false;
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
@@ -116,7 +115,7 @@ struct AllocaInfo {
bool OnlyUsedInOneBlock;
Value *AllocaPointerVal;
- TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares;
+ TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares;
void clear() {
DefiningBlocks.clear();
@@ -263,7 +262,7 @@ struct PromoteMem2Reg {
/// For each alloca, we keep track of the dbg.declare intrinsic that
/// describes it, if any, so that we can convert it to a dbg.value
/// intrinsic if the alloca gets promoted.
- SmallVector<TinyPtrVector<DbgInfoIntrinsic *>, 8> AllocaDbgDeclares;
+ SmallVector<TinyPtrVector<DbgVariableIntrinsic *>, 8> AllocaDbgDeclares;
/// The set of basic blocks the renamer has already visited.
SmallPtrSet<BasicBlock *, 16> Visited;
@@ -426,7 +425,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Record debuginfo for the store and remove the declaration's
// debuginfo.
- for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
+ for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
DII->eraseFromParent();
@@ -477,7 +476,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Sort the stores by their index, making it efficient to do a lookup with a
// binary search.
- llvm::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());
+ llvm::sort(StoresByIndex, less_first());
// Walk all of the loads from this alloca, replacing them with the nearest
// store above them, if any.
@@ -527,7 +526,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
while (!AI->use_empty()) {
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
- for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
+ for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DII, SI, DIB);
}
@@ -539,7 +538,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LBI.deleteValue(AI);
// The alloca's debuginfo can be removed as well.
- for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
+ for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
DII->eraseFromParent();
LBI.deleteValue(DII);
}
@@ -638,10 +637,9 @@ void PromoteMem2Reg::run() {
SmallVector<BasicBlock *, 32> PHIBlocks;
IDF.calculate(PHIBlocks);
if (PHIBlocks.size() > 1)
- llvm::sort(PHIBlocks.begin(), PHIBlocks.end(),
- [this](BasicBlock *A, BasicBlock *B) {
- return BBNumbers.lookup(A) < BBNumbers.lookup(B);
- });
+ llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ });
unsigned CurrentVersion = 0;
for (BasicBlock *BB : PHIBlocks)
@@ -752,14 +750,18 @@ void PromoteMem2Reg::run() {
// Ok, now we know that all of the PHI nodes are missing entries for some
// basic blocks. Start by sorting the incoming predecessors for efficient
// access.
- llvm::sort(Preds.begin(), Preds.end());
+ auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ };
+ llvm::sort(Preds, CompareBBNumbers);
// Now we loop through all BB's which have entries in SomePHI and remove
// them from the Preds list.
for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
// Do a log(n) search of the Preds list for the entry we want.
SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
- Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
+ Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i),
+ CompareBBNumbers);
assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
"PHI node has entry for a block which is not a predecessor!");
@@ -932,7 +934,7 @@ NextIteration:
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
- for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[AllocaNo])
+ for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[AllocaNo])
ConvertDebugDeclareToDebugValue(DII, APN, DIB);
// Get the next phi node.
@@ -951,7 +953,7 @@ NextIteration:
if (!Visited.insert(BB).second)
return;
- for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
+ for (BasicBlock::iterator II = BB->begin(); !II->isTerminator();) {
Instruction *I = &*II++; // get the instruction, increment iterator
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
@@ -992,7 +994,7 @@ NextIteration:
// Record debuginfo for the store before removing it.
IncomingLocs[AllocaNo] = SI->getDebugLoc();
- for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second])
+ for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[ai->second])
ConvertDebugDeclareToDebugValue(DII, SI, DIB);
BB->getInstList().erase(SI);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c87b5c16ffce..03b73954321d 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -173,14 +173,15 @@ class SimplifyCFGOpt {
const DataLayout &DL;
SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
const SimplifyCFGOptions &Options;
+ bool Resimplify;
- Value *isValueEqualityComparison(TerminatorInst *TI);
+ Value *isValueEqualityComparison(Instruction *TI);
BasicBlock *GetValueEqualityComparisonCases(
- TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases);
- bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
+ bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
BasicBlock *Pred,
IRBuilder<> &Builder);
- bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ bool FoldValueComparisonIntoPredecessors(Instruction *TI,
IRBuilder<> &Builder);
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
@@ -194,6 +195,9 @@ class SimplifyCFGOpt {
bool SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
bool SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
+ bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
+ IRBuilder<> &Builder);
+
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
@@ -201,6 +205,13 @@ public:
: TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {}
bool run(BasicBlock *BB);
+ bool simplifyOnce(BasicBlock *BB);
+
+ // Helper to set Resimplify and return change indication.
+ bool requestResimplify() {
+ Resimplify = true;
+ return true;
+ }
};
} // end anonymous namespace
@@ -208,7 +219,7 @@ public:
/// Return true if it is safe to merge these two
/// terminator instructions together.
static bool
-SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2,
+SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
if (SI1 == SI2)
return false; // Can't merge with self!
@@ -315,7 +326,7 @@ static unsigned ComputeSpeculationCost(const User *I,
/// V plus its non-dominating operands. If that cost is greater than
/// CostRemaining, false is returned and CostRemaining is undefined.
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
- SmallPtrSetImpl<Instruction *> *AggressiveInsts,
+ SmallPtrSetImpl<Instruction *> &AggressiveInsts,
unsigned &CostRemaining,
const TargetTransformInfo &TTI,
unsigned Depth = 0) {
@@ -349,13 +360,8 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
return true;
- // If we aren't allowing aggressive promotion anymore, then don't consider
- // instructions in the 'if region'.
- if (!AggressiveInsts)
- return false;
-
// If we have seen this instruction before, don't count it again.
- if (AggressiveInsts->count(I))
+ if (AggressiveInsts.count(I))
return true;
// Okay, it looks like the instruction IS in the "condition". Check to
@@ -373,7 +379,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// is expected to be undone in CodeGenPrepare if the speculation has not
// enabled further IR optimizations.
if (Cost > CostRemaining &&
- (!SpeculateOneExpensiveInst || !AggressiveInsts->empty() || Depth > 0))
+ (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0))
return false;
// Avoid unsigned wrap.
@@ -386,7 +392,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
Depth + 1))
return false;
// Okay, it's safe to do this! Remember this instruction.
- AggressiveInsts->insert(I);
+ AggressiveInsts.insert(I);
return true;
}
@@ -664,7 +670,7 @@ private:
} // end anonymous namespace
-static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
+static void EraseTerminatorAndDCECond(Instruction *TI) {
Instruction *Cond = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cond = dyn_cast<Instruction>(SI->getCondition());
@@ -682,12 +688,12 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
/// Return true if the specified terminator checks
/// to see if a value is equal to constant integer value.
-Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
+Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
Value *CV = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
// Do not permit merging of large switch instructions into their
// predecessors unless there is only one predecessor.
- if (SI->getNumSuccessors() * pred_size(SI->getParent()) <= 128)
+ if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
CV = SI->getCondition();
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
@@ -710,7 +716,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
/// Given a value comparison instruction,
/// decode all of the 'cases' that it represents and return the 'default' block.
BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
- TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
+ Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cases.reserve(SI->getNumCases());
for (auto Case : SI->cases())
@@ -800,7 +806,7 @@ static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
- TerminatorInst *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
+ Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
if (!PredVal)
return false; // Not a value comparison in predecessor.
@@ -848,7 +854,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
<< "Through successor TI: " << *TI << "Leaving: " << *NI
<< "\n");
- EraseTerminatorInstAndDCECond(TI);
+ EraseTerminatorAndDCECond(TI);
return true;
}
@@ -930,7 +936,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
<< "Through successor TI: " << *TI << "Leaving: " << *NI
<< "\n");
- EraseTerminatorInstAndDCECond(TI);
+ EraseTerminatorAndDCECond(TI);
return true;
}
@@ -965,10 +971,10 @@ static inline bool HasBranchWeights(const Instruction *I) {
return false;
}
-/// Get Weights of a given TerminatorInst, the default weight is at the front
+/// Get the weights of a given terminator; the default weight is at the front
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
/// metadata.
-static void GetBranchWeights(TerminatorInst *TI,
+static void GetBranchWeights(Instruction *TI,
SmallVectorImpl<uint64_t> &Weights) {
MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
assert(MD);
@@ -1002,7 +1008,7 @@ static void FitWeights(MutableArrayRef<uint64_t> Weights) {
/// (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
/// on the same value. If so, and if safe to do so, fold them together.
-bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
IRBuilder<> &Builder) {
BasicBlock *BB = TI->getParent();
Value *CV = isValueEqualityComparison(TI); // CondVal
@@ -1014,7 +1020,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
BasicBlock *Pred = Preds.pop_back_val();
// See if the predecessor is a comparison with the same value.
- TerminatorInst *PTI = Pred->getTerminator();
+ Instruction *PTI = Pred->getTerminator();
Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
if (PCV == CV && TI != PTI) {
@@ -1191,7 +1197,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
setBranchWeights(NewSI, MDWeights);
}
- EraseTerminatorInstAndDCECond(PTI);
+ EraseTerminatorAndDCECond(PTI);
// Okay, last check. If BB is still a successor of PSI, then we must
// have an infinite loop case. If so, add an infinitely looping block
@@ -1270,7 +1276,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
do {
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
- if (isa<TerminatorInst>(I1))
+ if (I1->isTerminator())
goto HoistTerminator;
// If we're going to hoist a call, make sure that the two instructions we're
@@ -1315,8 +1321,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_mem_parallel_loop_access};
- combineMetadata(I1, I2, KnownIDs);
+ LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group};
+ combineMetadata(I1, I2, KnownIDs, true);
// I1 and I2 are being combined into a single instruction. Its debug
// location is the merged locations of the original instructions.
@@ -1375,7 +1382,13 @@ HoistTerminator:
NT->takeName(I1);
}
+ // Ensure terminator gets a debug location, even an unknown one, in case
+ // it involves inlinable calls.
+ NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+
+ // PHIs created below will adopt NT's merged DebugLoc.
IRBuilder<NoFolder> Builder(NT);
+
// Hoisting one of the terminators from our successor is a great thing.
// Unfortunately, the successors of the if/else blocks may have PHI nodes in
// them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
@@ -1407,7 +1420,7 @@ HoistTerminator:
for (BasicBlock *Succ : successors(BB1))
AddPredecessorToBlock(Succ, BIParent, BB1);
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
return true;
}
@@ -1582,7 +1595,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
// However, as N-way merge for CallInst is rare, so we use simplified API
// instead of using complex API for N-way merge.
I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
- combineMetadataForCSE(I0, I);
+ combineMetadataForCSE(I0, I, true);
I0->andIRFlags(I);
}
@@ -1940,11 +1953,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
}
assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
- // Keep a count of how many times instructions are used within CondBB when
- // they are candidates for sinking into CondBB. Specifically:
+ // Keep a count of how many times instructions are used within ThenBB when
+ // they are candidates for sinking into ThenBB. Specifically:
// - They are defined in BB, and
// - They have no side effects, and
- // - All of their uses are in CondBB.
+ // - All of their uses are in ThenBB.
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
@@ -1994,14 +2007,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
}
}
- // Consider any sink candidates which are only used in CondBB as costs for
+ // Consider any sink candidates which are only used in ThenBB as costs for
// speculation. Note, while we iterate over a DenseMap here, we are summing
// and so iteration order isn't significant.
for (SmallDenseMap<Instruction *, unsigned, 4>::iterator
I = SinkCandidateUseCounts.begin(),
E = SinkCandidateUseCounts.end();
I != E; ++I)
- if (I->first->getNumUses() == I->second) {
+ if (I->first->hasNUses(I->second)) {
++SpeculationCost;
if (SpeculationCost > 1)
return false;
@@ -2241,7 +2254,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
// Loop over all of the edges from PredBB to BB, changing them to branch
// to EdgeBB instead.
- TerminatorInst *PredBBTI = PredBB->getTerminator();
+ Instruction *PredBBTI = PredBB->getTerminator();
for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
if (PredBBTI->getSuccessor(i) == BB) {
BB->removePredecessor(PredBB);
@@ -2249,7 +2262,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
}
// Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, DL, AC) | true;
+ return FoldCondBranchOnPHI(BI, DL, AC) || true;
}
return false;
@@ -2304,9 +2317,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
continue;
}
- if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
MaxCostVal0, TTI) ||
- !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
MaxCostVal1, TTI))
return false;
}
@@ -2336,8 +2349,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
IfBlock1 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock1);
- for (BasicBlock::iterator I = IfBlock1->begin(); !isa<TerminatorInst>(I);
- ++I)
+ for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I)
if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
@@ -2350,8 +2362,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
IfBlock2 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock2);
- for (BasicBlock::iterator I = IfBlock2->begin(); !isa<TerminatorInst>(I);
- ++I)
+ for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I)
if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
@@ -2371,20 +2382,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
- if (IfBlock1) {
- for (auto &I : *IfBlock1)
- I.dropUnknownNonDebugMetadata();
- DomBlock->getInstList().splice(InsertPt->getIterator(),
- IfBlock1->getInstList(), IfBlock1->begin(),
- IfBlock1->getTerminator()->getIterator());
- }
- if (IfBlock2) {
- for (auto &I : *IfBlock2)
- I.dropUnknownNonDebugMetadata();
- DomBlock->getInstList().splice(InsertPt->getIterator(),
- IfBlock2->getInstList(), IfBlock2->begin(),
- IfBlock2->getTerminator()->getIterator());
- }
+ if (IfBlock1)
+ hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock1);
+ if (IfBlock2)
+ hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock2);
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
@@ -2400,7 +2401,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
// has been flattened. Change DomBlock to jump directly to our new block to
// avoid other simplifycfg's kicking in on the diamond.
- TerminatorInst *OldTI = DomBlock->getTerminator();
+ Instruction *OldTI = DomBlock->getTerminator();
Builder.SetInsertPoint(OldTI);
Builder.CreateBr(BB);
OldTI->eraseFromParent();
@@ -2434,7 +2435,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
TrueSucc->removePredecessor(BI->getParent());
FalseSucc->removePredecessor(BI->getParent());
Builder.CreateRetVoid();
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
return true;
}
@@ -2490,7 +2491,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
<< "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: "
<< *TrueSucc << "FALSEBLOCK: " << *FalseSucc);
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
return true;
}
@@ -2541,6 +2542,8 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent();
+ const unsigned PredCount = pred_size(BB);
+
Instruction *Cond = nullptr;
if (BI->isConditional())
Cond = dyn_cast<Instruction>(BI->getCondition());
@@ -2590,7 +2593,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// too many instructions and these involved instructions can be executed
// unconditionally. We denote all involved instructions except the condition
// as "bonus instructions", and only allow this transformation when the
- // number of the bonus instructions does not exceed a certain threshold.
+ // number of the bonus instructions we'll need to create when cloning into
+ // each predecessor does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
for (auto I = BB->begin(); Cond != &*I; ++I) {
// Ignore dbg intrinsics.
@@ -2605,7 +2609,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// I is used in the same BB. Since BI uses Cond and doesn't have more slots
// to use any other instruction, User must be an instruction between next(I)
// and Cond.
- ++NumBonusInsts;
+
+ // Account for the cost of duplicating this instruction into each
+ // predecessor.
+ NumBonusInsts += PredCount;
// Early exits once we reach the limit.
if (NumBonusInsts > BonusInstThreshold)
return false;
@@ -2711,16 +2718,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// Clone Cond into the predecessor basic block, and or/and the
// two conditions together.
- Instruction *New = Cond->clone();
- RemapInstruction(New, VMap,
+ Instruction *CondInPred = Cond->clone();
+ RemapInstruction(CondInPred, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- PredBlock->getInstList().insert(PBI->getIterator(), New);
- New->takeName(Cond);
- Cond->setName(New->getName() + ".old");
+ PredBlock->getInstList().insert(PBI->getIterator(), CondInPred);
+ CondInPred->takeName(Cond);
+ Cond->setName(CondInPred->getName() + ".old");
if (BI->isConditional()) {
Instruction *NewCond = cast<Instruction>(
- Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond"));
+ Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond"));
PBI->setCondition(NewCond);
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
@@ -2784,7 +2791,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
Instruction *NotCond = cast<Instruction>(
Builder.CreateNot(PBI->getCondition(), "not.cond"));
MergedCond = cast<Instruction>(
- Builder.CreateBinOp(Instruction::And, NotCond, New, "and.cond"));
+ Builder.CreateBinOp(Instruction::And, NotCond, CondInPred,
+ "and.cond"));
if (PBI_C->isOne())
MergedCond = cast<Instruction>(Builder.CreateBinOp(
Instruction::Or, PBI->getCondition(), MergedCond, "or.cond"));
@@ -2793,7 +2801,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
// is false: PBI_Cond and BI_Value
MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::And, PBI->getCondition(), New, "and.cond"));
+ Instruction::And, PBI->getCondition(), CondInPred, "and.cond"));
if (PBI_C->isOne()) {
Instruction *NotCond = cast<Instruction>(
Builder.CreateNot(PBI->getCondition(), "not.cond"));
@@ -2807,7 +2815,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
}
// Change PBI from Conditional to Unconditional.
BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
- EraseTerminatorInstAndDCECond(PBI);
+ EraseTerminatorAndDCECond(PBI);
PBI = New_PBI;
}
@@ -2873,7 +2881,7 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
if (!AlternativeV)
break;
- assert(pred_size(Succ) == 2);
+ assert(Succ->hasNPredecessors(2));
auto PredI = pred_begin(Succ);
BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
@@ -2922,7 +2930,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
isa<StoreInst>(I))
++N;
// Free instructions.
- else if (isa<TerminatorInst>(I) || IsaBitcastOfPointerType(I))
+ else if (I.isTerminator() || IsaBitcastOfPointerType(I))
continue;
else
return false;
@@ -3402,7 +3410,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
-static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
BasicBlock *TrueBB, BasicBlock *FalseBB,
uint32_t TrueWeight,
uint32_t FalseWeight) {
@@ -3414,7 +3422,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
// Then remove the rest.
- for (BasicBlock *Succ : OldTerm->successors()) {
+ for (BasicBlock *Succ : successors(OldTerm)) {
// Make sure only to keep exactly one copy of each edge.
if (Succ == KeepEdge1)
KeepEdge1 = nullptr;
@@ -3457,7 +3465,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
Builder.CreateBr(FalseBB);
}
- EraseTerminatorInstAndDCECond(OldTerm);
+ EraseTerminatorAndDCECond(OldTerm);
return true;
}
@@ -3534,9 +3542,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
-static bool tryToSimplifyUncondBranchWithICmpInIt(
- ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL,
- const TargetTransformInfo &TTI, const SimplifyCFGOptions &Options) {
+bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
+ ICmpInst *ICI, IRBuilder<> &Builder) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -3571,7 +3578,7 @@ static bool tryToSimplifyUncondBranchWithICmpInIt(
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -3587,7 +3594,7 @@ static bool tryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
@@ -3701,7 +3708,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
BasicBlock *NewBB =
BB->splitBasicBlock(BI->getIterator(), "switch.early.test");
// Remove the uncond branch added to the old block.
- TerminatorInst *OldTI = BB->getTerminator();
+ Instruction *OldTI = BB->getTerminator();
Builder.SetInsertPoint(OldTI);
if (TrueWhenEqual)
@@ -3745,7 +3752,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
}
// Erase the old branch instruction.
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
return true;
@@ -3861,9 +3868,9 @@ bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
}
// The landingpad is now unreachable. Zap it.
- BB->eraseFromParent();
if (LoopHeaders)
LoopHeaders->erase(BB);
+ BB->eraseFromParent();
return true;
}
@@ -3993,7 +4000,7 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) {
if (UnwindDest == nullptr) {
removeUnwindEdge(PredBB);
} else {
- TerminatorInst *TI = PredBB->getTerminator();
+ Instruction *TI = PredBB->getTerminator();
TI->replaceUsesOfWith(BB, UnwindDest);
}
}
@@ -4062,7 +4069,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
SmallVector<BranchInst *, 8> CondBranchPreds;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *P = *PI;
- TerminatorInst *PTI = P->getTerminator();
+ Instruction *PTI = P->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
if (BI->isUnconditional())
UncondBranchPreds.push_back(P);
@@ -4083,9 +4090,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
// If we eliminated all predecessors of the block, delete the block now.
if (pred_empty(BB)) {
// We know there are no successors, so just nuke the block.
- BB->eraseFromParent();
if (LoopHeaders)
LoopHeaders->erase(BB);
+ BB->eraseFromParent();
}
return true;
@@ -4167,7 +4174,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- TerminatorInst *TI = Preds[i]->getTerminator();
+ Instruction *TI = Preds[i]->getTerminator();
IRBuilder<> Builder(TI);
if (auto *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional()) {
@@ -4179,10 +4186,10 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
} else {
if (BI->getSuccessor(0) == BB) {
Builder.CreateBr(BI->getSuccessor(1));
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
} else if (BI->getSuccessor(1) == BB) {
Builder.CreateBr(BI->getSuccessor(0));
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
Changed = true;
}
}
@@ -4245,9 +4252,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// If this block is now dead, remove it.
if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
- BB->eraseFromParent();
if (LoopHeaders)
LoopHeaders->erase(BB);
+ BB->eraseFromParent();
return true;
}
@@ -4424,7 +4431,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
SplitBlock(&*NewDefault, &NewDefault->front());
auto *OldTI = NewDefault->getTerminator();
new UnreachableInst(SI->getContext(), OldTI);
- EraseTerminatorInstAndDCECond(OldTI);
+ EraseTerminatorAndDCECond(OldTI);
return true;
}
@@ -4635,12 +4642,12 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
SmallDenseMap<Value *, Constant *> ConstantPool;
ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
for (Instruction &I : CaseDest->instructionsWithoutDebug()) {
- if (TerminatorInst *T = dyn_cast<TerminatorInst>(&I)) {
+ if (I.isTerminator()) {
// If the terminator is a simple branch, continue to the next block.
- if (T->getNumSuccessors() != 1 || T->isExceptional())
+ if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
return false;
Pred = CaseDest;
- CaseDest = T->getSuccessor(0);
+ CaseDest = I.getSuccessor(0);
} else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
// Instruction is side-effect free and constant.
@@ -5031,6 +5038,9 @@ SwitchLookupTable::SwitchLookupTable(
GlobalVariable::PrivateLinkage, Initializer,
"switch.table." + FuncName);
Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ // Set the alignment to that of the array elements. We will only be loading
+ // one value out of it.
+ Array->setAlignment(DL.getPrefTypeAlignment(ValueType));
Kind = ArrayKind;
}
@@ -5257,7 +5267,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Figure out the corresponding result for each case value and phi node in the
// common destination, as well as the min and max case values.
- assert(SI->case_begin() != SI->case_end());
+ assert(!empty(SI->cases()));
SwitchInst::CaseIt CI = SI->case_begin();
ConstantInt *MinCaseVal = CI->getCaseValue();
ConstantInt *MaxCaseVal = CI->getCaseValue();
@@ -5509,7 +5519,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
SmallVector<int64_t,4> Values;
for (auto &C : SI->cases())
Values.push_back(C.getCaseValue()->getValue().getSExtValue());
- llvm::sort(Values.begin(), Values.end());
+ llvm::sort(Values);
// If the switch is already dense, there's nothing useful to do here.
if (isSwitchDense(Values))
@@ -5583,33 +5593,33 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// If the block only contains the switch, see if we can fold the block
// away into any preds.
if (SI == &*BB->instructionsWithoutDebug().begin())
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// Remove unreachable cases.
if (eliminateDeadSwitchCases(SI, Options.AC, DL))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
if (switchToSelect(SI, Builder, DL, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// The conversion from switch to lookup tables results in difficult-to-analyze
// code and makes pruning branches much harder. This is a problem if the
@@ -5618,10 +5628,10 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// optimisation pipeline.
if (Options.ConvertSwitchToLookupTable &&
SwitchToLookupTable(SI, Builder, DL, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
if (ReduceSwitchRange(SI, Builder, DL, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
return false;
}
@@ -5646,20 +5656,20 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (IBI->getNumDestinations() == 0) {
// If the indirectbr has no successors, change it to unreachable.
new UnreachableInst(IBI->getContext(), IBI);
- EraseTerminatorInstAndDCECond(IBI);
+ EraseTerminatorAndDCECond(IBI);
return true;
}
if (IBI->getNumDestinations() == 1) {
// If the indirectbr has one successor, change it to a direct branch.
BranchInst::Create(IBI->getDestination(0), IBI);
- EraseTerminatorInstAndDCECond(IBI);
+ EraseTerminatorAndDCECond(IBI);
return true;
}
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
return Changed;
}
@@ -5755,7 +5765,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
// backedge, so we can eliminate BB.
bool NeedCanonicalLoop =
Options.NeedCanonicalLoop &&
- (LoopHeaders && pred_size(BB) > 1 &&
+ (LoopHeaders && BB->hasNPredecessorsOrMore(2) &&
(LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
@@ -5769,7 +5779,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
if (I->isTerminator() &&
- tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, Options))
+ tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
return true;
}
@@ -5787,7 +5797,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
return false;
}
@@ -5815,18 +5825,18 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
auto I = BB->instructionsWithoutDebug().begin();
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
} else if (&*I == cast<Instruction>(BI->getCondition())) {
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
}
@@ -5834,35 +5844,24 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (SimplifyBranchOnICmpChain(BI, Builder, DL))
return true;
- // If this basic block has a single dominating predecessor block and the
- // dominating block's condition implies BI's condition, we know the direction
- // of the BI branch.
- if (BasicBlock *Dom = BB->getSinglePredecessor()) {
- auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
- if (PBI && PBI->isConditional() &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
- assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB);
- bool CondIsTrue = PBI->getSuccessor(0) == BB;
- Optional<bool> Implication = isImpliedCondition(
- PBI->getCondition(), BI->getCondition(), DL, CondIsTrue);
- if (Implication) {
- // Turn this into a branch on constant.
- auto *OldCond = BI->getCondition();
- ConstantInt *CI = *Implication
- ? ConstantInt::getTrue(BB->getContext())
- : ConstantInt::getFalse(BB->getContext());
- BI->setCondition(CI);
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- return simplifyCFG(BB, TTI, Options) | true;
- }
- }
+ // If this basic block has dominating predecessor blocks and the dominating
+ // blocks' conditions imply BI's condition, we know the direction of BI.
+ Optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
+ if (Imp) {
+ // Turn this into a branch on constant.
+ auto *OldCond = BI->getCondition();
+ ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
+ : ConstantInt::getFalse(BB->getContext());
+ BI->setCondition(TorF);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ return requestResimplify();
}
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -5871,24 +5870,24 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
if (HoistThenElseCodeToIf(BI, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
- TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
+ Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
// execute Successor #1 if it branches to Successor #0.
- TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
+ Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// If this is a branch on a phi node in the current block, thread control
@@ -5896,14 +5895,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, DL, Options.AC))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI, DL))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// Look for diamond patterns.
if (MergeCondStores)
@@ -5911,7 +5910,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (mergeConditionalStores(PBI, BI, DL))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
return false;
}
@@ -5974,7 +5973,7 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
for (PHINode &PHI : BB->phis())
for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
- TerminatorInst *T = PHI.getIncomingBlock(i)->getTerminator();
+ Instruction *T = PHI.getIncomingBlock(i)->getTerminator();
IRBuilder<> Builder(T);
if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
BB->removePredecessor(PHI.getIncomingBlock(i));
@@ -5994,7 +5993,7 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
return false;
}
-bool SimplifyCFGOpt::run(BasicBlock *BB) {
+bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
bool Changed = false;
assert(BB && BB->getParent() && "Block not embedded in function!");
@@ -6068,6 +6067,21 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
return Changed;
}
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
+
+ // Repeatedly simplify BB as long as resimplification is requested.
+ do {
+ Resimplify = false;
+
+ // Perform one round of simplification. The Resimplify flag will be set if
+ // another iteration is requested.
+ Changed |= simplifyOnce(BB);
+ } while (Resimplify);
+
+ return Changed;
+}
+
bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
const SimplifyCFGOptions &Options,
SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
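
The control flow of the new driver can be distilled into a small standalone sketch: simplifyOnce() performs one round and may request another by setting a flag, and run() loops until no further resimplification is requested. The names mirror the patch; the body of simplifyOnce() here is only a stand-in.

// Distilled sketch of the requestResimplify()/run() idiom used above.
// The work done per round is faked; only the looping structure is the point.
#include <cstdio>

class SimplifyCFGOptSketch {
  bool Resimplify = false;
  int RoundsLeft = 3; // stand-in for "more simplification opportunities exist"

  bool requestResimplify() {
    Resimplify = true;
    return true;
  }

  bool simplifyOnce() {
    // One round of simplification; ask for another round while work remains.
    if (RoundsLeft-- > 0)
      return requestResimplify();
    return false;
  }

public:
  bool run() {
    bool Changed = false;
    // Repeatedly simplify as long as resimplification is requested.
    do {
      Resimplify = false;
      Changed |= simplifyOnce();
    } while (Resimplify);
    return Changed;
  }
};

int main() {
  SimplifyCFGOptSketch S;
  std::printf("changed: %d\n", S.run());
  return 0;
}
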
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 65b23f4d94a1..7faf291e73d9 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -106,8 +106,9 @@ namespace {
/// Otherwise return null.
Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
Value *IVSrc = nullptr;
- unsigned OperIdx = 0;
+ const unsigned OperIdx = 0;
const SCEV *FoldedExpr = nullptr;
+ bool MustDropExactFlag = false;
switch (UseInst->getOpcode()) {
default:
return nullptr;
@@ -140,6 +141,11 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
APInt::getOneBitSet(BitWidth, D->getZExtValue()));
}
FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
+ // We might have the 'exact' flag set at this point, which will no longer
+ // be correct after we make the replacement.
+ if (UseInst->isExact() &&
+ SE->getSCEV(IVSrc) != SE->getMulExpr(FoldedExpr, SE->getSCEV(D)))
+ MustDropExactFlag = true;
}
// We have something that might fold its operand. Compare SCEVs.
if (!SE->isSCEVable(UseInst->getType()))
@@ -155,6 +161,9 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
UseInst->setOperand(OperIdx, IVSrc);
assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
+ if (MustDropExactFlag)
+ UseInst->dropPoisonGeneratingFlags();
+
++NumElimOperand;
Changed = true;
if (IVOperand->use_empty())
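
A plain-integer analogy for why the 'exact' flag has to go, assuming nothing about the surrounding SCEV machinery: 'exact' promises the division leaves no remainder, and once the dividend is replaced by the folded value that promise can stop holding, at which point keeping the flag would make the result poison.

// Analogy only: models the remainder check that decides MustDropExactFlag.
#include <cstdio>

int main() {
  const long Divisor = 4;
  const long OriginalDividend = 12; // 'exact' was valid: 12 % 4 == 0
  const long FoldedDividend = 6;    // dividend after operand folding

  bool StillExact = (FoldedDividend % Divisor == 0);
  if (!StillExact)
    std::printf("remainder %ld -> the exact flag must be dropped\n",
                FoldedDividend % Divisor);
  (void)OriginalDividend;
  return 0;
}
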
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 15e035874002..1bb26caa2af2 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
@@ -22,6 +23,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -150,6 +152,32 @@ static bool isLocallyOpenedFile(Value *File, CallInst *CI, IRBuilder<> &B,
return true;
}
+static bool isOnlyUsedInComparisonWithZero(Value *V) {
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
+ const DataLayout &DL) {
+ if (!isOnlyUsedInComparisonWithZero(CI))
+ return false;
+
+ if (!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL))
+ return false;
+
+ if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
+ return false;
+
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// String and Memory Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -322,6 +350,21 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
B, DL, TLI);
}
+ // strcmp to memcmp
+ if (!HasStr1 && HasStr2) {
+ if (canTransformToMemCmp(CI, Str1P, Len2, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
+ TLI);
+ } else if (HasStr1 && !HasStr2) {
+ if (canTransformToMemCmp(CI, Str2P, Len1, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
+ TLI);
+ }
+
return nullptr;
}
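
At the C level the new strcmp fold corresponds to the rewrite sketched below; the guard conditions mirror canTransformToMemCmp(): the call result is only compared against zero, the non-constant pointer is known dereferenceable for the literal's length including the terminating NUL, and the function is not built with MemorySanitizer. The buffer sizes in the sketch are chosen to satisfy that precondition.

// Source-level sketch of the strcmp(x, "lit") -> memcmp(x, "lit", len+1)
// rewrite; both sides are only meaningful when used as an equality test.
#include <cstdio>
#include <cstring>

static bool equalsAbcStrcmp(const char *S) { return std::strcmp(S, "abc") == 0; }
static bool equalsAbcMemcmp(const char *S) {
  // 4 == strlen("abc") + 1; S must be dereferenceable for all 4 bytes.
  return std::memcmp(S, "abc", 4) == 0;
}

int main() {
  char Buf1[8] = "abc";
  char Buf2[8] = "abx";
  std::printf("%d %d\n", equalsAbcStrcmp(Buf1), equalsAbcMemcmp(Buf1)); // 1 1
  std::printf("%d %d\n", equalsAbcStrcmp(Buf2), equalsAbcMemcmp(Buf2)); // 0 0
  return 0;
}
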
@@ -361,6 +404,26 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+
+ // strncmp to memcmp
+ if (!HasStr1 && HasStr2) {
+ Len2 = std::min(Len2, Length);
+ if (canTransformToMemCmp(CI, Str1P, Len2, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
+ TLI);
+ } else if (HasStr1 && !HasStr2) {
+ Len1 = std::min(Len1, Length);
+ if (canTransformToMemCmp(CI, Str2P, Len1, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
+ TLI);
+ }
+
return nullptr;
}
@@ -735,8 +798,11 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
Bitfield.setBit((unsigned char)C);
Value *BitfieldC = B.getInt(Bitfield);
- // First check that the bit field access is within bounds.
+ // Adjust width of "C" to the bitfield width, then mask off the high bits.
Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType());
+ C = B.CreateAnd(C, B.getIntN(Width, 0xFF));
+
+ // First check that the bit field access is within bounds.
Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
"memchr.bounds");
@@ -860,8 +926,7 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
}
/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
-static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
- const TargetLibraryInfo &TLI) {
+Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) {
// This has to be a memset of zeros (bzero).
auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
if (!FillValue || FillValue->getZExtValue() != 0)
@@ -881,7 +946,7 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
return nullptr;
LibFunc Func;
- if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
+ if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
Func != LibFunc_malloc)
return nullptr;
@@ -896,18 +961,18 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
Malloc->getArgOperand(0), Malloc->getAttributes(),
- B, TLI);
+ B, *TLI);
if (!Calloc)
return nullptr;
Malloc->replaceAllUsesWith(Calloc);
- Malloc->eraseFromParent();
+ eraseFromParent(Malloc);
return Calloc;
}
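
The fold reads naturally at the source level: zero-filling a freshly malloc'ed block is exactly what calloc already does, so the pair collapses into a single call. A small sketch, assuming only the standard C allocation functions:

// memset(malloc(n), 0, n) -> calloc(1, n): both produce n zeroed bytes.
#include <cstdio>
#include <cstdlib>
#include <cstring>

int main() {
  const size_t N = 32;

  void *A = std::malloc(N);
  if (A) std::memset(A, 0, N);   // the original pattern

  void *B = std::calloc(1, N);   // the folded form

  bool BothZero = A && B &&
                  std::memcmp(A, B, N) == 0; // both buffers are all zeros
  std::printf("equivalent: %d\n", BothZero);
  std::free(A);
  std::free(B);
  return 0;
}
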
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
- if (auto *Calloc = foldMallocMemset(CI, B, *TLI))
+ if (auto *Calloc = foldMallocMemset(CI, B))
return Calloc;
// memset(p, v, n) -> llvm.memset(align 1 p, v, n)
@@ -927,6 +992,20 @@ Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) {
// Math Library Optimizations
//===----------------------------------------------------------------------===//
+// Replace a libcall \p CI with a call to intrinsic \p IID
+static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ Module *M = CI->getModule();
+ Value *V = CI->getArgOperand(0);
+ Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
+ CallInst *NewCall = B.CreateCall(F, V);
+ NewCall->takeName(CI);
+ return NewCall;
+}
+
/// Return a variant of Val with float type.
/// Currently this works in two cases: If Val is an FPExtension of a float
/// value to something bigger, simply return the operand.
@@ -949,104 +1028,75 @@ static Value *valueHasFloatPrecision(Value *Val) {
return nullptr;
}
-/// Shrink double -> float for unary functions like 'floor'.
-static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
- bool CheckRetType) {
- Function *Callee = CI->getCalledFunction();
- // We know this libcall has a valid prototype, but we don't know which.
+/// Shrink double -> float functions.
+static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
+ bool isBinary, bool isPrecise = false) {
if (!CI->getType()->isDoubleTy())
return nullptr;
- if (CheckRetType) {
- // Check if all the uses for function like 'sin' are converted to float.
+ // If not all the uses of the function are converted to float, then bail out.
+ // This matters if the precision of the result is more important than the
+ // precision of the arguments.
+ if (isPrecise)
for (User *U : CI->users()) {
FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
if (!Cast || !Cast->getType()->isFloatTy())
return nullptr;
}
- }
- // If this is something like 'floor((double)floatval)', convert to floorf.
- Value *V = valueHasFloatPrecision(CI->getArgOperand(0));
- if (V == nullptr)
+ // If this is something like 'g((double) float)', convert to 'gf(float)'.
+ Value *V[2];
+ V[0] = valueHasFloatPrecision(CI->getArgOperand(0));
+ V[1] = isBinary ? valueHasFloatPrecision(CI->getArgOperand(1)) : nullptr;
+ if (!V[0] || (isBinary && !V[1]))
return nullptr;
// If call isn't an intrinsic, check that it isn't within a function with the
- // same name as the float version of this call.
+ // same name as the float version of this call; otherwise the result is an
+ // infinite loop. For example, from MinGW-w64:
//
- // e.g. inline float expf(float val) { return (float) exp((double) val); }
- //
- // A similar such definition exists in the MinGW-w64 math.h header file which
- // when compiled with -O2 -ffast-math causes the generation of infinite loops
- // where expf is called.
- if (!Callee->isIntrinsic()) {
- const Function *F = CI->getFunction();
- StringRef FName = F->getName();
- StringRef CalleeName = Callee->getName();
- if ((FName.size() == (CalleeName.size() + 1)) &&
- (FName.back() == 'f') &&
- FName.startswith(CalleeName))
+ // float expf(float val) { return (float) exp((double) val); }
+ Function *CalleeFn = CI->getCalledFunction();
+ StringRef CalleeNm = CalleeFn->getName();
+ AttributeList CalleeAt = CalleeFn->getAttributes();
+ if (CalleeFn && !CalleeFn->isIntrinsic()) {
+ const Function *Fn = CI->getFunction();
+ StringRef FnName = Fn->getName();
+ if (FnName.back() == 'f' &&
+ FnName.size() == (CalleeNm.size() + 1) &&
+ FnName.startswith(CalleeNm))
return nullptr;
}
- // Propagate fast-math flags from the existing call to the new call.
+ // Propagate the math semantics from the current function to the new function.
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
- // floor((double)floatval) -> (double)floorf(floatval)
- if (Callee->isIntrinsic()) {
+ // g((double) float) -> (double) gf(float)
+ Value *R;
+ if (CalleeFn->isIntrinsic()) {
Module *M = CI->getModule();
- Intrinsic::ID IID = Callee->getIntrinsicID();
- Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
- V = B.CreateCall(F, V);
- } else {
- // The call is a library call rather than an intrinsic.
- V = emitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+ Intrinsic::ID IID = CalleeFn->getIntrinsicID();
+ Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
+ R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
}
+ else
+ R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt)
+ : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt);
- return B.CreateFPExt(V, B.getDoubleTy());
+ return B.CreateFPExt(R, B.getDoubleTy());
}
-// Replace a libcall \p CI with a call to intrinsic \p IID
-static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
- // Propagate fast-math flags from the existing call to the new call.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
- Module *M = CI->getModule();
- Value *V = CI->getArgOperand(0);
- Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
- CallInst *NewCall = B.CreateCall(F, V);
- NewCall->takeName(CI);
- return NewCall;
+/// Shrink double -> float for unary functions.
+static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
+ bool isPrecise = false) {
+ return optimizeDoubleFP(CI, B, false, isPrecise);
}
-/// Shrink double -> float for binary functions like 'fmin/fmax'.
-static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- // We know this libcall has a valid prototype, but we don't know which.
- if (!CI->getType()->isDoubleTy())
- return nullptr;
-
- // If this is something like 'fmin((double)floatval1, (double)floatval2)',
- // or fmin(1.0, (double)floatval), then we convert it to fminf.
- Value *V1 = valueHasFloatPrecision(CI->getArgOperand(0));
- if (V1 == nullptr)
- return nullptr;
- Value *V2 = valueHasFloatPrecision(CI->getArgOperand(1));
- if (V2 == nullptr)
- return nullptr;
-
- // Propagate fast-math flags from the existing call to the new call.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
- // fmin((double)floatval1, (double)floatval2)
- // -> (double)fminf(floatval1, floatval2)
- // TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP().
- Value *V = emitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
- Callee->getAttributes());
- return B.CreateFPExt(V, B.getDoubleTy());
+/// Shrink double -> float for binary functions.
+static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B,
+ bool isPrecise = false) {
+ return optimizeDoubleFP(CI, B, true, isPrecise);
}
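
A standalone illustration of the shrink, assuming a libm with the usual float variants: when the argument was widened from float, the float-named function can be called directly. For a function such as floor the result happens to be identical; for precision-sensitive calls the pass only shrinks when every use of the result is truncated back to float or when unsafe FP shrinking is enabled, since the reduced precision is otherwise observable.

// g((double)f) -> (double)gf(f): exact for floor, approximate for sin.
#include <cmath>
#include <cstdio>

int main() {
  float F = 123.75f;

  double WideFloor = std::floor(static_cast<double>(F));
  double NarrowFloor = static_cast<double>(std::floor(F)); // floorf path
  std::printf("floor: %g %g (identical)\n", WideFloor, NarrowFloor);

  float WideSin = static_cast<float>(std::sin(static_cast<double>(F)));
  float NarrowSin = std::sin(F); // sinf path: may differ in the last ulp
  std::printf("sin:   %.9g %.9g\n", WideSin, NarrowSin);
  return 0;
}
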
// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
@@ -1078,20 +1128,39 @@ Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) {
return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs");
}
-Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Ret = nullptr;
- StringRef Name = Callee->getName();
- if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
-
- // cos(-x) -> cos(x)
- Value *Op1 = CI->getArgOperand(0);
- if (BinaryOperator::isFNeg(Op1)) {
- BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
- return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
+static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
+ IRBuilder<> &B) {
+ if (!isa<FPMathOperator>(Call))
+ return nullptr;
+
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(Call->getFastMathFlags());
+
+ // TODO: Can this be shared to also handle LLVM intrinsics?
+ Value *X;
+ switch (Func) {
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
+ // sin(-X) --> -sin(X)
+ // tan(-X) --> -tan(X)
+ if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X)))))
+ return B.CreateFNeg(B.CreateCall(Call->getCalledFunction(), X));
+ break;
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
+ // cos(-X) --> cos(X)
+ if (match(Call->getArgOperand(0), m_FNeg(m_Value(X))))
+ return B.CreateCall(Call->getCalledFunction(), X, "cos");
+ break;
+ default:
+ break;
}
- return Ret;
+ return nullptr;
}
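
The reflections rest on the odd/even symmetry of the functions: sin and tan are odd, cos is even. A quick numeric check, noting that in the pass the rewrite is additionally guarded by the call's fast-math flags and, for the odd functions, by the fneg having a single use:

// sin(-x) == -sin(x), tan(-x) == -tan(x), cos(-x) == cos(x).
#include <cmath>
#include <cstdio>

int main() {
  double X = 0.7853981633974483; // pi/4
  std::printf("sin: %.17g %.17g\n", std::sin(-X), -std::sin(X));
  std::printf("tan: %.17g %.17g\n", std::tan(-X), -std::tan(X));
  std::printf("cos: %.17g %.17g\n", std::cos(-X), std::cos(X));
  return 0;
}
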
static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
@@ -1119,37 +1188,175 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
return InnerChain[Exp];
}
-/// Use square root in place of pow(x, +/-0.5).
-Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
- // TODO: There is some subset of 'fast' under which these transforms should
- // be allowed.
- if (!Pow->isFast())
- return nullptr;
-
- Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+/// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
+/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x).
+Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
+ Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
+ Module *Mod = Pow->getModule();
Type *Ty = Pow->getType();
+ bool Ignored;
- const APFloat *ExpoF;
- if (!match(Expo, m_APFloat(ExpoF)) ||
- (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
+ // Evaluate special cases related to a nested function as the base.
+
+ // pow(exp(x), y) -> exp(x * y)
+ // pow(exp2(x), y) -> exp2(x * y)
+ // If exp{,2}() is used only once, it is better to fold the two
+ // transcendental math functions into one. If it is used again, exp{,2}()
+ // would still have to be called with the original argument, so both
+ // original transcendental functions would be kept. However, this
+ // transformation is only safe with fully relaxed math semantics, since,
+ // besides rounding differences, it changes overflow and underflow behavior
+ // quite dramatically. For example:
+ // pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
+ // Whereas:
+ // exp(1000 * 0.001) = exp(1)
+ // TODO: Loosen the requirement for fully relaxed math semantics.
+ // TODO: Handle exp10() when more targets have it available.
+ CallInst *BaseFn = dyn_cast<CallInst>(Base);
+ if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
+ LibFunc LibFn;
+
+ Function *CalleeFn = BaseFn->getCalledFunction();
+ if (CalleeFn &&
+ TLI->getLibFunc(CalleeFn->getName(), LibFn) && TLI->has(LibFn)) {
+ StringRef ExpName;
+ Intrinsic::ID ID;
+ Value *ExpFn;
+ LibFunc LibFnFloat;
+ LibFunc LibFnDouble;
+ LibFunc LibFnLongDouble;
+
+ switch (LibFn) {
+ default:
+ return nullptr;
+ case LibFunc_expf: case LibFunc_exp: case LibFunc_expl:
+ ExpName = TLI->getName(LibFunc_exp);
+ ID = Intrinsic::exp;
+ LibFnFloat = LibFunc_expf;
+ LibFnDouble = LibFunc_exp;
+ LibFnLongDouble = LibFunc_expl;
+ break;
+ case LibFunc_exp2f: case LibFunc_exp2: case LibFunc_exp2l:
+ ExpName = TLI->getName(LibFunc_exp2);
+ ID = Intrinsic::exp2;
+ LibFnFloat = LibFunc_exp2f;
+ LibFnDouble = LibFunc_exp2;
+ LibFnLongDouble = LibFunc_exp2l;
+ break;
+ }
+
+ // Create new exp{,2}() with the product as its argument.
+ Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
+ ExpFn = BaseFn->doesNotAccessMemory()
+ ? B.CreateCall(Intrinsic::getDeclaration(Mod, ID, Ty),
+ FMul, ExpName)
+ : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat,
+ LibFnLongDouble, B,
+ BaseFn->getAttributes());
+
+ // Since the new exp{,2}() is different from the original one, dead code
+ // elimination cannot be trusted to remove it, since it may have side
+ // effects (e.g., errno). Because the only consumer of the original
+ // exp{,2}() is this pow() call, it has to be explicitly erased.
+ BaseFn->replaceAllUsesWith(ExpFn);
+ eraseFromParent(BaseFn);
+
+ return ExpFn;
+ }
+ }
+
+ // Evaluate special cases related to a constant base.
+
+ const APFloat *BaseF;
+ if (!match(Pow->getArgOperand(0), m_APFloat(BaseF)))
return nullptr;
+ // pow(2.0 ** n, x) -> exp2(n * x)
+ if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
+ APFloat BaseR = APFloat(1.0);
+ BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
+ BaseR = BaseR / *BaseF;
+ bool IsInteger = BaseF->isInteger(),
+ IsReciprocal = BaseR.isInteger();
+ const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
+ APSInt NI(64, false);
+ if ((IsInteger || IsReciprocal) &&
+ !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
+ NI > 1 && NI.isPowerOf2()) {
+ double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
+ Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
+ if (Pow->doesNotAccessMemory())
+ return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
+ FMul, "exp2");
+ else
+ return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
+ LibFunc_exp2l, B, Attrs);
+ }
+ }
+
+ // pow(10.0, x) -> exp10(x)
+ // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
+ if (match(Base, m_SpecificFP(10.0)) &&
+ hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+ return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
+ LibFunc_exp10l, B, Attrs);
+
+ return nullptr;
+}
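
The constant-base case can be checked numerically. For a base that is a power of two, or the reciprocal of one, pow(b, x) equals exp2(log2(b) * x); the pass performs the rewrite only under the fast-math constraints noted above because rounding, overflow and underflow behavior all change. A small check:

// pow(8.0, x) ~= exp2(3 * x) and pow(0.125, x) ~= exp2(-3 * x).
#include <cmath>
#include <cstdio>

int main() {
  double X = 1.7;
  std::printf("%.17g %.17g\n", std::pow(8.0, X), std::exp2(3.0 * X));
  std::printf("%.17g %.17g\n", std::pow(0.125, X), std::exp2(-3.0 * X));
  return 0;
}
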
+
+static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
+ Module *M, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
// If errno is never set, then use the intrinsic for sqrt().
- if (Pow->hasFnAttr(Attribute::ReadNone)) {
- Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(),
- Intrinsic::sqrt, Ty);
- Sqrt = B.CreateCall(SqrtFn, Base);
+ if (NoErrno) {
+ Function *SqrtFn =
+ Intrinsic::getDeclaration(M, Intrinsic::sqrt, V->getType());
+ return B.CreateCall(SqrtFn, V, "sqrt");
}
+
// Otherwise, use the libcall for sqrt().
- else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl))
+ if (hasUnaryFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl))
// TODO: We also should check that the target can in fact lower the sqrt()
// libcall. We currently have no way to ask this question, so we ask if
// the target has a sqrt() libcall, which is not exactly the same.
- Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B,
- Pow->getCalledFunction()->getAttributes());
- else
+ return emitUnaryFloatFnCall(V, TLI, LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl, B, Attrs);
+
+ return nullptr;
+}
+
+/// Use square root in place of pow(x, +/-0.5).
+Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
+ Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
+ Module *Mod = Pow->getModule();
+ Type *Ty = Pow->getType();
+
+ const APFloat *ExpoF;
+ if (!match(Expo, m_APFloat(ExpoF)) ||
+ (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
return nullptr;
+ Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI);
+ if (!Sqrt)
+ return nullptr;
+
+ // Handle signed zero base by expanding to fabs(sqrt(x)).
+ if (!Pow->hasNoSignedZeros()) {
+ Function *FAbsFn = Intrinsic::getDeclaration(Mod, Intrinsic::fabs, Ty);
+ Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs");
+ }
+
+ // Handle a non-finite base by expanding to
+ // (x == -infinity ? +infinity : sqrt(x)).
+ if (!Pow->hasNoInfs()) {
+ Value *PosInf = ConstantFP::getInfinity(Ty),
+ *NegInf = ConstantFP::getInfinity(Ty, true);
+ Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
+ Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt);
+ }
+
// If the exponent is negative, then get the reciprocal.
if (ExpoF->isNegative())
Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
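
The fabs() and -infinity special cases exist because pow() and sqrt() disagree exactly there: pow(-0.0, 0.5) is +0.0 while sqrt(-0.0) is -0.0, and pow(-inf, 0.5) is +inf while sqrt(-inf) is NaN. A small check of those edge cases (the pass skips the extra code when the call carries nsz or ninf flags):

// Why pow(x, 0.5) expands to (x == -inf ? +inf : fabs(sqrt(x))).
#include <cmath>
#include <cstdio>

static double powHalfExpansion(double X) {
  return X == -INFINITY ? INFINITY : std::fabs(std::sqrt(X));
}

int main() {
  const double Cases[] = {2.0, -0.0, -INFINITY};
  for (double X : Cases)
    std::printf("x=%-5g  pow(x,0.5)=%g  expansion=%g\n", X, std::pow(X, 0.5),
                powHalfExpansion(X));
  return 0;
}
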
@@ -1160,134 +1367,109 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
Function *Callee = Pow->getCalledFunction();
- AttributeList Attrs = Callee->getAttributes();
StringRef Name = Callee->getName();
- Module *Module = Pow->getModule();
Type *Ty = Pow->getType();
Value *Shrunk = nullptr;
bool Ignored;
- if (UnsafeFPShrink &&
- Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
- Shrunk = optimizeUnaryDoubleFP(Pow, B, true);
+ // Bail out if simplifying libcalls to pow() is disabled.
+ if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
+ return nullptr;
// Propagate the math semantics from the call to any created instructions.
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(Pow->getFastMathFlags());
+ // Shrink pow() to powf() if the arguments are single precision,
+ // unless the result is expected to be double precision.
+ if (UnsafeFPShrink &&
+ Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
+ Shrunk = optimizeBinaryDoubleFP(Pow, B, true);
+
// Evaluate special cases related to the base.
// pow(1.0, x) -> 1.0
- if (match(Base, m_SpecificFP(1.0)))
+ if (match(Base, m_FPOne()))
return Base;
- // pow(2.0, x) -> exp2(x)
- if (match(Base, m_SpecificFP(2.0))) {
- Value *Exp2 = Intrinsic::getDeclaration(Module, Intrinsic::exp2, Ty);
- return B.CreateCall(Exp2, Expo, "exp2");
- }
-
- // pow(10.0, x) -> exp10(x)
- if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base))
- // There's no exp10 intrinsic yet, but, maybe, some day there shall be one.
- if (BaseC->isExactlyValue(10.0) &&
- hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
- return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs);
-
- // pow(exp(x), y) -> exp(x * y)
- // pow(exp2(x), y) -> exp2(x * y)
- // We enable these only with fast-math. Besides rounding differences, the
- // transformation changes overflow and underflow behavior quite dramatically.
- // Example: x = 1000, y = 0.001.
- // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
- auto *BaseFn = dyn_cast<CallInst>(Base);
- if (BaseFn && BaseFn->isFast() && Pow->isFast()) {
- LibFunc LibFn;
- Function *CalleeFn = BaseFn->getCalledFunction();
- if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
- (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) {
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(Pow->getFastMathFlags());
-
- Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
- return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B,
- CalleeFn->getAttributes());
- }
- }
+ if (Value *Exp = replacePowWithExp(Pow, B))
+ return Exp;
// Evaluate special cases related to the exponent.
- if (Value *Sqrt = replacePowWithSqrt(Pow, B))
- return Sqrt;
-
- ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo);
- if (!ExpoC)
- return Shrunk;
-
// pow(x, -1.0) -> 1.0 / x
- if (ExpoC->isExactlyValue(-1.0))
+ if (match(Expo, m_SpecificFP(-1.0)))
return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
// pow(x, 0.0) -> 1.0
- if (ExpoC->getValueAPF().isZero())
- return ConstantFP::get(Ty, 1.0);
+ if (match(Expo, m_SpecificFP(0.0)))
+ return ConstantFP::get(Ty, 1.0);
// pow(x, 1.0) -> x
- if (ExpoC->isExactlyValue(1.0))
+ if (match(Expo, m_FPOne()))
return Base;
// pow(x, 2.0) -> x * x
- if (ExpoC->isExactlyValue(2.0))
+ if (match(Expo, m_SpecificFP(2.0)))
return B.CreateFMul(Base, Base, "square");
- // FIXME: Correct the transforms and pull this into replacePowWithSqrt().
- if (ExpoC->isExactlyValue(0.5) &&
- hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) {
- // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
- // This is faster than calling pow(), and still handles -0.0 and
- // negative infinity correctly.
- // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
- Value *PosInf = ConstantFP::getInfinity(Ty);
- Value *NegInf = ConstantFP::getInfinity(Ty, true);
-
- // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is
- // an intrinsic, to match errno semantics.
- Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt),
- B, Attrs);
- Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty);
- Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs");
- Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
- Sqrt = B.CreateSelect(FCmp, PosInf, FAbs);
+ if (Value *Sqrt = replacePowWithSqrt(Pow, B))
return Sqrt;
- }
- // pow(x, n) -> x * x * x * ....
- if (Pow->isFast()) {
- APFloat ExpoA = abs(ExpoC->getValueAPF());
- // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32.
- // This transformation applies to integer exponents only.
- if (!ExpoA.isInteger() ||
- ExpoA.compare
- (APFloat(ExpoA.getSemantics(), 32.0)) == APFloat::cmpGreaterThan)
- return nullptr;
+ // pow(x, n) -> x * x * x * ...
+ const APFloat *ExpoF;
+ if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
+ // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
+ // If the exponent is an integer+0.5 we generate a call to sqrt and an
+ // additional fmul.
+ // TODO: This whole transformation should be backend specific (e.g. some
+ // backends might prefer libcalls or the limit for the exponent might
+ // be different) and it should also consider optimizing for size.
+ APFloat LimF(ExpoF->getSemantics(), 33.0),
+ ExpoA(abs(*ExpoF));
+ if (ExpoA.compare(LimF) == APFloat::cmpLessThan) {
+ // This transformation applies to integer or integer+0.5 exponents only.
+ // For integer+0.5, we create a sqrt(Base) call.
+ Value *Sqrt = nullptr;
+ if (!ExpoA.isInteger()) {
+ APFloat Expo2 = ExpoA;
+ // To check if ExpoA is an integer + 0.5, we add it to itself. If there
+ // is no floating point exception and the result is an integer, then
+ // ExpoA == integer + 0.5
+ if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
+ return nullptr;
+
+ if (!Expo2.isInteger())
+ return nullptr;
+
+ Sqrt =
+ getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+ Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
+ }
- // We will memoize intermediate products of the Addition Chain.
- Value *InnerChain[33] = {nullptr};
- InnerChain[1] = Base;
- InnerChain[2] = B.CreateFMul(Base, Base, "square");
+ // We will memoize intermediate products of the Addition Chain.
+ Value *InnerChain[33] = {nullptr};
+ InnerChain[1] = Base;
+ InnerChain[2] = B.CreateFMul(Base, Base, "square");
- // We cannot readily convert a non-double type (like float) to a double.
- // So we first convert it to something which could be converted to double.
- ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
- Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
+ // We cannot readily convert a non-double type (like float) to a double.
+ // So we first convert it to something which could be converted to double.
+ ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
+ Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
- // If the exponent is negative, then get the reciprocal.
- if (ExpoC->isNegative())
- FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
- return FMul;
+ // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x).
+ if (Sqrt)
+ FMul = B.CreateFMul(FMul, Sqrt);
+
+ // If the exponent is negative, then get the reciprocal.
+ if (ExpoF->isNegative())
+ FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
+
+ return FMul;
+ }
}
- return nullptr;
+ return Shrunk;
}
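
The exponent-expansion path can also be spelled out at the source level. An integer-plus-half exponent such as 3.5 becomes a chain of multiplies plus one sqrt(), and a negative exponent ends with a reciprocal; the pass bounds the absolute exponent below 33 so that at most seven multiplies are emitted, and it applies only to fast-math calls. A hand-expanded check:

// pow(x, 3.5) ~= x*x*x * sqrt(x); pow(x, -3.5) is its reciprocal.
#include <cmath>
#include <cstdio>

int main() {
  double X = 1.3;
  double Cube = X * X * X;
  double PosExpansion = Cube * std::sqrt(X);
  double NegExpansion = 1.0 / PosExpansion;
  std::printf("%.17g %.17g\n", std::pow(X, 3.5), PosExpansion);
  std::printf("%.17g %.17g\n", std::pow(X, -3.5), NegExpansion);
  return 0;
}
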
Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
@@ -2285,11 +2467,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
if (CI->isStrictFP())
return nullptr;
+ if (Value *V = optimizeTrigReflections(CI, Func, Builder))
+ return V;
+
switch (Func) {
- case LibFunc_cosf:
- case LibFunc_cos:
- case LibFunc_cosl:
- return optimizeCos(CI, Builder);
case LibFunc_sinpif:
case LibFunc_sinpi:
case LibFunc_cospif:
@@ -2344,6 +2525,7 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
case LibFunc_exp:
case LibFunc_exp10:
case LibFunc_expm1:
+ case LibFunc_cos:
case LibFunc_sin:
case LibFunc_sinh:
case LibFunc_tanh:
@@ -2425,7 +2607,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
// If we were able to further simplify, remove the now redundant call.
SimplifiedCI->replaceAllUsesWith(V);
- SimplifiedCI->eraseFromParent();
+ eraseFromParent(SimplifiedCI);
return V;
}
}
@@ -2504,15 +2686,20 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
LibCallSimplifier::LibCallSimplifier(
const DataLayout &DL, const TargetLibraryInfo *TLI,
OptimizationRemarkEmitter &ORE,
- function_ref<void(Instruction *, Value *)> Replacer)
+ function_ref<void(Instruction *, Value *)> Replacer,
+ function_ref<void(Instruction *)> Eraser)
: FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
- UnsafeFPShrink(false), Replacer(Replacer) {}
+ UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// Indirect through the replacer used in this instance.
Replacer(I, With);
}
+void LibCallSimplifier::eraseFromParent(Instruction *I) {
+ Eraser(I);
+}
+
// TODO:
// Additional cases that we need to add to this file:
//
diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
index f8d758c54983..5db4d2e4df9d 100644
--- a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -181,14 +181,12 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
GVtoClusterMap.member_end()), I));
- llvm::sort(Sets.begin(), Sets.end(),
- [](const SortType &a, const SortType &b) {
- if (a.first == b.first)
- return a.second->getData()->getName() >
- b.second->getData()->getName();
- else
- return a.first > b.first;
- });
+ llvm::sort(Sets, [](const SortType &a, const SortType &b) {
+ if (a.first == b.first)
+ return a.second->getData()->getName() > b.second->getData()->getName();
+ else
+ return a.first > b.first;
+ });
for (auto &I : Sets) {
unsigned CurrentClusterID = BalancinQueue.top().first;
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
index afd842f59911..95416de07439 100644
--- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -26,6 +26,7 @@ using namespace llvm;
void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeAddDiscriminatorsLegacyPassPass(Registry);
initializeBreakCriticalEdgesPass(Registry);
+ initializeCanonicalizeAliasesLegacyPassPass(Registry);
initializeInstNamerPass(Registry);
initializeLCSSAWrapperPassPass(Registry);
initializeLibCallsShrinkWrapLegacyPassPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 5f3d127202ad..9ff18328c219 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -79,6 +79,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
@@ -205,12 +206,12 @@ private:
unsigned Alignment);
};
-class LoadStoreVectorizer : public FunctionPass {
+class LoadStoreVectorizerLegacyPass : public FunctionPass {
public:
static char ID;
- LoadStoreVectorizer() : FunctionPass(ID) {
- initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
+ LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
+ initializeLoadStoreVectorizerLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -230,30 +231,23 @@ public:
} // end anonymous namespace
-char LoadStoreVectorizer::ID = 0;
+char LoadStoreVectorizerLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoadStoreVectorizer, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
"Vectorize load and Store instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
+INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
"Vectorize load and store instructions", false, false)
Pass *llvm::createLoadStoreVectorizerPass() {
- return new LoadStoreVectorizer();
+ return new LoadStoreVectorizerLegacyPass();
}
-// The real propagateMetadata expects a SmallVector<Value*>, but we deal in
-// vectors of Instructions.
-static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
- SmallVector<Value *, 8> VL(IL.begin(), IL.end());
- propagateMetadata(I, VL);
-}
-
-bool LoadStoreVectorizer::runOnFunction(Function &F) {
+bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
// Don't vectorize when the attribute NoImplicitFloat is used.
if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;
@@ -268,6 +262,30 @@ bool LoadStoreVectorizer::runOnFunction(Function &F) {
return V.run();
}
+PreservedAnalyses LoadStoreVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
+ // Don't vectorize when the attribute NoImplicitFloat is used.
+ if (F.hasFnAttribute(Attribute::NoImplicitFloat))
+ return PreservedAnalyses::all();
+
+ AliasAnalysis &AA = AM.getResult<AAManager>(F);
+ DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+
+ Vectorizer V(F, AA, DT, SE, TTI);
+ bool Changed = V.run();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return Changed ? PA : PreservedAnalyses::all();
+}
+
+// The real propagateMetadata expects a SmallVector<Value*>, but we deal in
+// vectors of Instructions.
+static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
+ SmallVector<Value *, 8> VL(IL.begin(), IL.end());
+ propagateMetadata(I, VL);
+}
+
// Vectorizer Implementation
bool Vectorizer::run() {
bool Changed = false;
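The hunks above port the pass to the new pass manager: the legacy wrapper keeps the createLoadStoreVectorizerPass() entry point, while LoadStoreVectorizerPass::run() pulls its analyses from a FunctionAnalysisManager and reports what it preserves. A minimal sketch of that new-pass-manager idiom, assuming the usual LLVM headers (ExamplePass and doWork are hypothetical names, not part of this diff):

#include "llvm/IR/PassManager.h"

using namespace llvm;

// Skeleton of the run() pattern used by LoadStoreVectorizerPass above: fetch
// analyses from the manager, transform, and report only what survives.
struct ExamplePass : PassInfoMixin<ExamplePass> {
  static bool doWork(Function &) { return false; } // hypothetical transform

  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    bool Changed = doWork(F);
    if (!Changed)
      return PreservedAnalyses::all(); // nothing touched, keep everything
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();     // IR changed, but the CFG did not
    return PA;
  }
};

A pass written this way is then scheduled with FunctionPassManager::addPass(), which is how a pass like LoadStoreVectorizerPass is typically added to a new-pass-manager pipeline.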
@@ -954,11 +972,6 @@ bool Vectorizer::vectorizeStoreChain(
// try again.
unsigned EltSzInBytes = Sz / 8;
unsigned SzInBytes = EltSzInBytes * ChainSize;
- if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
- auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
- vectorizeStoreChain(Chains.second, InstructionsProcessed);
- }
VectorType *VecTy;
VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
@@ -991,14 +1004,23 @@ bool Vectorizer::vectorizeStoreChain(
// If the store is going to be misaligned, don't vectorize it.
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
- if (S0->getPointerAddressSpace() != 0)
- return false;
+ if (S0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
+ auto Chains = splitOddVectorElts(Chain, Sz);
+ return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
+ vectorizeStoreChain(Chains.second, InstructionsProcessed);
+ }
unsigned NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
StackAdjustedAlignment,
DL, S0, nullptr, &DT);
- if (NewAlign < StackAdjustedAlignment)
- return false;
+ if (NewAlign != 0)
+ Alignment = NewAlign;
+ }
+
+ if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
+ auto Chains = splitOddVectorElts(Chain, Sz);
+ return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
+ vectorizeStoreChain(Chains.second, InstructionsProcessed);
}
BasicBlock::iterator First, Last;
@@ -1037,13 +1059,11 @@ bool Vectorizer::vectorizeStoreChain(
}
}
- // This cast is safe because Builder.CreateStore() always creates a bona fide
- // StoreInst.
- StoreInst *SI = cast<StoreInst>(
- Builder.CreateStore(Vec, Builder.CreateBitCast(S0->getPointerOperand(),
- VecTy->getPointerTo(AS))));
+ StoreInst *SI = Builder.CreateAlignedStore(
+ Vec,
+ Builder.CreateBitCast(S0->getPointerOperand(), VecTy->getPointerTo(AS)),
+ Alignment);
propagateMetadata(SI, Chain);
- SI->setAlignment(Alignment);
eraseInstructions(Chain);
++NumVectorInstructions;
@@ -1102,12 +1122,6 @@ bool Vectorizer::vectorizeLoadChain(
// try again.
unsigned EltSzInBytes = Sz / 8;
unsigned SzInBytes = EltSzInBytes * ChainSize;
- if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
- auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
- vectorizeLoadChain(Chains.second, InstructionsProcessed);
- }
-
VectorType *VecTy;
VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
if (VecLoadTy)
@@ -1132,18 +1146,27 @@ bool Vectorizer::vectorizeLoadChain(
// If the load is going to be misaligned, don't vectorize it.
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
- if (L0->getPointerAddressSpace() != 0)
- return false;
+ if (L0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
+ auto Chains = splitOddVectorElts(Chain, Sz);
+ return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
+ vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ }
unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
StackAdjustedAlignment,
DL, L0, nullptr, &DT);
- if (NewAlign < StackAdjustedAlignment)
- return false;
+ if (NewAlign != 0)
+ Alignment = NewAlign;
Alignment = NewAlign;
}
+ if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
+ auto Chains = splitOddVectorElts(Chain, Sz);
+ return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
+ vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ }
+
LLVM_DEBUG({
dbgs() << "LSV: Loads to vectorize:\n";
for (Instruction *I : Chain)
@@ -1159,11 +1182,8 @@ bool Vectorizer::vectorizeLoadChain(
Value *Bitcast =
Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
- // This cast is safe because Builder.CreateLoad always creates a bona fide
- // LoadInst.
- LoadInst *LI = cast<LoadInst>(Builder.CreateLoad(Bitcast));
+ LoadInst *LI = Builder.CreateAlignedLoad(Bitcast, Alignment);
propagateMetadata(LI, Chain);
- LI->setAlignment(Alignment);
if (VecLoadTy) {
SmallVector<Instruction *, 16> InstrsToErase;
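Another change visible in both chains above: the vectorized access is now created with an explicit alignment (CreateAlignedLoad / CreateAlignedStore, using the alignment possibly raised by getOrEnforceKnownAlignment) instead of creating the access and patching it afterwards with setAlignment(). A small sketch of that pattern, assuming the IRBuilder signatures used in this diff (the helper name and arguments are illustrative):

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Emit a vector load of VecTy through Ptr with the alignment baked into the
// builder call, mirroring the CreateAlignedLoad use in vectorizeLoadChain.
static LoadInst *emitAlignedVectorLoad(IRBuilder<> &Builder, Value *Ptr,
                                       VectorType *VecTy, unsigned Alignment,
                                       unsigned AddrSpace) {
  Value *Cast = Builder.CreateBitCast(Ptr, VecTy->getPointerTo(AddrSpace));
  return Builder.CreateAlignedLoad(Cast, Alignment, "wide.vec");
}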
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 697bc1b448d7..b44fe5a52a2f 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -80,10 +80,11 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
return false;
}
-LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
+LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
+ bool InterleaveOnlyWhenForced,
OptimizationRemarkEmitter &ORE)
: Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
- Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
+ Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
// Populate values with existing loop metadata.
@@ -98,19 +99,19 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
// consider the loop to have been already vectorized because there's
// nothing more that we can do.
IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
- LLVM_DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
+ LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
<< "LV: Interleaving disabled by the pass manager\n");
}
-bool LoopVectorizeHints::allowVectorization(Function *F, Loop *L,
- bool AlwaysVectorize) const {
+bool LoopVectorizeHints::allowVectorization(
+ Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
if (getForce() == LoopVectorizeHints::FK_Disabled) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
emitRemarkWithHints();
return false;
}
- if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
+ if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
emitRemarkWithHints();
return false;
@@ -434,7 +435,7 @@ static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
SmallPtrSetImpl<Value *> &AllowedExit) {
- // Reduction and Induction instructions are allowed to have exit users. All
+ // Reductions, Inductions and non-header phis are allowed to have exit users. All
// other instructions must not have external users.
if (!AllowedExit.count(Inst))
// Check that all of the users of the loop are inside the BB.
@@ -516,6 +517,18 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
return false;
}
+ // Check whether we are able to set up outer loop induction.
+ if (!setupOuterLoopInductions()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Not vectorizing: Unsupported outer loop Phi(s).\n");
+ ORE->emit(createMissedAnalysis("UnsupportedPhi")
+ << "Unsupported outer loop Phi(s)");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
return Result;
}
@@ -561,7 +574,8 @@ void LoopVectorizationLegality::addInductionPhi(
// back into the PHI node may have external users.
// We can allow those uses, except if the SCEVs we have for them rely
// on predicates that only hold within the loop, since allowing the exit
- // currently means re-using this SCEV outside the loop.
+ // currently means re-using this SCEV outside the loop (see PR33706 for more
+ // details).
if (PSE.getUnionPredicate().isAlwaysTrue()) {
AllowedExit.insert(Phi);
AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
@@ -570,6 +584,32 @@ void LoopVectorizationLegality::addInductionPhi(
LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}
+bool LoopVectorizationLegality::setupOuterLoopInductions() {
+ BasicBlock *Header = TheLoop->getHeader();
+
+ // Returns true if a given Phi is a supported induction.
+ auto isSupportedPhi = [&](PHINode &Phi) -> bool {
+ InductionDescriptor ID;
+ if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
+ ID.getKind() == InductionDescriptor::IK_IntInduction) {
+ addInductionPhi(&Phi, ID, AllowedExit);
+ return true;
+ } else {
+ // Bail out for any Phi in the outer loop header that is not a supported
+ // induction.
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Found unsupported PHI for outer loop vectorization.\n");
+ return false;
+ }
+ };
+
+ if (llvm::all_of(Header->phis(), isSupportedPhi))
+ return true;
+ else
+ return false;
+}
+
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *Header = TheLoop->getHeader();
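setupOuterLoopInductions above accepts the outer loop only when every header phi is an integer induction, using llvm::all_of over Header->phis(). A standalone sketch of that range idiom (the predicate here is illustrative):

#include "llvm/ADT/STLExtras.h"

#include <vector>

// Mirrors the llvm::all_of(Header->phis(), isSupportedPhi) check above:
// true only if every element satisfies the predicate.
static bool allEven(const std::vector<int> &Vals) {
  return llvm::all_of(Vals, [](int V) { return V % 2 == 0; });
}

Note that the trailing if/else in the hunk is behaviorally equivalent to returning the llvm::all_of result directly.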
@@ -597,14 +637,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// can convert it to select during if-conversion. No need to check if
// the PHIs in this block are induction or reduction variables.
if (BB != Header) {
- // Check that this instruction has no outside users or is an
- // identified reduction value with an outside user.
- if (!hasOutsideLoopUser(TheLoop, Phi, AllowedExit))
- continue;
- ORE->emit(createMissedAnalysis("NeitherInductionNorReduction", Phi)
- << "value could not be identified as "
- "an induction or reduction variable");
- return false;
+ // Non-header phi nodes that have outside uses can be vectorized. Add
+ // them to the list of allowed exits.
+ // Unsafe cyclic dependencies with header phis are identified during
+ // legalization for reduction, induction and first order
+ // recurrences.
+ continue;
}
// We only allow if-converted PHIs with exactly two incoming values.
@@ -625,6 +663,20 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
+ // TODO: Instead of recording the AllowedExit, it would be good to record the
+ // complementary set: NotAllowedExit. These include (but may not be
+ // limited to):
+ // 1. Reduction phis as they represent the one-before-last value, which
+ // is not available when vectorized
+ // 2. Induction phis and increment when SCEV predicates cannot be used
+ // outside the loop - see addInductionPhi
+ // 3. Non-Phis with outside uses when SCEV predicates cannot be used
+ // outside the loop - see call to hasOutsideLoopUser in the non-phi
+ // handling below
+ // 4. FirstOrderRecurrence phis that can possibly be handled by
+ // extraction.
+ // By recording these, we can then reason about ways to vectorize each
+ // of these NotAllowedExit.
InductionDescriptor ID;
if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
addInductionPhi(Phi, ID, AllowedExit);
@@ -662,10 +714,30 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
!isa<DbgInfoIntrinsic>(CI) &&
!(CI->getCalledFunction() && TLI &&
TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
- ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
- << "call instruction cannot be vectorized");
+ // If the call is a recognized math library call, it is likely that
+ // we can vectorize it given loosened floating-point constraints.
+ LibFunc Func;
+ bool IsMathLibCall =
+ TLI && CI->getCalledFunction() &&
+ CI->getType()->isFloatingPointTy() &&
+ TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
+ TLI->hasOptimizedCodeGen(Func);
+
+ if (IsMathLibCall) {
+ // TODO: Ideally, we should not use clang-specific language here,
+ // but it's hard to provide meaningful yet generic advice.
+ // Also, should this be guarded by allowExtraAnalysis() and/or be part
+ // of the returned info from isFunctionVectorizable()?
+ ORE->emit(createMissedAnalysis("CantVectorizeLibcall", CI)
+ << "library call cannot be vectorized. "
+ "Try compiling with -fno-math-errno, -ffast-math, "
+ "or similar flags");
+ } else {
+ ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
+ << "call instruction cannot be vectorized");
+ }
LLVM_DEBUG(
- dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
+ dbgs() << "LV: Found a non-intrinsic callsite.\n");
return false;
}
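The new diagnostic above separates calls to recognized math library functions from arbitrary calls, since such calls usually become vectorizable once errno handling is relaxed. A hedged illustration of the kind of loop the remark targets (whether it actually vectorizes depends on the target and math library):

#include <cmath>

// With default flags the sinf call may set errno, so the loop is reported as
// "library call cannot be vectorized"; -fno-math-errno or -ffast-math
// typically allows the vectorizer to use a vector math routine instead.
void applySin(float *Out, const float *In, int N) {
  for (int I = 0; I < N; ++I)
    Out[I] = std::sin(In[I]);
}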
@@ -717,6 +789,14 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
+ // We can safely vectorize loops where instructions within the loop are
+ // used outside the loop only if the SCEV predicates within the loop is
+ // same as outside the loop. Allowing the exit means reusing the SCEV
+ // outside the loop.
+ if (PSE.getUnionPredicate().isAlwaysTrue()) {
+ AllowedExit.insert(&I);
+ continue;
+ }
ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
<< "value cannot be used outside the loop");
return false;
@@ -730,6 +810,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
ORE->emit(createMissedAnalysis("NoInductionVariable")
<< "loop induction variable could not be identified");
return false;
+ } else if (!WidestIndTy) {
+ ORE->emit(createMissedAnalysis("NoIntegerInductionVariable")
+ << "integer loop induction variable could not be identified");
+ return false;
}
}
@@ -754,13 +838,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (!LAI->canVectorizeMemory())
return false;
- if (LAI->hasStoreToLoopInvariantAddress()) {
+ if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
- << "write to a loop invariant address could not be vectorized");
- LLVM_DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+ << "write to a loop invariant address could not "
+ "be vectorized");
+ LLVM_DEBUG(
+ dbgs() << "LV: Non vectorizable stores to a uniform address\n");
return false;
}
-
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
PSE.addPredicate(LAI->getPSE().getUnionPredicate());
@@ -1069,4 +1154,59 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return Result;
}
+bool LoopVectorizationLegality::canFoldTailByMasking() {
+
+ LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
+
+ if (!PrimaryInduction) {
+ ORE->emit(createMissedAnalysis("NoPrimaryInduction")
+ << "Missing a primary induction variable in the loop, which is "
+ << "needed in order to fold tail by masking as required.");
+ LLVM_DEBUG(dbgs() << "LV: No primary induction, cannot fold tail by "
+ << "masking.\n");
+ return false;
+ }
+
+ // TODO: handle reductions when tail is folded by masking.
+ if (!Reductions.empty()) {
+ ORE->emit(createMissedAnalysis("ReductionFoldingTailByMasking")
+ << "Cannot fold tail by masking in the presence of reductions.");
+ LLVM_DEBUG(dbgs() << "LV: Loop has reductions, cannot fold tail by "
+ << "masking.\n");
+ return false;
+ }
+
+ // TODO: handle outside users when tail is folded by masking.
+ for (auto *AE : AllowedExit) {
+ // Check that all users of allowed exit values are inside the loop.
+ for (User *U : AE->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (TheLoop->contains(UI))
+ continue;
+ ORE->emit(createMissedAnalysis("LiveOutFoldingTailByMasking")
+ << "Cannot fold tail by masking in the presence of live outs.");
+ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop has an "
+ << "outside user for : " << *UI << '\n');
+ return false;
+ }
+ }
+
+ // The list of pointers that we can safely read and write to remains empty.
+ SmallPtrSet<Value *, 8> SafePointers;
+
+ // Check and mark all blocks for predication, including those that ordinarily
+ // do not need predication such as the header block.
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ if (!blockCanBePredicated(BB, SafePointers)) {
+ ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
+ << "control flow cannot be substituted for a select");
+ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as required.\n");
+ return false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
+ return true;
+}
+
} // namespace llvm
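canFoldTailByMasking above also refuses to fold the tail when an allowed-exit value has a user outside the loop. The scan it performs follows a common LLVM pattern; a minimal standalone sketch (assuming the usual Loop and IR headers; the helper name is illustrative):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// True if every user of V is an instruction inside L: the "no live-outs"
// condition required before the tail can be folded by masking.
static bool allUsersInsideLoop(const Value *V, const Loop *L) {
  for (const User *U : V->users()) {
    auto *UI = dyn_cast<Instruction>(U);
    if (!UI || !L->contains(UI))
      return false;
  }
  return true;
}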
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1c7d0a63a5ca..c45dee590b84 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -58,6 +58,7 @@
#include "LoopVectorizationPlanner.h"
#include "VPRecipeBuilder.h"
#include "VPlanHCFGBuilder.h"
+#include "VPlanHCFGTransforms.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -151,6 +152,16 @@ using namespace llvm;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
+/// @{
+/// Metadata attribute names
+static const char *const LLVMLoopVectorizeFollowupAll =
+ "llvm.loop.vectorize.followup_all";
+static const char *const LLVMLoopVectorizeFollowupVectorized =
+ "llvm.loop.vectorize.followup_vectorized";
+static const char *const LLVMLoopVectorizeFollowupEpilogue =
+ "llvm.loop.vectorize.followup_epilogue";
+/// @}
+
STATISTIC(LoopsVectorized, "Number of loops vectorized");
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
@@ -171,11 +182,11 @@ static cl::opt<bool> EnableInterleavedMemAccesses(
"enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
-/// Maximum factor for an interleaved memory access.
-static cl::opt<unsigned> MaxInterleaveGroupFactor(
- "max-interleave-group-factor", cl::Hidden,
- cl::desc("Maximum factor for an interleaved access group (default = 8)"),
- cl::init(8));
+/// An interleave-group may need masking if it resides in a block that needs
+/// predication, or in order to mask away gaps.
+static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
+ "enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
+ cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
/// We don't interleave loops with a known constant trip count below this
/// number.
@@ -240,7 +251,7 @@ static cl::opt<unsigned> MaxNestedScalarReductionIC(
cl::desc("The maximum interleave count to use when interleaving a scalar "
"reduction in a nested loop."));
-static cl::opt<bool> EnableVPlanNativePath(
+cl::opt<bool> EnableVPlanNativePath(
"enable-vplan-native-path", cl::init(false), cl::Hidden,
cl::desc("Enable VPlan-native vectorization path with "
"support for outer loop vectorization."));
@@ -265,10 +276,6 @@ static Type *ToVectorTy(Type *Scalar, unsigned VF) {
return VectorType::get(Scalar, VF);
}
-// FIXME: The following helper functions have multiple implementations
-// in the project. They can be effectively organized in a common Load/Store
-// utilities unit.
-
/// A helper function that returns the type of loaded or stored value.
static Type *getMemInstValueType(Value *I) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -278,25 +285,6 @@ static Type *getMemInstValueType(Value *I) {
return cast<StoreInst>(I)->getValueOperand()->getType();
}
-/// A helper function that returns the alignment of load or store instruction.
-static unsigned getMemInstAlignment(Value *I) {
- assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
- "Expected Load or Store instruction");
- if (auto *LI = dyn_cast<LoadInst>(I))
- return LI->getAlignment();
- return cast<StoreInst>(I)->getAlignment();
-}
-
-/// A helper function that returns the address space of the pointer operand of
-/// load or store instruction.
-static unsigned getMemInstAddressSpace(Value *I) {
- assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
- "Expected Load or Store instruction");
- if (auto *LI = dyn_cast<LoadInst>(I))
- return LI->getPointerAddressSpace();
- return cast<StoreInst>(I)->getPointerAddressSpace();
-}
-
/// A helper function that returns true if the given type is irregular. The
/// type is irregular if its allocated size doesn't equal the store size of an
/// element of the corresponding vector type at the given vectorization factor.
@@ -436,8 +424,10 @@ public:
/// Construct the vector value of a scalarized value \p V one lane at a time.
void packScalarIntoVectorValue(Value *V, const VPIteration &Instance);
- /// Try to vectorize the interleaved access group that \p Instr belongs to.
- void vectorizeInterleaveGroup(Instruction *Instr);
+ /// Try to vectorize the interleaved access group that \p Instr belongs to,
+ /// optionally masking the vector operations if \p BlockInMask is non-null.
+ void vectorizeInterleaveGroup(Instruction *Instr,
+ VectorParts *BlockInMask = nullptr);
/// Vectorize Load and Store instructions, optionally masking the vector
/// operations if \p BlockInMask is non-null.
@@ -448,6 +438,9 @@ public:
/// the instruction.
void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr);
+ /// Fix the non-induction PHIs in the OrigPHIsToFix vector.
+ void fixNonInductionPHIs(void);
+
protected:
friend class LoopVectorizationPlanner;
@@ -584,6 +577,16 @@ protected:
/// Emit bypass checks to check any memory assumptions we may have made.
void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass);
+ /// Compute the transformed value of Index at offset StartValue using step
+ /// StepValue.
+ /// For integer induction, returns StartValue + Index * StepValue.
+ /// For pointer induction, returns StartValue[Index * StepValue].
+ /// FIXME: The newly created binary instructions should contain nsw/nuw
+ /// flags, which can be found from the original scalar operations.
+ Value *emitTransformedIndex(IRBuilder<> &B, Value *Index, ScalarEvolution *SE,
+ const DataLayout &DL,
+ const InductionDescriptor &ID) const;
+
/// Add additional metadata to \p To that was not present on \p Orig.
///
/// Currently this is used to add the noalias annotations based on the
@@ -705,6 +708,10 @@ protected:
// Holds the end values for each induction variable. We save the end values
// so we can later fix-up the external users of the induction variables.
DenseMap<PHINode *, Value *> IVEndValues;
+
+ // Vector of original scalar PHIs whose corresponding widened PHIs need to be
+ // fixed up at the end of vector code generation.
+ SmallVector<PHINode *, 8> OrigPHIsToFix;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
@@ -752,8 +759,15 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr)
if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr)) {
const DILocation *DIL = Inst->getDebugLoc();
if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
- !isa<DbgInfoIntrinsic>(Inst))
- B.SetCurrentDebugLocation(DIL->cloneWithDuplicationFactor(UF * VF));
+ !isa<DbgInfoIntrinsic>(Inst)) {
+ auto NewDIL = DIL->cloneWithDuplicationFactor(UF * VF);
+ if (NewDIL)
+ B.SetCurrentDebugLocation(NewDIL.getValue());
+ else
+ LLVM_DEBUG(dbgs()
+ << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ }
else
B.SetCurrentDebugLocation(DIL);
} else
@@ -801,367 +815,6 @@ void InnerLoopVectorizer::addMetadata(ArrayRef<Value *> To,
namespace llvm {
-/// The group of interleaved loads/stores sharing the same stride and
-/// close to each other.
-///
-/// Each member in this group has an index starting from 0, and the largest
-/// index should be less than interleaved factor, which is equal to the absolute
-/// value of the access's stride.
-///
-/// E.g. An interleaved load group of factor 4:
-/// for (unsigned i = 0; i < 1024; i+=4) {
-/// a = A[i]; // Member of index 0
-/// b = A[i+1]; // Member of index 1
-/// d = A[i+3]; // Member of index 3
-/// ...
-/// }
-///
-/// An interleaved store group of factor 4:
-/// for (unsigned i = 0; i < 1024; i+=4) {
-/// ...
-/// A[i] = a; // Member of index 0
-/// A[i+1] = b; // Member of index 1
-/// A[i+2] = c; // Member of index 2
-/// A[i+3] = d; // Member of index 3
-/// }
-///
-/// Note: the interleaved load group could have gaps (missing members), but
-/// the interleaved store group doesn't allow gaps.
-class InterleaveGroup {
-public:
- InterleaveGroup(Instruction *Instr, int Stride, unsigned Align)
- : Align(Align), InsertPos(Instr) {
- assert(Align && "The alignment should be non-zero");
-
- Factor = std::abs(Stride);
- assert(Factor > 1 && "Invalid interleave factor");
-
- Reverse = Stride < 0;
- Members[0] = Instr;
- }
-
- bool isReverse() const { return Reverse; }
- unsigned getFactor() const { return Factor; }
- unsigned getAlignment() const { return Align; }
- unsigned getNumMembers() const { return Members.size(); }
-
- /// Try to insert a new member \p Instr with index \p Index and
- /// alignment \p NewAlign. The index is related to the leader and it could be
- /// negative if it is the new leader.
- ///
- /// \returns false if the instruction doesn't belong to the group.
- bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) {
- assert(NewAlign && "The new member's alignment should be non-zero");
-
- int Key = Index + SmallestKey;
-
- // Skip if there is already a member with the same index.
- if (Members.count(Key))
- return false;
-
- if (Key > LargestKey) {
- // The largest index is always less than the interleave factor.
- if (Index >= static_cast<int>(Factor))
- return false;
-
- LargestKey = Key;
- } else if (Key < SmallestKey) {
- // The largest index is always less than the interleave factor.
- if (LargestKey - Key >= static_cast<int>(Factor))
- return false;
-
- SmallestKey = Key;
- }
-
- // It's always safe to select the minimum alignment.
- Align = std::min(Align, NewAlign);
- Members[Key] = Instr;
- return true;
- }
-
- /// Get the member with the given index \p Index
- ///
- /// \returns nullptr if contains no such member.
- Instruction *getMember(unsigned Index) const {
- int Key = SmallestKey + Index;
- if (!Members.count(Key))
- return nullptr;
-
- return Members.find(Key)->second;
- }
-
- /// Get the index for the given member. Unlike the key in the member
- /// map, the index starts from 0.
- unsigned getIndex(Instruction *Instr) const {
- for (auto I : Members)
- if (I.second == Instr)
- return I.first - SmallestKey;
-
- llvm_unreachable("InterleaveGroup contains no such member");
- }
-
- Instruction *getInsertPos() const { return InsertPos; }
- void setInsertPos(Instruction *Inst) { InsertPos = Inst; }
-
- /// Add metadata (e.g. alias info) from the instructions in this group to \p
- /// NewInst.
- ///
- /// FIXME: this function currently does not add noalias metadata a'la
- /// addNewMedata. To do that we need to compute the intersection of the
- /// noalias info from all members.
- void addMetadata(Instruction *NewInst) const {
- SmallVector<Value *, 4> VL;
- std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
- [](std::pair<int, Instruction *> p) { return p.second; });
- propagateMetadata(NewInst, VL);
- }
-
-private:
- unsigned Factor; // Interleave Factor.
- bool Reverse;
- unsigned Align;
- DenseMap<int, Instruction *> Members;
- int SmallestKey = 0;
- int LargestKey = 0;
-
- // To avoid breaking dependences, vectorized instructions of an interleave
- // group should be inserted at either the first load or the last store in
- // program order.
- //
- // E.g. %even = load i32 // Insert Position
- // %add = add i32 %even // Use of %even
- // %odd = load i32
- //
- // store i32 %even
- // %odd = add i32 // Def of %odd
- // store i32 %odd // Insert Position
- Instruction *InsertPos;
-};
-} // end namespace llvm
-
-namespace {
-
-/// Drive the analysis of interleaved memory accesses in the loop.
-///
-/// Use this class to analyze interleaved accesses only when we can vectorize
-/// a loop. Otherwise it's meaningless to do analysis as the vectorization
-/// on interleaved accesses is unsafe.
-///
-/// The analysis collects interleave groups and records the relationships
-/// between the member and the group in a map.
-class InterleavedAccessInfo {
-public:
- InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
- DominatorTree *DT, LoopInfo *LI,
- const LoopAccessInfo *LAI)
- : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
-
- ~InterleavedAccessInfo() {
- SmallPtrSet<InterleaveGroup *, 4> DelSet;
- // Avoid releasing a pointer twice.
- for (auto &I : InterleaveGroupMap)
- DelSet.insert(I.second);
- for (auto *Ptr : DelSet)
- delete Ptr;
- }
-
- /// Analyze the interleaved accesses and collect them in interleave
- /// groups. Substitute symbolic strides using \p Strides.
- void analyzeInterleaving();
-
- /// Check if \p Instr belongs to any interleave group.
- bool isInterleaved(Instruction *Instr) const {
- return InterleaveGroupMap.count(Instr);
- }
-
- /// Get the interleave group that \p Instr belongs to.
- ///
- /// \returns nullptr if doesn't have such group.
- InterleaveGroup *getInterleaveGroup(Instruction *Instr) const {
- if (InterleaveGroupMap.count(Instr))
- return InterleaveGroupMap.find(Instr)->second;
- return nullptr;
- }
-
- /// Returns true if an interleaved group that may access memory
- /// out-of-bounds requires a scalar epilogue iteration for correctness.
- bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
-
-private:
- /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
- /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
- /// The interleaved access analysis can also add new predicates (for example
- /// by versioning strides of pointers).
- PredicatedScalarEvolution &PSE;
-
- Loop *TheLoop;
- DominatorTree *DT;
- LoopInfo *LI;
- const LoopAccessInfo *LAI;
-
- /// True if the loop may contain non-reversed interleaved groups with
- /// out-of-bounds accesses. We ensure we don't speculatively access memory
- /// out-of-bounds by executing at least one scalar epilogue iteration.
- bool RequiresScalarEpilogue = false;
-
- /// Holds the relationships between the members and the interleave group.
- DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
-
- /// Holds dependences among the memory accesses in the loop. It maps a source
- /// access to a set of dependent sink accesses.
- DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
-
- /// The descriptor for a strided memory access.
- struct StrideDescriptor {
- StrideDescriptor() = default;
- StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
- unsigned Align)
- : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
-
- // The access's stride. It is negative for a reverse access.
- int64_t Stride = 0;
-
- // The scalar expression of this access.
- const SCEV *Scev = nullptr;
-
- // The size of the memory object.
- uint64_t Size = 0;
-
- // The alignment of this access.
- unsigned Align = 0;
- };
-
- /// A type for holding instructions and their stride descriptors.
- using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
-
- /// Create a new interleave group with the given instruction \p Instr,
- /// stride \p Stride and alignment \p Align.
- ///
- /// \returns the newly created interleave group.
- InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride,
- unsigned Align) {
- assert(!InterleaveGroupMap.count(Instr) &&
- "Already in an interleaved access group");
- InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align);
- return InterleaveGroupMap[Instr];
- }
-
- /// Release the group and remove all the relationships.
- void releaseGroup(InterleaveGroup *Group) {
- for (unsigned i = 0; i < Group->getFactor(); i++)
- if (Instruction *Member = Group->getMember(i))
- InterleaveGroupMap.erase(Member);
-
- delete Group;
- }
-
- /// Collect all the accesses with a constant stride in program order.
- void collectConstStrideAccesses(
- MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
- const ValueToValueMap &Strides);
-
- /// Returns true if \p Stride is allowed in an interleaved group.
- static bool isStrided(int Stride) {
- unsigned Factor = std::abs(Stride);
- return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
- }
-
- /// Returns true if \p BB is a predicated block.
- bool isPredicated(BasicBlock *BB) const {
- return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
- }
-
- /// Returns true if LoopAccessInfo can be used for dependence queries.
- bool areDependencesValid() const {
- return LAI && LAI->getDepChecker().getDependences();
- }
-
- /// Returns true if memory accesses \p A and \p B can be reordered, if
- /// necessary, when constructing interleaved groups.
- ///
- /// \p A must precede \p B in program order. We return false if reordering is
- /// not necessary or is prevented because \p A and \p B may be dependent.
- bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
- StrideEntry *B) const {
- // Code motion for interleaved accesses can potentially hoist strided loads
- // and sink strided stores. The code below checks the legality of the
- // following two conditions:
- //
- // 1. Potentially moving a strided load (B) before any store (A) that
- // precedes B, or
- //
- // 2. Potentially moving a strided store (A) after any load or store (B)
- // that A precedes.
- //
- // It's legal to reorder A and B if we know there isn't a dependence from A
- // to B. Note that this determination is conservative since some
- // dependences could potentially be reordered safely.
-
- // A is potentially the source of a dependence.
- auto *Src = A->first;
- auto SrcDes = A->second;
-
- // B is potentially the sink of a dependence.
- auto *Sink = B->first;
- auto SinkDes = B->second;
-
- // Code motion for interleaved accesses can't violate WAR dependences.
- // Thus, reordering is legal if the source isn't a write.
- if (!Src->mayWriteToMemory())
- return true;
-
- // At least one of the accesses must be strided.
- if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
- return true;
-
- // If dependence information is not available from LoopAccessInfo,
- // conservatively assume the instructions can't be reordered.
- if (!areDependencesValid())
- return false;
-
- // If we know there is a dependence from source to sink, assume the
- // instructions can't be reordered. Otherwise, reordering is legal.
- return !Dependences.count(Src) || !Dependences.lookup(Src).count(Sink);
- }
-
- /// Collect the dependences from LoopAccessInfo.
- ///
- /// We process the dependences once during the interleaved access analysis to
- /// enable constant-time dependence queries.
- void collectDependences() {
- if (!areDependencesValid())
- return;
- auto *Deps = LAI->getDepChecker().getDependences();
- for (auto Dep : *Deps)
- Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
- }
-};
-
-} // end anonymous namespace
-
-static void emitMissedWarning(Function *F, Loop *L,
- const LoopVectorizeHints &LH,
- OptimizationRemarkEmitter *ORE) {
- LH.emitRemarkWithHints();
-
- if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
- if (LH.getWidth() != 1)
- ORE->emit(DiagnosticInfoOptimizationFailure(
- DEBUG_TYPE, "FailedRequestedVectorization",
- L->getStartLoc(), L->getHeader())
- << "loop not vectorized: "
- << "failed explicitly specified loop vectorization");
- else if (LH.getInterleave() != 1)
- ORE->emit(DiagnosticInfoOptimizationFailure(
- DEBUG_TYPE, "FailedRequestedInterleaving", L->getStartLoc(),
- L->getHeader())
- << "loop not interleaved: "
- << "failed explicitly specified loop interleaving");
- }
-}
-
-namespace llvm {
-
/// LoopVectorizationCostModel - estimates the expected speedups due to
/// vectorization.
/// In many cases vectorization is not profitable. This can happen because of
@@ -1247,34 +900,55 @@ public:
/// vectorization factor \p VF.
bool isProfitableToScalarize(Instruction *I, unsigned VF) const {
assert(VF > 1 && "Profitable to scalarize relevant only for VF > 1.");
+
+ // Cost model is not run in the VPlan-native path - return conservative
+ // result until this changes.
+ if (EnableVPlanNativePath)
+ return false;
+
auto Scalars = InstsToScalarize.find(VF);
assert(Scalars != InstsToScalarize.end() &&
"VF not yet analyzed for scalarization profitability");
- return Scalars->second.count(I);
+ return Scalars->second.find(I) != Scalars->second.end();
}
/// Returns true if \p I is known to be uniform after vectorization.
bool isUniformAfterVectorization(Instruction *I, unsigned VF) const {
if (VF == 1)
return true;
- assert(Uniforms.count(VF) && "VF not yet analyzed for uniformity");
+
+ // Cost model is not run in the VPlan-native path - return conservative
+ // result until this changes.
+ if (EnableVPlanNativePath)
+ return false;
+
auto UniformsPerVF = Uniforms.find(VF);
- return UniformsPerVF->second.count(I);
+ assert(UniformsPerVF != Uniforms.end() &&
+ "VF not yet analyzed for uniformity");
+ return UniformsPerVF->second.find(I) != UniformsPerVF->second.end();
}
/// Returns true if \p I is known to be scalar after vectorization.
bool isScalarAfterVectorization(Instruction *I, unsigned VF) const {
if (VF == 1)
return true;
- assert(Scalars.count(VF) && "Scalar values are not calculated for VF");
+
+ // Cost model is not run in the VPlan-native path - return conservative
+ // result until this changes.
+ if (EnableVPlanNativePath)
+ return false;
+
auto ScalarsPerVF = Scalars.find(VF);
- return ScalarsPerVF->second.count(I);
+ assert(ScalarsPerVF != Scalars.end() &&
+ "Scalar values are not calculated for VF");
+ return ScalarsPerVF->second.find(I) != ScalarsPerVF->second.end();
}
/// \returns True if instruction \p I can be truncated to a smaller bitwidth
/// for vectorization factor \p VF.
bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
- return VF > 1 && MinBWs.count(I) && !isProfitableToScalarize(I, VF) &&
+ return VF > 1 && MinBWs.find(I) != MinBWs.end() &&
+ !isProfitableToScalarize(I, VF) &&
!isScalarAfterVectorization(I, VF);
}
@@ -1298,7 +972,7 @@ public:
/// Save vectorization decision \p W and \p Cost taken by the cost model for
/// interleaving group \p Grp and vector width \p VF.
- void setWideningDecision(const InterleaveGroup *Grp, unsigned VF,
+ void setWideningDecision(const InterleaveGroup<Instruction> *Grp, unsigned VF,
InstWidening W, unsigned Cost) {
assert(VF >= 2 && "Expected VF >=2");
/// Broadcast this decision to all instructions inside the group.
@@ -1318,6 +992,12 @@ public:
/// through the cost modeling.
InstWidening getWideningDecision(Instruction *I, unsigned VF) {
assert(VF >= 2 && "Expected VF >=2");
+
+ // Cost model is not run in the VPlan-native path - return conservative
+ // result until this changes.
+ if (EnableVPlanNativePath)
+ return CM_GatherScatter;
+
std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
auto Itr = WideningDecisions.find(InstOnVF);
if (Itr == WideningDecisions.end())
@@ -1330,7 +1010,8 @@ public:
unsigned getWideningCost(Instruction *I, unsigned VF) {
assert(VF >= 2 && "Expected VF >=2");
std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
- assert(WideningDecisions.count(InstOnVF) && "The cost is not calculated");
+ assert(WideningDecisions.find(InstOnVF) != WideningDecisions.end() &&
+ "The cost is not calculated");
return WideningDecisions[InstOnVF].second;
}
@@ -1369,7 +1050,7 @@ public:
/// that may be vectorized as interleave, gather-scatter or scalarized.
void collectUniformsAndScalars(unsigned VF) {
// Do the analysis once.
- if (VF == 1 || Uniforms.count(VF))
+ if (VF == 1 || Uniforms.find(VF) != Uniforms.end())
return;
setCostBasedWideningDecision(VF);
collectLoopUniforms(VF);
@@ -1414,26 +1095,58 @@ public:
/// Returns true if \p I is an instruction that will be scalarized with
/// predication. Such instructions include conditional stores and
/// instructions that may divide by zero.
- bool isScalarWithPredication(Instruction *I);
+ /// If a non-zero VF has been calculated, we check if I will be scalarized
+ /// predication for that VF.
+ bool isScalarWithPredication(Instruction *I, unsigned VF = 1);
+
+ // Returns true if \p I is an instruction that will be predicated either
+ // through scalar predication or masked load/store or masked gather/scatter.
+ // Superset of instructions that return true for isScalarWithPredication.
+ bool isPredicatedInst(Instruction *I) {
+ if (!blockNeedsPredication(I->getParent()))
+ return false;
+ // Loads and stores that need some form of masked operation are predicated
+ // instructions.
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ return Legal->isMaskRequired(I);
+ return isScalarWithPredication(I);
+ }
/// Returns true if \p I is a memory instruction with consecutive memory
/// access that can be widened.
bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1);
+ /// Returns true if \p I is a memory instruction in an interleaved-group
+ /// of memory accesses that can be vectorized with wide vector loads/stores
+ /// and shuffles.
+ bool interleavedAccessCanBeWidened(Instruction *I, unsigned VF = 1);
+
/// Check if \p Instr belongs to any interleaved access group.
bool isAccessInterleaved(Instruction *Instr) {
return InterleaveInfo.isInterleaved(Instr);
}
/// Get the interleaved access group that \p Instr belongs to.
- const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) {
+ const InterleaveGroup<Instruction> *
+ getInterleavedAccessGroup(Instruction *Instr) {
return InterleaveInfo.getInterleaveGroup(Instr);
}
/// Returns true if an interleaved group requires a scalar iteration
- /// to handle accesses with gaps.
+ /// to handle accesses with gaps, and there is nothing preventing us from
+ /// creating a scalar epilogue.
bool requiresScalarEpilogue() const {
- return InterleaveInfo.requiresScalarEpilogue();
+ return IsScalarEpilogueAllowed && InterleaveInfo.requiresScalarEpilogue();
+ }
+
+ /// Returns true if a scalar epilogue is not allowed due to optsize.
+ bool isScalarEpilogueAllowed() const { return IsScalarEpilogueAllowed; }
+
+ /// Returns true if all loop blocks should be masked to fold tail loop.
+ bool foldTailByMasking() const { return FoldTailByMasking; }
+
+ bool blockNeedsPredication(BasicBlock *BB) {
+ return foldTailByMasking() || Legal->blockNeedsPredication(BB);
}
private:
@@ -1482,8 +1195,10 @@ private:
/// memory access.
unsigned getConsecutiveMemOpCost(Instruction *I, unsigned VF);
- /// The cost calculation for Load instruction \p I with uniform pointer -
- /// scalar load + broadcast.
+ /// The cost calculation for Load/Store instruction \p I with uniform pointer -
+ /// Load: scalar load + broadcast.
+ /// Store: scalar store + (loop invariant value stored? 0 : extract of last
+ /// element)
unsigned getUniformMemOpCost(Instruction *I, unsigned VF);
/// Returns whether the instruction is a load or store and will be a emitted
@@ -1517,6 +1232,18 @@ private:
/// vectorization as a predicated block.
SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization;
+ /// Records whether it is allowed to have the original scalar loop execute at
+ /// least once. This may be needed as a fallback loop in case runtime
+ /// aliasing/dependence checks fail, or to handle the tail/remainder
+ /// iterations when the trip count is unknown or doesn't divide by the VF,
+ /// or as a peel-loop to handle gaps in interleave-groups.
+ /// Under optsize and when the trip count is very small we don't allow any
+ /// iterations to execute in the scalar loop.
+ bool IsScalarEpilogueAllowed = true;
+
+ /// All blocks of loop are to be masked to fold tail of scalar iterations.
+ bool FoldTailByMasking = false;
+
/// A map holding scalar costs for different vectorization factors. The
/// presence of a cost for an instruction in the mapping indicates that the
/// instruction will be scalarized when vectorizing with the associated
@@ -1639,14 +1366,15 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp,
return false;
Function *Fn = OuterLp->getHeader()->getParent();
- if (!Hints.allowVectorization(Fn, OuterLp, false /*AlwaysVectorize*/)) {
+ if (!Hints.allowVectorization(Fn, OuterLp,
+ true /*VectorizeOnlyWhenForced*/)) {
LLVM_DEBUG(dbgs() << "LV: Loop hints prevent outer loop vectorization.\n");
return false;
}
if (!Hints.getWidth()) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n");
- emitMissedWarning(Fn, OuterLp, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -1654,7 +1382,7 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp,
// TODO: Interleave support is future work.
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for "
"outer loops.\n");
- emitMissedWarning(Fn, OuterLp, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -1695,10 +1423,11 @@ struct LoopVectorize : public FunctionPass {
LoopVectorizePass Impl;
- explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true)
+ explicit LoopVectorize(bool InterleaveOnlyWhenForced = false,
+ bool VectorizeOnlyWhenForced = false)
: FunctionPass(ID) {
- Impl.DisableUnrolling = NoUnrolling;
- Impl.AlwaysVectorize = AlwaysVectorize;
+ Impl.InterleaveOnlyWhenForced = InterleaveOnlyWhenForced;
+ Impl.VectorizeOnlyWhenForced = VectorizeOnlyWhenForced;
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
@@ -1737,8 +1466,16 @@ struct LoopVectorize : public FunctionPass {
AU.addRequired<LoopAccessLegacyAnalysis>();
AU.addRequired<DemandedBitsWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
+
+ // We currently do not preserve loopinfo/dominator analyses with outer loop
+ // vectorization. Until this is addressed, mark these analyses as preserved
+ // only for non-VPlan-native path.
+ // TODO: Preserve Loop and Dominator analyses for VPlan-native path.
+ if (!EnableVPlanNativePath) {
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
@@ -1950,7 +1687,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
? Builder.CreateSExtOrTrunc(Induction, IV->getType())
: Builder.CreateCast(Instruction::SIToFP, Induction,
IV->getType());
- ScalarIV = ID.transform(Builder, ScalarIV, PSE.getSE(), DL);
+ ScalarIV = emitTransformedIndex(Builder, ScalarIV, PSE.getSE(), DL, ID);
ScalarIV->setName("offset.idx");
}
if (Trunc) {
@@ -2089,8 +1826,9 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
assert(!V->getType()->isVoidTy() && "Type does not produce a value");
- // If we have a stride that is replaced by one, do it here.
- if (Legal->hasStride(V))
+ // If we have a stride that is replaced by one, do it here. Defer this for
+ // the VPlan-native path until we start running Legal checks in that path.
+ if (!EnableVPlanNativePath && Legal->hasStride(V))
V = ConstantInt::get(V->getType(), 1);
// If we have a vector mapped to this value, return it.
@@ -2214,6 +1952,17 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
"reverse");
}
+// Return whether we allow using masked interleave-groups (for dealing with
+// strided loads/stores that reside in predicated blocks, or for dealing
+// with gaps).
+static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
+ // If an override option has been passed in for interleaved accesses, use it.
+ if (EnableMaskedInterleavedMemAccesses.getNumOccurrences() > 0)
+ return EnableMaskedInterleavedMemAccesses;
+
+ return TTI.enableMaskedInterleavedAccessVectorization();
+}
+
// Try to vectorize the interleave group that \p Instr belongs to.
//
// E.g. Translate following interleaved load group (factor = 3):
@@ -2242,8 +1991,10 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
-void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
- const InterleaveGroup *Group = Cost->getInterleavedAccessGroup(Instr);
+void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
+ VectorParts *BlockInMask) {
+ const InterleaveGroup<Instruction> *Group =
+ Cost->getInterleavedAccessGroup(Instr);
assert(Group && "Fail to get an interleaved access group.");
// Skip if current instruction is not the insert position.
@@ -2257,13 +2008,22 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
Type *ScalarTy = getMemInstValueType(Instr);
unsigned InterleaveFactor = Group->getFactor();
Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
- Type *PtrTy = VecTy->getPointerTo(getMemInstAddressSpace(Instr));
+ Type *PtrTy = VecTy->getPointerTo(getLoadStoreAddressSpace(Instr));
// Prepare for the new pointers.
setDebugLocFromInst(Builder, Ptr);
SmallVector<Value *, 2> NewPtrs;
unsigned Index = Group->getIndex(Instr);
+ VectorParts Mask;
+ bool IsMaskForCondRequired = BlockInMask;
+ if (IsMaskForCondRequired) {
+ Mask = *BlockInMask;
+ // TODO: extend the masked interleaved-group support to reversed access.
+ assert(!Group->isReverse() && "Reversed masked interleave-group "
+ "not supported.");
+ }
+
// If the group is reverse, adjust the index to refer to the last vector lane
// instead of the first. We adjust the index from the first vector lane,
// rather than directly getting the pointer for lane VF - 1, because the
@@ -2302,13 +2062,39 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
setDebugLocFromInst(Builder, Instr);
Value *UndefVec = UndefValue::get(VecTy);
+ Value *MaskForGaps = nullptr;
+ if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) {
+ MaskForGaps = createBitMaskForGaps(Builder, VF, *Group);
+ assert(MaskForGaps && "Mask for Gaps is required but it is null");
+ }
+
// Vectorize the interleaved load group.
if (isa<LoadInst>(Instr)) {
// For each unroll part, create a wide load for the group.
SmallVector<Value *, 2> NewLoads;
for (unsigned Part = 0; Part < UF; Part++) {
- auto *NewLoad = Builder.CreateAlignedLoad(
- NewPtrs[Part], Group->getAlignment(), "wide.vec");
+ Instruction *NewLoad;
+ if (IsMaskForCondRequired || MaskForGaps) {
+ assert(useMaskedInterleavedAccesses(*TTI) &&
+ "masked interleaved groups are not allowed.");
+ Value *GroupMask = MaskForGaps;
+ if (IsMaskForCondRequired) {
+ auto *Undefs = UndefValue::get(Mask[Part]->getType());
+ auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
+ Value *ShuffledMask = Builder.CreateShuffleVector(
+ Mask[Part], Undefs, RepMask, "interleaved.mask");
+ GroupMask = MaskForGaps
+ ? Builder.CreateBinOp(Instruction::And, ShuffledMask,
+ MaskForGaps)
+ : ShuffledMask;
+ }
+ NewLoad =
+ Builder.CreateMaskedLoad(NewPtrs[Part], Group->getAlignment(),
+ GroupMask, UndefVec, "wide.masked.vec");
+ }
+ else
+ NewLoad = Builder.CreateAlignedLoad(NewPtrs[Part],
+ Group->getAlignment(), "wide.vec");
Group->addMetadata(NewLoad);
NewLoads.push_back(NewLoad);
}
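In the masked path above, the per-iteration block mask (one bit per scalar iteration) is widened to the interleaved vector width by replicating each lane InterleaveFactor times via createReplicatedMask, then optionally combined with the gap mask. A purely illustrative sketch of the index pattern that replication produces:

#include <vector>

// For VF = 4 and Factor = 3 this yields {0,0,0, 1,1,1, 2,2,2, 3,3,3}: each
// scalar-iteration mask bit is repeated once per member of the interleave
// group, mirroring the shuffle mask built by createReplicatedMask.
static std::vector<unsigned> replicatedMaskIndices(unsigned VF,
                                                   unsigned Factor) {
  std::vector<unsigned> Indices;
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    for (unsigned Member = 0; Member < Factor; ++Member)
      Indices.push_back(Lane);
  return Indices;
}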
@@ -2375,8 +2161,18 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask,
"interleaved.vec");
- Instruction *NewStoreInstr =
- Builder.CreateAlignedStore(IVec, NewPtrs[Part], Group->getAlignment());
+ Instruction *NewStoreInstr;
+ if (IsMaskForCondRequired) {
+ auto *Undefs = UndefValue::get(Mask[Part]->getType());
+ auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
+ Value *ShuffledMask = Builder.CreateShuffleVector(
+ Mask[Part], Undefs, RepMask, "interleaved.mask");
+ NewStoreInstr = Builder.CreateMaskedStore(
+ IVec, NewPtrs[Part], Group->getAlignment(), ShuffledMask);
+ }
+ else
+ NewStoreInstr = Builder.CreateAlignedStore(IVec, NewPtrs[Part],
+ Group->getAlignment());
Group->addMetadata(NewStoreInstr);
}
@@ -2400,13 +2196,13 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Type *ScalarDataTy = getMemInstValueType(Instr);
Type *DataTy = VectorType::get(ScalarDataTy, VF);
Value *Ptr = getLoadStorePointerOperand(Instr);
- unsigned Alignment = getMemInstAlignment(Instr);
+ unsigned Alignment = getLoadStoreAlignment(Instr);
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
const DataLayout &DL = Instr->getModule()->getDataLayout();
if (!Alignment)
Alignment = DL.getABITypeAlignment(ScalarDataTy);
- unsigned AddressSpace = getMemInstAddressSpace(Instr);
+ unsigned AddressSpace = getLoadStoreAddressSpace(Instr);
// Determine if the pointer operand of the access is either consecutive or
// reverse consecutive.
@@ -2594,6 +2390,7 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
if (TripCount)
return TripCount;
+ assert(L && "Create Trip Count for null loop.");
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
// Find the loop boundaries.
ScalarEvolution *SE = PSE.getSE();
@@ -2602,6 +2399,7 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
"Invalid loop count");
Type *IdxTy = Legal->getWidestInductionType();
+ assert(IdxTy && "No type for induction");
// The exit count might have the type of i64 while the phi is i32. This can
// happen if we have an induction variable that is sign extended before the
@@ -2642,12 +2440,26 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
Value *TC = getOrCreateTripCount(L);
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+ Type *Ty = TC->getType();
+ Constant *Step = ConstantInt::get(Ty, VF * UF);
+
+ // If the tail is to be folded by masking, round the number of iterations N
+ // up to a multiple of Step instead of rounding down. This is done by first
+ // adding Step-1 and then rounding down. Note that it's ok if this addition
+ // overflows: the vector induction variable will eventually wrap to zero given
+ // that it starts at zero and its Step is a power of two; the loop will then
+ // exit, with the last early-exit vector comparison also producing all-true.
+ if (Cost->foldTailByMasking()) {
+ assert(isPowerOf2_32(VF * UF) &&
+ "VF*UF must be a power of 2 when folding tail by masking");
+ TC = Builder.CreateAdd(TC, ConstantInt::get(Ty, VF * UF - 1), "n.rnd.up");
+ }
+
// Now we need to generate the expression for the part of the loop that the
// vectorized body will execute. This is equal to N - (N % Step) if scalar
// iterations are not required for correctness, or N - Step, otherwise. Step
// is equal to the vectorization factor (number of SIMD elements) times the
// unroll factor (number of SIMD instructions).
- Constant *Step = ConstantInt::get(TC->getType(), VF * UF);
Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");
// If there is a non-reversed interleaved group that may speculatively access
@@ -2710,8 +2522,13 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
// of zero. In this case we will also jump to the scalar loop.
auto P = Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE
: ICmpInst::ICMP_ULT;
- Value *CheckMinIters = Builder.CreateICmp(
- P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
+
+ // If tail is to be folded, vector loop takes care of all iterations.
+ Value *CheckMinIters = Builder.getFalse();
+ if (!Cost->foldTailByMasking())
+ CheckMinIters = Builder.CreateICmp(
+ P, Count, ConstantInt::get(Count->getType(), VF * UF),
+ "min.iters.check");
BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
// Update dominator tree immediately if the generated block is a
@@ -2740,6 +2557,8 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
if (C->isZero())
return;
+ assert(!Cost->foldTailByMasking() &&
+ "Cannot SCEV check stride or overflow when folding tail");
// Create a new block containing the stride check.
BB->setName("vector.scevcheck");
auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
@@ -2756,6 +2575,10 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
}
void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
+ // VPlan-native path does not do any analysis for runtime checks currently.
+ if (EnableVPlanNativePath)
+ return;
+
BasicBlock *BB = L->getLoopPreheader();
// Generate the code that checks in runtime if arrays overlap. We put the
@@ -2768,6 +2591,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
if (!MemRuntimeCheck)
return;
+ assert(!Cost->foldTailByMasking() && "Cannot check memory when folding tail");
// Create a new block containing the memory check.
BB->setName("vector.memcheck");
auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
@@ -2789,6 +2613,94 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
LVer->prepareNoAliasMetadata();
}
+Value *InnerLoopVectorizer::emitTransformedIndex(
+ IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout &DL,
+ const InductionDescriptor &ID) const {
+
+ SCEVExpander Exp(*SE, DL, "induction");
+ auto Step = ID.getStep();
+ auto StartValue = ID.getStartValue();
+ assert(Index->getType() == Step->getType() &&
+ "Index type does not match StepValue type");
+
+ // Note: the IR at this point is broken. We cannot use SE to create any new
+ // SCEV and then expand it, hoping that SCEV's simplification will give us
+ // more optimal code. Unfortunately, attempting to do so on invalid IR may
+ // lead to various SCEV crashes. So all we can do is use the builder and rely
+ // on InstCombine for future simplifications. Here we handle only some
+ // trivial cases.
+ auto CreateAdd = [&B](Value *X, Value *Y) {
+ assert(X->getType() == Y->getType() && "Types don't match!");
+ if (auto *CX = dyn_cast<ConstantInt>(X))
+ if (CX->isZero())
+ return Y;
+ if (auto *CY = dyn_cast<ConstantInt>(Y))
+ if (CY->isZero())
+ return X;
+ return B.CreateAdd(X, Y);
+ };
+
+ auto CreateMul = [&B](Value *X, Value *Y) {
+ assert(X->getType() == Y->getType() && "Types don't match!");
+ if (auto *CX = dyn_cast<ConstantInt>(X))
+ if (CX->isOne())
+ return Y;
+ if (auto *CY = dyn_cast<ConstantInt>(Y))
+ if (CY->isOne())
+ return X;
+ return B.CreateMul(X, Y);
+ };
+
+ switch (ID.getKind()) {
+ case InductionDescriptor::IK_IntInduction: {
+ assert(Index->getType() == StartValue->getType() &&
+ "Index type does not match StartValue type");
+ if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ auto *Offset = CreateMul(
+ Index, Exp.expandCodeFor(Step, Index->getType(), &*B.GetInsertPoint()));
+ return CreateAdd(StartValue, Offset);
+ }
+ case InductionDescriptor::IK_PtrInduction: {
+ assert(isa<SCEVConstant>(Step) &&
+ "Expected constant step for pointer induction");
+ return B.CreateGEP(
+ nullptr, StartValue,
+ CreateMul(Index, Exp.expandCodeFor(Step, Index->getType(),
+ &*B.GetInsertPoint())));
+ }
+ case InductionDescriptor::IK_FpInduction: {
+ assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
+ auto InductionBinOp = ID.getInductionBinOp();
+ assert(InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub) &&
+ "Original bin op should be defined for FP induction");
+
+ Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
+
+ // Floating point operations had to be 'fast' to enable the induction.
+ FastMathFlags Flags;
+ Flags.setFast();
+
+ Value *MulExp = B.CreateFMul(StepValue, Index);
+ if (isa<Instruction>(MulExp))
+ // We have to check here because MulExp may be a constant.
+ cast<Instruction>(MulExp)->setFastMathFlags(Flags);
+
+ Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
+ "induction");
+ if (isa<Instruction>(BOp))
+ cast<Instruction>(BOp)->setFastMathFlags(Flags);
+
+ return BOp;
+ }
+ case InductionDescriptor::IK_NoInduction:
+ return nullptr;
+ }
+ llvm_unreachable("invalid enum");
+}
+
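For orientation, an illustrative scalar restatement of what emitTransformedIndex produces for each induction kind (editor's sketch with placeholder names; only the integer case is shown as code, the pointer and FP cases as comments):
// IK_IntInduction: Start + Index * Step (folding *1 and +0 like the lambdas).
// IK_PtrInduction: gep Start, Index * Step   (Step is a constant).
// IK_FpInduction : Start fadd/fsub (Index * Step), with fast-math flags set.
int64_t transformedIntIndex(int64_t Start, int64_t Index, int64_t Step) {
  int64_t Offset = (Step == 1) ? Index : Index * Step; // CreateMul fold
  return (Offset == 0) ? Start : Start + Offset;       // CreateAdd fold
}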
BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
/*
In this function we generate a new loop. The new loop will contain
@@ -2825,6 +2737,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
BasicBlock *OldBasicBlock = OrigLoop->getHeader();
BasicBlock *VectorPH = OrigLoop->getLoopPreheader();
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
+ MDNode *OrigLoopID = OrigLoop->getLoopID();
assert(VectorPH && "Invalid loop structure");
assert(ExitBlock && "Must have an exit block");
@@ -2927,7 +2840,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd");
const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
- EndValue = II.transform(B, CRD, PSE.getSE(), DL);
+ EndValue = emitTransformedIndex(B, CRD, PSE.getSE(), DL, II);
EndValue->setName("ind.end");
}
@@ -2948,9 +2861,12 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
// If (N - N%VF) == N, then we *don't* need to run the remainder.
- Value *CmpN =
- CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
- CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
+ // If tail is to be folded, we know we don't need to run the remainder.
+ Value *CmpN = Builder.getTrue();
+ if (!Cost->foldTailByMasking())
+ CmpN =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
+ CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
ReplaceInstWithInst(MiddleBlock->getTerminator(),
BranchInst::Create(ExitBlock, ScalarPH, CmpN));
@@ -2965,6 +2881,17 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
LoopVectorBody = VecBody;
LoopScalarBody = OldBasicBlock;
+ Optional<MDNode *> VectorizedLoopID =
+ makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
+ LLVMLoopVectorizeFollowupVectorized});
+ if (VectorizedLoopID.hasValue()) {
+ Lp->setLoopID(VectorizedLoopID.getValue());
+
+ // Do not setAlreadyVectorized if loop attributes have been defined
+ // explicitly.
+ return LoopVectorPreHeader;
+ }
+
// Keep all loop hints from the original loop on the vector loop (we'll
// replace the vectorizer-specific hints below).
if (MDNode *LID = OrigLoop->getLoopID())
@@ -3023,7 +2950,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
II.getStep()->getType())
: B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType());
CMO->setName("cast.cmo");
- Value *Escape = II.transform(B, CMO, PSE.getSE(), DL);
+ Value *Escape = emitTransformedIndex(B, CMO, PSE.getSE(), DL, II);
Escape->setName("ind.escape");
MissingVals[UI] = Escape;
}
@@ -3109,6 +3036,10 @@ static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
!TTI.supportsEfficientVectorElementLoadStore()))
Cost += TTI.getScalarizationOverhead(RetTy, true, false);
+ // Some targets keep addresses scalar.
+ if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
+ return Cost;
+
if (CallInst *CI = dyn_cast<CallInst>(I)) {
SmallVector<const Value *, 4> Operands(CI->arg_operands());
Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
@@ -3212,7 +3143,8 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
continue;
for (unsigned Part = 0; Part < UF; ++Part) {
Value *I = getOrCreateVectorValue(KV.first, Part);
- if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
+ if (Erased.find(I) != Erased.end() || I->use_empty() ||
+ !isa<Instruction>(I))
continue;
Type *OriginalTy = I->getType();
Type *ScalarTruncatedTy =
@@ -3330,6 +3262,13 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
if (VF > 1)
truncateToMinimalBitwidths();
+ // Fix widened non-induction PHIs by setting up the PHI operands.
+ if (OrigPHIsToFix.size()) {
+ assert(EnableVPlanNativePath &&
+ "Unexpected non-induction PHIs for fixup in non VPlan-native path");
+ fixNonInductionPHIs();
+ }
+
// At this point every instruction in the original loop is widened to a
// vector form. Now we need to fix the recurrences in the loop. These PHI
// nodes are currently empty because we did not want to introduce cycles.
@@ -3666,8 +3605,8 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,
ReducedPartRdx, "bin.rdx"));
else
- ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp(
- Builder, MinMaxKind, ReducedPartRdx, RdxPart);
+ ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
+ RdxPart);
}
if (VF > 1) {
@@ -3720,9 +3659,20 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
void InnerLoopVectorizer::fixLCSSAPHIs() {
for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
if (LCSSAPhi.getNumIncomingValues() == 1) {
- assert(OrigLoop->isLoopInvariant(LCSSAPhi.getIncomingValue(0)) &&
- "Incoming value isn't loop invariant");
- LCSSAPhi.addIncoming(LCSSAPhi.getIncomingValue(0), LoopMiddleBlock);
+ auto *IncomingValue = LCSSAPhi.getIncomingValue(0);
+ // Non-instruction incoming values will have only one value.
+ unsigned LastLane = 0;
+ if (isa<Instruction>(IncomingValue))
+ LastLane = Cost->isUniformAfterVectorization(
+ cast<Instruction>(IncomingValue), VF)
+ ? 0
+ : VF - 1;
+ // Can be a loop invariant incoming value or the last scalar value to be
+ // extracted from the vectorized loop.
+ Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
+ Value *lastIncomingValue =
+ getOrCreateScalarValue(IncomingValue, { UF - 1, LastLane });
+ LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock);
}
}
}
@@ -3791,12 +3741,62 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
} while (Changed);
}
+void InnerLoopVectorizer::fixNonInductionPHIs() {
+ for (PHINode *OrigPhi : OrigPHIsToFix) {
+ PHINode *NewPhi =
+ cast<PHINode>(VectorLoopValueMap.getVectorValue(OrigPhi, 0));
+ unsigned NumIncomingValues = OrigPhi->getNumIncomingValues();
+
+ SmallVector<BasicBlock *, 2> ScalarBBPredecessors(
+ predecessors(OrigPhi->getParent()));
+ SmallVector<BasicBlock *, 2> VectorBBPredecessors(
+ predecessors(NewPhi->getParent()));
+ assert(ScalarBBPredecessors.size() == VectorBBPredecessors.size() &&
+ "Scalar and Vector BB should have the same number of predecessors");
+
+ // The insertion point in Builder may be invalidated by the time we get
+ // here. Force the Builder insertion point to something valid so that we do
+ // not run into issues during insertion point restore in
+ // getOrCreateVectorValue calls below.
+ Builder.SetInsertPoint(NewPhi);
+
+ // The predecessor order is preserved and we can rely on mapping between
+ // scalar and vector block predecessors.
+ for (unsigned i = 0; i < NumIncomingValues; ++i) {
+ BasicBlock *NewPredBB = VectorBBPredecessors[i];
+
+ // When looking up the new scalar/vector values to fix up, use incoming
+ // values from original phi.
+ Value *ScIncV =
+ OrigPhi->getIncomingValueForBlock(ScalarBBPredecessors[i]);
+
+ // Scalar incoming value may need a broadcast
+ Value *NewIncV = getOrCreateVectorValue(ScIncV, 0);
+ NewPhi->addIncoming(NewIncV, NewPredBB);
+ }
+ }
+}
+
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
unsigned VF) {
+ PHINode *P = cast<PHINode>(PN);
+ if (EnableVPlanNativePath) {
+ // Currently we enter here in the VPlan-native path for non-induction
+ // PHIs where all control flow is uniform. We simply widen these PHIs.
+ // Create a vector phi with no operands - the vector phi operands will be
+ // set at the end of vector code generation.
+ Type *VecTy =
+ (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
+ Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
+ VectorLoopValueMap.setVectorValue(P, 0, VecPhi);
+ OrigPHIsToFix.push_back(P);
+
+ return;
+ }
+
assert(PN->getParent() == OrigLoop->getHeader() &&
"Non-header phis should have been handled elsewhere");
- PHINode *P = cast<PHINode>(PN);
// In order to support recurrences we need to be able to vectorize Phi nodes.
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
@@ -3846,7 +3846,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF);
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
- Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
+ Value *SclrGep =
+ emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II);
SclrGep->setName("next.gep");
VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep);
}
@@ -4151,6 +4152,10 @@ void InnerLoopVectorizer::updateAnalysis() {
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
+ // DT is not kept up-to-date for outer loop vectorization
+ if (EnableVPlanNativePath)
+ return;
+
// Update the dominator tree information.
assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
"Entry does not dominate exit.");
@@ -4167,7 +4172,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
// We should not collect Scalars more than once per VF. Right now, this
// function is called from collectUniformsAndScalars(), which already does
// this check. Collecting Scalars for VF=1 does not make any sense.
- assert(VF >= 2 && !Scalars.count(VF) &&
+ assert(VF >= 2 && Scalars.find(VF) == Scalars.end() &&
"This function should not be visited twice for the same VF");
SmallSetVector<Instruction *, 8> Worklist;
@@ -4253,7 +4258,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
}
}
for (auto *I : ScalarPtrs)
- if (!PossibleNonScalarPtrs.count(I)) {
+ if (PossibleNonScalarPtrs.find(I) == PossibleNonScalarPtrs.end()) {
LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *I << "\n");
Worklist.insert(I);
}
@@ -4279,8 +4284,9 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
// Insert the forced scalars.
// FIXME: Currently widenPHIInstruction() often creates a dead vector
// induction variable when the PHI user is scalarized.
- if (ForcedScalars.count(VF))
- for (auto *I : ForcedScalars.find(VF)->second)
+ auto ForcedScalar = ForcedScalars.find(VF);
+ if (ForcedScalar != ForcedScalars.end())
+ for (auto *I : ForcedScalar->second)
Worklist.insert(I);
// Expand the worklist by looking through any bitcasts and getelementptr
@@ -4348,8 +4354,8 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
Scalars[VF].insert(Worklist.begin(), Worklist.end());
}
-bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I) {
- if (!Legal->blockNeedsPredication(I->getParent()))
+bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigned VF) {
+ if (!blockNeedsPredication(I->getParent()))
return false;
switch(I->getOpcode()) {
default:
@@ -4360,6 +4366,14 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I) {
return false;
auto *Ptr = getLoadStorePointerOperand(I);
auto *Ty = getMemInstValueType(I);
+ // We have already decided how to vectorize this instruction, get that
+ // result.
+ if (VF > 1) {
+ InstWidening WideningDecision = getWideningDecision(I, VF);
+ assert(WideningDecision != CM_Unknown &&
+ "Widening decision should be ready at this moment");
+ return WideningDecision == CM_Scalarize;
+ }
return isa<LoadInst>(I) ?
!(isLegalMaskedLoad(Ty, Ptr) || isLegalMaskedGather(Ty))
: !(isLegalMaskedStore(Ty, Ptr) || isLegalMaskedScatter(Ty));
@@ -4373,6 +4387,35 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I) {
return false;
}
+bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
+ unsigned VF) {
+ assert(isAccessInterleaved(I) && "Expecting interleaved access.");
+ assert(getWideningDecision(I, VF) == CM_Unknown &&
+ "Decision should not be set yet.");
+ auto *Group = getInterleavedAccessGroup(I);
+ assert(Group && "Must have a group.");
+
+ // Check if masking is required.
+ // A Group may need masking for one of two reasons: it resides in a block that
+ // needs predication, or it was decided to use masking to deal with gaps.
+ bool PredicatedAccessRequiresMasking =
+ Legal->blockNeedsPredication(I->getParent()) && Legal->isMaskRequired(I);
+ bool AccessWithGapsRequiresMasking =
+ Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed;
+ if (!PredicatedAccessRequiresMasking && !AccessWithGapsRequiresMasking)
+ return true;
+
+ // If masked interleaving is required, we expect that the user/target had
+ // enabled it, because otherwise it either wouldn't have been created or
+ // it should have been invalidated by the CostModel.
+ assert(useMaskedInterleavedAccesses(TTI) &&
+ "Masked interleave-groups for predicated accesses are not enabled.");
+
+ auto *Ty = getMemInstValueType(I);
+ return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty)
+ : TTI.isLegalMaskedStore(Ty);
+}
+
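A compact restatement of the widening decision above (editor's sketch; the boolean parameters are placeholders for the conditions computed in the function):
// An interleave group can be widened unless it needs a mask (it sits in a
// predicated block, or its gaps must be masked because no scalar epilogue is
// allowed); in that case the target must support a masked load/store of the
// member value type.
bool canWidenInterleaveGroup(bool PredicatedAndMaskRequired,
                             bool GapsRequireMasking, bool TargetHasMaskedOp) {
  if (!PredicatedAndMaskRequired && !GapsRequireMasking)
    return true;
  return TargetHasMaskedOp;
}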
bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
unsigned VF) {
// Get and ensure we have a valid memory instruction.
@@ -4407,7 +4450,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
// already does this check. Collecting Uniforms for VF=1 does not make any
// sense.
- assert(VF >= 2 && !Uniforms.count(VF) &&
+ assert(VF >= 2 && Uniforms.find(VF) == Uniforms.end() &&
"This function should not be visited twice for the same VF");
// Visit the list of Uniforms. If we'll not find any uniform value, we'll
@@ -4494,20 +4537,20 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
// Add to the Worklist all consecutive and consecutive-like pointers that
// aren't also identified as possibly non-uniform.
for (auto *V : ConsecutiveLikePtrs)
- if (!PossibleNonUniformPtrs.count(V)) {
+ if (PossibleNonUniformPtrs.find(V) == PossibleNonUniformPtrs.end()) {
LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n");
Worklist.insert(V);
}
// Expand Worklist in topological order: whenever a new instruction
- // is added , its users should be either already inside Worklist, or
- // out of scope. It ensures a uniform instruction will only be used
- // by uniform instructions or out of scope instructions.
+ // is added, its users should already be inside Worklist. It ensures
+ // a uniform instruction will only be used by uniform instructions.
unsigned idx = 0;
while (idx != Worklist.size()) {
Instruction *I = Worklist[idx++];
for (auto OV : I->operand_values()) {
+ // isOutOfScope operands cannot be uniform instructions.
if (isOutOfScope(OV))
continue;
// First order recurrence Phi's should typically be considered
@@ -4520,7 +4563,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
auto *OI = cast<Instruction>(OV);
if (llvm::all_of(OI->users(), [&](User *U) -> bool {
auto *J = cast<Instruction>(U);
- return !TheLoop->contains(J) || Worklist.count(J) ||
+ return Worklist.count(J) ||
(OI == getLoadStorePointerOperand(J) &&
isUniformDecision(J, VF));
})) {
@@ -4578,318 +4621,6 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
Uniforms[VF].insert(Worklist.begin(), Worklist.end());
}
-void InterleavedAccessInfo::collectConstStrideAccesses(
- MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
- const ValueToValueMap &Strides) {
- auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
-
- // Since it's desired that the load/store instructions be maintained in
- // "program order" for the interleaved access analysis, we have to visit the
- // blocks in the loop in reverse postorder (i.e., in a topological order).
- // Such an ordering will ensure that any load/store that may be executed
- // before a second load/store will precede the second load/store in
- // AccessStrideInfo.
- LoopBlocksDFS DFS(TheLoop);
- DFS.perform(LI);
- for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
- for (auto &I : *BB) {
- auto *LI = dyn_cast<LoadInst>(&I);
- auto *SI = dyn_cast<StoreInst>(&I);
- if (!LI && !SI)
- continue;
-
- Value *Ptr = getLoadStorePointerOperand(&I);
- // We don't check wrapping here because we don't know yet if Ptr will be
- // part of a full group or a group with gaps. Checking wrapping for all
- // pointers (even those that end up in groups with no gaps) will be overly
- // conservative. For full groups, wrapping should be ok since if we would
- // wrap around the address space we would do a memory access at nullptr
- // even without the transformation. The wrapping checks are therefore
- // deferred until after we've formed the interleaved groups.
- int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
- /*Assume=*/true, /*ShouldCheckWrap=*/false);
-
- const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
- PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
- uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
-
- // An alignment of 0 means target ABI alignment.
- unsigned Align = getMemInstAlignment(&I);
- if (!Align)
- Align = DL.getABITypeAlignment(PtrTy->getElementType());
-
- AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
- }
-}
-
-// Analyze interleaved accesses and collect them into interleaved load and
-// store groups.
-//
-// When generating code for an interleaved load group, we effectively hoist all
-// loads in the group to the location of the first load in program order. When
-// generating code for an interleaved store group, we sink all stores to the
-// location of the last store. This code motion can change the order of load
-// and store instructions and may break dependences.
-//
-// The code generation strategy mentioned above ensures that we won't violate
-// any write-after-read (WAR) dependences.
-//
-// E.g., for the WAR dependence: a = A[i]; // (1)
-// A[i] = b; // (2)
-//
-// The store group of (2) is always inserted at or below (2), and the load
-// group of (1) is always inserted at or above (1). Thus, the instructions will
-// never be reordered. All other dependences are checked to ensure the
-// correctness of the instruction reordering.
-//
-// The algorithm visits all memory accesses in the loop in bottom-up program
-// order. Program order is established by traversing the blocks in the loop in
-// reverse postorder when collecting the accesses.
-//
-// We visit the memory accesses in bottom-up order because it can simplify the
-// construction of store groups in the presence of write-after-write (WAW)
-// dependences.
-//
-// E.g., for the WAW dependence: A[i] = a; // (1)
-// A[i] = b; // (2)
-// A[i + 1] = c; // (3)
-//
-// We will first create a store group with (3) and (2). (1) can't be added to
-// this group because it and (2) are dependent. However, (1) can be grouped
-// with other accesses that may precede it in program order. Note that a
-// bottom-up order does not imply that WAW dependences should not be checked.
-void InterleavedAccessInfo::analyzeInterleaving() {
- LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
- const ValueToValueMap &Strides = LAI->getSymbolicStrides();
-
- // Holds all accesses with a constant stride.
- MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
- collectConstStrideAccesses(AccessStrideInfo, Strides);
-
- if (AccessStrideInfo.empty())
- return;
-
- // Collect the dependences in the loop.
- collectDependences();
-
- // Holds all interleaved store groups temporarily.
- SmallSetVector<InterleaveGroup *, 4> StoreGroups;
- // Holds all interleaved load groups temporarily.
- SmallSetVector<InterleaveGroup *, 4> LoadGroups;
-
- // Search in bottom-up program order for pairs of accesses (A and B) that can
- // form interleaved load or store groups. In the algorithm below, access A
- // precedes access B in program order. We initialize a group for B in the
- // outer loop of the algorithm, and then in the inner loop, we attempt to
- // insert each A into B's group if:
- //
- // 1. A and B have the same stride,
- // 2. A and B have the same memory object size, and
- // 3. A belongs in B's group according to its distance from B.
- //
- // Special care is taken to ensure group formation will not break any
- // dependences.
- for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
- BI != E; ++BI) {
- Instruction *B = BI->first;
- StrideDescriptor DesB = BI->second;
-
- // Initialize a group for B if it has an allowable stride. Even if we don't
- // create a group for B, we continue with the bottom-up algorithm to ensure
- // we don't break any of B's dependences.
- InterleaveGroup *Group = nullptr;
- if (isStrided(DesB.Stride)) {
- Group = getInterleaveGroup(B);
- if (!Group) {
- LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
- << '\n');
- Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
- }
- if (B->mayWriteToMemory())
- StoreGroups.insert(Group);
- else
- LoadGroups.insert(Group);
- }
-
- for (auto AI = std::next(BI); AI != E; ++AI) {
- Instruction *A = AI->first;
- StrideDescriptor DesA = AI->second;
-
- // Our code motion strategy implies that we can't have dependences
- // between accesses in an interleaved group and other accesses located
- // between the first and last member of the group. Note that this also
- // means that a group can't have more than one member at a given offset.
- // The accesses in a group can have dependences with other accesses, but
- // we must ensure we don't extend the boundaries of the group such that
- // we encompass those dependent accesses.
- //
- // For example, assume we have the sequence of accesses shown below in a
- // stride-2 loop:
- //
- // (1, 2) is a group | A[i] = a; // (1)
- // | A[i-1] = b; // (2) |
- // A[i-3] = c; // (3)
- // A[i] = d; // (4) | (2, 4) is not a group
- //
- // Because accesses (2) and (3) are dependent, we can group (2) with (1)
- // but not with (4). If we did, the dependent access (3) would be within
- // the boundaries of the (2, 4) group.
- if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
- // If a dependence exists and A is already in a group, we know that A
- // must be a store since A precedes B and WAR dependences are allowed.
- // Thus, A would be sunk below B. We release A's group to prevent this
- // illegal code motion. A will then be free to form another group with
- // instructions that precede it.
- if (isInterleaved(A)) {
- InterleaveGroup *StoreGroup = getInterleaveGroup(A);
- StoreGroups.remove(StoreGroup);
- releaseGroup(StoreGroup);
- }
-
- // If a dependence exists and A is not already in a group (or it was
- // and we just released it), B might be hoisted above A (if B is a
- // load) or another store might be sunk below A (if B is a store). In
- // either case, we can't add additional instructions to B's group. B
- // will only form a group with instructions that it precedes.
- break;
- }
-
- // At this point, we've checked for illegal code motion. If either A or B
- // isn't strided, there's nothing left to do.
- if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
- continue;
-
- // Ignore A if it's already in a group or isn't the same kind of memory
- // operation as B.
- // Note that mayReadFromMemory() isn't mutually exclusive to mayWriteToMemory
- // in the case of atomic loads. We shouldn't see those here, canVectorizeMemory()
- // should have returned false - except for the case we asked for optimization
- // remarks.
- if (isInterleaved(A) || (A->mayReadFromMemory() != B->mayReadFromMemory())
- || (A->mayWriteToMemory() != B->mayWriteToMemory()))
- continue;
-
- // Check rules 1 and 2. Ignore A if its stride or size is different from
- // that of B.
- if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
- continue;
-
- // Ignore A if the memory object of A and B don't belong to the same
- // address space
- if (getMemInstAddressSpace(A) != getMemInstAddressSpace(B))
- continue;
-
- // Calculate the distance from A to B.
- const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
- PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
- if (!DistToB)
- continue;
- int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
-
- // Check rule 3. Ignore A if its distance to B is not a multiple of the
- // size.
- if (DistanceToB % static_cast<int64_t>(DesB.Size))
- continue;
-
- // Ignore A if either A or B is in a predicated block. Although we
- // currently prevent group formation for predicated accesses, we may be
- // able to relax this limitation in the future once we handle more
- // complicated blocks.
- if (isPredicated(A->getParent()) || isPredicated(B->getParent()))
- continue;
-
- // The index of A is the index of B plus A's distance to B in multiples
- // of the size.
- int IndexA =
- Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
-
- // Try to insert A into B's group.
- if (Group->insertMember(A, IndexA, DesA.Align)) {
- LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
- << " into the interleave group with" << *B
- << '\n');
- InterleaveGroupMap[A] = Group;
-
- // Set the first load in program order as the insert position.
- if (A->mayReadFromMemory())
- Group->setInsertPos(A);
- }
- } // Iteration over A accesses.
- } // Iteration over B accesses.
-
- // Remove interleaved store groups with gaps.
- for (InterleaveGroup *Group : StoreGroups)
- if (Group->getNumMembers() != Group->getFactor()) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved store group due "
- "to gaps.\n");
- releaseGroup(Group);
- }
- // Remove interleaved groups with gaps (currently only loads) whose memory
- // accesses may wrap around. We have to revisit the getPtrStride analysis,
- // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
- // not check wrapping (see documentation there).
- // FORNOW we use Assume=false;
- // TODO: Change to Assume=true but making sure we don't exceed the threshold
- // of runtime SCEV assumptions checks (thereby potentially failing to
- // vectorize altogether).
- // Additional optional optimizations:
- // TODO: If we are peeling the loop and we know that the first pointer doesn't
- // wrap then we can deduce that all pointers in the group don't wrap.
- // This means that we can forcefully peel the loop in order to only have to
- // check the first pointer for no-wrap. When we'll change to use Assume=true
- // we'll only need at most one runtime check per interleaved group.
- for (InterleaveGroup *Group : LoadGroups) {
- // Case 1: A full group. Can Skip the checks; For full groups, if the wide
- // load would wrap around the address space we would do a memory access at
- // nullptr even without the transformation.
- if (Group->getNumMembers() == Group->getFactor())
- continue;
-
- // Case 2: If first and last members of the group don't wrap this implies
- // that all the pointers in the group don't wrap.
- // So we check only group member 0 (which is always guaranteed to exist),
- // and group member Factor - 1; If the latter doesn't exist we rely on
- // peeling (if it is a non-reversed access -- see Case 3).
- Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
- if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
- /*ShouldCheckWrap=*/true)) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "first group member potentially pointer-wrapping.\n");
- releaseGroup(Group);
- continue;
- }
- Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
- if (LastMember) {
- Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
- if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
- /*ShouldCheckWrap=*/true)) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "last group member potentially pointer-wrapping.\n");
- releaseGroup(Group);
- }
- } else {
- // Case 3: A non-reversed interleaved load group with gaps: We need
- // to execute at least one scalar epilogue iteration. This will ensure
- // we don't speculatively access memory out-of-bounds. We only need
- // to look for a member at index factor - 1, since every group must have
- // a member at index zero.
- if (Group->isReverse()) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "a reverse access with gaps.\n");
- releaseGroup(Group);
- continue;
- }
- LLVM_DEBUG(
- dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
- RequiresScalarEpilogue = true;
- }
- }
-}
-
Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
// TODO: It may be useful to do since it's still likely to be dynamically
@@ -4919,39 +4650,78 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
return None;
}
+ if (!PSE.getUnionPredicate().getPredicates().empty()) {
+ ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
+ << "runtime SCEV checks needed. Enable vectorization of this "
+ "loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz");
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Aborting. Runtime SCEV check is required with -Os/-Oz.\n");
+ return None;
+ }
+
+ // FIXME: Avoid specializing for stride==1 instead of bailing out.
+ if (!Legal->getLAI()->getSymbolicStrides().empty()) {
+ ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
+ << "runtime stride == 1 checks needed. Enable vectorization of "
+ "this loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz");
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Aborting. Runtime stride check is required with -Os/-Oz.\n");
+ return None;
+ }
+
// If we optimize the program for size, avoid creating the tail loop.
LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
- // If we don't know the precise trip count, don't try to vectorize.
- if (TC < 2) {
- ORE->emit(
- createMissedAnalysis("UnknownLoopCountComplexCFG")
- << "unable to calculate the loop count due to complex control flow");
- LLVM_DEBUG(
- dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
+ if (TC == 1) {
+ ORE->emit(createMissedAnalysis("SingleIterationLoop")
+ << "loop trip count is one, irrelevant for vectorization");
+ LLVM_DEBUG(dbgs() << "LV: Aborting, single iteration (non) loop.\n");
return None;
}
+ // Record that scalar epilogue is not allowed.
+ LLVM_DEBUG(dbgs() << "LV: Not allowing scalar epilogue due to -Os/-Oz.\n");
+
+ IsScalarEpilogueAllowed = !OptForSize;
+
+ // We don't create an epilogue when optimizing for size.
+ // Invalidate interleave groups that require an epilogue if we can't mask
+ // the interleave-group.
+ if (!useMaskedInterleavedAccesses(TTI))
+ InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
+
unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC);
- if (TC % MaxVF != 0) {
- // If the trip count that we found modulo the vectorization factor is not
- // zero then we require a tail.
- // FIXME: look for a smaller MaxVF that does divide TC rather than give up.
- // FIXME: return None if loop requiresScalarEpilog(<MaxVF>), or look for a
- // smaller MaxVF that does not require a scalar epilog.
-
- ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize")
- << "cannot optimize for size and vectorize at the "
- "same time. Enable vectorization of this loop "
- "with '#pragma clang loop vectorize(enable)' "
- "when compiling with -Os/-Oz");
- LLVM_DEBUG(
- dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
+ if (TC > 0 && TC % MaxVF == 0) {
+ LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n");
+ return MaxVF;
+ }
+
+ // If we don't know the precise trip count, or if the trip count that we
+ // found modulo the vectorization factor is not zero, try to fold the tail
+ // by masking.
+ // FIXME: look for a smaller MaxVF that does divide TC rather than masking.
+ if (Legal->canFoldTailByMasking()) {
+ FoldTailByMasking = true;
+ return MaxVF;
+ }
+
+ if (TC == 0) {
+ ORE->emit(
+ createMissedAnalysis("UnknownLoopCountComplexCFG")
+ << "unable to calculate the loop count due to complex control flow");
return None;
}
- return MaxVF;
+ ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize")
+ << "cannot optimize for size and vectorize at the same time. "
+ "Enable vectorization of this loop with '#pragma clang loop "
+ "vectorize(enable)' when compiling with -Os/-Oz");
+ return None;
}
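The tail-handling policy introduced above, condensed into a sketch (editor's illustration; a return value of -1 stands in for None):
// Under -Os/-Oz: keep MaxVF if no tail remains, otherwise fold the tail by
// masking when that is legal; failing that, vectorization is abandoned.
int maxVFWithOptForSize(uint64_t TC, unsigned MaxVF, bool CanFoldTail,
                        bool &FoldTailByMasking) {
  if (TC > 0 && TC % MaxVF == 0)
    return MaxVF;               // trip count divides evenly: no tail at all
  if (CanFoldTail) {
    FoldTailByMasking = true;   // tail iterations handled by the block mask
    return MaxVF;
  }
  return -1;                    // unknown trip count or leftover tail: give up
}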
unsigned
@@ -5087,11 +4857,11 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
// For each instruction in the loop.
- for (Instruction &I : *BB) {
+ for (Instruction &I : BB->instructionsWithoutDebug()) {
Type *T = I.getType();
// Skip ignored values.
- if (ValuesToIgnore.count(&I))
+ if (ValuesToIgnore.find(&I) != ValuesToIgnore.end())
continue;
// Only examine Loads, Stores and PHINodes.
@@ -5189,6 +4959,9 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
// fit without causing spills. All of this is rounded down if necessary to be
// a power of two. We want power of two interleave count to simplify any
// addressing operations or alignment considerations.
+ // We also want power-of-two interleave counts to ensure that the induction
+ // variable of the vector loop wraps to zero when the tail is folded by
+ // masking; this currently happens under OptForSize, in which case IC is set
+ // to 1 above.
unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
R.MaxLocalUsers);
@@ -5314,7 +5087,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
using IntervalMap = DenseMap<Instruction *, unsigned>;
// Maps instruction to its index.
- DenseMap<unsigned, Instruction *> IdxToInstr;
+ SmallVector<Instruction *, 64> IdxToInstr;
// Marks the end of each interval.
IntervalMap EndPoint;
// Saves the list of instruction indices that are used in the loop.
@@ -5323,10 +5096,9 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
// defined outside the loop, such as arguments and constants.
SmallPtrSet<Value *, 8> LoopInvariants;
- unsigned Index = 0;
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
- for (Instruction &I : *BB) {
- IdxToInstr[Index++] = &I;
+ for (Instruction &I : BB->instructionsWithoutDebug()) {
+ IdxToInstr.push_back(&I);
// Save the end location of each USE.
for (Value *U : I.operands()) {
@@ -5343,7 +5115,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
}
// Overwrite previous end points.
- EndPoint[Instr] = Index;
+ EndPoint[Instr] = IdxToInstr.size();
Ends.insert(Instr);
}
}
@@ -5380,7 +5152,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
return std::max<unsigned>(1, VF * TypeSize / WidestRegister);
};
- for (unsigned int i = 0; i < Index; ++i) {
+ for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {
Instruction *I = IdxToInstr[i];
// Remove all of the instructions that end at this location.
@@ -5389,11 +5161,11 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
OpenIntervals.erase(ToRemove);
// Ignore instructions that are never used within the loop.
- if (!Ends.count(I))
+ if (Ends.find(I) == Ends.end())
continue;
// Skip ignored values.
- if (ValuesToIgnore.count(I))
+ if (ValuesToIgnore.find(I) != ValuesToIgnore.end())
continue;
// For each VF find the maximum usage of registers.
@@ -5407,7 +5179,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
unsigned RegUsage = 0;
for (auto Inst : OpenIntervals) {
// Skip ignored values for VF > 1.
- if (VecValuesToIgnore.count(Inst) ||
+ if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() ||
isScalarAfterVectorization(Inst, VFs[j]))
continue;
RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
@@ -5453,8 +5225,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){
// from moving "masked load/store" check from legality to cost model.
// Masked Load/Gather emulation was previously never allowed.
// Limited number of Masked Store/Scatter emulation was allowed.
- assert(isScalarWithPredication(I) &&
- "Expecting a scalar emulated instruction");
+ assert(isPredicatedInst(I) && "Expecting a scalar emulated instruction");
return isa<LoadInst>(I) ||
(isa<StoreInst>(I) &&
NumPredStores > NumberOfStoresToPredicate);
@@ -5465,7 +5236,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
// instructions to scalarize, there's nothing to do. Collection may already
// have occurred if we have a user-selected VF and are now computing the
// expected cost for interleaving.
- if (VF < 2 || InstsToScalarize.count(VF))
+ if (VF < 2 || InstsToScalarize.find(VF) != InstsToScalarize.end())
return;
// Initialize a mapping for VF in InstsToScalalarize. If we find that it's
@@ -5477,7 +5248,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
// determine if it would be better to not if-convert the blocks they are in.
// If so, we also record the instructions to scalarize.
for (BasicBlock *BB : TheLoop->blocks()) {
- if (!Legal->blockNeedsPredication(BB))
+ if (!blockNeedsPredication(BB))
continue;
for (Instruction &I : *BB)
if (isScalarWithPredication(&I)) {
@@ -5560,7 +5331,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
Instruction *I = Worklist.pop_back_val();
// If we've already analyzed the instruction, there's nothing to do.
- if (ScalarCosts.count(I))
+ if (ScalarCosts.find(I) != ScalarCosts.end())
continue;
// Compute the cost of the vector instruction. Note that this cost already
@@ -5619,8 +5390,8 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
// For each instruction in the old loop.
for (Instruction &I : BB->instructionsWithoutDebug()) {
// Skip ignored values.
- if (ValuesToIgnore.count(&I) ||
- (VF > 1 && VecValuesToIgnore.count(&I)))
+ if (ValuesToIgnore.find(&I) != ValuesToIgnore.end() ||
+ (VF > 1 && VecValuesToIgnore.find(&I) != VecValuesToIgnore.end()))
continue;
VectorizationCostTy C = getInstructionCost(&I, VF);
@@ -5642,7 +5413,7 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
// unconditionally executed. For the scalar case, we may not always execute
// the predicated block. Thus, scale the block's cost by the probability of
// executing it.
- if (VF == 1 && Legal->blockNeedsPredication(BB))
+ if (VF == 1 && blockNeedsPredication(BB))
BlockCost.first /= getReciprocalPredBlockProb();
Cost.first += BlockCost.first;
@@ -5689,11 +5460,12 @@ static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
unsigned VF) {
+ assert(VF > 1 && "Scalarization cost of instruction implies vectorization.");
Type *ValTy = getMemInstValueType(I);
auto SE = PSE.getSE();
- unsigned Alignment = getMemInstAlignment(I);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
Value *Ptr = getLoadStorePointerOperand(I);
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
@@ -5704,9 +5476,11 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// Get the cost of the scalar memory instruction and address computation.
unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
+ // Don't pass *I here, since it is scalar but will actually be part of a
+ // vectorized loop where the user of it is a vectorized instruction.
Cost += VF *
TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment,
- AS, I);
+ AS);
// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
@@ -5715,7 +5489,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// If we have a predicated store, it may not be executed for each vector
// lane. Scale the cost by the probability of executing the predicated
// block.
- if (isScalarWithPredication(I)) {
+ if (isPredicatedInst(I)) {
Cost /= getReciprocalPredBlockProb();
if (useEmulatedMaskMemRefHack(I))
@@ -5731,9 +5505,9 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned Alignment = getMemInstAlignment(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
Value *Ptr = getLoadStorePointerOperand(I);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
@@ -5752,22 +5526,30 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
unsigned VF) {
- LoadInst *LI = cast<LoadInst>(I);
- Type *ValTy = LI->getType();
+ Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned Alignment = LI->getAlignment();
- unsigned AS = LI->getPointerAddressSpace();
+ unsigned Alignment = getLoadStoreAlignment(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
+ if (isa<LoadInst>(I)) {
+ return TTI.getAddressComputationCost(ValTy) +
+ TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
+ TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
+ }
+ StoreInst *SI = cast<StoreInst>(I);
+ bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
return TTI.getAddressComputationCost(ValTy) +
- TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
- TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
+ TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
+ (isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost(
+ Instruction::ExtractElement,
+ VectorTy, VF - 1));
}
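The cost formula implemented above, restated as a sketch (editor's illustration; the parameters stand in for the corresponding TTI query results):
// Uniform-address load: scalar load plus a broadcast. Uniform-address store:
// scalar store, plus an extract of the last vector lane when the stored value
// is not loop-invariant.
unsigned uniformMemOpCost(bool IsLoad, bool StoreValIsInvariant,
                          unsigned AddrCost, unsigned ScalarMemCost,
                          unsigned BroadcastCost, unsigned ExtractCost) {
  if (IsLoad)
    return AddrCost + ScalarMemCost + BroadcastCost;
  return AddrCost + ScalarMemCost + (StoreValIsInvariant ? 0 : ExtractCost);
}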
unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned Alignment = getMemInstAlignment(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
Value *Ptr = getLoadStorePointerOperand(I);
return TTI.getAddressComputationCost(VectorTy) +
@@ -5779,7 +5561,7 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
auto Group = getInterleavedAccessGroup(I);
assert(Group && "Fail to get an interleaved access group.");
@@ -5797,13 +5579,19 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
}
// Calculate the cost of the whole interleaved group.
- unsigned Cost = TTI.getInterleavedMemoryOpCost(I->getOpcode(), WideVecTy,
- Group->getFactor(), Indices,
- Group->getAlignment(), AS);
-
- if (Group->isReverse())
+ bool UseMaskForGaps =
+ Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed;
+ unsigned Cost = TTI.getInterleavedMemoryOpCost(
+ I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
+ Group->getAlignment(), AS, Legal->isMaskRequired(I), UseMaskForGaps);
+
+ if (Group->isReverse()) {
+ // TODO: Add support for reversed masked interleaved access.
+ assert(!Legal->isMaskRequired(I) &&
+ "Reverse masked interleaved access not supported.");
Cost += Group->getNumMembers() *
TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
+ }
return Cost;
}
@@ -5813,8 +5601,8 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
// moment.
if (VF == 1) {
Type *ValTy = getMemInstValueType(I);
- unsigned Alignment = getMemInstAlignment(I);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
@@ -5833,9 +5621,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
return VectorizationCostTy(InstsToScalarize[VF][I], false);
// Forced scalars do not have any scalarization overhead.
- if (VF > 1 && ForcedScalars.count(VF) &&
- ForcedScalars.find(VF)->second.count(I))
- return VectorizationCostTy((getInstructionCost(I, 1).first * VF), false);
+ auto ForcedScalar = ForcedScalars.find(VF);
+ if (VF > 1 && ForcedScalar != ForcedScalars.end()) {
+ auto InstSet = ForcedScalar->second;
+ if (InstSet.find(I) != InstSet.end())
+ return VectorizationCostTy((getInstructionCost(I, 1).first * VF), false);
+ }
Type *VectorTy;
unsigned C = getInstructionCost(I, VF, VectorTy);
@@ -5856,10 +5647,22 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
if (!Ptr)
continue;
+ // TODO: We should generate better code and update the cost model for
+ // predicated uniform stores. Today they are treated like any other
+ // predicated store (see added test cases in
+ // invariant-store-vectorization.ll).
if (isa<StoreInst>(&I) && isScalarWithPredication(&I))
NumPredStores++;
- if (isa<LoadInst>(&I) && Legal->isUniform(Ptr)) {
- // Scalar load + broadcast
+
+ if (Legal->isUniform(Ptr) &&
+ // Conditional loads and stores should be scalarized and predicated.
+ // isScalarWithPredication cannot be used here since masked
+ // gather/scatters are not considered scalar with predication.
+ !Legal->blockNeedsPredication(I.getParent())) {
+ // TODO: Avoid replicating loads and stores instead of
+ // relying on instcombine to remove them.
+ // Load: Scalar load + broadcast
+ // Store: Scalar store + isLoopInvariantStoreValue ? 0 : extract
unsigned Cost = getUniformMemOpCost(&I, VF);
setWideningDecision(&I, VF, CM_Scalarize, Cost);
continue;
@@ -5890,7 +5693,8 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
continue;
NumAccesses = Group->getNumMembers();
- InterleaveCost = getInterleaveGroupCost(&I, VF);
+ if (interleavedAccessCanBeWidened(&I, VF))
+ InterleaveCost = getInterleaveGroupCost(&I, VF);
}
unsigned GatherScatterCost =
@@ -6008,8 +5812,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
bool ScalarPredicatedBB = false;
BranchInst *BI = cast<BranchInst>(I);
if (VF > 1 && BI->isConditional() &&
- (PredicatedBBsAfterVectorization.count(BI->getSuccessor(0)) ||
- PredicatedBBsAfterVectorization.count(BI->getSuccessor(1))))
+ (PredicatedBBsAfterVectorization.find(BI->getSuccessor(0)) !=
+ PredicatedBBsAfterVectorization.end() ||
+ PredicatedBBsAfterVectorization.find(BI->getSuccessor(1)) !=
+ PredicatedBBsAfterVectorization.end()))
ScalarPredicatedBB = true;
if (ScalarPredicatedBB) {
@@ -6032,9 +5838,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
auto *Phi = cast<PHINode>(I);
// First-order recurrences are replaced by vector shuffles inside the loop.
+ // NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
if (VF > 1 && Legal->isFirstOrderRecurrence(Phi))
return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- VectorTy, VF - 1, VectorTy);
+ VectorTy, VF - 1, VectorType::get(RetTy, 1));
// Phi nodes in non-header blocks (not inductions, reductions, etc.) are
// converted into select instructions. We require N - 1 selects per phi
@@ -6096,38 +5903,18 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
return 0;
// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this are shifts on x86.
- TargetTransformInfo::OperandValueKind Op1VK =
- TargetTransformInfo::OK_AnyValue;
- TargetTransformInfo::OperandValueKind Op2VK =
- TargetTransformInfo::OK_AnyValue;
- TargetTransformInfo::OperandValueProperties Op1VP =
- TargetTransformInfo::OP_None;
- TargetTransformInfo::OperandValueProperties Op2VP =
- TargetTransformInfo::OP_None;
Value *Op2 = I->getOperand(1);
-
- // Check for a splat or for a non uniform vector of constants.
- if (isa<ConstantInt>(Op2)) {
- ConstantInt *CInt = cast<ConstantInt>(Op2);
- if (CInt && CInt->getValue().isPowerOf2())
- Op2VP = TargetTransformInfo::OP_PowerOf2;
- Op2VK = TargetTransformInfo::OK_UniformConstantValue;
- } else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) {
- Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
- Constant *SplatValue = cast<Constant>(Op2)->getSplatValue();
- if (SplatValue) {
- ConstantInt *CInt = dyn_cast<ConstantInt>(SplatValue);
- if (CInt && CInt->getValue().isPowerOf2())
- Op2VP = TargetTransformInfo::OP_PowerOf2;
- Op2VK = TargetTransformInfo::OK_UniformConstantValue;
- }
- } else if (Legal->isUniform(Op2)) {
+ TargetTransformInfo::OperandValueProperties Op2VP;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TTI.getOperandInfo(Op2, Op2VP);
+ if (Op2VK == TargetTransformInfo::OK_AnyValue && Legal->isUniform(Op2))
Op2VK = TargetTransformInfo::OK_UniformValue;
- }
+
SmallVector<const Value *, 4> Operands(I->operand_values());
unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
- return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
- Op2VK, Op1VP, Op2VP, Operands);
+ return N * TTI.getArithmeticInstrCost(
+ I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
+ Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
@@ -6244,8 +6031,9 @@ INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
-Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) {
- return new LoopVectorize(NoUnrolling, AlwaysVectorize);
+Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced,
+ bool VectorizeOnlyWhenForced) {
+ return new LoopVectorize(InterleaveOnlyWhenForced, VectorizeOnlyWhenForced);
}
} // end namespace llvm
@@ -6323,6 +6111,16 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize.
return NoVectorization;
+ // Invalidate interleave groups if all blocks of the loop will be predicated.
+ if (CM.blockNeedsPredication(OrigLoop->getHeader()) &&
+ !useMaskedInterleavedAccesses(*TTI)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Invalidate all interleaved groups due to fold-tail by masking "
+ "which requires masked-interleaved support.\n");
+ CM.InterleaveInfo.reset();
+ }
+
if (UserVF) {
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
@@ -6379,6 +6177,7 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
DT, ILV.Builder, ILV.VectorLoopValueMap,
&ILV, CallbackILV};
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
+ State.TripCount = ILV.getOrCreateTripCount(nullptr);
//===------------------------------------------------===//
//
@@ -6415,7 +6214,8 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
PHINode *Ind = Induction.first;
auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
if (llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
- return U == Ind || DeadInstructions.count(cast<Instruction>(U));
+ return U == Ind || DeadInstructions.find(cast<Instruction>(U)) !=
+ DeadInstructions.end();
}))
DeadInstructions.insert(IndUpdate);
@@ -6558,9 +6358,17 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
// load/store/gather/scatter. Initialize BlockMask to no-mask.
VPValue *BlockMask = nullptr;
- // Loop incoming mask is all-one.
- if (OrigLoop->getHeader() == BB)
+ if (OrigLoop->getHeader() == BB) {
+ if (!CM.blockNeedsPredication(BB))
+ return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one.
+
+ // Introduce the early-exit compare IV <= BTC to form header block mask.
+ // This is used instead of IV < TC because TC may wrap, unlike BTC.
+ VPValue *IV = Plan->getVPValue(Legal->getPrimaryInduction());
+ VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
+ BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC});
return BlockMaskCache[BB] = BlockMask;
+ }
// This is the block mask. We OR all incoming edges.
for (auto *Predecessor : predecessors(BB)) {
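Why the header mask compares against the backedge-taken count rather than the trip count: a minimal sketch (editor's illustration, assuming an i8 induction for emphasis):
// With 256 iterations an i8 trip count truncates to 0, so "IV < TC" would be
// all-false; BTC = 255 is representable and "IV <= BTC" keeps exactly the
// first 256 iterations' lanes active.
bool headerMaskLane(uint8_t LaneIV, uint8_t BTC) { return LaneIV <= BTC; }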
@@ -6580,8 +6388,9 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
}
VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
- VFRange &Range) {
- const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I);
+ VFRange &Range,
+ VPlanPtr &Plan) {
+ const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I);
if (!IG)
return nullptr;
@@ -6602,7 +6411,11 @@ VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
assert(I == IG->getInsertPos() &&
"Generating a recipe for an adjunct member of an interleave group");
- return new VPInterleaveRecipe(IG);
+ VPValue *Mask = nullptr;
+ if (Legal->isMaskRequired(I))
+ Mask = createBlockInMask(I->getParent(), Plan);
+
+ return new VPInterleaveRecipe(IG, Mask);
}
VPWidenMemoryInstructionRecipe *
@@ -6695,7 +6508,11 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(Instruction *I, VPlanPtr &Plan) {
bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
VFRange &Range) {
- if (CM.isScalarWithPredication(I))
+
+ bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
+
+ if (IsPredicated)
return false;
auto IsVectorizableOpcode = [](unsigned Opcode) {
@@ -6802,7 +6619,9 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
[&](unsigned VF) { return CM.isUniformAfterVectorization(I, VF); },
Range);
- bool IsPredicated = CM.isScalarWithPredication(I);
+ bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
+
auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
// Find if I uses a predicated instruction. If so, it will use its scalar
@@ -6864,7 +6683,7 @@ bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
VPRecipeBase *Recipe = nullptr;
// Check if Instr should belong to an interleave memory recipe, or already
// does. In the latter case Instr is irrelevant.
- if ((Recipe = tryToInterleaveMemory(Instr, Range))) {
+ if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
VPBB->appendRecipe(Recipe);
return true;
}
@@ -6915,6 +6734,11 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
NeedDef.insert(Branch->getCondition());
}
+ // If the tail is to be folded by masking, the primary induction variable
+ // needs to be represented in VPlan for it to model early-exit masking.
+ if (CM.foldTailByMasking())
+ NeedDef.insert(Legal->getPrimaryInduction());
+
// Collect instructions from the original loop that will become trivially dead
// in the vectorized loop. We don't need to vectorize these instructions. For
// example, original induction update instructions can become dead because we
@@ -6976,18 +6800,21 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// First filter out irrelevant instructions, to ensure no recipes are
// built for them.
- if (isa<BranchInst>(Instr) || DeadInstructions.count(Instr))
+ if (isa<BranchInst>(Instr) ||
+ DeadInstructions.find(Instr) != DeadInstructions.end())
continue;
// I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
// member of the IG, do not construct any Recipe for it.
- const InterleaveGroup *IG = CM.getInterleavedAccessGroup(Instr);
+ const InterleaveGroup<Instruction> *IG =
+ CM.getInterleavedAccessGroup(Instr);
if (IG && Instr != IG->getInsertPos() &&
Range.Start >= 2 && // Query is illegal for VF == 1
CM.getWideningDecision(Instr, Range.Start) ==
LoopVectorizationCostModel::CM_Interleave) {
- if (SinkAfterInverse.count(Instr))
- Ingredients.push_back(SinkAfterInverse.find(Instr)->second);
+ auto SinkCandidate = SinkAfterInverse.find(Instr);
+ if (SinkCandidate != SinkAfterInverse.end())
+ Ingredients.push_back(SinkCandidate->second);
continue;
}
@@ -7070,6 +6897,13 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildHierarchicalCFG();
+ SmallPtrSet<Instruction *, 1> DeadInstructions;
+ VPlanHCFGTransforms::VPInstructionsToVPRecipes(
+ Plan, Legal->getInductionVars(), DeadInstructions);
+
+ for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
+ Plan->addVF(VF);
+
return Plan;
}
@@ -7082,6 +6916,10 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const {
O << " +\n"
<< Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
IG->getInsertPos()->printAsOperand(O, false);
+ if (User) {
+ O << ", ";
+ User->getOperand(0)->printAsOperand(O);
+ }
O << "\\l\"";
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (Instruction *I = IG->getMember(i))
@@ -7144,7 +6982,15 @@ void VPBlendRecipe::execute(VPTransformState &State) {
void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Interleave group being replicated.");
- State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
+ if (!User)
+ return State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
+
+ // Last (and currently only) operand is a mask.
+ InnerLoopVectorizer::VectorParts MaskValues(State.UF);
+ VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ MaskValues[Part] = State.get(Mask, Part);
+ State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), &MaskValues);
}
void VPReplicateRecipe::execute(VPTransformState &State) {
@@ -7271,11 +7117,26 @@ static bool processLoopInVPlanNativePath(
Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
// Plan how to best vectorize, return the best VF and its cost.
- LVP.planInVPlanNativePath(OptForSize, UserVF);
+ VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
- // Returning false. We are currently not generating vector code in the VPlan
- // native path.
- return false;
+ // If we are stress testing VPlan builds, do not attempt to generate vector
+ // code.
+ if (VPlanBuildStressTest)
+ return false;
+
+ LVP.setBestPlan(VF.Width, 1);
+
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, UserVF, 1, LVL,
+ &CM);
+ LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
+ << L->getHeader()->getParent()->getName() << "\"\n");
+ LVP.executePlan(LB, DT);
+
+ // Mark the loop as already vectorized to avoid vectorizing again.
+ Hints.setAlreadyVectorized();
+
+ LLVM_DEBUG(verifyFunction(*L->getHeader()->getParent()));
+ return true;
}
bool LoopVectorizePass::processLoop(Loop *L) {
@@ -7290,7 +7151,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
<< L->getHeader()->getParent()->getName() << "\" from "
<< DebugLocStr << "\n");
- LoopVectorizeHints Hints(L, DisableUnrolling, *ORE);
+ LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE);
LLVM_DEBUG(
dbgs() << "LV: Loop hints:"
@@ -7314,7 +7175,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// less verbose reporting vectorized loops and unvectorized loops that may
// benefit from vectorization, respectively.
- if (!Hints.allowVectorization(F, L, AlwaysVectorize)) {
+ if (!Hints.allowVectorization(F, L, VectorizeOnlyWhenForced)) {
LLVM_DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n");
return false;
}
@@ -7327,7 +7188,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
&Requirements, &Hints, DB, AC);
if (!LVL.canVectorize(EnableVPlanNativePath)) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
- emitMissedWarning(F, L, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -7400,7 +7261,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(),
"NoImplicitFloat", L)
<< "loop not vectorized due to NoImplicitFloat attribute");
- emitMissedWarning(F, L, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -7415,7 +7276,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ORE->emit(
createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L)
<< "loop not vectorized due to unsafe FP support.");
- emitMissedWarning(F, L, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -7428,7 +7289,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Analyze interleaved memory accesses.
if (UseInterleaved) {
- IAI.analyzeInterleaving();
+ IAI.analyzeInterleaving(useMaskedInterleavedAccesses(*TTI));
}
// Use the cost model.
@@ -7457,7 +7318,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (Requirements.doesNotMeet(F, L, Hints)) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
"requirements.\n");
- emitMissedWarning(F, L, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -7534,6 +7395,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LVP.setBestPlan(VF.Width, IC);
using namespace ore;
+ bool DisableRuntimeUnroll = false;
+ MDNode *OrigLoopID = L->getLoopID();
if (!VectorizeLoop) {
assert(IC > 1 && "interleave count should not be 1 or 0");
@@ -7560,7 +7423,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// no runtime checks about strides and memory. A scalar loop that is
// rarely used is not worth unrolling.
if (!LB.areSafetyChecksAdded())
- AddRuntimeUnrollDisableMetaData(L);
+ DisableRuntimeUnroll = true;
// Report the vectorization decision.
ORE->emit([&]() {
@@ -7572,8 +7435,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
});
}
- // Mark the loop as already vectorized to avoid vectorizing again.
- Hints.setAlreadyVectorized();
+ Optional<MDNode *> RemainderLoopID =
+ makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
+ LLVMLoopVectorizeFollowupEpilogue});
+ if (RemainderLoopID.hasValue()) {
+ L->setLoopID(RemainderLoopID.getValue());
+ } else {
+ if (DisableRuntimeUnroll)
+ AddRuntimeUnrollDisableMetaData(L);
+
+ // Mark the loop as already vectorized to avoid vectorizing again.
+ Hints.setAlreadyVectorized();
+ }
LLVM_DEBUG(verifyFunction(*L->getHeader()->getParent()));
return true;
@@ -7666,8 +7539,15 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
- PA.preserve<LoopAnalysis>();
- PA.preserve<DominatorTreeAnalysis>();
+
+ // We currently do not preserve loopinfo/dominator analyses with outer loop
+ // vectorization. Until this is addressed, mark these analyses as preserved
+ // only for non-VPlan-native path.
+ // TODO: Preserve Loop and Dominator analyses for VPlan-native path.
+ if (!EnableVPlanNativePath) {
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ }
PA.preserve<BasicAA>();
PA.preserve<GlobalsAA>();
return PA;
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 32df6d581577..2e856a7e6802 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1536,12 +1536,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check for terminator values (e.g. invoke).
for (unsigned j = 0; j < VL.size(); ++j)
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
- TerminatorInst *Term = dyn_cast<TerminatorInst>(
- cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i)));
- if (Term) {
- LLVM_DEBUG(
- dbgs()
- << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
+ Instruction *Term = dyn_cast<Instruction>(
+ cast<PHINode>(VL[j])->getIncomingValueForBlock(
+ PH->getIncomingBlock(i)));
+ if (Term && Term->isTerminator()) {
+ LLVM_DEBUG(dbgs()
+ << "SLP: Need to swizzle PHINodes (terminator use).\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
return;
@@ -2164,7 +2164,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// extractelement/ext pair.
DeadCost -= TTI->getExtractWithExtendCost(
Ext->getOpcode(), Ext->getType(), VecTy, i);
- // Add back the cost of s|zext which is subtracted seperately.
+ // Add back the cost of s|zext which is subtracted separately.
DeadCost += TTI->getCastInstrCost(
Ext->getOpcode(), Ext->getType(), E->getType(), Ext);
continue;
@@ -2536,13 +2536,13 @@ int BoUpSLP::getTreeCost() {
// uses. However, we should not compute the cost of duplicate sequences.
// For example, if we have a build vector (i.e., insertelement sequence)
// that is used by more than one vector instruction, we only need to
- // compute the cost of the insertelement instructions once. The redundent
+ // compute the cost of the insertelement instructions once. The redundant
// instructions will be eliminated by CSE.
//
// We should consider not creating duplicate tree entries for gather
// sequences, and instead add additional edges to the tree representing
// their uses. Since such an approach results in fewer total entries,
- // existing heuristics based on tree size may yeild different results.
+ // existing heuristics based on tree size may yield different results.
//
if (TE.NeedToGather &&
std::any_of(std::next(VectorizableTree.begin(), I + 1),
@@ -3643,6 +3643,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
auto &Locs = ExternallyUsedValues[Scalar];
ExternallyUsedValues.insert({Ex, Locs});
ExternallyUsedValues.erase(Scalar);
+ // Required to update internally referenced instructions.
+ Scalar->replaceAllUsesWith(Ex);
continue;
}
@@ -3652,7 +3654,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
if (PHINode *PH = dyn_cast<PHINode>(User)) {
for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
if (PH->getIncomingValue(i) == Scalar) {
- TerminatorInst *IncomingTerminator =
+ Instruction *IncomingTerminator =
PH->getIncomingBlock(i)->getTerminator();
if (isa<CatchSwitchInst>(IncomingTerminator)) {
Builder.SetInsertPoint(VecI->getParent(),
@@ -3960,7 +3962,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
CheckSheduleForI(I);
- assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
+ assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
return true;
}
@@ -3996,7 +3998,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
CheckSheduleForI(I);
- assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
+ assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I
<< "\n");
return true;
@@ -4267,7 +4269,7 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
Worklist.push_back(I);
// Traverse the expression tree in bottom-up order looking for loads. If we
- // encounter an instruciton we don't yet handle, we give up.
+ // encounter an instruction we don't yet handle, we give up.
auto MaxWidth = 0u;
auto FoundUnknownInst = false;
while (!Worklist.empty() && !FoundUnknownInst) {
@@ -4840,7 +4842,7 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
continue;
if (GEP->getType()->isVectorTy())
continue;
- GEPs[GetUnderlyingObject(GEP->getPointerOperand(), *DL)].push_back(GEP);
+ GEPs[GEP->getPointerOperand()].push_back(GEP);
}
}
}
@@ -5126,9 +5128,12 @@ class HorizontalReduction {
/// Checks if the reduction operation can be vectorized.
bool isVectorizable() const {
return LHS && RHS &&
- // We currently only support adds && min/max reductions.
+ // We currently only support add/mul/logical && min/max reductions.
((Kind == RK_Arithmetic &&
- (Opcode == Instruction::Add || Opcode == Instruction::FAdd)) ||
+ (Opcode == Instruction::Add || Opcode == Instruction::FAdd ||
+ Opcode == Instruction::Mul || Opcode == Instruction::FMul ||
+ Opcode == Instruction::And || Opcode == Instruction::Or ||
+ Opcode == Instruction::Xor)) ||
((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
(Kind == RK_Min || Kind == RK_Max)) ||
(Opcode == Instruction::ICmp &&
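A hypothetical source-level loop of the kind the widened predicate now accepts (this snippet is an illustration, not taken from the patch): an integer xor reduction, classified as RK_Arithmetic with Opcode == Instruction::Xor.

// Example of a horizontal reduction shape newly admitted above.
unsigned xorReduce(const unsigned *A, unsigned N) {
  unsigned Acc = 0;
  for (unsigned I = 0; I < N; ++I)
    Acc ^= A[I];
  return Acc;
}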
@@ -5450,7 +5455,7 @@ class HorizontalReduction {
}
};
- Instruction *ReductionRoot = nullptr;
+ WeakTrackingVH ReductionRoot;
/// The operation data of the reduction operation.
OperationData ReductionData;
@@ -5735,7 +5740,7 @@ public:
unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
Value *VectorizedTree = nullptr;
- IRBuilder<> Builder(ReductionRoot);
+ IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
FastMathFlags Unsafe;
Unsafe.setFast();
Builder.setFastMathFlags(Unsafe);
@@ -5744,8 +5749,13 @@ public:
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
// The same extra argument may be used several times, so log each attempt
// to use it.
- for (auto &Pair : ExtraArgs)
+ for (auto &Pair : ExtraArgs) {
+ assert(Pair.first && "DebugLoc must be set.");
ExternallyUsedValues[Pair.second].push_back(Pair.first);
+ }
+ // The reduction root is used as the insertion point for new instructions,
+ // so set it as externally used to prevent it from being deleted.
+ ExternallyUsedValues[ReductionRoot];
SmallVector<Value *, 16> IgnoreList;
for (auto &V : ReductionOps)
IgnoreList.append(V.begin(), V.end());
@@ -5797,6 +5807,7 @@ public:
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
// Emit a reduction.
+ Builder.SetInsertPoint(cast<Instruction>(ReductionRoot));
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
if (VectorizedTree) {
@@ -5823,8 +5834,6 @@ public:
VectorizedTree = VectReductionData.createOp(Builder, "", ReductionOps);
}
for (auto &Pair : ExternallyUsedValues) {
- assert(!Pair.second.empty() &&
- "At least one DebugLoc must be inserted");
// Add each externally used value to the final reduction.
for (auto *I : Pair.second) {
Builder.SetCurrentDebugLocation(I->getDebugLoc());
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/contrib/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index f43a8bb123b1..15d38ac9c84c 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -69,7 +69,8 @@ public:
/// \return value is <true, nullptr>, as it is handled by another recipe.
/// \p Range.End may be decreased to ensure same decision from \p Range.Start
/// to \p Range.End.
- VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range);
+ VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range,
+ VPlanPtr &Plan);
/// Check if \I is a memory instruction to be widened for \p Range.Start and
/// potentially masked. Such instructions are handled by a recipe that takes
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0780e70809d0..05a5400beb4e 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -44,6 +44,7 @@
#include <vector>
using namespace llvm;
+extern cl::opt<bool> EnableVPlanNativePath;
#define DEBUG_TYPE "vplan"
@@ -124,6 +125,20 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
auto &PredVPSuccessors = PredVPBB->getSuccessors();
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
+
+ // In the outer loop vectorization scenario, the predecessor basic block may
+ // not yet be visited (backedge). Mark the VPBasicBlock for fixup at the end of
+ // vectorization. We do not encounter this case in inner loop vectorization
+ // as we start out by building a loop skeleton with the vector loop header
+ // and latch blocks. As a result, we never enter this function for the
+ // header block in the non VPlan-native path.
+ if (!PredBB) {
+ assert(EnableVPlanNativePath &&
+ "Unexpected null predecessor in non VPlan-native path");
+ CFG.VPBBsToFix.push_back(PredVPBB);
+ continue;
+ }
+
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
@@ -185,6 +200,31 @@ void VPBasicBlock::execute(VPTransformState *State) {
for (VPRecipeBase &Recipe : Recipes)
Recipe.execute(*State);
+ VPValue *CBV;
+ if (EnableVPlanNativePath && (CBV = getCondBit())) {
+ Value *IRCBV = CBV->getUnderlyingValue();
+ assert(IRCBV && "Unexpected null underlying value for condition bit");
+
+ // Condition bit value in a VPBasicBlock is used as the branch selector. In
+ // the VPlan-native path case, since all branches are uniform we generate a
+ // branch instruction using the condition value from vector lane 0 and dummy
+ // successors. The successors are fixed later when the successor blocks are
+ // visited.
+ Value *NewCond = State->Callback.getOrCreateVectorValues(IRCBV, 0);
+ NewCond = State->Builder.CreateExtractElement(NewCond,
+ State->Builder.getInt32(0));
+
+ // Replace the temporary unreachable terminator with the new conditional
+ // branch.
+ auto *CurrentTerminator = NewBB->getTerminator();
+ assert(isa<UnreachableInst>(CurrentTerminator) &&
+ "Expected to replace unreachable terminator with conditional "
+ "branch.");
+ auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
+ CondBr->setSuccessor(0, nullptr);
+ ReplaceInstWithInst(CurrentTerminator, CondBr);
+ }
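A short sketch of the lane-0 extraction used above, assuming only that all lanes of the vectorized condition hold the same value (the helper uniformBranchCondition is invented for illustration); the branch it feeds is created with dummy successors that the later fixup pass over VPBBsToFix patches to the real blocks.

#include "llvm/IR/IRBuilder.h"

// Sketch only: derive the scalar branch condition of a uniform branch from
// lane 0 of its vectorized condition value.
static llvm::Value *uniformBranchCondition(llvm::IRBuilder<> &B,
                                           llvm::Value *VecCond) {
  return B.CreateExtractElement(VecCond, B.getInt32(0)); // lane 0
}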
+
LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
}
@@ -194,6 +234,20 @@ void VPRegionBlock::execute(VPTransformState *State) {
if (!isReplicator()) {
// Visit the VPBlocks connected to "this", starting from it.
for (VPBlockBase *Block : RPOT) {
+ if (EnableVPlanNativePath) {
+ // The inner loop vectorization path does not represent loop preheader
+ // and exit blocks as part of the VPlan. In the VPlan-native path, skip
+ // vectorizing loop preheader block. In future, we may replace this
+ // check with the check for loop preheader.
+ if (Block->getNumPredecessors() == 0)
+ continue;
+
+ // Skip vectorizing loop exit block. In future, we may replace this
+ // check with the check for loop exit.
+ if (Block->getNumSuccessors() == 0)
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
Block->execute(State);
}
@@ -249,6 +303,13 @@ void VPInstruction::generateInstruction(VPTransformState &State,
State.set(this, V, Part);
break;
}
+ case VPInstruction::ICmpULE: {
+ Value *IV = State.get(getOperand(0), Part);
+ Value *TC = State.get(getOperand(1), Part);
+ Value *V = Builder.CreateICmpULE(IV, TC);
+ State.set(this, V, Part);
+ break;
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -274,6 +335,15 @@ void VPInstruction::print(raw_ostream &O) const {
case VPInstruction::Not:
O << "not";
break;
+ case VPInstruction::ICmpULE:
+ O << "icmp ule";
+ break;
+ case VPInstruction::SLPLoad:
+ O << "combined load";
+ break;
+ case VPInstruction::SLPStore:
+ O << "combined store";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -288,6 +358,15 @@ void VPInstruction::print(raw_ostream &O) const {
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
+ // -1. Check if the backedge taken count is needed, and if so build it.
+ if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
+ Value *TC = State->TripCount;
+ IRBuilder<> Builder(State->CFG.PrevBB->getTerminator());
+ auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1),
+ "trip.count.minus.1");
+ Value2VPValue[TCMO] = BackedgeTakenCount;
+ }
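A minimal sketch of the computation above, assuming the scalar trip count is available at the preheader terminator (the helper emitBackedgeTakenCount is invented): the backedge-taken count is materialized once as TC - 1 so that recipes such as the ICmpULE header mask can consume it.

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"

// Sketch: materialize the backedge-taken count from the scalar trip count,
// mirroring the "trip.count.minus.1" subtraction emitted above.
static llvm::Value *emitBackedgeTakenCount(llvm::IRBuilder<> &B,
                                           llvm::Value *TripCount) {
  llvm::Type *Ty = TripCount->getType();
  return B.CreateSub(TripCount, llvm::ConstantInt::get(Ty, 1),
                     "trip.count.minus.1");
}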
+
// 0. Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
State->VPValue2Value[Entry.second] = Entry.first;
@@ -319,11 +398,32 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);
+ // Setup branch terminator successors for VPBBs in VPBBsToFix based on
+ // VPBB's successors.
+ for (auto VPBB : State->CFG.VPBBsToFix) {
+ assert(EnableVPlanNativePath &&
+ "Unexpected VPBBsToFix in non VPlan-native path");
+ BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
+ assert(BB && "Unexpected null basic block for VPBB");
+
+ unsigned Idx = 0;
+ auto *BBTerminator = BB->getTerminator();
+
+ for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
+ VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
+ BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
+ ++Idx;
+ }
+ }
+
// 3. Merge the temporary latch created with the last basic-block filled.
BasicBlock *LastBB = State->CFG.PrevBB;
// Connect LastBB to VectorLatchBB to facilitate their merge.
- assert(isa<UnreachableInst>(LastBB->getTerminator()) &&
- "Expected VPlan CFG to terminate with unreachable");
+ assert((EnableVPlanNativePath ||
+ isa<UnreachableInst>(LastBB->getTerminator())) &&
+ "Expected InnerLoop VPlan CFG to terminate with unreachable");
+ assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) &&
+ "Expected VPlan CFG to terminate with branch in NativePath");
LastBB->getTerminator()->eraseFromParent();
BranchInst::Create(VectorLatchBB, LastBB);
@@ -333,7 +433,9 @@ void VPlan::execute(VPTransformState *State) {
assert(Merged && "Could not merge last basic block with latch.");
VectorLatchBB = LastBB;
- updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
+ // We do not attempt to preserve DT for outer loop vectorization currently.
+ if (!EnableVPlanNativePath)
+ updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
}
void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
@@ -366,7 +468,7 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
"One successor of a basic block does not lead to the other.");
assert(InterimSucc->getSinglePredecessor() &&
"Interim successor has more than one predecessor.");
- assert(pred_size(PostDomSucc) == 2 &&
+ assert(PostDomSucc->hasNPredecessors(2) &&
"PostDom successor has more than two predecessors.");
DT->addNewBlock(InterimSucc, BB);
DT->addNewBlock(PostDomSucc, BB);
@@ -392,8 +494,11 @@ void VPlanPrinter::dump() {
OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
if (!Plan.getName().empty())
OS << "\\n" << DOT::EscapeString(Plan.getName());
- if (!Plan.Value2VPValue.empty()) {
+ if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) {
OS << ", where:";
+ if (Plan.BackedgeTakenCount)
+ OS << "\\n"
+ << *Plan.getOrCreateBackedgeTakenCount() << " := BackedgeTakenCount";
for (auto Entry : Plan.Value2VPValue) {
OS << "\\n" << *Entry.second;
OS << DOT::EscapeString(" := ");
@@ -466,8 +571,10 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
if (const VPInstruction *CBI = dyn_cast<VPInstruction>(CBV)) {
CBI->printAsOperand(OS);
OS << " (" << DOT::EscapeString(CBI->getParent()->getName()) << ")\\l\"";
- } else
+ } else {
CBV->printAsOperand(OS);
+ OS << "\"";
+ }
}
bumpIndent(-2);
@@ -579,3 +686,55 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
}
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
+
+void VPValue::replaceAllUsesWith(VPValue *New) {
+ for (VPUser *User : users())
+ for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
+ if (User->getOperand(I) == this)
+ User->setOperand(I, New);
+}
+
+void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
+ Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI) {
+ ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
+ for (VPBlockBase *Base : RPOT) {
+ visitBlock(Base, Old2New, IAI);
+ }
+}
+
+void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI) {
+ if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
+ for (VPRecipeBase &VPI : *VPBB) {
+ assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
+ auto *VPInst = cast<VPInstruction>(&VPI);
+ auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
+ auto *IG = IAI.getInterleaveGroup(Inst);
+ if (!IG)
+ continue;
+
+ auto NewIGIter = Old2New.find(IG);
+ if (NewIGIter == Old2New.end())
+ Old2New[IG] = new InterleaveGroup<VPInstruction>(
+ IG->getFactor(), IG->isReverse(), IG->getAlignment());
+
+ if (Inst == IG->getInsertPos())
+ Old2New[IG]->setInsertPos(VPInst);
+
+ InterleaveGroupMap[VPInst] = Old2New[IG];
+ InterleaveGroupMap[VPInst]->insertMember(
+ VPInst, IG->getIndex(Inst),
+ IG->isReverse() ? (-1) * int(IG->getFactor()) : IG->getFactor());
+ }
+ } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
+ visitRegion(Region, Old2New, IAI);
+ else
+ llvm_unreachable("Unsupported kind of VPBlock.");
+}
+
+VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
+ InterleavedAccessInfo &IAI) {
+ Old2NewTy Old2New;
+ visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI);
+}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm/lib/Transforms/Vectorize/VPlan.h
index 883e6f52369a..5c1b4a83c30e 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -38,6 +38,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
#include <algorithm>
#include <cassert>
@@ -52,12 +53,14 @@ class LoopVectorizationCostModel;
class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
-class InterleaveGroup;
+template <class T> class InterleaveGroup;
+class LoopInfo;
class raw_ostream;
class Value;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
+class VPlanSlp;
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
@@ -293,6 +296,10 @@ struct VPTransformState {
/// of replication, maps the BasicBlock of the last replica created.
SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
+ /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed
+ /// up at the end of vector code generation.
+ SmallVector<VPBasicBlock *, 8> VPBBsToFix;
+
CFGState() = default;
} CFG;
@@ -313,6 +320,9 @@ struct VPTransformState {
/// Values they correspond to.
VPValue2ValueTy VPValue2Value;
+ /// Hold the trip count of the scalar loop.
+ Value *TripCount = nullptr;
+
/// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
InnerLoopVectorizer *ILV;
@@ -600,10 +610,16 @@ public:
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPUser, public VPRecipeBase {
friend class VPlanHCFGTransforms;
+ friend class VPlanSlp;
public:
/// VPlan opcodes, extending LLVM IR with idiomatic instructions.
- enum { Not = Instruction::OtherOpsEnd + 1 };
+ enum {
+ Not = Instruction::OtherOpsEnd + 1,
+ ICmpULE,
+ SLPLoad,
+ SLPStore,
+ };
private:
typedef unsigned char OpcodeTy;
@@ -613,6 +629,13 @@ private:
/// modeled instruction.
void generateInstruction(VPTransformState &State, unsigned Part);
+protected:
+ Instruction *getUnderlyingInstr() {
+ return cast_or_null<Instruction>(getUnderlyingValue());
+ }
+
+ void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
+
public:
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
: VPUser(VPValue::VPInstructionSC, Operands),
@@ -626,6 +649,11 @@ public:
return V->getVPValueID() == VPValue::VPInstructionSC;
}
+ VPInstruction *clone() const {
+ SmallVector<VPValue *, 2> Operands(operands());
+ return new VPInstruction(Opcode, Operands);
+ }
+
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *R) {
return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
@@ -643,6 +671,14 @@ public:
/// Print the VPInstruction.
void print(raw_ostream &O) const;
+
+ /// Return true if this instruction may modify memory.
+ bool mayWriteToMemory() const {
+ // TODO: we can use attributes of the called function to rule out memory
+ // modifications.
+ return Opcode == Instruction::Store || Opcode == Instruction::Call ||
+ Opcode == Instruction::Invoke || Opcode == SLPStore;
+ }
};
/// VPWidenRecipe is a recipe for producing a copy of vector type for each
@@ -764,11 +800,15 @@ public:
/// or stores into one wide load/store and shuffles.
class VPInterleaveRecipe : public VPRecipeBase {
private:
- const InterleaveGroup *IG;
+ const InterleaveGroup<Instruction> *IG;
+ std::unique_ptr<VPUser> User;
public:
- VPInterleaveRecipe(const InterleaveGroup *IG)
- : VPRecipeBase(VPInterleaveSC), IG(IG) {}
+ VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Mask)
+ : VPRecipeBase(VPInterleaveSC), IG(IG) {
+ if (Mask) // Create a VPInstruction to register as a user of the mask.
+ User.reset(new VPUser({Mask}));
+ }
~VPInterleaveRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -782,7 +822,7 @@ public:
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent) const override;
- const InterleaveGroup *getInterleaveGroup() { return IG; }
+ const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
};
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
@@ -1107,6 +1147,10 @@ private:
// (operators '==' and '<').
SmallPtrSet<VPValue *, 16> VPExternalDefs;
+ /// Represents the backedge taken count of the original loop, for folding
+ /// the tail.
+ VPValue *BackedgeTakenCount = nullptr;
+
/// Holds a mapping between Values and their corresponding VPValue inside
/// VPlan.
Value2VPValueTy Value2VPValue;
@@ -1114,6 +1158,9 @@ private:
/// Holds the VPLoopInfo analysis for this VPlan.
VPLoopInfo VPLInfo;
+ /// Holds the condition bit values built during VPInstruction to VPRecipe
+ /// transformation.
+ SmallVector<VPValue *, 4> VPCBVs;
+
public:
VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {}
@@ -1121,9 +1168,14 @@ public:
if (Entry)
VPBlockBase::deleteCFG(Entry);
for (auto &MapEntry : Value2VPValue)
- delete MapEntry.second;
+ if (MapEntry.second != BackedgeTakenCount)
+ delete MapEntry.second;
+ if (BackedgeTakenCount)
+ delete BackedgeTakenCount; // Delete once, if in Value2VPValue or not.
for (VPValue *Def : VPExternalDefs)
delete Def;
+ for (VPValue *CBV : VPCBVs)
+ delete CBV;
}
/// Generate the IR code for this VPlan.
@@ -1134,6 +1186,13 @@ public:
VPBlockBase *setEntry(VPBlockBase *Block) { return Entry = Block; }
+ /// The backedge taken count of the original loop.
+ VPValue *getOrCreateBackedgeTakenCount() {
+ if (!BackedgeTakenCount)
+ BackedgeTakenCount = new VPValue();
+ return BackedgeTakenCount;
+ }
+
void addVF(unsigned VF) { VFs.insert(VF); }
bool hasVF(unsigned VF) { return VFs.count(VF); }
@@ -1148,6 +1207,11 @@ public:
VPExternalDefs.insert(VPVal);
}
+ /// Add \p CBV to the vector of condition bit values.
+ void addCBV(VPValue *CBV) {
+ VPCBVs.push_back(CBV);
+ }
+
void addVPValue(Value *V) {
assert(V && "Trying to add a null Value to VPlan");
assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
@@ -1429,6 +1493,144 @@ public:
}
};
+class VPInterleavedAccessInfo {
+private:
+ DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
+ InterleaveGroupMap;
+
+ /// Type for mapping of instruction based interleave groups to VPInstruction
+ /// interleave groups.
+ using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
+ InterleaveGroup<VPInstruction> *>;
+
+ /// Recursively traverse \p Region and populate VPlan based interleave groups
+ /// based on \p IAI.
+ void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI);
+ /// Recursively traverse \p Block and populate VPlan based interleave groups
+ /// based on \p IAI.
+ void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI);
+
+public:
+ VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
+
+ ~VPInterleavedAccessInfo() {
+ SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
+ // Avoid releasing a pointer twice.
+ for (auto &I : InterleaveGroupMap)
+ DelSet.insert(I.second);
+ for (auto *Ptr : DelSet)
+ delete Ptr;
+ }
+
+ /// Get the interleave group that \p Instr belongs to.
+ ///
+ /// \returns nullptr if \p Instr does not belong to any interleave group.
+ InterleaveGroup<VPInstruction> *
+ getInterleaveGroup(VPInstruction *Instr) const {
+ if (InterleaveGroupMap.count(Instr))
+ return InterleaveGroupMap.find(Instr)->second;
+ return nullptr;
+ }
+};
+
+/// Class that maps (parts of) an existing VPlan to trees of combined
+/// VPInstructions.
+class VPlanSlp {
+private:
+ enum class OpMode { Failed, Load, Opcode };
+
+ /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
+ /// DenseMap keys.
+ struct BundleDenseMapInfo {
+ static SmallVector<VPValue *, 4> getEmptyKey() {
+ return {reinterpret_cast<VPValue *>(-1)};
+ }
+
+ static SmallVector<VPValue *, 4> getTombstoneKey() {
+ return {reinterpret_cast<VPValue *>(-2)};
+ }
+
+ static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
+ return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
+ }
+
+ static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
+ const SmallVector<VPValue *, 4> &RHS) {
+ return LHS == RHS;
+ }
+ };
+
+ /// Mapping of values in the original VPlan to a combined VPInstruction.
+ DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
+ BundleToCombined;
+
+ VPInterleavedAccessInfo &IAI;
+
+ /// Basic block to operate on. For now, only instructions in a single BB are
+ /// considered.
+ const VPBasicBlock &BB;
+
+ /// Indicates whether we managed to combine all visited instructions or not.
+ bool CompletelySLP = true;
+
+ /// Width of the widest combined bundle in bits.
+ unsigned WidestBundleBits = 0;
+
+ using MultiNodeOpTy =
+ typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
+
+ // Input operand bundles for the current multi node. Each multi node operand
+ // bundle contains values not matching the multi node's opcode. They will
+ // be reordered in reorderMultiNodeOps, once we have completed building a
+ // multi node.
+ SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
+
+ /// Indicates whether we are building a multi node currently.
+ bool MultiNodeActive = false;
+
+ /// Check if we can vectorize Operands together.
+ bool areVectorizable(ArrayRef<VPValue *> Operands) const;
+
+ /// Add combined instruction \p New for the bundle \p Operands.
+ void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
+
+ /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
+ VPInstruction *markFailed();
+
+ /// Reorder operands in the multi node to maximize sequential memory access
+ /// and commutative operations.
+ SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
+
+ /// Choose the best candidate to use for the lane after \p Last. The set of
+ /// candidates to choose from are values with an opcode matching \p Last's
+ /// or loads consecutive to \p Last.
+ std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
+ SmallPtrSetImpl<VPValue *> &Candidates,
+ VPInterleavedAccessInfo &IAI);
+
+ /// Print bundle \p Values to dbgs().
+ void dumpBundle(ArrayRef<VPValue *> Values);
+
+public:
+ VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
+
+ ~VPlanSlp() {
+ for (auto &KV : BundleToCombined)
+ delete KV.second;
+ }
+
+ /// Tries to build an SLP tree rooted at \p Operands and returns a
+ /// VPInstruction combining \p Operands, if they can be combined.
+ VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
+
+ /// Return the width of the widest combined bundle in bits.
+ unsigned getWidestBundleBits() const { return WidestBundleBits; }
+
+ /// Return true if all visited instructions can be combined.
+ bool isCompletelySLP() const { return CompletelySLP; }
+};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index b6307acb9474..0f42694e193b 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -268,7 +268,7 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {
// Set VPBB successors. We create empty VPBBs for successors if they don't
// exist already. Recipes will be created when the successor is visited
// during the RPO traversal.
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
assert(TI && "Terminator expected.");
unsigned NumSuccs = TI->getNumSuccessors();
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
index e3cbab077e61..3ad7fc7e7b96 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
@@ -24,6 +24,18 @@ void VPlanHCFGTransforms::VPInstructionsToVPRecipes(
VPRegionBlock *TopRegion = dyn_cast<VPRegionBlock>(Plan->getEntry());
ReversePostOrderTraversal<VPBlockBase *> RPOT(TopRegion->getEntry());
+
+ // Condition bit VPValues get deleted during transformation to VPRecipes.
+ // Create new VPValues and save away as condition bits. These will be deleted
+ // after finalizing the vector IR basic blocks.
+ for (VPBlockBase *Base : RPOT) {
+ VPBasicBlock *VPBB = Base->getEntryBasicBlock();
+ if (auto *CondBit = VPBB->getCondBit()) {
+ auto *NCondBit = new VPValue(CondBit->getUnderlyingValue());
+ VPBB->setCondBit(NCondBit);
+ Plan->addCBV(NCondBit);
+ }
+ }
for (VPBlockBase *Base : RPOT) {
// Do not widen instructions in pre-header and exit blocks.
if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0)
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
new file mode 100644
index 000000000000..ad3a85a6f760
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -0,0 +1,468 @@
+//===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// This file implements SLP analysis based on VPlan. The analysis is based on
+/// the ideas described in
+///
+/// Look-ahead SLP: auto-vectorization in the presence of commutative
+/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
+/// Luís F. W. Góes
+///
+//===----------------------------------------------------------------------===//
+
+#include "VPlan.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cassert>
+#include <iterator>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "vplan-slp"
+
+// Number of levels to look ahead when re-ordering multi node operands.
+static unsigned LookaheadMaxDepth = 5;
+
+VPInstruction *VPlanSlp::markFailed() {
+ // FIXME: Currently this is used to signal we hit instructions we cannot
+ // trivially SLP'ize.
+ CompletelySLP = false;
+ return nullptr;
+}
+
+void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
+ if (all_of(Operands, [](VPValue *V) {
+ return cast<VPInstruction>(V)->getUnderlyingInstr();
+ })) {
+ unsigned BundleSize = 0;
+ for (VPValue *V : Operands) {
+ Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
+ assert(!T->isVectorTy() && "Only scalar types supported for now");
+ BundleSize += T->getScalarSizeInBits();
+ }
+ WidestBundleBits = std::max(WidestBundleBits, BundleSize);
+ }
+
+ auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
+ assert(Res.second &&
+ "Already created a combined instruction for the operand bundle");
+ (void)Res;
+}
+
+bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
+ // Currently we only support VPInstructions.
+ if (!all_of(Operands, [](VPValue *Op) {
+ return Op && isa<VPInstruction>(Op) &&
+ cast<VPInstruction>(Op)->getUnderlyingInstr();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
+ return false;
+ }
+
+ // Check if opcodes and type width agree for all instructions in the bundle.
+ // FIXME: Differing widths/opcodes can be handled by inserting additional
+ // instructions.
+ // FIXME: Deal with non-primitive types.
+ const Instruction *OriginalInstr =
+ cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
+ unsigned Opcode = OriginalInstr->getOpcode();
+ unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
+ if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
+ const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
+ return I->getOpcode() == Opcode &&
+ I->getType()->getPrimitiveSizeInBits() == Width;
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
+ return false;
+ }
+
+ // For now, all operands must be defined in the same BB.
+ if (any_of(Operands, [this](VPValue *Op) {
+ return cast<VPInstruction>(Op)->getParent() != &this->BB;
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
+ return false;
+ }
+
+ if (any_of(Operands,
+ [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
+ return false;
+ }
+
+ // For loads, check that there are no instructions writing to memory in
+ // between them.
+ // TODO: we only have to forbid instructions writing to memory that could
+ // interfere with any of the loads in the bundle
+ if (Opcode == Instruction::Load) {
+ unsigned LoadsSeen = 0;
+ VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
+ for (auto &I : *Parent) {
+ auto *VPI = cast<VPInstruction>(&I);
+ if (VPI->getOpcode() == Instruction::Load &&
+ std::find(Operands.begin(), Operands.end(), VPI) != Operands.end())
+ LoadsSeen++;
+
+ if (LoadsSeen == Operands.size())
+ break;
+ if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
+ LLVM_DEBUG(
+ dbgs() << "VPSLP: instruction modifying memory between loads\n");
+ return false;
+ }
+ }
+
+ if (!all_of(Operands, [](VPValue *Op) {
+ return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
+ ->isSimple();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
+ return false;
+ }
+ }
+
+ if (Opcode == Instruction::Store)
+ if (!all_of(Operands, [](VPValue *Op) {
+ return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
+ ->isSimple();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
+ return false;
+ }
+
+ return true;
+}
+
+static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
+ unsigned OperandIndex) {
+ SmallVector<VPValue *, 4> Operands;
+ for (VPValue *V : Values) {
+ auto *U = cast<VPUser>(V);
+ Operands.push_back(U->getOperand(OperandIndex));
+ }
+ return Operands;
+}
+
+static bool areCommutative(ArrayRef<VPValue *> Values) {
+ return Instruction::isCommutative(
+ cast<VPInstruction>(Values[0])->getOpcode());
+}
+
+static SmallVector<SmallVector<VPValue *, 4>, 4>
+getOperands(ArrayRef<VPValue *> Values) {
+ SmallVector<SmallVector<VPValue *, 4>, 4> Result;
+ auto *VPI = cast<VPInstruction>(Values[0]);
+
+ switch (VPI->getOpcode()) {
+ case Instruction::Load:
+ llvm_unreachable("Loads terminate a tree, no need to get operands");
+ case Instruction::Store:
+ Result.push_back(getOperands(Values, 0));
+ break;
+ default:
+ for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
+ Result.push_back(getOperands(Values, I));
+ break;
+ }
+
+ return Result;
+}
+
+/// Returns the opcode of Values or None if they do not all agree.
+static Optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
+ unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
+ if (any_of(Values, [Opcode](VPValue *V) {
+ return cast<VPInstruction>(V)->getOpcode() != Opcode;
+ }))
+ return None;
+ return {Opcode};
+}
+
+/// Returns true if A and B access sequential memory if they are loads or
+/// stores or if they have identical opcodes otherwise.
+static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
+ VPInterleavedAccessInfo &IAI) {
+ if (A->getOpcode() != B->getOpcode())
+ return false;
+
+ if (A->getOpcode() != Instruction::Load &&
+ A->getOpcode() != Instruction::Store)
+ return true;
+ auto *GA = IAI.getInterleaveGroup(A);
+ auto *GB = IAI.getInterleaveGroup(B);
+
+ return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
+}
+
+/// Implements getLAScore from Listing 7 in the paper.
+/// Traverses and compares operands of V1 and V2 to MaxLevel.
+static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
+ VPInterleavedAccessInfo &IAI) {
+ if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))
+ return 0;
+
+ if (MaxLevel == 0)
+ return (unsigned)areConsecutiveOrMatch(cast<VPInstruction>(V1),
+ cast<VPInstruction>(V2), IAI);
+
+ unsigned Score = 0;
+ for (unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands(); I < EV1; ++I)
+ for (unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)
+ Score += getLAScore(cast<VPUser>(V1)->getOperand(I),
+ cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);
+ return Score;
+}
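A toy, dependency-free model of the same scoring idea (ToyNode and lookaheadScore are invented names, deliberately detached from the VPlan classes): two leaves score 1 when they match, and deeper levels sum the scores of every operand pair, so candidate orderings that stay consistent over several levels of the expression tree come out ahead.

#include <vector>

// Stand-in for a VPInstruction: an opcode plus operand pointers.
struct ToyNode {
  unsigned Opcode;
  std::vector<const ToyNode *> Ops;
};

// Mirrors getLAScore: compare the nodes at level 0, otherwise sum the
// pairwise scores of all operands up to MaxLevel levels deep.
static unsigned lookaheadScore(const ToyNode *A, const ToyNode *B,
                               unsigned MaxLevel) {
  if (!A || !B)
    return 0;
  if (MaxLevel == 0)
    return A->Opcode == B->Opcode ? 1 : 0;
  unsigned Score = 0;
  for (const ToyNode *OA : A->Ops)
    for (const ToyNode *OB : B->Ops)
      Score += lookaheadScore(OA, OB, MaxLevel - 1);
  return Score;
}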
+
+std::pair<VPlanSlp::OpMode, VPValue *>
+VPlanSlp::getBest(OpMode Mode, VPValue *Last,
+ SmallPtrSetImpl<VPValue *> &Candidates,
+ VPInterleavedAccessInfo &IAI) {
+ assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
+ "Currently we only handle load and commutative opcodes");
+ LLVM_DEBUG(dbgs() << " getBest\n");
+
+ SmallVector<VPValue *, 4> BestCandidates;
+ LLVM_DEBUG(dbgs() << " Candidates for "
+ << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
+ for (auto *Candidate : Candidates) {
+ auto *LastI = cast<VPInstruction>(Last);
+ auto *CandidateI = cast<VPInstruction>(Candidate);
+ if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
+ LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
+ << " ");
+ BestCandidates.push_back(Candidate);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ if (BestCandidates.empty())
+ return {OpMode::Failed, nullptr};
+
+ if (BestCandidates.size() == 1)
+ return {Mode, BestCandidates[0]};
+
+ VPValue *Best = nullptr;
+ unsigned BestScore = 0;
+ for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
+ unsigned PrevScore = ~0u;
+ bool AllSame = true;
+
+ // FIXME: Avoid visiting the same operands multiple times.
+ for (auto *Candidate : BestCandidates) {
+ unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
+ if (PrevScore == ~0u)
+ PrevScore = Score;
+ if (PrevScore != Score)
+ AllSame = false;
+ PrevScore = Score;
+
+ if (Score > BestScore) {
+ BestScore = Score;
+ Best = Candidate;
+ }
+ }
+ if (!AllSame)
+ break;
+ }
+ LLVM_DEBUG(dbgs() << "Found best "
+ << *cast<VPInstruction>(Best)->getUnderlyingInstr()
+ << "\n");
+ Candidates.erase(Best);
+
+ return {Mode, Best};
+}
+
+SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
+ SmallVector<MultiNodeOpTy, 4> FinalOrder;
+ SmallVector<OpMode, 4> Mode;
+ FinalOrder.reserve(MultiNodeOps.size());
+ Mode.reserve(MultiNodeOps.size());
+
+ LLVM_DEBUG(dbgs() << "Reordering multinode\n");
+
+ for (auto &Operands : MultiNodeOps) {
+ FinalOrder.push_back({Operands.first, {Operands.second[0]}});
+ if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
+ Instruction::Load)
+ Mode.push_back(OpMode::Load);
+ else
+ Mode.push_back(OpMode::Opcode);
+ }
+
+ for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
+ LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
+ SmallPtrSet<VPValue *, 4> Candidates;
+ LLVM_DEBUG(dbgs() << " Candidates ");
+ for (auto Ops : MultiNodeOps) {
+ LLVM_DEBUG(
+ dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
+ << " ");
+ Candidates.insert(Ops.second[Lane]);
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
+ LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");
+ if (Mode[Op] == OpMode::Failed)
+ continue;
+
+ VPValue *Last = FinalOrder[Op].second[Lane - 1];
+ std::pair<OpMode, VPValue *> Res =
+ getBest(Mode[Op], Last, Candidates, IAI);
+ if (Res.second)
+ FinalOrder[Op].second.push_back(Res.second);
+ else
+ // TODO: handle this case
+ FinalOrder[Op].second.push_back(markFailed());
+ }
+ }
+
+ return FinalOrder;
+}
+
+void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
+ dbgs() << " Ops: ";
+ for (auto Op : Values)
+ if (auto *Instr = cast_or_null<VPInstruction>(Op)->getUnderlyingInstr())
+ dbgs() << *Instr << " | ";
+ else
+ dbgs() << " nullptr | ";
+ dbgs() << "\n";
+}
+
+VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
+ assert(!Values.empty() && "Need some operands!");
+
+ // If we already visited this instruction bundle, re-use the existing node
+ auto I = BundleToCombined.find(to_vector<4>(Values));
+ if (I != BundleToCombined.end()) {
+#ifndef NDEBUG
+ // Check that the resulting graph is a tree. If we re-use a node, this means
+ // its values have multiple users. We only allow this if all users of each
+ // value are the same instruction.
+ for (auto *V : Values) {
+ auto UI = V->user_begin();
+ auto *FirstUser = *UI++;
+ while (UI != V->user_end()) {
+ assert(*UI == FirstUser && "Currently we only support SLP trees.");
+ UI++;
+ }
+ }
+#endif
+ return I->second;
+ }
+
+ // Dump inputs
+ LLVM_DEBUG({
+ dbgs() << "buildGraph: ";
+ dumpBundle(Values);
+ });
+
+ if (!areVectorizable(Values))
+ return markFailed();
+
+ assert(getOpcode(Values) && "Opcodes for all values must match");
+ unsigned ValuesOpcode = getOpcode(Values).getValue();
+
+ SmallVector<VPValue *, 4> CombinedOperands;
+ if (areCommutative(Values)) {
+ bool MultiNodeRoot = !MultiNodeActive;
+ MultiNodeActive = true;
+ for (auto &Operands : getOperands(Values)) {
+ LLVM_DEBUG({
+ dbgs() << " Visiting Commutative";
+ dumpBundle(Operands);
+ });
+
+ auto OperandsOpcode = getOpcode(Operands);
+ if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
+ LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
+ CombinedOperands.push_back(buildGraph(Operands));
+ } else {
+ LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
+ // Create a dummy VPInstruction, which we will replace later with the
+ // re-ordered operand.
+ VPInstruction *Op = new VPInstruction(0, {});
+ CombinedOperands.push_back(Op);
+ MultiNodeOps.emplace_back(Op, Operands);
+ }
+ }
+
+ if (MultiNodeRoot) {
+ LLVM_DEBUG(dbgs() << "Reorder \n");
+ MultiNodeActive = false;
+
+ auto FinalOrder = reorderMultiNodeOps();
+
+ MultiNodeOps.clear();
+ for (auto &Ops : FinalOrder) {
+ VPInstruction *NewOp = buildGraph(Ops.second);
+ Ops.first->replaceAllUsesWith(NewOp);
+ for (unsigned i = 0; i < CombinedOperands.size(); i++)
+ if (CombinedOperands[i] == Ops.first)
+ CombinedOperands[i] = NewOp;
+ delete Ops.first;
+ Ops.first = NewOp;
+ }
+ LLVM_DEBUG(dbgs() << "Found final order\n");
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << " NonCommutative\n");
+ if (ValuesOpcode == Instruction::Load)
+ for (VPValue *V : Values)
+ CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
+ else
+ for (auto &Operands : getOperands(Values))
+ CombinedOperands.push_back(buildGraph(Operands));
+ }
+
+ unsigned Opcode;
+ switch (ValuesOpcode) {
+ case Instruction::Load:
+ Opcode = VPInstruction::SLPLoad;
+ break;
+ case Instruction::Store:
+ Opcode = VPInstruction::SLPStore;
+ break;
+ default:
+ Opcode = ValuesOpcode;
+ break;
+ }
+
+ if (!CompletelySLP)
+ return markFailed();
+
+ assert(CombinedOperands.size() > 0 && "Need some operands");
+ auto *VPI = new VPInstruction(Opcode, CombinedOperands);
+ VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
+
+ LLVM_DEBUG(dbgs() << "Create VPInstruction "; VPI->print(dbgs());
+ cast<VPInstruction>(Values[0])->print(dbgs()); dbgs() << "\n");
+ addCombined(Values, VPI);
+ return VPI;
+}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanValue.h b/contrib/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 08f142915b49..b473579b699f 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -38,6 +38,10 @@ class VPUser;
// and live-outs which the VPlan will need to fix accordingly.
class VPValue {
friend class VPBuilder;
+ friend class VPlanHCFGTransforms;
+ friend class VPBasicBlock;
+ friend class VPInterleavedAccessInfo;
+
private:
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
@@ -102,6 +106,20 @@ public:
const_user_range users() const {
return const_user_range(user_begin(), user_end());
}
+
+ /// Returns true if the value has more than one unique user.
+ bool hasMoreThanOneUniqueUser() {
+ if (getNumUsers() == 0)
+ return false;
+
+ // Check if all users match the first user.
+ auto Current = std::next(user_begin());
+ while (Current != user_end() && *user_begin() == *Current)
+ Current++;
+ return Current != user_end();
+ }
+
+ void replaceAllUsesWith(VPValue *New);
};
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
@@ -147,6 +165,8 @@ public:
return Operands[N];
}
+ void setOperand(unsigned I, VPValue *New) { Operands[I] = New; }
+
typedef SmallVectorImpl<VPValue *>::iterator operand_iterator;
typedef SmallVectorImpl<VPValue *>::const_iterator const_operand_iterator;
typedef iterator_range<operand_iterator> operand_range;
diff --git a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index f62a88558328..559ab1968844 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
void llvm::initializeVectorization(PassRegistry &Registry) {
initializeLoopVectorizePass(Registry);
initializeSLPVectorizerPass(Registry);
- initializeLoadStoreVectorizerPass(Registry);
+ initializeLoadStoreVectorizerLegacyPassPass(Registry);
}
void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
diff --git a/contrib/llvm/lib/XRay/BlockIndexer.cpp b/contrib/llvm/lib/XRay/BlockIndexer.cpp
new file mode 100644
index 000000000000..4dbe2d2717ad
--- /dev/null
+++ b/contrib/llvm/lib/XRay/BlockIndexer.cpp
@@ -0,0 +1,98 @@
+//===- BlockIndexer.cpp - FDR Block Indexing Visitor ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the RecordVisitor which generates a mapping between a
+// thread and a range of records representing a block.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/BlockIndexer.h"
+
+namespace llvm {
+namespace xray {
+
+Error BlockIndexer::visit(BufferExtents &) { return Error::success(); }
+
+Error BlockIndexer::visit(WallclockRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ CurrentBlock.WallclockTime = &R;
+ return Error::success();
+}
+
+Error BlockIndexer::visit(NewCPUIDRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(TSCWrapRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(CustomEventRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(CustomEventRecordV5 &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(TypedEventRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(CallArgRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(PIDRecord &R) {
+ CurrentBlock.ProcessID = R.pid();
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(NewBufferRecord &R) {
+ if (!CurrentBlock.Records.empty())
+ if (auto E = flush())
+ return E;
+
+ CurrentBlock.ThreadID = R.tid();
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(EndBufferRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(FunctionRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::flush() {
+ Index::iterator It;
+ std::tie(It, std::ignore) =
+ Indices.insert({{CurrentBlock.ProcessID, CurrentBlock.ThreadID}, {}});
+ It->second.push_back({CurrentBlock.ProcessID, CurrentBlock.ThreadID,
+ CurrentBlock.WallclockTime,
+ std::move(CurrentBlock.Records)});
+ CurrentBlock.ProcessID = 0;
+ CurrentBlock.ThreadID = 0;
+ CurrentBlock.Records = {};
+ CurrentBlock.WallclockTime = nullptr;
+ return Error::success();
+}
+
+} // namespace xray
+} // namespace llvm
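What flush() above amounts to is filing the records accumulated for the current buffer under a (process id, thread id) key and starting a fresh block. A standalone sketch of that step under that reading, not part of the patch, with std::map and placeholder types standing in for the LLVM containers:

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

struct Block {
  uint64_t ProcessID;
  int32_t ThreadID;
  std::vector<int> Records; // stand-in for Record pointers
};

using Index = std::map<std::pair<uint64_t, int32_t>, std::vector<Block>>;

void flushBlock(Index &Indices, Block &Current) {
  // Insert (or find) the per-(PID, TID) bucket, then move the block in.
  auto It = Indices.insert({{Current.ProcessID, Current.ThreadID}, {}}).first;
  It->second.push_back(std::move(Current));
  Current = Block{}; // start a fresh block, as the visitor does
}

int main() {
  Index Indices;
  Block Current{42, 7, {1, 2, 3}};
  flushBlock(Indices, Current);
  return Indices.size() == 1 ? 0 : 1;
}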
diff --git a/contrib/llvm/lib/XRay/BlockPrinter.cpp b/contrib/llvm/lib/XRay/BlockPrinter.cpp
new file mode 100644
index 000000000000..0acebee0cbdd
--- /dev/null
+++ b/contrib/llvm/lib/XRay/BlockPrinter.cpp
@@ -0,0 +1,114 @@
+//===- BlockPrinter.cpp - FDR Block Pretty Printer Implementation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/BlockPrinter.h"
+
+namespace llvm {
+namespace xray {
+
+Error BlockPrinter::visit(BufferExtents &R) {
+ OS << "\n[New Block]\n";
+ CurrentState = State::Preamble;
+ return RP.visit(R);
+}
+
+// Preamble printing.
+Error BlockPrinter::visit(NewBufferRecord &R) {
+ if (CurrentState == State::Start)
+ OS << "\n[New Block]\n";
+
+ OS << "Preamble: \n";
+ CurrentState = State::Preamble;
+ return RP.visit(R);
+}
+
+Error BlockPrinter::visit(WallclockRecord &R) {
+ CurrentState = State::Preamble;
+ return RP.visit(R);
+}
+
+Error BlockPrinter::visit(PIDRecord &R) {
+ CurrentState = State::Preamble;
+ return RP.visit(R);
+}
+
+// Metadata printing.
+Error BlockPrinter::visit(NewCPUIDRecord &R) {
+ if (CurrentState == State::Preamble)
+ OS << "\nBody:\n";
+ if (CurrentState == State::Function)
+ OS << "\nMetadata: ";
+ CurrentState = State::Metadata;
+ OS << " ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+Error BlockPrinter::visit(TSCWrapRecord &R) {
+ if (CurrentState == State::Function)
+ OS << "\nMetadata:";
+ CurrentState = State::Metadata;
+ OS << " ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+// Custom events will be rendered like "function" events.
+Error BlockPrinter::visit(CustomEventRecord &R) {
+ if (CurrentState == State::Metadata)
+ OS << "\n";
+ CurrentState = State::CustomEvent;
+ OS << "* ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+Error BlockPrinter::visit(CustomEventRecordV5 &R) {
+ if (CurrentState == State::Metadata)
+ OS << "\n";
+ CurrentState = State::CustomEvent;
+ OS << "* ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+Error BlockPrinter::visit(TypedEventRecord &R) {
+ if (CurrentState == State::Metadata)
+ OS << "\n";
+ CurrentState = State::CustomEvent;
+ OS << "* ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+// Function call printing.
+Error BlockPrinter::visit(FunctionRecord &R) {
+ if (CurrentState == State::Metadata)
+ OS << "\n";
+ CurrentState = State::Function;
+ OS << "- ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+Error BlockPrinter::visit(CallArgRecord &R) {
+ CurrentState = State::Arg;
+ OS << " : ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+Error BlockPrinter::visit(EndBufferRecord &R) {
+ CurrentState = State::End;
+ OS << " *** ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+} // namespace xray
+} // namespace llvm
diff --git a/contrib/llvm/lib/XRay/BlockVerifier.cpp b/contrib/llvm/lib/XRay/BlockVerifier.cpp
new file mode 100644
index 000000000000..5e949ec4e46a
--- /dev/null
+++ b/contrib/llvm/lib/XRay/BlockVerifier.cpp
@@ -0,0 +1,205 @@
+//===- BlockVerifier.cpp - FDR Block Verifier -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/BlockVerifier.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace xray {
+namespace {
+
+constexpr unsigned long long mask(BlockVerifier::State S) {
+ return 1uLL << static_cast<std::size_t>(S);
+}
+
+constexpr std::size_t number(BlockVerifier::State S) {
+ return static_cast<std::size_t>(S);
+}
+
+StringRef recordToString(BlockVerifier::State R) {
+ switch (R) {
+ case BlockVerifier::State::BufferExtents:
+ return "BufferExtents";
+ case BlockVerifier::State::NewBuffer:
+ return "NewBuffer";
+ case BlockVerifier::State::WallClockTime:
+ return "WallClockTime";
+ case BlockVerifier::State::PIDEntry:
+ return "PIDEntry";
+ case BlockVerifier::State::NewCPUId:
+ return "NewCPUId";
+ case BlockVerifier::State::TSCWrap:
+ return "TSCWrap";
+ case BlockVerifier::State::CustomEvent:
+ return "CustomEvent";
+ case BlockVerifier::State::Function:
+ return "Function";
+ case BlockVerifier::State::CallArg:
+ return "CallArg";
+ case BlockVerifier::State::EndOfBuffer:
+ return "EndOfBuffer";
+ case BlockVerifier::State::TypedEvent:
+ return "TypedEvent";
+ case BlockVerifier::State::StateMax:
+ case BlockVerifier::State::Unknown:
+ return "Unknown";
+ }
+ llvm_unreachable("Unkown state!");
+}
+
+struct Transition {
+ BlockVerifier::State From;
+ std::bitset<number(BlockVerifier::State::StateMax)> ToStates;
+};
+
+} // namespace
+
+Error BlockVerifier::transition(State To) {
+ using ToSet = std::bitset<number(State::StateMax)>;
+ static constexpr std::array<const Transition, number(State::StateMax)>
+ TransitionTable{{{State::Unknown,
+ {mask(State::BufferExtents) | mask(State::NewBuffer)}},
+
+ {State::BufferExtents, {mask(State::NewBuffer)}},
+
+ {State::NewBuffer, {mask(State::WallClockTime)}},
+
+ {State::WallClockTime,
+ {mask(State::PIDEntry) | mask(State::NewCPUId)}},
+
+ {State::PIDEntry, {mask(State::NewCPUId)}},
+
+ {State::NewCPUId,
+ {mask(State::NewCPUId) | mask(State::TSCWrap) |
+ mask(State::CustomEvent) | mask(State::Function) |
+ mask(State::EndOfBuffer) | mask(State::TypedEvent)}},
+
+ {State::TSCWrap,
+ {mask(State::TSCWrap) | mask(State::NewCPUId) |
+ mask(State::CustomEvent) | mask(State::Function) |
+ mask(State::EndOfBuffer) | mask(State::TypedEvent)}},
+
+ {State::CustomEvent,
+ {mask(State::CustomEvent) | mask(State::TSCWrap) |
+ mask(State::NewCPUId) | mask(State::Function) |
+ mask(State::EndOfBuffer) | mask(State::TypedEvent)}},
+
+ {State::TypedEvent,
+ {mask(State::TypedEvent) | mask(State::TSCWrap) |
+ mask(State::NewCPUId) | mask(State::Function) |
+ mask(State::EndOfBuffer) | mask(State::CustomEvent)}},
+
+ {State::Function,
+ {mask(State::Function) | mask(State::TSCWrap) |
+ mask(State::NewCPUId) | mask(State::CustomEvent) |
+ mask(State::CallArg) | mask(State::EndOfBuffer) |
+ mask(State::TypedEvent)}},
+
+ {State::CallArg,
+ {mask(State::CallArg) | mask(State::Function) |
+ mask(State::TSCWrap) | mask(State::NewCPUId) |
+ mask(State::CustomEvent) | mask(State::EndOfBuffer) |
+ mask(State::TypedEvent)}},
+
+ {State::EndOfBuffer, {}}}};
+
+ if (CurrentRecord >= State::StateMax)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "BUG (BlockVerifier): Cannot find transition table entry for %s, "
+ "transitioning to %s.",
+ recordToString(CurrentRecord).data(), recordToString(To).data());
+
+ // If we're at an EndOfBuffer record, we ignore anything that follows that
+ // isn't a NewBuffer record.
+ if (CurrentRecord == State::EndOfBuffer && To != State::NewBuffer)
+ return Error::success();
+
+ auto &Mapping = TransitionTable[number(CurrentRecord)];
+ auto &Destinations = Mapping.ToStates;
+ assert(Mapping.From == CurrentRecord &&
+ "BUG: Wrong index for record mapping.");
+ if ((Destinations & ToSet(mask(To))) == 0)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "BlockVerifier: Invalid transition from %s to %s.",
+ recordToString(CurrentRecord).data(), recordToString(To).data());
+
+ CurrentRecord = To;
+ return Error::success();
+}
+
+Error BlockVerifier::visit(BufferExtents &) {
+ return transition(State::BufferExtents);
+}
+
+Error BlockVerifier::visit(WallclockRecord &) {
+ return transition(State::WallClockTime);
+}
+
+Error BlockVerifier::visit(NewCPUIDRecord &) {
+ return transition(State::NewCPUId);
+}
+
+Error BlockVerifier::visit(TSCWrapRecord &) {
+ return transition(State::TSCWrap);
+}
+
+Error BlockVerifier::visit(CustomEventRecord &) {
+ return transition(State::CustomEvent);
+}
+
+Error BlockVerifier::visit(CustomEventRecordV5 &) {
+ return transition(State::CustomEvent);
+}
+
+Error BlockVerifier::visit(TypedEventRecord &) {
+ return transition(State::TypedEvent);
+}
+
+Error BlockVerifier::visit(CallArgRecord &) {
+ return transition(State::CallArg);
+}
+
+Error BlockVerifier::visit(PIDRecord &) { return transition(State::PIDEntry); }
+
+Error BlockVerifier::visit(NewBufferRecord &) {
+ return transition(State::NewBuffer);
+}
+
+Error BlockVerifier::visit(EndBufferRecord &) {
+ return transition(State::EndOfBuffer);
+}
+
+Error BlockVerifier::visit(FunctionRecord &) {
+ return transition(State::Function);
+}
+
+Error BlockVerifier::verify() {
+ // The known terminal conditions are the following:
+ switch (CurrentRecord) {
+ case State::EndOfBuffer:
+ case State::NewCPUId:
+ case State::CustomEvent:
+ case State::TypedEvent:
+ case State::Function:
+ case State::CallArg:
+ case State::TSCWrap:
+ return Error::success();
+ default:
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "BlockVerifier: Invalid terminal condition %s, malformed block.",
+ recordToString(CurrentRecord).data());
+ }
+}
+
+void BlockVerifier::reset() { CurrentRecord = State::Unknown; }
+
+} // namespace xray
+} // namespace llvm
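The verifier's transition table above is a per-state bitset of legal successors built from mask(S) = 1 << number(S); a transition is accepted when the target's bit is set in the current state's row. Here is a reduced sketch of the same check with only three states; it is illustrative only and not the real table.

#include <bitset>
#include <cstddef>

enum class State : std::size_t { Unknown, NewBuffer, WallClockTime, StateMax };

constexpr unsigned long long mask(State S) {
  return 1ULL << static_cast<std::size_t>(S);
}
constexpr std::size_t number(State S) { return static_cast<std::size_t>(S); }

using ToSet = std::bitset<number(State::StateMax)>;

bool legalTransition(State From, State To) {
  // One row per state; only Unknown->NewBuffer and NewBuffer->WallClockTime
  // are allowed in this toy table.
  static const ToSet Table[number(State::StateMax)] = {
      ToSet(mask(State::NewBuffer)),     // from Unknown
      ToSet(mask(State::WallClockTime)), // from NewBuffer
      ToSet(0),                          // from WallClockTime (terminal here)
  };
  return (Table[number(From)] & ToSet(mask(To))).any();
}

int main() {
  return (legalTransition(State::Unknown, State::NewBuffer) &&
          !legalTransition(State::Unknown, State::WallClockTime))
             ? 0
             : 1;
}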
diff --git a/contrib/llvm/lib/XRay/FDRRecordProducer.cpp b/contrib/llvm/lib/XRay/FDRRecordProducer.cpp
new file mode 100644
index 000000000000..25b3ee8af219
--- /dev/null
+++ b/contrib/llvm/lib/XRay/FDRRecordProducer.cpp
@@ -0,0 +1,198 @@
+//===- FDRRecordProducer.cpp - XRay FDR Mode Record Producer --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRRecordProducer.h"
+#include "llvm/Support/DataExtractor.h"
+
+#include <cstdint>
+
+namespace llvm {
+namespace xray {
+
+namespace {
+
+// Keep this in sync with the values written in the XRay FDR mode runtime in
+// compiler-rt.
+enum MetadataRecordKinds : uint8_t {
+ NewBufferKind,
+ EndOfBufferKind,
+ NewCPUIdKind,
+ TSCWrapKind,
+ WalltimeMarkerKind,
+ CustomEventMarkerKind,
+ CallArgumentKind,
+ BufferExtentsKind,
+ TypedEventMarkerKind,
+ PidKind,
+ // This is an end marker, used to identify the upper bound for this enum.
+ EnumEndMarker,
+};
+
+Expected<std::unique_ptr<Record>>
+metadataRecordType(const XRayFileHeader &Header, uint8_t T) {
+
+ if (T >= static_cast<uint8_t>(MetadataRecordKinds::EnumEndMarker))
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Invalid metadata record type: %d", T);
+ switch (T) {
+ case MetadataRecordKinds::NewBufferKind:
+ return make_unique<NewBufferRecord>();
+ case MetadataRecordKinds::EndOfBufferKind:
+ if (Header.Version >= 2)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "End of buffer records are no longer supported starting version "
+ "2 of the log.");
+ return make_unique<EndBufferRecord>();
+ case MetadataRecordKinds::NewCPUIdKind:
+ return make_unique<NewCPUIDRecord>();
+ case MetadataRecordKinds::TSCWrapKind:
+ return make_unique<TSCWrapRecord>();
+ case MetadataRecordKinds::WalltimeMarkerKind:
+ return make_unique<WallclockRecord>();
+ case MetadataRecordKinds::CustomEventMarkerKind:
+ if (Header.Version >= 5)
+ return make_unique<CustomEventRecordV5>();
+ return make_unique<CustomEventRecord>();
+ case MetadataRecordKinds::CallArgumentKind:
+ return make_unique<CallArgRecord>();
+ case MetadataRecordKinds::BufferExtentsKind:
+ return make_unique<BufferExtents>();
+ case MetadataRecordKinds::TypedEventMarkerKind:
+ return make_unique<TypedEventRecord>();
+ case MetadataRecordKinds::PidKind:
+ return make_unique<PIDRecord>();
+ case MetadataRecordKinds::EnumEndMarker:
+ llvm_unreachable("Invalid MetadataRecordKind");
+ }
+ llvm_unreachable("Unhandled MetadataRecordKinds enum value");
+}
+
+constexpr bool isMetadataIntroducer(uint8_t FirstByte) {
+ return FirstByte & 0x01u;
+}
+
+} // namespace
+
+Expected<std::unique_ptr<Record>>
+FileBasedRecordProducer::findNextBufferExtent() {
+ // We seek one byte at a time until we find a suitable buffer extents metadata
+ // record introducer.
+ std::unique_ptr<Record> R;
+ while (!R) {
+ auto PreReadOffset = OffsetPtr;
+ uint8_t FirstByte = E.getU8(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading one byte from offset %d.", OffsetPtr);
+
+ if (isMetadataIntroducer(FirstByte)) {
+ auto LoadedType = FirstByte >> 1;
+ if (LoadedType == MetadataRecordKinds::BufferExtentsKind) {
+ auto MetadataRecordOrErr = metadataRecordType(Header, LoadedType);
+ if (!MetadataRecordOrErr)
+ return MetadataRecordOrErr.takeError();
+
+ R = std::move(MetadataRecordOrErr.get());
+ RecordInitializer RI(E, OffsetPtr);
+ if (auto Err = R->apply(RI))
+ return std::move(Err);
+ return std::move(R);
+ }
+ }
+ }
+ llvm_unreachable("Must always terminate with either an error or a record.");
+}
+
+Expected<std::unique_ptr<Record>> FileBasedRecordProducer::produce() {
+ // First, we set up our result record.
+ std::unique_ptr<Record> R;
+
+ // Before we do any further reading, we should check whether we're at the end
+ // of the current buffer we've been consuming. In FDR logs version >= 3, we
+ // rely on the buffer extents record to determine how many bytes we should be
+ // considering as valid records.
+ if (Header.Version >= 3 && CurrentBufferBytes == 0) {
+ // Find the next buffer extents record.
+ auto BufferExtentsOrError = findNextBufferExtent();
+ if (!BufferExtentsOrError)
+ return joinErrors(
+ BufferExtentsOrError.takeError(),
+ createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed to find the next BufferExtents record."));
+
+ R = std::move(BufferExtentsOrError.get());
+ assert(R != nullptr);
+ assert(isa<BufferExtents>(R.get()));
+ auto BE = dyn_cast<BufferExtents>(R.get());
+ CurrentBufferBytes = BE->size();
+ return std::move(R);
+ }
+
+ //
+ // At the top level, we read one byte to determine the type of the record to
+ // create. This byte comprises the following bits:
+ //
+ // - offset 0: A '1' indicates a metadata record, a '0' indicates a function
+ // record.
+ // - offsets 1-7: For metadata records, this indicates the kind of metadata
+ // record that should be loaded.
+ //
+ // We read the first byte, then create the appropriate type of record to
+ // consume the rest of the bytes.
+ auto PreReadOffset = OffsetPtr;
+ uint8_t FirstByte = E.getU8(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading one byte from offset %d.", OffsetPtr);
+
+ // Metadata records are handled specially here.
+ if (isMetadataIntroducer(FirstByte)) {
+ auto LoadedType = FirstByte >> 1;
+ auto MetadataRecordOrErr = metadataRecordType(Header, LoadedType);
+ if (!MetadataRecordOrErr)
+ return joinErrors(
+ MetadataRecordOrErr.takeError(),
+ createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Encountered an unsupported metadata record (%d) at offset %d.",
+ LoadedType, PreReadOffset));
+ R = std::move(MetadataRecordOrErr.get());
+ } else {
+ R = llvm::make_unique<FunctionRecord>();
+ }
+ RecordInitializer RI(E, OffsetPtr);
+
+ if (auto Err = R->apply(RI))
+ return std::move(Err);
+
+ // If we encountered a BufferExtents record, we should record the remaining
+ // bytes for the current buffer, to determine when we should start ignoring
+ // potentially malformed data and looking for buffer extents records.
+ if (auto BE = dyn_cast<BufferExtents>(R.get())) {
+ CurrentBufferBytes = BE->size();
+ } else if (Header.Version >= 3) {
+ if (OffsetPtr - PreReadOffset > CurrentBufferBytes)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Buffer over-read at offset %d (over-read by %d bytes); Record Type "
+ "= %s.",
+ OffsetPtr, (OffsetPtr - PreReadOffset) - CurrentBufferBytes,
+ Record::kindToString(R->getRecordType()).data());
+
+ CurrentBufferBytes -= OffsetPtr - PreReadOffset;
+ }
+ assert(R != nullptr);
+ return std::move(R);
+}
+
+} // namespace xray
+} // namespace llvm
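The producer keys everything off the first byte of a record, as the comment in produce() describes: bit 0 distinguishes metadata from function records, and for metadata records the remaining seven bits carry the kind. A small sketch of that decoding, not part of the imported sources (BufferExtentsKind sits at index 7 in the enum above):

#include <cassert>
#include <cstdint>

constexpr bool isMetadataIntroducer(uint8_t FirstByte) {
  return FirstByte & 0x01u; // bit 0 set => metadata record
}

constexpr uint8_t metadataKind(uint8_t FirstByte) {
  return FirstByte >> 1;    // bits 1-7 => MetadataRecordKinds value
}

int main() {
  // Encode a BufferExtents introducer (kind 7) and decode it again.
  uint8_t FirstByte = static_cast<uint8_t>((7u << 1) | 0x01u);
  assert(isMetadataIntroducer(FirstByte));
  assert(metadataKind(FirstByte) == 7);
  return 0;
}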
diff --git a/contrib/llvm/lib/XRay/FDRRecords.cpp b/contrib/llvm/lib/XRay/FDRRecords.cpp
new file mode 100644
index 000000000000..2a40d5e06229
--- /dev/null
+++ b/contrib/llvm/lib/XRay/FDRRecords.cpp
@@ -0,0 +1,67 @@
+//===- FDRRecords.cpp - XRay Flight Data Recorder Mode Records -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define types and operations on these types that represent the different kinds
+// of records we encounter in XRay flight data recorder mode traces.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRRecords.h"
+
+namespace llvm {
+namespace xray {
+
+Error BufferExtents::apply(RecordVisitor &V) { return V.visit(*this); }
+Error WallclockRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error NewCPUIDRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error TSCWrapRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error CustomEventRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error CallArgRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error PIDRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error NewBufferRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error EndBufferRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error FunctionRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error CustomEventRecordV5::apply(RecordVisitor &V) { return V.visit(*this); }
+Error TypedEventRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+
+StringRef Record::kindToString(RecordKind K) {
+ switch (K) {
+ case RecordKind::RK_Metadata:
+ return "Metadata";
+ case RecordKind::RK_Metadata_BufferExtents:
+ return "Metadata:BufferExtents";
+ case RecordKind::RK_Metadata_WallClockTime:
+ return "Metadata:WallClockTime";
+ case RecordKind::RK_Metadata_NewCPUId:
+ return "Metadata:NewCPUId";
+ case RecordKind::RK_Metadata_TSCWrap:
+ return "Metadata:TSCWrap";
+ case RecordKind::RK_Metadata_CustomEvent:
+ return "Metadata:CustomEvent";
+ case RecordKind::RK_Metadata_CustomEventV5:
+ return "Metadata:CustomEventV5";
+ case RecordKind::RK_Metadata_CallArg:
+ return "Metadata:CallArg";
+ case RecordKind::RK_Metadata_PIDEntry:
+ return "Metadata:PIDEntry";
+ case RecordKind::RK_Metadata_NewBuffer:
+ return "Metadata:NewBuffer";
+ case RecordKind::RK_Metadata_EndOfBuffer:
+ return "Metadata:EndOfBuffer";
+ case RecordKind::RK_Metadata_TypedEvent:
+ return "Metadata:TypedEvent";
+ case RecordKind::RK_Metadata_LastMetadata:
+ return "Metadata:LastMetadata";
+ case RecordKind::RK_Function:
+ return "Function";
+ }
+ return "Unknown";
+}
+
+} // namespace xray
+} // namespace llvm
diff --git a/contrib/llvm/lib/XRay/FDRTraceExpander.cpp b/contrib/llvm/lib/XRay/FDRTraceExpander.cpp
new file mode 100644
index 000000000000..a6e1521da87f
--- /dev/null
+++ b/contrib/llvm/lib/XRay/FDRTraceExpander.cpp
@@ -0,0 +1,132 @@
+//===- FDRTraceExpander.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRTraceExpander.h"
+
+namespace llvm {
+namespace xray {
+
+void TraceExpander::resetCurrentRecord() {
+ if (BuildingRecord)
+ C(CurrentRecord);
+ BuildingRecord = false;
+ CurrentRecord.CallArgs.clear();
+ CurrentRecord.Data.clear();
+}
+
+Error TraceExpander::visit(BufferExtents &) {
+ resetCurrentRecord();
+ return Error::success();
+}
+
+Error TraceExpander::visit(WallclockRecord &) { return Error::success(); }
+
+Error TraceExpander::visit(NewCPUIDRecord &R) {
+ CPUId = R.cpuid();
+ BaseTSC = R.tsc();
+ return Error::success();
+}
+
+Error TraceExpander::visit(TSCWrapRecord &R) {
+ BaseTSC = R.tsc();
+ return Error::success();
+}
+
+Error TraceExpander::visit(CustomEventRecord &R) {
+ resetCurrentRecord();
+ if (!IgnoringRecords) {
+ CurrentRecord.TSC = R.tsc();
+ CurrentRecord.CPU = R.cpu();
+ CurrentRecord.PId = PID;
+ CurrentRecord.TId = TID;
+ CurrentRecord.Type = RecordTypes::CUSTOM_EVENT;
+ CurrentRecord.Data = R.data();
+ BuildingRecord = true;
+ }
+ return Error::success();
+}
+
+Error TraceExpander::visit(CustomEventRecordV5 &R) {
+ resetCurrentRecord();
+ if (!IgnoringRecords) {
+ BaseTSC += R.delta();
+ CurrentRecord.TSC = BaseTSC;
+ CurrentRecord.CPU = CPUId;
+ CurrentRecord.PId = PID;
+ CurrentRecord.TId = TID;
+ CurrentRecord.Type = RecordTypes::CUSTOM_EVENT;
+ CurrentRecord.Data = R.data();
+ BuildingRecord = true;
+ }
+ return Error::success();
+}
+
+Error TraceExpander::visit(TypedEventRecord &R) {
+ resetCurrentRecord();
+ if (!IgnoringRecords) {
+ BaseTSC += R.delta();
+ CurrentRecord.TSC = BaseTSC;
+ CurrentRecord.CPU = CPUId;
+ CurrentRecord.PId = PID;
+ CurrentRecord.TId = TID;
+ CurrentRecord.RecordType = R.eventType();
+ CurrentRecord.Type = RecordTypes::TYPED_EVENT;
+ CurrentRecord.Data = R.data();
+ BuildingRecord = true;
+ }
+ return Error::success();
+}
+
+Error TraceExpander::visit(CallArgRecord &R) {
+ CurrentRecord.CallArgs.push_back(R.arg());
+ CurrentRecord.Type = RecordTypes::ENTER_ARG;
+ return Error::success();
+}
+
+Error TraceExpander::visit(PIDRecord &R) {
+ PID = R.pid();
+ return Error::success();
+}
+
+Error TraceExpander::visit(NewBufferRecord &R) {
+ if (IgnoringRecords)
+ IgnoringRecords = false;
+ TID = R.tid();
+ if (LogVersion == 2)
+ PID = R.tid();
+ return Error::success();
+}
+
+Error TraceExpander::visit(EndBufferRecord &) {
+ IgnoringRecords = true;
+ resetCurrentRecord();
+ return Error::success();
+}
+
+Error TraceExpander::visit(FunctionRecord &R) {
+ resetCurrentRecord();
+ if (!IgnoringRecords) {
+ BaseTSC += R.delta();
+ CurrentRecord.Type = R.recordType();
+ CurrentRecord.FuncId = R.functionId();
+ CurrentRecord.TSC = BaseTSC;
+ CurrentRecord.PId = PID;
+ CurrentRecord.TId = TID;
+ CurrentRecord.CPU = CPUId;
+ BuildingRecord = true;
+ }
+ return Error::success();
+}
+
+Error TraceExpander::flush() {
+ resetCurrentRecord();
+ return Error::success();
+}
+
+} // namespace xray
+} // namespace llvm
diff --git a/contrib/llvm/lib/XRay/FDRTraceWriter.cpp b/contrib/llvm/lib/XRay/FDRTraceWriter.cpp
new file mode 100644
index 000000000000..c5224f4be094
--- /dev/null
+++ b/contrib/llvm/lib/XRay/FDRTraceWriter.cpp
@@ -0,0 +1,154 @@
+//===- FDRTraceWriter.cpp - XRay FDR Trace Writer ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A utility that can write out XRay FDR Mode formatted trace files.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRTraceWriter.h"
+#include <tuple>
+
+namespace llvm {
+namespace xray {
+
+namespace {
+
+template <size_t Index> struct IndexedWriter {
+ template <
+ class Tuple,
+ typename std::enable_if<
+ (Index <
+ std::tuple_size<typename std::remove_reference<Tuple>::type>::value),
+ int>::type = 0>
+ static size_t write(support::endian::Writer &OS, Tuple &&T) {
+ OS.write(std::get<Index>(T));
+ return sizeof(std::get<Index>(T)) + IndexedWriter<Index + 1>::write(OS, T);
+ }
+
+ template <
+ class Tuple,
+ typename std::enable_if<
+ (Index >=
+ std::tuple_size<typename std::remove_reference<Tuple>::type>::value),
+ int>::type = 0>
+ static size_t write(support::endian::Writer &OS, Tuple &&) {
+ return 0;
+ }
+};
+
+template <uint8_t Kind, class... Values>
+Error writeMetadata(support::endian::Writer &OS, Values &&... Ds) {
+ // The first bit in the first byte of metadata records is always set to 1, so
+ // we ensure this is the case when we write out the first byte of the record.
+ uint8_t FirstByte = (static_cast<uint8_t>(Kind) << 1) | uint8_t{0x01u};
+ auto T = std::make_tuple(std::forward<Values>(std::move(Ds))...);
+ // Write in field order.
+ OS.write(FirstByte);
+ auto Bytes = IndexedWriter<0>::write(OS, T);
+ assert(Bytes <= 15 && "Must only ever write at most 16 byte metadata!");
+ // Pad out with the appropriate number of zeros.
+ for (; Bytes < 15; ++Bytes)
+ OS.write('\0');
+ return Error::success();
+}
+
+} // namespace
+
+FDRTraceWriter::FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H)
+ : OS(O, support::endianness::native) {
+ // We need to re-construct a header by writing the fields we care about for
+ // traces, in the format that the runtime would have written them.
+ uint32_t BitField =
+ (H.ConstantTSC ? 0x01 : 0x0) | (H.NonstopTSC ? 0x02 : 0x0);
+
+ // For endian-correctness, we write these fields out in the order we expect
+ // them to appear, instead of blasting the raw bytes of the struct through.
+ OS.write(H.Version);
+ OS.write(H.Type);
+ OS.write(BitField);
+ OS.write(H.CycleFrequency);
+ ArrayRef<char> FreeFormBytes(H.FreeFormData,
+ sizeof(XRayFileHeader::FreeFormData));
+ OS.write(FreeFormBytes);
+}
+
+FDRTraceWriter::~FDRTraceWriter() {}
+
+Error FDRTraceWriter::visit(BufferExtents &R) {
+ return writeMetadata<7u>(OS, R.size());
+}
+
+Error FDRTraceWriter::visit(WallclockRecord &R) {
+ return writeMetadata<4u>(OS, R.seconds(), R.nanos());
+}
+
+Error FDRTraceWriter::visit(NewCPUIDRecord &R) {
+ return writeMetadata<2u>(OS, R.cpuid(), R.tsc());
+}
+
+Error FDRTraceWriter::visit(TSCWrapRecord &R) {
+ return writeMetadata<3u>(OS, R.tsc());
+}
+
+Error FDRTraceWriter::visit(CustomEventRecord &R) {
+ if (auto E = writeMetadata<5u>(OS, R.size(), R.tsc(), R.cpu()))
+ return E;
+ auto D = R.data();
+ ArrayRef<char> Bytes(D.data(), D.size());
+ OS.write(Bytes);
+ return Error::success();
+}
+
+Error FDRTraceWriter::visit(CustomEventRecordV5 &R) {
+ if (auto E = writeMetadata<5u>(OS, R.size(), R.delta()))
+ return E;
+ auto D = R.data();
+ ArrayRef<char> Bytes(D.data(), D.size());
+ OS.write(Bytes);
+ return Error::success();
+}
+
+Error FDRTraceWriter::visit(TypedEventRecord &R) {
+ if (auto E = writeMetadata<8u>(OS, R.size(), R.delta(), R.eventType()))
+ return E;
+ auto D = R.data();
+ ArrayRef<char> Bytes(D.data(), D.size());
+ OS.write(Bytes);
+ return Error::success();
+}
+
+Error FDRTraceWriter::visit(CallArgRecord &R) {
+ return writeMetadata<6u>(OS, R.arg());
+}
+
+Error FDRTraceWriter::visit(PIDRecord &R) {
+ return writeMetadata<9u>(OS, R.pid());
+}
+
+Error FDRTraceWriter::visit(NewBufferRecord &R) {
+ return writeMetadata<0u>(OS, R.tid());
+}
+
+Error FDRTraceWriter::visit(EndBufferRecord &R) {
+ return writeMetadata<1u>(OS, 0);
+}
+
+Error FDRTraceWriter::visit(FunctionRecord &R) {
+ // Write out the data in "field" order, to be endian-aware.
+ uint32_t TypeRecordFuncId = uint32_t{R.functionId() & ~uint32_t{0x0Fu << 28}};
+ TypeRecordFuncId <<= 3;
+ TypeRecordFuncId |= static_cast<uint32_t>(R.recordType());
+ TypeRecordFuncId <<= 1;
+ TypeRecordFuncId &= ~uint32_t{0x01};
+ OS.write(TypeRecordFuncId);
+ OS.write(R.delta());
+ return Error::success();
+}
+
+} // namespace xray
+} // namespace llvm
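The function-record packing in visit(FunctionRecord&) above ends up with bit 0 cleared, the record type in bits 1-3, and the low 28 bits of the function id in bits 4-31. The sketch below packs a word the same way and checks the fields round-trip; packFunctionRecord is a name made up for the example and is not part of the imported sources.

#include <cassert>
#include <cstdint>

uint32_t packFunctionRecord(uint32_t FuncId, uint32_t RecordType) {
  uint32_t Word = FuncId & ~uint32_t{0x0Fu << 28}; // keep the low 28 bits
  Word <<= 3;
  Word |= RecordType & 0x7u;                       // three-bit record type
  Word <<= 1;
  Word &= ~uint32_t{0x01};                         // low bit 0 => function record
  return Word;
}

int main() {
  uint32_t Word = packFunctionRecord(/*FuncId=*/1234, /*RecordType=*/2);
  assert((Word & 0x01u) == 0);       // function record marker
  assert(((Word >> 1) & 0x7u) == 2); // record type round-trips
  assert((Word >> 4) == 1234);       // function id round-trips
  return 0;
}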
diff --git a/contrib/llvm/lib/XRay/FileHeaderReader.cpp b/contrib/llvm/lib/XRay/FileHeaderReader.cpp
new file mode 100644
index 000000000000..0b3fb8b6f692
--- /dev/null
+++ b/contrib/llvm/lib/XRay/FileHeaderReader.cpp
@@ -0,0 +1,70 @@
+//===- FileHeaderReader.cpp - XRay File Header Reader --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FileHeaderReader.h"
+
+namespace llvm {
+namespace xray {
+
+// Reads and returns an XRayFileHeader by parsing the first 32 bytes of the file.
+Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
+ uint32_t &OffsetPtr) {
+ // FIXME: Maybe deduce whether the data is little or big-endian using some
+ // magic bytes in the beginning of the file?
+
+ // First 32 bytes of the file will always be the header. We assume a certain
+ // format here:
+ //
+ // (2) uint16 : version
+ // (2) uint16 : type
+ // (4) uint32 : bitfield
+ // (8) uint64 : cycle frequency
+ // (16) - : padding
+ XRayFileHeader FileHeader;
+ auto PreReadOffset = OffsetPtr;
+ FileHeader.Version = HeaderExtractor.getU16(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading version from file header at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading file type from file header at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ uint32_t Bitfield = HeaderExtractor.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading flag bits from file header at offset %d.", OffsetPtr);
+
+ FileHeader.ConstantTSC = Bitfield & 1uL;
+ FileHeader.NonstopTSC = Bitfield & 1uL << 1;
+ PreReadOffset = OffsetPtr;
+ FileHeader.CycleFrequency = HeaderExtractor.getU64(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading cycle frequency from file header at offset %d.",
+ OffsetPtr);
+
+ std::memcpy(&FileHeader.FreeFormData,
+ HeaderExtractor.getData().bytes_begin() + OffsetPtr, 16);
+
+ // Manually advance the offset pointer 16 bytes, after getting a raw memcpy
+ // from the underlying data.
+ OffsetPtr += 16;
+ return std::move(FileHeader);
+}
+
+} // namespace xray
+} // namespace llvm
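The header parsed above is a fixed 32-byte layout. As an illustration only (not part of the imported sources), the same layout written as a plain struct, assuming the usual field sizes and no padding on common 64-bit targets; the reader deliberately parses field by field instead, to stay endian-aware:

#include <cstdint>

struct FileHeaderLayout {
  uint16_t Version;        // bytes  0-1
  uint16_t Type;           // bytes  2-3
  uint32_t Bitfield;       // bytes  4-7  (bit 0: ConstantTSC, bit 1: NonstopTSC)
  uint64_t CycleFrequency; // bytes  8-15
  char FreeFormData[16];   // bytes 16-31 (copied verbatim)
};

static_assert(sizeof(FileHeaderLayout) == 32,
              "Header is expected to occupy exactly 32 bytes");

int main() { return 0; }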
diff --git a/contrib/llvm/lib/XRay/InstrumentationMap.cpp b/contrib/llvm/lib/XRay/InstrumentationMap.cpp
index a7d6600b0d8a..9f2b179486f0 100644
--- a/contrib/llvm/lib/XRay/InstrumentationMap.cpp
+++ b/contrib/llvm/lib/XRay/InstrumentationMap.cpp
@@ -12,12 +12,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/XRay/InstrumentationMap.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
@@ -46,19 +48,21 @@ Optional<uint64_t> InstrumentationMap::getFunctionAddr(int32_t FuncId) const {
return None;
}
+using RelocMap = DenseMap<uint64_t, uint64_t>;
+
static Error
-loadELF64(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
+loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
InstrumentationMap::SledContainer &Sleds,
InstrumentationMap::FunctionAddressMap &FunctionAddresses,
InstrumentationMap::FunctionAddressReverseMap &FunctionIds) {
InstrumentationMap Map;
// Find the section named "xray_instr_map".
- if (!ObjFile.getBinary()->isELF() ||
+ if ((!ObjFile.getBinary()->isELF() && !ObjFile.getBinary()->isMachO()) ||
!(ObjFile.getBinary()->getArch() == Triple::x86_64 ||
ObjFile.getBinary()->getArch() == Triple::ppc64le))
return make_error<StringError>(
- "File format not supported (only does ELF little endian 64-bit).",
+ "File format not supported (only does ELF and Mach-O little endian 64-bit).",
std::make_error_code(std::errc::not_supported));
StringRef Contents = "";
@@ -79,6 +83,31 @@ loadELF64(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
return errorCodeToError(
std::make_error_code(std::errc::executable_format_error));
+ RelocMap Relocs;
+ if (ObjFile.getBinary()->isELF()) {
+ uint32_t RelativeRelocation = [](object::ObjectFile *ObjFile) {
+ if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(ObjFile))
+ return ELFObj->getELFFile()->getRelativeRelocationType();
+ else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(ObjFile))
+ return ELFObj->getELFFile()->getRelativeRelocationType();
+ else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(ObjFile))
+ return ELFObj->getELFFile()->getRelativeRelocationType();
+ else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(ObjFile))
+ return ELFObj->getELFFile()->getRelativeRelocationType();
+ else
+ return static_cast<uint32_t>(0);
+ }(ObjFile.getBinary());
+
+ for (const object::SectionRef &Section : Sections) {
+ for (const object::RelocationRef &Reloc : Section.relocations()) {
+ if (Reloc.getType() != RelativeRelocation)
+ continue;
+ if (auto AddendOrErr = object::ELFRelocationRef(Reloc).getAddend())
+ Relocs.insert({Reloc.getOffset(), *AddendOrErr});
+ }
+ }
+ }
+
// Copy the instrumentation map data into the Sleds data structure.
auto C = Contents.bytes_begin();
static constexpr size_t ELF64SledEntrySize = 32;
@@ -89,6 +118,16 @@ loadELF64(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
"an XRay sled entry in ELF64."),
std::make_error_code(std::errc::executable_format_error));
+ auto RelocateOrElse = [&](uint32_t Offset, uint64_t Address) {
+ if (!Address) {
+ uint64_t A = I->getAddress() + C - Contents.bytes_begin() + Offset;
+ RelocMap::const_iterator R = Relocs.find(A);
+ if (R != Relocs.end())
+ return R->second;
+ }
+ return Address;
+ };
+
int32_t FuncId = 1;
uint64_t CurFn = 0;
for (; C != Contents.bytes_end(); C += ELF64SledEntrySize) {
@@ -98,8 +137,10 @@ loadELF64(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
Sleds.push_back({});
auto &Entry = Sleds.back();
uint32_t OffsetPtr = 0;
- Entry.Address = Extractor.getU64(&OffsetPtr);
- Entry.Function = Extractor.getU64(&OffsetPtr);
+ uint32_t AddrOff = OffsetPtr;
+ Entry.Address = RelocateOrElse(AddrOff, Extractor.getU64(&OffsetPtr));
+ uint32_t FuncOff = OffsetPtr;
+ Entry.Function = RelocateOrElse(FuncOff, Extractor.getU64(&OffsetPtr));
auto Kind = Extractor.getU8(&OffsetPtr);
static constexpr SledEntry::FunctionKinds Kinds[] = {
SledEntry::FunctionKinds::ENTRY, SledEntry::FunctionKinds::EXIT,
@@ -191,7 +232,7 @@ llvm::xray::loadInstrumentationMap(StringRef Filename) {
if (auto E = loadYAML(Fd, FileSize, Filename, Map.Sleds,
Map.FunctionAddresses, Map.FunctionIds))
return std::move(E);
- } else if (auto E = loadELF64(Filename, *ObjectFileOrError, Map.Sleds,
+ } else if (auto E = loadObj(Filename, *ObjectFileOrError, Map.Sleds,
Map.FunctionAddresses, Map.FunctionIds)) {
return std::move(E);
}
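The RelocateOrElse lambda added above covers sleds whose address fields are zero in the section data because they are filled in by relative relocations; the value is then taken from the relocation's addend, keyed by the field's address. A standalone sketch of that fallback, not part of the patch, with std::map standing in for the DenseMap-based RelocMap:

#include <cstdint>
#include <map>

using RelocMap = std::map<uint64_t, uint64_t>;

uint64_t relocateOrElse(const RelocMap &Relocs, uint64_t FieldAddr,
                        uint64_t Value) {
  if (Value != 0)
    return Value;               // the value is present in the section data
  auto It = Relocs.find(FieldAddr);
  return It != Relocs.end() ? It->second : 0; // addend stands in for the value
}

int main() {
  RelocMap Relocs{{0x1000, 0x401234}};
  return relocateOrElse(Relocs, 0x1000, 0) == 0x401234 ? 0 : 1;
}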
diff --git a/contrib/llvm/lib/XRay/LogBuilderConsumer.cpp b/contrib/llvm/lib/XRay/LogBuilderConsumer.cpp
new file mode 100644
index 000000000000..88b7d2d728b1
--- /dev/null
+++ b/contrib/llvm/lib/XRay/LogBuilderConsumer.cpp
@@ -0,0 +1,38 @@
+//===- LogBuilderConsumer.cpp - XRay Flight Data Recorder Mode Records ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRRecordConsumer.h"
+
+namespace llvm {
+namespace xray {
+
+Error LogBuilderConsumer::consume(std::unique_ptr<Record> R) {
+ if (!R)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Must not call RecordConsumer::consume() with a null pointer.");
+ Records.push_back(std::move(R));
+ return Error::success();
+}
+
+Error PipelineConsumer::consume(std::unique_ptr<Record> R) {
+ if (!R)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Must not call RecordConsumer::consume() with a null pointer.");
+
+ // We apply all of the visitors in order, and concatenate errors
+ // appropriately.
+ Error Result = Error::success();
+ for (auto *V : Visitors)
+ Result = joinErrors(std::move(Result), R->apply(*V));
+ return Result;
+}
+
+} // namespace xray
+} // namespace llvm
diff --git a/contrib/llvm/lib/XRay/Profile.cpp b/contrib/llvm/lib/XRay/Profile.cpp
new file mode 100644
index 000000000000..e8a082884d69
--- /dev/null
+++ b/contrib/llvm/lib/XRay/Profile.cpp
@@ -0,0 +1,403 @@
+//===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the XRay Profile class representing the latency profile generated by
+// XRay's profiling mode.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/Profile.h"
+
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/XRay/Trace.h"
+#include <deque>
+#include <memory>
+
+namespace llvm {
+namespace xray {
+
+Profile::Profile(const Profile &O) {
+ // We need to re-create all the tries from the original (O) in the Profile
+ // being initialized, based on the Block instances we see.
+ for (const auto &Block : O) {
+ Blocks.push_back({Block.Thread, {}});
+ auto &B = Blocks.back();
+ for (const auto &PathData : Block.PathData)
+ B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
+ PathData.second});
+ }
+}
+
+Profile &Profile::operator=(const Profile &O) {
+ Profile P = O;
+ *this = std::move(P);
+ return *this;
+}
+
+namespace {
+
+struct BlockHeader {
+ uint32_t Size;
+ uint32_t Number;
+ uint64_t Thread;
+};
+
+static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
+ uint32_t &Offset) {
+ BlockHeader H;
+ uint32_t CurrentOffset = Offset;
+ H.Size = Extractor.getU32(&Offset);
+ if (Offset == CurrentOffset)
+ return make_error<StringError>(
+ Twine("Error parsing block header size at offset '") +
+ Twine(CurrentOffset) + "'",
+ std::make_error_code(std::errc::invalid_argument));
+ CurrentOffset = Offset;
+ H.Number = Extractor.getU32(&Offset);
+ if (Offset == CurrentOffset)
+ return make_error<StringError>(
+ Twine("Error parsing block header number at offset '") +
+ Twine(CurrentOffset) + "'",
+ std::make_error_code(std::errc::invalid_argument));
+ CurrentOffset = Offset;
+ H.Thread = Extractor.getU64(&Offset);
+ if (Offset == CurrentOffset)
+ return make_error<StringError>(
+ Twine("Error parsing block header thread id at offset '") +
+ Twine(CurrentOffset) + "'",
+ std::make_error_code(std::errc::invalid_argument));
+ return H;
+}
+
+static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
+ uint32_t &Offset) {
+ // We're reading a sequence of int32_t's until we find a 0.
+ std::vector<Profile::FuncID> Path;
+ auto CurrentOffset = Offset;
+ int32_t FuncId;
+ do {
+ FuncId = Extractor.getSigned(&Offset, 4);
+ if (CurrentOffset == Offset)
+ return make_error<StringError>(
+ Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
+ std::make_error_code(std::errc::invalid_argument));
+ CurrentOffset = Offset;
+ Path.push_back(FuncId);
+ } while (FuncId != 0);
+ return std::move(Path);
+}
+
+static Expected<Profile::Data> readData(DataExtractor &Extractor,
+ uint32_t &Offset) {
+ // We expect a certain number of elements for Data:
+ // - A 64-bit CallCount
+ // - A 64-bit CumulativeLocalTime counter
+ Profile::Data D;
+ auto CurrentOffset = Offset;
+ D.CallCount = Extractor.getU64(&Offset);
+ if (CurrentOffset == Offset)
+ return make_error<StringError>(
+ Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
+ "'",
+ std::make_error_code(std::errc::invalid_argument));
+ CurrentOffset = Offset;
+ D.CumulativeLocalTime = Extractor.getU64(&Offset);
+ if (CurrentOffset == Offset)
+ return make_error<StringError>(
+ Twine("Error parsing cumulative local time at offset '") +
+ Twine(CurrentOffset) + "'",
+ std::make_error_code(std::errc::invalid_argument));
+ return D;
+}
+
+} // namespace
+
+Error Profile::addBlock(Block &&B) {
+ if (B.PathData.empty())
+ return make_error<StringError>(
+ "Block may not have empty path data.",
+ std::make_error_code(std::errc::invalid_argument));
+
+ Blocks.emplace_back(std::move(B));
+ return Error::success();
+}
+
+Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
+ auto It = PathIDMap.find(P);
+ if (It == PathIDMap.end())
+ return make_error<StringError>(
+ Twine("PathID not found: ") + Twine(P),
+ std::make_error_code(std::errc::invalid_argument));
+ std::vector<Profile::FuncID> Path;
+ for (auto Node = It->second; Node; Node = Node->Caller)
+ Path.push_back(Node->Func);
+ return std::move(Path);
+}
+
+Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
+ if (P.empty())
+ return 0;
+
+ auto RootToLeafPath = reverse(P);
+
+ // Find the root.
+ auto It = RootToLeafPath.begin();
+ auto PathRoot = *It++;
+ auto RootIt =
+ find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
+
+ // If we've not seen this root before, remember it.
+ TrieNode *Node = nullptr;
+ if (RootIt == Roots.end()) {
+ NodeStorage.emplace_back();
+ Node = &NodeStorage.back();
+ Node->Func = PathRoot;
+ Roots.push_back(Node);
+ } else {
+ Node = *RootIt;
+ }
+
+ // Now traverse the path, re-creating if necessary.
+ while (It != RootToLeafPath.end()) {
+ auto NodeFuncID = *It++;
+ auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
+ return N->Func == NodeFuncID;
+ });
+ if (CalleeIt == Node->Callees.end()) {
+ NodeStorage.emplace_back();
+ auto NewNode = &NodeStorage.back();
+ NewNode->Func = NodeFuncID;
+ NewNode->Caller = Node;
+ Node->Callees.push_back(NewNode);
+ Node = NewNode;
+ } else {
+ Node = *CalleeIt;
+ }
+ }
+
+ // At this point, Node *must* be pointing at the leaf.
+ assert(Node->Func == P.front());
+ if (Node->ID == 0) {
+ Node->ID = NextID++;
+ PathIDMap.insert({Node->ID, Node});
+ }
+ return Node->ID;
+}
+
+Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
+ Profile Merged;
+ using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
+ using PathDataMapPtr = std::unique_ptr<PathDataMap>;
+ using PathDataVector = decltype(Profile::Block::PathData);
+ using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
+ ThreadProfileIndexMap ThreadProfileIndex;
+
+ for (const auto &P : {std::ref(L), std::ref(R)})
+ for (const auto &Block : P.get()) {
+ ThreadProfileIndexMap::iterator It;
+ std::tie(It, std::ignore) = ThreadProfileIndex.insert(
+ {Block.Thread, PathDataMapPtr{new PathDataMap()}});
+ for (const auto &PathAndData : Block.PathData) {
+ auto &PathID = PathAndData.first;
+ auto &Data = PathAndData.second;
+ auto NewPathID =
+ Merged.internPath(cantFail(P.get().expandPath(PathID)));
+ PathDataMap::iterator PathDataIt;
+ bool Inserted;
+ std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
+ if (!Inserted) {
+ auto &ExistingData = PathDataIt->second;
+ ExistingData.CallCount += Data.CallCount;
+ ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
+ }
+ }
+ }
+
+ for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
+ PathDataVector PathAndData;
+ PathAndData.reserve(IndexedThreadBlock.second->size());
+ copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
+ cantFail(
+ Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
+ }
+ return Merged;
+}
+
+Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
+ Profile Merged;
+ using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
+ PathDataMap PathData;
+ using PathDataVector = decltype(Profile::Block::PathData);
+ for (const auto &P : {std::ref(L), std::ref(R)})
+ for (const auto &Block : P.get())
+ for (const auto &PathAndData : Block.PathData) {
+ auto &PathId = PathAndData.first;
+ auto &Data = PathAndData.second;
+ auto NewPathID =
+ Merged.internPath(cantFail(P.get().expandPath(PathId)));
+ PathDataMap::iterator PathDataIt;
+ bool Inserted;
+ std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
+ if (!Inserted) {
+ auto &ExistingData = PathDataIt->second;
+ ExistingData.CallCount += Data.CallCount;
+ ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
+ }
+ }
+
+ // In the end there's a single Block, for thread 0.
+ PathDataVector Block;
+ Block.reserve(PathData.size());
+ copy(PathData, std::back_inserter(Block));
+ cantFail(Merged.addBlock({0, std::move(Block)}));
+ return Merged;
+}
+
+Expected<Profile> loadProfile(StringRef Filename) {
+ int Fd;
+ if (auto EC = sys::fs::openFileForRead(Filename, Fd))
+ return make_error<StringError>(
+ Twine("Cannot read profile from '") + Filename + "'", EC);
+
+ uint64_t FileSize;
+ if (auto EC = sys::fs::file_size(Filename, FileSize))
+ return make_error<StringError>(
+ Twine("Cannot get filesize of '") + Filename + "'", EC);
+
+ std::error_code EC;
+ sys::fs::mapped_file_region MappedFile(
+ Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+ if (EC)
+ return make_error<StringError>(
+ Twine("Cannot mmap profile '") + Filename + "'", EC);
+ StringRef Data(MappedFile.data(), MappedFile.size());
+
+ Profile P;
+ uint32_t Offset = 0;
+ DataExtractor Extractor(Data, true, 8);
+
+ // For each block we get from the file:
+ while (Offset != MappedFile.size()) {
+ auto HeaderOrError = readBlockHeader(Extractor, Offset);
+ if (!HeaderOrError)
+ return HeaderOrError.takeError();
+
+ // TODO: Maybe store this header information for each block, even just for
+ // debugging?
+ const auto &Header = HeaderOrError.get();
+
+ // Read in the path data.
+ auto PathOrError = readPath(Extractor, Offset);
+ if (!PathOrError)
+ return PathOrError.takeError();
+ const auto &Path = PathOrError.get();
+
+ // For each path we encounter, we should intern it to get a PathID.
+ auto DataOrError = readData(Extractor, Offset);
+ if (!DataOrError)
+ return DataOrError.takeError();
+ auto &Data = DataOrError.get();
+
+ if (auto E =
+ P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
+ {{P.internPath(Path), std::move(Data)}}}))
+ return std::move(E);
+ }
+
+ return P;
+}
+
+namespace {
+
+struct StackEntry {
+ uint64_t Timestamp;
+ Profile::FuncID FuncId;
+};
+
+} // namespace
+
+Expected<Profile> profileFromTrace(const Trace &T) {
+ Profile P;
+
+ // The implementation of the algorithm re-creates the execution of
+ // the functions based on the trace data. To do this, we set up a number of
+ // data structures to track the execution context of every thread in the
+ // Trace.
+ DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
+ DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
+ ThreadPathData;
+
+ // We then do a pass through the Trace to account data on a per-thread basis.
+ for (const auto &E : T) {
+ auto &TSD = ThreadStacks[E.TId];
+ switch (E.Type) {
+ case RecordTypes::ENTER:
+ case RecordTypes::ENTER_ARG:
+
+ // Push entries into the function call stack.
+ TSD.push_back({E.TSC, E.FuncId});
+ break;
+
+ case RecordTypes::EXIT:
+ case RecordTypes::TAIL_EXIT:
+
+ // Exits cause some accounting to happen, based on the state of the stack.
+ // For each function we pop off the stack, we take note of the path and
+ // record the cumulative state for this path. As we're doing this, we
+ // intern the path into the Profile.
+ while (!TSD.empty()) {
+ auto Top = TSD.back();
+ auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
+ SmallVector<Profile::FuncID, 16> Path;
+ transform(reverse(TSD), std::back_inserter(Path),
+ std::mem_fn(&StackEntry::FuncId));
+ auto InternedPath = P.internPath(Path);
+ auto &TPD = ThreadPathData[E.TId][InternedPath];
+ ++TPD.CallCount;
+ TPD.CumulativeLocalTime += FunctionLocalTime;
+ TSD.pop_back();
+
+ // If we've matched the corresponding entry event for this function,
+ // then we exit the loop.
+ if (Top.FuncId == E.FuncId)
+ break;
+
+ // FIXME: Consider the intermediate times and the cumulative tree time
+ // as well.
+ }
+
+ break;
+
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // TODO: Support an extension point to allow handling of custom and typed
+ // events in profiles.
+ break;
+ }
+ }
+
+ // Once we've gone through the Trace, we now create one Block per thread in
+ // the Profile.
+ for (const auto &ThreadPaths : ThreadPathData) {
+ const auto &TID = ThreadPaths.first;
+ const auto &PathsData = ThreadPaths.second;
+ if (auto E = P.addBlock({
+ TID,
+ std::vector<std::pair<Profile::PathID, Profile::Data>>(
+ PathsData.begin(), PathsData.end()),
+ }))
+ return std::move(E);
+ }
+
+ return P;
+}
+
+} // namespace xray
+} // namespace llvm
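Profile::internPath() above stores paths in a trie keyed by function id: a leaf-to-root path is walked root-to-leaf, missing nodes are created along the way, and the leaf node's ID becomes the PathID. A simplified, self-contained sketch of that interning; PathInterner and its members are names invented for the example and not part of the imported sources.

#include <algorithm>
#include <cstdint>
#include <deque>
#include <vector>

struct TrieNode {
  int32_t Func;
  TrieNode *Caller;
  std::vector<TrieNode *> Callees;
  uint64_t ID;
};

struct PathInterner {
  std::deque<TrieNode> Storage; // deque keeps node addresses stable
  std::vector<TrieNode *> Roots;
  uint64_t NextID = 1;

  uint64_t intern(const std::vector<int32_t> &LeafToRoot) {
    if (LeafToRoot.empty())
      return 0;
    TrieNode *Node = nullptr;
    std::vector<TrieNode *> *Level = &Roots;
    // Paths arrive leaf-first, so walk them from the back (the root).
    for (auto It = LeafToRoot.rbegin(); It != LeafToRoot.rend(); ++It) {
      int32_t Func = *It;
      auto Found = std::find_if(Level->begin(), Level->end(),
                                [Func](TrieNode *N) { return N->Func == Func; });
      if (Found == Level->end()) {
        Storage.push_back(TrieNode{Func, Node, {}, 0});
        Level->push_back(&Storage.back());
        Node = &Storage.back();
      } else {
        Node = *Found;
      }
      Level = &Node->Callees;
    }
    if (Node->ID == 0)
      Node->ID = NextID++; // first time this exact path has been seen
    return Node->ID;
  }
};

int main() {
  PathInterner P;
  uint64_t A = P.intern({3, 2, 1}); // leaf 3, called by 2, called by 1
  uint64_t B = P.intern({3, 2, 1}); // the same path interns to the same id
  return A == B ? 0 : 1;
}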
diff --git a/contrib/llvm/lib/XRay/RecordInitializer.cpp b/contrib/llvm/lib/XRay/RecordInitializer.cpp
new file mode 100644
index 000000000000..f136a1e456b7
--- /dev/null
+++ b/contrib/llvm/lib/XRay/RecordInitializer.cpp
@@ -0,0 +1,418 @@
+//===- RecordInitializer.cpp - XRay FDR Mode Record Initializer -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRRecords.h"
+
+namespace llvm {
+namespace xray {
+
+Error RecordInitializer::visit(BufferExtents &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr, sizeof(uint64_t)))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a buffer extent (%d).",
+ OffsetPtr);
+
+ auto PreReadOffset = OffsetPtr;
+ R.Size = E.getU64(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read buffer extent at offset %d.",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(WallclockRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a wallclock record (%d).",
+ OffsetPtr);
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = OffsetPtr;
+ R.Seconds = E.getU64(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read wall clock 'seconds' field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.Nanos = E.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read wall clock 'nanos' field at offset %d.", OffsetPtr);
+
+ // Align to metadata record size boundary.
+ assert(OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize);
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(NewCPUIDRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a new cpu id record (%d).",
+ OffsetPtr);
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = OffsetPtr;
+ R.CPUId = E.getU16(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read CPU id at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.TSC = E.getU64(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read CPU TSC at offset %d.", OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(TSCWrapRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a new TSC wrap record (%d).",
+ OffsetPtr);
+
+ auto PreReadOffset = OffsetPtr;
+ R.BaseTSC = E.getU64(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read TSC wrap record at offset %d.",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(CustomEventRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a custom event record (%d).",
+ OffsetPtr);
+
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = OffsetPtr;
+ R.Size = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+        "Cannot read a custom event record size field at offset %d.", OffsetPtr);
+
+ if (R.Size <= 0)
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid size for custom event (size = %d) at offset %d.", R.Size,
+ OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.TSC = E.getU64(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a custom event TSC field at offset %d.", OffsetPtr);
+
+  // For version 4 onwards of the FDR log, we also want to capture the CPU ID
+  // of the custom event.
+ if (Version >= 4) {
+ PreReadOffset = OffsetPtr;
+ R.CPU = E.getU16(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Missing CPU field at offset %d", OffsetPtr);
+ }
+
+ assert(OffsetPtr > BeginOffset &&
+ OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize);
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset);
+
+ // Next we read in a fixed chunk of data from the given offset.
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Cannot read %d bytes of custom event data from offset %d.", R.Size,
+ OffsetPtr);
+
+ std::vector<uint8_t> Buffer;
+ Buffer.resize(R.Size);
+ PreReadOffset = OffsetPtr;
+ if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data())
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading data into buffer of size %d at offset %d.", R.Size,
+ OffsetPtr);
+
+ assert(OffsetPtr >= PreReadOffset);
+ if (OffsetPtr - PreReadOffset != static_cast<uint32_t>(R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading enough bytes for the custom event payload -- read %d "
+ "expecting %d bytes at offset %d.",
+ OffsetPtr - PreReadOffset, R.Size, PreReadOffset);
+
+ R.Data.assign(Buffer.begin(), Buffer.end());
+ return Error::success();
+}
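
The payload-carrying visitors (custom events up to version 4, custom events v5, and typed events) all finish with the same fixed-chunk read: check that Size bytes are available, bulk-read them with getU8, and verify the offset advanced by exactly Size. A compact sketch of that step, not part of the patch and with an illustrative helper name:

    // Sketch: bulk-read a Size-byte payload and confirm the extractor consumed
    // exactly that many bytes.
    static Error readPayload(const DataExtractor &E, uint32_t &OffsetPtr,
                             int32_t Size, std::string &Out) {
      if (!E.isValidOffsetForDataOfSize(OffsetPtr, Size))
        return createStringError(std::make_error_code(std::errc::bad_address),
                                 "Cannot read %d bytes from offset %d.", Size,
                                 OffsetPtr);
      std::vector<uint8_t> Buffer(Size);
      uint32_t PreReadOffset = OffsetPtr;
      if (E.getU8(&OffsetPtr, Buffer.data(), Size) != Buffer.data() ||
          OffsetPtr - PreReadOffset != static_cast<uint32_t>(Size))
        return createStringError(
            std::make_error_code(std::errc::invalid_argument),
            "Short read of %d bytes at offset %d.", Size, PreReadOffset);
      Out.assign(Buffer.begin(), Buffer.end());
      return Error::success();
    }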
+
+Error RecordInitializer::visit(CustomEventRecordV5 &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a custom event record (%d).",
+ OffsetPtr);
+
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = OffsetPtr;
+
+ R.Size = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+        "Cannot read a custom event record size field at offset %d.", OffsetPtr);
+
+ if (R.Size <= 0)
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid size for custom event (size = %d) at offset %d.", R.Size,
+ OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.Delta = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a custom event record TSC delta field at offset %d.",
+ OffsetPtr);
+
+ assert(OffsetPtr > BeginOffset &&
+ OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize);
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset);
+
+ // Next we read in a fixed chunk of data from the given offset.
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Cannot read %d bytes of custom event data from offset %d.", R.Size,
+ OffsetPtr);
+
+ std::vector<uint8_t> Buffer;
+ Buffer.resize(R.Size);
+ PreReadOffset = OffsetPtr;
+ if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data())
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading data into buffer of size %d at offset %d.", R.Size,
+ OffsetPtr);
+
+ assert(OffsetPtr >= PreReadOffset);
+ if (OffsetPtr - PreReadOffset != static_cast<uint32_t>(R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading enough bytes for the custom event payload -- read %d "
+ "expecting %d bytes at offset %d.",
+ OffsetPtr - PreReadOffset, R.Size, PreReadOffset);
+
+ R.Data.assign(Buffer.begin(), Buffer.end());
+ return Error::success();
+}
+
+Error RecordInitializer::visit(TypedEventRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a typed event record (%d).",
+ OffsetPtr);
+
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = OffsetPtr;
+
+ R.Size = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+        "Cannot read a typed event record size field at offset %d.", OffsetPtr);
+
+ if (R.Size <= 0)
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid size for typed event (size = %d) at offset %d.", R.Size,
+ OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.Delta = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a typed event record TSC delta field at offset %d.",
+ OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.EventType = E.getU16(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a typed event record type field at offset %d.", OffsetPtr);
+
+ assert(OffsetPtr > BeginOffset &&
+ OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize);
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset);
+
+ // Next we read in a fixed chunk of data from the given offset.
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+        "Cannot read %d bytes of typed event data from offset %d.", R.Size,
+ OffsetPtr);
+
+ std::vector<uint8_t> Buffer;
+ Buffer.resize(R.Size);
+ PreReadOffset = OffsetPtr;
+ if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data())
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading data into buffer of size %d at offset %d.", R.Size,
+ OffsetPtr);
+
+ assert(OffsetPtr >= PreReadOffset);
+ if (OffsetPtr - PreReadOffset != static_cast<uint32_t>(R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading enough bytes for the typed event payload -- read %d "
+ "expecting %d bytes at offset %d.",
+ OffsetPtr - PreReadOffset, R.Size, PreReadOffset);
+
+ R.Data.assign(Buffer.begin(), Buffer.end());
+ return Error::success();
+}
+
+Error RecordInitializer::visit(CallArgRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a call argument record (%d).",
+ OffsetPtr);
+
+ auto PreReadOffset = OffsetPtr;
+ R.Arg = E.getU64(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a call arg record at offset %d.",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(PIDRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a process ID record (%d).",
+ OffsetPtr);
+
+ auto PreReadOffset = OffsetPtr;
+ R.PID = E.getSigned(&OffsetPtr, 4);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a process ID record at offset %d.",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(NewBufferRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a new buffer record (%d).",
+ OffsetPtr);
+
+ auto PreReadOffset = OffsetPtr;
+ R.TID = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a new buffer record at offset %d.",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(EndBufferRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for an end-of-buffer record (%d).",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize;
+ return Error::success();
+}
+
+Error RecordInitializer::visit(FunctionRecord &R) {
+  // For function records, we need to step back one byte to read a full
+  // unsigned 32-bit value. The first four bytes will have the following
+ // layout:
+ //
+ // bit 0 : function record indicator (must be 0)
+ // bits 1..3 : function record type
+  // bits 4..31 : function id
+ //
+ if (OffsetPtr == 0 || !E.isValidOffsetForDataOfSize(
+ --OffsetPtr, FunctionRecord::kFunctionRecordSize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a function record (%d).",
+ OffsetPtr);
+
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = BeginOffset;
+ uint32_t Buffer = E.getU32(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Cannot read function id field from offset %d.",
+ OffsetPtr);
+
+  // To get the function record type, we shift the buffer right by one
+  // (dropping the function record indicator bit) and mask the result with
+  // 0b0111 to get the record type as an unsigned value.
+ unsigned FunctionType = (Buffer >> 1) & 0x07u;
+ switch (FunctionType) {
+ case static_cast<unsigned>(RecordTypes::ENTER):
+ case static_cast<unsigned>(RecordTypes::ENTER_ARG):
+ case static_cast<unsigned>(RecordTypes::EXIT):
+ case static_cast<unsigned>(RecordTypes::TAIL_EXIT):
+ R.Kind = static_cast<RecordTypes>(FunctionType);
+ break;
+ default:
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown function record type '%d' at offset %d.",
+ FunctionType, BeginOffset);
+ }
+
+ R.FuncId = Buffer >> 4;
+ PreReadOffset = OffsetPtr;
+ R.Delta = E.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Failed reading TSC delta from offset %d.",
+ OffsetPtr);
+ assert(FunctionRecord::kFunctionRecordSize == (OffsetPtr - BeginOffset));
+ return Error::success();
+}
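
To make the function-record bit layout concrete, here is a small standalone decoding example. It is not part of the patch; the word value is made up purely to exercise the fields, and the mapping of the three type bits onto RecordTypes is whatever the switch above decides.

    // Sketch: decode the first 32-bit word of an FDR function record.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Word = (42u << 4)   // bits 4..31: function id = 42
                            | (2u << 1)  // bits 1..3 : record type = 2
                            | 0u;        // bit 0     : 0 marks a function record
      unsigned FunctionType = (Word >> 1) & 0x07u; // same mask as the visitor
      uint32_t FuncId = Word >> 4;                 // logical shift on uint32_t
      assert(FunctionType == 2u && FuncId == 42u);
      return 0;
    }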
+
+} // namespace xray
+} // namespace llvm
diff --git a/contrib/llvm/lib/XRay/RecordPrinter.cpp b/contrib/llvm/lib/XRay/RecordPrinter.cpp
new file mode 100644
index 000000000000..71ea7d0e969f
--- /dev/null
+++ b/contrib/llvm/lib/XRay/RecordPrinter.cpp
@@ -0,0 +1,109 @@
+//===- RecordPrinter.cpp - FDR Record Printer -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/RecordPrinter.h"
+
+#include "llvm/Support/FormatVariadic.h"
+
+namespace llvm {
+namespace xray {
+
+Error RecordPrinter::visit(BufferExtents &R) {
+ OS << formatv("<Buffer: size = {0} bytes>", R.size()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(WallclockRecord &R) {
+ OS << formatv("<Wall Time: seconds = {0}.{1,0+6}>", R.seconds(), R.nanos())
+ << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(NewCPUIDRecord &R) {
+ OS << formatv("<CPU: id = {0}, tsc = {1}>", R.cpuid(), R.tsc()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(TSCWrapRecord &R) {
+ OS << formatv("<TSC Wrap: base = {0}>", R.tsc()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(CustomEventRecord &R) {
+ OS << formatv(
+ "<Custom Event: tsc = {0}, cpu = {1}, size = {2}, data = '{3}'>",
+ R.tsc(), R.cpu(), R.size(), R.data())
+ << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(CustomEventRecordV5 &R) {
+ OS << formatv("<Custom Event: delta = +{0}, size = {1}, data = '{2}'>",
+ R.delta(), R.size(), R.data())
+ << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(TypedEventRecord &R) {
+ OS << formatv(
+      "<Typed Event: delta = +{0}, type = {1}, size = {2}, data = '{3}'>",
+ R.delta(), R.eventType(), R.size(), R.data())
+ << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(CallArgRecord &R) {
+ OS << formatv("<Call Argument: data = {0} (hex = {0:x})>", R.arg()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(PIDRecord &R) {
+ OS << formatv("<PID: {0}>", R.pid()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(NewBufferRecord &R) {
+ OS << formatv("<Thread ID: {0}>", R.tid()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(EndBufferRecord &R) {
+ OS << "<End of Buffer>" << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(FunctionRecord &R) {
+ // FIXME: Support symbolization here?
+ switch (R.recordType()) {
+ case RecordTypes::ENTER:
+ OS << formatv("<Function Enter: #{0} delta = +{1}>", R.functionId(),
+ R.delta());
+ break;
+ case RecordTypes::ENTER_ARG:
+ OS << formatv("<Function Enter With Arg: #{0} delta = +{1}>",
+ R.functionId(), R.delta());
+ break;
+ case RecordTypes::EXIT:
+ OS << formatv("<Function Exit: #{0} delta = +{1}>", R.functionId(),
+ R.delta());
+ break;
+ case RecordTypes::TAIL_EXIT:
+ OS << formatv("<Function Tail Exit: #{0} delta = +{1}>", R.functionId(),
+ R.delta());
+ break;
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // TODO: Flag as a bug?
+ break;
+ }
+ OS << Delim;
+ return Error::success();
+}
+
+} // namespace xray
+} // namespace llvm
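
RecordPrinter is just another FDR record visitor, so it is driven the same way the indexer and verifier are driven in the Trace.cpp changes below: each loaded record is handed the visitor through its apply method. A rough usage sketch, not part of the patch; the constructor arguments and the surrounding setup (a positioned FileBasedRecordProducer and a raw_ostream) are assumptions:

    // Sketch: print every record a producer hands back, one per line.
    Error printAllRecords(FileBasedRecordProducer &P, DataExtractor &DE,
                          uint32_t &OffsetPtr, raw_ostream &OS) {
      RecordPrinter Printer(OS, "\n"); // delimiter assumed to be the 2nd arg
      while (DE.isValidOffsetForDataOfSize(OffsetPtr, 1)) {
        auto R = P.produce();          // Expected<std::unique_ptr<Record>>
        if (!R)
          return R.takeError();
        if (auto E = R.get()->apply(Printer))
          return E;
      }
      return Error::success();
    }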
diff --git a/contrib/llvm/lib/XRay/Trace.cpp b/contrib/llvm/lib/XRay/Trace.cpp
index a8764b25483c..4f28f3f754c1 100644
--- a/contrib/llvm/lib/XRay/Trace.cpp
+++ b/contrib/llvm/lib/XRay/Trace.cpp
@@ -15,7 +15,16 @@
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/XRay/BlockIndexer.h"
+#include "llvm/XRay/BlockVerifier.h"
+#include "llvm/XRay/FDRRecordConsumer.h"
+#include "llvm/XRay/FDRRecordProducer.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/FDRTraceExpander.h"
+#include "llvm/XRay/FileHeaderReader.h"
#include "llvm/XRay/YAMLXRayRecord.h"
+#include <memory>
+#include <vector>
using namespace llvm;
using namespace llvm::xray;
@@ -25,38 +34,8 @@ namespace {
using XRayRecordStorage =
std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
-// Populates the FileHeader reference by reading the first 32 bytes of the file.
-Error readBinaryFormatHeader(StringRef Data, XRayFileHeader &FileHeader) {
- // FIXME: Maybe deduce whether the data is little or big-endian using some
- // magic bytes in the beginning of the file?
-
- // First 32 bytes of the file will always be the header. We assume a certain
- // format here:
- //
- // (2) uint16 : version
- // (2) uint16 : type
- // (4) uint32 : bitfield
- // (8) uint64 : cycle frequency
- // (16) - : padding
-
- DataExtractor HeaderExtractor(Data, true, 8);
- uint32_t OffsetPtr = 0;
- FileHeader.Version = HeaderExtractor.getU16(&OffsetPtr);
- FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr);
- uint32_t Bitfield = HeaderExtractor.getU32(&OffsetPtr);
- FileHeader.ConstantTSC = Bitfield & 1uL;
- FileHeader.NonstopTSC = Bitfield & 1uL << 1;
- FileHeader.CycleFrequency = HeaderExtractor.getU64(&OffsetPtr);
- std::memcpy(&FileHeader.FreeFormData, Data.bytes_begin() + OffsetPtr, 16);
- if (FileHeader.Version != 1 && FileHeader.Version != 2 &&
- FileHeader.Version != 3)
- return make_error<StringError>(
- Twine("Unsupported XRay file version: ") + Twine(FileHeader.Version),
- std::make_error_code(std::errc::invalid_argument));
- return Error::success();
-}
-
-Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader,
+Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
+ XRayFileHeader &FileHeader,
std::vector<XRayRecord> &Records) {
if (Data.size() < 32)
return make_error<StringError>(
@@ -68,8 +47,12 @@ Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader,
"Invalid-sized XRay data.",
std::make_error_code(std::errc::invalid_argument));
- if (auto E = readBinaryFormatHeader(Data, FileHeader))
- return E;
+ DataExtractor Reader(Data, IsLittleEndian, 8);
+ uint32_t OffsetPtr = 0;
+ auto FileHeaderOrError = readBinaryFormatHeader(Reader, OffsetPtr);
+ if (!FileHeaderOrError)
+ return FileHeaderOrError.takeError();
+ FileHeader = std::move(FileHeaderOrError.get());
// Each record after the header will be 32 bytes, in the following format:
//
@@ -81,16 +64,38 @@ Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader,
// (4) uint32 : thread id
// (4) uint32 : process id
// (8) - : padding
- for (auto S = Data.drop_front(32); !S.empty(); S = S.drop_front(32)) {
- DataExtractor RecordExtractor(S, true, 8);
- uint32_t OffsetPtr = 0;
- switch (auto RecordType = RecordExtractor.getU16(&OffsetPtr)) {
+ while (Reader.isValidOffset(OffsetPtr)) {
+ if (!Reader.isValidOffsetForDataOfSize(OffsetPtr, 32))
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Not enough bytes to read a full record at offset %d.", OffsetPtr);
+ auto PreReadOffset = OffsetPtr;
+ auto RecordType = Reader.getU16(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading record type at offset %d.", OffsetPtr);
+
+ switch (RecordType) {
case 0: { // Normal records.
Records.emplace_back();
auto &Record = Records.back();
Record.RecordType = RecordType;
- Record.CPU = RecordExtractor.getU8(&OffsetPtr);
- auto Type = RecordExtractor.getU8(&OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ Record.CPU = Reader.getU8(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading CPU field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ auto Type = Reader.getU8(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading record type field at offset %d.", OffsetPtr);
+
switch (Type) {
case 0:
Record.Type = RecordTypes::ENTER;
@@ -105,393 +110,96 @@ Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader,
Record.Type = RecordTypes::ENTER_ARG;
break;
default:
- return make_error<StringError>(
- Twine("Unknown record type '") + Twine(int{Type}) + "'",
- std::make_error_code(std::errc::executable_format_error));
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Unknown record type '%d' at offset %d.", Type, OffsetPtr);
}
- Record.FuncId = RecordExtractor.getSigned(&OffsetPtr, sizeof(int32_t));
- Record.TSC = RecordExtractor.getU64(&OffsetPtr);
- Record.TId = RecordExtractor.getU32(&OffsetPtr);
- Record.PId = RecordExtractor.getU32(&OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ Record.FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading function id field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ Record.TSC = Reader.getU64(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading TSC field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ Record.TId = Reader.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading thread id field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ Record.PId = Reader.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading process id at offset %d.", OffsetPtr);
+
break;
}
case 1: { // Arg payload record.
auto &Record = Records.back();
- // Advance two bytes to avoid padding.
+
+      // We skip the next two bytes of the record, because we don't need the
+      // CPU and record-type fields for arg payloads.
OffsetPtr += 2;
- int32_t FuncId = RecordExtractor.getSigned(&OffsetPtr, sizeof(int32_t));
- auto TId = RecordExtractor.getU32(&OffsetPtr);
- auto PId = RecordExtractor.getU32(&OffsetPtr);
+ PreReadOffset = OffsetPtr;
+ int32_t FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading function id field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ auto TId = Reader.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading thread id field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ auto PId = Reader.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading process id field at offset %d.", OffsetPtr);
// Make a check for versions above 3 for the Pid field
if (Record.FuncId != FuncId || Record.TId != TId ||
(FileHeader.Version >= 3 ? Record.PId != PId : false))
- return make_error<StringError>(
- Twine("Corrupted log, found arg payload following non-matching "
- "function + thread record. Record for function ") +
- Twine(Record.FuncId) + " != " + Twine(FuncId) + "; offset: " +
- Twine(S.data() - Data.data()),
- std::make_error_code(std::errc::executable_format_error));
-
- auto Arg = RecordExtractor.getU64(&OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Corrupted log, found arg payload following non-matching "
+ "function+thread record. Record for function %d != %d at offset "
+ "%d",
+ Record.FuncId, FuncId, OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ auto Arg = Reader.getU64(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading argument payload at offset %d.", OffsetPtr);
+
Record.CallArgs.push_back(Arg);
break;
}
default:
- return make_error<StringError>(
- Twine("Unknown record type == ") + Twine(RecordType),
- std::make_error_code(std::errc::executable_format_error));
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Unknown record type '%d' at offset %d.", RecordType, OffsetPtr);
}
- }
- return Error::success();
-}
-
-/// When reading from a Flight Data Recorder mode log, metadata records are
-/// sparse compared to packed function records, so we must maintain state as we
-/// read through the sequence of entries. This allows the reader to denormalize
-/// the CPUId and Thread Id onto each Function Record and transform delta
-/// encoded TSC values into absolute encodings on each record.
-struct FDRState {
- uint16_t CPUId;
- uint16_t ThreadId;
- int32_t ProcessId;
- uint64_t BaseTSC;
-
- /// Encode some of the state transitions for the FDR log reader as explicit
- /// checks. These are expectations for the next Record in the stream.
- enum class Token {
- NEW_BUFFER_RECORD_OR_EOF,
- WALLCLOCK_RECORD,
- NEW_CPU_ID_RECORD,
- FUNCTION_SEQUENCE,
- SCAN_TO_END_OF_THREAD_BUF,
- CUSTOM_EVENT_DATA,
- CALL_ARGUMENT,
- BUFFER_EXTENTS,
- PID_RECORD,
- };
- Token Expects;
-
- // Each threads buffer may have trailing garbage to scan over, so we track our
- // progress.
- uint64_t CurrentBufferSize;
- uint64_t CurrentBufferConsumed;
-};
-
-const char *fdrStateToTwine(const FDRState::Token &state) {
- switch (state) {
- case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
- return "NEW_BUFFER_RECORD_OR_EOF";
- case FDRState::Token::WALLCLOCK_RECORD:
- return "WALLCLOCK_RECORD";
- case FDRState::Token::NEW_CPU_ID_RECORD:
- return "NEW_CPU_ID_RECORD";
- case FDRState::Token::FUNCTION_SEQUENCE:
- return "FUNCTION_SEQUENCE";
- case FDRState::Token::SCAN_TO_END_OF_THREAD_BUF:
- return "SCAN_TO_END_OF_THREAD_BUF";
- case FDRState::Token::CUSTOM_EVENT_DATA:
- return "CUSTOM_EVENT_DATA";
- case FDRState::Token::CALL_ARGUMENT:
- return "CALL_ARGUMENT";
- case FDRState::Token::BUFFER_EXTENTS:
- return "BUFFER_EXTENTS";
- case FDRState::Token::PID_RECORD:
- return "PID_RECORD";
- }
- return "UNKNOWN";
-}
-
-/// State transition when a NewBufferRecord is encountered.
-Error processFDRNewBufferRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
-
- if (State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
- return make_error<StringError>(
- Twine("Malformed log. Read New Buffer record kind out of sequence; "
- "expected: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
- uint32_t OffsetPtr = 1; // 1 byte into record.
- State.ThreadId = RecordExtractor.getU16(&OffsetPtr);
- State.Expects = FDRState::Token::WALLCLOCK_RECORD;
- return Error::success();
-}
-
-/// State transition when an EndOfBufferRecord is encountered.
-Error processFDREndOfBufferRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
- if (State.Expects == FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
- return make_error<StringError>(
- Twine("Malformed log. Received EOB message without current buffer; "
- "expected: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
- State.Expects = FDRState::Token::SCAN_TO_END_OF_THREAD_BUF;
- return Error::success();
-}
-
-/// State transition when a NewCPUIdRecord is encountered.
-Error processFDRNewCPUIdRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
- if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE &&
- State.Expects != FDRState::Token::NEW_CPU_ID_RECORD)
- return make_error<StringError>(
- Twine("Malformed log. Read NewCPUId record kind out of sequence; "
- "expected: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
- uint32_t OffsetPtr = 1; // Read starting after the first byte.
- State.CPUId = RecordExtractor.getU16(&OffsetPtr);
- State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
- State.Expects = FDRState::Token::FUNCTION_SEQUENCE;
- return Error::success();
-}
-
-/// State transition when a TSCWrapRecord (overflow detection) is encountered.
-Error processFDRTSCWrapRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
- if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE)
- return make_error<StringError>(
- Twine("Malformed log. Read TSCWrap record kind out of sequence; "
- "expecting: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
- uint32_t OffsetPtr = 1; // Read starting after the first byte.
- State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
- return Error::success();
-}
-
-/// State transition when a WallTimeMarkerRecord is encountered.
-Error processFDRWallTimeRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
- if (State.Expects != FDRState::Token::WALLCLOCK_RECORD)
- return make_error<StringError>(
- Twine("Malformed log. Read Wallclock record kind out of sequence; "
- "expecting: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
-
- // TODO: Someday, reconcile the TSC ticks to wall clock time for presentation
- // purposes. For now, we're ignoring these records.
- State.Expects = FDRState::Token::NEW_CPU_ID_RECORD;
- return Error::success();
-}
-
-/// State transition when a PidRecord is encountered.
-Error processFDRPidRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
-
- if (State.Expects != FDRState::Token::PID_RECORD)
- return make_error<StringError>(
- Twine("Malformed log. Read Pid record kind out of sequence; "
- "expected: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
-
- uint32_t OffsetPtr = 1; // Read starting after the first byte.
- State.ProcessId = RecordExtractor.getU32(&OffsetPtr);
- State.Expects = FDRState::Token::NEW_CPU_ID_RECORD;
- return Error::success();
-}
-
-/// State transition when a CustomEventMarker is encountered.
-Error processCustomEventMarker(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor,
- size_t &RecordSize) {
- // We can encounter a CustomEventMarker anywhere in the log, so we can handle
- // it regardless of the expectation. However, we do set the expectation to
- // read a set number of fixed bytes, as described in the metadata.
- uint32_t OffsetPtr = 1; // Read after the first byte.
- uint32_t DataSize = RecordExtractor.getU32(&OffsetPtr);
- uint64_t TSC = RecordExtractor.getU64(&OffsetPtr);
-
- // FIXME: Actually represent the record through the API. For now we only
- // skip through the data.
- (void)TSC;
- RecordSize = 16 + DataSize;
- return Error::success();
-}
-
-/// State transition when an BufferExtents record is encountered.
-Error processBufferExtents(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
- if (State.Expects != FDRState::Token::BUFFER_EXTENTS)
- return make_error<StringError>(
- Twine("Malformed log. Buffer Extents unexpected; expected: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
- uint32_t OffsetPtr = 1; // Read after the first byte.
- State.CurrentBufferSize = RecordExtractor.getU64(&OffsetPtr);
- State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
- return Error::success();
-}
-
-/// State transition when a CallArgumentRecord is encountered.
-Error processFDRCallArgumentRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor,
- std::vector<XRayRecord> &Records) {
- uint32_t OffsetPtr = 1; // Read starting after the first byte.
- auto &Enter = Records.back();
-
- if (Enter.Type != RecordTypes::ENTER)
- return make_error<StringError>(
- "CallArgument needs to be right after a function entry",
- std::make_error_code(std::errc::executable_format_error));
- Enter.Type = RecordTypes::ENTER_ARG;
- Enter.CallArgs.emplace_back(RecordExtractor.getU64(&OffsetPtr));
- return Error::success();
-}
-
-/// Advances the state machine for reading the FDR record type by reading one
-/// Metadata Record and updating the State appropriately based on the kind of
-/// record encountered. The RecordKind is encoded in the first byte of the
-/// Record, which the caller should pass in because they have already read it
-/// to determine that this is a metadata record as opposed to a function record.
-///
-/// Beginning with Version 2 of the FDR log, we do not depend on the size of the
-/// buffer, but rather use the extents to determine how far to read in the log
-/// for this particular buffer.
-///
-/// In Version 3, FDR log now includes a pid metadata record after
-/// WallTimeMarker
-Error processFDRMetadataRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor,
- size_t &RecordSize,
- std::vector<XRayRecord> &Records,
- uint16_t Version) {
- // The remaining 7 bits are the RecordKind enum.
- uint8_t RecordKind = RecordFirstByte >> 1;
- switch (RecordKind) {
- case 0: // NewBuffer
- if (auto E =
- processFDRNewBufferRecord(State, RecordFirstByte, RecordExtractor))
- return E;
- break;
- case 1: // EndOfBuffer
- if (Version >= 2)
- return make_error<StringError>(
- "Since Version 2 of FDR logging, we no longer support EOB records.",
- std::make_error_code(std::errc::executable_format_error));
- if (auto E = processFDREndOfBufferRecord(State, RecordFirstByte,
- RecordExtractor))
- return E;
- break;
- case 2: // NewCPUId
- if (auto E =
- processFDRNewCPUIdRecord(State, RecordFirstByte, RecordExtractor))
- return E;
- break;
- case 3: // TSCWrap
- if (auto E =
- processFDRTSCWrapRecord(State, RecordFirstByte, RecordExtractor))
- return E;
- break;
- case 4: // WallTimeMarker
- if (auto E =
- processFDRWallTimeRecord(State, RecordFirstByte, RecordExtractor))
- return E;
- // In Version 3 and and above, a PidRecord is expected after WallTimeRecord
- if (Version >= 3)
- State.Expects = FDRState::Token::PID_RECORD;
- break;
- case 5: // CustomEventMarker
- if (auto E = processCustomEventMarker(State, RecordFirstByte,
- RecordExtractor, RecordSize))
- return E;
- break;
- case 6: // CallArgument
- if (auto E = processFDRCallArgumentRecord(State, RecordFirstByte,
- RecordExtractor, Records))
- return E;
- break;
- case 7: // BufferExtents
- if (auto E = processBufferExtents(State, RecordFirstByte, RecordExtractor))
- return E;
- break;
- case 9: // Pid
- if (auto E = processFDRPidRecord(State, RecordFirstByte, RecordExtractor))
- return E;
- break;
- default:
- // Widen the record type to uint16_t to prevent conversion to char.
- return make_error<StringError>(
- Twine("Illegal metadata record type: ")
- .concat(Twine(static_cast<unsigned>(RecordKind))),
- std::make_error_code(std::errc::executable_format_error));
- }
- return Error::success();
-}
-
-/// Reads a function record from an FDR format log, appending a new XRayRecord
-/// to the vector being populated and updating the State with a new value
-/// reference value to interpret TSC deltas.
-///
-/// The XRayRecord constructed includes information from the function record
-/// processed here as well as Thread ID and CPU ID formerly extracted into
-/// State.
-Error processFDRFunctionRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor,
- std::vector<XRayRecord> &Records) {
- switch (State.Expects) {
- case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
- return make_error<StringError>(
- "Malformed log. Received Function Record before new buffer setup.",
- std::make_error_code(std::errc::executable_format_error));
- case FDRState::Token::WALLCLOCK_RECORD:
- return make_error<StringError>(
- "Malformed log. Received Function Record when expecting wallclock.",
- std::make_error_code(std::errc::executable_format_error));
- case FDRState::Token::PID_RECORD:
- return make_error<StringError>(
- "Malformed log. Received Function Record when expecting pid.",
- std::make_error_code(std::errc::executable_format_error));
- case FDRState::Token::NEW_CPU_ID_RECORD:
- return make_error<StringError>(
- "Malformed log. Received Function Record before first CPU record.",
- std::make_error_code(std::errc::executable_format_error));
- default:
- Records.emplace_back();
- auto &Record = Records.back();
- Record.RecordType = 0; // Record is type NORMAL.
- // Strip off record type bit and use the next three bits.
- uint8_t RecordType = (RecordFirstByte >> 1) & 0x07;
- switch (RecordType) {
- case static_cast<uint8_t>(RecordTypes::ENTER):
- Record.Type = RecordTypes::ENTER;
- break;
- case static_cast<uint8_t>(RecordTypes::EXIT):
- Record.Type = RecordTypes::EXIT;
- break;
- case static_cast<uint8_t>(RecordTypes::TAIL_EXIT):
- Record.Type = RecordTypes::TAIL_EXIT;
- break;
- default:
- // Cast to an unsigned integer to not interpret the record type as a char.
- return make_error<StringError>(
- Twine("Illegal function record type: ")
- .concat(Twine(static_cast<unsigned>(RecordType))),
- std::make_error_code(std::errc::executable_format_error));
- }
- Record.CPU = State.CPUId;
- Record.TId = State.ThreadId;
- Record.PId = State.ProcessId;
- // Back up to read first 32 bits, including the 4 we pulled RecordType
- // and RecordKind out of. The remaining 28 are FunctionId.
- uint32_t OffsetPtr = 0;
- // Despite function Id being a signed int on XRayRecord,
- // when it is written to an FDR format, the top bits are truncated,
- // so it is effectively an unsigned value. When we shift off the
- // top four bits, we want the shift to be logical, so we read as
- // uint32_t.
- uint32_t FuncIdBitField = RecordExtractor.getU32(&OffsetPtr);
- Record.FuncId = FuncIdBitField >> 4;
- // FunctionRecords have a 32 bit delta from the previous absolute TSC
- // or TSC delta. If this would overflow, we should read a TSCWrap record
- // with an absolute TSC reading.
- uint64_t NewTSC = State.BaseTSC + RecordExtractor.getU32(&OffsetPtr);
- State.BaseTSC = NewTSC;
- Record.TSC = NewTSC;
+ // Advance the offset pointer enough bytes to align to 32-byte records for
+ // basic mode logs.
+ OffsetPtr += 8;
}
return Error::success();
}
@@ -539,112 +247,97 @@ Error processFDRFunctionRecord(FDRState &State, uint8_t RecordFirstByte,
/// ThreadBuffer: BufferExtents NewBuffer WallClockTime Pid NewCPUId
/// FunctionSequence
/// EOB: *deprecated*
-Error loadFDRLog(StringRef Data, XRayFileHeader &FileHeader,
- std::vector<XRayRecord> &Records) {
+///
+/// In Version 4, we make the following changes:
+///
+/// CustomEventRecord now includes the CPU data.
+///
+/// In Version 5, we make the following changes:
+///
+/// CustomEventRecord and TypedEventRecord now use TSC delta encoding similar to
+/// what FunctionRecord instances use, and we no longer need to include the CPU
+/// id in the CustomEventRecord.
+///
+Error loadFDRLog(StringRef Data, bool IsLittleEndian,
+ XRayFileHeader &FileHeader, std::vector<XRayRecord> &Records) {
+
if (Data.size() < 32)
- return make_error<StringError>(
- "Not enough bytes for an XRay log.",
- std::make_error_code(std::errc::invalid_argument));
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Not enough bytes for an XRay FDR log.");
+ DataExtractor DE(Data, IsLittleEndian, 8);
- // For an FDR log, there are records sized 16 and 8 bytes.
- // There actually may be no records if no non-trivial functions are
- // instrumented.
- if (Data.size() % 8 != 0)
- return make_error<StringError>(
- "Invalid-sized XRay data.",
- std::make_error_code(std::errc::invalid_argument));
+ uint32_t OffsetPtr = 0;
+ auto FileHeaderOrError = readBinaryFormatHeader(DE, OffsetPtr);
+ if (!FileHeaderOrError)
+ return FileHeaderOrError.takeError();
+ FileHeader = std::move(FileHeaderOrError.get());
- if (auto E = readBinaryFormatHeader(Data, FileHeader))
- return E;
+ // First we load the records into memory.
+ std::vector<std::unique_ptr<Record>> FDRRecords;
- uint64_t BufferSize = 0;
{
- StringRef ExtraDataRef(FileHeader.FreeFormData, 16);
- DataExtractor ExtraDataExtractor(ExtraDataRef, true, 8);
- uint32_t ExtraDataOffset = 0;
- BufferSize = ExtraDataExtractor.getU64(&ExtraDataOffset);
+ FileBasedRecordProducer P(FileHeader, DE, OffsetPtr);
+ LogBuilderConsumer C(FDRRecords);
+ while (DE.isValidOffsetForDataOfSize(OffsetPtr, 1)) {
+ auto R = P.produce();
+ if (!R)
+ return R.takeError();
+ if (auto E = C.consume(std::move(R.get())))
+ return E;
+ }
}
- FDRState::Token InitialExpectation;
- switch (FileHeader.Version) {
- case 1:
- InitialExpectation = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
- break;
- case 2:
- case 3:
- InitialExpectation = FDRState::Token::BUFFER_EXTENTS;
- break;
- default:
- return make_error<StringError>(
- Twine("Unsupported version '") + Twine(FileHeader.Version) + "'",
- std::make_error_code(std::errc::executable_format_error));
+ // Next we index the records into blocks.
+ BlockIndexer::Index Index;
+ {
+ BlockIndexer Indexer(Index);
+ for (auto &R : FDRRecords)
+ if (auto E = R->apply(Indexer))
+ return E;
+ if (auto E = Indexer.flush())
+ return E;
}
- FDRState State{0, 0, 0, 0, InitialExpectation, BufferSize, 0};
-
- // RecordSize will tell the loop how far to seek ahead based on the record
- // type that we have just read.
- size_t RecordSize = 0;
- for (auto S = Data.drop_front(32); !S.empty(); S = S.drop_front(RecordSize)) {
- DataExtractor RecordExtractor(S, true, 8);
- uint32_t OffsetPtr = 0;
- if (State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF) {
- RecordSize = State.CurrentBufferSize - State.CurrentBufferConsumed;
- if (S.size() < RecordSize) {
- return make_error<StringError>(
- Twine("Incomplete thread buffer. Expected at least ") +
- Twine(RecordSize) + " bytes but found " + Twine(S.size()),
- make_error_code(std::errc::invalid_argument));
+
+ // Then we verify the consistency of the blocks.
+ {
+ for (auto &PTB : Index) {
+ auto &Blocks = PTB.second;
+ for (auto &B : Blocks) {
+ BlockVerifier Verifier;
+ for (auto *R : B.Records)
+ if (auto E = R->apply(Verifier))
+ return E;
+ if (auto E = Verifier.verify())
+ return E;
}
- State.CurrentBufferConsumed = 0;
- State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
- continue;
- }
- uint8_t BitField = RecordExtractor.getU8(&OffsetPtr);
- bool isMetadataRecord = BitField & 0x01uL;
- bool isBufferExtents =
- (BitField >> 1) == 7; // BufferExtents record kind == 7
- if (isMetadataRecord) {
- RecordSize = 16;
- if (auto E =
- processFDRMetadataRecord(State, BitField, RecordExtractor,
- RecordSize, Records, FileHeader.Version))
- return E;
- } else { // Process Function Record
- RecordSize = 8;
- if (auto E = processFDRFunctionRecord(State, BitField, RecordExtractor,
- Records))
- return E;
}
+ }
- // The BufferExtents record is technically not part of the buffer, so we
- // don't count the size of that record against the buffer's actual size.
- if (!isBufferExtents)
- State.CurrentBufferConsumed += RecordSize;
- assert(State.CurrentBufferConsumed <= State.CurrentBufferSize);
- if ((FileHeader.Version == 2 || FileHeader.Version == 3) &&
- State.CurrentBufferSize == State.CurrentBufferConsumed) {
- // In Version 2 of the log, we don't need to scan to the end of the thread
- // buffer if we've already consumed all the bytes we need to.
- State.Expects = FDRState::Token::BUFFER_EXTENTS;
- State.CurrentBufferSize = BufferSize;
- State.CurrentBufferConsumed = 0;
+ // This is now the meat of the algorithm. Here we sort the blocks according to
+ // the Walltime record in each of the blocks for the same thread. This allows
+ // us to more consistently recreate the execution trace in temporal order.
+ // After the sort, we then reconstitute `Trace` records using a stateful
+ // visitor associated with a single process+thread pair.
+ {
+ for (auto &PTB : Index) {
+ auto &Blocks = PTB.second;
+ llvm::sort(Blocks, [](const BlockIndexer::Block &L,
+ const BlockIndexer::Block &R) {
+        return L.WallclockTime->seconds() < R.WallclockTime->seconds() ||
+               (L.WallclockTime->seconds() == R.WallclockTime->seconds() &&
+                L.WallclockTime->nanos() < R.WallclockTime->nanos());
+ });
+ auto Adder = [&](const XRayRecord &R) { Records.push_back(R); };
+ TraceExpander Expander(Adder, FileHeader.Version);
+ for (auto &B : Blocks) {
+ for (auto *R : B.Records)
+ if (auto E = R->apply(Expander))
+ return E;
+ }
+ if (auto E = Expander.flush())
+ return E;
}
}
- // Having iterated over everything we've been given, we've either consumed
- // everything and ended up in the end state, or were told to skip the rest.
- bool Finished = State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF &&
- State.CurrentBufferSize == State.CurrentBufferConsumed;
- if ((State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF &&
- State.Expects != FDRState::Token::BUFFER_EXTENTS) &&
- !Finished)
- return make_error<StringError>(
- Twine("Encountered EOF with unexpected state expectation ") +
- fdrStateToTwine(State.Expects) +
- ". Remaining expected bytes in thread buffer total " +
- Twine(State.CurrentBufferSize - State.CurrentBufferConsumed),
- std::make_error_code(std::errc::executable_format_error));
-
return Error::success();
}
@@ -670,8 +363,9 @@ Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
Records.clear();
std::transform(Trace.Records.begin(), Trace.Records.end(),
std::back_inserter(Records), [&](const YAMLXRayRecord &R) {
- return XRayRecord{R.RecordType, R.CPU, R.Type, R.FuncId,
- R.TSC, R.TId, R.PId, R.CallArgs};
+ return XRayRecord{R.RecordType, R.CPU, R.Type,
+ R.FuncId, R.TSC, R.TId,
+ R.PId, R.CallArgs, R.Data};
});
return Error::success();
}
@@ -705,6 +399,17 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
}
auto Data = StringRef(MappedFile.data(), MappedFile.size());
+ // TODO: Lift the endianness and implementation selection here.
+ DataExtractor LittleEndianDE(Data, true, 8);
+ auto TraceOrError = loadTrace(LittleEndianDE, Sort);
+ if (!TraceOrError) {
+ DataExtractor BigEndianDE(Data, false, 8);
+ TraceOrError = loadTrace(BigEndianDE, Sort);
+ }
+ return TraceOrError;
+}
+
+Expected<Trace> llvm::xray::loadTrace(const DataExtractor &DE, bool Sort) {
// Attempt to detect the file type using file magic. We have a slight bias
// towards the binary format, and we do this by making sure that the first 4
// bytes of the binary file is some combination of the following byte
@@ -719,8 +424,7 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
//
// Only if we can't load either the binary or the YAML format will we yield an
// error.
- StringRef Magic(MappedFile.data(), 4);
- DataExtractor HeaderExtractor(Magic, true, 8);
+ DataExtractor HeaderExtractor(DE.getData(), DE.isLittleEndian(), 8);
uint32_t OffsetPtr = 0;
uint16_t Version = HeaderExtractor.getU16(&OffsetPtr);
uint16_t Type = HeaderExtractor.getU16(&OffsetPtr);
@@ -731,7 +435,8 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
switch (Type) {
case NAIVE_FORMAT:
if (Version == 1 || Version == 2 || Version == 3) {
- if (auto E = loadNaiveFormatLog(Data, T.FileHeader, T.Records))
+ if (auto E = loadNaiveFormatLog(DE.getData(), DE.isLittleEndian(),
+ T.FileHeader, T.Records))
return std::move(E);
} else {
return make_error<StringError>(
@@ -741,8 +446,9 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
}
break;
case FLIGHT_DATA_RECORDER_FORMAT:
- if (Version == 1 || Version == 2 || Version == 3) {
- if (auto E = loadFDRLog(Data, T.FileHeader, T.Records))
+ if (Version >= 1 && Version <= 5) {
+ if (auto E = loadFDRLog(DE.getData(), DE.isLittleEndian(), T.FileHeader,
+ T.Records))
return std::move(E);
} else {
return make_error<StringError>(
@@ -751,15 +457,15 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
}
break;
default:
- if (auto E = loadYAMLLog(Data, T.FileHeader, T.Records))
+ if (auto E = loadYAMLLog(DE.getData(), T.FileHeader, T.Records))
return std::move(E);
}
if (Sort)
std::stable_sort(T.Records.begin(), T.Records.end(),
- [&](const XRayRecord &L, const XRayRecord &R) {
- return L.TSC < R.TSC;
- });
+ [&](const XRayRecord &L, const XRayRecord &R) {
+ return L.TSC < R.TSC;
+ });
return std::move(T);
}
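
With the loader now endianness-aware, the public entry point is unchanged for callers: llvm::xray::loadTraceFile still returns an Expected<Trace> whose records come back in (optionally sorted) TSC order. A brief caller sketch, not part of the patch; the file name and the dump function are illustrative:

    // Sketch: load a trace and walk the normalized XRayRecords.
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/XRay/Trace.h"
    using namespace llvm;

    static Error dumpFunctionIds(StringRef Filename) {
      auto TraceOrErr = xray::loadTraceFile(Filename, /*Sort=*/true);
      if (!TraceOrErr)
        return TraceOrErr.takeError();
      for (const auto &Rec : *TraceOrErr) // Trace is iterable over XRayRecord
        outs() << Rec.FuncId << " @ TSC " << Rec.TSC << "\n";
      return Error::success();
    }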
diff --git a/contrib/llvm/tools/bugpoint/CrashDebugger.cpp b/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
index a5b31e1ab321..ef6a214fde20 100644
--- a/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
+++ b/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
@@ -315,6 +315,66 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function *> &Funcs) {
}
namespace {
+/// ReduceCrashingFunctionAttributes reducer - This works by removing
+/// attributes on a particular function and seeing if the program still crashes.
+/// If it does, then keep the newer, smaller program.
+///
+class ReduceCrashingFunctionAttributes : public ListReducer<Attribute> {
+ BugDriver &BD;
+ std::string FnName;
+ BugTester TestFn;
+
+public:
+ ReduceCrashingFunctionAttributes(BugDriver &bd, const std::string &FnName,
+ BugTester testFn)
+ : BD(bd), FnName(FnName), TestFn(testFn) {}
+
+ Expected<TestResult> doTest(std::vector<Attribute> &Prefix,
+ std::vector<Attribute> &Kept) override {
+ if (!Kept.empty() && TestFuncAttrs(Kept))
+ return KeepSuffix;
+ if (!Prefix.empty() && TestFuncAttrs(Prefix))
+ return KeepPrefix;
+ return NoFailure;
+ }
+
+ bool TestFuncAttrs(std::vector<Attribute> &Attrs);
+};
+}
+
+bool ReduceCrashingFunctionAttributes::TestFuncAttrs(
+ std::vector<Attribute> &Attrs) {
+ // Clone the program to try hacking it apart...
+ std::unique_ptr<Module> M = CloneModule(BD.getProgram());
+ Function *F = M->getFunction(FnName);
+
+ // Build up an AttributeList from the attributes we've been given by the
+ // reducer.
+ AttrBuilder AB;
+ for (auto A : Attrs)
+ AB.addAttribute(A);
+ AttributeList NewAttrs;
+ NewAttrs =
+ NewAttrs.addAttributes(BD.getContext(), AttributeList::FunctionIndex, AB);
+
+ // Set this new list of attributes on the function.
+ F->setAttributes(NewAttrs);
+
+ // Try running on the hacked up program...
+ if (TestFn(BD, M.get())) {
+ BD.setNewProgram(std::move(M)); // It crashed, keep the trimmed version...
+
+ // Pass along the set of attributes that caused the crash.
+ Attrs.clear();
+ for (Attribute A : NewAttrs.getFnAttributes()) {
+ Attrs.push_back(A);
+ }
+ return true;
+ }
+ return false;
+}
+
+namespace {
/// Simplify the CFG without completely destroying it.
/// This is not well defined, but basically comes down to "try to eliminate
/// unreachable blocks and constant fold terminators without deciding that
@@ -409,7 +469,7 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock *> &BBs) {
for (BasicBlock *Succ : successors(&BB))
Succ->removePredecessor(&BB);
- TerminatorInst *BBTerm = BB.getTerminator();
+ Instruction *BBTerm = BB.getTerminator();
if (BBTerm->isEHPad() || BBTerm->getType()->isTokenTy())
continue;
if (!BBTerm->getType()->isVoidTy())
@@ -703,7 +763,7 @@ bool ReduceCrashingInstructions::TestInsts(
// Convert list to set for fast lookup...
SmallPtrSet<Instruction *, 32> Instructions;
for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
- assert(!isa<TerminatorInst>(Insts[i]));
+ assert(!Insts[i]->isTerminator());
Instructions.insert(cast<Instruction>(VMap[Insts[i]]));
}
@@ -717,7 +777,7 @@ bool ReduceCrashingInstructions::TestInsts(
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI)
for (BasicBlock::iterator I = FI->begin(), E = FI->end(); I != E;) {
Instruction *Inst = &*I++;
- if (!Instructions.count(Inst) && !isa<TerminatorInst>(Inst) &&
+ if (!Instructions.count(Inst) && !Inst->isTerminator() &&
!Inst->isEHPad() && !Inst->getType()->isTokenTy() &&
!Inst->isSwiftError()) {
if (!Inst->getType()->isVoidTy())
@@ -950,7 +1010,7 @@ static Error ReduceInsts(BugDriver &BD, BugTester TestFn) {
for (const Function &F : BD.getProgram())
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
- if (!isa<TerminatorInst>(&I))
+ if (!I.isTerminator())
Insts.push_back(&I);
Expected<bool> Result =
@@ -1056,6 +1116,38 @@ static Error DebugACrash(BugDriver &BD, BugTester TestFn) {
BD.EmitProgressBitcode(BD.getProgram(), "reduced-function");
}
+ // For each remaining function, try to reduce that function's attributes.
+ std::vector<std::string> FunctionNames;
+ for (Function &F : BD.getProgram())
+ FunctionNames.push_back(F.getName());
+
+ if (!FunctionNames.empty() && !BugpointIsInterrupted) {
+ outs() << "\n*** Attempting to reduce the number of function attributes in "
+ "the testcase\n";
+
+ unsigned OldSize = 0;
+ unsigned NewSize = 0;
+ for (std::string &Name : FunctionNames) {
+ Function *Fn = BD.getProgram().getFunction(Name);
+      assert(Fn && "Could not find function?");
+
+ std::vector<Attribute> Attrs;
+ for (Attribute A : Fn->getAttributes().getFnAttributes())
+ Attrs.push_back(A);
+
+ OldSize += Attrs.size();
+ Expected<bool> Result =
+ ReduceCrashingFunctionAttributes(BD, Name, TestFn).reduceList(Attrs);
+ if (Error E = Result.takeError())
+ return E;
+
+ NewSize += Attrs.size();
+ }
+
+    if (NewSize < OldSize)
+ BD.EmitProgressBitcode(BD.getProgram(), "reduced-function-attributes");
+ }
+
// Attempt to change conditional branches into unconditional branches to
// eliminate blocks.
if (!DisableSimplifyCFG && !BugpointIsInterrupted) {
diff --git a/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp b/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
index 773bad69fae0..1b86b103d835 100644
--- a/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
+++ b/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
@@ -148,8 +148,9 @@ Error BugDriver::initializeExecutionEnvironment() {
std::string Message;
if (CCBinary.empty()) {
- if (sys::findProgramByName("clang"))
- CCBinary = "clang";
+ if (ErrorOr<std::string> ClangPath =
+ FindProgramByName("clang", getToolName(), &AbsTolerance))
+ CCBinary = *ClangPath;
else
CCBinary = "gcc";
}
@@ -193,11 +194,11 @@ Error BugDriver::initializeExecutionEnvironment() {
break;
case CompileCustom:
Interpreter = AbstractInterpreter::createCustomCompiler(
- Message, CustomCompileCommand);
+ getToolName(), Message, CustomCompileCommand);
break;
case Custom:
- Interpreter =
- AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
+ Interpreter = AbstractInterpreter::createCustomExecutor(
+ getToolName(), Message, CustomExecCommand);
break;
}
if (!Interpreter)
@@ -239,8 +240,8 @@ Error BugDriver::initializeExecutionEnvironment() {
SafeInterpreterSel == RunLLCIA);
break;
case Custom:
- SafeInterpreter =
- AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
+ SafeInterpreter = AbstractInterpreter::createCustomExecutor(
+ getToolName(), Message, CustomExecCommand);
break;
default:
Message = "Sorry, this back-end is not supported by bugpoint as the "
@@ -252,7 +253,7 @@ Error BugDriver::initializeExecutionEnvironment() {
exit(1);
}
- cc = CC::create(Message, CCBinary, &CCToolArgv);
+ cc = CC::create(getToolName(), Message, CCBinary, &CCToolArgv);
if (!cc) {
outs() << Message << "\nExiting.\n";
exit(1);
@@ -299,26 +300,32 @@ Expected<std::string> BugDriver::executeProgram(const Module &Program,
if (!AI)
AI = Interpreter;
assert(AI && "Interpreter should have been created already!");
+ bool CreatedBitcode = false;
if (BitcodeFile.empty()) {
// Emit the program to a bitcode file...
- auto File =
- sys::fs::TempFile::create(OutputPrefix + "-test-program-%%%%%%%.bc");
- if (!File) {
- errs() << ToolName
- << ": Error making unique filename: " << toString(File.takeError())
+ SmallString<128> UniqueFilename;
+ int UniqueFD;
+ std::error_code EC = sys::fs::createUniqueFile(
+ OutputPrefix + "-test-program-%%%%%%%.bc", UniqueFD, UniqueFilename);
+ if (EC) {
+ errs() << ToolName << ": Error making unique filename: " << EC.message()
<< "!\n";
exit(1);
}
- DiscardTemp Discard{*File};
- BitcodeFile = File->TmpName;
+ BitcodeFile = UniqueFilename.str();
- if (writeProgramToFile(File->FD, Program)) {
+ if (writeProgramToFile(BitcodeFile, UniqueFD, Program)) {
errs() << ToolName << ": Error emitting bitcode to file '" << BitcodeFile
<< "'!\n";
exit(1);
}
+ CreatedBitcode = true;
}
+ // Remove the temporary bitcode file when we are done.
+ std::string BitcodePath(BitcodeFile);
+ FileRemover BitcodeFileRemover(BitcodePath, CreatedBitcode && !SaveTemps);
+
if (OutputFile.empty())
OutputFile = OutputPrefix + "-execution-output-%%%%%%%";
diff --git a/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp b/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp
index cbb048db8fe7..64fe675de20c 100644
--- a/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp
+++ b/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp
@@ -16,6 +16,7 @@
//===----------------------------------------------------------------------===//
#include "BugDriver.h"
+#include "ToolRunner.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
@@ -166,7 +167,8 @@ bool BugDriver::runPasses(Module &Program,
std::string tool = OptCmd;
if (OptCmd.empty()) {
- if (ErrorOr<std::string> Path = sys::findProgramByName("opt"))
+ if (ErrorOr<std::string> Path =
+ FindProgramByName("opt", getToolName(), &OutputPrefix))
tool = *Path;
else
errs() << Path.getError().message() << "\n";
diff --git a/contrib/llvm/tools/bugpoint/ToolRunner.cpp b/contrib/llvm/tools/bugpoint/ToolRunner.cpp
index 812e8e3bbae5..7ba8ea1f16c5 100644
--- a/contrib/llvm/tools/bugpoint/ToolRunner.cpp
+++ b/contrib/llvm/tools/bugpoint/ToolRunner.cpp
@@ -202,19 +202,7 @@ Expected<int> LLI::ExecuteProgram(const std::string &Bitcode,
void AbstractInterpreter::anchor() {}
-#if defined(LLVM_ON_UNIX)
-const char EXESuffix[] = "";
-#elif defined(_WIN32)
-const char EXESuffix[] = "exe";
-#endif
-
-/// Prepend the path to the program being executed
-/// to \p ExeName, given the value of argv[0] and the address of main()
-/// itself. This allows us to find another LLVM tool if it is built in the same
-/// directory. An empty string is returned on error; note that this function
-/// just mainpulates the path and doesn't check for executability.
-/// Find a named executable.
-static std::string PrependMainExecutablePath(const std::string &ExeName,
+ErrorOr<std::string> llvm::FindProgramByName(const std::string &ExeName,
const char *Argv0,
void *MainAddr) {
// Check the directory that the calling program is in. We can do
@@ -222,30 +210,25 @@ static std::string PrependMainExecutablePath(const std::string &ExeName,
// is a relative path to the executable itself.
std::string Main = sys::fs::getMainExecutable(Argv0, MainAddr);
StringRef Result = sys::path::parent_path(Main);
+ if (ErrorOr<std::string> Path = sys::findProgramByName(ExeName, Result))
+ return *Path;
- if (!Result.empty()) {
- SmallString<128> Storage = Result;
- sys::path::append(Storage, ExeName);
- sys::path::replace_extension(Storage, EXESuffix);
- return Storage.str();
- }
-
- return Result.str();
+ // Check the user PATH.
+ return sys::findProgramByName(ExeName);
}
// LLI create method - Try to find the LLI executable
AbstractInterpreter *
AbstractInterpreter::createLLI(const char *Argv0, std::string &Message,
const std::vector<std::string> *ToolArgs) {
- std::string LLIPath =
- PrependMainExecutablePath("lli", Argv0, (void *)(intptr_t)&createLLI);
- if (!LLIPath.empty()) {
- Message = "Found lli: " + LLIPath + "\n";
- return new LLI(LLIPath, ToolArgs);
+ if (ErrorOr<std::string> LLIPath =
+ FindProgramByName("lli", Argv0, (void *)(intptr_t)&createLLI)) {
+ Message = "Found lli: " + *LLIPath + "\n";
+ return new LLI(*LLIPath, ToolArgs);
+ } else {
+ Message = LLIPath.getError().message() + "\n";
+ return nullptr;
}
-
- Message = "Cannot find `lli' in executable directory!\n";
- return nullptr;
}
//===---------------------------------------------------------------------===//
@@ -368,8 +351,9 @@ Expected<int> CustomExecutor::ExecuteProgram(
// '\ ' -> ' '
// 'exa\mple' -> 'example'
//
-static void lexCommand(std::string &Message, const std::string &CommandLine,
- std::string &CmdPath, std::vector<std::string> &Args) {
+static void lexCommand(const char *Argv0, std::string &Message,
+ const std::string &CommandLine, std::string &CmdPath,
+ std::vector<std::string> &Args) {
std::string Token;
std::string Command;
@@ -402,7 +386,7 @@ static void lexCommand(std::string &Message, const std::string &CommandLine,
Token.push_back(CommandLine[Pos]);
}
- auto Path = sys::findProgramByName(Command);
+ auto Path = FindProgramByName(Command, Argv0, (void *)(intptr_t)&lexCommand);
if (!Path) {
Message = std::string("Cannot find '") + Command +
"' in PATH: " + Path.getError().message() + "\n";
@@ -416,11 +400,12 @@ static void lexCommand(std::string &Message, const std::string &CommandLine,
// Custom execution environment create method, takes the execution command
// as arguments
AbstractInterpreter *AbstractInterpreter::createCustomCompiler(
- std::string &Message, const std::string &CompileCommandLine) {
+ const char *Argv0, std::string &Message,
+ const std::string &CompileCommandLine) {
std::string CmdPath;
std::vector<std::string> Args;
- lexCommand(Message, CompileCommandLine, CmdPath, Args);
+ lexCommand(Argv0, Message, CompileCommandLine, CmdPath, Args);
if (CmdPath.empty())
return nullptr;
@@ -430,12 +415,13 @@ AbstractInterpreter *AbstractInterpreter::createCustomCompiler(
// Custom execution environment create method, takes the execution command
// as arguments
AbstractInterpreter *
-AbstractInterpreter::createCustomExecutor(std::string &Message,
+AbstractInterpreter::createCustomExecutor(const char *Argv0,
+ std::string &Message,
const std::string &ExecCommandLine) {
std::string CmdPath;
std::vector<std::string> Args;
- lexCommand(Message, ExecCommandLine, CmdPath, Args);
+ lexCommand(Argv0, Message, ExecCommandLine, CmdPath, Args);
if (CmdPath.empty())
return nullptr;
@@ -524,20 +510,20 @@ LLC *AbstractInterpreter::createLLC(const char *Argv0, std::string &Message,
const std::vector<std::string> *Args,
const std::vector<std::string> *CCArgs,
bool UseIntegratedAssembler) {
- std::string LLCPath =
- PrependMainExecutablePath("llc", Argv0, (void *)(intptr_t)&createLLC);
- if (LLCPath.empty()) {
- Message = "Cannot find `llc' in executable directory!\n";
+ ErrorOr<std::string> LLCPath =
+ FindProgramByName("llc", Argv0, (void *)(intptr_t)&createLLC);
+ if (!LLCPath) {
+ Message = LLCPath.getError().message() + "\n";
return nullptr;
}
- CC *cc = CC::create(Message, CCBinary, CCArgs);
+ CC *cc = CC::create(Argv0, Message, CCBinary, CCArgs);
if (!cc) {
errs() << Message << "\n";
exit(1);
}
- Message = "Found llc: " + LLCPath + "\n";
- return new LLC(LLCPath, cc, Args, UseIntegratedAssembler);
+ Message = "Found llc: " + *LLCPath + "\n";
+ return new LLC(*LLCPath, cc, Args, UseIntegratedAssembler);
}
//===---------------------------------------------------------------------===//
@@ -606,15 +592,14 @@ Expected<int> JIT::ExecuteProgram(const std::string &Bitcode,
AbstractInterpreter *
AbstractInterpreter::createJIT(const char *Argv0, std::string &Message,
const std::vector<std::string> *Args) {
- std::string LLIPath =
- PrependMainExecutablePath("lli", Argv0, (void *)(intptr_t)&createJIT);
- if (!LLIPath.empty()) {
- Message = "Found lli: " + LLIPath + "\n";
- return new JIT(LLIPath, Args);
+ if (ErrorOr<std::string> LLIPath =
+ FindProgramByName("lli", Argv0, (void *)(intptr_t)&createJIT)) {
+ Message = "Found lli: " + *LLIPath + "\n";
+ return new JIT(*LLIPath, Args);
+ } else {
+ Message = LLIPath.getError().message() + "\n";
+ return nullptr;
}
-
- Message = "Cannot find `lli' in executable directory!\n";
- return nullptr;
}
//===---------------------------------------------------------------------===//
@@ -855,9 +840,10 @@ Error CC::MakeSharedObject(const std::string &InputFile, FileType fileType,
/// create - Try to find the CC executable
///
-CC *CC::create(std::string &Message, const std::string &CCBinary,
+CC *CC::create(const char *Argv0, std::string &Message,
+ const std::string &CCBinary,
const std::vector<std::string> *Args) {
- auto CCPath = sys::findProgramByName(CCBinary);
+ auto CCPath = FindProgramByName(CCBinary, Argv0, (void *)(intptr_t)&create);
if (!CCPath) {
Message = "Cannot find `" + CCBinary + "' in PATH: " +
CCPath.getError().message() + "\n";
diff --git a/contrib/llvm/tools/bugpoint/ToolRunner.h b/contrib/llvm/tools/bugpoint/ToolRunner.h
index f218ad534ee9..ef8551cc669b 100644
--- a/contrib/llvm/tools/bugpoint/ToolRunner.h
+++ b/contrib/llvm/tools/bugpoint/ToolRunner.h
@@ -49,7 +49,8 @@ class CC {
public:
enum FileType { AsmFile, ObjectFile, CFile };
- static CC *create(std::string &Message, const std::string &CCBinary,
+ static CC *create(const char *Argv0, std::string &Message,
+ const std::string &CCBinary,
const std::vector<std::string> *Args);
/// ExecuteProgram - Execute the program specified by "ProgramFile" (which is
@@ -98,11 +99,11 @@ public:
const std::vector<std::string> *Args = nullptr);
static AbstractInterpreter *
- createCustomCompiler(std::string &Message,
+ createCustomCompiler(const char *Argv0, std::string &Message,
const std::string &CompileCommandLine);
static AbstractInterpreter *
- createCustomExecutor(std::string &Message,
+ createCustomExecutor(const char *Argv0, std::string &Message,
const std::string &ExecCommandLine);
virtual ~AbstractInterpreter() {}
@@ -178,6 +179,13 @@ public:
unsigned MemoryLimit = 0) override;
};
+/// Find the first executable file \p ExeName, looking first in the same
+/// directory as argv[0] and, failing that, in the user's PATH. This allows us
+/// to find another LLVM tool if it is built in the same directory. If no
+/// executable is found, an error is returned.
+ErrorOr<std::string> FindProgramByName(const std::string &ExeName,
+ const char *Argv0, void *MainAddr);
+
} // End llvm namespace
#endif
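
[Editor's note] The new FindProgramByName helper declared above checks the directory of the running tool first and only then falls back to the user's PATH. The snippet below sketches that two-step lookup with plain std::filesystem, assuming a POSIX-style ':'-separated PATH; it is an illustration, not LLVM's sys::findProgramByName.

// find_tool_sketch.cpp -- build with: c++ -std=c++17 find_tool_sketch.cpp
#include <cstdlib>
#include <filesystem>
#include <iostream>
#include <optional>
#include <sstream>
#include <string>

namespace fs = std::filesystem;

// Look for ExeName next to the running tool first (ExeDir stands in for the
// directory derived from argv[0]), then fall back to scanning PATH. This
// mirrors the lookup order of the FindProgramByName helper in the diff above.
std::optional<fs::path> findProgram(const std::string &ExeName,
                                    const fs::path &ExeDir) {
  fs::path Candidate = ExeDir / ExeName;
  if (fs::exists(Candidate))
    return Candidate;

  if (const char *PathEnv = std::getenv("PATH")) {
    std::istringstream Dirs(PathEnv);
    std::string Dir;
    while (std::getline(Dirs, Dir, ':')) { // ':' separator on POSIX systems
      Candidate = fs::path(Dir) / ExeName;
      if (fs::exists(Candidate))
        return Candidate;
    }
  }
  return std::nullopt; // caller reports "cannot find ..." as in the diff
}

int main(int, char **argv) {
  fs::path ExeDir = fs::absolute(argv[0]).parent_path();
  if (auto P = findProgram("clang", ExeDir))
    std::cout << "found: " << *P << "\n";
  else
    std::cout << "not found, would fall back to gcc\n";
  return 0;
}
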
diff --git a/contrib/llvm/tools/lli/lli.cpp b/contrib/llvm/tools/lli/lli.cpp
index 1940dbd848cc..7e93d31361aa 100644
--- a/contrib/llvm/tools/lli/lli.cpp
+++ b/contrib/llvm/tools/lli/lli.cpp
@@ -26,6 +26,7 @@
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
#include "llvm/ExecutionEngine/OrcMCJITReplacement.h"
@@ -34,7 +35,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/TypeBuilder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Object/Archive.h"
@@ -97,6 +97,28 @@ namespace {
"orc-lazy",
"Orc-based lazy JIT.")));
+ cl::opt<unsigned>
+ LazyJITCompileThreads("compile-threads",
+ cl::desc("Choose the number of compile threads "
+ "(jit-kind=orc-lazy only)"),
+ cl::init(0));
+
+ cl::list<std::string>
+ ThreadEntryPoints("thread-entry",
+ cl::desc("calls the given entry-point on a new thread "
+ "(jit-kind=orc-lazy only)"));
+
+ cl::opt<bool> PerModuleLazy(
+ "per-module-lazy",
+ cl::desc("Performs lazy compilation on whole module boundaries "
+ "rather than individual functions"),
+ cl::init(false));
+
+ cl::list<std::string>
+ JITDylibs("jd",
+ cl::desc("Specifies the JITDylib to be used for any subsequent "
+ "-extra-module arguments."));
+
// The MCJIT supports building for a target address space separate from
// the JIT compilation process. Use a forked process and a copying
// memory manager with IPC to execute using this functionality.
@@ -294,23 +316,18 @@ static void addCygMingExtraModule(ExecutionEngine &EE, LLVMContext &Context,
M->setTargetTriple(TargetTripleStr);
// Create an empty function named "__main".
- Function *Result;
- if (TargetTriple.isArch64Bit()) {
- Result = Function::Create(
- TypeBuilder<int64_t(void), false>::get(Context),
- GlobalValue::ExternalLinkage, "__main", M.get());
- } else {
- Result = Function::Create(
- TypeBuilder<int32_t(void), false>::get(Context),
- GlobalValue::ExternalLinkage, "__main", M.get());
- }
- BasicBlock *BB = BasicBlock::Create(Context, "__main", Result);
- Builder.SetInsertPoint(BB);
- Value *ReturnVal;
+ Type *ReturnTy;
if (TargetTriple.isArch64Bit())
- ReturnVal = ConstantInt::get(Context, APInt(64, 0));
+ ReturnTy = Type::getInt64Ty(Context);
else
- ReturnVal = ConstantInt::get(Context, APInt(32, 0));
+ ReturnTy = Type::getInt32Ty(Context);
+ Function *Result =
+ Function::Create(FunctionType::get(ReturnTy, {}, false),
+ GlobalValue::ExternalLinkage, "__main", M.get());
+
+ BasicBlock *BB = BasicBlock::Create(Context, "__main", Result);
+ Builder.SetInsertPoint(BB);
+ Value *ReturnVal = ConstantInt::get(ReturnTy, 0);
Builder.CreateRet(ReturnVal);
// Add this new module to the ExecutionEngine.
@@ -337,8 +354,8 @@ static void reportError(SMDiagnostic Err, const char *ProgName) {
exit(1);
}
-int runOrcLazyJIT(LLVMContext &Ctx, std::vector<std::unique_ptr<Module>> Ms,
- const std::vector<std::string> &Args);
+int runOrcLazyJIT(const char *ProgName);
+void disallowOrcOptions();
//===----------------------------------------------------------------------===//
// main Driver function
@@ -362,6 +379,11 @@ int main(int argc, char **argv, char * const *envp) {
if (DisableCoreFiles)
sys::Process::PreventCoreFiles();
+ if (UseJITKind == JITKind::OrcLazy)
+ return runOrcLazyJIT(argv[0]);
+ else
+ disallowOrcOptions();
+
LLVMContext Context;
// Load the bitcode...
@@ -371,21 +393,6 @@ int main(int argc, char **argv, char * const *envp) {
if (!Mod)
reportError(Err, argv[0]);
- if (UseJITKind == JITKind::OrcLazy) {
- std::vector<std::unique_ptr<Module>> Ms;
- Ms.push_back(std::move(Owner));
- for (auto &ExtraMod : ExtraModules) {
- Ms.push_back(parseIRFile(ExtraMod, Err, Context));
- if (!Ms.back())
- reportError(Err, argv[0]);
- }
- std::vector<std::string> Args;
- Args.push_back(InputFile);
- for (auto &Arg : InputArgv)
- Args.push_back(Arg);
- return runOrcLazyJIT(Context, std::move(Ms), Args);
- }
-
if (EnableCacheManager) {
std::string CacheName("file:");
CacheName.append(InputFile);
@@ -498,7 +505,7 @@ int main(int argc, char **argv, char * const *envp) {
if (!ArOrErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(ArOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(ArOrErr.takeError(), OS);
OS.flush();
errs() << Buf;
exit(1);
@@ -688,16 +695,18 @@ int main(int argc, char **argv, char * const *envp) {
return Result;
}
-static orc::IRTransformLayer2::TransformFunction createDebugDumper() {
+static orc::IRTransformLayer::TransformFunction createDebugDumper() {
switch (OrcDumpKind) {
case DumpKind::NoDump:
- return [](std::unique_ptr<Module> M) { return M; };
+ return [](orc::ThreadSafeModule TSM,
+ const orc::MaterializationResponsibility &R) { return TSM; };
case DumpKind::DumpFuncsToStdOut:
- return [](std::unique_ptr<Module> M) {
+ return [](orc::ThreadSafeModule TSM,
+ const orc::MaterializationResponsibility &R) {
printf("[ ");
- for (const auto &F : *M) {
+ for (const auto &F : *TSM.getModule()) {
if (F.isDeclaration())
continue;
@@ -709,55 +718,58 @@ static orc::IRTransformLayer2::TransformFunction createDebugDumper() {
}
printf("]\n");
- return M;
+ return TSM;
};
case DumpKind::DumpModsToStdOut:
- return [](std::unique_ptr<Module> M) {
+ return [](orc::ThreadSafeModule TSM,
+ const orc::MaterializationResponsibility &R) {
outs() << "----- Module Start -----\n"
- << *M << "----- Module End -----\n";
+ << *TSM.getModule() << "----- Module End -----\n";
- return M;
+ return TSM;
};
case DumpKind::DumpModsToDisk:
- return [](std::unique_ptr<Module> M) {
+ return [](orc::ThreadSafeModule TSM,
+ const orc::MaterializationResponsibility &R) {
std::error_code EC;
- raw_fd_ostream Out(M->getModuleIdentifier() + ".ll", EC, sys::fs::F_Text);
+ raw_fd_ostream Out(TSM.getModule()->getModuleIdentifier() + ".ll", EC,
+ sys::fs::F_Text);
if (EC) {
- errs() << "Couldn't open " << M->getModuleIdentifier()
+ errs() << "Couldn't open " << TSM.getModule()->getModuleIdentifier()
<< " for dumping.\nError:" << EC.message() << "\n";
exit(1);
}
- Out << *M;
- return M;
+ Out << *TSM.getModule();
+ return TSM;
};
}
llvm_unreachable("Unknown DumpKind");
}
-int runOrcLazyJIT(LLVMContext &Ctx, std::vector<std::unique_ptr<Module>> Ms,
- const std::vector<std::string> &Args) {
- // Bail out early if no modules loaded.
- if (Ms.empty())
- return 0;
-
- // Add lli's symbols into the JIT's search space.
- std::string ErrMsg;
- sys::DynamicLibrary LibLLI =
- sys::DynamicLibrary::getPermanentLibrary(nullptr, &ErrMsg);
- if (!LibLLI.isValid()) {
- errs() << "Error loading lli symbols: " << ErrMsg << ".\n";
- return 1;
- }
+static void exitOnLazyCallThroughFailure() { exit(1); }
+
+int runOrcLazyJIT(const char *ProgName) {
+ // Start setting up the JIT environment.
- const auto &TT = Ms.front()->getTargetTriple();
- orc::JITTargetMachineBuilder TMD =
+ // Parse the main module.
+ orc::ThreadSafeContext TSCtx(llvm::make_unique<LLVMContext>());
+ SMDiagnostic Err;
+ auto MainModule = orc::ThreadSafeModule(
+ parseIRFile(InputFile, Err, *TSCtx.getContext()), TSCtx);
+ if (!MainModule)
+ reportError(Err, ProgName);
+
+ const auto &TT = MainModule.getModule()->getTargetTriple();
+ orc::JITTargetMachineBuilder JTMB =
TT.empty() ? ExitOnErr(orc::JITTargetMachineBuilder::detectHost())
: orc::JITTargetMachineBuilder(Triple(TT));
- TMD.setArch(MArch)
- .setCPU(getCPUStr())
+ if (!MArch.empty())
+ JTMB.getTargetTriple().setArchName(MArch);
+
+ JTMB.setCPU(getCPUStr())
.addFeatures(getFeatureList())
.setRelocationModel(RelocModel.getNumOccurrences()
? Optional<Reloc::Model>(RelocModel)
@@ -765,53 +777,135 @@ int runOrcLazyJIT(LLVMContext &Ctx, std::vector<std::unique_ptr<Module>> Ms,
.setCodeModel(CMModel.getNumOccurrences()
? Optional<CodeModel::Model>(CMModel)
: None);
- auto TM = ExitOnErr(TMD.createTargetMachine());
- auto DL = TM->createDataLayout();
- auto ES = llvm::make_unique<orc::ExecutionSession>();
- auto J =
- ExitOnErr(orc::LLLazyJIT::Create(std::move(ES), std::move(TM), DL, Ctx));
+
+ DataLayout DL = ExitOnErr(JTMB.getDefaultDataLayoutForTarget());
+
+ auto J = ExitOnErr(orc::LLLazyJIT::Create(
+ std::move(JTMB), DL,
+ pointerToJITTargetAddress(exitOnLazyCallThroughFailure),
+ LazyJITCompileThreads));
+
+ if (PerModuleLazy)
+ J->setPartitionFunction(orc::CompileOnDemandLayer::compileWholeModule);
auto Dump = createDebugDumper();
- J->setLazyCompileTransform(
- [&](std::unique_ptr<Module> M) {
- if (verifyModule(*M, &dbgs())) {
- dbgs() << "Bad module: " << *M << "\n";
- exit(1);
- }
- return Dump(std::move(M));
- });
- J->getMainVSO().setFallbackDefinitionGenerator(
- orc::DynamicLibraryFallbackGenerator(
- std::move(LibLLI), DL, [](orc::SymbolStringPtr) { return true; }));
+ J->setLazyCompileTransform([&](orc::ThreadSafeModule TSM,
+ const orc::MaterializationResponsibility &R) {
+ if (verifyModule(*TSM.getModule(), &dbgs())) {
+ dbgs() << "Bad module: " << *TSM.getModule() << "\n";
+ exit(1);
+ }
+ return Dump(std::move(TSM), R);
+ });
+ J->getMainJITDylib().setGenerator(
+ ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(DL)));
orc::MangleAndInterner Mangle(J->getExecutionSession(), DL);
- orc::LocalCXXRuntimeOverrides2 CXXRuntimeOverrides;
- ExitOnErr(CXXRuntimeOverrides.enable(J->getMainVSO(), Mangle));
+ orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
+ ExitOnErr(CXXRuntimeOverrides.enable(J->getMainJITDylib(), Mangle));
+
+ // Add the main module.
+ ExitOnErr(J->addLazyIRModule(std::move(MainModule)));
+
+ // Create JITDylibs and add any extra modules.
+ {
+ // Create JITDylibs, keep a map from argument index to dylib. We will use
+ // -extra-module argument indexes to determine what dylib to use for each
+ // -extra-module.
+ std::map<unsigned, orc::JITDylib *> IdxToDylib;
+ IdxToDylib[0] = &J->getMainJITDylib();
+ for (auto JDItr = JITDylibs.begin(), JDEnd = JITDylibs.end();
+ JDItr != JDEnd; ++JDItr) {
+ IdxToDylib[JITDylibs.getPosition(JDItr - JITDylibs.begin())] =
+ &J->createJITDylib(*JDItr);
+ }
- for (auto &M : Ms) {
- orc::makeAllSymbolsExternallyAccessible(*M);
- ExitOnErr(J->addLazyIRModule(std::move(M)));
+ for (auto EMItr = ExtraModules.begin(), EMEnd = ExtraModules.end();
+ EMItr != EMEnd; ++EMItr) {
+ auto M = parseIRFile(*EMItr, Err, *TSCtx.getContext());
+ if (!M)
+ reportError(Err, ProgName);
+
+ auto EMIdx = ExtraModules.getPosition(EMItr - ExtraModules.begin());
+ assert(EMIdx != 0 && "ExtraModule should have index > 0");
+ auto JDItr = std::prev(IdxToDylib.lower_bound(EMIdx));
+ auto &JD = *JDItr->second;
+ ExitOnErr(
+ J->addLazyIRModule(JD, orc::ThreadSafeModule(std::move(M), TSCtx)));
+ }
}
+ // Add the objects.
+ for (auto &ObjPath : ExtraObjects) {
+ auto Obj = ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ObjPath)));
+ ExitOnErr(J->addObjectFile(std::move(Obj)));
+ }
+
+ // Generate an argument string.
+ std::vector<std::string> Args;
+ Args.push_back(InputFile);
+ for (auto &Arg : InputArgv)
+ Args.push_back(Arg);
+
+ // Run any static constructors.
ExitOnErr(J->runConstructors());
+ // Run any -thread-entry points.
+ std::vector<std::thread> AltEntryThreads;
+ for (auto &ThreadEntryPoint : ThreadEntryPoints) {
+ auto EntryPointSym = ExitOnErr(J->lookup(ThreadEntryPoint));
+ typedef void (*EntryPointPtr)();
+ auto EntryPoint =
+ reinterpret_cast<EntryPointPtr>(static_cast<uintptr_t>(EntryPointSym.getAddress()));
+ AltEntryThreads.push_back(std::thread([EntryPoint]() { EntryPoint(); }));
+ }
+
+ J->getExecutionSession().dump(llvm::dbgs());
+
+ // Run main.
auto MainSym = ExitOnErr(J->lookup("main"));
typedef int (*MainFnPtr)(int, const char *[]);
std::vector<const char *> ArgV;
for (auto &Arg : Args)
ArgV.push_back(Arg.c_str());
+ ArgV.push_back(nullptr);
+
+ int ArgC = ArgV.size() - 1;
auto Main =
reinterpret_cast<MainFnPtr>(static_cast<uintptr_t>(MainSym.getAddress()));
- auto Result = Main(ArgV.size(), (const char **)ArgV.data());
+ auto Result = Main(ArgC, (const char **)ArgV.data());
- ExitOnErr(J->runDestructors());
+ // Wait for -thread-entry threads.
+ for (auto &AltEntryThread : AltEntryThreads)
+ AltEntryThread.join();
+ // Run destructors.
+ ExitOnErr(J->runDestructors());
CXXRuntimeOverrides.runDestructors();
return Result;
}
+void disallowOrcOptions() {
+ // Make sure nobody used an orc-lazy specific option accidentally.
+
+ if (LazyJITCompileThreads != 0) {
+ errs() << "-compile-threads requires -jit-kind=orc-lazy\n";
+ exit(1);
+ }
+
+ if (!ThreadEntryPoints.empty()) {
+ errs() << "-thread-entry requires -jit-kind=orc-lazy\n";
+ exit(1);
+ }
+
+ if (PerModuleLazy) {
+ errs() << "-per-module-lazy requires -jit-kind=orc-lazy\n";
+ exit(1);
+ }
+}
+
std::unique_ptr<FDRawChannel> launchRemote() {
#ifndef LLVM_ON_UNIX
llvm_unreachable("launchRemote not supported on non-Unix platforms");
diff --git a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
index 64be08ff946a..1c453ee0b569 100644
--- a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -32,6 +33,7 @@
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h"
#include "llvm/ToolDrivers/llvm-lib/LibDriver.h"
@@ -95,6 +97,7 @@ MODIFIERS:
[D] - use zero for timestamps and uids/gids (default)
[i] - put [files] before [relpos] (same as [b])
[l] - ignored for compatibility
+ [L] - add archive's contents
[o] - preserve original dates
[s] - create an archive index (cf. ranlib)
[S] - do not build a symbol table
@@ -113,7 +116,7 @@ void printHelpMessage() {
// Show the error message and exit.
LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) {
- errs() << ToolName << ": " << Error << ".\n";
+ WithColor::error(errs(), ToolName) << Error << ".\n";
printHelpMessage();
exit(1);
}
@@ -123,7 +126,7 @@ static void failIfError(std::error_code EC, Twine Context = "") {
return;
std::string ContextStr = Context.str();
- if (ContextStr == "")
+ if (ContextStr.empty())
fail(EC.message());
fail(Context + ": " + EC.message());
}
@@ -134,7 +137,7 @@ static void failIfError(Error E, Twine Context = "") {
handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EIB) {
std::string ContextStr = Context.str();
- if (ContextStr == "")
+ if (ContextStr.empty())
fail(EIB.message());
fail(Context + ": " + EIB.message());
});
@@ -155,14 +158,14 @@ static std::string Options;
// This enumeration delineates the kinds of operations on an archive
// that are permitted.
enum ArchiveOperation {
- Print, ///< Print the contents of the archive
- Delete, ///< Delete the specified members
- Move, ///< Move members to end or as given by {a,b,i} modifiers
- QuickAppend, ///< Quickly append to end of archive
- ReplaceOrInsert, ///< Replace or Insert members
- DisplayTable, ///< Display the table of contents
- Extract, ///< Extract files back to file system
- CreateSymTab ///< Create a symbol table in an existing archive
+ Print, ///< Print the contents of the archive
+ Delete, ///< Delete the specified members
+ Move, ///< Move members to end or as given by {a,b,i} modifiers
+ QuickAppend, ///< Quickly append to end of archive
+ ReplaceOrInsert, ///< Replace or Insert members
+ DisplayTable, ///< Display the table of contents
+ Extract, ///< Extract files back to file system
+ CreateSymTab ///< Create a symbol table in an existing archive
};
// Modifiers to follow operation to vary behavior
@@ -175,6 +178,7 @@ static bool Verbose = false; ///< 'v' modifier
static bool Symtab = true; ///< 's' modifier
static bool Deterministic = true; ///< 'D' and 'U' modifiers
static bool Thin = false; ///< 'T' modifier
+static bool AddLibrary = false; ///< 'L' modifier
// Relative Positional Argument (for insert/move). This variable holds
// the name of the archive member to which the 'a', 'b' or 'i' modifier
@@ -193,7 +197,7 @@ static std::vector<StringRef> Members;
// Extract the member filename from the command line for the [relpos] argument
// associated with a, b, and i modifiers
static void getRelPos() {
- if (PositionalArgs.size() == 0)
+ if (PositionalArgs.empty())
fail("Expected [relpos] for a, b, or i modifier");
RelPos = PositionalArgs[0];
PositionalArgs.erase(PositionalArgs.begin());
@@ -201,7 +205,7 @@ static void getRelPos() {
// Get the archive file name from the command line
static void getArchive() {
- if (PositionalArgs.size() == 0)
+ if (PositionalArgs.empty())
fail("An archive name must be specified");
ArchiveName = PositionalArgs[0];
PositionalArgs.erase(PositionalArgs.begin());
@@ -213,6 +217,21 @@ static void getMembers() {
Members.push_back(Arg);
}
+std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
+std::vector<std::unique_ptr<object::Archive>> Archives;
+
+static object::Archive &readLibrary(const Twine &Library) {
+ auto BufOrErr = MemoryBuffer::getFile(Library, -1, false);
+ failIfError(BufOrErr.getError(), "Could not open library " + Library);
+ ArchiveBuffers.push_back(std::move(*BufOrErr));
+ auto LibOrErr =
+ object::Archive::create(ArchiveBuffers.back()->getMemBufferRef());
+ failIfError(errorToErrorCode(LibOrErr.takeError()),
+ "Could not parse library");
+ Archives.push_back(std::move(*LibOrErr));
+ return *Archives.back();
+}
+
static void runMRIScript();
// Parse the command line options as presented and return the operation
@@ -238,18 +257,44 @@ static ArchiveOperation parseCommandLine() {
bool MaybeJustCreateSymTab = false;
- for(unsigned i=0; i<Options.size(); ++i) {
- switch(Options[i]) {
- case 'd': ++NumOperations; Operation = Delete; break;
- case 'm': ++NumOperations; Operation = Move ; break;
- case 'p': ++NumOperations; Operation = Print; break;
- case 'q': ++NumOperations; Operation = QuickAppend; break;
- case 'r': ++NumOperations; Operation = ReplaceOrInsert; break;
- case 't': ++NumOperations; Operation = DisplayTable; break;
- case 'x': ++NumOperations; Operation = Extract; break;
- case 'c': Create = true; break;
- case 'l': /* accepted but unused */ break;
- case 'o': OriginalDates = true; break;
+ for (unsigned i = 0; i < Options.size(); ++i) {
+ switch (Options[i]) {
+ case 'd':
+ ++NumOperations;
+ Operation = Delete;
+ break;
+ case 'm':
+ ++NumOperations;
+ Operation = Move;
+ break;
+ case 'p':
+ ++NumOperations;
+ Operation = Print;
+ break;
+ case 'q':
+ ++NumOperations;
+ Operation = QuickAppend;
+ break;
+ case 'r':
+ ++NumOperations;
+ Operation = ReplaceOrInsert;
+ break;
+ case 't':
+ ++NumOperations;
+ Operation = DisplayTable;
+ break;
+ case 'x':
+ ++NumOperations;
+ Operation = Extract;
+ break;
+ case 'c':
+ Create = true;
+ break;
+ case 'l': /* accepted but unused */
+ break;
+ case 'o':
+ OriginalDates = true;
+ break;
case 's':
Symtab = true;
MaybeJustCreateSymTab = true;
@@ -257,8 +302,12 @@ static ArchiveOperation parseCommandLine() {
case 'S':
Symtab = false;
break;
- case 'u': OnlyUpdate = true; break;
- case 'v': Verbose = true; break;
+ case 'u':
+ OnlyUpdate = true;
+ break;
+ case 'v':
+ Verbose = true;
+ break;
case 'a':
getRelPos();
AddAfter = true;
@@ -283,6 +332,9 @@ static ArchiveOperation parseCommandLine() {
case 'T':
Thin = true;
break;
+ case 'L':
+ AddLibrary = true;
+ break;
default:
fail(std::string("unknown option ") + Options[i]);
}
@@ -295,7 +347,7 @@ static ArchiveOperation parseCommandLine() {
// Everything on the command line at this point is a member.
getMembers();
- if (NumOperations == 0 && MaybeJustCreateSymTab) {
+ if (NumOperations == 0 && MaybeJustCreateSymTab) {
NumOperations = 1;
Operation = CreateSymTab;
if (!Members.empty())
@@ -319,6 +371,8 @@ static ArchiveOperation parseCommandLine() {
fail("The 'o' modifier is only applicable to the 'x' operation");
if (OnlyUpdate && Operation != ReplaceOrInsert)
fail("The 'u' modifier is only applicable to the 'r' operation");
+ if (AddLibrary && Operation != QuickAppend)
+ fail("The 'L' modifier is only applicable to the 'q' operation");
// Return the parsed operation to the caller
return Operation;
@@ -367,7 +421,11 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) {
outs() << ' ' << format("%6llu", Size.get());
auto ModTimeOrErr = C.getLastModified();
failIfError(ModTimeOrErr.takeError());
- outs() << ' ' << ModTimeOrErr.get();
+ // Note: formatv() only handles the default TimePoint<>, which is in
+ // nanoseconds.
+ // TODO: fix format_provider<TimePoint<>> to allow other units.
+ sys::TimePoint<> ModTimeInNs = ModTimeOrErr.get();
+ outs() << ' ' << formatv("{0:%b %e %H:%M %Y}", ModTimeInNs);
outs() << ' ';
}
@@ -410,7 +468,7 @@ static void doExtract(StringRef Name, const object::Archive::Child &C) {
auto ModTimeOrErr = C.getLastModified();
failIfError(ModTimeOrErr.takeError());
failIfError(
- sys::fs::setLastModificationAndAccessTime(FD, ModTimeOrErr.get()));
+ sys::fs::setLastAccessAndModificationTime(FD, ModTimeOrErr.get()));
}
if (close(FD))
@@ -475,36 +533,57 @@ static void performReadOperation(ArchiveOperation Operation,
if (Members.empty())
return;
for (StringRef Name : Members)
- errs() << Name << " was not found\n";
+ WithColor::error(errs(), ToolName) << "'" << Name << "' was not found\n";
exit(1);
}
+static void addChildMember(std::vector<NewArchiveMember> &Members,
+ const object::Archive::Child &M,
+ bool FlattenArchive = false) {
+ if (Thin && !M.getParent()->isThin())
+ fail("Cannot convert a regular archive to a thin one");
+ Expected<NewArchiveMember> NMOrErr =
+ NewArchiveMember::getOldMember(M, Deterministic);
+ failIfError(NMOrErr.takeError());
+ if (FlattenArchive &&
+ identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
+ Expected<std::string> FileNameOrErr = M.getFullName();
+ failIfError(FileNameOrErr.takeError());
+ object::Archive &Lib = readLibrary(*FileNameOrErr);
+ // When creating thin archives, only flatten if the member is also thin.
+ if (!Thin || Lib.isThin()) {
+ Error Err = Error::success();
+ // Only Thin archives are recursively flattened.
+ for (auto &Child : Lib.children(Err))
+ addChildMember(Members, Child, /*FlattenArchive=*/Thin);
+ failIfError(std::move(Err));
+ return;
+ }
+ }
+ Members.push_back(std::move(*NMOrErr));
+}
+
static void addMember(std::vector<NewArchiveMember> &Members,
- StringRef FileName, int Pos = -1) {
+ StringRef FileName, bool FlattenArchive = false) {
Expected<NewArchiveMember> NMOrErr =
NewArchiveMember::getFile(FileName, Deterministic);
failIfError(NMOrErr.takeError(), FileName);
-
+ if (FlattenArchive &&
+ identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
+ object::Archive &Lib = readLibrary(FileName);
+ // When creating thin archives, only flatten if the member is also thin.
+ if (!Thin || Lib.isThin()) {
+ Error Err = Error::success();
+ // Only Thin archives are recursively flattened.
+ for (auto &Child : Lib.children(Err))
+ addChildMember(Members, Child, /*FlattenArchive=*/Thin);
+ failIfError(std::move(Err));
+ return;
+ }
+ }
// Use the basename of the object path for the member name.
NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName);
-
- if (Pos == -1)
- Members.push_back(std::move(*NMOrErr));
- else
- Members[Pos] = std::move(*NMOrErr);
-}
-
-static void addMember(std::vector<NewArchiveMember> &Members,
- const object::Archive::Child &M, int Pos = -1) {
- if (Thin && !M.getParent()->isThin())
- fail("Cannot convert a regular archive to a thin one");
- Expected<NewArchiveMember> NMOrErr =
- NewArchiveMember::getOldMember(M, Deterministic);
- failIfError(NMOrErr.takeError());
- if (Pos == -1)
- Members.push_back(std::move(*NMOrErr));
- else
- Members[Pos] = std::move(*NMOrErr);
+ Members.push_back(std::move(*NMOrErr));
}
enum InsertAction {
@@ -593,7 +672,7 @@ computeNewArchiveMembers(ArchiveOperation Operation,
computeInsertAction(Operation, Child, Name, MemberI);
switch (Action) {
case IA_AddOldMember:
- addMember(Ret, Child);
+ addChildMember(Ret, Child);
break;
case IA_AddNewMember:
addMember(Ret, *MemberI);
@@ -601,7 +680,7 @@ computeNewArchiveMembers(ArchiveOperation Operation,
case IA_Delete:
break;
case IA_MoveOldMember:
- addMember(Moved, Child);
+ addChildMember(Moved, Child);
break;
case IA_MoveNewMember:
addMember(Moved, *MemberI);
@@ -629,14 +708,20 @@ computeNewArchiveMembers(ArchiveOperation Operation,
++Pos;
}
- for (unsigned I = 0; I != Members.size(); ++I)
- Ret.insert(Ret.begin() + InsertPos, NewArchiveMember());
- Pos = InsertPos;
- for (auto &Member : Members) {
- addMember(Ret, Member, Pos);
- ++Pos;
+ if (AddLibrary) {
+ assert(Operation == QuickAppend);
+ for (auto &Member : Members)
+ addMember(Ret, Member, /*FlattenArchive=*/true);
+ return Ret;
}
+ std::vector<NewArchiveMember> NewMembers;
+ for (auto &Member : Members)
+ addMember(NewMembers, Member, /*FlattenArchive=*/Thin);
+ Ret.reserve(Ret.size() + NewMembers.size());
+ std::move(NewMembers.begin(), NewMembers.end(),
+ std::inserter(Ret, std::next(Ret.begin(), InsertPos)));
+
return Ret;
}
@@ -660,11 +745,10 @@ static object::Archive::Kind getKindFromMember(const NewArchiveMember &Member) {
return getDefaultForHost();
}
-static void
-performWriteOperation(ArchiveOperation Operation,
- object::Archive *OldArchive,
- std::unique_ptr<MemoryBuffer> OldArchiveBuf,
- std::vector<NewArchiveMember> *NewMembersP) {
+static void performWriteOperation(ArchiveOperation Operation,
+ object::Archive *OldArchive,
+ std::unique_ptr<MemoryBuffer> OldArchiveBuf,
+ std::vector<NewArchiveMember> *NewMembersP) {
std::vector<NewArchiveMember> NewMembers;
if (!NewMembersP)
NewMembers = computeNewArchiveMembers(Operation, OldArchive);
@@ -677,11 +761,11 @@ performWriteOperation(ArchiveOperation Operation,
else if (OldArchive)
Kind = OldArchive->kind();
else if (NewMembersP)
- Kind = NewMembersP->size() ? getKindFromMember(NewMembersP->front())
- : getDefaultForHost();
+ Kind = !NewMembersP->empty() ? getKindFromMember(NewMembersP->front())
+ : getDefaultForHost();
else
- Kind = NewMembers.size() ? getKindFromMember(NewMembers.front())
- : getDefaultForHost();
+ Kind = !NewMembers.empty() ? getKindFromMember(NewMembers.front())
+ : getDefaultForHost();
break;
case GNU:
Kind = object::Archive::K_GNU;
@@ -770,7 +854,8 @@ static int performOperation(ArchiveOperation Operation,
} else {
if (!Create) {
// Produce a warning if we should and we're creating the archive
- errs() << ToolName << ": creating " << ArchiveName << "\n";
+ WithColor::warning(errs(), ToolName)
+ << "creating " << ArchiveName << "\n";
}
}
@@ -786,11 +871,14 @@ static void runMRIScript() {
const MemoryBuffer &Ref = *Buf.get();
bool Saved = false;
std::vector<NewArchiveMember> NewMembers;
- std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
- std::vector<std::unique_ptr<object::Archive>> Archives;
- for (line_iterator I(Ref, /*SkipBlanks*/ true, ';'), E; I != E; ++I) {
+ for (line_iterator I(Ref, /*SkipBlanks*/ false), E; I != E; ++I) {
StringRef Line = *I;
+ Line = Line.split(';').first;
+ Line = Line.split('*').first;
+ Line = Line.trim();
+ if (Line.empty())
+ continue;
StringRef CommandStr, Rest;
std::tie(CommandStr, Rest) = Line.split(' ');
Rest = Rest.trim();
@@ -807,19 +895,11 @@ static void runMRIScript() {
switch (Command) {
case MRICommand::AddLib: {
- auto BufOrErr = MemoryBuffer::getFile(Rest, -1, false);
- failIfError(BufOrErr.getError(), "Could not open library");
- ArchiveBuffers.push_back(std::move(*BufOrErr));
- auto LibOrErr =
- object::Archive::create(ArchiveBuffers.back()->getMemBufferRef());
- failIfError(errorToErrorCode(LibOrErr.takeError()),
- "Could not parse library");
- Archives.push_back(std::move(*LibOrErr));
- object::Archive &Lib = *Archives.back();
+ object::Archive &Lib = readLibrary(Rest);
{
Error Err = Error::success();
for (auto &Member : Lib.children(Err))
- addMember(NewMembers, Member);
+ addChildMember(NewMembers, Member);
failIfError(std::move(Err));
}
break;
@@ -874,7 +954,7 @@ static int ar_main(int argc, char **argv) {
BumpPtrAllocator Alloc;
StringSaver Saver(Alloc);
cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
- for(size_t i = 1; i < Argv.size(); ++i) {
+ for (size_t i = 1; i < Argv.size(); ++i) {
StringRef Arg = Argv[i];
const char *match;
auto MatchFlagWithArg = [&](const char *expected) {
@@ -885,8 +965,7 @@ static int ar_main(int argc, char **argv) {
match = Argv[i];
return true;
}
- if (Arg.startswith(expected) && Arg.size() > len &&
- Arg[len] == '=') {
+ if (Arg.startswith(expected) && Arg.size() > len && Arg[len] == '=') {
match = Arg.data() + len + 1;
return true;
}
@@ -895,7 +974,7 @@ static int ar_main(int argc, char **argv) {
if (handleGenericOption(Argv[i]))
return 0;
if (Arg == "--") {
- for(; i < Argv.size(); ++i)
+ for (; i < Argv.size(); ++i)
PositionalArgs.push_back(Argv[i]);
break;
}
@@ -908,11 +987,11 @@ static int ar_main(int argc, char **argv) {
MRI = true;
} else if (MatchFlagWithArg("format")) {
FormatType = StringSwitch<Format>(match)
- .Case("default", Default)
- .Case("gnu", GNU)
- .Case("darwin", DARWIN)
- .Case("bsd", BSD)
- .Default(Unknown);
+ .Case("default", Default)
+ .Case("gnu", GNU)
+ .Case("darwin", DARWIN)
+ .Case("bsd", BSD)
+ .Default(Unknown);
if (FormatType == Unknown)
fail(std::string("Invalid format ") + match);
} else if (MatchFlagWithArg("plugin")) {
@@ -932,7 +1011,7 @@ static int ar_main(int argc, char **argv) {
static int ranlib_main(int argc, char **argv) {
bool ArchiveSpecified = false;
- for(int i = 1; i < argc; ++i) {
+ for (int i = 1; i < argc; ++i) {
if (handleGenericOption(argv[i])) {
return 0;
} else {
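
[Editor's note] The runMRIScript change above stops relying on line_iterator to strip comments and instead cuts each line at the first ';' or '*', trims it, and skips blanks. The snippet below is a small standalone sketch of just that line-cleanup step; the sample script contents are invented for illustration.

// mri_strip_sketch.cpp -- build with: c++ -std=c++17 mri_strip_sketch.cpp
#include <iostream>
#include <string>
#include <vector>

// Trim leading and trailing whitespace, the way the MRI loop above trims each
// line after cutting it at the first ';' or '*' comment marker.
static std::string trim(const std::string &S) {
  const char *WS = " \t\r\n";
  auto B = S.find_first_not_of(WS);
  if (B == std::string::npos)
    return "";
  auto E = S.find_last_not_of(WS);
  return S.substr(B, E - B + 1);
}

int main() {
  // Sample MRI script lines (assumed input, for illustration only).
  std::vector<std::string> Script = {
      "CREATE out.a",
      "  ; a full-line comment",
      "ADDLIB lib1.a ; trailing comment",
      "* another comment style",
      "",
      "SAVE",
  };

  for (const std::string &Raw : Script) {
    std::string Line = Raw.substr(0, Raw.find(';')); // drop ';' comments
    Line = Line.substr(0, Line.find('*'));           // drop '*' comments
    Line = trim(Line);
    if (Line.empty()) // skip blank lines instead of failing on them
      continue;
    std::cout << "command: " << Line << "\n";
  }
  return 0;
}
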
diff --git a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 1939dc6440fe..789a666cb41a 100644
--- a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -247,6 +247,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(CST_CODE, CE_CMP)
STRINGIFY_CODE(CST_CODE, INLINEASM)
STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
+ STRINGIFY_CODE(CST_CODE, CE_UNOP)
case bitc::CST_CODE_BLOCKADDRESS: return "CST_CODE_BLOCKADDRESS";
STRINGIFY_CODE(CST_CODE, DATA)
}
@@ -267,6 +268,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FUNC_CODE, INST_BR)
STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
+ STRINGIFY_CODE(FUNC_CODE, INST_UNOP)
STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET)
STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET)
@@ -285,6 +287,11 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC)
STRINGIFY_CODE(FUNC_CODE, INST_GEP)
STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE)
+ STRINGIFY_CODE(FUNC_CODE, INST_FENCE)
+ STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW)
+ STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC)
+ STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC)
+ STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
}
case bitc::VALUE_SYMTAB_BLOCK_ID:
switch (CodeID) {
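
[Editor's note] The llvm-bcanalyzer hunks above just add cases to a switch built from the STRINGIFY_CODE macro, which pastes a prefix and a code name into both the case label and the returned string. The snippet below is a reduced model of that macro pattern; the enum and its values are invented for illustration and omit the bitc:: namespace used in the real tool.

// stringify_sketch.cpp -- build with: c++ -std=c++17 stringify_sketch.cpp
#include <cstdio>

// Invented enum standing in for the bitcode function codes.
enum FuncCode { FUNC_CODE_INST_BR = 1, FUNC_CODE_INST_UNOP = 2 };

// Turn an enum case into a "PREFIX_NAME" string inside a switch without
// repeating the identifier by hand.
#define STRINGIFY_CODE(PREFIX, CODE)                                           \
  case PREFIX##_##CODE:                                                        \
    return #PREFIX "_" #CODE;

static const char *getCodeName(FuncCode Code) {
  switch (Code) {
    STRINGIFY_CODE(FUNC_CODE, INST_BR)
    STRINGIFY_CODE(FUNC_CODE, INST_UNOP)
  }
  return "UNKNOWN";
}

int main() {
  std::printf("%s\n", getCodeName(FUNC_CODE_INST_UNOP)); // FUNC_CODE_INST_UNOP
  return 0;
}
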
diff --git a/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp b/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp
index e93b63d388e0..728e00e7c3c2 100644
--- a/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "CoverageExporterJson.h"
+#include "CoverageExporterLcov.h"
#include "CoverageFilters.h"
#include "CoverageReport.h"
#include "CoverageSummaryInfo.h"
@@ -215,12 +216,13 @@ void CodeCoverageTool::collectPaths(const std::string &Path) {
for (llvm::sys::fs::recursive_directory_iterator F(Path, EC), E;
F != E; F.increment(EC)) {
- if (EC) {
- warning(EC.message(), F->path());
+ auto Status = F->status();
+ if (!Status) {
+ warning(Status.getError().message(), F->path());
continue;
}
- if (llvm::sys::fs::is_regular_file(F->path()))
+ if (Status->type() == llvm::sys::fs::file_type::regular_file)
addCollectedPath(F->path());
}
}
@@ -368,12 +370,6 @@ std::unique_ptr<CoverageMapping> CodeCoverageTool::load() {
<< "No profile record found for '" << HashMismatch.first << "'"
<< " with hash = 0x" << Twine::utohexstr(HashMismatch.second)
<< '\n';
-
- for (const auto &CounterMismatch : Coverage->getCounterMismatches())
- errs() << "counter-mismatch: "
- << "Coverage mapping for " << CounterMismatch.first
- << " only has " << CounterMismatch.second
- << " valid counter expressions\n";
}
}
@@ -571,7 +567,9 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
cl::values(clEnumValN(CoverageViewOptions::OutputFormat::Text, "text",
"Text output"),
clEnumValN(CoverageViewOptions::OutputFormat::HTML, "html",
- "HTML output")),
+ "HTML output"),
+ clEnumValN(CoverageViewOptions::OutputFormat::Lcov, "lcov",
+ "lcov tracefile output")),
cl::init(CoverageViewOptions::OutputFormat::Text));
cl::opt<std::string> PathRemap(
@@ -679,6 +677,11 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
errs() << "Color output cannot be disabled when generating html.\n";
ViewOpts.Colors = true;
break;
+ case CoverageViewOptions::OutputFormat::Lcov:
+ if (UseColor == cl::BOU_TRUE)
+ errs() << "Color output cannot be enabled when generating lcov.\n";
+ ViewOpts.Colors = false;
+ break;
}
// If path-equivalence was given and is a comma seperated pair then set
@@ -688,7 +691,7 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
PathRemapping = EquivPair;
// If a demangler is supplied, check if it exists and register it.
- if (DemanglerOpts.size()) {
+ if (!DemanglerOpts.empty()) {
auto DemanglerPathOrErr = sys::findProgramByName(DemanglerOpts[0]);
if (!DemanglerPathOrErr) {
error("Could not find the demangler!",
@@ -838,6 +841,11 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
if (Err)
return Err;
+ if (ViewOpts.Format == CoverageViewOptions::OutputFormat::Lcov) {
+ error("Lcov format should be used with 'llvm-cov export'.");
+ return 1;
+ }
+
ViewOpts.ShowLineNumbers = true;
ViewOpts.ShowLineStats = ShowLineExecutionCounts.getNumOccurrences() != 0 ||
!ShowRegions || ShowBestLineRegionsCounts;
@@ -969,6 +977,9 @@ int CodeCoverageTool::doReport(int argc, const char **argv,
if (ViewOpts.Format == CoverageViewOptions::OutputFormat::HTML) {
error("HTML output for summary reports is not yet supported.");
return 1;
+ } else if (ViewOpts.Format == CoverageViewOptions::OutputFormat::Lcov) {
+ error("Lcov format should be used with 'llvm-cov export'.");
+ return 1;
}
auto Coverage = load();
@@ -1000,8 +1011,10 @@ int CodeCoverageTool::doExport(int argc, const char **argv,
if (Err)
return Err;
- if (ViewOpts.Format != CoverageViewOptions::OutputFormat::Text) {
- error("Coverage data can only be exported as textual JSON.");
+ if (ViewOpts.Format != CoverageViewOptions::OutputFormat::Text &&
+ ViewOpts.Format != CoverageViewOptions::OutputFormat::Lcov) {
+ error("Coverage data can only be exported as textual JSON or an "
+ "lcov tracefile.");
return 1;
}
@@ -1011,12 +1024,27 @@ int CodeCoverageTool::doExport(int argc, const char **argv,
return 1;
}
- auto Exporter = CoverageExporterJson(*Coverage.get(), ViewOpts, outs());
+ std::unique_ptr<CoverageExporter> Exporter;
+
+ switch (ViewOpts.Format) {
+ case CoverageViewOptions::OutputFormat::Text:
+ Exporter = llvm::make_unique<CoverageExporterJson>(*Coverage.get(),
+ ViewOpts, outs());
+ break;
+ case CoverageViewOptions::OutputFormat::HTML:
+ // Unreachable because we should have gracefully terminated with an error
+ // above.
+ llvm_unreachable("Export in HTML is not supported!");
+ case CoverageViewOptions::OutputFormat::Lcov:
+ Exporter = llvm::make_unique<CoverageExporterLcov>(*Coverage.get(),
+ ViewOpts, outs());
+ break;
+ }
if (SourceFiles.empty())
- Exporter.renderRoot(IgnoreFilenameFilters);
+ Exporter->renderRoot(IgnoreFilenameFilters);
else
- Exporter.renderRoot(SourceFiles);
+ Exporter->renderRoot(SourceFiles);
return 0;
}
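
[Editor's note] The doExport change above swaps a concrete CoverageExporterJson for a std::unique_ptr<CoverageExporter> chosen in a switch over the output format, so JSON and lcov share one rendering call. The snippet below sketches that factory-style selection with a toy class hierarchy; all class names and the printed output are invented for illustration.

// exporter_select_sketch.cpp -- build: c++ -std=c++17 exporter_select_sketch.cpp
#include <iostream>
#include <memory>

// Stand-ins for the exporter hierarchy; names are invented for the sketch.
struct Exporter {
  virtual ~Exporter() = default;
  virtual void renderRoot() = 0;
};
struct JsonExporter : Exporter {
  void renderRoot() override { std::cout << "{\"data\":[]}\n"; }
};
struct LcovExporter : Exporter {
  void renderRoot() override { std::cout << "TN:\nend_of_record\n"; }
};

enum class OutputFormat { Text, HTML, Lcov };

int main() {
  OutputFormat Format = OutputFormat::Lcov; // would come from -format=...

  std::unique_ptr<Exporter> Exp;
  switch (Format) {
  case OutputFormat::Text:
    Exp = std::make_unique<JsonExporter>();
    break;
  case OutputFormat::HTML:
    // The real tool rejects HTML earlier and treats reaching this case as
    // unreachable; the sketch just reports it.
    std::cerr << "HTML export not supported\n";
    return 1;
  case OutputFormat::Lcov:
    Exp = std::make_unique<LcovExporter>();
    break;
  }
  Exp->renderRoot(); // one virtual call, regardless of the concrete exporter
  return 0;
}
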
diff --git a/contrib/llvm/tools/llvm-cov/CoverageExporter.h b/contrib/llvm/tools/llvm-cov/CoverageExporter.h
index 884fba96d618..b226d68813d9 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageExporter.h
+++ b/contrib/llvm/tools/llvm-cov/CoverageExporter.h
@@ -30,7 +30,7 @@ protected:
/// The options passed to the tool.
const CoverageViewOptions &Options;
- /// Output stream to print JSON to.
+ /// Output stream to print to.
raw_ostream &OS;
CoverageExporter(const coverage::CoverageMapping &CoverageMapping,
@@ -41,10 +41,10 @@ public:
virtual ~CoverageExporter(){};
/// Render the CoverageMapping object.
- virtual void renderRoot(const CoverageFilters &IgnoreFilenameFilters) = 0;
+ virtual void renderRoot(const CoverageFilters &IgnoreFilters) = 0;
/// Render the CoverageMapping object for specified source files.
- virtual void renderRoot(const std::vector<std::string> &SourceFiles) = 0;
+ virtual void renderRoot(ArrayRef<std::string> SourceFiles) = 0;
};
} // end namespace llvm
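
[Editor's note] The interface change above replaces const std::vector<std::string>& with ArrayRef<std::string>, a non-owning view over contiguous storage. The snippet below uses std::span (C++20) as a rough stand-in to show why callers benefit: any contiguous sequence can be passed without building a vector. It is only an analogy, not ArrayRef itself.

// arrayref_sketch.cpp -- build with: c++ -std=c++20 arrayref_sketch.cpp
#include <iostream>
#include <span>
#include <string>
#include <vector>

// std::span used as a stand-in for llvm::ArrayRef: a non-owning view over a
// contiguous sequence, so callers are not forced to materialize a std::vector
// just to satisfy the parameter type.
static void renderRoot(std::span<const std::string> SourceFiles) {
  for (const std::string &SF : SourceFiles)
    std::cout << "file: " << SF << "\n";
}

int main() {
  std::vector<std::string> FromFilter = {"a.cpp", "b.cpp"};
  renderRoot(FromFilter); // a vector converts to the view implicitly

  const std::string Fixed[] = {"main.cpp"};
  renderRoot(Fixed); // so does a plain array, with no copy
  return 0;
}
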
diff --git a/contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp b/contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp
index 56c3a0003b02..22243f8e2c3e 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp
+++ b/contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp
@@ -16,33 +16,34 @@
// The json code coverage export follows the following format
// Root: dict => Root Element containing metadata
// -- Data: array => Homogeneous array of one or more export objects
-// ---- Export: dict => Json representation of one CoverageMapping
-// ------ Files: array => List of objects describing coverage for files
-// -------- File: dict => Coverage for a single file
-// ---------- Segments: array => List of Segments contained in the file
-// ------------ Segment: dict => Describes a segment of the file with a counter
-// ---------- Expansions: array => List of expansion records
-// ------------ Expansion: dict => Object that descibes a single expansion
-// -------------- CountedRegion: dict => The region to be expanded
-// -------------- TargetRegions: array => List of Regions in the expansion
-// ---------------- CountedRegion: dict => Single Region in the expansion
-// ---------- Summary: dict => Object summarizing the coverage for this file
-// ------------ LineCoverage: dict => Object summarizing line coverage
-// ------------ FunctionCoverage: dict => Object summarizing function coverage
-// ------------ RegionCoverage: dict => Object summarizing region coverage
-// ------ Functions: array => List of objects describing coverage for functions
-// -------- Function: dict => Coverage info for a single function
-// ---------- Filenames: array => List of filenames that the function relates to
-// ---- Summary: dict => Object summarizing the coverage for the entire binary
-// ------ LineCoverage: dict => Object summarizing line coverage
-// ------ FunctionCoverage: dict => Object summarizing function coverage
-// ------ InstantiationCoverage: dict => Object summarizing inst. coverage
-// ------ RegionCoverage: dict => Object summarizing region coverage
+// -- Export: dict => Json representation of one CoverageMapping
+// -- Files: array => List of objects describing coverage for files
+// -- File: dict => Coverage for a single file
+// -- Segments: array => List of Segments contained in the file
+// -- Segment: dict => Describes a segment of the file with a counter
+// -- Expansions: array => List of expansion records
+// -- Expansion: dict => Object that describes a single expansion
+// -- CountedRegion: dict => The region to be expanded
+// -- TargetRegions: array => List of Regions in the expansion
+// -- CountedRegion: dict => Single Region in the expansion
+// -- Summary: dict => Object summarizing the coverage for this file
+// -- LineCoverage: dict => Object summarizing line coverage
+// -- FunctionCoverage: dict => Object summarizing function coverage
+// -- RegionCoverage: dict => Object summarizing region coverage
+// -- Functions: array => List of objects describing coverage for functions
+// -- Function: dict => Coverage info for a single function
+// -- Filenames: array => List of filenames that the function relates to
+// -- Summary: dict => Object summarizing the coverage for the entire binary
+// -- LineCoverage: dict => Object summarizing line coverage
+// -- FunctionCoverage: dict => Object summarizing function coverage
+// -- InstantiationCoverage: dict => Object summarizing inst. coverage
+// -- RegionCoverage: dict => Object summarizing region coverage
//
//===----------------------------------------------------------------------===//
#include "CoverageExporterJson.h"
#include "CoverageReport.h"
+#include "llvm/Support/JSON.h"
/// The semantic version combined as a string.
#define LLVM_COVERAGE_EXPORT_JSON_STR "2.0.0"
@@ -52,335 +53,142 @@
using namespace llvm;
-CoverageExporterJson::CoverageExporterJson(
- const coverage::CoverageMapping &CoverageMapping,
- const CoverageViewOptions &Options, raw_ostream &OS)
- : CoverageExporter(CoverageMapping, Options, OS) {
- State.push(JsonState::None);
-}
-
-void CoverageExporterJson::emitSerialized(const int64_t Value) { OS << Value; }
+namespace {
-void CoverageExporterJson::emitSerialized(const std::string &Value) {
- OS << "\"";
- for (char C : Value) {
- if (C != '\\')
- OS << C;
- else
- OS << "\\\\";
- }
- OS << "\"";
+json::Array renderSegment(const coverage::CoverageSegment &Segment) {
+ return json::Array({Segment.Line, Segment.Col, int64_t(Segment.Count),
+ Segment.HasCount, Segment.IsRegionEntry});
}
-void CoverageExporterJson::emitComma() {
- if (State.top() == JsonState::NonEmptyElement) {
- OS << ",";
- } else if (State.top() == JsonState::EmptyElement) {
- State.pop();
- assert((State.size() >= 1) && "Closed too many JSON elements");
- State.push(JsonState::NonEmptyElement);
- }
+json::Array renderRegion(const coverage::CountedRegion &Region) {
+ return json::Array({Region.LineStart, Region.ColumnStart, Region.LineEnd,
+ Region.ColumnEnd, int64_t(Region.ExecutionCount),
+ Region.FileID, Region.ExpandedFileID,
+ int64_t(Region.Kind)});
}
-void CoverageExporterJson::emitDictStart() {
- emitComma();
- State.push(JsonState::EmptyElement);
- OS << "{";
+json::Array renderRegions(ArrayRef<coverage::CountedRegion> Regions) {
+ json::Array RegionArray;
+ for (const auto &Region : Regions)
+ RegionArray.push_back(renderRegion(Region));
+ return RegionArray;
+}
+
+json::Object renderExpansion(const coverage::ExpansionRecord &Expansion) {
+ return json::Object(
+ {{"filenames", json::Array(Expansion.Function.Filenames)},
+ // Mark the beginning and end of this expansion in the source file.
+ {"source_region", renderRegion(Expansion.Region)},
+ // Enumerate the coverage information for the expansion.
+ {"target_regions", renderRegions(Expansion.Function.CountedRegions)}});
+}
+
+json::Object renderSummary(const FileCoverageSummary &Summary) {
+ return json::Object(
+ {{"lines",
+ json::Object({{"count", int64_t(Summary.LineCoverage.getNumLines())},
+ {"covered", int64_t(Summary.LineCoverage.getCovered())},
+ {"percent", Summary.LineCoverage.getPercentCovered()}})},
+ {"functions",
+ json::Object(
+ {{"count", int64_t(Summary.FunctionCoverage.getNumFunctions())},
+ {"covered", int64_t(Summary.FunctionCoverage.getExecuted())},
+ {"percent", Summary.FunctionCoverage.getPercentCovered()}})},
+ {"instantiations",
+ json::Object(
+ {{"count",
+ int64_t(Summary.InstantiationCoverage.getNumFunctions())},
+ {"covered", int64_t(Summary.InstantiationCoverage.getExecuted())},
+ {"percent", Summary.InstantiationCoverage.getPercentCovered()}})},
+ {"regions",
+ json::Object(
+ {{"count", int64_t(Summary.RegionCoverage.getNumRegions())},
+ {"covered", int64_t(Summary.RegionCoverage.getCovered())},
+ {"notcovered", int64_t(Summary.RegionCoverage.getNumRegions() -
+ Summary.RegionCoverage.getCovered())},
+ {"percent", Summary.RegionCoverage.getPercentCovered()}})}});
+}
+
+json::Array renderFileExpansions(const coverage::CoverageData &FileCoverage,
+ const FileCoverageSummary &FileReport) {
+ json::Array ExpansionArray;
+ for (const auto &Expansion : FileCoverage.getExpansions())
+ ExpansionArray.push_back(renderExpansion(Expansion));
+ return ExpansionArray;
}
-void CoverageExporterJson::emitDictKey(const std::string &Key) {
- emitComma();
- emitSerialized(Key);
- OS << ":";
- State.pop();
- assert((State.size() >= 1) && "Closed too many JSON elements");
-
- // We do not want to emit a comma after this key.
- State.push(JsonState::EmptyElement);
+json::Array renderFileSegments(const coverage::CoverageData &FileCoverage,
+ const FileCoverageSummary &FileReport) {
+ json::Array SegmentArray;
+ for (const auto &Segment : FileCoverage)
+ SegmentArray.push_back(renderSegment(Segment));
+ return SegmentArray;
}
-void CoverageExporterJson::emitDictEnd() {
- State.pop();
- assert((State.size() >= 1) && "Closed too many JSON elements");
- OS << "}";
+json::Object renderFile(const coverage::CoverageMapping &Coverage,
+ const std::string &Filename,
+ const FileCoverageSummary &FileReport,
+ bool ExportSummaryOnly) {
+ json::Object File({{"filename", Filename}});
+ if (!ExportSummaryOnly) {
+ // Calculate and render detailed coverage information for given file.
+ auto FileCoverage = Coverage.getCoverageForFile(Filename);
+ File["segments"] = renderFileSegments(FileCoverage, FileReport);
+ File["expansions"] = renderFileExpansions(FileCoverage, FileReport);
+ }
+ File["summary"] = renderSummary(FileReport);
+ return File;
}
-void CoverageExporterJson::emitArrayStart() {
- emitComma();
- State.push(JsonState::EmptyElement);
- OS << "[";
+json::Array renderFiles(const coverage::CoverageMapping &Coverage,
+ ArrayRef<std::string> SourceFiles,
+ ArrayRef<FileCoverageSummary> FileReports,
+ bool ExportSummaryOnly) {
+ json::Array FileArray;
+ for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I)
+ FileArray.push_back(renderFile(Coverage, SourceFiles[I], FileReports[I],
+ ExportSummaryOnly));
+ return FileArray;
}
-void CoverageExporterJson::emitArrayEnd() {
- State.pop();
- assert((State.size() >= 1) && "Closed too many JSON elements");
- OS << "]";
+json::Array renderFunctions(
+ const iterator_range<coverage::FunctionRecordIterator> &Functions) {
+ json::Array FunctionArray;
+ for (const auto &F : Functions)
+ FunctionArray.push_back(
+ json::Object({{"name", F.Name},
+ {"count", int64_t(F.ExecutionCount)},
+ {"regions", renderRegions(F.CountedRegions)},
+ {"filenames", json::Array(F.Filenames)}}));
+ return FunctionArray;
}
-void CoverageExporterJson::renderRoot(
- const CoverageFilters &IgnoreFilenameFilters) {
+} // end anonymous namespace
+
+void CoverageExporterJson::renderRoot(const CoverageFilters &IgnoreFilters) {
std::vector<std::string> SourceFiles;
for (StringRef SF : Coverage.getUniqueSourceFiles()) {
- if (!IgnoreFilenameFilters.matchesFilename(SF))
+ if (!IgnoreFilters.matchesFilename(SF))
SourceFiles.emplace_back(SF);
}
renderRoot(SourceFiles);
}
-void CoverageExporterJson::renderRoot(
- const std::vector<std::string> &SourceFiles) {
- // Start Root of JSON object.
- emitDictStart();
-
- emitDictElement("version", LLVM_COVERAGE_EXPORT_JSON_STR);
- emitDictElement("type", LLVM_COVERAGE_EXPORT_JSON_TYPE_STR);
- emitDictKey("data");
-
- // Start List of Exports.
- emitArrayStart();
-
- // Start Export.
- emitDictStart();
-
- emitDictKey("files");
-
+void CoverageExporterJson::renderRoot(ArrayRef<std::string> SourceFiles) {
FileCoverageSummary Totals = FileCoverageSummary("Totals");
auto FileReports = CoverageReport::prepareFileReports(Coverage, Totals,
SourceFiles, Options);
- renderFiles(SourceFiles, FileReports);
-
+ auto Export =
+ json::Object({{"files", renderFiles(Coverage, SourceFiles, FileReports,
+ Options.ExportSummaryOnly)},
+ {"totals", renderSummary(Totals)}});
// Skip functions-level information for summary-only export mode.
- if (!Options.ExportSummaryOnly) {
- emitDictKey("functions");
- renderFunctions(Coverage.getCoveredFunctions());
- }
-
- emitDictKey("totals");
- renderSummary(Totals);
-
- // End Export.
- emitDictEnd();
-
- // End List of Exports.
- emitArrayEnd();
-
- // End Root of JSON Object.
- emitDictEnd();
-
- assert((State.top() == JsonState::None) &&
- "All Elements In JSON were Closed");
-}
-
-void CoverageExporterJson::renderFunctions(
- const iterator_range<coverage::FunctionRecordIterator> &Functions) {
- // Start List of Functions.
- emitArrayStart();
-
- for (const auto &Function : Functions) {
- // Start Function.
- emitDictStart();
-
- emitDictElement("name", Function.Name);
- emitDictElement("count", Function.ExecutionCount);
- emitDictKey("regions");
-
- renderRegions(Function.CountedRegions);
-
- emitDictKey("filenames");
-
- // Start Filenames for Function.
- emitArrayStart();
-
- for (const auto &FileName : Function.Filenames)
- emitArrayElement(FileName);
-
- // End Filenames for Function.
- emitArrayEnd();
-
- // End Function.
- emitDictEnd();
- }
-
- // End List of Functions.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderFiles(
- ArrayRef<std::string> SourceFiles,
- ArrayRef<FileCoverageSummary> FileReports) {
- // Start List of Files.
- emitArrayStart();
-
- for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I) {
- renderFile(SourceFiles[I], FileReports[I]);
- }
-
- // End List of Files.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderFile(const std::string &Filename,
- const FileCoverageSummary &FileReport) {
- // Start File.
- emitDictStart();
-
- emitDictElement("filename", Filename);
-
- if (!Options.ExportSummaryOnly) {
- // Calculate and render detailed coverage information for given file.
- auto FileCoverage = Coverage.getCoverageForFile(Filename);
- renderFileCoverage(FileCoverage, FileReport);
- }
-
- emitDictKey("summary");
- renderSummary(FileReport);
-
- // End File.
- emitDictEnd();
-}
-
-
-void CoverageExporterJson::renderFileCoverage(
- const coverage::CoverageData &FileCoverage,
- const FileCoverageSummary &FileReport) {
- emitDictKey("segments");
-
- // Start List of Segments.
- emitArrayStart();
-
- for (const auto &Segment : FileCoverage)
- renderSegment(Segment);
-
- // End List of Segments.
- emitArrayEnd();
-
- emitDictKey("expansions");
-
- // Start List of Expansions.
- emitArrayStart();
-
- for (const auto &Expansion : FileCoverage.getExpansions())
- renderExpansion(Expansion);
-
- // End List of Expansions.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderSegment(
- const coverage::CoverageSegment &Segment) {
- // Start Segment.
- emitArrayStart();
-
- emitArrayElement(Segment.Line);
- emitArrayElement(Segment.Col);
- emitArrayElement(Segment.Count);
- emitArrayElement(Segment.HasCount);
- emitArrayElement(Segment.IsRegionEntry);
-
- // End Segment.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderExpansion(
- const coverage::ExpansionRecord &Expansion) {
- // Start Expansion.
- emitDictStart();
-
- // Mark the beginning and end of this expansion in the source file.
- emitDictKey("source_region");
- renderRegion(Expansion.Region);
-
- // Enumerate the coverage information for the expansion.
- emitDictKey("target_regions");
- renderRegions(Expansion.Function.CountedRegions);
-
- emitDictKey("filenames");
- // Start List of Filenames to map the fileIDs.
- emitArrayStart();
- for (const auto &Filename : Expansion.Function.Filenames)
- emitArrayElement(Filename);
- // End List of Filenames.
- emitArrayEnd();
-
- // End Expansion.
- emitDictEnd();
-}
-
-void CoverageExporterJson::renderRegions(
- ArrayRef<coverage::CountedRegion> Regions) {
- // Start List of Regions.
- emitArrayStart();
-
- for (const auto &Region : Regions)
- renderRegion(Region);
-
- // End List of Regions.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderRegion(const coverage::CountedRegion &Region) {
- // Start CountedRegion.
- emitArrayStart();
-
- emitArrayElement(Region.LineStart);
- emitArrayElement(Region.ColumnStart);
- emitArrayElement(Region.LineEnd);
- emitArrayElement(Region.ColumnEnd);
- emitArrayElement(Region.ExecutionCount);
- emitArrayElement(Region.FileID);
- emitArrayElement(Region.ExpandedFileID);
- emitArrayElement(Region.Kind);
-
- // End CountedRegion.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderSummary(const FileCoverageSummary &Summary) {
- // Start Summary for the file.
- emitDictStart();
-
- emitDictKey("lines");
-
- // Start Line Coverage Summary.
- emitDictStart();
- emitDictElement("count", Summary.LineCoverage.getNumLines());
- emitDictElement("covered", Summary.LineCoverage.getCovered());
- emitDictElement("percent", Summary.LineCoverage.getPercentCovered());
- // End Line Coverage Summary.
- emitDictEnd();
-
- emitDictKey("functions");
-
- // Start Function Coverage Summary.
- emitDictStart();
- emitDictElement("count", Summary.FunctionCoverage.getNumFunctions());
- emitDictElement("covered", Summary.FunctionCoverage.getExecuted());
- emitDictElement("percent", Summary.FunctionCoverage.getPercentCovered());
- // End Function Coverage Summary.
- emitDictEnd();
-
- emitDictKey("instantiations");
-
- // Start Instantiation Coverage Summary.
- emitDictStart();
- emitDictElement("count", Summary.InstantiationCoverage.getNumFunctions());
- emitDictElement("covered", Summary.InstantiationCoverage.getExecuted());
- emitDictElement("percent", Summary.InstantiationCoverage.getPercentCovered());
- // End Function Coverage Summary.
- emitDictEnd();
-
- emitDictKey("regions");
+ if (!Options.ExportSummaryOnly)
+ Export["functions"] = renderFunctions(Coverage.getCoveredFunctions());
- // Start Region Coverage Summary.
- emitDictStart();
- emitDictElement("count", Summary.RegionCoverage.getNumRegions());
- emitDictElement("covered", Summary.RegionCoverage.getCovered());
- emitDictElement("notcovered", Summary.RegionCoverage.getNumRegions() -
- Summary.RegionCoverage.getCovered());
- emitDictElement("percent", Summary.RegionCoverage.getPercentCovered());
- // End Region Coverage Summary.
- emitDictEnd();
+ auto ExportArray = json::Array({std::move(Export)});
- // End Summary for the file.
- emitDictEnd();
+ OS << json::Object({{"version", LLVM_COVERAGE_EXPORT_JSON_STR},
+ {"type", LLVM_COVERAGE_EXPORT_JSON_TYPE_STR},
+ {"data", std::move(ExportArray)}});
}
diff --git a/contrib/llvm/tools/llvm-cov/CoverageExporterJson.h b/contrib/llvm/tools/llvm-cov/CoverageExporterJson.h
index f88dffa0ebea..c37c86b42be9 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageExporterJson.h
+++ b/contrib/llvm/tools/llvm-cov/CoverageExporterJson.h
@@ -15,96 +15,20 @@
#define LLVM_COV_COVERAGEEXPORTERJSON_H
#include "CoverageExporter.h"
-#include <stack>
namespace llvm {
class CoverageExporterJson : public CoverageExporter {
- /// States that the JSON rendering machine can be in.
- enum JsonState { None, NonEmptyElement, EmptyElement };
-
- /// Tracks state of the JSON output.
- std::stack<JsonState> State;
-
- /// Emit a serialized scalar.
- void emitSerialized(const int64_t Value);
-
- /// Emit a serialized string.
- void emitSerialized(const std::string &Value);
-
- /// Emit a comma if there is a previous element to delimit.
- void emitComma();
-
- /// Emit a starting dictionary/object character.
- void emitDictStart();
-
- /// Emit a dictionary/object key but no value.
- void emitDictKey(const std::string &Key);
-
- /// Emit a dictionary/object key/value pair.
- template <typename V>
- void emitDictElement(const std::string &Key, const V &Value) {
- emitComma();
- emitSerialized(Key);
- OS << ":";
- emitSerialized(Value);
- }
-
- /// Emit a closing dictionary/object character.
- void emitDictEnd();
-
- /// Emit a starting array character.
- void emitArrayStart();
-
- /// Emit an array element.
- template <typename V> void emitArrayElement(const V &Value) {
- emitComma();
- emitSerialized(Value);
- }
-
- /// emit a closing array character.
- void emitArrayEnd();
-
- /// Render an array of all the given functions.
- void renderFunctions(
- const iterator_range<coverage::FunctionRecordIterator> &Functions);
-
- /// Render an array of all the source files, also pass back a Summary.
- void renderFiles(ArrayRef<std::string> SourceFiles,
- ArrayRef<FileCoverageSummary> FileReports);
-
- /// Render a single file.
- void renderFile(const std::string &Filename,
- const FileCoverageSummary &FileReport);
-
- /// Render summary for a single file.
- void renderFileCoverage(const coverage::CoverageData &FileCoverage,
- const FileCoverageSummary &FileReport);
-
- /// Render a CoverageSegment.
- void renderSegment(const coverage::CoverageSegment &Segment);
-
- /// Render an ExpansionRecord.
- void renderExpansion(const coverage::ExpansionRecord &Expansion);
-
- /// Render a list of CountedRegions.
- void renderRegions(ArrayRef<coverage::CountedRegion> Regions);
-
- /// Render a single CountedRegion.
- void renderRegion(const coverage::CountedRegion &Region);
-
- /// Render a FileCoverageSummary.
- void renderSummary(const FileCoverageSummary &Summary);
-
public:
CoverageExporterJson(const coverage::CoverageMapping &CoverageMapping,
- const CoverageViewOptions &Options, raw_ostream &OS);
+ const CoverageViewOptions &Options, raw_ostream &OS)
+ : CoverageExporter(CoverageMapping, Options, OS) {}
/// Render the CoverageMapping object.
- void renderRoot(const CoverageFilters &IgnoreFilenameFilters) override;
+ void renderRoot(const CoverageFilters &IgnoreFilters) override;
/// Render the CoverageMapping object for specified source files.
- void renderRoot(const std::vector<std::string> &SourceFiles) override;
+ void renderRoot(ArrayRef<std::string> SourceFiles) override;
};
} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.cpp b/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
new file mode 100644
index 000000000000..d149ba1a4c87
--- /dev/null
+++ b/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
@@ -0,0 +1,125 @@
+//===- CoverageExporterLcov.cpp - Code coverage export --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements export of code coverage data to lcov trace file format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// The trace file code coverage export uses the following format (see also
+// https://linux.die.net/man/1/geninfo). Each quoted string appears on its own
+// line; the indentation shown here is only for documentation purposes.
+//
+// - for each source file:
+//   - "SF:<absolute path to source file>"
+//   - for each function:
+//     - "FN:<line number of function start>,<function name>"
+//   - for each function:
+//     - "FNDA:<execution count>,<function name>"
+//   - "FNF:<number of functions found>"
+//   - "FNH:<number of functions hit>"
+//   - for each instrumented line:
+// - "DA:<line number>,<execution count>[,<checksum>]
+// - "LH:<number of lines with non-zero execution count>"
+// - "LF:<nubmer of instrumented lines>"
+// - "end_of_record"
+//
+// If the user is exporting summary information only, then the FN, FNDA, and DA
+// lines will not be present.
+//
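+// For illustration only (the values below are hypothetical), a summary-only
+// record for a single source file could look like:
+//
+//   SF:/path/to/foo.cpp
+//   FNF:3
+//   FNH:2
+//   LF:42
+//   LH:40
+//   end_of_record
+//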
+//===----------------------------------------------------------------------===//
+
+#include "CoverageExporterLcov.h"
+#include "CoverageReport.h"
+
+using namespace llvm;
+
+namespace {
+
+void renderFunctionSummary(raw_ostream &OS,
+ const FileCoverageSummary &Summary) {
+ OS << "FNF:" << Summary.FunctionCoverage.getNumFunctions() << '\n'
+ << "FNH:" << Summary.FunctionCoverage.getExecuted() << '\n';
+}
+
+void renderFunctions(
+ raw_ostream &OS,
+ const iterator_range<coverage::FunctionRecordIterator> &Functions) {
+ for (const auto &F : Functions) {
+ auto StartLine = F.CountedRegions.front().LineStart;
+ OS << "FN:" << StartLine << ',' << F.Name << '\n';
+ }
+ for (const auto &F : Functions)
+ OS << "FNDA:" << F.ExecutionCount << ',' << F.Name << '\n';
+}
+
+void renderLineExecutionCounts(raw_ostream &OS,
+ const coverage::CoverageData &FileCoverage) {
+ coverage::LineCoverageIterator LCI{FileCoverage, 1};
+ coverage::LineCoverageIterator LCIEnd = LCI.getEnd();
+ for (; LCI != LCIEnd; ++LCI) {
+ const coverage::LineCoverageStats &LCS = *LCI;
+ if (LCS.isMapped()) {
+ OS << "DA:" << LCS.getLine() << ',' << LCS.getExecutionCount() << '\n';
+ }
+ }
+}
+
+void renderLineSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
+ OS << "LF:" << Summary.LineCoverage.getNumLines() << '\n'
+ << "LH:" << Summary.LineCoverage.getCovered() << '\n';
+}
+
+void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
+ const std::string &Filename,
+ const FileCoverageSummary &FileReport, bool ExportSummaryOnly) {
+ OS << "SF:" << Filename << '\n';
+
+ if (!ExportSummaryOnly) {
+ renderFunctions(OS, Coverage.getCoveredFunctions());
+ }
+ renderFunctionSummary(OS, FileReport);
+
+ if (!ExportSummaryOnly) {
+ // Calculate and render detailed coverage information for given file.
+ auto FileCoverage = Coverage.getCoverageForFile(Filename);
+ renderLineExecutionCounts(OS, FileCoverage);
+ }
+ renderLineSummary(OS, FileReport);
+
+ OS << "end_of_record\n";
+}
+
+void renderFiles(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
+ ArrayRef<std::string> SourceFiles,
+ ArrayRef<FileCoverageSummary> FileReports,
+ bool ExportSummaryOnly) {
+ for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I)
+ renderFile(OS, Coverage, SourceFiles[I], FileReports[I], ExportSummaryOnly);
+}
+
+} // end anonymous namespace
+
+void CoverageExporterLcov::renderRoot(const CoverageFilters &IgnoreFilters) {
+ std::vector<std::string> SourceFiles;
+ for (StringRef SF : Coverage.getUniqueSourceFiles()) {
+ if (!IgnoreFilters.matchesFilename(SF))
+ SourceFiles.emplace_back(SF);
+ }
+ renderRoot(SourceFiles);
+}
+
+void CoverageExporterLcov::renderRoot(ArrayRef<std::string> SourceFiles) {
+ FileCoverageSummary Totals = FileCoverageSummary("Totals");
+ auto FileReports = CoverageReport::prepareFileReports(Coverage, Totals,
+ SourceFiles, Options);
+ renderFiles(OS, Coverage, SourceFiles, FileReports,
+ Options.ExportSummaryOnly);
+}
diff --git a/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.h b/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.h
new file mode 100644
index 000000000000..539b2dacd384
--- /dev/null
+++ b/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.h
@@ -0,0 +1,36 @@
+//===- CoverageExporterLcov.h - Code coverage lcov exporter ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements a code coverage exporter for lcov trace file format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_COV_COVERAGEEXPORTERLCOV_H
+#define LLVM_COV_COVERAGEEXPORTERLCOV_H
+
+#include "CoverageExporter.h"
+
+namespace llvm {
+
+class CoverageExporterLcov : public CoverageExporter {
+public:
+ CoverageExporterLcov(const coverage::CoverageMapping &CoverageMapping,
+ const CoverageViewOptions &Options, raw_ostream &OS)
+ : CoverageExporter(CoverageMapping, Options, OS) {}
+
+ /// Render the CoverageMapping object.
+ void renderRoot(const CoverageFilters &IgnoreFilters) override;
+
+ /// Render the CoverageMapping object for specified source files.
+ void renderRoot(ArrayRef<std::string> SourceFiles) override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_COV_COVERAGEEXPORTERLCOV_H
diff --git a/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h b/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
index 20085a957bb5..c8a472860027 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
+++ b/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
@@ -20,7 +20,8 @@ namespace llvm {
struct CoverageViewOptions {
enum class OutputFormat {
Text,
- HTML
+ HTML,
+ Lcov
};
bool Debug;
diff --git a/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp b/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp
index a5a8fa9a4814..cebaf63adb12 100644
--- a/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp
+++ b/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp
@@ -31,7 +31,7 @@ void CoveragePrinter::StreamDestructor::operator()(raw_ostream *OS) const {
std::string CoveragePrinter::getOutputPath(StringRef Path, StringRef Extension,
bool InToplevel,
bool Relative) const {
- assert(Extension.size() && "The file extension may not be empty");
+ assert(!Extension.empty() && "The file extension may not be empty");
SmallString<256> FullPath;
@@ -80,6 +80,10 @@ CoveragePrinter::create(const CoverageViewOptions &Opts) {
return llvm::make_unique<CoveragePrinterText>(Opts);
case CoverageViewOptions::OutputFormat::HTML:
return llvm::make_unique<CoveragePrinterHTML>(Opts);
+ case CoverageViewOptions::OutputFormat::Lcov:
+ // Unreachable because CodeCoverage.cpp should terminate with an error
+ // before we get here.
+ llvm_unreachable("Lcov format is not supported!");
}
llvm_unreachable("Unknown coverage output format!");
}
@@ -143,6 +147,10 @@ SourceCoverageView::create(StringRef SourceName, const MemoryBuffer &File,
case CoverageViewOptions::OutputFormat::HTML:
return llvm::make_unique<SourceCoverageViewHTML>(
SourceName, File, Options, std::move(CoverageInfo));
+ case CoverageViewOptions::OutputFormat::Lcov:
+ // Unreachable because CodeCoverage.cpp should terminate with an error
+ // before we get here.
+ llvm_unreachable("Lcov format is not supported!");
}
llvm_unreachable("Unknown coverage output format!");
}
diff --git a/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp b/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
index acb67aa5cfc7..3f730bb7bc82 100644
--- a/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
+++ b/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
@@ -54,7 +54,7 @@ std::string escape(StringRef Str, const CoverageViewOptions &Opts) {
std::string tag(const std::string &Name, const std::string &Str,
const std::string &ClassName = "") {
std::string Tag = "<" + Name;
- if (ClassName != "")
+ if (!ClassName.empty())
Tag += " class='" + ClassName + "'";
return Tag + ">" + Str + "</" + Name + ">";
}
diff --git a/contrib/llvm/tools/llvm-cov/TestingSupport.cpp b/contrib/llvm/tools/llvm-cov/TestingSupport.cpp
index e07abdbd17f1..16a1c2665299 100644
--- a/contrib/llvm/tools/llvm-cov/TestingSupport.cpp
+++ b/contrib/llvm/tools/llvm-cov/TestingSupport.cpp
@@ -33,7 +33,7 @@ int convertForTestingMain(int argc, const char *argv[]) {
if (!ObjErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(ObjErr.takeError(), OS, "");
+ logAllUnhandledErrors(ObjErr.takeError(), OS);
OS.flush();
errs() << "error: " << Buf;
return 1;
diff --git a/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp b/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
index 09e40d9b0db7..7594066a395d 100644
--- a/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <string>
@@ -43,17 +44,18 @@ namespace llvm {
static void error(std::error_code EC) {
if (!EC)
return;
- outs() << "\nError reading file: " << EC.message() << ".\n";
+ WithColor::error(outs(), "") << "reading file: " << EC.message() << ".\n";
outs().flush();
exit(1);
}
static void error(Error Err) {
- if (Err) {
- logAllUnhandledErrors(std::move(Err), outs(), "Error reading file: ");
- outs().flush();
- exit(1);
- }
+ if (!Err)
+ return;
+ logAllUnhandledErrors(std::move(Err), WithColor::error(outs()),
+ "reading file: ");
+ outs().flush();
+ exit(1);
}
} // namespace llvm
@@ -61,7 +63,7 @@ static void error(Error Err) {
static void reportError(StringRef Input, StringRef Message) {
if (Input == "-")
Input = "<stdin>";
- errs() << Input << ": " << Message << "\n";
+ WithColor::error(errs(), Input) << Message << "\n";
errs().flush();
exit(1);
}
@@ -496,7 +498,7 @@ static void dumpArchive(const Archive *Arc) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
reportError(Arc->getFileName(), Buf);
}
diff --git a/contrib/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp b/contrib/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp
new file mode 100644
index 000000000000..39028cc86723
--- /dev/null
+++ b/contrib/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp
@@ -0,0 +1,155 @@
+//===- llvm-cxxmap.cpp ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// llvm-cxxmap computes a correspondence between old symbol names and new
+// symbol names based on a symbol equivalence file.
+//
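+// An illustrative invocation (the file names here are placeholders):
+//
+//   llvm-cxxmap old-symbols.txt new-symbols.txt -remapping-file=remap.txt \
+//       -o symbol-map.txt
+//
+// Each output line maps an old symbol to the equivalent new symbol, separated
+// by a space.
+//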
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SymbolRemappingReader.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+cl::opt<std::string> OldSymbolFile(cl::Positional, cl::Required,
+ cl::desc("<symbol-file>"));
+cl::opt<std::string> NewSymbolFile(cl::Positional, cl::Required,
+ cl::desc("<symbol-file>"));
+cl::opt<std::string> RemappingFile("remapping-file", cl::Required,
+ cl::desc("Remapping file"));
+cl::alias RemappingFileA("r", cl::aliasopt(RemappingFile));
+cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
+ cl::init("-"), cl::desc("Output file"));
+cl::alias OutputFilenameA("o", cl::aliasopt(OutputFilename));
+
+cl::opt<bool> WarnAmbiguous(
+ "Wambiguous",
+ cl::desc("Warn on equivalent symbols in the output symbol list"));
+cl::opt<bool> WarnIncomplete(
+ "Wincomplete",
+ cl::desc("Warn on input symbols missing from output symbol list"));
+
+static void warn(Twine Message, Twine Whence = "",
+ std::string Hint = "") {
+ WithColor::warning();
+ std::string WhenceStr = Whence.str();
+ if (!WhenceStr.empty())
+ errs() << WhenceStr << ": ";
+ errs() << Message << "\n";
+ if (!Hint.empty())
+ WithColor::note() << Hint << "\n";
+}
+
+static void exitWithError(Twine Message, Twine Whence = "",
+ std::string Hint = "") {
+ WithColor::error();
+ std::string WhenceStr = Whence.str();
+ if (!WhenceStr.empty())
+ errs() << WhenceStr << ": ";
+ errs() << Message << "\n";
+ if (!Hint.empty())
+ WithColor::note() << Hint << "\n";
+ ::exit(1);
+}
+
+static void exitWithError(Error E, StringRef Whence = "") {
+ exitWithError(toString(std::move(E)), Whence);
+}
+
+static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
+ exitWithError(EC.message(), Whence);
+}
+
+static void remapSymbols(MemoryBuffer &OldSymbolFile,
+ MemoryBuffer &NewSymbolFile,
+ MemoryBuffer &RemappingFile,
+ raw_ostream &Out) {
+ // Load the remapping file and prepare to canonicalize symbols.
+ SymbolRemappingReader Reader;
+ if (Error E = Reader.read(RemappingFile))
+ exitWithError(std::move(E));
+
+ // Canonicalize the new symbols.
+ DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
+ DenseSet<StringRef> UnparseableSymbols;
+ for (line_iterator LineIt(NewSymbolFile, /*SkipBlanks=*/true, '#');
+ !LineIt.is_at_eof(); ++LineIt) {
+ StringRef Symbol = *LineIt;
+
+ auto K = Reader.insert(Symbol);
+ if (!K) {
+ UnparseableSymbols.insert(Symbol);
+ continue;
+ }
+
+ auto ItAndIsNew = MappedNames.insert({K, Symbol});
+ if (WarnAmbiguous && !ItAndIsNew.second &&
+ ItAndIsNew.first->second != Symbol) {
+ warn("symbol " + Symbol + " is equivalent to earlier symbol " +
+ ItAndIsNew.first->second,
+ NewSymbolFile.getBufferIdentifier() + ":" +
+ Twine(LineIt.line_number()),
+ "later symbol will not be the target of any remappings");
+ }
+ }
+
+ // Figure out which new symbol each old symbol is equivalent to.
+ for (line_iterator LineIt(OldSymbolFile, /*SkipBlanks=*/true, '#');
+ !LineIt.is_at_eof(); ++LineIt) {
+ StringRef Symbol = *LineIt;
+
+ auto K = Reader.lookup(Symbol);
+ StringRef NewSymbol = MappedNames.lookup(K);
+
+ if (NewSymbol.empty()) {
+ if (WarnIncomplete && !UnparseableSymbols.count(Symbol)) {
+ warn("no new symbol matches old symbol " + Symbol,
+ OldSymbolFile.getBufferIdentifier() + ":" +
+ Twine(LineIt.line_number()));
+ }
+ continue;
+ }
+
+ Out << Symbol << " " << NewSymbol << "\n";
+ }
+}
+
+int main(int argc, const char *argv[]) {
+ InitLLVM X(argc, argv);
+
+ cl::ParseCommandLineOptions(argc, argv, "LLVM C++ mangled name remapper\n");
+
+ auto OldSymbolBufOrError = MemoryBuffer::getFileOrSTDIN(OldSymbolFile);
+ if (!OldSymbolBufOrError)
+ exitWithErrorCode(OldSymbolBufOrError.getError(), OldSymbolFile);
+
+ auto NewSymbolBufOrError = MemoryBuffer::getFileOrSTDIN(NewSymbolFile);
+ if (!NewSymbolBufOrError)
+ exitWithErrorCode(NewSymbolBufOrError.getError(), NewSymbolFile);
+
+ auto RemappingBufOrError = MemoryBuffer::getFileOrSTDIN(RemappingFile);
+ if (!RemappingBufOrError)
+ exitWithErrorCode(RemappingBufOrError.getError(), RemappingFile);
+
+ std::error_code EC;
+ raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text);
+ if (EC)
+ exitWithErrorCode(EC, OutputFilename);
+
+ remapSymbols(*OldSymbolBufOrError.get(), *NewSymbolBufOrError.get(),
+ *RemappingBufOrError.get(), OS);
+}
diff --git a/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp b/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp
index af0a055ea21f..acff8bb3e89b 100644
--- a/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp
+++ b/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp
@@ -629,8 +629,8 @@ void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart,
// If the terminators have different kinds, but one is an invoke and the
// other is an unconditional branch immediately following a call, unify
// the results and the destinations.
- TerminatorInst *LTerm = LStart->getParent()->getTerminator();
- TerminatorInst *RTerm = RStart->getParent()->getTerminator();
+ Instruction *LTerm = LStart->getParent()->getTerminator();
+ Instruction *RTerm = RStart->getParent()->getTerminator();
if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {
if (cast<BranchInst>(LTerm)->isConditional()) return;
BasicBlock::iterator I = LTerm->getIterator();
@@ -686,9 +686,18 @@ void DifferenceEngine::diff(Module *L, Module *R) {
StringSet<> LNames;
SmallVector<std::pair<Function*,Function*>, 20> Queue;
+ unsigned LeftAnonCount = 0;
+ unsigned RightAnonCount = 0;
+
for (Module::iterator I = L->begin(), E = L->end(); I != E; ++I) {
Function *LFn = &*I;
- LNames.insert(LFn->getName());
+ StringRef Name = LFn->getName();
+ if (Name.empty()) {
+ ++LeftAnonCount;
+ continue;
+ }
+
+ LNames.insert(Name);
if (Function *RFn = R->getFunction(LFn->getName()))
Queue.push_back(std::make_pair(LFn, RFn));
@@ -698,10 +707,25 @@ void DifferenceEngine::diff(Module *L, Module *R) {
for (Module::iterator I = R->begin(), E = R->end(); I != E; ++I) {
Function *RFn = &*I;
- if (!LNames.count(RFn->getName()))
+ StringRef Name = RFn->getName();
+ if (Name.empty()) {
+ ++RightAnonCount;
+ continue;
+ }
+
+ if (!LNames.count(Name))
logf("function %r exists only in right module") << RFn;
}
+
+ if (LeftAnonCount != 0 || RightAnonCount != 0) {
+ SmallString<32> Tmp;
+ logf(("not comparing " + Twine(LeftAnonCount) +
+ " anonymous functions in the left module and " +
+ Twine(RightAnonCount) + " in the right module")
+ .toStringRef(Tmp));
+ }
+
for (SmallVectorImpl<std::pair<Function*,Function*> >::iterator
I = Queue.begin(), E = Queue.end(); I != E; ++I)
diff(I->first, I->second);
diff --git a/contrib/llvm/tools/llvm-dwarfdump/Statistics.cpp b/contrib/llvm/tools/llvm-dwarfdump/Statistics.cpp
index 5af853d4ef28..5fe7e8b4615b 100644
--- a/contrib/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/contrib/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -1,4 +1,6 @@
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
@@ -18,18 +20,27 @@ struct PerFunctionStats {
/// Number of constants with location across all inlined instances.
unsigned ConstantMembers = 0;
/// List of all Variables in this function.
- SmallDenseSet<uint32_t, 4> VarsInFunction;
+ StringSet<> VarsInFunction;
/// Compile units also cover a PC range, but have this flag set to false.
bool IsFunction = false;
};
-/// Holds accumulated global statistics about local variables.
+/// Holds accumulated global statistics about DIEs.
struct GlobalStats {
/// Total number of PC range bytes covered by DW_AT_locations.
unsigned ScopeBytesCovered = 0;
/// Total number of PC range bytes in each variable's enclosing scope,
/// starting from the first definition of the variable.
unsigned ScopeBytesFromFirstDefinition = 0;
+ /// Total number of call site entries (DW_TAG_call_site).
+ unsigned CallSiteEntries = 0;
+ /// Total byte size of concrete functions. This byte size includes
+ /// inline functions contained in the concrete functions.
+ uint64_t FunctionSize = 0;
+ /// Total byte size of inlined functions. This is the total number of bytes
+ /// for the top inline functions within concrete functions. This can help
+ /// tune the inline settings when compiling to match user expectations.
+ uint64_t InlineFunctionSize = 0;
};
/// Extract the low pc from a Die.
@@ -46,19 +57,37 @@ static uint64_t getLowPC(DWARFDie Die) {
}
/// Collect debug info quality metrics for one DIE.
-static void collectStatsForDie(DWARFDie Die, std::string Prefix,
- uint64_t ScopeLowPC, uint64_t BytesInScope,
+static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
+ std::string VarPrefix, uint64_t ScopeLowPC,
+ uint64_t BytesInScope,
+ uint32_t InlineDepth,
StringMap<PerFunctionStats> &FnStatMap,
GlobalStats &GlobalStats) {
bool HasLoc = false;
uint64_t BytesCovered = 0;
uint64_t OffsetToFirstDefinition = 0;
+
+ if (Die.getTag() == dwarf::DW_TAG_call_site) {
+ GlobalStats.CallSiteEntries++;
+ return;
+ }
+
+ if (Die.getTag() != dwarf::DW_TAG_formal_parameter &&
+ Die.getTag() != dwarf::DW_TAG_variable &&
+ Die.getTag() != dwarf::DW_TAG_member) {
+ // Not a variable or constant member.
+ return;
+ }
+
if (Die.find(dwarf::DW_AT_const_value)) {
// This catches constant members *and* variables.
HasLoc = true;
BytesCovered = BytesInScope;
- } else if (Die.getTag() == dwarf::DW_TAG_variable ||
- Die.getTag() == dwarf::DW_TAG_formal_parameter) {
+ } else {
+ if (Die.getTag() == dwarf::DW_TAG_member) {
+ // Non-const member.
+ return;
+ }
// Handle variables and function arguments.
auto FormValue = Die.find(dwarf::DW_AT_location);
HasLoc = FormValue.hasValue();
@@ -86,19 +115,17 @@ static void collectStatsForDie(DWARFDie Die, std::string Prefix,
BytesCovered = BytesInScope;
}
}
- } else {
- // Not a variable or constant member.
- return;
}
// Collect PC range coverage data.
- auto &FnStats = FnStatMap[Prefix];
+ auto &FnStats = FnStatMap[FnPrefix];
if (DWARFDie D =
Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin))
Die = D;
- // This is a unique ID for the variable inside the current object file.
- unsigned CanonicalDieOffset = Die.getOffset();
- FnStats.VarsInFunction.insert(CanonicalDieOffset);
+ // By using the variable name + the path through the lexical block tree, the
+ // keys are consistent across duplicate abstract origins in different CUs.
+ std::string VarName = StringRef(Die.getName(DINameKind::ShortName));
+ FnStats.VarsInFunction.insert(VarPrefix+VarName);
if (BytesInScope) {
FnStats.TotalVarWithLoc += (unsigned)HasLoc;
// Adjust for the fact the variables often start their lifetime in the
@@ -115,24 +142,34 @@ static void collectStatsForDie(DWARFDie Die, std::string Prefix,
}
/// Recursively collect debug info quality metrics.
-static void collectStatsRecursive(DWARFDie Die, std::string Prefix,
- uint64_t ScopeLowPC, uint64_t BytesInScope,
+static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
+ std::string VarPrefix, uint64_t ScopeLowPC,
+ uint64_t BytesInScope,
+ uint32_t InlineDepth,
StringMap<PerFunctionStats> &FnStatMap,
GlobalStats &GlobalStats) {
// Handle any kind of lexical scope.
- if (Die.getTag() == dwarf::DW_TAG_subprogram ||
- Die.getTag() == dwarf::DW_TAG_inlined_subroutine ||
- Die.getTag() == dwarf::DW_TAG_lexical_block) {
+ const dwarf::Tag Tag = Die.getTag();
+ const bool IsFunction = Tag == dwarf::DW_TAG_subprogram;
+ const bool IsBlock = Tag == dwarf::DW_TAG_lexical_block;
+ const bool IsInlinedFunction = Tag == dwarf::DW_TAG_inlined_subroutine;
+ if (IsFunction || IsInlinedFunction || IsBlock) {
+
+ // Reset VarPrefix when entering a new function.
+ if (Die.getTag() == dwarf::DW_TAG_subprogram ||
+ Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
+ VarPrefix = "v";
+
// Ignore forward declarations.
if (Die.find(dwarf::DW_AT_declaration))
return;
// Count the function.
- if (Die.getTag() != dwarf::DW_TAG_lexical_block) {
+ if (!IsBlock) {
StringRef Name = Die.getName(DINameKind::LinkageName);
if (Name.empty())
Name = Die.getName(DINameKind::ShortName);
- Prefix = Name;
+ FnPrefix = Name;
// Skip over abstract origins.
if (Die.find(dwarf::DW_AT_inline))
return;
@@ -148,26 +185,42 @@ static void collectStatsRecursive(DWARFDie Die, std::string Prefix,
llvm::consumeError(RangesOrError.takeError());
return;
}
-
+
auto Ranges = RangesOrError.get();
uint64_t BytesInThisScope = 0;
for (auto Range : Ranges)
BytesInThisScope += Range.HighPC - Range.LowPC;
ScopeLowPC = getLowPC(Die);
- if (BytesInThisScope)
+ if (BytesInThisScope) {
BytesInScope = BytesInThisScope;
+ if (IsFunction)
+ GlobalStats.FunctionSize += BytesInThisScope;
+ else if (IsInlinedFunction && InlineDepth == 0)
+ GlobalStats.InlineFunctionSize += BytesInThisScope;
+ }
} else {
// Not a scope, visit the Die itself. It could be a variable.
- collectStatsForDie(Die, Prefix, ScopeLowPC, BytesInScope, FnStatMap,
- GlobalStats);
+ collectStatsForDie(Die, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope,
+ InlineDepth, FnStatMap, GlobalStats);
}
+ // Set InlineDepth correctly for child recursion
+ if (IsFunction)
+ InlineDepth = 0;
+ else if (IsInlinedFunction)
+ ++InlineDepth;
+
// Traverse children.
+ unsigned LexicalBlockIndex = 0;
DWARFDie Child = Die.getFirstChild();
while (Child) {
- collectStatsRecursive(Child, Prefix, ScopeLowPC, BytesInScope, FnStatMap,
- GlobalStats);
+ std::string ChildVarPrefix = VarPrefix;
+ if (Child.getTag() == dwarf::DW_TAG_lexical_block)
+ ChildVarPrefix += toHex(LexicalBlockIndex++) + '.';
+
+ collectStatsRecursive(Child, FnPrefix, ChildVarPrefix, ScopeLowPC,
+ BytesInScope, InlineDepth, FnStatMap, GlobalStats);
Child = Child.getSibling();
}
}
@@ -200,7 +253,7 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
StringMap<PerFunctionStats> Statistics;
for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units())
if (DWARFDie CUDie = CU->getUnitDIE(false))
- collectStatsRecursive(CUDie, "/", 0, 0, Statistics, GlobalStats);
+ collectStatsRecursive(CUDie, "/", "g", 0, 0, 0, Statistics, GlobalStats);
/// The version number should be increased every time the algorithm is changed
/// (including bug fixes). New metrics may be added without increasing the
@@ -218,16 +271,15 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
VarWithLoc += Stats.TotalVarWithLoc + Constants;
VarTotal += TotalVars + Constants;
VarUnique += Stats.VarsInFunction.size();
- LLVM_DEBUG(for (auto V
- : Stats.VarsInFunction) llvm::dbgs()
- << Entry.getKey() << ": " << V << "\n");
+ LLVM_DEBUG(for (auto &V : Stats.VarsInFunction) llvm::dbgs()
+ << Entry.getKey() << ": " << V.getKey() << "\n");
NumFunctions += Stats.IsFunction;
NumInlinedFunctions += Stats.IsFunction * Stats.NumFnInlined;
}
// Print summary.
OS.SetBufferSize(1024);
- OS << "{\"version\":\"" << Version << '"';
+ OS << "{\"version\":" << Version;
LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n";
llvm::dbgs() << "---------------------------------\n");
printDatum(OS, "file", Filename.str());
@@ -237,9 +289,12 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
printDatum(OS, "unique source variables", VarUnique);
printDatum(OS, "source variables", VarTotal);
printDatum(OS, "variables with location", VarWithLoc);
+ printDatum(OS, "call site entries", GlobalStats.CallSiteEntries);
printDatum(OS, "scope bytes total",
GlobalStats.ScopeBytesFromFirstDefinition);
printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered);
+ printDatum(OS, "total function size", GlobalStats.FunctionSize);
+ printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize);
OS << "}\n";
LLVM_DEBUG(
llvm::dbgs() << "Total Availability: "
diff --git a/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index d75f33906098..d9e8e36efe5c 100644
--- a/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -156,7 +156,7 @@ static list<std::string> Name(
value_desc("pattern"), cat(DwarfDumpCategory));
static alias NameAlias("n", desc("Alias for -name"), aliasopt(Name));
static opt<unsigned long long> Lookup("lookup",
- desc("Lookup <address> in the debug information and print out any"
+ desc("Lookup <address> in the debug information and print out any "
"available file, function, block and line table details."),
value_desc("address"), cat(DwarfDumpCategory));
static opt<std::string>
@@ -226,7 +226,7 @@ static alias VerboseAlias("v", desc("Alias for -verbose."), aliasopt(Verbose),
static void error(StringRef Prefix, std::error_code EC) {
if (!EC)
return;
- errs() << Prefix << ": " << EC.message() << "\n";
+ WithColor::error() << Prefix << ": " << EC.message() << "\n";
exit(1);
}
@@ -281,32 +281,45 @@ using HandlerFn = std::function<bool(ObjectFile &, DWARFContext &DICtx, Twine,
raw_ostream &)>;
/// Print only DIEs that have a certain name.
+static bool filterByName(const StringSet<> &Names, DWARFDie Die,
+ StringRef NameRef, raw_ostream &OS) {
+ std::string Name =
+ (IgnoreCase && !UseRegex) ? NameRef.lower() : NameRef.str();
+ if (UseRegex) {
+ // Match regular expression.
+ for (auto Pattern : Names.keys()) {
+ Regex RE(Pattern, IgnoreCase ? Regex::IgnoreCase : Regex::NoFlags);
+ std::string Error;
+ if (!RE.isValid(Error)) {
+ errs() << "error in regular expression: " << Error << "\n";
+ exit(1);
+ }
+ if (RE.match(Name)) {
+ Die.dump(OS, 0, getDumpOpts());
+ return true;
+ }
+ }
+ } else if (Names.count(Name)) {
+ // Match full text.
+ Die.dump(OS, 0, getDumpOpts());
+ return true;
+ }
+ return false;
+}
+
+/// Print only DIEs that have a certain name.
static void filterByName(const StringSet<> &Names,
- DWARFContext::cu_iterator_range CUs, raw_ostream &OS) {
+ DWARFContext::unit_iterator_range CUs,
+ raw_ostream &OS) {
for (const auto &CU : CUs)
for (const auto &Entry : CU->dies()) {
DWARFDie Die = {CU.get(), &Entry};
- if (const char *NamePtr = Die.getName(DINameKind::ShortName)) {
- std::string Name =
- (IgnoreCase && !UseRegex) ? StringRef(NamePtr).lower() : NamePtr;
- // Match regular expression.
- if (UseRegex)
- for (auto Pattern : Names.keys()) {
- Regex RE(Pattern, IgnoreCase ? Regex::IgnoreCase : Regex::NoFlags);
- std::string Error;
- if (!RE.isValid(Error)) {
- errs() << "error in regular expression: " << Error << "\n";
- exit(1);
- }
- if (RE.match(Name))
- Die.dump(OS, 0, getDumpOpts());
- }
- // Match full text.
- else if (Names.count(Name))
- Die.dump(OS, 0, getDumpOpts());
- }
+ if (const char *Name = Die.getName(DINameKind::ShortName))
+ if (filterByName(Names, Die, Name, OS))
+ continue;
+ if (const char *Name = Die.getName(DINameKind::LinkageName))
+ filterByName(Names, Die, Name, OS);
}
-
}
static void getDies(DWARFContext &DICtx, const AppleAcceleratorTable &Accel,
@@ -358,7 +371,7 @@ static void filterByAccelName(ArrayRef<std::string> Names, DWARFContext &DICtx,
getDies(DICtx, DICtx.getAppleNamespaces(), Name, Dies);
getDies(DICtx, DICtx.getDebugNames(), Name, Dies);
}
- llvm::sort(Dies.begin(), Dies.end());
+ llvm::sort(Dies);
Dies.erase(std::unique(Dies.begin(), Dies.end()), Dies.end());
for (DWARFDie Die : Dies)
@@ -409,8 +422,8 @@ static bool dumpObjectFile(ObjectFile &Obj, DWARFContext &DICtx, Twine Filename,
for (auto name : Name)
Names.insert((IgnoreCase && !UseRegex) ? StringRef(name).lower() : name);
- filterByName(Names, DICtx.compile_units(), OS);
- filterByName(Names, DICtx.dwo_compile_units(), OS);
+ filterByName(Names, DICtx.normal_units(), OS);
+ filterByName(Names, DICtx.dwo_units(), OS);
return true;
}
@@ -558,6 +571,14 @@ int main(int argc, char **argv) {
return 0;
}
+ // FIXME: Audit interactions between these two options and make them
+ // compatible.
+ if (Diff && Verbose) {
+ WithColor::error() << "incompatible arguments: specifying both -diff and "
+ "-verbose is currently not supported";
+ return 0;
+ }
+
std::unique_ptr<ToolOutputFile> OutputFile;
if (!OutputFilename.empty()) {
std::error_code EC;
@@ -611,7 +632,7 @@ int main(int argc, char **argv) {
if (Verify) {
// If we encountered errors during verify, exit with a non-zero exit status.
- if (!std::all_of(Objects.begin(), Objects.end(), [&](std::string Object) {
+ if (!all_of(Objects, [&](std::string Object) {
return handleFile(Object, verifyObjectFile, OS);
}))
exit(1);
diff --git a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
index 75668a9dd8b6..b6facc919b51 100644
--- a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -158,7 +158,7 @@ static cl::opt<int>
ThinLTOCachePruningInterval("thinlto-cache-pruning-interval",
cl::init(1200), cl::desc("Set ThinLTO cache pruning interval."));
-static cl::opt<int>
+static cl::opt<unsigned long long>
ThinLTOCacheMaxSizeBytes("thinlto-cache-max-size-bytes",
cl::desc("Set ThinLTO cache pruning directory maximum size in bytes."));
@@ -166,6 +166,10 @@ static cl::opt<int>
ThinLTOCacheMaxSizeFiles("thinlto-cache-max-size-files", cl::init(1000000),
cl::desc("Set ThinLTO cache pruning directory maximum number of files."));
+static cl::opt<unsigned>
+ ThinLTOCacheEntryExpiration("thinlto-cache-entry-expiration", cl::init(604800) /* 1w */,
+ cl::desc("Set ThinLTO cache entry expiration time."));
+
static cl::opt<std::string> ThinLTOSaveTempsPrefix(
"thinlto-save-temps",
cl::desc("Save ThinLTO temp files using filenames created by adding "
@@ -481,6 +485,7 @@ public:
ThinGenerator.setTargetOptions(Options);
ThinGenerator.setCacheDir(ThinLTOCacheDir);
ThinGenerator.setCachePruningInterval(ThinLTOCachePruningInterval);
+ ThinGenerator.setCacheEntryExpiration(ThinLTOCacheEntryExpiration);
ThinGenerator.setCacheMaxSizeFiles(ThinLTOCacheMaxSizeFiles);
ThinGenerator.setCacheMaxSizeBytes(ThinLTOCacheMaxSizeBytes);
ThinGenerator.setFreestanding(EnableFreestanding);
@@ -557,11 +562,14 @@ private:
auto Index = loadCombinedIndex();
for (auto &Filename : InputFilenames) {
+ LLVMContext Ctx;
+ auto TheModule = loadModule(Filename, Ctx);
+
// Build a map of module to the GUIDs and summary objects that should
// be written to its index.
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
- ThinLTOCodeGenerator::gatherImportedSummariesForModule(
- Filename, *Index, ModuleToSummariesForIndex);
+ ThinGenerator.gatherImportedSummariesForModule(*TheModule, *Index,
+ ModuleToSummariesForIndex);
std::string OutputName = OutputFilename;
if (OutputName.empty()) {
@@ -589,12 +597,14 @@ private:
auto Index = loadCombinedIndex();
for (auto &Filename : InputFilenames) {
+ LLVMContext Ctx;
+ auto TheModule = loadModule(Filename, Ctx);
std::string OutputName = OutputFilename;
if (OutputName.empty()) {
OutputName = Filename + ".imports";
}
OutputName = getThinLTOOutputFile(OutputName, OldPrefix, NewPrefix);
- ThinLTOCodeGenerator::emitImports(Filename, OutputName, *Index);
+ ThinGenerator.emitImports(*TheModule, OutputName, *Index);
}
}
diff --git a/contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp b/contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp
index 442973f90209..26426367e252 100644
--- a/contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -23,6 +23,7 @@
#include "llvm/LTO/LTO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Threading.h"
@@ -388,6 +389,7 @@ static int dumpSymtab(int argc, char **argv) {
}
int main(int argc, char **argv) {
+ InitLLVM X(argc, argv);
InitializeAllTargets();
InitializeAllTargetMCs();
InitializeAllAsmPrinters();
diff --git a/contrib/llvm/tools/llvm-mc/llvm-mc.cpp b/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
index f494d02f3bca..c0976502f545 100644
--- a/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -164,6 +164,10 @@ MainFileName("main-file-name",
static cl::opt<bool> SaveTempLabels("save-temp-labels",
cl::desc("Don't discard temporary labels"));
+static cl::opt<bool> LexMasmIntegers(
+ "masm-integers",
+ cl::desc("Enable binary and hex masm integers (0b110 and 0ABCh)"));
+
static cl::opt<bool> NoExecStack("no-exec-stack",
cl::desc("File doesn't need an exec stack"));
@@ -293,6 +297,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget,
return SymbolResult;
Parser->setShowParsedOperands(ShowInstOperands);
Parser->setTargetParser(*TAP);
+ Parser->getLexer().setLexMasmIntegers(LexMasmIntegers);
int Res = Parser->Run(NoInitialTextSection);
@@ -313,7 +318,6 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
- TripleName = Triple::normalize(TripleName);
setDwarfDebugFlags(argc, argv);
setDwarfDebugProducer();
diff --git a/contrib/llvm/tools/llvm-mca/CodeRegion.cpp b/contrib/llvm/tools/llvm-mca/CodeRegion.cpp
index 896865996504..29a27c50c171 100644
--- a/contrib/llvm/tools/llvm-mca/CodeRegion.cpp
+++ b/contrib/llvm/tools/llvm-mca/CodeRegion.cpp
@@ -14,11 +14,10 @@
#include "CodeRegion.h"
-using namespace llvm;
-
+namespace llvm {
namespace mca {
-bool CodeRegion::isLocInRange(SMLoc Loc) const {
+bool CodeRegion::isLocInRange(llvm::SMLoc Loc) const {
if (RangeEnd.isValid() && Loc.getPointer() > RangeEnd.getPointer())
return false;
if (RangeStart.isValid() && Loc.getPointer() < RangeStart.getPointer())
@@ -26,11 +25,11 @@ bool CodeRegion::isLocInRange(SMLoc Loc) const {
return true;
}
-void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) {
+void CodeRegions::beginRegion(llvm::StringRef Description, llvm::SMLoc Loc) {
assert(!Regions.empty() && "Missing Default region");
const CodeRegion &CurrentRegion = *Regions.back();
if (CurrentRegion.startLoc().isValid() && !CurrentRegion.endLoc().isValid()) {
- SM.PrintMessage(Loc, SourceMgr::DK_Warning,
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Warning,
"Ignoring invalid region start");
return;
}
@@ -41,26 +40,28 @@ void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) {
addRegion(Description, Loc);
}
-void CodeRegions::endRegion(SMLoc Loc) {
+void CodeRegions::endRegion(llvm::SMLoc Loc) {
assert(!Regions.empty() && "Missing Default region");
CodeRegion &CurrentRegion = *Regions.back();
if (CurrentRegion.endLoc().isValid()) {
- SM.PrintMessage(Loc, SourceMgr::DK_Warning, "Ignoring invalid region end");
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Warning,
+ "Ignoring invalid region end");
return;
}
CurrentRegion.setEndLocation(Loc);
}
-void CodeRegions::addInstruction(std::unique_ptr<const MCInst> Instruction) {
- const SMLoc &Loc = Instruction->getLoc();
+void CodeRegions::addInstruction(const llvm::MCInst &Instruction) {
+ const llvm::SMLoc &Loc = Instruction.getLoc();
const auto It =
std::find_if(Regions.rbegin(), Regions.rend(),
[Loc](const std::unique_ptr<CodeRegion> &Region) {
return Region->isLocInRange(Loc);
});
if (It != Regions.rend())
- (*It)->addInstruction(std::move(Instruction));
+ (*It)->addInstruction(Instruction);
}
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/CodeRegion.h b/contrib/llvm/tools/llvm-mca/CodeRegion.h
index 7f0025e4884c..867aa18bb4fe 100644
--- a/contrib/llvm/tools/llvm-mca/CodeRegion.h
+++ b/contrib/llvm/tools/llvm-mca/CodeRegion.h
@@ -34,12 +34,14 @@
#ifndef LLVM_TOOLS_LLVM_MCA_CODEREGION_H
#define LLVM_TOOLS_LLVM_MCA_CODEREGION_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include <vector>
+namespace llvm {
namespace mca {
/// A region of assembly code.
@@ -49,7 +51,7 @@ class CodeRegion {
// An optional descriptor for this region.
llvm::StringRef Description;
// Instructions that form this region.
- std::vector<std::unique_ptr<const llvm::MCInst>> Instructions;
+ std::vector<llvm::MCInst> Instructions;
// Source location range.
llvm::SMLoc RangeStart;
llvm::SMLoc RangeEnd;
@@ -61,8 +63,8 @@ public:
CodeRegion(llvm::StringRef Desc, llvm::SMLoc Start)
: Description(Desc), RangeStart(Start), RangeEnd() {}
- void addInstruction(std::unique_ptr<const llvm::MCInst> Instruction) {
- Instructions.emplace_back(std::move(Instruction));
+ void addInstruction(const llvm::MCInst &Instruction) {
+ Instructions.emplace_back(Instruction);
}
llvm::SMLoc startLoc() const { return RangeStart; }
@@ -72,10 +74,7 @@ public:
bool empty() const { return Instructions.empty(); }
bool isLocInRange(llvm::SMLoc Loc) const;
- const std::vector<std::unique_ptr<const llvm::MCInst>> &
- getInstructions() const {
- return Instructions;
- }
+ llvm::ArrayRef<llvm::MCInst> getInstructions() const { return Instructions; }
llvm::StringRef getDescription() const { return Description; }
};
@@ -106,26 +105,26 @@ public:
void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc);
void endRegion(llvm::SMLoc Loc);
- void addInstruction(std::unique_ptr<const llvm::MCInst> Instruction);
+ void addInstruction(const llvm::MCInst &Instruction);
+ llvm::SourceMgr &getSourceMgr() const { return SM; }
CodeRegions(llvm::SourceMgr &S) : SM(S) {
// Create a default region for the input code sequence.
addRegion("Default", llvm::SMLoc());
}
- const std::vector<std::unique_ptr<const llvm::MCInst>> &
- getInstructionSequence(unsigned Idx) const {
+ llvm::ArrayRef<llvm::MCInst> getInstructionSequence(unsigned Idx) const {
return Regions[Idx]->getInstructions();
}
bool empty() const {
- return std::all_of(Regions.begin(), Regions.end(),
- [](const std::unique_ptr<CodeRegion> &Region) {
- return Region->empty();
- });
+ return llvm::all_of(Regions, [](const std::unique_ptr<CodeRegion> &Region) {
+ return Region->empty();
+ });
}
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.cpp b/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
new file mode 100644
index 000000000000..5bd37adeeae9
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -0,0 +1,137 @@
+//===----------------------- CodeRegionGenerator.cpp ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines classes responsible for generating llvm-mca
+/// CodeRegions from various types of input. llvm-mca only analyzes CodeRegions,
+/// so the classes here provide the input-to-CodeRegions translation.
+//
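+// As a sketch of the expected input, assembly can delimit analysis regions
+// with special comments (the region name after LLVM-MCA-BEGIN is optional):
+//
+//   # LLVM-MCA-BEGIN my-loop
+//   add %eax, %ebx
+//   # LLVM-MCA-END
+//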
+//===----------------------------------------------------------------------===//
+
+#include "CodeRegionGenerator.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SMLoc.h"
+#include <memory>
+
+namespace llvm {
+namespace mca {
+
+// This virtual dtor serves as the anchor for the CodeRegionGenerator class.
+CodeRegionGenerator::~CodeRegionGenerator() {}
+
+// A comment consumer that parses strings. The only valid tokens are strings.
+class MCACommentConsumer : public AsmCommentConsumer {
+public:
+ CodeRegions &Regions;
+
+ MCACommentConsumer(CodeRegions &R) : Regions(R) {}
+ void HandleComment(SMLoc Loc, StringRef CommentText) override;
+};
+
+// This class provides the callbacks that occur when parsing input assembly.
+class MCStreamerWrapper final : public MCStreamer {
+ CodeRegions &Regions;
+
+public:
+ MCStreamerWrapper(MCContext &Context, mca::CodeRegions &R)
+ : MCStreamer(Context), Regions(R) {}
+
+ // We only want to intercept the emission of new instructions.
+ virtual void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo & /* unused */,
+ bool /* unused */) override {
+ Regions.addInstruction(Inst);
+ }
+
+ bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override {
+ return true;
+ }
+
+ void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) override {}
+ void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
+ uint64_t Size = 0, unsigned ByteAlignment = 0,
+ SMLoc Loc = SMLoc()) override {}
+ void EmitGPRel32Value(const MCExpr *Value) override {}
+ void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
+ void EmitCOFFSymbolStorageClass(int StorageClass) override {}
+ void EmitCOFFSymbolType(int Type) override {}
+ void EndCOFFSymbolDef() override {}
+
+ ArrayRef<MCInst> GetInstructionSequence(unsigned Index) const {
+ return Regions.getInstructionSequence(Index);
+ }
+};
+
+void MCACommentConsumer::HandleComment(SMLoc Loc, StringRef CommentText) {
+ // Skip empty comments.
+ StringRef Comment(CommentText);
+ if (Comment.empty())
+ return;
+
+ // Skip spaces and tabs.
+ unsigned Position = Comment.find_first_not_of(" \t");
+ if (Position >= Comment.size())
+ // We reached the end of the comment. Bail out.
+ return;
+
+ Comment = Comment.drop_front(Position);
+ if (Comment.consume_front("LLVM-MCA-END")) {
+ Regions.endRegion(Loc);
+ return;
+ }
+
+ // Try to parse the LLVM-MCA-BEGIN comment.
+ if (!Comment.consume_front("LLVM-MCA-BEGIN"))
+ return;
+
+ // Skip spaces and tabs.
+ Position = Comment.find_first_not_of(" \t");
+ if (Position < Comment.size())
+ Comment = Comment.drop_front(Position);
+ // Use the rest of the string as a descriptor for this code snippet.
+ Regions.beginRegion(Comment, Loc);
+}
+
+Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions() {
+ MCTargetOptions Opts;
+ Opts.PreserveAsmComments = false;
+ MCStreamerWrapper Str(Ctx, Regions);
+
+ // Create a MCAsmParser and setup the lexer to recognize llvm-mca ASM
+ // comments.
+ std::unique_ptr<MCAsmParser> Parser(
+ createMCAsmParser(Regions.getSourceMgr(), Ctx, Str, MAI));
+ MCAsmLexer &Lexer = Parser->getLexer();
+ MCACommentConsumer CC(Regions);
+ Lexer.setCommentConsumer(&CC);
+
+ // Create a target-specific parser and perform the parse.
+ std::unique_ptr<MCTargetAsmParser> TAP(
+ TheTarget.createMCAsmParser(STI, *Parser, MCII, Opts));
+ if (!TAP)
+ return make_error<StringError>(
+ "This target does not support assembly parsing.",
+ inconvertibleErrorCode());
+ Parser->setTargetParser(*TAP);
+ Parser->Run(false);
+
+ // Get the assembler dialect from the input. llvm-mca will use this as the
+ // default dialect when printing reports.
+ AssemblerDialect = Parser->getAssemblerDialect();
+ return Regions;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.h b/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.h
new file mode 100644
index 000000000000..892cafb92686
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.h
@@ -0,0 +1,70 @@
+//===----------------------- CodeRegionGenerator.h --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares classes responsible for generating llvm-mca
+/// CodeRegions from various types of input. llvm-mca only analyzes CodeRegions,
+/// so the classes here provide the input-to-CodeRegions translation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H
+#define LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H
+
+#include "CodeRegion.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <memory>
+
+namespace llvm {
+namespace mca {
+
+/// This class is responsible for parsing the input given to the llvm-mca
+/// driver, and converting that into a CodeRegions instance.
+class CodeRegionGenerator {
+protected:
+ CodeRegions Regions;
+ CodeRegionGenerator(const CodeRegionGenerator &) = delete;
+ CodeRegionGenerator &operator=(const CodeRegionGenerator &) = delete;
+
+public:
+ CodeRegionGenerator(SourceMgr &SM) : Regions(SM) {}
+ virtual ~CodeRegionGenerator();
+ virtual Expected<const CodeRegions &> parseCodeRegions() = 0;
+};
+
+/// This class is responsible for parsing input ASM and generating
+/// a CodeRegions instance.
+class AsmCodeRegionGenerator final : public CodeRegionGenerator {
+ const Target &TheTarget;
+ MCContext &Ctx;
+ const MCAsmInfo &MAI;
+ const MCSubtargetInfo &STI;
+ const MCInstrInfo &MCII;
+ unsigned AssemblerDialect; // This is set during parsing.
+
+public:
+ AsmCodeRegionGenerator(const Target &T, SourceMgr &SM, MCContext &C,
+ const MCAsmInfo &A, const MCSubtargetInfo &S,
+ const MCInstrInfo &I)
+ : CodeRegionGenerator(SM), TheTarget(T), Ctx(C), MAI(A), STI(S), MCII(I),
+ AssemblerDialect(0) {}
+
+ unsigned getAssemblerDialect() const { return AssemblerDialect; }
+ Expected<const CodeRegions &> parseCodeRegions() override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H
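
A rough usage sketch for the new interface, using only what is declared above; TheTarget, SrcMgr, Ctx, MAI, STI and MCII are assumed to come from the driver's usual target/MC setup and are not shown here.

llvm::mca::AsmCodeRegionGenerator CRG(TheTarget, SrcMgr, Ctx, MAI, STI, MCII);
llvm::Expected<const llvm::mca::CodeRegions &> RegionsOrErr =
    CRG.parseCodeRegions();
if (!RegionsOrErr) {
  llvm::errs() << llvm::toString(RegionsOrErr.takeError()) << '\n';
  return 1;
}
const llvm::mca::CodeRegions &Regions = *RegionsOrErr;
unsigned Dialect = CRG.getAssemblerDialect(); // only meaningful after parsing
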
diff --git a/contrib/llvm/tools/llvm-mca/Context.cpp b/contrib/llvm/tools/llvm-mca/Context.cpp
deleted file mode 100644
index 685714e64b92..000000000000
--- a/contrib/llvm/tools/llvm-mca/Context.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-//===---------------------------- Context.cpp -------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines a class for holding ownership of various simulated
-/// hardware units. A Context also provides a utility routine for constructing
-/// a default out-of-order pipeline with fetch, dispatch, execute, and retire
-/// stages.
-///
-//===----------------------------------------------------------------------===//
-
-#include "Context.h"
-#include "DispatchStage.h"
-#include "ExecuteStage.h"
-#include "FetchStage.h"
-#include "RegisterFile.h"
-#include "RetireControlUnit.h"
-#include "RetireStage.h"
-#include "Scheduler.h"
-
-namespace mca {
-
-using namespace llvm;
-
-std::unique_ptr<Pipeline>
-Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
- SourceMgr &SrcMgr) {
- const MCSchedModel &SM = STI.getSchedModel();
-
- // Create the hardware units defining the backend.
- auto RCU = llvm::make_unique<RetireControlUnit>(SM);
- auto PRF = llvm::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
- auto HWS = llvm::make_unique<Scheduler>(
- SM, Opts.LoadQueueSize, Opts.StoreQueueSize, Opts.AssumeNoAlias);
-
- // Create the pipeline and its stages.
- auto P = llvm::make_unique<Pipeline>();
- auto F = llvm::make_unique<FetchStage>(IB, SrcMgr);
- auto D = llvm::make_unique<DispatchStage>(
- STI, MRI, Opts.RegisterFileSize, Opts.DispatchWidth, *RCU, *PRF, *HWS);
- auto R = llvm::make_unique<RetireStage>(*RCU, *PRF);
- auto E = llvm::make_unique<ExecuteStage>(*RCU, *HWS);
-
- // Add the hardware to the context.
- addHardwareUnit(std::move(RCU));
- addHardwareUnit(std::move(PRF));
- addHardwareUnit(std::move(HWS));
-
- // Build the pipeline.
- P->appendStage(std::move(F));
- P->appendStage(std::move(D));
- P->appendStage(std::move(R));
- P->appendStage(std::move(E));
- return P;
-}
-
-} // namespace mca
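
One detail of the deleted function worth calling out: the stages only hold references to the hardware units, so the Context must take ownership of RCU, PRF and HWS (whose heap addresses stay stable) before the function returns. Below is a minimal, self-contained sketch of that ownership pattern in plain C++, independent of the mca types.

#include <memory>
#include <vector>

struct Unit { int State = 0; };            // stands in for RCU/PRF/Scheduler

struct Stage {                             // stands in for a pipeline stage
  Unit &U;                                 // stages only borrow the units
  explicit Stage(Unit &Ref) : U(Ref) {}
};

struct Owner {                             // stands in for Context + Pipeline
  std::vector<std::unique_ptr<Unit>> Units;
  std::vector<std::unique_ptr<Stage>> Stages;
};

int main() {
  Owner O;
  auto U = std::make_unique<Unit>();
  auto S = std::make_unique<Stage>(*U);    // reference taken before the move
  O.Units.push_back(std::move(U));         // ownership moves, address is stable
  O.Stages.push_back(std::move(S));
}
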
diff --git a/contrib/llvm/tools/llvm-mca/DispatchStatistics.cpp b/contrib/llvm/tools/llvm-mca/DispatchStatistics.cpp
deleted file mode 100644
index 4bddbef9a0c8..000000000000
--- a/contrib/llvm/tools/llvm-mca/DispatchStatistics.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-//===--------------------- DispatchStatistics.cpp ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements the DispatchStatistics interface.
-///
-//===----------------------------------------------------------------------===//
-
-#include "DispatchStatistics.h"
-#include "llvm/Support/Format.h"
-
-using namespace llvm;
-
-namespace mca {
-
-void DispatchStatistics::onEvent(const HWStallEvent &Event) {
- if (Event.Type < HWStallEvent::LastGenericEvent)
- HWStalls[Event.Type]++;
-}
-
-void DispatchStatistics::onEvent(const HWInstructionEvent &Event) {
- if (Event.Type == HWInstructionEvent::Dispatched)
- ++NumDispatched;
-}
-
-void DispatchStatistics::printDispatchHistogram(llvm::raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nDispatch Logic - "
- << "number of cycles where we saw N instructions dispatched:\n";
- TempStream << "[# dispatched], [# cycles]\n";
- for (const std::pair<unsigned, unsigned> &Entry : DispatchGroupSizePerCycle) {
- TempStream << " " << Entry.first << ", " << Entry.second
- << " ("
- << format("%.1f", ((double)Entry.second / NumCycles) * 100.0)
- << "%)\n";
- }
-
- TempStream.flush();
- OS << Buffer;
-}
-
-void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nDynamic Dispatch Stall Cycles:\n";
- TempStream << "RAT - Register unavailable: "
- << HWStalls[HWStallEvent::RegisterFileStall];
- TempStream << "\nRCU - Retire tokens unavailable: "
- << HWStalls[HWStallEvent::RetireControlUnitStall];
- TempStream << "\nSCHEDQ - Scheduler full: "
- << HWStalls[HWStallEvent::SchedulerQueueFull];
- TempStream << "\nLQ - Load queue full: "
- << HWStalls[HWStallEvent::LoadQueueFull];
- TempStream << "\nSQ - Store queue full: "
- << HWStalls[HWStallEvent::StoreQueueFull];
- TempStream << "\nGROUP - Static restrictions on the dispatch group: "
- << HWStalls[HWStallEvent::DispatchGroupStall];
- TempStream << '\n';
- TempStream.flush();
- OS << Buffer;
-}
-
-} // namespace mca
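
For context, the histogram printed above is keyed by the number of instructions dispatched in a single cycle. The counters themselves (NumDispatched, NumCycles, DispatchGroupSizePerCycle) are declared in DispatchStatistics.h, which is not part of this hunk; the sketch below is only a plausible shape for that bookkeeping, not the actual implementation.

#include <map>

struct DispatchHistogram {
  unsigned NumDispatched = 0;                  // instructions seen this cycle
  unsigned NumCycles = 0;
  std::map<unsigned, unsigned> GroupSizePerCycle;

  void onDispatch() { ++NumDispatched; }
  void onCycleEnd() {
    ++NumCycles;
    ++GroupSizePerCycle[NumDispatched];        // bucket by this cycle's width
    NumDispatched = 0;
  }
};
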
diff --git a/contrib/llvm/tools/llvm-mca/ExecuteStage.cpp b/contrib/llvm/tools/llvm-mca/ExecuteStage.cpp
deleted file mode 100644
index 437f864b072c..000000000000
--- a/contrib/llvm/tools/llvm-mca/ExecuteStage.cpp
+++ /dev/null
@@ -1,210 +0,0 @@
-//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines the execution stage of an instruction pipeline.
-///
-/// The ExecuteStage is responsible for managing the hardware scheduler
-/// and issuing notifications that an instruction has been executed.
-///
-//===----------------------------------------------------------------------===//
-
-#include "ExecuteStage.h"
-#include "Scheduler.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Debug.h"
-
-#define DEBUG_TYPE "llvm-mca"
-
-namespace mca {
-
-using namespace llvm;
-
-// Reclaim the simulated resources used by the scheduler.
-void ExecuteStage::reclaimSchedulerResources() {
- SmallVector<ResourceRef, 8> ResourcesFreed;
- HWS.reclaimSimulatedResources(ResourcesFreed);
- for (const ResourceRef &RR : ResourcesFreed)
- notifyResourceAvailable(RR);
-}
-
-// Update the scheduler's instruction queues.
-void ExecuteStage::updateSchedulerQueues() {
- SmallVector<InstRef, 4> InstructionIDs;
- HWS.updateIssuedQueue(InstructionIDs);
- for (const InstRef &IR : InstructionIDs)
- notifyInstructionExecuted(IR);
- InstructionIDs.clear();
-
- HWS.updatePendingQueue(InstructionIDs);
- for (const InstRef &IR : InstructionIDs)
- notifyInstructionReady(IR);
-}
-
-// Issue instructions that are waiting in the scheduler's ready queue.
-void ExecuteStage::issueReadyInstructions() {
- SmallVector<InstRef, 4> InstructionIDs;
- InstRef IR = HWS.select();
- while (IR.isValid()) {
- SmallVector<std::pair<ResourceRef, double>, 4> Used;
- HWS.issueInstruction(IR, Used);
-
- // Reclaim instruction resources and perform notifications.
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- notifyReleasedBuffers(Desc.Buffers);
- notifyInstructionIssued(IR, Used);
- if (IR.getInstruction()->isExecuted())
- notifyInstructionExecuted(IR);
-
- // Instructions that have been issued during this cycle might have unblocked
- // other dependent instructions. Dependent instructions may be issued during
- // this same cycle if operands have ReadAdvance entries. Promote those
- // instructions to the ReadyQueue and tell the caller that we need
- // another round of 'issue()'.
- HWS.promoteToReadyQueue(InstructionIDs);
- for (const InstRef &I : InstructionIDs)
- notifyInstructionReady(I);
- InstructionIDs.clear();
-
- // Select the next instruction to issue.
- IR = HWS.select();
- }
-}
-
-// The following routine is the maintenance routine of the ExecuteStage.
-// It is responsible for updating the hardware scheduler (HWS), including
-// reclaiming the HWS's simulated hardware resources, as well as updating the
-// HWS's queues.
-//
-// This routine also processes the instructions that are ready for issuance.
-// These instructions are managed by the HWS's ready queue and can be accessed
-// via the Scheduler::select() routine.
-//
-// Notifications are issued to this stage's listeners when instructions are
-// moved between the HWS's queues. In particular, when an instruction becomes
-// ready or executed.
-void ExecuteStage::cycleStart() {
- reclaimSchedulerResources();
- updateSchedulerQueues();
- issueReadyInstructions();
-}
-
-// Schedule the instruction for execution on the hardware.
-bool ExecuteStage::execute(InstRef &IR) {
-#ifndef NDEBUG
- // Ensure that the HWS has not stored this instruction in its queues.
- HWS.sanityCheck(IR);
-#endif
- // Reserve a slot in each buffered resource. Also, mark units with
- // BufferSize=0 as reserved. Resources with a buffer size of zero will only
- // be released after MCIS is issued, and all the ResourceCycles for those
- // units have been consumed.
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- HWS.reserveBuffers(Desc.Buffers);
- notifyReservedBuffers(Desc.Buffers);
-
- // Obtain a slot in the LSU. If we cannot reserve resources, return true, so
- // that succeeding stages can make progress.
- if (!HWS.reserveResources(IR))
- return true;
-
- // If we did not return early, then the scheduler is ready for execution.
- notifyInstructionReady(IR);
-
- // Don't add a zero-latency instruction to the Wait or Ready queue.
- // A zero-latency instruction doesn't consume any scheduler resources. That is
- // because it doesn't need to be executed, and it is often removed at register
- // renaming stage. For example, register-register moves are often optimized at
- // register renaming stage by simply updating register aliases. On some
- // targets, zero-idiom instructions (for example: a xor that clears the value
- // of a register) are treated specially, and are often eliminated at register
- // renaming stage.
- //
- // Instructions that use an in-order dispatch/issue processor resource must be
- // issued immediately to the pipeline(s). Any other in-order buffered
- // resources (i.e. BufferSize=1) are consumed.
- //
- // If we cannot issue immediately, the HWS will add IR to its ready queue for
- // execution later, so we must return early here.
- if (!HWS.issueImmediately(IR))
- return true;
-
- LLVM_DEBUG(dbgs() << "[SCHEDULER] Instruction #" << IR
- << " issued immediately\n");
-
- // Issue IR. The resources for this issuance will be placed in 'Used.'
- SmallVector<std::pair<ResourceRef, double>, 4> Used;
- HWS.issueInstruction(IR, Used);
-
- // Perform notifications.
- notifyReleasedBuffers(Desc.Buffers);
- notifyInstructionIssued(IR, Used);
- if (IR.getInstruction()->isExecuted())
- notifyInstructionExecuted(IR);
-
- return true;
-}
-
-void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) {
- HWS.onInstructionExecuted(IR);
- LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n');
- notifyEvent<HWInstructionEvent>(
- HWInstructionEvent(HWInstructionEvent::Executed, IR));
- RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID());
-}
-
-void ExecuteStage::notifyInstructionReady(const InstRef &IR) {
- LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n');
- notifyEvent<HWInstructionEvent>(
- HWInstructionEvent(HWInstructionEvent::Ready, IR));
-}
-
-void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) {
- LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.'
- << RR.second << "]\n");
- for (HWEventListener *Listener : getListeners())
- Listener->onResourceAvailable(RR);
-}
-
-void ExecuteStage::notifyInstructionIssued(
- const InstRef &IR, ArrayRef<std::pair<ResourceRef, double>> Used) {
- LLVM_DEBUG({
- dbgs() << "[E] Instruction Issued: #" << IR << '\n';
- for (const std::pair<ResourceRef, unsigned> &Resource : Used) {
- dbgs() << "[E] Resource Used: [" << Resource.first.first << '.'
- << Resource.first.second << "], ";
- dbgs() << "cycles: " << Resource.second << '\n';
- }
- });
- notifyEvent<HWInstructionEvent>(HWInstructionIssuedEvent(IR, Used));
-}
-
-void ExecuteStage::notifyReservedBuffers(ArrayRef<uint64_t> Buffers) {
- if (Buffers.empty())
- return;
-
- SmallVector<unsigned, 4> BufferIDs(Buffers.begin(), Buffers.end());
- std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(),
- [&](uint64_t Op) { return HWS.getResourceID(Op); });
- for (HWEventListener *Listener : getListeners())
- Listener->onReservedBuffers(BufferIDs);
-}
-
-void ExecuteStage::notifyReleasedBuffers(ArrayRef<uint64_t> Buffers) {
- if (Buffers.empty())
- return;
-
- SmallVector<unsigned, 4> BufferIDs(Buffers.begin(), Buffers.end());
- std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(),
- [&](uint64_t Op) { return HWS.getResourceID(Op); });
- for (HWEventListener *Listener : getListeners())
- Listener->onReleasedBuffers(BufferIDs);
-}
-
-} // namespace mca
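
The notify* helpers above fan events out to registered HWEventListener objects; views such as DispatchStatistics consume them through an onEvent(const HWInstructionEvent &) override. A minimal listener that only counts executed instructions might look like the sketch below (assuming that hook on HWEventListener, whose header is not shown in this diff).

// Sketch only; mca::HWEventListener and mca::HWInstructionEvent are assumed
// to provide the interface used by the statistics views in this tool.
class ExecutedCounter : public mca::HWEventListener {
  unsigned NumExecuted = 0;

public:
  void onEvent(const mca::HWInstructionEvent &Event) override {
    if (Event.Type == mca::HWInstructionEvent::Executed)
      ++NumExecuted;                     // one more instruction finished
  }
  unsigned getNumExecuted() const { return NumExecuted; }
};
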
diff --git a/contrib/llvm/tools/llvm-mca/ExecuteStage.h b/contrib/llvm/tools/llvm-mca/ExecuteStage.h
deleted file mode 100644
index 4914a9373e7c..000000000000
--- a/contrib/llvm/tools/llvm-mca/ExecuteStage.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines the execution stage of an instruction pipeline.
-///
-/// The ExecuteStage is responsible for managing the hardware scheduler
-/// and issuing notifications that an instruction has been executed.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H
-#define LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H
-
-#include "Instruction.h"
-#include "RetireControlUnit.h"
-#include "Scheduler.h"
-#include "Stage.h"
-#include "llvm/ADT/ArrayRef.h"
-
-namespace mca {
-
-class ExecuteStage : public Stage {
- // Owner will go away when we move listeners/eventing to the stages.
- RetireControlUnit &RCU;
- Scheduler &HWS;
-
- // The following routines are used to maintain the HWS.
- void reclaimSchedulerResources();
- void updateSchedulerQueues();
- void issueReadyInstructions();
-
-public:
- ExecuteStage(RetireControlUnit &R, Scheduler &S) : Stage(), RCU(R), HWS(S) {}
- ExecuteStage(const ExecuteStage &Other) = delete;
- ExecuteStage &operator=(const ExecuteStage &Other) = delete;
-
- // The ExecuteStage will always complete all of its work per call to
- // execute(), so it is never left in a 'to-be-processed' state.
- virtual bool hasWorkToComplete() const override final { return false; }
-
- virtual void cycleStart() override final;
- virtual bool execute(InstRef &IR) override final;
-
- void
- notifyInstructionIssued(const InstRef &IR,
- llvm::ArrayRef<std::pair<ResourceRef, double>> Used);
- void notifyInstructionExecuted(const InstRef &IR);
- void notifyInstructionReady(const InstRef &IR);
- void notifyResourceAvailable(const ResourceRef &RR);
-
- // Notify listeners that buffered resources were consumed.
- void notifyReservedBuffers(llvm::ArrayRef<uint64_t> Buffers);
-
- // Notify listeners that buffered resources were freed.
- void notifyReleasedBuffers(llvm::ArrayRef<uint64_t> Buffers);
-};
-
-} // namespace mca
-
-#endif // LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H
diff --git a/contrib/llvm/tools/llvm-mca/FetchStage.cpp b/contrib/llvm/tools/llvm-mca/FetchStage.cpp
deleted file mode 100644
index 3da117c0abc1..000000000000
--- a/contrib/llvm/tools/llvm-mca/FetchStage.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-//===---------------------- FetchStage.cpp ----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines the Fetch stage of an instruction pipeline. Its sole
-/// purpose in life is to produce instructions for the rest of the pipeline.
-///
-//===----------------------------------------------------------------------===//
-
-#include "FetchStage.h"
-
-namespace mca {
-
-bool FetchStage::hasWorkToComplete() const { return SM.hasNext(); }
-
-bool FetchStage::execute(InstRef &IR) {
- if (!SM.hasNext())
- return false;
- const SourceRef SR = SM.peekNext();
- std::unique_ptr<Instruction> I = IB.createInstruction(*SR.second);
- IR = InstRef(SR.first, I.get());
- Instructions[IR.getSourceIndex()] = std::move(I);
- return true;
-}
-
-void FetchStage::postExecute() { SM.updateNext(); }
-
-void FetchStage::cycleEnd() {
- // Find the first instruction which hasn't been retired.
- const InstMap::iterator It =
- llvm::find_if(Instructions, [](const InstMap::value_type &KeyValuePair) {
- return !KeyValuePair.second->isRetired();
- });
-
- // Erase instructions up to the first that hasn't been retired.
- if (It != Instructions.begin())
- Instructions.erase(Instructions.begin(), It);
-}
-
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/FetchStage.h b/contrib/llvm/tools/llvm-mca/FetchStage.h
deleted file mode 100644
index 620075d24fea..000000000000
--- a/contrib/llvm/tools/llvm-mca/FetchStage.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===---------------------- FetchStage.h ------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines the Fetch stage of an instruction pipeline. Its sole
-/// purpose in life is to produce instructions for the rest of the pipeline.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H
-#define LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H
-
-#include "InstrBuilder.h"
-#include "SourceMgr.h"
-#include "Stage.h"
-#include <map>
-
-namespace mca {
-
-class FetchStage : public Stage {
- using InstMap = std::map<unsigned, std::unique_ptr<Instruction>>;
- InstMap Instructions;
- InstrBuilder &IB;
- SourceMgr &SM;
-
-public:
- FetchStage(InstrBuilder &IB, SourceMgr &SM) : IB(IB), SM(SM) {}
- FetchStage(const FetchStage &Other) = delete;
- FetchStage &operator=(const FetchStage &Other) = delete;
-
- bool hasWorkToComplete() const override final;
- bool execute(InstRef &IR) override final;
- void postExecute() override final;
- void cycleEnd() override final;
-};
-
-} // namespace mca
-
-#endif // LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H
diff --git a/contrib/llvm/tools/llvm-mca/InstrBuilder.h b/contrib/llvm/tools/llvm-mca/InstrBuilder.h
deleted file mode 100644
index 69a53b6fec21..000000000000
--- a/contrib/llvm/tools/llvm-mca/InstrBuilder.h
+++ /dev/null
@@ -1,85 +0,0 @@
-//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// A builder class for instructions that are statically analyzed by llvm-mca.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H
-#define LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H
-
-#include "Instruction.h"
-#include "Support.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCInstrAnalysis.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-
-namespace mca {
-
-class DispatchUnit;
-
-/// A builder class that knows how to construct Instruction objects.
-///
-/// Every llvm-mca Instruction is described by an object of class InstrDesc.
-/// An InstrDesc describes which registers are read/written by the instruction,
-/// as well as the instruction latency and hardware resources consumed.
-///
-/// This class is used by the tool to construct Instructions and instruction
-/// descriptors (i.e. InstrDesc objects).
-/// Information from the machine scheduling model is used to identify processor
-/// resources that are consumed by an instruction.
-class InstrBuilder {
- const llvm::MCSubtargetInfo &STI;
- const llvm::MCInstrInfo &MCII;
- const llvm::MCRegisterInfo &MRI;
- const llvm::MCInstrAnalysis &MCIA;
- llvm::MCInstPrinter &MCIP;
- llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
-
- llvm::DenseMap<unsigned short, std::unique_ptr<const InstrDesc>> Descriptors;
- llvm::DenseMap<const llvm::MCInst *, std::unique_ptr<const InstrDesc>>
- VariantDescriptors;
-
- const InstrDesc &createInstrDescImpl(const llvm::MCInst &MCI);
- InstrBuilder(const InstrBuilder &) = delete;
- InstrBuilder &operator=(const InstrBuilder &) = delete;
-
- void populateWrites(InstrDesc &ID, const llvm::MCInst &MCI,
- unsigned SchedClassID);
- void populateReads(InstrDesc &ID, const llvm::MCInst &MCI,
- unsigned SchedClassID);
-
-public:
- InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii,
- const llvm::MCRegisterInfo &mri,
- const llvm::MCInstrAnalysis &mcia, llvm::MCInstPrinter &mcip)
- : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), MCIP(mcip),
- ProcResourceMasks(STI.getSchedModel().getNumProcResourceKinds()) {
- computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
- }
-
- const InstrDesc &getOrCreateInstrDesc(const llvm::MCInst &MCI);
- // Returns an array of processor resource masks.
- // Masks are computed by function mca::computeProcResourceMasks. See
- // Support.h for a description of how masks are computed and how masks can be
- // used to solve set membership problems.
- llvm::ArrayRef<uint64_t> getProcResourceMasks() const {
- return ProcResourceMasks;
- }
-
- void clear() { VariantDescriptors.shrink_and_clear(); }
-
- std::unique_ptr<Instruction> createInstruction(const llvm::MCInst &MCI);
-};
-} // namespace mca
-
-#endif
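
The masks returned by getProcResourceMasks() turn "is resource X part of group Y" into a bit test. The encoding below is an assumption for illustration only (Support.h holds the authoritative scheme): each resource unit owns one bit and a group's mask covers the bits of its members.

#include <cstdint>

constexpr uint64_t P0 = 1ULL << 0;        // e.g. one ALU pipe
constexpr uint64_t P1 = 1ULL << 1;        // e.g. a second ALU pipe
constexpr uint64_t ALUGroup = P0 | P1;    // a group containing both pipes

constexpr bool contains(uint64_t Group, uint64_t Unit) {
  return (Group & Unit) == Unit;          // set membership as a bit test
}

static_assert(contains(ALUGroup, P0), "P0 belongs to the group");
static_assert(!contains(P0, P1), "distinct units do not overlap");
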
diff --git a/contrib/llvm/tools/llvm-mca/LSUnit.cpp b/contrib/llvm/tools/llvm-mca/LSUnit.cpp
deleted file mode 100644
index 9ee3b6171893..000000000000
--- a/contrib/llvm/tools/llvm-mca/LSUnit.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-//===----------------------- LSUnit.cpp --------------------------*- C++-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// A Load-Store Unit for the llvm-mca tool.
-///
-//===----------------------------------------------------------------------===//
-
-#include "LSUnit.h"
-#include "Instruction.h"
-
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "llvm-mca"
-
-namespace mca {
-
-#ifndef NDEBUG
-void LSUnit::dump() const {
- dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n';
- dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n';
- dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n';
- dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n';
-}
-#endif
-
-void LSUnit::assignLQSlot(unsigned Index) {
- assert(!isLQFull());
- assert(LoadQueue.count(Index) == 0);
-
- LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << Index
- << ",slot=" << LoadQueue.size() << ">\n");
- LoadQueue.insert(Index);
-}
-
-void LSUnit::assignSQSlot(unsigned Index) {
- assert(!isSQFull());
- assert(StoreQueue.count(Index) == 0);
-
- LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << Index
- << ",slot=" << StoreQueue.size() << ">\n");
- StoreQueue.insert(Index);
-}
-
-bool LSUnit::reserve(const InstRef &IR) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- unsigned MayLoad = Desc.MayLoad;
- unsigned MayStore = Desc.MayStore;
- unsigned IsMemBarrier = Desc.HasSideEffects;
- if (!MayLoad && !MayStore)
- return false;
-
- const unsigned Index = IR.getSourceIndex();
- if (MayLoad) {
- if (IsMemBarrier)
- LoadBarriers.insert(Index);
- assignLQSlot(Index);
- }
- if (MayStore) {
- if (IsMemBarrier)
- StoreBarriers.insert(Index);
- assignSQSlot(Index);
- }
- return true;
-}
-
-bool LSUnit::isReady(const InstRef &IR) const {
- const unsigned Index = IR.getSourceIndex();
- bool IsALoad = LoadQueue.count(Index) != 0;
- bool IsAStore = StoreQueue.count(Index) != 0;
- assert((IsALoad || IsAStore) && "Instruction is not in queue!");
-
- if (IsALoad && !LoadBarriers.empty()) {
- unsigned LoadBarrierIndex = *LoadBarriers.begin();
- if (Index > LoadBarrierIndex)
- return false;
- if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
- return false;
- }
-
- if (IsAStore && !StoreBarriers.empty()) {
- unsigned StoreBarrierIndex = *StoreBarriers.begin();
- if (Index > StoreBarrierIndex)
- return false;
- if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
- return false;
- }
-
- if (NoAlias && IsALoad)
- return true;
-
- if (StoreQueue.size()) {
- // Check if this memory operation is younger than the older store.
- if (Index > *StoreQueue.begin())
- return false;
- }
-
- // Okay, we are older than the oldest store in the queue.
- // If there are no pending loads, then we can say for sure that this
- // instruction is ready.
- if (isLQEmpty())
- return true;
-
- // Check if there are no older loads.
- if (Index <= *LoadQueue.begin())
- return true;
-
- // There is at least one younger load.
- return !IsAStore;
-}
-
-void LSUnit::onInstructionExecuted(const InstRef &IR) {
- const unsigned Index = IR.getSourceIndex();
- std::set<unsigned>::iterator it = LoadQueue.find(Index);
- if (it != LoadQueue.end()) {
- LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the load queue.\n");
- LoadQueue.erase(it);
- }
-
- it = StoreQueue.find(Index);
- if (it != StoreQueue.end()) {
- LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the store queue.\n");
- StoreQueue.erase(it);
- }
-
- if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) {
- LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the set of store barriers.\n");
- StoreBarriers.erase(StoreBarriers.begin());
- }
- if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) {
- LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the set of load barriers.\n");
- LoadBarriers.erase(LoadBarriers.begin());
- }
-}
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/LSUnit.h b/contrib/llvm/tools/llvm-mca/LSUnit.h
deleted file mode 100644
index 817522190589..000000000000
--- a/contrib/llvm/tools/llvm-mca/LSUnit.h
+++ /dev/null
@@ -1,147 +0,0 @@
-//===------------------------- LSUnit.h --------------------------*- C++-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// A Load/Store unit class that models load/store queues and that implements
-/// a simple weak memory consistency model.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_LSUNIT_H
-#define LLVM_TOOLS_LLVM_MCA_LSUNIT_H
-
-#include <set>
-
-namespace mca {
-
-class InstRef;
-struct InstrDesc;
-
-/// A Load/Store Unit implementing load and store queues.
-///
-/// This class implements a load queue and a store queue to emulate the
-/// out-of-order execution of memory operations.
-/// Each load (or store) consumes an entry in the load (or store) queue.
-///
-/// Rules are:
-/// 1) A younger load is allowed to pass an older load only if there are no
-/// stores or barriers in between the two loads.
-/// 2) A younger store is not allowed to pass an older store.
-/// 3) A younger store is not allowed to pass an older load.
-/// 4) A younger load is allowed to pass an older store only if the load does
-/// not alias with the store.
-///
-/// This class optimistically assumes that loads don't alias store operations.
-/// Under this assumption, younger loads are always allowed to pass older
-/// stores (this only affects rule 4).
-/// Essentially, this LSUnit doesn't attempt to run any sort of alias analysis
-/// to predict when loads and stores don't alias with each other.
-///
-/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
-/// set to `false` by the constructor of LSUnit.
-///
-/// In the case of write-combining memory, rule 2. could be relaxed to allow
-/// reordering of non-aliasing store operations. At the moment, this is not
-/// allowed.
-/// To put it in another way, there is no option to specify a different memory
-/// type for memory operations (example: write-through, write-combining, etc.).
-/// Also, there is no way to weaken the memory model, and this unit currently
-/// doesn't support write-combining behavior.
-///
-/// No assumptions are made on the size of the store buffer.
-/// As mentioned before, this class doesn't perform alias analysis.
-/// Consequently, LSUnit doesn't know how to identify cases where
-/// store-to-load forwarding may occur.
-///
-/// LSUnit doesn't attempt to predict whether a load or store hits or misses
-/// the L1 cache. To be more specific, LSUnit doesn't know anything about
-/// the cache hierarchy and memory types.
-/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
-/// scheduling model provides an "optimistic" load-to-use latency (which usually
-/// matches the load-to-use latency for when there is a hit in the L1D).
-///
-/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
-/// memory-barrier like instructions.
-/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
-/// `unmodeled side effects` behaves like a "soft" load-barrier. That means it
-/// serializes loads without forcing a flush of the load queue.
-/// Similarly, instructions that both `mayStore` and have `unmodeled side
-/// effects` are treated like store barriers. A full memory
-/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side
-/// effects. This is obviously inaccurate, but this is the best that we can do
-/// at the moment.
-///
-/// Each load/store barrier consumes one entry in the load/store queue. A
-/// load/store barrier enforces ordering of loads/stores:
-/// - A younger load cannot pass a load barrier.
-/// - A younger store cannot pass a store barrier.
-///
-/// A younger load has to wait for the memory load barrier to execute.
-/// A load/store barrier is "executed" when it becomes the oldest entry in
-/// the load/store queue(s). That also means, all the older loads/stores have
-/// already been executed.
-class LSUnit {
- // Load queue size.
- // LQ_Size == 0 means that there are infinite slots in the load queue.
- unsigned LQ_Size;
-
- // Store queue size.
- // SQ_Size == 0 means that there are infinite slots in the store queue.
- unsigned SQ_Size;
-
- // If true, loads will never alias with stores. This is the default.
- bool NoAlias;
-
- std::set<unsigned> LoadQueue;
- std::set<unsigned> StoreQueue;
-
- void assignLQSlot(unsigned Index);
- void assignSQSlot(unsigned Index);
- bool isReadyNoAlias(unsigned Index) const;
-
- // An instruction that is both 'mayStore' and 'HasUnmodeledSideEffects' is
- // conservatively treated as a store barrier. It forces older stores to be
- // executed before newer stores are issued.
- std::set<unsigned> StoreBarriers;
-
- // An instruction that is both 'MayLoad' and 'HasUnmodeledSideEffects' is
- // conservatively treated as a load barrier. It forces older loads to execute
- // before newer loads are issued.
- std::set<unsigned> LoadBarriers;
-
-public:
- LSUnit(unsigned LQ = 0, unsigned SQ = 0, bool AssumeNoAlias = false)
- : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {}
-
-#ifndef NDEBUG
- void dump() const;
-#endif
-
- bool isSQEmpty() const { return StoreQueue.empty(); }
- bool isLQEmpty() const { return LoadQueue.empty(); }
- bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
- bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
-
- // Returns true if this instruction has been successfully enqueued.
- bool reserve(const InstRef &IR);
-
- // The rules are:
- // 1. A store may not pass a previous store.
- // 2. A load may not pass a previous store unless flag 'NoAlias' is set.
- // 3. A load may pass a previous load.
- // 4. A store may not pass a previous load (regardless of flag 'NoAlias').
- // 5. A load has to wait until an older load barrier is fully executed.
- // 6. A store has to wait until an older store barrier is fully executed.
- bool isReady(const InstRef &IR) const;
- void onInstructionExecuted(const InstRef &IR);
-};
-
-} // namespace mca
-
-#endif
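
All of the rules above are phrased in terms of source indices, where a smaller index means an older instruction. The standalone snippet below models rules 2 and 4 with the same ordered-set idiom the class uses for its load/store queues; it is an illustration of the logic, not a use of the LSUnit API itself.

#include <cassert>
#include <set>

std::set<unsigned> Loads, Stores;          // pending ops, keyed by source index

// A store may issue only if no older store and no older load is still pending.
bool storeIsReady(unsigned Index) {
  if (!Stores.empty() && Index > *Stores.begin())
    return false;                          // rule 2: blocked by an older store
  return Loads.empty() || Index <= *Loads.begin();   // rule 4
}

int main() {
  Loads.insert(3);                         // load with source index 3
  Stores.insert(5);                        // store with source index 5
  assert(!storeIsReady(5));                // stuck behind the older load
  Loads.erase(3);                          // the load executes and leaves
  assert(storeIsReady(5));                 // now the store may go
}
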
diff --git a/contrib/llvm/tools/llvm-mca/Pipeline.cpp b/contrib/llvm/tools/llvm-mca/Pipeline.cpp
deleted file mode 100644
index 7c937e7b48b5..000000000000
--- a/contrib/llvm/tools/llvm-mca/Pipeline.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements an ordered container of stages that simulate the
-/// pipeline of a hardware backend.
-///
-//===----------------------------------------------------------------------===//
-
-#include "Pipeline.h"
-#include "HWEventListener.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/Support/Debug.h"
-
-namespace mca {
-
-#define DEBUG_TYPE "llvm-mca"
-
-using namespace llvm;
-
-void Pipeline::addEventListener(HWEventListener *Listener) {
- if (Listener)
- Listeners.insert(Listener);
- for (auto &S : Stages)
- S->addListener(Listener);
-}
-
-bool Pipeline::hasWorkToProcess() {
- const auto It = llvm::find_if(Stages, [](const std::unique_ptr<Stage> &S) {
- return S->hasWorkToComplete();
- });
- return It != Stages.end();
-}
-
-// This routine returns early if any stage returns 'false' after execute() is
-// called on it.
-bool Pipeline::executeStages(InstRef &IR) {
- for (const std::unique_ptr<Stage> &S : Stages)
- if (!S->execute(IR))
- return false;
- return true;
-}
-
-void Pipeline::preExecuteStages() {
- for (const std::unique_ptr<Stage> &S : Stages)
- S->preExecute();
-}
-
-void Pipeline::postExecuteStages() {
- for (const std::unique_ptr<Stage> &S : Stages)
- S->postExecute();
-}
-
-void Pipeline::run() {
- while (hasWorkToProcess()) {
- notifyCycleBegin();
- runCycle();
- notifyCycleEnd();
- ++Cycles;
- }
-}
-
-void Pipeline::runCycle() {
- // Update the stages before we do any processing for this cycle.
- InstRef IR;
- for (auto &S : Stages)
- S->cycleStart();
-
- // Continue executing this cycle until any stage claims it cannot make
- // progress.
- while (true) {
- preExecuteStages();
- if (!executeStages(IR))
- break;
- postExecuteStages();
- }
-
- for (auto &S : Stages)
- S->cycleEnd();
-}
-
-void Pipeline::notifyCycleBegin() {
- LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n');
- for (HWEventListener *Listener : Listeners)
- Listener->onCycleBegin();
-}
-
-void Pipeline::notifyCycleEnd() {
- LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n");
- for (HWEventListener *Listener : Listeners)
- Listener->onCycleEnd();
-}
-} // namespace mca.
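
Putting the pieces together, a driver uses this class by appending stages and then running cycles until no stage reports pending work. The sketch below uses only methods shown in this file and in Context.cpp above; Fetch, Dispatch, Retire, Execute and SummaryView stand in for objects created elsewhere.

auto P = llvm::make_unique<mca::Pipeline>();
P->appendStage(std::move(Fetch));
P->appendStage(std::move(Dispatch));
P->appendStage(std::move(Retire));
P->appendStage(std::move(Execute));
P->addEventListener(&SummaryView);   // any HWEventListener-derived view
P->run();                            // loops until hasWorkToProcess() is false
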
diff --git a/contrib/llvm/tools/llvm-mca/PipelinePrinter.cpp b/contrib/llvm/tools/llvm-mca/PipelinePrinter.cpp
index c5b1a12b792f..18ef45fc2a65 100644
--- a/contrib/llvm/tools/llvm-mca/PipelinePrinter.cpp
+++ b/contrib/llvm/tools/llvm-mca/PipelinePrinter.cpp
@@ -13,14 +13,14 @@
//===----------------------------------------------------------------------===//
#include "PipelinePrinter.h"
-#include "View.h"
+#include "Views/View.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
-
void PipelinePrinter::printReport(llvm::raw_ostream &OS) const {
for (const auto &V : Views)
V->printView(OS);
}
} // namespace mca.
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/PipelinePrinter.h b/contrib/llvm/tools/llvm-mca/PipelinePrinter.h
index fe871414418f..456026e12df3 100644
--- a/contrib/llvm/tools/llvm-mca/PipelinePrinter.h
+++ b/contrib/llvm/tools/llvm-mca/PipelinePrinter.h
@@ -17,13 +17,14 @@
#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
#define LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
-#include "Pipeline.h"
-#include "View.h"
+#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/Pipeline.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
/// A printer class that knows how to collects statistics on the
@@ -48,5 +49,6 @@ public:
void printReport(llvm::raw_ostream &OS) const;
};
} // namespace mca
+} // namespace llvm
#endif // LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
diff --git a/contrib/llvm/tools/llvm-mca/RegisterFileStatistics.cpp b/contrib/llvm/tools/llvm-mca/RegisterFileStatistics.cpp
deleted file mode 100644
index 1b07bf9a3b33..000000000000
--- a/contrib/llvm/tools/llvm-mca/RegisterFileStatistics.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-//===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements the RegisterFileStatistics interface.
-///
-//===----------------------------------------------------------------------===//
-
-#include "RegisterFileStatistics.h"
-#include "llvm/Support/Format.h"
-
-using namespace llvm;
-
-namespace mca {
-
-void RegisterFileStatistics::initializeRegisterFileInfo() {
- const MCSchedModel &SM = STI.getSchedModel();
- RegisterFileUsage Empty = {0, 0, 0};
- if (!SM.hasExtraProcessorInfo()) {
- // Assume a single register file.
- RegisterFiles.emplace_back(Empty);
- return;
- }
-
- // Initialize a RegisterFileUsage for every user defined register file, plus
- // the default register file which is always at index #0.
- const MCExtraProcessorInfo &PI = SM.getExtraProcessorInfo();
- // There is always an "InvalidRegisterFile" entry in tablegen. That entry can
- // be skipped. If there are no user defined register files, then reserve a
- // single entry for the default register file at index #0.
- unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U);
- RegisterFiles.resize(NumRegFiles);
- std::fill(RegisterFiles.begin(), RegisterFiles.end(), Empty);
-}
-
-void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
- switch (Event.Type) {
- default:
- break;
- case HWInstructionEvent::Retired: {
- const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event);
- for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I)
- RegisterFiles[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
- break;
- }
- case HWInstructionEvent::Dispatched: {
- const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
- for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) {
- RegisterFileUsage &RFU = RegisterFiles[I];
- unsigned NumUsedPhysRegs = DE.UsedPhysRegs[I];
- RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
- RFU.TotalMappings += NumUsedPhysRegs;
- RFU.MaxUsedMappings =
- std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
- }
- }
- }
-}
-
-void RegisterFileStatistics::printView(raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
-
- TempStream << "\n\nRegister File statistics:";
- const RegisterFileUsage &GlobalUsage = RegisterFiles[0];
- TempStream << "\nTotal number of mappings created: "
- << GlobalUsage.TotalMappings;
- TempStream << "\nMax number of mappings used: "
- << GlobalUsage.MaxUsedMappings << '\n';
-
- for (unsigned I = 1, E = RegisterFiles.size(); I < E; ++I) {
- const RegisterFileUsage &RFU = RegisterFiles[I];
- // Obtain the register file descriptor from the scheduling model.
- assert(STI.getSchedModel().hasExtraProcessorInfo() &&
- "Unable to find register file info!");
- const MCExtraProcessorInfo &PI =
- STI.getSchedModel().getExtraProcessorInfo();
- assert(I <= PI.NumRegisterFiles && "Unexpected register file index!");
- const MCRegisterFileDesc &RFDesc = PI.RegisterFiles[I];
- // Skip invalid register files.
- if (!RFDesc.NumPhysRegs)
- continue;
-
- TempStream << "\n* Register File #" << I;
- TempStream << " -- " << StringRef(RFDesc.Name) << ':';
- TempStream << "\n Number of physical registers: ";
- if (!RFDesc.NumPhysRegs)
- TempStream << "unbounded";
- else
- TempStream << RFDesc.NumPhysRegs;
- TempStream << "\n Total number of mappings created: "
- << RFU.TotalMappings;
- TempStream << "\n Max number of mappings used: "
- << RFU.MaxUsedMappings << '\n';
- }
-
- TempStream.flush();
- OS << Buffer;
-}
-
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.cpp b/contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.cpp
deleted file mode 100644
index edb855e11e84..000000000000
--- a/contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements the RetireControlUnitStatistics interface.
-///
-//===----------------------------------------------------------------------===//
-
-#include "RetireControlUnitStatistics.h"
-#include "llvm/Support/Format.h"
-
-using namespace llvm;
-
-namespace mca {
-
-void RetireControlUnitStatistics::onEvent(const HWInstructionEvent &Event) {
- if (Event.Type == HWInstructionEvent::Retired)
- ++NumRetired;
-}
-
-void RetireControlUnitStatistics::printView(llvm::raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nRetire Control Unit - "
- << "number of cycles where we saw N instructions retired:\n";
- TempStream << "[# retired], [# cycles]\n";
-
- for (const std::pair<unsigned, unsigned> &Entry : RetiredPerCycle) {
- TempStream << " " << Entry.first;
- if (Entry.first < 10)
- TempStream << ",           ";
- else
- TempStream << ",          ";
- TempStream << Entry.second << " ("
- << format("%.1f", ((double)Entry.second / NumCycles) * 100.0)
- << "%)\n";
- }
-
- TempStream.flush();
- OS << Buffer;
-}
-
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/Scheduler.cpp b/contrib/llvm/tools/llvm-mca/Scheduler.cpp
deleted file mode 100644
index 975a50e4b638..000000000000
--- a/contrib/llvm/tools/llvm-mca/Scheduler.cpp
+++ /dev/null
@@ -1,403 +0,0 @@
-//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// A scheduler for processor resource units and processor resource groups.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Scheduler.h"
-#include "Support.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace mca {
-
-using namespace llvm;
-
-#define DEBUG_TYPE "llvm-mca"
-
-uint64_t ResourceState::selectNextInSequence() {
- assert(isReady());
- uint64_t Next = getNextInSequence();
- while (!isSubResourceReady(Next)) {
- updateNextInSequence();
- Next = getNextInSequence();
- }
- return Next;
-}
-
-#ifndef NDEBUG
-void ResourceState::dump() const {
- dbgs() << "MASK: " << ResourceMask << ", SIZE_MASK: " << ResourceSizeMask
- << ", NEXT: " << NextInSequenceMask << ", RDYMASK: " << ReadyMask
- << ", BufferSize=" << BufferSize
- << ", AvailableSlots=" << AvailableSlots
- << ", Reserved=" << Unavailable << '\n';
-}
-#endif
-
-void ResourceManager::initialize(const llvm::MCSchedModel &SM) {
- computeProcResourceMasks(SM, ProcResID2Mask);
- for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I)
- addResource(*SM.getProcResource(I), I, ProcResID2Mask[I]);
-}
-
-// Adds a new resource state in Resources, as well as a new descriptor in
-// ResourceDescriptor. The 'Resources' map allows quick lookup of ResourceState
-// objects from resource mask identifiers.
-void ResourceManager::addResource(const MCProcResourceDesc &Desc,
- unsigned Index, uint64_t Mask) {
- assert(Resources.find(Mask) == Resources.end() && "Resource already added!");
- Resources[Mask] = llvm::make_unique<ResourceState>(Desc, Index, Mask);
-}
-
-// Returns the actual resource consumed by this Use.
-// First, is the primary resource ID.
-// Second, is the specific sub-resource ID.
-std::pair<uint64_t, uint64_t> ResourceManager::selectPipe(uint64_t ResourceID) {
- ResourceState &RS = *Resources[ResourceID];
- uint64_t SubResourceID = RS.selectNextInSequence();
- if (RS.isAResourceGroup())
- return selectPipe(SubResourceID);
- return std::pair<uint64_t, uint64_t>(ResourceID, SubResourceID);
-}
-
-void ResourceState::removeFromNextInSequence(uint64_t ID) {
- assert(NextInSequenceMask);
- assert(countPopulation(ID) == 1);
- if (ID > getNextInSequence())
- RemovedFromNextInSequence |= ID;
- NextInSequenceMask = NextInSequenceMask & (~ID);
- if (!NextInSequenceMask) {
- NextInSequenceMask = ResourceSizeMask;
- assert(NextInSequenceMask != RemovedFromNextInSequence);
- NextInSequenceMask ^= RemovedFromNextInSequence;
- RemovedFromNextInSequence = 0;
- }
-}
-
-void ResourceManager::use(ResourceRef RR) {
- // Mark the sub-resource referenced by RR as used.
- ResourceState &RS = *Resources[RR.first];
- RS.markSubResourceAsUsed(RR.second);
- // If there are still available units in RR.first,
- // then we are done.
- if (RS.isReady())
- return;
-
- // Notify other resources that RR.first is no longer available.
- for (const std::pair<uint64_t, UniqueResourceState> &Res : Resources) {
- ResourceState &Current = *Res.second.get();
- if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first)
- continue;
-
- if (Current.containsResource(RR.first)) {
- Current.markSubResourceAsUsed(RR.first);
- Current.removeFromNextInSequence(RR.first);
- }
- }
-}
-
-void ResourceManager::release(ResourceRef RR) {
- ResourceState &RS = *Resources[RR.first];
- bool WasFullyUsed = !RS.isReady();
- RS.releaseSubResource(RR.second);
- if (!WasFullyUsed)
- return;
-
- for (const std::pair<uint64_t, UniqueResourceState> &Res : Resources) {
- ResourceState &Current = *Res.second.get();
- if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first)
- continue;
-
- if (Current.containsResource(RR.first))
- Current.releaseSubResource(RR.first);
- }
-}
-
-ResourceStateEvent
-ResourceManager::canBeDispatched(ArrayRef<uint64_t> Buffers) const {
- ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE;
- for (uint64_t Buffer : Buffers) {
- Result = isBufferAvailable(Buffer);
- if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE)
- break;
- }
- return Result;
-}
-
-void ResourceManager::reserveBuffers(ArrayRef<uint64_t> Buffers) {
- for (const uint64_t R : Buffers) {
- reserveBuffer(R);
- ResourceState &Resource = *Resources[R];
- if (Resource.isADispatchHazard()) {
- assert(!Resource.isReserved());
- Resource.setReserved();
- }
- }
-}
-
-void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) {
- for (const uint64_t R : Buffers)
- releaseBuffer(R);
-}
-
-bool ResourceManager::canBeIssued(const InstrDesc &Desc) const {
- return std::all_of(Desc.Resources.begin(), Desc.Resources.end(),
- [&](const std::pair<uint64_t, const ResourceUsage> &E) {
- unsigned NumUnits =
- E.second.isReserved() ? 0U : E.second.NumUnits;
- return isReady(E.first, NumUnits);
- });
-}
-
-// Returns true if all resources are in-order, and there is at least one
-// resource which is a dispatch hazard (BufferSize = 0).
-bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) {
- if (!canBeIssued(Desc))
- return false;
- bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) {
- const ResourceState &Resource = *Resources[BufferMask];
- return Resource.isInOrder() || Resource.isADispatchHazard();
- });
- if (!AllInOrderResources)
- return false;
-
- return any_of(Desc.Buffers, [&](uint64_t BufferMask) {
- return Resources[BufferMask]->isADispatchHazard();
- });
-}
-
-void ResourceManager::issueInstruction(
- const InstrDesc &Desc,
- SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes) {
- for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) {
- const CycleSegment &CS = R.second.CS;
- if (!CS.size()) {
- releaseResource(R.first);
- continue;
- }
-
- assert(CS.begin() == 0 && "Invalid {Start, End} cycles!");
- if (!R.second.isReserved()) {
- ResourceRef Pipe = selectPipe(R.first);
- use(Pipe);
- BusyResources[Pipe] += CS.size();
- // Replace the resource mask with a valid processor resource index.
- const ResourceState &RS = *Resources[Pipe.first];
- Pipe.first = RS.getProcResourceID();
- Pipes.emplace_back(
- std::pair<ResourceRef, double>(Pipe, static_cast<double>(CS.size())));
- } else {
- assert((countPopulation(R.first) > 1) && "Expected a group!");
- // Mark this group as reserved.
- assert(R.second.isReserved());
- reserveResource(R.first);
- BusyResources[ResourceRef(R.first, R.first)] += CS.size();
- }
- }
-}
-
-void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) {
- for (std::pair<ResourceRef, unsigned> &BR : BusyResources) {
- if (BR.second)
- BR.second--;
- if (!BR.second) {
- // Release this resource.
- const ResourceRef &RR = BR.first;
-
- if (countPopulation(RR.first) == 1)
- release(RR);
-
- releaseResource(RR.first);
- ResourcesFreed.push_back(RR);
- }
- }
-
- for (const ResourceRef &RF : ResourcesFreed)
- BusyResources.erase(RF);
-}
-
-#ifndef NDEBUG
-void Scheduler::dump() const {
- dbgs() << "[SCHEDULER]: WaitQueue size is: " << WaitQueue.size() << '\n';
- dbgs() << "[SCHEDULER]: ReadyQueue size is: " << ReadyQueue.size() << '\n';
- dbgs() << "[SCHEDULER]: IssuedQueue size is: " << IssuedQueue.size() << '\n';
- Resources->dump();
-}
-#endif
-
-bool Scheduler::canBeDispatched(const InstRef &IR,
- HWStallEvent::GenericEventType &Event) const {
- Event = HWStallEvent::Invalid;
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
-
- if (Desc.MayLoad && LSU->isLQFull())
- Event = HWStallEvent::LoadQueueFull;
- else if (Desc.MayStore && LSU->isSQFull())
- Event = HWStallEvent::StoreQueueFull;
- else {
- switch (Resources->canBeDispatched(Desc.Buffers)) {
- default:
- return true;
- case ResourceStateEvent::RS_BUFFER_UNAVAILABLE:
- Event = HWStallEvent::SchedulerQueueFull;
- break;
- case ResourceStateEvent::RS_RESERVED:
- Event = HWStallEvent::DispatchGroupStall;
- }
- }
-
- return false;
-}
-
-void Scheduler::issueInstructionImpl(
- InstRef &IR,
- SmallVectorImpl<std::pair<ResourceRef, double>> &UsedResources) {
- Instruction *IS = IR.getInstruction();
- const InstrDesc &D = IS->getDesc();
-
- // Issue the instruction and collect all the consumed resources
- // into a vector. That vector is then used to notify the listener.
- Resources->issueInstruction(D, UsedResources);
-
- // Notify the instruction that it started executing.
- // This updates the internal state of each write.
- IS->execute();
-
- if (IS->isExecuting())
- IssuedQueue[IR.getSourceIndex()] = IS;
-}
-
-// Release the buffered resources and issue the instruction.
-void Scheduler::issueInstruction(
- InstRef &IR,
- SmallVectorImpl<std::pair<ResourceRef, double>> &UsedResources) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- releaseBuffers(Desc.Buffers);
- issueInstructionImpl(IR, UsedResources);
-}
-
-void Scheduler::promoteToReadyQueue(SmallVectorImpl<InstRef> &Ready) {
- // Scan the set of waiting instructions and promote them to the
- // ready queue if operands are all ready.
- for (auto I = WaitQueue.begin(), E = WaitQueue.end(); I != E;) {
- const unsigned IID = I->first;
- Instruction *IS = I->second;
-
- // Check if this instruction is now ready. If it is not, force a state
- // transition by invoking method 'update()'.
- if (!IS->isReady())
- IS->update();
-
- const InstrDesc &Desc = IS->getDesc();
- bool IsMemOp = Desc.MayLoad || Desc.MayStore;
- if (!IS->isReady() || (IsMemOp && !LSU->isReady({IID, IS}))) {
- ++I;
- continue;
- }
-
- Ready.emplace_back(IID, IS);
- ReadyQueue[IID] = IS;
- auto ToRemove = I;
- ++I;
- WaitQueue.erase(ToRemove);
- }
-}
-
-InstRef Scheduler::select() {
- // Find the oldest ready-to-issue instruction in the ReadyQueue.
- auto It = std::find_if(ReadyQueue.begin(), ReadyQueue.end(),
- [&](const QueueEntryTy &Entry) {
- const InstrDesc &D = Entry.second->getDesc();
- return Resources->canBeIssued(D);
- });
-
- if (It == ReadyQueue.end())
- return {0, nullptr};
-
- // We want to prioritize older instructions over younger instructions to
- // minimize the pressure on the reorder buffer. We also want to give a
- // better rank to instructions with more users, to better expose ILP.
-
- // Compute a rank value based on the age of an instruction (i.e. its source
- // index) and its number of users. The lower the rank value, the better.
- int Rank = It->first - It->second->getNumUsers();
- for (auto I = It, E = ReadyQueue.end(); I != E; ++I) {
- int CurrentRank = I->first - I->second->getNumUsers();
- if (CurrentRank < Rank) {
- const InstrDesc &D = I->second->getDesc();
- if (Resources->canBeIssued(D))
- It = I;
- }
- }
-
- // We found an instruction to issue.
- InstRef IR(It->first, It->second);
- ReadyQueue.erase(It);
- return IR;
-}
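The ranking heuristic in select() boils down to comparing SourceIndex - NumUsers. A tiny standalone sketch, with invented indices and user counts, showing how the comparison favors older and more heavily used instructions:

#include <cstdio>

// Lower (SourceIndex - NumUsers) wins: older instructions and instructions
// with more users are issued first.
static int computeRank(unsigned SourceIndex, unsigned NumUsers) {
  return static_cast<int>(SourceIndex) - static_cast<int>(NumUsers);
}

int main() {
  // Hypothetical ready-queue entries: {source index, number of users}.
  std::printf("rank(#4, 1 user)  = %d\n", computeRank(4, 1)); // prints 3
  std::printf("rank(#6, 5 users) = %d\n", computeRank(6, 5)); // prints 1 -> picked
  return 0;
}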
-
-void Scheduler::updatePendingQueue(SmallVectorImpl<InstRef> &Ready) {
-  // Notify instructions in the pending queue that a new cycle has just
-  // started.
- for (QueueEntryTy Entry : WaitQueue)
- Entry.second->cycleEvent();
- promoteToReadyQueue(Ready);
-}
-
-void Scheduler::updateIssuedQueue(SmallVectorImpl<InstRef> &Executed) {
- for (auto I = IssuedQueue.begin(), E = IssuedQueue.end(); I != E;) {
- const QueueEntryTy Entry = *I;
- Instruction *IS = Entry.second;
- IS->cycleEvent();
- if (IS->isExecuted()) {
- Executed.push_back({Entry.first, Entry.second});
- auto ToRemove = I;
- ++I;
- IssuedQueue.erase(ToRemove);
- } else {
- LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << Entry.first
- << " is still executing.\n");
- ++I;
- }
- }
-}
-
-void Scheduler::onInstructionExecuted(const InstRef &IR) {
- LSU->onInstructionExecuted(IR);
-}
-
-void Scheduler::reclaimSimulatedResources(SmallVectorImpl<ResourceRef> &Freed) {
- Resources->cycleEvent(Freed);
-}
-
-bool Scheduler::reserveResources(InstRef &IR) {
- // If necessary, reserve queue entries in the load-store unit (LSU).
- const bool Reserved = LSU->reserve(IR);
- if (!IR.getInstruction()->isReady() || (Reserved && !LSU->isReady(IR))) {
- LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the Wait Queue\n");
- WaitQueue[IR.getSourceIndex()] = IR.getInstruction();
- return false;
- }
- return true;
-}
-
-bool Scheduler::issueImmediately(InstRef &IR) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- if (!Desc.isZeroLatency() && !Resources->mustIssueImmediately(Desc)) {
- LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
- << " to the Ready Queue\n");
- ReadyQueue[IR.getSourceIndex()] = IR.getInstruction();
- return false;
- }
- return true;
-}
-
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/Scheduler.h b/contrib/llvm/tools/llvm-mca/Scheduler.h
deleted file mode 100644
index 428fbc01707d..000000000000
--- a/contrib/llvm/tools/llvm-mca/Scheduler.h
+++ /dev/null
@@ -1,515 +0,0 @@
-//===--------------------- Scheduler.h ------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// A scheduler for Processor Resource Units and Processor Resource Groups.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
-#define LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
-
-#include "HWEventListener.h"
-#include "HardwareUnit.h"
-#include "Instruction.h"
-#include "LSUnit.h"
-#include "RetireControlUnit.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include <map>
-
-namespace mca {
-
-/// Used to report the internal state of a processor resource.
-///
-/// A processor resource is available if it is not reserved, and there are
-/// available slots in the buffer. A processor resource is unavailable if it
-/// is either reserved, or the associated buffer is full. A processor resource
-/// with a buffer size of -1 is always available if it is not reserved.
-///
-/// Values of type ResourceStateEvent are returned by method
-/// ResourceState::isBufferAvailable(), which is used to query the internal
-/// state of a resource.
-///
-/// The naming convention for resource state events is:
-/// * Event names start with prefix RS_
-/// * Prefix RS_ is followed by a string describing the actual resource state.
-enum ResourceStateEvent {
- RS_BUFFER_AVAILABLE,
- RS_BUFFER_UNAVAILABLE,
- RS_RESERVED
-};
-
-/// A descriptor for processor resources.
-///
-/// Each object of class ResourceState is associated to a specific processor
-/// resource. There is an instance of this class for every processor resource
-/// defined by the scheduling model.
-/// A ResourceState dynamically tracks the availability of units of a processor
-/// resource. For example, the ResourceState of a ProcResGroup tracks the
-/// availability of resource units which are part of the group.
-///
-/// Internally, ResourceState uses a round-robin selector to identify
-/// which unit of the group shall be used next.
-class ResourceState {
- // Index to the MCProcResourceDesc in the processor Model.
- unsigned ProcResourceDescIndex;
- // A resource mask. This is generated by the tool with the help of
- // function `mca::createProcResourceMasks' (see Support.h).
- uint64_t ResourceMask;
-
- // A ProcResource can specify a number of units. For the purpose of dynamic
- // scheduling, a processor resource with more than one unit behaves like a
- // group. This field has one bit set for every unit/resource that is part of
- // the group.
- // For groups, this field defaults to 'ResourceMask'. For non-group
- // resources, the number of bits set in this mask is equivalent to the
- // number of units (i.e. field 'NumUnits' in 'ProcResourceUnits').
- uint64_t ResourceSizeMask;
-
- // A simple round-robin selector for processor resources.
- // Each bit of the mask identifies a sub resource within this group.
- //
-  // As an example, let's assume that this ResourceState describes a
- // processor resource group composed of the following three units:
- // ResourceA -- 0b001
- // ResourceB -- 0b010
- // ResourceC -- 0b100
- //
- // Each unit is identified by a ResourceMask which always contains a
-  // single bit set. Field NextInSequenceMask is initially set to value
-  // 0b111. That value is obtained by OR'ing the resource masks of the
-  // processor resources that are part of the group.
- //
- // NextInSequenceMask -- 0b111
- //
- // Field NextInSequenceMask is used by the resource manager (i.e.
- // an object of class ResourceManager) to select the "next available resource"
- // from the set. The algorithm would prioritize resources with a bigger
- // ResourceMask value.
- //
- // In this example, there are three resources in the set, and 'ResourceC'
- // has the highest mask value. The round-robin selector would firstly select
- // 'ResourceC', then 'ResourceB', and eventually 'ResourceA'.
- //
- // When a resource R is used, its corresponding bit is cleared from the set.
- //
- // Back to the example:
-  // If 'ResourceC' is selected, then the new value of NextInSequenceMask
-  // becomes 0b011.
- //
-  // When NextInSequenceMask becomes zero, it is reset to its original value
-  // (0b111 in this example; see the standalone sketch after this class).
- uint64_t NextInSequenceMask;
-
- // Some instructions can only be issued on very specific pipeline resources.
- // For those instructions, we know exactly which resource would be consumed
- // without having to dynamically select it using field 'NextInSequenceMask'.
- //
- // The resource mask bit associated to the (statically) selected
- // processor resource is still cleared from the 'NextInSequenceMask'.
- // If that bit was already zero in NextInSequenceMask, then we update
- // mask 'RemovedFromNextInSequence'.
- //
-  // When NextInSequenceMask is reset back to its initial value, the algorithm
-  // removes any bits which are set in RemovedFromNextInSequence.
- uint64_t RemovedFromNextInSequence;
-
- // A mask of ready units.
- uint64_t ReadyMask;
-
-  // Buffered resources will have this field set to a number greater than 0.
-  // A buffered resource behaves like a separate reservation station
-  // implementing its own buffer for out-of-order execution.
-  // A buffer of 1 is for units that force in-order execution.
-  // A value of 0 is treated specially. In particular, a resource with
-  // a BufferSize = 0 is an in-order issue/dispatch resource.
- // That means, this resource is reserved starting from the dispatch event,
- // until all the "resource cycles" are consumed after the issue event.
- // While this resource is reserved, no other instruction may be dispatched.
- int BufferSize;
-
- // Available slots in the buffer (zero, if this is not a buffered resource).
- unsigned AvailableSlots;
-
-  // True if this resource is currently unavailable.
- // An instruction may "reserve" a resource for a number of cycles.
- // During those cycles, the reserved resource cannot be used for other
- // instructions, even if the ReadyMask is set.
- bool Unavailable;
-
- bool isSubResourceReady(uint64_t ID) const { return ReadyMask & ID; }
-
- /// Returns the mask identifier of the next available resource in the set.
- uint64_t getNextInSequence() const {
- assert(NextInSequenceMask);
- return llvm::PowerOf2Floor(NextInSequenceMask);
- }
-
-  /// Consumes the current 'next in sequence' resource and updates the
-  /// selector, wrapping around once every resource in the set has been used.
- void updateNextInSequence() {
- NextInSequenceMask ^= getNextInSequence();
- if (!NextInSequenceMask)
- NextInSequenceMask = ResourceSizeMask;
- }
-
- uint64_t computeResourceSizeMaskForGroup(uint64_t ResourceMask) {
- assert(llvm::countPopulation(ResourceMask) > 1);
- return ResourceMask ^ llvm::PowerOf2Floor(ResourceMask);
- }
-
-public:
- ResourceState(const llvm::MCProcResourceDesc &Desc, unsigned Index,
- uint64_t Mask)
- : ProcResourceDescIndex(Index), ResourceMask(Mask) {
- bool IsAGroup = llvm::countPopulation(ResourceMask) > 1;
- ResourceSizeMask = IsAGroup ? computeResourceSizeMaskForGroup(ResourceMask)
- : ((1ULL << Desc.NumUnits) - 1);
- NextInSequenceMask = ResourceSizeMask;
- RemovedFromNextInSequence = 0;
- ReadyMask = ResourceSizeMask;
- BufferSize = Desc.BufferSize;
- AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize);
- Unavailable = false;
- }
-
- unsigned getProcResourceID() const { return ProcResourceDescIndex; }
- uint64_t getResourceMask() const { return ResourceMask; }
- int getBufferSize() const { return BufferSize; }
-
- bool isBuffered() const { return BufferSize > 0; }
- bool isInOrder() const { return BufferSize == 1; }
- bool isADispatchHazard() const { return BufferSize == 0; }
- bool isReserved() const { return Unavailable; }
-
- void setReserved() { Unavailable = true; }
- void clearReserved() { Unavailable = false; }
-
- // A resource is ready if it is not reserved, and if there are enough
- // available units.
- // If a resource is also a dispatch hazard, then we don't check if
- // it is reserved because that check would always return true.
- // A resource marked as "dispatch hazard" is always reserved at
- // dispatch time. When this method is called, the assumption is that
- // the user of this resource has been already dispatched.
- bool isReady(unsigned NumUnits = 1) const {
- return (!isReserved() || isADispatchHazard()) &&
- llvm::countPopulation(ReadyMask) >= NumUnits;
- }
- bool isAResourceGroup() const {
- return llvm::countPopulation(ResourceMask) > 1;
- }
-
- bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
-
- void markSubResourceAsUsed(uint64_t ID) {
- assert(isSubResourceReady(ID));
- ReadyMask ^= ID;
- }
-
- void releaseSubResource(uint64_t ID) {
- assert(!isSubResourceReady(ID));
- ReadyMask ^= ID;
- }
-
- unsigned getNumUnits() const {
- return isAResourceGroup() ? 1U : llvm::countPopulation(ResourceSizeMask);
- }
-
- uint64_t selectNextInSequence();
- void removeFromNextInSequence(uint64_t ID);
-
- ResourceStateEvent isBufferAvailable() const {
- if (isADispatchHazard() && isReserved())
- return RS_RESERVED;
- if (!isBuffered() || AvailableSlots)
- return RS_BUFFER_AVAILABLE;
- return RS_BUFFER_UNAVAILABLE;
- }
-
- void reserveBuffer() {
- if (AvailableSlots)
- AvailableSlots--;
- }
-
- void releaseBuffer() {
- if (BufferSize > 0)
- AvailableSlots++;
- assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
- }
-
-#ifndef NDEBUG
- void dump() const;
-#endif
-};
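A minimal standalone sketch of the NextInSequenceMask round-robin selection described in the comments above, using the three example unit masks and a plain-C++ stand-in for llvm::PowerOf2Floor:

#include <cstdint>
#include <cstdio>

// Stand-in for llvm::PowerOf2Floor: keep only the highest set bit.
static uint64_t powerOf2Floor(uint64_t V) {
  while (V & (V - 1))
    V &= V - 1; // clear the lowest set bit until a single bit remains
  return V;
}

int main() {
  const uint64_t GroupMask = 0x7; // ResourceA|ResourceB|ResourceC (0b111)
  uint64_t NextInSequenceMask = GroupMask;

  for (int I = 0; I < 5; ++I) {
    uint64_t Selected = powerOf2Floor(NextInSequenceMask); // highest unit wins
    NextInSequenceMask ^= Selected;                        // consume it
    if (!NextInSequenceMask)
      NextInSequenceMask = GroupMask;                      // wrap around
    std::printf("picked 0x%llx, selector now 0x%llx\n",
                (unsigned long long)Selected,
                (unsigned long long)NextInSequenceMask);
  }
  return 0;
}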
-
-/// A resource unit identifier.
-///
-/// This is used to identify a specific processor resource unit using a pair
-/// of indices where the 'first' index is a processor resource mask, and the
-/// 'second' index is an index for a "sub-resource" (i.e. unit).
-typedef std::pair<uint64_t, uint64_t> ResourceRef;
-
-// First: a MCProcResourceDesc index identifying a buffered resource.
-// Second: max number of buffer entries used in this resource.
-typedef std::pair<unsigned, unsigned> BufferUsageEntry;
-
-/// A resource manager for processor resource units and groups.
-///
-/// This class owns all the ResourceState objects, and it is responsible for
-/// acting on requests from a Scheduler by updating the internal state of
-/// ResourceState objects.
-/// This class doesn't know about instruction itineraries and functional units.
-/// In the future, it can be extended to support itineraries too through the same
-/// public interface.
-class ResourceManager {
- // The resource manager owns all the ResourceState.
- using UniqueResourceState = std::unique_ptr<ResourceState>;
- llvm::SmallDenseMap<uint64_t, UniqueResourceState> Resources;
-
- // Keeps track of which resources are busy, and how many cycles are left
- // before those become usable again.
- llvm::SmallDenseMap<ResourceRef, unsigned> BusyResources;
-
- // A table to map processor resource IDs to processor resource masks.
- llvm::SmallVector<uint64_t, 8> ProcResID2Mask;
-
- // Adds a new resource state in Resources, as well as a new descriptor in
- // ResourceDescriptor.
- void addResource(const llvm::MCProcResourceDesc &Desc, unsigned Index,
- uint64_t Mask);
-
- // Populate resource descriptors.
- void initialize(const llvm::MCSchedModel &SM);
-
- // Returns the actual resource unit that will be used.
- ResourceRef selectPipe(uint64_t ResourceID);
-
- void use(ResourceRef RR);
- void release(ResourceRef RR);
-
- unsigned getNumUnits(uint64_t ResourceID) const {
- assert(Resources.find(ResourceID) != Resources.end());
- return Resources.find(ResourceID)->getSecond()->getNumUnits();
- }
-
- // Reserve a specific Resource kind.
- void reserveBuffer(uint64_t ResourceID) {
- assert(isBufferAvailable(ResourceID) ==
- ResourceStateEvent::RS_BUFFER_AVAILABLE);
- ResourceState &Resource = *Resources[ResourceID];
- Resource.reserveBuffer();
- }
-
- void releaseBuffer(uint64_t ResourceID) {
- Resources[ResourceID]->releaseBuffer();
- }
-
- ResourceStateEvent isBufferAvailable(uint64_t ResourceID) const {
- const ResourceState &Resource = *Resources.find(ResourceID)->second;
- return Resource.isBufferAvailable();
- }
-
- bool isReady(uint64_t ResourceID, unsigned NumUnits) const {
- const ResourceState &Resource = *Resources.find(ResourceID)->second;
- return Resource.isReady(NumUnits);
- }
-
-public:
- ResourceManager(const llvm::MCSchedModel &SM)
- : ProcResID2Mask(SM.getNumProcResourceKinds()) {
- initialize(SM);
- }
-
- // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
- // there are enough available slots in the buffers.
- ResourceStateEvent canBeDispatched(llvm::ArrayRef<uint64_t> Buffers) const;
-
-  // Return the processor resource identifier associated with this Mask.
- unsigned resolveResourceMask(uint64_t Mask) const {
- return Resources.find(Mask)->second->getProcResourceID();
- }
-
- // Consume a slot in every buffered resource from array 'Buffers'. Resource
- // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
- void reserveBuffers(llvm::ArrayRef<uint64_t> Buffers);
-
- // Release buffer entries previously allocated by method reserveBuffers.
- void releaseBuffers(llvm::ArrayRef<uint64_t> Buffers);
-
- void reserveResource(uint64_t ResourceID) {
- ResourceState &Resource = *Resources[ResourceID];
- assert(!Resource.isReserved());
- Resource.setReserved();
- }
-
- void releaseResource(uint64_t ResourceID) {
- ResourceState &Resource = *Resources[ResourceID];
- Resource.clearReserved();
- }
-
- // Returns true if all resources are in-order, and there is at least one
- // resource which is a dispatch hazard (BufferSize = 0).
- bool mustIssueImmediately(const InstrDesc &Desc);
-
- bool canBeIssued(const InstrDesc &Desc) const;
-
- void issueInstruction(
- const InstrDesc &Desc,
- llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes);
-
- void cycleEvent(llvm::SmallVectorImpl<ResourceRef> &ResourcesFreed);
-
-#ifndef NDEBUG
- void dump() const {
- for (const std::pair<uint64_t, UniqueResourceState> &Resource : Resources)
- Resource.second->dump();
- }
-#endif
-}; // class ResourceManager
-
-/// Class Scheduler is responsible for issuing instructions to pipeline
-/// resources. Internally, it delegates to a ResourceManager the management of
-/// processor resources.
-/// This class is also responsible for tracking the progress of instructions
-/// from the dispatch stage, until the write-back stage.
-///
-/// An instruction dispatched to the Scheduler is initially placed into either
-/// the 'WaitQueue' or the 'ReadyQueue' depending on the availability of the
-/// input operands. Instructions in the WaitQueue are ordered by instruction
-/// index. An instruction is moved from the WaitQueue to the ReadyQueue when
-/// register operands become available, and all memory dependencies are met.
-/// Instructions that are moved from the WaitQueue to the ReadyQueue transition
-/// from state 'IS_AVAILABLE' to state 'IS_READY'.
-///
-/// At the beginning of each cycle, the Scheduler checks if there are
-/// instructions in the WaitQueue that can be moved to the ReadyQueue. If the
-/// ReadyQueue is not empty, then older instructions from the queue are issued
-/// to the processor pipelines, and the underlying ResourceManager is updated
-/// accordingly. The ReadyQueue is ordered by instruction index to guarantee
-/// that the first instructions in the set are also the oldest.
-///
-/// An Instruction is moved from the ReadyQueue to the IssuedQueue when it is
-/// issued to one or more pipelines. This event also causes an instruction
-/// state transition (i.e. from state IS_READY to state IS_EXECUTING).
-/// An Instruction leaves the IssuedQueue when it reaches the write-back stage
-/// (a toy model of these queue transitions appears at the end of this header).
-class Scheduler : public HardwareUnit {
- const llvm::MCSchedModel &SM;
-
- // Hardware resources that are managed by this scheduler.
- std::unique_ptr<ResourceManager> Resources;
- std::unique_ptr<LSUnit> LSU;
-
- using QueueEntryTy = std::pair<unsigned, Instruction *>;
- std::map<unsigned, Instruction *> WaitQueue;
- std::map<unsigned, Instruction *> ReadyQueue;
- std::map<unsigned, Instruction *> IssuedQueue;
-
- /// Issue an instruction without updating the ready queue.
- void issueInstructionImpl(
- InstRef &IR,
- llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes);
-
-public:
- Scheduler(const llvm::MCSchedModel &Model, unsigned LoadQueueSize,
- unsigned StoreQueueSize, bool AssumeNoAlias)
- : SM(Model), Resources(llvm::make_unique<ResourceManager>(SM)),
- LSU(llvm::make_unique<LSUnit>(LoadQueueSize, StoreQueueSize,
- AssumeNoAlias)) {}
-
- /// Check if the instruction in 'IR' can be dispatched.
- ///
- /// The DispatchStage is responsible for querying the Scheduler before
- /// dispatching new instructions. This routine is used for performing such
- /// a query. If the instruction 'IR' can be dispatched, then true is
- /// returned, otherwise false is returned with Event set to the stall type.
- bool canBeDispatched(const InstRef &IR,
- HWStallEvent::GenericEventType &Event) const;
-
-  /// Returns true if there is availability for IR in the LSU.
- bool isReady(const InstRef &IR) const { return LSU->isReady(IR); }
-
- /// Issue an instruction. The Used container is populated with
- /// the resource objects consumed on behalf of issuing this instruction.
- void
- issueInstruction(InstRef &IR,
- llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Used);
-
- /// This routine will attempt to issue an instruction immediately (for
- /// zero-latency instructions).
- ///
- /// Returns true if the instruction is issued immediately. If this does not
- /// occur, then the instruction will be added to the Scheduler's ReadyQueue.
- bool issueImmediately(InstRef &IR);
-
- /// Reserve one entry in each buffered resource.
- void reserveBuffers(llvm::ArrayRef<uint64_t> Buffers) {
- Resources->reserveBuffers(Buffers);
- }
-
- /// Release buffer entries previously allocated by method reserveBuffers.
- void releaseBuffers(llvm::ArrayRef<uint64_t> Buffers) {
- Resources->releaseBuffers(Buffers);
- }
-
- /// Update the resources managed by the scheduler.
- /// This routine is to be called at the start of a new cycle, and is
- /// responsible for updating scheduler resources. Resources are released
- /// once they have been fully consumed.
- void reclaimSimulatedResources(llvm::SmallVectorImpl<ResourceRef> &Freed);
-
- /// Move instructions from the WaitQueue to the ReadyQueue if input operands
- /// are all available.
- void promoteToReadyQueue(llvm::SmallVectorImpl<InstRef> &Ready);
-
- /// Update the ready queue.
- void updatePendingQueue(llvm::SmallVectorImpl<InstRef> &Ready);
-
- /// Update the issued queue.
- void updateIssuedQueue(llvm::SmallVectorImpl<InstRef> &Executed);
-
- /// Updates the Scheduler's resources to reflect that an instruction has just
- /// been executed.
- void onInstructionExecuted(const InstRef &IR);
-
- /// Obtain the processor's resource identifier for the given
- /// resource mask.
- unsigned getResourceID(uint64_t Mask) {
- return Resources->resolveResourceMask(Mask);
- }
-
- /// Reserve resources necessary to issue the instruction.
- /// Returns true if the resources are ready and the (LSU) can
- /// execute the given instruction immediately.
- bool reserveResources(InstRef &IR);
-
- /// Select the next instruction to issue from the ReadyQueue.
- /// This method gives priority to older instructions.
- InstRef select();
-
-#ifndef NDEBUG
-  // Dump the scheduler's internal state (wait, ready, and issued queues).
- void dump() const;
-
- // This routine performs a sanity check. This routine should only be called
- // when we know that 'IR' is not in the scheduler's instruction queues.
- void sanityCheck(const InstRef &IR) const {
- const unsigned Idx = IR.getSourceIndex();
- assert(WaitQueue.find(Idx) == WaitQueue.end());
- assert(ReadyQueue.find(Idx) == ReadyQueue.end());
- assert(IssuedQueue.find(Idx) == IssuedQueue.end());
- }
-#endif // !NDEBUG
-};
-} // namespace mca
-
-#endif // LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
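To make the WaitQueue/ReadyQueue/IssuedQueue transitions described in the class comment concrete, here is a hedged toy model of a few simulation cycles. It is entirely independent of the real Scheduler, LSUnit and ResourceManager types; readiness and latency are faked with simple countdowns:

#include <cstdio>
#include <map>

// Toy instruction: becomes ready after 'CyclesUntilReady' cycles, and
// finishes 'Latency' cycles after being issued.
struct ToyInst {
  unsigned CyclesUntilReady;
  unsigned Latency;
};

int main() {
  std::map<unsigned, ToyInst> WaitQueue{{0, {0, 2}}, {1, {1, 1}}, {2, {3, 1}}};
  std::map<unsigned, ToyInst> ReadyQueue, IssuedQueue;

  for (unsigned Cycle = 0; Cycle < 6; ++Cycle) {
    // WaitQueue -> ReadyQueue once operands are (pretend) available.
    for (auto It = WaitQueue.begin(); It != WaitQueue.end();) {
      if (It->second.CyclesUntilReady == 0) {
        ReadyQueue.insert(*It);
        It = WaitQueue.erase(It);
      } else {
        --It->second.CyclesUntilReady;
        ++It;
      }
    }
    // Issue the oldest ready instruction (lowest source index).
    if (!ReadyQueue.empty()) {
      auto Oldest = ReadyQueue.begin();
      std::printf("cycle %u: issue #%u\n", Cycle, Oldest->first);
      IssuedQueue.insert(*Oldest);
      ReadyQueue.erase(Oldest);
    }
    // IssuedQueue -> write-back once the latency is fully consumed.
    for (auto It = IssuedQueue.begin(); It != IssuedQueue.end();) {
      if (--It->second.Latency == 0) {
        std::printf("cycle %u: #%u executed\n", Cycle, It->first);
        It = IssuedQueue.erase(It);
      } else {
        ++It;
      }
    }
  }
  return 0;
}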
diff --git a/contrib/llvm/tools/llvm-mca/SchedulerStatistics.cpp b/contrib/llvm/tools/llvm-mca/SchedulerStatistics.cpp
deleted file mode 100644
index 5c6d22a71812..000000000000
--- a/contrib/llvm/tools/llvm-mca/SchedulerStatistics.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements the SchedulerStatistics interface.
-///
-//===----------------------------------------------------------------------===//
-
-#include "SchedulerStatistics.h"
-#include "llvm/Support/Format.h"
-
-using namespace llvm;
-
-namespace mca {
-
-void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
- if (Event.Type == HWInstructionEvent::Issued)
- ++NumIssued;
-}
-
-void SchedulerStatistics::onReservedBuffers(ArrayRef<unsigned> Buffers) {
- for (const unsigned Buffer : Buffers) {
- if (BufferedResources.find(Buffer) != BufferedResources.end()) {
- BufferUsage &BU = BufferedResources[Buffer];
- BU.SlotsInUse++;
- BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
- continue;
- }
-
- BufferedResources.insert(
- std::pair<unsigned, BufferUsage>(Buffer, {1U, 1U}));
- }
-}
-
-void SchedulerStatistics::onReleasedBuffers(ArrayRef<unsigned> Buffers) {
- for (const unsigned Buffer : Buffers) {
- assert(BufferedResources.find(Buffer) != BufferedResources.end() &&
- "Buffered resource not in map?");
- BufferUsage &BU = BufferedResources[Buffer];
- BU.SlotsInUse--;
- }
-}
-
-void SchedulerStatistics::printSchedulerStatistics(
- llvm::raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nSchedulers - number of cycles where we saw N instructions "
- "issued:\n";
- TempStream << "[# issued], [# cycles]\n";
- for (const std::pair<unsigned, unsigned> &Entry : IssuedPerCycle) {
- TempStream << " " << Entry.first << ", " << Entry.second << " ("
- << format("%.1f", ((double)Entry.second / NumCycles) * 100)
- << "%)\n";
- }
-
- TempStream.flush();
- OS << Buffer;
-}
-
-void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nScheduler's queue usage:\n";
- // Early exit if no buffered resources were consumed.
- if (BufferedResources.empty()) {
- TempStream << "No scheduler resources used.\n";
- TempStream.flush();
- OS << Buffer;
- return;
- }
-
- for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
- const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
- if (ProcResource.BufferSize <= 0)
- continue;
-
- const auto It = BufferedResources.find(I);
- unsigned MaxUsedSlots =
- It == BufferedResources.end() ? 0 : It->second.MaxUsedSlots;
- TempStream << ProcResource.Name << ", " << MaxUsedSlots << '/'
- << ProcResource.BufferSize << '\n';
- }
-
- TempStream.flush();
- OS << Buffer;
-}
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/SourceMgr.h b/contrib/llvm/tools/llvm-mca/SourceMgr.h
deleted file mode 100644
index 15a85a69569f..000000000000
--- a/contrib/llvm/tools/llvm-mca/SourceMgr.h
+++ /dev/null
@@ -1,63 +0,0 @@
-//===--------------------- SourceMgr.h --------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements class SourceMgr. Class SourceMgr abstracts the input
-/// code sequence (a sequence of MCInst), and assigns unique identifiers to
-/// every instruction in the sequence.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H
-#define LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H
-
-#include "llvm/MC/MCInst.h"
-#include <vector>
-
-namespace mca {
-
-typedef std::pair<unsigned, const llvm::MCInst *> SourceRef;
-
-class SourceMgr {
- using InstVec = std::vector<std::unique_ptr<const llvm::MCInst>>;
- const InstVec &Sequence;
- unsigned Current;
- unsigned Iterations;
- static const unsigned DefaultIterations = 100;
-
-public:
- SourceMgr(const InstVec &MCInstSequence, unsigned NumIterations)
- : Sequence(MCInstSequence), Current(0),
- Iterations(NumIterations ? NumIterations : DefaultIterations) {}
-
- unsigned getCurrentIteration() const { return Current / Sequence.size(); }
- unsigned getNumIterations() const { return Iterations; }
- unsigned size() const { return Sequence.size(); }
- const InstVec &getSequence() const { return Sequence; }
-
- bool hasNext() const { return Current < (Iterations * size()); }
- void updateNext() { Current++; }
-
- const SourceRef peekNext() const {
- unsigned Index = getCurrentInstructionIndex();
- return SourceRef(Current, Sequence[Index].get());
- }
-
- unsigned getCurrentInstructionIndex() const {
- return Current % Sequence.size();
- }
-
- const llvm::MCInst &getMCInstFromIndex(unsigned Index) const {
- return *Sequence[Index % size()];
- }
-
- bool isEmpty() const { return size() == 0; }
-};
-} // namespace mca
-
-#endif
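SourceMgr essentially exposes a circular view over the input sequence for a fixed number of iterations. A standalone sketch of that iteration pattern, with placeholder strings standing in for MCInst objects:

#include <cstdio>
#include <vector>

int main() {
  std::vector<const char *> Sequence = {"addl", "imull", "movl"}; // MCInst stand-ins
  const unsigned Iterations = 2;

  unsigned Current = 0;
  while (Current < Iterations * Sequence.size()) {     // SourceMgr::hasNext()
    unsigned Index = Current % Sequence.size();        // getCurrentInstructionIndex()
    unsigned Iteration = Current / Sequence.size();    // getCurrentIteration()
    std::printf("[%u,%u] %s\n", Iteration, Index, Sequence[Index]);
    ++Current;                                         // updateNext()
  }
  return 0;
}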
diff --git a/contrib/llvm/tools/llvm-mca/Support.h b/contrib/llvm/tools/llvm-mca/Support.h
deleted file mode 100644
index fd8d8b5a23b3..000000000000
--- a/contrib/llvm/tools/llvm-mca/Support.h
+++ /dev/null
@@ -1,58 +0,0 @@
-//===--------------------- Support.h ----------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// Helper functions used by various pipeline components.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_SUPPORT_H
-#define LLVM_TOOLS_LLVM_MCA_SUPPORT_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/MCSchedule.h"
-
-namespace mca {
-
-/// Populates vector Masks with processor resource masks.
-///
-/// The number of bits set in a mask depends on the processor resource type.
-/// Each processor resource mask has at least one bit set. For groups, the
-/// number of bits set in the mask is equal to the cardinality of the group plus
-/// one. Excluding the most significant bit, the remaining bits in the mask
-/// identify processor resources that are part of the group.
-///
-/// Example:
-///
-/// ResourceA -- Mask: 0b001
-/// ResourceB -- Mask: 0b010
-/// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111
-///
-/// ResourceAB is a processor resource group containing ResourceA and ResourceB.
-/// Each resource mask uniquely identifies a resource; both ResourceA and
-/// ResourceB only have one bit set.
-/// ResourceAB is a group; excluding the most significant bit in the mask, the
-/// remaining bits identify the composition of the group.
-///
-/// Resource masks are used by the ResourceManager to solve set membership
-/// problems with simple bit manipulation operations.
-void computeProcResourceMasks(const llvm::MCSchedModel &SM,
- llvm::SmallVectorImpl<uint64_t> &Masks);
-
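A standalone sketch of the set-membership trick described above, using the example masks from the comment (plain integers stand in for the masks produced by computeProcResourceMasks):

#include <cstdint>
#include <cstdio>

int main() {
  // Masks from the example above (0b001, 0b010, and the group mask 0b111).
  const uint64_t ResourceA = 0x1;
  const uint64_t ResourceB = 0x2;
  const uint64_t ResourceAB = 0x4 | ResourceA | ResourceB; // group bit + members

  // "Is ResourceA a member of group ResourceAB?" is a single AND.
  std::printf("A in AB: %s\n", (ResourceAB & ResourceA) ? "yes" : "no");
  std::printf("B in AB: %s\n", (ResourceAB & ResourceB) ? "yes" : "no");
  return 0;
}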
-/// Compute the reciprocal block throughput from a set of processor resource
-/// cycles. The reciprocal block throughput is computed as the MAX between:
-/// - NumMicroOps / DispatchWidth
-/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource).
-double computeBlockRThroughput(const llvm::MCSchedModel &SM,
- unsigned DispatchWidth, unsigned NumMicroOps,
- llvm::ArrayRef<unsigned> ProcResourceUsage);
-} // namespace mca
-
-#endif
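As a worked example of the MAX formula above, with invented numbers: DispatchWidth = 4, NumMicroOps = 6, and a single resource consumed for 9 cycles across 3 units gives max(6/4, 9/3) = 3.0 cycles per iteration. The same arithmetic, as a tiny standalone check:

#include <algorithm>
#include <cstdio>

int main() {
  // Invented numbers, purely to illustrate the MAX-based formula.
  const double NumMicroOps = 6.0, DispatchWidth = 4.0;
  const double ResourceCycles = 9.0, NumUnits = 3.0;
  double RThroughput =
      std::max(NumMicroOps / DispatchWidth, ResourceCycles / NumUnits);
  std::printf("Block RThroughput: %.2f\n", RThroughput); // prints 3.00
  return 0;
}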
diff --git a/contrib/llvm/tools/llvm-mca/TimelineView.cpp b/contrib/llvm/tools/llvm-mca/TimelineView.cpp
deleted file mode 100644
index 6e75cac0d432..000000000000
--- a/contrib/llvm/tools/llvm-mca/TimelineView.cpp
+++ /dev/null
@@ -1,240 +0,0 @@
-//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements the TimelineView interface.
-///
-//===----------------------------------------------------------------------===//
-
-#include "TimelineView.h"
-
-using namespace llvm;
-
-namespace mca {
-
-void TimelineView::initialize(unsigned MaxIterations) {
- unsigned NumInstructions =
- AsmSequence.getNumIterations() * AsmSequence.size();
- if (!MaxIterations)
- MaxIterations = DEFAULT_ITERATIONS;
- unsigned NumEntries =
- std::min(NumInstructions, MaxIterations * AsmSequence.size());
- Timeline.resize(NumEntries);
- TimelineViewEntry NullTVEntry = {0, 0, 0, 0, 0};
- std::fill(Timeline.begin(), Timeline.end(), NullTVEntry);
-
- WaitTime.resize(AsmSequence.size());
- WaitTimeEntry NullWTEntry = {0, 0, 0, 0};
- std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);
-}
-
-void TimelineView::onEvent(const HWInstructionEvent &Event) {
- const unsigned Index = Event.IR.getSourceIndex();
- if (CurrentCycle >= MaxCycle || Index >= Timeline.size())
- return;
- switch (Event.Type) {
- case HWInstructionEvent::Retired: {
- TimelineViewEntry &TVEntry = Timeline[Index];
- TVEntry.CycleRetired = CurrentCycle;
-
- // Update the WaitTime entry which corresponds to this Index.
- WaitTimeEntry &WTEntry = WaitTime[Index % AsmSequence.size()];
- WTEntry.Executions++;
- WTEntry.CyclesSpentInSchedulerQueue +=
- TVEntry.CycleIssued - TVEntry.CycleDispatched;
- assert(TVEntry.CycleDispatched <= TVEntry.CycleReady);
- WTEntry.CyclesSpentInSQWhileReady +=
- TVEntry.CycleIssued - TVEntry.CycleReady;
- WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
- (TVEntry.CycleRetired - 1) - TVEntry.CycleExecuted;
- break;
- }
- case HWInstructionEvent::Ready:
- Timeline[Index].CycleReady = CurrentCycle;
- break;
- case HWInstructionEvent::Issued:
- Timeline[Index].CycleIssued = CurrentCycle;
- break;
- case HWInstructionEvent::Executed:
- Timeline[Index].CycleExecuted = CurrentCycle;
- break;
- case HWInstructionEvent::Dispatched:
- Timeline[Index].CycleDispatched = CurrentCycle;
- break;
- default:
- return;
- }
- LastCycle = std::max(LastCycle, CurrentCycle);
-}
-
-void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
- const WaitTimeEntry &Entry,
- unsigned SourceIndex) const {
- OS << SourceIndex << '.';
- OS.PadToColumn(7);
-
- if (Entry.Executions == 0) {
- OS << "- - - - ";
- } else {
- double AverageTime1, AverageTime2, AverageTime3;
- unsigned Executions = Entry.Executions;
- AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions;
- AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions;
- AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions;
-
- OS << Executions;
- OS.PadToColumn(13);
-
- OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
- OS.PadToColumn(20);
- OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
- OS.PadToColumn(27);
- OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
- OS.PadToColumn(34);
- }
-}
-
-void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
- if (WaitTime.empty())
- return;
-
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- formatted_raw_ostream FOS(TempStream);
-
- FOS << "\n\nAverage Wait times (based on the timeline view):\n"
- << "[0]: Executions\n"
- << "[1]: Average time spent waiting in a scheduler's queue\n"
- << "[2]: Average time spent waiting in a scheduler's queue while ready\n"
- << "[3]: Average time elapsed from WB until retire stage\n\n";
- FOS << " [0] [1] [2] [3]\n";
-
- // Use a different string stream for the instruction.
- std::string Instruction;
- raw_string_ostream InstrStream(Instruction);
-
- for (unsigned I = 0, E = WaitTime.size(); I < E; ++I) {
- printWaitTimeEntry(FOS, WaitTime[I], I);
- // Append the instruction info at the end of the line.
- const MCInst &Inst = AsmSequence.getMCInstFromIndex(I);
-
- MCIP.printInst(&Inst, InstrStream, "", STI);
- InstrStream.flush();
-
- // Consume any tabs or spaces at the beginning of the string.
- StringRef Str(Instruction);
- Str = Str.ltrim();
- FOS << " " << Str << '\n';
- FOS.flush();
- Instruction = "";
-
- OS << Buffer;
- Buffer = "";
- }
-}
-
-void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
- const TimelineViewEntry &Entry,
- unsigned Iteration,
- unsigned SourceIndex) const {
- if (Iteration == 0 && SourceIndex == 0)
- OS << '\n';
- OS << '[' << Iteration << ',' << SourceIndex << ']';
- OS.PadToColumn(10);
- for (unsigned I = 0, E = Entry.CycleDispatched; I < E; ++I)
- OS << ((I % 5 == 0) ? '.' : ' ');
- OS << TimelineView::DisplayChar::Dispatched;
- if (Entry.CycleDispatched != Entry.CycleExecuted) {
- // Zero latency instructions have the same value for CycleDispatched,
- // CycleIssued and CycleExecuted.
- for (unsigned I = Entry.CycleDispatched + 1, E = Entry.CycleIssued; I < E;
- ++I)
- OS << TimelineView::DisplayChar::Waiting;
- if (Entry.CycleIssued == Entry.CycleExecuted)
-      OS << TimelineView::DisplayChar::Executed;
- else {
- if (Entry.CycleDispatched != Entry.CycleIssued)
- OS << TimelineView::DisplayChar::Executing;
- for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;
- ++I)
- OS << TimelineView::DisplayChar::Executing;
- OS << TimelineView::DisplayChar::Executed;
- }
- }
-
- for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
- OS << TimelineView::DisplayChar::RetireLag;
- OS << TimelineView::DisplayChar::Retired;
-
- // Skip other columns.
- for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)
- OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' ');
-}
-
-static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {
- OS << "\n\nTimeline view:\n";
- if (Cycles >= 10) {
- OS.PadToColumn(10);
- for (unsigned I = 0; I <= Cycles; ++I) {
- if (((I / 10) & 1) == 0)
- OS << ' ';
- else
- OS << I % 10;
- }
- OS << '\n';
- }
-
- OS << "Index";
- OS.PadToColumn(10);
- for (unsigned I = 0; I <= Cycles; ++I) {
- if (((I / 10) & 1) == 0)
- OS << I % 10;
- else
- OS << ' ';
- }
- OS << '\n';
-}
-
-void TimelineView::printTimeline(raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream StringStream(Buffer);
- formatted_raw_ostream FOS(StringStream);
-
- printTimelineHeader(FOS, LastCycle);
- FOS.flush();
- OS << Buffer;
-
- // Use a different string stream for the instruction.
- std::string Instruction;
- raw_string_ostream InstrStream(Instruction);
-
- for (unsigned I = 0, E = Timeline.size(); I < E; ++I) {
- Buffer = "";
- const TimelineViewEntry &Entry = Timeline[I];
- if (Entry.CycleRetired == 0)
- return;
-
- unsigned Iteration = I / AsmSequence.size();
- unsigned SourceIndex = I % AsmSequence.size();
- printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
- // Append the instruction info at the end of the line.
- const MCInst &Inst = AsmSequence.getMCInstFromIndex(I);
- MCIP.printInst(&Inst, InstrStream, "", STI);
- InstrStream.flush();
-
- // Consume any tabs or spaces at the beginning of the string.
- StringRef Str(Instruction);
- Str = Str.ltrim();
- FOS << " " << Str << '\n';
- FOS.flush();
- Instruction = "";
- OS << Buffer;
- }
-}
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp b/contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
new file mode 100644
index 000000000000..2562c82407bf
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
@@ -0,0 +1,86 @@
+//===--------------------- DispatchStatistics.cpp ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the DispatchStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/DispatchStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+void DispatchStatistics::onEvent(const HWStallEvent &Event) {
+ if (Event.Type < HWStallEvent::LastGenericEvent)
+ HWStalls[Event.Type]++;
+}
+
+void DispatchStatistics::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type != HWInstructionEvent::Dispatched)
+ return;
+
+ const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
+ NumDispatched += DE.MicroOpcodes;
+}
+
+void DispatchStatistics::printDispatchHistogram(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+ TempStream << "\n\nDispatch Logic - "
+ << "number of cycles where we saw N micro opcodes dispatched:\n";
+ TempStream << "[# dispatched], [# cycles]\n";
+ for (const std::pair<unsigned, unsigned> &Entry : DispatchGroupSizePerCycle) {
+ double Percentage = ((double)Entry.second / NumCycles) * 100.0;
+ TempStream << " " << Entry.first << ", " << Entry.second
+ << " (" << format("%.1f", floor((Percentage * 10) + 0.5) / 10)
+ << "%)\n";
+ }
+
+ TempStream.flush();
+ OS << Buffer;
+}
+
+static void printStalls(raw_ostream &OS, unsigned NumStalls,
+ unsigned NumCycles) {
+ if (!NumStalls) {
+ OS << NumStalls;
+ return;
+ }
+
+ double Percentage = ((double)NumStalls / NumCycles) * 100.0;
+ OS << NumStalls << " ("
+ << format("%.1f", floor((Percentage * 10) + 0.5) / 10) << "%)";
+}
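The floor((x * 10) + 0.5) / 10 expression above simply rounds a percentage to one decimal place before printing. A quick standalone check with an invented value:

#include <cmath>
#include <cstdio>

int main() {
  double Percentage = 12.34567; // invented value
  double Rounded = std::floor((Percentage * 10) + 0.5) / 10;
  std::printf("%.1f%%\n", Rounded); // prints 12.3%
  return 0;
}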
+
+void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream SS(Buffer);
+ SS << "\n\nDynamic Dispatch Stall Cycles:\n";
+ SS << "RAT - Register unavailable: ";
+ printStalls(SS, HWStalls[HWStallEvent::RegisterFileStall], NumCycles);
+ SS << "\nRCU - Retire tokens unavailable: ";
+ printStalls(SS, HWStalls[HWStallEvent::RetireControlUnitStall], NumCycles);
+ SS << "\nSCHEDQ - Scheduler full: ";
+ printStalls(SS, HWStalls[HWStallEvent::SchedulerQueueFull], NumCycles);
+ SS << "\nLQ - Load queue full: ";
+ printStalls(SS, HWStalls[HWStallEvent::LoadQueueFull], NumCycles);
+ SS << "\nSQ - Store queue full: ";
+ printStalls(SS, HWStalls[HWStallEvent::StoreQueueFull], NumCycles);
+ SS << "\nGROUP - Static restrictions on the dispatch group: ";
+ printStalls(SS, HWStalls[HWStallEvent::DispatchGroupStall], NumCycles);
+ SS << '\n';
+ SS.flush();
+ OS << Buffer;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/DispatchStatistics.h b/contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.h
index 1e389d54766b..6679c81efe95 100644
--- a/contrib/llvm/tools/llvm-mca/DispatchStatistics.h
+++ b/contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.h
@@ -24,7 +24,7 @@
/// GROUP - Static restrictions on the dispatch group: 0
///
///
-/// Dispatch Logic - number of cycles where we saw N instructions dispatched:
+/// Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
/// [# dispatched], [# cycles]
/// 0, 15 (11.5%)
/// 2, 4 (3.1%)
@@ -34,11 +34,12 @@
#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
#define LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
-#include "View.h"
+#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include <map>
+namespace llvm {
namespace mca {
class DispatchStatistics : public View {
@@ -80,5 +81,6 @@ public:
}
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/InstructionInfoView.cpp b/contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
index 0e50a96d19c1..5016afb49e44 100644
--- a/contrib/llvm/tools/llvm-mca/InstructionInfoView.cpp
+++ b/contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -12,17 +12,15 @@
///
//===----------------------------------------------------------------------===//
-#include "InstructionInfoView.h"
+#include "Views/InstructionInfoView.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
-
void InstructionInfoView::printView(raw_ostream &OS) const {
std::string Buffer;
raw_string_ostream TempStream(Buffer);
const MCSchedModel &SM = STI.getSchedModel();
- unsigned Instructions = Source.size();
std::string Instruction;
raw_string_ostream InstrStream(Instruction);
@@ -32,8 +30,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
<< "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n\n";
TempStream << "[1] [2] [3] [4] [5] [6] Instructions:\n";
- for (unsigned I = 0, E = Instructions; I < E; ++I) {
- const MCInst &Inst = Source.getMCInstFromIndex(I);
+ for (const MCInst &Inst : Source) {
const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
// Obtain the scheduling class information from the instruction.
@@ -89,3 +86,4 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
OS << Buffer;
}
} // namespace mca.
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/InstructionInfoView.h b/contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.h
index 0770ae3d2b57..3ef95d474490 100644
--- a/contrib/llvm/tools/llvm-mca/InstructionInfoView.h
+++ b/contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.h
@@ -35,8 +35,9 @@
#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
-#include "SourceMgr.h"
-#include "View.h"
+#include "Views/View.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -44,23 +45,25 @@
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
/// A view that prints out generic instruction information.
class InstructionInfoView : public View {
const llvm::MCSubtargetInfo &STI;
const llvm::MCInstrInfo &MCII;
- const SourceMgr &Source;
+ llvm::ArrayRef<llvm::MCInst> Source;
llvm::MCInstPrinter &MCIP;
public:
InstructionInfoView(const llvm::MCSubtargetInfo &sti,
- const llvm::MCInstrInfo &mcii, const SourceMgr &S,
- llvm::MCInstPrinter &IP)
+ const llvm::MCInstrInfo &mcii,
+ llvm::ArrayRef<llvm::MCInst> S, llvm::MCInstPrinter &IP)
: STI(sti), MCII(mcii), Source(S), MCIP(IP) {}
void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp
new file mode 100644
index 000000000000..06202bc41421
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp
@@ -0,0 +1,168 @@
+//===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the RegisterFileStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/RegisterFileStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti)
+ : STI(sti) {
+ const MCSchedModel &SM = STI.getSchedModel();
+ RegisterFileUsage RFUEmpty = {0, 0, 0};
+ MoveEliminationInfo MEIEmpty = {0, 0, 0, 0, 0};
+ if (!SM.hasExtraProcessorInfo()) {
+ // Assume a single register file.
+ PRFUsage.emplace_back(RFUEmpty);
+ MoveElimInfo.emplace_back(MEIEmpty);
+ return;
+ }
+
+ // Initialize a RegisterFileUsage for every user defined register file, plus
+ // the default register file which is always at index #0.
+ const MCExtraProcessorInfo &PI = SM.getExtraProcessorInfo();
+ // There is always an "InvalidRegisterFile" entry in tablegen. That entry can
+ // be skipped. If there are no user defined register files, then reserve a
+ // single entry for the default register file at index #0.
+ unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U);
+
+ PRFUsage.resize(NumRegFiles);
+ std::fill(PRFUsage.begin(), PRFUsage.end(), RFUEmpty);
+
+ MoveElimInfo.resize(NumRegFiles);
+ std::fill(MoveElimInfo.begin(), MoveElimInfo.end(), MEIEmpty);
+}
+
+void RegisterFileStatistics::updateRegisterFileUsage(
+ ArrayRef<unsigned> UsedPhysRegs) {
+ for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) {
+ RegisterFileUsage &RFU = PRFUsage[I];
+ unsigned NumUsedPhysRegs = UsedPhysRegs[I];
+ RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
+ RFU.TotalMappings += NumUsedPhysRegs;
+ RFU.MaxUsedMappings =
+ std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
+ }
+}
+
+void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) {
+ if (!Inst.isOptimizableMove())
+ return;
+
+ assert(Inst.getDefs().size() == 1 && "Expected a single definition!");
+ assert(Inst.getUses().size() == 1 && "Expected a single register use!");
+ const WriteState &WS = Inst.getDefs()[0];
+ const ReadState &RS = Inst.getUses()[0];
+
+ MoveEliminationInfo &Info =
+ MoveElimInfo[Inst.getDefs()[0].getRegisterFileID()];
+ Info.TotalMoveEliminationCandidates++;
+ if (WS.isEliminated())
+ Info.CurrentMovesEliminated++;
+ if (WS.isWriteZero() && RS.isReadZero())
+ Info.TotalMovesThatPropagateZero++;
+}
+
+void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
+ switch (Event.Type) {
+ default:
+ break;
+ case HWInstructionEvent::Retired: {
+ const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event);
+ for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I)
+ PRFUsage[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
+ break;
+ }
+ case HWInstructionEvent::Dispatched: {
+ const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
+ updateRegisterFileUsage(DE.UsedPhysRegs);
+ updateMoveElimInfo(*DE.IR.getInstruction());
+ }
+ }
+}
+
+void RegisterFileStatistics::onCycleEnd() {
+ for (MoveEliminationInfo &MEI : MoveElimInfo) {
+ unsigned &CurrentMax = MEI.MaxMovesEliminatedPerCycle;
+ CurrentMax = std::max(CurrentMax, MEI.CurrentMovesEliminated);
+ MEI.TotalMovesEliminated += MEI.CurrentMovesEliminated;
+ MEI.CurrentMovesEliminated = 0;
+ }
+}
+
+void RegisterFileStatistics::printView(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+
+ TempStream << "\n\nRegister File statistics:";
+ const RegisterFileUsage &GlobalUsage = PRFUsage[0];
+ TempStream << "\nTotal number of mappings created: "
+ << GlobalUsage.TotalMappings;
+ TempStream << "\nMax number of mappings used: "
+ << GlobalUsage.MaxUsedMappings << '\n';
+
+ for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) {
+ const RegisterFileUsage &RFU = PRFUsage[I];
+ // Obtain the register file descriptor from the scheduling model.
+ assert(STI.getSchedModel().hasExtraProcessorInfo() &&
+ "Unable to find register file info!");
+ const MCExtraProcessorInfo &PI =
+ STI.getSchedModel().getExtraProcessorInfo();
+ assert(I <= PI.NumRegisterFiles && "Unexpected register file index!");
+ const MCRegisterFileDesc &RFDesc = PI.RegisterFiles[I];
+ // Skip invalid register files.
+ if (!RFDesc.NumPhysRegs)
+ continue;
+
+ TempStream << "\n* Register File #" << I;
+ TempStream << " -- " << StringRef(RFDesc.Name) << ':';
+ TempStream << "\n Number of physical registers: ";
+ if (!RFDesc.NumPhysRegs)
+ TempStream << "unbounded";
+ else
+ TempStream << RFDesc.NumPhysRegs;
+ TempStream << "\n Total number of mappings created: "
+ << RFU.TotalMappings;
+ TempStream << "\n Max number of mappings used: "
+ << RFU.MaxUsedMappings << '\n';
+ const MoveEliminationInfo &MEI = MoveElimInfo[I];
+
+ if (MEI.TotalMoveEliminationCandidates) {
+ TempStream << " Number of optimizable moves: "
+ << MEI.TotalMoveEliminationCandidates;
+ double EliminatedMovProportion = (double)MEI.TotalMovesEliminated /
+ MEI.TotalMoveEliminationCandidates *
+ 100.0;
+ double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero /
+ MEI.TotalMoveEliminationCandidates * 100.0;
+ TempStream << "\n Number of moves eliminated: "
+ << MEI.TotalMovesEliminated << " "
+ << format("(%.1f%%)",
+ floor((EliminatedMovProportion * 10) + 0.5) / 10);
+ TempStream << "\n Number of zero moves: "
+ << MEI.TotalMovesThatPropagateZero << " "
+ << format("(%.1f%%)",
+ floor((ZeroMovProportion * 10) + 0.5) / 10);
+ TempStream << "\n Max moves eliminated per cycle: "
+ << MEI.MaxMovesEliminatedPerCycle << '\n';
+ }
+ }
+
+ TempStream.flush();
+ OS << Buffer;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/RegisterFileStatistics.h b/contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
index cbe816cd3332..a2c52a668dae 100644
--- a/contrib/llvm/tools/llvm-mca/RegisterFileStatistics.h
+++ b/contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
@@ -21,6 +21,10 @@
/// Number of physical registers: 72
/// Total number of mappings created: 0
/// Max number of mappings used: 0
+/// Number of optimizable moves: 200
+/// Number of moves eliminated: 200 (100.0%)
+/// Number of zero moves: 200 (100.0%)
+/// Max moves eliminated per cycle: 2
///
/// * Register File #2 -- IntegerPRF:
/// Number of physical registers: 64
@@ -32,10 +36,11 @@
#ifndef LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
-#include "View.h"
+#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
+namespace llvm {
namespace mca {
class RegisterFileStatistics : public View {
@@ -48,20 +53,29 @@ class RegisterFileStatistics : public View {
unsigned CurrentlyUsedMappings;
};
+ struct MoveEliminationInfo {
+ unsigned TotalMoveEliminationCandidates;
+ unsigned TotalMovesEliminated;
+ unsigned TotalMovesThatPropagateZero;
+ unsigned MaxMovesEliminatedPerCycle;
+ unsigned CurrentMovesEliminated;
+ };
+
// There is one entry for each register file implemented by the processor.
- llvm::SmallVector<RegisterFileUsage, 4> RegisterFiles;
+ llvm::SmallVector<RegisterFileUsage, 4> PRFUsage;
+ llvm::SmallVector<MoveEliminationInfo, 4> MoveElimInfo;
- void initializeRegisterFileInfo();
+ void updateRegisterFileUsage(ArrayRef<unsigned> UsedPhysRegs);
+ void updateMoveElimInfo(const Instruction &Inst);
public:
- RegisterFileStatistics(const llvm::MCSubtargetInfo &sti) : STI(sti) {
- initializeRegisterFileInfo();
- }
+ RegisterFileStatistics(const llvm::MCSubtargetInfo &sti);
+ void onCycleEnd() override;
void onEvent(const HWInstructionEvent &Event) override;
-
void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/ResourcePressureView.cpp b/contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
index fe9d5b7fabc8..6df61840437d 100644
--- a/contrib/llvm/tools/llvm-mca/ResourcePressureView.cpp
+++ b/contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
@@ -12,15 +12,17 @@
///
//===----------------------------------------------------------------------===//
-#include "ResourcePressureView.h"
+#include "Views/ResourcePressureView.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
-
-void ResourcePressureView::initialize() {
+ResourcePressureView::ResourcePressureView(const llvm::MCSubtargetInfo &sti,
+ MCInstPrinter &Printer,
+ ArrayRef<MCInst> S)
+ : STI(sti), MCIP(Printer), Source(S), LastInstructionIdx(0) {
// Populate the map of resource descriptors.
unsigned R2VIndex = 0;
const MCSchedModel &SM = STI.getSchedModel();
@@ -41,12 +43,19 @@ void ResourcePressureView::initialize() {
}
void ResourcePressureView::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type == HWInstructionEvent::Dispatched) {
+ LastInstructionIdx = Event.IR.getSourceIndex();
+ return;
+ }
+
// We're only interested in Issue events.
if (Event.Type != HWInstructionEvent::Issued)
return;
+
const auto &IssueEvent = static_cast<const HWInstructionIssuedEvent &>(Event);
const unsigned SourceIdx = Event.IR.getSourceIndex() % Source.size();
- for (const std::pair<ResourceRef, double> &Use : IssueEvent.UsedResources) {
+ for (const std::pair<ResourceRef, ResourceCycles> &Use :
+ IssueEvent.UsedResources) {
const ResourceRef &RR = Use.first;
assert(Resource2VecIndex.find(RR.first) != Resource2VecIndex.end());
unsigned R2VIndex = Resource2VecIndex[RR.first];
@@ -91,8 +100,7 @@ static void printResourcePressure(formatted_raw_ostream &OS, double Pressure,
OS.PadToColumn(Col);
}
-void ResourcePressureView::printResourcePressurePerIteration(
- raw_ostream &OS, unsigned Executions) const {
+void ResourcePressureView::printResourcePressurePerIter(raw_ostream &OS) const {
std::string Buffer;
raw_string_ostream TempStream(Buffer);
formatted_raw_ostream FOS(TempStream);
@@ -125,6 +133,7 @@ void ResourcePressureView::printResourcePressurePerIteration(
FOS << '\n';
FOS.flush();
+ const unsigned Executions = LastInstructionIdx / Source.size() + 1;
for (unsigned I = 0, E = NumResourceUnits; I < E; ++I) {
double Usage = ResourceUsage[I + Source.size() * E];
printResourcePressure(FOS, Usage / Executions, (I + 1) * 7);
@@ -134,8 +143,7 @@ void ResourcePressureView::printResourcePressurePerIteration(
OS << Buffer;
}
-void ResourcePressureView::printResourcePressurePerInstruction(
- raw_ostream &OS, unsigned Executions) const {
+void ResourcePressureView::printResourcePressurePerInst(raw_ostream &OS) const {
std::string Buffer;
raw_string_ostream TempStream(Buffer);
formatted_raw_ostream FOS(TempStream);
@@ -147,13 +155,16 @@ void ResourcePressureView::printResourcePressurePerInstruction(
std::string Instruction;
raw_string_ostream InstrStream(Instruction);
- for (unsigned I = 0, E = Source.size(); I < E; ++I) {
+ unsigned InstrIndex = 0;
+ const unsigned Executions = LastInstructionIdx / Source.size() + 1;
+ for (const MCInst &MCI : Source) {
+ unsigned BaseEltIdx = InstrIndex * NumResourceUnits;
for (unsigned J = 0; J < NumResourceUnits; ++J) {
- double Usage = ResourceUsage[J + I * NumResourceUnits];
+ double Usage = ResourceUsage[J + BaseEltIdx];
printResourcePressure(FOS, Usage / Executions, (J + 1) * 7);
}
- MCIP.printInst(&Source.getMCInstFromIndex(I), InstrStream, "", STI);
+ MCIP.printInst(&MCI, InstrStream, "", STI);
InstrStream.flush();
StringRef Str(Instruction);
@@ -166,6 +177,9 @@ void ResourcePressureView::printResourcePressurePerInstruction(
FOS.flush();
OS << Buffer;
Buffer = "";
+
+ ++InstrIndex;
}
}
} // namespace mca
+} // namespace llvm
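
The reworked view above no longer queries a SourceMgr for the number of iterations: it records the source index of the last dispatched instruction and derives the execution count from it at print time. A minimal standalone sketch of that bookkeeping, with made-up values and illustrative names that are not part of the llvm-mca API:

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned NumInstructions = 3;    // instructions in the input block
      const unsigned LastInstructionIdx = 8; // source index of the last dispatch
      // Index 8 with a 3-instruction block means iterations 0..2 were observed.
      const unsigned Executions = LastInstructionIdx / NumInstructions + 1;

      // Cycles spent on one resource by each instruction, summed over every
      // iteration (what ResourceUsage accumulates through onEvent).
      std::vector<double> CumulativeCycles = {6.0, 3.0, 9.0};
      for (unsigned I = 0; I < NumInstructions; ++I)
        std::printf("instruction %u: %.2f cycles per iteration\n", I,
                    CumulativeCycles[I] / Executions);
      std::printf("iterations inferred: %u\n", Executions);
      return 0;
    }

Dividing the accumulated counters by the inferred execution count is what turns the raw per-event totals into the per-iteration pressure figures shown in the report.
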
diff --git a/contrib/llvm/tools/llvm-mca/ResourcePressureView.h b/contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.h
index fe1c6af5e6f6..572ce6fe6b70 100644
--- a/contrib/llvm/tools/llvm-mca/ResourcePressureView.h
+++ b/contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.h
@@ -58,13 +58,14 @@
#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
#define LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
-#include "SourceMgr.h"
-#include "View.h"
+#include "Views/View.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include <map>
+namespace llvm {
namespace mca {
/// This class collects resource pressure statistics and it is able to print
@@ -72,38 +73,32 @@ namespace mca {
class ResourcePressureView : public View {
const llvm::MCSubtargetInfo &STI;
llvm::MCInstPrinter &MCIP;
- const SourceMgr &Source;
+ llvm::ArrayRef<llvm::MCInst> Source;
+ unsigned LastInstructionIdx;
// Map to quickly obtain the ResourceUsage column index from a processor
// resource ID.
llvm::DenseMap<unsigned, unsigned> Resource2VecIndex;
// Table of resources used by instructions.
- std::vector<double> ResourceUsage;
+ std::vector<ResourceCycles> ResourceUsage;
unsigned NumResourceUnits;
- const llvm::MCInst &GetMCInstFromIndex(unsigned Index) const;
- void printResourcePressurePerIteration(llvm::raw_ostream &OS,
- unsigned Executions) const;
- void printResourcePressurePerInstruction(llvm::raw_ostream &OS,
- unsigned Executions) const;
- void initialize();
+ void printResourcePressurePerIter(llvm::raw_ostream &OS) const;
+ void printResourcePressurePerInst(llvm::raw_ostream &OS) const;
public:
ResourcePressureView(const llvm::MCSubtargetInfo &sti,
- llvm::MCInstPrinter &Printer, const SourceMgr &SM)
- : STI(sti), MCIP(Printer), Source(SM) {
- initialize();
- }
+ llvm::MCInstPrinter &Printer,
+ llvm::ArrayRef<llvm::MCInst> S);
void onEvent(const HWInstructionEvent &Event) override;
-
void printView(llvm::raw_ostream &OS) const override {
- unsigned Executions = Source.getNumIterations();
- printResourcePressurePerIteration(OS, Executions);
- printResourcePressurePerInstruction(OS, Executions);
+ printResourcePressurePerIter(OS);
+ printResourcePressurePerInst(OS);
}
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
new file mode 100644
index 000000000000..54eb28f1add9
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
@@ -0,0 +1,91 @@
+//===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the RetireControlUnitStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/RetireControlUnitStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+RetireControlUnitStatistics::RetireControlUnitStatistics(const MCSchedModel &SM)
+ : NumRetired(0), NumCycles(0), EntriesInUse(0), MaxUsedEntries(0),
+ SumOfUsedEntries(0) {
+ TotalROBEntries = SM.MicroOpBufferSize;
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ if (EPI.ReorderBufferSize)
+ TotalROBEntries = EPI.ReorderBufferSize;
+ }
+}
+
+void RetireControlUnitStatistics::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type == HWInstructionEvent::Dispatched) {
+ unsigned NumEntries =
+ static_cast<const HWInstructionDispatchedEvent &>(Event).MicroOpcodes;
+ EntriesInUse += NumEntries;
+ }
+
+ if (Event.Type == HWInstructionEvent::Retired) {
+ unsigned ReleasedEntries = Event.IR.getInstruction()->getDesc().NumMicroOps;
+ assert(EntriesInUse >= ReleasedEntries && "Invalid internal state!");
+ EntriesInUse -= ReleasedEntries;
+ ++NumRetired;
+ }
+}
+
+void RetireControlUnitStatistics::onCycleEnd() {
+ // Update histogram
+ RetiredPerCycle[NumRetired]++;
+ NumRetired = 0;
+ ++NumCycles;
+ MaxUsedEntries = std::max(MaxUsedEntries, EntriesInUse);
+ SumOfUsedEntries += EntriesInUse;
+}
+
+void RetireControlUnitStatistics::printView(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+ TempStream << "\n\nRetire Control Unit - "
+ << "number of cycles where we saw N instructions retired:\n";
+ TempStream << "[# retired], [# cycles]\n";
+
+ for (const std::pair<unsigned, unsigned> &Entry : RetiredPerCycle) {
+ TempStream << " " << Entry.first;
+ if (Entry.first < 10)
+ TempStream << ", ";
+ else
+ TempStream << ", ";
+ TempStream << Entry.second << " ("
+ << format("%.1f", ((double)Entry.second / NumCycles) * 100.0)
+ << "%)\n";
+ }
+
+ unsigned AvgUsage = (double)SumOfUsedEntries / NumCycles;
+ double MaxUsagePercentage = ((double)MaxUsedEntries / TotalROBEntries) * 100.0;
+ double NormalizedMaxPercentage = floor((MaxUsagePercentage * 10) + 0.5) / 10;
+ double AvgUsagePercentage = ((double)AvgUsage / TotalROBEntries) * 100.0;
+ double NormalizedAvgPercentage = floor((AvgUsagePercentage * 10) + 0.5) / 10;
+
+ TempStream << "\nTotal ROB Entries: " << TotalROBEntries
+ << "\nMax Used ROB Entries: " << MaxUsedEntries
+ << format(" ( %.1f%% )", NormalizedMaxPercentage)
+ << "\nAverage Used ROB Entries per cy: " << AvgUsage
+ << format(" ( %.1f%% )\n", NormalizedAvgPercentage);
+
+ TempStream.flush();
+ OS << Buffer;
+}
+
+} // namespace mca
+} // namespace llvm
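
The new RetireControlUnitStatistics view combines a per-cycle retirement histogram with reorder-buffer occupancy tracking. The following standalone sketch, driven by invented sample data, reproduces the same bookkeeping and the same round-to-nearest-tenth formatting; it is an illustration, not llvm-mca code:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <map>
    #include <utility>
    #include <vector>

    int main() {
      const unsigned TotalROBEntries = 64;
      // Pairs of (instructions retired, ROB entries in use) sampled per cycle.
      std::vector<std::pair<unsigned, unsigned>> Cycles = {
          {0, 10}, {2, 24}, {2, 35}, {1, 30}, {2, 28}};

      std::map<unsigned, unsigned> RetiredPerCycle;
      unsigned MaxUsedEntries = 0;
      unsigned long long SumOfUsedEntries = 0;
      for (const auto &C : Cycles) {
        RetiredPerCycle[C.first]++;
        MaxUsedEntries = std::max(MaxUsedEntries, C.second);
        SumOfUsedEntries += C.second;
      }

      const unsigned NumCycles = Cycles.size();
      unsigned AvgUsage = (double)SumOfUsedEntries / NumCycles;
      double MaxPct = ((double)MaxUsedEntries / TotalROBEntries) * 100.0;
      double AvgPct = ((double)AvgUsage / TotalROBEntries) * 100.0;
      // Round to the nearest tenth, exactly as printView() does.
      MaxPct = std::floor((MaxPct * 10) + 0.5) / 10;
      AvgPct = std::floor((AvgPct * 10) + 0.5) / 10;

      for (const auto &Entry : RetiredPerCycle)
        std::printf(" %u, %u (%.1f%%)\n", Entry.first, Entry.second,
                    ((double)Entry.second / NumCycles) * 100.0);
      std::printf("Max Used ROB Entries: %u ( %.1f%% )\n", MaxUsedEntries, MaxPct);
      std::printf("Average Used ROB Entries per cy: %u ( %.1f%% )\n", AvgUsage,
                  AvgPct);
      return 0;
    }
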
diff --git a/contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.h b/contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
index 1f03e7efe889..02aa13bc444a 100644
--- a/contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.h
+++ b/contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
@@ -16,20 +16,24 @@
///
/// Retire Control Unit - number of cycles where we saw N instructions retired:
/// [# retired], [# cycles]
-/// 0, 9 (6.9%)
-/// 1, 6 (4.6%)
-/// 2, 1 (0.8%)
-/// 4, 3 (2.3%)
+/// 0, 109 (17.9%)
+/// 1, 102 (16.7%)
+/// 2, 399 (65.4%)
+///
+/// Total ROB Entries: 64
+/// Max Used ROB Entries: 35 ( 54.7% )
+/// Average Used ROB Entries per cy: 32 ( 50.0% )
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
-#include "View.h"
-#include "llvm/MC/MCSubtargetInfo.h"
+#include "Views/View.h"
+#include "llvm/MC/MCSchedule.h"
#include <map>
+namespace llvm {
namespace mca {
class RetireControlUnitStatistics : public View {
@@ -38,23 +42,20 @@ class RetireControlUnitStatistics : public View {
unsigned NumRetired;
unsigned NumCycles;
-
- void updateHistograms() {
- RetiredPerCycle[NumRetired]++;
- NumRetired = 0;
- }
+ unsigned TotalROBEntries;
+ unsigned EntriesInUse;
+ unsigned MaxUsedEntries;
+ unsigned SumOfUsedEntries;
public:
- RetireControlUnitStatistics() : NumRetired(0), NumCycles(0) {}
+ RetireControlUnitStatistics(const MCSchedModel &SM);
void onEvent(const HWInstructionEvent &Event) override;
-
- void onCycleBegin() override { NumCycles++; }
-
- void onCycleEnd() override { updateHistograms(); }
-
+ void onCycleEnd() override;
void printView(llvm::raw_ostream &OS) const override;
};
+
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp b/contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
new file mode 100644
index 000000000000..670f90127f18
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
@@ -0,0 +1,183 @@
+//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the SchedulerStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/SchedulerStatistics.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+
+namespace llvm {
+namespace mca {
+
+SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
+ : SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0),
+ NumCycles(0), MostRecentLoadDispatched(~0U),
+ MostRecentStoreDispatched(~0U),
+ IssuedPerCycle(STI.getSchedModel().NumProcResourceKinds, 0),
+ Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) {
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ LQResourceID = EPI.LoadQueueID;
+ SQResourceID = EPI.StoreQueueID;
+ }
+}
+
+// FIXME: This implementation works under the assumption that load/store queue
+// entries are reserved at 'instruction dispatched' stage, and released at
+// 'instruction executed' stage. This currently matches the behavior of LSUnit.
+//
+// The current design minimizes the number of events generated by the
+// Dispatch/Execute stages, at the cost of doing extra bookkeeping in method
+// `onEvent`. However, it introduces a subtle dependency between this view and
+// how the LSUnit works.
+//
+// In future we should add a new "memory queue" event type, so that we stop
+// making assumptions on how LSUnit internally works (See PR39828).
+void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type == HWInstructionEvent::Issued)
+ ++NumIssued;
+ else if (Event.Type == HWInstructionEvent::Dispatched) {
+ const Instruction &Inst = *Event.IR.getInstruction();
+ const unsigned Index = Event.IR.getSourceIndex();
+ if (LQResourceID && Inst.getDesc().MayLoad &&
+ MostRecentLoadDispatched != Index) {
+ Usage[LQResourceID].SlotsInUse++;
+ MostRecentLoadDispatched = Index;
+ }
+ if (SQResourceID && Inst.getDesc().MayStore &&
+ MostRecentStoreDispatched != Index) {
+ Usage[SQResourceID].SlotsInUse++;
+ MostRecentStoreDispatched = Index;
+ }
+ } else if (Event.Type == HWInstructionEvent::Executed) {
+ const Instruction &Inst = *Event.IR.getInstruction();
+ if (LQResourceID && Inst.getDesc().MayLoad) {
+ assert(Usage[LQResourceID].SlotsInUse);
+ Usage[LQResourceID].SlotsInUse--;
+ }
+ if (SQResourceID && Inst.getDesc().MayStore) {
+ assert(Usage[SQResourceID].SlotsInUse);
+ Usage[SQResourceID].SlotsInUse--;
+ }
+ }
+}
+
+void SchedulerStatistics::onReservedBuffers(const InstRef & /* unused */,
+ ArrayRef<unsigned> Buffers) {
+ for (const unsigned Buffer : Buffers) {
+ if (Buffer == LQResourceID || Buffer == SQResourceID)
+ continue;
+ Usage[Buffer].SlotsInUse++;
+ }
+}
+
+void SchedulerStatistics::onReleasedBuffers(const InstRef & /* unused */,
+ ArrayRef<unsigned> Buffers) {
+ for (const unsigned Buffer : Buffers) {
+ if (Buffer == LQResourceID || Buffer == SQResourceID)
+ continue;
+ Usage[Buffer].SlotsInUse--;
+ }
+}
+
+void SchedulerStatistics::updateHistograms() {
+ for (BufferUsage &BU : Usage) {
+ BU.CumulativeNumUsedSlots += BU.SlotsInUse;
+ BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
+ }
+
+ IssuedPerCycle[NumIssued]++;
+ NumIssued = 0;
+}
+
+void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const {
+ OS << "\n\nSchedulers - "
+ << "number of cycles where we saw N instructions issued:\n";
+ OS << "[# issued], [# cycles]\n";
+
+ const auto It =
+ std::max_element(IssuedPerCycle.begin(), IssuedPerCycle.end());
+ unsigned Index = std::distance(IssuedPerCycle.begin(), It);
+
+ bool HasColors = OS.has_colors();
+ for (unsigned I = 0, E = IssuedPerCycle.size(); I < E; ++I) {
+ unsigned IPC = IssuedPerCycle[I];
+ if (!IPC)
+ continue;
+
+ if (I == Index && HasColors)
+ OS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+
+ OS << " " << I << ", " << IPC << " ("
+ << format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n";
+ if (HasColors)
+ OS.resetColor();
+ }
+}
+
+void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const {
+ assert(NumCycles && "Unexpected number of cycles!");
+
+ OS << "\nScheduler's queue usage:\n";
+ if (all_of(Usage, [](const BufferUsage &BU) { return !BU.MaxUsedSlots; })) {
+ OS << "No scheduler resources used.\n";
+ return;
+ }
+
+ OS << "[1] Resource name.\n"
+ << "[2] Average number of used buffer entries.\n"
+ << "[3] Maximum number of used buffer entries.\n"
+ << "[4] Total number of buffer entries.\n\n"
+ << " [1] [2] [3] [4]\n";
+
+ formatted_raw_ostream FOS(OS);
+ bool HasColors = FOS.has_colors();
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ if (ProcResource.BufferSize <= 0)
+ continue;
+
+ const BufferUsage &BU = Usage[I];
+ double AvgUsage = (double)BU.CumulativeNumUsedSlots / NumCycles;
+ double AlmostFullThreshold = (double)(ProcResource.BufferSize * 4) / 5;
+ unsigned NormalizedAvg = floor((AvgUsage * 10) + 0.5) / 10;
+ unsigned NormalizedThreshold = floor((AlmostFullThreshold * 10) + 0.5) / 10;
+
+ FOS << ProcResource.Name;
+ FOS.PadToColumn(17);
+ if (HasColors && NormalizedAvg >= NormalizedThreshold)
+ FOS.changeColor(raw_ostream::YELLOW, true, false);
+ FOS << NormalizedAvg;
+ if (HasColors)
+ FOS.resetColor();
+ FOS.PadToColumn(28);
+ if (HasColors &&
+ BU.MaxUsedSlots == static_cast<unsigned>(ProcResource.BufferSize))
+ FOS.changeColor(raw_ostream::RED, true, false);
+ FOS << BU.MaxUsedSlots;
+ if (HasColors)
+ FOS.resetColor();
+ FOS.PadToColumn(39);
+ FOS << ProcResource.BufferSize << '\n';
+ }
+
+ FOS.flush();
+}
+
+void SchedulerStatistics::printView(raw_ostream &OS) const {
+ printSchedulerStats(OS);
+ printSchedulerUsage(OS);
+}
+
+} // namespace mca
+} // namespace llvm
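
The scheduler statistics now keep one BufferUsage record per processor resource instead of a map, and the load/store queues are tracked through dispatch/execute events rather than buffer events. The core accounting — reservations raise SlotsInUse, releases lower it, and every cycle end folds the current occupancy into a running maximum and a cumulative sum — can be sketched in plain C++ as follows (illustrative only):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct BufferUsage {
      unsigned SlotsInUse = 0;
      unsigned MaxUsedSlots = 0;
      uint64_t CumulativeNumUsedSlots = 0;
    };

    int main() {
      std::vector<BufferUsage> Usage(2); // two buffered resources
      unsigned NumCycles = 0;

      auto OnCycleEnd = [&] {
        for (BufferUsage &BU : Usage) {
          BU.CumulativeNumUsedSlots += BU.SlotsInUse;
          BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
        }
        ++NumCycles;
      };

      // Cycle 0: two instructions reserve entries in buffer 0, one in buffer 1.
      Usage[0].SlotsInUse += 2;
      Usage[1].SlotsInUse += 1;
      OnCycleEnd();
      // Cycle 1: one entry of buffer 0 is released.
      Usage[0].SlotsInUse -= 1;
      OnCycleEnd();

      for (unsigned I = 0, E = Usage.size(); I < E; ++I) {
        double Avg = (double)Usage[I].CumulativeNumUsedSlots / NumCycles;
        std::printf("buffer %u: avg %.1f, max %u\n", I, Avg,
                    Usage[I].MaxUsedSlots);
      }
      return 0;
    }
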
diff --git a/contrib/llvm/tools/llvm-mca/SchedulerStatistics.h b/contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
index 7383c54a1615..d99a395a726d 100644
--- a/contrib/llvm/tools/llvm-mca/SchedulerStatistics.h
+++ b/contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
@@ -17,75 +17,78 @@
///
/// Schedulers - number of cycles where we saw N instructions issued:
/// [# issued], [# cycles]
-/// 0, 7 (5.4%)
-/// 1, 4 (3.1%)
-/// 2, 8 (6.2%)
+/// 0, 6 (2.9%)
+/// 1, 106 (50.7%)
+/// 2, 97 (46.4%)
///
/// Scheduler's queue usage:
-/// JALU01, 0/20
-/// JFPU01, 18/18
-/// JLSAGU, 0/12
+/// [1] Resource name.
+/// [2] Average number of used buffer entries.
+/// [3] Maximum number of used buffer entries.
+/// [4] Total number of buffer entries.
///
+/// [1] [2] [3] [4]
+/// JALU01 0 0 20
+/// JFPU01 15 18 18
+/// JLSAGU 0 0 12
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
-#include "View.h"
+#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include <map>
+namespace llvm {
namespace mca {
-class SchedulerStatistics : public View {
+class SchedulerStatistics final : public View {
const llvm::MCSchedModel &SM;
-
- using Histogram = std::map<unsigned, unsigned>;
- Histogram IssuedPerCycle;
+ unsigned LQResourceID;
+ unsigned SQResourceID;
unsigned NumIssued;
unsigned NumCycles;
+ unsigned MostRecentLoadDispatched;
+ unsigned MostRecentStoreDispatched;
+
// Tracks the usage of a scheduler's queue.
struct BufferUsage {
unsigned SlotsInUse;
unsigned MaxUsedSlots;
+ uint64_t CumulativeNumUsedSlots;
};
- std::map<unsigned, BufferUsage> BufferedResources;
-
- void updateHistograms() {
- IssuedPerCycle[NumIssued]++;
- NumIssued = 0;
- }
+ std::vector<unsigned> IssuedPerCycle;
+ std::vector<BufferUsage> Usage;
- void printSchedulerStatistics(llvm::raw_ostream &OS) const;
+ void updateHistograms();
+ void printSchedulerStats(llvm::raw_ostream &OS) const;
void printSchedulerUsage(llvm::raw_ostream &OS) const;
public:
- SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
- : SM(STI.getSchedModel()), NumIssued(0), NumCycles(0) {}
-
+ SchedulerStatistics(const llvm::MCSubtargetInfo &STI);
void onEvent(const HWInstructionEvent &Event) override;
-
void onCycleBegin() override { NumCycles++; }
-
void onCycleEnd() override { updateHistograms(); }
// Increases the number of used scheduler queue slots of every buffered
// resource in the Buffers set.
- void onReservedBuffers(llvm::ArrayRef<unsigned> Buffers) override;
+ void onReservedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
// Decreases by one the number of used scheduler queue slots of every
// buffered resource in the Buffers set.
- void onReleasedBuffers(llvm::ArrayRef<unsigned> Buffers) override;
+ void onReleasedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
- void printView(llvm::raw_ostream &OS) const override {
- printSchedulerStatistics(OS);
- printSchedulerUsage(OS);
- }
+ void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/SummaryView.cpp b/contrib/llvm/tools/llvm-mca/Views/SummaryView.cpp
index 01399055c4fd..d8ac709e784d 100644
--- a/contrib/llvm/tools/llvm-mca/SummaryView.cpp
+++ b/contrib/llvm/tools/llvm-mca/Views/SummaryView.cpp
@@ -13,32 +13,33 @@
///
//===----------------------------------------------------------------------===//
-#include "SummaryView.h"
-#include "Support.h"
+#include "Views/SummaryView.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/Support.h"
#include "llvm/Support/Format.h"
+namespace llvm {
namespace mca {
#define DEBUG_TYPE "llvm-mca"
-using namespace llvm;
-
-SummaryView::SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
+SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef<MCInst> S,
unsigned Width)
- : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0),
- NumMicroOps(0), ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
- ProcResourceMasks(Model.getNumProcResourceKinds(), 0) {
+ : SM(Model), Source(S), DispatchWidth(Width), LastInstructionIdx(0),
+ TotalCycles(0), NumMicroOps(0),
+ ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
+ ProcResourceMasks(Model.getNumProcResourceKinds()) {
computeProcResourceMasks(SM, ProcResourceMasks);
}
void SummaryView::onEvent(const HWInstructionEvent &Event) {
- // We are only interested in the "instruction dispatched" events generated by
- // the dispatch stage for instructions that are part of iteration #0.
- if (Event.Type != HWInstructionEvent::Dispatched)
- return;
+ if (Event.Type == HWInstructionEvent::Dispatched)
+ LastInstructionIdx = Event.IR.getSourceIndex();
- if (Event.IR.getSourceIndex() >= Source.size())
+ // We are only interested in the "instruction retired" events generated by
+ // the retire stage for instructions that are part of iteration #0.
+ if (Event.Type != HWInstructionEvent::Retired ||
+ Event.IR.getSourceIndex() >= Source.size())
return;
// Update the cumulative number of resource cycles based on the processor
@@ -60,10 +61,12 @@ void SummaryView::onEvent(const HWInstructionEvent &Event) {
}
void SummaryView::printView(raw_ostream &OS) const {
- unsigned Iterations = Source.getNumIterations();
unsigned Instructions = Source.size();
+ unsigned Iterations = (LastInstructionIdx / Instructions) + 1;
unsigned TotalInstructions = Instructions * Iterations;
+ unsigned TotalUOps = NumMicroOps * Iterations;
double IPC = (double)TotalInstructions / TotalCycles;
+ double UOpsPerCycle = (double)TotalUOps / TotalCycles;
double BlockRThroughput = computeBlockRThroughput(
SM, DispatchWidth, NumMicroOps, ProcResourceUsage);
@@ -72,10 +75,12 @@ void SummaryView::printView(raw_ostream &OS) const {
TempStream << "Iterations: " << Iterations;
TempStream << "\nInstructions: " << TotalInstructions;
TempStream << "\nTotal Cycles: " << TotalCycles;
+ TempStream << "\nTotal uOps: " << TotalUOps << '\n';
TempStream << "\nDispatch Width: " << DispatchWidth;
- TempStream << "\nIPC: " << format("%.2f", IPC);
-
- // Round to the block reciprocal throughput to the nearest tenth.
+ TempStream << "\nuOps Per Cycle: "
+ << format("%.2f", floor((UOpsPerCycle * 100) + 0.5) / 100);
+ TempStream << "\nIPC: "
+ << format("%.2f", floor((IPC * 100) + 0.5) / 100);
TempStream << "\nBlock RThroughput: "
<< format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10)
<< '\n';
@@ -83,3 +88,4 @@ void SummaryView::printView(raw_ostream &OS) const {
OS << Buffer;
}
} // namespace mca.
+} // namespace llvm
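
SummaryView now derives the iteration count from the last dispatched source index and additionally reports uOps per cycle. A small standalone sketch of the arithmetic, with invented numbers:

    #include <cmath>
    #include <cstdio>

    int main() {
      const unsigned Instructions = 4;         // instructions in the block
      const unsigned LastInstructionIdx = 399; // last dispatched source index
      const unsigned TotalCycles = 210;
      const unsigned NumMicroOps = 6;          // uOps contributed by one iteration

      const unsigned Iterations = LastInstructionIdx / Instructions + 1;
      const unsigned TotalInstructions = Instructions * Iterations;
      const unsigned TotalUOps = NumMicroOps * Iterations;

      double IPC = (double)TotalInstructions / TotalCycles;
      double UOpsPerCycle = (double)TotalUOps / TotalCycles;

      std::printf("Iterations:     %u\n", Iterations);
      std::printf("Total uOps:     %u\n", TotalUOps);
      // Round to two decimals before printing, as printView() does.
      std::printf("uOps Per Cycle: %.2f\n",
                  std::floor((UOpsPerCycle * 100) + 0.5) / 100);
      std::printf("IPC:            %.2f\n",
                  std::floor((IPC * 100) + 0.5) / 100);
      return 0;
    }
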
diff --git a/contrib/llvm/tools/llvm-mca/SummaryView.h b/contrib/llvm/tools/llvm-mca/Views/SummaryView.h
index b799ce3aa747..f59fd4233fbe 100644
--- a/contrib/llvm/tools/llvm-mca/SummaryView.h
+++ b/contrib/llvm/tools/llvm-mca/Views/SummaryView.h
@@ -29,19 +29,20 @@
#ifndef LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
#define LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
-#include "SourceMgr.h"
-#include "View.h"
+#include "Views/View.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/raw_ostream.h"
+namespace llvm {
namespace mca {
/// A view that collects and prints a few performance numbers.
class SummaryView : public View {
const llvm::MCSchedModel &SM;
- const SourceMgr &Source;
+ llvm::ArrayRef<llvm::MCInst> Source;
const unsigned DispatchWidth;
+ unsigned LastInstructionIdx;
unsigned TotalCycles;
// The total number of micro opcodes contributed by a block of instructions.
unsigned NumMicroOps;
@@ -62,15 +63,15 @@ class SummaryView : public View {
double getBlockRThroughput() const;
public:
- SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
+ SummaryView(const llvm::MCSchedModel &Model, llvm::ArrayRef<llvm::MCInst> S,
unsigned Width);
void onCycleEnd() override { ++TotalCycles; }
-
void onEvent(const HWInstructionEvent &Event) override;
void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/Views/TimelineView.cpp b/contrib/llvm/tools/llvm-mca/Views/TimelineView.cpp
new file mode 100644
index 000000000000..7d55bbc99c73
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -0,0 +1,294 @@
+//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \brief
+///
+/// This file implements the TimelineView interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/TimelineView.h"
+
+namespace llvm {
+namespace mca {
+
+TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
+ llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
+ unsigned Cycles)
+ : STI(sti), MCIP(Printer), Source(S), CurrentCycle(0),
+ MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0), WaitTime(S.size()),
+ UsedBuffer(S.size()) {
+ unsigned NumInstructions = Source.size();
+ assert(Iterations && "Invalid number of iterations specified!");
+ NumInstructions *= Iterations;
+ Timeline.resize(NumInstructions);
+ TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0};
+ std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry);
+
+ WaitTimeEntry NullWTEntry = {0, 0, 0};
+ std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);
+
+ std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0,
+ /* unknown buffer size */ -1};
+ std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry);
+}
+
+void TimelineView::onReservedBuffers(const InstRef &IR,
+ ArrayRef<unsigned> Buffers) {
+ if (IR.getSourceIndex() >= Source.size())
+ return;
+
+ const MCSchedModel &SM = STI.getSchedModel();
+ std::pair<unsigned, int> BufferInfo = {0, -1};
+ for (const unsigned Buffer : Buffers) {
+ const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer);
+ if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) {
+ BufferInfo.first = Buffer;
+ BufferInfo.second = MCDesc.BufferSize;
+ }
+ }
+
+ UsedBuffer[IR.getSourceIndex()] = BufferInfo;
+}
+
+void TimelineView::onEvent(const HWInstructionEvent &Event) {
+ const unsigned Index = Event.IR.getSourceIndex();
+ if (Index >= Timeline.size())
+ return;
+
+ switch (Event.Type) {
+ case HWInstructionEvent::Retired: {
+ TimelineViewEntry &TVEntry = Timeline[Index];
+ if (CurrentCycle < MaxCycle)
+ TVEntry.CycleRetired = CurrentCycle;
+
+ // Update the WaitTime entry which corresponds to this Index.
+ assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!");
+ unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched);
+ WaitTimeEntry &WTEntry = WaitTime[Index % Source.size()];
+ WTEntry.CyclesSpentInSchedulerQueue +=
+ TVEntry.CycleIssued - CycleDispatched;
+ assert(CycleDispatched <= TVEntry.CycleReady &&
+ "Instruction cannot be ready if it hasn't been dispatched yet!");
+ WTEntry.CyclesSpentInSQWhileReady +=
+ TVEntry.CycleIssued - TVEntry.CycleReady;
+ WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
+ (CurrentCycle - 1) - TVEntry.CycleExecuted;
+ break;
+ }
+ case HWInstructionEvent::Ready:
+ Timeline[Index].CycleReady = CurrentCycle;
+ break;
+ case HWInstructionEvent::Issued:
+ Timeline[Index].CycleIssued = CurrentCycle;
+ break;
+ case HWInstructionEvent::Executed:
+ Timeline[Index].CycleExecuted = CurrentCycle;
+ break;
+ case HWInstructionEvent::Dispatched:
+ // There may be multiple dispatch events. Microcoded instructions that are
+ // expanded into multiple uOps may require multiple dispatch cycles. Here,
+ // we want to capture the first dispatch cycle.
+ if (Timeline[Index].CycleDispatched == -1)
+ Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle);
+ break;
+ default:
+ return;
+ }
+ if (CurrentCycle < MaxCycle)
+ LastCycle = std::max(LastCycle, CurrentCycle);
+}
+
+static raw_ostream::Colors chooseColor(unsigned CumulativeCycles,
+ unsigned Executions, int BufferSize) {
+ if (CumulativeCycles && BufferSize < 0)
+ return raw_ostream::MAGENTA;
+ unsigned Size = static_cast<unsigned>(BufferSize);
+ if (CumulativeCycles >= Size * Executions)
+ return raw_ostream::RED;
+ if ((CumulativeCycles * 2) >= Size * Executions)
+ return raw_ostream::YELLOW;
+ return raw_ostream::SAVEDCOLOR;
+}
+
+static void tryChangeColor(raw_ostream &OS, unsigned Cycles,
+ unsigned Executions, int BufferSize) {
+ if (!OS.has_colors())
+ return;
+
+ raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize);
+ if (Color == raw_ostream::SAVEDCOLOR) {
+ OS.resetColor();
+ return;
+ }
+ OS.changeColor(Color, /* bold */ true, /* BG */ false);
+}
+
+void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
+ const WaitTimeEntry &Entry,
+ unsigned SourceIndex,
+ unsigned Executions) const {
+ OS << SourceIndex << '.';
+ OS.PadToColumn(7);
+
+ double AverageTime1, AverageTime2, AverageTime3;
+ AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions;
+ AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions;
+ AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions;
+
+ OS << Executions;
+ OS.PadToColumn(13);
+ int BufferSize = UsedBuffer[SourceIndex].second;
+ tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, BufferSize);
+ OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
+ OS.PadToColumn(20);
+ tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, BufferSize);
+ OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
+ OS.PadToColumn(27);
+ tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions,
+ STI.getSchedModel().MicroOpBufferSize);
+ OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
+
+ if (OS.has_colors())
+ OS.resetColor();
+ OS.PadToColumn(34);
+}
+
+void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
+ std::string Header =
+ "\n\nAverage Wait times (based on the timeline view):\n"
+ "[0]: Executions\n"
+ "[1]: Average time spent waiting in a scheduler's queue\n"
+ "[2]: Average time spent waiting in a scheduler's queue while ready\n"
+ "[3]: Average time elapsed from WB until retire stage\n\n"
+ " [0] [1] [2] [3]\n";
+ OS << Header;
+
+ // Use a different string stream for printing instructions.
+ std::string Instruction;
+ raw_string_ostream InstrStream(Instruction);
+
+ formatted_raw_ostream FOS(OS);
+ unsigned Executions = Timeline.size() / Source.size();
+ unsigned IID = 0;
+ for (const MCInst &Inst : Source) {
+ printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions);
+ // Append the instruction info at the end of the line.
+ MCIP.printInst(&Inst, InstrStream, "", STI);
+ InstrStream.flush();
+
+ // Consume any tabs or spaces at the beginning of the string.
+ StringRef Str(Instruction);
+ Str = Str.ltrim();
+ FOS << " " << Str << '\n';
+ FOS.flush();
+ Instruction = "";
+
+ ++IID;
+ }
+}
+
+void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
+ const TimelineViewEntry &Entry,
+ unsigned Iteration,
+ unsigned SourceIndex) const {
+ if (Iteration == 0 && SourceIndex == 0)
+ OS << '\n';
+ OS << '[' << Iteration << ',' << SourceIndex << ']';
+ OS.PadToColumn(10);
+ assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!");
+ unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched);
+ for (unsigned I = 0, E = CycleDispatched; I < E; ++I)
+ OS << ((I % 5 == 0) ? '.' : ' ');
+ OS << TimelineView::DisplayChar::Dispatched;
+ if (CycleDispatched != Entry.CycleExecuted) {
+ // Zero latency instructions have the same value for CycleDispatched,
+ // CycleIssued and CycleExecuted.
+ for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I)
+ OS << TimelineView::DisplayChar::Waiting;
+ if (Entry.CycleIssued == Entry.CycleExecuted)
+ OS << TimelineView::DisplayChar::DisplayChar::Executed;
+ else {
+ if (CycleDispatched != Entry.CycleIssued)
+ OS << TimelineView::DisplayChar::Executing;
+ for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;
+ ++I)
+ OS << TimelineView::DisplayChar::Executing;
+ OS << TimelineView::DisplayChar::Executed;
+ }
+ }
+
+ for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
+ OS << TimelineView::DisplayChar::RetireLag;
+ OS << TimelineView::DisplayChar::Retired;
+
+ // Skip other columns.
+ for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)
+ OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' ');
+}
+
+static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {
+ OS << "\n\nTimeline view:\n";
+ if (Cycles >= 10) {
+ OS.PadToColumn(10);
+ for (unsigned I = 0; I <= Cycles; ++I) {
+ if (((I / 10) & 1) == 0)
+ OS << ' ';
+ else
+ OS << I % 10;
+ }
+ OS << '\n';
+ }
+
+ OS << "Index";
+ OS.PadToColumn(10);
+ for (unsigned I = 0; I <= Cycles; ++I) {
+ if (((I / 10) & 1) == 0)
+ OS << I % 10;
+ else
+ OS << ' ';
+ }
+ OS << '\n';
+}
+
+void TimelineView::printTimeline(raw_ostream &OS) const {
+ formatted_raw_ostream FOS(OS);
+ printTimelineHeader(FOS, LastCycle);
+ FOS.flush();
+
+ // Use a different string stream for the instruction.
+ std::string Instruction;
+ raw_string_ostream InstrStream(Instruction);
+
+ unsigned IID = 0;
+ const unsigned Iterations = Timeline.size() / Source.size();
+ for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) {
+ for (const MCInst &Inst : Source) {
+ const TimelineViewEntry &Entry = Timeline[IID];
+ if (Entry.CycleRetired == 0)
+ return;
+
+ unsigned SourceIndex = IID % Source.size();
+ printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
+ // Append the instruction info at the end of the line.
+ MCIP.printInst(&Inst, InstrStream, "", STI);
+ InstrStream.flush();
+
+ // Consume any tabs or spaces at the beginning of the string.
+ StringRef Str(Instruction);
+ Str = Str.ltrim();
+ FOS << " " << Str << '\n';
+ FOS.flush();
+ Instruction = "";
+
+ ++IID;
+ }
+ }
+}
+} // namespace mca
+} // namespace llvm
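
TimelineView colors the average wait times according to how full the relevant scheduler buffer was: magenta when the buffer size is unknown, red when the cumulative queue cycles reach one full buffer per execution, yellow at half that. A plain C++ sketch of the same rule (not llvm-mca code):

    #include <cstdio>

    enum class Highlight { None, Yellow, Red, Magenta };

    static Highlight choose(unsigned CumulativeCycles, unsigned Executions,
                            int BufferSize) {
      if (CumulativeCycles && BufferSize < 0)
        return Highlight::Magenta;           // buffer size is unknown
      unsigned Size = static_cast<unsigned>(BufferSize);
      if (CumulativeCycles >= Size * Executions)
        return Highlight::Red;               // queue was effectively full
      if ((CumulativeCycles * 2) >= Size * Executions)
        return Highlight::Yellow;            // at least half full on average
      return Highlight::None;
    }

    int main() {
      // 10 executions, a 16-entry scheduler buffer, 120 queued cycles in total:
      // an average of 12 cycles per execution exceeds half of 16 -> Yellow.
      Highlight H = choose(120, 10, 16);
      std::printf("highlight = %d\n", static_cast<int>(H));
      return 0;
    }
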
diff --git a/contrib/llvm/tools/llvm-mca/TimelineView.h b/contrib/llvm/tools/llvm-mca/Views/TimelineView.h
index e53c23ec1cc2..ee981800161c 100644
--- a/contrib/llvm/tools/llvm-mca/TimelineView.h
+++ b/contrib/llvm/tools/llvm-mca/Views/TimelineView.h
@@ -100,14 +100,15 @@
#ifndef LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
#define LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
-#include "SourceMgr.h"
-#include "View.h"
+#include "Views/View.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
-#include <map>
+namespace llvm {
namespace mca {
/// This class listens to instruction state transition events
@@ -120,14 +121,14 @@ namespace mca {
class TimelineView : public View {
const llvm::MCSubtargetInfo &STI;
llvm::MCInstPrinter &MCIP;
- const SourceMgr &AsmSequence;
+ llvm::ArrayRef<llvm::MCInst> Source;
unsigned CurrentCycle;
unsigned MaxCycle;
unsigned LastCycle;
struct TimelineViewEntry {
- unsigned CycleDispatched;
+ int CycleDispatched; // A negative value is an "invalid cycle".
unsigned CycleReady;
unsigned CycleIssued;
unsigned CycleExecuted;
@@ -136,22 +137,22 @@ class TimelineView : public View {
std::vector<TimelineViewEntry> Timeline;
struct WaitTimeEntry {
- unsigned Executions;
unsigned CyclesSpentInSchedulerQueue;
unsigned CyclesSpentInSQWhileReady;
unsigned CyclesSpentAfterWBAndBeforeRetire;
};
std::vector<WaitTimeEntry> WaitTime;
+ // This field is used to map instructions to buffered resources.
+  // Elements of this vector are <resourceID, BufferSize> pairs.
+ std::vector<std::pair<unsigned, int>> UsedBuffer;
+
void printTimelineViewEntry(llvm::formatted_raw_ostream &OS,
const TimelineViewEntry &E, unsigned Iteration,
unsigned SourceIndex) const;
void printWaitTimeEntry(llvm::formatted_raw_ostream &OS,
- const WaitTimeEntry &E, unsigned Index) const;
-
- const unsigned DEFAULT_ITERATIONS = 10;
-
- void initialize(unsigned MaxIterations);
+ const WaitTimeEntry &E, unsigned Index,
+ unsigned Executions) const;
// Display characters for the TimelineView report output.
struct DisplayChar {
@@ -165,16 +166,14 @@ class TimelineView : public View {
public:
TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer,
- const SourceMgr &Sequence, unsigned MaxIterations,
- unsigned Cycles)
- : STI(sti), MCIP(Printer), AsmSequence(Sequence), CurrentCycle(0),
- MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0) {
- initialize(MaxIterations);
- }
+ llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
+ unsigned Cycles);
// Event handlers.
void onCycleEnd() override { ++CurrentCycle; }
void onEvent(const HWInstructionEvent &Event) override;
+ void onReservedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
// print functionalities.
void printTimeline(llvm::raw_ostream &OS) const;
@@ -185,5 +184,6 @@ public:
}
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/View.cpp b/contrib/llvm/tools/llvm-mca/Views/View.cpp
index 390a7aeb3b9d..6cfb9dd9f394 100644
--- a/contrib/llvm/tools/llvm-mca/View.cpp
+++ b/contrib/llvm/tools/llvm-mca/Views/View.cpp
@@ -12,9 +12,11 @@
///
//===----------------------------------------------------------------------===//
-#include "View.h"
+#include "Views/View.h"
+namespace llvm {
namespace mca {
void View::anchor() {}
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/View.h b/contrib/llvm/tools/llvm-mca/Views/View.h
index 9ba94a5da977..4b82b0da0d27 100644
--- a/contrib/llvm/tools/llvm-mca/View.h
+++ b/contrib/llvm/tools/llvm-mca/Views/View.h
@@ -16,9 +16,10 @@
#ifndef LLVM_TOOLS_LLVM_MCA_VIEW_H
#define LLVM_TOOLS_LLVM_MCA_VIEW_H
-#include "HWEventListener.h"
+#include "llvm/MCA/HWEventListener.h"
#include "llvm/Support/raw_ostream.h"
+namespace llvm {
namespace mca {
class View : public HWEventListener {
@@ -28,5 +29,6 @@ public:
void anchor() override;
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/llvm-mca.cpp b/contrib/llvm/tools/llvm-mca/llvm-mca.cpp
index 897ff232a36d..68d63db599d7 100644
--- a/contrib/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/contrib/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -22,26 +22,27 @@
//===----------------------------------------------------------------------===//
#include "CodeRegion.h"
-#include "Context.h"
-#include "DispatchStatistics.h"
-#include "FetchStage.h"
-#include "InstructionInfoView.h"
-#include "InstructionTables.h"
-#include "Pipeline.h"
+#include "CodeRegionGenerator.h"
#include "PipelinePrinter.h"
-#include "RegisterFileStatistics.h"
-#include "ResourcePressureView.h"
-#include "RetireControlUnitStatistics.h"
-#include "SchedulerStatistics.h"
-#include "SummaryView.h"
-#include "TimelineView.h"
+#include "Views/DispatchStatistics.h"
+#include "Views/InstructionInfoView.h"
+#include "Views/RegisterFileStatistics.h"
+#include "Views/ResourcePressureView.h"
+#include "Views/RetireControlUnitStatistics.h"
+#include "Views/SchedulerStatistics.h"
+#include "Views/SummaryView.h"
+#include "Views/TimelineView.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/MCA/Context.h"
+#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/Stages/EntryStage.h"
+#include "llvm/MCA/Stages/InstructionTables.h"
+#include "llvm/MCA/Support.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
@@ -67,13 +68,13 @@ static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
cl::value_desc("filename"));
static cl::opt<std::string>
- ArchName("march", cl::desc("Target arch to assemble for, "
- "see -version for available targets"),
+ ArchName("march", cl::desc("Target architecture. "
+ "See -version for available targets"),
cl::cat(ToolOptions));
static cl::opt<std::string>
- TripleName("mtriple", cl::desc("Target triple to assemble for, "
- "see -version for available targets"),
+ TripleName("mtriple",
+ cl::desc("Target triple. See -version for available targets"),
cl::cat(ToolOptions));
static cl::opt<std::string>
@@ -148,15 +149,13 @@ static cl::opt<bool>
cl::desc("If set, assume that loads and stores do not alias"),
cl::cat(ToolOptions), cl::init(true));
-static cl::opt<unsigned>
- LoadQueueSize("lqueue",
- cl::desc("Size of the load queue (unbound by default)"),
- cl::cat(ToolOptions), cl::init(0));
+static cl::opt<unsigned> LoadQueueSize("lqueue",
+ cl::desc("Size of the load queue"),
+ cl::cat(ToolOptions), cl::init(0));
-static cl::opt<unsigned>
- StoreQueueSize("squeue",
- cl::desc("Size of the store queue (unbound by default)"),
- cl::cat(ToolOptions), cl::init(0));
+static cl::opt<unsigned> StoreQueueSize("squeue",
+ cl::desc("Size of the store queue"),
+ cl::cat(ToolOptions), cl::init(0));
static cl::opt<bool>
PrintInstructionTables("instruction-tables",
@@ -180,7 +179,6 @@ static cl::opt<bool>
namespace {
const Target *getTarget(const char *ProgName) {
- TripleName = Triple::normalize(TripleName);
if (TripleName.empty())
TripleName = Triple::normalize(sys::getDefaultTargetTriple());
Triple TheTriple(TripleName);
@@ -198,59 +196,6 @@ const Target *getTarget(const char *ProgName) {
return TheTarget;
}
-// A comment consumer that parses strings.
-// The only valid tokens are strings.
-class MCACommentConsumer : public AsmCommentConsumer {
-public:
- mca::CodeRegions &Regions;
-
- MCACommentConsumer(mca::CodeRegions &R) : Regions(R) {}
- void HandleComment(SMLoc Loc, StringRef CommentText) override {
- // Skip empty comments.
- StringRef Comment(CommentText);
- if (Comment.empty())
- return;
-
- // Skip spaces and tabs
- unsigned Position = Comment.find_first_not_of(" \t");
- if (Position >= Comment.size())
- // we reached the end of the comment. Bail out.
- return;
-
- Comment = Comment.drop_front(Position);
- if (Comment.consume_front("LLVM-MCA-END")) {
- Regions.endRegion(Loc);
- return;
- }
-
- // Now try to parse string LLVM-MCA-BEGIN
- if (!Comment.consume_front("LLVM-MCA-BEGIN"))
- return;
-
- // Skip spaces and tabs
- Position = Comment.find_first_not_of(" \t");
- if (Position < Comment.size())
- Comment = Comment.drop_front(Position);
- // Use the rest of the string as a descriptor for this code snippet.
- Regions.beginRegion(Comment, Loc);
- }
-};
-
-int AssembleInput(const char *ProgName, MCAsmParser &Parser,
- const Target *TheTarget, MCSubtargetInfo &STI,
- MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
- std::unique_ptr<MCTargetAsmParser> TAP(
- TheTarget->createMCAsmParser(STI, Parser, MCII, MCOptions));
-
- if (!TAP) {
- WithColor::error() << "this target does not support assembly parsing.\n";
- return 1;
- }
-
- Parser.setTargetParser(*TAP);
- return Parser.Run(false);
-}
-
ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() {
if (OutputFilename == "")
OutputFilename = "-";
@@ -261,40 +206,6 @@ ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() {
return std::move(Out);
return EC;
}
-
-class MCStreamerWrapper final : public MCStreamer {
- mca::CodeRegions &Regions;
-
-public:
- MCStreamerWrapper(MCContext &Context, mca::CodeRegions &R)
- : MCStreamer(Context), Regions(R) {}
-
- // We only want to intercept the emission of new instructions.
- virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool /* unused */) override {
- Regions.addInstruction(llvm::make_unique<const MCInst>(Inst));
- }
-
- bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override {
- return true;
- }
-
- void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) override {}
- void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
- uint64_t Size = 0, unsigned ByteAlignment = 0,
- SMLoc Loc = SMLoc()) override {}
- void EmitGPRel32Value(const MCExpr *Value) override {}
- void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
- void EmitCOFFSymbolStorageClass(int StorageClass) override {}
- void EmitCOFFSymbolType(int Type) override {}
- void EndCOFFSymbolDef() override {}
-
- const std::vector<std::unique_ptr<const MCInst>> &
- GetInstructionSequence(unsigned Index) const {
- return Regions.getInstructionSequence(Index);
- }
-};
} // end of anonymous namespace
static void processOptionImpl(cl::opt<bool> &O, const cl::opt<bool> &Default) {
@@ -318,20 +229,30 @@ static void processViewOptions() {
EnableAllViews.getPosition() < EnableAllStats.getPosition()
? EnableAllStats
: EnableAllViews;
- processOptionImpl(PrintSummaryView, Default);
processOptionImpl(PrintRegisterFileStats, Default);
processOptionImpl(PrintDispatchStats, Default);
processOptionImpl(PrintSchedulerStats, Default);
processOptionImpl(PrintRetireStats, Default);
}
+// Returns true on success.
+static bool runPipeline(mca::Pipeline &P) {
+ // Handle pipeline errors here.
+ Expected<unsigned> Cycles = P.run();
+ if (!Cycles) {
+ WithColor::error() << toString(Cycles.takeError());
+ return false;
+ }
+ return true;
+}
+
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
// Initialize targets and assembly parsers.
- llvm::InitializeAllTargetInfos();
- llvm::InitializeAllTargetMCs();
- llvm::InitializeAllAsmParsers();
+ InitializeAllTargetInfos();
+ InitializeAllTargetMCs();
+ InitializeAllAsmParsers();
// Enable printing of available targets when flag --version is specified.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
@@ -342,9 +263,6 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv,
"llvm machine code performance analyzer.\n");
- MCTargetOptions MCOptions;
- MCOptions.PreserveAsmComments = false;
-
// Get the target from the triple. If a triple is not specified, then select
// the default triple for the host. If the triple doesn't correspond to any
// registered target, then exit with an error message.
@@ -384,9 +302,6 @@ int main(int argc, char **argv) {
std::unique_ptr<buffer_ostream> BOS;
- mca::CodeRegions Regions(SrcMgr);
- MCStreamerWrapper Str(Ctx, Regions);
-
std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
std::unique_ptr<MCInstrAnalysis> MCIA(
@@ -419,14 +334,20 @@ int main(int argc, char **argv) {
return 1;
}
- std::unique_ptr<MCAsmParser> P(createMCAsmParser(SrcMgr, Ctx, Str, *MAI));
- MCAsmLexer &Lexer = P->getLexer();
- MCACommentConsumer CC(Regions);
- Lexer.setCommentConsumer(&CC);
-
- if (AssembleInput(ProgName, *P, TheTarget, *STI, *MCII, MCOptions))
+ // Parse the input and create CodeRegions that llvm-mca can analyze.
+ mca::AsmCodeRegionGenerator CRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI, *MCII);
+ Expected<const mca::CodeRegions &> RegionsOrErr = CRG.parseCodeRegions();
+ if (!RegionsOrErr) {
+ if (auto Err =
+ handleErrors(RegionsOrErr.takeError(), [](const StringError &E) {
+ WithColor::error() << E.getMessage() << '\n';
+ })) {
+ // Default case.
+ WithColor::error() << toString(std::move(Err)) << '\n';
+ }
return 1;
-
+ }
+ const mca::CodeRegions &Regions = *RegionsOrErr;
if (Regions.empty()) {
WithColor::error() << "no assembly instructions found.\n";
return 1;
@@ -439,7 +360,7 @@ int main(int argc, char **argv) {
return 1;
}
- unsigned AssemblerDialect = P->getAssemblerDialect();
+ unsigned AssemblerDialect = CRG.getAssemblerDialect();
if (OutputAsmVariant >= 0)
AssemblerDialect = static_cast<unsigned>(OutputAsmVariant);
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
@@ -452,7 +373,7 @@ int main(int argc, char **argv) {
return 1;
}
- std::unique_ptr<llvm::ToolOutputFile> TOF = std::move(*OF);
+ std::unique_ptr<ToolOutputFile> TOF = std::move(*OF);
const MCSchedModel &SM = STI->getSchedModel();
@@ -461,7 +382,7 @@ int main(int argc, char **argv) {
Width = DispatchWidth;
// Create an instruction builder.
- mca::InstrBuilder IB(*STI, *MCII, *MRI, *MCIA, *IP);
+ mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get());
// Create a context to control ownership of the pipeline hardware.
mca::Context MCA(*MRI, *STI);
@@ -471,6 +392,7 @@ int main(int argc, char **argv) {
// Number each region in the sequence.
unsigned RegionIdx = 0;
+
for (const std::unique_ptr<mca::CodeRegion> &Region : Regions) {
// Skip empty code regions.
if (Region->empty())
@@ -486,24 +408,53 @@ int main(int argc, char **argv) {
TOF->os() << "\n\n";
}
- mca::SourceMgr S(Region->getInstructions(),
- PrintInstructionTables ? 1 : Iterations);
+ // Lower the MCInst sequence into an mca::Instruction sequence.
+ ArrayRef<MCInst> Insts = Region->getInstructions();
+ std::vector<std::unique_ptr<mca::Instruction>> LoweredSequence;
+ for (const MCInst &MCI : Insts) {
+ Expected<std::unique_ptr<mca::Instruction>> Inst =
+ IB.createInstruction(MCI);
+ if (!Inst) {
+ if (auto NewE = handleErrors(
+ Inst.takeError(),
+ [&IP, &STI](const mca::InstructionError<MCInst> &IE) {
+ std::string InstructionStr;
+ raw_string_ostream SS(InstructionStr);
+ WithColor::error() << IE.Message << '\n';
+ IP->printInst(&IE.Inst, SS, "", *STI);
+ SS.flush();
+ WithColor::note() << "instruction: " << InstructionStr
+ << '\n';
+ })) {
+ // Default case.
+ WithColor::error() << toString(std::move(NewE));
+ }
+ return 1;
+ }
+
+ LoweredSequence.emplace_back(std::move(Inst.get()));
+ }
+
+ mca::SourceMgr S(LoweredSequence, PrintInstructionTables ? 1 : Iterations);
if (PrintInstructionTables) {
// Create a pipeline, stages, and a printer.
auto P = llvm::make_unique<mca::Pipeline>();
- P->appendStage(llvm::make_unique<mca::FetchStage>(IB, S));
- P->appendStage(llvm::make_unique<mca::InstructionTables>(SM, IB));
+ P->appendStage(llvm::make_unique<mca::EntryStage>(S));
+ P->appendStage(llvm::make_unique<mca::InstructionTables>(SM));
mca::PipelinePrinter Printer(*P);
// Create the views for this pipeline, execute, and emit a report.
if (PrintInstructionInfoView) {
- Printer.addView(
- llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, S, *IP));
+ Printer.addView(llvm::make_unique<mca::InstructionInfoView>(
+ *STI, *MCII, Insts, *IP));
}
Printer.addView(
- llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, S));
- P->run();
+ llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
+
+ if (!runPipeline(*P))
+ return 1;
+
Printer.printReport(TOF->os());
continue;
}
@@ -513,11 +464,11 @@ int main(int argc, char **argv) {
mca::PipelinePrinter Printer(*P);
if (PrintSummaryView)
- Printer.addView(llvm::make_unique<mca::SummaryView>(SM, S, Width));
+ Printer.addView(llvm::make_unique<mca::SummaryView>(SM, Insts, Width));
if (PrintInstructionInfoView)
Printer.addView(
- llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, S, *IP));
+ llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, Insts, *IP));
if (PrintDispatchStats)
Printer.addView(llvm::make_unique<mca::DispatchStatistics>());
@@ -526,21 +477,26 @@ int main(int argc, char **argv) {
Printer.addView(llvm::make_unique<mca::SchedulerStatistics>(*STI));
if (PrintRetireStats)
- Printer.addView(llvm::make_unique<mca::RetireControlUnitStatistics>());
+ Printer.addView(llvm::make_unique<mca::RetireControlUnitStatistics>(SM));
if (PrintRegisterFileStats)
Printer.addView(llvm::make_unique<mca::RegisterFileStatistics>(*STI));
if (PrintResourcePressureView)
Printer.addView(
- llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, S));
+ llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
if (PrintTimelineView) {
+ unsigned TimelineIterations =
+ TimelineMaxIterations ? TimelineMaxIterations : 10;
Printer.addView(llvm::make_unique<mca::TimelineView>(
- *STI, *IP, S, TimelineMaxIterations, TimelineMaxCycles));
+ *STI, *IP, Insts, std::min(TimelineIterations, S.getNumIterations()),
+ TimelineMaxCycles));
}
- P->run();
+ if (!runPipeline(*P))
+ return 1;
+
Printer.printReport(TOF->os());
// Clear the InstrBuilder internal state in preparation for another round.
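
The driver now checks the value-or-error result of Pipeline::run() instead of ignoring it. The sketch below imitates that control flow with a tiny hand-rolled value-or-error struct so it builds without LLVM; the real code uses llvm::Expected<unsigned> and WithColor::error():

    #include <cstdio>
    #include <string>

    struct ExpectedCycles {
      bool HasValue;
      unsigned Cycles;   // valid when HasValue is true
      std::string Error; // valid when HasValue is false
    };

    static ExpectedCycles runFakePipeline(bool Fail) {
      if (Fail)
        return {false, 0, "found an unsupported instruction in the input"};
      return {true, 210, ""};
    }

    // Returns true on success, mirroring the shape of runPipeline() above.
    static bool runPipeline(bool Fail) {
      ExpectedCycles Cycles = runFakePipeline(Fail);
      if (!Cycles.HasValue) {
        std::fprintf(stderr, "error: %s\n", Cycles.Error.c_str());
        return false;
      }
      std::printf("simulated %u cycles\n", Cycles.Cycles);
      return true;
    }

    int main() {
      if (!runPipeline(/*Fail=*/false))
        return 1;
      return runPipeline(/*Fail=*/true) ? 0 : 1;
    }
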
diff --git a/contrib/llvm/tools/llvm-nm/llvm-nm.cpp b/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
index 37c1bf85809e..042e284e8369 100644
--- a/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -38,6 +38,7 @@
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
@@ -90,6 +91,8 @@ cl::opt<bool> BSDFormat("B", cl::desc("Alias for --format=bsd"),
cl::Grouping);
cl::opt<bool> POSIXFormat("P", cl::desc("Alias for --format=posix"),
cl::Grouping);
+cl::alias Portability("portability", cl::desc("Alias for --format=posix"),
+ cl::aliasopt(POSIXFormat), cl::NotHidden);
cl::opt<bool> DarwinFormat("m", cl::desc("Alias for --format=darwin"),
cl::Grouping);
@@ -183,6 +186,8 @@ cl::opt<bool> DyldInfoOnly("dyldinfo-only",
cl::opt<bool> NoLLVMBitcode("no-llvm-bc",
cl::desc("Disable LLVM bitcode reader"));
+cl::extrahelp HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
+
bool PrintAddress = true;
bool MultipleFiles = false;
@@ -194,7 +199,7 @@ std::string ToolName;
static void error(Twine Message, Twine Path = Twine()) {
HadError = true;
- errs() << ToolName << ": " << Path << ": " << Message << ".\n";
+ WithColor::error(errs(), ToolName) << Path << ": " << Message << ".\n";
}
static bool error(std::error_code EC, Twine Path = Twine()) {
@@ -207,11 +212,11 @@ static bool error(std::error_code EC, Twine Path = Twine()) {
// This version of error() prints the archive name and member name, for example:
// "libx.a(foo.o)" after the ToolName before the error message. It sets
-// HadError but returns allowing the code to move on to other archive members.
+// HadError but returns allowing the code to move on to other archive members.
static void error(llvm::Error E, StringRef FileName, const Archive::Child &C,
StringRef ArchitectureName = StringRef()) {
HadError = true;
- errs() << ToolName << ": " << FileName;
+ WithColor::error(errs(), ToolName) << FileName;
Expected<StringRef> NameOrErr = C.getName();
// TODO: if we have an error getting the name then it would be nice to print
@@ -228,7 +233,7 @@ static void error(llvm::Error E, StringRef FileName, const Archive::Child &C,
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
errs() << " " << Buf << "\n";
}
@@ -236,18 +241,18 @@ static void error(llvm::Error E, StringRef FileName, const Archive::Child &C,
// This version of error() prints the file name and which architecture slice it
// is from, for example: "foo.o (for architecture i386)" after the ToolName
// before the error message. It sets HadError but returns allowing the code to
-// move on to other architecture slices.
+// move on to other architecture slices.
static void error(llvm::Error E, StringRef FileName,
StringRef ArchitectureName = StringRef()) {
HadError = true;
- errs() << ToolName << ": " << FileName;
+ WithColor::error(errs(), ToolName) << FileName;
if (!ArchitectureName.empty())
errs() << " (for architecture " << ArchitectureName << ") ";
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
errs() << " " << Buf << "\n";
}
@@ -674,7 +679,7 @@ static void darwinPrintStab(MachOObjectFile *MachO, SymbolListT::iterator I) {
}
static Optional<std::string> demangle(StringRef Name, bool StripUnderscore) {
- if (StripUnderscore && Name.size() > 0 && Name[0] == '_')
+ if (StripUnderscore && !Name.empty() && Name[0] == '_')
Name = Name.substr(1);
if (!Name.startswith("_Z"))
@@ -709,7 +714,7 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
if (ReverseSort)
Cmp = [=](const NMSymbol &A, const NMSymbol &B) { return Cmp(B, A); };
- llvm::sort(SymbolList.begin(), SymbolList.end(), Cmp);
+ llvm::sort(SymbolList, Cmp);
}
if (!PrintFileName) {
@@ -757,6 +762,24 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
}
}
+ auto writeFileName = [&](raw_ostream &S) {
+ if (!ArchitectureName.empty())
+ S << "(for architecture " << ArchitectureName << "):";
+ if (OutputFormat == posix && !ArchiveName.empty())
+ S << ArchiveName << "[" << CurrentFilename << "]: ";
+ else {
+ if (!ArchiveName.empty())
+ S << ArchiveName << ":";
+ S << CurrentFilename << ": ";
+ }
+ };
+
+ if (SymbolList.empty()) {
+ if (PrintFileName)
+ writeFileName(errs());
+ errs() << "no symbols\n";
+ }
+
for (SymbolListT::iterator I = SymbolList.begin(), E = SymbolList.end();
I != E; ++I) {
uint32_t SymFlags;
@@ -778,17 +801,8 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
(!Global && ExternalOnly) || (SizeSort && !PrintAddress) ||
(Weak && NoWeakSymbols))
continue;
- if (PrintFileName) {
- if (!ArchitectureName.empty())
- outs() << "(for architecture " << ArchitectureName << "):";
- if (OutputFormat == posix && !ArchiveName.empty())
- outs() << ArchiveName << "[" << CurrentFilename << "]: ";
- else {
- if (!ArchiveName.empty())
- outs() << ArchiveName << ":";
- outs() << CurrentFilename << ": ";
- }
- }
+ if (PrintFileName)
+ writeFileName(outs());
if ((JustSymbolName ||
(UndefinedOnly && MachO && OutputFormat != darwin)) &&
OutputFormat != posix) {
@@ -1018,8 +1032,7 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) {
StringRef SectionName;
Obj.getSectionName(Ref, SectionName);
StringRef SegmentName = Obj.getSectionFinalSegmentName(Ref);
- if (Obj.is64Bit() &&
- Obj.getHeader64().filetype == MachO::MH_KEXT_BUNDLE &&
+ if (Obj.is64Bit() && Obj.getHeader64().filetype == MachO::MH_KEXT_BUNDLE &&
SegmentName == "__TEXT_EXEC" && SectionName == "__text")
return 't';
if (SegmentName == "__TEXT" && SectionName == "__text")
@@ -1152,7 +1165,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
// file get the section number for that section in this object file.
unsigned int Nsect = 0;
MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
- if (SegSect.size() != 0 && MachO) {
+ if (!SegSect.empty() && MachO) {
Nsect = getNsectForSegSect(MachO);
// If this section is not in the object file no symbols are printed.
if (Nsect == 0)
@@ -1170,8 +1183,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
// see if this symbol is a symbol from that section and if not skip it.
if (Nsect && Nsect != getNsectInMachO(*MachO, Sym))
continue;
- NMSymbol S;
- memset(&S, '\0', sizeof(S));
+ NMSymbol S = {};
S.Size = 0;
S.Address = 0;
if (PrintSize) {
@@ -1265,8 +1277,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
}
if (!found) {
- NMSymbol S;
- memset(&S, '\0', sizeof(NMSymbol));
+ NMSymbol S = {};
S.Address = Entry.address() + BaseSegmentAddress;
S.Size = 0;
S.TypeChar = '\0';
@@ -1356,8 +1367,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
    // Now create the undefined symbol using the referenced dynamic
// library.
- NMSymbol U;
- memset(&U, '\0', sizeof(NMSymbol));
+ NMSymbol U = {};
U.Address = 0;
U.Size = 0;
U.TypeChar = 'U';
@@ -1423,8 +1433,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
if (!found) {
LastSymbolName = Entry.symbolName();
- NMSymbol B;
- memset(&B, '\0', sizeof(NMSymbol));
+ NMSymbol B = {};
B.Address = 0;
B.Size = 0;
B.TypeChar = 'U';
@@ -1483,8 +1492,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
if (!found) {
LastSymbolName = Entry.symbolName();
- NMSymbol L;
- memset(&L, '\0', sizeof(NMSymbol));
+ NMSymbol L = {};
L.Name = Entry.symbolName();
L.Address = 0;
L.Size = 0;
@@ -1600,7 +1608,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
uint64_t lc_main_offset = UINT64_MAX;
for (const auto &Command : MachO->load_commands()) {
if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) {
- // We found a function starts segment, parse the addresses for
+ // We found a function starts segment, parse the addresses for
// consumption.
MachO::linkedit_data_command LLC =
MachO->getLinkeditDataLoadCommand(Command);
@@ -1622,9 +1630,8 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
    // If this address is not already in the symbol table, fake up an
    // nlist for it.
- if (!found) {
- NMSymbol F;
- memset(&F, '\0', sizeof(NMSymbol));
+ if (!found) {
+ NMSymbol F = {};
F.Name = "<redacted function X>";
F.Address = FoundFns[f] + BaseSegmentAddress;
F.Size = 0;
@@ -1744,12 +1751,14 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
outs() << "Archive map\n";
for (; I != E; ++I) {
Expected<Archive::Child> C = I->getMember();
- if (!C)
+ if (!C) {
error(C.takeError(), Filename);
+ break;
+ }
Expected<StringRef> FileNameOrErr = C->getName();
if (!FileNameOrErr) {
error(FileNameOrErr.takeError(), Filename);
- return;
+ break;
}
StringRef SymName = I->getName();
outs() << SymName << " in " << FileNameOrErr.get() << "\n";
@@ -1769,8 +1778,8 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
}
if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
- errs() << ToolName << ": warning sizes with -print-size for Mach-O "
- "files are always zero.\n";
+ WithColor::warning(errs(), ToolName)
+ << "sizes with -print-size for Mach-O files are always zero.\n";
MachOPrintSizeWarning = true;
}
if (!checkMachOAndArchFlags(O, Filename))
@@ -1793,7 +1802,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
}
if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin)) {
// If we have a list of architecture flags specified dump only those.
- if (!ArchAll && ArchFlags.size() != 0) {
+ if (!ArchAll && !ArchFlags.empty()) {
// Look for a slice in the universal binary that matches each ArchFlag.
bool ArchFound;
for (unsigned i = 0; i < ArchFlags.size(); ++i) {
@@ -1882,14 +1891,14 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
// No architecture flags were specified so if this contains a slice that
// matches the host architecture dump only that.
if (!ArchAll) {
- StringRef HostArchName = MachOObjectFile::getHostArch().getArchName();
+ Triple HostTriple = MachOObjectFile::getHostArch();
+ StringRef HostArchName = HostTriple.getArchName();
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
E = UB->end_objects();
I != E; ++I) {
if (HostArchName == I->getArchFlagName()) {
Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
std::string ArchiveName;
- ArchiveName.clear();
if (ObjOrErr) {
ObjectFile &Obj = *ObjOrErr.get();
dumpSymbolNamesFromObject(Obj, false);
@@ -2011,8 +2020,8 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
}
if (SymbolicFile *O = dyn_cast<SymbolicFile>(&Bin)) {
if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
- errs() << ToolName << ": warning sizes with -print-size for Mach-O files "
- "are always zero.\n";
+ WithColor::warning(errs(), ToolName)
+ << "sizes with -print-size for Mach-O files are always zero.\n";
MachOPrintSizeWarning = true;
}
if (!checkMachOAndArchFlags(O, Filename))
@@ -2064,7 +2073,7 @@ int main(int argc, char **argv) {
}
}
- if (SegSect.size() != 0 && SegSect.size() != 2)
+ if (!SegSect.empty() && SegSect.size() != 2)
error("bad number of arguments (must be two arguments)",
"for the -s option");
diff --git a/contrib/llvm/tools/llvm-objcopy/Buffer.cpp b/contrib/llvm/tools/llvm-objcopy/Buffer.cpp
new file mode 100644
index 000000000000..8044b023aaad
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/Buffer.cpp
@@ -0,0 +1,51 @@
+//===- Buffer.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Buffer.h"
+#include "llvm-objcopy.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+
+Buffer::~Buffer() {}
+
+void FileBuffer::allocate(size_t Size) {
+ Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+ FileOutputBuffer::create(getName(), Size, FileOutputBuffer::F_executable);
+ handleAllErrors(BufferOrErr.takeError(), [this](const ErrorInfoBase &E) {
+ error("failed to open " + getName() + ": " + E.message());
+ });
+ Buf = std::move(*BufferOrErr);
+}
+
+Error FileBuffer::commit() { return Buf->commit(); }
+
+uint8_t *FileBuffer::getBufferStart() {
+ return reinterpret_cast<uint8_t *>(Buf->getBufferStart());
+}
+
+void MemBuffer::allocate(size_t Size) {
+ Buf = WritableMemoryBuffer::getNewMemBuffer(Size, getName());
+}
+
+Error MemBuffer::commit() { return Error::success(); }
+
+uint8_t *MemBuffer::getBufferStart() {
+ return reinterpret_cast<uint8_t *>(Buf->getBufferStart());
+}
+
+std::unique_ptr<WritableMemoryBuffer> MemBuffer::releaseMemoryBuffer() {
+ return std::move(Buf);
+}
+
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/Buffer.h b/contrib/llvm/tools/llvm-objcopy/Buffer.h
new file mode 100644
index 000000000000..e5b9c5b2d22b
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/Buffer.h
@@ -0,0 +1,66 @@
+//===- Buffer.h -------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_BUFFER_H
+#define LLVM_TOOLS_OBJCOPY_BUFFER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+
+// The class Buffer abstracts out the common interface of FileOutputBuffer and
+// WritableMemoryBuffer so that the hierarchy of Writers depends on this
+// abstract interface and doesn't depend on a particular implementation.
+// TODO: refactor the buffer classes in LLVM to enable us to use them here
+// directly.
+class Buffer {
+ StringRef Name;
+
+public:
+ virtual ~Buffer();
+ virtual void allocate(size_t Size) = 0;
+ virtual uint8_t *getBufferStart() = 0;
+ virtual Error commit() = 0;
+
+ explicit Buffer(StringRef Name) : Name(Name) {}
+ StringRef getName() const { return Name; }
+};
+
+class FileBuffer : public Buffer {
+ std::unique_ptr<FileOutputBuffer> Buf;
+
+public:
+ void allocate(size_t Size) override;
+ uint8_t *getBufferStart() override;
+ Error commit() override;
+
+ explicit FileBuffer(StringRef FileName) : Buffer(FileName) {}
+};
+
+class MemBuffer : public Buffer {
+ std::unique_ptr<WritableMemoryBuffer> Buf;
+
+public:
+ void allocate(size_t Size) override;
+ uint8_t *getBufferStart() override;
+ Error commit() override;
+
+ explicit MemBuffer(StringRef Name) : Buffer(Name) {}
+
+ std::unique_ptr<WritableMemoryBuffer> releaseMemoryBuffer();
+};
+
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_BUFFER_H
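
The Buffer interface introduced above hides file-backed and memory-backed output behind one API. A minimal usage sketch (editorial aside, not part of the patch; writeBytes is a hypothetical helper, not an LLVM API):

    // Fill any Buffer implementation and commit it.
    #include "Buffer.h"
    #include "llvm/ADT/ArrayRef.h"
    #include <algorithm>

    static llvm::Error writeBytes(llvm::objcopy::Buffer &Out,
                                  llvm::ArrayRef<uint8_t> Data) {
      Out.allocate(Data.size());                 // reserve backing storage
      std::copy(Data.begin(), Data.end(), Out.getBufferStart());
      return Out.commit();                       // flush file / finalize memory
    }

    // Callers pick the concrete backing at run time, e.g.:
    //   llvm::objcopy::FileBuffer FB("out.o");  cantFail(writeBytes(FB, Data));
    //   llvm::objcopy::MemBuffer  MB("<mem>");  cantFail(writeBytes(MB, Data));
    //   auto Owned = MB.releaseMemoryBuffer();  // take ownership of in-memory result
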
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
new file mode 100644
index 000000000000..6b386d29979c
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -0,0 +1,98 @@
+//===- COFFObjcopy.cpp ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "COFFObjcopy.h"
+#include "Buffer.h"
+#include "CopyConfig.h"
+#include "Object.h"
+#include "Reader.h"
+#include "Writer.h"
+#include "llvm-objcopy.h"
+
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Errc.h"
+#include <cassert>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+using namespace object;
+using namespace COFF;
+
+static Error handleArgs(const CopyConfig &Config, Object &Obj) {
+ // StripAll removes all symbols and thus also removes all relocations.
+ if (Config.StripAll || Config.StripAllGNU)
+ for (Section &Sec : Obj.Sections)
+ Sec.Relocs.clear();
+
+ // If we need to do per-symbol removals, initialize the Referenced field.
+ if (Config.StripUnneeded || Config.DiscardAll ||
+ !Config.SymbolsToRemove.empty())
+ if (Error E = Obj.markSymbols())
+ return E;
+
+ // Actually do removals of symbols.
+ Obj.removeSymbols([&](const Symbol &Sym) {
+ // For StripAll, all relocations have been stripped and we remove all
+ // symbols.
+ if (Config.StripAll || Config.StripAllGNU)
+ return true;
+
+ if (is_contained(Config.SymbolsToRemove, Sym.Name)) {
+ // Explicitly removing a referenced symbol is an error.
+ if (Sym.Referenced)
+ reportError(Config.OutputFilename,
+ make_error<StringError>(
+ "not stripping symbol '" + Sym.Name +
+ "' because it is named in a relocation.",
+ llvm::errc::invalid_argument));
+ return true;
+ }
+
+ if (!Sym.Referenced) {
+ // With --strip-unneeded, GNU objcopy removes all unreferenced local
+ // symbols, and any unreferenced undefined external symbols.
+ if (Config.StripUnneeded &&
+ (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC ||
+ Sym.Sym.SectionNumber == 0))
+ return true;
+
+ // GNU objcopy keeps referenced local symbols and external symbols
+ // if --discard-all is set, similar to what --strip-unneeded does,
+ // but undefined local symbols are kept when --discard-all is set.
+ if (Config.DiscardAll && Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC &&
+ Sym.Sym.SectionNumber != 0)
+ return true;
+ }
+
+ return false;
+ });
+ return Error::success();
+}
+
+void executeObjcopyOnBinary(const CopyConfig &Config,
+ object::COFFObjectFile &In, Buffer &Out) {
+ COFFReader Reader(In);
+ Expected<std::unique_ptr<Object>> ObjOrErr = Reader.create();
+ if (!ObjOrErr)
+ reportError(Config.InputFilename, ObjOrErr.takeError());
+ Object *Obj = ObjOrErr->get();
+ assert(Obj && "Unable to deserialize COFF object");
+ if (Error E = handleArgs(Config, *Obj))
+ reportError(Config.InputFilename, std::move(E));
+ COFFWriter Writer(*Obj, Out);
+ if (Error E = Writer.write())
+ reportError(Config.OutputFilename, std::move(E));
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h b/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h
new file mode 100644
index 000000000000..bf70bd9b4d84
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h
@@ -0,0 +1,31 @@
+//===- COFFObjcopy.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H
+#define LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H
+
+namespace llvm {
+
+namespace object {
+class COFFObjectFile;
+} // end namespace object
+
+namespace objcopy {
+struct CopyConfig;
+class Buffer;
+
+namespace coff {
+void executeObjcopyOnBinary(const CopyConfig &Config,
+ object::COFFObjectFile &In, Buffer &Out);
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Object.cpp b/contrib/llvm/tools/llvm-objcopy/COFF/Object.cpp
new file mode 100644
index 000000000000..315d3a778623
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Object.cpp
@@ -0,0 +1,70 @@
+//===- Object.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Object.h"
+#include <algorithm>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+using namespace object;
+
+void Object::addSymbols(ArrayRef<Symbol> NewSymbols) {
+ for (Symbol S : NewSymbols) {
+ S.UniqueId = NextSymbolUniqueId++;
+ Symbols.emplace_back(S);
+ }
+ updateSymbols();
+}
+
+void Object::updateSymbols() {
+ SymbolMap = DenseMap<size_t, Symbol *>(Symbols.size());
+ size_t RawSymIndex = 0;
+ for (Symbol &Sym : Symbols) {
+ SymbolMap[Sym.UniqueId] = &Sym;
+ Sym.RawIndex = RawSymIndex;
+ RawSymIndex += 1 + Sym.Sym.NumberOfAuxSymbols;
+ }
+}
+
+const Symbol *Object::findSymbol(size_t UniqueId) const {
+ auto It = SymbolMap.find(UniqueId);
+ if (It == SymbolMap.end())
+ return nullptr;
+ return It->second;
+}
+
+void Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
+ Symbols.erase(
+ std::remove_if(std::begin(Symbols), std::end(Symbols),
+ [ToRemove](const Symbol &Sym) { return ToRemove(Sym); }),
+ std::end(Symbols));
+ updateSymbols();
+}
+
+Error Object::markSymbols() {
+ for (Symbol &Sym : Symbols)
+ Sym.Referenced = false;
+ for (const Section &Sec : Sections) {
+ for (const Relocation &R : Sec.Relocs) {
+ auto It = SymbolMap.find(R.Target);
+ if (It == SymbolMap.end())
+ return make_error<StringError>("Relocation target " + Twine(R.Target) +
+ " not found",
+ object_error::invalid_symbol_index);
+ It->second->Referenced = true;
+ }
+ }
+ return Error::success();
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Object.h b/contrib/llvm/tools/llvm-objcopy/COFF/Object.h
new file mode 100644
index 000000000000..7531fb4cf39e
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Object.h
@@ -0,0 +1,148 @@
+//===- Object.h -------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H
+#define LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+struct Relocation {
+ Relocation() {}
+ Relocation(const object::coff_relocation& R) : Reloc(R) {}
+
+ object::coff_relocation Reloc;
+ size_t Target;
+ StringRef TargetName; // Used for diagnostics only
+};
+
+struct Section {
+ object::coff_section Header;
+ ArrayRef<uint8_t> Contents;
+ std::vector<Relocation> Relocs;
+ StringRef Name;
+};
+
+struct Symbol {
+ object::coff_symbol32 Sym;
+ StringRef Name;
+ ArrayRef<uint8_t> AuxData;
+ size_t UniqueId;
+ size_t RawIndex;
+ bool Referenced;
+};
+
+struct Object {
+ bool IsPE = false;
+
+ object::dos_header DosHeader;
+ ArrayRef<uint8_t> DosStub;
+
+ object::coff_file_header CoffFileHeader;
+
+ bool Is64 = false;
+ object::pe32plus_header PeHeader;
+ uint32_t BaseOfData = 0; // pe32plus_header lacks this field.
+
+ std::vector<object::data_directory> DataDirectories;
+ std::vector<Section> Sections;
+
+ ArrayRef<Symbol> getSymbols() const { return Symbols; }
+ // This allows mutating individual Symbols, but not mutating the list
+ // of symbols itself.
+ iterator_range<std::vector<Symbol>::iterator> getMutableSymbols() {
+ return make_range(Symbols.begin(), Symbols.end());
+ }
+
+ const Symbol *findSymbol(size_t UniqueId) const;
+
+ void addSymbols(ArrayRef<Symbol> NewSymbols);
+ void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
+
+ // Set the Referenced field on all Symbols, based on relocations in
+ // all sections.
+ Error markSymbols();
+
+private:
+ std::vector<Symbol> Symbols;
+ DenseMap<size_t, Symbol *> SymbolMap;
+
+ size_t NextSymbolUniqueId = 0;
+
+ // Update SymbolMap and RawIndex in each Symbol.
+ void updateSymbols();
+};
+
+// Copy between coff_symbol16 and coff_symbol32.
+// The source and destination files can use either coff_symbol16 or
+// coff_symbol32, while we always store them as coff_symbol32 in the
+// intermediate data structure.
+template <class Symbol1Ty, class Symbol2Ty>
+void copySymbol(Symbol1Ty &Dest, const Symbol2Ty &Src) {
+ static_assert(sizeof(Dest.Name.ShortName) == sizeof(Src.Name.ShortName),
+ "Mismatched name sizes");
+ memcpy(Dest.Name.ShortName, Src.Name.ShortName, sizeof(Dest.Name.ShortName));
+ Dest.Value = Src.Value;
+ Dest.SectionNumber = Src.SectionNumber;
+ Dest.Type = Src.Type;
+ Dest.StorageClass = Src.StorageClass;
+ Dest.NumberOfAuxSymbols = Src.NumberOfAuxSymbols;
+}
+
+// Copy between pe32_header and pe32plus_header.
+// We store the intermediate state in a pe32plus_header.
+template <class PeHeader1Ty, class PeHeader2Ty>
+void copyPeHeader(PeHeader1Ty &Dest, const PeHeader2Ty &Src) {
+ Dest.Magic = Src.Magic;
+ Dest.MajorLinkerVersion = Src.MajorLinkerVersion;
+ Dest.MinorLinkerVersion = Src.MinorLinkerVersion;
+ Dest.SizeOfCode = Src.SizeOfCode;
+ Dest.SizeOfInitializedData = Src.SizeOfInitializedData;
+ Dest.SizeOfUninitializedData = Src.SizeOfUninitializedData;
+ Dest.AddressOfEntryPoint = Src.AddressOfEntryPoint;
+ Dest.BaseOfCode = Src.BaseOfCode;
+ Dest.ImageBase = Src.ImageBase;
+ Dest.SectionAlignment = Src.SectionAlignment;
+ Dest.FileAlignment = Src.FileAlignment;
+ Dest.MajorOperatingSystemVersion = Src.MajorOperatingSystemVersion;
+ Dest.MinorOperatingSystemVersion = Src.MinorOperatingSystemVersion;
+ Dest.MajorImageVersion = Src.MajorImageVersion;
+ Dest.MinorImageVersion = Src.MinorImageVersion;
+ Dest.MajorSubsystemVersion = Src.MajorSubsystemVersion;
+ Dest.MinorSubsystemVersion = Src.MinorSubsystemVersion;
+ Dest.Win32VersionValue = Src.Win32VersionValue;
+ Dest.SizeOfImage = Src.SizeOfImage;
+ Dest.SizeOfHeaders = Src.SizeOfHeaders;
+ Dest.CheckSum = Src.CheckSum;
+ Dest.Subsystem = Src.Subsystem;
+ Dest.DLLCharacteristics = Src.DLLCharacteristics;
+ Dest.SizeOfStackReserve = Src.SizeOfStackReserve;
+ Dest.SizeOfStackCommit = Src.SizeOfStackCommit;
+ Dest.SizeOfHeapReserve = Src.SizeOfHeapReserve;
+ Dest.SizeOfHeapCommit = Src.SizeOfHeapCommit;
+ Dest.LoaderFlags = Src.LoaderFlags;
+ Dest.NumberOfRvaAndSize = Src.NumberOfRvaAndSize;
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H
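
A rough sketch (editorial aside, not part of the patch) of how the symbol API above fits together: markSymbols() fills in Symbol::Referenced from section relocations, and removeSymbols() then applies a caller-supplied predicate. dropUnreferencedSymbol is a hypothetical helper:

    #include "Object.h"
    #include "llvm/Support/Error.h"

    static llvm::Error dropUnreferencedSymbol(llvm::objcopy::coff::Object &Obj,
                                              llvm::StringRef Name) {
      // Compute the Referenced bits so removal cannot break relocations.
      if (llvm::Error E = Obj.markSymbols())
        return E;
      // Drop the named symbol only if nothing relocates against it.
      Obj.removeSymbols([&](const llvm::objcopy::coff::Symbol &Sym) {
        return Sym.Name == Name && !Sym.Referenced;
      });
      return llvm::Error::success();
    }
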
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Reader.cpp b/contrib/llvm/tools/llvm-objcopy/COFF/Reader.cpp
new file mode 100644
index 000000000000..a01768392d7d
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Reader.cpp
@@ -0,0 +1,171 @@
+//===- Reader.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Reader.h"
+#include "Object.h"
+#include "llvm-objcopy.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+#include <cstdint>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+using namespace object;
+
+Error COFFReader::readExecutableHeaders(Object &Obj) const {
+ const dos_header *DH = COFFObj.getDOSHeader();
+ Obj.Is64 = COFFObj.is64();
+ if (!DH)
+ return Error::success();
+
+ Obj.IsPE = true;
+ Obj.DosHeader = *DH;
+ if (DH->AddressOfNewExeHeader > sizeof(*DH))
+ Obj.DosStub = ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(&DH[1]),
+ DH->AddressOfNewExeHeader - sizeof(*DH));
+
+ if (COFFObj.is64()) {
+ const pe32plus_header *PE32Plus = nullptr;
+ if (auto EC = COFFObj.getPE32PlusHeader(PE32Plus))
+ return errorCodeToError(EC);
+ Obj.PeHeader = *PE32Plus;
+ } else {
+ const pe32_header *PE32 = nullptr;
+ if (auto EC = COFFObj.getPE32Header(PE32))
+ return errorCodeToError(EC);
+ copyPeHeader(Obj.PeHeader, *PE32);
+ // The pe32plus_header (stored in Object) lacks the BaseOfData field.
+ Obj.BaseOfData = PE32->BaseOfData;
+ }
+
+ for (size_t I = 0; I < Obj.PeHeader.NumberOfRvaAndSize; I++) {
+ const data_directory *Dir;
+ if (auto EC = COFFObj.getDataDirectory(I, Dir))
+ return errorCodeToError(EC);
+ Obj.DataDirectories.emplace_back(*Dir);
+ }
+ return Error::success();
+}
+
+Error COFFReader::readSections(Object &Obj) const {
+ // Section indexing starts from 1.
+ for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) {
+ const coff_section *Sec;
+ if (auto EC = COFFObj.getSection(I, Sec))
+ return errorCodeToError(EC);
+ Obj.Sections.push_back(Section());
+ Section &S = Obj.Sections.back();
+ S.Header = *Sec;
+ if (auto EC = COFFObj.getSectionContents(Sec, S.Contents))
+ return errorCodeToError(EC);
+ ArrayRef<coff_relocation> Relocs = COFFObj.getRelocations(Sec);
+ for (const coff_relocation &R : Relocs)
+ S.Relocs.push_back(R);
+ if (auto EC = COFFObj.getSectionName(Sec, S.Name))
+ return errorCodeToError(EC);
+ if (Sec->hasExtendedRelocations())
+ return make_error<StringError>("Extended relocations not supported yet",
+ object_error::parse_failed);
+ }
+ return Error::success();
+}
+
+Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
+ std::vector<Symbol> Symbols;
+ Symbols.reserve(COFFObj.getRawNumberOfSymbols());
+ for (uint32_t I = 0, E = COFFObj.getRawNumberOfSymbols(); I < E;) {
+ Expected<COFFSymbolRef> SymOrErr = COFFObj.getSymbol(I);
+ if (!SymOrErr)
+ return SymOrErr.takeError();
+ COFFSymbolRef SymRef = *SymOrErr;
+
+ Symbols.push_back(Symbol());
+ Symbol &Sym = Symbols.back();
+ // Copy symbols from the original form into an intermediate coff_symbol32.
+ if (IsBigObj)
+ copySymbol(Sym.Sym,
+ *reinterpret_cast<const coff_symbol32 *>(SymRef.getRawPtr()));
+ else
+ copySymbol(Sym.Sym,
+ *reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr()));
+ if (auto EC = COFFObj.getSymbolName(SymRef, Sym.Name))
+ return errorCodeToError(EC);
+ Sym.AuxData = COFFObj.getSymbolAuxData(SymRef);
+ assert((Sym.AuxData.size() %
+ (IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16))) == 0);
+ I += 1 + SymRef.getNumberOfAuxSymbols();
+ }
+ Obj.addSymbols(Symbols);
+ return Error::success();
+}
+
+Error COFFReader::setRelocTargets(Object &Obj) const {
+ std::vector<const Symbol *> RawSymbolTable;
+ for (const Symbol &Sym : Obj.getSymbols()) {
+ RawSymbolTable.push_back(&Sym);
+ for (size_t I = 0; I < Sym.Sym.NumberOfAuxSymbols; I++)
+ RawSymbolTable.push_back(nullptr);
+ }
+ for (Section &Sec : Obj.Sections) {
+ for (Relocation &R : Sec.Relocs) {
+ if (R.Reloc.SymbolTableIndex >= RawSymbolTable.size())
+ return make_error<StringError>("SymbolTableIndex out of range",
+ object_error::parse_failed);
+ const Symbol *Sym = RawSymbolTable[R.Reloc.SymbolTableIndex];
+ if (Sym == nullptr)
+ return make_error<StringError>("Invalid SymbolTableIndex",
+ object_error::parse_failed);
+ R.Target = Sym->UniqueId;
+ R.TargetName = Sym->Name;
+ }
+ }
+ return Error::success();
+}
+
+Expected<std::unique_ptr<Object>> COFFReader::create() const {
+ auto Obj = llvm::make_unique<Object>();
+
+ const coff_file_header *CFH = nullptr;
+ const coff_bigobj_file_header *CBFH = nullptr;
+ COFFObj.getCOFFHeader(CFH);
+ COFFObj.getCOFFBigObjHeader(CBFH);
+ bool IsBigObj = false;
+ if (CFH) {
+ Obj->CoffFileHeader = *CFH;
+ } else {
+ if (!CBFH)
+ return make_error<StringError>("No COFF file header returned",
+ object_error::parse_failed);
+ // Only copying the few fields from the bigobj header that we need
+ // and won't recreate in the end.
+ Obj->CoffFileHeader.Machine = CBFH->Machine;
+ Obj->CoffFileHeader.TimeDateStamp = CBFH->TimeDateStamp;
+ IsBigObj = true;
+ }
+
+ if (Error E = readExecutableHeaders(*Obj))
+ return std::move(E);
+ if (Error E = readSections(*Obj))
+ return std::move(E);
+ if (Error E = readSymbols(*Obj, IsBigObj))
+ return std::move(E);
+ if (Error E = setRelocTargets(*Obj))
+ return std::move(E);
+
+ return std::move(Obj);
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Reader.h b/contrib/llvm/tools/llvm-objcopy/COFF/Reader.h
new file mode 100644
index 000000000000..ca7057d08c9f
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Reader.h
@@ -0,0 +1,43 @@
+//===- Reader.h -------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_COFF_READER_H
+#define LLVM_TOOLS_OBJCOPY_COFF_READER_H
+
+#include "Buffer.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+struct Object;
+
+using object::COFFObjectFile;
+
+class COFFReader {
+ const COFFObjectFile &COFFObj;
+
+ Error readExecutableHeaders(Object &Obj) const;
+ Error readSections(Object &Obj) const;
+ Error readSymbols(Object &Obj, bool IsBigObj) const;
+ Error setRelocTargets(Object &Obj) const;
+
+public:
+ explicit COFFReader(const COFFObjectFile &O) : COFFObj(O) {}
+ Expected<std::unique_ptr<Object>> create() const;
+};
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_COFF_READER_H
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/contrib/llvm/tools/llvm-objcopy/COFF/Writer.cpp
new file mode 100644
index 000000000000..385d43b1bae5
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Writer.cpp
@@ -0,0 +1,337 @@
+//===- Writer.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Writer.h"
+#include "Object.h"
+#include "llvm-objcopy.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+#include <cstdint>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+using namespace object;
+using namespace COFF;
+
+Error COFFWriter::finalizeRelocTargets() {
+ for (Section &Sec : Obj.Sections) {
+ for (Relocation &R : Sec.Relocs) {
+ const Symbol *Sym = Obj.findSymbol(R.Target);
+ if (Sym == nullptr)
+ return make_error<StringError>("Relocation target " + R.TargetName +
+ " (" + Twine(R.Target) +
+ ") not found",
+ object_error::invalid_symbol_index);
+ R.Reloc.SymbolTableIndex = Sym->RawIndex;
+ }
+ }
+ return Error::success();
+}
+
+void COFFWriter::layoutSections() {
+ for (auto &S : Obj.Sections) {
+ if (S.Header.SizeOfRawData > 0)
+ S.Header.PointerToRawData = FileSize;
+ FileSize += S.Header.SizeOfRawData; // For executables, this is already
+ // aligned to FileAlignment.
+ S.Header.NumberOfRelocations = S.Relocs.size();
+ S.Header.PointerToRelocations =
+ S.Header.NumberOfRelocations > 0 ? FileSize : 0;
+ FileSize += S.Relocs.size() * sizeof(coff_relocation);
+ FileSize = alignTo(FileSize, FileAlignment);
+
+ if (S.Header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
+ SizeOfInitializedData += S.Header.SizeOfRawData;
+ }
+}
+
+size_t COFFWriter::finalizeStringTable() {
+ for (auto &S : Obj.Sections)
+ if (S.Name.size() > COFF::NameSize)
+ StrTabBuilder.add(S.Name);
+
+ for (const auto &S : Obj.getSymbols())
+ if (S.Name.size() > COFF::NameSize)
+ StrTabBuilder.add(S.Name);
+
+ StrTabBuilder.finalize();
+
+ for (auto &S : Obj.Sections) {
+ if (S.Name.size() > COFF::NameSize) {
+ snprintf(S.Header.Name, sizeof(S.Header.Name), "/%d",
+ (int)StrTabBuilder.getOffset(S.Name));
+ } else {
+ strncpy(S.Header.Name, S.Name.data(), COFF::NameSize);
+ }
+ }
+ for (auto &S : Obj.getMutableSymbols()) {
+ if (S.Name.size() > COFF::NameSize) {
+ S.Sym.Name.Offset.Zeroes = 0;
+ S.Sym.Name.Offset.Offset = StrTabBuilder.getOffset(S.Name);
+ } else {
+ strncpy(S.Sym.Name.ShortName, S.Name.data(), COFF::NameSize);
+ }
+ }
+ return StrTabBuilder.getSize();
+}
+
+template <class SymbolTy>
+std::pair<size_t, size_t> COFFWriter::finalizeSymbolTable() {
+ size_t SymTabSize = Obj.getSymbols().size() * sizeof(SymbolTy);
+ for (const auto &S : Obj.getSymbols())
+ SymTabSize += S.AuxData.size();
+ return std::make_pair(SymTabSize, sizeof(SymbolTy));
+}
+
+Error COFFWriter::finalize(bool IsBigObj) {
+ if (Error E = finalizeRelocTargets())
+ return E;
+
+ size_t SizeOfHeaders = 0;
+ FileAlignment = 1;
+ size_t PeHeaderSize = 0;
+ if (Obj.IsPE) {
+ Obj.DosHeader.AddressOfNewExeHeader =
+ sizeof(Obj.DosHeader) + Obj.DosStub.size();
+ SizeOfHeaders += Obj.DosHeader.AddressOfNewExeHeader + sizeof(PEMagic);
+
+ FileAlignment = Obj.PeHeader.FileAlignment;
+ Obj.PeHeader.NumberOfRvaAndSize = Obj.DataDirectories.size();
+
+ PeHeaderSize = Obj.Is64 ? sizeof(pe32plus_header) : sizeof(pe32_header);
+ SizeOfHeaders +=
+ PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size();
+ }
+ Obj.CoffFileHeader.NumberOfSections = Obj.Sections.size();
+ SizeOfHeaders +=
+ IsBigObj ? sizeof(coff_bigobj_file_header) : sizeof(coff_file_header);
+ SizeOfHeaders += sizeof(coff_section) * Obj.Sections.size();
+ SizeOfHeaders = alignTo(SizeOfHeaders, FileAlignment);
+
+ Obj.CoffFileHeader.SizeOfOptionalHeader =
+ PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size();
+
+ FileSize = SizeOfHeaders;
+ SizeOfInitializedData = 0;
+
+ layoutSections();
+
+ if (Obj.IsPE) {
+ Obj.PeHeader.SizeOfHeaders = SizeOfHeaders;
+ Obj.PeHeader.SizeOfInitializedData = SizeOfInitializedData;
+
+ if (!Obj.Sections.empty()) {
+ const Section &S = Obj.Sections.back();
+ Obj.PeHeader.SizeOfImage =
+ alignTo(S.Header.VirtualAddress + S.Header.VirtualSize,
+ Obj.PeHeader.SectionAlignment);
+ }
+
+ // If the PE header had a checksum, clear it, since it isn't valid
+ // any longer. (We don't calculate a new one.)
+ Obj.PeHeader.CheckSum = 0;
+ }
+
+ size_t StrTabSize = finalizeStringTable();
+ size_t SymTabSize, SymbolSize;
+ std::tie(SymTabSize, SymbolSize) = IsBigObj
+ ? finalizeSymbolTable<coff_symbol32>()
+ : finalizeSymbolTable<coff_symbol16>();
+
+ size_t PointerToSymbolTable = FileSize;
+ // StrTabSize <= 4 is the size of an empty string table, only consisting
+ // of the length field.
+ if (SymTabSize == 0 && StrTabSize <= 4 && Obj.IsPE) {
+ // For executables, don't point to the symbol table and skip writing
+ // the length field, if both the symbol and string tables are empty.
+ PointerToSymbolTable = 0;
+ StrTabSize = 0;
+ }
+
+ size_t NumRawSymbols = SymTabSize / SymbolSize;
+ Obj.CoffFileHeader.PointerToSymbolTable = PointerToSymbolTable;
+ Obj.CoffFileHeader.NumberOfSymbols = NumRawSymbols;
+ FileSize += SymTabSize + StrTabSize;
+ FileSize = alignTo(FileSize, FileAlignment);
+
+ return Error::success();
+}
+
+void COFFWriter::writeHeaders(bool IsBigObj) {
+ uint8_t *Ptr = Buf.getBufferStart();
+ if (Obj.IsPE) {
+ memcpy(Ptr, &Obj.DosHeader, sizeof(Obj.DosHeader));
+ Ptr += sizeof(Obj.DosHeader);
+ memcpy(Ptr, Obj.DosStub.data(), Obj.DosStub.size());
+ Ptr += Obj.DosStub.size();
+ memcpy(Ptr, PEMagic, sizeof(PEMagic));
+ Ptr += sizeof(PEMagic);
+ }
+ if (!IsBigObj) {
+ memcpy(Ptr, &Obj.CoffFileHeader, sizeof(Obj.CoffFileHeader));
+ Ptr += sizeof(Obj.CoffFileHeader);
+ } else {
+ // Generate a coff_bigobj_file_header, filling it in with the values
+ // from Obj.CoffFileHeader. All extra fields that don't exist in
+ // coff_file_header can be set to hardcoded values.
+ coff_bigobj_file_header BigObjHeader;
+ BigObjHeader.Sig1 = IMAGE_FILE_MACHINE_UNKNOWN;
+ BigObjHeader.Sig2 = 0xffff;
+ BigObjHeader.Version = BigObjHeader::MinBigObjectVersion;
+ BigObjHeader.Machine = Obj.CoffFileHeader.Machine;
+ BigObjHeader.TimeDateStamp = Obj.CoffFileHeader.TimeDateStamp;
+ memcpy(BigObjHeader.UUID, BigObjMagic, sizeof(BigObjMagic));
+ BigObjHeader.unused1 = 0;
+ BigObjHeader.unused2 = 0;
+ BigObjHeader.unused3 = 0;
+ BigObjHeader.unused4 = 0;
+ // The value in Obj.CoffFileHeader.NumberOfSections is truncated, thus
+ // get the original one instead.
+ BigObjHeader.NumberOfSections = Obj.Sections.size();
+ BigObjHeader.PointerToSymbolTable = Obj.CoffFileHeader.PointerToSymbolTable;
+ BigObjHeader.NumberOfSymbols = Obj.CoffFileHeader.NumberOfSymbols;
+
+ memcpy(Ptr, &BigObjHeader, sizeof(BigObjHeader));
+ Ptr += sizeof(BigObjHeader);
+ }
+ if (Obj.IsPE) {
+ if (Obj.Is64) {
+ memcpy(Ptr, &Obj.PeHeader, sizeof(Obj.PeHeader));
+ Ptr += sizeof(Obj.PeHeader);
+ } else {
+ pe32_header PeHeader;
+ copyPeHeader(PeHeader, Obj.PeHeader);
+ // The pe32plus_header (stored in Object) lacks the BaseOfData field.
+ PeHeader.BaseOfData = Obj.BaseOfData;
+
+ memcpy(Ptr, &PeHeader, sizeof(PeHeader));
+ Ptr += sizeof(PeHeader);
+ }
+ for (const auto &DD : Obj.DataDirectories) {
+ memcpy(Ptr, &DD, sizeof(DD));
+ Ptr += sizeof(DD);
+ }
+ }
+ for (const auto &S : Obj.Sections) {
+ memcpy(Ptr, &S.Header, sizeof(S.Header));
+ Ptr += sizeof(S.Header);
+ }
+}
+
+void COFFWriter::writeSections() {
+ for (const auto &S : Obj.Sections) {
+ uint8_t *Ptr = Buf.getBufferStart() + S.Header.PointerToRawData;
+ std::copy(S.Contents.begin(), S.Contents.end(), Ptr);
+
+ // For executable sections, pad the remainder of the raw data size with
+ // 0xcc, which is int3 on x86.
+ if ((S.Header.Characteristics & IMAGE_SCN_CNT_CODE) &&
+ S.Header.SizeOfRawData > S.Contents.size())
+ memset(Ptr + S.Contents.size(), 0xcc,
+ S.Header.SizeOfRawData - S.Contents.size());
+
+ Ptr += S.Header.SizeOfRawData;
+ for (const auto &R : S.Relocs) {
+ memcpy(Ptr, &R.Reloc, sizeof(R.Reloc));
+ Ptr += sizeof(R.Reloc);
+ }
+ }
+}
+
+template <class SymbolTy> void COFFWriter::writeSymbolStringTables() {
+ uint8_t *Ptr = Buf.getBufferStart() + Obj.CoffFileHeader.PointerToSymbolTable;
+ for (const auto &S : Obj.getSymbols()) {
+ // Convert symbols back to the right size, from coff_symbol32.
+ copySymbol<SymbolTy, coff_symbol32>(*reinterpret_cast<SymbolTy *>(Ptr),
+ S.Sym);
+ Ptr += sizeof(SymbolTy);
+ std::copy(S.AuxData.begin(), S.AuxData.end(), Ptr);
+ Ptr += S.AuxData.size();
+ }
+ if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) {
+ // Always write a string table in object files, even an empty one.
+ StrTabBuilder.write(Ptr);
+ Ptr += StrTabBuilder.getSize();
+ }
+}
+
+Error COFFWriter::write(bool IsBigObj) {
+ if (Error E = finalize(IsBigObj))
+ return E;
+
+ Buf.allocate(FileSize);
+
+ writeHeaders(IsBigObj);
+ writeSections();
+ if (IsBigObj)
+ writeSymbolStringTables<coff_symbol32>();
+ else
+ writeSymbolStringTables<coff_symbol16>();
+
+ if (Obj.IsPE)
+ if (Error E = patchDebugDirectory())
+ return E;
+
+ return Buf.commit();
+}
+
+// Locate which sections contain the debug directories, iterate over all
+// the debug_directory structs in there, and set the PointerToRawData field
+// in all of them, according to their new physical location in the file.
+Error COFFWriter::patchDebugDirectory() {
+ if (Obj.DataDirectories.size() < DEBUG_DIRECTORY)
+ return Error::success();
+ const data_directory *Dir = &Obj.DataDirectories[DEBUG_DIRECTORY];
+ if (Dir->Size <= 0)
+ return Error::success();
+ for (const auto &S : Obj.Sections) {
+ if (Dir->RelativeVirtualAddress >= S.Header.VirtualAddress &&
+ Dir->RelativeVirtualAddress <
+ S.Header.VirtualAddress + S.Header.SizeOfRawData) {
+ if (Dir->RelativeVirtualAddress + Dir->Size >
+ S.Header.VirtualAddress + S.Header.SizeOfRawData)
+ return make_error<StringError>(
+ "Debug directory extends past end of section",
+ object_error::parse_failed);
+
+ size_t Offset = Dir->RelativeVirtualAddress - S.Header.VirtualAddress;
+ uint8_t *Ptr = Buf.getBufferStart() + S.Header.PointerToRawData + Offset;
+ uint8_t *End = Ptr + Dir->Size;
+ while (Ptr < End) {
+ debug_directory *Debug = reinterpret_cast<debug_directory *>(Ptr);
+ Debug->PointerToRawData =
+ S.Header.PointerToRawData + Offset + sizeof(debug_directory);
+ Ptr += sizeof(debug_directory) + Debug->SizeOfData;
+ Offset += sizeof(debug_directory) + Debug->SizeOfData;
+ }
+ // Debug directory found and patched, all done.
+ return Error::success();
+ }
+ }
+ return make_error<StringError>("Debug directory not found",
+ object_error::parse_failed);
+}
+
+Error COFFWriter::write() {
+ bool IsBigObj = Obj.Sections.size() > MaxNumberOfSections16;
+ if (IsBigObj && Obj.IsPE)
+ return make_error<StringError>("Too many sections for executable",
+ object_error::parse_failed);
+ return write(IsBigObj);
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Writer.h b/contrib/llvm/tools/llvm-objcopy/COFF/Writer.h
new file mode 100644
index 000000000000..ab66e0cc1134
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Writer.h
@@ -0,0 +1,61 @@
+//===- Writer.h -------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
+#define LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
+
+#include "Buffer.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Error.h"
+#include <cstddef>
+#include <utility>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+struct Object;
+
+class COFFWriter {
+ Object &Obj;
+ Buffer &Buf;
+
+ size_t FileSize;
+ size_t FileAlignment;
+ size_t SizeOfInitializedData;
+ StringTableBuilder StrTabBuilder;
+
+ Error finalizeRelocTargets();
+ void layoutSections();
+ size_t finalizeStringTable();
+ template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
+
+ Error finalize(bool IsBigObj);
+
+ void writeHeaders(bool IsBigObj);
+ void writeSections();
+ template <class SymbolTy> void writeSymbolStringTables();
+
+ Error write(bool IsBigObj);
+
+ Error patchDebugDirectory();
+
+public:
+ virtual ~COFFWriter() {}
+ Error write();
+
+ COFFWriter(Object &Obj, Buffer &Buf)
+ : Obj(Obj), Buf(Buf), StrTabBuilder(StringTableBuilder::WinCOFF) {}
+};
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
diff --git a/contrib/llvm/tools/llvm-objcopy/CopyConfig.cpp b/contrib/llvm/tools/llvm-objcopy/CopyConfig.cpp
new file mode 100644
index 000000000000..3737f571ae61
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/CopyConfig.cpp
@@ -0,0 +1,474 @@
+//===- CopyConfig.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CopyConfig.h"
+#include "llvm-objcopy.h"
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+#include <string>
+
+namespace llvm {
+namespace objcopy {
+
+namespace {
+enum ObjcopyID {
+ OBJCOPY_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ OBJCOPY_##ID,
+#include "ObjcopyOpts.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const OBJCOPY_##NAME[] = VALUE;
+#include "ObjcopyOpts.inc"
+#undef PREFIX
+
+static const opt::OptTable::Info ObjcopyInfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ {OBJCOPY_##PREFIX, \
+ NAME, \
+ HELPTEXT, \
+ METAVAR, \
+ OBJCOPY_##ID, \
+ opt::Option::KIND##Class, \
+ PARAM, \
+ FLAGS, \
+ OBJCOPY_##GROUP, \
+ OBJCOPY_##ALIAS, \
+ ALIASARGS, \
+ VALUES},
+#include "ObjcopyOpts.inc"
+#undef OPTION
+};
+
+class ObjcopyOptTable : public opt::OptTable {
+public:
+ ObjcopyOptTable() : OptTable(ObjcopyInfoTable) {}
+};
+
+enum StripID {
+ STRIP_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ STRIP_##ID,
+#include "StripOpts.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const STRIP_##NAME[] = VALUE;
+#include "StripOpts.inc"
+#undef PREFIX
+
+static const opt::OptTable::Info StripInfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ {STRIP_##PREFIX, NAME, HELPTEXT, \
+ METAVAR, STRIP_##ID, opt::Option::KIND##Class, \
+ PARAM, FLAGS, STRIP_##GROUP, \
+ STRIP_##ALIAS, ALIASARGS, VALUES},
+#include "StripOpts.inc"
+#undef OPTION
+};
+
+class StripOptTable : public opt::OptTable {
+public:
+ StripOptTable() : OptTable(StripInfoTable) {}
+};
+
+enum SectionFlag {
+ SecNone = 0,
+ SecAlloc = 1 << 0,
+ SecLoad = 1 << 1,
+ SecNoload = 1 << 2,
+ SecReadonly = 1 << 3,
+ SecDebug = 1 << 4,
+ SecCode = 1 << 5,
+ SecData = 1 << 6,
+ SecRom = 1 << 7,
+ SecMerge = 1 << 8,
+ SecStrings = 1 << 9,
+ SecContents = 1 << 10,
+ SecShare = 1 << 11,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ SecShare)
+};
+
+} // namespace
+
+static SectionFlag parseSectionRenameFlag(StringRef SectionName) {
+ return llvm::StringSwitch<SectionFlag>(SectionName)
+ .Case("alloc", SectionFlag::SecAlloc)
+ .Case("load", SectionFlag::SecLoad)
+ .Case("noload", SectionFlag::SecNoload)
+ .Case("readonly", SectionFlag::SecReadonly)
+ .Case("debug", SectionFlag::SecDebug)
+ .Case("code", SectionFlag::SecCode)
+ .Case("data", SectionFlag::SecData)
+ .Case("rom", SectionFlag::SecRom)
+ .Case("merge", SectionFlag::SecMerge)
+ .Case("strings", SectionFlag::SecStrings)
+ .Case("contents", SectionFlag::SecContents)
+ .Case("share", SectionFlag::SecShare)
+ .Default(SectionFlag::SecNone);
+}
+
+static SectionRename parseRenameSectionValue(StringRef FlagValue) {
+ if (!FlagValue.contains('='))
+ error("Bad format for --rename-section: missing '='");
+
+ // Initial split: ".foo" = ".bar,f1,f2,..."
+ auto Old2New = FlagValue.split('=');
+ SectionRename SR;
+ SR.OriginalName = Old2New.first;
+
+ // Flags split: ".bar" "f1" "f2" ...
+ SmallVector<StringRef, 6> NameAndFlags;
+ Old2New.second.split(NameAndFlags, ',');
+ SR.NewName = NameAndFlags[0];
+
+ if (NameAndFlags.size() > 1) {
+ SectionFlag Flags = SectionFlag::SecNone;
+ for (size_t I = 1, Size = NameAndFlags.size(); I < Size; ++I) {
+ SectionFlag Flag = parseSectionRenameFlag(NameAndFlags[I]);
+ if (Flag == SectionFlag::SecNone)
+ error("Unrecognized section flag '" + NameAndFlags[I] +
+ "'. Flags supported for GNU compatibility: alloc, load, noload, "
+ "readonly, debug, code, data, rom, share, contents, merge, "
+ "strings.");
+ Flags |= Flag;
+ }
+
+ SR.NewFlags = 0;
+ if (Flags & SectionFlag::SecAlloc)
+ *SR.NewFlags |= ELF::SHF_ALLOC;
+ if (!(Flags & SectionFlag::SecReadonly))
+ *SR.NewFlags |= ELF::SHF_WRITE;
+ if (Flags & SectionFlag::SecCode)
+ *SR.NewFlags |= ELF::SHF_EXECINSTR;
+ if (Flags & SectionFlag::SecMerge)
+ *SR.NewFlags |= ELF::SHF_MERGE;
+ if (Flags & SectionFlag::SecStrings)
+ *SR.NewFlags |= ELF::SHF_STRINGS;
+ }
+
+ return SR;
+}
+
+static const StringMap<MachineInfo> ArchMap{
+ // Name, {EMachine, 64bit, LittleEndian}
+ {"aarch64", {ELF::EM_AARCH64, true, true}},
+ {"arm", {ELF::EM_ARM, false, true}},
+ {"i386", {ELF::EM_386, false, true}},
+ {"i386:x86-64", {ELF::EM_X86_64, true, true}},
+ {"powerpc:common64", {ELF::EM_PPC64, true, true}},
+ {"sparc", {ELF::EM_SPARC, false, true}},
+ {"x86-64", {ELF::EM_X86_64, true, true}},
+};
+
+static const MachineInfo &getMachineInfo(StringRef Arch) {
+ auto Iter = ArchMap.find(Arch);
+ if (Iter == std::end(ArchMap))
+ error("Invalid architecture: '" + Arch + "'");
+ return Iter->getValue();
+}
+
+static const StringMap<MachineInfo> OutputFormatMap{
+ // Name, {EMachine, 64bit, LittleEndian}
+ {"elf32-i386", {ELF::EM_386, false, true}},
+ {"elf32-powerpcle", {ELF::EM_PPC, false, true}},
+ {"elf32-x86-64", {ELF::EM_X86_64, false, true}},
+ {"elf64-powerpcle", {ELF::EM_PPC64, true, true}},
+ {"elf64-x86-64", {ELF::EM_X86_64, true, true}},
+};
+
+static const MachineInfo &getOutputFormatMachineInfo(StringRef Format) {
+ auto Iter = OutputFormatMap.find(Format);
+ if (Iter == std::end(OutputFormatMap))
+ error("Invalid output format: '" + Format + "'");
+ return Iter->getValue();
+}
+
+static void addGlobalSymbolsFromFile(std::vector<std::string> &Symbols,
+ StringRef Filename) {
+ SmallVector<StringRef, 16> Lines;
+ auto BufOrErr = MemoryBuffer::getFile(Filename);
+ if (!BufOrErr)
+ reportError(Filename, BufOrErr.getError());
+
+ BufOrErr.get()->getBuffer().split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ // Ignore everything after '#', trim whitespace, and only add the symbol if
+ // it's not empty.
+ auto TrimmedLine = Line.split('#').first.trim();
+ if (!TrimmedLine.empty())
+ Symbols.push_back(TrimmedLine.str());
+ }
+}
+
+// ParseObjcopyOptions returns the config and sets the input arguments. If a
+// help flag is set then ParseObjcopyOptions will print the help message and
+// exit.
+DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
+ ObjcopyOptTable T;
+ unsigned MissingArgumentIndex, MissingArgumentCount;
+ llvm::opt::InputArgList InputArgs =
+ T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
+
+ if (InputArgs.size() == 0) {
+ T.PrintHelp(errs(), "llvm-objcopy input [output]", "objcopy tool");
+ exit(1);
+ }
+
+ if (InputArgs.hasArg(OBJCOPY_help)) {
+ T.PrintHelp(outs(), "llvm-objcopy input [output]", "objcopy tool");
+ exit(0);
+ }
+
+ if (InputArgs.hasArg(OBJCOPY_version)) {
+ outs() << "llvm-objcopy, compatible with GNU objcopy\n";
+ cl::PrintVersionMessage();
+ exit(0);
+ }
+
+ SmallVector<const char *, 2> Positional;
+
+ for (auto Arg : InputArgs.filtered(OBJCOPY_UNKNOWN))
+ error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
+
+ for (auto Arg : InputArgs.filtered(OBJCOPY_INPUT))
+ Positional.push_back(Arg->getValue());
+
+ if (Positional.empty())
+ error("No input file specified");
+
+ if (Positional.size() > 2)
+ error("Too many positional arguments");
+
+ CopyConfig Config;
+ Config.InputFilename = Positional[0];
+ Config.OutputFilename = Positional[Positional.size() == 1 ? 0 : 1];
+ if (InputArgs.hasArg(OBJCOPY_target) &&
+ (InputArgs.hasArg(OBJCOPY_input_target) ||
+ InputArgs.hasArg(OBJCOPY_output_target)))
+ error("--target cannot be used with --input-target or --output-target");
+
+ if (InputArgs.hasArg(OBJCOPY_target)) {
+ Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
+ Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
+ } else {
+ Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target);
+ Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target);
+ }
+ if (Config.InputFormat == "binary") {
+ auto BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture);
+ if (BinaryArch.empty())
+ error("Specified binary input without specifiying an architecture");
+ Config.BinaryArch = getMachineInfo(BinaryArch);
+ }
+ if (!Config.OutputFormat.empty() && Config.OutputFormat != "binary")
+ Config.OutputArch = getOutputFormatMachineInfo(Config.OutputFormat);
+
+ if (auto Arg = InputArgs.getLastArg(OBJCOPY_compress_debug_sections,
+ OBJCOPY_compress_debug_sections_eq)) {
+ Config.CompressionType = DebugCompressionType::Z;
+
+ if (Arg->getOption().getID() == OBJCOPY_compress_debug_sections_eq) {
+ Config.CompressionType =
+ StringSwitch<DebugCompressionType>(
+ InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq))
+ .Case("zlib-gnu", DebugCompressionType::GNU)
+ .Case("zlib", DebugCompressionType::Z)
+ .Default(DebugCompressionType::None);
+ if (Config.CompressionType == DebugCompressionType::None)
+ error("Invalid or unsupported --compress-debug-sections format: " +
+ InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq));
+ if (!zlib::isAvailable())
+ error("LLVM was not compiled with LLVM_ENABLE_ZLIB: can not compress.");
+ }
+ }
+
+ Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink);
+ Config.BuildIdLinkDir = InputArgs.getLastArgValue(OBJCOPY_build_id_link_dir);
+ if (InputArgs.hasArg(OBJCOPY_build_id_link_input))
+ Config.BuildIdLinkInput =
+ InputArgs.getLastArgValue(OBJCOPY_build_id_link_input);
+ if (InputArgs.hasArg(OBJCOPY_build_id_link_output))
+ Config.BuildIdLinkOutput =
+ InputArgs.getLastArgValue(OBJCOPY_build_id_link_output);
+ Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo);
+ Config.SymbolsPrefix = InputArgs.getLastArgValue(OBJCOPY_prefix_symbols);
+
+ for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbol)) {
+ if (!StringRef(Arg->getValue()).contains('='))
+ error("Bad format for --redefine-sym");
+ auto Old2New = StringRef(Arg->getValue()).split('=');
+ if (!Config.SymbolsToRename.insert(Old2New).second)
+ error("Multiple redefinition of symbol " + Old2New.first);
+ }
+
+ for (auto Arg : InputArgs.filtered(OBJCOPY_rename_section)) {
+ SectionRename SR = parseRenameSectionValue(StringRef(Arg->getValue()));
+ if (!Config.SectionsToRename.try_emplace(SR.OriginalName, SR).second)
+ error("Multiple renames of section " + SR.OriginalName);
+ }
+
+ for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section))
+ Config.ToRemove.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_keep_section))
+ Config.KeepSection.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_only_section))
+ Config.OnlySection.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_add_section))
+ Config.AddSection.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_dump_section))
+ Config.DumpSection.push_back(Arg->getValue());
+ Config.StripAll = InputArgs.hasArg(OBJCOPY_strip_all);
+ Config.StripAllGNU = InputArgs.hasArg(OBJCOPY_strip_all_gnu);
+ Config.StripDebug = InputArgs.hasArg(OBJCOPY_strip_debug);
+ Config.StripDWO = InputArgs.hasArg(OBJCOPY_strip_dwo);
+ Config.StripSections = InputArgs.hasArg(OBJCOPY_strip_sections);
+ Config.StripNonAlloc = InputArgs.hasArg(OBJCOPY_strip_non_alloc);
+ Config.StripUnneeded = InputArgs.hasArg(OBJCOPY_strip_unneeded);
+ Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo);
+ Config.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
+ Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken);
+ Config.DiscardAll = InputArgs.hasArg(OBJCOPY_discard_all);
+ Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug);
+ Config.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
+ Config.DecompressDebugSections =
+ InputArgs.hasArg(OBJCOPY_decompress_debug_sections);
+ for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol))
+ Config.SymbolsToLocalize.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol))
+ Config.SymbolsToKeepGlobal.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbols))
+ addGlobalSymbolsFromFile(Config.SymbolsToKeepGlobal, Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol))
+ Config.SymbolsToGlobalize.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol))
+ Config.SymbolsToWeaken.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol))
+ Config.SymbolsToRemove.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol))
+ Config.SymbolsToKeep.push_back(Arg->getValue());
+
+ Config.DeterministicArchives = InputArgs.hasFlag(
+ OBJCOPY_enable_deterministic_archives,
+ OBJCOPY_disable_deterministic_archives, /*default=*/true);
+
+ Config.PreserveDates = InputArgs.hasArg(OBJCOPY_preserve_dates);
+
+ if (Config.DecompressDebugSections &&
+ Config.CompressionType != DebugCompressionType::None) {
+ error("Cannot specify --compress-debug-sections at the same time as "
+ "--decompress-debug-sections at the same time");
+ }
+
+ if (Config.DecompressDebugSections && !zlib::isAvailable())
+ error("LLVM was not compiled with LLVM_ENABLE_ZLIB: cannot decompress.");
+
+ DriverConfig DC;
+ DC.CopyConfigs.push_back(std::move(Config));
+ return DC;
+}
+
+// ParseStripOptions returns the config and sets the input arguments. If a
+// help flag is set then ParseStripOptions will print the help message and
+// exit.
+DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr) {
+ StripOptTable T;
+ unsigned MissingArgumentIndex, MissingArgumentCount;
+ llvm::opt::InputArgList InputArgs =
+ T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
+
+ if (InputArgs.size() == 0) {
+ T.PrintHelp(errs(), "llvm-strip [options] file...", "strip tool");
+ exit(1);
+ }
+
+ if (InputArgs.hasArg(STRIP_help)) {
+ T.PrintHelp(outs(), "llvm-strip [options] file...", "strip tool");
+ exit(0);
+ }
+
+ if (InputArgs.hasArg(STRIP_version)) {
+ outs() << "llvm-strip, compatible with GNU strip\n";
+ cl::PrintVersionMessage();
+ exit(0);
+ }
+
+ SmallVector<const char *, 2> Positional;
+ for (auto Arg : InputArgs.filtered(STRIP_UNKNOWN))
+ error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
+ for (auto Arg : InputArgs.filtered(STRIP_INPUT))
+ Positional.push_back(Arg->getValue());
+
+ if (Positional.empty())
+ error("No input file specified");
+
+ if (Positional.size() > 1 && InputArgs.hasArg(STRIP_output))
+ error("Multiple input files cannot be used in combination with -o");
+
+ CopyConfig Config;
+ Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug);
+
+ Config.DiscardAll = InputArgs.hasArg(STRIP_discard_all);
+ Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded);
+ Config.StripAll = InputArgs.hasArg(STRIP_strip_all);
+ Config.StripAllGNU = InputArgs.hasArg(STRIP_strip_all_gnu);
+
+ if (!Config.StripDebug && !Config.StripUnneeded && !Config.DiscardAll &&
+ !Config.StripAllGNU)
+ Config.StripAll = true;
+
+ for (auto Arg : InputArgs.filtered(STRIP_keep_section))
+ Config.KeepSection.push_back(Arg->getValue());
+
+ for (auto Arg : InputArgs.filtered(STRIP_remove_section))
+ Config.ToRemove.push_back(Arg->getValue());
+
+ for (auto Arg : InputArgs.filtered(STRIP_keep_symbol))
+ Config.SymbolsToKeep.push_back(Arg->getValue());
+
+ Config.DeterministicArchives =
+ InputArgs.hasFlag(STRIP_enable_deterministic_archives,
+ STRIP_disable_deterministic_archives, /*default=*/true);
+
+ Config.PreserveDates = InputArgs.hasArg(STRIP_preserve_dates);
+
+ DriverConfig DC;
+ if (Positional.size() == 1) {
+ Config.InputFilename = Positional[0];
+ Config.OutputFilename =
+ InputArgs.getLastArgValue(STRIP_output, Positional[0]);
+ DC.CopyConfigs.push_back(std::move(Config));
+ } else {
+ for (const char *Filename : Positional) {
+ Config.InputFilename = Filename;
+ Config.OutputFilename = Filename;
+ DC.CopyConfigs.push_back(Config);
+ }
+ }
+
+ return DC;
+}
+
+} // namespace objcopy
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/CopyConfig.h b/contrib/llvm/tools/llvm-objcopy/CopyConfig.h
new file mode 100644
index 000000000000..71a2423ae1c8
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/CopyConfig.h
@@ -0,0 +1,119 @@
+//===- CopyConfig.h -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H
+#define LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+// Necessary for llvm::DebugCompressionType::None
+#include "llvm/Target/TargetOptions.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+
+// This type keeps track of the machine info for various architectures. This
+// lets us map architecture names to ELF types and the e_machine value of the
+// ELF file.
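+// For example, an "elf64-x86-64" target corresponds to {EM_X86_64,
+// /*Is64Bit=*/true, /*IsLittleEndian=*/true}.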
+struct MachineInfo {
+ uint16_t EMachine;
+ bool Is64Bit;
+ bool IsLittleEndian;
+};
+
+struct SectionRename {
+ StringRef OriginalName;
+ StringRef NewName;
+ Optional<uint64_t> NewFlags;
+};
+
+// Configuration for copying/stripping a single file.
+struct CopyConfig {
+ // Main input/output options
+ StringRef InputFilename;
+ StringRef InputFormat;
+ StringRef OutputFilename;
+ StringRef OutputFormat;
+
+ // Only applicable for --input-format=binary
+ MachineInfo BinaryArch;
+ // Only applicable when --output-format!=binary (e.g. elf64-x86-64).
+ Optional<MachineInfo> OutputArch;
+
+ // Advanced options
+ StringRef AddGnuDebugLink;
+ StringRef BuildIdLinkDir;
+ Optional<StringRef> BuildIdLinkInput;
+ Optional<StringRef> BuildIdLinkOutput;
+ StringRef SplitDWO;
+ StringRef SymbolsPrefix;
+
+ // Repeated options
+ std::vector<StringRef> AddSection;
+ std::vector<StringRef> DumpSection;
+ std::vector<StringRef> KeepSection;
+ std::vector<StringRef> OnlySection;
+ std::vector<StringRef> SymbolsToGlobalize;
+ std::vector<StringRef> SymbolsToKeep;
+ std::vector<StringRef> SymbolsToLocalize;
+ std::vector<StringRef> SymbolsToRemove;
+ std::vector<StringRef> SymbolsToWeaken;
+ std::vector<StringRef> ToRemove;
+ std::vector<std::string> SymbolsToKeepGlobal;
+
+ // Map options
+ StringMap<SectionRename> SectionsToRename;
+ StringMap<StringRef> SymbolsToRename;
+
+ // Boolean options
+ bool DeterministicArchives = true;
+ bool DiscardAll = false;
+ bool ExtractDWO = false;
+ bool KeepFileSymbols = false;
+ bool LocalizeHidden = false;
+ bool OnlyKeepDebug = false;
+ bool PreserveDates = false;
+ bool StripAll = false;
+ bool StripAllGNU = false;
+ bool StripDWO = false;
+ bool StripDebug = false;
+ bool StripNonAlloc = false;
+ bool StripSections = false;
+ bool StripUnneeded = false;
+ bool Weaken = false;
+ bool DecompressDebugSections = false;
+ DebugCompressionType CompressionType = DebugCompressionType::None;
+};
+
+// Configuration for the overall invocation of this tool. When invoked as
+// objcopy, will always contain exactly one CopyConfig. When invoked as strip,
+// will contain one or more CopyConfigs.
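+// For example, "llvm-strip a.o b.o" produces one CopyConfig per input file,
+// each stripping that file in place.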
+struct DriverConfig {
+ SmallVector<CopyConfig, 1> CopyConfigs;
+};
+
+// parseObjcopyOptions returns the config and sets the input arguments. If a
+// help flag is set then parseObjcopyOptions will print the help message and
+// exit.
+DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr);
+
+// parseStripOptions returns the config and sets the input arguments. If a
+// help flag is set then parseStripOptions will print the help message and
+// exit.
+DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr);
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
new file mode 100644
index 000000000000..f5ab8e708267
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -0,0 +1,584 @@
+//===- ELFObjcopy.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ELFObjcopy.h"
+#include "Buffer.h"
+#include "CopyConfig.h"
+#include "Object.h"
+#include "llvm-objcopy.h"
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdlib>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+
+namespace llvm {
+namespace objcopy {
+namespace elf {
+
+using namespace object;
+using namespace ELF;
+using SectionPred = std::function<bool(const SectionBase &Sec)>;
+
+static bool isDebugSection(const SectionBase &Sec) {
+ return StringRef(Sec.Name).startswith(".debug") ||
+ StringRef(Sec.Name).startswith(".zdebug") || Sec.Name == ".gdb_index";
+}
+
+static bool isDWOSection(const SectionBase &Sec) {
+ return StringRef(Sec.Name).endswith(".dwo");
+}
+
+static bool onlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) {
+ // We can't remove the section header string table.
+ if (&Sec == Obj.SectionNames)
+ return false;
+ // Short of keeping the string table we want to keep everything that is a DWO
+ // section and remove everything else.
+ return !isDWOSection(Sec);
+}
+
+static ElfType getOutputElfType(const Binary &Bin) {
+ // Infer output ELF type from the input ELF object
+ if (isa<ELFObjectFile<ELF32LE>>(Bin))
+ return ELFT_ELF32LE;
+ if (isa<ELFObjectFile<ELF64LE>>(Bin))
+ return ELFT_ELF64LE;
+ if (isa<ELFObjectFile<ELF32BE>>(Bin))
+ return ELFT_ELF32BE;
+ if (isa<ELFObjectFile<ELF64BE>>(Bin))
+ return ELFT_ELF64BE;
+ llvm_unreachable("Invalid ELFType");
+}
+
+static ElfType getOutputElfType(const MachineInfo &MI) {
+ // Infer output ELF type from the binary arch specified
+ if (MI.Is64Bit)
+ return MI.IsLittleEndian ? ELFT_ELF64LE : ELFT_ELF64BE;
+ else
+ return MI.IsLittleEndian ? ELFT_ELF32LE : ELFT_ELF32BE;
+}
+
+static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
+ Object &Obj, Buffer &Buf,
+ ElfType OutputElfType) {
+ if (Config.OutputFormat == "binary") {
+ return llvm::make_unique<BinaryWriter>(Obj, Buf);
+ }
+ // Depending on the initial ELFT and OutputFormat we need a different Writer.
+ switch (OutputElfType) {
+ case ELFT_ELF32LE:
+ return llvm::make_unique<ELFWriter<ELF32LE>>(Obj, Buf,
+ !Config.StripSections);
+ case ELFT_ELF64LE:
+ return llvm::make_unique<ELFWriter<ELF64LE>>(Obj, Buf,
+ !Config.StripSections);
+ case ELFT_ELF32BE:
+ return llvm::make_unique<ELFWriter<ELF32BE>>(Obj, Buf,
+ !Config.StripSections);
+ case ELFT_ELF64BE:
+ return llvm::make_unique<ELFWriter<ELF64BE>>(Obj, Buf,
+ !Config.StripSections);
+ }
+ llvm_unreachable("Invalid output format");
+}
+
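+// Searches the PT_NOTE program headers for an NT_GNU_BUILD_ID note and returns
+// its descriptor bytes, or an error if no build ID note is present.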
+template <class ELFT>
+static Expected<ArrayRef<uint8_t>>
+findBuildID(const object::ELFFile<ELFT> &In) {
+ for (const auto &Phdr : unwrapOrError(In.program_headers())) {
+ if (Phdr.p_type != PT_NOTE)
+ continue;
+ Error Err = Error::success();
+ for (const auto &Note : In.notes(Phdr, Err))
+ if (Note.getType() == NT_GNU_BUILD_ID && Note.getName() == ELF_NOTE_GNU)
+ return Note.getDesc();
+ if (Err)
+ return std::move(Err);
+ }
+ return createStringError(llvm::errc::invalid_argument,
+ "Could not find build ID.");
+}
+
+static Expected<ArrayRef<uint8_t>>
+findBuildID(const object::ELFObjectFileBase &In) {
+ if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(&In))
+ return findBuildID(*O->getELFFile());
+ else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(&In))
+ return findBuildID(*O->getELFFile());
+ else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(&In))
+ return findBuildID(*O->getELFFile());
+ else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(&In))
+ return findBuildID(*O->getELFFile());
+
+ llvm_unreachable("Bad file format");
+}
+
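+// Hard-links ToLink into Config.BuildIdLinkDir, deriving the path from the
+// build ID: the first byte becomes a subdirectory and the remaining bytes plus
+// Suffix become the file name, e.g. (illustrative) <dir>/ab/cdef....debug for a
+// build ID beginning 0xab 0xcd 0xef... and Suffix ".debug".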
+static void linkToBuildIdDir(const CopyConfig &Config, StringRef ToLink,
+ StringRef Suffix, ArrayRef<uint8_t> BuildIdBytes) {
+ SmallString<128> Path = Config.BuildIdLinkDir;
+ sys::path::append(Path, llvm::toHex(BuildIdBytes[0], /*LowerCase*/ true));
+ if (auto EC = sys::fs::create_directories(Path))
+ error("cannot create build ID link directory " + Path + ": " +
+ EC.message());
+
+ sys::path::append(Path,
+ llvm::toHex(BuildIdBytes.slice(1), /*LowerCase*/ true));
+ Path += Suffix;
+ if (auto EC = sys::fs::create_hard_link(ToLink, Path)) {
+ // Hard linking failed, try to remove the file first if it exists.
+ if (sys::fs::exists(Path))
+ sys::fs::remove(Path);
+ EC = sys::fs::create_hard_link(ToLink, Path);
+ if (EC)
+ error("cannot link " + ToLink + " to " + Path + ": " + EC.message());
+ }
+}
+
+static void splitDWOToFile(const CopyConfig &Config, const Reader &Reader,
+ StringRef File, ElfType OutputElfType) {
+ auto DWOFile = Reader.create();
+ DWOFile->removeSections(
+ [&](const SectionBase &Sec) { return onlyKeepDWOPred(*DWOFile, Sec); });
+ if (Config.OutputArch)
+ DWOFile->Machine = Config.OutputArch.getValue().EMachine;
+ FileBuffer FB(File);
+ auto Writer = createWriter(Config, *DWOFile, FB, OutputElfType);
+ Writer->finalize();
+ Writer->write();
+}
+
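+// Writes the raw contents of the section named SecName to Filename; this backs
+// the --dump-section <section>=<file> handling in handleArgs below.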
+static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
+ Object &Obj) {
+ for (auto &Sec : Obj.sections()) {
+ if (Sec.Name == SecName) {
+ if (Sec.OriginalData.empty())
+ return make_error<StringError>("Can't dump section \"" + SecName +
+ "\": it has no contents",
+ object_error::parse_failed);
+ Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+ FileOutputBuffer::create(Filename, Sec.OriginalData.size());
+ if (!BufferOrErr)
+ return BufferOrErr.takeError();
+ std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+ std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(),
+ Buf->getBufferStart());
+ if (Error E = Buf->commit())
+ return E;
+ return Error::success();
+ }
+ }
+ return make_error<StringError>("Section not found",
+ object_error::parse_failed);
+}
+
+static bool isCompressed(const SectionBase &Section) {
+ const char *Magic = "ZLIB";
+ return StringRef(Section.Name).startswith(".zdebug") ||
+ (Section.OriginalData.size() > strlen(Magic) &&
+ !strncmp(reinterpret_cast<const char *>(Section.OriginalData.data()),
+ Magic, strlen(Magic))) ||
+ (Section.Flags & ELF::SHF_COMPRESSED);
+}
+
+static bool isCompressable(const SectionBase &Section) {
+ return !isCompressed(Section) && isDebugSection(Section) &&
+ Section.Name != ".gdb_index";
+}
+
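+// Replaces every section matched by shouldReplace with the section returned by
+// addSection, retargets any relocation section that referenced the original,
+// and extends RemovePred so that the originals are removed afterwards.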
+static void replaceDebugSections(
+ const CopyConfig &Config, Object &Obj, SectionPred &RemovePred,
+ function_ref<bool(const SectionBase &)> shouldReplace,
+ function_ref<SectionBase *(const SectionBase *)> addSection) {
+ SmallVector<SectionBase *, 13> ToReplace;
+ SmallVector<RelocationSection *, 13> RelocationSections;
+ for (auto &Sec : Obj.sections()) {
+ if (RelocationSection *R = dyn_cast<RelocationSection>(&Sec)) {
+ if (shouldReplace(*R->getSection()))
+ RelocationSections.push_back(R);
+ continue;
+ }
+
+ if (shouldReplace(Sec))
+ ToReplace.push_back(&Sec);
+ }
+
+ for (SectionBase *S : ToReplace) {
+ SectionBase *NewSection = addSection(S);
+
+ for (RelocationSection *RS : RelocationSections) {
+ if (RS->getSection() == S)
+ RS->setSection(NewSection);
+ }
+ }
+
+ RemovePred = [shouldReplace, RemovePred](const SectionBase &Sec) {
+ return shouldReplace(Sec) || RemovePred(Sec);
+ };
+}
+
+// This function handles the high-level operations of GNU objcopy, including
+// handling command line options. It is important to outline certain properties
+// we expect these operations to satisfy. Any operation that "keeps" should
+// keep regardless of a remove. Additionally, any removal should respect any
+// previous removals. Lastly, whether or not something is removed shouldn't
+// depend a) on the order the options occur in or b) on some opaque priority
+// system. The only priority is that keeps/copies overrule removes.
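+//
+// These properties fall out of how RemovePred is composed below: each option
+// wraps the previous predicate rather than replacing it, e.g. --strip-debug
+// becomes
+//   RemovePred = [RemovePred](const SectionBase &Sec) {
+//     return RemovePred(Sec) || isDebugSection(Sec);
+//   };
+// while the "keep" options short-circuit to false for matching sections before
+// deferring to the chain.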
+static void handleArgs(const CopyConfig &Config, Object &Obj,
+ const Reader &Reader, ElfType OutputElfType) {
+
+ if (!Config.SplitDWO.empty()) {
+ splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType);
+ }
+ if (Config.OutputArch)
+ Obj.Machine = Config.OutputArch.getValue().EMachine;
+
+ // TODO: update or remove symbols only if there is an option that affects
+ // them.
+ if (Obj.SymbolTable) {
+ Obj.SymbolTable->updateSymbols([&](Symbol &Sym) {
+ if (!Sym.isCommon() &&
+ ((Config.LocalizeHidden &&
+ (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
+ is_contained(Config.SymbolsToLocalize, Sym.Name)))
+ Sym.Binding = STB_LOCAL;
+
+ // Note: these two globalize flags have very similar names but different
+ // meanings:
+ //
+ // --globalize-symbol: promote a symbol to global
+ // --keep-global-symbol: all symbols except for these should be made local
+ //
+ // If --globalize-symbol is specified for a given symbol, it will be
+ // global in the output file even if it is not included via
+ // --keep-global-symbol. Because of that, make sure to check
+ // --globalize-symbol second.
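+ // For example (illustrative), with "--keep-global-symbol foo
+ // --globalize-symbol bar", the symbol bar is first made local by the
+ // keep-global check below and then promoted back to global by the
+ // globalize check that follows it.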
+ if (!Config.SymbolsToKeepGlobal.empty() &&
+ !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) &&
+ Sym.getShndx() != SHN_UNDEF)
+ Sym.Binding = STB_LOCAL;
+
+ if (is_contained(Config.SymbolsToGlobalize, Sym.Name) &&
+ Sym.getShndx() != SHN_UNDEF)
+ Sym.Binding = STB_GLOBAL;
+
+ if (is_contained(Config.SymbolsToWeaken, Sym.Name) &&
+ Sym.Binding == STB_GLOBAL)
+ Sym.Binding = STB_WEAK;
+
+ if (Config.Weaken && Sym.Binding == STB_GLOBAL &&
+ Sym.getShndx() != SHN_UNDEF)
+ Sym.Binding = STB_WEAK;
+
+ const auto I = Config.SymbolsToRename.find(Sym.Name);
+ if (I != Config.SymbolsToRename.end())
+ Sym.Name = I->getValue();
+
+ if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION)
+ Sym.Name = (Config.SymbolsPrefix + Sym.Name).str();
+ });
+
+ // The purpose of this loop is to mark symbols referenced by sections
+ // (like GroupSection or RelocationSection). This way, we know which
+ // symbols are still 'needed' and which are not.
+ if (Config.StripUnneeded) {
+ for (auto &Section : Obj.sections())
+ Section.markSymbols();
+ }
+
+ Obj.removeSymbols([&](const Symbol &Sym) {
+ if (is_contained(Config.SymbolsToKeep, Sym.Name) ||
+ (Config.KeepFileSymbols && Sym.Type == STT_FILE))
+ return false;
+
+ if (Config.DiscardAll && Sym.Binding == STB_LOCAL &&
+ Sym.getShndx() != SHN_UNDEF && Sym.Type != STT_FILE &&
+ Sym.Type != STT_SECTION)
+ return true;
+
+ if (Config.StripAll || Config.StripAllGNU)
+ return true;
+
+ if (is_contained(Config.SymbolsToRemove, Sym.Name))
+ return true;
+
+ if (Config.StripUnneeded && !Sym.Referenced &&
+ (Sym.Binding == STB_LOCAL || Sym.getShndx() == SHN_UNDEF) &&
+ Sym.Type != STT_FILE && Sym.Type != STT_SECTION)
+ return true;
+
+ return false;
+ });
+ }
+
+ SectionPred RemovePred = [](const SectionBase &) { return false; };
+
+ // Removes:
+ if (!Config.ToRemove.empty()) {
+ RemovePred = [&Config](const SectionBase &Sec) {
+ return is_contained(Config.ToRemove, Sec.Name);
+ };
+ }
+
+ if (Config.StripDWO || !Config.SplitDWO.empty())
+ RemovePred = [RemovePred](const SectionBase &Sec) {
+ return isDWOSection(Sec) || RemovePred(Sec);
+ };
+
+ if (Config.ExtractDWO)
+ RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+ return onlyKeepDWOPred(Obj, Sec) || RemovePred(Sec);
+ };
+
+ if (Config.StripAllGNU)
+ RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+ if (RemovePred(Sec))
+ return true;
+ if ((Sec.Flags & SHF_ALLOC) != 0)
+ return false;
+ if (&Sec == Obj.SectionNames)
+ return false;
+ switch (Sec.Type) {
+ case SHT_SYMTAB:
+ case SHT_REL:
+ case SHT_RELA:
+ case SHT_STRTAB:
+ return true;
+ }
+ return isDebugSection(Sec);
+ };
+
+ if (Config.StripSections) {
+ RemovePred = [RemovePred](const SectionBase &Sec) {
+ return RemovePred(Sec) || (Sec.Flags & SHF_ALLOC) == 0;
+ };
+ }
+
+ if (Config.StripDebug) {
+ RemovePred = [RemovePred](const SectionBase &Sec) {
+ return RemovePred(Sec) || isDebugSection(Sec);
+ };
+ }
+
+ if (Config.StripNonAlloc)
+ RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+ if (RemovePred(Sec))
+ return true;
+ if (&Sec == Obj.SectionNames)
+ return false;
+ return (Sec.Flags & SHF_ALLOC) == 0;
+ };
+
+ if (Config.StripAll)
+ RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+ if (RemovePred(Sec))
+ return true;
+ if (&Sec == Obj.SectionNames)
+ return false;
+ if (StringRef(Sec.Name).startswith(".gnu.warning"))
+ return false;
+ return (Sec.Flags & SHF_ALLOC) == 0;
+ };
+
+ // Explicit copies:
+ if (!Config.OnlySection.empty()) {
+ RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) {
+ // Explicitly keep these sections regardless of previous removes.
+ if (is_contained(Config.OnlySection, Sec.Name))
+ return false;
+
+ // Allow all implicit removes.
+ if (RemovePred(Sec))
+ return true;
+
+ // Keep special sections.
+ if (Obj.SectionNames == &Sec)
+ return false;
+ if (Obj.SymbolTable == &Sec ||
+ (Obj.SymbolTable && Obj.SymbolTable->getStrTab() == &Sec))
+ return false;
+
+ // Remove everything else.
+ return true;
+ };
+ }
+
+ if (!Config.KeepSection.empty()) {
+ RemovePred = [&Config, RemovePred](const SectionBase &Sec) {
+ // Explicitly keep these sections regardless of previous removes.
+ if (is_contained(Config.KeepSection, Sec.Name))
+ return false;
+ // Otherwise defer to RemovePred.
+ return RemovePred(Sec);
+ };
+ }
+
+ // This has to be the last predicate assignment.
+ // If the option --keep-symbol has been specified
+ // and at least one of those symbols is present
+ // (equivalently, the updated symbol table is not empty)
+ // the symbol table and the string table should not be removed.
+ if ((!Config.SymbolsToKeep.empty() || Config.KeepFileSymbols) &&
+ Obj.SymbolTable && !Obj.SymbolTable->empty()) {
+ RemovePred = [&Obj, RemovePred](const SectionBase &Sec) {
+ if (&Sec == Obj.SymbolTable || &Sec == Obj.SymbolTable->getStrTab())
+ return false;
+ return RemovePred(Sec);
+ };
+ }
+
+ if (Config.CompressionType != DebugCompressionType::None)
+ replaceDebugSections(Config, Obj, RemovePred, isCompressable,
+ [&Config, &Obj](const SectionBase *S) {
+ return &Obj.addSection<CompressedSection>(
+ *S, Config.CompressionType);
+ });
+ else if (Config.DecompressDebugSections)
+ replaceDebugSections(
+ Config, Obj, RemovePred,
+ [](const SectionBase &S) { return isa<CompressedSection>(&S); },
+ [&Obj](const SectionBase *S) {
+ auto CS = cast<CompressedSection>(S);
+ return &Obj.addSection<DecompressedSection>(*CS);
+ });
+
+ Obj.removeSections(RemovePred);
+
+ if (!Config.SectionsToRename.empty()) {
+ for (auto &Sec : Obj.sections()) {
+ const auto Iter = Config.SectionsToRename.find(Sec.Name);
+ if (Iter != Config.SectionsToRename.end()) {
+ const SectionRename &SR = Iter->second;
+ Sec.Name = SR.NewName;
+ if (SR.NewFlags.hasValue()) {
+ // Preserve some flags which should not be dropped when setting flags.
+ // Also, preserve anything OS/processor dependent.
+ const uint64_t PreserveMask = ELF::SHF_COMPRESSED | ELF::SHF_EXCLUDE |
+ ELF::SHF_GROUP | ELF::SHF_LINK_ORDER |
+ ELF::SHF_MASKOS | ELF::SHF_MASKPROC |
+ ELF::SHF_TLS | ELF::SHF_INFO_LINK;
+ Sec.Flags = (Sec.Flags & PreserveMask) |
+ (SR.NewFlags.getValue() & ~PreserveMask);
+ }
+ }
+ }
+ }
+
+ if (!Config.AddSection.empty()) {
+ for (const auto &Flag : Config.AddSection) {
+ std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+ StringRef SecName = SecPair.first;
+ StringRef File = SecPair.second;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+ MemoryBuffer::getFile(File);
+ if (!BufOrErr)
+ reportError(File, BufOrErr.getError());
+ std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
+ ArrayRef<uint8_t> Data(
+ reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
+ Buf->getBufferSize());
+ OwnedDataSection &NewSection =
+ Obj.addSection<OwnedDataSection>(SecName, Data);
+ if (SecName.startswith(".note") && SecName != ".note.GNU-stack")
+ NewSection.Type = SHT_NOTE;
+ }
+ }
+
+ if (!Config.DumpSection.empty()) {
+ for (const auto &Flag : Config.DumpSection) {
+ std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+ StringRef SecName = SecPair.first;
+ StringRef File = SecPair.second;
+ if (Error E = dumpSectionToFile(SecName, File, Obj))
+ reportError(Config.InputFilename, std::move(E));
+ }
+ }
+
+ if (!Config.AddGnuDebugLink.empty())
+ Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink);
+}
+
+void executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
+ Buffer &Out) {
+ BinaryReader Reader(Config.BinaryArch, &In);
+ std::unique_ptr<Object> Obj = Reader.create();
+
+ // Prefer OutputArch (-O<format>) if set, otherwise fall back to BinaryArch
+ // (-B<arch>).
+ const ElfType OutputElfType = getOutputElfType(
+ Config.OutputArch ? Config.OutputArch.getValue() : Config.BinaryArch);
+ handleArgs(Config, *Obj, Reader, OutputElfType);
+ std::unique_ptr<Writer> Writer =
+ createWriter(Config, *Obj, Out, OutputElfType);
+ Writer->finalize();
+ Writer->write();
+}
+
+void executeObjcopyOnBinary(const CopyConfig &Config,
+ object::ELFObjectFileBase &In, Buffer &Out) {
+ ELFReader Reader(&In);
+ std::unique_ptr<Object> Obj = Reader.create();
+ // Prefer OutputArch (-O<format>) if set, otherwise infer it from the input.
+ const ElfType OutputElfType =
+ Config.OutputArch ? getOutputElfType(Config.OutputArch.getValue())
+ : getOutputElfType(In);
+ ArrayRef<uint8_t> BuildIdBytes;
+
+ if (!Config.BuildIdLinkDir.empty()) {
+ BuildIdBytes = unwrapOrError(findBuildID(In));
+ if (BuildIdBytes.size() < 2)
+ error("build ID in file '" + Config.InputFilename +
+ "' is smaller than two bytes");
+ }
+
+ if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkInput) {
+ linkToBuildIdDir(Config, Config.InputFilename,
+ Config.BuildIdLinkInput.getValue(), BuildIdBytes);
+ }
+ handleArgs(Config, *Obj, Reader, OutputElfType);
+ std::unique_ptr<Writer> Writer =
+ createWriter(Config, *Obj, Out, OutputElfType);
+ Writer->finalize();
+ Writer->write();
+ if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkOutput) {
+ linkToBuildIdDir(Config, Config.OutputFilename,
+ Config.BuildIdLinkOutput.getValue(), BuildIdBytes);
+ }
+}
+
+} // end namespace elf
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h b/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h
new file mode 100644
index 000000000000..43f41c00ce5b
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h
@@ -0,0 +1,34 @@
+//===- ELFObjcopy.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H
+#define LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H
+
+namespace llvm {
+class MemoryBuffer;
+
+namespace object {
+class ELFObjectFileBase;
+} // end namespace object
+
+namespace objcopy {
+struct CopyConfig;
+class Buffer;
+
+namespace elf {
+void executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
+ Buffer &Out);
+void executeObjcopyOnBinary(const CopyConfig &Config,
+ object::ELFObjectFileBase &In, Buffer &Out);
+
+} // end namespace elf
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H
diff --git a/contrib/llvm/tools/llvm-objcopy/Object.cpp b/contrib/llvm/tools/llvm-objcopy/ELF/Object.cpp
index 7e88f5263a39..3d3e029c09eb 100644
--- a/contrib/llvm/tools/llvm-objcopy/Object.cpp
+++ b/contrib/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -15,7 +15,9 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Path.h"
@@ -26,45 +28,14 @@
#include <utility>
#include <vector>
-using namespace llvm;
-using namespace llvm::objcopy;
+namespace llvm {
+namespace objcopy {
+namespace elf {
+
using namespace object;
using namespace ELF;
-Buffer::~Buffer() {}
-
-void FileBuffer::allocate(size_t Size) {
- Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
- FileOutputBuffer::create(getName(), Size, FileOutputBuffer::F_executable);
- handleAllErrors(BufferOrErr.takeError(), [this](const ErrorInfoBase &E) {
- error("failed to open " + getName() + ": " + E.message());
- });
- Buf = std::move(*BufferOrErr);
-}
-
-Error FileBuffer::commit() { return Buf->commit(); }
-
-uint8_t *FileBuffer::getBufferStart() {
- return reinterpret_cast<uint8_t *>(Buf->getBufferStart());
-}
-
-void MemBuffer::allocate(size_t Size) {
- Buf = WritableMemoryBuffer::getNewMemBuffer(Size, getName());
-}
-
-Error MemBuffer::commit() { return Error::success(); }
-
-uint8_t *MemBuffer::getBufferStart() {
- return reinterpret_cast<uint8_t *>(Buf->getBufferStart());
-}
-
-std::unique_ptr<WritableMemoryBuffer> MemBuffer::releaseMemoryBuffer() {
- return std::move(Buf);
-}
-
template <class ELFT> void ELFWriter<ELFT>::writePhdr(const Segment &Seg) {
- using Elf_Phdr = typename ELFT::Phdr;
-
uint8_t *B = Buf.getBufferStart();
B += Obj.ProgramHdrSegment.Offset + Seg.Index * sizeof(Elf_Phdr);
Elf_Phdr &Phdr = *reinterpret_cast<Elf_Phdr *>(B);
@@ -87,7 +58,7 @@ void SectionBase::markSymbols() {}
template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) {
uint8_t *B = Buf.getBufferStart();
B += Sec.HeaderOffset;
- typename ELFT::Shdr &Shdr = *reinterpret_cast<typename ELFT::Shdr *>(B);
+ Elf_Shdr &Shdr = *reinterpret_cast<Elf_Shdr *>(B);
Shdr.sh_name = Sec.NameIndex;
Shdr.sh_type = Sec.Type;
Shdr.sh_flags = Sec.Flags;
@@ -100,7 +71,46 @@ template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) {
Shdr.sh_entsize = Sec.EntrySize;
}
-SectionVisitor::~SectionVisitor() {}
+template <class ELFT> void ELFSectionSizer<ELFT>::visit(Section &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(OwnedDataSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(StringTableSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(DynamicRelocationSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(SymbolTableSection &Sec) {
+ Sec.EntrySize = sizeof(Elf_Sym);
+ Sec.Size = Sec.Symbols.size() * Sec.EntrySize;
+ // Align to the largest field in Elf_Sym.
+ Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
+}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(RelocationSection &Sec) {
+ Sec.EntrySize = Sec.Type == SHT_REL ? sizeof(Elf_Rel) : sizeof(Elf_Rela);
+ Sec.Size = Sec.Relocations.size() * Sec.EntrySize;
+ // Align to the largest field in Elf_Rel(a).
+ Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
+}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(GnuDebugLinkSection &Sec) {}
+
+template <class ELFT> void ELFSectionSizer<ELFT>::visit(GroupSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(SectionIndexSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(CompressedSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(DecompressedSection &Sec) {}
void BinarySectionWriter::visit(const SectionIndexSection &Sec) {
error("Cannot write symbol section index table '" + Sec.Name + "' ");
@@ -126,20 +136,169 @@ void SectionWriter::visit(const Section &Sec) {
if (Sec.Type == SHT_NOBITS)
return;
uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
- std::copy(std::begin(Sec.Contents), std::end(Sec.Contents), Buf);
+ llvm::copy(Sec.Contents, Buf);
}
void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); }
+void Section::accept(MutableSectionVisitor &Visitor) { Visitor.visit(*this); }
+
void SectionWriter::visit(const OwnedDataSection &Sec) {
uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
- std::copy(std::begin(Sec.Data), std::end(Sec.Data), Buf);
+ llvm::copy(Sec.Data, Buf);
+}
+
+static const std::vector<uint8_t> ZlibGnuMagic = {'Z', 'L', 'I', 'B'};
+
+static bool isDataGnuCompressed(ArrayRef<uint8_t> Data) {
+ return Data.size() > ZlibGnuMagic.size() &&
+ std::equal(ZlibGnuMagic.begin(), ZlibGnuMagic.end(), Data.data());
+}
+
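+// A GNU-style (".zdebug_*") compressed section starts with the 4-byte "ZLIB"
+// magic followed by the uncompressed size as a big-endian 64-bit integer,
+// whereas a standard compressed section starts with an Elf_Chdr header.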
+template <class ELFT>
+static std::tuple<uint64_t, uint64_t>
+getDecompressedSizeAndAlignment(ArrayRef<uint8_t> Data) {
+ const bool IsGnuDebug = isDataGnuCompressed(Data);
+ const uint64_t DecompressedSize =
+ IsGnuDebug
+ ? support::endian::read64be(reinterpret_cast<const uint64_t *>(
+ Data.data() + ZlibGnuMagic.size()))
+ : reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data.data())->ch_size;
+ const uint64_t DecompressedAlign =
+ IsGnuDebug ? 1
+ : reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data.data())
+ ->ch_addralign;
+
+ return std::make_tuple(DecompressedSize, DecompressedAlign);
+}
+
+template <class ELFT>
+void ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
+ uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
+
+ if (!zlib::isAvailable()) {
+ std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
+ return;
+ }
+
+ const size_t DataOffset = isDataGnuCompressed(Sec.OriginalData)
+ ? (ZlibGnuMagic.size() + sizeof(Sec.Size))
+ : sizeof(Elf_Chdr_Impl<ELFT>);
+
+ StringRef CompressedContent(
+ reinterpret_cast<const char *>(Sec.OriginalData.data()) + DataOffset,
+ Sec.OriginalData.size() - DataOffset);
+
+ SmallVector<char, 128> DecompressedContent;
+ if (Error E = zlib::uncompress(CompressedContent, DecompressedContent,
+ static_cast<size_t>(Sec.Size)))
+ reportError(Sec.Name, std::move(E));
+
+ std::copy(DecompressedContent.begin(), DecompressedContent.end(), Buf);
+}
+
+void BinarySectionWriter::visit(const DecompressedSection &Sec) {
+ error("Cannot write compressed section '" + Sec.Name + "' ");
+}
+
+void DecompressedSection::accept(SectionVisitor &Visitor) const {
+ Visitor.visit(*this);
+}
+
+void DecompressedSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
}
void OwnedDataSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void OwnedDataSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
+void BinarySectionWriter::visit(const CompressedSection &Sec) {
+ error("Cannot write compressed section '" + Sec.Name + "' ");
+}
+
+template <class ELFT>
+void ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
+ uint8_t *Buf = Out.getBufferStart();
+ Buf += Sec.Offset;
+
+ if (Sec.CompressionType == DebugCompressionType::None) {
+ std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
+ return;
+ }
+
+ if (Sec.CompressionType == DebugCompressionType::GNU) {
+ const char *Magic = "ZLIB";
+ memcpy(Buf, Magic, strlen(Magic));
+ Buf += strlen(Magic);
+ const uint64_t DecompressedSize =
+ support::endian::read64be(&Sec.DecompressedSize);
+ memcpy(Buf, &DecompressedSize, sizeof(DecompressedSize));
+ Buf += sizeof(DecompressedSize);
+ } else {
+ Elf_Chdr_Impl<ELFT> Chdr;
+ Chdr.ch_type = ELF::ELFCOMPRESS_ZLIB;
+ Chdr.ch_size = Sec.DecompressedSize;
+ Chdr.ch_addralign = Sec.DecompressedAlign;
+ memcpy(Buf, &Chdr, sizeof(Chdr));
+ Buf += sizeof(Chdr);
+ }
+
+ std::copy(Sec.CompressedData.begin(), Sec.CompressedData.end(), Buf);
+}
+
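+// With GNU-style compression the section is renamed by replacing its leading
+// '.' with ".z" (e.g. ".debug_info" becomes ".zdebug_info"); with the standard
+// style the name is kept and SHF_COMPRESSED plus an Elf_Chdr header are used
+// instead.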
+CompressedSection::CompressedSection(const SectionBase &Sec,
+ DebugCompressionType CompressionType)
+ : SectionBase(Sec), CompressionType(CompressionType),
+ DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) {
+
+ if (!zlib::isAvailable()) {
+ CompressionType = DebugCompressionType::None;
+ return;
+ }
+
+ if (Error E = zlib::compress(
+ StringRef(reinterpret_cast<const char *>(OriginalData.data()),
+ OriginalData.size()),
+ CompressedData))
+ reportError(Name, std::move(E));
+
+ size_t ChdrSize;
+ if (CompressionType == DebugCompressionType::GNU) {
+ Name = ".z" + Sec.Name.substr(1);
+ ChdrSize = sizeof("ZLIB") - 1 + sizeof(uint64_t);
+ } else {
+ Flags |= ELF::SHF_COMPRESSED;
+ ChdrSize =
+ std::max(std::max(sizeof(object::Elf_Chdr_Impl<object::ELF64LE>),
+ sizeof(object::Elf_Chdr_Impl<object::ELF64BE>)),
+ std::max(sizeof(object::Elf_Chdr_Impl<object::ELF32LE>),
+ sizeof(object::Elf_Chdr_Impl<object::ELF32BE>)));
+ }
+ Size = ChdrSize + CompressedData.size();
+ Align = 8;
+}
+
+CompressedSection::CompressedSection(ArrayRef<uint8_t> CompressedData,
+ uint64_t DecompressedSize,
+ uint64_t DecompressedAlign)
+ : CompressionType(DebugCompressionType::None),
+ DecompressedSize(DecompressedSize), DecompressedAlign(DecompressedAlign) {
+ OriginalData = CompressedData;
+}
+
+void CompressedSection::accept(SectionVisitor &Visitor) const {
+ Visitor.visit(*this);
+}
+
+void CompressedSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
void StringTableSection::addString(StringRef Name) {
StrTabBuilder.add(Name);
Size = StrTabBuilder.getSize();
@@ -159,11 +318,15 @@ void StringTableSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void StringTableSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
template <class ELFT>
void ELFSectionWriter<ELFT>::visit(const SectionIndexSection &Sec) {
uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
- auto *IndexesBuffer = reinterpret_cast<typename ELFT::Word *>(Buf);
- std::copy(std::begin(Sec.Indexes), std::end(Sec.Indexes), IndexesBuffer);
+ auto *IndexesBuffer = reinterpret_cast<Elf_Word *>(Buf);
+ llvm::copy(Sec.Indexes, IndexesBuffer);
}
void SectionIndexSection::initialize(SectionTableRef SecTable) {
@@ -182,6 +345,10 @@ void SectionIndexSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void SectionIndexSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) {
switch (Index) {
case SHN_ABS:
@@ -226,18 +393,20 @@ uint16_t Symbol::getShndx() const {
llvm_unreachable("Symbol with invalid ShndxType encountered");
}
+bool Symbol::isCommon() const { return getShndx() == SHN_COMMON; }
+
void SymbolTableSection::assignIndices() {
uint32_t Index = 0;
for (auto &Sym : Symbols)
Sym->Index = Index++;
}
-void SymbolTableSection::addSymbol(StringRef Name, uint8_t Bind, uint8_t Type,
+void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type,
SectionBase *DefinedIn, uint64_t Value,
uint8_t Visibility, uint16_t Shndx,
- uint64_t Sz) {
+ uint64_t Size) {
Symbol Sym;
- Sym.Name = Name;
+ Sym.Name = Name.str();
Sym.Binding = Bind;
Sym.Type = Type;
Sym.DefinedIn = DefinedIn;
@@ -251,7 +420,7 @@ void SymbolTableSection::addSymbol(StringRef Name, uint8_t Bind, uint8_t Type,
}
Sym.Value = Value;
Sym.Visibility = Visibility;
- Sym.Size = Sz;
+ Sym.Size = Size;
Sym.Index = Symbols.size();
Symbols.emplace_back(llvm::make_unique<Symbol>(Sym));
Size += this->EntrySize;
@@ -344,7 +513,7 @@ template <class ELFT>
void ELFSectionWriter<ELFT>::visit(const SymbolTableSection &Sec) {
uint8_t *Buf = Out.getBufferStart();
Buf += Sec.Offset;
- typename ELFT::Sym *Sym = reinterpret_cast<typename ELFT::Sym *>(Buf);
+ Elf_Sym *Sym = reinterpret_cast<Elf_Sym *>(Buf);
// Loop through symbols setting each entry of the symbol table.
for (auto &Symbol : Sec.Symbols) {
Sym->st_name = Symbol->NameIndex;
@@ -362,6 +531,10 @@ void SymbolTableSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void SymbolTableSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
template <class SymTabType>
void RelocSectionWithSymtabBase<SymTabType>::removeSectionReferences(
const SectionBase *Sec) {
@@ -377,11 +550,13 @@ void RelocSectionWithSymtabBase<SymTabType>::removeSectionReferences(
template <class SymTabType>
void RelocSectionWithSymtabBase<SymTabType>::initialize(
SectionTableRef SecTable) {
- setSymTab(SecTable.getSectionOfType<SymTabType>(
- Link,
- "Link field value " + Twine(Link) + " in section " + Name + " is invalid",
- "Link field value " + Twine(Link) + " in section " + Name +
- " is not a symbol table"));
+ if (Link != SHN_UNDEF)
+ setSymTab(SecTable.getSectionOfType<SymTabType>(
+ Link,
+ "Link field value " + Twine(Link) + " in section " + Name +
+ " is invalid",
+ "Link field value " + Twine(Link) + " in section " + Name +
+ " is not a symbol table"));
if (Info != SHN_UNDEF)
setSection(SecTable.getSection(Info, "Info field value " + Twine(Info) +
@@ -393,7 +568,8 @@ void RelocSectionWithSymtabBase<SymTabType>::initialize(
template <class SymTabType>
void RelocSectionWithSymtabBase<SymTabType>::finalize() {
- this->Link = Symbols->Index;
+ this->Link = Symbols ? Symbols->Index : 0;
+
if (SecToApplyRel != nullptr)
this->Info = SecToApplyRel->Index;
}
@@ -429,11 +605,15 @@ void RelocationSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void RelocationSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
void RelocationSection::removeSymbols(
function_ref<bool(const Symbol &)> ToRemove) {
for (const Relocation &Reloc : Relocations)
if (ToRemove(*Reloc.RelocSymbol))
- error("not stripping symbol `" + Reloc.RelocSymbol->Name +
+ error("not stripping symbol '" + Reloc.RelocSymbol->Name +
"' because it is named in a relocation");
}
@@ -443,7 +623,7 @@ void RelocationSection::markSymbols() {
}
void SectionWriter::visit(const DynamicRelocationSection &Sec) {
- std::copy(std::begin(Sec.Contents), std::end(Sec.Contents),
+ llvm::copy(Sec.Contents,
Out.getBufferStart() + Sec.Offset);
}
@@ -451,6 +631,10 @@ void DynamicRelocationSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
void Section::removeSectionReferences(const SectionBase *Sec) {
if (LinkSection == Sec) {
error("Section " + LinkSection->Name +
@@ -506,12 +690,12 @@ void GnuDebugLinkSection::init(StringRef File, StringRef Data) {
// establish the order that sections should go in. By using the maximum
// possible offset we cause this section to wind up at the end.
OriginalOffset = std::numeric_limits<uint64_t>::max();
- JamCRC crc;
- crc.update(ArrayRef<char>(Data.data(), Data.size()));
+ JamCRC CRC;
+ CRC.update(ArrayRef<char>(Data.data(), Data.size()));
// The CRC32 value needs to be complemented because the JamCRC doesn't
// finalize the CRC32 value. It also doesn't negate the initial CRC32 value
// but it starts by default at 0xFFFFFFFF which is the complement of zero.
- CRC32 = ~crc.getCRC();
+ CRC32 = ~CRC.getCRC();
}
GnuDebugLinkSection::GnuDebugLinkSection(StringRef File) : FileName(File) {
@@ -530,13 +714,17 @@ void ELFSectionWriter<ELFT>::visit(const GnuDebugLinkSection &Sec) {
Elf_Word *CRC =
reinterpret_cast<Elf_Word *>(Buf + Sec.Size - sizeof(Elf_Word));
*CRC = Sec.CRC32;
- std::copy(std::begin(Sec.FileName), std::end(Sec.FileName), File);
+ llvm::copy(Sec.FileName, File);
}
void GnuDebugLinkSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void GnuDebugLinkSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
template <class ELFT>
void ELFSectionWriter<ELFT>::visit(const GroupSection &Sec) {
ELF::Elf32_Word *Buf =
@@ -550,6 +738,10 @@ void GroupSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void GroupSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
// Returns true IFF a section is wholly inside the range of a segment
static bool sectionWithinSegment(const SectionBase &Section,
const Segment &Segment) {
@@ -589,6 +781,79 @@ static bool compareSegmentsByPAddr(const Segment *A, const Segment *B) {
return A->Index < B->Index;
}
+void BinaryELFBuilder::initFileHeader() {
+ Obj->Flags = 0x0;
+ Obj->Type = ET_REL;
+ Obj->OSABI = ELFOSABI_NONE;
+ Obj->ABIVersion = 0;
+ Obj->Entry = 0x0;
+ Obj->Machine = EMachine;
+ Obj->Version = 1;
+}
+
+void BinaryELFBuilder::initHeaderSegment() { Obj->ElfHdrSegment.Index = 0; }
+
+StringTableSection *BinaryELFBuilder::addStrTab() {
+ auto &StrTab = Obj->addSection<StringTableSection>();
+ StrTab.Name = ".strtab";
+
+ Obj->SectionNames = &StrTab;
+ return &StrTab;
+}
+
+SymbolTableSection *BinaryELFBuilder::addSymTab(StringTableSection *StrTab) {
+ auto &SymTab = Obj->addSection<SymbolTableSection>();
+
+ SymTab.Name = ".symtab";
+ SymTab.Link = StrTab->Index;
+
+ // The symbol table always needs a null symbol
+ SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0);
+
+ Obj->SymbolTable = &SymTab;
+ return &SymTab;
+}
+
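+// Adds the raw input as a .data section and defines the conventional
+// _binary_<sanitized filename>_start/_end/_size symbols for it.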
+void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
+ auto Data = ArrayRef<uint8_t>(
+ reinterpret_cast<const uint8_t *>(MemBuf->getBufferStart()),
+ MemBuf->getBufferSize());
+ auto &DataSection = Obj->addSection<Section>(Data);
+ DataSection.Name = ".data";
+ DataSection.Type = ELF::SHT_PROGBITS;
+ DataSection.Size = Data.size();
+ DataSection.Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE;
+
+ std::string SanitizedFilename = MemBuf->getBufferIdentifier().str();
+ std::replace_if(std::begin(SanitizedFilename), std::end(SanitizedFilename),
+ [](char C) { return !isalnum(C); }, '_');
+ Twine Prefix = Twine("_binary_") + SanitizedFilename;
+
+ SymTab->addSymbol(Prefix + "_start", STB_GLOBAL, STT_NOTYPE, &DataSection,
+ /*Value=*/0, STV_DEFAULT, 0, 0);
+ SymTab->addSymbol(Prefix + "_end", STB_GLOBAL, STT_NOTYPE, &DataSection,
+ /*Value=*/DataSection.Size, STV_DEFAULT, 0, 0);
+ SymTab->addSymbol(Prefix + "_size", STB_GLOBAL, STT_NOTYPE, nullptr,
+ /*Value=*/DataSection.Size, STV_DEFAULT, SHN_ABS, 0);
+}
+
+void BinaryELFBuilder::initSections() {
+ for (auto &Section : Obj->sections()) {
+ Section.initialize(Obj->sections());
+ }
+}
+
+std::unique_ptr<Object> BinaryELFBuilder::build() {
+ initFileHeader();
+ initHeaderSegment();
+ StringTableSection *StrTab = addStrTab();
+ SymbolTableSection *SymTab = addSymTab(StrTab);
+ initSections();
+ addData(SymTab);
+
+ return std::move(Obj);
+}
+
template <class ELFT> void ELFBuilder<ELFT>::setParentSegment(Segment &Child) {
for (auto &Parent : Obj.segments()) {
// Every segment will overlap with itself but we don't want a segment to
@@ -633,15 +898,6 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
}
auto &ElfHdr = Obj.ElfHdrSegment;
- // Creating multiple PT_PHDR segments technically is not valid, but PT_LOAD
- // segments must not overlap, and other types fit even less.
- ElfHdr.Type = PT_PHDR;
- ElfHdr.Flags = 0;
- ElfHdr.OriginalOffset = ElfHdr.Offset = 0;
- ElfHdr.VAddr = 0;
- ElfHdr.PAddr = 0;
- ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr);
- ElfHdr.Align = 0;
ElfHdr.Index = Index++;
const auto &Ehdr = *ElfFile.getHeader();
@@ -725,8 +981,7 @@ void ELFBuilder<ELFT>::initSymbolTable(SymbolTableSection *SymTab) {
Elf_Word Index = ShndxData[&Sym - Symbols.begin()];
DefSection = Obj.sections().getSection(
Index,
- "Symbol '" + Name + "' has invalid section index " +
- Twine(Index));
+ "Symbol '" + Name + "' has invalid section index " + Twine(Index));
} else if (Sym.st_shndx >= SHN_LORESERVE) {
if (!isValidReservedSectionIndex(Sym.st_shndx, Obj.Machine)) {
error(
@@ -828,10 +1083,20 @@ SectionBase &ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
}
case SHT_NOBITS:
return Obj.addSection<Section>(Data);
- default:
+ default: {
Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
+
+ if (isDataGnuCompressed(Data) || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) {
+ uint64_t DecompressedSize, DecompressedAlign;
+ std::tie(DecompressedSize, DecompressedAlign) =
+ getDecompressedSizeAndAlignment<ELFT>(Data);
+ return Obj.addSection<CompressedSection>(Data, DecompressedSize,
+ DecompressedAlign);
+ }
+
return Obj.addSection<Section>(Data);
}
+ }
}
template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
@@ -854,6 +1119,9 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
Sec.Align = Shdr.sh_addralign;
Sec.EntrySize = Shdr.sh_entsize;
Sec.Index = Index++;
+ Sec.OriginalData =
+ ArrayRef<uint8_t>(ElfFile.base() + Shdr.sh_offset,
+ (Shdr.sh_type == SHT_NOBITS) ? 0 : Shdr.sh_size);
}
// If a section index table exists we'll need to initialize it before we
@@ -894,7 +1162,8 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
template <class ELFT> void ELFBuilder<ELFT>::build() {
const auto &Ehdr = *ElfFile.getHeader();
- std::copy(Ehdr.e_ident, Ehdr.e_ident + 16, Obj.Ident);
+ Obj.OSABI = Ehdr.e_ident[EI_OSABI];
+ Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION];
Obj.Type = Ehdr.e_type;
Obj.Machine = Ehdr.e_machine;
Obj.Version = Ehdr.e_version;
@@ -926,34 +1195,26 @@ Writer::~Writer() {}
Reader::~Reader() {}
-ElfType ELFReader::getElfType() const {
- if (isa<ELFObjectFile<ELF32LE>>(Bin))
- return ELFT_ELF32LE;
- if (isa<ELFObjectFile<ELF64LE>>(Bin))
- return ELFT_ELF64LE;
- if (isa<ELFObjectFile<ELF32BE>>(Bin))
- return ELFT_ELF32BE;
- if (isa<ELFObjectFile<ELF64BE>>(Bin))
- return ELFT_ELF64BE;
- llvm_unreachable("Invalid ELFType");
+std::unique_ptr<Object> BinaryReader::create() const {
+ return BinaryELFBuilder(MInfo.EMachine, MemBuf).build();
}
std::unique_ptr<Object> ELFReader::create() const {
auto Obj = llvm::make_unique<Object>();
- if (auto *o = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) {
- ELFBuilder<ELF32LE> Builder(*o, *Obj);
+ if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) {
+ ELFBuilder<ELF32LE> Builder(*O, *Obj);
Builder.build();
return Obj;
- } else if (auto *o = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) {
- ELFBuilder<ELF64LE> Builder(*o, *Obj);
+ } else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) {
+ ELFBuilder<ELF64LE> Builder(*O, *Obj);
Builder.build();
return Obj;
- } else if (auto *o = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) {
- ELFBuilder<ELF32BE> Builder(*o, *Obj);
+ } else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) {
+ ELFBuilder<ELF32BE> Builder(*O, *Obj);
Builder.build();
return Obj;
- } else if (auto *o = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
- ELFBuilder<ELF64BE> Builder(*o, *Obj);
+ } else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
+ ELFBuilder<ELF64BE> Builder(*O, *Obj);
Builder.build();
return Obj;
}
@@ -963,18 +1224,31 @@ std::unique_ptr<Object> ELFReader::create() const {
template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
uint8_t *B = Buf.getBufferStart();
Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(B);
- std::copy(Obj.Ident, Obj.Ident + 16, Ehdr.e_ident);
+ std::fill(Ehdr.e_ident, Ehdr.e_ident + 16, 0);
+ Ehdr.e_ident[EI_MAG0] = 0x7f;
+ Ehdr.e_ident[EI_MAG1] = 'E';
+ Ehdr.e_ident[EI_MAG2] = 'L';
+ Ehdr.e_ident[EI_MAG3] = 'F';
+ Ehdr.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32;
+ Ehdr.e_ident[EI_DATA] =
+ ELFT::TargetEndianness == support::big ? ELFDATA2MSB : ELFDATA2LSB;
+ Ehdr.e_ident[EI_VERSION] = EV_CURRENT;
+ Ehdr.e_ident[EI_OSABI] = Obj.OSABI;
+ Ehdr.e_ident[EI_ABIVERSION] = Obj.ABIVersion;
+
Ehdr.e_type = Obj.Type;
Ehdr.e_machine = Obj.Machine;
Ehdr.e_version = Obj.Version;
Ehdr.e_entry = Obj.Entry;
- Ehdr.e_phoff = Obj.ProgramHdrSegment.Offset;
+ // We have to use the fully-qualified name llvm::size
+ // since some compilers complain about ambiguous resolution.
+ Ehdr.e_phnum = llvm::size(Obj.segments());
+ Ehdr.e_phoff = (Ehdr.e_phnum != 0) ? Obj.ProgramHdrSegment.Offset : 0;
+ Ehdr.e_phentsize = (Ehdr.e_phnum != 0) ? sizeof(Elf_Phdr) : 0;
Ehdr.e_flags = Obj.Flags;
Ehdr.e_ehsize = sizeof(Elf_Ehdr);
- Ehdr.e_phentsize = sizeof(Elf_Phdr);
- Ehdr.e_phnum = size(Obj.segments());
- Ehdr.e_shentsize = sizeof(Elf_Shdr);
- if (WriteSectionHeaders) {
+ if (WriteSectionHeaders && size(Obj.sections()) != 0) {
+ Ehdr.e_shentsize = sizeof(Elf_Shdr);
Ehdr.e_shoff = Obj.SHOffset;
// """
// If the number of sections is greater than or equal to
@@ -998,6 +1272,7 @@ template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
else
Ehdr.e_shstrndx = Obj.SectionNames->Index;
} else {
+ Ehdr.e_shentsize = 0;
Ehdr.e_shoff = 0;
Ehdr.e_shnum = 0;
Ehdr.e_shstrndx = 0;
@@ -1106,7 +1381,7 @@ static uint64_t alignToAddr(uint64_t Offset, uint64_t Addr, uint64_t Align) {
}
// Orders segments such that if x = y->ParentSegment then y comes before x.
-static void OrderSegments(std::vector<Segment *> &Segments) {
+static void orderSegments(std::vector<Segment *> &Segments) {
std::stable_sort(std::begin(Segments), std::end(Segments),
compareSegmentsByOffset);
}
@@ -1148,7 +1423,7 @@ static uint64_t LayoutSegments(std::vector<Segment *> &Segments,
// sections had a ParentSegment or an offset one past the last section if there
// was a section that didn't have a ParentSegment.
template <class Range>
-static uint64_t LayoutSections(Range Sections, uint64_t Offset) {
+static uint64_t layoutSections(Range Sections, uint64_t Offset) {
// Now the offset of every segment has been set we can assign the offsets
// of each section. For sections that are covered by a segment we should use
// the segment's original offset and the section's original offset to compute
@@ -1172,6 +1447,17 @@ static uint64_t LayoutSections(Range Sections, uint64_t Offset) {
return Offset;
}
+template <class ELFT> void ELFWriter<ELFT>::initEhdrSegment() {
+ auto &ElfHdr = Obj.ElfHdrSegment;
+ ElfHdr.Type = PT_PHDR;
+ ElfHdr.Flags = 0;
+ ElfHdr.OriginalOffset = ElfHdr.Offset = 0;
+ ElfHdr.VAddr = 0;
+ ElfHdr.PAddr = 0;
+ ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr);
+ ElfHdr.Align = 0;
+}
+
template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
// We need a temporary list of segments that has a special order to it
// so that we know that anytime ->ParentSegment is set that segment has
@@ -1181,17 +1467,17 @@ template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
OrderedSegments.push_back(&Segment);
OrderedSegments.push_back(&Obj.ElfHdrSegment);
OrderedSegments.push_back(&Obj.ProgramHdrSegment);
- OrderSegments(OrderedSegments);
+ orderSegments(OrderedSegments);
// Offset is used as the start offset of the first segment to be laid out.
// Since the ELF Header (ElfHdrSegment) must be at the start of the file,
// we start at offset 0.
uint64_t Offset = 0;
Offset = LayoutSegments(OrderedSegments, Offset);
- Offset = LayoutSections(Obj.sections(), Offset);
+ Offset = layoutSections(Obj.sections(), Offset);
// If we need to write the section header table out then we need to align the
// Offset so that SHOffset is valid.
if (WriteSectionHeaders)
- Offset = alignTo(Offset, sizeof(typename ELFT::Addr));
+ Offset = alignTo(Offset, sizeof(Elf_Addr));
Obj.SHOffset = Offset;
}
@@ -1263,10 +1549,17 @@ template <class ELFT> void ELFWriter<ELFT>::finalize() {
Obj.SectionNames->addString(Section.Name);
}
+ initEhdrSegment();
+
// Before we can prepare for layout the indexes need to be finalized.
+ // Also, the output arch may not be the same as the input arch, so fix up
+ // size-related fields before doing layout calculations.
uint64_t Index = 0;
- for (auto &Sec : Obj.sections())
+ auto SecSizer = llvm::make_unique<ELFSectionSizer<ELFT>>();
+ for (auto &Sec : Obj.sections()) {
Sec.Index = Index++;
+ Sec.accept(*SecSizer);
+ }
// The symbol table does not update all other sections on update. For
// instance, symbol names are not added as new symbols are added. This means
@@ -1324,10 +1617,10 @@ void BinaryWriter::finalize() {
// loading and physical addresses are intended for ROM loading.
// However, if no segment has a physical address, we'll fallback to using
// virtual addresses for all.
- if (std::all_of(std::begin(OrderedSegments), std::end(OrderedSegments),
- [](const Segment *Segment) { return Segment->PAddr == 0; }))
- for (const auto &Segment : OrderedSegments)
- Segment->PAddr = Segment->VAddr;
+ if (all_of(OrderedSegments,
+ [](const Segment *Seg) { return Seg->PAddr == 0; }))
+ for (Segment *Seg : OrderedSegments)
+ Seg->PAddr = Seg->VAddr;
std::stable_sort(std::begin(OrderedSegments), std::end(OrderedSegments),
compareSegmentsByPAddr);
@@ -1342,8 +1635,8 @@ void BinaryWriter::finalize() {
uint64_t Offset = 0;
// Modify the first segment so that there is no gap at the start. This allows
- // our layout algorithm to proceed as expected while not out writing out the
- // gap at the start.
+ // our layout algorithm to proceed as expected while not writing out the gap
+ // at the start.
if (!OrderedSegments.empty()) {
auto Seg = OrderedSegments[0];
auto Sec = Seg->firstSection();
@@ -1371,7 +1664,7 @@ void BinaryWriter::finalize() {
continue;
AllocatedSections.push_back(&Section);
}
- LayoutSections(make_pointee_range(AllocatedSections), Offset);
+ layoutSections(make_pointee_range(AllocatedSections), Offset);
// Now that every section has been laid out we just need to compute the total
// file size. This might not be the same as the offset returned by
@@ -1387,9 +1680,6 @@ void BinaryWriter::finalize() {
SecWriter = llvm::make_unique<BinarySectionWriter>(Buf);
}
-namespace llvm {
-namespace objcopy {
-
template class ELFBuilder<ELF64LE>;
template class ELFBuilder<ELF64BE>;
template class ELFBuilder<ELF32LE>;
@@ -1399,5 +1689,7 @@ template class ELFWriter<ELF64LE>;
template class ELFWriter<ELF64BE>;
template class ELFWriter<ELF32LE>;
template class ELFWriter<ELF32BE>;
+
+} // end namespace elf
} // end namespace objcopy
} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/Object.h b/contrib/llvm/tools/llvm-objcopy/ELF/Object.h
index 76748d5fc641..e5730cd543ee 100644
--- a/contrib/llvm/tools/llvm-objcopy/Object.h
+++ b/contrib/llvm/tools/llvm-objcopy/ELF/Object.h
@@ -10,6 +10,8 @@
#ifndef LLVM_TOOLS_OBJCOPY_OBJECT_H
#define LLVM_TOOLS_OBJCOPY_OBJECT_H
+#include "Buffer.h"
+#include "CopyConfig.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -26,9 +28,10 @@
#include <vector>
namespace llvm {
+enum class DebugCompressionType;
namespace objcopy {
+namespace elf {
-class Buffer;
class SectionBase;
class Section;
class OwnedDataSection;
@@ -39,6 +42,8 @@ class DynamicRelocationSection;
class GnuDebugLinkSection;
class GroupSection;
class SectionIndexSection;
+class CompressedSection;
+class DecompressedSection;
class Segment;
class Object;
struct Symbol;
@@ -66,7 +71,7 @@ enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE };
class SectionVisitor {
public:
- virtual ~SectionVisitor();
+ virtual ~SectionVisitor() = default;
virtual void visit(const Section &Sec) = 0;
virtual void visit(const OwnedDataSection &Sec) = 0;
@@ -77,6 +82,25 @@ public:
virtual void visit(const GnuDebugLinkSection &Sec) = 0;
virtual void visit(const GroupSection &Sec) = 0;
virtual void visit(const SectionIndexSection &Sec) = 0;
+ virtual void visit(const CompressedSection &Sec) = 0;
+ virtual void visit(const DecompressedSection &Sec) = 0;
+};
+
+class MutableSectionVisitor {
+public:
+ virtual ~MutableSectionVisitor() = default;
+
+ virtual void visit(Section &Sec) = 0;
+ virtual void visit(OwnedDataSection &Sec) = 0;
+ virtual void visit(StringTableSection &Sec) = 0;
+ virtual void visit(SymbolTableSection &Sec) = 0;
+ virtual void visit(RelocationSection &Sec) = 0;
+ virtual void visit(DynamicRelocationSection &Sec) = 0;
+ virtual void visit(GnuDebugLinkSection &Sec) = 0;
+ virtual void visit(GroupSection &Sec) = 0;
+ virtual void visit(SectionIndexSection &Sec) = 0;
+ virtual void visit(CompressedSection &Sec) = 0;
+ virtual void visit(DecompressedSection &Sec) = 0;
};
class SectionWriter : public SectionVisitor {
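The SectionVisitor above gains a mutable counterpart here: MutableSectionVisitor takes non-const references, so a pass such as the new ELFSectionSizer can update the section it visits instead of only reading it. A stripped-down sketch of the double dispatch this relies on (illustrative names, not the real hierarchy):

    #include <cstdio>

    struct PlainSection;
    struct OwnedDataSec;

    struct MutableVisitor {
      virtual ~MutableVisitor() = default;
      virtual void visit(PlainSection &S) = 0;
      virtual void visit(OwnedDataSec &S) = 0;
    };

    struct SecBase {
      virtual ~SecBase() = default;
      // Each concrete section forwards to the overload for its own type.
      virtual void accept(MutableVisitor &V) = 0;
    };

    struct PlainSection : SecBase {
      void accept(MutableVisitor &V) override { V.visit(*this); }
    };
    struct OwnedDataSec : SecBase {
      void accept(MutableVisitor &V) override { V.visit(*this); }
    };

    struct Sizer : MutableVisitor {
      void visit(PlainSection &) override { std::puts("size a plain section"); }
      void visit(OwnedDataSec &) override { std::puts("size an owned-data section"); }
    };

    // Usage: Sizer S; SomeSecBasePtr->accept(S); the section's dynamic type
    // selects the matching visit() overload.
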
@@ -95,6 +119,8 @@ public:
virtual void visit(const GnuDebugLinkSection &Sec) override = 0;
virtual void visit(const GroupSection &Sec) override = 0;
virtual void visit(const SectionIndexSection &Sec) override = 0;
+ virtual void visit(const CompressedSection &Sec) override = 0;
+ virtual void visit(const DecompressedSection &Sec) override = 0;
explicit SectionWriter(Buffer &Buf) : Out(Buf) {}
};
@@ -104,6 +130,7 @@ private:
using Elf_Word = typename ELFT::Word;
using Elf_Rel = typename ELFT::Rel;
using Elf_Rela = typename ELFT::Rela;
+ using Elf_Sym = typename ELFT::Sym;
public:
virtual ~ELFSectionWriter() {}
@@ -112,13 +139,38 @@ public:
void visit(const GnuDebugLinkSection &Sec) override;
void visit(const GroupSection &Sec) override;
void visit(const SectionIndexSection &Sec) override;
+ void visit(const CompressedSection &Sec) override;
+ void visit(const DecompressedSection &Sec) override;
explicit ELFSectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
};
+template <class ELFT> class ELFSectionSizer : public MutableSectionVisitor {
+private:
+ using Elf_Rel = typename ELFT::Rel;
+ using Elf_Rela = typename ELFT::Rela;
+ using Elf_Sym = typename ELFT::Sym;
+ using Elf_Word = typename ELFT::Word;
+ using Elf_Xword = typename ELFT::Xword;
+
+public:
+ void visit(Section &Sec) override;
+ void visit(OwnedDataSection &Sec) override;
+ void visit(StringTableSection &Sec) override;
+ void visit(DynamicRelocationSection &Sec) override;
+ void visit(SymbolTableSection &Sec) override;
+ void visit(RelocationSection &Sec) override;
+ void visit(GnuDebugLinkSection &Sec) override;
+ void visit(GroupSection &Sec) override;
+ void visit(SectionIndexSection &Sec) override;
+ void visit(CompressedSection &Sec) override;
+ void visit(DecompressedSection &Sec) override;
+};
+
#define MAKE_SEC_WRITER_FRIEND \
friend class SectionWriter; \
- template <class ELFT> friend class ELFSectionWriter;
+ template <class ELFT> friend class ELFSectionWriter; \
+ template <class ELFT> friend class ELFSectionSizer;
class BinarySectionWriter : public SectionWriter {
public:
@@ -129,52 +181,12 @@ public:
void visit(const GnuDebugLinkSection &Sec) override;
void visit(const GroupSection &Sec) override;
void visit(const SectionIndexSection &Sec) override;
+ void visit(const CompressedSection &Sec) override;
+ void visit(const DecompressedSection &Sec) override;
explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
};
-// The class Buffer abstracts out the common interface of FileOutputBuffer and
-// WritableMemoryBuffer so that the hierarchy of Writers depends on this
-// abstract interface and doesn't depend on a particular implementation.
-// TODO: refactor the buffer classes in LLVM to enable us to use them here
-// directly.
-class Buffer {
- StringRef Name;
-
-public:
- virtual ~Buffer();
- virtual void allocate(size_t Size) = 0;
- virtual uint8_t *getBufferStart() = 0;
- virtual Error commit() = 0;
-
- explicit Buffer(StringRef Name) : Name(Name) {}
- StringRef getName() const { return Name; }
-};
-
-class FileBuffer : public Buffer {
- std::unique_ptr<FileOutputBuffer> Buf;
-
-public:
- void allocate(size_t Size) override;
- uint8_t *getBufferStart() override;
- Error commit() override;
-
- explicit FileBuffer(StringRef FileName) : Buffer(FileName) {}
-};
-
-class MemBuffer : public Buffer {
- std::unique_ptr<WritableMemoryBuffer> Buf;
-
-public:
- void allocate(size_t Size) override;
- uint8_t *getBufferStart() override;
- Error commit() override;
-
- explicit MemBuffer(StringRef Name) : Buffer(Name) {}
-
- std::unique_ptr<WritableMemoryBuffer> releaseMemoryBuffer();
-};
-
class Writer {
protected:
Object &Obj;
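The Buffer, FileBuffer and MemBuffer declarations deleted here are not gone; this commit moves them into the new Buffer.h included at the top of this header. Their contract is allocate, fill, commit. A small fragment written against that interface (not the tool's actual code; error handling is reduced to consuming the Error):

    #include <cstring>   // std::memcpy
    #include <utility>   // std::move

    void writeBlob(Buffer &Out, const uint8_t *Data, size_t Size) {
      Out.allocate(Size);                           // reserve the output buffer
      std::memcpy(Out.getBufferStart(), Data, Size);
      if (Error E = Out.commit())                   // flush to disk or to memory
        consumeError(std::move(E));                 // llvm::consumeError, Error.h
    }
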
@@ -190,10 +202,13 @@ public:
template <class ELFT> class ELFWriter : public Writer {
private:
+ using Elf_Addr = typename ELFT::Addr;
using Elf_Shdr = typename ELFT::Shdr;
using Elf_Phdr = typename ELFT::Phdr;
using Elf_Ehdr = typename ELFT::Ehdr;
+ void initEhdrSegment();
+
void writeEhdr();
void writePhdr(const Segment &Seg);
void writeShdr(const SectionBase &Sec);
@@ -233,7 +248,7 @@ public:
class SectionBase {
public:
- StringRef Name;
+ std::string Name;
Segment *ParentSegment = nullptr;
uint64_t HeaderOffset;
uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();
@@ -250,6 +265,10 @@ public:
uint64_t Offset = 0;
uint64_t Size = 0;
uint64_t Type = ELF::SHT_NULL;
+ ArrayRef<uint8_t> OriginalData;
+
+ SectionBase() = default;
+ SectionBase(const SectionBase &) = default;
virtual ~SectionBase() = default;
@@ -258,6 +277,7 @@ public:
virtual void removeSectionReferences(const SectionBase *Sec);
virtual void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
virtual void accept(SectionVisitor &Visitor) const = 0;
+ virtual void accept(MutableSectionVisitor &Visitor) = 0;
virtual void markSymbols();
};
@@ -275,21 +295,21 @@ private:
};
std::set<const SectionBase *, SectionCompare> Sections;
- ArrayRef<uint8_t> Contents;
public:
- uint64_t Align;
- uint64_t FileSize;
+ uint32_t Type;
uint32_t Flags;
- uint32_t Index;
- uint64_t MemSize;
uint64_t Offset;
- uint64_t PAddr;
- uint64_t Type;
uint64_t VAddr;
+ uint64_t PAddr;
+ uint64_t FileSize;
+ uint64_t MemSize;
+ uint64_t Align;
+ uint32_t Index;
uint64_t OriginalOffset;
Segment *ParentSegment = nullptr;
+ ArrayRef<uint8_t> Contents;
explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {}
Segment() {}
@@ -314,6 +334,7 @@ public:
explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {}
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
void removeSectionReferences(const SectionBase *Sec) override;
void initialize(SectionTableRef SecTable) override;
void finalize() override;
@@ -327,13 +348,57 @@ class OwnedDataSection : public SectionBase {
public:
OwnedDataSection(StringRef SecName, ArrayRef<uint8_t> Data)
: Data(std::begin(Data), std::end(Data)) {
- Name = SecName;
+ Name = SecName.str();
Type = ELF::SHT_PROGBITS;
Size = Data.size();
OriginalOffset = std::numeric_limits<uint64_t>::max();
}
void accept(SectionVisitor &Sec) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
+};
+
+class CompressedSection : public SectionBase {
+ MAKE_SEC_WRITER_FRIEND
+
+ DebugCompressionType CompressionType;
+ uint64_t DecompressedSize;
+ uint64_t DecompressedAlign;
+ SmallVector<char, 128> CompressedData;
+
+public:
+ CompressedSection(const SectionBase &Sec,
+ DebugCompressionType CompressionType);
+ CompressedSection(ArrayRef<uint8_t> CompressedData, uint64_t DecompressedSize,
+ uint64_t DecompressedAlign);
+
+ uint64_t getDecompressedSize() const { return DecompressedSize; }
+ uint64_t getDecompressedAlign() const { return DecompressedAlign; }
+
+ void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
+
+ static bool classof(const SectionBase *S) {
+ return (S->Flags & ELF::SHF_COMPRESSED) ||
+ (StringRef(S->Name).startswith(".zdebug"));
+ }
+};
+
+class DecompressedSection : public SectionBase {
+ MAKE_SEC_WRITER_FRIEND
+
+public:
+ explicit DecompressedSection(const CompressedSection &Sec)
+ : SectionBase(Sec) {
+ Size = Sec.getDecompressedSize();
+ Align = Sec.getDecompressedAlign();
+ Flags = (Flags & ~ELF::SHF_COMPRESSED);
+ if (StringRef(Name).startswith(".zdebug"))
+ Name = "." + Name.substr(2);
+ }
+
+ void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
};
// There are two types of string tables that can exist, dynamic and not dynamic.
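CompressedSection::classof above recognises both ways a compressed debug section can be marked: the SHF_COMPRESSED section flag (zlib style) and the GNU-style ".zdebug" name prefix; DecompressedSection's constructor undoes both. A self-contained sketch of that naming rule (the flag value is the real ELF constant, the rest is illustrative):

    #include <cstdint>
    #include <string>

    constexpr uint64_t kSHF_COMPRESSED = 0x800; // ELF::SHF_COMPRESSED

    struct SecInfo {
      std::string Name;
      uint64_t Flags = 0;
    };

    bool isCompressed(const SecInfo &S) {
      return (S.Flags & kSHF_COMPRESSED) || S.Name.compare(0, 7, ".zdebug") == 0;
    }

    // Mirrors what DecompressedSection does to the metadata: clear the flag
    // and turn ".zdebug_info" back into ".debug_info".
    void markDecompressed(SecInfo &S) {
      S.Flags &= ~kSHF_COMPRESSED;
      if (S.Name.compare(0, 7, ".zdebug") == 0)
        S.Name = "." + S.Name.substr(2);
    }
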
@@ -358,6 +423,7 @@ public:
uint32_t findIndex(StringRef Name) const;
void finalize() override;
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
static bool classof(const SectionBase *S) {
if (S->Flags & ELF::SHF_ALLOC)
@@ -386,7 +452,7 @@ struct Symbol {
SectionBase *DefinedIn = nullptr;
SymbolShndxType ShndxType;
uint32_t Index;
- StringRef Name;
+ std::string Name;
uint32_t NameIndex;
uint64_t Size;
uint8_t Type;
@@ -395,6 +461,7 @@ struct Symbol {
bool Referenced = false;
uint16_t getShndx() const;
+ bool isCommon() const;
};
class SectionIndexSection : public SectionBase {
@@ -414,6 +481,7 @@ public:
void initialize(SectionTableRef SecTable) override;
void finalize() override;
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
SectionIndexSection() {
Name = ".symtab_shndx";
@@ -437,9 +505,11 @@ protected:
using SymPtr = std::unique_ptr<Symbol>;
public:
- void addSymbol(StringRef Name, uint8_t Bind, uint8_t Type,
- SectionBase *DefinedIn, uint64_t Value, uint8_t Visibility,
- uint16_t Shndx, uint64_t Sz);
+ SymbolTableSection() { Type = ELF::SHT_SYMTAB; }
+
+ void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn,
+ uint64_t Value, uint8_t Visibility, uint16_t Shndx,
+ uint64_t Size);
void prepareForLayout();
// An 'empty' symbol table still contains a null symbol.
bool empty() const { return Symbols.size() == 1; }
@@ -456,6 +526,7 @@ public:
void initialize(SectionTableRef SecTable) override;
void finalize() override;
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
static bool classof(const SectionBase *S) {
@@ -517,6 +588,7 @@ class RelocationSection
public:
void addRelocation(Relocation Rel) { Relocations.push_back(Rel); }
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
void markSymbols() override;
@@ -549,8 +621,8 @@ public:
void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; }
void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); }
- void initialize(SectionTableRef SecTable) override{};
void accept(SectionVisitor &) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
void finalize() override;
void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
void markSymbols() override;
@@ -589,6 +661,7 @@ public:
explicit DynamicRelocationSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
void accept(SectionVisitor &) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
static bool classof(const SectionBase *S) {
if (!(S->Flags & ELF::SHF_ALLOC))
@@ -610,6 +683,7 @@ public:
// If we add this section from an external source we can use this ctor.
explicit GnuDebugLinkSection(StringRef File);
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
};
class Reader {
@@ -623,11 +697,29 @@ using object::ELFFile;
using object::ELFObjectFile;
using object::OwningBinary;
+class BinaryELFBuilder {
+ uint16_t EMachine;
+ MemoryBuffer *MemBuf;
+ std::unique_ptr<Object> Obj;
+
+ void initFileHeader();
+ void initHeaderSegment();
+ StringTableSection *addStrTab();
+ SymbolTableSection *addSymTab(StringTableSection *StrTab);
+ void addData(SymbolTableSection *SymTab);
+ void initSections();
+
+public:
+ BinaryELFBuilder(uint16_t EM, MemoryBuffer *MB)
+ : EMachine(EM), MemBuf(MB), Obj(llvm::make_unique<Object>()) {}
+
+ std::unique_ptr<Object> build();
+};
+
template <class ELFT> class ELFBuilder {
private:
using Elf_Addr = typename ELFT::Addr;
using Elf_Shdr = typename ELFT::Shdr;
- using Elf_Ehdr = typename ELFT::Ehdr;
using Elf_Word = typename ELFT::Word;
const ELFFile<ELFT> &ElfFile;
@@ -647,13 +739,22 @@ public:
void build();
};
+class BinaryReader : public Reader {
+ const MachineInfo &MInfo;
+ MemoryBuffer *MemBuf;
+
+public:
+ BinaryReader(const MachineInfo &MI, MemoryBuffer *MB)
+ : MInfo(MI), MemBuf(MB) {}
+ std::unique_ptr<Object> create() const override;
+};
+
class ELFReader : public Reader {
Binary *Bin;
public:
- ElfType getElfType() const;
std::unique_ptr<Object> create() const override;
- explicit ELFReader(Binary *B) : Bin(B){};
+ explicit ELFReader(Binary *B) : Bin(B) {}
};
class Object {
@@ -682,7 +783,8 @@ public:
Segment ElfHdrSegment;
Segment ProgramHdrSegment;
- uint8_t Ident[16];
+ uint8_t OSABI;
+ uint8_t ABIVersion;
uint64_t Entry;
uint64_t SHOffset;
uint32_t Type;
@@ -708,6 +810,7 @@ public:
auto Sec = llvm::make_unique<T>(std::forward<Ts>(Args)...);
auto Ptr = Sec.get();
Sections.emplace_back(std::move(Sec));
+ Ptr->Index = Sections.size();
return *Ptr;
}
Segment &addSegment(ArrayRef<uint8_t> Data) {
@@ -715,6 +818,8 @@ public:
return *Segments.back();
}
};
+
+} // end namespace elf
} // end namespace objcopy
} // end namespace llvm
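The BinaryReader and BinaryELFBuilder added above back the new raw-binary input path: a MemoryBuffer is wrapped into a synthetic ELF Object for the machine given on the command line. A fragment written against these declarations (illustrative, not the tool's real call site; MI is assumed to be a MachineInfo filled in from --binary-architecture):

    #include "llvm/Support/MemoryBuffer.h"

    auto BufOrErr = llvm::MemoryBuffer::getFile("blob.bin");
    if (BufOrErr) {
      BinaryReader Reader(MI, BufOrErr->get());
      std::unique_ptr<Object> Obj = Reader.create();
      // Obj now carries the raw bytes as a data section plus a small symbol
      // table, per BinaryELFBuilder::addData / addSymTab declared above.
    }
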
diff --git a/contrib/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/contrib/llvm/tools/llvm-objcopy/ObjcopyOpts.td
index 2af2108d98d3..1f7e64e4091c 100644
--- a/contrib/llvm/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/contrib/llvm/tools/llvm-objcopy/ObjcopyOpts.td
@@ -1,55 +1,98 @@
include "llvm/Option/OptParser.td"
-multiclass Eq<string name> {
- def NAME: Separate<["--", "-"], name>;
- def NAME # _eq: Joined<["--", "-"], name # "=">, Alias<!cast<Separate>(NAME)>;
+multiclass Eq<string name, string help> {
+ def NAME : Separate<["--", "-"], name>;
+ def NAME #_eq : Joined<["--", "-"], name #"=">,
+ Alias<!cast<Separate>(NAME)>,
+ HelpText<help>;
}
def help : Flag<["-", "--"], "help">;
-defm binary_architecture : Eq<"binary-architecture">,
- HelpText<"Used when transforming an architecture-less format (such as binary) to another format">;
-def B : JoinedOrSeparate<["-"], "B">,
- Alias<binary_architecture>;
-defm input_target : Eq<"input-target">,
- HelpText<"Format of the input file">,
+
+defm binary_architecture
+ : Eq<"binary-architecture", "Used when transforming an architecture-less "
+ "format (such as binary) to another format">;
+def B : JoinedOrSeparate<["-"], "B">, Alias<binary_architecture>;
+
+defm target : Eq<"target", "Format of the input and output file">,
+ Values<"binary">;
+def F : JoinedOrSeparate<["-"], "F">, Alias<target>;
+
+defm input_target : Eq<"input-target", "Format of the input file">,
Values<"binary">;
-defm output_target : Eq<"output-target">,
- HelpText<"Format of the output file">,
+def I : JoinedOrSeparate<["-"], "I">, Alias<input_target>;
+
+defm output_target : Eq<"output-target", "Format of the output file">,
Values<"binary">;
-def O : JoinedOrSeparate<["-"], "O">,
- Alias<output_target>;
-defm split_dwo : Eq<"split-dwo">,
- MetaVarName<"dwo-file">,
- HelpText<"Equivalent to extract-dwo on the input file to <dwo-file>, then strip-dwo on the input file">;
-defm add_gnu_debuglink : Eq<"add-gnu-debuglink">,
- MetaVarName<"debug-file">,
- HelpText<"Add a .gnu_debuglink for <debug-file>">;
-defm remove_section : Eq<"remove-section">,
- MetaVarName<"section">,
- HelpText<"Remove <section>">;
-defm rename_section : Eq<"rename-section">,
- MetaVarName<"old=new">,
- HelpText<"Renames a section from old to new">;
-defm redefine_symbol : Eq<"redefine-sym">,
- MetaVarName<"old=new">,
- HelpText<"Change the name of a symbol old to new">;
-def R : JoinedOrSeparate<["-"], "R">,
- Alias<remove_section>;
-defm keep : Eq<"keep">,
- MetaVarName<"section">,
- HelpText<"Keep <section>">;
-defm only_keep : Eq<"only-keep">,
- MetaVarName<"section">,
- HelpText<"Remove all but <section>">;
-def j : JoinedOrSeparate<["-"], "j">,
- Alias<only_keep>;
-defm add_section : Eq<"add-section">,
- MetaVarName<"section=file">,
- HelpText<"Make a section named <section> with the contents of <file>.">;
-def strip_all : Flag<["-", "--"], "strip-all">,
- HelpText<"Remove non-allocated sections other than .gnu.warning* sections">;
+def O : JoinedOrSeparate<["-"], "O">, Alias<output_target>;
+
+def compress_debug_sections : Flag<["--", "-"], "compress-debug-sections">;
+def compress_debug_sections_eq
+ : Joined<["--", "-"], "compress-debug-sections=">,
+ MetaVarName<"[ zlib | zlib-gnu ]">,
+ HelpText<"Compress DWARF debug sections using specified style. Supported "
+ "styles: 'zlib-gnu' and 'zlib'">;
+def decompress_debug_sections : Flag<["-", "--"], "decompress-debug-sections">,
+ HelpText<"Decompress DWARF debug sections.">;
+defm split_dwo
+ : Eq<"split-dwo", "Equivalent to extract-dwo on the input file to "
+ "<dwo-file>, then strip-dwo on the input file">,
+ MetaVarName<"dwo-file">;
+
+def enable_deterministic_archives
+ : Flag<["-", "--"], "enable-deterministic-archives">,
+ HelpText<"Enable deterministic mode when copying archives (use zero for "
+ "UIDs, GIDs, and timestamps).">;
+def D : Flag<["-"], "D">,
+ Alias<enable_deterministic_archives>,
+ HelpText<"Alias for --enable-deterministic-archives">;
+
+def disable_deterministic_archives
+ : Flag<["-", "--"], "disable-deterministic-archives">,
+ HelpText<"Disable deterministic mode when copying archives (use real "
+ "values for UIDs, GIDs, and timestamps).">;
+def U : Flag<["-"], "U">,
+ Alias<disable_deterministic_archives>,
+ HelpText<"Alias for --disable-deterministic-archives">;
+
+def preserve_dates : Flag<["-", "--"], "preserve-dates">,
+ HelpText<"Preserve access and modification timestamps">;
+def p : Flag<["-"], "p">, Alias<preserve_dates>;
+
+defm add_gnu_debuglink
+ : Eq<"add-gnu-debuglink", "Add a .gnu_debuglink for <debug-file>">,
+ MetaVarName<"debug-file">;
+
+defm remove_section : Eq<"remove-section", "Remove <section>">,
+ MetaVarName<"section">;
+def R : JoinedOrSeparate<["-"], "R">, Alias<remove_section>;
+
+defm rename_section
+ : Eq<"rename-section",
+ "Renames a section from old to new, optionally with specified flags. "
+ "Flags supported for GNU compatibility: alloc, load, noload, "
+ "readonly, debug, code, data, rom, share, contents, merge, strings.">,
+ MetaVarName<"old=new[,flag1,...]">;
+defm redefine_symbol
+ : Eq<"redefine-sym", "Change the name of a symbol old to new">,
+ MetaVarName<"old=new">;
+defm keep_section : Eq<"keep-section", "Keep <section>">,
+ MetaVarName<"section">;
+defm only_section : Eq<"only-section", "Remove all but <section>">,
+ MetaVarName<"section">;
+def j : JoinedOrSeparate<["-"], "j">, Alias<only_section>;
+defm add_section
+ : Eq<"add-section",
+ "Make a section named <section> with the contents of <file>.">,
+ MetaVarName<"section=file">;
+
+def strip_all
+ : Flag<["-", "--"], "strip-all">,
+ HelpText<
+ "Remove non-allocated sections other than .gnu.warning* sections">;
+def S : Flag<["-"], "S">, Alias<strip_all>;
def strip_all_gnu : Flag<["-", "--"], "strip-all-gnu">,
- HelpText<"Compaitable with GNU objcopy's --strip-all">;
+ HelpText<"Compatible with GNU objcopy's --strip-all">;
def strip_debug : Flag<["-", "--"], "strip-debug">,
HelpText<"Remove all debug information">;
def strip_dwo : Flag<["-", "--"], "strip-dwo">,
@@ -58,42 +101,80 @@ def strip_sections : Flag<["-", "--"], "strip-sections">,
HelpText<"Remove all section headers">;
def strip_non_alloc : Flag<["-", "--"], "strip-non-alloc">,
HelpText<"Remove all non-allocated sections">;
-def extract_dwo : Flag<["-", "--"], "extract-dwo">,
- HelpText<"Remove all sections that are not DWARF .dwo sections from file">;
-def localize_hidden : Flag<["-", "--"], "localize-hidden">,
- HelpText<"Mark all symbols that have hidden or internal visibility as local">;
-defm localize_symbol : Eq<"localize-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Mark <symbol> as local">;
-def L : JoinedOrSeparate<["-"], "L">,
- Alias<localize_symbol>;
-defm globalize_symbol : Eq<"globalize-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Mark <symbol> as global">;
-defm weaken_symbol : Eq<"weaken-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Mark <symbol> as weak">;
-def W : JoinedOrSeparate<["-"], "W">,
- Alias<weaken_symbol>;
-def weaken : Flag<["-", "--"], "weaken">,
- HelpText<"Mark all global symbols as weak">;
-def discard_all : Flag<["-", "--"], "discard-all">,
- HelpText<"Remove all local symbols except file and section symbols">;
-def x : Flag<["-"], "x">,
- Alias<discard_all>;
-defm strip_symbol : Eq<"strip-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Remove symbol <symbol>">;
-def N : JoinedOrSeparate<["-"], "N">,
- Alias<strip_symbol>;
-defm keep_symbol : Eq<"keep-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Do not remove symbol <symbol>">;
-def K : JoinedOrSeparate<["-"], "K">,
- Alias<keep_symbol>;
-def only_keep_debug : Flag<["-", "--"], "only-keep-debug">,
- HelpText<"Currently ignored. Only for compaitability with GNU objcopy.">;
def strip_unneeded : Flag<["-", "--"], "strip-unneeded">,
- HelpText<"Remove all symbols not needed by relocations">;
+ HelpText<"Remove all symbols not needed by relocations">;
+
+def extract_dwo
+ : Flag<["-", "--"], "extract-dwo">,
+ HelpText<
+ "Remove all sections that are not DWARF .dwo sections from file">;
+
+def localize_hidden
+ : Flag<["-", "--"], "localize-hidden">,
+ HelpText<
+ "Mark all symbols that have hidden or internal visibility as local">;
+defm localize_symbol : Eq<"localize-symbol", "Mark <symbol> as local">,
+ MetaVarName<"symbol">;
+def L : JoinedOrSeparate<["-"], "L">, Alias<localize_symbol>;
+
+defm globalize_symbol : Eq<"globalize-symbol", "Mark <symbol> as global">,
+ MetaVarName<"symbol">;
+defm keep_global_symbol
+ : Eq<"keep-global-symbol",
+ "Convert all symbols except <symbol> to local. May be repeated to "
+ "convert all except a set of symbols to local.">,
+ MetaVarName<"symbol">;
+def G : JoinedOrSeparate<["-"], "G">, Alias<keep_global_symbol>;
+
+defm keep_global_symbols
+ : Eq<"keep-global-symbols",
+ "Reads a list of symbols from <filename> and runs as if "
+ "--keep-global-symbol=<symbol> is set for each one. <filename> "
+ "contains one symbol per line and may contain comments beginning with "
+ "'#'. Leading and trailing whitespace is stripped from each line. May "
+ "be repeated to read symbols from many files.">,
+ MetaVarName<"filename">;
+
+defm weaken_symbol : Eq<"weaken-symbol", "Mark <symbol> as weak">,
+ MetaVarName<"symbol">;
+def W : JoinedOrSeparate<["-"], "W">, Alias<weaken_symbol>;
+def weaken : Flag<["-", "--"], "weaken">,
+ HelpText<"Mark all global symbols as weak">;
+def discard_all
+ : Flag<["-", "--"], "discard-all">,
+ HelpText<"Remove all local symbols except file and section symbols">;
+def x : Flag<["-"], "x">, Alias<discard_all>;
+defm strip_symbol : Eq<"strip-symbol", "Remove symbol <symbol>">,
+ MetaVarName<"symbol">;
+def N : JoinedOrSeparate<["-"], "N">, Alias<strip_symbol>;
+defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol <symbol>">,
+ MetaVarName<"symbol">;
+def K : JoinedOrSeparate<["-"], "K">, Alias<keep_symbol>;
+def only_keep_debug
+ : Flag<["-", "--"], "only-keep-debug">,
+ HelpText<"Currently ignored. Only for compatibility with GNU objcopy.">;
def keep_file_symbols : Flag<["-", "--"], "keep-file-symbols">,
- HelpText<"Do not remove file symbols">;
+ HelpText<"Do not remove file symbols">;
+defm dump_section
+ : Eq<"dump-section",
+ "Dump contents of section named <section> into file <file>">,
+ MetaVarName<"section=file">;
+defm prefix_symbols
+ : Eq<"prefix-symbols", "Add <prefix> to the start of every symbol name">,
+ MetaVarName<"prefix">;
+
+def version : Flag<["-", "--"], "version">,
+ HelpText<"Print the version and exit.">;
+def V : Flag<["-"], "V">, Alias<version>;
+defm build_id_link_dir
+ : Eq<"build-id-link-dir", "Set directory for --build-id-link-input and "
+ "--build-id-link-output to <dir>">,
+ MetaVarName<"dir">;
+defm build_id_link_input
+ : Eq<"build-id-link-input", "Hard-link the input to <dir>/xx/xxx<suffix> "
+ "name derived from hex build ID">,
+ MetaVarName<"suffix">;
+defm build_id_link_output
+ : Eq<"build-id-link-output", "Hard-link the output to <dir>/xx/xxx<suffix> "
+ "name derived from hex build ID">,
+ MetaVarName<"suffix">;
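The reworked Eq multiclass at the top of this file still produces two records per option, a Separate form (--name value) and a Joined form (--name=value) that aliases it, but it now carries the help text itself so the text is written only once. Because the joined spelling is an alias, the C++ driver consumes either spelling through a single option ID; a hedged sketch using the OBJCOPY_* IDs generated from this file (mirrors the getLastArgValue / filtered calls removed from llvm-objcopy.cpp later in this diff):

    // Sketch only; T is an ObjcopyOptTable and Argv the raw arguments.
    llvm::opt::InputArgList Args = T.ParseArgs(Argv, MissingIdx, MissingCount);
    // --split-dwo foo.dwo and --split-dwo=foo.dwo both resolve to this ID.
    llvm::StringRef SplitDWO = Args.getLastArgValue(OBJCOPY_split_dwo);
    for (const auto *Arg : Args.filtered(OBJCOPY_remove_section))
      Removals.push_back(Arg->getValue());
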
diff --git a/contrib/llvm/tools/llvm-objcopy/StripOpts.td b/contrib/llvm/tools/llvm-objcopy/StripOpts.td
index 333b0d288efa..fa98e27e9321 100644
--- a/contrib/llvm/tools/llvm-objcopy/StripOpts.td
+++ b/contrib/llvm/tools/llvm-objcopy/StripOpts.td
@@ -1,49 +1,67 @@
include "llvm/Option/OptParser.td"
-multiclass Eq<string name> {
- def NAME: Separate<["--", "-"], name>;
- def NAME # _eq: Joined<["--", "-"], name # "=">, Alias<!cast<Separate>(NAME)>;
+multiclass Eq<string name, string help> {
+ def NAME : Separate<["--", "-"], name>;
+ def NAME #_eq : Joined<["--", "-"], name #"=">,
+ Alias<!cast<Separate>(NAME)>,
+ HelpText<help>;
}
def help : Flag<["-", "--"], "help">;
-defm output : Eq<"o">,
- MetaVarName<"output">,
- HelpText<"Write output to <file>">;
+def enable_deterministic_archives
+ : Flag<["-", "--"], "enable-deterministic-archives">,
+ HelpText<"Enable deterministic mode when stripping archives (use zero "
+ "for UIDs, GIDs, and timestamps).">;
+def D : Flag<["-"], "D">,
+ Alias<enable_deterministic_archives>,
+ HelpText<"Alias for --enable-deterministic-archives">;
-def strip_all : Flag<["-", "--"], "strip-all">,
- HelpText<"Remove non-allocated sections other than .gnu.warning* sections">;
+def disable_deterministic_archives
+ : Flag<["-", "--"], "disable-deterministic-archives">,
+ HelpText<"Disable deterministic mode when stripping archives (use real "
+ "values for UIDs, GIDs, and timestamps).">;
+def U : Flag<["-"], "U">,
+ Alias<disable_deterministic_archives>,
+ HelpText<"Alias for --disable-deterministic-archives">;
-def strip_debug : Flag<["-", "--"], "strip-debug">,
- HelpText<"Remove debugging symbols only">;
-
-def d : Flag<["-"], "d">,
- Alias<strip_debug>;
-
-def g : Flag<["-"], "g">,
- Alias<strip_debug>;
+defm output : Eq<"o", "Write output to <file>">, MetaVarName<"output">;
-def S : Flag<["-"], "S">,
- Alias<strip_debug>;
+def preserve_dates : Flag<["-", "--"], "preserve-dates">,
+ HelpText<"Preserve access and modification timestamps">;
+def p : Flag<["-"], "p">, Alias<preserve_dates>;
-defm remove_section : Eq<"remove-section">,
- MetaVarName<"section">,
- HelpText<"Remove <section>">;
+def strip_all
+ : Flag<["-", "--"], "strip-all">,
+ HelpText<
+ "Remove non-allocated sections other than .gnu.warning* sections">;
+def s : Flag<["-"], "s">, Alias<strip_all>;
-def R : JoinedOrSeparate<["-"], "R">,
- Alias<remove_section>;
+def strip_all_gnu : Flag<["-", "--"], "strip-all-gnu">,
+ HelpText<"Compatible with GNU strip's --strip-all">;
+def strip_debug : Flag<["-", "--"], "strip-debug">,
+ HelpText<"Remove debugging symbols only">;
+def d : Flag<["-"], "d">, Alias<strip_debug>;
+def g : Flag<["-"], "g">, Alias<strip_debug>;
+def S : Flag<["-"], "S">, Alias<strip_debug>;
+def strip_unneeded : Flag<["-", "--"], "strip-unneeded">,
+ HelpText<"Remove all symbols not needed by relocations">;
-defm keep_symbol : Eq<"keep-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Do not remove symbol <symbol>">;
+defm remove_section : Eq<"remove-section", "Remove <section>">,
+ MetaVarName<"section">;
+def R : JoinedOrSeparate<["-"], "R">, Alias<remove_section>;
-def K : JoinedOrSeparate<["-"], "K">,
- Alias<keep_symbol>;
+defm keep_section : Eq<"keep-section", "Keep <section>">,
+ MetaVarName<"section">;
+defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol <symbol>">,
+ MetaVarName<"symbol">;
+def K : JoinedOrSeparate<["-"], "K">, Alias<keep_symbol>;
-def discard_all : Flag<["-", "--"], "discard-all">,
- HelpText<"Remove all local symbols except file and section symbols">;
-def x : Flag<["-"], "x">,
- Alias<discard_all>;
+def discard_all
+ : Flag<["-", "--"], "discard-all">,
+ HelpText<"Remove all local symbols except file and section symbols">;
+def x : Flag<["-"], "x">, Alias<discard_all>;
-def strip_unneeded : Flag<["-", "--"], "strip-unneeded">,
- HelpText<"Remove all symbols not needed by relocations">;
+def version : Flag<["-", "--"], "version">,
+ HelpText<"Print the version and exit.">;
+def V : Flag<["-"], "V">, Alias<version>;
diff --git a/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
index 21a1622db765..fb1ff18b015b 100644
--- a/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -8,14 +8,19 @@
//===----------------------------------------------------------------------===//
#include "llvm-objcopy.h"
-#include "Object.h"
+#include "Buffer.h"
+#include "COFF/COFFObjcopy.h"
+#include "CopyConfig.h"
+#include "ELF/ELFObjcopy.h"
+
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/Binary.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/Error.h"
@@ -23,137 +28,23 @@
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/Memory.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
-#include <functional>
-#include <iterator>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
-using namespace llvm;
-using namespace llvm::objcopy;
-using namespace object;
-using namespace ELF;
-
-namespace {
-
-enum ObjcopyID {
- OBJCOPY_INVALID = 0, // This is not an option ID.
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
- HELPTEXT, METAVAR, VALUES) \
- OBJCOPY_##ID,
-#include "ObjcopyOpts.inc"
-#undef OPTION
-};
-
-#define PREFIX(NAME, VALUE) const char *const OBJCOPY_##NAME[] = VALUE;
-#include "ObjcopyOpts.inc"
-#undef PREFIX
-
-static const opt::OptTable::Info ObjcopyInfoTable[] = {
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
- HELPTEXT, METAVAR, VALUES) \
- {OBJCOPY_##PREFIX, \
- NAME, \
- HELPTEXT, \
- METAVAR, \
- OBJCOPY_##ID, \
- opt::Option::KIND##Class, \
- PARAM, \
- FLAGS, \
- OBJCOPY_##GROUP, \
- OBJCOPY_##ALIAS, \
- ALIASARGS, \
- VALUES},
-#include "ObjcopyOpts.inc"
-#undef OPTION
-};
-
-class ObjcopyOptTable : public opt::OptTable {
-public:
- ObjcopyOptTable() : OptTable(ObjcopyInfoTable, true) {}
-};
-
-enum StripID {
- STRIP_INVALID = 0, // This is not an option ID.
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
- HELPTEXT, METAVAR, VALUES) \
- STRIP_##ID,
-#include "StripOpts.inc"
-#undef OPTION
-};
-
-#define PREFIX(NAME, VALUE) const char *const STRIP_##NAME[] = VALUE;
-#include "StripOpts.inc"
-#undef PREFIX
-
-static const opt::OptTable::Info StripInfoTable[] = {
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
- HELPTEXT, METAVAR, VALUES) \
- {STRIP_##PREFIX, NAME, HELPTEXT, \
- METAVAR, STRIP_##ID, opt::Option::KIND##Class, \
- PARAM, FLAGS, STRIP_##GROUP, \
- STRIP_##ALIAS, ALIASARGS, VALUES},
-#include "StripOpts.inc"
-#undef OPTION
-};
-
-class StripOptTable : public opt::OptTable {
-public:
- StripOptTable() : OptTable(StripInfoTable, true) {}
-};
-
-struct CopyConfig {
- StringRef OutputFilename;
- StringRef InputFilename;
- StringRef OutputFormat;
- StringRef InputFormat;
- StringRef BinaryArch;
-
- StringRef SplitDWO;
- StringRef AddGnuDebugLink;
- std::vector<StringRef> ToRemove;
- std::vector<StringRef> Keep;
- std::vector<StringRef> OnlyKeep;
- std::vector<StringRef> AddSection;
- std::vector<StringRef> SymbolsToLocalize;
- std::vector<StringRef> SymbolsToGlobalize;
- std::vector<StringRef> SymbolsToWeaken;
- std::vector<StringRef> SymbolsToRemove;
- std::vector<StringRef> SymbolsToKeep;
- StringMap<StringRef> SectionsToRename;
- StringMap<StringRef> SymbolsToRename;
- bool StripAll = false;
- bool StripAllGNU = false;
- bool StripDebug = false;
- bool StripSections = false;
- bool StripNonAlloc = false;
- bool StripDWO = false;
- bool StripUnneeded = false;
- bool ExtractDWO = false;
- bool LocalizeHidden = false;
- bool Weaken = false;
- bool DiscardAll = false;
- bool OnlyKeepDebug = false;
- bool KeepFileSymbols = false;
-};
-
-using SectionPred = std::function<bool(const SectionBase &Sec)>;
-
-} // namespace
-
namespace llvm {
namespace objcopy {
@@ -161,14 +52,15 @@ namespace objcopy {
StringRef ToolName;
LLVM_ATTRIBUTE_NORETURN void error(Twine Message) {
- errs() << ToolName << ": " << Message << ".\n";
+ WithColor::error(errs(), ToolName) << Message << ".\n";
errs().flush();
exit(1);
}
LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, std::error_code EC) {
assert(EC);
- errs() << ToolName << ": '" << File << "': " << EC.message() << ".\n";
+ WithColor::error(errs(), ToolName)
+ << "'" << File << "': " << EC.message() << ".\n";
exit(1);
}
@@ -176,304 +68,18 @@ LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) {
assert(E);
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
- errs() << ToolName << ": '" << File << "': " << Buf;
+ WithColor::error(errs(), ToolName) << "'" << File << "': " << Buf;
exit(1);
}
} // end namespace objcopy
} // end namespace llvm
-static bool IsDebugSection(const SectionBase &Sec) {
- return Sec.Name.startswith(".debug") || Sec.Name.startswith(".zdebug") ||
- Sec.Name == ".gdb_index";
-}
-
-static bool IsDWOSection(const SectionBase &Sec) {
- return Sec.Name.endswith(".dwo");
-}
-
-static bool OnlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) {
- // We can't remove the section header string table.
- if (&Sec == Obj.SectionNames)
- return false;
- // Short of keeping the string table we want to keep everything that is a DWO
- // section and remove everything else.
- return !IsDWOSection(Sec);
-}
-
-static std::unique_ptr<Writer> CreateWriter(const CopyConfig &Config,
- Object &Obj, Buffer &Buf,
- ElfType OutputElfType) {
- if (Config.OutputFormat == "binary") {
- return llvm::make_unique<BinaryWriter>(Obj, Buf);
- }
- // Depending on the initial ELFT and OutputFormat we need a different Writer.
- switch (OutputElfType) {
- case ELFT_ELF32LE:
- return llvm::make_unique<ELFWriter<ELF32LE>>(Obj, Buf,
- !Config.StripSections);
- case ELFT_ELF64LE:
- return llvm::make_unique<ELFWriter<ELF64LE>>(Obj, Buf,
- !Config.StripSections);
- case ELFT_ELF32BE:
- return llvm::make_unique<ELFWriter<ELF32BE>>(Obj, Buf,
- !Config.StripSections);
- case ELFT_ELF64BE:
- return llvm::make_unique<ELFWriter<ELF64BE>>(Obj, Buf,
- !Config.StripSections);
- }
- llvm_unreachable("Invalid output format");
-}
-
-static void SplitDWOToFile(const CopyConfig &Config, const Reader &Reader,
- StringRef File, ElfType OutputElfType) {
- auto DWOFile = Reader.create();
- DWOFile->removeSections(
- [&](const SectionBase &Sec) { return OnlyKeepDWOPred(*DWOFile, Sec); });
- FileBuffer FB(File);
- auto Writer = CreateWriter(Config, *DWOFile, FB, OutputElfType);
- Writer->finalize();
- Writer->write();
-}
-
-// This function handles the high level operations of GNU objcopy including
-// handling command line options. It's important to outline certain properties
-// we expect to hold of the command line operations. Any operation that "keeps"
-// should keep regardless of a remove. Additionally any removal should respect
-// any previous removals. Lastly whether or not something is removed shouldn't
-// depend a) on the order the options occur in or b) on some opaque priority
-// system. The only priority is that keeps/copies overrule removes.
-static void HandleArgs(const CopyConfig &Config, Object &Obj,
- const Reader &Reader, ElfType OutputElfType) {
-
- if (!Config.SplitDWO.empty()) {
- SplitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType);
- }
-
- // TODO: update or remove symbols only if there is an option that affects
- // them.
- if (Obj.SymbolTable) {
- Obj.SymbolTable->updateSymbols([&](Symbol &Sym) {
- if ((Config.LocalizeHidden &&
- (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
- (!Config.SymbolsToLocalize.empty() &&
- is_contained(Config.SymbolsToLocalize, Sym.Name)))
- Sym.Binding = STB_LOCAL;
-
- if (!Config.SymbolsToGlobalize.empty() &&
- is_contained(Config.SymbolsToGlobalize, Sym.Name))
- Sym.Binding = STB_GLOBAL;
-
- if (!Config.SymbolsToWeaken.empty() &&
- is_contained(Config.SymbolsToWeaken, Sym.Name) &&
- Sym.Binding == STB_GLOBAL)
- Sym.Binding = STB_WEAK;
-
- if (Config.Weaken && Sym.Binding == STB_GLOBAL &&
- Sym.getShndx() != SHN_UNDEF)
- Sym.Binding = STB_WEAK;
-
- const auto I = Config.SymbolsToRename.find(Sym.Name);
- if (I != Config.SymbolsToRename.end())
- Sym.Name = I->getValue();
- });
-
- // The purpose of this loop is to mark symbols referenced by sections
- // (like GroupSection or RelocationSection). This way, we know which
- // symbols are still 'needed' and wich are not.
- if (Config.StripUnneeded) {
- for (auto &Section : Obj.sections())
- Section.markSymbols();
- }
-
- Obj.removeSymbols([&](const Symbol &Sym) {
- if ((!Config.SymbolsToKeep.empty() &&
- is_contained(Config.SymbolsToKeep, Sym.Name)) ||
- (Config.KeepFileSymbols && Sym.Type == STT_FILE))
- return false;
-
- if (Config.DiscardAll && Sym.Binding == STB_LOCAL &&
- Sym.getShndx() != SHN_UNDEF && Sym.Type != STT_FILE &&
- Sym.Type != STT_SECTION)
- return true;
-
- if (Config.StripAll || Config.StripAllGNU)
- return true;
-
- if (!Config.SymbolsToRemove.empty() &&
- is_contained(Config.SymbolsToRemove, Sym.Name)) {
- return true;
- }
-
- if (Config.StripUnneeded && !Sym.Referenced &&
- (Sym.Binding == STB_LOCAL || Sym.getShndx() == SHN_UNDEF) &&
- Sym.Type != STT_FILE && Sym.Type != STT_SECTION)
- return true;
-
- return false;
- });
- }
-
- SectionPred RemovePred = [](const SectionBase &) { return false; };
-
- // Removes:
- if (!Config.ToRemove.empty()) {
- RemovePred = [&Config](const SectionBase &Sec) {
- return find(Config.ToRemove, Sec.Name) != Config.ToRemove.end();
- };
- }
-
- if (Config.StripDWO || !Config.SplitDWO.empty())
- RemovePred = [RemovePred](const SectionBase &Sec) {
- return IsDWOSection(Sec) || RemovePred(Sec);
- };
-
- if (Config.ExtractDWO)
- RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
- return OnlyKeepDWOPred(Obj, Sec) || RemovePred(Sec);
- };
-
- if (Config.StripAllGNU)
- RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
- if (RemovePred(Sec))
- return true;
- if ((Sec.Flags & SHF_ALLOC) != 0)
- return false;
- if (&Sec == Obj.SectionNames)
- return false;
- switch (Sec.Type) {
- case SHT_SYMTAB:
- case SHT_REL:
- case SHT_RELA:
- case SHT_STRTAB:
- return true;
- }
- return IsDebugSection(Sec);
- };
-
- if (Config.StripSections) {
- RemovePred = [RemovePred](const SectionBase &Sec) {
- return RemovePred(Sec) || (Sec.Flags & SHF_ALLOC) == 0;
- };
- }
-
- if (Config.StripDebug) {
- RemovePred = [RemovePred](const SectionBase &Sec) {
- return RemovePred(Sec) || IsDebugSection(Sec);
- };
- }
-
- if (Config.StripNonAlloc)
- RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
- if (RemovePred(Sec))
- return true;
- if (&Sec == Obj.SectionNames)
- return false;
- return (Sec.Flags & SHF_ALLOC) == 0;
- };
-
- if (Config.StripAll)
- RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
- if (RemovePred(Sec))
- return true;
- if (&Sec == Obj.SectionNames)
- return false;
- if (Sec.Name.startswith(".gnu.warning"))
- return false;
- return (Sec.Flags & SHF_ALLOC) == 0;
- };
-
- // Explicit copies:
- if (!Config.OnlyKeep.empty()) {
- RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) {
- // Explicitly keep these sections regardless of previous removes.
- if (find(Config.OnlyKeep, Sec.Name) != Config.OnlyKeep.end())
- return false;
-
- // Allow all implicit removes.
- if (RemovePred(Sec))
- return true;
-
- // Keep special sections.
- if (Obj.SectionNames == &Sec)
- return false;
- if (Obj.SymbolTable == &Sec ||
- (Obj.SymbolTable && Obj.SymbolTable->getStrTab() == &Sec))
- return false;
-
- // Remove everything else.
- return true;
- };
- }
-
- if (!Config.Keep.empty()) {
- RemovePred = [Config, RemovePred](const SectionBase &Sec) {
- // Explicitly keep these sections regardless of previous removes.
- if (find(Config.Keep, Sec.Name) != Config.Keep.end())
- return false;
- // Otherwise defer to RemovePred.
- return RemovePred(Sec);
- };
- }
-
- // This has to be the last predicate assignment.
- // If the option --keep-symbol has been specified
- // and at least one of those symbols is present
- // (equivalently, the updated symbol table is not empty)
- // the symbol table and the string table should not be removed.
- if ((!Config.SymbolsToKeep.empty() || Config.KeepFileSymbols) &&
- Obj.SymbolTable && !Obj.SymbolTable->empty()) {
- RemovePred = [&Obj, RemovePred](const SectionBase &Sec) {
- if (&Sec == Obj.SymbolTable || &Sec == Obj.SymbolTable->getStrTab())
- return false;
- return RemovePred(Sec);
- };
- }
-
- Obj.removeSections(RemovePred);
-
- if (!Config.SectionsToRename.empty()) {
- for (auto &Sec : Obj.sections()) {
- const auto Iter = Config.SectionsToRename.find(Sec.Name);
- if (Iter != Config.SectionsToRename.end())
- Sec.Name = Iter->second;
- }
- }
-
- if (!Config.AddSection.empty()) {
- for (const auto &Flag : Config.AddSection) {
- auto SecPair = Flag.split("=");
- auto SecName = SecPair.first;
- auto File = SecPair.second;
- auto BufOrErr = MemoryBuffer::getFile(File);
- if (!BufOrErr)
- reportError(File, BufOrErr.getError());
- auto Buf = std::move(*BufOrErr);
- auto BufPtr = reinterpret_cast<const uint8_t *>(Buf->getBufferStart());
- auto BufSize = Buf->getBufferSize();
- Obj.addSection<OwnedDataSection>(SecName,
- ArrayRef<uint8_t>(BufPtr, BufSize));
- }
- }
-
- if (!Config.AddGnuDebugLink.empty())
- Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink);
-}
-
-static void ExecuteElfObjcopyOnBinary(const CopyConfig &Config, Binary &Binary,
- Buffer &Out) {
- ELFReader Reader(&Binary);
- std::unique_ptr<Object> Obj = Reader.create();
-
- HandleArgs(Config, *Obj, Reader, Reader.getElfType());
-
- std::unique_ptr<Writer> Writer =
- CreateWriter(Config, *Obj, Out, Reader.getElfType());
- Writer->finalize();
- Writer->write();
-}
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::objcopy;
// For regular archives this function simply calls llvm::writeArchive,
// For thin archives it writes the archive file itself as well as its members.
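The HandleArgs body removed above (it moves into ELF/ELFObjcopy.cpp) builds its section filter by repeatedly wrapping the previous RemovePred in a new lambda and applying the explicit keep checks last; that layering is what makes "keeps and copies overrule removes" hold regardless of option order. A condensed, self-contained sketch of the composition pattern (hypothetical options, same technique):

    #include <functional>
    #include <string>

    struct Sec { std::string Name; bool Alloc; };
    using SectionPred = std::function<bool(const Sec &)>;

    SectionPred buildRemovePred(bool StripNonAlloc, bool KeepDebug) {
      SectionPred Remove = [](const Sec &) { return false; };

      // Each option layers on top of the predicate built so far...
      if (StripNonAlloc)
        Remove = [Remove](const Sec &S) { return Remove(S) || !S.Alloc; };

      // ...and explicit keeps are applied last, so they win over every remove.
      if (KeepDebug)
        Remove = [Remove](const Sec &S) {
          if (S.Name.rfind(".debug", 0) == 0)  // keep anything named .debug*
            return false;
          return Remove(S);
        };
      return Remove;
    }
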
@@ -504,22 +110,48 @@ static Error deepWriteArchive(StringRef ArcName,
return Error::success();
}
-static void ExecuteElfObjcopyOnArchive(const CopyConfig &Config, const Archive &Ar) {
+/// The function executeObjcopyOnRawBinary does the dispatch based on the format
+/// of the output specified by the command line options.
+static void executeObjcopyOnRawBinary(const CopyConfig &Config,
+ MemoryBuffer &In, Buffer &Out) {
+ // TODO: llvm-objcopy should parse CopyConfig.OutputFormat to recognize
+ // formats other than ELF / "binary" and invoke
+ // elf::executeObjcopyOnRawBinary, macho::executeObjcopyOnRawBinary or
+ // coff::executeObjcopyOnRawBinary accordingly.
+ return elf::executeObjcopyOnRawBinary(Config, In, Out);
+}
+
+/// The function executeObjcopyOnBinary does the dispatch based on the format
+/// of the input binary (ELF, MachO or COFF).
+static void executeObjcopyOnBinary(const CopyConfig &Config, object::Binary &In,
+ Buffer &Out) {
+ if (auto *ELFBinary = dyn_cast<object::ELFObjectFileBase>(&In))
+ return elf::executeObjcopyOnBinary(Config, *ELFBinary, Out);
+ else if (auto *COFFBinary = dyn_cast<object::COFFObjectFile>(&In))
+ return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out);
+ else
+ error("Unsupported object file format");
+}
+
+static void executeObjcopyOnArchive(const CopyConfig &Config,
+ const Archive &Ar) {
std::vector<NewArchiveMember> NewArchiveMembers;
Error Err = Error::success();
for (const Archive::Child &Child : Ar.children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
if (!ChildOrErr)
reportError(Ar.getFileName(), ChildOrErr.takeError());
+ Binary *Bin = ChildOrErr->get();
+
Expected<StringRef> ChildNameOrErr = Child.getName();
if (!ChildNameOrErr)
reportError(Ar.getFileName(), ChildNameOrErr.takeError());
MemBuffer MB(ChildNameOrErr.get());
- ExecuteElfObjcopyOnBinary(Config, **ChildOrErr, MB);
+ executeObjcopyOnBinary(Config, *Bin, MB);
Expected<NewArchiveMember> Member =
- NewArchiveMember::getOldMember(Child, true);
+ NewArchiveMember::getOldMember(Child, Config.DeterministicArchives);
if (!Member)
reportError(Ar.getFileName(), Member.takeError());
Member->Buf = MB.releaseMemoryBuffer();
@@ -529,180 +161,72 @@ static void ExecuteElfObjcopyOnArchive(const CopyConfig &Config, const Archive &
if (Err)
reportError(Config.InputFilename, std::move(Err));
- if (Error E =
- deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
- Ar.hasSymbolTable(), Ar.kind(), true, Ar.isThin()))
+ if (Error E = deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
+ Ar.hasSymbolTable(), Ar.kind(),
+ Config.DeterministicArchives, Ar.isThin()))
reportError(Config.OutputFilename, std::move(E));
}
-static void ExecuteElfObjcopy(const CopyConfig &Config) {
- Expected<OwningBinary<llvm::object::Binary>> BinaryOrErr =
- createBinary(Config.InputFilename);
- if (!BinaryOrErr)
- reportError(Config.InputFilename, BinaryOrErr.takeError());
-
- if (Archive *Ar = dyn_cast<Archive>(BinaryOrErr.get().getBinary()))
- return ExecuteElfObjcopyOnArchive(Config, *Ar);
-
- FileBuffer FB(Config.OutputFilename);
- ExecuteElfObjcopyOnBinary(Config, *BinaryOrErr.get().getBinary(), FB);
-}
-
-// ParseObjcopyOptions returns the config and sets the input arguments. If a
-// help flag is set then ParseObjcopyOptions will print the help messege and
-// exit.
-static CopyConfig ParseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
- ObjcopyOptTable T;
- unsigned MissingArgumentIndex, MissingArgumentCount;
- llvm::opt::InputArgList InputArgs =
- T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
-
- if (InputArgs.size() == 0) {
- T.PrintHelp(errs(), "llvm-objcopy <input> [ <output> ]", "objcopy tool");
- exit(1);
- }
-
- if (InputArgs.hasArg(OBJCOPY_help)) {
- T.PrintHelp(outs(), "llvm-objcopy <input> [ <output> ]", "objcopy tool");
- exit(0);
- }
-
- SmallVector<const char *, 2> Positional;
-
- for (auto Arg : InputArgs.filtered(OBJCOPY_UNKNOWN))
- error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
-
- for (auto Arg : InputArgs.filtered(OBJCOPY_INPUT))
- Positional.push_back(Arg->getValue());
-
- if (Positional.empty())
- error("No input file specified");
-
- if (Positional.size() > 2)
- error("Too many positional arguments");
-
- CopyConfig Config;
- Config.InputFilename = Positional[0];
- Config.OutputFilename = Positional[Positional.size() == 1 ? 0 : 1];
- Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target);
- Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target);
- Config.BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture);
-
- Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo);
- Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink);
-
- for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbol)) {
- if (!StringRef(Arg->getValue()).contains('='))
- error("Bad format for --redefine-sym");
- auto Old2New = StringRef(Arg->getValue()).split('=');
- if (!Config.SymbolsToRename.insert(Old2New).second)
- error("Multiple redefinition of symbol " + Old2New.first);
- }
-
- for (auto Arg : InputArgs.filtered(OBJCOPY_rename_section)) {
- if (!StringRef(Arg->getValue()).contains('='))
- error("Bad format for --rename-section");
- auto Old2New = StringRef(Arg->getValue()).split('=');
- if (!Config.SectionsToRename.insert(Old2New).second)
- error("Already have a section rename for " + Old2New.first);
- }
-
- for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section))
- Config.ToRemove.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_keep))
- Config.Keep.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_only_keep))
- Config.OnlyKeep.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_add_section))
- Config.AddSection.push_back(Arg->getValue());
- Config.StripAll = InputArgs.hasArg(OBJCOPY_strip_all);
- Config.StripAllGNU = InputArgs.hasArg(OBJCOPY_strip_all_gnu);
- Config.StripDebug = InputArgs.hasArg(OBJCOPY_strip_debug);
- Config.StripDWO = InputArgs.hasArg(OBJCOPY_strip_dwo);
- Config.StripSections = InputArgs.hasArg(OBJCOPY_strip_sections);
- Config.StripNonAlloc = InputArgs.hasArg(OBJCOPY_strip_non_alloc);
- Config.StripUnneeded = InputArgs.hasArg(OBJCOPY_strip_unneeded);
- Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo);
- Config.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
- Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken);
- Config.DiscardAll = InputArgs.hasArg(OBJCOPY_discard_all);
- Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug);
- Config.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
- for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol))
- Config.SymbolsToLocalize.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol))
- Config.SymbolsToGlobalize.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol))
- Config.SymbolsToWeaken.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol))
- Config.SymbolsToRemove.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol))
- Config.SymbolsToKeep.push_back(Arg->getValue());
-
- return Config;
-}
-
-// ParseStripOptions returns the config and sets the input arguments. If a
-// help flag is set then ParseStripOptions will print the help messege and
-// exit.
-static CopyConfig ParseStripOptions(ArrayRef<const char *> ArgsArr) {
- StripOptTable T;
- unsigned MissingArgumentIndex, MissingArgumentCount;
- llvm::opt::InputArgList InputArgs =
- T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
-
- if (InputArgs.size() == 0) {
- T.PrintHelp(errs(), "llvm-strip <input> [ <output> ]", "strip tool");
- exit(1);
+static void restoreDateOnFile(StringRef Filename,
+ const sys::fs::file_status &Stat) {
+ int FD;
+
+ if (auto EC =
+ sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
+ reportError(Filename, EC);
+
+ if (auto EC = sys::fs::setLastAccessAndModificationTime(
+ FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
+ reportError(Filename, EC);
+
+ if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
+ reportError(Filename, EC);
+}
+
+/// The function executeObjcopy does the higher level dispatch based on the type
+/// of input (raw binary, archive or single object file) and takes care of the
+/// format-agnostic modifications, i.e. preserving dates.
+static void executeObjcopy(const CopyConfig &Config) {
+ sys::fs::file_status Stat;
+ if (Config.PreserveDates)
+ if (auto EC = sys::fs::status(Config.InputFilename, Stat))
+ reportError(Config.InputFilename, EC);
+
+ if (Config.InputFormat == "binary") {
+ auto BufOrErr = MemoryBuffer::getFile(Config.InputFilename);
+ if (!BufOrErr)
+ reportError(Config.InputFilename, BufOrErr.getError());
+ FileBuffer FB(Config.OutputFilename);
+ executeObjcopyOnRawBinary(Config, *BufOrErr->get(), FB);
+ } else {
+ Expected<OwningBinary<llvm::object::Binary>> BinaryOrErr =
+ createBinary(Config.InputFilename);
+ if (!BinaryOrErr)
+ reportError(Config.InputFilename, BinaryOrErr.takeError());
+
+ if (Archive *Ar = dyn_cast<Archive>(BinaryOrErr.get().getBinary())) {
+ executeObjcopyOnArchive(Config, *Ar);
+ } else {
+ FileBuffer FB(Config.OutputFilename);
+ executeObjcopyOnBinary(Config, *BinaryOrErr.get().getBinary(), FB);
+ }
}
- if (InputArgs.hasArg(STRIP_help)) {
- T.PrintHelp(outs(), "llvm-strip <input> [ <output> ]", "strip tool");
- exit(0);
+ if (Config.PreserveDates) {
+ restoreDateOnFile(Config.OutputFilename, Stat);
+ if (!Config.SplitDWO.empty())
+ restoreDateOnFile(Config.SplitDWO, Stat);
}
-
- SmallVector<const char *, 2> Positional;
- for (auto Arg : InputArgs.filtered(STRIP_UNKNOWN))
- error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
- for (auto Arg : InputArgs.filtered(STRIP_INPUT))
- Positional.push_back(Arg->getValue());
-
- if (Positional.empty())
- error("No input file specified");
-
- if (Positional.size() > 2)
- error("Support for multiple input files is not implemented yet");
-
- CopyConfig Config;
- Config.InputFilename = Positional[0];
- Config.OutputFilename =
- InputArgs.getLastArgValue(STRIP_output, Positional[0]);
-
- Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug);
-
- Config.DiscardAll = InputArgs.hasArg(STRIP_discard_all);
- Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded);
- Config.StripAll = InputArgs.hasArg(STRIP_strip_all);
-
- if (!Config.StripDebug && !Config.StripUnneeded && !Config.DiscardAll)
- Config.StripAll = true;
-
- for (auto Arg : InputArgs.filtered(STRIP_remove_section))
- Config.ToRemove.push_back(Arg->getValue());
-
- for (auto Arg : InputArgs.filtered(STRIP_keep_symbol))
- Config.SymbolsToKeep.push_back(Arg->getValue());
-
- return Config;
}
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
ToolName = argv[0];
- CopyConfig Config;
- if (sys::path::stem(ToolName).endswith_lower("strip"))
- Config = ParseStripOptions(makeArrayRef(argv + 1, argc));
+ DriverConfig DriverConfig;
+ if (sys::path::stem(ToolName).contains("strip"))
+ DriverConfig = parseStripOptions(makeArrayRef(argv + 1, argc));
else
- Config = ParseObjcopyOptions(makeArrayRef(argv + 1, argc));
- ExecuteElfObjcopy(Config);
+ DriverConfig = parseObjcopyOptions(makeArrayRef(argv + 1, argc));
+ for (const CopyConfig &CopyConfig : DriverConfig.CopyConfigs)
+ executeObjcopy(CopyConfig);
}
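The executeObjcopy flow above captures the input's file_status before any work happens and, when --preserve-dates is set, stamps those times back onto the output (and onto the split DWO file, if any). The same pattern as a small reusable helper, returning an Error instead of exiting the way reportError does (a sketch built only from the sys::fs and sys::Process calls used above):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/Process.h"

    static llvm::Error copyTimestamps(llvm::StringRef From, llvm::StringRef To) {
      llvm::sys::fs::file_status Stat;
      if (std::error_code EC = llvm::sys::fs::status(From, Stat))
        return llvm::errorCodeToError(EC);

      int FD;
      if (std::error_code EC = llvm::sys::fs::openFileForWrite(
              To, FD, llvm::sys::fs::CD_OpenExisting))
        return llvm::errorCodeToError(EC);
      if (std::error_code EC = llvm::sys::fs::setLastAccessAndModificationTime(
              FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
        return llvm::errorCodeToError(EC);
      if (std::error_code EC = llvm::sys::Process::SafelyCloseFileDescriptor(FD))
        return llvm::errorCodeToError(EC);
      return llvm::Error::success();
    }
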
diff --git a/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.h b/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.h
index e222b65dc78f..d8edf3e29ee0 100644
--- a/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.h
+++ b/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.h
@@ -31,7 +31,7 @@ template <class T> T unwrapOrError(Expected<T> EO) {
return *EO;
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(EO.takeError(), OS, "");
+ logAllUnhandledErrors(EO.takeError(), OS);
OS.flush();
error(Buf);
}
diff --git a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
index 7ca5d04593ff..55607ec299be 100644
--- a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
@@ -16,11 +16,13 @@
//===----------------------------------------------------------------------===//
#include "llvm-objdump.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Win64EH.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -453,7 +455,7 @@ static bool getPDataSection(const COFFObjectFile *Obj,
Rels.push_back(Reloc);
// Sort relocations by address.
- llvm::sort(Rels.begin(), Rels.end(), RelocAddressLess);
+ llvm::sort(Rels, isRelocAddressLess);
ArrayRef<uint8_t> Contents;
error(Obj->getSectionContents(Pdata, Contents));
@@ -578,8 +580,9 @@ static void printRuntimeFunctionRels(const COFFObjectFile *Obj,
void llvm::printCOFFUnwindInfo(const COFFObjectFile *Obj) {
if (Obj->getMachine() != COFF::IMAGE_FILE_MACHINE_AMD64) {
- errs() << "Unsupported image machine type "
- "(currently only AMD64 is supported).\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "unsupported image machine type "
+ "(currently only AMD64 is supported).\n";
return;
}
@@ -646,10 +649,26 @@ void llvm::printCOFFSymbolTable(const COFFObjectFile *coff) {
<< "(sec " << format("%2d", int(Symbol->getSectionNumber())) << ")"
<< "(fl 0x00)" // Flag bits, which COFF doesn't have.
<< "(ty " << format("%3x", unsigned(Symbol->getType())) << ")"
- << "(scl " << format("%3x", unsigned(Symbol->getStorageClass())) << ") "
+ << "(scl " << format("%3x", unsigned(Symbol->getStorageClass()))
+ << ") "
<< "(nx " << unsigned(Symbol->getNumberOfAuxSymbols()) << ") "
<< "0x" << format("%08x", unsigned(Symbol->getValue())) << " "
- << Name << "\n";
+ << Name;
+ if (Demangle && Name.startswith("?")) {
+ char *DemangledSymbol = nullptr;
+ size_t Size = 0;
+ int Status = -1;
+ DemangledSymbol =
+ microsoftDemangle(Name.data(), DemangledSymbol, &Size, &Status);
+
+ if (Status == 0 && DemangledSymbol) {
+ outs() << " (" << StringRef(DemangledSymbol) << ")";
+ std::free(DemangledSymbol);
+ } else {
+ outs() << " (invalid mangled name)";
+ }
+ }
+ outs() << "\n";
for (unsigned AI = 0, AE = Symbol->getNumberOfAuxSymbols(); AI < AE; ++AI, ++SI) {
if (Symbol->isSectionDefinition()) {
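
The new COFF symbol-table hunk demangles MSVC-mangled names (those starting with '?') via llvm::microsoftDemangle(). A standalone usage sketch with the same argument order as the hunk; passing a null output buffer asks the demangler to allocate one, which the caller must free (the example name and its printed output are illustrative, not taken from the diff):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      const char *Mangled = "?func@@YAHH@Z"; // hypothetical MSVC-mangled name
      size_t Size = 0;
      int Status = -1;
      char *Demangled = llvm::microsoftDemangle(Mangled, nullptr, &Size, &Status);
      if (Status == 0 && Demangled)
        std::printf("%s\n", Demangled); // e.g. "int __cdecl func(int)"
      else
        std::printf("(invalid mangled name)\n");
      std::free(Demangled); // the caller owns the returned buffer
      return 0;
    }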
diff --git a/contrib/llvm/tools/llvm-objdump/ELFDump.cpp b/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
index f4d36656a6c4..b17a15a0d8fc 100644
--- a/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
@@ -158,37 +158,23 @@ template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
}
void llvm::printELFFileHeader(const object::ObjectFile *Obj) {
- // Little-endian 32-bit
- if (const ELF32LEObjectFile *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
+ if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
printProgramHeaders(ELFObj->getELFFile());
-
- // Big-endian 32-bit
- if (const ELF32BEObjectFile *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
printProgramHeaders(ELFObj->getELFFile());
-
- // Little-endian 64-bit
- if (const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
printProgramHeaders(ELFObj->getELFFile());
-
- // Big-endian 64-bit
- if (const ELF64BEObjectFile *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
printProgramHeaders(ELFObj->getELFFile());
}
void llvm::printELFDynamicSection(const object::ObjectFile *Obj) {
- // Little-endian 32-bit
- if (const ELF32LEObjectFile *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
+ if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
-
- // Big-endian 32-bit
- if (const ELF32BEObjectFile *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
-
- // Little-endian 64-bit
- if (const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
-
- // Big-endian 64-bit
- if (const ELF64BEObjectFile *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
}
diff --git a/contrib/llvm/tools/llvm-objdump/MachODump.cpp b/contrib/llvm/tools/llvm-objdump/MachODump.cpp
index bdf80c73b999..5ef7058ec9da 100644
--- a/contrib/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/MachODump.cpp
@@ -44,6 +44,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstring>
@@ -166,7 +167,7 @@ static const Target *GetTarget(const MachOObjectFile *MachOObj,
if (*ThumbTarget)
return TheTarget;
- errs() << "llvm-objdump: error: unable to get target for '";
+ WithColor::error(errs(), "llvm-objdump") << "unable to get target for '";
if (!TheTarget)
errs() << TripleName;
else
@@ -483,7 +484,7 @@ static void PrintRType(const uint64_t cputype, const unsigned r_type) {
"GOTLDP ", "GOTLDPOF", "PTRTGOT ", "TLVLDP ", "TLVLDPOF",
"ADDEND ", " 11 (?) ", " 12 (?) ", " 13 (?) ", " 14 (?) ", " 15 (?) "
};
-
+
if (r_type > 0xf){
outs() << format("%-7u", r_type) << " ";
return;
@@ -552,7 +553,7 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
bool previous_arm_half = false;
bool previous_sectdiff = false;
uint32_t sectdiff_r_type = 0;
-
+
for (relocation_iterator Reloc = Begin; Reloc != End; ++Reloc) {
const DataRefImpl Rel = Reloc->getRawDataRefImpl();
const MachO::any_relocation_info RE = O->getRelocation(Rel);
@@ -567,7 +568,7 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
O->getScatteredRelocationValue(RE) : 0);
const unsigned r_symbolnum = (r_scattered ? 0 :
O->getPlainRelocationSymbolNum(RE));
-
+
if (r_scattered && cputype != MachO::CPU_TYPE_X86_64) {
if (verbose) {
// scattered: address
@@ -578,20 +579,20 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
outs() << " ";
else
outs() << format("%08x ", (unsigned int)r_address);
-
+
// scattered: pcrel
if (r_pcrel)
outs() << "True ";
else
outs() << "False ";
-
+
// scattered: length
PrintRLength(cputype, r_type, r_length, previous_arm_half);
-
+
// scattered: extern & type
outs() << "n/a ";
PrintRType(cputype, r_type);
-
+
// scattered: scattered & value
outs() << format("True 0x%08x", (unsigned int)r_value);
if (previous_sectdiff == false) {
@@ -639,22 +640,22 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
outs() << " ";
else
outs() << format("%08x ", (unsigned int)r_address);
-
+
// plain: pcrel
if (r_pcrel)
outs() << "True ";
else
outs() << "False ";
-
+
// plain: length
PrintRLength(cputype, r_type, r_length, previous_arm_half);
-
+
if (r_extern) {
// plain: extern & type & scattered
outs() << "True ";
PrintRType(cputype, r_type);
outs() << "False ";
-
+
// plain: symbolnum/value
if (r_symbolnum > Symtab.nsyms)
outs() << format("?(%d)\n", r_symbolnum);
@@ -675,7 +676,7 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
outs() << "False ";
PrintRType(cputype, r_type);
outs() << "False ";
-
+
// plain: symbolnum/value
if (cputype == MachO::CPU_TYPE_ARM &&
r_type == llvm::MachO::ARM_RELOC_PAIR)
@@ -1411,7 +1412,7 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
std::pair<StringRef, StringRef> DumpSegSectName;
DumpSegSectName = DumpSection.split(',');
StringRef DumpSegName, DumpSectName;
- if (DumpSegSectName.second.size()) {
+ if (!DumpSegSectName.second.empty()) {
DumpSegName = DumpSegSectName.first;
DumpSectName = DumpSegSectName.second;
} else {
@@ -1559,7 +1560,8 @@ static bool checkMachOAndArchFlags(ObjectFile *O, StringRef Filename) {
if (none_of(ArchFlags, [&](const std::string &Name) {
return Name == ArchFlagName;
})) {
- errs() << "llvm-objdump: " + Filename + ": No architecture specified.\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << Filename << ": no architecture specified.\n";
return false;
}
return true;
@@ -1580,7 +1582,7 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
if (Disassemble || Relocations || PrivateHeaders || ExportsTrie || Rebase ||
Bind || SymbolTable || LazyBind || WeakBind || IndirectSymbols ||
DataInCode || LinkOptHints || DylibsUsed || DylibId || ObjcMetaData ||
- (FilterSections.size() != 0)) {
+ (!FilterSections.empty())) {
if (!NoLeadingHeaders) {
outs() << Name;
if (!ArchiveMemberName.empty())
@@ -1605,12 +1607,22 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
// If we need the symbol table to do the operation then check it here to
// produce a good error message as to where the Mach-O file comes from in
// the error message.
- if (Disassemble || IndirectSymbols || FilterSections.size() != 0 ||
- UnwindInfo)
+ if (Disassemble || IndirectSymbols || !FilterSections.empty() || UnwindInfo)
if (Error Err = MachOOF->checkSymbolTable())
report_error(ArchiveName, FileName, std::move(Err), ArchitectureName);
-
- if (Disassemble) {
+
+ if (DisassembleAll) {
+ for (const SectionRef &Section : MachOOF->sections()) {
+ StringRef SectName;
+ Section.getName(SectName);
+ if (SectName.equals("__text")) {
+ DataRefImpl Ref = Section.getRawDataRefImpl();
+ StringRef SegName = MachOOF->getSectionFinalSegmentName(Ref);
+ DisassembleMachO(FileName, MachOOF, SegName, SectName);
+ }
+ }
+ }
+ else if (Disassemble) {
if (MachOOF->getHeader().filetype == MachO::MH_KEXT_BUNDLE &&
MachOOF->getHeader().cputype == MachO::CPU_TYPE_ARM64)
DisassembleMachO(FileName, MachOOF, "__TEXT_EXEC", "__text");
@@ -1626,10 +1638,10 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
if (Relocations)
PrintRelocations(MachOOF, !NonVerbose);
if (SectionHeaders)
- PrintSectionHeaders(MachOOF);
+ printSectionHeaders(MachOOF);
if (SectionContents)
- PrintSectionContents(MachOOF);
- if (FilterSections.size() != 0)
+ printSectionContents(MachOOF);
+ if (!FilterSections.empty())
DumpSectionContents(FileName, MachOOF, !NonVerbose);
if (InfoPlist)
DumpInfoPlistSectionContents(FileName, MachOOF);
@@ -1638,7 +1650,7 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
if (DylibId)
PrintDylibs(MachOOF, true);
if (SymbolTable)
- PrintSymbolTable(MachOOF, ArchiveName, ArchitectureName);
+ printSymbolTable(MachOOF, ArchiveName, ArchitectureName);
if (UnwindInfo)
printMachOUnwindInfo(MachOOF);
if (PrivateHeaders) {
@@ -1937,23 +1949,30 @@ static void printArchiveHeaders(StringRef Filename, Archive *A, bool verbose,
report_error(StringRef(), Filename, std::move(Err), ArchitectureName);
}
-// ParseInputMachO() parses the named Mach-O file in Filename and handles the
-// -arch flags selecting just those slices as specified by them and also parses
-// archive files. Then for each individual Mach-O file ProcessMachO() is
-// called to process the file based on the command line options.
-void llvm::ParseInputMachO(StringRef Filename) {
+static bool ValidateArchFlags() {
// Check for -arch all and verify the -arch flags are valid.
for (unsigned i = 0; i < ArchFlags.size(); ++i) {
if (ArchFlags[i] == "all") {
ArchAll = true;
} else {
if (!MachOObjectFile::isValidArch(ArchFlags[i])) {
- errs() << "llvm-objdump: Unknown architecture named '" + ArchFlags[i] +
- "'for the -arch option\n";
- return;
+ WithColor::error(errs(), "llvm-objdump")
+ << "unknown architecture named '" + ArchFlags[i] +
+ "'for the -arch option\n";
+ return false;
}
}
}
+ return true;
+}
+
+// ParseInputMachO() parses the named Mach-O file in Filename and handles the
+// -arch flags selecting just those slices as specified by them and also parses
+// archive files. Then for each individual Mach-O file ProcessMachO() is
+// called to process the file based on the command line options.
+void llvm::parseInputMachO(StringRef Filename) {
+ if (!ValidateArchFlags())
+ return;
// Attempt to open the binary.
Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Filename);
@@ -1989,191 +2008,199 @@ void llvm::ParseInputMachO(StringRef Filename) {
report_error(Filename, std::move(Err));
return;
}
- if (UniversalHeaders) {
- if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin))
- printMachOUniversalHeaders(UB, !NonVerbose);
- }
if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin)) {
- // If we have a list of architecture flags specified dump only those.
- if (!ArchAll && ArchFlags.size() != 0) {
- // Look for a slice in the universal binary that matches each ArchFlag.
- bool ArchFound;
- for (unsigned i = 0; i < ArchFlags.size(); ++i) {
- ArchFound = false;
- for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
- E = UB->end_objects();
- I != E; ++I) {
- if (ArchFlags[i] == I->getArchFlagName()) {
- ArchFound = true;
- Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
- I->getAsObjectFile();
- std::string ArchitectureName = "";
- if (ArchFlags.size() > 1)
- ArchitectureName = I->getArchFlagName();
- if (ObjOrErr) {
- ObjectFile &O = *ObjOrErr.get();
- if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
- ProcessMachO(Filename, MachOOF, "", ArchitectureName);
- } else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- report_error(Filename, StringRef(), std::move(E),
- ArchitectureName);
- continue;
- } else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
- std::unique_ptr<Archive> &A = *AOrErr;
- outs() << "Archive : " << Filename;
- if (!ArchitectureName.empty())
- outs() << " (architecture " << ArchitectureName << ")";
- outs() << "\n";
- if (ArchiveHeaders)
- printArchiveHeaders(Filename, A.get(), !NonVerbose,
- ArchiveMemberOffsets, ArchitectureName);
- Error Err = Error::success();
- for (auto &C : A->children(Err)) {
- Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
- if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(Filename, C, std::move(E), ArchitectureName);
- continue;
- }
- if (MachOObjectFile *O =
- dyn_cast<MachOObjectFile>(&*ChildOrErr.get()))
- ProcessMachO(Filename, O, O->getFileName(), ArchitectureName);
- }
- if (Err)
- report_error(Filename, std::move(Err));
- } else {
- consumeError(AOrErr.takeError());
- error("Mach-O universal file: " + Filename + " for " +
- "architecture " + StringRef(I->getArchFlagName()) +
- " is not a Mach-O file or an archive file");
- }
- }
- }
- if (!ArchFound) {
- errs() << "llvm-objdump: file: " + Filename + " does not contain "
- << "architecture: " + ArchFlags[i] + "\n";
- return;
- }
- }
+ parseInputMachO(UB);
+ return;
+ }
+ if (ObjectFile *O = dyn_cast<ObjectFile>(&Bin)) {
+ if (!checkMachOAndArchFlags(O, Filename))
return;
- }
- // No architecture flags were specified so if this contains a slice that
- // matches the host architecture dump only that.
- if (!ArchAll) {
+ if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&*O))
+ ProcessMachO(Filename, MachOOF);
+ else
+ WithColor::error(errs(), "llvm-objdump")
+ << Filename << "': "
+ << "object is not a Mach-O file type.\n";
+ return;
+ }
+ llvm_unreachable("Input object can't be invalid at this point");
+}
+
+void llvm::parseInputMachO(MachOUniversalBinary *UB) {
+ if (!ValidateArchFlags())
+ return;
+
+ auto Filename = UB->getFileName();
+
+ if (UniversalHeaders)
+ printMachOUniversalHeaders(UB, !NonVerbose);
+
+ // If we have a list of architecture flags specified dump only those.
+ if (!ArchAll && !ArchFlags.empty()) {
+ // Look for a slice in the universal binary that matches each ArchFlag.
+ bool ArchFound;
+ for (unsigned i = 0; i < ArchFlags.size(); ++i) {
+ ArchFound = false;
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
- E = UB->end_objects();
- I != E; ++I) {
- if (MachOObjectFile::getHostArch().getArchName() ==
- I->getArchFlagName()) {
- Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
- std::string ArchiveName;
- ArchiveName.clear();
+ E = UB->end_objects();
+ I != E; ++I) {
+ if (ArchFlags[i] == I->getArchFlagName()) {
+ ArchFound = true;
+ Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
+ I->getAsObjectFile();
+ std::string ArchitectureName = "";
+ if (ArchFlags.size() > 1)
+ ArchitectureName = I->getArchFlagName();
if (ObjOrErr) {
ObjectFile &O = *ObjOrErr.get();
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
- ProcessMachO(Filename, MachOOF);
+ ProcessMachO(Filename, MachOOF, "", ArchitectureName);
} else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- report_error(Filename, std::move(E));
+ ObjOrErr.takeError())) {
+ report_error(Filename, StringRef(), std::move(E),
+ ArchitectureName);
continue;
} else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
+ I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
- outs() << "Archive : " << Filename << "\n";
+ outs() << "Archive : " << Filename;
+ if (!ArchitectureName.empty())
+ outs() << " (architecture " << ArchitectureName << ")";
+ outs() << "\n";
if (ArchiveHeaders)
printArchiveHeaders(Filename, A.get(), !NonVerbose,
- ArchiveMemberOffsets);
+ ArchiveMemberOffsets, ArchitectureName);
Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(Filename, C, std::move(E));
+ report_error(Filename, C, std::move(E), ArchitectureName);
continue;
}
if (MachOObjectFile *O =
dyn_cast<MachOObjectFile>(&*ChildOrErr.get()))
- ProcessMachO(Filename, O, O->getFileName());
+ ProcessMachO(Filename, O, O->getFileName(), ArchitectureName);
}
if (Err)
report_error(Filename, std::move(Err));
} else {
consumeError(AOrErr.takeError());
- error("Mach-O universal file: " + Filename + " for architecture " +
- StringRef(I->getArchFlagName()) +
+ error("Mach-O universal file: " + Filename + " for " +
+ "architecture " + StringRef(I->getArchFlagName()) +
" is not a Mach-O file or an archive file");
}
- return;
}
}
+ if (!ArchFound) {
+ WithColor::error(errs(), "llvm-objdump")
+ << "file: " + Filename + " does not contain "
+ << "architecture: " + ArchFlags[i] + "\n";
+ return;
+ }
}
- // Either all architectures have been specified or none have been specified
- // and this does not contain the host architecture so dump all the slices.
- bool moreThanOneArch = UB->getNumberOfObjects() > 1;
+ return;
+ }
+ // No architecture flags were specified so if this contains a slice that
+ // matches the host architecture dump only that.
+ if (!ArchAll) {
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
- E = UB->end_objects();
- I != E; ++I) {
- Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
- std::string ArchitectureName = "";
- if (moreThanOneArch)
- ArchitectureName = I->getArchFlagName();
- if (ObjOrErr) {
- ObjectFile &Obj = *ObjOrErr.get();
- if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&Obj))
- ProcessMachO(Filename, MachOOF, "", ArchitectureName);
- } else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- report_error(StringRef(), Filename, std::move(E), ArchitectureName);
- continue;
- } else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
- std::unique_ptr<Archive> &A = *AOrErr;
- outs() << "Archive : " << Filename;
- if (!ArchitectureName.empty())
- outs() << " (architecture " << ArchitectureName << ")";
- outs() << "\n";
- if (ArchiveHeaders)
- printArchiveHeaders(Filename, A.get(), !NonVerbose,
- ArchiveMemberOffsets, ArchitectureName);
- Error Err = Error::success();
- for (auto &C : A->children(Err)) {
- Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
- if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(Filename, C, std::move(E), ArchitectureName);
- continue;
- }
- if (MachOObjectFile *O =
- dyn_cast<MachOObjectFile>(&*ChildOrErr.get())) {
- if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(O))
- ProcessMachO(Filename, MachOOF, MachOOF->getFileName(),
- ArchitectureName);
+ E = UB->end_objects();
+ I != E; ++I) {
+ if (MachOObjectFile::getHostArch().getArchName() ==
+ I->getArchFlagName()) {
+ Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
+ std::string ArchiveName;
+ ArchiveName.clear();
+ if (ObjOrErr) {
+ ObjectFile &O = *ObjOrErr.get();
+ if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
+ ProcessMachO(Filename, MachOOF);
+ } else if (auto E = isNotObjectErrorInvalidFileType(
+ ObjOrErr.takeError())) {
+ report_error(Filename, std::move(E));
+ } else if (Expected<std::unique_ptr<Archive>> AOrErr =
+ I->getAsArchive()) {
+ std::unique_ptr<Archive> &A = *AOrErr;
+ outs() << "Archive : " << Filename << "\n";
+ if (ArchiveHeaders)
+ printArchiveHeaders(Filename, A.get(), !NonVerbose,
+ ArchiveMemberOffsets);
+ Error Err = Error::success();
+ for (auto &C : A->children(Err)) {
+ Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
+ if (!ChildOrErr) {
+ if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ report_error(Filename, C, std::move(E));
+ continue;
+ }
+ if (MachOObjectFile *O =
+ dyn_cast<MachOObjectFile>(&*ChildOrErr.get()))
+ ProcessMachO(Filename, O, O->getFileName());
}
+ if (Err)
+ report_error(Filename, std::move(Err));
+ } else {
+ consumeError(AOrErr.takeError());
+ error("Mach-O universal file: " + Filename + " for architecture " +
+ StringRef(I->getArchFlagName()) +
+ " is not a Mach-O file or an archive file");
}
- if (Err)
- report_error(Filename, std::move(Err));
- } else {
- consumeError(AOrErr.takeError());
- error("Mach-O universal file: " + Filename + " for architecture " +
- StringRef(I->getArchFlagName()) +
- " is not a Mach-O file or an archive file");
+ return;
}
}
- return;
}
- if (ObjectFile *O = dyn_cast<ObjectFile>(&Bin)) {
- if (!checkMachOAndArchFlags(O, Filename))
- return;
- if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&*O)) {
- ProcessMachO(Filename, MachOOF);
- } else
- errs() << "llvm-objdump: '" << Filename << "': "
- << "Object is not a Mach-O file type.\n";
- return;
+ // Either all architectures have been specified or none have been specified
+ // and this does not contain the host architecture so dump all the slices.
+ bool moreThanOneArch = UB->getNumberOfObjects() > 1;
+ for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
+ E = UB->end_objects();
+ I != E; ++I) {
+ Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
+ std::string ArchitectureName = "";
+ if (moreThanOneArch)
+ ArchitectureName = I->getArchFlagName();
+ if (ObjOrErr) {
+ ObjectFile &Obj = *ObjOrErr.get();
+ if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&Obj))
+ ProcessMachO(Filename, MachOOF, "", ArchitectureName);
+ } else if (auto E = isNotObjectErrorInvalidFileType(
+ ObjOrErr.takeError())) {
+ report_error(StringRef(), Filename, std::move(E), ArchitectureName);
+ } else if (Expected<std::unique_ptr<Archive>> AOrErr =
+ I->getAsArchive()) {
+ std::unique_ptr<Archive> &A = *AOrErr;
+ outs() << "Archive : " << Filename;
+ if (!ArchitectureName.empty())
+ outs() << " (architecture " << ArchitectureName << ")";
+ outs() << "\n";
+ if (ArchiveHeaders)
+ printArchiveHeaders(Filename, A.get(), !NonVerbose,
+ ArchiveMemberOffsets, ArchitectureName);
+ Error Err = Error::success();
+ for (auto &C : A->children(Err)) {
+ Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
+ if (!ChildOrErr) {
+ if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ report_error(Filename, C, std::move(E), ArchitectureName);
+ continue;
+ }
+ if (MachOObjectFile *O =
+ dyn_cast<MachOObjectFile>(&*ChildOrErr.get())) {
+ if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(O))
+ ProcessMachO(Filename, MachOOF, MachOOF->getFileName(),
+ ArchitectureName);
+ }
+ }
+ if (Err)
+ report_error(Filename, std::move(Err));
+ } else {
+ consumeError(AOrErr.takeError());
+ error("Mach-O universal file: " + Filename + " for architecture " +
+ StringRef(I->getArchFlagName()) +
+ " is not a Mach-O file or an archive file");
+ }
}
- llvm_unreachable("Input object can't be invalid at this point");
}
// The block of info used by the Symbolizer call backs.
@@ -5609,7 +5636,9 @@ static void print_image_info64(SectionRef S, struct DisassembleInfo *info) {
else if(swift_version == 5)
outs() << " Swift 4.0";
else if(swift_version == 6)
- outs() << " Swift 4.1";
+ outs() << " Swift 4.1/Swift 4.2";
+ else if(swift_version == 7)
+ outs() << " Swift 5 or later";
else
outs() << " unknown future Swift version (" << swift_version << ")";
}
@@ -5660,7 +5689,9 @@ static void print_image_info32(SectionRef S, struct DisassembleInfo *info) {
else if(swift_version == 5)
outs() << " Swift 4.0";
else if(swift_version == 6)
- outs() << " Swift 4.1";
+ outs() << " Swift 4.1/Swift 4.2";
+ else if(swift_version == 7)
+ outs() << " Swift 5 or later";
else
outs() << " unknown future Swift version (" << swift_version << ")";
}
@@ -6172,8 +6203,9 @@ static void PrintXarFilesSummary(const char *XarFilename, xar_t xar) {
ScopedXarIter xi;
if (!xi) {
- errs() << "Can't obtain an xar iterator for xar archive "
- << XarFilename << "\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "can't obtain an xar iterator for xar archive " << XarFilename
+ << "\n";
return;
}
@@ -6181,8 +6213,9 @@ static void PrintXarFilesSummary(const char *XarFilename, xar_t xar) {
for (xf = xar_file_first(xar, xi); xf; xf = xar_file_next(xi)) {
ScopedXarIter xp;
if(!xp){
- errs() << "Can't obtain an xar iterator for xar archive "
- << XarFilename << "\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "can't obtain an xar iterator for xar archive " << XarFilename
+ << "\n";
return;
}
type = nullptr;
@@ -6306,7 +6339,7 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
std::error_code XarEC =
sys::fs::createTemporaryFile("llvm-objdump", "xar", FD, XarFilename);
if (XarEC) {
- errs() << XarEC.message() << "\n";
+ WithColor::error(errs(), "llvm-objdump") << XarEC.message() << "\n";
return;
}
ToolOutputFile XarFile(XarFilename, FD);
@@ -6319,7 +6352,8 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
ScopedXarFile xar(XarFilename.c_str(), READ);
if (!xar) {
- errs() << "Can't create temporary xar archive " << XarFilename << "\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "can't create temporary xar archive " << XarFilename << "\n";
return;
}
@@ -6327,7 +6361,7 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
std::error_code TocEC =
sys::fs::createTemporaryFile("llvm-objdump", "toc", TocFilename);
if (TocEC) {
- errs() << TocEC.message() << "\n";
+ WithColor::error(errs(), "llvm-objdump") << TocEC.message() << "\n";
return;
}
xar_serialize(xar, TocFilename.c_str());
@@ -6344,7 +6378,7 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(TocFilename.c_str());
if (std::error_code EC = FileOrErr.getError()) {
- errs() << EC.message() << "\n";
+ WithColor::error(errs(), "llvm-objdump") << EC.message() << "\n";
return;
}
std::unique_ptr<MemoryBuffer> &Buffer = FileOrErr.get();
@@ -6359,8 +6393,9 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
// TODO: Go through the xar's files.
ScopedXarIter xi;
if(!xi){
- errs() << "Can't obtain an xar iterator for xar archive "
- << XarFilename.c_str() << "\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "can't obtain an xar iterator for xar archive "
+ << XarFilename.c_str() << "\n";
return;
}
for(xar_file_t xf = xar_file_first(xar, xi); xf; xf = xar_file_next(xi)){
@@ -6370,8 +6405,9 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
ScopedXarIter xp;
if(!xp){
- errs() << "Can't obtain an xar iterator for xar archive "
- << XarFilename.c_str() << "\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "can't obtain an xar iterator for xar archive "
+ << XarFilename.c_str() << "\n";
return;
}
member_name = NULL;
@@ -6805,7 +6841,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
- if (MAttrs.size()) {
+ if (!MAttrs.empty()) {
SubtargetFeatures Features;
for (unsigned i = 0; i != MAttrs.size(); ++i)
Features.AddFeature(MAttrs[i]);
@@ -6848,8 +6884,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
// IP->setCommentStream(CommentStream);
if (!AsmInfo || !STI || !DisAsm || !IP) {
- errs() << "error: couldn't initialize disassembler for target "
- << TripleName << '\n';
+ WithColor::error(errs(), "llvm-objdump")
+ << "couldn't initialize disassembler for target " << TripleName << '\n';
return;
}
@@ -6890,8 +6926,9 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
}
if (ThumbTarget && (!ThumbAsmInfo || !ThumbSTI || !ThumbDisAsm || !ThumbIP)) {
- errs() << "error: couldn't initialize disassembler for target "
- << ThumbTripleName << '\n';
+ WithColor::error(errs(), "llvm-objdump")
+ << "couldn't initialize disassembler for target " << ThumbTripleName
+ << '\n';
return;
}
@@ -6910,7 +6947,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
BaseSegmentAddress);
// Sort the symbols by address, just in case they didn't come in that way.
- llvm::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
+ llvm::sort(Symbols, SymbolSorter());
// Build a data in code table that is sorted on by the address of each entry.
uint64_t BaseAddress = 0;
@@ -6935,6 +6972,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
std::unique_ptr<DIContext> diContext;
ObjectFile *DbgObj = MachOOF;
+ std::unique_ptr<MemoryBuffer> DSYMBuf;
// Try to find debug info and set up the DIContext for it.
if (UseDbg) {
// A separate DSym file path was specified, parse it as a macho file,
@@ -6943,22 +6981,28 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
MemoryBuffer::getFileOrSTDIN(DSYMFile);
if (std::error_code EC = BufOrErr.getError()) {
- errs() << "llvm-objdump: " << Filename << ": " << EC.message() << '\n';
+ report_error(DSYMFile, errorCodeToError(EC));
return;
}
+
Expected<std::unique_ptr<MachOObjectFile>> DbgObjCheck =
ObjectFile::createMachOObjectFile(BufOrErr.get()->getMemBufferRef());
- if (DbgObjCheck.takeError())
- report_error(MachOOF->getFileName(), DbgObjCheck.takeError());
+ if (Error E = DbgObjCheck.takeError()) {
+ report_error(DSYMFile, std::move(E));
+ return;
+ }
+
DbgObj = DbgObjCheck.get().release();
+ // We need to keep the file alive, because we're replacing DbgObj with it.
+ DSYMBuf = std::move(BufOrErr.get());
}
// Setup the DIContext
diContext = DWARFContext::create(*DbgObj);
}
- if (FilterSections.size() == 0)
+ if (FilterSections.empty())
outs() << "(" << DisSegName << "," << DisSectName << ") section\n";
for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
@@ -7021,7 +7065,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
unsigned int Arch = MachOOF->getArch();
// Skip all symbols if this is a stubs file.
- if (Bytes.size() == 0)
+ if (Bytes.empty())
return;
// If the section has symbols but no symbol at the start of the section
@@ -7228,7 +7272,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
outs() << format("\t.short\t0x%04x\n", opcode);
Size = 2;
} else{
- errs() << "llvm-objdump: warning: invalid instruction encoding\n";
+ WithColor::warning(errs(), "llvm-objdump")
+ << "invalid instruction encoding\n";
if (Size == 0)
Size = 1; // skip illegible bytes
}
@@ -7275,7 +7320,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
*(Bytes.data() + Index) & 0xff);
InstSize = 1; // skip exactly one illegible byte and move on.
} else {
- errs() << "llvm-objdump: warning: invalid instruction encoding\n";
+ WithColor::warning(errs(), "llvm-objdump")
+ << "invalid instruction encoding\n";
if (InstSize == 0)
InstSize = 1; // skip illegible bytes
}
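
One MachODump.cpp change worth calling out: DisassembleMachO() now keeps the -dsym file's MemoryBuffer alive in the new DSYMBuf variable. A MachOObjectFile created from a MemoryBufferRef only references the buffer's storage, so releasing the object while letting the buffer go out of scope left DbgObj (and the DWARFContext built on it) pointing at freed memory. A simplified sketch of the ownership pattern, not the tool's actual code (error handling reduced to the minimum the Error API requires):

    #include "llvm/Object/MachO.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <memory>

    using namespace llvm;
    using namespace llvm::object;

    static std::unique_ptr<MemoryBuffer> DSYMBuf; // keeps the mapped file alive

    ObjectFile *loadDSYM(StringRef Path) {
      auto BufOrErr = MemoryBuffer::getFileOrSTDIN(Path);
      if (!BufOrErr)
        return nullptr;
      auto ObjOrErr =
          ObjectFile::createMachOObjectFile((*BufOrErr)->getMemBufferRef());
      if (!ObjOrErr) {
        consumeError(ObjOrErr.takeError());
        return nullptr;
      }
      // Hand the buffer to a long-lived owner *before* releasing the raw
      // ObjectFile pointer; otherwise the bytes it points into would be
      // freed when BufOrErr goes out of scope.
      DSYMBuf = std::move(*BufOrErr);
      return ObjOrErr->release();
    }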
diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 8041e6f59940..ba8d3c5b8d5c 100644
--- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -42,6 +42,7 @@
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/Casting.h"
@@ -55,8 +56,10 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
@@ -91,12 +94,11 @@ static cl::alias
DisassembleAlld("D", cl::desc("Alias for --disassemble-all"),
cl::aliasopt(DisassembleAll));
-cl::opt<std::string> llvm::Demangle("demangle",
- cl::desc("Demangle symbols names"),
- cl::ValueOptional, cl::init("none"));
+cl::opt<bool> llvm::Demangle("demangle", cl::desc("Demangle symbols names"),
+ cl::init(false));
static cl::alias DemangleShort("C", cl::desc("Alias for --demangle"),
- cl::aliasopt(Demangle));
+ cl::aliasopt(llvm::Demangle));
static cl::list<std::string>
DisassembleFunctions("df",
@@ -105,7 +107,11 @@ DisassembleFunctions("df",
static StringSet<> DisasmFuncsSet;
cl::opt<bool>
-llvm::Relocations("r", cl::desc("Display the relocation entries in the file"));
+llvm::Relocations("reloc",
+ cl::desc("Display the relocation entries in the file"));
+static cl::alias RelocationsShort("r", cl::desc("Alias for --reloc"),
+ cl::NotHidden,
+ cl::aliasopt(llvm::Relocations));
cl::opt<bool>
llvm::DynamicRelocations("dynamic-reloc",
@@ -115,10 +121,16 @@ DynamicRelocationsd("R", cl::desc("Alias for --dynamic-reloc"),
cl::aliasopt(DynamicRelocations));
cl::opt<bool>
-llvm::SectionContents("s", cl::desc("Display the content of each section"));
+ llvm::SectionContents("full-contents",
+ cl::desc("Display the content of each section"));
+static cl::alias SectionContentsShort("s",
+ cl::desc("Alias for --full-contents"),
+ cl::aliasopt(SectionContents));
-cl::opt<bool>
-llvm::SymbolTable("t", cl::desc("Display the symbol table"));
+cl::opt<bool> llvm::SymbolTable("syms", cl::desc("Display the symbol table"));
+static cl::alias SymbolTableShort("t", cl::desc("Alias for --syms"),
+ cl::NotHidden,
+ cl::aliasopt(llvm::SymbolTable));
cl::opt<bool>
llvm::ExportsTrie("exports-trie", cl::desc("Display mach-o exported symbols"));
@@ -253,8 +265,17 @@ cl::opt<unsigned long long>
StartAddress("start-address", cl::desc("Disassemble beginning at address"),
cl::value_desc("address"), cl::init(0));
cl::opt<unsigned long long>
- StopAddress("stop-address", cl::desc("Stop disassembly at address"),
+ StopAddress("stop-address",
+ cl::desc("Stop disassembly at address"),
cl::value_desc("address"), cl::init(UINT64_MAX));
+
+cl::opt<bool> DisassembleZeroes(
+ "disassemble-zeroes",
+ cl::desc("Do not skip blocks of zeroes when disassembling"));
+cl::alias DisassembleZeroesShort("z",
+ cl::desc("Alias for --disassemble-zeroes"),
+ cl::aliasopt(DisassembleZeroes));
+
static StringRef ToolName;
typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy;
@@ -326,33 +347,35 @@ SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O) {
void llvm::error(std::error_code EC) {
if (!EC)
return;
-
- errs() << ToolName << ": error reading file: " << EC.message() << ".\n";
+ WithColor::error(errs(), ToolName)
+ << "reading file: " << EC.message() << ".\n";
errs().flush();
exit(1);
}
LLVM_ATTRIBUTE_NORETURN void llvm::error(Twine Message) {
- errs() << ToolName << ": " << Message << ".\n";
+ WithColor::error(errs(), ToolName) << Message << ".\n";
errs().flush();
exit(1);
}
void llvm::warn(StringRef Message) {
- errs() << ToolName << ": warning: " << Message << ".\n";
+ WithColor::warning(errs(), ToolName) << Message << ".\n";
errs().flush();
}
LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
Twine Message) {
- errs() << ToolName << ": '" << File << "': " << Message << ".\n";
+ WithColor::error(errs(), ToolName)
+ << "'" << File << "': " << Message << ".\n";
exit(1);
}
LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
std::error_code EC) {
assert(EC);
- errs() << ToolName << ": '" << File << "': " << EC.message() << ".\n";
+ WithColor::error(errs(), ToolName)
+ << "'" << File << "': " << EC.message() << ".\n";
exit(1);
}
@@ -361,9 +384,9 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
assert(E);
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
- errs() << ToolName << ": '" << File << "': " << Buf;
+ WithColor::error(errs(), ToolName) << "'" << File << "': " << Buf;
exit(1);
}
@@ -372,7 +395,7 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
llvm::Error E,
StringRef ArchitectureName) {
assert(E);
- errs() << ToolName << ": ";
+ WithColor::error(errs(), ToolName);
if (ArchiveName != "")
errs() << ArchiveName << "(" << FileName << ")";
else
@@ -381,7 +404,7 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
errs() << " (for architecture " << ArchitectureName << ")";
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
errs() << ": " << Buf;
exit(1);
@@ -407,18 +430,16 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) {
// Figure out the target triple.
llvm::Triple TheTriple("unknown-unknown-unknown");
if (TripleName.empty()) {
- if (Obj) {
+ if (Obj)
TheTriple = Obj->makeTriple();
- }
} else {
TheTriple.setTriple(Triple::normalize(TripleName));
// Use the triple, but also try to combine with ARM build attributes.
if (Obj) {
auto Arch = Obj->getArch();
- if (Arch == Triple::arm || Arch == Triple::armeb) {
+ if (Arch == Triple::arm || Arch == Triple::armeb)
Obj->setARMSubArch(TheTriple);
- }
}
}
@@ -438,22 +459,35 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) {
return TheTarget;
}
-bool llvm::RelocAddressLess(RelocationRef a, RelocationRef b) {
- return a.getOffset() < b.getOffset();
+bool llvm::isRelocAddressLess(RelocationRef A, RelocationRef B) {
+ return A.getOffset() < B.getOffset();
+}
+
+static std::string demangle(StringRef Name) {
+ char *Demangled = nullptr;
+ if (Name.startswith("_Z"))
+ Demangled = itaniumDemangle(Name.data(), Demangled, nullptr, nullptr);
+ else if (Name.startswith("?"))
+ Demangled = microsoftDemangle(Name.data(), Demangled, nullptr, nullptr);
+
+ if (!Demangled)
+ return Name;
+
+ std::string Ret = Demangled;
+ free(Demangled);
+ return Ret;
}
template <class ELFT>
static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
const RelocationRef &RelRef,
SmallVectorImpl<char> &Result) {
- DataRefImpl Rel = RelRef.getRawDataRefImpl();
-
typedef typename ELFObjectFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename ELFObjectFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename ELFObjectFile<ELFT>::Elf_Rela Elf_Rela;
const ELFFile<ELFT> &EF = *Obj->getELFFile();
-
+ DataRefImpl Rel = RelRef.getRawDataRefImpl();
auto SecOrErr = EF.getSection(Rel.d.a);
if (!SecOrErr)
return errorToErrorCode(SecOrErr.takeError());
@@ -471,11 +505,11 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
if (!StrTabOrErr)
return errorToErrorCode(StrTabOrErr.takeError());
StringRef StrTab = *StrTabOrErr;
- int64_t addend = 0;
+ int64_t Addend = 0;
// If there is no Symbol associated with the relocation, we set the undef
// boolean value to 'true'. This will prevent us from calling functions that
// require the relocation to be associated with a symbol.
- bool undef = false;
+ bool Undef = false;
switch (Sec->sh_type) {
default:
return object_error::parse_failed;
@@ -485,13 +519,13 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
}
case ELF::SHT_RELA: {
const Elf_Rela *ERela = Obj->getRela(Rel);
- addend = ERela->r_addend;
- undef = ERela->getSymbol(false) == 0;
+ Addend = ERela->r_addend;
+ Undef = ERela->getSymbol(false) == 0;
break;
}
}
- StringRef Target;
- if (!undef) {
+ std::string Target;
+ if (!Undef) {
symbol_iterator SI = RelRef.getSymbol();
const Elf_Sym *symb = Obj->getSymbol(SI->getRawDataRefImpl());
if (symb->getType() == ELF::STT_SECTION) {
@@ -507,20 +541,23 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
Expected<StringRef> SymName = symb->getName(StrTab);
if (!SymName)
return errorToErrorCode(SymName.takeError());
- Target = *SymName;
+ if (Demangle)
+ Target = demangle(*SymName);
+ else
+ Target = *SymName;
}
} else
Target = "*ABS*";
// Default scheme is to print Target, as well as "+ <addend>" for nonzero
// addend. Should be acceptable for all normal purposes.
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
- fmt << Target;
- if (addend != 0)
- fmt << (addend < 0 ? "" : "+") << addend;
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
+ std::string FmtBuf;
+ raw_string_ostream Fmt(FmtBuf);
+ Fmt << Target;
+ if (Addend != 0)
+ Fmt << (Addend < 0 ? "" : "+") << Addend;
+ Fmt.flush();
+ Result.append(FmtBuf.begin(), FmtBuf.end());
return std::error_code();
}
@@ -551,18 +588,15 @@ static std::error_code getRelocationValueString(const COFFObjectFile *Obj,
static void printRelocationTargetName(const MachOObjectFile *O,
const MachO::any_relocation_info &RE,
- raw_string_ostream &fmt) {
- bool IsScattered = O->isRelocationScattered(RE);
-
+ raw_string_ostream &Fmt) {
// Target of a scattered relocation is an address. In the interest of
// generating pretty output, scan through the symbol table looking for a
// symbol that aligns with that address. If we find one, print it.
// Otherwise, we just print the hex address of the target.
- if (IsScattered) {
+ if (O->isRelocationScattered(RE)) {
uint32_t Val = O->getPlainRelocationSymbolNum(RE);
for (const SymbolRef &Symbol : O->symbols()) {
- std::error_code ec;
Expected<uint64_t> Addr = Symbol.getAddress();
if (!Addr)
report_error(O->getFileName(), Addr.takeError());
@@ -571,7 +605,7 @@ static void printRelocationTargetName(const MachOObjectFile *O,
Expected<StringRef> Name = Symbol.getName();
if (!Name)
report_error(O->getFileName(), Name.takeError());
- fmt << *Name;
+ Fmt << *Name;
return;
}
@@ -586,11 +620,11 @@ static void printRelocationTargetName(const MachOObjectFile *O,
continue;
if ((ec = Section.getName(Name)))
report_error(O->getFileName(), ec);
- fmt << Name;
+ Fmt << Name;
return;
}
- fmt << format("0x%x", Val);
+ Fmt << format("0x%x", Val);
return;
}
@@ -599,9 +633,11 @@ static void printRelocationTargetName(const MachOObjectFile *O,
uint64_t Val = O->getPlainRelocationSymbolNum(RE);
if (O->getAnyRelocationType(RE) == MachO::ARM64_RELOC_ADDEND) {
- fmt << format("0x%0" PRIx64, Val);
+ Fmt << format("0x%0" PRIx64, Val);
return;
- } else if (isExtern) {
+ }
+
+ if (isExtern) {
symbol_iterator SI = O->symbol_begin();
advance(SI, Val);
Expected<StringRef> SOrErr = SI->getName();
@@ -612,21 +648,21 @@ static void printRelocationTargetName(const MachOObjectFile *O,
section_iterator SI = O->section_begin();
// Adjust for the fact that sections are 1-indexed.
if (Val == 0) {
- fmt << "0 (?,?)";
+ Fmt << "0 (?,?)";
return;
}
- uint32_t i = Val - 1;
- while (i != 0 && SI != O->section_end()) {
- i--;
+ uint32_t I = Val - 1;
+ while (I != 0 && SI != O->section_end()) {
+ --I;
advance(SI, 1);
}
if (SI == O->section_end())
- fmt << Val << " (?,?)";
+ Fmt << Val << " (?,?)";
else
SI->getName(S);
}
- fmt << S;
+ Fmt << S;
}
static std::error_code getRelocationValueString(const WasmObjectFile *Obj,
@@ -634,12 +670,12 @@ static std::error_code getRelocationValueString(const WasmObjectFile *Obj,
SmallVectorImpl<char> &Result) {
const wasm::WasmRelocation& Rel = Obj->getWasmRelocation(RelRef);
symbol_iterator SI = RelRef.getSymbol();
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
+ std::string FmtBuf;
+ raw_string_ostream Fmt(FmtBuf);
if (SI == Obj->symbol_end()) {
// Not all wasm relocations have symbols associated with them.
// In particular R_WEBASSEMBLY_TYPE_INDEX_LEB.
- fmt << Rel.Index;
+ Fmt << Rel.Index;
} else {
Expected<StringRef> SymNameOrErr = SI->getName();
if (!SymNameOrErr)
@@ -647,9 +683,9 @@ static std::error_code getRelocationValueString(const WasmObjectFile *Obj,
StringRef SymName = *SymNameOrErr;
Result.append(SymName.begin(), SymName.end());
}
- fmt << (Rel.Addend < 0 ? "" : "+") << Rel.Addend;
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
+ Fmt << (Rel.Addend < 0 ? "" : "+") << Rel.Addend;
+ Fmt.flush();
+ Result.append(FmtBuf.begin(), FmtBuf.end());
return std::error_code();
}
@@ -661,8 +697,8 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
unsigned Arch = Obj->getArch();
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
+ std::string FmtBuf;
+ raw_string_ostream Fmt(FmtBuf);
unsigned Type = Obj->getAnyRelocationType(RE);
bool IsPCRel = Obj->getAnyRelocationPCRel(RE);
@@ -671,15 +707,13 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
// X86_64 has entirely custom relocation types.
if (Arch == Triple::x86_64) {
- bool isPCRel = Obj->getAnyRelocationPCRel(RE);
-
switch (Type) {
case MachO::X86_64_RELOC_GOT_LOAD:
case MachO::X86_64_RELOC_GOT: {
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "@GOT";
- if (isPCRel)
- fmt << "PCREL";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "@GOT";
+ if (IsPCRel)
+ Fmt << "PCREL";
break;
}
case MachO::X86_64_RELOC_SUBTRACTOR: {
@@ -697,31 +731,31 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
// The X86_64_RELOC_UNSIGNED contains the minuend symbol;
// X86_64_RELOC_SUBTRACTOR contains the subtrahend.
- printRelocationTargetName(Obj, RENext, fmt);
- fmt << "-";
- printRelocationTargetName(Obj, RE, fmt);
+ printRelocationTargetName(Obj, RENext, Fmt);
+ Fmt << "-";
+ printRelocationTargetName(Obj, RE, Fmt);
break;
}
case MachO::X86_64_RELOC_TLV:
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "@TLV";
- if (isPCRel)
- fmt << "P";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "@TLV";
+ if (IsPCRel)
+ Fmt << "P";
break;
case MachO::X86_64_RELOC_SIGNED_1:
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "-1";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-1";
break;
case MachO::X86_64_RELOC_SIGNED_2:
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "-2";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-2";
break;
case MachO::X86_64_RELOC_SIGNED_4:
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "-4";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-4";
break;
default:
- printRelocationTargetName(Obj, RE, fmt);
+ printRelocationTargetName(Obj, RE, Fmt);
break;
}
// X86 and ARM share some relocation types in common.
@@ -744,9 +778,9 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
"GENERIC_RELOC_SECTDIFF.");
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "-";
- printRelocationTargetName(Obj, RENext, fmt);
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-";
+ printRelocationTargetName(Obj, RENext, Fmt);
break;
}
}
@@ -765,20 +799,20 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
"GENERIC_RELOC_LOCAL_SECTDIFF.");
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "-";
- printRelocationTargetName(Obj, RENext, fmt);
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-";
+ printRelocationTargetName(Obj, RENext, Fmt);
break;
}
case MachO::GENERIC_RELOC_TLV: {
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "@TLV";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "@TLV";
if (IsPCRel)
- fmt << "P";
+ Fmt << "P";
break;
}
default:
- printRelocationTargetName(Obj, RE, fmt);
+ printRelocationTargetName(Obj, RE, Fmt);
}
} else { // ARM-specific relocations
switch (Type) {
@@ -789,10 +823,10 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
bool isUpper = (Obj->getAnyRelocationLength(RE) & 0x1) == 1;
if (isUpper)
- fmt << ":upper16:(";
+ Fmt << ":upper16:(";
else
- fmt << ":lower16:(";
- printRelocationTargetName(Obj, RE, fmt);
+ Fmt << ":lower16:(";
+ printRelocationTargetName(Obj, RE, Fmt);
DataRefImpl RelNext = Rel;
Obj->moveRelocationNext(RelNext);
@@ -813,21 +847,21 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
// ARM_RELOC_HALF_SECTDIFF encodes the second section in the
// symbol/section pointer of the follow-on relocation.
if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
- fmt << "-";
- printRelocationTargetName(Obj, RENext, fmt);
+ Fmt << "-";
+ printRelocationTargetName(Obj, RENext, Fmt);
}
- fmt << ")";
+ Fmt << ")";
break;
}
- default: { printRelocationTargetName(Obj, RE, fmt); }
+ default: { printRelocationTargetName(Obj, RE, Fmt); }
}
}
} else
- printRelocationTargetName(Obj, RE, fmt);
+ printRelocationTargetName(Obj, RE, Fmt);
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
+ Fmt.flush();
+ Result.append(FmtBuf.begin(), FmtBuf.end());
return std::error_code();
}
@@ -849,8 +883,7 @@ static std::error_code getRelocationValueString(const RelocationRef &Rel,
/// relocations, usually because it is the trailing part of a multipart
/// relocation that will be printed as part of the leading relocation.
static bool getHidden(RelocationRef RelRef) {
- const ObjectFile *Obj = RelRef.getObject();
- auto *MachO = dyn_cast<MachOObjectFile>(Obj);
+ auto *MachO = dyn_cast<MachOObjectFile>(RelRef.getObject());
if (!MachO)
return false;
@@ -860,10 +893,10 @@ static bool getHidden(RelocationRef RelRef) {
// On arches that use the generic relocations, GENERIC_RELOC_PAIR
// is always hidden.
- if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc) {
- if (Type == MachO::GENERIC_RELOC_PAIR)
- return true;
- } else if (Arch == Triple::x86_64) {
+ if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc)
+ return Type == MachO::GENERIC_RELOC_PAIR;
+
+ if (Arch == Triple::x86_64) {
// On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows
// an X86_64_RELOC_SUBTRACTOR.
if (Type == MachO::X86_64_RELOC_UNSIGNED && Rel.d.a > 0) {
@@ -1038,27 +1071,27 @@ public:
auto Preamble = " { ";
auto Separator = "";
StringRef Fmt = "\t\t\t%08" PRIx64 ": ";
- std::vector<RelocationRef>::const_iterator rel_cur = Rels->begin();
- std::vector<RelocationRef>::const_iterator rel_end = Rels->end();
+ std::vector<RelocationRef>::const_iterator RelCur = Rels->begin();
+ std::vector<RelocationRef>::const_iterator RelEnd = Rels->end();
// Hexagon's packets require relocations to be inline rather than
// clustered at the end of the packet.
auto PrintReloc = [&]() -> void {
- while ((rel_cur != rel_end) && (rel_cur->getOffset() <= Address)) {
- if (rel_cur->getOffset() == Address) {
- SmallString<16> name;
- SmallString<32> val;
- rel_cur->getTypeName(name);
- error(getRelocationValueString(*rel_cur, val));
- OS << Separator << format(Fmt.data(), Address) << name << "\t" << val
+ while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address)) {
+ if (RelCur->getOffset() == Address) {
+ SmallString<16> Name;
+ SmallString<32> Val;
+ RelCur->getTypeName(Name);
+ error(getRelocationValueString(*RelCur, Val));
+ OS << Separator << format(Fmt.data(), Address) << Name << "\t" << Val
<< "\n";
return;
}
- rel_cur++;
+ ++RelCur;
}
};
- while(!HeadTail.first.empty()) {
+ while (!HeadTail.first.empty()) {
OS << Separator;
Separator = "\n";
if (SP && (PrintSource || PrintLines))
@@ -1068,7 +1101,7 @@ public:
Preamble = " ";
StringRef Inst;
auto Duplex = HeadTail.first.split('\v');
- if(!Duplex.second.empty()){
+ if (!Duplex.second.empty()) {
OS << Duplex.first;
OS << "; ";
Inst = Duplex.second;
@@ -1200,7 +1233,6 @@ addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj,
Expected<uint64_t> AddressOrErr = Symbol.getAddress();
if (!AddressOrErr)
report_error(Obj->getFileName(), AddressOrErr.takeError());
- uint64_t Address = *AddressOrErr;
Expected<StringRef> Name = Symbol.getName();
if (!Name)
@@ -1215,7 +1247,7 @@ addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj,
if (SecI == Obj->section_end())
continue;
- AllSymbols[*SecI].emplace_back(Address, *Name, SymbolType);
+ AllSymbols[*SecI].emplace_back(*AddressOrErr, *Name, SymbolType);
}
}
@@ -1235,7 +1267,60 @@ addDynamicElfSymbols(const ObjectFile *Obj,
llvm_unreachable("Unsupported binary format");
}
-static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
+static void addPltEntries(const ObjectFile *Obj,
+ std::map<SectionRef, SectionSymbolsTy> &AllSymbols,
+ StringSaver &Saver) {
+ Optional<SectionRef> Plt = None;
+ for (const SectionRef &Section : Obj->sections()) {
+ StringRef Name;
+ if (Section.getName(Name))
+ continue;
+ if (Name == ".plt")
+ Plt = Section;
+ }
+ if (!Plt)
+ return;
+ if (auto *ElfObj = dyn_cast<ELFObjectFileBase>(Obj)) {
+ for (auto PltEntry : ElfObj->getPltAddresses()) {
+ SymbolRef Symbol(PltEntry.first, ElfObj);
+ uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
+
+ Expected<StringRef> NameOrErr = Symbol.getName();
+ if (!NameOrErr)
+ report_error(Obj->getFileName(), NameOrErr.takeError());
+ if (NameOrErr->empty())
+ continue;
+ StringRef Name = Saver.save((*NameOrErr + "@plt").str());
+
+ AllSymbols[*Plt].emplace_back(PltEntry.second, Name, SymbolType);
+ }
+ }
+}
+
+// Normally the disassembly output will skip blocks of zeroes. This function
+// returns the number of zero bytes that can be skipped when dumping the
+// disassembly of the instructions in Buf.
+static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
+ // When -z or --disassemble-zeroes is given we always disassemble them.
+ if (DisassembleZeroes)
+ return 0;
+
+ // Find the number of leading zeroes.
+ size_t N = 0;
+ while (N < Buf.size() && !Buf[N])
+ ++N;
+
+ // We may want to skip blocks of zero bytes, but unless we see
+ // at least 8 of them in a row.
+ if (N < 8)
+ return 0;
+
+ // We skip zeroes in multiples of 4 because we do not want to truncate an
+ // instruction if it starts with a zero byte.
+ return N & ~0x3;
+}
+
+static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (StartAddress > StopAddress)
error("Start address should be less than stop address");
@@ -1243,10 +1328,9 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Package up features to be passed to target/subtarget
SubtargetFeatures Features = Obj->getFeatures();
- if (MAttrs.size()) {
- for (unsigned i = 0; i != MAttrs.size(); ++i)
- Features.AddFeature(MAttrs[i]);
- }
+ if (!MAttrs.empty())
+ for (unsigned I = 0; I != MAttrs.size(); ++I)
+ Features.AddFeature(MAttrs[I]);
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
@@ -1342,6 +1426,10 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (AllSymbols.empty() && Obj->isELF())
addDynamicElfSymbols(Obj, AllSymbols);
+ BumpPtrAllocator A;
+ StringSaver Saver(A);
+ addPltEntries(Obj, AllSymbols, Saver);
+
// Create a mapping from virtual address to section.
std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses;
for (SectionRef Sec : Obj->sections())
@@ -1411,8 +1499,8 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
}
}
- llvm::sort(DataMappingSymsAddr.begin(), DataMappingSymsAddr.end());
- llvm::sort(TextMappingSymsAddr.begin(), TextMappingSymsAddr.end());
+ llvm::sort(DataMappingSymsAddr);
+ llvm::sort(TextMappingSymsAddr);
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// AMDGPU disassembler uses symbolizer for printing labels
@@ -1437,7 +1525,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
}
// Sort relocations by address.
- llvm::sort(Rels.begin(), Rels.end(), RelocAddressLess);
+ llvm::sort(Rels, isRelocAddressLess);
StringRef SegmentName = "";
if (const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj)) {
@@ -1467,15 +1555,16 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
uint64_t Index;
bool PrintedSection = false;
- std::vector<RelocationRef>::const_iterator rel_cur = Rels.begin();
- std::vector<RelocationRef>::const_iterator rel_end = Rels.end();
+ std::vector<RelocationRef>::const_iterator RelCur = Rels.begin();
+ std::vector<RelocationRef>::const_iterator RelEnd = Rels.end();
// Disassemble symbol by symbol.
- for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
- uint64_t Start = std::get<0>(Symbols[si]) - SectionAddr;
+ for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
+ uint64_t Start = std::get<0>(Symbols[SI]) - SectionAddr;
// The end is either the section end or the beginning of the next
// symbol.
- uint64_t End =
- (si == se - 1) ? SectSize : std::get<0>(Symbols[si + 1]) - SectionAddr;
+ uint64_t End = (SI == SE - 1)
+ ? SectSize
+ : std::get<0>(Symbols[SI + 1]) - SectionAddr;
// Don't try to disassemble beyond the end of section contents.
if (End > SectSize)
End = SectSize;
@@ -1492,7 +1581,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
/// Skip if user requested specific symbols and this is not in the list
if (!DisasmFuncsSet.empty() &&
- !DisasmFuncsSet.count(std::get<1>(Symbols[si])))
+ !DisasmFuncsSet.count(std::get<1>(Symbols[SI])))
continue;
if (!PrintedSection) {
@@ -1508,12 +1597,12 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
End = StopAddress - SectionAddr;
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
- if (std::get<2>(Symbols[si]) == ELF::STT_AMDGPU_HSA_KERNEL) {
+ if (std::get<2>(Symbols[SI]) == ELF::STT_AMDGPU_HSA_KERNEL) {
// skip amd_kernel_code_t at the beginning of kernel symbol (256 bytes)
Start += 256;
}
- if (si == se - 1 ||
- std::get<2>(Symbols[si + 1]) == ELF::STT_AMDGPU_HSA_KERNEL) {
+ if (SI == SE - 1 ||
+ std::get<2>(Symbols[SI + 1]) == ELF::STT_AMDGPU_HSA_KERNEL) {
// cut trailing zeroes at the end of kernel
// cut up to 256 bytes
const uint64_t EndAlign = 256;
@@ -1524,25 +1613,15 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
}
}
- auto PrintSymbol = [](StringRef Name) {
- outs() << '\n' << Name << ":\n";
- };
- StringRef SymbolName = std::get<1>(Symbols[si]);
- if (Demangle.getValue() == "" || Demangle.getValue() == "itanium") {
- char *DemangledSymbol = nullptr;
- size_t Size = 0;
- int Status;
- DemangledSymbol =
- itaniumDemangle(SymbolName.data(), DemangledSymbol, &Size, &Status);
- if (Status == 0)
- PrintSymbol(StringRef(DemangledSymbol));
- else
- PrintSymbol(SymbolName);
+ outs() << '\n';
+ if (!NoLeadingAddr)
+ outs() << format("%016" PRIx64 " ", SectionAddr + Start);
- if (Size != 0)
- free(DemangledSymbol);
- } else
- PrintSymbol(SymbolName);
+ StringRef SymbolName = std::get<1>(Symbols[SI]);
+ if (Demangle)
+ outs() << demangle(SymbolName) << ":\n";
+ else
+ outs() << SymbolName << ":\n";
// Don't print raw contents of a virtual section. A virtual section
// doesn't have any contents in the file.
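
The symbol-name printing above drops the hand-rolled itaniumDemangle() bookkeeping in favor of the llvm::demangle() convenience wrapper, with -demangle becoming a plain boolean flag. A sketch of the wrapper, assuming the llvm/Demangle/Demangle.h helper that returns its input unchanged when the name is not recognized as mangled:

    #include "llvm/Demangle/Demangle.h"
    #include <string>

    std::string prettyName(const std::string &Mangled) {
      // "_Z3foov" becomes "foo()"; non-mangled input is returned as-is.
      return llvm::demangle(Mangled);
    }
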
@@ -1570,7 +1649,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// same section. We rely on the markers introduced to
// understand what we need to dump. If the data marker is within a
// function, it is denoted as a word/short etc
- if (isArmElf(Obj) && std::get<2>(Symbols[si]) != ELF::STT_OBJECT &&
+ if (isArmElf(Obj) && std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
!DisassembleAll) {
uint64_t Stride = 0;
@@ -1634,7 +1713,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// disassembling text (applicable all architectures),
// we are in a situation where we must print the data and not
// disassemble it.
- if (Obj->isELF() && std::get<2>(Symbols[si]) == ELF::STT_OBJECT &&
+ if (Obj->isELF() && std::get<2>(Symbols[SI]) == ELF::STT_OBJECT &&
!DisassembleAll && Section.isText()) {
// print out data up to 8 bytes at a time in hex and ascii
uint8_t AsciiData[9] = {'\0'};
@@ -1675,6 +1754,14 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (Index >= End)
break;
+ if (size_t N =
+ countSkippableZeroBytes(Bytes.slice(Index, End - Index))) {
+ outs() << "\t\t..." << '\n';
+ Index += N;
+ if (Index >= End)
+ break;
+ }
+
// Disassemble a real instruction or a data when disassemble all is
// provided
bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
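
The countSkippableZeroBytes() call added above lets the disassembler collapse long runs of zero padding into a single "..." line. Its actual implementation lives elsewhere in this file; the behavior it needs is roughly the following sketch, where the name, threshold, and alignment are illustrative assumptions rather than the patch's code:

    #include "llvm/ADT/ArrayRef.h"
    #include <cstddef>
    #include <cstdint>

    static size_t countZeroPadding(llvm::ArrayRef<uint8_t> Buf) {
      size_t N = 0;
      while (N < Buf.size() && Buf[N] == 0)
        ++N;
      if (N < 16)              // short runs may be real instructions
        return 0;
      return N & ~size_t(3);   // keep the next decode attempt aligned
    }
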
@@ -1753,32 +1840,32 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Hexagon does this in pretty printer
if (Obj->getArch() != Triple::hexagon)
// Print relocation for instruction.
- while (rel_cur != rel_end) {
- bool hidden = getHidden(*rel_cur);
- uint64_t addr = rel_cur->getOffset();
- SmallString<16> name;
- SmallString<32> val;
+ while (RelCur != RelEnd) {
+ uint64_t Addr = RelCur->getOffset();
+ SmallString<16> Name;
+ SmallString<32> Val;
// If this relocation is hidden, skip it.
- if (hidden || ((SectionAddr + addr) < StartAddress)) {
- ++rel_cur;
+ if (getHidden(*RelCur) || ((SectionAddr + Addr) < StartAddress)) {
+ ++RelCur;
continue;
}
// Stop when rel_cur's address is past the current instruction.
- if (addr >= Index + Size) break;
- rel_cur->getTypeName(name);
- error(getRelocationValueString(*rel_cur, val));
- outs() << format(Fmt.data(), SectionAddr + addr) << name
- << "\t" << val << "\n";
- ++rel_cur;
+ if (Addr >= Index + Size)
+ break;
+ RelCur->getTypeName(Name);
+ error(getRelocationValueString(*RelCur, Val));
+ outs() << format(Fmt.data(), SectionAddr + Addr) << Name << "\t"
+ << Val << "\n";
+ ++RelCur;
}
}
}
}
}
-void llvm::PrintRelocations(const ObjectFile *Obj) {
+void llvm::printRelocations(const ObjectFile *Obj) {
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 :
"%08" PRIx64;
// Regular objdump doesn't print relocations in non-relocatable object
@@ -1789,61 +1876,57 @@ void llvm::PrintRelocations(const ObjectFile *Obj) {
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
if (Section.relocation_begin() == Section.relocation_end())
continue;
- StringRef secname;
- error(Section.getName(secname));
- outs() << "RELOCATION RECORDS FOR [" << secname << "]:\n";
+ StringRef SecName;
+ error(Section.getName(SecName));
+ outs() << "RELOCATION RECORDS FOR [" << SecName << "]:\n";
for (const RelocationRef &Reloc : Section.relocations()) {
- bool hidden = getHidden(Reloc);
- uint64_t address = Reloc.getOffset();
- SmallString<32> relocname;
- SmallString<32> valuestr;
- if (address < StartAddress || address > StopAddress || hidden)
+ uint64_t Address = Reloc.getOffset();
+ SmallString<32> RelocName;
+ SmallString<32> ValueStr;
+ if (Address < StartAddress || Address > StopAddress || getHidden(Reloc))
continue;
- Reloc.getTypeName(relocname);
- error(getRelocationValueString(Reloc, valuestr));
- outs() << format(Fmt.data(), address) << " " << relocname << " "
- << valuestr << "\n";
+ Reloc.getTypeName(RelocName);
+ error(getRelocationValueString(Reloc, ValueStr));
+ outs() << format(Fmt.data(), Address) << " " << RelocName << " "
+ << ValueStr << "\n";
}
outs() << "\n";
}
}
-void llvm::PrintDynamicRelocations(const ObjectFile *Obj) {
-
+void llvm::printDynamicRelocations(const ObjectFile *Obj) {
// For the moment, this option is for ELF only
if (!Obj->isELF())
return;
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
-
if (!Elf || Elf->getEType() != ELF::ET_DYN) {
error("not a dynamic object");
return;
}
- StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
-
std::vector<SectionRef> DynRelSec = Obj->dynamic_relocation_sections();
if (DynRelSec.empty())
return;
outs() << "DYNAMIC RELOCATION RECORDS\n";
+ StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
for (const SectionRef &Section : DynRelSec) {
if (Section.relocation_begin() == Section.relocation_end())
continue;
for (const RelocationRef &Reloc : Section.relocations()) {
- uint64_t address = Reloc.getOffset();
- SmallString<32> relocname;
- SmallString<32> valuestr;
- Reloc.getTypeName(relocname);
- error(getRelocationValueString(Reloc, valuestr));
- outs() << format(Fmt.data(), address) << " " << relocname << " "
- << valuestr << "\n";
+ uint64_t Address = Reloc.getOffset();
+ SmallString<32> RelocName;
+ SmallString<32> ValueStr;
+ Reloc.getTypeName(RelocName);
+ error(getRelocationValueString(Reloc, ValueStr));
+ outs() << format(Fmt.data(), Address) << " " << RelocName << " "
+ << ValueStr << "\n";
}
}
}
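
Both relocation printers rely on the generic libObject iteration API: walk each section's relocations, then query the offset and type name of every RelocationRef. Reduced to its core (error handling omitted), the pattern looks like this sketch:

    #include <cinttypes>
    #include "llvm/ADT/SmallString.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"

    void listRelocations(const llvm::object::ObjectFile &Obj) {
      for (const llvm::object::SectionRef &Sec : Obj.sections())
        for (const llvm::object::RelocationRef &Reloc : Sec.relocations()) {
          llvm::SmallString<32> TypeName;
          Reloc.getTypeName(TypeName);
          llvm::outs() << llvm::format("%08" PRIx64, Reloc.getOffset())
                       << ' ' << TypeName << '\n';
        }
    }
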
-void llvm::PrintSectionHeaders(const ObjectFile *Obj) {
+void llvm::printSectionHeaders(const ObjectFile *Obj) {
outs() << "Sections:\n"
"Idx Name Size Address Type\n";
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
@@ -1860,9 +1943,10 @@ void llvm::PrintSectionHeaders(const ObjectFile *Obj) {
(unsigned)Section.getIndex(), Name.str().c_str(), Size,
Address, Type.c_str());
}
+ outs() << "\n";
}
-void llvm::PrintSectionContents(const ObjectFile *Obj) {
+void llvm::printSectionContents(const ObjectFile *Obj) {
std::error_code EC;
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
StringRef Name;
@@ -1884,23 +1968,23 @@ void llvm::PrintSectionContents(const ObjectFile *Obj) {
error(Section.getContents(Contents));
// Dump out the content as hex and printable ascii characters.
- for (std::size_t addr = 0, end = Contents.size(); addr < end; addr += 16) {
- outs() << format(" %04" PRIx64 " ", BaseAddr + addr);
+ for (std::size_t Addr = 0, End = Contents.size(); Addr < End; Addr += 16) {
+ outs() << format(" %04" PRIx64 " ", BaseAddr + Addr);
// Dump line of hex.
- for (std::size_t i = 0; i < 16; ++i) {
- if (i != 0 && i % 4 == 0)
+ for (std::size_t I = 0; I < 16; ++I) {
+ if (I != 0 && I % 4 == 0)
outs() << ' ';
- if (addr + i < end)
- outs() << hexdigit((Contents[addr + i] >> 4) & 0xF, true)
- << hexdigit(Contents[addr + i] & 0xF, true);
+ if (Addr + I < End)
+ outs() << hexdigit((Contents[Addr + I] >> 4) & 0xF, true)
+ << hexdigit(Contents[Addr + I] & 0xF, true);
else
outs() << " ";
}
// Print ascii.
outs() << " ";
- for (std::size_t i = 0; i < 16 && addr + i < end; ++i) {
- if (isPrint(static_cast<unsigned char>(Contents[addr + i]) & 0xFF))
- outs() << Contents[addr + i];
+ for (std::size_t I = 0; I < 16 && Addr + I < End; ++I) {
+ if (isPrint(static_cast<unsigned char>(Contents[Addr + I]) & 0xFF))
+ outs() << Contents[Addr + I];
else
outs() << ".";
}
@@ -1909,40 +1993,47 @@ void llvm::PrintSectionContents(const ObjectFile *Obj) {
}
}
-void llvm::PrintSymbolTable(const ObjectFile *o, StringRef ArchiveName,
+void llvm::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
StringRef ArchitectureName) {
outs() << "SYMBOL TABLE:\n";
- if (const COFFObjectFile *coff = dyn_cast<const COFFObjectFile>(o)) {
- printCOFFSymbolTable(coff);
+ if (const COFFObjectFile *Coff = dyn_cast<const COFFObjectFile>(O)) {
+ printCOFFSymbolTable(Coff);
return;
}
- for (const SymbolRef &Symbol : o->symbols()) {
+
+ for (auto I = O->symbol_begin(), E = O->symbol_end(); I != E; ++I) {
+ // Skip printing the special zero symbol when dumping an ELF file.
+ // This makes the output consistent with the GNU objdump.
+ if (I == O->symbol_begin() && isa<ELFObjectFileBase>(O))
+ continue;
+
+ const SymbolRef &Symbol = *I;
Expected<uint64_t> AddressOrError = Symbol.getAddress();
if (!AddressOrError)
- report_error(ArchiveName, o->getFileName(), AddressOrError.takeError(),
+ report_error(ArchiveName, O->getFileName(), AddressOrError.takeError(),
ArchitectureName);
uint64_t Address = *AddressOrError;
if ((Address < StartAddress) || (Address > StopAddress))
continue;
Expected<SymbolRef::Type> TypeOrError = Symbol.getType();
if (!TypeOrError)
- report_error(ArchiveName, o->getFileName(), TypeOrError.takeError(),
+ report_error(ArchiveName, O->getFileName(), TypeOrError.takeError(),
ArchitectureName);
SymbolRef::Type Type = *TypeOrError;
uint32_t Flags = Symbol.getFlags();
Expected<section_iterator> SectionOrErr = Symbol.getSection();
if (!SectionOrErr)
- report_error(ArchiveName, o->getFileName(), SectionOrErr.takeError(),
+ report_error(ArchiveName, O->getFileName(), SectionOrErr.takeError(),
ArchitectureName);
section_iterator Section = *SectionOrErr;
StringRef Name;
- if (Type == SymbolRef::ST_Debug && Section != o->section_end()) {
+ if (Type == SymbolRef::ST_Debug && Section != O->section_end()) {
Section->getName(Name);
} else {
Expected<StringRef> NameOrErr = Symbol.getName();
if (!NameOrErr)
- report_error(ArchiveName, o->getFileName(), NameOrErr.takeError(),
+ report_error(ArchiveName, O->getFileName(), NameOrErr.takeError(),
ArchitectureName);
Name = *NameOrErr;
}
@@ -1963,8 +2054,10 @@ void llvm::PrintSymbolTable(const ObjectFile *o, StringRef ArchiveName,
FileFunc = 'f';
else if (Type == SymbolRef::ST_Function)
FileFunc = 'F';
+ else if (Type == SymbolRef::ST_Data)
+ FileFunc = 'O';
- const char *Fmt = o->getBytesInAddress() > 4 ? "%016" PRIx64 :
+ const char *Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 :
"%08" PRIx64;
outs() << format(Fmt, Address) << " "
@@ -1980,11 +2073,11 @@ void llvm::PrintSymbolTable(const ObjectFile *o, StringRef ArchiveName,
outs() << "*ABS*";
} else if (Common) {
outs() << "*COM*";
- } else if (Section == o->section_end()) {
+ } else if (Section == O->section_end()) {
outs() << "*UND*";
} else {
if (const MachOObjectFile *MachO =
- dyn_cast<const MachOObjectFile>(o)) {
+ dyn_cast<const MachOObjectFile>(O)) {
DataRefImpl DR = Section->getRawDataRefImpl();
StringRef SegmentName = MachO->getSectionFinalSegmentName(DR);
outs() << SegmentName << ",";
@@ -1995,98 +2088,95 @@ void llvm::PrintSymbolTable(const ObjectFile *o, StringRef ArchiveName,
}
outs() << '\t';
- if (Common || isa<ELFObjectFileBase>(o)) {
+ if (Common || isa<ELFObjectFileBase>(O)) {
uint64_t Val =
Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize();
outs() << format("\t %08" PRIx64 " ", Val);
}
- if (Hidden) {
+ if (Hidden)
outs() << ".hidden ";
- }
- outs() << Name
- << '\n';
+
+ if (Demangle)
+ outs() << demangle(Name) << '\n';
+ else
+ outs() << Name << '\n';
}
}
-static void PrintUnwindInfo(const ObjectFile *o) {
+static void printUnwindInfo(const ObjectFile *O) {
outs() << "Unwind info:\n\n";
- if (const COFFObjectFile *coff = dyn_cast<COFFObjectFile>(o)) {
- printCOFFUnwindInfo(coff);
- } else if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+ if (const COFFObjectFile *Coff = dyn_cast<COFFObjectFile>(O))
+ printCOFFUnwindInfo(Coff);
+ else if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(O))
printMachOUnwindInfo(MachO);
- else {
+ else
// TODO: Extract DWARF dump tool to objdump.
- errs() << "This operation is only currently supported "
- "for COFF and MachO object files.\n";
- return;
- }
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for COFF and MachO object files.\n";
}
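
WithColor::error() from llvm/Support/WithColor.h replaces the bare errs() writes throughout this file; it prints an optional tool-name prefix plus a colored "error: " tag and hands the stream back for normal chaining. A small sketch:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/WithColor.h"
    #include "llvm/Support/raw_ostream.h"

    void reportUnsupported(llvm::StringRef ToolName) {
      // Emits e.g. "llvm-objdump: error: unsupported file type"
      // (colored when the stream is a terminal).
      llvm::WithColor::error(llvm::errs(), ToolName)
          << "unsupported file type\n";
    }
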
void llvm::printExportsTrie(const ObjectFile *o) {
outs() << "Exports trie:\n";
if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
printMachOExportsTrie(MachO);
- else {
- errs() << "This operation is only currently supported "
- "for Mach-O executable files.\n";
- return;
- }
+ else
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
}
void llvm::printRebaseTable(ObjectFile *o) {
outs() << "Rebase table:\n";
if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
printMachORebaseTable(MachO);
- else {
- errs() << "This operation is only currently supported "
- "for Mach-O executable files.\n";
- return;
- }
+ else
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
}
void llvm::printBindTable(ObjectFile *o) {
outs() << "Bind table:\n";
if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
printMachOBindTable(MachO);
- else {
- errs() << "This operation is only currently supported "
- "for Mach-O executable files.\n";
- return;
- }
+ else
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
}
void llvm::printLazyBindTable(ObjectFile *o) {
outs() << "Lazy bind table:\n";
if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
printMachOLazyBindTable(MachO);
- else {
- errs() << "This operation is only currently supported "
- "for Mach-O executable files.\n";
- return;
- }
+ else
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
}
void llvm::printWeakBindTable(ObjectFile *o) {
outs() << "Weak bind table:\n";
if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
printMachOWeakBindTable(MachO);
- else {
- errs() << "This operation is only currently supported "
- "for Mach-O executable files.\n";
- return;
- }
+ else
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
}
/// Dump the raw contents of the __clangast section so the output can be piped
/// into llvm-bcanalyzer.
void llvm::printRawClangAST(const ObjectFile *Obj) {
if (outs().is_displayed()) {
- errs() << "The -raw-clang-ast option will dump the raw binary contents of "
- "the clang ast section.\n"
- "Please redirect the output to a file or another program such as "
- "llvm-bcanalyzer.\n";
+ WithColor::error(errs(), ToolName)
+ << "The -raw-clang-ast option will dump the raw binary contents of "
+ "the clang ast section.\n"
+ "Please redirect the output to a file or another program such as "
+ "llvm-bcanalyzer.\n";
return;
}
@@ -2113,15 +2203,16 @@ void llvm::printRawClangAST(const ObjectFile *Obj) {
}
static void printFaultMaps(const ObjectFile *Obj) {
- const char *FaultMapSectionName = nullptr;
+ StringRef FaultMapSectionName;
if (isa<ELFObjectFileBase>(Obj)) {
FaultMapSectionName = ".llvm_faultmaps";
} else if (isa<MachOObjectFile>(Obj)) {
FaultMapSectionName = "__llvm_faultmaps";
} else {
- errs() << "This operation is only currently supported "
- "for ELF and Mach-O executable files.\n";
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for ELF and Mach-O executable files.\n";
return;
}
@@ -2152,42 +2243,44 @@ static void printFaultMaps(const ObjectFile *Obj) {
outs() << FMP;
}
-static void printPrivateFileHeaders(const ObjectFile *o, bool onlyFirst) {
- if (o->isELF()) {
- printELFFileHeader(o);
- return printELFDynamicSection(o);
+static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) {
+ if (O->isELF()) {
+ printELFFileHeader(O);
+ return printELFDynamicSection(O);
}
- if (o->isCOFF())
- return printCOFFFileHeader(o);
- if (o->isWasm())
- return printWasmFileHeader(o);
- if (o->isMachO()) {
- printMachOFileHeader(o);
- if (!onlyFirst)
- printMachOLoadCommands(o);
+ if (O->isCOFF())
+ return printCOFFFileHeader(O);
+ if (O->isWasm())
+ return printWasmFileHeader(O);
+ if (O->isMachO()) {
+ printMachOFileHeader(O);
+ if (!OnlyFirst)
+ printMachOLoadCommands(O);
return;
}
- report_error(o->getFileName(), "Invalid/Unsupported object file format");
+ report_error(O->getFileName(), "Invalid/Unsupported object file format");
}
-static void printFileHeaders(const ObjectFile *o) {
- if (!o->isELF() && !o->isCOFF())
- report_error(o->getFileName(), "Invalid/Unsupported object file format");
+static void printFileHeaders(const ObjectFile *O) {
+ if (!O->isELF() && !O->isCOFF())
+ report_error(O->getFileName(), "Invalid/Unsupported object file format");
- Triple::ArchType AT = o->getArch();
+ Triple::ArchType AT = O->getArch();
outs() << "architecture: " << Triple::getArchTypeName(AT) << "\n";
- Expected<uint64_t> StartAddrOrErr = o->getStartAddress();
+ Expected<uint64_t> StartAddrOrErr = O->getStartAddress();
if (!StartAddrOrErr)
- report_error(o->getFileName(), StartAddrOrErr.takeError());
+ report_error(O->getFileName(), StartAddrOrErr.takeError());
+
+ StringRef Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
+ uint64_t Address = StartAddrOrErr.get();
outs() << "start address: "
- << format("0x%0*x", o->getBytesInAddress(), StartAddrOrErr.get())
- << "\n";
+ << "0x" << format(Fmt.data(), Address) << "\n\n";
}
static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
Expected<sys::fs::perms> ModeOrErr = C.getAccessMode();
if (!ModeOrErr) {
- errs() << "ill-formed archive entry.\n";
+ WithColor::error(errs(), ToolName) << "ill-formed archive entry.\n";
consumeError(ModeOrErr.takeError());
return;
}
@@ -2248,55 +2341,55 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
outs() << Name << "\n";
}
-static void DumpObject(ObjectFile *o, const Archive *a = nullptr,
- const Archive::Child *c = nullptr) {
- StringRef ArchiveName = a != nullptr ? a->getFileName() : "";
+static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
+ const Archive::Child *C = nullptr) {
// Avoid other output when using a raw option.
if (!RawClangAST) {
outs() << '\n';
- if (a)
- outs() << a->getFileName() << "(" << o->getFileName() << ")";
+ if (A)
+ outs() << A->getFileName() << "(" << O->getFileName() << ")";
else
- outs() << o->getFileName();
- outs() << ":\tfile format " << o->getFileFormatName() << "\n\n";
+ outs() << O->getFileName();
+ outs() << ":\tfile format " << O->getFileFormatName() << "\n\n";
}
- if (ArchiveHeaders && !MachOOpt)
- printArchiveChild(a->getFileName(), *c);
+ StringRef ArchiveName = A ? A->getFileName() : "";
+ if (FileHeaders)
+ printFileHeaders(O);
+ if (ArchiveHeaders && !MachOOpt && C)
+ printArchiveChild(ArchiveName, *C);
if (Disassemble)
- DisassembleObject(o, Relocations);
+ disassembleObject(O, Relocations);
if (Relocations && !Disassemble)
- PrintRelocations(o);
+ printRelocations(O);
if (DynamicRelocations)
- PrintDynamicRelocations(o);
+ printDynamicRelocations(O);
if (SectionHeaders)
- PrintSectionHeaders(o);
+ printSectionHeaders(O);
if (SectionContents)
- PrintSectionContents(o);
+ printSectionContents(O);
if (SymbolTable)
- PrintSymbolTable(o, ArchiveName);
+ printSymbolTable(O, ArchiveName);
if (UnwindInfo)
- PrintUnwindInfo(o);
+ printUnwindInfo(O);
if (PrivateHeaders || FirstPrivateHeader)
- printPrivateFileHeaders(o, FirstPrivateHeader);
- if (FileHeaders)
- printFileHeaders(o);
+ printPrivateFileHeaders(O, FirstPrivateHeader);
if (ExportsTrie)
- printExportsTrie(o);
+ printExportsTrie(O);
if (Rebase)
- printRebaseTable(o);
+ printRebaseTable(O);
if (Bind)
- printBindTable(o);
+ printBindTable(O);
if (LazyBind)
- printLazyBindTable(o);
+ printLazyBindTable(O);
if (WeakBind)
- printWeakBindTable(o);
+ printWeakBindTable(O);
if (RawClangAST)
- printRawClangAST(o);
+ printRawClangAST(O);
if (PrintFaultMaps)
- printFaultMaps(o);
+ printFaultMaps(O);
if (DwarfDumpType != DIDT_Null) {
- std::unique_ptr<DIContext> DICtx = DWARFContext::create(*o);
+ std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O);
// Dump the complete DWARF structure.
DIDumpOptions DumpOpts;
DumpOpts.DumpType = DwarfDumpType;
@@ -2304,7 +2397,7 @@ static void DumpObject(ObjectFile *o, const Archive *a = nullptr,
}
}
-static void DumpObject(const COFFImportFile *I, const Archive *A,
+static void dumpObject(const COFFImportFile *I, const Archive *A,
const Archive::Child *C = nullptr) {
StringRef ArchiveName = A ? A->getFileName() : "";
@@ -2315,41 +2408,40 @@ static void DumpObject(const COFFImportFile *I, const Archive *A,
<< ":\tfile format COFF-import-file"
<< "\n\n";
- if (ArchiveHeaders && !MachOOpt)
- printArchiveChild(A->getFileName(), *C);
+ if (ArchiveHeaders && !MachOOpt && C)
+ printArchiveChild(ArchiveName, *C);
if (SymbolTable)
printCOFFSymbolTable(I);
}
/// Dump each object file in \a a;
-static void DumpArchive(const Archive *a) {
+static void dumpArchive(const Archive *A) {
Error Err = Error::success();
- for (auto &C : a->children(Err)) {
+ for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(a->getFileName(), C, std::move(E));
+ report_error(A->getFileName(), C, std::move(E));
continue;
}
- if (ObjectFile *o = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
- DumpObject(o, a, &C);
+ if (ObjectFile *O = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
+ dumpObject(O, A, &C);
else if (COFFImportFile *I = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
- DumpObject(I, a, &C);
+ dumpObject(I, A, &C);
else
- report_error(a->getFileName(), object_error::invalid_file_type);
+ report_error(A->getFileName(), object_error::invalid_file_type);
}
if (Err)
- report_error(a->getFileName(), std::move(Err));
+ report_error(A->getFileName(), std::move(Err));
}
/// Open file and figure out how to dump it.
-static void DumpInput(StringRef file) {
-
+static void dumpInput(StringRef file) {
// If we are using the Mach-O specific object file parser, then let it parse
// the file and process the command line options. So the -arch flags can
// be used to select specific slices, etc.
if (MachOOpt) {
- ParseInputMachO(file);
+ parseInputMachO(file);
return;
}
@@ -2359,10 +2451,12 @@ static void DumpInput(StringRef file) {
report_error(file, BinaryOrErr.takeError());
Binary &Binary = *BinaryOrErr.get().getBinary();
- if (Archive *a = dyn_cast<Archive>(&Binary))
- DumpArchive(a);
- else if (ObjectFile *o = dyn_cast<ObjectFile>(&Binary))
- DumpObject(o);
+ if (Archive *A = dyn_cast<Archive>(&Binary))
+ dumpArchive(A);
+ else if (ObjectFile *O = dyn_cast<ObjectFile>(&Binary))
+ dumpObject(O);
+ else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Binary))
+ parseInputMachO(UB);
else
report_error(file, object_error::invalid_file_type);
}
@@ -2379,24 +2473,20 @@ int main(int argc, char **argv) {
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
cl::ParseCommandLineOptions(argc, argv, "llvm object file dumper\n");
- TripleName = Triple::normalize(TripleName);
ToolName = argv[0];
// Defaults to a.out if no filenames specified.
- if (InputFilenames.size() == 0)
+ if (InputFilenames.empty())
InputFilenames.push_back("a.out");
if (AllHeaders)
- PrivateHeaders = Relocations = SectionHeaders = SymbolTable = true;
+ FileHeaders = PrivateHeaders = Relocations = SectionHeaders = SymbolTable =
+ true;
if (DisassembleAll || PrintSource || PrintLines)
Disassemble = true;
- if (Demangle.getValue() != "none" && Demangle.getValue() != "" &&
- Demangle.getValue() != "itanium")
- warn("Unsupported demangling style");
-
if (!Disassemble
&& !Relocations
&& !DynamicRelocations
@@ -2422,7 +2512,7 @@ int main(int argc, char **argv) {
&& !(DylibsUsed && MachOOpt)
&& !(DylibId && MachOOpt)
&& !(ObjcMetaData && MachOOpt)
- && !(FilterSections.size() != 0 && MachOOpt)
+ && !(!FilterSections.empty() && MachOOpt)
&& !PrintFaultMaps
&& DwarfDumpType == DIDT_Null) {
cl::PrintHelpMessage();
@@ -2432,7 +2522,7 @@ int main(int argc, char **argv) {
DisasmFuncsSet.insert(DisassembleFunctions.begin(),
DisassembleFunctions.end());
- llvm::for_each(InputFilenames, DumpInput);
+ llvm::for_each(InputFilenames, dumpInput);
return EXIT_SUCCESS;
}
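
dumpInput() follows the standard libObject dispatch: open the path with createBinary() and branch on the concrete Binary subclass with dyn_cast/isa. A self-contained sketch of that shape (error handling simplified, names illustrative):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Object/Archive.h"
    #include "llvm/Object/Binary.h"
    #include "llvm/Object/MachOUniversal.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    llvm::Error classifyFile(llvm::StringRef Path) {
      using namespace llvm::object;
      llvm::Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
      if (!BinOrErr)
        return BinOrErr.takeError();
      Binary &Bin = *BinOrErr->getBinary();
      if (llvm::isa<Archive>(&Bin))
        llvm::outs() << Path << ": archive\n";
      else if (llvm::isa<MachOUniversalBinary>(&Bin))
        llvm::outs() << Path << ": Mach-O universal binary\n";
      else if (llvm::isa<ObjectFile>(&Bin))
        llvm::outs() << Path << ": object file\n";
      else
        llvm::outs() << Path << ": unrecognized\n";
      return llvm::Error::success();
    }
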
diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
index b2eb6e9d7771..fe2cb05fe227 100644
--- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
+++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
@@ -22,6 +22,7 @@ namespace object {
class COFFObjectFile;
class COFFImportFile;
class MachOObjectFile;
+ class MachOUniversalBinary;
class ObjectFile;
class Archive;
class RelocationRef;
@@ -30,10 +31,10 @@ namespace object {
extern cl::opt<std::string> TripleName;
extern cl::opt<std::string> ArchName;
extern cl::opt<std::string> MCPU;
-extern cl::opt<std::string> Demangle;
extern cl::list<std::string> MAttrs;
extern cl::list<std::string> FilterSections;
extern cl::opt<bool> AllHeaders;
+extern cl::opt<bool> Demangle;
extern cl::opt<bool> Disassemble;
extern cl::opt<bool> DisassembleAll;
extern cl::opt<bool> NoShowRawInsn;
@@ -69,34 +70,35 @@ extern cl::opt<DIDumpType> DwarfDumpType;
// Various helper functions.
void error(std::error_code ec);
-bool RelocAddressLess(object::RelocationRef a, object::RelocationRef b);
-void ParseInputMachO(StringRef Filename);
-void printCOFFUnwindInfo(const object::COFFObjectFile* o);
-void printMachOUnwindInfo(const object::MachOObjectFile* o);
-void printMachOExportsTrie(const object::MachOObjectFile* o);
-void printMachORebaseTable(object::MachOObjectFile* o);
-void printMachOBindTable(object::MachOObjectFile* o);
-void printMachOLazyBindTable(object::MachOObjectFile* o);
-void printMachOWeakBindTable(object::MachOObjectFile* o);
-void printELFFileHeader(const object::ObjectFile *o);
+bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B);
+void parseInputMachO(StringRef Filename);
+void parseInputMachO(object::MachOUniversalBinary *UB);
+void printCOFFUnwindInfo(const object::COFFObjectFile *O);
+void printMachOUnwindInfo(const object::MachOObjectFile *O);
+void printMachOExportsTrie(const object::MachOObjectFile *O);
+void printMachORebaseTable(object::MachOObjectFile *O);
+void printMachOBindTable(object::MachOObjectFile *O);
+void printMachOLazyBindTable(object::MachOObjectFile *O);
+void printMachOWeakBindTable(object::MachOObjectFile *O);
+void printELFFileHeader(const object::ObjectFile *O);
void printELFDynamicSection(const object::ObjectFile *Obj);
-void printCOFFFileHeader(const object::ObjectFile *o);
-void printCOFFSymbolTable(const object::COFFImportFile *i);
-void printCOFFSymbolTable(const object::COFFObjectFile *o);
-void printMachOFileHeader(const object::ObjectFile *o);
-void printMachOLoadCommands(const object::ObjectFile *o);
-void printWasmFileHeader(const object::ObjectFile *o);
-void printExportsTrie(const object::ObjectFile *o);
-void printRebaseTable(object::ObjectFile *o);
-void printBindTable(object::ObjectFile *o);
-void printLazyBindTable(object::ObjectFile *o);
-void printWeakBindTable(object::ObjectFile *o);
-void printRawClangAST(const object::ObjectFile *o);
-void PrintRelocations(const object::ObjectFile *o);
-void PrintDynamicRelocations(const object::ObjectFile *o);
-void PrintSectionHeaders(const object::ObjectFile *o);
-void PrintSectionContents(const object::ObjectFile *o);
-void PrintSymbolTable(const object::ObjectFile *o, StringRef ArchiveName,
+void printCOFFFileHeader(const object::ObjectFile *O);
+void printCOFFSymbolTable(const object::COFFImportFile *I);
+void printCOFFSymbolTable(const object::COFFObjectFile *O);
+void printMachOFileHeader(const object::ObjectFile *O);
+void printMachOLoadCommands(const object::ObjectFile *O);
+void printWasmFileHeader(const object::ObjectFile *O);
+void printExportsTrie(const object::ObjectFile *O);
+void printRebaseTable(object::ObjectFile *O);
+void printBindTable(object::ObjectFile *O);
+void printLazyBindTable(object::ObjectFile *O);
+void printWeakBindTable(object::ObjectFile *O);
+void printRawClangAST(const object::ObjectFile *O);
+void printRelocations(const object::ObjectFile *O);
+void printDynamicRelocations(const object::ObjectFile *O);
+void printSectionHeaders(const object::ObjectFile *O);
+void printSectionContents(const object::ObjectFile *O);
+void printSymbolTable(const object::ObjectFile *O, StringRef ArchiveName,
StringRef ArchitectureName = StringRef());
void warn(StringRef Message);
LLVM_ATTRIBUTE_NORETURN void error(Twine Message);
diff --git a/contrib/llvm/tools/llvm-pdbutil/Analyze.cpp b/contrib/llvm/tools/llvm-pdbutil/Analyze.cpp
deleted file mode 100644
index 974ab49d9440..000000000000
--- a/contrib/llvm/tools/llvm-pdbutil/Analyze.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-//===- Analyze.cpp - PDB analysis functions ---------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Analyze.h"
-
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
-
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/raw_ostream.h"
-
-#include <list>
-
-using namespace llvm;
-using namespace llvm::codeview;
-using namespace llvm::pdb;
-
-static StringRef getLeafTypeName(TypeLeafKind LT) {
- switch (LT) {
-#define TYPE_RECORD(ename, value, name) \
- case ename: \
- return #name;
-#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
- default:
- break;
- }
- return "UnknownLeaf";
-}
-
-namespace {
-struct HashLookupVisitor : public TypeVisitorCallbacks {
- struct Entry {
- TypeIndex TI;
- CVType Record;
- };
-
- explicit HashLookupVisitor(TpiStream &Tpi) : Tpi(Tpi) {}
-
- Error visitTypeBegin(CVType &Record) override {
- uint32_t H = Tpi.getHashValues()[I];
- Record.Hash = H;
- TypeIndex TI(I + TypeIndex::FirstNonSimpleIndex);
- Lookup[H].push_back(Entry{TI, Record});
- ++I;
- return Error::success();
- }
-
- uint32_t I = 0;
- DenseMap<uint32_t, std::list<Entry>> Lookup;
- TpiStream &Tpi;
-};
-}
-
-AnalysisStyle::AnalysisStyle(PDBFile &File) : File(File) {}
-
-Error AnalysisStyle::dump() {
- auto Tpi = File.getPDBTpiStream();
- if (!Tpi)
- return Tpi.takeError();
-
- HashLookupVisitor Hasher(*Tpi);
-
- uint32_t RecordCount = Tpi->getNumTypeRecords();
- auto Offsets = Tpi->getTypeIndexOffsets();
- auto Types = llvm::make_unique<LazyRandomTypeCollection>(
- Tpi->typeArray(), RecordCount, Offsets);
-
- if (auto EC = codeview::visitTypeStream(*Types, Hasher))
- return EC;
-
- auto &Adjusters = Tpi->getHashAdjusters();
- DenseSet<uint32_t> AdjusterSet;
- for (const auto &Adj : Adjusters) {
- assert(AdjusterSet.find(Adj.second) == AdjusterSet.end());
- AdjusterSet.insert(Adj.second);
- }
-
- uint32_t Count = 0;
- outs() << "Searching for hash collisions\n";
- for (const auto &H : Hasher.Lookup) {
- if (H.second.size() <= 1)
- continue;
- ++Count;
- outs() << formatv("Hash: {0}, Count: {1} records\n", H.first,
- H.second.size());
- for (const auto &R : H.second) {
- auto Iter = AdjusterSet.find(R.TI.getIndex());
- StringRef Prefix;
- if (Iter != AdjusterSet.end()) {
- Prefix = "[HEAD]";
- AdjusterSet.erase(Iter);
- }
- StringRef LeafName = getLeafTypeName(R.Record.Type);
- uint32_t TI = R.TI.getIndex();
- StringRef TypeName = Types->getTypeName(R.TI);
- outs() << formatv("{0,-6} {1} ({2:x}) {3}\n", Prefix, LeafName, TI,
- TypeName);
- }
- }
-
- outs() << "\n";
- outs() << "Dumping hash adjustment chains\n";
- for (const auto &A : Tpi->getHashAdjusters()) {
- TypeIndex TI(A.second);
- StringRef TypeName = Types->getTypeName(TI);
- const CVType &HeadRecord = Types->getType(TI);
- assert(HeadRecord.Hash.hasValue());
-
- auto CollisionsIter = Hasher.Lookup.find(*HeadRecord.Hash);
- if (CollisionsIter == Hasher.Lookup.end())
- continue;
-
- const auto &Collisions = CollisionsIter->second;
- outs() << TypeName << "\n";
- outs() << formatv(" [HEAD] {0:x} {1} {2}\n", uint32_t(A.second),
- getLeafTypeName(HeadRecord.Type), TypeName);
- for (const auto &Chain : Collisions) {
- if (Chain.TI == TI)
- continue;
- const CVType &TailRecord = Types->getType(Chain.TI);
- outs() << formatv(" {0:x} {1} {2}\n", Chain.TI.getIndex(),
- getLeafTypeName(TailRecord.Type),
- Types->getTypeName(Chain.TI));
- }
- }
- outs() << formatv("There are {0} orphaned hash adjusters\n",
- AdjusterSet.size());
- for (const auto &Adj : AdjusterSet) {
- outs() << formatv(" {0}\n", Adj);
- }
-
- uint32_t DistinctHashValues = Hasher.Lookup.size();
- outs() << formatv("{0}/{1} hash collisions", Count, DistinctHashValues);
- return Error::success();
-}
diff --git a/contrib/llvm/tools/llvm-pdbutil/Analyze.h b/contrib/llvm/tools/llvm-pdbutil/Analyze.h
deleted file mode 100644
index 7230ae45b0c8..000000000000
--- a/contrib/llvm/tools/llvm-pdbutil/Analyze.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===- Analyze.h - PDB analysis functions -----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVMPDBDUMP_ANALYSIS_H
-#define LLVM_TOOLS_LLVMPDBDUMP_ANALYSIS_H
-
-#include "OutputStyle.h"
-
-namespace llvm {
-namespace pdb {
-class PDBFile;
-class AnalysisStyle : public OutputStyle {
-public:
- explicit AnalysisStyle(PDBFile &File);
-
- Error dump() override;
-
-private:
- PDBFile &File;
-};
-}
-}
-
-#endif
diff --git a/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
index 9e59adc71967..e4f6aa7f6ec5 100644
--- a/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -22,6 +22,7 @@
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
@@ -65,6 +66,16 @@ DumpOutputStyle::DumpOutputStyle(InputFile &File)
PDBFile &DumpOutputStyle::getPdb() { return File.pdb(); }
object::COFFObjectFile &DumpOutputStyle::getObj() { return File.obj(); }
+void DumpOutputStyle::printStreamNotValidForObj() {
+ AutoIndent Indent(P, 4);
+ P.formatLine("Dumping this stream is not valid for object files");
+}
+
+void DumpOutputStyle::printStreamNotPresent(StringRef StreamName) {
+ AutoIndent Indent(P, 4);
+ P.formatLine("{0} stream not present", StreamName);
+}
+
Error DumpOutputStyle::dump() {
if (opts::dump::DumpSummary) {
if (auto EC = dumpFileSummary())
@@ -132,6 +143,11 @@ Error DumpOutputStyle::dump() {
return EC;
}
+ if (opts::dump::DumpFpo) {
+ if (auto EC = dumpFpo())
+ return EC;
+ }
+
if (File.isObj()) {
if (opts::dump::DumpTypes || !opts::dump::DumpTypeIndex.empty() ||
opts::dump::DumpTypeExtras)
@@ -199,14 +215,14 @@ static void printHeader(LinePrinter &P, const Twine &S) {
Error DumpOutputStyle::dumpFileSummary() {
printHeader(P, "Summary");
- ExitOnError Err("Invalid PDB Format: ");
-
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping File summary is not valid for object files");
+ printStreamNotValidForObj();
return Error::success();
}
+ AutoIndent Indent(P);
+ ExitOnError Err("Invalid PDB Format: ");
+
P.formatLine("Block Size: {0}", getPdb().getBlockSize());
P.formatLine("Number of blocks: {0}", getPdb().getBlockCount());
P.formatLine("Number of streams: {0}", getPdb().getNumStreams());
@@ -234,7 +250,7 @@ Error DumpOutputStyle::dumpFileSummary() {
static StatCollection getSymbolStats(const SymbolGroup &SG,
StatCollection &CumulativeStats) {
StatCollection Stats;
- if (SG.getFile().isPdb()) {
+ if (SG.getFile().isPdb() && SG.hasDebugStream()) {
// For PDB files, all symbols are packed into one stream.
for (const auto &S : SG.getPdbModuleStream().symbols(nullptr)) {
Stats.update(S.kind(), S.length());
@@ -326,12 +342,13 @@ static bool shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group) {
Error DumpOutputStyle::dumpStreamSummary() {
printHeader(P, "Streams");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping streams is not valid for object files");
+ printStreamNotValidForObj();
return Error::success();
}
+ AutoIndent Indent(P);
+
if (StreamPurposes.empty())
discoverStreamPurposes(getPdb(), StreamPurposes);
@@ -527,18 +544,18 @@ static void dumpSectionContrib(LinePrinter &P, const SectionContrib2 &SC,
Error DumpOutputStyle::dumpModules() {
printHeader(P, "Modules");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping modules is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
if (!getPdb().hasPDBDbiStream()) {
- P.formatLine("DBI Stream not present");
+ printStreamNotPresent("DBI");
return Error::success();
}
+ AutoIndent Indent(P);
ExitOnError Err("Unexpected error processing modules: ");
auto &Stream = Err(getPdb().getPDBDbiStream());
@@ -570,7 +587,12 @@ Error DumpOutputStyle::dumpModuleFiles() {
printHeader(P, "Files");
if (File.isObj()) {
- P.formatLine("Dumping files is not valid for object files");
+ printStreamNotValidForObj();
+ return Error::success();
+ }
+
+ if (!getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
return Error::success();
}
@@ -591,6 +613,11 @@ Error DumpOutputStyle::dumpModuleFiles() {
Error DumpOutputStyle::dumpSymbolStats() {
printHeader(P, "Module Stats");
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
ExitOnError Err("Unexpected error processing modules: ");
StatCollection SymStats;
@@ -625,9 +652,9 @@ Error DumpOutputStyle::dumpSymbolStats() {
}
});
- P.printLine(" Summary |");
- AutoIndent Indent(P, 4);
if (SymStats.Totals.Count > 0) {
+ P.printLine(" Summary |");
+ AutoIndent Indent(P, 4);
printModuleDetailStats<SymbolKind>(P, "Symbols", SymStats);
printModuleDetailStats<DebugSubsectionKind>(P, "Chunks", ChunkStats);
}
@@ -680,6 +707,11 @@ static uint32_t getLongestTypeLeafName(const StatCollection &Stats) {
Error DumpOutputStyle::dumpUdtStats() {
printHeader(P, "S_UDT Record Stats");
+ if (File.isPdb() && !getPdb().hasPDBGlobalsStream()) {
+ printStreamNotPresent("Globals");
+ return Error::success();
+ }
+
StatCollection UdtStats;
StatCollection UdtTargetStats;
AutoIndent Indent(P, 4);
@@ -726,11 +758,6 @@ Error DumpOutputStyle::dumpUdtStats() {
P.NewLine();
if (File.isPdb()) {
- if (!getPdb().hasPDBGlobalsStream()) {
- P.printLine("- Error: globals stream not present");
- return Error::success();
- }
-
auto &SymbolRecords = cantFail(getPdb().getPDBSymbolStream());
auto ExpGlobals = getPdb().getPDBGlobalsStream();
if (!ExpGlobals)
@@ -839,6 +866,11 @@ static void typesetLinesAndColumns(LinePrinter &P, uint32_t Start,
Error DumpOutputStyle::dumpLines() {
printHeader(P, "Lines");
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
uint32_t LastModi = UINT32_MAX;
uint32_t LastNameIndex = UINT32_MAX;
iterateModuleSubsections<DebugLinesSubsectionRef>(
@@ -875,6 +907,11 @@ Error DumpOutputStyle::dumpLines() {
Error DumpOutputStyle::dumpInlineeLines() {
printHeader(P, "Inlinee Lines");
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
iterateModuleSubsections<DebugInlineeLinesSubsectionRef>(
File, PrintScope{P, 2},
[this](uint32_t Modi, const SymbolGroup &Strings,
@@ -893,6 +930,12 @@ Error DumpOutputStyle::dumpInlineeLines() {
Error DumpOutputStyle::dumpXmi() {
printHeader(P, "Cross Module Imports");
+
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
iterateModuleSubsections<DebugCrossModuleImportsSubsectionRef>(
File, PrintScope{P, 2},
[this](uint32_t Modi, const SymbolGroup &Strings,
@@ -929,6 +972,11 @@ Error DumpOutputStyle::dumpXmi() {
Error DumpOutputStyle::dumpXme() {
printHeader(P, "Cross Module Exports");
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
iterateModuleSubsections<DebugCrossModuleExportsSubsectionRef>(
File, PrintScope{P, 2},
[this](uint32_t Modi, const SymbolGroup &Strings,
@@ -943,6 +991,111 @@ Error DumpOutputStyle::dumpXme() {
return Error::success();
}
+std::string formatFrameType(object::frame_type FT) {
+ switch (FT) {
+ case object::frame_type::Fpo:
+ return "FPO";
+ case object::frame_type::NonFpo:
+ return "Non-FPO";
+ case object::frame_type::Trap:
+ return "Trap";
+ case object::frame_type::Tss:
+ return "TSS";
+ }
+ return "<unknown>";
+}
+
+Error DumpOutputStyle::dumpOldFpo(PDBFile &File) {
+ printHeader(P, "Old FPO Data");
+
+ ExitOnError Err("Error dumping old fpo data:");
+ auto &Dbi = Err(File.getPDBDbiStream());
+
+ uint32_t Index = Dbi.getDebugStreamIndex(DbgHeaderType::FPO);
+ if (Index == kInvalidStreamIndex) {
+ printStreamNotPresent("FPO");
+ return Error::success();
+ }
+
+ std::unique_ptr<MappedBlockStream> OldFpo = File.createIndexedStream(Index);
+ BinaryStreamReader Reader(*OldFpo);
+ FixedStreamArray<object::FpoData> Records;
+ Err(Reader.readArray(Records,
+ Reader.bytesRemaining() / sizeof(object::FpoData)));
+
+ P.printLine(" RVA | Code | Locals | Params | Prolog | Saved Regs | Use "
+ "BP | Has SEH | Frame Type");
+
+ for (const object::FpoData &FD : Records) {
+ P.formatLine("{0:X-8} | {1,4} | {2,6} | {3,6} | {4,6} | {5,10} | {6,6} | "
+ "{7,7} | {8,9}",
+ uint32_t(FD.Offset), uint32_t(FD.Size), uint32_t(FD.NumLocals),
+ uint32_t(FD.NumParams), FD.getPrologSize(),
+ FD.getNumSavedRegs(), FD.useBP(), FD.hasSEH(),
+ formatFrameType(FD.getFP()));
+ }
+ return Error::success();
+}
+
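
dumpOldFpo() above treats the legacy FPO debug stream as a flat array of fixed-size records. The BinaryStreamReader / FixedStreamArray combination it uses can be exercised in isolation roughly like this (the record type is a stand-in for object::FpoData):

    #include <cstdint>
    #include "llvm/Support/BinaryStreamArray.h"
    #include "llvm/Support/BinaryStreamReader.h"
    #include "llvm/Support/Error.h"

    struct Record { uint32_t Rva; uint32_t Size; };   // hypothetical layout

    llvm::Error readAllRecords(llvm::BinaryStreamRef Stream,
                               llvm::FixedStreamArray<Record> &Records) {
      llvm::BinaryStreamReader Reader(Stream);
      // Interpret every remaining byte as tightly packed Record entries.
      return Reader.readArray(Records,
                              Reader.bytesRemaining() / sizeof(Record));
    }
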
+Error DumpOutputStyle::dumpNewFpo(PDBFile &File) {
+ printHeader(P, "New FPO Data");
+
+ ExitOnError Err("Error dumping new fpo data:");
+ auto &Dbi = Err(File.getPDBDbiStream());
+
+ uint32_t Index = Dbi.getDebugStreamIndex(DbgHeaderType::NewFPO);
+ if (Index == kInvalidStreamIndex) {
+ printStreamNotPresent("New FPO");
+ return Error::success();
+ }
+
+ std::unique_ptr<MappedBlockStream> NewFpo = File.createIndexedStream(Index);
+
+ DebugFrameDataSubsectionRef FDS;
+ if (auto EC = FDS.initialize(*NewFpo))
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Invalid new fpo stream");
+
+ P.printLine(" RVA | Code | Locals | Params | Stack | Prolog | Saved Regs "
+ "| Has SEH | Has C++EH | Start | Program");
+ for (const FrameData &FD : FDS) {
+ bool IsFuncStart = FD.Flags & FrameData::IsFunctionStart;
+ bool HasEH = FD.Flags & FrameData::HasEH;
+ bool HasSEH = FD.Flags & FrameData::HasSEH;
+
+ auto &StringTable = Err(File.getStringTable());
+
+ auto Program = Err(StringTable.getStringForID(FD.FrameFunc));
+ P.formatLine("{0:X-8} | {1,4} | {2,6} | {3,6} | {4,5} | {5,6} | {6,10} | "
+ "{7,7} | {8,9} | {9,5} | {10}",
+ uint32_t(FD.RvaStart), uint32_t(FD.CodeSize),
+ uint32_t(FD.LocalSize), uint32_t(FD.ParamsSize),
+ uint32_t(FD.MaxStackSize), uint16_t(FD.PrologSize),
+ uint16_t(FD.SavedRegsSize), HasSEH, HasEH, IsFuncStart,
+ Program);
+ }
+ return Error::success();
+}
+
+Error DumpOutputStyle::dumpFpo() {
+ if (!File.isPdb()) {
+ printStreamNotValidForObj();
+ return Error::success();
+ }
+
+ PDBFile &File = getPdb();
+ if (!File.hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
+ if (auto EC = dumpOldFpo(File))
+ return EC;
+ if (auto EC = dumpNewFpo(File))
+ return EC;
+ return Error::success();
+}
+
Error DumpOutputStyle::dumpStringTableFromPdb() {
AutoIndent Indent(P);
auto IS = getPdb().getStringTable();
@@ -965,7 +1118,7 @@ Error DumpOutputStyle::dumpStringTableFromPdb() {
std::vector<uint32_t> SortedIDs(IS->name_ids().begin(),
IS->name_ids().end());
- llvm::sort(SortedIDs.begin(), SortedIDs.end());
+ llvm::sort(SortedIDs);
for (uint32_t I : SortedIDs) {
auto ES = IS->getStringForID(I);
llvm::SmallString<32> Str;
@@ -1037,12 +1190,13 @@ Error DumpOutputStyle::dumpStringTableFromObj() {
Error DumpOutputStyle::dumpNamedStreams() {
printHeader(P, "Named Streams");
- AutoIndent Indent(P, 2);
if (File.isObj()) {
- P.formatLine("Dumping Named Streams is only supported for PDB files.");
+ printStreamNotValidForObj();
return Error::success();
}
+
+ AutoIndent Indent(P);
ExitOnError Err("Invalid PDB File: ");
auto &IS = Err(File.pdb().getPDBInfoStream());
@@ -1087,13 +1241,13 @@ static void
dumpFullTypeStream(LinePrinter &Printer, LazyRandomTypeCollection &Types,
uint32_t NumTypeRecords, uint32_t NumHashBuckets,
FixedStreamArray<support::ulittle32_t> HashValues,
- bool Bytes, bool Extras) {
+ TpiStream *Stream, bool Bytes, bool Extras) {
Printer.formatLine("Showing {0:N} records", NumTypeRecords);
uint32_t Width = NumDigits(TypeIndex::FirstNonSimpleIndex + NumTypeRecords);
MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types,
- NumHashBuckets, HashValues);
+ NumHashBuckets, HashValues, Stream);
if (auto EC = codeview::visitTypeStream(Types, V)) {
Printer.formatLine("An error occurred dumping type records: {0}",
@@ -1109,7 +1263,8 @@ static void dumpPartialTypeStream(LinePrinter &Printer,
NumDigits(TypeIndex::FirstNonSimpleIndex + Stream.getNumTypeRecords());
MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types,
- Stream.getNumHashBuckets(), Stream.getHashValues());
+ Stream.getNumHashBuckets(), Stream.getHashValues(),
+ &Stream);
if (opts::dump::DumpTypeDependents) {
// If we need to dump all dependents, then iterate each index and find
@@ -1171,7 +1326,8 @@ Error DumpOutputStyle::dumpTypesFromObjectFile() {
Types.reset(Reader, 100);
if (opts::dump::DumpTypes) {
- dumpFullTypeStream(P, Types, 0, 0, {}, opts::dump::DumpTypeData, false);
+ dumpFullTypeStream(P, Types, 0, 0, {}, nullptr, opts::dump::DumpTypeData,
+ false);
} else if (opts::dump::DumpTypeExtras) {
auto LocalHashes = LocallyHashedType::hashTypeCollection(Types);
auto GlobalHashes = GloballyHashedType::hashTypeCollection(Types);
@@ -1204,7 +1360,6 @@ Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
printHeader(P, "Types (IPI Stream)");
}
- AutoIndent Indent(P);
assert(!File.isObj());
bool Present = false;
@@ -1229,10 +1384,11 @@ Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
}
if (!Present) {
- P.formatLine("Stream not present");
+ printStreamNotPresent(StreamIdx == StreamTPI ? "TPI" : "IPI");
return Error::success();
}
+ AutoIndent Indent(P);
ExitOnError Err("Unexpected error processing types: ");
auto &Stream = Err((StreamIdx == StreamTPI) ? getPdb().getPDBTpiStream()
@@ -1240,11 +1396,14 @@ Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
auto &Types = (StreamIdx == StreamTPI) ? File.types() : File.ids();
+ // Enable resolving forward decls.
+ Stream.buildHashMap();
+
if (DumpTypes || !Indices.empty()) {
if (Indices.empty())
dumpFullTypeStream(P, Types, Stream.getNumTypeRecords(),
Stream.getNumHashBuckets(), Stream.getHashValues(),
- DumpBytes, DumpExtras);
+ &Stream, DumpBytes, DumpExtras);
else {
std::vector<TypeIndex> TiList(Indices.begin(), Indices.end());
dumpPartialTypeStream(P, Types, Stream, TiList, DumpBytes, DumpExtras,
@@ -1261,19 +1420,21 @@ Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
P.formatLine("TI: {0}, Offset: {1}", IO.Type, fmtle(IO.Offset));
}
- P.NewLine();
- P.formatLine("Hash Adjusters:");
- auto &Adjusters = Stream.getHashAdjusters();
- auto &Strings = Err(getPdb().getStringTable());
- for (const auto &A : Adjusters) {
- AutoIndent Indent2(P);
- auto ExpectedStr = Strings.getStringForID(A.first);
- TypeIndex TI(A.second);
- if (ExpectedStr)
- P.formatLine("`{0}` -> {1}", *ExpectedStr, TI);
- else {
- P.formatLine("unknown str id ({0}) -> {1}", A.first, TI);
- consumeError(ExpectedStr.takeError());
+ if (getPdb().hasPDBStringTable()) {
+ P.NewLine();
+ P.formatLine("Hash Adjusters:");
+ auto &Adjusters = Stream.getHashAdjusters();
+ auto &Strings = Err(getPdb().getStringTable());
+ for (const auto &A : Adjusters) {
+ AutoIndent Indent2(P);
+ auto ExpectedStr = Strings.getStringForID(A.first);
+ TypeIndex TI(A.second);
+ if (ExpectedStr)
+ P.formatLine("`{0}` -> {1}", *ExpectedStr, TI);
+ else {
+ P.formatLine("unknown str id ({0}) -> {1}", A.first, TI);
+ consumeError(ExpectedStr.takeError());
+ }
}
}
}
@@ -1321,12 +1482,12 @@ Error DumpOutputStyle::dumpModuleSymsForObj() {
Error DumpOutputStyle::dumpModuleSymsForPdb() {
printHeader(P, "Symbols");
- AutoIndent Indent(P);
- if (!getPdb().hasPDBDbiStream()) {
- P.formatLine("DBI Stream not present");
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
return Error::success();
}
+ AutoIndent Indent(P);
ExitOnError Err("Unexpected error processing symbols: ");
auto &Ids = File.ids();
@@ -1364,18 +1525,19 @@ Error DumpOutputStyle::dumpModuleSymsForPdb() {
Error DumpOutputStyle::dumpGSIRecords() {
printHeader(P, "GSI Records");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping Globals is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
if (!getPdb().hasPDBSymbolStream()) {
- P.formatLine("GSI Common Symbol Stream not present");
+ printStreamNotPresent("GSI Common Symbol");
return Error::success();
}
+ AutoIndent Indent(P);
+
auto &Records = cantFail(getPdb().getPDBSymbolStream());
auto &Types = File.types();
auto &Ids = File.ids();
@@ -1397,38 +1559,72 @@ Error DumpOutputStyle::dumpGSIRecords() {
Error DumpOutputStyle::dumpGlobals() {
printHeader(P, "Global Symbols");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping Globals is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
if (!getPdb().hasPDBGlobalsStream()) {
- P.formatLine("Globals stream not present");
+ printStreamNotPresent("Globals");
return Error::success();
}
+
+ AutoIndent Indent(P);
ExitOnError Err("Error dumping globals stream: ");
auto &Globals = Err(getPdb().getPDBGlobalsStream());
- const GSIHashTable &Table = Globals.getGlobalsTable();
- Err(dumpSymbolsFromGSI(Table, opts::dump::DumpGlobalExtras));
+ if (opts::dump::DumpGlobalNames.empty()) {
+ const GSIHashTable &Table = Globals.getGlobalsTable();
+ Err(dumpSymbolsFromGSI(Table, opts::dump::DumpGlobalExtras));
+ } else {
+ SymbolStream &SymRecords = cantFail(getPdb().getPDBSymbolStream());
+ auto &Types = File.types();
+ auto &Ids = File.ids();
+
+ SymbolVisitorCallbackPipeline Pipeline;
+ SymbolDeserializer Deserializer(nullptr, CodeViewContainer::Pdb);
+ MinimalSymbolDumper Dumper(P, opts::dump::DumpSymRecordBytes, Ids, Types);
+
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Dumper);
+ CVSymbolVisitor Visitor(Pipeline);
+
+ using ResultEntryType = std::pair<uint32_t, CVSymbol>;
+ for (StringRef Name : opts::dump::DumpGlobalNames) {
+ AutoIndent Indent(P);
+ P.formatLine("Global Name `{0}`", Name);
+ std::vector<ResultEntryType> Results =
+ Globals.findRecordsByName(Name, SymRecords);
+ if (Results.empty()) {
+ AutoIndent Indent(P);
+ P.printLine("(no matching records found)");
+ continue;
+ }
+
+ for (ResultEntryType Result : Results) {
+ if (auto E = Visitor.visitSymbolRecord(Result.second, Result.first))
+ return E;
+ }
+ }
+ }
return Error::success();
}
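
The new name-filtered globals path wires up the usual CodeView symbol visitor pipeline: a SymbolDeserializer first, then whatever callbacks should see the decoded record, all driven by CVSymbolVisitor. Stripped of the pdbutil-specific MinimalSymbolDumper, the wiring is roughly the following sketch (KindPrinter is a made-up minimal callback):

    #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
    #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
    #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h"
    #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::codeview;

    // Minimal callback: print each record's kind and length.
    struct KindPrinter : public SymbolVisitorCallbacks {
      Error visitSymbolBegin(CVSymbol &Record) override {
        outs() << "kind 0x";
        outs().write_hex(static_cast<unsigned>(Record.kind()));
        outs() << " length " << Record.length() << '\n';
        return Error::success();
      }
    };

    Error dumpOneRecord(CVSymbol Record) {
      SymbolVisitorCallbackPipeline Pipeline;
      SymbolDeserializer Deserializer(nullptr, CodeViewContainer::Pdb);
      KindPrinter Printer;
      Pipeline.addCallbackToPipeline(Deserializer);
      Pipeline.addCallbackToPipeline(Printer);
      CVSymbolVisitor Visitor(Pipeline);
      return Visitor.visitSymbolRecord(Record);
    }
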
Error DumpOutputStyle::dumpPublics() {
printHeader(P, "Public Symbols");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping Globals is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
if (!getPdb().hasPDBPublicsStream()) {
- P.formatLine("Publics stream not present");
+ printStreamNotPresent("Publics");
return Error::success();
}
+
+ AutoIndent Indent(P);
ExitOnError Err("Error dumping publics stream: ");
auto &Publics = Err(getPdb().getPDBPublicsStream());
@@ -1514,8 +1710,6 @@ Error DumpOutputStyle::dumpSymbolsFromGSI(const GSIHashTable &Table,
// Return early if we aren't dumping public hash table and address map info.
if (HashExtras) {
- P.formatBinary("Hash Bitmap", Table.HashBitmap, 0);
-
P.formatLine("Hash Entries");
{
AutoIndent Indent2(P);
@@ -1560,12 +1754,17 @@ Error DumpOutputStyle::dumpSectionHeaders() {
void DumpOutputStyle::dumpSectionHeaders(StringRef Label, DbgHeaderType Type) {
printHeader(P, Label);
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping Section Headers is not supported for object files");
+ printStreamNotValidForObj();
+ return;
+ }
+
+ if (!getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
return;
}
+ AutoIndent Indent(P);
ExitOnError Err("Error dumping section headers: ");
std::unique_ptr<MappedBlockStream> Stream;
ArrayRef<object::coff_section> Headers;
@@ -1606,20 +1805,19 @@ void DumpOutputStyle::dumpSectionHeaders(StringRef Label, DbgHeaderType Type) {
Error DumpOutputStyle::dumpSectionContribs() {
printHeader(P, "Section Contributions");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine(
- "Dumping section contributions is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
- ExitOnError Err("Error dumping section contributions: ");
if (!getPdb().hasPDBDbiStream()) {
- P.formatLine(
- "Section contribs require a DBI Stream, which could not be loaded");
+ printStreamNotPresent("DBI");
return Error::success();
}
+ AutoIndent Indent(P);
+ ExitOnError Err("Error dumping section contributions: ");
+
auto &Dbi = Err(getPdb().getPDBDbiStream());
class Visitor : public ISectionContribVisitor {
@@ -1651,21 +1849,20 @@ Error DumpOutputStyle::dumpSectionContribs() {
Error DumpOutputStyle::dumpSectionMap() {
printHeader(P, "Section Map");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping section map is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
- ExitOnError Err("Error dumping section map: ");
-
if (!getPdb().hasPDBDbiStream()) {
- P.formatLine("Dumping the section map requires a DBI Stream, which could "
- "not be loaded");
+ printStreamNotPresent("DBI");
return Error::success();
}
+ AutoIndent Indent(P);
+ ExitOnError Err("Error dumping section map: ");
+
auto &Dbi = Err(getPdb().getPDBDbiStream());
uint32_t I = 0;
diff --git a/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.h b/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
index e7e9252f2fa9..9b3a85587bde 100644
--- a/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
+++ b/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
@@ -70,6 +70,9 @@ private:
PDBFile &getPdb();
object::COFFObjectFile &getObj();
+ void printStreamNotValidForObj();
+ void printStreamNotPresent(StringRef StreamName);
+
Error dumpFileSummary();
Error dumpStreamSummary();
Error dumpSymbolStats();
@@ -82,6 +85,9 @@ private:
Error dumpInlineeLines();
Error dumpXmi();
Error dumpXme();
+ Error dumpFpo();
+ Error dumpOldFpo(PDBFile &File);
+ Error dumpNewFpo(PDBFile &File);
Error dumpTpiStream(uint32_t StreamIdx);
Error dumpTypesFromObjectFile();
Error dumpModules();
diff --git a/contrib/llvm/tools/llvm-pdbutil/InputFile.cpp b/contrib/llvm/tools/llvm-pdbutil/InputFile.cpp
index 7b5af7e96920..8eb116cf0d80 100644
--- a/contrib/llvm/tools/llvm-pdbutil/InputFile.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/InputFile.cpp
@@ -41,6 +41,10 @@ getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
auto &Dbi = Err(File.getPDBDbiStream());
const auto &Modules = Dbi.modules();
+ if (Index >= Modules.getModuleCount())
+ return make_error<RawError>(raw_error_code::index_out_of_bounds,
+ "Invalid module index");
+
auto Modi = Modules.getModuleDescriptor(Index);
ModuleName = Modi.getModuleName();
@@ -112,10 +116,6 @@ static std::string formatChecksumKind(FileChecksumKind Kind) {
return formatUnknownEnum(Kind);
}
-static const DebugStringTableSubsectionRef &extractStringTable(PDBFile &File) {
- return cantFail(File.getStringTable()).getStringTable();
-}
-
template <typename... Args>
static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) {
if (Append)
@@ -164,8 +164,13 @@ void SymbolGroup::initializeForPdb(uint32_t Modi) {
// PDB always uses the same string table, but each module has its own
// checksums. So we only set the strings if they're not already set.
- if (!SC.hasStrings())
- SC.setStrings(extractStringTable(File->pdb()));
+ if (!SC.hasStrings()) {
+ auto StringTable = File->pdb().getStringTable();
+ if (StringTable)
+ SC.setStrings(StringTable->getStringTable());
+ else
+ consumeError(StringTable.takeError());
+ }
SC.resetChecksums();
auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
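
Editorial note: getModuleDebugStream above now rejects an out-of-range module index before calling getModuleDescriptor. A rough equivalent of that guard, written against plain standard containers rather than the DBI module list (the types and function name below are invented for the sketch):

#include <cstdint>
#include <optional>
#include <string>
#include <vector>

struct ModuleDescriptor {
  std::string Name;
};

// Return std::nullopt instead of indexing past the end, mirroring the
// "Invalid module index" error added above.
std::optional<ModuleDescriptor>
getModuleChecked(const std::vector<ModuleDescriptor> &Modules, uint32_t Index) {
  if (Index >= Modules.size())
    return std::nullopt;
  return Modules[Index];
}
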
diff --git a/contrib/llvm/tools/llvm-pdbutil/InputFile.h b/contrib/llvm/tools/llvm-pdbutil/InputFile.h
index 552f3a3b2127..ee4e651c1e99 100644
--- a/contrib/llvm/tools/llvm-pdbutil/InputFile.h
+++ b/contrib/llvm/tools/llvm-pdbutil/InputFile.h
@@ -110,6 +110,8 @@ public:
const InputFile &getFile() const { return *File; }
InputFile &getFile() { return *File; }
+ bool hasDebugStream() const { return DebugStream != nullptr; }
+
private:
void initializeForPdb(uint32_t Modi);
void updatePdbModi(uint32_t Modi);
diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index f4e38a32a511..2c7b213b0a9f 100644
--- a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -296,6 +296,14 @@ static std::string formatRegisterId(RegisterId Id) {
return formatUnknownEnum(Id);
}
+static std::string formatRegisterId(uint16_t Reg16) {
+ return formatRegisterId(RegisterId(Reg16));
+}
+
+static std::string formatRegisterId(ulittle16_t &Reg16) {
+ return formatRegisterId(uint16_t(Reg16));
+}
+
static std::string formatRange(LocalVariableAddrRange Range) {
return formatv("[{0},+{1})",
formatSegmentOffset(Range.ISectStart, Range.OffsetStart),
@@ -482,6 +490,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
AutoIndent Indent(P, 7);
SourceLanguage Lang = static_cast<SourceLanguage>(
Compile2.Flags & CompileSym2Flags::SourceLanguageMask);
+ CompilationCPU = Compile2.Machine;
P.formatLine("machine = {0}, ver = {1}, language = {2}",
formatMachineType(Compile2.Machine), Compile2.Version,
formatSourceLanguage(Lang));
@@ -502,6 +511,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
AutoIndent Indent(P, 7);
SourceLanguage Lang = static_cast<SourceLanguage>(
Compile3.Flags & CompileSym3Flags::SourceLanguageMask);
+ CompilationCPU = Compile3.Machine;
P.formatLine("machine = {0}, Ver = {1}, language = {2}",
formatMachineType(Compile3.Machine), Compile3.Version,
formatSourceLanguage(Lang));
@@ -550,10 +560,11 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
DefRangeRegisterRelSym &Def) {
AutoIndent Indent(P, 7);
- P.formatLine("register = {0}, base ptr = {1}, offset in parent = {2}, has "
+ P.formatLine("register = {0}, offset = {1}, offset in parent = {2}, has "
"spilled udt = {3}",
- uint16_t(Def.Hdr.Register), int32_t(Def.Hdr.BasePointerOffset),
- Def.offsetInParent(), Def.hasSpilledUDTMember());
+ formatRegisterId(Def.Hdr.Register),
+ int32_t(Def.Hdr.BasePointerOffset), Def.offsetInParent(),
+ Def.hasSpilledUDTMember());
P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
formatGaps(P.getIndentLevel() + 9, Def.Gaps));
return Error::success();
@@ -564,8 +575,8 @@ Error MinimalSymbolDumper::visitKnownRecord(
AutoIndent Indent(P, 7);
P.formatLine("register = {0}, may have no name = {1}, range start = "
"{2}, length = {3}",
- uint16_t(DefRangeRegister.Hdr.Register),
- uint16_t(DefRangeRegister.Hdr.MayHaveNoName),
+ formatRegisterId(DefRangeRegister.Hdr.Register),
+ bool(DefRangeRegister.Hdr.MayHaveNoName),
formatSegmentOffset(DefRangeRegister.Range.ISectStart,
DefRangeRegister.Range.OffsetStart),
DefRangeRegister.Range.Range);
@@ -579,7 +590,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
AutoIndent Indent(P, 7);
bool NoName = !!(Def.Hdr.MayHaveNoName == 0);
P.formatLine("register = {0}, may have no name = {1}, offset in parent = {2}",
- uint16_t(Def.Hdr.Register), NoName,
+ formatRegisterId(Def.Hdr.Register), NoName,
uint32_t(Def.Hdr.OffsetInParent));
P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
formatGaps(P.getIndentLevel() + 9, Def.Gaps));
@@ -606,8 +617,8 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DefRangeSym &Def) {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameCookieSym &FC) {
AutoIndent Indent(P, 7);
P.formatLine("code offset = {0}, Register = {1}, kind = {2}, flags = {3}",
- FC.CodeOffset, FC.Register, formatCookieKind(FC.CookieKind),
- FC.Flags);
+ FC.CodeOffset, formatRegisterId(FC.Register),
+ formatCookieKind(FC.CookieKind), FC.Flags);
return Error::success();
}
@@ -620,6 +631,9 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameProcSym &FP) {
FP.BytesOfCalleeSavedRegisters,
formatSegmentOffset(FP.SectionIdOfExceptionHandler,
FP.OffsetOfExceptionHandler));
+ P.formatLine("local fp reg = {0}, param fp reg = {1}",
+ formatRegisterId(FP.getLocalFramePtrReg(CompilationCPU)),
+ formatRegisterId(FP.getParamFramePtrReg(CompilationCPU)));
P.formatLine("flags = {0}",
formatFrameProcedureOptions(P.getIndentLevel() + 9, FP.Flags));
return Error::success();
diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.h b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.h
index 1c26a85a4eaf..033e193cee6c 100644
--- a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.h
+++ b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.h
@@ -53,6 +53,11 @@ private:
std::string idIndex(codeview::TypeIndex TI) const;
LinePrinter &P;
+
+ /// Dumping certain records requires knowing what machine this is. The
+ /// S_COMPILE3 record will tell us, but if we don't see one, default to X64.
+ codeview::CPUType CompilationCPU = codeview::CPUType::X64;
+
bool RecordBytes;
const SymbolGroup *SymGroup = nullptr;
codeview::LazyRandomTypeCollection &Ids;
@@ -61,4 +66,4 @@ private:
} // namespace pdb
} // namespace llvm
-#endif \ No newline at end of file
+#endif
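
Editorial note: the new CompilationCPU member records the machine announced by the most recent S_COMPILE2/S_COMPILE3 record and defaults to X64 when none is seen, so later records such as S_FRAMEPROC can name frame-pointer registers per CPU. A hedged sketch of that idea follows; the enum values and helper names are illustrative and do not mirror the CodeView API.

#include <cstdint>
#include <string>

enum class CPUType { X64, ARM64 };

struct SymbolDumperState {
  CPUType CompilationCPU = CPUType::X64; // default when no compile record seen

  void onCompileRecord(CPUType Machine) { CompilationCPU = Machine; }

  std::string frameRegName(uint16_t RawReg) const {
    // A real dumper maps RawReg through a per-CPU register table; this only
    // shows that the mapping depends on the remembered CompilationCPU.
    return (CompilationCPU == CPUType::ARM64 ? "arm64-reg-" : "x64-reg-") +
           std::to_string(RawReg);
  }
};
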
diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
index 569bca7490fa..3f10e8ab8a1e 100644
--- a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
@@ -12,6 +12,7 @@
#include "FormatUtil.h"
#include "LinePrinter.h"
+#include "llvm-pdbutil.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
@@ -19,6 +20,7 @@
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
@@ -27,15 +29,37 @@ using namespace llvm::codeview;
using namespace llvm::pdb;
static std::string formatClassOptions(uint32_t IndentLevel,
- ClassOptions Options) {
+ ClassOptions Options, TpiStream *Stream,
+ TypeIndex CurrentTypeIndex) {
std::vector<std::string> Opts;
+
+ if (Stream && Stream->supportsTypeLookup() &&
+ !opts::dump::DontResolveForwardRefs &&
+ ((Options & ClassOptions::ForwardReference) != ClassOptions::None)) {
+ // If we're able to resolve forward references, do that.
+ Expected<TypeIndex> ETI =
+ Stream->findFullDeclForForwardRef(CurrentTypeIndex);
+ if (!ETI) {
+ consumeError(ETI.takeError());
+ PUSH_FLAG(ClassOptions, ForwardReference, Options, "forward ref (??\?)");
+ } else {
+ const char *Direction = (*ETI == CurrentTypeIndex)
+ ? "="
+ : ((*ETI < CurrentTypeIndex) ? "<-" : "->");
+ std::string Formatted =
+ formatv("forward ref ({0} {1})", Direction, *ETI).str();
+ PUSH_FLAG(ClassOptions, ForwardReference, Options, std::move(Formatted));
+ }
+ } else {
+ PUSH_FLAG(ClassOptions, ForwardReference, Options, "forward ref");
+ }
+
PUSH_FLAG(ClassOptions, HasConstructorOrDestructor, Options,
"has ctor / dtor");
PUSH_FLAG(ClassOptions, ContainsNestedClass, Options,
"contains nested class");
PUSH_FLAG(ClassOptions, HasConversionOperator, Options,
"conversion operator");
- PUSH_FLAG(ClassOptions, ForwardReference, Options, "forward ref");
PUSH_FLAG(ClassOptions, HasUniqueName, Options, "has unique name");
PUSH_FLAG(ClassOptions, Intrinsic, Options, "intrin");
PUSH_FLAG(ClassOptions, Nested, Options, "is nested");
@@ -194,6 +218,7 @@ static std::string formatFunctionOptions(FunctionOptions Options) {
}
Error MinimalTypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) {
+ CurrentTypeIndex = Index;
// formatLine puts the newline at the beginning, so we use formatLine here
// to start a new line, and then individual visit methods use format to
// append to the existing line.
@@ -304,7 +329,8 @@ Error MinimalTypeDumpVisitor::visitKnownRecord(CVType &CVR,
P.formatLine("vtable: {0}, base list: {1}, field list: {2}",
Class.VTableShape, Class.DerivationList, Class.FieldList);
P.formatLine("options: {0}, sizeof {1}",
- formatClassOptions(P.getIndentLevel(), Class.Options),
+ formatClassOptions(P.getIndentLevel(), Class.Options, Stream,
+ CurrentTypeIndex),
Class.Size);
return Error::success();
}
@@ -316,7 +342,8 @@ Error MinimalTypeDumpVisitor::visitKnownRecord(CVType &CVR,
P.formatLine("unique name: `{0}`", Union.UniqueName);
P.formatLine("field list: {0}", Union.FieldList);
P.formatLine("options: {0}, sizeof {1}",
- formatClassOptions(P.getIndentLevel(), Union.Options),
+ formatClassOptions(P.getIndentLevel(), Union.Options, Stream,
+ CurrentTypeIndex),
Union.Size);
return Error::success();
}
@@ -328,7 +355,8 @@ Error MinimalTypeDumpVisitor::visitKnownRecord(CVType &CVR, EnumRecord &Enum) {
P.formatLine("field list: {0}, underlying type: {1}", Enum.FieldList,
Enum.UnderlyingType);
P.formatLine("options: {0}",
- formatClassOptions(P.getIndentLevel(), Enum.Options));
+ formatClassOptions(P.getIndentLevel(), Enum.Options, Stream,
+ CurrentTypeIndex));
return Error::success();
}
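
Editorial note: the new formatClassOptions path resolves a forward reference through the TPI stream and annotates it with a direction marker, '=' when the full declaration is the current record, '<-' when it appears earlier in the type stream, '->' when it appears later. A minimal standalone restatement of that marker logic (plain integers stand in for TypeIndex):

#include <cstdint>
#include <string>

std::string forwardRefLabel(uint32_t CurrentTI, uint32_t FullDeclTI) {
  const char *Direction =
      (FullDeclTI == CurrentTI) ? "=" : (FullDeclTI < CurrentTI ? "<-" : "->");
  return "forward ref (" + std::string(Direction) + " " +
         std::to_string(FullDeclTI) + ")";
}
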
diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h
index 4227688f0f71..8f6bdc6110ae 100644
--- a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h
+++ b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h
@@ -20,15 +20,18 @@ class LazyRandomTypeCollection;
namespace pdb {
class LinePrinter;
+class TpiStream;
class MinimalTypeDumpVisitor : public codeview::TypeVisitorCallbacks {
public:
MinimalTypeDumpVisitor(LinePrinter &P, uint32_t Width, bool RecordBytes,
bool Hashes, codeview::LazyRandomTypeCollection &Types,
uint32_t NumHashBuckets,
- FixedStreamArray<support::ulittle32_t> HashValues)
+ FixedStreamArray<support::ulittle32_t> HashValues,
+ pdb::TpiStream *Stream)
: P(P), Width(Width), RecordBytes(RecordBytes), Hashes(Hashes),
- Types(Types), NumHashBuckets(NumHashBuckets), HashValues(HashValues) {}
+ Types(Types), NumHashBuckets(NumHashBuckets), HashValues(HashValues),
+ Stream(Stream) {}
Error visitTypeBegin(codeview::CVType &Record,
codeview::TypeIndex Index) override;
@@ -55,7 +58,9 @@ private:
bool Hashes = false;
codeview::LazyRandomTypeCollection &Types;
uint32_t NumHashBuckets;
+ codeview::TypeIndex CurrentTypeIndex;
FixedStreamArray<support::ulittle32_t> HashValues;
+ pdb::TpiStream *Stream = nullptr;
};
} // namespace pdb
} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp
index eb39708a27e9..3ea333608314 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp
@@ -110,6 +110,7 @@ void MappingTraits<PdbObject>::mapping(IO &IO, PdbObject &Obj) {
IO.mapOptional("DbiStream", Obj.DbiStream);
IO.mapOptional("TpiStream", Obj.TpiStream);
IO.mapOptional("IpiStream", Obj.IpiStream);
+ IO.mapOptional("PublicsStream", Obj.PublicsStream);
}
void MappingTraits<MSFHeaders>::mapping(IO &IO, MSFHeaders &Obj) {
@@ -163,6 +164,11 @@ void MappingTraits<PdbTpiStream>::mapping(IO &IO,
IO.mapRequired("Records", Obj.Records);
}
+void MappingTraits<PdbPublicsStream>::mapping(
+ IO &IO, pdb::yaml::PdbPublicsStream &Obj) {
+ IO.mapRequired("Records", Obj.PubSyms);
+}
+
void MappingTraits<NamedStreamMapping>::mapping(IO &IO,
NamedStreamMapping &Obj) {
IO.mapRequired("Name", Obj.StreamName);
diff --git a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h
index 91e054490a5f..97ba87266cc6 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h
+++ b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h
@@ -92,6 +92,10 @@ struct PdbTpiStream {
std::vector<CodeViewYAML::LeafRecord> Records;
};
+struct PdbPublicsStream {
+ std::vector<CodeViewYAML::SymbolRecord> PubSyms;
+};
+
struct PdbObject {
explicit PdbObject(BumpPtrAllocator &Allocator) : Allocator(Allocator) {}
@@ -102,6 +106,7 @@ struct PdbObject {
Optional<PdbDbiStream> DbiStream;
Optional<PdbTpiStream> TpiStream;
Optional<PdbTpiStream> IpiStream;
+ Optional<PdbPublicsStream> PublicsStream;
Optional<std::vector<StringRef>> StringTable;
@@ -118,6 +123,7 @@ LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::StreamBlockList)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbInfoStream)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbDbiStream)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbTpiStream)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbPublicsStream)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::NamedStreamMapping)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbModiStream)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbDbiModuleInfo)
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
index 651cb8b7649e..f009f53a3932 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
@@ -51,6 +51,13 @@ void ClassDefinitionDumper::prettyPrintClassIntro(const ClassLayout &Layout) {
uint32_t Size = Layout.getSize();
const PDBSymbolTypeUDT &Class = Layout.getClass();
+ if (Layout.getClass().isConstType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "const ";
+ if (Layout.getClass().isVolatileType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "volatile ";
+ if (Layout.getClass().isUnalignedType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "unaligned ";
+
WithColor(Printer, PDB_ColorItem::Keyword).get() << Class.getUdtKind() << " ";
WithColor(Printer, PDB_ColorItem::Type).get() << Class.getName();
WithColor(Printer, PDB_ColorItem::Comment).get() << " [sizeof = " << Size
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
index 0d99c9b1245c..94a0b2d5e780 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
@@ -28,6 +28,7 @@
#include "llvm/DebugInfo/PDB/PDBSymbolThunk.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
#include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
@@ -216,3 +217,13 @@ void CompilandDumper::dump(const PDBSymbolUnknown &Symbol) {
Printer.NewLine();
Printer << "unknown (" << Symbol.getSymTag() << ")";
}
+
+void CompilandDumper::dump(const PDBSymbolUsingNamespace &Symbol) {
+ if (Printer.IsSymbolExcluded(Symbol.getName()))
+ return;
+
+ Printer.NewLine();
+ Printer << "using namespace ";
+ std::string Name = Symbol.getName();
+ WithColor(Printer, PDB_ColorItem::Identifier).get() << Name;
+}
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.h b/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.h
index cae196e9d134..1a840e49607c 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.h
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.h
@@ -34,6 +34,7 @@ public:
void dump(const PDBSymbolThunk &Symbol) override;
void dump(const PDBSymbolTypeTypedef &Symbol) override;
void dump(const PDBSymbolUnknown &Symbol) override;
+ void dump(const PDBSymbolUsingNamespace &Symbol) override;
private:
LinePrinter &Printer;
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
index bf22e75e3949..f4cbd3f8fa14 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
@@ -23,6 +23,18 @@ using namespace llvm::pdb;
EnumDumper::EnumDumper(LinePrinter &P) : PDBSymDumper(true), Printer(P) {}
void EnumDumper::start(const PDBSymbolTypeEnum &Symbol) {
+ if (Symbol.getUnmodifiedTypeId() != 0) {
+ if (Symbol.isConstType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "const ";
+ if (Symbol.isVolatileType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "volatile ";
+ if (Symbol.isUnalignedType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "unaligned ";
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "enum ";
+ WithColor(Printer, PDB_ColorItem::Type).get() << Symbol.getName();
+ return;
+ }
+
WithColor(Printer, PDB_ColorItem::Keyword).get() << "enum ";
WithColor(Printer, PDB_ColorItem::Type).get() << Symbol.getName();
if (!opts::pretty::NoEnumDefs) {
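
Editorial note: the pretty dumpers above now print const/volatile/unaligned keywords for modified types (EnumDumper only when getUnmodifiedTypeId() is non-zero). The helper below restates that prefix logic in isolation; it is a sketch, not the dumper code, and takes plain booleans instead of PDB symbols.

#include <string>

std::string qualifierPrefix(bool IsConst, bool IsVolatile, bool IsUnaligned) {
  std::string Prefix;
  if (IsConst)
    Prefix += "const ";
  if (IsVolatile)
    Prefix += "volatile ";
  if (IsUnaligned)
    Prefix += "unaligned ";
  return Prefix;
}
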
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
index 177d8a009a2b..836ede41054e 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
@@ -53,7 +53,10 @@ FunctionDumper::FunctionDumper(LinePrinter &P)
void FunctionDumper::start(const PDBSymbolTypeFunctionSig &Symbol,
const char *Name, PointerType Pointer) {
auto ReturnType = Symbol.getReturnType();
- ReturnType->dump(*this);
+ if (!ReturnType)
+ Printer << "<unknown-type>";
+ else
+ ReturnType->dump(*this);
Printer << " ";
uint32_t ClassParentId = Symbol.getClassParentId();
auto ClassParent =
@@ -225,9 +228,10 @@ void FunctionDumper::dump(const PDBSymbolTypeFunctionArg &Symbol) {
// through to the real thing and dump it.
uint32_t TypeId = Symbol.getTypeId();
auto Type = Symbol.getSession().getSymbolById(TypeId);
- if (!Type)
- return;
- Type->dump(*this);
+  if (Type)
+    Type->dump(*this);
+  else
+    Printer << "<unknown-type>";

}
void FunctionDumper::dump(const PDBSymbolTypeTypedef &Symbol) {
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp
index 663a608fe429..daf3cd45b327 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp
@@ -13,13 +13,17 @@
#include "PrettyBuiltinDumper.h"
#include "PrettyClassDefinitionDumper.h"
#include "PrettyEnumDumper.h"
+#include "PrettyFunctionDumper.h"
#include "PrettyTypedefDumper.h"
#include "llvm-pdbutil.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeArray.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/DebugInfo/PDB/UDTLayout.h"
@@ -128,36 +132,85 @@ filterAndSortClassDefs(LinePrinter &Printer, Enumerator &E,
}
if (Comp)
- llvm::sort(Filtered.begin(), Filtered.end(), Comp);
+ llvm::sort(Filtered, Comp);
return Filtered;
}
TypeDumper::TypeDumper(LinePrinter &P) : PDBSymDumper(true), Printer(P) {}
-void TypeDumper::start(const PDBSymbolExe &Exe) {
- if (opts::pretty::Enums) {
- if (auto Enums = Exe.findAllChildren<PDBSymbolTypeEnum>()) {
+template <typename T>
+static bool isTypeExcluded(LinePrinter &Printer, const T &Symbol) {
+ return false;
+}
+
+static bool isTypeExcluded(LinePrinter &Printer,
+ const PDBSymbolTypeEnum &Enum) {
+ if (Printer.IsTypeExcluded(Enum.getName(), Enum.getLength()))
+ return true;
+ // Dump member enums when dumping their class definition.
+ if (nullptr != Enum.getClassParent())
+ return true;
+ return false;
+}
+
+static bool isTypeExcluded(LinePrinter &Printer,
+ const PDBSymbolTypeTypedef &Typedef) {
+ return Printer.IsTypeExcluded(Typedef.getName(), Typedef.getLength());
+}
+
+template <typename SymbolT>
+static void dumpSymbolCategory(LinePrinter &Printer, const PDBSymbolExe &Exe,
+ TypeDumper &TD, StringRef Label) {
+ if (auto Children = Exe.findAllChildren<SymbolT>()) {
+ Printer.NewLine();
+ WithColor(Printer, PDB_ColorItem::Identifier).get() << Label;
+ Printer << ": (" << Children->getChildCount() << " items)";
+ Printer.Indent();
+ while (auto Child = Children->getNext()) {
+ if (isTypeExcluded(Printer, *Child))
+ continue;
+
Printer.NewLine();
- WithColor(Printer, PDB_ColorItem::Identifier).get() << "Enums";
- Printer << ": (" << Enums->getChildCount() << " items)";
- Printer.Indent();
- while (auto Enum = Enums->getNext())
- Enum->dump(*this);
- Printer.Unindent();
+ Child->dump(TD);
}
+ Printer.Unindent();
}
+}
- if (opts::pretty::Typedefs) {
- if (auto Typedefs = Exe.findAllChildren<PDBSymbolTypeTypedef>()) {
- Printer.NewLine();
- WithColor(Printer, PDB_ColorItem::Identifier).get() << "Typedefs";
- Printer << ": (" << Typedefs->getChildCount() << " items)";
- Printer.Indent();
- while (auto Typedef = Typedefs->getNext())
- Typedef->dump(*this);
- Printer.Unindent();
- }
+static void printClassDecl(LinePrinter &Printer,
+ const PDBSymbolTypeUDT &Class) {
+ if (Class.getUnmodifiedTypeId() != 0) {
+ if (Class.isConstType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "const ";
+ if (Class.isVolatileType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "volatile ";
+ if (Class.isUnalignedType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "unaligned ";
}
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << Class.getUdtKind() << " ";
+ WithColor(Printer, PDB_ColorItem::Type).get() << Class.getName();
+}
+
+void TypeDumper::start(const PDBSymbolExe &Exe) {
+ if (opts::pretty::Enums)
+ dumpSymbolCategory<PDBSymbolTypeEnum>(Printer, Exe, *this, "Enums");
+
+ if (opts::pretty::Funcsigs)
+ dumpSymbolCategory<PDBSymbolTypeFunctionSig>(Printer, Exe, *this,
+ "Function Signatures");
+
+ if (opts::pretty::Typedefs)
+ dumpSymbolCategory<PDBSymbolTypeTypedef>(Printer, Exe, *this, "Typedefs");
+
+ if (opts::pretty::Arrays)
+ dumpSymbolCategory<PDBSymbolTypeArray>(Printer, Exe, *this, "Arrays");
+
+ if (opts::pretty::Pointers)
+ dumpSymbolCategory<PDBSymbolTypePointer>(Printer, Exe, *this, "Pointers");
+
+ if (opts::pretty::VTShapes)
+ dumpSymbolCategory<PDBSymbolTypeVTableShape>(Printer, Exe, *this,
+ "VFTable Shapes");
if (opts::pretty::Classes) {
if (auto Classes = Exe.findAllChildren<PDBSymbolTypeUDT>()) {
@@ -196,11 +249,16 @@ void TypeDumper::start(const PDBSymbolExe &Exe) {
dumpClassLayout(*Class);
} else {
while (auto Class = Classes->getNext()) {
- if (Class->getUnmodifiedTypeId() != 0)
+ if (Printer.IsTypeExcluded(Class->getName(), Class->getLength()))
continue;
- if (Printer.IsTypeExcluded(Class->getName(), Class->getLength()))
+ // No point duplicating a full class layout. Just print the modified
+ // declaration and continue.
+ if (Class->getUnmodifiedTypeId() != 0) {
+ Printer.NewLine();
+ printClassDecl(Printer, *Class);
continue;
+ }
auto Layout = llvm::make_unique<ClassLayout>(std::move(Class));
if (Layout->deepPaddingSize() < opts::pretty::PaddingThreshold)
@@ -218,35 +276,83 @@ void TypeDumper::start(const PDBSymbolExe &Exe) {
void TypeDumper::dump(const PDBSymbolTypeEnum &Symbol) {
assert(opts::pretty::Enums);
- if (Printer.IsTypeExcluded(Symbol.getName(), Symbol.getLength()))
- return;
- // Dump member enums when dumping their class definition.
- if (nullptr != Symbol.getClassParent())
- return;
-
- Printer.NewLine();
EnumDumper Dumper(Printer);
Dumper.start(Symbol);
}
+void TypeDumper::dump(const PDBSymbolTypeBuiltin &Symbol) {
+ BuiltinDumper BD(Printer);
+ BD.start(Symbol);
+}
+
+void TypeDumper::dump(const PDBSymbolTypeUDT &Symbol) {
+ printClassDecl(Printer, Symbol);
+}
+
void TypeDumper::dump(const PDBSymbolTypeTypedef &Symbol) {
assert(opts::pretty::Typedefs);
- if (Printer.IsTypeExcluded(Symbol.getName(), Symbol.getLength()))
- return;
-
- Printer.NewLine();
TypedefDumper Dumper(Printer);
Dumper.start(Symbol);
}
+void TypeDumper::dump(const PDBSymbolTypeArray &Symbol) {
+ auto ElementType = Symbol.getElementType();
+
+ ElementType->dump(*this);
+ Printer << "[";
+ WithColor(Printer, PDB_ColorItem::LiteralValue).get() << Symbol.getCount();
+ Printer << "]";
+}
+
+void TypeDumper::dump(const PDBSymbolTypeFunctionSig &Symbol) {
+ FunctionDumper Dumper(Printer);
+ Dumper.start(Symbol, nullptr, FunctionDumper::PointerType::None);
+}
+
+void TypeDumper::dump(const PDBSymbolTypePointer &Symbol) {
+ std::unique_ptr<PDBSymbol> P = Symbol.getPointeeType();
+
+ if (auto *FS = dyn_cast<PDBSymbolTypeFunctionSig>(P.get())) {
+ FunctionDumper Dumper(Printer);
+ FunctionDumper::PointerType PT =
+ Symbol.isReference() ? FunctionDumper::PointerType::Reference
+ : FunctionDumper::PointerType::Pointer;
+ Dumper.start(*FS, nullptr, PT);
+ return;
+ }
+
+ if (auto *UDT = dyn_cast<PDBSymbolTypeUDT>(P.get())) {
+ printClassDecl(Printer, *UDT);
+ } else if (P) {
+ P->dump(*this);
+ }
+
+ if (auto Parent = Symbol.getClassParent()) {
+ auto UDT = llvm::unique_dyn_cast<PDBSymbolTypeUDT>(std::move(Parent));
+ if (UDT)
+ Printer << " " << UDT->getName() << "::";
+ }
+
+ if (Symbol.isReference())
+ Printer << "&";
+ else if (Symbol.isRValueReference())
+ Printer << "&&";
+ else
+ Printer << "*";
+}
+
+void TypeDumper::dump(const PDBSymbolTypeVTableShape &Symbol) {
+ Printer.format("<vtshape ({0} methods)>", Symbol.getCount());
+}
+
void TypeDumper::dumpClassLayout(const ClassLayout &Class) {
assert(opts::pretty::Classes);
if (opts::pretty::ClassFormat == opts::pretty::ClassDefinitionFormat::None) {
- Printer.NewLine();
- WithColor(Printer, PDB_ColorItem::Keyword).get() << "class ";
- WithColor(Printer, PDB_ColorItem::Identifier).get() << Class.getName();
+ WithColor(Printer, PDB_ColorItem::Keyword).get()
+ << Class.getClass().getUdtKind() << " ";
+ WithColor(Printer, PDB_ColorItem::Type).get() << Class.getName();
} else {
ClassDefinitionDumper Dumper(Printer);
Dumper.start(Class);
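
Editorial note: TypeDumper::dump(const PDBSymbolTypePointer &) above ends by choosing a suffix for the pointee, '&' for references, '&&' for r-value references, '*' otherwise. That choice, restated as a tiny free function for clarity (illustrative only):

#include <string>

std::string pointerSuffix(bool IsReference, bool IsRValueReference) {
  if (IsReference)
    return "&";
  if (IsRValueReference)
    return "&&";
  return "*";
}
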
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.h b/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.h
index 68a2f0246eba..36e586fea7e3 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.h
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.h
@@ -25,6 +25,12 @@ public:
void dump(const PDBSymbolTypeEnum &Symbol) override;
void dump(const PDBSymbolTypeTypedef &Symbol) override;
+ void dump(const PDBSymbolTypeFunctionSig &Symbol) override;
+ void dump(const PDBSymbolTypeArray &Symbol) override;
+ void dump(const PDBSymbolTypeBuiltin &Symbol) override;
+ void dump(const PDBSymbolTypePointer &Symbol) override;
+ void dump(const PDBSymbolTypeVTableShape &Symbol) override;
+ void dump(const PDBSymbolTypeUDT &Symbol) override;
void dumpClassLayout(const ClassLayout &Class);
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
index 65443d6bca90..2b3f3691ed98 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
@@ -12,6 +12,7 @@
#include "LinePrinter.h"
#include "PrettyBuiltinDumper.h"
#include "PrettyFunctionDumper.h"
+#include "PrettyTypeDumper.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
@@ -35,7 +36,10 @@ void TypedefDumper::start(const PDBSymbolTypeTypedef &Symbol) {
<< Symbol.getName();
}
-void TypedefDumper::dump(const PDBSymbolTypeArray &Symbol) {}
+void TypedefDumper::dump(const PDBSymbolTypeArray &Symbol) {
+ TypeDumper Dumper(Printer);
+ Dumper.dump(Symbol);
+}
void TypedefDumper::dump(const PDBSymbolTypeBuiltin &Symbol) {
BuiltinDumper Dumper(Printer);
diff --git a/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.cpp b/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.cpp
index a7afbf1242c5..62b5c428d410 100644
--- a/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.cpp
@@ -18,10 +18,13 @@
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
using namespace llvm;
@@ -68,6 +71,9 @@ Error YAMLOutputStyle::dump() {
if (auto EC = dumpIpiStream())
return EC;
+ if (auto EC = dumpPublics())
+ return EC;
+
flush();
return Error::success();
}
@@ -191,6 +197,9 @@ Error YAMLOutputStyle::dumpDbiStream() {
if (!opts::pdb2yaml::DbiStream)
return Error::success();
+ if (!File.hasPDBDbiStream())
+ return Error::success();
+
auto DbiS = File.getPDBDbiStream();
if (!DbiS)
return DbiS.takeError();
@@ -323,6 +332,42 @@ Error YAMLOutputStyle::dumpIpiStream() {
return Error::success();
}
+Error YAMLOutputStyle::dumpPublics() {
+ if (!opts::pdb2yaml::PublicsStream)
+ return Error::success();
+
+ Obj.PublicsStream.emplace();
+ auto ExpectedPublics = File.getPDBPublicsStream();
+ if (!ExpectedPublics) {
+ llvm::consumeError(ExpectedPublics.takeError());
+ return Error::success();
+ }
+
+ PublicsStream &Publics = *ExpectedPublics;
+ const GSIHashTable &PublicsTable = Publics.getPublicsTable();
+
+ auto ExpectedSyms = File.getPDBSymbolStream();
+ if (!ExpectedSyms) {
+ llvm::consumeError(ExpectedSyms.takeError());
+ return Error::success();
+ }
+
+ BinaryStreamRef SymStream =
+ ExpectedSyms->getSymbolArray().getUnderlyingStream();
+ for (uint32_t PubSymOff : PublicsTable) {
+ Expected<CVSymbol> Sym = readSymbolFromStream(SymStream, PubSymOff);
+ if (!Sym)
+ return Sym.takeError();
+ auto ES = CodeViewYAML::SymbolRecord::fromCodeViewSymbol(*Sym);
+ if (!ES)
+ return ES.takeError();
+
+ Obj.PublicsStream->PubSyms.push_back(*ES);
+ }
+
+ return Error::success();
+}
+
void YAMLOutputStyle::flush() {
Out << Obj;
outs().flush();
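
Editorial note: dumpPublics above deliberately treats a missing publics or symbol stream as "nothing to emit" rather than as a failure, consuming the error and returning success. A minimal sketch of that policy with invented types (no PDB API involved):

#include <optional>
#include <vector>

struct PublicSym {
  unsigned Offset;
};

// Stand-in for the stream loader; here the stream is simply absent.
std::optional<std::vector<PublicSym>> loadPublicsStream() {
  return std::nullopt;
}

std::vector<PublicSym> collectPublics() {
  auto Publics = loadPublicsStream();
  if (!Publics)
    return {};  // swallow the failure and emit an empty record list
  return *Publics;
}
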
diff --git a/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.h b/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.h
index 3690e3529d4a..a5ad3355d2ab 100644
--- a/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.h
+++ b/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.h
@@ -35,6 +35,7 @@ private:
Error dumpDbiStream();
Error dumpTpiStream();
Error dumpIpiStream();
+ Error dumpPublics();
void flush();
diff --git a/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
index 5b0d21f83db7..76f61a2a95a7 100644
--- a/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -13,7 +13,6 @@
#include "llvm-pdbutil.h"
-#include "Analyze.h"
#include "BytesOutputStyle.h"
#include "DumpOutputStyle.h"
#include "ExplainOutputStyle.h"
@@ -46,7 +45,6 @@
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
-#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
@@ -71,6 +69,8 @@
#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolThunk.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/Support/BinaryByteStream.h"
@@ -102,6 +102,9 @@ namespace opts {
cl::SubCommand DumpSubcommand("dump", "Dump MSF and CodeView debug info");
cl::SubCommand BytesSubcommand("bytes", "Dump raw bytes from the PDB file");
+cl::SubCommand DiaDumpSubcommand("diadump",
+ "Dump debug information using a DIA-like API");
+
cl::SubCommand
PrettySubcommand("pretty",
"Dump semantic information about types and symbols");
@@ -113,10 +116,6 @@ cl::SubCommand
PdbToYamlSubcommand("pdb2yaml",
"Generate a detailed YAML description of a PDB File");
-cl::SubCommand
- AnalyzeSubcommand("analyze",
- "Analyze various aspects of a PDB's structure");
-
cl::SubCommand MergeSubcommand("merge",
"Merge multiple PDBs into a single PDB");
@@ -155,6 +154,48 @@ cl::ValuesClass ChunkValues = cl::values(
"Any subsection not covered by another option"),
clEnumValN(ModuleSubsection::All, "all", "All known subsections"));
+namespace diadump {
+cl::list<std::string> InputFilenames(cl::Positional,
+ cl::desc("<input PDB files>"),
+ cl::OneOrMore, cl::sub(DiaDumpSubcommand));
+
+cl::opt<bool> Native("native", cl::desc("Use native PDB reader instead of DIA"),
+ cl::sub(DiaDumpSubcommand));
+
+static cl::opt<bool>
+ ShowClassHierarchy("hierarchy", cl::desc("Show lexical and class parents"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> NoSymIndexIds(
+ "no-ids",
+ cl::desc("Don't show any SymIndexId fields (overrides -hierarchy)"),
+ cl::sub(DiaDumpSubcommand));
+
+static cl::opt<bool>
+ Recurse("recurse",
+ cl::desc("When dumping a SymIndexId, dump the full details of the "
+ "corresponding record"),
+ cl::sub(DiaDumpSubcommand));
+
+static cl::opt<bool> Enums("enums", cl::desc("Dump enum types"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> Pointers("pointers", cl::desc("Dump pointer types"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> UDTs("udts", cl::desc("Dump udt types"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> Compilands("compilands",
+ cl::desc("Dump compiland information"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> Funcsigs("funcsigs",
+ cl::desc("Dump function signature information"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> Arrays("arrays", cl::desc("Dump array types"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> VTShapes("vtshapes", cl::desc("Dump virtual table shapes"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> Typedefs("typedefs", cl::desc("Dump typedefs"),
+ cl::sub(DiaDumpSubcommand));
+} // namespace diadump
+
namespace pretty {
cl::list<std::string> InputFilenames(cl::Positional,
cl::desc("<input PDB files>"),
@@ -201,6 +242,15 @@ cl::opt<bool> Enums("enums", cl::desc("Display enum types"),
cl::cat(TypeCategory), cl::sub(PrettySubcommand));
cl::opt<bool> Typedefs("typedefs", cl::desc("Display typedef types"),
cl::cat(TypeCategory), cl::sub(PrettySubcommand));
+cl::opt<bool> Funcsigs("funcsigs", cl::desc("Display function signatures"),
+ cl::cat(TypeCategory), cl::sub(PrettySubcommand));
+cl::opt<bool> Pointers("pointers", cl::desc("Display pointer types"),
+ cl::cat(TypeCategory), cl::sub(PrettySubcommand));
+cl::opt<bool> Arrays("arrays", cl::desc("Display arrays"),
+ cl::cat(TypeCategory), cl::sub(PrettySubcommand));
+cl::opt<bool> VTShapes("vtshapes", cl::desc("Display vftable shapes"),
+ cl::cat(TypeCategory), cl::sub(PrettySubcommand));
+
cl::opt<SymbolSortMode> SymbolOrder(
"symbol-order", cl::desc("symbol sort order"),
cl::init(SymbolSortMode::None),
@@ -432,6 +482,12 @@ cl::opt<bool> DumpTypeExtras("type-extras",
cl::desc("dump type hashes and index offsets"),
cl::cat(TypeOptions), cl::sub(DumpSubcommand));
+cl::opt<bool> DontResolveForwardRefs(
+ "dont-resolve-forward-refs",
+ cl::desc("When dumping type records for classes, unions, enums, and "
+ "structs, don't try to resolve forward references"),
+ cl::cat(TypeOptions), cl::sub(DumpSubcommand));
+
cl::list<uint32_t> DumpTypeIndex(
"type-index", cl::ZeroOrMore, cl::CommaSeparated,
cl::desc("only dump types with the specified hexadecimal type index"),
@@ -465,6 +521,11 @@ cl::opt<bool> DumpGlobals("globals", cl::desc("dump Globals symbol records"),
cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
cl::opt<bool> DumpGlobalExtras("global-extras", cl::desc("dump Globals hashes"),
cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+cl::list<std::string> DumpGlobalNames(
+ "global-name",
+ cl::desc(
+ "With -globals, only dump globals whose name matches the given value"),
+ cl::cat(SymbolOptions), cl::sub(DumpSubcommand), cl::ZeroOrMore);
cl::opt<bool> DumpPublics("publics", cl::desc("dump Publics stream data"),
cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
cl::opt<bool> DumpPublicExtras("public-extras",
@@ -482,6 +543,9 @@ cl::opt<bool>
cl::desc("dump CodeView symbol record raw bytes"),
cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+cl::opt<bool> DumpFpo("fpo", cl::desc("dump FPO records"),
+ cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+
// MODULE & FILE OPTIONS
cl::opt<bool> DumpModules("modules", cl::desc("dump compiland information"),
cl::cat(FileOptions), cl::sub(DumpSubcommand));
@@ -594,6 +658,10 @@ cl::opt<bool> IpiStream("ipi-stream",
cl::desc("Dump the IPI Stream (Stream 5)"),
cl::sub(PdbToYamlSubcommand), cl::init(false));
+cl::opt<bool> PublicsStream("publics-stream",
+ cl::desc("Dump the Publics Stream"),
+ cl::sub(PdbToYamlSubcommand), cl::init(false));
+
// MODULE & FILE OPTIONS
cl::opt<bool> DumpModules("modules", cl::desc("dump compiland information"),
cl::cat(FileOptions), cl::sub(PdbToYamlSubcommand));
@@ -613,14 +681,6 @@ cl::list<std::string> InputFilename(cl::Positional,
cl::sub(PdbToYamlSubcommand));
} // namespace pdb2yaml
-namespace analyze {
-cl::opt<bool> StringTable("hash-collisions", cl::desc("Find hash collisions"),
- cl::sub(AnalyzeSubcommand), cl::init(false));
-cl::list<std::string> InputFilename(cl::Positional,
- cl::desc("<input PDB file>"), cl::Required,
- cl::sub(AnalyzeSubcommand));
-}
-
namespace merge {
cl::list<std::string> InputFilenames(cl::Positional,
cl::desc("<input PDB files>"),
@@ -681,7 +741,7 @@ static void yamlToPdb(StringRef Path) {
/*RequiresNullTerminator=*/false);
if (ErrorOrBuffer.getError()) {
- ExitOnErr(make_error<GenericError>(generic_error_code::invalid_path, Path));
+ ExitOnErr(createFileError(Path, errorCodeToError(ErrorOrBuffer.getError())));
}
std::unique_ptr<MemoryBuffer> &Buffer = ErrorOrBuffer.get();
@@ -781,7 +841,8 @@ static void yamlToPdb(StringRef Path) {
Builder.getStringTableBuilder().setStrings(*Strings.strings());
- ExitOnErr(Builder.commit(opts::yaml2pdb::YamlPdbOutputFile));
+ codeview::GUID IgnoredOutGuid;
+ ExitOnErr(Builder.commit(opts::yaml2pdb::YamlPdbOutputFile, &IgnoredOutGuid));
}
static PDBFile &loadPDB(StringRef Path, std::unique_ptr<IPDBSession> &Session) {
@@ -817,14 +878,6 @@ static void dumpBytes(StringRef Path) {
ExitOnErr(O->dump());
}
-static void dumpAnalysis(StringRef Path) {
- std::unique_ptr<IPDBSession> Session;
- auto &File = loadPDB(Path, Session);
- auto O = llvm::make_unique<AnalysisStyle>(File);
-
- ExitOnErr(O->dump());
-}
-
bool opts::pretty::shouldDumpSymLevel(SymLevel Search) {
if (SymTypes.empty())
return true;
@@ -924,6 +977,69 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
}
}
+template <typename OuterT, typename ChildT>
+void diaDumpChildren(PDBSymbol &Outer, PdbSymbolIdField Ids,
+ PdbSymbolIdField Recurse) {
+ OuterT *ConcreteOuter = dyn_cast<OuterT>(&Outer);
+ if (!ConcreteOuter)
+ return;
+
+ auto Children = ConcreteOuter->template findAllChildren<ChildT>();
+ while (auto Child = Children->getNext()) {
+ outs() << " {";
+ Child->defaultDump(outs(), 4, Ids, Recurse);
+ outs() << "\n }\n";
+ }
+}
+
+static void dumpDia(StringRef Path) {
+ std::unique_ptr<IPDBSession> Session;
+
+ const auto ReaderType =
+ opts::diadump::Native ? PDB_ReaderType::Native : PDB_ReaderType::DIA;
+ ExitOnErr(loadDataForPDB(ReaderType, Path, Session));
+
+ auto GlobalScope = Session->getGlobalScope();
+
+ std::vector<PDB_SymType> SymTypes;
+
+ if (opts::diadump::Compilands)
+ SymTypes.push_back(PDB_SymType::Compiland);
+ if (opts::diadump::Enums)
+ SymTypes.push_back(PDB_SymType::Enum);
+ if (opts::diadump::Pointers)
+ SymTypes.push_back(PDB_SymType::PointerType);
+ if (opts::diadump::UDTs)
+ SymTypes.push_back(PDB_SymType::UDT);
+ if (opts::diadump::Funcsigs)
+ SymTypes.push_back(PDB_SymType::FunctionSig);
+ if (opts::diadump::Arrays)
+ SymTypes.push_back(PDB_SymType::ArrayType);
+ if (opts::diadump::VTShapes)
+ SymTypes.push_back(PDB_SymType::VTableShape);
+ if (opts::diadump::Typedefs)
+ SymTypes.push_back(PDB_SymType::Typedef);
+ PdbSymbolIdField Ids = opts::diadump::NoSymIndexIds ? PdbSymbolIdField::None
+ : PdbSymbolIdField::All;
+
+ PdbSymbolIdField Recurse = PdbSymbolIdField::None;
+ if (opts::diadump::Recurse)
+ Recurse = PdbSymbolIdField::All;
+ if (!opts::diadump::ShowClassHierarchy)
+ Ids &= ~(PdbSymbolIdField::ClassParent | PdbSymbolIdField::LexicalParent);
+
+ for (PDB_SymType ST : SymTypes) {
+ auto Children = GlobalScope->findAllChildren(ST);
+ while (auto Child = Children->getNext()) {
+ outs() << "{";
+ Child->defaultDump(outs(), 2, Ids, Recurse);
+
+ diaDumpChildren<PDBSymbolTypeEnum, PDBSymbolData>(*Child, Ids, Recurse);
+ outs() << "\n}\n";
+ }
+ }
+}
+
static void dumpPretty(StringRef Path) {
std::unique_ptr<IPDBSession> Session;
@@ -1055,7 +1171,9 @@ static void dumpPretty(StringRef Path) {
Printer.NewLine();
WithColor(Printer, PDB_ColorItem::SectionHeader).get()
<< "---COMPILANDS---";
- if (auto Compilands = GlobalScope->findAllChildren<PDBSymbolCompiland>()) {
+ auto Compilands = GlobalScope->findAllChildren<PDBSymbolCompiland>();
+
+ if (Compilands) {
Printer.Indent();
CompilandDumper Dumper(Printer);
CompilandDumpFlags options = CompilandDumper::Flags::None;
@@ -1067,7 +1185,9 @@ static void dumpPretty(StringRef Path) {
}
}
- if (opts::pretty::Classes || opts::pretty::Enums || opts::pretty::Typedefs) {
+ if (opts::pretty::Classes || opts::pretty::Enums || opts::pretty::Typedefs ||
+ opts::pretty::Funcsigs || opts::pretty::Pointers ||
+ opts::pretty::Arrays || opts::pretty::VTShapes) {
Printer.NewLine();
WithColor(Printer, PDB_ColorItem::SectionHeader).get() << "---TYPES---";
Printer.Indent();
@@ -1104,8 +1224,7 @@ static void dumpPretty(StringRef Path) {
std::vector<std::unique_ptr<PDBSymbolFunc>> Funcs;
while (auto Func = Functions->getNext())
Funcs.push_back(std::move(Func));
- llvm::sort(Funcs.begin(), Funcs.end(),
- opts::pretty::compareFunctionSymbols);
+ llvm::sort(Funcs, opts::pretty::compareFunctionSymbols);
for (const auto &Func : Funcs) {
Printer.NewLine();
Dumper.start(*Func, FunctionDumper::PointerType::None);
@@ -1123,8 +1242,7 @@ static void dumpPretty(StringRef Path) {
std::vector<std::unique_ptr<PDBSymbolData>> Datas;
while (auto Var = Vars->getNext())
Datas.push_back(std::move(Var));
- llvm::sort(Datas.begin(), Datas.end(),
- opts::pretty::compareDataSymbols);
+ llvm::sort(Datas, opts::pretty::compareDataSymbols);
for (const auto &Var : Datas)
Dumper.start(*Var);
}
@@ -1162,6 +1280,7 @@ static void dumpPretty(StringRef Path) {
dumpInjectedSources(Printer, *Session);
}
+ Printer.NewLine();
outs().flush();
}
@@ -1211,7 +1330,9 @@ static void mergePdbs() {
OutFile = opts::merge::InputFilenames[0];
llvm::sys::path::replace_extension(OutFile, "merged.pdb");
}
- ExitOnErr(Builder.commit(OutFile));
+
+ codeview::GUID IgnoredOutGuid;
+ ExitOnErr(Builder.commit(OutFile, &IgnoredOutGuid));
}
static void explain() {
@@ -1323,6 +1444,7 @@ int main(int Argc, const char **Argv) {
if (opts::DumpSubcommand) {
if (opts::dump::RawAll) {
opts::dump::DumpGlobals = true;
+ opts::dump::DumpFpo = true;
opts::dump::DumpInlineeLines = true;
opts::dump::DumpIds = true;
opts::dump::DumpIdExtras = true;
@@ -1356,6 +1478,7 @@ int main(int Argc, const char **Argv) {
opts::pdb2yaml::DbiStream = true;
opts::pdb2yaml::TpiStream = true;
opts::pdb2yaml::IpiStream = true;
+ opts::pdb2yaml::PublicsStream = true;
opts::pdb2yaml::DumpModules = true;
opts::pdb2yaml::DumpModuleFiles = true;
opts::pdb2yaml::DumpModuleSyms = true;
@@ -1382,8 +1505,8 @@ int main(int Argc, const char **Argv) {
opts::yaml2pdb::YamlPdbOutputFile = OutputFilename.str();
}
yamlToPdb(opts::yaml2pdb::InputFilename);
- } else if (opts::AnalyzeSubcommand) {
- dumpAnalysis(opts::analyze::InputFilename.front());
+ } else if (opts::DiaDumpSubcommand) {
+ llvm::for_each(opts::diadump::InputFilenames, dumpDia);
} else if (opts::PrettySubcommand) {
if (opts::pretty::Lines)
opts::pretty::Compilands = true;
@@ -1401,6 +1524,8 @@ int main(int Argc, const char **Argv) {
opts::pretty::Classes = true;
opts::pretty::Typedefs = true;
opts::pretty::Enums = true;
+ opts::pretty::Pointers = true;
+ opts::pretty::Funcsigs = true;
}
// When adding filters for excluded compilands and types, we need to
diff --git a/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.h b/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
index 7496adaeb62f..a57cc51d7fd7 100644
--- a/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
+++ b/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
@@ -82,7 +82,11 @@ extern llvm::cl::opt<bool> Symbols;
extern llvm::cl::opt<bool> Globals;
extern llvm::cl::opt<bool> Classes;
extern llvm::cl::opt<bool> Enums;
+extern llvm::cl::opt<bool> Funcsigs;
+extern llvm::cl::opt<bool> Arrays;
extern llvm::cl::opt<bool> Typedefs;
+extern llvm::cl::opt<bool> Pointers;
+extern llvm::cl::opt<bool> VTShapes;
extern llvm::cl::opt<bool> All;
extern llvm::cl::opt<bool> ExcludeCompilerGenerated;
@@ -160,10 +164,12 @@ extern llvm::cl::opt<bool> DumpIdExtras;
extern llvm::cl::list<uint32_t> DumpIdIndex;
extern llvm::cl::opt<uint32_t> DumpModi;
extern llvm::cl::opt<bool> JustMyCode;
+extern llvm::cl::opt<bool> DontResolveForwardRefs;
extern llvm::cl::opt<bool> DumpSymbols;
extern llvm::cl::opt<bool> DumpSymRecordBytes;
extern llvm::cl::opt<bool> DumpGSIRecords;
extern llvm::cl::opt<bool> DumpGlobals;
+extern llvm::cl::list<std::string> DumpGlobalNames;
extern llvm::cl::opt<bool> DumpGlobalExtras;
extern llvm::cl::opt<bool> DumpPublics;
extern llvm::cl::opt<bool> DumpPublicExtras;
@@ -171,6 +177,7 @@ extern llvm::cl::opt<bool> DumpSectionContribs;
extern llvm::cl::opt<bool> DumpSectionMap;
extern llvm::cl::opt<bool> DumpModules;
extern llvm::cl::opt<bool> DumpModuleFiles;
+extern llvm::cl::opt<bool> DumpFpo;
extern llvm::cl::opt<bool> RawAll;
}
@@ -185,6 +192,7 @@ extern llvm::cl::opt<bool> PdbStream;
extern llvm::cl::opt<bool> DbiStream;
extern llvm::cl::opt<bool> TpiStream;
extern llvm::cl::opt<bool> IpiStream;
+extern llvm::cl::opt<bool> PublicsStream;
extern llvm::cl::list<std::string> InputFilename;
extern llvm::cl::opt<bool> DumpModules;
extern llvm::cl::opt<bool> DumpModuleFiles;
diff --git a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 1a0b9e127bbc..c25cbc2b64df 100644
--- a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -123,6 +123,47 @@ static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
}
}
+namespace {
+/// A remapper from original symbol names to new symbol names based on a file
+/// containing a list of mappings from old name to new name.
+class SymbolRemapper {
+ std::unique_ptr<MemoryBuffer> File;
+ DenseMap<StringRef, StringRef> RemappingTable;
+
+public:
+ /// Build a SymbolRemapper from a file containing a list of old/new symbols.
+ static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
+ auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
+ if (!BufOrError)
+ exitWithErrorCode(BufOrError.getError(), InputFile);
+
+ auto Remapper = llvm::make_unique<SymbolRemapper>();
+ Remapper->File = std::move(BufOrError.get());
+
+ for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
+ !LineIt.is_at_eof(); ++LineIt) {
+ std::pair<StringRef, StringRef> Parts = LineIt->split(' ');
+ if (Parts.first.empty() || Parts.second.empty() ||
+ Parts.second.count(' ')) {
+ exitWithError("unexpected line in remapping file",
+ (InputFile + ":" + Twine(LineIt.line_number())).str(),
+ "expected 'old_symbol new_symbol'");
+ }
+ Remapper->RemappingTable.insert(Parts);
+ }
+ return Remapper;
+ }
+
+ /// Attempt to map the given old symbol into a new symbol.
+ ///
+ /// \return The new symbol, or \p Name if no such symbol was found.
+ StringRef operator()(StringRef Name) {
+ StringRef New = RemappingTable.lookup(Name);
+ return New.empty() ? Name : New;
+ }
+};
+}
+
struct WeightedFile {
std::string Filename;
uint64_t Weight;
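
Editorial note: the SymbolRemapper added above reads a remapping file (passed to llvm-profdata merge via -remapping-file or -r) containing one "old_symbol new_symbol" pair per line, with '#' introducing comment lines, and maps unknown names to themselves. The class below is a hedged, self-contained re-implementation of that file format for illustration; it is not the llvm-profdata code.

#include <fstream>
#include <sstream>
#include <string>
#include <unordered_map>

class SimpleRemapper {
  std::unordered_map<std::string, std::string> Table;

public:
  bool load(const std::string &Path) {
    std::ifstream In(Path);
    if (!In)
      return false;
    std::string Line;
    while (std::getline(In, Line)) {
      if (Line.empty() || Line[0] == '#')
        continue;                    // skip blanks and comments
      std::istringstream LS(Line);
      std::string Old, New, Extra;
      if (!(LS >> Old >> New) || (LS >> Extra))
        return false;                // expected exactly "old_symbol new_symbol"
      Table.emplace(Old, New);
    }
    return true;
  }

  // Map an old symbol to its new name, or return it unchanged if unmapped.
  std::string operator()(const std::string &Name) const {
    auto It = Table.find(Name);
    return It == Table.end() ? Name : It->second;
  }
};
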
@@ -161,7 +202,8 @@ static bool isFatalError(instrprof_error IPE) {
}
/// Load an input into a writer context.
-static void loadInput(const WeightedFile &Input, WriterContext *WC) {
+static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
+ WriterContext *WC) {
std::unique_lock<std::mutex> CtxGuard{WC->Lock};
// If there's a pending hard error, don't do more work.
@@ -192,6 +234,8 @@ static void loadInput(const WeightedFile &Input, WriterContext *WC) {
}
for (auto &I : *Reader) {
+ if (Remapper)
+ I.Name = (*Remapper)(I.Name);
const StringRef FuncName = I.Name;
bool Reported = false;
WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
@@ -236,6 +280,7 @@ static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
}
static void mergeInstrProfile(const WeightedFileVector &Inputs,
+ SymbolRemapper *Remapper,
StringRef OutputFilename,
ProfileFormat OutputFormat, bool OutputSparse,
unsigned NumThreads) {
@@ -267,14 +312,14 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
if (NumThreads == 1) {
for (const auto &Input : Inputs)
- loadInput(Input, Contexts[0].get());
+ loadInput(Input, Remapper, Contexts[0].get());
} else {
ThreadPool Pool(NumThreads);
// Load the inputs in parallel (N/NumThreads serial steps).
unsigned Ctx = 0;
for (const auto &Input : Inputs) {
- Pool.async(loadInput, Input, Contexts[Ctx].get());
+ Pool.async(loadInput, Input, Remapper, Contexts[Ctx].get());
Ctx = (Ctx + 1) % NumThreads;
}
Pool.wait();
@@ -322,11 +367,43 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
}
}
+/// Make a copy of the given function samples with all symbol names remapped
+/// by the provided symbol remapper.
+static sampleprof::FunctionSamples
+remapSamples(const sampleprof::FunctionSamples &Samples,
+ SymbolRemapper &Remapper, sampleprof_error &Error) {
+ sampleprof::FunctionSamples Result;
+ Result.setName(Remapper(Samples.getName()));
+ Result.addTotalSamples(Samples.getTotalSamples());
+ Result.addHeadSamples(Samples.getHeadSamples());
+ for (const auto &BodySample : Samples.getBodySamples()) {
+ Result.addBodySamples(BodySample.first.LineOffset,
+ BodySample.first.Discriminator,
+ BodySample.second.getSamples());
+ for (const auto &Target : BodySample.second.getCallTargets()) {
+ Result.addCalledTargetSamples(BodySample.first.LineOffset,
+ BodySample.first.Discriminator,
+ Remapper(Target.first()), Target.second);
+ }
+ }
+ for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
+ sampleprof::FunctionSamplesMap &Target =
+ Result.functionSamplesAt(CallsiteSamples.first);
+ for (const auto &Callsite : CallsiteSamples.second) {
+ sampleprof::FunctionSamples Remapped =
+ remapSamples(Callsite.second, Remapper, Error);
+ MergeResult(Error, Target[Remapped.getName()].merge(Remapped));
+ }
+ }
+ return Result;
+}
+
static sampleprof::SampleProfileFormat FormatMap[] = {
sampleprof::SPF_None, sampleprof::SPF_Text, sampleprof::SPF_Compact_Binary,
sampleprof::SPF_GCC, sampleprof::SPF_Binary};
static void mergeSampleProfile(const WeightedFileVector &Inputs,
+ SymbolRemapper *Remapper,
StringRef OutputFilename,
ProfileFormat OutputFormat) {
using namespace sampleprof;
@@ -357,9 +434,13 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
for (StringMap<FunctionSamples>::iterator I = Profiles.begin(),
E = Profiles.end();
I != E; ++I) {
- StringRef FName = I->first();
- FunctionSamples &Samples = I->second;
- sampleprof_error Result = ProfileMap[FName].merge(Samples, Input.Weight);
+ sampleprof_error Result = sampleprof_error::success;
+ FunctionSamples Remapped =
+ Remapper ? remapSamples(I->second, *Remapper, Result)
+ : FunctionSamples();
+ FunctionSamples &Samples = Remapper ? Remapped : I->second;
+ StringRef FName = Samples.getName();
+ MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight));
if (Result != sampleprof_error::success) {
std::error_code EC = make_error_code(Result);
handleMergeWriterError(errorCodeToError(EC), Input.Filename, FName);
@@ -461,6 +542,10 @@ static int merge_main(int argc, const char *argv[]) {
cl::opt<bool> DumpInputFileList(
"dump-input-file-list", cl::init(false), cl::Hidden,
cl::desc("Dump the list of input files and their weights, then exit"));
+ cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"),
+ cl::desc("Symbol remapping file"));
+ cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
+ cl::aliasopt(RemappingFile));
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
cl::init("-"), cl::Required,
cl::desc("Output file"));
@@ -509,11 +594,16 @@ static int merge_main(int argc, const char *argv[]) {
return 0;
}
+ std::unique_ptr<SymbolRemapper> Remapper;
+ if (!RemappingFile.empty())
+ Remapper = SymbolRemapper::create(RemappingFile);
+
if (ProfileKind == instr)
- mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,
- OutputSparse, NumThreads);
+ mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename,
+ OutputFormat, OutputSparse, NumThreads);
else
- mergeSampleProfile(WeightedInputs, OutputFilename, OutputFormat);
+ mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
+ OutputFormat);
return 0;
}
@@ -543,13 +633,21 @@ static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
Stats.ValueSitesHistogram.resize(NV, 0);
Stats.ValueSitesHistogram[NV - 1]++;
}
+
+ uint64_t SiteSum = 0;
+ for (uint32_t V = 0; V < NV; V++)
+ SiteSum += VD[V].Count;
+ if (SiteSum == 0)
+ SiteSum = 1;
+
for (uint32_t V = 0; V < NV; V++) {
- OS << "\t[ " << I << ", ";
+ OS << "\t[ " << format("%2u", I) << ", ";
if (Symtab == nullptr)
- OS << VD[V].Value;
+ OS << format("%4u", VD[V].Value);
else
OS << Symtab->getFuncName(VD[V].Value);
- OS << ", " << VD[V].Count << " ]\n";
+ OS << ", " << format("%10" PRId64, VD[V].Count) << " ] ("
+ << format("%.2f%%", (VD[V].Count * 100.0 / SiteSum)) << ")\n";
}
}
}
@@ -572,9 +670,9 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
uint32_t TopN, bool ShowIndirectCallTargets,
bool ShowMemOPSizes, bool ShowDetailedSummary,
std::vector<uint32_t> DetailedSummaryCutoffs,
- bool ShowAllFunctions,
- const std::string &ShowFunction, bool TextFormat,
- raw_fd_ostream &OS) {
+ bool ShowAllFunctions, uint64_t ValueCutoff,
+ bool OnlyListBelow, const std::string &ShowFunction,
+ bool TextFormat, raw_fd_ostream &OS) {
auto ReaderOrErr = InstrProfReader::create(Filename);
std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
if (ShowDetailedSummary && Cutoffs.empty()) {
@@ -587,6 +685,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
auto Reader = std::move(ReaderOrErr.get());
bool IsIRInstr = Reader->isIRLevelProfile();
size_t ShownFunctions = 0;
+ size_t BelowCutoffFunctions = 0;
int NumVPKind = IPVK_Last - IPVK_First + 1;
std::vector<ValueSitesStats> VPStats(NumVPKind);
@@ -600,12 +699,21 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
decltype(MinCmp)>
HottestFuncs(MinCmp);
+ if (!TextFormat && OnlyListBelow) {
+ OS << "The list of functions with the maximum counter less than "
+ << ValueCutoff << ":\n";
+ }
+
+ // Add marker so that IR-level instrumentation round-trips properly.
+ if (TextFormat && IsIRInstr)
+ OS << ":ir\n";
+
for (const auto &Func : *Reader) {
bool Show =
ShowAllFunctions || (!ShowFunction.empty() &&
Func.Name.find(ShowFunction) != Func.Name.npos);
- bool doTextFormatDump = (Show && ShowCounts && TextFormat);
+ bool doTextFormatDump = (Show && TextFormat);
if (doTextFormatDump) {
InstrProfSymtab &Symtab = Reader->getSymtab();
@@ -617,11 +725,24 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
assert(Func.Counts.size() > 0 && "function missing entry counter");
Builder.addRecord(Func);
- if (TopN) {
- uint64_t FuncMax = 0;
- for (size_t I = 0, E = Func.Counts.size(); I < E; ++I)
- FuncMax = std::max(FuncMax, Func.Counts[I]);
+ uint64_t FuncMax = 0;
+ uint64_t FuncSum = 0;
+ for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
+ FuncMax = std::max(FuncMax, Func.Counts[I]);
+ FuncSum += Func.Counts[I];
+ }
+ if (FuncMax < ValueCutoff) {
+ ++BelowCutoffFunctions;
+ if (OnlyListBelow) {
+ OS << " " << Func.Name << ": (Max = " << FuncMax
+ << " Sum = " << FuncSum << ")\n";
+ }
+ continue;
+ } else if (OnlyListBelow)
+ continue;
+
+ if (TopN) {
if (HottestFuncs.size() == TopN) {
if (HottestFuncs.top().second < FuncMax) {
HottestFuncs.pop();
@@ -632,7 +753,6 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
}
if (Show) {
-
if (!ShownFunctions)
OS << "Counters:\n";
@@ -679,7 +799,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (Reader->hasError())
exitWithError(Reader->getError(), Filename);
- if (ShowCounts && TextFormat)
+ if (TextFormat)
return 0;
std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
OS << "Instrumentation level: "
@@ -687,6 +807,12 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (ShowAllFunctions || !ShowFunction.empty())
OS << "Functions shown: " << ShownFunctions << "\n";
OS << "Total functions: " << PS->getNumFunctions() << "\n";
+ if (ValueCutoff > 0) {
+ OS << "Number of functions with maximum count (< " << ValueCutoff
+ << "): " << BelowCutoffFunctions << "\n";
+ OS << "Number of functions with maximum count (>= " << ValueCutoff
+ << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
+ }
OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n";
OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n";
@@ -788,7 +914,14 @@ static int show_main(int argc, const char *argv[]) {
cl::opt<uint32_t> TopNFunctions(
"topn", cl::init(0),
cl::desc("Show the list of functions with the largest internal counts"));
-
+ cl::opt<uint32_t> ValueCutoff(
+ "value-cutoff", cl::init(0),
+ cl::desc("Set the count value cutoff. Functions with the maximum count "
+ "less than this value will not be printed out. (Default is 0)"));
+ cl::opt<bool> OnlyListBelow(
+ "list-below-cutoff", cl::init(false),
+ cl::desc("Only output names of functions whose max count values are "
+ "below the cutoff value"));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
if (OutputFilename.empty())
@@ -808,7 +941,8 @@ static int show_main(int argc, const char *argv[]) {
return showInstrProfile(Filename, ShowCounts, TopNFunctions,
ShowIndirectCallTargets, ShowMemOPSizes,
ShowDetailedSummary, DetailedSummaryCutoffs,
- ShowAllFunctions, ShowFunction, TextFormat, OS);
+ ShowAllFunctions, ValueCutoff, OnlyListBelow,
+ ShowFunction, TextFormat, OS);
else
return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
ShowFunction, OS);
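
The new -value-cutoff and -list-below-cutoff options classify each function by the maximum of its block counters. A rough sketch of that classification in isolation, with made-up counters and cutoff:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<uint64_t> FunctionCounts = {5, 120, 0, 7}; // sample per-block counters
      uint64_t ValueCutoff = 100;                            // like the new -value-cutoff option

      uint64_t FuncMax = 0, FuncSum = 0;
      for (uint64_t C : FunctionCounts) {
        FuncMax = std::max(FuncMax, C);
        FuncSum += C;
      }
      if (FuncMax < ValueCutoff)
        std::printf("below cutoff: (Max = %llu Sum = %llu)\n",
                    (unsigned long long)FuncMax, (unsigned long long)FuncSum);
      else
        std::printf("kept: max counter %llu >= %llu\n",
                    (unsigned long long)FuncMax, (unsigned long long)ValueCutoff);
    }
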
diff --git a/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
index a90840b22c8d..4b823b816c35 100644
--- a/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -118,31 +118,57 @@ const size_t Decoder::PDataEntrySize = sizeof(RuntimeFunction);
// TODO name the uops more appropriately
const Decoder::RingEntry Decoder::Ring[] = {
- { 0x80, 0x00, &Decoder::opcode_0xxxxxxx }, // UOP_STACK_FREE (16-bit)
- { 0xc0, 0x80, &Decoder::opcode_10Lxxxxx }, // UOP_POP (32-bit)
- { 0xf0, 0xc0, &Decoder::opcode_1100xxxx }, // UOP_STACK_SAVE (16-bit)
- { 0xf8, 0xd0, &Decoder::opcode_11010Lxx }, // UOP_POP (16-bit)
- { 0xf8, 0xd8, &Decoder::opcode_11011Lxx }, // UOP_POP (32-bit)
- { 0xf8, 0xe0, &Decoder::opcode_11100xxx }, // UOP_VPOP (32-bit)
- { 0xfc, 0xe8, &Decoder::opcode_111010xx }, // UOP_STACK_FREE (32-bit)
- { 0xfe, 0xec, &Decoder::opcode_1110110L }, // UOP_POP (16-bit)
- { 0xff, 0xee, &Decoder::opcode_11101110 }, // UOP_MICROSOFT_SPECIFIC (16-bit)
+ { 0x80, 0x00, 1, &Decoder::opcode_0xxxxxxx }, // UOP_STACK_FREE (16-bit)
+ { 0xc0, 0x80, 2, &Decoder::opcode_10Lxxxxx }, // UOP_POP (32-bit)
+ { 0xf0, 0xc0, 1, &Decoder::opcode_1100xxxx }, // UOP_STACK_SAVE (16-bit)
+ { 0xf8, 0xd0, 1, &Decoder::opcode_11010Lxx }, // UOP_POP (16-bit)
+ { 0xf8, 0xd8, 1, &Decoder::opcode_11011Lxx }, // UOP_POP (32-bit)
+ { 0xf8, 0xe0, 1, &Decoder::opcode_11100xxx }, // UOP_VPOP (32-bit)
+ { 0xfc, 0xe8, 2, &Decoder::opcode_111010xx }, // UOP_STACK_FREE (32-bit)
+ { 0xfe, 0xec, 2, &Decoder::opcode_1110110L }, // UOP_POP (16-bit)
+ { 0xff, 0xee, 2, &Decoder::opcode_11101110 }, // UOP_MICROSOFT_SPECIFIC (16-bit)
// UOP_PUSH_MACHINE_FRAME
// UOP_PUSH_CONTEXT
// UOP_PUSH_TRAP_FRAME
// UOP_REDZONE_RESTORE_LR
- { 0xff, 0xef, &Decoder::opcode_11101111 }, // UOP_LDRPC_POSTINC (32-bit)
- { 0xff, 0xf5, &Decoder::opcode_11110101 }, // UOP_VPOP (32-bit)
- { 0xff, 0xf6, &Decoder::opcode_11110110 }, // UOP_VPOP (32-bit)
- { 0xff, 0xf7, &Decoder::opcode_11110111 }, // UOP_STACK_RESTORE (16-bit)
- { 0xff, 0xf8, &Decoder::opcode_11111000 }, // UOP_STACK_RESTORE (16-bit)
- { 0xff, 0xf9, &Decoder::opcode_11111001 }, // UOP_STACK_RESTORE (32-bit)
- { 0xff, 0xfa, &Decoder::opcode_11111010 }, // UOP_STACK_RESTORE (32-bit)
- { 0xff, 0xfb, &Decoder::opcode_11111011 }, // UOP_NOP (16-bit)
- { 0xff, 0xfc, &Decoder::opcode_11111100 }, // UOP_NOP (32-bit)
- { 0xff, 0xfd, &Decoder::opcode_11111101 }, // UOP_NOP (16-bit) / END
- { 0xff, 0xfe, &Decoder::opcode_11111110 }, // UOP_NOP (32-bit) / END
- { 0xff, 0xff, &Decoder::opcode_11111111 }, // UOP_END
+ { 0xff, 0xef, 2, &Decoder::opcode_11101111 }, // UOP_LDRPC_POSTINC (32-bit)
+ { 0xff, 0xf5, 2, &Decoder::opcode_11110101 }, // UOP_VPOP (32-bit)
+ { 0xff, 0xf6, 2, &Decoder::opcode_11110110 }, // UOP_VPOP (32-bit)
+ { 0xff, 0xf7, 3, &Decoder::opcode_11110111 }, // UOP_STACK_RESTORE (16-bit)
+ { 0xff, 0xf8, 4, &Decoder::opcode_11111000 }, // UOP_STACK_RESTORE (16-bit)
+ { 0xff, 0xf9, 3, &Decoder::opcode_11111001 }, // UOP_STACK_RESTORE (32-bit)
+ { 0xff, 0xfa, 4, &Decoder::opcode_11111010 }, // UOP_STACK_RESTORE (32-bit)
+ { 0xff, 0xfb, 1, &Decoder::opcode_11111011 }, // UOP_NOP (16-bit)
+ { 0xff, 0xfc, 1, &Decoder::opcode_11111100 }, // UOP_NOP (32-bit)
+ { 0xff, 0xfd, 1, &Decoder::opcode_11111101 }, // UOP_NOP (16-bit) / END
+ { 0xff, 0xfe, 1, &Decoder::opcode_11111110 }, // UOP_NOP (32-bit) / END
+ { 0xff, 0xff, 1, &Decoder::opcode_11111111 }, // UOP_END
+};
+
+
+// Unwind opcodes for ARM64.
+// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+const Decoder::RingEntry Decoder::Ring64[] = {
+ { 0xe0, 0x00, 1, &Decoder::opcode_alloc_s },
+ { 0xe0, 0x20, 1, &Decoder::opcode_save_r19r20_x },
+ { 0xc0, 0x40, 1, &Decoder::opcode_save_fplr },
+ { 0xc0, 0x80, 1, &Decoder::opcode_save_fplr_x },
+ { 0xf8, 0xc0, 2, &Decoder::opcode_alloc_m },
+ { 0xfc, 0xc8, 2, &Decoder::opcode_save_regp },
+ { 0xfc, 0xcc, 2, &Decoder::opcode_save_regp_x },
+ { 0xfc, 0xd0, 2, &Decoder::opcode_save_reg },
+ { 0xfe, 0xd4, 2, &Decoder::opcode_save_reg_x },
+ { 0xfe, 0xd6, 2, &Decoder::opcode_save_lrpair },
+ { 0xfe, 0xd8, 2, &Decoder::opcode_save_fregp },
+ { 0xfe, 0xda, 2, &Decoder::opcode_save_fregp_x },
+ { 0xfe, 0xdc, 2, &Decoder::opcode_save_freg },
+ { 0xff, 0xde, 2, &Decoder::opcode_save_freg_x },
+ { 0xff, 0xe0, 4, &Decoder::opcode_alloc_l },
+ { 0xff, 0xe1, 1, &Decoder::opcode_setfp },
+ { 0xff, 0xe2, 2, &Decoder::opcode_addfp },
+ { 0xff, 0xe3, 1, &Decoder::opcode_nop },
+ { 0xff, 0xe4, 1, &Decoder::opcode_end },
+ { 0xff, 0xe5, 1, &Decoder::opcode_end_c },
};
void Decoder::printRegisters(const std::pair<uint16_t, uint32_t> &RegisterMask) {
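
Each RingEntry now records the opcode's byte length next to its mask/value pair, so the decoder can bounds-check before dispatching. A small sketch of how such a table classifies the first byte of an unwind code; the Entry struct and the abbreviated table are illustrative, not the real Decoder:

    #include <cstdint>
    #include <cstdio>

    struct Entry { uint8_t Mask, Value, Length; const char *Name; };

    // Abbreviated stand-in for the ARM64 ring: match (Byte & Mask) == Value.
    static const Entry Table[] = {
      {0xe0, 0x00, 1, "alloc_s"},
      {0xf8, 0xc0, 2, "alloc_m"},
      {0xff, 0xe0, 4, "alloc_l"},
      {0xff, 0xe4, 1, "end"},
    };

    int main() {
      uint8_t Byte = 0xc3; // example first opcode byte
      for (const Entry &E : Table)
        if ((Byte & E.Mask) == E.Value) {
          std::printf("0x%02x matches %s (%u byte opcode)\n", Byte, E.Name, E.Length);
          return 0;
        }
      std::printf("0x%02x ; Bad opcode!\n", Byte);
    }
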
@@ -493,18 +519,291 @@ bool Decoder::opcode_11111111(const uint8_t *OC, unsigned &Offset,
return true;
}
+// ARM64 unwind codes start here.
+bool Decoder::opcode_alloc_s(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t NumBytes = (OC[Offset] & 0x1F) << 4;
+ SW.startLine() << format("0x%02x ; %s sp, #%u\n", OC[Offset],
+ static_cast<const char *>(Prologue ? "sub" : "add"),
+ NumBytes);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_save_r19r20_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Off = (OC[Offset] & 0x1F) << 3;
+ if (Prologue)
+ SW.startLine() << format(
+ "0x%02x ; stp x19, x20, [sp, #-%u]!\n", OC[Offset], Off);
+ else
+ SW.startLine() << format(
+ "0x%02x ; ldp x19, x20, [sp], #%u\n", OC[Offset], Off);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_save_fplr(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Off = (OC[Offset] & 0x3F) << 3;
+ SW.startLine() << format(
+ "0x%02x ; %s x29, x30, [sp, #%u]\n", OC[Offset],
+ static_cast<const char *>(Prologue ? "stp" : "ldp"), Off);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_save_fplr_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Off = ((OC[Offset] & 0x3F) + 1) << 3;
+ if (Prologue)
+ SW.startLine() << format(
+ "0x%02x ; stp x29, x30, [sp, #-%u]!\n", OC[Offset], Off);
+ else
+ SW.startLine() << format(
+ "0x%02x ; ldp x29, x30, [sp], #%u\n", OC[Offset], Off);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_alloc_m(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t NumBytes = ((OC[Offset] & 0x07) << 8);
+ NumBytes |= (OC[Offset + 1] & 0xFF);
+ NumBytes <<= 4;
+ SW.startLine() << format("0x%02x%02x ; %s sp, #%u\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "sub" : "add"),
+ NumBytes);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_regp(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = ((OC[Offset] & 0x03) << 8);
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 19;
+ uint32_t Off = (OC[Offset + 1] & 0x3F) << 3;
+ SW.startLine() << format(
+ "0x%02x%02x ; %s x%u, x%u, [sp, #%u]\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "stp" : "ldp"), Reg, Reg + 1, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_regp_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = ((OC[Offset] & 0x03) << 8);
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 19;
+ uint32_t Off = ((OC[Offset + 1] & 0x3F) + 1) << 3;
+ if (Prologue)
+ SW.startLine() << format(
+ "0x%02x%02x ; stp x%u, x%u, [sp, #-%u]!\n",
+ OC[Offset], OC[Offset + 1], Reg,
+ Reg + 1, Off);
+ else
+ SW.startLine() << format(
+ "0x%02x%02x ; ldp x%u, x%u, [sp], #%u\n",
+ OC[Offset], OC[Offset + 1], Reg,
+ Reg + 1, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_reg(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x03) << 8;
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 19;
+ uint32_t Off = (OC[Offset + 1] & 0x3F) << 3;
+ SW.startLine() << format("0x%02x%02x ; %s x%u, [sp, #%u]\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "str" : "ldr"),
+ Reg, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_reg_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x01) << 8;
+ Reg |= (OC[Offset + 1] & 0xE0);
+ Reg >>= 5;
+ Reg += 19;
+ uint32_t Off = ((OC[Offset + 1] & 0x1F) + 1) << 3;
+ if (Prologue)
+ SW.startLine() << format("0x%02x%02x ; str x%u, [sp, #%u]!\n",
+ OC[Offset], OC[Offset + 1], Reg, Off);
+ else
+ SW.startLine() << format("0x%02x%02x ; ldr x%u, [sp], #%u\n",
+ OC[Offset], OC[Offset + 1], Reg, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_lrpair(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x01) << 8;
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg *= 2;
+ Reg += 19;
+ uint32_t Off = (OC[Offset + 1] & 0x3F) << 3;
+ SW.startLine() << format("0x%02x%02x ; %s x%u, lr, [sp, #%u]\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "stp" : "ldp"),
+ Reg, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_fregp(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x01) << 8;
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 8;
+ uint32_t Off = (OC[Offset + 1] & 0x3F) << 3;
+ SW.startLine() << format("0x%02x%02x ; %s d%u, d%u, [sp, #%u]\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "stp" : "ldp"),
+ Reg, Reg + 1, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_fregp_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x01) << 8;
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 8;
+ uint32_t Off = ((OC[Offset + 1] & 0x3F) + 1) << 3;
+ if (Prologue)
+ SW.startLine() << format(
+ "0x%02x%02x ; stp d%u, d%u, [sp, #-%u]!\n", OC[Offset],
+ OC[Offset + 1], Reg, Reg + 1, Off);
+ else
+ SW.startLine() << format(
+ "0x%02x%02x ; ldp d%u, d%u, [sp], #%u\n", OC[Offset],
+ OC[Offset + 1], Reg, Reg + 1, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_freg(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x01) << 8;
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 8;
+ uint32_t Off = (OC[Offset + 1] & 0x3F) << 3;
+ SW.startLine() << format("0x%02x%02x ; %s d%u, [sp, #%u]\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "str" : "ldr"),
+ Reg, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_freg_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = ((OC[Offset + 1] & 0xE0) >> 5) + 8;
+ uint32_t Off = ((OC[Offset + 1] & 0x1F) + 1) << 3;
+ if (Prologue)
+ SW.startLine() << format(
+ "0x%02x%02x ; str d%u, [sp, #-%u]!\n", OC[Offset],
+ OC[Offset + 1], Reg, Off);
+ else
+ SW.startLine() << format(
+ "0x%02x%02x ; ldr d%u, [sp], #%u\n", OC[Offset],
+ OC[Offset + 1], Reg, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_alloc_l(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ unsigned Off =
+ (OC[Offset + 1] << 16) | (OC[Offset + 2] << 8) | (OC[Offset + 3] << 0);
+ Off <<= 4;
+ SW.startLine() << format(
+ "0x%02x%02x%02x%02x ; %s sp, #%u\n", OC[Offset], OC[Offset + 1],
+ OC[Offset + 2], OC[Offset + 3],
+ static_cast<const char *>(Prologue ? "sub" : "add"), Off);
+ Offset += 4;
+ return false;
+}
+
+bool Decoder::opcode_setfp(const uint8_t *OC, unsigned &Offset, unsigned Length,
+ bool Prologue) {
+ SW.startLine() << format("0x%02x ; mov fp, sp\n", OC[Offset]);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_addfp(const uint8_t *OC, unsigned &Offset, unsigned Length,
+ bool Prologue) {
+ unsigned NumBytes = OC[Offset + 1] << 3;
+ SW.startLine() << format("0x%02x%02x ; add fp, sp, #%u\n",
+ OC[Offset], OC[Offset + 1], NumBytes);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_nop(const uint8_t *OC, unsigned &Offset, unsigned Length,
+ bool Prologue) {
+ SW.startLine() << format("0x%02x ; nop\n", OC[Offset]);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_end(const uint8_t *OC, unsigned &Offset, unsigned Length,
+ bool Prologue) {
+ SW.startLine() << format("0x%02x ; end\n", OC[Offset]);
+ ++Offset;
+ return true;
+}
+
+bool Decoder::opcode_end_c(const uint8_t *OC, unsigned &Offset, unsigned Length,
+ bool Prologue) {
+ SW.startLine() << format("0x%02x ; end_c\n", OC[Offset]);
+ ++Offset;
+ return true;
+}
+
void Decoder::decodeOpcodes(ArrayRef<uint8_t> Opcodes, unsigned Offset,
bool Prologue) {
assert((!Prologue || Offset == 0) && "prologue should always use offset 0");
-
+ const RingEntry* DecodeRing = isAArch64 ? Ring64 : Ring;
bool Terminated = false;
for (unsigned OI = Offset, OE = Opcodes.size(); !Terminated && OI < OE; ) {
for (unsigned DI = 0;; ++DI) {
- if ((Opcodes[OI] & Ring[DI].Mask) == Ring[DI].Value) {
- Terminated = (this->*Ring[DI].Routine)(Opcodes.data(), OI, 0, Prologue);
+ if ((isAArch64 && (DI >= array_lengthof(Ring64))) ||
+ (!isAArch64 && (DI >= array_lengthof(Ring)))) {
+ SW.startLine() << format("0x%02x ; Bad opcode!\n",
+ Opcodes.data()[OI]);
+ ++OI;
+ break;
+ }
+
+ if ((Opcodes[OI] & DecodeRing[DI].Mask) == DecodeRing[DI].Value) {
+ if (OI + DecodeRing[DI].Length > OE) {
+ SW.startLine() << format("Opcode 0x%02x goes past the unwind data\n",
+ Opcodes[OI]);
+ OI += DecodeRing[DI].Length;
+ break;
+ }
+ Terminated =
+ (this->*DecodeRing[DI].Routine)(Opcodes.data(), OI, 0, Prologue);
break;
}
- assert(DI < array_lengthof(Ring) && "unhandled opcode");
}
}
}
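
The alloc_s, alloc_m and alloc_l handlers above recover a stack adjustment from 5-, 11- and 24-bit fields, each scaled by 16. A worked standalone sketch of that bit manipulation; the byte arrays are example encodings, not data from a real object file:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // alloc_s: low 5 bits of one byte, times 16.
      uint8_t S[] = {0x07};
      uint32_t SmallBytes = (S[0] & 0x1F) << 4;               // 7 * 16 = 112

      // alloc_m: 11 bits spread over two bytes, times 16.
      uint8_t M[] = {0xc1, 0x23};
      uint32_t MidBytes = (((M[0] & 0x07) << 8) | M[1]) << 4; // 0x123 * 16 = 4656

      // alloc_l: 24 bits in the three bytes after the opcode, times 16.
      uint8_t L[] = {0xe0, 0x00, 0x10, 0x00};
      uint32_t LargeBytes = ((L[1] << 16) | (L[2] << 8) | L[3]) << 4; // 0x1000 * 16 = 65536

      std::printf("sub sp, #%u / #%u / #%u\n", SmallBytes, MidBytes, LargeBytes);
    }
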
@@ -520,22 +819,36 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
uint64_t Offset = VA - SectionVA;
const ulittle32_t *Data =
reinterpret_cast<const ulittle32_t *>(Contents.data() + Offset);
- const ExceptionDataRecord XData(Data);
+ // Sanity check to ensure that the .xdata header is present.
+ // A header is one or two words, followed by at least one word to describe
+ // the unwind codes. Applicable to both ARM and AArch64.
+ if (Contents.size() - Offset < 8)
+ report_fatal_error(".xdata must be at least 8 bytes in size");
+
+ const ExceptionDataRecord XData(Data, isAArch64);
DictScope XRS(SW, "ExceptionData");
- SW.printNumber("FunctionLength", XData.FunctionLength() << 1);
+ SW.printNumber("FunctionLength",
+ isAArch64 ? XData.FunctionLengthInBytesAArch64() :
+ XData.FunctionLengthInBytesARM());
SW.printNumber("Version", XData.Vers());
SW.printBoolean("ExceptionData", XData.X());
SW.printBoolean("EpiloguePacked", XData.E());
- SW.printBoolean("Fragment", XData.F());
+ if (!isAArch64)
+ SW.printBoolean("Fragment", XData.F());
SW.printNumber(XData.E() ? "EpilogueOffset" : "EpilogueScopes",
XData.EpilogueCount());
- SW.printNumber("ByteCodeLength",
- static_cast<uint64_t>(XData.CodeWords() * sizeof(uint32_t)));
+ uint64_t ByteCodeLength = XData.CodeWords() * sizeof(uint32_t);
+ SW.printNumber("ByteCodeLength", ByteCodeLength);
+
+ if ((int64_t)(Contents.size() - Offset - 4 * HeaderWords(XData) -
+ (XData.E() ? 0 : XData.EpilogueCount() * 4) -
+ (XData.X() ? 8 : 0)) < (int64_t)ByteCodeLength)
+ report_fatal_error("Malformed unwind data");
if (XData.E()) {
ArrayRef<uint8_t> UC = XData.UnwindByteCode();
- if (!XData.F()) {
+ if (isAArch64 || !XData.F()) {
ListScope PS(SW, "Prologue");
decodeOpcodes(UC, 0, /*Prologue=*/true);
}
@@ -544,16 +857,27 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
decodeOpcodes(UC, XData.EpilogueCount(), /*Prologue=*/false);
}
} else {
+ {
+ ListScope PS(SW, "Prologue");
+ decodeOpcodes(XData.UnwindByteCode(), 0, /*Prologue=*/true);
+ }
ArrayRef<ulittle32_t> EpilogueScopes = XData.EpilogueScopes();
ListScope ESS(SW, "EpilogueScopes");
for (const EpilogueScope ES : EpilogueScopes) {
DictScope ESES(SW, "EpilogueScope");
SW.printNumber("StartOffset", ES.EpilogueStartOffset());
- SW.printNumber("Condition", ES.Condition());
- SW.printNumber("EpilogueStartIndex", ES.EpilogueStartIndex());
+ if (!isAArch64)
+ SW.printNumber("Condition", ES.Condition());
+ SW.printNumber("EpilogueStartIndex",
+ isAArch64 ? ES.EpilogueStartIndexAArch64()
+ : ES.EpilogueStartIndexARM());
+ if (ES.ES & ~0xffc3ffff)
+ SW.printNumber("ReservedBits", (ES.ES >> 18) & 0xF);
ListScope Opcodes(SW, "Opcodes");
- decodeOpcodes(XData.UnwindByteCode(), ES.EpilogueStartIndex(),
+ decodeOpcodes(XData.UnwindByteCode(),
+ isAArch64 ? ES.EpilogueStartIndexAArch64()
+ : ES.EpilogueStartIndexARM(),
/*Prologue=*/false);
}
}
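
The added size check verifies that the unwind byte code fits in what remains of .xdata after the header words, the epilogue-scope array and the optional exception-handler slot. The same arithmetic with named, hypothetical inputs:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t ContentsSize = 64;   // bytes left in the section
      uint64_t Offset = 0;          // start of this .xdata record
      unsigned HeaderWords = 1;     // 1 or 2, depending on the extended header
      bool EpiloguePacked = false;  // the E bit
      bool HasExceptionData = true; // the X bit
      uint32_t EpilogueCount = 2;
      uint64_t ByteCodeLength = 3 * sizeof(uint32_t); // CodeWords * 4

      int64_t Avail = (int64_t)(ContentsSize - Offset - 4 * HeaderWords -
                                (EpiloguePacked ? 0 : EpilogueCount * 4) -
                                (HasExceptionData ? 8 : 0));
      if (Avail < (int64_t)ByteCodeLength)
        std::printf("Malformed unwind data\n");
      else
        std::printf("%lld bytes available for %llu bytes of unwind codes\n",
                    (long long)Avail, (unsigned long long)ByteCodeLength);
    }
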
@@ -565,16 +889,21 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
+ (XData.E() ? 0 : XData.EpilogueCount())
+ XData.CodeWords();
- ErrorOr<SymbolRef> Symbol =
- getRelocatedSymbol(COFF, Section, HandlerOffset * sizeof(uint32_t));
+ ErrorOr<SymbolRef> Symbol = getRelocatedSymbol(
+ COFF, Section, Offset + HandlerOffset * sizeof(uint32_t));
if (!Symbol)
Symbol = getSymbol(COFF, Address, /*FunctionOnly=*/true);
+ if (!Symbol) {
+ ListScope EHS(SW, "ExceptionHandler");
+ SW.printString("Routine", "(null)");
+ return true;
+ }
Expected<StringRef> Name = Symbol->getName();
if (!Name) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(Name.takeError(), OS, "");
+ logAllUnhandledErrors(Name.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -613,7 +942,7 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
if (!FunctionNameOrErr) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -622,16 +951,13 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
if (!FunctionAddressOrErr) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(FunctionAddressOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(FunctionAddressOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
FunctionAddress = *FunctionAddressOrErr;
} else {
- const pe32_header *PEHeader;
- if (COFF.getPE32Header(PEHeader))
- return false;
- FunctionAddress = PEHeader->ImageBase + RF.BeginAddress;
+ FunctionAddress = COFF.getImageBase() + RF.BeginAddress;
}
SW.printString("Function", formatSymbol(FunctionName, FunctionAddress));
@@ -641,7 +967,7 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
if (!Name) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(Name.takeError(), OS, "");
+ logAllUnhandledErrors(Name.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -650,7 +976,7 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
if (!AddressOrErr) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(AddressOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(AddressOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -666,22 +992,18 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
}
section_iterator SI = *SIOrErr;
- return dumpXDataRecord(COFF, *SI, FunctionAddress, Address);
+ // FIXME: Do we need to add an offset from the relocation?
+ return dumpXDataRecord(COFF, *SI, FunctionAddress,
+ RF.ExceptionInformationRVA());
} else {
- const pe32_header *PEHeader;
- if (COFF.getPE32Header(PEHeader))
- return false;
-
- uint64_t Address = PEHeader->ImageBase + RF.ExceptionInformationRVA();
+ uint64_t Address = COFF.getImageBase() + RF.ExceptionInformationRVA();
SW.printString("ExceptionRecord", formatSymbol("", Address));
- ErrorOr<SectionRef> Section =
- getSectionContaining(COFF, RF.ExceptionInformationRVA());
+ ErrorOr<SectionRef> Section = getSectionContaining(COFF, Address);
if (!Section)
return false;
- return dumpXDataRecord(COFF, *Section, FunctionAddress,
- RF.ExceptionInformationRVA());
+ return dumpXDataRecord(COFF, *Section, FunctionAddress, Address);
}
}
@@ -703,7 +1025,7 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
if (!FunctionNameOrErr) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -712,7 +1034,7 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
if (!FunctionAddressOrErr) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(FunctionAddressOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(FunctionAddressOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -725,8 +1047,9 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
}
SW.printString("Function", formatSymbol(FunctionName, FunctionAddress));
- SW.printBoolean("Fragment",
- RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment);
+ if (!isAArch64)
+ SW.printBoolean("Fragment",
+ RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment);
SW.printNumber("FunctionLength", RF.FunctionLength());
SW.startLine() << "ReturnType: " << RF.Ret() << '\n';
SW.printBoolean("HomedParameters", RF.H());
@@ -749,6 +1072,10 @@ bool Decoder::dumpProcedureDataEntry(const COFFObjectFile &COFF,
DictScope RFS(SW, "RuntimeFunction");
if (Entry.Flag() == RuntimeFunctionFlag::RFF_Unpacked)
return dumpUnpackedEntry(COFF, Section, Offset, Index, Entry);
+ if (isAArch64) {
+ SW.startLine() << "Packed unwind data not yet supported for ARM64\n";
+ return true;
+ }
return dumpPackedEntry(COFF, Section, Offset, Index, Entry);
}
diff --git a/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.h b/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
index 95f521702268..e271a1e6fe77 100644
--- a/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
+++ b/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
@@ -24,13 +24,16 @@ class Decoder {
ScopedPrinter &SW;
raw_ostream &OS;
+ bool isAArch64;
struct RingEntry {
uint8_t Mask;
uint8_t Value;
+ uint8_t Length;
bool (Decoder::*Routine)(const uint8_t *, unsigned &, unsigned, bool);
};
static const RingEntry Ring[];
+ static const RingEntry Ring64[];
bool opcode_0xxxxxxx(const uint8_t *Opcodes, unsigned &Offset,
unsigned Length, bool Prologue);
@@ -75,6 +78,50 @@ class Decoder {
bool opcode_11111111(const uint8_t *Opcodes, unsigned &Offset,
unsigned Length, bool Prologue);
+ // ARM64 unwind codes start here.
+ bool opcode_alloc_s(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_save_r19r20_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_fplr(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_fplr_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_alloc_m(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_save_regp(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_regp_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_reg(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_reg_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_lrpair(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_fregp(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_fregp_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_freg(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_freg_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_alloc_l(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_setfp(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_addfp(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_nop(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_end(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_end_c(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_save_next(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+
void decodeOpcodes(ArrayRef<uint8_t> Opcodes, unsigned Offset,
bool Prologue);
@@ -107,7 +154,9 @@ class Decoder {
const object::SectionRef Section);
public:
- Decoder(ScopedPrinter &SW) : SW(SW), OS(SW.getOStream()) {}
+ Decoder(ScopedPrinter &SW, bool isAArch64) : SW(SW),
+ OS(SW.getOStream()),
+ isAArch64(isAArch64) {}
std::error_code dumpProcedureData(const object::COFFObjectFile &COFF);
};
}
diff --git a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
index 0ed4ccd09f6f..3e2626dad118 100644
--- a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -50,6 +50,7 @@
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/Win64EH.h"
#include "llvm/Support/raw_ostream.h"
@@ -78,7 +79,7 @@ public:
: ObjDumper(Writer), Obj(Obj), Writer(Writer), Types(100) {}
void printFileHeaders() override;
- void printSections() override;
+ void printSectionHeaders() override;
void printRelocations() override;
void printSymbols() override;
void printDynamicSymbols() override;
@@ -98,6 +99,7 @@ public:
mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs,
llvm::codeview::MergingTypeTableBuilder &CVTypes) override;
void printStackMap() const override;
+ void printAddrsig() override;
private:
void printSymbol(const SymbolRef &Sym);
void printRelocation(const SectionRef &Section, const RelocationRef &Reloc,
@@ -177,6 +179,10 @@ private:
DebugStringTableSubsectionRef CVStringTable;
+ /// Track the compilation CPU type. S_COMPILE3 symbol records typically come
+ /// first, but if we don't see one, just assume an X64 CPU type. It is common.
+ CPUType CompilationCPUType = CPUType::X64;
+
ScopedPrinter &Writer;
BinaryByteStream TypeContents;
LazyRandomTypeCollection Types;
@@ -607,8 +613,7 @@ void COFFDumper::cacheRelocations() {
RelocMap[Section].push_back(Reloc);
// Sort relocations by address.
- llvm::sort(RelocMap[Section].begin(), RelocMap[Section].end(),
- relocAddressLess);
+ llvm::sort(RelocMap[Section], relocAddressLess);
}
}
@@ -749,7 +754,7 @@ void COFFDumper::printCOFFDebugDirectory() {
W.printNumber("PDBAge", DebugInfo->PDB70.Age);
W.printString("PDBFileName", PDBFileName);
}
- } else {
+ } else if (D.SizeOfData != 0) {
// FIXME: Type values of 12 and 13 are commonly observed but are not in
// the documented type enum. Figure out what they mean.
ArrayRef<uint8_t> RawData;
@@ -954,7 +959,7 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
StringMap<StringRef> FunctionLineTables;
ListScope D(W, "CodeViewDebugInfo");
- // Print the section to allow correlation with printSections.
+ // Print the section to allow correlation with printSectionHeaders.
W.printNumber("Section", SectionName, Obj->getSectionID(Section));
uint32_t Magic;
@@ -1060,10 +1065,28 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
W.printHex("LocalSize", FD.LocalSize);
W.printHex("ParamsSize", FD.ParamsSize);
W.printHex("MaxStackSize", FD.MaxStackSize);
- W.printString("FrameFunc", FrameFunc);
W.printHex("PrologSize", FD.PrologSize);
W.printHex("SavedRegsSize", FD.SavedRegsSize);
W.printFlags("Flags", FD.Flags, makeArrayRef(FrameDataFlags));
+
+ // The FrameFunc string is a small RPN program. It can be broken up into
+ // statements that end in the '=' operator, which assigns the value on
+ // the top of the stack to the previously pushed variable. Variables can
+ // be temporary values ($T0) or physical registers ($esp). Print each
+ // assignment on its own line to make these programs easier to read.
+ {
+ ListScope FFS(W, "FrameFunc");
+ while (!FrameFunc.empty()) {
+ size_t EqOrEnd = FrameFunc.find('=');
+ if (EqOrEnd == StringRef::npos)
+ EqOrEnd = FrameFunc.size();
+ else
+ ++EqOrEnd;
+ StringRef Stmt = FrameFunc.substr(0, EqOrEnd);
+ W.printString(Stmt);
+ FrameFunc = FrameFunc.drop_front(EqOrEnd).trim();
+ }
+ }
}
break;
}
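
The FrameFunc change splits the RPN program at each '=' so every assignment prints on its own line. A self-contained sketch of that splitting, using std::string in place of StringRef; the sample program is invented:

    #include <cstdio>
    #include <string>

    // Trim leading spaces; std::string stand-in for StringRef::trim().
    static std::string trimLeft(std::string S) {
      size_t P = S.find_first_not_of(' ');
      return P == std::string::npos ? std::string() : S.substr(P);
    }

    int main() {
      std::string FrameFunc = "$T0 $esp 4 + = $eip $T0 ^ = $esp $T0 4 + =";
      while (!FrameFunc.empty()) {
        size_t EqOrEnd = FrameFunc.find('=');
        if (EqOrEnd == std::string::npos)
          EqOrEnd = FrameFunc.size();
        else
          ++EqOrEnd; // keep the '=' with its statement
        std::printf("%s\n", FrameFunc.substr(0, EqOrEnd).c_str());
        FrameFunc = trimLeft(FrameFunc.substr(EqOrEnd));
      }
    }
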
@@ -1130,7 +1153,7 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection,
auto CODD = llvm::make_unique<COFFObjectDumpDelegate>(*this, Section, Obj,
SectionContents);
CVSymbolDumper CVSD(W, Types, CodeViewContainer::ObjectFile, std::move(CODD),
- opts::CodeViewSubsectionBytes);
+ CompilationCPUType, opts::CodeViewSubsectionBytes);
CVSymbolArray Symbols;
BinaryStreamReader Reader(BinaryData, llvm::support::little);
if (auto EC = Reader.readArray(Symbols, Reader.getLength())) {
@@ -1143,6 +1166,7 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection,
W.flush();
error(std::move(EC));
}
+ CompilationCPUType = CVSD.getCompilationCPUType();
W.flush();
}
@@ -1224,7 +1248,9 @@ void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs,
error(object_error::parse_failed);
}
SmallVector<TypeIndex, 128> SourceToDest;
- if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types))
+ Optional<uint32_t> PCHSignature;
+ if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types,
+ PCHSignature))
return error(std::move(EC));
}
}
@@ -1253,7 +1279,7 @@ void COFFDumper::printCodeViewTypeSection(StringRef SectionName,
W.flush();
}
-void COFFDumper::printSections() {
+void COFFDumper::printSectionHeaders() {
ListScope SectionsD(W, "Sections");
int SectionNumber = 0;
for (const SectionRef &Sec : Obj->sections()) {
@@ -1339,10 +1365,12 @@ void COFFDumper::printRelocation(const SectionRef &Section,
StringRef SymbolName;
Reloc.getTypeName(RelocName);
symbol_iterator Symbol = Reloc.getSymbol();
+ int64_t SymbolIndex = -1;
if (Symbol != Obj->symbol_end()) {
Expected<StringRef> SymbolNameOrErr = Symbol->getName();
error(errorToErrorCode(SymbolNameOrErr.takeError()));
SymbolName = *SymbolNameOrErr;
+ SymbolIndex = Obj->getSymbolIndex(Obj->getCOFFSymbol(*Symbol));
}
if (opts::ExpandRelocs) {
@@ -1350,11 +1378,13 @@ void COFFDumper::printRelocation(const SectionRef &Section,
W.printHex("Offset", Offset);
W.printNumber("Type", RelocName, RelocType);
W.printString("Symbol", SymbolName.empty() ? "-" : SymbolName);
+ W.printNumber("SymbolIndex", SymbolIndex);
} else {
raw_ostream& OS = W.startLine();
OS << W.hex(Offset)
<< " " << RelocName
<< " " << (SymbolName.empty() ? "-" : SymbolName)
+ << " (" << SymbolIndex << ")"
<< "\n";
}
}
@@ -1525,8 +1555,10 @@ void COFFDumper::printUnwindInfo() {
Dumper.printData(Ctx);
break;
}
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
case COFF::IMAGE_FILE_MACHINE_ARMNT: {
- ARM::WinEH::Decoder Decoder(W);
+ ARM::WinEH::Decoder Decoder(W, Obj->getMachine() ==
+ COFF::IMAGE_FILE_MACHINE_ARM64);
Decoder.dumpProcedureData(*Obj);
break;
}
@@ -1830,6 +1862,49 @@ void COFFDumper::printStackMap() const {
StackMapV2Parser<support::big>(StackMapContentsArray));
}
+void COFFDumper::printAddrsig() {
+ object::SectionRef AddrsigSection;
+ for (auto Sec : Obj->sections()) {
+ StringRef Name;
+ Sec.getName(Name);
+ if (Name == ".llvm_addrsig") {
+ AddrsigSection = Sec;
+ break;
+ }
+ }
+
+ if (AddrsigSection == object::SectionRef())
+ return;
+
+ StringRef AddrsigContents;
+ AddrsigSection.getContents(AddrsigContents);
+ ArrayRef<uint8_t> AddrsigContentsArray(
+ reinterpret_cast<const uint8_t*>(AddrsigContents.data()),
+ AddrsigContents.size());
+
+ ListScope L(W, "Addrsig");
+ auto *Cur = reinterpret_cast<const uint8_t *>(AddrsigContents.begin());
+ auto *End = reinterpret_cast<const uint8_t *>(AddrsigContents.end());
+ while (Cur != End) {
+ unsigned Size;
+ const char *Err;
+ uint64_t SymIndex = decodeULEB128(Cur, &Size, End, &Err);
+ if (Err)
+ reportError(Err);
+
+ Expected<COFFSymbolRef> Sym = Obj->getSymbol(SymIndex);
+ StringRef SymName;
+ std::error_code EC = errorToErrorCode(Sym.takeError());
+ if (EC || (EC = Obj->getSymbolName(*Sym, SymName))) {
+ SymName = "";
+ error(EC);
+ }
+
+ W.printNumber("Sym", SymName, SymIndex);
+ Cur += Size;
+ }
+}
+
void llvm::dumpCodeViewMergedTypes(
ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable,
llvm::codeview::MergingTypeTableBuilder &CVTypes) {
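
printAddrsig above walks the .llvm_addrsig payload as a sequence of ULEB128-encoded symbol indexes. A minimal standalone decoder for the same encoding; the payload bytes are an example, not real section contents:

    #include <cstdint>
    #include <cstdio>

    // Decode one unsigned LEB128 value; advances *Cur past the bytes consumed.
    static uint64_t decodeOneULEB128(const uint8_t *&Cur, const uint8_t *End) {
      uint64_t Value = 0;
      unsigned Shift = 0;
      while (Cur != End) {
        uint8_t Byte = *Cur++;
        Value |= (uint64_t)(Byte & 0x7f) << Shift;
        if (!(Byte & 0x80)) // high bit clear: last byte of this value
          break;
        Shift += 7;
      }
      return Value;
    }

    int main() {
      const uint8_t Payload[] = {0x05, 0xe5, 0x8e, 0x26}; // encodes 5, then 624485
      const uint8_t *Cur = Payload, *End = Payload + sizeof(Payload);
      while (Cur != End)
        std::printf("Sym index: %llu\n",
                    (unsigned long long)decodeOneULEB128(Cur, End));
    }
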
diff --git a/contrib/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h b/contrib/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
index 5a1eef1d007d..d91d764c4d0a 100644
--- a/contrib/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
+++ b/contrib/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
@@ -16,6 +16,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFTypes.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/Debug.h"
@@ -31,15 +32,15 @@ namespace DwarfCFIEH {
template <typename ELFT>
class PrinterContext {
ScopedPrinter &W;
- const object::ELFFile<ELFT> *Obj;
+ const object::ELFObjectFile<ELFT> *ObjF;
void printEHFrameHdr(uint64_t Offset, uint64_t Address, uint64_t Size) const;
void printEHFrame(const typename ELFT::Shdr *EHFrameShdr) const;
public:
- PrinterContext(ScopedPrinter &W, const object::ELFFile<ELFT> *Obj)
- : W(W), Obj(Obj) {}
+ PrinterContext(ScopedPrinter &W, const object::ELFObjectFile<ELFT> *ObjF)
+ : W(W), ObjF(ObjF) {}
void printUnwindInformation() const;
};
@@ -59,6 +60,7 @@ static const typename ELFO::Elf_Shdr *findSectionByAddress(const ELFO *Obj,
template <typename ELFT>
void PrinterContext<ELFT>::printUnwindInformation() const {
+ const object::ELFFile<ELFT> *Obj = ObjF->getELFFile();
const typename ELFT::Phdr *EHFramePhdr = nullptr;
auto PHs = Obj->program_headers();
@@ -101,6 +103,7 @@ void PrinterContext<ELFT>::printEHFrameHdr(uint64_t EHFrameHdrOffset,
W.startLine() << format("Offset: 0x%" PRIx64 "\n", EHFrameHdrOffset);
W.startLine() << format("Size: 0x%" PRIx64 "\n", EHFrameHdrSize);
+ const object::ELFFile<ELFT> *Obj = ObjF->getELFFile();
const auto *EHFrameHdrShdr = findSectionByAddress(Obj, EHFrameHdrAddress);
if (EHFrameHdrShdr) {
auto SectionName = Obj->getSectionName(EHFrameHdrShdr);
@@ -173,6 +176,7 @@ void PrinterContext<ELFT>::printEHFrame(
ShOffset, Address);
W.indent();
+ const object::ELFFile<ELFT> *Obj = ObjF->getELFFile();
auto Result = Obj->getSectionContents(EHFrameShdr);
if (Error E = Result.takeError())
reportError(toString(std::move(E)));
@@ -183,7 +187,8 @@ void PrinterContext<ELFT>::printEHFrame(
Contents.size()),
ELFT::TargetEndianness == support::endianness::little,
ELFT::Is64Bits ? 8 : 4);
- DWARFDebugFrame EHFrame(/*IsEH=*/true, /*EHFrameAddress=*/Address);
+ DWARFDebugFrame EHFrame(Triple::ArchType(ObjF->getArch()), /*IsEH=*/true,
+ /*EHFrameAddress=*/Address);
EHFrame.parse(DE);
for (const auto &Entry : EHFrame) {
diff --git a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
index 645ec2d7e04b..93254717e921 100644
--- a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -22,12 +22,13 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
@@ -43,6 +44,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
@@ -139,10 +141,10 @@ struct DynRegionInfo {
template<typename ELFT>
class ELFDumper : public ObjDumper {
public:
- ELFDumper(const ELFFile<ELFT> *Obj, ScopedPrinter &Writer);
+ ELFDumper(const object::ELFObjectFile<ELFT> *ObjF, ScopedPrinter &Writer);
void printFileHeaders() override;
- void printSections() override;
+ void printSectionHeaders() override;
void printRelocations() override;
void printDynamicRelocations() override;
void printSymbols() override;
@@ -181,6 +183,7 @@ private:
TYPEDEF_ELF_TYPES(ELFT)
DynRegionInfo checkDRI(DynRegionInfo DRI) {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
if (DRI.Addr < Obj->base() ||
(const uint8_t *)DRI.Addr + DRI.Size > Obj->base() + Obj->getBufSize())
error(llvm::object::object_error::parse_failed);
@@ -188,11 +191,11 @@ private:
}
DynRegionInfo createDRIFrom(const Elf_Phdr *P, uintX_t EntSize) {
- return checkDRI({Obj->base() + P->p_offset, P->p_filesz, EntSize});
+ return checkDRI({ObjF->getELFFile()->base() + P->p_offset, P->p_filesz, EntSize});
}
DynRegionInfo createDRIFrom(const Elf_Shdr *S) {
- return checkDRI({Obj->base() + S->sh_offset, S->sh_size, S->sh_entsize});
+ return checkDRI({ObjF->getELFFile()->base() + S->sh_offset, S->sh_size, S->sh_entsize});
}
void parseDynamicTable(ArrayRef<const Elf_Phdr *> LoadSegments);
@@ -206,7 +209,7 @@ private:
void LoadVersionNeeds(const Elf_Shdr *ec) const;
void LoadVersionDefs(const Elf_Shdr *sec) const;
- const ELFO *Obj;
+ const object::ELFObjectFile<ELFT> *ObjF;
DynRegionInfo DynRelRegion;
DynRegionInfo DynRelaRegion;
DynRegionInfo DynRelrRegion;
@@ -289,6 +292,7 @@ void ELFDumper<ELFT>::printSymbolsHelper(bool IsDynamic) const {
StringRef StrTable, SymtabName;
size_t Entries = 0;
Elf_Sym_Range Syms(nullptr, nullptr);
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
if (IsDynamic) {
StrTable = DynamicStringTable;
Syms = dynamic_symbols();
@@ -323,7 +327,7 @@ public:
virtual void printFileHeaders(const ELFFile<ELFT> *Obj) = 0;
virtual void printGroupSections(const ELFFile<ELFT> *Obj) = 0;
virtual void printRelocations(const ELFFile<ELFT> *Obj) = 0;
- virtual void printSections(const ELFFile<ELFT> *Obj) = 0;
+ virtual void printSectionHeaders(const ELFFile<ELFT> *Obj) = 0;
virtual void printSymbols(const ELFFile<ELFT> *Obj) = 0;
virtual void printDynamicSymbols(const ELFFile<ELFT> *Obj) = 0;
virtual void printDynamicRelocations(const ELFFile<ELFT> *Obj) = 0;
@@ -358,7 +362,7 @@ public:
void printFileHeaders(const ELFO *Obj) override;
void printGroupSections(const ELFFile<ELFT> *Obj) override;
void printRelocations(const ELFO *Obj) override;
- void printSections(const ELFO *Obj) override;
+ void printSectionHeaders(const ELFO *Obj) override;
void printSymbols(const ELFO *Obj) override;
void printDynamicSymbols(const ELFO *Obj) override;
void printDynamicRelocations(const ELFO *Obj) override;
@@ -390,6 +394,33 @@ private:
return to_hexString(Value, false);
}
+ template <typename T, typename TEnum>
+ std::string printFlags(T Value, ArrayRef<EnumEntry<TEnum>> EnumValues,
+ TEnum EnumMask1 = {}, TEnum EnumMask2 = {},
+ TEnum EnumMask3 = {}) {
+ std::string Str;
+ for (const auto &Flag : EnumValues) {
+ if (Flag.Value == 0)
+ continue;
+
+ TEnum EnumMask{};
+ if (Flag.Value & EnumMask1)
+ EnumMask = EnumMask1;
+ else if (Flag.Value & EnumMask2)
+ EnumMask = EnumMask2;
+ else if (Flag.Value & EnumMask3)
+ EnumMask = EnumMask3;
+ bool IsEnum = (Flag.Value & EnumMask) != 0;
+ if ((!IsEnum && (Value & Flag.Value) == Flag.Value) ||
+ (IsEnum && (Value & EnumMask) == Flag.Value)) {
+ if (!Str.empty())
+ Str += ", ";
+ Str += Flag.AltName;
+ }
+ }
+ return Str;
+ }
+
formatted_raw_ostream &printField(struct Field F) {
if (F.Column != 0)
OS.PadToColumn(F.Column);
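
The new printFlags helper above treats bits covered by the supplied masks as a multi-valued enum field that must match exactly, while ordinary flags only need their bits set. A cut-down sketch of that distinction with a single mask; the Flag struct, the toy bit layout and the sample value are illustrative:

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Flag { unsigned Value; const char *Name; };

    // Flags whose bits fall inside EnumMask form one multi-valued field and
    // must match the whole field; other flags are plain bit tests.
    static std::string describe(unsigned Value, const std::vector<Flag> &Flags,
                                unsigned EnumMask) {
      std::string Str;
      for (const Flag &F : Flags) {
        bool IsEnum = (F.Value & EnumMask) != 0;
        if ((!IsEnum && (Value & F.Value) == F.Value) ||
            (IsEnum && (Value & EnumMask) == F.Value)) {
          if (!Str.empty())
            Str += ", ";
          Str += F.Name;
        }
      }
      return Str;
    }

    int main() {
      // Toy layout: bits 0-3 are an "arch" field, bit 4 is an independent flag.
      std::vector<Flag> Flags = {{0x1, "arch1"}, {0x2, "arch2"}, {0x10, "pic"}};
      std::printf("%s\n", describe(0x12, Flags, 0xf).c_str()); // -> "arch2, pic"
    }
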
@@ -424,7 +455,7 @@ public:
void printGroupSections(const ELFFile<ELFT> *Obj) override;
void printRelocations(const ELFO *Obj) override;
void printRelocations(const Elf_Shdr *Sec, const ELFO *Obj);
- void printSections(const ELFO *Obj) override;
+ void printSectionHeaders(const ELFO *Obj) override;
void printSymbols(const ELFO *Obj) override;
void printDynamicSymbols(const ELFO *Obj) override;
void printDynamicRelocations(const ELFO *Obj) override;
@@ -451,7 +482,7 @@ private:
namespace llvm {
template <class ELFT>
-static std::error_code createELFDumper(const ELFFile<ELFT> *Obj,
+static std::error_code createELFDumper(const ELFObjectFile<ELFT> *Obj,
ScopedPrinter &Writer,
std::unique_ptr<ObjDumper> &Result) {
Result.reset(new ELFDumper<ELFT>(Obj, Writer));
@@ -463,19 +494,19 @@ std::error_code createELFDumper(const object::ObjectFile *Obj,
std::unique_ptr<ObjDumper> &Result) {
// Little-endian 32-bit
if (const ELF32LEObjectFile *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
- return createELFDumper(ELFObj->getELFFile(), Writer, Result);
+ return createELFDumper(ELFObj, Writer, Result);
// Big-endian 32-bit
if (const ELF32BEObjectFile *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
- return createELFDumper(ELFObj->getELFFile(), Writer, Result);
+ return createELFDumper(ELFObj, Writer, Result);
// Little-endian 64-bit
if (const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
- return createELFDumper(ELFObj->getELFFile(), Writer, Result);
+ return createELFDumper(ELFObj, Writer, Result);
// Big-endian 64-bit
if (const ELF64BEObjectFile *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
- return createELFDumper(ELFObj->getELFFile(), Writer, Result);
+ return createELFDumper(ELFObj, Writer, Result);
return readobj_error::unsupported_obj_file_format;
}
@@ -488,7 +519,7 @@ template <class ELFT>
void ELFDumper<ELFT>::LoadVersionNeeds(const Elf_Shdr *sec) const {
unsigned vn_size = sec->sh_size; // Size of section in bytes
unsigned vn_count = sec->sh_info; // Number of Verneed entries
- const char *sec_start = (const char *)Obj->base() + sec->sh_offset;
+ const char *sec_start = (const char *)ObjF->getELFFile()->base() + sec->sh_offset;
const char *sec_end = sec_start + vn_size;
// The first Verneed entry is at the start of the section.
const char *p = sec_start;
@@ -522,7 +553,7 @@ template <class ELFT>
void ELFDumper<ELFT>::LoadVersionDefs(const Elf_Shdr *sec) const {
unsigned vd_size = sec->sh_size; // Size of section in bytes
unsigned vd_count = sec->sh_info; // Number of Verdef entries
- const char *sec_start = (const char *)Obj->base() + sec->sh_offset;
+ const char *sec_start = (const char *)ObjF->getELFFile()->base() + sec->sh_offset;
const char *sec_end = sec_start + vd_size;
// The first Verdef entry is at the start of the section.
const char *p = sec_start;
@@ -547,7 +578,7 @@ template <class ELFT> void ELFDumper<ELFT>::LoadVersionMap() const {
return;
// Has the VersionMap already been loaded?
- if (VersionMap.size() > 0)
+ if (!VersionMap.empty())
return;
// The first two version indexes are reserved.
@@ -611,9 +642,12 @@ static void printVersionDefinitionSection(ELFDumper<ELFT> *Dumper,
// is determined by DT_VERDEFNUM tag.
unsigned VerDefsNum = 0;
for (const typename ELFO::Elf_Dyn &Dyn : Dumper->dynamic_table()) {
- if (Dyn.d_tag == DT_VERDEFNUM)
+ if (Dyn.d_tag == DT_VERDEFNUM) {
VerDefsNum = Dyn.d_un.d_val;
+ break;
+ }
}
+
const uint8_t *SecStartAddress =
(const uint8_t *)Obj->base() + Sec->sh_offset;
const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size;
@@ -664,9 +698,12 @@ static void printVersionDependencySection(ELFDumper<ELFT> *Dumper,
return;
unsigned VerNeedNum = 0;
- for (const typename ELFO::Elf_Dyn &Dyn : Dumper->dynamic_table())
- if (Dyn.d_tag == DT_VERNEEDNUM)
+ for (const typename ELFO::Elf_Dyn &Dyn : Dumper->dynamic_table()) {
+ if (Dyn.d_tag == DT_VERNEEDNUM) {
VerNeedNum = Dyn.d_un.d_val;
+ break;
+ }
+ }
const uint8_t *SecData = (const uint8_t *)Obj->base() + Sec->sh_offset;
const typename ELFO::Elf_Shdr *StrTab =
@@ -700,13 +737,13 @@ static void printVersionDependencySection(ELFDumper<ELFT> *Dumper,
template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
// Dump version symbol section.
- printVersionSymbolSection(this, Obj, dot_gnu_version_sec, W);
+ printVersionSymbolSection(this, ObjF->getELFFile(), dot_gnu_version_sec, W);
// Dump version definition section.
- printVersionDefinitionSection(this, Obj, dot_gnu_version_d_sec, W);
+ printVersionDefinitionSection(this, ObjF->getELFFile(), dot_gnu_version_d_sec, W);
// Dump version dependency section.
- printVersionDependencySection(this, Obj, dot_gnu_version_r_sec, W);
+ printVersionDependencySection(this, ObjF->getELFFile(), dot_gnu_version_r_sec, W);
}
template <typename ELFT>
@@ -727,7 +764,7 @@ StringRef ELFDumper<ELFT>::getSymbolVersion(StringRef StrTab,
// Get the corresponding version index entry
const Elf_Versym *vs = unwrapOrError(
- Obj->template getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index));
+ ObjF->getELFFile()->template getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index));
size_t version_index = vs->vs_index & ELF::VERSYM_VERSION;
// Special markers for unversioned symbols.
@@ -760,6 +797,7 @@ StringRef ELFDumper<ELFT>::getSymbolVersion(StringRef StrTab,
template <typename ELFT>
StringRef ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec));
Elf_Sym_Range Syms = unwrapOrError(Obj->symbols(DotSymtabSec));
if (Index >= Syms.size())
@@ -780,8 +818,10 @@ std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym *Symbol,
bool IsDefault;
StringRef Version = getSymbolVersion(StrTable, &*Symbol, IsDefault);
- FullSymbolName += (IsDefault ? "@@" : "@");
- FullSymbolName += Version;
+ if (!Version.empty()) {
+ FullSymbolName += (IsDefault ? "@@" : "@");
+ FullSymbolName += Version;
+ }
return FullSymbolName;
}
@@ -807,6 +847,7 @@ void ELFDumper<ELFT>::getSectionNameIndex(const Elf_Sym *Symbol,
if (SectionIndex == SHN_XINDEX)
SectionIndex = unwrapOrError(object::getExtendedSymbolTableIndex<ELFT>(
Symbol, FirstSym, ShndxTable));
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const typename ELFT::Shdr *Sec =
unwrapOrError(Obj->getSection(SectionIndex));
SectionName = unwrapOrError(Obj->getSectionName(Sec));
@@ -1167,6 +1208,7 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
switch (Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_ARM_EXIDX);
}
+ break;
case ELF::EM_MIPS:
case ELF::EM_MIPS_RS3_LE:
switch (Type) {
@@ -1175,6 +1217,7 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_OPTIONS);
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_ABIFLAGS);
}
+ break;
}
switch (Type) {
@@ -1221,7 +1264,7 @@ static std::string getElfPtType(unsigned Arch, unsigned Type) {
case ELF::EM_ARM:
if (Type == ELF::PT_ARM_EXIDX)
return "EXIDX";
- return "";
+ break;
case ELF::EM_MIPS:
case ELF::EM_MIPS_RS3_LE:
switch (Type) {
@@ -1234,7 +1277,7 @@ static std::string getElfPtType(unsigned Arch, unsigned Type) {
case PT_MIPS_ABIFLAGS:
return "ABIFLAGS";
}
- return "";
+ break;
}
}
return std::string("<unknown>: ") + to_string(format_hex(Type, 1));
@@ -1247,49 +1290,49 @@ static const EnumEntry<unsigned> ElfSegmentFlags[] = {
};
static const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_NOREORDER),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_PIC),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_CPIC),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ABI2),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_32BITMODE),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_FP64),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_NAN2008),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ABI_O32),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ABI_O64),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ABI_EABI32),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ABI_EABI64),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_3900),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_4010),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_4100),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_4650),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_4120),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_4111),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_SB1),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_OCTEON),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_XLR),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_OCTEON2),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_OCTEON3),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_5400),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_5900),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_5500),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_9000),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_LS2E),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_LS2F),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_LS3A),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MICROMIPS),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_ASE_M16),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_ASE_MDMX),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_1),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_2),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_3),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_4),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_5),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_32),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_64),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_32R2),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_64R2),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_32R6),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_64R6)
+ ENUM_ENT(EF_MIPS_NOREORDER, "noreorder"),
+ ENUM_ENT(EF_MIPS_PIC, "pic"),
+ ENUM_ENT(EF_MIPS_CPIC, "cpic"),
+ ENUM_ENT(EF_MIPS_ABI2, "abi2"),
+ ENUM_ENT(EF_MIPS_32BITMODE, "32bitmode"),
+ ENUM_ENT(EF_MIPS_FP64, "fp64"),
+ ENUM_ENT(EF_MIPS_NAN2008, "nan2008"),
+ ENUM_ENT(EF_MIPS_ABI_O32, "o32"),
+ ENUM_ENT(EF_MIPS_ABI_O64, "o64"),
+ ENUM_ENT(EF_MIPS_ABI_EABI32, "eabi32"),
+ ENUM_ENT(EF_MIPS_ABI_EABI64, "eabi64"),
+ ENUM_ENT(EF_MIPS_MACH_3900, "3900"),
+ ENUM_ENT(EF_MIPS_MACH_4010, "4010"),
+ ENUM_ENT(EF_MIPS_MACH_4100, "4100"),
+ ENUM_ENT(EF_MIPS_MACH_4650, "4650"),
+ ENUM_ENT(EF_MIPS_MACH_4120, "4120"),
+ ENUM_ENT(EF_MIPS_MACH_4111, "4111"),
+ ENUM_ENT(EF_MIPS_MACH_SB1, "sb1"),
+ ENUM_ENT(EF_MIPS_MACH_OCTEON, "octeon"),
+ ENUM_ENT(EF_MIPS_MACH_XLR, "xlr"),
+ ENUM_ENT(EF_MIPS_MACH_OCTEON2, "octeon2"),
+ ENUM_ENT(EF_MIPS_MACH_OCTEON3, "octeon3"),
+ ENUM_ENT(EF_MIPS_MACH_5400, "5400"),
+ ENUM_ENT(EF_MIPS_MACH_5900, "5900"),
+ ENUM_ENT(EF_MIPS_MACH_5500, "5500"),
+ ENUM_ENT(EF_MIPS_MACH_9000, "9000"),
+ ENUM_ENT(EF_MIPS_MACH_LS2E, "loongson-2e"),
+ ENUM_ENT(EF_MIPS_MACH_LS2F, "loongson-2f"),
+ ENUM_ENT(EF_MIPS_MACH_LS3A, "loongson-3a"),
+ ENUM_ENT(EF_MIPS_MICROMIPS, "micromips"),
+ ENUM_ENT(EF_MIPS_ARCH_ASE_M16, "mips16"),
+ ENUM_ENT(EF_MIPS_ARCH_ASE_MDMX, "mdmx"),
+ ENUM_ENT(EF_MIPS_ARCH_1, "mips1"),
+ ENUM_ENT(EF_MIPS_ARCH_2, "mips2"),
+ ENUM_ENT(EF_MIPS_ARCH_3, "mips3"),
+ ENUM_ENT(EF_MIPS_ARCH_4, "mips4"),
+ ENUM_ENT(EF_MIPS_ARCH_5, "mips5"),
+ ENUM_ENT(EF_MIPS_ARCH_32, "mips32"),
+ ENUM_ENT(EF_MIPS_ARCH_64, "mips64"),
+ ENUM_ENT(EF_MIPS_ARCH_32R2, "mips32r2"),
+ ENUM_ENT(EF_MIPS_ARCH_64R2, "mips64r2"),
+ ENUM_ENT(EF_MIPS_ARCH_32R6, "mips32r6"),
+ ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6")
};
static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
@@ -1325,15 +1368,17 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK)
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
};
static const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
- LLVM_READOBJ_ENUM_ENT(ELF, EF_RISCV_RVC),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_RISCV_FLOAT_ABI_SINGLE),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_RISCV_FLOAT_ABI_DOUBLE),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_RISCV_FLOAT_ABI_QUAD),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_RISCV_RVE)
+ ENUM_ENT(EF_RISCV_RVC, "RVC"),
+ ENUM_ENT(EF_RISCV_FLOAT_ABI_SINGLE, "single-float ABI"),
+ ENUM_ENT(EF_RISCV_FLOAT_ABI_DOUBLE, "double-float ABI"),
+ ENUM_ENT(EF_RISCV_FLOAT_ABI_QUAD, "quad-float ABI"),
+ ENUM_ENT(EF_RISCV_RVE, "RVE")
};
static const EnumEntry<unsigned> ElfSymOtherFlags[] = {
@@ -1375,9 +1420,11 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) {
}
template <typename ELFT>
-ELFDumper<ELFT>::ELFDumper(const ELFFile<ELFT> *Obj, ScopedPrinter &Writer)
- : ObjDumper(Writer), Obj(Obj) {
+ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
+ ScopedPrinter &Writer)
+ : ObjDumper(Writer), ObjF(ObjF) {
SmallVector<const Elf_Phdr *, 4> LoadSegments;
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
if (Phdr.p_type == ELF::PT_DYNAMIC) {
DynamicTable = createDRIFrom(&Phdr, sizeof(Elf_Dyn));
@@ -1423,7 +1470,7 @@ ELFDumper<ELFT>::ELFDumper(const ELFFile<ELFT> *Obj, ScopedPrinter &Writer)
break;
case ELF::SHT_LLVM_CALL_GRAPH_PROFILE:
if (DotCGProfileSec != nullptr)
- reportError("Multiple .note.llvm.cgprofile");
+ reportError("Multiple .llvm.call-graph-profile");
DotCGProfileSec = &Sec;
break;
case ELF::SHT_LLVM_ADDRSIG:
@@ -1446,19 +1493,10 @@ template <typename ELFT>
void ELFDumper<ELFT>::parseDynamicTable(
ArrayRef<const Elf_Phdr *> LoadSegments) {
auto toMappedAddr = [&](uint64_t VAddr) -> const uint8_t * {
- const Elf_Phdr *const *I =
- std::upper_bound(LoadSegments.begin(), LoadSegments.end(), VAddr,
- [](uint64_t VAddr, const Elf_Phdr_Impl<ELFT> *Phdr) {
- return VAddr < Phdr->p_vaddr;
- });
- if (I == LoadSegments.begin())
- report_fatal_error("Virtual address is not in any segment");
- --I;
- const Elf_Phdr &Phdr = **I;
- uint64_t Delta = VAddr - Phdr.p_vaddr;
- if (Delta >= Phdr.p_filesz)
- report_fatal_error("Virtual address is not in any segment");
- return Obj->base() + Phdr.p_offset + Delta;
+ auto MappedAddrOrError = ObjF->getELFFile()->toMappedAddr(VAddr);
+ if (!MappedAddrOrError)
+ report_fatal_error(MappedAddrOrError.takeError());
+ return MappedAddrOrError.get();
};
uint64_t SONameOffset = 0;
@@ -1557,51 +1595,51 @@ typename ELFDumper<ELFT>::Elf_Relr_Range ELFDumper<ELFT>::dyn_relrs() const {
template<class ELFT>
void ELFDumper<ELFT>::printFileHeaders() {
- ELFDumperStyle->printFileHeaders(Obj);
+ ELFDumperStyle->printFileHeaders(ObjF->getELFFile());
}
template<class ELFT>
-void ELFDumper<ELFT>::printSections() {
- ELFDumperStyle->printSections(Obj);
+void ELFDumper<ELFT>::printSectionHeaders() {
+ ELFDumperStyle->printSectionHeaders(ObjF->getELFFile());
}
template<class ELFT>
void ELFDumper<ELFT>::printRelocations() {
- ELFDumperStyle->printRelocations(Obj);
+ ELFDumperStyle->printRelocations(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printProgramHeaders() {
- ELFDumperStyle->printProgramHeaders(Obj);
+ ELFDumperStyle->printProgramHeaders(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printDynamicRelocations() {
- ELFDumperStyle->printDynamicRelocations(Obj);
+ ELFDumperStyle->printDynamicRelocations(ObjF->getELFFile());
}
template<class ELFT>
void ELFDumper<ELFT>::printSymbols() {
- ELFDumperStyle->printSymbols(Obj);
+ ELFDumperStyle->printSymbols(ObjF->getELFFile());
}
template<class ELFT>
void ELFDumper<ELFT>::printDynamicSymbols() {
- ELFDumperStyle->printDynamicSymbols(Obj);
+ ELFDumperStyle->printDynamicSymbols(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printHashHistogram() {
- ELFDumperStyle->printHashHistogram(Obj);
+ ELFDumperStyle->printHashHistogram(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printCGProfile() {
- ELFDumperStyle->printCGProfile(Obj);
+ ELFDumperStyle->printCGProfile(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printNotes() {
- ELFDumperStyle->printNotes(Obj);
+ ELFDumperStyle->printNotes(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printELFLinkerOptions() {
- ELFDumperStyle->printELFLinkerOptions(Obj);
+ ELFDumperStyle->printELFLinkerOptions(ObjF->getELFFile());
}
static const char *getTypeString(unsigned Arch, uint64_t Type) {
@@ -1610,29 +1648,32 @@ static const char *getTypeString(unsigned Arch, uint64_t Type) {
case EM_HEXAGON:
switch (Type) {
#define HEXAGON_DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
+ case DT_##name: \
+ return #name;
#include "llvm/BinaryFormat/DynamicTags.def"
#undef HEXAGON_DYNAMIC_TAG
}
+ break;
case EM_MIPS:
switch (Type) {
#define MIPS_DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
+ case DT_##name: \
+ return #name;
#include "llvm/BinaryFormat/DynamicTags.def"
#undef MIPS_DYNAMIC_TAG
}
+ break;
- case EM_PPC64:
- switch(Type) {
+ case EM_PPC64:
+ switch(Type) {
#define PPC64_DYNAMIC_TAG(name, value) \
case DT_##name: \
return #name;
#include "llvm/BinaryFormat/DynamicTags.def"
#undef PPC64_DYNAMIC_TAG
}
+ break;
}
#undef DYNAMIC_TAG
switch (Type) {
@@ -1842,9 +1883,9 @@ void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
template<class ELFT>
void ELFDumper<ELFT>::printUnwindInfo() {
- const unsigned Machine = Obj->getHeader()->e_machine;
+ const unsigned Machine = ObjF->getELFFile()->getHeader()->e_machine;
if (Machine == EM_386 || Machine == EM_X86_64) {
- DwarfCFIEH::PrinterContext<ELFT> Ctx(W, Obj);
+ DwarfCFIEH::PrinterContext<ELFT> Ctx(W, ObjF);
return Ctx.printUnwindInformation();
}
W.startLine() << "UnwindInfo not implemented.\n";
@@ -1853,6 +1894,7 @@ void ELFDumper<ELFT>::printUnwindInfo() {
namespace {
template <> void ELFDumper<ELF32LE>::printUnwindInfo() {
+ const ELFFile<ELF32LE> *Obj = ObjF->getELFFile();
const unsigned Machine = Obj->getHeader()->e_machine;
if (Machine == EM_ARM) {
ARM::EHABI::PrinterContext<ELF32LE> Ctx(W, Obj, DotSymtabSec);
@@ -1895,7 +1937,7 @@ void ELFDumper<ELFT>::printDynamicTable() {
uintX_t Tag = Entry.getTag();
++I;
W.startLine() << " " << format_hex(Tag, Is64 ? 18 : 10, opts::Output != opts::GNU) << " "
- << format("%-21s", getTypeString(Obj->getHeader()->e_machine, Tag));
+ << format("%-21s", getTypeString(ObjF->getELFFile()->getHeader()->e_machine, Tag));
printValue(Tag, Entry.getVal());
OS << "\n";
}
@@ -1962,6 +2004,7 @@ void ELFDumper<ELFT>::printAttributes() {
namespace {
template <> void ELFDumper<ELF32LE>::printAttributes() {
+ const ELFFile<ELF32LE> *Obj = ObjF->getELFFile();
if (Obj->getHeader()->e_machine != EM_ARM) {
W.startLine() << "Attributes not implemented.\n";
return;
@@ -2247,6 +2290,7 @@ MipsGOTParser<ELFT>::getPltSym(const Entry *E) const {
}
template <class ELFT> void ELFDumper<ELFT>::printMipsPLTGOT() {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
if (Obj->getHeader()->e_machine != EM_MIPS)
reportError("MIPS PLT GOT is available for MIPS targets only");
@@ -2331,6 +2375,7 @@ static int getMipsRegisterSize(uint8_t Flag) {
}
template <class ELFT> void ELFDumper<ELFT>::printMipsABIFlags() {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const Elf_Shdr *Shdr = findSectionByName(*Obj, ".MIPS.abiflags");
if (!Shdr) {
W.startLine() << "There is no .MIPS.abiflags section in the file.\n";
@@ -2376,6 +2421,7 @@ static void printMipsReginfoData(ScopedPrinter &W,
}
template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const Elf_Shdr *Shdr = findSectionByName(*Obj, ".reginfo");
if (!Shdr) {
W.startLine() << "There is no .reginfo section in the file.\n";
@@ -2393,6 +2439,7 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
}
template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const Elf_Shdr *Shdr = findSectionByName(*Obj, ".MIPS.options");
if (!Shdr) {
W.startLine() << "There is no .MIPS.options section in the file.\n";
@@ -2422,6 +2469,7 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
}
template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const Elf_Shdr *StackMapSection = nullptr;
for (const auto &Sec : unwrapOrError(Obj->sections())) {
StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
@@ -2442,11 +2490,11 @@ template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
}
template <class ELFT> void ELFDumper<ELFT>::printGroupSections() {
- ELFDumperStyle->printGroupSections(Obj);
+ ELFDumperStyle->printGroupSections(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printAddrsig() {
- ELFDumperStyle->printAddrsig(Obj);
+ ELFDumperStyle->printAddrsig(ObjF->getELFFile());
}
static inline void printFields(formatted_raw_ostream &OS, StringRef Str1,
@@ -2517,7 +2565,17 @@ template <class ELFT> void GNUStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
printFields(OS, "Start of program headers:", Str);
Str = to_string(e->e_shoff) + " (bytes into file)";
printFields(OS, "Start of section headers:", Str);
+ std::string ElfFlags;
+ if (e->e_machine == EM_MIPS)
+ ElfFlags =
+ printFlags(e->e_flags, makeArrayRef(ElfHeaderMipsFlags),
+ unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI),
+ unsigned(ELF::EF_MIPS_MACH));
+ else if (e->e_machine == EM_RISCV)
+ ElfFlags = printFlags(e->e_flags, makeArrayRef(ElfHeaderRISCVFlags));
Str = "0x" + to_hexString(e->e_flags);
+ if (!ElfFlags.empty())
+ Str = Str + ", " + ElfFlags;
printFields(OS, "Flags:", Str);
Str = to_string(e->e_ehsize) + " (bytes)";
printFields(OS, "Size of this header:", Str);
@@ -2791,11 +2849,13 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
case SHT_ARM_OVERLAYSECTION:
return "ARM_OVERLAYSECTION";
}
+ break;
case EM_X86_64:
switch (Type) {
case SHT_X86_64_UNWIND:
return "X86_64_UNWIND";
}
+ break;
case EM_MIPS:
case EM_MIPS_RS3_LE:
switch (Type) {
@@ -2808,6 +2868,7 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
case SHT_MIPS_DWARF:
return "SHT_MIPS_DWARF";
}
+ break;
}
switch (Type) {
case SHT_NULL:
@@ -2872,7 +2933,8 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
return "";
}
-template <class ELFT> void GNUStyle<ELFT>::printSections(const ELFO *Obj) {
+template <class ELFT>
+void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
size_t SectionIndex = 0;
std::string Number, Type, Size, Address, Offset, Flags, Link, Info, EntrySize,
Alignment;
@@ -3583,7 +3645,7 @@ static std::string getFreeBSDNoteTypeName(const uint32_t NT) {
return OS.str();
}
-static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
+static std::string getAMDNoteTypeName(const uint32_t NT) {
static const struct {
uint32_t ID;
const char *Name;
@@ -3606,41 +3668,52 @@ static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
return OS.str();
}
+static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
+ if (NT == ELF::NT_AMDGPU_METADATA)
+ return std::string("NT_AMDGPU_METADATA (AMDGPU Metadata)");
+
+ std::string string;
+ raw_string_ostream OS(string);
+ OS << format("Unknown note type (0x%08x)", NT);
+ return OS.str();
+}
+
template <typename ELFT>
-static void printGNUProperty(raw_ostream &OS, uint32_t Type, uint32_t DataSize,
- ArrayRef<uint8_t> Data) {
+static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
+ ArrayRef<uint8_t> Data) {
+ std::string str;
+ raw_string_ostream OS(str);
switch (Type) {
default:
- OS << format(" <application-specific type 0x%x>\n", Type);
- return;
+ OS << format("<application-specific type 0x%x>", Type);
+ return OS.str();
case GNU_PROPERTY_STACK_SIZE: {
- OS << " stack size: ";
+ OS << "stack size: ";
if (DataSize == sizeof(typename ELFT::uint))
- OS << format("0x%llx\n",
- (uint64_t)(*(const typename ELFT::Addr *)Data.data()));
+ OS << formatv("{0:x}",
+ (uint64_t)(*(const typename ELFT::Addr *)Data.data()));
else
- OS << format("<corrupt length: 0x%x>\n", DataSize);
- break;
+ OS << format("<corrupt length: 0x%x>", DataSize);
+ return OS.str();
}
case GNU_PROPERTY_NO_COPY_ON_PROTECTED:
- OS << " no copy on protected";
+ OS << "no copy on protected";
if (DataSize)
OS << format(" <corrupt length: 0x%x>", DataSize);
- OS << "\n";
- break;
+ return OS.str();
case GNU_PROPERTY_X86_FEATURE_1_AND:
- OS << " X86 features: ";
+ OS << "X86 features: ";
if (DataSize != 4 && DataSize != 8) {
- OS << format("<corrupt length: 0x%x>\n", DataSize);
- break;
+ OS << format("<corrupt length: 0x%x>", DataSize);
+ return OS.str();
}
uint64_t CFProtection =
(DataSize == 4)
? support::endian::read32<ELFT::TargetEndianness>(Data.data())
: support::endian::read64<ELFT::TargetEndianness>(Data.data());
if (CFProtection == 0) {
- OS << "none\n";
- break;
+ OS << "none";
+ return OS.str();
}
if (CFProtection & GNU_PROPERTY_X86_FEATURE_1_IBT) {
OS << "IBT";
@@ -3656,105 +3729,177 @@ static void printGNUProperty(raw_ostream &OS, uint32_t Type, uint32_t DataSize,
}
if (CFProtection)
OS << format("<unknown flags: 0x%llx>", CFProtection);
- OS << "\n";
- break;
+ return OS.str();
}
}
template <typename ELFT>
-static void printGNUNote(raw_ostream &OS, uint32_t NoteType,
- ArrayRef<typename ELFT::Word> Words, size_t Size) {
+static SmallVector<std::string, 4>
+getGNUPropertyList(ArrayRef<uint8_t> Arr) {
using Elf_Word = typename ELFT::Word;
+ SmallVector<std::string, 4> Properties;
+ while (Arr.size() >= 8) {
+ uint32_t Type = *reinterpret_cast<const Elf_Word *>(Arr.data());
+ uint32_t DataSize = *reinterpret_cast<const Elf_Word *>(Arr.data() + 4);
+ Arr = Arr.drop_front(8);
+
+ // Take padding size into account if present.
+ uint64_t PaddedSize = alignTo(DataSize, sizeof(typename ELFT::uint));
+ std::string str;
+ raw_string_ostream OS(str);
+ if (Arr.size() < PaddedSize) {
+ OS << format("<corrupt type (0x%x) datasz: 0x%x>", Type, DataSize);
+ Properties.push_back(OS.str());
+ break;
+ }
+ Properties.push_back(
+ getGNUProperty<ELFT>(Type, DataSize, Arr.take_front(PaddedSize)));
+ Arr = Arr.drop_front(PaddedSize);
+ }
+
+ if (!Arr.empty())
+ Properties.push_back("<corrupted GNU_PROPERTY_TYPE_0>");
+
+ return Properties;
+}
+
+struct GNUAbiTag {
+ std::string OSName;
+ std::string ABI;
+ bool IsValid;
+};
+
+template <typename ELFT>
+static GNUAbiTag getGNUAbiTag(ArrayRef<uint8_t> Desc) {
+ typedef typename ELFT::Word Elf_Word;
+
+ ArrayRef<Elf_Word> Words(reinterpret_cast<const Elf_Word*>(Desc.begin()),
+ reinterpret_cast<const Elf_Word*>(Desc.end()));
+
+ if (Words.size() < 4)
+ return {"", "", /*IsValid=*/false};
+
+ static const char *OSNames[] = {
+ "Linux", "Hurd", "Solaris", "FreeBSD", "NetBSD", "Syllable", "NaCl",
+ };
+ StringRef OSName = "Unknown";
+ if (Words[0] < array_lengthof(OSNames))
+ OSName = OSNames[Words[0]];
+ uint32_t Major = Words[1], Minor = Words[2], Patch = Words[3];
+ std::string str;
+ raw_string_ostream ABI(str);
+ ABI << Major << "." << Minor << "." << Patch;
+ return {OSName, ABI.str(), /*IsValid=*/true};
+}
+
+static std::string getGNUBuildId(ArrayRef<uint8_t> Desc) {
+ std::string str;
+ raw_string_ostream OS(str);
+ for (const auto &B : Desc)
+ OS << format_hex_no_prefix(B, 2);
+ return OS.str();
+}
+
+static StringRef getGNUGoldVersion(ArrayRef<uint8_t> Desc) {
+ return StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+}
+
+template <typename ELFT>
+static void printGNUNote(raw_ostream &OS, uint32_t NoteType,
+ ArrayRef<uint8_t> Desc) {
switch (NoteType) {
default:
return;
case ELF::NT_GNU_ABI_TAG: {
- static const char *OSNames[] = {
- "Linux", "Hurd", "Solaris", "FreeBSD", "NetBSD", "Syllable", "NaCl",
- };
-
- StringRef OSName = "Unknown";
- if (Words[0] < array_lengthof(OSNames))
- OSName = OSNames[Words[0]];
- uint32_t Major = Words[1], Minor = Words[2], Patch = Words[3];
-
- if (Words.size() < 4)
+ const GNUAbiTag &AbiTag = getGNUAbiTag<ELFT>(Desc);
+ if (!AbiTag.IsValid)
OS << " <corrupt GNU_ABI_TAG>";
else
- OS << " OS: " << OSName << ", ABI: " << Major << "." << Minor << "."
- << Patch;
+ OS << " OS: " << AbiTag.OSName << ", ABI: " << AbiTag.ABI;
break;
}
case ELF::NT_GNU_BUILD_ID: {
- OS << " Build ID: ";
- ArrayRef<uint8_t> ID(reinterpret_cast<const uint8_t *>(Words.data()), Size);
- for (const auto &B : ID)
- OS << format_hex_no_prefix(B, 2);
+ OS << " Build ID: " << getGNUBuildId(Desc);
break;
}
case ELF::NT_GNU_GOLD_VERSION:
- OS << " Version: "
- << StringRef(reinterpret_cast<const char *>(Words.data()), Size);
+ OS << " Version: " << getGNUGoldVersion(Desc);
break;
case ELF::NT_GNU_PROPERTY_TYPE_0:
OS << " Properties:";
-
- ArrayRef<uint8_t> Arr(reinterpret_cast<const uint8_t *>(Words.data()),
- Size);
- while (Arr.size() >= 8) {
- uint32_t Type = *reinterpret_cast<const Elf_Word *>(Arr.data());
- uint32_t DataSize = *reinterpret_cast<const Elf_Word *>(Arr.data() + 4);
- Arr = Arr.drop_front(8);
-
- // Take padding size into account if present.
- uint64_t PaddedSize = alignTo(DataSize, sizeof(typename ELFT::uint));
- if (Arr.size() < PaddedSize) {
- OS << format(" <corrupt type (0x%x) datasz: 0x%x>\n", Type,
- DataSize);
- break;
- }
- printGNUProperty<ELFT>(OS, Type, DataSize, Arr.take_front(PaddedSize));
- Arr = Arr.drop_front(PaddedSize);
- }
-
- if (!Arr.empty())
- OS << " <corrupted GNU_PROPERTY_TYPE_0>";
+ for (const auto &Property : getGNUPropertyList<ELFT>(Desc))
+ OS << " " << Property << "\n";
break;
}
OS << '\n';
}
+struct AMDNote {
+ std::string Type;
+ std::string Value;
+};
+
template <typename ELFT>
-static void printAMDGPUNote(raw_ostream &OS, uint32_t NoteType,
- ArrayRef<typename ELFT::Word> Words, size_t Size) {
+static AMDNote getAMDNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
switch (NoteType) {
default:
- return;
- case ELF::NT_AMD_AMDGPU_HSA_METADATA:
- OS << " HSA Metadata:\n"
- << StringRef(reinterpret_cast<const char *>(Words.data()), Size);
- break;
- case ELF::NT_AMD_AMDGPU_ISA:
- OS << " ISA Version:\n"
- << " "
- << StringRef(reinterpret_cast<const char *>(Words.data()), Size);
- break;
- case ELF::NT_AMD_AMDGPU_PAL_METADATA:
- const uint32_t *PALMetadataBegin = reinterpret_cast<const uint32_t *>(Words.data());
- const uint32_t *PALMetadataEnd = PALMetadataBegin + Size;
- std::vector<uint32_t> PALMetadata(PALMetadataBegin, PALMetadataEnd);
- std::string PALMetadataString;
- auto Error = AMDGPU::PALMD::toString(PALMetadata, PALMetadataString);
- OS << " PAL Metadata:\n";
- if (Error) {
- OS << " Invalid";
- return;
- }
- OS << PALMetadataString;
- break;
+ return {"", ""};
+ case ELF::NT_AMD_AMDGPU_HSA_METADATA:
+ return {"HSA Metadata",
+ std::string(reinterpret_cast<const char *>(Desc.data()),
+ Desc.size())};
+ case ELF::NT_AMD_AMDGPU_ISA:
+ return {"ISA Version",
+ std::string(reinterpret_cast<const char *>(Desc.data()),
+ Desc.size())};
+ case ELF::NT_AMD_AMDGPU_PAL_METADATA:
+ const uint32_t *PALMetadataBegin =
+ reinterpret_cast<const uint32_t *>(Desc.data());
+ const uint32_t *PALMetadataEnd = PALMetadataBegin + Desc.size();
+ std::vector<uint32_t> PALMetadata(PALMetadataBegin, PALMetadataEnd);
+ std::string PALMetadataString;
+ auto Error = AMDGPU::PALMD::toString(PALMetadata, PALMetadataString);
+ if (Error) {
+ return {"PAL Metadata", "Invalid"};
+ }
+ return {"PAL Metadata", PALMetadataString};
+ }
+}
+
+struct AMDGPUNote {
+ std::string Type;
+ std::string Value;
+};
+
+template <typename ELFT>
+static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
+ switch (NoteType) {
+ default:
+ return {"", ""};
+ case ELF::NT_AMDGPU_METADATA:
+ auto MsgPackString =
+ StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+ msgpack::Reader MsgPackReader(MsgPackString);
+ auto OptMsgPackNodeOrErr = msgpack::Node::read(MsgPackReader);
+ if (errorToBool(OptMsgPackNodeOrErr.takeError()))
+ return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
+ auto &OptMsgPackNode = *OptMsgPackNodeOrErr;
+ if (!OptMsgPackNode)
+ return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
+ auto &MsgPackNode = *OptMsgPackNode;
+
+ AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+ if (!Verifier.verify(*MsgPackNode))
+ return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
+
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << MsgPackNode;
+
+ return {"AMDGPU Metadata", StrOS.str()};
}
- OS.flush();
}
template <class ELFT>
@@ -3771,7 +3916,7 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
auto ProcessNote = [&](const Elf_Note &Note) {
StringRef Name = Note.getName();
- ArrayRef<Elf_Word> Descriptor = Note.getDesc();
+ ArrayRef<uint8_t> Descriptor = Note.getDesc();
Elf_Word Type = Note.getType();
OS << " " << Name << std::string(22 - Name.size(), ' ')
@@ -3779,12 +3924,19 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
if (Name == "GNU") {
OS << getGNUNoteTypeName(Type) << '\n';
- printGNUNote<ELFT>(OS, Type, Descriptor, Descriptor.size());
+ printGNUNote<ELFT>(OS, Type, Descriptor);
} else if (Name == "FreeBSD") {
OS << getFreeBSDNoteTypeName(Type) << '\n';
} else if (Name == "AMD") {
+ OS << getAMDNoteTypeName(Type) << '\n';
+ const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ OS << " " << N.Type << ":\n " << N.Value << '\n';
+ } else if (Name == "AMDGPU") {
OS << getAMDGPUNoteTypeName(Type) << '\n';
- printAMDGPUNote<ELFT>(OS, Type, Descriptor, Descriptor.size());
+ const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ OS << " " << N.Type << ":\n " << N.Value << '\n';
} else {
OS << "Unknown note type: (" << format_hex(Type, 10) << ')';
}
@@ -4123,7 +4275,8 @@ void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, Elf_Rela Rel,
}
}
-template <class ELFT> void LLVMStyle<ELFT>::printSections(const ELFO *Obj) {
+template <class ELFT>
+void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
ListScope SectionsD(W, "Sections");
int SectionIndex = -1;
@@ -4379,7 +4532,7 @@ void LLVMStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
while (Cur != End) {
unsigned Size;
const char *Err;
- uint64_t SymIndex = decodeULEB128(Cur, &Size, Contents.end(), &Err);
+ uint64_t SymIndex = decodeULEB128(Cur, &Size, End, &Err);
if (Err)
reportError(Err);
W.printNumber("Sym", this->dumper()->getStaticSymbolName(SymIndex),
@@ -4388,9 +4541,103 @@ void LLVMStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
}
}
+template <typename ELFT>
+static void printGNUNoteLLVMStyle(uint32_t NoteType,
+ ArrayRef<uint8_t> Desc,
+ ScopedPrinter &W) {
+ switch (NoteType) {
+ default:
+ return;
+ case ELF::NT_GNU_ABI_TAG: {
+ const GNUAbiTag &AbiTag = getGNUAbiTag<ELFT>(Desc);
+ if (!AbiTag.IsValid) {
+ W.printString("ABI", "<corrupt GNU_ABI_TAG>");
+ } else {
+ W.printString("OS", AbiTag.OSName);
+ W.printString("ABI", AbiTag.ABI);
+ }
+ break;
+ }
+ case ELF::NT_GNU_BUILD_ID: {
+ W.printString("Build ID", getGNUBuildId(Desc));
+ break;
+ }
+ case ELF::NT_GNU_GOLD_VERSION:
+ W.printString("Version", getGNUGoldVersion(Desc));
+ break;
+ case ELF::NT_GNU_PROPERTY_TYPE_0:
+ ListScope D(W, "Property");
+ for (const auto &Property : getGNUPropertyList<ELFT>(Desc))
+ W.printString(Property);
+ break;
+ }
+}
+
template <class ELFT>
void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
- W.startLine() << "printNotes not implemented!\n";
+ ListScope L(W, "Notes");
+ const Elf_Ehdr *e = Obj->getHeader();
+ bool IsCore = e->e_type == ELF::ET_CORE;
+
+ auto PrintHeader = [&](const typename ELFT::Off Offset,
+ const typename ELFT::Addr Size) {
+ W.printHex("Offset", Offset);
+ W.printHex("Size", Size);
+ };
+
+ auto ProcessNote = [&](const Elf_Note &Note) {
+ DictScope D2(W, "Note");
+ StringRef Name = Note.getName();
+ ArrayRef<uint8_t> Descriptor = Note.getDesc();
+ Elf_Word Type = Note.getType();
+
+ W.printString("Owner", Name);
+ W.printHex("Data size", Descriptor.size());
+ if (Name == "GNU") {
+ W.printString("Type", getGNUNoteTypeName(Type));
+ printGNUNoteLLVMStyle<ELFT>(Type, Descriptor, W);
+ } else if (Name == "FreeBSD") {
+ W.printString("Type", getFreeBSDNoteTypeName(Type));
+ } else if (Name == "AMD") {
+ W.printString("Type", getAMDNoteTypeName(Type));
+ const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ W.printString(N.Type, N.Value);
+ } else if (Name == "AMDGPU") {
+ W.printString("Type", getAMDGPUNoteTypeName(Type));
+ const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ W.printString(N.Type, N.Value);
+ } else {
+ W.getOStream() << "Unknown note type: (" << format_hex(Type, 10) << ')';
+ }
+ };
+
+ if (IsCore) {
+ for (const auto &P : unwrapOrError(Obj->program_headers())) {
+ if (P.p_type != PT_NOTE)
+ continue;
+ DictScope D(W, "NoteSection");
+ PrintHeader(P.p_offset, P.p_filesz);
+ Error Err = Error::success();
+ for (const auto &Note : Obj->notes(P, Err))
+ ProcessNote(Note);
+ if (Err)
+ error(std::move(Err));
+ }
+ } else {
+ for (const auto &S : unwrapOrError(Obj->sections())) {
+ if (S.sh_type != SHT_NOTE)
+ continue;
+ DictScope D(W, "NoteSection");
+ PrintHeader(S.sh_offset, S.sh_size);
+ Error Err = Error::success();
+ for (const auto &Note : Obj->notes(S, Err))
+ ProcessNote(Note);
+ if (Err)
+ error(std::move(Err));
+ }
+ }
}
template <class ELFT>
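For context on the descriptor layout that the new getGNUPropertyList<ELFT>() helper above walks, here is a minimal standalone sketch (not the llvm-readobj code) of parsing an NT_GNU_PROPERTY_TYPE_0 note body: each property is a 4-byte type, a 4-byte datasz, then datasz bytes of data padded up to the ELF word size. The 8-byte alignment, the sample bytes, and the helper names are assumptions for illustration only.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

struct GnuProperty {
  uint32_t Type;
  std::vector<uint8_t> Data;
};

// Walks a GNU_PROPERTY_TYPE_0 descriptor: 4-byte type, 4-byte datasz,
// then datasz bytes padded up to Align (8 for ELF64, 4 for ELF32).
static std::vector<GnuProperty> parseProperties(const uint8_t *Desc,
                                                size_t Size, size_t Align) {
  std::vector<GnuProperty> Out;
  size_t Off = 0;
  while (Size - Off >= 8) {
    uint32_t Type, DataSize;
    std::memcpy(&Type, Desc + Off, 4);      // assumes little-endian input
    std::memcpy(&DataSize, Desc + Off + 4, 4);
    Off += 8;
    size_t Padded = (DataSize + Align - 1) & ~(Align - 1);
    if (Size - Off < Padded) // corrupt: datasz runs past the descriptor
      break;
    GnuProperty P;
    P.Type = Type;
    P.Data.assign(Desc + Off, Desc + Off + DataSize);
    Out.push_back(std::move(P));
    Off += Padded;
  }
  return Out;
}

int main() {
  // One property: GNU_PROPERTY_X86_FEATURE_1_AND (0xc0000002), datasz 4,
  // value 0x3 (IBT | SHSTK), padded to 8 bytes.
  const uint8_t Desc[] = {0x02, 0x00, 0x00, 0xc0, 0x04, 0x00, 0x00, 0x00,
                          0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
  for (const GnuProperty &P : parseProperties(Desc, sizeof(Desc), 8))
    std::printf("property type 0x%x, %zu data bytes\n", P.Type, P.Data.size());
  return 0;
}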
diff --git a/contrib/llvm/tools/llvm-readobj/MachODumper.cpp b/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
index 69ef1556f78d..35e4cfcb6b10 100644
--- a/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
@@ -32,7 +32,7 @@ public:
: ObjDumper(Writer), Obj(Obj) {}
void printFileHeaders() override;
- void printSections() override;
+ void printSectionHeaders() override;
void printRelocations() override;
void printSymbols() override;
void printDynamicSymbols() override;
@@ -59,7 +59,7 @@ private:
void printRelocation(const MachOObjectFile *Obj, const RelocationRef &Reloc);
- void printSections(const MachOObjectFile *Obj);
+ void printSectionHeaders(const MachOObjectFile *Obj);
const MachOObjectFile *Obj;
};
@@ -428,11 +428,9 @@ void MachODumper::printFileHeaders(const MachHeader &Header) {
W.printFlags("Flags", Header.flags, makeArrayRef(MachOHeaderFlags));
}
-void MachODumper::printSections() {
- return printSections(Obj);
-}
+void MachODumper::printSectionHeaders() { return printSectionHeaders(Obj); }
-void MachODumper::printSections(const MachOObjectFile *Obj) {
+void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) {
ListScope Group(W, "Sections");
int SectionIndex = -1;
diff --git a/contrib/llvm/tools/llvm-readobj/ObjDumper.h b/contrib/llvm/tools/llvm-readobj/ObjDumper.h
index 8c3a7bec73be..13de563469ab 100644
--- a/contrib/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/contrib/llvm/tools/llvm-readobj/ObjDumper.h
@@ -33,7 +33,7 @@ public:
virtual ~ObjDumper();
virtual void printFileHeaders() = 0;
- virtual void printSections() = 0;
+ virtual void printSectionHeaders() = 0;
virtual void printRelocations() = 0;
virtual void printSymbols() = 0;
virtual void printDynamicSymbols() = 0;
diff --git a/contrib/llvm/tools/llvm-readobj/WasmDumper.cpp b/contrib/llvm/tools/llvm-readobj/WasmDumper.cpp
index ce224836225e..79d3db4e2d29 100644
--- a/contrib/llvm/tools/llvm-readobj/WasmDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/WasmDumper.cpp
@@ -23,28 +23,21 @@ using namespace object;
namespace {
static const EnumEntry<unsigned> WasmSymbolTypes[] = {
-#define ENUM_ENTRY(X) { #X, wasm::WASM_SYMBOL_TYPE_##X }
- ENUM_ENTRY(FUNCTION),
- ENUM_ENTRY(DATA),
- ENUM_ENTRY(GLOBAL),
- ENUM_ENTRY(SECTION),
+#define ENUM_ENTRY(X) \
+ { #X, wasm::WASM_SYMBOL_TYPE_##X }
+ ENUM_ENTRY(FUNCTION), ENUM_ENTRY(DATA), ENUM_ENTRY(GLOBAL),
+ ENUM_ENTRY(SECTION), ENUM_ENTRY(EVENT),
#undef ENUM_ENTRY
};
static const EnumEntry<uint32_t> WasmSectionTypes[] = {
-#define ENUM_ENTRY(X) { #X, wasm::WASM_SEC_##X }
- ENUM_ENTRY(CUSTOM),
- ENUM_ENTRY(TYPE),
- ENUM_ENTRY(IMPORT),
- ENUM_ENTRY(FUNCTION),
- ENUM_ENTRY(TABLE),
- ENUM_ENTRY(MEMORY),
- ENUM_ENTRY(GLOBAL),
- ENUM_ENTRY(EXPORT),
- ENUM_ENTRY(START),
- ENUM_ENTRY(ELEM),
- ENUM_ENTRY(CODE),
- ENUM_ENTRY(DATA),
+#define ENUM_ENTRY(X) \
+ { #X, wasm::WASM_SEC_##X }
+ ENUM_ENTRY(CUSTOM), ENUM_ENTRY(TYPE), ENUM_ENTRY(IMPORT),
+ ENUM_ENTRY(FUNCTION), ENUM_ENTRY(TABLE), ENUM_ENTRY(MEMORY),
+ ENUM_ENTRY(GLOBAL), ENUM_ENTRY(EVENT), ENUM_ENTRY(EXPORT),
+ ENUM_ENTRY(START), ENUM_ENTRY(ELEM), ENUM_ENTRY(CODE),
+ ENUM_ENTRY(DATA),
#undef ENUM_ENTRY
};
@@ -54,7 +47,7 @@ public:
: ObjDumper(Writer), Obj(Obj) {}
void printFileHeaders() override;
- void printSections() override;
+ void printSectionHeaders() override;
void printRelocations() override;
void printSymbols() override;
void printDynamicSymbols() override { llvm_unreachable("unimplemented"); }
@@ -108,7 +101,7 @@ void WasmDumper::printRelocation(const SectionRef &Section,
if (HasAddend)
W.printNumber("Addend", WasmReloc.Addend);
} else {
- raw_ostream& OS = W.startLine();
+ raw_ostream &OS = W.startLine();
OS << W.hex(Reloc.getOffset()) << " " << RelocTypeName << " ";
if (!SymName.empty())
OS << SymName;
@@ -154,7 +147,7 @@ void WasmDumper::printSymbols() {
printSymbol(Symbol);
}
-void WasmDumper::printSections() {
+void WasmDumper::printSectionHeaders() {
ListScope Group(W, "Sections");
for (const SectionRef &Section : Obj->sections()) {
const WasmSection &WasmSec = Obj->getWasmSection(Section);
@@ -169,7 +162,7 @@ void WasmDumper::printSections() {
const wasm::WasmLinkingData &LinkingData = Obj->linkingData();
if (!LinkingData.InitFunctions.empty()) {
ListScope Group(W, "InitFunctions");
- for (const wasm::WasmInitFunc &F: LinkingData.InitFunctions)
+ for (const wasm::WasmInitFunc &F : LinkingData.InitFunctions)
W.startLine() << F.Symbol << " (priority=" << F.Priority << ")\n";
}
}
@@ -177,7 +170,7 @@ void WasmDumper::printSections() {
case wasm::WASM_SEC_DATA: {
ListScope Group(W, "Segments");
for (const WasmSegment &Segment : Obj->dataSegments()) {
- const wasm::WasmDataSegment& Seg = Segment.Data;
+ const wasm::WasmDataSegment &Seg = Segment.Data;
DictScope Group(W, "Segment");
if (!Seg.Name.empty())
W.printString("Name", Seg.Name);
@@ -219,7 +212,7 @@ void WasmDumper::printSymbol(const SymbolRef &Sym) {
W.printHex("Flags", Symbol.Info.Flags);
}
-}
+} // namespace
namespace llvm {
diff --git a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
index a7236c02b8ae..81ce7a590364 100644
--- a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -48,58 +48,72 @@ namespace opts {
cl::desc("<input object files>"),
cl::ZeroOrMore);
+ // -all, -a
+ cl::opt<bool>
+ All("all",
+ cl::desc("Equivalent to setting: --file-headers, --program-headers, "
+ "--section-headers, --symbols, --relocations, "
+ "--dynamic-table, --notes, --version-info, --unwind, "
+ "--section-groups and --elf-hash-histogram."));
+ cl::alias AllShort("a", cl::desc("Alias for --all"), cl::aliasopt(All));
+
+ // --headers -e
+ cl::opt<bool>
+ Headers("headers",
+ cl::desc("Equivalent to setting: --file-headers, --program-headers, "
+ "--section-headers"));
+ cl::alias HeadersShort("e", cl::desc("Alias for --headers"),
+ cl::aliasopt(Headers));
+
// -wide, -W
- cl::opt<bool> WideOutput("wide",
- cl::desc("Ignored for compatibility with GNU readelf"));
+ cl::opt<bool>
+ WideOutput("wide", cl::desc("Ignored for compatibility with GNU readelf"),
+ cl::Hidden);
cl::alias WideOutputShort("W",
cl::desc("Alias for --wide"),
cl::aliasopt(WideOutput));
- // -file-headers, -h
+ // -file-headers, -file-header, -h
cl::opt<bool> FileHeaders("file-headers",
cl::desc("Display file headers "));
- cl::alias FileHeadersShort("h",
- cl::desc("Alias for --file-headers"),
- cl::aliasopt(FileHeaders));
-
- // -sections, -s, -S
- // Note: In GNU readelf, -s means --symbols!
- cl::opt<bool> Sections("sections",
- cl::desc("Display all sections."));
- cl::alias SectionsShort("s",
- cl::desc("Alias for --sections"),
- cl::aliasopt(Sections));
- cl::alias SectionsShortUpper("S",
- cl::desc("Alias for --sections"),
- cl::aliasopt(Sections));
-
- // -section-relocations, -sr
+ cl::alias FileHeadersShort("h", cl::desc("Alias for --file-headers"),
+ cl::aliasopt(FileHeaders), cl::NotHidden);
+ cl::alias FileHeadersSingular("file-header",
+ cl::desc("Alias for --file-headers"),
+ cl::aliasopt(FileHeaders));
+
+ // -section-headers, -sections, -S
+ // Also -s in llvm-readobj mode.
+ cl::opt<bool> SectionHeaders("section-headers",
+ cl::desc("Display all section headers."));
+ cl::alias SectionsShortUpper("S", cl::desc("Alias for --section-headers"),
+ cl::aliasopt(SectionHeaders), cl::NotHidden);
+ cl::alias SectionHeadersAlias("sections",
+ cl::desc("Alias for --section-headers"),
+ cl::aliasopt(SectionHeaders), cl::NotHidden);
+
+ // -section-relocations
+ // Also -sr in llvm-readobj mode.
cl::opt<bool> SectionRelocations("section-relocations",
cl::desc("Display relocations for each section shown."));
- cl::alias SectionRelocationsShort("sr",
- cl::desc("Alias for --section-relocations"),
- cl::aliasopt(SectionRelocations));
- // -section-symbols, -st
+ // -section-symbols
+ // Also -st in llvm-readobj mode.
cl::opt<bool> SectionSymbols("section-symbols",
cl::desc("Display symbols for each section shown."));
- cl::alias SectionSymbolsShort("st",
- cl::desc("Alias for --section-symbols"),
- cl::aliasopt(SectionSymbols));
- // -section-data, -sd
+ // -section-data
+ // Also -sd in llvm-readobj mode.
cl::opt<bool> SectionData("section-data",
cl::desc("Display section data for each section shown."));
- cl::alias SectionDataShort("sd",
- cl::desc("Alias for --section-data"),
- cl::aliasopt(SectionData));
- // -relocations, -r
+ // -relocations, -relocs, -r
cl::opt<bool> Relocations("relocations",
cl::desc("Display the relocation entries in the file"));
- cl::alias RelocationsShort("r",
- cl::desc("Alias for --relocations"),
- cl::aliasopt(Relocations));
+ cl::alias RelocationsShort("r", cl::desc("Alias for --relocations"),
+ cl::aliasopt(Relocations), cl::NotHidden);
+ cl::alias RelocationsGNU("relocs", cl::desc("Alias for --relocations"),
+ cl::aliasopt(Relocations));
// -notes, -n
cl::opt<bool> Notes("notes", cl::desc("Display the ELF notes in the file"));
@@ -109,19 +123,19 @@ namespace opts {
cl::opt<bool> DynRelocs("dyn-relocations",
cl::desc("Display the dynamic relocation entries in the file"));
- // -symbols, -t
+ // -symbols
+ // Also -s in llvm-readelf mode, or -t in llvm-readobj mode.
cl::opt<bool> Symbols("symbols",
cl::desc("Display the symbol table"));
- cl::alias SymbolsShort("t",
- cl::desc("Alias for --symbols"),
- cl::aliasopt(Symbols));
+ cl::alias SymbolsGNU("syms", cl::desc("Alias for --symbols"),
+ cl::aliasopt(Symbols));
- // -dyn-symbols, -dt
+ // -dyn-symbols, -dyn-syms
+ // Also -dt in llvm-readobj mode.
cl::opt<bool> DynamicSymbols("dyn-symbols",
cl::desc("Display the dynamic symbol table"));
- cl::alias DynamicSymbolsShort("dt",
- cl::desc("Alias for --dyn-symbols"),
- cl::aliasopt(DynamicSymbols));
+ cl::alias DynSymsGNU("dyn-syms", cl::desc("Alias for --dyn-symbols"),
+ cl::aliasopt(DynamicSymbols));
// -unwind, -u
cl::opt<bool> UnwindInfo("unwind",
@@ -130,29 +144,33 @@ namespace opts {
cl::desc("Alias for --unwind"),
cl::aliasopt(UnwindInfo));
- // -dynamic-table
+ // -dynamic-table, -dynamic, -d
cl::opt<bool> DynamicTable("dynamic-table",
cl::desc("Display the ELF .dynamic section table"));
cl::alias DynamicTableShort("d", cl::desc("Alias for --dynamic-table"),
+ cl::aliasopt(DynamicTable), cl::NotHidden);
+ cl::alias DynamicTableAlias("dynamic", cl::desc("Alias for --dynamic-table"),
cl::aliasopt(DynamicTable));
// -needed-libs
cl::opt<bool> NeededLibraries("needed-libs",
cl::desc("Display the needed libraries"));
- // -program-headers
+ // -program-headers, -segments, -l
cl::opt<bool> ProgramHeaders("program-headers",
cl::desc("Display ELF program headers"));
cl::alias ProgramHeadersShort("l", cl::desc("Alias for --program-headers"),
- cl::aliasopt(ProgramHeaders));
+ cl::aliasopt(ProgramHeaders), cl::NotHidden);
+ cl::alias SegmentsAlias("segments", cl::desc("Alias for --program-headers"),
+ cl::aliasopt(ProgramHeaders));
- // -string-dump
+ // -string-dump, -p
cl::list<std::string> StringDump("string-dump", cl::desc("<number|name>"),
cl::ZeroOrMore);
cl::alias StringDumpShort("p", cl::desc("Alias for --string-dump"),
cl::aliasopt(StringDump));
- // -hex-dump
+ // -hex-dump, -x
cl::list<std::string> HexDump("hex-dump", cl::desc("<number|name>"),
cl::ZeroOrMore);
cl::alias HexDumpShort("x", cl::desc("Alias for --hex-dump"),
@@ -188,11 +206,9 @@ namespace opts {
"codeview-subsection-bytes",
cl::desc("Dump raw contents of codeview debug sections and records"));
- // -arm-attributes, -a
+ // -arm-attributes
cl::opt<bool> ARMAttributes("arm-attributes",
cl::desc("Display the ARM attributes section"));
- cl::alias ARMAttributesShort("a", cl::desc("Alias for --arm-attributes"),
- cl::aliasopt(ARMAttributes));
// -mips-plt-got
cl::opt<bool>
@@ -283,28 +299,40 @@ namespace opts {
PrintStackMap("stackmap",
cl::desc("Display contents of stackmap section"));
- // -version-info
+ // -version-info, -V
cl::opt<bool>
VersionInfo("version-info",
cl::desc("Display ELF version sections (if present)"));
cl::alias VersionInfoShort("V", cl::desc("Alias for -version-info"),
cl::aliasopt(VersionInfo));
+ // -elf-section-groups, -section-groups, -g
cl::opt<bool> SectionGroups("elf-section-groups",
cl::desc("Display ELF section group contents"));
+ cl::alias SectionGroupsAlias("section-groups",
+ cl::desc("Alias for -elf-section-groups"),
+ cl::aliasopt(SectionGroups));
cl::alias SectionGroupsShort("g", cl::desc("Alias for -elf-sections-groups"),
cl::aliasopt(SectionGroups));
+
+ // -elf-hash-histogram, -histogram, -I
cl::opt<bool> HashHistogram(
"elf-hash-histogram",
cl::desc("Display bucket list histogram for hash sections"));
cl::alias HashHistogramShort("I", cl::desc("Alias for -elf-hash-histogram"),
cl::aliasopt(HashHistogram));
+ cl::alias HistogramAlias("histogram",
+ cl::desc("Alias for --elf-hash-histogram"),
+ cl::aliasopt(HashHistogram));
+ // -elf-cg-profile
cl::opt<bool> CGProfile("elf-cg-profile", cl::desc("Display callgraph profile section"));
- cl::opt<bool> Addrsig("elf-addrsig",
+ // -addrsig
+ cl::opt<bool> Addrsig("addrsig",
cl::desc("Display address-significance table"));
+ // -elf-output-style
cl::opt<OutputStyleTy>
Output("elf-output-style", cl::desc("Specify ELF dump style"),
cl::values(clEnumVal(LLVM, "LLVM default style"),
@@ -418,8 +446,8 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
if (opts::FileHeaders)
Dumper->printFileHeaders();
- if (opts::Sections)
- Dumper->printSections();
+ if (opts::SectionHeaders)
+ Dumper->printSectionHeaders();
if (opts::Relocations)
Dumper->printRelocations();
if (opts::DynRelocs)
@@ -492,6 +520,8 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
Dumper->printCOFFResources();
if (opts::COFFLoadConfig)
Dumper->printCOFFLoadConfig();
+ if (opts::Addrsig)
+ Dumper->printAddrsig();
if (opts::CodeView)
Dumper->printCodeViewDebugInfo();
if (opts::CodeViewMergedTypes)
@@ -586,21 +616,87 @@ static void dumpInput(StringRef File) {
reportError(File, readobj_error::unrecognized_file_format);
}
+/// Registers aliases that should only be allowed by readobj.
+static void registerReadobjAliases() {
+ // -s has meant --sections for a very long time in llvm-readobj despite
+ // meaning --symbols in readelf.
+ static cl::alias SectionsShort("s", cl::desc("Alias for --section-headers"),
+ cl::aliasopt(opts::SectionHeaders),
+ cl::NotHidden);
+
+ // Only register -t in llvm-readobj, as readelf reserves it for
+ // --section-details (not implemented yet).
+ static cl::alias SymbolsShort("t", cl::desc("Alias for --symbols"),
+ cl::aliasopt(opts::Symbols), cl::NotHidden);
+
+ // The following two-letter aliases are only provided for readobj, as readelf
+ // allows single-letter args to be grouped together.
+ static cl::alias SectionRelocationsShort(
+ "sr", cl::desc("Alias for --section-relocations"),
+ cl::aliasopt(opts::SectionRelocations));
+ static cl::alias SectionDataShort("sd", cl::desc("Alias for --section-data"),
+ cl::aliasopt(opts::SectionData));
+ static cl::alias SectionSymbolsShort("st",
+ cl::desc("Alias for --section-symbols"),
+ cl::aliasopt(opts::SectionSymbols));
+ static cl::alias DynamicSymbolsShort("dt",
+ cl::desc("Alias for --dyn-symbols"),
+ cl::aliasopt(opts::DynamicSymbols));
+}
+
+/// Registers aliases that should only be allowed by readelf.
+static void registerReadelfAliases() {
+ // -s is here because for readobj it means --sections.
+ static cl::alias SymbolsShort("s", cl::desc("Alias for --symbols"),
+ cl::aliasopt(opts::Symbols), cl::NotHidden,
+ cl::Grouping);
+
+ // Allow all single letter flags to be grouped together.
+ for (auto &OptEntry : cl::getRegisteredOptions()) {
+ StringRef ArgName = OptEntry.getKey();
+ cl::Option *Option = OptEntry.getValue();
+ if (ArgName.size() == 1)
+ Option->setFormattingFlag(cl::Grouping);
+ }
+}
+
int main(int argc, const char *argv[]) {
InitLLVM X(argc, argv);
// Register the target printer for --version.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
- opts::WideOutput.setHiddenFlag(cl::Hidden);
-
- if (sys::path::stem(argv[0]).find("readelf") != StringRef::npos)
+ if (sys::path::stem(argv[0]).contains("readelf")) {
opts::Output = opts::GNU;
+ registerReadelfAliases();
+ } else {
+ registerReadobjAliases();
+ }
cl::ParseCommandLineOptions(argc, argv, "LLVM Object Reader\n");
+ if (opts::All) {
+ opts::FileHeaders = true;
+ opts::ProgramHeaders = true;
+ opts::SectionHeaders = true;
+ opts::Symbols = true;
+ opts::Relocations = true;
+ opts::DynamicTable = true;
+ opts::Notes = true;
+ opts::VersionInfo = true;
+ opts::UnwindInfo = true;
+ opts::SectionGroups = true;
+ opts::HashHistogram = true;
+ }
+
+ if (opts::Headers) {
+ opts::FileHeaders = true;
+ opts::ProgramHeaders = true;
+ opts::SectionHeaders = true;
+ }
+
// Default to stdin if no filename is specified.
- if (opts::InputFilenames.size() == 0)
+ if (opts::InputFilenames.empty())
opts::InputFilenames.push_back("-");
llvm::for_each(opts::InputFilenames, dumpInput);
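The readelf-mode block above enables GNU-style grouping of single-letter flags, so a bundle such as -SW parses as -S -W. Below is a minimal sketch of the same idiom, standalone and outside the llvm-readobj sources, using only the CommandLine APIs that appear in the patch; the option names here are placeholders, not the tool's real option set.

#include "llvm/Support/CommandLine.h"

using namespace llvm;

static cl::opt<bool> SectionHeaders("section-headers",
                                    cl::desc("Display all section headers."));
static cl::alias SectionsShort("S", cl::desc("Alias for --section-headers"),
                               cl::aliasopt(SectionHeaders));
static cl::opt<bool> Wide("wide", cl::desc("Ignored for compatibility"));
static cl::alias WideShort("W", cl::desc("Alias for --wide"),
                           cl::aliasopt(Wide));

int main(int argc, char **argv) {
  // Same loop as registerReadelfAliases(): mark every single-letter option
  // as cl::Grouping so bundles like "-SW" are accepted.
  for (auto &OptEntry : cl::getRegisteredOptions()) {
    if (OptEntry.getKey().size() == 1)
      OptEntry.getValue()->setFormattingFlag(cl::Grouping);
  }
  cl::ParseCommandLineOptions(argc, argv, "grouping demo\n");
  return (SectionHeaders && Wide) ? 0 : 1;
}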
diff --git a/contrib/llvm/tools/llvm-readobj/llvm-readobj.h b/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
index 374ffd03e13a..92ed098dc642 100644
--- a/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
+++ b/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
@@ -40,7 +40,7 @@ namespace llvm {
return *EO;
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(EO.takeError(), OS, "");
+ logAllUnhandledErrors(EO.takeError(), OS);
OS.flush();
reportError(Buf);
}
@@ -49,22 +49,13 @@ namespace llvm {
} // namespace llvm
namespace opts {
- extern llvm::cl::list<std::string> InputFilenames;
- extern llvm::cl::opt<bool> FileHeaders;
- extern llvm::cl::opt<bool> Sections;
extern llvm::cl::opt<bool> SectionRelocations;
extern llvm::cl::opt<bool> SectionSymbols;
extern llvm::cl::opt<bool> SectionData;
- extern llvm::cl::opt<bool> Relocations;
- extern llvm::cl::opt<bool> Symbols;
extern llvm::cl::opt<bool> DynamicSymbols;
- extern llvm::cl::opt<bool> UnwindInfo;
extern llvm::cl::opt<bool> ExpandRelocs;
extern llvm::cl::opt<bool> RawRelr;
- extern llvm::cl::opt<bool> CodeView;
extern llvm::cl::opt<bool> CodeViewSubsectionBytes;
- extern llvm::cl::opt<bool> ARMAttributes;
- extern llvm::cl::opt<bool> MipsPLTGOT;
enum OutputStyleTy { LLVM, GNU };
extern llvm::cl::opt<OutputStyleTy> Output;
} // namespace opts
diff --git a/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 54db1ec113fc..975638ed82d1 100644
--- a/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -88,25 +88,30 @@ CheckFiles("check",
cl::desc("File containing RuntimeDyld verifier checks."),
cl::ZeroOrMore);
-static cl::opt<uint64_t>
+// Tracking BUG: 19665
+// http://llvm.org/bugs/show_bug.cgi?id=19665
+//
+// Do not change these options to cl::opt<uint64_t> since this silently breaks
+// argument parsing.
+static cl::opt<unsigned long long>
PreallocMemory("preallocate",
cl::desc("Allocate memory upfront rather than on-demand"),
cl::init(0));
-static cl::opt<uint64_t>
+static cl::opt<unsigned long long>
TargetAddrStart("target-addr-start",
cl::desc("For -verify only: start of phony target address "
"range."),
cl::init(4096), // Start at "page 1" - no allocating at "null".
cl::Hidden);
-static cl::opt<uint64_t>
+static cl::opt<unsigned long long>
TargetAddrEnd("target-addr-end",
cl::desc("For -verify only: end of phony target address range."),
cl::init(~0ULL),
cl::Hidden);
-static cl::opt<uint64_t>
+static cl::opt<unsigned long long>
TargetSectionSep("target-section-sep",
cl::desc("For -verify only: Separation between sections in "
"phony target address space."),
@@ -304,7 +309,7 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
if (!MaybeObj) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(MaybeObj.takeError(), OS, "");
+ logAllUnhandledErrors(MaybeObj.takeError(), OS);
OS.flush();
ErrorAndExit("unable to create object file: '" + Buf + "'");
}
@@ -433,7 +438,7 @@ static int executeInput() {
if (!MaybeObj) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(MaybeObj.takeError(), OS, "");
+ logAllUnhandledErrors(MaybeObj.takeError(), OS);
OS.flush();
ErrorAndExit("unable to create object file: '" + Buf + "'");
}
@@ -577,7 +582,11 @@ static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
if (LoadAddr &&
*LoadAddr != static_cast<uint64_t>(
reinterpret_cast<uintptr_t>(Tmp->first))) {
- AlreadyAllocated[*LoadAddr] = Tmp->second;
+ // A section will have a LoadAddr of 0 if it wasn't loaded for whatever
+ // reason (e.g. zero byte COFF sections). Don't include those sections in
+ // the allocation map.
+ if (*LoadAddr != 0)
+ AlreadyAllocated[*LoadAddr] = Tmp->second;
Worklist.erase(Tmp);
}
}
@@ -701,7 +710,7 @@ static int linkAndVerify() {
if (!MaybeObj) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(MaybeObj.takeError(), OS, "");
+ logAllUnhandledErrors(MaybeObj.takeError(), OS);
OS.flush();
ErrorAndExit("unable to create object file: '" + Buf + "'");
}
diff --git a/contrib/llvm/tools/llvm-stress/llvm-stress.cpp b/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
index d8ec11251ff6..c29b7a7f7e46 100644
--- a/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -356,8 +356,8 @@ struct StoreModifier: public Modifier {
void Act() override {
// Try to use predefined pointers. If non-exist, use undef pointer value;
Value *Ptr = getRandomPointerValue();
- Type *Tp = Ptr->getType();
- Value *Val = getRandomValue(Tp->getContainedType(0));
+ PointerType *Tp = cast<PointerType>(Ptr->getType());
+ Value *Val = getRandomValue(Tp->getElementType());
Type *ValTy = Val->getType();
// Do not store vectors of i1s because they are unsupported
diff --git a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 6d40a5403504..9d19f994b739 100644
--- a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -55,17 +55,29 @@ static cl::opt<bool>
ClPrintInlining("inlining", cl::init(true),
cl::desc("Print all inlined frames for a given address"));
+// -demangle, -C
static cl::opt<bool>
ClDemangle("demangle", cl::init(true), cl::desc("Demangle function names"));
+static cl::alias
+ClDemangleShort("C", cl::desc("Alias for -demangle"),
+ cl::NotHidden, cl::aliasopt(ClDemangle));
static cl::opt<std::string> ClDefaultArch("default-arch", cl::init(""),
cl::desc("Default architecture "
"(for multi-arch objects)"));
+// -obj, -exe, -e
static cl::opt<std::string>
ClBinaryName("obj", cl::init(""),
cl::desc("Path to object file to be symbolized (if not provided, "
"object file should be specified for each input line)"));
+static cl::alias
+ClBinaryNameAliasExe("exe", cl::desc("Alias for -obj"),
+ cl::NotHidden, cl::aliasopt(ClBinaryName));
+static cl::alias
+ClBinaryNameAliasE("e", cl::desc("Alias for -obj"),
+ cl::NotHidden, cl::aliasopt(ClBinaryName));
+
static cl::opt<std::string>
ClDwpName("dwp", cl::init(""),
@@ -75,13 +87,25 @@ static cl::list<std::string>
ClDsymHint("dsym-hint", cl::ZeroOrMore,
cl::desc("Path to .dSYM bundles to search for debug info for the "
"object files"));
-static cl::opt<bool>
- ClPrintAddress("print-address", cl::init(false),
- cl::desc("Show address before line information"));
+// -print-address, -addresses, -a
+static cl::opt<bool>
+ClPrintAddress("print-address", cl::init(false),
+ cl::desc("Show address before line information"));
+static cl::alias
+ClPrintAddressAliasAddresses("addresses", cl::desc("Alias for -print-address"),
+ cl::NotHidden, cl::aliasopt(ClPrintAddress));
+static cl::alias
+ClPrintAddressAliasA("a", cl::desc("Alias for -print-address"),
+ cl::NotHidden, cl::aliasopt(ClPrintAddress));
+
+// -pretty-print, -p
static cl::opt<bool>
ClPrettyPrint("pretty-print", cl::init(false),
cl::desc("Make the output more human friendly"));
+static cl::alias ClPrettyPrintShort("p", cl::desc("Alias for -pretty-print"),
+ cl::NotHidden,
+ cl::aliasopt(ClPrettyPrint));
static cl::opt<int> ClPrintSourceContextLines(
"print-source-context-lines", cl::init(0),
@@ -90,6 +114,10 @@ static cl::opt<int> ClPrintSourceContextLines(
static cl::opt<bool> ClVerbose("verbose", cl::init(false),
cl::desc("Print verbose line info"));
+static cl::list<std::string> ClInputAddresses(cl::Positional,
+ cl::desc("<input addresses>..."),
+ cl::ZeroOrMore);
+
template<typename T>
static bool error(Expected<T> &ResOrErr) {
if (ResOrErr)
@@ -137,6 +165,38 @@ static bool parseCommand(StringRef InputString, bool &IsData,
return !StringRef(pos, offset_length).getAsInteger(0, ModuleOffset);
}
+static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer,
+ DIPrinter &Printer) {
+ bool IsData = false;
+ std::string ModuleName;
+ uint64_t ModuleOffset = 0;
+ if (!parseCommand(StringRef(InputString), IsData, ModuleName, ModuleOffset)) {
+ outs() << InputString;
+ return;
+ }
+
+ if (ClPrintAddress) {
+ outs() << "0x";
+ outs().write_hex(ModuleOffset);
+ StringRef Delimiter = ClPrettyPrint ? ": " : "\n";
+ outs() << Delimiter;
+ }
+ if (IsData) {
+ auto ResOrErr = Symbolizer.symbolizeData(ModuleName, ModuleOffset);
+ Printer << (error(ResOrErr) ? DIGlobal() : ResOrErr.get());
+ } else if (ClPrintInlining) {
+ auto ResOrErr =
+ Symbolizer.symbolizeInlinedCode(ModuleName, ModuleOffset, ClDwpName);
+ Printer << (error(ResOrErr) ? DIInliningInfo() : ResOrErr.get());
+ } else {
+ auto ResOrErr =
+ Symbolizer.symbolizeCode(ModuleName, ModuleOffset, ClDwpName);
+ Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get());
+ }
+ outs() << "\n";
+ outs().flush();
+}
+
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
@@ -159,43 +219,15 @@ int main(int argc, char **argv) {
DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None,
ClPrettyPrint, ClPrintSourceContextLines, ClVerbose);
- const int kMaxInputStringLength = 1024;
- char InputString[kMaxInputStringLength];
-
- while (true) {
- if (!fgets(InputString, sizeof(InputString), stdin))
- break;
-
- bool IsData = false;
- std::string ModuleName;
- uint64_t ModuleOffset = 0;
- if (!parseCommand(StringRef(InputString), IsData, ModuleName,
- ModuleOffset)) {
- outs() << InputString;
- continue;
- }
+ if (ClInputAddresses.empty()) {
+ const int kMaxInputStringLength = 1024;
+ char InputString[kMaxInputStringLength];
- if (ClPrintAddress) {
- outs() << "0x";
- outs().write_hex(ModuleOffset);
- StringRef Delimiter = ClPrettyPrint ? ": " : "\n";
- outs() << Delimiter;
- }
- if (IsData) {
- auto ResOrErr = Symbolizer.symbolizeData(ModuleName, ModuleOffset);
- Printer << (error(ResOrErr) ? DIGlobal() : ResOrErr.get());
- } else if (ClPrintInlining) {
- auto ResOrErr =
- Symbolizer.symbolizeInlinedCode(ModuleName, ModuleOffset, ClDwpName);
- Printer << (error(ResOrErr) ? DIInliningInfo()
- : ResOrErr.get());
- } else {
- auto ResOrErr =
- Symbolizer.symbolizeCode(ModuleName, ModuleOffset, ClDwpName);
- Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get());
- }
- outs() << "\n";
- outs().flush();
+ while (fgets(InputString, sizeof(InputString), stdin))
+ symbolizeInput(InputString, Symbolizer, Printer);
+ } else {
+ for (StringRef Address : ClInputAddresses)
+ symbolizeInput(Address, Symbolizer, Printer);
}
return 0;
diff --git a/contrib/llvm/tools/llvm-xray/xray-account.cpp b/contrib/llvm/tools/llvm-xray/xray-account.cpp
index 77de1de496f6..9985c9adcf6c 100644
--- a/contrib/llvm/tools/llvm-xray/xray-account.cpp
+++ b/contrib/llvm/tools/llvm-xray/xray-account.cpp
@@ -146,6 +146,10 @@ bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
auto &ThreadStack = PerThreadFunctionStack[Record.TId];
switch (Record.Type) {
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // TODO: Support custom and typed event accounting in the future.
+ return true;
case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG: {
ThreadStack.emplace_back(Record.FuncId, Record.TSC);
@@ -255,9 +259,18 @@ ResultRow getStats(std::vector<uint64_t> &Timings) {
} // namespace
+using TupleType = std::tuple<int32_t, uint64_t, ResultRow>;
+
+template <typename F>
+static void sortByKey(std::vector<TupleType> &Results, F Fn) {
+ bool ASC = AccountSortOrder == SortDirection::ASCENDING;
+ llvm::sort(Results, [=](const TupleType &L, const TupleType &R) {
+ return ASC ? Fn(L) < Fn(R) : Fn(L) > Fn(R);
+ });
+}
+
template <class F>
void LatencyAccountant::exportStats(const XRayFileHeader &Header, F Fn) const {
- using TupleType = std::tuple<int32_t, uint64_t, ResultRow>;
std::vector<TupleType> Results;
Results.reserve(FunctionLatencies.size());
for (auto FT : FunctionLatencies) {
@@ -282,80 +295,31 @@ void LatencyAccountant::exportStats(const XRayFileHeader &Header, F Fn) const {
// Sort the data according to user-provided flags.
switch (AccountSortOutput) {
case SortField::FUNCID:
- llvm::sort(Results.begin(), Results.end(),
- [](const TupleType &L, const TupleType &R) {
- if (AccountSortOrder == SortDirection::ASCENDING)
- return std::get<0>(L) < std::get<0>(R);
- if (AccountSortOrder == SortDirection::DESCENDING)
- return std::get<0>(L) > std::get<0>(R);
- llvm_unreachable("Unknown sort direction");
- });
+ sortByKey(Results, [](const TupleType &X) { return std::get<0>(X); });
break;
case SortField::COUNT:
- llvm::sort(Results.begin(), Results.end(),
- [](const TupleType &L, const TupleType &R) {
- if (AccountSortOrder == SortDirection::ASCENDING)
- return std::get<1>(L) < std::get<1>(R);
- if (AccountSortOrder == SortDirection::DESCENDING)
- return std::get<1>(L) > std::get<1>(R);
- llvm_unreachable("Unknown sort direction");
- });
+ sortByKey(Results, [](const TupleType &X) { return std::get<1>(X); });
+ break;
+ case SortField::MIN:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Min; });
+ break;
+ case SortField::MED:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Median; });
+ break;
+ case SortField::PCT90:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct90; });
+ break;
+ case SortField::PCT99:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct99; });
break;
- default:
- // Here we need to look into the ResultRow for the rest of the data that
- // we want to sort by.
- llvm::sort(Results.begin(), Results.end(),
- [&](const TupleType &L, const TupleType &R) {
- auto &LR = std::get<2>(L);
- auto &RR = std::get<2>(R);
- switch (AccountSortOutput) {
- case SortField::COUNT:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Count < RR.Count;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Count > RR.Count;
- llvm_unreachable("Unknown sort direction");
- case SortField::MIN:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Min < RR.Min;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Min > RR.Min;
- llvm_unreachable("Unknown sort direction");
- case SortField::MED:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Median < RR.Median;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Median > RR.Median;
- llvm_unreachable("Unknown sort direction");
- case SortField::PCT90:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Pct90 < RR.Pct90;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Pct90 > RR.Pct90;
- llvm_unreachable("Unknown sort direction");
- case SortField::PCT99:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Pct99 < RR.Pct99;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Pct99 > RR.Pct99;
- llvm_unreachable("Unknown sort direction");
- case SortField::MAX:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Max < RR.Max;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Max > RR.Max;
- llvm_unreachable("Unknown sort direction");
- case SortField::SUM:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Sum < RR.Sum;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Sum > RR.Sum;
- llvm_unreachable("Unknown sort direction");
- default:
- llvm_unreachable("Unsupported sort order");
- }
- });
+ case SortField::MAX:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Max; });
break;
+ case SortField::SUM:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Sum; });
+ break;
+ case SortField::FUNC:
+ llvm_unreachable("Not implemented");
}
if (AccountTop > 0) {
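The sortByKey helper introduced above collapses the per-field comparators into one projection-based sort. A standalone sketch of the same idiom follows, with std::sort standing in for llvm::sort; the tuple layout and sample data are invented for illustration.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <tuple>
#include <vector>

using Row = std::tuple<int32_t, uint64_t, double>; // (FuncId, Count, Min)

// Sort rows by a projected key, ascending or descending.
template <typename F>
static void sortByKey(std::vector<Row> &Rows, bool Ascending, F Key) {
  std::sort(Rows.begin(), Rows.end(), [=](const Row &L, const Row &R) {
    return Ascending ? Key(L) < Key(R) : Key(L) > Key(R);
  });
}

int main() {
  std::vector<Row> Rows;
  Rows.push_back(std::make_tuple(1, uint64_t(10), 0.5));
  Rows.push_back(std::make_tuple(2, uint64_t(3), 0.1));
  Rows.push_back(std::make_tuple(3, uint64_t(7), 0.9));
  // Equivalent of the SortField::COUNT case with a descending sort order.
  sortByKey(Rows, /*Ascending=*/false,
            [](const Row &X) { return std::get<1>(X); });
  for (const Row &R : Rows)
    std::printf("func %d count %llu\n", (int)std::get<0>(R),
                (unsigned long long)std::get<1>(R));
  return 0;
}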
@@ -420,19 +384,25 @@ namespace llvm {
template <> struct format_provider<llvm::xray::RecordTypes> {
static void format(const llvm::xray::RecordTypes &T, raw_ostream &Stream,
StringRef Style) {
- switch(T) {
- case RecordTypes::ENTER:
- Stream << "enter";
- break;
- case RecordTypes::ENTER_ARG:
- Stream << "enter-arg";
- break;
- case RecordTypes::EXIT:
- Stream << "exit";
- break;
- case RecordTypes::TAIL_EXIT:
- Stream << "tail-exit";
- break;
+ switch (T) {
+ case RecordTypes::ENTER:
+ Stream << "enter";
+ break;
+ case RecordTypes::ENTER_ARG:
+ Stream << "enter-arg";
+ break;
+ case RecordTypes::EXIT:
+ Stream << "exit";
+ break;
+ case RecordTypes::TAIL_EXIT:
+ Stream << "tail-exit";
+ break;
+ case RecordTypes::CUSTOM_EVENT:
+ Stream << "custom-event";
+ break;
+ case RecordTypes::TYPED_EVENT:
+ Stream << "typed-event";
+ break;
}
}
};
diff --git a/contrib/llvm/tools/llvm-xray/xray-converter.cpp b/contrib/llvm/tools/llvm-xray/xray-converter.cpp
index 90e14d0d8896..3f153b99bc93 100644
--- a/contrib/llvm/tools/llvm-xray/xray-converter.cpp
+++ b/contrib/llvm/tools/llvm-xray/xray-converter.cpp
@@ -18,6 +18,7 @@
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/JSON.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
@@ -91,9 +92,10 @@ void TraceConverter::exportAsYAML(const Trace &Records, raw_ostream &OS) {
Trace.Records.push_back({R.RecordType, R.CPU, R.Type, R.FuncId,
Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId)
: llvm::to_string(R.FuncId),
- R.TSC, R.TId, R.PId, R.CallArgs});
+ R.TSC, R.TId, R.PId, R.CallArgs, R.Data});
}
Output Out(OS, nullptr, 0);
+ Out.setWriteDefaultValues(false);
Out << Trace;
}
@@ -122,21 +124,27 @@ void TraceConverter::exportAsRAWv1(const Trace &Records, raw_ostream &OS) {
// Then write out the rest of the records, still in an endian-appropriate
// format.
for (const auto &R : Records) {
- Writer.write(R.RecordType);
- // The on disk naive raw format uses 8 bit CPUs, but the record has 16.
- // There's no choice but truncation.
- Writer.write(static_cast<uint8_t>(R.CPU));
switch (R.Type) {
case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG:
+ Writer.write(R.RecordType);
+ Writer.write(static_cast<uint8_t>(R.CPU));
Writer.write(uint8_t{0});
break;
case RecordTypes::EXIT:
+ Writer.write(R.RecordType);
+ Writer.write(static_cast<uint8_t>(R.CPU));
Writer.write(uint8_t{1});
break;
case RecordTypes::TAIL_EXIT:
+ Writer.write(R.RecordType);
+ Writer.write(static_cast<uint8_t>(R.CPU));
Writer.write(uint8_t{2});
break;
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // Skip custom and typed event records for v1 logs.
+ continue;
}
Writer.write(R.FuncId);
Writer.write(R.TSC);
@@ -234,31 +242,6 @@ StackTrieNode *findOrCreateStackNode(
return CurrentStack;
}
-void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId,
- uint32_t TId, uint32_t PId, bool Symbolize,
- const FuncIdConversionHelper &FuncIdHelper,
- double EventTimestampUs,
- const StackTrieNode &StackCursor,
- StringRef FunctionPhenotype) {
- OS << " ";
- if (Version >= 3) {
- OS << llvm::formatv(
- R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "{3}", )"
- R"("ts" : "{4:f4}", "sf" : "{5}" })",
- (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
- : llvm::to_string(FuncId)),
- FunctionPhenotype, TId, PId, EventTimestampUs,
- StackCursor.ExtraData.id);
- } else {
- OS << llvm::formatv(
- R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )"
- R"("ts" : "{3:f3}", "sf" : "{4}" })",
- (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
- : llvm::to_string(FuncId)),
- FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id);
- }
-}
-
} // namespace
void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
@@ -269,18 +252,14 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
unsigned id_counter = 0;
- OS << "{\n \"traceEvents\": [";
DenseMap<uint32_t, StackTrieNode *> StackCursorByThreadId{};
DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> StackRootsByThreadId{};
DenseMap<unsigned, StackTrieNode *> StacksByStackId{};
std::forward_list<StackTrieNode> NodeStore{};
- int loop_count = 0;
- for (const auto &R : Records) {
- if (loop_count++ == 0)
- OS << "\n";
- else
- OS << ",\n";
+ // Create a JSON Array which will hold all trace events.
+ json::Array TraceEvents;
+ for (const auto &R : Records) {
// Chrome trace event format always wants data in micros.
// CyclesPerMicro = CycleHertz / 10^6
// TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp
@@ -292,6 +271,10 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
double EventTimestampUs = double(1000000) / CycleFreq * double(R.TSC);
StackTrieNode *&StackCursor = StackCursorByThreadId[R.TId];
switch (R.Type) {
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // TODO: Support typed and custom event rendering on Chrome Trace Viewer.
+ break;
case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG:
StackCursor = findOrCreateStackNode(StackCursor, R.FuncId, R.TId,
@@ -301,8 +284,15 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
// type of B for begin or E for end, thread id, process id,
// timestamp in microseconds, and a stack frame id. The ids are logged
// in an id dictionary after the events.
- writeTraceViewerRecord(Version, OS, R.FuncId, R.TId, R.PId, Symbolize,
- FuncIdHelper, EventTimestampUs, *StackCursor, "B");
+ TraceEvents.push_back(json::Object({
+ {"name", Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId)
+ : llvm::to_string(R.FuncId)},
+ {"ph", "B"},
+ {"tid", llvm::to_string(R.TId)},
+ {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
+ {"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
+ {"sf", llvm::to_string(StackCursor->ExtraData.id)},
+ }));
break;
case RecordTypes::EXIT:
case RecordTypes::TAIL_EXIT:
@@ -313,43 +303,51 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
// (And/Or in loop termination below)
StackTrieNode *PreviousCursor = nullptr;
do {
- if (PreviousCursor != nullptr) {
- OS << ",\n";
- }
- writeTraceViewerRecord(Version, OS, StackCursor->FuncId, R.TId, R.PId,
- Symbolize, FuncIdHelper, EventTimestampUs,
- *StackCursor, "E");
+ TraceEvents.push_back(json::Object({
+ {"name", Symbolize
+ ? FuncIdHelper.SymbolOrNumber(StackCursor->FuncId)
+ : llvm::to_string(StackCursor->FuncId)},
+ {"ph", "E"},
+ {"tid", llvm::to_string(R.TId)},
+ {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
+ {"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
+ {"sf", llvm::to_string(StackCursor->ExtraData.id)},
+ }));
PreviousCursor = StackCursor;
StackCursor = StackCursor->Parent;
} while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr);
break;
}
}
- OS << "\n ],\n"; // Close the Trace Events array.
- OS << " "
- << "\"displayTimeUnit\": \"ns\",\n";
// The stackFrames dictionary substantially reduces size of the output file by
// avoiding repeating the entire call stack of function names for each entry.
- OS << R"( "stackFrames": {)";
- int stack_frame_count = 0;
- for (auto map_iter : StacksByStackId) {
- if (stack_frame_count++ == 0)
- OS << "\n";
- else
- OS << ",\n";
- OS << " ";
- OS << llvm::formatv(
- R"("{0}" : { "name" : "{1}")", map_iter.first,
- (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId)
- : llvm::to_string(map_iter.second->FuncId)));
- if (map_iter.second->Parent != nullptr)
- OS << llvm::formatv(R"(, "parent": "{0}")",
- map_iter.second->Parent->ExtraData.id);
- OS << " }";
+ json::Object StackFrames;
+ for (const auto &Stack : StacksByStackId) {
+ const auto &StackId = Stack.first;
+ const auto &StackFunctionNode = Stack.second;
+ json::Object::iterator It;
+ std::tie(It, std::ignore) = StackFrames.insert({
+ llvm::to_string(StackId),
+ json::Object{
+ {"name",
+ Symbolize ? FuncIdHelper.SymbolOrNumber(StackFunctionNode->FuncId)
+ : llvm::to_string(StackFunctionNode->FuncId)}},
+ });
+
+ if (StackFunctionNode->Parent != nullptr)
+ It->second.getAsObject()->insert(
+ {"parent", llvm::to_string(StackFunctionNode->Parent->ExtraData.id)});
}
- OS << "\n }\n"; // Close the stack frames map.
- OS << "}\n"; // Close the JSON entry.
+
+ json::Object TraceJSON{
+ {"displayTimeUnit", "ns"},
+ {"traceEvents", std::move(TraceEvents)},
+ {"stackFrames", std::move(StackFrames)},
+ };
+
+ // Pretty-print the JSON using two spaces for indentations.
+ OS << formatv("{0:2}", json::Value(std::move(TraceJSON)));
}
namespace llvm {
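The Chrome trace exporter above now builds the whole document with llvm/Support/JSON.h instead of hand-printing braces and commas. A small standalone illustration of the same calls, with made-up event fields, showing how formatv's "{0:2}" style pretty-prints a json::Value with a two-space indent:

  #include "llvm/Support/FormatVariadic.h"
  #include "llvm/Support/JSON.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void emitSampleTrace(raw_ostream &OS) {
    json::Array Events;
    // One fabricated "begin" event in Chrome trace-event form.
    Events.push_back(json::Object({{"name", "main"},
                                   {"ph", "B"},
                                   {"tid", "1"},
                                   {"pid", "1"},
                                   {"ts", "0.0000"},
                                   {"sf", "0"}}));
    json::Object Root{
        {"displayTimeUnit", "ns"},
        {"traceEvents", std::move(Events)},
    };
    // The "2" style is the indentation width used by the JSON formatter.
    OS << formatv("{0:2}", json::Value(std::move(Root)));
  }
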
diff --git a/contrib/llvm/tools/llvm-xray/xray-fdr-dump.cpp b/contrib/llvm/tools/llvm-xray/xray-fdr-dump.cpp
new file mode 100644
index 000000000000..389825605b62
--- /dev/null
+++ b/contrib/llvm/tools/llvm-xray/xray-fdr-dump.cpp
@@ -0,0 +1,119 @@
+//===- xray-fdr-dump.cpp: XRay FDR Trace Dump Tool ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the FDR trace dumping tool, using the libraries for handling FDR
+// mode traces specifically.
+//
+//===----------------------------------------------------------------------===//
+#include "xray-registry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/XRay/BlockIndexer.h"
+#include "llvm/XRay/BlockPrinter.h"
+#include "llvm/XRay/BlockVerifier.h"
+#include "llvm/XRay/FDRRecordConsumer.h"
+#include "llvm/XRay/FDRRecordProducer.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/FileHeaderReader.h"
+#include "llvm/XRay/RecordPrinter.h"
+
+using namespace llvm;
+using namespace xray;
+
+static cl::SubCommand Dump("fdr-dump", "FDR Trace Dump");
+static cl::opt<std::string> DumpInput(cl::Positional,
+ cl::desc("<xray fdr mode log>"),
+ cl::Required, cl::sub(Dump));
+static cl::opt<bool> DumpVerify("verify",
+ cl::desc("verify structure of the log"),
+ cl::init(false), cl::sub(Dump));
+
+static CommandRegistration Unused(&Dump, []() -> Error {
+ // Open the file provided.
+ int Fd;
+ if (auto EC = sys::fs::openFileForRead(DumpInput, Fd))
+ return createStringError(EC, "Cannot open file '%s' for read.",
+ DumpInput.c_str());
+
+ uint64_t FileSize;
+ if (auto EC = sys::fs::file_size(DumpInput, FileSize))
+ return createStringError(EC, "Failed to get file size for '%s'.",
+ DumpInput.c_str());
+
+ std::error_code EC;
+ sys::fs::mapped_file_region MappedFile(
+ Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+
+ DataExtractor DE(StringRef(MappedFile.data(), MappedFile.size()), true, 8);
+ uint32_t OffsetPtr = 0;
+
+ auto FileHeaderOrError = readBinaryFormatHeader(DE, OffsetPtr);
+ if (!FileHeaderOrError)
+ return FileHeaderOrError.takeError();
+ auto &H = FileHeaderOrError.get();
+
+ FileBasedRecordProducer P(H, DE, OffsetPtr);
+
+ RecordPrinter RP(outs(), "\n");
+ if (!DumpVerify) {
+ PipelineConsumer C({&RP});
+ while (DE.isValidOffsetForDataOfSize(OffsetPtr, 1)) {
+ auto R = P.produce();
+ if (!R)
+ return R.takeError();
+ if (auto E = C.consume(std::move(R.get())))
+ return E;
+ }
+ return Error::success();
+ }
+
+ BlockPrinter BP(outs(), RP);
+ std::vector<std::unique_ptr<Record>> Records;
+ LogBuilderConsumer C(Records);
+ while (DE.isValidOffsetForDataOfSize(OffsetPtr, 1)) {
+ auto R = P.produce();
+ if (!R) {
+ // Print records we've found so far.
+ for (auto &Ptr : Records)
+ if (auto E = Ptr->apply(RP))
+ return joinErrors(std::move(E), R.takeError());
+ return R.takeError();
+ }
+ if (auto E = C.consume(std::move(R.get())))
+ return E;
+ }
+
+ // Once we have a trace, we then index the blocks.
+ BlockIndexer::Index Index;
+ BlockIndexer BI(Index);
+ for (auto &Ptr : Records)
+ if (auto E = Ptr->apply(BI))
+ return E;
+
+ if (auto E = BI.flush())
+ return E;
+
+ // Then we validate while printing each block.
+ BlockVerifier BV;
+ for (auto ProcessThreadBlocks : Index) {
+ auto &Blocks = ProcessThreadBlocks.second;
+ for (auto &B : Blocks) {
+ for (auto *R : B.Records) {
+ if (auto E = R->apply(BV))
+ return E;
+ if (auto E = R->apply(BP))
+ return E;
+ }
+ BV.reset();
+ BP.reset();
+ }
+ }
+ outs().flush();
+ return Error::success();
+});
diff --git a/contrib/llvm/tools/llvm-xray/xray-graph.cpp b/contrib/llvm/tools/llvm-xray/xray-graph.cpp
index c619bf86299b..fe49cca20d57 100644
--- a/contrib/llvm/tools/llvm-xray/xray-graph.cpp
+++ b/contrib/llvm/tools/llvm-xray/xray-graph.cpp
@@ -246,6 +246,10 @@ Error GraphRenderer::accountRecord(const XRayRecord &Record) {
updateStat(G[Record.FuncId].S, D);
break;
}
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // TODO: Support custom and typed events in the graph processing?
+ break;
}
return Error::success();
diff --git a/contrib/llvm/tools/llvm-xray/xray-stacks.cpp b/contrib/llvm/tools/llvm-xray/xray-stacks.cpp
index 1a6069780a31..d3af9e25e6f2 100644
--- a/contrib/llvm/tools/llvm-xray/xray-stacks.cpp
+++ b/contrib/llvm/tools/llvm-xray/xray-stacks.cpp
@@ -366,6 +366,9 @@ public:
AccountRecordState *state) {
auto &TS = ThreadStackMap[R.TId];
switch (R.Type) {
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ return AccountRecordStatus::OK;
case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG: {
state->wasLastRecordExit = false;
@@ -734,7 +737,7 @@ static CommandRegistration Unused(&Stack, []() -> Error {
Twine("Failed loading input file '") + Filename + "'",
std::make_error_code(std::errc::invalid_argument)),
TraceOrErr.takeError());
- logAllUnhandledErrors(TraceOrErr.takeError(), errs(), "");
+ logAllUnhandledErrors(TraceOrErr.takeError(), errs());
continue;
}
auto &T = *TraceOrErr;
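The only change in this hunk is that logAllUnhandledErrors no longer needs an explicit empty banner string, since that parameter became optional. A minimal sketch of the surrounding idiom, assuming llvm::xray::loadTraceFile from llvm/XRay/Trace.h and a caller-supplied file name:

  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/XRay/Trace.h"
  using namespace llvm;

  void countRecords(StringRef Filename) {
    Expected<xray::Trace> TraceOrErr = xray::loadTraceFile(Filename);
    if (!TraceOrErr) {
      // The banner argument now defaults to an empty string, so the old
      // trailing "" can simply be dropped.
      logAllUnhandledErrors(TraceOrErr.takeError(), errs());
      return;
    }
    errs() << Filename << ": " << TraceOrErr->size() << " records\n";
  }
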
diff --git a/contrib/llvm/tools/opt/Debugify.cpp b/contrib/llvm/tools/opt/Debugify.cpp
index 6c3cdc75e334..3b1effba1592 100644
--- a/contrib/llvm/tools/opt/Debugify.cpp
+++ b/contrib/llvm/tools/opt/Debugify.cpp
@@ -96,11 +96,12 @@ bool applyDebugifyMetadata(Module &M,
continue;
auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
- bool IsLocalToUnit = F.hasPrivateLinkage() || F.hasInternalLinkage();
- auto SP =
- DIB.createFunction(CU, F.getName(), F.getName(), File, NextLine, SPType,
- IsLocalToUnit, /*isDefinition=*/true, NextLine,
- DINode::FlagZero, /*isOptimized=*/true);
+ DISubprogram::DISPFlags SPFlags =
+ DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized;
+ if (F.hasPrivateLinkage() || F.hasInternalLinkage())
+ SPFlags |= DISubprogram::SPFlagLocalToUnit;
+ auto SP = DIB.createFunction(CU, F.getName(), F.getName(), File, NextLine,
+ SPType, NextLine, DINode::FlagZero, SPFlags);
F.setSubprogram(SP);
for (BasicBlock &BB : F) {
// Attach debug locations.
diff --git a/contrib/llvm/tools/opt/NewPMDriver.cpp b/contrib/llvm/tools/opt/NewPMDriver.cpp
index a91d4cb5f9cd..211a3b151fe1 100644
--- a/contrib/llvm/tools/opt/NewPMDriver.cpp
+++ b/contrib/llvm/tools/opt/NewPMDriver.cpp
@@ -13,8 +13,8 @@
///
//===----------------------------------------------------------------------===//
-#include "Debugify.h"
#include "NewPMDriver.h"
+#include "Debugify.h"
#include "PassPrinters.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -29,6 +29,7 @@
#include "llvm/IR/Verifier.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
+#include "llvm/Passes/StandardInstrumentations.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ToolOutputFile.h"
@@ -94,6 +95,12 @@ static cl::opt<std::string> PipelineStartEPPipeline(
cl::desc("A textual description of the function pass pipeline inserted at "
"the PipelineStart extension point into default pipelines"),
cl::Hidden);
+static cl::opt<std::string> OptimizerLastEPPipeline(
+ "passes-ep-optimizer-last",
+ cl::desc("A textual description of the function pass pipeline inserted at "
+ "the OptimizerLast extension point into default pipelines"),
+ cl::Hidden);
+
enum PGOKind { NoPGO, InstrGen, InstrUse, SampleUse };
static cl::opt<PGOKind> PGOKindFlag(
"pgo-kind", cl::init(NoPGO), cl::Hidden,
@@ -107,24 +114,30 @@ static cl::opt<PGOKind> PGOKindFlag(
"Use sampled profile to guide PGO.")));
static cl::opt<std::string> ProfileFile(
"profile-file", cl::desc("Path to the profile."), cl::Hidden);
+static cl::opt<std::string>
+ ProfileRemappingFile("profile-remapping-file",
+ cl::desc("Path to the profile remapping file."),
+ cl::Hidden);
static cl::opt<bool> DebugInfoForProfiling(
"new-pm-debug-info-for-profiling", cl::init(false), cl::Hidden,
cl::desc("Emit special debug info to enable PGO profile generation."));
/// @}}
template <typename PassManagerT>
-bool tryParsePipelineText(PassBuilder &PB, StringRef PipelineText) {
- if (PipelineText.empty())
+bool tryParsePipelineText(PassBuilder &PB,
+ const cl::opt<std::string> &PipelineOpt) {
+ if (PipelineOpt.empty())
return false;
// Verify the pipeline is parseable:
PassManagerT PM;
- if (PB.parsePassPipeline(PM, PipelineText))
- return true;
-
- errs() << "Could not parse pipeline '" << PipelineText
- << "'. I'm going to igore it.\n";
- return false;
+ if (auto Err = PB.parsePassPipeline(PM, PipelineOpt)) {
+ errs() << "Could not parse -" << PipelineOpt.ArgStr
+ << " pipeline: " << toString(std::move(Err))
+ << "... I'm going to ignore it.\n";
+ return false;
+ }
+ return true;
}
/// If one of the EPPipeline command line options was given, register callbacks
@@ -132,50 +145,69 @@ bool tryParsePipelineText(PassBuilder &PB, StringRef PipelineText) {
static void registerEPCallbacks(PassBuilder &PB, bool VerifyEachPass,
bool DebugLogging) {
if (tryParsePipelineText<FunctionPassManager>(PB, PeepholeEPPipeline))
- PB.registerPeepholeEPCallback([&PB, VerifyEachPass, DebugLogging](
- FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, PeepholeEPPipeline, VerifyEachPass,
- DebugLogging);
- });
+ PB.registerPeepholeEPCallback(
+ [&PB, VerifyEachPass, DebugLogging](
+ FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ ExitOnError Err("Unable to parse PeepholeEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, PeepholeEPPipeline, VerifyEachPass,
+ DebugLogging));
+ });
if (tryParsePipelineText<LoopPassManager>(PB,
LateLoopOptimizationsEPPipeline))
PB.registerLateLoopOptimizationsEPCallback(
[&PB, VerifyEachPass, DebugLogging](
LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, LateLoopOptimizationsEPPipeline,
- VerifyEachPass, DebugLogging);
+ ExitOnError Err("Unable to parse LateLoopOptimizationsEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, LateLoopOptimizationsEPPipeline,
+ VerifyEachPass, DebugLogging));
});
if (tryParsePipelineText<LoopPassManager>(PB, LoopOptimizerEndEPPipeline))
- PB.registerLoopOptimizerEndEPCallback([&PB, VerifyEachPass, DebugLogging](
- LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, LoopOptimizerEndEPPipeline, VerifyEachPass,
- DebugLogging);
- });
+ PB.registerLoopOptimizerEndEPCallback(
+ [&PB, VerifyEachPass, DebugLogging](
+ LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ ExitOnError Err("Unable to parse LoopOptimizerEndEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, LoopOptimizerEndEPPipeline,
+ VerifyEachPass, DebugLogging));
+ });
if (tryParsePipelineText<FunctionPassManager>(PB,
ScalarOptimizerLateEPPipeline))
PB.registerScalarOptimizerLateEPCallback(
[&PB, VerifyEachPass, DebugLogging](
FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, ScalarOptimizerLateEPPipeline,
- VerifyEachPass, DebugLogging);
+ ExitOnError Err("Unable to parse ScalarOptimizerLateEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, ScalarOptimizerLateEPPipeline,
+ VerifyEachPass, DebugLogging));
});
if (tryParsePipelineText<CGSCCPassManager>(PB, CGSCCOptimizerLateEPPipeline))
- PB.registerCGSCCOptimizerLateEPCallback([&PB, VerifyEachPass, DebugLogging](
- CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, CGSCCOptimizerLateEPPipeline, VerifyEachPass,
- DebugLogging);
- });
+ PB.registerCGSCCOptimizerLateEPCallback(
+ [&PB, VerifyEachPass, DebugLogging](
+ CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ ExitOnError Err("Unable to parse CGSCCOptimizerLateEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, CGSCCOptimizerLateEPPipeline,
+ VerifyEachPass, DebugLogging));
+ });
if (tryParsePipelineText<FunctionPassManager>(PB, VectorizerStartEPPipeline))
- PB.registerVectorizerStartEPCallback([&PB, VerifyEachPass, DebugLogging](
- FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, VectorizerStartEPPipeline, VerifyEachPass,
- DebugLogging);
- });
+ PB.registerVectorizerStartEPCallback(
+ [&PB, VerifyEachPass, DebugLogging](
+ FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ ExitOnError Err("Unable to parse VectorizerStartEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, VectorizerStartEPPipeline,
+ VerifyEachPass, DebugLogging));
+ });
if (tryParsePipelineText<ModulePassManager>(PB, PipelineStartEPPipeline))
PB.registerPipelineStartEPCallback(
[&PB, VerifyEachPass, DebugLogging](ModulePassManager &PM) {
- PB.parsePassPipeline(PM, PipelineStartEPPipeline, VerifyEachPass,
- DebugLogging);
+ ExitOnError Err("Unable to parse PipelineStartEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, PipelineStartEPPipeline, VerifyEachPass,
+ DebugLogging));
+ });
+ if (tryParsePipelineText<FunctionPassManager>(PB, OptimizerLastEPPipeline))
+ PB.registerOptimizerLastEPCallback(
+ [&PB, VerifyEachPass, DebugLogging](FunctionPassManager &PM,
+ PassBuilder::OptimizationLevel) {
+ ExitOnError Err("Unable to parse OptimizerLastEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline, VerifyEachPass,
+ DebugLogging));
});
}
@@ -199,21 +231,25 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
Optional<PGOOptions> P;
switch (PGOKindFlag) {
case InstrGen:
- P = PGOOptions(ProfileFile, "", "", true);
+ P = PGOOptions(ProfileFile, "", "", "", true);
break;
case InstrUse:
- P = PGOOptions("", ProfileFile, "", false);
+ P = PGOOptions("", ProfileFile, "", ProfileRemappingFile, false);
break;
case SampleUse:
- P = PGOOptions("", "", ProfileFile, false);
+ P = PGOOptions("", "", ProfileFile, ProfileRemappingFile, false);
break;
case NoPGO:
if (DebugInfoForProfiling)
- P = PGOOptions("", "", "", false, true);
+ P = PGOOptions("", "", "", "", false, true);
else
P = None;
}
- PassBuilder PB(TM, P);
+ PassInstrumentationCallbacks PIC;
+ StandardInstrumentations SI;
+ SI.registerCallbacks(PIC);
+
+ PassBuilder PB(TM, P, &PIC);
registerEPCallbacks(PB, VerifyEachPass, DebugPM);
// Load requested pass plugins and let them register pass builder callbacks
@@ -249,8 +285,8 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
// Specially handle the alias analysis manager so that we can register
// a custom pipeline of AA passes with it.
AAManager AA;
- if (!PB.parseAAPipeline(AA, AAPipeline)) {
- errs() << Arg0 << ": unable to parse AA pipeline description.\n";
+ if (auto Err = PB.parseAAPipeline(AA, AAPipeline)) {
+ errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
return false;
}
@@ -275,8 +311,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
if (EnableDebugify)
MPM.addPass(NewPMDebugifyPass());
- if (!PB.parsePassPipeline(MPM, PassPipeline, VerifyEachPass, DebugPM)) {
- errs() << Arg0 << ": unable to parse pass pipeline description.\n";
+ if (auto Err =
+ PB.parsePassPipeline(MPM, PassPipeline, VerifyEachPass, DebugPM)) {
+ errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
return false;
}
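Throughout this file, parseAAPipeline and parsePassPipeline now report failures through llvm::Error instead of a bool, which is why the driver prints toString(std::move(Err)). A short sketch of that idiom (the pipeline text and function name are arbitrary examples, not part of the patch):

  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  bool parseOrReport(PassBuilder &PB, ModulePassManager &MPM, StringRef Text) {
    // A failed parse yields an Error carrying a message; toString() consumes
    // the Error and returns the text for diagnostics.
    if (Error Err = PB.parsePassPipeline(MPM, Text)) {
      errs() << "cannot parse '" << Text << "': " << toString(std::move(Err))
             << "\n";
      return false;
    }
    return true;
  }
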
diff --git a/contrib/llvm/tools/opt/opt.cpp b/contrib/llvm/tools/opt/opt.cpp
index 6e287b6c0ab6..a4967a234d9c 100644
--- a/contrib/llvm/tools/opt/opt.cpp
+++ b/contrib/llvm/tools/opt/opt.cpp
@@ -103,6 +103,10 @@ static cl::opt<bool>
OutputThinLTOBC("thinlto-bc",
cl::desc("Write output as ThinLTO-ready bitcode"));
+static cl::opt<bool>
+ SplitLTOUnit("thinlto-split-lto-unit",
+ cl::desc("Enable splitting of a ThinLTO LTOUnit"));
+
static cl::opt<std::string> ThinLinkBitcodeFile(
"thin-link-bitcode-file", cl::value_desc("filename"),
cl::desc(
@@ -463,6 +467,7 @@ int main(int argc, char **argv) {
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeGlobalMergePass(Registry);
initializeIndirectBrExpandPassPass(Registry);
+ initializeInterleavedLoadCombinePass(Registry);
initializeInterleavedAccessPass(Registry);
initializeEntryExitInstrumenterPass(Registry);
initializePostInlineEntryExitInstrumenterPass(Registry);
@@ -595,6 +600,9 @@ int main(int argc, char **argv) {
if (CheckBitcodeOutputToConsole(Out->os(), !Quiet))
NoOutput = true;
+ if (OutputThinLTOBC)
+ M->addModuleFlag(Module::Error, "EnableSplitLTOUnit", SplitLTOUnit);
+
if (PassPipeline.getNumOccurrences() > 0) {
OutputKind OK = OK_NoOutput;
if (!NoOutput)
diff --git a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index e808661b7a51..5b4229e64682 100644
--- a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -2415,10 +2415,9 @@ static void emitOperandMatchErrorDiagStrings(AsmMatcherInfo &Info, raw_ostream &
static void emitRegisterMatchErrorFunc(AsmMatcherInfo &Info, raw_ostream &OS) {
OS << "static unsigned getDiagKindFromRegisterClass(MatchClassKind "
"RegisterClass) {\n";
- if (std::none_of(Info.Classes.begin(), Info.Classes.end(),
- [](const ClassInfo &CI) {
- return CI.isRegisterClass() && !CI.DiagnosticType.empty();
- })) {
+ if (none_of(Info.Classes, [](const ClassInfo &CI) {
+ return CI.isRegisterClass() && !CI.DiagnosticType.empty();
+ })) {
OS << " return MCTargetAsmParser::Match_InvalidOperand;\n";
} else {
OS << " switch (RegisterClass) {\n";
diff --git a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
index 3c4c9c8e5c6e..a8f191181766 100644
--- a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -835,15 +835,20 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
for (unsigned i = 0, e = LastOpNo; i != e; ++i) {
// Skip over tied operands as they're not part of an alias declaration.
auto &Operands = CGA.ResultInst->Operands;
- unsigned OpNum = Operands.getSubOperandNumber(MIOpNum).first;
- if (Operands[OpNum].MINumOperands == 1 &&
- Operands[OpNum].getTiedRegister() != -1) {
- // Tied operands of different RegisterClass should be explicit within
- // an instruction's syntax and so cannot be skipped.
- int TiedOpNum = Operands[OpNum].getTiedRegister();
- if (Operands[OpNum].Rec->getName() ==
- Operands[TiedOpNum].Rec->getName())
- ++MIOpNum;
+ while (true) {
+ unsigned OpNum = Operands.getSubOperandNumber(MIOpNum).first;
+ if (Operands[OpNum].MINumOperands == 1 &&
+ Operands[OpNum].getTiedRegister() != -1) {
+ // Tied operands of different RegisterClass should be explicit within
+ // an instruction's syntax and so cannot be skipped.
+ int TiedOpNum = Operands[OpNum].getTiedRegister();
+ if (Operands[OpNum].Rec->getName() ==
+ Operands[TiedOpNum].Rec->getName()) {
+ ++MIOpNum;
+ continue;
+ }
+ }
+ break;
}
std::string Op = "MI->getOperand(" + utostr(MIOpNum) + ")";
diff --git a/contrib/llvm/utils/TableGen/CTagsEmitter.cpp b/contrib/llvm/utils/TableGen/CTagsEmitter.cpp
index a0f83f1c9910..bd596bcb47a8 100644
--- a/contrib/llvm/utils/TableGen/CTagsEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/CTagsEmitter.cpp
@@ -73,7 +73,7 @@ void CTagsEmitter::run(raw_ostream &OS) {
for (const auto &D : Defs)
Tags.push_back(Tag(D.first, locate(D.second.get())));
// Emit tags.
- llvm::sort(Tags.begin(), Tags.end());
+ llvm::sort(Tags);
OS << "!_TAG_FILE_FORMAT\t1\t/original ctags format/\n";
OS << "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/\n";
for (const Tag &T : Tags)
diff --git a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index cc2b9d788980..96c90c9cf6bd 100644
--- a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -13,7 +13,9 @@
//===----------------------------------------------------------------------===//
#include "CodeGenDAGPatterns.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
@@ -26,6 +28,7 @@
#include "llvm/TableGen/Record.h"
#include <algorithm>
#include <cstdio>
+#include <iterator>
#include <set>
using namespace llvm;
@@ -99,22 +102,29 @@ bool TypeSetByHwMode::isPossible() const {
bool TypeSetByHwMode::insert(const ValueTypeByHwMode &VVT) {
bool Changed = false;
+ bool ContainsDefault = false;
+ MVT DT = MVT::Other;
+
SmallDenseSet<unsigned, 4> Modes;
for (const auto &P : VVT) {
unsigned M = P.first;
Modes.insert(M);
// Make sure there exists a set for each specific mode from VVT.
Changed |= getOrCreate(M).insert(P.second).second;
+ // Cache VVT's default mode.
+ if (DefaultMode == M) {
+ ContainsDefault = true;
+ DT = P.second;
+ }
}
// If VVT has a default mode, add the corresponding type to all
// modes in "this" that do not exist in VVT.
- if (Modes.count(DefaultMode)) {
- MVT DT = VVT.getType(DefaultMode);
+ if (ContainsDefault)
for (auto &I : *this)
if (!Modes.count(I.first))
Changed |= I.second.insert(DT).second;
- }
+
return Changed;
}
@@ -198,16 +208,18 @@ void TypeSetByHwMode::writeToStream(const SetType &S, raw_ostream &OS) {
}
bool TypeSetByHwMode::operator==(const TypeSetByHwMode &VTS) const {
- bool HaveDefault = hasDefault();
- if (HaveDefault != VTS.hasDefault())
+ // The isSimple call is much quicker than hasDefault - check this first.
+ bool IsSimple = isSimple();
+ bool VTSIsSimple = VTS.isSimple();
+ if (IsSimple && VTSIsSimple)
+ return *begin() == *VTS.begin();
+
+ // Speedup: We have a default if the set is simple.
+ bool HaveDefault = IsSimple || hasDefault();
+ bool VTSHaveDefault = VTSIsSimple || VTS.hasDefault();
+ if (HaveDefault != VTSHaveDefault)
return false;
- if (isSimple()) {
- if (VTS.isSimple())
- return *begin() == *VTS.begin();
- return false;
- }
-
SmallDenseSet<unsigned, 4> Modes;
for (auto &I : *this)
Modes.insert(I.first);
@@ -731,17 +743,12 @@ bool TypeInfer::EnforceSameSize(TypeSetByHwMode &A, TypeSetByHwMode &B) {
void TypeInfer::expandOverloads(TypeSetByHwMode &VTS) {
ValidateOnExit _1(VTS, *this);
- TypeSetByHwMode Legal = getLegalTypes();
- bool HaveLegalDef = Legal.hasDefault();
+ const TypeSetByHwMode &Legal = getLegalTypes();
+ assert(Legal.isDefaultOnly() && "Default-mode only expected");
+ const TypeSetByHwMode::SetType &LegalTypes = Legal.get(DefaultMode);
- for (auto &I : VTS) {
- unsigned M = I.first;
- if (!Legal.hasMode(M) && !HaveLegalDef) {
- TP.error("Invalid mode " + Twine(M));
- return;
- }
- expandOverloads(I.second, Legal.get(M));
- }
+ for (auto &I : VTS)
+ expandOverloads(I.second, LegalTypes);
}
void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out,
@@ -793,17 +800,17 @@ void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out,
}
}
-TypeSetByHwMode TypeInfer::getLegalTypes() {
+const TypeSetByHwMode &TypeInfer::getLegalTypes() {
if (!LegalTypesCached) {
+ TypeSetByHwMode::SetType &LegalTypes = LegalCache.getOrCreate(DefaultMode);
// Stuff all types from all modes into the default mode.
const TypeSetByHwMode &LTS = TP.getDAGPatterns().getLegalTypes();
for (const auto &I : LTS)
- LegalCache.insert(I.second);
+ LegalTypes.insert(I.second);
LegalTypesCached = true;
}
- TypeSetByHwMode VTS;
- VTS.getOrCreate(DefaultMode) = LegalCache;
- return VTS;
+ assert(LegalCache.isDefaultOnly() && "Default-mode only expected");
+ return LegalCache;
}
#ifndef NDEBUG
@@ -819,6 +826,20 @@ TypeInfer::ValidateOnExit::~ValidateOnExit() {
}
#endif
+
+//===----------------------------------------------------------------------===//
+// ScopedName Implementation
+//===----------------------------------------------------------------------===//
+
+bool ScopedName::operator==(const ScopedName &o) const {
+ return Scope == o.Scope && Identifier == o.Identifier;
+}
+
+bool ScopedName::operator!=(const ScopedName &o) const {
+ return !(*this == o);
+}
+
+
//===----------------------------------------------------------------------===//
// TreePredicateFn Implementation
//===----------------------------------------------------------------------===//
@@ -1064,6 +1085,9 @@ bool TreePredicateFn::isPredefinedPredicateEqualTo(StringRef Field,
return false;
return Result == Value;
}
+bool TreePredicateFn::usesOperands() const {
+ return isPredefinedPredicateEqualTo("PredicateCodeUsesOperands", true);
+}
bool TreePredicateFn::isLoad() const {
return isPredefinedPredicateEqualTo("IsLoad", true);
}
@@ -1245,7 +1269,7 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const {
else
Result = " auto *N = cast<" + ClassName.str() + ">(Node);\n";
- return Result + getPredCode();
+ return (Twine(Result) + " (void)N;\n" + getPredCode()).str();
}
//===----------------------------------------------------------------------===//
@@ -1271,14 +1295,14 @@ static unsigned getPatternSize(const TreePatternNode *P,
// If this node has some predicate function that must match, it adds to the
// complexity of this node.
- if (!P->getPredicateFns().empty())
+ if (!P->getPredicateCalls().empty())
++Size;
// Count children in the count if they are also nodes.
for (unsigned i = 0, e = P->getNumChildren(); i != e; ++i) {
const TreePatternNode *Child = P->getChild(i);
if (!Child->isLeaf() && Child->getNumTypes()) {
- const TypeSetByHwMode &T0 = Child->getType(0);
+ const TypeSetByHwMode &T0 = Child->getExtType(0);
// At this point, all variable type sets should be simple, i.e. only
// have a default mode.
if (T0.getMachineValueType() != MVT::Other) {
@@ -1291,7 +1315,7 @@ static unsigned getPatternSize(const TreePatternNode *P,
Size += 5; // Matches a ConstantSDNode (+3) and a specific value (+2).
else if (Child->getComplexPatternInfo(CGP))
Size += getPatternSize(Child, CGP);
- else if (!Child->getPredicateFns().empty())
+ else if (!Child->getPredicateCalls().empty())
++Size;
}
}
@@ -1313,7 +1337,7 @@ std::string PatternToMatch::getPredicateCheck() const {
SmallVector<const Predicate*,4> PredList;
for (const Predicate &P : Predicates)
PredList.push_back(&P);
- llvm::sort(PredList.begin(), PredList.end(), deref<llvm::less>());
+ llvm::sort(PredList, deref<llvm::less>());
std::string Check;
for (unsigned i = 0, e = PredList.size(); i != e; ++i) {
@@ -1746,13 +1770,19 @@ void TreePatternNode::print(raw_ostream &OS) const {
OS << ")";
}
- for (const TreePredicateFn &Pred : PredicateFns)
- OS << "<<P:" << Pred.getFnName() << ">>";
+ for (const TreePredicateCall &Pred : PredicateCalls) {
+ OS << "<<P:";
+ if (Pred.Scope)
+ OS << Pred.Scope << ":";
+ OS << Pred.Fn.getFnName() << ">>";
+ }
if (TransformFn)
OS << "<<X:" << TransformFn->getName() << ">>";
if (!getName().empty())
OS << ":$" << getName();
+ for (const ScopedName &Name : NamesAsPredicateArg)
+ OS << ":$pred:" << Name.getScope() << ":" << Name.getIdentifier();
}
void TreePatternNode::dump() const {
print(errs());
@@ -1769,7 +1799,7 @@ bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N,
const MultipleUseVarSet &DepVars) const {
if (N == this) return true;
if (N->isLeaf() != isLeaf() || getExtTypes() != N->getExtTypes() ||
- getPredicateFns() != N->getPredicateFns() ||
+ getPredicateCalls() != N->getPredicateCalls() ||
getTransformFn() != N->getTransformFn())
return false;
@@ -1807,8 +1837,9 @@ TreePatternNodePtr TreePatternNode::clone() const {
getNumTypes());
}
New->setName(getName());
+ New->setNamesAsPredicateArg(getNamesAsPredicateArg());
New->Types = Types;
- New->setPredicateFns(getPredicateFns());
+ New->setPredicateCalls(getPredicateCalls());
New->setTransformFn(getTransformFn());
return New;
}
@@ -1840,8 +1871,8 @@ void TreePatternNode::SubstituteFormalArguments(
// We found a use of a formal argument, replace it with its value.
TreePatternNodePtr NewChild = ArgMap[Child->getName()];
assert(NewChild && "Couldn't find formal argument!");
- assert((Child->getPredicateFns().empty() ||
- NewChild->getPredicateFns() == Child->getPredicateFns()) &&
+ assert((Child->getPredicateCalls().empty() ||
+ NewChild->getPredicateCalls() == Child->getPredicateCalls()) &&
"Non-empty child predicate clobbered!");
setChild(i, std::move(NewChild));
}
@@ -1887,8 +1918,8 @@ void TreePatternNode::InlinePatternFragments(
return;
for (auto NewChild : ChildAlternatives[i])
- assert((Child->getPredicateFns().empty() ||
- NewChild->getPredicateFns() == Child->getPredicateFns()) &&
+ assert((Child->getPredicateCalls().empty() ||
+ NewChild->getPredicateCalls() == Child->getPredicateCalls()) &&
"Non-empty child predicate clobbered!");
}
@@ -1906,10 +1937,13 @@ void TreePatternNode::InlinePatternFragments(
// Copy over properties.
R->setName(getName());
- R->setPredicateFns(getPredicateFns());
+ R->setNamesAsPredicateArg(getNamesAsPredicateArg());
+ R->setPredicateCalls(getPredicateCalls());
R->setTransformFn(getTransformFn());
for (unsigned i = 0, e = getNumTypes(); i != e; ++i)
R->setType(i, getExtType(i));
+ for (unsigned i = 0, e = getNumResults(); i != e; ++i)
+ R->setResultIndex(i, getResultIndex(i));
// Register alternative.
OutAlternatives.push_back(R);
@@ -1941,10 +1975,19 @@ void TreePatternNode::InlinePatternFragments(
return;
}
+ TreePredicateFn PredFn(Frag);
+ unsigned Scope = 0;
+ if (TreePredicateFn(Frag).usesOperands())
+ Scope = TP.getDAGPatterns().allocateScope();
+
// Compute the map of formal to actual arguments.
std::map<std::string, TreePatternNodePtr> ArgMap;
for (unsigned i = 0, e = Frag->getNumArgs(); i != e; ++i) {
- const TreePatternNodePtr &Child = getChildShared(i);
+ TreePatternNodePtr Child = getChildShared(i);
+ if (Scope != 0) {
+ Child = Child->clone();
+ Child->addNameAsPredicateArg(ScopedName(Scope, Frag->getArgName(i)));
+ }
ArgMap[Frag->getArgName(i)] = Child;
}
@@ -1952,9 +1995,8 @@ void TreePatternNode::InlinePatternFragments(
for (auto Alternative : Frag->getTrees()) {
TreePatternNodePtr FragTree = Alternative->clone();
- TreePredicateFn PredFn(Frag);
if (!PredFn.isAlwaysTrue())
- FragTree->addPredicateFn(PredFn);
+ FragTree->addPredicateCall(PredFn, Scope);
// Resolve formal arguments to their actual value.
if (Frag->getNumArgs())
@@ -1967,8 +2009,8 @@ void TreePatternNode::InlinePatternFragments(
FragTree->UpdateNodeType(i, getExtType(i), TP);
// Transfer in the old predicates.
- for (const TreePredicateFn &Pred : getPredicateFns())
- FragTree->addPredicateFn(Pred);
+ for (const TreePredicateCall &Pred : getPredicateCalls())
+ FragTree->addPredicateCall(Pred);
// The fragment we inlined could have recursive inlining that is needed. See
// if there are any pattern fragments in it and inline them as needed.
@@ -3032,13 +3074,6 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) {
P->error("Operands list does not contain an entry for operand '" +
*OperandsSet.begin() + "'!");
- // If there is a code init for this fragment, keep track of the fact that
- // this fragment uses it.
- TreePredicateFn PredFn(P);
- if (!PredFn.isAlwaysTrue())
- for (auto T : P->getTrees())
- T->addPredicateFn(PredFn);
-
// If there is a node transformation corresponding to this, keep track of
// it.
Record *Transform = Frag->getValueAsDef("OperandTransform");
@@ -3176,7 +3211,8 @@ static bool HandleUse(TreePattern &I, TreePatternNodePtr Pat,
void CodeGenDAGPatterns::FindPatternInputsAndOutputs(
TreePattern &I, TreePatternNodePtr Pat,
std::map<std::string, TreePatternNodePtr> &InstInputs,
- std::map<std::string, TreePatternNodePtr> &InstResults,
+ MapVector<std::string, TreePatternNodePtr, std::map<std::string, unsigned>>
+ &InstResults,
std::vector<Record *> &InstImpResults) {
// The instruction pattern still has unresolved fragments. For *named*
@@ -3496,7 +3532,8 @@ void CodeGenDAGPatterns::parseInstructionPattern(
// InstResults - Keep track of all the virtual registers that are 'set'
// in the instruction, including what reg class they are.
- std::map<std::string, TreePatternNodePtr> InstResults;
+ MapVector<std::string, TreePatternNodePtr, std::map<std::string, unsigned>>
+ InstResults;
std::vector<Record*> InstImpResults;
@@ -3533,19 +3570,28 @@ void CodeGenDAGPatterns::parseInstructionPattern(
// Check that all of the results occur first in the list.
std::vector<Record*> Results;
+ std::vector<unsigned> ResultIndices;
SmallVector<TreePatternNodePtr, 2> ResNodes;
for (unsigned i = 0; i != NumResults; ++i) {
- if (i == CGI.Operands.size())
- I.error("'" + InstResults.begin()->first +
- "' set but does not appear in operand list!");
+ if (i == CGI.Operands.size()) {
+ const std::string &OpName =
+ std::find_if(InstResults.begin(), InstResults.end(),
+ [](const std::pair<std::string, TreePatternNodePtr> &P) {
+ return P.second;
+ })
+ ->first;
+
+ I.error("'" + OpName + "' set but does not appear in operand list!");
+ }
+
const std::string &OpName = CGI.Operands[i].Name;
// Check that it exists in InstResults.
- TreePatternNodePtr RNode = InstResults[OpName];
- if (!RNode)
+ auto InstResultIter = InstResults.find(OpName);
+ if (InstResultIter == InstResults.end() || !InstResultIter->second)
I.error("Operand $" + OpName + " does not exist in operand list!");
-
+ TreePatternNodePtr RNode = InstResultIter->second;
Record *R = cast<DefInit>(RNode->getLeafValue())->getDef();
ResNodes.push_back(std::move(RNode));
if (!R)
@@ -3558,8 +3604,11 @@ void CodeGenDAGPatterns::parseInstructionPattern(
// Remember the return type.
Results.push_back(CGI.Operands[i].Rec);
+ // Remember the result index.
+ ResultIndices.push_back(std::distance(InstResults.begin(), InstResultIter));
+
// Okay, this one checks out.
- InstResults.erase(OpName);
+ InstResultIter->second = nullptr;
}
// Loop over the inputs next.
@@ -3598,7 +3647,7 @@ void CodeGenDAGPatterns::parseInstructionPattern(
TreePatternNodePtr OpNode = InVal->clone();
// No predicate is useful on the result.
- OpNode->clearPredicateFns();
+ OpNode->clearPredicateCalls();
// Promote the xform function to be an explicit node if set.
if (Record *Xform = OpNode->getTransformFn()) {
@@ -3623,6 +3672,7 @@ void CodeGenDAGPatterns::parseInstructionPattern(
for (unsigned i = 0; i != NumResults; ++i) {
assert(ResNodes[i]->getNumTypes() == 1 && "FIXME: Unhandled");
ResultPattern->setType(i, ResNodes[i]->getExtType(0));
+ ResultPattern->setResultIndex(i, ResultIndices[i]);
}
// FIXME: Assume only the first tree is the pattern. The others are clobber
@@ -3737,7 +3787,7 @@ std::vector<Predicate> CodeGenDAGPatterns::makePredList(ListInit *L) {
}
// Sort so that different orders get canonicalized to the same string.
- llvm::sort(Preds.begin(), Preds.end());
+ llvm::sort(Preds);
return Preds;
}
@@ -4082,7 +4132,8 @@ void CodeGenDAGPatterns::ParsePatterns() {
// Validate that the input pattern is correct.
std::map<std::string, TreePatternNodePtr> InstInputs;
- std::map<std::string, TreePatternNodePtr> InstResults;
+ MapVector<std::string, TreePatternNodePtr, std::map<std::string, unsigned>>
+ InstResults;
std::vector<Record*> InstImpResults;
for (unsigned j = 0, ee = Pattern.getNumTrees(); j != ee; ++j)
FindPatternInputsAndOutputs(Pattern, Pattern.getTree(j), InstInputs,
@@ -4253,7 +4304,8 @@ static void CombineChildVariants(
// Copy over properties.
R->setName(Orig->getName());
- R->setPredicateFns(Orig->getPredicateFns());
+ R->setNamesAsPredicateArg(Orig->getNamesAsPredicateArg());
+ R->setPredicateCalls(Orig->getPredicateCalls());
R->setTransformFn(Orig->getTransformFn());
for (unsigned i = 0, e = Orig->getNumTypes(); i != e; ++i)
R->setType(i, Orig->getExtType(i));
@@ -4305,7 +4357,7 @@ GatherChildrenOfAssociativeOpcode(TreePatternNodePtr N,
Record *Operator = N->getOperator();
// Only permit raw nodes.
- if (!N->getName().empty() || !N->getPredicateFns().empty() ||
+ if (!N->getName().empty() || !N->getPredicateCalls().empty() ||
N->getTransformFn()) {
Children.push_back(N);
return;
@@ -4456,8 +4508,18 @@ void CodeGenDAGPatterns::GenerateVariants() {
// intentionally do not reconsider these. Any variants of added patterns have
// already been added.
//
- for (unsigned i = 0, e = PatternsToMatch.size(); i != e; ++i) {
- MultipleUseVarSet DepVars;
+ const unsigned NumOriginalPatterns = PatternsToMatch.size();
+ BitVector MatchedPatterns(NumOriginalPatterns);
+ std::vector<BitVector> MatchedPredicates(NumOriginalPatterns,
+ BitVector(NumOriginalPatterns));
+
+ typedef std::pair<MultipleUseVarSet, std::vector<TreePatternNodePtr>>
+ DepsAndVariants;
+ std::map<unsigned, DepsAndVariants> PatternsWithVariants;
+
+ // Collect patterns with more than one variant.
+ for (unsigned i = 0; i != NumOriginalPatterns; ++i) {
+ MultipleUseVarSet DepVars;
std::vector<TreePatternNodePtr> Variants;
FindDepVars(PatternsToMatch[i].getSrcPattern(), DepVars);
LLVM_DEBUG(errs() << "Dependent/multiply used variables: ");
@@ -4467,14 +4529,46 @@ void CodeGenDAGPatterns::GenerateVariants() {
*this, DepVars);
assert(!Variants.empty() && "Must create at least original variant!");
- if (Variants.size() == 1) // No additional variants for this pattern.
+ if (Variants.size() == 1) // No additional variants for this pattern.
continue;
LLVM_DEBUG(errs() << "FOUND VARIANTS OF: ";
PatternsToMatch[i].getSrcPattern()->dump(); errs() << "\n");
+ PatternsWithVariants[i] = std::make_pair(DepVars, Variants);
+
+ // Cache matching predicates.
+ if (MatchedPatterns[i])
+ continue;
+
+ const std::vector<Predicate> &Predicates =
+ PatternsToMatch[i].getPredicates();
+
+ BitVector &Matches = MatchedPredicates[i];
+ MatchedPatterns.set(i);
+ Matches.set(i);
+
+ // Don't test patterns that have already been cached - it won't match.
+ for (unsigned p = 0; p != NumOriginalPatterns; ++p)
+ if (!MatchedPatterns[p])
+ Matches[p] = (Predicates == PatternsToMatch[p].getPredicates());
+
+ // Copy this to all the matching patterns.
+ for (int p = Matches.find_first(); p != -1; p = Matches.find_next(p))
+ if (p != (int)i) {
+ MatchedPatterns.set(p);
+ MatchedPredicates[p] = Matches;
+ }
+ }
+
+ for (auto it : PatternsWithVariants) {
+ unsigned i = it.first;
+ const MultipleUseVarSet &DepVars = it.second.first;
+ const std::vector<TreePatternNodePtr> &Variants = it.second.second;
+
for (unsigned v = 0, e = Variants.size(); v != e; ++v) {
TreePatternNodePtr Variant = Variants[v];
+ BitVector &Matches = MatchedPredicates[i];
LLVM_DEBUG(errs() << " VAR#" << v << ": "; Variant->dump();
errs() << "\n");
@@ -4483,8 +4577,7 @@ void CodeGenDAGPatterns::GenerateVariants() {
bool AlreadyExists = false;
for (unsigned p = 0, e = PatternsToMatch.size(); p != e; ++p) {
// Skip if the top level predicates do not match.
- if (PatternsToMatch[i].getPredicates() !=
- PatternsToMatch[p].getPredicates())
+ if (!Matches[p])
continue;
// Check to see if this variant already exists.
if (Variant->isIsomorphicTo(PatternsToMatch[p].getSrcPattern(),
@@ -4503,6 +4596,11 @@ void CodeGenDAGPatterns::GenerateVariants() {
Variant, PatternsToMatch[i].getDstPatternShared(),
PatternsToMatch[i].getDstRegs(),
PatternsToMatch[i].getAddedComplexity(), Record::getNewUID()));
+ MatchedPredicates.push_back(Matches);
+
+ // Add a new match the same as this pattern.
+ for (auto &P : MatchedPredicates)
+ P.push_back(P[i]);
}
LLVM_DEBUG(errs() << "\n");
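GenerateVariants now precomputes which patterns have identical predicate lists in a vector of BitVectors, instead of re-comparing the predicate vectors inside the inner loop. A standalone illustration of the BitVector operations it relies on (the size and indices below are made up):

  #include "llvm/ADT/BitVector.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void walkMatches() {
    BitVector Matches(8); // one bit per pattern, all initially clear
    Matches.set(1);
    Matches.set(5);
    // Visit only the set bits, the way the variant loop walks the patterns
    // whose predicate lists compare equal.
    for (int P = Matches.find_first(); P != -1; P = Matches.find_next(P))
      outs() << "pattern #" << P << " shares predicates\n";
  }
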
diff --git a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
index 9be3816cc7fc..4be9afdcacd2 100644
--- a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -19,6 +19,7 @@
#include "CodeGenIntrinsics.h"
#include "CodeGenTarget.h"
#include "SDNodeProperties.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
@@ -28,6 +29,7 @@
#include <array>
#include <functional>
#include <map>
+#include <numeric>
#include <set>
#include <vector>
@@ -350,11 +352,11 @@ struct TypeInfer {
bool Validate = true; // Indicate whether to validate types.
private:
- TypeSetByHwMode getLegalTypes();
+ const TypeSetByHwMode &getLegalTypes();
- /// Cached legal types.
+ /// Cached legal types (in default mode).
bool LegalTypesCached = false;
- TypeSetByHwMode::SetType LegalCache = {};
+ TypeSetByHwMode LegalCache;
};
/// Set type used to track multiply used variables in patterns
@@ -408,6 +410,29 @@ struct SDTypeConstraint {
TreePattern &TP) const;
};
+/// ScopedName - A name of a node associated with a "scope" that indicates
+/// the context (e.g. instance of Pattern or PatFrag) in which the name was
+/// used. This enables substitution of pattern fragments while keeping track
+/// of what name(s) were originally given to various nodes in the tree.
+class ScopedName {
+ unsigned Scope;
+ std::string Identifier;
+public:
+ ScopedName(unsigned Scope, StringRef Identifier)
+ : Scope(Scope), Identifier(Identifier) {
+ assert(Scope != 0 &&
+ "Scope == 0 is used to indicate predicates without arguments");
+ }
+
+ unsigned getScope() const { return Scope; }
+ const std::string &getIdentifier() const { return Identifier; }
+
+ std::string getFullName() const;
+
+ bool operator==(const ScopedName &o) const;
+ bool operator!=(const ScopedName &o) const;
+};
+
/// SDNodeInfo - One of these records is created for each SDNode instance in
/// the target .td file. This represents the various dag nodes we will be
/// processing.
@@ -503,6 +528,9 @@ public:
/// usable as part of an identifier.
StringRef getImmTypeIdentifier() const;
+ // Predicate code uses the PatFrag's captured operands.
+ bool usesOperands() const;
+
// Is the desired predefined predicate for a load?
bool isLoad() const;
// Is the desired predefined predicate for a store?
@@ -570,6 +598,23 @@ private:
bool isPredefinedPredicateEqualTo(StringRef Field, bool Value) const;
};
+struct TreePredicateCall {
+ TreePredicateFn Fn;
+
+ // Scope -- unique identifier for retrieving named arguments. 0 is used when
+ // the predicate does not use named arguments.
+ unsigned Scope;
+
+ TreePredicateCall(const TreePredicateFn &Fn, unsigned Scope)
+ : Fn(Fn), Scope(Scope) {}
+
+ bool operator==(const TreePredicateCall &o) const {
+ return Fn == o.Fn && Scope == o.Scope;
+ }
+ bool operator!=(const TreePredicateCall &o) const {
+ return !(*this == o);
+ }
+};
class TreePatternNode {
/// The type of each node result. Before and during type inference, each
@@ -577,6 +622,9 @@ class TreePatternNode {
/// each is a single concrete type.
std::vector<TypeSetByHwMode> Types;
+ /// The index of each result in results of the pattern.
+ std::vector<unsigned> ResultPerm;
+
/// Operator - The Record for the operator if this is an interior node (not
/// a leaf).
Record *Operator;
@@ -589,9 +637,11 @@ class TreePatternNode {
///
std::string Name;
- /// PredicateFns - The predicate functions to execute on this node to check
+ std::vector<ScopedName> NamesAsPredicateArg;
+
+ /// PredicateCalls - The predicate functions to execute on this node to check
/// for a match. If this list is empty, no predicate is involved.
- std::vector<TreePredicateFn> PredicateFns;
+ std::vector<TreePredicateCall> PredicateCalls;
/// TransformFn - The transformation function to execute on this node before
/// it can be substituted into the resulting instruction on a pattern match.
@@ -605,16 +655,30 @@ public:
: Operator(Op), Val(nullptr), TransformFn(nullptr),
Children(std::move(Ch)) {
Types.resize(NumResults);
+ ResultPerm.resize(NumResults);
+ std::iota(ResultPerm.begin(), ResultPerm.end(), 0);
}
TreePatternNode(Init *val, unsigned NumResults) // leaf ctor
: Operator(nullptr), Val(val), TransformFn(nullptr) {
Types.resize(NumResults);
+ ResultPerm.resize(NumResults);
+ std::iota(ResultPerm.begin(), ResultPerm.end(), 0);
}
bool hasName() const { return !Name.empty(); }
const std::string &getName() const { return Name; }
void setName(StringRef N) { Name.assign(N.begin(), N.end()); }
+ const std::vector<ScopedName> &getNamesAsPredicateArg() const {
+ return NamesAsPredicateArg;
+ }
+ void setNamesAsPredicateArg(const std::vector<ScopedName>& Names) {
+ NamesAsPredicateArg = Names;
+ }
+ void addNameAsPredicateArg(const ScopedName &N) {
+ NamesAsPredicateArg.push_back(N);
+ }
+
bool isLeaf() const { return Val != nullptr; }
// Type accessors.
@@ -639,6 +703,10 @@ public:
return Types[ResNo].empty();
}
+ unsigned getNumResults() const { return ResultPerm.size(); }
+ unsigned getResultIndex(unsigned ResNo) const { return ResultPerm[ResNo]; }
+ void setResultIndex(unsigned ResNo, unsigned RI) { ResultPerm[ResNo] = RI; }
+
Init *getLeafValue() const { assert(isLeaf()); return Val; }
Record *getOperator() const { assert(!isLeaf()); return Operator; }
@@ -661,20 +729,24 @@ public:
bool hasPossibleType() const;
bool setDefaultMode(unsigned Mode);
- bool hasAnyPredicate() const { return !PredicateFns.empty(); }
+ bool hasAnyPredicate() const { return !PredicateCalls.empty(); }
- const std::vector<TreePredicateFn> &getPredicateFns() const {
- return PredicateFns;
+ const std::vector<TreePredicateCall> &getPredicateCalls() const {
+ return PredicateCalls;
}
- void clearPredicateFns() { PredicateFns.clear(); }
- void setPredicateFns(const std::vector<TreePredicateFn> &Fns) {
- assert(PredicateFns.empty() && "Overwriting non-empty predicate list!");
- PredicateFns = Fns;
+ void clearPredicateCalls() { PredicateCalls.clear(); }
+ void setPredicateCalls(const std::vector<TreePredicateCall> &Calls) {
+ assert(PredicateCalls.empty() && "Overwriting non-empty predicate list!");
+ PredicateCalls = Calls;
}
- void addPredicateFn(const TreePredicateFn &Fn) {
- assert(!Fn.isAlwaysTrue() && "Empty predicate string!");
- if (!is_contained(PredicateFns, Fn))
- PredicateFns.push_back(Fn);
+ void addPredicateCall(const TreePredicateCall &Call) {
+ assert(!Call.Fn.isAlwaysTrue() && "Empty predicate string!");
+ assert(!is_contained(PredicateCalls, Call) && "predicate applied recursively");
+ PredicateCalls.push_back(Call);
+ }
+ void addPredicateCall(const TreePredicateFn &Fn, unsigned Scope) {
+ assert((Scope != 0) == Fn.usesOperands());
+ addPredicateCall(TreePredicateCall(Fn, Scope));
}
Record *getTransformFn() const { return TransformFn; }
@@ -1081,6 +1153,8 @@ class CodeGenDAGPatterns {
using PatternRewriterFn = std::function<void (TreePattern *)>;
PatternRewriterFn PatternRewriter;
+ unsigned NumScopes = 0;
+
public:
CodeGenDAGPatterns(RecordKeeper &R,
PatternRewriterFn PatternRewriter = nullptr);
@@ -1196,6 +1270,8 @@ public:
bool hasTargetIntrinsics() { return !TgtIntrinsics.empty(); }
+ unsigned allocateScope() { return ++NumScopes; }
+
private:
void ParseNodeInfo();
void ParseNodeTransforms();
@@ -1218,7 +1294,8 @@ private:
void FindPatternInputsAndOutputs(
TreePattern &I, TreePatternNodePtr Pat,
std::map<std::string, TreePatternNodePtr> &InstInputs,
- std::map<std::string, TreePatternNodePtr> &InstResults,
+ MapVector<std::string, TreePatternNodePtr,
+ std::map<std::string, unsigned>> &InstResults,
std::vector<Record *> &InstImpResults);
};
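InstResults switches from std::map to a MapVector keyed by the same std::map type, so lookups behave as before but iteration follows insertion order; that is what lets the .cpp side compute a stable result index with std::distance. A small standalone sketch with dummy operand names:

  #include "llvm/ADT/MapVector.h"
  #include "llvm/Support/raw_ostream.h"
  #include <map>
  #include <string>
  using namespace llvm;

  void printInOrder() {
    MapVector<std::string, int, std::map<std::string, unsigned>> Results;
    Results.insert({"dst", 0});
    Results.insert({"carry", 1});
    // Unlike std::map, iteration yields "dst" before "carry", regardless of
    // the keys' lexicographic order.
    for (const auto &KV : Results)
      outs() << KV.first << " -> " << KV.second << "\n";
  }
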
diff --git a/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp b/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
index eb35020d3d3a..6d06ba2c8b67 100644
--- a/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
@@ -202,7 +202,8 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
return std::make_pair(0U, 0U);
}
-static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) {
+static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops,
+ Record *Rec) {
// EARLY_CLOBBER: @early $reg
std::string::size_type wpos = CStr.find_first_of(" \t");
std::string::size_type start = CStr.find_first_not_of(" \t");
@@ -211,13 +212,17 @@ static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) {
std::string Name = CStr.substr(wpos+1);
wpos = Name.find_first_not_of(" \t");
if (wpos == std::string::npos)
- PrintFatalError("Illegal format for @earlyclobber constraint: '" + CStr + "'");
+ PrintFatalError(
+ Rec->getLoc(), "Illegal format for @earlyclobber constraint in '" +
+ Rec->getName() + "': '" + CStr + "'");
Name = Name.substr(wpos);
std::pair<unsigned,unsigned> Op = Ops.ParseOperandName(Name, false);
// Build the string for the operand
if (!Ops[Op.first].Constraints[Op.second].isNone())
- PrintFatalError("Operand '" + Name + "' cannot have multiple constraints!");
+ PrintFatalError(
+ Rec->getLoc(), "Operand '" + Name + "' of '" + Rec->getName() +
+ "' cannot have multiple constraints!");
Ops[Op.first].Constraints[Op.second] =
CGIOperandList::ConstraintInfo::getEarlyClobber();
return;
@@ -225,39 +230,73 @@ static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) {
// Only other constraint is "TIED_TO" for now.
std::string::size_type pos = CStr.find_first_of('=');
- assert(pos != std::string::npos && "Unrecognized constraint");
+ if (pos == std::string::npos)
+ PrintFatalError(
+ Rec->getLoc(), "Unrecognized constraint '" + CStr +
+ "' in '" + Rec->getName() + "'");
start = CStr.find_first_not_of(" \t");
- std::string Name = CStr.substr(start, pos - start);
// TIED_TO: $src1 = $dst
- wpos = Name.find_first_of(" \t");
+ wpos = CStr.find_first_of(" \t", start);
+ if (wpos == std::string::npos || wpos > pos)
+ PrintFatalError(
+ Rec->getLoc(), "Illegal format for tied-to constraint in '" +
+ Rec->getName() + "': '" + CStr + "'");
+ std::string LHSOpName = StringRef(CStr).substr(start, wpos - start);
+ std::pair<unsigned,unsigned> LHSOp = Ops.ParseOperandName(LHSOpName, false);
+
+ wpos = CStr.find_first_not_of(" \t", pos + 1);
if (wpos == std::string::npos)
- PrintFatalError("Illegal format for tied-to constraint: '" + CStr + "'");
- std::string DestOpName = Name.substr(0, wpos);
- std::pair<unsigned,unsigned> DestOp = Ops.ParseOperandName(DestOpName, false);
-
- Name = CStr.substr(pos+1);
- wpos = Name.find_first_not_of(" \t");
- if (wpos == std::string::npos)
- PrintFatalError("Illegal format for tied-to constraint: '" + CStr + "'");
+ PrintFatalError(
+ Rec->getLoc(), "Illegal format for tied-to constraint: '" + CStr + "'");
+
+ std::string RHSOpName = StringRef(CStr).substr(wpos);
+ std::pair<unsigned,unsigned> RHSOp = Ops.ParseOperandName(RHSOpName, false);
+
+ // Sort the operands into order, which should put the output one
+ // first. But keep the original order, for use in diagnostics.
+ bool FirstIsDest = (LHSOp < RHSOp);
+ std::pair<unsigned,unsigned> DestOp = (FirstIsDest ? LHSOp : RHSOp);
+ StringRef DestOpName = (FirstIsDest ? LHSOpName : RHSOpName);
+ std::pair<unsigned,unsigned> SrcOp = (FirstIsDest ? RHSOp : LHSOp);
+ StringRef SrcOpName = (FirstIsDest ? RHSOpName : LHSOpName);
+
+ // Ensure one operand is a def and the other is a use.
+ if (DestOp.first >= Ops.NumDefs)
+ PrintFatalError(
+ Rec->getLoc(), "Input operands '" + LHSOpName + "' and '" + RHSOpName +
+ "' of '" + Rec->getName() + "' cannot be tied!");
+ if (SrcOp.first < Ops.NumDefs)
+ PrintFatalError(
+ Rec->getLoc(), "Output operands '" + LHSOpName + "' and '" + RHSOpName +
+ "' of '" + Rec->getName() + "' cannot be tied!");
+
+ // The constraint has to go on the operand with higher index, i.e.
+ // the source one. Check there isn't another constraint there
+ // already.
+ if (!Ops[SrcOp.first].Constraints[SrcOp.second].isNone())
+ PrintFatalError(
+ Rec->getLoc(), "Operand '" + SrcOpName + "' of '" + Rec->getName() +
+ "' cannot have multiple constraints!");
- std::string SrcOpName = Name.substr(wpos);
- std::pair<unsigned,unsigned> SrcOp = Ops.ParseOperandName(SrcOpName, false);
- if (SrcOp > DestOp) {
- std::swap(SrcOp, DestOp);
- std::swap(SrcOpName, DestOpName);
+ unsigned DestFlatOpNo = Ops.getFlattenedOperandNumber(DestOp);
+ auto NewConstraint = CGIOperandList::ConstraintInfo::getTied(DestFlatOpNo);
+
+ // Check that the earlier operand is not the target of another tie
+ // before making it the target of this one.
+ for (const CGIOperandList::OperandInfo &Op : Ops) {
+ for (unsigned i = 0; i < Op.MINumOperands; i++)
+ if (Op.Constraints[i] == NewConstraint)
+ PrintFatalError(
+ Rec->getLoc(), "Operand '" + DestOpName + "' of '" + Rec->getName() +
+ "' cannot have multiple operands tied to it!");
}
- unsigned FlatOpNo = Ops.getFlattenedOperandNumber(SrcOp);
-
- if (!Ops[DestOp.first].Constraints[DestOp.second].isNone())
- PrintFatalError("Operand '" + DestOpName +
- "' cannot have multiple constraints!");
- Ops[DestOp.first].Constraints[DestOp.second] =
- CGIOperandList::ConstraintInfo::getTied(FlatOpNo);
+ Ops[SrcOp.first].Constraints[SrcOp.second] = NewConstraint;
}
-static void ParseConstraints(const std::string &CStr, CGIOperandList &Ops) {
+static void ParseConstraints(const std::string &CStr, CGIOperandList &Ops,
+ Record *Rec) {
if (CStr.empty()) return;
const std::string delims(",");
@@ -269,7 +308,7 @@ static void ParseConstraints(const std::string &CStr, CGIOperandList &Ops) {
if (eidx == std::string::npos)
eidx = CStr.length();
- ParseConstraint(CStr.substr(bidx, eidx - bidx), Ops);
+ ParseConstraint(CStr.substr(bidx, eidx - bidx), Ops, Rec);
bidx = CStr.find_first_not_of(delims, eidx);
}
}
@@ -302,6 +341,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
AsmString = R->getValueAsString("AsmString");
isReturn = R->getValueAsBit("isReturn");
+ isEHScopeReturn = R->getValueAsBit("isEHScopeReturn");
isBranch = R->getValueAsBit("isBranch");
isIndirectBranch = R->getValueAsBit("isIndirectBranch");
isCompare = R->getValueAsBit("isCompare");
@@ -330,6 +370,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
isConvergent = R->getValueAsBit("isConvergent");
hasNoSchedulingInfo = R->getValueAsBit("hasNoSchedulingInfo");
FastISelShouldIgnore = R->getValueAsBit("FastISelShouldIgnore");
+ variadicOpsAreDefs = R->getValueAsBit("variadicOpsAreDefs");
bool Unset;
mayLoad = R->getValueAsBitOrUnset("mayLoad", Unset);
@@ -352,7 +393,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
hasChain_Inferred = false;
// Parse Constraints.
- ParseConstraints(R->getValueAsString("Constraints"), Operands);
+ ParseConstraints(R->getValueAsString("Constraints"), Operands, R);
// Parse the DisableEncoding field.
Operands.ProcessDisableEncoding(R->getValueAsString("DisableEncoding"));
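Editor's note: the rewritten ParseConstraint above works directly on the raw constraint string, locating the '=' and trimming whitespace around both operand names before resolving them. A minimal standalone sketch of that string handling (the helper names are hypothetical; this is not the TableGen code itself):

#include <cassert>
#include <iostream>
#include <string>
#include <utility>

static std::string trim(const std::string &S) {
  const char *WS = " \t";
  std::string::size_type B = S.find_first_not_of(WS);
  if (B == std::string::npos)
    return "";
  std::string::size_type E = S.find_last_not_of(WS);
  return S.substr(B, E - B + 1);
}

// Split a "$a = $b" constraint into its two operand names; empty on error.
static std::pair<std::string, std::string>
splitTiedConstraint(const std::string &CStr) {
  std::string::size_type Pos = CStr.find('=');
  if (Pos == std::string::npos)
    return {"", ""};
  return {trim(CStr.substr(0, Pos)), trim(CStr.substr(Pos + 1))};
}

int main() {
  auto P = splitTiedConstraint("  $src1 = $dst ");
  assert(P.first == "$src1" && P.second == "$dst");
  std::cout << P.first << " is tied to " << P.second << "\n";
}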
diff --git a/contrib/llvm/utils/TableGen/CodeGenInstruction.h b/contrib/llvm/utils/TableGen/CodeGenInstruction.h
index a50c3e60e6e7..2e3d2f48a928 100644
--- a/contrib/llvm/utils/TableGen/CodeGenInstruction.h
+++ b/contrib/llvm/utils/TableGen/CodeGenInstruction.h
@@ -57,6 +57,17 @@ template <typename T> class ArrayRef;
assert(isTied());
return OtherTiedOperand;
}
+
+ bool operator==(const ConstraintInfo &RHS) const {
+ if (Kind != RHS.Kind)
+ return false;
+ if (Kind == Tied && OtherTiedOperand != RHS.OtherTiedOperand)
+ return false;
+ return true;
+ }
+ bool operator!=(const ConstraintInfo &RHS) const {
+ return !(*this == RHS);
+ }
};
/// OperandInfo - The information we keep track of for each operand in the
@@ -222,6 +233,7 @@ template <typename T> class ArrayRef;
// Various boolean values we track for the instruction.
bool isReturn : 1;
+ bool isEHScopeReturn : 1;
bool isBranch : 1;
bool isIndirectBranch : 1;
bool isCompare : 1;
@@ -263,6 +275,7 @@ template <typename T> class ArrayRef;
bool FastISelShouldIgnore : 1;
bool hasChain : 1;
bool hasChain_Inferred : 1;
+ bool variadicOpsAreDefs : 1;
std::string DeprecatedReason;
bool HasComplexDeprecationPredicate;
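Editor's note: the operator== added to ConstraintInfo above lets ParseConstraint scan every existing constraint and reject a second tie to the same destination operand. A simplified, self-contained sketch of that duplicate-tie check, using an assumed stand-in type rather than the real TableGen classes:

#include <cassert>
#include <vector>

// Stand-in for CGIOperandList::ConstraintInfo (assumed, simplified).
struct TiedConstraint {
  enum Kind { None, Tied } K = None;
  unsigned Other = 0; // flattened index of the tied-to operand
  bool operator==(const TiedConstraint &RHS) const {
    return K == RHS.K && (K != Tied || Other == RHS.Other);
  }
};

// True if some existing constraint already ties an operand to the same
// destination as New.
static bool hasDuplicateTie(const std::vector<TiedConstraint> &Existing,
                            const TiedConstraint &New) {
  for (const TiedConstraint &C : Existing)
    if (C == New)
      return true;
  return false;
}

int main() {
  std::vector<TiedConstraint> Existing = {{TiedConstraint::Tied, 0}};
  assert(hasDuplicateTie(Existing, {TiedConstraint::Tied, 0}));
  assert(!hasDuplicateTie(Existing, {TiedConstraint::Tied, 1}));
}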
diff --git a/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h b/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
index 5d0715959120..9487a79c1432 100644
--- a/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
+++ b/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -124,6 +124,9 @@ struct CodeGenIntrinsic {
/// True if the intrinsic is no-return.
bool isNoReturn;
+ /// True if the intrinsic is cold.
+ bool isCold;
+
/// True if the intrinsic is marked as convergent.
bool isConvergent;
diff --git a/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp b/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
index b0d13b7d38f3..74a2b078dfb3 100644
--- a/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
@@ -725,7 +726,7 @@ struct TupleExpander : SetTheory::Expander {
//===----------------------------------------------------------------------===//
static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) {
- llvm::sort(M.begin(), M.end(), deref<llvm::less>());
+ llvm::sort(M, deref<llvm::less>());
M.erase(std::unique(M.begin(), M.end(), deref<llvm::equal>()), M.end());
}
@@ -997,7 +998,7 @@ CodeGenRegisterClass::getMatchingSubClassWithSubRegs(
for (auto &RC : RegClasses)
if (SuperRegRCsBV[RC.EnumValue])
SuperRegRCs.emplace_back(&RC);
- llvm::sort(SuperRegRCs.begin(), SuperRegRCs.end(), SizeOrder);
+ llvm::sort(SuperRegRCs, SizeOrder);
assert(SuperRegRCs.front() == BiggestSuperRegRC && "Biggest class wasn't first");
// Find all the subreg classes and order them by size too.
@@ -1008,7 +1009,7 @@ CodeGenRegisterClass::getMatchingSubClassWithSubRegs(
if (SuperRegClassesBV.any())
SuperRegClasses.push_back(std::make_pair(&RC, SuperRegClassesBV));
}
- llvm::sort(SuperRegClasses.begin(), SuperRegClasses.end(),
+ llvm::sort(SuperRegClasses,
[&](const std::pair<CodeGenRegisterClass *, BitVector> &A,
const std::pair<CodeGenRegisterClass *, BitVector> &B) {
return SizeOrder(A.first, B.first);
@@ -1073,7 +1074,7 @@ void CodeGenRegisterClass::buildRegUnitSet(const CodeGenRegBank &RegBank,
if (!RU.Artificial)
TmpUnits.push_back(*UnitI);
}
- llvm::sort(TmpUnits.begin(), TmpUnits.end());
+ llvm::sort(TmpUnits);
std::unique_copy(TmpUnits.begin(), TmpUnits.end(),
std::back_inserter(RegUnits));
}
@@ -1093,7 +1094,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
// Read in the user-defined (named) sub-register indices.
// More indices will be synthesized later.
std::vector<Record*> SRIs = Records.getAllDerivedDefinitions("SubRegIndex");
- llvm::sort(SRIs.begin(), SRIs.end(), LessRecord());
+ llvm::sort(SRIs, LessRecord());
for (unsigned i = 0, e = SRIs.size(); i != e; ++i)
getSubRegIdx(SRIs[i]);
// Build composite maps from ComposedOf fields.
@@ -1102,7 +1103,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
// Read in the register definitions.
std::vector<Record*> Regs = Records.getAllDerivedDefinitions("Register");
- llvm::sort(Regs.begin(), Regs.end(), LessRecordRegister());
+ llvm::sort(Regs, LessRecordRegister());
// Assign the enumeration values.
for (unsigned i = 0, e = Regs.size(); i != e; ++i)
getReg(Regs[i]);
@@ -1113,7 +1114,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
for (Record *R : Tups) {
std::vector<Record *> TupRegs = *Sets.expand(R);
- llvm::sort(TupRegs.begin(), TupRegs.end(), LessRecordRegister());
+ llvm::sort(TupRegs, LessRecordRegister());
for (Record *RC : TupRegs)
getReg(RC);
}
@@ -1309,6 +1310,55 @@ getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex *, 8> &Parts) {
}
void CodeGenRegBank::computeComposites() {
+ using RegMap = std::map<const CodeGenRegister*, const CodeGenRegister*>;
+
+ // Subreg -> { Reg->Reg }, where the right-hand side is the mapping from
+ // register to (sub)register associated with the action of the left-hand
+ // side subregister.
+ std::map<const CodeGenSubRegIndex*, RegMap> SubRegAction;
+ for (const CodeGenRegister &R : Registers) {
+ const CodeGenRegister::SubRegMap &SM = R.getSubRegs();
+ for (std::pair<const CodeGenSubRegIndex*, const CodeGenRegister*> P : SM)
+ SubRegAction[P.first].insert({&R, P.second});
+ }
+
+ // Calculate the composition of two subregisters as compositions of their
+ // associated actions.
+ auto compose = [&SubRegAction] (const CodeGenSubRegIndex *Sub1,
+ const CodeGenSubRegIndex *Sub2) {
+ RegMap C;
+ const RegMap &Img1 = SubRegAction.at(Sub1);
+ const RegMap &Img2 = SubRegAction.at(Sub2);
+ for (std::pair<const CodeGenRegister*, const CodeGenRegister*> P : Img1) {
+ auto F = Img2.find(P.second);
+ if (F != Img2.end())
+ C.insert({P.first, F->second});
+ }
+ return C;
+ };
+
+ // Check if the two maps agree on the intersection of their domains.
+ auto agree = [] (const RegMap &Map1, const RegMap &Map2) {
+ // Technically speaking, an empty map agrees with any other map, but
+ // this could flag false positives. We're interested in non-vacuous
+ // agreements.
+ if (Map1.empty() || Map2.empty())
+ return false;
+ for (std::pair<const CodeGenRegister*, const CodeGenRegister*> P : Map1) {
+ auto F = Map2.find(P.first);
+ if (F == Map2.end() || P.second != F->second)
+ return false;
+ }
+ return true;
+ };
+
+ using CompositePair = std::pair<const CodeGenSubRegIndex*,
+ const CodeGenSubRegIndex*>;
+ SmallSet<CompositePair,4> UserDefined;
+ for (const CodeGenSubRegIndex &Idx : SubRegIndices)
+ for (auto P : Idx.getComposites())
+ UserDefined.insert(std::make_pair(&Idx, P.first));
+
// Keep track of TopoSigs visited. We only need to visit each TopoSig once,
// and many registers will share TopoSigs on regular architectures.
BitVector TopoSigs(getNumTopoSigs());
@@ -1341,11 +1391,15 @@ void CodeGenRegBank::computeComposites() {
assert(Idx3 && "Sub-register doesn't have an index");
// Conflicting composition? Emit a warning but allow it.
- if (CodeGenSubRegIndex *Prev = Idx1->addComposite(Idx2, Idx3))
- PrintWarning(Twine("SubRegIndex ") + Idx1->getQualifiedName() +
- " and " + Idx2->getQualifiedName() +
- " compose ambiguously as " + Prev->getQualifiedName() +
- " or " + Idx3->getQualifiedName());
+ if (CodeGenSubRegIndex *Prev = Idx1->addComposite(Idx2, Idx3)) {
+ // If the composition was not user-defined, always emit a warning.
+ if (!UserDefined.count({Idx1, Idx2}) ||
+ agree(compose(Idx1, Idx2), SubRegAction.at(Idx3)))
+ PrintWarning(Twine("SubRegIndex ") + Idx1->getQualifiedName() +
+ " and " + Idx2->getQualifiedName() +
+ " compose ambiguously as " + Prev->getQualifiedName() +
+ " or " + Idx3->getQualifiedName());
+ }
}
}
}
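Editor's note: a standalone sketch of the compose/agree idea introduced in computeComposites above. Each sub-register index acts as a partial map from registers to registers, composition chains two such maps, and two maps "agree" when they coincide on the intersection of their domains; plain ints stand in for registers here:

#include <cassert>
#include <map>

using RegMap = std::map<int, int>;

static RegMap compose(const RegMap &A, const RegMap &B) {
  RegMap C;
  for (const auto &P : A) {
    auto F = B.find(P.second);
    if (F != B.end())
      C.insert({P.first, F->second});
  }
  return C;
}

static bool agree(const RegMap &M1, const RegMap &M2) {
  if (M1.empty() || M2.empty())
    return false; // vacuous agreement is not interesting
  for (const auto &P : M1) {
    auto F = M2.find(P.first);
    if (F == M2.end() || P.second != F->second)
      return false;
  }
  return true;
}

int main() {
  RegMap SubLo = {{1, 10}, {2, 20}};     // reg -> its "lo" sub-register
  RegMap SubHi = {{10, 100}, {20, 200}}; // "lo" sub-reg -> a deeper part
  RegMap Direct = {{1, 100}, {2, 200}};  // user-defined composite action
  assert(agree(compose(SubLo, SubHi), Direct));
}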
diff --git a/contrib/llvm/utils/TableGen/CodeGenRegisters.h b/contrib/llvm/utils/TableGen/CodeGenRegisters.h
index 32aa33c80b3a..0f7a025ded10 100644
--- a/contrib/llvm/utils/TableGen/CodeGenRegisters.h
+++ b/contrib/llvm/utils/TableGen/CodeGenRegisters.h
@@ -348,7 +348,7 @@ namespace llvm {
ArrayRef<ValueTypeByHwMode> getValueTypes() const { return VTs; }
unsigned getNumValueTypes() const { return VTs.size(); }
- ValueTypeByHwMode getValueTypeNum(unsigned VTNum) const {
+ const ValueTypeByHwMode &getValueTypeNum(unsigned VTNum) const {
if (VTNum < VTs.size())
return VTs[VTNum];
llvm_unreachable("VTNum greater than number of ValueTypes in RegClass!");
diff --git a/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp b/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
index 9331fadf4099..6d259cbb33ee 100644
--- a/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -222,9 +222,248 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
// Collect optional processor description.
collectOptionalProcessorInfo();
+ // Check MCInstPredicate definitions.
+ checkMCInstPredicates();
+
+ // Check STIPredicate definitions.
+ checkSTIPredicates();
+
+ // Find STIPredicate definitions for each processor model, and construct
+ // STIPredicateFunction objects.
+ collectSTIPredicates();
+
checkCompleteness();
}
+void CodeGenSchedModels::checkSTIPredicates() const {
+ DenseMap<StringRef, const Record *> Declarations;
+
+ // There cannot be multiple declarations with the same name.
+ const RecVec Decls = Records.getAllDerivedDefinitions("STIPredicateDecl");
+ for (const Record *R : Decls) {
+ StringRef Name = R->getValueAsString("Name");
+ const auto It = Declarations.find(Name);
+ if (It == Declarations.end()) {
+ Declarations[Name] = R;
+ continue;
+ }
+
+ PrintError(R->getLoc(), "STIPredicate " + Name + " multiply declared.");
+ PrintNote(It->second->getLoc(), "Previous declaration was here.");
+ PrintFatalError(R->getLoc(), "Invalid STIPredicateDecl found.");
+ }
+
+ // Disallow InstructionEquivalenceClasses with an empty instruction list.
+ const RecVec Defs =
+ Records.getAllDerivedDefinitions("InstructionEquivalenceClass");
+ for (const Record *R : Defs) {
+ RecVec Opcodes = R->getValueAsListOfDefs("Opcodes");
+ if (Opcodes.empty()) {
+ PrintFatalError(R->getLoc(), "Invalid InstructionEquivalenceClass "
+ "defined with an empty opcode list.");
+ }
+ }
+}
+
+// Used by function `processSTIPredicate` to construct a mask of machine
+// instruction operands.
+static APInt constructOperandMask(ArrayRef<int64_t> Indices) {
+ APInt OperandMask;
+ if (Indices.empty())
+ return OperandMask;
+
+ int64_t MaxIndex = *std::max_element(Indices.begin(), Indices.end());
+ assert(MaxIndex >= 0 && "Invalid negative indices in input!");
+ OperandMask = OperandMask.zext(MaxIndex + 1);
+ for (const int64_t Index : Indices) {
+ assert(Index >= 0 && "Invalid negative indices!");
+ OperandMask.setBit(Index);
+ }
+
+ return OperandMask;
+}
+
+static void
+processSTIPredicate(STIPredicateFunction &Fn,
+ const DenseMap<Record *, unsigned> &ProcModelMap) {
+ DenseMap<const Record *, unsigned> Opcode2Index;
+ using OpcodeMapPair = std::pair<const Record *, OpcodeInfo>;
+ std::vector<OpcodeMapPair> OpcodeMappings;
+ std::vector<std::pair<APInt, APInt>> OpcodeMasks;
+
+ DenseMap<const Record *, unsigned> Predicate2Index;
+ unsigned NumUniquePredicates = 0;
+
+ // Number unique predicates and opcodes used by InstructionEquivalenceClass
+ // definitions. Each unique opcode will be associated with an OpcodeInfo
+ // object.
+ for (const Record *Def : Fn.getDefinitions()) {
+ RecVec Classes = Def->getValueAsListOfDefs("Classes");
+ for (const Record *EC : Classes) {
+ const Record *Pred = EC->getValueAsDef("Predicate");
+ if (Predicate2Index.find(Pred) == Predicate2Index.end())
+ Predicate2Index[Pred] = NumUniquePredicates++;
+
+ RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");
+ for (const Record *Opcode : Opcodes) {
+ if (Opcode2Index.find(Opcode) == Opcode2Index.end()) {
+ Opcode2Index[Opcode] = OpcodeMappings.size();
+ OpcodeMappings.emplace_back(Opcode, OpcodeInfo());
+ }
+ }
+ }
+ }
+
+ // Initialize vector `OpcodeMasks` with default values. We want to keep track
+ // of which processors "use" which opcodes. We also want to be able to
+ // identify predicates that are used by different processors for the same
+ // opcode.
+ // This information is used later on by this algorithm to sort OpcodeMapping
+ // elements based on their processor and predicate sets.
+ OpcodeMasks.resize(OpcodeMappings.size());
+ APInt DefaultProcMask(ProcModelMap.size(), 0);
+ APInt DefaultPredMask(NumUniquePredicates, 0);
+ for (std::pair<APInt, APInt> &MaskPair : OpcodeMasks)
+ MaskPair = std::make_pair(DefaultProcMask, DefaultPredMask);
+
+ // Construct a OpcodeInfo object for every unique opcode declared by an
+ // InstructionEquivalenceClass definition.
+ for (const Record *Def : Fn.getDefinitions()) {
+ RecVec Classes = Def->getValueAsListOfDefs("Classes");
+ const Record *SchedModel = Def->getValueAsDef("SchedModel");
+ unsigned ProcIndex = ProcModelMap.find(SchedModel)->second;
+ APInt ProcMask(ProcModelMap.size(), 0);
+ ProcMask.setBit(ProcIndex);
+
+ for (const Record *EC : Classes) {
+ RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");
+
+ std::vector<int64_t> OpIndices =
+ EC->getValueAsListOfInts("OperandIndices");
+ APInt OperandMask = constructOperandMask(OpIndices);
+
+ const Record *Pred = EC->getValueAsDef("Predicate");
+ APInt PredMask(NumUniquePredicates, 0);
+ PredMask.setBit(Predicate2Index[Pred]);
+
+ for (const Record *Opcode : Opcodes) {
+ unsigned OpcodeIdx = Opcode2Index[Opcode];
+ if (OpcodeMasks[OpcodeIdx].first[ProcIndex]) {
+ std::string Message =
+ "Opcode " + Opcode->getName().str() +
+ " used by multiple InstructionEquivalenceClass definitions.";
+ PrintFatalError(EC->getLoc(), Message);
+ }
+ OpcodeMasks[OpcodeIdx].first |= ProcMask;
+ OpcodeMasks[OpcodeIdx].second |= PredMask;
+ OpcodeInfo &OI = OpcodeMappings[OpcodeIdx].second;
+
+ OI.addPredicateForProcModel(ProcMask, OperandMask, Pred);
+ }
+ }
+ }
+
+ // Sort OpcodeMappings elements based on their CPU and predicate masks.
+ // As a last resort, order elements by opcode identifier.
+ llvm::sort(OpcodeMappings,
+ [&](const OpcodeMapPair &Lhs, const OpcodeMapPair &Rhs) {
+ unsigned LhsIdx = Opcode2Index[Lhs.first];
+ unsigned RhsIdx = Opcode2Index[Rhs.first];
+ std::pair<APInt, APInt> &LhsMasks = OpcodeMasks[LhsIdx];
+ std::pair<APInt, APInt> &RhsMasks = OpcodeMasks[RhsIdx];
+
+ if (LhsMasks.first != RhsMasks.first) {
+ if (LhsMasks.first.countPopulation() <
+ RhsMasks.first.countPopulation())
+ return true;
+ return LhsMasks.first.countLeadingZeros() >
+ RhsMasks.first.countLeadingZeros();
+ }
+
+ if (LhsMasks.second != RhsMasks.second) {
+ if (LhsMasks.second.countPopulation() <
+ RhsMasks.second.countPopulation())
+ return true;
+ return LhsMasks.second.countLeadingZeros() >
+ RhsMasks.second.countLeadingZeros();
+ }
+
+ return LhsIdx < RhsIdx;
+ });
+
+ // Now construct opcode groups. Groups are used by the SubtargetEmitter when
+ // expanding the body of a STIPredicate function. In particular, each opcode
+ // group is expanded into a sequence of labels in a switch statement.
+ // It identifies opcodes for which different processors define the same
+ // predicates and the same opcode masks.
+ for (OpcodeMapPair &Info : OpcodeMappings)
+ Fn.addOpcode(Info.first, std::move(Info.second));
+}
+
+void CodeGenSchedModels::collectSTIPredicates() {
+ // Map STIPredicateDecl records to elements of vector
+ // CodeGenSchedModels::STIPredicates.
+ DenseMap<const Record *, unsigned> Decl2Index;
+
+ RecVec RV = Records.getAllDerivedDefinitions("STIPredicate");
+ for (const Record *R : RV) {
+ const Record *Decl = R->getValueAsDef("Declaration");
+
+ const auto It = Decl2Index.find(Decl);
+ if (It == Decl2Index.end()) {
+ Decl2Index[Decl] = STIPredicates.size();
+ STIPredicateFunction Predicate(Decl);
+ Predicate.addDefinition(R);
+ STIPredicates.emplace_back(std::move(Predicate));
+ continue;
+ }
+
+ STIPredicateFunction &PreviousDef = STIPredicates[It->second];
+ PreviousDef.addDefinition(R);
+ }
+
+ for (STIPredicateFunction &Fn : STIPredicates)
+ processSTIPredicate(Fn, ProcModelMap);
+}
+
+void OpcodeInfo::addPredicateForProcModel(const llvm::APInt &CpuMask,
+ const llvm::APInt &OperandMask,
+ const Record *Predicate) {
+ auto It = llvm::find_if(
+ Predicates, [&OperandMask, &Predicate](const PredicateInfo &P) {
+ return P.Predicate == Predicate && P.OperandMask == OperandMask;
+ });
+ if (It == Predicates.end()) {
+ Predicates.emplace_back(CpuMask, OperandMask, Predicate);
+ return;
+ }
+ It->ProcModelMask |= CpuMask;
+}
+
+void CodeGenSchedModels::checkMCInstPredicates() const {
+ RecVec MCPredicates = Records.getAllDerivedDefinitions("TIIPredicate");
+ if (MCPredicates.empty())
+ return;
+
+ // A target cannot have multiple TIIPredicate definitions with the same name.
+ llvm::StringMap<const Record *> TIIPredicates(MCPredicates.size());
+ for (const Record *TIIPred : MCPredicates) {
+ StringRef Name = TIIPred->getValueAsString("FunctionName");
+ StringMap<const Record *>::const_iterator It = TIIPredicates.find(Name);
+ if (It == TIIPredicates.end()) {
+ TIIPredicates[Name] = TIIPred;
+ continue;
+ }
+
+ PrintError(TIIPred->getLoc(),
+ "TIIPredicate " + Name + " is multiply defined.");
+ PrintNote(It->second->getLoc(),
+ " Previous definition of " + Name + " was here.");
+ PrintFatalError(TIIPred->getLoc(),
+ "Found conflicting definitions of TIIPredicate.");
+ }
+}
+
void CodeGenSchedModels::collectRetireControlUnits() {
RecVec Units = Records.getAllDerivedDefinitions("RetireControlUnit");
@@ -240,6 +479,35 @@ void CodeGenSchedModels::collectRetireControlUnits() {
}
}
+void CodeGenSchedModels::collectLoadStoreQueueInfo() {
+ RecVec Queues = Records.getAllDerivedDefinitions("MemoryQueue");
+
+ for (Record *Queue : Queues) {
+ CodeGenProcModel &PM = getProcModel(Queue->getValueAsDef("SchedModel"));
+ if (Queue->isSubClassOf("LoadQueue")) {
+ if (PM.LoadQueue) {
+ PrintError(Queue->getLoc(),
+ "Expected a single LoadQueue definition");
+ PrintNote(PM.LoadQueue->getLoc(),
+ "Previous definition of LoadQueue was here");
+ }
+
+ PM.LoadQueue = Queue;
+ }
+
+ if (Queue->isSubClassOf("StoreQueue")) {
+ if (PM.StoreQueue) {
+ PrintError(Queue->getLoc(),
+ "Expected a single StoreQueue definition");
+ PrintNote(PM.LoadQueue->getLoc(),
+ "Previous definition of StoreQueue was here");
+ }
+
+ PM.StoreQueue = Queue;
+ }
+ }
+}
+
/// Collect optional processor information.
void CodeGenSchedModels::collectOptionalProcessorInfo() {
// Find register file definitions for each processor.
@@ -248,8 +516,8 @@ void CodeGenSchedModels::collectOptionalProcessorInfo() {
// Collect processor RetireControlUnit descriptors if available.
collectRetireControlUnits();
- // Find pfm counter definitions for each processor.
- collectPfmCounters();
+ // Collect information about load/store queues.
+ collectLoadStoreQueueInfo();
checkCompleteness();
}
@@ -257,7 +525,7 @@ void CodeGenSchedModels::collectOptionalProcessorInfo() {
/// Gather all processor models.
void CodeGenSchedModels::collectProcModels() {
RecVec ProcRecords = Records.getAllDerivedDefinitions("Processor");
- llvm::sort(ProcRecords.begin(), ProcRecords.end(), LessRecordFieldName());
+ llvm::sort(ProcRecords, LessRecordFieldName());
// Reserve space because we can. Reallocation would be ok.
ProcModels.reserve(ProcRecords.size()+1);
@@ -376,7 +644,7 @@ void CodeGenSchedModels::collectSchedRW() {
// Find all ReadWrites referenced by SchedAlias. AliasDefs needs to be sorted
// for the loop below that initializes Alias vectors.
RecVec AliasDefs = Records.getAllDerivedDefinitions("SchedAlias");
- llvm::sort(AliasDefs.begin(), AliasDefs.end(), LessRecord());
+ llvm::sort(AliasDefs, LessRecord());
for (Record *ADef : AliasDefs) {
Record *MatchDef = ADef->getValueAsDef("MatchRW");
Record *AliasDef = ADef->getValueAsDef("AliasRW");
@@ -394,12 +662,12 @@ void CodeGenSchedModels::collectSchedRW() {
}
// Sort and add the SchedReadWrites directly referenced by instructions or
// itinerary resources. Index reads and writes in separate domains.
- llvm::sort(SWDefs.begin(), SWDefs.end(), LessRecord());
+ llvm::sort(SWDefs, LessRecord());
for (Record *SWDef : SWDefs) {
assert(!getSchedRWIdx(SWDef, /*IsRead=*/false) && "duplicate SchedWrite");
SchedWrites.emplace_back(SchedWrites.size(), SWDef);
}
- llvm::sort(SRDefs.begin(), SRDefs.end(), LessRecord());
+ llvm::sort(SRDefs, LessRecord());
for (Record *SRDef : SRDefs) {
assert(!getSchedRWIdx(SRDef, /*IsRead-*/true) && "duplicate SchedWrite");
SchedReads.emplace_back(SchedReads.size(), SRDef);
@@ -619,7 +887,7 @@ void CodeGenSchedModels::collectSchedClasses() {
}
// Create classes for InstRW defs.
RecVec InstRWDefs = Records.getAllDerivedDefinitions("InstRW");
- llvm::sort(InstRWDefs.begin(), InstRWDefs.end(), LessRecord());
+ llvm::sort(InstRWDefs, LessRecord());
LLVM_DEBUG(dbgs() << "\n+++ SCHED CLASSES (createInstRWClass) +++\n");
for (Record *RWDef : InstRWDefs)
createInstRWClass(RWDef);
@@ -923,7 +1191,7 @@ void CodeGenSchedModels::collectProcItins() {
// Gather the read/write types for each itinerary class.
void CodeGenSchedModels::collectProcItinRW() {
RecVec ItinRWDefs = Records.getAllDerivedDefinitions("ItinRW");
- llvm::sort(ItinRWDefs.begin(), ItinRWDefs.end(), LessRecord());
+ llvm::sort(ItinRWDefs, LessRecord());
for (Record *RWDef : ItinRWDefs) {
if (!RWDef->getValueInit("SchedModel")->isComplete())
PrintFatalError(RWDef->getLoc(), "SchedModel is undefined");
@@ -1520,33 +1788,33 @@ void CodeGenSchedModels::collectRegisterFiles() {
CodeGenProcModel &PM = getProcModel(RF->getValueAsDef("SchedModel"));
PM.RegisterFiles.emplace_back(CodeGenRegisterFile(RF->getName(),RF));
CodeGenRegisterFile &CGRF = PM.RegisterFiles.back();
+ CGRF.MaxMovesEliminatedPerCycle =
+ RF->getValueAsInt("MaxMovesEliminatedPerCycle");
+ CGRF.AllowZeroMoveEliminationOnly =
+ RF->getValueAsBit("AllowZeroMoveEliminationOnly");
// Now set the number of physical registers as well as the cost of registers
// in each register class.
CGRF.NumPhysRegs = RF->getValueAsInt("NumPhysRegs");
+ if (!CGRF.NumPhysRegs) {
+ PrintFatalError(RF->getLoc(),
+ "Invalid RegisterFile with zero physical registers");
+ }
+
RecVec RegisterClasses = RF->getValueAsListOfDefs("RegClasses");
std::vector<int64_t> RegisterCosts = RF->getValueAsListOfInts("RegCosts");
+ ListInit *MoveElimInfo = RF->getValueAsListInit("AllowMoveElimination");
for (unsigned I = 0, E = RegisterClasses.size(); I < E; ++I) {
int Cost = RegisterCosts.size() > I ? RegisterCosts[I] : 1;
- CGRF.Costs.emplace_back(RegisterClasses[I], Cost);
- }
- }
-}
-// Collect all the RegisterFile definitions available in this target.
-void CodeGenSchedModels::collectPfmCounters() {
- for (Record *Def : Records.getAllDerivedDefinitions("PfmIssueCounter")) {
- CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel"));
- PM.PfmIssueCounterDefs.emplace_back(Def);
- }
- for (Record *Def : Records.getAllDerivedDefinitions("PfmCycleCounter")) {
- CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel"));
- if (PM.PfmCycleCounterDef) {
- PrintFatalError(Def->getLoc(),
- "multiple cycle counters for " +
- Def->getValueAsDef("SchedModel")->getName());
+ bool AllowMoveElim = false;
+ if (MoveElimInfo->size() > I) {
+ BitInit *Val = cast<BitInit>(MoveElimInfo->getElement(I));
+ AllowMoveElim = Val->getValue();
+ }
+
+ CGRF.Costs.emplace_back(RegisterClasses[I], Cost, AllowMoveElim);
}
- PM.PfmCycleCounterDef = Def;
}
}
@@ -1620,12 +1888,9 @@ void CodeGenSchedModels::collectProcResources() {
}
// Finalize each ProcModel by sorting the record arrays.
for (CodeGenProcModel &PM : ProcModels) {
- llvm::sort(PM.WriteResDefs.begin(), PM.WriteResDefs.end(),
- LessRecord());
- llvm::sort(PM.ReadAdvanceDefs.begin(), PM.ReadAdvanceDefs.end(),
- LessRecord());
- llvm::sort(PM.ProcResourceDefs.begin(), PM.ProcResourceDefs.end(),
- LessRecord());
+ llvm::sort(PM.WriteResDefs, LessRecord());
+ llvm::sort(PM.ReadAdvanceDefs, LessRecord());
+ llvm::sort(PM.ProcResourceDefs, LessRecord());
LLVM_DEBUG(
PM.dump();
dbgs() << "WriteResDefs: "; for (RecIter RI = PM.WriteResDefs.begin(),
diff --git a/contrib/llvm/utils/TableGen/CodeGenSchedule.h b/contrib/llvm/utils/TableGen/CodeGenSchedule.h
index 07c11596adee..87a051b0c05e 100644
--- a/contrib/llvm/utils/TableGen/CodeGenSchedule.h
+++ b/contrib/llvm/utils/TableGen/CodeGenSchedule.h
@@ -15,6 +15,7 @@
#ifndef LLVM_UTILS_TABLEGEN_CODEGENSCHEDULE_H
#define LLVM_UTILS_TABLEGEN_CODEGENSCHEDULE_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
@@ -166,8 +167,9 @@ struct CodeGenSchedClass {
struct CodeGenRegisterCost {
Record *RCDef;
unsigned Cost;
- CodeGenRegisterCost(Record *RC, unsigned RegisterCost)
- : RCDef(RC), Cost(RegisterCost) {}
+ bool AllowMoveElimination;
+ CodeGenRegisterCost(Record *RC, unsigned RegisterCost, bool AllowMoveElim = false)
+ : RCDef(RC), Cost(RegisterCost), AllowMoveElimination(AllowMoveElim) {}
CodeGenRegisterCost(const CodeGenRegisterCost &) = default;
CodeGenRegisterCost &operator=(const CodeGenRegisterCost &) = delete;
};
@@ -180,12 +182,18 @@ struct CodeGenRegisterCost {
struct CodeGenRegisterFile {
std::string Name;
Record *RegisterFileDef;
+ unsigned MaxMovesEliminatedPerCycle;
+ bool AllowZeroMoveEliminationOnly;
unsigned NumPhysRegs;
std::vector<CodeGenRegisterCost> Costs;
- CodeGenRegisterFile(StringRef name, Record *def)
- : Name(name), RegisterFileDef(def), NumPhysRegs(0) {}
+ CodeGenRegisterFile(StringRef name, Record *def, unsigned MaxMoveElimPerCy = 0,
+ bool AllowZeroMoveElimOnly = false)
+ : Name(name), RegisterFileDef(def),
+ MaxMovesEliminatedPerCycle(MaxMoveElimPerCy),
+ AllowZeroMoveEliminationOnly(AllowZeroMoveElimOnly),
+ NumPhysRegs(0) {}
bool hasDefaultCosts() const { return Costs.empty(); }
};
@@ -238,14 +246,14 @@ struct CodeGenProcModel {
// Optional Retire Control Unit definition.
Record *RetireControlUnit;
- // List of PfmCounters.
- RecVec PfmIssueCounterDefs;
- Record *PfmCycleCounterDef = nullptr;
+ // Load/Store queue descriptors.
+ Record *LoadQueue;
+ Record *StoreQueue;
CodeGenProcModel(unsigned Idx, std::string Name, Record *MDef,
Record *IDef) :
Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef),
- RetireControlUnit(nullptr) {}
+ RetireControlUnit(nullptr), LoadQueue(nullptr), StoreQueue(nullptr) {}
bool hasItineraries() const {
return !ItinsDef->getValueAsListOfDefs("IID").empty();
@@ -256,9 +264,8 @@ struct CodeGenProcModel {
}
bool hasExtraProcessorInfo() const {
- return RetireControlUnit || !RegisterFiles.empty() ||
- !PfmIssueCounterDefs.empty() ||
- PfmCycleCounterDef != nullptr;
+ return RetireControlUnit || LoadQueue || StoreQueue ||
+ !RegisterFiles.empty();
}
unsigned getProcResourceIdx(Record *PRDef) const;
@@ -270,6 +277,137 @@ struct CodeGenProcModel {
#endif
};
+/// Used to correlate instructions to MCInstPredicates specified by
+/// InstructionEquivalenceClass tablegen definitions.
+///
+/// Example: a XOR of a register with self, is a known zero-idiom for most
+/// X86 processors.
+///
+/// Each processor can use a (potentially different) InstructionEquivalenceClass
+/// definition to classify zero-idioms. That means, XORrr is likely to appear
+/// in more than one equivalence class (where each class definition is
+/// contributed by a different processor).
+///
+/// There is no guarantee that the same MCInstPredicate will be used to describe
+/// equivalence classes that identify XORrr as a zero-idiom.
+///
+/// To be more specific, the requirements for being a zero-idiom XORrr may be
+/// different for different processors.
+///
+/// Class PredicateInfo identifies a subset of processors that specify the same
+/// requirements (i.e. same MCInstPredicate and OperandMask) for an instruction
+/// opcode.
+///
+/// Back to the example. Field `ProcModelMask` will have one bit set for every
+/// processor model that sees XORrr as a zero-idiom, and that specifies the same
+/// set of constraints.
+///
+/// By construction, there can be multiple instances of PredicateInfo associated
+/// with the same instruction opcode. For example, different processors may define
+/// different constraints on the same opcode.
+///
+/// Field OperandMask can be used as an extra constraint.
+/// It may be used to describe conditions that apply only to a subset of the
+/// operands of a machine instruction, and the operands subset may not be the
+/// same for all processor models.
+struct PredicateInfo {
+ llvm::APInt ProcModelMask; // A set of processor model indices.
+ llvm::APInt OperandMask; // An operand mask.
+ const Record *Predicate; // MCInstrPredicate definition.
+ PredicateInfo(llvm::APInt CpuMask, llvm::APInt Operands, const Record *Pred)
+ : ProcModelMask(CpuMask), OperandMask(Operands), Predicate(Pred) {}
+
+ bool operator==(const PredicateInfo &Other) const {
+ return ProcModelMask == Other.ProcModelMask &&
+ OperandMask == Other.OperandMask && Predicate == Other.Predicate;
+ }
+};
+
+/// A collection of PredicateInfo objects.
+///
+/// There is at least one OpcodeInfo object for every opcode specified by a
+/// STIPredicate definition.
+class OpcodeInfo {
+ std::vector<PredicateInfo> Predicates;
+
+ OpcodeInfo(const OpcodeInfo &Other) = delete;
+ OpcodeInfo &operator=(const OpcodeInfo &Other) = delete;
+
+public:
+ OpcodeInfo() = default;
+ OpcodeInfo &operator=(OpcodeInfo &&Other) = default;
+ OpcodeInfo(OpcodeInfo &&Other) = default;
+
+ ArrayRef<PredicateInfo> getPredicates() const { return Predicates; }
+
+ void addPredicateForProcModel(const llvm::APInt &CpuMask,
+ const llvm::APInt &OperandMask,
+ const Record *Predicate);
+};
+
+/// Used to group together tablegen instruction definitions that are subject
+/// to the same set of constraints (identified by an instance of OpcodeInfo).
+class OpcodeGroup {
+ OpcodeInfo Info;
+ std::vector<const Record *> Opcodes;
+
+ OpcodeGroup(const OpcodeGroup &Other) = delete;
+ OpcodeGroup &operator=(const OpcodeGroup &Other) = delete;
+
+public:
+ OpcodeGroup(OpcodeInfo &&OpInfo) : Info(std::move(OpInfo)) {}
+ OpcodeGroup(OpcodeGroup &&Other) = default;
+
+ void addOpcode(const Record *Opcode) {
+ assert(std::find(Opcodes.begin(), Opcodes.end(), Opcode) == Opcodes.end() &&
+ "Opcode already in set!");
+ Opcodes.push_back(Opcode);
+ }
+
+ ArrayRef<const Record *> getOpcodes() const { return Opcodes; }
+ const OpcodeInfo &getOpcodeInfo() const { return Info; }
+};
+
+/// An STIPredicateFunction descriptor used by tablegen backends to
+/// auto-generate the body of a predicate function as a member of tablegen'd
+/// class XXXGenSubtargetInfo.
+class STIPredicateFunction {
+ const Record *FunctionDeclaration;
+
+ std::vector<const Record *> Definitions;
+ std::vector<OpcodeGroup> Groups;
+
+ STIPredicateFunction(const STIPredicateFunction &Other) = delete;
+ STIPredicateFunction &operator=(const STIPredicateFunction &Other) = delete;
+
+public:
+ STIPredicateFunction(const Record *Rec) : FunctionDeclaration(Rec) {}
+ STIPredicateFunction(STIPredicateFunction &&Other) = default;
+
+ bool isCompatibleWith(const STIPredicateFunction &Other) const {
+ return FunctionDeclaration == Other.FunctionDeclaration;
+ }
+
+ void addDefinition(const Record *Def) { Definitions.push_back(Def); }
+ void addOpcode(const Record *OpcodeRec, OpcodeInfo &&Info) {
+ if (Groups.empty() ||
+ Groups.back().getOpcodeInfo().getPredicates() != Info.getPredicates())
+ Groups.emplace_back(std::move(Info));
+ Groups.back().addOpcode(OpcodeRec);
+ }
+
+ StringRef getName() const {
+ return FunctionDeclaration->getValueAsString("Name");
+ }
+ const Record *getDefaultReturnPredicate() const {
+ return FunctionDeclaration->getValueAsDef("DefaultReturnValue");
+ }
+
+ const Record *getDeclaration() const { return FunctionDeclaration; }
+ ArrayRef<const Record *> getDefinitions() const { return Definitions; }
+ ArrayRef<OpcodeGroup> getGroups() const { return Groups; }
+};
+
/// Top level container for machine model data.
class CodeGenSchedModels {
RecordKeeper &Records;
@@ -303,6 +441,8 @@ class CodeGenSchedModels {
using InstClassMapTy = DenseMap<Record*, unsigned>;
InstClassMapTy InstrClassMap;
+ std::vector<STIPredicateFunction> STIPredicates;
+
public:
CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT);
@@ -430,6 +570,9 @@ public:
Record *findProcResUnits(Record *ProcResKind, const CodeGenProcModel &PM,
ArrayRef<SMLoc> Loc) const;
+ ArrayRef<STIPredicateFunction> getSTIPredicates() const {
+ return STIPredicates;
+ }
private:
void collectProcModels();
@@ -447,8 +590,6 @@ private:
void collectRegisterFiles();
- void collectPfmCounters();
-
void collectOptionalProcessorInfo();
std::string createSchedClassName(Record *ItinClassDef,
@@ -465,6 +606,14 @@ private:
void inferSchedClasses();
+ void checkMCInstPredicates() const;
+
+ void checkSTIPredicates() const;
+
+ void collectSTIPredicates();
+
+ void collectLoadStoreQueueInfo();
+
void checkCompleteness();
void inferFromRW(ArrayRef<unsigned> OperWrites, ArrayRef<unsigned> OperReads,
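Editor's note: STIPredicateFunction::addOpcode above opens a new OpcodeGroup only when the incoming opcode's predicate list differs from the last group's; because opcodes arrive pre-sorted, equal predicate sets end up adjacent. A simplified sketch of that grouping rule, with strings standing in for predicate lists:

#include <cassert>
#include <string>
#include <vector>

struct Group {
  std::string Key;                 // stands in for the predicate list
  std::vector<std::string> Opcodes;
};

static void addOpcode(std::vector<Group> &Groups, const std::string &Opcode,
                      const std::string &Key) {
  if (Groups.empty() || Groups.back().Key != Key)
    Groups.push_back({Key, {}});
  Groups.back().Opcodes.push_back(Opcode);
}

int main() {
  std::vector<Group> Groups;
  addOpcode(Groups, "XOR32rr", "zero-idiom");
  addOpcode(Groups, "XOR64rr", "zero-idiom");
  addOpcode(Groups, "SUB32rr", "dep-breaking");
  assert(Groups.size() == 2 && Groups[0].Opcodes.size() == 2);
}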
diff --git a/contrib/llvm/utils/TableGen/CodeGenTarget.cpp b/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
index cb73ca83c9bb..bcb653135551 100644
--- a/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -278,7 +278,7 @@ CodeGenRegBank &CodeGenTarget::getRegBank() const {
void CodeGenTarget::ReadRegAltNameIndices() const {
RegAltNameIndices = Records.getAllDerivedDefinitions("RegAltNameIndex");
- llvm::sort(RegAltNameIndices.begin(), RegAltNameIndices.end(), LessRecord());
+ llvm::sort(RegAltNameIndices, LessRecord());
}
/// getRegisterByName - If there is a register with the specific AsmName,
@@ -303,7 +303,7 @@ std::vector<ValueTypeByHwMode> CodeGenTarget::getRegisterVTs(Record *R)
}
// Remove duplicates.
- llvm::sort(Result.begin(), Result.end());
+ llvm::sort(Result);
Result.erase(std::unique(Result.begin(), Result.end()), Result.end());
return Result;
}
@@ -314,7 +314,7 @@ void CodeGenTarget::ReadLegalValueTypes() const {
LegalValueTypes.insert(LegalValueTypes.end(), RC.VTs.begin(), RC.VTs.end());
// Remove duplicates.
- llvm::sort(LegalValueTypes.begin(), LegalValueTypes.end());
+ llvm::sort(LegalValueTypes);
LegalValueTypes.erase(std::unique(LegalValueTypes.begin(),
LegalValueTypes.end()),
LegalValueTypes.end());
@@ -513,7 +513,7 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC,
if (isTarget == TargetOnly)
Intrinsics.push_back(CodeGenIntrinsic(Defs[I]));
}
- llvm::sort(Intrinsics.begin(), Intrinsics.end(),
+ llvm::sort(Intrinsics,
[](const CodeGenIntrinsic &LHS, const CodeGenIntrinsic &RHS) {
return std::tie(LHS.TargetPrefix, LHS.Name) <
std::tie(RHS.TargetPrefix, RHS.Name);
@@ -536,6 +536,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
isCommutative = false;
canThrow = false;
isNoReturn = false;
+ isCold = false;
isNoDuplicate = false;
isConvergent = false;
isSpeculatable = false;
@@ -682,6 +683,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
isConvergent = true;
else if (Property->getName() == "IntrNoReturn")
isNoReturn = true;
+ else if (Property->getName() == "IntrCold")
+ isCold = true;
else if (Property->getName() == "IntrSpeculatable")
isSpeculatable = true;
else if (Property->getName() == "IntrHasSideEffects")
@@ -709,6 +712,5 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
Properties = parseSDPatternOperatorProperties(R);
// Sort the argument attributes for later benefit.
- llvm::sort(ArgumentAttributes.begin(), ArgumentAttributes.end());
+ llvm::sort(ArgumentAttributes);
}
-
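Editor's note: the repeated llvm::sort(X.begin(), X.end(), Cmp) to llvm::sort(X, Cmp) changes in this import switch to LLVM's range overload of sort. A generic range wrapper in that spirit is easy to sketch (this is a stand-in, not LLVM's implementation):

#include <algorithm>
#include <cassert>
#include <vector>

// Range-based sort wrapper in the spirit of llvm::sort(Container, Cmp).
template <typename Container, typename Compare>
void range_sort(Container &C, Compare Cmp) {
  std::sort(C.begin(), C.end(), Cmp);
}

int main() {
  std::vector<int> V = {3, 1, 2};
  range_sort(V, [](int A, int B) { return A < B; });
  assert(V.front() == 1 && V.back() == 3);
}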
diff --git a/contrib/llvm/utils/TableGen/DAGISelMatcher.cpp b/contrib/llvm/utils/TableGen/DAGISelMatcher.cpp
index 4a918d15691b..c8e005739460 100644
--- a/contrib/llvm/utils/TableGen/DAGISelMatcher.cpp
+++ b/contrib/llvm/utils/TableGen/DAGISelMatcher.cpp
@@ -93,13 +93,23 @@ SwitchTypeMatcher::~SwitchTypeMatcher() {
delete C.second;
}
-CheckPredicateMatcher::CheckPredicateMatcher(const TreePredicateFn &pred)
- : Matcher(CheckPredicate), Pred(pred.getOrigPatFragRecord()) {}
+CheckPredicateMatcher::CheckPredicateMatcher(
+ const TreePredicateFn &pred, const SmallVectorImpl<unsigned> &Ops)
+ : Matcher(CheckPredicate), Pred(pred.getOrigPatFragRecord()),
+ Operands(Ops.begin(), Ops.end()) {}
TreePredicateFn CheckPredicateMatcher::getPredicate() const {
return TreePredicateFn(Pred);
}
+unsigned CheckPredicateMatcher::getNumOperands() const {
+ return Operands.size();
+}
+
+unsigned CheckPredicateMatcher::getOperandNo(unsigned i) const {
+ assert(i < Operands.size());
+ return Operands[i];
+}
// printImpl methods.
diff --git a/contrib/llvm/utils/TableGen/DAGISelMatcher.h b/contrib/llvm/utils/TableGen/DAGISelMatcher.h
index ecc1f1dd094a..9be7295c67d4 100644
--- a/contrib/llvm/utils/TableGen/DAGISelMatcher.h
+++ b/contrib/llvm/utils/TableGen/DAGISelMatcher.h
@@ -414,10 +414,14 @@ private:
/// see if the node is acceptable.
class CheckPredicateMatcher : public Matcher {
TreePattern *Pred;
+ const SmallVector<unsigned, 4> Operands;
public:
- CheckPredicateMatcher(const TreePredicateFn &pred);
+ CheckPredicateMatcher(const TreePredicateFn &pred,
+ const SmallVectorImpl<unsigned> &Operands);
TreePredicateFn getPredicate() const;
+ unsigned getNumOperands() const;
+ unsigned getOperandNo(unsigned i) const;
static bool classof(const Matcher *N) {
return N->getKind() == CheckPredicate;
diff --git a/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index e64943c1d025..90ca1bff5344 100644
--- a/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -50,6 +50,7 @@ class MatcherTableEmitter {
DenseMap<TreePattern *, unsigned> NodePredicateMap;
std::vector<TreePredicateFn> NodePredicates;
+ std::vector<TreePredicateFn> NodePredicatesWithOperands;
// We de-duplicate the predicates by code string, and use this map to track
// all the patterns with "identical" predicates.
@@ -92,6 +93,9 @@ public:
void EmitPatternMatchTable(raw_ostream &OS);
private:
+ void EmitNodePredicatesFunction(const std::vector<TreePredicateFn> &Preds,
+ StringRef Decl, raw_ostream &OS);
+
unsigned EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
raw_ostream &OS);
@@ -103,12 +107,20 @@ private:
NodePredicatesByCodeToRun[Pred.getCodeToRunOnSDNode()];
if (SameCodePreds.empty()) {
// We've never seen a predicate with the same code: allocate an entry.
- NodePredicates.push_back(Pred);
- Entry = NodePredicates.size();
+ if (Pred.usesOperands()) {
+ NodePredicatesWithOperands.push_back(Pred);
+ Entry = NodePredicatesWithOperands.size();
+ } else {
+ NodePredicates.push_back(Pred);
+ Entry = NodePredicates.size();
+ }
} else {
// We did see an identical predicate: re-use it.
Entry = NodePredicateMap[SameCodePreds.front()];
assert(Entry != 0);
+ assert(TreePredicateFn(SameCodePreds.front()).usesOperands() ==
+ Pred.usesOperands() &&
+ "PatFrags with some code must have same usesOperands setting");
}
// In both cases, we've never seen this particular predicate before, so
// mark it in the list of predicates sharing the same code.
@@ -396,11 +408,23 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
}
case Matcher::CheckPredicate: {
TreePredicateFn Pred = cast<CheckPredicateMatcher>(N)->getPredicate();
- OS << "OPC_CheckPredicate, " << getNodePredicate(Pred) << ',';
+ unsigned OperandBytes = 0;
+
+ if (Pred.usesOperands()) {
+ unsigned NumOps = cast<CheckPredicateMatcher>(N)->getNumOperands();
+ OS << "OPC_CheckPredicateWithOperands, " << NumOps << "/*#Ops*/, ";
+ for (unsigned i = 0; i < NumOps; ++i)
+ OS << cast<CheckPredicateMatcher>(N)->getOperandNo(i) << ", ";
+ OperandBytes = 1 + NumOps;
+ } else {
+ OS << "OPC_CheckPredicate, ";
+ }
+
+ OS << getNodePredicate(Pred) << ',';
if (!OmitComments)
OS << " // " << Pred.getFnName();
OS << '\n';
- return 2;
+ return 2 + OperandBytes;
}
case Matcher::CheckOpcode:
@@ -783,6 +807,33 @@ EmitMatcherList(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
return Size;
}
+void MatcherTableEmitter::EmitNodePredicatesFunction(
+ const std::vector<TreePredicateFn> &Preds, StringRef Decl,
+ raw_ostream &OS) {
+ if (Preds.empty())
+ return;
+
+ BeginEmitFunction(OS, "bool", Decl, true/*AddOverride*/);
+ OS << "{\n";
+ OS << " switch (PredNo) {\n";
+ OS << " default: llvm_unreachable(\"Invalid predicate in table?\");\n";
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // Emit the predicate code corresponding to this pattern.
+ TreePredicateFn PredFn = Preds[i];
+
+ assert(!PredFn.isAlwaysTrue() && "No code in this predicate");
+ OS << " case " << i << ": { \n";
+ for (auto *SimilarPred :
+ NodePredicatesByCodeToRun[PredFn.getCodeToRunOnSDNode()])
+ OS << " // " << TreePredicateFn(SimilarPred).getFnName() <<'\n';
+
+ OS << PredFn.getCodeToRunOnSDNode() << "\n }\n";
+ }
+ OS << " }\n";
+ OS << "}\n";
+ EndEmitFunction(OS);
+}
+
void MatcherTableEmitter::EmitPredicateFunctions(raw_ostream &OS) {
// Emit pattern predicates.
if (!PatternPredicates.empty()) {
@@ -799,29 +850,14 @@ void MatcherTableEmitter::EmitPredicateFunctions(raw_ostream &OS) {
}
// Emit Node predicates.
- if (!NodePredicates.empty()) {
- BeginEmitFunction(OS, "bool",
- "CheckNodePredicate(SDNode *Node, unsigned PredNo) const",
- true/*AddOverride*/);
- OS << "{\n";
- OS << " switch (PredNo) {\n";
- OS << " default: llvm_unreachable(\"Invalid predicate in table?\");\n";
- for (unsigned i = 0, e = NodePredicates.size(); i != e; ++i) {
- // Emit the predicate code corresponding to this pattern.
- TreePredicateFn PredFn = NodePredicates[i];
-
- assert(!PredFn.isAlwaysTrue() && "No code in this predicate");
- OS << " case " << i << ": { \n";
- for (auto *SimilarPred :
- NodePredicatesByCodeToRun[PredFn.getCodeToRunOnSDNode()])
- OS << " // " << TreePredicateFn(SimilarPred).getFnName() <<'\n';
-
- OS << PredFn.getCodeToRunOnSDNode() << "\n }\n";
- }
- OS << " }\n";
- OS << "}\n";
- EndEmitFunction(OS);
- }
+ EmitNodePredicatesFunction(
+ NodePredicates, "CheckNodePredicate(SDNode *Node, unsigned PredNo) const",
+ OS);
+ EmitNodePredicatesFunction(
+ NodePredicatesWithOperands,
+ "CheckNodePredicateWithOperands(SDNode *Node, unsigned PredNo, "
+ "const SmallVectorImpl<SDValue> &Operands) const",
+ OS);
// Emit CompletePattern matchers.
// FIXME: This should be const.
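Editor's note: the "return 2 + OperandBytes" above accounts for the matcher-table bytes of the new OPC_CheckPredicateWithOperands entry: one byte for the opcode, one for the operand count, one per operand index, and one for the predicate number. A tiny sketch of that size computation:

#include <cassert>
#include <cstddef>

static size_t checkPredicateEntrySize(bool UsesOperands, size_t NumOps) {
  size_t OperandBytes = UsesOperands ? 1 + NumOps : 0;
  return 2 + OperandBytes; // opcode byte + predicate-index byte + operands
}

int main() {
  assert(checkPredicateEntrySize(false, 0) == 2); // OPC_CheckPredicate
  assert(checkPredicateEntrySize(true, 3) == 6);  // ...WithOperands, 3 ops
}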
diff --git a/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp
index ce23651b9682..612342ddcddf 100644
--- a/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp
@@ -33,15 +33,15 @@ static MVT::SimpleValueType getRegisterValueType(Record *R,
if (!FoundRC) {
FoundRC = true;
- ValueTypeByHwMode VVT = RC.getValueTypeNum(0);
+ const ValueTypeByHwMode &VVT = RC.getValueTypeNum(0);
if (VVT.isSimple())
VT = VVT.getSimple().SimpleTy;
continue;
}
- // If this occurs in multiple register classes, they all have to agree.
#ifndef NDEBUG
- ValueTypeByHwMode T = RC.getValueTypeNum(0);
+ // If this occurs in multiple register classes, they all have to agree.
+ const ValueTypeByHwMode &T = RC.getValueTypeNum(0);
assert((!T.isSimple() || T.getSimple().SimpleTy == VT) &&
"ValueType mismatch between register classes for this register");
#endif
@@ -120,7 +120,7 @@ namespace {
/// If this is the first time a node with unique identifier Name has been
/// seen, record it. Otherwise, emit a check to make sure this is the same
/// node. Returns true if this is the first encounter.
- bool recordUniqueNode(const std::string &Name);
+ bool recordUniqueNode(ArrayRef<std::string> Names);
// Result Code Generation.
unsigned getNamedArgumentSlot(StringRef Name) {
@@ -319,8 +319,8 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
// to handle this.
if ((N->getOperator()->getName() == "and" ||
N->getOperator()->getName() == "or") &&
- N->getChild(1)->isLeaf() && N->getChild(1)->getPredicateFns().empty() &&
- N->getPredicateFns().empty()) {
+ N->getChild(1)->isLeaf() && N->getChild(1)->getPredicateCalls().empty() &&
+ N->getPredicateCalls().empty()) {
if (IntInit *II = dyn_cast<IntInit>(N->getChild(1)->getLeafValue())) {
if (!isPowerOf2_32(II->getValue())) { // Don't bother with single bits.
// If this is at the root of the pattern, we emit a redundant
@@ -441,21 +441,39 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
}
}
-bool MatcherGen::recordUniqueNode(const std::string &Name) {
- unsigned &VarMapEntry = VariableMap[Name];
- if (VarMapEntry == 0) {
+bool MatcherGen::recordUniqueNode(ArrayRef<std::string> Names) {
+ unsigned Entry = 0;
+ for (const std::string &Name : Names) {
+ unsigned &VarMapEntry = VariableMap[Name];
+ if (!Entry)
+ Entry = VarMapEntry;
+ assert(Entry == VarMapEntry);
+ }
+
+ bool NewRecord = false;
+ if (Entry == 0) {
// If it is a named node, we must emit a 'Record' opcode.
- AddMatcher(new RecordMatcher("$" + Name, NextRecordedOperandNo));
- VarMapEntry = ++NextRecordedOperandNo;
- return true;
+ std::string WhatFor;
+ for (const std::string &Name : Names) {
+ if (!WhatFor.empty())
+ WhatFor += ',';
+ WhatFor += "$" + Name;
+ }
+ AddMatcher(new RecordMatcher(WhatFor, NextRecordedOperandNo));
+ Entry = ++NextRecordedOperandNo;
+ NewRecord = true;
+ } else {
+ // If we get here, this is a second reference to a specific name. Since
+ // we already have checked that the first reference is valid, we don't
+ // have to recursively match it, just check that it's the same as the
+ // previously named thing.
+ AddMatcher(new CheckSameMatcher(Entry-1));
}
- // If we get here, this is a second reference to a specific name. Since
- // we already have checked that the first reference is valid, we don't
- // have to recursively match it, just check that it's the same as the
- // previously named thing.
- AddMatcher(new CheckSameMatcher(VarMapEntry-1));
- return false;
+ for (const std::string &Name : Names)
+ VariableMap[Name] = Entry;
+
+ return NewRecord;
}
void MatcherGen::EmitMatchCode(const TreePatternNode *N,
@@ -475,9 +493,18 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N,
// If this node has a name associated with it, capture it in VariableMap. If
// we already saw this in the pattern, emit code to verify dagness.
+ SmallVector<std::string, 4> Names;
if (!N->getName().empty())
- if (!recordUniqueNode(N->getName()))
+ Names.push_back(N->getName());
+
+ for (const ScopedName &Name : N->getNamesAsPredicateArg()) {
+ Names.push_back(("pred:" + Twine(Name.getScope()) + ":" + Name.getIdentifier()).str());
+ }
+
+ if (!Names.empty()) {
+ if (!recordUniqueNode(Names))
return;
+ }
if (N->isLeaf())
EmitLeafMatchCode(N);
@@ -485,8 +512,19 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N,
EmitOperatorMatchCode(N, NodeNoTypes, ForceMode);
// If there are node predicates for this node, generate their checks.
- for (unsigned i = 0, e = N->getPredicateFns().size(); i != e; ++i)
- AddMatcher(new CheckPredicateMatcher(N->getPredicateFns()[i]));
+ for (unsigned i = 0, e = N->getPredicateCalls().size(); i != e; ++i) {
+ const TreePredicateCall &Pred = N->getPredicateCalls()[i];
+ SmallVector<unsigned, 4> Operands;
+ if (Pred.Fn.usesOperands()) {
+ TreePattern *TP = Pred.Fn.getOrigPatFragRecord();
+ for (unsigned i = 0; i < TP->getNumArgs(); ++i) {
+ std::string Name =
+ ("pred:" + Twine(Pred.Scope) + ":" + TP->getArgName(i)).str();
+ Operands.push_back(getNamedArgumentSlot(Name));
+ }
+ }
+ AddMatcher(new CheckPredicateMatcher(Pred.Fn, Operands));
+ }
for (unsigned i = 0, e = ResultsToTypeCheck.size(); i != e; ++i)
AddMatcher(new CheckTypeMatcher(N->getSimpleType(ResultsToTypeCheck[i]),
@@ -962,9 +1000,16 @@ void MatcherGen::EmitResultCode() {
}
assert(Ops.size() >= NumSrcResults && "Didn't provide enough results");
- Ops.resize(NumSrcResults);
+ SmallVector<unsigned, 8> Results(Ops);
+
+ // Apply result permutation.
+ for (unsigned ResNo = 0; ResNo < Pattern.getDstPattern()->getNumResults();
+ ++ResNo) {
+ Results[ResNo] = Ops[Pattern.getDstPattern()->getResultIndex(ResNo)];
+ }
- AddMatcher(new CompleteMatchMatcher(Ops, Pattern));
+ Results.resize(NumSrcResults);
+ AddMatcher(new CompleteMatchMatcher(Results, Pattern));
}
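Editor's note: a simplified sketch of the multi-name bookkeeping in the rewritten recordUniqueNode above. Every name in the group maps to one recorded-operand slot, and only the first encounter allocates a new slot (the real code also asserts that all names in the group agree on the slot):

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Returns true when a new slot was allocated for this group of names.
static bool recordUniqueNode(std::map<std::string, unsigned> &VariableMap,
                             const std::vector<std::string> &Names,
                             unsigned &NextSlot) {
  unsigned Entry = 0;
  for (const std::string &Name : Names)
    if (unsigned E = VariableMap[Name])
      Entry = E; // an alias in the group was already recorded
  bool NewRecord = (Entry == 0);
  if (NewRecord)
    Entry = ++NextSlot;
  for (const std::string &Name : Names)
    VariableMap[Name] = Entry;
  return NewRecord;
}

int main() {
  std::map<std::string, unsigned> VarMap;
  unsigned NextSlot = 0;
  assert(recordUniqueNode(VarMap, {"src", "pred:1:x"}, NextSlot)); // first use
  assert(!recordUniqueNode(VarMap, {"src"}, NextSlot));            // reuse
  assert(VarMap["src"] == VarMap["pred:1:x"]);
}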
diff --git a/contrib/llvm/utils/TableGen/ExegesisEmitter.cpp b/contrib/llvm/utils/TableGen/ExegesisEmitter.cpp
new file mode 100644
index 000000000000..208237aca20c
--- /dev/null
+++ b/contrib/llvm/utils/TableGen/ExegesisEmitter.cpp
@@ -0,0 +1,216 @@
+//===- ExegesisEmitter.cpp - Generate exegesis target data ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits llvm-exegesis information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "exegesis-emitter"
+
+namespace {
+
+class ExegesisEmitter {
+public:
+ ExegesisEmitter(RecordKeeper &RK);
+
+ void run(raw_ostream &OS) const;
+
+private:
+ unsigned getPfmCounterId(llvm::StringRef Name) const {
+ const auto It = PfmCounterNameTable.find(Name);
+ if (It == PfmCounterNameTable.end())
+ PrintFatalError("no pfm counter id for " + Name);
+ return It->second;
+ }
+
+ // Collects all the ProcPfmCounters definitions available in this target.
+ void emitPfmCounters(raw_ostream &OS) const;
+
+ void emitPfmCountersInfo(const Record &Def,
+ unsigned &IssueCountersTableOffset,
+ raw_ostream &OS) const;
+
+ void emitPfmCountersLookupTable(raw_ostream &OS) const;
+
+ RecordKeeper &Records;
+ std::string Target;
+
+ // Table of counter name -> counter index.
+ const std::map<llvm::StringRef, unsigned> PfmCounterNameTable;
+};
+
+static std::map<llvm::StringRef, unsigned>
+collectPfmCounters(const RecordKeeper &Records) {
+ std::map<llvm::StringRef, unsigned> PfmCounterNameTable;
+ const auto AddPfmCounterName = [&PfmCounterNameTable](
+ const Record *PfmCounterDef) {
+ const llvm::StringRef Counter = PfmCounterDef->getValueAsString("Counter");
+ if (!Counter.empty())
+ PfmCounterNameTable.emplace(Counter, 0);
+ };
+ for (Record *Def : Records.getAllDerivedDefinitions("ProcPfmCounters")) {
+ // Check that ResourceNames are unique.
+ llvm::SmallSet<llvm::StringRef, 16> Seen;
+ for (const Record *IssueCounter :
+ Def->getValueAsListOfDefs("IssueCounters")) {
+ const llvm::StringRef ResourceName =
+ IssueCounter->getValueAsString("ResourceName");
+ if (ResourceName.empty())
+ PrintFatalError(IssueCounter->getLoc(), "invalid empty ResourceName");
+ if (!Seen.insert(ResourceName).second)
+ PrintFatalError(IssueCounter->getLoc(),
+ "duplicate ResourceName " + ResourceName);
+ AddPfmCounterName(IssueCounter);
+ }
+ AddPfmCounterName(Def->getValueAsDef("CycleCounter"));
+ AddPfmCounterName(Def->getValueAsDef("UopsCounter"));
+ }
+ unsigned Index = 0;
+ for (auto &NameAndIndex : PfmCounterNameTable)
+ NameAndIndex.second = Index++;
+ return PfmCounterNameTable;
+}
+
+ExegesisEmitter::ExegesisEmitter(RecordKeeper &RK)
+ : Records(RK), PfmCounterNameTable(collectPfmCounters(RK)) {
+ std::vector<Record *> Targets = Records.getAllDerivedDefinitions("Target");
+ if (Targets.size() == 0)
+ PrintFatalError("ERROR: No 'Target' subclasses defined!");
+ if (Targets.size() != 1)
+ PrintFatalError("ERROR: Multiple subclasses of Target defined!");
+ Target = Targets[0]->getName();
+}
+
+void ExegesisEmitter::emitPfmCountersInfo(const Record &Def,
+ unsigned &IssueCountersTableOffset,
+ raw_ostream &OS) const {
+ const auto CycleCounter =
+ Def.getValueAsDef("CycleCounter")->getValueAsString("Counter");
+ const auto UopsCounter =
+ Def.getValueAsDef("UopsCounter")->getValueAsString("Counter");
+ const size_t NumIssueCounters =
+ Def.getValueAsListOfDefs("IssueCounters").size();
+
+ OS << "\nstatic const PfmCountersInfo " << Target << Def.getName()
+ << " = {\n";
+
+ // Cycle Counter.
+ if (CycleCounter.empty())
+ OS << " nullptr, // No cycle counter.\n";
+ else
+ OS << " " << Target << "PfmCounterNames[" << getPfmCounterId(CycleCounter)
+ << "], // Cycle counter\n";
+
+ // Uops Counter.
+ if (UopsCounter.empty())
+ OS << " nullptr, // No uops counter.\n";
+ else
+ OS << " " << Target << "PfmCounterNames[" << getPfmCounterId(UopsCounter)
+ << "], // Uops counter\n";
+
+ // Issue Counters
+ if (NumIssueCounters == 0)
+ OS << " nullptr, // No issue counters.\n 0\n";
+ else
+ OS << " " << Target << "PfmIssueCounters + " << IssueCountersTableOffset
+ << ", " << NumIssueCounters << " // Issue counters.\n";
+
+ OS << "};\n";
+ IssueCountersTableOffset += NumIssueCounters;
+}
+
+void ExegesisEmitter::emitPfmCounters(raw_ostream &OS) const {
+ // Emit the counter name table.
+ OS << "\nstatic const char* " << Target << "PfmCounterNames[] = {\n";
+ for (const auto &NameAndIndex : PfmCounterNameTable)
+ OS << " \"" << NameAndIndex.first << "\", // " << NameAndIndex.second
+ << "\n";
+ OS << "};\n\n";
+
+ // Emit the IssueCounters table.
+ const auto PfmCounterDefs =
+ Records.getAllDerivedDefinitions("ProcPfmCounters");
+ // Only emit if non-empty.
+ const bool HasAtLeastOnePfmIssueCounter =
+ llvm::any_of(PfmCounterDefs, [](const Record *Def) {
+ return !Def->getValueAsListOfDefs("IssueCounters").empty();
+ });
+ if (HasAtLeastOnePfmIssueCounter) {
+ OS << "static const PfmCountersInfo::IssueCounter " << Target
+ << "PfmIssueCounters[] = {\n";
+ for (const Record *Def : PfmCounterDefs) {
+ for (const Record *ICDef : Def->getValueAsListOfDefs("IssueCounters"))
+ OS << " { " << Target << "PfmCounterNames["
+ << getPfmCounterId(ICDef->getValueAsString("Counter")) << "], \""
+ << ICDef->getValueAsString("ResourceName") << "\"},\n";
+ }
+ OS << "};\n";
+ }
+
+ // Now generate the PfmCountersInfo.
+ unsigned IssueCountersTableOffset = 0;
+ for (const Record *Def : PfmCounterDefs)
+ emitPfmCountersInfo(*Def, IssueCountersTableOffset, OS);
+
+ OS << "\n";
+}
+
+void ExegesisEmitter::emitPfmCountersLookupTable(raw_ostream &OS) const {
+ std::vector<Record *> Bindings =
+ Records.getAllDerivedDefinitions("PfmCountersBinding");
+ assert(!Bindings.empty() && "there must be at least one binding");
+ llvm::sort(Bindings, [](const Record *L, const Record *R) {
+ return L->getValueAsString("CpuName") < R->getValueAsString("CpuName");
+ });
+
+ OS << "// Sorted (by CpuName) array of pfm counters.\n"
+ << "static const CpuAndPfmCounters " << Target << "CpuPfmCounters[] = {\n";
+ for (Record *Binding : Bindings) {
+ // Emit as { "cpu", procinit },
+ OS << " { \"" //
+ << Binding->getValueAsString("CpuName") << "\"," //
+ << " &" << Target << Binding->getValueAsDef("Counters")->getName() //
+ << " },\n";
+ }
+ OS << "};\n\n";
+}
+
+void ExegesisEmitter::run(raw_ostream &OS) const {
+ emitSourceFileHeader("Exegesis Tables", OS);
+ emitPfmCounters(OS);
+ emitPfmCountersLookupTable(OS);
+}
+
+} // end anonymous namespace
+
+namespace llvm {
+
+void EmitExegesis(RecordKeeper &RK, raw_ostream &OS) {
+ ExegesisEmitter(RK).run(OS);
+}
+
+} // end namespace llvm
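For concreteness, the tables this backend generates look roughly like the following for a hypothetical target Foo with a cycle counter, no uops counter, and one issue counter bound to one CPU (every name below is invented for illustration; PfmCountersInfo and CpuAndPfmCounters are the consuming types on the llvm-exegesis side):

static const char* FooPfmCounterNames[] = {
  "CYCLES",      // 0
  "ISSUE_PORT0", // 1
};

static const PfmCountersInfo::IssueCounter FooPfmIssueCounters[] = {
  { FooPfmCounterNames[1], "FooPort0"},
};

static const PfmCountersInfo FooDefaultPfmCounters = {
  FooPfmCounterNames[0], // Cycle counter
  nullptr, // No uops counter.
  FooPfmIssueCounters + 0, 1 // Issue counters.
};

// Sorted (by CpuName) array of pfm counters.
static const CpuAndPfmCounters FooCpuPfmCounters[] = {
  { "foo-cpu", &FooDefaultPfmCounters },
};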
diff --git a/contrib/llvm/utils/TableGen/FastISelEmitter.cpp b/contrib/llvm/utils/TableGen/FastISelEmitter.cpp
index c0902e4c6f1a..5134b684c6f9 100644
--- a/contrib/llvm/utils/TableGen/FastISelEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/FastISelEmitter.cpp
@@ -39,11 +39,12 @@ struct InstructionMemo {
std::vector<std::string> PhysRegs;
std::string PredicateCheck;
- InstructionMemo(std::string Name, const CodeGenRegisterClass *RC,
+ InstructionMemo(StringRef Name, const CodeGenRegisterClass *RC,
std::string SubRegNo, std::vector<std::string> PhysRegs,
std::string PredicateCheck)
- : Name(Name), RC(RC), SubRegNo(SubRegNo), PhysRegs(PhysRegs),
- PredicateCheck(PredicateCheck) {}
+ : Name(Name), RC(RC), SubRegNo(std::move(SubRegNo)),
+ PhysRegs(std::move(PhysRegs)),
+ PredicateCheck(std::move(PredicateCheck)) {}
// Make sure we do not copy InstructionMemo.
InstructionMemo(const InstructionMemo &Other) = delete;
@@ -209,13 +210,13 @@ struct OperandsSignature {
// Handle imm operands specially.
if (!Op->isLeaf() && Op->getOperator()->getName() == "imm") {
unsigned PredNo = 0;
- if (!Op->getPredicateFns().empty()) {
- TreePredicateFn PredFn = Op->getPredicateFns()[0];
+ if (!Op->getPredicateCalls().empty()) {
+ TreePredicateFn PredFn = Op->getPredicateCalls()[0].Fn;
// If there is more than one predicate weighing in on this operand
// then we don't handle it. This doesn't typically happen for
// immediates anyway.
- if (Op->getPredicateFns().size() > 1 ||
- !PredFn.isImmediatePattern())
+ if (Op->getPredicateCalls().size() > 1 ||
+ !PredFn.isImmediatePattern() || PredFn.usesOperands())
return false;
// Ignore any instruction with 'FastIselShouldIgnore', these are
// not needed and just bloat the fast instruction selector. For
@@ -235,7 +236,7 @@ struct OperandsSignature {
// For now, filter out any operand with a predicate.
// For now, filter out any operand with multiple values.
- if (!Op->getPredicateFns().empty() || Op->getNumTypes() != 1)
+ if (!Op->getPredicateCalls().empty() || Op->getNumTypes() != 1)
return false;
if (!Op->isLeaf()) {
@@ -528,7 +529,7 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
}
// For now, filter out any instructions with predicates.
- if (!InstPatNode->getPredicateFns().empty())
+ if (!InstPatNode->getPredicateCalls().empty())
continue;
// Check all the operands.
@@ -828,7 +829,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) {
= SignaturesWithConstantForms.find(Operands);
if (MI != SignaturesWithConstantForms.end()) {
// Unique any duplicates out of the list.
- llvm::sort(MI->second.begin(), MI->second.end());
+ llvm::sort(MI->second);
MI->second.erase(std::unique(MI->second.begin(), MI->second.end()),
MI->second.end());
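The call site above now uses the range form of llvm::sort followed by the usual erase/unique compaction; a minimal self-contained restatement of that deduplication idiom in plain C++:

#include <algorithm>
#include <vector>

// Sort so equal elements become adjacent, then drop the tail that
// std::unique compacts away, leaving each value exactly once.
template <typename T> void sortAndUnique(std::vector<T> &V) {
  std::sort(V.begin(), V.end());
  V.erase(std::unique(V.begin(), V.end()), V.end());
}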
diff --git a/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp b/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
index 76ba1c001092..5e621fc0efdd 100644
--- a/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -87,8 +87,23 @@ struct DecoderTableInfo {
DecoderSet Decoders;
};
+struct EncodingAndInst {
+ const Record *EncodingDef;
+ const CodeGenInstruction *Inst;
+
+ EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst)
+ : EncodingDef(EncodingDef), Inst(Inst) {}
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
+ if (Value.EncodingDef != Value.Inst->TheDef)
+ OS << Value.EncodingDef->getName() << ":";
+ OS << Value.Inst->TheDef->getName();
+ return OS;
+}
+
class FixedLenDecoderEmitter {
- ArrayRef<const CodeGenInstruction *> NumberedInstructions;
+ std::vector<EncodingAndInst> NumberedEncodings;
public:
// Defaults preserved here for documentation, even though they aren't
@@ -323,7 +338,7 @@ protected:
friend class Filter;
// Vector of codegen instructions to choose our filter.
- ArrayRef<const CodeGenInstruction *> AllInstructions;
+ ArrayRef<EncodingAndInst> AllInstructions;
// Vector of uid's for this filter chooser to work on.
const std::vector<unsigned> &Opcodes;
@@ -351,25 +366,24 @@ protected:
const FixedLenDecoderEmitter *Emitter;
public:
- FilterChooser(ArrayRef<const CodeGenInstruction *> Insts,
+ FilterChooser(ArrayRef<EncodingAndInst> Insts,
const std::vector<unsigned> &IDs,
const std::map<unsigned, std::vector<OperandInfo>> &Ops,
- unsigned BW,
- const FixedLenDecoderEmitter *E)
- : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
- FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
- BitWidth(BW), Emitter(E) {
+ unsigned BW, const FixedLenDecoderEmitter *E)
+ : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
+ FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
+ BitWidth(BW), Emitter(E) {
doFilter();
}
- FilterChooser(ArrayRef<const CodeGenInstruction *> Insts,
+ FilterChooser(ArrayRef<EncodingAndInst> Insts,
const std::vector<unsigned> &IDs,
const std::map<unsigned, std::vector<OperandInfo>> &Ops,
const std::vector<bit_value_t> &ParentFilterBitValues,
const FilterChooser &parent)
- : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
- FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
- BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
+ : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
+ FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
+ BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
doFilter();
}
@@ -381,7 +395,7 @@ public:
protected:
// Populates the insn given the uid.
void insnWithID(insn_t &Insn, unsigned Opcode) const {
- BitsInit &Bits = getBitsField(*AllInstructions[Opcode]->TheDef, "Inst");
+ BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst");
// We may have a SoftFail bitmask, which specifies a mask where an encoding
// may differ from the value in "Inst" and yet still be valid, but the
@@ -389,7 +403,7 @@ protected:
//
// This is used for marking UNPREDICTABLE instructions in the ARM world.
BitsInit *SFBits =
- AllInstructions[Opcode]->TheDef->getValueAsBitsInit("SoftFail");
+ AllInstructions[Opcode].EncodingDef->getValueAsBitsInit("SoftFail");
for (unsigned i = 0; i < BitWidth; ++i) {
if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE)
@@ -399,11 +413,6 @@ protected:
}
}
- // Returns the record name.
- const StringRef nameWithID(unsigned Opcode) const {
- return AllInstructions[Opcode]->TheDef->getName();
- }
-
// Populates the field of the insn given the start position and the number of
// consecutive bits to scan for.
//
@@ -827,8 +836,7 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
OS << (unsigned)*I++ << ", ";
if (!IsTry) {
- OS << "// Opcode: "
- << NumberedInstructions[Opc]->TheDef->getName() << "\n";
+ OS << "// Opcode: " << NumberedEncodings[Opc] << "\n";
break;
}
@@ -845,8 +853,7 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
OS << utostr(Byte) << ", ";
NumToSkip |= Byte << 16;
- OS << "// Opcode: "
- << NumberedInstructions[Opc]->TheDef->getName()
+ OS << "// Opcode: " << NumberedEncodings[Opc]
<< ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
break;
}
@@ -1153,7 +1160,7 @@ static void emitSinglePredicateMatch(raw_ostream &o, StringRef str,
bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
unsigned Opc) const {
ListInit *Predicates =
- AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates");
+ AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
bool IsFirstEmission = true;
for (unsigned i = 0; i < Predicates->size(); ++i) {
Record *Pred = Predicates->getElementAsRecord(i);
@@ -1182,7 +1189,7 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
ListInit *Predicates =
- AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates");
+ AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
for (unsigned i = 0; i < Predicates->size(); ++i) {
Record *Pred = Predicates->getElementAsRecord(i);
if (!Pred->getValue("AssemblerMatcherPredicate"))
@@ -1247,9 +1254,10 @@ void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
unsigned Opc) const {
BitsInit *SFBits =
- AllInstructions[Opc]->TheDef->getValueAsBitsInit("SoftFail");
+ AllInstructions[Opc].EncodingDef->getValueAsBitsInit("SoftFail");
if (!SFBits) return;
- BitsInit *InstBits = AllInstructions[Opc]->TheDef->getValueAsBitsInit("Inst");
+ BitsInit *InstBits =
+ AllInstructions[Opc].EncodingDef->getValueAsBitsInit("Inst");
APInt PositiveMask(BitWidth, 0ULL);
APInt NegativeMask(BitWidth, 0ULL);
@@ -1270,9 +1278,9 @@ void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
break;
default:
// The bit is not set; this must be an error!
- StringRef Name = AllInstructions[Opc]->TheDef->getName();
- errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " << Name
- << " is set but Inst{" << i << "} is unset!\n"
+ errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
+ << AllInstructions[Opc] << " is set but Inst{" << i
+ << "} is unset!\n"
<< " - You can only mark a bit as SoftFail if it is fully defined"
<< " (1/0 - not '?') in Inst\n";
return;
@@ -1709,9 +1717,9 @@ void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
dumpStack(errs(), "\t\t");
for (unsigned i = 0; i < Opcodes.size(); ++i) {
- errs() << '\t' << nameWithID(Opcodes[i]) << " ";
+ errs() << '\t' << AllInstructions[Opcodes[i]] << " ";
dumpBits(errs(),
- getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
+ getBitsField(*AllInstructions[Opcodes[i]].EncodingDef, "Inst"));
errs() << '\n';
}
}
@@ -2067,21 +2075,59 @@ static bool populateInstruction(CodeGenTarget &Target,
// using the VS compiler. It has a bug which causes the function
// to be optimized out in some circumstances. See llvm.org/pr38292
static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
- OS << "// Helper function for extracting fields from encoded instructions.\n"
+ OS << "// Helper functions for extracting fields from encoded instructions.\n"
+ << "// InsnType must either be integral or an APInt-like object that "
+ "must:\n"
+ << "// * Have a static const max_size_in_bits equal to the number of bits "
+ "in the\n"
+ << "// encoding.\n"
+ << "// * be default-constructible and copy-constructible\n"
+ << "// * be constructible from a uint64_t\n"
+ << "// * be constructible from an APInt (this can be private)\n"
+ << "// * Support getBitsSet(loBit, hiBit)\n"
+ << "// * be convertible to uint64_t\n"
+ << "// * Support the ~, &, ==, !=, and |= operators with other objects of "
+ "the same type\n"
+ << "// * Support shift (<<, >>) with signed and unsigned integers on the "
+ "RHS\n"
+ << "// * Support put (<<) to raw_ostream&\n"
<< "template<typename InsnType>\n"
<< "#if defined(_MSC_VER) && !defined(__clang__)\n"
<< "__declspec(noinline)\n"
<< "#endif\n"
- << "static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,\n"
+ << "static InsnType fieldFromInstruction(InsnType insn, unsigned "
+ "startBit,\n"
+ << " unsigned numBits, "
+ "std::true_type) {\n"
+ << " assert(startBit + numBits <= 64 && \"Cannot support >64-bit "
+ "extractions!\");\n"
+ << " assert(startBit + numBits <= (sizeof(InsnType) * 8) &&\n"
+ << " \"Instruction field out of bounds!\");\n"
+ << " InsnType fieldMask;\n"
+ << " if (numBits == sizeof(InsnType) * 8)\n"
+ << " fieldMask = (InsnType)(-1LL);\n"
+ << " else\n"
+ << " fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n"
+ << " return (insn & fieldMask) >> startBit;\n"
+ << "}\n"
+ << "\n"
+ << "template<typename InsnType>\n"
+ << "static InsnType fieldFromInstruction(InsnType insn, unsigned "
+ "startBit,\n"
+ << " unsigned numBits, "
+ "std::false_type) {\n"
+ << " assert(startBit + numBits <= InsnType::max_size_in_bits && "
+ "\"Instruction field out of bounds!\");\n"
+ << " InsnType fieldMask = InsnType::getBitsSet(0, numBits);\n"
+ << " return (insn >> startBit) & fieldMask;\n"
+ << "}\n"
+ << "\n"
+ << "template<typename InsnType>\n"
+ << "static InsnType fieldFromInstruction(InsnType insn, unsigned "
+ "startBit,\n"
<< " unsigned numBits) {\n"
- << " assert(startBit + numBits <= (sizeof(InsnType)*8) &&\n"
- << " \"Instruction field out of bounds!\");\n"
- << " InsnType fieldMask;\n"
- << " if (numBits == sizeof(InsnType)*8)\n"
- << " fieldMask = (InsnType)(-1LL);\n"
- << " else\n"
- << " fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n"
- << " return (insn & fieldMask) >> startBit;\n"
+ << " return fieldFromInstruction(insn, startBit, numBits, "
+ "std::is_integral<InsnType>());\n"
<< "}\n\n";
}
@@ -2288,13 +2334,17 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
Target.reverseBitsForLittleEndianEncoding();
// Parameterize the decoders based on namespace and instruction width.
- NumberedInstructions = Target.getInstructionsByEnumValue();
+ const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
+ NumberedEncodings.reserve(NumberedInstructions.size());
+ for (const auto &NumberedInstruction : NumberedInstructions)
+ NumberedEncodings.emplace_back(NumberedInstruction->TheDef, NumberedInstruction);
+
std::map<std::pair<std::string, unsigned>,
std::vector<unsigned>> OpcMap;
std::map<unsigned, std::vector<OperandInfo>> Operands;
- for (unsigned i = 0; i < NumberedInstructions.size(); ++i) {
- const CodeGenInstruction *Inst = NumberedInstructions[i];
+ for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {
+ const CodeGenInstruction *Inst = NumberedEncodings[i].Inst;
const Record *Def = Inst->TheDef;
unsigned Size = Def->getValueAsInt("Size");
if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
@@ -2315,8 +2365,10 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
DecoderTableInfo TableInfo;
for (const auto &Opc : OpcMap) {
// Emit the decoder for this namespace+width combination.
- FilterChooser FC(NumberedInstructions, Opc.second, Operands,
- 8*Opc.first.second, this);
+ ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(),
+ NumberedEncodings.size());
+ FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
+ 8 * Opc.first.second, this);
// The decode table is cleared for each top level decoder function. The
// predicates and decoders themselves, however, are shared across all
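Reassembled from the emitted strings above, the generated fieldFromInstruction helper now tag-dispatches on std::is_integral: integral encodings keep the old mask-and-shift path, while APInt-like encodings use their own getBitsSet. A sketch of the generated code (assumes <cassert> and <type_traits> are already included by the decoder translation unit):

template <typename InsnType>
static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,
                                     unsigned numBits, std::true_type) {
  // Integral path: build a numBits-wide mask starting at startBit.
  assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!");
  assert(startBit + numBits <= (sizeof(InsnType) * 8) &&
         "Instruction field out of bounds!");
  InsnType fieldMask;
  if (numBits == sizeof(InsnType) * 8)
    fieldMask = (InsnType)(-1LL);
  else
    fieldMask = (((InsnType)1 << numBits) - 1) << startBit;
  return (insn & fieldMask) >> startBit;
}

template <typename InsnType>
static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,
                                     unsigned numBits, std::false_type) {
  // APInt-like path: the type provides its own bit-set helper.
  assert(startBit + numBits <= InsnType::max_size_in_bits &&
         "Instruction field out of bounds!");
  InsnType fieldMask = InsnType::getBitsSet(0, numBits);
  return (insn >> startBit) & fieldMask;
}

template <typename InsnType>
static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,
                                     unsigned numBits) {
  return fieldFromInstruction(insn, startBit, numBits,
                              std::is_integral<InsnType>());
}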
diff --git a/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp b/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 69726cc9f257..997ceb12becd 100644
--- a/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -200,7 +200,8 @@ static Optional<LLTCodeGen> MVTToLLT(MVT::SimpleValueType SVT) {
static std::string explainPredicates(const TreePatternNode *N) {
std::string Explanation = "";
StringRef Separator = "";
- for (const auto &P : N->getPredicateFns()) {
+ for (const TreePredicateCall &Call : N->getPredicateCalls()) {
+ const TreePredicateFn &P = Call.Fn;
Explanation +=
(Separator + P.getOrigPatFragRecord()->getRecord()->getName()).str();
Separator = ", ";
@@ -284,7 +285,9 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
std::string Separator = "";
bool HasUnsupportedPredicate = false;
- for (const auto &Predicate : N->getPredicateFns()) {
+ for (const TreePredicateCall &Call : N->getPredicateCalls()) {
+ const TreePredicateFn &Predicate = Call.Fn;
+
if (Predicate.isAlwaysTrue())
continue;
@@ -1837,6 +1840,12 @@ public:
static bool classof(const InstructionPredicateMatcher *P) {
return P->getKind() == IPM_GenericPredicate;
}
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return InstructionPredicateMatcher::isIdentical(B) &&
+ Predicate ==
+ static_cast<const GenericInstructionPredicateMatcher &>(B)
+ .Predicate;
+ }
void emitPredicateOpcodes(MatchTable &Table,
RuleMatcher &Rule) const override {
Table << MatchTable::Opcode("GIM_CheckCxxInsnPredicate")
@@ -2607,7 +2616,7 @@ public:
std::vector<unsigned> MergeInsnIDs;
for (const auto &IDMatcherPair : Rule.defined_insn_vars())
MergeInsnIDs.push_back(IDMatcherPair.second);
- llvm::sort(MergeInsnIDs.begin(), MergeInsnIDs.end());
+ llvm::sort(MergeInsnIDs);
for (const auto &MergeInsnID : MergeInsnIDs)
Table << MatchTable::IntValue(MergeInsnID);
Table << MatchTable::NamedValue("GIU_MergeMemOperands_EndOfList")
@@ -2806,7 +2815,7 @@ void RuleMatcher::emit(MatchTable &Table) {
InsnIDs.push_back(Pair.second);
}
- llvm::sort(InsnIDs.begin(), InsnIDs.end());
+ llvm::sort(InsnIDs);
for (const auto &InsnID : InsnIDs) {
// Reject the difficult cases until we have a more accurate check.
@@ -2984,9 +2993,6 @@ private:
void gatherOpcodeValues();
void gatherTypeIDValues();
void gatherNodeEquivs();
- // Instruction predicate code that will be emitted in generated functions.
- SmallVector<std::string, 2> InstructionPredicateCodes;
- unsigned getOrCreateInstructionPredicateFnId(StringRef Code);
Record *findNodeEquiv(Record *N) const;
const CodeGenInstruction *getEquivNode(Record &Equiv,
@@ -3085,20 +3091,6 @@ void GlobalISelEmitter::gatherOpcodeValues() {
void GlobalISelEmitter::gatherTypeIDValues() {
LLTOperandMatcher::initTypeIDValuesMap();
}
-unsigned GlobalISelEmitter::getOrCreateInstructionPredicateFnId(StringRef Code) {
- // There's not very many predicates that need to be here at the moment so we
- // just maintain a simple set-like vector. If it grows then we'll need to do
- // something more efficient.
- const auto &I = std::find(InstructionPredicateCodes.begin(),
- InstructionPredicateCodes.end(),
- Code);
- if (I == InstructionPredicateCodes.end()) {
- unsigned ID = InstructionPredicateCodes.size();
- InstructionPredicateCodes.push_back(Code);
- return ID;
- }
- return std::distance(InstructionPredicateCodes.begin(), I);
-}
void GlobalISelEmitter::gatherNodeEquivs() {
assert(NodeEquivs.empty());
@@ -3128,7 +3120,8 @@ Record *GlobalISelEmitter::findNodeEquiv(Record *N) const {
const CodeGenInstruction *
GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
- for (const auto &Predicate : N->getPredicateFns()) {
+ for (const TreePredicateCall &Call : N->getPredicateCalls()) {
+ const TreePredicateFn &Predicate = Call.Fn;
if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() &&
Predicate.isSignExtLoad())
return &Target.getInstruction(Equiv.getValueAsDef("IfSignExtend"));
@@ -3197,7 +3190,8 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
" for result of Src pattern operator");
}
- for (const auto &Predicate : Src->getPredicateFns()) {
+ for (const TreePredicateCall &Call : Src->getPredicateCalls()) {
+ const TreePredicateFn &Predicate = Call.Fn;
if (Predicate.isAlwaysTrue())
continue;
@@ -4299,11 +4293,11 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
std::vector<Record *> ComplexPredicates =
RK.getAllDerivedDefinitions("GIComplexOperandMatcher");
- llvm::sort(ComplexPredicates.begin(), ComplexPredicates.end(), orderByName);
+ llvm::sort(ComplexPredicates, orderByName);
std::vector<Record *> CustomRendererFns =
RK.getAllDerivedDefinitions("GICustomOperandRenderer");
- llvm::sort(CustomRendererFns.begin(), CustomRendererFns.end(), orderByName);
+ llvm::sort(CustomRendererFns, orderByName);
unsigned MaxTemporaries = 0;
for (const auto &Rule : Rules)
@@ -4382,7 +4376,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
std::vector<LLTCodeGen> TypeObjects;
for (const auto &Ty : KnownTypes)
TypeObjects.push_back(Ty);
- llvm::sort(TypeObjects.begin(), TypeObjects.end());
+ llvm::sort(TypeObjects);
OS << "// LLT Objects.\n"
<< "enum {\n";
for (const auto &TypeObject : TypeObjects) {
@@ -4405,21 +4399,20 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
std::vector<std::vector<Record *>> FeatureBitsets;
for (auto &Rule : Rules)
FeatureBitsets.push_back(Rule.getRequiredFeatures());
- llvm::sort(
- FeatureBitsets.begin(), FeatureBitsets.end(),
- [&](const std::vector<Record *> &A, const std::vector<Record *> &B) {
- if (A.size() < B.size())
- return true;
- if (A.size() > B.size())
- return false;
- for (const auto &Pair : zip(A, B)) {
- if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
- return true;
- if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
- return false;
- }
+ llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A,
+ const std::vector<Record *> &B) {
+ if (A.size() < B.size())
+ return true;
+ if (A.size() > B.size())
+ return false;
+ for (const auto &Pair : zip(A, B)) {
+ if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
+ return true;
+ if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
return false;
- });
+ }
+ return false;
+ });
FeatureBitsets.erase(
std::unique(FeatureBitsets.begin(), FeatureBitsets.end()),
FeatureBitsets.end());
@@ -4588,13 +4581,11 @@ void RuleMatcher::optimize() {
}
InsnMatcher.optimize();
}
- llvm::sort(
- EpilogueMatchers.begin(), EpilogueMatchers.end(),
- [](const std::unique_ptr<PredicateMatcher> &L,
- const std::unique_ptr<PredicateMatcher> &R) {
- return std::make_tuple(L->getKind(), L->getInsnVarID(), L->getOpIdx()) <
- std::make_tuple(R->getKind(), R->getInsnVarID(), R->getOpIdx());
- });
+ llvm::sort(EpilogueMatchers, [](const std::unique_ptr<PredicateMatcher> &L,
+ const std::unique_ptr<PredicateMatcher> &R) {
+ return std::make_tuple(L->getKind(), L->getInsnVarID(), L->getOpIdx()) <
+ std::make_tuple(R->getKind(), R->getInsnVarID(), R->getOpIdx());
+ });
}
bool RuleMatcher::hasFirstCondition() const {
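The EpilogueMatchers ordering above leans on std::make_tuple for lexicographic comparison over several keys; the same pattern in a small standalone form (struct and field names are placeholders):

#include <algorithm>
#include <tuple>
#include <vector>

struct Matcher { int Kind, InsnVarID, OpIdx; };

// Order by (Kind, InsnVarID, OpIdx); tuples compare lexicographically,
// so later keys only break ties among earlier ones.
void orderMatchers(std::vector<Matcher> &Ms) {
  std::sort(Ms.begin(), Ms.end(), [](const Matcher &L, const Matcher &R) {
    return std::make_tuple(L.Kind, L.InsnVarID, L.OpIdx) <
           std::make_tuple(R.Kind, R.InsnVarID, R.OpIdx);
  });
}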
diff --git a/contrib/llvm/utils/TableGen/InfoByHwMode.cpp b/contrib/llvm/utils/TableGen/InfoByHwMode.cpp
index 7d1f71cc2647..086e12dafd74 100644
--- a/contrib/llvm/utils/TableGen/InfoByHwMode.cpp
+++ b/contrib/llvm/utils/TableGen/InfoByHwMode.cpp
@@ -84,7 +84,7 @@ void ValueTypeByHwMode::writeToStream(raw_ostream &OS) const {
std::vector<const PairType*> Pairs;
for (const auto &P : Map)
Pairs.push_back(&P);
- llvm::sort(Pairs.begin(), Pairs.end(), deref<std::less<PairType>>());
+ llvm::sort(Pairs, deref<std::less<PairType>>());
OS << '{';
for (unsigned i = 0, e = Pairs.size(); i != e; ++i) {
@@ -176,7 +176,7 @@ void RegSizeInfoByHwMode::writeToStream(raw_ostream &OS) const {
std::vector<const PairType*> Pairs;
for (const auto &P : Map)
Pairs.push_back(&P);
- llvm::sort(Pairs.begin(), Pairs.end(), deref<std::less<PairType>>());
+ llvm::sort(Pairs, deref<std::less<PairType>>());
OS << '{';
for (unsigned i = 0, e = Pairs.size(); i != e; ++i) {
diff --git a/contrib/llvm/utils/TableGen/InfoByHwMode.h b/contrib/llvm/utils/TableGen/InfoByHwMode.h
index 4838198e704d..7be4678f271b 100644
--- a/contrib/llvm/utils/TableGen/InfoByHwMode.h
+++ b/contrib/llvm/utils/TableGen/InfoByHwMode.h
@@ -47,10 +47,12 @@ std::vector<unsigned> union_modes(const InfoByHwMode<InfoT> &A,
for (const auto &P : B)
U.insert(P.first);
// Make sure that the default mode is last on the list.
- bool HasDefault = U.count(DefaultMode);
+ bool HasDefault = false;
for (unsigned M : U)
if (M != DefaultMode)
V.push_back(M);
+ else
+ HasDefault = true;
if (HasDefault)
V.push_back(DefaultMode);
return V;
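The rewritten loop preserves the original contract: every mode present in either map is returned, with the default mode, if present, moved to the end. A standalone sketch of that behaviour, assuming the TableGen convention that DefaultMode is id 0:

#include <set>
#include <vector>

constexpr unsigned DefaultMode = 0; // assumption: default hw-mode id

// Union of two mode sets with DefaultMode forced to the back, mirroring
// the union_modes loop above.
std::vector<unsigned> unionModes(const std::set<unsigned> &A,
                                 const std::set<unsigned> &B) {
  std::set<unsigned> U(A);
  U.insert(B.begin(), B.end());
  std::vector<unsigned> V;
  bool HasDefault = false;
  for (unsigned M : U)
    if (M != DefaultMode)
      V.push_back(M);
    else
      HasDefault = true;
  if (HasDefault)
    V.push_back(DefaultMode);
  return V;
}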
diff --git a/contrib/llvm/utils/TableGen/InstrDocsEmitter.cpp b/contrib/llvm/utils/TableGen/InstrDocsEmitter.cpp
index 65cb28cd17a3..9d50351854ec 100644
--- a/contrib/llvm/utils/TableGen/InstrDocsEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/InstrDocsEmitter.cpp
@@ -100,6 +100,7 @@ void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
#define str(s) #s
#define FLAG(f) if (II->f) { FlagStrings.push_back(str(f)); }
FLAG(isReturn)
+ FLAG(isEHScopeReturn)
FLAG(isBranch)
FLAG(isIndirectBranch)
FLAG(isCompare)
@@ -137,6 +138,7 @@ void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
FLAG(isInsertSubreg)
FLAG(isConvergent)
FLAG(hasNoSchedulingInfo)
+ FLAG(variadicOpsAreDefs)
if (!FlagStrings.empty()) {
OS << "Flags: ";
bool IsFirst = true;
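Since the FLAG macro stringifies the member it tests, the two added lines expand to straightforward checks:

// Given the macros defined just above this hunk:
//   #define str(s) #s
//   #define FLAG(f) if (II->f) { FlagStrings.push_back(str(f)); }
// the new FLAG(...) lines become:
if (II->isEHScopeReturn) { FlagStrings.push_back("isEHScopeReturn"); }
if (II->variadicOpsAreDefs) { FlagStrings.push_back("variadicOpsAreDefs"); }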
diff --git a/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp b/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
index a492daac0d09..39d9e8526386 100644
--- a/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -66,11 +66,12 @@ private:
/// This method is used to custom expand TIIPredicate definitions.
/// See file llvm/Target/TargetInstPredicates.td for a description of what is
/// a TIIPredicate and how to use it.
- void emitTIIHelperMethods(raw_ostream &OS);
+ void emitTIIHelperMethods(raw_ostream &OS, StringRef TargetName,
+ bool ExpandDefinition = true);
/// Expand TIIPredicate definitions to functions that accept a const MCInst
/// reference.
- void emitMCIIHelperMethods(raw_ostream &OS);
+ void emitMCIIHelperMethods(raw_ostream &OS, StringRef TargetName);
void emitRecord(const CodeGenInstruction &Inst, unsigned Num,
Record *InstrInfo,
std::map<std::vector<Record*>, unsigned> &EL,
@@ -351,71 +352,79 @@ void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS,
OS << "#endif // GET_INSTRINFO_OPERAND_TYPES_ENUM\n\n";
}
-void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS) {
+void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS,
+ StringRef TargetName) {
RecVec TIIPredicates = Records.getAllDerivedDefinitions("TIIPredicate");
if (TIIPredicates.empty())
return;
- CodeGenTarget &Target = CDP.getTargetInfo();
- const StringRef TargetName = Target.getName();
- formatted_raw_ostream FOS(OS);
-
- FOS << "#ifdef GET_GENINSTRINFO_MC_DECL\n";
- FOS << "#undef GET_GENINSTRINFO_MC_DECL\n\n";
+ OS << "#ifdef GET_INSTRINFO_MC_HELPER_DECLS\n";
+ OS << "#undef GET_INSTRINFO_MC_HELPER_DECLS\n\n";
- FOS << "namespace llvm {\n";
- FOS << "class MCInst;\n\n";
+ OS << "namespace llvm {\n";
+ OS << "class MCInst;\n\n";
- FOS << "namespace " << TargetName << "_MC {\n\n";
+ OS << "namespace " << TargetName << "_MC {\n\n";
for (const Record *Rec : TIIPredicates) {
- FOS << "bool " << Rec->getValueAsString("FunctionName")
+ OS << "bool " << Rec->getValueAsString("FunctionName")
<< "(const MCInst &MI);\n";
}
- FOS << "\n} // end " << TargetName << "_MC namespace\n";
- FOS << "} // end llvm namespace\n\n";
+ OS << "\n} // end " << TargetName << "_MC namespace\n";
+ OS << "} // end llvm namespace\n\n";
- FOS << "#endif // GET_GENINSTRINFO_MC_DECL\n\n";
+ OS << "#endif // GET_INSTRINFO_MC_HELPER_DECLS\n\n";
- FOS << "#ifdef GET_GENINSTRINFO_MC_HELPERS\n";
- FOS << "#undef GET_GENINSTRINFO_MC_HELPERS\n\n";
+ OS << "#ifdef GET_INSTRINFO_MC_HELPERS\n";
+ OS << "#undef GET_INSTRINFO_MC_HELPERS\n\n";
- FOS << "namespace llvm {\n";
- FOS << "namespace " << TargetName << "_MC {\n\n";
+ OS << "namespace llvm {\n";
+ OS << "namespace " << TargetName << "_MC {\n\n";
- PredicateExpander PE;
+ PredicateExpander PE(TargetName);
PE.setExpandForMC(true);
+
for (const Record *Rec : TIIPredicates) {
- FOS << "bool " << Rec->getValueAsString("FunctionName");
- FOS << "(const MCInst &MI) {\n";
- FOS << " return ";
- PE.expandPredicate(FOS, Rec->getValueAsDef("Pred"));
- FOS << ";\n}\n";
+ OS << "bool " << Rec->getValueAsString("FunctionName");
+ OS << "(const MCInst &MI) {\n";
+
+ OS.indent(PE.getIndentLevel() * 2);
+ PE.expandStatement(OS, Rec->getValueAsDef("Body"));
+ OS << "\n}\n\n";
}
- FOS << "\n} // end " << TargetName << "_MC namespace\n";
- FOS << "} // end llvm namespace\n\n";
+ OS << "} // end " << TargetName << "_MC namespace\n";
+ OS << "} // end llvm namespace\n\n";
- FOS << "#endif // GET_GENISTRINFO_MC_HELPERS\n";
+ OS << "#endif // GET_GENISTRINFO_MC_HELPERS\n";
}
-void InstrInfoEmitter::emitTIIHelperMethods(raw_ostream &OS) {
+void InstrInfoEmitter::emitTIIHelperMethods(raw_ostream &OS,
+ StringRef TargetName,
+ bool ExpandDefinition) {
RecVec TIIPredicates = Records.getAllDerivedDefinitions("TIIPredicate");
if (TIIPredicates.empty())
return;
- formatted_raw_ostream FOS(OS);
- PredicateExpander PE;
+ PredicateExpander PE(TargetName);
PE.setExpandForMC(false);
- PE.setIndentLevel(2);
for (const Record *Rec : TIIPredicates) {
- FOS << "\n static bool " << Rec->getValueAsString("FunctionName");
- FOS << "(const MachineInstr &MI) {\n";
- FOS << " return ";
- PE.expandPredicate(FOS, Rec->getValueAsDef("Pred"));
- FOS << ";\n }\n";
+ OS << (ExpandDefinition ? "" : "static ") << "bool ";
+ if (ExpandDefinition)
+ OS << TargetName << "InstrInfo::";
+ OS << Rec->getValueAsString("FunctionName");
+ OS << "(const MachineInstr &MI)";
+ if (!ExpandDefinition) {
+ OS << ";\n";
+ continue;
+ }
+
+ OS << " {\n";
+ OS.indent(PE.getIndentLevel() * 2);
+ PE.expandStatement(OS, Rec->getValueAsDef("Body"));
+ OS << "\n}\n\n";
}
}
@@ -517,12 +526,22 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
<< "(int CFSetupOpcode = -1, int CFDestroyOpcode = -1, int CatchRetOpcode = -1, int ReturnOpcode = -1);\n"
<< " ~" << ClassName << "() override = default;\n";
- emitTIIHelperMethods(OS);
OS << "\n};\n} // end llvm namespace\n";
OS << "#endif // GET_INSTRINFO_HEADER\n\n";
+ OS << "#ifdef GET_INSTRINFO_HELPER_DECLS\n";
+ OS << "#undef GET_INSTRINFO_HELPER_DECLS\n\n";
+ emitTIIHelperMethods(OS, TargetName, /* ExpandDefinition = */false);
+ OS << "\n";
+ OS << "#endif // GET_INSTRINFO_HELPER_DECLS\n\n";
+
+ OS << "#ifdef GET_INSTRINFO_HELPERS\n";
+ OS << "#undef GET_INSTRINFO_HELPERS\n\n";
+ emitTIIHelperMethods(OS, TargetName, /* ExpandDefinition = */true);
+ OS << "#endif // GET_INSTRINFO_HELPERS\n\n";
+
OS << "#ifdef GET_INSTRINFO_CTOR_DTOR\n";
OS << "#undef GET_INSTRINFO_CTOR_DTOR\n";
@@ -544,7 +563,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
emitOperandTypesEnum(OS, Target);
- emitMCIIHelperMethods(OS);
+ emitMCIIHelperMethods(OS, TargetName);
}
void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
@@ -569,6 +588,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
// Emit all of the target independent flags...
if (Inst.isPseudo) OS << "|(1ULL<<MCID::Pseudo)";
if (Inst.isReturn) OS << "|(1ULL<<MCID::Return)";
+ if (Inst.isEHScopeReturn) OS << "|(1ULL<<MCID::EHScopeReturn)";
if (Inst.isBranch) OS << "|(1ULL<<MCID::Branch)";
if (Inst.isIndirectBranch) OS << "|(1ULL<<MCID::IndirectBranch)";
if (Inst.isCompare) OS << "|(1ULL<<MCID::Compare)";
@@ -604,6 +624,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (Inst.isExtractSubreg) OS << "|(1ULL<<MCID::ExtractSubreg)";
if (Inst.isInsertSubreg) OS << "|(1ULL<<MCID::InsertSubreg)";
if (Inst.isConvergent) OS << "|(1ULL<<MCID::Convergent)";
+ if (Inst.variadicOpsAreDefs) OS << "|(1ULL<<MCID::VariadicOpsAreDefs)";
// Emit all of the target-specific flags...
BitsInit *TSF = Inst.TheDef->getValueAsBitsInit("TSFlags");
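With the TIIPredicate helpers now split behind their own guards, a target that defines TIIPredicate records would typically pull them in along these lines (the Foo target and file names are invented; the guard macros are the ones emitted above):

// FooInstrInfo.h -- declarations land inside the target's InstrInfo class:
class FooInstrInfo : public FooGenInstrInfo {
public:
  // ...
#define GET_INSTRINFO_HELPER_DECLS
#include "FooGenInstrInfo.inc"
};

// FooInstrInfo.cpp -- out-of-line definitions of the same helpers:
#define GET_INSTRINFO_HELPERS
#include "FooGenInstrInfo.inc"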
diff --git a/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp b/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 06e44e3b57c1..049282e5ebfe 100644
--- a/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -489,6 +489,9 @@ struct AttributeComparator {
if (L->isNoReturn != R->isNoReturn)
return R->isNoReturn;
+ if (L->isCold != R->isCold)
+ return R->isCold;
+
if (L->isConvergent != R->isConvergent)
return R->isConvergent;
@@ -622,7 +625,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
if (!intrinsic.canThrow ||
intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem ||
- intrinsic.isNoReturn || intrinsic.isNoDuplicate ||
+ intrinsic.isNoReturn || intrinsic.isCold || intrinsic.isNoDuplicate ||
intrinsic.isConvergent || intrinsic.isSpeculatable) {
OS << " const Attribute::AttrKind Atts[] = {";
bool addComma = false;
@@ -636,6 +639,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
OS << "Attribute::NoReturn";
addComma = true;
}
+ if (intrinsic.isCold) {
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::Cold";
+ addComma = true;
+ }
if (intrinsic.isNoDuplicate) {
if (addComma)
OS << ",";
diff --git a/contrib/llvm/utils/TableGen/PredicateExpander.cpp b/contrib/llvm/utils/TableGen/PredicateExpander.cpp
index 68eb32794a02..2e01b7c3138e 100644
--- a/contrib/llvm/utils/TableGen/PredicateExpander.cpp
+++ b/contrib/llvm/utils/TableGen/PredicateExpander.cpp
@@ -12,65 +12,110 @@
//===----------------------------------------------------------------------===//
#include "PredicateExpander.h"
+#include "CodeGenSchedule.h" // Definition of STIPredicateFunction.
namespace llvm {
-void PredicateExpander::expandTrue(formatted_raw_ostream &OS) { OS << "true"; }
-void PredicateExpander::expandFalse(formatted_raw_ostream &OS) {
- OS << "false";
+void PredicateExpander::expandTrue(raw_ostream &OS) { OS << "true"; }
+void PredicateExpander::expandFalse(raw_ostream &OS) { OS << "false"; }
+
+void PredicateExpander::expandCheckImmOperand(raw_ostream &OS, int OpIndex,
+ int ImmVal,
+ StringRef FunctionMapper) {
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
+ OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
+ << ").getImm()";
+ if (!FunctionMapper.empty())
+ OS << ")";
+ OS << (shouldNegate() ? " != " : " == ") << ImmVal;
}
-void PredicateExpander::expandCheckImmOperand(formatted_raw_ostream &OS,
- int OpIndex, int ImmVal) {
+void PredicateExpander::expandCheckImmOperand(raw_ostream &OS, int OpIndex,
+ StringRef ImmVal,
+ StringRef FunctionMapper) {
+ if (ImmVal.empty())
+ expandCheckImmOperandSimple(OS, OpIndex, FunctionMapper);
+
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
- << ").getImm() " << (shouldNegate() ? "!= " : "== ") << ImmVal;
+ << ").getImm()";
+ if (!FunctionMapper.empty())
+ OS << ")";
+ OS << (shouldNegate() ? " != " : " == ") << ImmVal;
}
-void PredicateExpander::expandCheckImmOperand(formatted_raw_ostream &OS,
- int OpIndex, StringRef ImmVal) {
+void PredicateExpander::expandCheckImmOperandSimple(raw_ostream &OS,
+ int OpIndex,
+ StringRef FunctionMapper) {
+ if (shouldNegate())
+ OS << "!";
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
- << ").getImm() " << (shouldNegate() ? "!= " : "== ") << ImmVal;
+ << ").getImm()";
+ if (!FunctionMapper.empty())
+ OS << ")";
}
-void PredicateExpander::expandCheckRegOperand(formatted_raw_ostream &OS,
- int OpIndex, const Record *Reg) {
+void PredicateExpander::expandCheckRegOperand(raw_ostream &OS, int OpIndex,
+ const Record *Reg,
+ StringRef FunctionMapper) {
assert(Reg->isSubClassOf("Register") && "Expected a register Record!");
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
- << ").getReg() " << (shouldNegate() ? "!= " : "== ");
+ << ").getReg()";
+ if (!FunctionMapper.empty())
+ OS << ")";
+ OS << (shouldNegate() ? " != " : " == ");
const StringRef Str = Reg->getValueAsString("Namespace");
if (!Str.empty())
OS << Str << "::";
OS << Reg->getName();
}
-void PredicateExpander::expandCheckInvalidRegOperand(formatted_raw_ostream &OS,
+
+void PredicateExpander::expandCheckRegOperandSimple(raw_ostream &OS,
+ int OpIndex,
+ StringRef FunctionMapper) {
+ if (shouldNegate())
+ OS << "!";
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
+ OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
+ << ").getReg()";
+ if (!FunctionMapper.empty())
+ OS << ")";
+}
+
+void PredicateExpander::expandCheckInvalidRegOperand(raw_ostream &OS,
int OpIndex) {
OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
<< ").getReg() " << (shouldNegate() ? "!= " : "== ") << "0";
}
-void PredicateExpander::expandCheckSameRegOperand(formatted_raw_ostream &OS,
- int First, int Second) {
+void PredicateExpander::expandCheckSameRegOperand(raw_ostream &OS, int First,
+ int Second) {
OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << First
<< ").getReg() " << (shouldNegate() ? "!=" : "==") << " MI"
<< (isByRef() ? "." : "->") << "getOperand(" << Second << ").getReg()";
}
-void PredicateExpander::expandCheckNumOperands(formatted_raw_ostream &OS,
- int NumOps) {
+void PredicateExpander::expandCheckNumOperands(raw_ostream &OS, int NumOps) {
OS << "MI" << (isByRef() ? "." : "->") << "getNumOperands() "
<< (shouldNegate() ? "!= " : "== ") << NumOps;
}
-void PredicateExpander::expandCheckOpcode(formatted_raw_ostream &OS,
- const Record *Inst) {
+void PredicateExpander::expandCheckOpcode(raw_ostream &OS, const Record *Inst) {
OS << "MI" << (isByRef() ? "." : "->") << "getOpcode() "
<< (shouldNegate() ? "!= " : "== ") << Inst->getValueAsString("Namespace")
<< "::" << Inst->getName();
}
-void PredicateExpander::expandCheckOpcode(formatted_raw_ostream &OS,
+void PredicateExpander::expandCheckOpcode(raw_ostream &OS,
const RecVec &Opcodes) {
assert(!Opcodes.empty() && "Expected at least one opcode to check!");
bool First = true;
@@ -86,7 +131,7 @@ void PredicateExpander::expandCheckOpcode(formatted_raw_ostream &OS,
increaseIndentLevel();
for (const Record *Rec : Opcodes) {
OS << '\n';
- OS.PadToColumn(getIndentLevel() * 2);
+ OS.indent(getIndentLevel() * 2);
if (!First)
OS << (shouldNegate() ? "&& " : "|| ");
@@ -96,11 +141,11 @@ void PredicateExpander::expandCheckOpcode(formatted_raw_ostream &OS,
OS << '\n';
decreaseIndentLevel();
- OS.PadToColumn(getIndentLevel() * 2);
+ OS.indent(getIndentLevel() * 2);
OS << ')';
}
-void PredicateExpander::expandCheckPseudo(formatted_raw_ostream &OS,
+void PredicateExpander::expandCheckPseudo(raw_ostream &OS,
const RecVec &Opcodes) {
if (shouldExpandForMC())
expandFalse(OS);
@@ -108,7 +153,7 @@ void PredicateExpander::expandCheckPseudo(formatted_raw_ostream &OS,
expandCheckOpcode(OS, Opcodes);
}
-void PredicateExpander::expandPredicateSequence(formatted_raw_ostream &OS,
+void PredicateExpander::expandPredicateSequence(raw_ostream &OS,
const RecVec &Sequence,
bool IsCheckAll) {
assert(!Sequence.empty() && "Found an invalid empty predicate set!");
@@ -124,7 +169,7 @@ void PredicateExpander::expandPredicateSequence(formatted_raw_ostream &OS,
setNegatePredicate(false);
for (const Record *Rec : Sequence) {
OS << '\n';
- OS.PadToColumn(getIndentLevel() * 2);
+ OS.indent(getIndentLevel() * 2);
if (!First)
OS << (IsCheckAll ? "&& " : "|| ");
expandPredicate(OS, Rec);
@@ -132,43 +177,36 @@ void PredicateExpander::expandPredicateSequence(formatted_raw_ostream &OS,
}
OS << '\n';
decreaseIndentLevel();
- OS.PadToColumn(getIndentLevel() * 2);
+ OS.indent(getIndentLevel() * 2);
OS << ')';
setNegatePredicate(OldValue);
}
-void PredicateExpander::expandTIIFunctionCall(formatted_raw_ostream &OS,
- StringRef TargetName,
+void PredicateExpander::expandTIIFunctionCall(raw_ostream &OS,
StringRef MethodName) {
OS << (shouldNegate() ? "!" : "");
- if (shouldExpandForMC())
- OS << TargetName << "_MC::";
- else
- OS << TargetName << "Gen"
- << "InstrInfo::";
+ OS << TargetName << (shouldExpandForMC() ? "_MC::" : "InstrInfo::");
OS << MethodName << (isByRef() ? "(MI)" : "(*MI)");
}
-void PredicateExpander::expandCheckIsRegOperand(formatted_raw_ostream &OS,
- int OpIndex) {
+void PredicateExpander::expandCheckIsRegOperand(raw_ostream &OS, int OpIndex) {
OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
<< "getOperand(" << OpIndex << ").isReg() ";
}
-void PredicateExpander::expandCheckIsImmOperand(formatted_raw_ostream &OS,
- int OpIndex) {
+void PredicateExpander::expandCheckIsImmOperand(raw_ostream &OS, int OpIndex) {
OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
<< "getOperand(" << OpIndex << ").isImm() ";
}
-void PredicateExpander::expandCheckFunctionPredicate(formatted_raw_ostream &OS,
+void PredicateExpander::expandCheckFunctionPredicate(raw_ostream &OS,
StringRef MCInstFn,
StringRef MachineInstrFn) {
OS << (shouldExpandForMC() ? MCInstFn : MachineInstrFn)
<< (isByRef() ? "(MI)" : "(*MI)");
}
-void PredicateExpander::expandCheckNonPortable(formatted_raw_ostream &OS,
+void PredicateExpander::expandCheckNonPortable(raw_ostream &OS,
StringRef Code) {
if (shouldExpandForMC())
return expandFalse(OS);
@@ -176,13 +214,79 @@ void PredicateExpander::expandCheckNonPortable(formatted_raw_ostream &OS,
OS << '(' << Code << ')';
}
-void PredicateExpander::expandPredicate(formatted_raw_ostream &OS,
- const Record *Rec) {
- OS.flush();
- unsigned ColNum = getIndentLevel() * 2;
- if (OS.getColumn() < ColNum)
- OS.PadToColumn(ColNum);
+void PredicateExpander::expandReturnStatement(raw_ostream &OS,
+ const Record *Rec) {
+ std::string Buffer;
+ raw_string_ostream SS(Buffer);
+ SS << "return ";
+ expandPredicate(SS, Rec);
+ SS << ";";
+ SS.flush();
+ OS << Buffer;
+}
+
+void PredicateExpander::expandOpcodeSwitchCase(raw_ostream &OS,
+ const Record *Rec) {
+ const RecVec &Opcodes = Rec->getValueAsListOfDefs("Opcodes");
+ for (const Record *Opcode : Opcodes) {
+ OS.indent(getIndentLevel() * 2);
+ OS << "case " << Opcode->getValueAsString("Namespace")
+ << "::" << Opcode->getName() << ":\n";
+ }
+
+ increaseIndentLevel();
+ OS.indent(getIndentLevel() * 2);
+ expandStatement(OS, Rec->getValueAsDef("CaseStmt"));
+ decreaseIndentLevel();
+}
+
+void PredicateExpander::expandOpcodeSwitchStatement(raw_ostream &OS,
+ const RecVec &Cases,
+ const Record *Default) {
+ std::string Buffer;
+ raw_string_ostream SS(Buffer);
+
+ SS << "switch(MI" << (isByRef() ? "." : "->") << "getOpcode()) {\n";
+ for (const Record *Rec : Cases) {
+ expandOpcodeSwitchCase(SS, Rec);
+ SS << '\n';
+ }
+
+ // Expand the default case.
+ SS.indent(getIndentLevel() * 2);
+ SS << "default:\n";
+
+ increaseIndentLevel();
+ SS.indent(getIndentLevel() * 2);
+ expandStatement(SS, Default);
+ decreaseIndentLevel();
+ SS << '\n';
+
+ SS.indent(getIndentLevel() * 2);
+ SS << "} // end of switch-stmt";
+ SS.flush();
+ OS << Buffer;
+}
+
+void PredicateExpander::expandStatement(raw_ostream &OS, const Record *Rec) {
+ // Assume that padding has been added by the caller.
+ if (Rec->isSubClassOf("MCOpcodeSwitchStatement")) {
+ expandOpcodeSwitchStatement(OS, Rec->getValueAsListOfDefs("Cases"),
+ Rec->getValueAsDef("DefaultCase"));
+ return;
+ }
+
+ if (Rec->isSubClassOf("MCReturnStatement")) {
+ expandReturnStatement(OS, Rec->getValueAsDef("Pred"));
+ return;
+ }
+
+ llvm_unreachable("No known rules to expand this MCStatement");
+}
+
+void PredicateExpander::expandPredicate(raw_ostream &OS, const Record *Rec) {
+ // Assume that padding has been added by the caller.
if (Rec->isSubClassOf("MCTrue")) {
if (shouldNegate())
return expandFalse(OS);
@@ -210,18 +314,29 @@ void PredicateExpander::expandPredicate(formatted_raw_ostream &OS,
if (Rec->isSubClassOf("CheckRegOperand"))
return expandCheckRegOperand(OS, Rec->getValueAsInt("OpIndex"),
- Rec->getValueAsDef("Reg"));
+ Rec->getValueAsDef("Reg"),
+ Rec->getValueAsString("FunctionMapper"));
+
+ if (Rec->isSubClassOf("CheckRegOperandSimple"))
+ return expandCheckRegOperandSimple(OS, Rec->getValueAsInt("OpIndex"),
+ Rec->getValueAsString("FunctionMapper"));
if (Rec->isSubClassOf("CheckInvalidRegOperand"))
return expandCheckInvalidRegOperand(OS, Rec->getValueAsInt("OpIndex"));
if (Rec->isSubClassOf("CheckImmOperand"))
return expandCheckImmOperand(OS, Rec->getValueAsInt("OpIndex"),
- Rec->getValueAsInt("ImmVal"));
+ Rec->getValueAsInt("ImmVal"),
+ Rec->getValueAsString("FunctionMapper"));
if (Rec->isSubClassOf("CheckImmOperand_s"))
return expandCheckImmOperand(OS, Rec->getValueAsInt("OpIndex"),
- Rec->getValueAsString("ImmVal"));
+ Rec->getValueAsString("ImmVal"),
+ Rec->getValueAsString("FunctionMapper"));
+
+ if (Rec->isSubClassOf("CheckImmOperandSimple"))
+ return expandCheckImmOperandSimple(OS, Rec->getValueAsInt("OpIndex"),
+ Rec->getValueAsString("FunctionMapper"));
if (Rec->isSubClassOf("CheckSameRegOperand"))
return expandCheckSameRegOperand(OS, Rec->getValueAsInt("FirstIndex"),
@@ -253,10 +368,163 @@ void PredicateExpander::expandPredicate(formatted_raw_ostream &OS,
return expandCheckNonPortable(OS, Rec->getValueAsString("CodeBlock"));
if (Rec->isSubClassOf("TIIPredicate"))
- return expandTIIFunctionCall(OS, Rec->getValueAsString("TargetName"),
- Rec->getValueAsString("FunctionName"));
+ return expandTIIFunctionCall(OS, Rec->getValueAsString("FunctionName"));
llvm_unreachable("No known rules to expand this MCInstPredicate");
}
+void STIPredicateExpander::expandHeader(raw_ostream &OS,
+ const STIPredicateFunction &Fn) {
+ const Record *Rec = Fn.getDeclaration();
+ StringRef FunctionName = Rec->getValueAsString("Name");
+
+ OS.indent(getIndentLevel() * 2);
+ OS << "bool ";
+ if (shouldExpandDefinition())
+ OS << getClassPrefix() << "::";
+ OS << FunctionName << "(";
+ if (shouldExpandForMC())
+ OS << "const MCInst " << (isByRef() ? "&" : "*") << "MI";
+ else
+ OS << "const MachineInstr " << (isByRef() ? "&" : "*") << "MI";
+ if (Rec->getValueAsBit("UpdatesOpcodeMask"))
+ OS << ", APInt &Mask";
+ OS << (shouldExpandForMC() ? ", unsigned ProcessorID) const " : ") const ");
+ if (shouldExpandDefinition()) {
+ OS << "{\n";
+ return;
+ }
+
+ if (Rec->getValueAsBit("OverridesBaseClassMember"))
+ OS << "override";
+ OS << ";\n";
+}
+
+void STIPredicateExpander::expandPrologue(raw_ostream &OS,
+ const STIPredicateFunction &Fn) {
+ RecVec Delegates = Fn.getDeclaration()->getValueAsListOfDefs("Delegates");
+ bool UpdatesOpcodeMask =
+ Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask");
+
+ increaseIndentLevel();
+ unsigned IndentLevel = getIndentLevel();
+ for (const Record *Delegate : Delegates) {
+ OS.indent(IndentLevel * 2);
+ OS << "if (" << Delegate->getValueAsString("Name") << "(MI";
+ if (UpdatesOpcodeMask)
+ OS << ", Mask";
+ if (shouldExpandForMC())
+ OS << ", ProcessorID";
+ OS << "))\n";
+ OS.indent((1 + IndentLevel) * 2);
+ OS << "return true;\n\n";
+ }
+
+ if (shouldExpandForMC())
+ return;
+
+ OS.indent(IndentLevel * 2);
+ OS << "unsigned ProcessorID = getSchedModel().getProcessorID();\n";
+}
+
+void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, const OpcodeGroup &Group,
+ bool ShouldUpdateOpcodeMask) {
+ const OpcodeInfo &OI = Group.getOpcodeInfo();
+ for (const PredicateInfo &PI : OI.getPredicates()) {
+ const APInt &ProcModelMask = PI.ProcModelMask;
+ bool FirstProcID = true;
+ for (unsigned I = 0, E = ProcModelMask.getActiveBits(); I < E; ++I) {
+ if (!ProcModelMask[I])
+ continue;
+
+ if (FirstProcID) {
+ OS.indent(getIndentLevel() * 2);
+ OS << "if (ProcessorID == " << I;
+ } else {
+ OS << " || ProcessorID == " << I;
+ }
+ FirstProcID = false;
+ }
+
+ OS << ") {\n";
+
+ increaseIndentLevel();
+ OS.indent(getIndentLevel() * 2);
+ if (ShouldUpdateOpcodeMask) {
+ if (PI.OperandMask.isNullValue())
+ OS << "Mask.clearAllBits();\n";
+ else
+ OS << "Mask = " << PI.OperandMask << ";\n";
+ OS.indent(getIndentLevel() * 2);
+ }
+ OS << "return ";
+ expandPredicate(OS, PI.Predicate);
+ OS << ";\n";
+ decreaseIndentLevel();
+ OS.indent(getIndentLevel() * 2);
+ OS << "}\n";
+ }
+}
+
+void STIPredicateExpander::expandBody(raw_ostream &OS,
+ const STIPredicateFunction &Fn) {
+ bool UpdatesOpcodeMask =
+ Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask");
+
+ unsigned IndentLevel = getIndentLevel();
+ OS.indent(IndentLevel * 2);
+ OS << "switch(MI" << (isByRef() ? "." : "->") << "getOpcode()) {\n";
+ OS.indent(IndentLevel * 2);
+ OS << "default:\n";
+ OS.indent(IndentLevel * 2);
+ OS << " break;";
+
+ for (const OpcodeGroup &Group : Fn.getGroups()) {
+ for (const Record *Opcode : Group.getOpcodes()) {
+ OS << '\n';
+ OS.indent(IndentLevel * 2);
+ OS << "case " << getTargetName() << "::" << Opcode->getName() << ":";
+ }
+
+ OS << '\n';
+ increaseIndentLevel();
+ expandOpcodeGroup(OS, Group, UpdatesOpcodeMask);
+
+ OS.indent(getIndentLevel() * 2);
+ OS << "break;\n";
+ decreaseIndentLevel();
+ }
+
+ OS.indent(IndentLevel * 2);
+ OS << "}\n";
+}
+
+void STIPredicateExpander::expandEpilogue(raw_ostream &OS,
+ const STIPredicateFunction &Fn) {
+ OS << '\n';
+ OS.indent(getIndentLevel() * 2);
+ OS << "return ";
+ expandPredicate(OS, Fn.getDefaultReturnPredicate());
+ OS << ";\n";
+
+ decreaseIndentLevel();
+ OS.indent(getIndentLevel() * 2);
+ StringRef FunctionName = Fn.getDeclaration()->getValueAsString("Name");
+ OS << "} // " << ClassPrefix << "::" << FunctionName << "\n\n";
+}
+
+void STIPredicateExpander::expandSTIPredicate(raw_ostream &OS,
+ const STIPredicateFunction &Fn) {
+ const Record *Rec = Fn.getDeclaration();
+ if (shouldExpandForMC() && !Rec->getValueAsBit("ExpandForMC"))
+ return;
+
+ expandHeader(OS, Fn);
+ if (shouldExpandDefinition()) {
+ expandPrologue(OS, Fn);
+ expandBody(OS, Fn);
+ expandEpilogue(OS, Fn);
+ }
+}
+
} // namespace llvm
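Putting the statement expanders together, a TIIPredicate whose body is an MCOpcodeSwitchStatement comes out as a plain switch over the opcode; roughly, for an invented predicate and target:

// Hypothetical MCInst-level helper produced via expandStatement():
bool isFooMemAccess(const MCInst &MI) {
  switch(MI.getOpcode()) {
  case Foo::LOAD32:
  case Foo::STORE32:
    return true;
  default:
    return false;
  } // end of switch-stmt
}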
diff --git a/contrib/llvm/utils/TableGen/PredicateExpander.h b/contrib/llvm/utils/TableGen/PredicateExpander.h
index 398b376f7a83..0f3ee6867e65 100644
--- a/contrib/llvm/utils/TableGen/PredicateExpander.h
+++ b/contrib/llvm/utils/TableGen/PredicateExpander.h
@@ -18,67 +18,105 @@
#define LLVM_UTILS_TABLEGEN_PREDICATEEXPANDER_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
namespace llvm {
-class formatted_raw_ostream;
+class raw_ostream;
class PredicateExpander {
bool EmitCallsByRef;
bool NegatePredicate;
bool ExpandForMC;
unsigned IndentLevel;
+ StringRef TargetName;
PredicateExpander(const PredicateExpander &) = delete;
PredicateExpander &operator=(const PredicateExpander &) = delete;
public:
- PredicateExpander()
+ PredicateExpander(StringRef Target)
: EmitCallsByRef(true), NegatePredicate(false), ExpandForMC(false),
- IndentLevel(1U) {}
+ IndentLevel(1U), TargetName(Target) {}
bool isByRef() const { return EmitCallsByRef; }
bool shouldNegate() const { return NegatePredicate; }
bool shouldExpandForMC() const { return ExpandForMC; }
unsigned getIndentLevel() const { return IndentLevel; }
+ StringRef getTargetName() const { return TargetName; }
void setByRef(bool Value) { EmitCallsByRef = Value; }
void flipNegatePredicate() { NegatePredicate = !NegatePredicate; }
void setNegatePredicate(bool Value) { NegatePredicate = Value; }
void setExpandForMC(bool Value) { ExpandForMC = Value; }
+ void setIndentLevel(unsigned Level) { IndentLevel = Level; }
void increaseIndentLevel() { ++IndentLevel; }
void decreaseIndentLevel() { --IndentLevel; }
- void setIndentLevel(unsigned Level) { IndentLevel = Level; }
using RecVec = std::vector<Record *>;
- void expandTrue(formatted_raw_ostream &OS);
- void expandFalse(formatted_raw_ostream &OS);
- void expandCheckImmOperand(formatted_raw_ostream &OS, int OpIndex,
- int ImmVal);
- void expandCheckImmOperand(formatted_raw_ostream &OS, int OpIndex,
- StringRef ImmVal);
- void expandCheckRegOperand(formatted_raw_ostream &OS, int OpIndex,
- const Record *Reg);
- void expandCheckSameRegOperand(formatted_raw_ostream &OS, int First,
- int Second);
- void expandCheckNumOperands(formatted_raw_ostream &OS, int NumOps);
- void expandCheckOpcode(formatted_raw_ostream &OS, const Record *Inst);
-
- void expandCheckPseudo(formatted_raw_ostream &OS, const RecVec &Opcodes);
- void expandCheckOpcode(formatted_raw_ostream &OS, const RecVec &Opcodes);
- void expandPredicateSequence(formatted_raw_ostream &OS,
- const RecVec &Sequence, bool IsCheckAll);
- void expandTIIFunctionCall(formatted_raw_ostream &OS, StringRef TargetName,
- StringRef MethodName);
- void expandCheckIsRegOperand(formatted_raw_ostream &OS, int OpIndex);
- void expandCheckIsImmOperand(formatted_raw_ostream &OS, int OpIndex);
- void expandCheckInvalidRegOperand(formatted_raw_ostream &OS, int OpIndex);
- void expandCheckFunctionPredicate(formatted_raw_ostream &OS,
- StringRef MCInstFn,
+ void expandTrue(raw_ostream &OS);
+ void expandFalse(raw_ostream &OS);
+ void expandCheckImmOperand(raw_ostream &OS, int OpIndex, int ImmVal,
+ StringRef FunctionMapper);
+ void expandCheckImmOperand(raw_ostream &OS, int OpIndex, StringRef ImmVal,
+ StringRef FunctionMapper);
+ void expandCheckImmOperandSimple(raw_ostream &OS, int OpIndex,
+ StringRef FunctionMapper);
+ void expandCheckRegOperand(raw_ostream &OS, int OpIndex, const Record *Reg,
+ StringRef FunctionMapper);
+ void expandCheckRegOperandSimple(raw_ostream &OS, int OpIndex,
+ StringRef FunctionMapper);
+ void expandCheckSameRegOperand(raw_ostream &OS, int First, int Second);
+ void expandCheckNumOperands(raw_ostream &OS, int NumOps);
+ void expandCheckOpcode(raw_ostream &OS, const Record *Inst);
+
+ void expandCheckPseudo(raw_ostream &OS, const RecVec &Opcodes);
+ void expandCheckOpcode(raw_ostream &OS, const RecVec &Opcodes);
+ void expandPredicateSequence(raw_ostream &OS, const RecVec &Sequence,
+ bool IsCheckAll);
+ void expandTIIFunctionCall(raw_ostream &OS, StringRef MethodName);
+ void expandCheckIsRegOperand(raw_ostream &OS, int OpIndex);
+ void expandCheckIsImmOperand(raw_ostream &OS, int OpIndex);
+ void expandCheckInvalidRegOperand(raw_ostream &OS, int OpIndex);
+ void expandCheckFunctionPredicate(raw_ostream &OS, StringRef MCInstFn,
StringRef MachineInstrFn);
- void expandCheckNonPortable(formatted_raw_ostream &OS, StringRef CodeBlock);
- void expandPredicate(formatted_raw_ostream &OS, const Record *Rec);
+ void expandCheckNonPortable(raw_ostream &OS, StringRef CodeBlock);
+ void expandPredicate(raw_ostream &OS, const Record *Rec);
+ void expandReturnStatement(raw_ostream &OS, const Record *Rec);
+ void expandOpcodeSwitchCase(raw_ostream &OS, const Record *Rec);
+ void expandOpcodeSwitchStatement(raw_ostream &OS, const RecVec &Cases,
+ const Record *Default);
+ void expandStatement(raw_ostream &OS, const Record *Rec);
+};
+
+// Forward declarations.
+class STIPredicateFunction;
+class OpcodeGroup;
+
+class STIPredicateExpander : public PredicateExpander {
+ StringRef ClassPrefix;
+ bool ExpandDefinition;
+
+ STIPredicateExpander(const STIPredicateExpander &) = delete;
+ STIPredicateExpander &operator=(const STIPredicateExpander &) = delete;
+
+ void expandHeader(raw_ostream &OS, const STIPredicateFunction &Fn);
+ void expandPrologue(raw_ostream &OS, const STIPredicateFunction &Fn);
+ void expandOpcodeGroup(raw_ostream &OS, const OpcodeGroup &Group,
+ bool ShouldUpdateOpcodeMask);
+ void expandBody(raw_ostream &OS, const STIPredicateFunction &Fn);
+ void expandEpilogue(raw_ostream &OS, const STIPredicateFunction &Fn);
+
+public:
+ STIPredicateExpander(StringRef Target)
+ : PredicateExpander(Target), ClassPrefix(), ExpandDefinition(false) {}
+
+ bool shouldExpandDefinition() const { return ExpandDefinition; }
+ StringRef getClassPrefix() const { return ClassPrefix; }
+ void setClassPrefix(StringRef S) { ClassPrefix = S; }
+ void setExpandDefinition(bool Value) { ExpandDefinition = Value; }
+
+ void expandSTIPredicate(raw_ostream &OS, const STIPredicateFunction &Fn);
};
} // namespace llvm
diff --git a/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 49016cca799e..ded54c828bcd 100644
--- a/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -296,7 +296,7 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
PSetE = PSetIDs.end(); PSetI != PSetE; ++PSetI) {
PSets[i].push_back(RegBank.getRegPressureSet(*PSetI).Order);
}
- llvm::sort(PSets[i].begin(), PSets[i].end());
+ llvm::sort(PSets[i]);
PSetsSeqs.add(PSets[i]);
}
@@ -340,11 +340,38 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
<< "}\n\n";
}
+using DwarfRegNumsMapPair = std::pair<Record*, std::vector<int64_t>>;
+using DwarfRegNumsVecTy = std::vector<DwarfRegNumsMapPair>;
+
+void finalizeDwarfRegNumsKeys(DwarfRegNumsVecTy &DwarfRegNums) {
+ // Sort and unique to get a map-like vector. We want the last assignment to
+ // match previous behaviour.
+ std::stable_sort(DwarfRegNums.begin(), DwarfRegNums.end(),
+ on_first<LessRecordRegister>());
+ // Warn about duplicate assignments.
+ const Record *LastSeenReg = nullptr;
+ for (const auto &X : DwarfRegNums) {
+ const auto &Reg = X.first;
+ // The only way LessRecordRegister can return equal is if they're the same
+ // string. Use simple equality instead.
+ if (LastSeenReg && Reg->getName() == LastSeenReg->getName())
+ PrintWarning(Reg->getLoc(), Twine("DWARF numbers for register ") +
+ getQualifiedName(Reg) +
+ "specified multiple times");
+ LastSeenReg = Reg;
+ }
+ auto Last = std::unique(
+ DwarfRegNums.begin(), DwarfRegNums.end(),
+ [](const DwarfRegNumsMapPair &A, const DwarfRegNumsMapPair &B) {
+ return A.first->getName() == B.first->getName();
+ });
+ DwarfRegNums.erase(Last, DwarfRegNums.end());
+}
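The sort-and-unique approach above turns a plain vector of (register, numbers) pairs into a map-like container while still reporting duplicate keys. A minimal standalone sketch of the same pattern, with plain STL types standing in for TableGen records (all names below are illustrative):

#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

using Entry = std::pair<std::string, std::vector<int>>;

void finalizeKeys(std::vector<Entry> &Entries) {
  // Stable sort by key so entries with equal keys keep their insertion order.
  std::stable_sort(Entries.begin(), Entries.end(),
                   [](const Entry &A, const Entry &B) {
                     return A.first < B.first;
                   });
  // Warn about duplicate keys before dropping them.
  const std::string *LastKey = nullptr;
  for (const Entry &E : Entries) {
    if (LastKey && E.first == *LastKey)
      std::fprintf(stderr, "warning: key '%s' specified multiple times\n",
                   E.first.c_str());
    LastKey = &E.first;
  }
  // std::unique keeps the first element of each run of equal keys.
  Entries.erase(std::unique(Entries.begin(), Entries.end(),
                            [](const Entry &A, const Entry &B) {
                              return A.first == B.first;
                            }),
                Entries.end());
}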
+
void RegisterInfoEmitter::EmitRegMappingTables(
raw_ostream &OS, const std::deque<CodeGenRegister> &Regs, bool isCtor) {
// Collect all information about dwarf register numbers
- typedef std::map<Record*, std::vector<int64_t>, LessRecordRegister> DwarfRegNumsMapTy;
- DwarfRegNumsMapTy DwarfRegNums;
+ DwarfRegNumsVecTy DwarfRegNums;
// First, just pull all provided information to the map
unsigned maxLength = 0;
@@ -352,18 +379,17 @@ void RegisterInfoEmitter::EmitRegMappingTables(
Record *Reg = RE.TheDef;
std::vector<int64_t> RegNums = Reg->getValueAsListOfInts("DwarfNumbers");
maxLength = std::max((size_t)maxLength, RegNums.size());
- if (DwarfRegNums.count(Reg))
- PrintWarning(Reg->getLoc(), Twine("DWARF numbers for register ") +
- getQualifiedName(Reg) + "specified multiple times");
- DwarfRegNums[Reg] = RegNums;
+ DwarfRegNums.emplace_back(Reg, std::move(RegNums));
}
+ finalizeDwarfRegNumsKeys(DwarfRegNums);
if (!maxLength)
return;
// Now we know maximal length of number list. Append -1's, where needed
- for (DwarfRegNumsMapTy::iterator
- I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I)
+ for (DwarfRegNumsVecTy::iterator I = DwarfRegNums.begin(),
+ E = DwarfRegNums.end();
+ I != E; ++I)
for (unsigned i = I->second.size(), e = maxLength; i != e; ++i)
I->second.push_back(-1);
@@ -384,7 +410,7 @@ void RegisterInfoEmitter::EmitRegMappingTables(
// Store the mapping sorted by the LLVM reg num so lookup can be done
// with a binary search.
std::map<uint64_t, Record*> Dwarf2LMap;
- for (DwarfRegNumsMapTy::iterator
+ for (DwarfRegNumsVecTy::iterator
I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) {
int DwarfRegNo = I->second[i];
if (DwarfRegNo < 0)
@@ -423,7 +449,21 @@ void RegisterInfoEmitter::EmitRegMappingTables(
DefInit *DI = cast<DefInit>(V->getValue());
Record *Alias = DI->getDef();
- DwarfRegNums[Reg] = DwarfRegNums[Alias];
+ const auto &AliasIter =
+ std::lower_bound(DwarfRegNums.begin(), DwarfRegNums.end(), Alias,
+ [](const DwarfRegNumsMapPair &A, const Record *B) {
+ return LessRecordRegister()(A.first, B);
+ });
+ assert(AliasIter != DwarfRegNums.end() && AliasIter->first == Alias &&
+ "Expected Alias to be present in map");
+ const auto &RegIter =
+ std::lower_bound(DwarfRegNums.begin(), DwarfRegNums.end(), Reg,
+ [](const DwarfRegNumsMapPair &A, const Record *B) {
+ return LessRecordRegister()(A.first, B);
+ });
+ assert(RegIter != DwarfRegNums.end() && RegIter->first == Reg &&
+ "Expected Reg to be present in map");
+ RegIter->second = AliasIter->second;
}
// Emit information about the dwarf register numbers.
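Because the vector stays sorted by register name, individual entries (such as the Reg and Alias lookups above) can be located with std::lower_bound rather than a map lookup. A small sketch of that lookup over illustrative STL types:

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

using Entry = std::pair<std::string, std::vector<int>>;

// Returns a pointer to the entry for Key in a key-sorted vector, or nullptr.
Entry *findEntry(std::vector<Entry> &SortedEntries, const std::string &Key) {
  auto It = std::lower_bound(SortedEntries.begin(), SortedEntries.end(), Key,
                             [](const Entry &A, const std::string &B) {
                               return A.first < B;
                             });
  if (It == SortedEntries.end() || It->first != Key)
    return nullptr;
  return &*It;
}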
@@ -436,7 +476,7 @@ void RegisterInfoEmitter::EmitRegMappingTables(
OS << " = {\n";
// Store the mapping sorted by the Dwarf reg num so lookup can be done
// with a binary search.
- for (DwarfRegNumsMapTy::iterator
+ for (DwarfRegNumsVecTy::iterator
I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) {
int RegNo = I->second[i];
if (RegNo == -1) // -1 is the default value, don't emit a mapping.
@@ -1035,14 +1075,10 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
for (const auto &RC : RegisterClasses) {
assert(isInt<8>(RC.CopyCost) && "Copy cost too large.");
- uint32_t RegSize = 0;
- if (RC.RSI.isSimple())
- RegSize = RC.RSI.getSimple().RegSize;
OS << " { " << RC.getName() << ", " << RC.getName() << "Bits, "
<< RegClassStrings.get(RC.getName()) << ", "
<< RC.getOrder().size() << ", sizeof(" << RC.getName() << "Bits), "
<< RC.getQualifiedName() + "RegClassID" << ", "
- << RegSize/8 << ", "
<< RC.CopyCost << ", "
<< ( RC.Allocatable ? "true" : "false" ) << " },\n";
}
diff --git a/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp b/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp
index 664de2217e94..f98a7c74bf0c 100644
--- a/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -155,17 +155,15 @@ private:
} else if (BitsRecTy *BI = dyn_cast<BitsRecTy>(Field.RecType)) {
unsigned NumBits = BI->getNumBits();
if (NumBits <= 8)
- NumBits = 8;
- else if (NumBits <= 16)
- NumBits = 16;
- else if (NumBits <= 32)
- NumBits = 32;
- else if (NumBits <= 64)
- NumBits = 64;
- else
- PrintFatalError(Twine("bitfield '") + Field.Name +
- "' too large to search");
- return "uint" + utostr(NumBits) + "_t";
+ return "uint8_t";
+ if (NumBits <= 16)
+ return "uint16_t";
+ if (NumBits <= 32)
+ return "uint32_t";
+ if (NumBits <= 64)
+ return "uint64_t";
+ PrintFatalError(Twine("bitfield '") + Field.Name +
+ "' too large to search");
} else if (Field.Enum || Field.IsIntrinsic || Field.IsInstruction)
return "unsigned";
PrintFatalError(Twine("Field '") + Field.Name + "' has unknown type '" +
@@ -430,6 +428,15 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
<< ").compare(RHS." << Field.Name << ");\n";
OS << " if (Cmp" << Field.Name << " < 0) return true;\n";
OS << " if (Cmp" << Field.Name << " > 0) return false;\n";
+ } else if (Field.Enum) {
+ // Explicitly cast to unsigned, because the signedness of enums is
+ // compiler-dependent.
+ OS << " if ((unsigned)LHS." << Field.Name << " < (unsigned)RHS."
+ << Field.Name << ")\n";
+ OS << " return true;\n";
+ OS << " if ((unsigned)LHS." << Field.Name << " > (unsigned)RHS."
+ << Field.Name << ")\n";
+ OS << " return false;\n";
} else {
OS << " if (LHS." << Field.Name << " < RHS." << Field.Name << ")\n";
OS << " return true;\n";
diff --git a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
index c5da8d8142ff..731c14bdb9a0 100644
--- a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -93,6 +93,8 @@ class SubtargetEmitter {
&ProcItinLists);
unsigned EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
raw_ostream &OS);
+ void EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel,
+ raw_ostream &OS);
void EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
raw_ostream &OS);
void EmitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name,
@@ -116,6 +118,7 @@ class SubtargetEmitter {
void emitSchedModelHelpersImpl(raw_ostream &OS,
bool OnlyExpandMCInstPredicates = false);
void emitGenMCSubtargetInfo(raw_ostream &OS);
+ void EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS);
void EmitSchedModel(raw_ostream &OS);
void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS);
@@ -139,7 +142,7 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS) {
// Get all records of class and sort
std::vector<Record*> DefList =
Records.getAllDerivedDefinitions("SubtargetFeature");
- llvm::sort(DefList.begin(), DefList.end(), LessRecord());
+ llvm::sort(DefList, LessRecord());
unsigned N = DefList.size();
if (N == 0)
@@ -178,7 +181,7 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
if (FeatureList.empty())
return 0;
- llvm::sort(FeatureList.begin(), FeatureList.end(), LessRecordFieldName());
+ llvm::sort(FeatureList, LessRecordFieldName());
// Begin feature table
OS << "// Sorted (by key) array of values for CPU features.\n"
@@ -228,7 +231,7 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
// Gather and sort processor information
std::vector<Record*> ProcessorList =
Records.getAllDerivedDefinitions("Processor");
- llvm::sort(ProcessorList.begin(), ProcessorList.end(), LessRecordFieldName());
+ llvm::sort(ProcessorList, LessRecordFieldName());
// Begin processor table
OS << "// Sorted (by key) array of values for CPU subtype.\n"
@@ -652,7 +655,7 @@ SubtargetEmitter::EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
return 0;
// Print the RegisterCost table first.
- OS << "\n// {RegisterClassID, Register Cost}\n";
+ OS << "\n// {RegisterClassID, Register Cost, AllowMoveElimination }\n";
OS << "static const llvm::MCRegisterCostEntry " << ProcModel.ModelName
<< "RegisterCosts"
<< "[] = {\n";
@@ -667,24 +670,28 @@ SubtargetEmitter::EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
Record *Rec = RC.RCDef;
if (Rec->getValue("Namespace"))
OS << Rec->getValueAsString("Namespace") << "::";
- OS << Rec->getName() << "RegClassID, " << RC.Cost << "},\n";
+ OS << Rec->getName() << "RegClassID, " << RC.Cost << ", "
+ << RC.AllowMoveElimination << "},\n";
}
}
OS << "};\n";
// Now generate a table with register file info.
- OS << "\n // {Name, #PhysRegs, #CostEntries, IndexToCostTbl}\n";
+ OS << "\n // {Name, #PhysRegs, #CostEntries, IndexToCostTbl, "
+ << "MaxMovesEliminatedPerCycle, AllowZeroMoveEliminationOnly }\n";
OS << "static const llvm::MCRegisterFileDesc " << ProcModel.ModelName
<< "RegisterFiles"
<< "[] = {\n"
- << " { \"InvalidRegisterFile\", 0, 0, 0 },\n";
+ << " { \"InvalidRegisterFile\", 0, 0, 0, 0, 0 },\n";
unsigned CostTblIndex = 0;
for (const CodeGenRegisterFile &RD : ProcModel.RegisterFiles) {
OS << " { ";
OS << '"' << RD.Name << '"' << ", " << RD.NumPhysRegs << ", ";
unsigned NumCostEntries = RD.Costs.size();
- OS << NumCostEntries << ", " << CostTblIndex << "},\n";
+ OS << NumCostEntries << ", " << CostTblIndex << ", "
+ << RD.MaxMovesEliminatedPerCycle << ", "
+ << RD.AllowZeroMoveEliminationOnly << "},\n";
CostTblIndex += NumCostEntries;
}
OS << "};\n";
@@ -692,62 +699,28 @@ SubtargetEmitter::EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
return CostTblIndex;
}
-static bool EmitPfmIssueCountersTable(const CodeGenProcModel &ProcModel,
- raw_ostream &OS) {
- unsigned NumCounterDefs = 1 + ProcModel.ProcResourceDefs.size();
- std::vector<const Record *> CounterDefs(NumCounterDefs);
- bool HasCounters = false;
- for (const Record *CounterDef : ProcModel.PfmIssueCounterDefs) {
- const Record *&CD = CounterDefs[ProcModel.getProcResourceIdx(
- CounterDef->getValueAsDef("Resource"))];
- if (CD) {
- PrintFatalError(CounterDef->getLoc(),
- "multiple issue counters for " +
- CounterDef->getValueAsDef("Resource")->getName());
- }
- CD = CounterDef;
- HasCounters = true;
- }
- if (!HasCounters) {
- return false;
+void SubtargetEmitter::EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel,
+ raw_ostream &OS) {
+ unsigned QueueID = 0;
+ if (ProcModel.LoadQueue) {
+ const Record *Queue = ProcModel.LoadQueue->getValueAsDef("QueueDescriptor");
+ QueueID =
+ 1 + std::distance(ProcModel.ProcResourceDefs.begin(),
+ std::find(ProcModel.ProcResourceDefs.begin(),
+ ProcModel.ProcResourceDefs.end(), Queue));
}
- OS << "\nstatic const char* " << ProcModel.ModelName
- << "PfmIssueCounters[] = {\n";
- for (unsigned i = 0; i != NumCounterDefs; ++i) {
- const Record *CounterDef = CounterDefs[i];
- if (CounterDef) {
- const auto PfmCounters = CounterDef->getValueAsListOfStrings("Counters");
- if (PfmCounters.empty())
- PrintFatalError(CounterDef->getLoc(), "empty counter list");
- OS << " \"" << PfmCounters[0];
- for (unsigned p = 1, e = PfmCounters.size(); p != e; ++p)
- OS << ",\" \"" << PfmCounters[p];
- OS << "\", // #" << i << " = ";
- OS << CounterDef->getValueAsDef("Resource")->getName() << "\n";
- } else {
- OS << " nullptr, // #" << i << "\n";
- }
+ OS << " " << QueueID << ", // Resource Descriptor for the Load Queue\n";
+
+ QueueID = 0;
+ if (ProcModel.StoreQueue) {
+ const Record *Queue =
+ ProcModel.StoreQueue->getValueAsDef("QueueDescriptor");
+ QueueID =
+ 1 + std::distance(ProcModel.ProcResourceDefs.begin(),
+ std::find(ProcModel.ProcResourceDefs.begin(),
+ ProcModel.ProcResourceDefs.end(), Queue));
}
- OS << "};\n";
- return true;
-}
-
-static void EmitPfmCounters(const CodeGenProcModel &ProcModel,
- const bool HasPfmIssueCounters, raw_ostream &OS) {
- OS << " {\n";
- // Emit the cycle counter.
- if (ProcModel.PfmCycleCounterDef)
- OS << " \"" << ProcModel.PfmCycleCounterDef->getValueAsString("Counter")
- << "\", // Cycle counter.\n";
- else
- OS << " nullptr, // No cycle counter.\n";
-
- // Emit a reference to issue counters table.
- if (HasPfmIssueCounters)
- OS << " " << ProcModel.ModelName << "PfmIssueCounters\n";
- else
- OS << " nullptr // No issue counters.\n";
- OS << " }\n";
+ OS << " " << QueueID << ", // Resource Descriptor for the Store Queue\n";
}
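The load/store queue information is encoded as a 1-based index into the processor's resource list, with 0 reserved for "no queue defined". A reduced sketch of that index computation over a plain vector (names are illustrative):

#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

unsigned getQueueResourceID(const std::vector<std::string> &Resources,
                            const std::string *Queue) {
  if (!Queue)
    return 0; // No load/store queue declared for this processor model.
  // Index 0 is the invalid/sentinel resource, so real entries start at 1.
  return 1 + std::distance(Resources.begin(),
                           std::find(Resources.begin(), Resources.end(),
                                     *Queue));
}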
void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
@@ -756,9 +729,6 @@ void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
// defined register file), and a table of register costs.
unsigned NumCostEntries = EmitRegisterFileTables(ProcModel, OS);
- // Generate a table of ProcRes counter names.
- const bool HasPfmIssueCounters = EmitPfmIssueCountersTable(ProcModel, OS);
-
// Now generate a table for the extra processor info.
OS << "\nstatic const llvm::MCExtraProcessorInfo " << ProcModel.ModelName
<< "ExtraInfo = {\n ";
@@ -771,7 +741,8 @@ void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
EmitRegisterFileInfo(ProcModel, ProcModel.RegisterFiles.size(),
NumCostEntries, OS);
- EmitPfmCounters(ProcModel, HasPfmIssueCounters, OS);
+ // Add information about load/store queues.
+ EmitLoadStoreQueueInfo(ProcModel, OS);
OS << "};\n";
}
@@ -780,7 +751,7 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
raw_ostream &OS) {
EmitProcessorResourceSubUnits(ProcModel, OS);
- OS << "\n// {Name, NumUnits, SuperIdx, IsBuffered, SubUnitsIdxBegin}\n";
+ OS << "\n// {Name, NumUnits, SuperIdx, BufferSize, SubUnitsIdxBegin}\n";
OS << "static const llvm::MCProcResourceDesc " << ProcModel.ModelName
<< "ProcResources"
<< "[] = {\n"
@@ -1174,7 +1145,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
WriteIDs.push_back(SchedModels.getSchedRWIdx(VW, /*IsRead=*/false));
}
}
- llvm::sort(WriteIDs.begin(), WriteIDs.end());
+ llvm::sort(WriteIDs);
for(unsigned W : WriteIDs) {
MCReadAdvanceEntry RAEntry;
RAEntry.UseIdx = UseIdx;
@@ -1192,8 +1163,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
// compression.
//
// WritePrecRes entries are sorted by ProcResIdx.
- llvm::sort(WriteProcResources.begin(), WriteProcResources.end(),
- LessWriteProcResources());
+ llvm::sort(WriteProcResources, LessWriteProcResources());
SCDesc.NumWriteProcResEntries = WriteProcResources.size();
std::vector<MCWriteProcResEntry>::iterator WPRPos =
@@ -1399,20 +1369,19 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
}
//
-// EmitProcessorLookup - generate cpu name to itinerary lookup table.
+// EmitProcessorLookup - generate cpu name to sched model lookup tables.
//
void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
// Gather and sort processor information
std::vector<Record*> ProcessorList =
Records.getAllDerivedDefinitions("Processor");
- llvm::sort(ProcessorList.begin(), ProcessorList.end(), LessRecordFieldName());
+ llvm::sort(ProcessorList, LessRecordFieldName());
- // Begin processor table
+ // Begin processor->sched model table
OS << "\n";
- OS << "// Sorted (by key) array of itineraries for CPU subtype.\n"
- << "extern const llvm::SubtargetInfoKV "
- << Target << "ProcSchedKV[] = {\n";
-
+ OS << "// Sorted (by key) array of sched model for CPU subtype.\n"
+ << "extern const llvm::SubtargetInfoKV " << Target
+ << "ProcSchedKV[] = {\n";
// For each processor
for (Record *Processor : ProcessorList) {
StringRef Name = Processor->getValueAsString("Name");
@@ -1422,8 +1391,7 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
// Emit as { "cpu", procinit },
OS << " { \"" << Name << "\", (const void *)&" << ProcModelName << " },\n";
}
-
- // End processor table
+ // End processor->sched model table
OS << "};\n";
}
@@ -1471,7 +1439,7 @@ static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
// stream.
std::vector<Record *> Prologs =
Records.getAllDerivedDefinitions("PredicateProlog");
- llvm::sort(Prologs.begin(), Prologs.end(), LessRecord());
+ llvm::sort(Prologs, LessRecord());
for (Record *P : Prologs)
Stream << P->getValueAsString("Code") << '\n';
@@ -1480,114 +1448,190 @@ static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
}
static void emitPredicates(const CodeGenSchedTransition &T,
- const CodeGenSchedClass &SC,
- PredicateExpander &PE,
+ const CodeGenSchedClass &SC, PredicateExpander &PE,
raw_ostream &OS) {
std::string Buffer;
- raw_string_ostream StringStream(Buffer);
- formatted_raw_ostream FOS(StringStream);
-
- FOS.PadToColumn(6);
- FOS << "if (";
- for (RecIter RI = T.PredTerm.begin(), RE = T.PredTerm.end(); RI != RE; ++RI) {
- if (RI != T.PredTerm.begin()) {
- FOS << "\n";
- FOS.PadToColumn(8);
- FOS << "&& ";
+ raw_string_ostream SS(Buffer);
+
+ auto IsTruePredicate = [](const Record *Rec) {
+ return Rec->isSubClassOf("MCSchedPredicate") &&
+ Rec->getValueAsDef("Pred")->isSubClassOf("MCTrue");
+ };
+
+ // If not all predicates are MCTrue, then we need an if-stmt.
+ unsigned NumNonTruePreds =
+ T.PredTerm.size() - count_if(T.PredTerm, IsTruePredicate);
+
+ SS.indent(PE.getIndentLevel() * 2);
+
+ if (NumNonTruePreds) {
+ bool FirstNonTruePredicate = true;
+ SS << "if (";
+
+ PE.setIndentLevel(PE.getIndentLevel() + 2);
+
+ for (const Record *Rec : T.PredTerm) {
+ // Skip predicates that evaluate to "true".
+ if (IsTruePredicate(Rec))
+ continue;
+
+ if (FirstNonTruePredicate) {
+ FirstNonTruePredicate = false;
+ } else {
+ SS << "\n";
+ SS.indent(PE.getIndentLevel() * 2);
+ SS << "&& ";
+ }
+
+ if (Rec->isSubClassOf("MCSchedPredicate")) {
+ PE.expandPredicate(SS, Rec->getValueAsDef("Pred"));
+ continue;
+ }
+
+ // Expand this legacy predicate and wrap it in parentheses if there is more
+ // than one predicate to expand.
+ SS << ((NumNonTruePreds > 1) ? "(" : "")
+ << Rec->getValueAsString("Predicate")
+ << ((NumNonTruePreds > 1) ? ")" : "");
}
- const Record *Rec = *RI;
- if (Rec->isSubClassOf("MCSchedPredicate"))
- PE.expandPredicate(FOS, Rec->getValueAsDef("Pred"));
- else
- FOS << "(" << Rec->getValueAsString("Predicate") << ")";
+
+ SS << ")\n"; // end of if-stmt
+ PE.decreaseIndentLevel();
+ SS.indent(PE.getIndentLevel() * 2);
+ PE.decreaseIndentLevel();
}
- FOS << ")\n";
- FOS.PadToColumn(8);
- FOS << "return " << T.ToClassIdx << "; // " << SC.Name << '\n';
- FOS.flush();
+ SS << "return " << T.ToClassIdx << "; // " << SC.Name << '\n';
+ SS.flush();
OS << Buffer;
}
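The rewritten emitPredicates skips terms that are trivially true and joins the remaining ones with "&&", indenting with plain spaces rather than a formatted stream. A reduced, self-contained sketch of the same emission logic, where an empty string stands in for an always-true predicate (types and names are illustrative):

#include <sstream>
#include <string>
#include <vector>

std::string emitGuardedReturn(const std::vector<std::string> &Preds,
                              unsigned ToClassIdx, unsigned Indent) {
  std::ostringstream SS;
  unsigned NumReal = 0;
  for (const std::string &P : Preds)
    if (!P.empty())
      ++NumReal;

  SS << std::string(Indent * 2, ' ');
  if (NumReal) {
    bool First = true;
    SS << "if (";
    for (const std::string &P : Preds) {
      if (P.empty())
        continue; // Skip terms that always evaluate to true.
      if (!First)
        SS << "\n" << std::string((Indent + 2) * 2, ' ') << "&& ";
      First = false;
      // Parenthesize each term only when more than one is emitted.
      SS << (NumReal > 1 ? "(" : "") << P << (NumReal > 1 ? ")" : "");
    }
    SS << ")\n" << std::string((Indent + 1) * 2, ' ');
  }
  SS << "return " << ToClassIdx << ";\n";
  return SS.str();
}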
-void SubtargetEmitter::emitSchedModelHelpersImpl(
- raw_ostream &OS, bool OnlyExpandMCInstPredicates) {
- // Collect Variant Classes.
- IdxVec VariantClasses;
+// Used by method `SubtargetEmitter::emitSchedModelHelpersImpl()` to generate
+// epilogue code for the auto-generated helper.
+void emitSchedModelHelperEpilogue(raw_ostream &OS, bool ShouldReturnZero) {
+ if (ShouldReturnZero) {
+ OS << " // Don't know how to resolve this scheduling class.\n"
+ << " return 0;\n";
+ return;
+ }
+
+ OS << " report_fatal_error(\"Expected a variant SchedClass\");\n";
+}
+
+bool hasMCSchedPredicates(const CodeGenSchedTransition &T) {
+ return all_of(T.PredTerm, [](const Record *Rec) {
+ return Rec->isSubClassOf("MCSchedPredicate");
+ });
+}
+
+void collectVariantClasses(const CodeGenSchedModels &SchedModels,
+ IdxVec &VariantClasses,
+ bool OnlyExpandMCInstPredicates) {
for (const CodeGenSchedClass &SC : SchedModels.schedClasses()) {
+ // Ignore non-variant scheduling classes.
if (SC.Transitions.empty())
continue;
+
+ if (OnlyExpandMCInstPredicates) {
+ // Ignore this variant scheduling class if no transitions use any meaningful
+ // MCSchedPredicate definitions.
+ if (!any_of(SC.Transitions, [](const CodeGenSchedTransition &T) {
+ return hasMCSchedPredicates(T);
+ }))
+ continue;
+ }
+
VariantClasses.push_back(SC.Index);
}
+}
- if (!VariantClasses.empty()) {
- bool FoundPredicates = false;
- for (unsigned VC : VariantClasses) {
- // Emit code for each variant scheduling class.
- const CodeGenSchedClass &SC = SchedModels.getSchedClass(VC);
- IdxVec ProcIndices;
- for (const CodeGenSchedTransition &T : SC.Transitions) {
- if (OnlyExpandMCInstPredicates &&
- !all_of(T.PredTerm, [](const Record *Rec) {
- return Rec->isSubClassOf("MCSchedPredicate");
- }))
- continue;
+void collectProcessorIndices(const CodeGenSchedClass &SC, IdxVec &ProcIndices) {
+ // A variant scheduling class may define transitions for multiple
+ // processors. This function identifies which processors are associated with
+ // transition rules specified by variant class `SC`.
+ for (const CodeGenSchedTransition &T : SC.Transitions) {
+ IdxVec PI;
+ std::set_union(T.ProcIndices.begin(), T.ProcIndices.end(),
+ ProcIndices.begin(), ProcIndices.end(),
+ std::back_inserter(PI));
+ ProcIndices.swap(PI);
+ }
+}
- IdxVec PI;
- std::set_union(T.ProcIndices.begin(), T.ProcIndices.end(),
- ProcIndices.begin(), ProcIndices.end(),
- std::back_inserter(PI));
- ProcIndices.swap(PI);
- }
- if (ProcIndices.empty())
- continue;
+void SubtargetEmitter::emitSchedModelHelpersImpl(
+ raw_ostream &OS, bool OnlyExpandMCInstPredicates) {
+ IdxVec VariantClasses;
+ collectVariantClasses(SchedModels, VariantClasses,
+ OnlyExpandMCInstPredicates);
- // Emit a switch statement only if there are predicates to expand.
- if (!FoundPredicates) {
- OS << " switch (SchedClass) {\n";
- FoundPredicates = true;
- }
+ if (VariantClasses.empty()) {
+ emitSchedModelHelperEpilogue(OS, OnlyExpandMCInstPredicates);
+ return;
+ }
- OS << " case " << VC << ": // " << SC.Name << '\n';
- PredicateExpander PE;
- PE.setByRef(false);
- PE.setExpandForMC(OnlyExpandMCInstPredicates);
- for (unsigned PI : ProcIndices) {
- OS << " ";
- if (PI != 0) {
- OS << (OnlyExpandMCInstPredicates
- ? "if (CPUID == "
- : "if (SchedModel->getProcessorID() == ");
- OS << PI << ") ";
- }
+ // Construct a switch statement where the condition is a check on the
+ // scheduling class identifier. There is a `case` for every variant class
+ // defined by the processor models of this target.
+ // Each `case` implements a number of rules to resolve (i.e. to transition from)
+ // a variant scheduling class to another scheduling class. Rules are
+ // described by instances of CodeGenSchedTransition. Note that transitions may
+ // not be valid for all processors.
+ OS << " switch (SchedClass) {\n";
+ for (unsigned VC : VariantClasses) {
+ IdxVec ProcIndices;
+ const CodeGenSchedClass &SC = SchedModels.getSchedClass(VC);
+ collectProcessorIndices(SC, ProcIndices);
+
+ OS << " case " << VC << ": // " << SC.Name << '\n';
+
+ PredicateExpander PE(Target);
+ PE.setByRef(false);
+ PE.setExpandForMC(OnlyExpandMCInstPredicates);
+ for (unsigned PI : ProcIndices) {
+ OS << " ";
+
+ // Emit a guard on the processor ID.
+ if (PI != 0) {
+ OS << (OnlyExpandMCInstPredicates
+ ? "if (CPUID == "
+ : "if (SchedModel->getProcessorID() == ");
+ OS << PI << ") ";
OS << "{ // " << (SchedModels.procModelBegin() + PI)->ModelName << '\n';
+ }
- for (const CodeGenSchedTransition &T : SC.Transitions) {
- if (PI != 0 && !count(T.ProcIndices, PI))
- continue;
- PE.setIndentLevel(4);
- emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS);
- }
+ // Now emit transitions associated with processor PI.
+ for (const CodeGenSchedTransition &T : SC.Transitions) {
+ if (PI != 0 && !count(T.ProcIndices, PI))
+ continue;
- OS << " }\n";
- if (PI == 0)
- break;
+ // Emit only transitions based on MCSchedPredicate when expanding for MC.
+ // At least the transition specified by NoSchedPred is emitted,
+ // which becomes the default transition for those variants otherwise
+ // not based on MCSchedPredicate.
+ // FIXME: preferably, llvm-mca should instead assume a reasonable
+ // default when a variant transition is not based on MCSchedPredicate
+ // for a given processor.
+ if (OnlyExpandMCInstPredicates && !hasMCSchedPredicates(T))
+ continue;
+
+ PE.setIndentLevel(3);
+ emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS);
}
- if (SC.isInferred())
- OS << " return " << SC.Index << ";\n";
- OS << " break;\n";
+
+ OS << " }\n";
+
+ if (PI == 0)
+ break;
}
- if (FoundPredicates)
- OS << " };\n";
+ if (SC.isInferred())
+ OS << " return " << SC.Index << ";\n";
+ OS << " break;\n";
}
- if (OnlyExpandMCInstPredicates) {
- OS << " // Don't know how to resolve this scheduling class.\n"
- << " return 0;\n";
- return;
- }
+ OS << " };\n";
- OS << " report_fatal_error(\"Expected a variant SchedClass\");\n";
+ emitSchedModelHelperEpilogue(OS, OnlyExpandMCInstPredicates);
}
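For a variant scheduling class, the emitted helper therefore ends up looking roughly like the snippet below; the class indices, model name and predicate are illustrative, and the signature follows the resolveVariantSchedClassImpl call emitted above.

#include "llvm/MC/MCInst.h"
using namespace llvm;

unsigned resolveVariantSchedClassImpl(unsigned SchedClass, const MCInst *MI,
                                      unsigned CPUID) {
  switch (SchedClass) {
  case 42: // WriteFooVar
    if (CPUID == 3) { // MyProcModel
      if (MI->getNumOperands() == 2)
        return 17; // WriteFoo_TwoOps
      return 18; // WriteFoo_Default
    }
    break;
  };
  // Don't know how to resolve this scheduling class.
  return 0;
}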
void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
@@ -1601,7 +1645,7 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
// Emit target predicates.
emitSchedModelHelpersImpl(OS);
-
+
OS << "} // " << ClassName << "::resolveSchedClass\n\n";
OS << "unsigned " << ClassName
@@ -1609,7 +1653,16 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
<< " unsigned CPUID) const {\n"
<< " return " << Target << "_MC"
<< "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID);\n"
- << "} // " << ClassName << "::resolveVariantSchedClass\n";
+ << "} // " << ClassName << "::resolveVariantSchedClass\n\n";
+
+ STIPredicateExpander PE(Target);
+ PE.setClassPrefix(ClassName);
+ PE.setExpandDefinition(true);
+ PE.setByRef(false);
+ PE.setIndentLevel(0);
+
+ for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+ PE.expandSTIPredicate(OS, Fn);
}
void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
@@ -1637,7 +1690,7 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS,
unsigned NumProcs) {
std::vector<Record*> Features =
Records.getAllDerivedDefinitions("SubtargetFeature");
- llvm::sort(Features.begin(), Features.end(), LessRecord());
+ llvm::sort(Features, LessRecord());
OS << "// ParseSubtargetFeatures - Parses features string setting specified\n"
<< "// subtarget options.\n"
@@ -1703,6 +1756,31 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) {
OS << "};\n";
}
+void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) {
+ OS << "\n#ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n";
+ OS << "#undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";
+
+ STIPredicateExpander PE(Target);
+ PE.setExpandForMC(true);
+ PE.setByRef(true);
+ for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+ PE.expandSTIPredicate(OS, Fn);
+
+ OS << "#endif // GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";
+
+ OS << "\n#ifdef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n";
+ OS << "#undef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n";
+
+ std::string ClassPrefix = Target + "MCInstrAnalysis";
+ PE.setExpandDefinition(true);
+ PE.setClassPrefix(ClassPrefix);
+ PE.setIndentLevel(0);
+ for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+ PE.expandSTIPredicate(OS, Fn);
+
+ OS << "#endif // GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n";
+}
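The GET_STIPREDICATE_DECLS/DEFS guards follow the usual TableGen .inc pattern: a target opts in by defining the macro before including its generated subtarget file. A hypothetical consumer, assuming LLVM's MC headers and an illustrative target name:

#include "llvm/MC/MCInstrAnalysis.h"
using namespace llvm;

class MyTargetMCInstrAnalysis : public MCInstrAnalysis {
public:
  explicit MyTargetMCInstrAnalysis(const MCInstrInfo *Info)
      : MCInstrAnalysis(Info) {}

  // Member declarations generated under the DECLS guard.
#define GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS
#include "MyTargetGenSubtargetInfo.inc"
};

// Out-of-line definitions generated under the DEFS guard.
#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS
#include "MyTargetGenSubtargetInfo.inc"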
+
//
// SubtargetEmitter::run - Main subtarget enumeration emitter.
//
@@ -1800,6 +1878,12 @@ void SubtargetEmitter::run(raw_ostream &OS) {
<< " const;\n";
if (TGT.getHwModes().getNumModeIds() > 1)
OS << " unsigned getHwMode() const override;\n";
+
+ STIPredicateExpander PE(Target);
+ PE.setByRef(false);
+ for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+ PE.expandSTIPredicate(OS, Fn);
+
OS << "};\n"
<< "} // end namespace llvm\n\n";
@@ -1857,6 +1941,8 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "} // end namespace llvm\n\n";
OS << "#endif // GET_SUBTARGETINFO_CTOR\n\n";
+
+ EmitMCInstrAnalysisPredicateFunctions(OS);
}
namespace llvm {
diff --git a/contrib/llvm/utils/TableGen/TableGen.cpp b/contrib/llvm/utils/TableGen/TableGen.cpp
index b78260625cb2..d5b6a3c12647 100644
--- a/contrib/llvm/utils/TableGen/TableGen.cpp
+++ b/contrib/llvm/utils/TableGen/TableGen.cpp
@@ -53,6 +53,7 @@ enum ActionType {
GenX86EVEX2VEXTables,
GenX86FoldTables,
GenRegisterBank,
+ GenExegesis,
};
namespace {
@@ -117,7 +118,9 @@ namespace {
clEnumValN(GenX86FoldTables, "gen-x86-fold-tables",
"Generate X86 fold tables"),
clEnumValN(GenRegisterBank, "gen-register-bank",
- "Generate registers bank descriptions")));
+ "Generate registers bank descriptions"),
+ clEnumValN(GenExegesis, "gen-exegesis",
+ "Generate llvm-exegesis tables")));
cl::OptionCategory PrintEnumsCat("Options for -print-enums");
cl::opt<std::string>
@@ -231,6 +234,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenX86FoldTables:
EmitX86FoldTables(Records, OS);
break;
+ case GenExegesis:
+ EmitExegesis(Records, OS);
+ break;
}
return false;
diff --git a/contrib/llvm/utils/TableGen/TableGenBackends.h b/contrib/llvm/utils/TableGen/TableGenBackends.h
index 1329a6d833f4..f4f2909f8e88 100644
--- a/contrib/llvm/utils/TableGen/TableGenBackends.h
+++ b/contrib/llvm/utils/TableGen/TableGenBackends.h
@@ -89,6 +89,7 @@ void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS);
void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS);
void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS);
void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS);
+void EmitExegesis(RecordKeeper &RK, raw_ostream &OS);
} // End llvm namespace
diff --git a/contrib/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/contrib/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index df63337d5637..788f142e125f 100644
--- a/contrib/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -19,6 +19,8 @@
namespace llvm {
+static constexpr int WebAssemblyInstructionTableSize = 256;
+
void emitWebAssemblyDisassemblerTables(
raw_ostream &OS,
const ArrayRef<const CodeGenInstruction *> &NumberedInstructions) {
@@ -42,36 +44,41 @@ void emitWebAssemblyDisassemblerTables(
auto Prefix = Opc >> 8;
Opc = Opc & 0xFF;
auto &CGIP = OpcodeTable[Prefix][Opc];
- if (!CGIP.second ||
- // Make sure we store the variant with the least amount of operands,
- // which is the one without explicit registers. Only few instructions
- // have these currently, would be good to have for all of them.
- // FIXME: this picks the first of many typed variants, which is
- // currently the except_ref one, though this shouldn't matter for
- // disassembly purposes.
- CGIP.second->Operands.OperandList.size() >
- CGI.Operands.OperandList.size()) {
+ // All wasm instructions have a StackBased field of type string; we only
+ // want the instructions for which this is "true".
+ auto StackString =
+ Def.getValue("StackBased")->getValue()->getCastTo(StringRecTy::get());
+ auto IsStackBased =
+ StackString &&
+ reinterpret_cast<const StringInit *>(StackString)->getValue() == "true";
+ if (IsStackBased && !CGIP.second) {
+ // This picks the first of many typed variants, which is
+ // currently the except_ref one, though this shouldn't matter for
+ // disassembly purposes.
CGIP = std::make_pair(I, &CGI);
}
}
OS << "#include \"MCTargetDesc/WebAssemblyMCTargetDesc.h\"\n";
OS << "\n";
OS << "namespace llvm {\n\n";
+ OS << "static constexpr int WebAssemblyInstructionTableSize = ";
+ OS << WebAssemblyInstructionTableSize << ";\n\n";
OS << "enum EntryType : uint8_t { ";
OS << "ET_Unused, ET_Prefix, ET_Instruction };\n\n";
OS << "struct WebAssemblyInstruction {\n";
OS << " uint16_t Opcode;\n";
OS << " EntryType ET;\n";
OS << " uint8_t NumOperands;\n";
- OS << " uint8_t Operands[4];\n";
+ OS << " uint16_t OperandStart;\n";
OS << "};\n\n";
+ std::vector<std::string> OperandTable, CurOperandList;
// Output one table per prefix.
for (auto &PrefixPair : OpcodeTable) {
if (PrefixPair.second.empty())
continue;
OS << "WebAssemblyInstruction InstructionTable" << PrefixPair.first;
OS << "[] = {\n";
- for (unsigned I = 0; I <= 0xFF; I++) {
+ for (unsigned I = 0; I < WebAssemblyInstructionTableSize; I++) {
auto InstIt = PrefixPair.second.find(I);
if (InstIt != PrefixPair.second.end()) {
// Regular instruction.
@@ -81,24 +88,54 @@ void emitWebAssemblyDisassemblerTables(
OS.write_hex(static_cast<unsigned long long>(I));
OS << ": " << CGI.AsmString << "\n";
OS << " { " << InstIt->second.first << ", ET_Instruction, ";
- OS << CGI.Operands.OperandList.size() << ", {\n";
+ OS << CGI.Operands.OperandList.size() << ", ";
+ // Collect operand types for storage in a shared list.
+ CurOperandList.clear();
for (auto &Op : CGI.Operands.OperandList) {
- OS << " " << Op.OperandType << ",\n";
+ assert(Op.OperandType != "MCOI::OPERAND_UNKNOWN");
+ CurOperandList.push_back(Op.OperandType);
+ }
+ // See if we already have stored this sequence before. This is not
+ // strictly necessary but makes the table really small.
+ size_t OperandStart = OperandTable.size();
+ if (CurOperandList.size() <= OperandTable.size()) {
+ for (size_t J = 0; J <= OperandTable.size() - CurOperandList.size();
+ ++J) {
+ size_t K = 0;
+ for (; K < CurOperandList.size(); ++K) {
+ if (OperandTable[J + K] != CurOperandList[K]) break;
+ }
+ if (K == CurOperandList.size()) {
+ OperandStart = J;
+ break;
+ }
+ }
+ }
+ // Store operands if no prior occurrence.
+ if (OperandStart == OperandTable.size()) {
+ OperandTable.insert(OperandTable.end(), CurOperandList.begin(),
+ CurOperandList.end());
}
- OS << " }\n";
+ OS << OperandStart;
} else {
auto PrefixIt = OpcodeTable.find(I);
// If we have a non-empty table for it that's not 0, this is a prefix.
if (PrefixIt != OpcodeTable.end() && I && !PrefixPair.first) {
- OS << " { 0, ET_Prefix, 0, {}";
+ OS << " { 0, ET_Prefix, 0, 0";
} else {
- OS << " { 0, ET_Unused, 0, {}";
+ OS << " { 0, ET_Unused, 0, 0";
}
}
OS << " },\n";
}
OS << "};\n\n";
}
+ // Create a table of all operands:
+ OS << "const uint8_t OperandTable[] = {\n";
+ for (auto &Op : OperandTable) {
+ OS << " " << Op << ",\n";
+ }
+ OS << "};\n\n";
// Create a table of all extension tables:
OS << "struct { uint8_t Prefix; const WebAssemblyInstruction *Table; }\n";
OS << "PrefixTable[] = {\n";
diff --git a/contrib/llvm/utils/TableGen/X86ModRMFilters.h b/contrib/llvm/utils/TableGen/X86ModRMFilters.h
index 73d5602fd91c..b0248e878d07 100644
--- a/contrib/llvm/utils/TableGen/X86ModRMFilters.h
+++ b/contrib/llvm/utils/TableGen/X86ModRMFilters.h
@@ -38,7 +38,7 @@ public:
/// @result - True if the filter returns the same value for any ModR/M
/// byte; false if not.
virtual bool isDumb() const { return false; }
-
+
/// accepts - Indicates whether the filter accepts a particular ModR/M
/// byte value.
///
@@ -85,7 +85,7 @@ public:
};
/// ExtendedFilter - Extended opcodes are classified based on the value of the
-/// mod field [bits 7-6] and the value of the nnn field [bits 5-3].
+/// mod field [bits 7-6] and the value of the nnn field [bits 5-3].
class ExtendedFilter : public ModRMFilter {
void anchor() override;
bool R;
@@ -96,7 +96,7 @@ public:
/// \param r True if the mod field must be set to 11; false otherwise.
/// The name is explained at ModFilter.
/// \param nnn The required value of the nnn field.
- ExtendedFilter(bool r, uint8_t nnn) :
+ ExtendedFilter(bool r, uint8_t nnn) :
ModRMFilter(),
R(r),
NNN(nnn) {
diff --git a/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp b/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
index efd5c195d02b..2f9b428b8cfe 100644
--- a/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -1132,8 +1132,8 @@ RecognizableInstr::relocationEncodingFromString(const std::string &s,
ENCODING("i64i32imm_pcrel", ENCODING_ID)
ENCODING("i16imm_pcrel", ENCODING_IW)
ENCODING("i32imm_pcrel", ENCODING_ID)
- ENCODING("brtarget32", ENCODING_Iv)
- ENCODING("brtarget16", ENCODING_Iv)
+ ENCODING("brtarget32", ENCODING_ID)
+ ENCODING("brtarget16", ENCODING_IW)
ENCODING("brtarget8", ENCODING_IB)
ENCODING("i64imm", ENCODING_IO)
ENCODING("offset16_8", ENCODING_Ia)